diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,210429 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 30057, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "grad_norm": 3.0465035438537598, + "learning_rate": 4e-08, + "loss": 3.7659, + "step": 1 + }, + { + "epoch": 0.0, + "grad_norm": 3.150376796722412, + "learning_rate": 8e-08, + "loss": 3.7818, + "step": 2 + }, + { + "epoch": 0.0, + "grad_norm": 2.9999306201934814, + "learning_rate": 1.2000000000000002e-07, + "loss": 3.7177, + "step": 3 + }, + { + "epoch": 0.0, + "grad_norm": 2.96038556098938, + "learning_rate": 1.6e-07, + "loss": 3.7639, + "step": 4 + }, + { + "epoch": 0.0, + "grad_norm": 2.9733896255493164, + "learning_rate": 2.0000000000000002e-07, + "loss": 3.7662, + "step": 5 + }, + { + "epoch": 0.0, + "grad_norm": 2.938199996948242, + "learning_rate": 2.4000000000000003e-07, + "loss": 3.7483, + "step": 6 + }, + { + "epoch": 0.0, + "grad_norm": 3.0853888988494873, + "learning_rate": 2.8e-07, + "loss": 3.7558, + "step": 7 + }, + { + "epoch": 0.0, + "grad_norm": 3.0000698566436768, + "learning_rate": 3.2e-07, + "loss": 3.7618, + "step": 8 + }, + { + "epoch": 0.0, + "grad_norm": 2.9343674182891846, + "learning_rate": 3.6e-07, + "loss": 3.747, + "step": 9 + }, + { + "epoch": 0.0, + "grad_norm": 2.893684148788452, + "learning_rate": 4.0000000000000003e-07, + "loss": 3.7545, + "step": 10 + }, + { + "epoch": 0.0, + "grad_norm": 3.076113700866699, + "learning_rate": 4.4e-07, + "loss": 3.7813, + "step": 11 + }, + { + "epoch": 0.0, + "grad_norm": 2.9859817028045654, + "learning_rate": 4.800000000000001e-07, + "loss": 3.7794, + "step": 12 + }, + { + "epoch": 0.0, + "grad_norm": 3.052562713623047, + "learning_rate": 5.2e-07, + "loss": 3.7256, + "step": 13 + }, + { + "epoch": 0.0, + "grad_norm": 2.993824005126953, + "learning_rate": 5.6e-07, + "loss": 3.7362, + "step": 14 + }, + { + "epoch": 0.0, + "grad_norm": 3.080522060394287, + "learning_rate": 6.000000000000001e-07, + "loss": 3.7595, + "step": 15 + }, + { + "epoch": 0.0, + "grad_norm": 2.9374191761016846, + "learning_rate": 6.4e-07, + "loss": 3.7689, + "step": 16 + }, + { + "epoch": 0.0, + "grad_norm": 2.9945151805877686, + "learning_rate": 6.800000000000001e-07, + "loss": 3.7712, + "step": 17 + }, + { + "epoch": 0.0, + "grad_norm": 2.9775748252868652, + "learning_rate": 7.2e-07, + "loss": 3.7035, + "step": 18 + }, + { + "epoch": 0.0, + "grad_norm": 3.0326344966888428, + "learning_rate": 7.6e-07, + "loss": 3.7356, + "step": 19 + }, + { + "epoch": 0.0, + "grad_norm": 2.946686029434204, + "learning_rate": 8.000000000000001e-07, + "loss": 3.739, + "step": 20 + }, + { + "epoch": 0.0, + "grad_norm": 2.9172182083129883, + "learning_rate": 8.400000000000001e-07, + "loss": 3.7233, + "step": 21 + }, + { + "epoch": 0.0, + "grad_norm": 2.9136874675750732, + "learning_rate": 8.8e-07, + "loss": 3.7611, + "step": 22 + }, + { + "epoch": 0.0, + "grad_norm": 2.94134521484375, + "learning_rate": 9.200000000000001e-07, + "loss": 3.7566, + "step": 23 + }, + { + "epoch": 0.0, + "grad_norm": 2.8853530883789062, + "learning_rate": 9.600000000000001e-07, + "loss": 3.7405, + "step": 24 + }, + { + "epoch": 0.0, + "grad_norm": 2.9336092472076416, + "learning_rate": 1.0000000000000002e-06, + "loss": 3.7482, + "step": 25 + }, + { + "epoch": 0.0, + "grad_norm": 3.00654673576355, + "learning_rate": 1.04e-06, + "loss": 3.6785, + "step": 26 + }, + { + "epoch": 0.0, + "grad_norm": 2.8633406162261963, + "learning_rate": 1.08e-06, + "loss": 3.7673, + "step": 27 + }, + { + "epoch": 0.0, + "grad_norm": 2.936859369277954, + "learning_rate": 1.12e-06, + "loss": 3.7625, + "step": 28 + }, + { + "epoch": 0.0, + "grad_norm": 2.8874454498291016, + "learning_rate": 1.1600000000000001e-06, + "loss": 3.7298, + "step": 29 + }, + { + "epoch": 0.0, + "grad_norm": 2.8940701484680176, + "learning_rate": 1.2000000000000002e-06, + "loss": 3.7046, + "step": 30 + }, + { + "epoch": 0.0, + "grad_norm": 2.86255145072937, + "learning_rate": 1.2400000000000002e-06, + "loss": 3.7295, + "step": 31 + }, + { + "epoch": 0.0, + "grad_norm": 2.8795437812805176, + "learning_rate": 1.28e-06, + "loss": 3.6807, + "step": 32 + }, + { + "epoch": 0.0, + "grad_norm": 2.8854005336761475, + "learning_rate": 1.32e-06, + "loss": 3.7068, + "step": 33 + }, + { + "epoch": 0.0, + "grad_norm": 2.959508180618286, + "learning_rate": 1.3600000000000001e-06, + "loss": 3.7281, + "step": 34 + }, + { + "epoch": 0.0, + "grad_norm": 2.840251922607422, + "learning_rate": 1.4000000000000001e-06, + "loss": 3.6889, + "step": 35 + }, + { + "epoch": 0.0, + "grad_norm": 2.74824857711792, + "learning_rate": 1.44e-06, + "loss": 3.6982, + "step": 36 + }, + { + "epoch": 0.0, + "grad_norm": 2.772235631942749, + "learning_rate": 1.48e-06, + "loss": 3.6856, + "step": 37 + }, + { + "epoch": 0.0, + "grad_norm": 2.7824931144714355, + "learning_rate": 1.52e-06, + "loss": 3.6938, + "step": 38 + }, + { + "epoch": 0.0, + "grad_norm": 2.721611261367798, + "learning_rate": 1.56e-06, + "loss": 3.6782, + "step": 39 + }, + { + "epoch": 0.0, + "grad_norm": 2.773103952407837, + "learning_rate": 1.6000000000000001e-06, + "loss": 3.6949, + "step": 40 + }, + { + "epoch": 0.0, + "grad_norm": 2.671250581741333, + "learning_rate": 1.6400000000000002e-06, + "loss": 3.6881, + "step": 41 + }, + { + "epoch": 0.0, + "grad_norm": 2.7398197650909424, + "learning_rate": 1.6800000000000002e-06, + "loss": 3.7502, + "step": 42 + }, + { + "epoch": 0.0, + "grad_norm": 2.7565064430236816, + "learning_rate": 1.72e-06, + "loss": 3.6778, + "step": 43 + }, + { + "epoch": 0.0, + "grad_norm": 2.6314198970794678, + "learning_rate": 1.76e-06, + "loss": 3.7221, + "step": 44 + }, + { + "epoch": 0.0, + "grad_norm": 2.642784595489502, + "learning_rate": 1.8000000000000001e-06, + "loss": 3.6884, + "step": 45 + }, + { + "epoch": 0.0, + "grad_norm": 2.591935873031616, + "learning_rate": 1.8400000000000002e-06, + "loss": 3.6326, + "step": 46 + }, + { + "epoch": 0.0, + "grad_norm": 2.6190409660339355, + "learning_rate": 1.8800000000000002e-06, + "loss": 3.6362, + "step": 47 + }, + { + "epoch": 0.0, + "grad_norm": 2.597822904586792, + "learning_rate": 1.9200000000000003e-06, + "loss": 3.6443, + "step": 48 + }, + { + "epoch": 0.0, + "grad_norm": 2.5975561141967773, + "learning_rate": 1.9600000000000003e-06, + "loss": 3.6985, + "step": 49 + }, + { + "epoch": 0.0, + "grad_norm": 2.5982890129089355, + "learning_rate": 2.0000000000000003e-06, + "loss": 3.7032, + "step": 50 + }, + { + "epoch": 0.0, + "grad_norm": 2.6563563346862793, + "learning_rate": 2.04e-06, + "loss": 3.6719, + "step": 51 + }, + { + "epoch": 0.0, + "grad_norm": 2.49603009223938, + "learning_rate": 2.08e-06, + "loss": 3.6482, + "step": 52 + }, + { + "epoch": 0.0, + "grad_norm": 2.5944085121154785, + "learning_rate": 2.12e-06, + "loss": 3.6701, + "step": 53 + }, + { + "epoch": 0.0, + "grad_norm": 2.583873987197876, + "learning_rate": 2.16e-06, + "loss": 3.6752, + "step": 54 + }, + { + "epoch": 0.0, + "grad_norm": 2.457430601119995, + "learning_rate": 2.2e-06, + "loss": 3.6557, + "step": 55 + }, + { + "epoch": 0.0, + "grad_norm": 2.4788320064544678, + "learning_rate": 2.24e-06, + "loss": 3.619, + "step": 56 + }, + { + "epoch": 0.0, + "grad_norm": 2.5159473419189453, + "learning_rate": 2.28e-06, + "loss": 3.5979, + "step": 57 + }, + { + "epoch": 0.0, + "grad_norm": 2.412249803543091, + "learning_rate": 2.3200000000000002e-06, + "loss": 3.6781, + "step": 58 + }, + { + "epoch": 0.0, + "grad_norm": 2.4523682594299316, + "learning_rate": 2.3600000000000003e-06, + "loss": 3.6549, + "step": 59 + }, + { + "epoch": 0.0, + "grad_norm": 2.4825010299682617, + "learning_rate": 2.4000000000000003e-06, + "loss": 3.6678, + "step": 60 + }, + { + "epoch": 0.0, + "grad_norm": 2.424654960632324, + "learning_rate": 2.4400000000000004e-06, + "loss": 3.6639, + "step": 61 + }, + { + "epoch": 0.0, + "grad_norm": 2.4321727752685547, + "learning_rate": 2.4800000000000004e-06, + "loss": 3.6527, + "step": 62 + }, + { + "epoch": 0.0, + "grad_norm": 2.429910182952881, + "learning_rate": 2.52e-06, + "loss": 3.6242, + "step": 63 + }, + { + "epoch": 0.0, + "grad_norm": 2.3708126544952393, + "learning_rate": 2.56e-06, + "loss": 3.6498, + "step": 64 + }, + { + "epoch": 0.0, + "grad_norm": 2.3835256099700928, + "learning_rate": 2.6e-06, + "loss": 3.6286, + "step": 65 + }, + { + "epoch": 0.0, + "grad_norm": 2.327960729598999, + "learning_rate": 2.64e-06, + "loss": 3.5969, + "step": 66 + }, + { + "epoch": 0.0, + "grad_norm": 2.318544864654541, + "learning_rate": 2.68e-06, + "loss": 3.6336, + "step": 67 + }, + { + "epoch": 0.0, + "grad_norm": 2.247493028640747, + "learning_rate": 2.7200000000000002e-06, + "loss": 3.6448, + "step": 68 + }, + { + "epoch": 0.0, + "grad_norm": 2.3056132793426514, + "learning_rate": 2.7600000000000003e-06, + "loss": 3.6668, + "step": 69 + }, + { + "epoch": 0.0, + "grad_norm": 2.285571813583374, + "learning_rate": 2.8000000000000003e-06, + "loss": 3.6235, + "step": 70 + }, + { + "epoch": 0.0, + "grad_norm": 2.2843549251556396, + "learning_rate": 2.84e-06, + "loss": 3.5946, + "step": 71 + }, + { + "epoch": 0.0, + "grad_norm": 2.2572786808013916, + "learning_rate": 2.88e-06, + "loss": 3.5957, + "step": 72 + }, + { + "epoch": 0.0, + "grad_norm": 2.1217710971832275, + "learning_rate": 2.92e-06, + "loss": 3.5305, + "step": 73 + }, + { + "epoch": 0.0, + "grad_norm": 2.0943527221679688, + "learning_rate": 2.96e-06, + "loss": 3.5057, + "step": 74 + }, + { + "epoch": 0.0, + "grad_norm": 2.1098923683166504, + "learning_rate": 3e-06, + "loss": 3.5331, + "step": 75 + }, + { + "epoch": 0.0, + "grad_norm": 2.1389548778533936, + "learning_rate": 3.04e-06, + "loss": 3.598, + "step": 76 + }, + { + "epoch": 0.0, + "grad_norm": 2.1352689266204834, + "learning_rate": 3.08e-06, + "loss": 3.5457, + "step": 77 + }, + { + "epoch": 0.0, + "grad_norm": 2.105759382247925, + "learning_rate": 3.12e-06, + "loss": 3.5604, + "step": 78 + }, + { + "epoch": 0.0, + "grad_norm": 2.1769442558288574, + "learning_rate": 3.1600000000000002e-06, + "loss": 3.576, + "step": 79 + }, + { + "epoch": 0.0, + "grad_norm": 2.043865442276001, + "learning_rate": 3.2000000000000003e-06, + "loss": 3.486, + "step": 80 + }, + { + "epoch": 0.0, + "grad_norm": 2.0029489994049072, + "learning_rate": 3.2400000000000003e-06, + "loss": 3.5656, + "step": 81 + }, + { + "epoch": 0.0, + "grad_norm": 1.9802006483078003, + "learning_rate": 3.2800000000000004e-06, + "loss": 3.5367, + "step": 82 + }, + { + "epoch": 0.0, + "grad_norm": 1.99752676486969, + "learning_rate": 3.3200000000000004e-06, + "loss": 3.5021, + "step": 83 + }, + { + "epoch": 0.0, + "grad_norm": 1.9963101148605347, + "learning_rate": 3.3600000000000004e-06, + "loss": 3.5611, + "step": 84 + }, + { + "epoch": 0.0, + "grad_norm": 1.9249954223632812, + "learning_rate": 3.4000000000000005e-06, + "loss": 3.5318, + "step": 85 + }, + { + "epoch": 0.0, + "grad_norm": 1.9695982933044434, + "learning_rate": 3.44e-06, + "loss": 3.5412, + "step": 86 + }, + { + "epoch": 0.0, + "grad_norm": 1.9668208360671997, + "learning_rate": 3.48e-06, + "loss": 3.5427, + "step": 87 + }, + { + "epoch": 0.0, + "grad_norm": 1.8661860227584839, + "learning_rate": 3.52e-06, + "loss": 3.5171, + "step": 88 + }, + { + "epoch": 0.0, + "grad_norm": 1.9764589071273804, + "learning_rate": 3.5600000000000002e-06, + "loss": 3.4664, + "step": 89 + }, + { + "epoch": 0.0, + "grad_norm": 1.8449292182922363, + "learning_rate": 3.6000000000000003e-06, + "loss": 3.4972, + "step": 90 + }, + { + "epoch": 0.0, + "grad_norm": 1.8575401306152344, + "learning_rate": 3.6400000000000003e-06, + "loss": 3.4985, + "step": 91 + }, + { + "epoch": 0.0, + "grad_norm": 1.8120278120040894, + "learning_rate": 3.6800000000000003e-06, + "loss": 3.5152, + "step": 92 + }, + { + "epoch": 0.0, + "grad_norm": 1.7762423753738403, + "learning_rate": 3.7200000000000004e-06, + "loss": 3.5163, + "step": 93 + }, + { + "epoch": 0.0, + "grad_norm": 1.766471266746521, + "learning_rate": 3.7600000000000004e-06, + "loss": 3.4853, + "step": 94 + }, + { + "epoch": 0.0, + "grad_norm": 1.8277347087860107, + "learning_rate": 3.8000000000000005e-06, + "loss": 3.4618, + "step": 95 + }, + { + "epoch": 0.0, + "grad_norm": 1.7625563144683838, + "learning_rate": 3.8400000000000005e-06, + "loss": 3.4531, + "step": 96 + }, + { + "epoch": 0.0, + "grad_norm": 1.7076278924942017, + "learning_rate": 3.88e-06, + "loss": 3.4581, + "step": 97 + }, + { + "epoch": 0.0, + "grad_norm": 1.7219356298446655, + "learning_rate": 3.920000000000001e-06, + "loss": 3.5053, + "step": 98 + }, + { + "epoch": 0.0, + "grad_norm": 1.6866194009780884, + "learning_rate": 3.96e-06, + "loss": 3.4822, + "step": 99 + }, + { + "epoch": 0.0, + "grad_norm": 1.6395337581634521, + "learning_rate": 4.000000000000001e-06, + "loss": 3.4826, + "step": 100 + }, + { + "epoch": 0.0, + "grad_norm": 1.6182422637939453, + "learning_rate": 4.04e-06, + "loss": 3.4144, + "step": 101 + }, + { + "epoch": 0.0, + "grad_norm": 1.6277965307235718, + "learning_rate": 4.08e-06, + "loss": 3.4628, + "step": 102 + }, + { + "epoch": 0.0, + "grad_norm": 1.6274092197418213, + "learning_rate": 4.12e-06, + "loss": 3.4251, + "step": 103 + }, + { + "epoch": 0.0, + "grad_norm": 1.5992956161499023, + "learning_rate": 4.16e-06, + "loss": 3.4467, + "step": 104 + }, + { + "epoch": 0.0, + "grad_norm": 1.5614099502563477, + "learning_rate": 4.2000000000000004e-06, + "loss": 3.4939, + "step": 105 + }, + { + "epoch": 0.0, + "grad_norm": 1.5395946502685547, + "learning_rate": 4.24e-06, + "loss": 3.4063, + "step": 106 + }, + { + "epoch": 0.0, + "grad_norm": 1.5085062980651855, + "learning_rate": 4.2800000000000005e-06, + "loss": 3.4316, + "step": 107 + }, + { + "epoch": 0.0, + "grad_norm": 1.5296846628189087, + "learning_rate": 4.32e-06, + "loss": 3.4604, + "step": 108 + }, + { + "epoch": 0.0, + "grad_norm": 1.5282198190689087, + "learning_rate": 4.360000000000001e-06, + "loss": 3.4176, + "step": 109 + }, + { + "epoch": 0.0, + "grad_norm": 1.5429089069366455, + "learning_rate": 4.4e-06, + "loss": 3.4957, + "step": 110 + }, + { + "epoch": 0.0, + "grad_norm": 1.4946658611297607, + "learning_rate": 4.440000000000001e-06, + "loss": 3.4818, + "step": 111 + }, + { + "epoch": 0.0, + "grad_norm": 1.4442675113677979, + "learning_rate": 4.48e-06, + "loss": 3.4255, + "step": 112 + }, + { + "epoch": 0.0, + "grad_norm": 1.4594030380249023, + "learning_rate": 4.520000000000001e-06, + "loss": 3.3965, + "step": 113 + }, + { + "epoch": 0.0, + "grad_norm": 1.4332916736602783, + "learning_rate": 4.56e-06, + "loss": 3.362, + "step": 114 + }, + { + "epoch": 0.0, + "grad_norm": 1.431638479232788, + "learning_rate": 4.600000000000001e-06, + "loss": 3.4073, + "step": 115 + }, + { + "epoch": 0.0, + "grad_norm": 1.4588367938995361, + "learning_rate": 4.6400000000000005e-06, + "loss": 3.4292, + "step": 116 + }, + { + "epoch": 0.0, + "grad_norm": 1.3806376457214355, + "learning_rate": 4.680000000000001e-06, + "loss": 3.4422, + "step": 117 + }, + { + "epoch": 0.0, + "grad_norm": 1.3173532485961914, + "learning_rate": 4.7200000000000005e-06, + "loss": 3.3783, + "step": 118 + }, + { + "epoch": 0.0, + "grad_norm": 1.369532823562622, + "learning_rate": 4.76e-06, + "loss": 3.3777, + "step": 119 + }, + { + "epoch": 0.0, + "grad_norm": 1.3398481607437134, + "learning_rate": 4.800000000000001e-06, + "loss": 3.4096, + "step": 120 + }, + { + "epoch": 0.0, + "grad_norm": 1.2976102828979492, + "learning_rate": 4.84e-06, + "loss": 3.3861, + "step": 121 + }, + { + "epoch": 0.0, + "grad_norm": 1.3191262483596802, + "learning_rate": 4.880000000000001e-06, + "loss": 3.4147, + "step": 122 + }, + { + "epoch": 0.0, + "grad_norm": 1.3748778104782104, + "learning_rate": 4.92e-06, + "loss": 3.3931, + "step": 123 + }, + { + "epoch": 0.0, + "grad_norm": 1.2943419218063354, + "learning_rate": 4.960000000000001e-06, + "loss": 3.3655, + "step": 124 + }, + { + "epoch": 0.0, + "grad_norm": 1.3425780534744263, + "learning_rate": 5e-06, + "loss": 3.4313, + "step": 125 + }, + { + "epoch": 0.0, + "grad_norm": 1.278464436531067, + "learning_rate": 5.04e-06, + "loss": 3.3519, + "step": 126 + }, + { + "epoch": 0.0, + "grad_norm": 1.2368496656417847, + "learning_rate": 5.0800000000000005e-06, + "loss": 3.3372, + "step": 127 + }, + { + "epoch": 0.0, + "grad_norm": 1.2316712141036987, + "learning_rate": 5.12e-06, + "loss": 3.325, + "step": 128 + }, + { + "epoch": 0.0, + "grad_norm": 1.177514672279358, + "learning_rate": 5.1600000000000006e-06, + "loss": 3.3653, + "step": 129 + }, + { + "epoch": 0.0, + "grad_norm": 1.2126216888427734, + "learning_rate": 5.2e-06, + "loss": 3.3799, + "step": 130 + }, + { + "epoch": 0.0, + "grad_norm": 1.159932017326355, + "learning_rate": 5.240000000000001e-06, + "loss": 3.3239, + "step": 131 + }, + { + "epoch": 0.0, + "grad_norm": 1.2238554954528809, + "learning_rate": 5.28e-06, + "loss": 3.3393, + "step": 132 + }, + { + "epoch": 0.0, + "grad_norm": 1.1728185415267944, + "learning_rate": 5.320000000000001e-06, + "loss": 3.3066, + "step": 133 + }, + { + "epoch": 0.0, + "grad_norm": 1.16297447681427, + "learning_rate": 5.36e-06, + "loss": 3.306, + "step": 134 + }, + { + "epoch": 0.0, + "grad_norm": 1.1513962745666504, + "learning_rate": 5.400000000000001e-06, + "loss": 3.3035, + "step": 135 + }, + { + "epoch": 0.0, + "grad_norm": 1.190287470817566, + "learning_rate": 5.4400000000000004e-06, + "loss": 3.311, + "step": 136 + }, + { + "epoch": 0.0, + "grad_norm": 1.1670467853546143, + "learning_rate": 5.480000000000001e-06, + "loss": 3.2978, + "step": 137 + }, + { + "epoch": 0.0, + "grad_norm": 1.0985170602798462, + "learning_rate": 5.5200000000000005e-06, + "loss": 3.3456, + "step": 138 + }, + { + "epoch": 0.0, + "grad_norm": 1.119649887084961, + "learning_rate": 5.560000000000001e-06, + "loss": 3.2801, + "step": 139 + }, + { + "epoch": 0.0, + "grad_norm": 1.1619948148727417, + "learning_rate": 5.600000000000001e-06, + "loss": 3.2839, + "step": 140 + }, + { + "epoch": 0.0, + "grad_norm": 1.0866366624832153, + "learning_rate": 5.64e-06, + "loss": 3.2348, + "step": 141 + }, + { + "epoch": 0.0, + "grad_norm": 1.0735145807266235, + "learning_rate": 5.68e-06, + "loss": 3.2547, + "step": 142 + }, + { + "epoch": 0.0, + "grad_norm": 1.08341646194458, + "learning_rate": 5.72e-06, + "loss": 3.2889, + "step": 143 + }, + { + "epoch": 0.0, + "grad_norm": 1.0448150634765625, + "learning_rate": 5.76e-06, + "loss": 3.2736, + "step": 144 + }, + { + "epoch": 0.0, + "grad_norm": 1.1440234184265137, + "learning_rate": 5.8e-06, + "loss": 3.2545, + "step": 145 + }, + { + "epoch": 0.0, + "grad_norm": 1.0658066272735596, + "learning_rate": 5.84e-06, + "loss": 3.2877, + "step": 146 + }, + { + "epoch": 0.0, + "grad_norm": 1.024755597114563, + "learning_rate": 5.8800000000000005e-06, + "loss": 3.3036, + "step": 147 + }, + { + "epoch": 0.0, + "grad_norm": 1.0687769651412964, + "learning_rate": 5.92e-06, + "loss": 3.2758, + "step": 148 + }, + { + "epoch": 0.0, + "grad_norm": 1.014294147491455, + "learning_rate": 5.9600000000000005e-06, + "loss": 3.2958, + "step": 149 + }, + { + "epoch": 0.0, + "grad_norm": 1.03204345703125, + "learning_rate": 6e-06, + "loss": 3.2687, + "step": 150 + }, + { + "epoch": 0.01, + "grad_norm": 1.007180094718933, + "learning_rate": 6.040000000000001e-06, + "loss": 3.2561, + "step": 151 + }, + { + "epoch": 0.01, + "grad_norm": 1.020760178565979, + "learning_rate": 6.08e-06, + "loss": 3.2406, + "step": 152 + }, + { + "epoch": 0.01, + "grad_norm": 0.9961280226707458, + "learning_rate": 6.120000000000001e-06, + "loss": 3.2289, + "step": 153 + }, + { + "epoch": 0.01, + "grad_norm": 1.0122206211090088, + "learning_rate": 6.16e-06, + "loss": 3.2285, + "step": 154 + }, + { + "epoch": 0.01, + "grad_norm": 1.0377811193466187, + "learning_rate": 6.200000000000001e-06, + "loss": 3.1956, + "step": 155 + }, + { + "epoch": 0.01, + "grad_norm": 0.9753139019012451, + "learning_rate": 6.24e-06, + "loss": 3.2803, + "step": 156 + }, + { + "epoch": 0.01, + "grad_norm": 0.9736471772193909, + "learning_rate": 6.280000000000001e-06, + "loss": 3.2468, + "step": 157 + }, + { + "epoch": 0.01, + "grad_norm": 0.9775617122650146, + "learning_rate": 6.3200000000000005e-06, + "loss": 3.1796, + "step": 158 + }, + { + "epoch": 0.01, + "grad_norm": 0.9449413418769836, + "learning_rate": 6.360000000000001e-06, + "loss": 3.2506, + "step": 159 + }, + { + "epoch": 0.01, + "grad_norm": 0.9355918169021606, + "learning_rate": 6.4000000000000006e-06, + "loss": 3.2165, + "step": 160 + }, + { + "epoch": 0.01, + "grad_norm": 0.9462165236473083, + "learning_rate": 6.440000000000001e-06, + "loss": 3.2012, + "step": 161 + }, + { + "epoch": 0.01, + "grad_norm": 0.9783335328102112, + "learning_rate": 6.480000000000001e-06, + "loss": 3.2207, + "step": 162 + }, + { + "epoch": 0.01, + "grad_norm": 0.9430908560752869, + "learning_rate": 6.520000000000001e-06, + "loss": 3.2187, + "step": 163 + }, + { + "epoch": 0.01, + "grad_norm": 0.9193970561027527, + "learning_rate": 6.560000000000001e-06, + "loss": 3.1716, + "step": 164 + }, + { + "epoch": 0.01, + "grad_norm": 0.9126341938972473, + "learning_rate": 6.600000000000001e-06, + "loss": 3.2563, + "step": 165 + }, + { + "epoch": 0.01, + "grad_norm": 0.9091991186141968, + "learning_rate": 6.640000000000001e-06, + "loss": 3.1949, + "step": 166 + }, + { + "epoch": 0.01, + "grad_norm": 0.9056283235549927, + "learning_rate": 6.680000000000001e-06, + "loss": 3.1961, + "step": 167 + }, + { + "epoch": 0.01, + "grad_norm": 0.9239982962608337, + "learning_rate": 6.720000000000001e-06, + "loss": 3.2164, + "step": 168 + }, + { + "epoch": 0.01, + "grad_norm": 0.8863268494606018, + "learning_rate": 6.760000000000001e-06, + "loss": 3.1951, + "step": 169 + }, + { + "epoch": 0.01, + "grad_norm": 0.9363348484039307, + "learning_rate": 6.800000000000001e-06, + "loss": 3.1816, + "step": 170 + }, + { + "epoch": 0.01, + "grad_norm": 0.9083738923072815, + "learning_rate": 6.8400000000000014e-06, + "loss": 3.18, + "step": 171 + }, + { + "epoch": 0.01, + "grad_norm": 0.8851535320281982, + "learning_rate": 6.88e-06, + "loss": 3.1975, + "step": 172 + }, + { + "epoch": 0.01, + "grad_norm": 0.8942703604698181, + "learning_rate": 6.92e-06, + "loss": 3.2521, + "step": 173 + }, + { + "epoch": 0.01, + "grad_norm": 0.8518399000167847, + "learning_rate": 6.96e-06, + "loss": 3.1863, + "step": 174 + }, + { + "epoch": 0.01, + "grad_norm": 0.8763868808746338, + "learning_rate": 7e-06, + "loss": 3.1749, + "step": 175 + }, + { + "epoch": 0.01, + "grad_norm": 0.8461930155754089, + "learning_rate": 7.04e-06, + "loss": 3.2139, + "step": 176 + }, + { + "epoch": 0.01, + "grad_norm": 0.879628598690033, + "learning_rate": 7.08e-06, + "loss": 3.2323, + "step": 177 + }, + { + "epoch": 0.01, + "grad_norm": 0.890159010887146, + "learning_rate": 7.1200000000000004e-06, + "loss": 3.1331, + "step": 178 + }, + { + "epoch": 0.01, + "grad_norm": 0.8850238919258118, + "learning_rate": 7.16e-06, + "loss": 3.1267, + "step": 179 + }, + { + "epoch": 0.01, + "grad_norm": 0.8491682410240173, + "learning_rate": 7.2000000000000005e-06, + "loss": 3.1237, + "step": 180 + }, + { + "epoch": 0.01, + "grad_norm": 0.8608806133270264, + "learning_rate": 7.24e-06, + "loss": 3.1464, + "step": 181 + }, + { + "epoch": 0.01, + "grad_norm": 0.8595521450042725, + "learning_rate": 7.280000000000001e-06, + "loss": 3.1473, + "step": 182 + }, + { + "epoch": 0.01, + "grad_norm": 0.8413703441619873, + "learning_rate": 7.32e-06, + "loss": 3.1822, + "step": 183 + }, + { + "epoch": 0.01, + "grad_norm": 0.8773772120475769, + "learning_rate": 7.360000000000001e-06, + "loss": 3.1492, + "step": 184 + }, + { + "epoch": 0.01, + "grad_norm": 0.8412697911262512, + "learning_rate": 7.4e-06, + "loss": 3.2014, + "step": 185 + }, + { + "epoch": 0.01, + "grad_norm": 0.8224411606788635, + "learning_rate": 7.440000000000001e-06, + "loss": 3.1793, + "step": 186 + }, + { + "epoch": 0.01, + "grad_norm": 0.8693506717681885, + "learning_rate": 7.48e-06, + "loss": 3.1193, + "step": 187 + }, + { + "epoch": 0.01, + "grad_norm": 0.873849093914032, + "learning_rate": 7.520000000000001e-06, + "loss": 3.0898, + "step": 188 + }, + { + "epoch": 0.01, + "grad_norm": 0.8525444865226746, + "learning_rate": 7.5600000000000005e-06, + "loss": 3.0659, + "step": 189 + }, + { + "epoch": 0.01, + "grad_norm": 0.8342667818069458, + "learning_rate": 7.600000000000001e-06, + "loss": 3.1473, + "step": 190 + }, + { + "epoch": 0.01, + "grad_norm": 0.8115031719207764, + "learning_rate": 7.640000000000001e-06, + "loss": 3.1542, + "step": 191 + }, + { + "epoch": 0.01, + "grad_norm": 0.8224273324012756, + "learning_rate": 7.680000000000001e-06, + "loss": 3.1223, + "step": 192 + }, + { + "epoch": 0.01, + "grad_norm": 0.8318130970001221, + "learning_rate": 7.72e-06, + "loss": 3.1424, + "step": 193 + }, + { + "epoch": 0.01, + "grad_norm": 0.857342541217804, + "learning_rate": 7.76e-06, + "loss": 3.1513, + "step": 194 + }, + { + "epoch": 0.01, + "grad_norm": 0.8313223719596863, + "learning_rate": 7.800000000000002e-06, + "loss": 3.1, + "step": 195 + }, + { + "epoch": 0.01, + "grad_norm": 0.8101739883422852, + "learning_rate": 7.840000000000001e-06, + "loss": 3.1125, + "step": 196 + }, + { + "epoch": 0.01, + "grad_norm": 0.8099242448806763, + "learning_rate": 7.88e-06, + "loss": 3.164, + "step": 197 + }, + { + "epoch": 0.01, + "grad_norm": 0.8177672028541565, + "learning_rate": 7.92e-06, + "loss": 3.1515, + "step": 198 + }, + { + "epoch": 0.01, + "grad_norm": 0.8439546823501587, + "learning_rate": 7.960000000000002e-06, + "loss": 3.1087, + "step": 199 + }, + { + "epoch": 0.01, + "grad_norm": 0.80745929479599, + "learning_rate": 8.000000000000001e-06, + "loss": 3.1463, + "step": 200 + }, + { + "epoch": 0.01, + "grad_norm": 0.8696184754371643, + "learning_rate": 8.040000000000001e-06, + "loss": 3.1645, + "step": 201 + }, + { + "epoch": 0.01, + "grad_norm": 0.7973880767822266, + "learning_rate": 8.08e-06, + "loss": 3.111, + "step": 202 + }, + { + "epoch": 0.01, + "grad_norm": 0.8098641037940979, + "learning_rate": 8.120000000000002e-06, + "loss": 3.0834, + "step": 203 + }, + { + "epoch": 0.01, + "grad_norm": 0.8531391620635986, + "learning_rate": 8.16e-06, + "loss": 3.0951, + "step": 204 + }, + { + "epoch": 0.01, + "grad_norm": 0.8008347749710083, + "learning_rate": 8.2e-06, + "loss": 3.1017, + "step": 205 + }, + { + "epoch": 0.01, + "grad_norm": 0.7965345978736877, + "learning_rate": 8.24e-06, + "loss": 3.1638, + "step": 206 + }, + { + "epoch": 0.01, + "grad_norm": 0.7986938953399658, + "learning_rate": 8.28e-06, + "loss": 3.0561, + "step": 207 + }, + { + "epoch": 0.01, + "grad_norm": 0.7870634198188782, + "learning_rate": 8.32e-06, + "loss": 3.0892, + "step": 208 + }, + { + "epoch": 0.01, + "grad_norm": 0.7703922986984253, + "learning_rate": 8.36e-06, + "loss": 3.1091, + "step": 209 + }, + { + "epoch": 0.01, + "grad_norm": 0.804821252822876, + "learning_rate": 8.400000000000001e-06, + "loss": 3.094, + "step": 210 + }, + { + "epoch": 0.01, + "grad_norm": 0.7656739950180054, + "learning_rate": 8.44e-06, + "loss": 3.1213, + "step": 211 + }, + { + "epoch": 0.01, + "grad_norm": 0.8040069937705994, + "learning_rate": 8.48e-06, + "loss": 3.0878, + "step": 212 + }, + { + "epoch": 0.01, + "grad_norm": 0.7826994061470032, + "learning_rate": 8.52e-06, + "loss": 3.0809, + "step": 213 + }, + { + "epoch": 0.01, + "grad_norm": 0.7905282378196716, + "learning_rate": 8.560000000000001e-06, + "loss": 3.033, + "step": 214 + }, + { + "epoch": 0.01, + "grad_norm": 0.7479476928710938, + "learning_rate": 8.6e-06, + "loss": 3.0792, + "step": 215 + }, + { + "epoch": 0.01, + "grad_norm": 0.7685500979423523, + "learning_rate": 8.64e-06, + "loss": 3.1118, + "step": 216 + }, + { + "epoch": 0.01, + "grad_norm": 0.7756505012512207, + "learning_rate": 8.68e-06, + "loss": 3.0581, + "step": 217 + }, + { + "epoch": 0.01, + "grad_norm": 0.7733978629112244, + "learning_rate": 8.720000000000001e-06, + "loss": 3.1143, + "step": 218 + }, + { + "epoch": 0.01, + "grad_norm": 0.785362720489502, + "learning_rate": 8.76e-06, + "loss": 3.0869, + "step": 219 + }, + { + "epoch": 0.01, + "grad_norm": 0.7898158431053162, + "learning_rate": 8.8e-06, + "loss": 3.1271, + "step": 220 + }, + { + "epoch": 0.01, + "grad_norm": 0.7982162833213806, + "learning_rate": 8.84e-06, + "loss": 3.052, + "step": 221 + }, + { + "epoch": 0.01, + "grad_norm": 0.7816964983940125, + "learning_rate": 8.880000000000001e-06, + "loss": 3.0878, + "step": 222 + }, + { + "epoch": 0.01, + "grad_norm": 0.7702066898345947, + "learning_rate": 8.920000000000001e-06, + "loss": 3.0505, + "step": 223 + }, + { + "epoch": 0.01, + "grad_norm": 0.753216028213501, + "learning_rate": 8.96e-06, + "loss": 3.0464, + "step": 224 + }, + { + "epoch": 0.01, + "grad_norm": 0.8036941885948181, + "learning_rate": 9e-06, + "loss": 3.0093, + "step": 225 + }, + { + "epoch": 0.01, + "grad_norm": 0.7765317559242249, + "learning_rate": 9.040000000000002e-06, + "loss": 3.0214, + "step": 226 + }, + { + "epoch": 0.01, + "grad_norm": 0.7712041139602661, + "learning_rate": 9.080000000000001e-06, + "loss": 3.0204, + "step": 227 + }, + { + "epoch": 0.01, + "grad_norm": 0.7455520033836365, + "learning_rate": 9.12e-06, + "loss": 3.089, + "step": 228 + }, + { + "epoch": 0.01, + "grad_norm": 0.7890076637268066, + "learning_rate": 9.16e-06, + "loss": 2.9956, + "step": 229 + }, + { + "epoch": 0.01, + "grad_norm": 0.7368062734603882, + "learning_rate": 9.200000000000002e-06, + "loss": 2.95, + "step": 230 + }, + { + "epoch": 0.01, + "grad_norm": 0.7542151212692261, + "learning_rate": 9.240000000000001e-06, + "loss": 3.0833, + "step": 231 + }, + { + "epoch": 0.01, + "grad_norm": 0.7973144054412842, + "learning_rate": 9.280000000000001e-06, + "loss": 2.9593, + "step": 232 + }, + { + "epoch": 0.01, + "grad_norm": 0.7747605443000793, + "learning_rate": 9.32e-06, + "loss": 3.0537, + "step": 233 + }, + { + "epoch": 0.01, + "grad_norm": 0.7799984812736511, + "learning_rate": 9.360000000000002e-06, + "loss": 3.0924, + "step": 234 + }, + { + "epoch": 0.01, + "grad_norm": 0.760521650314331, + "learning_rate": 9.4e-06, + "loss": 3.0049, + "step": 235 + }, + { + "epoch": 0.01, + "grad_norm": 0.7392805218696594, + "learning_rate": 9.440000000000001e-06, + "loss": 3.0076, + "step": 236 + }, + { + "epoch": 0.01, + "grad_norm": 0.7766023278236389, + "learning_rate": 9.48e-06, + "loss": 3.0305, + "step": 237 + }, + { + "epoch": 0.01, + "grad_norm": 0.7325561046600342, + "learning_rate": 9.52e-06, + "loss": 3.0102, + "step": 238 + }, + { + "epoch": 0.01, + "grad_norm": 0.7446618676185608, + "learning_rate": 9.56e-06, + "loss": 3.0799, + "step": 239 + }, + { + "epoch": 0.01, + "grad_norm": 0.7309117913246155, + "learning_rate": 9.600000000000001e-06, + "loss": 3.0657, + "step": 240 + }, + { + "epoch": 0.01, + "grad_norm": 0.7644285559654236, + "learning_rate": 9.640000000000001e-06, + "loss": 2.9969, + "step": 241 + }, + { + "epoch": 0.01, + "grad_norm": 0.7534785866737366, + "learning_rate": 9.68e-06, + "loss": 3.043, + "step": 242 + }, + { + "epoch": 0.01, + "grad_norm": 0.756115198135376, + "learning_rate": 9.72e-06, + "loss": 3.034, + "step": 243 + }, + { + "epoch": 0.01, + "grad_norm": 0.7088952660560608, + "learning_rate": 9.760000000000001e-06, + "loss": 3.0089, + "step": 244 + }, + { + "epoch": 0.01, + "grad_norm": 0.7141646146774292, + "learning_rate": 9.800000000000001e-06, + "loss": 2.9914, + "step": 245 + }, + { + "epoch": 0.01, + "grad_norm": 0.7243670225143433, + "learning_rate": 9.84e-06, + "loss": 3.0275, + "step": 246 + }, + { + "epoch": 0.01, + "grad_norm": 0.7706453204154968, + "learning_rate": 9.88e-06, + "loss": 2.9697, + "step": 247 + }, + { + "epoch": 0.01, + "grad_norm": 0.7732541561126709, + "learning_rate": 9.920000000000002e-06, + "loss": 3.0194, + "step": 248 + }, + { + "epoch": 0.01, + "grad_norm": 0.7577429413795471, + "learning_rate": 9.960000000000001e-06, + "loss": 2.983, + "step": 249 + }, + { + "epoch": 0.01, + "grad_norm": 0.7420429587364197, + "learning_rate": 1e-05, + "loss": 3.0236, + "step": 250 + }, + { + "epoch": 0.01, + "grad_norm": 0.7250332236289978, + "learning_rate": 1.004e-05, + "loss": 3.0748, + "step": 251 + }, + { + "epoch": 0.01, + "grad_norm": 0.7337601780891418, + "learning_rate": 1.008e-05, + "loss": 2.95, + "step": 252 + }, + { + "epoch": 0.01, + "grad_norm": 0.7252420783042908, + "learning_rate": 1.0120000000000001e-05, + "loss": 2.9728, + "step": 253 + }, + { + "epoch": 0.01, + "grad_norm": 0.7484934329986572, + "learning_rate": 1.0160000000000001e-05, + "loss": 3.026, + "step": 254 + }, + { + "epoch": 0.01, + "grad_norm": 0.751584529876709, + "learning_rate": 1.02e-05, + "loss": 3.0389, + "step": 255 + }, + { + "epoch": 0.01, + "grad_norm": 0.716543972492218, + "learning_rate": 1.024e-05, + "loss": 2.9926, + "step": 256 + }, + { + "epoch": 0.01, + "grad_norm": 0.7203758955001831, + "learning_rate": 1.0280000000000002e-05, + "loss": 3.0239, + "step": 257 + }, + { + "epoch": 0.01, + "grad_norm": 0.7086358070373535, + "learning_rate": 1.0320000000000001e-05, + "loss": 2.9674, + "step": 258 + }, + { + "epoch": 0.01, + "grad_norm": 0.6801788210868835, + "learning_rate": 1.036e-05, + "loss": 2.9808, + "step": 259 + }, + { + "epoch": 0.01, + "grad_norm": 0.7126123309135437, + "learning_rate": 1.04e-05, + "loss": 3.0083, + "step": 260 + }, + { + "epoch": 0.01, + "grad_norm": 0.7446174025535583, + "learning_rate": 1.0440000000000002e-05, + "loss": 2.9397, + "step": 261 + }, + { + "epoch": 0.01, + "grad_norm": 0.7366334199905396, + "learning_rate": 1.0480000000000001e-05, + "loss": 3.0222, + "step": 262 + }, + { + "epoch": 0.01, + "grad_norm": 0.7224438786506653, + "learning_rate": 1.0520000000000001e-05, + "loss": 2.9666, + "step": 263 + }, + { + "epoch": 0.01, + "grad_norm": 0.7296943664550781, + "learning_rate": 1.056e-05, + "loss": 2.9767, + "step": 264 + }, + { + "epoch": 0.01, + "grad_norm": 0.7285534143447876, + "learning_rate": 1.0600000000000002e-05, + "loss": 2.9713, + "step": 265 + }, + { + "epoch": 0.01, + "grad_norm": 0.7207460999488831, + "learning_rate": 1.0640000000000001e-05, + "loss": 2.9924, + "step": 266 + }, + { + "epoch": 0.01, + "grad_norm": 0.7306897640228271, + "learning_rate": 1.0680000000000001e-05, + "loss": 3.0354, + "step": 267 + }, + { + "epoch": 0.01, + "grad_norm": 0.6970009803771973, + "learning_rate": 1.072e-05, + "loss": 2.9905, + "step": 268 + }, + { + "epoch": 0.01, + "grad_norm": 0.7516326904296875, + "learning_rate": 1.0760000000000002e-05, + "loss": 2.9387, + "step": 269 + }, + { + "epoch": 0.01, + "grad_norm": 0.7385818958282471, + "learning_rate": 1.0800000000000002e-05, + "loss": 2.95, + "step": 270 + }, + { + "epoch": 0.01, + "grad_norm": 0.7146047353744507, + "learning_rate": 1.0840000000000001e-05, + "loss": 2.929, + "step": 271 + }, + { + "epoch": 0.01, + "grad_norm": 0.7485090494155884, + "learning_rate": 1.0880000000000001e-05, + "loss": 2.932, + "step": 272 + }, + { + "epoch": 0.01, + "grad_norm": 0.7019323110580444, + "learning_rate": 1.0920000000000002e-05, + "loss": 3.0092, + "step": 273 + }, + { + "epoch": 0.01, + "grad_norm": 0.7071720361709595, + "learning_rate": 1.0960000000000002e-05, + "loss": 2.9729, + "step": 274 + }, + { + "epoch": 0.01, + "grad_norm": 0.7166799306869507, + "learning_rate": 1.1000000000000001e-05, + "loss": 2.9956, + "step": 275 + }, + { + "epoch": 0.01, + "grad_norm": 0.7043001055717468, + "learning_rate": 1.1040000000000001e-05, + "loss": 2.9648, + "step": 276 + }, + { + "epoch": 0.01, + "grad_norm": 0.6917253732681274, + "learning_rate": 1.1080000000000002e-05, + "loss": 3.01, + "step": 277 + }, + { + "epoch": 0.01, + "grad_norm": 0.7230256199836731, + "learning_rate": 1.1120000000000002e-05, + "loss": 2.9972, + "step": 278 + }, + { + "epoch": 0.01, + "grad_norm": 0.7071083188056946, + "learning_rate": 1.1160000000000002e-05, + "loss": 2.9082, + "step": 279 + }, + { + "epoch": 0.01, + "grad_norm": 0.7028868794441223, + "learning_rate": 1.1200000000000001e-05, + "loss": 2.9577, + "step": 280 + }, + { + "epoch": 0.01, + "grad_norm": 0.7002159357070923, + "learning_rate": 1.1240000000000002e-05, + "loss": 2.9465, + "step": 281 + }, + { + "epoch": 0.01, + "grad_norm": 0.7107498645782471, + "learning_rate": 1.128e-05, + "loss": 2.9748, + "step": 282 + }, + { + "epoch": 0.01, + "grad_norm": 0.7359018325805664, + "learning_rate": 1.132e-05, + "loss": 2.9724, + "step": 283 + }, + { + "epoch": 0.01, + "grad_norm": 0.7131576538085938, + "learning_rate": 1.136e-05, + "loss": 2.9494, + "step": 284 + }, + { + "epoch": 0.01, + "grad_norm": 0.7292269468307495, + "learning_rate": 1.14e-05, + "loss": 2.9838, + "step": 285 + }, + { + "epoch": 0.01, + "grad_norm": 0.7375213503837585, + "learning_rate": 1.144e-05, + "loss": 2.9809, + "step": 286 + }, + { + "epoch": 0.01, + "grad_norm": 0.7094799876213074, + "learning_rate": 1.148e-05, + "loss": 2.993, + "step": 287 + }, + { + "epoch": 0.01, + "grad_norm": 0.7429577708244324, + "learning_rate": 1.152e-05, + "loss": 2.8905, + "step": 288 + }, + { + "epoch": 0.01, + "grad_norm": 0.7328982353210449, + "learning_rate": 1.156e-05, + "loss": 2.9366, + "step": 289 + }, + { + "epoch": 0.01, + "grad_norm": 0.7140700221061707, + "learning_rate": 1.16e-05, + "loss": 2.9992, + "step": 290 + }, + { + "epoch": 0.01, + "grad_norm": 0.6961988210678101, + "learning_rate": 1.164e-05, + "loss": 2.8959, + "step": 291 + }, + { + "epoch": 0.01, + "grad_norm": 0.7188063263893127, + "learning_rate": 1.168e-05, + "loss": 2.9282, + "step": 292 + }, + { + "epoch": 0.01, + "grad_norm": 0.7033876180648804, + "learning_rate": 1.172e-05, + "loss": 2.9789, + "step": 293 + }, + { + "epoch": 0.01, + "grad_norm": 0.6968033313751221, + "learning_rate": 1.1760000000000001e-05, + "loss": 2.8764, + "step": 294 + }, + { + "epoch": 0.01, + "grad_norm": 0.7316171526908875, + "learning_rate": 1.18e-05, + "loss": 2.9158, + "step": 295 + }, + { + "epoch": 0.01, + "grad_norm": 0.7288230061531067, + "learning_rate": 1.184e-05, + "loss": 2.9427, + "step": 296 + }, + { + "epoch": 0.01, + "grad_norm": 0.7153042554855347, + "learning_rate": 1.188e-05, + "loss": 2.9563, + "step": 297 + }, + { + "epoch": 0.01, + "grad_norm": 0.6916014552116394, + "learning_rate": 1.1920000000000001e-05, + "loss": 2.9446, + "step": 298 + }, + { + "epoch": 0.01, + "grad_norm": 0.6955585479736328, + "learning_rate": 1.196e-05, + "loss": 2.9574, + "step": 299 + }, + { + "epoch": 0.01, + "grad_norm": 0.7189493179321289, + "learning_rate": 1.2e-05, + "loss": 2.9507, + "step": 300 + }, + { + "epoch": 0.01, + "grad_norm": 0.7120326161384583, + "learning_rate": 1.204e-05, + "loss": 2.9545, + "step": 301 + }, + { + "epoch": 0.01, + "grad_norm": 0.7113020420074463, + "learning_rate": 1.2080000000000001e-05, + "loss": 2.9529, + "step": 302 + }, + { + "epoch": 0.01, + "grad_norm": 0.6919089555740356, + "learning_rate": 1.2120000000000001e-05, + "loss": 2.922, + "step": 303 + }, + { + "epoch": 0.01, + "grad_norm": 0.6794406175613403, + "learning_rate": 1.216e-05, + "loss": 2.8967, + "step": 304 + }, + { + "epoch": 0.01, + "grad_norm": 0.6871897578239441, + "learning_rate": 1.22e-05, + "loss": 2.9408, + "step": 305 + }, + { + "epoch": 0.01, + "grad_norm": 0.6826269030570984, + "learning_rate": 1.2240000000000001e-05, + "loss": 2.9366, + "step": 306 + }, + { + "epoch": 0.01, + "grad_norm": 0.6960402131080627, + "learning_rate": 1.2280000000000001e-05, + "loss": 2.9211, + "step": 307 + }, + { + "epoch": 0.01, + "grad_norm": 0.7198580503463745, + "learning_rate": 1.232e-05, + "loss": 2.901, + "step": 308 + }, + { + "epoch": 0.01, + "grad_norm": 0.7064822912216187, + "learning_rate": 1.236e-05, + "loss": 3.0367, + "step": 309 + }, + { + "epoch": 0.01, + "grad_norm": 0.6707227230072021, + "learning_rate": 1.2400000000000002e-05, + "loss": 2.9083, + "step": 310 + }, + { + "epoch": 0.01, + "grad_norm": 0.696674644947052, + "learning_rate": 1.2440000000000001e-05, + "loss": 2.9226, + "step": 311 + }, + { + "epoch": 0.01, + "grad_norm": 0.6905208230018616, + "learning_rate": 1.248e-05, + "loss": 2.9, + "step": 312 + }, + { + "epoch": 0.01, + "grad_norm": 0.7072402238845825, + "learning_rate": 1.252e-05, + "loss": 2.9486, + "step": 313 + }, + { + "epoch": 0.01, + "grad_norm": 0.7020350098609924, + "learning_rate": 1.2560000000000002e-05, + "loss": 2.8765, + "step": 314 + }, + { + "epoch": 0.01, + "grad_norm": 0.6880193948745728, + "learning_rate": 1.2600000000000001e-05, + "loss": 2.9133, + "step": 315 + }, + { + "epoch": 0.01, + "grad_norm": 0.6863030791282654, + "learning_rate": 1.2640000000000001e-05, + "loss": 2.8506, + "step": 316 + }, + { + "epoch": 0.01, + "grad_norm": 0.7022314071655273, + "learning_rate": 1.268e-05, + "loss": 2.8672, + "step": 317 + }, + { + "epoch": 0.01, + "grad_norm": 0.7113156914710999, + "learning_rate": 1.2720000000000002e-05, + "loss": 2.9071, + "step": 318 + }, + { + "epoch": 0.01, + "grad_norm": 0.7123892903327942, + "learning_rate": 1.2760000000000001e-05, + "loss": 2.9003, + "step": 319 + }, + { + "epoch": 0.01, + "grad_norm": 0.7179726958274841, + "learning_rate": 1.2800000000000001e-05, + "loss": 2.986, + "step": 320 + }, + { + "epoch": 0.01, + "grad_norm": 0.7394989728927612, + "learning_rate": 1.284e-05, + "loss": 2.9379, + "step": 321 + }, + { + "epoch": 0.01, + "grad_norm": 0.6829982399940491, + "learning_rate": 1.2880000000000002e-05, + "loss": 2.8756, + "step": 322 + }, + { + "epoch": 0.01, + "grad_norm": 0.7195976972579956, + "learning_rate": 1.2920000000000002e-05, + "loss": 2.9435, + "step": 323 + }, + { + "epoch": 0.01, + "grad_norm": 0.7341179251670837, + "learning_rate": 1.2960000000000001e-05, + "loss": 2.9394, + "step": 324 + }, + { + "epoch": 0.01, + "grad_norm": 0.6563425064086914, + "learning_rate": 1.3000000000000001e-05, + "loss": 2.8949, + "step": 325 + }, + { + "epoch": 0.01, + "grad_norm": 0.6852063536643982, + "learning_rate": 1.3040000000000002e-05, + "loss": 2.8471, + "step": 326 + }, + { + "epoch": 0.01, + "grad_norm": 0.6816968321800232, + "learning_rate": 1.3080000000000002e-05, + "loss": 2.8727, + "step": 327 + }, + { + "epoch": 0.01, + "grad_norm": 0.6697700619697571, + "learning_rate": 1.3120000000000001e-05, + "loss": 2.8435, + "step": 328 + }, + { + "epoch": 0.01, + "grad_norm": 0.7308948636054993, + "learning_rate": 1.3160000000000001e-05, + "loss": 2.8462, + "step": 329 + }, + { + "epoch": 0.01, + "grad_norm": 0.6817566752433777, + "learning_rate": 1.3200000000000002e-05, + "loss": 2.8388, + "step": 330 + }, + { + "epoch": 0.01, + "grad_norm": 0.698800265789032, + "learning_rate": 1.3240000000000002e-05, + "loss": 2.8698, + "step": 331 + }, + { + "epoch": 0.01, + "grad_norm": 0.6767727136611938, + "learning_rate": 1.3280000000000002e-05, + "loss": 2.8912, + "step": 332 + }, + { + "epoch": 0.01, + "grad_norm": 0.6929364204406738, + "learning_rate": 1.3320000000000001e-05, + "loss": 2.8582, + "step": 333 + }, + { + "epoch": 0.01, + "grad_norm": 0.7088776230812073, + "learning_rate": 1.3360000000000003e-05, + "loss": 2.8587, + "step": 334 + }, + { + "epoch": 0.01, + "grad_norm": 0.6721728444099426, + "learning_rate": 1.3400000000000002e-05, + "loss": 2.8662, + "step": 335 + }, + { + "epoch": 0.01, + "grad_norm": 0.6921935677528381, + "learning_rate": 1.3440000000000002e-05, + "loss": 2.8704, + "step": 336 + }, + { + "epoch": 0.01, + "grad_norm": 0.6760930418968201, + "learning_rate": 1.3480000000000001e-05, + "loss": 2.8223, + "step": 337 + }, + { + "epoch": 0.01, + "grad_norm": 0.7038088440895081, + "learning_rate": 1.3520000000000003e-05, + "loss": 2.8716, + "step": 338 + }, + { + "epoch": 0.01, + "grad_norm": 0.6843159794807434, + "learning_rate": 1.3560000000000002e-05, + "loss": 2.832, + "step": 339 + }, + { + "epoch": 0.01, + "grad_norm": 0.7381107807159424, + "learning_rate": 1.3600000000000002e-05, + "loss": 2.8612, + "step": 340 + }, + { + "epoch": 0.01, + "grad_norm": 0.728920042514801, + "learning_rate": 1.3640000000000002e-05, + "loss": 2.8409, + "step": 341 + }, + { + "epoch": 0.01, + "grad_norm": 0.6590824127197266, + "learning_rate": 1.3680000000000003e-05, + "loss": 2.8014, + "step": 342 + }, + { + "epoch": 0.01, + "grad_norm": 0.7200900912284851, + "learning_rate": 1.3720000000000002e-05, + "loss": 2.8949, + "step": 343 + }, + { + "epoch": 0.01, + "grad_norm": 0.7507455348968506, + "learning_rate": 1.376e-05, + "loss": 2.8645, + "step": 344 + }, + { + "epoch": 0.01, + "grad_norm": 0.6519306898117065, + "learning_rate": 1.38e-05, + "loss": 2.8846, + "step": 345 + }, + { + "epoch": 0.01, + "grad_norm": 0.65754234790802, + "learning_rate": 1.384e-05, + "loss": 2.831, + "step": 346 + }, + { + "epoch": 0.01, + "grad_norm": 0.6758772730827332, + "learning_rate": 1.3880000000000001e-05, + "loss": 2.8777, + "step": 347 + }, + { + "epoch": 0.01, + "grad_norm": 0.6951612234115601, + "learning_rate": 1.392e-05, + "loss": 2.843, + "step": 348 + }, + { + "epoch": 0.01, + "grad_norm": 0.6868584156036377, + "learning_rate": 1.396e-05, + "loss": 2.8518, + "step": 349 + }, + { + "epoch": 0.01, + "grad_norm": 0.7019930481910706, + "learning_rate": 1.4e-05, + "loss": 2.8578, + "step": 350 + }, + { + "epoch": 0.01, + "grad_norm": 0.6601002216339111, + "learning_rate": 1.4040000000000001e-05, + "loss": 2.9024, + "step": 351 + }, + { + "epoch": 0.01, + "grad_norm": 0.6641451716423035, + "learning_rate": 1.408e-05, + "loss": 2.8361, + "step": 352 + }, + { + "epoch": 0.01, + "grad_norm": 0.6696758270263672, + "learning_rate": 1.412e-05, + "loss": 2.8426, + "step": 353 + }, + { + "epoch": 0.01, + "grad_norm": 0.6642027497291565, + "learning_rate": 1.416e-05, + "loss": 2.7937, + "step": 354 + }, + { + "epoch": 0.01, + "grad_norm": 0.6785578727722168, + "learning_rate": 1.4200000000000001e-05, + "loss": 2.8962, + "step": 355 + }, + { + "epoch": 0.01, + "grad_norm": 0.6706116795539856, + "learning_rate": 1.4240000000000001e-05, + "loss": 2.9164, + "step": 356 + }, + { + "epoch": 0.01, + "grad_norm": 0.6934748291969299, + "learning_rate": 1.428e-05, + "loss": 2.8039, + "step": 357 + }, + { + "epoch": 0.01, + "grad_norm": 0.6594704985618591, + "learning_rate": 1.432e-05, + "loss": 2.9253, + "step": 358 + }, + { + "epoch": 0.01, + "grad_norm": 0.6488956809043884, + "learning_rate": 1.4360000000000001e-05, + "loss": 2.856, + "step": 359 + }, + { + "epoch": 0.01, + "grad_norm": 0.6686562895774841, + "learning_rate": 1.4400000000000001e-05, + "loss": 2.811, + "step": 360 + }, + { + "epoch": 0.01, + "grad_norm": 0.6786785125732422, + "learning_rate": 1.444e-05, + "loss": 2.79, + "step": 361 + }, + { + "epoch": 0.01, + "grad_norm": 0.6907126307487488, + "learning_rate": 1.448e-05, + "loss": 2.8413, + "step": 362 + }, + { + "epoch": 0.01, + "grad_norm": 0.697150468826294, + "learning_rate": 1.4520000000000002e-05, + "loss": 2.9201, + "step": 363 + }, + { + "epoch": 0.01, + "grad_norm": 0.6734716296195984, + "learning_rate": 1.4560000000000001e-05, + "loss": 2.8742, + "step": 364 + }, + { + "epoch": 0.01, + "grad_norm": 0.6664982438087463, + "learning_rate": 1.46e-05, + "loss": 2.8202, + "step": 365 + }, + { + "epoch": 0.01, + "grad_norm": 0.69278484582901, + "learning_rate": 1.464e-05, + "loss": 2.8756, + "step": 366 + }, + { + "epoch": 0.01, + "grad_norm": 0.6641896963119507, + "learning_rate": 1.4680000000000002e-05, + "loss": 2.7929, + "step": 367 + }, + { + "epoch": 0.01, + "grad_norm": 0.6322620511054993, + "learning_rate": 1.4720000000000001e-05, + "loss": 2.8188, + "step": 368 + }, + { + "epoch": 0.01, + "grad_norm": 0.725165605545044, + "learning_rate": 1.4760000000000001e-05, + "loss": 2.8202, + "step": 369 + }, + { + "epoch": 0.01, + "grad_norm": 0.6899129748344421, + "learning_rate": 1.48e-05, + "loss": 2.8277, + "step": 370 + }, + { + "epoch": 0.01, + "grad_norm": 0.6651990413665771, + "learning_rate": 1.4840000000000002e-05, + "loss": 2.839, + "step": 371 + }, + { + "epoch": 0.01, + "grad_norm": 0.6614822149276733, + "learning_rate": 1.4880000000000002e-05, + "loss": 2.8896, + "step": 372 + }, + { + "epoch": 0.01, + "grad_norm": 0.6750214099884033, + "learning_rate": 1.4920000000000001e-05, + "loss": 2.7936, + "step": 373 + }, + { + "epoch": 0.01, + "grad_norm": 0.6761062741279602, + "learning_rate": 1.496e-05, + "loss": 2.8398, + "step": 374 + }, + { + "epoch": 0.01, + "grad_norm": 0.6523001194000244, + "learning_rate": 1.5000000000000002e-05, + "loss": 2.8743, + "step": 375 + }, + { + "epoch": 0.01, + "grad_norm": 0.6744080185890198, + "learning_rate": 1.5040000000000002e-05, + "loss": 2.8022, + "step": 376 + }, + { + "epoch": 0.01, + "grad_norm": 0.6425319314002991, + "learning_rate": 1.5080000000000001e-05, + "loss": 2.785, + "step": 377 + }, + { + "epoch": 0.01, + "grad_norm": 0.6872668266296387, + "learning_rate": 1.5120000000000001e-05, + "loss": 2.8434, + "step": 378 + }, + { + "epoch": 0.01, + "grad_norm": 0.6897056102752686, + "learning_rate": 1.516e-05, + "loss": 2.8262, + "step": 379 + }, + { + "epoch": 0.01, + "grad_norm": 0.6656489968299866, + "learning_rate": 1.5200000000000002e-05, + "loss": 2.8067, + "step": 380 + }, + { + "epoch": 0.01, + "grad_norm": 0.6480472683906555, + "learning_rate": 1.5240000000000001e-05, + "loss": 2.8246, + "step": 381 + }, + { + "epoch": 0.01, + "grad_norm": 0.6812193989753723, + "learning_rate": 1.5280000000000003e-05, + "loss": 2.7835, + "step": 382 + }, + { + "epoch": 0.01, + "grad_norm": 0.6591140031814575, + "learning_rate": 1.5320000000000002e-05, + "loss": 2.8388, + "step": 383 + }, + { + "epoch": 0.01, + "grad_norm": 0.6715266704559326, + "learning_rate": 1.5360000000000002e-05, + "loss": 2.8536, + "step": 384 + }, + { + "epoch": 0.01, + "grad_norm": 0.7042847275733948, + "learning_rate": 1.54e-05, + "loss": 2.7849, + "step": 385 + }, + { + "epoch": 0.01, + "grad_norm": 0.6863837242126465, + "learning_rate": 1.544e-05, + "loss": 2.8218, + "step": 386 + }, + { + "epoch": 0.01, + "grad_norm": 0.6509554982185364, + "learning_rate": 1.548e-05, + "loss": 2.8161, + "step": 387 + }, + { + "epoch": 0.01, + "grad_norm": 0.6764253973960876, + "learning_rate": 1.552e-05, + "loss": 2.8487, + "step": 388 + }, + { + "epoch": 0.01, + "grad_norm": 0.6829914450645447, + "learning_rate": 1.556e-05, + "loss": 2.8393, + "step": 389 + }, + { + "epoch": 0.01, + "grad_norm": 0.6526641249656677, + "learning_rate": 1.5600000000000003e-05, + "loss": 2.7928, + "step": 390 + }, + { + "epoch": 0.01, + "grad_norm": 0.6726005673408508, + "learning_rate": 1.5640000000000003e-05, + "loss": 2.7424, + "step": 391 + }, + { + "epoch": 0.01, + "grad_norm": 0.6745621562004089, + "learning_rate": 1.5680000000000002e-05, + "loss": 2.8066, + "step": 392 + }, + { + "epoch": 0.01, + "grad_norm": 0.7079181671142578, + "learning_rate": 1.5720000000000002e-05, + "loss": 2.8011, + "step": 393 + }, + { + "epoch": 0.01, + "grad_norm": 0.7400582432746887, + "learning_rate": 1.576e-05, + "loss": 2.8468, + "step": 394 + }, + { + "epoch": 0.01, + "grad_norm": 0.6972211003303528, + "learning_rate": 1.58e-05, + "loss": 2.864, + "step": 395 + }, + { + "epoch": 0.01, + "grad_norm": 0.6685325503349304, + "learning_rate": 1.584e-05, + "loss": 2.808, + "step": 396 + }, + { + "epoch": 0.01, + "grad_norm": 0.6581268310546875, + "learning_rate": 1.588e-05, + "loss": 2.8589, + "step": 397 + }, + { + "epoch": 0.01, + "grad_norm": 0.6619047522544861, + "learning_rate": 1.5920000000000003e-05, + "loss": 2.8215, + "step": 398 + }, + { + "epoch": 0.01, + "grad_norm": 0.6885353326797485, + "learning_rate": 1.5960000000000003e-05, + "loss": 2.8224, + "step": 399 + }, + { + "epoch": 0.01, + "grad_norm": 0.6520381569862366, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.88, + "step": 400 + }, + { + "epoch": 0.01, + "grad_norm": 0.6369414925575256, + "learning_rate": 1.6040000000000002e-05, + "loss": 2.7924, + "step": 401 + }, + { + "epoch": 0.01, + "grad_norm": 0.733415424823761, + "learning_rate": 1.6080000000000002e-05, + "loss": 2.7917, + "step": 402 + }, + { + "epoch": 0.01, + "grad_norm": 0.6861952543258667, + "learning_rate": 1.612e-05, + "loss": 2.8369, + "step": 403 + }, + { + "epoch": 0.01, + "grad_norm": 0.6843462586402893, + "learning_rate": 1.616e-05, + "loss": 2.8397, + "step": 404 + }, + { + "epoch": 0.01, + "grad_norm": 0.6312543153762817, + "learning_rate": 1.62e-05, + "loss": 2.7933, + "step": 405 + }, + { + "epoch": 0.01, + "grad_norm": 0.6630678176879883, + "learning_rate": 1.6240000000000004e-05, + "loss": 2.7953, + "step": 406 + }, + { + "epoch": 0.01, + "grad_norm": 0.6870042681694031, + "learning_rate": 1.628e-05, + "loss": 2.8, + "step": 407 + }, + { + "epoch": 0.01, + "grad_norm": 0.6712439060211182, + "learning_rate": 1.632e-05, + "loss": 2.8001, + "step": 408 + }, + { + "epoch": 0.01, + "grad_norm": 0.6623268127441406, + "learning_rate": 1.636e-05, + "loss": 2.8163, + "step": 409 + }, + { + "epoch": 0.01, + "grad_norm": 0.7159444689750671, + "learning_rate": 1.64e-05, + "loss": 2.8255, + "step": 410 + }, + { + "epoch": 0.01, + "grad_norm": 0.6556423902511597, + "learning_rate": 1.6440000000000002e-05, + "loss": 2.7916, + "step": 411 + }, + { + "epoch": 0.01, + "grad_norm": 0.7008485794067383, + "learning_rate": 1.648e-05, + "loss": 2.7285, + "step": 412 + }, + { + "epoch": 0.01, + "grad_norm": 0.686048686504364, + "learning_rate": 1.652e-05, + "loss": 2.7355, + "step": 413 + }, + { + "epoch": 0.01, + "grad_norm": 0.6576399207115173, + "learning_rate": 1.656e-05, + "loss": 2.7946, + "step": 414 + }, + { + "epoch": 0.01, + "grad_norm": 0.6346274018287659, + "learning_rate": 1.66e-05, + "loss": 2.8302, + "step": 415 + }, + { + "epoch": 0.01, + "grad_norm": 0.678077220916748, + "learning_rate": 1.664e-05, + "loss": 2.7957, + "step": 416 + }, + { + "epoch": 0.01, + "grad_norm": 0.6638278365135193, + "learning_rate": 1.668e-05, + "loss": 2.7873, + "step": 417 + }, + { + "epoch": 0.01, + "grad_norm": 0.7128313183784485, + "learning_rate": 1.672e-05, + "loss": 2.7681, + "step": 418 + }, + { + "epoch": 0.01, + "grad_norm": 0.662057638168335, + "learning_rate": 1.6760000000000002e-05, + "loss": 2.7985, + "step": 419 + }, + { + "epoch": 0.01, + "grad_norm": 0.6595914959907532, + "learning_rate": 1.6800000000000002e-05, + "loss": 2.7548, + "step": 420 + }, + { + "epoch": 0.01, + "grad_norm": 0.6682320237159729, + "learning_rate": 1.684e-05, + "loss": 2.7219, + "step": 421 + }, + { + "epoch": 0.01, + "grad_norm": 0.6644014120101929, + "learning_rate": 1.688e-05, + "loss": 2.7981, + "step": 422 + }, + { + "epoch": 0.01, + "grad_norm": 0.6698048114776611, + "learning_rate": 1.692e-05, + "loss": 2.7955, + "step": 423 + }, + { + "epoch": 0.01, + "grad_norm": 0.6910617351531982, + "learning_rate": 1.696e-05, + "loss": 2.7795, + "step": 424 + }, + { + "epoch": 0.01, + "grad_norm": 0.6983128786087036, + "learning_rate": 1.7e-05, + "loss": 2.7893, + "step": 425 + }, + { + "epoch": 0.01, + "grad_norm": 0.6963634490966797, + "learning_rate": 1.704e-05, + "loss": 2.754, + "step": 426 + }, + { + "epoch": 0.01, + "grad_norm": 0.6780691742897034, + "learning_rate": 1.7080000000000002e-05, + "loss": 2.777, + "step": 427 + }, + { + "epoch": 0.01, + "grad_norm": 0.6582500338554382, + "learning_rate": 1.7120000000000002e-05, + "loss": 2.7576, + "step": 428 + }, + { + "epoch": 0.01, + "grad_norm": 0.6800390481948853, + "learning_rate": 1.7160000000000002e-05, + "loss": 2.8126, + "step": 429 + }, + { + "epoch": 0.01, + "grad_norm": 0.6702778339385986, + "learning_rate": 1.72e-05, + "loss": 2.7482, + "step": 430 + }, + { + "epoch": 0.01, + "grad_norm": 0.6659464240074158, + "learning_rate": 1.724e-05, + "loss": 2.803, + "step": 431 + }, + { + "epoch": 0.01, + "grad_norm": 0.6686845421791077, + "learning_rate": 1.728e-05, + "loss": 2.7626, + "step": 432 + }, + { + "epoch": 0.01, + "grad_norm": 0.6473426818847656, + "learning_rate": 1.732e-05, + "loss": 2.7717, + "step": 433 + }, + { + "epoch": 0.01, + "grad_norm": 0.7178492546081543, + "learning_rate": 1.736e-05, + "loss": 2.8441, + "step": 434 + }, + { + "epoch": 0.01, + "grad_norm": 0.6490538716316223, + "learning_rate": 1.7400000000000003e-05, + "loss": 2.7456, + "step": 435 + }, + { + "epoch": 0.01, + "grad_norm": 0.6390330791473389, + "learning_rate": 1.7440000000000002e-05, + "loss": 2.7918, + "step": 436 + }, + { + "epoch": 0.01, + "grad_norm": 0.6829019784927368, + "learning_rate": 1.7480000000000002e-05, + "loss": 2.8072, + "step": 437 + }, + { + "epoch": 0.01, + "grad_norm": 0.6568471193313599, + "learning_rate": 1.752e-05, + "loss": 2.824, + "step": 438 + }, + { + "epoch": 0.01, + "grad_norm": 0.6428776383399963, + "learning_rate": 1.756e-05, + "loss": 2.7419, + "step": 439 + }, + { + "epoch": 0.01, + "grad_norm": 0.654801607131958, + "learning_rate": 1.76e-05, + "loss": 2.7541, + "step": 440 + }, + { + "epoch": 0.01, + "grad_norm": 0.7071441411972046, + "learning_rate": 1.764e-05, + "loss": 2.7414, + "step": 441 + }, + { + "epoch": 0.01, + "grad_norm": 0.672399640083313, + "learning_rate": 1.768e-05, + "loss": 2.7646, + "step": 442 + }, + { + "epoch": 0.01, + "grad_norm": 0.655575692653656, + "learning_rate": 1.7720000000000003e-05, + "loss": 2.7776, + "step": 443 + }, + { + "epoch": 0.01, + "grad_norm": 0.6810017228126526, + "learning_rate": 1.7760000000000003e-05, + "loss": 2.8182, + "step": 444 + }, + { + "epoch": 0.01, + "grad_norm": 0.7078613042831421, + "learning_rate": 1.7800000000000002e-05, + "loss": 2.8253, + "step": 445 + }, + { + "epoch": 0.01, + "grad_norm": 0.7234245538711548, + "learning_rate": 1.7840000000000002e-05, + "loss": 2.7853, + "step": 446 + }, + { + "epoch": 0.01, + "grad_norm": 0.662403404712677, + "learning_rate": 1.788e-05, + "loss": 2.7444, + "step": 447 + }, + { + "epoch": 0.01, + "grad_norm": 0.6751728057861328, + "learning_rate": 1.792e-05, + "loss": 2.7368, + "step": 448 + }, + { + "epoch": 0.01, + "grad_norm": 0.6327471733093262, + "learning_rate": 1.796e-05, + "loss": 2.7995, + "step": 449 + }, + { + "epoch": 0.01, + "grad_norm": 0.6822514533996582, + "learning_rate": 1.8e-05, + "loss": 2.8227, + "step": 450 + }, + { + "epoch": 0.02, + "grad_norm": 0.6658573746681213, + "learning_rate": 1.8040000000000003e-05, + "loss": 2.7752, + "step": 451 + }, + { + "epoch": 0.02, + "grad_norm": 0.6631488800048828, + "learning_rate": 1.8080000000000003e-05, + "loss": 2.7549, + "step": 452 + }, + { + "epoch": 0.02, + "grad_norm": 0.6481293439865112, + "learning_rate": 1.8120000000000003e-05, + "loss": 2.8309, + "step": 453 + }, + { + "epoch": 0.02, + "grad_norm": 0.657621443271637, + "learning_rate": 1.8160000000000002e-05, + "loss": 2.7902, + "step": 454 + }, + { + "epoch": 0.02, + "grad_norm": 0.6684351563453674, + "learning_rate": 1.8200000000000002e-05, + "loss": 2.7507, + "step": 455 + }, + { + "epoch": 0.02, + "grad_norm": 0.6583429574966431, + "learning_rate": 1.824e-05, + "loss": 2.7849, + "step": 456 + }, + { + "epoch": 0.02, + "grad_norm": 0.6741542220115662, + "learning_rate": 1.828e-05, + "loss": 2.7141, + "step": 457 + }, + { + "epoch": 0.02, + "grad_norm": 0.675751805305481, + "learning_rate": 1.832e-05, + "loss": 2.7368, + "step": 458 + }, + { + "epoch": 0.02, + "grad_norm": 0.7003194093704224, + "learning_rate": 1.8360000000000004e-05, + "loss": 2.7647, + "step": 459 + }, + { + "epoch": 0.02, + "grad_norm": 0.6883391737937927, + "learning_rate": 1.8400000000000003e-05, + "loss": 2.725, + "step": 460 + }, + { + "epoch": 0.02, + "grad_norm": 0.6776552200317383, + "learning_rate": 1.8440000000000003e-05, + "loss": 2.7805, + "step": 461 + }, + { + "epoch": 0.02, + "grad_norm": 0.6525712013244629, + "learning_rate": 1.8480000000000003e-05, + "loss": 2.766, + "step": 462 + }, + { + "epoch": 0.02, + "grad_norm": 0.7016055583953857, + "learning_rate": 1.8520000000000002e-05, + "loss": 2.7154, + "step": 463 + }, + { + "epoch": 0.02, + "grad_norm": 0.6612386107444763, + "learning_rate": 1.8560000000000002e-05, + "loss": 2.7324, + "step": 464 + }, + { + "epoch": 0.02, + "grad_norm": 0.6514604687690735, + "learning_rate": 1.86e-05, + "loss": 2.7433, + "step": 465 + }, + { + "epoch": 0.02, + "grad_norm": 0.6845662593841553, + "learning_rate": 1.864e-05, + "loss": 2.7655, + "step": 466 + }, + { + "epoch": 0.02, + "grad_norm": 0.6435591578483582, + "learning_rate": 1.8680000000000004e-05, + "loss": 2.7784, + "step": 467 + }, + { + "epoch": 0.02, + "grad_norm": 0.7050468325614929, + "learning_rate": 1.8720000000000004e-05, + "loss": 2.7106, + "step": 468 + }, + { + "epoch": 0.02, + "grad_norm": 0.6791504621505737, + "learning_rate": 1.876e-05, + "loss": 2.7462, + "step": 469 + }, + { + "epoch": 0.02, + "grad_norm": 0.6569073796272278, + "learning_rate": 1.88e-05, + "loss": 2.758, + "step": 470 + }, + { + "epoch": 0.02, + "grad_norm": 0.6673763990402222, + "learning_rate": 1.884e-05, + "loss": 2.7988, + "step": 471 + }, + { + "epoch": 0.02, + "grad_norm": 0.6800898313522339, + "learning_rate": 1.8880000000000002e-05, + "loss": 2.7622, + "step": 472 + }, + { + "epoch": 0.02, + "grad_norm": 0.6763074994087219, + "learning_rate": 1.8920000000000002e-05, + "loss": 2.7525, + "step": 473 + }, + { + "epoch": 0.02, + "grad_norm": 0.6501556634902954, + "learning_rate": 1.896e-05, + "loss": 2.7921, + "step": 474 + }, + { + "epoch": 0.02, + "grad_norm": 0.6746877431869507, + "learning_rate": 1.9e-05, + "loss": 2.7625, + "step": 475 + }, + { + "epoch": 0.02, + "grad_norm": 0.6819021701812744, + "learning_rate": 1.904e-05, + "loss": 2.697, + "step": 476 + }, + { + "epoch": 0.02, + "grad_norm": 0.664782702922821, + "learning_rate": 1.908e-05, + "loss": 2.727, + "step": 477 + }, + { + "epoch": 0.02, + "grad_norm": 0.6812900304794312, + "learning_rate": 1.912e-05, + "loss": 2.7274, + "step": 478 + }, + { + "epoch": 0.02, + "grad_norm": 0.6716505289077759, + "learning_rate": 1.916e-05, + "loss": 2.7538, + "step": 479 + }, + { + "epoch": 0.02, + "grad_norm": 0.7137351632118225, + "learning_rate": 1.9200000000000003e-05, + "loss": 2.8465, + "step": 480 + }, + { + "epoch": 0.02, + "grad_norm": 0.6622009873390198, + "learning_rate": 1.9240000000000002e-05, + "loss": 2.701, + "step": 481 + }, + { + "epoch": 0.02, + "grad_norm": 0.6714833974838257, + "learning_rate": 1.9280000000000002e-05, + "loss": 2.6702, + "step": 482 + }, + { + "epoch": 0.02, + "grad_norm": 0.6557419300079346, + "learning_rate": 1.932e-05, + "loss": 2.7614, + "step": 483 + }, + { + "epoch": 0.02, + "grad_norm": 0.6650912761688232, + "learning_rate": 1.936e-05, + "loss": 2.7574, + "step": 484 + }, + { + "epoch": 0.02, + "grad_norm": 0.6602796912193298, + "learning_rate": 1.94e-05, + "loss": 2.681, + "step": 485 + }, + { + "epoch": 0.02, + "grad_norm": 0.6280622482299805, + "learning_rate": 1.944e-05, + "loss": 2.7098, + "step": 486 + }, + { + "epoch": 0.02, + "grad_norm": 0.6766891479492188, + "learning_rate": 1.948e-05, + "loss": 2.7737, + "step": 487 + }, + { + "epoch": 0.02, + "grad_norm": 0.6489236950874329, + "learning_rate": 1.9520000000000003e-05, + "loss": 2.7599, + "step": 488 + }, + { + "epoch": 0.02, + "grad_norm": 0.6757470965385437, + "learning_rate": 1.9560000000000002e-05, + "loss": 2.7535, + "step": 489 + }, + { + "epoch": 0.02, + "grad_norm": 0.696772038936615, + "learning_rate": 1.9600000000000002e-05, + "loss": 2.7023, + "step": 490 + }, + { + "epoch": 0.02, + "grad_norm": 0.6625874638557434, + "learning_rate": 1.9640000000000002e-05, + "loss": 2.7597, + "step": 491 + }, + { + "epoch": 0.02, + "grad_norm": 0.6646535992622375, + "learning_rate": 1.968e-05, + "loss": 2.7365, + "step": 492 + }, + { + "epoch": 0.02, + "grad_norm": 0.6632100343704224, + "learning_rate": 1.972e-05, + "loss": 2.796, + "step": 493 + }, + { + "epoch": 0.02, + "grad_norm": 0.6804953217506409, + "learning_rate": 1.976e-05, + "loss": 2.7826, + "step": 494 + }, + { + "epoch": 0.02, + "grad_norm": 0.6702829599380493, + "learning_rate": 1.98e-05, + "loss": 2.736, + "step": 495 + }, + { + "epoch": 0.02, + "grad_norm": 0.667017936706543, + "learning_rate": 1.9840000000000003e-05, + "loss": 2.7403, + "step": 496 + }, + { + "epoch": 0.02, + "grad_norm": 0.6766886711120605, + "learning_rate": 1.9880000000000003e-05, + "loss": 2.7651, + "step": 497 + }, + { + "epoch": 0.02, + "grad_norm": 0.6707794666290283, + "learning_rate": 1.9920000000000002e-05, + "loss": 2.7039, + "step": 498 + }, + { + "epoch": 0.02, + "grad_norm": 0.6904656291007996, + "learning_rate": 1.9960000000000002e-05, + "loss": 2.6754, + "step": 499 + }, + { + "epoch": 0.02, + "grad_norm": 0.6930393576622009, + "learning_rate": 2e-05, + "loss": 2.7282, + "step": 500 + }, + { + "epoch": 0.02, + "grad_norm": 0.6510630249977112, + "learning_rate": 1.9999999943512933e-05, + "loss": 2.6839, + "step": 501 + }, + { + "epoch": 0.02, + "grad_norm": 0.74442458152771, + "learning_rate": 1.9999999774051723e-05, + "loss": 2.6939, + "step": 502 + }, + { + "epoch": 0.02, + "grad_norm": 0.6496220827102661, + "learning_rate": 1.9999999491616376e-05, + "loss": 2.7106, + "step": 503 + }, + { + "epoch": 0.02, + "grad_norm": 0.686384916305542, + "learning_rate": 1.99999990962069e-05, + "loss": 2.703, + "step": 504 + }, + { + "epoch": 0.02, + "grad_norm": 0.6780949234962463, + "learning_rate": 1.999999858782329e-05, + "loss": 2.7165, + "step": 505 + }, + { + "epoch": 0.02, + "grad_norm": 0.6805633306503296, + "learning_rate": 1.999999796646556e-05, + "loss": 2.7438, + "step": 506 + }, + { + "epoch": 0.02, + "grad_norm": 0.6860983967781067, + "learning_rate": 1.999999723213371e-05, + "loss": 2.6235, + "step": 507 + }, + { + "epoch": 0.02, + "grad_norm": 0.6692408919334412, + "learning_rate": 1.9999996384827757e-05, + "loss": 2.6712, + "step": 508 + }, + { + "epoch": 0.02, + "grad_norm": 0.676289439201355, + "learning_rate": 1.99999954245477e-05, + "loss": 2.7818, + "step": 509 + }, + { + "epoch": 0.02, + "grad_norm": 0.6712286472320557, + "learning_rate": 1.9999994351293557e-05, + "loss": 2.7149, + "step": 510 + }, + { + "epoch": 0.02, + "grad_norm": 0.6756742000579834, + "learning_rate": 1.9999993165065338e-05, + "loss": 2.7127, + "step": 511 + }, + { + "epoch": 0.02, + "grad_norm": 0.6648697853088379, + "learning_rate": 1.999999186586306e-05, + "loss": 2.725, + "step": 512 + }, + { + "epoch": 0.02, + "grad_norm": 0.6322717666625977, + "learning_rate": 1.999999045368673e-05, + "loss": 2.7108, + "step": 513 + }, + { + "epoch": 0.02, + "grad_norm": 0.6628721952438354, + "learning_rate": 1.999998892853637e-05, + "loss": 2.7401, + "step": 514 + }, + { + "epoch": 0.02, + "grad_norm": 0.6629957556724548, + "learning_rate": 1.9999987290411998e-05, + "loss": 2.6868, + "step": 515 + }, + { + "epoch": 0.02, + "grad_norm": 0.665867269039154, + "learning_rate": 1.9999985539313627e-05, + "loss": 2.7289, + "step": 516 + }, + { + "epoch": 0.02, + "grad_norm": 0.6519331336021423, + "learning_rate": 1.999998367524128e-05, + "loss": 2.7796, + "step": 517 + }, + { + "epoch": 0.02, + "grad_norm": 0.6539009213447571, + "learning_rate": 1.999998169819498e-05, + "loss": 2.7258, + "step": 518 + }, + { + "epoch": 0.02, + "grad_norm": 0.6564478278160095, + "learning_rate": 1.9999979608174746e-05, + "loss": 2.7301, + "step": 519 + }, + { + "epoch": 0.02, + "grad_norm": 0.6618817448616028, + "learning_rate": 1.9999977405180604e-05, + "loss": 2.7535, + "step": 520 + }, + { + "epoch": 0.02, + "grad_norm": 0.6433423161506653, + "learning_rate": 1.9999975089212576e-05, + "loss": 2.7296, + "step": 521 + }, + { + "epoch": 0.02, + "grad_norm": 0.6719511151313782, + "learning_rate": 1.9999972660270695e-05, + "loss": 2.7366, + "step": 522 + }, + { + "epoch": 0.02, + "grad_norm": 0.6653806567192078, + "learning_rate": 1.9999970118354978e-05, + "loss": 2.7508, + "step": 523 + }, + { + "epoch": 0.02, + "grad_norm": 0.712948739528656, + "learning_rate": 1.999996746346546e-05, + "loss": 2.7074, + "step": 524 + }, + { + "epoch": 0.02, + "grad_norm": 0.6509090065956116, + "learning_rate": 1.9999964695602172e-05, + "loss": 2.72, + "step": 525 + }, + { + "epoch": 0.02, + "grad_norm": 0.6476247906684875, + "learning_rate": 1.9999961814765144e-05, + "loss": 2.665, + "step": 526 + }, + { + "epoch": 0.02, + "grad_norm": 0.6574861407279968, + "learning_rate": 1.9999958820954405e-05, + "loss": 2.7054, + "step": 527 + }, + { + "epoch": 0.02, + "grad_norm": 0.6908071637153625, + "learning_rate": 1.9999955714169994e-05, + "loss": 2.6794, + "step": 528 + }, + { + "epoch": 0.02, + "grad_norm": 0.6708104610443115, + "learning_rate": 1.9999952494411942e-05, + "loss": 2.7726, + "step": 529 + }, + { + "epoch": 0.02, + "grad_norm": 0.6745717525482178, + "learning_rate": 1.999994916168029e-05, + "loss": 2.6955, + "step": 530 + }, + { + "epoch": 0.02, + "grad_norm": 0.6689810752868652, + "learning_rate": 1.9999945715975068e-05, + "loss": 2.6382, + "step": 531 + }, + { + "epoch": 0.02, + "grad_norm": 0.6525925397872925, + "learning_rate": 1.9999942157296325e-05, + "loss": 2.715, + "step": 532 + }, + { + "epoch": 0.02, + "grad_norm": 0.6486679911613464, + "learning_rate": 1.9999938485644096e-05, + "loss": 2.7475, + "step": 533 + }, + { + "epoch": 0.02, + "grad_norm": 0.6664499640464783, + "learning_rate": 1.999993470101842e-05, + "loss": 2.7645, + "step": 534 + }, + { + "epoch": 0.02, + "grad_norm": 0.6587504744529724, + "learning_rate": 1.999993080341934e-05, + "loss": 2.699, + "step": 535 + }, + { + "epoch": 0.02, + "grad_norm": 0.6723874807357788, + "learning_rate": 1.9999926792846907e-05, + "loss": 2.7214, + "step": 536 + }, + { + "epoch": 0.02, + "grad_norm": 0.674460232257843, + "learning_rate": 1.999992266930116e-05, + "loss": 2.747, + "step": 537 + }, + { + "epoch": 0.02, + "grad_norm": 0.7156085968017578, + "learning_rate": 1.9999918432782147e-05, + "loss": 2.7014, + "step": 538 + }, + { + "epoch": 0.02, + "grad_norm": 0.6768877506256104, + "learning_rate": 1.9999914083289918e-05, + "loss": 2.645, + "step": 539 + }, + { + "epoch": 0.02, + "grad_norm": 0.6726024746894836, + "learning_rate": 1.9999909620824517e-05, + "loss": 2.6898, + "step": 540 + }, + { + "epoch": 0.02, + "grad_norm": 0.6556665301322937, + "learning_rate": 1.9999905045386e-05, + "loss": 2.7152, + "step": 541 + }, + { + "epoch": 0.02, + "grad_norm": 0.6767259836196899, + "learning_rate": 1.9999900356974417e-05, + "loss": 2.6983, + "step": 542 + }, + { + "epoch": 0.02, + "grad_norm": 0.6736220121383667, + "learning_rate": 1.9999895555589816e-05, + "loss": 2.6764, + "step": 543 + }, + { + "epoch": 0.02, + "grad_norm": 0.6530930399894714, + "learning_rate": 1.9999890641232256e-05, + "loss": 2.7439, + "step": 544 + }, + { + "epoch": 0.02, + "grad_norm": 0.6498753428459167, + "learning_rate": 1.9999885613901796e-05, + "loss": 2.6926, + "step": 545 + }, + { + "epoch": 0.02, + "grad_norm": 0.6203898191452026, + "learning_rate": 1.9999880473598486e-05, + "loss": 2.6375, + "step": 546 + }, + { + "epoch": 0.02, + "grad_norm": 0.6569789052009583, + "learning_rate": 1.999987522032239e-05, + "loss": 2.676, + "step": 547 + }, + { + "epoch": 0.02, + "grad_norm": 0.6854923367500305, + "learning_rate": 1.9999869854073563e-05, + "loss": 2.676, + "step": 548 + }, + { + "epoch": 0.02, + "grad_norm": 0.6644775867462158, + "learning_rate": 1.9999864374852065e-05, + "loss": 2.6663, + "step": 549 + }, + { + "epoch": 0.02, + "grad_norm": 0.6881424188613892, + "learning_rate": 1.9999858782657962e-05, + "loss": 2.7408, + "step": 550 + }, + { + "epoch": 0.02, + "grad_norm": 0.6853224635124207, + "learning_rate": 1.9999853077491313e-05, + "loss": 2.6661, + "step": 551 + }, + { + "epoch": 0.02, + "grad_norm": 0.6817418932914734, + "learning_rate": 1.9999847259352188e-05, + "loss": 2.6988, + "step": 552 + }, + { + "epoch": 0.02, + "grad_norm": 0.667364239692688, + "learning_rate": 1.9999841328240642e-05, + "loss": 2.6994, + "step": 553 + }, + { + "epoch": 0.02, + "grad_norm": 0.6320204138755798, + "learning_rate": 1.9999835284156755e-05, + "loss": 2.6949, + "step": 554 + }, + { + "epoch": 0.02, + "grad_norm": 0.6620521545410156, + "learning_rate": 1.999982912710059e-05, + "loss": 2.7165, + "step": 555 + }, + { + "epoch": 0.02, + "grad_norm": 0.6537284255027771, + "learning_rate": 1.9999822857072214e-05, + "loss": 2.7449, + "step": 556 + }, + { + "epoch": 0.02, + "grad_norm": 0.6808788776397705, + "learning_rate": 1.9999816474071698e-05, + "loss": 2.7075, + "step": 557 + }, + { + "epoch": 0.02, + "grad_norm": 0.6649172306060791, + "learning_rate": 1.999980997809912e-05, + "loss": 2.7462, + "step": 558 + }, + { + "epoch": 0.02, + "grad_norm": 0.6705018877983093, + "learning_rate": 1.9999803369154546e-05, + "loss": 2.676, + "step": 559 + }, + { + "epoch": 0.02, + "grad_norm": 0.6761060953140259, + "learning_rate": 1.999979664723806e-05, + "loss": 2.6689, + "step": 560 + }, + { + "epoch": 0.02, + "grad_norm": 0.6275860071182251, + "learning_rate": 1.999978981234972e-05, + "loss": 2.7209, + "step": 561 + }, + { + "epoch": 0.02, + "grad_norm": 0.6734023690223694, + "learning_rate": 1.9999782864489624e-05, + "loss": 2.7057, + "step": 562 + }, + { + "epoch": 0.02, + "grad_norm": 0.67657470703125, + "learning_rate": 1.9999775803657843e-05, + "loss": 2.7356, + "step": 563 + }, + { + "epoch": 0.02, + "grad_norm": 0.6549201607704163, + "learning_rate": 1.999976862985445e-05, + "loss": 2.7061, + "step": 564 + }, + { + "epoch": 0.02, + "grad_norm": 0.6668903827667236, + "learning_rate": 1.9999761343079536e-05, + "loss": 2.7054, + "step": 565 + }, + { + "epoch": 0.02, + "grad_norm": 0.6936042904853821, + "learning_rate": 1.9999753943333173e-05, + "loss": 2.6769, + "step": 566 + }, + { + "epoch": 0.02, + "grad_norm": 0.6518430113792419, + "learning_rate": 1.9999746430615454e-05, + "loss": 2.6028, + "step": 567 + }, + { + "epoch": 0.02, + "grad_norm": 0.6561490893363953, + "learning_rate": 1.9999738804926463e-05, + "loss": 2.7336, + "step": 568 + }, + { + "epoch": 0.02, + "grad_norm": 0.6570992469787598, + "learning_rate": 1.999973106626628e-05, + "loss": 2.6616, + "step": 569 + }, + { + "epoch": 0.02, + "grad_norm": 0.6535264253616333, + "learning_rate": 1.9999723214634995e-05, + "loss": 2.7044, + "step": 570 + }, + { + "epoch": 0.02, + "grad_norm": 0.66302490234375, + "learning_rate": 1.99997152500327e-05, + "loss": 2.7032, + "step": 571 + }, + { + "epoch": 0.02, + "grad_norm": 0.6635570526123047, + "learning_rate": 1.9999707172459483e-05, + "loss": 2.6611, + "step": 572 + }, + { + "epoch": 0.02, + "grad_norm": 0.6971732974052429, + "learning_rate": 1.999969898191543e-05, + "loss": 2.6773, + "step": 573 + }, + { + "epoch": 0.02, + "grad_norm": 0.6466277837753296, + "learning_rate": 1.9999690678400644e-05, + "loss": 2.6378, + "step": 574 + }, + { + "epoch": 0.02, + "grad_norm": 0.6631940007209778, + "learning_rate": 1.9999682261915213e-05, + "loss": 2.6803, + "step": 575 + }, + { + "epoch": 0.02, + "grad_norm": 0.6431226134300232, + "learning_rate": 1.9999673732459226e-05, + "loss": 2.6544, + "step": 576 + }, + { + "epoch": 0.02, + "grad_norm": 0.7220097184181213, + "learning_rate": 1.999966509003279e-05, + "loss": 2.6962, + "step": 577 + }, + { + "epoch": 0.02, + "grad_norm": 0.6664537191390991, + "learning_rate": 1.9999656334636e-05, + "loss": 2.6991, + "step": 578 + }, + { + "epoch": 0.02, + "grad_norm": 0.6441308259963989, + "learning_rate": 1.999964746626895e-05, + "loss": 2.6741, + "step": 579 + }, + { + "epoch": 0.02, + "grad_norm": 0.658420205116272, + "learning_rate": 1.9999638484931744e-05, + "loss": 2.6795, + "step": 580 + }, + { + "epoch": 0.02, + "grad_norm": 0.6778020858764648, + "learning_rate": 1.9999629390624484e-05, + "loss": 2.7202, + "step": 581 + }, + { + "epoch": 0.02, + "grad_norm": 0.6455041170120239, + "learning_rate": 1.9999620183347267e-05, + "loss": 2.7167, + "step": 582 + }, + { + "epoch": 0.02, + "grad_norm": 0.6703903675079346, + "learning_rate": 1.9999610863100205e-05, + "loss": 2.7611, + "step": 583 + }, + { + "epoch": 0.02, + "grad_norm": 0.6881388425827026, + "learning_rate": 1.99996014298834e-05, + "loss": 2.6577, + "step": 584 + }, + { + "epoch": 0.02, + "grad_norm": 0.6835602521896362, + "learning_rate": 1.9999591883696958e-05, + "loss": 2.6309, + "step": 585 + }, + { + "epoch": 0.02, + "grad_norm": 0.7014316916465759, + "learning_rate": 1.999958222454099e-05, + "loss": 2.6758, + "step": 586 + }, + { + "epoch": 0.02, + "grad_norm": 0.682877779006958, + "learning_rate": 1.99995724524156e-05, + "loss": 2.6665, + "step": 587 + }, + { + "epoch": 0.02, + "grad_norm": 0.6595706939697266, + "learning_rate": 1.99995625673209e-05, + "loss": 2.7068, + "step": 588 + }, + { + "epoch": 0.02, + "grad_norm": 0.6841151118278503, + "learning_rate": 1.9999552569257003e-05, + "loss": 2.6623, + "step": 589 + }, + { + "epoch": 0.02, + "grad_norm": 0.6765275001525879, + "learning_rate": 1.999954245822402e-05, + "loss": 2.638, + "step": 590 + }, + { + "epoch": 0.02, + "grad_norm": 0.6671230792999268, + "learning_rate": 1.9999532234222067e-05, + "loss": 2.6075, + "step": 591 + }, + { + "epoch": 0.02, + "grad_norm": 0.6446040868759155, + "learning_rate": 1.999952189725126e-05, + "loss": 2.6693, + "step": 592 + }, + { + "epoch": 0.02, + "grad_norm": 0.6680780053138733, + "learning_rate": 1.9999511447311713e-05, + "loss": 2.6921, + "step": 593 + }, + { + "epoch": 0.02, + "grad_norm": 0.6937766075134277, + "learning_rate": 1.999950088440355e-05, + "loss": 2.7175, + "step": 594 + }, + { + "epoch": 0.02, + "grad_norm": 0.7104864120483398, + "learning_rate": 1.9999490208526884e-05, + "loss": 2.7056, + "step": 595 + }, + { + "epoch": 0.02, + "grad_norm": 0.6789050698280334, + "learning_rate": 1.9999479419681837e-05, + "loss": 2.6834, + "step": 596 + }, + { + "epoch": 0.02, + "grad_norm": 0.6690239906311035, + "learning_rate": 1.9999468517868534e-05, + "loss": 2.7016, + "step": 597 + }, + { + "epoch": 0.02, + "grad_norm": 0.6288480758666992, + "learning_rate": 1.9999457503087094e-05, + "loss": 2.6674, + "step": 598 + }, + { + "epoch": 0.02, + "grad_norm": 0.6539272665977478, + "learning_rate": 1.9999446375337642e-05, + "loss": 2.6529, + "step": 599 + }, + { + "epoch": 0.02, + "grad_norm": 0.6495893597602844, + "learning_rate": 1.9999435134620308e-05, + "loss": 2.6728, + "step": 600 + }, + { + "epoch": 0.02, + "grad_norm": 0.6763492226600647, + "learning_rate": 1.9999423780935216e-05, + "loss": 2.6764, + "step": 601 + }, + { + "epoch": 0.02, + "grad_norm": 0.6604596376419067, + "learning_rate": 1.9999412314282495e-05, + "loss": 2.6782, + "step": 602 + }, + { + "epoch": 0.02, + "grad_norm": 0.6613422632217407, + "learning_rate": 1.999940073466227e-05, + "loss": 2.6759, + "step": 603 + }, + { + "epoch": 0.02, + "grad_norm": 0.6469343304634094, + "learning_rate": 1.9999389042074678e-05, + "loss": 2.5908, + "step": 604 + }, + { + "epoch": 0.02, + "grad_norm": 0.6556013822555542, + "learning_rate": 1.9999377236519853e-05, + "loss": 2.699, + "step": 605 + }, + { + "epoch": 0.02, + "grad_norm": 0.6714016199111938, + "learning_rate": 1.9999365317997918e-05, + "loss": 2.6614, + "step": 606 + }, + { + "epoch": 0.02, + "grad_norm": 0.6952104568481445, + "learning_rate": 1.9999353286509015e-05, + "loss": 2.6303, + "step": 607 + }, + { + "epoch": 0.02, + "grad_norm": 0.6528002619743347, + "learning_rate": 1.9999341142053282e-05, + "loss": 2.6291, + "step": 608 + }, + { + "epoch": 0.02, + "grad_norm": 0.6445615887641907, + "learning_rate": 1.999932888463085e-05, + "loss": 2.5913, + "step": 609 + }, + { + "epoch": 0.02, + "grad_norm": 0.6463565826416016, + "learning_rate": 1.999931651424186e-05, + "loss": 2.6824, + "step": 610 + }, + { + "epoch": 0.02, + "grad_norm": 0.73136967420578, + "learning_rate": 1.9999304030886454e-05, + "loss": 2.6801, + "step": 611 + }, + { + "epoch": 0.02, + "grad_norm": 0.6517104506492615, + "learning_rate": 1.9999291434564774e-05, + "loss": 2.6405, + "step": 612 + }, + { + "epoch": 0.02, + "grad_norm": 0.6433521509170532, + "learning_rate": 1.9999278725276955e-05, + "loss": 2.6644, + "step": 613 + }, + { + "epoch": 0.02, + "grad_norm": 0.619030237197876, + "learning_rate": 1.9999265903023147e-05, + "loss": 2.641, + "step": 614 + }, + { + "epoch": 0.02, + "grad_norm": 0.7042115330696106, + "learning_rate": 1.999925296780349e-05, + "loss": 2.6658, + "step": 615 + }, + { + "epoch": 0.02, + "grad_norm": 0.6369872689247131, + "learning_rate": 1.9999239919618138e-05, + "loss": 2.5443, + "step": 616 + }, + { + "epoch": 0.02, + "grad_norm": 0.6518825888633728, + "learning_rate": 1.999922675846723e-05, + "loss": 2.6424, + "step": 617 + }, + { + "epoch": 0.02, + "grad_norm": 0.6387699246406555, + "learning_rate": 1.9999213484350917e-05, + "loss": 2.6548, + "step": 618 + }, + { + "epoch": 0.02, + "grad_norm": 0.6791165471076965, + "learning_rate": 1.9999200097269354e-05, + "loss": 2.6927, + "step": 619 + }, + { + "epoch": 0.02, + "grad_norm": 0.660203754901886, + "learning_rate": 1.9999186597222685e-05, + "loss": 2.6376, + "step": 620 + }, + { + "epoch": 0.02, + "grad_norm": 0.6412535905838013, + "learning_rate": 1.999917298421107e-05, + "loss": 2.6069, + "step": 621 + }, + { + "epoch": 0.02, + "grad_norm": 0.6473847031593323, + "learning_rate": 1.9999159258234658e-05, + "loss": 2.6949, + "step": 622 + }, + { + "epoch": 0.02, + "grad_norm": 0.6589338779449463, + "learning_rate": 1.99991454192936e-05, + "loss": 2.6337, + "step": 623 + }, + { + "epoch": 0.02, + "grad_norm": 0.6539522409439087, + "learning_rate": 1.9999131467388062e-05, + "loss": 2.672, + "step": 624 + }, + { + "epoch": 0.02, + "grad_norm": 0.6796513795852661, + "learning_rate": 1.9999117402518194e-05, + "loss": 2.6508, + "step": 625 + }, + { + "epoch": 0.02, + "grad_norm": 0.6824401021003723, + "learning_rate": 1.999910322468416e-05, + "loss": 2.6328, + "step": 626 + }, + { + "epoch": 0.02, + "grad_norm": 0.6502139568328857, + "learning_rate": 1.9999088933886117e-05, + "loss": 2.7286, + "step": 627 + }, + { + "epoch": 0.02, + "grad_norm": 0.6533864736557007, + "learning_rate": 1.999907453012423e-05, + "loss": 2.5907, + "step": 628 + }, + { + "epoch": 0.02, + "grad_norm": 0.6478393077850342, + "learning_rate": 1.999906001339865e-05, + "loss": 2.5878, + "step": 629 + }, + { + "epoch": 0.02, + "grad_norm": 0.6769579648971558, + "learning_rate": 1.999904538370956e-05, + "loss": 2.6677, + "step": 630 + }, + { + "epoch": 0.02, + "grad_norm": 0.6979731321334839, + "learning_rate": 1.9999030641057108e-05, + "loss": 2.6418, + "step": 631 + }, + { + "epoch": 0.02, + "grad_norm": 0.6344602704048157, + "learning_rate": 1.9999015785441472e-05, + "loss": 2.6724, + "step": 632 + }, + { + "epoch": 0.02, + "grad_norm": 0.6661173105239868, + "learning_rate": 1.9999000816862812e-05, + "loss": 2.6335, + "step": 633 + }, + { + "epoch": 0.02, + "grad_norm": 0.6379936933517456, + "learning_rate": 1.9998985735321304e-05, + "loss": 2.6162, + "step": 634 + }, + { + "epoch": 0.02, + "grad_norm": 0.646026074886322, + "learning_rate": 1.9998970540817114e-05, + "loss": 2.6188, + "step": 635 + }, + { + "epoch": 0.02, + "grad_norm": 0.659218430519104, + "learning_rate": 1.9998955233350418e-05, + "loss": 2.6599, + "step": 636 + }, + { + "epoch": 0.02, + "grad_norm": 0.7054409980773926, + "learning_rate": 1.999893981292138e-05, + "loss": 2.6265, + "step": 637 + }, + { + "epoch": 0.02, + "grad_norm": 0.6546733379364014, + "learning_rate": 1.9998924279530185e-05, + "loss": 2.6258, + "step": 638 + }, + { + "epoch": 0.02, + "grad_norm": 0.6818681359291077, + "learning_rate": 1.9998908633177e-05, + "loss": 2.6579, + "step": 639 + }, + { + "epoch": 0.02, + "grad_norm": 0.6307359933853149, + "learning_rate": 1.999889287386201e-05, + "loss": 2.6183, + "step": 640 + }, + { + "epoch": 0.02, + "grad_norm": 0.6655917763710022, + "learning_rate": 1.999887700158538e-05, + "loss": 2.7134, + "step": 641 + }, + { + "epoch": 0.02, + "grad_norm": 0.6762456297874451, + "learning_rate": 1.9998861016347305e-05, + "loss": 2.6249, + "step": 642 + }, + { + "epoch": 0.02, + "grad_norm": 0.6500999331474304, + "learning_rate": 1.9998844918147956e-05, + "loss": 2.6587, + "step": 643 + }, + { + "epoch": 0.02, + "grad_norm": 0.6819188594818115, + "learning_rate": 1.9998828706987517e-05, + "loss": 2.6848, + "step": 644 + }, + { + "epoch": 0.02, + "grad_norm": 0.6916864514350891, + "learning_rate": 1.9998812382866172e-05, + "loss": 2.6942, + "step": 645 + }, + { + "epoch": 0.02, + "grad_norm": 0.6661266088485718, + "learning_rate": 1.9998795945784102e-05, + "loss": 2.6034, + "step": 646 + }, + { + "epoch": 0.02, + "grad_norm": 0.777336835861206, + "learning_rate": 1.9998779395741496e-05, + "loss": 2.6785, + "step": 647 + }, + { + "epoch": 0.02, + "grad_norm": 0.6751137375831604, + "learning_rate": 1.9998762732738538e-05, + "loss": 2.6326, + "step": 648 + }, + { + "epoch": 0.02, + "grad_norm": 0.667270839214325, + "learning_rate": 1.9998745956775422e-05, + "loss": 2.6204, + "step": 649 + }, + { + "epoch": 0.02, + "grad_norm": 0.6546599864959717, + "learning_rate": 1.9998729067852335e-05, + "loss": 2.6316, + "step": 650 + }, + { + "epoch": 0.02, + "grad_norm": 0.6469268202781677, + "learning_rate": 1.9998712065969465e-05, + "loss": 2.6385, + "step": 651 + }, + { + "epoch": 0.02, + "grad_norm": 0.6478886604309082, + "learning_rate": 1.9998694951127007e-05, + "loss": 2.6157, + "step": 652 + }, + { + "epoch": 0.02, + "grad_norm": 0.6824221014976501, + "learning_rate": 1.999867772332515e-05, + "loss": 2.617, + "step": 653 + }, + { + "epoch": 0.02, + "grad_norm": 0.6531621217727661, + "learning_rate": 1.9998660382564097e-05, + "loss": 2.5561, + "step": 654 + }, + { + "epoch": 0.02, + "grad_norm": 0.6488674283027649, + "learning_rate": 1.9998642928844035e-05, + "loss": 2.5838, + "step": 655 + }, + { + "epoch": 0.02, + "grad_norm": 0.6546266078948975, + "learning_rate": 1.9998625362165166e-05, + "loss": 2.7035, + "step": 656 + }, + { + "epoch": 0.02, + "grad_norm": 0.6306750774383545, + "learning_rate": 1.9998607682527688e-05, + "loss": 2.635, + "step": 657 + }, + { + "epoch": 0.02, + "grad_norm": 0.675563633441925, + "learning_rate": 1.9998589889931797e-05, + "loss": 2.6274, + "step": 658 + }, + { + "epoch": 0.02, + "grad_norm": 0.6688238382339478, + "learning_rate": 1.99985719843777e-05, + "loss": 2.6487, + "step": 659 + }, + { + "epoch": 0.02, + "grad_norm": 0.6733666658401489, + "learning_rate": 1.9998553965865598e-05, + "loss": 2.6012, + "step": 660 + }, + { + "epoch": 0.02, + "grad_norm": 0.6428125500679016, + "learning_rate": 1.999853583439569e-05, + "loss": 2.6639, + "step": 661 + }, + { + "epoch": 0.02, + "grad_norm": 0.714256227016449, + "learning_rate": 1.9998517589968183e-05, + "loss": 2.6594, + "step": 662 + }, + { + "epoch": 0.02, + "grad_norm": 0.6464070677757263, + "learning_rate": 1.999849923258329e-05, + "loss": 2.674, + "step": 663 + }, + { + "epoch": 0.02, + "grad_norm": 0.6534208059310913, + "learning_rate": 1.9998480762241206e-05, + "loss": 2.626, + "step": 664 + }, + { + "epoch": 0.02, + "grad_norm": 0.6571592688560486, + "learning_rate": 1.999846217894215e-05, + "loss": 2.6457, + "step": 665 + }, + { + "epoch": 0.02, + "grad_norm": 0.6504070162773132, + "learning_rate": 1.9998443482686325e-05, + "loss": 2.6566, + "step": 666 + }, + { + "epoch": 0.02, + "grad_norm": 0.6504026651382446, + "learning_rate": 1.9998424673473945e-05, + "loss": 2.622, + "step": 667 + }, + { + "epoch": 0.02, + "grad_norm": 0.6780567765235901, + "learning_rate": 1.9998405751305226e-05, + "loss": 2.6311, + "step": 668 + }, + { + "epoch": 0.02, + "grad_norm": 0.6685227155685425, + "learning_rate": 1.9998386716180377e-05, + "loss": 2.7076, + "step": 669 + }, + { + "epoch": 0.02, + "grad_norm": 0.7406482100486755, + "learning_rate": 1.9998367568099612e-05, + "loss": 2.5774, + "step": 670 + }, + { + "epoch": 0.02, + "grad_norm": 0.6598697900772095, + "learning_rate": 1.9998348307063154e-05, + "loss": 2.6187, + "step": 671 + }, + { + "epoch": 0.02, + "grad_norm": 0.633364737033844, + "learning_rate": 1.9998328933071215e-05, + "loss": 2.6663, + "step": 672 + }, + { + "epoch": 0.02, + "grad_norm": 0.6449758410453796, + "learning_rate": 1.9998309446124013e-05, + "loss": 2.5952, + "step": 673 + }, + { + "epoch": 0.02, + "grad_norm": 0.6687312722206116, + "learning_rate": 1.9998289846221772e-05, + "loss": 2.624, + "step": 674 + }, + { + "epoch": 0.02, + "grad_norm": 0.6782962679862976, + "learning_rate": 1.9998270133364713e-05, + "loss": 2.6179, + "step": 675 + }, + { + "epoch": 0.02, + "grad_norm": 0.6188567280769348, + "learning_rate": 1.9998250307553056e-05, + "loss": 2.5809, + "step": 676 + }, + { + "epoch": 0.02, + "grad_norm": 0.6713731288909912, + "learning_rate": 1.999823036878703e-05, + "loss": 2.5994, + "step": 677 + }, + { + "epoch": 0.02, + "grad_norm": 0.6576871871948242, + "learning_rate": 1.9998210317066853e-05, + "loss": 2.5337, + "step": 678 + }, + { + "epoch": 0.02, + "grad_norm": 0.6445668339729309, + "learning_rate": 1.9998190152392757e-05, + "loss": 2.6452, + "step": 679 + }, + { + "epoch": 0.02, + "grad_norm": 0.6413486003875732, + "learning_rate": 1.9998169874764968e-05, + "loss": 2.6452, + "step": 680 + }, + { + "epoch": 0.02, + "grad_norm": 0.6560171842575073, + "learning_rate": 1.9998149484183712e-05, + "loss": 2.5967, + "step": 681 + }, + { + "epoch": 0.02, + "grad_norm": 0.6555939316749573, + "learning_rate": 1.9998128980649228e-05, + "loss": 2.6335, + "step": 682 + }, + { + "epoch": 0.02, + "grad_norm": 0.6610609889030457, + "learning_rate": 1.999810836416174e-05, + "loss": 2.6311, + "step": 683 + }, + { + "epoch": 0.02, + "grad_norm": 0.6651795506477356, + "learning_rate": 1.999808763472148e-05, + "loss": 2.6476, + "step": 684 + }, + { + "epoch": 0.02, + "grad_norm": 0.6404531002044678, + "learning_rate": 1.999806679232869e-05, + "loss": 2.6348, + "step": 685 + }, + { + "epoch": 0.02, + "grad_norm": 0.6947271823883057, + "learning_rate": 1.99980458369836e-05, + "loss": 2.597, + "step": 686 + }, + { + "epoch": 0.02, + "grad_norm": 0.6479017734527588, + "learning_rate": 1.9998024768686445e-05, + "loss": 2.6128, + "step": 687 + }, + { + "epoch": 0.02, + "grad_norm": 0.6455385684967041, + "learning_rate": 1.999800358743747e-05, + "loss": 2.6298, + "step": 688 + }, + { + "epoch": 0.02, + "grad_norm": 0.6617756485939026, + "learning_rate": 1.9997982293236906e-05, + "loss": 2.632, + "step": 689 + }, + { + "epoch": 0.02, + "grad_norm": 0.6836099028587341, + "learning_rate": 1.9997960886084996e-05, + "loss": 2.591, + "step": 690 + }, + { + "epoch": 0.02, + "grad_norm": 0.6432330012321472, + "learning_rate": 1.9997939365981988e-05, + "loss": 2.6081, + "step": 691 + }, + { + "epoch": 0.02, + "grad_norm": 0.6443216800689697, + "learning_rate": 1.999791773292812e-05, + "loss": 2.6129, + "step": 692 + }, + { + "epoch": 0.02, + "grad_norm": 0.6464120745658875, + "learning_rate": 1.9997895986923634e-05, + "loss": 2.6489, + "step": 693 + }, + { + "epoch": 0.02, + "grad_norm": 0.6480265855789185, + "learning_rate": 1.9997874127968778e-05, + "loss": 2.6174, + "step": 694 + }, + { + "epoch": 0.02, + "grad_norm": 0.6874150037765503, + "learning_rate": 1.99978521560638e-05, + "loss": 2.6843, + "step": 695 + }, + { + "epoch": 0.02, + "grad_norm": 0.6760188937187195, + "learning_rate": 1.999783007120895e-05, + "loss": 2.6175, + "step": 696 + }, + { + "epoch": 0.02, + "grad_norm": 0.6596389412879944, + "learning_rate": 1.9997807873404477e-05, + "loss": 2.6296, + "step": 697 + }, + { + "epoch": 0.02, + "grad_norm": 0.6853276491165161, + "learning_rate": 1.9997785562650626e-05, + "loss": 2.616, + "step": 698 + }, + { + "epoch": 0.02, + "grad_norm": 0.6755525469779968, + "learning_rate": 1.9997763138947653e-05, + "loss": 2.6266, + "step": 699 + }, + { + "epoch": 0.02, + "grad_norm": 0.6434488296508789, + "learning_rate": 1.999774060229581e-05, + "loss": 2.6305, + "step": 700 + }, + { + "epoch": 0.02, + "grad_norm": 0.634428858757019, + "learning_rate": 1.9997717952695357e-05, + "loss": 2.5954, + "step": 701 + }, + { + "epoch": 0.02, + "grad_norm": 0.6757248640060425, + "learning_rate": 1.9997695190146543e-05, + "loss": 2.6468, + "step": 702 + }, + { + "epoch": 0.02, + "grad_norm": 0.6874866485595703, + "learning_rate": 1.999767231464963e-05, + "loss": 2.581, + "step": 703 + }, + { + "epoch": 0.02, + "grad_norm": 0.6547470092773438, + "learning_rate": 1.9997649326204874e-05, + "loss": 2.6262, + "step": 704 + }, + { + "epoch": 0.02, + "grad_norm": 0.6525161862373352, + "learning_rate": 1.9997626224812533e-05, + "loss": 2.56, + "step": 705 + }, + { + "epoch": 0.02, + "grad_norm": 0.6906682848930359, + "learning_rate": 1.999760301047287e-05, + "loss": 2.6286, + "step": 706 + }, + { + "epoch": 0.02, + "grad_norm": 0.6766973733901978, + "learning_rate": 1.999757968318615e-05, + "loss": 2.6347, + "step": 707 + }, + { + "epoch": 0.02, + "grad_norm": 0.6386931538581848, + "learning_rate": 1.9997556242952633e-05, + "loss": 2.5733, + "step": 708 + }, + { + "epoch": 0.02, + "grad_norm": 0.6369397640228271, + "learning_rate": 1.9997532689772584e-05, + "loss": 2.5631, + "step": 709 + }, + { + "epoch": 0.02, + "grad_norm": 0.6598371267318726, + "learning_rate": 1.999750902364627e-05, + "loss": 2.6438, + "step": 710 + }, + { + "epoch": 0.02, + "grad_norm": 0.630191445350647, + "learning_rate": 1.999748524457396e-05, + "loss": 2.5638, + "step": 711 + }, + { + "epoch": 0.02, + "grad_norm": 0.6679804921150208, + "learning_rate": 1.999746135255592e-05, + "loss": 2.6384, + "step": 712 + }, + { + "epoch": 0.02, + "grad_norm": 0.6520076394081116, + "learning_rate": 1.999743734759242e-05, + "loss": 2.6141, + "step": 713 + }, + { + "epoch": 0.02, + "grad_norm": 0.6715118885040283, + "learning_rate": 1.999741322968373e-05, + "loss": 2.5587, + "step": 714 + }, + { + "epoch": 0.02, + "grad_norm": 0.683879017829895, + "learning_rate": 1.9997388998830125e-05, + "loss": 2.6455, + "step": 715 + }, + { + "epoch": 0.02, + "grad_norm": 0.6658943891525269, + "learning_rate": 1.999736465503188e-05, + "loss": 2.6418, + "step": 716 + }, + { + "epoch": 0.02, + "grad_norm": 0.6488131880760193, + "learning_rate": 1.9997340198289266e-05, + "loss": 2.5955, + "step": 717 + }, + { + "epoch": 0.02, + "grad_norm": 0.6493475437164307, + "learning_rate": 1.9997315628602564e-05, + "loss": 2.6433, + "step": 718 + }, + { + "epoch": 0.02, + "grad_norm": 0.6291093230247498, + "learning_rate": 1.999729094597205e-05, + "loss": 2.5813, + "step": 719 + }, + { + "epoch": 0.02, + "grad_norm": 0.6662304997444153, + "learning_rate": 1.9997266150398e-05, + "loss": 2.6456, + "step": 720 + }, + { + "epoch": 0.02, + "grad_norm": 0.63725745677948, + "learning_rate": 1.9997241241880695e-05, + "loss": 2.6204, + "step": 721 + }, + { + "epoch": 0.02, + "grad_norm": 0.652138888835907, + "learning_rate": 1.9997216220420415e-05, + "loss": 2.6295, + "step": 722 + }, + { + "epoch": 0.02, + "grad_norm": 0.654179573059082, + "learning_rate": 1.9997191086017454e-05, + "loss": 2.6221, + "step": 723 + }, + { + "epoch": 0.02, + "grad_norm": 0.6462941765785217, + "learning_rate": 1.9997165838672078e-05, + "loss": 2.603, + "step": 724 + }, + { + "epoch": 0.02, + "grad_norm": 0.6551197171211243, + "learning_rate": 1.9997140478384586e-05, + "loss": 2.6124, + "step": 725 + }, + { + "epoch": 0.02, + "grad_norm": 0.6915521025657654, + "learning_rate": 1.999711500515526e-05, + "loss": 2.522, + "step": 726 + }, + { + "epoch": 0.02, + "grad_norm": 0.6308431625366211, + "learning_rate": 1.9997089418984385e-05, + "loss": 2.5981, + "step": 727 + }, + { + "epoch": 0.02, + "grad_norm": 0.6642795205116272, + "learning_rate": 1.9997063719872252e-05, + "loss": 2.5855, + "step": 728 + }, + { + "epoch": 0.02, + "grad_norm": 0.6863251328468323, + "learning_rate": 1.9997037907819155e-05, + "loss": 2.5617, + "step": 729 + }, + { + "epoch": 0.02, + "grad_norm": 0.6366772055625916, + "learning_rate": 1.9997011982825382e-05, + "loss": 2.6026, + "step": 730 + }, + { + "epoch": 0.02, + "grad_norm": 0.6680012345314026, + "learning_rate": 1.9996985944891223e-05, + "loss": 2.5771, + "step": 731 + }, + { + "epoch": 0.02, + "grad_norm": 0.6940551996231079, + "learning_rate": 1.999695979401698e-05, + "loss": 2.5927, + "step": 732 + }, + { + "epoch": 0.02, + "grad_norm": 0.6337440609931946, + "learning_rate": 1.999693353020294e-05, + "loss": 2.5422, + "step": 733 + }, + { + "epoch": 0.02, + "grad_norm": 0.7387632131576538, + "learning_rate": 1.9996907153449408e-05, + "loss": 2.5464, + "step": 734 + }, + { + "epoch": 0.02, + "grad_norm": 0.654753565788269, + "learning_rate": 1.9996880663756678e-05, + "loss": 2.5678, + "step": 735 + }, + { + "epoch": 0.02, + "grad_norm": 0.6943520307540894, + "learning_rate": 1.9996854061125044e-05, + "loss": 2.6603, + "step": 736 + }, + { + "epoch": 0.02, + "grad_norm": 0.6481813788414001, + "learning_rate": 1.9996827345554814e-05, + "loss": 2.5998, + "step": 737 + }, + { + "epoch": 0.02, + "grad_norm": 0.6417766213417053, + "learning_rate": 1.999680051704629e-05, + "loss": 2.5979, + "step": 738 + }, + { + "epoch": 0.02, + "grad_norm": 0.6641683578491211, + "learning_rate": 1.999677357559977e-05, + "loss": 2.5453, + "step": 739 + }, + { + "epoch": 0.02, + "grad_norm": 0.6462195515632629, + "learning_rate": 1.9996746521215558e-05, + "loss": 2.5863, + "step": 740 + }, + { + "epoch": 0.02, + "grad_norm": 0.6744074821472168, + "learning_rate": 1.9996719353893967e-05, + "loss": 2.614, + "step": 741 + }, + { + "epoch": 0.02, + "grad_norm": 0.6678904294967651, + "learning_rate": 1.9996692073635297e-05, + "loss": 2.6401, + "step": 742 + }, + { + "epoch": 0.02, + "grad_norm": 0.6701170802116394, + "learning_rate": 1.999666468043986e-05, + "loss": 2.6479, + "step": 743 + }, + { + "epoch": 0.02, + "grad_norm": 0.6428682208061218, + "learning_rate": 1.999663717430796e-05, + "loss": 2.576, + "step": 744 + }, + { + "epoch": 0.02, + "grad_norm": 0.6440454721450806, + "learning_rate": 1.9996609555239918e-05, + "loss": 2.6177, + "step": 745 + }, + { + "epoch": 0.02, + "grad_norm": 0.6575391292572021, + "learning_rate": 1.9996581823236037e-05, + "loss": 2.5527, + "step": 746 + }, + { + "epoch": 0.02, + "grad_norm": 0.6564323902130127, + "learning_rate": 1.9996553978296632e-05, + "loss": 2.6262, + "step": 747 + }, + { + "epoch": 0.02, + "grad_norm": 0.6306195259094238, + "learning_rate": 1.9996526020422024e-05, + "loss": 2.613, + "step": 748 + }, + { + "epoch": 0.02, + "grad_norm": 0.6638990640640259, + "learning_rate": 1.9996497949612516e-05, + "loss": 2.6113, + "step": 749 + }, + { + "epoch": 0.02, + "grad_norm": 0.6677066683769226, + "learning_rate": 1.9996469765868437e-05, + "loss": 2.6854, + "step": 750 + }, + { + "epoch": 0.02, + "grad_norm": 0.6621503829956055, + "learning_rate": 1.99964414691901e-05, + "loss": 2.64, + "step": 751 + }, + { + "epoch": 0.03, + "grad_norm": 0.629335880279541, + "learning_rate": 1.9996413059577827e-05, + "loss": 2.5173, + "step": 752 + }, + { + "epoch": 0.03, + "grad_norm": 0.633747398853302, + "learning_rate": 1.9996384537031936e-05, + "loss": 2.5694, + "step": 753 + }, + { + "epoch": 0.03, + "grad_norm": 0.6534228324890137, + "learning_rate": 1.9996355901552752e-05, + "loss": 2.5989, + "step": 754 + }, + { + "epoch": 0.03, + "grad_norm": 0.6445450186729431, + "learning_rate": 1.9996327153140596e-05, + "loss": 2.603, + "step": 755 + }, + { + "epoch": 0.03, + "grad_norm": 0.6484500169754028, + "learning_rate": 1.9996298291795798e-05, + "loss": 2.6099, + "step": 756 + }, + { + "epoch": 0.03, + "grad_norm": 0.6291806101799011, + "learning_rate": 1.9996269317518675e-05, + "loss": 2.6044, + "step": 757 + }, + { + "epoch": 0.03, + "grad_norm": 0.6487911939620972, + "learning_rate": 1.9996240230309562e-05, + "loss": 2.5647, + "step": 758 + }, + { + "epoch": 0.03, + "grad_norm": 0.6223908066749573, + "learning_rate": 1.9996211030168786e-05, + "loss": 2.5609, + "step": 759 + }, + { + "epoch": 0.03, + "grad_norm": 0.6909140348434448, + "learning_rate": 1.999618171709668e-05, + "loss": 2.6373, + "step": 760 + }, + { + "epoch": 0.03, + "grad_norm": 0.642636239528656, + "learning_rate": 1.9996152291093564e-05, + "loss": 2.5481, + "step": 761 + }, + { + "epoch": 0.03, + "grad_norm": 0.6456642150878906, + "learning_rate": 1.9996122752159782e-05, + "loss": 2.5837, + "step": 762 + }, + { + "epoch": 0.03, + "grad_norm": 0.6698892116546631, + "learning_rate": 1.999609310029566e-05, + "loss": 2.5738, + "step": 763 + }, + { + "epoch": 0.03, + "grad_norm": 0.6802642941474915, + "learning_rate": 1.999606333550154e-05, + "loss": 2.6074, + "step": 764 + }, + { + "epoch": 0.03, + "grad_norm": 0.6410853862762451, + "learning_rate": 1.9996033457777753e-05, + "loss": 2.5666, + "step": 765 + }, + { + "epoch": 0.03, + "grad_norm": 0.7001330852508545, + "learning_rate": 1.9996003467124638e-05, + "loss": 2.5943, + "step": 766 + }, + { + "epoch": 0.03, + "grad_norm": 0.6452129483222961, + "learning_rate": 1.9995973363542535e-05, + "loss": 2.5296, + "step": 767 + }, + { + "epoch": 0.03, + "grad_norm": 0.7368355393409729, + "learning_rate": 1.9995943147031784e-05, + "loss": 2.5956, + "step": 768 + }, + { + "epoch": 0.03, + "grad_norm": 0.6864637732505798, + "learning_rate": 1.9995912817592724e-05, + "loss": 2.6749, + "step": 769 + }, + { + "epoch": 0.03, + "grad_norm": 0.6283546090126038, + "learning_rate": 1.99958823752257e-05, + "loss": 2.5796, + "step": 770 + }, + { + "epoch": 0.03, + "grad_norm": 0.6918172240257263, + "learning_rate": 1.9995851819931053e-05, + "loss": 2.5778, + "step": 771 + }, + { + "epoch": 0.03, + "grad_norm": 0.6927192211151123, + "learning_rate": 1.999582115170913e-05, + "loss": 2.5523, + "step": 772 + }, + { + "epoch": 0.03, + "grad_norm": 0.7102265954017639, + "learning_rate": 1.9995790370560284e-05, + "loss": 2.6021, + "step": 773 + }, + { + "epoch": 0.03, + "grad_norm": 0.6565168499946594, + "learning_rate": 1.999575947648485e-05, + "loss": 2.5271, + "step": 774 + }, + { + "epoch": 0.03, + "grad_norm": 0.6337746977806091, + "learning_rate": 1.9995728469483185e-05, + "loss": 2.6003, + "step": 775 + }, + { + "epoch": 0.03, + "grad_norm": 0.6818331480026245, + "learning_rate": 1.999569734955564e-05, + "loss": 2.6418, + "step": 776 + }, + { + "epoch": 0.03, + "grad_norm": 0.6816015839576721, + "learning_rate": 1.9995666116702562e-05, + "loss": 2.6616, + "step": 777 + }, + { + "epoch": 0.03, + "grad_norm": 0.6537896990776062, + "learning_rate": 1.9995634770924308e-05, + "loss": 2.6179, + "step": 778 + }, + { + "epoch": 0.03, + "grad_norm": 0.6448139548301697, + "learning_rate": 1.999560331222123e-05, + "loss": 2.5361, + "step": 779 + }, + { + "epoch": 0.03, + "grad_norm": 0.6397492289543152, + "learning_rate": 1.9995571740593682e-05, + "loss": 2.5291, + "step": 780 + }, + { + "epoch": 0.03, + "grad_norm": 0.6577461957931519, + "learning_rate": 1.9995540056042025e-05, + "loss": 2.5625, + "step": 781 + }, + { + "epoch": 0.03, + "grad_norm": 0.6541372537612915, + "learning_rate": 1.9995508258566613e-05, + "loss": 2.5595, + "step": 782 + }, + { + "epoch": 0.03, + "grad_norm": 0.7245307564735413, + "learning_rate": 1.9995476348167807e-05, + "loss": 2.6065, + "step": 783 + }, + { + "epoch": 0.03, + "grad_norm": 0.6526775360107422, + "learning_rate": 1.9995444324845967e-05, + "loss": 2.5722, + "step": 784 + }, + { + "epoch": 0.03, + "grad_norm": 0.6915601491928101, + "learning_rate": 1.9995412188601455e-05, + "loss": 2.5494, + "step": 785 + }, + { + "epoch": 0.03, + "grad_norm": 0.6616017818450928, + "learning_rate": 1.9995379939434634e-05, + "loss": 2.5553, + "step": 786 + }, + { + "epoch": 0.03, + "grad_norm": 0.6422218680381775, + "learning_rate": 1.999534757734587e-05, + "loss": 2.5838, + "step": 787 + }, + { + "epoch": 0.03, + "grad_norm": 0.6715728044509888, + "learning_rate": 1.999531510233552e-05, + "loss": 2.6227, + "step": 788 + }, + { + "epoch": 0.03, + "grad_norm": 0.6504473686218262, + "learning_rate": 1.9995282514403965e-05, + "loss": 2.5664, + "step": 789 + }, + { + "epoch": 0.03, + "grad_norm": 0.649907648563385, + "learning_rate": 1.9995249813551566e-05, + "loss": 2.5876, + "step": 790 + }, + { + "epoch": 0.03, + "grad_norm": 0.6391369104385376, + "learning_rate": 1.999521699977869e-05, + "loss": 2.6225, + "step": 791 + }, + { + "epoch": 0.03, + "grad_norm": 0.6849904656410217, + "learning_rate": 1.999518407308571e-05, + "loss": 2.6067, + "step": 792 + }, + { + "epoch": 0.03, + "grad_norm": 0.6667712926864624, + "learning_rate": 1.9995151033472998e-05, + "loss": 2.5911, + "step": 793 + }, + { + "epoch": 0.03, + "grad_norm": 0.6548218727111816, + "learning_rate": 1.9995117880940927e-05, + "loss": 2.5405, + "step": 794 + }, + { + "epoch": 0.03, + "grad_norm": 0.6442228555679321, + "learning_rate": 1.999508461548987e-05, + "loss": 2.5827, + "step": 795 + }, + { + "epoch": 0.03, + "grad_norm": 0.6406388878822327, + "learning_rate": 1.999505123712021e-05, + "loss": 2.5889, + "step": 796 + }, + { + "epoch": 0.03, + "grad_norm": 0.6656167507171631, + "learning_rate": 1.9995017745832318e-05, + "loss": 2.5114, + "step": 797 + }, + { + "epoch": 0.03, + "grad_norm": 0.6376672387123108, + "learning_rate": 1.9994984141626572e-05, + "loss": 2.5279, + "step": 798 + }, + { + "epoch": 0.03, + "grad_norm": 0.6719859838485718, + "learning_rate": 1.999495042450335e-05, + "loss": 2.6545, + "step": 799 + }, + { + "epoch": 0.03, + "grad_norm": 0.6306279897689819, + "learning_rate": 1.9994916594463038e-05, + "loss": 2.5553, + "step": 800 + }, + { + "epoch": 0.03, + "grad_norm": 0.6618303656578064, + "learning_rate": 1.9994882651506015e-05, + "loss": 2.6115, + "step": 801 + }, + { + "epoch": 0.03, + "grad_norm": 0.6502376794815063, + "learning_rate": 1.999484859563267e-05, + "loss": 2.5675, + "step": 802 + }, + { + "epoch": 0.03, + "grad_norm": 0.6250829696655273, + "learning_rate": 1.9994814426843377e-05, + "loss": 2.575, + "step": 803 + }, + { + "epoch": 0.03, + "grad_norm": 0.6839468479156494, + "learning_rate": 1.999478014513853e-05, + "loss": 2.6039, + "step": 804 + }, + { + "epoch": 0.03, + "grad_norm": 0.6998984813690186, + "learning_rate": 1.9994745750518517e-05, + "loss": 2.573, + "step": 805 + }, + { + "epoch": 0.03, + "grad_norm": 0.6413207650184631, + "learning_rate": 1.999471124298372e-05, + "loss": 2.6052, + "step": 806 + }, + { + "epoch": 0.03, + "grad_norm": 0.6643601655960083, + "learning_rate": 1.9994676622534537e-05, + "loss": 2.5425, + "step": 807 + }, + { + "epoch": 0.03, + "grad_norm": 0.6712691187858582, + "learning_rate": 1.999464188917135e-05, + "loss": 2.6618, + "step": 808 + }, + { + "epoch": 0.03, + "grad_norm": 0.7033228874206543, + "learning_rate": 1.999460704289456e-05, + "loss": 2.6179, + "step": 809 + }, + { + "epoch": 0.03, + "grad_norm": 0.6693732738494873, + "learning_rate": 1.9994572083704558e-05, + "loss": 2.5589, + "step": 810 + }, + { + "epoch": 0.03, + "grad_norm": 0.6438301205635071, + "learning_rate": 1.9994537011601737e-05, + "loss": 2.561, + "step": 811 + }, + { + "epoch": 0.03, + "grad_norm": 0.6705179214477539, + "learning_rate": 1.9994501826586493e-05, + "loss": 2.5712, + "step": 812 + }, + { + "epoch": 0.03, + "grad_norm": 0.6689388751983643, + "learning_rate": 1.9994466528659222e-05, + "loss": 2.6007, + "step": 813 + }, + { + "epoch": 0.03, + "grad_norm": 0.7577647566795349, + "learning_rate": 1.999443111782033e-05, + "loss": 2.5506, + "step": 814 + }, + { + "epoch": 0.03, + "grad_norm": 0.6508129239082336, + "learning_rate": 1.999439559407021e-05, + "loss": 2.5662, + "step": 815 + }, + { + "epoch": 0.03, + "grad_norm": 0.6562962532043457, + "learning_rate": 1.999435995740927e-05, + "loss": 2.5491, + "step": 816 + }, + { + "epoch": 0.03, + "grad_norm": 0.6568298935890198, + "learning_rate": 1.9994324207837902e-05, + "loss": 2.5852, + "step": 817 + }, + { + "epoch": 0.03, + "grad_norm": 0.6908994913101196, + "learning_rate": 1.9994288345356522e-05, + "loss": 2.567, + "step": 818 + }, + { + "epoch": 0.03, + "grad_norm": 0.6363824009895325, + "learning_rate": 1.9994252369965527e-05, + "loss": 2.5956, + "step": 819 + }, + { + "epoch": 0.03, + "grad_norm": 0.6722463369369507, + "learning_rate": 1.9994216281665326e-05, + "loss": 2.5892, + "step": 820 + }, + { + "epoch": 0.03, + "grad_norm": 0.6953749060630798, + "learning_rate": 1.9994180080456327e-05, + "loss": 2.6019, + "step": 821 + }, + { + "epoch": 0.03, + "grad_norm": 0.6519436836242676, + "learning_rate": 1.9994143766338937e-05, + "loss": 2.6338, + "step": 822 + }, + { + "epoch": 0.03, + "grad_norm": 0.6399454474449158, + "learning_rate": 1.999410733931357e-05, + "loss": 2.5947, + "step": 823 + }, + { + "epoch": 0.03, + "grad_norm": 0.6465761661529541, + "learning_rate": 1.9994070799380636e-05, + "loss": 2.5638, + "step": 824 + }, + { + "epoch": 0.03, + "grad_norm": 0.6767170429229736, + "learning_rate": 1.9994034146540544e-05, + "loss": 2.5329, + "step": 825 + }, + { + "epoch": 0.03, + "grad_norm": 0.6768900752067566, + "learning_rate": 1.9993997380793714e-05, + "loss": 2.5725, + "step": 826 + }, + { + "epoch": 0.03, + "grad_norm": 0.6659291982650757, + "learning_rate": 1.9993960502140558e-05, + "loss": 2.5938, + "step": 827 + }, + { + "epoch": 0.03, + "grad_norm": 0.6561067700386047, + "learning_rate": 1.999392351058149e-05, + "loss": 2.5791, + "step": 828 + }, + { + "epoch": 0.03, + "grad_norm": 0.6598148941993713, + "learning_rate": 1.9993886406116936e-05, + "loss": 2.5582, + "step": 829 + }, + { + "epoch": 0.03, + "grad_norm": 0.6995514631271362, + "learning_rate": 1.9993849188747312e-05, + "loss": 2.5651, + "step": 830 + }, + { + "epoch": 0.03, + "grad_norm": 0.6523492932319641, + "learning_rate": 1.999381185847303e-05, + "loss": 2.5384, + "step": 831 + }, + { + "epoch": 0.03, + "grad_norm": 0.661693274974823, + "learning_rate": 1.9993774415294525e-05, + "loss": 2.6182, + "step": 832 + }, + { + "epoch": 0.03, + "grad_norm": 0.6545494198799133, + "learning_rate": 1.999373685921221e-05, + "loss": 2.5564, + "step": 833 + }, + { + "epoch": 0.03, + "grad_norm": 0.6930851936340332, + "learning_rate": 1.9993699190226516e-05, + "loss": 2.6188, + "step": 834 + }, + { + "epoch": 0.03, + "grad_norm": 0.655387818813324, + "learning_rate": 1.999366140833786e-05, + "loss": 2.5456, + "step": 835 + }, + { + "epoch": 0.03, + "grad_norm": 0.65291827917099, + "learning_rate": 1.9993623513546678e-05, + "loss": 2.5705, + "step": 836 + }, + { + "epoch": 0.03, + "grad_norm": 0.6709448099136353, + "learning_rate": 1.9993585505853392e-05, + "loss": 2.6113, + "step": 837 + }, + { + "epoch": 0.03, + "grad_norm": 0.6598061323165894, + "learning_rate": 1.9993547385258438e-05, + "loss": 2.5802, + "step": 838 + }, + { + "epoch": 0.03, + "grad_norm": 0.6680628657341003, + "learning_rate": 1.999350915176224e-05, + "loss": 2.5515, + "step": 839 + }, + { + "epoch": 0.03, + "grad_norm": 0.7139716744422913, + "learning_rate": 1.999347080536523e-05, + "loss": 2.534, + "step": 840 + }, + { + "epoch": 0.03, + "grad_norm": 0.7240101099014282, + "learning_rate": 1.9993432346067848e-05, + "loss": 2.634, + "step": 841 + }, + { + "epoch": 0.03, + "grad_norm": 0.6930354237556458, + "learning_rate": 1.9993393773870522e-05, + "loss": 2.5582, + "step": 842 + }, + { + "epoch": 0.03, + "grad_norm": 0.6855827569961548, + "learning_rate": 1.999335508877369e-05, + "loss": 2.604, + "step": 843 + }, + { + "epoch": 0.03, + "grad_norm": 0.6531574130058289, + "learning_rate": 1.9993316290777786e-05, + "loss": 2.5969, + "step": 844 + }, + { + "epoch": 0.03, + "grad_norm": 0.6608261466026306, + "learning_rate": 1.9993277379883256e-05, + "loss": 2.5502, + "step": 845 + }, + { + "epoch": 0.03, + "grad_norm": 0.6478691101074219, + "learning_rate": 1.999323835609053e-05, + "loss": 2.5824, + "step": 846 + }, + { + "epoch": 0.03, + "grad_norm": 0.6754261255264282, + "learning_rate": 1.999319921940006e-05, + "loss": 2.5495, + "step": 847 + }, + { + "epoch": 0.03, + "grad_norm": 0.6677714586257935, + "learning_rate": 1.999315996981228e-05, + "loss": 2.5857, + "step": 848 + }, + { + "epoch": 0.03, + "grad_norm": 0.6594604849815369, + "learning_rate": 1.9993120607327632e-05, + "loss": 2.5494, + "step": 849 + }, + { + "epoch": 0.03, + "grad_norm": 0.736385703086853, + "learning_rate": 1.9993081131946566e-05, + "loss": 2.559, + "step": 850 + }, + { + "epoch": 0.03, + "grad_norm": 0.6718853712081909, + "learning_rate": 1.9993041543669522e-05, + "loss": 2.6263, + "step": 851 + }, + { + "epoch": 0.03, + "grad_norm": 0.6250438094139099, + "learning_rate": 1.9993001842496956e-05, + "loss": 2.5529, + "step": 852 + }, + { + "epoch": 0.03, + "grad_norm": 0.6653687953948975, + "learning_rate": 1.999296202842931e-05, + "loss": 2.5763, + "step": 853 + }, + { + "epoch": 0.03, + "grad_norm": 0.6824021339416504, + "learning_rate": 1.9992922101467035e-05, + "loss": 2.5404, + "step": 854 + }, + { + "epoch": 0.03, + "grad_norm": 0.6717216968536377, + "learning_rate": 1.9992882061610585e-05, + "loss": 2.5815, + "step": 855 + }, + { + "epoch": 0.03, + "grad_norm": 0.6455835700035095, + "learning_rate": 1.9992841908860408e-05, + "loss": 2.5123, + "step": 856 + }, + { + "epoch": 0.03, + "grad_norm": 0.6545624136924744, + "learning_rate": 1.9992801643216958e-05, + "loss": 2.552, + "step": 857 + }, + { + "epoch": 0.03, + "grad_norm": 0.6351718306541443, + "learning_rate": 1.9992761264680692e-05, + "loss": 2.5563, + "step": 858 + }, + { + "epoch": 0.03, + "grad_norm": 0.6632485389709473, + "learning_rate": 1.999272077325207e-05, + "loss": 2.5478, + "step": 859 + }, + { + "epoch": 0.03, + "grad_norm": 0.6408211588859558, + "learning_rate": 1.9992680168931537e-05, + "loss": 2.5527, + "step": 860 + }, + { + "epoch": 0.03, + "grad_norm": 0.668522298336029, + "learning_rate": 1.9992639451719563e-05, + "loss": 2.5704, + "step": 861 + }, + { + "epoch": 0.03, + "grad_norm": 0.6781651973724365, + "learning_rate": 1.999259862161661e-05, + "loss": 2.5559, + "step": 862 + }, + { + "epoch": 0.03, + "grad_norm": 0.687554657459259, + "learning_rate": 1.9992557678623127e-05, + "loss": 2.528, + "step": 863 + }, + { + "epoch": 0.03, + "grad_norm": 0.6585277318954468, + "learning_rate": 1.9992516622739588e-05, + "loss": 2.5532, + "step": 864 + }, + { + "epoch": 0.03, + "grad_norm": 0.6704707145690918, + "learning_rate": 1.9992475453966448e-05, + "loss": 2.52, + "step": 865 + }, + { + "epoch": 0.03, + "grad_norm": 0.6548302173614502, + "learning_rate": 1.9992434172304182e-05, + "loss": 2.6041, + "step": 866 + }, + { + "epoch": 0.03, + "grad_norm": 0.6596145629882812, + "learning_rate": 1.999239277775325e-05, + "loss": 2.5099, + "step": 867 + }, + { + "epoch": 0.03, + "grad_norm": 0.6465863585472107, + "learning_rate": 1.9992351270314114e-05, + "loss": 2.5177, + "step": 868 + }, + { + "epoch": 0.03, + "grad_norm": 0.653800368309021, + "learning_rate": 1.9992309649987256e-05, + "loss": 2.5483, + "step": 869 + }, + { + "epoch": 0.03, + "grad_norm": 0.6711927056312561, + "learning_rate": 1.999226791677314e-05, + "loss": 2.5558, + "step": 870 + }, + { + "epoch": 0.03, + "grad_norm": 0.7025273442268372, + "learning_rate": 1.9992226070672234e-05, + "loss": 2.6125, + "step": 871 + }, + { + "epoch": 0.03, + "grad_norm": 0.6411182284355164, + "learning_rate": 1.9992184111685012e-05, + "loss": 2.5495, + "step": 872 + }, + { + "epoch": 0.03, + "grad_norm": 0.6790621876716614, + "learning_rate": 1.9992142039811954e-05, + "loss": 2.5984, + "step": 873 + }, + { + "epoch": 0.03, + "grad_norm": 0.633189857006073, + "learning_rate": 1.999209985505353e-05, + "loss": 2.4997, + "step": 874 + }, + { + "epoch": 0.03, + "grad_norm": 0.6589198708534241, + "learning_rate": 1.9992057557410218e-05, + "loss": 2.5168, + "step": 875 + }, + { + "epoch": 0.03, + "grad_norm": 0.6354584097862244, + "learning_rate": 1.9992015146882495e-05, + "loss": 2.5859, + "step": 876 + }, + { + "epoch": 0.03, + "grad_norm": 0.6384302973747253, + "learning_rate": 1.999197262347084e-05, + "loss": 2.5465, + "step": 877 + }, + { + "epoch": 0.03, + "grad_norm": 0.6694098711013794, + "learning_rate": 1.9991929987175734e-05, + "loss": 2.5271, + "step": 878 + }, + { + "epoch": 0.03, + "grad_norm": 0.658335268497467, + "learning_rate": 1.9991887237997663e-05, + "loss": 2.5177, + "step": 879 + }, + { + "epoch": 0.03, + "grad_norm": 0.654478907585144, + "learning_rate": 1.9991844375937103e-05, + "loss": 2.5239, + "step": 880 + }, + { + "epoch": 0.03, + "grad_norm": 0.6494770050048828, + "learning_rate": 1.999180140099454e-05, + "loss": 2.6151, + "step": 881 + }, + { + "epoch": 0.03, + "grad_norm": 0.6490781307220459, + "learning_rate": 1.999175831317046e-05, + "loss": 2.5109, + "step": 882 + }, + { + "epoch": 0.03, + "grad_norm": 0.6607816815376282, + "learning_rate": 1.999171511246535e-05, + "loss": 2.5087, + "step": 883 + }, + { + "epoch": 0.03, + "grad_norm": 0.6606966853141785, + "learning_rate": 1.9991671798879705e-05, + "loss": 2.5071, + "step": 884 + }, + { + "epoch": 0.03, + "grad_norm": 0.6823313236236572, + "learning_rate": 1.9991628372414e-05, + "loss": 2.6367, + "step": 885 + }, + { + "epoch": 0.03, + "grad_norm": 0.6771349906921387, + "learning_rate": 1.9991584833068738e-05, + "loss": 2.569, + "step": 886 + }, + { + "epoch": 0.03, + "grad_norm": 0.6409324407577515, + "learning_rate": 1.9991541180844403e-05, + "loss": 2.5392, + "step": 887 + }, + { + "epoch": 0.03, + "grad_norm": 0.6323897242546082, + "learning_rate": 1.9991497415741492e-05, + "loss": 2.5465, + "step": 888 + }, + { + "epoch": 0.03, + "grad_norm": 0.705708384513855, + "learning_rate": 1.9991453537760498e-05, + "loss": 2.5695, + "step": 889 + }, + { + "epoch": 0.03, + "grad_norm": 0.6647756099700928, + "learning_rate": 1.999140954690192e-05, + "loss": 2.5555, + "step": 890 + }, + { + "epoch": 0.03, + "grad_norm": 0.6726013422012329, + "learning_rate": 1.9991365443166253e-05, + "loss": 2.5399, + "step": 891 + }, + { + "epoch": 0.03, + "grad_norm": 0.6620315909385681, + "learning_rate": 1.9991321226553992e-05, + "loss": 2.573, + "step": 892 + }, + { + "epoch": 0.03, + "grad_norm": 0.674985408782959, + "learning_rate": 1.9991276897065642e-05, + "loss": 2.4754, + "step": 893 + }, + { + "epoch": 0.03, + "grad_norm": 0.6528021097183228, + "learning_rate": 1.99912324547017e-05, + "loss": 2.5645, + "step": 894 + }, + { + "epoch": 0.03, + "grad_norm": 0.6696645021438599, + "learning_rate": 1.9991187899462668e-05, + "loss": 2.5324, + "step": 895 + }, + { + "epoch": 0.03, + "grad_norm": 0.6654466390609741, + "learning_rate": 1.9991143231349052e-05, + "loss": 2.5137, + "step": 896 + }, + { + "epoch": 0.03, + "grad_norm": 0.7002142667770386, + "learning_rate": 1.9991098450361354e-05, + "loss": 2.5477, + "step": 897 + }, + { + "epoch": 0.03, + "grad_norm": 0.6448867321014404, + "learning_rate": 1.999105355650008e-05, + "loss": 2.5726, + "step": 898 + }, + { + "epoch": 0.03, + "grad_norm": 0.6595983505249023, + "learning_rate": 1.999100854976574e-05, + "loss": 2.544, + "step": 899 + }, + { + "epoch": 0.03, + "grad_norm": 0.6471636295318604, + "learning_rate": 1.999096343015884e-05, + "loss": 2.5599, + "step": 900 + }, + { + "epoch": 0.03, + "grad_norm": 0.669156551361084, + "learning_rate": 1.9990918197679893e-05, + "loss": 2.5373, + "step": 901 + }, + { + "epoch": 0.03, + "grad_norm": 0.6504368782043457, + "learning_rate": 1.9990872852329402e-05, + "loss": 2.6072, + "step": 902 + }, + { + "epoch": 0.03, + "grad_norm": 0.651648759841919, + "learning_rate": 1.999082739410789e-05, + "loss": 2.5806, + "step": 903 + }, + { + "epoch": 0.03, + "grad_norm": 0.7247188687324524, + "learning_rate": 1.9990781823015863e-05, + "loss": 2.557, + "step": 904 + }, + { + "epoch": 0.03, + "grad_norm": 0.7064061164855957, + "learning_rate": 1.9990736139053838e-05, + "loss": 2.5846, + "step": 905 + }, + { + "epoch": 0.03, + "grad_norm": 0.6628711223602295, + "learning_rate": 1.999069034222233e-05, + "loss": 2.5903, + "step": 906 + }, + { + "epoch": 0.03, + "grad_norm": 0.6578488349914551, + "learning_rate": 1.9990644432521862e-05, + "loss": 2.5834, + "step": 907 + }, + { + "epoch": 0.03, + "grad_norm": 0.6611040234565735, + "learning_rate": 1.9990598409952944e-05, + "loss": 2.5381, + "step": 908 + }, + { + "epoch": 0.03, + "grad_norm": 0.6613810062408447, + "learning_rate": 1.9990552274516104e-05, + "loss": 2.5014, + "step": 909 + }, + { + "epoch": 0.03, + "grad_norm": 0.6636083722114563, + "learning_rate": 1.9990506026211856e-05, + "loss": 2.58, + "step": 910 + }, + { + "epoch": 0.03, + "grad_norm": 0.6571870446205139, + "learning_rate": 1.9990459665040728e-05, + "loss": 2.5282, + "step": 911 + }, + { + "epoch": 0.03, + "grad_norm": 0.7053223848342896, + "learning_rate": 1.9990413191003243e-05, + "loss": 2.5964, + "step": 912 + }, + { + "epoch": 0.03, + "grad_norm": 0.6731041073799133, + "learning_rate": 1.999036660409992e-05, + "loss": 2.5569, + "step": 913 + }, + { + "epoch": 0.03, + "grad_norm": 0.6329122185707092, + "learning_rate": 1.9990319904331293e-05, + "loss": 2.5011, + "step": 914 + }, + { + "epoch": 0.03, + "grad_norm": 0.6895326972007751, + "learning_rate": 1.9990273091697886e-05, + "loss": 2.5158, + "step": 915 + }, + { + "epoch": 0.03, + "grad_norm": 0.6591411828994751, + "learning_rate": 1.999022616620023e-05, + "loss": 2.5797, + "step": 916 + }, + { + "epoch": 0.03, + "grad_norm": 0.637761116027832, + "learning_rate": 1.9990179127838854e-05, + "loss": 2.5037, + "step": 917 + }, + { + "epoch": 0.03, + "grad_norm": 0.6519005298614502, + "learning_rate": 1.9990131976614285e-05, + "loss": 2.558, + "step": 918 + }, + { + "epoch": 0.03, + "grad_norm": 0.6424410343170166, + "learning_rate": 1.9990084712527063e-05, + "loss": 2.5329, + "step": 919 + }, + { + "epoch": 0.03, + "grad_norm": 0.6602208614349365, + "learning_rate": 1.9990037335577718e-05, + "loss": 2.6004, + "step": 920 + }, + { + "epoch": 0.03, + "grad_norm": 0.6828067302703857, + "learning_rate": 1.9989989845766785e-05, + "loss": 2.5192, + "step": 921 + }, + { + "epoch": 0.03, + "grad_norm": 0.6715533137321472, + "learning_rate": 1.99899422430948e-05, + "loss": 2.5238, + "step": 922 + }, + { + "epoch": 0.03, + "grad_norm": 0.6458566784858704, + "learning_rate": 1.9989894527562308e-05, + "loss": 2.5603, + "step": 923 + }, + { + "epoch": 0.03, + "grad_norm": 0.6470980048179626, + "learning_rate": 1.998984669916984e-05, + "loss": 2.5343, + "step": 924 + }, + { + "epoch": 0.03, + "grad_norm": 0.6514043211936951, + "learning_rate": 1.9989798757917936e-05, + "loss": 2.5131, + "step": 925 + }, + { + "epoch": 0.03, + "grad_norm": 0.6701914668083191, + "learning_rate": 1.998975070380714e-05, + "loss": 2.514, + "step": 926 + }, + { + "epoch": 0.03, + "grad_norm": 0.6525755524635315, + "learning_rate": 1.9989702536837998e-05, + "loss": 2.5436, + "step": 927 + }, + { + "epoch": 0.03, + "grad_norm": 0.6849528551101685, + "learning_rate": 1.998965425701105e-05, + "loss": 2.5781, + "step": 928 + }, + { + "epoch": 0.03, + "grad_norm": 0.636716365814209, + "learning_rate": 1.9989605864326842e-05, + "loss": 2.5617, + "step": 929 + }, + { + "epoch": 0.03, + "grad_norm": 0.6573632955551147, + "learning_rate": 1.9989557358785923e-05, + "loss": 2.5828, + "step": 930 + }, + { + "epoch": 0.03, + "grad_norm": 0.6261988878250122, + "learning_rate": 1.998950874038884e-05, + "loss": 2.5312, + "step": 931 + }, + { + "epoch": 0.03, + "grad_norm": 0.6581807732582092, + "learning_rate": 1.9989460009136138e-05, + "loss": 2.4843, + "step": 932 + }, + { + "epoch": 0.03, + "grad_norm": 0.6432085037231445, + "learning_rate": 1.9989411165028373e-05, + "loss": 2.6105, + "step": 933 + }, + { + "epoch": 0.03, + "grad_norm": 0.6609717011451721, + "learning_rate": 1.9989362208066096e-05, + "loss": 2.5045, + "step": 934 + }, + { + "epoch": 0.03, + "grad_norm": 0.6460633277893066, + "learning_rate": 1.998931313824986e-05, + "loss": 2.5195, + "step": 935 + }, + { + "epoch": 0.03, + "grad_norm": 0.651472270488739, + "learning_rate": 1.9989263955580217e-05, + "loss": 2.5869, + "step": 936 + }, + { + "epoch": 0.03, + "grad_norm": 0.6696816682815552, + "learning_rate": 1.9989214660057722e-05, + "loss": 2.5588, + "step": 937 + }, + { + "epoch": 0.03, + "grad_norm": 0.6702761650085449, + "learning_rate": 1.9989165251682937e-05, + "loss": 2.4629, + "step": 938 + }, + { + "epoch": 0.03, + "grad_norm": 0.633421778678894, + "learning_rate": 1.9989115730456415e-05, + "loss": 2.5433, + "step": 939 + }, + { + "epoch": 0.03, + "grad_norm": 0.6504607200622559, + "learning_rate": 1.998906609637872e-05, + "loss": 2.5454, + "step": 940 + }, + { + "epoch": 0.03, + "grad_norm": 0.6157642602920532, + "learning_rate": 1.9989016349450413e-05, + "loss": 2.4893, + "step": 941 + }, + { + "epoch": 0.03, + "grad_norm": 0.6734313368797302, + "learning_rate": 1.998896648967205e-05, + "loss": 2.533, + "step": 942 + }, + { + "epoch": 0.03, + "grad_norm": 0.7046616673469543, + "learning_rate": 1.9988916517044193e-05, + "loss": 2.5584, + "step": 943 + }, + { + "epoch": 0.03, + "grad_norm": 0.6411042809486389, + "learning_rate": 1.9988866431567418e-05, + "loss": 2.5767, + "step": 944 + }, + { + "epoch": 0.03, + "grad_norm": 0.7102134227752686, + "learning_rate": 1.9988816233242283e-05, + "loss": 2.4594, + "step": 945 + }, + { + "epoch": 0.03, + "grad_norm": 0.6810867190361023, + "learning_rate": 1.998876592206935e-05, + "loss": 2.5435, + "step": 946 + }, + { + "epoch": 0.03, + "grad_norm": 0.6689637303352356, + "learning_rate": 1.99887154980492e-05, + "loss": 2.5045, + "step": 947 + }, + { + "epoch": 0.03, + "grad_norm": 0.6038787364959717, + "learning_rate": 1.99886649611824e-05, + "loss": 2.5357, + "step": 948 + }, + { + "epoch": 0.03, + "grad_norm": 0.6568419337272644, + "learning_rate": 1.998861431146951e-05, + "loss": 2.5547, + "step": 949 + }, + { + "epoch": 0.03, + "grad_norm": 0.691541850566864, + "learning_rate": 1.998856354891111e-05, + "loss": 2.4988, + "step": 950 + }, + { + "epoch": 0.03, + "grad_norm": 0.6747137904167175, + "learning_rate": 1.9988512673507778e-05, + "loss": 2.5228, + "step": 951 + }, + { + "epoch": 0.03, + "grad_norm": 0.6712235808372498, + "learning_rate": 1.998846168526008e-05, + "loss": 2.5865, + "step": 952 + }, + { + "epoch": 0.03, + "grad_norm": 0.6347312927246094, + "learning_rate": 1.9988410584168595e-05, + "loss": 2.5126, + "step": 953 + }, + { + "epoch": 0.03, + "grad_norm": 0.6735199689865112, + "learning_rate": 1.9988359370233904e-05, + "loss": 2.5157, + "step": 954 + }, + { + "epoch": 0.03, + "grad_norm": 0.6382014751434326, + "learning_rate": 1.998830804345658e-05, + "loss": 2.4868, + "step": 955 + }, + { + "epoch": 0.03, + "grad_norm": 0.6418236494064331, + "learning_rate": 1.9988256603837204e-05, + "loss": 2.5458, + "step": 956 + }, + { + "epoch": 0.03, + "grad_norm": 0.6546878814697266, + "learning_rate": 1.9988205051376363e-05, + "loss": 2.559, + "step": 957 + }, + { + "epoch": 0.03, + "grad_norm": 0.6818661093711853, + "learning_rate": 1.998815338607463e-05, + "loss": 2.5724, + "step": 958 + }, + { + "epoch": 0.03, + "grad_norm": 0.6850147247314453, + "learning_rate": 1.9988101607932597e-05, + "loss": 2.5586, + "step": 959 + }, + { + "epoch": 0.03, + "grad_norm": 0.649859607219696, + "learning_rate": 1.9988049716950845e-05, + "loss": 2.5217, + "step": 960 + }, + { + "epoch": 0.03, + "grad_norm": 0.6329843401908875, + "learning_rate": 1.998799771312996e-05, + "loss": 2.5878, + "step": 961 + }, + { + "epoch": 0.03, + "grad_norm": 0.6516240239143372, + "learning_rate": 1.998794559647053e-05, + "loss": 2.5493, + "step": 962 + }, + { + "epoch": 0.03, + "grad_norm": 0.6538608074188232, + "learning_rate": 1.9987893366973145e-05, + "loss": 2.5151, + "step": 963 + }, + { + "epoch": 0.03, + "grad_norm": 0.6739526391029358, + "learning_rate": 1.9987841024638397e-05, + "loss": 2.5796, + "step": 964 + }, + { + "epoch": 0.03, + "grad_norm": 0.6733515858650208, + "learning_rate": 1.9987788569466873e-05, + "loss": 2.53, + "step": 965 + }, + { + "epoch": 0.03, + "grad_norm": 0.648504912853241, + "learning_rate": 1.9987736001459167e-05, + "loss": 2.5676, + "step": 966 + }, + { + "epoch": 0.03, + "grad_norm": 0.6834846138954163, + "learning_rate": 1.998768332061587e-05, + "loss": 2.5821, + "step": 967 + }, + { + "epoch": 0.03, + "grad_norm": 0.6586293578147888, + "learning_rate": 1.9987630526937584e-05, + "loss": 2.5652, + "step": 968 + }, + { + "epoch": 0.03, + "grad_norm": 0.6561817526817322, + "learning_rate": 1.99875776204249e-05, + "loss": 2.5752, + "step": 969 + }, + { + "epoch": 0.03, + "grad_norm": 0.6438115835189819, + "learning_rate": 1.9987524601078415e-05, + "loss": 2.5822, + "step": 970 + }, + { + "epoch": 0.03, + "grad_norm": 0.6456990838050842, + "learning_rate": 1.9987471468898732e-05, + "loss": 2.5854, + "step": 971 + }, + { + "epoch": 0.03, + "grad_norm": 0.6499293446540833, + "learning_rate": 1.998741822388645e-05, + "loss": 2.5406, + "step": 972 + }, + { + "epoch": 0.03, + "grad_norm": 0.6462386846542358, + "learning_rate": 1.9987364866042172e-05, + "loss": 2.5013, + "step": 973 + }, + { + "epoch": 0.03, + "grad_norm": 0.6346514821052551, + "learning_rate": 1.9987311395366497e-05, + "loss": 2.5343, + "step": 974 + }, + { + "epoch": 0.03, + "grad_norm": 0.6457494497299194, + "learning_rate": 1.9987257811860033e-05, + "loss": 2.5178, + "step": 975 + }, + { + "epoch": 0.03, + "grad_norm": 0.6581549644470215, + "learning_rate": 1.9987204115523383e-05, + "loss": 2.5252, + "step": 976 + }, + { + "epoch": 0.03, + "grad_norm": 0.6944414973258972, + "learning_rate": 1.9987150306357154e-05, + "loss": 2.5317, + "step": 977 + }, + { + "epoch": 0.03, + "grad_norm": 0.6609979271888733, + "learning_rate": 1.998709638436195e-05, + "loss": 2.4422, + "step": 978 + }, + { + "epoch": 0.03, + "grad_norm": 0.6353131532669067, + "learning_rate": 1.9987042349538386e-05, + "loss": 2.5222, + "step": 979 + }, + { + "epoch": 0.03, + "grad_norm": 0.6307664513587952, + "learning_rate": 1.998698820188707e-05, + "loss": 2.4825, + "step": 980 + }, + { + "epoch": 0.03, + "grad_norm": 0.64664226770401, + "learning_rate": 1.9986933941408617e-05, + "loss": 2.5223, + "step": 981 + }, + { + "epoch": 0.03, + "grad_norm": 0.6608175039291382, + "learning_rate": 1.9986879568103635e-05, + "loss": 2.5063, + "step": 982 + }, + { + "epoch": 0.03, + "grad_norm": 0.6379284262657166, + "learning_rate": 1.9986825081972743e-05, + "loss": 2.4928, + "step": 983 + }, + { + "epoch": 0.03, + "grad_norm": 0.649713397026062, + "learning_rate": 1.998677048301655e-05, + "loss": 2.497, + "step": 984 + }, + { + "epoch": 0.03, + "grad_norm": 0.7069547772407532, + "learning_rate": 1.998671577123568e-05, + "loss": 2.5541, + "step": 985 + }, + { + "epoch": 0.03, + "grad_norm": 0.6581557989120483, + "learning_rate": 1.9986660946630747e-05, + "loss": 2.5545, + "step": 986 + }, + { + "epoch": 0.03, + "grad_norm": 0.6670517325401306, + "learning_rate": 1.9986606009202372e-05, + "loss": 2.5601, + "step": 987 + }, + { + "epoch": 0.03, + "grad_norm": 0.6529316306114197, + "learning_rate": 1.9986550958951173e-05, + "loss": 2.5066, + "step": 988 + }, + { + "epoch": 0.03, + "grad_norm": 0.649377167224884, + "learning_rate": 1.9986495795877777e-05, + "loss": 2.5401, + "step": 989 + }, + { + "epoch": 0.03, + "grad_norm": 0.6688002347946167, + "learning_rate": 1.99864405199828e-05, + "loss": 2.566, + "step": 990 + }, + { + "epoch": 0.03, + "grad_norm": 0.6878781318664551, + "learning_rate": 1.9986385131266876e-05, + "loss": 2.5301, + "step": 991 + }, + { + "epoch": 0.03, + "grad_norm": 0.6727731823921204, + "learning_rate": 1.9986329629730624e-05, + "loss": 2.551, + "step": 992 + }, + { + "epoch": 0.03, + "grad_norm": 0.7031287550926208, + "learning_rate": 1.998627401537467e-05, + "loss": 2.5642, + "step": 993 + }, + { + "epoch": 0.03, + "grad_norm": 0.6427720189094543, + "learning_rate": 1.9986218288199644e-05, + "loss": 2.4915, + "step": 994 + }, + { + "epoch": 0.03, + "grad_norm": 0.673617959022522, + "learning_rate": 1.9986162448206177e-05, + "loss": 2.5943, + "step": 995 + }, + { + "epoch": 0.03, + "grad_norm": 0.642113983631134, + "learning_rate": 1.9986106495394905e-05, + "loss": 2.51, + "step": 996 + }, + { + "epoch": 0.03, + "grad_norm": 0.7054376602172852, + "learning_rate": 1.9986050429766447e-05, + "loss": 2.4431, + "step": 997 + }, + { + "epoch": 0.03, + "grad_norm": 0.6631945967674255, + "learning_rate": 1.9985994251321447e-05, + "loss": 2.5198, + "step": 998 + }, + { + "epoch": 0.03, + "grad_norm": 0.6940524578094482, + "learning_rate": 1.9985937960060535e-05, + "loss": 2.5656, + "step": 999 + }, + { + "epoch": 0.03, + "grad_norm": 0.654110312461853, + "learning_rate": 1.998588155598435e-05, + "loss": 2.4896, + "step": 1000 + }, + { + "epoch": 0.03, + "grad_norm": 0.6796788573265076, + "learning_rate": 1.9985825039093524e-05, + "loss": 2.5385, + "step": 1001 + }, + { + "epoch": 0.03, + "grad_norm": 0.662747859954834, + "learning_rate": 1.9985768409388703e-05, + "loss": 2.567, + "step": 1002 + }, + { + "epoch": 0.03, + "grad_norm": 0.6902099251747131, + "learning_rate": 1.998571166687052e-05, + "loss": 2.5387, + "step": 1003 + }, + { + "epoch": 0.03, + "grad_norm": 0.6432726979255676, + "learning_rate": 1.998565481153962e-05, + "loss": 2.4743, + "step": 1004 + }, + { + "epoch": 0.03, + "grad_norm": 0.6323050856590271, + "learning_rate": 1.9985597843396644e-05, + "loss": 2.4914, + "step": 1005 + }, + { + "epoch": 0.03, + "grad_norm": 0.6601359844207764, + "learning_rate": 1.9985540762442237e-05, + "loss": 2.4834, + "step": 1006 + }, + { + "epoch": 0.03, + "grad_norm": 0.6630439758300781, + "learning_rate": 1.998548356867704e-05, + "loss": 2.5815, + "step": 1007 + }, + { + "epoch": 0.03, + "grad_norm": 0.6577600240707397, + "learning_rate": 1.9985426262101702e-05, + "loss": 2.5185, + "step": 1008 + }, + { + "epoch": 0.03, + "grad_norm": 0.6772057414054871, + "learning_rate": 1.9985368842716873e-05, + "loss": 2.49, + "step": 1009 + }, + { + "epoch": 0.03, + "grad_norm": 0.6612805724143982, + "learning_rate": 1.9985311310523193e-05, + "loss": 2.5623, + "step": 1010 + }, + { + "epoch": 0.03, + "grad_norm": 0.6473296880722046, + "learning_rate": 1.9985253665521323e-05, + "loss": 2.5419, + "step": 1011 + }, + { + "epoch": 0.03, + "grad_norm": 0.6880604028701782, + "learning_rate": 1.9985195907711907e-05, + "loss": 2.5163, + "step": 1012 + }, + { + "epoch": 0.03, + "grad_norm": 0.66536945104599, + "learning_rate": 1.9985138037095598e-05, + "loss": 2.5087, + "step": 1013 + }, + { + "epoch": 0.03, + "grad_norm": 0.6414652466773987, + "learning_rate": 1.9985080053673057e-05, + "loss": 2.5284, + "step": 1014 + }, + { + "epoch": 0.03, + "grad_norm": 0.6616004109382629, + "learning_rate": 1.998502195744493e-05, + "loss": 2.5418, + "step": 1015 + }, + { + "epoch": 0.03, + "grad_norm": 0.6728946566581726, + "learning_rate": 1.9984963748411873e-05, + "loss": 2.4478, + "step": 1016 + }, + { + "epoch": 0.03, + "grad_norm": 0.6659784317016602, + "learning_rate": 1.9984905426574553e-05, + "loss": 2.4466, + "step": 1017 + }, + { + "epoch": 0.03, + "grad_norm": 0.6631350517272949, + "learning_rate": 1.998484699193362e-05, + "loss": 2.4974, + "step": 1018 + }, + { + "epoch": 0.03, + "grad_norm": 0.7058129906654358, + "learning_rate": 1.998478844448974e-05, + "loss": 2.5615, + "step": 1019 + }, + { + "epoch": 0.03, + "grad_norm": 0.6937713027000427, + "learning_rate": 1.9984729784243572e-05, + "loss": 2.5515, + "step": 1020 + }, + { + "epoch": 0.03, + "grad_norm": 0.6276131272315979, + "learning_rate": 1.9984671011195776e-05, + "loss": 2.4617, + "step": 1021 + }, + { + "epoch": 0.03, + "grad_norm": 0.6824151277542114, + "learning_rate": 1.9984612125347017e-05, + "loss": 2.4982, + "step": 1022 + }, + { + "epoch": 0.03, + "grad_norm": 0.6697954535484314, + "learning_rate": 1.9984553126697965e-05, + "loss": 2.5127, + "step": 1023 + }, + { + "epoch": 0.03, + "grad_norm": 0.6829949617385864, + "learning_rate": 1.998449401524928e-05, + "loss": 2.5176, + "step": 1024 + }, + { + "epoch": 0.03, + "grad_norm": 0.6801559329032898, + "learning_rate": 1.9984434791001637e-05, + "loss": 2.4857, + "step": 1025 + }, + { + "epoch": 0.03, + "grad_norm": 0.65267413854599, + "learning_rate": 1.9984375453955703e-05, + "loss": 2.5389, + "step": 1026 + }, + { + "epoch": 0.03, + "grad_norm": 0.6562937498092651, + "learning_rate": 1.998431600411214e-05, + "loss": 2.5219, + "step": 1027 + }, + { + "epoch": 0.03, + "grad_norm": 0.6441564559936523, + "learning_rate": 1.998425644147163e-05, + "loss": 2.5182, + "step": 1028 + }, + { + "epoch": 0.03, + "grad_norm": 0.6421415209770203, + "learning_rate": 1.9984196766034842e-05, + "loss": 2.4975, + "step": 1029 + }, + { + "epoch": 0.03, + "grad_norm": 0.6659252643585205, + "learning_rate": 1.998413697780245e-05, + "loss": 2.5263, + "step": 1030 + }, + { + "epoch": 0.03, + "grad_norm": 0.6721411347389221, + "learning_rate": 1.9984077076775132e-05, + "loss": 2.4861, + "step": 1031 + }, + { + "epoch": 0.03, + "grad_norm": 0.6564668416976929, + "learning_rate": 1.9984017062953556e-05, + "loss": 2.5109, + "step": 1032 + }, + { + "epoch": 0.03, + "grad_norm": 0.6405675411224365, + "learning_rate": 1.9983956936338413e-05, + "loss": 2.5029, + "step": 1033 + }, + { + "epoch": 0.03, + "grad_norm": 0.6317880749702454, + "learning_rate": 1.998389669693037e-05, + "loss": 2.4935, + "step": 1034 + }, + { + "epoch": 0.03, + "grad_norm": 0.630470871925354, + "learning_rate": 1.9983836344730116e-05, + "loss": 2.5577, + "step": 1035 + }, + { + "epoch": 0.03, + "grad_norm": 0.6827713847160339, + "learning_rate": 1.998377587973833e-05, + "loss": 2.4935, + "step": 1036 + }, + { + "epoch": 0.03, + "grad_norm": 0.7336617112159729, + "learning_rate": 1.9983715301955696e-05, + "loss": 2.5107, + "step": 1037 + }, + { + "epoch": 0.03, + "grad_norm": 0.6455040574073792, + "learning_rate": 1.9983654611382897e-05, + "loss": 2.4355, + "step": 1038 + }, + { + "epoch": 0.03, + "grad_norm": 0.6398971676826477, + "learning_rate": 1.998359380802062e-05, + "loss": 2.5325, + "step": 1039 + }, + { + "epoch": 0.03, + "grad_norm": 0.6680201292037964, + "learning_rate": 1.998353289186955e-05, + "loss": 2.5276, + "step": 1040 + }, + { + "epoch": 0.03, + "grad_norm": 0.683854877948761, + "learning_rate": 1.9983471862930377e-05, + "loss": 2.4616, + "step": 1041 + }, + { + "epoch": 0.03, + "grad_norm": 0.6794975996017456, + "learning_rate": 1.998341072120379e-05, + "loss": 2.5964, + "step": 1042 + }, + { + "epoch": 0.03, + "grad_norm": 0.6380020976066589, + "learning_rate": 1.9983349466690478e-05, + "loss": 2.5364, + "step": 1043 + }, + { + "epoch": 0.03, + "grad_norm": 0.6510722637176514, + "learning_rate": 1.9983288099391137e-05, + "loss": 2.5136, + "step": 1044 + }, + { + "epoch": 0.03, + "grad_norm": 0.6584125757217407, + "learning_rate": 1.9983226619306455e-05, + "loss": 2.5509, + "step": 1045 + }, + { + "epoch": 0.03, + "grad_norm": 0.6628699898719788, + "learning_rate": 1.998316502643713e-05, + "loss": 2.5028, + "step": 1046 + }, + { + "epoch": 0.03, + "grad_norm": 0.6480531692504883, + "learning_rate": 1.998310332078386e-05, + "loss": 2.4796, + "step": 1047 + }, + { + "epoch": 0.03, + "grad_norm": 0.6770830154418945, + "learning_rate": 1.9983041502347337e-05, + "loss": 2.5208, + "step": 1048 + }, + { + "epoch": 0.03, + "grad_norm": 0.6712511777877808, + "learning_rate": 1.998297957112826e-05, + "loss": 2.5165, + "step": 1049 + }, + { + "epoch": 0.03, + "grad_norm": 0.664597749710083, + "learning_rate": 1.9982917527127334e-05, + "loss": 2.4918, + "step": 1050 + }, + { + "epoch": 0.03, + "grad_norm": 0.6505897641181946, + "learning_rate": 1.9982855370345256e-05, + "loss": 2.5114, + "step": 1051 + }, + { + "epoch": 0.04, + "grad_norm": 0.6663317084312439, + "learning_rate": 1.9982793100782727e-05, + "loss": 2.5902, + "step": 1052 + }, + { + "epoch": 0.04, + "grad_norm": 0.6359817981719971, + "learning_rate": 1.998273071844045e-05, + "loss": 2.5107, + "step": 1053 + }, + { + "epoch": 0.04, + "grad_norm": 0.6670275330543518, + "learning_rate": 1.9982668223319135e-05, + "loss": 2.4613, + "step": 1054 + }, + { + "epoch": 0.04, + "grad_norm": 0.6699387431144714, + "learning_rate": 1.9982605615419483e-05, + "loss": 2.5226, + "step": 1055 + }, + { + "epoch": 0.04, + "grad_norm": 0.6547272801399231, + "learning_rate": 1.9982542894742205e-05, + "loss": 2.5363, + "step": 1056 + }, + { + "epoch": 0.04, + "grad_norm": 0.6491037011146545, + "learning_rate": 1.9982480061288007e-05, + "loss": 2.5274, + "step": 1057 + }, + { + "epoch": 0.04, + "grad_norm": 0.6636024713516235, + "learning_rate": 1.9982417115057598e-05, + "loss": 2.5055, + "step": 1058 + }, + { + "epoch": 0.04, + "grad_norm": 0.7068822979927063, + "learning_rate": 1.9982354056051695e-05, + "loss": 2.5102, + "step": 1059 + }, + { + "epoch": 0.04, + "grad_norm": 0.6638553738594055, + "learning_rate": 1.9982290884271002e-05, + "loss": 2.5082, + "step": 1060 + }, + { + "epoch": 0.04, + "grad_norm": 0.6805009841918945, + "learning_rate": 1.9982227599716237e-05, + "loss": 2.5645, + "step": 1061 + }, + { + "epoch": 0.04, + "grad_norm": 0.6925747990608215, + "learning_rate": 1.9982164202388116e-05, + "loss": 2.5329, + "step": 1062 + }, + { + "epoch": 0.04, + "grad_norm": 0.7054527997970581, + "learning_rate": 1.9982100692287356e-05, + "loss": 2.5673, + "step": 1063 + }, + { + "epoch": 0.04, + "grad_norm": 0.6304382681846619, + "learning_rate": 1.998203706941467e-05, + "loss": 2.4827, + "step": 1064 + }, + { + "epoch": 0.04, + "grad_norm": 0.7037270069122314, + "learning_rate": 1.998197333377078e-05, + "loss": 2.5371, + "step": 1065 + }, + { + "epoch": 0.04, + "grad_norm": 0.6439043879508972, + "learning_rate": 1.9981909485356405e-05, + "loss": 2.4485, + "step": 1066 + }, + { + "epoch": 0.04, + "grad_norm": 0.6447720527648926, + "learning_rate": 1.9981845524172264e-05, + "loss": 2.5516, + "step": 1067 + }, + { + "epoch": 0.04, + "grad_norm": 0.6577944755554199, + "learning_rate": 1.9981781450219086e-05, + "loss": 2.4957, + "step": 1068 + }, + { + "epoch": 0.04, + "grad_norm": 0.6646751761436462, + "learning_rate": 1.998171726349759e-05, + "loss": 2.4641, + "step": 1069 + }, + { + "epoch": 0.04, + "grad_norm": 0.6744557023048401, + "learning_rate": 1.99816529640085e-05, + "loss": 2.4677, + "step": 1070 + }, + { + "epoch": 0.04, + "grad_norm": 0.6579509973526001, + "learning_rate": 1.9981588551752548e-05, + "loss": 2.4634, + "step": 1071 + }, + { + "epoch": 0.04, + "grad_norm": 0.6456015706062317, + "learning_rate": 1.9981524026730453e-05, + "loss": 2.4869, + "step": 1072 + }, + { + "epoch": 0.04, + "grad_norm": 0.6739247441291809, + "learning_rate": 1.9981459388942957e-05, + "loss": 2.5445, + "step": 1073 + }, + { + "epoch": 0.04, + "grad_norm": 0.6222259402275085, + "learning_rate": 1.9981394638390777e-05, + "loss": 2.5074, + "step": 1074 + }, + { + "epoch": 0.04, + "grad_norm": 0.6447417140007019, + "learning_rate": 1.9981329775074653e-05, + "loss": 2.4642, + "step": 1075 + }, + { + "epoch": 0.04, + "grad_norm": 0.6405796408653259, + "learning_rate": 1.9981264798995313e-05, + "loss": 2.509, + "step": 1076 + }, + { + "epoch": 0.04, + "grad_norm": 0.6870517134666443, + "learning_rate": 1.9981199710153495e-05, + "loss": 2.514, + "step": 1077 + }, + { + "epoch": 0.04, + "grad_norm": 0.6250279545783997, + "learning_rate": 1.998113450854993e-05, + "loss": 2.5398, + "step": 1078 + }, + { + "epoch": 0.04, + "grad_norm": 0.6689534187316895, + "learning_rate": 1.9981069194185355e-05, + "loss": 2.5051, + "step": 1079 + }, + { + "epoch": 0.04, + "grad_norm": 0.6547778248786926, + "learning_rate": 1.9981003767060513e-05, + "loss": 2.4934, + "step": 1080 + }, + { + "epoch": 0.04, + "grad_norm": 0.6595370769500732, + "learning_rate": 1.998093822717614e-05, + "loss": 2.4987, + "step": 1081 + }, + { + "epoch": 0.04, + "grad_norm": 0.6451910138130188, + "learning_rate": 1.9980872574532975e-05, + "loss": 2.4533, + "step": 1082 + }, + { + "epoch": 0.04, + "grad_norm": 0.6690686345100403, + "learning_rate": 1.998080680913176e-05, + "loss": 2.5395, + "step": 1083 + }, + { + "epoch": 0.04, + "grad_norm": 0.6448755264282227, + "learning_rate": 1.998074093097324e-05, + "loss": 2.5186, + "step": 1084 + }, + { + "epoch": 0.04, + "grad_norm": 0.6348246335983276, + "learning_rate": 1.9980674940058163e-05, + "loss": 2.5081, + "step": 1085 + }, + { + "epoch": 0.04, + "grad_norm": 0.628466010093689, + "learning_rate": 1.9980608836387263e-05, + "loss": 2.4754, + "step": 1086 + }, + { + "epoch": 0.04, + "grad_norm": 0.7128363847732544, + "learning_rate": 1.9980542619961298e-05, + "loss": 2.5362, + "step": 1087 + }, + { + "epoch": 0.04, + "grad_norm": 0.6283236145973206, + "learning_rate": 1.9980476290781007e-05, + "loss": 2.5807, + "step": 1088 + }, + { + "epoch": 0.04, + "grad_norm": 0.6560370326042175, + "learning_rate": 1.998040984884715e-05, + "loss": 2.5315, + "step": 1089 + }, + { + "epoch": 0.04, + "grad_norm": 0.6526508927345276, + "learning_rate": 1.998034329416047e-05, + "loss": 2.5156, + "step": 1090 + }, + { + "epoch": 0.04, + "grad_norm": 0.6681846976280212, + "learning_rate": 1.9980276626721714e-05, + "loss": 2.5672, + "step": 1091 + }, + { + "epoch": 0.04, + "grad_norm": 0.6755825281143188, + "learning_rate": 1.998020984653165e-05, + "loss": 2.4959, + "step": 1092 + }, + { + "epoch": 0.04, + "grad_norm": 0.6869197487831116, + "learning_rate": 1.9980142953591017e-05, + "loss": 2.4981, + "step": 1093 + }, + { + "epoch": 0.04, + "grad_norm": 0.6513407826423645, + "learning_rate": 1.9980075947900582e-05, + "loss": 2.5005, + "step": 1094 + }, + { + "epoch": 0.04, + "grad_norm": 0.6888585686683655, + "learning_rate": 1.9980008829461097e-05, + "loss": 2.5683, + "step": 1095 + }, + { + "epoch": 0.04, + "grad_norm": 0.6763022541999817, + "learning_rate": 1.9979941598273323e-05, + "loss": 2.5077, + "step": 1096 + }, + { + "epoch": 0.04, + "grad_norm": 0.6463805437088013, + "learning_rate": 1.9979874254338014e-05, + "loss": 2.5235, + "step": 1097 + }, + { + "epoch": 0.04, + "grad_norm": 0.6434422731399536, + "learning_rate": 1.997980679765594e-05, + "loss": 2.5124, + "step": 1098 + }, + { + "epoch": 0.04, + "grad_norm": 0.6373206973075867, + "learning_rate": 1.9979739228227852e-05, + "loss": 2.4345, + "step": 1099 + }, + { + "epoch": 0.04, + "grad_norm": 0.6587280035018921, + "learning_rate": 1.9979671546054523e-05, + "loss": 2.5792, + "step": 1100 + }, + { + "epoch": 0.04, + "grad_norm": 0.7423656582832336, + "learning_rate": 1.9979603751136708e-05, + "loss": 2.4723, + "step": 1101 + }, + { + "epoch": 0.04, + "grad_norm": 0.6750621795654297, + "learning_rate": 1.9979535843475185e-05, + "loss": 2.5118, + "step": 1102 + }, + { + "epoch": 0.04, + "grad_norm": 0.687773585319519, + "learning_rate": 1.997946782307071e-05, + "loss": 2.5273, + "step": 1103 + }, + { + "epoch": 0.04, + "grad_norm": 0.6503051519393921, + "learning_rate": 1.9979399689924058e-05, + "loss": 2.4664, + "step": 1104 + }, + { + "epoch": 0.04, + "grad_norm": 0.6419747471809387, + "learning_rate": 1.9979331444035998e-05, + "loss": 2.535, + "step": 1105 + }, + { + "epoch": 0.04, + "grad_norm": 0.664393961429596, + "learning_rate": 1.9979263085407297e-05, + "loss": 2.5013, + "step": 1106 + }, + { + "epoch": 0.04, + "grad_norm": 0.6789538860321045, + "learning_rate": 1.997919461403873e-05, + "loss": 2.5463, + "step": 1107 + }, + { + "epoch": 0.04, + "grad_norm": 0.6544622778892517, + "learning_rate": 1.9979126029931072e-05, + "loss": 2.4452, + "step": 1108 + }, + { + "epoch": 0.04, + "grad_norm": 0.6510677337646484, + "learning_rate": 1.9979057333085097e-05, + "loss": 2.5176, + "step": 1109 + }, + { + "epoch": 0.04, + "grad_norm": 0.6380664110183716, + "learning_rate": 1.9978988523501578e-05, + "loss": 2.5324, + "step": 1110 + }, + { + "epoch": 0.04, + "grad_norm": 0.6380454897880554, + "learning_rate": 1.99789196011813e-05, + "loss": 2.4898, + "step": 1111 + }, + { + "epoch": 0.04, + "grad_norm": 0.6302733421325684, + "learning_rate": 1.997885056612503e-05, + "loss": 2.51, + "step": 1112 + }, + { + "epoch": 0.04, + "grad_norm": 0.6730328798294067, + "learning_rate": 1.997878141833356e-05, + "loss": 2.5229, + "step": 1113 + }, + { + "epoch": 0.04, + "grad_norm": 0.6901289820671082, + "learning_rate": 1.997871215780766e-05, + "loss": 2.4794, + "step": 1114 + }, + { + "epoch": 0.04, + "grad_norm": 0.6590227484703064, + "learning_rate": 1.9978642784548127e-05, + "loss": 2.5325, + "step": 1115 + }, + { + "epoch": 0.04, + "grad_norm": 0.6847108602523804, + "learning_rate": 1.997857329855573e-05, + "loss": 2.4953, + "step": 1116 + }, + { + "epoch": 0.04, + "grad_norm": 0.7078675031661987, + "learning_rate": 1.997850369983126e-05, + "loss": 2.5027, + "step": 1117 + }, + { + "epoch": 0.04, + "grad_norm": 0.6547713279724121, + "learning_rate": 1.9978433988375504e-05, + "loss": 2.4926, + "step": 1118 + }, + { + "epoch": 0.04, + "grad_norm": 0.6798385977745056, + "learning_rate": 1.997836416418925e-05, + "loss": 2.4743, + "step": 1119 + }, + { + "epoch": 0.04, + "grad_norm": 0.6295072436332703, + "learning_rate": 1.9978294227273283e-05, + "loss": 2.5097, + "step": 1120 + }, + { + "epoch": 0.04, + "grad_norm": 0.6345159411430359, + "learning_rate": 1.9978224177628396e-05, + "loss": 2.5039, + "step": 1121 + }, + { + "epoch": 0.04, + "grad_norm": 0.622097909450531, + "learning_rate": 1.997815401525538e-05, + "loss": 2.4708, + "step": 1122 + }, + { + "epoch": 0.04, + "grad_norm": 0.6950234174728394, + "learning_rate": 1.997808374015503e-05, + "loss": 2.4114, + "step": 1123 + }, + { + "epoch": 0.04, + "grad_norm": 0.6793590188026428, + "learning_rate": 1.9978013352328135e-05, + "loss": 2.5618, + "step": 1124 + }, + { + "epoch": 0.04, + "grad_norm": 0.6684526205062866, + "learning_rate": 1.9977942851775497e-05, + "loss": 2.5244, + "step": 1125 + }, + { + "epoch": 0.04, + "grad_norm": 0.6539324522018433, + "learning_rate": 1.9977872238497902e-05, + "loss": 2.478, + "step": 1126 + }, + { + "epoch": 0.04, + "grad_norm": 0.6612284779548645, + "learning_rate": 1.9977801512496156e-05, + "loss": 2.5333, + "step": 1127 + }, + { + "epoch": 0.04, + "grad_norm": 0.6670842170715332, + "learning_rate": 1.997773067377106e-05, + "loss": 2.5114, + "step": 1128 + }, + { + "epoch": 0.04, + "grad_norm": 0.7031385898590088, + "learning_rate": 1.9977659722323407e-05, + "loss": 2.5545, + "step": 1129 + }, + { + "epoch": 0.04, + "grad_norm": 0.7332308888435364, + "learning_rate": 1.9977588658154003e-05, + "loss": 2.4543, + "step": 1130 + }, + { + "epoch": 0.04, + "grad_norm": 0.6536996960639954, + "learning_rate": 1.997751748126365e-05, + "loss": 2.5495, + "step": 1131 + }, + { + "epoch": 0.04, + "grad_norm": 0.6535505652427673, + "learning_rate": 1.9977446191653153e-05, + "loss": 2.4958, + "step": 1132 + }, + { + "epoch": 0.04, + "grad_norm": 0.6539435386657715, + "learning_rate": 1.997737478932331e-05, + "loss": 2.5181, + "step": 1133 + }, + { + "epoch": 0.04, + "grad_norm": 0.6357453465461731, + "learning_rate": 1.997730327427494e-05, + "loss": 2.5334, + "step": 1134 + }, + { + "epoch": 0.04, + "grad_norm": 0.6689437031745911, + "learning_rate": 1.9977231646508845e-05, + "loss": 2.4705, + "step": 1135 + }, + { + "epoch": 0.04, + "grad_norm": 0.6831153631210327, + "learning_rate": 1.997715990602583e-05, + "loss": 2.5026, + "step": 1136 + }, + { + "epoch": 0.04, + "grad_norm": 0.6624339818954468, + "learning_rate": 1.9977088052826713e-05, + "loss": 2.4989, + "step": 1137 + }, + { + "epoch": 0.04, + "grad_norm": 0.6393553018569946, + "learning_rate": 1.99770160869123e-05, + "loss": 2.5103, + "step": 1138 + }, + { + "epoch": 0.04, + "grad_norm": 0.6625566482543945, + "learning_rate": 1.997694400828341e-05, + "loss": 2.501, + "step": 1139 + }, + { + "epoch": 0.04, + "grad_norm": 0.6858672499656677, + "learning_rate": 1.9976871816940854e-05, + "loss": 2.5205, + "step": 1140 + }, + { + "epoch": 0.04, + "grad_norm": 0.7091208100318909, + "learning_rate": 1.997679951288544e-05, + "loss": 2.4409, + "step": 1141 + }, + { + "epoch": 0.04, + "grad_norm": 0.6620068550109863, + "learning_rate": 1.9976727096117997e-05, + "loss": 2.4803, + "step": 1142 + }, + { + "epoch": 0.04, + "grad_norm": 0.643110990524292, + "learning_rate": 1.9976654566639338e-05, + "loss": 2.4967, + "step": 1143 + }, + { + "epoch": 0.04, + "grad_norm": 0.686629593372345, + "learning_rate": 1.9976581924450284e-05, + "loss": 2.4724, + "step": 1144 + }, + { + "epoch": 0.04, + "grad_norm": 0.650425136089325, + "learning_rate": 1.9976509169551655e-05, + "loss": 2.5077, + "step": 1145 + }, + { + "epoch": 0.04, + "grad_norm": 0.6549974083900452, + "learning_rate": 1.997643630194427e-05, + "loss": 2.4616, + "step": 1146 + }, + { + "epoch": 0.04, + "grad_norm": 0.6313915848731995, + "learning_rate": 1.9976363321628957e-05, + "loss": 2.4944, + "step": 1147 + }, + { + "epoch": 0.04, + "grad_norm": 0.6537429094314575, + "learning_rate": 1.997629022860653e-05, + "loss": 2.492, + "step": 1148 + }, + { + "epoch": 0.04, + "grad_norm": 0.6742879152297974, + "learning_rate": 1.997621702287783e-05, + "loss": 2.5183, + "step": 1149 + }, + { + "epoch": 0.04, + "grad_norm": 0.6380449533462524, + "learning_rate": 1.9976143704443676e-05, + "loss": 2.5207, + "step": 1150 + }, + { + "epoch": 0.04, + "grad_norm": 0.7004801034927368, + "learning_rate": 1.9976070273304896e-05, + "loss": 2.5485, + "step": 1151 + }, + { + "epoch": 0.04, + "grad_norm": 0.7349691390991211, + "learning_rate": 1.997599672946232e-05, + "loss": 2.4807, + "step": 1152 + }, + { + "epoch": 0.04, + "grad_norm": 0.650223970413208, + "learning_rate": 1.997592307291678e-05, + "loss": 2.4691, + "step": 1153 + }, + { + "epoch": 0.04, + "grad_norm": 0.6400137543678284, + "learning_rate": 1.9975849303669104e-05, + "loss": 2.4726, + "step": 1154 + }, + { + "epoch": 0.04, + "grad_norm": 0.6403338313102722, + "learning_rate": 1.9975775421720135e-05, + "loss": 2.4817, + "step": 1155 + }, + { + "epoch": 0.04, + "grad_norm": 0.6457794308662415, + "learning_rate": 1.99757014270707e-05, + "loss": 2.5177, + "step": 1156 + }, + { + "epoch": 0.04, + "grad_norm": 0.688164472579956, + "learning_rate": 1.9975627319721635e-05, + "loss": 2.5542, + "step": 1157 + }, + { + "epoch": 0.04, + "grad_norm": 0.7092720866203308, + "learning_rate": 1.997555309967378e-05, + "loss": 2.5016, + "step": 1158 + }, + { + "epoch": 0.04, + "grad_norm": 0.7378236651420593, + "learning_rate": 1.9975478766927973e-05, + "loss": 2.504, + "step": 1159 + }, + { + "epoch": 0.04, + "grad_norm": 0.6497578024864197, + "learning_rate": 1.997540432148505e-05, + "loss": 2.4617, + "step": 1160 + }, + { + "epoch": 0.04, + "grad_norm": 0.6363285183906555, + "learning_rate": 1.997532976334586e-05, + "loss": 2.5158, + "step": 1161 + }, + { + "epoch": 0.04, + "grad_norm": 0.6313586831092834, + "learning_rate": 1.9975255092511236e-05, + "loss": 2.5007, + "step": 1162 + }, + { + "epoch": 0.04, + "grad_norm": 0.6521604657173157, + "learning_rate": 1.9975180308982027e-05, + "loss": 2.5119, + "step": 1163 + }, + { + "epoch": 0.04, + "grad_norm": 0.6570411324501038, + "learning_rate": 1.997510541275908e-05, + "loss": 2.4148, + "step": 1164 + }, + { + "epoch": 0.04, + "grad_norm": 0.6731036305427551, + "learning_rate": 1.9975030403843232e-05, + "loss": 2.4901, + "step": 1165 + }, + { + "epoch": 0.04, + "grad_norm": 0.6561059951782227, + "learning_rate": 1.9974955282235343e-05, + "loss": 2.4606, + "step": 1166 + }, + { + "epoch": 0.04, + "grad_norm": 0.6508211493492126, + "learning_rate": 1.997488004793625e-05, + "loss": 2.5522, + "step": 1167 + }, + { + "epoch": 0.04, + "grad_norm": 0.6997376680374146, + "learning_rate": 1.997480470094681e-05, + "loss": 2.5891, + "step": 1168 + }, + { + "epoch": 0.04, + "grad_norm": 0.6416847109794617, + "learning_rate": 1.9974729241267875e-05, + "loss": 2.5059, + "step": 1169 + }, + { + "epoch": 0.04, + "grad_norm": 0.6912742257118225, + "learning_rate": 1.997465366890029e-05, + "loss": 2.5393, + "step": 1170 + }, + { + "epoch": 0.04, + "grad_norm": 0.74570232629776, + "learning_rate": 1.9974577983844917e-05, + "loss": 2.438, + "step": 1171 + }, + { + "epoch": 0.04, + "grad_norm": 0.6463032364845276, + "learning_rate": 1.9974502186102604e-05, + "loss": 2.5287, + "step": 1172 + }, + { + "epoch": 0.04, + "grad_norm": 0.6580987572669983, + "learning_rate": 1.9974426275674216e-05, + "loss": 2.4704, + "step": 1173 + }, + { + "epoch": 0.04, + "grad_norm": 0.6640790700912476, + "learning_rate": 1.9974350252560603e-05, + "loss": 2.5309, + "step": 1174 + }, + { + "epoch": 0.04, + "grad_norm": 0.71222323179245, + "learning_rate": 1.9974274116762626e-05, + "loss": 2.4359, + "step": 1175 + }, + { + "epoch": 0.04, + "grad_norm": 0.6606084704399109, + "learning_rate": 1.9974197868281146e-05, + "loss": 2.4748, + "step": 1176 + }, + { + "epoch": 0.04, + "grad_norm": 0.6555746793746948, + "learning_rate": 1.9974121507117023e-05, + "loss": 2.4552, + "step": 1177 + }, + { + "epoch": 0.04, + "grad_norm": 0.6389197707176208, + "learning_rate": 1.997404503327112e-05, + "loss": 2.4947, + "step": 1178 + }, + { + "epoch": 0.04, + "grad_norm": 0.6883360147476196, + "learning_rate": 1.9973968446744304e-05, + "loss": 2.4779, + "step": 1179 + }, + { + "epoch": 0.04, + "grad_norm": 0.6988077759742737, + "learning_rate": 1.9973891747537436e-05, + "loss": 2.4605, + "step": 1180 + }, + { + "epoch": 0.04, + "grad_norm": 0.6717812418937683, + "learning_rate": 1.9973814935651384e-05, + "loss": 2.526, + "step": 1181 + }, + { + "epoch": 0.04, + "grad_norm": 0.686249315738678, + "learning_rate": 1.997373801108702e-05, + "loss": 2.5249, + "step": 1182 + }, + { + "epoch": 0.04, + "grad_norm": 0.6526146531105042, + "learning_rate": 1.9973660973845206e-05, + "loss": 2.5213, + "step": 1183 + }, + { + "epoch": 0.04, + "grad_norm": 0.6313891410827637, + "learning_rate": 1.9973583823926815e-05, + "loss": 2.5178, + "step": 1184 + }, + { + "epoch": 0.04, + "grad_norm": 0.6542050242424011, + "learning_rate": 1.997350656133272e-05, + "loss": 2.5248, + "step": 1185 + }, + { + "epoch": 0.04, + "grad_norm": 0.6645975112915039, + "learning_rate": 1.9973429186063794e-05, + "loss": 2.4744, + "step": 1186 + }, + { + "epoch": 0.04, + "grad_norm": 0.7057430148124695, + "learning_rate": 1.997335169812091e-05, + "loss": 2.5526, + "step": 1187 + }, + { + "epoch": 0.04, + "grad_norm": 0.6480329632759094, + "learning_rate": 1.997327409750494e-05, + "loss": 2.5371, + "step": 1188 + }, + { + "epoch": 0.04, + "grad_norm": 0.6526750922203064, + "learning_rate": 1.9973196384216767e-05, + "loss": 2.489, + "step": 1189 + }, + { + "epoch": 0.04, + "grad_norm": 0.6777902841567993, + "learning_rate": 1.9973118558257267e-05, + "loss": 2.511, + "step": 1190 + }, + { + "epoch": 0.04, + "grad_norm": 0.652764081954956, + "learning_rate": 1.9973040619627318e-05, + "loss": 2.5249, + "step": 1191 + }, + { + "epoch": 0.04, + "grad_norm": 0.650433361530304, + "learning_rate": 1.9972962568327797e-05, + "loss": 2.5095, + "step": 1192 + }, + { + "epoch": 0.04, + "grad_norm": 0.6494837403297424, + "learning_rate": 1.9972884404359593e-05, + "loss": 2.5041, + "step": 1193 + }, + { + "epoch": 0.04, + "grad_norm": 0.6666718125343323, + "learning_rate": 1.9972806127723586e-05, + "loss": 2.4341, + "step": 1194 + }, + { + "epoch": 0.04, + "grad_norm": 0.6602668762207031, + "learning_rate": 1.997272773842066e-05, + "loss": 2.4594, + "step": 1195 + }, + { + "epoch": 0.04, + "grad_norm": 0.6686780452728271, + "learning_rate": 1.9972649236451697e-05, + "loss": 2.4198, + "step": 1196 + }, + { + "epoch": 0.04, + "grad_norm": 0.6318867802619934, + "learning_rate": 1.997257062181759e-05, + "loss": 2.5143, + "step": 1197 + }, + { + "epoch": 0.04, + "grad_norm": 0.6604911088943481, + "learning_rate": 1.9972491894519225e-05, + "loss": 2.483, + "step": 1198 + }, + { + "epoch": 0.04, + "grad_norm": 0.7131222486495972, + "learning_rate": 1.997241305455749e-05, + "loss": 2.4482, + "step": 1199 + }, + { + "epoch": 0.04, + "grad_norm": 0.6666430234909058, + "learning_rate": 1.9972334101933278e-05, + "loss": 2.5161, + "step": 1200 + }, + { + "epoch": 0.04, + "grad_norm": 0.6561883091926575, + "learning_rate": 1.9972255036647482e-05, + "loss": 2.4734, + "step": 1201 + }, + { + "epoch": 0.04, + "grad_norm": 0.6368906497955322, + "learning_rate": 1.997217585870099e-05, + "loss": 2.448, + "step": 1202 + }, + { + "epoch": 0.04, + "grad_norm": 0.6550028324127197, + "learning_rate": 1.9972096568094698e-05, + "loss": 2.4797, + "step": 1203 + }, + { + "epoch": 0.04, + "grad_norm": 0.6599278450012207, + "learning_rate": 1.9972017164829506e-05, + "loss": 2.4667, + "step": 1204 + }, + { + "epoch": 0.04, + "grad_norm": 0.6668063402175903, + "learning_rate": 1.997193764890631e-05, + "loss": 2.4909, + "step": 1205 + }, + { + "epoch": 0.04, + "grad_norm": 0.6499751806259155, + "learning_rate": 1.9971858020326002e-05, + "loss": 2.5002, + "step": 1206 + }, + { + "epoch": 0.04, + "grad_norm": 0.6999074220657349, + "learning_rate": 1.9971778279089483e-05, + "loss": 2.4121, + "step": 1207 + }, + { + "epoch": 0.04, + "grad_norm": 0.6676874756813049, + "learning_rate": 1.9971698425197666e-05, + "loss": 2.4775, + "step": 1208 + }, + { + "epoch": 0.04, + "grad_norm": 0.6871338486671448, + "learning_rate": 1.9971618458651437e-05, + "loss": 2.4924, + "step": 1209 + }, + { + "epoch": 0.04, + "grad_norm": 0.6738062500953674, + "learning_rate": 1.9971538379451713e-05, + "loss": 2.4968, + "step": 1210 + }, + { + "epoch": 0.04, + "grad_norm": 0.6423949599266052, + "learning_rate": 1.9971458187599385e-05, + "loss": 2.4529, + "step": 1211 + }, + { + "epoch": 0.04, + "grad_norm": 0.6390488743782043, + "learning_rate": 1.9971377883095372e-05, + "loss": 2.4903, + "step": 1212 + }, + { + "epoch": 0.04, + "grad_norm": 0.6854246258735657, + "learning_rate": 1.9971297465940574e-05, + "loss": 2.5195, + "step": 1213 + }, + { + "epoch": 0.04, + "grad_norm": 0.67038893699646, + "learning_rate": 1.99712169361359e-05, + "loss": 2.5053, + "step": 1214 + }, + { + "epoch": 0.04, + "grad_norm": 0.67030930519104, + "learning_rate": 1.997113629368226e-05, + "loss": 2.5108, + "step": 1215 + }, + { + "epoch": 0.04, + "grad_norm": 0.65314781665802, + "learning_rate": 1.997105553858057e-05, + "loss": 2.4184, + "step": 1216 + }, + { + "epoch": 0.04, + "grad_norm": 0.709152102470398, + "learning_rate": 1.9970974670831732e-05, + "loss": 2.5151, + "step": 1217 + }, + { + "epoch": 0.04, + "grad_norm": 0.6381149291992188, + "learning_rate": 1.997089369043667e-05, + "loss": 2.5103, + "step": 1218 + }, + { + "epoch": 0.04, + "grad_norm": 0.6892956495285034, + "learning_rate": 1.9970812597396293e-05, + "loss": 2.5183, + "step": 1219 + }, + { + "epoch": 0.04, + "grad_norm": 0.7551789879798889, + "learning_rate": 1.997073139171152e-05, + "loss": 2.4912, + "step": 1220 + }, + { + "epoch": 0.04, + "grad_norm": 0.6651291847229004, + "learning_rate": 1.9970650073383267e-05, + "loss": 2.4808, + "step": 1221 + }, + { + "epoch": 0.04, + "grad_norm": 0.6543241739273071, + "learning_rate": 1.997056864241245e-05, + "loss": 2.5344, + "step": 1222 + }, + { + "epoch": 0.04, + "grad_norm": 0.6531636118888855, + "learning_rate": 1.99704870988e-05, + "loss": 2.5213, + "step": 1223 + }, + { + "epoch": 0.04, + "grad_norm": 0.6759975552558899, + "learning_rate": 1.9970405442546822e-05, + "loss": 2.4585, + "step": 1224 + }, + { + "epoch": 0.04, + "grad_norm": 0.6605139374732971, + "learning_rate": 1.9970323673653848e-05, + "loss": 2.5745, + "step": 1225 + }, + { + "epoch": 0.04, + "grad_norm": 0.7091869711875916, + "learning_rate": 1.9970241792122e-05, + "loss": 2.4767, + "step": 1226 + }, + { + "epoch": 0.04, + "grad_norm": 0.6900755167007446, + "learning_rate": 1.9970159797952204e-05, + "loss": 2.5013, + "step": 1227 + }, + { + "epoch": 0.04, + "grad_norm": 0.6499425768852234, + "learning_rate": 1.9970077691145387e-05, + "loss": 2.4593, + "step": 1228 + }, + { + "epoch": 0.04, + "grad_norm": 0.6916418671607971, + "learning_rate": 1.9969995471702474e-05, + "loss": 2.5865, + "step": 1229 + }, + { + "epoch": 0.04, + "grad_norm": 0.696540117263794, + "learning_rate": 1.9969913139624398e-05, + "loss": 2.4665, + "step": 1230 + }, + { + "epoch": 0.04, + "grad_norm": 0.7007367610931396, + "learning_rate": 1.9969830694912083e-05, + "loss": 2.4568, + "step": 1231 + }, + { + "epoch": 0.04, + "grad_norm": 0.6523316502571106, + "learning_rate": 1.9969748137566466e-05, + "loss": 2.5282, + "step": 1232 + }, + { + "epoch": 0.04, + "grad_norm": 0.6738514304161072, + "learning_rate": 1.9969665467588473e-05, + "loss": 2.5556, + "step": 1233 + }, + { + "epoch": 0.04, + "grad_norm": 0.6605009436607361, + "learning_rate": 1.9969582684979044e-05, + "loss": 2.5033, + "step": 1234 + }, + { + "epoch": 0.04, + "grad_norm": 0.6775720119476318, + "learning_rate": 1.9969499789739117e-05, + "loss": 2.4566, + "step": 1235 + }, + { + "epoch": 0.04, + "grad_norm": 0.6666238307952881, + "learning_rate": 1.9969416781869618e-05, + "loss": 2.4371, + "step": 1236 + }, + { + "epoch": 0.04, + "grad_norm": 0.7366918921470642, + "learning_rate": 1.9969333661371496e-05, + "loss": 2.4109, + "step": 1237 + }, + { + "epoch": 0.04, + "grad_norm": 0.6864652633666992, + "learning_rate": 1.996925042824568e-05, + "loss": 2.5461, + "step": 1238 + }, + { + "epoch": 0.04, + "grad_norm": 0.6476344466209412, + "learning_rate": 1.996916708249312e-05, + "loss": 2.4926, + "step": 1239 + }, + { + "epoch": 0.04, + "grad_norm": 0.6725216507911682, + "learning_rate": 1.996908362411475e-05, + "loss": 2.4423, + "step": 1240 + }, + { + "epoch": 0.04, + "grad_norm": 0.6774707436561584, + "learning_rate": 1.996900005311152e-05, + "loss": 2.4367, + "step": 1241 + }, + { + "epoch": 0.04, + "grad_norm": 0.6803374886512756, + "learning_rate": 1.9968916369484362e-05, + "loss": 2.3583, + "step": 1242 + }, + { + "epoch": 0.04, + "grad_norm": 0.6821350455284119, + "learning_rate": 1.9968832573234235e-05, + "loss": 2.4773, + "step": 1243 + }, + { + "epoch": 0.04, + "grad_norm": 0.6509692668914795, + "learning_rate": 1.9968748664362078e-05, + "loss": 2.4623, + "step": 1244 + }, + { + "epoch": 0.04, + "grad_norm": 0.6849445104598999, + "learning_rate": 1.996866464286884e-05, + "loss": 2.5288, + "step": 1245 + }, + { + "epoch": 0.04, + "grad_norm": 0.6803519129753113, + "learning_rate": 1.9968580508755472e-05, + "loss": 2.5977, + "step": 1246 + }, + { + "epoch": 0.04, + "grad_norm": 0.6328503489494324, + "learning_rate": 1.9968496262022926e-05, + "loss": 2.4331, + "step": 1247 + }, + { + "epoch": 0.04, + "grad_norm": 0.6332231760025024, + "learning_rate": 1.996841190267215e-05, + "loss": 2.4471, + "step": 1248 + }, + { + "epoch": 0.04, + "grad_norm": 0.6625400185585022, + "learning_rate": 1.9968327430704094e-05, + "loss": 2.4662, + "step": 1249 + }, + { + "epoch": 0.04, + "grad_norm": 0.6836045384407043, + "learning_rate": 1.996824284611972e-05, + "loss": 2.5067, + "step": 1250 + }, + { + "epoch": 0.04, + "grad_norm": 0.6243551969528198, + "learning_rate": 1.9968158148919984e-05, + "loss": 2.4404, + "step": 1251 + }, + { + "epoch": 0.04, + "grad_norm": 0.6712891459465027, + "learning_rate": 1.9968073339105834e-05, + "loss": 2.4645, + "step": 1252 + }, + { + "epoch": 0.04, + "grad_norm": 0.6650150418281555, + "learning_rate": 1.9967988416678234e-05, + "loss": 2.4724, + "step": 1253 + }, + { + "epoch": 0.04, + "grad_norm": 0.688060462474823, + "learning_rate": 1.9967903381638145e-05, + "loss": 2.4722, + "step": 1254 + }, + { + "epoch": 0.04, + "grad_norm": 0.6574703454971313, + "learning_rate": 1.9967818233986524e-05, + "loss": 2.4526, + "step": 1255 + }, + { + "epoch": 0.04, + "grad_norm": 0.642695963382721, + "learning_rate": 1.9967732973724335e-05, + "loss": 2.4421, + "step": 1256 + }, + { + "epoch": 0.04, + "grad_norm": 0.6508745551109314, + "learning_rate": 1.996764760085254e-05, + "loss": 2.492, + "step": 1257 + }, + { + "epoch": 0.04, + "grad_norm": 0.6607152223587036, + "learning_rate": 1.9967562115372102e-05, + "loss": 2.5089, + "step": 1258 + }, + { + "epoch": 0.04, + "grad_norm": 0.6321751475334167, + "learning_rate": 1.996747651728399e-05, + "loss": 2.5196, + "step": 1259 + }, + { + "epoch": 0.04, + "grad_norm": 0.6504089832305908, + "learning_rate": 1.9967390806589174e-05, + "loss": 2.4672, + "step": 1260 + }, + { + "epoch": 0.04, + "grad_norm": 0.6916740536689758, + "learning_rate": 1.9967304983288614e-05, + "loss": 2.5157, + "step": 1261 + }, + { + "epoch": 0.04, + "grad_norm": 0.630699872970581, + "learning_rate": 1.9967219047383283e-05, + "loss": 2.4376, + "step": 1262 + }, + { + "epoch": 0.04, + "grad_norm": 0.6660124659538269, + "learning_rate": 1.9967132998874152e-05, + "loss": 2.4542, + "step": 1263 + }, + { + "epoch": 0.04, + "grad_norm": 0.6522161960601807, + "learning_rate": 1.9967046837762195e-05, + "loss": 2.517, + "step": 1264 + }, + { + "epoch": 0.04, + "grad_norm": 0.6715566515922546, + "learning_rate": 1.996696056404839e-05, + "loss": 2.4071, + "step": 1265 + }, + { + "epoch": 0.04, + "grad_norm": 0.6703926920890808, + "learning_rate": 1.9966874177733698e-05, + "loss": 2.4235, + "step": 1266 + }, + { + "epoch": 0.04, + "grad_norm": 0.6756909489631653, + "learning_rate": 1.9966787678819103e-05, + "loss": 2.3735, + "step": 1267 + }, + { + "epoch": 0.04, + "grad_norm": 0.6688052415847778, + "learning_rate": 1.9966701067305586e-05, + "loss": 2.3846, + "step": 1268 + }, + { + "epoch": 0.04, + "grad_norm": 0.684777557849884, + "learning_rate": 1.9966614343194116e-05, + "loss": 2.525, + "step": 1269 + }, + { + "epoch": 0.04, + "grad_norm": 0.6482384204864502, + "learning_rate": 1.9966527506485685e-05, + "loss": 2.4885, + "step": 1270 + }, + { + "epoch": 0.04, + "grad_norm": 0.6748205423355103, + "learning_rate": 1.9966440557181263e-05, + "loss": 2.4762, + "step": 1271 + }, + { + "epoch": 0.04, + "grad_norm": 0.6453205943107605, + "learning_rate": 1.996635349528184e-05, + "loss": 2.522, + "step": 1272 + }, + { + "epoch": 0.04, + "grad_norm": 0.663126528263092, + "learning_rate": 1.996626632078839e-05, + "loss": 2.4957, + "step": 1273 + }, + { + "epoch": 0.04, + "grad_norm": 0.657171905040741, + "learning_rate": 1.9966179033701907e-05, + "loss": 2.5044, + "step": 1274 + }, + { + "epoch": 0.04, + "grad_norm": 0.638403058052063, + "learning_rate": 1.9966091634023376e-05, + "loss": 2.5283, + "step": 1275 + }, + { + "epoch": 0.04, + "grad_norm": 0.6649624705314636, + "learning_rate": 1.996600412175378e-05, + "loss": 2.4927, + "step": 1276 + }, + { + "epoch": 0.04, + "grad_norm": 0.6398544907569885, + "learning_rate": 1.996591649689411e-05, + "loss": 2.4976, + "step": 1277 + }, + { + "epoch": 0.04, + "grad_norm": 0.6636454463005066, + "learning_rate": 1.9965828759445357e-05, + "loss": 2.4077, + "step": 1278 + }, + { + "epoch": 0.04, + "grad_norm": 0.7010447978973389, + "learning_rate": 1.996574090940851e-05, + "loss": 2.4973, + "step": 1279 + }, + { + "epoch": 0.04, + "grad_norm": 0.6408699154853821, + "learning_rate": 1.9965652946784565e-05, + "loss": 2.5336, + "step": 1280 + }, + { + "epoch": 0.04, + "grad_norm": 0.6317563652992249, + "learning_rate": 1.9965564871574513e-05, + "loss": 2.4476, + "step": 1281 + }, + { + "epoch": 0.04, + "grad_norm": 0.6677871346473694, + "learning_rate": 1.996547668377935e-05, + "loss": 2.5296, + "step": 1282 + }, + { + "epoch": 0.04, + "grad_norm": 0.6846206784248352, + "learning_rate": 1.996538838340007e-05, + "loss": 2.4617, + "step": 1283 + }, + { + "epoch": 0.04, + "grad_norm": 0.6472628712654114, + "learning_rate": 1.996529997043767e-05, + "loss": 2.5552, + "step": 1284 + }, + { + "epoch": 0.04, + "grad_norm": 0.6841762065887451, + "learning_rate": 1.9965211444893156e-05, + "loss": 2.4944, + "step": 1285 + }, + { + "epoch": 0.04, + "grad_norm": 0.6732828617095947, + "learning_rate": 1.996512280676752e-05, + "loss": 2.515, + "step": 1286 + }, + { + "epoch": 0.04, + "grad_norm": 0.6832213997840881, + "learning_rate": 1.996503405606177e-05, + "loss": 2.5194, + "step": 1287 + }, + { + "epoch": 0.04, + "grad_norm": 0.6445607542991638, + "learning_rate": 1.99649451927769e-05, + "loss": 2.455, + "step": 1288 + }, + { + "epoch": 0.04, + "grad_norm": 0.689518928527832, + "learning_rate": 1.9964856216913925e-05, + "loss": 2.4003, + "step": 1289 + }, + { + "epoch": 0.04, + "grad_norm": 0.6411250233650208, + "learning_rate": 1.996476712847384e-05, + "loss": 2.4268, + "step": 1290 + }, + { + "epoch": 0.04, + "grad_norm": 0.6646576523780823, + "learning_rate": 1.9964677927457658e-05, + "loss": 2.5256, + "step": 1291 + }, + { + "epoch": 0.04, + "grad_norm": 0.630534291267395, + "learning_rate": 1.9964588613866385e-05, + "loss": 2.434, + "step": 1292 + }, + { + "epoch": 0.04, + "grad_norm": 0.6903734803199768, + "learning_rate": 1.996449918770103e-05, + "loss": 2.4734, + "step": 1293 + }, + { + "epoch": 0.04, + "grad_norm": 0.6380398273468018, + "learning_rate": 1.9964409648962603e-05, + "loss": 2.4945, + "step": 1294 + }, + { + "epoch": 0.04, + "grad_norm": 0.6500599384307861, + "learning_rate": 1.9964319997652112e-05, + "loss": 2.473, + "step": 1295 + }, + { + "epoch": 0.04, + "grad_norm": 0.6738811731338501, + "learning_rate": 1.9964230233770576e-05, + "loss": 2.5539, + "step": 1296 + }, + { + "epoch": 0.04, + "grad_norm": 0.6428767442703247, + "learning_rate": 1.996414035731901e-05, + "loss": 2.4458, + "step": 1297 + }, + { + "epoch": 0.04, + "grad_norm": 0.6268296837806702, + "learning_rate": 1.9964050368298417e-05, + "loss": 2.4535, + "step": 1298 + }, + { + "epoch": 0.04, + "grad_norm": 0.6365980505943298, + "learning_rate": 1.996396026670983e-05, + "loss": 2.4581, + "step": 1299 + }, + { + "epoch": 0.04, + "grad_norm": 0.6351314783096313, + "learning_rate": 1.9963870052554256e-05, + "loss": 2.5259, + "step": 1300 + }, + { + "epoch": 0.04, + "grad_norm": 0.6886266469955444, + "learning_rate": 1.996377972583272e-05, + "loss": 2.5028, + "step": 1301 + }, + { + "epoch": 0.04, + "grad_norm": 0.6512261033058167, + "learning_rate": 1.9963689286546238e-05, + "loss": 2.5232, + "step": 1302 + }, + { + "epoch": 0.04, + "grad_norm": 0.6695678234100342, + "learning_rate": 1.9963598734695834e-05, + "loss": 2.4975, + "step": 1303 + }, + { + "epoch": 0.04, + "grad_norm": 0.6604126691818237, + "learning_rate": 1.996350807028253e-05, + "loss": 2.4152, + "step": 1304 + }, + { + "epoch": 0.04, + "grad_norm": 0.6352373957633972, + "learning_rate": 1.9963417293307353e-05, + "loss": 2.4842, + "step": 1305 + }, + { + "epoch": 0.04, + "grad_norm": 0.6646503806114197, + "learning_rate": 1.9963326403771326e-05, + "loss": 2.4807, + "step": 1306 + }, + { + "epoch": 0.04, + "grad_norm": 0.6240456104278564, + "learning_rate": 1.9963235401675476e-05, + "loss": 2.4038, + "step": 1307 + }, + { + "epoch": 0.04, + "grad_norm": 0.6661893129348755, + "learning_rate": 1.996314428702083e-05, + "loss": 2.4963, + "step": 1308 + }, + { + "epoch": 0.04, + "grad_norm": 0.655264675617218, + "learning_rate": 1.996305305980842e-05, + "loss": 2.4834, + "step": 1309 + }, + { + "epoch": 0.04, + "grad_norm": 0.696756899356842, + "learning_rate": 1.9962961720039278e-05, + "loss": 2.4746, + "step": 1310 + }, + { + "epoch": 0.04, + "grad_norm": 0.7081061601638794, + "learning_rate": 1.996287026771443e-05, + "loss": 2.4903, + "step": 1311 + }, + { + "epoch": 0.04, + "grad_norm": 0.657322883605957, + "learning_rate": 1.9962778702834913e-05, + "loss": 2.4367, + "step": 1312 + }, + { + "epoch": 0.04, + "grad_norm": 0.7017743587493896, + "learning_rate": 1.996268702540176e-05, + "loss": 2.4314, + "step": 1313 + }, + { + "epoch": 0.04, + "grad_norm": 0.6454604864120483, + "learning_rate": 1.996259523541601e-05, + "loss": 2.4836, + "step": 1314 + }, + { + "epoch": 0.04, + "grad_norm": 0.6961708068847656, + "learning_rate": 1.99625033328787e-05, + "loss": 2.438, + "step": 1315 + }, + { + "epoch": 0.04, + "grad_norm": 0.6980037093162537, + "learning_rate": 1.996241131779086e-05, + "loss": 2.4708, + "step": 1316 + }, + { + "epoch": 0.04, + "grad_norm": 0.7056499123573303, + "learning_rate": 1.996231919015354e-05, + "loss": 2.4033, + "step": 1317 + }, + { + "epoch": 0.04, + "grad_norm": 0.6699267625808716, + "learning_rate": 1.9962226949967774e-05, + "loss": 2.4356, + "step": 1318 + }, + { + "epoch": 0.04, + "grad_norm": 0.6619434952735901, + "learning_rate": 1.9962134597234606e-05, + "loss": 2.4638, + "step": 1319 + }, + { + "epoch": 0.04, + "grad_norm": 0.7110757827758789, + "learning_rate": 1.9962042131955083e-05, + "loss": 2.4656, + "step": 1320 + }, + { + "epoch": 0.04, + "grad_norm": 0.742756187915802, + "learning_rate": 1.9961949554130242e-05, + "loss": 2.4659, + "step": 1321 + }, + { + "epoch": 0.04, + "grad_norm": 0.6969943642616272, + "learning_rate": 1.9961856863761135e-05, + "loss": 2.4452, + "step": 1322 + }, + { + "epoch": 0.04, + "grad_norm": 0.6458531022071838, + "learning_rate": 1.9961764060848808e-05, + "loss": 2.4329, + "step": 1323 + }, + { + "epoch": 0.04, + "grad_norm": 0.6976578235626221, + "learning_rate": 1.996167114539431e-05, + "loss": 2.5283, + "step": 1324 + }, + { + "epoch": 0.04, + "grad_norm": 0.6413078904151917, + "learning_rate": 1.9961578117398687e-05, + "loss": 2.4435, + "step": 1325 + }, + { + "epoch": 0.04, + "grad_norm": 0.6445743441581726, + "learning_rate": 1.996148497686299e-05, + "loss": 2.4572, + "step": 1326 + }, + { + "epoch": 0.04, + "grad_norm": 0.6960339546203613, + "learning_rate": 1.9961391723788278e-05, + "loss": 2.4592, + "step": 1327 + }, + { + "epoch": 0.04, + "grad_norm": 0.6371101140975952, + "learning_rate": 1.99612983581756e-05, + "loss": 2.4695, + "step": 1328 + }, + { + "epoch": 0.04, + "grad_norm": 0.6478706002235413, + "learning_rate": 1.996120488002601e-05, + "loss": 2.4022, + "step": 1329 + }, + { + "epoch": 0.04, + "grad_norm": 0.6440830826759338, + "learning_rate": 1.9961111289340567e-05, + "loss": 2.4604, + "step": 1330 + }, + { + "epoch": 0.04, + "grad_norm": 0.661811888217926, + "learning_rate": 1.9961017586120325e-05, + "loss": 2.4909, + "step": 1331 + }, + { + "epoch": 0.04, + "grad_norm": 0.6538639068603516, + "learning_rate": 1.9960923770366345e-05, + "loss": 2.4333, + "step": 1332 + }, + { + "epoch": 0.04, + "grad_norm": 0.6719698905944824, + "learning_rate": 1.9960829842079685e-05, + "loss": 2.5043, + "step": 1333 + }, + { + "epoch": 0.04, + "grad_norm": 0.640352725982666, + "learning_rate": 1.9960735801261407e-05, + "loss": 2.5373, + "step": 1334 + }, + { + "epoch": 0.04, + "grad_norm": 0.673474907875061, + "learning_rate": 1.9960641647912573e-05, + "loss": 2.4131, + "step": 1335 + }, + { + "epoch": 0.04, + "grad_norm": 0.6996123790740967, + "learning_rate": 1.9960547382034246e-05, + "loss": 2.5014, + "step": 1336 + }, + { + "epoch": 0.04, + "grad_norm": 0.6799312829971313, + "learning_rate": 1.9960453003627494e-05, + "loss": 2.4825, + "step": 1337 + }, + { + "epoch": 0.04, + "grad_norm": 0.6481389999389648, + "learning_rate": 1.9960358512693384e-05, + "loss": 2.4664, + "step": 1338 + }, + { + "epoch": 0.04, + "grad_norm": 0.6614128947257996, + "learning_rate": 1.9960263909232977e-05, + "loss": 2.4074, + "step": 1339 + }, + { + "epoch": 0.04, + "grad_norm": 0.635061502456665, + "learning_rate": 1.9960169193247346e-05, + "loss": 2.5352, + "step": 1340 + }, + { + "epoch": 0.04, + "grad_norm": 0.6372793316841125, + "learning_rate": 1.9960074364737562e-05, + "loss": 2.4957, + "step": 1341 + }, + { + "epoch": 0.04, + "grad_norm": 0.6712480783462524, + "learning_rate": 1.9959979423704692e-05, + "loss": 2.4521, + "step": 1342 + }, + { + "epoch": 0.04, + "grad_norm": 0.6700102686882019, + "learning_rate": 1.9959884370149815e-05, + "loss": 2.4668, + "step": 1343 + }, + { + "epoch": 0.04, + "grad_norm": 0.6707836389541626, + "learning_rate": 1.9959789204074e-05, + "loss": 2.4943, + "step": 1344 + }, + { + "epoch": 0.04, + "grad_norm": 0.6268095374107361, + "learning_rate": 1.9959693925478324e-05, + "loss": 2.4197, + "step": 1345 + }, + { + "epoch": 0.04, + "grad_norm": 0.6622927784919739, + "learning_rate": 1.9959598534363863e-05, + "loss": 2.4782, + "step": 1346 + }, + { + "epoch": 0.04, + "grad_norm": 0.6450578570365906, + "learning_rate": 1.9959503030731696e-05, + "loss": 2.4494, + "step": 1347 + }, + { + "epoch": 0.04, + "grad_norm": 0.6368165016174316, + "learning_rate": 1.9959407414582895e-05, + "loss": 2.4482, + "step": 1348 + }, + { + "epoch": 0.04, + "grad_norm": 0.6570246815681458, + "learning_rate": 1.9959311685918553e-05, + "loss": 2.4203, + "step": 1349 + }, + { + "epoch": 0.04, + "grad_norm": 0.6869211196899414, + "learning_rate": 1.9959215844739742e-05, + "loss": 2.5324, + "step": 1350 + }, + { + "epoch": 0.04, + "grad_norm": 0.7127034664154053, + "learning_rate": 1.9959119891047546e-05, + "loss": 2.5216, + "step": 1351 + }, + { + "epoch": 0.04, + "grad_norm": 0.6510097980499268, + "learning_rate": 1.995902382484305e-05, + "loss": 2.4695, + "step": 1352 + }, + { + "epoch": 0.05, + "grad_norm": 0.6794703602790833, + "learning_rate": 1.995892764612734e-05, + "loss": 2.4854, + "step": 1353 + }, + { + "epoch": 0.05, + "grad_norm": 0.6507699489593506, + "learning_rate": 1.99588313549015e-05, + "loss": 2.3988, + "step": 1354 + }, + { + "epoch": 0.05, + "grad_norm": 0.6657196283340454, + "learning_rate": 1.9958734951166624e-05, + "loss": 2.4094, + "step": 1355 + }, + { + "epoch": 0.05, + "grad_norm": 0.6738486289978027, + "learning_rate": 1.9958638434923795e-05, + "loss": 2.4578, + "step": 1356 + }, + { + "epoch": 0.05, + "grad_norm": 0.6376387476921082, + "learning_rate": 1.99585418061741e-05, + "loss": 2.4534, + "step": 1357 + }, + { + "epoch": 0.05, + "grad_norm": 0.6589769124984741, + "learning_rate": 1.9958445064918646e-05, + "loss": 2.4137, + "step": 1358 + }, + { + "epoch": 0.05, + "grad_norm": 0.649523138999939, + "learning_rate": 1.995834821115851e-05, + "loss": 2.459, + "step": 1359 + }, + { + "epoch": 0.05, + "grad_norm": 0.6733065247535706, + "learning_rate": 1.9958251244894793e-05, + "loss": 2.5466, + "step": 1360 + }, + { + "epoch": 0.05, + "grad_norm": 0.6583726406097412, + "learning_rate": 1.9958154166128588e-05, + "loss": 2.5025, + "step": 1361 + }, + { + "epoch": 0.05, + "grad_norm": 0.6689596176147461, + "learning_rate": 1.9958056974860994e-05, + "loss": 2.3948, + "step": 1362 + }, + { + "epoch": 0.05, + "grad_norm": 0.6504204273223877, + "learning_rate": 1.9957959671093107e-05, + "loss": 2.4677, + "step": 1363 + }, + { + "epoch": 0.05, + "grad_norm": 0.6603761315345764, + "learning_rate": 1.9957862254826034e-05, + "loss": 2.439, + "step": 1364 + }, + { + "epoch": 0.05, + "grad_norm": 0.655724048614502, + "learning_rate": 1.9957764726060863e-05, + "loss": 2.4584, + "step": 1365 + }, + { + "epoch": 0.05, + "grad_norm": 0.6528634428977966, + "learning_rate": 1.99576670847987e-05, + "loss": 2.4153, + "step": 1366 + }, + { + "epoch": 0.05, + "grad_norm": 0.6665694117546082, + "learning_rate": 1.9957569331040658e-05, + "loss": 2.5118, + "step": 1367 + }, + { + "epoch": 0.05, + "grad_norm": 0.6297091245651245, + "learning_rate": 1.9957471464787826e-05, + "loss": 2.4325, + "step": 1368 + }, + { + "epoch": 0.05, + "grad_norm": 0.7026760578155518, + "learning_rate": 1.995737348604132e-05, + "loss": 2.4842, + "step": 1369 + }, + { + "epoch": 0.05, + "grad_norm": 0.6293516755104065, + "learning_rate": 1.9957275394802244e-05, + "loss": 2.4273, + "step": 1370 + }, + { + "epoch": 0.05, + "grad_norm": 0.6752887964248657, + "learning_rate": 1.995717719107171e-05, + "loss": 2.4505, + "step": 1371 + }, + { + "epoch": 0.05, + "grad_norm": 0.6465798616409302, + "learning_rate": 1.9957078874850816e-05, + "loss": 2.4412, + "step": 1372 + }, + { + "epoch": 0.05, + "grad_norm": 0.6404193043708801, + "learning_rate": 1.9956980446140687e-05, + "loss": 2.5465, + "step": 1373 + }, + { + "epoch": 0.05, + "grad_norm": 0.6800628304481506, + "learning_rate": 1.995688190494242e-05, + "loss": 2.4986, + "step": 1374 + }, + { + "epoch": 0.05, + "grad_norm": 0.6417450904846191, + "learning_rate": 1.9956783251257144e-05, + "loss": 2.4614, + "step": 1375 + }, + { + "epoch": 0.05, + "grad_norm": 0.671389102935791, + "learning_rate": 1.9956684485085965e-05, + "loss": 2.5032, + "step": 1376 + }, + { + "epoch": 0.05, + "grad_norm": 0.659179151058197, + "learning_rate": 1.995658560643e-05, + "loss": 2.5366, + "step": 1377 + }, + { + "epoch": 0.05, + "grad_norm": 0.6766591668128967, + "learning_rate": 1.9956486615290363e-05, + "loss": 2.5038, + "step": 1378 + }, + { + "epoch": 0.05, + "grad_norm": 0.6527840495109558, + "learning_rate": 1.9956387511668177e-05, + "loss": 2.5286, + "step": 1379 + }, + { + "epoch": 0.05, + "grad_norm": 0.6626148819923401, + "learning_rate": 1.9956288295564562e-05, + "loss": 2.4467, + "step": 1380 + }, + { + "epoch": 0.05, + "grad_norm": 0.6345047354698181, + "learning_rate": 1.9956188966980633e-05, + "loss": 2.5187, + "step": 1381 + }, + { + "epoch": 0.05, + "grad_norm": 0.6495223641395569, + "learning_rate": 1.9956089525917516e-05, + "loss": 2.3816, + "step": 1382 + }, + { + "epoch": 0.05, + "grad_norm": 0.6508678793907166, + "learning_rate": 1.9955989972376337e-05, + "loss": 2.5325, + "step": 1383 + }, + { + "epoch": 0.05, + "grad_norm": 0.6658614873886108, + "learning_rate": 1.9955890306358216e-05, + "loss": 2.4572, + "step": 1384 + }, + { + "epoch": 0.05, + "grad_norm": 0.6279206275939941, + "learning_rate": 1.9955790527864284e-05, + "loss": 2.4017, + "step": 1385 + }, + { + "epoch": 0.05, + "grad_norm": 0.6657488346099854, + "learning_rate": 1.9955690636895663e-05, + "loss": 2.4689, + "step": 1386 + }, + { + "epoch": 0.05, + "grad_norm": 0.6805073022842407, + "learning_rate": 1.9955590633453483e-05, + "loss": 2.4432, + "step": 1387 + }, + { + "epoch": 0.05, + "grad_norm": 0.6697499752044678, + "learning_rate": 1.9955490517538877e-05, + "loss": 2.4984, + "step": 1388 + }, + { + "epoch": 0.05, + "grad_norm": 0.6372087597846985, + "learning_rate": 1.995539028915297e-05, + "loss": 2.4308, + "step": 1389 + }, + { + "epoch": 0.05, + "grad_norm": 0.6644550561904907, + "learning_rate": 1.99552899482969e-05, + "loss": 2.433, + "step": 1390 + }, + { + "epoch": 0.05, + "grad_norm": 0.6407074332237244, + "learning_rate": 1.9955189494971802e-05, + "loss": 2.4654, + "step": 1391 + }, + { + "epoch": 0.05, + "grad_norm": 0.6997047662734985, + "learning_rate": 1.9955088929178804e-05, + "loss": 2.4656, + "step": 1392 + }, + { + "epoch": 0.05, + "grad_norm": 0.6186793446540833, + "learning_rate": 1.9954988250919045e-05, + "loss": 2.3622, + "step": 1393 + }, + { + "epoch": 0.05, + "grad_norm": 0.6754992604255676, + "learning_rate": 1.9954887460193663e-05, + "loss": 2.5099, + "step": 1394 + }, + { + "epoch": 0.05, + "grad_norm": 0.6849030256271362, + "learning_rate": 1.9954786557003797e-05, + "loss": 2.5153, + "step": 1395 + }, + { + "epoch": 0.05, + "grad_norm": 0.6449416279792786, + "learning_rate": 1.9954685541350587e-05, + "loss": 2.4798, + "step": 1396 + }, + { + "epoch": 0.05, + "grad_norm": 0.6258751153945923, + "learning_rate": 1.9954584413235174e-05, + "loss": 2.4951, + "step": 1397 + }, + { + "epoch": 0.05, + "grad_norm": 0.6558632254600525, + "learning_rate": 1.9954483172658698e-05, + "loss": 2.412, + "step": 1398 + }, + { + "epoch": 0.05, + "grad_norm": 0.6475643515586853, + "learning_rate": 1.995438181962231e-05, + "loss": 2.5025, + "step": 1399 + }, + { + "epoch": 0.05, + "grad_norm": 0.6989474296569824, + "learning_rate": 1.9954280354127147e-05, + "loss": 2.4506, + "step": 1400 + }, + { + "epoch": 0.05, + "grad_norm": 0.6567366719245911, + "learning_rate": 1.9954178776174358e-05, + "loss": 2.4485, + "step": 1401 + }, + { + "epoch": 0.05, + "grad_norm": 0.6459561586380005, + "learning_rate": 1.9954077085765087e-05, + "loss": 2.3865, + "step": 1402 + }, + { + "epoch": 0.05, + "grad_norm": 0.6543406844139099, + "learning_rate": 1.9953975282900494e-05, + "loss": 2.4356, + "step": 1403 + }, + { + "epoch": 0.05, + "grad_norm": 0.6842741370201111, + "learning_rate": 1.995387336758172e-05, + "loss": 2.416, + "step": 1404 + }, + { + "epoch": 0.05, + "grad_norm": 0.6660739779472351, + "learning_rate": 1.995377133980992e-05, + "loss": 2.4348, + "step": 1405 + }, + { + "epoch": 0.05, + "grad_norm": 0.6702792048454285, + "learning_rate": 1.9953669199586238e-05, + "loss": 2.5076, + "step": 1406 + }, + { + "epoch": 0.05, + "grad_norm": 0.6631298661231995, + "learning_rate": 1.995356694691184e-05, + "loss": 2.4301, + "step": 1407 + }, + { + "epoch": 0.05, + "grad_norm": 0.6334249377250671, + "learning_rate": 1.9953464581787873e-05, + "loss": 2.4175, + "step": 1408 + }, + { + "epoch": 0.05, + "grad_norm": 0.6762251257896423, + "learning_rate": 1.99533621042155e-05, + "loss": 2.4747, + "step": 1409 + }, + { + "epoch": 0.05, + "grad_norm": 0.6757739186286926, + "learning_rate": 1.9953259514195872e-05, + "loss": 2.4625, + "step": 1410 + }, + { + "epoch": 0.05, + "grad_norm": 0.6670404672622681, + "learning_rate": 1.9953156811730153e-05, + "loss": 2.4713, + "step": 1411 + }, + { + "epoch": 0.05, + "grad_norm": 0.6381444334983826, + "learning_rate": 1.99530539968195e-05, + "loss": 2.3485, + "step": 1412 + }, + { + "epoch": 0.05, + "grad_norm": 0.7113683223724365, + "learning_rate": 1.995295106946508e-05, + "loss": 2.4354, + "step": 1413 + }, + { + "epoch": 0.05, + "grad_norm": 0.6402413845062256, + "learning_rate": 1.9952848029668045e-05, + "loss": 2.4277, + "step": 1414 + }, + { + "epoch": 0.05, + "grad_norm": 0.6554989218711853, + "learning_rate": 1.995274487742957e-05, + "loss": 2.4241, + "step": 1415 + }, + { + "epoch": 0.05, + "grad_norm": 0.6495899558067322, + "learning_rate": 1.995264161275082e-05, + "loss": 2.4419, + "step": 1416 + }, + { + "epoch": 0.05, + "grad_norm": 0.6453097462654114, + "learning_rate": 1.995253823563295e-05, + "loss": 2.4402, + "step": 1417 + }, + { + "epoch": 0.05, + "grad_norm": 0.6719436049461365, + "learning_rate": 1.9952434746077142e-05, + "loss": 2.4673, + "step": 1418 + }, + { + "epoch": 0.05, + "grad_norm": 0.6509451866149902, + "learning_rate": 1.9952331144084557e-05, + "loss": 2.4369, + "step": 1419 + }, + { + "epoch": 0.05, + "grad_norm": 0.6536533236503601, + "learning_rate": 1.9952227429656367e-05, + "loss": 2.4723, + "step": 1420 + }, + { + "epoch": 0.05, + "grad_norm": 0.6670294404029846, + "learning_rate": 1.9952123602793746e-05, + "loss": 2.4833, + "step": 1421 + }, + { + "epoch": 0.05, + "grad_norm": 0.6520291566848755, + "learning_rate": 1.995201966349786e-05, + "loss": 2.445, + "step": 1422 + }, + { + "epoch": 0.05, + "grad_norm": 0.6867606043815613, + "learning_rate": 1.995191561176989e-05, + "loss": 2.4425, + "step": 1423 + }, + { + "epoch": 0.05, + "grad_norm": 0.6837676763534546, + "learning_rate": 1.995181144761101e-05, + "loss": 2.5294, + "step": 1424 + }, + { + "epoch": 0.05, + "grad_norm": 0.6840876340866089, + "learning_rate": 1.99517071710224e-05, + "loss": 2.4623, + "step": 1425 + }, + { + "epoch": 0.05, + "grad_norm": 0.6418316960334778, + "learning_rate": 1.9951602782005234e-05, + "loss": 2.4164, + "step": 1426 + }, + { + "epoch": 0.05, + "grad_norm": 0.6555109620094299, + "learning_rate": 1.995149828056069e-05, + "loss": 2.355, + "step": 1427 + }, + { + "epoch": 0.05, + "grad_norm": 0.697262704372406, + "learning_rate": 1.995139366668995e-05, + "loss": 2.432, + "step": 1428 + }, + { + "epoch": 0.05, + "grad_norm": 0.684881865978241, + "learning_rate": 1.9951288940394196e-05, + "loss": 2.4478, + "step": 1429 + }, + { + "epoch": 0.05, + "grad_norm": 0.6949005126953125, + "learning_rate": 1.9951184101674615e-05, + "loss": 2.4227, + "step": 1430 + }, + { + "epoch": 0.05, + "grad_norm": 0.6596503257751465, + "learning_rate": 1.9951079150532387e-05, + "loss": 2.4358, + "step": 1431 + }, + { + "epoch": 0.05, + "grad_norm": 0.6502809524536133, + "learning_rate": 1.9950974086968696e-05, + "loss": 2.4377, + "step": 1432 + }, + { + "epoch": 0.05, + "grad_norm": 0.6620749831199646, + "learning_rate": 1.9950868910984736e-05, + "loss": 2.4739, + "step": 1433 + }, + { + "epoch": 0.05, + "grad_norm": 0.6707249283790588, + "learning_rate": 1.9950763622581685e-05, + "loss": 2.478, + "step": 1434 + }, + { + "epoch": 0.05, + "grad_norm": 0.6480233073234558, + "learning_rate": 1.9950658221760744e-05, + "loss": 2.4544, + "step": 1435 + }, + { + "epoch": 0.05, + "grad_norm": 0.7066438794136047, + "learning_rate": 1.9950552708523095e-05, + "loss": 2.5056, + "step": 1436 + }, + { + "epoch": 0.05, + "grad_norm": 0.6745548844337463, + "learning_rate": 1.9950447082869932e-05, + "loss": 2.4512, + "step": 1437 + }, + { + "epoch": 0.05, + "grad_norm": 0.6837611198425293, + "learning_rate": 1.9950341344802454e-05, + "loss": 2.5036, + "step": 1438 + }, + { + "epoch": 0.05, + "grad_norm": 0.7114278674125671, + "learning_rate": 1.9950235494321845e-05, + "loss": 2.4821, + "step": 1439 + }, + { + "epoch": 0.05, + "grad_norm": 0.735874593257904, + "learning_rate": 1.995012953142931e-05, + "loss": 2.4204, + "step": 1440 + }, + { + "epoch": 0.05, + "grad_norm": 0.704858124256134, + "learning_rate": 1.9950023456126043e-05, + "loss": 2.4549, + "step": 1441 + }, + { + "epoch": 0.05, + "grad_norm": 0.6343608498573303, + "learning_rate": 1.9949917268413245e-05, + "loss": 2.3616, + "step": 1442 + }, + { + "epoch": 0.05, + "grad_norm": 0.6664688587188721, + "learning_rate": 1.9949810968292108e-05, + "loss": 2.4674, + "step": 1443 + }, + { + "epoch": 0.05, + "grad_norm": 0.6743026375770569, + "learning_rate": 1.9949704555763837e-05, + "loss": 2.448, + "step": 1444 + }, + { + "epoch": 0.05, + "grad_norm": 0.6959954500198364, + "learning_rate": 1.994959803082964e-05, + "loss": 2.4971, + "step": 1445 + }, + { + "epoch": 0.05, + "grad_norm": 0.6768322587013245, + "learning_rate": 1.9949491393490712e-05, + "loss": 2.4193, + "step": 1446 + }, + { + "epoch": 0.05, + "grad_norm": 0.6748964190483093, + "learning_rate": 1.9949384643748264e-05, + "loss": 2.4642, + "step": 1447 + }, + { + "epoch": 0.05, + "grad_norm": 0.6769373416900635, + "learning_rate": 1.9949277781603497e-05, + "loss": 2.4381, + "step": 1448 + }, + { + "epoch": 0.05, + "grad_norm": 0.6413769125938416, + "learning_rate": 1.9949170807057624e-05, + "loss": 2.5033, + "step": 1449 + }, + { + "epoch": 0.05, + "grad_norm": 0.6787489056587219, + "learning_rate": 1.9949063720111844e-05, + "loss": 2.4863, + "step": 1450 + }, + { + "epoch": 0.05, + "grad_norm": 0.6813258528709412, + "learning_rate": 1.9948956520767377e-05, + "loss": 2.4412, + "step": 1451 + }, + { + "epoch": 0.05, + "grad_norm": 0.6843349933624268, + "learning_rate": 1.994884920902543e-05, + "loss": 2.4557, + "step": 1452 + }, + { + "epoch": 0.05, + "grad_norm": 0.6443434953689575, + "learning_rate": 1.9948741784887214e-05, + "loss": 2.4452, + "step": 1453 + }, + { + "epoch": 0.05, + "grad_norm": 0.632203221321106, + "learning_rate": 1.9948634248353943e-05, + "loss": 2.4124, + "step": 1454 + }, + { + "epoch": 0.05, + "grad_norm": 0.6675472259521484, + "learning_rate": 1.9948526599426834e-05, + "loss": 2.4227, + "step": 1455 + }, + { + "epoch": 0.05, + "grad_norm": 0.6401450037956238, + "learning_rate": 1.9948418838107103e-05, + "loss": 2.364, + "step": 1456 + }, + { + "epoch": 0.05, + "grad_norm": 0.6327877044677734, + "learning_rate": 1.9948310964395963e-05, + "loss": 2.4603, + "step": 1457 + }, + { + "epoch": 0.05, + "grad_norm": 0.6701040267944336, + "learning_rate": 1.9948202978294636e-05, + "loss": 2.4826, + "step": 1458 + }, + { + "epoch": 0.05, + "grad_norm": 0.7000370621681213, + "learning_rate": 1.9948094879804344e-05, + "loss": 2.4118, + "step": 1459 + }, + { + "epoch": 0.05, + "grad_norm": 0.6528130173683167, + "learning_rate": 1.994798666892631e-05, + "loss": 2.4789, + "step": 1460 + }, + { + "epoch": 0.05, + "grad_norm": 0.6773544549942017, + "learning_rate": 1.9947878345661748e-05, + "loss": 2.4532, + "step": 1461 + }, + { + "epoch": 0.05, + "grad_norm": 0.6997448801994324, + "learning_rate": 1.9947769910011887e-05, + "loss": 2.4173, + "step": 1462 + }, + { + "epoch": 0.05, + "grad_norm": 0.6456718444824219, + "learning_rate": 1.994766136197795e-05, + "loss": 2.4131, + "step": 1463 + }, + { + "epoch": 0.05, + "grad_norm": 0.6528633832931519, + "learning_rate": 1.9947552701561168e-05, + "loss": 2.4492, + "step": 1464 + }, + { + "epoch": 0.05, + "grad_norm": 0.6766114234924316, + "learning_rate": 1.9947443928762766e-05, + "loss": 2.4406, + "step": 1465 + }, + { + "epoch": 0.05, + "grad_norm": 0.6753530502319336, + "learning_rate": 1.9947335043583965e-05, + "loss": 2.4842, + "step": 1466 + }, + { + "epoch": 0.05, + "grad_norm": 0.6650490760803223, + "learning_rate": 1.994722604602601e-05, + "loss": 2.44, + "step": 1467 + }, + { + "epoch": 0.05, + "grad_norm": 0.6383075714111328, + "learning_rate": 1.994711693609012e-05, + "loss": 2.4848, + "step": 1468 + }, + { + "epoch": 0.05, + "grad_norm": 0.6608891487121582, + "learning_rate": 1.9947007713777537e-05, + "loss": 2.3905, + "step": 1469 + }, + { + "epoch": 0.05, + "grad_norm": 0.7179611325263977, + "learning_rate": 1.994689837908949e-05, + "loss": 2.4612, + "step": 1470 + }, + { + "epoch": 0.05, + "grad_norm": 0.6353431940078735, + "learning_rate": 1.9946788932027212e-05, + "loss": 2.366, + "step": 1471 + }, + { + "epoch": 0.05, + "grad_norm": 0.6647564768791199, + "learning_rate": 1.994667937259194e-05, + "loss": 2.4345, + "step": 1472 + }, + { + "epoch": 0.05, + "grad_norm": 0.6502447724342346, + "learning_rate": 1.9946569700784918e-05, + "loss": 2.4329, + "step": 1473 + }, + { + "epoch": 0.05, + "grad_norm": 0.6676260828971863, + "learning_rate": 1.994645991660738e-05, + "loss": 2.4561, + "step": 1474 + }, + { + "epoch": 0.05, + "grad_norm": 0.6598013043403625, + "learning_rate": 1.9946350020060565e-05, + "loss": 2.4406, + "step": 1475 + }, + { + "epoch": 0.05, + "grad_norm": 0.64496248960495, + "learning_rate": 1.9946240011145722e-05, + "loss": 2.4118, + "step": 1476 + }, + { + "epoch": 0.05, + "grad_norm": 0.6442719101905823, + "learning_rate": 1.994612988986408e-05, + "loss": 2.4802, + "step": 1477 + }, + { + "epoch": 0.05, + "grad_norm": 0.6791317462921143, + "learning_rate": 1.9946019656216896e-05, + "loss": 2.4789, + "step": 1478 + }, + { + "epoch": 0.05, + "grad_norm": 0.6613801717758179, + "learning_rate": 1.9945909310205413e-05, + "loss": 2.3991, + "step": 1479 + }, + { + "epoch": 0.05, + "grad_norm": 0.6486839652061462, + "learning_rate": 1.9945798851830873e-05, + "loss": 2.4855, + "step": 1480 + }, + { + "epoch": 0.05, + "grad_norm": 0.6682941317558289, + "learning_rate": 1.994568828109453e-05, + "loss": 2.5033, + "step": 1481 + }, + { + "epoch": 0.05, + "grad_norm": 0.6302229762077332, + "learning_rate": 1.9945577597997622e-05, + "loss": 2.4025, + "step": 1482 + }, + { + "epoch": 0.05, + "grad_norm": 0.6426867842674255, + "learning_rate": 1.9945466802541415e-05, + "loss": 2.4209, + "step": 1483 + }, + { + "epoch": 0.05, + "grad_norm": 0.6459939479827881, + "learning_rate": 1.9945355894727147e-05, + "loss": 2.4595, + "step": 1484 + }, + { + "epoch": 0.05, + "grad_norm": 0.7037394642829895, + "learning_rate": 1.9945244874556077e-05, + "loss": 2.4495, + "step": 1485 + }, + { + "epoch": 0.05, + "grad_norm": 0.6541789174079895, + "learning_rate": 1.994513374202946e-05, + "loss": 2.4435, + "step": 1486 + }, + { + "epoch": 0.05, + "grad_norm": 0.6893953084945679, + "learning_rate": 1.9945022497148553e-05, + "loss": 2.4323, + "step": 1487 + }, + { + "epoch": 0.05, + "grad_norm": 0.6722946166992188, + "learning_rate": 1.994491113991461e-05, + "loss": 2.4677, + "step": 1488 + }, + { + "epoch": 0.05, + "grad_norm": 0.7074438333511353, + "learning_rate": 1.9944799670328888e-05, + "loss": 2.4202, + "step": 1489 + }, + { + "epoch": 0.05, + "grad_norm": 0.6615106463432312, + "learning_rate": 1.9944688088392644e-05, + "loss": 2.4234, + "step": 1490 + }, + { + "epoch": 0.05, + "grad_norm": 0.6668723821640015, + "learning_rate": 1.9944576394107144e-05, + "loss": 2.3832, + "step": 1491 + }, + { + "epoch": 0.05, + "grad_norm": 0.6496140956878662, + "learning_rate": 1.994446458747365e-05, + "loss": 2.4246, + "step": 1492 + }, + { + "epoch": 0.05, + "grad_norm": 0.6784415245056152, + "learning_rate": 1.9944352668493422e-05, + "loss": 2.3942, + "step": 1493 + }, + { + "epoch": 0.05, + "grad_norm": 0.697881817817688, + "learning_rate": 1.9944240637167728e-05, + "loss": 2.4847, + "step": 1494 + }, + { + "epoch": 0.05, + "grad_norm": 0.6393730044364929, + "learning_rate": 1.994412849349783e-05, + "loss": 2.4267, + "step": 1495 + }, + { + "epoch": 0.05, + "grad_norm": 0.6433627605438232, + "learning_rate": 1.994401623748499e-05, + "loss": 2.4117, + "step": 1496 + }, + { + "epoch": 0.05, + "grad_norm": 0.7175366878509521, + "learning_rate": 1.994390386913049e-05, + "loss": 2.3979, + "step": 1497 + }, + { + "epoch": 0.05, + "grad_norm": 0.6627487540245056, + "learning_rate": 1.994379138843559e-05, + "loss": 2.482, + "step": 1498 + }, + { + "epoch": 0.05, + "grad_norm": 0.6880819201469421, + "learning_rate": 1.994367879540156e-05, + "loss": 2.4718, + "step": 1499 + }, + { + "epoch": 0.05, + "grad_norm": 0.6599110960960388, + "learning_rate": 1.9943566090029675e-05, + "loss": 2.3975, + "step": 1500 + }, + { + "epoch": 0.05, + "grad_norm": 0.66238933801651, + "learning_rate": 1.994345327232121e-05, + "loss": 2.4052, + "step": 1501 + }, + { + "epoch": 0.05, + "grad_norm": 0.6406200528144836, + "learning_rate": 1.9943340342277437e-05, + "loss": 2.4193, + "step": 1502 + }, + { + "epoch": 0.05, + "grad_norm": 0.643899142742157, + "learning_rate": 1.994322729989963e-05, + "loss": 2.4207, + "step": 1503 + }, + { + "epoch": 0.05, + "grad_norm": 0.6523613929748535, + "learning_rate": 1.994311414518907e-05, + "loss": 2.3813, + "step": 1504 + }, + { + "epoch": 0.05, + "grad_norm": 0.6831004023551941, + "learning_rate": 1.9943000878147034e-05, + "loss": 2.4456, + "step": 1505 + }, + { + "epoch": 0.05, + "grad_norm": 0.6590916514396667, + "learning_rate": 1.9942887498774798e-05, + "loss": 2.4683, + "step": 1506 + }, + { + "epoch": 0.05, + "grad_norm": 0.6561903953552246, + "learning_rate": 1.994277400707365e-05, + "loss": 2.4258, + "step": 1507 + }, + { + "epoch": 0.05, + "grad_norm": 0.6884208917617798, + "learning_rate": 1.994266040304487e-05, + "loss": 2.4078, + "step": 1508 + }, + { + "epoch": 0.05, + "grad_norm": 0.6622353792190552, + "learning_rate": 1.9942546686689736e-05, + "loss": 2.4797, + "step": 1509 + }, + { + "epoch": 0.05, + "grad_norm": 0.6397408246994019, + "learning_rate": 1.9942432858009537e-05, + "loss": 2.4671, + "step": 1510 + }, + { + "epoch": 0.05, + "grad_norm": 0.6506167650222778, + "learning_rate": 1.994231891700556e-05, + "loss": 2.4709, + "step": 1511 + }, + { + "epoch": 0.05, + "grad_norm": 0.67271488904953, + "learning_rate": 1.9942204863679087e-05, + "loss": 2.4528, + "step": 1512 + }, + { + "epoch": 0.05, + "grad_norm": 0.6726418137550354, + "learning_rate": 1.9942090698031415e-05, + "loss": 2.448, + "step": 1513 + }, + { + "epoch": 0.05, + "grad_norm": 0.6408865451812744, + "learning_rate": 1.9941976420063826e-05, + "loss": 2.4428, + "step": 1514 + }, + { + "epoch": 0.05, + "grad_norm": 0.6632372140884399, + "learning_rate": 1.994186202977761e-05, + "loss": 2.3638, + "step": 1515 + }, + { + "epoch": 0.05, + "grad_norm": 0.6702383756637573, + "learning_rate": 1.994174752717407e-05, + "loss": 2.4671, + "step": 1516 + }, + { + "epoch": 0.05, + "grad_norm": 0.6440345048904419, + "learning_rate": 1.994163291225449e-05, + "loss": 2.4225, + "step": 1517 + }, + { + "epoch": 0.05, + "grad_norm": 0.6779958009719849, + "learning_rate": 1.994151818502017e-05, + "loss": 2.4266, + "step": 1518 + }, + { + "epoch": 0.05, + "grad_norm": 0.6389851570129395, + "learning_rate": 1.99414033454724e-05, + "loss": 2.4749, + "step": 1519 + }, + { + "epoch": 0.05, + "grad_norm": 0.6462379693984985, + "learning_rate": 1.9941288393612482e-05, + "loss": 2.4849, + "step": 1520 + }, + { + "epoch": 0.05, + "grad_norm": 0.6789087653160095, + "learning_rate": 1.9941173329441716e-05, + "loss": 2.4594, + "step": 1521 + }, + { + "epoch": 0.05, + "grad_norm": 0.7006807923316956, + "learning_rate": 1.99410581529614e-05, + "loss": 2.3854, + "step": 1522 + }, + { + "epoch": 0.05, + "grad_norm": 0.6482889652252197, + "learning_rate": 1.9940942864172834e-05, + "loss": 2.4475, + "step": 1523 + }, + { + "epoch": 0.05, + "grad_norm": 0.6787514686584473, + "learning_rate": 1.9940827463077325e-05, + "loss": 2.4828, + "step": 1524 + }, + { + "epoch": 0.05, + "grad_norm": 0.6578748226165771, + "learning_rate": 1.9940711949676166e-05, + "loss": 2.4834, + "step": 1525 + }, + { + "epoch": 0.05, + "grad_norm": 0.6645954847335815, + "learning_rate": 1.9940596323970674e-05, + "loss": 2.422, + "step": 1526 + }, + { + "epoch": 0.05, + "grad_norm": 0.6895474195480347, + "learning_rate": 1.9940480585962148e-05, + "loss": 2.5313, + "step": 1527 + }, + { + "epoch": 0.05, + "grad_norm": 0.6712403297424316, + "learning_rate": 1.99403647356519e-05, + "loss": 2.4299, + "step": 1528 + }, + { + "epoch": 0.05, + "grad_norm": 0.655525267124176, + "learning_rate": 1.994024877304124e-05, + "loss": 2.3436, + "step": 1529 + }, + { + "epoch": 0.05, + "grad_norm": 0.6689746379852295, + "learning_rate": 1.994013269813147e-05, + "loss": 2.4294, + "step": 1530 + }, + { + "epoch": 0.05, + "grad_norm": 0.6445267200469971, + "learning_rate": 1.9940016510923907e-05, + "loss": 2.4486, + "step": 1531 + }, + { + "epoch": 0.05, + "grad_norm": 0.6358187794685364, + "learning_rate": 1.9939900211419863e-05, + "loss": 2.423, + "step": 1532 + }, + { + "epoch": 0.05, + "grad_norm": 0.6302741765975952, + "learning_rate": 1.9939783799620653e-05, + "loss": 2.3782, + "step": 1533 + }, + { + "epoch": 0.05, + "grad_norm": 0.6615095138549805, + "learning_rate": 1.993966727552759e-05, + "loss": 2.4817, + "step": 1534 + }, + { + "epoch": 0.05, + "grad_norm": 0.6813662052154541, + "learning_rate": 1.993955063914199e-05, + "loss": 2.3707, + "step": 1535 + }, + { + "epoch": 0.05, + "grad_norm": 0.7147194743156433, + "learning_rate": 1.9939433890465173e-05, + "loss": 2.3969, + "step": 1536 + }, + { + "epoch": 0.05, + "grad_norm": 0.6699428558349609, + "learning_rate": 1.9939317029498458e-05, + "loss": 2.4376, + "step": 1537 + }, + { + "epoch": 0.05, + "grad_norm": 0.6649152040481567, + "learning_rate": 1.9939200056243164e-05, + "loss": 2.4177, + "step": 1538 + }, + { + "epoch": 0.05, + "grad_norm": 0.6570271253585815, + "learning_rate": 1.9939082970700612e-05, + "loss": 2.3663, + "step": 1539 + }, + { + "epoch": 0.05, + "grad_norm": 0.6725113987922668, + "learning_rate": 1.9938965772872124e-05, + "loss": 2.3496, + "step": 1540 + }, + { + "epoch": 0.05, + "grad_norm": 0.6776495575904846, + "learning_rate": 1.993884846275903e-05, + "loss": 2.4243, + "step": 1541 + }, + { + "epoch": 0.05, + "grad_norm": 0.6712349057197571, + "learning_rate": 1.993873104036265e-05, + "loss": 2.4676, + "step": 1542 + }, + { + "epoch": 0.05, + "grad_norm": 0.7130674123764038, + "learning_rate": 1.993861350568431e-05, + "loss": 2.4445, + "step": 1543 + }, + { + "epoch": 0.05, + "grad_norm": 0.7261738181114197, + "learning_rate": 1.9938495858725334e-05, + "loss": 2.4102, + "step": 1544 + }, + { + "epoch": 0.05, + "grad_norm": 0.6606306433677673, + "learning_rate": 1.993837809948706e-05, + "loss": 2.458, + "step": 1545 + }, + { + "epoch": 0.05, + "grad_norm": 0.7028478384017944, + "learning_rate": 1.993826022797082e-05, + "loss": 2.4384, + "step": 1546 + }, + { + "epoch": 0.05, + "grad_norm": 0.6371980905532837, + "learning_rate": 1.9938142244177935e-05, + "loss": 2.4642, + "step": 1547 + }, + { + "epoch": 0.05, + "grad_norm": 0.6604588627815247, + "learning_rate": 1.9938024148109742e-05, + "loss": 2.3999, + "step": 1548 + }, + { + "epoch": 0.05, + "grad_norm": 0.6582524180412292, + "learning_rate": 1.9937905939767583e-05, + "loss": 2.422, + "step": 1549 + }, + { + "epoch": 0.05, + "grad_norm": 0.6685971617698669, + "learning_rate": 1.993778761915278e-05, + "loss": 2.4648, + "step": 1550 + }, + { + "epoch": 0.05, + "grad_norm": 0.6981533169746399, + "learning_rate": 1.9937669186266678e-05, + "loss": 2.4648, + "step": 1551 + }, + { + "epoch": 0.05, + "grad_norm": 0.6834406852722168, + "learning_rate": 1.9937550641110615e-05, + "loss": 2.429, + "step": 1552 + }, + { + "epoch": 0.05, + "grad_norm": 0.6810020804405212, + "learning_rate": 1.993743198368593e-05, + "loss": 2.4485, + "step": 1553 + }, + { + "epoch": 0.05, + "grad_norm": 0.6478970646858215, + "learning_rate": 1.9937313213993962e-05, + "loss": 2.4009, + "step": 1554 + }, + { + "epoch": 0.05, + "grad_norm": 0.6841230392456055, + "learning_rate": 1.993719433203605e-05, + "loss": 2.4762, + "step": 1555 + }, + { + "epoch": 0.05, + "grad_norm": 0.6615786552429199, + "learning_rate": 1.9937075337813545e-05, + "loss": 2.4232, + "step": 1556 + }, + { + "epoch": 0.05, + "grad_norm": 0.6891390085220337, + "learning_rate": 1.9936956231327783e-05, + "loss": 2.4517, + "step": 1557 + }, + { + "epoch": 0.05, + "grad_norm": 0.6467688679695129, + "learning_rate": 1.9936837012580113e-05, + "loss": 2.4618, + "step": 1558 + }, + { + "epoch": 0.05, + "grad_norm": 0.6353906393051147, + "learning_rate": 1.9936717681571883e-05, + "loss": 2.3032, + "step": 1559 + }, + { + "epoch": 0.05, + "grad_norm": 0.662642240524292, + "learning_rate": 1.9936598238304442e-05, + "loss": 2.3919, + "step": 1560 + }, + { + "epoch": 0.05, + "grad_norm": 0.6605132818222046, + "learning_rate": 1.9936478682779135e-05, + "loss": 2.4362, + "step": 1561 + }, + { + "epoch": 0.05, + "grad_norm": 0.6424047350883484, + "learning_rate": 1.9936359014997314e-05, + "loss": 2.4628, + "step": 1562 + }, + { + "epoch": 0.05, + "grad_norm": 0.676135241985321, + "learning_rate": 1.9936239234960337e-05, + "loss": 2.4219, + "step": 1563 + }, + { + "epoch": 0.05, + "grad_norm": 0.6838515996932983, + "learning_rate": 1.9936119342669546e-05, + "loss": 2.4443, + "step": 1564 + }, + { + "epoch": 0.05, + "grad_norm": 0.6867080330848694, + "learning_rate": 1.9935999338126307e-05, + "loss": 2.4302, + "step": 1565 + }, + { + "epoch": 0.05, + "grad_norm": 0.6572315692901611, + "learning_rate": 1.9935879221331968e-05, + "loss": 2.442, + "step": 1566 + }, + { + "epoch": 0.05, + "grad_norm": 0.6354489326477051, + "learning_rate": 1.9935758992287888e-05, + "loss": 2.4566, + "step": 1567 + }, + { + "epoch": 0.05, + "grad_norm": 0.6707313656806946, + "learning_rate": 1.993563865099543e-05, + "loss": 2.4622, + "step": 1568 + }, + { + "epoch": 0.05, + "grad_norm": 0.6528519988059998, + "learning_rate": 1.9935518197455945e-05, + "loss": 2.3938, + "step": 1569 + }, + { + "epoch": 0.05, + "grad_norm": 0.7114832401275635, + "learning_rate": 1.99353976316708e-05, + "loss": 2.4286, + "step": 1570 + }, + { + "epoch": 0.05, + "grad_norm": 0.6804948449134827, + "learning_rate": 1.9935276953641357e-05, + "loss": 2.3996, + "step": 1571 + }, + { + "epoch": 0.05, + "grad_norm": 0.6786565184593201, + "learning_rate": 1.993515616336897e-05, + "loss": 2.3967, + "step": 1572 + }, + { + "epoch": 0.05, + "grad_norm": 0.6636152863502502, + "learning_rate": 1.993503526085502e-05, + "loss": 2.4333, + "step": 1573 + }, + { + "epoch": 0.05, + "grad_norm": 0.6386958956718445, + "learning_rate": 1.9934914246100862e-05, + "loss": 2.3645, + "step": 1574 + }, + { + "epoch": 0.05, + "grad_norm": 0.6662698984146118, + "learning_rate": 1.9934793119107864e-05, + "loss": 2.4286, + "step": 1575 + }, + { + "epoch": 0.05, + "grad_norm": 0.6887092590332031, + "learning_rate": 1.9934671879877393e-05, + "loss": 2.4271, + "step": 1576 + }, + { + "epoch": 0.05, + "grad_norm": 0.6771765351295471, + "learning_rate": 1.9934550528410825e-05, + "loss": 2.5199, + "step": 1577 + }, + { + "epoch": 0.05, + "grad_norm": 0.6455226540565491, + "learning_rate": 1.9934429064709525e-05, + "loss": 2.4054, + "step": 1578 + }, + { + "epoch": 0.05, + "grad_norm": 0.7230437397956848, + "learning_rate": 1.9934307488774872e-05, + "loss": 2.5009, + "step": 1579 + }, + { + "epoch": 0.05, + "grad_norm": 0.6783396601676941, + "learning_rate": 1.9934185800608232e-05, + "loss": 2.472, + "step": 1580 + }, + { + "epoch": 0.05, + "grad_norm": 0.6692841053009033, + "learning_rate": 1.9934064000210983e-05, + "loss": 2.4002, + "step": 1581 + }, + { + "epoch": 0.05, + "grad_norm": 0.6551260352134705, + "learning_rate": 1.99339420875845e-05, + "loss": 2.4324, + "step": 1582 + }, + { + "epoch": 0.05, + "grad_norm": 0.672709047794342, + "learning_rate": 1.9933820062730162e-05, + "loss": 2.4216, + "step": 1583 + }, + { + "epoch": 0.05, + "grad_norm": 0.6607254147529602, + "learning_rate": 1.993369792564935e-05, + "loss": 2.4349, + "step": 1584 + }, + { + "epoch": 0.05, + "grad_norm": 0.6395419239997864, + "learning_rate": 1.9933575676343435e-05, + "loss": 2.3873, + "step": 1585 + }, + { + "epoch": 0.05, + "grad_norm": 0.6873798370361328, + "learning_rate": 1.9933453314813808e-05, + "loss": 2.4809, + "step": 1586 + }, + { + "epoch": 0.05, + "grad_norm": 0.734438955783844, + "learning_rate": 1.9933330841061842e-05, + "loss": 2.4436, + "step": 1587 + }, + { + "epoch": 0.05, + "grad_norm": 0.67962646484375, + "learning_rate": 1.9933208255088928e-05, + "loss": 2.4159, + "step": 1588 + }, + { + "epoch": 0.05, + "grad_norm": 0.6698024272918701, + "learning_rate": 1.993308555689645e-05, + "loss": 2.4169, + "step": 1589 + }, + { + "epoch": 0.05, + "grad_norm": 0.6622918248176575, + "learning_rate": 1.9932962746485793e-05, + "loss": 2.4357, + "step": 1590 + }, + { + "epoch": 0.05, + "grad_norm": 0.6909894347190857, + "learning_rate": 1.9932839823858343e-05, + "loss": 2.4753, + "step": 1591 + }, + { + "epoch": 0.05, + "grad_norm": 0.6382946968078613, + "learning_rate": 1.993271678901549e-05, + "loss": 2.4403, + "step": 1592 + }, + { + "epoch": 0.05, + "grad_norm": 0.6618476510047913, + "learning_rate": 1.9932593641958624e-05, + "loss": 2.3979, + "step": 1593 + }, + { + "epoch": 0.05, + "grad_norm": 0.6698614954948425, + "learning_rate": 1.9932470382689137e-05, + "loss": 2.3737, + "step": 1594 + }, + { + "epoch": 0.05, + "grad_norm": 0.6669596433639526, + "learning_rate": 1.993234701120842e-05, + "loss": 2.3625, + "step": 1595 + }, + { + "epoch": 0.05, + "grad_norm": 0.7005311250686646, + "learning_rate": 1.9932223527517864e-05, + "loss": 2.4255, + "step": 1596 + }, + { + "epoch": 0.05, + "grad_norm": 0.6587526798248291, + "learning_rate": 1.9932099931618873e-05, + "loss": 2.4389, + "step": 1597 + }, + { + "epoch": 0.05, + "grad_norm": 0.6763669848442078, + "learning_rate": 1.9931976223512834e-05, + "loss": 2.4017, + "step": 1598 + }, + { + "epoch": 0.05, + "grad_norm": 0.645672619342804, + "learning_rate": 1.993185240320115e-05, + "loss": 2.3812, + "step": 1599 + }, + { + "epoch": 0.05, + "grad_norm": 0.6708835363388062, + "learning_rate": 1.9931728470685215e-05, + "loss": 2.4076, + "step": 1600 + }, + { + "epoch": 0.05, + "grad_norm": 0.6925601959228516, + "learning_rate": 1.9931604425966437e-05, + "loss": 2.4903, + "step": 1601 + }, + { + "epoch": 0.05, + "grad_norm": 0.6661214828491211, + "learning_rate": 1.993148026904621e-05, + "loss": 2.4037, + "step": 1602 + }, + { + "epoch": 0.05, + "grad_norm": 0.6573379635810852, + "learning_rate": 1.9931355999925942e-05, + "loss": 2.4497, + "step": 1603 + }, + { + "epoch": 0.05, + "grad_norm": 0.6751570105552673, + "learning_rate": 1.993123161860703e-05, + "loss": 2.4477, + "step": 1604 + }, + { + "epoch": 0.05, + "grad_norm": 0.6991583704948425, + "learning_rate": 1.9931107125090888e-05, + "loss": 2.4284, + "step": 1605 + }, + { + "epoch": 0.05, + "grad_norm": 0.7075982689857483, + "learning_rate": 1.9930982519378913e-05, + "loss": 2.4594, + "step": 1606 + }, + { + "epoch": 0.05, + "grad_norm": 0.6452618837356567, + "learning_rate": 1.9930857801472524e-05, + "loss": 2.4403, + "step": 1607 + }, + { + "epoch": 0.05, + "grad_norm": 0.684503972530365, + "learning_rate": 1.9930732971373118e-05, + "loss": 2.414, + "step": 1608 + }, + { + "epoch": 0.05, + "grad_norm": 0.6694889068603516, + "learning_rate": 1.9930608029082114e-05, + "loss": 2.537, + "step": 1609 + }, + { + "epoch": 0.05, + "grad_norm": 0.6663505434989929, + "learning_rate": 1.9930482974600916e-05, + "loss": 2.4709, + "step": 1610 + }, + { + "epoch": 0.05, + "grad_norm": 0.6709225177764893, + "learning_rate": 1.9930357807930946e-05, + "loss": 2.4, + "step": 1611 + }, + { + "epoch": 0.05, + "grad_norm": 0.6499970555305481, + "learning_rate": 1.9930232529073613e-05, + "loss": 2.4283, + "step": 1612 + }, + { + "epoch": 0.05, + "grad_norm": 0.6813496351242065, + "learning_rate": 1.993010713803033e-05, + "loss": 2.4253, + "step": 1613 + }, + { + "epoch": 0.05, + "grad_norm": 0.6806023716926575, + "learning_rate": 1.992998163480252e-05, + "loss": 2.3823, + "step": 1614 + }, + { + "epoch": 0.05, + "grad_norm": 0.6823223829269409, + "learning_rate": 1.9929856019391593e-05, + "loss": 2.3888, + "step": 1615 + }, + { + "epoch": 0.05, + "grad_norm": 0.6910129189491272, + "learning_rate": 1.9929730291798973e-05, + "loss": 2.3644, + "step": 1616 + }, + { + "epoch": 0.05, + "grad_norm": 0.6463114619255066, + "learning_rate": 1.992960445202608e-05, + "loss": 2.4517, + "step": 1617 + }, + { + "epoch": 0.05, + "grad_norm": 0.6912835240364075, + "learning_rate": 1.9929478500074336e-05, + "loss": 2.4715, + "step": 1618 + }, + { + "epoch": 0.05, + "grad_norm": 0.6860828995704651, + "learning_rate": 1.9929352435945166e-05, + "loss": 2.4235, + "step": 1619 + }, + { + "epoch": 0.05, + "grad_norm": 0.7106069922447205, + "learning_rate": 1.9929226259639983e-05, + "loss": 2.482, + "step": 1620 + }, + { + "epoch": 0.05, + "grad_norm": 0.6664543747901917, + "learning_rate": 1.9929099971160228e-05, + "loss": 2.4067, + "step": 1621 + }, + { + "epoch": 0.05, + "grad_norm": 0.6521303653717041, + "learning_rate": 1.9928973570507317e-05, + "loss": 2.3995, + "step": 1622 + }, + { + "epoch": 0.05, + "grad_norm": 0.660266637802124, + "learning_rate": 1.9928847057682683e-05, + "loss": 2.4643, + "step": 1623 + }, + { + "epoch": 0.05, + "grad_norm": 0.643988847732544, + "learning_rate": 1.9928720432687753e-05, + "loss": 2.4582, + "step": 1624 + }, + { + "epoch": 0.05, + "grad_norm": 0.684067964553833, + "learning_rate": 1.992859369552396e-05, + "loss": 2.4127, + "step": 1625 + }, + { + "epoch": 0.05, + "grad_norm": 0.665391743183136, + "learning_rate": 1.9928466846192732e-05, + "loss": 2.4157, + "step": 1626 + }, + { + "epoch": 0.05, + "grad_norm": 0.6780904531478882, + "learning_rate": 1.9928339884695505e-05, + "loss": 2.4072, + "step": 1627 + }, + { + "epoch": 0.05, + "grad_norm": 0.6330990791320801, + "learning_rate": 1.9928212811033714e-05, + "loss": 2.3887, + "step": 1628 + }, + { + "epoch": 0.05, + "grad_norm": 0.667547345161438, + "learning_rate": 1.992808562520879e-05, + "loss": 2.4574, + "step": 1629 + }, + { + "epoch": 0.05, + "grad_norm": 0.6760832071304321, + "learning_rate": 1.9927958327222178e-05, + "loss": 2.3935, + "step": 1630 + }, + { + "epoch": 0.05, + "grad_norm": 0.6605042815208435, + "learning_rate": 1.992783091707531e-05, + "loss": 2.4841, + "step": 1631 + }, + { + "epoch": 0.05, + "grad_norm": 0.6503846645355225, + "learning_rate": 1.9927703394769623e-05, + "loss": 2.422, + "step": 1632 + }, + { + "epoch": 0.05, + "grad_norm": 0.6339687705039978, + "learning_rate": 1.9927575760306562e-05, + "loss": 2.455, + "step": 1633 + }, + { + "epoch": 0.05, + "grad_norm": 0.6454290747642517, + "learning_rate": 1.9927448013687568e-05, + "loss": 2.3573, + "step": 1634 + }, + { + "epoch": 0.05, + "grad_norm": 0.6633464694023132, + "learning_rate": 1.9927320154914086e-05, + "loss": 2.4031, + "step": 1635 + }, + { + "epoch": 0.05, + "grad_norm": 0.6876009702682495, + "learning_rate": 1.992719218398756e-05, + "loss": 2.4137, + "step": 1636 + }, + { + "epoch": 0.05, + "grad_norm": 0.6541686058044434, + "learning_rate": 1.9927064100909433e-05, + "loss": 2.3556, + "step": 1637 + }, + { + "epoch": 0.05, + "grad_norm": 0.6572750806808472, + "learning_rate": 1.9926935905681152e-05, + "loss": 2.4638, + "step": 1638 + }, + { + "epoch": 0.05, + "grad_norm": 0.6526206731796265, + "learning_rate": 1.992680759830417e-05, + "loss": 2.4229, + "step": 1639 + }, + { + "epoch": 0.05, + "grad_norm": 0.6827916502952576, + "learning_rate": 1.9926679178779933e-05, + "loss": 2.3862, + "step": 1640 + }, + { + "epoch": 0.05, + "grad_norm": 0.6964197158813477, + "learning_rate": 1.992655064710989e-05, + "loss": 2.4201, + "step": 1641 + }, + { + "epoch": 0.05, + "grad_norm": 0.6807335615158081, + "learning_rate": 1.9926422003295497e-05, + "loss": 2.4586, + "step": 1642 + }, + { + "epoch": 0.05, + "grad_norm": 0.6591370105743408, + "learning_rate": 1.9926293247338205e-05, + "loss": 2.3595, + "step": 1643 + }, + { + "epoch": 0.05, + "grad_norm": 0.6882135272026062, + "learning_rate": 1.992616437923947e-05, + "loss": 2.4286, + "step": 1644 + }, + { + "epoch": 0.05, + "grad_norm": 0.6745463013648987, + "learning_rate": 1.9926035399000746e-05, + "loss": 2.4701, + "step": 1645 + }, + { + "epoch": 0.05, + "grad_norm": 0.664196789264679, + "learning_rate": 1.9925906306623492e-05, + "loss": 2.4409, + "step": 1646 + }, + { + "epoch": 0.05, + "grad_norm": 0.6802324056625366, + "learning_rate": 1.9925777102109166e-05, + "loss": 2.4362, + "step": 1647 + }, + { + "epoch": 0.05, + "grad_norm": 0.6947612762451172, + "learning_rate": 1.992564778545923e-05, + "loss": 2.3822, + "step": 1648 + }, + { + "epoch": 0.05, + "grad_norm": 0.705377459526062, + "learning_rate": 1.992551835667514e-05, + "loss": 2.4534, + "step": 1649 + }, + { + "epoch": 0.05, + "grad_norm": 0.6672513484954834, + "learning_rate": 1.9925388815758358e-05, + "loss": 2.4627, + "step": 1650 + }, + { + "epoch": 0.05, + "grad_norm": 0.6844388246536255, + "learning_rate": 1.9925259162710352e-05, + "loss": 2.4271, + "step": 1651 + }, + { + "epoch": 0.05, + "grad_norm": 0.6461808681488037, + "learning_rate": 1.9925129397532582e-05, + "loss": 2.4519, + "step": 1652 + }, + { + "epoch": 0.05, + "grad_norm": 0.6839893460273743, + "learning_rate": 1.992499952022652e-05, + "loss": 2.4025, + "step": 1653 + }, + { + "epoch": 0.06, + "grad_norm": 0.6754345893859863, + "learning_rate": 1.992486953079363e-05, + "loss": 2.4176, + "step": 1654 + }, + { + "epoch": 0.06, + "grad_norm": 0.6423801779747009, + "learning_rate": 1.9924739429235382e-05, + "loss": 2.4293, + "step": 1655 + }, + { + "epoch": 0.06, + "grad_norm": 0.6501866579055786, + "learning_rate": 1.9924609215553243e-05, + "loss": 2.3941, + "step": 1656 + }, + { + "epoch": 0.06, + "grad_norm": 0.655415952205658, + "learning_rate": 1.9924478889748685e-05, + "loss": 2.366, + "step": 1657 + }, + { + "epoch": 0.06, + "grad_norm": 0.6413949131965637, + "learning_rate": 1.992434845182318e-05, + "loss": 2.3983, + "step": 1658 + }, + { + "epoch": 0.06, + "grad_norm": 0.689906895160675, + "learning_rate": 1.9924217901778202e-05, + "loss": 2.4081, + "step": 1659 + }, + { + "epoch": 0.06, + "grad_norm": 0.6907313466072083, + "learning_rate": 1.9924087239615225e-05, + "loss": 2.507, + "step": 1660 + }, + { + "epoch": 0.06, + "grad_norm": 0.6539212465286255, + "learning_rate": 1.992395646533573e-05, + "loss": 2.4434, + "step": 1661 + }, + { + "epoch": 0.06, + "grad_norm": 0.7002682685852051, + "learning_rate": 1.992382557894119e-05, + "loss": 2.4123, + "step": 1662 + }, + { + "epoch": 0.06, + "grad_norm": 0.6133070588111877, + "learning_rate": 1.9923694580433085e-05, + "loss": 2.4207, + "step": 1663 + }, + { + "epoch": 0.06, + "grad_norm": 0.6680501103401184, + "learning_rate": 1.9923563469812898e-05, + "loss": 2.3983, + "step": 1664 + }, + { + "epoch": 0.06, + "grad_norm": 0.7163813710212708, + "learning_rate": 1.99234322470821e-05, + "loss": 2.414, + "step": 1665 + }, + { + "epoch": 0.06, + "grad_norm": 0.6384507417678833, + "learning_rate": 1.992330091224218e-05, + "loss": 2.4251, + "step": 1666 + }, + { + "epoch": 0.06, + "grad_norm": 0.653640627861023, + "learning_rate": 1.9923169465294627e-05, + "loss": 2.4372, + "step": 1667 + }, + { + "epoch": 0.06, + "grad_norm": 0.6543007493019104, + "learning_rate": 1.992303790624092e-05, + "loss": 2.4155, + "step": 1668 + }, + { + "epoch": 0.06, + "grad_norm": 0.6777818202972412, + "learning_rate": 1.992290623508254e-05, + "loss": 2.3688, + "step": 1669 + }, + { + "epoch": 0.06, + "grad_norm": 0.673274040222168, + "learning_rate": 1.9922774451820988e-05, + "loss": 2.427, + "step": 1670 + }, + { + "epoch": 0.06, + "grad_norm": 0.6461830139160156, + "learning_rate": 1.9922642556457745e-05, + "loss": 2.3944, + "step": 1671 + }, + { + "epoch": 0.06, + "grad_norm": 0.6684697270393372, + "learning_rate": 1.99225105489943e-05, + "loss": 2.4229, + "step": 1672 + }, + { + "epoch": 0.06, + "grad_norm": 0.6868581771850586, + "learning_rate": 1.9922378429432142e-05, + "loss": 2.4115, + "step": 1673 + }, + { + "epoch": 0.06, + "grad_norm": 0.7135509848594666, + "learning_rate": 1.9922246197772774e-05, + "loss": 2.3571, + "step": 1674 + }, + { + "epoch": 0.06, + "grad_norm": 0.6706013083457947, + "learning_rate": 1.992211385401768e-05, + "loss": 2.4206, + "step": 1675 + }, + { + "epoch": 0.06, + "grad_norm": 0.6538805961608887, + "learning_rate": 1.992198139816836e-05, + "loss": 2.4177, + "step": 1676 + }, + { + "epoch": 0.06, + "grad_norm": 0.6860239505767822, + "learning_rate": 1.992184883022631e-05, + "loss": 2.5078, + "step": 1677 + }, + { + "epoch": 0.06, + "grad_norm": 0.6961548924446106, + "learning_rate": 1.9921716150193022e-05, + "loss": 2.379, + "step": 1678 + }, + { + "epoch": 0.06, + "grad_norm": 0.6848573684692383, + "learning_rate": 1.9921583358070005e-05, + "loss": 2.3842, + "step": 1679 + }, + { + "epoch": 0.06, + "grad_norm": 0.6472194790840149, + "learning_rate": 1.992145045385875e-05, + "loss": 2.391, + "step": 1680 + }, + { + "epoch": 0.06, + "grad_norm": 0.6962064504623413, + "learning_rate": 1.9921317437560766e-05, + "loss": 2.5019, + "step": 1681 + }, + { + "epoch": 0.06, + "grad_norm": 0.6933851838111877, + "learning_rate": 1.992118430917755e-05, + "loss": 2.4058, + "step": 1682 + }, + { + "epoch": 0.06, + "grad_norm": 0.6392589211463928, + "learning_rate": 1.9921051068710605e-05, + "loss": 2.4153, + "step": 1683 + }, + { + "epoch": 0.06, + "grad_norm": 0.6323282718658447, + "learning_rate": 1.992091771616144e-05, + "loss": 2.3463, + "step": 1684 + }, + { + "epoch": 0.06, + "grad_norm": 0.6988240480422974, + "learning_rate": 1.9920784251531567e-05, + "loss": 2.4045, + "step": 1685 + }, + { + "epoch": 0.06, + "grad_norm": 0.6445634365081787, + "learning_rate": 1.9920650674822486e-05, + "loss": 2.4675, + "step": 1686 + }, + { + "epoch": 0.06, + "grad_norm": 0.6991953253746033, + "learning_rate": 1.9920516986035703e-05, + "loss": 2.4553, + "step": 1687 + }, + { + "epoch": 0.06, + "grad_norm": 0.6528216004371643, + "learning_rate": 1.9920383185172736e-05, + "loss": 2.4123, + "step": 1688 + }, + { + "epoch": 0.06, + "grad_norm": 0.6391637921333313, + "learning_rate": 1.9920249272235095e-05, + "loss": 2.3931, + "step": 1689 + }, + { + "epoch": 0.06, + "grad_norm": 0.6762369871139526, + "learning_rate": 1.992011524722429e-05, + "loss": 2.4311, + "step": 1690 + }, + { + "epoch": 0.06, + "grad_norm": 0.6571438312530518, + "learning_rate": 1.9919981110141836e-05, + "loss": 2.4362, + "step": 1691 + }, + { + "epoch": 0.06, + "grad_norm": 0.6934826374053955, + "learning_rate": 1.991984686098925e-05, + "loss": 2.4657, + "step": 1692 + }, + { + "epoch": 0.06, + "grad_norm": 0.6816678643226624, + "learning_rate": 1.9919712499768048e-05, + "loss": 2.4073, + "step": 1693 + }, + { + "epoch": 0.06, + "grad_norm": 0.6610366702079773, + "learning_rate": 1.9919578026479745e-05, + "loss": 2.4363, + "step": 1694 + }, + { + "epoch": 0.06, + "grad_norm": 0.6828557848930359, + "learning_rate": 1.9919443441125867e-05, + "loss": 2.4359, + "step": 1695 + }, + { + "epoch": 0.06, + "grad_norm": 0.6646338105201721, + "learning_rate": 1.9919308743707927e-05, + "loss": 2.4003, + "step": 1696 + }, + { + "epoch": 0.06, + "grad_norm": 0.68185955286026, + "learning_rate": 1.991917393422745e-05, + "loss": 2.4012, + "step": 1697 + }, + { + "epoch": 0.06, + "grad_norm": 0.6393457055091858, + "learning_rate": 1.9919039012685962e-05, + "loss": 2.3605, + "step": 1698 + }, + { + "epoch": 0.06, + "grad_norm": 0.6696728467941284, + "learning_rate": 1.9918903979084985e-05, + "loss": 2.4643, + "step": 1699 + }, + { + "epoch": 0.06, + "grad_norm": 0.7348231077194214, + "learning_rate": 1.991876883342604e-05, + "loss": 2.3806, + "step": 1700 + }, + { + "epoch": 0.06, + "grad_norm": 0.7297537326812744, + "learning_rate": 1.9918633575710662e-05, + "loss": 2.4512, + "step": 1701 + }, + { + "epoch": 0.06, + "grad_norm": 0.6754732728004456, + "learning_rate": 1.991849820594037e-05, + "loss": 2.4335, + "step": 1702 + }, + { + "epoch": 0.06, + "grad_norm": 0.6498231291770935, + "learning_rate": 1.99183627241167e-05, + "loss": 2.4156, + "step": 1703 + }, + { + "epoch": 0.06, + "grad_norm": 0.6874418258666992, + "learning_rate": 1.991822713024118e-05, + "loss": 2.3507, + "step": 1704 + }, + { + "epoch": 0.06, + "grad_norm": 0.6841293573379517, + "learning_rate": 1.9918091424315348e-05, + "loss": 2.3088, + "step": 1705 + }, + { + "epoch": 0.06, + "grad_norm": 0.6642603278160095, + "learning_rate": 1.9917955606340724e-05, + "loss": 2.4436, + "step": 1706 + }, + { + "epoch": 0.06, + "grad_norm": 0.6520339250564575, + "learning_rate": 1.9917819676318854e-05, + "loss": 2.4135, + "step": 1707 + }, + { + "epoch": 0.06, + "grad_norm": 0.6486449837684631, + "learning_rate": 1.9917683634251272e-05, + "loss": 2.483, + "step": 1708 + }, + { + "epoch": 0.06, + "grad_norm": 0.6944218873977661, + "learning_rate": 1.9917547480139507e-05, + "loss": 2.4216, + "step": 1709 + }, + { + "epoch": 0.06, + "grad_norm": 0.6373897790908813, + "learning_rate": 1.9917411213985107e-05, + "loss": 2.4388, + "step": 1710 + }, + { + "epoch": 0.06, + "grad_norm": 0.6460530757904053, + "learning_rate": 1.9917274835789607e-05, + "loss": 2.4096, + "step": 1711 + }, + { + "epoch": 0.06, + "grad_norm": 0.6609753966331482, + "learning_rate": 1.9917138345554545e-05, + "loss": 2.4114, + "step": 1712 + }, + { + "epoch": 0.06, + "grad_norm": 0.6893213987350464, + "learning_rate": 1.991700174328147e-05, + "loss": 2.3611, + "step": 1713 + }, + { + "epoch": 0.06, + "grad_norm": 0.6588318943977356, + "learning_rate": 1.9916865028971918e-05, + "loss": 2.4215, + "step": 1714 + }, + { + "epoch": 0.06, + "grad_norm": 0.6442292928695679, + "learning_rate": 1.9916728202627437e-05, + "loss": 2.4183, + "step": 1715 + }, + { + "epoch": 0.06, + "grad_norm": 0.6383470892906189, + "learning_rate": 1.991659126424957e-05, + "loss": 2.4195, + "step": 1716 + }, + { + "epoch": 0.06, + "grad_norm": 0.648501455783844, + "learning_rate": 1.9916454213839873e-05, + "loss": 2.5091, + "step": 1717 + }, + { + "epoch": 0.06, + "grad_norm": 0.6657336354255676, + "learning_rate": 1.9916317051399885e-05, + "loss": 2.4844, + "step": 1718 + }, + { + "epoch": 0.06, + "grad_norm": 0.6767327785491943, + "learning_rate": 1.9916179776931156e-05, + "loss": 2.3965, + "step": 1719 + }, + { + "epoch": 0.06, + "grad_norm": 0.6539074778556824, + "learning_rate": 1.9916042390435245e-05, + "loss": 2.3497, + "step": 1720 + }, + { + "epoch": 0.06, + "grad_norm": 0.6573300957679749, + "learning_rate": 1.9915904891913694e-05, + "loss": 2.399, + "step": 1721 + }, + { + "epoch": 0.06, + "grad_norm": 0.6630330085754395, + "learning_rate": 1.9915767281368065e-05, + "loss": 2.3945, + "step": 1722 + }, + { + "epoch": 0.06, + "grad_norm": 0.6443758010864258, + "learning_rate": 1.9915629558799903e-05, + "loss": 2.3415, + "step": 1723 + }, + { + "epoch": 0.06, + "grad_norm": 0.673997700214386, + "learning_rate": 1.9915491724210772e-05, + "loss": 2.3922, + "step": 1724 + }, + { + "epoch": 0.06, + "grad_norm": 0.6867017149925232, + "learning_rate": 1.9915353777602226e-05, + "loss": 2.4566, + "step": 1725 + }, + { + "epoch": 0.06, + "grad_norm": 0.6897754073143005, + "learning_rate": 1.9915215718975826e-05, + "loss": 2.403, + "step": 1726 + }, + { + "epoch": 0.06, + "grad_norm": 0.6551098823547363, + "learning_rate": 1.9915077548333127e-05, + "loss": 2.425, + "step": 1727 + }, + { + "epoch": 0.06, + "grad_norm": 0.6676865816116333, + "learning_rate": 1.9914939265675694e-05, + "loss": 2.3723, + "step": 1728 + }, + { + "epoch": 0.06, + "grad_norm": 0.6359845995903015, + "learning_rate": 1.9914800871005085e-05, + "loss": 2.3827, + "step": 1729 + }, + { + "epoch": 0.06, + "grad_norm": 0.652776300907135, + "learning_rate": 1.991466236432287e-05, + "loss": 2.4373, + "step": 1730 + }, + { + "epoch": 0.06, + "grad_norm": 0.6557121872901917, + "learning_rate": 1.9914523745630608e-05, + "loss": 2.4005, + "step": 1731 + }, + { + "epoch": 0.06, + "grad_norm": 0.654423713684082, + "learning_rate": 1.991438501492987e-05, + "loss": 2.3411, + "step": 1732 + }, + { + "epoch": 0.06, + "grad_norm": 0.6976038217544556, + "learning_rate": 1.9914246172222217e-05, + "loss": 2.3755, + "step": 1733 + }, + { + "epoch": 0.06, + "grad_norm": 0.6480667591094971, + "learning_rate": 1.9914107217509223e-05, + "loss": 2.3473, + "step": 1734 + }, + { + "epoch": 0.06, + "grad_norm": 0.6538426280021667, + "learning_rate": 1.9913968150792456e-05, + "loss": 2.4296, + "step": 1735 + }, + { + "epoch": 0.06, + "grad_norm": 0.6696330308914185, + "learning_rate": 1.9913828972073488e-05, + "loss": 2.4355, + "step": 1736 + }, + { + "epoch": 0.06, + "grad_norm": 0.6323601603507996, + "learning_rate": 1.991368968135389e-05, + "loss": 2.3574, + "step": 1737 + }, + { + "epoch": 0.06, + "grad_norm": 0.658532977104187, + "learning_rate": 1.9913550278635234e-05, + "loss": 2.3955, + "step": 1738 + }, + { + "epoch": 0.06, + "grad_norm": 0.7092984914779663, + "learning_rate": 1.99134107639191e-05, + "loss": 2.3814, + "step": 1739 + }, + { + "epoch": 0.06, + "grad_norm": 0.6931276321411133, + "learning_rate": 1.9913271137207057e-05, + "loss": 2.395, + "step": 1740 + }, + { + "epoch": 0.06, + "grad_norm": 0.641856849193573, + "learning_rate": 1.991313139850069e-05, + "loss": 2.3926, + "step": 1741 + }, + { + "epoch": 0.06, + "grad_norm": 0.6465002298355103, + "learning_rate": 1.991299154780157e-05, + "loss": 2.3829, + "step": 1742 + }, + { + "epoch": 0.06, + "grad_norm": 0.7714751958847046, + "learning_rate": 1.9912851585111284e-05, + "loss": 2.4126, + "step": 1743 + }, + { + "epoch": 0.06, + "grad_norm": 0.6506043672561646, + "learning_rate": 1.9912711510431412e-05, + "loss": 2.4933, + "step": 1744 + }, + { + "epoch": 0.06, + "grad_norm": 0.6575149893760681, + "learning_rate": 1.991257132376353e-05, + "loss": 2.4109, + "step": 1745 + }, + { + "epoch": 0.06, + "grad_norm": 0.6435120105743408, + "learning_rate": 1.9912431025109232e-05, + "loss": 2.3785, + "step": 1746 + }, + { + "epoch": 0.06, + "grad_norm": 0.653723955154419, + "learning_rate": 1.9912290614470092e-05, + "loss": 2.4823, + "step": 1747 + }, + { + "epoch": 0.06, + "grad_norm": 0.6825549602508545, + "learning_rate": 1.9912150091847705e-05, + "loss": 2.4414, + "step": 1748 + }, + { + "epoch": 0.06, + "grad_norm": 0.6390737891197205, + "learning_rate": 1.991200945724366e-05, + "loss": 2.4204, + "step": 1749 + }, + { + "epoch": 0.06, + "grad_norm": 0.6617243885993958, + "learning_rate": 1.9911868710659532e-05, + "loss": 2.3667, + "step": 1750 + }, + { + "epoch": 0.06, + "grad_norm": 0.6704902052879333, + "learning_rate": 1.9911727852096925e-05, + "loss": 2.393, + "step": 1751 + }, + { + "epoch": 0.06, + "grad_norm": 0.6647295355796814, + "learning_rate": 1.9911586881557423e-05, + "loss": 2.434, + "step": 1752 + }, + { + "epoch": 0.06, + "grad_norm": 0.6292001605033875, + "learning_rate": 1.9911445799042622e-05, + "loss": 2.3912, + "step": 1753 + }, + { + "epoch": 0.06, + "grad_norm": 0.6650229692459106, + "learning_rate": 1.9911304604554118e-05, + "loss": 2.4618, + "step": 1754 + }, + { + "epoch": 0.06, + "grad_norm": 0.6842104196548462, + "learning_rate": 1.9911163298093502e-05, + "loss": 2.3811, + "step": 1755 + }, + { + "epoch": 0.06, + "grad_norm": 0.6654988527297974, + "learning_rate": 1.991102187966237e-05, + "loss": 2.4264, + "step": 1756 + }, + { + "epoch": 0.06, + "grad_norm": 0.6416323781013489, + "learning_rate": 1.991088034926232e-05, + "loss": 2.4401, + "step": 1757 + }, + { + "epoch": 0.06, + "grad_norm": 0.6998594999313354, + "learning_rate": 1.9910738706894957e-05, + "loss": 2.4209, + "step": 1758 + }, + { + "epoch": 0.06, + "grad_norm": 0.6584749817848206, + "learning_rate": 1.9910596952561873e-05, + "loss": 2.4313, + "step": 1759 + }, + { + "epoch": 0.06, + "grad_norm": 0.6843457818031311, + "learning_rate": 1.9910455086264673e-05, + "loss": 2.3678, + "step": 1760 + }, + { + "epoch": 0.06, + "grad_norm": 0.6927817463874817, + "learning_rate": 1.991031310800496e-05, + "loss": 2.4414, + "step": 1761 + }, + { + "epoch": 0.06, + "grad_norm": 0.6730621457099915, + "learning_rate": 1.991017101778434e-05, + "loss": 2.3603, + "step": 1762 + }, + { + "epoch": 0.06, + "grad_norm": 0.6471081972122192, + "learning_rate": 1.991002881560441e-05, + "loss": 2.4396, + "step": 1763 + }, + { + "epoch": 0.06, + "grad_norm": 0.6782976984977722, + "learning_rate": 1.9909886501466787e-05, + "loss": 2.3799, + "step": 1764 + }, + { + "epoch": 0.06, + "grad_norm": 0.6571404337882996, + "learning_rate": 1.990974407537307e-05, + "loss": 2.447, + "step": 1765 + }, + { + "epoch": 0.06, + "grad_norm": 0.6603661775588989, + "learning_rate": 1.9909601537324877e-05, + "loss": 2.3868, + "step": 1766 + }, + { + "epoch": 0.06, + "grad_norm": 0.6608498096466064, + "learning_rate": 1.9909458887323812e-05, + "loss": 2.4492, + "step": 1767 + }, + { + "epoch": 0.06, + "grad_norm": 0.6713616847991943, + "learning_rate": 1.9909316125371488e-05, + "loss": 2.4248, + "step": 1768 + }, + { + "epoch": 0.06, + "grad_norm": 0.6860651969909668, + "learning_rate": 1.9909173251469515e-05, + "loss": 2.3522, + "step": 1769 + }, + { + "epoch": 0.06, + "grad_norm": 0.7090104818344116, + "learning_rate": 1.9909030265619514e-05, + "loss": 2.4264, + "step": 1770 + }, + { + "epoch": 0.06, + "grad_norm": 0.6741381883621216, + "learning_rate": 1.990888716782309e-05, + "loss": 2.3504, + "step": 1771 + }, + { + "epoch": 0.06, + "grad_norm": 0.6544789671897888, + "learning_rate": 1.9908743958081873e-05, + "loss": 2.3994, + "step": 1772 + }, + { + "epoch": 0.06, + "grad_norm": 0.6701503992080688, + "learning_rate": 1.990860063639747e-05, + "loss": 2.3968, + "step": 1773 + }, + { + "epoch": 0.06, + "grad_norm": 0.680515468120575, + "learning_rate": 1.990845720277151e-05, + "loss": 2.3581, + "step": 1774 + }, + { + "epoch": 0.06, + "grad_norm": 0.6981581449508667, + "learning_rate": 1.99083136572056e-05, + "loss": 2.46, + "step": 1775 + }, + { + "epoch": 0.06, + "grad_norm": 0.6608291864395142, + "learning_rate": 1.990816999970137e-05, + "loss": 2.4174, + "step": 1776 + }, + { + "epoch": 0.06, + "grad_norm": 0.700094997882843, + "learning_rate": 1.9908026230260446e-05, + "loss": 2.4271, + "step": 1777 + }, + { + "epoch": 0.06, + "grad_norm": 0.6515207290649414, + "learning_rate": 1.9907882348884445e-05, + "loss": 2.4263, + "step": 1778 + }, + { + "epoch": 0.06, + "grad_norm": 0.6882498860359192, + "learning_rate": 1.9907738355575e-05, + "loss": 2.458, + "step": 1779 + }, + { + "epoch": 0.06, + "grad_norm": 0.6586911082267761, + "learning_rate": 1.990759425033373e-05, + "loss": 2.3839, + "step": 1780 + }, + { + "epoch": 0.06, + "grad_norm": 0.6677483916282654, + "learning_rate": 1.9907450033162267e-05, + "loss": 2.3727, + "step": 1781 + }, + { + "epoch": 0.06, + "grad_norm": 0.6829531192779541, + "learning_rate": 1.9907305704062238e-05, + "loss": 2.3717, + "step": 1782 + }, + { + "epoch": 0.06, + "grad_norm": 0.6769689321517944, + "learning_rate": 1.990716126303528e-05, + "loss": 2.3915, + "step": 1783 + }, + { + "epoch": 0.06, + "grad_norm": 0.7131335735321045, + "learning_rate": 1.9907016710083015e-05, + "loss": 2.3727, + "step": 1784 + }, + { + "epoch": 0.06, + "grad_norm": 0.6324228644371033, + "learning_rate": 1.9906872045207084e-05, + "loss": 2.3224, + "step": 1785 + }, + { + "epoch": 0.06, + "grad_norm": 0.6493406891822815, + "learning_rate": 1.9906727268409116e-05, + "loss": 2.3651, + "step": 1786 + }, + { + "epoch": 0.06, + "grad_norm": 0.64736407995224, + "learning_rate": 1.9906582379690753e-05, + "loss": 2.4296, + "step": 1787 + }, + { + "epoch": 0.06, + "grad_norm": 0.6784501075744629, + "learning_rate": 1.9906437379053628e-05, + "loss": 2.4315, + "step": 1788 + }, + { + "epoch": 0.06, + "grad_norm": 0.677405059337616, + "learning_rate": 1.9906292266499375e-05, + "loss": 2.3455, + "step": 1789 + }, + { + "epoch": 0.06, + "grad_norm": 0.6765218377113342, + "learning_rate": 1.9906147042029644e-05, + "loss": 2.4255, + "step": 1790 + }, + { + "epoch": 0.06, + "grad_norm": 0.6785590052604675, + "learning_rate": 1.9906001705646064e-05, + "loss": 2.3912, + "step": 1791 + }, + { + "epoch": 0.06, + "grad_norm": 0.6327767372131348, + "learning_rate": 1.9905856257350285e-05, + "loss": 2.356, + "step": 1792 + }, + { + "epoch": 0.06, + "grad_norm": 0.7202600836753845, + "learning_rate": 1.9905710697143944e-05, + "loss": 2.4297, + "step": 1793 + }, + { + "epoch": 0.06, + "grad_norm": 0.6425220966339111, + "learning_rate": 1.9905565025028694e-05, + "loss": 2.4092, + "step": 1794 + }, + { + "epoch": 0.06, + "grad_norm": 0.6445935964584351, + "learning_rate": 1.9905419241006172e-05, + "loss": 2.4094, + "step": 1795 + }, + { + "epoch": 0.06, + "grad_norm": 0.687806248664856, + "learning_rate": 1.990527334507803e-05, + "loss": 2.3995, + "step": 1796 + }, + { + "epoch": 0.06, + "grad_norm": 0.6905810236930847, + "learning_rate": 1.9905127337245915e-05, + "loss": 2.4002, + "step": 1797 + }, + { + "epoch": 0.06, + "grad_norm": 0.6583490371704102, + "learning_rate": 1.9904981217511476e-05, + "loss": 2.3864, + "step": 1798 + }, + { + "epoch": 0.06, + "grad_norm": 0.6532671451568604, + "learning_rate": 1.9904834985876365e-05, + "loss": 2.3931, + "step": 1799 + }, + { + "epoch": 0.06, + "grad_norm": 0.6690351963043213, + "learning_rate": 1.9904688642342228e-05, + "loss": 2.41, + "step": 1800 + }, + { + "epoch": 0.06, + "grad_norm": 0.6628871560096741, + "learning_rate": 1.990454218691073e-05, + "loss": 2.4035, + "step": 1801 + }, + { + "epoch": 0.06, + "grad_norm": 0.6618131995201111, + "learning_rate": 1.9904395619583515e-05, + "loss": 2.3896, + "step": 1802 + }, + { + "epoch": 0.06, + "grad_norm": 0.6890316009521484, + "learning_rate": 1.9904248940362246e-05, + "loss": 2.3503, + "step": 1803 + }, + { + "epoch": 0.06, + "grad_norm": 0.639540433883667, + "learning_rate": 1.9904102149248574e-05, + "loss": 2.4145, + "step": 1804 + }, + { + "epoch": 0.06, + "grad_norm": 0.6708518862724304, + "learning_rate": 1.990395524624416e-05, + "loss": 2.401, + "step": 1805 + }, + { + "epoch": 0.06, + "grad_norm": 0.66914963722229, + "learning_rate": 1.9903808231350664e-05, + "loss": 2.4246, + "step": 1806 + }, + { + "epoch": 0.06, + "grad_norm": 0.6464137434959412, + "learning_rate": 1.990366110456975e-05, + "loss": 2.3966, + "step": 1807 + }, + { + "epoch": 0.06, + "grad_norm": 0.7029147148132324, + "learning_rate": 1.9903513865903075e-05, + "loss": 2.3999, + "step": 1808 + }, + { + "epoch": 0.06, + "grad_norm": 0.6588590145111084, + "learning_rate": 1.9903366515352304e-05, + "loss": 2.432, + "step": 1809 + }, + { + "epoch": 0.06, + "grad_norm": 0.6760753989219666, + "learning_rate": 1.9903219052919103e-05, + "loss": 2.3811, + "step": 1810 + }, + { + "epoch": 0.06, + "grad_norm": 0.6614328026771545, + "learning_rate": 1.9903071478605138e-05, + "loss": 2.4046, + "step": 1811 + }, + { + "epoch": 0.06, + "grad_norm": 0.7194113731384277, + "learning_rate": 1.9902923792412073e-05, + "loss": 2.3518, + "step": 1812 + }, + { + "epoch": 0.06, + "grad_norm": 0.6925607919692993, + "learning_rate": 1.990277599434158e-05, + "loss": 2.4237, + "step": 1813 + }, + { + "epoch": 0.06, + "grad_norm": 0.646809995174408, + "learning_rate": 1.990262808439533e-05, + "loss": 2.4628, + "step": 1814 + }, + { + "epoch": 0.06, + "grad_norm": 0.6585157513618469, + "learning_rate": 1.9902480062574986e-05, + "loss": 2.3831, + "step": 1815 + }, + { + "epoch": 0.06, + "grad_norm": 0.6462484002113342, + "learning_rate": 1.9902331928882228e-05, + "loss": 2.3346, + "step": 1816 + }, + { + "epoch": 0.06, + "grad_norm": 0.6405508518218994, + "learning_rate": 1.9902183683318725e-05, + "loss": 2.3453, + "step": 1817 + }, + { + "epoch": 0.06, + "grad_norm": 0.6426177024841309, + "learning_rate": 1.9902035325886158e-05, + "loss": 2.4001, + "step": 1818 + }, + { + "epoch": 0.06, + "grad_norm": 0.692316472530365, + "learning_rate": 1.9901886856586197e-05, + "loss": 2.408, + "step": 1819 + }, + { + "epoch": 0.06, + "grad_norm": 0.6814116835594177, + "learning_rate": 1.990173827542052e-05, + "loss": 2.414, + "step": 1820 + }, + { + "epoch": 0.06, + "grad_norm": 0.7283808588981628, + "learning_rate": 1.9901589582390807e-05, + "loss": 2.3991, + "step": 1821 + }, + { + "epoch": 0.06, + "grad_norm": 0.6647890210151672, + "learning_rate": 1.990144077749874e-05, + "loss": 2.3838, + "step": 1822 + }, + { + "epoch": 0.06, + "grad_norm": 0.6502346396446228, + "learning_rate": 1.9901291860745998e-05, + "loss": 2.383, + "step": 1823 + }, + { + "epoch": 0.06, + "grad_norm": 0.6670609712600708, + "learning_rate": 1.9901142832134264e-05, + "loss": 2.3836, + "step": 1824 + }, + { + "epoch": 0.06, + "grad_norm": 0.6736798286437988, + "learning_rate": 1.990099369166522e-05, + "loss": 2.4128, + "step": 1825 + }, + { + "epoch": 0.06, + "grad_norm": 0.6703789234161377, + "learning_rate": 1.990084443934055e-05, + "loss": 2.3482, + "step": 1826 + }, + { + "epoch": 0.06, + "grad_norm": 0.6589123606681824, + "learning_rate": 1.9900695075161943e-05, + "loss": 2.4003, + "step": 1827 + }, + { + "epoch": 0.06, + "grad_norm": 0.6856886744499207, + "learning_rate": 1.9900545599131086e-05, + "loss": 2.3682, + "step": 1828 + }, + { + "epoch": 0.06, + "grad_norm": 0.6985341310501099, + "learning_rate": 1.9900396011249667e-05, + "loss": 2.3766, + "step": 1829 + }, + { + "epoch": 0.06, + "grad_norm": 0.6431722044944763, + "learning_rate": 1.9900246311519374e-05, + "loss": 2.4331, + "step": 1830 + }, + { + "epoch": 0.06, + "grad_norm": 0.652043879032135, + "learning_rate": 1.9900096499941904e-05, + "loss": 2.3688, + "step": 1831 + }, + { + "epoch": 0.06, + "grad_norm": 0.6494725942611694, + "learning_rate": 1.9899946576518943e-05, + "loss": 2.394, + "step": 1832 + }, + { + "epoch": 0.06, + "grad_norm": 0.6854714751243591, + "learning_rate": 1.989979654125219e-05, + "loss": 2.4537, + "step": 1833 + }, + { + "epoch": 0.06, + "grad_norm": 0.6890335083007812, + "learning_rate": 1.9899646394143334e-05, + "loss": 2.405, + "step": 1834 + }, + { + "epoch": 0.06, + "grad_norm": 0.6724991798400879, + "learning_rate": 1.9899496135194075e-05, + "loss": 2.4273, + "step": 1835 + }, + { + "epoch": 0.06, + "grad_norm": 0.6392744183540344, + "learning_rate": 1.9899345764406114e-05, + "loss": 2.3527, + "step": 1836 + }, + { + "epoch": 0.06, + "grad_norm": 0.637627363204956, + "learning_rate": 1.9899195281781143e-05, + "loss": 2.4107, + "step": 1837 + }, + { + "epoch": 0.06, + "grad_norm": 0.6806239485740662, + "learning_rate": 1.9899044687320867e-05, + "loss": 2.4054, + "step": 1838 + }, + { + "epoch": 0.06, + "grad_norm": 0.7183214426040649, + "learning_rate": 1.9898893981026984e-05, + "loss": 2.4416, + "step": 1839 + }, + { + "epoch": 0.06, + "grad_norm": 0.6456092000007629, + "learning_rate": 1.98987431629012e-05, + "loss": 2.4358, + "step": 1840 + }, + { + "epoch": 0.06, + "grad_norm": 0.6367050409317017, + "learning_rate": 1.989859223294522e-05, + "loss": 2.3597, + "step": 1841 + }, + { + "epoch": 0.06, + "grad_norm": 0.6637138724327087, + "learning_rate": 1.9898441191160738e-05, + "loss": 2.3776, + "step": 1842 + }, + { + "epoch": 0.06, + "grad_norm": 0.7038841247558594, + "learning_rate": 1.9898290037549474e-05, + "loss": 2.4314, + "step": 1843 + }, + { + "epoch": 0.06, + "grad_norm": 0.6421082019805908, + "learning_rate": 1.989813877211313e-05, + "loss": 2.35, + "step": 1844 + }, + { + "epoch": 0.06, + "grad_norm": 0.6555576324462891, + "learning_rate": 1.989798739485341e-05, + "loss": 2.4033, + "step": 1845 + }, + { + "epoch": 0.06, + "grad_norm": 0.6614798307418823, + "learning_rate": 1.9897835905772033e-05, + "loss": 2.359, + "step": 1846 + }, + { + "epoch": 0.06, + "grad_norm": 0.6740425825119019, + "learning_rate": 1.989768430487071e-05, + "loss": 2.3709, + "step": 1847 + }, + { + "epoch": 0.06, + "grad_norm": 0.6593112349510193, + "learning_rate": 1.9897532592151146e-05, + "loss": 2.3757, + "step": 1848 + }, + { + "epoch": 0.06, + "grad_norm": 0.645943820476532, + "learning_rate": 1.989738076761506e-05, + "loss": 2.3386, + "step": 1849 + }, + { + "epoch": 0.06, + "grad_norm": 0.6590797901153564, + "learning_rate": 1.9897228831264165e-05, + "loss": 2.3417, + "step": 1850 + }, + { + "epoch": 0.06, + "grad_norm": 0.6435642838478088, + "learning_rate": 1.9897076783100182e-05, + "loss": 2.3346, + "step": 1851 + }, + { + "epoch": 0.06, + "grad_norm": 0.6621133089065552, + "learning_rate": 1.9896924623124824e-05, + "loss": 2.4472, + "step": 1852 + }, + { + "epoch": 0.06, + "grad_norm": 0.6617000102996826, + "learning_rate": 1.9896772351339813e-05, + "loss": 2.3989, + "step": 1853 + }, + { + "epoch": 0.06, + "grad_norm": 0.6556550860404968, + "learning_rate": 1.989661996774687e-05, + "loss": 2.4081, + "step": 1854 + }, + { + "epoch": 0.06, + "grad_norm": 0.6734105348587036, + "learning_rate": 1.9896467472347708e-05, + "loss": 2.374, + "step": 1855 + }, + { + "epoch": 0.06, + "grad_norm": 0.6760770678520203, + "learning_rate": 1.9896314865144063e-05, + "loss": 2.3653, + "step": 1856 + }, + { + "epoch": 0.06, + "grad_norm": 0.6664807200431824, + "learning_rate": 1.989616214613765e-05, + "loss": 2.4349, + "step": 1857 + }, + { + "epoch": 0.06, + "grad_norm": 0.6725401282310486, + "learning_rate": 1.9896009315330195e-05, + "loss": 2.3643, + "step": 1858 + }, + { + "epoch": 0.06, + "grad_norm": 0.6584186553955078, + "learning_rate": 1.9895856372723428e-05, + "loss": 2.4042, + "step": 1859 + }, + { + "epoch": 0.06, + "grad_norm": 0.6646932363510132, + "learning_rate": 1.9895703318319076e-05, + "loss": 2.4155, + "step": 1860 + }, + { + "epoch": 0.06, + "grad_norm": 0.6928250193595886, + "learning_rate": 1.9895550152118867e-05, + "loss": 2.4126, + "step": 1861 + }, + { + "epoch": 0.06, + "grad_norm": 0.6571219563484192, + "learning_rate": 1.9895396874124532e-05, + "loss": 2.3967, + "step": 1862 + }, + { + "epoch": 0.06, + "grad_norm": 0.6730003952980042, + "learning_rate": 1.98952434843378e-05, + "loss": 2.3757, + "step": 1863 + }, + { + "epoch": 0.06, + "grad_norm": 0.7038864493370056, + "learning_rate": 1.989508998276041e-05, + "loss": 2.3982, + "step": 1864 + }, + { + "epoch": 0.06, + "grad_norm": 0.7045702338218689, + "learning_rate": 1.989493636939409e-05, + "loss": 2.3419, + "step": 1865 + }, + { + "epoch": 0.06, + "grad_norm": 0.6351613402366638, + "learning_rate": 1.9894782644240577e-05, + "loss": 2.3944, + "step": 1866 + }, + { + "epoch": 0.06, + "grad_norm": 0.643223226070404, + "learning_rate": 1.989462880730161e-05, + "loss": 2.3195, + "step": 1867 + }, + { + "epoch": 0.06, + "grad_norm": 0.7200911641120911, + "learning_rate": 1.9894474858578924e-05, + "loss": 2.4814, + "step": 1868 + }, + { + "epoch": 0.06, + "grad_norm": 0.6807271242141724, + "learning_rate": 1.989432079807426e-05, + "loss": 2.4219, + "step": 1869 + }, + { + "epoch": 0.06, + "grad_norm": 0.7036699056625366, + "learning_rate": 1.989416662578936e-05, + "loss": 2.3488, + "step": 1870 + }, + { + "epoch": 0.06, + "grad_norm": 0.6819643378257751, + "learning_rate": 1.9894012341725965e-05, + "loss": 2.4499, + "step": 1871 + }, + { + "epoch": 0.06, + "grad_norm": 0.66123366355896, + "learning_rate": 1.9893857945885817e-05, + "loss": 2.3581, + "step": 1872 + }, + { + "epoch": 0.06, + "grad_norm": 0.6614993810653687, + "learning_rate": 1.9893703438270656e-05, + "loss": 2.395, + "step": 1873 + }, + { + "epoch": 0.06, + "grad_norm": 0.6651284098625183, + "learning_rate": 1.9893548818882234e-05, + "loss": 2.4768, + "step": 1874 + }, + { + "epoch": 0.06, + "grad_norm": 0.7005410194396973, + "learning_rate": 1.9893394087722298e-05, + "loss": 2.3912, + "step": 1875 + }, + { + "epoch": 0.06, + "grad_norm": 0.6791648268699646, + "learning_rate": 1.9893239244792594e-05, + "loss": 2.4434, + "step": 1876 + }, + { + "epoch": 0.06, + "grad_norm": 0.6413533687591553, + "learning_rate": 1.989308429009487e-05, + "loss": 2.4116, + "step": 1877 + }, + { + "epoch": 0.06, + "grad_norm": 0.6797276735305786, + "learning_rate": 1.9892929223630877e-05, + "loss": 2.4005, + "step": 1878 + }, + { + "epoch": 0.06, + "grad_norm": 0.6793906092643738, + "learning_rate": 1.9892774045402364e-05, + "loss": 2.3685, + "step": 1879 + }, + { + "epoch": 0.06, + "grad_norm": 0.6800909638404846, + "learning_rate": 1.9892618755411093e-05, + "loss": 2.3956, + "step": 1880 + }, + { + "epoch": 0.06, + "grad_norm": 0.6815364956855774, + "learning_rate": 1.9892463353658812e-05, + "loss": 2.3914, + "step": 1881 + }, + { + "epoch": 0.06, + "grad_norm": 0.6469994187355042, + "learning_rate": 1.9892307840147276e-05, + "loss": 2.4199, + "step": 1882 + }, + { + "epoch": 0.06, + "grad_norm": 0.7118571400642395, + "learning_rate": 1.9892152214878243e-05, + "loss": 2.4018, + "step": 1883 + }, + { + "epoch": 0.06, + "grad_norm": 0.6700411438941956, + "learning_rate": 1.989199647785347e-05, + "loss": 2.3576, + "step": 1884 + }, + { + "epoch": 0.06, + "grad_norm": 0.6585733294487, + "learning_rate": 1.989184062907472e-05, + "loss": 2.3376, + "step": 1885 + }, + { + "epoch": 0.06, + "grad_norm": 0.6642573475837708, + "learning_rate": 1.9891684668543757e-05, + "loss": 2.4196, + "step": 1886 + }, + { + "epoch": 0.06, + "grad_norm": 0.6474754214286804, + "learning_rate": 1.9891528596262332e-05, + "loss": 2.3563, + "step": 1887 + }, + { + "epoch": 0.06, + "grad_norm": 0.6807147860527039, + "learning_rate": 1.9891372412232213e-05, + "loss": 2.3796, + "step": 1888 + }, + { + "epoch": 0.06, + "grad_norm": 0.6752489805221558, + "learning_rate": 1.989121611645517e-05, + "loss": 2.3887, + "step": 1889 + }, + { + "epoch": 0.06, + "grad_norm": 0.6685085892677307, + "learning_rate": 1.989105970893296e-05, + "loss": 2.3934, + "step": 1890 + }, + { + "epoch": 0.06, + "grad_norm": 0.6582318544387817, + "learning_rate": 1.9890903189667354e-05, + "loss": 2.3518, + "step": 1891 + }, + { + "epoch": 0.06, + "grad_norm": 0.7093725204467773, + "learning_rate": 1.9890746558660125e-05, + "loss": 2.3663, + "step": 1892 + }, + { + "epoch": 0.06, + "grad_norm": 0.6949980854988098, + "learning_rate": 1.989058981591303e-05, + "loss": 2.3679, + "step": 1893 + }, + { + "epoch": 0.06, + "grad_norm": 0.693029522895813, + "learning_rate": 1.9890432961427853e-05, + "loss": 2.4068, + "step": 1894 + }, + { + "epoch": 0.06, + "grad_norm": 0.6516783237457275, + "learning_rate": 1.9890275995206362e-05, + "loss": 2.3933, + "step": 1895 + }, + { + "epoch": 0.06, + "grad_norm": 0.6877561211585999, + "learning_rate": 1.9890118917250326e-05, + "loss": 2.4009, + "step": 1896 + }, + { + "epoch": 0.06, + "grad_norm": 0.6436954140663147, + "learning_rate": 1.9889961727561523e-05, + "loss": 2.3639, + "step": 1897 + }, + { + "epoch": 0.06, + "grad_norm": 0.6731047630310059, + "learning_rate": 1.9889804426141732e-05, + "loss": 2.4678, + "step": 1898 + }, + { + "epoch": 0.06, + "grad_norm": 0.7072241306304932, + "learning_rate": 1.9889647012992722e-05, + "loss": 2.3922, + "step": 1899 + }, + { + "epoch": 0.06, + "grad_norm": 0.6704971194267273, + "learning_rate": 1.988948948811628e-05, + "loss": 2.3587, + "step": 1900 + }, + { + "epoch": 0.06, + "grad_norm": 0.6448014974594116, + "learning_rate": 1.988933185151418e-05, + "loss": 2.3384, + "step": 1901 + }, + { + "epoch": 0.06, + "grad_norm": 0.6710502505302429, + "learning_rate": 1.9889174103188205e-05, + "loss": 2.3918, + "step": 1902 + }, + { + "epoch": 0.06, + "grad_norm": 0.6166720390319824, + "learning_rate": 1.9889016243140133e-05, + "loss": 2.4035, + "step": 1903 + }, + { + "epoch": 0.06, + "grad_norm": 0.6438337564468384, + "learning_rate": 1.9888858271371755e-05, + "loss": 2.3678, + "step": 1904 + }, + { + "epoch": 0.06, + "grad_norm": 0.6769886612892151, + "learning_rate": 1.9888700187884852e-05, + "loss": 2.3884, + "step": 1905 + }, + { + "epoch": 0.06, + "grad_norm": 0.7140762209892273, + "learning_rate": 1.9888541992681208e-05, + "loss": 2.446, + "step": 1906 + }, + { + "epoch": 0.06, + "grad_norm": 0.7310478091239929, + "learning_rate": 1.9888383685762612e-05, + "loss": 2.3836, + "step": 1907 + }, + { + "epoch": 0.06, + "grad_norm": 0.726624608039856, + "learning_rate": 1.9888225267130853e-05, + "loss": 2.4926, + "step": 1908 + }, + { + "epoch": 0.06, + "grad_norm": 0.6651246547698975, + "learning_rate": 1.988806673678772e-05, + "loss": 2.347, + "step": 1909 + }, + { + "epoch": 0.06, + "grad_norm": 0.68213951587677, + "learning_rate": 1.9887908094735002e-05, + "loss": 2.4096, + "step": 1910 + }, + { + "epoch": 0.06, + "grad_norm": 0.6780643463134766, + "learning_rate": 1.9887749340974495e-05, + "loss": 2.4552, + "step": 1911 + }, + { + "epoch": 0.06, + "grad_norm": 0.6292219161987305, + "learning_rate": 1.9887590475507988e-05, + "loss": 2.3672, + "step": 1912 + }, + { + "epoch": 0.06, + "grad_norm": 0.6707997918128967, + "learning_rate": 1.9887431498337283e-05, + "loss": 2.4766, + "step": 1913 + }, + { + "epoch": 0.06, + "grad_norm": 0.6433650255203247, + "learning_rate": 1.9887272409464165e-05, + "loss": 2.4221, + "step": 1914 + }, + { + "epoch": 0.06, + "grad_norm": 0.6633206009864807, + "learning_rate": 1.9887113208890444e-05, + "loss": 2.4195, + "step": 1915 + }, + { + "epoch": 0.06, + "grad_norm": 0.6569668054580688, + "learning_rate": 1.988695389661791e-05, + "loss": 2.3693, + "step": 1916 + }, + { + "epoch": 0.06, + "grad_norm": 0.6609025001525879, + "learning_rate": 1.9886794472648367e-05, + "loss": 2.376, + "step": 1917 + }, + { + "epoch": 0.06, + "grad_norm": 0.6585745215415955, + "learning_rate": 1.9886634936983614e-05, + "loss": 2.386, + "step": 1918 + }, + { + "epoch": 0.06, + "grad_norm": 0.6758257746696472, + "learning_rate": 1.9886475289625452e-05, + "loss": 2.3482, + "step": 1919 + }, + { + "epoch": 0.06, + "grad_norm": 0.6775721311569214, + "learning_rate": 1.9886315530575686e-05, + "loss": 2.3974, + "step": 1920 + }, + { + "epoch": 0.06, + "grad_norm": 0.6903815865516663, + "learning_rate": 1.9886155659836118e-05, + "loss": 2.3932, + "step": 1921 + }, + { + "epoch": 0.06, + "grad_norm": 0.71864253282547, + "learning_rate": 1.9885995677408564e-05, + "loss": 2.3851, + "step": 1922 + }, + { + "epoch": 0.06, + "grad_norm": 0.6632323265075684, + "learning_rate": 1.988583558329482e-05, + "loss": 2.3965, + "step": 1923 + }, + { + "epoch": 0.06, + "grad_norm": 0.6756615042686462, + "learning_rate": 1.9885675377496703e-05, + "loss": 2.3585, + "step": 1924 + }, + { + "epoch": 0.06, + "grad_norm": 0.6681207418441772, + "learning_rate": 1.9885515060016017e-05, + "loss": 2.299, + "step": 1925 + }, + { + "epoch": 0.06, + "grad_norm": 0.6748582720756531, + "learning_rate": 1.9885354630854573e-05, + "loss": 2.4134, + "step": 1926 + }, + { + "epoch": 0.06, + "grad_norm": 0.6963973045349121, + "learning_rate": 1.988519409001419e-05, + "loss": 2.4039, + "step": 1927 + }, + { + "epoch": 0.06, + "grad_norm": 0.6478883624076843, + "learning_rate": 1.988503343749668e-05, + "loss": 2.3677, + "step": 1928 + }, + { + "epoch": 0.06, + "grad_norm": 0.6691045761108398, + "learning_rate": 1.9884872673303847e-05, + "loss": 2.3676, + "step": 1929 + }, + { + "epoch": 0.06, + "grad_norm": 0.6562191247940063, + "learning_rate": 1.9884711797437518e-05, + "loss": 2.4296, + "step": 1930 + }, + { + "epoch": 0.06, + "grad_norm": 0.68257075548172, + "learning_rate": 1.988455080989951e-05, + "loss": 2.3776, + "step": 1931 + }, + { + "epoch": 0.06, + "grad_norm": 0.6754991412162781, + "learning_rate": 1.988438971069164e-05, + "loss": 2.3978, + "step": 1932 + }, + { + "epoch": 0.06, + "grad_norm": 0.702732503414154, + "learning_rate": 1.9884228499815726e-05, + "loss": 2.3875, + "step": 1933 + }, + { + "epoch": 0.06, + "grad_norm": 0.67500239610672, + "learning_rate": 1.9884067177273592e-05, + "loss": 2.4485, + "step": 1934 + }, + { + "epoch": 0.06, + "grad_norm": 0.718306303024292, + "learning_rate": 1.988390574306706e-05, + "loss": 2.3784, + "step": 1935 + }, + { + "epoch": 0.06, + "grad_norm": 0.6888105273246765, + "learning_rate": 1.988374419719796e-05, + "loss": 2.3815, + "step": 1936 + }, + { + "epoch": 0.06, + "grad_norm": 0.678712785243988, + "learning_rate": 1.9883582539668104e-05, + "loss": 2.4398, + "step": 1937 + }, + { + "epoch": 0.06, + "grad_norm": 0.6974777579307556, + "learning_rate": 1.9883420770479324e-05, + "loss": 2.4074, + "step": 1938 + }, + { + "epoch": 0.06, + "grad_norm": 0.6886699199676514, + "learning_rate": 1.9883258889633448e-05, + "loss": 2.3408, + "step": 1939 + }, + { + "epoch": 0.06, + "grad_norm": 0.6882629990577698, + "learning_rate": 1.9883096897132305e-05, + "loss": 2.3495, + "step": 1940 + }, + { + "epoch": 0.06, + "grad_norm": 0.6942282319068909, + "learning_rate": 1.988293479297773e-05, + "loss": 2.3103, + "step": 1941 + }, + { + "epoch": 0.06, + "grad_norm": 0.6663061380386353, + "learning_rate": 1.9882772577171546e-05, + "loss": 2.394, + "step": 1942 + }, + { + "epoch": 0.06, + "grad_norm": 0.646624743938446, + "learning_rate": 1.9882610249715588e-05, + "loss": 2.3551, + "step": 1943 + }, + { + "epoch": 0.06, + "grad_norm": 0.7030312418937683, + "learning_rate": 1.9882447810611692e-05, + "loss": 2.3462, + "step": 1944 + }, + { + "epoch": 0.06, + "grad_norm": 0.6428135633468628, + "learning_rate": 1.9882285259861695e-05, + "loss": 2.3417, + "step": 1945 + }, + { + "epoch": 0.06, + "grad_norm": 0.6615855097770691, + "learning_rate": 1.988212259746743e-05, + "loss": 2.3529, + "step": 1946 + }, + { + "epoch": 0.06, + "grad_norm": 0.6686455011367798, + "learning_rate": 1.9881959823430735e-05, + "loss": 2.3863, + "step": 1947 + }, + { + "epoch": 0.06, + "grad_norm": 0.6516574025154114, + "learning_rate": 1.9881796937753448e-05, + "loss": 2.3393, + "step": 1948 + }, + { + "epoch": 0.06, + "grad_norm": 0.690444827079773, + "learning_rate": 1.988163394043741e-05, + "loss": 2.4368, + "step": 1949 + }, + { + "epoch": 0.06, + "grad_norm": 0.7215588688850403, + "learning_rate": 1.9881470831484465e-05, + "loss": 2.4541, + "step": 1950 + }, + { + "epoch": 0.06, + "grad_norm": 0.6870972514152527, + "learning_rate": 1.9881307610896453e-05, + "loss": 2.3809, + "step": 1951 + }, + { + "epoch": 0.06, + "grad_norm": 0.683038592338562, + "learning_rate": 1.988114427867522e-05, + "loss": 2.4195, + "step": 1952 + }, + { + "epoch": 0.06, + "grad_norm": 0.6629478335380554, + "learning_rate": 1.988098083482261e-05, + "loss": 2.3159, + "step": 1953 + }, + { + "epoch": 0.07, + "grad_norm": 0.6993348598480225, + "learning_rate": 1.988081727934047e-05, + "loss": 2.4497, + "step": 1954 + }, + { + "epoch": 0.07, + "grad_norm": 0.6564309000968933, + "learning_rate": 1.988065361223064e-05, + "loss": 2.3749, + "step": 1955 + }, + { + "epoch": 0.07, + "grad_norm": 0.6449017524719238, + "learning_rate": 1.988048983349498e-05, + "loss": 2.3683, + "step": 1956 + }, + { + "epoch": 0.07, + "grad_norm": 0.692620038986206, + "learning_rate": 1.988032594313534e-05, + "loss": 2.3219, + "step": 1957 + }, + { + "epoch": 0.07, + "grad_norm": 0.6503558158874512, + "learning_rate": 1.9880161941153564e-05, + "loss": 2.4109, + "step": 1958 + }, + { + "epoch": 0.07, + "grad_norm": 0.6510140895843506, + "learning_rate": 1.987999782755151e-05, + "loss": 2.2771, + "step": 1959 + }, + { + "epoch": 0.07, + "grad_norm": 0.6401605606079102, + "learning_rate": 1.987983360233103e-05, + "loss": 2.4024, + "step": 1960 + }, + { + "epoch": 0.07, + "grad_norm": 0.6664703488349915, + "learning_rate": 1.9879669265493984e-05, + "loss": 2.3935, + "step": 1961 + }, + { + "epoch": 0.07, + "grad_norm": 0.7180161476135254, + "learning_rate": 1.987950481704222e-05, + "loss": 2.3931, + "step": 1962 + }, + { + "epoch": 0.07, + "grad_norm": 0.6274951100349426, + "learning_rate": 1.9879340256977603e-05, + "loss": 2.3548, + "step": 1963 + }, + { + "epoch": 0.07, + "grad_norm": 0.6671566367149353, + "learning_rate": 1.987917558530199e-05, + "loss": 2.3166, + "step": 1964 + }, + { + "epoch": 0.07, + "grad_norm": 0.6738818287849426, + "learning_rate": 1.9879010802017237e-05, + "loss": 2.4312, + "step": 1965 + }, + { + "epoch": 0.07, + "grad_norm": 0.6993537545204163, + "learning_rate": 1.9878845907125215e-05, + "loss": 2.4354, + "step": 1966 + }, + { + "epoch": 0.07, + "grad_norm": 0.6515253782272339, + "learning_rate": 1.9878680900627776e-05, + "loss": 2.3889, + "step": 1967 + }, + { + "epoch": 0.07, + "grad_norm": 0.6461590528488159, + "learning_rate": 1.9878515782526795e-05, + "loss": 2.3306, + "step": 1968 + }, + { + "epoch": 0.07, + "grad_norm": 0.64005047082901, + "learning_rate": 1.987835055282413e-05, + "loss": 2.3448, + "step": 1969 + }, + { + "epoch": 0.07, + "grad_norm": 0.7339893579483032, + "learning_rate": 1.987818521152165e-05, + "loss": 2.5097, + "step": 1970 + }, + { + "epoch": 0.07, + "grad_norm": 0.6623417735099792, + "learning_rate": 1.987801975862122e-05, + "loss": 2.3238, + "step": 1971 + }, + { + "epoch": 0.07, + "grad_norm": 0.70616215467453, + "learning_rate": 1.9877854194124714e-05, + "loss": 2.4033, + "step": 1972 + }, + { + "epoch": 0.07, + "grad_norm": 0.6597620248794556, + "learning_rate": 1.9877688518034002e-05, + "loss": 2.3966, + "step": 1973 + }, + { + "epoch": 0.07, + "grad_norm": 0.633905827999115, + "learning_rate": 1.987752273035095e-05, + "loss": 2.3723, + "step": 1974 + }, + { + "epoch": 0.07, + "grad_norm": 0.6773349046707153, + "learning_rate": 1.987735683107744e-05, + "loss": 2.3748, + "step": 1975 + }, + { + "epoch": 0.07, + "grad_norm": 0.6817188858985901, + "learning_rate": 1.9877190820215338e-05, + "loss": 2.3489, + "step": 1976 + }, + { + "epoch": 0.07, + "grad_norm": 0.6746551990509033, + "learning_rate": 1.9877024697766523e-05, + "loss": 2.3408, + "step": 1977 + }, + { + "epoch": 0.07, + "grad_norm": 0.7102041244506836, + "learning_rate": 1.987685846373287e-05, + "loss": 2.3338, + "step": 1978 + }, + { + "epoch": 0.07, + "grad_norm": 0.6619156002998352, + "learning_rate": 1.987669211811626e-05, + "loss": 2.4134, + "step": 1979 + }, + { + "epoch": 0.07, + "grad_norm": 0.6872266530990601, + "learning_rate": 1.9876525660918572e-05, + "loss": 2.395, + "step": 1980 + }, + { + "epoch": 0.07, + "grad_norm": 0.6938050389289856, + "learning_rate": 1.9876359092141687e-05, + "loss": 2.3238, + "step": 1981 + }, + { + "epoch": 0.07, + "grad_norm": 0.6569597125053406, + "learning_rate": 1.987619241178748e-05, + "loss": 2.3686, + "step": 1982 + }, + { + "epoch": 0.07, + "grad_norm": 0.6240250468254089, + "learning_rate": 1.9876025619857843e-05, + "loss": 2.3393, + "step": 1983 + }, + { + "epoch": 0.07, + "grad_norm": 0.6742483377456665, + "learning_rate": 1.9875858716354655e-05, + "loss": 2.3847, + "step": 1984 + }, + { + "epoch": 0.07, + "grad_norm": 0.6481521129608154, + "learning_rate": 1.9875691701279803e-05, + "loss": 2.3429, + "step": 1985 + }, + { + "epoch": 0.07, + "grad_norm": 0.6890261769294739, + "learning_rate": 1.9875524574635173e-05, + "loss": 2.3927, + "step": 1986 + }, + { + "epoch": 0.07, + "grad_norm": 0.7430751919746399, + "learning_rate": 1.9875357336422657e-05, + "loss": 2.3593, + "step": 1987 + }, + { + "epoch": 0.07, + "grad_norm": 0.6601622700691223, + "learning_rate": 1.9875189986644135e-05, + "loss": 2.423, + "step": 1988 + }, + { + "epoch": 0.07, + "grad_norm": 0.6857054233551025, + "learning_rate": 1.987502252530151e-05, + "loss": 2.3402, + "step": 1989 + }, + { + "epoch": 0.07, + "grad_norm": 0.663474977016449, + "learning_rate": 1.9874854952396666e-05, + "loss": 2.308, + "step": 1990 + }, + { + "epoch": 0.07, + "grad_norm": 0.6763882637023926, + "learning_rate": 1.98746872679315e-05, + "loss": 2.3502, + "step": 1991 + }, + { + "epoch": 0.07, + "grad_norm": 0.6983333230018616, + "learning_rate": 1.98745194719079e-05, + "loss": 2.368, + "step": 1992 + }, + { + "epoch": 0.07, + "grad_norm": 0.700821042060852, + "learning_rate": 1.987435156432777e-05, + "loss": 2.4557, + "step": 1993 + }, + { + "epoch": 0.07, + "grad_norm": 0.6614567637443542, + "learning_rate": 1.9874183545193e-05, + "loss": 2.4166, + "step": 1994 + }, + { + "epoch": 0.07, + "grad_norm": 0.6725916862487793, + "learning_rate": 1.987401541450549e-05, + "loss": 2.4023, + "step": 1995 + }, + { + "epoch": 0.07, + "grad_norm": 0.6512778997421265, + "learning_rate": 1.9873847172267146e-05, + "loss": 2.3874, + "step": 1996 + }, + { + "epoch": 0.07, + "grad_norm": 0.6550699472427368, + "learning_rate": 1.9873678818479862e-05, + "loss": 2.3988, + "step": 1997 + }, + { + "epoch": 0.07, + "grad_norm": 0.6466767191886902, + "learning_rate": 1.9873510353145538e-05, + "loss": 2.3601, + "step": 1998 + }, + { + "epoch": 0.07, + "grad_norm": 0.6573099493980408, + "learning_rate": 1.9873341776266083e-05, + "loss": 2.3733, + "step": 1999 + }, + { + "epoch": 0.07, + "grad_norm": 0.6988899111747742, + "learning_rate": 1.98731730878434e-05, + "loss": 2.3703, + "step": 2000 + }, + { + "epoch": 0.07, + "grad_norm": 0.6830779314041138, + "learning_rate": 1.987300428787939e-05, + "loss": 2.3948, + "step": 2001 + }, + { + "epoch": 0.07, + "grad_norm": 0.6640918254852295, + "learning_rate": 1.9872835376375966e-05, + "loss": 2.3053, + "step": 2002 + }, + { + "epoch": 0.07, + "grad_norm": 0.6555066108703613, + "learning_rate": 1.9872666353335034e-05, + "loss": 2.4064, + "step": 2003 + }, + { + "epoch": 0.07, + "grad_norm": 0.6405621767044067, + "learning_rate": 1.98724972187585e-05, + "loss": 2.3567, + "step": 2004 + }, + { + "epoch": 0.07, + "grad_norm": 0.6533565521240234, + "learning_rate": 1.9872327972648282e-05, + "loss": 2.3832, + "step": 2005 + }, + { + "epoch": 0.07, + "grad_norm": 0.6543545126914978, + "learning_rate": 1.9872158615006287e-05, + "loss": 2.3483, + "step": 2006 + }, + { + "epoch": 0.07, + "grad_norm": 0.7175511717796326, + "learning_rate": 1.987198914583443e-05, + "loss": 2.3405, + "step": 2007 + }, + { + "epoch": 0.07, + "grad_norm": 0.6671443581581116, + "learning_rate": 1.9871819565134626e-05, + "loss": 2.3703, + "step": 2008 + }, + { + "epoch": 0.07, + "grad_norm": 0.7089993357658386, + "learning_rate": 1.987164987290879e-05, + "loss": 2.3992, + "step": 2009 + }, + { + "epoch": 0.07, + "grad_norm": 0.6659372448921204, + "learning_rate": 1.987148006915884e-05, + "loss": 2.3296, + "step": 2010 + }, + { + "epoch": 0.07, + "grad_norm": 0.7050767540931702, + "learning_rate": 1.987131015388669e-05, + "loss": 2.4049, + "step": 2011 + }, + { + "epoch": 0.07, + "grad_norm": 0.6699216365814209, + "learning_rate": 1.9871140127094268e-05, + "loss": 2.3962, + "step": 2012 + }, + { + "epoch": 0.07, + "grad_norm": 0.6618857979774475, + "learning_rate": 1.9870969988783483e-05, + "loss": 2.387, + "step": 2013 + }, + { + "epoch": 0.07, + "grad_norm": 0.7021678686141968, + "learning_rate": 1.987079973895627e-05, + "loss": 2.437, + "step": 2014 + }, + { + "epoch": 0.07, + "grad_norm": 0.6738669872283936, + "learning_rate": 1.9870629377614544e-05, + "loss": 2.4115, + "step": 2015 + }, + { + "epoch": 0.07, + "grad_norm": 0.674420177936554, + "learning_rate": 1.9870458904760232e-05, + "loss": 2.4134, + "step": 2016 + }, + { + "epoch": 0.07, + "grad_norm": 0.6766080856323242, + "learning_rate": 1.987028832039526e-05, + "loss": 2.3895, + "step": 2017 + }, + { + "epoch": 0.07, + "grad_norm": 0.6816107034683228, + "learning_rate": 1.9870117624521557e-05, + "loss": 2.3732, + "step": 2018 + }, + { + "epoch": 0.07, + "grad_norm": 0.6705644130706787, + "learning_rate": 1.9869946817141046e-05, + "loss": 2.4621, + "step": 2019 + }, + { + "epoch": 0.07, + "grad_norm": 0.7104842066764832, + "learning_rate": 1.9869775898255665e-05, + "loss": 2.4223, + "step": 2020 + }, + { + "epoch": 0.07, + "grad_norm": 0.6839065551757812, + "learning_rate": 1.9869604867867337e-05, + "loss": 2.4159, + "step": 2021 + }, + { + "epoch": 0.07, + "grad_norm": 0.7369197607040405, + "learning_rate": 1.9869433725977997e-05, + "loss": 2.3866, + "step": 2022 + }, + { + "epoch": 0.07, + "grad_norm": 0.7267538905143738, + "learning_rate": 1.9869262472589577e-05, + "loss": 2.4151, + "step": 2023 + }, + { + "epoch": 0.07, + "grad_norm": 0.6522248387336731, + "learning_rate": 1.9869091107704018e-05, + "loss": 2.3232, + "step": 2024 + }, + { + "epoch": 0.07, + "grad_norm": 0.6542271375656128, + "learning_rate": 1.986891963132325e-05, + "loss": 2.3985, + "step": 2025 + }, + { + "epoch": 0.07, + "grad_norm": 0.6832755208015442, + "learning_rate": 1.986874804344921e-05, + "loss": 2.428, + "step": 2026 + }, + { + "epoch": 0.07, + "grad_norm": 0.6650073528289795, + "learning_rate": 1.9868576344083837e-05, + "loss": 2.4005, + "step": 2027 + }, + { + "epoch": 0.07, + "grad_norm": 0.6918550729751587, + "learning_rate": 1.986840453322908e-05, + "loss": 2.3573, + "step": 2028 + }, + { + "epoch": 0.07, + "grad_norm": 0.6809049248695374, + "learning_rate": 1.9868232610886865e-05, + "loss": 2.3346, + "step": 2029 + }, + { + "epoch": 0.07, + "grad_norm": 0.6784788966178894, + "learning_rate": 1.986806057705914e-05, + "loss": 2.3566, + "step": 2030 + }, + { + "epoch": 0.07, + "grad_norm": 0.66108638048172, + "learning_rate": 1.9867888431747856e-05, + "loss": 2.3627, + "step": 2031 + }, + { + "epoch": 0.07, + "grad_norm": 0.6386533379554749, + "learning_rate": 1.9867716174954947e-05, + "loss": 2.3714, + "step": 2032 + }, + { + "epoch": 0.07, + "grad_norm": 0.6785694360733032, + "learning_rate": 1.9867543806682364e-05, + "loss": 2.3925, + "step": 2033 + }, + { + "epoch": 0.07, + "grad_norm": 0.6885417699813843, + "learning_rate": 1.9867371326932057e-05, + "loss": 2.4287, + "step": 2034 + }, + { + "epoch": 0.07, + "grad_norm": 0.6983519196510315, + "learning_rate": 1.9867198735705967e-05, + "loss": 2.3495, + "step": 2035 + }, + { + "epoch": 0.07, + "grad_norm": 0.673380970954895, + "learning_rate": 1.9867026033006054e-05, + "loss": 2.4587, + "step": 2036 + }, + { + "epoch": 0.07, + "grad_norm": 0.6670863032341003, + "learning_rate": 1.9866853218834258e-05, + "loss": 2.3746, + "step": 2037 + }, + { + "epoch": 0.07, + "grad_norm": 0.6862033605575562, + "learning_rate": 1.9866680293192543e-05, + "loss": 2.3832, + "step": 2038 + }, + { + "epoch": 0.07, + "grad_norm": 0.6466721296310425, + "learning_rate": 1.986650725608285e-05, + "loss": 2.3868, + "step": 2039 + }, + { + "epoch": 0.07, + "grad_norm": 0.663447380065918, + "learning_rate": 1.9866334107507142e-05, + "loss": 2.4216, + "step": 2040 + }, + { + "epoch": 0.07, + "grad_norm": 0.6897183656692505, + "learning_rate": 1.986616084746738e-05, + "loss": 2.3383, + "step": 2041 + }, + { + "epoch": 0.07, + "grad_norm": 0.6826854944229126, + "learning_rate": 1.986598747596551e-05, + "loss": 2.3601, + "step": 2042 + }, + { + "epoch": 0.07, + "grad_norm": 0.6641092300415039, + "learning_rate": 1.9865813993003495e-05, + "loss": 2.3777, + "step": 2043 + }, + { + "epoch": 0.07, + "grad_norm": 0.6822092533111572, + "learning_rate": 1.986564039858329e-05, + "loss": 2.3741, + "step": 2044 + }, + { + "epoch": 0.07, + "grad_norm": 0.6672011613845825, + "learning_rate": 1.9865466692706873e-05, + "loss": 2.3549, + "step": 2045 + }, + { + "epoch": 0.07, + "grad_norm": 0.6960858702659607, + "learning_rate": 1.9865292875376187e-05, + "loss": 2.3543, + "step": 2046 + }, + { + "epoch": 0.07, + "grad_norm": 0.6726285815238953, + "learning_rate": 1.9865118946593206e-05, + "loss": 2.4135, + "step": 2047 + }, + { + "epoch": 0.07, + "grad_norm": 0.6630895733833313, + "learning_rate": 1.9864944906359894e-05, + "loss": 2.398, + "step": 2048 + }, + { + "epoch": 0.07, + "grad_norm": 0.6435898542404175, + "learning_rate": 1.9864770754678212e-05, + "loss": 2.3481, + "step": 2049 + }, + { + "epoch": 0.07, + "grad_norm": 0.6689082384109497, + "learning_rate": 1.9864596491550138e-05, + "loss": 2.4276, + "step": 2050 + }, + { + "epoch": 0.07, + "grad_norm": 0.6663751602172852, + "learning_rate": 1.9864422116977628e-05, + "loss": 2.4089, + "step": 2051 + }, + { + "epoch": 0.07, + "grad_norm": 0.7422425150871277, + "learning_rate": 1.986424763096266e-05, + "loss": 2.4257, + "step": 2052 + }, + { + "epoch": 0.07, + "grad_norm": 0.6590396761894226, + "learning_rate": 1.9864073033507205e-05, + "loss": 2.3919, + "step": 2053 + }, + { + "epoch": 0.07, + "grad_norm": 0.6558582782745361, + "learning_rate": 1.986389832461323e-05, + "loss": 2.3787, + "step": 2054 + }, + { + "epoch": 0.07, + "grad_norm": 0.6467647552490234, + "learning_rate": 1.9863723504282716e-05, + "loss": 2.3168, + "step": 2055 + }, + { + "epoch": 0.07, + "grad_norm": 0.6688591241836548, + "learning_rate": 1.9863548572517635e-05, + "loss": 2.3702, + "step": 2056 + }, + { + "epoch": 0.07, + "grad_norm": 0.6677448749542236, + "learning_rate": 1.9863373529319962e-05, + "loss": 2.3814, + "step": 2057 + }, + { + "epoch": 0.07, + "grad_norm": 0.6953178644180298, + "learning_rate": 1.9863198374691673e-05, + "loss": 2.4052, + "step": 2058 + }, + { + "epoch": 0.07, + "grad_norm": 0.6931257247924805, + "learning_rate": 1.986302310863475e-05, + "loss": 2.4291, + "step": 2059 + }, + { + "epoch": 0.07, + "grad_norm": 0.6836228966712952, + "learning_rate": 1.9862847731151174e-05, + "loss": 2.3587, + "step": 2060 + }, + { + "epoch": 0.07, + "grad_norm": 0.6498184204101562, + "learning_rate": 1.986267224224292e-05, + "loss": 2.3757, + "step": 2061 + }, + { + "epoch": 0.07, + "grad_norm": 0.6620586514472961, + "learning_rate": 1.986249664191198e-05, + "loss": 2.4577, + "step": 2062 + }, + { + "epoch": 0.07, + "grad_norm": 0.6652518510818481, + "learning_rate": 1.9862320930160333e-05, + "loss": 2.4013, + "step": 2063 + }, + { + "epoch": 0.07, + "grad_norm": 0.6732465624809265, + "learning_rate": 1.9862145106989962e-05, + "loss": 2.3901, + "step": 2064 + }, + { + "epoch": 0.07, + "grad_norm": 0.6599177122116089, + "learning_rate": 1.9861969172402856e-05, + "loss": 2.3518, + "step": 2065 + }, + { + "epoch": 0.07, + "grad_norm": 0.6582318544387817, + "learning_rate": 1.9861793126401e-05, + "loss": 2.4471, + "step": 2066 + }, + { + "epoch": 0.07, + "grad_norm": 0.6476503610610962, + "learning_rate": 1.986161696898639e-05, + "loss": 2.3543, + "step": 2067 + }, + { + "epoch": 0.07, + "grad_norm": 0.652982234954834, + "learning_rate": 1.9861440700161004e-05, + "loss": 2.384, + "step": 2068 + }, + { + "epoch": 0.07, + "grad_norm": 0.6581761240959167, + "learning_rate": 1.9861264319926845e-05, + "loss": 2.4123, + "step": 2069 + }, + { + "epoch": 0.07, + "grad_norm": 0.6495481133460999, + "learning_rate": 1.98610878282859e-05, + "loss": 2.3554, + "step": 2070 + }, + { + "epoch": 0.07, + "grad_norm": 0.6670634746551514, + "learning_rate": 1.9860911225240164e-05, + "loss": 2.3854, + "step": 2071 + }, + { + "epoch": 0.07, + "grad_norm": 0.6399278044700623, + "learning_rate": 1.9860734510791632e-05, + "loss": 2.4069, + "step": 2072 + }, + { + "epoch": 0.07, + "grad_norm": 0.6676241755485535, + "learning_rate": 1.98605576849423e-05, + "loss": 2.4118, + "step": 2073 + }, + { + "epoch": 0.07, + "grad_norm": 0.6826092600822449, + "learning_rate": 1.986038074769417e-05, + "loss": 2.3657, + "step": 2074 + }, + { + "epoch": 0.07, + "grad_norm": 0.7534124255180359, + "learning_rate": 1.986020369904923e-05, + "loss": 2.4276, + "step": 2075 + }, + { + "epoch": 0.07, + "grad_norm": 0.6724449396133423, + "learning_rate": 1.986002653900949e-05, + "loss": 2.4011, + "step": 2076 + }, + { + "epoch": 0.07, + "grad_norm": 0.6664377450942993, + "learning_rate": 1.985984926757695e-05, + "loss": 2.4104, + "step": 2077 + }, + { + "epoch": 0.07, + "grad_norm": 0.6607882976531982, + "learning_rate": 1.985967188475361e-05, + "loss": 2.4033, + "step": 2078 + }, + { + "epoch": 0.07, + "grad_norm": 0.7024486064910889, + "learning_rate": 1.9859494390541476e-05, + "loss": 2.4296, + "step": 2079 + }, + { + "epoch": 0.07, + "grad_norm": 0.6828149557113647, + "learning_rate": 1.9859316784942554e-05, + "loss": 2.3237, + "step": 2080 + }, + { + "epoch": 0.07, + "grad_norm": 0.6808659434318542, + "learning_rate": 1.9859139067958847e-05, + "loss": 2.424, + "step": 2081 + }, + { + "epoch": 0.07, + "grad_norm": 0.6621743440628052, + "learning_rate": 1.9858961239592367e-05, + "loss": 2.3853, + "step": 2082 + }, + { + "epoch": 0.07, + "grad_norm": 0.68265300989151, + "learning_rate": 1.985878329984512e-05, + "loss": 2.3204, + "step": 2083 + }, + { + "epoch": 0.07, + "grad_norm": 0.6555649042129517, + "learning_rate": 1.9858605248719116e-05, + "loss": 2.3968, + "step": 2084 + }, + { + "epoch": 0.07, + "grad_norm": 0.6771050095558167, + "learning_rate": 1.985842708621637e-05, + "loss": 2.3691, + "step": 2085 + }, + { + "epoch": 0.07, + "grad_norm": 0.6494032144546509, + "learning_rate": 1.9858248812338888e-05, + "loss": 2.4183, + "step": 2086 + }, + { + "epoch": 0.07, + "grad_norm": 0.669026792049408, + "learning_rate": 1.9858070427088693e-05, + "loss": 2.3452, + "step": 2087 + }, + { + "epoch": 0.07, + "grad_norm": 0.7020699381828308, + "learning_rate": 1.985789193046779e-05, + "loss": 2.3691, + "step": 2088 + }, + { + "epoch": 0.07, + "grad_norm": 0.6524288654327393, + "learning_rate": 1.9857713322478206e-05, + "loss": 2.3178, + "step": 2089 + }, + { + "epoch": 0.07, + "grad_norm": 0.6542246341705322, + "learning_rate": 1.9857534603121954e-05, + "loss": 2.3738, + "step": 2090 + }, + { + "epoch": 0.07, + "grad_norm": 0.668902575969696, + "learning_rate": 1.985735577240105e-05, + "loss": 2.3369, + "step": 2091 + }, + { + "epoch": 0.07, + "grad_norm": 0.6768938899040222, + "learning_rate": 1.9857176830317522e-05, + "loss": 2.3875, + "step": 2092 + }, + { + "epoch": 0.07, + "grad_norm": 0.6657976508140564, + "learning_rate": 1.9856997776873386e-05, + "loss": 2.367, + "step": 2093 + }, + { + "epoch": 0.07, + "grad_norm": 0.6555323004722595, + "learning_rate": 1.9856818612070665e-05, + "loss": 2.3022, + "step": 2094 + }, + { + "epoch": 0.07, + "grad_norm": 0.6780571937561035, + "learning_rate": 1.9856639335911385e-05, + "loss": 2.3329, + "step": 2095 + }, + { + "epoch": 0.07, + "grad_norm": 0.6511105298995972, + "learning_rate": 1.985645994839757e-05, + "loss": 2.3687, + "step": 2096 + }, + { + "epoch": 0.07, + "grad_norm": 0.6600292921066284, + "learning_rate": 1.9856280449531244e-05, + "loss": 2.349, + "step": 2097 + }, + { + "epoch": 0.07, + "grad_norm": 0.6861180067062378, + "learning_rate": 1.9856100839314442e-05, + "loss": 2.4087, + "step": 2098 + }, + { + "epoch": 0.07, + "grad_norm": 0.6916261911392212, + "learning_rate": 1.9855921117749186e-05, + "loss": 2.3968, + "step": 2099 + }, + { + "epoch": 0.07, + "grad_norm": 0.6579020619392395, + "learning_rate": 1.9855741284837508e-05, + "loss": 2.3103, + "step": 2100 + }, + { + "epoch": 0.07, + "grad_norm": 0.6711440682411194, + "learning_rate": 1.9855561340581445e-05, + "loss": 2.419, + "step": 2101 + }, + { + "epoch": 0.07, + "grad_norm": 0.6653145551681519, + "learning_rate": 1.9855381284983023e-05, + "loss": 2.393, + "step": 2102 + }, + { + "epoch": 0.07, + "grad_norm": 0.6877423524856567, + "learning_rate": 1.985520111804428e-05, + "loss": 2.4116, + "step": 2103 + }, + { + "epoch": 0.07, + "grad_norm": 0.6493449807167053, + "learning_rate": 1.985502083976725e-05, + "loss": 2.3635, + "step": 2104 + }, + { + "epoch": 0.07, + "grad_norm": 0.6679942607879639, + "learning_rate": 1.985484045015397e-05, + "loss": 2.4139, + "step": 2105 + }, + { + "epoch": 0.07, + "grad_norm": 0.6926138401031494, + "learning_rate": 1.985465994920648e-05, + "loss": 2.3995, + "step": 2106 + }, + { + "epoch": 0.07, + "grad_norm": 0.659649133682251, + "learning_rate": 1.9854479336926816e-05, + "loss": 2.3948, + "step": 2107 + }, + { + "epoch": 0.07, + "grad_norm": 0.6499054431915283, + "learning_rate": 1.985429861331702e-05, + "loss": 2.3598, + "step": 2108 + }, + { + "epoch": 0.07, + "grad_norm": 0.6990717649459839, + "learning_rate": 1.9854117778379135e-05, + "loss": 2.3509, + "step": 2109 + }, + { + "epoch": 0.07, + "grad_norm": 0.715269148349762, + "learning_rate": 1.9853936832115198e-05, + "loss": 2.3407, + "step": 2110 + }, + { + "epoch": 0.07, + "grad_norm": 0.7020423412322998, + "learning_rate": 1.9853755774527262e-05, + "loss": 2.2913, + "step": 2111 + }, + { + "epoch": 0.07, + "grad_norm": 0.716228723526001, + "learning_rate": 1.9853574605617364e-05, + "loss": 2.3399, + "step": 2112 + }, + { + "epoch": 0.07, + "grad_norm": 0.6657246351242065, + "learning_rate": 1.985339332538756e-05, + "loss": 2.3581, + "step": 2113 + }, + { + "epoch": 0.07, + "grad_norm": 0.6320322155952454, + "learning_rate": 1.9853211933839887e-05, + "loss": 2.4297, + "step": 2114 + }, + { + "epoch": 0.07, + "grad_norm": 0.6691752076148987, + "learning_rate": 1.9853030430976404e-05, + "loss": 2.3932, + "step": 2115 + }, + { + "epoch": 0.07, + "grad_norm": 0.7007383704185486, + "learning_rate": 1.985284881679916e-05, + "loss": 2.3776, + "step": 2116 + }, + { + "epoch": 0.07, + "grad_norm": 0.700365424156189, + "learning_rate": 1.98526670913102e-05, + "loss": 2.3643, + "step": 2117 + }, + { + "epoch": 0.07, + "grad_norm": 0.6492392420768738, + "learning_rate": 1.9852485254511584e-05, + "loss": 2.3954, + "step": 2118 + }, + { + "epoch": 0.07, + "grad_norm": 0.6809250116348267, + "learning_rate": 1.985230330640536e-05, + "loss": 2.3443, + "step": 2119 + }, + { + "epoch": 0.07, + "grad_norm": 0.7044557332992554, + "learning_rate": 1.985212124699359e-05, + "loss": 2.4108, + "step": 2120 + }, + { + "epoch": 0.07, + "grad_norm": 0.660711944103241, + "learning_rate": 1.9851939076278328e-05, + "loss": 2.4054, + "step": 2121 + }, + { + "epoch": 0.07, + "grad_norm": 0.7104321122169495, + "learning_rate": 1.9851756794261634e-05, + "loss": 2.3376, + "step": 2122 + }, + { + "epoch": 0.07, + "grad_norm": 0.6935300230979919, + "learning_rate": 1.985157440094556e-05, + "loss": 2.3494, + "step": 2123 + }, + { + "epoch": 0.07, + "grad_norm": 0.6529771685600281, + "learning_rate": 1.985139189633218e-05, + "loss": 2.3632, + "step": 2124 + }, + { + "epoch": 0.07, + "grad_norm": 0.6651171445846558, + "learning_rate": 1.985120928042354e-05, + "loss": 2.3846, + "step": 2125 + }, + { + "epoch": 0.07, + "grad_norm": 0.683975875377655, + "learning_rate": 1.9851026553221715e-05, + "loss": 2.3916, + "step": 2126 + }, + { + "epoch": 0.07, + "grad_norm": 0.6484038829803467, + "learning_rate": 1.9850843714728763e-05, + "loss": 2.3413, + "step": 2127 + }, + { + "epoch": 0.07, + "grad_norm": 0.6678999066352844, + "learning_rate": 1.9850660764946754e-05, + "loss": 2.3454, + "step": 2128 + }, + { + "epoch": 0.07, + "grad_norm": 0.6546140909194946, + "learning_rate": 1.985047770387775e-05, + "loss": 2.4254, + "step": 2129 + }, + { + "epoch": 0.07, + "grad_norm": 0.6998836994171143, + "learning_rate": 1.985029453152383e-05, + "loss": 2.4132, + "step": 2130 + }, + { + "epoch": 0.07, + "grad_norm": 0.7189781069755554, + "learning_rate": 1.9850111247887047e-05, + "loss": 2.3623, + "step": 2131 + }, + { + "epoch": 0.07, + "grad_norm": 0.6607388257980347, + "learning_rate": 1.984992785296948e-05, + "loss": 2.3734, + "step": 2132 + }, + { + "epoch": 0.07, + "grad_norm": 0.6751648187637329, + "learning_rate": 1.9849744346773203e-05, + "loss": 2.3753, + "step": 2133 + }, + { + "epoch": 0.07, + "grad_norm": 0.6989012956619263, + "learning_rate": 1.9849560729300288e-05, + "loss": 2.3938, + "step": 2134 + }, + { + "epoch": 0.07, + "grad_norm": 0.6420818567276001, + "learning_rate": 1.9849377000552805e-05, + "loss": 2.3443, + "step": 2135 + }, + { + "epoch": 0.07, + "grad_norm": 0.6902161240577698, + "learning_rate": 1.9849193160532836e-05, + "loss": 2.3801, + "step": 2136 + }, + { + "epoch": 0.07, + "grad_norm": 0.692310631275177, + "learning_rate": 1.9849009209242455e-05, + "loss": 2.3207, + "step": 2137 + }, + { + "epoch": 0.07, + "grad_norm": 0.6986352801322937, + "learning_rate": 1.9848825146683735e-05, + "loss": 2.3127, + "step": 2138 + }, + { + "epoch": 0.07, + "grad_norm": 0.7159571051597595, + "learning_rate": 1.9848640972858768e-05, + "loss": 2.372, + "step": 2139 + }, + { + "epoch": 0.07, + "grad_norm": 0.7222824096679688, + "learning_rate": 1.9848456687769623e-05, + "loss": 2.3334, + "step": 2140 + }, + { + "epoch": 0.07, + "grad_norm": 0.6626449823379517, + "learning_rate": 1.9848272291418387e-05, + "loss": 2.3185, + "step": 2141 + }, + { + "epoch": 0.07, + "grad_norm": 0.6679739952087402, + "learning_rate": 1.984808778380714e-05, + "loss": 2.3761, + "step": 2142 + }, + { + "epoch": 0.07, + "grad_norm": 0.682604193687439, + "learning_rate": 1.984790316493797e-05, + "loss": 2.3264, + "step": 2143 + }, + { + "epoch": 0.07, + "grad_norm": 0.7066798806190491, + "learning_rate": 1.9847718434812965e-05, + "loss": 2.4053, + "step": 2144 + }, + { + "epoch": 0.07, + "grad_norm": 0.6840493679046631, + "learning_rate": 1.9847533593434206e-05, + "loss": 2.4115, + "step": 2145 + }, + { + "epoch": 0.07, + "grad_norm": 0.6960777640342712, + "learning_rate": 1.984734864080378e-05, + "loss": 2.3578, + "step": 2146 + }, + { + "epoch": 0.07, + "grad_norm": 0.6708642244338989, + "learning_rate": 1.9847163576923784e-05, + "loss": 2.3484, + "step": 2147 + }, + { + "epoch": 0.07, + "grad_norm": 0.655567467212677, + "learning_rate": 1.9846978401796304e-05, + "loss": 2.3435, + "step": 2148 + }, + { + "epoch": 0.07, + "grad_norm": 0.670125424861908, + "learning_rate": 1.9846793115423433e-05, + "loss": 2.3615, + "step": 2149 + }, + { + "epoch": 0.07, + "grad_norm": 0.7002980709075928, + "learning_rate": 1.9846607717807266e-05, + "loss": 2.3947, + "step": 2150 + }, + { + "epoch": 0.07, + "grad_norm": 0.6629713177680969, + "learning_rate": 1.9846422208949893e-05, + "loss": 2.4121, + "step": 2151 + }, + { + "epoch": 0.07, + "grad_norm": 0.7053492069244385, + "learning_rate": 1.9846236588853414e-05, + "loss": 2.4199, + "step": 2152 + }, + { + "epoch": 0.07, + "grad_norm": 0.6962704062461853, + "learning_rate": 1.9846050857519926e-05, + "loss": 2.324, + "step": 2153 + }, + { + "epoch": 0.07, + "grad_norm": 0.6892321705818176, + "learning_rate": 1.984586501495152e-05, + "loss": 2.4088, + "step": 2154 + }, + { + "epoch": 0.07, + "grad_norm": 0.6752204298973083, + "learning_rate": 1.9845679061150305e-05, + "loss": 2.3328, + "step": 2155 + }, + { + "epoch": 0.07, + "grad_norm": 0.6548035144805908, + "learning_rate": 1.984549299611838e-05, + "loss": 2.3186, + "step": 2156 + }, + { + "epoch": 0.07, + "grad_norm": 0.6625932455062866, + "learning_rate": 1.9845306819857844e-05, + "loss": 2.3366, + "step": 2157 + }, + { + "epoch": 0.07, + "grad_norm": 0.734879732131958, + "learning_rate": 1.9845120532370797e-05, + "loss": 2.3609, + "step": 2158 + }, + { + "epoch": 0.07, + "grad_norm": 0.6960425972938538, + "learning_rate": 1.9844934133659356e-05, + "loss": 2.3509, + "step": 2159 + }, + { + "epoch": 0.07, + "grad_norm": 0.6719722747802734, + "learning_rate": 1.9844747623725615e-05, + "loss": 2.3544, + "step": 2160 + }, + { + "epoch": 0.07, + "grad_norm": 0.6537691354751587, + "learning_rate": 1.9844561002571683e-05, + "loss": 2.3312, + "step": 2161 + }, + { + "epoch": 0.07, + "grad_norm": 0.6795572638511658, + "learning_rate": 1.984437427019967e-05, + "loss": 2.3576, + "step": 2162 + }, + { + "epoch": 0.07, + "grad_norm": 0.6786391735076904, + "learning_rate": 1.984418742661169e-05, + "loss": 2.3089, + "step": 2163 + }, + { + "epoch": 0.07, + "grad_norm": 0.6689738035202026, + "learning_rate": 1.9844000471809845e-05, + "loss": 2.3998, + "step": 2164 + }, + { + "epoch": 0.07, + "grad_norm": 0.752162754535675, + "learning_rate": 1.9843813405796257e-05, + "loss": 2.4445, + "step": 2165 + }, + { + "epoch": 0.07, + "grad_norm": 0.6791106462478638, + "learning_rate": 1.984362622857303e-05, + "loss": 2.3781, + "step": 2166 + }, + { + "epoch": 0.07, + "grad_norm": 0.6738051772117615, + "learning_rate": 1.9843438940142286e-05, + "loss": 2.3065, + "step": 2167 + }, + { + "epoch": 0.07, + "grad_norm": 0.7057000994682312, + "learning_rate": 1.9843251540506133e-05, + "loss": 2.4463, + "step": 2168 + }, + { + "epoch": 0.07, + "grad_norm": 0.6772225499153137, + "learning_rate": 1.9843064029666697e-05, + "loss": 2.4504, + "step": 2169 + }, + { + "epoch": 0.07, + "grad_norm": 0.677797794342041, + "learning_rate": 1.9842876407626092e-05, + "loss": 2.3421, + "step": 2170 + }, + { + "epoch": 0.07, + "grad_norm": 0.6764008402824402, + "learning_rate": 1.9842688674386436e-05, + "loss": 2.3668, + "step": 2171 + }, + { + "epoch": 0.07, + "grad_norm": 0.7085667848587036, + "learning_rate": 1.984250082994985e-05, + "loss": 2.4127, + "step": 2172 + }, + { + "epoch": 0.07, + "grad_norm": 0.6663535237312317, + "learning_rate": 1.9842312874318465e-05, + "loss": 2.4099, + "step": 2173 + }, + { + "epoch": 0.07, + "grad_norm": 0.682090699672699, + "learning_rate": 1.984212480749439e-05, + "loss": 2.4213, + "step": 2174 + }, + { + "epoch": 0.07, + "grad_norm": 0.6791077256202698, + "learning_rate": 1.984193662947976e-05, + "loss": 2.387, + "step": 2175 + }, + { + "epoch": 0.07, + "grad_norm": 0.6651291251182556, + "learning_rate": 1.9841748340276697e-05, + "loss": 2.3421, + "step": 2176 + }, + { + "epoch": 0.07, + "grad_norm": 0.6477990746498108, + "learning_rate": 1.9841559939887333e-05, + "loss": 2.3968, + "step": 2177 + }, + { + "epoch": 0.07, + "grad_norm": 0.6812819242477417, + "learning_rate": 1.9841371428313784e-05, + "loss": 2.3848, + "step": 2178 + }, + { + "epoch": 0.07, + "grad_norm": 0.6822513341903687, + "learning_rate": 1.9841182805558196e-05, + "loss": 2.2675, + "step": 2179 + }, + { + "epoch": 0.07, + "grad_norm": 0.6849010586738586, + "learning_rate": 1.984099407162269e-05, + "loss": 2.3486, + "step": 2180 + }, + { + "epoch": 0.07, + "grad_norm": 0.6623086333274841, + "learning_rate": 1.98408052265094e-05, + "loss": 2.3179, + "step": 2181 + }, + { + "epoch": 0.07, + "grad_norm": 0.6471244692802429, + "learning_rate": 1.984061627022046e-05, + "loss": 2.3481, + "step": 2182 + }, + { + "epoch": 0.07, + "grad_norm": 0.6550038456916809, + "learning_rate": 1.9840427202758005e-05, + "loss": 2.3348, + "step": 2183 + }, + { + "epoch": 0.07, + "grad_norm": 0.664729118347168, + "learning_rate": 1.9840238024124167e-05, + "loss": 2.3733, + "step": 2184 + }, + { + "epoch": 0.07, + "grad_norm": 0.6552519202232361, + "learning_rate": 1.984004873432109e-05, + "loss": 2.3487, + "step": 2185 + }, + { + "epoch": 0.07, + "grad_norm": 0.67540442943573, + "learning_rate": 1.9839859333350908e-05, + "loss": 2.3756, + "step": 2186 + }, + { + "epoch": 0.07, + "grad_norm": 0.6946542263031006, + "learning_rate": 1.9839669821215764e-05, + "loss": 2.3425, + "step": 2187 + }, + { + "epoch": 0.07, + "grad_norm": 0.6732617020606995, + "learning_rate": 1.9839480197917797e-05, + "loss": 2.3929, + "step": 2188 + }, + { + "epoch": 0.07, + "grad_norm": 0.6694601774215698, + "learning_rate": 1.9839290463459147e-05, + "loss": 2.4033, + "step": 2189 + }, + { + "epoch": 0.07, + "grad_norm": 0.6610518097877502, + "learning_rate": 1.983910061784196e-05, + "loss": 2.4387, + "step": 2190 + }, + { + "epoch": 0.07, + "grad_norm": 0.6607776284217834, + "learning_rate": 1.9838910661068383e-05, + "loss": 2.3341, + "step": 2191 + }, + { + "epoch": 0.07, + "grad_norm": 0.6444172859191895, + "learning_rate": 1.9838720593140556e-05, + "loss": 2.3956, + "step": 2192 + }, + { + "epoch": 0.07, + "grad_norm": 0.680614173412323, + "learning_rate": 1.983853041406063e-05, + "loss": 2.3562, + "step": 2193 + }, + { + "epoch": 0.07, + "grad_norm": 0.7059701681137085, + "learning_rate": 1.9838340123830757e-05, + "loss": 2.3543, + "step": 2194 + }, + { + "epoch": 0.07, + "grad_norm": 0.6627543568611145, + "learning_rate": 1.9838149722453083e-05, + "loss": 2.345, + "step": 2195 + }, + { + "epoch": 0.07, + "grad_norm": 0.6775937080383301, + "learning_rate": 1.9837959209929757e-05, + "loss": 2.3323, + "step": 2196 + }, + { + "epoch": 0.07, + "grad_norm": 0.6425820589065552, + "learning_rate": 1.9837768586262932e-05, + "loss": 2.359, + "step": 2197 + }, + { + "epoch": 0.07, + "grad_norm": 0.6489399671554565, + "learning_rate": 1.9837577851454766e-05, + "loss": 2.3938, + "step": 2198 + }, + { + "epoch": 0.07, + "grad_norm": 0.6738085746765137, + "learning_rate": 1.983738700550741e-05, + "loss": 2.3928, + "step": 2199 + }, + { + "epoch": 0.07, + "grad_norm": 0.7129176259040833, + "learning_rate": 1.9837196048423016e-05, + "loss": 2.2998, + "step": 2200 + }, + { + "epoch": 0.07, + "grad_norm": 0.6612347364425659, + "learning_rate": 1.9837004980203753e-05, + "loss": 2.3982, + "step": 2201 + }, + { + "epoch": 0.07, + "grad_norm": 0.6673231720924377, + "learning_rate": 1.983681380085177e-05, + "loss": 2.4097, + "step": 2202 + }, + { + "epoch": 0.07, + "grad_norm": 0.6592409014701843, + "learning_rate": 1.983662251036923e-05, + "loss": 2.3826, + "step": 2203 + }, + { + "epoch": 0.07, + "grad_norm": 0.6936072111129761, + "learning_rate": 1.9836431108758295e-05, + "loss": 2.3897, + "step": 2204 + }, + { + "epoch": 0.07, + "grad_norm": 0.7123696208000183, + "learning_rate": 1.983623959602112e-05, + "loss": 2.3228, + "step": 2205 + }, + { + "epoch": 0.07, + "grad_norm": 0.6704083681106567, + "learning_rate": 1.9836047972159883e-05, + "loss": 2.3384, + "step": 2206 + }, + { + "epoch": 0.07, + "grad_norm": 0.6490381360054016, + "learning_rate": 1.9835856237176737e-05, + "loss": 2.3899, + "step": 2207 + }, + { + "epoch": 0.07, + "grad_norm": 0.6442617774009705, + "learning_rate": 1.983566439107385e-05, + "loss": 2.2996, + "step": 2208 + }, + { + "epoch": 0.07, + "grad_norm": 0.6636687517166138, + "learning_rate": 1.9835472433853393e-05, + "loss": 2.3065, + "step": 2209 + }, + { + "epoch": 0.07, + "grad_norm": 0.6941110491752625, + "learning_rate": 1.983528036551753e-05, + "loss": 2.3928, + "step": 2210 + }, + { + "epoch": 0.07, + "grad_norm": 0.6904152631759644, + "learning_rate": 1.983508818606844e-05, + "loss": 2.3905, + "step": 2211 + }, + { + "epoch": 0.07, + "grad_norm": 0.6666905879974365, + "learning_rate": 1.983489589550828e-05, + "loss": 2.4326, + "step": 2212 + }, + { + "epoch": 0.07, + "grad_norm": 0.6616180539131165, + "learning_rate": 1.9834703493839232e-05, + "loss": 2.3421, + "step": 2213 + }, + { + "epoch": 0.07, + "grad_norm": 0.6803056597709656, + "learning_rate": 1.983451098106347e-05, + "loss": 2.3051, + "step": 2214 + }, + { + "epoch": 0.07, + "grad_norm": 0.6685052514076233, + "learning_rate": 1.9834318357183165e-05, + "loss": 2.2906, + "step": 2215 + }, + { + "epoch": 0.07, + "grad_norm": 0.6381955742835999, + "learning_rate": 1.9834125622200495e-05, + "loss": 2.3177, + "step": 2216 + }, + { + "epoch": 0.07, + "grad_norm": 0.6845715641975403, + "learning_rate": 1.9833932776117635e-05, + "loss": 2.4243, + "step": 2217 + }, + { + "epoch": 0.07, + "grad_norm": 0.6919439435005188, + "learning_rate": 1.983373981893677e-05, + "loss": 2.4294, + "step": 2218 + }, + { + "epoch": 0.07, + "grad_norm": 0.6709433197975159, + "learning_rate": 1.9833546750660074e-05, + "loss": 2.3393, + "step": 2219 + }, + { + "epoch": 0.07, + "grad_norm": 0.6831055283546448, + "learning_rate": 1.983335357128973e-05, + "loss": 2.3991, + "step": 2220 + }, + { + "epoch": 0.07, + "grad_norm": 0.6682264804840088, + "learning_rate": 1.9833160280827918e-05, + "loss": 2.4037, + "step": 2221 + }, + { + "epoch": 0.07, + "grad_norm": 0.6652941107749939, + "learning_rate": 1.9832966879276823e-05, + "loss": 2.2745, + "step": 2222 + }, + { + "epoch": 0.07, + "grad_norm": 0.7240835428237915, + "learning_rate": 1.9832773366638635e-05, + "loss": 2.3751, + "step": 2223 + }, + { + "epoch": 0.07, + "grad_norm": 0.6648626923561096, + "learning_rate": 1.9832579742915535e-05, + "loss": 2.3702, + "step": 2224 + }, + { + "epoch": 0.07, + "grad_norm": 0.7103949189186096, + "learning_rate": 1.983238600810971e-05, + "loss": 2.422, + "step": 2225 + }, + { + "epoch": 0.07, + "grad_norm": 0.6544563174247742, + "learning_rate": 1.983219216222335e-05, + "loss": 2.4306, + "step": 2226 + }, + { + "epoch": 0.07, + "grad_norm": 0.7084912061691284, + "learning_rate": 1.983199820525865e-05, + "loss": 2.4458, + "step": 2227 + }, + { + "epoch": 0.07, + "grad_norm": 0.6707584857940674, + "learning_rate": 1.9831804137217792e-05, + "loss": 2.3604, + "step": 2228 + }, + { + "epoch": 0.07, + "grad_norm": 0.6936107277870178, + "learning_rate": 1.9831609958102975e-05, + "loss": 2.3785, + "step": 2229 + }, + { + "epoch": 0.07, + "grad_norm": 0.6739218235015869, + "learning_rate": 1.9831415667916384e-05, + "loss": 2.3709, + "step": 2230 + }, + { + "epoch": 0.07, + "grad_norm": 0.6616268754005432, + "learning_rate": 1.9831221266660227e-05, + "loss": 2.2716, + "step": 2231 + }, + { + "epoch": 0.07, + "grad_norm": 0.6548470258712769, + "learning_rate": 1.9831026754336692e-05, + "loss": 2.3323, + "step": 2232 + }, + { + "epoch": 0.07, + "grad_norm": 0.6888502836227417, + "learning_rate": 1.983083213094798e-05, + "loss": 2.3388, + "step": 2233 + }, + { + "epoch": 0.07, + "grad_norm": 0.6601564288139343, + "learning_rate": 1.983063739649629e-05, + "loss": 2.3674, + "step": 2234 + }, + { + "epoch": 0.07, + "grad_norm": 0.6550270318984985, + "learning_rate": 1.9830442550983813e-05, + "loss": 2.3844, + "step": 2235 + }, + { + "epoch": 0.07, + "grad_norm": 0.6713321208953857, + "learning_rate": 1.983024759441276e-05, + "loss": 2.3543, + "step": 2236 + }, + { + "epoch": 0.07, + "grad_norm": 0.6701964735984802, + "learning_rate": 1.9830052526785332e-05, + "loss": 2.4103, + "step": 2237 + }, + { + "epoch": 0.07, + "grad_norm": 0.6644104719161987, + "learning_rate": 1.9829857348103735e-05, + "loss": 2.3459, + "step": 2238 + }, + { + "epoch": 0.07, + "grad_norm": 0.627583920955658, + "learning_rate": 1.9829662058370166e-05, + "loss": 2.3467, + "step": 2239 + }, + { + "epoch": 0.07, + "grad_norm": 0.6823845505714417, + "learning_rate": 1.9829466657586836e-05, + "loss": 2.3181, + "step": 2240 + }, + { + "epoch": 0.07, + "grad_norm": 0.7115896344184875, + "learning_rate": 1.9829271145755953e-05, + "loss": 2.3261, + "step": 2241 + }, + { + "epoch": 0.07, + "grad_norm": 0.6602494716644287, + "learning_rate": 1.9829075522879725e-05, + "loss": 2.3696, + "step": 2242 + }, + { + "epoch": 0.07, + "grad_norm": 0.6751396059989929, + "learning_rate": 1.9828879788960363e-05, + "loss": 2.3747, + "step": 2243 + }, + { + "epoch": 0.07, + "grad_norm": 0.666413426399231, + "learning_rate": 1.9828683944000075e-05, + "loss": 2.3838, + "step": 2244 + }, + { + "epoch": 0.07, + "grad_norm": 0.687860369682312, + "learning_rate": 1.9828487988001076e-05, + "loss": 2.361, + "step": 2245 + }, + { + "epoch": 0.07, + "grad_norm": 0.7031977772712708, + "learning_rate": 1.9828291920965584e-05, + "loss": 2.3181, + "step": 2246 + }, + { + "epoch": 0.07, + "grad_norm": 0.725185215473175, + "learning_rate": 1.9828095742895807e-05, + "loss": 2.3702, + "step": 2247 + }, + { + "epoch": 0.07, + "grad_norm": 0.6503520607948303, + "learning_rate": 1.9827899453793965e-05, + "loss": 2.4002, + "step": 2248 + }, + { + "epoch": 0.07, + "grad_norm": 0.6763826608657837, + "learning_rate": 1.9827703053662276e-05, + "loss": 2.3354, + "step": 2249 + }, + { + "epoch": 0.07, + "grad_norm": 0.676348865032196, + "learning_rate": 1.9827506542502952e-05, + "loss": 2.3694, + "step": 2250 + }, + { + "epoch": 0.07, + "grad_norm": 0.6666421890258789, + "learning_rate": 1.982730992031822e-05, + "loss": 2.3091, + "step": 2251 + }, + { + "epoch": 0.07, + "grad_norm": 0.7023469805717468, + "learning_rate": 1.9827113187110307e-05, + "loss": 2.3515, + "step": 2252 + }, + { + "epoch": 0.07, + "grad_norm": 0.6576856374740601, + "learning_rate": 1.9826916342881423e-05, + "loss": 2.3297, + "step": 2253 + }, + { + "epoch": 0.07, + "grad_norm": 0.7065723538398743, + "learning_rate": 1.9826719387633796e-05, + "loss": 2.3503, + "step": 2254 + }, + { + "epoch": 0.08, + "grad_norm": 0.6622753143310547, + "learning_rate": 1.9826522321369658e-05, + "loss": 2.3695, + "step": 2255 + }, + { + "epoch": 0.08, + "grad_norm": 0.6447135806083679, + "learning_rate": 1.9826325144091223e-05, + "loss": 2.3309, + "step": 2256 + }, + { + "epoch": 0.08, + "grad_norm": 0.6541101336479187, + "learning_rate": 1.9826127855800728e-05, + "loss": 2.3419, + "step": 2257 + }, + { + "epoch": 0.08, + "grad_norm": 0.6862423419952393, + "learning_rate": 1.98259304565004e-05, + "loss": 2.3309, + "step": 2258 + }, + { + "epoch": 0.08, + "grad_norm": 0.6656540632247925, + "learning_rate": 1.982573294619247e-05, + "loss": 2.3244, + "step": 2259 + }, + { + "epoch": 0.08, + "grad_norm": 0.6669071912765503, + "learning_rate": 1.9825535324879163e-05, + "loss": 2.2975, + "step": 2260 + }, + { + "epoch": 0.08, + "grad_norm": 0.6684110760688782, + "learning_rate": 1.9825337592562718e-05, + "loss": 2.4088, + "step": 2261 + }, + { + "epoch": 0.08, + "grad_norm": 0.6939542293548584, + "learning_rate": 1.9825139749245366e-05, + "loss": 2.4086, + "step": 2262 + }, + { + "epoch": 0.08, + "grad_norm": 0.6607959270477295, + "learning_rate": 1.9824941794929348e-05, + "loss": 2.379, + "step": 2263 + }, + { + "epoch": 0.08, + "grad_norm": 0.6760181188583374, + "learning_rate": 1.9824743729616892e-05, + "loss": 2.3516, + "step": 2264 + }, + { + "epoch": 0.08, + "grad_norm": 0.671470046043396, + "learning_rate": 1.982454555331024e-05, + "loss": 2.285, + "step": 2265 + }, + { + "epoch": 0.08, + "grad_norm": 0.6935726404190063, + "learning_rate": 1.982434726601163e-05, + "loss": 2.3662, + "step": 2266 + }, + { + "epoch": 0.08, + "grad_norm": 0.6683235764503479, + "learning_rate": 1.98241488677233e-05, + "loss": 2.3068, + "step": 2267 + }, + { + "epoch": 0.08, + "grad_norm": 0.6576142907142639, + "learning_rate": 1.9823950358447496e-05, + "loss": 2.3303, + "step": 2268 + }, + { + "epoch": 0.08, + "grad_norm": 0.68270343542099, + "learning_rate": 1.982375173818646e-05, + "loss": 2.3367, + "step": 2269 + }, + { + "epoch": 0.08, + "grad_norm": 0.6778784990310669, + "learning_rate": 1.982355300694243e-05, + "loss": 2.3381, + "step": 2270 + }, + { + "epoch": 0.08, + "grad_norm": 0.6466098427772522, + "learning_rate": 1.9823354164717657e-05, + "loss": 2.3546, + "step": 2271 + }, + { + "epoch": 0.08, + "grad_norm": 0.6838887333869934, + "learning_rate": 1.9823155211514387e-05, + "loss": 2.3914, + "step": 2272 + }, + { + "epoch": 0.08, + "grad_norm": 0.6879265904426575, + "learning_rate": 1.9822956147334865e-05, + "loss": 2.3555, + "step": 2273 + }, + { + "epoch": 0.08, + "grad_norm": 0.7021540403366089, + "learning_rate": 1.982275697218134e-05, + "loss": 2.3519, + "step": 2274 + }, + { + "epoch": 0.08, + "grad_norm": 0.684629499912262, + "learning_rate": 1.9822557686056068e-05, + "loss": 2.2958, + "step": 2275 + }, + { + "epoch": 0.08, + "grad_norm": 0.6549875736236572, + "learning_rate": 1.982235828896129e-05, + "loss": 2.3237, + "step": 2276 + }, + { + "epoch": 0.08, + "grad_norm": 0.6954808235168457, + "learning_rate": 1.9822158780899267e-05, + "loss": 2.3542, + "step": 2277 + }, + { + "epoch": 0.08, + "grad_norm": 0.6480743288993835, + "learning_rate": 1.9821959161872254e-05, + "loss": 2.3431, + "step": 2278 + }, + { + "epoch": 0.08, + "grad_norm": 0.6996781229972839, + "learning_rate": 1.9821759431882498e-05, + "loss": 2.4046, + "step": 2279 + }, + { + "epoch": 0.08, + "grad_norm": 0.6610148549079895, + "learning_rate": 1.982155959093226e-05, + "loss": 2.3877, + "step": 2280 + }, + { + "epoch": 0.08, + "grad_norm": 0.6795777082443237, + "learning_rate": 1.98213596390238e-05, + "loss": 2.3228, + "step": 2281 + }, + { + "epoch": 0.08, + "grad_norm": 0.6659331321716309, + "learning_rate": 1.9821159576159373e-05, + "loss": 2.3286, + "step": 2282 + }, + { + "epoch": 0.08, + "grad_norm": 0.6794893145561218, + "learning_rate": 1.9820959402341243e-05, + "loss": 2.3709, + "step": 2283 + }, + { + "epoch": 0.08, + "grad_norm": 0.6683352589607239, + "learning_rate": 1.982075911757167e-05, + "loss": 2.425, + "step": 2284 + }, + { + "epoch": 0.08, + "grad_norm": 0.6822823882102966, + "learning_rate": 1.9820558721852912e-05, + "loss": 2.3288, + "step": 2285 + }, + { + "epoch": 0.08, + "grad_norm": 0.7015583515167236, + "learning_rate": 1.982035821518724e-05, + "loss": 2.3452, + "step": 2286 + }, + { + "epoch": 0.08, + "grad_norm": 0.7066489458084106, + "learning_rate": 1.9820157597576916e-05, + "loss": 2.418, + "step": 2287 + }, + { + "epoch": 0.08, + "grad_norm": 0.7535952925682068, + "learning_rate": 1.9819956869024202e-05, + "loss": 2.3598, + "step": 2288 + }, + { + "epoch": 0.08, + "grad_norm": 0.6653067469596863, + "learning_rate": 1.9819756029531376e-05, + "loss": 2.3433, + "step": 2289 + }, + { + "epoch": 0.08, + "grad_norm": 0.7046130299568176, + "learning_rate": 1.98195550791007e-05, + "loss": 2.3948, + "step": 2290 + }, + { + "epoch": 0.08, + "grad_norm": 0.7025448083877563, + "learning_rate": 1.9819354017734444e-05, + "loss": 2.3742, + "step": 2291 + }, + { + "epoch": 0.08, + "grad_norm": 0.7082207798957825, + "learning_rate": 1.9819152845434884e-05, + "loss": 2.3808, + "step": 2292 + }, + { + "epoch": 0.08, + "grad_norm": 0.6819546222686768, + "learning_rate": 1.981895156220429e-05, + "loss": 2.41, + "step": 2293 + }, + { + "epoch": 0.08, + "grad_norm": 0.669355571269989, + "learning_rate": 1.981875016804493e-05, + "loss": 2.3912, + "step": 2294 + }, + { + "epoch": 0.08, + "grad_norm": 0.6663204431533813, + "learning_rate": 1.981854866295909e-05, + "loss": 2.3569, + "step": 2295 + }, + { + "epoch": 0.08, + "grad_norm": 0.636420488357544, + "learning_rate": 1.981834704694904e-05, + "loss": 2.3258, + "step": 2296 + }, + { + "epoch": 0.08, + "grad_norm": 0.6867048144340515, + "learning_rate": 1.981814532001706e-05, + "loss": 2.3886, + "step": 2297 + }, + { + "epoch": 0.08, + "grad_norm": 0.7038825750350952, + "learning_rate": 1.9817943482165428e-05, + "loss": 2.384, + "step": 2298 + }, + { + "epoch": 0.08, + "grad_norm": 0.7206969857215881, + "learning_rate": 1.981774153339642e-05, + "loss": 2.4174, + "step": 2299 + }, + { + "epoch": 0.08, + "grad_norm": 0.6829017996788025, + "learning_rate": 1.9817539473712325e-05, + "loss": 2.3915, + "step": 2300 + }, + { + "epoch": 0.08, + "grad_norm": 0.6383287906646729, + "learning_rate": 1.9817337303115428e-05, + "loss": 2.3093, + "step": 2301 + }, + { + "epoch": 0.08, + "grad_norm": 0.6981441974639893, + "learning_rate": 1.9817135021608e-05, + "loss": 2.3901, + "step": 2302 + }, + { + "epoch": 0.08, + "grad_norm": 0.6976889371871948, + "learning_rate": 1.9816932629192337e-05, + "loss": 2.3326, + "step": 2303 + }, + { + "epoch": 0.08, + "grad_norm": 0.6695263385772705, + "learning_rate": 1.9816730125870724e-05, + "loss": 2.336, + "step": 2304 + }, + { + "epoch": 0.08, + "grad_norm": 0.6563782095909119, + "learning_rate": 1.9816527511645444e-05, + "loss": 2.2674, + "step": 2305 + }, + { + "epoch": 0.08, + "grad_norm": 0.6686438918113708, + "learning_rate": 1.981632478651879e-05, + "loss": 2.382, + "step": 2306 + }, + { + "epoch": 0.08, + "grad_norm": 0.655463695526123, + "learning_rate": 1.9816121950493054e-05, + "loss": 2.3732, + "step": 2307 + }, + { + "epoch": 0.08, + "grad_norm": 0.6731521487236023, + "learning_rate": 1.981591900357052e-05, + "loss": 2.4068, + "step": 2308 + }, + { + "epoch": 0.08, + "grad_norm": 0.6871774792671204, + "learning_rate": 1.9815715945753494e-05, + "loss": 2.3312, + "step": 2309 + }, + { + "epoch": 0.08, + "grad_norm": 0.710421085357666, + "learning_rate": 1.9815512777044254e-05, + "loss": 2.3715, + "step": 2310 + }, + { + "epoch": 0.08, + "grad_norm": 0.6659837961196899, + "learning_rate": 1.9815309497445104e-05, + "loss": 2.3593, + "step": 2311 + }, + { + "epoch": 0.08, + "grad_norm": 0.7032288908958435, + "learning_rate": 1.9815106106958346e-05, + "loss": 2.4283, + "step": 2312 + }, + { + "epoch": 0.08, + "grad_norm": 0.7084277868270874, + "learning_rate": 1.9814902605586264e-05, + "loss": 2.424, + "step": 2313 + }, + { + "epoch": 0.08, + "grad_norm": 0.6944140195846558, + "learning_rate": 1.981469899333117e-05, + "loss": 2.4113, + "step": 2314 + }, + { + "epoch": 0.08, + "grad_norm": 0.6629042029380798, + "learning_rate": 1.9814495270195356e-05, + "loss": 2.3446, + "step": 2315 + }, + { + "epoch": 0.08, + "grad_norm": 0.6665487885475159, + "learning_rate": 1.9814291436181127e-05, + "loss": 2.3898, + "step": 2316 + }, + { + "epoch": 0.08, + "grad_norm": 0.742047131061554, + "learning_rate": 1.9814087491290787e-05, + "loss": 2.3705, + "step": 2317 + }, + { + "epoch": 0.08, + "grad_norm": 0.6905255913734436, + "learning_rate": 1.9813883435526632e-05, + "loss": 2.3752, + "step": 2318 + }, + { + "epoch": 0.08, + "grad_norm": 0.65250164270401, + "learning_rate": 1.9813679268890977e-05, + "loss": 2.2976, + "step": 2319 + }, + { + "epoch": 0.08, + "grad_norm": 0.6600178480148315, + "learning_rate": 1.9813474991386126e-05, + "loss": 2.3313, + "step": 2320 + }, + { + "epoch": 0.08, + "grad_norm": 0.6407349705696106, + "learning_rate": 1.9813270603014387e-05, + "loss": 2.3562, + "step": 2321 + }, + { + "epoch": 0.08, + "grad_norm": 0.6514217257499695, + "learning_rate": 1.9813066103778065e-05, + "loss": 2.3724, + "step": 2322 + }, + { + "epoch": 0.08, + "grad_norm": 0.6543853282928467, + "learning_rate": 1.9812861493679473e-05, + "loss": 2.3668, + "step": 2323 + }, + { + "epoch": 0.08, + "grad_norm": 0.6927496790885925, + "learning_rate": 1.9812656772720925e-05, + "loss": 2.3391, + "step": 2324 + }, + { + "epoch": 0.08, + "grad_norm": 0.6647445559501648, + "learning_rate": 1.981245194090473e-05, + "loss": 2.3347, + "step": 2325 + }, + { + "epoch": 0.08, + "grad_norm": 0.65156090259552, + "learning_rate": 1.9812246998233203e-05, + "loss": 2.3256, + "step": 2326 + }, + { + "epoch": 0.08, + "grad_norm": 0.6763613224029541, + "learning_rate": 1.981204194470866e-05, + "loss": 2.3799, + "step": 2327 + }, + { + "epoch": 0.08, + "grad_norm": 0.6693637371063232, + "learning_rate": 1.9811836780333423e-05, + "loss": 2.3025, + "step": 2328 + }, + { + "epoch": 0.08, + "grad_norm": 0.6679176688194275, + "learning_rate": 1.98116315051098e-05, + "loss": 2.2816, + "step": 2329 + }, + { + "epoch": 0.08, + "grad_norm": 0.6952856779098511, + "learning_rate": 1.9811426119040112e-05, + "loss": 2.3168, + "step": 2330 + }, + { + "epoch": 0.08, + "grad_norm": 0.6682324409484863, + "learning_rate": 1.9811220622126685e-05, + "loss": 2.3869, + "step": 2331 + }, + { + "epoch": 0.08, + "grad_norm": 0.688409686088562, + "learning_rate": 1.9811015014371833e-05, + "loss": 2.2884, + "step": 2332 + }, + { + "epoch": 0.08, + "grad_norm": 0.6618613600730896, + "learning_rate": 1.981080929577789e-05, + "loss": 2.3327, + "step": 2333 + }, + { + "epoch": 0.08, + "grad_norm": 0.6743007898330688, + "learning_rate": 1.981060346634717e-05, + "loss": 2.4113, + "step": 2334 + }, + { + "epoch": 0.08, + "grad_norm": 0.6952850818634033, + "learning_rate": 1.9810397526082e-05, + "loss": 2.3446, + "step": 2335 + }, + { + "epoch": 0.08, + "grad_norm": 0.6999621391296387, + "learning_rate": 1.981019147498471e-05, + "loss": 2.3243, + "step": 2336 + }, + { + "epoch": 0.08, + "grad_norm": 0.6548631191253662, + "learning_rate": 1.980998531305763e-05, + "loss": 2.2809, + "step": 2337 + }, + { + "epoch": 0.08, + "grad_norm": 0.6674143671989441, + "learning_rate": 1.980977904030308e-05, + "loss": 2.3572, + "step": 2338 + }, + { + "epoch": 0.08, + "grad_norm": 0.6914337277412415, + "learning_rate": 1.9809572656723395e-05, + "loss": 2.3094, + "step": 2339 + }, + { + "epoch": 0.08, + "grad_norm": 0.6847976446151733, + "learning_rate": 1.9809366162320906e-05, + "loss": 2.3424, + "step": 2340 + }, + { + "epoch": 0.08, + "grad_norm": 0.6632505655288696, + "learning_rate": 1.980915955709795e-05, + "loss": 2.3189, + "step": 2341 + }, + { + "epoch": 0.08, + "grad_norm": 0.7025483250617981, + "learning_rate": 1.980895284105686e-05, + "loss": 2.3234, + "step": 2342 + }, + { + "epoch": 0.08, + "grad_norm": 0.6602748036384583, + "learning_rate": 1.9808746014199967e-05, + "loss": 2.3485, + "step": 2343 + }, + { + "epoch": 0.08, + "grad_norm": 0.6879835724830627, + "learning_rate": 1.9808539076529608e-05, + "loss": 2.3745, + "step": 2344 + }, + { + "epoch": 0.08, + "grad_norm": 0.6636591553688049, + "learning_rate": 1.9808332028048126e-05, + "loss": 2.3354, + "step": 2345 + }, + { + "epoch": 0.08, + "grad_norm": 0.675483763217926, + "learning_rate": 1.9808124868757855e-05, + "loss": 2.3341, + "step": 2346 + }, + { + "epoch": 0.08, + "grad_norm": 0.6626991629600525, + "learning_rate": 1.9807917598661136e-05, + "loss": 2.3837, + "step": 2347 + }, + { + "epoch": 0.08, + "grad_norm": 0.6520759463310242, + "learning_rate": 1.9807710217760316e-05, + "loss": 2.3212, + "step": 2348 + }, + { + "epoch": 0.08, + "grad_norm": 0.6476108431816101, + "learning_rate": 1.980750272605773e-05, + "loss": 2.2884, + "step": 2349 + }, + { + "epoch": 0.08, + "grad_norm": 0.6741753816604614, + "learning_rate": 1.980729512355573e-05, + "loss": 2.399, + "step": 2350 + }, + { + "epoch": 0.08, + "grad_norm": 0.6582635641098022, + "learning_rate": 1.9807087410256657e-05, + "loss": 2.3465, + "step": 2351 + }, + { + "epoch": 0.08, + "grad_norm": 0.6374813318252563, + "learning_rate": 1.9806879586162853e-05, + "loss": 2.3512, + "step": 2352 + }, + { + "epoch": 0.08, + "grad_norm": 0.6761130094528198, + "learning_rate": 1.9806671651276676e-05, + "loss": 2.311, + "step": 2353 + }, + { + "epoch": 0.08, + "grad_norm": 0.6651429533958435, + "learning_rate": 1.980646360560047e-05, + "loss": 2.3808, + "step": 2354 + }, + { + "epoch": 0.08, + "grad_norm": 0.7057443857192993, + "learning_rate": 1.980625544913658e-05, + "loss": 2.3341, + "step": 2355 + }, + { + "epoch": 0.08, + "grad_norm": 0.66246098279953, + "learning_rate": 1.9806047181887368e-05, + "loss": 2.2941, + "step": 2356 + }, + { + "epoch": 0.08, + "grad_norm": 0.7019712924957275, + "learning_rate": 1.9805838803855182e-05, + "loss": 2.3611, + "step": 2357 + }, + { + "epoch": 0.08, + "grad_norm": 0.6965950131416321, + "learning_rate": 1.9805630315042373e-05, + "loss": 2.3089, + "step": 2358 + }, + { + "epoch": 0.08, + "grad_norm": 0.6560185551643372, + "learning_rate": 1.98054217154513e-05, + "loss": 2.3538, + "step": 2359 + }, + { + "epoch": 0.08, + "grad_norm": 0.7050288319587708, + "learning_rate": 1.980521300508432e-05, + "loss": 2.3845, + "step": 2360 + }, + { + "epoch": 0.08, + "grad_norm": 0.6816173791885376, + "learning_rate": 1.980500418394379e-05, + "loss": 2.3627, + "step": 2361 + }, + { + "epoch": 0.08, + "grad_norm": 0.6425358653068542, + "learning_rate": 1.980479525203207e-05, + "loss": 2.2805, + "step": 2362 + }, + { + "epoch": 0.08, + "grad_norm": 0.698404848575592, + "learning_rate": 1.9804586209351516e-05, + "loss": 2.332, + "step": 2363 + }, + { + "epoch": 0.08, + "grad_norm": 0.672673761844635, + "learning_rate": 1.9804377055904496e-05, + "loss": 2.3349, + "step": 2364 + }, + { + "epoch": 0.08, + "grad_norm": 0.6692365407943726, + "learning_rate": 1.9804167791693367e-05, + "loss": 2.362, + "step": 2365 + }, + { + "epoch": 0.08, + "grad_norm": 0.659717321395874, + "learning_rate": 1.98039584167205e-05, + "loss": 2.3975, + "step": 2366 + }, + { + "epoch": 0.08, + "grad_norm": 0.6474272608757019, + "learning_rate": 1.9803748930988254e-05, + "loss": 2.2985, + "step": 2367 + }, + { + "epoch": 0.08, + "grad_norm": 0.6747021079063416, + "learning_rate": 1.9803539334498997e-05, + "loss": 2.2719, + "step": 2368 + }, + { + "epoch": 0.08, + "grad_norm": 0.6841328740119934, + "learning_rate": 1.98033296272551e-05, + "loss": 2.3732, + "step": 2369 + }, + { + "epoch": 0.08, + "grad_norm": 0.7138396501541138, + "learning_rate": 1.980311980925893e-05, + "loss": 2.3756, + "step": 2370 + }, + { + "epoch": 0.08, + "grad_norm": 0.6870377063751221, + "learning_rate": 1.980290988051286e-05, + "loss": 2.3642, + "step": 2371 + }, + { + "epoch": 0.08, + "grad_norm": 0.6847071647644043, + "learning_rate": 1.9802699841019254e-05, + "loss": 2.3311, + "step": 2372 + }, + { + "epoch": 0.08, + "grad_norm": 0.6537051796913147, + "learning_rate": 1.9802489690780494e-05, + "loss": 2.3416, + "step": 2373 + }, + { + "epoch": 0.08, + "grad_norm": 0.685537576675415, + "learning_rate": 1.980227942979895e-05, + "loss": 2.3635, + "step": 2374 + }, + { + "epoch": 0.08, + "grad_norm": 0.6559857726097107, + "learning_rate": 1.9802069058076997e-05, + "loss": 2.2923, + "step": 2375 + }, + { + "epoch": 0.08, + "grad_norm": 0.6780763864517212, + "learning_rate": 1.980185857561701e-05, + "loss": 2.3519, + "step": 2376 + }, + { + "epoch": 0.08, + "grad_norm": 0.6728874444961548, + "learning_rate": 1.980164798242137e-05, + "loss": 2.3395, + "step": 2377 + }, + { + "epoch": 0.08, + "grad_norm": 0.697409451007843, + "learning_rate": 1.980143727849246e-05, + "loss": 2.377, + "step": 2378 + }, + { + "epoch": 0.08, + "grad_norm": 0.7174190878868103, + "learning_rate": 1.9801226463832654e-05, + "loss": 2.3417, + "step": 2379 + }, + { + "epoch": 0.08, + "grad_norm": 0.7185983061790466, + "learning_rate": 1.9801015538444333e-05, + "loss": 2.2838, + "step": 2380 + }, + { + "epoch": 0.08, + "grad_norm": 0.7024831771850586, + "learning_rate": 1.9800804502329884e-05, + "loss": 2.3891, + "step": 2381 + }, + { + "epoch": 0.08, + "grad_norm": 0.6656056642532349, + "learning_rate": 1.9800593355491687e-05, + "loss": 2.3812, + "step": 2382 + }, + { + "epoch": 0.08, + "grad_norm": 0.6568113565444946, + "learning_rate": 1.9800382097932135e-05, + "loss": 2.3372, + "step": 2383 + }, + { + "epoch": 0.08, + "grad_norm": 0.6643727421760559, + "learning_rate": 1.9800170729653603e-05, + "loss": 2.4087, + "step": 2384 + }, + { + "epoch": 0.08, + "grad_norm": 0.6439428925514221, + "learning_rate": 1.979995925065849e-05, + "loss": 2.3234, + "step": 2385 + }, + { + "epoch": 0.08, + "grad_norm": 0.6895545125007629, + "learning_rate": 1.9799747660949182e-05, + "loss": 2.4139, + "step": 2386 + }, + { + "epoch": 0.08, + "grad_norm": 0.6769569516181946, + "learning_rate": 1.9799535960528066e-05, + "loss": 2.3634, + "step": 2387 + }, + { + "epoch": 0.08, + "grad_norm": 0.7185633778572083, + "learning_rate": 1.9799324149397538e-05, + "loss": 2.3182, + "step": 2388 + }, + { + "epoch": 0.08, + "grad_norm": 0.6852063536643982, + "learning_rate": 1.9799112227559983e-05, + "loss": 2.3522, + "step": 2389 + }, + { + "epoch": 0.08, + "grad_norm": 0.6807916164398193, + "learning_rate": 1.9798900195017804e-05, + "loss": 2.3247, + "step": 2390 + }, + { + "epoch": 0.08, + "grad_norm": 0.6746793985366821, + "learning_rate": 1.97986880517734e-05, + "loss": 2.3947, + "step": 2391 + }, + { + "epoch": 0.08, + "grad_norm": 0.658911943435669, + "learning_rate": 1.979847579782915e-05, + "loss": 2.3334, + "step": 2392 + }, + { + "epoch": 0.08, + "grad_norm": 0.6673645377159119, + "learning_rate": 1.979826343318747e-05, + "loss": 2.3625, + "step": 2393 + }, + { + "epoch": 0.08, + "grad_norm": 0.6755536794662476, + "learning_rate": 1.9798050957850748e-05, + "loss": 2.3165, + "step": 2394 + }, + { + "epoch": 0.08, + "grad_norm": 0.6686786413192749, + "learning_rate": 1.979783837182139e-05, + "loss": 2.3413, + "step": 2395 + }, + { + "epoch": 0.08, + "grad_norm": 0.6808382868766785, + "learning_rate": 1.97976256751018e-05, + "loss": 2.3751, + "step": 2396 + }, + { + "epoch": 0.08, + "grad_norm": 0.6608921885490417, + "learning_rate": 1.9797412867694372e-05, + "loss": 2.2984, + "step": 2397 + }, + { + "epoch": 0.08, + "grad_norm": 0.6757985353469849, + "learning_rate": 1.9797199949601517e-05, + "loss": 2.3168, + "step": 2398 + }, + { + "epoch": 0.08, + "grad_norm": 0.6643825769424438, + "learning_rate": 1.979698692082564e-05, + "loss": 2.2871, + "step": 2399 + }, + { + "epoch": 0.08, + "grad_norm": 0.6880276799201965, + "learning_rate": 1.9796773781369147e-05, + "loss": 2.371, + "step": 2400 + }, + { + "epoch": 0.08, + "grad_norm": 0.6854760646820068, + "learning_rate": 1.979656053123444e-05, + "loss": 2.3488, + "step": 2401 + }, + { + "epoch": 0.08, + "grad_norm": 0.6247063875198364, + "learning_rate": 1.9796347170423938e-05, + "loss": 2.305, + "step": 2402 + }, + { + "epoch": 0.08, + "grad_norm": 0.7004512548446655, + "learning_rate": 1.9796133698940047e-05, + "loss": 2.3131, + "step": 2403 + }, + { + "epoch": 0.08, + "grad_norm": 0.6864848732948303, + "learning_rate": 1.9795920116785175e-05, + "loss": 2.3258, + "step": 2404 + }, + { + "epoch": 0.08, + "grad_norm": 0.7278434634208679, + "learning_rate": 1.979570642396174e-05, + "loss": 2.3947, + "step": 2405 + }, + { + "epoch": 0.08, + "grad_norm": 0.7325437664985657, + "learning_rate": 1.9795492620472158e-05, + "loss": 2.3156, + "step": 2406 + }, + { + "epoch": 0.08, + "grad_norm": 0.7307333946228027, + "learning_rate": 1.9795278706318836e-05, + "loss": 2.3246, + "step": 2407 + }, + { + "epoch": 0.08, + "grad_norm": 0.6175476312637329, + "learning_rate": 1.9795064681504198e-05, + "loss": 2.3598, + "step": 2408 + }, + { + "epoch": 0.08, + "grad_norm": 0.6618334054946899, + "learning_rate": 1.979485054603066e-05, + "loss": 2.3681, + "step": 2409 + }, + { + "epoch": 0.08, + "grad_norm": 0.6762998700141907, + "learning_rate": 1.979463629990064e-05, + "loss": 2.2989, + "step": 2410 + }, + { + "epoch": 0.08, + "grad_norm": 0.6708942651748657, + "learning_rate": 1.9794421943116566e-05, + "loss": 2.3587, + "step": 2411 + }, + { + "epoch": 0.08, + "grad_norm": 0.7319777011871338, + "learning_rate": 1.9794207475680842e-05, + "loss": 2.3366, + "step": 2412 + }, + { + "epoch": 0.08, + "grad_norm": 0.7017005681991577, + "learning_rate": 1.9793992897595907e-05, + "loss": 2.4201, + "step": 2413 + }, + { + "epoch": 0.08, + "grad_norm": 0.6522051692008972, + "learning_rate": 1.979377820886418e-05, + "loss": 2.3404, + "step": 2414 + }, + { + "epoch": 0.08, + "grad_norm": 0.6675101518630981, + "learning_rate": 1.979356340948809e-05, + "loss": 2.4141, + "step": 2415 + }, + { + "epoch": 0.08, + "grad_norm": 0.6809315085411072, + "learning_rate": 1.9793348499470054e-05, + "loss": 2.4456, + "step": 2416 + }, + { + "epoch": 0.08, + "grad_norm": 0.6461268067359924, + "learning_rate": 1.979313347881251e-05, + "loss": 2.3205, + "step": 2417 + }, + { + "epoch": 0.08, + "grad_norm": 0.6469259262084961, + "learning_rate": 1.9792918347517883e-05, + "loss": 2.3083, + "step": 2418 + }, + { + "epoch": 0.08, + "grad_norm": 0.7118104696273804, + "learning_rate": 1.9792703105588602e-05, + "loss": 2.4228, + "step": 2419 + }, + { + "epoch": 0.08, + "grad_norm": 0.6664142608642578, + "learning_rate": 1.9792487753027105e-05, + "loss": 2.3281, + "step": 2420 + }, + { + "epoch": 0.08, + "grad_norm": 0.7148756980895996, + "learning_rate": 1.9792272289835813e-05, + "loss": 2.3269, + "step": 2421 + }, + { + "epoch": 0.08, + "grad_norm": 0.6392363905906677, + "learning_rate": 1.9792056716017173e-05, + "loss": 2.3326, + "step": 2422 + }, + { + "epoch": 0.08, + "grad_norm": 0.6856620907783508, + "learning_rate": 1.9791841031573612e-05, + "loss": 2.3162, + "step": 2423 + }, + { + "epoch": 0.08, + "grad_norm": 0.6761419177055359, + "learning_rate": 1.979162523650757e-05, + "loss": 2.3402, + "step": 2424 + }, + { + "epoch": 0.08, + "grad_norm": 0.686103105545044, + "learning_rate": 1.9791409330821487e-05, + "loss": 2.3455, + "step": 2425 + }, + { + "epoch": 0.08, + "grad_norm": 0.6756161451339722, + "learning_rate": 1.97911933145178e-05, + "loss": 2.3883, + "step": 2426 + }, + { + "epoch": 0.08, + "grad_norm": 0.6594185829162598, + "learning_rate": 1.9790977187598944e-05, + "loss": 2.3497, + "step": 2427 + }, + { + "epoch": 0.08, + "grad_norm": 0.6902406215667725, + "learning_rate": 1.979076095006737e-05, + "loss": 2.3433, + "step": 2428 + }, + { + "epoch": 0.08, + "grad_norm": 0.6560533046722412, + "learning_rate": 1.9790544601925516e-05, + "loss": 2.33, + "step": 2429 + }, + { + "epoch": 0.08, + "grad_norm": 0.7036490440368652, + "learning_rate": 1.9790328143175825e-05, + "loss": 2.3057, + "step": 2430 + }, + { + "epoch": 0.08, + "grad_norm": 0.7172425985336304, + "learning_rate": 1.9790111573820748e-05, + "loss": 2.3128, + "step": 2431 + }, + { + "epoch": 0.08, + "grad_norm": 0.6538001298904419, + "learning_rate": 1.9789894893862724e-05, + "loss": 2.3007, + "step": 2432 + }, + { + "epoch": 0.08, + "grad_norm": 0.746077299118042, + "learning_rate": 1.9789678103304207e-05, + "loss": 2.3522, + "step": 2433 + }, + { + "epoch": 0.08, + "grad_norm": 0.641230046749115, + "learning_rate": 1.9789461202147646e-05, + "loss": 2.3442, + "step": 2434 + }, + { + "epoch": 0.08, + "grad_norm": 0.6417468190193176, + "learning_rate": 1.9789244190395487e-05, + "loss": 2.3624, + "step": 2435 + }, + { + "epoch": 0.08, + "grad_norm": 0.6666562557220459, + "learning_rate": 1.9789027068050183e-05, + "loss": 2.3395, + "step": 2436 + }, + { + "epoch": 0.08, + "grad_norm": 0.6717875599861145, + "learning_rate": 1.978880983511419e-05, + "loss": 2.3655, + "step": 2437 + }, + { + "epoch": 0.08, + "grad_norm": 0.6529743671417236, + "learning_rate": 1.978859249158996e-05, + "loss": 2.3807, + "step": 2438 + }, + { + "epoch": 0.08, + "grad_norm": 0.6670441031455994, + "learning_rate": 1.978837503747995e-05, + "loss": 2.3199, + "step": 2439 + }, + { + "epoch": 0.08, + "grad_norm": 0.6656957268714905, + "learning_rate": 1.9788157472786612e-05, + "loss": 2.3606, + "step": 2440 + }, + { + "epoch": 0.08, + "grad_norm": 0.6989373564720154, + "learning_rate": 1.9787939797512414e-05, + "loss": 2.3375, + "step": 2441 + }, + { + "epoch": 0.08, + "grad_norm": 0.6827592849731445, + "learning_rate": 1.9787722011659802e-05, + "loss": 2.3123, + "step": 2442 + }, + { + "epoch": 0.08, + "grad_norm": 0.6697636246681213, + "learning_rate": 1.9787504115231244e-05, + "loss": 2.4168, + "step": 2443 + }, + { + "epoch": 0.08, + "grad_norm": 0.686275839805603, + "learning_rate": 1.9787286108229202e-05, + "loss": 2.3603, + "step": 2444 + }, + { + "epoch": 0.08, + "grad_norm": 0.6657013893127441, + "learning_rate": 1.9787067990656137e-05, + "loss": 2.3509, + "step": 2445 + }, + { + "epoch": 0.08, + "grad_norm": 0.7105251550674438, + "learning_rate": 1.978684976251451e-05, + "loss": 2.3474, + "step": 2446 + }, + { + "epoch": 0.08, + "grad_norm": 0.7280749082565308, + "learning_rate": 1.9786631423806795e-05, + "loss": 2.3462, + "step": 2447 + }, + { + "epoch": 0.08, + "grad_norm": 0.6626918315887451, + "learning_rate": 1.9786412974535455e-05, + "loss": 2.3222, + "step": 2448 + }, + { + "epoch": 0.08, + "grad_norm": 0.6755833029747009, + "learning_rate": 1.9786194414702954e-05, + "loss": 2.2975, + "step": 2449 + }, + { + "epoch": 0.08, + "grad_norm": 0.6483421325683594, + "learning_rate": 1.9785975744311762e-05, + "loss": 2.3571, + "step": 2450 + }, + { + "epoch": 0.08, + "grad_norm": 0.6710434556007385, + "learning_rate": 1.9785756963364357e-05, + "loss": 2.376, + "step": 2451 + }, + { + "epoch": 0.08, + "grad_norm": 0.719467282295227, + "learning_rate": 1.9785538071863196e-05, + "loss": 2.3948, + "step": 2452 + }, + { + "epoch": 0.08, + "grad_norm": 0.6863904595375061, + "learning_rate": 1.9785319069810765e-05, + "loss": 2.3777, + "step": 2453 + }, + { + "epoch": 0.08, + "grad_norm": 0.6910497546195984, + "learning_rate": 1.9785099957209537e-05, + "loss": 2.3985, + "step": 2454 + }, + { + "epoch": 0.08, + "grad_norm": 0.6693934798240662, + "learning_rate": 1.978488073406198e-05, + "loss": 2.3428, + "step": 2455 + }, + { + "epoch": 0.08, + "grad_norm": 0.6567925214767456, + "learning_rate": 1.9784661400370577e-05, + "loss": 2.3877, + "step": 2456 + }, + { + "epoch": 0.08, + "grad_norm": 0.6569461226463318, + "learning_rate": 1.9784441956137806e-05, + "loss": 2.3467, + "step": 2457 + }, + { + "epoch": 0.08, + "grad_norm": 0.6679854393005371, + "learning_rate": 1.978422240136614e-05, + "loss": 2.3632, + "step": 2458 + }, + { + "epoch": 0.08, + "grad_norm": 0.7015809416770935, + "learning_rate": 1.9784002736058063e-05, + "loss": 2.327, + "step": 2459 + }, + { + "epoch": 0.08, + "grad_norm": 0.6517996191978455, + "learning_rate": 1.978378296021606e-05, + "loss": 2.3622, + "step": 2460 + }, + { + "epoch": 0.08, + "grad_norm": 0.7029489278793335, + "learning_rate": 1.978356307384261e-05, + "loss": 2.3368, + "step": 2461 + }, + { + "epoch": 0.08, + "grad_norm": 0.6573650240898132, + "learning_rate": 1.97833430769402e-05, + "loss": 2.335, + "step": 2462 + }, + { + "epoch": 0.08, + "grad_norm": 0.6775755286216736, + "learning_rate": 1.978312296951131e-05, + "loss": 2.3194, + "step": 2463 + }, + { + "epoch": 0.08, + "grad_norm": 0.680691123008728, + "learning_rate": 1.978290275155843e-05, + "loss": 2.4107, + "step": 2464 + }, + { + "epoch": 0.08, + "grad_norm": 0.6899623274803162, + "learning_rate": 1.9782682423084053e-05, + "loss": 2.3359, + "step": 2465 + }, + { + "epoch": 0.08, + "grad_norm": 0.669139564037323, + "learning_rate": 1.978246198409066e-05, + "loss": 2.3315, + "step": 2466 + }, + { + "epoch": 0.08, + "grad_norm": 0.6696339249610901, + "learning_rate": 1.9782241434580744e-05, + "loss": 2.2645, + "step": 2467 + }, + { + "epoch": 0.08, + "grad_norm": 0.6642321944236755, + "learning_rate": 1.97820207745568e-05, + "loss": 2.2725, + "step": 2468 + }, + { + "epoch": 0.08, + "grad_norm": 0.7508084177970886, + "learning_rate": 1.9781800004021314e-05, + "loss": 2.3355, + "step": 2469 + }, + { + "epoch": 0.08, + "grad_norm": 0.6830573678016663, + "learning_rate": 1.9781579122976786e-05, + "loss": 2.3611, + "step": 2470 + }, + { + "epoch": 0.08, + "grad_norm": 0.663981020450592, + "learning_rate": 1.978135813142571e-05, + "loss": 2.2829, + "step": 2471 + }, + { + "epoch": 0.08, + "grad_norm": 0.6628773808479309, + "learning_rate": 1.978113702937058e-05, + "loss": 2.3523, + "step": 2472 + }, + { + "epoch": 0.08, + "grad_norm": 0.6705053448677063, + "learning_rate": 1.9780915816813898e-05, + "loss": 2.3284, + "step": 2473 + }, + { + "epoch": 0.08, + "grad_norm": 0.7338024973869324, + "learning_rate": 1.9780694493758164e-05, + "loss": 2.3616, + "step": 2474 + }, + { + "epoch": 0.08, + "grad_norm": 0.6585288643836975, + "learning_rate": 1.9780473060205873e-05, + "loss": 2.3061, + "step": 2475 + }, + { + "epoch": 0.08, + "grad_norm": 0.6750043630599976, + "learning_rate": 1.978025151615953e-05, + "loss": 2.3577, + "step": 2476 + }, + { + "epoch": 0.08, + "grad_norm": 0.7103081941604614, + "learning_rate": 1.9780029861621636e-05, + "loss": 2.3343, + "step": 2477 + }, + { + "epoch": 0.08, + "grad_norm": 0.6971740126609802, + "learning_rate": 1.9779808096594697e-05, + "loss": 2.3729, + "step": 2478 + }, + { + "epoch": 0.08, + "grad_norm": 0.6948923468589783, + "learning_rate": 1.9779586221081218e-05, + "loss": 2.3973, + "step": 2479 + }, + { + "epoch": 0.08, + "grad_norm": 0.6590224504470825, + "learning_rate": 1.9779364235083705e-05, + "loss": 2.342, + "step": 2480 + }, + { + "epoch": 0.08, + "grad_norm": 0.6810263991355896, + "learning_rate": 1.9779142138604667e-05, + "loss": 2.3886, + "step": 2481 + }, + { + "epoch": 0.08, + "grad_norm": 0.677933931350708, + "learning_rate": 1.977891993164661e-05, + "loss": 2.3141, + "step": 2482 + }, + { + "epoch": 0.08, + "grad_norm": 0.7265552878379822, + "learning_rate": 1.9778697614212046e-05, + "loss": 2.3418, + "step": 2483 + }, + { + "epoch": 0.08, + "grad_norm": 0.7075479030609131, + "learning_rate": 1.9778475186303493e-05, + "loss": 2.3563, + "step": 2484 + }, + { + "epoch": 0.08, + "grad_norm": 0.6601793766021729, + "learning_rate": 1.9778252647923452e-05, + "loss": 2.3481, + "step": 2485 + }, + { + "epoch": 0.08, + "grad_norm": 0.6419060826301575, + "learning_rate": 1.9778029999074445e-05, + "loss": 2.258, + "step": 2486 + }, + { + "epoch": 0.08, + "grad_norm": 0.6921791434288025, + "learning_rate": 1.9777807239758986e-05, + "loss": 2.2663, + "step": 2487 + }, + { + "epoch": 0.08, + "grad_norm": 0.6691721677780151, + "learning_rate": 1.977758436997959e-05, + "loss": 2.3167, + "step": 2488 + }, + { + "epoch": 0.08, + "grad_norm": 0.7009128332138062, + "learning_rate": 1.977736138973878e-05, + "loss": 2.3695, + "step": 2489 + }, + { + "epoch": 0.08, + "grad_norm": 0.6716076731681824, + "learning_rate": 1.9777138299039068e-05, + "loss": 2.3607, + "step": 2490 + }, + { + "epoch": 0.08, + "grad_norm": 0.6918885707855225, + "learning_rate": 1.9776915097882976e-05, + "loss": 2.3011, + "step": 2491 + }, + { + "epoch": 0.08, + "grad_norm": 0.653325080871582, + "learning_rate": 1.977669178627303e-05, + "loss": 2.3295, + "step": 2492 + }, + { + "epoch": 0.08, + "grad_norm": 0.7150638699531555, + "learning_rate": 1.977646836421175e-05, + "loss": 2.3531, + "step": 2493 + }, + { + "epoch": 0.08, + "grad_norm": 0.6699737906455994, + "learning_rate": 1.977624483170166e-05, + "loss": 2.2952, + "step": 2494 + }, + { + "epoch": 0.08, + "grad_norm": 0.6769757270812988, + "learning_rate": 1.9776021188745283e-05, + "loss": 2.3511, + "step": 2495 + }, + { + "epoch": 0.08, + "grad_norm": 0.6839399337768555, + "learning_rate": 1.9775797435345146e-05, + "loss": 2.3964, + "step": 2496 + }, + { + "epoch": 0.08, + "grad_norm": 0.6775975227355957, + "learning_rate": 1.9775573571503782e-05, + "loss": 2.307, + "step": 2497 + }, + { + "epoch": 0.08, + "grad_norm": 0.6734044551849365, + "learning_rate": 1.9775349597223718e-05, + "loss": 2.2954, + "step": 2498 + }, + { + "epoch": 0.08, + "grad_norm": 0.6867079138755798, + "learning_rate": 1.977512551250748e-05, + "loss": 2.3962, + "step": 2499 + }, + { + "epoch": 0.08, + "grad_norm": 0.6623310446739197, + "learning_rate": 1.9774901317357606e-05, + "loss": 2.3888, + "step": 2500 + }, + { + "epoch": 0.08, + "grad_norm": 0.6749738454818726, + "learning_rate": 1.9774677011776627e-05, + "loss": 2.2942, + "step": 2501 + }, + { + "epoch": 0.08, + "grad_norm": 0.7118020057678223, + "learning_rate": 1.977445259576707e-05, + "loss": 2.3604, + "step": 2502 + }, + { + "epoch": 0.08, + "grad_norm": 0.7161041498184204, + "learning_rate": 1.9774228069331477e-05, + "loss": 2.3489, + "step": 2503 + }, + { + "epoch": 0.08, + "grad_norm": 0.6589542031288147, + "learning_rate": 1.9774003432472386e-05, + "loss": 2.3941, + "step": 2504 + }, + { + "epoch": 0.08, + "grad_norm": 0.6774342656135559, + "learning_rate": 1.9773778685192334e-05, + "loss": 2.361, + "step": 2505 + }, + { + "epoch": 0.08, + "grad_norm": 0.6852116584777832, + "learning_rate": 1.9773553827493853e-05, + "loss": 2.3543, + "step": 2506 + }, + { + "epoch": 0.08, + "grad_norm": 0.6582016348838806, + "learning_rate": 1.9773328859379495e-05, + "loss": 2.293, + "step": 2507 + }, + { + "epoch": 0.08, + "grad_norm": 0.66592937707901, + "learning_rate": 1.977310378085179e-05, + "loss": 2.3754, + "step": 2508 + }, + { + "epoch": 0.08, + "grad_norm": 0.6909178495407104, + "learning_rate": 1.977287859191329e-05, + "loss": 2.2913, + "step": 2509 + }, + { + "epoch": 0.08, + "grad_norm": 0.7298354506492615, + "learning_rate": 1.977265329256653e-05, + "loss": 2.3705, + "step": 2510 + }, + { + "epoch": 0.08, + "grad_norm": 0.6798087954521179, + "learning_rate": 1.9772427882814066e-05, + "loss": 2.3607, + "step": 2511 + }, + { + "epoch": 0.08, + "grad_norm": 0.6613409519195557, + "learning_rate": 1.9772202362658436e-05, + "loss": 2.3717, + "step": 2512 + }, + { + "epoch": 0.08, + "grad_norm": 0.7095069885253906, + "learning_rate": 1.977197673210219e-05, + "loss": 2.3206, + "step": 2513 + }, + { + "epoch": 0.08, + "grad_norm": 0.7083219885826111, + "learning_rate": 1.977175099114788e-05, + "loss": 2.3383, + "step": 2514 + }, + { + "epoch": 0.08, + "grad_norm": 0.7079398036003113, + "learning_rate": 1.9771525139798052e-05, + "loss": 2.3359, + "step": 2515 + }, + { + "epoch": 0.08, + "grad_norm": 0.7500843405723572, + "learning_rate": 1.9771299178055262e-05, + "loss": 2.2748, + "step": 2516 + }, + { + "epoch": 0.08, + "grad_norm": 0.6314077377319336, + "learning_rate": 1.977107310592206e-05, + "loss": 2.3019, + "step": 2517 + }, + { + "epoch": 0.08, + "grad_norm": 0.7129048109054565, + "learning_rate": 1.9770846923401e-05, + "loss": 2.3916, + "step": 2518 + }, + { + "epoch": 0.08, + "grad_norm": 0.6690525412559509, + "learning_rate": 1.9770620630494637e-05, + "loss": 2.3426, + "step": 2519 + }, + { + "epoch": 0.08, + "grad_norm": 0.6771161556243896, + "learning_rate": 1.977039422720553e-05, + "loss": 2.4115, + "step": 2520 + }, + { + "epoch": 0.08, + "grad_norm": 0.6748583316802979, + "learning_rate": 1.9770167713536234e-05, + "loss": 2.3244, + "step": 2521 + }, + { + "epoch": 0.08, + "grad_norm": 0.6719194054603577, + "learning_rate": 1.976994108948931e-05, + "loss": 2.309, + "step": 2522 + }, + { + "epoch": 0.08, + "grad_norm": 0.6624894738197327, + "learning_rate": 1.9769714355067314e-05, + "loss": 2.3283, + "step": 2523 + }, + { + "epoch": 0.08, + "grad_norm": 0.6610791087150574, + "learning_rate": 1.9769487510272812e-05, + "loss": 2.3539, + "step": 2524 + }, + { + "epoch": 0.08, + "grad_norm": 0.710705041885376, + "learning_rate": 1.9769260555108366e-05, + "loss": 2.4005, + "step": 2525 + }, + { + "epoch": 0.08, + "grad_norm": 0.6553685665130615, + "learning_rate": 1.9769033489576543e-05, + "loss": 2.3185, + "step": 2526 + }, + { + "epoch": 0.08, + "grad_norm": 0.6929029822349548, + "learning_rate": 1.97688063136799e-05, + "loss": 2.3661, + "step": 2527 + }, + { + "epoch": 0.08, + "grad_norm": 0.6664001941680908, + "learning_rate": 1.976857902742101e-05, + "loss": 2.2775, + "step": 2528 + }, + { + "epoch": 0.08, + "grad_norm": 0.6698111295700073, + "learning_rate": 1.976835163080244e-05, + "loss": 2.3137, + "step": 2529 + }, + { + "epoch": 0.08, + "grad_norm": 0.6514168977737427, + "learning_rate": 1.976812412382676e-05, + "loss": 2.3823, + "step": 2530 + }, + { + "epoch": 0.08, + "grad_norm": 0.661277174949646, + "learning_rate": 1.9767896506496534e-05, + "loss": 2.29, + "step": 2531 + }, + { + "epoch": 0.08, + "grad_norm": 0.6575627326965332, + "learning_rate": 1.9767668778814343e-05, + "loss": 2.3572, + "step": 2532 + }, + { + "epoch": 0.08, + "grad_norm": 0.6911303400993347, + "learning_rate": 1.976744094078275e-05, + "loss": 2.3405, + "step": 2533 + }, + { + "epoch": 0.08, + "grad_norm": 0.6842401027679443, + "learning_rate": 1.9767212992404335e-05, + "loss": 2.4345, + "step": 2534 + }, + { + "epoch": 0.08, + "grad_norm": 0.6876300573348999, + "learning_rate": 1.9766984933681672e-05, + "loss": 2.3806, + "step": 2535 + }, + { + "epoch": 0.08, + "grad_norm": 0.681178629398346, + "learning_rate": 1.9766756764617338e-05, + "loss": 2.2977, + "step": 2536 + }, + { + "epoch": 0.08, + "grad_norm": 0.6752604246139526, + "learning_rate": 1.9766528485213907e-05, + "loss": 2.3436, + "step": 2537 + }, + { + "epoch": 0.08, + "grad_norm": 0.6857795715332031, + "learning_rate": 1.9766300095473963e-05, + "loss": 2.3354, + "step": 2538 + }, + { + "epoch": 0.08, + "grad_norm": 0.6532720923423767, + "learning_rate": 1.9766071595400083e-05, + "loss": 2.3791, + "step": 2539 + }, + { + "epoch": 0.08, + "grad_norm": 0.6762163043022156, + "learning_rate": 1.9765842984994853e-05, + "loss": 2.3965, + "step": 2540 + }, + { + "epoch": 0.08, + "grad_norm": 0.6776533126831055, + "learning_rate": 1.976561426426085e-05, + "loss": 2.3515, + "step": 2541 + }, + { + "epoch": 0.08, + "grad_norm": 0.6732799410820007, + "learning_rate": 1.976538543320066e-05, + "loss": 2.3031, + "step": 2542 + }, + { + "epoch": 0.08, + "grad_norm": 0.6601924300193787, + "learning_rate": 1.976515649181687e-05, + "loss": 2.3455, + "step": 2543 + }, + { + "epoch": 0.08, + "grad_norm": 0.6769899129867554, + "learning_rate": 1.976492744011206e-05, + "loss": 2.3406, + "step": 2544 + }, + { + "epoch": 0.08, + "grad_norm": 0.6701977252960205, + "learning_rate": 1.9764698278088826e-05, + "loss": 2.3313, + "step": 2545 + }, + { + "epoch": 0.08, + "grad_norm": 0.6765927672386169, + "learning_rate": 1.9764469005749753e-05, + "loss": 2.3199, + "step": 2546 + }, + { + "epoch": 0.08, + "grad_norm": 0.6569625735282898, + "learning_rate": 1.9764239623097432e-05, + "loss": 2.3749, + "step": 2547 + }, + { + "epoch": 0.08, + "grad_norm": 0.6878440976142883, + "learning_rate": 1.9764010130134456e-05, + "loss": 2.3246, + "step": 2548 + }, + { + "epoch": 0.08, + "grad_norm": 0.7374963164329529, + "learning_rate": 1.9763780526863415e-05, + "loss": 2.3752, + "step": 2549 + }, + { + "epoch": 0.08, + "grad_norm": 0.6553363800048828, + "learning_rate": 1.97635508132869e-05, + "loss": 2.3942, + "step": 2550 + }, + { + "epoch": 0.08, + "grad_norm": 0.6632681488990784, + "learning_rate": 1.9763320989407516e-05, + "loss": 2.3239, + "step": 2551 + }, + { + "epoch": 0.08, + "grad_norm": 0.742710530757904, + "learning_rate": 1.976309105522785e-05, + "loss": 2.2165, + "step": 2552 + }, + { + "epoch": 0.08, + "grad_norm": 0.6711102724075317, + "learning_rate": 1.97628610107505e-05, + "loss": 2.3137, + "step": 2553 + }, + { + "epoch": 0.08, + "grad_norm": 0.6705657243728638, + "learning_rate": 1.9762630855978074e-05, + "loss": 2.3955, + "step": 2554 + }, + { + "epoch": 0.09, + "grad_norm": 0.6922289133071899, + "learning_rate": 1.976240059091316e-05, + "loss": 2.3154, + "step": 2555 + }, + { + "epoch": 0.09, + "grad_norm": 0.6572542190551758, + "learning_rate": 1.976217021555837e-05, + "loss": 2.2869, + "step": 2556 + }, + { + "epoch": 0.09, + "grad_norm": 0.7043604254722595, + "learning_rate": 1.97619397299163e-05, + "loss": 2.3116, + "step": 2557 + }, + { + "epoch": 0.09, + "grad_norm": 0.6992303729057312, + "learning_rate": 1.976170913398956e-05, + "loss": 2.3722, + "step": 2558 + }, + { + "epoch": 0.09, + "grad_norm": 0.6854923367500305, + "learning_rate": 1.9761478427780746e-05, + "loss": 2.3498, + "step": 2559 + }, + { + "epoch": 0.09, + "grad_norm": 0.7188247442245483, + "learning_rate": 1.9761247611292472e-05, + "loss": 2.3239, + "step": 2560 + }, + { + "epoch": 0.09, + "grad_norm": 0.670086145401001, + "learning_rate": 1.976101668452734e-05, + "loss": 2.3215, + "step": 2561 + }, + { + "epoch": 0.09, + "grad_norm": 0.6621863842010498, + "learning_rate": 1.9760785647487965e-05, + "loss": 2.2859, + "step": 2562 + }, + { + "epoch": 0.09, + "grad_norm": 0.6977325677871704, + "learning_rate": 1.9760554500176953e-05, + "loss": 2.3643, + "step": 2563 + }, + { + "epoch": 0.09, + "grad_norm": 0.6692652106285095, + "learning_rate": 1.9760323242596918e-05, + "loss": 2.326, + "step": 2564 + }, + { + "epoch": 0.09, + "grad_norm": 0.6879720091819763, + "learning_rate": 1.976009187475047e-05, + "loss": 2.3995, + "step": 2565 + }, + { + "epoch": 0.09, + "grad_norm": 0.7025189399719238, + "learning_rate": 1.9759860396640228e-05, + "loss": 2.3461, + "step": 2566 + }, + { + "epoch": 0.09, + "grad_norm": 0.7029894590377808, + "learning_rate": 1.9759628808268796e-05, + "loss": 2.287, + "step": 2567 + }, + { + "epoch": 0.09, + "grad_norm": 0.7145413160324097, + "learning_rate": 1.9759397109638804e-05, + "loss": 2.325, + "step": 2568 + }, + { + "epoch": 0.09, + "grad_norm": 0.6734700202941895, + "learning_rate": 1.9759165300752858e-05, + "loss": 2.3274, + "step": 2569 + }, + { + "epoch": 0.09, + "grad_norm": 0.6818303465843201, + "learning_rate": 1.9758933381613583e-05, + "loss": 2.4053, + "step": 2570 + }, + { + "epoch": 0.09, + "grad_norm": 0.6740102767944336, + "learning_rate": 1.9758701352223598e-05, + "loss": 2.3826, + "step": 2571 + }, + { + "epoch": 0.09, + "grad_norm": 0.6677603721618652, + "learning_rate": 1.9758469212585526e-05, + "loss": 2.3814, + "step": 2572 + }, + { + "epoch": 0.09, + "grad_norm": 0.7409994006156921, + "learning_rate": 1.9758236962701986e-05, + "loss": 2.3438, + "step": 2573 + }, + { + "epoch": 0.09, + "grad_norm": 0.6682050228118896, + "learning_rate": 1.9758004602575604e-05, + "loss": 2.3731, + "step": 2574 + }, + { + "epoch": 0.09, + "grad_norm": 0.6983799934387207, + "learning_rate": 1.9757772132209005e-05, + "loss": 2.3769, + "step": 2575 + }, + { + "epoch": 0.09, + "grad_norm": 0.667367696762085, + "learning_rate": 1.9757539551604815e-05, + "loss": 2.4061, + "step": 2576 + }, + { + "epoch": 0.09, + "grad_norm": 0.6818742156028748, + "learning_rate": 1.9757306860765663e-05, + "loss": 2.2342, + "step": 2577 + }, + { + "epoch": 0.09, + "grad_norm": 0.6677143573760986, + "learning_rate": 1.9757074059694176e-05, + "loss": 2.3649, + "step": 2578 + }, + { + "epoch": 0.09, + "grad_norm": 0.6930766105651855, + "learning_rate": 1.9756841148392985e-05, + "loss": 2.3522, + "step": 2579 + }, + { + "epoch": 0.09, + "grad_norm": 0.6598162055015564, + "learning_rate": 1.9756608126864717e-05, + "loss": 2.3653, + "step": 2580 + }, + { + "epoch": 0.09, + "grad_norm": 0.6312028765678406, + "learning_rate": 1.9756374995112014e-05, + "loss": 2.3636, + "step": 2581 + }, + { + "epoch": 0.09, + "grad_norm": 0.6659961342811584, + "learning_rate": 1.97561417531375e-05, + "loss": 2.3344, + "step": 2582 + }, + { + "epoch": 0.09, + "grad_norm": 0.6427549719810486, + "learning_rate": 1.975590840094381e-05, + "loss": 2.3591, + "step": 2583 + }, + { + "epoch": 0.09, + "grad_norm": 0.6754583716392517, + "learning_rate": 1.9755674938533593e-05, + "loss": 2.3293, + "step": 2584 + }, + { + "epoch": 0.09, + "grad_norm": 0.6722887754440308, + "learning_rate": 1.9755441365909474e-05, + "loss": 2.3619, + "step": 2585 + }, + { + "epoch": 0.09, + "grad_norm": 0.6678455471992493, + "learning_rate": 1.9755207683074097e-05, + "loss": 2.3705, + "step": 2586 + }, + { + "epoch": 0.09, + "grad_norm": 0.6525406241416931, + "learning_rate": 1.9754973890030097e-05, + "loss": 2.3724, + "step": 2587 + }, + { + "epoch": 0.09, + "grad_norm": 0.6728523373603821, + "learning_rate": 1.9754739986780125e-05, + "loss": 2.41, + "step": 2588 + }, + { + "epoch": 0.09, + "grad_norm": 0.6756922006607056, + "learning_rate": 1.9754505973326816e-05, + "loss": 2.3362, + "step": 2589 + }, + { + "epoch": 0.09, + "grad_norm": 0.6535354256629944, + "learning_rate": 1.975427184967281e-05, + "loss": 2.3035, + "step": 2590 + }, + { + "epoch": 0.09, + "grad_norm": 0.6603698134422302, + "learning_rate": 1.975403761582076e-05, + "loss": 2.3197, + "step": 2591 + }, + { + "epoch": 0.09, + "grad_norm": 0.6813551783561707, + "learning_rate": 1.975380327177331e-05, + "loss": 2.3345, + "step": 2592 + }, + { + "epoch": 0.09, + "grad_norm": 0.6722380518913269, + "learning_rate": 1.975356881753311e-05, + "loss": 2.3617, + "step": 2593 + }, + { + "epoch": 0.09, + "grad_norm": 0.6549121737480164, + "learning_rate": 1.9753334253102802e-05, + "loss": 2.3345, + "step": 2594 + }, + { + "epoch": 0.09, + "grad_norm": 0.7044191360473633, + "learning_rate": 1.975309957848504e-05, + "loss": 2.3977, + "step": 2595 + }, + { + "epoch": 0.09, + "grad_norm": 0.6986442804336548, + "learning_rate": 1.9752864793682476e-05, + "loss": 2.4535, + "step": 2596 + }, + { + "epoch": 0.09, + "grad_norm": 0.7342276573181152, + "learning_rate": 1.975262989869776e-05, + "loss": 2.3951, + "step": 2597 + }, + { + "epoch": 0.09, + "grad_norm": 0.644694447517395, + "learning_rate": 1.9752394893533546e-05, + "loss": 2.3238, + "step": 2598 + }, + { + "epoch": 0.09, + "grad_norm": 0.6654594540596008, + "learning_rate": 1.975215977819249e-05, + "loss": 2.3113, + "step": 2599 + }, + { + "epoch": 0.09, + "grad_norm": 0.6604269742965698, + "learning_rate": 1.9751924552677254e-05, + "loss": 2.3596, + "step": 2600 + }, + { + "epoch": 0.09, + "grad_norm": 0.675030529499054, + "learning_rate": 1.9751689216990484e-05, + "loss": 2.2923, + "step": 2601 + }, + { + "epoch": 0.09, + "grad_norm": 0.7175413370132446, + "learning_rate": 1.9751453771134845e-05, + "loss": 2.3172, + "step": 2602 + }, + { + "epoch": 0.09, + "grad_norm": 0.6582487225532532, + "learning_rate": 1.9751218215112996e-05, + "loss": 2.3568, + "step": 2603 + }, + { + "epoch": 0.09, + "grad_norm": 0.6553653478622437, + "learning_rate": 1.97509825489276e-05, + "loss": 2.3252, + "step": 2604 + }, + { + "epoch": 0.09, + "grad_norm": 0.6520278453826904, + "learning_rate": 1.975074677258132e-05, + "loss": 2.2719, + "step": 2605 + }, + { + "epoch": 0.09, + "grad_norm": 0.6835995316505432, + "learning_rate": 1.975051088607681e-05, + "loss": 2.2891, + "step": 2606 + }, + { + "epoch": 0.09, + "grad_norm": 0.6756531596183777, + "learning_rate": 1.9750274889416746e-05, + "loss": 2.2676, + "step": 2607 + }, + { + "epoch": 0.09, + "grad_norm": 0.6604465842247009, + "learning_rate": 1.9750038782603792e-05, + "loss": 2.3499, + "step": 2608 + }, + { + "epoch": 0.09, + "grad_norm": 0.6575184464454651, + "learning_rate": 1.9749802565640614e-05, + "loss": 2.336, + "step": 2609 + }, + { + "epoch": 0.09, + "grad_norm": 0.6695541143417358, + "learning_rate": 1.9749566238529877e-05, + "loss": 2.3244, + "step": 2610 + }, + { + "epoch": 0.09, + "grad_norm": 0.6648885011672974, + "learning_rate": 1.974932980127426e-05, + "loss": 2.3452, + "step": 2611 + }, + { + "epoch": 0.09, + "grad_norm": 0.6662595868110657, + "learning_rate": 1.9749093253876426e-05, + "loss": 2.2842, + "step": 2612 + }, + { + "epoch": 0.09, + "grad_norm": 0.6614744663238525, + "learning_rate": 1.9748856596339048e-05, + "loss": 2.3319, + "step": 2613 + }, + { + "epoch": 0.09, + "grad_norm": 0.6691503524780273, + "learning_rate": 1.9748619828664805e-05, + "loss": 2.2617, + "step": 2614 + }, + { + "epoch": 0.09, + "grad_norm": 0.6675350666046143, + "learning_rate": 1.9748382950856367e-05, + "loss": 2.2923, + "step": 2615 + }, + { + "epoch": 0.09, + "grad_norm": 0.6864911913871765, + "learning_rate": 1.974814596291641e-05, + "loss": 2.3235, + "step": 2616 + }, + { + "epoch": 0.09, + "grad_norm": 0.6606498956680298, + "learning_rate": 1.9747908864847615e-05, + "loss": 2.3297, + "step": 2617 + }, + { + "epoch": 0.09, + "grad_norm": 0.6963382363319397, + "learning_rate": 1.9747671656652654e-05, + "loss": 2.3068, + "step": 2618 + }, + { + "epoch": 0.09, + "grad_norm": 0.7013143301010132, + "learning_rate": 1.974743433833422e-05, + "loss": 2.3221, + "step": 2619 + }, + { + "epoch": 0.09, + "grad_norm": 0.6671189665794373, + "learning_rate": 1.9747196909894978e-05, + "loss": 2.2847, + "step": 2620 + }, + { + "epoch": 0.09, + "grad_norm": 0.7117622494697571, + "learning_rate": 1.9746959371337618e-05, + "loss": 2.2751, + "step": 2621 + }, + { + "epoch": 0.09, + "grad_norm": 0.6652538180351257, + "learning_rate": 1.9746721722664827e-05, + "loss": 2.3045, + "step": 2622 + }, + { + "epoch": 0.09, + "grad_norm": 0.692873477935791, + "learning_rate": 1.9746483963879287e-05, + "loss": 2.3351, + "step": 2623 + }, + { + "epoch": 0.09, + "grad_norm": 0.6478108167648315, + "learning_rate": 1.974624609498368e-05, + "loss": 2.306, + "step": 2624 + }, + { + "epoch": 0.09, + "grad_norm": 0.6797033548355103, + "learning_rate": 1.9746008115980693e-05, + "loss": 2.268, + "step": 2625 + }, + { + "epoch": 0.09, + "grad_norm": 0.7153365015983582, + "learning_rate": 1.9745770026873026e-05, + "loss": 2.429, + "step": 2626 + }, + { + "epoch": 0.09, + "grad_norm": 0.6668415665626526, + "learning_rate": 1.9745531827663354e-05, + "loss": 2.3347, + "step": 2627 + }, + { + "epoch": 0.09, + "grad_norm": 0.6554449200630188, + "learning_rate": 1.974529351835438e-05, + "loss": 2.3302, + "step": 2628 + }, + { + "epoch": 0.09, + "grad_norm": 0.68616783618927, + "learning_rate": 1.9745055098948785e-05, + "loss": 2.3398, + "step": 2629 + }, + { + "epoch": 0.09, + "grad_norm": 0.6684591770172119, + "learning_rate": 1.9744816569449274e-05, + "loss": 2.3457, + "step": 2630 + }, + { + "epoch": 0.09, + "grad_norm": 0.6995184421539307, + "learning_rate": 1.9744577929858537e-05, + "loss": 2.4529, + "step": 2631 + }, + { + "epoch": 0.09, + "grad_norm": 0.6782556772232056, + "learning_rate": 1.9744339180179266e-05, + "loss": 2.3126, + "step": 2632 + }, + { + "epoch": 0.09, + "grad_norm": 0.6802340149879456, + "learning_rate": 1.9744100320414166e-05, + "loss": 2.3574, + "step": 2633 + }, + { + "epoch": 0.09, + "grad_norm": 0.6853451132774353, + "learning_rate": 1.9743861350565926e-05, + "loss": 2.3394, + "step": 2634 + }, + { + "epoch": 0.09, + "grad_norm": 0.6779966354370117, + "learning_rate": 1.9743622270637254e-05, + "loss": 2.3204, + "step": 2635 + }, + { + "epoch": 0.09, + "grad_norm": 0.6626641154289246, + "learning_rate": 1.9743383080630847e-05, + "loss": 2.3263, + "step": 2636 + }, + { + "epoch": 0.09, + "grad_norm": 0.6955687403678894, + "learning_rate": 1.974314378054941e-05, + "loss": 2.3328, + "step": 2637 + }, + { + "epoch": 0.09, + "grad_norm": 0.688758134841919, + "learning_rate": 1.974290437039564e-05, + "loss": 2.3672, + "step": 2638 + }, + { + "epoch": 0.09, + "grad_norm": 0.6622840166091919, + "learning_rate": 1.974266485017225e-05, + "loss": 2.317, + "step": 2639 + }, + { + "epoch": 0.09, + "grad_norm": 0.693135142326355, + "learning_rate": 1.974242521988194e-05, + "loss": 2.3454, + "step": 2640 + }, + { + "epoch": 0.09, + "grad_norm": 0.6840419173240662, + "learning_rate": 1.9742185479527423e-05, + "loss": 2.4075, + "step": 2641 + }, + { + "epoch": 0.09, + "grad_norm": 0.673745334148407, + "learning_rate": 1.9741945629111402e-05, + "loss": 2.3096, + "step": 2642 + }, + { + "epoch": 0.09, + "grad_norm": 0.6978880167007446, + "learning_rate": 1.974170566863659e-05, + "loss": 2.331, + "step": 2643 + }, + { + "epoch": 0.09, + "grad_norm": 0.6693536043167114, + "learning_rate": 1.9741465598105693e-05, + "loss": 2.3699, + "step": 2644 + }, + { + "epoch": 0.09, + "grad_norm": 0.7083197832107544, + "learning_rate": 1.9741225417521427e-05, + "loss": 2.3074, + "step": 2645 + }, + { + "epoch": 0.09, + "grad_norm": 0.660619854927063, + "learning_rate": 1.974098512688651e-05, + "loss": 2.3456, + "step": 2646 + }, + { + "epoch": 0.09, + "grad_norm": 0.6895096898078918, + "learning_rate": 1.9740744726203646e-05, + "loss": 2.3467, + "step": 2647 + }, + { + "epoch": 0.09, + "grad_norm": 0.6936542391777039, + "learning_rate": 1.9740504215475562e-05, + "loss": 2.3278, + "step": 2648 + }, + { + "epoch": 0.09, + "grad_norm": 0.6921387314796448, + "learning_rate": 1.9740263594704966e-05, + "loss": 2.3043, + "step": 2649 + }, + { + "epoch": 0.09, + "grad_norm": 0.686497688293457, + "learning_rate": 1.9740022863894582e-05, + "loss": 2.2841, + "step": 2650 + }, + { + "epoch": 0.09, + "grad_norm": 0.6849023103713989, + "learning_rate": 1.973978202304713e-05, + "loss": 2.3904, + "step": 2651 + }, + { + "epoch": 0.09, + "grad_norm": 0.6534825563430786, + "learning_rate": 1.9739541072165325e-05, + "loss": 2.3281, + "step": 2652 + }, + { + "epoch": 0.09, + "grad_norm": 0.684870183467865, + "learning_rate": 1.9739300011251893e-05, + "loss": 2.2653, + "step": 2653 + }, + { + "epoch": 0.09, + "grad_norm": 0.6593223214149475, + "learning_rate": 1.973905884030956e-05, + "loss": 2.2932, + "step": 2654 + }, + { + "epoch": 0.09, + "grad_norm": 0.7248606085777283, + "learning_rate": 1.9738817559341044e-05, + "loss": 2.3455, + "step": 2655 + }, + { + "epoch": 0.09, + "grad_norm": 0.6495844721794128, + "learning_rate": 1.973857616834908e-05, + "loss": 2.3143, + "step": 2656 + }, + { + "epoch": 0.09, + "grad_norm": 0.6471594572067261, + "learning_rate": 1.9738334667336385e-05, + "loss": 2.3243, + "step": 2657 + }, + { + "epoch": 0.09, + "grad_norm": 0.7020214200019836, + "learning_rate": 1.9738093056305693e-05, + "loss": 2.3331, + "step": 2658 + }, + { + "epoch": 0.09, + "grad_norm": 0.6698110699653625, + "learning_rate": 1.9737851335259736e-05, + "loss": 2.3643, + "step": 2659 + }, + { + "epoch": 0.09, + "grad_norm": 0.6480512022972107, + "learning_rate": 1.9737609504201238e-05, + "loss": 2.284, + "step": 2660 + }, + { + "epoch": 0.09, + "grad_norm": 0.6656991243362427, + "learning_rate": 1.9737367563132936e-05, + "loss": 2.2823, + "step": 2661 + }, + { + "epoch": 0.09, + "grad_norm": 0.7015044689178467, + "learning_rate": 1.9737125512057562e-05, + "loss": 2.2916, + "step": 2662 + }, + { + "epoch": 0.09, + "grad_norm": 0.692518949508667, + "learning_rate": 1.973688335097785e-05, + "loss": 2.3405, + "step": 2663 + }, + { + "epoch": 0.09, + "grad_norm": 0.6857185959815979, + "learning_rate": 1.9736641079896537e-05, + "loss": 2.2824, + "step": 2664 + }, + { + "epoch": 0.09, + "grad_norm": 0.6690720319747925, + "learning_rate": 1.9736398698816358e-05, + "loss": 2.274, + "step": 2665 + }, + { + "epoch": 0.09, + "grad_norm": 0.6870160698890686, + "learning_rate": 1.9736156207740054e-05, + "loss": 2.4095, + "step": 2666 + }, + { + "epoch": 0.09, + "grad_norm": 0.680374026298523, + "learning_rate": 1.973591360667036e-05, + "loss": 2.3454, + "step": 2667 + }, + { + "epoch": 0.09, + "grad_norm": 0.7228624820709229, + "learning_rate": 1.9735670895610025e-05, + "loss": 2.4323, + "step": 2668 + }, + { + "epoch": 0.09, + "grad_norm": 0.6820529103279114, + "learning_rate": 1.9735428074561784e-05, + "loss": 2.3229, + "step": 2669 + }, + { + "epoch": 0.09, + "grad_norm": 0.6576327681541443, + "learning_rate": 1.973518514352838e-05, + "loss": 2.3514, + "step": 2670 + }, + { + "epoch": 0.09, + "grad_norm": 0.73012375831604, + "learning_rate": 1.9734942102512564e-05, + "loss": 2.2817, + "step": 2671 + }, + { + "epoch": 0.09, + "grad_norm": 0.7029129266738892, + "learning_rate": 1.9734698951517074e-05, + "loss": 2.2584, + "step": 2672 + }, + { + "epoch": 0.09, + "grad_norm": 0.6998592615127563, + "learning_rate": 1.973445569054466e-05, + "loss": 2.366, + "step": 2673 + }, + { + "epoch": 0.09, + "grad_norm": 0.6375428438186646, + "learning_rate": 1.973421231959807e-05, + "loss": 2.3533, + "step": 2674 + }, + { + "epoch": 0.09, + "grad_norm": 0.6980307102203369, + "learning_rate": 1.9733968838680057e-05, + "loss": 2.3354, + "step": 2675 + }, + { + "epoch": 0.09, + "grad_norm": 0.6800624132156372, + "learning_rate": 1.973372524779337e-05, + "loss": 2.3186, + "step": 2676 + }, + { + "epoch": 0.09, + "grad_norm": 0.6651679873466492, + "learning_rate": 1.9733481546940753e-05, + "loss": 2.313, + "step": 2677 + }, + { + "epoch": 0.09, + "grad_norm": 0.6906301975250244, + "learning_rate": 1.973323773612497e-05, + "loss": 2.4072, + "step": 2678 + }, + { + "epoch": 0.09, + "grad_norm": 0.6820117831230164, + "learning_rate": 1.973299381534877e-05, + "loss": 2.3682, + "step": 2679 + }, + { + "epoch": 0.09, + "grad_norm": 0.6804512143135071, + "learning_rate": 1.9732749784614912e-05, + "loss": 2.3221, + "step": 2680 + }, + { + "epoch": 0.09, + "grad_norm": 0.6502261757850647, + "learning_rate": 1.973250564392615e-05, + "loss": 2.3421, + "step": 2681 + }, + { + "epoch": 0.09, + "grad_norm": 0.6732829809188843, + "learning_rate": 1.9732261393285245e-05, + "loss": 2.3534, + "step": 2682 + }, + { + "epoch": 0.09, + "grad_norm": 0.6769009828567505, + "learning_rate": 1.9732017032694953e-05, + "loss": 2.349, + "step": 2683 + }, + { + "epoch": 0.09, + "grad_norm": 0.7115375995635986, + "learning_rate": 1.9731772562158033e-05, + "loss": 2.3782, + "step": 2684 + }, + { + "epoch": 0.09, + "grad_norm": 0.702756941318512, + "learning_rate": 1.9731527981677253e-05, + "loss": 2.4245, + "step": 2685 + }, + { + "epoch": 0.09, + "grad_norm": 0.6708562970161438, + "learning_rate": 1.973128329125537e-05, + "loss": 2.3797, + "step": 2686 + }, + { + "epoch": 0.09, + "grad_norm": 0.6474363207817078, + "learning_rate": 1.9731038490895155e-05, + "loss": 2.337, + "step": 2687 + }, + { + "epoch": 0.09, + "grad_norm": 0.7877047061920166, + "learning_rate": 1.973079358059937e-05, + "loss": 2.313, + "step": 2688 + }, + { + "epoch": 0.09, + "grad_norm": 0.6766756176948547, + "learning_rate": 1.9730548560370782e-05, + "loss": 2.2859, + "step": 2689 + }, + { + "epoch": 0.09, + "grad_norm": 0.6642561554908752, + "learning_rate": 1.9730303430212155e-05, + "loss": 2.2982, + "step": 2690 + }, + { + "epoch": 0.09, + "grad_norm": 0.6470698118209839, + "learning_rate": 1.973005819012627e-05, + "loss": 2.3217, + "step": 2691 + }, + { + "epoch": 0.09, + "grad_norm": 0.6641038060188293, + "learning_rate": 1.972981284011588e-05, + "loss": 2.2844, + "step": 2692 + }, + { + "epoch": 0.09, + "grad_norm": 0.7244064211845398, + "learning_rate": 1.9729567380183777e-05, + "loss": 2.2948, + "step": 2693 + }, + { + "epoch": 0.09, + "grad_norm": 0.6843678951263428, + "learning_rate": 1.972932181033272e-05, + "loss": 2.363, + "step": 2694 + }, + { + "epoch": 0.09, + "grad_norm": 0.6606845259666443, + "learning_rate": 1.9729076130565482e-05, + "loss": 2.3669, + "step": 2695 + }, + { + "epoch": 0.09, + "grad_norm": 0.6558694839477539, + "learning_rate": 1.972883034088485e-05, + "loss": 2.4103, + "step": 2696 + }, + { + "epoch": 0.09, + "grad_norm": 0.6615455746650696, + "learning_rate": 1.9728584441293594e-05, + "loss": 2.3222, + "step": 2697 + }, + { + "epoch": 0.09, + "grad_norm": 0.6603631377220154, + "learning_rate": 1.9728338431794485e-05, + "loss": 2.3586, + "step": 2698 + }, + { + "epoch": 0.09, + "grad_norm": 0.6658846139907837, + "learning_rate": 1.9728092312390318e-05, + "loss": 2.2742, + "step": 2699 + }, + { + "epoch": 0.09, + "grad_norm": 0.6689095497131348, + "learning_rate": 1.9727846083083863e-05, + "loss": 2.2687, + "step": 2700 + }, + { + "epoch": 0.09, + "grad_norm": 0.6557187438011169, + "learning_rate": 1.97275997438779e-05, + "loss": 2.318, + "step": 2701 + }, + { + "epoch": 0.09, + "grad_norm": 0.6632029414176941, + "learning_rate": 1.9727353294775223e-05, + "loss": 2.3758, + "step": 2702 + }, + { + "epoch": 0.09, + "grad_norm": 0.6535089612007141, + "learning_rate": 1.9727106735778604e-05, + "loss": 2.3527, + "step": 2703 + }, + { + "epoch": 0.09, + "grad_norm": 0.6508504152297974, + "learning_rate": 1.972686006689084e-05, + "loss": 2.3167, + "step": 2704 + }, + { + "epoch": 0.09, + "grad_norm": 0.6619318723678589, + "learning_rate": 1.9726613288114707e-05, + "loss": 2.3414, + "step": 2705 + }, + { + "epoch": 0.09, + "grad_norm": 0.6698011755943298, + "learning_rate": 1.9726366399452998e-05, + "loss": 2.3451, + "step": 2706 + }, + { + "epoch": 0.09, + "grad_norm": 0.6627507209777832, + "learning_rate": 1.97261194009085e-05, + "loss": 2.3602, + "step": 2707 + }, + { + "epoch": 0.09, + "grad_norm": 0.6537807583808899, + "learning_rate": 1.9725872292484004e-05, + "loss": 2.313, + "step": 2708 + }, + { + "epoch": 0.09, + "grad_norm": 0.6808216571807861, + "learning_rate": 1.9725625074182307e-05, + "loss": 2.3337, + "step": 2709 + }, + { + "epoch": 0.09, + "grad_norm": 0.7131608724594116, + "learning_rate": 1.9725377746006196e-05, + "loss": 2.3667, + "step": 2710 + }, + { + "epoch": 0.09, + "grad_norm": 0.6554555296897888, + "learning_rate": 1.9725130307958466e-05, + "loss": 2.3328, + "step": 2711 + }, + { + "epoch": 0.09, + "grad_norm": 0.6939631104469299, + "learning_rate": 1.9724882760041914e-05, + "loss": 2.2983, + "step": 2712 + }, + { + "epoch": 0.09, + "grad_norm": 0.6824575066566467, + "learning_rate": 1.9724635102259337e-05, + "loss": 2.3407, + "step": 2713 + }, + { + "epoch": 0.09, + "grad_norm": 0.6878491044044495, + "learning_rate": 1.972438733461353e-05, + "loss": 2.3289, + "step": 2714 + }, + { + "epoch": 0.09, + "grad_norm": 0.7263282537460327, + "learning_rate": 1.9724139457107293e-05, + "loss": 2.378, + "step": 2715 + }, + { + "epoch": 0.09, + "grad_norm": 0.6850353479385376, + "learning_rate": 1.972389146974343e-05, + "loss": 2.317, + "step": 2716 + }, + { + "epoch": 0.09, + "grad_norm": 0.6950661540031433, + "learning_rate": 1.9723643372524737e-05, + "loss": 2.3665, + "step": 2717 + }, + { + "epoch": 0.09, + "grad_norm": 0.6544163823127747, + "learning_rate": 1.9723395165454023e-05, + "loss": 2.2455, + "step": 2718 + }, + { + "epoch": 0.09, + "grad_norm": 0.6603690981864929, + "learning_rate": 1.9723146848534086e-05, + "loss": 2.3369, + "step": 2719 + }, + { + "epoch": 0.09, + "grad_norm": 0.7105359435081482, + "learning_rate": 1.972289842176774e-05, + "loss": 2.2781, + "step": 2720 + }, + { + "epoch": 0.09, + "grad_norm": 0.6735163927078247, + "learning_rate": 1.972264988515778e-05, + "loss": 2.3244, + "step": 2721 + }, + { + "epoch": 0.09, + "grad_norm": 0.679388701915741, + "learning_rate": 1.972240123870702e-05, + "loss": 2.3253, + "step": 2722 + }, + { + "epoch": 0.09, + "grad_norm": 0.7158270478248596, + "learning_rate": 1.972215248241827e-05, + "loss": 2.3268, + "step": 2723 + }, + { + "epoch": 0.09, + "grad_norm": 0.6961209177970886, + "learning_rate": 1.972190361629434e-05, + "loss": 2.3232, + "step": 2724 + }, + { + "epoch": 0.09, + "grad_norm": 0.6575139164924622, + "learning_rate": 1.972165464033804e-05, + "loss": 2.3428, + "step": 2725 + }, + { + "epoch": 0.09, + "grad_norm": 0.6724652051925659, + "learning_rate": 1.9721405554552184e-05, + "loss": 2.3171, + "step": 2726 + }, + { + "epoch": 0.09, + "grad_norm": 0.7010536789894104, + "learning_rate": 1.9721156358939583e-05, + "loss": 2.3046, + "step": 2727 + }, + { + "epoch": 0.09, + "grad_norm": 0.676328718662262, + "learning_rate": 1.9720907053503055e-05, + "loss": 2.325, + "step": 2728 + }, + { + "epoch": 0.09, + "grad_norm": 0.6754179000854492, + "learning_rate": 1.9720657638245416e-05, + "loss": 2.4289, + "step": 2729 + }, + { + "epoch": 0.09, + "grad_norm": 0.6611103415489197, + "learning_rate": 1.9720408113169484e-05, + "loss": 2.2179, + "step": 2730 + }, + { + "epoch": 0.09, + "grad_norm": 0.6659634709358215, + "learning_rate": 1.972015847827808e-05, + "loss": 2.2713, + "step": 2731 + }, + { + "epoch": 0.09, + "grad_norm": 0.658402144908905, + "learning_rate": 1.971990873357402e-05, + "loss": 2.3174, + "step": 2732 + }, + { + "epoch": 0.09, + "grad_norm": 0.6764083504676819, + "learning_rate": 1.971965887906013e-05, + "loss": 2.2581, + "step": 2733 + }, + { + "epoch": 0.09, + "grad_norm": 0.6838564872741699, + "learning_rate": 1.971940891473923e-05, + "loss": 2.3845, + "step": 2734 + }, + { + "epoch": 0.09, + "grad_norm": 0.664307177066803, + "learning_rate": 1.971915884061414e-05, + "loss": 2.3254, + "step": 2735 + }, + { + "epoch": 0.09, + "grad_norm": 0.6847516298294067, + "learning_rate": 1.9718908656687694e-05, + "loss": 2.3191, + "step": 2736 + }, + { + "epoch": 0.09, + "grad_norm": 0.6980885863304138, + "learning_rate": 1.9718658362962708e-05, + "loss": 2.3384, + "step": 2737 + }, + { + "epoch": 0.09, + "grad_norm": 0.7042725682258606, + "learning_rate": 1.9718407959442022e-05, + "loss": 2.3456, + "step": 2738 + }, + { + "epoch": 0.09, + "grad_norm": 0.684991717338562, + "learning_rate": 1.9718157446128458e-05, + "loss": 2.3108, + "step": 2739 + }, + { + "epoch": 0.09, + "grad_norm": 0.64825439453125, + "learning_rate": 1.971790682302484e-05, + "loss": 2.2857, + "step": 2740 + }, + { + "epoch": 0.09, + "grad_norm": 0.6802736520767212, + "learning_rate": 1.971765609013401e-05, + "loss": 2.2591, + "step": 2741 + }, + { + "epoch": 0.09, + "grad_norm": 0.6619553565979004, + "learning_rate": 1.9717405247458797e-05, + "loss": 2.3043, + "step": 2742 + }, + { + "epoch": 0.09, + "grad_norm": 0.7146410942077637, + "learning_rate": 1.9717154295002035e-05, + "loss": 2.3555, + "step": 2743 + }, + { + "epoch": 0.09, + "grad_norm": 0.6785635948181152, + "learning_rate": 1.971690323276656e-05, + "loss": 2.2608, + "step": 2744 + }, + { + "epoch": 0.09, + "grad_norm": 0.6665732860565186, + "learning_rate": 1.9716652060755206e-05, + "loss": 2.4102, + "step": 2745 + }, + { + "epoch": 0.09, + "grad_norm": 0.6612725257873535, + "learning_rate": 1.9716400778970807e-05, + "loss": 2.3051, + "step": 2746 + }, + { + "epoch": 0.09, + "grad_norm": 0.6648546457290649, + "learning_rate": 1.9716149387416214e-05, + "loss": 2.3456, + "step": 2747 + }, + { + "epoch": 0.09, + "grad_norm": 0.6508253812789917, + "learning_rate": 1.971589788609425e-05, + "loss": 2.3343, + "step": 2748 + }, + { + "epoch": 0.09, + "grad_norm": 0.7162386775016785, + "learning_rate": 1.9715646275007773e-05, + "loss": 2.3702, + "step": 2749 + }, + { + "epoch": 0.09, + "grad_norm": 0.6513245105743408, + "learning_rate": 1.9715394554159618e-05, + "loss": 2.3236, + "step": 2750 + }, + { + "epoch": 0.09, + "grad_norm": 0.7057669758796692, + "learning_rate": 1.971514272355263e-05, + "loss": 2.2712, + "step": 2751 + }, + { + "epoch": 0.09, + "grad_norm": 0.6696560382843018, + "learning_rate": 1.9714890783189647e-05, + "loss": 2.3212, + "step": 2752 + }, + { + "epoch": 0.09, + "grad_norm": 0.6656703948974609, + "learning_rate": 1.9714638733073524e-05, + "loss": 2.2859, + "step": 2753 + }, + { + "epoch": 0.09, + "grad_norm": 0.6675062775611877, + "learning_rate": 1.9714386573207108e-05, + "loss": 2.3799, + "step": 2754 + }, + { + "epoch": 0.09, + "grad_norm": 0.6664295196533203, + "learning_rate": 1.9714134303593245e-05, + "loss": 2.3152, + "step": 2755 + }, + { + "epoch": 0.09, + "grad_norm": 0.6889835000038147, + "learning_rate": 1.9713881924234785e-05, + "loss": 2.2728, + "step": 2756 + }, + { + "epoch": 0.09, + "grad_norm": 0.713476836681366, + "learning_rate": 1.9713629435134584e-05, + "loss": 2.318, + "step": 2757 + }, + { + "epoch": 0.09, + "grad_norm": 0.6531257033348083, + "learning_rate": 1.9713376836295484e-05, + "loss": 2.2585, + "step": 2758 + }, + { + "epoch": 0.09, + "grad_norm": 0.693151593208313, + "learning_rate": 1.9713124127720346e-05, + "loss": 2.3006, + "step": 2759 + }, + { + "epoch": 0.09, + "grad_norm": 0.6975042819976807, + "learning_rate": 1.9712871309412028e-05, + "loss": 2.2839, + "step": 2760 + }, + { + "epoch": 0.09, + "grad_norm": 0.6617156267166138, + "learning_rate": 1.971261838137338e-05, + "loss": 2.3373, + "step": 2761 + }, + { + "epoch": 0.09, + "grad_norm": 0.7235597968101501, + "learning_rate": 1.9712365343607262e-05, + "loss": 2.3684, + "step": 2762 + }, + { + "epoch": 0.09, + "grad_norm": 0.710724949836731, + "learning_rate": 1.9712112196116533e-05, + "loss": 2.3078, + "step": 2763 + }, + { + "epoch": 0.09, + "grad_norm": 0.67494797706604, + "learning_rate": 1.971185893890405e-05, + "loss": 2.337, + "step": 2764 + }, + { + "epoch": 0.09, + "grad_norm": 0.6771137118339539, + "learning_rate": 1.971160557197268e-05, + "loss": 2.3543, + "step": 2765 + }, + { + "epoch": 0.09, + "grad_norm": 0.6714797616004944, + "learning_rate": 1.971135209532528e-05, + "loss": 2.2936, + "step": 2766 + }, + { + "epoch": 0.09, + "grad_norm": 0.6499322652816772, + "learning_rate": 1.9711098508964713e-05, + "loss": 2.2439, + "step": 2767 + }, + { + "epoch": 0.09, + "grad_norm": 0.686851441860199, + "learning_rate": 1.9710844812893848e-05, + "loss": 2.2578, + "step": 2768 + }, + { + "epoch": 0.09, + "grad_norm": 0.6743660569190979, + "learning_rate": 1.971059100711555e-05, + "loss": 2.3012, + "step": 2769 + }, + { + "epoch": 0.09, + "grad_norm": 0.6434255242347717, + "learning_rate": 1.9710337091632686e-05, + "loss": 2.3013, + "step": 2770 + }, + { + "epoch": 0.09, + "grad_norm": 0.692061185836792, + "learning_rate": 1.971008306644812e-05, + "loss": 2.2622, + "step": 2771 + }, + { + "epoch": 0.09, + "grad_norm": 0.6664226055145264, + "learning_rate": 1.970982893156473e-05, + "loss": 2.3594, + "step": 2772 + }, + { + "epoch": 0.09, + "grad_norm": 0.6570258140563965, + "learning_rate": 1.9709574686985388e-05, + "loss": 2.2674, + "step": 2773 + }, + { + "epoch": 0.09, + "grad_norm": 0.6971666812896729, + "learning_rate": 1.9709320332712955e-05, + "loss": 2.3696, + "step": 2774 + }, + { + "epoch": 0.09, + "grad_norm": 0.6769872903823853, + "learning_rate": 1.970906586875031e-05, + "loss": 2.3101, + "step": 2775 + }, + { + "epoch": 0.09, + "grad_norm": 0.6789970993995667, + "learning_rate": 1.9708811295100333e-05, + "loss": 2.3187, + "step": 2776 + }, + { + "epoch": 0.09, + "grad_norm": 0.6811920404434204, + "learning_rate": 1.9708556611765896e-05, + "loss": 2.2682, + "step": 2777 + }, + { + "epoch": 0.09, + "grad_norm": 0.6698407530784607, + "learning_rate": 1.9708301818749875e-05, + "loss": 2.2877, + "step": 2778 + }, + { + "epoch": 0.09, + "grad_norm": 0.666793167591095, + "learning_rate": 1.970804691605515e-05, + "loss": 2.3332, + "step": 2779 + }, + { + "epoch": 0.09, + "grad_norm": 0.697039008140564, + "learning_rate": 1.97077919036846e-05, + "loss": 2.3361, + "step": 2780 + }, + { + "epoch": 0.09, + "grad_norm": 0.6642515063285828, + "learning_rate": 1.9707536781641105e-05, + "loss": 2.2603, + "step": 2781 + }, + { + "epoch": 0.09, + "grad_norm": 0.6724225878715515, + "learning_rate": 1.970728154992755e-05, + "loss": 2.3526, + "step": 2782 + }, + { + "epoch": 0.09, + "grad_norm": 0.6646852493286133, + "learning_rate": 1.9707026208546817e-05, + "loss": 2.3927, + "step": 2783 + }, + { + "epoch": 0.09, + "grad_norm": 0.6520638465881348, + "learning_rate": 1.9706770757501796e-05, + "loss": 2.3084, + "step": 2784 + }, + { + "epoch": 0.09, + "grad_norm": 0.6691730618476868, + "learning_rate": 1.970651519679536e-05, + "loss": 2.3428, + "step": 2785 + }, + { + "epoch": 0.09, + "grad_norm": 0.6513739824295044, + "learning_rate": 1.9706259526430407e-05, + "loss": 2.2991, + "step": 2786 + }, + { + "epoch": 0.09, + "grad_norm": 0.7030876278877258, + "learning_rate": 1.970600374640982e-05, + "loss": 2.3405, + "step": 2787 + }, + { + "epoch": 0.09, + "grad_norm": 0.7199677228927612, + "learning_rate": 1.9705747856736494e-05, + "loss": 2.3145, + "step": 2788 + }, + { + "epoch": 0.09, + "grad_norm": 0.6951326131820679, + "learning_rate": 1.9705491857413314e-05, + "loss": 2.3813, + "step": 2789 + }, + { + "epoch": 0.09, + "grad_norm": 0.6960110664367676, + "learning_rate": 1.9705235748443176e-05, + "loss": 2.3517, + "step": 2790 + }, + { + "epoch": 0.09, + "grad_norm": 0.6430880427360535, + "learning_rate": 1.9704979529828976e-05, + "loss": 2.335, + "step": 2791 + }, + { + "epoch": 0.09, + "grad_norm": 0.6669966578483582, + "learning_rate": 1.9704723201573598e-05, + "loss": 2.3209, + "step": 2792 + }, + { + "epoch": 0.09, + "grad_norm": 0.7018842697143555, + "learning_rate": 1.970446676367995e-05, + "loss": 2.2517, + "step": 2793 + }, + { + "epoch": 0.09, + "grad_norm": 0.7080121040344238, + "learning_rate": 1.9704210216150927e-05, + "loss": 2.3463, + "step": 2794 + }, + { + "epoch": 0.09, + "grad_norm": 0.6576233506202698, + "learning_rate": 1.9703953558989413e-05, + "loss": 2.3411, + "step": 2795 + }, + { + "epoch": 0.09, + "grad_norm": 0.7154086828231812, + "learning_rate": 1.970369679219833e-05, + "loss": 2.3039, + "step": 2796 + }, + { + "epoch": 0.09, + "grad_norm": 0.6594984531402588, + "learning_rate": 1.9703439915780562e-05, + "loss": 2.3591, + "step": 2797 + }, + { + "epoch": 0.09, + "grad_norm": 0.6829261779785156, + "learning_rate": 1.970318292973902e-05, + "loss": 2.3764, + "step": 2798 + }, + { + "epoch": 0.09, + "grad_norm": 0.6986505389213562, + "learning_rate": 1.9702925834076598e-05, + "loss": 2.3564, + "step": 2799 + }, + { + "epoch": 0.09, + "grad_norm": 0.7279203534126282, + "learning_rate": 1.9702668628796212e-05, + "loss": 2.2904, + "step": 2800 + }, + { + "epoch": 0.09, + "grad_norm": 0.6733790636062622, + "learning_rate": 1.970241131390076e-05, + "loss": 2.2949, + "step": 2801 + }, + { + "epoch": 0.09, + "grad_norm": 0.6831963658332825, + "learning_rate": 1.970215388939315e-05, + "loss": 2.3072, + "step": 2802 + }, + { + "epoch": 0.09, + "grad_norm": 0.6766725778579712, + "learning_rate": 1.9701896355276292e-05, + "loss": 2.2128, + "step": 2803 + }, + { + "epoch": 0.09, + "grad_norm": 0.6662180423736572, + "learning_rate": 1.9701638711553095e-05, + "loss": 2.3418, + "step": 2804 + }, + { + "epoch": 0.09, + "grad_norm": 0.6496817469596863, + "learning_rate": 1.9701380958226472e-05, + "loss": 2.3203, + "step": 2805 + }, + { + "epoch": 0.09, + "grad_norm": 0.6867324709892273, + "learning_rate": 1.970112309529933e-05, + "loss": 2.3713, + "step": 2806 + }, + { + "epoch": 0.09, + "grad_norm": 0.665769636631012, + "learning_rate": 1.9700865122774587e-05, + "loss": 2.3942, + "step": 2807 + }, + { + "epoch": 0.09, + "grad_norm": 0.6567417979240417, + "learning_rate": 1.970060704065515e-05, + "loss": 2.2627, + "step": 2808 + }, + { + "epoch": 0.09, + "grad_norm": 0.6605551838874817, + "learning_rate": 1.9700348848943945e-05, + "loss": 2.3088, + "step": 2809 + }, + { + "epoch": 0.09, + "grad_norm": 0.6862800717353821, + "learning_rate": 1.970009054764388e-05, + "loss": 2.3394, + "step": 2810 + }, + { + "epoch": 0.09, + "grad_norm": 0.6355836987495422, + "learning_rate": 1.969983213675788e-05, + "loss": 2.3071, + "step": 2811 + }, + { + "epoch": 0.09, + "grad_norm": 0.7063869833946228, + "learning_rate": 1.969957361628886e-05, + "loss": 2.3235, + "step": 2812 + }, + { + "epoch": 0.09, + "grad_norm": 0.6928921341896057, + "learning_rate": 1.969931498623974e-05, + "loss": 2.2941, + "step": 2813 + }, + { + "epoch": 0.09, + "grad_norm": 0.6836203932762146, + "learning_rate": 1.9699056246613445e-05, + "loss": 2.3272, + "step": 2814 + }, + { + "epoch": 0.09, + "grad_norm": 0.7501394152641296, + "learning_rate": 1.9698797397412897e-05, + "loss": 2.3011, + "step": 2815 + }, + { + "epoch": 0.09, + "grad_norm": 0.6688721179962158, + "learning_rate": 1.969853843864102e-05, + "loss": 2.2575, + "step": 2816 + }, + { + "epoch": 0.09, + "grad_norm": 0.703423261642456, + "learning_rate": 1.969827937030074e-05, + "loss": 2.3742, + "step": 2817 + }, + { + "epoch": 0.09, + "grad_norm": 0.6901835799217224, + "learning_rate": 1.9698020192394984e-05, + "loss": 2.258, + "step": 2818 + }, + { + "epoch": 0.09, + "grad_norm": 0.6559231877326965, + "learning_rate": 1.9697760904926677e-05, + "loss": 2.2929, + "step": 2819 + }, + { + "epoch": 0.09, + "grad_norm": 0.6842488646507263, + "learning_rate": 1.969750150789875e-05, + "loss": 2.32, + "step": 2820 + }, + { + "epoch": 0.09, + "grad_norm": 0.7518985271453857, + "learning_rate": 1.969724200131414e-05, + "loss": 2.3606, + "step": 2821 + }, + { + "epoch": 0.09, + "grad_norm": 0.6831926703453064, + "learning_rate": 1.969698238517577e-05, + "loss": 2.2733, + "step": 2822 + }, + { + "epoch": 0.09, + "grad_norm": 0.7015331387519836, + "learning_rate": 1.9696722659486575e-05, + "loss": 2.3697, + "step": 2823 + }, + { + "epoch": 0.09, + "grad_norm": 0.664523184299469, + "learning_rate": 1.969646282424949e-05, + "loss": 2.39, + "step": 2824 + }, + { + "epoch": 0.09, + "grad_norm": 0.67743319272995, + "learning_rate": 1.9696202879467453e-05, + "loss": 2.3874, + "step": 2825 + }, + { + "epoch": 0.09, + "grad_norm": 0.6681963205337524, + "learning_rate": 1.9695942825143394e-05, + "loss": 2.352, + "step": 2826 + }, + { + "epoch": 0.09, + "grad_norm": 0.6769992113113403, + "learning_rate": 1.969568266128026e-05, + "loss": 2.3296, + "step": 2827 + }, + { + "epoch": 0.09, + "grad_norm": 0.6578978300094604, + "learning_rate": 1.9695422387880986e-05, + "loss": 2.298, + "step": 2828 + }, + { + "epoch": 0.09, + "grad_norm": 0.6683707237243652, + "learning_rate": 1.9695162004948506e-05, + "loss": 2.3166, + "step": 2829 + }, + { + "epoch": 0.09, + "grad_norm": 0.6699895262718201, + "learning_rate": 1.9694901512485775e-05, + "loss": 2.3068, + "step": 2830 + }, + { + "epoch": 0.09, + "grad_norm": 0.6741325259208679, + "learning_rate": 1.9694640910495724e-05, + "loss": 2.3837, + "step": 2831 + }, + { + "epoch": 0.09, + "grad_norm": 0.6590512990951538, + "learning_rate": 1.96943801989813e-05, + "loss": 2.41, + "step": 2832 + }, + { + "epoch": 0.09, + "grad_norm": 0.6686561703681946, + "learning_rate": 1.9694119377945454e-05, + "loss": 2.3824, + "step": 2833 + }, + { + "epoch": 0.09, + "grad_norm": 0.6950041055679321, + "learning_rate": 1.9693858447391127e-05, + "loss": 2.3005, + "step": 2834 + }, + { + "epoch": 0.09, + "grad_norm": 0.7246048450469971, + "learning_rate": 1.9693597407321267e-05, + "loss": 2.3192, + "step": 2835 + }, + { + "epoch": 0.09, + "grad_norm": 0.6865792870521545, + "learning_rate": 1.9693336257738826e-05, + "loss": 2.4299, + "step": 2836 + }, + { + "epoch": 0.09, + "grad_norm": 0.6889941692352295, + "learning_rate": 1.9693074998646754e-05, + "loss": 2.3421, + "step": 2837 + }, + { + "epoch": 0.09, + "grad_norm": 0.6586092710494995, + "learning_rate": 1.9692813630047997e-05, + "loss": 2.2776, + "step": 2838 + }, + { + "epoch": 0.09, + "grad_norm": 0.6731042861938477, + "learning_rate": 1.9692552151945517e-05, + "loss": 2.3016, + "step": 2839 + }, + { + "epoch": 0.09, + "grad_norm": 0.6686066389083862, + "learning_rate": 1.969229056434226e-05, + "loss": 2.3174, + "step": 2840 + }, + { + "epoch": 0.09, + "grad_norm": 0.6902161836624146, + "learning_rate": 1.9692028867241185e-05, + "loss": 2.3257, + "step": 2841 + }, + { + "epoch": 0.09, + "grad_norm": 0.6358218193054199, + "learning_rate": 1.969176706064525e-05, + "loss": 2.2772, + "step": 2842 + }, + { + "epoch": 0.09, + "grad_norm": 0.6587114334106445, + "learning_rate": 1.969150514455741e-05, + "loss": 2.3158, + "step": 2843 + }, + { + "epoch": 0.09, + "grad_norm": 0.6809013485908508, + "learning_rate": 1.9691243118980624e-05, + "loss": 2.2681, + "step": 2844 + }, + { + "epoch": 0.09, + "grad_norm": 0.6586540341377258, + "learning_rate": 1.9690980983917853e-05, + "loss": 2.3074, + "step": 2845 + }, + { + "epoch": 0.09, + "grad_norm": 0.6920683979988098, + "learning_rate": 1.9690718739372058e-05, + "loss": 2.405, + "step": 2846 + }, + { + "epoch": 0.09, + "grad_norm": 0.689683198928833, + "learning_rate": 1.96904563853462e-05, + "loss": 2.3508, + "step": 2847 + }, + { + "epoch": 0.09, + "grad_norm": 0.6630246043205261, + "learning_rate": 1.969019392184325e-05, + "loss": 2.3226, + "step": 2848 + }, + { + "epoch": 0.09, + "grad_norm": 0.6757833957672119, + "learning_rate": 1.9689931348866163e-05, + "loss": 2.3491, + "step": 2849 + }, + { + "epoch": 0.09, + "grad_norm": 0.6617912650108337, + "learning_rate": 1.9689668666417914e-05, + "loss": 2.322, + "step": 2850 + }, + { + "epoch": 0.09, + "grad_norm": 0.6862687468528748, + "learning_rate": 1.9689405874501465e-05, + "loss": 2.2738, + "step": 2851 + }, + { + "epoch": 0.09, + "grad_norm": 0.697131335735321, + "learning_rate": 1.9689142973119787e-05, + "loss": 2.3246, + "step": 2852 + }, + { + "epoch": 0.09, + "grad_norm": 0.7109963297843933, + "learning_rate": 1.9688879962275852e-05, + "loss": 2.3636, + "step": 2853 + }, + { + "epoch": 0.09, + "grad_norm": 0.6603410840034485, + "learning_rate": 1.968861684197263e-05, + "loss": 2.3731, + "step": 2854 + }, + { + "epoch": 0.09, + "grad_norm": 0.6609280109405518, + "learning_rate": 1.968835361221309e-05, + "loss": 2.3883, + "step": 2855 + }, + { + "epoch": 0.1, + "grad_norm": 0.6524820923805237, + "learning_rate": 1.9688090273000208e-05, + "loss": 2.3379, + "step": 2856 + }, + { + "epoch": 0.1, + "grad_norm": 0.7151212096214294, + "learning_rate": 1.9687826824336963e-05, + "loss": 2.344, + "step": 2857 + }, + { + "epoch": 0.1, + "grad_norm": 0.7164432406425476, + "learning_rate": 1.9687563266226328e-05, + "loss": 2.3213, + "step": 2858 + }, + { + "epoch": 0.1, + "grad_norm": 0.6699766516685486, + "learning_rate": 1.968729959867128e-05, + "loss": 2.3085, + "step": 2859 + }, + { + "epoch": 0.1, + "grad_norm": 0.6621629595756531, + "learning_rate": 1.96870358216748e-05, + "loss": 2.3541, + "step": 2860 + }, + { + "epoch": 0.1, + "grad_norm": 0.7014157772064209, + "learning_rate": 1.9686771935239865e-05, + "loss": 2.2916, + "step": 2861 + }, + { + "epoch": 0.1, + "grad_norm": 0.6799154281616211, + "learning_rate": 1.9686507939369453e-05, + "loss": 2.2846, + "step": 2862 + }, + { + "epoch": 0.1, + "grad_norm": 0.7110755443572998, + "learning_rate": 1.968624383406656e-05, + "loss": 2.2864, + "step": 2863 + }, + { + "epoch": 0.1, + "grad_norm": 0.7277179956436157, + "learning_rate": 1.9685979619334154e-05, + "loss": 2.3314, + "step": 2864 + }, + { + "epoch": 0.1, + "grad_norm": 0.7831695079803467, + "learning_rate": 1.968571529517523e-05, + "loss": 2.3018, + "step": 2865 + }, + { + "epoch": 0.1, + "grad_norm": 0.6849926710128784, + "learning_rate": 1.968545086159277e-05, + "loss": 2.3282, + "step": 2866 + }, + { + "epoch": 0.1, + "grad_norm": 0.6711964011192322, + "learning_rate": 1.9685186318589763e-05, + "loss": 2.3194, + "step": 2867 + }, + { + "epoch": 0.1, + "grad_norm": 0.7245537042617798, + "learning_rate": 1.9684921666169196e-05, + "loss": 2.2753, + "step": 2868 + }, + { + "epoch": 0.1, + "grad_norm": 0.7223377227783203, + "learning_rate": 1.968465690433406e-05, + "loss": 2.3601, + "step": 2869 + }, + { + "epoch": 0.1, + "grad_norm": 0.6900066137313843, + "learning_rate": 1.968439203308735e-05, + "loss": 2.2858, + "step": 2870 + }, + { + "epoch": 0.1, + "grad_norm": 0.6660450100898743, + "learning_rate": 1.9684127052432048e-05, + "loss": 2.352, + "step": 2871 + }, + { + "epoch": 0.1, + "grad_norm": 0.6990333199501038, + "learning_rate": 1.9683861962371157e-05, + "loss": 2.3597, + "step": 2872 + }, + { + "epoch": 0.1, + "grad_norm": 0.706770122051239, + "learning_rate": 1.968359676290767e-05, + "loss": 2.3662, + "step": 2873 + }, + { + "epoch": 0.1, + "grad_norm": 0.6692976355552673, + "learning_rate": 1.968333145404458e-05, + "loss": 2.3545, + "step": 2874 + }, + { + "epoch": 0.1, + "grad_norm": 0.6634491682052612, + "learning_rate": 1.9683066035784887e-05, + "loss": 2.3419, + "step": 2875 + }, + { + "epoch": 0.1, + "grad_norm": 0.6848682165145874, + "learning_rate": 1.968280050813159e-05, + "loss": 2.2765, + "step": 2876 + }, + { + "epoch": 0.1, + "grad_norm": 0.6789926290512085, + "learning_rate": 1.9682534871087687e-05, + "loss": 2.3227, + "step": 2877 + }, + { + "epoch": 0.1, + "grad_norm": 0.7044233679771423, + "learning_rate": 1.968226912465618e-05, + "loss": 2.3168, + "step": 2878 + }, + { + "epoch": 0.1, + "grad_norm": 0.6985860466957092, + "learning_rate": 1.9682003268840065e-05, + "loss": 2.3249, + "step": 2879 + }, + { + "epoch": 0.1, + "grad_norm": 0.6989080309867859, + "learning_rate": 1.9681737303642358e-05, + "loss": 2.3902, + "step": 2880 + }, + { + "epoch": 0.1, + "grad_norm": 0.6801905632019043, + "learning_rate": 1.9681471229066056e-05, + "loss": 2.2602, + "step": 2881 + }, + { + "epoch": 0.1, + "grad_norm": 0.6810482740402222, + "learning_rate": 1.9681205045114165e-05, + "loss": 2.3372, + "step": 2882 + }, + { + "epoch": 0.1, + "grad_norm": 0.690697431564331, + "learning_rate": 1.968093875178969e-05, + "loss": 2.3505, + "step": 2883 + }, + { + "epoch": 0.1, + "grad_norm": 0.688677191734314, + "learning_rate": 1.968067234909565e-05, + "loss": 2.3376, + "step": 2884 + }, + { + "epoch": 0.1, + "grad_norm": 0.6849868297576904, + "learning_rate": 1.968040583703504e-05, + "loss": 2.3899, + "step": 2885 + }, + { + "epoch": 0.1, + "grad_norm": 0.6812607049942017, + "learning_rate": 1.9680139215610883e-05, + "loss": 2.3214, + "step": 2886 + }, + { + "epoch": 0.1, + "grad_norm": 0.6999800205230713, + "learning_rate": 1.9679872484826183e-05, + "loss": 2.3885, + "step": 2887 + }, + { + "epoch": 0.1, + "grad_norm": 0.6741604804992676, + "learning_rate": 1.967960564468396e-05, + "loss": 2.3095, + "step": 2888 + }, + { + "epoch": 0.1, + "grad_norm": 0.68731290102005, + "learning_rate": 1.967933869518722e-05, + "loss": 2.3032, + "step": 2889 + }, + { + "epoch": 0.1, + "grad_norm": 0.7445862889289856, + "learning_rate": 1.967907163633899e-05, + "loss": 2.3417, + "step": 2890 + }, + { + "epoch": 0.1, + "grad_norm": 0.6830973029136658, + "learning_rate": 1.9678804468142275e-05, + "loss": 2.3185, + "step": 2891 + }, + { + "epoch": 0.1, + "grad_norm": 0.6858906149864197, + "learning_rate": 1.9678537190600105e-05, + "loss": 2.369, + "step": 2892 + }, + { + "epoch": 0.1, + "grad_norm": 0.6888479590415955, + "learning_rate": 1.9678269803715492e-05, + "loss": 2.364, + "step": 2893 + }, + { + "epoch": 0.1, + "grad_norm": 0.6803169846534729, + "learning_rate": 1.9678002307491458e-05, + "loss": 2.3447, + "step": 2894 + }, + { + "epoch": 0.1, + "grad_norm": 0.7076132297515869, + "learning_rate": 1.9677734701931026e-05, + "loss": 2.2798, + "step": 2895 + }, + { + "epoch": 0.1, + "grad_norm": 0.6628857254981995, + "learning_rate": 1.967746698703722e-05, + "loss": 2.2807, + "step": 2896 + }, + { + "epoch": 0.1, + "grad_norm": 0.6583648920059204, + "learning_rate": 1.9677199162813064e-05, + "loss": 2.3475, + "step": 2897 + }, + { + "epoch": 0.1, + "grad_norm": 0.6615450978279114, + "learning_rate": 1.9676931229261583e-05, + "loss": 2.2934, + "step": 2898 + }, + { + "epoch": 0.1, + "grad_norm": 0.6884331107139587, + "learning_rate": 1.96766631863858e-05, + "loss": 2.3462, + "step": 2899 + }, + { + "epoch": 0.1, + "grad_norm": 0.6690109968185425, + "learning_rate": 1.9676395034188753e-05, + "loss": 2.3161, + "step": 2900 + }, + { + "epoch": 0.1, + "grad_norm": 0.693662166595459, + "learning_rate": 1.9676126772673464e-05, + "loss": 2.2918, + "step": 2901 + }, + { + "epoch": 0.1, + "grad_norm": 0.6729449033737183, + "learning_rate": 1.9675858401842965e-05, + "loss": 2.3063, + "step": 2902 + }, + { + "epoch": 0.1, + "grad_norm": 0.72861647605896, + "learning_rate": 1.967558992170029e-05, + "loss": 2.2797, + "step": 2903 + }, + { + "epoch": 0.1, + "grad_norm": 0.692997932434082, + "learning_rate": 1.9675321332248465e-05, + "loss": 2.3049, + "step": 2904 + }, + { + "epoch": 0.1, + "grad_norm": 0.6852849721908569, + "learning_rate": 1.9675052633490537e-05, + "loss": 2.3263, + "step": 2905 + }, + { + "epoch": 0.1, + "grad_norm": 0.6654704213142395, + "learning_rate": 1.967478382542953e-05, + "loss": 2.3684, + "step": 2906 + }, + { + "epoch": 0.1, + "grad_norm": 0.6582489013671875, + "learning_rate": 1.9674514908068486e-05, + "loss": 2.3281, + "step": 2907 + }, + { + "epoch": 0.1, + "grad_norm": 0.6841027736663818, + "learning_rate": 1.9674245881410445e-05, + "loss": 2.3741, + "step": 2908 + }, + { + "epoch": 0.1, + "grad_norm": 0.6436604261398315, + "learning_rate": 1.9673976745458443e-05, + "loss": 2.3055, + "step": 2909 + }, + { + "epoch": 0.1, + "grad_norm": 0.6485874056816101, + "learning_rate": 1.967370750021552e-05, + "loss": 2.3059, + "step": 2910 + }, + { + "epoch": 0.1, + "grad_norm": 0.6804980039596558, + "learning_rate": 1.9673438145684718e-05, + "loss": 2.2776, + "step": 2911 + }, + { + "epoch": 0.1, + "grad_norm": 0.6365377902984619, + "learning_rate": 1.967316868186908e-05, + "loss": 2.2342, + "step": 2912 + }, + { + "epoch": 0.1, + "grad_norm": 0.6652557849884033, + "learning_rate": 1.9672899108771655e-05, + "loss": 2.3008, + "step": 2913 + }, + { + "epoch": 0.1, + "grad_norm": 0.6715561747550964, + "learning_rate": 1.9672629426395482e-05, + "loss": 2.2901, + "step": 2914 + }, + { + "epoch": 0.1, + "grad_norm": 0.6965147256851196, + "learning_rate": 1.9672359634743613e-05, + "loss": 2.3019, + "step": 2915 + }, + { + "epoch": 0.1, + "grad_norm": 0.6825271844863892, + "learning_rate": 1.9672089733819094e-05, + "loss": 2.3214, + "step": 2916 + }, + { + "epoch": 0.1, + "grad_norm": 0.7176769375801086, + "learning_rate": 1.967181972362497e-05, + "loss": 2.3139, + "step": 2917 + }, + { + "epoch": 0.1, + "grad_norm": 0.677059531211853, + "learning_rate": 1.9671549604164295e-05, + "loss": 2.3367, + "step": 2918 + }, + { + "epoch": 0.1, + "grad_norm": 0.6437870860099792, + "learning_rate": 1.9671279375440125e-05, + "loss": 2.3398, + "step": 2919 + }, + { + "epoch": 0.1, + "grad_norm": 0.6716718077659607, + "learning_rate": 1.9671009037455505e-05, + "loss": 2.3098, + "step": 2920 + }, + { + "epoch": 0.1, + "grad_norm": 0.7423004508018494, + "learning_rate": 1.9670738590213495e-05, + "loss": 2.3374, + "step": 2921 + }, + { + "epoch": 0.1, + "grad_norm": 0.6660673022270203, + "learning_rate": 1.9670468033717146e-05, + "loss": 2.2926, + "step": 2922 + }, + { + "epoch": 0.1, + "grad_norm": 0.7292910814285278, + "learning_rate": 1.9670197367969515e-05, + "loss": 2.3283, + "step": 2923 + }, + { + "epoch": 0.1, + "grad_norm": 0.683182954788208, + "learning_rate": 1.9669926592973665e-05, + "loss": 2.3062, + "step": 2924 + }, + { + "epoch": 0.1, + "grad_norm": 0.65863037109375, + "learning_rate": 1.966965570873265e-05, + "loss": 2.324, + "step": 2925 + }, + { + "epoch": 0.1, + "grad_norm": 0.6742762327194214, + "learning_rate": 1.966938471524953e-05, + "loss": 2.2788, + "step": 2926 + }, + { + "epoch": 0.1, + "grad_norm": 0.6800342202186584, + "learning_rate": 1.966911361252737e-05, + "loss": 2.2504, + "step": 2927 + }, + { + "epoch": 0.1, + "grad_norm": 0.6733680367469788, + "learning_rate": 1.966884240056923e-05, + "loss": 2.3576, + "step": 2928 + }, + { + "epoch": 0.1, + "grad_norm": 0.6734517216682434, + "learning_rate": 1.966857107937818e-05, + "loss": 2.3624, + "step": 2929 + }, + { + "epoch": 0.1, + "grad_norm": 0.6791380643844604, + "learning_rate": 1.9668299648957274e-05, + "loss": 2.2488, + "step": 2930 + }, + { + "epoch": 0.1, + "grad_norm": 0.6863561868667603, + "learning_rate": 1.966802810930959e-05, + "loss": 2.2741, + "step": 2931 + }, + { + "epoch": 0.1, + "grad_norm": 0.7177913784980774, + "learning_rate": 1.9667756460438187e-05, + "loss": 2.3396, + "step": 2932 + }, + { + "epoch": 0.1, + "grad_norm": 0.7329105734825134, + "learning_rate": 1.9667484702346137e-05, + "loss": 2.3455, + "step": 2933 + }, + { + "epoch": 0.1, + "grad_norm": 0.6803003549575806, + "learning_rate": 1.966721283503651e-05, + "loss": 2.2926, + "step": 2934 + }, + { + "epoch": 0.1, + "grad_norm": 0.6709240078926086, + "learning_rate": 1.966694085851238e-05, + "loss": 2.2645, + "step": 2935 + }, + { + "epoch": 0.1, + "grad_norm": 0.6546924114227295, + "learning_rate": 1.9666668772776816e-05, + "loss": 2.2866, + "step": 2936 + }, + { + "epoch": 0.1, + "grad_norm": 0.6571659445762634, + "learning_rate": 1.9666396577832894e-05, + "loss": 2.2654, + "step": 2937 + }, + { + "epoch": 0.1, + "grad_norm": 0.6824886798858643, + "learning_rate": 1.966612427368369e-05, + "loss": 2.3668, + "step": 2938 + }, + { + "epoch": 0.1, + "grad_norm": 0.6801427602767944, + "learning_rate": 1.966585186033228e-05, + "loss": 2.3443, + "step": 2939 + }, + { + "epoch": 0.1, + "grad_norm": 0.6745694875717163, + "learning_rate": 1.9665579337781736e-05, + "loss": 2.3563, + "step": 2940 + }, + { + "epoch": 0.1, + "grad_norm": 0.6681062579154968, + "learning_rate": 1.9665306706035144e-05, + "loss": 2.309, + "step": 2941 + }, + { + "epoch": 0.1, + "grad_norm": 0.6749881505966187, + "learning_rate": 1.966503396509558e-05, + "loss": 2.2378, + "step": 2942 + }, + { + "epoch": 0.1, + "grad_norm": 0.7094277143478394, + "learning_rate": 1.966476111496613e-05, + "loss": 2.3236, + "step": 2943 + }, + { + "epoch": 0.1, + "grad_norm": 0.7237303853034973, + "learning_rate": 1.966448815564987e-05, + "loss": 2.321, + "step": 2944 + }, + { + "epoch": 0.1, + "grad_norm": 0.6517361998558044, + "learning_rate": 1.9664215087149887e-05, + "loss": 2.3316, + "step": 2945 + }, + { + "epoch": 0.1, + "grad_norm": 0.7197785973548889, + "learning_rate": 1.9663941909469266e-05, + "loss": 2.3227, + "step": 2946 + }, + { + "epoch": 0.1, + "grad_norm": 0.6674231290817261, + "learning_rate": 1.9663668622611092e-05, + "loss": 2.3273, + "step": 2947 + }, + { + "epoch": 0.1, + "grad_norm": 0.6753541827201843, + "learning_rate": 1.9663395226578456e-05, + "loss": 2.3204, + "step": 2948 + }, + { + "epoch": 0.1, + "grad_norm": 0.6835004091262817, + "learning_rate": 1.966312172137444e-05, + "loss": 2.2465, + "step": 2949 + }, + { + "epoch": 0.1, + "grad_norm": 0.6827998161315918, + "learning_rate": 1.966284810700214e-05, + "loss": 2.3224, + "step": 2950 + }, + { + "epoch": 0.1, + "grad_norm": 0.6671438813209534, + "learning_rate": 1.9662574383464645e-05, + "loss": 2.2785, + "step": 2951 + }, + { + "epoch": 0.1, + "grad_norm": 0.6737426519393921, + "learning_rate": 1.9662300550765047e-05, + "loss": 2.3119, + "step": 2952 + }, + { + "epoch": 0.1, + "grad_norm": 0.6862102150917053, + "learning_rate": 1.9662026608906443e-05, + "loss": 2.2767, + "step": 2953 + }, + { + "epoch": 0.1, + "grad_norm": 0.6574042439460754, + "learning_rate": 1.9661752557891922e-05, + "loss": 2.2886, + "step": 2954 + }, + { + "epoch": 0.1, + "grad_norm": 0.6658502817153931, + "learning_rate": 1.9661478397724582e-05, + "loss": 2.3101, + "step": 2955 + }, + { + "epoch": 0.1, + "grad_norm": 0.6832183599472046, + "learning_rate": 1.9661204128407527e-05, + "loss": 2.2869, + "step": 2956 + }, + { + "epoch": 0.1, + "grad_norm": 0.6775646209716797, + "learning_rate": 1.966092974994384e-05, + "loss": 2.2984, + "step": 2957 + }, + { + "epoch": 0.1, + "grad_norm": 0.6615846753120422, + "learning_rate": 1.9660655262336637e-05, + "loss": 2.2624, + "step": 2958 + }, + { + "epoch": 0.1, + "grad_norm": 0.6634668111801147, + "learning_rate": 1.9660380665589014e-05, + "loss": 2.3543, + "step": 2959 + }, + { + "epoch": 0.1, + "grad_norm": 0.6529388427734375, + "learning_rate": 1.966010595970407e-05, + "loss": 2.3351, + "step": 2960 + }, + { + "epoch": 0.1, + "grad_norm": 0.6666375398635864, + "learning_rate": 1.965983114468491e-05, + "loss": 2.2789, + "step": 2961 + }, + { + "epoch": 0.1, + "grad_norm": 0.643934965133667, + "learning_rate": 1.9659556220534637e-05, + "loss": 2.2641, + "step": 2962 + }, + { + "epoch": 0.1, + "grad_norm": 0.6774589419364929, + "learning_rate": 1.965928118725636e-05, + "loss": 2.3048, + "step": 2963 + }, + { + "epoch": 0.1, + "grad_norm": 0.6716755032539368, + "learning_rate": 1.9659006044853185e-05, + "loss": 2.2985, + "step": 2964 + }, + { + "epoch": 0.1, + "grad_norm": 0.6758524775505066, + "learning_rate": 1.9658730793328223e-05, + "loss": 2.2921, + "step": 2965 + }, + { + "epoch": 0.1, + "grad_norm": 0.6557595133781433, + "learning_rate": 1.9658455432684577e-05, + "loss": 2.3303, + "step": 2966 + }, + { + "epoch": 0.1, + "grad_norm": 0.7137976288795471, + "learning_rate": 1.9658179962925364e-05, + "loss": 2.3339, + "step": 2967 + }, + { + "epoch": 0.1, + "grad_norm": 0.6865337491035461, + "learning_rate": 1.9657904384053694e-05, + "loss": 2.2815, + "step": 2968 + }, + { + "epoch": 0.1, + "grad_norm": 0.7109874486923218, + "learning_rate": 1.9657628696072682e-05, + "loss": 2.2711, + "step": 2969 + }, + { + "epoch": 0.1, + "grad_norm": 0.7054360508918762, + "learning_rate": 1.9657352898985437e-05, + "loss": 2.287, + "step": 2970 + }, + { + "epoch": 0.1, + "grad_norm": 0.6785338521003723, + "learning_rate": 1.9657076992795082e-05, + "loss": 2.3105, + "step": 2971 + }, + { + "epoch": 0.1, + "grad_norm": 0.6620676517486572, + "learning_rate": 1.965680097750473e-05, + "loss": 2.3705, + "step": 2972 + }, + { + "epoch": 0.1, + "grad_norm": 0.7038158178329468, + "learning_rate": 1.96565248531175e-05, + "loss": 2.3203, + "step": 2973 + }, + { + "epoch": 0.1, + "grad_norm": 0.6762446761131287, + "learning_rate": 1.965624861963651e-05, + "loss": 2.3627, + "step": 2974 + }, + { + "epoch": 0.1, + "grad_norm": 0.6897306442260742, + "learning_rate": 1.9655972277064883e-05, + "loss": 2.3561, + "step": 2975 + }, + { + "epoch": 0.1, + "grad_norm": 0.7097106575965881, + "learning_rate": 1.965569582540574e-05, + "loss": 2.3221, + "step": 2976 + }, + { + "epoch": 0.1, + "grad_norm": 0.6775929927825928, + "learning_rate": 1.9655419264662207e-05, + "loss": 2.29, + "step": 2977 + }, + { + "epoch": 0.1, + "grad_norm": 0.6804125905036926, + "learning_rate": 1.96551425948374e-05, + "loss": 2.3053, + "step": 2978 + }, + { + "epoch": 0.1, + "grad_norm": 0.7170947194099426, + "learning_rate": 1.965486581593446e-05, + "loss": 2.3726, + "step": 2979 + }, + { + "epoch": 0.1, + "grad_norm": 0.6982647180557251, + "learning_rate": 1.9654588927956493e-05, + "loss": 2.3388, + "step": 2980 + }, + { + "epoch": 0.1, + "grad_norm": 0.6818144917488098, + "learning_rate": 1.9654311930906647e-05, + "loss": 2.3887, + "step": 2981 + }, + { + "epoch": 0.1, + "grad_norm": 0.6808184385299683, + "learning_rate": 1.965403482478804e-05, + "loss": 2.2705, + "step": 2982 + }, + { + "epoch": 0.1, + "grad_norm": 0.7061893939971924, + "learning_rate": 1.9653757609603805e-05, + "loss": 2.3364, + "step": 2983 + }, + { + "epoch": 0.1, + "grad_norm": 0.6770671010017395, + "learning_rate": 1.9653480285357075e-05, + "loss": 2.3129, + "step": 2984 + }, + { + "epoch": 0.1, + "grad_norm": 0.6462687849998474, + "learning_rate": 1.965320285205098e-05, + "loss": 2.3095, + "step": 2985 + }, + { + "epoch": 0.1, + "grad_norm": 0.6441843509674072, + "learning_rate": 1.9652925309688657e-05, + "loss": 2.3125, + "step": 2986 + }, + { + "epoch": 0.1, + "grad_norm": 0.6661598086357117, + "learning_rate": 1.965264765827324e-05, + "loss": 2.305, + "step": 2987 + }, + { + "epoch": 0.1, + "grad_norm": 0.6770630478858948, + "learning_rate": 1.9652369897807873e-05, + "loss": 2.3224, + "step": 2988 + }, + { + "epoch": 0.1, + "grad_norm": 0.6821475625038147, + "learning_rate": 1.965209202829568e-05, + "loss": 2.2615, + "step": 2989 + }, + { + "epoch": 0.1, + "grad_norm": 0.6795044541358948, + "learning_rate": 1.9651814049739813e-05, + "loss": 2.2746, + "step": 2990 + }, + { + "epoch": 0.1, + "grad_norm": 0.6581634283065796, + "learning_rate": 1.9651535962143406e-05, + "loss": 2.3292, + "step": 2991 + }, + { + "epoch": 0.1, + "grad_norm": 0.7273206114768982, + "learning_rate": 1.9651257765509602e-05, + "loss": 2.243, + "step": 2992 + }, + { + "epoch": 0.1, + "grad_norm": 0.6700400710105896, + "learning_rate": 1.9650979459841544e-05, + "loss": 2.2106, + "step": 2993 + }, + { + "epoch": 0.1, + "grad_norm": 0.6745424866676331, + "learning_rate": 1.9650701045142378e-05, + "loss": 2.2631, + "step": 2994 + }, + { + "epoch": 0.1, + "grad_norm": 0.6996797919273376, + "learning_rate": 1.9650422521415245e-05, + "loss": 2.3256, + "step": 2995 + }, + { + "epoch": 0.1, + "grad_norm": 0.6645322442054749, + "learning_rate": 1.9650143888663293e-05, + "loss": 2.2706, + "step": 2996 + }, + { + "epoch": 0.1, + "grad_norm": 0.6626133322715759, + "learning_rate": 1.9649865146889673e-05, + "loss": 2.3176, + "step": 2997 + }, + { + "epoch": 0.1, + "grad_norm": 0.7161187529563904, + "learning_rate": 1.9649586296097532e-05, + "loss": 2.3507, + "step": 2998 + }, + { + "epoch": 0.1, + "grad_norm": 0.7267447113990784, + "learning_rate": 1.964930733629002e-05, + "loss": 2.3298, + "step": 2999 + }, + { + "epoch": 0.1, + "grad_norm": 0.6988711357116699, + "learning_rate": 1.964902826747029e-05, + "loss": 2.3242, + "step": 3000 + }, + { + "epoch": 0.1, + "grad_norm": 0.6781612038612366, + "learning_rate": 1.964874908964149e-05, + "loss": 2.3394, + "step": 3001 + }, + { + "epoch": 0.1, + "grad_norm": 0.663555383682251, + "learning_rate": 1.9648469802806777e-05, + "loss": 2.2873, + "step": 3002 + }, + { + "epoch": 0.1, + "grad_norm": 0.6918426156044006, + "learning_rate": 1.964819040696931e-05, + "loss": 2.243, + "step": 3003 + }, + { + "epoch": 0.1, + "grad_norm": 0.6672970056533813, + "learning_rate": 1.964791090213224e-05, + "loss": 2.2704, + "step": 3004 + }, + { + "epoch": 0.1, + "grad_norm": 0.766591489315033, + "learning_rate": 1.964763128829873e-05, + "loss": 2.3628, + "step": 3005 + }, + { + "epoch": 0.1, + "grad_norm": 0.694460391998291, + "learning_rate": 1.9647351565471932e-05, + "loss": 2.2729, + "step": 3006 + }, + { + "epoch": 0.1, + "grad_norm": 0.7073972225189209, + "learning_rate": 1.9647071733655013e-05, + "loss": 2.3376, + "step": 3007 + }, + { + "epoch": 0.1, + "grad_norm": 0.6735703349113464, + "learning_rate": 1.964679179285113e-05, + "loss": 2.2827, + "step": 3008 + }, + { + "epoch": 0.1, + "grad_norm": 0.6921026110649109, + "learning_rate": 1.9646511743063447e-05, + "loss": 2.3041, + "step": 3009 + }, + { + "epoch": 0.1, + "grad_norm": 0.7243918776512146, + "learning_rate": 1.9646231584295128e-05, + "loss": 2.2428, + "step": 3010 + }, + { + "epoch": 0.1, + "grad_norm": 0.7014862298965454, + "learning_rate": 1.964595131654934e-05, + "loss": 2.2854, + "step": 3011 + }, + { + "epoch": 0.1, + "grad_norm": 0.7269584536552429, + "learning_rate": 1.964567093982924e-05, + "loss": 2.3257, + "step": 3012 + }, + { + "epoch": 0.1, + "grad_norm": 0.7346311211585999, + "learning_rate": 1.9645390454138008e-05, + "loss": 2.3521, + "step": 3013 + }, + { + "epoch": 0.1, + "grad_norm": 0.6559798717498779, + "learning_rate": 1.964510985947881e-05, + "loss": 2.3009, + "step": 3014 + }, + { + "epoch": 0.1, + "grad_norm": 0.7006489038467407, + "learning_rate": 1.964482915585481e-05, + "loss": 2.3408, + "step": 3015 + }, + { + "epoch": 0.1, + "grad_norm": 0.7058645486831665, + "learning_rate": 1.964454834326918e-05, + "loss": 2.3534, + "step": 3016 + }, + { + "epoch": 0.1, + "grad_norm": 0.7061296701431274, + "learning_rate": 1.9644267421725098e-05, + "loss": 2.318, + "step": 3017 + }, + { + "epoch": 0.1, + "grad_norm": 0.710426390171051, + "learning_rate": 1.9643986391225733e-05, + "loss": 2.351, + "step": 3018 + }, + { + "epoch": 0.1, + "grad_norm": 0.664726197719574, + "learning_rate": 1.9643705251774265e-05, + "loss": 2.3205, + "step": 3019 + }, + { + "epoch": 0.1, + "grad_norm": 0.7297730445861816, + "learning_rate": 1.9643424003373866e-05, + "loss": 2.286, + "step": 3020 + }, + { + "epoch": 0.1, + "grad_norm": 0.6999923586845398, + "learning_rate": 1.9643142646027712e-05, + "loss": 2.3199, + "step": 3021 + }, + { + "epoch": 0.1, + "grad_norm": 0.7477583885192871, + "learning_rate": 1.9642861179738987e-05, + "loss": 2.2819, + "step": 3022 + }, + { + "epoch": 0.1, + "grad_norm": 0.685806393623352, + "learning_rate": 1.9642579604510863e-05, + "loss": 2.3025, + "step": 3023 + }, + { + "epoch": 0.1, + "grad_norm": 0.6749387383460999, + "learning_rate": 1.964229792034653e-05, + "loss": 2.3002, + "step": 3024 + }, + { + "epoch": 0.1, + "grad_norm": 0.6618485450744629, + "learning_rate": 1.9642016127249163e-05, + "loss": 2.2874, + "step": 3025 + }, + { + "epoch": 0.1, + "grad_norm": 0.7302093505859375, + "learning_rate": 1.9641734225221953e-05, + "loss": 2.2887, + "step": 3026 + }, + { + "epoch": 0.1, + "grad_norm": 0.6852867007255554, + "learning_rate": 1.9641452214268074e-05, + "loss": 2.3125, + "step": 3027 + }, + { + "epoch": 0.1, + "grad_norm": 0.65642911195755, + "learning_rate": 1.964117009439072e-05, + "loss": 2.3186, + "step": 3028 + }, + { + "epoch": 0.1, + "grad_norm": 0.6707789301872253, + "learning_rate": 1.964088786559308e-05, + "loss": 2.3398, + "step": 3029 + }, + { + "epoch": 0.1, + "grad_norm": 0.6963332891464233, + "learning_rate": 1.964060552787834e-05, + "loss": 2.3412, + "step": 3030 + }, + { + "epoch": 0.1, + "grad_norm": 0.6832951903343201, + "learning_rate": 1.9640323081249682e-05, + "loss": 2.3255, + "step": 3031 + }, + { + "epoch": 0.1, + "grad_norm": 0.6812306642532349, + "learning_rate": 1.9640040525710312e-05, + "loss": 2.262, + "step": 3032 + }, + { + "epoch": 0.1, + "grad_norm": 0.6663896441459656, + "learning_rate": 1.9639757861263406e-05, + "loss": 2.3401, + "step": 3033 + }, + { + "epoch": 0.1, + "grad_norm": 0.6947149038314819, + "learning_rate": 1.963947508791217e-05, + "loss": 2.3147, + "step": 3034 + }, + { + "epoch": 0.1, + "grad_norm": 0.6737911105155945, + "learning_rate": 1.963919220565979e-05, + "loss": 2.2706, + "step": 3035 + }, + { + "epoch": 0.1, + "grad_norm": 0.6445004940032959, + "learning_rate": 1.9638909214509468e-05, + "loss": 2.3945, + "step": 3036 + }, + { + "epoch": 0.1, + "grad_norm": 0.6594206094741821, + "learning_rate": 1.96386261144644e-05, + "loss": 2.2973, + "step": 3037 + }, + { + "epoch": 0.1, + "grad_norm": 0.6794245839118958, + "learning_rate": 1.9638342905527782e-05, + "loss": 2.3288, + "step": 3038 + }, + { + "epoch": 0.1, + "grad_norm": 0.6590719819068909, + "learning_rate": 1.9638059587702815e-05, + "loss": 2.3301, + "step": 3039 + }, + { + "epoch": 0.1, + "grad_norm": 0.7198504209518433, + "learning_rate": 1.9637776160992697e-05, + "loss": 2.2887, + "step": 3040 + }, + { + "epoch": 0.1, + "grad_norm": 0.6979372501373291, + "learning_rate": 1.9637492625400635e-05, + "loss": 2.3052, + "step": 3041 + }, + { + "epoch": 0.1, + "grad_norm": 0.6512684226036072, + "learning_rate": 1.9637208980929826e-05, + "loss": 2.3156, + "step": 3042 + }, + { + "epoch": 0.1, + "grad_norm": 0.704622745513916, + "learning_rate": 1.963692522758348e-05, + "loss": 2.3174, + "step": 3043 + }, + { + "epoch": 0.1, + "grad_norm": 0.6765683889389038, + "learning_rate": 1.9636641365364802e-05, + "loss": 2.264, + "step": 3044 + }, + { + "epoch": 0.1, + "grad_norm": 0.6656168699264526, + "learning_rate": 1.9636357394276994e-05, + "loss": 2.2789, + "step": 3045 + }, + { + "epoch": 0.1, + "grad_norm": 0.6795988082885742, + "learning_rate": 1.963607331432327e-05, + "loss": 2.3539, + "step": 3046 + }, + { + "epoch": 0.1, + "grad_norm": 0.6894536018371582, + "learning_rate": 1.9635789125506836e-05, + "loss": 2.2465, + "step": 3047 + }, + { + "epoch": 0.1, + "grad_norm": 0.6951778531074524, + "learning_rate": 1.9635504827830903e-05, + "loss": 2.3085, + "step": 3048 + }, + { + "epoch": 0.1, + "grad_norm": 0.677876353263855, + "learning_rate": 1.9635220421298683e-05, + "loss": 2.2011, + "step": 3049 + }, + { + "epoch": 0.1, + "grad_norm": 0.6561124920845032, + "learning_rate": 1.9634935905913392e-05, + "loss": 2.3303, + "step": 3050 + }, + { + "epoch": 0.1, + "grad_norm": 0.7039267420768738, + "learning_rate": 1.9634651281678245e-05, + "loss": 2.3445, + "step": 3051 + }, + { + "epoch": 0.1, + "grad_norm": 0.6769395470619202, + "learning_rate": 1.9634366548596447e-05, + "loss": 2.3078, + "step": 3052 + }, + { + "epoch": 0.1, + "grad_norm": 0.6986508965492249, + "learning_rate": 1.9634081706671226e-05, + "loss": 2.2988, + "step": 3053 + }, + { + "epoch": 0.1, + "grad_norm": 0.6775603890419006, + "learning_rate": 1.9633796755905793e-05, + "loss": 2.3272, + "step": 3054 + }, + { + "epoch": 0.1, + "grad_norm": 0.6805962920188904, + "learning_rate": 1.9633511696303374e-05, + "loss": 2.354, + "step": 3055 + }, + { + "epoch": 0.1, + "grad_norm": 0.6692181825637817, + "learning_rate": 1.9633226527867184e-05, + "loss": 2.2114, + "step": 3056 + }, + { + "epoch": 0.1, + "grad_norm": 0.6714400053024292, + "learning_rate": 1.9632941250600446e-05, + "loss": 2.3039, + "step": 3057 + }, + { + "epoch": 0.1, + "grad_norm": 0.6624983549118042, + "learning_rate": 1.9632655864506383e-05, + "loss": 2.2455, + "step": 3058 + }, + { + "epoch": 0.1, + "grad_norm": 0.6828032732009888, + "learning_rate": 1.9632370369588217e-05, + "loss": 2.3183, + "step": 3059 + }, + { + "epoch": 0.1, + "grad_norm": 0.6705331206321716, + "learning_rate": 1.963208476584918e-05, + "loss": 2.2456, + "step": 3060 + }, + { + "epoch": 0.1, + "grad_norm": 0.673492431640625, + "learning_rate": 1.9631799053292492e-05, + "loss": 2.2594, + "step": 3061 + }, + { + "epoch": 0.1, + "grad_norm": 0.7021549344062805, + "learning_rate": 1.9631513231921384e-05, + "loss": 2.3157, + "step": 3062 + }, + { + "epoch": 0.1, + "grad_norm": 0.655586302280426, + "learning_rate": 1.9631227301739085e-05, + "loss": 2.2824, + "step": 3063 + }, + { + "epoch": 0.1, + "grad_norm": 0.6719132661819458, + "learning_rate": 1.963094126274882e-05, + "loss": 2.265, + "step": 3064 + }, + { + "epoch": 0.1, + "grad_norm": 0.6862486600875854, + "learning_rate": 1.9630655114953828e-05, + "loss": 2.2409, + "step": 3065 + }, + { + "epoch": 0.1, + "grad_norm": 0.6536693572998047, + "learning_rate": 1.963036885835734e-05, + "loss": 2.212, + "step": 3066 + }, + { + "epoch": 0.1, + "grad_norm": 0.7304664850234985, + "learning_rate": 1.963008249296259e-05, + "loss": 2.2872, + "step": 3067 + }, + { + "epoch": 0.1, + "grad_norm": 0.6861785650253296, + "learning_rate": 1.962979601877281e-05, + "loss": 2.3103, + "step": 3068 + }, + { + "epoch": 0.1, + "grad_norm": 0.6688429117202759, + "learning_rate": 1.9629509435791235e-05, + "loss": 2.2763, + "step": 3069 + }, + { + "epoch": 0.1, + "grad_norm": 0.6977278590202332, + "learning_rate": 1.962922274402111e-05, + "loss": 2.2802, + "step": 3070 + }, + { + "epoch": 0.1, + "grad_norm": 0.7347684502601624, + "learning_rate": 1.9628935943465666e-05, + "loss": 2.3477, + "step": 3071 + }, + { + "epoch": 0.1, + "grad_norm": 0.7178388833999634, + "learning_rate": 1.9628649034128148e-05, + "loss": 2.267, + "step": 3072 + }, + { + "epoch": 0.1, + "grad_norm": 0.6722525358200073, + "learning_rate": 1.96283620160118e-05, + "loss": 2.3399, + "step": 3073 + }, + { + "epoch": 0.1, + "grad_norm": 0.6516978740692139, + "learning_rate": 1.9628074889119854e-05, + "loss": 2.3217, + "step": 3074 + }, + { + "epoch": 0.1, + "grad_norm": 0.6769207119941711, + "learning_rate": 1.9627787653455564e-05, + "loss": 2.2535, + "step": 3075 + }, + { + "epoch": 0.1, + "grad_norm": 0.7423757314682007, + "learning_rate": 1.962750030902217e-05, + "loss": 2.2908, + "step": 3076 + }, + { + "epoch": 0.1, + "grad_norm": 0.6747810244560242, + "learning_rate": 1.9627212855822923e-05, + "loss": 2.2945, + "step": 3077 + }, + { + "epoch": 0.1, + "grad_norm": 0.6609032154083252, + "learning_rate": 1.9626925293861068e-05, + "loss": 2.2892, + "step": 3078 + }, + { + "epoch": 0.1, + "grad_norm": 0.7531595230102539, + "learning_rate": 1.962663762313985e-05, + "loss": 2.3213, + "step": 3079 + }, + { + "epoch": 0.1, + "grad_norm": 0.658375084400177, + "learning_rate": 1.962634984366252e-05, + "loss": 2.3033, + "step": 3080 + }, + { + "epoch": 0.1, + "grad_norm": 0.6509714126586914, + "learning_rate": 1.9626061955432333e-05, + "loss": 2.3249, + "step": 3081 + }, + { + "epoch": 0.1, + "grad_norm": 0.6989246606826782, + "learning_rate": 1.962577395845254e-05, + "loss": 2.3516, + "step": 3082 + }, + { + "epoch": 0.1, + "grad_norm": 0.6992824077606201, + "learning_rate": 1.9625485852726397e-05, + "loss": 2.2787, + "step": 3083 + }, + { + "epoch": 0.1, + "grad_norm": 0.7299829721450806, + "learning_rate": 1.9625197638257152e-05, + "loss": 2.3116, + "step": 3084 + }, + { + "epoch": 0.1, + "grad_norm": 0.6939265131950378, + "learning_rate": 1.9624909315048067e-05, + "loss": 2.3574, + "step": 3085 + }, + { + "epoch": 0.1, + "grad_norm": 0.6580144166946411, + "learning_rate": 1.9624620883102395e-05, + "loss": 2.3364, + "step": 3086 + }, + { + "epoch": 0.1, + "grad_norm": 0.6714300513267517, + "learning_rate": 1.9624332342423398e-05, + "loss": 2.2868, + "step": 3087 + }, + { + "epoch": 0.1, + "grad_norm": 0.6902357339859009, + "learning_rate": 1.9624043693014336e-05, + "loss": 2.2624, + "step": 3088 + }, + { + "epoch": 0.1, + "grad_norm": 0.6722714304924011, + "learning_rate": 1.9623754934878464e-05, + "loss": 2.3488, + "step": 3089 + }, + { + "epoch": 0.1, + "grad_norm": 0.6773586273193359, + "learning_rate": 1.9623466068019056e-05, + "loss": 2.2837, + "step": 3090 + }, + { + "epoch": 0.1, + "grad_norm": 0.6732198596000671, + "learning_rate": 1.9623177092439364e-05, + "loss": 2.2584, + "step": 3091 + }, + { + "epoch": 0.1, + "grad_norm": 0.6610260009765625, + "learning_rate": 1.962288800814266e-05, + "loss": 2.346, + "step": 3092 + }, + { + "epoch": 0.1, + "grad_norm": 0.6437327265739441, + "learning_rate": 1.9622598815132198e-05, + "loss": 2.3689, + "step": 3093 + }, + { + "epoch": 0.1, + "grad_norm": 0.6843645572662354, + "learning_rate": 1.9622309513411265e-05, + "loss": 2.2683, + "step": 3094 + }, + { + "epoch": 0.1, + "grad_norm": 0.6770291328430176, + "learning_rate": 1.9622020102983114e-05, + "loss": 2.2661, + "step": 3095 + }, + { + "epoch": 0.1, + "grad_norm": 0.6504688262939453, + "learning_rate": 1.9621730583851016e-05, + "loss": 2.3082, + "step": 3096 + }, + { + "epoch": 0.1, + "grad_norm": 0.6647862792015076, + "learning_rate": 1.9621440956018248e-05, + "loss": 2.3046, + "step": 3097 + }, + { + "epoch": 0.1, + "grad_norm": 0.6618741154670715, + "learning_rate": 1.9621151219488077e-05, + "loss": 2.287, + "step": 3098 + }, + { + "epoch": 0.1, + "grad_norm": 0.6801279783248901, + "learning_rate": 1.9620861374263778e-05, + "loss": 2.33, + "step": 3099 + }, + { + "epoch": 0.1, + "grad_norm": 0.7018862366676331, + "learning_rate": 1.962057142034863e-05, + "loss": 2.2983, + "step": 3100 + }, + { + "epoch": 0.1, + "grad_norm": 0.6851409077644348, + "learning_rate": 1.96202813577459e-05, + "loss": 2.335, + "step": 3101 + }, + { + "epoch": 0.1, + "grad_norm": 0.6796402931213379, + "learning_rate": 1.9619991186458868e-05, + "loss": 2.2785, + "step": 3102 + }, + { + "epoch": 0.1, + "grad_norm": 0.6904879808425903, + "learning_rate": 1.9619700906490816e-05, + "loss": 2.3951, + "step": 3103 + }, + { + "epoch": 0.1, + "grad_norm": 0.6954065561294556, + "learning_rate": 1.9619410517845022e-05, + "loss": 2.3086, + "step": 3104 + }, + { + "epoch": 0.1, + "grad_norm": 0.7203652262687683, + "learning_rate": 1.9619120020524765e-05, + "loss": 2.3378, + "step": 3105 + }, + { + "epoch": 0.1, + "grad_norm": 0.6783618927001953, + "learning_rate": 1.9618829414533325e-05, + "loss": 2.2792, + "step": 3106 + }, + { + "epoch": 0.1, + "grad_norm": 0.681893527507782, + "learning_rate": 1.961853869987399e-05, + "loss": 2.2767, + "step": 3107 + }, + { + "epoch": 0.1, + "grad_norm": 0.6805277466773987, + "learning_rate": 1.961824787655004e-05, + "loss": 2.3361, + "step": 3108 + }, + { + "epoch": 0.1, + "grad_norm": 0.6789894700050354, + "learning_rate": 1.9617956944564763e-05, + "loss": 2.3144, + "step": 3109 + }, + { + "epoch": 0.1, + "grad_norm": 0.6904398202896118, + "learning_rate": 1.9617665903921446e-05, + "loss": 2.3166, + "step": 3110 + }, + { + "epoch": 0.1, + "grad_norm": 0.6732697486877441, + "learning_rate": 1.9617374754623376e-05, + "loss": 2.312, + "step": 3111 + }, + { + "epoch": 0.1, + "grad_norm": 0.662837564945221, + "learning_rate": 1.9617083496673838e-05, + "loss": 2.4164, + "step": 3112 + }, + { + "epoch": 0.1, + "grad_norm": 0.6572659611701965, + "learning_rate": 1.9616792130076132e-05, + "loss": 2.2993, + "step": 3113 + }, + { + "epoch": 0.1, + "grad_norm": 0.6960311532020569, + "learning_rate": 1.9616500654833542e-05, + "loss": 2.2621, + "step": 3114 + }, + { + "epoch": 0.1, + "grad_norm": 0.6972876191139221, + "learning_rate": 1.9616209070949365e-05, + "loss": 2.2946, + "step": 3115 + }, + { + "epoch": 0.1, + "grad_norm": 0.6960448622703552, + "learning_rate": 1.9615917378426893e-05, + "loss": 2.2674, + "step": 3116 + }, + { + "epoch": 0.1, + "grad_norm": 0.6815236806869507, + "learning_rate": 1.961562557726942e-05, + "loss": 2.2426, + "step": 3117 + }, + { + "epoch": 0.1, + "grad_norm": 0.6605592370033264, + "learning_rate": 1.9615333667480247e-05, + "loss": 2.2673, + "step": 3118 + }, + { + "epoch": 0.1, + "grad_norm": 0.7008052468299866, + "learning_rate": 1.961504164906267e-05, + "loss": 2.3058, + "step": 3119 + }, + { + "epoch": 0.1, + "grad_norm": 0.7082712054252625, + "learning_rate": 1.9614749522019986e-05, + "loss": 2.2543, + "step": 3120 + }, + { + "epoch": 0.1, + "grad_norm": 0.7027872204780579, + "learning_rate": 1.9614457286355496e-05, + "loss": 2.2355, + "step": 3121 + }, + { + "epoch": 0.1, + "grad_norm": 0.6617802977561951, + "learning_rate": 1.9614164942072505e-05, + "loss": 2.2881, + "step": 3122 + }, + { + "epoch": 0.1, + "grad_norm": 0.6827096939086914, + "learning_rate": 1.961387248917431e-05, + "loss": 2.3263, + "step": 3123 + }, + { + "epoch": 0.1, + "grad_norm": 0.6676981449127197, + "learning_rate": 1.961357992766422e-05, + "loss": 2.33, + "step": 3124 + }, + { + "epoch": 0.1, + "grad_norm": 0.670794665813446, + "learning_rate": 1.9613287257545533e-05, + "loss": 2.2564, + "step": 3125 + }, + { + "epoch": 0.1, + "grad_norm": 0.7067950367927551, + "learning_rate": 1.961299447882157e-05, + "loss": 2.3435, + "step": 3126 + }, + { + "epoch": 0.1, + "grad_norm": 0.664048969745636, + "learning_rate": 1.9612701591495618e-05, + "loss": 2.2498, + "step": 3127 + }, + { + "epoch": 0.1, + "grad_norm": 0.661981463432312, + "learning_rate": 1.9612408595571007e-05, + "loss": 2.315, + "step": 3128 + }, + { + "epoch": 0.1, + "grad_norm": 0.6529367566108704, + "learning_rate": 1.9612115491051033e-05, + "loss": 2.1863, + "step": 3129 + }, + { + "epoch": 0.1, + "grad_norm": 0.6726460456848145, + "learning_rate": 1.961182227793901e-05, + "loss": 2.3114, + "step": 3130 + }, + { + "epoch": 0.1, + "grad_norm": 0.6730542778968811, + "learning_rate": 1.9611528956238252e-05, + "loss": 2.3233, + "step": 3131 + }, + { + "epoch": 0.1, + "grad_norm": 0.6809111833572388, + "learning_rate": 1.9611235525952076e-05, + "loss": 2.2634, + "step": 3132 + }, + { + "epoch": 0.1, + "grad_norm": 0.6963350176811218, + "learning_rate": 1.9610941987083788e-05, + "loss": 2.2942, + "step": 3133 + }, + { + "epoch": 0.1, + "grad_norm": 0.6986297369003296, + "learning_rate": 1.9610648339636715e-05, + "loss": 2.2303, + "step": 3134 + }, + { + "epoch": 0.1, + "grad_norm": 0.6576271057128906, + "learning_rate": 1.961035458361417e-05, + "loss": 2.212, + "step": 3135 + }, + { + "epoch": 0.1, + "grad_norm": 0.6566599607467651, + "learning_rate": 1.961006071901947e-05, + "loss": 2.3151, + "step": 3136 + }, + { + "epoch": 0.1, + "grad_norm": 0.6749888062477112, + "learning_rate": 1.9609766745855933e-05, + "loss": 2.3049, + "step": 3137 + }, + { + "epoch": 0.1, + "grad_norm": 0.6851335167884827, + "learning_rate": 1.9609472664126885e-05, + "loss": 2.3199, + "step": 3138 + }, + { + "epoch": 0.1, + "grad_norm": 0.7235228419303894, + "learning_rate": 1.9609178473835647e-05, + "loss": 2.3395, + "step": 3139 + }, + { + "epoch": 0.1, + "grad_norm": 0.7386735677719116, + "learning_rate": 1.9608884174985542e-05, + "loss": 2.3669, + "step": 3140 + }, + { + "epoch": 0.1, + "grad_norm": 0.6980429887771606, + "learning_rate": 1.96085897675799e-05, + "loss": 2.3397, + "step": 3141 + }, + { + "epoch": 0.1, + "grad_norm": 0.6962876319885254, + "learning_rate": 1.9608295251622036e-05, + "loss": 2.3555, + "step": 3142 + }, + { + "epoch": 0.1, + "grad_norm": 0.682605504989624, + "learning_rate": 1.9608000627115282e-05, + "loss": 2.2796, + "step": 3143 + }, + { + "epoch": 0.1, + "grad_norm": 0.6582570672035217, + "learning_rate": 1.960770589406297e-05, + "loss": 2.2466, + "step": 3144 + }, + { + "epoch": 0.1, + "grad_norm": 0.7072826623916626, + "learning_rate": 1.9607411052468427e-05, + "loss": 2.3273, + "step": 3145 + }, + { + "epoch": 0.1, + "grad_norm": 0.6734939813613892, + "learning_rate": 1.9607116102334988e-05, + "loss": 2.2719, + "step": 3146 + }, + { + "epoch": 0.1, + "grad_norm": 0.6844536066055298, + "learning_rate": 1.960682104366598e-05, + "loss": 2.2639, + "step": 3147 + }, + { + "epoch": 0.1, + "grad_norm": 0.7324599027633667, + "learning_rate": 1.9606525876464734e-05, + "loss": 2.3159, + "step": 3148 + }, + { + "epoch": 0.1, + "grad_norm": 0.7275071740150452, + "learning_rate": 1.960623060073459e-05, + "loss": 2.3568, + "step": 3149 + }, + { + "epoch": 0.1, + "grad_norm": 0.6729439496994019, + "learning_rate": 1.9605935216478884e-05, + "loss": 2.3132, + "step": 3150 + }, + { + "epoch": 0.1, + "grad_norm": 0.65671706199646, + "learning_rate": 1.9605639723700953e-05, + "loss": 2.2849, + "step": 3151 + }, + { + "epoch": 0.1, + "grad_norm": 0.6889495849609375, + "learning_rate": 1.9605344122404132e-05, + "loss": 2.3272, + "step": 3152 + }, + { + "epoch": 0.1, + "grad_norm": 0.6683332920074463, + "learning_rate": 1.9605048412591762e-05, + "loss": 2.287, + "step": 3153 + }, + { + "epoch": 0.1, + "grad_norm": 0.6705678105354309, + "learning_rate": 1.9604752594267184e-05, + "loss": 2.2859, + "step": 3154 + }, + { + "epoch": 0.1, + "grad_norm": 0.6564631462097168, + "learning_rate": 1.960445666743374e-05, + "loss": 2.2636, + "step": 3155 + }, + { + "epoch": 0.11, + "grad_norm": 0.7232211828231812, + "learning_rate": 1.9604160632094778e-05, + "loss": 2.3698, + "step": 3156 + }, + { + "epoch": 0.11, + "grad_norm": 0.6949573159217834, + "learning_rate": 1.9603864488253632e-05, + "loss": 2.2941, + "step": 3157 + }, + { + "epoch": 0.11, + "grad_norm": 0.7094296216964722, + "learning_rate": 1.9603568235913654e-05, + "loss": 2.2363, + "step": 3158 + }, + { + "epoch": 0.11, + "grad_norm": 0.6770161986351013, + "learning_rate": 1.9603271875078194e-05, + "loss": 2.2922, + "step": 3159 + }, + { + "epoch": 0.11, + "grad_norm": 0.6891723871231079, + "learning_rate": 1.960297540575059e-05, + "loss": 2.2343, + "step": 3160 + }, + { + "epoch": 0.11, + "grad_norm": 0.6961461901664734, + "learning_rate": 1.9602678827934205e-05, + "loss": 2.2409, + "step": 3161 + }, + { + "epoch": 0.11, + "grad_norm": 0.6995030045509338, + "learning_rate": 1.960238214163238e-05, + "loss": 2.2295, + "step": 3162 + }, + { + "epoch": 0.11, + "grad_norm": 0.6646366119384766, + "learning_rate": 1.9602085346848468e-05, + "loss": 2.2904, + "step": 3163 + }, + { + "epoch": 0.11, + "grad_norm": 0.6777051687240601, + "learning_rate": 1.960178844358582e-05, + "loss": 2.2175, + "step": 3164 + }, + { + "epoch": 0.11, + "grad_norm": 0.6713035106658936, + "learning_rate": 1.9601491431847802e-05, + "loss": 2.284, + "step": 3165 + }, + { + "epoch": 0.11, + "grad_norm": 0.6949702501296997, + "learning_rate": 1.960119431163775e-05, + "loss": 2.3241, + "step": 3166 + }, + { + "epoch": 0.11, + "grad_norm": 0.6910507082939148, + "learning_rate": 1.960089708295904e-05, + "loss": 2.3641, + "step": 3167 + }, + { + "epoch": 0.11, + "grad_norm": 0.6850019097328186, + "learning_rate": 1.960059974581502e-05, + "loss": 2.3064, + "step": 3168 + }, + { + "epoch": 0.11, + "grad_norm": 0.6518955230712891, + "learning_rate": 1.9600302300209047e-05, + "loss": 2.3273, + "step": 3169 + }, + { + "epoch": 0.11, + "grad_norm": 0.7129683494567871, + "learning_rate": 1.960000474614449e-05, + "loss": 2.3703, + "step": 3170 + }, + { + "epoch": 0.11, + "grad_norm": 0.6644880771636963, + "learning_rate": 1.95997070836247e-05, + "loss": 2.294, + "step": 3171 + }, + { + "epoch": 0.11, + "grad_norm": 0.6693429350852966, + "learning_rate": 1.959940931265305e-05, + "loss": 2.2669, + "step": 3172 + }, + { + "epoch": 0.11, + "grad_norm": 0.6777902245521545, + "learning_rate": 1.9599111433232897e-05, + "loss": 2.2397, + "step": 3173 + }, + { + "epoch": 0.11, + "grad_norm": 0.6584378480911255, + "learning_rate": 1.9598813445367608e-05, + "loss": 2.2918, + "step": 3174 + }, + { + "epoch": 0.11, + "grad_norm": 0.6797993183135986, + "learning_rate": 1.9598515349060553e-05, + "loss": 2.308, + "step": 3175 + }, + { + "epoch": 0.11, + "grad_norm": 0.668185293674469, + "learning_rate": 1.9598217144315096e-05, + "loss": 2.2994, + "step": 3176 + }, + { + "epoch": 0.11, + "grad_norm": 0.6579134464263916, + "learning_rate": 1.9597918831134603e-05, + "loss": 2.2443, + "step": 3177 + }, + { + "epoch": 0.11, + "grad_norm": 0.6813787221908569, + "learning_rate": 1.9597620409522454e-05, + "loss": 2.2887, + "step": 3178 + }, + { + "epoch": 0.11, + "grad_norm": 0.6628063917160034, + "learning_rate": 1.959732187948201e-05, + "loss": 2.3068, + "step": 3179 + }, + { + "epoch": 0.11, + "grad_norm": 0.6778666377067566, + "learning_rate": 1.9597023241016647e-05, + "loss": 2.2634, + "step": 3180 + }, + { + "epoch": 0.11, + "grad_norm": 0.666515588760376, + "learning_rate": 1.9596724494129745e-05, + "loss": 2.272, + "step": 3181 + }, + { + "epoch": 0.11, + "grad_norm": 0.6696109175682068, + "learning_rate": 1.959642563882467e-05, + "loss": 2.3331, + "step": 3182 + }, + { + "epoch": 0.11, + "grad_norm": 0.6769323945045471, + "learning_rate": 1.9596126675104803e-05, + "loss": 2.2684, + "step": 3183 + }, + { + "epoch": 0.11, + "grad_norm": 0.671610414981842, + "learning_rate": 1.959582760297352e-05, + "loss": 2.2981, + "step": 3184 + }, + { + "epoch": 0.11, + "grad_norm": 0.6878125071525574, + "learning_rate": 1.95955284224342e-05, + "loss": 2.297, + "step": 3185 + }, + { + "epoch": 0.11, + "grad_norm": 0.7316403388977051, + "learning_rate": 1.9595229133490225e-05, + "loss": 2.2497, + "step": 3186 + }, + { + "epoch": 0.11, + "grad_norm": 0.7361128330230713, + "learning_rate": 1.9594929736144978e-05, + "loss": 2.3559, + "step": 3187 + }, + { + "epoch": 0.11, + "grad_norm": 0.6558111906051636, + "learning_rate": 1.959463023040183e-05, + "loss": 2.2835, + "step": 3188 + }, + { + "epoch": 0.11, + "grad_norm": 0.6527162790298462, + "learning_rate": 1.959433061626418e-05, + "loss": 2.2099, + "step": 3189 + }, + { + "epoch": 0.11, + "grad_norm": 0.6706349849700928, + "learning_rate": 1.9594030893735404e-05, + "loss": 2.3187, + "step": 3190 + }, + { + "epoch": 0.11, + "grad_norm": 0.6911870241165161, + "learning_rate": 1.9593731062818887e-05, + "loss": 2.3388, + "step": 3191 + }, + { + "epoch": 0.11, + "grad_norm": 0.6891999244689941, + "learning_rate": 1.959343112351802e-05, + "loss": 2.3054, + "step": 3192 + }, + { + "epoch": 0.11, + "grad_norm": 0.6826380491256714, + "learning_rate": 1.959313107583619e-05, + "loss": 2.2617, + "step": 3193 + }, + { + "epoch": 0.11, + "grad_norm": 0.7053672075271606, + "learning_rate": 1.9592830919776786e-05, + "loss": 2.299, + "step": 3194 + }, + { + "epoch": 0.11, + "grad_norm": 0.7277592420578003, + "learning_rate": 1.9592530655343202e-05, + "loss": 2.2939, + "step": 3195 + }, + { + "epoch": 0.11, + "grad_norm": 0.6787527799606323, + "learning_rate": 1.9592230282538828e-05, + "loss": 2.292, + "step": 3196 + }, + { + "epoch": 0.11, + "grad_norm": 0.6701645255088806, + "learning_rate": 1.959192980136706e-05, + "loss": 2.311, + "step": 3197 + }, + { + "epoch": 0.11, + "grad_norm": 0.6556048393249512, + "learning_rate": 1.9591629211831288e-05, + "loss": 2.2946, + "step": 3198 + }, + { + "epoch": 0.11, + "grad_norm": 0.6999236941337585, + "learning_rate": 1.9591328513934913e-05, + "loss": 2.3142, + "step": 3199 + }, + { + "epoch": 0.11, + "grad_norm": 0.7091421484947205, + "learning_rate": 1.9591027707681326e-05, + "loss": 2.2679, + "step": 3200 + }, + { + "epoch": 0.11, + "grad_norm": 0.696406900882721, + "learning_rate": 1.959072679307393e-05, + "loss": 2.3135, + "step": 3201 + }, + { + "epoch": 0.11, + "grad_norm": 0.7025538086891174, + "learning_rate": 1.9590425770116125e-05, + "loss": 2.3131, + "step": 3202 + }, + { + "epoch": 0.11, + "grad_norm": 0.6604133248329163, + "learning_rate": 1.959012463881131e-05, + "loss": 2.3091, + "step": 3203 + }, + { + "epoch": 0.11, + "grad_norm": 0.7291641235351562, + "learning_rate": 1.9589823399162887e-05, + "loss": 2.2842, + "step": 3204 + }, + { + "epoch": 0.11, + "grad_norm": 0.6853616833686829, + "learning_rate": 1.9589522051174257e-05, + "loss": 2.3785, + "step": 3205 + }, + { + "epoch": 0.11, + "grad_norm": 0.7002339959144592, + "learning_rate": 1.9589220594848826e-05, + "loss": 2.3104, + "step": 3206 + }, + { + "epoch": 0.11, + "grad_norm": 0.6704463958740234, + "learning_rate": 1.9588919030190006e-05, + "loss": 2.2511, + "step": 3207 + }, + { + "epoch": 0.11, + "grad_norm": 0.7219144701957703, + "learning_rate": 1.9588617357201198e-05, + "loss": 2.2918, + "step": 3208 + }, + { + "epoch": 0.11, + "grad_norm": 0.6766985058784485, + "learning_rate": 1.9588315575885806e-05, + "loss": 2.3221, + "step": 3209 + }, + { + "epoch": 0.11, + "grad_norm": 0.697449266910553, + "learning_rate": 1.9588013686247247e-05, + "loss": 2.2893, + "step": 3210 + }, + { + "epoch": 0.11, + "grad_norm": 0.6685243844985962, + "learning_rate": 1.958771168828893e-05, + "loss": 2.2684, + "step": 3211 + }, + { + "epoch": 0.11, + "grad_norm": 0.6941660046577454, + "learning_rate": 1.958740958201426e-05, + "loss": 2.2944, + "step": 3212 + }, + { + "epoch": 0.11, + "grad_norm": 0.6850084066390991, + "learning_rate": 1.958710736742666e-05, + "loss": 2.2579, + "step": 3213 + }, + { + "epoch": 0.11, + "grad_norm": 0.6781442165374756, + "learning_rate": 1.9586805044529536e-05, + "loss": 2.284, + "step": 3214 + }, + { + "epoch": 0.11, + "grad_norm": 0.6558775305747986, + "learning_rate": 1.958650261332631e-05, + "loss": 2.3083, + "step": 3215 + }, + { + "epoch": 0.11, + "grad_norm": 0.7022719979286194, + "learning_rate": 1.9586200073820394e-05, + "loss": 2.3518, + "step": 3216 + }, + { + "epoch": 0.11, + "grad_norm": 0.6738336682319641, + "learning_rate": 1.9585897426015207e-05, + "loss": 2.2363, + "step": 3217 + }, + { + "epoch": 0.11, + "grad_norm": 0.6709434986114502, + "learning_rate": 1.958559466991417e-05, + "loss": 2.2673, + "step": 3218 + }, + { + "epoch": 0.11, + "grad_norm": 0.6536407470703125, + "learning_rate": 1.9585291805520702e-05, + "loss": 2.3066, + "step": 3219 + }, + { + "epoch": 0.11, + "grad_norm": 0.6876931190490723, + "learning_rate": 1.9584988832838227e-05, + "loss": 2.3874, + "step": 3220 + }, + { + "epoch": 0.11, + "grad_norm": 0.6653323173522949, + "learning_rate": 1.9584685751870162e-05, + "loss": 2.3723, + "step": 3221 + }, + { + "epoch": 0.11, + "grad_norm": 0.6823737025260925, + "learning_rate": 1.9584382562619937e-05, + "loss": 2.3453, + "step": 3222 + }, + { + "epoch": 0.11, + "grad_norm": 0.6831386685371399, + "learning_rate": 1.9584079265090975e-05, + "loss": 2.2667, + "step": 3223 + }, + { + "epoch": 0.11, + "grad_norm": 0.6659426093101501, + "learning_rate": 1.95837758592867e-05, + "loss": 2.233, + "step": 3224 + }, + { + "epoch": 0.11, + "grad_norm": 0.6855709552764893, + "learning_rate": 1.9583472345210544e-05, + "loss": 2.2367, + "step": 3225 + }, + { + "epoch": 0.11, + "grad_norm": 0.6779314875602722, + "learning_rate": 1.9583168722865932e-05, + "loss": 2.3187, + "step": 3226 + }, + { + "epoch": 0.11, + "grad_norm": 0.7188411951065063, + "learning_rate": 1.9582864992256295e-05, + "loss": 2.2545, + "step": 3227 + }, + { + "epoch": 0.11, + "grad_norm": 0.6842631101608276, + "learning_rate": 1.9582561153385067e-05, + "loss": 2.2459, + "step": 3228 + }, + { + "epoch": 0.11, + "grad_norm": 0.7029978632926941, + "learning_rate": 1.958225720625568e-05, + "loss": 2.2771, + "step": 3229 + }, + { + "epoch": 0.11, + "grad_norm": 0.7499919533729553, + "learning_rate": 1.958195315087157e-05, + "loss": 2.3098, + "step": 3230 + }, + { + "epoch": 0.11, + "grad_norm": 0.6602301597595215, + "learning_rate": 1.9581648987236165e-05, + "loss": 2.2927, + "step": 3231 + }, + { + "epoch": 0.11, + "grad_norm": 0.6747449040412903, + "learning_rate": 1.9581344715352902e-05, + "loss": 2.2869, + "step": 3232 + }, + { + "epoch": 0.11, + "grad_norm": 0.6745195388793945, + "learning_rate": 1.958104033522523e-05, + "loss": 2.3307, + "step": 3233 + }, + { + "epoch": 0.11, + "grad_norm": 0.7078105211257935, + "learning_rate": 1.958073584685657e-05, + "loss": 2.2831, + "step": 3234 + }, + { + "epoch": 0.11, + "grad_norm": 0.6723508834838867, + "learning_rate": 1.9580431250250376e-05, + "loss": 2.2534, + "step": 3235 + }, + { + "epoch": 0.11, + "grad_norm": 0.681725800037384, + "learning_rate": 1.958012654541008e-05, + "loss": 2.3029, + "step": 3236 + }, + { + "epoch": 0.11, + "grad_norm": 0.6951661109924316, + "learning_rate": 1.9579821732339136e-05, + "loss": 2.3438, + "step": 3237 + }, + { + "epoch": 0.11, + "grad_norm": 0.6803429126739502, + "learning_rate": 1.9579516811040977e-05, + "loss": 2.2932, + "step": 3238 + }, + { + "epoch": 0.11, + "grad_norm": 0.6646954417228699, + "learning_rate": 1.9579211781519052e-05, + "loss": 2.2873, + "step": 3239 + }, + { + "epoch": 0.11, + "grad_norm": 0.6637921333312988, + "learning_rate": 1.9578906643776807e-05, + "loss": 2.269, + "step": 3240 + }, + { + "epoch": 0.11, + "grad_norm": 0.6860204339027405, + "learning_rate": 1.9578601397817686e-05, + "loss": 2.2794, + "step": 3241 + }, + { + "epoch": 0.11, + "grad_norm": 0.6754475831985474, + "learning_rate": 1.9578296043645142e-05, + "loss": 2.3017, + "step": 3242 + }, + { + "epoch": 0.11, + "grad_norm": 0.6865530610084534, + "learning_rate": 1.9577990581262622e-05, + "loss": 2.3192, + "step": 3243 + }, + { + "epoch": 0.11, + "grad_norm": 0.6550263166427612, + "learning_rate": 1.9577685010673577e-05, + "loss": 2.2488, + "step": 3244 + }, + { + "epoch": 0.11, + "grad_norm": 0.6935075521469116, + "learning_rate": 1.957737933188146e-05, + "loss": 2.2327, + "step": 3245 + }, + { + "epoch": 0.11, + "grad_norm": 0.6526719331741333, + "learning_rate": 1.9577073544889728e-05, + "loss": 2.307, + "step": 3246 + }, + { + "epoch": 0.11, + "grad_norm": 0.6713154315948486, + "learning_rate": 1.9576767649701828e-05, + "loss": 2.2876, + "step": 3247 + }, + { + "epoch": 0.11, + "grad_norm": 0.7009166479110718, + "learning_rate": 1.9576461646321217e-05, + "loss": 2.3952, + "step": 3248 + }, + { + "epoch": 0.11, + "grad_norm": 0.6870133280754089, + "learning_rate": 1.957615553475136e-05, + "loss": 2.3105, + "step": 3249 + }, + { + "epoch": 0.11, + "grad_norm": 0.6648132801055908, + "learning_rate": 1.9575849314995707e-05, + "loss": 2.29, + "step": 3250 + }, + { + "epoch": 0.11, + "grad_norm": 0.6671931147575378, + "learning_rate": 1.957554298705772e-05, + "loss": 2.2932, + "step": 3251 + }, + { + "epoch": 0.11, + "grad_norm": 0.7125139236450195, + "learning_rate": 1.957523655094086e-05, + "loss": 2.3547, + "step": 3252 + }, + { + "epoch": 0.11, + "grad_norm": 0.6626330018043518, + "learning_rate": 1.9574930006648592e-05, + "loss": 2.3173, + "step": 3253 + }, + { + "epoch": 0.11, + "grad_norm": 0.6882072687149048, + "learning_rate": 1.9574623354184374e-05, + "loss": 2.2967, + "step": 3254 + }, + { + "epoch": 0.11, + "grad_norm": 0.6339793801307678, + "learning_rate": 1.9574316593551674e-05, + "loss": 2.2545, + "step": 3255 + }, + { + "epoch": 0.11, + "grad_norm": 0.6647969484329224, + "learning_rate": 1.9574009724753954e-05, + "loss": 2.3258, + "step": 3256 + }, + { + "epoch": 0.11, + "grad_norm": 0.6963188648223877, + "learning_rate": 1.9573702747794687e-05, + "loss": 2.3536, + "step": 3257 + }, + { + "epoch": 0.11, + "grad_norm": 0.6629931926727295, + "learning_rate": 1.9573395662677332e-05, + "loss": 2.3059, + "step": 3258 + }, + { + "epoch": 0.11, + "grad_norm": 0.6581913232803345, + "learning_rate": 1.957308846940537e-05, + "loss": 2.341, + "step": 3259 + }, + { + "epoch": 0.11, + "grad_norm": 0.6464890241622925, + "learning_rate": 1.9572781167982258e-05, + "loss": 2.2698, + "step": 3260 + }, + { + "epoch": 0.11, + "grad_norm": 0.6877791285514832, + "learning_rate": 1.9572473758411477e-05, + "loss": 2.3588, + "step": 3261 + }, + { + "epoch": 0.11, + "grad_norm": 0.6642687916755676, + "learning_rate": 1.9572166240696496e-05, + "loss": 2.2444, + "step": 3262 + }, + { + "epoch": 0.11, + "grad_norm": 0.6501506567001343, + "learning_rate": 1.9571858614840793e-05, + "loss": 2.2833, + "step": 3263 + }, + { + "epoch": 0.11, + "grad_norm": 0.6657009720802307, + "learning_rate": 1.957155088084784e-05, + "loss": 2.2363, + "step": 3264 + }, + { + "epoch": 0.11, + "grad_norm": 0.6632678508758545, + "learning_rate": 1.9571243038721116e-05, + "loss": 2.3072, + "step": 3265 + }, + { + "epoch": 0.11, + "grad_norm": 0.7015467882156372, + "learning_rate": 1.95709350884641e-05, + "loss": 2.2846, + "step": 3266 + }, + { + "epoch": 0.11, + "grad_norm": 0.7435239553451538, + "learning_rate": 1.9570627030080263e-05, + "loss": 2.3322, + "step": 3267 + }, + { + "epoch": 0.11, + "grad_norm": 0.7427944540977478, + "learning_rate": 1.9570318863573092e-05, + "loss": 2.2779, + "step": 3268 + }, + { + "epoch": 0.11, + "grad_norm": 0.6989432573318481, + "learning_rate": 1.957001058894607e-05, + "loss": 2.2088, + "step": 3269 + }, + { + "epoch": 0.11, + "grad_norm": 0.6986260414123535, + "learning_rate": 1.9569702206202675e-05, + "loss": 2.3343, + "step": 3270 + }, + { + "epoch": 0.11, + "grad_norm": 0.6584373712539673, + "learning_rate": 1.9569393715346392e-05, + "loss": 2.3144, + "step": 3271 + }, + { + "epoch": 0.11, + "grad_norm": 0.6659768223762512, + "learning_rate": 1.9569085116380705e-05, + "loss": 2.2224, + "step": 3272 + }, + { + "epoch": 0.11, + "grad_norm": 0.7299159169197083, + "learning_rate": 1.9568776409309108e-05, + "loss": 2.3413, + "step": 3273 + }, + { + "epoch": 0.11, + "grad_norm": 0.6683459877967834, + "learning_rate": 1.956846759413508e-05, + "loss": 2.231, + "step": 3274 + }, + { + "epoch": 0.11, + "grad_norm": 0.6688826084136963, + "learning_rate": 1.956815867086211e-05, + "loss": 2.2433, + "step": 3275 + }, + { + "epoch": 0.11, + "grad_norm": 0.675812304019928, + "learning_rate": 1.9567849639493697e-05, + "loss": 2.3089, + "step": 3276 + }, + { + "epoch": 0.11, + "grad_norm": 0.6878451704978943, + "learning_rate": 1.9567540500033325e-05, + "loss": 2.3657, + "step": 3277 + }, + { + "epoch": 0.11, + "grad_norm": 0.6912396550178528, + "learning_rate": 1.9567231252484485e-05, + "loss": 2.313, + "step": 3278 + }, + { + "epoch": 0.11, + "grad_norm": 0.6846947073936462, + "learning_rate": 1.9566921896850673e-05, + "loss": 2.2733, + "step": 3279 + }, + { + "epoch": 0.11, + "grad_norm": 0.6598116755485535, + "learning_rate": 1.9566612433135383e-05, + "loss": 2.2762, + "step": 3280 + }, + { + "epoch": 0.11, + "grad_norm": 0.6648226380348206, + "learning_rate": 1.9566302861342117e-05, + "loss": 2.3119, + "step": 3281 + }, + { + "epoch": 0.11, + "grad_norm": 0.6883125305175781, + "learning_rate": 1.9565993181474362e-05, + "loss": 2.2338, + "step": 3282 + }, + { + "epoch": 0.11, + "grad_norm": 0.6678287982940674, + "learning_rate": 1.9565683393535625e-05, + "loss": 2.3436, + "step": 3283 + }, + { + "epoch": 0.11, + "grad_norm": 0.6640600562095642, + "learning_rate": 1.9565373497529406e-05, + "loss": 2.278, + "step": 3284 + }, + { + "epoch": 0.11, + "grad_norm": 0.7145614624023438, + "learning_rate": 1.9565063493459198e-05, + "loss": 2.3214, + "step": 3285 + }, + { + "epoch": 0.11, + "grad_norm": 0.6901586651802063, + "learning_rate": 1.9564753381328515e-05, + "loss": 2.2573, + "step": 3286 + }, + { + "epoch": 0.11, + "grad_norm": 0.7153841853141785, + "learning_rate": 1.956444316114085e-05, + "loss": 2.2895, + "step": 3287 + }, + { + "epoch": 0.11, + "grad_norm": 0.6618996262550354, + "learning_rate": 1.9564132832899707e-05, + "loss": 2.281, + "step": 3288 + }, + { + "epoch": 0.11, + "grad_norm": 0.6791805028915405, + "learning_rate": 1.9563822396608603e-05, + "loss": 2.2662, + "step": 3289 + }, + { + "epoch": 0.11, + "grad_norm": 0.6669474244117737, + "learning_rate": 1.9563511852271033e-05, + "loss": 2.2253, + "step": 3290 + }, + { + "epoch": 0.11, + "grad_norm": 0.659118115901947, + "learning_rate": 1.9563201199890514e-05, + "loss": 2.2917, + "step": 3291 + }, + { + "epoch": 0.11, + "grad_norm": 0.6741810441017151, + "learning_rate": 1.9562890439470554e-05, + "loss": 2.3494, + "step": 3292 + }, + { + "epoch": 0.11, + "grad_norm": 0.7016229033470154, + "learning_rate": 1.956257957101466e-05, + "loss": 2.3065, + "step": 3293 + }, + { + "epoch": 0.11, + "grad_norm": 0.6853245496749878, + "learning_rate": 1.9562268594526347e-05, + "loss": 2.3762, + "step": 3294 + }, + { + "epoch": 0.11, + "grad_norm": 0.6615698933601379, + "learning_rate": 1.9561957510009128e-05, + "loss": 2.2544, + "step": 3295 + }, + { + "epoch": 0.11, + "grad_norm": 0.7177711129188538, + "learning_rate": 1.9561646317466514e-05, + "loss": 2.3078, + "step": 3296 + }, + { + "epoch": 0.11, + "grad_norm": 0.6854174137115479, + "learning_rate": 1.956133501690203e-05, + "loss": 2.3099, + "step": 3297 + }, + { + "epoch": 0.11, + "grad_norm": 0.6666294932365417, + "learning_rate": 1.956102360831918e-05, + "loss": 2.312, + "step": 3298 + }, + { + "epoch": 0.11, + "grad_norm": 0.6575854420661926, + "learning_rate": 1.9560712091721488e-05, + "loss": 2.2546, + "step": 3299 + }, + { + "epoch": 0.11, + "grad_norm": 0.695037305355072, + "learning_rate": 1.9560400467112477e-05, + "loss": 2.266, + "step": 3300 + }, + { + "epoch": 0.11, + "grad_norm": 0.6751512289047241, + "learning_rate": 1.9560088734495665e-05, + "loss": 2.2428, + "step": 3301 + }, + { + "epoch": 0.11, + "grad_norm": 0.7023912668228149, + "learning_rate": 1.955977689387457e-05, + "loss": 2.2193, + "step": 3302 + }, + { + "epoch": 0.11, + "grad_norm": 0.6935909390449524, + "learning_rate": 1.9559464945252722e-05, + "loss": 2.3496, + "step": 3303 + }, + { + "epoch": 0.11, + "grad_norm": 0.7226423025131226, + "learning_rate": 1.955915288863364e-05, + "loss": 2.3311, + "step": 3304 + }, + { + "epoch": 0.11, + "grad_norm": 0.688186526298523, + "learning_rate": 1.9558840724020852e-05, + "loss": 2.2579, + "step": 3305 + }, + { + "epoch": 0.11, + "grad_norm": 0.7643429040908813, + "learning_rate": 1.955852845141788e-05, + "loss": 2.3113, + "step": 3306 + }, + { + "epoch": 0.11, + "grad_norm": 0.6757791638374329, + "learning_rate": 1.9558216070828257e-05, + "loss": 2.3065, + "step": 3307 + }, + { + "epoch": 0.11, + "grad_norm": 0.6753725409507751, + "learning_rate": 1.9557903582255513e-05, + "loss": 2.3026, + "step": 3308 + }, + { + "epoch": 0.11, + "grad_norm": 0.6865319013595581, + "learning_rate": 1.9557590985703174e-05, + "loss": 2.3241, + "step": 3309 + }, + { + "epoch": 0.11, + "grad_norm": 0.6682195067405701, + "learning_rate": 1.9557278281174775e-05, + "loss": 2.2624, + "step": 3310 + }, + { + "epoch": 0.11, + "grad_norm": 0.6861039996147156, + "learning_rate": 1.9556965468673847e-05, + "loss": 2.2788, + "step": 3311 + }, + { + "epoch": 0.11, + "grad_norm": 0.6809138059616089, + "learning_rate": 1.9556652548203922e-05, + "loss": 2.2352, + "step": 3312 + }, + { + "epoch": 0.11, + "grad_norm": 0.6755217909812927, + "learning_rate": 1.9556339519768535e-05, + "loss": 2.2645, + "step": 3313 + }, + { + "epoch": 0.11, + "grad_norm": 0.6843271255493164, + "learning_rate": 1.9556026383371227e-05, + "loss": 2.3389, + "step": 3314 + }, + { + "epoch": 0.11, + "grad_norm": 0.6822518110275269, + "learning_rate": 1.9555713139015534e-05, + "loss": 2.2334, + "step": 3315 + }, + { + "epoch": 0.11, + "grad_norm": 0.6699937582015991, + "learning_rate": 1.9555399786704994e-05, + "loss": 2.2655, + "step": 3316 + }, + { + "epoch": 0.11, + "grad_norm": 0.6926649808883667, + "learning_rate": 1.955508632644315e-05, + "loss": 2.2775, + "step": 3317 + }, + { + "epoch": 0.11, + "grad_norm": 0.6768621206283569, + "learning_rate": 1.9554772758233535e-05, + "loss": 2.2864, + "step": 3318 + }, + { + "epoch": 0.11, + "grad_norm": 0.6527018547058105, + "learning_rate": 1.95544590820797e-05, + "loss": 2.3011, + "step": 3319 + }, + { + "epoch": 0.11, + "grad_norm": 0.6702751517295837, + "learning_rate": 1.9554145297985187e-05, + "loss": 2.3541, + "step": 3320 + }, + { + "epoch": 0.11, + "grad_norm": 0.6950775384902954, + "learning_rate": 1.9553831405953537e-05, + "loss": 2.3011, + "step": 3321 + }, + { + "epoch": 0.11, + "grad_norm": 0.6607711911201477, + "learning_rate": 1.95535174059883e-05, + "loss": 2.2887, + "step": 3322 + }, + { + "epoch": 0.11, + "grad_norm": 0.6950490474700928, + "learning_rate": 1.955320329809302e-05, + "loss": 2.3431, + "step": 3323 + }, + { + "epoch": 0.11, + "grad_norm": 0.6859994530677795, + "learning_rate": 1.955288908227125e-05, + "loss": 2.27, + "step": 3324 + }, + { + "epoch": 0.11, + "grad_norm": 0.6716843247413635, + "learning_rate": 1.9552574758526538e-05, + "loss": 2.1834, + "step": 3325 + }, + { + "epoch": 0.11, + "grad_norm": 0.700272262096405, + "learning_rate": 1.9552260326862438e-05, + "loss": 2.228, + "step": 3326 + }, + { + "epoch": 0.11, + "grad_norm": 0.6992161870002747, + "learning_rate": 1.9551945787282496e-05, + "loss": 2.3421, + "step": 3327 + }, + { + "epoch": 0.11, + "grad_norm": 0.6473497152328491, + "learning_rate": 1.9551631139790264e-05, + "loss": 2.2662, + "step": 3328 + }, + { + "epoch": 0.11, + "grad_norm": 0.6414010524749756, + "learning_rate": 1.955131638438931e-05, + "loss": 2.2617, + "step": 3329 + }, + { + "epoch": 0.11, + "grad_norm": 0.6958712339401245, + "learning_rate": 1.9551001521083176e-05, + "loss": 2.2968, + "step": 3330 + }, + { + "epoch": 0.11, + "grad_norm": 0.6657890677452087, + "learning_rate": 1.9550686549875423e-05, + "loss": 2.2757, + "step": 3331 + }, + { + "epoch": 0.11, + "grad_norm": 0.7069322466850281, + "learning_rate": 1.9550371470769615e-05, + "loss": 2.3166, + "step": 3332 + }, + { + "epoch": 0.11, + "grad_norm": 0.7049700617790222, + "learning_rate": 1.95500562837693e-05, + "loss": 2.3427, + "step": 3333 + }, + { + "epoch": 0.11, + "grad_norm": 0.6740976572036743, + "learning_rate": 1.9549740988878053e-05, + "loss": 2.2906, + "step": 3334 + }, + { + "epoch": 0.11, + "grad_norm": 0.6662238240242004, + "learning_rate": 1.9549425586099425e-05, + "loss": 2.3191, + "step": 3335 + }, + { + "epoch": 0.11, + "grad_norm": 0.6581270694732666, + "learning_rate": 1.9549110075436984e-05, + "loss": 2.2896, + "step": 3336 + }, + { + "epoch": 0.11, + "grad_norm": 0.6574767827987671, + "learning_rate": 1.9548794456894297e-05, + "loss": 2.3268, + "step": 3337 + }, + { + "epoch": 0.11, + "grad_norm": 0.7186704874038696, + "learning_rate": 1.9548478730474923e-05, + "loss": 2.3037, + "step": 3338 + }, + { + "epoch": 0.11, + "grad_norm": 0.759251058101654, + "learning_rate": 1.9548162896182433e-05, + "loss": 2.2765, + "step": 3339 + }, + { + "epoch": 0.11, + "grad_norm": 0.6931256055831909, + "learning_rate": 1.9547846954020393e-05, + "loss": 2.2978, + "step": 3340 + }, + { + "epoch": 0.11, + "grad_norm": 0.6591857075691223, + "learning_rate": 1.9547530903992377e-05, + "loss": 2.3768, + "step": 3341 + }, + { + "epoch": 0.11, + "grad_norm": 0.710225522518158, + "learning_rate": 1.954721474610195e-05, + "loss": 2.2768, + "step": 3342 + }, + { + "epoch": 0.11, + "grad_norm": 0.7074884176254272, + "learning_rate": 1.9546898480352685e-05, + "loss": 2.3369, + "step": 3343 + }, + { + "epoch": 0.11, + "grad_norm": 0.7213272452354431, + "learning_rate": 1.9546582106748158e-05, + "loss": 2.3301, + "step": 3344 + }, + { + "epoch": 0.11, + "grad_norm": 0.6728893518447876, + "learning_rate": 1.954626562529194e-05, + "loss": 2.3361, + "step": 3345 + }, + { + "epoch": 0.11, + "grad_norm": 0.6933442950248718, + "learning_rate": 1.9545949035987607e-05, + "loss": 2.2059, + "step": 3346 + }, + { + "epoch": 0.11, + "grad_norm": 0.6774097084999084, + "learning_rate": 1.954563233883874e-05, + "loss": 2.3017, + "step": 3347 + }, + { + "epoch": 0.11, + "grad_norm": 0.6783869862556458, + "learning_rate": 1.954531553384891e-05, + "loss": 2.2787, + "step": 3348 + }, + { + "epoch": 0.11, + "grad_norm": 0.6795644164085388, + "learning_rate": 1.9544998621021702e-05, + "loss": 2.2513, + "step": 3349 + }, + { + "epoch": 0.11, + "grad_norm": 0.6820704936981201, + "learning_rate": 1.9544681600360687e-05, + "loss": 2.2568, + "step": 3350 + }, + { + "epoch": 0.11, + "grad_norm": 0.7162044644355774, + "learning_rate": 1.9544364471869458e-05, + "loss": 2.3018, + "step": 3351 + }, + { + "epoch": 0.11, + "grad_norm": 0.6596421003341675, + "learning_rate": 1.9544047235551592e-05, + "loss": 2.2835, + "step": 3352 + }, + { + "epoch": 0.11, + "grad_norm": 0.6737750768661499, + "learning_rate": 1.9543729891410674e-05, + "loss": 2.2869, + "step": 3353 + }, + { + "epoch": 0.11, + "grad_norm": 0.6758458018302917, + "learning_rate": 1.9543412439450288e-05, + "loss": 2.2493, + "step": 3354 + }, + { + "epoch": 0.11, + "grad_norm": 0.6500502228736877, + "learning_rate": 1.9543094879674022e-05, + "loss": 2.2356, + "step": 3355 + }, + { + "epoch": 0.11, + "grad_norm": 0.6987592577934265, + "learning_rate": 1.954277721208546e-05, + "loss": 2.3173, + "step": 3356 + }, + { + "epoch": 0.11, + "grad_norm": 0.698062002658844, + "learning_rate": 1.9542459436688198e-05, + "loss": 2.3035, + "step": 3357 + }, + { + "epoch": 0.11, + "grad_norm": 0.7098769545555115, + "learning_rate": 1.954214155348582e-05, + "loss": 2.2808, + "step": 3358 + }, + { + "epoch": 0.11, + "grad_norm": 0.7098121643066406, + "learning_rate": 1.954182356248192e-05, + "loss": 2.2999, + "step": 3359 + }, + { + "epoch": 0.11, + "grad_norm": 0.7207874655723572, + "learning_rate": 1.9541505463680092e-05, + "loss": 2.2884, + "step": 3360 + }, + { + "epoch": 0.11, + "grad_norm": 0.6703974604606628, + "learning_rate": 1.954118725708392e-05, + "loss": 2.2689, + "step": 3361 + }, + { + "epoch": 0.11, + "grad_norm": 0.6942721009254456, + "learning_rate": 1.954086894269701e-05, + "loss": 2.3201, + "step": 3362 + }, + { + "epoch": 0.11, + "grad_norm": 0.6930775046348572, + "learning_rate": 1.9540550520522953e-05, + "loss": 2.285, + "step": 3363 + }, + { + "epoch": 0.11, + "grad_norm": 0.6868101358413696, + "learning_rate": 1.954023199056535e-05, + "loss": 2.2863, + "step": 3364 + }, + { + "epoch": 0.11, + "grad_norm": 0.6652754545211792, + "learning_rate": 1.9539913352827794e-05, + "loss": 2.206, + "step": 3365 + }, + { + "epoch": 0.11, + "grad_norm": 0.6693503260612488, + "learning_rate": 1.953959460731389e-05, + "loss": 2.1681, + "step": 3366 + }, + { + "epoch": 0.11, + "grad_norm": 0.6639902591705322, + "learning_rate": 1.9539275754027235e-05, + "loss": 2.2197, + "step": 3367 + }, + { + "epoch": 0.11, + "grad_norm": 0.6737970113754272, + "learning_rate": 1.953895679297144e-05, + "loss": 2.3076, + "step": 3368 + }, + { + "epoch": 0.11, + "grad_norm": 0.6923253536224365, + "learning_rate": 1.953863772415009e-05, + "loss": 2.3116, + "step": 3369 + }, + { + "epoch": 0.11, + "grad_norm": 0.7079081535339355, + "learning_rate": 1.953831854756681e-05, + "loss": 2.3126, + "step": 3370 + }, + { + "epoch": 0.11, + "grad_norm": 0.6975604891777039, + "learning_rate": 1.9537999263225194e-05, + "loss": 2.2651, + "step": 3371 + }, + { + "epoch": 0.11, + "grad_norm": 0.6655405163764954, + "learning_rate": 1.9537679871128853e-05, + "loss": 2.3007, + "step": 3372 + }, + { + "epoch": 0.11, + "grad_norm": 0.6903324127197266, + "learning_rate": 1.953736037128139e-05, + "loss": 2.3234, + "step": 3373 + }, + { + "epoch": 0.11, + "grad_norm": 0.6861885190010071, + "learning_rate": 1.9537040763686422e-05, + "loss": 2.3528, + "step": 3374 + }, + { + "epoch": 0.11, + "grad_norm": 0.6907678246498108, + "learning_rate": 1.953672104834756e-05, + "loss": 2.3082, + "step": 3375 + }, + { + "epoch": 0.11, + "grad_norm": 0.6754046082496643, + "learning_rate": 1.953640122526841e-05, + "loss": 2.233, + "step": 3376 + }, + { + "epoch": 0.11, + "grad_norm": 0.6609771847724915, + "learning_rate": 1.953608129445259e-05, + "loss": 2.3472, + "step": 3377 + }, + { + "epoch": 0.11, + "grad_norm": 0.6661567687988281, + "learning_rate": 1.953576125590371e-05, + "loss": 2.2698, + "step": 3378 + }, + { + "epoch": 0.11, + "grad_norm": 0.681016206741333, + "learning_rate": 1.9535441109625387e-05, + "loss": 2.2685, + "step": 3379 + }, + { + "epoch": 0.11, + "grad_norm": 0.6722439527511597, + "learning_rate": 1.9535120855621238e-05, + "loss": 2.2453, + "step": 3380 + }, + { + "epoch": 0.11, + "grad_norm": 0.6440585255622864, + "learning_rate": 1.9534800493894884e-05, + "loss": 2.3155, + "step": 3381 + }, + { + "epoch": 0.11, + "grad_norm": 0.6670352220535278, + "learning_rate": 1.953448002444994e-05, + "loss": 2.3069, + "step": 3382 + }, + { + "epoch": 0.11, + "grad_norm": 0.6978661417961121, + "learning_rate": 1.953415944729003e-05, + "loss": 2.3057, + "step": 3383 + }, + { + "epoch": 0.11, + "grad_norm": 0.6492437720298767, + "learning_rate": 1.9533838762418774e-05, + "loss": 2.335, + "step": 3384 + }, + { + "epoch": 0.11, + "grad_norm": 0.6747755408287048, + "learning_rate": 1.9533517969839794e-05, + "loss": 2.2508, + "step": 3385 + }, + { + "epoch": 0.11, + "grad_norm": 0.6762723922729492, + "learning_rate": 1.953319706955672e-05, + "loss": 2.3085, + "step": 3386 + }, + { + "epoch": 0.11, + "grad_norm": 0.6577224731445312, + "learning_rate": 1.9532876061573164e-05, + "loss": 2.232, + "step": 3387 + }, + { + "epoch": 0.11, + "grad_norm": 0.6669553518295288, + "learning_rate": 1.953255494589277e-05, + "loss": 2.267, + "step": 3388 + }, + { + "epoch": 0.11, + "grad_norm": 0.7034344673156738, + "learning_rate": 1.953223372251915e-05, + "loss": 2.3281, + "step": 3389 + }, + { + "epoch": 0.11, + "grad_norm": 0.712338924407959, + "learning_rate": 1.9531912391455942e-05, + "loss": 2.2683, + "step": 3390 + }, + { + "epoch": 0.11, + "grad_norm": 0.6566740274429321, + "learning_rate": 1.9531590952706776e-05, + "loss": 2.2568, + "step": 3391 + }, + { + "epoch": 0.11, + "grad_norm": 0.7083081007003784, + "learning_rate": 1.953126940627528e-05, + "loss": 2.3304, + "step": 3392 + }, + { + "epoch": 0.11, + "grad_norm": 0.6720786690711975, + "learning_rate": 1.9530947752165086e-05, + "loss": 2.2513, + "step": 3393 + }, + { + "epoch": 0.11, + "grad_norm": 0.7112762331962585, + "learning_rate": 1.9530625990379834e-05, + "loss": 2.3022, + "step": 3394 + }, + { + "epoch": 0.11, + "grad_norm": 0.6996901631355286, + "learning_rate": 1.953030412092315e-05, + "loss": 2.2459, + "step": 3395 + }, + { + "epoch": 0.11, + "grad_norm": 0.7056000828742981, + "learning_rate": 1.952998214379868e-05, + "loss": 2.2842, + "step": 3396 + }, + { + "epoch": 0.11, + "grad_norm": 0.7128042578697205, + "learning_rate": 1.9529660059010056e-05, + "loss": 2.2868, + "step": 3397 + }, + { + "epoch": 0.11, + "grad_norm": 0.6723177433013916, + "learning_rate": 1.9529337866560917e-05, + "loss": 2.3254, + "step": 3398 + }, + { + "epoch": 0.11, + "grad_norm": 0.6917299628257751, + "learning_rate": 1.95290155664549e-05, + "loss": 2.3654, + "step": 3399 + }, + { + "epoch": 0.11, + "grad_norm": 0.6795265078544617, + "learning_rate": 1.9528693158695654e-05, + "loss": 2.2848, + "step": 3400 + }, + { + "epoch": 0.11, + "grad_norm": 0.6936113834381104, + "learning_rate": 1.9528370643286818e-05, + "loss": 2.2222, + "step": 3401 + }, + { + "epoch": 0.11, + "grad_norm": 0.7027393579483032, + "learning_rate": 1.952804802023203e-05, + "loss": 2.364, + "step": 3402 + }, + { + "epoch": 0.11, + "grad_norm": 0.7291425466537476, + "learning_rate": 1.9527725289534944e-05, + "loss": 2.2969, + "step": 3403 + }, + { + "epoch": 0.11, + "grad_norm": 0.6756417751312256, + "learning_rate": 1.95274024511992e-05, + "loss": 2.3043, + "step": 3404 + }, + { + "epoch": 0.11, + "grad_norm": 0.6620833873748779, + "learning_rate": 1.9527079505228445e-05, + "loss": 2.307, + "step": 3405 + }, + { + "epoch": 0.11, + "grad_norm": 0.6577024459838867, + "learning_rate": 1.9526756451626333e-05, + "loss": 2.3146, + "step": 3406 + }, + { + "epoch": 0.11, + "grad_norm": 0.6745875477790833, + "learning_rate": 1.9526433290396505e-05, + "loss": 2.2838, + "step": 3407 + }, + { + "epoch": 0.11, + "grad_norm": 0.6589615345001221, + "learning_rate": 1.952611002154262e-05, + "loss": 2.1902, + "step": 3408 + }, + { + "epoch": 0.11, + "grad_norm": 0.7146021723747253, + "learning_rate": 1.9525786645068326e-05, + "loss": 2.2918, + "step": 3409 + }, + { + "epoch": 0.11, + "grad_norm": 0.6813265681266785, + "learning_rate": 1.9525463160977277e-05, + "loss": 2.3083, + "step": 3410 + }, + { + "epoch": 0.11, + "grad_norm": 0.6737810373306274, + "learning_rate": 1.9525139569273128e-05, + "loss": 2.3204, + "step": 3411 + }, + { + "epoch": 0.11, + "grad_norm": 0.6791371703147888, + "learning_rate": 1.9524815869959535e-05, + "loss": 2.2571, + "step": 3412 + }, + { + "epoch": 0.11, + "grad_norm": 0.6639089584350586, + "learning_rate": 1.9524492063040153e-05, + "loss": 2.2866, + "step": 3413 + }, + { + "epoch": 0.11, + "grad_norm": 0.6872113943099976, + "learning_rate": 1.9524168148518643e-05, + "loss": 2.2505, + "step": 3414 + }, + { + "epoch": 0.11, + "grad_norm": 0.6669588088989258, + "learning_rate": 1.952384412639866e-05, + "loss": 2.312, + "step": 3415 + }, + { + "epoch": 0.11, + "grad_norm": 0.6643083691596985, + "learning_rate": 1.952351999668387e-05, + "loss": 2.3176, + "step": 3416 + }, + { + "epoch": 0.11, + "grad_norm": 0.6882132291793823, + "learning_rate": 1.9523195759377932e-05, + "loss": 2.2089, + "step": 3417 + }, + { + "epoch": 0.11, + "grad_norm": 0.6898518204689026, + "learning_rate": 1.952287141448451e-05, + "loss": 2.2675, + "step": 3418 + }, + { + "epoch": 0.11, + "grad_norm": 0.7204471230506897, + "learning_rate": 1.9522546962007266e-05, + "loss": 2.3435, + "step": 3419 + }, + { + "epoch": 0.11, + "grad_norm": 0.7124329209327698, + "learning_rate": 1.9522222401949867e-05, + "loss": 2.312, + "step": 3420 + }, + { + "epoch": 0.11, + "grad_norm": 0.6496767401695251, + "learning_rate": 1.952189773431598e-05, + "loss": 2.2707, + "step": 3421 + }, + { + "epoch": 0.11, + "grad_norm": 0.6923325657844543, + "learning_rate": 1.9521572959109277e-05, + "loss": 2.3195, + "step": 3422 + }, + { + "epoch": 0.11, + "grad_norm": 0.6800845265388489, + "learning_rate": 1.952124807633342e-05, + "loss": 2.2783, + "step": 3423 + }, + { + "epoch": 0.11, + "grad_norm": 0.7181073427200317, + "learning_rate": 1.9520923085992083e-05, + "loss": 2.2602, + "step": 3424 + }, + { + "epoch": 0.11, + "grad_norm": 0.6994168758392334, + "learning_rate": 1.9520597988088937e-05, + "loss": 2.2398, + "step": 3425 + }, + { + "epoch": 0.11, + "grad_norm": 0.6955270171165466, + "learning_rate": 1.9520272782627652e-05, + "loss": 2.2968, + "step": 3426 + }, + { + "epoch": 0.11, + "grad_norm": 0.6842566728591919, + "learning_rate": 1.9519947469611906e-05, + "loss": 2.333, + "step": 3427 + }, + { + "epoch": 0.11, + "grad_norm": 0.6695115566253662, + "learning_rate": 1.951962204904537e-05, + "loss": 2.3024, + "step": 3428 + }, + { + "epoch": 0.11, + "grad_norm": 0.6924493312835693, + "learning_rate": 1.9519296520931727e-05, + "loss": 2.3159, + "step": 3429 + }, + { + "epoch": 0.11, + "grad_norm": 0.6888512969017029, + "learning_rate": 1.9518970885274654e-05, + "loss": 2.3097, + "step": 3430 + }, + { + "epoch": 0.11, + "grad_norm": 0.699921727180481, + "learning_rate": 1.951864514207782e-05, + "loss": 2.2789, + "step": 3431 + }, + { + "epoch": 0.11, + "grad_norm": 0.6825907230377197, + "learning_rate": 1.9518319291344915e-05, + "loss": 2.2531, + "step": 3432 + }, + { + "epoch": 0.11, + "grad_norm": 0.6498616337776184, + "learning_rate": 1.9517993333079616e-05, + "loss": 2.2994, + "step": 3433 + }, + { + "epoch": 0.11, + "grad_norm": 0.6959145665168762, + "learning_rate": 1.9517667267285605e-05, + "loss": 2.3102, + "step": 3434 + }, + { + "epoch": 0.11, + "grad_norm": 0.6896790266036987, + "learning_rate": 1.9517341093966568e-05, + "loss": 2.3168, + "step": 3435 + }, + { + "epoch": 0.11, + "grad_norm": 0.6853213906288147, + "learning_rate": 1.951701481312619e-05, + "loss": 2.2811, + "step": 3436 + }, + { + "epoch": 0.11, + "grad_norm": 0.6857879161834717, + "learning_rate": 1.9516688424768154e-05, + "loss": 2.2829, + "step": 3437 + }, + { + "epoch": 0.11, + "grad_norm": 0.670282244682312, + "learning_rate": 1.9516361928896152e-05, + "loss": 2.311, + "step": 3438 + }, + { + "epoch": 0.11, + "grad_norm": 0.6659310460090637, + "learning_rate": 1.951603532551387e-05, + "loss": 2.2545, + "step": 3439 + }, + { + "epoch": 0.11, + "grad_norm": 0.6962447166442871, + "learning_rate": 1.9515708614624995e-05, + "loss": 2.2005, + "step": 3440 + }, + { + "epoch": 0.11, + "grad_norm": 0.6639426946640015, + "learning_rate": 1.9515381796233225e-05, + "loss": 2.2276, + "step": 3441 + }, + { + "epoch": 0.11, + "grad_norm": 0.6607621908187866, + "learning_rate": 1.9515054870342243e-05, + "loss": 2.3179, + "step": 3442 + }, + { + "epoch": 0.11, + "grad_norm": 0.6547995209693909, + "learning_rate": 1.951472783695575e-05, + "loss": 2.2112, + "step": 3443 + }, + { + "epoch": 0.11, + "grad_norm": 0.6812206506729126, + "learning_rate": 1.951440069607744e-05, + "loss": 2.2735, + "step": 3444 + }, + { + "epoch": 0.11, + "grad_norm": 0.6637999415397644, + "learning_rate": 1.9514073447711007e-05, + "loss": 2.2831, + "step": 3445 + }, + { + "epoch": 0.11, + "grad_norm": 0.67435622215271, + "learning_rate": 1.9513746091860145e-05, + "loss": 2.2981, + "step": 3446 + }, + { + "epoch": 0.11, + "grad_norm": 0.6635550856590271, + "learning_rate": 1.951341862852856e-05, + "loss": 2.2676, + "step": 3447 + }, + { + "epoch": 0.11, + "grad_norm": 0.6529548764228821, + "learning_rate": 1.951309105771994e-05, + "loss": 2.3021, + "step": 3448 + }, + { + "epoch": 0.11, + "grad_norm": 0.6742626428604126, + "learning_rate": 1.9512763379437997e-05, + "loss": 2.2425, + "step": 3449 + }, + { + "epoch": 0.11, + "grad_norm": 0.6873909831047058, + "learning_rate": 1.9512435593686425e-05, + "loss": 2.3165, + "step": 3450 + }, + { + "epoch": 0.11, + "grad_norm": 0.6663831472396851, + "learning_rate": 1.951210770046893e-05, + "loss": 2.3146, + "step": 3451 + }, + { + "epoch": 0.11, + "grad_norm": 0.6812635064125061, + "learning_rate": 1.9511779699789223e-05, + "loss": 2.286, + "step": 3452 + }, + { + "epoch": 0.11, + "grad_norm": 0.6595397591590881, + "learning_rate": 1.9511451591651e-05, + "loss": 2.236, + "step": 3453 + }, + { + "epoch": 0.11, + "grad_norm": 0.6729317903518677, + "learning_rate": 1.951112337605797e-05, + "loss": 2.2351, + "step": 3454 + }, + { + "epoch": 0.11, + "grad_norm": 0.6776654124259949, + "learning_rate": 1.9510795053013843e-05, + "loss": 2.2852, + "step": 3455 + }, + { + "epoch": 0.11, + "grad_norm": 0.6912583112716675, + "learning_rate": 1.9510466622522327e-05, + "loss": 2.2225, + "step": 3456 + }, + { + "epoch": 0.12, + "grad_norm": 0.6785014867782593, + "learning_rate": 1.9510138084587135e-05, + "loss": 2.2637, + "step": 3457 + }, + { + "epoch": 0.12, + "grad_norm": 0.7126516103744507, + "learning_rate": 1.9509809439211973e-05, + "loss": 2.2875, + "step": 3458 + }, + { + "epoch": 0.12, + "grad_norm": 0.6923148036003113, + "learning_rate": 1.950948068640056e-05, + "loss": 2.2884, + "step": 3459 + }, + { + "epoch": 0.12, + "grad_norm": 0.6734636425971985, + "learning_rate": 1.9509151826156606e-05, + "loss": 2.2916, + "step": 3460 + }, + { + "epoch": 0.12, + "grad_norm": 0.6748021841049194, + "learning_rate": 1.950882285848383e-05, + "loss": 2.2819, + "step": 3461 + }, + { + "epoch": 0.12, + "grad_norm": 0.6771445870399475, + "learning_rate": 1.9508493783385942e-05, + "loss": 2.2334, + "step": 3462 + }, + { + "epoch": 0.12, + "grad_norm": 0.6622663140296936, + "learning_rate": 1.9508164600866662e-05, + "loss": 2.2817, + "step": 3463 + }, + { + "epoch": 0.12, + "grad_norm": 0.7023286819458008, + "learning_rate": 1.950783531092972e-05, + "loss": 2.3262, + "step": 3464 + }, + { + "epoch": 0.12, + "grad_norm": 0.6604294776916504, + "learning_rate": 1.950750591357882e-05, + "loss": 2.2929, + "step": 3465 + }, + { + "epoch": 0.12, + "grad_norm": 0.7139497399330139, + "learning_rate": 1.950717640881769e-05, + "loss": 2.2682, + "step": 3466 + }, + { + "epoch": 0.12, + "grad_norm": 0.6750184893608093, + "learning_rate": 1.9506846796650056e-05, + "loss": 2.3418, + "step": 3467 + }, + { + "epoch": 0.12, + "grad_norm": 0.6928868889808655, + "learning_rate": 1.9506517077079632e-05, + "loss": 2.2963, + "step": 3468 + }, + { + "epoch": 0.12, + "grad_norm": 0.6937859058380127, + "learning_rate": 1.9506187250110155e-05, + "loss": 2.3062, + "step": 3469 + }, + { + "epoch": 0.12, + "grad_norm": 0.6828649044036865, + "learning_rate": 1.9505857315745346e-05, + "loss": 2.2334, + "step": 3470 + }, + { + "epoch": 0.12, + "grad_norm": 0.6752848029136658, + "learning_rate": 1.950552727398893e-05, + "loss": 2.3374, + "step": 3471 + }, + { + "epoch": 0.12, + "grad_norm": 0.6865189075469971, + "learning_rate": 1.950519712484464e-05, + "loss": 2.3397, + "step": 3472 + }, + { + "epoch": 0.12, + "grad_norm": 0.6818467378616333, + "learning_rate": 1.95048668683162e-05, + "loss": 2.247, + "step": 3473 + }, + { + "epoch": 0.12, + "grad_norm": 0.6738606691360474, + "learning_rate": 1.9504536504407345e-05, + "loss": 2.2814, + "step": 3474 + }, + { + "epoch": 0.12, + "grad_norm": 0.6744799017906189, + "learning_rate": 1.950420603312181e-05, + "loss": 2.2357, + "step": 3475 + }, + { + "epoch": 0.12, + "grad_norm": 0.6895490884780884, + "learning_rate": 1.950387545446332e-05, + "loss": 2.3056, + "step": 3476 + }, + { + "epoch": 0.12, + "grad_norm": 0.6931930184364319, + "learning_rate": 1.950354476843562e-05, + "loss": 2.2138, + "step": 3477 + }, + { + "epoch": 0.12, + "grad_norm": 0.6776548624038696, + "learning_rate": 1.950321397504244e-05, + "loss": 2.2876, + "step": 3478 + }, + { + "epoch": 0.12, + "grad_norm": 0.6878694891929626, + "learning_rate": 1.9502883074287516e-05, + "loss": 2.333, + "step": 3479 + }, + { + "epoch": 0.12, + "grad_norm": 0.6741803288459778, + "learning_rate": 1.950255206617459e-05, + "loss": 2.2588, + "step": 3480 + }, + { + "epoch": 0.12, + "grad_norm": 0.6665170788764954, + "learning_rate": 1.9502220950707397e-05, + "loss": 2.2819, + "step": 3481 + }, + { + "epoch": 0.12, + "grad_norm": 0.6764044761657715, + "learning_rate": 1.9501889727889686e-05, + "loss": 2.3122, + "step": 3482 + }, + { + "epoch": 0.12, + "grad_norm": 0.6899176836013794, + "learning_rate": 1.9501558397725186e-05, + "loss": 2.3786, + "step": 3483 + }, + { + "epoch": 0.12, + "grad_norm": 0.6570963859558105, + "learning_rate": 1.950122696021765e-05, + "loss": 2.2498, + "step": 3484 + }, + { + "epoch": 0.12, + "grad_norm": 0.6669249534606934, + "learning_rate": 1.9500895415370823e-05, + "loss": 2.3338, + "step": 3485 + }, + { + "epoch": 0.12, + "grad_norm": 0.6689350605010986, + "learning_rate": 1.9500563763188445e-05, + "loss": 2.3312, + "step": 3486 + }, + { + "epoch": 0.12, + "grad_norm": 0.6737769246101379, + "learning_rate": 1.950023200367427e-05, + "loss": 2.2852, + "step": 3487 + }, + { + "epoch": 0.12, + "grad_norm": 0.7012941241264343, + "learning_rate": 1.9499900136832036e-05, + "loss": 2.2931, + "step": 3488 + }, + { + "epoch": 0.12, + "grad_norm": 0.6590933799743652, + "learning_rate": 1.9499568162665503e-05, + "loss": 2.2476, + "step": 3489 + }, + { + "epoch": 0.12, + "grad_norm": 0.6714367270469666, + "learning_rate": 1.949923608117841e-05, + "loss": 2.2868, + "step": 3490 + }, + { + "epoch": 0.12, + "grad_norm": 0.6752423048019409, + "learning_rate": 1.949890389237452e-05, + "loss": 2.2997, + "step": 3491 + }, + { + "epoch": 0.12, + "grad_norm": 0.7171645164489746, + "learning_rate": 1.949857159625758e-05, + "loss": 2.325, + "step": 3492 + }, + { + "epoch": 0.12, + "grad_norm": 0.6709889769554138, + "learning_rate": 1.9498239192831342e-05, + "loss": 2.213, + "step": 3493 + }, + { + "epoch": 0.12, + "grad_norm": 0.6939496994018555, + "learning_rate": 1.9497906682099564e-05, + "loss": 2.3505, + "step": 3494 + }, + { + "epoch": 0.12, + "grad_norm": 0.6679947376251221, + "learning_rate": 1.9497574064066008e-05, + "loss": 2.3483, + "step": 3495 + }, + { + "epoch": 0.12, + "grad_norm": 0.685558021068573, + "learning_rate": 1.9497241338734424e-05, + "loss": 2.3125, + "step": 3496 + }, + { + "epoch": 0.12, + "grad_norm": 0.6740711331367493, + "learning_rate": 1.949690850610857e-05, + "loss": 2.3581, + "step": 3497 + }, + { + "epoch": 0.12, + "grad_norm": 0.6664581894874573, + "learning_rate": 1.949657556619221e-05, + "loss": 2.2713, + "step": 3498 + }, + { + "epoch": 0.12, + "grad_norm": 0.6670740842819214, + "learning_rate": 1.9496242518989108e-05, + "loss": 2.2952, + "step": 3499 + }, + { + "epoch": 0.12, + "grad_norm": 0.6904124021530151, + "learning_rate": 1.949590936450302e-05, + "loss": 2.288, + "step": 3500 + }, + { + "epoch": 0.12, + "grad_norm": 0.7095031142234802, + "learning_rate": 1.9495576102737715e-05, + "loss": 2.2638, + "step": 3501 + }, + { + "epoch": 0.12, + "grad_norm": 0.6857951283454895, + "learning_rate": 1.9495242733696958e-05, + "loss": 2.3004, + "step": 3502 + }, + { + "epoch": 0.12, + "grad_norm": 0.7024117112159729, + "learning_rate": 1.949490925738451e-05, + "loss": 2.2699, + "step": 3503 + }, + { + "epoch": 0.12, + "grad_norm": 0.6955640316009521, + "learning_rate": 1.9494575673804145e-05, + "loss": 2.3566, + "step": 3504 + }, + { + "epoch": 0.12, + "grad_norm": 0.671813428401947, + "learning_rate": 1.9494241982959624e-05, + "loss": 2.2246, + "step": 3505 + }, + { + "epoch": 0.12, + "grad_norm": 0.6745615601539612, + "learning_rate": 1.9493908184854727e-05, + "loss": 2.3297, + "step": 3506 + }, + { + "epoch": 0.12, + "grad_norm": 0.7011864185333252, + "learning_rate": 1.9493574279493213e-05, + "loss": 2.2687, + "step": 3507 + }, + { + "epoch": 0.12, + "grad_norm": 0.6664074659347534, + "learning_rate": 1.9493240266878866e-05, + "loss": 2.3133, + "step": 3508 + }, + { + "epoch": 0.12, + "grad_norm": 0.6848646998405457, + "learning_rate": 1.949290614701545e-05, + "loss": 2.2742, + "step": 3509 + }, + { + "epoch": 0.12, + "grad_norm": 0.7012177109718323, + "learning_rate": 1.9492571919906747e-05, + "loss": 2.2487, + "step": 3510 + }, + { + "epoch": 0.12, + "grad_norm": 0.6619529724121094, + "learning_rate": 1.9492237585556527e-05, + "loss": 2.3242, + "step": 3511 + }, + { + "epoch": 0.12, + "grad_norm": 0.6916337609291077, + "learning_rate": 1.949190314396857e-05, + "loss": 2.2456, + "step": 3512 + }, + { + "epoch": 0.12, + "grad_norm": 0.674776554107666, + "learning_rate": 1.9491568595146657e-05, + "loss": 2.2046, + "step": 3513 + }, + { + "epoch": 0.12, + "grad_norm": 0.6817277669906616, + "learning_rate": 1.949123393909456e-05, + "loss": 2.3114, + "step": 3514 + }, + { + "epoch": 0.12, + "grad_norm": 0.6720808148384094, + "learning_rate": 1.9490899175816068e-05, + "loss": 2.3064, + "step": 3515 + }, + { + "epoch": 0.12, + "grad_norm": 0.6769091486930847, + "learning_rate": 1.9490564305314958e-05, + "loss": 2.3276, + "step": 3516 + }, + { + "epoch": 0.12, + "grad_norm": 0.686509907245636, + "learning_rate": 1.9490229327595015e-05, + "loss": 2.2504, + "step": 3517 + }, + { + "epoch": 0.12, + "grad_norm": 0.696013331413269, + "learning_rate": 1.9489894242660023e-05, + "loss": 2.245, + "step": 3518 + }, + { + "epoch": 0.12, + "grad_norm": 0.6892712712287903, + "learning_rate": 1.9489559050513767e-05, + "loss": 2.2722, + "step": 3519 + }, + { + "epoch": 0.12, + "grad_norm": 0.6955429315567017, + "learning_rate": 1.9489223751160035e-05, + "loss": 2.2573, + "step": 3520 + }, + { + "epoch": 0.12, + "grad_norm": 0.6766269207000732, + "learning_rate": 1.948888834460261e-05, + "loss": 2.349, + "step": 3521 + }, + { + "epoch": 0.12, + "grad_norm": 0.7339362502098083, + "learning_rate": 1.9488552830845294e-05, + "loss": 2.2768, + "step": 3522 + }, + { + "epoch": 0.12, + "grad_norm": 0.7005716562271118, + "learning_rate": 1.948821720989186e-05, + "loss": 2.3168, + "step": 3523 + }, + { + "epoch": 0.12, + "grad_norm": 0.6667356491088867, + "learning_rate": 1.9487881481746114e-05, + "loss": 2.3052, + "step": 3524 + }, + { + "epoch": 0.12, + "grad_norm": 0.7022951245307922, + "learning_rate": 1.9487545646411844e-05, + "loss": 2.3252, + "step": 3525 + }, + { + "epoch": 0.12, + "grad_norm": 0.71285480260849, + "learning_rate": 1.948720970389284e-05, + "loss": 2.2532, + "step": 3526 + }, + { + "epoch": 0.12, + "grad_norm": 0.7096241116523743, + "learning_rate": 1.94868736541929e-05, + "loss": 2.297, + "step": 3527 + }, + { + "epoch": 0.12, + "grad_norm": 0.7043609023094177, + "learning_rate": 1.9486537497315824e-05, + "loss": 2.3178, + "step": 3528 + }, + { + "epoch": 0.12, + "grad_norm": 0.6503707766532898, + "learning_rate": 1.948620123326541e-05, + "loss": 2.2669, + "step": 3529 + }, + { + "epoch": 0.12, + "grad_norm": 0.6767188310623169, + "learning_rate": 1.9485864862045448e-05, + "loss": 2.2865, + "step": 3530 + }, + { + "epoch": 0.12, + "grad_norm": 0.6910713911056519, + "learning_rate": 1.948552838365975e-05, + "loss": 2.2707, + "step": 3531 + }, + { + "epoch": 0.12, + "grad_norm": 0.685518741607666, + "learning_rate": 1.9485191798112105e-05, + "loss": 2.2775, + "step": 3532 + }, + { + "epoch": 0.12, + "grad_norm": 0.7035486102104187, + "learning_rate": 1.948485510540633e-05, + "loss": 2.3289, + "step": 3533 + }, + { + "epoch": 0.12, + "grad_norm": 0.6760866045951843, + "learning_rate": 1.9484518305546213e-05, + "loss": 2.2802, + "step": 3534 + }, + { + "epoch": 0.12, + "grad_norm": 0.6866413354873657, + "learning_rate": 1.9484181398535568e-05, + "loss": 2.3452, + "step": 3535 + }, + { + "epoch": 0.12, + "grad_norm": 0.6818528771400452, + "learning_rate": 1.9483844384378203e-05, + "loss": 2.2618, + "step": 3536 + }, + { + "epoch": 0.12, + "grad_norm": 0.6678909659385681, + "learning_rate": 1.948350726307792e-05, + "loss": 2.2818, + "step": 3537 + }, + { + "epoch": 0.12, + "grad_norm": 0.6868831515312195, + "learning_rate": 1.9483170034638533e-05, + "loss": 2.2854, + "step": 3538 + }, + { + "epoch": 0.12, + "grad_norm": 0.6847174167633057, + "learning_rate": 1.9482832699063844e-05, + "loss": 2.1954, + "step": 3539 + }, + { + "epoch": 0.12, + "grad_norm": 0.6939400434494019, + "learning_rate": 1.948249525635767e-05, + "loss": 2.2947, + "step": 3540 + }, + { + "epoch": 0.12, + "grad_norm": 0.7136675119400024, + "learning_rate": 1.9482157706523822e-05, + "loss": 2.2846, + "step": 3541 + }, + { + "epoch": 0.12, + "grad_norm": 0.6572127938270569, + "learning_rate": 1.9481820049566113e-05, + "loss": 2.2469, + "step": 3542 + }, + { + "epoch": 0.12, + "grad_norm": 0.6830535531044006, + "learning_rate": 1.948148228548836e-05, + "loss": 2.2468, + "step": 3543 + }, + { + "epoch": 0.12, + "grad_norm": 0.6657615900039673, + "learning_rate": 1.9481144414294375e-05, + "loss": 2.2803, + "step": 3544 + }, + { + "epoch": 0.12, + "grad_norm": 0.7039142847061157, + "learning_rate": 1.948080643598798e-05, + "loss": 2.355, + "step": 3545 + }, + { + "epoch": 0.12, + "grad_norm": 0.7190167903900146, + "learning_rate": 1.9480468350572988e-05, + "loss": 2.2113, + "step": 3546 + }, + { + "epoch": 0.12, + "grad_norm": 0.6970165371894836, + "learning_rate": 1.948013015805322e-05, + "loss": 2.2803, + "step": 3547 + }, + { + "epoch": 0.12, + "grad_norm": 0.6995370388031006, + "learning_rate": 1.9479791858432494e-05, + "loss": 2.2955, + "step": 3548 + }, + { + "epoch": 0.12, + "grad_norm": 0.7019973397254944, + "learning_rate": 1.947945345171464e-05, + "loss": 2.2102, + "step": 3549 + }, + { + "epoch": 0.12, + "grad_norm": 0.6501315832138062, + "learning_rate": 1.9479114937903478e-05, + "loss": 2.2773, + "step": 3550 + }, + { + "epoch": 0.12, + "grad_norm": 0.6660248637199402, + "learning_rate": 1.9478776317002824e-05, + "loss": 2.2877, + "step": 3551 + }, + { + "epoch": 0.12, + "grad_norm": 0.6723596453666687, + "learning_rate": 1.9478437589016518e-05, + "loss": 2.2659, + "step": 3552 + }, + { + "epoch": 0.12, + "grad_norm": 0.6939621567726135, + "learning_rate": 1.9478098753948377e-05, + "loss": 2.2179, + "step": 3553 + }, + { + "epoch": 0.12, + "grad_norm": 0.6738900542259216, + "learning_rate": 1.9477759811802228e-05, + "loss": 2.2073, + "step": 3554 + }, + { + "epoch": 0.12, + "grad_norm": 0.7160170674324036, + "learning_rate": 1.9477420762581905e-05, + "loss": 2.2912, + "step": 3555 + }, + { + "epoch": 0.12, + "grad_norm": 0.6936720609664917, + "learning_rate": 1.9477081606291233e-05, + "loss": 2.2782, + "step": 3556 + }, + { + "epoch": 0.12, + "grad_norm": 0.7159092426300049, + "learning_rate": 1.9476742342934053e-05, + "loss": 2.3297, + "step": 3557 + }, + { + "epoch": 0.12, + "grad_norm": 0.669593870639801, + "learning_rate": 1.947640297251419e-05, + "loss": 2.2583, + "step": 3558 + }, + { + "epoch": 0.12, + "grad_norm": 0.6970084309577942, + "learning_rate": 1.947606349503548e-05, + "loss": 2.2254, + "step": 3559 + }, + { + "epoch": 0.12, + "grad_norm": 0.6627599596977234, + "learning_rate": 1.9475723910501756e-05, + "loss": 2.2442, + "step": 3560 + }, + { + "epoch": 0.12, + "grad_norm": 0.6634886264801025, + "learning_rate": 1.9475384218916857e-05, + "loss": 2.3363, + "step": 3561 + }, + { + "epoch": 0.12, + "grad_norm": 0.6702818870544434, + "learning_rate": 1.9475044420284622e-05, + "loss": 2.2577, + "step": 3562 + }, + { + "epoch": 0.12, + "grad_norm": 0.7019290328025818, + "learning_rate": 1.9474704514608886e-05, + "loss": 2.2377, + "step": 3563 + }, + { + "epoch": 0.12, + "grad_norm": 0.7041156888008118, + "learning_rate": 1.9474364501893492e-05, + "loss": 2.2244, + "step": 3564 + }, + { + "epoch": 0.12, + "grad_norm": 0.6737179160118103, + "learning_rate": 1.947402438214228e-05, + "loss": 2.3119, + "step": 3565 + }, + { + "epoch": 0.12, + "grad_norm": 0.6735750436782837, + "learning_rate": 1.9473684155359093e-05, + "loss": 2.2836, + "step": 3566 + }, + { + "epoch": 0.12, + "grad_norm": 0.7283354997634888, + "learning_rate": 1.947334382154778e-05, + "loss": 2.3148, + "step": 3567 + }, + { + "epoch": 0.12, + "grad_norm": 0.7039659023284912, + "learning_rate": 1.947300338071217e-05, + "loss": 2.252, + "step": 3568 + }, + { + "epoch": 0.12, + "grad_norm": 0.6613754034042358, + "learning_rate": 1.947266283285613e-05, + "loss": 2.2106, + "step": 3569 + }, + { + "epoch": 0.12, + "grad_norm": 0.6886136531829834, + "learning_rate": 1.947232217798349e-05, + "loss": 2.3261, + "step": 3570 + }, + { + "epoch": 0.12, + "grad_norm": 0.6867454051971436, + "learning_rate": 1.9471981416098105e-05, + "loss": 2.2806, + "step": 3571 + }, + { + "epoch": 0.12, + "grad_norm": 0.6932241320610046, + "learning_rate": 1.947164054720383e-05, + "loss": 2.3267, + "step": 3572 + }, + { + "epoch": 0.12, + "grad_norm": 0.685605525970459, + "learning_rate": 1.947129957130451e-05, + "loss": 2.3084, + "step": 3573 + }, + { + "epoch": 0.12, + "grad_norm": 0.668707013130188, + "learning_rate": 1.9470958488403994e-05, + "loss": 2.3182, + "step": 3574 + }, + { + "epoch": 0.12, + "grad_norm": 0.6734517812728882, + "learning_rate": 1.9470617298506143e-05, + "loss": 2.295, + "step": 3575 + }, + { + "epoch": 0.12, + "grad_norm": 0.6585891842842102, + "learning_rate": 1.9470276001614804e-05, + "loss": 2.283, + "step": 3576 + }, + { + "epoch": 0.12, + "grad_norm": 0.6893803477287292, + "learning_rate": 1.9469934597733845e-05, + "loss": 2.2767, + "step": 3577 + }, + { + "epoch": 0.12, + "grad_norm": 0.713064432144165, + "learning_rate": 1.9469593086867106e-05, + "loss": 2.3564, + "step": 3578 + }, + { + "epoch": 0.12, + "grad_norm": 0.7094035744667053, + "learning_rate": 1.946925146901846e-05, + "loss": 2.284, + "step": 3579 + }, + { + "epoch": 0.12, + "grad_norm": 0.6580082178115845, + "learning_rate": 1.9468909744191757e-05, + "loss": 2.2991, + "step": 3580 + }, + { + "epoch": 0.12, + "grad_norm": 0.6811079978942871, + "learning_rate": 1.9468567912390865e-05, + "loss": 2.2529, + "step": 3581 + }, + { + "epoch": 0.12, + "grad_norm": 0.6906937956809998, + "learning_rate": 1.9468225973619635e-05, + "loss": 2.3296, + "step": 3582 + }, + { + "epoch": 0.12, + "grad_norm": 0.690157949924469, + "learning_rate": 1.9467883927881944e-05, + "loss": 2.2139, + "step": 3583 + }, + { + "epoch": 0.12, + "grad_norm": 0.7017017602920532, + "learning_rate": 1.9467541775181648e-05, + "loss": 2.2057, + "step": 3584 + }, + { + "epoch": 0.12, + "grad_norm": 0.6760733127593994, + "learning_rate": 1.946719951552261e-05, + "loss": 2.2971, + "step": 3585 + }, + { + "epoch": 0.12, + "grad_norm": 0.6878550052642822, + "learning_rate": 1.94668571489087e-05, + "loss": 2.3081, + "step": 3586 + }, + { + "epoch": 0.12, + "grad_norm": 0.6624292135238647, + "learning_rate": 1.946651467534379e-05, + "loss": 2.298, + "step": 3587 + }, + { + "epoch": 0.12, + "grad_norm": 0.7125348448753357, + "learning_rate": 1.9466172094831742e-05, + "loss": 2.3015, + "step": 3588 + }, + { + "epoch": 0.12, + "grad_norm": 0.689313530921936, + "learning_rate": 1.9465829407376432e-05, + "loss": 2.31, + "step": 3589 + }, + { + "epoch": 0.12, + "grad_norm": 0.7020419239997864, + "learning_rate": 1.9465486612981725e-05, + "loss": 2.3112, + "step": 3590 + }, + { + "epoch": 0.12, + "grad_norm": 0.7126964926719666, + "learning_rate": 1.94651437116515e-05, + "loss": 2.2397, + "step": 3591 + }, + { + "epoch": 0.12, + "grad_norm": 0.6662706136703491, + "learning_rate": 1.946480070338963e-05, + "loss": 2.2541, + "step": 3592 + }, + { + "epoch": 0.12, + "grad_norm": 0.6958052515983582, + "learning_rate": 1.946445758819999e-05, + "loss": 2.3148, + "step": 3593 + }, + { + "epoch": 0.12, + "grad_norm": 0.6823972463607788, + "learning_rate": 1.9464114366086448e-05, + "loss": 2.3385, + "step": 3594 + }, + { + "epoch": 0.12, + "grad_norm": 0.6693614721298218, + "learning_rate": 1.9463771037052893e-05, + "loss": 2.2784, + "step": 3595 + }, + { + "epoch": 0.12, + "grad_norm": 0.6864728331565857, + "learning_rate": 1.9463427601103197e-05, + "loss": 2.2771, + "step": 3596 + }, + { + "epoch": 0.12, + "grad_norm": 0.7275521159172058, + "learning_rate": 1.9463084058241243e-05, + "loss": 2.3019, + "step": 3597 + }, + { + "epoch": 0.12, + "grad_norm": 0.6874843239784241, + "learning_rate": 1.9462740408470914e-05, + "loss": 2.2823, + "step": 3598 + }, + { + "epoch": 0.12, + "grad_norm": 0.6993808746337891, + "learning_rate": 1.9462396651796086e-05, + "loss": 2.2229, + "step": 3599 + }, + { + "epoch": 0.12, + "grad_norm": 0.6955780982971191, + "learning_rate": 1.9462052788220648e-05, + "loss": 2.2508, + "step": 3600 + }, + { + "epoch": 0.12, + "grad_norm": 0.6660264730453491, + "learning_rate": 1.9461708817748483e-05, + "loss": 2.2563, + "step": 3601 + }, + { + "epoch": 0.12, + "grad_norm": 0.7100155353546143, + "learning_rate": 1.9461364740383474e-05, + "loss": 2.2477, + "step": 3602 + }, + { + "epoch": 0.12, + "grad_norm": 0.6789041757583618, + "learning_rate": 1.9461020556129514e-05, + "loss": 2.3007, + "step": 3603 + }, + { + "epoch": 0.12, + "grad_norm": 0.706390380859375, + "learning_rate": 1.946067626499049e-05, + "loss": 2.3153, + "step": 3604 + }, + { + "epoch": 0.12, + "grad_norm": 0.6905703544616699, + "learning_rate": 1.9460331866970286e-05, + "loss": 2.2993, + "step": 3605 + }, + { + "epoch": 0.12, + "grad_norm": 0.7201916575431824, + "learning_rate": 1.94599873620728e-05, + "loss": 2.2688, + "step": 3606 + }, + { + "epoch": 0.12, + "grad_norm": 0.7137526273727417, + "learning_rate": 1.9459642750301918e-05, + "loss": 2.2158, + "step": 3607 + }, + { + "epoch": 0.12, + "grad_norm": 0.7265249490737915, + "learning_rate": 1.945929803166154e-05, + "loss": 2.3062, + "step": 3608 + }, + { + "epoch": 0.12, + "grad_norm": 0.6788699626922607, + "learning_rate": 1.9458953206155554e-05, + "loss": 2.2542, + "step": 3609 + }, + { + "epoch": 0.12, + "grad_norm": 0.7030156850814819, + "learning_rate": 1.9458608273787854e-05, + "loss": 2.2353, + "step": 3610 + }, + { + "epoch": 0.12, + "grad_norm": 0.6762740015983582, + "learning_rate": 1.9458263234562348e-05, + "loss": 2.3069, + "step": 3611 + }, + { + "epoch": 0.12, + "grad_norm": 0.7022709250450134, + "learning_rate": 1.9457918088482923e-05, + "loss": 2.3291, + "step": 3612 + }, + { + "epoch": 0.12, + "grad_norm": 0.6675629019737244, + "learning_rate": 1.9457572835553484e-05, + "loss": 2.2963, + "step": 3613 + }, + { + "epoch": 0.12, + "grad_norm": 0.6842197179794312, + "learning_rate": 1.945722747577793e-05, + "loss": 2.258, + "step": 3614 + }, + { + "epoch": 0.12, + "grad_norm": 0.6655317544937134, + "learning_rate": 1.945688200916016e-05, + "loss": 2.2494, + "step": 3615 + }, + { + "epoch": 0.12, + "grad_norm": 0.6737083792686462, + "learning_rate": 1.9456536435704083e-05, + "loss": 2.3036, + "step": 3616 + }, + { + "epoch": 0.12, + "grad_norm": 0.6749952435493469, + "learning_rate": 1.94561907554136e-05, + "loss": 2.2809, + "step": 3617 + }, + { + "epoch": 0.12, + "grad_norm": 0.7076355814933777, + "learning_rate": 1.9455844968292613e-05, + "loss": 2.2699, + "step": 3618 + }, + { + "epoch": 0.12, + "grad_norm": 0.7070693969726562, + "learning_rate": 1.945549907434503e-05, + "loss": 2.312, + "step": 3619 + }, + { + "epoch": 0.12, + "grad_norm": 0.7362203598022461, + "learning_rate": 1.945515307357476e-05, + "loss": 2.2756, + "step": 3620 + }, + { + "epoch": 0.12, + "grad_norm": 0.6687037944793701, + "learning_rate": 1.9454806965985716e-05, + "loss": 2.2226, + "step": 3621 + }, + { + "epoch": 0.12, + "grad_norm": 0.6770171523094177, + "learning_rate": 1.94544607515818e-05, + "loss": 2.2967, + "step": 3622 + }, + { + "epoch": 0.12, + "grad_norm": 0.6660905480384827, + "learning_rate": 1.945411443036693e-05, + "loss": 2.303, + "step": 3623 + }, + { + "epoch": 0.12, + "grad_norm": 0.6492541432380676, + "learning_rate": 1.9453768002345013e-05, + "loss": 2.2959, + "step": 3624 + }, + { + "epoch": 0.12, + "grad_norm": 0.7097062468528748, + "learning_rate": 1.9453421467519967e-05, + "loss": 2.3442, + "step": 3625 + }, + { + "epoch": 0.12, + "grad_norm": 0.6908991932868958, + "learning_rate": 1.945307482589571e-05, + "loss": 2.3162, + "step": 3626 + }, + { + "epoch": 0.12, + "grad_norm": 0.7540254592895508, + "learning_rate": 1.9452728077476146e-05, + "loss": 2.2021, + "step": 3627 + }, + { + "epoch": 0.12, + "grad_norm": 0.6622290015220642, + "learning_rate": 1.9452381222265204e-05, + "loss": 2.2724, + "step": 3628 + }, + { + "epoch": 0.12, + "grad_norm": 0.6925186514854431, + "learning_rate": 1.9452034260266796e-05, + "loss": 2.2525, + "step": 3629 + }, + { + "epoch": 0.12, + "grad_norm": 0.6948437094688416, + "learning_rate": 1.945168719148485e-05, + "loss": 2.3201, + "step": 3630 + }, + { + "epoch": 0.12, + "grad_norm": 0.6924076080322266, + "learning_rate": 1.9451340015923275e-05, + "loss": 2.2381, + "step": 3631 + }, + { + "epoch": 0.12, + "grad_norm": 0.7558528780937195, + "learning_rate": 1.9450992733586e-05, + "loss": 2.2655, + "step": 3632 + }, + { + "epoch": 0.12, + "grad_norm": 0.6910993456840515, + "learning_rate": 1.945064534447695e-05, + "loss": 2.3398, + "step": 3633 + }, + { + "epoch": 0.12, + "grad_norm": 0.7183420658111572, + "learning_rate": 1.945029784860005e-05, + "loss": 2.3202, + "step": 3634 + }, + { + "epoch": 0.12, + "grad_norm": 0.6935462951660156, + "learning_rate": 1.944995024595922e-05, + "loss": 2.2775, + "step": 3635 + }, + { + "epoch": 0.12, + "grad_norm": 0.6807013750076294, + "learning_rate": 1.944960253655839e-05, + "loss": 2.236, + "step": 3636 + }, + { + "epoch": 0.12, + "grad_norm": 0.7202365398406982, + "learning_rate": 1.9449254720401492e-05, + "loss": 2.256, + "step": 3637 + }, + { + "epoch": 0.12, + "grad_norm": 0.7282215356826782, + "learning_rate": 1.944890679749245e-05, + "loss": 2.1769, + "step": 3638 + }, + { + "epoch": 0.12, + "grad_norm": 0.6830108165740967, + "learning_rate": 1.9448558767835194e-05, + "loss": 2.283, + "step": 3639 + }, + { + "epoch": 0.12, + "grad_norm": 0.7019248008728027, + "learning_rate": 1.9448210631433662e-05, + "loss": 2.2922, + "step": 3640 + }, + { + "epoch": 0.12, + "grad_norm": 0.7160152792930603, + "learning_rate": 1.9447862388291782e-05, + "loss": 2.2644, + "step": 3641 + }, + { + "epoch": 0.12, + "grad_norm": 0.681394100189209, + "learning_rate": 1.944751403841349e-05, + "loss": 2.3206, + "step": 3642 + }, + { + "epoch": 0.12, + "grad_norm": 0.6696096658706665, + "learning_rate": 1.944716558180272e-05, + "loss": 2.2646, + "step": 3643 + }, + { + "epoch": 0.12, + "grad_norm": 0.6756918430328369, + "learning_rate": 1.9446817018463412e-05, + "loss": 2.2412, + "step": 3644 + }, + { + "epoch": 0.12, + "grad_norm": 0.6465837359428406, + "learning_rate": 1.94464683483995e-05, + "loss": 2.3048, + "step": 3645 + }, + { + "epoch": 0.12, + "grad_norm": 0.6895812749862671, + "learning_rate": 1.9446119571614925e-05, + "loss": 2.297, + "step": 3646 + }, + { + "epoch": 0.12, + "grad_norm": 0.6861203908920288, + "learning_rate": 1.944577068811363e-05, + "loss": 2.3233, + "step": 3647 + }, + { + "epoch": 0.12, + "grad_norm": 0.6962707042694092, + "learning_rate": 1.944542169789955e-05, + "loss": 2.2271, + "step": 3648 + }, + { + "epoch": 0.12, + "grad_norm": 0.660060465335846, + "learning_rate": 1.9445072600976633e-05, + "loss": 2.2757, + "step": 3649 + }, + { + "epoch": 0.12, + "grad_norm": 0.692184567451477, + "learning_rate": 1.944472339734882e-05, + "loss": 2.2959, + "step": 3650 + }, + { + "epoch": 0.12, + "grad_norm": 0.7133738398551941, + "learning_rate": 1.9444374087020057e-05, + "loss": 2.3268, + "step": 3651 + }, + { + "epoch": 0.12, + "grad_norm": 0.6811637282371521, + "learning_rate": 1.9444024669994294e-05, + "loss": 2.2814, + "step": 3652 + }, + { + "epoch": 0.12, + "grad_norm": 0.7280363440513611, + "learning_rate": 1.9443675146275468e-05, + "loss": 2.2434, + "step": 3653 + }, + { + "epoch": 0.12, + "grad_norm": 0.7025060057640076, + "learning_rate": 1.944332551586754e-05, + "loss": 2.3152, + "step": 3654 + }, + { + "epoch": 0.12, + "grad_norm": 0.6796720027923584, + "learning_rate": 1.9442975778774453e-05, + "loss": 2.3378, + "step": 3655 + }, + { + "epoch": 0.12, + "grad_norm": 0.6745999455451965, + "learning_rate": 1.9442625935000162e-05, + "loss": 2.331, + "step": 3656 + }, + { + "epoch": 0.12, + "grad_norm": 0.6987726092338562, + "learning_rate": 1.9442275984548614e-05, + "loss": 2.3179, + "step": 3657 + }, + { + "epoch": 0.12, + "grad_norm": 0.6735572218894958, + "learning_rate": 1.944192592742377e-05, + "loss": 2.2812, + "step": 3658 + }, + { + "epoch": 0.12, + "grad_norm": 0.671812891960144, + "learning_rate": 1.9441575763629576e-05, + "loss": 2.2404, + "step": 3659 + }, + { + "epoch": 0.12, + "grad_norm": 0.6835404634475708, + "learning_rate": 1.9441225493169993e-05, + "loss": 2.2787, + "step": 3660 + }, + { + "epoch": 0.12, + "grad_norm": 0.6952942609786987, + "learning_rate": 1.944087511604898e-05, + "loss": 2.335, + "step": 3661 + }, + { + "epoch": 0.12, + "grad_norm": 0.6576671004295349, + "learning_rate": 1.944052463227049e-05, + "loss": 2.2742, + "step": 3662 + }, + { + "epoch": 0.12, + "grad_norm": 0.6631172895431519, + "learning_rate": 1.9440174041838484e-05, + "loss": 2.2914, + "step": 3663 + }, + { + "epoch": 0.12, + "grad_norm": 0.6596168279647827, + "learning_rate": 1.9439823344756927e-05, + "loss": 2.2755, + "step": 3664 + }, + { + "epoch": 0.12, + "grad_norm": 0.6673133373260498, + "learning_rate": 1.943947254102978e-05, + "loss": 2.3057, + "step": 3665 + }, + { + "epoch": 0.12, + "grad_norm": 0.6609194874763489, + "learning_rate": 1.9439121630661e-05, + "loss": 2.252, + "step": 3666 + }, + { + "epoch": 0.12, + "grad_norm": 0.6546292304992676, + "learning_rate": 1.943877061365456e-05, + "loss": 2.2646, + "step": 3667 + }, + { + "epoch": 0.12, + "grad_norm": 0.7106362581253052, + "learning_rate": 1.9438419490014417e-05, + "loss": 2.2382, + "step": 3668 + }, + { + "epoch": 0.12, + "grad_norm": 0.7512779831886292, + "learning_rate": 1.9438068259744546e-05, + "loss": 2.2603, + "step": 3669 + }, + { + "epoch": 0.12, + "grad_norm": 0.6715754270553589, + "learning_rate": 1.9437716922848907e-05, + "loss": 2.2388, + "step": 3670 + }, + { + "epoch": 0.12, + "grad_norm": 0.6882107853889465, + "learning_rate": 1.9437365479331475e-05, + "loss": 2.309, + "step": 3671 + }, + { + "epoch": 0.12, + "grad_norm": 0.6753911375999451, + "learning_rate": 1.943701392919622e-05, + "loss": 2.2215, + "step": 3672 + }, + { + "epoch": 0.12, + "grad_norm": 0.7017413973808289, + "learning_rate": 1.943666227244711e-05, + "loss": 2.331, + "step": 3673 + }, + { + "epoch": 0.12, + "grad_norm": 0.6687679290771484, + "learning_rate": 1.9436310509088122e-05, + "loss": 2.3083, + "step": 3674 + }, + { + "epoch": 0.12, + "grad_norm": 0.7172769904136658, + "learning_rate": 1.943595863912323e-05, + "loss": 2.2197, + "step": 3675 + }, + { + "epoch": 0.12, + "grad_norm": 0.7076760530471802, + "learning_rate": 1.9435606662556402e-05, + "loss": 2.3206, + "step": 3676 + }, + { + "epoch": 0.12, + "grad_norm": 0.6666626334190369, + "learning_rate": 1.9435254579391625e-05, + "loss": 2.2212, + "step": 3677 + }, + { + "epoch": 0.12, + "grad_norm": 0.6660637259483337, + "learning_rate": 1.9434902389632867e-05, + "loss": 2.352, + "step": 3678 + }, + { + "epoch": 0.12, + "grad_norm": 0.7116628289222717, + "learning_rate": 1.9434550093284113e-05, + "loss": 2.3049, + "step": 3679 + }, + { + "epoch": 0.12, + "grad_norm": 0.6952300071716309, + "learning_rate": 1.943419769034934e-05, + "loss": 2.301, + "step": 3680 + }, + { + "epoch": 0.12, + "grad_norm": 0.6677801012992859, + "learning_rate": 1.9433845180832532e-05, + "loss": 2.184, + "step": 3681 + }, + { + "epoch": 0.12, + "grad_norm": 0.671719491481781, + "learning_rate": 1.9433492564737673e-05, + "loss": 2.229, + "step": 3682 + }, + { + "epoch": 0.12, + "grad_norm": 0.6637355089187622, + "learning_rate": 1.9433139842068737e-05, + "loss": 2.2418, + "step": 3683 + }, + { + "epoch": 0.12, + "grad_norm": 0.7171868085861206, + "learning_rate": 1.9432787012829723e-05, + "loss": 2.2693, + "step": 3684 + }, + { + "epoch": 0.12, + "grad_norm": 0.6579193472862244, + "learning_rate": 1.9432434077024602e-05, + "loss": 2.2586, + "step": 3685 + }, + { + "epoch": 0.12, + "grad_norm": 0.6844008564949036, + "learning_rate": 1.9432081034657374e-05, + "loss": 2.2166, + "step": 3686 + }, + { + "epoch": 0.12, + "grad_norm": 0.6631815433502197, + "learning_rate": 1.9431727885732015e-05, + "loss": 2.2513, + "step": 3687 + }, + { + "epoch": 0.12, + "grad_norm": 0.6815577149391174, + "learning_rate": 1.943137463025253e-05, + "loss": 2.2373, + "step": 3688 + }, + { + "epoch": 0.12, + "grad_norm": 0.704023540019989, + "learning_rate": 1.9431021268222898e-05, + "loss": 2.31, + "step": 3689 + }, + { + "epoch": 0.12, + "grad_norm": 0.7080705165863037, + "learning_rate": 1.9430667799647115e-05, + "loss": 2.1978, + "step": 3690 + }, + { + "epoch": 0.12, + "grad_norm": 0.6579564213752747, + "learning_rate": 1.9430314224529177e-05, + "loss": 2.284, + "step": 3691 + }, + { + "epoch": 0.12, + "grad_norm": 0.69828861951828, + "learning_rate": 1.942996054287307e-05, + "loss": 2.3091, + "step": 3692 + }, + { + "epoch": 0.12, + "grad_norm": 0.7169203758239746, + "learning_rate": 1.9429606754682803e-05, + "loss": 2.2944, + "step": 3693 + }, + { + "epoch": 0.12, + "grad_norm": 0.6707363724708557, + "learning_rate": 1.942925285996236e-05, + "loss": 2.2127, + "step": 3694 + }, + { + "epoch": 0.12, + "grad_norm": 0.6921399831771851, + "learning_rate": 1.9428898858715745e-05, + "loss": 2.2407, + "step": 3695 + }, + { + "epoch": 0.12, + "grad_norm": 0.6841446161270142, + "learning_rate": 1.9428544750946955e-05, + "loss": 2.2568, + "step": 3696 + }, + { + "epoch": 0.12, + "grad_norm": 0.6716071963310242, + "learning_rate": 1.9428190536659995e-05, + "loss": 2.2963, + "step": 3697 + }, + { + "epoch": 0.12, + "grad_norm": 0.6964879035949707, + "learning_rate": 1.942783621585886e-05, + "loss": 2.2276, + "step": 3698 + }, + { + "epoch": 0.12, + "grad_norm": 0.6463219523429871, + "learning_rate": 1.942748178854756e-05, + "loss": 2.2197, + "step": 3699 + }, + { + "epoch": 0.12, + "grad_norm": 0.6674463748931885, + "learning_rate": 1.9427127254730095e-05, + "loss": 2.276, + "step": 3700 + }, + { + "epoch": 0.12, + "grad_norm": 0.6658323407173157, + "learning_rate": 1.942677261441047e-05, + "loss": 2.2693, + "step": 3701 + }, + { + "epoch": 0.12, + "grad_norm": 0.664180338382721, + "learning_rate": 1.9426417867592694e-05, + "loss": 2.2563, + "step": 3702 + }, + { + "epoch": 0.12, + "grad_norm": 0.6704079508781433, + "learning_rate": 1.9426063014280775e-05, + "loss": 2.2813, + "step": 3703 + }, + { + "epoch": 0.12, + "grad_norm": 0.7194498777389526, + "learning_rate": 1.9425708054478718e-05, + "loss": 2.2816, + "step": 3704 + }, + { + "epoch": 0.12, + "grad_norm": 0.7084808349609375, + "learning_rate": 1.9425352988190535e-05, + "loss": 2.3381, + "step": 3705 + }, + { + "epoch": 0.12, + "grad_norm": 0.7209370732307434, + "learning_rate": 1.9424997815420237e-05, + "loss": 2.2332, + "step": 3706 + }, + { + "epoch": 0.12, + "grad_norm": 0.6653528809547424, + "learning_rate": 1.942464253617184e-05, + "loss": 2.2177, + "step": 3707 + }, + { + "epoch": 0.12, + "grad_norm": 0.7053827047348022, + "learning_rate": 1.9424287150449356e-05, + "loss": 2.2957, + "step": 3708 + }, + { + "epoch": 0.12, + "grad_norm": 0.6653017997741699, + "learning_rate": 1.9423931658256795e-05, + "loss": 2.2521, + "step": 3709 + }, + { + "epoch": 0.12, + "grad_norm": 0.6525475382804871, + "learning_rate": 1.942357605959818e-05, + "loss": 2.2293, + "step": 3710 + }, + { + "epoch": 0.12, + "grad_norm": 0.6921650171279907, + "learning_rate": 1.9423220354477524e-05, + "loss": 2.2676, + "step": 3711 + }, + { + "epoch": 0.12, + "grad_norm": 0.6810005307197571, + "learning_rate": 1.9422864542898847e-05, + "loss": 2.2119, + "step": 3712 + }, + { + "epoch": 0.12, + "grad_norm": 0.670204222202301, + "learning_rate": 1.942250862486617e-05, + "loss": 2.2884, + "step": 3713 + }, + { + "epoch": 0.12, + "grad_norm": 0.6999047994613647, + "learning_rate": 1.942215260038351e-05, + "loss": 2.3229, + "step": 3714 + }, + { + "epoch": 0.12, + "grad_norm": 0.6989960074424744, + "learning_rate": 1.9421796469454896e-05, + "loss": 2.3461, + "step": 3715 + }, + { + "epoch": 0.12, + "grad_norm": 0.6834724545478821, + "learning_rate": 1.9421440232084344e-05, + "loss": 2.2876, + "step": 3716 + }, + { + "epoch": 0.12, + "grad_norm": 0.6775102019309998, + "learning_rate": 1.9421083888275882e-05, + "loss": 2.221, + "step": 3717 + }, + { + "epoch": 0.12, + "grad_norm": 0.7152907252311707, + "learning_rate": 1.9420727438033537e-05, + "loss": 2.252, + "step": 3718 + }, + { + "epoch": 0.12, + "grad_norm": 0.6460477709770203, + "learning_rate": 1.9420370881361332e-05, + "loss": 2.2655, + "step": 3719 + }, + { + "epoch": 0.12, + "grad_norm": 0.7120924592018127, + "learning_rate": 1.94200142182633e-05, + "loss": 2.2999, + "step": 3720 + }, + { + "epoch": 0.12, + "grad_norm": 0.677557110786438, + "learning_rate": 1.9419657448743465e-05, + "loss": 2.2966, + "step": 3721 + }, + { + "epoch": 0.12, + "grad_norm": 0.7043353915214539, + "learning_rate": 1.9419300572805863e-05, + "loss": 2.3866, + "step": 3722 + }, + { + "epoch": 0.12, + "grad_norm": 0.7141336798667908, + "learning_rate": 1.9418943590454523e-05, + "loss": 2.2002, + "step": 3723 + }, + { + "epoch": 0.12, + "grad_norm": 0.6903342604637146, + "learning_rate": 1.941858650169348e-05, + "loss": 2.2939, + "step": 3724 + }, + { + "epoch": 0.12, + "grad_norm": 0.6966872811317444, + "learning_rate": 1.9418229306526766e-05, + "loss": 2.2765, + "step": 3725 + }, + { + "epoch": 0.12, + "grad_norm": 0.6794267296791077, + "learning_rate": 1.9417872004958415e-05, + "loss": 2.2519, + "step": 3726 + }, + { + "epoch": 0.12, + "grad_norm": 0.6904814839363098, + "learning_rate": 1.9417514596992467e-05, + "loss": 2.2037, + "step": 3727 + }, + { + "epoch": 0.12, + "grad_norm": 0.6799295544624329, + "learning_rate": 1.9417157082632957e-05, + "loss": 2.2426, + "step": 3728 + }, + { + "epoch": 0.12, + "grad_norm": 0.687701404094696, + "learning_rate": 1.9416799461883926e-05, + "loss": 2.282, + "step": 3729 + }, + { + "epoch": 0.12, + "grad_norm": 0.6887182593345642, + "learning_rate": 1.9416441734749414e-05, + "loss": 2.2245, + "step": 3730 + }, + { + "epoch": 0.12, + "grad_norm": 0.6488282084465027, + "learning_rate": 1.9416083901233463e-05, + "loss": 2.3164, + "step": 3731 + }, + { + "epoch": 0.12, + "grad_norm": 0.6747837066650391, + "learning_rate": 1.941572596134011e-05, + "loss": 2.2615, + "step": 3732 + }, + { + "epoch": 0.12, + "grad_norm": 0.7180113792419434, + "learning_rate": 1.941536791507341e-05, + "loss": 2.3417, + "step": 3733 + }, + { + "epoch": 0.12, + "grad_norm": 0.6641101837158203, + "learning_rate": 1.9415009762437393e-05, + "loss": 2.2425, + "step": 3734 + }, + { + "epoch": 0.12, + "grad_norm": 0.7113267183303833, + "learning_rate": 1.941465150343612e-05, + "loss": 2.3495, + "step": 3735 + }, + { + "epoch": 0.12, + "grad_norm": 0.6928016543388367, + "learning_rate": 1.9414293138073627e-05, + "loss": 2.2755, + "step": 3736 + }, + { + "epoch": 0.12, + "grad_norm": 0.6808182001113892, + "learning_rate": 1.9413934666353972e-05, + "loss": 2.211, + "step": 3737 + }, + { + "epoch": 0.12, + "grad_norm": 0.659949779510498, + "learning_rate": 1.94135760882812e-05, + "loss": 2.3518, + "step": 3738 + }, + { + "epoch": 0.12, + "grad_norm": 0.6785452365875244, + "learning_rate": 1.941321740385936e-05, + "loss": 2.2474, + "step": 3739 + }, + { + "epoch": 0.12, + "grad_norm": 0.7094192504882812, + "learning_rate": 1.9412858613092503e-05, + "loss": 2.2156, + "step": 3740 + }, + { + "epoch": 0.12, + "grad_norm": 0.6787962317466736, + "learning_rate": 1.9412499715984694e-05, + "loss": 2.2738, + "step": 3741 + }, + { + "epoch": 0.12, + "grad_norm": 0.6623697876930237, + "learning_rate": 1.9412140712539975e-05, + "loss": 2.2391, + "step": 3742 + }, + { + "epoch": 0.12, + "grad_norm": 0.6972416639328003, + "learning_rate": 1.941178160276241e-05, + "loss": 2.2584, + "step": 3743 + }, + { + "epoch": 0.12, + "grad_norm": 0.6648272275924683, + "learning_rate": 1.9411422386656045e-05, + "loss": 2.2689, + "step": 3744 + }, + { + "epoch": 0.12, + "grad_norm": 0.7274272441864014, + "learning_rate": 1.9411063064224952e-05, + "loss": 2.1682, + "step": 3745 + }, + { + "epoch": 0.12, + "grad_norm": 0.6804381608963013, + "learning_rate": 1.941070363547318e-05, + "loss": 2.2258, + "step": 3746 + }, + { + "epoch": 0.12, + "grad_norm": 0.6874633431434631, + "learning_rate": 1.9410344100404792e-05, + "loss": 2.273, + "step": 3747 + }, + { + "epoch": 0.12, + "grad_norm": 0.6696643829345703, + "learning_rate": 1.9409984459023856e-05, + "loss": 2.2466, + "step": 3748 + }, + { + "epoch": 0.12, + "grad_norm": 0.7038395404815674, + "learning_rate": 1.940962471133443e-05, + "loss": 2.3261, + "step": 3749 + }, + { + "epoch": 0.12, + "grad_norm": 0.6701029539108276, + "learning_rate": 1.9409264857340578e-05, + "loss": 2.2746, + "step": 3750 + }, + { + "epoch": 0.12, + "grad_norm": 0.695885181427002, + "learning_rate": 1.9408904897046364e-05, + "loss": 2.2239, + "step": 3751 + }, + { + "epoch": 0.12, + "grad_norm": 0.6909551024436951, + "learning_rate": 1.9408544830455858e-05, + "loss": 2.3368, + "step": 3752 + }, + { + "epoch": 0.12, + "grad_norm": 0.686220645904541, + "learning_rate": 1.9408184657573125e-05, + "loss": 2.2643, + "step": 3753 + }, + { + "epoch": 0.12, + "grad_norm": 0.7020294070243835, + "learning_rate": 1.9407824378402238e-05, + "loss": 2.2943, + "step": 3754 + }, + { + "epoch": 0.12, + "grad_norm": 0.6743779182434082, + "learning_rate": 1.9407463992947263e-05, + "loss": 2.2428, + "step": 3755 + }, + { + "epoch": 0.12, + "grad_norm": 0.6766365170478821, + "learning_rate": 1.9407103501212276e-05, + "loss": 2.2272, + "step": 3756 + }, + { + "epoch": 0.12, + "grad_norm": 0.6825518012046814, + "learning_rate": 1.9406742903201342e-05, + "loss": 2.2346, + "step": 3757 + }, + { + "epoch": 0.13, + "grad_norm": 0.7209994792938232, + "learning_rate": 1.940638219891854e-05, + "loss": 2.2656, + "step": 3758 + }, + { + "epoch": 0.13, + "grad_norm": 0.6528053283691406, + "learning_rate": 1.9406021388367948e-05, + "loss": 2.2748, + "step": 3759 + }, + { + "epoch": 0.13, + "grad_norm": 0.6639633774757385, + "learning_rate": 1.9405660471553637e-05, + "loss": 2.2343, + "step": 3760 + }, + { + "epoch": 0.13, + "grad_norm": 0.6886638402938843, + "learning_rate": 1.9405299448479686e-05, + "loss": 2.2773, + "step": 3761 + }, + { + "epoch": 0.13, + "grad_norm": 0.7195329666137695, + "learning_rate": 1.9404938319150175e-05, + "loss": 2.3398, + "step": 3762 + }, + { + "epoch": 0.13, + "grad_norm": 0.6680470705032349, + "learning_rate": 1.9404577083569183e-05, + "loss": 2.2494, + "step": 3763 + }, + { + "epoch": 0.13, + "grad_norm": 0.7430095076560974, + "learning_rate": 1.940421574174079e-05, + "loss": 2.2654, + "step": 3764 + }, + { + "epoch": 0.13, + "grad_norm": 0.680685818195343, + "learning_rate": 1.940385429366908e-05, + "loss": 2.2734, + "step": 3765 + }, + { + "epoch": 0.13, + "grad_norm": 0.7081979513168335, + "learning_rate": 1.9403492739358132e-05, + "loss": 2.2889, + "step": 3766 + }, + { + "epoch": 0.13, + "grad_norm": 0.6792062520980835, + "learning_rate": 1.9403131078812037e-05, + "loss": 2.2726, + "step": 3767 + }, + { + "epoch": 0.13, + "grad_norm": 0.6896951794624329, + "learning_rate": 1.9402769312034878e-05, + "loss": 2.249, + "step": 3768 + }, + { + "epoch": 0.13, + "grad_norm": 0.6813924908638, + "learning_rate": 1.940240743903074e-05, + "loss": 2.2295, + "step": 3769 + }, + { + "epoch": 0.13, + "grad_norm": 0.7176504135131836, + "learning_rate": 1.9402045459803712e-05, + "loss": 2.2056, + "step": 3770 + }, + { + "epoch": 0.13, + "grad_norm": 0.6598018407821655, + "learning_rate": 1.9401683374357888e-05, + "loss": 2.2767, + "step": 3771 + }, + { + "epoch": 0.13, + "grad_norm": 0.6950193643569946, + "learning_rate": 1.9401321182697354e-05, + "loss": 2.2728, + "step": 3772 + }, + { + "epoch": 0.13, + "grad_norm": 0.6810510754585266, + "learning_rate": 1.9400958884826203e-05, + "loss": 2.3052, + "step": 3773 + }, + { + "epoch": 0.13, + "grad_norm": 0.6701185703277588, + "learning_rate": 1.940059648074853e-05, + "loss": 2.2383, + "step": 3774 + }, + { + "epoch": 0.13, + "grad_norm": 0.6869978308677673, + "learning_rate": 1.9400233970468424e-05, + "loss": 2.2488, + "step": 3775 + }, + { + "epoch": 0.13, + "grad_norm": 0.6976146697998047, + "learning_rate": 1.9399871353989984e-05, + "loss": 2.2684, + "step": 3776 + }, + { + "epoch": 0.13, + "grad_norm": 0.671232283115387, + "learning_rate": 1.9399508631317306e-05, + "loss": 2.2699, + "step": 3777 + }, + { + "epoch": 0.13, + "grad_norm": 0.6673446893692017, + "learning_rate": 1.939914580245449e-05, + "loss": 2.2968, + "step": 3778 + }, + { + "epoch": 0.13, + "grad_norm": 0.6780042052268982, + "learning_rate": 1.9398782867405633e-05, + "loss": 2.257, + "step": 3779 + }, + { + "epoch": 0.13, + "grad_norm": 0.6714012622833252, + "learning_rate": 1.9398419826174835e-05, + "loss": 2.3533, + "step": 3780 + }, + { + "epoch": 0.13, + "grad_norm": 0.6834983825683594, + "learning_rate": 1.9398056678766196e-05, + "loss": 2.3048, + "step": 3781 + }, + { + "epoch": 0.13, + "grad_norm": 0.6877976059913635, + "learning_rate": 1.9397693425183824e-05, + "loss": 2.2664, + "step": 3782 + }, + { + "epoch": 0.13, + "grad_norm": 0.6947190165519714, + "learning_rate": 1.9397330065431815e-05, + "loss": 2.2917, + "step": 3783 + }, + { + "epoch": 0.13, + "grad_norm": 0.6546889543533325, + "learning_rate": 1.9396966599514285e-05, + "loss": 2.2027, + "step": 3784 + }, + { + "epoch": 0.13, + "grad_norm": 0.6743900775909424, + "learning_rate": 1.9396603027435324e-05, + "loss": 2.2568, + "step": 3785 + }, + { + "epoch": 0.13, + "grad_norm": 0.6828441023826599, + "learning_rate": 1.939623934919906e-05, + "loss": 2.246, + "step": 3786 + }, + { + "epoch": 0.13, + "grad_norm": 0.648501455783844, + "learning_rate": 1.9395875564809582e-05, + "loss": 2.2794, + "step": 3787 + }, + { + "epoch": 0.13, + "grad_norm": 0.6960654258728027, + "learning_rate": 1.939551167427101e-05, + "loss": 2.2531, + "step": 3788 + }, + { + "epoch": 0.13, + "grad_norm": 0.741593599319458, + "learning_rate": 1.9395147677587457e-05, + "loss": 2.312, + "step": 3789 + }, + { + "epoch": 0.13, + "grad_norm": 0.671448826789856, + "learning_rate": 1.939478357476303e-05, + "loss": 2.2683, + "step": 3790 + }, + { + "epoch": 0.13, + "grad_norm": 0.6526780724525452, + "learning_rate": 1.939441936580184e-05, + "loss": 2.246, + "step": 3791 + }, + { + "epoch": 0.13, + "grad_norm": 0.6915876269340515, + "learning_rate": 1.939405505070801e-05, + "loss": 2.2602, + "step": 3792 + }, + { + "epoch": 0.13, + "grad_norm": 0.6606250405311584, + "learning_rate": 1.939369062948565e-05, + "loss": 2.2105, + "step": 3793 + }, + { + "epoch": 0.13, + "grad_norm": 0.6585690975189209, + "learning_rate": 1.939332610213888e-05, + "loss": 2.3095, + "step": 3794 + }, + { + "epoch": 0.13, + "grad_norm": 0.7024334073066711, + "learning_rate": 1.9392961468671812e-05, + "loss": 2.2228, + "step": 3795 + }, + { + "epoch": 0.13, + "grad_norm": 0.6917774677276611, + "learning_rate": 1.9392596729088577e-05, + "loss": 2.2104, + "step": 3796 + }, + { + "epoch": 0.13, + "grad_norm": 0.6826688647270203, + "learning_rate": 1.9392231883393285e-05, + "loss": 2.2854, + "step": 3797 + }, + { + "epoch": 0.13, + "grad_norm": 0.6910467147827148, + "learning_rate": 1.939186693159006e-05, + "loss": 2.1931, + "step": 3798 + }, + { + "epoch": 0.13, + "grad_norm": 0.7044627070426941, + "learning_rate": 1.9391501873683028e-05, + "loss": 2.1717, + "step": 3799 + }, + { + "epoch": 0.13, + "grad_norm": 0.6685564517974854, + "learning_rate": 1.9391136709676316e-05, + "loss": 2.313, + "step": 3800 + }, + { + "epoch": 0.13, + "grad_norm": 0.6969269514083862, + "learning_rate": 1.939077143957404e-05, + "loss": 2.2872, + "step": 3801 + }, + { + "epoch": 0.13, + "grad_norm": 0.6813797950744629, + "learning_rate": 1.9390406063380334e-05, + "loss": 2.2175, + "step": 3802 + }, + { + "epoch": 0.13, + "grad_norm": 0.7131717205047607, + "learning_rate": 1.9390040581099322e-05, + "loss": 2.296, + "step": 3803 + }, + { + "epoch": 0.13, + "grad_norm": 0.685228168964386, + "learning_rate": 1.9389674992735137e-05, + "loss": 2.2951, + "step": 3804 + }, + { + "epoch": 0.13, + "grad_norm": 0.6646665930747986, + "learning_rate": 1.9389309298291904e-05, + "loss": 2.2439, + "step": 3805 + }, + { + "epoch": 0.13, + "grad_norm": 0.6834672689437866, + "learning_rate": 1.938894349777376e-05, + "loss": 2.2997, + "step": 3806 + }, + { + "epoch": 0.13, + "grad_norm": 0.6647322773933411, + "learning_rate": 1.9388577591184833e-05, + "loss": 2.3175, + "step": 3807 + }, + { + "epoch": 0.13, + "grad_norm": 0.6993784308433533, + "learning_rate": 1.9388211578529258e-05, + "loss": 2.2611, + "step": 3808 + }, + { + "epoch": 0.13, + "grad_norm": 0.7068907618522644, + "learning_rate": 1.9387845459811175e-05, + "loss": 2.3137, + "step": 3809 + }, + { + "epoch": 0.13, + "grad_norm": 0.7464045882225037, + "learning_rate": 1.938747923503471e-05, + "loss": 2.367, + "step": 3810 + }, + { + "epoch": 0.13, + "grad_norm": 0.6496166586875916, + "learning_rate": 1.9387112904204008e-05, + "loss": 2.224, + "step": 3811 + }, + { + "epoch": 0.13, + "grad_norm": 0.711633026599884, + "learning_rate": 1.9386746467323206e-05, + "loss": 2.293, + "step": 3812 + }, + { + "epoch": 0.13, + "grad_norm": 0.7147133350372314, + "learning_rate": 1.9386379924396448e-05, + "loss": 2.2877, + "step": 3813 + }, + { + "epoch": 0.13, + "grad_norm": 0.7064916491508484, + "learning_rate": 1.9386013275427866e-05, + "loss": 2.2539, + "step": 3814 + }, + { + "epoch": 0.13, + "grad_norm": 0.6703269481658936, + "learning_rate": 1.9385646520421605e-05, + "loss": 2.2243, + "step": 3815 + }, + { + "epoch": 0.13, + "grad_norm": 0.6712427139282227, + "learning_rate": 1.938527965938181e-05, + "loss": 2.2781, + "step": 3816 + }, + { + "epoch": 0.13, + "grad_norm": 0.6613171100616455, + "learning_rate": 1.9384912692312633e-05, + "loss": 2.2297, + "step": 3817 + }, + { + "epoch": 0.13, + "grad_norm": 0.6942279934883118, + "learning_rate": 1.938454561921821e-05, + "loss": 2.303, + "step": 3818 + }, + { + "epoch": 0.13, + "grad_norm": 0.6588547825813293, + "learning_rate": 1.9384178440102686e-05, + "loss": 2.2316, + "step": 3819 + }, + { + "epoch": 0.13, + "grad_norm": 0.676581621170044, + "learning_rate": 1.9383811154970216e-05, + "loss": 2.2574, + "step": 3820 + }, + { + "epoch": 0.13, + "grad_norm": 0.6870656609535217, + "learning_rate": 1.9383443763824946e-05, + "loss": 2.3091, + "step": 3821 + }, + { + "epoch": 0.13, + "grad_norm": 0.6999729871749878, + "learning_rate": 1.938307626667103e-05, + "loss": 2.2618, + "step": 3822 + }, + { + "epoch": 0.13, + "grad_norm": 0.6895442008972168, + "learning_rate": 1.9382708663512616e-05, + "loss": 2.3144, + "step": 3823 + }, + { + "epoch": 0.13, + "grad_norm": 0.6573551893234253, + "learning_rate": 1.9382340954353857e-05, + "loss": 2.2283, + "step": 3824 + }, + { + "epoch": 0.13, + "grad_norm": 0.6985926628112793, + "learning_rate": 1.9381973139198912e-05, + "loss": 2.3384, + "step": 3825 + }, + { + "epoch": 0.13, + "grad_norm": 0.6900296211242676, + "learning_rate": 1.938160521805193e-05, + "loss": 2.239, + "step": 3826 + }, + { + "epoch": 0.13, + "grad_norm": 0.6675920486450195, + "learning_rate": 1.938123719091707e-05, + "loss": 2.2255, + "step": 3827 + }, + { + "epoch": 0.13, + "grad_norm": 0.692578136920929, + "learning_rate": 1.9380869057798494e-05, + "loss": 2.2696, + "step": 3828 + }, + { + "epoch": 0.13, + "grad_norm": 0.6923479437828064, + "learning_rate": 1.938050081870035e-05, + "loss": 2.2553, + "step": 3829 + }, + { + "epoch": 0.13, + "grad_norm": 0.7357146739959717, + "learning_rate": 1.9380132473626813e-05, + "loss": 2.2535, + "step": 3830 + }, + { + "epoch": 0.13, + "grad_norm": 0.6821557879447937, + "learning_rate": 1.9379764022582032e-05, + "loss": 2.2379, + "step": 3831 + }, + { + "epoch": 0.13, + "grad_norm": 0.7115961909294128, + "learning_rate": 1.9379395465570175e-05, + "loss": 2.2285, + "step": 3832 + }, + { + "epoch": 0.13, + "grad_norm": 0.6817278265953064, + "learning_rate": 1.9379026802595406e-05, + "loss": 2.2827, + "step": 3833 + }, + { + "epoch": 0.13, + "grad_norm": 0.7151566743850708, + "learning_rate": 1.937865803366189e-05, + "loss": 2.2156, + "step": 3834 + }, + { + "epoch": 0.13, + "grad_norm": 0.6646065711975098, + "learning_rate": 1.9378289158773785e-05, + "loss": 2.3028, + "step": 3835 + }, + { + "epoch": 0.13, + "grad_norm": 0.751550555229187, + "learning_rate": 1.9377920177935274e-05, + "loss": 2.2381, + "step": 3836 + }, + { + "epoch": 0.13, + "grad_norm": 0.6581366658210754, + "learning_rate": 1.937755109115051e-05, + "loss": 2.2975, + "step": 3837 + }, + { + "epoch": 0.13, + "grad_norm": 0.6811825037002563, + "learning_rate": 1.9377181898423676e-05, + "loss": 2.2861, + "step": 3838 + }, + { + "epoch": 0.13, + "grad_norm": 0.6856463551521301, + "learning_rate": 1.937681259975893e-05, + "loss": 2.2393, + "step": 3839 + }, + { + "epoch": 0.13, + "grad_norm": 0.6743162274360657, + "learning_rate": 1.9376443195160454e-05, + "loss": 2.2287, + "step": 3840 + }, + { + "epoch": 0.13, + "grad_norm": 0.6996175050735474, + "learning_rate": 1.937607368463242e-05, + "loss": 2.2938, + "step": 3841 + }, + { + "epoch": 0.13, + "grad_norm": 0.6857704520225525, + "learning_rate": 1.9375704068179e-05, + "loss": 2.2518, + "step": 3842 + }, + { + "epoch": 0.13, + "grad_norm": 0.6750434637069702, + "learning_rate": 1.9375334345804366e-05, + "loss": 2.2987, + "step": 3843 + }, + { + "epoch": 0.13, + "grad_norm": 0.661083459854126, + "learning_rate": 1.93749645175127e-05, + "loss": 2.3061, + "step": 3844 + }, + { + "epoch": 0.13, + "grad_norm": 0.6785575747489929, + "learning_rate": 1.937459458330818e-05, + "loss": 2.2249, + "step": 3845 + }, + { + "epoch": 0.13, + "grad_norm": 0.6781976222991943, + "learning_rate": 1.9374224543194986e-05, + "loss": 2.2666, + "step": 3846 + }, + { + "epoch": 0.13, + "grad_norm": 0.6765331625938416, + "learning_rate": 1.9373854397177295e-05, + "loss": 2.2515, + "step": 3847 + }, + { + "epoch": 0.13, + "grad_norm": 0.6972099542617798, + "learning_rate": 1.9373484145259293e-05, + "loss": 2.26, + "step": 3848 + }, + { + "epoch": 0.13, + "grad_norm": 0.7114779353141785, + "learning_rate": 1.937311378744516e-05, + "loss": 2.174, + "step": 3849 + }, + { + "epoch": 0.13, + "grad_norm": 0.6999061107635498, + "learning_rate": 1.937274332373908e-05, + "loss": 2.2336, + "step": 3850 + }, + { + "epoch": 0.13, + "grad_norm": 0.6958664059638977, + "learning_rate": 1.937237275414524e-05, + "loss": 2.2772, + "step": 3851 + }, + { + "epoch": 0.13, + "grad_norm": 0.7307547926902771, + "learning_rate": 1.937200207866782e-05, + "loss": 2.2527, + "step": 3852 + }, + { + "epoch": 0.13, + "grad_norm": 0.6892645955085754, + "learning_rate": 1.9371631297311018e-05, + "loss": 2.2756, + "step": 3853 + }, + { + "epoch": 0.13, + "grad_norm": 0.7035792469978333, + "learning_rate": 1.9371260410079017e-05, + "loss": 2.2895, + "step": 3854 + }, + { + "epoch": 0.13, + "grad_norm": 0.6636065244674683, + "learning_rate": 1.9370889416976005e-05, + "loss": 2.2186, + "step": 3855 + }, + { + "epoch": 0.13, + "grad_norm": 0.6809833645820618, + "learning_rate": 1.9370518318006183e-05, + "loss": 2.2541, + "step": 3856 + }, + { + "epoch": 0.13, + "grad_norm": 0.691703200340271, + "learning_rate": 1.937014711317373e-05, + "loss": 2.2346, + "step": 3857 + }, + { + "epoch": 0.13, + "grad_norm": 0.6827625632286072, + "learning_rate": 1.936977580248285e-05, + "loss": 2.2599, + "step": 3858 + }, + { + "epoch": 0.13, + "grad_norm": 0.68923419713974, + "learning_rate": 1.9369404385937734e-05, + "loss": 2.3339, + "step": 3859 + }, + { + "epoch": 0.13, + "grad_norm": 0.707530677318573, + "learning_rate": 1.936903286354258e-05, + "loss": 2.3143, + "step": 3860 + }, + { + "epoch": 0.13, + "grad_norm": 0.6839113831520081, + "learning_rate": 1.936866123530158e-05, + "loss": 2.3177, + "step": 3861 + }, + { + "epoch": 0.13, + "grad_norm": 0.6864945888519287, + "learning_rate": 1.9368289501218935e-05, + "loss": 2.2573, + "step": 3862 + }, + { + "epoch": 0.13, + "grad_norm": 0.6917358040809631, + "learning_rate": 1.936791766129885e-05, + "loss": 2.3434, + "step": 3863 + }, + { + "epoch": 0.13, + "grad_norm": 0.6996966600418091, + "learning_rate": 1.936754571554552e-05, + "loss": 2.3309, + "step": 3864 + }, + { + "epoch": 0.13, + "grad_norm": 0.6747732162475586, + "learning_rate": 1.9367173663963146e-05, + "loss": 2.2537, + "step": 3865 + }, + { + "epoch": 0.13, + "grad_norm": 0.6763821244239807, + "learning_rate": 1.9366801506555933e-05, + "loss": 2.1774, + "step": 3866 + }, + { + "epoch": 0.13, + "grad_norm": 0.6778748631477356, + "learning_rate": 1.936642924332809e-05, + "loss": 2.2048, + "step": 3867 + }, + { + "epoch": 0.13, + "grad_norm": 0.6992117166519165, + "learning_rate": 1.9366056874283817e-05, + "loss": 2.2658, + "step": 3868 + }, + { + "epoch": 0.13, + "grad_norm": 0.667715847492218, + "learning_rate": 1.9365684399427324e-05, + "loss": 2.2399, + "step": 3869 + }, + { + "epoch": 0.13, + "grad_norm": 0.6567389369010925, + "learning_rate": 1.9365311818762812e-05, + "loss": 2.2115, + "step": 3870 + }, + { + "epoch": 0.13, + "grad_norm": 0.6794754862785339, + "learning_rate": 1.9364939132294503e-05, + "loss": 2.2936, + "step": 3871 + }, + { + "epoch": 0.13, + "grad_norm": 0.7116652727127075, + "learning_rate": 1.9364566340026595e-05, + "loss": 2.2988, + "step": 3872 + }, + { + "epoch": 0.13, + "grad_norm": 0.6668806076049805, + "learning_rate": 1.9364193441963307e-05, + "loss": 2.2648, + "step": 3873 + }, + { + "epoch": 0.13, + "grad_norm": 0.6928072571754456, + "learning_rate": 1.936382043810885e-05, + "loss": 2.2542, + "step": 3874 + }, + { + "epoch": 0.13, + "grad_norm": 0.6819534301757812, + "learning_rate": 1.9363447328467434e-05, + "loss": 2.271, + "step": 3875 + }, + { + "epoch": 0.13, + "grad_norm": 0.6918515563011169, + "learning_rate": 1.9363074113043282e-05, + "loss": 2.3231, + "step": 3876 + }, + { + "epoch": 0.13, + "grad_norm": 0.7092213034629822, + "learning_rate": 1.9362700791840602e-05, + "loss": 2.2577, + "step": 3877 + }, + { + "epoch": 0.13, + "grad_norm": 0.6970034241676331, + "learning_rate": 1.936232736486362e-05, + "loss": 2.2668, + "step": 3878 + }, + { + "epoch": 0.13, + "grad_norm": 0.7085751891136169, + "learning_rate": 1.9361953832116548e-05, + "loss": 2.2622, + "step": 3879 + }, + { + "epoch": 0.13, + "grad_norm": 0.6737750172615051, + "learning_rate": 1.9361580193603603e-05, + "loss": 2.2725, + "step": 3880 + }, + { + "epoch": 0.13, + "grad_norm": 0.6979121565818787, + "learning_rate": 1.9361206449329017e-05, + "loss": 2.1956, + "step": 3881 + }, + { + "epoch": 0.13, + "grad_norm": 0.7040246725082397, + "learning_rate": 1.9360832599297007e-05, + "loss": 2.2627, + "step": 3882 + }, + { + "epoch": 0.13, + "grad_norm": 0.6824359893798828, + "learning_rate": 1.9360458643511798e-05, + "loss": 2.2686, + "step": 3883 + }, + { + "epoch": 0.13, + "grad_norm": 0.6732771396636963, + "learning_rate": 1.936008458197761e-05, + "loss": 2.2638, + "step": 3884 + }, + { + "epoch": 0.13, + "grad_norm": 0.6961280703544617, + "learning_rate": 1.9359710414698672e-05, + "loss": 2.2877, + "step": 3885 + }, + { + "epoch": 0.13, + "grad_norm": 0.6671313047409058, + "learning_rate": 1.935933614167921e-05, + "loss": 2.2342, + "step": 3886 + }, + { + "epoch": 0.13, + "grad_norm": 0.6848142743110657, + "learning_rate": 1.9358961762923455e-05, + "loss": 2.2807, + "step": 3887 + }, + { + "epoch": 0.13, + "grad_norm": 0.7128828763961792, + "learning_rate": 1.9358587278435633e-05, + "loss": 2.3302, + "step": 3888 + }, + { + "epoch": 0.13, + "grad_norm": 0.6834088563919067, + "learning_rate": 1.935821268821998e-05, + "loss": 2.2734, + "step": 3889 + }, + { + "epoch": 0.13, + "grad_norm": 0.7087829113006592, + "learning_rate": 1.935783799228072e-05, + "loss": 2.2717, + "step": 3890 + }, + { + "epoch": 0.13, + "grad_norm": 0.6643744707107544, + "learning_rate": 1.9357463190622096e-05, + "loss": 2.2913, + "step": 3891 + }, + { + "epoch": 0.13, + "grad_norm": 0.6617563366889954, + "learning_rate": 1.9357088283248334e-05, + "loss": 2.2531, + "step": 3892 + }, + { + "epoch": 0.13, + "grad_norm": 0.6796502470970154, + "learning_rate": 1.9356713270163672e-05, + "loss": 2.2258, + "step": 3893 + }, + { + "epoch": 0.13, + "grad_norm": 0.6705867052078247, + "learning_rate": 1.9356338151372347e-05, + "loss": 2.2279, + "step": 3894 + }, + { + "epoch": 0.13, + "grad_norm": 0.6770344376564026, + "learning_rate": 1.9355962926878598e-05, + "loss": 2.2395, + "step": 3895 + }, + { + "epoch": 0.13, + "grad_norm": 0.6612032651901245, + "learning_rate": 1.9355587596686663e-05, + "loss": 2.2633, + "step": 3896 + }, + { + "epoch": 0.13, + "grad_norm": 0.6406837701797485, + "learning_rate": 1.935521216080078e-05, + "loss": 2.2691, + "step": 3897 + }, + { + "epoch": 0.13, + "grad_norm": 0.664910614490509, + "learning_rate": 1.9354836619225196e-05, + "loss": 2.336, + "step": 3898 + }, + { + "epoch": 0.13, + "grad_norm": 0.6791231036186218, + "learning_rate": 1.9354460971964147e-05, + "loss": 2.2131, + "step": 3899 + }, + { + "epoch": 0.13, + "grad_norm": 0.7030355334281921, + "learning_rate": 1.935408521902188e-05, + "loss": 2.2883, + "step": 3900 + }, + { + "epoch": 0.13, + "grad_norm": 0.6841225624084473, + "learning_rate": 1.9353709360402645e-05, + "loss": 2.2596, + "step": 3901 + }, + { + "epoch": 0.13, + "grad_norm": 0.6789695024490356, + "learning_rate": 1.9353333396110682e-05, + "loss": 2.299, + "step": 3902 + }, + { + "epoch": 0.13, + "grad_norm": 0.6934884786605835, + "learning_rate": 1.9352957326150238e-05, + "loss": 2.2427, + "step": 3903 + }, + { + "epoch": 0.13, + "grad_norm": 0.6843969821929932, + "learning_rate": 1.935258115052556e-05, + "loss": 2.2395, + "step": 3904 + }, + { + "epoch": 0.13, + "grad_norm": 0.6676246523857117, + "learning_rate": 1.9352204869240906e-05, + "loss": 2.206, + "step": 3905 + }, + { + "epoch": 0.13, + "grad_norm": 0.6708856225013733, + "learning_rate": 1.9351828482300523e-05, + "loss": 2.2368, + "step": 3906 + }, + { + "epoch": 0.13, + "grad_norm": 0.6857244372367859, + "learning_rate": 1.935145198970866e-05, + "loss": 2.272, + "step": 3907 + }, + { + "epoch": 0.13, + "grad_norm": 0.7019046545028687, + "learning_rate": 1.9351075391469575e-05, + "loss": 2.2947, + "step": 3908 + }, + { + "epoch": 0.13, + "grad_norm": 0.6780718564987183, + "learning_rate": 1.935069868758752e-05, + "loss": 2.273, + "step": 3909 + }, + { + "epoch": 0.13, + "grad_norm": 0.6682991981506348, + "learning_rate": 1.935032187806675e-05, + "loss": 2.1827, + "step": 3910 + }, + { + "epoch": 0.13, + "grad_norm": 0.6956418752670288, + "learning_rate": 1.9349944962911523e-05, + "loss": 2.2479, + "step": 3911 + }, + { + "epoch": 0.13, + "grad_norm": 0.6831377148628235, + "learning_rate": 1.9349567942126102e-05, + "loss": 2.2517, + "step": 3912 + }, + { + "epoch": 0.13, + "grad_norm": 0.6706723570823669, + "learning_rate": 1.9349190815714734e-05, + "loss": 2.3225, + "step": 3913 + }, + { + "epoch": 0.13, + "grad_norm": 0.6868249773979187, + "learning_rate": 1.9348813583681692e-05, + "loss": 2.2743, + "step": 3914 + }, + { + "epoch": 0.13, + "grad_norm": 0.6771519184112549, + "learning_rate": 1.9348436246031234e-05, + "loss": 2.2711, + "step": 3915 + }, + { + "epoch": 0.13, + "grad_norm": 0.6883339881896973, + "learning_rate": 1.9348058802767622e-05, + "loss": 2.2709, + "step": 3916 + }, + { + "epoch": 0.13, + "grad_norm": 0.6665377616882324, + "learning_rate": 1.934768125389512e-05, + "loss": 2.2763, + "step": 3917 + }, + { + "epoch": 0.13, + "grad_norm": 0.7197356224060059, + "learning_rate": 1.934730359941799e-05, + "loss": 2.2944, + "step": 3918 + }, + { + "epoch": 0.13, + "grad_norm": 0.698290228843689, + "learning_rate": 1.9346925839340507e-05, + "loss": 2.2297, + "step": 3919 + }, + { + "epoch": 0.13, + "grad_norm": 0.7223041653633118, + "learning_rate": 1.9346547973666928e-05, + "loss": 2.2834, + "step": 3920 + }, + { + "epoch": 0.13, + "grad_norm": 0.6898424029350281, + "learning_rate": 1.934617000240153e-05, + "loss": 2.3536, + "step": 3921 + }, + { + "epoch": 0.13, + "grad_norm": 0.688073992729187, + "learning_rate": 1.9345791925548584e-05, + "loss": 2.2874, + "step": 3922 + }, + { + "epoch": 0.13, + "grad_norm": 0.6813004016876221, + "learning_rate": 1.9345413743112354e-05, + "loss": 2.23, + "step": 3923 + }, + { + "epoch": 0.13, + "grad_norm": 0.6877011656761169, + "learning_rate": 1.934503545509712e-05, + "loss": 2.2894, + "step": 3924 + }, + { + "epoch": 0.13, + "grad_norm": 0.6924586296081543, + "learning_rate": 1.934465706150715e-05, + "loss": 2.3253, + "step": 3925 + }, + { + "epoch": 0.13, + "grad_norm": 0.8447633981704712, + "learning_rate": 1.934427856234672e-05, + "loss": 2.2928, + "step": 3926 + }, + { + "epoch": 0.13, + "grad_norm": 0.6965543627738953, + "learning_rate": 1.9343899957620105e-05, + "loss": 2.2453, + "step": 3927 + }, + { + "epoch": 0.13, + "grad_norm": 0.7002049684524536, + "learning_rate": 1.934352124733159e-05, + "loss": 2.2676, + "step": 3928 + }, + { + "epoch": 0.13, + "grad_norm": 0.6983843445777893, + "learning_rate": 1.9343142431485447e-05, + "loss": 2.2615, + "step": 3929 + }, + { + "epoch": 0.13, + "grad_norm": 0.6810771226882935, + "learning_rate": 1.9342763510085954e-05, + "loss": 2.2556, + "step": 3930 + }, + { + "epoch": 0.13, + "grad_norm": 0.6740642786026001, + "learning_rate": 1.9342384483137394e-05, + "loss": 2.2308, + "step": 3931 + }, + { + "epoch": 0.13, + "grad_norm": 0.687989354133606, + "learning_rate": 1.934200535064405e-05, + "loss": 2.3078, + "step": 3932 + }, + { + "epoch": 0.13, + "grad_norm": 0.6699447631835938, + "learning_rate": 1.9341626112610204e-05, + "loss": 2.197, + "step": 3933 + }, + { + "epoch": 0.13, + "grad_norm": 0.6842352747917175, + "learning_rate": 1.9341246769040142e-05, + "loss": 2.2704, + "step": 3934 + }, + { + "epoch": 0.13, + "grad_norm": 0.6888436675071716, + "learning_rate": 1.9340867319938147e-05, + "loss": 2.2767, + "step": 3935 + }, + { + "epoch": 0.13, + "grad_norm": 0.6800437569618225, + "learning_rate": 1.9340487765308508e-05, + "loss": 2.3061, + "step": 3936 + }, + { + "epoch": 0.13, + "grad_norm": 0.6544216275215149, + "learning_rate": 1.9340108105155515e-05, + "loss": 2.2451, + "step": 3937 + }, + { + "epoch": 0.13, + "grad_norm": 0.6744993329048157, + "learning_rate": 1.933972833948345e-05, + "loss": 2.2858, + "step": 3938 + }, + { + "epoch": 0.13, + "grad_norm": 0.680762529373169, + "learning_rate": 1.933934846829661e-05, + "loss": 2.2925, + "step": 3939 + }, + { + "epoch": 0.13, + "grad_norm": 0.6617317199707031, + "learning_rate": 1.9338968491599286e-05, + "loss": 2.2356, + "step": 3940 + }, + { + "epoch": 0.13, + "grad_norm": 0.6594517230987549, + "learning_rate": 1.9338588409395766e-05, + "loss": 2.2268, + "step": 3941 + }, + { + "epoch": 0.13, + "grad_norm": 0.6944791078567505, + "learning_rate": 1.933820822169035e-05, + "loss": 2.2893, + "step": 3942 + }, + { + "epoch": 0.13, + "grad_norm": 0.687288224697113, + "learning_rate": 1.933782792848733e-05, + "loss": 2.2566, + "step": 3943 + }, + { + "epoch": 0.13, + "grad_norm": 0.6864408254623413, + "learning_rate": 1.9337447529791e-05, + "loss": 2.2672, + "step": 3944 + }, + { + "epoch": 0.13, + "grad_norm": 0.677907407283783, + "learning_rate": 1.933706702560566e-05, + "loss": 2.2708, + "step": 3945 + }, + { + "epoch": 0.13, + "grad_norm": 0.6765162944793701, + "learning_rate": 1.933668641593561e-05, + "loss": 2.2501, + "step": 3946 + }, + { + "epoch": 0.13, + "grad_norm": 0.6796731352806091, + "learning_rate": 1.933630570078515e-05, + "loss": 2.2774, + "step": 3947 + }, + { + "epoch": 0.13, + "grad_norm": 0.6950958371162415, + "learning_rate": 1.933592488015858e-05, + "loss": 2.2606, + "step": 3948 + }, + { + "epoch": 0.13, + "grad_norm": 0.6761336326599121, + "learning_rate": 1.93355439540602e-05, + "loss": 2.2602, + "step": 3949 + }, + { + "epoch": 0.13, + "grad_norm": 0.669391393661499, + "learning_rate": 1.9335162922494317e-05, + "loss": 2.2477, + "step": 3950 + }, + { + "epoch": 0.13, + "grad_norm": 0.6777642965316772, + "learning_rate": 1.9334781785465234e-05, + "loss": 2.2023, + "step": 3951 + }, + { + "epoch": 0.13, + "grad_norm": 0.6737990975379944, + "learning_rate": 1.9334400542977256e-05, + "loss": 2.2923, + "step": 3952 + }, + { + "epoch": 0.13, + "grad_norm": 0.695613443851471, + "learning_rate": 1.9334019195034693e-05, + "loss": 2.2825, + "step": 3953 + }, + { + "epoch": 0.13, + "grad_norm": 0.6728472709655762, + "learning_rate": 1.933363774164185e-05, + "loss": 2.2328, + "step": 3954 + }, + { + "epoch": 0.13, + "grad_norm": 0.70237797498703, + "learning_rate": 1.933325618280304e-05, + "loss": 2.2637, + "step": 3955 + }, + { + "epoch": 0.13, + "grad_norm": 0.7053419947624207, + "learning_rate": 1.9332874518522567e-05, + "loss": 2.2701, + "step": 3956 + }, + { + "epoch": 0.13, + "grad_norm": 0.6708536744117737, + "learning_rate": 1.933249274880475e-05, + "loss": 2.2638, + "step": 3957 + }, + { + "epoch": 0.13, + "grad_norm": 0.6735857129096985, + "learning_rate": 1.9332110873653903e-05, + "loss": 2.2613, + "step": 3958 + }, + { + "epoch": 0.13, + "grad_norm": 0.6780750751495361, + "learning_rate": 1.933172889307433e-05, + "loss": 2.2484, + "step": 3959 + }, + { + "epoch": 0.13, + "grad_norm": 0.6781023144721985, + "learning_rate": 1.9331346807070358e-05, + "loss": 2.2538, + "step": 3960 + }, + { + "epoch": 0.13, + "grad_norm": 0.6940311789512634, + "learning_rate": 1.9330964615646294e-05, + "loss": 2.243, + "step": 3961 + }, + { + "epoch": 0.13, + "grad_norm": 0.680412232875824, + "learning_rate": 1.9330582318806462e-05, + "loss": 2.2727, + "step": 3962 + }, + { + "epoch": 0.13, + "grad_norm": 0.6722524166107178, + "learning_rate": 1.9330199916555183e-05, + "loss": 2.2579, + "step": 3963 + }, + { + "epoch": 0.13, + "grad_norm": 0.7014780640602112, + "learning_rate": 1.932981740889677e-05, + "loss": 2.2792, + "step": 3964 + }, + { + "epoch": 0.13, + "grad_norm": 0.6936694383621216, + "learning_rate": 1.9329434795835546e-05, + "loss": 2.191, + "step": 3965 + }, + { + "epoch": 0.13, + "grad_norm": 0.6772468686103821, + "learning_rate": 1.9329052077375836e-05, + "loss": 2.2207, + "step": 3966 + }, + { + "epoch": 0.13, + "grad_norm": 0.6639229655265808, + "learning_rate": 1.9328669253521964e-05, + "loss": 2.3123, + "step": 3967 + }, + { + "epoch": 0.13, + "grad_norm": 0.6710674166679382, + "learning_rate": 1.9328286324278257e-05, + "loss": 2.2683, + "step": 3968 + }, + { + "epoch": 0.13, + "grad_norm": 0.6889562010765076, + "learning_rate": 1.9327903289649032e-05, + "loss": 2.2424, + "step": 3969 + }, + { + "epoch": 0.13, + "grad_norm": 0.6798684597015381, + "learning_rate": 1.9327520149638625e-05, + "loss": 2.3013, + "step": 3970 + }, + { + "epoch": 0.13, + "grad_norm": 0.713754415512085, + "learning_rate": 1.9327136904251363e-05, + "loss": 2.2461, + "step": 3971 + }, + { + "epoch": 0.13, + "grad_norm": 0.6701642274856567, + "learning_rate": 1.9326753553491576e-05, + "loss": 2.215, + "step": 3972 + }, + { + "epoch": 0.13, + "grad_norm": 0.6795248985290527, + "learning_rate": 1.9326370097363588e-05, + "loss": 2.2822, + "step": 3973 + }, + { + "epoch": 0.13, + "grad_norm": 0.6594623923301697, + "learning_rate": 1.9325986535871738e-05, + "loss": 2.2373, + "step": 3974 + }, + { + "epoch": 0.13, + "grad_norm": 0.6895204782485962, + "learning_rate": 1.932560286902036e-05, + "loss": 2.2973, + "step": 3975 + }, + { + "epoch": 0.13, + "grad_norm": 0.7123169898986816, + "learning_rate": 1.9325219096813787e-05, + "loss": 2.2842, + "step": 3976 + }, + { + "epoch": 0.13, + "grad_norm": 0.6550269722938538, + "learning_rate": 1.9324835219256353e-05, + "loss": 2.2312, + "step": 3977 + }, + { + "epoch": 0.13, + "grad_norm": 0.7204231023788452, + "learning_rate": 1.9324451236352394e-05, + "loss": 2.25, + "step": 3978 + }, + { + "epoch": 0.13, + "grad_norm": 0.6728067994117737, + "learning_rate": 1.932406714810625e-05, + "loss": 2.2935, + "step": 3979 + }, + { + "epoch": 0.13, + "grad_norm": 0.6936350464820862, + "learning_rate": 1.932368295452226e-05, + "loss": 2.3103, + "step": 3980 + }, + { + "epoch": 0.13, + "grad_norm": 0.7079327702522278, + "learning_rate": 1.9323298655604765e-05, + "loss": 2.2153, + "step": 3981 + }, + { + "epoch": 0.13, + "grad_norm": 0.6720169186592102, + "learning_rate": 1.9322914251358104e-05, + "loss": 2.2262, + "step": 3982 + }, + { + "epoch": 0.13, + "grad_norm": 0.696619987487793, + "learning_rate": 1.9322529741786623e-05, + "loss": 2.3156, + "step": 3983 + }, + { + "epoch": 0.13, + "grad_norm": 0.6902371644973755, + "learning_rate": 1.9322145126894662e-05, + "loss": 2.2503, + "step": 3984 + }, + { + "epoch": 0.13, + "grad_norm": 0.6832977533340454, + "learning_rate": 1.9321760406686572e-05, + "loss": 2.3031, + "step": 3985 + }, + { + "epoch": 0.13, + "grad_norm": 0.7059169411659241, + "learning_rate": 1.9321375581166696e-05, + "loss": 2.3153, + "step": 3986 + }, + { + "epoch": 0.13, + "grad_norm": 0.6833273768424988, + "learning_rate": 1.932099065033938e-05, + "loss": 2.2597, + "step": 3987 + }, + { + "epoch": 0.13, + "grad_norm": 0.6970521211624146, + "learning_rate": 1.9320605614208974e-05, + "loss": 2.2588, + "step": 3988 + }, + { + "epoch": 0.13, + "grad_norm": 0.6857945322990417, + "learning_rate": 1.932022047277983e-05, + "loss": 2.3074, + "step": 3989 + }, + { + "epoch": 0.13, + "grad_norm": 0.7248327136039734, + "learning_rate": 1.9319835226056295e-05, + "loss": 2.2426, + "step": 3990 + }, + { + "epoch": 0.13, + "grad_norm": 0.6732500195503235, + "learning_rate": 1.9319449874042725e-05, + "loss": 2.2786, + "step": 3991 + }, + { + "epoch": 0.13, + "grad_norm": 0.7115939259529114, + "learning_rate": 1.931906441674347e-05, + "loss": 2.2653, + "step": 3992 + }, + { + "epoch": 0.13, + "grad_norm": 0.6777035593986511, + "learning_rate": 1.931867885416289e-05, + "loss": 2.2099, + "step": 3993 + }, + { + "epoch": 0.13, + "grad_norm": 0.6582109928131104, + "learning_rate": 1.9318293186305336e-05, + "loss": 2.2221, + "step": 3994 + }, + { + "epoch": 0.13, + "grad_norm": 0.7182148098945618, + "learning_rate": 1.9317907413175163e-05, + "loss": 2.2377, + "step": 3995 + }, + { + "epoch": 0.13, + "grad_norm": 0.6935954689979553, + "learning_rate": 1.9317521534776738e-05, + "loss": 2.2432, + "step": 3996 + }, + { + "epoch": 0.13, + "grad_norm": 0.7065118551254272, + "learning_rate": 1.9317135551114416e-05, + "loss": 2.286, + "step": 3997 + }, + { + "epoch": 0.13, + "grad_norm": 0.7009655237197876, + "learning_rate": 1.9316749462192552e-05, + "loss": 2.2385, + "step": 3998 + }, + { + "epoch": 0.13, + "grad_norm": 0.6878575086593628, + "learning_rate": 1.9316363268015515e-05, + "loss": 2.2227, + "step": 3999 + }, + { + "epoch": 0.13, + "grad_norm": 0.7239075899124146, + "learning_rate": 1.9315976968587668e-05, + "loss": 2.3003, + "step": 4000 + }, + { + "epoch": 0.13, + "grad_norm": 0.6808204650878906, + "learning_rate": 1.931559056391337e-05, + "loss": 2.1639, + "step": 4001 + }, + { + "epoch": 0.13, + "grad_norm": 0.704873263835907, + "learning_rate": 1.9315204053996994e-05, + "loss": 2.2154, + "step": 4002 + }, + { + "epoch": 0.13, + "grad_norm": 0.7178678512573242, + "learning_rate": 1.9314817438842894e-05, + "loss": 2.2235, + "step": 4003 + }, + { + "epoch": 0.13, + "grad_norm": 0.6878161430358887, + "learning_rate": 1.931443071845545e-05, + "loss": 2.2823, + "step": 4004 + }, + { + "epoch": 0.13, + "grad_norm": 0.7085456252098083, + "learning_rate": 1.9314043892839028e-05, + "loss": 2.2742, + "step": 4005 + }, + { + "epoch": 0.13, + "grad_norm": 0.6980879306793213, + "learning_rate": 1.9313656961997992e-05, + "loss": 2.2868, + "step": 4006 + }, + { + "epoch": 0.13, + "grad_norm": 0.7101640701293945, + "learning_rate": 1.931326992593672e-05, + "loss": 2.2735, + "step": 4007 + }, + { + "epoch": 0.13, + "grad_norm": 0.667428731918335, + "learning_rate": 1.9312882784659586e-05, + "loss": 2.2479, + "step": 4008 + }, + { + "epoch": 0.13, + "grad_norm": 0.6823713183403015, + "learning_rate": 1.9312495538170957e-05, + "loss": 2.1994, + "step": 4009 + }, + { + "epoch": 0.13, + "grad_norm": 0.6577711701393127, + "learning_rate": 1.931210818647521e-05, + "loss": 2.2359, + "step": 4010 + }, + { + "epoch": 0.13, + "grad_norm": 0.7209216356277466, + "learning_rate": 1.9311720729576726e-05, + "loss": 2.283, + "step": 4011 + }, + { + "epoch": 0.13, + "grad_norm": 0.6786279678344727, + "learning_rate": 1.9311333167479877e-05, + "loss": 2.2328, + "step": 4012 + }, + { + "epoch": 0.13, + "grad_norm": 0.6739886403083801, + "learning_rate": 1.9310945500189044e-05, + "loss": 2.2308, + "step": 4013 + }, + { + "epoch": 0.13, + "grad_norm": 0.6918519139289856, + "learning_rate": 1.93105577277086e-05, + "loss": 2.2332, + "step": 4014 + }, + { + "epoch": 0.13, + "grad_norm": 0.6787145733833313, + "learning_rate": 1.9310169850042935e-05, + "loss": 2.3042, + "step": 4015 + }, + { + "epoch": 0.13, + "grad_norm": 0.6656612753868103, + "learning_rate": 1.9309781867196425e-05, + "loss": 2.1578, + "step": 4016 + }, + { + "epoch": 0.13, + "grad_norm": 0.7095340490341187, + "learning_rate": 1.930939377917346e-05, + "loss": 2.3214, + "step": 4017 + }, + { + "epoch": 0.13, + "grad_norm": 0.670860767364502, + "learning_rate": 1.9309005585978417e-05, + "loss": 2.2514, + "step": 4018 + }, + { + "epoch": 0.13, + "grad_norm": 0.6796691417694092, + "learning_rate": 1.930861728761569e-05, + "loss": 2.217, + "step": 4019 + }, + { + "epoch": 0.13, + "grad_norm": 0.6609011888504028, + "learning_rate": 1.9308228884089652e-05, + "loss": 2.2554, + "step": 4020 + }, + { + "epoch": 0.13, + "grad_norm": 0.6706184148788452, + "learning_rate": 1.9307840375404702e-05, + "loss": 2.2334, + "step": 4021 + }, + { + "epoch": 0.13, + "grad_norm": 0.6741407513618469, + "learning_rate": 1.930745176156523e-05, + "loss": 2.2528, + "step": 4022 + }, + { + "epoch": 0.13, + "grad_norm": 0.7067764401435852, + "learning_rate": 1.930706304257562e-05, + "loss": 2.2892, + "step": 4023 + }, + { + "epoch": 0.13, + "grad_norm": 0.6657218933105469, + "learning_rate": 1.930667421844026e-05, + "loss": 2.2829, + "step": 4024 + }, + { + "epoch": 0.13, + "grad_norm": 0.69483882188797, + "learning_rate": 1.9306285289163557e-05, + "loss": 2.2651, + "step": 4025 + }, + { + "epoch": 0.13, + "grad_norm": 0.7377609610557556, + "learning_rate": 1.9305896254749895e-05, + "loss": 2.2366, + "step": 4026 + }, + { + "epoch": 0.13, + "grad_norm": 0.6977409720420837, + "learning_rate": 1.930550711520367e-05, + "loss": 2.2245, + "step": 4027 + }, + { + "epoch": 0.13, + "grad_norm": 0.6938289999961853, + "learning_rate": 1.930511787052928e-05, + "loss": 2.2253, + "step": 4028 + }, + { + "epoch": 0.13, + "grad_norm": 0.738755464553833, + "learning_rate": 1.930472852073112e-05, + "loss": 2.3212, + "step": 4029 + }, + { + "epoch": 0.13, + "grad_norm": 0.6851395964622498, + "learning_rate": 1.9304339065813593e-05, + "loss": 2.2855, + "step": 4030 + }, + { + "epoch": 0.13, + "grad_norm": 0.7024518847465515, + "learning_rate": 1.9303949505781093e-05, + "loss": 2.2453, + "step": 4031 + }, + { + "epoch": 0.13, + "grad_norm": 0.714691698551178, + "learning_rate": 1.930355984063803e-05, + "loss": 2.2441, + "step": 4032 + }, + { + "epoch": 0.13, + "grad_norm": 0.6691733598709106, + "learning_rate": 1.9303170070388793e-05, + "loss": 2.1669, + "step": 4033 + }, + { + "epoch": 0.13, + "grad_norm": 0.7234676480293274, + "learning_rate": 1.93027801950378e-05, + "loss": 2.183, + "step": 4034 + }, + { + "epoch": 0.13, + "grad_norm": 0.6671250462532043, + "learning_rate": 1.9302390214589444e-05, + "loss": 2.3147, + "step": 4035 + }, + { + "epoch": 0.13, + "grad_norm": 0.7062347531318665, + "learning_rate": 1.9302000129048135e-05, + "loss": 2.2399, + "step": 4036 + }, + { + "epoch": 0.13, + "grad_norm": 0.7137826681137085, + "learning_rate": 1.930160993841828e-05, + "loss": 2.3784, + "step": 4037 + }, + { + "epoch": 0.13, + "grad_norm": 0.6843861937522888, + "learning_rate": 1.9301219642704287e-05, + "loss": 2.3105, + "step": 4038 + }, + { + "epoch": 0.13, + "grad_norm": 0.7326799035072327, + "learning_rate": 1.9300829241910566e-05, + "loss": 2.2966, + "step": 4039 + }, + { + "epoch": 0.13, + "grad_norm": 0.7401648163795471, + "learning_rate": 1.9300438736041527e-05, + "loss": 2.2887, + "step": 4040 + }, + { + "epoch": 0.13, + "grad_norm": 0.7100834846496582, + "learning_rate": 1.9300048125101582e-05, + "loss": 2.2705, + "step": 4041 + }, + { + "epoch": 0.13, + "grad_norm": 0.6748153567314148, + "learning_rate": 1.9299657409095145e-05, + "loss": 2.2105, + "step": 4042 + }, + { + "epoch": 0.13, + "grad_norm": 0.6559632420539856, + "learning_rate": 1.9299266588026627e-05, + "loss": 2.2251, + "step": 4043 + }, + { + "epoch": 0.13, + "grad_norm": 0.7120890021324158, + "learning_rate": 1.9298875661900443e-05, + "loss": 2.2803, + "step": 4044 + }, + { + "epoch": 0.13, + "grad_norm": 0.6809351444244385, + "learning_rate": 1.9298484630721015e-05, + "loss": 2.2034, + "step": 4045 + }, + { + "epoch": 0.13, + "grad_norm": 0.7116170525550842, + "learning_rate": 1.9298093494492754e-05, + "loss": 2.245, + "step": 4046 + }, + { + "epoch": 0.13, + "grad_norm": 0.692582905292511, + "learning_rate": 1.929770225322009e-05, + "loss": 2.298, + "step": 4047 + }, + { + "epoch": 0.13, + "grad_norm": 0.717008650302887, + "learning_rate": 1.9297310906907426e-05, + "loss": 2.2476, + "step": 4048 + }, + { + "epoch": 0.13, + "grad_norm": 0.6927548050880432, + "learning_rate": 1.9296919455559195e-05, + "loss": 2.3081, + "step": 4049 + }, + { + "epoch": 0.13, + "grad_norm": 0.6683803200721741, + "learning_rate": 1.9296527899179817e-05, + "loss": 2.2315, + "step": 4050 + }, + { + "epoch": 0.13, + "grad_norm": 0.6744758486747742, + "learning_rate": 1.9296136237773714e-05, + "loss": 2.2669, + "step": 4051 + }, + { + "epoch": 0.13, + "grad_norm": 0.7054483294487, + "learning_rate": 1.929574447134531e-05, + "loss": 2.1422, + "step": 4052 + }, + { + "epoch": 0.13, + "grad_norm": 0.6705911755561829, + "learning_rate": 1.9295352599899037e-05, + "loss": 2.2704, + "step": 4053 + }, + { + "epoch": 0.13, + "grad_norm": 0.6922420859336853, + "learning_rate": 1.9294960623439314e-05, + "loss": 2.2424, + "step": 4054 + }, + { + "epoch": 0.13, + "grad_norm": 0.6809048056602478, + "learning_rate": 1.9294568541970578e-05, + "loss": 2.2833, + "step": 4055 + }, + { + "epoch": 0.13, + "grad_norm": 0.6752691864967346, + "learning_rate": 1.9294176355497248e-05, + "loss": 2.3028, + "step": 4056 + }, + { + "epoch": 0.13, + "grad_norm": 0.6868904829025269, + "learning_rate": 1.9293784064023766e-05, + "loss": 2.3387, + "step": 4057 + }, + { + "epoch": 0.14, + "grad_norm": 0.6761431694030762, + "learning_rate": 1.9293391667554556e-05, + "loss": 2.3075, + "step": 4058 + }, + { + "epoch": 0.14, + "grad_norm": 0.6953502893447876, + "learning_rate": 1.9292999166094054e-05, + "loss": 2.2698, + "step": 4059 + }, + { + "epoch": 0.14, + "grad_norm": 0.6974455714225769, + "learning_rate": 1.929260655964669e-05, + "loss": 2.2439, + "step": 4060 + }, + { + "epoch": 0.14, + "grad_norm": 0.6760315299034119, + "learning_rate": 1.9292213848216906e-05, + "loss": 2.2518, + "step": 4061 + }, + { + "epoch": 0.14, + "grad_norm": 0.658374011516571, + "learning_rate": 1.9291821031809138e-05, + "loss": 2.2063, + "step": 4062 + }, + { + "epoch": 0.14, + "grad_norm": 0.6942732930183411, + "learning_rate": 1.929142811042782e-05, + "loss": 2.245, + "step": 4063 + }, + { + "epoch": 0.14, + "grad_norm": 0.6837006211280823, + "learning_rate": 1.9291035084077393e-05, + "loss": 2.2986, + "step": 4064 + }, + { + "epoch": 0.14, + "grad_norm": 0.7031409740447998, + "learning_rate": 1.9290641952762293e-05, + "loss": 2.2378, + "step": 4065 + }, + { + "epoch": 0.14, + "grad_norm": 0.6463398337364197, + "learning_rate": 1.9290248716486967e-05, + "loss": 2.254, + "step": 4066 + }, + { + "epoch": 0.14, + "grad_norm": 0.6659468412399292, + "learning_rate": 1.9289855375255857e-05, + "loss": 2.2422, + "step": 4067 + }, + { + "epoch": 0.14, + "grad_norm": 0.7240059971809387, + "learning_rate": 1.9289461929073403e-05, + "loss": 2.2301, + "step": 4068 + }, + { + "epoch": 0.14, + "grad_norm": 0.6983621716499329, + "learning_rate": 1.9289068377944055e-05, + "loss": 2.2544, + "step": 4069 + }, + { + "epoch": 0.14, + "grad_norm": 0.6898089051246643, + "learning_rate": 1.9288674721872255e-05, + "loss": 2.3016, + "step": 4070 + }, + { + "epoch": 0.14, + "grad_norm": 0.6957117319107056, + "learning_rate": 1.928828096086245e-05, + "loss": 2.2343, + "step": 4071 + }, + { + "epoch": 0.14, + "grad_norm": 0.7072228789329529, + "learning_rate": 1.9287887094919095e-05, + "loss": 2.2243, + "step": 4072 + }, + { + "epoch": 0.14, + "grad_norm": 0.6874589920043945, + "learning_rate": 1.928749312404663e-05, + "loss": 2.185, + "step": 4073 + }, + { + "epoch": 0.14, + "grad_norm": 0.6967061758041382, + "learning_rate": 1.9287099048249515e-05, + "loss": 2.2442, + "step": 4074 + }, + { + "epoch": 0.14, + "grad_norm": 0.6740591526031494, + "learning_rate": 1.9286704867532195e-05, + "loss": 2.2356, + "step": 4075 + }, + { + "epoch": 0.14, + "grad_norm": 0.6667296886444092, + "learning_rate": 1.9286310581899125e-05, + "loss": 2.269, + "step": 4076 + }, + { + "epoch": 0.14, + "grad_norm": 0.7137822508811951, + "learning_rate": 1.9285916191354763e-05, + "loss": 2.3062, + "step": 4077 + }, + { + "epoch": 0.14, + "grad_norm": 0.6585429906845093, + "learning_rate": 1.928552169590356e-05, + "loss": 2.2543, + "step": 4078 + }, + { + "epoch": 0.14, + "grad_norm": 0.694356381893158, + "learning_rate": 1.9285127095549975e-05, + "loss": 2.2893, + "step": 4079 + }, + { + "epoch": 0.14, + "grad_norm": 0.6740075945854187, + "learning_rate": 1.9284732390298467e-05, + "loss": 2.252, + "step": 4080 + }, + { + "epoch": 0.14, + "grad_norm": 0.6916618943214417, + "learning_rate": 1.9284337580153495e-05, + "loss": 2.2878, + "step": 4081 + }, + { + "epoch": 0.14, + "grad_norm": 0.6934636235237122, + "learning_rate": 1.928394266511951e-05, + "loss": 2.1903, + "step": 4082 + }, + { + "epoch": 0.14, + "grad_norm": 0.6917687058448792, + "learning_rate": 1.928354764520099e-05, + "loss": 2.1913, + "step": 4083 + }, + { + "epoch": 0.14, + "grad_norm": 0.6849561929702759, + "learning_rate": 1.928315252040239e-05, + "loss": 2.2753, + "step": 4084 + }, + { + "epoch": 0.14, + "grad_norm": 0.6985245943069458, + "learning_rate": 1.928275729072817e-05, + "loss": 2.2794, + "step": 4085 + }, + { + "epoch": 0.14, + "grad_norm": 0.6691417098045349, + "learning_rate": 1.9282361956182796e-05, + "loss": 2.1862, + "step": 4086 + }, + { + "epoch": 0.14, + "grad_norm": 0.7120523452758789, + "learning_rate": 1.9281966516770742e-05, + "loss": 2.2543, + "step": 4087 + }, + { + "epoch": 0.14, + "grad_norm": 0.6947247982025146, + "learning_rate": 1.9281570972496467e-05, + "loss": 2.2446, + "step": 4088 + }, + { + "epoch": 0.14, + "grad_norm": 0.689336895942688, + "learning_rate": 1.9281175323364442e-05, + "loss": 2.2132, + "step": 4089 + }, + { + "epoch": 0.14, + "grad_norm": 0.6751183271408081, + "learning_rate": 1.928077956937914e-05, + "loss": 2.2913, + "step": 4090 + }, + { + "epoch": 0.14, + "grad_norm": 0.7047361135482788, + "learning_rate": 1.928038371054503e-05, + "loss": 2.2762, + "step": 4091 + }, + { + "epoch": 0.14, + "grad_norm": 0.6893208622932434, + "learning_rate": 1.9279987746866578e-05, + "loss": 2.2295, + "step": 4092 + }, + { + "epoch": 0.14, + "grad_norm": 0.6904289722442627, + "learning_rate": 1.927959167834827e-05, + "loss": 2.2475, + "step": 4093 + }, + { + "epoch": 0.14, + "grad_norm": 0.6864316463470459, + "learning_rate": 1.927919550499457e-05, + "loss": 2.1996, + "step": 4094 + }, + { + "epoch": 0.14, + "grad_norm": 0.7316529750823975, + "learning_rate": 1.9278799226809958e-05, + "loss": 2.337, + "step": 4095 + }, + { + "epoch": 0.14, + "grad_norm": 0.6959969401359558, + "learning_rate": 1.9278402843798908e-05, + "loss": 2.2408, + "step": 4096 + }, + { + "epoch": 0.14, + "grad_norm": 0.7061643600463867, + "learning_rate": 1.92780063559659e-05, + "loss": 2.283, + "step": 4097 + }, + { + "epoch": 0.14, + "grad_norm": 0.6846803426742554, + "learning_rate": 1.927760976331542e-05, + "loss": 2.2315, + "step": 4098 + }, + { + "epoch": 0.14, + "grad_norm": 0.6590558290481567, + "learning_rate": 1.9277213065851937e-05, + "loss": 2.1848, + "step": 4099 + }, + { + "epoch": 0.14, + "grad_norm": 0.71205073595047, + "learning_rate": 1.9276816263579938e-05, + "loss": 2.2495, + "step": 4100 + }, + { + "epoch": 0.14, + "grad_norm": 0.6840975284576416, + "learning_rate": 1.9276419356503905e-05, + "loss": 2.2749, + "step": 4101 + }, + { + "epoch": 0.14, + "grad_norm": 0.7232087850570679, + "learning_rate": 1.9276022344628328e-05, + "loss": 2.1806, + "step": 4102 + }, + { + "epoch": 0.14, + "grad_norm": 0.7220843434333801, + "learning_rate": 1.927562522795768e-05, + "loss": 2.2749, + "step": 4103 + }, + { + "epoch": 0.14, + "grad_norm": 0.6793683767318726, + "learning_rate": 1.927522800649646e-05, + "loss": 2.2893, + "step": 4104 + }, + { + "epoch": 0.14, + "grad_norm": 0.719146728515625, + "learning_rate": 1.9274830680249147e-05, + "loss": 2.191, + "step": 4105 + }, + { + "epoch": 0.14, + "grad_norm": 0.6640661358833313, + "learning_rate": 1.927443324922023e-05, + "loss": 2.2322, + "step": 4106 + }, + { + "epoch": 0.14, + "grad_norm": 0.6741136312484741, + "learning_rate": 1.9274035713414206e-05, + "loss": 2.2247, + "step": 4107 + }, + { + "epoch": 0.14, + "grad_norm": 0.6907027959823608, + "learning_rate": 1.927363807283556e-05, + "loss": 2.2309, + "step": 4108 + }, + { + "epoch": 0.14, + "grad_norm": 0.6727420687675476, + "learning_rate": 1.9273240327488785e-05, + "loss": 2.2236, + "step": 4109 + }, + { + "epoch": 0.14, + "grad_norm": 0.6931125521659851, + "learning_rate": 1.9272842477378375e-05, + "loss": 2.2684, + "step": 4110 + }, + { + "epoch": 0.14, + "grad_norm": 0.6975182890892029, + "learning_rate": 1.9272444522508827e-05, + "loss": 2.2534, + "step": 4111 + }, + { + "epoch": 0.14, + "grad_norm": 0.6834473609924316, + "learning_rate": 1.9272046462884634e-05, + "loss": 2.297, + "step": 4112 + }, + { + "epoch": 0.14, + "grad_norm": 0.6884832978248596, + "learning_rate": 1.9271648298510292e-05, + "loss": 2.2195, + "step": 4113 + }, + { + "epoch": 0.14, + "grad_norm": 0.7182244658470154, + "learning_rate": 1.9271250029390305e-05, + "loss": 2.2656, + "step": 4114 + }, + { + "epoch": 0.14, + "grad_norm": 0.676688551902771, + "learning_rate": 1.9270851655529167e-05, + "loss": 2.2906, + "step": 4115 + }, + { + "epoch": 0.14, + "grad_norm": 0.6723467111587524, + "learning_rate": 1.9270453176931382e-05, + "loss": 2.2452, + "step": 4116 + }, + { + "epoch": 0.14, + "grad_norm": 0.6708421111106873, + "learning_rate": 1.9270054593601446e-05, + "loss": 2.245, + "step": 4117 + }, + { + "epoch": 0.14, + "grad_norm": 0.664937436580658, + "learning_rate": 1.926965590554387e-05, + "loss": 2.262, + "step": 4118 + }, + { + "epoch": 0.14, + "grad_norm": 0.7137066721916199, + "learning_rate": 1.9269257112763153e-05, + "loss": 2.206, + "step": 4119 + }, + { + "epoch": 0.14, + "grad_norm": 0.7133108377456665, + "learning_rate": 1.92688582152638e-05, + "loss": 2.2702, + "step": 4120 + }, + { + "epoch": 0.14, + "grad_norm": 0.7143247127532959, + "learning_rate": 1.926845921305032e-05, + "loss": 2.2699, + "step": 4121 + }, + { + "epoch": 0.14, + "grad_norm": 0.6612210273742676, + "learning_rate": 1.926806010612722e-05, + "loss": 2.2304, + "step": 4122 + }, + { + "epoch": 0.14, + "grad_norm": 0.6612570285797119, + "learning_rate": 1.9267660894499006e-05, + "loss": 2.2376, + "step": 4123 + }, + { + "epoch": 0.14, + "grad_norm": 0.7004397511482239, + "learning_rate": 1.9267261578170193e-05, + "loss": 2.2443, + "step": 4124 + }, + { + "epoch": 0.14, + "grad_norm": 0.6992260217666626, + "learning_rate": 1.926686215714529e-05, + "loss": 2.1572, + "step": 4125 + }, + { + "epoch": 0.14, + "grad_norm": 0.6960259079933167, + "learning_rate": 1.9266462631428807e-05, + "loss": 2.2872, + "step": 4126 + }, + { + "epoch": 0.14, + "grad_norm": 0.769133985042572, + "learning_rate": 1.9266063001025263e-05, + "loss": 2.2139, + "step": 4127 + }, + { + "epoch": 0.14, + "grad_norm": 0.7451737523078918, + "learning_rate": 1.9265663265939167e-05, + "loss": 2.2873, + "step": 4128 + }, + { + "epoch": 0.14, + "grad_norm": 0.6848505139350891, + "learning_rate": 1.926526342617504e-05, + "loss": 2.2294, + "step": 4129 + }, + { + "epoch": 0.14, + "grad_norm": 0.6998298764228821, + "learning_rate": 1.9264863481737396e-05, + "loss": 2.1783, + "step": 4130 + }, + { + "epoch": 0.14, + "grad_norm": 0.7073127627372742, + "learning_rate": 1.9264463432630752e-05, + "loss": 2.3522, + "step": 4131 + }, + { + "epoch": 0.14, + "grad_norm": 0.6959075927734375, + "learning_rate": 1.9264063278859634e-05, + "loss": 2.1921, + "step": 4132 + }, + { + "epoch": 0.14, + "grad_norm": 0.7207573056221008, + "learning_rate": 1.9263663020428556e-05, + "loss": 2.2664, + "step": 4133 + }, + { + "epoch": 0.14, + "grad_norm": 0.6904211640357971, + "learning_rate": 1.926326265734204e-05, + "loss": 2.2408, + "step": 4134 + }, + { + "epoch": 0.14, + "grad_norm": 0.6853381991386414, + "learning_rate": 1.9262862189604616e-05, + "loss": 2.2246, + "step": 4135 + }, + { + "epoch": 0.14, + "grad_norm": 0.6879850029945374, + "learning_rate": 1.92624616172208e-05, + "loss": 2.2129, + "step": 4136 + }, + { + "epoch": 0.14, + "grad_norm": 0.6997902989387512, + "learning_rate": 1.9262060940195123e-05, + "loss": 2.2817, + "step": 4137 + }, + { + "epoch": 0.14, + "grad_norm": 0.6840111613273621, + "learning_rate": 1.926166015853211e-05, + "loss": 2.2462, + "step": 4138 + }, + { + "epoch": 0.14, + "grad_norm": 0.6782870292663574, + "learning_rate": 1.9261259272236287e-05, + "loss": 2.254, + "step": 4139 + }, + { + "epoch": 0.14, + "grad_norm": 0.6812632083892822, + "learning_rate": 1.926085828131218e-05, + "loss": 2.3096, + "step": 4140 + }, + { + "epoch": 0.14, + "grad_norm": 0.6874100565910339, + "learning_rate": 1.926045718576433e-05, + "loss": 2.2337, + "step": 4141 + }, + { + "epoch": 0.14, + "grad_norm": 0.6933589577674866, + "learning_rate": 1.926005598559726e-05, + "loss": 2.2611, + "step": 4142 + }, + { + "epoch": 0.14, + "grad_norm": 0.660602331161499, + "learning_rate": 1.9259654680815503e-05, + "loss": 2.223, + "step": 4143 + }, + { + "epoch": 0.14, + "grad_norm": 0.6787921190261841, + "learning_rate": 1.9259253271423595e-05, + "loss": 2.2034, + "step": 4144 + }, + { + "epoch": 0.14, + "grad_norm": 0.6807339191436768, + "learning_rate": 1.925885175742607e-05, + "loss": 2.2636, + "step": 4145 + }, + { + "epoch": 0.14, + "grad_norm": 0.7023966908454895, + "learning_rate": 1.9258450138827465e-05, + "loss": 2.2757, + "step": 4146 + }, + { + "epoch": 0.14, + "grad_norm": 0.6914066672325134, + "learning_rate": 1.9258048415632317e-05, + "loss": 2.2947, + "step": 4147 + }, + { + "epoch": 0.14, + "grad_norm": 0.715107798576355, + "learning_rate": 1.925764658784516e-05, + "loss": 2.2492, + "step": 4148 + }, + { + "epoch": 0.14, + "grad_norm": 0.6742933988571167, + "learning_rate": 1.925724465547054e-05, + "loss": 2.2126, + "step": 4149 + }, + { + "epoch": 0.14, + "grad_norm": 0.6703738570213318, + "learning_rate": 1.9256842618512996e-05, + "loss": 2.2814, + "step": 4150 + }, + { + "epoch": 0.14, + "grad_norm": 0.6775171160697937, + "learning_rate": 1.9256440476977067e-05, + "loss": 2.2763, + "step": 4151 + }, + { + "epoch": 0.14, + "grad_norm": 0.6648553013801575, + "learning_rate": 1.92560382308673e-05, + "loss": 2.2145, + "step": 4152 + }, + { + "epoch": 0.14, + "grad_norm": 0.6665297150611877, + "learning_rate": 1.9255635880188234e-05, + "loss": 2.1968, + "step": 4153 + }, + { + "epoch": 0.14, + "grad_norm": 0.7342318296432495, + "learning_rate": 1.9255233424944424e-05, + "loss": 2.3158, + "step": 4154 + }, + { + "epoch": 0.14, + "grad_norm": 0.7199332118034363, + "learning_rate": 1.9254830865140407e-05, + "loss": 2.2776, + "step": 4155 + }, + { + "epoch": 0.14, + "grad_norm": 0.709397554397583, + "learning_rate": 1.9254428200780734e-05, + "loss": 2.2482, + "step": 4156 + }, + { + "epoch": 0.14, + "grad_norm": 0.6641029715538025, + "learning_rate": 1.9254025431869957e-05, + "loss": 2.2156, + "step": 4157 + }, + { + "epoch": 0.14, + "grad_norm": 0.6642153263092041, + "learning_rate": 1.9253622558412625e-05, + "loss": 2.2505, + "step": 4158 + }, + { + "epoch": 0.14, + "grad_norm": 0.7343358993530273, + "learning_rate": 1.9253219580413287e-05, + "loss": 2.2497, + "step": 4159 + }, + { + "epoch": 0.14, + "grad_norm": 0.6918613314628601, + "learning_rate": 1.9252816497876497e-05, + "loss": 2.251, + "step": 4160 + }, + { + "epoch": 0.14, + "grad_norm": 0.6823561191558838, + "learning_rate": 1.925241331080681e-05, + "loss": 2.2629, + "step": 4161 + }, + { + "epoch": 0.14, + "grad_norm": 0.7178806066513062, + "learning_rate": 1.9252010019208778e-05, + "loss": 2.2447, + "step": 4162 + }, + { + "epoch": 0.14, + "grad_norm": 0.67769855260849, + "learning_rate": 1.925160662308696e-05, + "loss": 2.2854, + "step": 4163 + }, + { + "epoch": 0.14, + "grad_norm": 0.6748905181884766, + "learning_rate": 1.9251203122445915e-05, + "loss": 2.2027, + "step": 4164 + }, + { + "epoch": 0.14, + "grad_norm": 0.6750346422195435, + "learning_rate": 1.9250799517290196e-05, + "loss": 2.2551, + "step": 4165 + }, + { + "epoch": 0.14, + "grad_norm": 0.7078080773353577, + "learning_rate": 1.9250395807624364e-05, + "loss": 2.2747, + "step": 4166 + }, + { + "epoch": 0.14, + "grad_norm": 0.6961045265197754, + "learning_rate": 1.9249991993452983e-05, + "loss": 2.2219, + "step": 4167 + }, + { + "epoch": 0.14, + "grad_norm": 0.677168607711792, + "learning_rate": 1.9249588074780612e-05, + "loss": 2.2163, + "step": 4168 + }, + { + "epoch": 0.14, + "grad_norm": 0.7004295587539673, + "learning_rate": 1.924918405161182e-05, + "loss": 2.2627, + "step": 4169 + }, + { + "epoch": 0.14, + "grad_norm": 0.6706784963607788, + "learning_rate": 1.9248779923951162e-05, + "loss": 2.207, + "step": 4170 + }, + { + "epoch": 0.14, + "grad_norm": 0.745366096496582, + "learning_rate": 1.924837569180321e-05, + "loss": 2.2778, + "step": 4171 + }, + { + "epoch": 0.14, + "grad_norm": 0.7028800249099731, + "learning_rate": 1.9247971355172533e-05, + "loss": 2.2395, + "step": 4172 + }, + { + "epoch": 0.14, + "grad_norm": 0.6924868226051331, + "learning_rate": 1.9247566914063695e-05, + "loss": 2.1954, + "step": 4173 + }, + { + "epoch": 0.14, + "grad_norm": 0.6988398432731628, + "learning_rate": 1.9247162368481264e-05, + "loss": 2.2969, + "step": 4174 + }, + { + "epoch": 0.14, + "grad_norm": 0.6819460391998291, + "learning_rate": 1.9246757718429808e-05, + "loss": 2.214, + "step": 4175 + }, + { + "epoch": 0.14, + "grad_norm": 0.6714036464691162, + "learning_rate": 1.9246352963913907e-05, + "loss": 2.2242, + "step": 4176 + }, + { + "epoch": 0.14, + "grad_norm": 0.699151337146759, + "learning_rate": 1.924594810493813e-05, + "loss": 2.2486, + "step": 4177 + }, + { + "epoch": 0.14, + "grad_norm": 0.6854993104934692, + "learning_rate": 1.9245543141507047e-05, + "loss": 2.2521, + "step": 4178 + }, + { + "epoch": 0.14, + "grad_norm": 0.743733823299408, + "learning_rate": 1.924513807362524e-05, + "loss": 2.314, + "step": 4179 + }, + { + "epoch": 0.14, + "grad_norm": 0.7106902003288269, + "learning_rate": 1.9244732901297276e-05, + "loss": 2.295, + "step": 4180 + }, + { + "epoch": 0.14, + "grad_norm": 0.6810404658317566, + "learning_rate": 1.9244327624527738e-05, + "loss": 2.2485, + "step": 4181 + }, + { + "epoch": 0.14, + "grad_norm": 0.7209069132804871, + "learning_rate": 1.9243922243321206e-05, + "loss": 2.2621, + "step": 4182 + }, + { + "epoch": 0.14, + "grad_norm": 0.6818249225616455, + "learning_rate": 1.924351675768226e-05, + "loss": 2.2309, + "step": 4183 + }, + { + "epoch": 0.14, + "grad_norm": 0.7056018710136414, + "learning_rate": 1.924311116761548e-05, + "loss": 2.2297, + "step": 4184 + }, + { + "epoch": 0.14, + "grad_norm": 0.6919586062431335, + "learning_rate": 1.9242705473125443e-05, + "loss": 2.3078, + "step": 4185 + }, + { + "epoch": 0.14, + "grad_norm": 0.6827554702758789, + "learning_rate": 1.9242299674216736e-05, + "loss": 2.2194, + "step": 4186 + }, + { + "epoch": 0.14, + "grad_norm": 0.6794097423553467, + "learning_rate": 1.9241893770893945e-05, + "loss": 2.2373, + "step": 4187 + }, + { + "epoch": 0.14, + "grad_norm": 0.6981142163276672, + "learning_rate": 1.9241487763161655e-05, + "loss": 2.3126, + "step": 4188 + }, + { + "epoch": 0.14, + "grad_norm": 0.6815127730369568, + "learning_rate": 1.924108165102445e-05, + "loss": 2.1562, + "step": 4189 + }, + { + "epoch": 0.14, + "grad_norm": 0.7028631567955017, + "learning_rate": 1.9240675434486924e-05, + "loss": 2.2575, + "step": 4190 + }, + { + "epoch": 0.14, + "grad_norm": 0.6733468770980835, + "learning_rate": 1.9240269113553662e-05, + "loss": 2.2176, + "step": 4191 + }, + { + "epoch": 0.14, + "grad_norm": 0.6907353401184082, + "learning_rate": 1.9239862688229253e-05, + "loss": 2.2068, + "step": 4192 + }, + { + "epoch": 0.14, + "grad_norm": 0.7049823999404907, + "learning_rate": 1.923945615851829e-05, + "loss": 2.2611, + "step": 4193 + }, + { + "epoch": 0.14, + "grad_norm": 0.6889074444770813, + "learning_rate": 1.923904952442537e-05, + "loss": 2.243, + "step": 4194 + }, + { + "epoch": 0.14, + "grad_norm": 0.6630429625511169, + "learning_rate": 1.923864278595508e-05, + "loss": 2.2565, + "step": 4195 + }, + { + "epoch": 0.14, + "grad_norm": 0.7224636673927307, + "learning_rate": 1.9238235943112017e-05, + "loss": 2.2782, + "step": 4196 + }, + { + "epoch": 0.14, + "grad_norm": 0.6806182265281677, + "learning_rate": 1.9237828995900784e-05, + "loss": 2.264, + "step": 4197 + }, + { + "epoch": 0.14, + "grad_norm": 0.6724852323532104, + "learning_rate": 1.9237421944325968e-05, + "loss": 2.2331, + "step": 4198 + }, + { + "epoch": 0.14, + "grad_norm": 0.6790256500244141, + "learning_rate": 1.9237014788392173e-05, + "loss": 2.2826, + "step": 4199 + }, + { + "epoch": 0.14, + "grad_norm": 0.66058349609375, + "learning_rate": 1.9236607528104e-05, + "loss": 2.2221, + "step": 4200 + }, + { + "epoch": 0.14, + "grad_norm": 0.6907892823219299, + "learning_rate": 1.9236200163466046e-05, + "loss": 2.2802, + "step": 4201 + }, + { + "epoch": 0.14, + "grad_norm": 0.7072100043296814, + "learning_rate": 1.9235792694482914e-05, + "loss": 2.2423, + "step": 4202 + }, + { + "epoch": 0.14, + "grad_norm": 0.6915808320045471, + "learning_rate": 1.9235385121159214e-05, + "loss": 2.2589, + "step": 4203 + }, + { + "epoch": 0.14, + "grad_norm": 0.6821367740631104, + "learning_rate": 1.923497744349954e-05, + "loss": 2.2619, + "step": 4204 + }, + { + "epoch": 0.14, + "grad_norm": 0.7339404821395874, + "learning_rate": 1.923456966150851e-05, + "loss": 2.2702, + "step": 4205 + }, + { + "epoch": 0.14, + "grad_norm": 0.6708696484565735, + "learning_rate": 1.923416177519072e-05, + "loss": 2.2652, + "step": 4206 + }, + { + "epoch": 0.14, + "grad_norm": 0.6846692562103271, + "learning_rate": 1.9233753784550783e-05, + "loss": 2.227, + "step": 4207 + }, + { + "epoch": 0.14, + "grad_norm": 0.7005242705345154, + "learning_rate": 1.9233345689593304e-05, + "loss": 2.2362, + "step": 4208 + }, + { + "epoch": 0.14, + "grad_norm": 0.6648873090744019, + "learning_rate": 1.92329374903229e-05, + "loss": 2.1961, + "step": 4209 + }, + { + "epoch": 0.14, + "grad_norm": 0.7171528339385986, + "learning_rate": 1.9232529186744177e-05, + "loss": 2.2767, + "step": 4210 + }, + { + "epoch": 0.14, + "grad_norm": 0.7055249214172363, + "learning_rate": 1.923212077886175e-05, + "loss": 2.3218, + "step": 4211 + }, + { + "epoch": 0.14, + "grad_norm": 0.699110746383667, + "learning_rate": 1.9231712266680236e-05, + "loss": 2.2461, + "step": 4212 + }, + { + "epoch": 0.14, + "grad_norm": 0.676840603351593, + "learning_rate": 1.9231303650204244e-05, + "loss": 2.1944, + "step": 4213 + }, + { + "epoch": 0.14, + "grad_norm": 0.6827298998832703, + "learning_rate": 1.9230894929438393e-05, + "loss": 2.3278, + "step": 4214 + }, + { + "epoch": 0.14, + "grad_norm": 0.7176477909088135, + "learning_rate": 1.9230486104387304e-05, + "loss": 2.2611, + "step": 4215 + }, + { + "epoch": 0.14, + "grad_norm": 0.7108551263809204, + "learning_rate": 1.9230077175055592e-05, + "loss": 2.2883, + "step": 4216 + }, + { + "epoch": 0.14, + "grad_norm": 0.7139759659767151, + "learning_rate": 1.9229668141447877e-05, + "loss": 2.2279, + "step": 4217 + }, + { + "epoch": 0.14, + "grad_norm": 0.7175858020782471, + "learning_rate": 1.922925900356878e-05, + "loss": 2.2631, + "step": 4218 + }, + { + "epoch": 0.14, + "grad_norm": 0.7114773392677307, + "learning_rate": 1.9228849761422923e-05, + "loss": 2.2262, + "step": 4219 + }, + { + "epoch": 0.14, + "grad_norm": 0.708707332611084, + "learning_rate": 1.922844041501493e-05, + "loss": 2.2704, + "step": 4220 + }, + { + "epoch": 0.14, + "grad_norm": 0.7688212394714355, + "learning_rate": 1.9228030964349428e-05, + "loss": 2.2478, + "step": 4221 + }, + { + "epoch": 0.14, + "grad_norm": 0.6771520972251892, + "learning_rate": 1.922762140943104e-05, + "loss": 2.1879, + "step": 4222 + }, + { + "epoch": 0.14, + "grad_norm": 0.7011240124702454, + "learning_rate": 1.922721175026439e-05, + "loss": 2.2365, + "step": 4223 + }, + { + "epoch": 0.14, + "grad_norm": 0.6894611716270447, + "learning_rate": 1.9226801986854112e-05, + "loss": 2.3041, + "step": 4224 + }, + { + "epoch": 0.14, + "grad_norm": 0.7119976282119751, + "learning_rate": 1.9226392119204835e-05, + "loss": 2.2309, + "step": 4225 + }, + { + "epoch": 0.14, + "grad_norm": 0.7273371815681458, + "learning_rate": 1.922598214732118e-05, + "loss": 2.2803, + "step": 4226 + }, + { + "epoch": 0.14, + "grad_norm": 0.6832113265991211, + "learning_rate": 1.922557207120779e-05, + "loss": 2.2929, + "step": 4227 + }, + { + "epoch": 0.14, + "grad_norm": 0.6855977773666382, + "learning_rate": 1.922516189086929e-05, + "loss": 2.2334, + "step": 4228 + }, + { + "epoch": 0.14, + "grad_norm": 0.7750351428985596, + "learning_rate": 1.9224751606310323e-05, + "loss": 2.2476, + "step": 4229 + }, + { + "epoch": 0.14, + "grad_norm": 0.6686546206474304, + "learning_rate": 1.9224341217535517e-05, + "loss": 2.2335, + "step": 4230 + }, + { + "epoch": 0.14, + "grad_norm": 0.6697055101394653, + "learning_rate": 1.922393072454951e-05, + "loss": 2.1985, + "step": 4231 + }, + { + "epoch": 0.14, + "grad_norm": 0.6736751198768616, + "learning_rate": 1.9223520127356938e-05, + "loss": 2.2272, + "step": 4232 + }, + { + "epoch": 0.14, + "grad_norm": 0.6789987683296204, + "learning_rate": 1.922310942596244e-05, + "loss": 2.233, + "step": 4233 + }, + { + "epoch": 0.14, + "grad_norm": 0.7310657501220703, + "learning_rate": 1.922269862037066e-05, + "loss": 2.273, + "step": 4234 + }, + { + "epoch": 0.14, + "grad_norm": 0.7140824198722839, + "learning_rate": 1.9222287710586234e-05, + "loss": 2.2605, + "step": 4235 + }, + { + "epoch": 0.14, + "grad_norm": 0.6683670878410339, + "learning_rate": 1.9221876696613808e-05, + "loss": 2.2078, + "step": 4236 + }, + { + "epoch": 0.14, + "grad_norm": 0.7233315110206604, + "learning_rate": 1.922146557845802e-05, + "loss": 2.269, + "step": 4237 + }, + { + "epoch": 0.14, + "grad_norm": 0.6739272475242615, + "learning_rate": 1.9221054356123522e-05, + "loss": 2.1626, + "step": 4238 + }, + { + "epoch": 0.14, + "grad_norm": 0.6815820336341858, + "learning_rate": 1.9220643029614953e-05, + "loss": 2.3263, + "step": 4239 + }, + { + "epoch": 0.14, + "grad_norm": 0.6762769818305969, + "learning_rate": 1.9220231598936964e-05, + "loss": 2.2301, + "step": 4240 + }, + { + "epoch": 0.14, + "grad_norm": 0.6702861785888672, + "learning_rate": 1.9219820064094207e-05, + "loss": 2.2011, + "step": 4241 + }, + { + "epoch": 0.14, + "grad_norm": 0.6802589297294617, + "learning_rate": 1.921940842509132e-05, + "loss": 2.2924, + "step": 4242 + }, + { + "epoch": 0.14, + "grad_norm": 0.6847351789474487, + "learning_rate": 1.921899668193296e-05, + "loss": 2.1822, + "step": 4243 + }, + { + "epoch": 0.14, + "grad_norm": 0.6704094409942627, + "learning_rate": 1.921858483462378e-05, + "loss": 2.2552, + "step": 4244 + }, + { + "epoch": 0.14, + "grad_norm": 0.6904206275939941, + "learning_rate": 1.9218172883168432e-05, + "loss": 2.2928, + "step": 4245 + }, + { + "epoch": 0.14, + "grad_norm": 0.7047081589698792, + "learning_rate": 1.9217760827571567e-05, + "loss": 2.227, + "step": 4246 + }, + { + "epoch": 0.14, + "grad_norm": 0.6851109862327576, + "learning_rate": 1.9217348667837843e-05, + "loss": 2.2906, + "step": 4247 + }, + { + "epoch": 0.14, + "grad_norm": 0.671583354473114, + "learning_rate": 1.9216936403971918e-05, + "loss": 2.1968, + "step": 4248 + }, + { + "epoch": 0.14, + "grad_norm": 0.6658949851989746, + "learning_rate": 1.9216524035978443e-05, + "loss": 2.2784, + "step": 4249 + }, + { + "epoch": 0.14, + "grad_norm": 0.6806211471557617, + "learning_rate": 1.921611156386208e-05, + "loss": 2.1715, + "step": 4250 + }, + { + "epoch": 0.14, + "grad_norm": 0.674152135848999, + "learning_rate": 1.9215698987627495e-05, + "loss": 2.2108, + "step": 4251 + }, + { + "epoch": 0.14, + "grad_norm": 0.728184700012207, + "learning_rate": 1.9215286307279342e-05, + "loss": 2.2149, + "step": 4252 + }, + { + "epoch": 0.14, + "grad_norm": 0.6897283792495728, + "learning_rate": 1.9214873522822285e-05, + "loss": 2.2659, + "step": 4253 + }, + { + "epoch": 0.14, + "grad_norm": 0.6815371513366699, + "learning_rate": 1.9214460634260986e-05, + "loss": 2.2255, + "step": 4254 + }, + { + "epoch": 0.14, + "grad_norm": 0.6435858607292175, + "learning_rate": 1.9214047641600113e-05, + "loss": 2.2543, + "step": 4255 + }, + { + "epoch": 0.14, + "grad_norm": 0.6967017650604248, + "learning_rate": 1.921363454484433e-05, + "loss": 2.219, + "step": 4256 + }, + { + "epoch": 0.14, + "grad_norm": 0.6708928942680359, + "learning_rate": 1.92132213439983e-05, + "loss": 2.2233, + "step": 4257 + }, + { + "epoch": 0.14, + "grad_norm": 0.7591630816459656, + "learning_rate": 1.9212808039066696e-05, + "loss": 2.2053, + "step": 4258 + }, + { + "epoch": 0.14, + "grad_norm": 0.7051583528518677, + "learning_rate": 1.921239463005419e-05, + "loss": 2.1823, + "step": 4259 + }, + { + "epoch": 0.14, + "grad_norm": 0.6815659403800964, + "learning_rate": 1.9211981116965444e-05, + "loss": 2.2357, + "step": 4260 + }, + { + "epoch": 0.14, + "grad_norm": 0.699821949005127, + "learning_rate": 1.9211567499805134e-05, + "loss": 2.318, + "step": 4261 + }, + { + "epoch": 0.14, + "grad_norm": 0.7074738144874573, + "learning_rate": 1.9211153778577935e-05, + "loss": 2.2506, + "step": 4262 + }, + { + "epoch": 0.14, + "grad_norm": 0.6815426349639893, + "learning_rate": 1.9210739953288516e-05, + "loss": 2.2001, + "step": 4263 + }, + { + "epoch": 0.14, + "grad_norm": 0.740431547164917, + "learning_rate": 1.9210326023941558e-05, + "loss": 2.2312, + "step": 4264 + }, + { + "epoch": 0.14, + "grad_norm": 0.6804113984107971, + "learning_rate": 1.9209911990541735e-05, + "loss": 2.1852, + "step": 4265 + }, + { + "epoch": 0.14, + "grad_norm": 0.6760491132736206, + "learning_rate": 1.9209497853093724e-05, + "loss": 2.2152, + "step": 4266 + }, + { + "epoch": 0.14, + "grad_norm": 0.7127466201782227, + "learning_rate": 1.9209083611602202e-05, + "loss": 2.2115, + "step": 4267 + }, + { + "epoch": 0.14, + "grad_norm": 0.6811696887016296, + "learning_rate": 1.9208669266071853e-05, + "loss": 2.203, + "step": 4268 + }, + { + "epoch": 0.14, + "grad_norm": 0.6816546320915222, + "learning_rate": 1.920825481650735e-05, + "loss": 2.1733, + "step": 4269 + }, + { + "epoch": 0.14, + "grad_norm": 0.6877427697181702, + "learning_rate": 1.9207840262913384e-05, + "loss": 2.2356, + "step": 4270 + }, + { + "epoch": 0.14, + "grad_norm": 0.6752818822860718, + "learning_rate": 1.9207425605294633e-05, + "loss": 2.289, + "step": 4271 + }, + { + "epoch": 0.14, + "grad_norm": 0.6741052269935608, + "learning_rate": 1.9207010843655788e-05, + "loss": 2.2376, + "step": 4272 + }, + { + "epoch": 0.14, + "grad_norm": 0.6986745595932007, + "learning_rate": 1.9206595978001527e-05, + "loss": 2.2519, + "step": 4273 + }, + { + "epoch": 0.14, + "grad_norm": 0.6718072891235352, + "learning_rate": 1.920618100833654e-05, + "loss": 2.2707, + "step": 4274 + }, + { + "epoch": 0.14, + "grad_norm": 0.6851564049720764, + "learning_rate": 1.920576593466552e-05, + "loss": 2.1898, + "step": 4275 + }, + { + "epoch": 0.14, + "grad_norm": 0.6624534130096436, + "learning_rate": 1.9205350756993146e-05, + "loss": 2.2259, + "step": 4276 + }, + { + "epoch": 0.14, + "grad_norm": 0.6701390147209167, + "learning_rate": 1.9204935475324114e-05, + "loss": 2.1864, + "step": 4277 + }, + { + "epoch": 0.14, + "grad_norm": 0.7018690705299377, + "learning_rate": 1.9204520089663117e-05, + "loss": 2.232, + "step": 4278 + }, + { + "epoch": 0.14, + "grad_norm": 0.7081454396247864, + "learning_rate": 1.9204104600014845e-05, + "loss": 2.2151, + "step": 4279 + }, + { + "epoch": 0.14, + "grad_norm": 0.6908605694770813, + "learning_rate": 1.9203689006383996e-05, + "loss": 2.2727, + "step": 4280 + }, + { + "epoch": 0.14, + "grad_norm": 0.7174037098884583, + "learning_rate": 1.9203273308775262e-05, + "loss": 2.2352, + "step": 4281 + }, + { + "epoch": 0.14, + "grad_norm": 0.6773051023483276, + "learning_rate": 1.920285750719334e-05, + "loss": 2.2262, + "step": 4282 + }, + { + "epoch": 0.14, + "grad_norm": 0.7071585059165955, + "learning_rate": 1.9202441601642925e-05, + "loss": 2.2373, + "step": 4283 + }, + { + "epoch": 0.14, + "grad_norm": 0.7034968137741089, + "learning_rate": 1.9202025592128717e-05, + "loss": 2.2253, + "step": 4284 + }, + { + "epoch": 0.14, + "grad_norm": 0.6774274110794067, + "learning_rate": 1.9201609478655422e-05, + "loss": 2.2604, + "step": 4285 + }, + { + "epoch": 0.14, + "grad_norm": 0.7059097290039062, + "learning_rate": 1.9201193261227735e-05, + "loss": 2.2413, + "step": 4286 + }, + { + "epoch": 0.14, + "grad_norm": 0.7126817107200623, + "learning_rate": 1.9200776939850355e-05, + "loss": 2.2493, + "step": 4287 + }, + { + "epoch": 0.14, + "grad_norm": 0.7232714891433716, + "learning_rate": 1.9200360514527993e-05, + "loss": 2.2543, + "step": 4288 + }, + { + "epoch": 0.14, + "grad_norm": 0.689019501209259, + "learning_rate": 1.919994398526535e-05, + "loss": 2.2236, + "step": 4289 + }, + { + "epoch": 0.14, + "grad_norm": 0.6858633756637573, + "learning_rate": 1.919952735206713e-05, + "loss": 2.2171, + "step": 4290 + }, + { + "epoch": 0.14, + "grad_norm": 0.6815646290779114, + "learning_rate": 1.919911061493804e-05, + "loss": 2.2148, + "step": 4291 + }, + { + "epoch": 0.14, + "grad_norm": 0.6734101176261902, + "learning_rate": 1.919869377388279e-05, + "loss": 2.2289, + "step": 4292 + }, + { + "epoch": 0.14, + "grad_norm": 0.6812942624092102, + "learning_rate": 1.919827682890609e-05, + "loss": 2.2405, + "step": 4293 + }, + { + "epoch": 0.14, + "grad_norm": 0.686515212059021, + "learning_rate": 1.9197859780012647e-05, + "loss": 2.1884, + "step": 4294 + }, + { + "epoch": 0.14, + "grad_norm": 0.6849203109741211, + "learning_rate": 1.9197442627207177e-05, + "loss": 2.2359, + "step": 4295 + }, + { + "epoch": 0.14, + "grad_norm": 0.7028157114982605, + "learning_rate": 1.9197025370494387e-05, + "loss": 2.2941, + "step": 4296 + }, + { + "epoch": 0.14, + "grad_norm": 0.686337411403656, + "learning_rate": 1.9196608009879e-05, + "loss": 2.2146, + "step": 4297 + }, + { + "epoch": 0.14, + "grad_norm": 0.6850748658180237, + "learning_rate": 1.9196190545365724e-05, + "loss": 2.2756, + "step": 4298 + }, + { + "epoch": 0.14, + "grad_norm": 0.6942947506904602, + "learning_rate": 1.9195772976959274e-05, + "loss": 2.242, + "step": 4299 + }, + { + "epoch": 0.14, + "grad_norm": 0.6923968195915222, + "learning_rate": 1.919535530466437e-05, + "loss": 2.2302, + "step": 4300 + }, + { + "epoch": 0.14, + "grad_norm": 0.7147257328033447, + "learning_rate": 1.9194937528485732e-05, + "loss": 2.3098, + "step": 4301 + }, + { + "epoch": 0.14, + "grad_norm": 0.6557518839836121, + "learning_rate": 1.919451964842808e-05, + "loss": 2.1527, + "step": 4302 + }, + { + "epoch": 0.14, + "grad_norm": 0.6924035549163818, + "learning_rate": 1.9194101664496133e-05, + "loss": 2.2917, + "step": 4303 + }, + { + "epoch": 0.14, + "grad_norm": 0.716222882270813, + "learning_rate": 1.9193683576694612e-05, + "loss": 2.2452, + "step": 4304 + }, + { + "epoch": 0.14, + "grad_norm": 0.6650006175041199, + "learning_rate": 1.919326538502824e-05, + "loss": 2.2292, + "step": 4305 + }, + { + "epoch": 0.14, + "grad_norm": 0.675167977809906, + "learning_rate": 1.919284708950175e-05, + "loss": 2.2074, + "step": 4306 + }, + { + "epoch": 0.14, + "grad_norm": 0.733742356300354, + "learning_rate": 1.9192428690119856e-05, + "loss": 2.2724, + "step": 4307 + }, + { + "epoch": 0.14, + "grad_norm": 0.7422173023223877, + "learning_rate": 1.9192010186887292e-05, + "loss": 2.2882, + "step": 4308 + }, + { + "epoch": 0.14, + "grad_norm": 0.8102651834487915, + "learning_rate": 1.9191591579808784e-05, + "loss": 2.241, + "step": 4309 + }, + { + "epoch": 0.14, + "grad_norm": 0.7037965655326843, + "learning_rate": 1.919117286888906e-05, + "loss": 2.2687, + "step": 4310 + }, + { + "epoch": 0.14, + "grad_norm": 0.7065612077713013, + "learning_rate": 1.9190754054132853e-05, + "loss": 2.2605, + "step": 4311 + }, + { + "epoch": 0.14, + "grad_norm": 0.6892729997634888, + "learning_rate": 1.919033513554489e-05, + "loss": 2.1547, + "step": 4312 + }, + { + "epoch": 0.14, + "grad_norm": 0.6618356108665466, + "learning_rate": 1.9189916113129908e-05, + "loss": 2.2217, + "step": 4313 + }, + { + "epoch": 0.14, + "grad_norm": 0.6700673699378967, + "learning_rate": 1.918949698689264e-05, + "loss": 2.2688, + "step": 4314 + }, + { + "epoch": 0.14, + "grad_norm": 0.6773180961608887, + "learning_rate": 1.9189077756837822e-05, + "loss": 2.2506, + "step": 4315 + }, + { + "epoch": 0.14, + "grad_norm": 0.6946173906326294, + "learning_rate": 1.9188658422970188e-05, + "loss": 2.2832, + "step": 4316 + }, + { + "epoch": 0.14, + "grad_norm": 0.6928938627243042, + "learning_rate": 1.9188238985294475e-05, + "loss": 2.2512, + "step": 4317 + }, + { + "epoch": 0.14, + "grad_norm": 0.70658940076828, + "learning_rate": 1.9187819443815427e-05, + "loss": 2.2258, + "step": 4318 + }, + { + "epoch": 0.14, + "grad_norm": 0.7347752451896667, + "learning_rate": 1.9187399798537773e-05, + "loss": 2.1972, + "step": 4319 + }, + { + "epoch": 0.14, + "grad_norm": 0.6671786308288574, + "learning_rate": 1.9186980049466263e-05, + "loss": 2.3089, + "step": 4320 + }, + { + "epoch": 0.14, + "grad_norm": 0.6883679032325745, + "learning_rate": 1.9186560196605638e-05, + "loss": 2.2869, + "step": 4321 + }, + { + "epoch": 0.14, + "grad_norm": 0.7399175763130188, + "learning_rate": 1.9186140239960638e-05, + "loss": 2.2112, + "step": 4322 + }, + { + "epoch": 0.14, + "grad_norm": 0.7386508584022522, + "learning_rate": 1.9185720179536012e-05, + "loss": 2.2078, + "step": 4323 + }, + { + "epoch": 0.14, + "grad_norm": 0.7095656394958496, + "learning_rate": 1.9185300015336498e-05, + "loss": 2.2287, + "step": 4324 + }, + { + "epoch": 0.14, + "grad_norm": 0.7059575319290161, + "learning_rate": 1.9184879747366852e-05, + "loss": 2.2832, + "step": 4325 + }, + { + "epoch": 0.14, + "grad_norm": 0.6887425184249878, + "learning_rate": 1.9184459375631818e-05, + "loss": 2.2058, + "step": 4326 + }, + { + "epoch": 0.14, + "grad_norm": 0.6827843189239502, + "learning_rate": 1.918403890013614e-05, + "loss": 2.2277, + "step": 4327 + }, + { + "epoch": 0.14, + "grad_norm": 0.7170089483261108, + "learning_rate": 1.9183618320884578e-05, + "loss": 2.2751, + "step": 4328 + }, + { + "epoch": 0.14, + "grad_norm": 0.7188481092453003, + "learning_rate": 1.9183197637881872e-05, + "loss": 2.2946, + "step": 4329 + }, + { + "epoch": 0.14, + "grad_norm": 0.7036263942718506, + "learning_rate": 1.9182776851132786e-05, + "loss": 2.2952, + "step": 4330 + }, + { + "epoch": 0.14, + "grad_norm": 0.6741549372673035, + "learning_rate": 1.9182355960642066e-05, + "loss": 2.3494, + "step": 4331 + }, + { + "epoch": 0.14, + "grad_norm": 0.6946344971656799, + "learning_rate": 1.9181934966414472e-05, + "loss": 2.2812, + "step": 4332 + }, + { + "epoch": 0.14, + "grad_norm": 0.7252484560012817, + "learning_rate": 1.9181513868454758e-05, + "loss": 2.2825, + "step": 4333 + }, + { + "epoch": 0.14, + "grad_norm": 0.6909781098365784, + "learning_rate": 1.9181092666767683e-05, + "loss": 2.2108, + "step": 4334 + }, + { + "epoch": 0.14, + "grad_norm": 0.6733206510543823, + "learning_rate": 1.9180671361358e-05, + "loss": 2.2751, + "step": 4335 + }, + { + "epoch": 0.14, + "grad_norm": 0.6813592314720154, + "learning_rate": 1.9180249952230472e-05, + "loss": 2.2511, + "step": 4336 + }, + { + "epoch": 0.14, + "grad_norm": 0.7085376381874084, + "learning_rate": 1.9179828439389863e-05, + "loss": 2.2474, + "step": 4337 + }, + { + "epoch": 0.14, + "grad_norm": 0.6810333728790283, + "learning_rate": 1.917940682284093e-05, + "loss": 2.2976, + "step": 4338 + }, + { + "epoch": 0.14, + "grad_norm": 0.7013625502586365, + "learning_rate": 1.917898510258844e-05, + "loss": 2.239, + "step": 4339 + }, + { + "epoch": 0.14, + "grad_norm": 0.6949984431266785, + "learning_rate": 1.9178563278637155e-05, + "loss": 2.2295, + "step": 4340 + }, + { + "epoch": 0.14, + "grad_norm": 0.6863375306129456, + "learning_rate": 1.917814135099184e-05, + "loss": 2.2274, + "step": 4341 + }, + { + "epoch": 0.14, + "grad_norm": 0.7012094855308533, + "learning_rate": 1.917771931965726e-05, + "loss": 2.2489, + "step": 4342 + }, + { + "epoch": 0.14, + "grad_norm": 0.6757533550262451, + "learning_rate": 1.9177297184638192e-05, + "loss": 2.2683, + "step": 4343 + }, + { + "epoch": 0.14, + "grad_norm": 0.677887499332428, + "learning_rate": 1.9176874945939397e-05, + "loss": 2.2398, + "step": 4344 + }, + { + "epoch": 0.14, + "grad_norm": 0.7272711992263794, + "learning_rate": 1.9176452603565646e-05, + "loss": 2.2495, + "step": 4345 + }, + { + "epoch": 0.14, + "grad_norm": 0.675494372844696, + "learning_rate": 1.9176030157521714e-05, + "loss": 2.2869, + "step": 4346 + }, + { + "epoch": 0.14, + "grad_norm": 0.6765179634094238, + "learning_rate": 1.9175607607812366e-05, + "loss": 2.2472, + "step": 4347 + }, + { + "epoch": 0.14, + "grad_norm": 0.6921008825302124, + "learning_rate": 1.9175184954442385e-05, + "loss": 2.2219, + "step": 4348 + }, + { + "epoch": 0.14, + "grad_norm": 0.7094331979751587, + "learning_rate": 1.917476219741654e-05, + "loss": 2.2319, + "step": 4349 + }, + { + "epoch": 0.14, + "grad_norm": 0.6870075464248657, + "learning_rate": 1.917433933673961e-05, + "loss": 2.2843, + "step": 4350 + }, + { + "epoch": 0.14, + "grad_norm": 0.692391574382782, + "learning_rate": 1.9173916372416372e-05, + "loss": 2.3004, + "step": 4351 + }, + { + "epoch": 0.14, + "grad_norm": 0.6720705628395081, + "learning_rate": 1.91734933044516e-05, + "loss": 2.2298, + "step": 4352 + }, + { + "epoch": 0.14, + "grad_norm": 0.7211714386940002, + "learning_rate": 1.9173070132850076e-05, + "loss": 2.222, + "step": 4353 + }, + { + "epoch": 0.14, + "grad_norm": 0.6883718967437744, + "learning_rate": 1.9172646857616586e-05, + "loss": 2.2426, + "step": 4354 + }, + { + "epoch": 0.14, + "grad_norm": 0.6840395927429199, + "learning_rate": 1.9172223478755906e-05, + "loss": 2.2383, + "step": 4355 + }, + { + "epoch": 0.14, + "grad_norm": 0.6846573948860168, + "learning_rate": 1.9171799996272816e-05, + "loss": 2.3109, + "step": 4356 + }, + { + "epoch": 0.14, + "grad_norm": 0.6816583871841431, + "learning_rate": 1.917137641017211e-05, + "loss": 2.305, + "step": 4357 + }, + { + "epoch": 0.14, + "grad_norm": 0.7049252986907959, + "learning_rate": 1.9170952720458566e-05, + "loss": 2.23, + "step": 4358 + }, + { + "epoch": 0.15, + "grad_norm": 0.6837392449378967, + "learning_rate": 1.9170528927136974e-05, + "loss": 2.2405, + "step": 4359 + }, + { + "epoch": 0.15, + "grad_norm": 0.7084900140762329, + "learning_rate": 1.9170105030212122e-05, + "loss": 2.2255, + "step": 4360 + }, + { + "epoch": 0.15, + "grad_norm": 0.696904182434082, + "learning_rate": 1.9169681029688795e-05, + "loss": 2.3168, + "step": 4361 + }, + { + "epoch": 0.15, + "grad_norm": 0.7154303193092346, + "learning_rate": 1.9169256925571783e-05, + "loss": 2.2789, + "step": 4362 + }, + { + "epoch": 0.15, + "grad_norm": 0.710085391998291, + "learning_rate": 1.9168832717865887e-05, + "loss": 2.3226, + "step": 4363 + }, + { + "epoch": 0.15, + "grad_norm": 0.6970433592796326, + "learning_rate": 1.9168408406575885e-05, + "loss": 2.2416, + "step": 4364 + }, + { + "epoch": 0.15, + "grad_norm": 0.7164191007614136, + "learning_rate": 1.9167983991706585e-05, + "loss": 2.2341, + "step": 4365 + }, + { + "epoch": 0.15, + "grad_norm": 0.6675049066543579, + "learning_rate": 1.916755947326277e-05, + "loss": 2.261, + "step": 4366 + }, + { + "epoch": 0.15, + "grad_norm": 0.6755507588386536, + "learning_rate": 1.9167134851249245e-05, + "loss": 2.3228, + "step": 4367 + }, + { + "epoch": 0.15, + "grad_norm": 0.6516239643096924, + "learning_rate": 1.91667101256708e-05, + "loss": 2.2168, + "step": 4368 + }, + { + "epoch": 0.15, + "grad_norm": 0.6708970665931702, + "learning_rate": 1.9166285296532237e-05, + "loss": 2.2842, + "step": 4369 + }, + { + "epoch": 0.15, + "grad_norm": 0.6642171740531921, + "learning_rate": 1.9165860363838353e-05, + "loss": 2.2259, + "step": 4370 + }, + { + "epoch": 0.15, + "grad_norm": 0.7030302286148071, + "learning_rate": 1.9165435327593953e-05, + "loss": 2.2444, + "step": 4371 + }, + { + "epoch": 0.15, + "grad_norm": 0.6991947293281555, + "learning_rate": 1.9165010187803834e-05, + "loss": 2.2601, + "step": 4372 + }, + { + "epoch": 0.15, + "grad_norm": 0.7127227187156677, + "learning_rate": 1.91645849444728e-05, + "loss": 2.2409, + "step": 4373 + }, + { + "epoch": 0.15, + "grad_norm": 0.7084012031555176, + "learning_rate": 1.916415959760566e-05, + "loss": 2.2792, + "step": 4374 + }, + { + "epoch": 0.15, + "grad_norm": 0.6790396571159363, + "learning_rate": 1.9163734147207215e-05, + "loss": 2.2507, + "step": 4375 + }, + { + "epoch": 0.15, + "grad_norm": 0.6784519553184509, + "learning_rate": 1.9163308593282268e-05, + "loss": 2.2235, + "step": 4376 + }, + { + "epoch": 0.15, + "grad_norm": 0.694807231426239, + "learning_rate": 1.9162882935835637e-05, + "loss": 2.2071, + "step": 4377 + }, + { + "epoch": 0.15, + "grad_norm": 0.7259537577629089, + "learning_rate": 1.916245717487212e-05, + "loss": 2.2639, + "step": 4378 + }, + { + "epoch": 0.15, + "grad_norm": 0.7170383334159851, + "learning_rate": 1.9162031310396533e-05, + "loss": 2.2854, + "step": 4379 + }, + { + "epoch": 0.15, + "grad_norm": 0.6712615489959717, + "learning_rate": 1.9161605342413686e-05, + "loss": 2.3089, + "step": 4380 + }, + { + "epoch": 0.15, + "grad_norm": 0.6805903911590576, + "learning_rate": 1.916117927092839e-05, + "loss": 2.239, + "step": 4381 + }, + { + "epoch": 0.15, + "grad_norm": 0.6861051321029663, + "learning_rate": 1.9160753095945458e-05, + "loss": 2.2394, + "step": 4382 + }, + { + "epoch": 0.15, + "grad_norm": 0.6917824149131775, + "learning_rate": 1.916032681746971e-05, + "loss": 2.2403, + "step": 4383 + }, + { + "epoch": 0.15, + "grad_norm": 0.6860147714614868, + "learning_rate": 1.9159900435505957e-05, + "loss": 2.1552, + "step": 4384 + }, + { + "epoch": 0.15, + "grad_norm": 0.6734533309936523, + "learning_rate": 1.9159473950059016e-05, + "loss": 2.2103, + "step": 4385 + }, + { + "epoch": 0.15, + "grad_norm": 0.6711817979812622, + "learning_rate": 1.9159047361133706e-05, + "loss": 2.2561, + "step": 4386 + }, + { + "epoch": 0.15, + "grad_norm": 0.7137919664382935, + "learning_rate": 1.9158620668734848e-05, + "loss": 2.2224, + "step": 4387 + }, + { + "epoch": 0.15, + "grad_norm": 0.7050636410713196, + "learning_rate": 1.9158193872867262e-05, + "loss": 2.2368, + "step": 4388 + }, + { + "epoch": 0.15, + "grad_norm": 0.7166649699211121, + "learning_rate": 1.9157766973535765e-05, + "loss": 2.2685, + "step": 4389 + }, + { + "epoch": 0.15, + "grad_norm": 0.6831756830215454, + "learning_rate": 1.915733997074519e-05, + "loss": 2.1879, + "step": 4390 + }, + { + "epoch": 0.15, + "grad_norm": 0.7025697827339172, + "learning_rate": 1.9156912864500348e-05, + "loss": 2.2422, + "step": 4391 + }, + { + "epoch": 0.15, + "grad_norm": 0.7169339060783386, + "learning_rate": 1.9156485654806075e-05, + "loss": 2.2367, + "step": 4392 + }, + { + "epoch": 0.15, + "grad_norm": 0.6627646088600159, + "learning_rate": 1.915605834166719e-05, + "loss": 2.2418, + "step": 4393 + }, + { + "epoch": 0.15, + "grad_norm": 0.6652949452400208, + "learning_rate": 1.915563092508853e-05, + "loss": 2.1737, + "step": 4394 + }, + { + "epoch": 0.15, + "grad_norm": 0.6702134013175964, + "learning_rate": 1.915520340507491e-05, + "loss": 2.2029, + "step": 4395 + }, + { + "epoch": 0.15, + "grad_norm": 0.6901026368141174, + "learning_rate": 1.9154775781631172e-05, + "loss": 2.2851, + "step": 4396 + }, + { + "epoch": 0.15, + "grad_norm": 0.6956287026405334, + "learning_rate": 1.9154348054762142e-05, + "loss": 2.2261, + "step": 4397 + }, + { + "epoch": 0.15, + "grad_norm": 0.7035785913467407, + "learning_rate": 1.9153920224472653e-05, + "loss": 2.1871, + "step": 4398 + }, + { + "epoch": 0.15, + "grad_norm": 0.662171483039856, + "learning_rate": 1.9153492290767536e-05, + "loss": 2.2421, + "step": 4399 + }, + { + "epoch": 0.15, + "grad_norm": 0.7039633393287659, + "learning_rate": 1.915306425365163e-05, + "loss": 2.2784, + "step": 4400 + }, + { + "epoch": 0.15, + "grad_norm": 0.694678008556366, + "learning_rate": 1.9152636113129767e-05, + "loss": 2.1918, + "step": 4401 + }, + { + "epoch": 0.15, + "grad_norm": 0.7037451863288879, + "learning_rate": 1.915220786920678e-05, + "loss": 2.256, + "step": 4402 + }, + { + "epoch": 0.15, + "grad_norm": 0.6861623525619507, + "learning_rate": 1.915177952188752e-05, + "loss": 2.2929, + "step": 4403 + }, + { + "epoch": 0.15, + "grad_norm": 0.6903137564659119, + "learning_rate": 1.915135107117682e-05, + "loss": 2.2495, + "step": 4404 + }, + { + "epoch": 0.15, + "grad_norm": 0.6748040318489075, + "learning_rate": 1.915092251707951e-05, + "loss": 2.2264, + "step": 4405 + }, + { + "epoch": 0.15, + "grad_norm": 0.7340372204780579, + "learning_rate": 1.9150493859600446e-05, + "loss": 2.3174, + "step": 4406 + }, + { + "epoch": 0.15, + "grad_norm": 0.6770610213279724, + "learning_rate": 1.9150065098744464e-05, + "loss": 2.2245, + "step": 4407 + }, + { + "epoch": 0.15, + "grad_norm": 0.6855124235153198, + "learning_rate": 1.914963623451641e-05, + "loss": 2.2128, + "step": 4408 + }, + { + "epoch": 0.15, + "grad_norm": 0.6983487606048584, + "learning_rate": 1.9149207266921127e-05, + "loss": 2.338, + "step": 4409 + }, + { + "epoch": 0.15, + "grad_norm": 0.6790350079536438, + "learning_rate": 1.9148778195963463e-05, + "loss": 2.2326, + "step": 4410 + }, + { + "epoch": 0.15, + "grad_norm": 0.7224295735359192, + "learning_rate": 1.9148349021648266e-05, + "loss": 2.2603, + "step": 4411 + }, + { + "epoch": 0.15, + "grad_norm": 0.7385583519935608, + "learning_rate": 1.914791974398038e-05, + "loss": 2.2597, + "step": 4412 + }, + { + "epoch": 0.15, + "grad_norm": 0.6818671226501465, + "learning_rate": 1.9147490362964655e-05, + "loss": 2.2276, + "step": 4413 + }, + { + "epoch": 0.15, + "grad_norm": 0.6700279712677002, + "learning_rate": 1.9147060878605954e-05, + "loss": 2.2412, + "step": 4414 + }, + { + "epoch": 0.15, + "grad_norm": 0.6668648719787598, + "learning_rate": 1.914663129090911e-05, + "loss": 2.2163, + "step": 4415 + }, + { + "epoch": 0.15, + "grad_norm": 0.6888675689697266, + "learning_rate": 1.914620159987899e-05, + "loss": 2.2743, + "step": 4416 + }, + { + "epoch": 0.15, + "grad_norm": 0.709429144859314, + "learning_rate": 1.914577180552045e-05, + "loss": 2.2469, + "step": 4417 + }, + { + "epoch": 0.15, + "grad_norm": 0.6794667840003967, + "learning_rate": 1.9145341907838336e-05, + "loss": 2.1758, + "step": 4418 + }, + { + "epoch": 0.15, + "grad_norm": 0.6865988373756409, + "learning_rate": 1.9144911906837506e-05, + "loss": 2.1894, + "step": 4419 + }, + { + "epoch": 0.15, + "grad_norm": 0.7329025864601135, + "learning_rate": 1.9144481802522824e-05, + "loss": 2.258, + "step": 4420 + }, + { + "epoch": 0.15, + "grad_norm": 0.6853049993515015, + "learning_rate": 1.9144051594899148e-05, + "loss": 2.2387, + "step": 4421 + }, + { + "epoch": 0.15, + "grad_norm": 0.6635901927947998, + "learning_rate": 1.914362128397133e-05, + "loss": 2.2063, + "step": 4422 + }, + { + "epoch": 0.15, + "grad_norm": 0.6731832027435303, + "learning_rate": 1.914319086974424e-05, + "loss": 2.2055, + "step": 4423 + }, + { + "epoch": 0.15, + "grad_norm": 0.6986168622970581, + "learning_rate": 1.9142760352222737e-05, + "loss": 2.2159, + "step": 4424 + }, + { + "epoch": 0.15, + "grad_norm": 0.6876347661018372, + "learning_rate": 1.9142329731411687e-05, + "loss": 2.2883, + "step": 4425 + }, + { + "epoch": 0.15, + "grad_norm": 0.6784365773200989, + "learning_rate": 1.9141899007315957e-05, + "loss": 2.2402, + "step": 4426 + }, + { + "epoch": 0.15, + "grad_norm": 0.7190592288970947, + "learning_rate": 1.9141468179940405e-05, + "loss": 2.286, + "step": 4427 + }, + { + "epoch": 0.15, + "grad_norm": 0.6770738363265991, + "learning_rate": 1.9141037249289904e-05, + "loss": 2.2378, + "step": 4428 + }, + { + "epoch": 0.15, + "grad_norm": 0.693450927734375, + "learning_rate": 1.9140606215369325e-05, + "loss": 2.2621, + "step": 4429 + }, + { + "epoch": 0.15, + "grad_norm": 0.6916965842247009, + "learning_rate": 1.9140175078183528e-05, + "loss": 2.2435, + "step": 4430 + }, + { + "epoch": 0.15, + "grad_norm": 0.7128582000732422, + "learning_rate": 1.913974383773739e-05, + "loss": 2.2977, + "step": 4431 + }, + { + "epoch": 0.15, + "grad_norm": 0.694035530090332, + "learning_rate": 1.9139312494035786e-05, + "loss": 2.2156, + "step": 4432 + }, + { + "epoch": 0.15, + "grad_norm": 0.711816668510437, + "learning_rate": 1.9138881047083585e-05, + "loss": 2.2057, + "step": 4433 + }, + { + "epoch": 0.15, + "grad_norm": 0.6962132453918457, + "learning_rate": 1.913844949688566e-05, + "loss": 2.2035, + "step": 4434 + }, + { + "epoch": 0.15, + "grad_norm": 0.6823022365570068, + "learning_rate": 1.9138017843446893e-05, + "loss": 2.2626, + "step": 4435 + }, + { + "epoch": 0.15, + "grad_norm": 0.6944918632507324, + "learning_rate": 1.9137586086772152e-05, + "loss": 2.2931, + "step": 4436 + }, + { + "epoch": 0.15, + "grad_norm": 0.6743447780609131, + "learning_rate": 1.9137154226866317e-05, + "loss": 2.2444, + "step": 4437 + }, + { + "epoch": 0.15, + "grad_norm": 0.6706382036209106, + "learning_rate": 1.913672226373427e-05, + "loss": 2.2335, + "step": 4438 + }, + { + "epoch": 0.15, + "grad_norm": 0.675216794013977, + "learning_rate": 1.913629019738089e-05, + "loss": 2.2347, + "step": 4439 + }, + { + "epoch": 0.15, + "grad_norm": 0.6797457933425903, + "learning_rate": 1.9135858027811056e-05, + "loss": 2.1319, + "step": 4440 + }, + { + "epoch": 0.15, + "grad_norm": 0.7135501503944397, + "learning_rate": 1.9135425755029656e-05, + "loss": 2.2661, + "step": 4441 + }, + { + "epoch": 0.15, + "grad_norm": 0.7354063391685486, + "learning_rate": 1.9134993379041565e-05, + "loss": 2.2869, + "step": 4442 + }, + { + "epoch": 0.15, + "grad_norm": 0.6962104439735413, + "learning_rate": 1.9134560899851674e-05, + "loss": 2.2949, + "step": 4443 + }, + { + "epoch": 0.15, + "grad_norm": 0.677567183971405, + "learning_rate": 1.913412831746487e-05, + "loss": 2.2623, + "step": 4444 + }, + { + "epoch": 0.15, + "grad_norm": 0.6790776252746582, + "learning_rate": 1.9133695631886036e-05, + "loss": 2.2064, + "step": 4445 + }, + { + "epoch": 0.15, + "grad_norm": 0.6918368339538574, + "learning_rate": 1.9133262843120063e-05, + "loss": 2.2275, + "step": 4446 + }, + { + "epoch": 0.15, + "grad_norm": 0.6921852231025696, + "learning_rate": 1.9132829951171837e-05, + "loss": 2.2545, + "step": 4447 + }, + { + "epoch": 0.15, + "grad_norm": 0.7122212648391724, + "learning_rate": 1.913239695604625e-05, + "loss": 2.2442, + "step": 4448 + }, + { + "epoch": 0.15, + "grad_norm": 0.688454806804657, + "learning_rate": 1.9131963857748193e-05, + "loss": 2.2662, + "step": 4449 + }, + { + "epoch": 0.15, + "grad_norm": 0.6824939250946045, + "learning_rate": 1.913153065628256e-05, + "loss": 2.2404, + "step": 4450 + }, + { + "epoch": 0.15, + "grad_norm": 0.6846230626106262, + "learning_rate": 1.913109735165425e-05, + "loss": 2.2551, + "step": 4451 + }, + { + "epoch": 0.15, + "grad_norm": 0.669251024723053, + "learning_rate": 1.913066394386815e-05, + "loss": 2.2341, + "step": 4452 + }, + { + "epoch": 0.15, + "grad_norm": 0.7283945083618164, + "learning_rate": 1.9130230432929162e-05, + "loss": 2.215, + "step": 4453 + }, + { + "epoch": 0.15, + "grad_norm": 0.7121866345405579, + "learning_rate": 1.912979681884218e-05, + "loss": 2.2714, + "step": 4454 + }, + { + "epoch": 0.15, + "grad_norm": 0.6670955419540405, + "learning_rate": 1.9129363101612104e-05, + "loss": 2.232, + "step": 4455 + }, + { + "epoch": 0.15, + "grad_norm": 0.6937485933303833, + "learning_rate": 1.9128929281243834e-05, + "loss": 2.2648, + "step": 4456 + }, + { + "epoch": 0.15, + "grad_norm": 0.6939454078674316, + "learning_rate": 1.9128495357742273e-05, + "loss": 2.2119, + "step": 4457 + }, + { + "epoch": 0.15, + "grad_norm": 0.7153201699256897, + "learning_rate": 1.912806133111232e-05, + "loss": 2.2215, + "step": 4458 + }, + { + "epoch": 0.15, + "grad_norm": 0.7199572324752808, + "learning_rate": 1.9127627201358878e-05, + "loss": 2.2619, + "step": 4459 + }, + { + "epoch": 0.15, + "grad_norm": 0.711908221244812, + "learning_rate": 1.9127192968486854e-05, + "loss": 2.1738, + "step": 4460 + }, + { + "epoch": 0.15, + "grad_norm": 0.6910556554794312, + "learning_rate": 1.9126758632501155e-05, + "loss": 2.2213, + "step": 4461 + }, + { + "epoch": 0.15, + "grad_norm": 0.6971352100372314, + "learning_rate": 1.9126324193406684e-05, + "loss": 2.2507, + "step": 4462 + }, + { + "epoch": 0.15, + "grad_norm": 0.6779584288597107, + "learning_rate": 1.9125889651208348e-05, + "loss": 2.2271, + "step": 4463 + }, + { + "epoch": 0.15, + "grad_norm": 0.6746313571929932, + "learning_rate": 1.9125455005911066e-05, + "loss": 2.2429, + "step": 4464 + }, + { + "epoch": 0.15, + "grad_norm": 0.7001540064811707, + "learning_rate": 1.9125020257519736e-05, + "loss": 2.2535, + "step": 4465 + }, + { + "epoch": 0.15, + "grad_norm": 0.7001059651374817, + "learning_rate": 1.9124585406039276e-05, + "loss": 2.1943, + "step": 4466 + }, + { + "epoch": 0.15, + "grad_norm": 0.6591464281082153, + "learning_rate": 1.9124150451474597e-05, + "loss": 2.1906, + "step": 4467 + }, + { + "epoch": 0.15, + "grad_norm": 0.7008257508277893, + "learning_rate": 1.9123715393830614e-05, + "loss": 2.3508, + "step": 4468 + }, + { + "epoch": 0.15, + "grad_norm": 0.7082772850990295, + "learning_rate": 1.912328023311224e-05, + "loss": 2.2319, + "step": 4469 + }, + { + "epoch": 0.15, + "grad_norm": 0.7263653874397278, + "learning_rate": 1.91228449693244e-05, + "loss": 2.2375, + "step": 4470 + }, + { + "epoch": 0.15, + "grad_norm": 0.7051621079444885, + "learning_rate": 1.9122409602471997e-05, + "loss": 2.2804, + "step": 4471 + }, + { + "epoch": 0.15, + "grad_norm": 0.7339425683021545, + "learning_rate": 1.9121974132559958e-05, + "loss": 2.2079, + "step": 4472 + }, + { + "epoch": 0.15, + "grad_norm": 0.6864722967147827, + "learning_rate": 1.91215385595932e-05, + "loss": 2.2082, + "step": 4473 + }, + { + "epoch": 0.15, + "grad_norm": 0.7400726675987244, + "learning_rate": 1.9121102883576647e-05, + "loss": 2.2807, + "step": 4474 + }, + { + "epoch": 0.15, + "grad_norm": 0.7143449783325195, + "learning_rate": 1.912066710451522e-05, + "loss": 2.2416, + "step": 4475 + }, + { + "epoch": 0.15, + "grad_norm": 0.7170915007591248, + "learning_rate": 1.912023122241384e-05, + "loss": 2.2746, + "step": 4476 + }, + { + "epoch": 0.15, + "grad_norm": 0.6703060865402222, + "learning_rate": 1.911979523727743e-05, + "loss": 2.1689, + "step": 4477 + }, + { + "epoch": 0.15, + "grad_norm": 0.6909302473068237, + "learning_rate": 1.9119359149110924e-05, + "loss": 2.1541, + "step": 4478 + }, + { + "epoch": 0.15, + "grad_norm": 0.6457582712173462, + "learning_rate": 1.9118922957919238e-05, + "loss": 2.2244, + "step": 4479 + }, + { + "epoch": 0.15, + "grad_norm": 0.6873852610588074, + "learning_rate": 1.9118486663707308e-05, + "loss": 2.2121, + "step": 4480 + }, + { + "epoch": 0.15, + "grad_norm": 0.6986265778541565, + "learning_rate": 1.9118050266480057e-05, + "loss": 2.2899, + "step": 4481 + }, + { + "epoch": 0.15, + "grad_norm": 0.7102159261703491, + "learning_rate": 1.9117613766242416e-05, + "loss": 2.198, + "step": 4482 + }, + { + "epoch": 0.15, + "grad_norm": 0.7048208117485046, + "learning_rate": 1.911717716299932e-05, + "loss": 2.2178, + "step": 4483 + }, + { + "epoch": 0.15, + "grad_norm": 0.691589891910553, + "learning_rate": 1.9116740456755702e-05, + "loss": 2.2098, + "step": 4484 + }, + { + "epoch": 0.15, + "grad_norm": 0.6619166731834412, + "learning_rate": 1.911630364751649e-05, + "loss": 2.2175, + "step": 4485 + }, + { + "epoch": 0.15, + "grad_norm": 0.6970120072364807, + "learning_rate": 1.9115866735286626e-05, + "loss": 2.2132, + "step": 4486 + }, + { + "epoch": 0.15, + "grad_norm": 0.6771944165229797, + "learning_rate": 1.9115429720071043e-05, + "loss": 2.2163, + "step": 4487 + }, + { + "epoch": 0.15, + "grad_norm": 0.6751306056976318, + "learning_rate": 1.911499260187467e-05, + "loss": 2.1743, + "step": 4488 + }, + { + "epoch": 0.15, + "grad_norm": 0.7012993097305298, + "learning_rate": 1.9114555380702457e-05, + "loss": 2.2428, + "step": 4489 + }, + { + "epoch": 0.15, + "grad_norm": 0.6847450137138367, + "learning_rate": 1.9114118056559337e-05, + "loss": 2.3057, + "step": 4490 + }, + { + "epoch": 0.15, + "grad_norm": 0.6945045590400696, + "learning_rate": 1.9113680629450256e-05, + "loss": 2.2047, + "step": 4491 + }, + { + "epoch": 0.15, + "grad_norm": 0.6948922276496887, + "learning_rate": 1.911324309938015e-05, + "loss": 2.2938, + "step": 4492 + }, + { + "epoch": 0.15, + "grad_norm": 0.7318688035011292, + "learning_rate": 1.9112805466353964e-05, + "loss": 2.2778, + "step": 4493 + }, + { + "epoch": 0.15, + "grad_norm": 0.6989498138427734, + "learning_rate": 1.9112367730376646e-05, + "loss": 2.193, + "step": 4494 + }, + { + "epoch": 0.15, + "grad_norm": 0.7335171699523926, + "learning_rate": 1.9111929891453132e-05, + "loss": 2.3043, + "step": 4495 + }, + { + "epoch": 0.15, + "grad_norm": 0.7089048624038696, + "learning_rate": 1.9111491949588376e-05, + "loss": 2.2643, + "step": 4496 + }, + { + "epoch": 0.15, + "grad_norm": 0.6977636218070984, + "learning_rate": 1.9111053904787325e-05, + "loss": 2.2011, + "step": 4497 + }, + { + "epoch": 0.15, + "grad_norm": 0.6723150610923767, + "learning_rate": 1.911061575705493e-05, + "loss": 2.2092, + "step": 4498 + }, + { + "epoch": 0.15, + "grad_norm": 0.6890773177146912, + "learning_rate": 1.911017750639613e-05, + "loss": 2.1875, + "step": 4499 + }, + { + "epoch": 0.15, + "grad_norm": 0.6538310647010803, + "learning_rate": 1.9109739152815888e-05, + "loss": 2.1812, + "step": 4500 + }, + { + "epoch": 0.15, + "grad_norm": 0.702815055847168, + "learning_rate": 1.9109300696319152e-05, + "loss": 2.2143, + "step": 4501 + }, + { + "epoch": 0.15, + "grad_norm": 0.6854480504989624, + "learning_rate": 1.9108862136910877e-05, + "loss": 2.2009, + "step": 4502 + }, + { + "epoch": 0.15, + "grad_norm": 0.6895673274993896, + "learning_rate": 1.9108423474596014e-05, + "loss": 2.2761, + "step": 4503 + }, + { + "epoch": 0.15, + "grad_norm": 0.6680657863616943, + "learning_rate": 1.910798470937952e-05, + "loss": 2.2378, + "step": 4504 + }, + { + "epoch": 0.15, + "grad_norm": 0.6699315309524536, + "learning_rate": 1.910754584126635e-05, + "loss": 2.1556, + "step": 4505 + }, + { + "epoch": 0.15, + "grad_norm": 0.7083072662353516, + "learning_rate": 1.910710687026147e-05, + "loss": 2.3596, + "step": 4506 + }, + { + "epoch": 0.15, + "grad_norm": 0.6675357818603516, + "learning_rate": 1.910666779636983e-05, + "loss": 2.2646, + "step": 4507 + }, + { + "epoch": 0.15, + "grad_norm": 0.6957983374595642, + "learning_rate": 1.9106228619596395e-05, + "loss": 2.2206, + "step": 4508 + }, + { + "epoch": 0.15, + "grad_norm": 0.6860612630844116, + "learning_rate": 1.910578933994613e-05, + "loss": 2.2531, + "step": 4509 + }, + { + "epoch": 0.15, + "grad_norm": 0.6830098628997803, + "learning_rate": 1.910534995742399e-05, + "loss": 2.2069, + "step": 4510 + }, + { + "epoch": 0.15, + "grad_norm": 0.7457085251808167, + "learning_rate": 1.9104910472034942e-05, + "loss": 2.1973, + "step": 4511 + }, + { + "epoch": 0.15, + "grad_norm": 0.6901485323905945, + "learning_rate": 1.910447088378395e-05, + "loss": 2.2808, + "step": 4512 + }, + { + "epoch": 0.15, + "grad_norm": 0.6666480898857117, + "learning_rate": 1.9104031192675984e-05, + "loss": 2.2288, + "step": 4513 + }, + { + "epoch": 0.15, + "grad_norm": 0.720283031463623, + "learning_rate": 1.910359139871601e-05, + "loss": 2.2478, + "step": 4514 + }, + { + "epoch": 0.15, + "grad_norm": 0.7002878785133362, + "learning_rate": 1.9103151501908993e-05, + "loss": 2.246, + "step": 4515 + }, + { + "epoch": 0.15, + "grad_norm": 0.6835904121398926, + "learning_rate": 1.910271150225991e-05, + "loss": 2.2377, + "step": 4516 + }, + { + "epoch": 0.15, + "grad_norm": 0.7141403555870056, + "learning_rate": 1.910227139977372e-05, + "loss": 2.2755, + "step": 4517 + }, + { + "epoch": 0.15, + "grad_norm": 0.6972172856330872, + "learning_rate": 1.9101831194455406e-05, + "loss": 2.267, + "step": 4518 + }, + { + "epoch": 0.15, + "grad_norm": 0.666835606098175, + "learning_rate": 1.910139088630994e-05, + "loss": 2.2295, + "step": 4519 + }, + { + "epoch": 0.15, + "grad_norm": 0.6905609369277954, + "learning_rate": 1.9100950475342292e-05, + "loss": 2.2884, + "step": 4520 + }, + { + "epoch": 0.15, + "grad_norm": 0.7138857841491699, + "learning_rate": 1.9100509961557435e-05, + "loss": 2.2752, + "step": 4521 + }, + { + "epoch": 0.15, + "grad_norm": 0.6792369484901428, + "learning_rate": 1.9100069344960353e-05, + "loss": 2.3247, + "step": 4522 + }, + { + "epoch": 0.15, + "grad_norm": 0.6814497709274292, + "learning_rate": 1.9099628625556023e-05, + "loss": 2.2487, + "step": 4523 + }, + { + "epoch": 0.15, + "grad_norm": 0.6874086856842041, + "learning_rate": 1.9099187803349418e-05, + "loss": 2.2425, + "step": 4524 + }, + { + "epoch": 0.15, + "grad_norm": 0.7241448760032654, + "learning_rate": 1.9098746878345526e-05, + "loss": 2.2633, + "step": 4525 + }, + { + "epoch": 0.15, + "grad_norm": 0.6647095680236816, + "learning_rate": 1.909830585054932e-05, + "loss": 2.1892, + "step": 4526 + }, + { + "epoch": 0.15, + "grad_norm": 0.6803613901138306, + "learning_rate": 1.9097864719965788e-05, + "loss": 2.228, + "step": 4527 + }, + { + "epoch": 0.15, + "grad_norm": 0.6743932962417603, + "learning_rate": 1.9097423486599914e-05, + "loss": 2.2926, + "step": 4528 + }, + { + "epoch": 0.15, + "grad_norm": 0.667098879814148, + "learning_rate": 1.9096982150456678e-05, + "loss": 2.163, + "step": 4529 + }, + { + "epoch": 0.15, + "grad_norm": 0.7247840762138367, + "learning_rate": 1.909654071154107e-05, + "loss": 2.1927, + "step": 4530 + }, + { + "epoch": 0.15, + "grad_norm": 0.6938689947128296, + "learning_rate": 1.9096099169858077e-05, + "loss": 2.256, + "step": 4531 + }, + { + "epoch": 0.15, + "grad_norm": 0.6706234812736511, + "learning_rate": 1.9095657525412687e-05, + "loss": 2.1942, + "step": 4532 + }, + { + "epoch": 0.15, + "grad_norm": 0.6712916493415833, + "learning_rate": 1.9095215778209888e-05, + "loss": 2.1464, + "step": 4533 + }, + { + "epoch": 0.15, + "grad_norm": 0.6955000162124634, + "learning_rate": 1.909477392825467e-05, + "loss": 2.2807, + "step": 4534 + }, + { + "epoch": 0.15, + "grad_norm": 0.7521129250526428, + "learning_rate": 1.909433197555203e-05, + "loss": 2.1938, + "step": 4535 + }, + { + "epoch": 0.15, + "grad_norm": 0.6938499808311462, + "learning_rate": 1.9093889920106954e-05, + "loss": 2.2998, + "step": 4536 + }, + { + "epoch": 0.15, + "grad_norm": 0.6937317848205566, + "learning_rate": 1.909344776192444e-05, + "loss": 2.2573, + "step": 4537 + }, + { + "epoch": 0.15, + "grad_norm": 0.6733994483947754, + "learning_rate": 1.909300550100948e-05, + "loss": 2.2581, + "step": 4538 + }, + { + "epoch": 0.15, + "grad_norm": 0.6996141076087952, + "learning_rate": 1.9092563137367077e-05, + "loss": 2.2948, + "step": 4539 + }, + { + "epoch": 0.15, + "grad_norm": 0.6986212730407715, + "learning_rate": 1.9092120671002222e-05, + "loss": 2.2509, + "step": 4540 + }, + { + "epoch": 0.15, + "grad_norm": 0.6827740669250488, + "learning_rate": 1.909167810191992e-05, + "loss": 2.2315, + "step": 4541 + }, + { + "epoch": 0.15, + "grad_norm": 0.7064968347549438, + "learning_rate": 1.909123543012516e-05, + "loss": 2.1918, + "step": 4542 + }, + { + "epoch": 0.15, + "grad_norm": 0.6934424042701721, + "learning_rate": 1.9090792655622958e-05, + "loss": 2.2324, + "step": 4543 + }, + { + "epoch": 0.15, + "grad_norm": 0.7085209488868713, + "learning_rate": 1.90903497784183e-05, + "loss": 2.2602, + "step": 4544 + }, + { + "epoch": 0.15, + "grad_norm": 0.7089028358459473, + "learning_rate": 1.9089906798516206e-05, + "loss": 2.2524, + "step": 4545 + }, + { + "epoch": 0.15, + "grad_norm": 0.6986806392669678, + "learning_rate": 1.908946371592167e-05, + "loss": 2.2122, + "step": 4546 + }, + { + "epoch": 0.15, + "grad_norm": 0.6926932334899902, + "learning_rate": 1.9089020530639695e-05, + "loss": 2.231, + "step": 4547 + }, + { + "epoch": 0.15, + "grad_norm": 0.7113991975784302, + "learning_rate": 1.9088577242675294e-05, + "loss": 2.1531, + "step": 4548 + }, + { + "epoch": 0.15, + "grad_norm": 0.682905912399292, + "learning_rate": 1.9088133852033475e-05, + "loss": 2.2802, + "step": 4549 + }, + { + "epoch": 0.15, + "grad_norm": 0.7404407262802124, + "learning_rate": 1.908769035871925e-05, + "loss": 2.2866, + "step": 4550 + }, + { + "epoch": 0.15, + "grad_norm": 0.6578441858291626, + "learning_rate": 1.908724676273762e-05, + "loss": 2.1688, + "step": 4551 + }, + { + "epoch": 0.15, + "grad_norm": 0.6865326166152954, + "learning_rate": 1.9086803064093604e-05, + "loss": 2.2487, + "step": 4552 + }, + { + "epoch": 0.15, + "grad_norm": 0.6928645968437195, + "learning_rate": 1.9086359262792214e-05, + "loss": 2.2492, + "step": 4553 + }, + { + "epoch": 0.15, + "grad_norm": 0.6785802245140076, + "learning_rate": 1.9085915358838458e-05, + "loss": 2.2255, + "step": 4554 + }, + { + "epoch": 0.15, + "grad_norm": 0.6708192229270935, + "learning_rate": 1.9085471352237356e-05, + "loss": 2.2274, + "step": 4555 + }, + { + "epoch": 0.15, + "grad_norm": 0.6953854560852051, + "learning_rate": 1.9085027242993927e-05, + "loss": 2.2109, + "step": 4556 + }, + { + "epoch": 0.15, + "grad_norm": 0.7028687000274658, + "learning_rate": 1.908458303111318e-05, + "loss": 2.3107, + "step": 4557 + }, + { + "epoch": 0.15, + "grad_norm": 0.6851649880409241, + "learning_rate": 1.908413871660014e-05, + "loss": 2.1954, + "step": 4558 + }, + { + "epoch": 0.15, + "grad_norm": 0.7201548218727112, + "learning_rate": 1.9083694299459827e-05, + "loss": 2.3498, + "step": 4559 + }, + { + "epoch": 0.15, + "grad_norm": 0.7277721762657166, + "learning_rate": 1.9083249779697258e-05, + "loss": 2.1507, + "step": 4560 + }, + { + "epoch": 0.15, + "grad_norm": 0.694873571395874, + "learning_rate": 1.9082805157317454e-05, + "loss": 2.2682, + "step": 4561 + }, + { + "epoch": 0.15, + "grad_norm": 0.7165713310241699, + "learning_rate": 1.908236043232544e-05, + "loss": 2.2574, + "step": 4562 + }, + { + "epoch": 0.15, + "grad_norm": 0.7065345048904419, + "learning_rate": 1.9081915604726246e-05, + "loss": 2.2643, + "step": 4563 + }, + { + "epoch": 0.15, + "grad_norm": 0.6978501677513123, + "learning_rate": 1.9081470674524887e-05, + "loss": 2.1966, + "step": 4564 + }, + { + "epoch": 0.15, + "grad_norm": 0.6929413676261902, + "learning_rate": 1.9081025641726395e-05, + "loss": 2.2575, + "step": 4565 + }, + { + "epoch": 0.15, + "grad_norm": 0.710230827331543, + "learning_rate": 1.9080580506335798e-05, + "loss": 2.189, + "step": 4566 + }, + { + "epoch": 0.15, + "grad_norm": 0.7041822671890259, + "learning_rate": 1.9080135268358123e-05, + "loss": 2.1717, + "step": 4567 + }, + { + "epoch": 0.15, + "grad_norm": 0.712323009967804, + "learning_rate": 1.9079689927798402e-05, + "loss": 2.3079, + "step": 4568 + }, + { + "epoch": 0.15, + "grad_norm": 0.6946340203285217, + "learning_rate": 1.9079244484661667e-05, + "loss": 2.1584, + "step": 4569 + }, + { + "epoch": 0.15, + "grad_norm": 0.7263460755348206, + "learning_rate": 1.9078798938952948e-05, + "loss": 2.2643, + "step": 4570 + }, + { + "epoch": 0.15, + "grad_norm": 0.6815439462661743, + "learning_rate": 1.9078353290677277e-05, + "loss": 2.2765, + "step": 4571 + }, + { + "epoch": 0.15, + "grad_norm": 0.6965142488479614, + "learning_rate": 1.9077907539839696e-05, + "loss": 2.236, + "step": 4572 + }, + { + "epoch": 0.15, + "grad_norm": 0.7325807809829712, + "learning_rate": 1.9077461686445233e-05, + "loss": 2.2505, + "step": 4573 + }, + { + "epoch": 0.15, + "grad_norm": 0.6771026253700256, + "learning_rate": 1.907701573049893e-05, + "loss": 2.1882, + "step": 4574 + }, + { + "epoch": 0.15, + "grad_norm": 0.7427154183387756, + "learning_rate": 1.9076569672005818e-05, + "loss": 2.2702, + "step": 4575 + }, + { + "epoch": 0.15, + "grad_norm": 2.0212326049804688, + "learning_rate": 1.9076123510970946e-05, + "loss": 2.2655, + "step": 4576 + }, + { + "epoch": 0.15, + "grad_norm": 0.7426022291183472, + "learning_rate": 1.907567724739935e-05, + "loss": 2.2891, + "step": 4577 + }, + { + "epoch": 0.15, + "grad_norm": 0.6935437321662903, + "learning_rate": 1.9075230881296062e-05, + "loss": 2.2058, + "step": 4578 + }, + { + "epoch": 0.15, + "grad_norm": 0.692090630531311, + "learning_rate": 1.9074784412666143e-05, + "loss": 2.2189, + "step": 4579 + }, + { + "epoch": 0.15, + "grad_norm": 0.6593171954154968, + "learning_rate": 1.9074337841514625e-05, + "loss": 2.1989, + "step": 4580 + }, + { + "epoch": 0.15, + "grad_norm": 0.7352481484413147, + "learning_rate": 1.9073891167846557e-05, + "loss": 2.2326, + "step": 4581 + }, + { + "epoch": 0.15, + "grad_norm": 0.6694567799568176, + "learning_rate": 1.9073444391666984e-05, + "loss": 2.2396, + "step": 4582 + }, + { + "epoch": 0.15, + "grad_norm": 0.6737841367721558, + "learning_rate": 1.9072997512980954e-05, + "loss": 2.2062, + "step": 4583 + }, + { + "epoch": 0.15, + "grad_norm": 0.6751940846443176, + "learning_rate": 1.907255053179352e-05, + "loss": 2.2713, + "step": 4584 + }, + { + "epoch": 0.15, + "grad_norm": 0.7192975878715515, + "learning_rate": 1.907210344810972e-05, + "loss": 2.3455, + "step": 4585 + }, + { + "epoch": 0.15, + "grad_norm": 0.6888840794563293, + "learning_rate": 1.9071656261934617e-05, + "loss": 2.2298, + "step": 4586 + }, + { + "epoch": 0.15, + "grad_norm": 0.6834296584129333, + "learning_rate": 1.9071208973273254e-05, + "loss": 2.1835, + "step": 4587 + }, + { + "epoch": 0.15, + "grad_norm": 0.7026612162590027, + "learning_rate": 1.907076158213069e-05, + "loss": 2.194, + "step": 4588 + }, + { + "epoch": 0.15, + "grad_norm": 0.6923760771751404, + "learning_rate": 1.907031408851198e-05, + "loss": 2.2319, + "step": 4589 + }, + { + "epoch": 0.15, + "grad_norm": 0.6889351010322571, + "learning_rate": 1.9069866492422172e-05, + "loss": 2.281, + "step": 4590 + }, + { + "epoch": 0.15, + "grad_norm": 0.7010775804519653, + "learning_rate": 1.9069418793866332e-05, + "loss": 2.1759, + "step": 4591 + }, + { + "epoch": 0.15, + "grad_norm": 0.6823865175247192, + "learning_rate": 1.9068970992849514e-05, + "loss": 2.1999, + "step": 4592 + }, + { + "epoch": 0.15, + "grad_norm": 0.6949930787086487, + "learning_rate": 1.9068523089376777e-05, + "loss": 2.168, + "step": 4593 + }, + { + "epoch": 0.15, + "grad_norm": 0.7002421617507935, + "learning_rate": 1.9068075083453175e-05, + "loss": 2.2158, + "step": 4594 + }, + { + "epoch": 0.15, + "grad_norm": 0.6876270771026611, + "learning_rate": 1.9067626975083778e-05, + "loss": 2.209, + "step": 4595 + }, + { + "epoch": 0.15, + "grad_norm": 0.6715017557144165, + "learning_rate": 1.906717876427365e-05, + "loss": 2.2091, + "step": 4596 + }, + { + "epoch": 0.15, + "grad_norm": 0.6766876578330994, + "learning_rate": 1.9066730451027847e-05, + "loss": 2.2066, + "step": 4597 + }, + { + "epoch": 0.15, + "grad_norm": 0.7371006011962891, + "learning_rate": 1.9066282035351437e-05, + "loss": 2.2863, + "step": 4598 + }, + { + "epoch": 0.15, + "grad_norm": 0.7380681037902832, + "learning_rate": 1.9065833517249485e-05, + "loss": 2.1779, + "step": 4599 + }, + { + "epoch": 0.15, + "grad_norm": 0.7549762725830078, + "learning_rate": 1.906538489672706e-05, + "loss": 2.3302, + "step": 4600 + }, + { + "epoch": 0.15, + "grad_norm": 0.6860675811767578, + "learning_rate": 1.906493617378923e-05, + "loss": 2.2591, + "step": 4601 + }, + { + "epoch": 0.15, + "grad_norm": 0.6979547142982483, + "learning_rate": 1.906448734844106e-05, + "loss": 2.2395, + "step": 4602 + }, + { + "epoch": 0.15, + "grad_norm": 0.6913871765136719, + "learning_rate": 1.906403842068763e-05, + "loss": 2.3294, + "step": 4603 + }, + { + "epoch": 0.15, + "grad_norm": 0.6853166222572327, + "learning_rate": 1.9063589390534e-05, + "loss": 2.1914, + "step": 4604 + }, + { + "epoch": 0.15, + "grad_norm": 0.7007079720497131, + "learning_rate": 1.9063140257985253e-05, + "loss": 2.2107, + "step": 4605 + }, + { + "epoch": 0.15, + "grad_norm": 0.6780895590782166, + "learning_rate": 1.9062691023046457e-05, + "loss": 2.2244, + "step": 4606 + }, + { + "epoch": 0.15, + "grad_norm": 0.7327328324317932, + "learning_rate": 1.9062241685722692e-05, + "loss": 2.2269, + "step": 4607 + }, + { + "epoch": 0.15, + "grad_norm": 0.6897546052932739, + "learning_rate": 1.9061792246019028e-05, + "loss": 2.2254, + "step": 4608 + }, + { + "epoch": 0.15, + "grad_norm": 0.6947484612464905, + "learning_rate": 1.906134270394055e-05, + "loss": 2.3009, + "step": 4609 + }, + { + "epoch": 0.15, + "grad_norm": 0.6870993971824646, + "learning_rate": 1.9060893059492328e-05, + "loss": 2.0749, + "step": 4610 + }, + { + "epoch": 0.15, + "grad_norm": 0.7048745155334473, + "learning_rate": 1.906044331267945e-05, + "loss": 2.262, + "step": 4611 + }, + { + "epoch": 0.15, + "grad_norm": 0.6902496814727783, + "learning_rate": 1.905999346350699e-05, + "loss": 2.2667, + "step": 4612 + }, + { + "epoch": 0.15, + "grad_norm": 0.6931498050689697, + "learning_rate": 1.9059543511980036e-05, + "loss": 2.243, + "step": 4613 + }, + { + "epoch": 0.15, + "grad_norm": 0.6988371014595032, + "learning_rate": 1.9059093458103664e-05, + "loss": 2.2357, + "step": 4614 + }, + { + "epoch": 0.15, + "grad_norm": 0.7046211957931519, + "learning_rate": 1.905864330188297e-05, + "loss": 2.2414, + "step": 4615 + }, + { + "epoch": 0.15, + "grad_norm": 0.6742013096809387, + "learning_rate": 1.9058193043323032e-05, + "loss": 2.2386, + "step": 4616 + }, + { + "epoch": 0.15, + "grad_norm": 0.6920917630195618, + "learning_rate": 1.9057742682428933e-05, + "loss": 2.2034, + "step": 4617 + }, + { + "epoch": 0.15, + "grad_norm": 0.688748836517334, + "learning_rate": 1.905729221920577e-05, + "loss": 2.2308, + "step": 4618 + }, + { + "epoch": 0.15, + "grad_norm": 0.6944236159324646, + "learning_rate": 1.9056841653658624e-05, + "loss": 2.2351, + "step": 4619 + }, + { + "epoch": 0.15, + "grad_norm": 0.6904944181442261, + "learning_rate": 1.9056390985792592e-05, + "loss": 2.2596, + "step": 4620 + }, + { + "epoch": 0.15, + "grad_norm": 0.7856355905532837, + "learning_rate": 1.9055940215612763e-05, + "loss": 2.2648, + "step": 4621 + }, + { + "epoch": 0.15, + "grad_norm": 0.6989129185676575, + "learning_rate": 1.9055489343124225e-05, + "loss": 2.2286, + "step": 4622 + }, + { + "epoch": 0.15, + "grad_norm": 0.6861041784286499, + "learning_rate": 1.9055038368332078e-05, + "loss": 2.1994, + "step": 4623 + }, + { + "epoch": 0.15, + "grad_norm": 0.6962595582008362, + "learning_rate": 1.905458729124141e-05, + "loss": 2.2713, + "step": 4624 + }, + { + "epoch": 0.15, + "grad_norm": 0.7135487198829651, + "learning_rate": 1.9054136111857327e-05, + "loss": 2.2302, + "step": 4625 + }, + { + "epoch": 0.15, + "grad_norm": 0.7174801826477051, + "learning_rate": 1.9053684830184916e-05, + "loss": 2.2462, + "step": 4626 + }, + { + "epoch": 0.15, + "grad_norm": 0.7136462926864624, + "learning_rate": 1.9053233446229285e-05, + "loss": 2.3219, + "step": 4627 + }, + { + "epoch": 0.15, + "grad_norm": 0.7102366089820862, + "learning_rate": 1.9052781959995524e-05, + "loss": 2.2153, + "step": 4628 + }, + { + "epoch": 0.15, + "grad_norm": 0.6911572813987732, + "learning_rate": 1.9052330371488738e-05, + "loss": 2.2451, + "step": 4629 + }, + { + "epoch": 0.15, + "grad_norm": 0.6709862351417542, + "learning_rate": 1.905187868071403e-05, + "loss": 2.263, + "step": 4630 + }, + { + "epoch": 0.15, + "grad_norm": 0.6938295960426331, + "learning_rate": 1.90514268876765e-05, + "loss": 2.2877, + "step": 4631 + }, + { + "epoch": 0.15, + "grad_norm": 0.7085367441177368, + "learning_rate": 1.9050974992381256e-05, + "loss": 2.205, + "step": 4632 + }, + { + "epoch": 0.15, + "grad_norm": 0.707690954208374, + "learning_rate": 1.90505229948334e-05, + "loss": 2.2603, + "step": 4633 + }, + { + "epoch": 0.15, + "grad_norm": 0.666207492351532, + "learning_rate": 1.905007089503804e-05, + "loss": 2.295, + "step": 4634 + }, + { + "epoch": 0.15, + "grad_norm": 0.722149133682251, + "learning_rate": 1.904961869300028e-05, + "loss": 2.2134, + "step": 4635 + }, + { + "epoch": 0.15, + "grad_norm": 0.6758798956871033, + "learning_rate": 1.9049166388725237e-05, + "loss": 2.2543, + "step": 4636 + }, + { + "epoch": 0.15, + "grad_norm": 0.7259350419044495, + "learning_rate": 1.904871398221801e-05, + "loss": 2.2901, + "step": 4637 + }, + { + "epoch": 0.15, + "grad_norm": 0.6832470893859863, + "learning_rate": 1.9048261473483718e-05, + "loss": 2.2933, + "step": 4638 + }, + { + "epoch": 0.15, + "grad_norm": 0.7110921740531921, + "learning_rate": 1.904780886252747e-05, + "loss": 2.2172, + "step": 4639 + }, + { + "epoch": 0.15, + "grad_norm": 0.7228556275367737, + "learning_rate": 1.904735614935438e-05, + "loss": 2.2116, + "step": 4640 + }, + { + "epoch": 0.15, + "grad_norm": 0.6668363809585571, + "learning_rate": 1.9046903333969564e-05, + "loss": 2.2295, + "step": 4641 + }, + { + "epoch": 0.15, + "grad_norm": 0.7395142316818237, + "learning_rate": 1.9046450416378135e-05, + "loss": 2.2783, + "step": 4642 + }, + { + "epoch": 0.15, + "grad_norm": 0.6834067702293396, + "learning_rate": 1.904599739658521e-05, + "loss": 2.2767, + "step": 4643 + }, + { + "epoch": 0.15, + "grad_norm": 0.6739209294319153, + "learning_rate": 1.904554427459591e-05, + "loss": 2.1694, + "step": 4644 + }, + { + "epoch": 0.15, + "grad_norm": 0.6868010759353638, + "learning_rate": 1.904509105041535e-05, + "loss": 2.2444, + "step": 4645 + }, + { + "epoch": 0.15, + "grad_norm": 0.6862638592720032, + "learning_rate": 1.9044637724048654e-05, + "loss": 2.2702, + "step": 4646 + }, + { + "epoch": 0.15, + "grad_norm": 0.694426953792572, + "learning_rate": 1.904418429550094e-05, + "loss": 2.2437, + "step": 4647 + }, + { + "epoch": 0.15, + "grad_norm": 0.7224493026733398, + "learning_rate": 1.9043730764777335e-05, + "loss": 2.2029, + "step": 4648 + }, + { + "epoch": 0.15, + "grad_norm": 0.6839559674263, + "learning_rate": 1.9043277131882957e-05, + "loss": 2.2095, + "step": 4649 + }, + { + "epoch": 0.15, + "grad_norm": 0.6855702996253967, + "learning_rate": 1.9042823396822935e-05, + "loss": 2.2297, + "step": 4650 + }, + { + "epoch": 0.15, + "grad_norm": 0.7167149782180786, + "learning_rate": 1.904236955960239e-05, + "loss": 2.2956, + "step": 4651 + }, + { + "epoch": 0.15, + "grad_norm": 0.697726845741272, + "learning_rate": 1.9041915620226458e-05, + "loss": 2.2994, + "step": 4652 + }, + { + "epoch": 0.15, + "grad_norm": 0.7095721960067749, + "learning_rate": 1.9041461578700262e-05, + "loss": 2.2747, + "step": 4653 + }, + { + "epoch": 0.15, + "grad_norm": 0.6605771780014038, + "learning_rate": 1.9041007435028934e-05, + "loss": 2.1758, + "step": 4654 + }, + { + "epoch": 0.15, + "grad_norm": 0.6793345808982849, + "learning_rate": 1.90405531892176e-05, + "loss": 2.2372, + "step": 4655 + }, + { + "epoch": 0.15, + "grad_norm": 0.6980547308921814, + "learning_rate": 1.904009884127139e-05, + "loss": 2.2648, + "step": 4656 + }, + { + "epoch": 0.15, + "grad_norm": 0.6922035217285156, + "learning_rate": 1.9039644391195448e-05, + "loss": 2.178, + "step": 4657 + }, + { + "epoch": 0.15, + "grad_norm": 0.7471354007720947, + "learning_rate": 1.9039189838994895e-05, + "loss": 2.2931, + "step": 4658 + }, + { + "epoch": 0.16, + "grad_norm": 0.7050546407699585, + "learning_rate": 1.9038735184674877e-05, + "loss": 2.2606, + "step": 4659 + }, + { + "epoch": 0.16, + "grad_norm": 0.6797491312026978, + "learning_rate": 1.9038280428240528e-05, + "loss": 2.1881, + "step": 4660 + }, + { + "epoch": 0.16, + "grad_norm": 0.6849299669265747, + "learning_rate": 1.903782556969698e-05, + "loss": 2.2772, + "step": 4661 + }, + { + "epoch": 0.16, + "grad_norm": 0.6662061810493469, + "learning_rate": 1.9037370609049377e-05, + "loss": 2.1805, + "step": 4662 + }, + { + "epoch": 0.16, + "grad_norm": 0.6830024719238281, + "learning_rate": 1.9036915546302856e-05, + "loss": 2.1578, + "step": 4663 + }, + { + "epoch": 0.16, + "grad_norm": 0.6732627749443054, + "learning_rate": 1.903646038146256e-05, + "loss": 2.1806, + "step": 4664 + }, + { + "epoch": 0.16, + "grad_norm": 0.6966559886932373, + "learning_rate": 1.9036005114533633e-05, + "loss": 2.2785, + "step": 4665 + }, + { + "epoch": 0.16, + "grad_norm": 0.7576621174812317, + "learning_rate": 1.903554974552121e-05, + "loss": 2.2234, + "step": 4666 + }, + { + "epoch": 0.16, + "grad_norm": 0.69753497838974, + "learning_rate": 1.903509427443045e-05, + "loss": 2.2634, + "step": 4667 + }, + { + "epoch": 0.16, + "grad_norm": 0.691307783126831, + "learning_rate": 1.903463870126648e-05, + "loss": 2.1989, + "step": 4668 + }, + { + "epoch": 0.16, + "grad_norm": 0.6854883432388306, + "learning_rate": 1.9034183026034464e-05, + "loss": 2.2481, + "step": 4669 + }, + { + "epoch": 0.16, + "grad_norm": 0.7330436706542969, + "learning_rate": 1.903372724873954e-05, + "loss": 2.3033, + "step": 4670 + }, + { + "epoch": 0.16, + "grad_norm": 0.6856635808944702, + "learning_rate": 1.9033271369386857e-05, + "loss": 2.2615, + "step": 4671 + }, + { + "epoch": 0.16, + "grad_norm": 0.7164098620414734, + "learning_rate": 1.9032815387981574e-05, + "loss": 2.1888, + "step": 4672 + }, + { + "epoch": 0.16, + "grad_norm": 0.7534990906715393, + "learning_rate": 1.9032359304528835e-05, + "loss": 2.206, + "step": 4673 + }, + { + "epoch": 0.16, + "grad_norm": 0.6944230794906616, + "learning_rate": 1.903190311903379e-05, + "loss": 2.2532, + "step": 4674 + }, + { + "epoch": 0.16, + "grad_norm": 0.6585599780082703, + "learning_rate": 1.90314468315016e-05, + "loss": 2.266, + "step": 4675 + }, + { + "epoch": 0.16, + "grad_norm": 0.6750780344009399, + "learning_rate": 1.9030990441937415e-05, + "loss": 2.2157, + "step": 4676 + }, + { + "epoch": 0.16, + "grad_norm": 0.6668248176574707, + "learning_rate": 1.9030533950346397e-05, + "loss": 2.2001, + "step": 4677 + }, + { + "epoch": 0.16, + "grad_norm": 0.676856279373169, + "learning_rate": 1.9030077356733695e-05, + "loss": 2.2057, + "step": 4678 + }, + { + "epoch": 0.16, + "grad_norm": 0.7172597050666809, + "learning_rate": 1.902962066110447e-05, + "loss": 2.2366, + "step": 4679 + }, + { + "epoch": 0.16, + "grad_norm": 0.6745300889015198, + "learning_rate": 1.9029163863463885e-05, + "loss": 2.2293, + "step": 4680 + }, + { + "epoch": 0.16, + "grad_norm": 0.6895262002944946, + "learning_rate": 1.90287069638171e-05, + "loss": 2.2089, + "step": 4681 + }, + { + "epoch": 0.16, + "grad_norm": 0.6986544728279114, + "learning_rate": 1.902824996216927e-05, + "loss": 2.1799, + "step": 4682 + }, + { + "epoch": 0.16, + "grad_norm": 0.6920831799507141, + "learning_rate": 1.9027792858525567e-05, + "loss": 2.2806, + "step": 4683 + }, + { + "epoch": 0.16, + "grad_norm": 0.7168166041374207, + "learning_rate": 1.9027335652891148e-05, + "loss": 2.2487, + "step": 4684 + }, + { + "epoch": 0.16, + "grad_norm": 0.6964851021766663, + "learning_rate": 1.9026878345271184e-05, + "loss": 2.1965, + "step": 4685 + }, + { + "epoch": 0.16, + "grad_norm": 0.6743322014808655, + "learning_rate": 1.9026420935670838e-05, + "loss": 2.2135, + "step": 4686 + }, + { + "epoch": 0.16, + "grad_norm": 0.7139073610305786, + "learning_rate": 1.902596342409528e-05, + "loss": 2.2628, + "step": 4687 + }, + { + "epoch": 0.16, + "grad_norm": 0.6876122355461121, + "learning_rate": 1.9025505810549673e-05, + "loss": 2.2583, + "step": 4688 + }, + { + "epoch": 0.16, + "grad_norm": 0.7109939455986023, + "learning_rate": 1.9025048095039194e-05, + "loss": 2.2455, + "step": 4689 + }, + { + "epoch": 0.16, + "grad_norm": 0.6654071807861328, + "learning_rate": 1.9024590277569007e-05, + "loss": 2.261, + "step": 4690 + }, + { + "epoch": 0.16, + "grad_norm": 0.6604311466217041, + "learning_rate": 1.902413235814429e-05, + "loss": 2.1672, + "step": 4691 + }, + { + "epoch": 0.16, + "grad_norm": 0.681565523147583, + "learning_rate": 1.9023674336770218e-05, + "loss": 2.2215, + "step": 4692 + }, + { + "epoch": 0.16, + "grad_norm": 0.6992550492286682, + "learning_rate": 1.9023216213451962e-05, + "loss": 2.2648, + "step": 4693 + }, + { + "epoch": 0.16, + "grad_norm": 0.6947644948959351, + "learning_rate": 1.902275798819469e-05, + "loss": 2.2481, + "step": 4694 + }, + { + "epoch": 0.16, + "grad_norm": 0.7007181644439697, + "learning_rate": 1.902229966100359e-05, + "loss": 2.2614, + "step": 4695 + }, + { + "epoch": 0.16, + "grad_norm": 0.6869601011276245, + "learning_rate": 1.9021841231883838e-05, + "loss": 2.2218, + "step": 4696 + }, + { + "epoch": 0.16, + "grad_norm": 0.6765796542167664, + "learning_rate": 1.9021382700840608e-05, + "loss": 2.187, + "step": 4697 + }, + { + "epoch": 0.16, + "grad_norm": 0.7050783038139343, + "learning_rate": 1.9020924067879084e-05, + "loss": 2.252, + "step": 4698 + }, + { + "epoch": 0.16, + "grad_norm": 0.689795196056366, + "learning_rate": 1.9020465333004447e-05, + "loss": 2.2411, + "step": 4699 + }, + { + "epoch": 0.16, + "grad_norm": 0.7019768357276917, + "learning_rate": 1.9020006496221883e-05, + "loss": 2.1898, + "step": 4700 + }, + { + "epoch": 0.16, + "grad_norm": 0.7195585370063782, + "learning_rate": 1.9019547557536566e-05, + "loss": 2.2732, + "step": 4701 + }, + { + "epoch": 0.16, + "grad_norm": 0.6751661896705627, + "learning_rate": 1.901908851695369e-05, + "loss": 2.1515, + "step": 4702 + }, + { + "epoch": 0.16, + "grad_norm": 0.7056312561035156, + "learning_rate": 1.9018629374478437e-05, + "loss": 2.127, + "step": 4703 + }, + { + "epoch": 0.16, + "grad_norm": 0.7532898187637329, + "learning_rate": 1.9018170130115995e-05, + "loss": 2.2179, + "step": 4704 + }, + { + "epoch": 0.16, + "grad_norm": 0.7017446160316467, + "learning_rate": 1.901771078387155e-05, + "loss": 2.2784, + "step": 4705 + }, + { + "epoch": 0.16, + "grad_norm": 0.682104766368866, + "learning_rate": 1.9017251335750295e-05, + "loss": 2.1998, + "step": 4706 + }, + { + "epoch": 0.16, + "grad_norm": 0.6823832392692566, + "learning_rate": 1.901679178575742e-05, + "loss": 2.2293, + "step": 4707 + }, + { + "epoch": 0.16, + "grad_norm": 0.7049028873443604, + "learning_rate": 1.901633213389811e-05, + "loss": 2.2029, + "step": 4708 + }, + { + "epoch": 0.16, + "grad_norm": 0.7099496126174927, + "learning_rate": 1.9015872380177574e-05, + "loss": 2.2673, + "step": 4709 + }, + { + "epoch": 0.16, + "grad_norm": 0.7724513411521912, + "learning_rate": 1.9015412524600986e-05, + "loss": 2.2185, + "step": 4710 + }, + { + "epoch": 0.16, + "grad_norm": 0.7045884132385254, + "learning_rate": 1.901495256717356e-05, + "loss": 2.2328, + "step": 4711 + }, + { + "epoch": 0.16, + "grad_norm": 0.6682022213935852, + "learning_rate": 1.9014492507900475e-05, + "loss": 2.2218, + "step": 4712 + }, + { + "epoch": 0.16, + "grad_norm": 0.6990941166877747, + "learning_rate": 1.901403234678694e-05, + "loss": 2.27, + "step": 4713 + }, + { + "epoch": 0.16, + "grad_norm": 0.7037197351455688, + "learning_rate": 1.901357208383815e-05, + "loss": 2.1981, + "step": 4714 + }, + { + "epoch": 0.16, + "grad_norm": 0.6634253263473511, + "learning_rate": 1.9013111719059306e-05, + "loss": 2.246, + "step": 4715 + }, + { + "epoch": 0.16, + "grad_norm": 0.7547180652618408, + "learning_rate": 1.9012651252455606e-05, + "loss": 2.2939, + "step": 4716 + }, + { + "epoch": 0.16, + "grad_norm": 0.7243127822875977, + "learning_rate": 1.9012190684032255e-05, + "loss": 2.3154, + "step": 4717 + }, + { + "epoch": 0.16, + "grad_norm": 0.7080589532852173, + "learning_rate": 1.9011730013794455e-05, + "loss": 2.2295, + "step": 4718 + }, + { + "epoch": 0.16, + "grad_norm": 0.6812245845794678, + "learning_rate": 1.9011269241747412e-05, + "loss": 2.2276, + "step": 4719 + }, + { + "epoch": 0.16, + "grad_norm": 0.6937214136123657, + "learning_rate": 1.9010808367896326e-05, + "loss": 2.2944, + "step": 4720 + }, + { + "epoch": 0.16, + "grad_norm": 0.7501614093780518, + "learning_rate": 1.9010347392246414e-05, + "loss": 2.2433, + "step": 4721 + }, + { + "epoch": 0.16, + "grad_norm": 0.6915141940116882, + "learning_rate": 1.9009886314802875e-05, + "loss": 2.2183, + "step": 4722 + }, + { + "epoch": 0.16, + "grad_norm": 0.6897430419921875, + "learning_rate": 1.9009425135570923e-05, + "loss": 2.1968, + "step": 4723 + }, + { + "epoch": 0.16, + "grad_norm": 0.7018092274665833, + "learning_rate": 1.900896385455576e-05, + "loss": 2.2467, + "step": 4724 + }, + { + "epoch": 0.16, + "grad_norm": 0.6967020034790039, + "learning_rate": 1.9008502471762608e-05, + "loss": 2.1736, + "step": 4725 + }, + { + "epoch": 0.16, + "grad_norm": 0.6835813522338867, + "learning_rate": 1.900804098719667e-05, + "loss": 2.127, + "step": 4726 + }, + { + "epoch": 0.16, + "grad_norm": 0.7201195955276489, + "learning_rate": 1.900757940086317e-05, + "loss": 2.2164, + "step": 4727 + }, + { + "epoch": 0.16, + "grad_norm": 0.7513042688369751, + "learning_rate": 1.9007117712767315e-05, + "loss": 2.2299, + "step": 4728 + }, + { + "epoch": 0.16, + "grad_norm": 0.6663387417793274, + "learning_rate": 1.9006655922914322e-05, + "loss": 2.2264, + "step": 4729 + }, + { + "epoch": 0.16, + "grad_norm": 0.7091827392578125, + "learning_rate": 1.9006194031309412e-05, + "loss": 2.2036, + "step": 4730 + }, + { + "epoch": 0.16, + "grad_norm": 0.6978490352630615, + "learning_rate": 1.9005732037957797e-05, + "loss": 2.2253, + "step": 4731 + }, + { + "epoch": 0.16, + "grad_norm": 0.6767582297325134, + "learning_rate": 1.9005269942864697e-05, + "loss": 2.2531, + "step": 4732 + }, + { + "epoch": 0.16, + "grad_norm": 0.7303970456123352, + "learning_rate": 1.9004807746035338e-05, + "loss": 2.2603, + "step": 4733 + }, + { + "epoch": 0.16, + "grad_norm": 0.7422457337379456, + "learning_rate": 1.9004345447474936e-05, + "loss": 2.2301, + "step": 4734 + }, + { + "epoch": 0.16, + "grad_norm": 0.719879150390625, + "learning_rate": 1.9003883047188717e-05, + "loss": 2.2006, + "step": 4735 + }, + { + "epoch": 0.16, + "grad_norm": 0.7116043567657471, + "learning_rate": 1.9003420545181904e-05, + "loss": 2.2767, + "step": 4736 + }, + { + "epoch": 0.16, + "grad_norm": 0.6684793829917908, + "learning_rate": 1.9002957941459726e-05, + "loss": 2.2467, + "step": 4737 + }, + { + "epoch": 0.16, + "grad_norm": 0.7001445293426514, + "learning_rate": 1.90024952360274e-05, + "loss": 2.2611, + "step": 4738 + }, + { + "epoch": 0.16, + "grad_norm": 0.7057059407234192, + "learning_rate": 1.9002032428890162e-05, + "loss": 2.2866, + "step": 4739 + }, + { + "epoch": 0.16, + "grad_norm": 0.7199113965034485, + "learning_rate": 1.9001569520053235e-05, + "loss": 2.2214, + "step": 4740 + }, + { + "epoch": 0.16, + "grad_norm": 0.6772589087486267, + "learning_rate": 1.9001106509521854e-05, + "loss": 2.2293, + "step": 4741 + }, + { + "epoch": 0.16, + "grad_norm": 0.703598141670227, + "learning_rate": 1.9000643397301248e-05, + "loss": 2.2667, + "step": 4742 + }, + { + "epoch": 0.16, + "grad_norm": 0.7413385510444641, + "learning_rate": 1.9000180183396643e-05, + "loss": 2.2312, + "step": 4743 + }, + { + "epoch": 0.16, + "grad_norm": 0.6944820284843445, + "learning_rate": 1.899971686781328e-05, + "loss": 2.1681, + "step": 4744 + }, + { + "epoch": 0.16, + "grad_norm": 0.6855244636535645, + "learning_rate": 1.899925345055639e-05, + "loss": 2.2931, + "step": 4745 + }, + { + "epoch": 0.16, + "grad_norm": 0.6820714473724365, + "learning_rate": 1.8998789931631205e-05, + "loss": 2.3185, + "step": 4746 + }, + { + "epoch": 0.16, + "grad_norm": 0.6639246344566345, + "learning_rate": 1.899832631104297e-05, + "loss": 2.2587, + "step": 4747 + }, + { + "epoch": 0.16, + "grad_norm": 0.7517090439796448, + "learning_rate": 1.8997862588796914e-05, + "loss": 2.222, + "step": 4748 + }, + { + "epoch": 0.16, + "grad_norm": 0.7011862993240356, + "learning_rate": 1.8997398764898283e-05, + "loss": 2.256, + "step": 4749 + }, + { + "epoch": 0.16, + "grad_norm": 0.689825713634491, + "learning_rate": 1.899693483935231e-05, + "loss": 2.1859, + "step": 4750 + }, + { + "epoch": 0.16, + "grad_norm": 0.7003384828567505, + "learning_rate": 1.8996470812164244e-05, + "loss": 2.2672, + "step": 4751 + }, + { + "epoch": 0.16, + "grad_norm": 0.6527851819992065, + "learning_rate": 1.8996006683339323e-05, + "loss": 2.2509, + "step": 4752 + }, + { + "epoch": 0.16, + "grad_norm": 0.7032826542854309, + "learning_rate": 1.899554245288279e-05, + "loss": 2.2519, + "step": 4753 + }, + { + "epoch": 0.16, + "grad_norm": 0.6933138966560364, + "learning_rate": 1.899507812079989e-05, + "loss": 2.2572, + "step": 4754 + }, + { + "epoch": 0.16, + "grad_norm": 0.6778034567832947, + "learning_rate": 1.8994613687095866e-05, + "loss": 2.1596, + "step": 4755 + }, + { + "epoch": 0.16, + "grad_norm": 0.7171981334686279, + "learning_rate": 1.899414915177597e-05, + "loss": 2.2819, + "step": 4756 + }, + { + "epoch": 0.16, + "grad_norm": 0.681999921798706, + "learning_rate": 1.899368451484545e-05, + "loss": 2.2006, + "step": 4757 + }, + { + "epoch": 0.16, + "grad_norm": 0.7144233584403992, + "learning_rate": 1.899321977630955e-05, + "loss": 2.3002, + "step": 4758 + }, + { + "epoch": 0.16, + "grad_norm": 0.7501667737960815, + "learning_rate": 1.8992754936173525e-05, + "loss": 2.2388, + "step": 4759 + }, + { + "epoch": 0.16, + "grad_norm": 0.7081283330917358, + "learning_rate": 1.8992289994442624e-05, + "loss": 2.2208, + "step": 4760 + }, + { + "epoch": 0.16, + "grad_norm": 0.7322137951850891, + "learning_rate": 1.89918249511221e-05, + "loss": 2.203, + "step": 4761 + }, + { + "epoch": 0.16, + "grad_norm": 0.6836585998535156, + "learning_rate": 1.899135980621721e-05, + "loss": 2.2255, + "step": 4762 + }, + { + "epoch": 0.16, + "grad_norm": 0.6888043284416199, + "learning_rate": 1.8990894559733207e-05, + "loss": 2.2524, + "step": 4763 + }, + { + "epoch": 0.16, + "grad_norm": 0.666958749294281, + "learning_rate": 1.8990429211675346e-05, + "loss": 2.1743, + "step": 4764 + }, + { + "epoch": 0.16, + "grad_norm": 0.6876826882362366, + "learning_rate": 1.8989963762048883e-05, + "loss": 2.2047, + "step": 4765 + }, + { + "epoch": 0.16, + "grad_norm": 0.7631163001060486, + "learning_rate": 1.8989498210859077e-05, + "loss": 2.2205, + "step": 4766 + }, + { + "epoch": 0.16, + "grad_norm": 0.7583569288253784, + "learning_rate": 1.8989032558111193e-05, + "loss": 2.2258, + "step": 4767 + }, + { + "epoch": 0.16, + "grad_norm": 0.7457662224769592, + "learning_rate": 1.8988566803810486e-05, + "loss": 2.2954, + "step": 4768 + }, + { + "epoch": 0.16, + "grad_norm": 0.7219420075416565, + "learning_rate": 1.8988100947962214e-05, + "loss": 2.2765, + "step": 4769 + }, + { + "epoch": 0.16, + "grad_norm": 0.6786173582077026, + "learning_rate": 1.898763499057165e-05, + "loss": 2.1854, + "step": 4770 + }, + { + "epoch": 0.16, + "grad_norm": 0.6997213959693909, + "learning_rate": 1.898716893164405e-05, + "loss": 2.2406, + "step": 4771 + }, + { + "epoch": 0.16, + "grad_norm": 0.6940840482711792, + "learning_rate": 1.8986702771184685e-05, + "loss": 2.222, + "step": 4772 + }, + { + "epoch": 0.16, + "grad_norm": 0.659393310546875, + "learning_rate": 1.898623650919882e-05, + "loss": 2.2632, + "step": 4773 + }, + { + "epoch": 0.16, + "grad_norm": 0.6857128739356995, + "learning_rate": 1.898577014569172e-05, + "loss": 2.2292, + "step": 4774 + }, + { + "epoch": 0.16, + "grad_norm": 0.6805195212364197, + "learning_rate": 1.898530368066865e-05, + "loss": 2.23, + "step": 4775 + }, + { + "epoch": 0.16, + "grad_norm": 0.6657445430755615, + "learning_rate": 1.8984837114134894e-05, + "loss": 2.2716, + "step": 4776 + }, + { + "epoch": 0.16, + "grad_norm": 0.6844924092292786, + "learning_rate": 1.8984370446095708e-05, + "loss": 2.2397, + "step": 4777 + }, + { + "epoch": 0.16, + "grad_norm": 0.7198789119720459, + "learning_rate": 1.898390367655637e-05, + "loss": 2.2229, + "step": 4778 + }, + { + "epoch": 0.16, + "grad_norm": 0.7239323258399963, + "learning_rate": 1.8983436805522157e-05, + "loss": 2.2944, + "step": 4779 + }, + { + "epoch": 0.16, + "grad_norm": 0.709490954875946, + "learning_rate": 1.8982969832998336e-05, + "loss": 2.2577, + "step": 4780 + }, + { + "epoch": 0.16, + "grad_norm": 0.6862909197807312, + "learning_rate": 1.8982502758990187e-05, + "loss": 2.1898, + "step": 4781 + }, + { + "epoch": 0.16, + "grad_norm": 0.6763822436332703, + "learning_rate": 1.8982035583502987e-05, + "loss": 2.1829, + "step": 4782 + }, + { + "epoch": 0.16, + "grad_norm": 0.7010238170623779, + "learning_rate": 1.8981568306542014e-05, + "loss": 2.2036, + "step": 4783 + }, + { + "epoch": 0.16, + "grad_norm": 0.6869719624519348, + "learning_rate": 1.8981100928112544e-05, + "loss": 2.2641, + "step": 4784 + }, + { + "epoch": 0.16, + "grad_norm": 0.6763965487480164, + "learning_rate": 1.8980633448219857e-05, + "loss": 2.2742, + "step": 4785 + }, + { + "epoch": 0.16, + "grad_norm": 0.6951785683631897, + "learning_rate": 1.8980165866869242e-05, + "loss": 2.2348, + "step": 4786 + }, + { + "epoch": 0.16, + "grad_norm": 0.7116746306419373, + "learning_rate": 1.8979698184065974e-05, + "loss": 2.229, + "step": 4787 + }, + { + "epoch": 0.16, + "grad_norm": 0.6824199557304382, + "learning_rate": 1.8979230399815338e-05, + "loss": 2.1917, + "step": 4788 + }, + { + "epoch": 0.16, + "grad_norm": 0.7191827297210693, + "learning_rate": 1.897876251412262e-05, + "loss": 2.2197, + "step": 4789 + }, + { + "epoch": 0.16, + "grad_norm": 0.7147292494773865, + "learning_rate": 1.8978294526993103e-05, + "loss": 2.2748, + "step": 4790 + }, + { + "epoch": 0.16, + "grad_norm": 0.7100715637207031, + "learning_rate": 1.8977826438432078e-05, + "loss": 2.1955, + "step": 4791 + }, + { + "epoch": 0.16, + "grad_norm": 0.7220259308815002, + "learning_rate": 1.897735824844483e-05, + "loss": 2.2544, + "step": 4792 + }, + { + "epoch": 0.16, + "grad_norm": 0.706153154373169, + "learning_rate": 1.897688995703665e-05, + "loss": 2.2283, + "step": 4793 + }, + { + "epoch": 0.16, + "grad_norm": 0.7048121690750122, + "learning_rate": 1.897642156421283e-05, + "loss": 2.2328, + "step": 4794 + }, + { + "epoch": 0.16, + "grad_norm": 0.7308088541030884, + "learning_rate": 1.8975953069978658e-05, + "loss": 2.2408, + "step": 4795 + }, + { + "epoch": 0.16, + "grad_norm": 0.6967409253120422, + "learning_rate": 1.897548447433943e-05, + "loss": 2.1815, + "step": 4796 + }, + { + "epoch": 0.16, + "grad_norm": 0.7581714987754822, + "learning_rate": 1.897501577730044e-05, + "loss": 2.1975, + "step": 4797 + }, + { + "epoch": 0.16, + "grad_norm": 0.6834259629249573, + "learning_rate": 1.897454697886698e-05, + "loss": 2.2592, + "step": 4798 + }, + { + "epoch": 0.16, + "grad_norm": 0.7299399971961975, + "learning_rate": 1.8974078079044347e-05, + "loss": 2.2586, + "step": 4799 + }, + { + "epoch": 0.16, + "grad_norm": 0.7129775881767273, + "learning_rate": 1.8973609077837837e-05, + "loss": 2.2026, + "step": 4800 + }, + { + "epoch": 0.16, + "grad_norm": 0.6958067417144775, + "learning_rate": 1.8973139975252756e-05, + "loss": 2.2811, + "step": 4801 + }, + { + "epoch": 0.16, + "grad_norm": 0.7096580266952515, + "learning_rate": 1.89726707712944e-05, + "loss": 2.1941, + "step": 4802 + }, + { + "epoch": 0.16, + "grad_norm": 0.6954323053359985, + "learning_rate": 1.8972201465968058e-05, + "loss": 2.1811, + "step": 4803 + }, + { + "epoch": 0.16, + "grad_norm": 0.7187135219573975, + "learning_rate": 1.897173205927905e-05, + "loss": 2.1819, + "step": 4804 + }, + { + "epoch": 0.16, + "grad_norm": 0.6987690925598145, + "learning_rate": 1.897126255123267e-05, + "loss": 2.1495, + "step": 4805 + }, + { + "epoch": 0.16, + "grad_norm": 0.6783400177955627, + "learning_rate": 1.897079294183422e-05, + "loss": 2.2867, + "step": 4806 + }, + { + "epoch": 0.16, + "grad_norm": 0.7092986702919006, + "learning_rate": 1.8970323231089013e-05, + "loss": 2.2398, + "step": 4807 + }, + { + "epoch": 0.16, + "grad_norm": 0.7376054525375366, + "learning_rate": 1.8969853419002348e-05, + "loss": 2.285, + "step": 4808 + }, + { + "epoch": 0.16, + "grad_norm": 0.7156516313552856, + "learning_rate": 1.8969383505579538e-05, + "loss": 2.1289, + "step": 4809 + }, + { + "epoch": 0.16, + "grad_norm": 0.6990901231765747, + "learning_rate": 1.896891349082589e-05, + "loss": 2.3434, + "step": 4810 + }, + { + "epoch": 0.16, + "grad_norm": 0.6845041513442993, + "learning_rate": 1.8968443374746712e-05, + "loss": 2.219, + "step": 4811 + }, + { + "epoch": 0.16, + "grad_norm": 0.7466919422149658, + "learning_rate": 1.896797315734732e-05, + "loss": 2.2279, + "step": 4812 + }, + { + "epoch": 0.16, + "grad_norm": 0.685339629650116, + "learning_rate": 1.896750283863302e-05, + "loss": 2.1961, + "step": 4813 + }, + { + "epoch": 0.16, + "grad_norm": 0.7060288786888123, + "learning_rate": 1.8967032418609128e-05, + "loss": 2.1464, + "step": 4814 + }, + { + "epoch": 0.16, + "grad_norm": 0.7408494353294373, + "learning_rate": 1.896656189728096e-05, + "loss": 2.2185, + "step": 4815 + }, + { + "epoch": 0.16, + "grad_norm": 0.7651981711387634, + "learning_rate": 1.896609127465383e-05, + "loss": 2.2012, + "step": 4816 + }, + { + "epoch": 0.16, + "grad_norm": 0.7010645866394043, + "learning_rate": 1.8965620550733055e-05, + "loss": 2.2667, + "step": 4817 + }, + { + "epoch": 0.16, + "grad_norm": 0.7302684187889099, + "learning_rate": 1.8965149725523953e-05, + "loss": 2.2955, + "step": 4818 + }, + { + "epoch": 0.16, + "grad_norm": 0.6990823745727539, + "learning_rate": 1.8964678799031846e-05, + "loss": 2.2571, + "step": 4819 + }, + { + "epoch": 0.16, + "grad_norm": 0.7016177773475647, + "learning_rate": 1.896420777126205e-05, + "loss": 2.2137, + "step": 4820 + }, + { + "epoch": 0.16, + "grad_norm": 0.7371472716331482, + "learning_rate": 1.896373664221989e-05, + "loss": 2.1897, + "step": 4821 + }, + { + "epoch": 0.16, + "grad_norm": 0.7481779456138611, + "learning_rate": 1.896326541191069e-05, + "loss": 2.2554, + "step": 4822 + }, + { + "epoch": 0.16, + "grad_norm": 0.6907321810722351, + "learning_rate": 1.8962794080339765e-05, + "loss": 2.1911, + "step": 4823 + }, + { + "epoch": 0.16, + "grad_norm": 0.733113706111908, + "learning_rate": 1.8962322647512442e-05, + "loss": 2.2483, + "step": 4824 + }, + { + "epoch": 0.16, + "grad_norm": 0.6961241364479065, + "learning_rate": 1.8961851113434054e-05, + "loss": 2.2809, + "step": 4825 + }, + { + "epoch": 0.16, + "grad_norm": 0.7328378558158875, + "learning_rate": 1.8961379478109926e-05, + "loss": 2.1666, + "step": 4826 + }, + { + "epoch": 0.16, + "grad_norm": 0.7186113595962524, + "learning_rate": 1.8960907741545384e-05, + "loss": 2.2388, + "step": 4827 + }, + { + "epoch": 0.16, + "grad_norm": 0.696742594242096, + "learning_rate": 1.896043590374576e-05, + "loss": 2.2255, + "step": 4828 + }, + { + "epoch": 0.16, + "grad_norm": 0.7496445178985596, + "learning_rate": 1.8959963964716375e-05, + "loss": 2.1754, + "step": 4829 + }, + { + "epoch": 0.16, + "grad_norm": 0.7089710235595703, + "learning_rate": 1.8959491924462573e-05, + "loss": 2.2434, + "step": 4830 + }, + { + "epoch": 0.16, + "grad_norm": 0.7051609754562378, + "learning_rate": 1.8959019782989682e-05, + "loss": 2.2, + "step": 4831 + }, + { + "epoch": 0.16, + "grad_norm": 0.7148686647415161, + "learning_rate": 1.8958547540303035e-05, + "loss": 2.2332, + "step": 4832 + }, + { + "epoch": 0.16, + "grad_norm": 0.6751871705055237, + "learning_rate": 1.895807519640797e-05, + "loss": 2.3279, + "step": 4833 + }, + { + "epoch": 0.16, + "grad_norm": 0.6686996817588806, + "learning_rate": 1.8957602751309817e-05, + "loss": 2.2654, + "step": 4834 + }, + { + "epoch": 0.16, + "grad_norm": 0.6863821148872375, + "learning_rate": 1.8957130205013924e-05, + "loss": 2.2506, + "step": 4835 + }, + { + "epoch": 0.16, + "grad_norm": 0.7030407786369324, + "learning_rate": 1.895665755752562e-05, + "loss": 2.2566, + "step": 4836 + }, + { + "epoch": 0.16, + "grad_norm": 0.7643577456474304, + "learning_rate": 1.8956184808850246e-05, + "loss": 2.2525, + "step": 4837 + }, + { + "epoch": 0.16, + "grad_norm": 0.6815815567970276, + "learning_rate": 1.8955711958993148e-05, + "loss": 2.2123, + "step": 4838 + }, + { + "epoch": 0.16, + "grad_norm": 0.7061907052993774, + "learning_rate": 1.8955239007959665e-05, + "loss": 2.2259, + "step": 4839 + }, + { + "epoch": 0.16, + "grad_norm": 0.7244454622268677, + "learning_rate": 1.895476595575514e-05, + "loss": 2.1651, + "step": 4840 + }, + { + "epoch": 0.16, + "grad_norm": 0.6943740248680115, + "learning_rate": 1.8954292802384916e-05, + "loss": 2.2313, + "step": 4841 + }, + { + "epoch": 0.16, + "grad_norm": 0.6884348392486572, + "learning_rate": 1.895381954785434e-05, + "loss": 2.2947, + "step": 4842 + }, + { + "epoch": 0.16, + "grad_norm": 0.693078875541687, + "learning_rate": 1.8953346192168756e-05, + "loss": 2.2086, + "step": 4843 + }, + { + "epoch": 0.16, + "grad_norm": 0.6925577521324158, + "learning_rate": 1.8952872735333516e-05, + "loss": 2.2012, + "step": 4844 + }, + { + "epoch": 0.16, + "grad_norm": 0.6842532753944397, + "learning_rate": 1.895239917735397e-05, + "loss": 2.2253, + "step": 4845 + }, + { + "epoch": 0.16, + "grad_norm": 0.6860787868499756, + "learning_rate": 1.8951925518235458e-05, + "loss": 2.2174, + "step": 4846 + }, + { + "epoch": 0.16, + "grad_norm": 0.7333983778953552, + "learning_rate": 1.8951451757983343e-05, + "loss": 2.2143, + "step": 4847 + }, + { + "epoch": 0.16, + "grad_norm": 0.6927983164787292, + "learning_rate": 1.8950977896602968e-05, + "loss": 2.271, + "step": 4848 + }, + { + "epoch": 0.16, + "grad_norm": 0.6859998106956482, + "learning_rate": 1.8950503934099697e-05, + "loss": 2.2854, + "step": 4849 + }, + { + "epoch": 0.16, + "grad_norm": 0.7233844995498657, + "learning_rate": 1.895002987047887e-05, + "loss": 2.2643, + "step": 4850 + }, + { + "epoch": 0.16, + "grad_norm": 0.6846117377281189, + "learning_rate": 1.8949555705745855e-05, + "loss": 2.275, + "step": 4851 + }, + { + "epoch": 0.16, + "grad_norm": 0.6728076934814453, + "learning_rate": 1.8949081439906005e-05, + "loss": 2.2382, + "step": 4852 + }, + { + "epoch": 0.16, + "grad_norm": 0.715869665145874, + "learning_rate": 1.8948607072964678e-05, + "loss": 2.2466, + "step": 4853 + }, + { + "epoch": 0.16, + "grad_norm": 0.6812397837638855, + "learning_rate": 1.894813260492723e-05, + "loss": 2.2279, + "step": 4854 + }, + { + "epoch": 0.16, + "grad_norm": 0.7054184675216675, + "learning_rate": 1.8947658035799024e-05, + "loss": 2.2156, + "step": 4855 + }, + { + "epoch": 0.16, + "grad_norm": 0.6722304224967957, + "learning_rate": 1.8947183365585424e-05, + "loss": 2.2488, + "step": 4856 + }, + { + "epoch": 0.16, + "grad_norm": 0.6748642921447754, + "learning_rate": 1.8946708594291788e-05, + "loss": 2.2098, + "step": 4857 + }, + { + "epoch": 0.16, + "grad_norm": 0.6665319204330444, + "learning_rate": 1.8946233721923483e-05, + "loss": 2.3029, + "step": 4858 + }, + { + "epoch": 0.16, + "grad_norm": 0.6844080090522766, + "learning_rate": 1.894575874848587e-05, + "loss": 2.2601, + "step": 4859 + }, + { + "epoch": 0.16, + "grad_norm": 0.7068097591400146, + "learning_rate": 1.894528367398432e-05, + "loss": 2.1822, + "step": 4860 + }, + { + "epoch": 0.16, + "grad_norm": 0.6819680333137512, + "learning_rate": 1.89448084984242e-05, + "loss": 2.2628, + "step": 4861 + }, + { + "epoch": 0.16, + "grad_norm": 0.7051294445991516, + "learning_rate": 1.8944333221810872e-05, + "loss": 2.271, + "step": 4862 + }, + { + "epoch": 0.16, + "grad_norm": 0.6991629600524902, + "learning_rate": 1.8943857844149707e-05, + "loss": 2.3393, + "step": 4863 + }, + { + "epoch": 0.16, + "grad_norm": 0.7286848425865173, + "learning_rate": 1.894338236544608e-05, + "loss": 2.2781, + "step": 4864 + }, + { + "epoch": 0.16, + "grad_norm": 0.7007122039794922, + "learning_rate": 1.894290678570536e-05, + "loss": 2.181, + "step": 4865 + }, + { + "epoch": 0.16, + "grad_norm": 0.7174482345581055, + "learning_rate": 1.8942431104932923e-05, + "loss": 2.1817, + "step": 4866 + }, + { + "epoch": 0.16, + "grad_norm": 0.6742419600486755, + "learning_rate": 1.8941955323134138e-05, + "loss": 2.3149, + "step": 4867 + }, + { + "epoch": 0.16, + "grad_norm": 0.6836929321289062, + "learning_rate": 1.8941479440314385e-05, + "loss": 2.2309, + "step": 4868 + }, + { + "epoch": 0.16, + "grad_norm": 0.6904343366622925, + "learning_rate": 1.8941003456479034e-05, + "loss": 2.2103, + "step": 4869 + }, + { + "epoch": 0.16, + "grad_norm": 0.6752385497093201, + "learning_rate": 1.8940527371633463e-05, + "loss": 2.1965, + "step": 4870 + }, + { + "epoch": 0.16, + "grad_norm": 0.6568467020988464, + "learning_rate": 1.8940051185783058e-05, + "loss": 2.161, + "step": 4871 + }, + { + "epoch": 0.16, + "grad_norm": 0.6638485193252563, + "learning_rate": 1.8939574898933193e-05, + "loss": 2.2665, + "step": 4872 + }, + { + "epoch": 0.16, + "grad_norm": 0.6979497075080872, + "learning_rate": 1.8939098511089253e-05, + "loss": 2.2586, + "step": 4873 + }, + { + "epoch": 0.16, + "grad_norm": 0.7235592007637024, + "learning_rate": 1.8938622022256613e-05, + "loss": 2.2256, + "step": 4874 + }, + { + "epoch": 0.16, + "grad_norm": 0.7090440392494202, + "learning_rate": 1.8938145432440658e-05, + "loss": 2.2613, + "step": 4875 + }, + { + "epoch": 0.16, + "grad_norm": 0.6963095664978027, + "learning_rate": 1.8937668741646777e-05, + "loss": 2.2741, + "step": 4876 + }, + { + "epoch": 0.16, + "grad_norm": 0.6985675096511841, + "learning_rate": 1.8937191949880353e-05, + "loss": 2.2226, + "step": 4877 + }, + { + "epoch": 0.16, + "grad_norm": 0.7113465070724487, + "learning_rate": 1.8936715057146774e-05, + "loss": 2.2518, + "step": 4878 + }, + { + "epoch": 0.16, + "grad_norm": 0.6837702393531799, + "learning_rate": 1.8936238063451422e-05, + "loss": 2.2059, + "step": 4879 + }, + { + "epoch": 0.16, + "grad_norm": 0.6932180523872375, + "learning_rate": 1.893576096879969e-05, + "loss": 2.2262, + "step": 4880 + }, + { + "epoch": 0.16, + "grad_norm": 0.6973762512207031, + "learning_rate": 1.8935283773196968e-05, + "loss": 2.2418, + "step": 4881 + }, + { + "epoch": 0.16, + "grad_norm": 0.6797553896903992, + "learning_rate": 1.8934806476648648e-05, + "loss": 2.2563, + "step": 4882 + }, + { + "epoch": 0.16, + "grad_norm": 0.6827948093414307, + "learning_rate": 1.893432907916012e-05, + "loss": 2.1967, + "step": 4883 + }, + { + "epoch": 0.16, + "grad_norm": 0.6905988454818726, + "learning_rate": 1.8933851580736777e-05, + "loss": 2.2012, + "step": 4884 + }, + { + "epoch": 0.16, + "grad_norm": 0.6837722659111023, + "learning_rate": 1.8933373981384014e-05, + "loss": 2.2146, + "step": 4885 + }, + { + "epoch": 0.16, + "grad_norm": 0.6837047934532166, + "learning_rate": 1.893289628110723e-05, + "loss": 2.2211, + "step": 4886 + }, + { + "epoch": 0.16, + "grad_norm": 0.6742048859596252, + "learning_rate": 1.8932418479911817e-05, + "loss": 2.2403, + "step": 4887 + }, + { + "epoch": 0.16, + "grad_norm": 0.7212556004524231, + "learning_rate": 1.8931940577803173e-05, + "loss": 2.2334, + "step": 4888 + }, + { + "epoch": 0.16, + "grad_norm": 0.7001677751541138, + "learning_rate": 1.8931462574786705e-05, + "loss": 2.2099, + "step": 4889 + }, + { + "epoch": 0.16, + "grad_norm": 0.6931729912757874, + "learning_rate": 1.8930984470867802e-05, + "loss": 2.2813, + "step": 4890 + }, + { + "epoch": 0.16, + "grad_norm": 0.7120679020881653, + "learning_rate": 1.893050626605187e-05, + "loss": 2.2449, + "step": 4891 + }, + { + "epoch": 0.16, + "grad_norm": 0.7054172158241272, + "learning_rate": 1.8930027960344316e-05, + "loss": 2.1826, + "step": 4892 + }, + { + "epoch": 0.16, + "grad_norm": 0.758216917514801, + "learning_rate": 1.8929549553750537e-05, + "loss": 2.2148, + "step": 4893 + }, + { + "epoch": 0.16, + "grad_norm": 0.7101566791534424, + "learning_rate": 1.892907104627594e-05, + "loss": 2.1885, + "step": 4894 + }, + { + "epoch": 0.16, + "grad_norm": 0.6897265911102295, + "learning_rate": 1.8928592437925936e-05, + "loss": 2.2315, + "step": 4895 + }, + { + "epoch": 0.16, + "grad_norm": 0.7192511558532715, + "learning_rate": 1.8928113728705922e-05, + "loss": 2.258, + "step": 4896 + }, + { + "epoch": 0.16, + "grad_norm": 0.6910582780838013, + "learning_rate": 1.892763491862131e-05, + "loss": 2.1909, + "step": 4897 + }, + { + "epoch": 0.16, + "grad_norm": 0.6786988973617554, + "learning_rate": 1.8927156007677517e-05, + "loss": 2.14, + "step": 4898 + }, + { + "epoch": 0.16, + "grad_norm": 0.760339617729187, + "learning_rate": 1.8926676995879944e-05, + "loss": 2.1893, + "step": 4899 + }, + { + "epoch": 0.16, + "grad_norm": 0.6589984893798828, + "learning_rate": 1.8926197883234004e-05, + "loss": 2.2477, + "step": 4900 + }, + { + "epoch": 0.16, + "grad_norm": 0.705695629119873, + "learning_rate": 1.8925718669745116e-05, + "loss": 2.1911, + "step": 4901 + }, + { + "epoch": 0.16, + "grad_norm": 0.6967737674713135, + "learning_rate": 1.8925239355418687e-05, + "loss": 2.1795, + "step": 4902 + }, + { + "epoch": 0.16, + "grad_norm": 0.679197371006012, + "learning_rate": 1.8924759940260134e-05, + "loss": 2.187, + "step": 4903 + }, + { + "epoch": 0.16, + "grad_norm": 0.698905885219574, + "learning_rate": 1.8924280424274873e-05, + "loss": 2.2716, + "step": 4904 + }, + { + "epoch": 0.16, + "grad_norm": 0.6894533634185791, + "learning_rate": 1.8923800807468323e-05, + "loss": 2.2946, + "step": 4905 + }, + { + "epoch": 0.16, + "grad_norm": 0.727260172367096, + "learning_rate": 1.89233210898459e-05, + "loss": 2.1636, + "step": 4906 + }, + { + "epoch": 0.16, + "grad_norm": 0.6923341155052185, + "learning_rate": 1.892284127141303e-05, + "loss": 2.2301, + "step": 4907 + }, + { + "epoch": 0.16, + "grad_norm": 0.7053573727607727, + "learning_rate": 1.8922361352175124e-05, + "loss": 2.2577, + "step": 4908 + }, + { + "epoch": 0.16, + "grad_norm": 0.687829315662384, + "learning_rate": 1.8921881332137608e-05, + "loss": 2.2556, + "step": 4909 + }, + { + "epoch": 0.16, + "grad_norm": 0.7025995850563049, + "learning_rate": 1.8921401211305905e-05, + "loss": 2.2183, + "step": 4910 + }, + { + "epoch": 0.16, + "grad_norm": 0.7422037720680237, + "learning_rate": 1.8920920989685444e-05, + "loss": 2.3328, + "step": 4911 + }, + { + "epoch": 0.16, + "grad_norm": 0.6935421824455261, + "learning_rate": 1.8920440667281645e-05, + "loss": 2.2012, + "step": 4912 + }, + { + "epoch": 0.16, + "grad_norm": 0.7041882276535034, + "learning_rate": 1.8919960244099932e-05, + "loss": 2.2565, + "step": 4913 + }, + { + "epoch": 0.16, + "grad_norm": 0.6862683892250061, + "learning_rate": 1.8919479720145735e-05, + "loss": 2.2158, + "step": 4914 + }, + { + "epoch": 0.16, + "grad_norm": 0.6875916123390198, + "learning_rate": 1.8918999095424486e-05, + "loss": 2.1983, + "step": 4915 + }, + { + "epoch": 0.16, + "grad_norm": 0.6591494679450989, + "learning_rate": 1.891851836994161e-05, + "loss": 2.1778, + "step": 4916 + }, + { + "epoch": 0.16, + "grad_norm": 0.7151937484741211, + "learning_rate": 1.8918037543702543e-05, + "loss": 2.2424, + "step": 4917 + }, + { + "epoch": 0.16, + "grad_norm": 0.6980819702148438, + "learning_rate": 1.8917556616712715e-05, + "loss": 2.284, + "step": 4918 + }, + { + "epoch": 0.16, + "grad_norm": 0.6850806474685669, + "learning_rate": 1.8917075588977557e-05, + "loss": 2.2329, + "step": 4919 + }, + { + "epoch": 0.16, + "grad_norm": 0.7290889620780945, + "learning_rate": 1.8916594460502504e-05, + "loss": 2.2209, + "step": 4920 + }, + { + "epoch": 0.16, + "grad_norm": 0.7004249095916748, + "learning_rate": 1.8916113231292994e-05, + "loss": 2.1932, + "step": 4921 + }, + { + "epoch": 0.16, + "grad_norm": 0.6988550424575806, + "learning_rate": 1.891563190135446e-05, + "loss": 2.2506, + "step": 4922 + }, + { + "epoch": 0.16, + "grad_norm": 0.6866981387138367, + "learning_rate": 1.8915150470692344e-05, + "loss": 2.3055, + "step": 4923 + }, + { + "epoch": 0.16, + "grad_norm": 0.6938983201980591, + "learning_rate": 1.891466893931208e-05, + "loss": 2.2401, + "step": 4924 + }, + { + "epoch": 0.16, + "grad_norm": 0.6911777853965759, + "learning_rate": 1.8914187307219115e-05, + "loss": 2.2385, + "step": 4925 + }, + { + "epoch": 0.16, + "grad_norm": 0.6610969305038452, + "learning_rate": 1.8913705574418885e-05, + "loss": 2.2253, + "step": 4926 + }, + { + "epoch": 0.16, + "grad_norm": 0.6792849898338318, + "learning_rate": 1.8913223740916832e-05, + "loss": 2.2774, + "step": 4927 + }, + { + "epoch": 0.16, + "grad_norm": 0.6893838047981262, + "learning_rate": 1.89127418067184e-05, + "loss": 2.2601, + "step": 4928 + }, + { + "epoch": 0.16, + "grad_norm": 0.6990026831626892, + "learning_rate": 1.8912259771829035e-05, + "loss": 2.2078, + "step": 4929 + }, + { + "epoch": 0.16, + "grad_norm": 0.6758476495742798, + "learning_rate": 1.8911777636254183e-05, + "loss": 2.1951, + "step": 4930 + }, + { + "epoch": 0.16, + "grad_norm": 0.6893577575683594, + "learning_rate": 1.891129539999929e-05, + "loss": 2.2568, + "step": 4931 + }, + { + "epoch": 0.16, + "grad_norm": 0.7033409476280212, + "learning_rate": 1.8910813063069806e-05, + "loss": 2.1711, + "step": 4932 + }, + { + "epoch": 0.16, + "grad_norm": 0.6908968687057495, + "learning_rate": 1.8910330625471174e-05, + "loss": 2.2832, + "step": 4933 + }, + { + "epoch": 0.16, + "grad_norm": 0.699252188205719, + "learning_rate": 1.890984808720885e-05, + "loss": 2.2938, + "step": 4934 + }, + { + "epoch": 0.16, + "grad_norm": 0.665632963180542, + "learning_rate": 1.8909365448288287e-05, + "loss": 2.2688, + "step": 4935 + }, + { + "epoch": 0.16, + "grad_norm": 0.6963040232658386, + "learning_rate": 1.8908882708714932e-05, + "loss": 2.2039, + "step": 4936 + }, + { + "epoch": 0.16, + "grad_norm": 0.6532336473464966, + "learning_rate": 1.8908399868494242e-05, + "loss": 2.2121, + "step": 4937 + }, + { + "epoch": 0.16, + "grad_norm": 0.7373966574668884, + "learning_rate": 1.8907916927631672e-05, + "loss": 2.2348, + "step": 4938 + }, + { + "epoch": 0.16, + "grad_norm": 0.6986955404281616, + "learning_rate": 1.8907433886132674e-05, + "loss": 2.2722, + "step": 4939 + }, + { + "epoch": 0.16, + "grad_norm": 0.6973358988761902, + "learning_rate": 1.890695074400271e-05, + "loss": 2.2467, + "step": 4940 + }, + { + "epoch": 0.16, + "grad_norm": 0.7383548617362976, + "learning_rate": 1.8906467501247236e-05, + "loss": 2.2426, + "step": 4941 + }, + { + "epoch": 0.16, + "grad_norm": 0.7176355123519897, + "learning_rate": 1.8905984157871713e-05, + "loss": 2.1795, + "step": 4942 + }, + { + "epoch": 0.16, + "grad_norm": 0.6596736907958984, + "learning_rate": 1.8905500713881598e-05, + "loss": 2.2155, + "step": 4943 + }, + { + "epoch": 0.16, + "grad_norm": 0.7085241079330444, + "learning_rate": 1.8905017169282356e-05, + "loss": 2.2032, + "step": 4944 + }, + { + "epoch": 0.16, + "grad_norm": 0.6802390217781067, + "learning_rate": 1.8904533524079453e-05, + "loss": 2.2547, + "step": 4945 + }, + { + "epoch": 0.16, + "grad_norm": 0.6536100506782532, + "learning_rate": 1.8904049778278342e-05, + "loss": 2.21, + "step": 4946 + }, + { + "epoch": 0.16, + "grad_norm": 0.6878189444541931, + "learning_rate": 1.89035659318845e-05, + "loss": 2.2463, + "step": 4947 + }, + { + "epoch": 0.16, + "grad_norm": 0.70465087890625, + "learning_rate": 1.8903081984903385e-05, + "loss": 2.2097, + "step": 4948 + }, + { + "epoch": 0.16, + "grad_norm": 0.7641710638999939, + "learning_rate": 1.890259793734047e-05, + "loss": 2.2942, + "step": 4949 + }, + { + "epoch": 0.16, + "grad_norm": 0.6710432171821594, + "learning_rate": 1.890211378920122e-05, + "loss": 2.2391, + "step": 4950 + }, + { + "epoch": 0.16, + "grad_norm": 0.6708301305770874, + "learning_rate": 1.8901629540491105e-05, + "loss": 2.1809, + "step": 4951 + }, + { + "epoch": 0.16, + "grad_norm": 0.6742755174636841, + "learning_rate": 1.8901145191215598e-05, + "loss": 2.2419, + "step": 4952 + }, + { + "epoch": 0.16, + "grad_norm": 0.6981996893882751, + "learning_rate": 1.8900660741380167e-05, + "loss": 2.1894, + "step": 4953 + }, + { + "epoch": 0.16, + "grad_norm": 0.6808575391769409, + "learning_rate": 1.890017619099029e-05, + "loss": 2.2263, + "step": 4954 + }, + { + "epoch": 0.16, + "grad_norm": 0.683233380317688, + "learning_rate": 1.8899691540051436e-05, + "loss": 2.2211, + "step": 4955 + }, + { + "epoch": 0.16, + "grad_norm": 0.6842895746231079, + "learning_rate": 1.8899206788569083e-05, + "loss": 2.2631, + "step": 4956 + }, + { + "epoch": 0.16, + "grad_norm": 0.6878306865692139, + "learning_rate": 1.8898721936548707e-05, + "loss": 2.324, + "step": 4957 + }, + { + "epoch": 0.16, + "grad_norm": 0.6985573172569275, + "learning_rate": 1.8898236983995786e-05, + "loss": 2.2618, + "step": 4958 + }, + { + "epoch": 0.16, + "grad_norm": 0.6800970435142517, + "learning_rate": 1.88977519309158e-05, + "loss": 2.2108, + "step": 4959 + }, + { + "epoch": 0.17, + "grad_norm": 0.7035930156707764, + "learning_rate": 1.8897266777314224e-05, + "loss": 2.2586, + "step": 4960 + }, + { + "epoch": 0.17, + "grad_norm": 0.6819291710853577, + "learning_rate": 1.8896781523196547e-05, + "loss": 2.2496, + "step": 4961 + }, + { + "epoch": 0.17, + "grad_norm": 0.7000607848167419, + "learning_rate": 1.8896296168568243e-05, + "loss": 2.253, + "step": 4962 + }, + { + "epoch": 0.17, + "grad_norm": 0.7050279974937439, + "learning_rate": 1.8895810713434798e-05, + "loss": 2.261, + "step": 4963 + }, + { + "epoch": 0.17, + "grad_norm": 0.6799076199531555, + "learning_rate": 1.88953251578017e-05, + "loss": 2.2571, + "step": 4964 + }, + { + "epoch": 0.17, + "grad_norm": 0.6625675559043884, + "learning_rate": 1.889483950167443e-05, + "loss": 2.2051, + "step": 4965 + }, + { + "epoch": 0.17, + "grad_norm": 0.6812843680381775, + "learning_rate": 1.8894353745058476e-05, + "loss": 2.1567, + "step": 4966 + }, + { + "epoch": 0.17, + "grad_norm": 0.6905362010002136, + "learning_rate": 1.889386788795932e-05, + "loss": 2.2659, + "step": 4967 + }, + { + "epoch": 0.17, + "grad_norm": 0.7396480441093445, + "learning_rate": 1.889338193038247e-05, + "loss": 2.2381, + "step": 4968 + }, + { + "epoch": 0.17, + "grad_norm": 0.6694372296333313, + "learning_rate": 1.889289587233339e-05, + "loss": 2.2594, + "step": 4969 + }, + { + "epoch": 0.17, + "grad_norm": 0.6820674538612366, + "learning_rate": 1.8892409713817592e-05, + "loss": 2.2359, + "step": 4970 + }, + { + "epoch": 0.17, + "grad_norm": 0.6703478693962097, + "learning_rate": 1.889192345484056e-05, + "loss": 2.2191, + "step": 4971 + }, + { + "epoch": 0.17, + "grad_norm": 0.6847573518753052, + "learning_rate": 1.8891437095407787e-05, + "loss": 2.2643, + "step": 4972 + }, + { + "epoch": 0.17, + "grad_norm": 0.6730780601501465, + "learning_rate": 1.8890950635524767e-05, + "loss": 2.246, + "step": 4973 + }, + { + "epoch": 0.17, + "grad_norm": 0.7108911275863647, + "learning_rate": 1.8890464075197e-05, + "loss": 2.2301, + "step": 4974 + }, + { + "epoch": 0.17, + "grad_norm": 0.6786690950393677, + "learning_rate": 1.8889977414429976e-05, + "loss": 2.2601, + "step": 4975 + }, + { + "epoch": 0.17, + "grad_norm": 0.7251121401786804, + "learning_rate": 1.8889490653229202e-05, + "loss": 2.2282, + "step": 4976 + }, + { + "epoch": 0.17, + "grad_norm": 0.6847441792488098, + "learning_rate": 1.888900379160017e-05, + "loss": 2.2492, + "step": 4977 + }, + { + "epoch": 0.17, + "grad_norm": 0.6911371946334839, + "learning_rate": 1.8888516829548382e-05, + "loss": 2.2787, + "step": 4978 + }, + { + "epoch": 0.17, + "grad_norm": 0.6721246242523193, + "learning_rate": 1.888802976707934e-05, + "loss": 2.2541, + "step": 4979 + }, + { + "epoch": 0.17, + "grad_norm": 0.6804863214492798, + "learning_rate": 1.888754260419855e-05, + "loss": 2.2274, + "step": 4980 + }, + { + "epoch": 0.17, + "grad_norm": 0.7050630450248718, + "learning_rate": 1.8887055340911508e-05, + "loss": 2.2396, + "step": 4981 + }, + { + "epoch": 0.17, + "grad_norm": 0.7062733769416809, + "learning_rate": 1.8886567977223723e-05, + "loss": 2.25, + "step": 4982 + }, + { + "epoch": 0.17, + "grad_norm": 0.688262939453125, + "learning_rate": 1.8886080513140705e-05, + "loss": 2.2226, + "step": 4983 + }, + { + "epoch": 0.17, + "grad_norm": 0.7089536190032959, + "learning_rate": 1.8885592948667955e-05, + "loss": 2.2186, + "step": 4984 + }, + { + "epoch": 0.17, + "grad_norm": 0.6549676060676575, + "learning_rate": 1.8885105283810983e-05, + "loss": 2.2122, + "step": 4985 + }, + { + "epoch": 0.17, + "grad_norm": 0.6723291873931885, + "learning_rate": 1.88846175185753e-05, + "loss": 2.2285, + "step": 4986 + }, + { + "epoch": 0.17, + "grad_norm": 0.7110216021537781, + "learning_rate": 1.8884129652966414e-05, + "loss": 2.2326, + "step": 4987 + }, + { + "epoch": 0.17, + "grad_norm": 0.724730372428894, + "learning_rate": 1.8883641686989838e-05, + "loss": 2.2437, + "step": 4988 + }, + { + "epoch": 0.17, + "grad_norm": 0.6912781000137329, + "learning_rate": 1.8883153620651084e-05, + "loss": 2.3385, + "step": 4989 + }, + { + "epoch": 0.17, + "grad_norm": 0.7131420969963074, + "learning_rate": 1.888266545395567e-05, + "loss": 2.2051, + "step": 4990 + }, + { + "epoch": 0.17, + "grad_norm": 0.7040233612060547, + "learning_rate": 1.88821771869091e-05, + "loss": 2.2559, + "step": 4991 + }, + { + "epoch": 0.17, + "grad_norm": 0.7079867720603943, + "learning_rate": 1.8881688819516902e-05, + "loss": 2.2228, + "step": 4992 + }, + { + "epoch": 0.17, + "grad_norm": 0.7042294144630432, + "learning_rate": 1.8881200351784592e-05, + "loss": 2.2568, + "step": 4993 + }, + { + "epoch": 0.17, + "grad_norm": 0.7099297642707825, + "learning_rate": 1.8880711783717682e-05, + "loss": 2.2235, + "step": 4994 + }, + { + "epoch": 0.17, + "grad_norm": 0.6637887358665466, + "learning_rate": 1.8880223115321695e-05, + "loss": 2.1899, + "step": 4995 + }, + { + "epoch": 0.17, + "grad_norm": 0.666069746017456, + "learning_rate": 1.8879734346602153e-05, + "loss": 2.2155, + "step": 4996 + }, + { + "epoch": 0.17, + "grad_norm": 0.6963674426078796, + "learning_rate": 1.8879245477564572e-05, + "loss": 2.2441, + "step": 4997 + }, + { + "epoch": 0.17, + "grad_norm": 0.7048020362854004, + "learning_rate": 1.8878756508214482e-05, + "loss": 2.1783, + "step": 4998 + }, + { + "epoch": 0.17, + "grad_norm": 0.7003149390220642, + "learning_rate": 1.887826743855741e-05, + "loss": 2.1917, + "step": 4999 + }, + { + "epoch": 0.17, + "grad_norm": 0.667918860912323, + "learning_rate": 1.8877778268598868e-05, + "loss": 2.1887, + "step": 5000 + }, + { + "epoch": 0.17, + "grad_norm": 0.7057336568832397, + "learning_rate": 1.8877288998344392e-05, + "loss": 2.2062, + "step": 5001 + }, + { + "epoch": 0.17, + "grad_norm": 0.6759325861930847, + "learning_rate": 1.887679962779951e-05, + "loss": 2.2066, + "step": 5002 + }, + { + "epoch": 0.17, + "grad_norm": 0.6874157190322876, + "learning_rate": 1.8876310156969745e-05, + "loss": 2.2504, + "step": 5003 + }, + { + "epoch": 0.17, + "grad_norm": 0.6488717794418335, + "learning_rate": 1.887582058586063e-05, + "loss": 2.2011, + "step": 5004 + }, + { + "epoch": 0.17, + "grad_norm": 0.6775034070014954, + "learning_rate": 1.8875330914477696e-05, + "loss": 2.1922, + "step": 5005 + }, + { + "epoch": 0.17, + "grad_norm": 0.7067666053771973, + "learning_rate": 1.8874841142826475e-05, + "loss": 2.2438, + "step": 5006 + }, + { + "epoch": 0.17, + "grad_norm": 0.6979759931564331, + "learning_rate": 1.88743512709125e-05, + "loss": 2.3106, + "step": 5007 + }, + { + "epoch": 0.17, + "grad_norm": 0.6776329278945923, + "learning_rate": 1.8873861298741306e-05, + "loss": 2.1763, + "step": 5008 + }, + { + "epoch": 0.17, + "grad_norm": 0.6810246109962463, + "learning_rate": 1.8873371226318427e-05, + "loss": 2.1903, + "step": 5009 + }, + { + "epoch": 0.17, + "grad_norm": 0.6592420935630798, + "learning_rate": 1.88728810536494e-05, + "loss": 2.2192, + "step": 5010 + }, + { + "epoch": 0.17, + "grad_norm": 0.7082527279853821, + "learning_rate": 1.8872390780739763e-05, + "loss": 2.2234, + "step": 5011 + }, + { + "epoch": 0.17, + "grad_norm": 0.6939054727554321, + "learning_rate": 1.8871900407595058e-05, + "loss": 2.1776, + "step": 5012 + }, + { + "epoch": 0.17, + "grad_norm": 0.6793426871299744, + "learning_rate": 1.8871409934220818e-05, + "loss": 2.1277, + "step": 5013 + }, + { + "epoch": 0.17, + "grad_norm": 0.6657713651657104, + "learning_rate": 1.8870919360622588e-05, + "loss": 2.1616, + "step": 5014 + }, + { + "epoch": 0.17, + "grad_norm": 0.6900374293327332, + "learning_rate": 1.887042868680591e-05, + "loss": 2.286, + "step": 5015 + }, + { + "epoch": 0.17, + "grad_norm": 0.7074781060218811, + "learning_rate": 1.886993791277633e-05, + "loss": 2.2437, + "step": 5016 + }, + { + "epoch": 0.17, + "grad_norm": 0.6935451626777649, + "learning_rate": 1.8869447038539387e-05, + "loss": 2.2164, + "step": 5017 + }, + { + "epoch": 0.17, + "grad_norm": 0.7203739881515503, + "learning_rate": 1.886895606410063e-05, + "loss": 2.2355, + "step": 5018 + }, + { + "epoch": 0.17, + "grad_norm": 0.7050142288208008, + "learning_rate": 1.8868464989465605e-05, + "loss": 2.2589, + "step": 5019 + }, + { + "epoch": 0.17, + "grad_norm": 0.7077071070671082, + "learning_rate": 1.886797381463986e-05, + "loss": 2.2414, + "step": 5020 + }, + { + "epoch": 0.17, + "grad_norm": 0.6927554607391357, + "learning_rate": 1.8867482539628942e-05, + "loss": 2.1831, + "step": 5021 + }, + { + "epoch": 0.17, + "grad_norm": 0.6928524374961853, + "learning_rate": 1.8866991164438405e-05, + "loss": 2.2028, + "step": 5022 + }, + { + "epoch": 0.17, + "grad_norm": 0.6950771808624268, + "learning_rate": 1.88664996890738e-05, + "loss": 2.1945, + "step": 5023 + }, + { + "epoch": 0.17, + "grad_norm": 0.7099668383598328, + "learning_rate": 1.8866008113540674e-05, + "loss": 2.3015, + "step": 5024 + }, + { + "epoch": 0.17, + "grad_norm": 0.6755173802375793, + "learning_rate": 1.8865516437844586e-05, + "loss": 2.1791, + "step": 5025 + }, + { + "epoch": 0.17, + "grad_norm": 0.6789655685424805, + "learning_rate": 1.886502466199109e-05, + "loss": 2.2237, + "step": 5026 + }, + { + "epoch": 0.17, + "grad_norm": 0.7095639109611511, + "learning_rate": 1.886453278598574e-05, + "loss": 2.2423, + "step": 5027 + }, + { + "epoch": 0.17, + "grad_norm": 0.7211741209030151, + "learning_rate": 1.8864040809834093e-05, + "loss": 2.1976, + "step": 5028 + }, + { + "epoch": 0.17, + "grad_norm": 0.6962361931800842, + "learning_rate": 1.886354873354171e-05, + "loss": 2.238, + "step": 5029 + }, + { + "epoch": 0.17, + "grad_norm": 0.6994633674621582, + "learning_rate": 1.8863056557114148e-05, + "loss": 2.2196, + "step": 5030 + }, + { + "epoch": 0.17, + "grad_norm": 0.6872656941413879, + "learning_rate": 1.8862564280556966e-05, + "loss": 2.2344, + "step": 5031 + }, + { + "epoch": 0.17, + "grad_norm": 0.7347181439399719, + "learning_rate": 1.8862071903875727e-05, + "loss": 2.2577, + "step": 5032 + }, + { + "epoch": 0.17, + "grad_norm": 0.7118401527404785, + "learning_rate": 1.8861579427075992e-05, + "loss": 2.1704, + "step": 5033 + }, + { + "epoch": 0.17, + "grad_norm": 0.689094603061676, + "learning_rate": 1.8861086850163327e-05, + "loss": 2.2785, + "step": 5034 + }, + { + "epoch": 0.17, + "grad_norm": 0.7315661311149597, + "learning_rate": 1.8860594173143296e-05, + "loss": 2.2297, + "step": 5035 + }, + { + "epoch": 0.17, + "grad_norm": 0.7047004699707031, + "learning_rate": 1.8860101396021468e-05, + "loss": 2.2395, + "step": 5036 + }, + { + "epoch": 0.17, + "grad_norm": 0.6841112971305847, + "learning_rate": 1.88596085188034e-05, + "loss": 2.2449, + "step": 5037 + }, + { + "epoch": 0.17, + "grad_norm": 0.7024719715118408, + "learning_rate": 1.885911554149467e-05, + "loss": 2.1848, + "step": 5038 + }, + { + "epoch": 0.17, + "grad_norm": 0.7408850789070129, + "learning_rate": 1.8858622464100847e-05, + "loss": 2.2881, + "step": 5039 + }, + { + "epoch": 0.17, + "grad_norm": 0.6529345512390137, + "learning_rate": 1.8858129286627497e-05, + "loss": 2.1309, + "step": 5040 + }, + { + "epoch": 0.17, + "grad_norm": 0.7167838215827942, + "learning_rate": 1.8857636009080192e-05, + "loss": 2.2678, + "step": 5041 + }, + { + "epoch": 0.17, + "grad_norm": 0.7037912607192993, + "learning_rate": 1.885714263146451e-05, + "loss": 2.2042, + "step": 5042 + }, + { + "epoch": 0.17, + "grad_norm": 0.7449973225593567, + "learning_rate": 1.885664915378602e-05, + "loss": 2.2383, + "step": 5043 + }, + { + "epoch": 0.17, + "grad_norm": 0.7064663171768188, + "learning_rate": 1.88561555760503e-05, + "loss": 2.2093, + "step": 5044 + }, + { + "epoch": 0.17, + "grad_norm": 0.7145691514015198, + "learning_rate": 1.8855661898262926e-05, + "loss": 2.2091, + "step": 5045 + }, + { + "epoch": 0.17, + "grad_norm": 0.6990336179733276, + "learning_rate": 1.885516812042947e-05, + "loss": 2.2251, + "step": 5046 + }, + { + "epoch": 0.17, + "grad_norm": 0.6995840668678284, + "learning_rate": 1.8854674242555514e-05, + "loss": 2.1769, + "step": 5047 + }, + { + "epoch": 0.17, + "grad_norm": 0.6863860487937927, + "learning_rate": 1.8854180264646637e-05, + "loss": 2.2144, + "step": 5048 + }, + { + "epoch": 0.17, + "grad_norm": 0.7096532583236694, + "learning_rate": 1.885368618670842e-05, + "loss": 2.2029, + "step": 5049 + }, + { + "epoch": 0.17, + "grad_norm": 0.7184292674064636, + "learning_rate": 1.885319200874645e-05, + "loss": 2.2367, + "step": 5050 + }, + { + "epoch": 0.17, + "grad_norm": 0.6989868879318237, + "learning_rate": 1.8852697730766303e-05, + "loss": 2.2247, + "step": 5051 + }, + { + "epoch": 0.17, + "grad_norm": 0.7400781512260437, + "learning_rate": 1.8852203352773566e-05, + "loss": 2.2633, + "step": 5052 + }, + { + "epoch": 0.17, + "grad_norm": 0.6930588483810425, + "learning_rate": 1.8851708874773818e-05, + "loss": 2.2579, + "step": 5053 + }, + { + "epoch": 0.17, + "grad_norm": 0.7328749299049377, + "learning_rate": 1.885121429677266e-05, + "loss": 2.31, + "step": 5054 + }, + { + "epoch": 0.17, + "grad_norm": 0.7348461151123047, + "learning_rate": 1.885071961877566e-05, + "loss": 2.252, + "step": 5055 + }, + { + "epoch": 0.17, + "grad_norm": 0.7306812405586243, + "learning_rate": 1.8850224840788424e-05, + "loss": 2.2259, + "step": 5056 + }, + { + "epoch": 0.17, + "grad_norm": 0.7326768636703491, + "learning_rate": 1.8849729962816533e-05, + "loss": 2.2677, + "step": 5057 + }, + { + "epoch": 0.17, + "grad_norm": 0.7767086625099182, + "learning_rate": 1.884923498486558e-05, + "loss": 2.2038, + "step": 5058 + }, + { + "epoch": 0.17, + "grad_norm": 0.707169234752655, + "learning_rate": 1.8848739906941153e-05, + "loss": 2.2909, + "step": 5059 + }, + { + "epoch": 0.17, + "grad_norm": 0.669387936592102, + "learning_rate": 1.884824472904885e-05, + "loss": 2.1259, + "step": 5060 + }, + { + "epoch": 0.17, + "grad_norm": 0.688892662525177, + "learning_rate": 1.8847749451194264e-05, + "loss": 2.191, + "step": 5061 + }, + { + "epoch": 0.17, + "grad_norm": 0.7167370319366455, + "learning_rate": 1.8847254073382993e-05, + "loss": 2.2376, + "step": 5062 + }, + { + "epoch": 0.17, + "grad_norm": 0.6911334991455078, + "learning_rate": 1.8846758595620628e-05, + "loss": 2.2385, + "step": 5063 + }, + { + "epoch": 0.17, + "grad_norm": 0.7398827075958252, + "learning_rate": 1.8846263017912766e-05, + "loss": 2.2651, + "step": 5064 + }, + { + "epoch": 0.17, + "grad_norm": 0.7097740769386292, + "learning_rate": 1.8845767340265012e-05, + "loss": 2.1988, + "step": 5065 + }, + { + "epoch": 0.17, + "grad_norm": 0.7139015197753906, + "learning_rate": 1.884527156268296e-05, + "loss": 2.2769, + "step": 5066 + }, + { + "epoch": 0.17, + "grad_norm": 0.71262127161026, + "learning_rate": 1.884477568517222e-05, + "loss": 2.2398, + "step": 5067 + }, + { + "epoch": 0.17, + "grad_norm": 0.6858075261116028, + "learning_rate": 1.8844279707738384e-05, + "loss": 2.2017, + "step": 5068 + }, + { + "epoch": 0.17, + "grad_norm": 0.7736190557479858, + "learning_rate": 1.8843783630387057e-05, + "loss": 2.1817, + "step": 5069 + }, + { + "epoch": 0.17, + "grad_norm": 0.7048096060752869, + "learning_rate": 1.8843287453123847e-05, + "loss": 2.1457, + "step": 5070 + }, + { + "epoch": 0.17, + "grad_norm": 0.7217193245887756, + "learning_rate": 1.8842791175954358e-05, + "loss": 2.2319, + "step": 5071 + }, + { + "epoch": 0.17, + "grad_norm": 0.6749775409698486, + "learning_rate": 1.8842294798884197e-05, + "loss": 2.1967, + "step": 5072 + }, + { + "epoch": 0.17, + "grad_norm": 0.7348529100418091, + "learning_rate": 1.8841798321918972e-05, + "loss": 2.164, + "step": 5073 + }, + { + "epoch": 0.17, + "grad_norm": 0.7659627199172974, + "learning_rate": 1.884130174506429e-05, + "loss": 2.2006, + "step": 5074 + }, + { + "epoch": 0.17, + "grad_norm": 0.756077229976654, + "learning_rate": 1.8840805068325765e-05, + "loss": 2.2635, + "step": 5075 + }, + { + "epoch": 0.17, + "grad_norm": 0.6964266300201416, + "learning_rate": 1.8840308291709e-05, + "loss": 2.1826, + "step": 5076 + }, + { + "epoch": 0.17, + "grad_norm": 0.6656002998352051, + "learning_rate": 1.883981141521962e-05, + "loss": 2.2388, + "step": 5077 + }, + { + "epoch": 0.17, + "grad_norm": 0.7073518633842468, + "learning_rate": 1.883931443886323e-05, + "loss": 2.2141, + "step": 5078 + }, + { + "epoch": 0.17, + "grad_norm": 0.7134633660316467, + "learning_rate": 1.8838817362645443e-05, + "loss": 2.3162, + "step": 5079 + }, + { + "epoch": 0.17, + "grad_norm": 0.7177674770355225, + "learning_rate": 1.883832018657188e-05, + "loss": 2.243, + "step": 5080 + }, + { + "epoch": 0.17, + "grad_norm": 0.7090542912483215, + "learning_rate": 1.8837822910648152e-05, + "loss": 2.2455, + "step": 5081 + }, + { + "epoch": 0.17, + "grad_norm": 0.672967791557312, + "learning_rate": 1.883732553487988e-05, + "loss": 2.2182, + "step": 5082 + }, + { + "epoch": 0.17, + "grad_norm": 0.6998160481452942, + "learning_rate": 1.8836828059272685e-05, + "loss": 2.2016, + "step": 5083 + }, + { + "epoch": 0.17, + "grad_norm": 0.6857835650444031, + "learning_rate": 1.8836330483832185e-05, + "loss": 2.2118, + "step": 5084 + }, + { + "epoch": 0.17, + "grad_norm": 0.6978532671928406, + "learning_rate": 1.8835832808564002e-05, + "loss": 2.2585, + "step": 5085 + }, + { + "epoch": 0.17, + "grad_norm": 0.7195155024528503, + "learning_rate": 1.883533503347376e-05, + "loss": 2.2588, + "step": 5086 + }, + { + "epoch": 0.17, + "grad_norm": 0.7492415904998779, + "learning_rate": 1.8834837158567078e-05, + "loss": 2.2017, + "step": 5087 + }, + { + "epoch": 0.17, + "grad_norm": 0.7003429532051086, + "learning_rate": 1.8834339183849586e-05, + "loss": 2.2221, + "step": 5088 + }, + { + "epoch": 0.17, + "grad_norm": 0.7068492770195007, + "learning_rate": 1.8833841109326906e-05, + "loss": 2.2284, + "step": 5089 + }, + { + "epoch": 0.17, + "grad_norm": 0.7309505343437195, + "learning_rate": 1.8833342935004667e-05, + "loss": 2.1664, + "step": 5090 + }, + { + "epoch": 0.17, + "grad_norm": 0.6891161203384399, + "learning_rate": 1.8832844660888496e-05, + "loss": 2.2346, + "step": 5091 + }, + { + "epoch": 0.17, + "grad_norm": 0.6830641627311707, + "learning_rate": 1.883234628698402e-05, + "loss": 2.1392, + "step": 5092 + }, + { + "epoch": 0.17, + "grad_norm": 0.692571222782135, + "learning_rate": 1.883184781329688e-05, + "loss": 2.1848, + "step": 5093 + }, + { + "epoch": 0.17, + "grad_norm": 0.7061823606491089, + "learning_rate": 1.883134923983269e-05, + "loss": 2.2209, + "step": 5094 + }, + { + "epoch": 0.17, + "grad_norm": 0.6857052445411682, + "learning_rate": 1.8830850566597096e-05, + "loss": 2.1984, + "step": 5095 + }, + { + "epoch": 0.17, + "grad_norm": 0.7192384004592896, + "learning_rate": 1.8830351793595727e-05, + "loss": 2.2937, + "step": 5096 + }, + { + "epoch": 0.17, + "grad_norm": 0.6932501792907715, + "learning_rate": 1.8829852920834223e-05, + "loss": 2.2218, + "step": 5097 + }, + { + "epoch": 0.17, + "grad_norm": 0.7089884877204895, + "learning_rate": 1.882935394831821e-05, + "loss": 2.2552, + "step": 5098 + }, + { + "epoch": 0.17, + "grad_norm": 0.6818573474884033, + "learning_rate": 1.8828854876053332e-05, + "loss": 2.1774, + "step": 5099 + }, + { + "epoch": 0.17, + "grad_norm": 0.7147440314292908, + "learning_rate": 1.8828355704045225e-05, + "loss": 2.1969, + "step": 5100 + }, + { + "epoch": 0.17, + "grad_norm": 0.7260103225708008, + "learning_rate": 1.882785643229953e-05, + "loss": 2.2024, + "step": 5101 + }, + { + "epoch": 0.17, + "grad_norm": 0.7020487189292908, + "learning_rate": 1.8827357060821886e-05, + "loss": 2.1687, + "step": 5102 + }, + { + "epoch": 0.17, + "grad_norm": 0.708466649055481, + "learning_rate": 1.8826857589617934e-05, + "loss": 2.2057, + "step": 5103 + }, + { + "epoch": 0.17, + "grad_norm": 0.6802229881286621, + "learning_rate": 1.8826358018693324e-05, + "loss": 2.0251, + "step": 5104 + }, + { + "epoch": 0.17, + "grad_norm": 0.689306378364563, + "learning_rate": 1.8825858348053686e-05, + "loss": 2.2905, + "step": 5105 + }, + { + "epoch": 0.17, + "grad_norm": 0.6987339854240417, + "learning_rate": 1.882535857770468e-05, + "loss": 2.2342, + "step": 5106 + }, + { + "epoch": 0.17, + "grad_norm": 0.7029455304145813, + "learning_rate": 1.882485870765194e-05, + "loss": 2.2232, + "step": 5107 + }, + { + "epoch": 0.17, + "grad_norm": 0.6918255686759949, + "learning_rate": 1.882435873790112e-05, + "loss": 2.2525, + "step": 5108 + }, + { + "epoch": 0.17, + "grad_norm": 0.6963185667991638, + "learning_rate": 1.8823858668457866e-05, + "loss": 2.2526, + "step": 5109 + }, + { + "epoch": 0.17, + "grad_norm": 0.6784640550613403, + "learning_rate": 1.882335849932783e-05, + "loss": 2.1899, + "step": 5110 + }, + { + "epoch": 0.17, + "grad_norm": 0.6808115243911743, + "learning_rate": 1.882285823051666e-05, + "loss": 2.2027, + "step": 5111 + }, + { + "epoch": 0.17, + "grad_norm": 0.7175215482711792, + "learning_rate": 1.8822357862030008e-05, + "loss": 2.191, + "step": 5112 + }, + { + "epoch": 0.17, + "grad_norm": 0.7055568695068359, + "learning_rate": 1.8821857393873525e-05, + "loss": 2.2004, + "step": 5113 + }, + { + "epoch": 0.17, + "grad_norm": 0.6805019378662109, + "learning_rate": 1.882135682605287e-05, + "loss": 2.2352, + "step": 5114 + }, + { + "epoch": 0.17, + "grad_norm": 0.7093477249145508, + "learning_rate": 1.8820856158573693e-05, + "loss": 2.2168, + "step": 5115 + }, + { + "epoch": 0.17, + "grad_norm": 0.7215244174003601, + "learning_rate": 1.8820355391441657e-05, + "loss": 2.224, + "step": 5116 + }, + { + "epoch": 0.17, + "grad_norm": 0.7366530895233154, + "learning_rate": 1.8819854524662413e-05, + "loss": 2.2683, + "step": 5117 + }, + { + "epoch": 0.17, + "grad_norm": 0.6769137382507324, + "learning_rate": 1.8819353558241617e-05, + "loss": 2.2585, + "step": 5118 + }, + { + "epoch": 0.17, + "grad_norm": 0.6813618540763855, + "learning_rate": 1.881885249218494e-05, + "loss": 2.1645, + "step": 5119 + }, + { + "epoch": 0.17, + "grad_norm": 0.6905884146690369, + "learning_rate": 1.881835132649803e-05, + "loss": 2.2181, + "step": 5120 + }, + { + "epoch": 0.17, + "grad_norm": 0.7158372402191162, + "learning_rate": 1.8817850061186558e-05, + "loss": 2.2497, + "step": 5121 + }, + { + "epoch": 0.17, + "grad_norm": 0.9966598749160767, + "learning_rate": 1.8817348696256185e-05, + "loss": 2.2706, + "step": 5122 + }, + { + "epoch": 0.17, + "grad_norm": 0.6946061253547668, + "learning_rate": 1.8816847231712572e-05, + "loss": 2.2492, + "step": 5123 + }, + { + "epoch": 0.17, + "grad_norm": 0.7460147142410278, + "learning_rate": 1.8816345667561385e-05, + "loss": 2.3094, + "step": 5124 + }, + { + "epoch": 0.17, + "grad_norm": 0.7010629177093506, + "learning_rate": 1.881584400380829e-05, + "loss": 2.204, + "step": 5125 + }, + { + "epoch": 0.17, + "grad_norm": 0.6772447824478149, + "learning_rate": 1.8815342240458963e-05, + "loss": 2.2181, + "step": 5126 + }, + { + "epoch": 0.17, + "grad_norm": 0.728985607624054, + "learning_rate": 1.8814840377519062e-05, + "loss": 2.2205, + "step": 5127 + }, + { + "epoch": 0.17, + "grad_norm": 0.6857736706733704, + "learning_rate": 1.8814338414994256e-05, + "loss": 2.2202, + "step": 5128 + }, + { + "epoch": 0.17, + "grad_norm": 0.6764331459999084, + "learning_rate": 1.8813836352890227e-05, + "loss": 2.1848, + "step": 5129 + }, + { + "epoch": 0.17, + "grad_norm": 0.6858397126197815, + "learning_rate": 1.8813334191212637e-05, + "loss": 2.2129, + "step": 5130 + }, + { + "epoch": 0.17, + "grad_norm": 0.7191885113716125, + "learning_rate": 1.8812831929967165e-05, + "loss": 2.1414, + "step": 5131 + }, + { + "epoch": 0.17, + "grad_norm": 0.6899157762527466, + "learning_rate": 1.881232956915948e-05, + "loss": 2.1246, + "step": 5132 + }, + { + "epoch": 0.17, + "grad_norm": 0.6869111657142639, + "learning_rate": 1.881182710879526e-05, + "loss": 2.2016, + "step": 5133 + }, + { + "epoch": 0.17, + "grad_norm": 0.6878247857093811, + "learning_rate": 1.8811324548880182e-05, + "loss": 2.2824, + "step": 5134 + }, + { + "epoch": 0.17, + "grad_norm": 0.6816338300704956, + "learning_rate": 1.8810821889419926e-05, + "loss": 2.221, + "step": 5135 + }, + { + "epoch": 0.17, + "grad_norm": 0.7088598012924194, + "learning_rate": 1.8810319130420164e-05, + "loss": 2.2069, + "step": 5136 + }, + { + "epoch": 0.17, + "grad_norm": 0.7191385626792908, + "learning_rate": 1.880981627188658e-05, + "loss": 2.2419, + "step": 5137 + }, + { + "epoch": 0.17, + "grad_norm": 0.7018817067146301, + "learning_rate": 1.880931331382486e-05, + "loss": 2.1571, + "step": 5138 + }, + { + "epoch": 0.17, + "grad_norm": 0.6681764721870422, + "learning_rate": 1.8808810256240676e-05, + "loss": 2.1501, + "step": 5139 + }, + { + "epoch": 0.17, + "grad_norm": 0.7004812359809875, + "learning_rate": 1.8808307099139714e-05, + "loss": 2.1977, + "step": 5140 + }, + { + "epoch": 0.17, + "grad_norm": 0.72236567735672, + "learning_rate": 1.8807803842527665e-05, + "loss": 2.2801, + "step": 5141 + }, + { + "epoch": 0.17, + "grad_norm": 0.6962788105010986, + "learning_rate": 1.880730048641021e-05, + "loss": 2.2461, + "step": 5142 + }, + { + "epoch": 0.17, + "grad_norm": 0.7311447858810425, + "learning_rate": 1.8806797030793035e-05, + "loss": 2.2518, + "step": 5143 + }, + { + "epoch": 0.17, + "grad_norm": 0.6980361342430115, + "learning_rate": 1.880629347568183e-05, + "loss": 2.2762, + "step": 5144 + }, + { + "epoch": 0.17, + "grad_norm": 0.7014514803886414, + "learning_rate": 1.8805789821082276e-05, + "loss": 2.2332, + "step": 5145 + }, + { + "epoch": 0.17, + "grad_norm": 0.7239410281181335, + "learning_rate": 1.8805286067000075e-05, + "loss": 2.1908, + "step": 5146 + }, + { + "epoch": 0.17, + "grad_norm": 0.694811224937439, + "learning_rate": 1.880478221344091e-05, + "loss": 2.2238, + "step": 5147 + }, + { + "epoch": 0.17, + "grad_norm": 0.6995794773101807, + "learning_rate": 1.8804278260410476e-05, + "loss": 2.1678, + "step": 5148 + }, + { + "epoch": 0.17, + "grad_norm": 0.6875421404838562, + "learning_rate": 1.880377420791447e-05, + "loss": 2.2063, + "step": 5149 + }, + { + "epoch": 0.17, + "grad_norm": 0.6913409233093262, + "learning_rate": 1.880327005595858e-05, + "loss": 2.2812, + "step": 5150 + }, + { + "epoch": 0.17, + "grad_norm": 0.6921247243881226, + "learning_rate": 1.8802765804548502e-05, + "loss": 2.196, + "step": 5151 + }, + { + "epoch": 0.17, + "grad_norm": 0.7294057607650757, + "learning_rate": 1.8802261453689933e-05, + "loss": 2.1911, + "step": 5152 + }, + { + "epoch": 0.17, + "grad_norm": 0.714398205280304, + "learning_rate": 1.8801757003388578e-05, + "loss": 2.185, + "step": 5153 + }, + { + "epoch": 0.17, + "grad_norm": 0.6870704889297485, + "learning_rate": 1.880125245365013e-05, + "loss": 2.2089, + "step": 5154 + }, + { + "epoch": 0.17, + "grad_norm": 0.7094072699546814, + "learning_rate": 1.8800747804480285e-05, + "loss": 2.1477, + "step": 5155 + }, + { + "epoch": 0.17, + "grad_norm": 0.7408642768859863, + "learning_rate": 1.8800243055884755e-05, + "loss": 2.1855, + "step": 5156 + }, + { + "epoch": 0.17, + "grad_norm": 0.6755397319793701, + "learning_rate": 1.8799738207869237e-05, + "loss": 2.138, + "step": 5157 + }, + { + "epoch": 0.17, + "grad_norm": 0.7241804599761963, + "learning_rate": 1.8799233260439427e-05, + "loss": 2.2652, + "step": 5158 + }, + { + "epoch": 0.17, + "grad_norm": 0.7762075662612915, + "learning_rate": 1.8798728213601042e-05, + "loss": 2.1924, + "step": 5159 + }, + { + "epoch": 0.17, + "grad_norm": 0.7232881188392639, + "learning_rate": 1.879822306735978e-05, + "loss": 2.1984, + "step": 5160 + }, + { + "epoch": 0.17, + "grad_norm": 0.7912119030952454, + "learning_rate": 1.879771782172135e-05, + "loss": 2.1972, + "step": 5161 + }, + { + "epoch": 0.17, + "grad_norm": 0.6885355114936829, + "learning_rate": 1.8797212476691464e-05, + "loss": 2.1617, + "step": 5162 + }, + { + "epoch": 0.17, + "grad_norm": 0.7278901934623718, + "learning_rate": 1.879670703227582e-05, + "loss": 2.2085, + "step": 5163 + }, + { + "epoch": 0.17, + "grad_norm": 0.6827148199081421, + "learning_rate": 1.879620148848014e-05, + "loss": 2.2332, + "step": 5164 + }, + { + "epoch": 0.17, + "grad_norm": 0.7028045654296875, + "learning_rate": 1.879569584531013e-05, + "loss": 2.1916, + "step": 5165 + }, + { + "epoch": 0.17, + "grad_norm": 0.7079675793647766, + "learning_rate": 1.8795190102771502e-05, + "loss": 2.1834, + "step": 5166 + }, + { + "epoch": 0.17, + "grad_norm": 0.7118650674819946, + "learning_rate": 1.879468426086997e-05, + "loss": 2.2151, + "step": 5167 + }, + { + "epoch": 0.17, + "grad_norm": 0.7239086031913757, + "learning_rate": 1.8794178319611254e-05, + "loss": 2.1602, + "step": 5168 + }, + { + "epoch": 0.17, + "grad_norm": 0.6722283363342285, + "learning_rate": 1.879367227900106e-05, + "loss": 2.155, + "step": 5169 + }, + { + "epoch": 0.17, + "grad_norm": 0.6998399496078491, + "learning_rate": 1.8793166139045112e-05, + "loss": 2.1961, + "step": 5170 + }, + { + "epoch": 0.17, + "grad_norm": 0.6859027147293091, + "learning_rate": 1.879265989974913e-05, + "loss": 2.2233, + "step": 5171 + }, + { + "epoch": 0.17, + "grad_norm": 0.6962826251983643, + "learning_rate": 1.8792153561118823e-05, + "loss": 2.1912, + "step": 5172 + }, + { + "epoch": 0.17, + "grad_norm": 0.7125821709632874, + "learning_rate": 1.8791647123159922e-05, + "loss": 2.2599, + "step": 5173 + }, + { + "epoch": 0.17, + "grad_norm": 0.6984997391700745, + "learning_rate": 1.8791140585878144e-05, + "loss": 2.2752, + "step": 5174 + }, + { + "epoch": 0.17, + "grad_norm": 0.7091352343559265, + "learning_rate": 1.8790633949279212e-05, + "loss": 2.1558, + "step": 5175 + }, + { + "epoch": 0.17, + "grad_norm": 0.6993475556373596, + "learning_rate": 1.879012721336885e-05, + "loss": 2.2107, + "step": 5176 + }, + { + "epoch": 0.17, + "grad_norm": 0.7126711010932922, + "learning_rate": 1.878962037815278e-05, + "loss": 2.2304, + "step": 5177 + }, + { + "epoch": 0.17, + "grad_norm": 0.7008918523788452, + "learning_rate": 1.878911344363673e-05, + "loss": 2.1511, + "step": 5178 + }, + { + "epoch": 0.17, + "grad_norm": 0.687052309513092, + "learning_rate": 1.8788606409826427e-05, + "loss": 2.2292, + "step": 5179 + }, + { + "epoch": 0.17, + "grad_norm": 0.7070609331130981, + "learning_rate": 1.87880992767276e-05, + "loss": 2.2167, + "step": 5180 + }, + { + "epoch": 0.17, + "grad_norm": 0.6896861791610718, + "learning_rate": 1.878759204434598e-05, + "loss": 2.217, + "step": 5181 + }, + { + "epoch": 0.17, + "grad_norm": 0.68677818775177, + "learning_rate": 1.8787084712687292e-05, + "loss": 2.2229, + "step": 5182 + }, + { + "epoch": 0.17, + "grad_norm": 0.6798427700996399, + "learning_rate": 1.878657728175727e-05, + "loss": 2.2, + "step": 5183 + }, + { + "epoch": 0.17, + "grad_norm": 0.7029697299003601, + "learning_rate": 1.878606975156165e-05, + "loss": 2.2202, + "step": 5184 + }, + { + "epoch": 0.17, + "grad_norm": 0.6784753203392029, + "learning_rate": 1.8785562122106164e-05, + "loss": 2.1838, + "step": 5185 + }, + { + "epoch": 0.17, + "grad_norm": 0.7120959758758545, + "learning_rate": 1.8785054393396543e-05, + "loss": 2.2425, + "step": 5186 + }, + { + "epoch": 0.17, + "grad_norm": 0.7057787179946899, + "learning_rate": 1.878454656543853e-05, + "loss": 2.2116, + "step": 5187 + }, + { + "epoch": 0.17, + "grad_norm": 0.6878623366355896, + "learning_rate": 1.878403863823785e-05, + "loss": 2.1995, + "step": 5188 + }, + { + "epoch": 0.17, + "grad_norm": 0.6719970107078552, + "learning_rate": 1.878353061180026e-05, + "loss": 2.1639, + "step": 5189 + }, + { + "epoch": 0.17, + "grad_norm": 0.7024589776992798, + "learning_rate": 1.878302248613148e-05, + "loss": 2.2103, + "step": 5190 + }, + { + "epoch": 0.17, + "grad_norm": 0.7108834385871887, + "learning_rate": 1.8782514261237263e-05, + "loss": 2.1811, + "step": 5191 + }, + { + "epoch": 0.17, + "grad_norm": 0.7629290819168091, + "learning_rate": 1.878200593712335e-05, + "loss": 2.2176, + "step": 5192 + }, + { + "epoch": 0.17, + "grad_norm": 0.6590706706047058, + "learning_rate": 1.8781497513795476e-05, + "loss": 2.1312, + "step": 5193 + }, + { + "epoch": 0.17, + "grad_norm": 0.6679494976997375, + "learning_rate": 1.878098899125939e-05, + "loss": 2.1923, + "step": 5194 + }, + { + "epoch": 0.17, + "grad_norm": 0.6982284188270569, + "learning_rate": 1.878048036952084e-05, + "loss": 2.2231, + "step": 5195 + }, + { + "epoch": 0.17, + "grad_norm": 0.6949024796485901, + "learning_rate": 1.8779971648585566e-05, + "loss": 2.2107, + "step": 5196 + }, + { + "epoch": 0.17, + "grad_norm": 0.7009048461914062, + "learning_rate": 1.877946282845932e-05, + "loss": 2.2243, + "step": 5197 + }, + { + "epoch": 0.17, + "grad_norm": 0.7282629609107971, + "learning_rate": 1.8778953909147844e-05, + "loss": 2.2313, + "step": 5198 + }, + { + "epoch": 0.17, + "grad_norm": 0.7124980092048645, + "learning_rate": 1.8778444890656896e-05, + "loss": 2.2477, + "step": 5199 + }, + { + "epoch": 0.17, + "grad_norm": 0.7047486305236816, + "learning_rate": 1.877793577299222e-05, + "loss": 2.1911, + "step": 5200 + }, + { + "epoch": 0.17, + "grad_norm": 0.7133715748786926, + "learning_rate": 1.877742655615957e-05, + "loss": 2.167, + "step": 5201 + }, + { + "epoch": 0.17, + "grad_norm": 0.6848175525665283, + "learning_rate": 1.87769172401647e-05, + "loss": 2.2421, + "step": 5202 + }, + { + "epoch": 0.17, + "grad_norm": 0.6807448863983154, + "learning_rate": 1.877640782501336e-05, + "loss": 2.2311, + "step": 5203 + }, + { + "epoch": 0.17, + "grad_norm": 0.7021402716636658, + "learning_rate": 1.877589831071131e-05, + "loss": 2.2491, + "step": 5204 + }, + { + "epoch": 0.17, + "grad_norm": 0.6985737681388855, + "learning_rate": 1.8775388697264305e-05, + "loss": 2.2056, + "step": 5205 + }, + { + "epoch": 0.17, + "grad_norm": 0.6904672980308533, + "learning_rate": 1.87748789846781e-05, + "loss": 2.2153, + "step": 5206 + }, + { + "epoch": 0.17, + "grad_norm": 0.7181807160377502, + "learning_rate": 1.8774369172958456e-05, + "loss": 2.2447, + "step": 5207 + }, + { + "epoch": 0.17, + "grad_norm": 0.6959670782089233, + "learning_rate": 1.877385926211113e-05, + "loss": 2.1482, + "step": 5208 + }, + { + "epoch": 0.17, + "grad_norm": 0.7081802487373352, + "learning_rate": 1.8773349252141884e-05, + "loss": 2.2023, + "step": 5209 + }, + { + "epoch": 0.17, + "grad_norm": 0.6991066932678223, + "learning_rate": 1.877283914305648e-05, + "loss": 2.2326, + "step": 5210 + }, + { + "epoch": 0.17, + "grad_norm": 0.6997536420822144, + "learning_rate": 1.8772328934860682e-05, + "loss": 2.1842, + "step": 5211 + }, + { + "epoch": 0.17, + "grad_norm": 0.7279016375541687, + "learning_rate": 1.877181862756025e-05, + "loss": 2.25, + "step": 5212 + }, + { + "epoch": 0.17, + "grad_norm": 0.6934177875518799, + "learning_rate": 1.8771308221160956e-05, + "loss": 2.2361, + "step": 5213 + }, + { + "epoch": 0.17, + "grad_norm": 0.718974232673645, + "learning_rate": 1.877079771566856e-05, + "loss": 2.1879, + "step": 5214 + }, + { + "epoch": 0.17, + "grad_norm": 0.7184673547744751, + "learning_rate": 1.8770287111088832e-05, + "loss": 2.1693, + "step": 5215 + }, + { + "epoch": 0.17, + "grad_norm": 0.6779240369796753, + "learning_rate": 1.876977640742754e-05, + "loss": 2.2711, + "step": 5216 + }, + { + "epoch": 0.17, + "grad_norm": 0.6954242587089539, + "learning_rate": 1.8769265604690456e-05, + "loss": 2.2146, + "step": 5217 + }, + { + "epoch": 0.17, + "grad_norm": 0.7047503590583801, + "learning_rate": 1.8768754702883342e-05, + "loss": 2.1805, + "step": 5218 + }, + { + "epoch": 0.17, + "grad_norm": 0.7255203127861023, + "learning_rate": 1.8768243702011984e-05, + "loss": 2.242, + "step": 5219 + }, + { + "epoch": 0.17, + "grad_norm": 0.6902960538864136, + "learning_rate": 1.8767732602082143e-05, + "loss": 2.1662, + "step": 5220 + }, + { + "epoch": 0.17, + "grad_norm": 0.6786679625511169, + "learning_rate": 1.8767221403099598e-05, + "loss": 2.1941, + "step": 5221 + }, + { + "epoch": 0.17, + "grad_norm": 0.6963168382644653, + "learning_rate": 1.8766710105070122e-05, + "loss": 2.1896, + "step": 5222 + }, + { + "epoch": 0.17, + "grad_norm": 0.6761866807937622, + "learning_rate": 1.8766198707999497e-05, + "loss": 2.1576, + "step": 5223 + }, + { + "epoch": 0.17, + "grad_norm": 0.688229501247406, + "learning_rate": 1.8765687211893494e-05, + "loss": 2.2233, + "step": 5224 + }, + { + "epoch": 0.17, + "grad_norm": 0.7287259697914124, + "learning_rate": 1.8765175616757892e-05, + "loss": 2.288, + "step": 5225 + }, + { + "epoch": 0.17, + "grad_norm": 0.7473742961883545, + "learning_rate": 1.876466392259848e-05, + "loss": 2.2698, + "step": 5226 + }, + { + "epoch": 0.17, + "grad_norm": 0.7048467397689819, + "learning_rate": 1.8764152129421025e-05, + "loss": 2.1749, + "step": 5227 + }, + { + "epoch": 0.17, + "grad_norm": 0.7084662318229675, + "learning_rate": 1.8763640237231317e-05, + "loss": 2.1858, + "step": 5228 + }, + { + "epoch": 0.17, + "grad_norm": 0.6919201612472534, + "learning_rate": 1.876312824603514e-05, + "loss": 2.2708, + "step": 5229 + }, + { + "epoch": 0.17, + "grad_norm": 0.689358651638031, + "learning_rate": 1.8762616155838273e-05, + "loss": 2.2076, + "step": 5230 + }, + { + "epoch": 0.17, + "grad_norm": 1.0632957220077515, + "learning_rate": 1.8762103966646504e-05, + "loss": 2.2484, + "step": 5231 + }, + { + "epoch": 0.17, + "grad_norm": 0.7174069881439209, + "learning_rate": 1.876159167846562e-05, + "loss": 2.2538, + "step": 5232 + }, + { + "epoch": 0.17, + "grad_norm": 0.7072395086288452, + "learning_rate": 1.8761079291301412e-05, + "loss": 2.2607, + "step": 5233 + }, + { + "epoch": 0.17, + "grad_norm": 0.726084291934967, + "learning_rate": 1.8760566805159658e-05, + "loss": 2.2206, + "step": 5234 + }, + { + "epoch": 0.17, + "grad_norm": 0.7034367322921753, + "learning_rate": 1.876005422004616e-05, + "loss": 2.2611, + "step": 5235 + }, + { + "epoch": 0.17, + "grad_norm": 0.6781550049781799, + "learning_rate": 1.87595415359667e-05, + "loss": 2.1555, + "step": 5236 + }, + { + "epoch": 0.17, + "grad_norm": 0.7205064296722412, + "learning_rate": 1.8759028752927073e-05, + "loss": 2.3049, + "step": 5237 + }, + { + "epoch": 0.17, + "grad_norm": 0.6687968373298645, + "learning_rate": 1.8758515870933074e-05, + "loss": 2.1595, + "step": 5238 + }, + { + "epoch": 0.17, + "grad_norm": 0.7130811810493469, + "learning_rate": 1.8758002889990495e-05, + "loss": 2.2346, + "step": 5239 + }, + { + "epoch": 0.17, + "grad_norm": 0.7240042686462402, + "learning_rate": 1.875748981010513e-05, + "loss": 2.1687, + "step": 5240 + }, + { + "epoch": 0.17, + "grad_norm": 0.6798577308654785, + "learning_rate": 1.8756976631282784e-05, + "loss": 2.1766, + "step": 5241 + }, + { + "epoch": 0.17, + "grad_norm": 0.6812129020690918, + "learning_rate": 1.8756463353529243e-05, + "loss": 2.223, + "step": 5242 + }, + { + "epoch": 0.17, + "grad_norm": 0.7317559719085693, + "learning_rate": 1.8755949976850313e-05, + "loss": 2.2232, + "step": 5243 + }, + { + "epoch": 0.17, + "grad_norm": 0.7212831377983093, + "learning_rate": 1.875543650125179e-05, + "loss": 2.2592, + "step": 5244 + }, + { + "epoch": 0.17, + "grad_norm": 0.7260909080505371, + "learning_rate": 1.875492292673948e-05, + "loss": 2.2108, + "step": 5245 + }, + { + "epoch": 0.17, + "grad_norm": 0.6917257905006409, + "learning_rate": 1.8754409253319175e-05, + "loss": 2.2471, + "step": 5246 + }, + { + "epoch": 0.17, + "grad_norm": 0.6876272559165955, + "learning_rate": 1.8753895480996688e-05, + "loss": 2.2357, + "step": 5247 + }, + { + "epoch": 0.17, + "grad_norm": 0.6912677884101868, + "learning_rate": 1.875338160977782e-05, + "loss": 2.2529, + "step": 5248 + }, + { + "epoch": 0.17, + "grad_norm": 0.7301540970802307, + "learning_rate": 1.875286763966838e-05, + "loss": 2.2543, + "step": 5249 + }, + { + "epoch": 0.17, + "grad_norm": 0.7249478697776794, + "learning_rate": 1.8752353570674166e-05, + "loss": 2.2306, + "step": 5250 + }, + { + "epoch": 0.17, + "grad_norm": 0.7140246629714966, + "learning_rate": 1.8751839402800994e-05, + "loss": 2.1985, + "step": 5251 + }, + { + "epoch": 0.17, + "grad_norm": 0.7103005647659302, + "learning_rate": 1.875132513605467e-05, + "loss": 2.267, + "step": 5252 + }, + { + "epoch": 0.17, + "grad_norm": 0.7145276069641113, + "learning_rate": 1.8750810770441e-05, + "loss": 2.2387, + "step": 5253 + }, + { + "epoch": 0.17, + "grad_norm": 0.7124696373939514, + "learning_rate": 1.8750296305965802e-05, + "loss": 2.2413, + "step": 5254 + }, + { + "epoch": 0.17, + "grad_norm": 0.6838007569313049, + "learning_rate": 1.8749781742634882e-05, + "loss": 2.168, + "step": 5255 + }, + { + "epoch": 0.17, + "grad_norm": 0.7054868340492249, + "learning_rate": 1.8749267080454056e-05, + "loss": 2.2863, + "step": 5256 + }, + { + "epoch": 0.17, + "grad_norm": 0.7296440601348877, + "learning_rate": 1.874875231942914e-05, + "loss": 2.1533, + "step": 5257 + }, + { + "epoch": 0.17, + "grad_norm": 0.6929095387458801, + "learning_rate": 1.8748237459565944e-05, + "loss": 2.2304, + "step": 5258 + }, + { + "epoch": 0.17, + "grad_norm": 0.7003834247589111, + "learning_rate": 1.874772250087029e-05, + "loss": 2.1616, + "step": 5259 + }, + { + "epoch": 0.18, + "grad_norm": 0.664771318435669, + "learning_rate": 1.8747207443347997e-05, + "loss": 2.1972, + "step": 5260 + }, + { + "epoch": 0.18, + "grad_norm": 0.6660566329956055, + "learning_rate": 1.8746692287004876e-05, + "loss": 2.232, + "step": 5261 + }, + { + "epoch": 0.18, + "grad_norm": 0.6920390129089355, + "learning_rate": 1.8746177031846756e-05, + "loss": 2.2525, + "step": 5262 + }, + { + "epoch": 0.18, + "grad_norm": 0.6631890535354614, + "learning_rate": 1.874566167787945e-05, + "loss": 2.1842, + "step": 5263 + }, + { + "epoch": 0.18, + "grad_norm": 0.6929755210876465, + "learning_rate": 1.8745146225108784e-05, + "loss": 2.203, + "step": 5264 + }, + { + "epoch": 0.18, + "grad_norm": 0.6716404557228088, + "learning_rate": 1.8744630673540585e-05, + "loss": 2.303, + "step": 5265 + }, + { + "epoch": 0.18, + "grad_norm": 0.687391459941864, + "learning_rate": 1.8744115023180673e-05, + "loss": 2.177, + "step": 5266 + }, + { + "epoch": 0.18, + "grad_norm": 0.6871922016143799, + "learning_rate": 1.8743599274034866e-05, + "loss": 2.2062, + "step": 5267 + }, + { + "epoch": 0.18, + "grad_norm": 0.6913610100746155, + "learning_rate": 1.8743083426109008e-05, + "loss": 2.1569, + "step": 5268 + }, + { + "epoch": 0.18, + "grad_norm": 0.7089357972145081, + "learning_rate": 1.8742567479408914e-05, + "loss": 2.1745, + "step": 5269 + }, + { + "epoch": 0.18, + "grad_norm": 0.7007119655609131, + "learning_rate": 1.8742051433940417e-05, + "loss": 2.2804, + "step": 5270 + }, + { + "epoch": 0.18, + "grad_norm": 0.673805296421051, + "learning_rate": 1.8741535289709343e-05, + "loss": 2.1742, + "step": 5271 + }, + { + "epoch": 0.18, + "grad_norm": 0.6845331192016602, + "learning_rate": 1.874101904672153e-05, + "loss": 2.237, + "step": 5272 + }, + { + "epoch": 0.18, + "grad_norm": 0.6765787601470947, + "learning_rate": 1.8740502704982805e-05, + "loss": 2.1846, + "step": 5273 + }, + { + "epoch": 0.18, + "grad_norm": 0.6866967678070068, + "learning_rate": 1.8739986264499003e-05, + "loss": 2.2091, + "step": 5274 + }, + { + "epoch": 0.18, + "grad_norm": 0.6875186562538147, + "learning_rate": 1.8739469725275957e-05, + "loss": 2.196, + "step": 5275 + }, + { + "epoch": 0.18, + "grad_norm": 0.6828067898750305, + "learning_rate": 1.8738953087319504e-05, + "loss": 2.2236, + "step": 5276 + }, + { + "epoch": 0.18, + "grad_norm": 0.7565956711769104, + "learning_rate": 1.8738436350635484e-05, + "loss": 2.2474, + "step": 5277 + }, + { + "epoch": 0.18, + "grad_norm": 0.6970562934875488, + "learning_rate": 1.873791951522973e-05, + "loss": 2.2526, + "step": 5278 + }, + { + "epoch": 0.18, + "grad_norm": 0.7061561942100525, + "learning_rate": 1.873740258110808e-05, + "loss": 2.2714, + "step": 5279 + }, + { + "epoch": 0.18, + "grad_norm": 0.6894608736038208, + "learning_rate": 1.873688554827638e-05, + "loss": 2.2531, + "step": 5280 + }, + { + "epoch": 0.18, + "grad_norm": 0.6829431056976318, + "learning_rate": 1.8736368416740462e-05, + "loss": 2.2067, + "step": 5281 + }, + { + "epoch": 0.18, + "grad_norm": 0.7060560584068298, + "learning_rate": 1.8735851186506176e-05, + "loss": 2.2441, + "step": 5282 + }, + { + "epoch": 0.18, + "grad_norm": 0.7230420112609863, + "learning_rate": 1.8735333857579365e-05, + "loss": 2.2011, + "step": 5283 + }, + { + "epoch": 0.18, + "grad_norm": 0.7455811500549316, + "learning_rate": 1.8734816429965873e-05, + "loss": 2.1947, + "step": 5284 + }, + { + "epoch": 0.18, + "grad_norm": 0.7295905351638794, + "learning_rate": 1.8734298903671536e-05, + "loss": 2.2331, + "step": 5285 + }, + { + "epoch": 0.18, + "grad_norm": 0.7490503191947937, + "learning_rate": 1.8733781278702217e-05, + "loss": 2.2404, + "step": 5286 + }, + { + "epoch": 0.18, + "grad_norm": 0.6925051212310791, + "learning_rate": 1.873326355506375e-05, + "loss": 2.1867, + "step": 5287 + }, + { + "epoch": 0.18, + "grad_norm": 0.7150958180427551, + "learning_rate": 1.8732745732761993e-05, + "loss": 2.2145, + "step": 5288 + }, + { + "epoch": 0.18, + "grad_norm": 0.6927400231361389, + "learning_rate": 1.8732227811802794e-05, + "loss": 2.2983, + "step": 5289 + }, + { + "epoch": 0.18, + "grad_norm": 0.7035780549049377, + "learning_rate": 1.8731709792192003e-05, + "loss": 2.2311, + "step": 5290 + }, + { + "epoch": 0.18, + "grad_norm": 0.6878951191902161, + "learning_rate": 1.8731191673935466e-05, + "loss": 2.1893, + "step": 5291 + }, + { + "epoch": 0.18, + "grad_norm": 0.6932110786437988, + "learning_rate": 1.8730673457039046e-05, + "loss": 2.162, + "step": 5292 + }, + { + "epoch": 0.18, + "grad_norm": 0.695071816444397, + "learning_rate": 1.8730155141508596e-05, + "loss": 2.2531, + "step": 5293 + }, + { + "epoch": 0.18, + "grad_norm": 0.6811164021492004, + "learning_rate": 1.8729636727349966e-05, + "loss": 2.2514, + "step": 5294 + }, + { + "epoch": 0.18, + "grad_norm": 0.6868739724159241, + "learning_rate": 1.872911821456902e-05, + "loss": 2.2621, + "step": 5295 + }, + { + "epoch": 0.18, + "grad_norm": 0.697605311870575, + "learning_rate": 1.872859960317161e-05, + "loss": 2.2291, + "step": 5296 + }, + { + "epoch": 0.18, + "grad_norm": 0.6981117129325867, + "learning_rate": 1.8728080893163595e-05, + "loss": 2.1618, + "step": 5297 + }, + { + "epoch": 0.18, + "grad_norm": 0.6748373508453369, + "learning_rate": 1.872756208455084e-05, + "loss": 2.2274, + "step": 5298 + }, + { + "epoch": 0.18, + "grad_norm": 0.6913279294967651, + "learning_rate": 1.8727043177339205e-05, + "loss": 2.2371, + "step": 5299 + }, + { + "epoch": 0.18, + "grad_norm": 0.7016263008117676, + "learning_rate": 1.8726524171534546e-05, + "loss": 2.1737, + "step": 5300 + }, + { + "epoch": 0.18, + "grad_norm": 0.6757892966270447, + "learning_rate": 1.8726005067142737e-05, + "loss": 2.2235, + "step": 5301 + }, + { + "epoch": 0.18, + "grad_norm": 0.7008597254753113, + "learning_rate": 1.872548586416963e-05, + "loss": 2.3142, + "step": 5302 + }, + { + "epoch": 0.18, + "grad_norm": 0.706744909286499, + "learning_rate": 1.87249665626211e-05, + "loss": 2.2115, + "step": 5303 + }, + { + "epoch": 0.18, + "grad_norm": 0.7096339464187622, + "learning_rate": 1.8724447162503015e-05, + "loss": 2.2428, + "step": 5304 + }, + { + "epoch": 0.18, + "grad_norm": 0.703036367893219, + "learning_rate": 1.8723927663821235e-05, + "loss": 2.2323, + "step": 5305 + }, + { + "epoch": 0.18, + "grad_norm": 0.7101420164108276, + "learning_rate": 1.8723408066581634e-05, + "loss": 2.21, + "step": 5306 + }, + { + "epoch": 0.18, + "grad_norm": 0.7034243941307068, + "learning_rate": 1.8722888370790083e-05, + "loss": 2.256, + "step": 5307 + }, + { + "epoch": 0.18, + "grad_norm": 0.7019157409667969, + "learning_rate": 1.8722368576452448e-05, + "loss": 2.2387, + "step": 5308 + }, + { + "epoch": 0.18, + "grad_norm": 0.6812459230422974, + "learning_rate": 1.8721848683574605e-05, + "loss": 2.2216, + "step": 5309 + }, + { + "epoch": 0.18, + "grad_norm": 0.7246698141098022, + "learning_rate": 1.872132869216243e-05, + "loss": 2.1682, + "step": 5310 + }, + { + "epoch": 0.18, + "grad_norm": 0.695830225944519, + "learning_rate": 1.8720808602221788e-05, + "loss": 2.2236, + "step": 5311 + }, + { + "epoch": 0.18, + "grad_norm": 0.6872268319129944, + "learning_rate": 1.872028841375857e-05, + "loss": 2.2286, + "step": 5312 + }, + { + "epoch": 0.18, + "grad_norm": 0.6908929347991943, + "learning_rate": 1.871976812677864e-05, + "loss": 2.2656, + "step": 5313 + }, + { + "epoch": 0.18, + "grad_norm": 0.6971749663352966, + "learning_rate": 1.8719247741287877e-05, + "loss": 2.2842, + "step": 5314 + }, + { + "epoch": 0.18, + "grad_norm": 0.7302929162979126, + "learning_rate": 1.8718727257292168e-05, + "loss": 2.2074, + "step": 5315 + }, + { + "epoch": 0.18, + "grad_norm": 0.7205823063850403, + "learning_rate": 1.871820667479739e-05, + "loss": 2.2375, + "step": 5316 + }, + { + "epoch": 0.18, + "grad_norm": 0.6951491236686707, + "learning_rate": 1.8717685993809413e-05, + "loss": 2.2508, + "step": 5317 + }, + { + "epoch": 0.18, + "grad_norm": 0.6884263753890991, + "learning_rate": 1.8717165214334137e-05, + "loss": 2.1395, + "step": 5318 + }, + { + "epoch": 0.18, + "grad_norm": 0.6922199130058289, + "learning_rate": 1.871664433637743e-05, + "loss": 2.2473, + "step": 5319 + }, + { + "epoch": 0.18, + "grad_norm": 0.6757543087005615, + "learning_rate": 1.8716123359945192e-05, + "loss": 2.2073, + "step": 5320 + }, + { + "epoch": 0.18, + "grad_norm": 0.6590266823768616, + "learning_rate": 1.8715602285043297e-05, + "loss": 2.1763, + "step": 5321 + }, + { + "epoch": 0.18, + "grad_norm": 0.7229035496711731, + "learning_rate": 1.8715081111677636e-05, + "loss": 2.2826, + "step": 5322 + }, + { + "epoch": 0.18, + "grad_norm": 0.6956657767295837, + "learning_rate": 1.8714559839854096e-05, + "loss": 2.2112, + "step": 5323 + }, + { + "epoch": 0.18, + "grad_norm": 0.6742832064628601, + "learning_rate": 1.871403846957856e-05, + "loss": 2.1635, + "step": 5324 + }, + { + "epoch": 0.18, + "grad_norm": 0.6906077265739441, + "learning_rate": 1.8713517000856933e-05, + "loss": 2.2277, + "step": 5325 + }, + { + "epoch": 0.18, + "grad_norm": 0.6868237257003784, + "learning_rate": 1.871299543369509e-05, + "loss": 2.2257, + "step": 5326 + }, + { + "epoch": 0.18, + "grad_norm": 0.6614282727241516, + "learning_rate": 1.8712473768098937e-05, + "loss": 2.2426, + "step": 5327 + }, + { + "epoch": 0.18, + "grad_norm": 0.7293252944946289, + "learning_rate": 1.871195200407436e-05, + "loss": 2.2029, + "step": 5328 + }, + { + "epoch": 0.18, + "grad_norm": 0.6892263293266296, + "learning_rate": 1.8711430141627253e-05, + "loss": 2.1873, + "step": 5329 + }, + { + "epoch": 0.18, + "grad_norm": 0.6843459010124207, + "learning_rate": 1.8710908180763515e-05, + "loss": 2.2151, + "step": 5330 + }, + { + "epoch": 0.18, + "grad_norm": 0.6956990361213684, + "learning_rate": 1.8710386121489043e-05, + "loss": 2.2371, + "step": 5331 + }, + { + "epoch": 0.18, + "grad_norm": 0.7233136296272278, + "learning_rate": 1.8709863963809728e-05, + "loss": 2.2339, + "step": 5332 + }, + { + "epoch": 0.18, + "grad_norm": 0.693359911441803, + "learning_rate": 1.870934170773148e-05, + "loss": 2.2342, + "step": 5333 + }, + { + "epoch": 0.18, + "grad_norm": 0.6913610696792603, + "learning_rate": 1.870881935326019e-05, + "loss": 2.204, + "step": 5334 + }, + { + "epoch": 0.18, + "grad_norm": 0.6841592788696289, + "learning_rate": 1.8708296900401767e-05, + "loss": 2.2108, + "step": 5335 + }, + { + "epoch": 0.18, + "grad_norm": 0.7112038731575012, + "learning_rate": 1.8707774349162105e-05, + "loss": 2.2177, + "step": 5336 + }, + { + "epoch": 0.18, + "grad_norm": 0.6971014142036438, + "learning_rate": 1.8707251699547115e-05, + "loss": 2.1938, + "step": 5337 + }, + { + "epoch": 0.18, + "grad_norm": 0.68815678358078, + "learning_rate": 1.8706728951562696e-05, + "loss": 2.2188, + "step": 5338 + }, + { + "epoch": 0.18, + "grad_norm": 0.6814174652099609, + "learning_rate": 1.8706206105214757e-05, + "loss": 2.1766, + "step": 5339 + }, + { + "epoch": 0.18, + "grad_norm": 0.6762268543243408, + "learning_rate": 1.8705683160509203e-05, + "loss": 2.1857, + "step": 5340 + }, + { + "epoch": 0.18, + "grad_norm": 0.6923859119415283, + "learning_rate": 1.8705160117451942e-05, + "loss": 2.276, + "step": 5341 + }, + { + "epoch": 0.18, + "grad_norm": 0.6915370225906372, + "learning_rate": 1.8704636976048888e-05, + "loss": 2.1864, + "step": 5342 + }, + { + "epoch": 0.18, + "grad_norm": 0.665428102016449, + "learning_rate": 1.870411373630594e-05, + "loss": 2.218, + "step": 5343 + }, + { + "epoch": 0.18, + "grad_norm": 0.7339053750038147, + "learning_rate": 1.870359039822902e-05, + "loss": 2.2011, + "step": 5344 + }, + { + "epoch": 0.18, + "grad_norm": 0.7154248356819153, + "learning_rate": 1.8703066961824036e-05, + "loss": 2.2076, + "step": 5345 + }, + { + "epoch": 0.18, + "grad_norm": 0.7322760224342346, + "learning_rate": 1.8702543427096906e-05, + "loss": 2.1635, + "step": 5346 + }, + { + "epoch": 0.18, + "grad_norm": 0.7206581830978394, + "learning_rate": 1.8702019794053534e-05, + "loss": 2.2212, + "step": 5347 + }, + { + "epoch": 0.18, + "grad_norm": 0.7312862277030945, + "learning_rate": 1.8701496062699848e-05, + "loss": 2.1776, + "step": 5348 + }, + { + "epoch": 0.18, + "grad_norm": 0.6989860534667969, + "learning_rate": 1.8700972233041755e-05, + "loss": 2.216, + "step": 5349 + }, + { + "epoch": 0.18, + "grad_norm": 0.6877766251564026, + "learning_rate": 1.8700448305085177e-05, + "loss": 2.2133, + "step": 5350 + }, + { + "epoch": 0.18, + "grad_norm": 0.689217746257782, + "learning_rate": 1.8699924278836032e-05, + "loss": 2.159, + "step": 5351 + }, + { + "epoch": 0.18, + "grad_norm": 0.6855818629264832, + "learning_rate": 1.8699400154300244e-05, + "loss": 2.1867, + "step": 5352 + }, + { + "epoch": 0.18, + "grad_norm": 0.7007381916046143, + "learning_rate": 1.869887593148373e-05, + "loss": 2.2638, + "step": 5353 + }, + { + "epoch": 0.18, + "grad_norm": 0.7113897800445557, + "learning_rate": 1.8698351610392416e-05, + "loss": 2.2138, + "step": 5354 + }, + { + "epoch": 0.18, + "grad_norm": 0.7301239371299744, + "learning_rate": 1.869782719103222e-05, + "loss": 2.1666, + "step": 5355 + }, + { + "epoch": 0.18, + "grad_norm": 0.7006897330284119, + "learning_rate": 1.8697302673409072e-05, + "loss": 2.2525, + "step": 5356 + }, + { + "epoch": 0.18, + "grad_norm": 0.6918513774871826, + "learning_rate": 1.8696778057528896e-05, + "loss": 2.1298, + "step": 5357 + }, + { + "epoch": 0.18, + "grad_norm": 0.6716538071632385, + "learning_rate": 1.8696253343397617e-05, + "loss": 2.1695, + "step": 5358 + }, + { + "epoch": 0.18, + "grad_norm": 0.7065458297729492, + "learning_rate": 1.8695728531021165e-05, + "loss": 2.2303, + "step": 5359 + }, + { + "epoch": 0.18, + "grad_norm": 0.6769695281982422, + "learning_rate": 1.8695203620405466e-05, + "loss": 2.2553, + "step": 5360 + }, + { + "epoch": 0.18, + "grad_norm": 0.7818331122398376, + "learning_rate": 1.8694678611556455e-05, + "loss": 2.2299, + "step": 5361 + }, + { + "epoch": 0.18, + "grad_norm": 0.7667955160140991, + "learning_rate": 1.869415350448006e-05, + "loss": 2.1514, + "step": 5362 + }, + { + "epoch": 0.18, + "grad_norm": 0.6861446499824524, + "learning_rate": 1.8693628299182215e-05, + "loss": 2.1735, + "step": 5363 + }, + { + "epoch": 0.18, + "grad_norm": 0.698962390422821, + "learning_rate": 1.8693102995668847e-05, + "loss": 2.1578, + "step": 5364 + }, + { + "epoch": 0.18, + "grad_norm": 0.7044604420661926, + "learning_rate": 1.86925775939459e-05, + "loss": 2.25, + "step": 5365 + }, + { + "epoch": 0.18, + "grad_norm": 0.7175866365432739, + "learning_rate": 1.8692052094019307e-05, + "loss": 2.2016, + "step": 5366 + }, + { + "epoch": 0.18, + "grad_norm": 0.6886649131774902, + "learning_rate": 1.8691526495895002e-05, + "loss": 2.1987, + "step": 5367 + }, + { + "epoch": 0.18, + "grad_norm": 0.7320054769515991, + "learning_rate": 1.8691000799578927e-05, + "loss": 2.2625, + "step": 5368 + }, + { + "epoch": 0.18, + "grad_norm": 0.6931559443473816, + "learning_rate": 1.8690475005077016e-05, + "loss": 2.2609, + "step": 5369 + }, + { + "epoch": 0.18, + "grad_norm": 0.6913182735443115, + "learning_rate": 1.868994911239521e-05, + "loss": 2.2325, + "step": 5370 + }, + { + "epoch": 0.18, + "grad_norm": 0.7052625417709351, + "learning_rate": 1.8689423121539457e-05, + "loss": 2.2349, + "step": 5371 + }, + { + "epoch": 0.18, + "grad_norm": 0.6975575685501099, + "learning_rate": 1.868889703251569e-05, + "loss": 2.1084, + "step": 5372 + }, + { + "epoch": 0.18, + "grad_norm": 0.700147271156311, + "learning_rate": 1.8688370845329855e-05, + "loss": 2.2219, + "step": 5373 + }, + { + "epoch": 0.18, + "grad_norm": 0.695196807384491, + "learning_rate": 1.8687844559987903e-05, + "loss": 2.1564, + "step": 5374 + }, + { + "epoch": 0.18, + "grad_norm": 0.6810943484306335, + "learning_rate": 1.868731817649577e-05, + "loss": 2.2038, + "step": 5375 + }, + { + "epoch": 0.18, + "grad_norm": 0.6825370192527771, + "learning_rate": 1.8686791694859407e-05, + "loss": 2.2726, + "step": 5376 + }, + { + "epoch": 0.18, + "grad_norm": 0.6985204219818115, + "learning_rate": 1.8686265115084766e-05, + "loss": 2.2453, + "step": 5377 + }, + { + "epoch": 0.18, + "grad_norm": 0.7111895680427551, + "learning_rate": 1.868573843717779e-05, + "loss": 2.3021, + "step": 5378 + }, + { + "epoch": 0.18, + "grad_norm": 0.705970048904419, + "learning_rate": 1.8685211661144437e-05, + "loss": 2.1198, + "step": 5379 + }, + { + "epoch": 0.18, + "grad_norm": 0.6955997943878174, + "learning_rate": 1.8684684786990645e-05, + "loss": 2.2134, + "step": 5380 + }, + { + "epoch": 0.18, + "grad_norm": 0.6756146550178528, + "learning_rate": 1.8684157814722376e-05, + "loss": 2.2083, + "step": 5381 + }, + { + "epoch": 0.18, + "grad_norm": 0.6966724991798401, + "learning_rate": 1.868363074434558e-05, + "loss": 2.2801, + "step": 5382 + }, + { + "epoch": 0.18, + "grad_norm": 0.6867474317550659, + "learning_rate": 1.868310357586622e-05, + "loss": 2.1273, + "step": 5383 + }, + { + "epoch": 0.18, + "grad_norm": 0.7444121241569519, + "learning_rate": 1.868257630929024e-05, + "loss": 2.2383, + "step": 5384 + }, + { + "epoch": 0.18, + "grad_norm": 0.7392184138298035, + "learning_rate": 1.86820489446236e-05, + "loss": 2.195, + "step": 5385 + }, + { + "epoch": 0.18, + "grad_norm": 0.7271459698677063, + "learning_rate": 1.868152148187226e-05, + "loss": 2.2587, + "step": 5386 + }, + { + "epoch": 0.18, + "grad_norm": 0.7205798625946045, + "learning_rate": 1.868099392104218e-05, + "loss": 2.2178, + "step": 5387 + }, + { + "epoch": 0.18, + "grad_norm": 0.6847581267356873, + "learning_rate": 1.8680466262139318e-05, + "loss": 2.0889, + "step": 5388 + }, + { + "epoch": 0.18, + "grad_norm": 0.6811925768852234, + "learning_rate": 1.8679938505169634e-05, + "loss": 2.2135, + "step": 5389 + }, + { + "epoch": 0.18, + "grad_norm": 0.6883907914161682, + "learning_rate": 1.8679410650139095e-05, + "loss": 2.1567, + "step": 5390 + }, + { + "epoch": 0.18, + "grad_norm": 0.7297616004943848, + "learning_rate": 1.8678882697053654e-05, + "loss": 2.2658, + "step": 5391 + }, + { + "epoch": 0.18, + "grad_norm": 0.6985902786254883, + "learning_rate": 1.867835464591929e-05, + "loss": 2.1996, + "step": 5392 + }, + { + "epoch": 0.18, + "grad_norm": 0.713433563709259, + "learning_rate": 1.8677826496741957e-05, + "loss": 2.2445, + "step": 5393 + }, + { + "epoch": 0.18, + "grad_norm": 0.7662736773490906, + "learning_rate": 1.867729824952763e-05, + "loss": 2.2951, + "step": 5394 + }, + { + "epoch": 0.18, + "grad_norm": 0.682248055934906, + "learning_rate": 1.8676769904282267e-05, + "loss": 2.162, + "step": 5395 + }, + { + "epoch": 0.18, + "grad_norm": 0.7012356519699097, + "learning_rate": 1.8676241461011845e-05, + "loss": 2.2166, + "step": 5396 + }, + { + "epoch": 0.18, + "grad_norm": 0.6921445727348328, + "learning_rate": 1.8675712919722334e-05, + "loss": 2.2047, + "step": 5397 + }, + { + "epoch": 0.18, + "grad_norm": 0.7156121730804443, + "learning_rate": 1.86751842804197e-05, + "loss": 2.2358, + "step": 5398 + }, + { + "epoch": 0.18, + "grad_norm": 0.7065675854682922, + "learning_rate": 1.8674655543109922e-05, + "loss": 2.1874, + "step": 5399 + }, + { + "epoch": 0.18, + "grad_norm": 0.6988011002540588, + "learning_rate": 1.8674126707798965e-05, + "loss": 2.1563, + "step": 5400 + }, + { + "epoch": 0.18, + "grad_norm": 0.7090868353843689, + "learning_rate": 1.867359777449281e-05, + "loss": 2.2142, + "step": 5401 + }, + { + "epoch": 0.18, + "grad_norm": 0.6951003074645996, + "learning_rate": 1.867306874319743e-05, + "loss": 2.2123, + "step": 5402 + }, + { + "epoch": 0.18, + "grad_norm": 0.6858704090118408, + "learning_rate": 1.8672539613918802e-05, + "loss": 2.3237, + "step": 5403 + }, + { + "epoch": 0.18, + "grad_norm": 0.7154164910316467, + "learning_rate": 1.8672010386662908e-05, + "loss": 2.2159, + "step": 5404 + }, + { + "epoch": 0.18, + "grad_norm": 0.6896719336509705, + "learning_rate": 1.867148106143572e-05, + "loss": 2.1738, + "step": 5405 + }, + { + "epoch": 0.18, + "grad_norm": 0.6984386444091797, + "learning_rate": 1.867095163824322e-05, + "loss": 2.1847, + "step": 5406 + }, + { + "epoch": 0.18, + "grad_norm": 0.723698616027832, + "learning_rate": 1.867042211709139e-05, + "loss": 2.2102, + "step": 5407 + }, + { + "epoch": 0.18, + "grad_norm": 0.6982423067092896, + "learning_rate": 1.866989249798621e-05, + "loss": 2.2104, + "step": 5408 + }, + { + "epoch": 0.18, + "grad_norm": 0.7349892854690552, + "learning_rate": 1.8669362780933675e-05, + "loss": 2.157, + "step": 5409 + }, + { + "epoch": 0.18, + "grad_norm": 0.6859233975410461, + "learning_rate": 1.866883296593975e-05, + "loss": 2.166, + "step": 5410 + }, + { + "epoch": 0.18, + "grad_norm": 0.6601275205612183, + "learning_rate": 1.8668303053010436e-05, + "loss": 2.19, + "step": 5411 + }, + { + "epoch": 0.18, + "grad_norm": 0.7230720520019531, + "learning_rate": 1.8667773042151714e-05, + "loss": 2.1467, + "step": 5412 + }, + { + "epoch": 0.18, + "grad_norm": 0.748222291469574, + "learning_rate": 1.866724293336957e-05, + "loss": 2.2229, + "step": 5413 + }, + { + "epoch": 0.18, + "grad_norm": 0.6734126210212708, + "learning_rate": 1.8666712726669994e-05, + "loss": 2.2003, + "step": 5414 + }, + { + "epoch": 0.18, + "grad_norm": 0.7056984901428223, + "learning_rate": 1.866618242205898e-05, + "loss": 2.1362, + "step": 5415 + }, + { + "epoch": 0.18, + "grad_norm": 0.7258943319320679, + "learning_rate": 1.8665652019542512e-05, + "loss": 2.2775, + "step": 5416 + }, + { + "epoch": 0.18, + "grad_norm": 0.6757399439811707, + "learning_rate": 1.8665121519126587e-05, + "loss": 2.2203, + "step": 5417 + }, + { + "epoch": 0.18, + "grad_norm": 0.6979444026947021, + "learning_rate": 1.86645909208172e-05, + "loss": 2.2087, + "step": 5418 + }, + { + "epoch": 0.18, + "grad_norm": 0.704862117767334, + "learning_rate": 1.866406022462034e-05, + "loss": 2.2101, + "step": 5419 + }, + { + "epoch": 0.18, + "grad_norm": 0.6693013906478882, + "learning_rate": 1.866352943054201e-05, + "loss": 2.1976, + "step": 5420 + }, + { + "epoch": 0.18, + "grad_norm": 0.6967042088508606, + "learning_rate": 1.86629985385882e-05, + "loss": 2.206, + "step": 5421 + }, + { + "epoch": 0.18, + "grad_norm": 0.7242235541343689, + "learning_rate": 1.8662467548764904e-05, + "loss": 2.2046, + "step": 5422 + }, + { + "epoch": 0.18, + "grad_norm": 0.7256143093109131, + "learning_rate": 1.8661936461078133e-05, + "loss": 2.2108, + "step": 5423 + }, + { + "epoch": 0.18, + "grad_norm": 0.6766475439071655, + "learning_rate": 1.8661405275533876e-05, + "loss": 2.1661, + "step": 5424 + }, + { + "epoch": 0.18, + "grad_norm": 0.7162649631500244, + "learning_rate": 1.866087399213814e-05, + "loss": 2.2202, + "step": 5425 + }, + { + "epoch": 0.18, + "grad_norm": 0.6785038113594055, + "learning_rate": 1.8660342610896922e-05, + "loss": 2.2361, + "step": 5426 + }, + { + "epoch": 0.18, + "grad_norm": 0.7025398015975952, + "learning_rate": 1.8659811131816233e-05, + "loss": 2.2294, + "step": 5427 + }, + { + "epoch": 0.18, + "grad_norm": 0.7494419813156128, + "learning_rate": 1.8659279554902074e-05, + "loss": 2.2756, + "step": 5428 + }, + { + "epoch": 0.18, + "grad_norm": 0.67743980884552, + "learning_rate": 1.8658747880160443e-05, + "loss": 2.1855, + "step": 5429 + }, + { + "epoch": 0.18, + "grad_norm": 0.6811135411262512, + "learning_rate": 1.865821610759736e-05, + "loss": 2.2088, + "step": 5430 + }, + { + "epoch": 0.18, + "grad_norm": 0.7470099925994873, + "learning_rate": 1.8657684237218823e-05, + "loss": 2.2392, + "step": 5431 + }, + { + "epoch": 0.18, + "grad_norm": 0.6951124668121338, + "learning_rate": 1.8657152269030844e-05, + "loss": 2.2116, + "step": 5432 + }, + { + "epoch": 0.18, + "grad_norm": 0.685046911239624, + "learning_rate": 1.865662020303943e-05, + "loss": 2.1808, + "step": 5433 + }, + { + "epoch": 0.18, + "grad_norm": 0.69264817237854, + "learning_rate": 1.8656088039250595e-05, + "loss": 2.2121, + "step": 5434 + }, + { + "epoch": 0.18, + "grad_norm": 0.7065656781196594, + "learning_rate": 1.8655555777670353e-05, + "loss": 2.1962, + "step": 5435 + }, + { + "epoch": 0.18, + "grad_norm": 0.6592856049537659, + "learning_rate": 1.865502341830471e-05, + "loss": 2.1927, + "step": 5436 + }, + { + "epoch": 0.18, + "grad_norm": 0.7418520450592041, + "learning_rate": 1.8654490961159688e-05, + "loss": 2.2608, + "step": 5437 + }, + { + "epoch": 0.18, + "grad_norm": 0.7003412842750549, + "learning_rate": 1.86539584062413e-05, + "loss": 2.218, + "step": 5438 + }, + { + "epoch": 0.18, + "grad_norm": 0.715146541595459, + "learning_rate": 1.865342575355556e-05, + "loss": 2.2088, + "step": 5439 + }, + { + "epoch": 0.18, + "grad_norm": 0.6890220642089844, + "learning_rate": 1.8652893003108485e-05, + "loss": 2.283, + "step": 5440 + }, + { + "epoch": 0.18, + "grad_norm": 0.716315507888794, + "learning_rate": 1.86523601549061e-05, + "loss": 2.2476, + "step": 5441 + }, + { + "epoch": 0.18, + "grad_norm": 0.7049520611763, + "learning_rate": 1.865182720895442e-05, + "loss": 2.2387, + "step": 5442 + }, + { + "epoch": 0.18, + "grad_norm": 0.6862707734107971, + "learning_rate": 1.8651294165259464e-05, + "loss": 2.1911, + "step": 5443 + }, + { + "epoch": 0.18, + "grad_norm": 0.6874216198921204, + "learning_rate": 1.8650761023827258e-05, + "loss": 2.1584, + "step": 5444 + }, + { + "epoch": 0.18, + "grad_norm": 0.696226179599762, + "learning_rate": 1.8650227784663825e-05, + "loss": 2.1757, + "step": 5445 + }, + { + "epoch": 0.18, + "grad_norm": 0.7441384792327881, + "learning_rate": 1.8649694447775184e-05, + "loss": 2.1678, + "step": 5446 + }, + { + "epoch": 0.18, + "grad_norm": 0.6918995976448059, + "learning_rate": 1.864916101316737e-05, + "loss": 2.2048, + "step": 5447 + }, + { + "epoch": 0.18, + "grad_norm": 0.6750444173812866, + "learning_rate": 1.86486274808464e-05, + "loss": 2.1889, + "step": 5448 + }, + { + "epoch": 0.18, + "grad_norm": 0.7276250720024109, + "learning_rate": 1.8648093850818306e-05, + "loss": 2.1614, + "step": 5449 + }, + { + "epoch": 0.18, + "grad_norm": 0.692226231098175, + "learning_rate": 1.864756012308912e-05, + "loss": 2.1936, + "step": 5450 + }, + { + "epoch": 0.18, + "grad_norm": 0.6868963837623596, + "learning_rate": 1.864702629766486e-05, + "loss": 2.2254, + "step": 5451 + }, + { + "epoch": 0.18, + "grad_norm": 0.7053418159484863, + "learning_rate": 1.864649237455157e-05, + "loss": 2.2325, + "step": 5452 + }, + { + "epoch": 0.18, + "grad_norm": 0.6951059103012085, + "learning_rate": 1.8645958353755276e-05, + "loss": 2.1848, + "step": 5453 + }, + { + "epoch": 0.18, + "grad_norm": 0.6877989768981934, + "learning_rate": 1.8645424235282007e-05, + "loss": 2.2899, + "step": 5454 + }, + { + "epoch": 0.18, + "grad_norm": 0.6931294798851013, + "learning_rate": 1.8644890019137806e-05, + "loss": 2.267, + "step": 5455 + }, + { + "epoch": 0.18, + "grad_norm": 0.7140448689460754, + "learning_rate": 1.8644355705328707e-05, + "loss": 2.2686, + "step": 5456 + }, + { + "epoch": 0.18, + "grad_norm": 0.6684128642082214, + "learning_rate": 1.8643821293860737e-05, + "loss": 2.1757, + "step": 5457 + }, + { + "epoch": 0.18, + "grad_norm": 0.6734837889671326, + "learning_rate": 1.8643286784739945e-05, + "loss": 2.1974, + "step": 5458 + }, + { + "epoch": 0.18, + "grad_norm": 0.674057126045227, + "learning_rate": 1.8642752177972362e-05, + "loss": 2.2014, + "step": 5459 + }, + { + "epoch": 0.18, + "grad_norm": 0.6899111866950989, + "learning_rate": 1.8642217473564028e-05, + "loss": 2.1724, + "step": 5460 + }, + { + "epoch": 0.18, + "grad_norm": 0.6900359392166138, + "learning_rate": 1.8641682671520987e-05, + "loss": 2.2603, + "step": 5461 + }, + { + "epoch": 0.18, + "grad_norm": 0.6928248405456543, + "learning_rate": 1.8641147771849282e-05, + "loss": 2.2622, + "step": 5462 + }, + { + "epoch": 0.18, + "grad_norm": 0.6716713309288025, + "learning_rate": 1.8640612774554952e-05, + "loss": 2.1786, + "step": 5463 + }, + { + "epoch": 0.18, + "grad_norm": 0.6904696226119995, + "learning_rate": 1.8640077679644043e-05, + "loss": 2.2464, + "step": 5464 + }, + { + "epoch": 0.18, + "grad_norm": 0.6997979879379272, + "learning_rate": 1.86395424871226e-05, + "loss": 2.2752, + "step": 5465 + }, + { + "epoch": 0.18, + "grad_norm": 0.7003740072250366, + "learning_rate": 1.8639007196996666e-05, + "loss": 2.2966, + "step": 5466 + }, + { + "epoch": 0.18, + "grad_norm": 0.708040177822113, + "learning_rate": 1.8638471809272298e-05, + "loss": 2.1917, + "step": 5467 + }, + { + "epoch": 0.18, + "grad_norm": 0.7367563247680664, + "learning_rate": 1.8637936323955535e-05, + "loss": 2.3012, + "step": 5468 + }, + { + "epoch": 0.18, + "grad_norm": 0.7042673230171204, + "learning_rate": 1.863740074105243e-05, + "loss": 2.2485, + "step": 5469 + }, + { + "epoch": 0.18, + "grad_norm": 0.6951615810394287, + "learning_rate": 1.863686506056903e-05, + "loss": 2.1976, + "step": 5470 + }, + { + "epoch": 0.18, + "grad_norm": 0.7056179642677307, + "learning_rate": 1.8636329282511396e-05, + "loss": 2.2954, + "step": 5471 + }, + { + "epoch": 0.18, + "grad_norm": 0.6767618060112, + "learning_rate": 1.863579340688557e-05, + "loss": 2.2458, + "step": 5472 + }, + { + "epoch": 0.18, + "grad_norm": 0.6723634004592896, + "learning_rate": 1.8635257433697617e-05, + "loss": 2.1689, + "step": 5473 + }, + { + "epoch": 0.18, + "grad_norm": 0.702843189239502, + "learning_rate": 1.863472136295358e-05, + "loss": 2.1316, + "step": 5474 + }, + { + "epoch": 0.18, + "grad_norm": 0.6565383076667786, + "learning_rate": 1.8634185194659526e-05, + "loss": 2.1886, + "step": 5475 + }, + { + "epoch": 0.18, + "grad_norm": 0.6981648206710815, + "learning_rate": 1.8633648928821505e-05, + "loss": 2.2147, + "step": 5476 + }, + { + "epoch": 0.18, + "grad_norm": 0.7100622653961182, + "learning_rate": 1.863311256544558e-05, + "loss": 2.2265, + "step": 5477 + }, + { + "epoch": 0.18, + "grad_norm": 0.694697916507721, + "learning_rate": 1.863257610453781e-05, + "loss": 2.2423, + "step": 5478 + }, + { + "epoch": 0.18, + "grad_norm": 0.6915003657341003, + "learning_rate": 1.863203954610425e-05, + "loss": 2.2446, + "step": 5479 + }, + { + "epoch": 0.18, + "grad_norm": 0.7234693169593811, + "learning_rate": 1.863150289015097e-05, + "loss": 2.2404, + "step": 5480 + }, + { + "epoch": 0.18, + "grad_norm": 0.6751726865768433, + "learning_rate": 1.8630966136684028e-05, + "loss": 2.1591, + "step": 5481 + }, + { + "epoch": 0.18, + "grad_norm": 0.7268427014350891, + "learning_rate": 1.8630429285709488e-05, + "loss": 2.2034, + "step": 5482 + }, + { + "epoch": 0.18, + "grad_norm": 0.6962007284164429, + "learning_rate": 1.8629892337233416e-05, + "loss": 2.1773, + "step": 5483 + }, + { + "epoch": 0.18, + "grad_norm": 0.6734541058540344, + "learning_rate": 1.8629355291261876e-05, + "loss": 2.2747, + "step": 5484 + }, + { + "epoch": 0.18, + "grad_norm": 0.6871586441993713, + "learning_rate": 1.862881814780094e-05, + "loss": 2.1693, + "step": 5485 + }, + { + "epoch": 0.18, + "grad_norm": 0.6941714286804199, + "learning_rate": 1.8628280906856676e-05, + "loss": 2.1657, + "step": 5486 + }, + { + "epoch": 0.18, + "grad_norm": 0.6855652928352356, + "learning_rate": 1.8627743568435146e-05, + "loss": 2.172, + "step": 5487 + }, + { + "epoch": 0.18, + "grad_norm": 0.7262262105941772, + "learning_rate": 1.8627206132542428e-05, + "loss": 2.246, + "step": 5488 + }, + { + "epoch": 0.18, + "grad_norm": 0.6726296544075012, + "learning_rate": 1.862666859918459e-05, + "loss": 2.2458, + "step": 5489 + }, + { + "epoch": 0.18, + "grad_norm": 0.6849536299705505, + "learning_rate": 1.862613096836771e-05, + "loss": 2.2036, + "step": 5490 + }, + { + "epoch": 0.18, + "grad_norm": 0.7037539482116699, + "learning_rate": 1.862559324009785e-05, + "loss": 2.1393, + "step": 5491 + }, + { + "epoch": 0.18, + "grad_norm": 0.7455698251724243, + "learning_rate": 1.8625055414381097e-05, + "loss": 2.2794, + "step": 5492 + }, + { + "epoch": 0.18, + "grad_norm": 0.671033501625061, + "learning_rate": 1.8624517491223525e-05, + "loss": 2.1886, + "step": 5493 + }, + { + "epoch": 0.18, + "grad_norm": 0.7034931182861328, + "learning_rate": 1.8623979470631207e-05, + "loss": 2.21, + "step": 5494 + }, + { + "epoch": 0.18, + "grad_norm": 0.7348529100418091, + "learning_rate": 1.862344135261022e-05, + "loss": 2.1648, + "step": 5495 + }, + { + "epoch": 0.18, + "grad_norm": 0.6792471408843994, + "learning_rate": 1.862290313716665e-05, + "loss": 2.1797, + "step": 5496 + }, + { + "epoch": 0.18, + "grad_norm": 0.6816999316215515, + "learning_rate": 1.862236482430657e-05, + "loss": 2.167, + "step": 5497 + }, + { + "epoch": 0.18, + "grad_norm": 0.71730637550354, + "learning_rate": 1.862182641403607e-05, + "loss": 2.2021, + "step": 5498 + }, + { + "epoch": 0.18, + "grad_norm": 0.7005695104598999, + "learning_rate": 1.8621287906361227e-05, + "loss": 2.2008, + "step": 5499 + }, + { + "epoch": 0.18, + "grad_norm": 0.7150699496269226, + "learning_rate": 1.8620749301288125e-05, + "loss": 2.2006, + "step": 5500 + }, + { + "epoch": 0.18, + "grad_norm": 0.7294042110443115, + "learning_rate": 1.862021059882285e-05, + "loss": 2.1673, + "step": 5501 + }, + { + "epoch": 0.18, + "grad_norm": 0.6940259337425232, + "learning_rate": 1.861967179897149e-05, + "loss": 2.2113, + "step": 5502 + }, + { + "epoch": 0.18, + "grad_norm": 0.6767745614051819, + "learning_rate": 1.8619132901740126e-05, + "loss": 2.2421, + "step": 5503 + }, + { + "epoch": 0.18, + "grad_norm": 0.7345863580703735, + "learning_rate": 1.8618593907134847e-05, + "loss": 2.2711, + "step": 5504 + }, + { + "epoch": 0.18, + "grad_norm": 0.7103909254074097, + "learning_rate": 1.8618054815161752e-05, + "loss": 2.2055, + "step": 5505 + }, + { + "epoch": 0.18, + "grad_norm": 0.7215567231178284, + "learning_rate": 1.8617515625826922e-05, + "loss": 2.2516, + "step": 5506 + }, + { + "epoch": 0.18, + "grad_norm": 0.7726604342460632, + "learning_rate": 1.8616976339136446e-05, + "loss": 2.1847, + "step": 5507 + }, + { + "epoch": 0.18, + "grad_norm": 0.6908069849014282, + "learning_rate": 1.861643695509643e-05, + "loss": 2.2314, + "step": 5508 + }, + { + "epoch": 0.18, + "grad_norm": 0.7274362444877625, + "learning_rate": 1.861589747371295e-05, + "loss": 2.1753, + "step": 5509 + }, + { + "epoch": 0.18, + "grad_norm": 0.6759147644042969, + "learning_rate": 1.8615357894992116e-05, + "loss": 2.248, + "step": 5510 + }, + { + "epoch": 0.18, + "grad_norm": 0.7180452346801758, + "learning_rate": 1.8614818218940015e-05, + "loss": 2.2368, + "step": 5511 + }, + { + "epoch": 0.18, + "grad_norm": 0.7551531791687012, + "learning_rate": 1.8614278445562748e-05, + "loss": 2.1844, + "step": 5512 + }, + { + "epoch": 0.18, + "grad_norm": 0.6935170888900757, + "learning_rate": 1.8613738574866413e-05, + "loss": 2.1608, + "step": 5513 + }, + { + "epoch": 0.18, + "grad_norm": 0.6883781552314758, + "learning_rate": 1.8613198606857105e-05, + "loss": 2.2857, + "step": 5514 + }, + { + "epoch": 0.18, + "grad_norm": 0.6902933716773987, + "learning_rate": 1.8612658541540924e-05, + "loss": 2.1987, + "step": 5515 + }, + { + "epoch": 0.18, + "grad_norm": 0.6875550746917725, + "learning_rate": 1.861211837892398e-05, + "loss": 2.2228, + "step": 5516 + }, + { + "epoch": 0.18, + "grad_norm": 0.6723427772521973, + "learning_rate": 1.8611578119012367e-05, + "loss": 2.2504, + "step": 5517 + }, + { + "epoch": 0.18, + "grad_norm": 0.6614521741867065, + "learning_rate": 1.8611037761812193e-05, + "loss": 2.1557, + "step": 5518 + }, + { + "epoch": 0.18, + "grad_norm": 0.7107515931129456, + "learning_rate": 1.861049730732956e-05, + "loss": 2.3023, + "step": 5519 + }, + { + "epoch": 0.18, + "grad_norm": 0.6961677670478821, + "learning_rate": 1.8609956755570576e-05, + "loss": 2.3136, + "step": 5520 + }, + { + "epoch": 0.18, + "grad_norm": 0.7206357717514038, + "learning_rate": 1.860941610654134e-05, + "loss": 2.1585, + "step": 5521 + }, + { + "epoch": 0.18, + "grad_norm": 0.6987873315811157, + "learning_rate": 1.8608875360247977e-05, + "loss": 2.2404, + "step": 5522 + }, + { + "epoch": 0.18, + "grad_norm": 0.7084365487098694, + "learning_rate": 1.860833451669658e-05, + "loss": 2.2177, + "step": 5523 + }, + { + "epoch": 0.18, + "grad_norm": 0.7131112813949585, + "learning_rate": 1.860779357589326e-05, + "loss": 2.1164, + "step": 5524 + }, + { + "epoch": 0.18, + "grad_norm": 0.6868016719818115, + "learning_rate": 1.8607252537844142e-05, + "loss": 2.1786, + "step": 5525 + }, + { + "epoch": 0.18, + "grad_norm": 0.6579523086547852, + "learning_rate": 1.860671140255532e-05, + "loss": 2.2116, + "step": 5526 + }, + { + "epoch": 0.18, + "grad_norm": 0.6694238185882568, + "learning_rate": 1.8606170170032922e-05, + "loss": 2.1808, + "step": 5527 + }, + { + "epoch": 0.18, + "grad_norm": 0.671684741973877, + "learning_rate": 1.8605628840283057e-05, + "loss": 2.2538, + "step": 5528 + }, + { + "epoch": 0.18, + "grad_norm": 0.6888987421989441, + "learning_rate": 1.860508741331184e-05, + "loss": 2.2145, + "step": 5529 + }, + { + "epoch": 0.18, + "grad_norm": 0.6738946437835693, + "learning_rate": 1.8604545889125387e-05, + "loss": 2.2444, + "step": 5530 + }, + { + "epoch": 0.18, + "grad_norm": 0.7116053700447083, + "learning_rate": 1.860400426772982e-05, + "loss": 2.1907, + "step": 5531 + }, + { + "epoch": 0.18, + "grad_norm": 0.6757642030715942, + "learning_rate": 1.8603462549131255e-05, + "loss": 2.2448, + "step": 5532 + }, + { + "epoch": 0.18, + "grad_norm": 0.7053086161613464, + "learning_rate": 1.860292073333581e-05, + "loss": 2.2386, + "step": 5533 + }, + { + "epoch": 0.18, + "grad_norm": 0.6668089628219604, + "learning_rate": 1.860237882034961e-05, + "loss": 2.1735, + "step": 5534 + }, + { + "epoch": 0.18, + "grad_norm": 0.6910253167152405, + "learning_rate": 1.8601836810178775e-05, + "loss": 2.2152, + "step": 5535 + }, + { + "epoch": 0.18, + "grad_norm": 0.6880805492401123, + "learning_rate": 1.860129470282943e-05, + "loss": 2.1959, + "step": 5536 + }, + { + "epoch": 0.18, + "grad_norm": 0.6861456632614136, + "learning_rate": 1.8600752498307696e-05, + "loss": 2.2708, + "step": 5537 + }, + { + "epoch": 0.18, + "grad_norm": 0.7295663952827454, + "learning_rate": 1.8600210196619704e-05, + "loss": 2.2496, + "step": 5538 + }, + { + "epoch": 0.18, + "grad_norm": 0.6734892129898071, + "learning_rate": 1.859966779777158e-05, + "loss": 2.2155, + "step": 5539 + }, + { + "epoch": 0.18, + "grad_norm": 0.6983523368835449, + "learning_rate": 1.8599125301769438e-05, + "loss": 2.1859, + "step": 5540 + }, + { + "epoch": 0.18, + "grad_norm": 0.7135183215141296, + "learning_rate": 1.8598582708619428e-05, + "loss": 2.1719, + "step": 5541 + }, + { + "epoch": 0.18, + "grad_norm": 0.7493467926979065, + "learning_rate": 1.8598040018327665e-05, + "loss": 2.2675, + "step": 5542 + }, + { + "epoch": 0.18, + "grad_norm": 0.7057217359542847, + "learning_rate": 1.8597497230900288e-05, + "loss": 2.1965, + "step": 5543 + }, + { + "epoch": 0.18, + "grad_norm": 0.670571506023407, + "learning_rate": 1.8596954346343423e-05, + "loss": 2.1929, + "step": 5544 + }, + { + "epoch": 0.18, + "grad_norm": 0.6801275610923767, + "learning_rate": 1.859641136466321e-05, + "loss": 2.2156, + "step": 5545 + }, + { + "epoch": 0.18, + "grad_norm": 0.7076669335365295, + "learning_rate": 1.8595868285865775e-05, + "loss": 2.1874, + "step": 5546 + }, + { + "epoch": 0.18, + "grad_norm": 0.7053273320198059, + "learning_rate": 1.859532510995726e-05, + "loss": 2.2453, + "step": 5547 + }, + { + "epoch": 0.18, + "grad_norm": 0.7127453088760376, + "learning_rate": 1.8594781836943797e-05, + "loss": 2.1342, + "step": 5548 + }, + { + "epoch": 0.18, + "grad_norm": 0.7153468728065491, + "learning_rate": 1.8594238466831526e-05, + "loss": 2.1582, + "step": 5549 + }, + { + "epoch": 0.18, + "grad_norm": 0.6730310916900635, + "learning_rate": 1.859369499962659e-05, + "loss": 2.2399, + "step": 5550 + }, + { + "epoch": 0.18, + "grad_norm": 0.7787979245185852, + "learning_rate": 1.859315143533512e-05, + "loss": 2.1869, + "step": 5551 + }, + { + "epoch": 0.18, + "grad_norm": 0.7161830067634583, + "learning_rate": 1.8592607773963262e-05, + "loss": 2.2198, + "step": 5552 + }, + { + "epoch": 0.18, + "grad_norm": 0.7131355404853821, + "learning_rate": 1.859206401551716e-05, + "loss": 2.1467, + "step": 5553 + }, + { + "epoch": 0.18, + "grad_norm": 0.6888655424118042, + "learning_rate": 1.8591520160002953e-05, + "loss": 2.1874, + "step": 5554 + }, + { + "epoch": 0.18, + "grad_norm": 0.6643336415290833, + "learning_rate": 1.8590976207426784e-05, + "loss": 2.236, + "step": 5555 + }, + { + "epoch": 0.18, + "grad_norm": 0.7004115581512451, + "learning_rate": 1.85904321577948e-05, + "loss": 2.2059, + "step": 5556 + }, + { + "epoch": 0.18, + "grad_norm": 0.7037082314491272, + "learning_rate": 1.8589888011113154e-05, + "loss": 2.2197, + "step": 5557 + }, + { + "epoch": 0.18, + "grad_norm": 0.740204393863678, + "learning_rate": 1.858934376738798e-05, + "loss": 2.2869, + "step": 5558 + }, + { + "epoch": 0.18, + "grad_norm": 0.7056680917739868, + "learning_rate": 1.8588799426625438e-05, + "loss": 2.2085, + "step": 5559 + }, + { + "epoch": 0.18, + "grad_norm": 0.7074881196022034, + "learning_rate": 1.8588254988831674e-05, + "loss": 2.2235, + "step": 5560 + }, + { + "epoch": 0.19, + "grad_norm": 0.6911839246749878, + "learning_rate": 1.8587710454012837e-05, + "loss": 2.2555, + "step": 5561 + }, + { + "epoch": 0.19, + "grad_norm": 0.6697965264320374, + "learning_rate": 1.858716582217508e-05, + "loss": 2.2001, + "step": 5562 + }, + { + "epoch": 0.19, + "grad_norm": 0.678257405757904, + "learning_rate": 1.8586621093324555e-05, + "loss": 2.2246, + "step": 5563 + }, + { + "epoch": 0.19, + "grad_norm": 0.6960854530334473, + "learning_rate": 1.858607626746742e-05, + "loss": 2.2236, + "step": 5564 + }, + { + "epoch": 0.19, + "grad_norm": 0.7796299457550049, + "learning_rate": 1.8585531344609827e-05, + "loss": 2.3053, + "step": 5565 + }, + { + "epoch": 0.19, + "grad_norm": 0.6959280967712402, + "learning_rate": 1.858498632475793e-05, + "loss": 2.1606, + "step": 5566 + }, + { + "epoch": 0.19, + "grad_norm": 0.7102519273757935, + "learning_rate": 1.858444120791789e-05, + "loss": 2.2322, + "step": 5567 + }, + { + "epoch": 0.19, + "grad_norm": 0.7155406475067139, + "learning_rate": 1.8583895994095864e-05, + "loss": 2.2198, + "step": 5568 + }, + { + "epoch": 0.19, + "grad_norm": 0.6744106411933899, + "learning_rate": 1.8583350683298015e-05, + "loss": 2.1861, + "step": 5569 + }, + { + "epoch": 0.19, + "grad_norm": 0.7172953486442566, + "learning_rate": 1.8582805275530495e-05, + "loss": 2.2601, + "step": 5570 + }, + { + "epoch": 0.19, + "grad_norm": 0.7674827575683594, + "learning_rate": 1.8582259770799475e-05, + "loss": 2.2199, + "step": 5571 + }, + { + "epoch": 0.19, + "grad_norm": 0.6964028477668762, + "learning_rate": 1.858171416911111e-05, + "loss": 2.2127, + "step": 5572 + }, + { + "epoch": 0.19, + "grad_norm": 0.6877376437187195, + "learning_rate": 1.8581168470471572e-05, + "loss": 2.2473, + "step": 5573 + }, + { + "epoch": 0.19, + "grad_norm": 0.6967625617980957, + "learning_rate": 1.858062267488702e-05, + "loss": 2.1633, + "step": 5574 + }, + { + "epoch": 0.19, + "grad_norm": 0.6951371431350708, + "learning_rate": 1.858007678236362e-05, + "loss": 2.2336, + "step": 5575 + }, + { + "epoch": 0.19, + "grad_norm": 0.7015105485916138, + "learning_rate": 1.8579530792907545e-05, + "loss": 2.2123, + "step": 5576 + }, + { + "epoch": 0.19, + "grad_norm": 0.6948661208152771, + "learning_rate": 1.8578984706524953e-05, + "loss": 2.2136, + "step": 5577 + }, + { + "epoch": 0.19, + "grad_norm": 0.726847767829895, + "learning_rate": 1.8578438523222025e-05, + "loss": 2.2187, + "step": 5578 + }, + { + "epoch": 0.19, + "grad_norm": 0.726252019405365, + "learning_rate": 1.8577892243004925e-05, + "loss": 2.2335, + "step": 5579 + }, + { + "epoch": 0.19, + "grad_norm": 0.7257281541824341, + "learning_rate": 1.8577345865879822e-05, + "loss": 2.1701, + "step": 5580 + }, + { + "epoch": 0.19, + "grad_norm": 0.7173301577568054, + "learning_rate": 1.8576799391852897e-05, + "loss": 2.207, + "step": 5581 + }, + { + "epoch": 0.19, + "grad_norm": 0.7347779273986816, + "learning_rate": 1.8576252820930315e-05, + "loss": 2.2597, + "step": 5582 + }, + { + "epoch": 0.19, + "grad_norm": 0.7081858515739441, + "learning_rate": 1.8575706153118256e-05, + "loss": 2.2277, + "step": 5583 + }, + { + "epoch": 0.19, + "grad_norm": 0.6845013499259949, + "learning_rate": 1.8575159388422895e-05, + "loss": 2.1899, + "step": 5584 + }, + { + "epoch": 0.19, + "grad_norm": 0.7392457127571106, + "learning_rate": 1.857461252685041e-05, + "loss": 2.1537, + "step": 5585 + }, + { + "epoch": 0.19, + "grad_norm": 0.7651543021202087, + "learning_rate": 1.857406556840698e-05, + "loss": 2.1385, + "step": 5586 + }, + { + "epoch": 0.19, + "grad_norm": 0.6975187659263611, + "learning_rate": 1.857351851309878e-05, + "loss": 2.2183, + "step": 5587 + }, + { + "epoch": 0.19, + "grad_norm": 0.6869101524353027, + "learning_rate": 1.857297136093199e-05, + "loss": 2.2096, + "step": 5588 + }, + { + "epoch": 0.19, + "grad_norm": 0.7008610963821411, + "learning_rate": 1.8572424111912796e-05, + "loss": 2.2878, + "step": 5589 + }, + { + "epoch": 0.19, + "grad_norm": 0.7367783188819885, + "learning_rate": 1.8571876766047377e-05, + "loss": 2.3027, + "step": 5590 + }, + { + "epoch": 0.19, + "grad_norm": 0.6932119131088257, + "learning_rate": 1.8571329323341918e-05, + "loss": 2.2268, + "step": 5591 + }, + { + "epoch": 0.19, + "grad_norm": 0.7032895088195801, + "learning_rate": 1.8570781783802605e-05, + "loss": 2.1581, + "step": 5592 + }, + { + "epoch": 0.19, + "grad_norm": 0.7264770865440369, + "learning_rate": 1.8570234147435622e-05, + "loss": 2.1819, + "step": 5593 + }, + { + "epoch": 0.19, + "grad_norm": 0.6944084763526917, + "learning_rate": 1.8569686414247156e-05, + "loss": 2.1661, + "step": 5594 + }, + { + "epoch": 0.19, + "grad_norm": 0.6666220426559448, + "learning_rate": 1.8569138584243393e-05, + "loss": 2.1788, + "step": 5595 + }, + { + "epoch": 0.19, + "grad_norm": 0.6838375926017761, + "learning_rate": 1.8568590657430527e-05, + "loss": 2.2102, + "step": 5596 + }, + { + "epoch": 0.19, + "grad_norm": 0.7171152830123901, + "learning_rate": 1.8568042633814745e-05, + "loss": 2.2683, + "step": 5597 + }, + { + "epoch": 0.19, + "grad_norm": 0.7188552618026733, + "learning_rate": 1.856749451340224e-05, + "loss": 2.1678, + "step": 5598 + }, + { + "epoch": 0.19, + "grad_norm": 0.7029561400413513, + "learning_rate": 1.85669462961992e-05, + "loss": 2.1847, + "step": 5599 + }, + { + "epoch": 0.19, + "grad_norm": 0.6970089077949524, + "learning_rate": 1.8566397982211824e-05, + "loss": 2.1656, + "step": 5600 + }, + { + "epoch": 0.19, + "grad_norm": 0.699612557888031, + "learning_rate": 1.85658495714463e-05, + "loss": 2.241, + "step": 5601 + }, + { + "epoch": 0.19, + "grad_norm": 0.7196010947227478, + "learning_rate": 1.8565301063908835e-05, + "loss": 2.2421, + "step": 5602 + }, + { + "epoch": 0.19, + "grad_norm": 0.6825580596923828, + "learning_rate": 1.856475245960561e-05, + "loss": 2.2258, + "step": 5603 + }, + { + "epoch": 0.19, + "grad_norm": 0.6924556493759155, + "learning_rate": 1.856420375854284e-05, + "loss": 2.1826, + "step": 5604 + }, + { + "epoch": 0.19, + "grad_norm": 0.6784041523933411, + "learning_rate": 1.8563654960726708e-05, + "loss": 2.1784, + "step": 5605 + }, + { + "epoch": 0.19, + "grad_norm": 0.7094051837921143, + "learning_rate": 1.8563106066163422e-05, + "loss": 2.2358, + "step": 5606 + }, + { + "epoch": 0.19, + "grad_norm": 0.673248291015625, + "learning_rate": 1.8562557074859183e-05, + "loss": 2.1545, + "step": 5607 + }, + { + "epoch": 0.19, + "grad_norm": 0.6802188754081726, + "learning_rate": 1.8562007986820192e-05, + "loss": 2.2248, + "step": 5608 + }, + { + "epoch": 0.19, + "grad_norm": 0.6944574117660522, + "learning_rate": 1.8561458802052655e-05, + "loss": 2.1954, + "step": 5609 + }, + { + "epoch": 0.19, + "grad_norm": 0.6873819231987, + "learning_rate": 1.856090952056277e-05, + "loss": 2.229, + "step": 5610 + }, + { + "epoch": 0.19, + "grad_norm": 0.6994902491569519, + "learning_rate": 1.856036014235675e-05, + "loss": 2.1363, + "step": 5611 + }, + { + "epoch": 0.19, + "grad_norm": 0.7005643248558044, + "learning_rate": 1.85598106674408e-05, + "loss": 2.199, + "step": 5612 + }, + { + "epoch": 0.19, + "grad_norm": 0.6903079748153687, + "learning_rate": 1.855926109582112e-05, + "loss": 2.2653, + "step": 5613 + }, + { + "epoch": 0.19, + "grad_norm": 0.7384333610534668, + "learning_rate": 1.8558711427503924e-05, + "loss": 2.1607, + "step": 5614 + }, + { + "epoch": 0.19, + "grad_norm": 0.702040433883667, + "learning_rate": 1.8558161662495427e-05, + "loss": 2.1642, + "step": 5615 + }, + { + "epoch": 0.19, + "grad_norm": 0.7769565582275391, + "learning_rate": 1.8557611800801835e-05, + "loss": 2.2698, + "step": 5616 + }, + { + "epoch": 0.19, + "grad_norm": 0.6853185892105103, + "learning_rate": 1.855706184242936e-05, + "loss": 2.1464, + "step": 5617 + }, + { + "epoch": 0.19, + "grad_norm": 0.7219986319541931, + "learning_rate": 1.855651178738421e-05, + "loss": 2.2501, + "step": 5618 + }, + { + "epoch": 0.19, + "grad_norm": 0.7006060481071472, + "learning_rate": 1.855596163567261e-05, + "loss": 2.1989, + "step": 5619 + }, + { + "epoch": 0.19, + "grad_norm": 0.694694459438324, + "learning_rate": 1.8555411387300765e-05, + "loss": 2.2333, + "step": 5620 + }, + { + "epoch": 0.19, + "grad_norm": 0.688143253326416, + "learning_rate": 1.85548610422749e-05, + "loss": 2.2051, + "step": 5621 + }, + { + "epoch": 0.19, + "grad_norm": 0.6829541921615601, + "learning_rate": 1.8554310600601227e-05, + "loss": 2.1406, + "step": 5622 + }, + { + "epoch": 0.19, + "grad_norm": 0.6947133541107178, + "learning_rate": 1.8553760062285967e-05, + "loss": 2.1565, + "step": 5623 + }, + { + "epoch": 0.19, + "grad_norm": 0.6916519999504089, + "learning_rate": 1.8553209427335337e-05, + "loss": 2.1921, + "step": 5624 + }, + { + "epoch": 0.19, + "grad_norm": 0.7105448842048645, + "learning_rate": 1.8552658695755558e-05, + "loss": 2.1853, + "step": 5625 + }, + { + "epoch": 0.19, + "grad_norm": 0.7299433946609497, + "learning_rate": 1.8552107867552856e-05, + "loss": 2.1408, + "step": 5626 + }, + { + "epoch": 0.19, + "grad_norm": 0.677615225315094, + "learning_rate": 1.8551556942733454e-05, + "loss": 2.1561, + "step": 5627 + }, + { + "epoch": 0.19, + "grad_norm": 0.7708123922348022, + "learning_rate": 1.855100592130357e-05, + "loss": 2.1148, + "step": 5628 + }, + { + "epoch": 0.19, + "grad_norm": 0.7076684832572937, + "learning_rate": 1.8550454803269434e-05, + "loss": 2.2088, + "step": 5629 + }, + { + "epoch": 0.19, + "grad_norm": 0.76951003074646, + "learning_rate": 1.854990358863727e-05, + "loss": 2.2664, + "step": 5630 + }, + { + "epoch": 0.19, + "grad_norm": 0.6916471719741821, + "learning_rate": 1.8549352277413307e-05, + "loss": 2.1557, + "step": 5631 + }, + { + "epoch": 0.19, + "grad_norm": 0.7412891983985901, + "learning_rate": 1.8548800869603767e-05, + "loss": 2.2193, + "step": 5632 + }, + { + "epoch": 0.19, + "grad_norm": 0.691865086555481, + "learning_rate": 1.854824936521489e-05, + "loss": 2.1714, + "step": 5633 + }, + { + "epoch": 0.19, + "grad_norm": 0.7263773083686829, + "learning_rate": 1.8547697764252902e-05, + "loss": 2.1972, + "step": 5634 + }, + { + "epoch": 0.19, + "grad_norm": 0.6993486881256104, + "learning_rate": 1.8547146066724034e-05, + "loss": 2.2254, + "step": 5635 + }, + { + "epoch": 0.19, + "grad_norm": 0.7502067685127258, + "learning_rate": 1.8546594272634518e-05, + "loss": 2.2153, + "step": 5636 + }, + { + "epoch": 0.19, + "grad_norm": 0.7296717762947083, + "learning_rate": 1.854604238199059e-05, + "loss": 2.1962, + "step": 5637 + }, + { + "epoch": 0.19, + "grad_norm": 0.6980537176132202, + "learning_rate": 1.854549039479848e-05, + "loss": 2.1625, + "step": 5638 + }, + { + "epoch": 0.19, + "grad_norm": 0.7148882150650024, + "learning_rate": 1.854493831106443e-05, + "loss": 2.1139, + "step": 5639 + }, + { + "epoch": 0.19, + "grad_norm": 0.6928771734237671, + "learning_rate": 1.8544386130794674e-05, + "loss": 2.2283, + "step": 5640 + }, + { + "epoch": 0.19, + "grad_norm": 0.7212710380554199, + "learning_rate": 1.8543833853995454e-05, + "loss": 2.1868, + "step": 5641 + }, + { + "epoch": 0.19, + "grad_norm": 0.7183309197425842, + "learning_rate": 1.8543281480673e-05, + "loss": 2.1854, + "step": 5642 + }, + { + "epoch": 0.19, + "grad_norm": 0.740610659122467, + "learning_rate": 1.8542729010833565e-05, + "loss": 2.1671, + "step": 5643 + }, + { + "epoch": 0.19, + "grad_norm": 0.6828684210777283, + "learning_rate": 1.854217644448338e-05, + "loss": 2.1862, + "step": 5644 + }, + { + "epoch": 0.19, + "grad_norm": 0.6876583099365234, + "learning_rate": 1.8541623781628694e-05, + "loss": 2.1578, + "step": 5645 + }, + { + "epoch": 0.19, + "grad_norm": 0.6859642863273621, + "learning_rate": 1.854107102227575e-05, + "loss": 2.189, + "step": 5646 + }, + { + "epoch": 0.19, + "grad_norm": 0.7046186923980713, + "learning_rate": 1.8540518166430786e-05, + "loss": 2.1867, + "step": 5647 + }, + { + "epoch": 0.19, + "grad_norm": 0.7016686201095581, + "learning_rate": 1.8539965214100056e-05, + "loss": 2.1755, + "step": 5648 + }, + { + "epoch": 0.19, + "grad_norm": 0.7054423093795776, + "learning_rate": 1.8539412165289803e-05, + "loss": 2.1632, + "step": 5649 + }, + { + "epoch": 0.19, + "grad_norm": 0.6863690614700317, + "learning_rate": 1.8538859020006275e-05, + "loss": 2.2322, + "step": 5650 + }, + { + "epoch": 0.19, + "grad_norm": 0.7221072912216187, + "learning_rate": 1.8538305778255727e-05, + "loss": 2.2056, + "step": 5651 + }, + { + "epoch": 0.19, + "grad_norm": 0.6877840757369995, + "learning_rate": 1.85377524400444e-05, + "loss": 2.2096, + "step": 5652 + }, + { + "epoch": 0.19, + "grad_norm": 0.690946102142334, + "learning_rate": 1.853719900537855e-05, + "loss": 2.1877, + "step": 5653 + }, + { + "epoch": 0.19, + "grad_norm": 0.6967852711677551, + "learning_rate": 1.853664547426443e-05, + "loss": 2.1957, + "step": 5654 + }, + { + "epoch": 0.19, + "grad_norm": 0.7667407393455505, + "learning_rate": 1.8536091846708292e-05, + "loss": 2.2885, + "step": 5655 + }, + { + "epoch": 0.19, + "grad_norm": 0.6827165484428406, + "learning_rate": 1.853553812271639e-05, + "loss": 2.1943, + "step": 5656 + }, + { + "epoch": 0.19, + "grad_norm": 0.6725485324859619, + "learning_rate": 1.8534984302294984e-05, + "loss": 2.1761, + "step": 5657 + }, + { + "epoch": 0.19, + "grad_norm": 0.7040414810180664, + "learning_rate": 1.853443038545032e-05, + "loss": 2.1618, + "step": 5658 + }, + { + "epoch": 0.19, + "grad_norm": 0.711972713470459, + "learning_rate": 1.853387637218867e-05, + "loss": 2.2586, + "step": 5659 + }, + { + "epoch": 0.19, + "grad_norm": 0.7058595418930054, + "learning_rate": 1.8533322262516284e-05, + "loss": 2.1537, + "step": 5660 + }, + { + "epoch": 0.19, + "grad_norm": 0.7124547362327576, + "learning_rate": 1.8532768056439424e-05, + "loss": 2.257, + "step": 5661 + }, + { + "epoch": 0.19, + "grad_norm": 0.6787526607513428, + "learning_rate": 1.8532213753964356e-05, + "loss": 2.1562, + "step": 5662 + }, + { + "epoch": 0.19, + "grad_norm": 0.7267456650733948, + "learning_rate": 1.853165935509733e-05, + "loss": 2.2167, + "step": 5663 + }, + { + "epoch": 0.19, + "grad_norm": 0.7189849019050598, + "learning_rate": 1.853110485984462e-05, + "loss": 2.2196, + "step": 5664 + }, + { + "epoch": 0.19, + "grad_norm": 0.6702702641487122, + "learning_rate": 1.8530550268212483e-05, + "loss": 2.2164, + "step": 5665 + }, + { + "epoch": 0.19, + "grad_norm": 0.6748697757720947, + "learning_rate": 1.8529995580207195e-05, + "loss": 2.1395, + "step": 5666 + }, + { + "epoch": 0.19, + "grad_norm": 0.7360884547233582, + "learning_rate": 1.8529440795835012e-05, + "loss": 2.2542, + "step": 5667 + }, + { + "epoch": 0.19, + "grad_norm": 0.6972715854644775, + "learning_rate": 1.8528885915102206e-05, + "loss": 2.1685, + "step": 5668 + }, + { + "epoch": 0.19, + "grad_norm": 0.6819813251495361, + "learning_rate": 1.8528330938015045e-05, + "loss": 2.2144, + "step": 5669 + }, + { + "epoch": 0.19, + "grad_norm": 0.7535191178321838, + "learning_rate": 1.85277758645798e-05, + "loss": 2.1532, + "step": 5670 + }, + { + "epoch": 0.19, + "grad_norm": 0.6854206323623657, + "learning_rate": 1.8527220694802742e-05, + "loss": 2.1489, + "step": 5671 + }, + { + "epoch": 0.19, + "grad_norm": 0.6832886338233948, + "learning_rate": 1.852666542869014e-05, + "loss": 2.1693, + "step": 5672 + }, + { + "epoch": 0.19, + "grad_norm": 0.6725049018859863, + "learning_rate": 1.852611006624827e-05, + "loss": 2.194, + "step": 5673 + }, + { + "epoch": 0.19, + "grad_norm": 0.6795705556869507, + "learning_rate": 1.8525554607483405e-05, + "loss": 2.1702, + "step": 5674 + }, + { + "epoch": 0.19, + "grad_norm": 0.7061900496482849, + "learning_rate": 1.8524999052401822e-05, + "loss": 2.1954, + "step": 5675 + }, + { + "epoch": 0.19, + "grad_norm": 0.6852815747261047, + "learning_rate": 1.8524443401009794e-05, + "loss": 2.2372, + "step": 5676 + }, + { + "epoch": 0.19, + "grad_norm": 0.7232020497322083, + "learning_rate": 1.85238876533136e-05, + "loss": 2.256, + "step": 5677 + }, + { + "epoch": 0.19, + "grad_norm": 0.6963891983032227, + "learning_rate": 1.852333180931952e-05, + "loss": 2.2246, + "step": 5678 + }, + { + "epoch": 0.19, + "grad_norm": 0.6922557353973389, + "learning_rate": 1.8522775869033832e-05, + "loss": 2.2516, + "step": 5679 + }, + { + "epoch": 0.19, + "grad_norm": 0.7227574586868286, + "learning_rate": 1.852221983246282e-05, + "loss": 2.2781, + "step": 5680 + }, + { + "epoch": 0.19, + "grad_norm": 0.6583694219589233, + "learning_rate": 1.852166369961276e-05, + "loss": 2.1877, + "step": 5681 + }, + { + "epoch": 0.19, + "grad_norm": 0.691525399684906, + "learning_rate": 1.8521107470489936e-05, + "loss": 2.1654, + "step": 5682 + }, + { + "epoch": 0.19, + "grad_norm": 0.7407993078231812, + "learning_rate": 1.852055114510064e-05, + "loss": 2.1989, + "step": 5683 + }, + { + "epoch": 0.19, + "grad_norm": 0.6777991056442261, + "learning_rate": 1.8519994723451144e-05, + "loss": 2.2016, + "step": 5684 + }, + { + "epoch": 0.19, + "grad_norm": 0.6982577443122864, + "learning_rate": 1.8519438205547742e-05, + "loss": 2.2563, + "step": 5685 + }, + { + "epoch": 0.19, + "grad_norm": 0.6696224212646484, + "learning_rate": 1.851888159139672e-05, + "loss": 2.1372, + "step": 5686 + }, + { + "epoch": 0.19, + "grad_norm": 0.6761857271194458, + "learning_rate": 1.851832488100437e-05, + "loss": 2.2024, + "step": 5687 + }, + { + "epoch": 0.19, + "grad_norm": 0.7226700782775879, + "learning_rate": 1.8517768074376974e-05, + "loss": 2.1896, + "step": 5688 + }, + { + "epoch": 0.19, + "grad_norm": 0.6938554048538208, + "learning_rate": 1.8517211171520827e-05, + "loss": 2.1917, + "step": 5689 + }, + { + "epoch": 0.19, + "grad_norm": 0.6912155151367188, + "learning_rate": 1.8516654172442223e-05, + "loss": 2.1957, + "step": 5690 + }, + { + "epoch": 0.19, + "grad_norm": 0.7307465076446533, + "learning_rate": 1.8516097077147445e-05, + "loss": 2.2042, + "step": 5691 + }, + { + "epoch": 0.19, + "grad_norm": 0.6893206238746643, + "learning_rate": 1.85155398856428e-05, + "loss": 2.2514, + "step": 5692 + }, + { + "epoch": 0.19, + "grad_norm": 0.7607327103614807, + "learning_rate": 1.851498259793457e-05, + "loss": 2.1813, + "step": 5693 + }, + { + "epoch": 0.19, + "grad_norm": 0.7359330654144287, + "learning_rate": 1.851442521402906e-05, + "loss": 2.179, + "step": 5694 + }, + { + "epoch": 0.19, + "grad_norm": 0.7026422619819641, + "learning_rate": 1.8513867733932563e-05, + "loss": 2.2105, + "step": 5695 + }, + { + "epoch": 0.19, + "grad_norm": 0.7308424711227417, + "learning_rate": 1.851331015765138e-05, + "loss": 2.2052, + "step": 5696 + }, + { + "epoch": 0.19, + "grad_norm": 0.6761261820793152, + "learning_rate": 1.8512752485191804e-05, + "loss": 2.2391, + "step": 5697 + }, + { + "epoch": 0.19, + "grad_norm": 0.6896911263465881, + "learning_rate": 1.8512194716560145e-05, + "loss": 2.2366, + "step": 5698 + }, + { + "epoch": 0.19, + "grad_norm": 0.7406615614891052, + "learning_rate": 1.8511636851762694e-05, + "loss": 2.2587, + "step": 5699 + }, + { + "epoch": 0.19, + "grad_norm": 0.667523980140686, + "learning_rate": 1.851107889080576e-05, + "loss": 2.2125, + "step": 5700 + }, + { + "epoch": 0.19, + "grad_norm": 0.6855826377868652, + "learning_rate": 1.8510520833695644e-05, + "loss": 2.2316, + "step": 5701 + }, + { + "epoch": 0.19, + "grad_norm": 0.6964127421379089, + "learning_rate": 1.850996268043865e-05, + "loss": 2.1675, + "step": 5702 + }, + { + "epoch": 0.19, + "grad_norm": 0.6840857267379761, + "learning_rate": 1.8509404431041088e-05, + "loss": 2.1918, + "step": 5703 + }, + { + "epoch": 0.19, + "grad_norm": 0.7179672122001648, + "learning_rate": 1.850884608550926e-05, + "loss": 2.1245, + "step": 5704 + }, + { + "epoch": 0.19, + "grad_norm": 0.7067128419876099, + "learning_rate": 1.8508287643849474e-05, + "loss": 2.2511, + "step": 5705 + }, + { + "epoch": 0.19, + "grad_norm": 0.7082436084747314, + "learning_rate": 1.8507729106068046e-05, + "loss": 2.1442, + "step": 5706 + }, + { + "epoch": 0.19, + "grad_norm": 0.6925551295280457, + "learning_rate": 1.8507170472171275e-05, + "loss": 2.1899, + "step": 5707 + }, + { + "epoch": 0.19, + "grad_norm": 0.6743143200874329, + "learning_rate": 1.850661174216548e-05, + "loss": 2.2037, + "step": 5708 + }, + { + "epoch": 0.19, + "grad_norm": 0.728156566619873, + "learning_rate": 1.8506052916056972e-05, + "loss": 2.1831, + "step": 5709 + }, + { + "epoch": 0.19, + "grad_norm": 0.7280499935150146, + "learning_rate": 1.850549399385206e-05, + "loss": 2.1842, + "step": 5710 + }, + { + "epoch": 0.19, + "grad_norm": 0.6557510495185852, + "learning_rate": 1.8504934975557064e-05, + "loss": 2.1276, + "step": 5711 + }, + { + "epoch": 0.19, + "grad_norm": 0.704355001449585, + "learning_rate": 1.8504375861178296e-05, + "loss": 2.1626, + "step": 5712 + }, + { + "epoch": 0.19, + "grad_norm": 0.6704341173171997, + "learning_rate": 1.8503816650722075e-05, + "loss": 2.2024, + "step": 5713 + }, + { + "epoch": 0.19, + "grad_norm": 0.689931333065033, + "learning_rate": 1.8503257344194717e-05, + "loss": 2.2125, + "step": 5714 + }, + { + "epoch": 0.19, + "grad_norm": 0.7019279599189758, + "learning_rate": 1.850269794160254e-05, + "loss": 2.1518, + "step": 5715 + }, + { + "epoch": 0.19, + "grad_norm": 0.6858396530151367, + "learning_rate": 1.8502138442951866e-05, + "loss": 2.2054, + "step": 5716 + }, + { + "epoch": 0.19, + "grad_norm": 0.707389771938324, + "learning_rate": 1.850157884824901e-05, + "loss": 2.2043, + "step": 5717 + }, + { + "epoch": 0.19, + "grad_norm": 0.6972928643226624, + "learning_rate": 1.85010191575003e-05, + "loss": 2.2093, + "step": 5718 + }, + { + "epoch": 0.19, + "grad_norm": 0.7109317779541016, + "learning_rate": 1.850045937071206e-05, + "loss": 2.1362, + "step": 5719 + }, + { + "epoch": 0.19, + "grad_norm": 0.6993730068206787, + "learning_rate": 1.8499899487890615e-05, + "loss": 2.1745, + "step": 5720 + }, + { + "epoch": 0.19, + "grad_norm": 0.7064675092697144, + "learning_rate": 1.8499339509042286e-05, + "loss": 2.2045, + "step": 5721 + }, + { + "epoch": 0.19, + "grad_norm": 0.7081506848335266, + "learning_rate": 1.8498779434173394e-05, + "loss": 2.21, + "step": 5722 + }, + { + "epoch": 0.19, + "grad_norm": 0.6949377655982971, + "learning_rate": 1.849821926329028e-05, + "loss": 2.1899, + "step": 5723 + }, + { + "epoch": 0.19, + "grad_norm": 0.739858090877533, + "learning_rate": 1.8497658996399258e-05, + "loss": 2.2108, + "step": 5724 + }, + { + "epoch": 0.19, + "grad_norm": 0.7240483164787292, + "learning_rate": 1.849709863350667e-05, + "loss": 2.1727, + "step": 5725 + }, + { + "epoch": 0.19, + "grad_norm": 0.6831229329109192, + "learning_rate": 1.849653817461884e-05, + "loss": 2.1385, + "step": 5726 + }, + { + "epoch": 0.19, + "grad_norm": 0.6802065372467041, + "learning_rate": 1.84959776197421e-05, + "loss": 2.1887, + "step": 5727 + }, + { + "epoch": 0.19, + "grad_norm": 0.6928762793540955, + "learning_rate": 1.8495416968882787e-05, + "loss": 2.1857, + "step": 5728 + }, + { + "epoch": 0.19, + "grad_norm": 0.7056906223297119, + "learning_rate": 1.8494856222047232e-05, + "loss": 2.1992, + "step": 5729 + }, + { + "epoch": 0.19, + "grad_norm": 0.6872397065162659, + "learning_rate": 1.8494295379241766e-05, + "loss": 2.2372, + "step": 5730 + }, + { + "epoch": 0.19, + "grad_norm": 0.6804319024085999, + "learning_rate": 1.849373444047273e-05, + "loss": 2.1564, + "step": 5731 + }, + { + "epoch": 0.19, + "grad_norm": 0.6832162737846375, + "learning_rate": 1.849317340574646e-05, + "loss": 2.189, + "step": 5732 + }, + { + "epoch": 0.19, + "grad_norm": 0.6804760098457336, + "learning_rate": 1.8492612275069297e-05, + "loss": 2.1802, + "step": 5733 + }, + { + "epoch": 0.19, + "grad_norm": 0.7294803261756897, + "learning_rate": 1.8492051048447575e-05, + "loss": 2.242, + "step": 5734 + }, + { + "epoch": 0.19, + "grad_norm": 0.6804189085960388, + "learning_rate": 1.849148972588764e-05, + "loss": 2.2371, + "step": 5735 + }, + { + "epoch": 0.19, + "grad_norm": 0.7128114104270935, + "learning_rate": 1.849092830739583e-05, + "loss": 2.1209, + "step": 5736 + }, + { + "epoch": 0.19, + "grad_norm": 0.707399845123291, + "learning_rate": 1.849036679297848e-05, + "loss": 2.207, + "step": 5737 + }, + { + "epoch": 0.19, + "grad_norm": 0.6638315320014954, + "learning_rate": 1.8489805182641954e-05, + "loss": 2.1705, + "step": 5738 + }, + { + "epoch": 0.19, + "grad_norm": 0.6954512596130371, + "learning_rate": 1.8489243476392577e-05, + "loss": 2.1287, + "step": 5739 + }, + { + "epoch": 0.19, + "grad_norm": 0.6850218772888184, + "learning_rate": 1.8488681674236708e-05, + "loss": 2.2591, + "step": 5740 + }, + { + "epoch": 0.19, + "grad_norm": 0.7465183138847351, + "learning_rate": 1.8488119776180683e-05, + "loss": 2.1737, + "step": 5741 + }, + { + "epoch": 0.19, + "grad_norm": 0.698441207408905, + "learning_rate": 1.848755778223086e-05, + "loss": 2.2203, + "step": 5742 + }, + { + "epoch": 0.19, + "grad_norm": 0.7407118082046509, + "learning_rate": 1.848699569239358e-05, + "loss": 2.1947, + "step": 5743 + }, + { + "epoch": 0.19, + "grad_norm": 0.6816844940185547, + "learning_rate": 1.8486433506675194e-05, + "loss": 2.1664, + "step": 5744 + }, + { + "epoch": 0.19, + "grad_norm": 0.6911886930465698, + "learning_rate": 1.848587122508206e-05, + "loss": 2.2101, + "step": 5745 + }, + { + "epoch": 0.19, + "grad_norm": 0.7974408864974976, + "learning_rate": 1.8485308847620523e-05, + "loss": 2.215, + "step": 5746 + }, + { + "epoch": 0.19, + "grad_norm": 0.7241461873054504, + "learning_rate": 1.8484746374296943e-05, + "loss": 2.2264, + "step": 5747 + }, + { + "epoch": 0.19, + "grad_norm": 0.7246502637863159, + "learning_rate": 1.848418380511767e-05, + "loss": 2.2024, + "step": 5748 + }, + { + "epoch": 0.19, + "grad_norm": 0.7227166891098022, + "learning_rate": 1.848362114008906e-05, + "loss": 2.2325, + "step": 5749 + }, + { + "epoch": 0.19, + "grad_norm": 0.7089793682098389, + "learning_rate": 1.848305837921747e-05, + "loss": 2.2491, + "step": 5750 + }, + { + "epoch": 0.19, + "grad_norm": 0.7279247641563416, + "learning_rate": 1.848249552250926e-05, + "loss": 2.183, + "step": 5751 + }, + { + "epoch": 0.19, + "grad_norm": 0.6795079708099365, + "learning_rate": 1.8481932569970782e-05, + "loss": 2.259, + "step": 5752 + }, + { + "epoch": 0.19, + "grad_norm": 0.6965195536613464, + "learning_rate": 1.8481369521608406e-05, + "loss": 2.2388, + "step": 5753 + }, + { + "epoch": 0.19, + "grad_norm": 0.7750223875045776, + "learning_rate": 1.8480806377428483e-05, + "loss": 2.2226, + "step": 5754 + }, + { + "epoch": 0.19, + "grad_norm": 0.7496516704559326, + "learning_rate": 1.8480243137437385e-05, + "loss": 2.1744, + "step": 5755 + }, + { + "epoch": 0.19, + "grad_norm": 0.6600427031517029, + "learning_rate": 1.8479679801641466e-05, + "loss": 2.1989, + "step": 5756 + }, + { + "epoch": 0.19, + "grad_norm": 0.7313635945320129, + "learning_rate": 1.8479116370047096e-05, + "loss": 2.1683, + "step": 5757 + }, + { + "epoch": 0.19, + "grad_norm": 0.7298907041549683, + "learning_rate": 1.8478552842660636e-05, + "loss": 2.1809, + "step": 5758 + }, + { + "epoch": 0.19, + "grad_norm": 0.728547215461731, + "learning_rate": 1.8477989219488458e-05, + "loss": 2.2321, + "step": 5759 + }, + { + "epoch": 0.19, + "grad_norm": 0.7118574380874634, + "learning_rate": 1.8477425500536927e-05, + "loss": 2.2615, + "step": 5760 + }, + { + "epoch": 0.19, + "grad_norm": 0.7344117164611816, + "learning_rate": 1.8476861685812412e-05, + "loss": 2.2594, + "step": 5761 + }, + { + "epoch": 0.19, + "grad_norm": 0.7285259962081909, + "learning_rate": 1.8476297775321278e-05, + "loss": 2.1876, + "step": 5762 + }, + { + "epoch": 0.19, + "grad_norm": 0.7066472172737122, + "learning_rate": 1.84757337690699e-05, + "loss": 2.1914, + "step": 5763 + }, + { + "epoch": 0.19, + "grad_norm": 0.7148813009262085, + "learning_rate": 1.8475169667064647e-05, + "loss": 2.1986, + "step": 5764 + }, + { + "epoch": 0.19, + "grad_norm": 0.7032379508018494, + "learning_rate": 1.8474605469311897e-05, + "loss": 2.2771, + "step": 5765 + }, + { + "epoch": 0.19, + "grad_norm": 0.7261673212051392, + "learning_rate": 1.847404117581802e-05, + "loss": 2.1403, + "step": 5766 + }, + { + "epoch": 0.19, + "grad_norm": 0.7148529291152954, + "learning_rate": 1.847347678658939e-05, + "loss": 2.1727, + "step": 5767 + }, + { + "epoch": 0.19, + "grad_norm": 0.7385905385017395, + "learning_rate": 1.847291230163239e-05, + "loss": 2.2446, + "step": 5768 + }, + { + "epoch": 0.19, + "grad_norm": 0.6731931567192078, + "learning_rate": 1.8472347720953392e-05, + "loss": 2.209, + "step": 5769 + }, + { + "epoch": 0.19, + "grad_norm": 0.7199866771697998, + "learning_rate": 1.847178304455877e-05, + "loss": 2.221, + "step": 5770 + }, + { + "epoch": 0.19, + "grad_norm": 0.7093116044998169, + "learning_rate": 1.8471218272454913e-05, + "loss": 2.2051, + "step": 5771 + }, + { + "epoch": 0.19, + "grad_norm": 0.6715319156646729, + "learning_rate": 1.8470653404648192e-05, + "loss": 2.2453, + "step": 5772 + }, + { + "epoch": 0.19, + "grad_norm": 0.704441487789154, + "learning_rate": 1.8470088441144996e-05, + "loss": 2.194, + "step": 5773 + }, + { + "epoch": 0.19, + "grad_norm": 0.7156498432159424, + "learning_rate": 1.8469523381951706e-05, + "loss": 2.2629, + "step": 5774 + }, + { + "epoch": 0.19, + "grad_norm": 0.719251811504364, + "learning_rate": 1.8468958227074704e-05, + "loss": 2.2132, + "step": 5775 + }, + { + "epoch": 0.19, + "grad_norm": 0.7486870288848877, + "learning_rate": 1.8468392976520374e-05, + "loss": 2.2248, + "step": 5776 + }, + { + "epoch": 0.19, + "grad_norm": 0.678663432598114, + "learning_rate": 1.8467827630295102e-05, + "loss": 2.1407, + "step": 5777 + }, + { + "epoch": 0.19, + "grad_norm": 0.7613793611526489, + "learning_rate": 1.846726218840528e-05, + "loss": 2.2142, + "step": 5778 + }, + { + "epoch": 0.19, + "grad_norm": 0.7172176241874695, + "learning_rate": 1.8466696650857287e-05, + "loss": 2.2728, + "step": 5779 + }, + { + "epoch": 0.19, + "grad_norm": 0.727596640586853, + "learning_rate": 1.846613101765752e-05, + "loss": 2.2357, + "step": 5780 + }, + { + "epoch": 0.19, + "grad_norm": 0.8211220502853394, + "learning_rate": 1.8465565288812362e-05, + "loss": 2.213, + "step": 5781 + }, + { + "epoch": 0.19, + "grad_norm": 0.6838415265083313, + "learning_rate": 1.8464999464328216e-05, + "loss": 2.2234, + "step": 5782 + }, + { + "epoch": 0.19, + "grad_norm": 0.7337430715560913, + "learning_rate": 1.8464433544211465e-05, + "loss": 2.1714, + "step": 5783 + }, + { + "epoch": 0.19, + "grad_norm": 0.7136640548706055, + "learning_rate": 1.8463867528468502e-05, + "loss": 2.2249, + "step": 5784 + }, + { + "epoch": 0.19, + "grad_norm": 0.6972154378890991, + "learning_rate": 1.8463301417105724e-05, + "loss": 2.2083, + "step": 5785 + }, + { + "epoch": 0.19, + "grad_norm": 0.6871650815010071, + "learning_rate": 1.846273521012953e-05, + "loss": 2.2087, + "step": 5786 + }, + { + "epoch": 0.19, + "grad_norm": 0.7603265643119812, + "learning_rate": 1.846216890754631e-05, + "loss": 2.2314, + "step": 5787 + }, + { + "epoch": 0.19, + "grad_norm": 0.7470519542694092, + "learning_rate": 1.8461602509362465e-05, + "loss": 2.1892, + "step": 5788 + }, + { + "epoch": 0.19, + "grad_norm": 0.7518399953842163, + "learning_rate": 1.8461036015584393e-05, + "loss": 2.2239, + "step": 5789 + }, + { + "epoch": 0.19, + "grad_norm": 0.7135910987854004, + "learning_rate": 1.8460469426218498e-05, + "loss": 2.1873, + "step": 5790 + }, + { + "epoch": 0.19, + "grad_norm": 0.729735255241394, + "learning_rate": 1.8459902741271175e-05, + "loss": 2.1145, + "step": 5791 + }, + { + "epoch": 0.19, + "grad_norm": 0.6778610944747925, + "learning_rate": 1.8459335960748835e-05, + "loss": 2.1942, + "step": 5792 + }, + { + "epoch": 0.19, + "grad_norm": 0.6946741342544556, + "learning_rate": 1.845876908465787e-05, + "loss": 2.1534, + "step": 5793 + }, + { + "epoch": 0.19, + "grad_norm": 0.733286440372467, + "learning_rate": 1.8458202113004686e-05, + "loss": 2.14, + "step": 5794 + }, + { + "epoch": 0.19, + "grad_norm": 0.7079468965530396, + "learning_rate": 1.8457635045795697e-05, + "loss": 2.2105, + "step": 5795 + }, + { + "epoch": 0.19, + "grad_norm": 0.730912446975708, + "learning_rate": 1.8457067883037302e-05, + "loss": 2.2391, + "step": 5796 + }, + { + "epoch": 0.19, + "grad_norm": 0.6963058114051819, + "learning_rate": 1.8456500624735908e-05, + "loss": 2.1772, + "step": 5797 + }, + { + "epoch": 0.19, + "grad_norm": 0.7229794263839722, + "learning_rate": 1.845593327089793e-05, + "loss": 2.2, + "step": 5798 + }, + { + "epoch": 0.19, + "grad_norm": 0.6929073929786682, + "learning_rate": 1.845536582152977e-05, + "loss": 2.2058, + "step": 5799 + }, + { + "epoch": 0.19, + "grad_norm": 0.7347926497459412, + "learning_rate": 1.8454798276637843e-05, + "loss": 2.304, + "step": 5800 + }, + { + "epoch": 0.19, + "grad_norm": 0.6852323412895203, + "learning_rate": 1.8454230636228563e-05, + "loss": 2.1878, + "step": 5801 + }, + { + "epoch": 0.19, + "grad_norm": 0.6772787570953369, + "learning_rate": 1.8453662900308337e-05, + "loss": 2.1611, + "step": 5802 + }, + { + "epoch": 0.19, + "grad_norm": 0.6806902289390564, + "learning_rate": 1.8453095068883583e-05, + "loss": 2.2181, + "step": 5803 + }, + { + "epoch": 0.19, + "grad_norm": 0.6981421709060669, + "learning_rate": 1.8452527141960717e-05, + "loss": 2.1295, + "step": 5804 + }, + { + "epoch": 0.19, + "grad_norm": 0.6946926712989807, + "learning_rate": 1.845195911954615e-05, + "loss": 2.2123, + "step": 5805 + }, + { + "epoch": 0.19, + "grad_norm": 0.6878112554550171, + "learning_rate": 1.8451391001646303e-05, + "loss": 2.1646, + "step": 5806 + }, + { + "epoch": 0.19, + "grad_norm": 0.6959139108657837, + "learning_rate": 1.8450822788267593e-05, + "loss": 2.1849, + "step": 5807 + }, + { + "epoch": 0.19, + "grad_norm": 0.7147149443626404, + "learning_rate": 1.845025447941644e-05, + "loss": 2.2275, + "step": 5808 + }, + { + "epoch": 0.19, + "grad_norm": 0.6920427680015564, + "learning_rate": 1.844968607509926e-05, + "loss": 2.15, + "step": 5809 + }, + { + "epoch": 0.19, + "grad_norm": 0.6701664924621582, + "learning_rate": 1.8449117575322487e-05, + "loss": 2.2455, + "step": 5810 + }, + { + "epoch": 0.19, + "grad_norm": 0.6952377557754517, + "learning_rate": 1.8448548980092532e-05, + "loss": 2.1685, + "step": 5811 + }, + { + "epoch": 0.19, + "grad_norm": 0.7051309943199158, + "learning_rate": 1.8447980289415822e-05, + "loss": 2.2331, + "step": 5812 + }, + { + "epoch": 0.19, + "grad_norm": 0.7096760869026184, + "learning_rate": 1.844741150329878e-05, + "loss": 2.1934, + "step": 5813 + }, + { + "epoch": 0.19, + "grad_norm": 0.7116329669952393, + "learning_rate": 1.8446842621747834e-05, + "loss": 2.1326, + "step": 5814 + }, + { + "epoch": 0.19, + "grad_norm": 0.6922067403793335, + "learning_rate": 1.8446273644769414e-05, + "loss": 2.1921, + "step": 5815 + }, + { + "epoch": 0.19, + "grad_norm": 0.7360302805900574, + "learning_rate": 1.8445704572369937e-05, + "loss": 2.2255, + "step": 5816 + }, + { + "epoch": 0.19, + "grad_norm": 0.7079552412033081, + "learning_rate": 1.8445135404555844e-05, + "loss": 2.2096, + "step": 5817 + }, + { + "epoch": 0.19, + "grad_norm": 0.6884557008743286, + "learning_rate": 1.8444566141333563e-05, + "loss": 2.1887, + "step": 5818 + }, + { + "epoch": 0.19, + "grad_norm": 0.7541073560714722, + "learning_rate": 1.844399678270952e-05, + "loss": 2.1831, + "step": 5819 + }, + { + "epoch": 0.19, + "grad_norm": 0.7467616200447083, + "learning_rate": 1.844342732869015e-05, + "loss": 2.283, + "step": 5820 + }, + { + "epoch": 0.19, + "grad_norm": 0.6788119077682495, + "learning_rate": 1.8442857779281887e-05, + "loss": 2.187, + "step": 5821 + }, + { + "epoch": 0.19, + "grad_norm": 0.7284881472587585, + "learning_rate": 1.8442288134491165e-05, + "loss": 2.233, + "step": 5822 + }, + { + "epoch": 0.19, + "grad_norm": 0.6983395218849182, + "learning_rate": 1.8441718394324417e-05, + "loss": 2.1928, + "step": 5823 + }, + { + "epoch": 0.19, + "grad_norm": 0.7055501341819763, + "learning_rate": 1.8441148558788083e-05, + "loss": 2.2157, + "step": 5824 + }, + { + "epoch": 0.19, + "grad_norm": 0.7153393626213074, + "learning_rate": 1.8440578627888597e-05, + "loss": 2.2753, + "step": 5825 + }, + { + "epoch": 0.19, + "grad_norm": 0.6805808544158936, + "learning_rate": 1.8440008601632406e-05, + "loss": 2.1886, + "step": 5826 + }, + { + "epoch": 0.19, + "grad_norm": 0.7000783681869507, + "learning_rate": 1.843943848002594e-05, + "loss": 2.1792, + "step": 5827 + }, + { + "epoch": 0.19, + "grad_norm": 0.7028132081031799, + "learning_rate": 1.8438868263075646e-05, + "loss": 2.1927, + "step": 5828 + }, + { + "epoch": 0.19, + "grad_norm": 0.6968508362770081, + "learning_rate": 1.8438297950787966e-05, + "loss": 2.1809, + "step": 5829 + }, + { + "epoch": 0.19, + "grad_norm": 0.6766806244850159, + "learning_rate": 1.843772754316934e-05, + "loss": 2.1805, + "step": 5830 + }, + { + "epoch": 0.19, + "grad_norm": 0.6885875463485718, + "learning_rate": 1.843715704022621e-05, + "loss": 2.2798, + "step": 5831 + }, + { + "epoch": 0.19, + "grad_norm": 0.6872454285621643, + "learning_rate": 1.8436586441965025e-05, + "loss": 2.1742, + "step": 5832 + }, + { + "epoch": 0.19, + "grad_norm": 0.6760377883911133, + "learning_rate": 1.8436015748392232e-05, + "loss": 2.1702, + "step": 5833 + }, + { + "epoch": 0.19, + "grad_norm": 0.6895399689674377, + "learning_rate": 1.8435444959514278e-05, + "loss": 2.2661, + "step": 5834 + }, + { + "epoch": 0.19, + "grad_norm": 0.7271701097488403, + "learning_rate": 1.8434874075337608e-05, + "loss": 2.1619, + "step": 5835 + }, + { + "epoch": 0.19, + "grad_norm": 0.7000858187675476, + "learning_rate": 1.843430309586868e-05, + "loss": 2.2094, + "step": 5836 + }, + { + "epoch": 0.19, + "grad_norm": 0.7013121843338013, + "learning_rate": 1.8433732021113933e-05, + "loss": 2.1684, + "step": 5837 + }, + { + "epoch": 0.19, + "grad_norm": 0.7426285743713379, + "learning_rate": 1.8433160851079822e-05, + "loss": 2.1785, + "step": 5838 + }, + { + "epoch": 0.19, + "grad_norm": 0.7096011638641357, + "learning_rate": 1.8432589585772808e-05, + "loss": 2.2291, + "step": 5839 + }, + { + "epoch": 0.19, + "grad_norm": 0.7322856187820435, + "learning_rate": 1.8432018225199337e-05, + "loss": 2.1919, + "step": 5840 + }, + { + "epoch": 0.19, + "grad_norm": 0.7045252323150635, + "learning_rate": 1.8431446769365867e-05, + "loss": 2.1675, + "step": 5841 + }, + { + "epoch": 0.19, + "grad_norm": 0.6601688861846924, + "learning_rate": 1.8430875218278847e-05, + "loss": 2.2005, + "step": 5842 + }, + { + "epoch": 0.19, + "grad_norm": 0.7153899073600769, + "learning_rate": 1.8430303571944744e-05, + "loss": 2.206, + "step": 5843 + }, + { + "epoch": 0.19, + "grad_norm": 0.6839788556098938, + "learning_rate": 1.8429731830370016e-05, + "loss": 2.2015, + "step": 5844 + }, + { + "epoch": 0.19, + "grad_norm": 0.7272733449935913, + "learning_rate": 1.842915999356111e-05, + "loss": 2.2061, + "step": 5845 + }, + { + "epoch": 0.19, + "grad_norm": 0.7017080783843994, + "learning_rate": 1.8428588061524498e-05, + "loss": 2.2193, + "step": 5846 + }, + { + "epoch": 0.19, + "grad_norm": 0.6546066999435425, + "learning_rate": 1.8428016034266637e-05, + "loss": 2.1681, + "step": 5847 + }, + { + "epoch": 0.19, + "grad_norm": 0.6774407625198364, + "learning_rate": 1.8427443911793993e-05, + "loss": 2.1968, + "step": 5848 + }, + { + "epoch": 0.19, + "grad_norm": 0.6878687739372253, + "learning_rate": 1.8426871694113023e-05, + "loss": 2.257, + "step": 5849 + }, + { + "epoch": 0.19, + "grad_norm": 0.6821404695510864, + "learning_rate": 1.8426299381230198e-05, + "loss": 2.2435, + "step": 5850 + }, + { + "epoch": 0.19, + "grad_norm": 0.6938878297805786, + "learning_rate": 1.842572697315198e-05, + "loss": 2.228, + "step": 5851 + }, + { + "epoch": 0.19, + "grad_norm": 0.6978577971458435, + "learning_rate": 1.8425154469884834e-05, + "loss": 2.1483, + "step": 5852 + }, + { + "epoch": 0.19, + "grad_norm": 0.6769468784332275, + "learning_rate": 1.8424581871435232e-05, + "loss": 2.1544, + "step": 5853 + }, + { + "epoch": 0.19, + "grad_norm": 0.6915081739425659, + "learning_rate": 1.8424009177809644e-05, + "loss": 2.1374, + "step": 5854 + }, + { + "epoch": 0.19, + "grad_norm": 0.7178905010223389, + "learning_rate": 1.8423436389014534e-05, + "loss": 2.1612, + "step": 5855 + }, + { + "epoch": 0.19, + "grad_norm": 0.7068578600883484, + "learning_rate": 1.842286350505638e-05, + "loss": 2.1994, + "step": 5856 + }, + { + "epoch": 0.19, + "grad_norm": 0.7085909247398376, + "learning_rate": 1.8422290525941647e-05, + "loss": 2.1842, + "step": 5857 + }, + { + "epoch": 0.19, + "grad_norm": 0.7198071479797363, + "learning_rate": 1.842171745167681e-05, + "loss": 2.1736, + "step": 5858 + }, + { + "epoch": 0.19, + "grad_norm": 0.6914926767349243, + "learning_rate": 1.8421144282268352e-05, + "loss": 2.1614, + "step": 5859 + }, + { + "epoch": 0.19, + "grad_norm": 0.6952751278877258, + "learning_rate": 1.8420571017722736e-05, + "loss": 2.1973, + "step": 5860 + }, + { + "epoch": 0.19, + "grad_norm": 0.7335081696510315, + "learning_rate": 1.841999765804644e-05, + "loss": 2.1579, + "step": 5861 + }, + { + "epoch": 0.2, + "grad_norm": 0.7027428150177002, + "learning_rate": 1.841942420324595e-05, + "loss": 2.2104, + "step": 5862 + }, + { + "epoch": 0.2, + "grad_norm": 0.7556118369102478, + "learning_rate": 1.841885065332774e-05, + "loss": 2.2314, + "step": 5863 + }, + { + "epoch": 0.2, + "grad_norm": 0.6858674883842468, + "learning_rate": 1.841827700829829e-05, + "loss": 2.1902, + "step": 5864 + }, + { + "epoch": 0.2, + "grad_norm": 0.6955288052558899, + "learning_rate": 1.8417703268164075e-05, + "loss": 2.2019, + "step": 5865 + }, + { + "epoch": 0.2, + "grad_norm": 0.6943233013153076, + "learning_rate": 1.8417129432931587e-05, + "loss": 2.2263, + "step": 5866 + }, + { + "epoch": 0.2, + "grad_norm": 0.7108997702598572, + "learning_rate": 1.8416555502607297e-05, + "loss": 2.2253, + "step": 5867 + }, + { + "epoch": 0.2, + "grad_norm": 0.6960964798927307, + "learning_rate": 1.84159814771977e-05, + "loss": 2.1652, + "step": 5868 + }, + { + "epoch": 0.2, + "grad_norm": 0.7019452452659607, + "learning_rate": 1.841540735670928e-05, + "loss": 2.1728, + "step": 5869 + }, + { + "epoch": 0.2, + "grad_norm": 0.6867461800575256, + "learning_rate": 1.8414833141148512e-05, + "loss": 2.1422, + "step": 5870 + }, + { + "epoch": 0.2, + "grad_norm": 0.7390266060829163, + "learning_rate": 1.84142588305219e-05, + "loss": 2.1594, + "step": 5871 + }, + { + "epoch": 0.2, + "grad_norm": 0.6901983022689819, + "learning_rate": 1.8413684424835913e-05, + "loss": 2.1657, + "step": 5872 + }, + { + "epoch": 0.2, + "grad_norm": 0.6811671853065491, + "learning_rate": 1.8413109924097058e-05, + "loss": 2.1677, + "step": 5873 + }, + { + "epoch": 0.2, + "grad_norm": 0.6941006779670715, + "learning_rate": 1.8412535328311813e-05, + "loss": 2.2289, + "step": 5874 + }, + { + "epoch": 0.2, + "grad_norm": 0.7869853973388672, + "learning_rate": 1.8411960637486676e-05, + "loss": 2.2051, + "step": 5875 + }, + { + "epoch": 0.2, + "grad_norm": 0.7169014811515808, + "learning_rate": 1.841138585162814e-05, + "loss": 2.2509, + "step": 5876 + }, + { + "epoch": 0.2, + "grad_norm": 0.7121827006340027, + "learning_rate": 1.8410810970742693e-05, + "loss": 2.1844, + "step": 5877 + }, + { + "epoch": 0.2, + "grad_norm": 0.6684106588363647, + "learning_rate": 1.841023599483683e-05, + "loss": 2.1817, + "step": 5878 + }, + { + "epoch": 0.2, + "grad_norm": 0.7329347729682922, + "learning_rate": 1.8409660923917055e-05, + "loss": 2.143, + "step": 5879 + }, + { + "epoch": 0.2, + "grad_norm": 0.6933580636978149, + "learning_rate": 1.8409085757989857e-05, + "loss": 2.1866, + "step": 5880 + }, + { + "epoch": 0.2, + "grad_norm": 0.7033088803291321, + "learning_rate": 1.8408510497061738e-05, + "loss": 2.257, + "step": 5881 + }, + { + "epoch": 0.2, + "grad_norm": 0.726311981678009, + "learning_rate": 1.8407935141139192e-05, + "loss": 2.2082, + "step": 5882 + }, + { + "epoch": 0.2, + "grad_norm": 0.762786328792572, + "learning_rate": 1.8407359690228725e-05, + "loss": 2.1684, + "step": 5883 + }, + { + "epoch": 0.2, + "grad_norm": 0.6952560544013977, + "learning_rate": 1.8406784144336834e-05, + "loss": 2.2183, + "step": 5884 + }, + { + "epoch": 0.2, + "grad_norm": 0.695530354976654, + "learning_rate": 1.8406208503470023e-05, + "loss": 2.2046, + "step": 5885 + }, + { + "epoch": 0.2, + "grad_norm": 0.6944941282272339, + "learning_rate": 1.8405632767634796e-05, + "loss": 2.2882, + "step": 5886 + }, + { + "epoch": 0.2, + "grad_norm": 0.6655538082122803, + "learning_rate": 1.8405056936837654e-05, + "loss": 2.1245, + "step": 5887 + }, + { + "epoch": 0.2, + "grad_norm": 0.6736990213394165, + "learning_rate": 1.8404481011085108e-05, + "loss": 2.2207, + "step": 5888 + }, + { + "epoch": 0.2, + "grad_norm": 0.6976009011268616, + "learning_rate": 1.8403904990383657e-05, + "loss": 2.1988, + "step": 5889 + }, + { + "epoch": 0.2, + "grad_norm": 0.694331705570221, + "learning_rate": 1.8403328874739815e-05, + "loss": 2.2709, + "step": 5890 + }, + { + "epoch": 0.2, + "grad_norm": 0.7157639265060425, + "learning_rate": 1.8402752664160087e-05, + "loss": 2.191, + "step": 5891 + }, + { + "epoch": 0.2, + "grad_norm": 0.6894098520278931, + "learning_rate": 1.8402176358650984e-05, + "loss": 2.1705, + "step": 5892 + }, + { + "epoch": 0.2, + "grad_norm": 0.7596185803413391, + "learning_rate": 1.840159995821902e-05, + "loss": 2.1419, + "step": 5893 + }, + { + "epoch": 0.2, + "grad_norm": 0.710532546043396, + "learning_rate": 1.84010234628707e-05, + "loss": 2.1763, + "step": 5894 + }, + { + "epoch": 0.2, + "grad_norm": 0.6919721961021423, + "learning_rate": 1.840044687261254e-05, + "loss": 2.2056, + "step": 5895 + }, + { + "epoch": 0.2, + "grad_norm": 0.6842281222343445, + "learning_rate": 1.8399870187451055e-05, + "loss": 2.2562, + "step": 5896 + }, + { + "epoch": 0.2, + "grad_norm": 0.7018915414810181, + "learning_rate": 1.839929340739276e-05, + "loss": 2.263, + "step": 5897 + }, + { + "epoch": 0.2, + "grad_norm": 0.7027345895767212, + "learning_rate": 1.8398716532444173e-05, + "loss": 2.1569, + "step": 5898 + }, + { + "epoch": 0.2, + "grad_norm": 0.7313182353973389, + "learning_rate": 1.8398139562611805e-05, + "loss": 2.2466, + "step": 5899 + }, + { + "epoch": 0.2, + "grad_norm": 0.6955164670944214, + "learning_rate": 1.839756249790218e-05, + "loss": 2.1344, + "step": 5900 + }, + { + "epoch": 0.2, + "grad_norm": 0.7044562101364136, + "learning_rate": 1.8396985338321814e-05, + "loss": 2.2207, + "step": 5901 + }, + { + "epoch": 0.2, + "grad_norm": 0.7260205149650574, + "learning_rate": 1.839640808387723e-05, + "loss": 2.1719, + "step": 5902 + }, + { + "epoch": 0.2, + "grad_norm": 0.676543116569519, + "learning_rate": 1.8395830734574947e-05, + "loss": 2.1415, + "step": 5903 + }, + { + "epoch": 0.2, + "grad_norm": 0.7353396415710449, + "learning_rate": 1.839525329042149e-05, + "loss": 2.2341, + "step": 5904 + }, + { + "epoch": 0.2, + "grad_norm": 0.6957465410232544, + "learning_rate": 1.8394675751423382e-05, + "loss": 2.2027, + "step": 5905 + }, + { + "epoch": 0.2, + "grad_norm": 0.7285893559455872, + "learning_rate": 1.8394098117587145e-05, + "loss": 2.2132, + "step": 5906 + }, + { + "epoch": 0.2, + "grad_norm": 0.700844407081604, + "learning_rate": 1.839352038891931e-05, + "loss": 2.1647, + "step": 5907 + }, + { + "epoch": 0.2, + "grad_norm": 0.670282244682312, + "learning_rate": 1.8392942565426395e-05, + "loss": 2.1663, + "step": 5908 + }, + { + "epoch": 0.2, + "grad_norm": 0.681713342666626, + "learning_rate": 1.839236464711494e-05, + "loss": 2.1263, + "step": 5909 + }, + { + "epoch": 0.2, + "grad_norm": 0.707585334777832, + "learning_rate": 1.8391786633991463e-05, + "loss": 2.1834, + "step": 5910 + }, + { + "epoch": 0.2, + "grad_norm": 0.7204396724700928, + "learning_rate": 1.83912085260625e-05, + "loss": 2.3074, + "step": 5911 + }, + { + "epoch": 0.2, + "grad_norm": 0.7059338092803955, + "learning_rate": 1.8390630323334582e-05, + "loss": 2.261, + "step": 5912 + }, + { + "epoch": 0.2, + "grad_norm": 0.6968511343002319, + "learning_rate": 1.839005202581424e-05, + "loss": 2.196, + "step": 5913 + }, + { + "epoch": 0.2, + "grad_norm": 0.6919164061546326, + "learning_rate": 1.8389473633508007e-05, + "loss": 2.1883, + "step": 5914 + }, + { + "epoch": 0.2, + "grad_norm": 0.713945746421814, + "learning_rate": 1.8388895146422417e-05, + "loss": 2.3184, + "step": 5915 + }, + { + "epoch": 0.2, + "grad_norm": 0.6772942543029785, + "learning_rate": 1.8388316564564005e-05, + "loss": 2.2151, + "step": 5916 + }, + { + "epoch": 0.2, + "grad_norm": 0.6983880400657654, + "learning_rate": 1.838773788793931e-05, + "loss": 2.1479, + "step": 5917 + }, + { + "epoch": 0.2, + "grad_norm": 0.69270259141922, + "learning_rate": 1.8387159116554868e-05, + "loss": 2.1717, + "step": 5918 + }, + { + "epoch": 0.2, + "grad_norm": 0.6878052353858948, + "learning_rate": 1.838658025041722e-05, + "loss": 2.1714, + "step": 5919 + }, + { + "epoch": 0.2, + "grad_norm": 0.6984413862228394, + "learning_rate": 1.83860012895329e-05, + "loss": 2.2791, + "step": 5920 + }, + { + "epoch": 0.2, + "grad_norm": 0.6905718445777893, + "learning_rate": 1.8385422233908452e-05, + "loss": 2.2023, + "step": 5921 + }, + { + "epoch": 0.2, + "grad_norm": 0.7092013955116272, + "learning_rate": 1.838484308355042e-05, + "loss": 2.1796, + "step": 5922 + }, + { + "epoch": 0.2, + "grad_norm": 0.6702136397361755, + "learning_rate": 1.8384263838465343e-05, + "loss": 2.1786, + "step": 5923 + }, + { + "epoch": 0.2, + "grad_norm": 0.7048898935317993, + "learning_rate": 1.8383684498659766e-05, + "loss": 2.231, + "step": 5924 + }, + { + "epoch": 0.2, + "grad_norm": 0.6839735507965088, + "learning_rate": 1.8383105064140236e-05, + "loss": 2.1057, + "step": 5925 + }, + { + "epoch": 0.2, + "grad_norm": 0.6794403791427612, + "learning_rate": 1.83825255349133e-05, + "loss": 2.1605, + "step": 5926 + }, + { + "epoch": 0.2, + "grad_norm": 0.7457671761512756, + "learning_rate": 1.83819459109855e-05, + "loss": 2.1542, + "step": 5927 + }, + { + "epoch": 0.2, + "grad_norm": 0.6855442523956299, + "learning_rate": 1.838136619236339e-05, + "loss": 2.1861, + "step": 5928 + }, + { + "epoch": 0.2, + "grad_norm": 0.7052209973335266, + "learning_rate": 1.8380786379053516e-05, + "loss": 2.1828, + "step": 5929 + }, + { + "epoch": 0.2, + "grad_norm": 0.6917964816093445, + "learning_rate": 1.838020647106243e-05, + "loss": 2.2451, + "step": 5930 + }, + { + "epoch": 0.2, + "grad_norm": 0.7004150152206421, + "learning_rate": 1.8379626468396677e-05, + "loss": 2.1842, + "step": 5931 + }, + { + "epoch": 0.2, + "grad_norm": 0.7099143266677856, + "learning_rate": 1.837904637106282e-05, + "loss": 2.214, + "step": 5932 + }, + { + "epoch": 0.2, + "grad_norm": 0.7352455258369446, + "learning_rate": 1.8378466179067407e-05, + "loss": 2.1771, + "step": 5933 + }, + { + "epoch": 0.2, + "grad_norm": 0.6866982579231262, + "learning_rate": 1.8377885892416994e-05, + "loss": 2.2085, + "step": 5934 + }, + { + "epoch": 0.2, + "grad_norm": 0.6692179441452026, + "learning_rate": 1.8377305511118137e-05, + "loss": 2.1688, + "step": 5935 + }, + { + "epoch": 0.2, + "grad_norm": 0.7044215202331543, + "learning_rate": 1.837672503517739e-05, + "loss": 2.1047, + "step": 5936 + }, + { + "epoch": 0.2, + "grad_norm": 0.6862626671791077, + "learning_rate": 1.8376144464601314e-05, + "loss": 2.1846, + "step": 5937 + }, + { + "epoch": 0.2, + "grad_norm": 0.7199077010154724, + "learning_rate": 1.8375563799396468e-05, + "loss": 2.2083, + "step": 5938 + }, + { + "epoch": 0.2, + "grad_norm": 0.694146990776062, + "learning_rate": 1.8374983039569408e-05, + "loss": 2.2173, + "step": 5939 + }, + { + "epoch": 0.2, + "grad_norm": 0.6816791892051697, + "learning_rate": 1.8374402185126698e-05, + "loss": 2.1635, + "step": 5940 + }, + { + "epoch": 0.2, + "grad_norm": 0.6968711614608765, + "learning_rate": 1.83738212360749e-05, + "loss": 2.1973, + "step": 5941 + }, + { + "epoch": 0.2, + "grad_norm": 0.7187376022338867, + "learning_rate": 1.8373240192420578e-05, + "loss": 2.1864, + "step": 5942 + }, + { + "epoch": 0.2, + "grad_norm": 0.6808293461799622, + "learning_rate": 1.8372659054170294e-05, + "loss": 2.2394, + "step": 5943 + }, + { + "epoch": 0.2, + "grad_norm": 0.7138363718986511, + "learning_rate": 1.8372077821330617e-05, + "loss": 2.2057, + "step": 5944 + }, + { + "epoch": 0.2, + "grad_norm": 0.6983692049980164, + "learning_rate": 1.837149649390811e-05, + "loss": 2.2146, + "step": 5945 + }, + { + "epoch": 0.2, + "grad_norm": 0.7370388507843018, + "learning_rate": 1.8370915071909345e-05, + "loss": 2.2344, + "step": 5946 + }, + { + "epoch": 0.2, + "grad_norm": 0.7056685090065002, + "learning_rate": 1.8370333555340885e-05, + "loss": 2.2289, + "step": 5947 + }, + { + "epoch": 0.2, + "grad_norm": 0.689792811870575, + "learning_rate": 1.83697519442093e-05, + "loss": 2.2218, + "step": 5948 + }, + { + "epoch": 0.2, + "grad_norm": 0.6775859594345093, + "learning_rate": 1.8369170238521166e-05, + "loss": 2.1443, + "step": 5949 + }, + { + "epoch": 0.2, + "grad_norm": 0.6931392550468445, + "learning_rate": 1.836858843828305e-05, + "loss": 2.2422, + "step": 5950 + }, + { + "epoch": 0.2, + "grad_norm": 0.7106674313545227, + "learning_rate": 1.8368006543501527e-05, + "loss": 2.279, + "step": 5951 + }, + { + "epoch": 0.2, + "grad_norm": 0.6893783211708069, + "learning_rate": 1.8367424554183166e-05, + "loss": 2.2492, + "step": 5952 + }, + { + "epoch": 0.2, + "grad_norm": 0.7215111255645752, + "learning_rate": 1.8366842470334553e-05, + "loss": 2.2211, + "step": 5953 + }, + { + "epoch": 0.2, + "grad_norm": 0.7105419039726257, + "learning_rate": 1.836626029196225e-05, + "loss": 2.2575, + "step": 5954 + }, + { + "epoch": 0.2, + "grad_norm": 0.7024807333946228, + "learning_rate": 1.8365678019072847e-05, + "loss": 2.2265, + "step": 5955 + }, + { + "epoch": 0.2, + "grad_norm": 0.6756042242050171, + "learning_rate": 1.8365095651672914e-05, + "loss": 2.1462, + "step": 5956 + }, + { + "epoch": 0.2, + "grad_norm": 0.7355726361274719, + "learning_rate": 1.8364513189769033e-05, + "loss": 2.2458, + "step": 5957 + }, + { + "epoch": 0.2, + "grad_norm": 0.6921836137771606, + "learning_rate": 1.8363930633367783e-05, + "loss": 2.2163, + "step": 5958 + }, + { + "epoch": 0.2, + "grad_norm": 0.6773610711097717, + "learning_rate": 1.836334798247575e-05, + "loss": 2.2428, + "step": 5959 + }, + { + "epoch": 0.2, + "grad_norm": 0.7215322852134705, + "learning_rate": 1.836276523709951e-05, + "loss": 2.2451, + "step": 5960 + }, + { + "epoch": 0.2, + "grad_norm": 0.7023947834968567, + "learning_rate": 1.8362182397245648e-05, + "loss": 2.2407, + "step": 5961 + }, + { + "epoch": 0.2, + "grad_norm": 0.7059973478317261, + "learning_rate": 1.8361599462920752e-05, + "loss": 2.2726, + "step": 5962 + }, + { + "epoch": 0.2, + "grad_norm": 0.6837860941886902, + "learning_rate": 1.836101643413141e-05, + "loss": 2.2246, + "step": 5963 + }, + { + "epoch": 0.2, + "grad_norm": 0.7055572867393494, + "learning_rate": 1.8360433310884197e-05, + "loss": 2.1929, + "step": 5964 + }, + { + "epoch": 0.2, + "grad_norm": 0.6980744004249573, + "learning_rate": 1.8359850093185713e-05, + "loss": 2.2268, + "step": 5965 + }, + { + "epoch": 0.2, + "grad_norm": 0.6900520920753479, + "learning_rate": 1.835926678104254e-05, + "loss": 2.1952, + "step": 5966 + }, + { + "epoch": 0.2, + "grad_norm": 0.7105039954185486, + "learning_rate": 1.835868337446127e-05, + "loss": 2.2974, + "step": 5967 + }, + { + "epoch": 0.2, + "grad_norm": 0.7339195013046265, + "learning_rate": 1.8358099873448493e-05, + "loss": 2.2795, + "step": 5968 + }, + { + "epoch": 0.2, + "grad_norm": 0.7335793972015381, + "learning_rate": 1.83575162780108e-05, + "loss": 2.2296, + "step": 5969 + }, + { + "epoch": 0.2, + "grad_norm": 0.6875026822090149, + "learning_rate": 1.8356932588154794e-05, + "loss": 2.092, + "step": 5970 + }, + { + "epoch": 0.2, + "grad_norm": 0.6867982745170593, + "learning_rate": 1.8356348803887058e-05, + "loss": 2.1101, + "step": 5971 + }, + { + "epoch": 0.2, + "grad_norm": 0.6770048141479492, + "learning_rate": 1.8355764925214186e-05, + "loss": 2.1351, + "step": 5972 + }, + { + "epoch": 0.2, + "grad_norm": 0.7466998100280762, + "learning_rate": 1.8355180952142782e-05, + "loss": 2.2375, + "step": 5973 + }, + { + "epoch": 0.2, + "grad_norm": 0.7097471952438354, + "learning_rate": 1.835459688467944e-05, + "loss": 2.2135, + "step": 5974 + }, + { + "epoch": 0.2, + "grad_norm": 0.6944347620010376, + "learning_rate": 1.8354012722830758e-05, + "loss": 2.1978, + "step": 5975 + }, + { + "epoch": 0.2, + "grad_norm": 0.6981500387191772, + "learning_rate": 1.8353428466603338e-05, + "loss": 2.2386, + "step": 5976 + }, + { + "epoch": 0.2, + "grad_norm": 0.7420014142990112, + "learning_rate": 1.8352844116003776e-05, + "loss": 2.1666, + "step": 5977 + }, + { + "epoch": 0.2, + "grad_norm": 0.7116607427597046, + "learning_rate": 1.8352259671038683e-05, + "loss": 2.1537, + "step": 5978 + }, + { + "epoch": 0.2, + "grad_norm": 0.7906265258789062, + "learning_rate": 1.8351675131714647e-05, + "loss": 2.2019, + "step": 5979 + }, + { + "epoch": 0.2, + "grad_norm": 0.7103914618492126, + "learning_rate": 1.8351090498038284e-05, + "loss": 2.1767, + "step": 5980 + }, + { + "epoch": 0.2, + "grad_norm": 0.6715589165687561, + "learning_rate": 1.8350505770016192e-05, + "loss": 2.1168, + "step": 5981 + }, + { + "epoch": 0.2, + "grad_norm": 0.7194783091545105, + "learning_rate": 1.8349920947654983e-05, + "loss": 2.1955, + "step": 5982 + }, + { + "epoch": 0.2, + "grad_norm": 0.7149052023887634, + "learning_rate": 1.834933603096126e-05, + "loss": 2.1642, + "step": 5983 + }, + { + "epoch": 0.2, + "grad_norm": 0.6783707141876221, + "learning_rate": 1.8348751019941628e-05, + "loss": 2.1749, + "step": 5984 + }, + { + "epoch": 0.2, + "grad_norm": 0.7093323469161987, + "learning_rate": 1.8348165914602706e-05, + "loss": 2.14, + "step": 5985 + }, + { + "epoch": 0.2, + "grad_norm": 0.7068187594413757, + "learning_rate": 1.8347580714951094e-05, + "loss": 2.2053, + "step": 5986 + }, + { + "epoch": 0.2, + "grad_norm": 0.691927969455719, + "learning_rate": 1.834699542099341e-05, + "loss": 2.196, + "step": 5987 + }, + { + "epoch": 0.2, + "grad_norm": 0.7062950134277344, + "learning_rate": 1.834641003273626e-05, + "loss": 2.1975, + "step": 5988 + }, + { + "epoch": 0.2, + "grad_norm": 0.6762887239456177, + "learning_rate": 1.8345824550186262e-05, + "loss": 2.2288, + "step": 5989 + }, + { + "epoch": 0.2, + "grad_norm": 0.716850221157074, + "learning_rate": 1.8345238973350028e-05, + "loss": 2.2527, + "step": 5990 + }, + { + "epoch": 0.2, + "grad_norm": 0.6900080442428589, + "learning_rate": 1.834465330223418e-05, + "loss": 2.2255, + "step": 5991 + }, + { + "epoch": 0.2, + "grad_norm": 0.7132346630096436, + "learning_rate": 1.8344067536845324e-05, + "loss": 2.2122, + "step": 5992 + }, + { + "epoch": 0.2, + "grad_norm": 0.7375802397727966, + "learning_rate": 1.8343481677190084e-05, + "loss": 2.1508, + "step": 5993 + }, + { + "epoch": 0.2, + "grad_norm": 0.685304582118988, + "learning_rate": 1.834289572327508e-05, + "loss": 2.1748, + "step": 5994 + }, + { + "epoch": 0.2, + "grad_norm": 0.7087296843528748, + "learning_rate": 1.834230967510693e-05, + "loss": 2.1564, + "step": 5995 + }, + { + "epoch": 0.2, + "grad_norm": 0.7595576047897339, + "learning_rate": 1.834172353269225e-05, + "loss": 2.2485, + "step": 5996 + }, + { + "epoch": 0.2, + "grad_norm": 0.6874471306800842, + "learning_rate": 1.8341137296037674e-05, + "loss": 2.2287, + "step": 5997 + }, + { + "epoch": 0.2, + "grad_norm": 0.6641967296600342, + "learning_rate": 1.834055096514981e-05, + "loss": 2.1199, + "step": 5998 + }, + { + "epoch": 0.2, + "grad_norm": 0.6969876885414124, + "learning_rate": 1.833996454003529e-05, + "loss": 2.196, + "step": 5999 + }, + { + "epoch": 0.2, + "grad_norm": 0.7392247319221497, + "learning_rate": 1.8339378020700742e-05, + "loss": 2.2641, + "step": 6000 + }, + { + "epoch": 0.2, + "grad_norm": 0.7043234705924988, + "learning_rate": 1.8338791407152786e-05, + "loss": 2.1198, + "step": 6001 + }, + { + "epoch": 0.2, + "grad_norm": 0.727663516998291, + "learning_rate": 1.8338204699398053e-05, + "loss": 2.2133, + "step": 6002 + }, + { + "epoch": 0.2, + "grad_norm": 0.7107959389686584, + "learning_rate": 1.8337617897443166e-05, + "loss": 2.1887, + "step": 6003 + }, + { + "epoch": 0.2, + "grad_norm": 0.6774876117706299, + "learning_rate": 1.8337031001294763e-05, + "loss": 2.1195, + "step": 6004 + }, + { + "epoch": 0.2, + "grad_norm": 0.7117305397987366, + "learning_rate": 1.8336444010959468e-05, + "loss": 2.1784, + "step": 6005 + }, + { + "epoch": 0.2, + "grad_norm": 0.7036505341529846, + "learning_rate": 1.8335856926443917e-05, + "loss": 2.1756, + "step": 6006 + }, + { + "epoch": 0.2, + "grad_norm": 0.7028264403343201, + "learning_rate": 1.8335269747754735e-05, + "loss": 2.1763, + "step": 6007 + }, + { + "epoch": 0.2, + "grad_norm": 0.7299203872680664, + "learning_rate": 1.8334682474898563e-05, + "loss": 2.2028, + "step": 6008 + }, + { + "epoch": 0.2, + "grad_norm": 0.6820215582847595, + "learning_rate": 1.833409510788203e-05, + "loss": 2.1637, + "step": 6009 + }, + { + "epoch": 0.2, + "grad_norm": 0.6902409195899963, + "learning_rate": 1.833350764671178e-05, + "loss": 2.173, + "step": 6010 + }, + { + "epoch": 0.2, + "grad_norm": 0.7083802819252014, + "learning_rate": 1.833292009139444e-05, + "loss": 2.255, + "step": 6011 + }, + { + "epoch": 0.2, + "grad_norm": 0.6939976811408997, + "learning_rate": 1.8332332441936653e-05, + "loss": 2.1539, + "step": 6012 + }, + { + "epoch": 0.2, + "grad_norm": 0.7012671828269958, + "learning_rate": 1.833174469834506e-05, + "loss": 2.2298, + "step": 6013 + }, + { + "epoch": 0.2, + "grad_norm": 0.7109884023666382, + "learning_rate": 1.83311568606263e-05, + "loss": 2.1514, + "step": 6014 + }, + { + "epoch": 0.2, + "grad_norm": 0.6631449460983276, + "learning_rate": 1.8330568928787005e-05, + "loss": 2.2016, + "step": 6015 + }, + { + "epoch": 0.2, + "grad_norm": 0.6718214154243469, + "learning_rate": 1.832998090283383e-05, + "loss": 2.1483, + "step": 6016 + }, + { + "epoch": 0.2, + "grad_norm": 0.7046425938606262, + "learning_rate": 1.832939278277341e-05, + "loss": 2.1419, + "step": 6017 + }, + { + "epoch": 0.2, + "grad_norm": 0.686935305595398, + "learning_rate": 1.832880456861239e-05, + "loss": 2.1696, + "step": 6018 + }, + { + "epoch": 0.2, + "grad_norm": 0.6837860941886902, + "learning_rate": 1.8328216260357422e-05, + "loss": 2.1746, + "step": 6019 + }, + { + "epoch": 0.2, + "grad_norm": 0.6991344094276428, + "learning_rate": 1.8327627858015147e-05, + "loss": 2.1396, + "step": 6020 + }, + { + "epoch": 0.2, + "grad_norm": 0.730015754699707, + "learning_rate": 1.8327039361592208e-05, + "loss": 2.1834, + "step": 6021 + }, + { + "epoch": 0.2, + "grad_norm": 0.6774981617927551, + "learning_rate": 1.8326450771095266e-05, + "loss": 2.2054, + "step": 6022 + }, + { + "epoch": 0.2, + "grad_norm": 0.7134107351303101, + "learning_rate": 1.8325862086530958e-05, + "loss": 2.2073, + "step": 6023 + }, + { + "epoch": 0.2, + "grad_norm": 0.7216014862060547, + "learning_rate": 1.832527330790594e-05, + "loss": 2.1177, + "step": 6024 + }, + { + "epoch": 0.2, + "grad_norm": 0.7144160866737366, + "learning_rate": 1.8324684435226864e-05, + "loss": 2.1689, + "step": 6025 + }, + { + "epoch": 0.2, + "grad_norm": 0.7272300720214844, + "learning_rate": 1.832409546850038e-05, + "loss": 2.3307, + "step": 6026 + }, + { + "epoch": 0.2, + "grad_norm": 0.7238339185714722, + "learning_rate": 1.8323506407733148e-05, + "loss": 2.2375, + "step": 6027 + }, + { + "epoch": 0.2, + "grad_norm": 0.7258115410804749, + "learning_rate": 1.8322917252931814e-05, + "loss": 2.2396, + "step": 6028 + }, + { + "epoch": 0.2, + "grad_norm": 0.6923658847808838, + "learning_rate": 1.8322328004103044e-05, + "loss": 2.1061, + "step": 6029 + }, + { + "epoch": 0.2, + "grad_norm": 0.7527838349342346, + "learning_rate": 1.8321738661253484e-05, + "loss": 2.1484, + "step": 6030 + }, + { + "epoch": 0.2, + "grad_norm": 0.6811270713806152, + "learning_rate": 1.83211492243898e-05, + "loss": 2.1407, + "step": 6031 + }, + { + "epoch": 0.2, + "grad_norm": 0.7285508513450623, + "learning_rate": 1.8320559693518647e-05, + "loss": 2.2233, + "step": 6032 + }, + { + "epoch": 0.2, + "grad_norm": 0.710042417049408, + "learning_rate": 1.831997006864669e-05, + "loss": 2.2382, + "step": 6033 + }, + { + "epoch": 0.2, + "grad_norm": 0.7149767279624939, + "learning_rate": 1.8319380349780583e-05, + "loss": 2.2301, + "step": 6034 + }, + { + "epoch": 0.2, + "grad_norm": 0.6899021863937378, + "learning_rate": 1.8318790536926996e-05, + "loss": 2.1824, + "step": 6035 + }, + { + "epoch": 0.2, + "grad_norm": 0.6782851815223694, + "learning_rate": 1.831820063009259e-05, + "loss": 2.2167, + "step": 6036 + }, + { + "epoch": 0.2, + "grad_norm": 0.6982631683349609, + "learning_rate": 1.8317610629284025e-05, + "loss": 2.2101, + "step": 6037 + }, + { + "epoch": 0.2, + "grad_norm": 0.6747298240661621, + "learning_rate": 1.8317020534507974e-05, + "loss": 2.1583, + "step": 6038 + }, + { + "epoch": 0.2, + "grad_norm": 0.7257499694824219, + "learning_rate": 1.8316430345771096e-05, + "loss": 2.2401, + "step": 6039 + }, + { + "epoch": 0.2, + "grad_norm": 0.7261691689491272, + "learning_rate": 1.8315840063080063e-05, + "loss": 2.159, + "step": 6040 + }, + { + "epoch": 0.2, + "grad_norm": 0.7353494763374329, + "learning_rate": 1.831524968644154e-05, + "loss": 2.2041, + "step": 6041 + }, + { + "epoch": 0.2, + "grad_norm": 0.8072969913482666, + "learning_rate": 1.83146592158622e-05, + "loss": 2.1891, + "step": 6042 + }, + { + "epoch": 0.2, + "grad_norm": 0.6938116550445557, + "learning_rate": 1.8314068651348713e-05, + "loss": 2.1847, + "step": 6043 + }, + { + "epoch": 0.2, + "grad_norm": 0.7015967965126038, + "learning_rate": 1.8313477992907752e-05, + "loss": 2.1191, + "step": 6044 + }, + { + "epoch": 0.2, + "grad_norm": 0.685150682926178, + "learning_rate": 1.831288724054599e-05, + "loss": 2.2588, + "step": 6045 + }, + { + "epoch": 0.2, + "grad_norm": 0.6879811882972717, + "learning_rate": 1.8312296394270096e-05, + "loss": 2.2719, + "step": 6046 + }, + { + "epoch": 0.2, + "grad_norm": 0.765891969203949, + "learning_rate": 1.831170545408675e-05, + "loss": 2.1679, + "step": 6047 + }, + { + "epoch": 0.2, + "grad_norm": 0.7400086522102356, + "learning_rate": 1.831111442000263e-05, + "loss": 2.2094, + "step": 6048 + }, + { + "epoch": 0.2, + "grad_norm": 0.7193347215652466, + "learning_rate": 1.8310523292024407e-05, + "loss": 2.1197, + "step": 6049 + }, + { + "epoch": 0.2, + "grad_norm": 0.7543421983718872, + "learning_rate": 1.8309932070158763e-05, + "loss": 2.2343, + "step": 6050 + }, + { + "epoch": 0.2, + "grad_norm": 0.7831417322158813, + "learning_rate": 1.830934075441238e-05, + "loss": 2.2343, + "step": 6051 + }, + { + "epoch": 0.2, + "grad_norm": 0.7583911418914795, + "learning_rate": 1.830874934479193e-05, + "loss": 2.1702, + "step": 6052 + }, + { + "epoch": 0.2, + "grad_norm": 0.7281417846679688, + "learning_rate": 1.8308157841304102e-05, + "loss": 2.2591, + "step": 6053 + }, + { + "epoch": 0.2, + "grad_norm": 0.6615597605705261, + "learning_rate": 1.8307566243955573e-05, + "loss": 2.1288, + "step": 6054 + }, + { + "epoch": 0.2, + "grad_norm": 0.724187433719635, + "learning_rate": 1.8306974552753032e-05, + "loss": 2.2371, + "step": 6055 + }, + { + "epoch": 0.2, + "grad_norm": 0.6731974482536316, + "learning_rate": 1.830638276770316e-05, + "loss": 2.1724, + "step": 6056 + }, + { + "epoch": 0.2, + "grad_norm": 0.7219492793083191, + "learning_rate": 1.8305790888812644e-05, + "loss": 2.1728, + "step": 6057 + }, + { + "epoch": 0.2, + "grad_norm": 0.7225202918052673, + "learning_rate": 1.830519891608817e-05, + "loss": 2.1779, + "step": 6058 + }, + { + "epoch": 0.2, + "grad_norm": 0.7043360471725464, + "learning_rate": 1.8304606849536425e-05, + "loss": 2.1872, + "step": 6059 + }, + { + "epoch": 0.2, + "grad_norm": 0.6997250318527222, + "learning_rate": 1.83040146891641e-05, + "loss": 2.1889, + "step": 6060 + }, + { + "epoch": 0.2, + "grad_norm": 0.7361088991165161, + "learning_rate": 1.830342243497788e-05, + "loss": 2.2281, + "step": 6061 + }, + { + "epoch": 0.2, + "grad_norm": 0.738519549369812, + "learning_rate": 1.8302830086984465e-05, + "loss": 2.2028, + "step": 6062 + }, + { + "epoch": 0.2, + "grad_norm": 0.7191739678382874, + "learning_rate": 1.8302237645190543e-05, + "loss": 2.15, + "step": 6063 + }, + { + "epoch": 0.2, + "grad_norm": 0.7262798547744751, + "learning_rate": 1.8301645109602798e-05, + "loss": 2.184, + "step": 6064 + }, + { + "epoch": 0.2, + "grad_norm": 0.6967772245407104, + "learning_rate": 1.830105248022794e-05, + "loss": 2.1784, + "step": 6065 + }, + { + "epoch": 0.2, + "grad_norm": 0.7169735431671143, + "learning_rate": 1.830045975707265e-05, + "loss": 2.1693, + "step": 6066 + }, + { + "epoch": 0.2, + "grad_norm": 0.7417337894439697, + "learning_rate": 1.8299866940143635e-05, + "loss": 2.1241, + "step": 6067 + }, + { + "epoch": 0.2, + "grad_norm": 0.7590447664260864, + "learning_rate": 1.8299274029447583e-05, + "loss": 2.2769, + "step": 6068 + }, + { + "epoch": 0.2, + "grad_norm": 0.7754830121994019, + "learning_rate": 1.82986810249912e-05, + "loss": 2.2303, + "step": 6069 + }, + { + "epoch": 0.2, + "grad_norm": 0.727974534034729, + "learning_rate": 1.829808792678118e-05, + "loss": 2.2446, + "step": 6070 + }, + { + "epoch": 0.2, + "grad_norm": 0.7034462690353394, + "learning_rate": 1.8297494734824225e-05, + "loss": 2.2155, + "step": 6071 + }, + { + "epoch": 0.2, + "grad_norm": 0.7213855981826782, + "learning_rate": 1.829690144912704e-05, + "loss": 2.1292, + "step": 6072 + }, + { + "epoch": 0.2, + "grad_norm": 0.7275819778442383, + "learning_rate": 1.8296308069696322e-05, + "loss": 2.2415, + "step": 6073 + }, + { + "epoch": 0.2, + "grad_norm": 0.706141471862793, + "learning_rate": 1.829571459653878e-05, + "loss": 2.1955, + "step": 6074 + }, + { + "epoch": 0.2, + "grad_norm": 0.7414873242378235, + "learning_rate": 1.8295121029661116e-05, + "loss": 2.1622, + "step": 6075 + }, + { + "epoch": 0.2, + "grad_norm": 0.7358091473579407, + "learning_rate": 1.8294527369070036e-05, + "loss": 2.1321, + "step": 6076 + }, + { + "epoch": 0.2, + "grad_norm": 0.6925868391990662, + "learning_rate": 1.8293933614772245e-05, + "loss": 2.2351, + "step": 6077 + }, + { + "epoch": 0.2, + "grad_norm": 0.7007365226745605, + "learning_rate": 1.8293339766774454e-05, + "loss": 2.1834, + "step": 6078 + }, + { + "epoch": 0.2, + "grad_norm": 0.7314189672470093, + "learning_rate": 1.829274582508337e-05, + "loss": 2.1988, + "step": 6079 + }, + { + "epoch": 0.2, + "grad_norm": 0.7194808721542358, + "learning_rate": 1.8292151789705707e-05, + "loss": 2.1799, + "step": 6080 + }, + { + "epoch": 0.2, + "grad_norm": 0.6773965954780579, + "learning_rate": 1.829155766064817e-05, + "loss": 2.2208, + "step": 6081 + }, + { + "epoch": 0.2, + "grad_norm": 0.6956982612609863, + "learning_rate": 1.8290963437917474e-05, + "loss": 2.1525, + "step": 6082 + }, + { + "epoch": 0.2, + "grad_norm": 0.7528073787689209, + "learning_rate": 1.829036912152033e-05, + "loss": 2.1559, + "step": 6083 + }, + { + "epoch": 0.2, + "grad_norm": 0.6957526803016663, + "learning_rate": 1.828977471146346e-05, + "loss": 2.183, + "step": 6084 + }, + { + "epoch": 0.2, + "grad_norm": 0.6852614879608154, + "learning_rate": 1.828918020775357e-05, + "loss": 2.2011, + "step": 6085 + }, + { + "epoch": 0.2, + "grad_norm": 0.704866349697113, + "learning_rate": 1.828858561039738e-05, + "loss": 2.2265, + "step": 6086 + }, + { + "epoch": 0.2, + "grad_norm": 0.7121172547340393, + "learning_rate": 1.8287990919401607e-05, + "loss": 2.1845, + "step": 6087 + }, + { + "epoch": 0.2, + "grad_norm": 0.7090650796890259, + "learning_rate": 1.8287396134772967e-05, + "loss": 2.2106, + "step": 6088 + }, + { + "epoch": 0.2, + "grad_norm": 0.7217909693717957, + "learning_rate": 1.8286801256518187e-05, + "loss": 2.2171, + "step": 6089 + }, + { + "epoch": 0.2, + "grad_norm": 0.6990750432014465, + "learning_rate": 1.8286206284643983e-05, + "loss": 2.1908, + "step": 6090 + }, + { + "epoch": 0.2, + "grad_norm": 0.6932628154754639, + "learning_rate": 1.8285611219157076e-05, + "loss": 2.1829, + "step": 6091 + }, + { + "epoch": 0.2, + "grad_norm": 0.6944264769554138, + "learning_rate": 1.8285016060064186e-05, + "loss": 2.2011, + "step": 6092 + }, + { + "epoch": 0.2, + "grad_norm": 0.6755760908126831, + "learning_rate": 1.8284420807372044e-05, + "loss": 2.1698, + "step": 6093 + }, + { + "epoch": 0.2, + "grad_norm": 0.7104582190513611, + "learning_rate": 1.828382546108737e-05, + "loss": 2.204, + "step": 6094 + }, + { + "epoch": 0.2, + "grad_norm": 0.7113857865333557, + "learning_rate": 1.8283230021216888e-05, + "loss": 2.2104, + "step": 6095 + }, + { + "epoch": 0.2, + "grad_norm": 0.6951066255569458, + "learning_rate": 1.828263448776733e-05, + "loss": 2.1829, + "step": 6096 + }, + { + "epoch": 0.2, + "grad_norm": 0.691688060760498, + "learning_rate": 1.8282038860745424e-05, + "loss": 2.2764, + "step": 6097 + }, + { + "epoch": 0.2, + "grad_norm": 0.7030866742134094, + "learning_rate": 1.8281443140157893e-05, + "loss": 2.1391, + "step": 6098 + }, + { + "epoch": 0.2, + "grad_norm": 0.7055807709693909, + "learning_rate": 1.828084732601147e-05, + "loss": 2.2015, + "step": 6099 + }, + { + "epoch": 0.2, + "grad_norm": 0.7225607633590698, + "learning_rate": 1.828025141831289e-05, + "loss": 2.2028, + "step": 6100 + }, + { + "epoch": 0.2, + "grad_norm": 0.7286145091056824, + "learning_rate": 1.8279655417068883e-05, + "loss": 2.1027, + "step": 6101 + }, + { + "epoch": 0.2, + "grad_norm": 0.6851641535758972, + "learning_rate": 1.8279059322286177e-05, + "loss": 2.2547, + "step": 6102 + }, + { + "epoch": 0.2, + "grad_norm": 0.6705181002616882, + "learning_rate": 1.8278463133971515e-05, + "loss": 2.1414, + "step": 6103 + }, + { + "epoch": 0.2, + "grad_norm": 0.7223976850509644, + "learning_rate": 1.827786685213163e-05, + "loss": 2.1997, + "step": 6104 + }, + { + "epoch": 0.2, + "grad_norm": 0.6816443800926208, + "learning_rate": 1.827727047677325e-05, + "loss": 2.2058, + "step": 6105 + }, + { + "epoch": 0.2, + "grad_norm": 0.7550622224807739, + "learning_rate": 1.8276674007903122e-05, + "loss": 2.1709, + "step": 6106 + }, + { + "epoch": 0.2, + "grad_norm": 0.7704421877861023, + "learning_rate": 1.8276077445527983e-05, + "loss": 2.1888, + "step": 6107 + }, + { + "epoch": 0.2, + "grad_norm": 0.745924711227417, + "learning_rate": 1.827548078965457e-05, + "loss": 2.224, + "step": 6108 + }, + { + "epoch": 0.2, + "grad_norm": 0.6951000690460205, + "learning_rate": 1.8274884040289623e-05, + "loss": 2.1383, + "step": 6109 + }, + { + "epoch": 0.2, + "grad_norm": 0.6771449446678162, + "learning_rate": 1.8274287197439887e-05, + "loss": 2.1856, + "step": 6110 + }, + { + "epoch": 0.2, + "grad_norm": 0.7057459354400635, + "learning_rate": 1.8273690261112106e-05, + "loss": 2.182, + "step": 6111 + }, + { + "epoch": 0.2, + "grad_norm": 0.7333860993385315, + "learning_rate": 1.827309323131302e-05, + "loss": 2.14, + "step": 6112 + }, + { + "epoch": 0.2, + "grad_norm": 0.7036881446838379, + "learning_rate": 1.8272496108049377e-05, + "loss": 2.1503, + "step": 6113 + }, + { + "epoch": 0.2, + "grad_norm": 0.6983610391616821, + "learning_rate": 1.8271898891327917e-05, + "loss": 2.1565, + "step": 6114 + }, + { + "epoch": 0.2, + "grad_norm": 0.7313774824142456, + "learning_rate": 1.8271301581155395e-05, + "loss": 2.1619, + "step": 6115 + }, + { + "epoch": 0.2, + "grad_norm": 0.6971172094345093, + "learning_rate": 1.827070417753855e-05, + "loss": 2.1996, + "step": 6116 + }, + { + "epoch": 0.2, + "grad_norm": 0.7039988040924072, + "learning_rate": 1.827010668048414e-05, + "loss": 2.2181, + "step": 6117 + }, + { + "epoch": 0.2, + "grad_norm": 0.7069287300109863, + "learning_rate": 1.826950908999891e-05, + "loss": 2.2428, + "step": 6118 + }, + { + "epoch": 0.2, + "grad_norm": 0.695624589920044, + "learning_rate": 1.8268911406089615e-05, + "loss": 2.1869, + "step": 6119 + }, + { + "epoch": 0.2, + "grad_norm": 0.7200223803520203, + "learning_rate": 1.8268313628763004e-05, + "loss": 2.1601, + "step": 6120 + }, + { + "epoch": 0.2, + "grad_norm": 0.704347550868988, + "learning_rate": 1.826771575802583e-05, + "loss": 2.0877, + "step": 6121 + }, + { + "epoch": 0.2, + "grad_norm": 0.6993927359580994, + "learning_rate": 1.826711779388485e-05, + "loss": 2.146, + "step": 6122 + }, + { + "epoch": 0.2, + "grad_norm": 0.7171605229377747, + "learning_rate": 1.826651973634682e-05, + "loss": 2.1704, + "step": 6123 + }, + { + "epoch": 0.2, + "grad_norm": 0.7352426052093506, + "learning_rate": 1.8265921585418488e-05, + "loss": 2.2503, + "step": 6124 + }, + { + "epoch": 0.2, + "grad_norm": 0.6969130039215088, + "learning_rate": 1.8265323341106628e-05, + "loss": 2.1811, + "step": 6125 + }, + { + "epoch": 0.2, + "grad_norm": 0.7109573483467102, + "learning_rate": 1.826472500341798e-05, + "loss": 2.2227, + "step": 6126 + }, + { + "epoch": 0.2, + "grad_norm": 0.745374321937561, + "learning_rate": 1.8264126572359317e-05, + "loss": 2.256, + "step": 6127 + }, + { + "epoch": 0.2, + "grad_norm": 0.6687126755714417, + "learning_rate": 1.8263528047937395e-05, + "loss": 2.1171, + "step": 6128 + }, + { + "epoch": 0.2, + "grad_norm": 0.7024168372154236, + "learning_rate": 1.8262929430158974e-05, + "loss": 2.1857, + "step": 6129 + }, + { + "epoch": 0.2, + "grad_norm": 0.705483078956604, + "learning_rate": 1.826233071903082e-05, + "loss": 2.3051, + "step": 6130 + }, + { + "epoch": 0.2, + "grad_norm": 0.7141180634498596, + "learning_rate": 1.8261731914559698e-05, + "loss": 2.1743, + "step": 6131 + }, + { + "epoch": 0.2, + "grad_norm": 0.6809024214744568, + "learning_rate": 1.8261133016752368e-05, + "loss": 2.1463, + "step": 6132 + }, + { + "epoch": 0.2, + "grad_norm": 0.7426943778991699, + "learning_rate": 1.82605340256156e-05, + "loss": 2.1639, + "step": 6133 + }, + { + "epoch": 0.2, + "grad_norm": 0.7214270234107971, + "learning_rate": 1.8259934941156157e-05, + "loss": 2.1537, + "step": 6134 + }, + { + "epoch": 0.2, + "grad_norm": 0.7001158595085144, + "learning_rate": 1.8259335763380816e-05, + "loss": 2.2022, + "step": 6135 + }, + { + "epoch": 0.2, + "grad_norm": 0.7222607135772705, + "learning_rate": 1.8258736492296335e-05, + "loss": 2.1775, + "step": 6136 + }, + { + "epoch": 0.2, + "grad_norm": 0.7160484194755554, + "learning_rate": 1.825813712790949e-05, + "loss": 2.2132, + "step": 6137 + }, + { + "epoch": 0.2, + "grad_norm": 0.701562225818634, + "learning_rate": 1.825753767022705e-05, + "loss": 2.1854, + "step": 6138 + }, + { + "epoch": 0.2, + "grad_norm": 0.690872848033905, + "learning_rate": 1.825693811925579e-05, + "loss": 2.2013, + "step": 6139 + }, + { + "epoch": 0.2, + "grad_norm": 0.7470741271972656, + "learning_rate": 1.825633847500248e-05, + "loss": 2.2429, + "step": 6140 + }, + { + "epoch": 0.2, + "grad_norm": 0.7656940221786499, + "learning_rate": 1.8255738737473904e-05, + "loss": 2.1491, + "step": 6141 + }, + { + "epoch": 0.2, + "grad_norm": 0.6991948485374451, + "learning_rate": 1.8255138906676824e-05, + "loss": 2.1446, + "step": 6142 + }, + { + "epoch": 0.2, + "grad_norm": 0.6899681091308594, + "learning_rate": 1.8254538982618023e-05, + "loss": 2.1528, + "step": 6143 + }, + { + "epoch": 0.2, + "grad_norm": 0.7008669376373291, + "learning_rate": 1.825393896530428e-05, + "loss": 2.1887, + "step": 6144 + }, + { + "epoch": 0.2, + "grad_norm": 0.6513592004776001, + "learning_rate": 1.825333885474237e-05, + "loss": 2.1329, + "step": 6145 + }, + { + "epoch": 0.2, + "grad_norm": 0.712500810623169, + "learning_rate": 1.8252738650939073e-05, + "loss": 2.1613, + "step": 6146 + }, + { + "epoch": 0.2, + "grad_norm": 0.6816225051879883, + "learning_rate": 1.8252138353901175e-05, + "loss": 2.174, + "step": 6147 + }, + { + "epoch": 0.2, + "grad_norm": 0.7048233151435852, + "learning_rate": 1.8251537963635456e-05, + "loss": 2.25, + "step": 6148 + }, + { + "epoch": 0.2, + "grad_norm": 0.7450661659240723, + "learning_rate": 1.8250937480148693e-05, + "loss": 2.2452, + "step": 6149 + }, + { + "epoch": 0.2, + "grad_norm": 0.7033609747886658, + "learning_rate": 1.8250336903447675e-05, + "loss": 2.1024, + "step": 6150 + }, + { + "epoch": 0.2, + "grad_norm": 0.6958092451095581, + "learning_rate": 1.8249736233539185e-05, + "loss": 2.1157, + "step": 6151 + }, + { + "epoch": 0.2, + "grad_norm": 0.7224093675613403, + "learning_rate": 1.824913547043001e-05, + "loss": 2.1944, + "step": 6152 + }, + { + "epoch": 0.2, + "grad_norm": 0.7134841680526733, + "learning_rate": 1.8248534614126937e-05, + "loss": 2.2418, + "step": 6153 + }, + { + "epoch": 0.2, + "grad_norm": 0.7342516183853149, + "learning_rate": 1.8247933664636754e-05, + "loss": 2.2417, + "step": 6154 + }, + { + "epoch": 0.2, + "grad_norm": 0.7000675797462463, + "learning_rate": 1.8247332621966252e-05, + "loss": 2.2424, + "step": 6155 + }, + { + "epoch": 0.2, + "grad_norm": 0.6992802619934082, + "learning_rate": 1.8246731486122218e-05, + "loss": 2.2312, + "step": 6156 + }, + { + "epoch": 0.2, + "grad_norm": 0.7231718301773071, + "learning_rate": 1.8246130257111444e-05, + "loss": 2.2105, + "step": 6157 + }, + { + "epoch": 0.2, + "grad_norm": 0.6765751242637634, + "learning_rate": 1.8245528934940723e-05, + "loss": 2.1141, + "step": 6158 + }, + { + "epoch": 0.2, + "grad_norm": 0.7158822417259216, + "learning_rate": 1.824492751961685e-05, + "loss": 2.237, + "step": 6159 + }, + { + "epoch": 0.2, + "grad_norm": 0.717758059501648, + "learning_rate": 1.8244326011146617e-05, + "loss": 2.2047, + "step": 6160 + }, + { + "epoch": 0.2, + "grad_norm": 0.7053166627883911, + "learning_rate": 1.824372440953682e-05, + "loss": 2.1299, + "step": 6161 + }, + { + "epoch": 0.21, + "grad_norm": 0.6988082528114319, + "learning_rate": 1.8243122714794257e-05, + "loss": 2.2999, + "step": 6162 + }, + { + "epoch": 0.21, + "grad_norm": 0.6660996079444885, + "learning_rate": 1.8242520926925723e-05, + "loss": 2.142, + "step": 6163 + }, + { + "epoch": 0.21, + "grad_norm": 0.6848520636558533, + "learning_rate": 1.824191904593802e-05, + "loss": 2.1853, + "step": 6164 + }, + { + "epoch": 0.21, + "grad_norm": 0.6864492893218994, + "learning_rate": 1.8241317071837946e-05, + "loss": 2.196, + "step": 6165 + }, + { + "epoch": 0.21, + "grad_norm": 0.7054276466369629, + "learning_rate": 1.8240715004632302e-05, + "loss": 2.2011, + "step": 6166 + }, + { + "epoch": 0.21, + "grad_norm": 0.6922920346260071, + "learning_rate": 1.8240112844327888e-05, + "loss": 2.1856, + "step": 6167 + }, + { + "epoch": 0.21, + "grad_norm": 0.6933912634849548, + "learning_rate": 1.8239510590931507e-05, + "loss": 2.183, + "step": 6168 + }, + { + "epoch": 0.21, + "grad_norm": 0.7077281475067139, + "learning_rate": 1.8238908244449966e-05, + "loss": 2.1729, + "step": 6169 + }, + { + "epoch": 0.21, + "grad_norm": 0.7017167806625366, + "learning_rate": 1.8238305804890068e-05, + "loss": 2.2121, + "step": 6170 + }, + { + "epoch": 0.21, + "grad_norm": 0.7212790250778198, + "learning_rate": 1.8237703272258617e-05, + "loss": 2.1961, + "step": 6171 + }, + { + "epoch": 0.21, + "grad_norm": 0.692525327205658, + "learning_rate": 1.8237100646562426e-05, + "loss": 2.1353, + "step": 6172 + }, + { + "epoch": 0.21, + "grad_norm": 0.7116010189056396, + "learning_rate": 1.8236497927808295e-05, + "loss": 2.0804, + "step": 6173 + }, + { + "epoch": 0.21, + "grad_norm": 0.710145890712738, + "learning_rate": 1.823589511600304e-05, + "loss": 2.2376, + "step": 6174 + }, + { + "epoch": 0.21, + "grad_norm": 0.6760478019714355, + "learning_rate": 1.823529221115347e-05, + "loss": 2.113, + "step": 6175 + }, + { + "epoch": 0.21, + "grad_norm": 0.6640588045120239, + "learning_rate": 1.8234689213266393e-05, + "loss": 2.1815, + "step": 6176 + }, + { + "epoch": 0.21, + "grad_norm": 0.7036170363426208, + "learning_rate": 1.823408612234862e-05, + "loss": 2.1824, + "step": 6177 + }, + { + "epoch": 0.21, + "grad_norm": 0.6894940733909607, + "learning_rate": 1.8233482938406974e-05, + "loss": 2.1956, + "step": 6178 + }, + { + "epoch": 0.21, + "grad_norm": 0.6891513466835022, + "learning_rate": 1.823287966144826e-05, + "loss": 2.1908, + "step": 6179 + }, + { + "epoch": 0.21, + "grad_norm": 0.6755658984184265, + "learning_rate": 1.8232276291479297e-05, + "loss": 2.1949, + "step": 6180 + }, + { + "epoch": 0.21, + "grad_norm": 0.702664852142334, + "learning_rate": 1.82316728285069e-05, + "loss": 2.2577, + "step": 6181 + }, + { + "epoch": 0.21, + "grad_norm": 0.6811558604240417, + "learning_rate": 1.823106927253789e-05, + "loss": 2.1818, + "step": 6182 + }, + { + "epoch": 0.21, + "grad_norm": 0.727492094039917, + "learning_rate": 1.823046562357908e-05, + "loss": 2.1927, + "step": 6183 + }, + { + "epoch": 0.21, + "grad_norm": 0.6844305992126465, + "learning_rate": 1.8229861881637296e-05, + "loss": 2.151, + "step": 6184 + }, + { + "epoch": 0.21, + "grad_norm": 0.6705989241600037, + "learning_rate": 1.8229258046719356e-05, + "loss": 2.0927, + "step": 6185 + }, + { + "epoch": 0.21, + "grad_norm": 0.7396030426025391, + "learning_rate": 1.822865411883208e-05, + "loss": 2.2514, + "step": 6186 + }, + { + "epoch": 0.21, + "grad_norm": 0.7141703963279724, + "learning_rate": 1.822805009798229e-05, + "loss": 2.154, + "step": 6187 + }, + { + "epoch": 0.21, + "grad_norm": 0.6837753057479858, + "learning_rate": 1.8227445984176815e-05, + "loss": 2.133, + "step": 6188 + }, + { + "epoch": 0.21, + "grad_norm": 0.6862742304801941, + "learning_rate": 1.8226841777422474e-05, + "loss": 2.1477, + "step": 6189 + }, + { + "epoch": 0.21, + "grad_norm": 0.7126458883285522, + "learning_rate": 1.8226237477726097e-05, + "loss": 2.1091, + "step": 6190 + }, + { + "epoch": 0.21, + "grad_norm": 0.6781268119812012, + "learning_rate": 1.8225633085094513e-05, + "loss": 2.1036, + "step": 6191 + }, + { + "epoch": 0.21, + "grad_norm": 0.6711091995239258, + "learning_rate": 1.8225028599534544e-05, + "loss": 2.2101, + "step": 6192 + }, + { + "epoch": 0.21, + "grad_norm": 0.6815574765205383, + "learning_rate": 1.8224424021053028e-05, + "loss": 2.1765, + "step": 6193 + }, + { + "epoch": 0.21, + "grad_norm": 0.7124742269515991, + "learning_rate": 1.822381934965678e-05, + "loss": 2.1639, + "step": 6194 + }, + { + "epoch": 0.21, + "grad_norm": 0.6860081553459167, + "learning_rate": 1.8223214585352645e-05, + "loss": 2.2187, + "step": 6195 + }, + { + "epoch": 0.21, + "grad_norm": 0.683290958404541, + "learning_rate": 1.8222609728147454e-05, + "loss": 2.1457, + "step": 6196 + }, + { + "epoch": 0.21, + "grad_norm": 0.7669884562492371, + "learning_rate": 1.8222004778048033e-05, + "loss": 2.2802, + "step": 6197 + }, + { + "epoch": 0.21, + "grad_norm": 0.7044051885604858, + "learning_rate": 1.8221399735061225e-05, + "loss": 2.2255, + "step": 6198 + }, + { + "epoch": 0.21, + "grad_norm": 0.7378009557723999, + "learning_rate": 1.8220794599193855e-05, + "loss": 2.2224, + "step": 6199 + }, + { + "epoch": 0.21, + "grad_norm": 0.684019923210144, + "learning_rate": 1.822018937045277e-05, + "loss": 2.1293, + "step": 6200 + }, + { + "epoch": 0.21, + "grad_norm": 0.7094712257385254, + "learning_rate": 1.8219584048844802e-05, + "loss": 2.2337, + "step": 6201 + }, + { + "epoch": 0.21, + "grad_norm": 0.7301838397979736, + "learning_rate": 1.821897863437679e-05, + "loss": 2.226, + "step": 6202 + }, + { + "epoch": 0.21, + "grad_norm": 0.7089719176292419, + "learning_rate": 1.8218373127055577e-05, + "loss": 2.2129, + "step": 6203 + }, + { + "epoch": 0.21, + "grad_norm": 0.7048456072807312, + "learning_rate": 1.8217767526888e-05, + "loss": 2.1371, + "step": 6204 + }, + { + "epoch": 0.21, + "grad_norm": 0.693789541721344, + "learning_rate": 1.8217161833880896e-05, + "loss": 2.2254, + "step": 6205 + }, + { + "epoch": 0.21, + "grad_norm": 0.67508465051651, + "learning_rate": 1.8216556048041118e-05, + "loss": 2.2422, + "step": 6206 + }, + { + "epoch": 0.21, + "grad_norm": 0.6962656378746033, + "learning_rate": 1.8215950169375503e-05, + "loss": 2.1848, + "step": 6207 + }, + { + "epoch": 0.21, + "grad_norm": 0.754131555557251, + "learning_rate": 1.82153441978909e-05, + "loss": 2.2022, + "step": 6208 + }, + { + "epoch": 0.21, + "grad_norm": 0.6938959360122681, + "learning_rate": 1.8214738133594153e-05, + "loss": 2.2416, + "step": 6209 + }, + { + "epoch": 0.21, + "grad_norm": 0.6860623359680176, + "learning_rate": 1.8214131976492108e-05, + "loss": 2.2005, + "step": 6210 + }, + { + "epoch": 0.21, + "grad_norm": 0.6835795640945435, + "learning_rate": 1.821352572659161e-05, + "loss": 2.2198, + "step": 6211 + }, + { + "epoch": 0.21, + "grad_norm": 0.7212157249450684, + "learning_rate": 1.8212919383899518e-05, + "loss": 2.2119, + "step": 6212 + }, + { + "epoch": 0.21, + "grad_norm": 0.6794966459274292, + "learning_rate": 1.8212312948422674e-05, + "loss": 2.1172, + "step": 6213 + }, + { + "epoch": 0.21, + "grad_norm": 0.7304803133010864, + "learning_rate": 1.8211706420167932e-05, + "loss": 2.1489, + "step": 6214 + }, + { + "epoch": 0.21, + "grad_norm": 0.6906092762947083, + "learning_rate": 1.821109979914214e-05, + "loss": 2.2018, + "step": 6215 + }, + { + "epoch": 0.21, + "grad_norm": 0.6878266930580139, + "learning_rate": 1.8210493085352158e-05, + "loss": 2.1808, + "step": 6216 + }, + { + "epoch": 0.21, + "grad_norm": 0.7326026558876038, + "learning_rate": 1.8209886278804837e-05, + "loss": 2.1339, + "step": 6217 + }, + { + "epoch": 0.21, + "grad_norm": 0.7428187727928162, + "learning_rate": 1.820927937950703e-05, + "loss": 2.151, + "step": 6218 + }, + { + "epoch": 0.21, + "grad_norm": 0.6972575783729553, + "learning_rate": 1.8208672387465597e-05, + "loss": 2.2047, + "step": 6219 + }, + { + "epoch": 0.21, + "grad_norm": 0.6899797320365906, + "learning_rate": 1.8208065302687393e-05, + "loss": 2.1237, + "step": 6220 + }, + { + "epoch": 0.21, + "grad_norm": 0.7210092544555664, + "learning_rate": 1.820745812517928e-05, + "loss": 2.1802, + "step": 6221 + }, + { + "epoch": 0.21, + "grad_norm": 0.69561767578125, + "learning_rate": 1.8206850854948114e-05, + "loss": 2.1653, + "step": 6222 + }, + { + "epoch": 0.21, + "grad_norm": 0.7154582738876343, + "learning_rate": 1.8206243492000757e-05, + "loss": 2.1818, + "step": 6223 + }, + { + "epoch": 0.21, + "grad_norm": 0.6922856569290161, + "learning_rate": 1.820563603634407e-05, + "loss": 2.1714, + "step": 6224 + }, + { + "epoch": 0.21, + "grad_norm": 0.7273600101470947, + "learning_rate": 1.8205028487984916e-05, + "loss": 2.1604, + "step": 6225 + }, + { + "epoch": 0.21, + "grad_norm": 0.7266347408294678, + "learning_rate": 1.820442084693016e-05, + "loss": 2.2157, + "step": 6226 + }, + { + "epoch": 0.21, + "grad_norm": 0.6917631030082703, + "learning_rate": 1.8203813113186664e-05, + "loss": 2.1647, + "step": 6227 + }, + { + "epoch": 0.21, + "grad_norm": 0.7093598246574402, + "learning_rate": 1.8203205286761292e-05, + "loss": 2.1412, + "step": 6228 + }, + { + "epoch": 0.21, + "grad_norm": 0.6844556331634521, + "learning_rate": 1.820259736766092e-05, + "loss": 2.1465, + "step": 6229 + }, + { + "epoch": 0.21, + "grad_norm": 0.7113626003265381, + "learning_rate": 1.820198935589241e-05, + "loss": 2.1403, + "step": 6230 + }, + { + "epoch": 0.21, + "grad_norm": 0.6789575815200806, + "learning_rate": 1.8201381251462628e-05, + "loss": 2.189, + "step": 6231 + }, + { + "epoch": 0.21, + "grad_norm": 0.7100657820701599, + "learning_rate": 1.8200773054378448e-05, + "loss": 2.1739, + "step": 6232 + }, + { + "epoch": 0.21, + "grad_norm": 0.7089124917984009, + "learning_rate": 1.8200164764646742e-05, + "loss": 2.1731, + "step": 6233 + }, + { + "epoch": 0.21, + "grad_norm": 0.7312548756599426, + "learning_rate": 1.819955638227438e-05, + "loss": 2.2229, + "step": 6234 + }, + { + "epoch": 0.21, + "grad_norm": 0.7068428993225098, + "learning_rate": 1.8198947907268233e-05, + "loss": 2.228, + "step": 6235 + }, + { + "epoch": 0.21, + "grad_norm": 0.6849491596221924, + "learning_rate": 1.819833933963518e-05, + "loss": 2.2067, + "step": 6236 + }, + { + "epoch": 0.21, + "grad_norm": 0.6931092143058777, + "learning_rate": 1.819773067938209e-05, + "loss": 2.2137, + "step": 6237 + }, + { + "epoch": 0.21, + "grad_norm": 0.6814872622489929, + "learning_rate": 1.8197121926515846e-05, + "loss": 2.1804, + "step": 6238 + }, + { + "epoch": 0.21, + "grad_norm": 0.7009797692298889, + "learning_rate": 1.8196513081043323e-05, + "loss": 2.1904, + "step": 6239 + }, + { + "epoch": 0.21, + "grad_norm": 0.7696446180343628, + "learning_rate": 1.8195904142971397e-05, + "loss": 2.1403, + "step": 6240 + }, + { + "epoch": 0.21, + "grad_norm": 0.7061079740524292, + "learning_rate": 1.819529511230695e-05, + "loss": 2.1931, + "step": 6241 + }, + { + "epoch": 0.21, + "grad_norm": 0.7309475541114807, + "learning_rate": 1.819468598905686e-05, + "loss": 2.2096, + "step": 6242 + }, + { + "epoch": 0.21, + "grad_norm": 0.6979312896728516, + "learning_rate": 1.8194076773228016e-05, + "loss": 2.1797, + "step": 6243 + }, + { + "epoch": 0.21, + "grad_norm": 0.7142277359962463, + "learning_rate": 1.819346746482729e-05, + "loss": 2.2248, + "step": 6244 + }, + { + "epoch": 0.21, + "grad_norm": 0.7087216973304749, + "learning_rate": 1.8192858063861573e-05, + "loss": 2.1498, + "step": 6245 + }, + { + "epoch": 0.21, + "grad_norm": 0.6993076205253601, + "learning_rate": 1.8192248570337742e-05, + "loss": 2.1994, + "step": 6246 + }, + { + "epoch": 0.21, + "grad_norm": 0.6921340227127075, + "learning_rate": 1.819163898426269e-05, + "loss": 2.15, + "step": 6247 + }, + { + "epoch": 0.21, + "grad_norm": 0.705520749092102, + "learning_rate": 1.8191029305643306e-05, + "loss": 2.2565, + "step": 6248 + }, + { + "epoch": 0.21, + "grad_norm": 0.6995993256568909, + "learning_rate": 1.819041953448647e-05, + "loss": 2.1462, + "step": 6249 + }, + { + "epoch": 0.21, + "grad_norm": 0.7580268979072571, + "learning_rate": 1.8189809670799074e-05, + "loss": 2.1653, + "step": 6250 + }, + { + "epoch": 0.21, + "grad_norm": 0.6980740427970886, + "learning_rate": 1.818919971458801e-05, + "loss": 2.1517, + "step": 6251 + }, + { + "epoch": 0.21, + "grad_norm": 0.6747538447380066, + "learning_rate": 1.8188589665860163e-05, + "loss": 2.1554, + "step": 6252 + }, + { + "epoch": 0.21, + "grad_norm": 0.7014902234077454, + "learning_rate": 1.818797952462243e-05, + "loss": 2.1507, + "step": 6253 + }, + { + "epoch": 0.21, + "grad_norm": 0.7250577211380005, + "learning_rate": 1.8187369290881705e-05, + "loss": 2.1215, + "step": 6254 + }, + { + "epoch": 0.21, + "grad_norm": 0.6888594627380371, + "learning_rate": 1.818675896464488e-05, + "loss": 2.1389, + "step": 6255 + }, + { + "epoch": 0.21, + "grad_norm": 0.7407350540161133, + "learning_rate": 1.818614854591885e-05, + "loss": 2.2043, + "step": 6256 + }, + { + "epoch": 0.21, + "grad_norm": 0.7185331583023071, + "learning_rate": 1.818553803471051e-05, + "loss": 2.2166, + "step": 6257 + }, + { + "epoch": 0.21, + "grad_norm": 0.7327126860618591, + "learning_rate": 1.818492743102676e-05, + "loss": 2.1842, + "step": 6258 + }, + { + "epoch": 0.21, + "grad_norm": 0.6717777252197266, + "learning_rate": 1.8184316734874494e-05, + "loss": 2.2057, + "step": 6259 + }, + { + "epoch": 0.21, + "grad_norm": 0.7251034379005432, + "learning_rate": 1.8183705946260618e-05, + "loss": 2.1421, + "step": 6260 + }, + { + "epoch": 0.21, + "grad_norm": 0.7194979786872864, + "learning_rate": 1.8183095065192025e-05, + "loss": 2.1818, + "step": 6261 + }, + { + "epoch": 0.21, + "grad_norm": 0.7353861927986145, + "learning_rate": 1.818248409167562e-05, + "loss": 2.0895, + "step": 6262 + }, + { + "epoch": 0.21, + "grad_norm": 0.7097426056861877, + "learning_rate": 1.8181873025718308e-05, + "loss": 2.217, + "step": 6263 + }, + { + "epoch": 0.21, + "grad_norm": 0.721851646900177, + "learning_rate": 1.8181261867326987e-05, + "loss": 2.1687, + "step": 6264 + }, + { + "epoch": 0.21, + "grad_norm": 0.6836680769920349, + "learning_rate": 1.8180650616508564e-05, + "loss": 2.2267, + "step": 6265 + }, + { + "epoch": 0.21, + "grad_norm": 0.7297622561454773, + "learning_rate": 1.8180039273269944e-05, + "loss": 2.2466, + "step": 6266 + }, + { + "epoch": 0.21, + "grad_norm": 0.7098370790481567, + "learning_rate": 1.817942783761804e-05, + "loss": 2.2356, + "step": 6267 + }, + { + "epoch": 0.21, + "grad_norm": 0.6814084649085999, + "learning_rate": 1.8178816309559747e-05, + "loss": 2.1568, + "step": 6268 + }, + { + "epoch": 0.21, + "grad_norm": 0.707601010799408, + "learning_rate": 1.8178204689101983e-05, + "loss": 2.2056, + "step": 6269 + }, + { + "epoch": 0.21, + "grad_norm": 0.7034727334976196, + "learning_rate": 1.8177592976251657e-05, + "loss": 2.1334, + "step": 6270 + }, + { + "epoch": 0.21, + "grad_norm": 0.6976986527442932, + "learning_rate": 1.8176981171015675e-05, + "loss": 2.2238, + "step": 6271 + }, + { + "epoch": 0.21, + "grad_norm": 0.7366610169410706, + "learning_rate": 1.8176369273400954e-05, + "loss": 2.1651, + "step": 6272 + }, + { + "epoch": 0.21, + "grad_norm": 0.6615982055664062, + "learning_rate": 1.8175757283414404e-05, + "loss": 2.1617, + "step": 6273 + }, + { + "epoch": 0.21, + "grad_norm": 0.7057796120643616, + "learning_rate": 1.817514520106294e-05, + "loss": 2.1623, + "step": 6274 + }, + { + "epoch": 0.21, + "grad_norm": 0.6771888732910156, + "learning_rate": 1.8174533026353476e-05, + "loss": 2.1548, + "step": 6275 + }, + { + "epoch": 0.21, + "grad_norm": 0.6665482521057129, + "learning_rate": 1.817392075929293e-05, + "loss": 2.1494, + "step": 6276 + }, + { + "epoch": 0.21, + "grad_norm": 0.6746802926063538, + "learning_rate": 1.8173308399888218e-05, + "loss": 2.1856, + "step": 6277 + }, + { + "epoch": 0.21, + "grad_norm": 0.6971818208694458, + "learning_rate": 1.8172695948146258e-05, + "loss": 2.2528, + "step": 6278 + }, + { + "epoch": 0.21, + "grad_norm": 0.6868285536766052, + "learning_rate": 1.8172083404073966e-05, + "loss": 2.2043, + "step": 6279 + }, + { + "epoch": 0.21, + "grad_norm": 0.7004644870758057, + "learning_rate": 1.8171470767678264e-05, + "loss": 2.1386, + "step": 6280 + }, + { + "epoch": 0.21, + "grad_norm": 0.6835257411003113, + "learning_rate": 1.8170858038966078e-05, + "loss": 2.2032, + "step": 6281 + }, + { + "epoch": 0.21, + "grad_norm": 0.6910305023193359, + "learning_rate": 1.8170245217944326e-05, + "loss": 2.17, + "step": 6282 + }, + { + "epoch": 0.21, + "grad_norm": 0.7208327054977417, + "learning_rate": 1.816963230461993e-05, + "loss": 2.216, + "step": 6283 + }, + { + "epoch": 0.21, + "grad_norm": 0.6916594505310059, + "learning_rate": 1.816901929899982e-05, + "loss": 2.2521, + "step": 6284 + }, + { + "epoch": 0.21, + "grad_norm": 0.6763165593147278, + "learning_rate": 1.816840620109091e-05, + "loss": 2.2101, + "step": 6285 + }, + { + "epoch": 0.21, + "grad_norm": 0.6844795346260071, + "learning_rate": 1.8167793010900138e-05, + "loss": 2.237, + "step": 6286 + }, + { + "epoch": 0.21, + "grad_norm": 0.6833997368812561, + "learning_rate": 1.816717972843443e-05, + "loss": 2.2902, + "step": 6287 + }, + { + "epoch": 0.21, + "grad_norm": 0.7094277739524841, + "learning_rate": 1.8166566353700708e-05, + "loss": 2.1871, + "step": 6288 + }, + { + "epoch": 0.21, + "grad_norm": 0.7402395009994507, + "learning_rate": 1.8165952886705908e-05, + "loss": 2.1758, + "step": 6289 + }, + { + "epoch": 0.21, + "grad_norm": 0.7101511359214783, + "learning_rate": 1.8165339327456958e-05, + "loss": 2.1494, + "step": 6290 + }, + { + "epoch": 0.21, + "grad_norm": 0.6769729256629944, + "learning_rate": 1.8164725675960787e-05, + "loss": 2.1452, + "step": 6291 + }, + { + "epoch": 0.21, + "grad_norm": 0.7318620681762695, + "learning_rate": 1.8164111932224334e-05, + "loss": 2.1832, + "step": 6292 + }, + { + "epoch": 0.21, + "grad_norm": 0.6814513802528381, + "learning_rate": 1.8163498096254525e-05, + "loss": 2.2132, + "step": 6293 + }, + { + "epoch": 0.21, + "grad_norm": 0.6977999210357666, + "learning_rate": 1.81628841680583e-05, + "loss": 2.1859, + "step": 6294 + }, + { + "epoch": 0.21, + "grad_norm": 0.6960188746452332, + "learning_rate": 1.8162270147642598e-05, + "loss": 2.19, + "step": 6295 + }, + { + "epoch": 0.21, + "grad_norm": 0.6972387433052063, + "learning_rate": 1.8161656035014345e-05, + "loss": 2.2156, + "step": 6296 + }, + { + "epoch": 0.21, + "grad_norm": 0.7259312272071838, + "learning_rate": 1.816104183018049e-05, + "loss": 2.2128, + "step": 6297 + }, + { + "epoch": 0.21, + "grad_norm": 0.7384807467460632, + "learning_rate": 1.8160427533147965e-05, + "loss": 2.1929, + "step": 6298 + }, + { + "epoch": 0.21, + "grad_norm": 0.7192266583442688, + "learning_rate": 1.8159813143923712e-05, + "loss": 2.221, + "step": 6299 + }, + { + "epoch": 0.21, + "grad_norm": 0.7081276178359985, + "learning_rate": 1.8159198662514672e-05, + "loss": 2.2761, + "step": 6300 + }, + { + "epoch": 0.21, + "grad_norm": 0.6984350085258484, + "learning_rate": 1.815858408892779e-05, + "loss": 2.1591, + "step": 6301 + }, + { + "epoch": 0.21, + "grad_norm": 0.7108070254325867, + "learning_rate": 1.8157969423170003e-05, + "loss": 2.2081, + "step": 6302 + }, + { + "epoch": 0.21, + "grad_norm": 0.7418871521949768, + "learning_rate": 1.815735466524826e-05, + "loss": 2.1984, + "step": 6303 + }, + { + "epoch": 0.21, + "grad_norm": 0.7400758862495422, + "learning_rate": 1.8156739815169504e-05, + "loss": 2.1936, + "step": 6304 + }, + { + "epoch": 0.21, + "grad_norm": 0.7166884541511536, + "learning_rate": 1.8156124872940683e-05, + "loss": 2.212, + "step": 6305 + }, + { + "epoch": 0.21, + "grad_norm": 0.6811509132385254, + "learning_rate": 1.8155509838568744e-05, + "loss": 2.1244, + "step": 6306 + }, + { + "epoch": 0.21, + "grad_norm": 0.6913968324661255, + "learning_rate": 1.815489471206063e-05, + "loss": 2.1332, + "step": 6307 + }, + { + "epoch": 0.21, + "grad_norm": 0.6897211074829102, + "learning_rate": 1.8154279493423298e-05, + "loss": 2.1517, + "step": 6308 + }, + { + "epoch": 0.21, + "grad_norm": 0.7282798290252686, + "learning_rate": 1.81536641826637e-05, + "loss": 2.1669, + "step": 6309 + }, + { + "epoch": 0.21, + "grad_norm": 0.6984309554100037, + "learning_rate": 1.8153048779788775e-05, + "loss": 2.1608, + "step": 6310 + }, + { + "epoch": 0.21, + "grad_norm": 0.7070353627204895, + "learning_rate": 1.8152433284805484e-05, + "loss": 2.1299, + "step": 6311 + }, + { + "epoch": 0.21, + "grad_norm": 0.7314441800117493, + "learning_rate": 1.8151817697720782e-05, + "loss": 2.1851, + "step": 6312 + }, + { + "epoch": 0.21, + "grad_norm": 0.7233806848526001, + "learning_rate": 1.815120201854162e-05, + "loss": 2.2038, + "step": 6313 + }, + { + "epoch": 0.21, + "grad_norm": 0.7096444368362427, + "learning_rate": 1.8150586247274955e-05, + "loss": 2.2084, + "step": 6314 + }, + { + "epoch": 0.21, + "grad_norm": 0.6937890648841858, + "learning_rate": 1.8149970383927745e-05, + "loss": 2.1841, + "step": 6315 + }, + { + "epoch": 0.21, + "grad_norm": 0.6818922758102417, + "learning_rate": 1.814935442850694e-05, + "loss": 2.175, + "step": 6316 + }, + { + "epoch": 0.21, + "grad_norm": 0.6858789920806885, + "learning_rate": 1.814873838101951e-05, + "loss": 2.1925, + "step": 6317 + }, + { + "epoch": 0.21, + "grad_norm": 0.6864098906517029, + "learning_rate": 1.814812224147241e-05, + "loss": 2.1812, + "step": 6318 + }, + { + "epoch": 0.21, + "grad_norm": 0.7687103152275085, + "learning_rate": 1.8147506009872598e-05, + "loss": 2.1788, + "step": 6319 + }, + { + "epoch": 0.21, + "grad_norm": 0.687004804611206, + "learning_rate": 1.814688968622704e-05, + "loss": 2.1971, + "step": 6320 + }, + { + "epoch": 0.21, + "grad_norm": 0.7049893736839294, + "learning_rate": 1.8146273270542695e-05, + "loss": 2.1856, + "step": 6321 + }, + { + "epoch": 0.21, + "grad_norm": 0.6553322672843933, + "learning_rate": 1.8145656762826527e-05, + "loss": 2.2346, + "step": 6322 + }, + { + "epoch": 0.21, + "grad_norm": 0.7063862085342407, + "learning_rate": 1.8145040163085507e-05, + "loss": 2.2448, + "step": 6323 + }, + { + "epoch": 0.21, + "grad_norm": 0.6989167332649231, + "learning_rate": 1.8144423471326593e-05, + "loss": 2.2555, + "step": 6324 + }, + { + "epoch": 0.21, + "grad_norm": 0.6936603784561157, + "learning_rate": 1.814380668755676e-05, + "loss": 2.2197, + "step": 6325 + }, + { + "epoch": 0.21, + "grad_norm": 0.7103709578514099, + "learning_rate": 1.8143189811782966e-05, + "loss": 2.2227, + "step": 6326 + }, + { + "epoch": 0.21, + "grad_norm": 0.6912099719047546, + "learning_rate": 1.814257284401219e-05, + "loss": 2.1816, + "step": 6327 + }, + { + "epoch": 0.21, + "grad_norm": 0.6960267424583435, + "learning_rate": 1.8141955784251397e-05, + "loss": 2.1873, + "step": 6328 + }, + { + "epoch": 0.21, + "grad_norm": 0.7041221857070923, + "learning_rate": 1.8141338632507558e-05, + "loss": 2.2062, + "step": 6329 + }, + { + "epoch": 0.21, + "grad_norm": 0.6873149871826172, + "learning_rate": 1.8140721388787647e-05, + "loss": 2.126, + "step": 6330 + }, + { + "epoch": 0.21, + "grad_norm": 0.7146872878074646, + "learning_rate": 1.8140104053098636e-05, + "loss": 2.2552, + "step": 6331 + }, + { + "epoch": 0.21, + "grad_norm": 0.7022344470024109, + "learning_rate": 1.8139486625447502e-05, + "loss": 2.1751, + "step": 6332 + }, + { + "epoch": 0.21, + "grad_norm": 0.7143667936325073, + "learning_rate": 1.8138869105841217e-05, + "loss": 2.1773, + "step": 6333 + }, + { + "epoch": 0.21, + "grad_norm": 0.7031447291374207, + "learning_rate": 1.8138251494286762e-05, + "loss": 2.1149, + "step": 6334 + }, + { + "epoch": 0.21, + "grad_norm": 0.7136235237121582, + "learning_rate": 1.8137633790791106e-05, + "loss": 2.2171, + "step": 6335 + }, + { + "epoch": 0.21, + "grad_norm": 0.7328564524650574, + "learning_rate": 1.8137015995361237e-05, + "loss": 2.1318, + "step": 6336 + }, + { + "epoch": 0.21, + "grad_norm": 0.7245420217514038, + "learning_rate": 1.8136398108004127e-05, + "loss": 2.2318, + "step": 6337 + }, + { + "epoch": 0.21, + "grad_norm": 0.6986823678016663, + "learning_rate": 1.813578012872676e-05, + "loss": 2.1925, + "step": 6338 + }, + { + "epoch": 0.21, + "grad_norm": 0.685001790523529, + "learning_rate": 1.813516205753612e-05, + "loss": 2.2366, + "step": 6339 + }, + { + "epoch": 0.21, + "grad_norm": 0.7356112599372864, + "learning_rate": 1.8134543894439184e-05, + "loss": 2.1948, + "step": 6340 + }, + { + "epoch": 0.21, + "grad_norm": 0.6923741698265076, + "learning_rate": 1.8133925639442937e-05, + "loss": 2.1875, + "step": 6341 + }, + { + "epoch": 0.21, + "grad_norm": 0.7427212595939636, + "learning_rate": 1.813330729255437e-05, + "loss": 2.1757, + "step": 6342 + }, + { + "epoch": 0.21, + "grad_norm": 0.6963168382644653, + "learning_rate": 1.8132688853780456e-05, + "loss": 2.1771, + "step": 6343 + }, + { + "epoch": 0.21, + "grad_norm": 0.7154357433319092, + "learning_rate": 1.8132070323128196e-05, + "loss": 2.1302, + "step": 6344 + }, + { + "epoch": 0.21, + "grad_norm": 0.6744716763496399, + "learning_rate": 1.813145170060457e-05, + "loss": 2.178, + "step": 6345 + }, + { + "epoch": 0.21, + "grad_norm": 0.7308353781700134, + "learning_rate": 1.8130832986216568e-05, + "loss": 2.2084, + "step": 6346 + }, + { + "epoch": 0.21, + "grad_norm": 0.7460055947303772, + "learning_rate": 1.813021417997118e-05, + "loss": 2.1327, + "step": 6347 + }, + { + "epoch": 0.21, + "grad_norm": 0.7088429927825928, + "learning_rate": 1.8129595281875393e-05, + "loss": 2.1834, + "step": 6348 + }, + { + "epoch": 0.21, + "grad_norm": 0.7316433191299438, + "learning_rate": 1.812897629193621e-05, + "loss": 2.1508, + "step": 6349 + }, + { + "epoch": 0.21, + "grad_norm": 0.7352184653282166, + "learning_rate": 1.812835721016061e-05, + "loss": 2.1479, + "step": 6350 + }, + { + "epoch": 0.21, + "grad_norm": 0.7303087115287781, + "learning_rate": 1.81277380365556e-05, + "loss": 2.1116, + "step": 6351 + }, + { + "epoch": 0.21, + "grad_norm": 0.682537853717804, + "learning_rate": 1.8127118771128164e-05, + "loss": 2.1957, + "step": 6352 + }, + { + "epoch": 0.21, + "grad_norm": 0.6753235459327698, + "learning_rate": 1.8126499413885306e-05, + "loss": 2.2009, + "step": 6353 + }, + { + "epoch": 0.21, + "grad_norm": 0.6913830637931824, + "learning_rate": 1.8125879964834022e-05, + "loss": 2.1362, + "step": 6354 + }, + { + "epoch": 0.21, + "grad_norm": 0.6910260915756226, + "learning_rate": 1.8125260423981306e-05, + "loss": 2.1488, + "step": 6355 + }, + { + "epoch": 0.21, + "grad_norm": 0.6832036375999451, + "learning_rate": 1.8124640791334157e-05, + "loss": 2.1118, + "step": 6356 + }, + { + "epoch": 0.21, + "grad_norm": 0.7157078385353088, + "learning_rate": 1.8124021066899586e-05, + "loss": 2.1964, + "step": 6357 + }, + { + "epoch": 0.21, + "grad_norm": 0.7438831329345703, + "learning_rate": 1.8123401250684577e-05, + "loss": 2.1687, + "step": 6358 + }, + { + "epoch": 0.21, + "grad_norm": 0.7154524326324463, + "learning_rate": 1.8122781342696147e-05, + "loss": 2.172, + "step": 6359 + }, + { + "epoch": 0.21, + "grad_norm": 0.715208888053894, + "learning_rate": 1.812216134294129e-05, + "loss": 2.0952, + "step": 6360 + }, + { + "epoch": 0.21, + "grad_norm": 0.6812685132026672, + "learning_rate": 1.8121541251427017e-05, + "loss": 2.1503, + "step": 6361 + }, + { + "epoch": 0.21, + "grad_norm": 0.7198569774627686, + "learning_rate": 1.812092106816033e-05, + "loss": 2.2044, + "step": 6362 + }, + { + "epoch": 0.21, + "grad_norm": 0.6905926465988159, + "learning_rate": 1.8120300793148235e-05, + "loss": 2.2152, + "step": 6363 + }, + { + "epoch": 0.21, + "grad_norm": 0.7575319409370422, + "learning_rate": 1.8119680426397745e-05, + "loss": 2.2161, + "step": 6364 + }, + { + "epoch": 0.21, + "grad_norm": 0.6950216293334961, + "learning_rate": 1.8119059967915857e-05, + "loss": 2.2185, + "step": 6365 + }, + { + "epoch": 0.21, + "grad_norm": 0.6988063454627991, + "learning_rate": 1.811843941770959e-05, + "loss": 2.2093, + "step": 6366 + }, + { + "epoch": 0.21, + "grad_norm": 0.6994710564613342, + "learning_rate": 1.8117818775785957e-05, + "loss": 2.2079, + "step": 6367 + }, + { + "epoch": 0.21, + "grad_norm": 0.7182785868644714, + "learning_rate": 1.811719804215196e-05, + "loss": 2.2167, + "step": 6368 + }, + { + "epoch": 0.21, + "grad_norm": 0.7230461835861206, + "learning_rate": 1.811657721681462e-05, + "loss": 2.2336, + "step": 6369 + }, + { + "epoch": 0.21, + "grad_norm": 0.7024505138397217, + "learning_rate": 1.8115956299780942e-05, + "loss": 2.1784, + "step": 6370 + }, + { + "epoch": 0.21, + "grad_norm": 0.6858686208724976, + "learning_rate": 1.811533529105795e-05, + "loss": 2.1382, + "step": 6371 + }, + { + "epoch": 0.21, + "grad_norm": 0.6883136034011841, + "learning_rate": 1.811471419065266e-05, + "loss": 2.1476, + "step": 6372 + }, + { + "epoch": 0.21, + "grad_norm": 0.7034709453582764, + "learning_rate": 1.811409299857208e-05, + "loss": 2.1814, + "step": 6373 + }, + { + "epoch": 0.21, + "grad_norm": 0.7325684428215027, + "learning_rate": 1.8113471714823233e-05, + "loss": 2.2138, + "step": 6374 + }, + { + "epoch": 0.21, + "grad_norm": 0.6874024868011475, + "learning_rate": 1.8112850339413136e-05, + "loss": 2.1226, + "step": 6375 + }, + { + "epoch": 0.21, + "grad_norm": 0.6834449172019958, + "learning_rate": 1.8112228872348813e-05, + "loss": 2.168, + "step": 6376 + }, + { + "epoch": 0.21, + "grad_norm": 0.6913960576057434, + "learning_rate": 1.811160731363728e-05, + "loss": 2.172, + "step": 6377 + }, + { + "epoch": 0.21, + "grad_norm": 0.683789074420929, + "learning_rate": 1.8110985663285564e-05, + "loss": 2.2221, + "step": 6378 + }, + { + "epoch": 0.21, + "grad_norm": 0.6796307563781738, + "learning_rate": 1.8110363921300685e-05, + "loss": 2.1493, + "step": 6379 + }, + { + "epoch": 0.21, + "grad_norm": 0.6846427321434021, + "learning_rate": 1.8109742087689667e-05, + "loss": 2.1264, + "step": 6380 + }, + { + "epoch": 0.21, + "grad_norm": 0.6993242502212524, + "learning_rate": 1.8109120162459537e-05, + "loss": 2.1857, + "step": 6381 + }, + { + "epoch": 0.21, + "grad_norm": 0.6903895735740662, + "learning_rate": 1.8108498145617316e-05, + "loss": 2.1911, + "step": 6382 + }, + { + "epoch": 0.21, + "grad_norm": 0.6771966814994812, + "learning_rate": 1.810787603717004e-05, + "loss": 2.1264, + "step": 6383 + }, + { + "epoch": 0.21, + "grad_norm": 0.6768045425415039, + "learning_rate": 1.810725383712473e-05, + "loss": 2.1325, + "step": 6384 + }, + { + "epoch": 0.21, + "grad_norm": 0.7299454212188721, + "learning_rate": 1.8106631545488417e-05, + "loss": 2.1273, + "step": 6385 + }, + { + "epoch": 0.21, + "grad_norm": 0.7045280933380127, + "learning_rate": 1.810600916226813e-05, + "loss": 2.2444, + "step": 6386 + }, + { + "epoch": 0.21, + "grad_norm": 0.7406953573226929, + "learning_rate": 1.8105386687470906e-05, + "loss": 2.1438, + "step": 6387 + }, + { + "epoch": 0.21, + "grad_norm": 0.6980265378952026, + "learning_rate": 1.810476412110377e-05, + "loss": 2.1574, + "step": 6388 + }, + { + "epoch": 0.21, + "grad_norm": 0.684349000453949, + "learning_rate": 1.810414146317376e-05, + "loss": 2.2093, + "step": 6389 + }, + { + "epoch": 0.21, + "grad_norm": 0.7353962063789368, + "learning_rate": 1.8103518713687914e-05, + "loss": 2.257, + "step": 6390 + }, + { + "epoch": 0.21, + "grad_norm": 0.700209379196167, + "learning_rate": 1.8102895872653253e-05, + "loss": 2.1635, + "step": 6391 + }, + { + "epoch": 0.21, + "grad_norm": 0.6887604594230652, + "learning_rate": 1.810227294007683e-05, + "loss": 2.2406, + "step": 6392 + }, + { + "epoch": 0.21, + "grad_norm": 0.7095186710357666, + "learning_rate": 1.8101649915965675e-05, + "loss": 2.2186, + "step": 6393 + }, + { + "epoch": 0.21, + "grad_norm": 0.6913116574287415, + "learning_rate": 1.8101026800326825e-05, + "loss": 2.2092, + "step": 6394 + }, + { + "epoch": 0.21, + "grad_norm": 0.716457188129425, + "learning_rate": 1.8100403593167322e-05, + "loss": 2.1255, + "step": 6395 + }, + { + "epoch": 0.21, + "grad_norm": 0.6879635453224182, + "learning_rate": 1.809978029449421e-05, + "loss": 2.2685, + "step": 6396 + }, + { + "epoch": 0.21, + "grad_norm": 0.691448450088501, + "learning_rate": 1.8099156904314527e-05, + "loss": 2.1519, + "step": 6397 + }, + { + "epoch": 0.21, + "grad_norm": 0.7008914947509766, + "learning_rate": 1.8098533422635315e-05, + "loss": 2.1923, + "step": 6398 + }, + { + "epoch": 0.21, + "grad_norm": 0.687630295753479, + "learning_rate": 1.8097909849463616e-05, + "loss": 2.1772, + "step": 6399 + }, + { + "epoch": 0.21, + "grad_norm": 0.6858562231063843, + "learning_rate": 1.809728618480648e-05, + "loss": 2.2076, + "step": 6400 + }, + { + "epoch": 0.21, + "grad_norm": 0.7262950539588928, + "learning_rate": 1.809666242867095e-05, + "loss": 2.217, + "step": 6401 + }, + { + "epoch": 0.21, + "grad_norm": 0.6790397763252258, + "learning_rate": 1.809603858106407e-05, + "loss": 2.1823, + "step": 6402 + }, + { + "epoch": 0.21, + "grad_norm": 0.7365374565124512, + "learning_rate": 1.8095414641992893e-05, + "loss": 2.1839, + "step": 6403 + }, + { + "epoch": 0.21, + "grad_norm": 0.7116594910621643, + "learning_rate": 1.8094790611464463e-05, + "loss": 2.1758, + "step": 6404 + }, + { + "epoch": 0.21, + "grad_norm": 0.7414642572402954, + "learning_rate": 1.8094166489485836e-05, + "loss": 2.3294, + "step": 6405 + }, + { + "epoch": 0.21, + "grad_norm": 0.6941266655921936, + "learning_rate": 1.809354227606406e-05, + "loss": 2.1388, + "step": 6406 + }, + { + "epoch": 0.21, + "grad_norm": 0.6877501010894775, + "learning_rate": 1.8092917971206186e-05, + "loss": 2.1603, + "step": 6407 + }, + { + "epoch": 0.21, + "grad_norm": 0.6909286379814148, + "learning_rate": 1.8092293574919266e-05, + "loss": 2.2053, + "step": 6408 + }, + { + "epoch": 0.21, + "grad_norm": 0.7115195393562317, + "learning_rate": 1.8091669087210357e-05, + "loss": 2.1893, + "step": 6409 + }, + { + "epoch": 0.21, + "grad_norm": 0.7032685279846191, + "learning_rate": 1.8091044508086514e-05, + "loss": 2.2226, + "step": 6410 + }, + { + "epoch": 0.21, + "grad_norm": 0.6896002888679504, + "learning_rate": 1.809041983755479e-05, + "loss": 2.1301, + "step": 6411 + }, + { + "epoch": 0.21, + "grad_norm": 0.6763260364532471, + "learning_rate": 1.8089795075622245e-05, + "loss": 2.1891, + "step": 6412 + }, + { + "epoch": 0.21, + "grad_norm": 0.687936544418335, + "learning_rate": 1.8089170222295934e-05, + "loss": 2.1396, + "step": 6413 + }, + { + "epoch": 0.21, + "grad_norm": 0.6922518610954285, + "learning_rate": 1.8088545277582923e-05, + "loss": 2.2185, + "step": 6414 + }, + { + "epoch": 0.21, + "grad_norm": 0.6957792639732361, + "learning_rate": 1.8087920241490264e-05, + "loss": 2.23, + "step": 6415 + }, + { + "epoch": 0.21, + "grad_norm": 0.6831899285316467, + "learning_rate": 1.8087295114025026e-05, + "loss": 2.2014, + "step": 6416 + }, + { + "epoch": 0.21, + "grad_norm": 0.728905439376831, + "learning_rate": 1.8086669895194268e-05, + "loss": 2.239, + "step": 6417 + }, + { + "epoch": 0.21, + "grad_norm": 0.6694440245628357, + "learning_rate": 1.8086044585005046e-05, + "loss": 2.1219, + "step": 6418 + }, + { + "epoch": 0.21, + "grad_norm": 0.6922813057899475, + "learning_rate": 1.8085419183464433e-05, + "loss": 2.2065, + "step": 6419 + }, + { + "epoch": 0.21, + "grad_norm": 0.7299982309341431, + "learning_rate": 1.8084793690579498e-05, + "loss": 2.2145, + "step": 6420 + }, + { + "epoch": 0.21, + "grad_norm": 0.6954406499862671, + "learning_rate": 1.8084168106357297e-05, + "loss": 2.2071, + "step": 6421 + }, + { + "epoch": 0.21, + "grad_norm": 0.6924938559532166, + "learning_rate": 1.8083542430804906e-05, + "loss": 2.1214, + "step": 6422 + }, + { + "epoch": 0.21, + "grad_norm": 0.7058471441268921, + "learning_rate": 1.8082916663929388e-05, + "loss": 2.2462, + "step": 6423 + }, + { + "epoch": 0.21, + "grad_norm": 0.7106438279151917, + "learning_rate": 1.8082290805737815e-05, + "loss": 2.1662, + "step": 6424 + }, + { + "epoch": 0.21, + "grad_norm": 0.6858699917793274, + "learning_rate": 1.8081664856237256e-05, + "loss": 2.2066, + "step": 6425 + }, + { + "epoch": 0.21, + "grad_norm": 0.7168295383453369, + "learning_rate": 1.8081038815434785e-05, + "loss": 2.1969, + "step": 6426 + }, + { + "epoch": 0.21, + "grad_norm": 0.7257906198501587, + "learning_rate": 1.8080412683337474e-05, + "loss": 2.1639, + "step": 6427 + }, + { + "epoch": 0.21, + "grad_norm": 0.6879714727401733, + "learning_rate": 1.8079786459952396e-05, + "loss": 2.1983, + "step": 6428 + }, + { + "epoch": 0.21, + "grad_norm": 0.744773805141449, + "learning_rate": 1.8079160145286623e-05, + "loss": 2.2146, + "step": 6429 + }, + { + "epoch": 0.21, + "grad_norm": 0.6614795327186584, + "learning_rate": 1.8078533739347236e-05, + "loss": 2.1254, + "step": 6430 + }, + { + "epoch": 0.21, + "grad_norm": 0.6983741521835327, + "learning_rate": 1.807790724214131e-05, + "loss": 2.1471, + "step": 6431 + }, + { + "epoch": 0.21, + "grad_norm": 0.7102526426315308, + "learning_rate": 1.807728065367592e-05, + "loss": 2.1267, + "step": 6432 + }, + { + "epoch": 0.21, + "grad_norm": 0.7187673449516296, + "learning_rate": 1.8076653973958148e-05, + "loss": 2.1065, + "step": 6433 + }, + { + "epoch": 0.21, + "grad_norm": 0.6844434142112732, + "learning_rate": 1.8076027202995076e-05, + "loss": 2.1211, + "step": 6434 + }, + { + "epoch": 0.21, + "grad_norm": 0.717785656452179, + "learning_rate": 1.8075400340793775e-05, + "loss": 2.163, + "step": 6435 + }, + { + "epoch": 0.21, + "grad_norm": 0.7003490328788757, + "learning_rate": 1.8074773387361338e-05, + "loss": 2.2091, + "step": 6436 + }, + { + "epoch": 0.21, + "grad_norm": 0.7042582631111145, + "learning_rate": 1.8074146342704844e-05, + "loss": 2.2433, + "step": 6437 + }, + { + "epoch": 0.21, + "grad_norm": 0.6968415379524231, + "learning_rate": 1.8073519206831372e-05, + "loss": 2.1712, + "step": 6438 + }, + { + "epoch": 0.21, + "grad_norm": 0.6971914172172546, + "learning_rate": 1.8072891979748014e-05, + "loss": 2.2193, + "step": 6439 + }, + { + "epoch": 0.21, + "grad_norm": 0.7089229822158813, + "learning_rate": 1.8072264661461857e-05, + "loss": 2.2155, + "step": 6440 + }, + { + "epoch": 0.21, + "grad_norm": 0.7010672092437744, + "learning_rate": 1.8071637251979982e-05, + "loss": 2.1624, + "step": 6441 + }, + { + "epoch": 0.21, + "grad_norm": 0.7093105912208557, + "learning_rate": 1.8071009751309478e-05, + "loss": 2.1832, + "step": 6442 + }, + { + "epoch": 0.21, + "grad_norm": 0.7181783318519592, + "learning_rate": 1.8070382159457442e-05, + "loss": 2.1717, + "step": 6443 + }, + { + "epoch": 0.21, + "grad_norm": 0.7245060205459595, + "learning_rate": 1.806975447643095e-05, + "loss": 2.1976, + "step": 6444 + }, + { + "epoch": 0.21, + "grad_norm": 0.7049036026000977, + "learning_rate": 1.8069126702237104e-05, + "loss": 2.1509, + "step": 6445 + }, + { + "epoch": 0.21, + "grad_norm": 0.7025460600852966, + "learning_rate": 1.8068498836882998e-05, + "loss": 2.1817, + "step": 6446 + }, + { + "epoch": 0.21, + "grad_norm": 0.7077845335006714, + "learning_rate": 1.8067870880375716e-05, + "loss": 2.2038, + "step": 6447 + }, + { + "epoch": 0.21, + "grad_norm": 0.6847579479217529, + "learning_rate": 1.8067242832722356e-05, + "loss": 2.1977, + "step": 6448 + }, + { + "epoch": 0.21, + "grad_norm": 0.6642428636550903, + "learning_rate": 1.8066614693930017e-05, + "loss": 2.1934, + "step": 6449 + }, + { + "epoch": 0.21, + "grad_norm": 0.715844988822937, + "learning_rate": 1.806598646400579e-05, + "loss": 2.1943, + "step": 6450 + }, + { + "epoch": 0.21, + "grad_norm": 0.7015922665596008, + "learning_rate": 1.806535814295678e-05, + "loss": 2.1524, + "step": 6451 + }, + { + "epoch": 0.21, + "grad_norm": 0.6961047053337097, + "learning_rate": 1.806472973079008e-05, + "loss": 2.1643, + "step": 6452 + }, + { + "epoch": 0.21, + "grad_norm": 0.7450258135795593, + "learning_rate": 1.8064101227512787e-05, + "loss": 2.2079, + "step": 6453 + }, + { + "epoch": 0.21, + "grad_norm": 0.7437617182731628, + "learning_rate": 1.8063472633132006e-05, + "loss": 2.1505, + "step": 6454 + }, + { + "epoch": 0.21, + "grad_norm": 0.7318354845046997, + "learning_rate": 1.8062843947654834e-05, + "loss": 2.1072, + "step": 6455 + }, + { + "epoch": 0.21, + "grad_norm": 0.6964071989059448, + "learning_rate": 1.806221517108838e-05, + "loss": 2.1676, + "step": 6456 + }, + { + "epoch": 0.21, + "grad_norm": 0.6921846270561218, + "learning_rate": 1.806158630343974e-05, + "loss": 2.2175, + "step": 6457 + }, + { + "epoch": 0.21, + "grad_norm": 0.7437452673912048, + "learning_rate": 1.8060957344716024e-05, + "loss": 2.1265, + "step": 6458 + }, + { + "epoch": 0.21, + "grad_norm": 0.7209609746932983, + "learning_rate": 1.8060328294924337e-05, + "loss": 2.1778, + "step": 6459 + }, + { + "epoch": 0.21, + "grad_norm": 0.7071230411529541, + "learning_rate": 1.8059699154071783e-05, + "loss": 2.1099, + "step": 6460 + }, + { + "epoch": 0.21, + "grad_norm": 0.7006473541259766, + "learning_rate": 1.8059069922165474e-05, + "loss": 2.2137, + "step": 6461 + }, + { + "epoch": 0.21, + "grad_norm": 0.6914381384849548, + "learning_rate": 1.8058440599212516e-05, + "loss": 2.1719, + "step": 6462 + }, + { + "epoch": 0.22, + "grad_norm": 0.6959124803543091, + "learning_rate": 1.805781118522002e-05, + "loss": 2.112, + "step": 6463 + }, + { + "epoch": 0.22, + "grad_norm": 0.7534375786781311, + "learning_rate": 1.8057181680195092e-05, + "loss": 2.1671, + "step": 6464 + }, + { + "epoch": 0.22, + "grad_norm": 0.6837925910949707, + "learning_rate": 1.8056552084144847e-05, + "loss": 2.1453, + "step": 6465 + }, + { + "epoch": 0.22, + "grad_norm": 0.7110303044319153, + "learning_rate": 1.80559223970764e-05, + "loss": 2.2173, + "step": 6466 + }, + { + "epoch": 0.22, + "grad_norm": 0.6862213611602783, + "learning_rate": 1.8055292618996863e-05, + "loss": 2.1573, + "step": 6467 + }, + { + "epoch": 0.22, + "grad_norm": 0.705294132232666, + "learning_rate": 1.8054662749913353e-05, + "loss": 2.1875, + "step": 6468 + }, + { + "epoch": 0.22, + "grad_norm": 0.6973253488540649, + "learning_rate": 1.805403278983298e-05, + "loss": 2.1887, + "step": 6469 + }, + { + "epoch": 0.22, + "grad_norm": 0.6857908964157104, + "learning_rate": 1.8053402738762863e-05, + "loss": 2.2324, + "step": 6470 + }, + { + "epoch": 0.22, + "grad_norm": 0.7354410886764526, + "learning_rate": 1.8052772596710125e-05, + "loss": 2.1174, + "step": 6471 + }, + { + "epoch": 0.22, + "grad_norm": 0.687079668045044, + "learning_rate": 1.8052142363681884e-05, + "loss": 2.1314, + "step": 6472 + }, + { + "epoch": 0.22, + "grad_norm": 0.7114191055297852, + "learning_rate": 1.8051512039685256e-05, + "loss": 2.2349, + "step": 6473 + }, + { + "epoch": 0.22, + "grad_norm": 0.7184898257255554, + "learning_rate": 1.805088162472736e-05, + "loss": 2.189, + "step": 6474 + }, + { + "epoch": 0.22, + "grad_norm": 0.6776912808418274, + "learning_rate": 1.8050251118815326e-05, + "loss": 2.1767, + "step": 6475 + }, + { + "epoch": 0.22, + "grad_norm": 0.715488851070404, + "learning_rate": 1.804962052195627e-05, + "loss": 2.2062, + "step": 6476 + }, + { + "epoch": 0.22, + "grad_norm": 0.7093402147293091, + "learning_rate": 1.804898983415732e-05, + "loss": 2.1505, + "step": 6477 + }, + { + "epoch": 0.22, + "grad_norm": 0.7027968168258667, + "learning_rate": 1.80483590554256e-05, + "loss": 2.1883, + "step": 6478 + }, + { + "epoch": 0.22, + "grad_norm": 0.6755682826042175, + "learning_rate": 1.8047728185768237e-05, + "loss": 2.1611, + "step": 6479 + }, + { + "epoch": 0.22, + "grad_norm": 0.7189090251922607, + "learning_rate": 1.8047097225192356e-05, + "loss": 2.2126, + "step": 6480 + }, + { + "epoch": 0.22, + "grad_norm": 0.7012996673583984, + "learning_rate": 1.8046466173705086e-05, + "loss": 2.3044, + "step": 6481 + }, + { + "epoch": 0.22, + "grad_norm": 0.7429297566413879, + "learning_rate": 1.804583503131356e-05, + "loss": 2.1841, + "step": 6482 + }, + { + "epoch": 0.22, + "grad_norm": 0.6993507146835327, + "learning_rate": 1.80452037980249e-05, + "loss": 2.2551, + "step": 6483 + }, + { + "epoch": 0.22, + "grad_norm": 0.6889524459838867, + "learning_rate": 1.8044572473846246e-05, + "loss": 2.1253, + "step": 6484 + }, + { + "epoch": 0.22, + "grad_norm": 0.6920259594917297, + "learning_rate": 1.8043941058784728e-05, + "loss": 2.1633, + "step": 6485 + }, + { + "epoch": 0.22, + "grad_norm": 0.7178728580474854, + "learning_rate": 1.8043309552847476e-05, + "loss": 2.1903, + "step": 6486 + }, + { + "epoch": 0.22, + "grad_norm": 0.7149829864501953, + "learning_rate": 1.804267795604163e-05, + "loss": 2.1432, + "step": 6487 + }, + { + "epoch": 0.22, + "grad_norm": 0.7360720634460449, + "learning_rate": 1.804204626837432e-05, + "loss": 2.1205, + "step": 6488 + }, + { + "epoch": 0.22, + "grad_norm": 0.7601842284202576, + "learning_rate": 1.804141448985268e-05, + "loss": 2.2712, + "step": 6489 + }, + { + "epoch": 0.22, + "grad_norm": 0.7222402095794678, + "learning_rate": 1.8040782620483854e-05, + "loss": 2.2379, + "step": 6490 + }, + { + "epoch": 0.22, + "grad_norm": 0.7046586275100708, + "learning_rate": 1.804015066027498e-05, + "loss": 2.1602, + "step": 6491 + }, + { + "epoch": 0.22, + "grad_norm": 0.723450779914856, + "learning_rate": 1.8039518609233196e-05, + "loss": 2.2932, + "step": 6492 + }, + { + "epoch": 0.22, + "grad_norm": 0.6879225969314575, + "learning_rate": 1.803888646736564e-05, + "loss": 2.1475, + "step": 6493 + }, + { + "epoch": 0.22, + "grad_norm": 0.6795632243156433, + "learning_rate": 1.803825423467946e-05, + "loss": 2.2199, + "step": 6494 + }, + { + "epoch": 0.22, + "grad_norm": 0.6736174821853638, + "learning_rate": 1.803762191118179e-05, + "loss": 2.174, + "step": 6495 + }, + { + "epoch": 0.22, + "grad_norm": 0.6765211820602417, + "learning_rate": 1.803698949687978e-05, + "loss": 2.1198, + "step": 6496 + }, + { + "epoch": 0.22, + "grad_norm": 0.6945918202400208, + "learning_rate": 1.8036356991780573e-05, + "loss": 2.1683, + "step": 6497 + }, + { + "epoch": 0.22, + "grad_norm": 0.7008389830589294, + "learning_rate": 1.8035724395891314e-05, + "loss": 2.1578, + "step": 6498 + }, + { + "epoch": 0.22, + "grad_norm": 0.7341498732566833, + "learning_rate": 1.8035091709219153e-05, + "loss": 2.1383, + "step": 6499 + }, + { + "epoch": 0.22, + "grad_norm": 0.6831977367401123, + "learning_rate": 1.803445893177123e-05, + "loss": 2.17, + "step": 6500 + }, + { + "epoch": 0.22, + "grad_norm": 0.6934776306152344, + "learning_rate": 1.8033826063554706e-05, + "loss": 2.1596, + "step": 6501 + }, + { + "epoch": 0.22, + "grad_norm": 0.6972570419311523, + "learning_rate": 1.8033193104576717e-05, + "loss": 2.1951, + "step": 6502 + }, + { + "epoch": 0.22, + "grad_norm": 0.7249875068664551, + "learning_rate": 1.8032560054844422e-05, + "loss": 2.2139, + "step": 6503 + }, + { + "epoch": 0.22, + "grad_norm": 0.7270299792289734, + "learning_rate": 1.803192691436497e-05, + "loss": 2.2562, + "step": 6504 + }, + { + "epoch": 0.22, + "grad_norm": 0.6792468428611755, + "learning_rate": 1.8031293683145515e-05, + "loss": 2.1996, + "step": 6505 + }, + { + "epoch": 0.22, + "grad_norm": 0.7299947142601013, + "learning_rate": 1.8030660361193213e-05, + "loss": 2.1512, + "step": 6506 + }, + { + "epoch": 0.22, + "grad_norm": 0.7278671264648438, + "learning_rate": 1.8030026948515216e-05, + "loss": 2.2307, + "step": 6507 + }, + { + "epoch": 0.22, + "grad_norm": 0.7127509117126465, + "learning_rate": 1.8029393445118678e-05, + "loss": 2.2387, + "step": 6508 + }, + { + "epoch": 0.22, + "grad_norm": 0.6947072744369507, + "learning_rate": 1.8028759851010763e-05, + "loss": 2.1922, + "step": 6509 + }, + { + "epoch": 0.22, + "grad_norm": 0.7127625942230225, + "learning_rate": 1.8028126166198617e-05, + "loss": 2.1843, + "step": 6510 + }, + { + "epoch": 0.22, + "grad_norm": 0.731911838054657, + "learning_rate": 1.8027492390689415e-05, + "loss": 2.1824, + "step": 6511 + }, + { + "epoch": 0.22, + "grad_norm": 0.6634818911552429, + "learning_rate": 1.8026858524490306e-05, + "loss": 2.2563, + "step": 6512 + }, + { + "epoch": 0.22, + "grad_norm": 0.7038640975952148, + "learning_rate": 1.8026224567608453e-05, + "loss": 2.1537, + "step": 6513 + }, + { + "epoch": 0.22, + "grad_norm": 0.6808935403823853, + "learning_rate": 1.8025590520051018e-05, + "loss": 2.0971, + "step": 6514 + }, + { + "epoch": 0.22, + "grad_norm": 0.699148416519165, + "learning_rate": 1.8024956381825164e-05, + "loss": 2.2262, + "step": 6515 + }, + { + "epoch": 0.22, + "grad_norm": 0.7081369161605835, + "learning_rate": 1.8024322152938056e-05, + "loss": 2.1795, + "step": 6516 + }, + { + "epoch": 0.22, + "grad_norm": 0.7216430306434631, + "learning_rate": 1.8023687833396858e-05, + "loss": 2.2134, + "step": 6517 + }, + { + "epoch": 0.22, + "grad_norm": 0.7136854529380798, + "learning_rate": 1.802305342320874e-05, + "loss": 2.2434, + "step": 6518 + }, + { + "epoch": 0.22, + "grad_norm": 0.6983802914619446, + "learning_rate": 1.8022418922380866e-05, + "loss": 2.1794, + "step": 6519 + }, + { + "epoch": 0.22, + "grad_norm": 0.6896607279777527, + "learning_rate": 1.8021784330920404e-05, + "loss": 2.2337, + "step": 6520 + }, + { + "epoch": 0.22, + "grad_norm": 0.6989573240280151, + "learning_rate": 1.8021149648834525e-05, + "loss": 2.1637, + "step": 6521 + }, + { + "epoch": 0.22, + "grad_norm": 0.7296557426452637, + "learning_rate": 1.8020514876130395e-05, + "loss": 2.2675, + "step": 6522 + }, + { + "epoch": 0.22, + "grad_norm": 0.7054111957550049, + "learning_rate": 1.801988001281519e-05, + "loss": 2.2258, + "step": 6523 + }, + { + "epoch": 0.22, + "grad_norm": 0.725476086139679, + "learning_rate": 1.8019245058896083e-05, + "loss": 2.1431, + "step": 6524 + }, + { + "epoch": 0.22, + "grad_norm": 0.7145067453384399, + "learning_rate": 1.8018610014380242e-05, + "loss": 2.0628, + "step": 6525 + }, + { + "epoch": 0.22, + "grad_norm": 0.6953862905502319, + "learning_rate": 1.8017974879274848e-05, + "loss": 2.2062, + "step": 6526 + }, + { + "epoch": 0.22, + "grad_norm": 0.7258676886558533, + "learning_rate": 1.801733965358707e-05, + "loss": 2.2444, + "step": 6527 + }, + { + "epoch": 0.22, + "grad_norm": 0.7089829444885254, + "learning_rate": 1.8016704337324086e-05, + "loss": 2.2835, + "step": 6528 + }, + { + "epoch": 0.22, + "grad_norm": 0.7030249834060669, + "learning_rate": 1.8016068930493076e-05, + "loss": 2.1972, + "step": 6529 + }, + { + "epoch": 0.22, + "grad_norm": 0.706455409526825, + "learning_rate": 1.8015433433101218e-05, + "loss": 2.238, + "step": 6530 + }, + { + "epoch": 0.22, + "grad_norm": 0.7113663554191589, + "learning_rate": 1.8014797845155693e-05, + "loss": 2.2018, + "step": 6531 + }, + { + "epoch": 0.22, + "grad_norm": 0.7248159646987915, + "learning_rate": 1.8014162166663674e-05, + "loss": 2.1602, + "step": 6532 + }, + { + "epoch": 0.22, + "grad_norm": 0.7148717641830444, + "learning_rate": 1.801352639763235e-05, + "loss": 2.1948, + "step": 6533 + }, + { + "epoch": 0.22, + "grad_norm": 0.721083402633667, + "learning_rate": 1.8012890538068902e-05, + "loss": 2.2059, + "step": 6534 + }, + { + "epoch": 0.22, + "grad_norm": 0.7552103996276855, + "learning_rate": 1.8012254587980512e-05, + "loss": 2.2157, + "step": 6535 + }, + { + "epoch": 0.22, + "grad_norm": 0.6762883067131042, + "learning_rate": 1.8011618547374366e-05, + "loss": 2.2042, + "step": 6536 + }, + { + "epoch": 0.22, + "grad_norm": 0.6903905868530273, + "learning_rate": 1.8010982416257647e-05, + "loss": 2.1375, + "step": 6537 + }, + { + "epoch": 0.22, + "grad_norm": 0.6858564615249634, + "learning_rate": 1.801034619463754e-05, + "loss": 2.1554, + "step": 6538 + }, + { + "epoch": 0.22, + "grad_norm": 0.7493283748626709, + "learning_rate": 1.8009709882521242e-05, + "loss": 2.148, + "step": 6539 + }, + { + "epoch": 0.22, + "grad_norm": 0.6917670965194702, + "learning_rate": 1.8009073479915935e-05, + "loss": 2.206, + "step": 6540 + }, + { + "epoch": 0.22, + "grad_norm": 0.7596235275268555, + "learning_rate": 1.800843698682881e-05, + "loss": 2.2567, + "step": 6541 + }, + { + "epoch": 0.22, + "grad_norm": 0.757006049156189, + "learning_rate": 1.8007800403267057e-05, + "loss": 2.1531, + "step": 6542 + }, + { + "epoch": 0.22, + "grad_norm": 0.738733172416687, + "learning_rate": 1.8007163729237866e-05, + "loss": 2.1861, + "step": 6543 + }, + { + "epoch": 0.22, + "grad_norm": 0.7054998874664307, + "learning_rate": 1.8006526964748435e-05, + "loss": 2.2065, + "step": 6544 + }, + { + "epoch": 0.22, + "grad_norm": 0.6866227388381958, + "learning_rate": 1.8005890109805952e-05, + "loss": 2.267, + "step": 6545 + }, + { + "epoch": 0.22, + "grad_norm": 0.7243348956108093, + "learning_rate": 1.8005253164417616e-05, + "loss": 2.1476, + "step": 6546 + }, + { + "epoch": 0.22, + "grad_norm": 0.7126927375793457, + "learning_rate": 1.800461612859062e-05, + "loss": 2.1831, + "step": 6547 + }, + { + "epoch": 0.22, + "grad_norm": 0.7031869292259216, + "learning_rate": 1.8003979002332165e-05, + "loss": 2.1321, + "step": 6548 + }, + { + "epoch": 0.22, + "grad_norm": 0.696779727935791, + "learning_rate": 1.800334178564944e-05, + "loss": 2.219, + "step": 6549 + }, + { + "epoch": 0.22, + "grad_norm": 0.713350236415863, + "learning_rate": 1.8002704478549655e-05, + "loss": 2.1468, + "step": 6550 + }, + { + "epoch": 0.22, + "grad_norm": 0.6896235942840576, + "learning_rate": 1.8002067081040007e-05, + "loss": 2.2027, + "step": 6551 + }, + { + "epoch": 0.22, + "grad_norm": 0.688564121723175, + "learning_rate": 1.8001429593127692e-05, + "loss": 2.2271, + "step": 6552 + }, + { + "epoch": 0.22, + "grad_norm": 0.6818128228187561, + "learning_rate": 1.800079201481991e-05, + "loss": 2.2218, + "step": 6553 + }, + { + "epoch": 0.22, + "grad_norm": 0.652244508266449, + "learning_rate": 1.8000154346123875e-05, + "loss": 2.141, + "step": 6554 + }, + { + "epoch": 0.22, + "grad_norm": 0.6858006119728088, + "learning_rate": 1.7999516587046782e-05, + "loss": 2.1967, + "step": 6555 + }, + { + "epoch": 0.22, + "grad_norm": 0.709963858127594, + "learning_rate": 1.799887873759584e-05, + "loss": 2.1607, + "step": 6556 + }, + { + "epoch": 0.22, + "grad_norm": 0.7003176212310791, + "learning_rate": 1.7998240797778255e-05, + "loss": 2.1554, + "step": 6557 + }, + { + "epoch": 0.22, + "grad_norm": 0.6957613229751587, + "learning_rate": 1.799760276760123e-05, + "loss": 2.2217, + "step": 6558 + }, + { + "epoch": 0.22, + "grad_norm": 0.6880059838294983, + "learning_rate": 1.7996964647071977e-05, + "loss": 2.1608, + "step": 6559 + }, + { + "epoch": 0.22, + "grad_norm": 0.7173803448677063, + "learning_rate": 1.7996326436197706e-05, + "loss": 2.2572, + "step": 6560 + }, + { + "epoch": 0.22, + "grad_norm": 0.6865238547325134, + "learning_rate": 1.7995688134985622e-05, + "loss": 2.201, + "step": 6561 + }, + { + "epoch": 0.22, + "grad_norm": 0.6913920640945435, + "learning_rate": 1.799504974344294e-05, + "loss": 2.209, + "step": 6562 + }, + { + "epoch": 0.22, + "grad_norm": 0.7052408456802368, + "learning_rate": 1.7994411261576874e-05, + "loss": 2.2589, + "step": 6563 + }, + { + "epoch": 0.22, + "grad_norm": 0.7043444514274597, + "learning_rate": 1.799377268939463e-05, + "loss": 2.1344, + "step": 6564 + }, + { + "epoch": 0.22, + "grad_norm": 0.6907814741134644, + "learning_rate": 1.7993134026903432e-05, + "loss": 2.1204, + "step": 6565 + }, + { + "epoch": 0.22, + "grad_norm": 0.7150293588638306, + "learning_rate": 1.7992495274110488e-05, + "loss": 2.1116, + "step": 6566 + }, + { + "epoch": 0.22, + "grad_norm": 0.6804690361022949, + "learning_rate": 1.7991856431023018e-05, + "loss": 2.2566, + "step": 6567 + }, + { + "epoch": 0.22, + "grad_norm": 0.7090702056884766, + "learning_rate": 1.7991217497648236e-05, + "loss": 2.1704, + "step": 6568 + }, + { + "epoch": 0.22, + "grad_norm": 0.7331114411354065, + "learning_rate": 1.7990578473993362e-05, + "loss": 2.1685, + "step": 6569 + }, + { + "epoch": 0.22, + "grad_norm": 0.7139462828636169, + "learning_rate": 1.7989939360065617e-05, + "loss": 2.1706, + "step": 6570 + }, + { + "epoch": 0.22, + "grad_norm": 0.7457171678543091, + "learning_rate": 1.7989300155872217e-05, + "loss": 2.1477, + "step": 6571 + }, + { + "epoch": 0.22, + "grad_norm": 0.6962663531303406, + "learning_rate": 1.7988660861420388e-05, + "loss": 2.1674, + "step": 6572 + }, + { + "epoch": 0.22, + "grad_norm": 0.6956488490104675, + "learning_rate": 1.798802147671735e-05, + "loss": 2.1655, + "step": 6573 + }, + { + "epoch": 0.22, + "grad_norm": 0.7154152989387512, + "learning_rate": 1.7987382001770327e-05, + "loss": 2.2148, + "step": 6574 + }, + { + "epoch": 0.22, + "grad_norm": 0.7448229193687439, + "learning_rate": 1.7986742436586544e-05, + "loss": 2.1096, + "step": 6575 + }, + { + "epoch": 0.22, + "grad_norm": 0.7161765694618225, + "learning_rate": 1.7986102781173228e-05, + "loss": 2.2459, + "step": 6576 + }, + { + "epoch": 0.22, + "grad_norm": 0.7332423329353333, + "learning_rate": 1.7985463035537597e-05, + "loss": 2.2572, + "step": 6577 + }, + { + "epoch": 0.22, + "grad_norm": 0.7228856682777405, + "learning_rate": 1.7984823199686886e-05, + "loss": 2.2125, + "step": 6578 + }, + { + "epoch": 0.22, + "grad_norm": 0.6857089400291443, + "learning_rate": 1.7984183273628325e-05, + "loss": 2.1786, + "step": 6579 + }, + { + "epoch": 0.22, + "grad_norm": 0.694179892539978, + "learning_rate": 1.7983543257369137e-05, + "loss": 2.1752, + "step": 6580 + }, + { + "epoch": 0.22, + "grad_norm": 0.6854165196418762, + "learning_rate": 1.798290315091656e-05, + "loss": 2.1365, + "step": 6581 + }, + { + "epoch": 0.22, + "grad_norm": 0.7022131681442261, + "learning_rate": 1.798226295427782e-05, + "loss": 2.1647, + "step": 6582 + }, + { + "epoch": 0.22, + "grad_norm": 0.7304705381393433, + "learning_rate": 1.798162266746015e-05, + "loss": 2.1365, + "step": 6583 + }, + { + "epoch": 0.22, + "grad_norm": 0.7153554558753967, + "learning_rate": 1.7980982290470786e-05, + "loss": 2.1569, + "step": 6584 + }, + { + "epoch": 0.22, + "grad_norm": 0.7307454943656921, + "learning_rate": 1.798034182331696e-05, + "loss": 2.146, + "step": 6585 + }, + { + "epoch": 0.22, + "grad_norm": 0.7032563090324402, + "learning_rate": 1.7979701266005912e-05, + "loss": 2.1904, + "step": 6586 + }, + { + "epoch": 0.22, + "grad_norm": 0.7243797779083252, + "learning_rate": 1.7979060618544874e-05, + "loss": 2.158, + "step": 6587 + }, + { + "epoch": 0.22, + "grad_norm": 0.7189526557922363, + "learning_rate": 1.7978419880941085e-05, + "loss": 2.1461, + "step": 6588 + }, + { + "epoch": 0.22, + "grad_norm": 0.7318780422210693, + "learning_rate": 1.7977779053201785e-05, + "loss": 2.2235, + "step": 6589 + }, + { + "epoch": 0.22, + "grad_norm": 0.7007522583007812, + "learning_rate": 1.797713813533421e-05, + "loss": 2.1663, + "step": 6590 + }, + { + "epoch": 0.22, + "grad_norm": 0.6892000436782837, + "learning_rate": 1.7976497127345608e-05, + "loss": 2.2301, + "step": 6591 + }, + { + "epoch": 0.22, + "grad_norm": 0.7175612449645996, + "learning_rate": 1.797585602924321e-05, + "loss": 2.1522, + "step": 6592 + }, + { + "epoch": 0.22, + "grad_norm": 0.6855272650718689, + "learning_rate": 1.797521484103427e-05, + "loss": 2.24, + "step": 6593 + }, + { + "epoch": 0.22, + "grad_norm": 0.6977092623710632, + "learning_rate": 1.7974573562726022e-05, + "loss": 2.2286, + "step": 6594 + }, + { + "epoch": 0.22, + "grad_norm": 0.6980246305465698, + "learning_rate": 1.797393219432572e-05, + "loss": 2.2004, + "step": 6595 + }, + { + "epoch": 0.22, + "grad_norm": 0.7200573086738586, + "learning_rate": 1.7973290735840603e-05, + "loss": 2.178, + "step": 6596 + }, + { + "epoch": 0.22, + "grad_norm": 0.6859830617904663, + "learning_rate": 1.7972649187277923e-05, + "loss": 2.1832, + "step": 6597 + }, + { + "epoch": 0.22, + "grad_norm": 0.6967597007751465, + "learning_rate": 1.797200754864492e-05, + "loss": 2.1251, + "step": 6598 + }, + { + "epoch": 0.22, + "grad_norm": 0.7362996935844421, + "learning_rate": 1.797136581994885e-05, + "loss": 2.2404, + "step": 6599 + }, + { + "epoch": 0.22, + "grad_norm": 0.7006789445877075, + "learning_rate": 1.7970724001196962e-05, + "loss": 2.1868, + "step": 6600 + }, + { + "epoch": 0.22, + "grad_norm": 0.696056067943573, + "learning_rate": 1.7970082092396505e-05, + "loss": 2.2004, + "step": 6601 + }, + { + "epoch": 0.22, + "grad_norm": 0.7123087048530579, + "learning_rate": 1.7969440093554732e-05, + "loss": 2.1999, + "step": 6602 + }, + { + "epoch": 0.22, + "grad_norm": 0.6767599582672119, + "learning_rate": 1.7968798004678892e-05, + "loss": 2.2067, + "step": 6603 + }, + { + "epoch": 0.22, + "grad_norm": 0.7009679675102234, + "learning_rate": 1.7968155825776244e-05, + "loss": 2.287, + "step": 6604 + }, + { + "epoch": 0.22, + "grad_norm": 0.7190284729003906, + "learning_rate": 1.7967513556854045e-05, + "loss": 2.2251, + "step": 6605 + }, + { + "epoch": 0.22, + "grad_norm": 0.7339173555374146, + "learning_rate": 1.7966871197919544e-05, + "loss": 2.1661, + "step": 6606 + }, + { + "epoch": 0.22, + "grad_norm": 0.7319211363792419, + "learning_rate": 1.796622874898e-05, + "loss": 2.1429, + "step": 6607 + }, + { + "epoch": 0.22, + "grad_norm": 0.7431654930114746, + "learning_rate": 1.7965586210042675e-05, + "loss": 2.1866, + "step": 6608 + }, + { + "epoch": 0.22, + "grad_norm": 0.6840620040893555, + "learning_rate": 1.7964943581114823e-05, + "loss": 2.1711, + "step": 6609 + }, + { + "epoch": 0.22, + "grad_norm": 0.7071534991264343, + "learning_rate": 1.796430086220371e-05, + "loss": 2.1362, + "step": 6610 + }, + { + "epoch": 0.22, + "grad_norm": 0.7259408831596375, + "learning_rate": 1.7963658053316588e-05, + "loss": 2.1168, + "step": 6611 + }, + { + "epoch": 0.22, + "grad_norm": 0.7257355451583862, + "learning_rate": 1.796301515446073e-05, + "loss": 2.1886, + "step": 6612 + }, + { + "epoch": 0.22, + "grad_norm": 0.7154070138931274, + "learning_rate": 1.7962372165643387e-05, + "loss": 2.1935, + "step": 6613 + }, + { + "epoch": 0.22, + "grad_norm": 0.7240186333656311, + "learning_rate": 1.7961729086871835e-05, + "loss": 2.2037, + "step": 6614 + }, + { + "epoch": 0.22, + "grad_norm": 0.7244178652763367, + "learning_rate": 1.7961085918153332e-05, + "loss": 2.2365, + "step": 6615 + }, + { + "epoch": 0.22, + "grad_norm": 0.7270267009735107, + "learning_rate": 1.7960442659495147e-05, + "loss": 2.1786, + "step": 6616 + }, + { + "epoch": 0.22, + "grad_norm": 0.710294246673584, + "learning_rate": 1.7959799310904545e-05, + "loss": 2.1643, + "step": 6617 + }, + { + "epoch": 0.22, + "grad_norm": 0.7330827713012695, + "learning_rate": 1.7959155872388797e-05, + "loss": 2.2561, + "step": 6618 + }, + { + "epoch": 0.22, + "grad_norm": 0.7038842439651489, + "learning_rate": 1.7958512343955167e-05, + "loss": 2.1791, + "step": 6619 + }, + { + "epoch": 0.22, + "grad_norm": 0.7041257619857788, + "learning_rate": 1.7957868725610932e-05, + "loss": 2.192, + "step": 6620 + }, + { + "epoch": 0.22, + "grad_norm": 0.7143344283103943, + "learning_rate": 1.795722501736336e-05, + "loss": 2.1639, + "step": 6621 + }, + { + "epoch": 0.22, + "grad_norm": 0.7441373467445374, + "learning_rate": 1.795658121921972e-05, + "loss": 2.1501, + "step": 6622 + }, + { + "epoch": 0.22, + "grad_norm": 0.7239783406257629, + "learning_rate": 1.795593733118729e-05, + "loss": 2.2269, + "step": 6623 + }, + { + "epoch": 0.22, + "grad_norm": 0.6970530152320862, + "learning_rate": 1.7955293353273344e-05, + "loss": 2.1674, + "step": 6624 + }, + { + "epoch": 0.22, + "grad_norm": 0.7018797397613525, + "learning_rate": 1.7954649285485157e-05, + "loss": 2.1611, + "step": 6625 + }, + { + "epoch": 0.22, + "grad_norm": 0.6809978485107422, + "learning_rate": 1.7954005127830003e-05, + "loss": 2.1371, + "step": 6626 + }, + { + "epoch": 0.22, + "grad_norm": 0.6916415691375732, + "learning_rate": 1.7953360880315156e-05, + "loss": 2.1818, + "step": 6627 + }, + { + "epoch": 0.22, + "grad_norm": 0.6924553513526917, + "learning_rate": 1.7952716542947905e-05, + "loss": 2.121, + "step": 6628 + }, + { + "epoch": 0.22, + "grad_norm": 0.6921359300613403, + "learning_rate": 1.795207211573552e-05, + "loss": 2.1718, + "step": 6629 + }, + { + "epoch": 0.22, + "grad_norm": 0.708489179611206, + "learning_rate": 1.7951427598685285e-05, + "loss": 2.1784, + "step": 6630 + }, + { + "epoch": 0.22, + "grad_norm": 0.7168723940849304, + "learning_rate": 1.795078299180448e-05, + "loss": 2.1614, + "step": 6631 + }, + { + "epoch": 0.22, + "grad_norm": 0.6781845092773438, + "learning_rate": 1.795013829510039e-05, + "loss": 2.1534, + "step": 6632 + }, + { + "epoch": 0.22, + "grad_norm": 0.7574804425239563, + "learning_rate": 1.79494935085803e-05, + "loss": 2.1353, + "step": 6633 + }, + { + "epoch": 0.22, + "grad_norm": 0.693473756313324, + "learning_rate": 1.7948848632251485e-05, + "loss": 2.1414, + "step": 6634 + }, + { + "epoch": 0.22, + "grad_norm": 0.7214508652687073, + "learning_rate": 1.794820366612124e-05, + "loss": 2.1962, + "step": 6635 + }, + { + "epoch": 0.22, + "grad_norm": 0.6834951043128967, + "learning_rate": 1.7947558610196846e-05, + "loss": 2.2291, + "step": 6636 + }, + { + "epoch": 0.22, + "grad_norm": 0.7094677090644836, + "learning_rate": 1.7946913464485595e-05, + "loss": 2.2176, + "step": 6637 + }, + { + "epoch": 0.22, + "grad_norm": 0.7109693288803101, + "learning_rate": 1.794626822899477e-05, + "loss": 2.1726, + "step": 6638 + }, + { + "epoch": 0.22, + "grad_norm": 0.7006967067718506, + "learning_rate": 1.794562290373167e-05, + "loss": 2.1951, + "step": 6639 + }, + { + "epoch": 0.22, + "grad_norm": 0.7130532264709473, + "learning_rate": 1.7944977488703572e-05, + "loss": 2.2397, + "step": 6640 + }, + { + "epoch": 0.22, + "grad_norm": 0.6664572954177856, + "learning_rate": 1.7944331983917776e-05, + "loss": 2.1613, + "step": 6641 + }, + { + "epoch": 0.22, + "grad_norm": 0.7230521440505981, + "learning_rate": 1.7943686389381575e-05, + "loss": 2.1413, + "step": 6642 + }, + { + "epoch": 0.22, + "grad_norm": 0.6880577206611633, + "learning_rate": 1.794304070510226e-05, + "loss": 2.1441, + "step": 6643 + }, + { + "epoch": 0.22, + "grad_norm": 0.7133157849311829, + "learning_rate": 1.7942394931087127e-05, + "loss": 2.2102, + "step": 6644 + }, + { + "epoch": 0.22, + "grad_norm": 0.7430444955825806, + "learning_rate": 1.7941749067343472e-05, + "loss": 2.184, + "step": 6645 + }, + { + "epoch": 0.22, + "grad_norm": 0.6830583214759827, + "learning_rate": 1.7941103113878587e-05, + "loss": 2.0998, + "step": 6646 + }, + { + "epoch": 0.22, + "grad_norm": 0.6989683508872986, + "learning_rate": 1.7940457070699774e-05, + "loss": 2.1725, + "step": 6647 + }, + { + "epoch": 0.22, + "grad_norm": 0.697478175163269, + "learning_rate": 1.793981093781433e-05, + "loss": 2.0627, + "step": 6648 + }, + { + "epoch": 0.22, + "grad_norm": 0.6964222192764282, + "learning_rate": 1.7939164715229556e-05, + "loss": 2.2161, + "step": 6649 + }, + { + "epoch": 0.22, + "grad_norm": 0.704581618309021, + "learning_rate": 1.793851840295275e-05, + "loss": 2.1239, + "step": 6650 + }, + { + "epoch": 0.22, + "grad_norm": 0.7236476540565491, + "learning_rate": 1.7937872000991218e-05, + "loss": 2.1688, + "step": 6651 + }, + { + "epoch": 0.22, + "grad_norm": 0.7146232724189758, + "learning_rate": 1.793722550935226e-05, + "loss": 2.1019, + "step": 6652 + }, + { + "epoch": 0.22, + "grad_norm": 0.6998026371002197, + "learning_rate": 1.793657892804318e-05, + "loss": 2.1766, + "step": 6653 + }, + { + "epoch": 0.22, + "grad_norm": 0.7196683287620544, + "learning_rate": 1.7935932257071284e-05, + "loss": 2.215, + "step": 6654 + }, + { + "epoch": 0.22, + "grad_norm": 0.6888589262962341, + "learning_rate": 1.7935285496443872e-05, + "loss": 2.1192, + "step": 6655 + }, + { + "epoch": 0.22, + "grad_norm": 0.7328450679779053, + "learning_rate": 1.793463864616826e-05, + "loss": 2.1467, + "step": 6656 + }, + { + "epoch": 0.22, + "grad_norm": 0.7402969002723694, + "learning_rate": 1.7933991706251745e-05, + "loss": 2.212, + "step": 6657 + }, + { + "epoch": 0.22, + "grad_norm": 0.6689773201942444, + "learning_rate": 1.793334467670165e-05, + "loss": 2.184, + "step": 6658 + }, + { + "epoch": 0.22, + "grad_norm": 0.6956059336662292, + "learning_rate": 1.7932697557525265e-05, + "loss": 2.1339, + "step": 6659 + }, + { + "epoch": 0.22, + "grad_norm": 0.7008812427520752, + "learning_rate": 1.793205034872992e-05, + "loss": 2.2287, + "step": 6660 + }, + { + "epoch": 0.22, + "grad_norm": 0.7328618764877319, + "learning_rate": 1.7931403050322916e-05, + "loss": 2.1426, + "step": 6661 + }, + { + "epoch": 0.22, + "grad_norm": 0.6775570511817932, + "learning_rate": 1.7930755662311566e-05, + "loss": 2.1817, + "step": 6662 + }, + { + "epoch": 0.22, + "grad_norm": 0.7062534689903259, + "learning_rate": 1.793010818470319e-05, + "loss": 2.1274, + "step": 6663 + }, + { + "epoch": 0.22, + "grad_norm": 0.7023792266845703, + "learning_rate": 1.79294606175051e-05, + "loss": 2.1523, + "step": 6664 + }, + { + "epoch": 0.22, + "grad_norm": 0.7355208396911621, + "learning_rate": 1.792881296072461e-05, + "loss": 2.1614, + "step": 6665 + }, + { + "epoch": 0.22, + "grad_norm": 0.7151618003845215, + "learning_rate": 1.7928165214369036e-05, + "loss": 2.1148, + "step": 6666 + }, + { + "epoch": 0.22, + "grad_norm": 0.6916586756706238, + "learning_rate": 1.79275173784457e-05, + "loss": 2.0807, + "step": 6667 + }, + { + "epoch": 0.22, + "grad_norm": 0.7306488156318665, + "learning_rate": 1.792686945296192e-05, + "loss": 2.2012, + "step": 6668 + }, + { + "epoch": 0.22, + "grad_norm": 0.7072722911834717, + "learning_rate": 1.7926221437925014e-05, + "loss": 2.2066, + "step": 6669 + }, + { + "epoch": 0.22, + "grad_norm": 0.6857954263687134, + "learning_rate": 1.79255733333423e-05, + "loss": 2.1622, + "step": 6670 + }, + { + "epoch": 0.22, + "grad_norm": 0.7160612344741821, + "learning_rate": 1.7924925139221107e-05, + "loss": 2.2173, + "step": 6671 + }, + { + "epoch": 0.22, + "grad_norm": 0.7112611532211304, + "learning_rate": 1.792427685556875e-05, + "loss": 2.1385, + "step": 6672 + }, + { + "epoch": 0.22, + "grad_norm": 0.7116711139678955, + "learning_rate": 1.792362848239256e-05, + "loss": 2.1129, + "step": 6673 + }, + { + "epoch": 0.22, + "grad_norm": 0.6863235235214233, + "learning_rate": 1.7922980019699858e-05, + "loss": 2.1991, + "step": 6674 + }, + { + "epoch": 0.22, + "grad_norm": 0.7066608667373657, + "learning_rate": 1.7922331467497973e-05, + "loss": 2.206, + "step": 6675 + }, + { + "epoch": 0.22, + "grad_norm": 0.655047595500946, + "learning_rate": 1.792168282579423e-05, + "loss": 2.1688, + "step": 6676 + }, + { + "epoch": 0.22, + "grad_norm": 0.701244592666626, + "learning_rate": 1.7921034094595958e-05, + "loss": 2.213, + "step": 6677 + }, + { + "epoch": 0.22, + "grad_norm": 0.6872324347496033, + "learning_rate": 1.792038527391048e-05, + "loss": 2.0548, + "step": 6678 + }, + { + "epoch": 0.22, + "grad_norm": 0.71617192029953, + "learning_rate": 1.7919736363745135e-05, + "loss": 2.2081, + "step": 6679 + }, + { + "epoch": 0.22, + "grad_norm": 0.6914728283882141, + "learning_rate": 1.7919087364107247e-05, + "loss": 2.1946, + "step": 6680 + }, + { + "epoch": 0.22, + "grad_norm": 0.7542549967765808, + "learning_rate": 1.7918438275004156e-05, + "loss": 2.1453, + "step": 6681 + }, + { + "epoch": 0.22, + "grad_norm": 0.7000204920768738, + "learning_rate": 1.7917789096443185e-05, + "loss": 2.1744, + "step": 6682 + }, + { + "epoch": 0.22, + "grad_norm": 0.6969152688980103, + "learning_rate": 1.7917139828431675e-05, + "loss": 2.2687, + "step": 6683 + }, + { + "epoch": 0.22, + "grad_norm": 0.7273998856544495, + "learning_rate": 1.791649047097696e-05, + "loss": 2.2601, + "step": 6684 + }, + { + "epoch": 0.22, + "grad_norm": 0.6938063502311707, + "learning_rate": 1.7915841024086372e-05, + "loss": 2.1484, + "step": 6685 + }, + { + "epoch": 0.22, + "grad_norm": 0.7053707242012024, + "learning_rate": 1.7915191487767253e-05, + "loss": 2.1978, + "step": 6686 + }, + { + "epoch": 0.22, + "grad_norm": 0.7187672853469849, + "learning_rate": 1.7914541862026943e-05, + "loss": 2.2416, + "step": 6687 + }, + { + "epoch": 0.22, + "grad_norm": 0.704089879989624, + "learning_rate": 1.7913892146872775e-05, + "loss": 2.2611, + "step": 6688 + }, + { + "epoch": 0.22, + "grad_norm": 0.6950346231460571, + "learning_rate": 1.791324234231209e-05, + "loss": 2.1764, + "step": 6689 + }, + { + "epoch": 0.22, + "grad_norm": 0.7089124917984009, + "learning_rate": 1.7912592448352232e-05, + "loss": 2.2397, + "step": 6690 + }, + { + "epoch": 0.22, + "grad_norm": 0.7141889929771423, + "learning_rate": 1.791194246500054e-05, + "loss": 2.0711, + "step": 6691 + }, + { + "epoch": 0.22, + "grad_norm": 0.7076598405838013, + "learning_rate": 1.791129239226436e-05, + "loss": 2.2425, + "step": 6692 + }, + { + "epoch": 0.22, + "grad_norm": 0.7320265769958496, + "learning_rate": 1.7910642230151035e-05, + "loss": 2.2193, + "step": 6693 + }, + { + "epoch": 0.22, + "grad_norm": 0.6987038850784302, + "learning_rate": 1.790999197866791e-05, + "loss": 2.1945, + "step": 6694 + }, + { + "epoch": 0.22, + "grad_norm": 0.7167535424232483, + "learning_rate": 1.7909341637822332e-05, + "loss": 2.1507, + "step": 6695 + }, + { + "epoch": 0.22, + "grad_norm": 0.6794973611831665, + "learning_rate": 1.7908691207621645e-05, + "loss": 2.2056, + "step": 6696 + }, + { + "epoch": 0.22, + "grad_norm": 0.720698356628418, + "learning_rate": 1.7908040688073206e-05, + "loss": 2.1165, + "step": 6697 + }, + { + "epoch": 0.22, + "grad_norm": 0.6985532641410828, + "learning_rate": 1.7907390079184353e-05, + "loss": 2.2245, + "step": 6698 + }, + { + "epoch": 0.22, + "grad_norm": 0.702853798866272, + "learning_rate": 1.7906739380962442e-05, + "loss": 2.1829, + "step": 6699 + }, + { + "epoch": 0.22, + "grad_norm": 0.6892301440238953, + "learning_rate": 1.790608859341482e-05, + "loss": 2.2112, + "step": 6700 + }, + { + "epoch": 0.22, + "grad_norm": 0.7039897441864014, + "learning_rate": 1.7905437716548847e-05, + "loss": 2.1829, + "step": 6701 + }, + { + "epoch": 0.22, + "grad_norm": 0.7373062372207642, + "learning_rate": 1.7904786750371868e-05, + "loss": 2.2158, + "step": 6702 + }, + { + "epoch": 0.22, + "grad_norm": 0.6866317391395569, + "learning_rate": 1.7904135694891243e-05, + "loss": 2.1776, + "step": 6703 + }, + { + "epoch": 0.22, + "grad_norm": 0.7084851861000061, + "learning_rate": 1.7903484550114328e-05, + "loss": 2.1547, + "step": 6704 + }, + { + "epoch": 0.22, + "grad_norm": 0.7180258631706238, + "learning_rate": 1.7902833316048475e-05, + "loss": 2.1391, + "step": 6705 + }, + { + "epoch": 0.22, + "grad_norm": 0.7088985443115234, + "learning_rate": 1.790218199270104e-05, + "loss": 2.1339, + "step": 6706 + }, + { + "epoch": 0.22, + "grad_norm": 0.7078843712806702, + "learning_rate": 1.7901530580079387e-05, + "loss": 2.1637, + "step": 6707 + }, + { + "epoch": 0.22, + "grad_norm": 0.7053789496421814, + "learning_rate": 1.7900879078190868e-05, + "loss": 2.1417, + "step": 6708 + }, + { + "epoch": 0.22, + "grad_norm": 0.7207679748535156, + "learning_rate": 1.7900227487042853e-05, + "loss": 2.2276, + "step": 6709 + }, + { + "epoch": 0.22, + "grad_norm": 0.6909953951835632, + "learning_rate": 1.7899575806642695e-05, + "loss": 2.2244, + "step": 6710 + }, + { + "epoch": 0.22, + "grad_norm": 0.7080489993095398, + "learning_rate": 1.789892403699776e-05, + "loss": 2.2077, + "step": 6711 + }, + { + "epoch": 0.22, + "grad_norm": 0.6981737017631531, + "learning_rate": 1.7898272178115407e-05, + "loss": 2.1733, + "step": 6712 + }, + { + "epoch": 0.22, + "grad_norm": 0.7211215496063232, + "learning_rate": 1.789762023000301e-05, + "loss": 2.1602, + "step": 6713 + }, + { + "epoch": 0.22, + "grad_norm": 0.6821576356887817, + "learning_rate": 1.7896968192667924e-05, + "loss": 2.1013, + "step": 6714 + }, + { + "epoch": 0.22, + "grad_norm": 0.7175663113594055, + "learning_rate": 1.789631606611752e-05, + "loss": 2.2145, + "step": 6715 + }, + { + "epoch": 0.22, + "grad_norm": 0.7491844296455383, + "learning_rate": 1.7895663850359165e-05, + "loss": 2.0937, + "step": 6716 + }, + { + "epoch": 0.22, + "grad_norm": 0.7000634074211121, + "learning_rate": 1.7895011545400226e-05, + "loss": 2.1272, + "step": 6717 + }, + { + "epoch": 0.22, + "grad_norm": 0.7442992329597473, + "learning_rate": 1.7894359151248074e-05, + "loss": 2.2024, + "step": 6718 + }, + { + "epoch": 0.22, + "grad_norm": 0.6817628145217896, + "learning_rate": 1.7893706667910078e-05, + "loss": 2.1675, + "step": 6719 + }, + { + "epoch": 0.22, + "grad_norm": 0.7229297757148743, + "learning_rate": 1.7893054095393614e-05, + "loss": 2.2143, + "step": 6720 + }, + { + "epoch": 0.22, + "grad_norm": 0.6896880269050598, + "learning_rate": 1.7892401433706047e-05, + "loss": 2.1671, + "step": 6721 + }, + { + "epoch": 0.22, + "grad_norm": 0.7212055921554565, + "learning_rate": 1.7891748682854758e-05, + "loss": 2.1986, + "step": 6722 + }, + { + "epoch": 0.22, + "grad_norm": 0.7027114629745483, + "learning_rate": 1.7891095842847112e-05, + "loss": 2.1849, + "step": 6723 + }, + { + "epoch": 0.22, + "grad_norm": 0.6905052065849304, + "learning_rate": 1.7890442913690492e-05, + "loss": 2.1926, + "step": 6724 + }, + { + "epoch": 0.22, + "grad_norm": 0.7455458045005798, + "learning_rate": 1.7889789895392273e-05, + "loss": 2.2669, + "step": 6725 + }, + { + "epoch": 0.22, + "grad_norm": 0.7445131540298462, + "learning_rate": 1.7889136787959832e-05, + "loss": 2.1653, + "step": 6726 + }, + { + "epoch": 0.22, + "grad_norm": 0.7124503254890442, + "learning_rate": 1.7888483591400545e-05, + "loss": 2.1634, + "step": 6727 + }, + { + "epoch": 0.22, + "grad_norm": 0.7132136225700378, + "learning_rate": 1.7887830305721798e-05, + "loss": 2.1531, + "step": 6728 + }, + { + "epoch": 0.22, + "grad_norm": 0.6970967054367065, + "learning_rate": 1.788717693093096e-05, + "loss": 2.2441, + "step": 6729 + }, + { + "epoch": 0.22, + "grad_norm": 0.6777008175849915, + "learning_rate": 1.7886523467035422e-05, + "loss": 2.1577, + "step": 6730 + }, + { + "epoch": 0.22, + "grad_norm": 0.7264772057533264, + "learning_rate": 1.7885869914042567e-05, + "loss": 2.159, + "step": 6731 + }, + { + "epoch": 0.22, + "grad_norm": 0.693709135055542, + "learning_rate": 1.7885216271959772e-05, + "loss": 2.1447, + "step": 6732 + }, + { + "epoch": 0.22, + "grad_norm": 0.6850021481513977, + "learning_rate": 1.7884562540794427e-05, + "loss": 2.1597, + "step": 6733 + }, + { + "epoch": 0.22, + "grad_norm": 0.7083114981651306, + "learning_rate": 1.7883908720553915e-05, + "loss": 2.1781, + "step": 6734 + }, + { + "epoch": 0.22, + "grad_norm": 0.7071018218994141, + "learning_rate": 1.7883254811245622e-05, + "loss": 2.1997, + "step": 6735 + }, + { + "epoch": 0.22, + "grad_norm": 0.7036201357841492, + "learning_rate": 1.7882600812876937e-05, + "loss": 2.1797, + "step": 6736 + }, + { + "epoch": 0.22, + "grad_norm": 0.7072071433067322, + "learning_rate": 1.7881946725455247e-05, + "loss": 2.1695, + "step": 6737 + }, + { + "epoch": 0.22, + "grad_norm": 0.6878752112388611, + "learning_rate": 1.7881292548987938e-05, + "loss": 2.1631, + "step": 6738 + }, + { + "epoch": 0.22, + "grad_norm": 0.7123965620994568, + "learning_rate": 1.788063828348241e-05, + "loss": 2.1545, + "step": 6739 + }, + { + "epoch": 0.22, + "grad_norm": 0.698392391204834, + "learning_rate": 1.787998392894605e-05, + "loss": 2.1705, + "step": 6740 + }, + { + "epoch": 0.22, + "grad_norm": 0.7054446935653687, + "learning_rate": 1.7879329485386246e-05, + "loss": 2.084, + "step": 6741 + }, + { + "epoch": 0.22, + "grad_norm": 0.6947166323661804, + "learning_rate": 1.78786749528104e-05, + "loss": 2.1355, + "step": 6742 + }, + { + "epoch": 0.22, + "grad_norm": 0.684367299079895, + "learning_rate": 1.7878020331225897e-05, + "loss": 2.2047, + "step": 6743 + }, + { + "epoch": 0.22, + "grad_norm": 0.7165995240211487, + "learning_rate": 1.7877365620640136e-05, + "loss": 2.1029, + "step": 6744 + }, + { + "epoch": 0.22, + "grad_norm": 0.7213699817657471, + "learning_rate": 1.787671082106052e-05, + "loss": 2.1612, + "step": 6745 + }, + { + "epoch": 0.22, + "grad_norm": 0.6980475187301636, + "learning_rate": 1.7876055932494435e-05, + "loss": 2.1828, + "step": 6746 + }, + { + "epoch": 0.22, + "grad_norm": 0.6845316290855408, + "learning_rate": 1.7875400954949293e-05, + "loss": 2.17, + "step": 6747 + }, + { + "epoch": 0.22, + "grad_norm": 0.7053123712539673, + "learning_rate": 1.7874745888432482e-05, + "loss": 2.1305, + "step": 6748 + }, + { + "epoch": 0.22, + "grad_norm": 0.6992340683937073, + "learning_rate": 1.7874090732951407e-05, + "loss": 2.161, + "step": 6749 + }, + { + "epoch": 0.22, + "grad_norm": 0.7306883931159973, + "learning_rate": 1.7873435488513472e-05, + "loss": 2.281, + "step": 6750 + }, + { + "epoch": 0.22, + "grad_norm": 0.7324796319007874, + "learning_rate": 1.7872780155126076e-05, + "loss": 2.2185, + "step": 6751 + }, + { + "epoch": 0.22, + "grad_norm": 0.7234364748001099, + "learning_rate": 1.7872124732796626e-05, + "loss": 2.1428, + "step": 6752 + }, + { + "epoch": 0.22, + "grad_norm": 0.6811234951019287, + "learning_rate": 1.787146922153252e-05, + "loss": 2.1215, + "step": 6753 + }, + { + "epoch": 0.22, + "grad_norm": 0.6736853122711182, + "learning_rate": 1.7870813621341168e-05, + "loss": 2.1741, + "step": 6754 + }, + { + "epoch": 0.22, + "grad_norm": 0.7128562331199646, + "learning_rate": 1.787015793222998e-05, + "loss": 2.2112, + "step": 6755 + }, + { + "epoch": 0.22, + "grad_norm": 0.7001230120658875, + "learning_rate": 1.7869502154206357e-05, + "loss": 2.2458, + "step": 6756 + }, + { + "epoch": 0.22, + "grad_norm": 0.6988021731376648, + "learning_rate": 1.7868846287277712e-05, + "loss": 2.185, + "step": 6757 + }, + { + "epoch": 0.22, + "grad_norm": 0.6877143383026123, + "learning_rate": 1.7868190331451455e-05, + "loss": 2.2192, + "step": 6758 + }, + { + "epoch": 0.22, + "grad_norm": 0.6942476034164429, + "learning_rate": 1.7867534286734992e-05, + "loss": 2.1803, + "step": 6759 + }, + { + "epoch": 0.22, + "grad_norm": 0.6822233200073242, + "learning_rate": 1.7866878153135737e-05, + "loss": 2.1575, + "step": 6760 + }, + { + "epoch": 0.22, + "grad_norm": 0.7128563523292542, + "learning_rate": 1.7866221930661106e-05, + "loss": 2.1747, + "step": 6761 + }, + { + "epoch": 0.22, + "grad_norm": 0.6892821192741394, + "learning_rate": 1.786556561931851e-05, + "loss": 2.1538, + "step": 6762 + }, + { + "epoch": 0.23, + "grad_norm": 0.7127853631973267, + "learning_rate": 1.786490921911536e-05, + "loss": 2.1916, + "step": 6763 + }, + { + "epoch": 0.23, + "grad_norm": 0.6819483637809753, + "learning_rate": 1.7864252730059076e-05, + "loss": 2.1341, + "step": 6764 + }, + { + "epoch": 0.23, + "grad_norm": 0.7072163820266724, + "learning_rate": 1.7863596152157074e-05, + "loss": 2.1627, + "step": 6765 + }, + { + "epoch": 0.23, + "grad_norm": 0.7099435329437256, + "learning_rate": 1.7862939485416772e-05, + "loss": 2.1628, + "step": 6766 + }, + { + "epoch": 0.23, + "grad_norm": 0.6882128119468689, + "learning_rate": 1.7862282729845587e-05, + "loss": 2.1305, + "step": 6767 + }, + { + "epoch": 0.23, + "grad_norm": 0.7161807417869568, + "learning_rate": 1.7861625885450937e-05, + "loss": 2.1655, + "step": 6768 + }, + { + "epoch": 0.23, + "grad_norm": 0.7024717330932617, + "learning_rate": 1.786096895224025e-05, + "loss": 2.1571, + "step": 6769 + }, + { + "epoch": 0.23, + "grad_norm": 0.7148637771606445, + "learning_rate": 1.786031193022094e-05, + "loss": 2.1452, + "step": 6770 + }, + { + "epoch": 0.23, + "grad_norm": 0.7146597504615784, + "learning_rate": 1.785965481940043e-05, + "loss": 2.1596, + "step": 6771 + }, + { + "epoch": 0.23, + "grad_norm": 0.6785227656364441, + "learning_rate": 1.7858997619786152e-05, + "loss": 2.217, + "step": 6772 + }, + { + "epoch": 0.23, + "grad_norm": 0.7046297788619995, + "learning_rate": 1.785834033138552e-05, + "loss": 2.2015, + "step": 6773 + }, + { + "epoch": 0.23, + "grad_norm": 0.6983430981636047, + "learning_rate": 1.7857682954205967e-05, + "loss": 2.0605, + "step": 6774 + }, + { + "epoch": 0.23, + "grad_norm": 0.6989234089851379, + "learning_rate": 1.7857025488254914e-05, + "loss": 2.2127, + "step": 6775 + }, + { + "epoch": 0.23, + "grad_norm": 0.7047427296638489, + "learning_rate": 1.7856367933539796e-05, + "loss": 2.2234, + "step": 6776 + }, + { + "epoch": 0.23, + "grad_norm": 0.7147670984268188, + "learning_rate": 1.7855710290068035e-05, + "loss": 2.2364, + "step": 6777 + }, + { + "epoch": 0.23, + "grad_norm": 0.6983500123023987, + "learning_rate": 1.7855052557847063e-05, + "loss": 2.2129, + "step": 6778 + }, + { + "epoch": 0.23, + "grad_norm": 0.6956278681755066, + "learning_rate": 1.7854394736884312e-05, + "loss": 2.1629, + "step": 6779 + }, + { + "epoch": 0.23, + "grad_norm": 0.6807165741920471, + "learning_rate": 1.7853736827187213e-05, + "loss": 2.1385, + "step": 6780 + }, + { + "epoch": 0.23, + "grad_norm": 0.6907092332839966, + "learning_rate": 1.7853078828763192e-05, + "loss": 2.1519, + "step": 6781 + }, + { + "epoch": 0.23, + "grad_norm": 0.6958861947059631, + "learning_rate": 1.7852420741619694e-05, + "loss": 2.2483, + "step": 6782 + }, + { + "epoch": 0.23, + "grad_norm": 0.7231794595718384, + "learning_rate": 1.7851762565764148e-05, + "loss": 2.1885, + "step": 6783 + }, + { + "epoch": 0.23, + "grad_norm": 0.6978388428688049, + "learning_rate": 1.785110430120399e-05, + "loss": 2.208, + "step": 6784 + }, + { + "epoch": 0.23, + "grad_norm": 0.7005677819252014, + "learning_rate": 1.7850445947946658e-05, + "loss": 2.2178, + "step": 6785 + }, + { + "epoch": 0.23, + "grad_norm": 0.7164233922958374, + "learning_rate": 1.7849787505999584e-05, + "loss": 2.2552, + "step": 6786 + }, + { + "epoch": 0.23, + "grad_norm": 0.6966948509216309, + "learning_rate": 1.7849128975370214e-05, + "loss": 2.1195, + "step": 6787 + }, + { + "epoch": 0.23, + "grad_norm": 0.7261508703231812, + "learning_rate": 1.7848470356065985e-05, + "loss": 2.198, + "step": 6788 + }, + { + "epoch": 0.23, + "grad_norm": 0.6945905685424805, + "learning_rate": 1.7847811648094336e-05, + "loss": 2.1478, + "step": 6789 + }, + { + "epoch": 0.23, + "grad_norm": 0.6871730089187622, + "learning_rate": 1.7847152851462716e-05, + "loss": 2.2043, + "step": 6790 + }, + { + "epoch": 0.23, + "grad_norm": 0.687189519405365, + "learning_rate": 1.7846493966178557e-05, + "loss": 2.0927, + "step": 6791 + }, + { + "epoch": 0.23, + "grad_norm": 0.7218593955039978, + "learning_rate": 1.7845834992249307e-05, + "loss": 2.1607, + "step": 6792 + }, + { + "epoch": 0.23, + "grad_norm": 0.6897413730621338, + "learning_rate": 1.7845175929682412e-05, + "loss": 2.1774, + "step": 6793 + }, + { + "epoch": 0.23, + "grad_norm": 0.6789997816085815, + "learning_rate": 1.784451677848532e-05, + "loss": 2.241, + "step": 6794 + }, + { + "epoch": 0.23, + "grad_norm": 0.6930385231971741, + "learning_rate": 1.784385753866547e-05, + "loss": 2.1561, + "step": 6795 + }, + { + "epoch": 0.23, + "grad_norm": 0.710186243057251, + "learning_rate": 1.7843198210230318e-05, + "loss": 2.152, + "step": 6796 + }, + { + "epoch": 0.23, + "grad_norm": 0.6987459659576416, + "learning_rate": 1.7842538793187308e-05, + "loss": 2.205, + "step": 6797 + }, + { + "epoch": 0.23, + "grad_norm": 0.754103422164917, + "learning_rate": 1.7841879287543893e-05, + "loss": 2.1411, + "step": 6798 + }, + { + "epoch": 0.23, + "grad_norm": 0.7063634395599365, + "learning_rate": 1.784121969330752e-05, + "loss": 2.127, + "step": 6799 + }, + { + "epoch": 0.23, + "grad_norm": 0.7065637707710266, + "learning_rate": 1.784056001048564e-05, + "loss": 2.1813, + "step": 6800 + }, + { + "epoch": 0.23, + "grad_norm": 0.6785488128662109, + "learning_rate": 1.783990023908571e-05, + "loss": 2.1728, + "step": 6801 + }, + { + "epoch": 0.23, + "grad_norm": 0.7472487688064575, + "learning_rate": 1.7839240379115182e-05, + "loss": 2.2167, + "step": 6802 + }, + { + "epoch": 0.23, + "grad_norm": 0.7082286477088928, + "learning_rate": 1.7838580430581512e-05, + "loss": 2.1691, + "step": 6803 + }, + { + "epoch": 0.23, + "grad_norm": 0.6916322112083435, + "learning_rate": 1.7837920393492153e-05, + "loss": 2.1708, + "step": 6804 + }, + { + "epoch": 0.23, + "grad_norm": 0.7010189294815063, + "learning_rate": 1.783726026785456e-05, + "loss": 2.098, + "step": 6805 + }, + { + "epoch": 0.23, + "grad_norm": 0.6913995146751404, + "learning_rate": 1.7836600053676198e-05, + "loss": 2.2058, + "step": 6806 + }, + { + "epoch": 0.23, + "grad_norm": 0.6896275281906128, + "learning_rate": 1.783593975096452e-05, + "loss": 2.1871, + "step": 6807 + }, + { + "epoch": 0.23, + "grad_norm": 0.7041301727294922, + "learning_rate": 1.7835279359726986e-05, + "loss": 2.1883, + "step": 6808 + }, + { + "epoch": 0.23, + "grad_norm": 0.7403188943862915, + "learning_rate": 1.7834618879971056e-05, + "loss": 2.1484, + "step": 6809 + }, + { + "epoch": 0.23, + "grad_norm": 0.7081049680709839, + "learning_rate": 1.7833958311704195e-05, + "loss": 2.1113, + "step": 6810 + }, + { + "epoch": 0.23, + "grad_norm": 0.68865567445755, + "learning_rate": 1.7833297654933863e-05, + "loss": 2.2313, + "step": 6811 + }, + { + "epoch": 0.23, + "grad_norm": 0.6944952011108398, + "learning_rate": 1.7832636909667528e-05, + "loss": 2.15, + "step": 6812 + }, + { + "epoch": 0.23, + "grad_norm": 0.7047025561332703, + "learning_rate": 1.783197607591265e-05, + "loss": 2.0817, + "step": 6813 + }, + { + "epoch": 0.23, + "grad_norm": 0.6887606382369995, + "learning_rate": 1.7831315153676697e-05, + "loss": 2.1184, + "step": 6814 + }, + { + "epoch": 0.23, + "grad_norm": 0.7089728116989136, + "learning_rate": 1.7830654142967133e-05, + "loss": 2.244, + "step": 6815 + }, + { + "epoch": 0.23, + "grad_norm": 0.6902227997779846, + "learning_rate": 1.782999304379143e-05, + "loss": 2.1589, + "step": 6816 + }, + { + "epoch": 0.23, + "grad_norm": 0.6971452832221985, + "learning_rate": 1.7829331856157054e-05, + "loss": 2.1804, + "step": 6817 + }, + { + "epoch": 0.23, + "grad_norm": 0.7186415195465088, + "learning_rate": 1.782867058007147e-05, + "loss": 2.1595, + "step": 6818 + }, + { + "epoch": 0.23, + "grad_norm": 0.6955856084823608, + "learning_rate": 1.782800921554216e-05, + "loss": 2.1716, + "step": 6819 + }, + { + "epoch": 0.23, + "grad_norm": 0.7264643311500549, + "learning_rate": 1.782734776257659e-05, + "loss": 2.1205, + "step": 6820 + }, + { + "epoch": 0.23, + "grad_norm": 0.7218306660652161, + "learning_rate": 1.782668622118223e-05, + "loss": 2.1635, + "step": 6821 + }, + { + "epoch": 0.23, + "grad_norm": 0.6905709505081177, + "learning_rate": 1.7826024591366556e-05, + "loss": 2.1501, + "step": 6822 + }, + { + "epoch": 0.23, + "grad_norm": 0.7445659041404724, + "learning_rate": 1.7825362873137042e-05, + "loss": 2.1679, + "step": 6823 + }, + { + "epoch": 0.23, + "grad_norm": 0.6806520223617554, + "learning_rate": 1.7824701066501165e-05, + "loss": 2.1701, + "step": 6824 + }, + { + "epoch": 0.23, + "grad_norm": 0.7084265351295471, + "learning_rate": 1.78240391714664e-05, + "loss": 2.1767, + "step": 6825 + }, + { + "epoch": 0.23, + "grad_norm": 0.6946107745170593, + "learning_rate": 1.7823377188040227e-05, + "loss": 2.1565, + "step": 6826 + }, + { + "epoch": 0.23, + "grad_norm": 0.7721121907234192, + "learning_rate": 1.7822715116230124e-05, + "loss": 2.2209, + "step": 6827 + }, + { + "epoch": 0.23, + "grad_norm": 0.6846488118171692, + "learning_rate": 1.782205295604357e-05, + "loss": 2.147, + "step": 6828 + }, + { + "epoch": 0.23, + "grad_norm": 0.7053474187850952, + "learning_rate": 1.7821390707488048e-05, + "loss": 2.1395, + "step": 6829 + }, + { + "epoch": 0.23, + "grad_norm": 0.7143411040306091, + "learning_rate": 1.7820728370571033e-05, + "loss": 2.26, + "step": 6830 + }, + { + "epoch": 0.23, + "grad_norm": 0.6922802329063416, + "learning_rate": 1.7820065945300014e-05, + "loss": 2.0962, + "step": 6831 + }, + { + "epoch": 0.23, + "grad_norm": 0.6994383931159973, + "learning_rate": 1.7819403431682473e-05, + "loss": 2.1017, + "step": 6832 + }, + { + "epoch": 0.23, + "grad_norm": 0.6988868117332458, + "learning_rate": 1.7818740829725894e-05, + "loss": 2.2113, + "step": 6833 + }, + { + "epoch": 0.23, + "grad_norm": 0.7174085378646851, + "learning_rate": 1.7818078139437765e-05, + "loss": 2.1615, + "step": 6834 + }, + { + "epoch": 0.23, + "grad_norm": 0.7122299075126648, + "learning_rate": 1.781741536082557e-05, + "loss": 2.152, + "step": 6835 + }, + { + "epoch": 0.23, + "grad_norm": 0.7476032972335815, + "learning_rate": 1.7816752493896798e-05, + "loss": 2.0624, + "step": 6836 + }, + { + "epoch": 0.23, + "grad_norm": 0.7121260166168213, + "learning_rate": 1.7816089538658938e-05, + "loss": 2.1682, + "step": 6837 + }, + { + "epoch": 0.23, + "grad_norm": 0.7298016548156738, + "learning_rate": 1.781542649511948e-05, + "loss": 2.2233, + "step": 6838 + }, + { + "epoch": 0.23, + "grad_norm": 0.7053233981132507, + "learning_rate": 1.781476336328591e-05, + "loss": 2.1786, + "step": 6839 + }, + { + "epoch": 0.23, + "grad_norm": 0.6758351922035217, + "learning_rate": 1.781410014316573e-05, + "loss": 2.2082, + "step": 6840 + }, + { + "epoch": 0.23, + "grad_norm": 0.6976833939552307, + "learning_rate": 1.781343683476642e-05, + "loss": 2.1605, + "step": 6841 + }, + { + "epoch": 0.23, + "grad_norm": 0.743137776851654, + "learning_rate": 1.781277343809548e-05, + "loss": 2.1794, + "step": 6842 + }, + { + "epoch": 0.23, + "grad_norm": 0.745040237903595, + "learning_rate": 1.7812109953160405e-05, + "loss": 2.2113, + "step": 6843 + }, + { + "epoch": 0.23, + "grad_norm": 0.7027665376663208, + "learning_rate": 1.781144637996869e-05, + "loss": 2.1141, + "step": 6844 + }, + { + "epoch": 0.23, + "grad_norm": 0.690324068069458, + "learning_rate": 1.7810782718527834e-05, + "loss": 2.0999, + "step": 6845 + }, + { + "epoch": 0.23, + "grad_norm": 0.6947686076164246, + "learning_rate": 1.781011896884533e-05, + "loss": 2.2087, + "step": 6846 + }, + { + "epoch": 0.23, + "grad_norm": 0.6965914964675903, + "learning_rate": 1.780945513092868e-05, + "loss": 2.2171, + "step": 6847 + }, + { + "epoch": 0.23, + "grad_norm": 0.6865129470825195, + "learning_rate": 1.7808791204785384e-05, + "loss": 2.1142, + "step": 6848 + }, + { + "epoch": 0.23, + "grad_norm": 0.7207651734352112, + "learning_rate": 1.780812719042294e-05, + "loss": 2.2166, + "step": 6849 + }, + { + "epoch": 0.23, + "grad_norm": 0.7159828543663025, + "learning_rate": 1.780746308784885e-05, + "loss": 2.2117, + "step": 6850 + }, + { + "epoch": 0.23, + "grad_norm": 0.6997160911560059, + "learning_rate": 1.780679889707062e-05, + "loss": 2.1425, + "step": 6851 + }, + { + "epoch": 0.23, + "grad_norm": 0.6947236657142639, + "learning_rate": 1.7806134618095752e-05, + "loss": 2.2179, + "step": 6852 + }, + { + "epoch": 0.23, + "grad_norm": 0.7177760601043701, + "learning_rate": 1.7805470250931748e-05, + "loss": 2.2012, + "step": 6853 + }, + { + "epoch": 0.23, + "grad_norm": 0.7546408176422119, + "learning_rate": 1.7804805795586114e-05, + "loss": 2.1229, + "step": 6854 + }, + { + "epoch": 0.23, + "grad_norm": 0.7468322515487671, + "learning_rate": 1.7804141252066363e-05, + "loss": 2.2103, + "step": 6855 + }, + { + "epoch": 0.23, + "grad_norm": 0.7040621042251587, + "learning_rate": 1.7803476620379993e-05, + "loss": 2.1764, + "step": 6856 + }, + { + "epoch": 0.23, + "grad_norm": 0.7218724489212036, + "learning_rate": 1.7802811900534518e-05, + "loss": 2.173, + "step": 6857 + }, + { + "epoch": 0.23, + "grad_norm": 0.6986580491065979, + "learning_rate": 1.780214709253745e-05, + "loss": 2.1946, + "step": 6858 + }, + { + "epoch": 0.23, + "grad_norm": 0.7062078714370728, + "learning_rate": 1.7801482196396294e-05, + "loss": 2.1584, + "step": 6859 + }, + { + "epoch": 0.23, + "grad_norm": 0.6856774687767029, + "learning_rate": 1.7800817212118565e-05, + "loss": 2.2048, + "step": 6860 + }, + { + "epoch": 0.23, + "grad_norm": 0.7328609824180603, + "learning_rate": 1.7800152139711774e-05, + "loss": 2.2265, + "step": 6861 + }, + { + "epoch": 0.23, + "grad_norm": 0.7360959649085999, + "learning_rate": 1.779948697918344e-05, + "loss": 2.1738, + "step": 6862 + }, + { + "epoch": 0.23, + "grad_norm": 0.7078776359558105, + "learning_rate": 1.779882173054107e-05, + "loss": 2.1151, + "step": 6863 + }, + { + "epoch": 0.23, + "grad_norm": 0.701103150844574, + "learning_rate": 1.7798156393792178e-05, + "loss": 2.1202, + "step": 6864 + }, + { + "epoch": 0.23, + "grad_norm": 0.7015447020530701, + "learning_rate": 1.779749096894429e-05, + "loss": 2.1846, + "step": 6865 + }, + { + "epoch": 0.23, + "grad_norm": 0.7004568576812744, + "learning_rate": 1.779682545600492e-05, + "loss": 2.1816, + "step": 6866 + }, + { + "epoch": 0.23, + "grad_norm": 0.7055882811546326, + "learning_rate": 1.7796159854981584e-05, + "loss": 2.1633, + "step": 6867 + }, + { + "epoch": 0.23, + "grad_norm": 0.7181345820426941, + "learning_rate": 1.77954941658818e-05, + "loss": 2.1481, + "step": 6868 + }, + { + "epoch": 0.23, + "grad_norm": 0.7153964638710022, + "learning_rate": 1.7794828388713097e-05, + "loss": 2.1544, + "step": 6869 + }, + { + "epoch": 0.23, + "grad_norm": 0.713890790939331, + "learning_rate": 1.7794162523482986e-05, + "loss": 2.1867, + "step": 6870 + }, + { + "epoch": 0.23, + "grad_norm": 0.6742919087409973, + "learning_rate": 1.7793496570199e-05, + "loss": 2.1635, + "step": 6871 + }, + { + "epoch": 0.23, + "grad_norm": 0.7094627618789673, + "learning_rate": 1.779283052886865e-05, + "loss": 2.1961, + "step": 6872 + }, + { + "epoch": 0.23, + "grad_norm": 0.7124009132385254, + "learning_rate": 1.7792164399499475e-05, + "loss": 2.1982, + "step": 6873 + }, + { + "epoch": 0.23, + "grad_norm": 0.7158564329147339, + "learning_rate": 1.779149818209899e-05, + "loss": 2.2429, + "step": 6874 + }, + { + "epoch": 0.23, + "grad_norm": 0.7279731035232544, + "learning_rate": 1.7790831876674724e-05, + "loss": 2.2099, + "step": 6875 + }, + { + "epoch": 0.23, + "grad_norm": 0.6774821877479553, + "learning_rate": 1.7790165483234206e-05, + "loss": 2.0863, + "step": 6876 + }, + { + "epoch": 0.23, + "grad_norm": 0.6874121427536011, + "learning_rate": 1.7789499001784963e-05, + "loss": 2.1245, + "step": 6877 + }, + { + "epoch": 0.23, + "grad_norm": 0.7278497815132141, + "learning_rate": 1.7788832432334526e-05, + "loss": 2.1851, + "step": 6878 + }, + { + "epoch": 0.23, + "grad_norm": 0.6820404529571533, + "learning_rate": 1.7788165774890427e-05, + "loss": 2.1506, + "step": 6879 + }, + { + "epoch": 0.23, + "grad_norm": 0.6948745846748352, + "learning_rate": 1.7787499029460195e-05, + "loss": 2.1817, + "step": 6880 + }, + { + "epoch": 0.23, + "grad_norm": 0.7137362360954285, + "learning_rate": 1.778683219605136e-05, + "loss": 2.158, + "step": 6881 + }, + { + "epoch": 0.23, + "grad_norm": 0.7012460827827454, + "learning_rate": 1.778616527467146e-05, + "loss": 2.2094, + "step": 6882 + }, + { + "epoch": 0.23, + "grad_norm": 0.7243384718894958, + "learning_rate": 1.7785498265328028e-05, + "loss": 2.1578, + "step": 6883 + }, + { + "epoch": 0.23, + "grad_norm": 0.7307226657867432, + "learning_rate": 1.77848311680286e-05, + "loss": 2.2498, + "step": 6884 + }, + { + "epoch": 0.23, + "grad_norm": 0.695855438709259, + "learning_rate": 1.778416398278071e-05, + "loss": 2.2159, + "step": 6885 + }, + { + "epoch": 0.23, + "grad_norm": 0.7213189005851746, + "learning_rate": 1.7783496709591896e-05, + "loss": 2.1378, + "step": 6886 + }, + { + "epoch": 0.23, + "grad_norm": 0.7287018299102783, + "learning_rate": 1.77828293484697e-05, + "loss": 2.162, + "step": 6887 + }, + { + "epoch": 0.23, + "grad_norm": 0.6844534277915955, + "learning_rate": 1.778216189942166e-05, + "loss": 2.187, + "step": 6888 + }, + { + "epoch": 0.23, + "grad_norm": 0.7013359069824219, + "learning_rate": 1.7781494362455315e-05, + "loss": 2.1443, + "step": 6889 + }, + { + "epoch": 0.23, + "grad_norm": 0.6730186939239502, + "learning_rate": 1.7780826737578207e-05, + "loss": 2.1743, + "step": 6890 + }, + { + "epoch": 0.23, + "grad_norm": 0.7261166572570801, + "learning_rate": 1.778015902479788e-05, + "loss": 2.1706, + "step": 6891 + }, + { + "epoch": 0.23, + "grad_norm": 0.7098137736320496, + "learning_rate": 1.7779491224121875e-05, + "loss": 2.1456, + "step": 6892 + }, + { + "epoch": 0.23, + "grad_norm": 0.7070254683494568, + "learning_rate": 1.7778823335557736e-05, + "loss": 2.2083, + "step": 6893 + }, + { + "epoch": 0.23, + "grad_norm": 0.7378586530685425, + "learning_rate": 1.7778155359113014e-05, + "loss": 2.2822, + "step": 6894 + }, + { + "epoch": 0.23, + "grad_norm": 0.7364852428436279, + "learning_rate": 1.7777487294795243e-05, + "loss": 2.1838, + "step": 6895 + }, + { + "epoch": 0.23, + "grad_norm": 0.7221633195877075, + "learning_rate": 1.7776819142611984e-05, + "loss": 2.1188, + "step": 6896 + }, + { + "epoch": 0.23, + "grad_norm": 0.6738303899765015, + "learning_rate": 1.7776150902570778e-05, + "loss": 2.1318, + "step": 6897 + }, + { + "epoch": 0.23, + "grad_norm": 0.7121299505233765, + "learning_rate": 1.777548257467918e-05, + "loss": 2.1177, + "step": 6898 + }, + { + "epoch": 0.23, + "grad_norm": 0.6904041171073914, + "learning_rate": 1.7774814158944736e-05, + "loss": 2.1602, + "step": 6899 + }, + { + "epoch": 0.23, + "grad_norm": 0.667462170124054, + "learning_rate": 1.7774145655374995e-05, + "loss": 2.1384, + "step": 6900 + }, + { + "epoch": 0.23, + "grad_norm": 0.6945261359214783, + "learning_rate": 1.7773477063977512e-05, + "loss": 2.1315, + "step": 6901 + }, + { + "epoch": 0.23, + "grad_norm": 0.7075498104095459, + "learning_rate": 1.7772808384759846e-05, + "loss": 2.1646, + "step": 6902 + }, + { + "epoch": 0.23, + "grad_norm": 0.7124106287956238, + "learning_rate": 1.7772139617729543e-05, + "loss": 2.1501, + "step": 6903 + }, + { + "epoch": 0.23, + "grad_norm": 0.723992109298706, + "learning_rate": 1.777147076289416e-05, + "loss": 2.1225, + "step": 6904 + }, + { + "epoch": 0.23, + "grad_norm": 0.7132938504219055, + "learning_rate": 1.7770801820261257e-05, + "loss": 2.2064, + "step": 6905 + }, + { + "epoch": 0.23, + "grad_norm": 0.7230051159858704, + "learning_rate": 1.777013278983839e-05, + "loss": 2.152, + "step": 6906 + }, + { + "epoch": 0.23, + "grad_norm": 0.692264974117279, + "learning_rate": 1.7769463671633117e-05, + "loss": 2.2115, + "step": 6907 + }, + { + "epoch": 0.23, + "grad_norm": 0.7420234084129333, + "learning_rate": 1.7768794465652994e-05, + "loss": 2.1924, + "step": 6908 + }, + { + "epoch": 0.23, + "grad_norm": 0.7254521250724792, + "learning_rate": 1.7768125171905583e-05, + "loss": 2.1549, + "step": 6909 + }, + { + "epoch": 0.23, + "grad_norm": 0.6803408265113831, + "learning_rate": 1.776745579039845e-05, + "loss": 2.0715, + "step": 6910 + }, + { + "epoch": 0.23, + "grad_norm": 0.718235969543457, + "learning_rate": 1.776678632113915e-05, + "loss": 2.23, + "step": 6911 + }, + { + "epoch": 0.23, + "grad_norm": 0.7095158100128174, + "learning_rate": 1.7766116764135252e-05, + "loss": 2.1275, + "step": 6912 + }, + { + "epoch": 0.23, + "grad_norm": 0.717291533946991, + "learning_rate": 1.7765447119394318e-05, + "loss": 2.1735, + "step": 6913 + }, + { + "epoch": 0.23, + "grad_norm": 0.6823374032974243, + "learning_rate": 1.7764777386923915e-05, + "loss": 2.1684, + "step": 6914 + }, + { + "epoch": 0.23, + "grad_norm": 0.680199146270752, + "learning_rate": 1.7764107566731606e-05, + "loss": 2.1922, + "step": 6915 + }, + { + "epoch": 0.23, + "grad_norm": 0.6936507821083069, + "learning_rate": 1.7763437658824962e-05, + "loss": 2.1487, + "step": 6916 + }, + { + "epoch": 0.23, + "grad_norm": 0.7469543814659119, + "learning_rate": 1.7762767663211546e-05, + "loss": 2.1431, + "step": 6917 + }, + { + "epoch": 0.23, + "grad_norm": 0.6895522475242615, + "learning_rate": 1.7762097579898934e-05, + "loss": 2.1719, + "step": 6918 + }, + { + "epoch": 0.23, + "grad_norm": 0.7110211849212646, + "learning_rate": 1.776142740889469e-05, + "loss": 2.1726, + "step": 6919 + }, + { + "epoch": 0.23, + "grad_norm": 0.689598560333252, + "learning_rate": 1.7760757150206386e-05, + "loss": 2.1256, + "step": 6920 + }, + { + "epoch": 0.23, + "grad_norm": 0.6985383629798889, + "learning_rate": 1.77600868038416e-05, + "loss": 2.1273, + "step": 6921 + }, + { + "epoch": 0.23, + "grad_norm": 0.7095934748649597, + "learning_rate": 1.7759416369807902e-05, + "loss": 2.1946, + "step": 6922 + }, + { + "epoch": 0.23, + "grad_norm": 0.6924101710319519, + "learning_rate": 1.7758745848112856e-05, + "loss": 2.1422, + "step": 6923 + }, + { + "epoch": 0.23, + "grad_norm": 0.7058820724487305, + "learning_rate": 1.7758075238764053e-05, + "loss": 2.1465, + "step": 6924 + }, + { + "epoch": 0.23, + "grad_norm": 0.674541175365448, + "learning_rate": 1.7757404541769064e-05, + "loss": 2.0787, + "step": 6925 + }, + { + "epoch": 0.23, + "grad_norm": 0.7304415106773376, + "learning_rate": 1.7756733757135466e-05, + "loss": 2.2224, + "step": 6926 + }, + { + "epoch": 0.23, + "grad_norm": 0.6868225336074829, + "learning_rate": 1.7756062884870834e-05, + "loss": 2.1023, + "step": 6927 + }, + { + "epoch": 0.23, + "grad_norm": 0.7010774612426758, + "learning_rate": 1.7755391924982745e-05, + "loss": 2.1425, + "step": 6928 + }, + { + "epoch": 0.23, + "grad_norm": 0.6922581195831299, + "learning_rate": 1.7754720877478787e-05, + "loss": 2.1399, + "step": 6929 + }, + { + "epoch": 0.23, + "grad_norm": 0.6922581195831299, + "learning_rate": 1.7754049742366537e-05, + "loss": 2.1483, + "step": 6930 + }, + { + "epoch": 0.23, + "grad_norm": 0.6921782493591309, + "learning_rate": 1.775337851965358e-05, + "loss": 2.1345, + "step": 6931 + }, + { + "epoch": 0.23, + "grad_norm": 0.6929617524147034, + "learning_rate": 1.7752707209347492e-05, + "loss": 2.1222, + "step": 6932 + }, + { + "epoch": 0.23, + "grad_norm": 0.687907874584198, + "learning_rate": 1.7752035811455864e-05, + "loss": 2.1598, + "step": 6933 + }, + { + "epoch": 0.23, + "grad_norm": 0.7117748260498047, + "learning_rate": 1.7751364325986277e-05, + "loss": 2.1598, + "step": 6934 + }, + { + "epoch": 0.23, + "grad_norm": 0.670447051525116, + "learning_rate": 1.775069275294632e-05, + "loss": 2.1551, + "step": 6935 + }, + { + "epoch": 0.23, + "grad_norm": 0.6872610449790955, + "learning_rate": 1.7750021092343578e-05, + "loss": 2.2419, + "step": 6936 + }, + { + "epoch": 0.23, + "grad_norm": 0.7005404829978943, + "learning_rate": 1.7749349344185638e-05, + "loss": 2.198, + "step": 6937 + }, + { + "epoch": 0.23, + "grad_norm": 0.7544254064559937, + "learning_rate": 1.774867750848009e-05, + "loss": 2.2047, + "step": 6938 + }, + { + "epoch": 0.23, + "grad_norm": 0.7147417664527893, + "learning_rate": 1.774800558523453e-05, + "loss": 2.1163, + "step": 6939 + }, + { + "epoch": 0.23, + "grad_norm": 0.6980299353599548, + "learning_rate": 1.774733357445654e-05, + "loss": 2.1198, + "step": 6940 + }, + { + "epoch": 0.23, + "grad_norm": 0.6910920143127441, + "learning_rate": 1.7746661476153714e-05, + "loss": 2.1569, + "step": 6941 + }, + { + "epoch": 0.23, + "grad_norm": 0.6915248036384583, + "learning_rate": 1.774598929033365e-05, + "loss": 2.1883, + "step": 6942 + }, + { + "epoch": 0.23, + "grad_norm": 0.6882959604263306, + "learning_rate": 1.7745317017003937e-05, + "loss": 2.0899, + "step": 6943 + }, + { + "epoch": 0.23, + "grad_norm": 0.6960617303848267, + "learning_rate": 1.7744644656172172e-05, + "loss": 2.1908, + "step": 6944 + }, + { + "epoch": 0.23, + "grad_norm": 0.7447720766067505, + "learning_rate": 1.7743972207845952e-05, + "loss": 2.1812, + "step": 6945 + }, + { + "epoch": 0.23, + "grad_norm": 0.6778199672698975, + "learning_rate": 1.7743299672032868e-05, + "loss": 2.1864, + "step": 6946 + }, + { + "epoch": 0.23, + "grad_norm": 0.6879332661628723, + "learning_rate": 1.7742627048740525e-05, + "loss": 2.1759, + "step": 6947 + }, + { + "epoch": 0.23, + "grad_norm": 0.7041139602661133, + "learning_rate": 1.7741954337976522e-05, + "loss": 2.1894, + "step": 6948 + }, + { + "epoch": 0.23, + "grad_norm": 0.6966969966888428, + "learning_rate": 1.7741281539748453e-05, + "loss": 2.0973, + "step": 6949 + }, + { + "epoch": 0.23, + "grad_norm": 0.6998757123947144, + "learning_rate": 1.774060865406392e-05, + "loss": 2.1341, + "step": 6950 + }, + { + "epoch": 0.23, + "grad_norm": 0.6866270899772644, + "learning_rate": 1.773993568093053e-05, + "loss": 2.1833, + "step": 6951 + }, + { + "epoch": 0.23, + "grad_norm": 0.7264165878295898, + "learning_rate": 1.7739262620355883e-05, + "loss": 2.1901, + "step": 6952 + }, + { + "epoch": 0.23, + "grad_norm": 0.7377843856811523, + "learning_rate": 1.773858947234758e-05, + "loss": 2.2135, + "step": 6953 + }, + { + "epoch": 0.23, + "grad_norm": 0.7093053460121155, + "learning_rate": 1.7737916236913234e-05, + "loss": 2.1458, + "step": 6954 + }, + { + "epoch": 0.23, + "grad_norm": 0.7045026421546936, + "learning_rate": 1.7737242914060438e-05, + "loss": 2.169, + "step": 6955 + }, + { + "epoch": 0.23, + "grad_norm": 0.7239832282066345, + "learning_rate": 1.773656950379681e-05, + "loss": 2.1406, + "step": 6956 + }, + { + "epoch": 0.23, + "grad_norm": 0.7197915315628052, + "learning_rate": 1.7735896006129953e-05, + "loss": 2.209, + "step": 6957 + }, + { + "epoch": 0.23, + "grad_norm": 0.7131180763244629, + "learning_rate": 1.7735222421067474e-05, + "loss": 2.1907, + "step": 6958 + }, + { + "epoch": 0.23, + "grad_norm": 0.6831712126731873, + "learning_rate": 1.773454874861699e-05, + "loss": 2.2196, + "step": 6959 + }, + { + "epoch": 0.23, + "grad_norm": 0.7192733287811279, + "learning_rate": 1.7733874988786105e-05, + "loss": 2.2355, + "step": 6960 + }, + { + "epoch": 0.23, + "grad_norm": 0.7124232053756714, + "learning_rate": 1.7733201141582436e-05, + "loss": 2.1509, + "step": 6961 + }, + { + "epoch": 0.23, + "grad_norm": 0.6945650577545166, + "learning_rate": 1.773252720701359e-05, + "loss": 2.1304, + "step": 6962 + }, + { + "epoch": 0.23, + "grad_norm": 0.7014550566673279, + "learning_rate": 1.773185318508718e-05, + "loss": 2.1409, + "step": 6963 + }, + { + "epoch": 0.23, + "grad_norm": 0.7208422422409058, + "learning_rate": 1.773117907581083e-05, + "loss": 2.1306, + "step": 6964 + }, + { + "epoch": 0.23, + "grad_norm": 0.7048414349555969, + "learning_rate": 1.7730504879192146e-05, + "loss": 2.1794, + "step": 6965 + }, + { + "epoch": 0.23, + "grad_norm": 0.6916300654411316, + "learning_rate": 1.772983059523875e-05, + "loss": 2.1734, + "step": 6966 + }, + { + "epoch": 0.23, + "grad_norm": 0.7062911987304688, + "learning_rate": 1.7729156223958255e-05, + "loss": 2.0856, + "step": 6967 + }, + { + "epoch": 0.23, + "grad_norm": 0.7180600166320801, + "learning_rate": 1.7728481765358286e-05, + "loss": 2.2398, + "step": 6968 + }, + { + "epoch": 0.23, + "grad_norm": 0.6934996843338013, + "learning_rate": 1.7727807219446456e-05, + "loss": 2.2006, + "step": 6969 + }, + { + "epoch": 0.23, + "grad_norm": 0.7503741383552551, + "learning_rate": 1.772713258623039e-05, + "loss": 2.1749, + "step": 6970 + }, + { + "epoch": 0.23, + "grad_norm": 0.717060387134552, + "learning_rate": 1.772645786571771e-05, + "loss": 2.1679, + "step": 6971 + }, + { + "epoch": 0.23, + "grad_norm": 0.7326761484146118, + "learning_rate": 1.772578305791604e-05, + "loss": 2.1056, + "step": 6972 + }, + { + "epoch": 0.23, + "grad_norm": 0.6984583139419556, + "learning_rate": 1.7725108162832996e-05, + "loss": 2.1224, + "step": 6973 + }, + { + "epoch": 0.23, + "grad_norm": 0.6965352296829224, + "learning_rate": 1.7724433180476212e-05, + "loss": 2.1575, + "step": 6974 + }, + { + "epoch": 0.23, + "grad_norm": 0.6931893825531006, + "learning_rate": 1.7723758110853306e-05, + "loss": 2.1079, + "step": 6975 + }, + { + "epoch": 0.23, + "grad_norm": 0.732840359210968, + "learning_rate": 1.7723082953971908e-05, + "loss": 2.1613, + "step": 6976 + }, + { + "epoch": 0.23, + "grad_norm": 0.6816244721412659, + "learning_rate": 1.772240770983964e-05, + "loss": 2.1744, + "step": 6977 + }, + { + "epoch": 0.23, + "grad_norm": 0.6839824914932251, + "learning_rate": 1.7721732378464144e-05, + "loss": 2.1294, + "step": 6978 + }, + { + "epoch": 0.23, + "grad_norm": 0.7114626169204712, + "learning_rate": 1.7721056959853035e-05, + "loss": 2.2076, + "step": 6979 + }, + { + "epoch": 0.23, + "grad_norm": 0.7165341973304749, + "learning_rate": 1.772038145401395e-05, + "loss": 2.184, + "step": 6980 + }, + { + "epoch": 0.23, + "grad_norm": 0.7392146587371826, + "learning_rate": 1.7719705860954523e-05, + "loss": 2.2029, + "step": 6981 + }, + { + "epoch": 0.23, + "grad_norm": 0.7326892614364624, + "learning_rate": 1.7719030180682383e-05, + "loss": 2.1645, + "step": 6982 + }, + { + "epoch": 0.23, + "grad_norm": 0.6810978055000305, + "learning_rate": 1.7718354413205163e-05, + "loss": 2.1694, + "step": 6983 + }, + { + "epoch": 0.23, + "grad_norm": 0.7064249515533447, + "learning_rate": 1.7717678558530494e-05, + "loss": 2.2307, + "step": 6984 + }, + { + "epoch": 0.23, + "grad_norm": 0.6877684593200684, + "learning_rate": 1.771700261666602e-05, + "loss": 2.095, + "step": 6985 + }, + { + "epoch": 0.23, + "grad_norm": 0.7236197590827942, + "learning_rate": 1.771632658761937e-05, + "loss": 2.1905, + "step": 6986 + }, + { + "epoch": 0.23, + "grad_norm": 0.6959729790687561, + "learning_rate": 1.7715650471398186e-05, + "loss": 2.1637, + "step": 6987 + }, + { + "epoch": 0.23, + "grad_norm": 0.6859177350997925, + "learning_rate": 1.771497426801011e-05, + "loss": 2.1751, + "step": 6988 + }, + { + "epoch": 0.23, + "grad_norm": 0.705310583114624, + "learning_rate": 1.771429797746277e-05, + "loss": 2.2228, + "step": 6989 + }, + { + "epoch": 0.23, + "grad_norm": 0.6920613050460815, + "learning_rate": 1.7713621599763816e-05, + "loss": 2.1959, + "step": 6990 + }, + { + "epoch": 0.23, + "grad_norm": 0.6908178329467773, + "learning_rate": 1.7712945134920884e-05, + "loss": 2.1529, + "step": 6991 + }, + { + "epoch": 0.23, + "grad_norm": 0.7605025172233582, + "learning_rate": 1.7712268582941616e-05, + "loss": 2.1404, + "step": 6992 + }, + { + "epoch": 0.23, + "grad_norm": 0.7012733221054077, + "learning_rate": 1.771159194383366e-05, + "loss": 2.1866, + "step": 6993 + }, + { + "epoch": 0.23, + "grad_norm": 0.6853523254394531, + "learning_rate": 1.7710915217604656e-05, + "loss": 2.2352, + "step": 6994 + }, + { + "epoch": 0.23, + "grad_norm": 0.7281190156936646, + "learning_rate": 1.7710238404262252e-05, + "loss": 2.1342, + "step": 6995 + }, + { + "epoch": 0.23, + "grad_norm": 0.6976677775382996, + "learning_rate": 1.7709561503814093e-05, + "loss": 2.2016, + "step": 6996 + }, + { + "epoch": 0.23, + "grad_norm": 0.7000302076339722, + "learning_rate": 1.7708884516267827e-05, + "loss": 2.1113, + "step": 6997 + }, + { + "epoch": 0.23, + "grad_norm": 0.7196448445320129, + "learning_rate": 1.77082074416311e-05, + "loss": 2.1871, + "step": 6998 + }, + { + "epoch": 0.23, + "grad_norm": 0.7371408939361572, + "learning_rate": 1.7707530279911563e-05, + "loss": 2.2159, + "step": 6999 + }, + { + "epoch": 0.23, + "grad_norm": 0.6803138852119446, + "learning_rate": 1.770685303111687e-05, + "loss": 2.21, + "step": 7000 + }, + { + "epoch": 0.23, + "grad_norm": 0.6637128591537476, + "learning_rate": 1.7706175695254663e-05, + "loss": 2.1477, + "step": 7001 + }, + { + "epoch": 0.23, + "grad_norm": 0.6885989904403687, + "learning_rate": 1.77054982723326e-05, + "loss": 2.1902, + "step": 7002 + }, + { + "epoch": 0.23, + "grad_norm": 0.6914072632789612, + "learning_rate": 1.7704820762358338e-05, + "loss": 2.1155, + "step": 7003 + }, + { + "epoch": 0.23, + "grad_norm": 0.6976311802864075, + "learning_rate": 1.7704143165339523e-05, + "loss": 2.1543, + "step": 7004 + }, + { + "epoch": 0.23, + "grad_norm": 0.7220262289047241, + "learning_rate": 1.7703465481283814e-05, + "loss": 2.2428, + "step": 7005 + }, + { + "epoch": 0.23, + "grad_norm": 0.6914054155349731, + "learning_rate": 1.7702787710198865e-05, + "loss": 2.1638, + "step": 7006 + }, + { + "epoch": 0.23, + "grad_norm": 0.7070133686065674, + "learning_rate": 1.7702109852092335e-05, + "loss": 2.1536, + "step": 7007 + }, + { + "epoch": 0.23, + "grad_norm": 0.7063805460929871, + "learning_rate": 1.7701431906971883e-05, + "loss": 2.1977, + "step": 7008 + }, + { + "epoch": 0.23, + "grad_norm": 0.7011902332305908, + "learning_rate": 1.7700753874845165e-05, + "loss": 2.1261, + "step": 7009 + }, + { + "epoch": 0.23, + "grad_norm": 0.6850791573524475, + "learning_rate": 1.7700075755719846e-05, + "loss": 2.1723, + "step": 7010 + }, + { + "epoch": 0.23, + "grad_norm": 0.7156707048416138, + "learning_rate": 1.7699397549603583e-05, + "loss": 2.1862, + "step": 7011 + }, + { + "epoch": 0.23, + "grad_norm": 0.6938273310661316, + "learning_rate": 1.769871925650404e-05, + "loss": 2.1351, + "step": 7012 + }, + { + "epoch": 0.23, + "grad_norm": 0.6962174773216248, + "learning_rate": 1.7698040876428875e-05, + "loss": 2.1195, + "step": 7013 + }, + { + "epoch": 0.23, + "grad_norm": 0.6999425888061523, + "learning_rate": 1.7697362409385755e-05, + "loss": 2.1429, + "step": 7014 + }, + { + "epoch": 0.23, + "grad_norm": 0.729241132736206, + "learning_rate": 1.769668385538235e-05, + "loss": 2.175, + "step": 7015 + }, + { + "epoch": 0.23, + "grad_norm": 0.7249606251716614, + "learning_rate": 1.769600521442632e-05, + "loss": 2.1501, + "step": 7016 + }, + { + "epoch": 0.23, + "grad_norm": 0.7185417413711548, + "learning_rate": 1.769532648652533e-05, + "loss": 2.1781, + "step": 7017 + }, + { + "epoch": 0.23, + "grad_norm": 0.6729242205619812, + "learning_rate": 1.7694647671687058e-05, + "loss": 2.1217, + "step": 7018 + }, + { + "epoch": 0.23, + "grad_norm": 0.7271994948387146, + "learning_rate": 1.7693968769919162e-05, + "loss": 2.1365, + "step": 7019 + }, + { + "epoch": 0.23, + "grad_norm": 0.7043911218643188, + "learning_rate": 1.7693289781229314e-05, + "loss": 2.1888, + "step": 7020 + }, + { + "epoch": 0.23, + "grad_norm": 0.7178796529769897, + "learning_rate": 1.769261070562519e-05, + "loss": 2.1697, + "step": 7021 + }, + { + "epoch": 0.23, + "grad_norm": 0.7304026484489441, + "learning_rate": 1.7691931543114457e-05, + "loss": 2.238, + "step": 7022 + }, + { + "epoch": 0.23, + "grad_norm": 0.7023227214813232, + "learning_rate": 1.769125229370479e-05, + "loss": 2.1849, + "step": 7023 + }, + { + "epoch": 0.23, + "grad_norm": 0.7478675842285156, + "learning_rate": 1.769057295740386e-05, + "loss": 2.217, + "step": 7024 + }, + { + "epoch": 0.23, + "grad_norm": 0.6802921891212463, + "learning_rate": 1.768989353421935e-05, + "loss": 2.0981, + "step": 7025 + }, + { + "epoch": 0.23, + "grad_norm": 0.7544086575508118, + "learning_rate": 1.7689214024158926e-05, + "loss": 2.1034, + "step": 7026 + }, + { + "epoch": 0.23, + "grad_norm": 0.6963983178138733, + "learning_rate": 1.7688534427230264e-05, + "loss": 2.2001, + "step": 7027 + }, + { + "epoch": 0.23, + "grad_norm": 0.6966838240623474, + "learning_rate": 1.768785474344105e-05, + "loss": 2.1898, + "step": 7028 + }, + { + "epoch": 0.23, + "grad_norm": 0.7023069858551025, + "learning_rate": 1.7687174972798957e-05, + "loss": 2.18, + "step": 7029 + }, + { + "epoch": 0.23, + "grad_norm": 0.6893876194953918, + "learning_rate": 1.7686495115311668e-05, + "loss": 2.1816, + "step": 7030 + }, + { + "epoch": 0.23, + "grad_norm": 0.73393714427948, + "learning_rate": 1.7685815170986862e-05, + "loss": 2.1925, + "step": 7031 + }, + { + "epoch": 0.23, + "grad_norm": 0.696412205696106, + "learning_rate": 1.7685135139832217e-05, + "loss": 2.1382, + "step": 7032 + }, + { + "epoch": 0.23, + "grad_norm": 0.7571011185646057, + "learning_rate": 1.7684455021855425e-05, + "loss": 2.1897, + "step": 7033 + }, + { + "epoch": 0.23, + "grad_norm": 0.7051617503166199, + "learning_rate": 1.768377481706416e-05, + "loss": 2.1441, + "step": 7034 + }, + { + "epoch": 0.23, + "grad_norm": 0.6864407062530518, + "learning_rate": 1.7683094525466115e-05, + "loss": 2.1558, + "step": 7035 + }, + { + "epoch": 0.23, + "grad_norm": 0.7346776127815247, + "learning_rate": 1.7682414147068962e-05, + "loss": 2.1839, + "step": 7036 + }, + { + "epoch": 0.23, + "grad_norm": 0.695116400718689, + "learning_rate": 1.7681733681880405e-05, + "loss": 2.2075, + "step": 7037 + }, + { + "epoch": 0.23, + "grad_norm": 0.6876145601272583, + "learning_rate": 1.7681053129908116e-05, + "loss": 2.211, + "step": 7038 + }, + { + "epoch": 0.23, + "grad_norm": 0.7274297475814819, + "learning_rate": 1.7680372491159794e-05, + "loss": 2.1938, + "step": 7039 + }, + { + "epoch": 0.23, + "grad_norm": 0.7030305862426758, + "learning_rate": 1.7679691765643125e-05, + "loss": 2.1505, + "step": 7040 + }, + { + "epoch": 0.23, + "grad_norm": 0.7753658294677734, + "learning_rate": 1.76790109533658e-05, + "loss": 2.1743, + "step": 7041 + }, + { + "epoch": 0.23, + "grad_norm": 0.7701655030250549, + "learning_rate": 1.7678330054335505e-05, + "loss": 2.2, + "step": 7042 + }, + { + "epoch": 0.23, + "grad_norm": 0.7166990041732788, + "learning_rate": 1.767764906855994e-05, + "loss": 2.1451, + "step": 7043 + }, + { + "epoch": 0.23, + "grad_norm": 0.7324900031089783, + "learning_rate": 1.7676967996046795e-05, + "loss": 2.2178, + "step": 7044 + }, + { + "epoch": 0.23, + "grad_norm": 0.7566375732421875, + "learning_rate": 1.7676286836803768e-05, + "loss": 2.1504, + "step": 7045 + }, + { + "epoch": 0.23, + "grad_norm": 0.7311758399009705, + "learning_rate": 1.7675605590838546e-05, + "loss": 2.1594, + "step": 7046 + }, + { + "epoch": 0.23, + "grad_norm": 0.704200804233551, + "learning_rate": 1.767492425815883e-05, + "loss": 2.1562, + "step": 7047 + }, + { + "epoch": 0.23, + "grad_norm": 0.7691122889518738, + "learning_rate": 1.7674242838772322e-05, + "loss": 2.2035, + "step": 7048 + }, + { + "epoch": 0.23, + "grad_norm": 0.7102630138397217, + "learning_rate": 1.767356133268671e-05, + "loss": 2.2291, + "step": 7049 + }, + { + "epoch": 0.23, + "grad_norm": 0.7495120167732239, + "learning_rate": 1.7672879739909704e-05, + "loss": 2.1761, + "step": 7050 + }, + { + "epoch": 0.23, + "grad_norm": 0.72347491979599, + "learning_rate": 1.7672198060449e-05, + "loss": 2.2156, + "step": 7051 + }, + { + "epoch": 0.23, + "grad_norm": 0.6991457343101501, + "learning_rate": 1.7671516294312296e-05, + "loss": 2.1692, + "step": 7052 + }, + { + "epoch": 0.23, + "grad_norm": 0.7387259006500244, + "learning_rate": 1.76708344415073e-05, + "loss": 2.2244, + "step": 7053 + }, + { + "epoch": 0.23, + "grad_norm": 0.6986570358276367, + "learning_rate": 1.767015250204171e-05, + "loss": 2.1844, + "step": 7054 + }, + { + "epoch": 0.23, + "grad_norm": 0.7126681804656982, + "learning_rate": 1.766947047592323e-05, + "loss": 2.185, + "step": 7055 + }, + { + "epoch": 0.23, + "grad_norm": 0.7179346680641174, + "learning_rate": 1.7668788363159572e-05, + "loss": 2.1564, + "step": 7056 + }, + { + "epoch": 0.23, + "grad_norm": 0.7141900658607483, + "learning_rate": 1.7668106163758432e-05, + "loss": 2.2348, + "step": 7057 + }, + { + "epoch": 0.23, + "grad_norm": 0.7525929808616638, + "learning_rate": 1.766742387772753e-05, + "loss": 2.1186, + "step": 7058 + }, + { + "epoch": 0.23, + "grad_norm": 0.7265985012054443, + "learning_rate": 1.766674150507456e-05, + "loss": 2.1401, + "step": 7059 + }, + { + "epoch": 0.23, + "grad_norm": 0.7377471327781677, + "learning_rate": 1.766605904580724e-05, + "loss": 2.1818, + "step": 7060 + }, + { + "epoch": 0.23, + "grad_norm": 0.7004332542419434, + "learning_rate": 1.766537649993328e-05, + "loss": 2.233, + "step": 7061 + }, + { + "epoch": 0.23, + "grad_norm": 0.7027762532234192, + "learning_rate": 1.766469386746038e-05, + "loss": 2.1524, + "step": 7062 + }, + { + "epoch": 0.23, + "grad_norm": 0.7092044353485107, + "learning_rate": 1.7664011148396268e-05, + "loss": 2.2288, + "step": 7063 + }, + { + "epoch": 0.24, + "grad_norm": 0.7277551293373108, + "learning_rate": 1.766332834274865e-05, + "loss": 2.102, + "step": 7064 + }, + { + "epoch": 0.24, + "grad_norm": 0.7051529288291931, + "learning_rate": 1.7662645450525236e-05, + "loss": 2.1962, + "step": 7065 + }, + { + "epoch": 0.24, + "grad_norm": 0.7289415001869202, + "learning_rate": 1.7661962471733747e-05, + "loss": 2.257, + "step": 7066 + }, + { + "epoch": 0.24, + "grad_norm": 0.7112755179405212, + "learning_rate": 1.7661279406381897e-05, + "loss": 2.1057, + "step": 7067 + }, + { + "epoch": 0.24, + "grad_norm": 0.7327879071235657, + "learning_rate": 1.7660596254477402e-05, + "loss": 2.1188, + "step": 7068 + }, + { + "epoch": 0.24, + "grad_norm": 0.7188847661018372, + "learning_rate": 1.7659913016027977e-05, + "loss": 2.1882, + "step": 7069 + }, + { + "epoch": 0.24, + "grad_norm": 0.6882083415985107, + "learning_rate": 1.7659229691041345e-05, + "loss": 2.1659, + "step": 7070 + }, + { + "epoch": 0.24, + "grad_norm": 0.6994731426239014, + "learning_rate": 1.7658546279525226e-05, + "loss": 2.1826, + "step": 7071 + }, + { + "epoch": 0.24, + "grad_norm": 0.6930410265922546, + "learning_rate": 1.765786278148734e-05, + "loss": 2.104, + "step": 7072 + }, + { + "epoch": 0.24, + "grad_norm": 0.7055414915084839, + "learning_rate": 1.7657179196935404e-05, + "loss": 2.1656, + "step": 7073 + }, + { + "epoch": 0.24, + "grad_norm": 0.7362331748008728, + "learning_rate": 1.7656495525877152e-05, + "loss": 2.1441, + "step": 7074 + }, + { + "epoch": 0.24, + "grad_norm": 0.767522394657135, + "learning_rate": 1.7655811768320298e-05, + "loss": 2.2187, + "step": 7075 + }, + { + "epoch": 0.24, + "grad_norm": 0.7122377753257751, + "learning_rate": 1.7655127924272567e-05, + "loss": 2.1469, + "step": 7076 + }, + { + "epoch": 0.24, + "grad_norm": 0.6900952458381653, + "learning_rate": 1.765444399374169e-05, + "loss": 2.1424, + "step": 7077 + }, + { + "epoch": 0.24, + "grad_norm": 0.7402431964874268, + "learning_rate": 1.7653759976735387e-05, + "loss": 2.1794, + "step": 7078 + }, + { + "epoch": 0.24, + "grad_norm": 0.7040723562240601, + "learning_rate": 1.765307587326139e-05, + "loss": 2.1603, + "step": 7079 + }, + { + "epoch": 0.24, + "grad_norm": 0.6977261900901794, + "learning_rate": 1.7652391683327428e-05, + "loss": 2.2098, + "step": 7080 + }, + { + "epoch": 0.24, + "grad_norm": 0.7347288131713867, + "learning_rate": 1.7651707406941233e-05, + "loss": 2.1827, + "step": 7081 + }, + { + "epoch": 0.24, + "grad_norm": 0.7046722173690796, + "learning_rate": 1.7651023044110525e-05, + "loss": 2.2361, + "step": 7082 + }, + { + "epoch": 0.24, + "grad_norm": 0.6939675807952881, + "learning_rate": 1.765033859484305e-05, + "loss": 2.169, + "step": 7083 + }, + { + "epoch": 0.24, + "grad_norm": 0.7467039227485657, + "learning_rate": 1.7649654059146527e-05, + "loss": 2.1523, + "step": 7084 + }, + { + "epoch": 0.24, + "grad_norm": 0.6875421404838562, + "learning_rate": 1.76489694370287e-05, + "loss": 2.1816, + "step": 7085 + }, + { + "epoch": 0.24, + "grad_norm": 0.6921672821044922, + "learning_rate": 1.7648284728497298e-05, + "loss": 2.1743, + "step": 7086 + }, + { + "epoch": 0.24, + "grad_norm": 0.7377331256866455, + "learning_rate": 1.7647599933560057e-05, + "loss": 2.1664, + "step": 7087 + }, + { + "epoch": 0.24, + "grad_norm": 0.7586964964866638, + "learning_rate": 1.764691505222471e-05, + "loss": 2.1404, + "step": 7088 + }, + { + "epoch": 0.24, + "grad_norm": 0.7179208397865295, + "learning_rate": 1.7646230084499003e-05, + "loss": 2.2189, + "step": 7089 + }, + { + "epoch": 0.24, + "grad_norm": 0.7474113702774048, + "learning_rate": 1.764554503039067e-05, + "loss": 2.2019, + "step": 7090 + }, + { + "epoch": 0.24, + "grad_norm": 0.702309250831604, + "learning_rate": 1.7644859889907447e-05, + "loss": 2.1623, + "step": 7091 + }, + { + "epoch": 0.24, + "grad_norm": 0.6929931640625, + "learning_rate": 1.7644174663057078e-05, + "loss": 2.1661, + "step": 7092 + }, + { + "epoch": 0.24, + "grad_norm": 0.7058249115943909, + "learning_rate": 1.7643489349847306e-05, + "loss": 2.17, + "step": 7093 + }, + { + "epoch": 0.24, + "grad_norm": 0.7427285313606262, + "learning_rate": 1.764280395028587e-05, + "loss": 2.1432, + "step": 7094 + }, + { + "epoch": 0.24, + "grad_norm": 0.6774200201034546, + "learning_rate": 1.7642118464380512e-05, + "loss": 2.138, + "step": 7095 + }, + { + "epoch": 0.24, + "grad_norm": 0.693178117275238, + "learning_rate": 1.7641432892138977e-05, + "loss": 2.1933, + "step": 7096 + }, + { + "epoch": 0.24, + "grad_norm": 0.696986973285675, + "learning_rate": 1.7640747233569014e-05, + "loss": 2.1764, + "step": 7097 + }, + { + "epoch": 0.24, + "grad_norm": 0.6967857480049133, + "learning_rate": 1.764006148867837e-05, + "loss": 2.2353, + "step": 7098 + }, + { + "epoch": 0.24, + "grad_norm": 0.6892018914222717, + "learning_rate": 1.7639375657474784e-05, + "loss": 2.1879, + "step": 7099 + }, + { + "epoch": 0.24, + "grad_norm": 0.7344846725463867, + "learning_rate": 1.7638689739966012e-05, + "loss": 2.1801, + "step": 7100 + }, + { + "epoch": 0.24, + "grad_norm": 0.6911873817443848, + "learning_rate": 1.7638003736159798e-05, + "loss": 2.2166, + "step": 7101 + }, + { + "epoch": 0.24, + "grad_norm": 0.7187475562095642, + "learning_rate": 1.7637317646063895e-05, + "loss": 2.1824, + "step": 7102 + }, + { + "epoch": 0.24, + "grad_norm": 0.7112851738929749, + "learning_rate": 1.7636631469686052e-05, + "loss": 2.1736, + "step": 7103 + }, + { + "epoch": 0.24, + "grad_norm": 0.687077522277832, + "learning_rate": 1.763594520703402e-05, + "loss": 2.1646, + "step": 7104 + }, + { + "epoch": 0.24, + "grad_norm": 0.6993844509124756, + "learning_rate": 1.763525885811556e-05, + "loss": 2.1577, + "step": 7105 + }, + { + "epoch": 0.24, + "grad_norm": 0.6984513998031616, + "learning_rate": 1.7634572422938417e-05, + "loss": 2.1692, + "step": 7106 + }, + { + "epoch": 0.24, + "grad_norm": 0.6940476298332214, + "learning_rate": 1.763388590151035e-05, + "loss": 2.2246, + "step": 7107 + }, + { + "epoch": 0.24, + "grad_norm": 0.712977945804596, + "learning_rate": 1.7633199293839114e-05, + "loss": 2.0875, + "step": 7108 + }, + { + "epoch": 0.24, + "grad_norm": 0.6979058384895325, + "learning_rate": 1.7632512599932464e-05, + "loss": 2.1307, + "step": 7109 + }, + { + "epoch": 0.24, + "grad_norm": 0.7082684636116028, + "learning_rate": 1.763182581979816e-05, + "loss": 2.1423, + "step": 7110 + }, + { + "epoch": 0.24, + "grad_norm": 0.7205988168716431, + "learning_rate": 1.7631138953443964e-05, + "loss": 2.1945, + "step": 7111 + }, + { + "epoch": 0.24, + "grad_norm": 0.7150731086730957, + "learning_rate": 1.7630452000877626e-05, + "loss": 2.2081, + "step": 7112 + }, + { + "epoch": 0.24, + "grad_norm": 0.6804044842720032, + "learning_rate": 1.762976496210692e-05, + "loss": 2.1488, + "step": 7113 + }, + { + "epoch": 0.24, + "grad_norm": 0.750855565071106, + "learning_rate": 1.7629077837139593e-05, + "loss": 2.2993, + "step": 7114 + }, + { + "epoch": 0.24, + "grad_norm": 0.7272837162017822, + "learning_rate": 1.762839062598342e-05, + "loss": 2.1838, + "step": 7115 + }, + { + "epoch": 0.24, + "grad_norm": 0.6976091861724854, + "learning_rate": 1.7627703328646167e-05, + "loss": 2.1181, + "step": 7116 + }, + { + "epoch": 0.24, + "grad_norm": 0.688254177570343, + "learning_rate": 1.7627015945135585e-05, + "loss": 2.1459, + "step": 7117 + }, + { + "epoch": 0.24, + "grad_norm": 0.7192466855049133, + "learning_rate": 1.7626328475459445e-05, + "loss": 2.2415, + "step": 7118 + }, + { + "epoch": 0.24, + "grad_norm": 0.6899108290672302, + "learning_rate": 1.762564091962552e-05, + "loss": 2.1828, + "step": 7119 + }, + { + "epoch": 0.24, + "grad_norm": 0.7197570204734802, + "learning_rate": 1.7624953277641574e-05, + "loss": 2.1719, + "step": 7120 + }, + { + "epoch": 0.24, + "grad_norm": 0.6901236176490784, + "learning_rate": 1.762426554951537e-05, + "loss": 2.1099, + "step": 7121 + }, + { + "epoch": 0.24, + "grad_norm": 0.7143628001213074, + "learning_rate": 1.7623577735254684e-05, + "loss": 2.2154, + "step": 7122 + }, + { + "epoch": 0.24, + "grad_norm": 0.7191237211227417, + "learning_rate": 1.762288983486728e-05, + "loss": 2.1818, + "step": 7123 + }, + { + "epoch": 0.24, + "grad_norm": 0.6905342936515808, + "learning_rate": 1.762220184836094e-05, + "loss": 2.1773, + "step": 7124 + }, + { + "epoch": 0.24, + "grad_norm": 0.6980713605880737, + "learning_rate": 1.762151377574343e-05, + "loss": 2.1639, + "step": 7125 + }, + { + "epoch": 0.24, + "grad_norm": 0.6923877000808716, + "learning_rate": 1.7620825617022523e-05, + "loss": 2.1763, + "step": 7126 + }, + { + "epoch": 0.24, + "grad_norm": 0.7071407437324524, + "learning_rate": 1.7620137372205995e-05, + "loss": 2.2079, + "step": 7127 + }, + { + "epoch": 0.24, + "grad_norm": 0.7185677289962769, + "learning_rate": 1.7619449041301618e-05, + "loss": 2.1967, + "step": 7128 + }, + { + "epoch": 0.24, + "grad_norm": 0.7291254997253418, + "learning_rate": 1.7618760624317172e-05, + "loss": 2.2212, + "step": 7129 + }, + { + "epoch": 0.24, + "grad_norm": 0.7027580738067627, + "learning_rate": 1.7618072121260437e-05, + "loss": 2.1419, + "step": 7130 + }, + { + "epoch": 0.24, + "grad_norm": 0.6847134232521057, + "learning_rate": 1.7617383532139185e-05, + "loss": 2.1927, + "step": 7131 + }, + { + "epoch": 0.24, + "grad_norm": 0.707868754863739, + "learning_rate": 1.7616694856961197e-05, + "loss": 2.1189, + "step": 7132 + }, + { + "epoch": 0.24, + "grad_norm": 0.6700188517570496, + "learning_rate": 1.7616006095734257e-05, + "loss": 2.1076, + "step": 7133 + }, + { + "epoch": 0.24, + "grad_norm": 0.6947464346885681, + "learning_rate": 1.7615317248466145e-05, + "loss": 2.1699, + "step": 7134 + }, + { + "epoch": 0.24, + "grad_norm": 0.7023497819900513, + "learning_rate": 1.7614628315164638e-05, + "loss": 2.2165, + "step": 7135 + }, + { + "epoch": 0.24, + "grad_norm": 0.724716067314148, + "learning_rate": 1.7613939295837523e-05, + "loss": 2.2748, + "step": 7136 + }, + { + "epoch": 0.24, + "grad_norm": 0.6894080638885498, + "learning_rate": 1.7613250190492586e-05, + "loss": 2.1114, + "step": 7137 + }, + { + "epoch": 0.24, + "grad_norm": 0.7019832134246826, + "learning_rate": 1.761256099913761e-05, + "loss": 2.1725, + "step": 7138 + }, + { + "epoch": 0.24, + "grad_norm": 0.7702217698097229, + "learning_rate": 1.7611871721780383e-05, + "loss": 2.1482, + "step": 7139 + }, + { + "epoch": 0.24, + "grad_norm": 0.6937247514724731, + "learning_rate": 1.7611182358428686e-05, + "loss": 2.099, + "step": 7140 + }, + { + "epoch": 0.24, + "grad_norm": 0.7083534598350525, + "learning_rate": 1.7610492909090313e-05, + "loss": 2.1761, + "step": 7141 + }, + { + "epoch": 0.24, + "grad_norm": 0.733631432056427, + "learning_rate": 1.760980337377305e-05, + "loss": 2.2098, + "step": 7142 + }, + { + "epoch": 0.24, + "grad_norm": 0.6954620480537415, + "learning_rate": 1.760911375248469e-05, + "loss": 2.1378, + "step": 7143 + }, + { + "epoch": 0.24, + "grad_norm": 0.7171097993850708, + "learning_rate": 1.7608424045233024e-05, + "loss": 2.2137, + "step": 7144 + }, + { + "epoch": 0.24, + "grad_norm": 0.7045809626579285, + "learning_rate": 1.7607734252025838e-05, + "loss": 2.1465, + "step": 7145 + }, + { + "epoch": 0.24, + "grad_norm": 0.6942517757415771, + "learning_rate": 1.7607044372870933e-05, + "loss": 2.1518, + "step": 7146 + }, + { + "epoch": 0.24, + "grad_norm": 0.7139645218849182, + "learning_rate": 1.7606354407776096e-05, + "loss": 2.1122, + "step": 7147 + }, + { + "epoch": 0.24, + "grad_norm": 0.7286125421524048, + "learning_rate": 1.7605664356749127e-05, + "loss": 2.1573, + "step": 7148 + }, + { + "epoch": 0.24, + "grad_norm": 0.7075738906860352, + "learning_rate": 1.760497421979782e-05, + "loss": 2.1792, + "step": 7149 + }, + { + "epoch": 0.24, + "grad_norm": 0.6882444620132446, + "learning_rate": 1.7604283996929966e-05, + "loss": 2.1807, + "step": 7150 + }, + { + "epoch": 0.24, + "grad_norm": 0.7576085925102234, + "learning_rate": 1.7603593688153374e-05, + "loss": 2.1999, + "step": 7151 + }, + { + "epoch": 0.24, + "grad_norm": 0.6881242394447327, + "learning_rate": 1.760290329347583e-05, + "loss": 2.2123, + "step": 7152 + }, + { + "epoch": 0.24, + "grad_norm": 0.7222576141357422, + "learning_rate": 1.7602212812905145e-05, + "loss": 2.2091, + "step": 7153 + }, + { + "epoch": 0.24, + "grad_norm": 0.7324318289756775, + "learning_rate": 1.7601522246449116e-05, + "loss": 2.1703, + "step": 7154 + }, + { + "epoch": 0.24, + "grad_norm": 0.7193495035171509, + "learning_rate": 1.760083159411554e-05, + "loss": 2.2076, + "step": 7155 + }, + { + "epoch": 0.24, + "grad_norm": 0.7113542556762695, + "learning_rate": 1.7600140855912228e-05, + "loss": 2.139, + "step": 7156 + }, + { + "epoch": 0.24, + "grad_norm": 0.7306573987007141, + "learning_rate": 1.7599450031846975e-05, + "loss": 2.1168, + "step": 7157 + }, + { + "epoch": 0.24, + "grad_norm": 0.7024623155593872, + "learning_rate": 1.759875912192759e-05, + "loss": 2.2138, + "step": 7158 + }, + { + "epoch": 0.24, + "grad_norm": 0.7339100241661072, + "learning_rate": 1.7598068126161877e-05, + "loss": 2.1328, + "step": 7159 + }, + { + "epoch": 0.24, + "grad_norm": 0.7010151147842407, + "learning_rate": 1.7597377044557645e-05, + "loss": 2.0788, + "step": 7160 + }, + { + "epoch": 0.24, + "grad_norm": 0.6988605856895447, + "learning_rate": 1.7596685877122698e-05, + "loss": 2.123, + "step": 7161 + }, + { + "epoch": 0.24, + "grad_norm": 0.723537266254425, + "learning_rate": 1.7595994623864844e-05, + "loss": 2.1786, + "step": 7162 + }, + { + "epoch": 0.24, + "grad_norm": 0.7330615520477295, + "learning_rate": 1.7595303284791897e-05, + "loss": 2.1655, + "step": 7163 + }, + { + "epoch": 0.24, + "grad_norm": 0.7243181467056274, + "learning_rate": 1.7594611859911666e-05, + "loss": 2.1281, + "step": 7164 + }, + { + "epoch": 0.24, + "grad_norm": 0.7109289765357971, + "learning_rate": 1.759392034923196e-05, + "loss": 2.1622, + "step": 7165 + }, + { + "epoch": 0.24, + "grad_norm": 0.7568494081497192, + "learning_rate": 1.7593228752760595e-05, + "loss": 2.1907, + "step": 7166 + }, + { + "epoch": 0.24, + "grad_norm": 0.745379626750946, + "learning_rate": 1.7592537070505375e-05, + "loss": 2.1535, + "step": 7167 + }, + { + "epoch": 0.24, + "grad_norm": 0.7044739127159119, + "learning_rate": 1.7591845302474127e-05, + "loss": 2.0973, + "step": 7168 + }, + { + "epoch": 0.24, + "grad_norm": 0.7193352580070496, + "learning_rate": 1.7591153448674657e-05, + "loss": 2.183, + "step": 7169 + }, + { + "epoch": 0.24, + "grad_norm": 0.7042730450630188, + "learning_rate": 1.7590461509114784e-05, + "loss": 2.1836, + "step": 7170 + }, + { + "epoch": 0.24, + "grad_norm": 0.7369201183319092, + "learning_rate": 1.7589769483802327e-05, + "loss": 2.2254, + "step": 7171 + }, + { + "epoch": 0.24, + "grad_norm": 0.6803762316703796, + "learning_rate": 1.7589077372745103e-05, + "loss": 2.1385, + "step": 7172 + }, + { + "epoch": 0.24, + "grad_norm": 0.6886398792266846, + "learning_rate": 1.7588385175950927e-05, + "loss": 2.1688, + "step": 7173 + }, + { + "epoch": 0.24, + "grad_norm": 0.7159465551376343, + "learning_rate": 1.7587692893427624e-05, + "loss": 2.0772, + "step": 7174 + }, + { + "epoch": 0.24, + "grad_norm": 0.7281029224395752, + "learning_rate": 1.7587000525183015e-05, + "loss": 2.1459, + "step": 7175 + }, + { + "epoch": 0.24, + "grad_norm": 0.6926736831665039, + "learning_rate": 1.758630807122492e-05, + "loss": 2.1492, + "step": 7176 + }, + { + "epoch": 0.24, + "grad_norm": 0.7265029549598694, + "learning_rate": 1.758561553156116e-05, + "loss": 2.2149, + "step": 7177 + }, + { + "epoch": 0.24, + "grad_norm": 0.6997843980789185, + "learning_rate": 1.7584922906199566e-05, + "loss": 2.1439, + "step": 7178 + }, + { + "epoch": 0.24, + "grad_norm": 0.7008737921714783, + "learning_rate": 1.7584230195147957e-05, + "loss": 2.1605, + "step": 7179 + }, + { + "epoch": 0.24, + "grad_norm": 0.7115992903709412, + "learning_rate": 1.7583537398414157e-05, + "loss": 2.2262, + "step": 7180 + }, + { + "epoch": 0.24, + "grad_norm": 0.6865088939666748, + "learning_rate": 1.7582844516005998e-05, + "loss": 2.1604, + "step": 7181 + }, + { + "epoch": 0.24, + "grad_norm": 0.7284027338027954, + "learning_rate": 1.7582151547931307e-05, + "loss": 2.2171, + "step": 7182 + }, + { + "epoch": 0.24, + "grad_norm": 0.7269845604896545, + "learning_rate": 1.758145849419791e-05, + "loss": 2.1779, + "step": 7183 + }, + { + "epoch": 0.24, + "grad_norm": 0.70087069272995, + "learning_rate": 1.7580765354813635e-05, + "loss": 2.1639, + "step": 7184 + }, + { + "epoch": 0.24, + "grad_norm": 0.7409170269966125, + "learning_rate": 1.758007212978632e-05, + "loss": 2.2228, + "step": 7185 + }, + { + "epoch": 0.24, + "grad_norm": 0.7220888137817383, + "learning_rate": 1.757937881912379e-05, + "loss": 2.0875, + "step": 7186 + }, + { + "epoch": 0.24, + "grad_norm": 0.7421817183494568, + "learning_rate": 1.757868542283388e-05, + "loss": 2.186, + "step": 7187 + }, + { + "epoch": 0.24, + "grad_norm": 0.7095873951911926, + "learning_rate": 1.7577991940924427e-05, + "loss": 2.1767, + "step": 7188 + }, + { + "epoch": 0.24, + "grad_norm": 0.6997811198234558, + "learning_rate": 1.757729837340326e-05, + "loss": 2.1697, + "step": 7189 + }, + { + "epoch": 0.24, + "grad_norm": 0.6928690075874329, + "learning_rate": 1.7576604720278215e-05, + "loss": 2.1204, + "step": 7190 + }, + { + "epoch": 0.24, + "grad_norm": 0.7197240591049194, + "learning_rate": 1.757591098155713e-05, + "loss": 2.1668, + "step": 7191 + }, + { + "epoch": 0.24, + "grad_norm": 0.7520041465759277, + "learning_rate": 1.7575217157247845e-05, + "loss": 2.1643, + "step": 7192 + }, + { + "epoch": 0.24, + "grad_norm": 0.7292707562446594, + "learning_rate": 1.7574523247358193e-05, + "loss": 2.1609, + "step": 7193 + }, + { + "epoch": 0.24, + "grad_norm": 0.7159697413444519, + "learning_rate": 1.757382925189602e-05, + "loss": 2.1064, + "step": 7194 + }, + { + "epoch": 0.24, + "grad_norm": 0.7121797800064087, + "learning_rate": 1.7573135170869163e-05, + "loss": 2.1888, + "step": 7195 + }, + { + "epoch": 0.24, + "grad_norm": 0.6986796259880066, + "learning_rate": 1.7572441004285462e-05, + "loss": 2.1751, + "step": 7196 + }, + { + "epoch": 0.24, + "grad_norm": 0.7070351243019104, + "learning_rate": 1.7571746752152764e-05, + "loss": 2.1809, + "step": 7197 + }, + { + "epoch": 0.24, + "grad_norm": 0.7284947633743286, + "learning_rate": 1.75710524144789e-05, + "loss": 2.174, + "step": 7198 + }, + { + "epoch": 0.24, + "grad_norm": 0.7155860662460327, + "learning_rate": 1.757035799127173e-05, + "loss": 2.2541, + "step": 7199 + }, + { + "epoch": 0.24, + "grad_norm": 0.7115167379379272, + "learning_rate": 1.756966348253909e-05, + "loss": 2.155, + "step": 7200 + }, + { + "epoch": 0.24, + "grad_norm": 0.7302448749542236, + "learning_rate": 1.756896888828883e-05, + "loss": 2.2762, + "step": 7201 + }, + { + "epoch": 0.24, + "grad_norm": 0.7236961722373962, + "learning_rate": 1.7568274208528793e-05, + "loss": 2.1774, + "step": 7202 + }, + { + "epoch": 0.24, + "grad_norm": 0.7028623223304749, + "learning_rate": 1.756757944326683e-05, + "loss": 2.175, + "step": 7203 + }, + { + "epoch": 0.24, + "grad_norm": 0.685914933681488, + "learning_rate": 1.7566884592510787e-05, + "loss": 2.1462, + "step": 7204 + }, + { + "epoch": 0.24, + "grad_norm": 0.712369441986084, + "learning_rate": 1.7566189656268518e-05, + "loss": 2.2266, + "step": 7205 + }, + { + "epoch": 0.24, + "grad_norm": 0.6990700960159302, + "learning_rate": 1.7565494634547875e-05, + "loss": 2.1827, + "step": 7206 + }, + { + "epoch": 0.24, + "grad_norm": 0.7054822444915771, + "learning_rate": 1.7564799527356705e-05, + "loss": 2.1786, + "step": 7207 + }, + { + "epoch": 0.24, + "grad_norm": 0.7042509913444519, + "learning_rate": 1.7564104334702864e-05, + "loss": 2.2569, + "step": 7208 + }, + { + "epoch": 0.24, + "grad_norm": 0.7008150815963745, + "learning_rate": 1.7563409056594208e-05, + "loss": 2.2296, + "step": 7209 + }, + { + "epoch": 0.24, + "grad_norm": 0.7111454010009766, + "learning_rate": 1.7562713693038586e-05, + "loss": 2.1781, + "step": 7210 + }, + { + "epoch": 0.24, + "grad_norm": 0.6818954944610596, + "learning_rate": 1.7562018244043858e-05, + "loss": 2.166, + "step": 7211 + }, + { + "epoch": 0.24, + "grad_norm": 0.7062378525733948, + "learning_rate": 1.756132270961788e-05, + "loss": 2.2324, + "step": 7212 + }, + { + "epoch": 0.24, + "grad_norm": 0.7248069047927856, + "learning_rate": 1.7560627089768507e-05, + "loss": 2.2316, + "step": 7213 + }, + { + "epoch": 0.24, + "grad_norm": 0.7233408689498901, + "learning_rate": 1.75599313845036e-05, + "loss": 2.0961, + "step": 7214 + }, + { + "epoch": 0.24, + "grad_norm": 0.7021569609642029, + "learning_rate": 1.755923559383102e-05, + "loss": 2.1695, + "step": 7215 + }, + { + "epoch": 0.24, + "grad_norm": 0.7020743489265442, + "learning_rate": 1.7558539717758628e-05, + "loss": 2.1291, + "step": 7216 + }, + { + "epoch": 0.24, + "grad_norm": 0.7038241028785706, + "learning_rate": 1.7557843756294283e-05, + "loss": 2.1557, + "step": 7217 + }, + { + "epoch": 0.24, + "grad_norm": 0.7126986384391785, + "learning_rate": 1.755714770944585e-05, + "loss": 2.1401, + "step": 7218 + }, + { + "epoch": 0.24, + "grad_norm": 0.7292580604553223, + "learning_rate": 1.7556451577221186e-05, + "loss": 2.1915, + "step": 7219 + }, + { + "epoch": 0.24, + "grad_norm": 0.7099244594573975, + "learning_rate": 1.7555755359628162e-05, + "loss": 2.1581, + "step": 7220 + }, + { + "epoch": 0.24, + "grad_norm": 0.7247647643089294, + "learning_rate": 1.7555059056674644e-05, + "loss": 2.1343, + "step": 7221 + }, + { + "epoch": 0.24, + "grad_norm": 0.7021731734275818, + "learning_rate": 1.75543626683685e-05, + "loss": 2.1052, + "step": 7222 + }, + { + "epoch": 0.24, + "grad_norm": 0.7235526442527771, + "learning_rate": 1.755366619471759e-05, + "loss": 2.1418, + "step": 7223 + }, + { + "epoch": 0.24, + "grad_norm": 0.6962249279022217, + "learning_rate": 1.7552969635729785e-05, + "loss": 2.2227, + "step": 7224 + }, + { + "epoch": 0.24, + "grad_norm": 0.7144291996955872, + "learning_rate": 1.755227299141296e-05, + "loss": 2.0845, + "step": 7225 + }, + { + "epoch": 0.24, + "grad_norm": 0.7120006680488586, + "learning_rate": 1.7551576261774978e-05, + "loss": 2.1192, + "step": 7226 + }, + { + "epoch": 0.24, + "grad_norm": 0.7338788509368896, + "learning_rate": 1.755087944682371e-05, + "loss": 2.1476, + "step": 7227 + }, + { + "epoch": 0.24, + "grad_norm": 0.7074646353721619, + "learning_rate": 1.7550182546567034e-05, + "loss": 2.1194, + "step": 7228 + }, + { + "epoch": 0.24, + "grad_norm": 0.7230750322341919, + "learning_rate": 1.7549485561012822e-05, + "loss": 2.1866, + "step": 7229 + }, + { + "epoch": 0.24, + "grad_norm": 0.7354404926300049, + "learning_rate": 1.7548788490168945e-05, + "loss": 2.0884, + "step": 7230 + }, + { + "epoch": 0.24, + "grad_norm": 0.719078004360199, + "learning_rate": 1.754809133404328e-05, + "loss": 2.1524, + "step": 7231 + }, + { + "epoch": 0.24, + "grad_norm": 0.6931889057159424, + "learning_rate": 1.7547394092643704e-05, + "loss": 2.1343, + "step": 7232 + }, + { + "epoch": 0.24, + "grad_norm": 0.7164390683174133, + "learning_rate": 1.754669676597809e-05, + "loss": 2.1451, + "step": 7233 + }, + { + "epoch": 0.24, + "grad_norm": 0.714684784412384, + "learning_rate": 1.7545999354054322e-05, + "loss": 2.184, + "step": 7234 + }, + { + "epoch": 0.24, + "grad_norm": 0.7204262018203735, + "learning_rate": 1.7545301856880273e-05, + "loss": 2.1609, + "step": 7235 + }, + { + "epoch": 0.24, + "grad_norm": 0.6923215389251709, + "learning_rate": 1.7544604274463824e-05, + "loss": 2.207, + "step": 7236 + }, + { + "epoch": 0.24, + "grad_norm": 0.6961036920547485, + "learning_rate": 1.754390660681286e-05, + "loss": 2.1285, + "step": 7237 + }, + { + "epoch": 0.24, + "grad_norm": 0.725296676158905, + "learning_rate": 1.754320885393526e-05, + "loss": 2.1073, + "step": 7238 + }, + { + "epoch": 0.24, + "grad_norm": 0.7500540018081665, + "learning_rate": 1.7542511015838907e-05, + "loss": 2.1075, + "step": 7239 + }, + { + "epoch": 0.24, + "grad_norm": 0.7047463059425354, + "learning_rate": 1.7541813092531686e-05, + "loss": 2.1389, + "step": 7240 + }, + { + "epoch": 0.24, + "grad_norm": 0.7001760601997375, + "learning_rate": 1.7541115084021482e-05, + "loss": 2.1657, + "step": 7241 + }, + { + "epoch": 0.24, + "grad_norm": 0.7066453099250793, + "learning_rate": 1.7540416990316176e-05, + "loss": 2.2254, + "step": 7242 + }, + { + "epoch": 0.24, + "grad_norm": 0.7352464199066162, + "learning_rate": 1.753971881142366e-05, + "loss": 2.1193, + "step": 7243 + }, + { + "epoch": 0.24, + "grad_norm": 0.6961249709129333, + "learning_rate": 1.753902054735182e-05, + "loss": 2.1574, + "step": 7244 + }, + { + "epoch": 0.24, + "grad_norm": 0.7194728255271912, + "learning_rate": 1.7538322198108543e-05, + "loss": 2.1432, + "step": 7245 + }, + { + "epoch": 0.24, + "grad_norm": 0.7117722630500793, + "learning_rate": 1.7537623763701716e-05, + "loss": 2.1208, + "step": 7246 + }, + { + "epoch": 0.24, + "grad_norm": 0.7025547027587891, + "learning_rate": 1.753692524413924e-05, + "loss": 2.1954, + "step": 7247 + }, + { + "epoch": 0.24, + "grad_norm": 0.7396434545516968, + "learning_rate": 1.7536226639428995e-05, + "loss": 2.2003, + "step": 7248 + }, + { + "epoch": 0.24, + "grad_norm": 0.6975345611572266, + "learning_rate": 1.7535527949578878e-05, + "loss": 2.1389, + "step": 7249 + }, + { + "epoch": 0.24, + "grad_norm": 0.7192094326019287, + "learning_rate": 1.7534829174596787e-05, + "loss": 2.1633, + "step": 7250 + }, + { + "epoch": 0.24, + "grad_norm": 0.7041571736335754, + "learning_rate": 1.7534130314490605e-05, + "loss": 2.1869, + "step": 7251 + }, + { + "epoch": 0.24, + "grad_norm": 0.6926823258399963, + "learning_rate": 1.753343136926824e-05, + "loss": 2.2033, + "step": 7252 + }, + { + "epoch": 0.24, + "grad_norm": 0.744451105594635, + "learning_rate": 1.753273233893758e-05, + "loss": 2.2377, + "step": 7253 + }, + { + "epoch": 0.24, + "grad_norm": 0.6958133578300476, + "learning_rate": 1.7532033223506527e-05, + "loss": 2.1863, + "step": 7254 + }, + { + "epoch": 0.24, + "grad_norm": 0.7272453904151917, + "learning_rate": 1.7531334022982974e-05, + "loss": 2.1706, + "step": 7255 + }, + { + "epoch": 0.24, + "grad_norm": 0.7132975459098816, + "learning_rate": 1.7530634737374823e-05, + "loss": 2.1284, + "step": 7256 + }, + { + "epoch": 0.24, + "grad_norm": 0.7043978571891785, + "learning_rate": 1.7529935366689974e-05, + "loss": 2.1633, + "step": 7257 + }, + { + "epoch": 0.24, + "grad_norm": 0.6916427612304688, + "learning_rate": 1.752923591093633e-05, + "loss": 2.1578, + "step": 7258 + }, + { + "epoch": 0.24, + "grad_norm": 0.7118101716041565, + "learning_rate": 1.752853637012179e-05, + "loss": 2.1702, + "step": 7259 + }, + { + "epoch": 0.24, + "grad_norm": 0.7304262518882751, + "learning_rate": 1.7527836744254258e-05, + "loss": 2.1953, + "step": 7260 + }, + { + "epoch": 0.24, + "grad_norm": 0.7017860412597656, + "learning_rate": 1.7527137033341637e-05, + "loss": 2.2032, + "step": 7261 + }, + { + "epoch": 0.24, + "grad_norm": 0.6941752433776855, + "learning_rate": 1.752643723739184e-05, + "loss": 2.1451, + "step": 7262 + }, + { + "epoch": 0.24, + "grad_norm": 0.7321357131004333, + "learning_rate": 1.752573735641276e-05, + "loss": 2.1279, + "step": 7263 + }, + { + "epoch": 0.24, + "grad_norm": 0.69637531042099, + "learning_rate": 1.752503739041231e-05, + "loss": 2.0956, + "step": 7264 + }, + { + "epoch": 0.24, + "grad_norm": 0.7315152883529663, + "learning_rate": 1.75243373393984e-05, + "loss": 2.1598, + "step": 7265 + }, + { + "epoch": 0.24, + "grad_norm": 0.6988497376441956, + "learning_rate": 1.7523637203378934e-05, + "loss": 2.1443, + "step": 7266 + }, + { + "epoch": 0.24, + "grad_norm": 0.7008230686187744, + "learning_rate": 1.7522936982361825e-05, + "loss": 2.186, + "step": 7267 + }, + { + "epoch": 0.24, + "grad_norm": 0.7115781903266907, + "learning_rate": 1.7522236676354983e-05, + "loss": 2.1296, + "step": 7268 + }, + { + "epoch": 0.24, + "grad_norm": 0.7292385697364807, + "learning_rate": 1.7521536285366318e-05, + "loss": 2.2256, + "step": 7269 + }, + { + "epoch": 0.24, + "grad_norm": 0.7356202006340027, + "learning_rate": 1.7520835809403745e-05, + "loss": 2.1326, + "step": 7270 + }, + { + "epoch": 0.24, + "grad_norm": 0.7628737092018127, + "learning_rate": 1.7520135248475178e-05, + "loss": 2.1813, + "step": 7271 + }, + { + "epoch": 0.24, + "grad_norm": 0.7206861972808838, + "learning_rate": 1.751943460258853e-05, + "loss": 2.1567, + "step": 7272 + }, + { + "epoch": 0.24, + "grad_norm": 0.7141769528388977, + "learning_rate": 1.7518733871751716e-05, + "loss": 2.1568, + "step": 7273 + }, + { + "epoch": 0.24, + "grad_norm": 0.7164717316627502, + "learning_rate": 1.7518033055972653e-05, + "loss": 2.1305, + "step": 7274 + }, + { + "epoch": 0.24, + "grad_norm": 0.7194958925247192, + "learning_rate": 1.751733215525926e-05, + "loss": 2.1489, + "step": 7275 + }, + { + "epoch": 0.24, + "grad_norm": 0.66965651512146, + "learning_rate": 1.7516631169619455e-05, + "loss": 2.2196, + "step": 7276 + }, + { + "epoch": 0.24, + "grad_norm": 0.7030080556869507, + "learning_rate": 1.7515930099061152e-05, + "loss": 2.141, + "step": 7277 + }, + { + "epoch": 0.24, + "grad_norm": 0.7465642094612122, + "learning_rate": 1.7515228943592275e-05, + "loss": 2.1471, + "step": 7278 + }, + { + "epoch": 0.24, + "grad_norm": 0.7094667553901672, + "learning_rate": 1.751452770322075e-05, + "loss": 2.2516, + "step": 7279 + }, + { + "epoch": 0.24, + "grad_norm": 0.7043116688728333, + "learning_rate": 1.7513826377954493e-05, + "loss": 2.1317, + "step": 7280 + }, + { + "epoch": 0.24, + "grad_norm": 0.7117936015129089, + "learning_rate": 1.751312496780143e-05, + "loss": 2.1652, + "step": 7281 + }, + { + "epoch": 0.24, + "grad_norm": 0.7124249339103699, + "learning_rate": 1.7512423472769483e-05, + "loss": 2.1329, + "step": 7282 + }, + { + "epoch": 0.24, + "grad_norm": 0.7335474491119385, + "learning_rate": 1.751172189286658e-05, + "loss": 2.2121, + "step": 7283 + }, + { + "epoch": 0.24, + "grad_norm": 0.7250624895095825, + "learning_rate": 1.751102022810064e-05, + "loss": 2.2243, + "step": 7284 + }, + { + "epoch": 0.24, + "grad_norm": 0.6745933890342712, + "learning_rate": 1.75103184784796e-05, + "loss": 2.1639, + "step": 7285 + }, + { + "epoch": 0.24, + "grad_norm": 0.711452305316925, + "learning_rate": 1.7509616644011384e-05, + "loss": 2.1893, + "step": 7286 + }, + { + "epoch": 0.24, + "grad_norm": 0.6868210434913635, + "learning_rate": 1.750891472470392e-05, + "loss": 2.1655, + "step": 7287 + }, + { + "epoch": 0.24, + "grad_norm": 0.7069107890129089, + "learning_rate": 1.7508212720565137e-05, + "loss": 2.1877, + "step": 7288 + }, + { + "epoch": 0.24, + "grad_norm": 0.6904305815696716, + "learning_rate": 1.7507510631602967e-05, + "loss": 2.2068, + "step": 7289 + }, + { + "epoch": 0.24, + "grad_norm": 0.732614278793335, + "learning_rate": 1.750680845782534e-05, + "loss": 2.1553, + "step": 7290 + }, + { + "epoch": 0.24, + "grad_norm": 0.6902382969856262, + "learning_rate": 1.7506106199240192e-05, + "loss": 2.1724, + "step": 7291 + }, + { + "epoch": 0.24, + "grad_norm": 0.7217767238616943, + "learning_rate": 1.7505403855855458e-05, + "loss": 2.2679, + "step": 7292 + }, + { + "epoch": 0.24, + "grad_norm": 0.7065476775169373, + "learning_rate": 1.7504701427679066e-05, + "loss": 2.1749, + "step": 7293 + }, + { + "epoch": 0.24, + "grad_norm": 0.7379423379898071, + "learning_rate": 1.750399891471896e-05, + "loss": 2.2309, + "step": 7294 + }, + { + "epoch": 0.24, + "grad_norm": 0.6837379336357117, + "learning_rate": 1.7503296316983064e-05, + "loss": 2.1935, + "step": 7295 + }, + { + "epoch": 0.24, + "grad_norm": 0.6765915751457214, + "learning_rate": 1.750259363447933e-05, + "loss": 2.1636, + "step": 7296 + }, + { + "epoch": 0.24, + "grad_norm": 0.7495899796485901, + "learning_rate": 1.750189086721569e-05, + "loss": 2.1608, + "step": 7297 + }, + { + "epoch": 0.24, + "grad_norm": 0.6841633915901184, + "learning_rate": 1.7501188015200082e-05, + "loss": 2.1792, + "step": 7298 + }, + { + "epoch": 0.24, + "grad_norm": 0.6756229996681213, + "learning_rate": 1.7500485078440447e-05, + "loss": 2.114, + "step": 7299 + }, + { + "epoch": 0.24, + "grad_norm": 0.6933177709579468, + "learning_rate": 1.7499782056944726e-05, + "loss": 2.1558, + "step": 7300 + }, + { + "epoch": 0.24, + "grad_norm": 0.7170290350914001, + "learning_rate": 1.7499078950720866e-05, + "loss": 2.1121, + "step": 7301 + }, + { + "epoch": 0.24, + "grad_norm": 0.7026599645614624, + "learning_rate": 1.7498375759776807e-05, + "loss": 2.1191, + "step": 7302 + }, + { + "epoch": 0.24, + "grad_norm": 0.6902745962142944, + "learning_rate": 1.7497672484120492e-05, + "loss": 2.1091, + "step": 7303 + }, + { + "epoch": 0.24, + "grad_norm": 0.6959500312805176, + "learning_rate": 1.7496969123759866e-05, + "loss": 2.1364, + "step": 7304 + }, + { + "epoch": 0.24, + "grad_norm": 0.7102234363555908, + "learning_rate": 1.749626567870288e-05, + "loss": 2.1135, + "step": 7305 + }, + { + "epoch": 0.24, + "grad_norm": 0.757964551448822, + "learning_rate": 1.749556214895747e-05, + "loss": 2.1173, + "step": 7306 + }, + { + "epoch": 0.24, + "grad_norm": 0.7235650420188904, + "learning_rate": 1.74948585345316e-05, + "loss": 2.1222, + "step": 7307 + }, + { + "epoch": 0.24, + "grad_norm": 0.6940234899520874, + "learning_rate": 1.7494154835433207e-05, + "loss": 2.1321, + "step": 7308 + }, + { + "epoch": 0.24, + "grad_norm": 0.6974302530288696, + "learning_rate": 1.7493451051670244e-05, + "loss": 2.1214, + "step": 7309 + }, + { + "epoch": 0.24, + "grad_norm": 0.7239911556243896, + "learning_rate": 1.7492747183250663e-05, + "loss": 2.0629, + "step": 7310 + }, + { + "epoch": 0.24, + "grad_norm": 0.7024223804473877, + "learning_rate": 1.749204323018242e-05, + "loss": 2.185, + "step": 7311 + }, + { + "epoch": 0.24, + "grad_norm": 0.7538290619850159, + "learning_rate": 1.749133919247346e-05, + "loss": 2.1747, + "step": 7312 + }, + { + "epoch": 0.24, + "grad_norm": 0.7445806264877319, + "learning_rate": 1.749063507013174e-05, + "loss": 2.1127, + "step": 7313 + }, + { + "epoch": 0.24, + "grad_norm": 0.688978374004364, + "learning_rate": 1.7489930863165214e-05, + "loss": 1.9789, + "step": 7314 + }, + { + "epoch": 0.24, + "grad_norm": 0.7351188659667969, + "learning_rate": 1.7489226571581837e-05, + "loss": 2.1352, + "step": 7315 + }, + { + "epoch": 0.24, + "grad_norm": 0.6995475888252258, + "learning_rate": 1.7488522195389573e-05, + "loss": 2.145, + "step": 7316 + }, + { + "epoch": 0.24, + "grad_norm": 0.7234391570091248, + "learning_rate": 1.748781773459637e-05, + "loss": 2.1589, + "step": 7317 + }, + { + "epoch": 0.24, + "grad_norm": 0.7088303565979004, + "learning_rate": 1.7487113189210192e-05, + "loss": 2.1685, + "step": 7318 + }, + { + "epoch": 0.24, + "grad_norm": 0.7406907081604004, + "learning_rate": 1.7486408559238993e-05, + "loss": 2.1189, + "step": 7319 + }, + { + "epoch": 0.24, + "grad_norm": 0.7396381497383118, + "learning_rate": 1.7485703844690744e-05, + "loss": 2.1539, + "step": 7320 + }, + { + "epoch": 0.24, + "grad_norm": 0.732856810092926, + "learning_rate": 1.7484999045573395e-05, + "loss": 2.2015, + "step": 7321 + }, + { + "epoch": 0.24, + "grad_norm": 0.7239986062049866, + "learning_rate": 1.7484294161894916e-05, + "loss": 2.1977, + "step": 7322 + }, + { + "epoch": 0.24, + "grad_norm": 0.6993224620819092, + "learning_rate": 1.7483589193663268e-05, + "loss": 2.1003, + "step": 7323 + }, + { + "epoch": 0.24, + "grad_norm": 0.6834139227867126, + "learning_rate": 1.7482884140886412e-05, + "loss": 2.1539, + "step": 7324 + }, + { + "epoch": 0.24, + "grad_norm": 0.7545121908187866, + "learning_rate": 1.748217900357232e-05, + "loss": 2.1639, + "step": 7325 + }, + { + "epoch": 0.24, + "grad_norm": 0.6862412691116333, + "learning_rate": 1.748147378172895e-05, + "loss": 2.1949, + "step": 7326 + }, + { + "epoch": 0.24, + "grad_norm": 0.6971762180328369, + "learning_rate": 1.748076847536428e-05, + "loss": 2.162, + "step": 7327 + }, + { + "epoch": 0.24, + "grad_norm": 0.7125928401947021, + "learning_rate": 1.748006308448627e-05, + "loss": 2.1504, + "step": 7328 + }, + { + "epoch": 0.24, + "grad_norm": 0.7318286299705505, + "learning_rate": 1.7479357609102893e-05, + "loss": 2.2281, + "step": 7329 + }, + { + "epoch": 0.24, + "grad_norm": 0.7059075832366943, + "learning_rate": 1.747865204922211e-05, + "loss": 2.088, + "step": 7330 + }, + { + "epoch": 0.24, + "grad_norm": 0.6982176303863525, + "learning_rate": 1.7477946404851903e-05, + "loss": 2.1215, + "step": 7331 + }, + { + "epoch": 0.24, + "grad_norm": 0.7462006211280823, + "learning_rate": 1.7477240676000243e-05, + "loss": 2.1416, + "step": 7332 + }, + { + "epoch": 0.24, + "grad_norm": 0.7491143345832825, + "learning_rate": 1.7476534862675095e-05, + "loss": 2.1555, + "step": 7333 + }, + { + "epoch": 0.24, + "grad_norm": 0.6999737620353699, + "learning_rate": 1.747582896488444e-05, + "loss": 2.1447, + "step": 7334 + }, + { + "epoch": 0.24, + "grad_norm": 0.7268891334533691, + "learning_rate": 1.747512298263625e-05, + "loss": 2.165, + "step": 7335 + }, + { + "epoch": 0.24, + "grad_norm": 0.7181307077407837, + "learning_rate": 1.7474416915938502e-05, + "loss": 2.1499, + "step": 7336 + }, + { + "epoch": 0.24, + "grad_norm": 0.6906822919845581, + "learning_rate": 1.7473710764799173e-05, + "loss": 2.1951, + "step": 7337 + }, + { + "epoch": 0.24, + "grad_norm": 0.7302138209342957, + "learning_rate": 1.7473004529226237e-05, + "loss": 2.1269, + "step": 7338 + }, + { + "epoch": 0.24, + "grad_norm": 0.7196494340896606, + "learning_rate": 1.747229820922768e-05, + "loss": 2.1088, + "step": 7339 + }, + { + "epoch": 0.24, + "grad_norm": 0.7300520539283752, + "learning_rate": 1.747159180481147e-05, + "loss": 2.1777, + "step": 7340 + }, + { + "epoch": 0.24, + "grad_norm": 0.7005968689918518, + "learning_rate": 1.74708853159856e-05, + "loss": 2.1143, + "step": 7341 + }, + { + "epoch": 0.24, + "grad_norm": 0.8301087617874146, + "learning_rate": 1.7470178742758046e-05, + "loss": 2.1046, + "step": 7342 + }, + { + "epoch": 0.24, + "grad_norm": 0.7460008263587952, + "learning_rate": 1.746947208513679e-05, + "loss": 2.1479, + "step": 7343 + }, + { + "epoch": 0.24, + "grad_norm": 0.7298780679702759, + "learning_rate": 1.7468765343129813e-05, + "loss": 2.2079, + "step": 7344 + }, + { + "epoch": 0.24, + "grad_norm": 0.7237032055854797, + "learning_rate": 1.746805851674511e-05, + "loss": 2.2102, + "step": 7345 + }, + { + "epoch": 0.24, + "grad_norm": 0.6874216198921204, + "learning_rate": 1.746735160599065e-05, + "loss": 2.1385, + "step": 7346 + }, + { + "epoch": 0.24, + "grad_norm": 0.7372941970825195, + "learning_rate": 1.746664461087443e-05, + "loss": 2.1788, + "step": 7347 + }, + { + "epoch": 0.24, + "grad_norm": 0.6971107721328735, + "learning_rate": 1.746593753140444e-05, + "loss": 2.1717, + "step": 7348 + }, + { + "epoch": 0.24, + "grad_norm": 0.6987071633338928, + "learning_rate": 1.7465230367588656e-05, + "loss": 2.2039, + "step": 7349 + }, + { + "epoch": 0.24, + "grad_norm": 0.7064196467399597, + "learning_rate": 1.7464523119435076e-05, + "loss": 2.1982, + "step": 7350 + }, + { + "epoch": 0.24, + "grad_norm": 0.7170935273170471, + "learning_rate": 1.7463815786951692e-05, + "loss": 2.1776, + "step": 7351 + }, + { + "epoch": 0.24, + "grad_norm": 0.6694630980491638, + "learning_rate": 1.746310837014649e-05, + "loss": 2.1344, + "step": 7352 + }, + { + "epoch": 0.24, + "grad_norm": 0.6851156949996948, + "learning_rate": 1.7462400869027463e-05, + "loss": 2.1789, + "step": 7353 + }, + { + "epoch": 0.24, + "grad_norm": 0.7190427184104919, + "learning_rate": 1.7461693283602606e-05, + "loss": 2.1707, + "step": 7354 + }, + { + "epoch": 0.24, + "grad_norm": 0.7038695216178894, + "learning_rate": 1.746098561387991e-05, + "loss": 2.2036, + "step": 7355 + }, + { + "epoch": 0.24, + "grad_norm": 0.7284106612205505, + "learning_rate": 1.746027785986737e-05, + "loss": 2.2001, + "step": 7356 + }, + { + "epoch": 0.24, + "grad_norm": 0.722726583480835, + "learning_rate": 1.7459570021572983e-05, + "loss": 2.1712, + "step": 7357 + }, + { + "epoch": 0.24, + "grad_norm": 0.7008733153343201, + "learning_rate": 1.7458862099004744e-05, + "loss": 2.155, + "step": 7358 + }, + { + "epoch": 0.24, + "grad_norm": 0.7053101062774658, + "learning_rate": 1.7458154092170655e-05, + "loss": 2.1325, + "step": 7359 + }, + { + "epoch": 0.24, + "grad_norm": 0.7075342535972595, + "learning_rate": 1.7457446001078712e-05, + "loss": 2.1702, + "step": 7360 + }, + { + "epoch": 0.24, + "grad_norm": 0.730897068977356, + "learning_rate": 1.7456737825736916e-05, + "loss": 2.137, + "step": 7361 + }, + { + "epoch": 0.24, + "grad_norm": 0.7369991540908813, + "learning_rate": 1.7456029566153264e-05, + "loss": 2.092, + "step": 7362 + }, + { + "epoch": 0.24, + "grad_norm": 0.6947185397148132, + "learning_rate": 1.745532122233576e-05, + "loss": 2.1808, + "step": 7363 + }, + { + "epoch": 0.25, + "grad_norm": 0.725644588470459, + "learning_rate": 1.7454612794292404e-05, + "loss": 2.1444, + "step": 7364 + }, + { + "epoch": 0.25, + "grad_norm": 0.7057211399078369, + "learning_rate": 1.7453904282031207e-05, + "loss": 2.2077, + "step": 7365 + }, + { + "epoch": 0.25, + "grad_norm": 0.7329549789428711, + "learning_rate": 1.7453195685560164e-05, + "loss": 2.2167, + "step": 7366 + }, + { + "epoch": 0.25, + "grad_norm": 0.6952570676803589, + "learning_rate": 1.7452487004887282e-05, + "loss": 2.1227, + "step": 7367 + }, + { + "epoch": 0.25, + "grad_norm": 0.6819628477096558, + "learning_rate": 1.7451778240020573e-05, + "loss": 2.1415, + "step": 7368 + }, + { + "epoch": 0.25, + "grad_norm": 0.7332285046577454, + "learning_rate": 1.745106939096804e-05, + "loss": 2.1867, + "step": 7369 + }, + { + "epoch": 0.25, + "grad_norm": 0.7193008661270142, + "learning_rate": 1.745036045773769e-05, + "loss": 2.2043, + "step": 7370 + }, + { + "epoch": 0.25, + "grad_norm": 0.6841698884963989, + "learning_rate": 1.7449651440337538e-05, + "loss": 2.1465, + "step": 7371 + }, + { + "epoch": 0.25, + "grad_norm": 0.7153070569038391, + "learning_rate": 1.7448942338775584e-05, + "loss": 2.127, + "step": 7372 + }, + { + "epoch": 0.25, + "grad_norm": 0.6940542459487915, + "learning_rate": 1.7448233153059847e-05, + "loss": 2.1652, + "step": 7373 + }, + { + "epoch": 0.25, + "grad_norm": 0.702078640460968, + "learning_rate": 1.7447523883198342e-05, + "loss": 2.1799, + "step": 7374 + }, + { + "epoch": 0.25, + "grad_norm": 0.7142959833145142, + "learning_rate": 1.7446814529199072e-05, + "loss": 2.0938, + "step": 7375 + }, + { + "epoch": 0.25, + "grad_norm": 0.6977732181549072, + "learning_rate": 1.7446105091070058e-05, + "loss": 2.1918, + "step": 7376 + }, + { + "epoch": 0.25, + "grad_norm": 0.716506838798523, + "learning_rate": 1.744539556881931e-05, + "loss": 2.2126, + "step": 7377 + }, + { + "epoch": 0.25, + "grad_norm": 0.722317099571228, + "learning_rate": 1.7444685962454845e-05, + "loss": 2.1754, + "step": 7378 + }, + { + "epoch": 0.25, + "grad_norm": 0.7083778977394104, + "learning_rate": 1.7443976271984687e-05, + "loss": 2.1985, + "step": 7379 + }, + { + "epoch": 0.25, + "grad_norm": 0.7139623761177063, + "learning_rate": 1.7443266497416842e-05, + "loss": 2.2137, + "step": 7380 + }, + { + "epoch": 0.25, + "grad_norm": 0.7358871698379517, + "learning_rate": 1.7442556638759337e-05, + "loss": 2.2826, + "step": 7381 + }, + { + "epoch": 0.25, + "grad_norm": 0.6936337351799011, + "learning_rate": 1.7441846696020185e-05, + "loss": 2.142, + "step": 7382 + }, + { + "epoch": 0.25, + "grad_norm": 0.7159985899925232, + "learning_rate": 1.7441136669207416e-05, + "loss": 2.1861, + "step": 7383 + }, + { + "epoch": 0.25, + "grad_norm": 0.6704110503196716, + "learning_rate": 1.7440426558329046e-05, + "loss": 2.0958, + "step": 7384 + }, + { + "epoch": 0.25, + "grad_norm": 0.7053582072257996, + "learning_rate": 1.743971636339309e-05, + "loss": 2.1454, + "step": 7385 + }, + { + "epoch": 0.25, + "grad_norm": 0.6758845448493958, + "learning_rate": 1.7439006084407585e-05, + "loss": 2.1722, + "step": 7386 + }, + { + "epoch": 0.25, + "grad_norm": 0.7335237264633179, + "learning_rate": 1.7438295721380548e-05, + "loss": 2.2505, + "step": 7387 + }, + { + "epoch": 0.25, + "grad_norm": 0.692959725856781, + "learning_rate": 1.7437585274320005e-05, + "loss": 2.1461, + "step": 7388 + }, + { + "epoch": 0.25, + "grad_norm": 0.6938751935958862, + "learning_rate": 1.7436874743233984e-05, + "loss": 2.1179, + "step": 7389 + }, + { + "epoch": 0.25, + "grad_norm": 0.7352149486541748, + "learning_rate": 1.7436164128130507e-05, + "loss": 2.1928, + "step": 7390 + }, + { + "epoch": 0.25, + "grad_norm": 0.6796690225601196, + "learning_rate": 1.7435453429017604e-05, + "loss": 2.1386, + "step": 7391 + }, + { + "epoch": 0.25, + "grad_norm": 0.690133810043335, + "learning_rate": 1.743474264590331e-05, + "loss": 2.1405, + "step": 7392 + }, + { + "epoch": 0.25, + "grad_norm": 0.7542551159858704, + "learning_rate": 1.7434031778795652e-05, + "loss": 2.2049, + "step": 7393 + }, + { + "epoch": 0.25, + "grad_norm": 0.6859422326087952, + "learning_rate": 1.743332082770266e-05, + "loss": 2.1652, + "step": 7394 + }, + { + "epoch": 0.25, + "grad_norm": 0.69484943151474, + "learning_rate": 1.743260979263236e-05, + "loss": 2.2245, + "step": 7395 + }, + { + "epoch": 0.25, + "grad_norm": 0.7636570334434509, + "learning_rate": 1.7431898673592793e-05, + "loss": 2.1831, + "step": 7396 + }, + { + "epoch": 0.25, + "grad_norm": 0.675305962562561, + "learning_rate": 1.743118747059199e-05, + "loss": 2.0869, + "step": 7397 + }, + { + "epoch": 0.25, + "grad_norm": 0.6886658072471619, + "learning_rate": 1.743047618363799e-05, + "loss": 2.1854, + "step": 7398 + }, + { + "epoch": 0.25, + "grad_norm": 0.708949625492096, + "learning_rate": 1.742976481273882e-05, + "loss": 2.1772, + "step": 7399 + }, + { + "epoch": 0.25, + "grad_norm": 0.7116689085960388, + "learning_rate": 1.7429053357902527e-05, + "loss": 2.2006, + "step": 7400 + }, + { + "epoch": 0.25, + "grad_norm": 0.6921509504318237, + "learning_rate": 1.7428341819137136e-05, + "loss": 2.1045, + "step": 7401 + }, + { + "epoch": 0.25, + "grad_norm": 0.6995593309402466, + "learning_rate": 1.74276301964507e-05, + "loss": 2.203, + "step": 7402 + }, + { + "epoch": 0.25, + "grad_norm": 0.7579448819160461, + "learning_rate": 1.7426918489851242e-05, + "loss": 2.1404, + "step": 7403 + }, + { + "epoch": 0.25, + "grad_norm": 0.7423741817474365, + "learning_rate": 1.7426206699346816e-05, + "loss": 2.1423, + "step": 7404 + }, + { + "epoch": 0.25, + "grad_norm": 0.6746935844421387, + "learning_rate": 1.742549482494546e-05, + "loss": 2.1538, + "step": 7405 + }, + { + "epoch": 0.25, + "grad_norm": 0.7288904786109924, + "learning_rate": 1.7424782866655215e-05, + "loss": 2.1552, + "step": 7406 + }, + { + "epoch": 0.25, + "grad_norm": 0.7345695495605469, + "learning_rate": 1.7424070824484123e-05, + "loss": 2.2146, + "step": 7407 + }, + { + "epoch": 0.25, + "grad_norm": 0.7102009057998657, + "learning_rate": 1.7423358698440232e-05, + "loss": 2.1274, + "step": 7408 + }, + { + "epoch": 0.25, + "grad_norm": 0.7081682682037354, + "learning_rate": 1.7422646488531586e-05, + "loss": 2.2366, + "step": 7409 + }, + { + "epoch": 0.25, + "grad_norm": 0.7027879953384399, + "learning_rate": 1.7421934194766227e-05, + "loss": 2.2137, + "step": 7410 + }, + { + "epoch": 0.25, + "grad_norm": 0.7048414945602417, + "learning_rate": 1.7421221817152206e-05, + "loss": 2.2222, + "step": 7411 + }, + { + "epoch": 0.25, + "grad_norm": 0.7034715414047241, + "learning_rate": 1.742050935569757e-05, + "loss": 2.2014, + "step": 7412 + }, + { + "epoch": 0.25, + "grad_norm": 0.7006920576095581, + "learning_rate": 1.7419796810410368e-05, + "loss": 2.1246, + "step": 7413 + }, + { + "epoch": 0.25, + "grad_norm": 0.7030184864997864, + "learning_rate": 1.7419084181298648e-05, + "loss": 2.1018, + "step": 7414 + }, + { + "epoch": 0.25, + "grad_norm": 0.7494039535522461, + "learning_rate": 1.7418371468370466e-05, + "loss": 2.1859, + "step": 7415 + }, + { + "epoch": 0.25, + "grad_norm": 0.6777616143226624, + "learning_rate": 1.741765867163387e-05, + "loss": 2.1583, + "step": 7416 + }, + { + "epoch": 0.25, + "grad_norm": 0.7057734131813049, + "learning_rate": 1.7416945791096913e-05, + "loss": 2.1813, + "step": 7417 + }, + { + "epoch": 0.25, + "grad_norm": 0.6973326802253723, + "learning_rate": 1.741623282676765e-05, + "loss": 2.2326, + "step": 7418 + }, + { + "epoch": 0.25, + "grad_norm": 0.6813613772392273, + "learning_rate": 1.7415519778654134e-05, + "loss": 2.1236, + "step": 7419 + }, + { + "epoch": 0.25, + "grad_norm": 0.7161060571670532, + "learning_rate": 1.7414806646764422e-05, + "loss": 2.1569, + "step": 7420 + }, + { + "epoch": 0.25, + "grad_norm": 0.6896141767501831, + "learning_rate": 1.741409343110657e-05, + "loss": 2.155, + "step": 7421 + }, + { + "epoch": 0.25, + "grad_norm": 0.7087156176567078, + "learning_rate": 1.7413380131688636e-05, + "loss": 2.2002, + "step": 7422 + }, + { + "epoch": 0.25, + "grad_norm": 0.6988806128501892, + "learning_rate": 1.741266674851868e-05, + "loss": 2.1226, + "step": 7423 + }, + { + "epoch": 0.25, + "grad_norm": 0.7560961842536926, + "learning_rate": 1.7411953281604753e-05, + "loss": 2.2387, + "step": 7424 + }, + { + "epoch": 0.25, + "grad_norm": 0.7142537236213684, + "learning_rate": 1.741123973095493e-05, + "loss": 2.1575, + "step": 7425 + }, + { + "epoch": 0.25, + "grad_norm": 0.6950585246086121, + "learning_rate": 1.7410526096577257e-05, + "loss": 2.1462, + "step": 7426 + }, + { + "epoch": 0.25, + "grad_norm": 0.7015814185142517, + "learning_rate": 1.7409812378479803e-05, + "loss": 2.1112, + "step": 7427 + }, + { + "epoch": 0.25, + "grad_norm": 0.7047384977340698, + "learning_rate": 1.7409098576670636e-05, + "loss": 2.234, + "step": 7428 + }, + { + "epoch": 0.25, + "grad_norm": 0.7405452132225037, + "learning_rate": 1.7408384691157815e-05, + "loss": 2.1384, + "step": 7429 + }, + { + "epoch": 0.25, + "grad_norm": 0.7188785076141357, + "learning_rate": 1.7407670721949404e-05, + "loss": 2.156, + "step": 7430 + }, + { + "epoch": 0.25, + "grad_norm": 0.7146674394607544, + "learning_rate": 1.7406956669053467e-05, + "loss": 2.1454, + "step": 7431 + }, + { + "epoch": 0.25, + "grad_norm": 0.7263641357421875, + "learning_rate": 1.7406242532478078e-05, + "loss": 2.2112, + "step": 7432 + }, + { + "epoch": 0.25, + "grad_norm": 0.7088471055030823, + "learning_rate": 1.74055283122313e-05, + "loss": 2.1552, + "step": 7433 + }, + { + "epoch": 0.25, + "grad_norm": 0.7016251087188721, + "learning_rate": 1.7404814008321206e-05, + "loss": 2.1228, + "step": 7434 + }, + { + "epoch": 0.25, + "grad_norm": 0.715968906879425, + "learning_rate": 1.740409962075586e-05, + "loss": 2.169, + "step": 7435 + }, + { + "epoch": 0.25, + "grad_norm": 0.7019792199134827, + "learning_rate": 1.740338514954334e-05, + "loss": 2.0986, + "step": 7436 + }, + { + "epoch": 0.25, + "grad_norm": 0.6957949995994568, + "learning_rate": 1.740267059469171e-05, + "loss": 2.2018, + "step": 7437 + }, + { + "epoch": 0.25, + "grad_norm": 0.704764187335968, + "learning_rate": 1.7401955956209047e-05, + "loss": 2.1519, + "step": 7438 + }, + { + "epoch": 0.25, + "grad_norm": 0.7123653888702393, + "learning_rate": 1.7401241234103424e-05, + "loss": 2.1578, + "step": 7439 + }, + { + "epoch": 0.25, + "grad_norm": 0.6978668570518494, + "learning_rate": 1.740052642838291e-05, + "loss": 2.1586, + "step": 7440 + }, + { + "epoch": 0.25, + "grad_norm": 0.7097553610801697, + "learning_rate": 1.7399811539055592e-05, + "loss": 2.1147, + "step": 7441 + }, + { + "epoch": 0.25, + "grad_norm": 0.7407875061035156, + "learning_rate": 1.7399096566129537e-05, + "loss": 2.185, + "step": 7442 + }, + { + "epoch": 0.25, + "grad_norm": 0.7441389560699463, + "learning_rate": 1.7398381509612827e-05, + "loss": 2.0968, + "step": 7443 + }, + { + "epoch": 0.25, + "grad_norm": 0.7218737602233887, + "learning_rate": 1.7397666369513534e-05, + "loss": 2.1156, + "step": 7444 + }, + { + "epoch": 0.25, + "grad_norm": 0.6910512447357178, + "learning_rate": 1.7396951145839747e-05, + "loss": 2.1398, + "step": 7445 + }, + { + "epoch": 0.25, + "grad_norm": 0.6886022686958313, + "learning_rate": 1.739623583859954e-05, + "loss": 2.1143, + "step": 7446 + }, + { + "epoch": 0.25, + "grad_norm": 0.6996616721153259, + "learning_rate": 1.7395520447800994e-05, + "loss": 2.173, + "step": 7447 + }, + { + "epoch": 0.25, + "grad_norm": 0.7167861461639404, + "learning_rate": 1.7394804973452194e-05, + "loss": 2.192, + "step": 7448 + }, + { + "epoch": 0.25, + "grad_norm": 0.7033651471138, + "learning_rate": 1.7394089415561218e-05, + "loss": 2.164, + "step": 7449 + }, + { + "epoch": 0.25, + "grad_norm": 0.7103638648986816, + "learning_rate": 1.7393373774136155e-05, + "loss": 2.1629, + "step": 7450 + }, + { + "epoch": 0.25, + "grad_norm": 0.7029015421867371, + "learning_rate": 1.7392658049185088e-05, + "loss": 2.1443, + "step": 7451 + }, + { + "epoch": 0.25, + "grad_norm": 0.7431227564811707, + "learning_rate": 1.73919422407161e-05, + "loss": 2.2072, + "step": 7452 + }, + { + "epoch": 0.25, + "grad_norm": 0.748414933681488, + "learning_rate": 1.7391226348737286e-05, + "loss": 2.1445, + "step": 7453 + }, + { + "epoch": 0.25, + "grad_norm": 0.6983426809310913, + "learning_rate": 1.739051037325673e-05, + "loss": 2.1745, + "step": 7454 + }, + { + "epoch": 0.25, + "grad_norm": 0.6963228583335876, + "learning_rate": 1.738979431428251e-05, + "loss": 2.1153, + "step": 7455 + }, + { + "epoch": 0.25, + "grad_norm": 0.6816458702087402, + "learning_rate": 1.738907817182273e-05, + "loss": 2.1184, + "step": 7456 + }, + { + "epoch": 0.25, + "grad_norm": 0.7504379749298096, + "learning_rate": 1.7388361945885478e-05, + "loss": 2.1968, + "step": 7457 + }, + { + "epoch": 0.25, + "grad_norm": 0.7345001697540283, + "learning_rate": 1.7387645636478838e-05, + "loss": 2.1634, + "step": 7458 + }, + { + "epoch": 0.25, + "grad_norm": 0.7751197814941406, + "learning_rate": 1.7386929243610908e-05, + "loss": 2.2579, + "step": 7459 + }, + { + "epoch": 0.25, + "grad_norm": 0.7273992300033569, + "learning_rate": 1.7386212767289782e-05, + "loss": 2.169, + "step": 7460 + }, + { + "epoch": 0.25, + "grad_norm": 0.6980360746383667, + "learning_rate": 1.7385496207523554e-05, + "loss": 2.1606, + "step": 7461 + }, + { + "epoch": 0.25, + "grad_norm": 0.7287806868553162, + "learning_rate": 1.7384779564320315e-05, + "loss": 2.1308, + "step": 7462 + }, + { + "epoch": 0.25, + "grad_norm": 0.7536101341247559, + "learning_rate": 1.7384062837688166e-05, + "loss": 2.1401, + "step": 7463 + }, + { + "epoch": 0.25, + "grad_norm": 0.7021917104721069, + "learning_rate": 1.73833460276352e-05, + "loss": 2.1248, + "step": 7464 + }, + { + "epoch": 0.25, + "grad_norm": 0.7641261219978333, + "learning_rate": 1.7382629134169522e-05, + "loss": 2.141, + "step": 7465 + }, + { + "epoch": 0.25, + "grad_norm": 0.769978940486908, + "learning_rate": 1.7381912157299222e-05, + "loss": 2.0738, + "step": 7466 + }, + { + "epoch": 0.25, + "grad_norm": 0.6940213441848755, + "learning_rate": 1.738119509703241e-05, + "loss": 2.1623, + "step": 7467 + }, + { + "epoch": 0.25, + "grad_norm": 0.7132591009140015, + "learning_rate": 1.738047795337718e-05, + "loss": 2.1606, + "step": 7468 + }, + { + "epoch": 0.25, + "grad_norm": 0.6867596507072449, + "learning_rate": 1.737976072634163e-05, + "loss": 2.1361, + "step": 7469 + }, + { + "epoch": 0.25, + "grad_norm": 0.7038741111755371, + "learning_rate": 1.7379043415933874e-05, + "loss": 2.2105, + "step": 7470 + }, + { + "epoch": 0.25, + "grad_norm": 0.7070055603981018, + "learning_rate": 1.737832602216201e-05, + "loss": 2.1602, + "step": 7471 + }, + { + "epoch": 0.25, + "grad_norm": 0.7588679790496826, + "learning_rate": 1.737760854503414e-05, + "loss": 2.2208, + "step": 7472 + }, + { + "epoch": 0.25, + "grad_norm": 0.733066201210022, + "learning_rate": 1.7376890984558374e-05, + "loss": 2.1702, + "step": 7473 + }, + { + "epoch": 0.25, + "grad_norm": 0.7015243768692017, + "learning_rate": 1.737617334074282e-05, + "loss": 2.1432, + "step": 7474 + }, + { + "epoch": 0.25, + "grad_norm": 0.7411099076271057, + "learning_rate": 1.7375455613595577e-05, + "loss": 2.1639, + "step": 7475 + }, + { + "epoch": 0.25, + "grad_norm": 0.6995283961296082, + "learning_rate": 1.737473780312476e-05, + "loss": 2.1924, + "step": 7476 + }, + { + "epoch": 0.25, + "grad_norm": 0.7202366590499878, + "learning_rate": 1.737401990933848e-05, + "loss": 2.2227, + "step": 7477 + }, + { + "epoch": 0.25, + "grad_norm": 0.7031075358390808, + "learning_rate": 1.7373301932244842e-05, + "loss": 2.1387, + "step": 7478 + }, + { + "epoch": 0.25, + "grad_norm": 0.6923456192016602, + "learning_rate": 1.7372583871851962e-05, + "loss": 2.1065, + "step": 7479 + }, + { + "epoch": 0.25, + "grad_norm": 0.7043970823287964, + "learning_rate": 1.737186572816795e-05, + "loss": 2.1079, + "step": 7480 + }, + { + "epoch": 0.25, + "grad_norm": 0.7098026871681213, + "learning_rate": 1.7371147501200917e-05, + "loss": 2.1695, + "step": 7481 + }, + { + "epoch": 0.25, + "grad_norm": 0.7155427932739258, + "learning_rate": 1.7370429190958982e-05, + "loss": 2.1759, + "step": 7482 + }, + { + "epoch": 0.25, + "grad_norm": 0.7232871651649475, + "learning_rate": 1.7369710797450256e-05, + "loss": 2.1602, + "step": 7483 + }, + { + "epoch": 0.25, + "grad_norm": 0.7343376278877258, + "learning_rate": 1.736899232068286e-05, + "loss": 2.211, + "step": 7484 + }, + { + "epoch": 0.25, + "grad_norm": 0.7422235012054443, + "learning_rate": 1.73682737606649e-05, + "loss": 2.1357, + "step": 7485 + }, + { + "epoch": 0.25, + "grad_norm": 0.7326328754425049, + "learning_rate": 1.7367555117404506e-05, + "loss": 2.2202, + "step": 7486 + }, + { + "epoch": 0.25, + "grad_norm": 0.7203668355941772, + "learning_rate": 1.7366836390909794e-05, + "loss": 2.1505, + "step": 7487 + }, + { + "epoch": 0.25, + "grad_norm": 0.6873130798339844, + "learning_rate": 1.7366117581188878e-05, + "loss": 2.1756, + "step": 7488 + }, + { + "epoch": 0.25, + "grad_norm": 0.7645033597946167, + "learning_rate": 1.7365398688249885e-05, + "loss": 2.2623, + "step": 7489 + }, + { + "epoch": 0.25, + "grad_norm": 0.6934333443641663, + "learning_rate": 1.7364679712100933e-05, + "loss": 2.1253, + "step": 7490 + }, + { + "epoch": 0.25, + "grad_norm": 0.7220542430877686, + "learning_rate": 1.7363960652750148e-05, + "loss": 2.121, + "step": 7491 + }, + { + "epoch": 0.25, + "grad_norm": 0.676648736000061, + "learning_rate": 1.736324151020565e-05, + "loss": 2.0911, + "step": 7492 + }, + { + "epoch": 0.25, + "grad_norm": 0.7199625372886658, + "learning_rate": 1.7362522284475563e-05, + "loss": 2.1996, + "step": 7493 + }, + { + "epoch": 0.25, + "grad_norm": 0.7112314701080322, + "learning_rate": 1.7361802975568014e-05, + "loss": 2.1218, + "step": 7494 + }, + { + "epoch": 0.25, + "grad_norm": 0.7417469620704651, + "learning_rate": 1.7361083583491133e-05, + "loss": 2.1805, + "step": 7495 + }, + { + "epoch": 0.25, + "grad_norm": 0.7254404425621033, + "learning_rate": 1.736036410825304e-05, + "loss": 2.1814, + "step": 7496 + }, + { + "epoch": 0.25, + "grad_norm": 0.6896488666534424, + "learning_rate": 1.7359644549861866e-05, + "loss": 2.1396, + "step": 7497 + }, + { + "epoch": 0.25, + "grad_norm": 0.7474830150604248, + "learning_rate": 1.7358924908325745e-05, + "loss": 2.1735, + "step": 7498 + }, + { + "epoch": 0.25, + "grad_norm": 0.7241219878196716, + "learning_rate": 1.7358205183652802e-05, + "loss": 2.126, + "step": 7499 + }, + { + "epoch": 0.25, + "grad_norm": 0.7144243717193604, + "learning_rate": 1.7357485375851165e-05, + "loss": 2.2264, + "step": 7500 + }, + { + "epoch": 0.25, + "grad_norm": 0.7307119369506836, + "learning_rate": 1.735676548492898e-05, + "loss": 2.1178, + "step": 7501 + }, + { + "epoch": 0.25, + "grad_norm": 0.7247582674026489, + "learning_rate": 1.735604551089436e-05, + "loss": 2.2271, + "step": 7502 + }, + { + "epoch": 0.25, + "grad_norm": 0.6875985860824585, + "learning_rate": 1.7355325453755453e-05, + "loss": 2.1737, + "step": 7503 + }, + { + "epoch": 0.25, + "grad_norm": 0.7499565482139587, + "learning_rate": 1.7354605313520387e-05, + "loss": 2.1483, + "step": 7504 + }, + { + "epoch": 0.25, + "grad_norm": 0.7241115570068359, + "learning_rate": 1.7353885090197305e-05, + "loss": 2.0983, + "step": 7505 + }, + { + "epoch": 0.25, + "grad_norm": 0.7252108454704285, + "learning_rate": 1.7353164783794335e-05, + "loss": 2.2157, + "step": 7506 + }, + { + "epoch": 0.25, + "grad_norm": 0.7384920716285706, + "learning_rate": 1.735244439431962e-05, + "loss": 2.1632, + "step": 7507 + }, + { + "epoch": 0.25, + "grad_norm": 0.7114937901496887, + "learning_rate": 1.73517239217813e-05, + "loss": 2.1594, + "step": 7508 + }, + { + "epoch": 0.25, + "grad_norm": 0.7011703252792358, + "learning_rate": 1.735100336618751e-05, + "loss": 2.1403, + "step": 7509 + }, + { + "epoch": 0.25, + "grad_norm": 0.6802363991737366, + "learning_rate": 1.7350282727546386e-05, + "loss": 2.0931, + "step": 7510 + }, + { + "epoch": 0.25, + "grad_norm": 0.6992273330688477, + "learning_rate": 1.7349562005866083e-05, + "loss": 2.1548, + "step": 7511 + }, + { + "epoch": 0.25, + "grad_norm": 0.7036104202270508, + "learning_rate": 1.7348841201154734e-05, + "loss": 2.1983, + "step": 7512 + }, + { + "epoch": 0.25, + "grad_norm": 0.718110978603363, + "learning_rate": 1.7348120313420485e-05, + "loss": 2.1642, + "step": 7513 + }, + { + "epoch": 0.25, + "grad_norm": 0.7201098799705505, + "learning_rate": 1.7347399342671475e-05, + "loss": 2.0977, + "step": 7514 + }, + { + "epoch": 0.25, + "grad_norm": 0.7423645853996277, + "learning_rate": 1.7346678288915858e-05, + "loss": 2.1916, + "step": 7515 + }, + { + "epoch": 0.25, + "grad_norm": 0.7260878682136536, + "learning_rate": 1.7345957152161773e-05, + "loss": 2.1935, + "step": 7516 + }, + { + "epoch": 0.25, + "grad_norm": 0.7157997488975525, + "learning_rate": 1.734523593241737e-05, + "loss": 2.2339, + "step": 7517 + }, + { + "epoch": 0.25, + "grad_norm": 0.7187975645065308, + "learning_rate": 1.7344514629690793e-05, + "loss": 2.2074, + "step": 7518 + }, + { + "epoch": 0.25, + "grad_norm": 0.7448701858520508, + "learning_rate": 1.7343793243990198e-05, + "loss": 2.2391, + "step": 7519 + }, + { + "epoch": 0.25, + "grad_norm": 0.6892527937889099, + "learning_rate": 1.7343071775323728e-05, + "loss": 2.1601, + "step": 7520 + }, + { + "epoch": 0.25, + "grad_norm": 0.693621039390564, + "learning_rate": 1.734235022369954e-05, + "loss": 2.1642, + "step": 7521 + }, + { + "epoch": 0.25, + "grad_norm": 0.7147509455680847, + "learning_rate": 1.7341628589125777e-05, + "loss": 2.1624, + "step": 7522 + }, + { + "epoch": 0.25, + "grad_norm": 0.6916564106941223, + "learning_rate": 1.7340906871610603e-05, + "loss": 2.2838, + "step": 7523 + }, + { + "epoch": 0.25, + "grad_norm": 0.6941556930541992, + "learning_rate": 1.734018507116216e-05, + "loss": 2.1288, + "step": 7524 + }, + { + "epoch": 0.25, + "grad_norm": 0.6766563057899475, + "learning_rate": 1.733946318778861e-05, + "loss": 2.1473, + "step": 7525 + }, + { + "epoch": 0.25, + "grad_norm": 0.7380691766738892, + "learning_rate": 1.7338741221498105e-05, + "loss": 2.1491, + "step": 7526 + }, + { + "epoch": 0.25, + "grad_norm": 0.726524293422699, + "learning_rate": 1.7338019172298805e-05, + "loss": 2.1338, + "step": 7527 + }, + { + "epoch": 0.25, + "grad_norm": 0.7141581177711487, + "learning_rate": 1.7337297040198865e-05, + "loss": 2.2821, + "step": 7528 + }, + { + "epoch": 0.25, + "grad_norm": 0.7193422913551331, + "learning_rate": 1.733657482520644e-05, + "loss": 2.124, + "step": 7529 + }, + { + "epoch": 0.25, + "grad_norm": 0.7184845805168152, + "learning_rate": 1.7335852527329695e-05, + "loss": 2.166, + "step": 7530 + }, + { + "epoch": 0.25, + "grad_norm": 0.6978302597999573, + "learning_rate": 1.7335130146576786e-05, + "loss": 2.1371, + "step": 7531 + }, + { + "epoch": 0.25, + "grad_norm": 0.690495491027832, + "learning_rate": 1.7334407682955876e-05, + "loss": 2.1527, + "step": 7532 + }, + { + "epoch": 0.25, + "grad_norm": 0.7555071115493774, + "learning_rate": 1.7333685136475126e-05, + "loss": 2.1825, + "step": 7533 + }, + { + "epoch": 0.25, + "grad_norm": 0.6808302998542786, + "learning_rate": 1.7332962507142703e-05, + "loss": 2.1226, + "step": 7534 + }, + { + "epoch": 0.25, + "grad_norm": 0.7210285067558289, + "learning_rate": 1.733223979496676e-05, + "loss": 2.115, + "step": 7535 + }, + { + "epoch": 0.25, + "grad_norm": 0.7195649743080139, + "learning_rate": 1.7331516999955475e-05, + "loss": 2.146, + "step": 7536 + }, + { + "epoch": 0.25, + "grad_norm": 0.725496768951416, + "learning_rate": 1.7330794122117005e-05, + "loss": 2.2002, + "step": 7537 + }, + { + "epoch": 0.25, + "grad_norm": 0.7282800674438477, + "learning_rate": 1.733007116145952e-05, + "loss": 2.0893, + "step": 7538 + }, + { + "epoch": 0.25, + "grad_norm": 0.7512715458869934, + "learning_rate": 1.7329348117991186e-05, + "loss": 2.2226, + "step": 7539 + }, + { + "epoch": 0.25, + "grad_norm": 0.7053192257881165, + "learning_rate": 1.7328624991720172e-05, + "loss": 2.1011, + "step": 7540 + }, + { + "epoch": 0.25, + "grad_norm": 0.761727511882782, + "learning_rate": 1.732790178265465e-05, + "loss": 2.0872, + "step": 7541 + }, + { + "epoch": 0.25, + "grad_norm": 0.747033953666687, + "learning_rate": 1.7327178490802784e-05, + "loss": 2.2036, + "step": 7542 + }, + { + "epoch": 0.25, + "grad_norm": 0.7316663861274719, + "learning_rate": 1.7326455116172755e-05, + "loss": 2.1965, + "step": 7543 + }, + { + "epoch": 0.25, + "grad_norm": 0.7357268333435059, + "learning_rate": 1.7325731658772726e-05, + "loss": 2.1607, + "step": 7544 + }, + { + "epoch": 0.25, + "grad_norm": 0.723380982875824, + "learning_rate": 1.7325008118610877e-05, + "loss": 2.1437, + "step": 7545 + }, + { + "epoch": 0.25, + "grad_norm": 0.7776650190353394, + "learning_rate": 1.7324284495695376e-05, + "loss": 2.1591, + "step": 7546 + }, + { + "epoch": 0.25, + "grad_norm": 0.6923309564590454, + "learning_rate": 1.7323560790034405e-05, + "loss": 2.1412, + "step": 7547 + }, + { + "epoch": 0.25, + "grad_norm": 0.6813734769821167, + "learning_rate": 1.7322837001636133e-05, + "loss": 2.1431, + "step": 7548 + }, + { + "epoch": 0.25, + "grad_norm": 0.7127648591995239, + "learning_rate": 1.732211313050874e-05, + "loss": 2.159, + "step": 7549 + }, + { + "epoch": 0.25, + "grad_norm": 0.7070778608322144, + "learning_rate": 1.7321389176660407e-05, + "loss": 2.1349, + "step": 7550 + }, + { + "epoch": 0.25, + "grad_norm": 0.7294631004333496, + "learning_rate": 1.7320665140099306e-05, + "loss": 2.1697, + "step": 7551 + }, + { + "epoch": 0.25, + "grad_norm": 0.7419963479042053, + "learning_rate": 1.731994102083362e-05, + "loss": 2.1584, + "step": 7552 + }, + { + "epoch": 0.25, + "grad_norm": 0.6788718104362488, + "learning_rate": 1.7319216818871533e-05, + "loss": 2.0694, + "step": 7553 + }, + { + "epoch": 0.25, + "grad_norm": 0.6953298449516296, + "learning_rate": 1.7318492534221225e-05, + "loss": 2.1377, + "step": 7554 + }, + { + "epoch": 0.25, + "grad_norm": 0.74330073595047, + "learning_rate": 1.7317768166890876e-05, + "loss": 2.1322, + "step": 7555 + }, + { + "epoch": 0.25, + "grad_norm": 0.7187591791152954, + "learning_rate": 1.731704371688867e-05, + "loss": 2.0923, + "step": 7556 + }, + { + "epoch": 0.25, + "grad_norm": 0.6934306025505066, + "learning_rate": 1.7316319184222792e-05, + "loss": 2.1229, + "step": 7557 + }, + { + "epoch": 0.25, + "grad_norm": 0.7290444374084473, + "learning_rate": 1.7315594568901433e-05, + "loss": 2.2649, + "step": 7558 + }, + { + "epoch": 0.25, + "grad_norm": 0.7017270922660828, + "learning_rate": 1.731486987093277e-05, + "loss": 2.0735, + "step": 7559 + }, + { + "epoch": 0.25, + "grad_norm": 0.6965583562850952, + "learning_rate": 1.7314145090324992e-05, + "loss": 2.0838, + "step": 7560 + }, + { + "epoch": 0.25, + "grad_norm": 0.7092208862304688, + "learning_rate": 1.731342022708629e-05, + "loss": 2.0909, + "step": 7561 + }, + { + "epoch": 0.25, + "grad_norm": 0.6921049356460571, + "learning_rate": 1.7312695281224856e-05, + "loss": 2.1835, + "step": 7562 + }, + { + "epoch": 0.25, + "grad_norm": 0.7226642370223999, + "learning_rate": 1.7311970252748873e-05, + "loss": 2.1902, + "step": 7563 + }, + { + "epoch": 0.25, + "grad_norm": 0.7010865807533264, + "learning_rate": 1.7311245141666536e-05, + "loss": 2.1794, + "step": 7564 + }, + { + "epoch": 0.25, + "grad_norm": 0.7060854434967041, + "learning_rate": 1.731051994798604e-05, + "loss": 2.1102, + "step": 7565 + }, + { + "epoch": 0.25, + "grad_norm": 0.6836500763893127, + "learning_rate": 1.7309794671715567e-05, + "loss": 2.1947, + "step": 7566 + }, + { + "epoch": 0.25, + "grad_norm": 0.6924862861633301, + "learning_rate": 1.7309069312863324e-05, + "loss": 2.1533, + "step": 7567 + }, + { + "epoch": 0.25, + "grad_norm": 0.7123151421546936, + "learning_rate": 1.7308343871437494e-05, + "loss": 2.1525, + "step": 7568 + }, + { + "epoch": 0.25, + "grad_norm": 0.6885114312171936, + "learning_rate": 1.730761834744628e-05, + "loss": 2.1661, + "step": 7569 + }, + { + "epoch": 0.25, + "grad_norm": 0.7045148611068726, + "learning_rate": 1.730689274089788e-05, + "loss": 2.1576, + "step": 7570 + }, + { + "epoch": 0.25, + "grad_norm": 0.7144096493721008, + "learning_rate": 1.7306167051800483e-05, + "loss": 2.1577, + "step": 7571 + }, + { + "epoch": 0.25, + "grad_norm": 0.723014235496521, + "learning_rate": 1.7305441280162294e-05, + "loss": 2.1602, + "step": 7572 + }, + { + "epoch": 0.25, + "grad_norm": 0.7233469486236572, + "learning_rate": 1.7304715425991512e-05, + "loss": 2.1592, + "step": 7573 + }, + { + "epoch": 0.25, + "grad_norm": 0.7292847633361816, + "learning_rate": 1.7303989489296336e-05, + "loss": 2.1199, + "step": 7574 + }, + { + "epoch": 0.25, + "grad_norm": 0.7259262204170227, + "learning_rate": 1.730326347008497e-05, + "loss": 2.1342, + "step": 7575 + }, + { + "epoch": 0.25, + "grad_norm": 0.6966424584388733, + "learning_rate": 1.730253736836561e-05, + "loss": 2.214, + "step": 7576 + }, + { + "epoch": 0.25, + "grad_norm": 0.7465115785598755, + "learning_rate": 1.730181118414646e-05, + "loss": 2.2096, + "step": 7577 + }, + { + "epoch": 0.25, + "grad_norm": 0.7213481664657593, + "learning_rate": 1.730108491743573e-05, + "loss": 2.1914, + "step": 7578 + }, + { + "epoch": 0.25, + "grad_norm": 0.7127256989479065, + "learning_rate": 1.730035856824162e-05, + "loss": 2.148, + "step": 7579 + }, + { + "epoch": 0.25, + "grad_norm": 0.7505007386207581, + "learning_rate": 1.729963213657234e-05, + "loss": 2.1591, + "step": 7580 + }, + { + "epoch": 0.25, + "grad_norm": 0.749862015247345, + "learning_rate": 1.729890562243609e-05, + "loss": 2.1181, + "step": 7581 + }, + { + "epoch": 0.25, + "grad_norm": 0.7312537431716919, + "learning_rate": 1.7298179025841087e-05, + "loss": 2.0667, + "step": 7582 + }, + { + "epoch": 0.25, + "grad_norm": 0.7174736857414246, + "learning_rate": 1.729745234679553e-05, + "loss": 2.136, + "step": 7583 + }, + { + "epoch": 0.25, + "grad_norm": 0.7012611031532288, + "learning_rate": 1.7296725585307635e-05, + "loss": 2.1535, + "step": 7584 + }, + { + "epoch": 0.25, + "grad_norm": 0.718070387840271, + "learning_rate": 1.729599874138561e-05, + "loss": 2.1692, + "step": 7585 + }, + { + "epoch": 0.25, + "grad_norm": 0.714130163192749, + "learning_rate": 1.7295271815037668e-05, + "loss": 2.1803, + "step": 7586 + }, + { + "epoch": 0.25, + "grad_norm": 0.734847366809845, + "learning_rate": 1.7294544806272023e-05, + "loss": 2.2203, + "step": 7587 + }, + { + "epoch": 0.25, + "grad_norm": 0.6970461010932922, + "learning_rate": 1.7293817715096883e-05, + "loss": 2.1493, + "step": 7588 + }, + { + "epoch": 0.25, + "grad_norm": 0.7138895392417908, + "learning_rate": 1.7293090541520464e-05, + "loss": 2.2119, + "step": 7589 + }, + { + "epoch": 0.25, + "grad_norm": 0.7600546479225159, + "learning_rate": 1.729236328555098e-05, + "loss": 2.1131, + "step": 7590 + }, + { + "epoch": 0.25, + "grad_norm": 0.6877783536911011, + "learning_rate": 1.7291635947196658e-05, + "loss": 2.1998, + "step": 7591 + }, + { + "epoch": 0.25, + "grad_norm": 0.7334975600242615, + "learning_rate": 1.72909085264657e-05, + "loss": 2.1525, + "step": 7592 + }, + { + "epoch": 0.25, + "grad_norm": 0.6944721341133118, + "learning_rate": 1.729018102336633e-05, + "loss": 2.1195, + "step": 7593 + }, + { + "epoch": 0.25, + "grad_norm": 0.739058256149292, + "learning_rate": 1.728945343790677e-05, + "loss": 2.1471, + "step": 7594 + }, + { + "epoch": 0.25, + "grad_norm": 0.6996712684631348, + "learning_rate": 1.7288725770095235e-05, + "loss": 2.1322, + "step": 7595 + }, + { + "epoch": 0.25, + "grad_norm": 0.72550368309021, + "learning_rate": 1.7287998019939947e-05, + "loss": 2.1817, + "step": 7596 + }, + { + "epoch": 0.25, + "grad_norm": 0.7203039526939392, + "learning_rate": 1.728727018744913e-05, + "loss": 2.0869, + "step": 7597 + }, + { + "epoch": 0.25, + "grad_norm": 0.7318381667137146, + "learning_rate": 1.7286542272631008e-05, + "loss": 2.1581, + "step": 7598 + }, + { + "epoch": 0.25, + "grad_norm": 0.7341532111167908, + "learning_rate": 1.7285814275493798e-05, + "loss": 2.1491, + "step": 7599 + }, + { + "epoch": 0.25, + "grad_norm": 0.7220089435577393, + "learning_rate": 1.7285086196045728e-05, + "loss": 2.2054, + "step": 7600 + }, + { + "epoch": 0.25, + "grad_norm": 0.7038966417312622, + "learning_rate": 1.7284358034295024e-05, + "loss": 2.1362, + "step": 7601 + }, + { + "epoch": 0.25, + "grad_norm": 0.7343723773956299, + "learning_rate": 1.7283629790249916e-05, + "loss": 2.1662, + "step": 7602 + }, + { + "epoch": 0.25, + "grad_norm": 0.7587023973464966, + "learning_rate": 1.7282901463918623e-05, + "loss": 2.1688, + "step": 7603 + }, + { + "epoch": 0.25, + "grad_norm": 0.6875295639038086, + "learning_rate": 1.728217305530938e-05, + "loss": 2.1179, + "step": 7604 + }, + { + "epoch": 0.25, + "grad_norm": 0.7254225015640259, + "learning_rate": 1.728144456443041e-05, + "loss": 2.1473, + "step": 7605 + }, + { + "epoch": 0.25, + "grad_norm": 0.749250054359436, + "learning_rate": 1.728071599128995e-05, + "loss": 2.1945, + "step": 7606 + }, + { + "epoch": 0.25, + "grad_norm": 0.7107381224632263, + "learning_rate": 1.7279987335896226e-05, + "loss": 2.1221, + "step": 7607 + }, + { + "epoch": 0.25, + "grad_norm": 0.7231041193008423, + "learning_rate": 1.727925859825747e-05, + "loss": 2.2122, + "step": 7608 + }, + { + "epoch": 0.25, + "grad_norm": 0.709024965763092, + "learning_rate": 1.727852977838192e-05, + "loss": 2.16, + "step": 7609 + }, + { + "epoch": 0.25, + "grad_norm": 0.7269505262374878, + "learning_rate": 1.7277800876277806e-05, + "loss": 2.2096, + "step": 7610 + }, + { + "epoch": 0.25, + "grad_norm": 0.6951438188552856, + "learning_rate": 1.727707189195336e-05, + "loss": 2.107, + "step": 7611 + }, + { + "epoch": 0.25, + "grad_norm": 0.716847836971283, + "learning_rate": 1.7276342825416823e-05, + "loss": 2.125, + "step": 7612 + }, + { + "epoch": 0.25, + "grad_norm": 0.7037303447723389, + "learning_rate": 1.7275613676676427e-05, + "loss": 2.1978, + "step": 7613 + }, + { + "epoch": 0.25, + "grad_norm": 0.6980779767036438, + "learning_rate": 1.7274884445740415e-05, + "loss": 2.1849, + "step": 7614 + }, + { + "epoch": 0.25, + "grad_norm": 0.7484356760978699, + "learning_rate": 1.727415513261702e-05, + "loss": 2.1095, + "step": 7615 + }, + { + "epoch": 0.25, + "grad_norm": 0.6940260529518127, + "learning_rate": 1.727342573731448e-05, + "loss": 2.1474, + "step": 7616 + }, + { + "epoch": 0.25, + "grad_norm": 0.7696806192398071, + "learning_rate": 1.7272696259841045e-05, + "loss": 2.0745, + "step": 7617 + }, + { + "epoch": 0.25, + "grad_norm": 0.7069591879844666, + "learning_rate": 1.7271966700204946e-05, + "loss": 2.1479, + "step": 7618 + }, + { + "epoch": 0.25, + "grad_norm": 0.7415960431098938, + "learning_rate": 1.727123705841443e-05, + "loss": 2.1696, + "step": 7619 + }, + { + "epoch": 0.25, + "grad_norm": 0.7355552315711975, + "learning_rate": 1.7270507334477738e-05, + "loss": 2.1759, + "step": 7620 + }, + { + "epoch": 0.25, + "grad_norm": 0.7433149218559265, + "learning_rate": 1.726977752840312e-05, + "loss": 2.205, + "step": 7621 + }, + { + "epoch": 0.25, + "grad_norm": 0.721484363079071, + "learning_rate": 1.726904764019881e-05, + "loss": 2.1781, + "step": 7622 + }, + { + "epoch": 0.25, + "grad_norm": 0.7339152097702026, + "learning_rate": 1.726831766987306e-05, + "loss": 2.1704, + "step": 7623 + }, + { + "epoch": 0.25, + "grad_norm": 0.6840312480926514, + "learning_rate": 1.7267587617434118e-05, + "loss": 2.1546, + "step": 7624 + }, + { + "epoch": 0.25, + "grad_norm": 0.6954158544540405, + "learning_rate": 1.7266857482890232e-05, + "loss": 2.1584, + "step": 7625 + }, + { + "epoch": 0.25, + "grad_norm": 0.7378429174423218, + "learning_rate": 1.726612726624965e-05, + "loss": 2.1331, + "step": 7626 + }, + { + "epoch": 0.25, + "grad_norm": 0.6984655261039734, + "learning_rate": 1.7265396967520614e-05, + "loss": 2.1923, + "step": 7627 + }, + { + "epoch": 0.25, + "grad_norm": 0.705985963344574, + "learning_rate": 1.726466658671139e-05, + "loss": 2.2063, + "step": 7628 + }, + { + "epoch": 0.25, + "grad_norm": 0.6962282061576843, + "learning_rate": 1.7263936123830214e-05, + "loss": 2.191, + "step": 7629 + }, + { + "epoch": 0.25, + "grad_norm": 0.7112159729003906, + "learning_rate": 1.7263205578885343e-05, + "loss": 2.1498, + "step": 7630 + }, + { + "epoch": 0.25, + "grad_norm": 0.6993825435638428, + "learning_rate": 1.7262474951885037e-05, + "loss": 2.1423, + "step": 7631 + }, + { + "epoch": 0.25, + "grad_norm": 0.6986170411109924, + "learning_rate": 1.7261744242837544e-05, + "loss": 2.2199, + "step": 7632 + }, + { + "epoch": 0.25, + "grad_norm": 0.6939405798912048, + "learning_rate": 1.726101345175112e-05, + "loss": 2.1223, + "step": 7633 + }, + { + "epoch": 0.25, + "grad_norm": 0.6675134301185608, + "learning_rate": 1.7260282578634023e-05, + "loss": 2.1813, + "step": 7634 + }, + { + "epoch": 0.25, + "grad_norm": 0.7185758352279663, + "learning_rate": 1.7259551623494507e-05, + "loss": 2.1824, + "step": 7635 + }, + { + "epoch": 0.25, + "grad_norm": 0.7143075466156006, + "learning_rate": 1.725882058634083e-05, + "loss": 2.1824, + "step": 7636 + }, + { + "epoch": 0.25, + "grad_norm": 0.6790210604667664, + "learning_rate": 1.7258089467181252e-05, + "loss": 2.0746, + "step": 7637 + }, + { + "epoch": 0.25, + "grad_norm": 0.6795542240142822, + "learning_rate": 1.7257358266024033e-05, + "loss": 2.1269, + "step": 7638 + }, + { + "epoch": 0.25, + "grad_norm": 0.7037162780761719, + "learning_rate": 1.7256626982877436e-05, + "loss": 2.1749, + "step": 7639 + }, + { + "epoch": 0.25, + "grad_norm": 0.7341267466545105, + "learning_rate": 1.725589561774972e-05, + "loss": 2.1793, + "step": 7640 + }, + { + "epoch": 0.25, + "grad_norm": 0.7170124053955078, + "learning_rate": 1.7255164170649145e-05, + "loss": 2.1276, + "step": 7641 + }, + { + "epoch": 0.25, + "grad_norm": 0.71085524559021, + "learning_rate": 1.7254432641583977e-05, + "loss": 2.1204, + "step": 7642 + }, + { + "epoch": 0.25, + "grad_norm": 0.726539134979248, + "learning_rate": 1.7253701030562483e-05, + "loss": 2.1488, + "step": 7643 + }, + { + "epoch": 0.25, + "grad_norm": 0.710188090801239, + "learning_rate": 1.7252969337592927e-05, + "loss": 2.1363, + "step": 7644 + }, + { + "epoch": 0.25, + "grad_norm": 0.6881375908851624, + "learning_rate": 1.7252237562683572e-05, + "loss": 2.1894, + "step": 7645 + }, + { + "epoch": 0.25, + "grad_norm": 0.71407151222229, + "learning_rate": 1.7251505705842685e-05, + "loss": 2.1331, + "step": 7646 + }, + { + "epoch": 0.25, + "grad_norm": 0.7067898511886597, + "learning_rate": 1.725077376707854e-05, + "loss": 2.1995, + "step": 7647 + }, + { + "epoch": 0.25, + "grad_norm": 0.7009431719779968, + "learning_rate": 1.72500417463994e-05, + "loss": 2.1412, + "step": 7648 + }, + { + "epoch": 0.25, + "grad_norm": 0.6963695883750916, + "learning_rate": 1.724930964381354e-05, + "loss": 2.1243, + "step": 7649 + }, + { + "epoch": 0.25, + "grad_norm": 0.7290429472923279, + "learning_rate": 1.7248577459329226e-05, + "loss": 2.1453, + "step": 7650 + }, + { + "epoch": 0.25, + "grad_norm": 0.708939790725708, + "learning_rate": 1.724784519295473e-05, + "loss": 2.1956, + "step": 7651 + }, + { + "epoch": 0.25, + "grad_norm": 0.6971594095230103, + "learning_rate": 1.7247112844698334e-05, + "loss": 2.158, + "step": 7652 + }, + { + "epoch": 0.25, + "grad_norm": 0.6983869075775146, + "learning_rate": 1.7246380414568296e-05, + "loss": 2.1477, + "step": 7653 + }, + { + "epoch": 0.25, + "grad_norm": 0.7416946887969971, + "learning_rate": 1.7245647902572903e-05, + "loss": 2.1712, + "step": 7654 + }, + { + "epoch": 0.25, + "grad_norm": 0.6916167736053467, + "learning_rate": 1.7244915308720425e-05, + "loss": 2.1513, + "step": 7655 + }, + { + "epoch": 0.25, + "grad_norm": 0.7251725196838379, + "learning_rate": 1.724418263301914e-05, + "loss": 2.182, + "step": 7656 + }, + { + "epoch": 0.25, + "grad_norm": 0.7102228403091431, + "learning_rate": 1.7243449875477326e-05, + "loss": 2.1498, + "step": 7657 + }, + { + "epoch": 0.25, + "grad_norm": 0.7427058219909668, + "learning_rate": 1.724271703610326e-05, + "loss": 2.1184, + "step": 7658 + }, + { + "epoch": 0.25, + "grad_norm": 0.7271700501441956, + "learning_rate": 1.724198411490522e-05, + "loss": 2.0709, + "step": 7659 + }, + { + "epoch": 0.25, + "grad_norm": 0.7278515696525574, + "learning_rate": 1.724125111189149e-05, + "loss": 2.1955, + "step": 7660 + }, + { + "epoch": 0.25, + "grad_norm": 0.7055908441543579, + "learning_rate": 1.7240518027070348e-05, + "loss": 2.1924, + "step": 7661 + }, + { + "epoch": 0.25, + "grad_norm": 0.6929784417152405, + "learning_rate": 1.7239784860450078e-05, + "loss": 2.1776, + "step": 7662 + }, + { + "epoch": 0.25, + "grad_norm": 0.7270942330360413, + "learning_rate": 1.723905161203896e-05, + "loss": 2.1704, + "step": 7663 + }, + { + "epoch": 0.25, + "grad_norm": 0.7108985185623169, + "learning_rate": 1.7238318281845277e-05, + "loss": 2.0996, + "step": 7664 + }, + { + "epoch": 0.26, + "grad_norm": 0.7238744497299194, + "learning_rate": 1.723758486987732e-05, + "loss": 2.184, + "step": 7665 + }, + { + "epoch": 0.26, + "grad_norm": 0.7307531237602234, + "learning_rate": 1.7236851376143368e-05, + "loss": 2.1922, + "step": 7666 + }, + { + "epoch": 0.26, + "grad_norm": 0.7151849269866943, + "learning_rate": 1.7236117800651712e-05, + "loss": 2.2465, + "step": 7667 + }, + { + "epoch": 0.26, + "grad_norm": 0.7142261266708374, + "learning_rate": 1.7235384143410637e-05, + "loss": 2.1448, + "step": 7668 + }, + { + "epoch": 0.26, + "grad_norm": 0.689373254776001, + "learning_rate": 1.7234650404428433e-05, + "loss": 2.1667, + "step": 7669 + }, + { + "epoch": 0.26, + "grad_norm": 0.7123984098434448, + "learning_rate": 1.7233916583713387e-05, + "loss": 2.1923, + "step": 7670 + }, + { + "epoch": 0.26, + "grad_norm": 0.7342566251754761, + "learning_rate": 1.7233182681273794e-05, + "loss": 2.1017, + "step": 7671 + }, + { + "epoch": 0.26, + "grad_norm": 0.733862578868866, + "learning_rate": 1.723244869711794e-05, + "loss": 2.1822, + "step": 7672 + }, + { + "epoch": 0.26, + "grad_norm": 0.7084027528762817, + "learning_rate": 1.723171463125412e-05, + "loss": 2.1368, + "step": 7673 + }, + { + "epoch": 0.26, + "grad_norm": 0.6860386729240417, + "learning_rate": 1.7230980483690626e-05, + "loss": 2.1514, + "step": 7674 + }, + { + "epoch": 0.26, + "grad_norm": 0.6763572692871094, + "learning_rate": 1.7230246254435754e-05, + "loss": 2.1408, + "step": 7675 + }, + { + "epoch": 0.26, + "grad_norm": 0.7044961452484131, + "learning_rate": 1.7229511943497794e-05, + "loss": 2.1054, + "step": 7676 + }, + { + "epoch": 0.26, + "grad_norm": 0.7081761360168457, + "learning_rate": 1.722877755088505e-05, + "loss": 2.1304, + "step": 7677 + }, + { + "epoch": 0.26, + "grad_norm": 0.6801961064338684, + "learning_rate": 1.7228043076605808e-05, + "loss": 2.1542, + "step": 7678 + }, + { + "epoch": 0.26, + "grad_norm": 0.7183455228805542, + "learning_rate": 1.7227308520668373e-05, + "loss": 2.1424, + "step": 7679 + }, + { + "epoch": 0.26, + "grad_norm": 0.7013319730758667, + "learning_rate": 1.7226573883081045e-05, + "loss": 2.2342, + "step": 7680 + }, + { + "epoch": 0.26, + "grad_norm": 0.7163876891136169, + "learning_rate": 1.7225839163852117e-05, + "loss": 2.178, + "step": 7681 + }, + { + "epoch": 0.26, + "grad_norm": 0.7153096199035645, + "learning_rate": 1.7225104362989894e-05, + "loss": 2.1031, + "step": 7682 + }, + { + "epoch": 0.26, + "grad_norm": 0.6976253986358643, + "learning_rate": 1.7224369480502674e-05, + "loss": 2.1376, + "step": 7683 + }, + { + "epoch": 0.26, + "grad_norm": 0.68953537940979, + "learning_rate": 1.7223634516398764e-05, + "loss": 2.1572, + "step": 7684 + }, + { + "epoch": 0.26, + "grad_norm": 0.7066412568092346, + "learning_rate": 1.7222899470686466e-05, + "loss": 2.1801, + "step": 7685 + }, + { + "epoch": 0.26, + "grad_norm": 0.7250317931175232, + "learning_rate": 1.722216434337408e-05, + "loss": 2.144, + "step": 7686 + }, + { + "epoch": 0.26, + "grad_norm": 0.7483562231063843, + "learning_rate": 1.7221429134469913e-05, + "loss": 2.176, + "step": 7687 + }, + { + "epoch": 0.26, + "grad_norm": 0.7701245546340942, + "learning_rate": 1.7220693843982277e-05, + "loss": 2.1539, + "step": 7688 + }, + { + "epoch": 0.26, + "grad_norm": 0.6969650983810425, + "learning_rate": 1.721995847191947e-05, + "loss": 2.1079, + "step": 7689 + }, + { + "epoch": 0.26, + "grad_norm": 0.7545410990715027, + "learning_rate": 1.7219223018289802e-05, + "loss": 2.2084, + "step": 7690 + }, + { + "epoch": 0.26, + "grad_norm": 0.7547464370727539, + "learning_rate": 1.7218487483101588e-05, + "loss": 2.1646, + "step": 7691 + }, + { + "epoch": 0.26, + "grad_norm": 0.7234165072441101, + "learning_rate": 1.721775186636313e-05, + "loss": 2.1615, + "step": 7692 + }, + { + "epoch": 0.26, + "grad_norm": 0.7450416684150696, + "learning_rate": 1.721701616808274e-05, + "loss": 2.1504, + "step": 7693 + }, + { + "epoch": 0.26, + "grad_norm": 0.7488062381744385, + "learning_rate": 1.7216280388268734e-05, + "loss": 2.2552, + "step": 7694 + }, + { + "epoch": 0.26, + "grad_norm": 0.6949632167816162, + "learning_rate": 1.7215544526929417e-05, + "loss": 2.1116, + "step": 7695 + }, + { + "epoch": 0.26, + "grad_norm": 0.7128508687019348, + "learning_rate": 1.7214808584073112e-05, + "loss": 2.1198, + "step": 7696 + }, + { + "epoch": 0.26, + "grad_norm": 0.7132129669189453, + "learning_rate": 1.7214072559708125e-05, + "loss": 2.2174, + "step": 7697 + }, + { + "epoch": 0.26, + "grad_norm": 0.6808035373687744, + "learning_rate": 1.7213336453842772e-05, + "loss": 2.1415, + "step": 7698 + }, + { + "epoch": 0.26, + "grad_norm": 0.7169152498245239, + "learning_rate": 1.7212600266485376e-05, + "loss": 2.1041, + "step": 7699 + }, + { + "epoch": 0.26, + "grad_norm": 0.7282432913780212, + "learning_rate": 1.7211863997644247e-05, + "loss": 2.1199, + "step": 7700 + }, + { + "epoch": 0.26, + "grad_norm": 0.7323275208473206, + "learning_rate": 1.7211127647327704e-05, + "loss": 2.1146, + "step": 7701 + }, + { + "epoch": 0.26, + "grad_norm": 0.7298784852027893, + "learning_rate": 1.721039121554407e-05, + "loss": 2.2151, + "step": 7702 + }, + { + "epoch": 0.26, + "grad_norm": 0.7169885635375977, + "learning_rate": 1.7209654702301657e-05, + "loss": 2.1455, + "step": 7703 + }, + { + "epoch": 0.26, + "grad_norm": 0.705639123916626, + "learning_rate": 1.7208918107608793e-05, + "loss": 2.1673, + "step": 7704 + }, + { + "epoch": 0.26, + "grad_norm": 0.7212986946105957, + "learning_rate": 1.72081814314738e-05, + "loss": 2.1698, + "step": 7705 + }, + { + "epoch": 0.26, + "grad_norm": 0.7285521626472473, + "learning_rate": 1.720744467390499e-05, + "loss": 2.1519, + "step": 7706 + }, + { + "epoch": 0.26, + "grad_norm": 0.7345906496047974, + "learning_rate": 1.72067078349107e-05, + "loss": 2.1381, + "step": 7707 + }, + { + "epoch": 0.26, + "grad_norm": 0.7159486413002014, + "learning_rate": 1.720597091449925e-05, + "loss": 2.102, + "step": 7708 + }, + { + "epoch": 0.26, + "grad_norm": 0.760722815990448, + "learning_rate": 1.720523391267896e-05, + "loss": 2.1874, + "step": 7709 + }, + { + "epoch": 0.26, + "grad_norm": 0.6814525723457336, + "learning_rate": 1.7204496829458162e-05, + "loss": 2.1426, + "step": 7710 + }, + { + "epoch": 0.26, + "grad_norm": 0.701771080493927, + "learning_rate": 1.720375966484518e-05, + "loss": 2.0886, + "step": 7711 + }, + { + "epoch": 0.26, + "grad_norm": 0.7392334342002869, + "learning_rate": 1.7203022418848344e-05, + "loss": 2.1523, + "step": 7712 + }, + { + "epoch": 0.26, + "grad_norm": 0.7213826179504395, + "learning_rate": 1.720228509147598e-05, + "loss": 2.1514, + "step": 7713 + }, + { + "epoch": 0.26, + "grad_norm": 0.7078513503074646, + "learning_rate": 1.7201547682736423e-05, + "loss": 2.1591, + "step": 7714 + }, + { + "epoch": 0.26, + "grad_norm": 0.7237377762794495, + "learning_rate": 1.7200810192637996e-05, + "loss": 2.0758, + "step": 7715 + }, + { + "epoch": 0.26, + "grad_norm": 0.7191662788391113, + "learning_rate": 1.720007262118904e-05, + "loss": 2.184, + "step": 7716 + }, + { + "epoch": 0.26, + "grad_norm": 0.6949969530105591, + "learning_rate": 1.7199334968397877e-05, + "loss": 2.1662, + "step": 7717 + }, + { + "epoch": 0.26, + "grad_norm": 0.7313836216926575, + "learning_rate": 1.7198597234272854e-05, + "loss": 2.0782, + "step": 7718 + }, + { + "epoch": 0.26, + "grad_norm": 0.7024040818214417, + "learning_rate": 1.7197859418822296e-05, + "loss": 2.1085, + "step": 7719 + }, + { + "epoch": 0.26, + "grad_norm": 0.6957772374153137, + "learning_rate": 1.719712152205454e-05, + "loss": 2.1578, + "step": 7720 + }, + { + "epoch": 0.26, + "grad_norm": 0.6967777013778687, + "learning_rate": 1.7196383543977925e-05, + "loss": 2.1456, + "step": 7721 + }, + { + "epoch": 0.26, + "grad_norm": 0.7206192016601562, + "learning_rate": 1.7195645484600785e-05, + "loss": 2.1539, + "step": 7722 + }, + { + "epoch": 0.26, + "grad_norm": 0.688813328742981, + "learning_rate": 1.7194907343931456e-05, + "loss": 2.2106, + "step": 7723 + }, + { + "epoch": 0.26, + "grad_norm": 0.7512519955635071, + "learning_rate": 1.7194169121978285e-05, + "loss": 2.1557, + "step": 7724 + }, + { + "epoch": 0.26, + "grad_norm": 0.765169620513916, + "learning_rate": 1.7193430818749605e-05, + "loss": 2.1333, + "step": 7725 + }, + { + "epoch": 0.26, + "grad_norm": 0.7524273991584778, + "learning_rate": 1.7192692434253762e-05, + "loss": 2.0843, + "step": 7726 + }, + { + "epoch": 0.26, + "grad_norm": 0.6931421756744385, + "learning_rate": 1.719195396849909e-05, + "loss": 2.1318, + "step": 7727 + }, + { + "epoch": 0.26, + "grad_norm": 0.6800587773323059, + "learning_rate": 1.7191215421493942e-05, + "loss": 2.1586, + "step": 7728 + }, + { + "epoch": 0.26, + "grad_norm": 0.7171177268028259, + "learning_rate": 1.7190476793246655e-05, + "loss": 2.1729, + "step": 7729 + }, + { + "epoch": 0.26, + "grad_norm": 0.7246927618980408, + "learning_rate": 1.7189738083765575e-05, + "loss": 2.1476, + "step": 7730 + }, + { + "epoch": 0.26, + "grad_norm": 0.7056196928024292, + "learning_rate": 1.718899929305905e-05, + "loss": 2.1996, + "step": 7731 + }, + { + "epoch": 0.26, + "grad_norm": 0.7401746511459351, + "learning_rate": 1.718826042113542e-05, + "loss": 2.227, + "step": 7732 + }, + { + "epoch": 0.26, + "grad_norm": 0.716092050075531, + "learning_rate": 1.718752146800304e-05, + "loss": 2.127, + "step": 7733 + }, + { + "epoch": 0.26, + "grad_norm": 0.7027502655982971, + "learning_rate": 1.7186782433670254e-05, + "loss": 2.2089, + "step": 7734 + }, + { + "epoch": 0.26, + "grad_norm": 0.7207968831062317, + "learning_rate": 1.7186043318145408e-05, + "loss": 2.1098, + "step": 7735 + }, + { + "epoch": 0.26, + "grad_norm": 0.7254105806350708, + "learning_rate": 1.718530412143686e-05, + "loss": 2.1304, + "step": 7736 + }, + { + "epoch": 0.26, + "grad_norm": 0.7859221696853638, + "learning_rate": 1.7184564843552956e-05, + "loss": 2.1023, + "step": 7737 + }, + { + "epoch": 0.26, + "grad_norm": 0.7056150436401367, + "learning_rate": 1.718382548450205e-05, + "loss": 2.1338, + "step": 7738 + }, + { + "epoch": 0.26, + "grad_norm": 0.6863545775413513, + "learning_rate": 1.7183086044292495e-05, + "loss": 2.101, + "step": 7739 + }, + { + "epoch": 0.26, + "grad_norm": 0.713303804397583, + "learning_rate": 1.718234652293264e-05, + "loss": 2.0906, + "step": 7740 + }, + { + "epoch": 0.26, + "grad_norm": 0.7170447111129761, + "learning_rate": 1.7181606920430844e-05, + "loss": 2.1679, + "step": 7741 + }, + { + "epoch": 0.26, + "grad_norm": 0.6951106190681458, + "learning_rate": 1.7180867236795463e-05, + "loss": 2.166, + "step": 7742 + }, + { + "epoch": 0.26, + "grad_norm": 0.7681471109390259, + "learning_rate": 1.7180127472034852e-05, + "loss": 2.1185, + "step": 7743 + }, + { + "epoch": 0.26, + "grad_norm": 0.6858208775520325, + "learning_rate": 1.717938762615737e-05, + "loss": 2.1199, + "step": 7744 + }, + { + "epoch": 0.26, + "grad_norm": 0.7252985239028931, + "learning_rate": 1.7178647699171373e-05, + "loss": 2.2203, + "step": 7745 + }, + { + "epoch": 0.26, + "grad_norm": 0.7053428888320923, + "learning_rate": 1.7177907691085223e-05, + "loss": 2.1813, + "step": 7746 + }, + { + "epoch": 0.26, + "grad_norm": 0.7169435620307922, + "learning_rate": 1.7177167601907276e-05, + "loss": 2.1588, + "step": 7747 + }, + { + "epoch": 0.26, + "grad_norm": 0.676417350769043, + "learning_rate": 1.7176427431645897e-05, + "loss": 2.1057, + "step": 7748 + }, + { + "epoch": 0.26, + "grad_norm": 0.7188929319381714, + "learning_rate": 1.7175687180309445e-05, + "loss": 2.1825, + "step": 7749 + }, + { + "epoch": 0.26, + "grad_norm": 0.7432684898376465, + "learning_rate": 1.7174946847906285e-05, + "loss": 2.1339, + "step": 7750 + }, + { + "epoch": 0.26, + "grad_norm": 0.6986342668533325, + "learning_rate": 1.7174206434444783e-05, + "loss": 2.1792, + "step": 7751 + }, + { + "epoch": 0.26, + "grad_norm": 0.693280816078186, + "learning_rate": 1.71734659399333e-05, + "loss": 2.121, + "step": 7752 + }, + { + "epoch": 0.26, + "grad_norm": 0.7147374749183655, + "learning_rate": 1.7172725364380202e-05, + "loss": 2.1187, + "step": 7753 + }, + { + "epoch": 0.26, + "grad_norm": 0.6885616779327393, + "learning_rate": 1.7171984707793857e-05, + "loss": 2.2069, + "step": 7754 + }, + { + "epoch": 0.26, + "grad_norm": 0.7237285375595093, + "learning_rate": 1.7171243970182634e-05, + "loss": 2.1614, + "step": 7755 + }, + { + "epoch": 0.26, + "grad_norm": 0.7343361973762512, + "learning_rate": 1.71705031515549e-05, + "loss": 2.1448, + "step": 7756 + }, + { + "epoch": 0.26, + "grad_norm": 0.7518948316574097, + "learning_rate": 1.7169762251919015e-05, + "loss": 2.1604, + "step": 7757 + }, + { + "epoch": 0.26, + "grad_norm": 0.7037867903709412, + "learning_rate": 1.7169021271283367e-05, + "loss": 2.1536, + "step": 7758 + }, + { + "epoch": 0.26, + "grad_norm": 0.7368143796920776, + "learning_rate": 1.7168280209656313e-05, + "loss": 2.2027, + "step": 7759 + }, + { + "epoch": 0.26, + "grad_norm": 0.7020929455757141, + "learning_rate": 1.7167539067046233e-05, + "loss": 2.1053, + "step": 7760 + }, + { + "epoch": 0.26, + "grad_norm": 0.7318944931030273, + "learning_rate": 1.7166797843461495e-05, + "loss": 2.2475, + "step": 7761 + }, + { + "epoch": 0.26, + "grad_norm": 0.7208459377288818, + "learning_rate": 1.716605653891048e-05, + "loss": 2.1559, + "step": 7762 + }, + { + "epoch": 0.26, + "grad_norm": 0.686706006526947, + "learning_rate": 1.7165315153401554e-05, + "loss": 2.0947, + "step": 7763 + }, + { + "epoch": 0.26, + "grad_norm": 0.702031672000885, + "learning_rate": 1.7164573686943095e-05, + "loss": 2.1466, + "step": 7764 + }, + { + "epoch": 0.26, + "grad_norm": 0.7381224036216736, + "learning_rate": 1.7163832139543485e-05, + "loss": 2.1821, + "step": 7765 + }, + { + "epoch": 0.26, + "grad_norm": 0.7788733243942261, + "learning_rate": 1.7163090511211097e-05, + "loss": 2.1166, + "step": 7766 + }, + { + "epoch": 0.26, + "grad_norm": 0.6938040852546692, + "learning_rate": 1.716234880195431e-05, + "loss": 2.1858, + "step": 7767 + }, + { + "epoch": 0.26, + "grad_norm": 0.6925522685050964, + "learning_rate": 1.7161607011781504e-05, + "loss": 2.0945, + "step": 7768 + }, + { + "epoch": 0.26, + "grad_norm": 0.7058216333389282, + "learning_rate": 1.716086514070106e-05, + "loss": 2.2084, + "step": 7769 + }, + { + "epoch": 0.26, + "grad_norm": 0.6886893510818481, + "learning_rate": 1.7160123188721355e-05, + "loss": 2.1679, + "step": 7770 + }, + { + "epoch": 0.26, + "grad_norm": 0.7198439836502075, + "learning_rate": 1.7159381155850778e-05, + "loss": 2.2192, + "step": 7771 + }, + { + "epoch": 0.26, + "grad_norm": 0.6899675130844116, + "learning_rate": 1.7158639042097706e-05, + "loss": 2.1193, + "step": 7772 + }, + { + "epoch": 0.26, + "grad_norm": 0.6987216472625732, + "learning_rate": 1.7157896847470527e-05, + "loss": 2.097, + "step": 7773 + }, + { + "epoch": 0.26, + "grad_norm": 0.7111876606941223, + "learning_rate": 1.7157154571977622e-05, + "loss": 2.1797, + "step": 7774 + }, + { + "epoch": 0.26, + "grad_norm": 0.718830943107605, + "learning_rate": 1.7156412215627382e-05, + "loss": 2.1111, + "step": 7775 + }, + { + "epoch": 0.26, + "grad_norm": 0.6891503930091858, + "learning_rate": 1.7155669778428192e-05, + "loss": 2.1771, + "step": 7776 + }, + { + "epoch": 0.26, + "grad_norm": 0.6862533092498779, + "learning_rate": 1.7154927260388436e-05, + "loss": 2.1374, + "step": 7777 + }, + { + "epoch": 0.26, + "grad_norm": 0.7214975357055664, + "learning_rate": 1.7154184661516505e-05, + "loss": 2.1597, + "step": 7778 + }, + { + "epoch": 0.26, + "grad_norm": 0.7311958074569702, + "learning_rate": 1.7153441981820788e-05, + "loss": 2.1588, + "step": 7779 + }, + { + "epoch": 0.26, + "grad_norm": 0.7298711538314819, + "learning_rate": 1.715269922130968e-05, + "loss": 2.2677, + "step": 7780 + }, + { + "epoch": 0.26, + "grad_norm": 0.7290353178977966, + "learning_rate": 1.7151956379991564e-05, + "loss": 2.1714, + "step": 7781 + }, + { + "epoch": 0.26, + "grad_norm": 0.7301397323608398, + "learning_rate": 1.7151213457874835e-05, + "loss": 2.1664, + "step": 7782 + }, + { + "epoch": 0.26, + "grad_norm": 0.6944224238395691, + "learning_rate": 1.715047045496789e-05, + "loss": 2.1812, + "step": 7783 + }, + { + "epoch": 0.26, + "grad_norm": 0.72264164686203, + "learning_rate": 1.714972737127912e-05, + "loss": 2.1993, + "step": 7784 + }, + { + "epoch": 0.26, + "grad_norm": 0.7018815875053406, + "learning_rate": 1.7148984206816922e-05, + "loss": 2.1408, + "step": 7785 + }, + { + "epoch": 0.26, + "grad_norm": 0.7239283919334412, + "learning_rate": 1.7148240961589687e-05, + "loss": 2.1771, + "step": 7786 + }, + { + "epoch": 0.26, + "grad_norm": 0.7128854990005493, + "learning_rate": 1.7147497635605815e-05, + "loss": 2.0971, + "step": 7787 + }, + { + "epoch": 0.26, + "grad_norm": 0.7174303531646729, + "learning_rate": 1.7146754228873704e-05, + "loss": 2.1276, + "step": 7788 + }, + { + "epoch": 0.26, + "grad_norm": 0.7037590146064758, + "learning_rate": 1.7146010741401754e-05, + "loss": 2.1133, + "step": 7789 + }, + { + "epoch": 0.26, + "grad_norm": 0.704841673374176, + "learning_rate": 1.7145267173198363e-05, + "loss": 2.2071, + "step": 7790 + }, + { + "epoch": 0.26, + "grad_norm": 0.7176227569580078, + "learning_rate": 1.714452352427193e-05, + "loss": 2.1578, + "step": 7791 + }, + { + "epoch": 0.26, + "grad_norm": 0.7243119478225708, + "learning_rate": 1.7143779794630857e-05, + "loss": 2.1628, + "step": 7792 + }, + { + "epoch": 0.26, + "grad_norm": 0.6822180151939392, + "learning_rate": 1.7143035984283544e-05, + "loss": 2.1132, + "step": 7793 + }, + { + "epoch": 0.26, + "grad_norm": 0.692240297794342, + "learning_rate": 1.71422920932384e-05, + "loss": 2.1718, + "step": 7794 + }, + { + "epoch": 0.26, + "grad_norm": 0.6943684816360474, + "learning_rate": 1.7141548121503823e-05, + "loss": 2.1393, + "step": 7795 + }, + { + "epoch": 0.26, + "grad_norm": 0.716433048248291, + "learning_rate": 1.7140804069088223e-05, + "loss": 2.1996, + "step": 7796 + }, + { + "epoch": 0.26, + "grad_norm": 0.6924384236335754, + "learning_rate": 1.7140059936000002e-05, + "loss": 2.1481, + "step": 7797 + }, + { + "epoch": 0.26, + "grad_norm": 0.7412042021751404, + "learning_rate": 1.713931572224757e-05, + "loss": 2.1127, + "step": 7798 + }, + { + "epoch": 0.26, + "grad_norm": 0.725852906703949, + "learning_rate": 1.7138571427839333e-05, + "loss": 2.1532, + "step": 7799 + }, + { + "epoch": 0.26, + "grad_norm": 0.7315605282783508, + "learning_rate": 1.71378270527837e-05, + "loss": 2.1784, + "step": 7800 + }, + { + "epoch": 0.26, + "grad_norm": 0.7074808478355408, + "learning_rate": 1.713708259708908e-05, + "loss": 2.1471, + "step": 7801 + }, + { + "epoch": 0.26, + "grad_norm": 0.6726783514022827, + "learning_rate": 1.713633806076388e-05, + "loss": 2.1611, + "step": 7802 + }, + { + "epoch": 0.26, + "grad_norm": 0.6805378198623657, + "learning_rate": 1.713559344381652e-05, + "loss": 2.1342, + "step": 7803 + }, + { + "epoch": 0.26, + "grad_norm": 0.7407160401344299, + "learning_rate": 1.7134848746255405e-05, + "loss": 2.1911, + "step": 7804 + }, + { + "epoch": 0.26, + "grad_norm": 0.7225199937820435, + "learning_rate": 1.713410396808895e-05, + "loss": 2.1709, + "step": 7805 + }, + { + "epoch": 0.26, + "grad_norm": 0.7017956376075745, + "learning_rate": 1.713335910932557e-05, + "loss": 2.1566, + "step": 7806 + }, + { + "epoch": 0.26, + "grad_norm": 0.729587197303772, + "learning_rate": 1.713261416997368e-05, + "loss": 2.1785, + "step": 7807 + }, + { + "epoch": 0.26, + "grad_norm": 0.6951969861984253, + "learning_rate": 1.7131869150041695e-05, + "loss": 2.1225, + "step": 7808 + }, + { + "epoch": 0.26, + "grad_norm": 0.7271116971969604, + "learning_rate": 1.713112404953803e-05, + "loss": 2.1011, + "step": 7809 + }, + { + "epoch": 0.26, + "grad_norm": 0.7424437999725342, + "learning_rate": 1.7130378868471105e-05, + "loss": 2.1272, + "step": 7810 + }, + { + "epoch": 0.26, + "grad_norm": 0.7134390473365784, + "learning_rate": 1.7129633606849338e-05, + "loss": 2.1633, + "step": 7811 + }, + { + "epoch": 0.26, + "grad_norm": 0.7044578790664673, + "learning_rate": 1.712888826468115e-05, + "loss": 2.156, + "step": 7812 + }, + { + "epoch": 0.26, + "grad_norm": 0.711760938167572, + "learning_rate": 1.712814284197496e-05, + "loss": 2.1559, + "step": 7813 + }, + { + "epoch": 0.26, + "grad_norm": 0.7074726819992065, + "learning_rate": 1.7127397338739192e-05, + "loss": 2.1713, + "step": 7814 + }, + { + "epoch": 0.26, + "grad_norm": 0.7287285327911377, + "learning_rate": 1.712665175498226e-05, + "loss": 2.1091, + "step": 7815 + }, + { + "epoch": 0.26, + "grad_norm": 0.6875952482223511, + "learning_rate": 1.71259060907126e-05, + "loss": 2.1086, + "step": 7816 + }, + { + "epoch": 0.26, + "grad_norm": 0.7718198895454407, + "learning_rate": 1.7125160345938624e-05, + "loss": 2.1822, + "step": 7817 + }, + { + "epoch": 0.26, + "grad_norm": 0.7299543619155884, + "learning_rate": 1.7124414520668767e-05, + "loss": 2.2307, + "step": 7818 + }, + { + "epoch": 0.26, + "grad_norm": 0.6945135593414307, + "learning_rate": 1.7123668614911445e-05, + "loss": 2.1438, + "step": 7819 + }, + { + "epoch": 0.26, + "grad_norm": 0.7080686688423157, + "learning_rate": 1.7122922628675092e-05, + "loss": 2.1498, + "step": 7820 + }, + { + "epoch": 0.26, + "grad_norm": 0.74290931224823, + "learning_rate": 1.7122176561968133e-05, + "loss": 2.1104, + "step": 7821 + }, + { + "epoch": 0.26, + "grad_norm": 0.7897998094558716, + "learning_rate": 1.7121430414799e-05, + "loss": 2.2249, + "step": 7822 + }, + { + "epoch": 0.26, + "grad_norm": 0.7258075475692749, + "learning_rate": 1.7120684187176117e-05, + "loss": 2.1421, + "step": 7823 + }, + { + "epoch": 0.26, + "grad_norm": 0.698527991771698, + "learning_rate": 1.711993787910792e-05, + "loss": 2.1741, + "step": 7824 + }, + { + "epoch": 0.26, + "grad_norm": 0.6947150826454163, + "learning_rate": 1.7119191490602834e-05, + "loss": 2.073, + "step": 7825 + }, + { + "epoch": 0.26, + "grad_norm": 0.7102426290512085, + "learning_rate": 1.7118445021669297e-05, + "loss": 2.1823, + "step": 7826 + }, + { + "epoch": 0.26, + "grad_norm": 0.740727424621582, + "learning_rate": 1.7117698472315737e-05, + "loss": 2.1667, + "step": 7827 + }, + { + "epoch": 0.26, + "grad_norm": 0.699183464050293, + "learning_rate": 1.7116951842550596e-05, + "loss": 2.0765, + "step": 7828 + }, + { + "epoch": 0.26, + "grad_norm": 0.6834732294082642, + "learning_rate": 1.7116205132382302e-05, + "loss": 2.1058, + "step": 7829 + }, + { + "epoch": 0.26, + "grad_norm": 0.7036998271942139, + "learning_rate": 1.711545834181929e-05, + "loss": 2.1511, + "step": 7830 + }, + { + "epoch": 0.26, + "grad_norm": 0.7054764628410339, + "learning_rate": 1.7114711470870004e-05, + "loss": 2.1169, + "step": 7831 + }, + { + "epoch": 0.26, + "grad_norm": 0.7175148129463196, + "learning_rate": 1.7113964519542875e-05, + "loss": 2.1476, + "step": 7832 + }, + { + "epoch": 0.26, + "grad_norm": 0.7632777094841003, + "learning_rate": 1.7113217487846343e-05, + "loss": 2.1509, + "step": 7833 + }, + { + "epoch": 0.26, + "grad_norm": 0.7094358205795288, + "learning_rate": 1.711247037578885e-05, + "loss": 2.1711, + "step": 7834 + }, + { + "epoch": 0.26, + "grad_norm": 0.7338247299194336, + "learning_rate": 1.7111723183378835e-05, + "loss": 2.1128, + "step": 7835 + }, + { + "epoch": 0.26, + "grad_norm": 0.7362343072891235, + "learning_rate": 1.711097591062474e-05, + "loss": 2.1349, + "step": 7836 + }, + { + "epoch": 0.26, + "grad_norm": 0.7271143794059753, + "learning_rate": 1.7110228557535007e-05, + "loss": 2.2233, + "step": 7837 + }, + { + "epoch": 0.26, + "grad_norm": 0.6997426748275757, + "learning_rate": 1.7109481124118076e-05, + "loss": 2.1398, + "step": 7838 + }, + { + "epoch": 0.26, + "grad_norm": 0.7205355167388916, + "learning_rate": 1.71087336103824e-05, + "loss": 2.1654, + "step": 7839 + }, + { + "epoch": 0.26, + "grad_norm": 0.7339781522750854, + "learning_rate": 1.710798601633641e-05, + "loss": 2.1909, + "step": 7840 + }, + { + "epoch": 0.26, + "grad_norm": 0.6762281060218811, + "learning_rate": 1.7107238341988565e-05, + "loss": 2.1575, + "step": 7841 + }, + { + "epoch": 0.26, + "grad_norm": 0.7162430882453918, + "learning_rate": 1.71064905873473e-05, + "loss": 2.2021, + "step": 7842 + }, + { + "epoch": 0.26, + "grad_norm": 0.704414963722229, + "learning_rate": 1.7105742752421077e-05, + "loss": 2.1754, + "step": 7843 + }, + { + "epoch": 0.26, + "grad_norm": 0.7161237001419067, + "learning_rate": 1.7104994837218332e-05, + "loss": 2.1312, + "step": 7844 + }, + { + "epoch": 0.26, + "grad_norm": 0.7203507423400879, + "learning_rate": 1.7104246841747523e-05, + "loss": 2.143, + "step": 7845 + }, + { + "epoch": 0.26, + "grad_norm": 0.743654191493988, + "learning_rate": 1.7103498766017096e-05, + "loss": 2.2259, + "step": 7846 + }, + { + "epoch": 0.26, + "grad_norm": 0.7108768224716187, + "learning_rate": 1.71027506100355e-05, + "loss": 2.1748, + "step": 7847 + }, + { + "epoch": 0.26, + "grad_norm": 0.6944230794906616, + "learning_rate": 1.7102002373811193e-05, + "loss": 2.1621, + "step": 7848 + }, + { + "epoch": 0.26, + "grad_norm": 0.7488688230514526, + "learning_rate": 1.710125405735262e-05, + "loss": 2.158, + "step": 7849 + }, + { + "epoch": 0.26, + "grad_norm": 0.7385022640228271, + "learning_rate": 1.7100505660668244e-05, + "loss": 2.288, + "step": 7850 + }, + { + "epoch": 0.26, + "grad_norm": 0.7253884673118591, + "learning_rate": 1.709975718376652e-05, + "loss": 2.2102, + "step": 7851 + }, + { + "epoch": 0.26, + "grad_norm": 0.7080777883529663, + "learning_rate": 1.7099008626655895e-05, + "loss": 2.143, + "step": 7852 + }, + { + "epoch": 0.26, + "grad_norm": 0.7025786638259888, + "learning_rate": 1.7098259989344833e-05, + "loss": 2.1763, + "step": 7853 + }, + { + "epoch": 0.26, + "grad_norm": 0.687380850315094, + "learning_rate": 1.709751127184179e-05, + "loss": 2.1124, + "step": 7854 + }, + { + "epoch": 0.26, + "grad_norm": 0.7233151197433472, + "learning_rate": 1.7096762474155225e-05, + "loss": 2.1529, + "step": 7855 + }, + { + "epoch": 0.26, + "grad_norm": 0.7254893779754639, + "learning_rate": 1.7096013596293596e-05, + "loss": 2.0678, + "step": 7856 + }, + { + "epoch": 0.26, + "grad_norm": 0.8375031352043152, + "learning_rate": 1.7095264638265364e-05, + "loss": 2.2048, + "step": 7857 + }, + { + "epoch": 0.26, + "grad_norm": 0.7308005094528198, + "learning_rate": 1.709451560007899e-05, + "loss": 2.0957, + "step": 7858 + }, + { + "epoch": 0.26, + "grad_norm": 0.6927459836006165, + "learning_rate": 1.7093766481742934e-05, + "loss": 2.1568, + "step": 7859 + }, + { + "epoch": 0.26, + "grad_norm": 0.7318151593208313, + "learning_rate": 1.7093017283265667e-05, + "loss": 2.1217, + "step": 7860 + }, + { + "epoch": 0.26, + "grad_norm": 0.7318304777145386, + "learning_rate": 1.7092268004655644e-05, + "loss": 2.1766, + "step": 7861 + }, + { + "epoch": 0.26, + "grad_norm": 0.7013775706291199, + "learning_rate": 1.7091518645921335e-05, + "loss": 2.2367, + "step": 7862 + }, + { + "epoch": 0.26, + "grad_norm": 0.7081071138381958, + "learning_rate": 1.70907692070712e-05, + "loss": 2.1679, + "step": 7863 + }, + { + "epoch": 0.26, + "grad_norm": 0.7219730615615845, + "learning_rate": 1.7090019688113716e-05, + "loss": 2.1468, + "step": 7864 + }, + { + "epoch": 0.26, + "grad_norm": 0.7015517354011536, + "learning_rate": 1.7089270089057343e-05, + "loss": 2.176, + "step": 7865 + }, + { + "epoch": 0.26, + "grad_norm": 0.6918462514877319, + "learning_rate": 1.708852040991055e-05, + "loss": 2.1799, + "step": 7866 + }, + { + "epoch": 0.26, + "grad_norm": 0.7144599556922913, + "learning_rate": 1.7087770650681807e-05, + "loss": 2.1685, + "step": 7867 + }, + { + "epoch": 0.26, + "grad_norm": 0.7278997302055359, + "learning_rate": 1.7087020811379588e-05, + "loss": 2.0475, + "step": 7868 + }, + { + "epoch": 0.26, + "grad_norm": 0.6881092190742493, + "learning_rate": 1.708627089201236e-05, + "loss": 2.1629, + "step": 7869 + }, + { + "epoch": 0.26, + "grad_norm": 0.7210370898246765, + "learning_rate": 1.7085520892588597e-05, + "loss": 2.1167, + "step": 7870 + }, + { + "epoch": 0.26, + "grad_norm": 0.7074130177497864, + "learning_rate": 1.708477081311677e-05, + "loss": 2.1643, + "step": 7871 + }, + { + "epoch": 0.26, + "grad_norm": 0.7149900794029236, + "learning_rate": 1.7084020653605353e-05, + "loss": 2.1546, + "step": 7872 + }, + { + "epoch": 0.26, + "grad_norm": 0.7233145833015442, + "learning_rate": 1.7083270414062824e-05, + "loss": 2.1295, + "step": 7873 + }, + { + "epoch": 0.26, + "grad_norm": 0.7299347519874573, + "learning_rate": 1.7082520094497658e-05, + "loss": 2.1517, + "step": 7874 + }, + { + "epoch": 0.26, + "grad_norm": 0.6943715214729309, + "learning_rate": 1.708176969491833e-05, + "loss": 2.1967, + "step": 7875 + }, + { + "epoch": 0.26, + "grad_norm": 0.7194640040397644, + "learning_rate": 1.708101921533332e-05, + "loss": 2.1547, + "step": 7876 + }, + { + "epoch": 0.26, + "grad_norm": 0.732223629951477, + "learning_rate": 1.70802686557511e-05, + "loss": 2.1397, + "step": 7877 + }, + { + "epoch": 0.26, + "grad_norm": 0.7153412699699402, + "learning_rate": 1.707951801618016e-05, + "loss": 2.1529, + "step": 7878 + }, + { + "epoch": 0.26, + "grad_norm": 0.7337096333503723, + "learning_rate": 1.707876729662897e-05, + "loss": 2.1527, + "step": 7879 + }, + { + "epoch": 0.26, + "grad_norm": 0.7013744115829468, + "learning_rate": 1.7078016497106017e-05, + "loss": 2.1296, + "step": 7880 + }, + { + "epoch": 0.26, + "grad_norm": 0.6861643195152283, + "learning_rate": 1.7077265617619783e-05, + "loss": 2.1686, + "step": 7881 + }, + { + "epoch": 0.26, + "grad_norm": 0.7032082676887512, + "learning_rate": 1.707651465817875e-05, + "loss": 2.1257, + "step": 7882 + }, + { + "epoch": 0.26, + "grad_norm": 0.7724486589431763, + "learning_rate": 1.70757636187914e-05, + "loss": 2.1404, + "step": 7883 + }, + { + "epoch": 0.26, + "grad_norm": 0.7003377676010132, + "learning_rate": 1.707501249946622e-05, + "loss": 2.1534, + "step": 7884 + }, + { + "epoch": 0.26, + "grad_norm": 0.7136922478675842, + "learning_rate": 1.7074261300211696e-05, + "loss": 2.0819, + "step": 7885 + }, + { + "epoch": 0.26, + "grad_norm": 0.7247936725616455, + "learning_rate": 1.7073510021036313e-05, + "loss": 2.156, + "step": 7886 + }, + { + "epoch": 0.26, + "grad_norm": 0.689854621887207, + "learning_rate": 1.707275866194856e-05, + "loss": 2.1033, + "step": 7887 + }, + { + "epoch": 0.26, + "grad_norm": 0.7166960835456848, + "learning_rate": 1.7072007222956925e-05, + "loss": 2.1228, + "step": 7888 + }, + { + "epoch": 0.26, + "grad_norm": 0.6709117889404297, + "learning_rate": 1.7071255704069894e-05, + "loss": 2.1364, + "step": 7889 + }, + { + "epoch": 0.26, + "grad_norm": 0.7289060354232788, + "learning_rate": 1.7070504105295963e-05, + "loss": 2.1456, + "step": 7890 + }, + { + "epoch": 0.26, + "grad_norm": 0.7029357552528381, + "learning_rate": 1.706975242664362e-05, + "loss": 2.211, + "step": 7891 + }, + { + "epoch": 0.26, + "grad_norm": 0.7252646088600159, + "learning_rate": 1.7069000668121356e-05, + "loss": 2.0948, + "step": 7892 + }, + { + "epoch": 0.26, + "grad_norm": 0.7137283086776733, + "learning_rate": 1.7068248829737668e-05, + "loss": 2.1006, + "step": 7893 + }, + { + "epoch": 0.26, + "grad_norm": 0.683407723903656, + "learning_rate": 1.7067496911501043e-05, + "loss": 2.1348, + "step": 7894 + }, + { + "epoch": 0.26, + "grad_norm": 0.7210580706596375, + "learning_rate": 1.7066744913419982e-05, + "loss": 2.1142, + "step": 7895 + }, + { + "epoch": 0.26, + "grad_norm": 0.7027649283409119, + "learning_rate": 1.706599283550298e-05, + "loss": 2.1779, + "step": 7896 + }, + { + "epoch": 0.26, + "grad_norm": 0.7076180577278137, + "learning_rate": 1.706524067775853e-05, + "loss": 2.0913, + "step": 7897 + }, + { + "epoch": 0.26, + "grad_norm": 0.755730390548706, + "learning_rate": 1.706448844019513e-05, + "loss": 2.1764, + "step": 7898 + }, + { + "epoch": 0.26, + "grad_norm": 0.6955671906471252, + "learning_rate": 1.7063736122821284e-05, + "loss": 2.1123, + "step": 7899 + }, + { + "epoch": 0.26, + "grad_norm": 0.7206125855445862, + "learning_rate": 1.7062983725645485e-05, + "loss": 2.1599, + "step": 7900 + }, + { + "epoch": 0.26, + "grad_norm": 0.7265791296958923, + "learning_rate": 1.7062231248676234e-05, + "loss": 2.1614, + "step": 7901 + }, + { + "epoch": 0.26, + "grad_norm": 0.7384074926376343, + "learning_rate": 1.7061478691922037e-05, + "loss": 2.1348, + "step": 7902 + }, + { + "epoch": 0.26, + "grad_norm": 0.7313788533210754, + "learning_rate": 1.7060726055391386e-05, + "loss": 2.195, + "step": 7903 + }, + { + "epoch": 0.26, + "grad_norm": 0.725885272026062, + "learning_rate": 1.7059973339092793e-05, + "loss": 2.0983, + "step": 7904 + }, + { + "epoch": 0.26, + "grad_norm": 0.7421061396598816, + "learning_rate": 1.7059220543034763e-05, + "loss": 2.1565, + "step": 7905 + }, + { + "epoch": 0.26, + "grad_norm": 0.7270490527153015, + "learning_rate": 1.7058467667225792e-05, + "loss": 2.1294, + "step": 7906 + }, + { + "epoch": 0.26, + "grad_norm": 0.7013449668884277, + "learning_rate": 1.7057714711674388e-05, + "loss": 2.1907, + "step": 7907 + }, + { + "epoch": 0.26, + "grad_norm": 0.7751885652542114, + "learning_rate": 1.7056961676389062e-05, + "loss": 2.1606, + "step": 7908 + }, + { + "epoch": 0.26, + "grad_norm": 0.6948938369750977, + "learning_rate": 1.705620856137832e-05, + "loss": 2.1428, + "step": 7909 + }, + { + "epoch": 0.26, + "grad_norm": 0.7014685869216919, + "learning_rate": 1.7055455366650666e-05, + "loss": 2.2132, + "step": 7910 + }, + { + "epoch": 0.26, + "grad_norm": 0.7074909806251526, + "learning_rate": 1.7054702092214617e-05, + "loss": 2.1413, + "step": 7911 + }, + { + "epoch": 0.26, + "grad_norm": 0.6974443197250366, + "learning_rate": 1.7053948738078677e-05, + "loss": 2.1535, + "step": 7912 + }, + { + "epoch": 0.26, + "grad_norm": 0.7107587456703186, + "learning_rate": 1.7053195304251352e-05, + "loss": 2.117, + "step": 7913 + }, + { + "epoch": 0.26, + "grad_norm": 0.6895557641983032, + "learning_rate": 1.7052441790741165e-05, + "loss": 2.1151, + "step": 7914 + }, + { + "epoch": 0.26, + "grad_norm": 0.7073776721954346, + "learning_rate": 1.7051688197556627e-05, + "loss": 2.0986, + "step": 7915 + }, + { + "epoch": 0.26, + "grad_norm": 0.7559026479721069, + "learning_rate": 1.7050934524706244e-05, + "loss": 2.142, + "step": 7916 + }, + { + "epoch": 0.26, + "grad_norm": 0.7243779301643372, + "learning_rate": 1.7050180772198535e-05, + "loss": 2.1526, + "step": 7917 + }, + { + "epoch": 0.26, + "grad_norm": 0.7031018733978271, + "learning_rate": 1.7049426940042014e-05, + "loss": 2.1218, + "step": 7918 + }, + { + "epoch": 0.26, + "grad_norm": 0.737893283367157, + "learning_rate": 1.7048673028245202e-05, + "loss": 2.1539, + "step": 7919 + }, + { + "epoch": 0.26, + "grad_norm": 0.7016792297363281, + "learning_rate": 1.7047919036816614e-05, + "loss": 2.1607, + "step": 7920 + }, + { + "epoch": 0.26, + "grad_norm": 0.7344710826873779, + "learning_rate": 1.7047164965764764e-05, + "loss": 2.1762, + "step": 7921 + }, + { + "epoch": 0.26, + "grad_norm": 0.7150170207023621, + "learning_rate": 1.7046410815098176e-05, + "loss": 2.1988, + "step": 7922 + }, + { + "epoch": 0.26, + "grad_norm": 0.7300527095794678, + "learning_rate": 1.704565658482537e-05, + "loss": 2.1416, + "step": 7923 + }, + { + "epoch": 0.26, + "grad_norm": 0.7067297101020813, + "learning_rate": 1.704490227495486e-05, + "loss": 2.1325, + "step": 7924 + }, + { + "epoch": 0.26, + "grad_norm": 0.7377815246582031, + "learning_rate": 1.7044147885495175e-05, + "loss": 2.1759, + "step": 7925 + }, + { + "epoch": 0.26, + "grad_norm": 0.7164495587348938, + "learning_rate": 1.7043393416454836e-05, + "loss": 2.1722, + "step": 7926 + }, + { + "epoch": 0.26, + "grad_norm": 0.7310929894447327, + "learning_rate": 1.7042638867842364e-05, + "loss": 2.1562, + "step": 7927 + }, + { + "epoch": 0.26, + "grad_norm": 0.716425895690918, + "learning_rate": 1.7041884239666292e-05, + "loss": 2.2071, + "step": 7928 + }, + { + "epoch": 0.26, + "grad_norm": 0.689057469367981, + "learning_rate": 1.7041129531935134e-05, + "loss": 2.1581, + "step": 7929 + }, + { + "epoch": 0.26, + "grad_norm": 0.6819918751716614, + "learning_rate": 1.704037474465742e-05, + "loss": 2.1129, + "step": 7930 + }, + { + "epoch": 0.26, + "grad_norm": 0.7058389186859131, + "learning_rate": 1.703961987784168e-05, + "loss": 2.1605, + "step": 7931 + }, + { + "epoch": 0.26, + "grad_norm": 0.6986914873123169, + "learning_rate": 1.703886493149644e-05, + "loss": 2.1125, + "step": 7932 + }, + { + "epoch": 0.26, + "grad_norm": 0.697096586227417, + "learning_rate": 1.7038109905630226e-05, + "loss": 2.0867, + "step": 7933 + }, + { + "epoch": 0.26, + "grad_norm": 0.7282609343528748, + "learning_rate": 1.7037354800251576e-05, + "loss": 2.1989, + "step": 7934 + }, + { + "epoch": 0.26, + "grad_norm": 0.7441202402114868, + "learning_rate": 1.7036599615369015e-05, + "loss": 2.0777, + "step": 7935 + }, + { + "epoch": 0.26, + "grad_norm": 0.7383463382720947, + "learning_rate": 1.7035844350991074e-05, + "loss": 2.1203, + "step": 7936 + }, + { + "epoch": 0.26, + "grad_norm": 0.7265232801437378, + "learning_rate": 1.7035089007126287e-05, + "loss": 2.2107, + "step": 7937 + }, + { + "epoch": 0.26, + "grad_norm": 0.7042539715766907, + "learning_rate": 1.7034333583783185e-05, + "loss": 2.051, + "step": 7938 + }, + { + "epoch": 0.26, + "grad_norm": 0.7071065902709961, + "learning_rate": 1.7033578080970308e-05, + "loss": 2.2016, + "step": 7939 + }, + { + "epoch": 0.26, + "grad_norm": 0.6963499188423157, + "learning_rate": 1.7032822498696188e-05, + "loss": 2.2138, + "step": 7940 + }, + { + "epoch": 0.26, + "grad_norm": 0.7050575613975525, + "learning_rate": 1.703206683696936e-05, + "loss": 2.137, + "step": 7941 + }, + { + "epoch": 0.26, + "grad_norm": 0.7180153727531433, + "learning_rate": 1.7031311095798363e-05, + "loss": 2.1788, + "step": 7942 + }, + { + "epoch": 0.26, + "grad_norm": 0.7360210418701172, + "learning_rate": 1.703055527519173e-05, + "loss": 2.1245, + "step": 7943 + }, + { + "epoch": 0.26, + "grad_norm": 0.7026529312133789, + "learning_rate": 1.702979937515801e-05, + "loss": 2.141, + "step": 7944 + }, + { + "epoch": 0.26, + "grad_norm": 0.7011356949806213, + "learning_rate": 1.7029043395705733e-05, + "loss": 2.1764, + "step": 7945 + }, + { + "epoch": 0.26, + "grad_norm": 0.6785018444061279, + "learning_rate": 1.7028287336843443e-05, + "loss": 2.1822, + "step": 7946 + }, + { + "epoch": 0.26, + "grad_norm": 0.743807852268219, + "learning_rate": 1.7027531198579682e-05, + "loss": 2.1363, + "step": 7947 + }, + { + "epoch": 0.26, + "grad_norm": 0.7605055570602417, + "learning_rate": 1.7026774980922994e-05, + "loss": 2.091, + "step": 7948 + }, + { + "epoch": 0.26, + "grad_norm": 0.7215128540992737, + "learning_rate": 1.7026018683881918e-05, + "loss": 2.1461, + "step": 7949 + }, + { + "epoch": 0.26, + "grad_norm": 0.713710367679596, + "learning_rate": 1.7025262307465e-05, + "loss": 2.1123, + "step": 7950 + }, + { + "epoch": 0.26, + "grad_norm": 0.7201887965202332, + "learning_rate": 1.702450585168079e-05, + "loss": 2.195, + "step": 7951 + }, + { + "epoch": 0.26, + "grad_norm": 0.7062337398529053, + "learning_rate": 1.7023749316537827e-05, + "loss": 2.1098, + "step": 7952 + }, + { + "epoch": 0.26, + "grad_norm": 0.7705143690109253, + "learning_rate": 1.702299270204466e-05, + "loss": 2.2221, + "step": 7953 + }, + { + "epoch": 0.26, + "grad_norm": 0.7025548219680786, + "learning_rate": 1.7022236008209833e-05, + "loss": 2.0606, + "step": 7954 + }, + { + "epoch": 0.26, + "grad_norm": 0.7815284729003906, + "learning_rate": 1.7021479235041908e-05, + "loss": 2.1329, + "step": 7955 + }, + { + "epoch": 0.26, + "grad_norm": 0.7062655091285706, + "learning_rate": 1.7020722382549418e-05, + "loss": 2.1295, + "step": 7956 + }, + { + "epoch": 0.26, + "grad_norm": 0.8637570738792419, + "learning_rate": 1.7019965450740926e-05, + "loss": 2.2093, + "step": 7957 + }, + { + "epoch": 0.26, + "grad_norm": 0.7366224527359009, + "learning_rate": 1.7019208439624977e-05, + "loss": 2.2339, + "step": 7958 + }, + { + "epoch": 0.26, + "grad_norm": 0.7094641923904419, + "learning_rate": 1.7018451349210125e-05, + "loss": 2.1993, + "step": 7959 + }, + { + "epoch": 0.26, + "grad_norm": 0.7672893404960632, + "learning_rate": 1.701769417950492e-05, + "loss": 2.3043, + "step": 7960 + }, + { + "epoch": 0.26, + "grad_norm": 0.6900611519813538, + "learning_rate": 1.7016936930517922e-05, + "loss": 2.191, + "step": 7961 + }, + { + "epoch": 0.26, + "grad_norm": 0.7013009190559387, + "learning_rate": 1.7016179602257682e-05, + "loss": 2.1899, + "step": 7962 + }, + { + "epoch": 0.26, + "grad_norm": 0.7456221580505371, + "learning_rate": 1.7015422194732756e-05, + "loss": 2.1829, + "step": 7963 + }, + { + "epoch": 0.26, + "grad_norm": 0.7236294150352478, + "learning_rate": 1.7014664707951706e-05, + "loss": 2.1468, + "step": 7964 + }, + { + "epoch": 0.26, + "grad_norm": 0.7210402488708496, + "learning_rate": 1.7013907141923076e-05, + "loss": 2.1507, + "step": 7965 + }, + { + "epoch": 0.27, + "grad_norm": 0.7101784944534302, + "learning_rate": 1.701314949665544e-05, + "loss": 2.0943, + "step": 7966 + }, + { + "epoch": 0.27, + "grad_norm": 0.7123855948448181, + "learning_rate": 1.7012391772157354e-05, + "loss": 2.1735, + "step": 7967 + }, + { + "epoch": 0.27, + "grad_norm": 0.7276803255081177, + "learning_rate": 1.7011633968437368e-05, + "loss": 2.1236, + "step": 7968 + }, + { + "epoch": 0.27, + "grad_norm": 0.6953967809677124, + "learning_rate": 1.7010876085504057e-05, + "loss": 2.1396, + "step": 7969 + }, + { + "epoch": 0.27, + "grad_norm": 0.7428085803985596, + "learning_rate": 1.7010118123365972e-05, + "loss": 2.1059, + "step": 7970 + }, + { + "epoch": 0.27, + "grad_norm": 0.7024562358856201, + "learning_rate": 1.7009360082031682e-05, + "loss": 2.1464, + "step": 7971 + }, + { + "epoch": 0.27, + "grad_norm": 0.7196164131164551, + "learning_rate": 1.7008601961509753e-05, + "loss": 2.2172, + "step": 7972 + }, + { + "epoch": 0.27, + "grad_norm": 0.7017200589179993, + "learning_rate": 1.7007843761808742e-05, + "loss": 2.1951, + "step": 7973 + }, + { + "epoch": 0.27, + "grad_norm": 0.7219711542129517, + "learning_rate": 1.700708548293722e-05, + "loss": 2.0993, + "step": 7974 + }, + { + "epoch": 0.27, + "grad_norm": 0.7260026931762695, + "learning_rate": 1.7006327124903754e-05, + "loss": 2.1858, + "step": 7975 + }, + { + "epoch": 0.27, + "grad_norm": 0.7200074791908264, + "learning_rate": 1.700556868771691e-05, + "loss": 2.105, + "step": 7976 + }, + { + "epoch": 0.27, + "grad_norm": 0.7227960228919983, + "learning_rate": 1.7004810171385256e-05, + "loss": 2.1721, + "step": 7977 + }, + { + "epoch": 0.27, + "grad_norm": 0.6948323249816895, + "learning_rate": 1.7004051575917364e-05, + "loss": 2.1503, + "step": 7978 + }, + { + "epoch": 0.27, + "grad_norm": 0.7318873405456543, + "learning_rate": 1.70032929013218e-05, + "loss": 2.2419, + "step": 7979 + }, + { + "epoch": 0.27, + "grad_norm": 0.7254428267478943, + "learning_rate": 1.7002534147607138e-05, + "loss": 2.1377, + "step": 7980 + }, + { + "epoch": 0.27, + "grad_norm": 0.8091462850570679, + "learning_rate": 1.7001775314781948e-05, + "loss": 2.0492, + "step": 7981 + }, + { + "epoch": 0.27, + "grad_norm": 0.6963717341423035, + "learning_rate": 1.7001016402854808e-05, + "loss": 2.1075, + "step": 7982 + }, + { + "epoch": 0.27, + "grad_norm": 0.7116683721542358, + "learning_rate": 1.7000257411834283e-05, + "loss": 2.1132, + "step": 7983 + }, + { + "epoch": 0.27, + "grad_norm": 0.7118235230445862, + "learning_rate": 1.6999498341728954e-05, + "loss": 2.1676, + "step": 7984 + }, + { + "epoch": 0.27, + "grad_norm": 0.7162246704101562, + "learning_rate": 1.6998739192547394e-05, + "loss": 2.106, + "step": 7985 + }, + { + "epoch": 0.27, + "grad_norm": 0.7315239310264587, + "learning_rate": 1.6997979964298182e-05, + "loss": 2.1918, + "step": 7986 + }, + { + "epoch": 0.27, + "grad_norm": 0.7427484393119812, + "learning_rate": 1.6997220656989893e-05, + "loss": 2.174, + "step": 7987 + }, + { + "epoch": 0.27, + "grad_norm": 0.7121890187263489, + "learning_rate": 1.6996461270631105e-05, + "loss": 2.1346, + "step": 7988 + }, + { + "epoch": 0.27, + "grad_norm": 0.7858433723449707, + "learning_rate": 1.6995701805230397e-05, + "loss": 2.161, + "step": 7989 + }, + { + "epoch": 0.27, + "grad_norm": 0.7381325960159302, + "learning_rate": 1.6994942260796353e-05, + "loss": 2.0858, + "step": 7990 + }, + { + "epoch": 0.27, + "grad_norm": 0.7145074605941772, + "learning_rate": 1.6994182637337545e-05, + "loss": 2.2733, + "step": 7991 + }, + { + "epoch": 0.27, + "grad_norm": 0.7198360562324524, + "learning_rate": 1.6993422934862565e-05, + "loss": 2.1684, + "step": 7992 + }, + { + "epoch": 0.27, + "grad_norm": 0.7395340800285339, + "learning_rate": 1.6992663153379994e-05, + "loss": 2.1291, + "step": 7993 + }, + { + "epoch": 0.27, + "grad_norm": 0.7065789699554443, + "learning_rate": 1.699190329289841e-05, + "loss": 2.1054, + "step": 7994 + }, + { + "epoch": 0.27, + "grad_norm": 0.7089216709136963, + "learning_rate": 1.69911433534264e-05, + "loss": 2.0744, + "step": 7995 + }, + { + "epoch": 0.27, + "grad_norm": 0.7498329877853394, + "learning_rate": 1.6990383334972548e-05, + "loss": 2.1265, + "step": 7996 + }, + { + "epoch": 0.27, + "grad_norm": 0.6979795098304749, + "learning_rate": 1.6989623237545444e-05, + "loss": 2.1121, + "step": 7997 + }, + { + "epoch": 0.27, + "grad_norm": 0.7098645567893982, + "learning_rate": 1.698886306115367e-05, + "loss": 2.2209, + "step": 7998 + }, + { + "epoch": 0.27, + "grad_norm": 0.7421597838401794, + "learning_rate": 1.698810280580582e-05, + "loss": 2.121, + "step": 7999 + }, + { + "epoch": 0.27, + "grad_norm": 0.7312493324279785, + "learning_rate": 1.698734247151048e-05, + "loss": 2.123, + "step": 8000 + }, + { + "epoch": 0.27, + "grad_norm": 0.7391269207000732, + "learning_rate": 1.698658205827624e-05, + "loss": 2.1927, + "step": 8001 + }, + { + "epoch": 0.27, + "grad_norm": 0.717480480670929, + "learning_rate": 1.6985821566111685e-05, + "loss": 2.2087, + "step": 8002 + }, + { + "epoch": 0.27, + "grad_norm": 0.7311884164810181, + "learning_rate": 1.698506099502542e-05, + "loss": 2.1699, + "step": 8003 + }, + { + "epoch": 0.27, + "grad_norm": 0.6925169229507446, + "learning_rate": 1.6984300345026026e-05, + "loss": 2.1653, + "step": 8004 + }, + { + "epoch": 0.27, + "grad_norm": 0.7030767202377319, + "learning_rate": 1.6983539616122097e-05, + "loss": 2.0961, + "step": 8005 + }, + { + "epoch": 0.27, + "grad_norm": 0.7056183218955994, + "learning_rate": 1.6982778808322233e-05, + "loss": 2.1506, + "step": 8006 + }, + { + "epoch": 0.27, + "grad_norm": 0.7171412110328674, + "learning_rate": 1.698201792163503e-05, + "loss": 2.1124, + "step": 8007 + }, + { + "epoch": 0.27, + "grad_norm": 0.7261080145835876, + "learning_rate": 1.6981256956069075e-05, + "loss": 2.1752, + "step": 8008 + }, + { + "epoch": 0.27, + "grad_norm": 0.7251702547073364, + "learning_rate": 1.6980495911632973e-05, + "loss": 2.1737, + "step": 8009 + }, + { + "epoch": 0.27, + "grad_norm": 0.6978588700294495, + "learning_rate": 1.6979734788335318e-05, + "loss": 2.1561, + "step": 8010 + }, + { + "epoch": 0.27, + "grad_norm": 0.7254132628440857, + "learning_rate": 1.6978973586184707e-05, + "loss": 2.1109, + "step": 8011 + }, + { + "epoch": 0.27, + "grad_norm": 0.7048943638801575, + "learning_rate": 1.6978212305189744e-05, + "loss": 2.1732, + "step": 8012 + }, + { + "epoch": 0.27, + "grad_norm": 0.7541685700416565, + "learning_rate": 1.6977450945359033e-05, + "loss": 2.1694, + "step": 8013 + }, + { + "epoch": 0.27, + "grad_norm": 0.7133663892745972, + "learning_rate": 1.6976689506701167e-05, + "loss": 2.1601, + "step": 8014 + }, + { + "epoch": 0.27, + "grad_norm": 0.7131994366645813, + "learning_rate": 1.6975927989224753e-05, + "loss": 2.1426, + "step": 8015 + }, + { + "epoch": 0.27, + "grad_norm": 0.7086762189865112, + "learning_rate": 1.6975166392938388e-05, + "loss": 2.1216, + "step": 8016 + }, + { + "epoch": 0.27, + "grad_norm": 0.718539834022522, + "learning_rate": 1.6974404717850688e-05, + "loss": 2.2322, + "step": 8017 + }, + { + "epoch": 0.27, + "grad_norm": 0.7072193026542664, + "learning_rate": 1.6973642963970245e-05, + "loss": 2.1863, + "step": 8018 + }, + { + "epoch": 0.27, + "grad_norm": 0.7035252451896667, + "learning_rate": 1.6972881131305675e-05, + "loss": 2.1708, + "step": 8019 + }, + { + "epoch": 0.27, + "grad_norm": 0.7313700914382935, + "learning_rate": 1.6972119219865577e-05, + "loss": 2.2044, + "step": 8020 + }, + { + "epoch": 0.27, + "grad_norm": 0.7427760362625122, + "learning_rate": 1.6971357229658564e-05, + "loss": 2.1373, + "step": 8021 + }, + { + "epoch": 0.27, + "grad_norm": 0.7605355381965637, + "learning_rate": 1.6970595160693242e-05, + "loss": 2.2379, + "step": 8022 + }, + { + "epoch": 0.27, + "grad_norm": 0.7150384783744812, + "learning_rate": 1.6969833012978224e-05, + "loss": 2.1413, + "step": 8023 + }, + { + "epoch": 0.27, + "grad_norm": 0.7506532669067383, + "learning_rate": 1.6969070786522114e-05, + "loss": 2.0715, + "step": 8024 + }, + { + "epoch": 0.27, + "grad_norm": 0.6972370147705078, + "learning_rate": 1.6968308481333523e-05, + "loss": 2.0523, + "step": 8025 + }, + { + "epoch": 0.27, + "grad_norm": 0.6927030086517334, + "learning_rate": 1.6967546097421073e-05, + "loss": 2.1483, + "step": 8026 + }, + { + "epoch": 0.27, + "grad_norm": 0.7181596159934998, + "learning_rate": 1.696678363479337e-05, + "loss": 2.1924, + "step": 8027 + }, + { + "epoch": 0.27, + "grad_norm": 0.7039393782615662, + "learning_rate": 1.6966021093459028e-05, + "loss": 2.1442, + "step": 8028 + }, + { + "epoch": 0.27, + "grad_norm": 0.694395899772644, + "learning_rate": 1.6965258473426657e-05, + "loss": 2.1439, + "step": 8029 + }, + { + "epoch": 0.27, + "grad_norm": 0.689033567905426, + "learning_rate": 1.6964495774704885e-05, + "loss": 2.1915, + "step": 8030 + }, + { + "epoch": 0.27, + "grad_norm": 0.6865559220314026, + "learning_rate": 1.6963732997302317e-05, + "loss": 2.1535, + "step": 8031 + }, + { + "epoch": 0.27, + "grad_norm": 0.7057960033416748, + "learning_rate": 1.6962970141227577e-05, + "loss": 2.1348, + "step": 8032 + }, + { + "epoch": 0.27, + "grad_norm": 0.7132707238197327, + "learning_rate": 1.696220720648928e-05, + "loss": 2.173, + "step": 8033 + }, + { + "epoch": 0.27, + "grad_norm": 0.696930468082428, + "learning_rate": 1.6961444193096045e-05, + "loss": 2.1668, + "step": 8034 + }, + { + "epoch": 0.27, + "grad_norm": 0.7106955051422119, + "learning_rate": 1.6960681101056495e-05, + "loss": 2.1367, + "step": 8035 + }, + { + "epoch": 0.27, + "grad_norm": 0.6993038058280945, + "learning_rate": 1.6959917930379248e-05, + "loss": 2.1838, + "step": 8036 + }, + { + "epoch": 0.27, + "grad_norm": 0.7253422737121582, + "learning_rate": 1.6959154681072927e-05, + "loss": 2.1525, + "step": 8037 + }, + { + "epoch": 0.27, + "grad_norm": 0.7360081672668457, + "learning_rate": 1.6958391353146158e-05, + "loss": 2.1615, + "step": 8038 + }, + { + "epoch": 0.27, + "grad_norm": 0.7317522764205933, + "learning_rate": 1.695762794660756e-05, + "loss": 2.1363, + "step": 8039 + }, + { + "epoch": 0.27, + "grad_norm": 0.7145367860794067, + "learning_rate": 1.6956864461465757e-05, + "loss": 2.1493, + "step": 8040 + }, + { + "epoch": 0.27, + "grad_norm": 0.7083792090415955, + "learning_rate": 1.695610089772938e-05, + "loss": 2.1437, + "step": 8041 + }, + { + "epoch": 0.27, + "grad_norm": 0.7179403901100159, + "learning_rate": 1.695533725540705e-05, + "loss": 2.1711, + "step": 8042 + }, + { + "epoch": 0.27, + "grad_norm": 0.6831409931182861, + "learning_rate": 1.6954573534507393e-05, + "loss": 2.1158, + "step": 8043 + }, + { + "epoch": 0.27, + "grad_norm": 0.756127119064331, + "learning_rate": 1.6953809735039045e-05, + "loss": 2.1415, + "step": 8044 + }, + { + "epoch": 0.27, + "grad_norm": 0.7312495708465576, + "learning_rate": 1.6953045857010628e-05, + "loss": 2.1293, + "step": 8045 + }, + { + "epoch": 0.27, + "grad_norm": 0.7439257502555847, + "learning_rate": 1.695228190043077e-05, + "loss": 2.1521, + "step": 8046 + }, + { + "epoch": 0.27, + "grad_norm": 0.715004563331604, + "learning_rate": 1.695151786530811e-05, + "loss": 2.2003, + "step": 8047 + }, + { + "epoch": 0.27, + "grad_norm": 0.7093165516853333, + "learning_rate": 1.6950753751651273e-05, + "loss": 2.0874, + "step": 8048 + }, + { + "epoch": 0.27, + "grad_norm": 0.7282658219337463, + "learning_rate": 1.6949989559468892e-05, + "loss": 2.1598, + "step": 8049 + }, + { + "epoch": 0.27, + "grad_norm": 0.6960320472717285, + "learning_rate": 1.6949225288769607e-05, + "loss": 2.1254, + "step": 8050 + }, + { + "epoch": 0.27, + "grad_norm": 0.6917444467544556, + "learning_rate": 1.6948460939562043e-05, + "loss": 2.1998, + "step": 8051 + }, + { + "epoch": 0.27, + "grad_norm": 0.6972142457962036, + "learning_rate": 1.6947696511854844e-05, + "loss": 2.1455, + "step": 8052 + }, + { + "epoch": 0.27, + "grad_norm": 0.7067061066627502, + "learning_rate": 1.6946932005656638e-05, + "loss": 2.1209, + "step": 8053 + }, + { + "epoch": 0.27, + "grad_norm": 0.7279441952705383, + "learning_rate": 1.6946167420976066e-05, + "loss": 2.1664, + "step": 8054 + }, + { + "epoch": 0.27, + "grad_norm": 0.7077794075012207, + "learning_rate": 1.6945402757821768e-05, + "loss": 2.134, + "step": 8055 + }, + { + "epoch": 0.27, + "grad_norm": 0.7306479811668396, + "learning_rate": 1.6944638016202376e-05, + "loss": 2.2105, + "step": 8056 + }, + { + "epoch": 0.27, + "grad_norm": 0.718594491481781, + "learning_rate": 1.6943873196126537e-05, + "loss": 2.1049, + "step": 8057 + }, + { + "epoch": 0.27, + "grad_norm": 0.7279380559921265, + "learning_rate": 1.6943108297602887e-05, + "loss": 2.2009, + "step": 8058 + }, + { + "epoch": 0.27, + "grad_norm": 0.6916021108627319, + "learning_rate": 1.694234332064007e-05, + "loss": 2.1807, + "step": 8059 + }, + { + "epoch": 0.27, + "grad_norm": 0.7069020867347717, + "learning_rate": 1.694157826524672e-05, + "loss": 2.1464, + "step": 8060 + }, + { + "epoch": 0.27, + "grad_norm": 0.722312331199646, + "learning_rate": 1.6940813131431495e-05, + "loss": 2.138, + "step": 8061 + }, + { + "epoch": 0.27, + "grad_norm": 0.7316096425056458, + "learning_rate": 1.6940047919203026e-05, + "loss": 2.2364, + "step": 8062 + }, + { + "epoch": 0.27, + "grad_norm": 0.7448049187660217, + "learning_rate": 1.6939282628569967e-05, + "loss": 2.1896, + "step": 8063 + }, + { + "epoch": 0.27, + "grad_norm": 0.715130090713501, + "learning_rate": 1.6938517259540955e-05, + "loss": 2.1982, + "step": 8064 + }, + { + "epoch": 0.27, + "grad_norm": 0.7153337001800537, + "learning_rate": 1.6937751812124644e-05, + "loss": 2.1577, + "step": 8065 + }, + { + "epoch": 0.27, + "grad_norm": 0.7206545472145081, + "learning_rate": 1.6936986286329678e-05, + "loss": 2.154, + "step": 8066 + }, + { + "epoch": 0.27, + "grad_norm": 0.6954268217086792, + "learning_rate": 1.6936220682164706e-05, + "loss": 2.1567, + "step": 8067 + }, + { + "epoch": 0.27, + "grad_norm": 0.7146590948104858, + "learning_rate": 1.6935454999638382e-05, + "loss": 2.1984, + "step": 8068 + }, + { + "epoch": 0.27, + "grad_norm": 0.7114245891571045, + "learning_rate": 1.693468923875935e-05, + "loss": 2.1612, + "step": 8069 + }, + { + "epoch": 0.27, + "grad_norm": 0.7052518129348755, + "learning_rate": 1.693392339953626e-05, + "loss": 2.0681, + "step": 8070 + }, + { + "epoch": 0.27, + "grad_norm": 0.7618337869644165, + "learning_rate": 1.6933157481977768e-05, + "loss": 2.1538, + "step": 8071 + }, + { + "epoch": 0.27, + "grad_norm": 0.7551383972167969, + "learning_rate": 1.6932391486092526e-05, + "loss": 2.215, + "step": 8072 + }, + { + "epoch": 0.27, + "grad_norm": 0.7414827942848206, + "learning_rate": 1.6931625411889192e-05, + "loss": 2.1049, + "step": 8073 + }, + { + "epoch": 0.27, + "grad_norm": 0.7153434753417969, + "learning_rate": 1.6930859259376412e-05, + "loss": 2.2162, + "step": 8074 + }, + { + "epoch": 0.27, + "grad_norm": 0.7411903738975525, + "learning_rate": 1.693009302856285e-05, + "loss": 2.1172, + "step": 8075 + }, + { + "epoch": 0.27, + "grad_norm": 0.7050119638442993, + "learning_rate": 1.6929326719457153e-05, + "loss": 2.1202, + "step": 8076 + }, + { + "epoch": 0.27, + "grad_norm": 0.7146358489990234, + "learning_rate": 1.6928560332067988e-05, + "loss": 2.0591, + "step": 8077 + }, + { + "epoch": 0.27, + "grad_norm": 0.7181561589241028, + "learning_rate": 1.6927793866404006e-05, + "loss": 2.1103, + "step": 8078 + }, + { + "epoch": 0.27, + "grad_norm": 0.7203660607337952, + "learning_rate": 1.692702732247387e-05, + "loss": 2.1229, + "step": 8079 + }, + { + "epoch": 0.27, + "grad_norm": 0.7607026696205139, + "learning_rate": 1.692626070028624e-05, + "loss": 2.1658, + "step": 8080 + }, + { + "epoch": 0.27, + "grad_norm": 0.7067725658416748, + "learning_rate": 1.6925493999849773e-05, + "loss": 2.1535, + "step": 8081 + }, + { + "epoch": 0.27, + "grad_norm": 0.7124351263046265, + "learning_rate": 1.6924727221173135e-05, + "loss": 2.0787, + "step": 8082 + }, + { + "epoch": 0.27, + "grad_norm": 0.6913341879844666, + "learning_rate": 1.692396036426499e-05, + "loss": 2.107, + "step": 8083 + }, + { + "epoch": 0.27, + "grad_norm": 0.7637014985084534, + "learning_rate": 1.6923193429133994e-05, + "loss": 2.1074, + "step": 8084 + }, + { + "epoch": 0.27, + "grad_norm": 0.6997377276420593, + "learning_rate": 1.692242641578882e-05, + "loss": 2.1823, + "step": 8085 + }, + { + "epoch": 0.27, + "grad_norm": 0.7334240078926086, + "learning_rate": 1.6921659324238126e-05, + "loss": 2.1146, + "step": 8086 + }, + { + "epoch": 0.27, + "grad_norm": 0.7359877824783325, + "learning_rate": 1.6920892154490584e-05, + "loss": 2.216, + "step": 8087 + }, + { + "epoch": 0.27, + "grad_norm": 0.7286894917488098, + "learning_rate": 1.6920124906554857e-05, + "loss": 2.1528, + "step": 8088 + }, + { + "epoch": 0.27, + "grad_norm": 0.7410925626754761, + "learning_rate": 1.6919357580439615e-05, + "loss": 2.121, + "step": 8089 + }, + { + "epoch": 0.27, + "grad_norm": 0.7232389450073242, + "learning_rate": 1.691859017615353e-05, + "loss": 2.1989, + "step": 8090 + }, + { + "epoch": 0.27, + "grad_norm": 0.7084623575210571, + "learning_rate": 1.6917822693705262e-05, + "loss": 2.2004, + "step": 8091 + }, + { + "epoch": 0.27, + "grad_norm": 0.73396897315979, + "learning_rate": 1.6917055133103487e-05, + "loss": 2.1291, + "step": 8092 + }, + { + "epoch": 0.27, + "grad_norm": 0.710496723651886, + "learning_rate": 1.691628749435688e-05, + "loss": 2.1599, + "step": 8093 + }, + { + "epoch": 0.27, + "grad_norm": 0.7316484451293945, + "learning_rate": 1.6915519777474113e-05, + "loss": 2.1912, + "step": 8094 + }, + { + "epoch": 0.27, + "grad_norm": 0.7637284994125366, + "learning_rate": 1.691475198246385e-05, + "loss": 2.1422, + "step": 8095 + }, + { + "epoch": 0.27, + "grad_norm": 0.7148154973983765, + "learning_rate": 1.6913984109334776e-05, + "loss": 2.082, + "step": 8096 + }, + { + "epoch": 0.27, + "grad_norm": 0.7341612577438354, + "learning_rate": 1.691321615809556e-05, + "loss": 2.1288, + "step": 8097 + }, + { + "epoch": 0.27, + "grad_norm": 0.7228325605392456, + "learning_rate": 1.691244812875488e-05, + "loss": 2.1845, + "step": 8098 + }, + { + "epoch": 0.27, + "grad_norm": 0.7305983901023865, + "learning_rate": 1.691168002132141e-05, + "loss": 2.2612, + "step": 8099 + }, + { + "epoch": 0.27, + "grad_norm": 0.7235361933708191, + "learning_rate": 1.6910911835803833e-05, + "loss": 2.1056, + "step": 8100 + }, + { + "epoch": 0.27, + "grad_norm": 0.7653730511665344, + "learning_rate": 1.691014357221082e-05, + "loss": 2.1176, + "step": 8101 + }, + { + "epoch": 0.27, + "grad_norm": 0.7131959199905396, + "learning_rate": 1.6909375230551058e-05, + "loss": 2.1578, + "step": 8102 + }, + { + "epoch": 0.27, + "grad_norm": 0.7110508680343628, + "learning_rate": 1.6908606810833225e-05, + "loss": 2.0944, + "step": 8103 + }, + { + "epoch": 0.27, + "grad_norm": 0.7016122341156006, + "learning_rate": 1.6907838313065998e-05, + "loss": 2.1732, + "step": 8104 + }, + { + "epoch": 0.27, + "grad_norm": 0.6847202181816101, + "learning_rate": 1.6907069737258065e-05, + "loss": 2.1049, + "step": 8105 + }, + { + "epoch": 0.27, + "grad_norm": 0.7419757843017578, + "learning_rate": 1.6906301083418106e-05, + "loss": 2.2233, + "step": 8106 + }, + { + "epoch": 0.27, + "grad_norm": 0.7221055626869202, + "learning_rate": 1.69055323515548e-05, + "loss": 2.157, + "step": 8107 + }, + { + "epoch": 0.27, + "grad_norm": 0.7006413340568542, + "learning_rate": 1.690476354167684e-05, + "loss": 2.1716, + "step": 8108 + }, + { + "epoch": 0.27, + "grad_norm": 0.7043401598930359, + "learning_rate": 1.690399465379291e-05, + "loss": 2.1418, + "step": 8109 + }, + { + "epoch": 0.27, + "grad_norm": 0.7013598084449768, + "learning_rate": 1.6903225687911692e-05, + "loss": 2.1057, + "step": 8110 + }, + { + "epoch": 0.27, + "grad_norm": 0.6882937550544739, + "learning_rate": 1.6902456644041877e-05, + "loss": 2.135, + "step": 8111 + }, + { + "epoch": 0.27, + "grad_norm": 0.7238315939903259, + "learning_rate": 1.6901687522192152e-05, + "loss": 2.1692, + "step": 8112 + }, + { + "epoch": 0.27, + "grad_norm": 0.7501814961433411, + "learning_rate": 1.6900918322371204e-05, + "loss": 2.1871, + "step": 8113 + }, + { + "epoch": 0.27, + "grad_norm": 0.6972922682762146, + "learning_rate": 1.6900149044587728e-05, + "loss": 2.0657, + "step": 8114 + }, + { + "epoch": 0.27, + "grad_norm": 0.7206215858459473, + "learning_rate": 1.6899379688850407e-05, + "loss": 2.1329, + "step": 8115 + }, + { + "epoch": 0.27, + "grad_norm": 0.7121152877807617, + "learning_rate": 1.6898610255167945e-05, + "loss": 2.0767, + "step": 8116 + }, + { + "epoch": 0.27, + "grad_norm": 0.712029218673706, + "learning_rate": 1.6897840743549023e-05, + "loss": 2.2257, + "step": 8117 + }, + { + "epoch": 0.27, + "grad_norm": 0.7147645950317383, + "learning_rate": 1.689707115400234e-05, + "loss": 2.1688, + "step": 8118 + }, + { + "epoch": 0.27, + "grad_norm": 0.7088053822517395, + "learning_rate": 1.6896301486536588e-05, + "loss": 2.066, + "step": 8119 + }, + { + "epoch": 0.27, + "grad_norm": 0.7544865012168884, + "learning_rate": 1.6895531741160465e-05, + "loss": 2.1453, + "step": 8120 + }, + { + "epoch": 0.27, + "grad_norm": 0.6798065900802612, + "learning_rate": 1.6894761917882665e-05, + "loss": 2.1154, + "step": 8121 + }, + { + "epoch": 0.27, + "grad_norm": 0.7167335152626038, + "learning_rate": 1.6893992016711885e-05, + "loss": 2.1372, + "step": 8122 + }, + { + "epoch": 0.27, + "grad_norm": 0.7570890784263611, + "learning_rate": 1.6893222037656825e-05, + "loss": 2.1945, + "step": 8123 + }, + { + "epoch": 0.27, + "grad_norm": 0.7210062742233276, + "learning_rate": 1.6892451980726182e-05, + "loss": 2.1511, + "step": 8124 + }, + { + "epoch": 0.27, + "grad_norm": 0.7151951789855957, + "learning_rate": 1.6891681845928654e-05, + "loss": 2.1267, + "step": 8125 + }, + { + "epoch": 0.27, + "grad_norm": 0.7219679355621338, + "learning_rate": 1.6890911633272942e-05, + "loss": 2.1799, + "step": 8126 + }, + { + "epoch": 0.27, + "grad_norm": 0.6984708309173584, + "learning_rate": 1.689014134276775e-05, + "loss": 2.125, + "step": 8127 + }, + { + "epoch": 0.27, + "grad_norm": 0.6969929933547974, + "learning_rate": 1.6889370974421782e-05, + "loss": 2.1218, + "step": 8128 + }, + { + "epoch": 0.27, + "grad_norm": 0.7126932144165039, + "learning_rate": 1.688860052824374e-05, + "loss": 2.207, + "step": 8129 + }, + { + "epoch": 0.27, + "grad_norm": 0.7355309724807739, + "learning_rate": 1.688783000424232e-05, + "loss": 2.1554, + "step": 8130 + }, + { + "epoch": 0.27, + "grad_norm": 0.7027552127838135, + "learning_rate": 1.6887059402426235e-05, + "loss": 2.1381, + "step": 8131 + }, + { + "epoch": 0.27, + "grad_norm": 0.7012249827384949, + "learning_rate": 1.688628872280419e-05, + "loss": 2.1112, + "step": 8132 + }, + { + "epoch": 0.27, + "grad_norm": 0.7139543294906616, + "learning_rate": 1.6885517965384893e-05, + "loss": 2.1294, + "step": 8133 + }, + { + "epoch": 0.27, + "grad_norm": 0.7407210469245911, + "learning_rate": 1.6884747130177048e-05, + "loss": 2.1616, + "step": 8134 + }, + { + "epoch": 0.27, + "grad_norm": 0.7053178548812866, + "learning_rate": 1.6883976217189365e-05, + "loss": 2.1059, + "step": 8135 + }, + { + "epoch": 0.27, + "grad_norm": 0.7465366125106812, + "learning_rate": 1.6883205226430553e-05, + "loss": 2.1996, + "step": 8136 + }, + { + "epoch": 0.27, + "grad_norm": 0.7339368462562561, + "learning_rate": 1.688243415790932e-05, + "loss": 2.0866, + "step": 8137 + }, + { + "epoch": 0.27, + "grad_norm": 0.7101246118545532, + "learning_rate": 1.688166301163438e-05, + "loss": 2.1291, + "step": 8138 + }, + { + "epoch": 0.27, + "grad_norm": 0.7082968354225159, + "learning_rate": 1.6880891787614445e-05, + "loss": 2.1409, + "step": 8139 + }, + { + "epoch": 0.27, + "grad_norm": 0.7251446843147278, + "learning_rate": 1.688012048585823e-05, + "loss": 2.1302, + "step": 8140 + }, + { + "epoch": 0.27, + "grad_norm": 0.7157198190689087, + "learning_rate": 1.6879349106374443e-05, + "loss": 2.2018, + "step": 8141 + }, + { + "epoch": 0.27, + "grad_norm": 0.7369323372840881, + "learning_rate": 1.6878577649171802e-05, + "loss": 2.1875, + "step": 8142 + }, + { + "epoch": 0.27, + "grad_norm": 0.7154991030693054, + "learning_rate": 1.687780611425902e-05, + "loss": 2.1027, + "step": 8143 + }, + { + "epoch": 0.27, + "grad_norm": 0.7154111266136169, + "learning_rate": 1.6877034501644818e-05, + "loss": 2.1324, + "step": 8144 + }, + { + "epoch": 0.27, + "grad_norm": 0.7388401627540588, + "learning_rate": 1.687626281133791e-05, + "loss": 2.1719, + "step": 8145 + }, + { + "epoch": 0.27, + "grad_norm": 0.709808886051178, + "learning_rate": 1.6875491043347016e-05, + "loss": 2.0829, + "step": 8146 + }, + { + "epoch": 0.27, + "grad_norm": 0.7194606065750122, + "learning_rate": 1.6874719197680852e-05, + "loss": 2.1545, + "step": 8147 + }, + { + "epoch": 0.27, + "grad_norm": 0.7188669443130493, + "learning_rate": 1.6873947274348137e-05, + "loss": 2.1875, + "step": 8148 + }, + { + "epoch": 0.27, + "grad_norm": 0.7057079076766968, + "learning_rate": 1.6873175273357598e-05, + "loss": 2.1571, + "step": 8149 + }, + { + "epoch": 0.27, + "grad_norm": 0.7026749849319458, + "learning_rate": 1.6872403194717953e-05, + "loss": 2.1094, + "step": 8150 + }, + { + "epoch": 0.27, + "grad_norm": 0.7317450642585754, + "learning_rate": 1.6871631038437922e-05, + "loss": 2.1423, + "step": 8151 + }, + { + "epoch": 0.27, + "grad_norm": 0.717634916305542, + "learning_rate": 1.687085880452623e-05, + "loss": 2.1577, + "step": 8152 + }, + { + "epoch": 0.27, + "grad_norm": 0.6946392059326172, + "learning_rate": 1.6870086492991605e-05, + "loss": 2.0983, + "step": 8153 + }, + { + "epoch": 0.27, + "grad_norm": 0.7069228887557983, + "learning_rate": 1.686931410384277e-05, + "loss": 2.1568, + "step": 8154 + }, + { + "epoch": 0.27, + "grad_norm": 0.7103220820426941, + "learning_rate": 1.6868541637088447e-05, + "loss": 2.1689, + "step": 8155 + }, + { + "epoch": 0.27, + "grad_norm": 0.700924277305603, + "learning_rate": 1.6867769092737365e-05, + "loss": 2.0884, + "step": 8156 + }, + { + "epoch": 0.27, + "grad_norm": 0.7578829526901245, + "learning_rate": 1.6866996470798255e-05, + "loss": 2.1409, + "step": 8157 + }, + { + "epoch": 0.27, + "grad_norm": 0.6955701112747192, + "learning_rate": 1.686622377127984e-05, + "loss": 2.1014, + "step": 8158 + }, + { + "epoch": 0.27, + "grad_norm": 0.719598114490509, + "learning_rate": 1.6865450994190857e-05, + "loss": 2.1266, + "step": 8159 + }, + { + "epoch": 0.27, + "grad_norm": 0.7287486791610718, + "learning_rate": 1.6864678139540033e-05, + "loss": 2.1767, + "step": 8160 + }, + { + "epoch": 0.27, + "grad_norm": 0.7337954640388489, + "learning_rate": 1.6863905207336095e-05, + "loss": 2.1465, + "step": 8161 + }, + { + "epoch": 0.27, + "grad_norm": 0.7136723399162292, + "learning_rate": 1.686313219758778e-05, + "loss": 2.1723, + "step": 8162 + }, + { + "epoch": 0.27, + "grad_norm": 0.7117597460746765, + "learning_rate": 1.686235911030382e-05, + "loss": 2.1984, + "step": 8163 + }, + { + "epoch": 0.27, + "grad_norm": 0.7197886109352112, + "learning_rate": 1.6861585945492945e-05, + "loss": 2.1736, + "step": 8164 + }, + { + "epoch": 0.27, + "grad_norm": 0.7394952774047852, + "learning_rate": 1.68608127031639e-05, + "loss": 2.1479, + "step": 8165 + }, + { + "epoch": 0.27, + "grad_norm": 0.7111626863479614, + "learning_rate": 1.686003938332541e-05, + "loss": 2.1732, + "step": 8166 + }, + { + "epoch": 0.27, + "grad_norm": 0.7040988206863403, + "learning_rate": 1.6859265985986213e-05, + "loss": 2.1797, + "step": 8167 + }, + { + "epoch": 0.27, + "grad_norm": 0.7538870573043823, + "learning_rate": 1.6858492511155052e-05, + "loss": 2.1488, + "step": 8168 + }, + { + "epoch": 0.27, + "grad_norm": 0.6979429125785828, + "learning_rate": 1.685771895884066e-05, + "loss": 2.1244, + "step": 8169 + }, + { + "epoch": 0.27, + "grad_norm": 0.699279248714447, + "learning_rate": 1.6856945329051782e-05, + "loss": 2.1317, + "step": 8170 + }, + { + "epoch": 0.27, + "grad_norm": 0.7493459582328796, + "learning_rate": 1.6856171621797153e-05, + "loss": 2.1604, + "step": 8171 + }, + { + "epoch": 0.27, + "grad_norm": 0.7153160572052002, + "learning_rate": 1.6855397837085514e-05, + "loss": 2.1766, + "step": 8172 + }, + { + "epoch": 0.27, + "grad_norm": 0.7087304592132568, + "learning_rate": 1.6854623974925607e-05, + "loss": 2.1578, + "step": 8173 + }, + { + "epoch": 0.27, + "grad_norm": 0.7083562016487122, + "learning_rate": 1.6853850035326174e-05, + "loss": 2.1075, + "step": 8174 + }, + { + "epoch": 0.27, + "grad_norm": 0.7284536957740784, + "learning_rate": 1.6853076018295963e-05, + "loss": 2.1145, + "step": 8175 + }, + { + "epoch": 0.27, + "grad_norm": 0.7345038056373596, + "learning_rate": 1.685230192384372e-05, + "loss": 2.2218, + "step": 8176 + }, + { + "epoch": 0.27, + "grad_norm": 0.73847895860672, + "learning_rate": 1.6851527751978175e-05, + "loss": 2.2136, + "step": 8177 + }, + { + "epoch": 0.27, + "grad_norm": 0.7068412899971008, + "learning_rate": 1.6850753502708094e-05, + "loss": 2.1286, + "step": 8178 + }, + { + "epoch": 0.27, + "grad_norm": 0.6908994913101196, + "learning_rate": 1.684997917604221e-05, + "loss": 2.1152, + "step": 8179 + }, + { + "epoch": 0.27, + "grad_norm": 0.7088663578033447, + "learning_rate": 1.6849204771989276e-05, + "loss": 2.1014, + "step": 8180 + }, + { + "epoch": 0.27, + "grad_norm": 0.716882586479187, + "learning_rate": 1.6848430290558046e-05, + "loss": 2.177, + "step": 8181 + }, + { + "epoch": 0.27, + "grad_norm": 0.7196425795555115, + "learning_rate": 1.6847655731757257e-05, + "loss": 2.2238, + "step": 8182 + }, + { + "epoch": 0.27, + "grad_norm": 0.7443209290504456, + "learning_rate": 1.684688109559567e-05, + "loss": 2.1341, + "step": 8183 + }, + { + "epoch": 0.27, + "grad_norm": 0.7341402173042297, + "learning_rate": 1.6846106382082032e-05, + "loss": 2.1472, + "step": 8184 + }, + { + "epoch": 0.27, + "grad_norm": 0.7248502969741821, + "learning_rate": 1.6845331591225095e-05, + "loss": 2.1305, + "step": 8185 + }, + { + "epoch": 0.27, + "grad_norm": 0.7372217774391174, + "learning_rate": 1.684455672303362e-05, + "loss": 2.1903, + "step": 8186 + }, + { + "epoch": 0.27, + "grad_norm": 0.7091234922409058, + "learning_rate": 1.6843781777516344e-05, + "loss": 2.1907, + "step": 8187 + }, + { + "epoch": 0.27, + "grad_norm": 0.7076736688613892, + "learning_rate": 1.684300675468204e-05, + "loss": 2.1589, + "step": 8188 + }, + { + "epoch": 0.27, + "grad_norm": 0.7167781591415405, + "learning_rate": 1.6842231654539456e-05, + "loss": 2.1733, + "step": 8189 + }, + { + "epoch": 0.27, + "grad_norm": 0.7146630883216858, + "learning_rate": 1.6841456477097343e-05, + "loss": 2.1145, + "step": 8190 + }, + { + "epoch": 0.27, + "grad_norm": 0.7279488444328308, + "learning_rate": 1.684068122236447e-05, + "loss": 2.1362, + "step": 8191 + }, + { + "epoch": 0.27, + "grad_norm": 0.6903893947601318, + "learning_rate": 1.6839905890349587e-05, + "loss": 2.1346, + "step": 8192 + }, + { + "epoch": 0.27, + "grad_norm": 0.7163598537445068, + "learning_rate": 1.6839130481061458e-05, + "loss": 2.1218, + "step": 8193 + }, + { + "epoch": 0.27, + "grad_norm": 0.7111249566078186, + "learning_rate": 1.683835499450884e-05, + "loss": 2.1122, + "step": 8194 + }, + { + "epoch": 0.27, + "grad_norm": 0.7025899887084961, + "learning_rate": 1.683757943070049e-05, + "loss": 2.1729, + "step": 8195 + }, + { + "epoch": 0.27, + "grad_norm": 0.7012423276901245, + "learning_rate": 1.6836803789645184e-05, + "loss": 2.1226, + "step": 8196 + }, + { + "epoch": 0.27, + "grad_norm": 0.7256345748901367, + "learning_rate": 1.683602807135167e-05, + "loss": 2.2321, + "step": 8197 + }, + { + "epoch": 0.27, + "grad_norm": 0.7525830864906311, + "learning_rate": 1.6835252275828717e-05, + "loss": 2.132, + "step": 8198 + }, + { + "epoch": 0.27, + "grad_norm": 0.6893534660339355, + "learning_rate": 1.683447640308509e-05, + "loss": 2.123, + "step": 8199 + }, + { + "epoch": 0.27, + "grad_norm": 0.7206797003746033, + "learning_rate": 1.6833700453129553e-05, + "loss": 2.1569, + "step": 8200 + }, + { + "epoch": 0.27, + "grad_norm": 0.7020494937896729, + "learning_rate": 1.683292442597088e-05, + "loss": 2.1894, + "step": 8201 + }, + { + "epoch": 0.27, + "grad_norm": 0.7730892300605774, + "learning_rate": 1.6832148321617823e-05, + "loss": 2.1399, + "step": 8202 + }, + { + "epoch": 0.27, + "grad_norm": 0.7217729687690735, + "learning_rate": 1.6831372140079164e-05, + "loss": 2.1365, + "step": 8203 + }, + { + "epoch": 0.27, + "grad_norm": 0.728108286857605, + "learning_rate": 1.683059588136366e-05, + "loss": 2.1405, + "step": 8204 + }, + { + "epoch": 0.27, + "grad_norm": 0.6715365648269653, + "learning_rate": 1.6829819545480092e-05, + "loss": 2.2119, + "step": 8205 + }, + { + "epoch": 0.27, + "grad_norm": 0.710647463798523, + "learning_rate": 1.6829043132437225e-05, + "loss": 2.1001, + "step": 8206 + }, + { + "epoch": 0.27, + "grad_norm": 0.7033864259719849, + "learning_rate": 1.6828266642243826e-05, + "loss": 2.1681, + "step": 8207 + }, + { + "epoch": 0.27, + "grad_norm": 0.7252451181411743, + "learning_rate": 1.6827490074908677e-05, + "loss": 2.1136, + "step": 8208 + }, + { + "epoch": 0.27, + "grad_norm": 0.7354610562324524, + "learning_rate": 1.6826713430440546e-05, + "loss": 2.1045, + "step": 8209 + }, + { + "epoch": 0.27, + "grad_norm": 0.7123645544052124, + "learning_rate": 1.6825936708848205e-05, + "loss": 2.1464, + "step": 8210 + }, + { + "epoch": 0.27, + "grad_norm": 0.7007272243499756, + "learning_rate": 1.6825159910140433e-05, + "loss": 2.1523, + "step": 8211 + }, + { + "epoch": 0.27, + "grad_norm": 0.703607976436615, + "learning_rate": 1.6824383034326005e-05, + "loss": 2.136, + "step": 8212 + }, + { + "epoch": 0.27, + "grad_norm": 0.6920743584632874, + "learning_rate": 1.6823606081413696e-05, + "loss": 2.1082, + "step": 8213 + }, + { + "epoch": 0.27, + "grad_norm": 0.7157577276229858, + "learning_rate": 1.6822829051412285e-05, + "loss": 2.1073, + "step": 8214 + }, + { + "epoch": 0.27, + "grad_norm": 0.731047511100769, + "learning_rate": 1.682205194433055e-05, + "loss": 2.1453, + "step": 8215 + }, + { + "epoch": 0.27, + "grad_norm": 0.7084121108055115, + "learning_rate": 1.682127476017727e-05, + "loss": 2.1385, + "step": 8216 + }, + { + "epoch": 0.27, + "grad_norm": 0.6725792288780212, + "learning_rate": 1.682049749896123e-05, + "loss": 2.1152, + "step": 8217 + }, + { + "epoch": 0.27, + "grad_norm": 0.7226374745368958, + "learning_rate": 1.68197201606912e-05, + "loss": 2.1444, + "step": 8218 + }, + { + "epoch": 0.27, + "grad_norm": 0.7008115649223328, + "learning_rate": 1.681894274537597e-05, + "loss": 2.1081, + "step": 8219 + }, + { + "epoch": 0.27, + "grad_norm": 0.7104893326759338, + "learning_rate": 1.6818165253024322e-05, + "loss": 2.1692, + "step": 8220 + }, + { + "epoch": 0.27, + "grad_norm": 0.6952874660491943, + "learning_rate": 1.681738768364504e-05, + "loss": 2.0989, + "step": 8221 + }, + { + "epoch": 0.27, + "grad_norm": 0.7030425667762756, + "learning_rate": 1.6816610037246903e-05, + "loss": 2.0449, + "step": 8222 + }, + { + "epoch": 0.27, + "grad_norm": 0.7712633609771729, + "learning_rate": 1.6815832313838704e-05, + "loss": 2.0826, + "step": 8223 + }, + { + "epoch": 0.27, + "grad_norm": 0.7134078741073608, + "learning_rate": 1.6815054513429226e-05, + "loss": 2.1914, + "step": 8224 + }, + { + "epoch": 0.27, + "grad_norm": 0.7146320343017578, + "learning_rate": 1.6814276636027255e-05, + "loss": 2.1565, + "step": 8225 + }, + { + "epoch": 0.27, + "grad_norm": 0.7184485793113708, + "learning_rate": 1.681349868164158e-05, + "loss": 2.1965, + "step": 8226 + }, + { + "epoch": 0.27, + "grad_norm": 0.7314838767051697, + "learning_rate": 1.6812720650280994e-05, + "loss": 2.1331, + "step": 8227 + }, + { + "epoch": 0.27, + "grad_norm": 0.7233169674873352, + "learning_rate": 1.6811942541954277e-05, + "loss": 2.1323, + "step": 8228 + }, + { + "epoch": 0.27, + "grad_norm": 0.70633864402771, + "learning_rate": 1.681116435667023e-05, + "loss": 2.0925, + "step": 8229 + }, + { + "epoch": 0.27, + "grad_norm": 0.7111966013908386, + "learning_rate": 1.6810386094437637e-05, + "loss": 2.1374, + "step": 8230 + }, + { + "epoch": 0.27, + "grad_norm": 0.7626006007194519, + "learning_rate": 1.680960775526529e-05, + "loss": 2.1396, + "step": 8231 + }, + { + "epoch": 0.27, + "grad_norm": 0.6895802617073059, + "learning_rate": 1.680882933916199e-05, + "loss": 2.0748, + "step": 8232 + }, + { + "epoch": 0.27, + "grad_norm": 0.7243805527687073, + "learning_rate": 1.6808050846136528e-05, + "loss": 2.1607, + "step": 8233 + }, + { + "epoch": 0.27, + "grad_norm": 0.7030206322669983, + "learning_rate": 1.6807272276197696e-05, + "loss": 2.2233, + "step": 8234 + }, + { + "epoch": 0.27, + "grad_norm": 0.7152889966964722, + "learning_rate": 1.680649362935429e-05, + "loss": 2.1177, + "step": 8235 + }, + { + "epoch": 0.27, + "grad_norm": 0.6976685523986816, + "learning_rate": 1.680571490561511e-05, + "loss": 2.1297, + "step": 8236 + }, + { + "epoch": 0.27, + "grad_norm": 0.6971706748008728, + "learning_rate": 1.680493610498895e-05, + "loss": 2.1723, + "step": 8237 + }, + { + "epoch": 0.27, + "grad_norm": 0.7076480388641357, + "learning_rate": 1.6804157227484606e-05, + "loss": 2.2193, + "step": 8238 + }, + { + "epoch": 0.27, + "grad_norm": 0.704615592956543, + "learning_rate": 1.680337827311089e-05, + "loss": 2.108, + "step": 8239 + }, + { + "epoch": 0.27, + "grad_norm": 0.7076357007026672, + "learning_rate": 1.680259924187659e-05, + "loss": 2.1563, + "step": 8240 + }, + { + "epoch": 0.27, + "grad_norm": 0.7415187358856201, + "learning_rate": 1.6801820133790504e-05, + "loss": 2.2339, + "step": 8241 + }, + { + "epoch": 0.27, + "grad_norm": 0.6887537240982056, + "learning_rate": 1.6801040948861446e-05, + "loss": 2.1614, + "step": 8242 + }, + { + "epoch": 0.27, + "grad_norm": 0.728999674320221, + "learning_rate": 1.6800261687098215e-05, + "loss": 2.172, + "step": 8243 + }, + { + "epoch": 0.27, + "grad_norm": 0.7224252820014954, + "learning_rate": 1.6799482348509607e-05, + "loss": 2.1779, + "step": 8244 + }, + { + "epoch": 0.27, + "grad_norm": 0.7017470598220825, + "learning_rate": 1.679870293310444e-05, + "loss": 2.075, + "step": 8245 + }, + { + "epoch": 0.27, + "grad_norm": 0.7249842882156372, + "learning_rate": 1.6797923440891505e-05, + "loss": 2.1235, + "step": 8246 + }, + { + "epoch": 0.27, + "grad_norm": 0.6951231956481934, + "learning_rate": 1.679714387187962e-05, + "loss": 2.1208, + "step": 8247 + }, + { + "epoch": 0.27, + "grad_norm": 0.7260183095932007, + "learning_rate": 1.6796364226077582e-05, + "loss": 2.1822, + "step": 8248 + }, + { + "epoch": 0.27, + "grad_norm": 0.7375438809394836, + "learning_rate": 1.6795584503494207e-05, + "loss": 2.165, + "step": 8249 + }, + { + "epoch": 0.27, + "grad_norm": 0.7089533805847168, + "learning_rate": 1.67948047041383e-05, + "loss": 2.1819, + "step": 8250 + }, + { + "epoch": 0.27, + "grad_norm": 0.7065329551696777, + "learning_rate": 1.6794024828018675e-05, + "loss": 2.1518, + "step": 8251 + }, + { + "epoch": 0.27, + "grad_norm": 0.7169740796089172, + "learning_rate": 1.6793244875144132e-05, + "loss": 2.1594, + "step": 8252 + }, + { + "epoch": 0.27, + "grad_norm": 0.6889315843582153, + "learning_rate": 1.6792464845523494e-05, + "loss": 2.0848, + "step": 8253 + }, + { + "epoch": 0.27, + "grad_norm": 0.7495715618133545, + "learning_rate": 1.679168473916557e-05, + "loss": 2.1362, + "step": 8254 + }, + { + "epoch": 0.27, + "grad_norm": 0.7213800549507141, + "learning_rate": 1.679090455607917e-05, + "loss": 2.1531, + "step": 8255 + }, + { + "epoch": 0.27, + "grad_norm": 0.6942418217658997, + "learning_rate": 1.6790124296273114e-05, + "loss": 2.1153, + "step": 8256 + }, + { + "epoch": 0.27, + "grad_norm": 0.7012165188789368, + "learning_rate": 1.6789343959756207e-05, + "loss": 2.1485, + "step": 8257 + }, + { + "epoch": 0.27, + "grad_norm": 0.751469075679779, + "learning_rate": 1.6788563546537275e-05, + "loss": 2.071, + "step": 8258 + }, + { + "epoch": 0.27, + "grad_norm": 0.7323615550994873, + "learning_rate": 1.678778305662513e-05, + "loss": 2.1121, + "step": 8259 + }, + { + "epoch": 0.27, + "grad_norm": 0.7156904339790344, + "learning_rate": 1.678700249002859e-05, + "loss": 2.1633, + "step": 8260 + }, + { + "epoch": 0.27, + "grad_norm": 0.7246262431144714, + "learning_rate": 1.6786221846756472e-05, + "loss": 2.1547, + "step": 8261 + }, + { + "epoch": 0.27, + "grad_norm": 0.7387124300003052, + "learning_rate": 1.6785441126817596e-05, + "loss": 2.1047, + "step": 8262 + }, + { + "epoch": 0.27, + "grad_norm": 0.7257428169250488, + "learning_rate": 1.6784660330220788e-05, + "loss": 2.1553, + "step": 8263 + }, + { + "epoch": 0.27, + "grad_norm": 0.7381401062011719, + "learning_rate": 1.678387945697486e-05, + "loss": 2.0597, + "step": 8264 + }, + { + "epoch": 0.27, + "grad_norm": 0.7107391357421875, + "learning_rate": 1.678309850708864e-05, + "loss": 2.1646, + "step": 8265 + }, + { + "epoch": 0.28, + "grad_norm": 0.7003684639930725, + "learning_rate": 1.6782317480570943e-05, + "loss": 2.1272, + "step": 8266 + }, + { + "epoch": 0.28, + "grad_norm": 0.6960376501083374, + "learning_rate": 1.6781536377430604e-05, + "loss": 2.1271, + "step": 8267 + }, + { + "epoch": 0.28, + "grad_norm": 0.7366856336593628, + "learning_rate": 1.6780755197676437e-05, + "loss": 2.1617, + "step": 8268 + }, + { + "epoch": 0.28, + "grad_norm": 0.6857305765151978, + "learning_rate": 1.6779973941317274e-05, + "loss": 2.1715, + "step": 8269 + }, + { + "epoch": 0.28, + "grad_norm": 0.6927384734153748, + "learning_rate": 1.6779192608361938e-05, + "loss": 2.1503, + "step": 8270 + }, + { + "epoch": 0.28, + "grad_norm": 0.7158834934234619, + "learning_rate": 1.6778411198819258e-05, + "loss": 2.1308, + "step": 8271 + }, + { + "epoch": 0.28, + "grad_norm": 0.6907369494438171, + "learning_rate": 1.677762971269806e-05, + "loss": 2.1912, + "step": 8272 + }, + { + "epoch": 0.28, + "grad_norm": 0.7156636714935303, + "learning_rate": 1.6776848150007173e-05, + "loss": 2.143, + "step": 8273 + }, + { + "epoch": 0.28, + "grad_norm": 0.7007836699485779, + "learning_rate": 1.677606651075543e-05, + "loss": 2.0942, + "step": 8274 + }, + { + "epoch": 0.28, + "grad_norm": 0.7127761244773865, + "learning_rate": 1.6775284794951654e-05, + "loss": 2.0989, + "step": 8275 + }, + { + "epoch": 0.28, + "grad_norm": 0.7096700668334961, + "learning_rate": 1.6774503002604686e-05, + "loss": 2.2125, + "step": 8276 + }, + { + "epoch": 0.28, + "grad_norm": 0.6989270448684692, + "learning_rate": 1.6773721133723347e-05, + "loss": 2.0727, + "step": 8277 + }, + { + "epoch": 0.28, + "grad_norm": 0.7447015643119812, + "learning_rate": 1.6772939188316485e-05, + "loss": 2.1774, + "step": 8278 + }, + { + "epoch": 0.28, + "grad_norm": 0.7023367285728455, + "learning_rate": 1.6772157166392918e-05, + "loss": 2.1555, + "step": 8279 + }, + { + "epoch": 0.28, + "grad_norm": 0.6987752318382263, + "learning_rate": 1.6771375067961494e-05, + "loss": 2.2008, + "step": 8280 + }, + { + "epoch": 0.28, + "grad_norm": 0.6770157217979431, + "learning_rate": 1.6770592893031037e-05, + "loss": 2.1727, + "step": 8281 + }, + { + "epoch": 0.28, + "grad_norm": 0.7365054488182068, + "learning_rate": 1.6769810641610392e-05, + "loss": 2.2102, + "step": 8282 + }, + { + "epoch": 0.28, + "grad_norm": 0.6906271576881409, + "learning_rate": 1.6769028313708394e-05, + "loss": 2.161, + "step": 8283 + }, + { + "epoch": 0.28, + "grad_norm": 0.7183332443237305, + "learning_rate": 1.676824590933388e-05, + "loss": 2.1348, + "step": 8284 + }, + { + "epoch": 0.28, + "grad_norm": 0.7114660143852234, + "learning_rate": 1.6767463428495693e-05, + "loss": 2.1544, + "step": 8285 + }, + { + "epoch": 0.28, + "grad_norm": 0.7062199115753174, + "learning_rate": 1.676668087120267e-05, + "loss": 2.1454, + "step": 8286 + }, + { + "epoch": 0.28, + "grad_norm": 0.6940359473228455, + "learning_rate": 1.676589823746365e-05, + "loss": 2.1202, + "step": 8287 + }, + { + "epoch": 0.28, + "grad_norm": 0.7147840857505798, + "learning_rate": 1.676511552728748e-05, + "loss": 2.1315, + "step": 8288 + }, + { + "epoch": 0.28, + "grad_norm": 0.7117205262184143, + "learning_rate": 1.6764332740682996e-05, + "loss": 2.13, + "step": 8289 + }, + { + "epoch": 0.28, + "grad_norm": 0.7808775901794434, + "learning_rate": 1.6763549877659047e-05, + "loss": 2.1542, + "step": 8290 + }, + { + "epoch": 0.28, + "grad_norm": 0.7333183884620667, + "learning_rate": 1.6762766938224476e-05, + "loss": 2.1379, + "step": 8291 + }, + { + "epoch": 0.28, + "grad_norm": 0.7025657892227173, + "learning_rate": 1.6761983922388125e-05, + "loss": 2.0825, + "step": 8292 + }, + { + "epoch": 0.28, + "grad_norm": 0.7131985425949097, + "learning_rate": 1.6761200830158843e-05, + "loss": 2.1719, + "step": 8293 + }, + { + "epoch": 0.28, + "grad_norm": 0.7128108739852905, + "learning_rate": 1.6760417661545477e-05, + "loss": 2.1223, + "step": 8294 + }, + { + "epoch": 0.28, + "grad_norm": 0.7385960221290588, + "learning_rate": 1.6759634416556876e-05, + "loss": 2.1175, + "step": 8295 + }, + { + "epoch": 0.28, + "grad_norm": 0.7357921600341797, + "learning_rate": 1.6758851095201888e-05, + "loss": 2.1118, + "step": 8296 + }, + { + "epoch": 0.28, + "grad_norm": 0.718222439289093, + "learning_rate": 1.6758067697489356e-05, + "loss": 2.1226, + "step": 8297 + }, + { + "epoch": 0.28, + "grad_norm": 0.6995694041252136, + "learning_rate": 1.675728422342814e-05, + "loss": 2.1624, + "step": 8298 + }, + { + "epoch": 0.28, + "grad_norm": 0.7512602806091309, + "learning_rate": 1.6756500673027085e-05, + "loss": 2.2035, + "step": 8299 + }, + { + "epoch": 0.28, + "grad_norm": 0.7235705256462097, + "learning_rate": 1.6755717046295046e-05, + "loss": 2.1831, + "step": 8300 + }, + { + "epoch": 0.28, + "grad_norm": 0.7105406522750854, + "learning_rate": 1.6754933343240876e-05, + "loss": 2.1052, + "step": 8301 + }, + { + "epoch": 0.28, + "grad_norm": 0.7159382104873657, + "learning_rate": 1.6754149563873428e-05, + "loss": 2.1358, + "step": 8302 + }, + { + "epoch": 0.28, + "grad_norm": 0.7522639036178589, + "learning_rate": 1.6753365708201552e-05, + "loss": 2.2017, + "step": 8303 + }, + { + "epoch": 0.28, + "grad_norm": 0.7046844363212585, + "learning_rate": 1.6752581776234113e-05, + "loss": 2.1157, + "step": 8304 + }, + { + "epoch": 0.28, + "grad_norm": 0.7020446062088013, + "learning_rate": 1.6751797767979958e-05, + "loss": 2.1251, + "step": 8305 + }, + { + "epoch": 0.28, + "grad_norm": 0.7152548432350159, + "learning_rate": 1.6751013683447953e-05, + "loss": 2.1626, + "step": 8306 + }, + { + "epoch": 0.28, + "grad_norm": 0.7360692620277405, + "learning_rate": 1.675022952264695e-05, + "loss": 2.1307, + "step": 8307 + }, + { + "epoch": 0.28, + "grad_norm": 0.7157758474349976, + "learning_rate": 1.674944528558581e-05, + "loss": 2.1199, + "step": 8308 + }, + { + "epoch": 0.28, + "grad_norm": 0.7382952570915222, + "learning_rate": 1.6748660972273394e-05, + "loss": 2.1937, + "step": 8309 + }, + { + "epoch": 0.28, + "grad_norm": 0.7215338349342346, + "learning_rate": 1.674787658271856e-05, + "loss": 2.1216, + "step": 8310 + }, + { + "epoch": 0.28, + "grad_norm": 0.7211667895317078, + "learning_rate": 1.6747092116930173e-05, + "loss": 2.1462, + "step": 8311 + }, + { + "epoch": 0.28, + "grad_norm": 0.7023290395736694, + "learning_rate": 1.6746307574917093e-05, + "loss": 2.1455, + "step": 8312 + }, + { + "epoch": 0.28, + "grad_norm": 0.73150634765625, + "learning_rate": 1.6745522956688184e-05, + "loss": 2.1186, + "step": 8313 + }, + { + "epoch": 0.28, + "grad_norm": 0.697403609752655, + "learning_rate": 1.6744738262252303e-05, + "loss": 2.1688, + "step": 8314 + }, + { + "epoch": 0.28, + "grad_norm": 0.8051870465278625, + "learning_rate": 1.674395349161833e-05, + "loss": 2.0316, + "step": 8315 + }, + { + "epoch": 0.28, + "grad_norm": 0.7292209267616272, + "learning_rate": 1.6743168644795122e-05, + "loss": 2.1886, + "step": 8316 + }, + { + "epoch": 0.28, + "grad_norm": 0.7143980860710144, + "learning_rate": 1.6742383721791544e-05, + "loss": 2.2072, + "step": 8317 + }, + { + "epoch": 0.28, + "grad_norm": 0.7262305021286011, + "learning_rate": 1.674159872261647e-05, + "loss": 2.1488, + "step": 8318 + }, + { + "epoch": 0.28, + "grad_norm": 0.7264353036880493, + "learning_rate": 1.6740813647278756e-05, + "loss": 2.1718, + "step": 8319 + }, + { + "epoch": 0.28, + "grad_norm": 0.6919220685958862, + "learning_rate": 1.674002849578729e-05, + "loss": 2.1457, + "step": 8320 + }, + { + "epoch": 0.28, + "grad_norm": 0.7476584315299988, + "learning_rate": 1.6739243268150924e-05, + "loss": 2.1521, + "step": 8321 + }, + { + "epoch": 0.28, + "grad_norm": 0.7002483010292053, + "learning_rate": 1.6738457964378545e-05, + "loss": 2.1546, + "step": 8322 + }, + { + "epoch": 0.28, + "grad_norm": 0.7533687353134155, + "learning_rate": 1.673767258447901e-05, + "loss": 2.1098, + "step": 8323 + }, + { + "epoch": 0.28, + "grad_norm": 0.7503271698951721, + "learning_rate": 1.6736887128461203e-05, + "loss": 2.0784, + "step": 8324 + }, + { + "epoch": 0.28, + "grad_norm": 0.7119193077087402, + "learning_rate": 1.673610159633399e-05, + "loss": 2.1547, + "step": 8325 + }, + { + "epoch": 0.28, + "grad_norm": 0.7029410004615784, + "learning_rate": 1.673531598810625e-05, + "loss": 2.1347, + "step": 8326 + }, + { + "epoch": 0.28, + "grad_norm": 0.726902186870575, + "learning_rate": 1.673453030378686e-05, + "loss": 2.1404, + "step": 8327 + }, + { + "epoch": 0.28, + "grad_norm": 0.6724271178245544, + "learning_rate": 1.6733744543384693e-05, + "loss": 2.1356, + "step": 8328 + }, + { + "epoch": 0.28, + "grad_norm": 0.7225965261459351, + "learning_rate": 1.6732958706908628e-05, + "loss": 2.0981, + "step": 8329 + }, + { + "epoch": 0.28, + "grad_norm": 0.7144973874092102, + "learning_rate": 1.6732172794367538e-05, + "loss": 2.175, + "step": 8330 + }, + { + "epoch": 0.28, + "grad_norm": 0.7380536794662476, + "learning_rate": 1.6731386805770302e-05, + "loss": 2.1244, + "step": 8331 + }, + { + "epoch": 0.28, + "grad_norm": 0.7548120021820068, + "learning_rate": 1.6730600741125808e-05, + "loss": 2.1325, + "step": 8332 + }, + { + "epoch": 0.28, + "grad_norm": 0.7191427946090698, + "learning_rate": 1.6729814600442932e-05, + "loss": 2.1493, + "step": 8333 + }, + { + "epoch": 0.28, + "grad_norm": 0.7184178233146667, + "learning_rate": 1.672902838373055e-05, + "loss": 2.1327, + "step": 8334 + }, + { + "epoch": 0.28, + "grad_norm": 0.7234036326408386, + "learning_rate": 1.6728242090997554e-05, + "loss": 2.1388, + "step": 8335 + }, + { + "epoch": 0.28, + "grad_norm": 0.7199417352676392, + "learning_rate": 1.672745572225282e-05, + "loss": 2.1508, + "step": 8336 + }, + { + "epoch": 0.28, + "grad_norm": 0.7441931962966919, + "learning_rate": 1.672666927750523e-05, + "loss": 2.1783, + "step": 8337 + }, + { + "epoch": 0.28, + "grad_norm": 0.7059201598167419, + "learning_rate": 1.672588275676368e-05, + "loss": 2.0918, + "step": 8338 + }, + { + "epoch": 0.28, + "grad_norm": 0.700907289981842, + "learning_rate": 1.6725096160037042e-05, + "loss": 2.073, + "step": 8339 + }, + { + "epoch": 0.28, + "grad_norm": 0.7274459600448608, + "learning_rate": 1.6724309487334212e-05, + "loss": 2.1595, + "step": 8340 + }, + { + "epoch": 0.28, + "grad_norm": 0.6845276355743408, + "learning_rate": 1.6723522738664074e-05, + "loss": 2.1264, + "step": 8341 + }, + { + "epoch": 0.28, + "grad_norm": 0.7291163802146912, + "learning_rate": 1.6722735914035517e-05, + "loss": 2.2317, + "step": 8342 + }, + { + "epoch": 0.28, + "grad_norm": 0.7830685377120972, + "learning_rate": 1.672194901345743e-05, + "loss": 2.0342, + "step": 8343 + }, + { + "epoch": 0.28, + "grad_norm": 0.6782450675964355, + "learning_rate": 1.6721162036938698e-05, + "loss": 2.1267, + "step": 8344 + }, + { + "epoch": 0.28, + "grad_norm": 0.7008998394012451, + "learning_rate": 1.672037498448822e-05, + "loss": 2.1159, + "step": 8345 + }, + { + "epoch": 0.28, + "grad_norm": 0.7255397439002991, + "learning_rate": 1.6719587856114885e-05, + "loss": 2.1883, + "step": 8346 + }, + { + "epoch": 0.28, + "grad_norm": 0.7182254791259766, + "learning_rate": 1.6718800651827584e-05, + "loss": 2.1091, + "step": 8347 + }, + { + "epoch": 0.28, + "grad_norm": 0.7435699701309204, + "learning_rate": 1.6718013371635208e-05, + "loss": 2.1272, + "step": 8348 + }, + { + "epoch": 0.28, + "grad_norm": 0.7311944365501404, + "learning_rate": 1.6717226015546657e-05, + "loss": 2.2433, + "step": 8349 + }, + { + "epoch": 0.28, + "grad_norm": 0.7117100954055786, + "learning_rate": 1.671643858357082e-05, + "loss": 2.1646, + "step": 8350 + }, + { + "epoch": 0.28, + "grad_norm": 0.7185521125793457, + "learning_rate": 1.67156510757166e-05, + "loss": 2.1113, + "step": 8351 + }, + { + "epoch": 0.28, + "grad_norm": 0.6903740167617798, + "learning_rate": 1.6714863491992884e-05, + "loss": 2.0606, + "step": 8352 + }, + { + "epoch": 0.28, + "grad_norm": 0.6981849074363708, + "learning_rate": 1.6714075832408583e-05, + "loss": 2.1869, + "step": 8353 + }, + { + "epoch": 0.28, + "grad_norm": 0.7545415759086609, + "learning_rate": 1.6713288096972586e-05, + "loss": 2.1934, + "step": 8354 + }, + { + "epoch": 0.28, + "grad_norm": 0.699349582195282, + "learning_rate": 1.671250028569379e-05, + "loss": 2.1133, + "step": 8355 + }, + { + "epoch": 0.28, + "grad_norm": 0.7073742151260376, + "learning_rate": 1.6711712398581105e-05, + "loss": 2.1475, + "step": 8356 + }, + { + "epoch": 0.28, + "grad_norm": 0.7133447527885437, + "learning_rate": 1.6710924435643426e-05, + "loss": 2.2382, + "step": 8357 + }, + { + "epoch": 0.28, + "grad_norm": 0.7232763171195984, + "learning_rate": 1.6710136396889654e-05, + "loss": 2.1209, + "step": 8358 + }, + { + "epoch": 0.28, + "grad_norm": 0.6911131143569946, + "learning_rate": 1.6709348282328693e-05, + "loss": 2.1182, + "step": 8359 + }, + { + "epoch": 0.28, + "grad_norm": 0.7193372845649719, + "learning_rate": 1.6708560091969447e-05, + "loss": 2.089, + "step": 8360 + }, + { + "epoch": 0.28, + "grad_norm": 0.7212553024291992, + "learning_rate": 1.6707771825820823e-05, + "loss": 2.1283, + "step": 8361 + }, + { + "epoch": 0.28, + "grad_norm": 0.6816129684448242, + "learning_rate": 1.670698348389172e-05, + "loss": 2.1855, + "step": 8362 + }, + { + "epoch": 0.28, + "grad_norm": 0.7554389238357544, + "learning_rate": 1.6706195066191053e-05, + "loss": 2.1181, + "step": 8363 + }, + { + "epoch": 0.28, + "grad_norm": 0.7077310681343079, + "learning_rate": 1.670540657272772e-05, + "loss": 2.1316, + "step": 8364 + }, + { + "epoch": 0.28, + "grad_norm": 0.7212445735931396, + "learning_rate": 1.670461800351064e-05, + "loss": 2.1734, + "step": 8365 + }, + { + "epoch": 0.28, + "grad_norm": 0.6918357610702515, + "learning_rate": 1.6703829358548708e-05, + "loss": 2.101, + "step": 8366 + }, + { + "epoch": 0.28, + "grad_norm": 0.7416213154792786, + "learning_rate": 1.6703040637850845e-05, + "loss": 2.1361, + "step": 8367 + }, + { + "epoch": 0.28, + "grad_norm": 0.7355925440788269, + "learning_rate": 1.6702251841425955e-05, + "loss": 2.111, + "step": 8368 + }, + { + "epoch": 0.28, + "grad_norm": 0.6752378344535828, + "learning_rate": 1.6701462969282955e-05, + "loss": 2.1532, + "step": 8369 + }, + { + "epoch": 0.28, + "grad_norm": 0.7172408103942871, + "learning_rate": 1.670067402143075e-05, + "loss": 2.1626, + "step": 8370 + }, + { + "epoch": 0.28, + "grad_norm": 0.7430909276008606, + "learning_rate": 1.669988499787826e-05, + "loss": 2.163, + "step": 8371 + }, + { + "epoch": 0.28, + "grad_norm": 0.7323562502861023, + "learning_rate": 1.669909589863439e-05, + "loss": 2.2295, + "step": 8372 + }, + { + "epoch": 0.28, + "grad_norm": 0.6948447823524475, + "learning_rate": 1.6698306723708066e-05, + "loss": 2.1245, + "step": 8373 + }, + { + "epoch": 0.28, + "grad_norm": 0.7318553924560547, + "learning_rate": 1.6697517473108197e-05, + "loss": 2.1301, + "step": 8374 + }, + { + "epoch": 0.28, + "grad_norm": 0.7293442487716675, + "learning_rate": 1.66967281468437e-05, + "loss": 2.1382, + "step": 8375 + }, + { + "epoch": 0.28, + "grad_norm": 0.6928600072860718, + "learning_rate": 1.6695938744923493e-05, + "loss": 2.1421, + "step": 8376 + }, + { + "epoch": 0.28, + "grad_norm": 0.6966847777366638, + "learning_rate": 1.6695149267356493e-05, + "loss": 2.18, + "step": 8377 + }, + { + "epoch": 0.28, + "grad_norm": 0.7042227983474731, + "learning_rate": 1.6694359714151623e-05, + "loss": 2.0846, + "step": 8378 + }, + { + "epoch": 0.28, + "grad_norm": 0.6989268064498901, + "learning_rate": 1.66935700853178e-05, + "loss": 2.1497, + "step": 8379 + }, + { + "epoch": 0.28, + "grad_norm": 0.7188319563865662, + "learning_rate": 1.6692780380863946e-05, + "loss": 2.1777, + "step": 8380 + }, + { + "epoch": 0.28, + "grad_norm": 0.6994345784187317, + "learning_rate": 1.6691990600798977e-05, + "loss": 2.1279, + "step": 8381 + }, + { + "epoch": 0.28, + "grad_norm": 0.6980247497558594, + "learning_rate": 1.6691200745131825e-05, + "loss": 2.118, + "step": 8382 + }, + { + "epoch": 0.28, + "grad_norm": 0.7195389866828918, + "learning_rate": 1.6690410813871407e-05, + "loss": 2.147, + "step": 8383 + }, + { + "epoch": 0.28, + "grad_norm": 0.7061222791671753, + "learning_rate": 1.6689620807026648e-05, + "loss": 2.1469, + "step": 8384 + }, + { + "epoch": 0.28, + "grad_norm": 0.7184776663780212, + "learning_rate": 1.668883072460647e-05, + "loss": 2.2292, + "step": 8385 + }, + { + "epoch": 0.28, + "grad_norm": 0.6848839521408081, + "learning_rate": 1.6688040566619806e-05, + "loss": 2.072, + "step": 8386 + }, + { + "epoch": 0.28, + "grad_norm": 0.6812568306922913, + "learning_rate": 1.6687250333075583e-05, + "loss": 2.1698, + "step": 8387 + }, + { + "epoch": 0.28, + "grad_norm": 0.7278180122375488, + "learning_rate": 1.668646002398272e-05, + "loss": 2.1683, + "step": 8388 + }, + { + "epoch": 0.28, + "grad_norm": 0.6893550157546997, + "learning_rate": 1.6685669639350152e-05, + "loss": 2.0946, + "step": 8389 + }, + { + "epoch": 0.28, + "grad_norm": 0.6801903247833252, + "learning_rate": 1.6684879179186807e-05, + "loss": 2.1645, + "step": 8390 + }, + { + "epoch": 0.28, + "grad_norm": 0.7107182145118713, + "learning_rate": 1.668408864350161e-05, + "loss": 2.1383, + "step": 8391 + }, + { + "epoch": 0.28, + "grad_norm": 0.7200020551681519, + "learning_rate": 1.6683298032303503e-05, + "loss": 2.1573, + "step": 8392 + }, + { + "epoch": 0.28, + "grad_norm": 0.698668360710144, + "learning_rate": 1.668250734560141e-05, + "loss": 2.157, + "step": 8393 + }, + { + "epoch": 0.28, + "grad_norm": 0.723284125328064, + "learning_rate": 1.6681716583404263e-05, + "loss": 2.1707, + "step": 8394 + }, + { + "epoch": 0.28, + "grad_norm": 0.7130258679389954, + "learning_rate": 1.6680925745721003e-05, + "loss": 2.1787, + "step": 8395 + }, + { + "epoch": 0.28, + "grad_norm": 0.7131319046020508, + "learning_rate": 1.6680134832560555e-05, + "loss": 2.1454, + "step": 8396 + }, + { + "epoch": 0.28, + "grad_norm": 0.763205885887146, + "learning_rate": 1.667934384393186e-05, + "loss": 2.1734, + "step": 8397 + }, + { + "epoch": 0.28, + "grad_norm": 0.7104514837265015, + "learning_rate": 1.667855277984385e-05, + "loss": 2.1275, + "step": 8398 + }, + { + "epoch": 0.28, + "grad_norm": 0.7180097699165344, + "learning_rate": 1.6677761640305464e-05, + "loss": 2.2083, + "step": 8399 + }, + { + "epoch": 0.28, + "grad_norm": 0.7494561076164246, + "learning_rate": 1.6676970425325646e-05, + "loss": 2.1904, + "step": 8400 + }, + { + "epoch": 0.28, + "grad_norm": 0.7247684597969055, + "learning_rate": 1.6676179134913325e-05, + "loss": 2.1325, + "step": 8401 + }, + { + "epoch": 0.28, + "grad_norm": 0.718853235244751, + "learning_rate": 1.667538776907745e-05, + "loss": 2.164, + "step": 8402 + }, + { + "epoch": 0.28, + "grad_norm": 0.7261177897453308, + "learning_rate": 1.6674596327826952e-05, + "loss": 2.1968, + "step": 8403 + }, + { + "epoch": 0.28, + "grad_norm": 0.7159755229949951, + "learning_rate": 1.667380481117078e-05, + "loss": 2.0809, + "step": 8404 + }, + { + "epoch": 0.28, + "grad_norm": 0.7119253873825073, + "learning_rate": 1.6673013219117866e-05, + "loss": 2.1298, + "step": 8405 + }, + { + "epoch": 0.28, + "grad_norm": 0.7206315994262695, + "learning_rate": 1.6672221551677163e-05, + "loss": 2.2023, + "step": 8406 + }, + { + "epoch": 0.28, + "grad_norm": 0.740351140499115, + "learning_rate": 1.667142980885761e-05, + "loss": 2.1002, + "step": 8407 + }, + { + "epoch": 0.28, + "grad_norm": 0.730655312538147, + "learning_rate": 1.6670637990668153e-05, + "loss": 2.1188, + "step": 8408 + }, + { + "epoch": 0.28, + "grad_norm": 0.706206738948822, + "learning_rate": 1.6669846097117738e-05, + "loss": 2.1719, + "step": 8409 + }, + { + "epoch": 0.28, + "grad_norm": 0.7733293175697327, + "learning_rate": 1.6669054128215313e-05, + "loss": 2.137, + "step": 8410 + }, + { + "epoch": 0.28, + "grad_norm": 0.7247806191444397, + "learning_rate": 1.6668262083969822e-05, + "loss": 2.1317, + "step": 8411 + }, + { + "epoch": 0.28, + "grad_norm": 0.7170233726501465, + "learning_rate": 1.666746996439021e-05, + "loss": 2.1185, + "step": 8412 + }, + { + "epoch": 0.28, + "grad_norm": 0.7162359952926636, + "learning_rate": 1.6666677769485432e-05, + "loss": 2.1113, + "step": 8413 + }, + { + "epoch": 0.28, + "grad_norm": 0.7174320816993713, + "learning_rate": 1.666588549926444e-05, + "loss": 2.109, + "step": 8414 + }, + { + "epoch": 0.28, + "grad_norm": 0.7953102588653564, + "learning_rate": 1.6665093153736177e-05, + "loss": 2.1272, + "step": 8415 + }, + { + "epoch": 0.28, + "grad_norm": 0.7284758687019348, + "learning_rate": 1.6664300732909595e-05, + "loss": 2.1221, + "step": 8416 + }, + { + "epoch": 0.28, + "grad_norm": 0.7045118808746338, + "learning_rate": 1.6663508236793653e-05, + "loss": 2.1435, + "step": 8417 + }, + { + "epoch": 0.28, + "grad_norm": 0.7355021238327026, + "learning_rate": 1.66627156653973e-05, + "loss": 2.1627, + "step": 8418 + }, + { + "epoch": 0.28, + "grad_norm": 0.7339742183685303, + "learning_rate": 1.6661923018729488e-05, + "loss": 2.1397, + "step": 8419 + }, + { + "epoch": 0.28, + "grad_norm": 0.7345160245895386, + "learning_rate": 1.6661130296799173e-05, + "loss": 2.1937, + "step": 8420 + }, + { + "epoch": 0.28, + "grad_norm": 0.7035835385322571, + "learning_rate": 1.6660337499615315e-05, + "loss": 2.0899, + "step": 8421 + }, + { + "epoch": 0.28, + "grad_norm": 0.7480828166007996, + "learning_rate": 1.6659544627186863e-05, + "loss": 2.1465, + "step": 8422 + }, + { + "epoch": 0.28, + "grad_norm": 0.707321286201477, + "learning_rate": 1.6658751679522783e-05, + "loss": 2.1243, + "step": 8423 + }, + { + "epoch": 0.28, + "grad_norm": 0.703544020652771, + "learning_rate": 1.6657958656632027e-05, + "loss": 2.1214, + "step": 8424 + }, + { + "epoch": 0.28, + "grad_norm": 0.7414666414260864, + "learning_rate": 1.6657165558523555e-05, + "loss": 2.1719, + "step": 8425 + }, + { + "epoch": 0.28, + "grad_norm": 0.716022789478302, + "learning_rate": 1.665637238520633e-05, + "loss": 2.0527, + "step": 8426 + }, + { + "epoch": 0.28, + "grad_norm": 0.7326505780220032, + "learning_rate": 1.665557913668931e-05, + "loss": 2.1086, + "step": 8427 + }, + { + "epoch": 0.28, + "grad_norm": 0.6998535990715027, + "learning_rate": 1.6654785812981455e-05, + "loss": 2.0982, + "step": 8428 + }, + { + "epoch": 0.28, + "grad_norm": 0.6943238973617554, + "learning_rate": 1.6653992414091736e-05, + "loss": 2.0497, + "step": 8429 + }, + { + "epoch": 0.28, + "grad_norm": 0.7014089822769165, + "learning_rate": 1.6653198940029104e-05, + "loss": 2.1706, + "step": 8430 + }, + { + "epoch": 0.28, + "grad_norm": 0.7265568971633911, + "learning_rate": 1.6652405390802534e-05, + "loss": 2.2045, + "step": 8431 + }, + { + "epoch": 0.28, + "grad_norm": 0.7365382313728333, + "learning_rate": 1.6651611766420983e-05, + "loss": 2.1409, + "step": 8432 + }, + { + "epoch": 0.28, + "grad_norm": 0.7257308959960938, + "learning_rate": 1.6650818066893423e-05, + "loss": 2.1283, + "step": 8433 + }, + { + "epoch": 0.28, + "grad_norm": 0.7167442440986633, + "learning_rate": 1.6650024292228817e-05, + "loss": 2.1801, + "step": 8434 + }, + { + "epoch": 0.28, + "grad_norm": 0.7947908043861389, + "learning_rate": 1.664923044243614e-05, + "loss": 2.1535, + "step": 8435 + }, + { + "epoch": 0.28, + "grad_norm": 0.6911414265632629, + "learning_rate": 1.6648436517524344e-05, + "loss": 2.1545, + "step": 8436 + }, + { + "epoch": 0.28, + "grad_norm": 0.6991528272628784, + "learning_rate": 1.6647642517502417e-05, + "loss": 2.1625, + "step": 8437 + }, + { + "epoch": 0.28, + "grad_norm": 0.7230033278465271, + "learning_rate": 1.6646848442379314e-05, + "loss": 2.1526, + "step": 8438 + }, + { + "epoch": 0.28, + "grad_norm": 0.71552574634552, + "learning_rate": 1.6646054292164016e-05, + "loss": 2.2209, + "step": 8439 + }, + { + "epoch": 0.28, + "grad_norm": 0.73277747631073, + "learning_rate": 1.664526006686549e-05, + "loss": 2.1951, + "step": 8440 + }, + { + "epoch": 0.28, + "grad_norm": 0.7186518907546997, + "learning_rate": 1.664446576649271e-05, + "loss": 2.1679, + "step": 8441 + }, + { + "epoch": 0.28, + "grad_norm": 0.7248024344444275, + "learning_rate": 1.6643671391054653e-05, + "loss": 2.1947, + "step": 8442 + }, + { + "epoch": 0.28, + "grad_norm": 0.751242458820343, + "learning_rate": 1.664287694056029e-05, + "loss": 2.1242, + "step": 8443 + }, + { + "epoch": 0.28, + "grad_norm": 0.6933706402778625, + "learning_rate": 1.6642082415018594e-05, + "loss": 2.1546, + "step": 8444 + }, + { + "epoch": 0.28, + "grad_norm": 0.7158252000808716, + "learning_rate": 1.6641287814438544e-05, + "loss": 2.1716, + "step": 8445 + }, + { + "epoch": 0.28, + "grad_norm": 0.7104446887969971, + "learning_rate": 1.664049313882912e-05, + "loss": 2.0644, + "step": 8446 + }, + { + "epoch": 0.28, + "grad_norm": 0.7372139096260071, + "learning_rate": 1.6639698388199293e-05, + "loss": 2.1729, + "step": 8447 + }, + { + "epoch": 0.28, + "grad_norm": 0.7316924929618835, + "learning_rate": 1.6638903562558046e-05, + "loss": 2.1271, + "step": 8448 + }, + { + "epoch": 0.28, + "grad_norm": 0.770596981048584, + "learning_rate": 1.6638108661914355e-05, + "loss": 2.1226, + "step": 8449 + }, + { + "epoch": 0.28, + "grad_norm": 0.7030594944953918, + "learning_rate": 1.6637313686277203e-05, + "loss": 2.1657, + "step": 8450 + }, + { + "epoch": 0.28, + "grad_norm": 0.6895646452903748, + "learning_rate": 1.6636518635655572e-05, + "loss": 2.1375, + "step": 8451 + }, + { + "epoch": 0.28, + "grad_norm": 0.7410181760787964, + "learning_rate": 1.6635723510058443e-05, + "loss": 2.137, + "step": 8452 + }, + { + "epoch": 0.28, + "grad_norm": 0.6950429081916809, + "learning_rate": 1.66349283094948e-05, + "loss": 2.1358, + "step": 8453 + }, + { + "epoch": 0.28, + "grad_norm": 0.7217109203338623, + "learning_rate": 1.663413303397362e-05, + "loss": 2.2561, + "step": 8454 + }, + { + "epoch": 0.28, + "grad_norm": 0.7148011326789856, + "learning_rate": 1.66333376835039e-05, + "loss": 2.147, + "step": 8455 + }, + { + "epoch": 0.28, + "grad_norm": 0.714650571346283, + "learning_rate": 1.6632542258094614e-05, + "loss": 2.0939, + "step": 8456 + }, + { + "epoch": 0.28, + "grad_norm": 0.7211595177650452, + "learning_rate": 1.6631746757754754e-05, + "loss": 2.1421, + "step": 8457 + }, + { + "epoch": 0.28, + "grad_norm": 0.7004099488258362, + "learning_rate": 1.6630951182493306e-05, + "loss": 2.144, + "step": 8458 + }, + { + "epoch": 0.28, + "grad_norm": 0.6834902167320251, + "learning_rate": 1.6630155532319257e-05, + "loss": 2.1436, + "step": 8459 + }, + { + "epoch": 0.28, + "grad_norm": 0.7063214182853699, + "learning_rate": 1.6629359807241597e-05, + "loss": 2.1505, + "step": 8460 + }, + { + "epoch": 0.28, + "grad_norm": 0.7732962369918823, + "learning_rate": 1.6628564007269315e-05, + "loss": 2.1846, + "step": 8461 + }, + { + "epoch": 0.28, + "grad_norm": 0.7055607438087463, + "learning_rate": 1.66277681324114e-05, + "loss": 2.2373, + "step": 8462 + }, + { + "epoch": 0.28, + "grad_norm": 0.689182460308075, + "learning_rate": 1.6626972182676847e-05, + "loss": 2.1085, + "step": 8463 + }, + { + "epoch": 0.28, + "grad_norm": 0.6998549699783325, + "learning_rate": 1.662617615807465e-05, + "loss": 2.116, + "step": 8464 + }, + { + "epoch": 0.28, + "grad_norm": 0.7300270199775696, + "learning_rate": 1.662538005861379e-05, + "loss": 2.1492, + "step": 8465 + }, + { + "epoch": 0.28, + "grad_norm": 0.7549532651901245, + "learning_rate": 1.662458388430327e-05, + "loss": 2.1431, + "step": 8466 + }, + { + "epoch": 0.28, + "grad_norm": 0.7259175777435303, + "learning_rate": 1.662378763515209e-05, + "loss": 2.2179, + "step": 8467 + }, + { + "epoch": 0.28, + "grad_norm": 0.7303707599639893, + "learning_rate": 1.662299131116923e-05, + "loss": 2.082, + "step": 8468 + }, + { + "epoch": 0.28, + "grad_norm": 0.7546704411506653, + "learning_rate": 1.6622194912363702e-05, + "loss": 2.2049, + "step": 8469 + }, + { + "epoch": 0.28, + "grad_norm": 0.6981831789016724, + "learning_rate": 1.6621398438744497e-05, + "loss": 2.169, + "step": 8470 + }, + { + "epoch": 0.28, + "grad_norm": 0.6856091618537903, + "learning_rate": 1.662060189032061e-05, + "loss": 2.1368, + "step": 8471 + }, + { + "epoch": 0.28, + "grad_norm": 0.7288587689399719, + "learning_rate": 1.661980526710105e-05, + "loss": 2.1485, + "step": 8472 + }, + { + "epoch": 0.28, + "grad_norm": 0.731757402420044, + "learning_rate": 1.66190085690948e-05, + "loss": 2.1266, + "step": 8473 + }, + { + "epoch": 0.28, + "grad_norm": 0.7040241956710815, + "learning_rate": 1.6618211796310876e-05, + "loss": 2.1543, + "step": 8474 + }, + { + "epoch": 0.28, + "grad_norm": 0.7040770053863525, + "learning_rate": 1.6617414948758273e-05, + "loss": 2.1694, + "step": 8475 + }, + { + "epoch": 0.28, + "grad_norm": 0.7272975444793701, + "learning_rate": 1.6616618026445994e-05, + "loss": 2.1573, + "step": 8476 + }, + { + "epoch": 0.28, + "grad_norm": 0.7112754583358765, + "learning_rate": 1.6615821029383043e-05, + "loss": 2.05, + "step": 8477 + }, + { + "epoch": 0.28, + "grad_norm": 0.7027485370635986, + "learning_rate": 1.6615023957578416e-05, + "loss": 2.143, + "step": 8478 + }, + { + "epoch": 0.28, + "grad_norm": 0.7477208971977234, + "learning_rate": 1.6614226811041134e-05, + "loss": 2.1382, + "step": 8479 + }, + { + "epoch": 0.28, + "grad_norm": 0.7543733716011047, + "learning_rate": 1.6613429589780193e-05, + "loss": 2.1455, + "step": 8480 + }, + { + "epoch": 0.28, + "grad_norm": 0.7092347145080566, + "learning_rate": 1.6612632293804594e-05, + "loss": 2.1214, + "step": 8481 + }, + { + "epoch": 0.28, + "grad_norm": 0.7138279676437378, + "learning_rate": 1.661183492312336e-05, + "loss": 2.085, + "step": 8482 + }, + { + "epoch": 0.28, + "grad_norm": 0.7034551501274109, + "learning_rate": 1.6611037477745483e-05, + "loss": 2.1731, + "step": 8483 + }, + { + "epoch": 0.28, + "grad_norm": 0.7038451433181763, + "learning_rate": 1.6610239957679983e-05, + "loss": 2.1823, + "step": 8484 + }, + { + "epoch": 0.28, + "grad_norm": 0.7277883887290955, + "learning_rate": 1.660944236293586e-05, + "loss": 2.098, + "step": 8485 + }, + { + "epoch": 0.28, + "grad_norm": 0.7263098359107971, + "learning_rate": 1.6608644693522136e-05, + "loss": 2.2134, + "step": 8486 + }, + { + "epoch": 0.28, + "grad_norm": 0.7074192762374878, + "learning_rate": 1.6607846949447816e-05, + "loss": 2.1099, + "step": 8487 + }, + { + "epoch": 0.28, + "grad_norm": 0.7036694288253784, + "learning_rate": 1.6607049130721913e-05, + "loss": 2.1641, + "step": 8488 + }, + { + "epoch": 0.28, + "grad_norm": 0.7400307059288025, + "learning_rate": 1.660625123735344e-05, + "loss": 2.1215, + "step": 8489 + }, + { + "epoch": 0.28, + "grad_norm": 0.715126097202301, + "learning_rate": 1.6605453269351416e-05, + "loss": 2.1314, + "step": 8490 + }, + { + "epoch": 0.28, + "grad_norm": 0.703659176826477, + "learning_rate": 1.660465522672485e-05, + "loss": 2.1438, + "step": 8491 + }, + { + "epoch": 0.28, + "grad_norm": 0.7191702127456665, + "learning_rate": 1.6603857109482757e-05, + "loss": 2.0984, + "step": 8492 + }, + { + "epoch": 0.28, + "grad_norm": 0.7585740089416504, + "learning_rate": 1.6603058917634162e-05, + "loss": 2.1753, + "step": 8493 + }, + { + "epoch": 0.28, + "grad_norm": 0.6940328478813171, + "learning_rate": 1.6602260651188073e-05, + "loss": 2.1386, + "step": 8494 + }, + { + "epoch": 0.28, + "grad_norm": 0.705902099609375, + "learning_rate": 1.6601462310153517e-05, + "loss": 2.1367, + "step": 8495 + }, + { + "epoch": 0.28, + "grad_norm": 0.6982494592666626, + "learning_rate": 1.6600663894539502e-05, + "loss": 2.1372, + "step": 8496 + }, + { + "epoch": 0.28, + "grad_norm": 0.715004563331604, + "learning_rate": 1.659986540435506e-05, + "loss": 2.0947, + "step": 8497 + }, + { + "epoch": 0.28, + "grad_norm": 0.6934937834739685, + "learning_rate": 1.6599066839609204e-05, + "loss": 2.1878, + "step": 8498 + }, + { + "epoch": 0.28, + "grad_norm": 0.8017980456352234, + "learning_rate": 1.6598268200310962e-05, + "loss": 2.2035, + "step": 8499 + }, + { + "epoch": 0.28, + "grad_norm": 0.7302924990653992, + "learning_rate": 1.6597469486469348e-05, + "loss": 2.0979, + "step": 8500 + }, + { + "epoch": 0.28, + "grad_norm": 0.7269686460494995, + "learning_rate": 1.6596670698093392e-05, + "loss": 2.0866, + "step": 8501 + }, + { + "epoch": 0.28, + "grad_norm": 0.7090336084365845, + "learning_rate": 1.6595871835192117e-05, + "loss": 2.1728, + "step": 8502 + }, + { + "epoch": 0.28, + "grad_norm": 0.7512612342834473, + "learning_rate": 1.6595072897774547e-05, + "loss": 2.1771, + "step": 8503 + }, + { + "epoch": 0.28, + "grad_norm": 0.7363735437393188, + "learning_rate": 1.659427388584971e-05, + "loss": 2.1241, + "step": 8504 + }, + { + "epoch": 0.28, + "grad_norm": 0.6985809206962585, + "learning_rate": 1.659347479942663e-05, + "loss": 2.14, + "step": 8505 + }, + { + "epoch": 0.28, + "grad_norm": 0.731840968132019, + "learning_rate": 1.6592675638514337e-05, + "loss": 2.1731, + "step": 8506 + }, + { + "epoch": 0.28, + "grad_norm": 0.7617955803871155, + "learning_rate": 1.6591876403121855e-05, + "loss": 2.1518, + "step": 8507 + }, + { + "epoch": 0.28, + "grad_norm": 0.7017560601234436, + "learning_rate": 1.659107709325822e-05, + "loss": 2.1333, + "step": 8508 + }, + { + "epoch": 0.28, + "grad_norm": 0.7608446478843689, + "learning_rate": 1.6590277708932458e-05, + "loss": 2.1565, + "step": 8509 + }, + { + "epoch": 0.28, + "grad_norm": 0.7530316114425659, + "learning_rate": 1.65894782501536e-05, + "loss": 2.1841, + "step": 8510 + }, + { + "epoch": 0.28, + "grad_norm": 0.7154077887535095, + "learning_rate": 1.658867871693068e-05, + "loss": 2.1075, + "step": 8511 + }, + { + "epoch": 0.28, + "grad_norm": 0.7149897217750549, + "learning_rate": 1.6587879109272726e-05, + "loss": 2.1193, + "step": 8512 + }, + { + "epoch": 0.28, + "grad_norm": 0.7030389904975891, + "learning_rate": 1.658707942718878e-05, + "loss": 2.1882, + "step": 8513 + }, + { + "epoch": 0.28, + "grad_norm": 0.7259228825569153, + "learning_rate": 1.6586279670687867e-05, + "loss": 2.1006, + "step": 8514 + }, + { + "epoch": 0.28, + "grad_norm": 0.7169371843338013, + "learning_rate": 1.658547983977903e-05, + "loss": 2.0825, + "step": 8515 + }, + { + "epoch": 0.28, + "grad_norm": 0.7480980157852173, + "learning_rate": 1.6584679934471294e-05, + "loss": 2.1582, + "step": 8516 + }, + { + "epoch": 0.28, + "grad_norm": 0.7195892333984375, + "learning_rate": 1.6583879954773707e-05, + "loss": 2.0497, + "step": 8517 + }, + { + "epoch": 0.28, + "grad_norm": 0.7250720262527466, + "learning_rate": 1.6583079900695303e-05, + "loss": 2.1564, + "step": 8518 + }, + { + "epoch": 0.28, + "grad_norm": 0.7400161623954773, + "learning_rate": 1.6582279772245123e-05, + "loss": 2.1467, + "step": 8519 + }, + { + "epoch": 0.28, + "grad_norm": 0.7226407527923584, + "learning_rate": 1.65814795694322e-05, + "loss": 2.1859, + "step": 8520 + }, + { + "epoch": 0.28, + "grad_norm": 0.7086884379386902, + "learning_rate": 1.658067929226558e-05, + "loss": 2.1824, + "step": 8521 + }, + { + "epoch": 0.28, + "grad_norm": 0.7324230670928955, + "learning_rate": 1.65798789407543e-05, + "loss": 2.206, + "step": 8522 + }, + { + "epoch": 0.28, + "grad_norm": 0.6903305053710938, + "learning_rate": 1.6579078514907404e-05, + "loss": 2.1037, + "step": 8523 + }, + { + "epoch": 0.28, + "grad_norm": 0.7690555453300476, + "learning_rate": 1.6578278014733938e-05, + "loss": 2.1065, + "step": 8524 + }, + { + "epoch": 0.28, + "grad_norm": 0.6953065395355225, + "learning_rate": 1.657747744024294e-05, + "loss": 2.1035, + "step": 8525 + }, + { + "epoch": 0.28, + "grad_norm": 0.7228513956069946, + "learning_rate": 1.6576676791443457e-05, + "loss": 2.1905, + "step": 8526 + }, + { + "epoch": 0.28, + "grad_norm": 0.7356615662574768, + "learning_rate": 1.6575876068344533e-05, + "loss": 2.1777, + "step": 8527 + }, + { + "epoch": 0.28, + "grad_norm": 0.747215747833252, + "learning_rate": 1.657507527095522e-05, + "loss": 2.241, + "step": 8528 + }, + { + "epoch": 0.28, + "grad_norm": 0.7284457087516785, + "learning_rate": 1.6574274399284552e-05, + "loss": 2.1066, + "step": 8529 + }, + { + "epoch": 0.28, + "grad_norm": 0.7463632225990295, + "learning_rate": 1.6573473453341587e-05, + "loss": 2.159, + "step": 8530 + }, + { + "epoch": 0.28, + "grad_norm": 0.7322075963020325, + "learning_rate": 1.6572672433135375e-05, + "loss": 2.1637, + "step": 8531 + }, + { + "epoch": 0.28, + "grad_norm": 0.759601354598999, + "learning_rate": 1.6571871338674957e-05, + "loss": 2.1192, + "step": 8532 + }, + { + "epoch": 0.28, + "grad_norm": 0.70357346534729, + "learning_rate": 1.6571070169969395e-05, + "loss": 2.1886, + "step": 8533 + }, + { + "epoch": 0.28, + "grad_norm": 0.7102460861206055, + "learning_rate": 1.6570268927027727e-05, + "loss": 2.1235, + "step": 8534 + }, + { + "epoch": 0.28, + "grad_norm": 0.7317030429840088, + "learning_rate": 1.6569467609859013e-05, + "loss": 2.138, + "step": 8535 + }, + { + "epoch": 0.28, + "grad_norm": 0.6941621899604797, + "learning_rate": 1.6568666218472304e-05, + "loss": 2.1995, + "step": 8536 + }, + { + "epoch": 0.28, + "grad_norm": 0.7085536122322083, + "learning_rate": 1.6567864752876656e-05, + "loss": 2.1135, + "step": 8537 + }, + { + "epoch": 0.28, + "grad_norm": 0.6942277550697327, + "learning_rate": 1.6567063213081117e-05, + "loss": 2.1104, + "step": 8538 + }, + { + "epoch": 0.28, + "grad_norm": 0.7417043447494507, + "learning_rate": 1.656626159909475e-05, + "loss": 2.1428, + "step": 8539 + }, + { + "epoch": 0.28, + "grad_norm": 0.7053242325782776, + "learning_rate": 1.6565459910926605e-05, + "loss": 2.1594, + "step": 8540 + }, + { + "epoch": 0.28, + "grad_norm": 0.6774251461029053, + "learning_rate": 1.6564658148585743e-05, + "loss": 2.1338, + "step": 8541 + }, + { + "epoch": 0.28, + "grad_norm": 0.7282190322875977, + "learning_rate": 1.656385631208122e-05, + "loss": 2.1722, + "step": 8542 + }, + { + "epoch": 0.28, + "grad_norm": 0.7296175360679626, + "learning_rate": 1.6563054401422095e-05, + "loss": 2.2193, + "step": 8543 + }, + { + "epoch": 0.28, + "grad_norm": 0.7117055654525757, + "learning_rate": 1.6562252416617432e-05, + "loss": 2.1307, + "step": 8544 + }, + { + "epoch": 0.28, + "grad_norm": 0.7168688178062439, + "learning_rate": 1.656145035767628e-05, + "loss": 2.1571, + "step": 8545 + }, + { + "epoch": 0.28, + "grad_norm": 0.6976636052131653, + "learning_rate": 1.6560648224607713e-05, + "loss": 2.0826, + "step": 8546 + }, + { + "epoch": 0.28, + "grad_norm": 0.7028382420539856, + "learning_rate": 1.655984601742078e-05, + "loss": 2.1246, + "step": 8547 + }, + { + "epoch": 0.28, + "grad_norm": 0.6970878839492798, + "learning_rate": 1.655904373612456e-05, + "loss": 2.1403, + "step": 8548 + }, + { + "epoch": 0.28, + "grad_norm": 0.7084276080131531, + "learning_rate": 1.6558241380728102e-05, + "loss": 2.152, + "step": 8549 + }, + { + "epoch": 0.28, + "grad_norm": 0.7015260457992554, + "learning_rate": 1.6557438951240482e-05, + "loss": 2.1362, + "step": 8550 + }, + { + "epoch": 0.28, + "grad_norm": 0.710869550704956, + "learning_rate": 1.6556636447670755e-05, + "loss": 2.1249, + "step": 8551 + }, + { + "epoch": 0.28, + "grad_norm": 0.7144033908843994, + "learning_rate": 1.6555833870027993e-05, + "loss": 2.1458, + "step": 8552 + }, + { + "epoch": 0.28, + "grad_norm": 0.7335296273231506, + "learning_rate": 1.655503121832126e-05, + "loss": 2.1684, + "step": 8553 + }, + { + "epoch": 0.28, + "grad_norm": 0.6994426250457764, + "learning_rate": 1.6554228492559628e-05, + "loss": 2.1711, + "step": 8554 + }, + { + "epoch": 0.28, + "grad_norm": 0.6763849258422852, + "learning_rate": 1.6553425692752165e-05, + "loss": 2.1012, + "step": 8555 + }, + { + "epoch": 0.28, + "grad_norm": 0.7549949288368225, + "learning_rate": 1.6552622818907935e-05, + "loss": 2.123, + "step": 8556 + }, + { + "epoch": 0.28, + "grad_norm": 0.7343417406082153, + "learning_rate": 1.655181987103602e-05, + "loss": 2.0746, + "step": 8557 + }, + { + "epoch": 0.28, + "grad_norm": 0.7319876551628113, + "learning_rate": 1.6551016849145476e-05, + "loss": 2.1482, + "step": 8558 + }, + { + "epoch": 0.28, + "grad_norm": 0.7186651825904846, + "learning_rate": 1.655021375324539e-05, + "loss": 2.0865, + "step": 8559 + }, + { + "epoch": 0.28, + "grad_norm": 0.7188573479652405, + "learning_rate": 1.654941058334482e-05, + "loss": 2.1263, + "step": 8560 + }, + { + "epoch": 0.28, + "grad_norm": 0.6992587447166443, + "learning_rate": 1.6548607339452853e-05, + "loss": 2.1866, + "step": 8561 + }, + { + "epoch": 0.28, + "grad_norm": 0.7086381912231445, + "learning_rate": 1.6547804021578556e-05, + "loss": 2.1448, + "step": 8562 + }, + { + "epoch": 0.28, + "grad_norm": 0.7193552255630493, + "learning_rate": 1.6547000629731008e-05, + "loss": 2.1399, + "step": 8563 + }, + { + "epoch": 0.28, + "grad_norm": 0.7114681005477905, + "learning_rate": 1.6546197163919282e-05, + "loss": 2.1589, + "step": 8564 + }, + { + "epoch": 0.28, + "grad_norm": 0.7475398778915405, + "learning_rate": 1.6545393624152456e-05, + "loss": 2.1892, + "step": 8565 + }, + { + "epoch": 0.28, + "grad_norm": 0.741888701915741, + "learning_rate": 1.6544590010439613e-05, + "loss": 2.2071, + "step": 8566 + }, + { + "epoch": 0.29, + "grad_norm": 0.6935592293739319, + "learning_rate": 1.6543786322789827e-05, + "loss": 2.1692, + "step": 8567 + }, + { + "epoch": 0.29, + "grad_norm": 0.7552782893180847, + "learning_rate": 1.6542982561212174e-05, + "loss": 2.2293, + "step": 8568 + }, + { + "epoch": 0.29, + "grad_norm": 0.7516879439353943, + "learning_rate": 1.6542178725715744e-05, + "loss": 2.1764, + "step": 8569 + }, + { + "epoch": 0.29, + "grad_norm": 0.7252159714698792, + "learning_rate": 1.6541374816309608e-05, + "loss": 2.1718, + "step": 8570 + }, + { + "epoch": 0.29, + "grad_norm": 0.7396523952484131, + "learning_rate": 1.6540570833002853e-05, + "loss": 2.2216, + "step": 8571 + }, + { + "epoch": 0.29, + "grad_norm": 0.7281020283699036, + "learning_rate": 1.6539766775804565e-05, + "loss": 2.1355, + "step": 8572 + }, + { + "epoch": 0.29, + "grad_norm": 0.7397704124450684, + "learning_rate": 1.6538962644723825e-05, + "loss": 2.1136, + "step": 8573 + }, + { + "epoch": 0.29, + "grad_norm": 0.7383582592010498, + "learning_rate": 1.6538158439769713e-05, + "loss": 2.1956, + "step": 8574 + }, + { + "epoch": 0.29, + "grad_norm": 0.6808381080627441, + "learning_rate": 1.6537354160951323e-05, + "loss": 2.1152, + "step": 8575 + }, + { + "epoch": 0.29, + "grad_norm": 0.7280012965202332, + "learning_rate": 1.6536549808277735e-05, + "loss": 2.0951, + "step": 8576 + }, + { + "epoch": 0.29, + "grad_norm": 0.6991639733314514, + "learning_rate": 1.653574538175804e-05, + "loss": 2.0888, + "step": 8577 + }, + { + "epoch": 0.29, + "grad_norm": 0.7489104866981506, + "learning_rate": 1.653494088140132e-05, + "loss": 2.0993, + "step": 8578 + }, + { + "epoch": 0.29, + "grad_norm": 0.6890525817871094, + "learning_rate": 1.653413630721667e-05, + "loss": 2.1885, + "step": 8579 + }, + { + "epoch": 0.29, + "grad_norm": 0.7387303113937378, + "learning_rate": 1.6533331659213177e-05, + "loss": 2.1598, + "step": 8580 + }, + { + "epoch": 0.29, + "grad_norm": 0.7390830516815186, + "learning_rate": 1.6532526937399934e-05, + "loss": 2.1788, + "step": 8581 + }, + { + "epoch": 0.29, + "grad_norm": 0.6796732544898987, + "learning_rate": 1.6531722141786027e-05, + "loss": 2.0676, + "step": 8582 + }, + { + "epoch": 0.29, + "grad_norm": 0.7047191262245178, + "learning_rate": 1.6530917272380552e-05, + "loss": 2.1098, + "step": 8583 + }, + { + "epoch": 0.29, + "grad_norm": 0.7175714373588562, + "learning_rate": 1.65301123291926e-05, + "loss": 2.1388, + "step": 8584 + }, + { + "epoch": 0.29, + "grad_norm": 0.7000894546508789, + "learning_rate": 1.652930731223127e-05, + "loss": 2.1714, + "step": 8585 + }, + { + "epoch": 0.29, + "grad_norm": 0.7137597799301147, + "learning_rate": 1.652850222150565e-05, + "loss": 2.0534, + "step": 8586 + }, + { + "epoch": 0.29, + "grad_norm": 0.7001762390136719, + "learning_rate": 1.652769705702484e-05, + "loss": 2.0566, + "step": 8587 + }, + { + "epoch": 0.29, + "grad_norm": 0.738588809967041, + "learning_rate": 1.6526891818797932e-05, + "loss": 2.1602, + "step": 8588 + }, + { + "epoch": 0.29, + "grad_norm": 0.7601026296615601, + "learning_rate": 1.6526086506834025e-05, + "loss": 2.1668, + "step": 8589 + }, + { + "epoch": 0.29, + "grad_norm": 0.6844609975814819, + "learning_rate": 1.652528112114222e-05, + "loss": 2.2094, + "step": 8590 + }, + { + "epoch": 0.29, + "grad_norm": 0.723003089427948, + "learning_rate": 1.652447566173161e-05, + "loss": 2.123, + "step": 8591 + }, + { + "epoch": 0.29, + "grad_norm": 0.7601599097251892, + "learning_rate": 1.65236701286113e-05, + "loss": 2.1215, + "step": 8592 + }, + { + "epoch": 0.29, + "grad_norm": 0.7292881608009338, + "learning_rate": 1.6522864521790384e-05, + "loss": 2.2064, + "step": 8593 + }, + { + "epoch": 0.29, + "grad_norm": 0.6910530924797058, + "learning_rate": 1.6522058841277972e-05, + "loss": 2.1627, + "step": 8594 + }, + { + "epoch": 0.29, + "grad_norm": 0.7271946668624878, + "learning_rate": 1.652125308708316e-05, + "loss": 2.1611, + "step": 8595 + }, + { + "epoch": 0.29, + "grad_norm": 0.7418900728225708, + "learning_rate": 1.652044725921505e-05, + "loss": 2.1461, + "step": 8596 + }, + { + "epoch": 0.29, + "grad_norm": 0.6954464912414551, + "learning_rate": 1.6519641357682754e-05, + "loss": 2.1595, + "step": 8597 + }, + { + "epoch": 0.29, + "grad_norm": 0.7137049436569214, + "learning_rate": 1.6518835382495363e-05, + "loss": 2.2013, + "step": 8598 + }, + { + "epoch": 0.29, + "grad_norm": 0.7366760969161987, + "learning_rate": 1.6518029333662e-05, + "loss": 2.1932, + "step": 8599 + }, + { + "epoch": 0.29, + "grad_norm": 0.7147172689437866, + "learning_rate": 1.6517223211191753e-05, + "loss": 2.1012, + "step": 8600 + }, + { + "epoch": 0.29, + "grad_norm": 0.6887701749801636, + "learning_rate": 1.651641701509374e-05, + "loss": 2.1469, + "step": 8601 + }, + { + "epoch": 0.29, + "grad_norm": 0.7105332612991333, + "learning_rate": 1.651561074537707e-05, + "loss": 2.1269, + "step": 8602 + }, + { + "epoch": 0.29, + "grad_norm": 0.7148953676223755, + "learning_rate": 1.6514804402050843e-05, + "loss": 2.1258, + "step": 8603 + }, + { + "epoch": 0.29, + "grad_norm": 0.7073917984962463, + "learning_rate": 1.6513997985124176e-05, + "loss": 2.1035, + "step": 8604 + }, + { + "epoch": 0.29, + "grad_norm": 0.7075343132019043, + "learning_rate": 1.6513191494606182e-05, + "loss": 2.0877, + "step": 8605 + }, + { + "epoch": 0.29, + "grad_norm": 0.7029696702957153, + "learning_rate": 1.651238493050596e-05, + "loss": 2.2, + "step": 8606 + }, + { + "epoch": 0.29, + "grad_norm": 0.6935347318649292, + "learning_rate": 1.6511578292832635e-05, + "loss": 2.1688, + "step": 8607 + }, + { + "epoch": 0.29, + "grad_norm": 0.7413133978843689, + "learning_rate": 1.6510771581595314e-05, + "loss": 2.1712, + "step": 8608 + }, + { + "epoch": 0.29, + "grad_norm": 0.7197940349578857, + "learning_rate": 1.650996479680311e-05, + "loss": 2.1333, + "step": 8609 + }, + { + "epoch": 0.29, + "grad_norm": 0.7398563027381897, + "learning_rate": 1.650915793846514e-05, + "loss": 2.1485, + "step": 8610 + }, + { + "epoch": 0.29, + "grad_norm": 0.7056536674499512, + "learning_rate": 1.6508351006590518e-05, + "loss": 2.1274, + "step": 8611 + }, + { + "epoch": 0.29, + "grad_norm": 0.7039489150047302, + "learning_rate": 1.650754400118836e-05, + "loss": 2.1344, + "step": 8612 + }, + { + "epoch": 0.29, + "grad_norm": 0.7109227180480957, + "learning_rate": 1.650673692226778e-05, + "loss": 2.0707, + "step": 8613 + }, + { + "epoch": 0.29, + "grad_norm": 0.7520394325256348, + "learning_rate": 1.6505929769837905e-05, + "loss": 2.0625, + "step": 8614 + }, + { + "epoch": 0.29, + "grad_norm": 0.7551279664039612, + "learning_rate": 1.6505122543907847e-05, + "loss": 2.2104, + "step": 8615 + }, + { + "epoch": 0.29, + "grad_norm": 0.7096158266067505, + "learning_rate": 1.6504315244486728e-05, + "loss": 2.1345, + "step": 8616 + }, + { + "epoch": 0.29, + "grad_norm": 0.716400146484375, + "learning_rate": 1.6503507871583667e-05, + "loss": 2.1754, + "step": 8617 + }, + { + "epoch": 0.29, + "grad_norm": 0.700905442237854, + "learning_rate": 1.6502700425207782e-05, + "loss": 2.2002, + "step": 8618 + }, + { + "epoch": 0.29, + "grad_norm": 0.7089530825614929, + "learning_rate": 1.65018929053682e-05, + "loss": 2.1146, + "step": 8619 + }, + { + "epoch": 0.29, + "grad_norm": 0.7055644989013672, + "learning_rate": 1.6501085312074042e-05, + "loss": 2.1102, + "step": 8620 + }, + { + "epoch": 0.29, + "grad_norm": 0.7378947138786316, + "learning_rate": 1.6500277645334435e-05, + "loss": 2.1813, + "step": 8621 + }, + { + "epoch": 0.29, + "grad_norm": 0.7238419055938721, + "learning_rate": 1.64994699051585e-05, + "loss": 2.1264, + "step": 8622 + }, + { + "epoch": 0.29, + "grad_norm": 0.6934816837310791, + "learning_rate": 1.6498662091555366e-05, + "loss": 2.1511, + "step": 8623 + }, + { + "epoch": 0.29, + "grad_norm": 0.6926857829093933, + "learning_rate": 1.6497854204534148e-05, + "loss": 2.1347, + "step": 8624 + }, + { + "epoch": 0.29, + "grad_norm": 0.7049158215522766, + "learning_rate": 1.6497046244103986e-05, + "loss": 2.1505, + "step": 8625 + }, + { + "epoch": 0.29, + "grad_norm": 0.7279732823371887, + "learning_rate": 1.6496238210274005e-05, + "loss": 2.1111, + "step": 8626 + }, + { + "epoch": 0.29, + "grad_norm": 0.7275941371917725, + "learning_rate": 1.649543010305333e-05, + "loss": 2.2188, + "step": 8627 + }, + { + "epoch": 0.29, + "grad_norm": 0.716028094291687, + "learning_rate": 1.649462192245109e-05, + "loss": 2.1244, + "step": 8628 + }, + { + "epoch": 0.29, + "grad_norm": 0.7254250645637512, + "learning_rate": 1.649381366847642e-05, + "loss": 2.1393, + "step": 8629 + }, + { + "epoch": 0.29, + "grad_norm": 0.7290170788764954, + "learning_rate": 1.649300534113845e-05, + "loss": 2.1159, + "step": 8630 + }, + { + "epoch": 0.29, + "grad_norm": 0.6990953087806702, + "learning_rate": 1.649219694044631e-05, + "loss": 2.1616, + "step": 8631 + }, + { + "epoch": 0.29, + "grad_norm": 0.7147461175918579, + "learning_rate": 1.6491388466409134e-05, + "loss": 2.127, + "step": 8632 + }, + { + "epoch": 0.29, + "grad_norm": 0.7202890515327454, + "learning_rate": 1.6490579919036057e-05, + "loss": 2.0824, + "step": 8633 + }, + { + "epoch": 0.29, + "grad_norm": 0.67803555727005, + "learning_rate": 1.648977129833621e-05, + "loss": 2.1154, + "step": 8634 + }, + { + "epoch": 0.29, + "grad_norm": 0.7088366150856018, + "learning_rate": 1.648896260431873e-05, + "loss": 2.1029, + "step": 8635 + }, + { + "epoch": 0.29, + "grad_norm": 0.7328715920448303, + "learning_rate": 1.648815383699275e-05, + "loss": 2.1116, + "step": 8636 + }, + { + "epoch": 0.29, + "grad_norm": 0.7269423007965088, + "learning_rate": 1.6487344996367415e-05, + "loss": 2.1388, + "step": 8637 + }, + { + "epoch": 0.29, + "grad_norm": 0.7346189022064209, + "learning_rate": 1.6486536082451858e-05, + "loss": 2.148, + "step": 8638 + }, + { + "epoch": 0.29, + "grad_norm": 0.7402300238609314, + "learning_rate": 1.648572709525522e-05, + "loss": 2.1861, + "step": 8639 + }, + { + "epoch": 0.29, + "grad_norm": 0.6965931057929993, + "learning_rate": 1.648491803478663e-05, + "loss": 2.1325, + "step": 8640 + }, + { + "epoch": 0.29, + "grad_norm": 0.743628978729248, + "learning_rate": 1.6484108901055244e-05, + "loss": 2.1256, + "step": 8641 + }, + { + "epoch": 0.29, + "grad_norm": 0.7043337225914001, + "learning_rate": 1.6483299694070194e-05, + "loss": 2.1679, + "step": 8642 + }, + { + "epoch": 0.29, + "grad_norm": 0.6928893327713013, + "learning_rate": 1.6482490413840623e-05, + "loss": 2.1849, + "step": 8643 + }, + { + "epoch": 0.29, + "grad_norm": 0.7343873977661133, + "learning_rate": 1.6481681060375675e-05, + "loss": 2.1332, + "step": 8644 + }, + { + "epoch": 0.29, + "grad_norm": 0.7067716121673584, + "learning_rate": 1.6480871633684495e-05, + "loss": 2.1708, + "step": 8645 + }, + { + "epoch": 0.29, + "grad_norm": 0.7068729996681213, + "learning_rate": 1.648006213377622e-05, + "loss": 2.1317, + "step": 8646 + }, + { + "epoch": 0.29, + "grad_norm": 0.7325693964958191, + "learning_rate": 1.6479252560660004e-05, + "loss": 2.143, + "step": 8647 + }, + { + "epoch": 0.29, + "grad_norm": 0.7149820327758789, + "learning_rate": 1.6478442914344988e-05, + "loss": 2.2159, + "step": 8648 + }, + { + "epoch": 0.29, + "grad_norm": 0.716033399105072, + "learning_rate": 1.6477633194840322e-05, + "loss": 2.1152, + "step": 8649 + }, + { + "epoch": 0.29, + "grad_norm": 0.7164447903633118, + "learning_rate": 1.6476823402155154e-05, + "loss": 2.0901, + "step": 8650 + }, + { + "epoch": 0.29, + "grad_norm": 0.6766393780708313, + "learning_rate": 1.647601353629863e-05, + "loss": 2.1432, + "step": 8651 + }, + { + "epoch": 0.29, + "grad_norm": 0.7457061409950256, + "learning_rate": 1.64752035972799e-05, + "loss": 2.1008, + "step": 8652 + }, + { + "epoch": 0.29, + "grad_norm": 0.7502416968345642, + "learning_rate": 1.6474393585108117e-05, + "loss": 2.081, + "step": 8653 + }, + { + "epoch": 0.29, + "grad_norm": 0.7059175968170166, + "learning_rate": 1.647358349979243e-05, + "loss": 2.1548, + "step": 8654 + }, + { + "epoch": 0.29, + "grad_norm": 0.7197018265724182, + "learning_rate": 1.6472773341341984e-05, + "loss": 2.2745, + "step": 8655 + }, + { + "epoch": 0.29, + "grad_norm": 0.7112841010093689, + "learning_rate": 1.6471963109765942e-05, + "loss": 2.1559, + "step": 8656 + }, + { + "epoch": 0.29, + "grad_norm": 0.7062753438949585, + "learning_rate": 1.6471152805073454e-05, + "loss": 2.1862, + "step": 8657 + }, + { + "epoch": 0.29, + "grad_norm": 0.7131178975105286, + "learning_rate": 1.6470342427273673e-05, + "loss": 2.0975, + "step": 8658 + }, + { + "epoch": 0.29, + "grad_norm": 0.7311652302742004, + "learning_rate": 1.6469531976375756e-05, + "loss": 2.1208, + "step": 8659 + }, + { + "epoch": 0.29, + "grad_norm": 0.7294530272483826, + "learning_rate": 1.646872145238886e-05, + "loss": 2.107, + "step": 8660 + }, + { + "epoch": 0.29, + "grad_norm": 0.755399227142334, + "learning_rate": 1.6467910855322136e-05, + "loss": 2.0934, + "step": 8661 + }, + { + "epoch": 0.29, + "grad_norm": 0.7106584906578064, + "learning_rate": 1.646710018518475e-05, + "loss": 2.1423, + "step": 8662 + }, + { + "epoch": 0.29, + "grad_norm": 0.7575503587722778, + "learning_rate": 1.6466289441985853e-05, + "loss": 2.1657, + "step": 8663 + }, + { + "epoch": 0.29, + "grad_norm": 0.6909418106079102, + "learning_rate": 1.646547862573461e-05, + "loss": 2.1373, + "step": 8664 + }, + { + "epoch": 0.29, + "grad_norm": 0.7176034450531006, + "learning_rate": 1.646466773644018e-05, + "loss": 2.1539, + "step": 8665 + }, + { + "epoch": 0.29, + "grad_norm": 0.7235804796218872, + "learning_rate": 1.6463856774111718e-05, + "loss": 2.155, + "step": 8666 + }, + { + "epoch": 0.29, + "grad_norm": 0.7293908596038818, + "learning_rate": 1.6463045738758394e-05, + "loss": 2.0896, + "step": 8667 + }, + { + "epoch": 0.29, + "grad_norm": 0.6998721361160278, + "learning_rate": 1.6462234630389366e-05, + "loss": 2.1473, + "step": 8668 + }, + { + "epoch": 0.29, + "grad_norm": 0.7099831104278564, + "learning_rate": 1.6461423449013796e-05, + "loss": 2.1542, + "step": 8669 + }, + { + "epoch": 0.29, + "grad_norm": 0.7209317088127136, + "learning_rate": 1.6460612194640852e-05, + "loss": 2.1557, + "step": 8670 + }, + { + "epoch": 0.29, + "grad_norm": 0.6948076486587524, + "learning_rate": 1.64598008672797e-05, + "loss": 2.1168, + "step": 8671 + }, + { + "epoch": 0.29, + "grad_norm": 0.6941455602645874, + "learning_rate": 1.64589894669395e-05, + "loss": 2.156, + "step": 8672 + }, + { + "epoch": 0.29, + "grad_norm": 0.7441786527633667, + "learning_rate": 1.6458177993629425e-05, + "loss": 2.1611, + "step": 8673 + }, + { + "epoch": 0.29, + "grad_norm": 0.7092084884643555, + "learning_rate": 1.6457366447358638e-05, + "loss": 2.1527, + "step": 8674 + }, + { + "epoch": 0.29, + "grad_norm": 0.6901856064796448, + "learning_rate": 1.645655482813631e-05, + "loss": 2.151, + "step": 8675 + }, + { + "epoch": 0.29, + "grad_norm": 0.703450083732605, + "learning_rate": 1.645574313597161e-05, + "loss": 2.1625, + "step": 8676 + }, + { + "epoch": 0.29, + "grad_norm": 0.7163329720497131, + "learning_rate": 1.6454931370873707e-05, + "loss": 2.0686, + "step": 8677 + }, + { + "epoch": 0.29, + "grad_norm": 0.7170610427856445, + "learning_rate": 1.6454119532851772e-05, + "loss": 2.1833, + "step": 8678 + }, + { + "epoch": 0.29, + "grad_norm": 0.7266414165496826, + "learning_rate": 1.645330762191498e-05, + "loss": 2.116, + "step": 8679 + }, + { + "epoch": 0.29, + "grad_norm": 0.7273832559585571, + "learning_rate": 1.6452495638072496e-05, + "loss": 2.1479, + "step": 8680 + }, + { + "epoch": 0.29, + "grad_norm": 0.6916490793228149, + "learning_rate": 1.64516835813335e-05, + "loss": 2.1933, + "step": 8681 + }, + { + "epoch": 0.29, + "grad_norm": 0.7194525003433228, + "learning_rate": 1.6450871451707166e-05, + "loss": 2.129, + "step": 8682 + }, + { + "epoch": 0.29, + "grad_norm": 0.7347792983055115, + "learning_rate": 1.6450059249202665e-05, + "loss": 2.1805, + "step": 8683 + }, + { + "epoch": 0.29, + "grad_norm": 0.7341068387031555, + "learning_rate": 1.6449246973829172e-05, + "loss": 2.1565, + "step": 8684 + }, + { + "epoch": 0.29, + "grad_norm": 0.7437977194786072, + "learning_rate": 1.644843462559587e-05, + "loss": 2.1665, + "step": 8685 + }, + { + "epoch": 0.29, + "grad_norm": 0.7272555232048035, + "learning_rate": 1.644762220451193e-05, + "loss": 2.1583, + "step": 8686 + }, + { + "epoch": 0.29, + "grad_norm": 0.7330377697944641, + "learning_rate": 1.644680971058654e-05, + "loss": 2.1352, + "step": 8687 + }, + { + "epoch": 0.29, + "grad_norm": 0.7000678181648254, + "learning_rate": 1.644599714382886e-05, + "loss": 2.1119, + "step": 8688 + }, + { + "epoch": 0.29, + "grad_norm": 0.6793113350868225, + "learning_rate": 1.644518450424809e-05, + "loss": 2.117, + "step": 8689 + }, + { + "epoch": 0.29, + "grad_norm": 0.7002363801002502, + "learning_rate": 1.6444371791853405e-05, + "loss": 2.0819, + "step": 8690 + }, + { + "epoch": 0.29, + "grad_norm": 0.7260528802871704, + "learning_rate": 1.6443559006653977e-05, + "loss": 2.1111, + "step": 8691 + }, + { + "epoch": 0.29, + "grad_norm": 0.7341198325157166, + "learning_rate": 1.6442746148659002e-05, + "loss": 2.2254, + "step": 8692 + }, + { + "epoch": 0.29, + "grad_norm": 0.7159674167633057, + "learning_rate": 1.6441933217877653e-05, + "loss": 2.1475, + "step": 8693 + }, + { + "epoch": 0.29, + "grad_norm": 0.719525933265686, + "learning_rate": 1.644112021431912e-05, + "loss": 2.1687, + "step": 8694 + }, + { + "epoch": 0.29, + "grad_norm": 0.7415254712104797, + "learning_rate": 1.6440307137992585e-05, + "loss": 2.1598, + "step": 8695 + }, + { + "epoch": 0.29, + "grad_norm": 0.7052748203277588, + "learning_rate": 1.6439493988907234e-05, + "loss": 2.2062, + "step": 8696 + }, + { + "epoch": 0.29, + "grad_norm": 0.7486609816551208, + "learning_rate": 1.6438680767072252e-05, + "loss": 2.1648, + "step": 8697 + }, + { + "epoch": 0.29, + "grad_norm": 0.7042666673660278, + "learning_rate": 1.6437867472496832e-05, + "loss": 2.1855, + "step": 8698 + }, + { + "epoch": 0.29, + "grad_norm": 0.6793323755264282, + "learning_rate": 1.6437054105190155e-05, + "loss": 2.0925, + "step": 8699 + }, + { + "epoch": 0.29, + "grad_norm": 0.7216567397117615, + "learning_rate": 1.643624066516141e-05, + "loss": 2.1075, + "step": 8700 + }, + { + "epoch": 0.29, + "grad_norm": 0.70390385389328, + "learning_rate": 1.6435427152419797e-05, + "loss": 2.1534, + "step": 8701 + }, + { + "epoch": 0.29, + "grad_norm": 0.7354033589363098, + "learning_rate": 1.64346135669745e-05, + "loss": 2.1255, + "step": 8702 + }, + { + "epoch": 0.29, + "grad_norm": 0.7334848046302795, + "learning_rate": 1.6433799908834703e-05, + "loss": 2.1991, + "step": 8703 + }, + { + "epoch": 0.29, + "grad_norm": 0.7464609146118164, + "learning_rate": 1.643298617800961e-05, + "loss": 2.1287, + "step": 8704 + }, + { + "epoch": 0.29, + "grad_norm": 0.7090374827384949, + "learning_rate": 1.643217237450841e-05, + "loss": 2.1402, + "step": 8705 + }, + { + "epoch": 0.29, + "grad_norm": 0.7271326780319214, + "learning_rate": 1.6431358498340293e-05, + "loss": 2.1258, + "step": 8706 + }, + { + "epoch": 0.29, + "grad_norm": 0.7055995464324951, + "learning_rate": 1.643054454951446e-05, + "loss": 2.1364, + "step": 8707 + }, + { + "epoch": 0.29, + "grad_norm": 0.7151098847389221, + "learning_rate": 1.64297305280401e-05, + "loss": 2.1987, + "step": 8708 + }, + { + "epoch": 0.29, + "grad_norm": 0.7521646022796631, + "learning_rate": 1.6428916433926415e-05, + "loss": 2.13, + "step": 8709 + }, + { + "epoch": 0.29, + "grad_norm": 0.6807918548583984, + "learning_rate": 1.64281022671826e-05, + "loss": 2.0933, + "step": 8710 + }, + { + "epoch": 0.29, + "grad_norm": 0.7281333804130554, + "learning_rate": 1.642728802781785e-05, + "loss": 2.2045, + "step": 8711 + }, + { + "epoch": 0.29, + "grad_norm": 0.7047079205513, + "learning_rate": 1.642647371584137e-05, + "loss": 2.1284, + "step": 8712 + }, + { + "epoch": 0.29, + "grad_norm": 0.7147891521453857, + "learning_rate": 1.6425659331262353e-05, + "loss": 2.1327, + "step": 8713 + }, + { + "epoch": 0.29, + "grad_norm": 0.6735539436340332, + "learning_rate": 1.6424844874090008e-05, + "loss": 2.0491, + "step": 8714 + }, + { + "epoch": 0.29, + "grad_norm": 0.7134787440299988, + "learning_rate": 1.6424030344333526e-05, + "loss": 2.0897, + "step": 8715 + }, + { + "epoch": 0.29, + "grad_norm": 0.7234330773353577, + "learning_rate": 1.6423215742002115e-05, + "loss": 2.1982, + "step": 8716 + }, + { + "epoch": 0.29, + "grad_norm": 0.7160414457321167, + "learning_rate": 1.642240106710498e-05, + "loss": 2.1127, + "step": 8717 + }, + { + "epoch": 0.29, + "grad_norm": 0.6791563630104065, + "learning_rate": 1.6421586319651315e-05, + "loss": 2.1201, + "step": 8718 + }, + { + "epoch": 0.29, + "grad_norm": 0.7297214865684509, + "learning_rate": 1.6420771499650337e-05, + "loss": 2.1483, + "step": 8719 + }, + { + "epoch": 0.29, + "grad_norm": 0.7450850009918213, + "learning_rate": 1.6419956607111246e-05, + "loss": 2.2155, + "step": 8720 + }, + { + "epoch": 0.29, + "grad_norm": 0.7217132449150085, + "learning_rate": 1.6419141642043245e-05, + "loss": 2.1235, + "step": 8721 + }, + { + "epoch": 0.29, + "grad_norm": 0.6960335373878479, + "learning_rate": 1.6418326604455545e-05, + "loss": 2.1715, + "step": 8722 + }, + { + "epoch": 0.29, + "grad_norm": 0.7075805068016052, + "learning_rate": 1.6417511494357353e-05, + "loss": 2.1347, + "step": 8723 + }, + { + "epoch": 0.29, + "grad_norm": 0.7439130544662476, + "learning_rate": 1.6416696311757873e-05, + "loss": 2.1443, + "step": 8724 + }, + { + "epoch": 0.29, + "grad_norm": 0.7126134037971497, + "learning_rate": 1.641588105666632e-05, + "loss": 2.1566, + "step": 8725 + }, + { + "epoch": 0.29, + "grad_norm": 0.7144042253494263, + "learning_rate": 1.6415065729091906e-05, + "loss": 2.1124, + "step": 8726 + }, + { + "epoch": 0.29, + "grad_norm": 0.6928504109382629, + "learning_rate": 1.6414250329043836e-05, + "loss": 2.1516, + "step": 8727 + }, + { + "epoch": 0.29, + "grad_norm": 0.7137312293052673, + "learning_rate": 1.6413434856531328e-05, + "loss": 2.1447, + "step": 8728 + }, + { + "epoch": 0.29, + "grad_norm": 0.7179322838783264, + "learning_rate": 1.6412619311563588e-05, + "loss": 2.16, + "step": 8729 + }, + { + "epoch": 0.29, + "grad_norm": 0.7052469253540039, + "learning_rate": 1.641180369414984e-05, + "loss": 2.1228, + "step": 8730 + }, + { + "epoch": 0.29, + "grad_norm": 0.7070691585540771, + "learning_rate": 1.641098800429928e-05, + "loss": 2.1589, + "step": 8731 + }, + { + "epoch": 0.29, + "grad_norm": 0.7290984392166138, + "learning_rate": 1.6410172242021146e-05, + "loss": 2.1536, + "step": 8732 + }, + { + "epoch": 0.29, + "grad_norm": 0.7093420028686523, + "learning_rate": 1.6409356407324638e-05, + "loss": 2.1757, + "step": 8733 + }, + { + "epoch": 0.29, + "grad_norm": 0.7079386115074158, + "learning_rate": 1.640854050021898e-05, + "loss": 2.1372, + "step": 8734 + }, + { + "epoch": 0.29, + "grad_norm": 0.7045636177062988, + "learning_rate": 1.640772452071338e-05, + "loss": 2.0838, + "step": 8735 + }, + { + "epoch": 0.29, + "grad_norm": 0.7122809290885925, + "learning_rate": 1.6406908468817072e-05, + "loss": 2.1526, + "step": 8736 + }, + { + "epoch": 0.29, + "grad_norm": 0.7370101809501648, + "learning_rate": 1.640609234453926e-05, + "loss": 2.0996, + "step": 8737 + }, + { + "epoch": 0.29, + "grad_norm": 0.7180492281913757, + "learning_rate": 1.640527614788918e-05, + "loss": 2.1469, + "step": 8738 + }, + { + "epoch": 0.29, + "grad_norm": 0.7035475373268127, + "learning_rate": 1.6404459878876036e-05, + "loss": 2.1176, + "step": 8739 + }, + { + "epoch": 0.29, + "grad_norm": 0.6948394179344177, + "learning_rate": 1.640364353750906e-05, + "loss": 2.1903, + "step": 8740 + }, + { + "epoch": 0.29, + "grad_norm": 0.775216281414032, + "learning_rate": 1.6402827123797472e-05, + "loss": 2.0726, + "step": 8741 + }, + { + "epoch": 0.29, + "grad_norm": 0.7295196652412415, + "learning_rate": 1.6402010637750497e-05, + "loss": 2.1011, + "step": 8742 + }, + { + "epoch": 0.29, + "grad_norm": 0.7750604152679443, + "learning_rate": 1.6401194079377357e-05, + "loss": 2.183, + "step": 8743 + }, + { + "epoch": 0.29, + "grad_norm": 0.699211061000824, + "learning_rate": 1.6400377448687278e-05, + "loss": 2.1482, + "step": 8744 + }, + { + "epoch": 0.29, + "grad_norm": 0.7084868550300598, + "learning_rate": 1.6399560745689486e-05, + "loss": 2.0831, + "step": 8745 + }, + { + "epoch": 0.29, + "grad_norm": 0.6998394131660461, + "learning_rate": 1.6398743970393207e-05, + "loss": 2.1181, + "step": 8746 + }, + { + "epoch": 0.29, + "grad_norm": 0.7365617752075195, + "learning_rate": 1.6397927122807666e-05, + "loss": 2.1257, + "step": 8747 + }, + { + "epoch": 0.29, + "grad_norm": 0.7238606810569763, + "learning_rate": 1.6397110202942098e-05, + "loss": 2.1638, + "step": 8748 + }, + { + "epoch": 0.29, + "grad_norm": 0.7203308343887329, + "learning_rate": 1.6396293210805723e-05, + "loss": 2.1237, + "step": 8749 + }, + { + "epoch": 0.29, + "grad_norm": 0.7496856451034546, + "learning_rate": 1.6395476146407778e-05, + "loss": 2.1332, + "step": 8750 + }, + { + "epoch": 0.29, + "grad_norm": 0.735129177570343, + "learning_rate": 1.6394659009757493e-05, + "loss": 2.1361, + "step": 8751 + }, + { + "epoch": 0.29, + "grad_norm": 0.710877001285553, + "learning_rate": 1.63938418008641e-05, + "loss": 2.1201, + "step": 8752 + }, + { + "epoch": 0.29, + "grad_norm": 0.706219494342804, + "learning_rate": 1.6393024519736824e-05, + "loss": 2.0817, + "step": 8753 + }, + { + "epoch": 0.29, + "grad_norm": 0.736022412776947, + "learning_rate": 1.639220716638491e-05, + "loss": 2.1648, + "step": 8754 + }, + { + "epoch": 0.29, + "grad_norm": 0.7418921589851379, + "learning_rate": 1.639138974081758e-05, + "loss": 2.1882, + "step": 8755 + }, + { + "epoch": 0.29, + "grad_norm": 0.7078555226325989, + "learning_rate": 1.639057224304408e-05, + "loss": 2.2109, + "step": 8756 + }, + { + "epoch": 0.29, + "grad_norm": 0.7736138701438904, + "learning_rate": 1.6389754673073635e-05, + "loss": 2.1076, + "step": 8757 + }, + { + "epoch": 0.29, + "grad_norm": 0.7333442568778992, + "learning_rate": 1.6388937030915486e-05, + "loss": 2.135, + "step": 8758 + }, + { + "epoch": 0.29, + "grad_norm": 0.7175415754318237, + "learning_rate": 1.6388119316578874e-05, + "loss": 2.1358, + "step": 8759 + }, + { + "epoch": 0.29, + "grad_norm": 0.7150269150733948, + "learning_rate": 1.6387301530073033e-05, + "loss": 2.1755, + "step": 8760 + }, + { + "epoch": 0.29, + "grad_norm": 0.7132679224014282, + "learning_rate": 1.63864836714072e-05, + "loss": 2.1651, + "step": 8761 + }, + { + "epoch": 0.29, + "grad_norm": 0.7067186832427979, + "learning_rate": 1.6385665740590622e-05, + "loss": 2.1035, + "step": 8762 + }, + { + "epoch": 0.29, + "grad_norm": 0.7212598323822021, + "learning_rate": 1.638484773763253e-05, + "loss": 2.1719, + "step": 8763 + }, + { + "epoch": 0.29, + "grad_norm": 0.709871768951416, + "learning_rate": 1.6384029662542175e-05, + "loss": 2.1227, + "step": 8764 + }, + { + "epoch": 0.29, + "grad_norm": 0.7096536755561829, + "learning_rate": 1.6383211515328793e-05, + "loss": 2.1158, + "step": 8765 + }, + { + "epoch": 0.29, + "grad_norm": 0.7124400734901428, + "learning_rate": 1.6382393296001625e-05, + "loss": 2.1401, + "step": 8766 + }, + { + "epoch": 0.29, + "grad_norm": 0.7127114534378052, + "learning_rate": 1.6381575004569923e-05, + "loss": 2.0982, + "step": 8767 + }, + { + "epoch": 0.29, + "grad_norm": 0.6893149614334106, + "learning_rate": 1.6380756641042924e-05, + "loss": 2.0642, + "step": 8768 + }, + { + "epoch": 0.29, + "grad_norm": 0.7137542963027954, + "learning_rate": 1.6379938205429874e-05, + "loss": 2.2296, + "step": 8769 + }, + { + "epoch": 0.29, + "grad_norm": 0.7044686675071716, + "learning_rate": 1.6379119697740025e-05, + "loss": 2.1124, + "step": 8770 + }, + { + "epoch": 0.29, + "grad_norm": 0.7261411547660828, + "learning_rate": 1.6378301117982618e-05, + "loss": 2.2042, + "step": 8771 + }, + { + "epoch": 0.29, + "grad_norm": 0.7239534258842468, + "learning_rate": 1.63774824661669e-05, + "loss": 2.1688, + "step": 8772 + }, + { + "epoch": 0.29, + "grad_norm": 0.7159039974212646, + "learning_rate": 1.6376663742302125e-05, + "loss": 2.1593, + "step": 8773 + }, + { + "epoch": 0.29, + "grad_norm": 0.7057291269302368, + "learning_rate": 1.6375844946397546e-05, + "loss": 2.176, + "step": 8774 + }, + { + "epoch": 0.29, + "grad_norm": 0.7203836441040039, + "learning_rate": 1.63750260784624e-05, + "loss": 2.215, + "step": 8775 + }, + { + "epoch": 0.29, + "grad_norm": 0.7096632719039917, + "learning_rate": 1.637420713850595e-05, + "loss": 2.1158, + "step": 8776 + }, + { + "epoch": 0.29, + "grad_norm": 0.745130717754364, + "learning_rate": 1.6373388126537437e-05, + "loss": 2.1832, + "step": 8777 + }, + { + "epoch": 0.29, + "grad_norm": 0.7339268922805786, + "learning_rate": 1.6372569042566128e-05, + "loss": 2.1505, + "step": 8778 + }, + { + "epoch": 0.29, + "grad_norm": 0.7244040966033936, + "learning_rate": 1.6371749886601263e-05, + "loss": 2.1477, + "step": 8779 + }, + { + "epoch": 0.29, + "grad_norm": 0.7341741919517517, + "learning_rate": 1.6370930658652103e-05, + "loss": 2.154, + "step": 8780 + }, + { + "epoch": 0.29, + "grad_norm": 0.7291587591171265, + "learning_rate": 1.6370111358727904e-05, + "loss": 2.2091, + "step": 8781 + }, + { + "epoch": 0.29, + "grad_norm": 0.6986908316612244, + "learning_rate": 1.6369291986837922e-05, + "loss": 2.0877, + "step": 8782 + }, + { + "epoch": 0.29, + "grad_norm": 0.7447097301483154, + "learning_rate": 1.6368472542991407e-05, + "loss": 2.089, + "step": 8783 + }, + { + "epoch": 0.29, + "grad_norm": 0.6976644396781921, + "learning_rate": 1.6367653027197622e-05, + "loss": 2.1786, + "step": 8784 + }, + { + "epoch": 0.29, + "grad_norm": 0.7244577407836914, + "learning_rate": 1.6366833439465827e-05, + "loss": 2.1528, + "step": 8785 + }, + { + "epoch": 0.29, + "grad_norm": 0.7220506072044373, + "learning_rate": 1.6366013779805278e-05, + "loss": 2.1257, + "step": 8786 + }, + { + "epoch": 0.29, + "grad_norm": 0.7239930033683777, + "learning_rate": 1.636519404822524e-05, + "loss": 2.1943, + "step": 8787 + }, + { + "epoch": 0.29, + "grad_norm": 0.7204551696777344, + "learning_rate": 1.6364374244734965e-05, + "loss": 2.0981, + "step": 8788 + }, + { + "epoch": 0.29, + "grad_norm": 0.7207492589950562, + "learning_rate": 1.6363554369343724e-05, + "loss": 2.074, + "step": 8789 + }, + { + "epoch": 0.29, + "grad_norm": 0.7105235457420349, + "learning_rate": 1.636273442206077e-05, + "loss": 2.13, + "step": 8790 + }, + { + "epoch": 0.29, + "grad_norm": 0.7341360449790955, + "learning_rate": 1.6361914402895377e-05, + "loss": 2.0734, + "step": 8791 + }, + { + "epoch": 0.29, + "grad_norm": 0.7436923384666443, + "learning_rate": 1.6361094311856798e-05, + "loss": 2.1613, + "step": 8792 + }, + { + "epoch": 0.29, + "grad_norm": 0.7519908547401428, + "learning_rate": 1.6360274148954307e-05, + "loss": 2.1294, + "step": 8793 + }, + { + "epoch": 0.29, + "grad_norm": 0.6914599537849426, + "learning_rate": 1.6359453914197164e-05, + "loss": 2.2098, + "step": 8794 + }, + { + "epoch": 0.29, + "grad_norm": 0.7312342524528503, + "learning_rate": 1.635863360759464e-05, + "loss": 2.1477, + "step": 8795 + }, + { + "epoch": 0.29, + "grad_norm": 0.7123243808746338, + "learning_rate": 1.6357813229156e-05, + "loss": 2.1621, + "step": 8796 + }, + { + "epoch": 0.29, + "grad_norm": 0.723903238773346, + "learning_rate": 1.635699277889051e-05, + "loss": 2.1575, + "step": 8797 + }, + { + "epoch": 0.29, + "grad_norm": 0.7797050476074219, + "learning_rate": 1.6356172256807445e-05, + "loss": 2.1658, + "step": 8798 + }, + { + "epoch": 0.29, + "grad_norm": 0.7093600034713745, + "learning_rate": 1.6355351662916064e-05, + "loss": 2.17, + "step": 8799 + }, + { + "epoch": 0.29, + "grad_norm": 0.721022367477417, + "learning_rate": 1.635453099722565e-05, + "loss": 2.0877, + "step": 8800 + }, + { + "epoch": 0.29, + "grad_norm": 0.7305416464805603, + "learning_rate": 1.635371025974547e-05, + "loss": 2.1786, + "step": 8801 + }, + { + "epoch": 0.29, + "grad_norm": 0.7348678112030029, + "learning_rate": 1.6352889450484794e-05, + "loss": 2.1508, + "step": 8802 + }, + { + "epoch": 0.29, + "grad_norm": 0.7743626236915588, + "learning_rate": 1.6352068569452893e-05, + "loss": 2.1255, + "step": 8803 + }, + { + "epoch": 0.29, + "grad_norm": 0.7292847037315369, + "learning_rate": 1.6351247616659046e-05, + "loss": 2.1044, + "step": 8804 + }, + { + "epoch": 0.29, + "grad_norm": 0.7174158692359924, + "learning_rate": 1.6350426592112523e-05, + "loss": 2.134, + "step": 8805 + }, + { + "epoch": 0.29, + "grad_norm": 0.702694296836853, + "learning_rate": 1.6349605495822605e-05, + "loss": 2.0817, + "step": 8806 + }, + { + "epoch": 0.29, + "grad_norm": 0.7096382975578308, + "learning_rate": 1.6348784327798567e-05, + "loss": 2.1485, + "step": 8807 + }, + { + "epoch": 0.29, + "grad_norm": 0.7276924848556519, + "learning_rate": 1.634796308804968e-05, + "loss": 2.1361, + "step": 8808 + }, + { + "epoch": 0.29, + "grad_norm": 0.7243310213088989, + "learning_rate": 1.634714177658523e-05, + "loss": 2.0894, + "step": 8809 + }, + { + "epoch": 0.29, + "grad_norm": 0.738064706325531, + "learning_rate": 1.634632039341449e-05, + "loss": 2.1871, + "step": 8810 + }, + { + "epoch": 0.29, + "grad_norm": 0.7024344205856323, + "learning_rate": 1.6345498938546742e-05, + "loss": 2.1296, + "step": 8811 + }, + { + "epoch": 0.29, + "grad_norm": 0.7270868420600891, + "learning_rate": 1.6344677411991266e-05, + "loss": 2.1841, + "step": 8812 + }, + { + "epoch": 0.29, + "grad_norm": 0.7622269988059998, + "learning_rate": 1.6343855813757344e-05, + "loss": 2.1947, + "step": 8813 + }, + { + "epoch": 0.29, + "grad_norm": 0.7116913795471191, + "learning_rate": 1.6343034143854254e-05, + "loss": 2.0811, + "step": 8814 + }, + { + "epoch": 0.29, + "grad_norm": 0.7222337126731873, + "learning_rate": 1.6342212402291285e-05, + "loss": 2.0714, + "step": 8815 + }, + { + "epoch": 0.29, + "grad_norm": 0.6948091983795166, + "learning_rate": 1.6341390589077715e-05, + "loss": 2.1652, + "step": 8816 + }, + { + "epoch": 0.29, + "grad_norm": 0.730566680431366, + "learning_rate": 1.634056870422283e-05, + "loss": 2.1516, + "step": 8817 + }, + { + "epoch": 0.29, + "grad_norm": 0.7419631481170654, + "learning_rate": 1.6339746747735916e-05, + "loss": 2.1248, + "step": 8818 + }, + { + "epoch": 0.29, + "grad_norm": 0.7744646072387695, + "learning_rate": 1.6338924719626262e-05, + "loss": 2.1506, + "step": 8819 + }, + { + "epoch": 0.29, + "grad_norm": 0.7301687598228455, + "learning_rate": 1.633810261990315e-05, + "loss": 2.1438, + "step": 8820 + }, + { + "epoch": 0.29, + "grad_norm": 0.7711130380630493, + "learning_rate": 1.6337280448575868e-05, + "loss": 2.1334, + "step": 8821 + }, + { + "epoch": 0.29, + "grad_norm": 0.7505212426185608, + "learning_rate": 1.6336458205653705e-05, + "loss": 2.1573, + "step": 8822 + }, + { + "epoch": 0.29, + "grad_norm": 0.7354183197021484, + "learning_rate": 1.633563589114595e-05, + "loss": 2.1934, + "step": 8823 + }, + { + "epoch": 0.29, + "grad_norm": 0.7145424485206604, + "learning_rate": 1.6334813505061898e-05, + "loss": 2.0968, + "step": 8824 + }, + { + "epoch": 0.29, + "grad_norm": 0.7443456649780273, + "learning_rate": 1.6333991047410828e-05, + "loss": 2.1553, + "step": 8825 + }, + { + "epoch": 0.29, + "grad_norm": 0.7150864601135254, + "learning_rate": 1.6333168518202045e-05, + "loss": 2.0695, + "step": 8826 + }, + { + "epoch": 0.29, + "grad_norm": 0.7085060477256775, + "learning_rate": 1.6332345917444837e-05, + "loss": 2.1055, + "step": 8827 + }, + { + "epoch": 0.29, + "grad_norm": 0.7280860543251038, + "learning_rate": 1.6331523245148493e-05, + "loss": 2.1907, + "step": 8828 + }, + { + "epoch": 0.29, + "grad_norm": 0.7594099044799805, + "learning_rate": 1.633070050132231e-05, + "loss": 2.1361, + "step": 8829 + }, + { + "epoch": 0.29, + "grad_norm": 0.7153259515762329, + "learning_rate": 1.6329877685975583e-05, + "loss": 2.1616, + "step": 8830 + }, + { + "epoch": 0.29, + "grad_norm": 0.6950773000717163, + "learning_rate": 1.6329054799117608e-05, + "loss": 2.1086, + "step": 8831 + }, + { + "epoch": 0.29, + "grad_norm": 0.7078160047531128, + "learning_rate": 1.6328231840757682e-05, + "loss": 2.2081, + "step": 8832 + }, + { + "epoch": 0.29, + "grad_norm": 0.6944240927696228, + "learning_rate": 1.6327408810905102e-05, + "loss": 2.2008, + "step": 8833 + }, + { + "epoch": 0.29, + "grad_norm": 0.6990172266960144, + "learning_rate": 1.6326585709569162e-05, + "loss": 2.1064, + "step": 8834 + }, + { + "epoch": 0.29, + "grad_norm": 0.727824866771698, + "learning_rate": 1.6325762536759166e-05, + "loss": 2.1484, + "step": 8835 + }, + { + "epoch": 0.29, + "grad_norm": 0.6902204155921936, + "learning_rate": 1.632493929248441e-05, + "loss": 2.1592, + "step": 8836 + }, + { + "epoch": 0.29, + "grad_norm": 0.7441250681877136, + "learning_rate": 1.6324115976754203e-05, + "loss": 2.205, + "step": 8837 + }, + { + "epoch": 0.29, + "grad_norm": 0.7540885210037231, + "learning_rate": 1.6323292589577837e-05, + "loss": 2.1061, + "step": 8838 + }, + { + "epoch": 0.29, + "grad_norm": 0.7254241108894348, + "learning_rate": 1.6322469130964616e-05, + "loss": 2.0898, + "step": 8839 + }, + { + "epoch": 0.29, + "grad_norm": 0.7198905348777771, + "learning_rate": 1.6321645600923844e-05, + "loss": 2.1784, + "step": 8840 + }, + { + "epoch": 0.29, + "grad_norm": 0.7073347568511963, + "learning_rate": 1.6320821999464826e-05, + "loss": 2.0828, + "step": 8841 + }, + { + "epoch": 0.29, + "grad_norm": 0.6953628063201904, + "learning_rate": 1.631999832659686e-05, + "loss": 2.0739, + "step": 8842 + }, + { + "epoch": 0.29, + "grad_norm": 0.7028694748878479, + "learning_rate": 1.6319174582329266e-05, + "loss": 2.1717, + "step": 8843 + }, + { + "epoch": 0.29, + "grad_norm": 0.7340285778045654, + "learning_rate": 1.6318350766671333e-05, + "loss": 2.126, + "step": 8844 + }, + { + "epoch": 0.29, + "grad_norm": 0.7425290942192078, + "learning_rate": 1.631752687963238e-05, + "loss": 2.1112, + "step": 8845 + }, + { + "epoch": 0.29, + "grad_norm": 0.766482412815094, + "learning_rate": 1.6316702921221708e-05, + "loss": 2.1888, + "step": 8846 + }, + { + "epoch": 0.29, + "grad_norm": 0.7340766787528992, + "learning_rate": 1.6315878891448635e-05, + "loss": 2.1639, + "step": 8847 + }, + { + "epoch": 0.29, + "grad_norm": 0.6977381706237793, + "learning_rate": 1.6315054790322458e-05, + "loss": 2.1266, + "step": 8848 + }, + { + "epoch": 0.29, + "grad_norm": 0.7038753628730774, + "learning_rate": 1.6314230617852492e-05, + "loss": 2.1159, + "step": 8849 + }, + { + "epoch": 0.29, + "grad_norm": 0.6921088099479675, + "learning_rate": 1.6313406374048054e-05, + "loss": 2.1113, + "step": 8850 + }, + { + "epoch": 0.29, + "grad_norm": 0.7174754738807678, + "learning_rate": 1.6312582058918447e-05, + "loss": 2.1747, + "step": 8851 + }, + { + "epoch": 0.29, + "grad_norm": 0.7212690711021423, + "learning_rate": 1.6311757672472987e-05, + "loss": 2.2322, + "step": 8852 + }, + { + "epoch": 0.29, + "grad_norm": 0.7302584052085876, + "learning_rate": 1.6310933214720992e-05, + "loss": 2.2476, + "step": 8853 + }, + { + "epoch": 0.29, + "grad_norm": 0.7185867428779602, + "learning_rate": 1.6310108685671768e-05, + "loss": 2.182, + "step": 8854 + }, + { + "epoch": 0.29, + "grad_norm": 0.7199523448944092, + "learning_rate": 1.6309284085334635e-05, + "loss": 2.115, + "step": 8855 + }, + { + "epoch": 0.29, + "grad_norm": 0.6832734942436218, + "learning_rate": 1.630845941371891e-05, + "loss": 2.082, + "step": 8856 + }, + { + "epoch": 0.29, + "grad_norm": 0.7254805564880371, + "learning_rate": 1.63076346708339e-05, + "loss": 2.164, + "step": 8857 + }, + { + "epoch": 0.29, + "grad_norm": 0.7400642037391663, + "learning_rate": 1.6306809856688942e-05, + "loss": 2.087, + "step": 8858 + }, + { + "epoch": 0.29, + "grad_norm": 0.697419285774231, + "learning_rate": 1.6305984971293337e-05, + "loss": 2.1324, + "step": 8859 + }, + { + "epoch": 0.29, + "grad_norm": 0.7521214485168457, + "learning_rate": 1.6305160014656406e-05, + "loss": 2.1685, + "step": 8860 + }, + { + "epoch": 0.29, + "grad_norm": 0.7082265615463257, + "learning_rate": 1.6304334986787477e-05, + "loss": 2.1524, + "step": 8861 + }, + { + "epoch": 0.29, + "grad_norm": 0.7007856369018555, + "learning_rate": 1.6303509887695864e-05, + "loss": 2.1332, + "step": 8862 + }, + { + "epoch": 0.29, + "grad_norm": 0.748794436454773, + "learning_rate": 1.6302684717390894e-05, + "loss": 2.1539, + "step": 8863 + }, + { + "epoch": 0.29, + "grad_norm": 0.7005513310432434, + "learning_rate": 1.6301859475881882e-05, + "loss": 2.0851, + "step": 8864 + }, + { + "epoch": 0.29, + "grad_norm": 0.7241923809051514, + "learning_rate": 1.6301034163178155e-05, + "loss": 2.1796, + "step": 8865 + }, + { + "epoch": 0.29, + "grad_norm": 0.744773805141449, + "learning_rate": 1.6300208779289036e-05, + "loss": 2.1582, + "step": 8866 + }, + { + "epoch": 0.3, + "grad_norm": 0.6742888689041138, + "learning_rate": 1.6299383324223855e-05, + "loss": 2.1298, + "step": 8867 + }, + { + "epoch": 0.3, + "grad_norm": 0.6955166459083557, + "learning_rate": 1.6298557797991927e-05, + "loss": 2.1325, + "step": 8868 + }, + { + "epoch": 0.3, + "grad_norm": 0.702983021736145, + "learning_rate": 1.6297732200602588e-05, + "loss": 2.1273, + "step": 8869 + }, + { + "epoch": 0.3, + "grad_norm": 0.7420945763587952, + "learning_rate": 1.629690653206516e-05, + "loss": 2.215, + "step": 8870 + }, + { + "epoch": 0.3, + "grad_norm": 0.7193183302879333, + "learning_rate": 1.6296080792388975e-05, + "loss": 2.161, + "step": 8871 + }, + { + "epoch": 0.3, + "grad_norm": 0.7072712779045105, + "learning_rate": 1.6295254981583356e-05, + "loss": 2.1187, + "step": 8872 + }, + { + "epoch": 0.3, + "grad_norm": 0.7018760442733765, + "learning_rate": 1.6294429099657638e-05, + "loss": 2.1413, + "step": 8873 + }, + { + "epoch": 0.3, + "grad_norm": 0.7001602649688721, + "learning_rate": 1.629360314662115e-05, + "loss": 2.1876, + "step": 8874 + }, + { + "epoch": 0.3, + "grad_norm": 0.6876716613769531, + "learning_rate": 1.629277712248322e-05, + "loss": 2.0828, + "step": 8875 + }, + { + "epoch": 0.3, + "grad_norm": 0.6851648092269897, + "learning_rate": 1.629195102725318e-05, + "loss": 2.1274, + "step": 8876 + }, + { + "epoch": 0.3, + "grad_norm": 0.7131861448287964, + "learning_rate": 1.6291124860940364e-05, + "loss": 2.1521, + "step": 8877 + }, + { + "epoch": 0.3, + "grad_norm": 0.703472912311554, + "learning_rate": 1.629029862355411e-05, + "loss": 2.0658, + "step": 8878 + }, + { + "epoch": 0.3, + "grad_norm": 0.7242181897163391, + "learning_rate": 1.6289472315103748e-05, + "loss": 2.1965, + "step": 8879 + }, + { + "epoch": 0.3, + "grad_norm": 0.7093063592910767, + "learning_rate": 1.6288645935598612e-05, + "loss": 2.1472, + "step": 8880 + }, + { + "epoch": 0.3, + "grad_norm": 0.7213153839111328, + "learning_rate": 1.6287819485048042e-05, + "loss": 2.1557, + "step": 8881 + }, + { + "epoch": 0.3, + "grad_norm": 0.7248581051826477, + "learning_rate": 1.6286992963461373e-05, + "loss": 2.1838, + "step": 8882 + }, + { + "epoch": 0.3, + "grad_norm": 0.7324124574661255, + "learning_rate": 1.6286166370847938e-05, + "loss": 2.1358, + "step": 8883 + }, + { + "epoch": 0.3, + "grad_norm": 0.7122989892959595, + "learning_rate": 1.6285339707217083e-05, + "loss": 2.1307, + "step": 8884 + }, + { + "epoch": 0.3, + "grad_norm": 0.723582923412323, + "learning_rate": 1.628451297257814e-05, + "loss": 2.1371, + "step": 8885 + }, + { + "epoch": 0.3, + "grad_norm": 0.7509222030639648, + "learning_rate": 1.628368616694046e-05, + "loss": 2.2076, + "step": 8886 + }, + { + "epoch": 0.3, + "grad_norm": 0.692204475402832, + "learning_rate": 1.6282859290313367e-05, + "loss": 2.0991, + "step": 8887 + }, + { + "epoch": 0.3, + "grad_norm": 0.6966241002082825, + "learning_rate": 1.6282032342706215e-05, + "loss": 2.1488, + "step": 8888 + }, + { + "epoch": 0.3, + "grad_norm": 0.7217451930046082, + "learning_rate": 1.6281205324128348e-05, + "loss": 2.1477, + "step": 8889 + }, + { + "epoch": 0.3, + "grad_norm": 0.7313122749328613, + "learning_rate": 1.62803782345891e-05, + "loss": 2.156, + "step": 8890 + }, + { + "epoch": 0.3, + "grad_norm": 0.7585131525993347, + "learning_rate": 1.627955107409782e-05, + "loss": 2.1118, + "step": 8891 + }, + { + "epoch": 0.3, + "grad_norm": 0.7477955222129822, + "learning_rate": 1.627872384266385e-05, + "loss": 2.1316, + "step": 8892 + }, + { + "epoch": 0.3, + "grad_norm": 0.7714107632637024, + "learning_rate": 1.627789654029654e-05, + "loss": 2.1702, + "step": 8893 + }, + { + "epoch": 0.3, + "grad_norm": 0.7142125368118286, + "learning_rate": 1.6277069167005236e-05, + "loss": 2.1266, + "step": 8894 + }, + { + "epoch": 0.3, + "grad_norm": 0.7066044807434082, + "learning_rate": 1.6276241722799283e-05, + "loss": 2.1583, + "step": 8895 + }, + { + "epoch": 0.3, + "grad_norm": 0.7052202224731445, + "learning_rate": 1.6275414207688025e-05, + "loss": 2.1031, + "step": 8896 + }, + { + "epoch": 0.3, + "grad_norm": 0.7488783001899719, + "learning_rate": 1.6274586621680818e-05, + "loss": 2.2091, + "step": 8897 + }, + { + "epoch": 0.3, + "grad_norm": 0.6974542737007141, + "learning_rate": 1.627375896478701e-05, + "loss": 2.1284, + "step": 8898 + }, + { + "epoch": 0.3, + "grad_norm": 0.6987120509147644, + "learning_rate": 1.6272931237015946e-05, + "loss": 2.1194, + "step": 8899 + }, + { + "epoch": 0.3, + "grad_norm": 0.7495936155319214, + "learning_rate": 1.6272103438376983e-05, + "loss": 2.1288, + "step": 8900 + }, + { + "epoch": 0.3, + "grad_norm": 0.7632669806480408, + "learning_rate": 1.6271275568879473e-05, + "loss": 2.1519, + "step": 8901 + }, + { + "epoch": 0.3, + "grad_norm": 0.7524606585502625, + "learning_rate": 1.6270447628532763e-05, + "loss": 2.1329, + "step": 8902 + }, + { + "epoch": 0.3, + "grad_norm": 0.7062490582466125, + "learning_rate": 1.6269619617346214e-05, + "loss": 2.1789, + "step": 8903 + }, + { + "epoch": 0.3, + "grad_norm": 0.7303714156150818, + "learning_rate": 1.6268791535329178e-05, + "loss": 2.1431, + "step": 8904 + }, + { + "epoch": 0.3, + "grad_norm": 0.7478488087654114, + "learning_rate": 1.6267963382491005e-05, + "loss": 2.1338, + "step": 8905 + }, + { + "epoch": 0.3, + "grad_norm": 0.7119024991989136, + "learning_rate": 1.6267135158841057e-05, + "loss": 2.135, + "step": 8906 + }, + { + "epoch": 0.3, + "grad_norm": 0.7577170133590698, + "learning_rate": 1.626630686438869e-05, + "loss": 2.1089, + "step": 8907 + }, + { + "epoch": 0.3, + "grad_norm": 0.7047663927078247, + "learning_rate": 1.626547849914326e-05, + "loss": 2.1295, + "step": 8908 + }, + { + "epoch": 0.3, + "grad_norm": 0.7324678301811218, + "learning_rate": 1.6264650063114126e-05, + "loss": 2.1463, + "step": 8909 + }, + { + "epoch": 0.3, + "grad_norm": 0.704318642616272, + "learning_rate": 1.6263821556310642e-05, + "loss": 2.1334, + "step": 8910 + }, + { + "epoch": 0.3, + "grad_norm": 0.7360548973083496, + "learning_rate": 1.626299297874218e-05, + "loss": 2.1137, + "step": 8911 + }, + { + "epoch": 0.3, + "grad_norm": 0.7151976227760315, + "learning_rate": 1.626216433041809e-05, + "loss": 2.1272, + "step": 8912 + }, + { + "epoch": 0.3, + "grad_norm": 0.7338722944259644, + "learning_rate": 1.6261335611347743e-05, + "loss": 2.1485, + "step": 8913 + }, + { + "epoch": 0.3, + "grad_norm": 0.7278993129730225, + "learning_rate": 1.6260506821540485e-05, + "loss": 2.1021, + "step": 8914 + }, + { + "epoch": 0.3, + "grad_norm": 0.7019294500350952, + "learning_rate": 1.62596779610057e-05, + "loss": 2.1049, + "step": 8915 + }, + { + "epoch": 0.3, + "grad_norm": 0.7294197678565979, + "learning_rate": 1.625884902975274e-05, + "loss": 2.2182, + "step": 8916 + }, + { + "epoch": 0.3, + "grad_norm": 0.7339378595352173, + "learning_rate": 1.625802002779097e-05, + "loss": 2.0896, + "step": 8917 + }, + { + "epoch": 0.3, + "grad_norm": 0.7534977197647095, + "learning_rate": 1.6257190955129757e-05, + "loss": 2.0876, + "step": 8918 + }, + { + "epoch": 0.3, + "grad_norm": 0.6987320184707642, + "learning_rate": 1.6256361811778466e-05, + "loss": 2.1723, + "step": 8919 + }, + { + "epoch": 0.3, + "grad_norm": 0.6845196485519409, + "learning_rate": 1.625553259774647e-05, + "loss": 2.1065, + "step": 8920 + }, + { + "epoch": 0.3, + "grad_norm": 0.7144913077354431, + "learning_rate": 1.6254703313043127e-05, + "loss": 2.0659, + "step": 8921 + }, + { + "epoch": 0.3, + "grad_norm": 0.706723690032959, + "learning_rate": 1.625387395767782e-05, + "loss": 2.1204, + "step": 8922 + }, + { + "epoch": 0.3, + "grad_norm": 0.7010518312454224, + "learning_rate": 1.62530445316599e-05, + "loss": 2.0792, + "step": 8923 + }, + { + "epoch": 0.3, + "grad_norm": 0.7021790146827698, + "learning_rate": 1.6252215034998755e-05, + "loss": 2.1956, + "step": 8924 + }, + { + "epoch": 0.3, + "grad_norm": 0.7198841571807861, + "learning_rate": 1.6251385467703747e-05, + "loss": 2.1197, + "step": 8925 + }, + { + "epoch": 0.3, + "grad_norm": 0.7137982845306396, + "learning_rate": 1.625055582978425e-05, + "loss": 2.1395, + "step": 8926 + }, + { + "epoch": 0.3, + "grad_norm": 0.7104584574699402, + "learning_rate": 1.6249726121249635e-05, + "loss": 2.163, + "step": 8927 + }, + { + "epoch": 0.3, + "grad_norm": 0.7310519218444824, + "learning_rate": 1.6248896342109277e-05, + "loss": 2.1281, + "step": 8928 + }, + { + "epoch": 0.3, + "grad_norm": 0.7242134213447571, + "learning_rate": 1.624806649237255e-05, + "loss": 2.159, + "step": 8929 + }, + { + "epoch": 0.3, + "grad_norm": 0.7312332391738892, + "learning_rate": 1.624723657204883e-05, + "loss": 2.2189, + "step": 8930 + }, + { + "epoch": 0.3, + "grad_norm": 0.7232054471969604, + "learning_rate": 1.6246406581147493e-05, + "loss": 2.0991, + "step": 8931 + }, + { + "epoch": 0.3, + "grad_norm": 0.692573606967926, + "learning_rate": 1.6245576519677915e-05, + "loss": 2.1192, + "step": 8932 + }, + { + "epoch": 0.3, + "grad_norm": 0.7269569039344788, + "learning_rate": 1.6244746387649476e-05, + "loss": 2.2017, + "step": 8933 + }, + { + "epoch": 0.3, + "grad_norm": 0.7473732829093933, + "learning_rate": 1.6243916185071548e-05, + "loss": 2.1832, + "step": 8934 + }, + { + "epoch": 0.3, + "grad_norm": 0.7651844620704651, + "learning_rate": 1.6243085911953516e-05, + "loss": 2.1119, + "step": 8935 + }, + { + "epoch": 0.3, + "grad_norm": 0.7066621780395508, + "learning_rate": 1.6242255568304757e-05, + "loss": 2.0934, + "step": 8936 + }, + { + "epoch": 0.3, + "grad_norm": 0.7401037812232971, + "learning_rate": 1.6241425154134655e-05, + "loss": 2.2184, + "step": 8937 + }, + { + "epoch": 0.3, + "grad_norm": 0.7021321058273315, + "learning_rate": 1.624059466945259e-05, + "loss": 2.1139, + "step": 8938 + }, + { + "epoch": 0.3, + "grad_norm": 0.727994441986084, + "learning_rate": 1.623976411426794e-05, + "loss": 2.1272, + "step": 8939 + }, + { + "epoch": 0.3, + "grad_norm": 0.7134078145027161, + "learning_rate": 1.6238933488590097e-05, + "loss": 2.0149, + "step": 8940 + }, + { + "epoch": 0.3, + "grad_norm": 0.7448168992996216, + "learning_rate": 1.6238102792428435e-05, + "loss": 2.1625, + "step": 8941 + }, + { + "epoch": 0.3, + "grad_norm": 0.7107097506523132, + "learning_rate": 1.6237272025792348e-05, + "loss": 2.1668, + "step": 8942 + }, + { + "epoch": 0.3, + "grad_norm": 0.7188959717750549, + "learning_rate": 1.6236441188691214e-05, + "loss": 2.1161, + "step": 8943 + }, + { + "epoch": 0.3, + "grad_norm": 0.7116233706474304, + "learning_rate": 1.6235610281134425e-05, + "loss": 2.1555, + "step": 8944 + }, + { + "epoch": 0.3, + "grad_norm": 0.7164419889450073, + "learning_rate": 1.6234779303131362e-05, + "loss": 2.2121, + "step": 8945 + }, + { + "epoch": 0.3, + "grad_norm": 0.7134877443313599, + "learning_rate": 1.6233948254691418e-05, + "loss": 2.1065, + "step": 8946 + }, + { + "epoch": 0.3, + "grad_norm": 0.733862042427063, + "learning_rate": 1.6233117135823978e-05, + "loss": 2.0999, + "step": 8947 + }, + { + "epoch": 0.3, + "grad_norm": 0.7173563838005066, + "learning_rate": 1.6232285946538437e-05, + "loss": 2.1461, + "step": 8948 + }, + { + "epoch": 0.3, + "grad_norm": 0.6966747045516968, + "learning_rate": 1.623145468684418e-05, + "loss": 2.1697, + "step": 8949 + }, + { + "epoch": 0.3, + "grad_norm": 0.7019830942153931, + "learning_rate": 1.62306233567506e-05, + "loss": 2.0592, + "step": 8950 + }, + { + "epoch": 0.3, + "grad_norm": 0.6916581988334656, + "learning_rate": 1.622979195626709e-05, + "loss": 2.0856, + "step": 8951 + }, + { + "epoch": 0.3, + "grad_norm": 0.6881765127182007, + "learning_rate": 1.622896048540304e-05, + "loss": 2.074, + "step": 8952 + }, + { + "epoch": 0.3, + "grad_norm": 0.7288192510604858, + "learning_rate": 1.6228128944167848e-05, + "loss": 2.1247, + "step": 8953 + }, + { + "epoch": 0.3, + "grad_norm": 0.7455095052719116, + "learning_rate": 1.62272973325709e-05, + "loss": 2.1643, + "step": 8954 + }, + { + "epoch": 0.3, + "grad_norm": 0.7479301691055298, + "learning_rate": 1.6226465650621598e-05, + "loss": 2.1373, + "step": 8955 + }, + { + "epoch": 0.3, + "grad_norm": 0.7094659805297852, + "learning_rate": 1.622563389832934e-05, + "loss": 2.1093, + "step": 8956 + }, + { + "epoch": 0.3, + "grad_norm": 0.6840853095054626, + "learning_rate": 1.6224802075703515e-05, + "loss": 2.0851, + "step": 8957 + }, + { + "epoch": 0.3, + "grad_norm": 0.7015469670295715, + "learning_rate": 1.6223970182753525e-05, + "loss": 2.0948, + "step": 8958 + }, + { + "epoch": 0.3, + "grad_norm": 0.7198246121406555, + "learning_rate": 1.622313821948877e-05, + "loss": 2.2097, + "step": 8959 + }, + { + "epoch": 0.3, + "grad_norm": 0.6909313797950745, + "learning_rate": 1.6222306185918645e-05, + "loss": 2.1223, + "step": 8960 + }, + { + "epoch": 0.3, + "grad_norm": 0.7057914137840271, + "learning_rate": 1.622147408205255e-05, + "loss": 2.1095, + "step": 8961 + }, + { + "epoch": 0.3, + "grad_norm": 0.7460607290267944, + "learning_rate": 1.622064190789989e-05, + "loss": 2.1432, + "step": 8962 + }, + { + "epoch": 0.3, + "grad_norm": 0.6938759684562683, + "learning_rate": 1.621980966347006e-05, + "loss": 2.1748, + "step": 8963 + }, + { + "epoch": 0.3, + "grad_norm": 0.7186651825904846, + "learning_rate": 1.6218977348772466e-05, + "loss": 2.2098, + "step": 8964 + }, + { + "epoch": 0.3, + "grad_norm": 0.7120456099510193, + "learning_rate": 1.621814496381651e-05, + "loss": 2.146, + "step": 8965 + }, + { + "epoch": 0.3, + "grad_norm": 0.7139744162559509, + "learning_rate": 1.6217312508611597e-05, + "loss": 2.0768, + "step": 8966 + }, + { + "epoch": 0.3, + "grad_norm": 0.6866811513900757, + "learning_rate": 1.6216479983167133e-05, + "loss": 2.0386, + "step": 8967 + }, + { + "epoch": 0.3, + "grad_norm": 0.7079299092292786, + "learning_rate": 1.6215647387492518e-05, + "loss": 2.1422, + "step": 8968 + }, + { + "epoch": 0.3, + "grad_norm": 0.7179138660430908, + "learning_rate": 1.6214814721597164e-05, + "loss": 2.143, + "step": 8969 + }, + { + "epoch": 0.3, + "grad_norm": 0.7471867203712463, + "learning_rate": 1.6213981985490476e-05, + "loss": 2.1949, + "step": 8970 + }, + { + "epoch": 0.3, + "grad_norm": 0.7435702085494995, + "learning_rate": 1.621314917918186e-05, + "loss": 2.1242, + "step": 8971 + }, + { + "epoch": 0.3, + "grad_norm": 0.7322764992713928, + "learning_rate": 1.6212316302680723e-05, + "loss": 2.1591, + "step": 8972 + }, + { + "epoch": 0.3, + "grad_norm": 0.7437384724617004, + "learning_rate": 1.621148335599648e-05, + "loss": 2.0912, + "step": 8973 + }, + { + "epoch": 0.3, + "grad_norm": 0.7254350781440735, + "learning_rate": 1.621065033913854e-05, + "loss": 2.1919, + "step": 8974 + }, + { + "epoch": 0.3, + "grad_norm": 0.7194914817810059, + "learning_rate": 1.6209817252116313e-05, + "loss": 2.1169, + "step": 8975 + }, + { + "epoch": 0.3, + "grad_norm": 0.6991117596626282, + "learning_rate": 1.6208984094939206e-05, + "loss": 2.1374, + "step": 8976 + }, + { + "epoch": 0.3, + "grad_norm": 0.733970582485199, + "learning_rate": 1.620815086761664e-05, + "loss": 2.1179, + "step": 8977 + }, + { + "epoch": 0.3, + "grad_norm": 0.6946825981140137, + "learning_rate": 1.620731757015802e-05, + "loss": 2.1284, + "step": 8978 + }, + { + "epoch": 0.3, + "grad_norm": 0.7070122361183167, + "learning_rate": 1.6206484202572765e-05, + "loss": 2.1042, + "step": 8979 + }, + { + "epoch": 0.3, + "grad_norm": 0.7761732339859009, + "learning_rate": 1.620565076487029e-05, + "loss": 2.1783, + "step": 8980 + }, + { + "epoch": 0.3, + "grad_norm": 0.7341007590293884, + "learning_rate": 1.6204817257060006e-05, + "loss": 2.1719, + "step": 8981 + }, + { + "epoch": 0.3, + "grad_norm": 0.7100133299827576, + "learning_rate": 1.620398367915134e-05, + "loss": 2.1772, + "step": 8982 + }, + { + "epoch": 0.3, + "grad_norm": 0.6998253464698792, + "learning_rate": 1.6203150031153693e-05, + "loss": 2.1283, + "step": 8983 + }, + { + "epoch": 0.3, + "grad_norm": 0.7178162932395935, + "learning_rate": 1.62023163130765e-05, + "loss": 2.0893, + "step": 8984 + }, + { + "epoch": 0.3, + "grad_norm": 0.7074682116508484, + "learning_rate": 1.620148252492917e-05, + "loss": 2.1625, + "step": 8985 + }, + { + "epoch": 0.3, + "grad_norm": 0.7318282127380371, + "learning_rate": 1.620064866672112e-05, + "loss": 2.1964, + "step": 8986 + }, + { + "epoch": 0.3, + "grad_norm": 0.7359329462051392, + "learning_rate": 1.619981473846178e-05, + "loss": 2.1485, + "step": 8987 + }, + { + "epoch": 0.3, + "grad_norm": 0.694105327129364, + "learning_rate": 1.6198980740160568e-05, + "loss": 2.1176, + "step": 8988 + }, + { + "epoch": 0.3, + "grad_norm": 0.7203445434570312, + "learning_rate": 1.6198146671826902e-05, + "loss": 2.0862, + "step": 8989 + }, + { + "epoch": 0.3, + "grad_norm": 0.7165696620941162, + "learning_rate": 1.619731253347021e-05, + "loss": 2.171, + "step": 8990 + }, + { + "epoch": 0.3, + "grad_norm": 0.7560213804244995, + "learning_rate": 1.6196478325099908e-05, + "loss": 2.1725, + "step": 8991 + }, + { + "epoch": 0.3, + "grad_norm": 0.7538928389549255, + "learning_rate": 1.619564404672543e-05, + "loss": 2.1157, + "step": 8992 + }, + { + "epoch": 0.3, + "grad_norm": 0.7136211395263672, + "learning_rate": 1.6194809698356192e-05, + "loss": 2.1034, + "step": 8993 + }, + { + "epoch": 0.3, + "grad_norm": 0.7151387333869934, + "learning_rate": 1.619397528000163e-05, + "loss": 2.0754, + "step": 8994 + }, + { + "epoch": 0.3, + "grad_norm": 0.6979206800460815, + "learning_rate": 1.6193140791671164e-05, + "loss": 2.1314, + "step": 8995 + }, + { + "epoch": 0.3, + "grad_norm": 0.7063075304031372, + "learning_rate": 1.619230623337422e-05, + "loss": 2.1154, + "step": 8996 + }, + { + "epoch": 0.3, + "grad_norm": 0.7320974469184875, + "learning_rate": 1.6191471605120232e-05, + "loss": 2.183, + "step": 8997 + }, + { + "epoch": 0.3, + "grad_norm": 0.7277300357818604, + "learning_rate": 1.6190636906918626e-05, + "loss": 2.1677, + "step": 8998 + }, + { + "epoch": 0.3, + "grad_norm": 0.7248032689094543, + "learning_rate": 1.618980213877883e-05, + "loss": 2.1266, + "step": 8999 + }, + { + "epoch": 0.3, + "grad_norm": 0.6959267258644104, + "learning_rate": 1.618896730071028e-05, + "loss": 2.1184, + "step": 9000 + }, + { + "epoch": 0.3, + "grad_norm": 0.7078720927238464, + "learning_rate": 1.6188132392722404e-05, + "loss": 2.1367, + "step": 9001 + }, + { + "epoch": 0.3, + "grad_norm": 0.7187773585319519, + "learning_rate": 1.6187297414824633e-05, + "loss": 2.1379, + "step": 9002 + }, + { + "epoch": 0.3, + "grad_norm": 0.7332285046577454, + "learning_rate": 1.61864623670264e-05, + "loss": 2.1368, + "step": 9003 + }, + { + "epoch": 0.3, + "grad_norm": 0.7162072062492371, + "learning_rate": 1.6185627249337145e-05, + "loss": 2.1621, + "step": 9004 + }, + { + "epoch": 0.3, + "grad_norm": 0.7183804512023926, + "learning_rate": 1.61847920617663e-05, + "loss": 2.114, + "step": 9005 + }, + { + "epoch": 0.3, + "grad_norm": 0.6891884207725525, + "learning_rate": 1.6183956804323292e-05, + "loss": 2.1134, + "step": 9006 + }, + { + "epoch": 0.3, + "grad_norm": 0.743189811706543, + "learning_rate": 1.618312147701757e-05, + "loss": 2.1012, + "step": 9007 + }, + { + "epoch": 0.3, + "grad_norm": 0.7743074893951416, + "learning_rate": 1.6182286079858562e-05, + "loss": 2.1569, + "step": 9008 + }, + { + "epoch": 0.3, + "grad_norm": 0.7137490510940552, + "learning_rate": 1.618145061285571e-05, + "loss": 2.1666, + "step": 9009 + }, + { + "epoch": 0.3, + "grad_norm": 0.7253501415252686, + "learning_rate": 1.6180615076018456e-05, + "loss": 2.2228, + "step": 9010 + }, + { + "epoch": 0.3, + "grad_norm": 0.7498502731323242, + "learning_rate": 1.617977946935623e-05, + "loss": 2.1648, + "step": 9011 + }, + { + "epoch": 0.3, + "grad_norm": 0.6997895836830139, + "learning_rate": 1.6178943792878478e-05, + "loss": 2.1655, + "step": 9012 + }, + { + "epoch": 0.3, + "grad_norm": 0.7311301231384277, + "learning_rate": 1.617810804659464e-05, + "loss": 2.1689, + "step": 9013 + }, + { + "epoch": 0.3, + "grad_norm": 0.709051251411438, + "learning_rate": 1.6177272230514157e-05, + "loss": 2.1446, + "step": 9014 + }, + { + "epoch": 0.3, + "grad_norm": 0.6879650950431824, + "learning_rate": 1.6176436344646476e-05, + "loss": 2.1112, + "step": 9015 + }, + { + "epoch": 0.3, + "grad_norm": 0.7095572352409363, + "learning_rate": 1.6175600389001034e-05, + "loss": 2.0908, + "step": 9016 + }, + { + "epoch": 0.3, + "grad_norm": 0.7236729264259338, + "learning_rate": 1.6174764363587284e-05, + "loss": 2.1153, + "step": 9017 + }, + { + "epoch": 0.3, + "grad_norm": 0.7203137874603271, + "learning_rate": 1.617392826841466e-05, + "loss": 2.1663, + "step": 9018 + }, + { + "epoch": 0.3, + "grad_norm": 0.7368481159210205, + "learning_rate": 1.617309210349261e-05, + "loss": 2.1254, + "step": 9019 + }, + { + "epoch": 0.3, + "grad_norm": 0.718671977519989, + "learning_rate": 1.6172255868830586e-05, + "loss": 2.1766, + "step": 9020 + }, + { + "epoch": 0.3, + "grad_norm": 0.7523877620697021, + "learning_rate": 1.6171419564438037e-05, + "loss": 2.1286, + "step": 9021 + }, + { + "epoch": 0.3, + "grad_norm": 0.701265811920166, + "learning_rate": 1.6170583190324398e-05, + "loss": 2.0959, + "step": 9022 + }, + { + "epoch": 0.3, + "grad_norm": 0.7192615866661072, + "learning_rate": 1.616974674649913e-05, + "loss": 2.1058, + "step": 9023 + }, + { + "epoch": 0.3, + "grad_norm": 0.7001630067825317, + "learning_rate": 1.6168910232971683e-05, + "loss": 2.1264, + "step": 9024 + }, + { + "epoch": 0.3, + "grad_norm": 0.7107051610946655, + "learning_rate": 1.6168073649751498e-05, + "loss": 2.1413, + "step": 9025 + }, + { + "epoch": 0.3, + "grad_norm": 0.7077645659446716, + "learning_rate": 1.6167236996848036e-05, + "loss": 2.1035, + "step": 9026 + }, + { + "epoch": 0.3, + "grad_norm": 0.7043297290802002, + "learning_rate": 1.616640027427074e-05, + "loss": 2.1767, + "step": 9027 + }, + { + "epoch": 0.3, + "grad_norm": 0.7185124158859253, + "learning_rate": 1.616556348202907e-05, + "loss": 2.1148, + "step": 9028 + }, + { + "epoch": 0.3, + "grad_norm": 0.6983075737953186, + "learning_rate": 1.6164726620132478e-05, + "loss": 2.1162, + "step": 9029 + }, + { + "epoch": 0.3, + "grad_norm": 0.7281727194786072, + "learning_rate": 1.6163889688590416e-05, + "loss": 2.1481, + "step": 9030 + }, + { + "epoch": 0.3, + "grad_norm": 0.7135176658630371, + "learning_rate": 1.6163052687412343e-05, + "loss": 2.1164, + "step": 9031 + }, + { + "epoch": 0.3, + "grad_norm": 0.6879961490631104, + "learning_rate": 1.6162215616607714e-05, + "loss": 2.1641, + "step": 9032 + }, + { + "epoch": 0.3, + "grad_norm": 0.6878626942634583, + "learning_rate": 1.616137847618598e-05, + "loss": 2.1461, + "step": 9033 + }, + { + "epoch": 0.3, + "grad_norm": 0.7165526151657104, + "learning_rate": 1.6160541266156605e-05, + "loss": 2.1356, + "step": 9034 + }, + { + "epoch": 0.3, + "grad_norm": 0.691294252872467, + "learning_rate": 1.6159703986529044e-05, + "loss": 2.083, + "step": 9035 + }, + { + "epoch": 0.3, + "grad_norm": 0.7055017948150635, + "learning_rate": 1.615886663731276e-05, + "loss": 2.1519, + "step": 9036 + }, + { + "epoch": 0.3, + "grad_norm": 0.7372446060180664, + "learning_rate": 1.6158029218517207e-05, + "loss": 2.1293, + "step": 9037 + }, + { + "epoch": 0.3, + "grad_norm": 0.7621408700942993, + "learning_rate": 1.615719173015185e-05, + "loss": 2.2021, + "step": 9038 + }, + { + "epoch": 0.3, + "grad_norm": 0.7190671563148499, + "learning_rate": 1.615635417222615e-05, + "loss": 2.166, + "step": 9039 + }, + { + "epoch": 0.3, + "grad_norm": 0.716773509979248, + "learning_rate": 1.6155516544749567e-05, + "loss": 2.0983, + "step": 9040 + }, + { + "epoch": 0.3, + "grad_norm": 0.718288242816925, + "learning_rate": 1.615467884773157e-05, + "loss": 2.1386, + "step": 9041 + }, + { + "epoch": 0.3, + "grad_norm": 0.6960080862045288, + "learning_rate": 1.6153841081181613e-05, + "loss": 2.0962, + "step": 9042 + }, + { + "epoch": 0.3, + "grad_norm": 0.7400527596473694, + "learning_rate": 1.6153003245109167e-05, + "loss": 2.1168, + "step": 9043 + }, + { + "epoch": 0.3, + "grad_norm": 0.7312054634094238, + "learning_rate": 1.61521653395237e-05, + "loss": 2.1429, + "step": 9044 + }, + { + "epoch": 0.3, + "grad_norm": 0.7034080028533936, + "learning_rate": 1.6151327364434668e-05, + "loss": 2.1731, + "step": 9045 + }, + { + "epoch": 0.3, + "grad_norm": 0.7475862503051758, + "learning_rate": 1.615048931985155e-05, + "loss": 2.1292, + "step": 9046 + }, + { + "epoch": 0.3, + "grad_norm": 0.7107519507408142, + "learning_rate": 1.6149651205783807e-05, + "loss": 2.1497, + "step": 9047 + }, + { + "epoch": 0.3, + "grad_norm": 0.728132426738739, + "learning_rate": 1.614881302224091e-05, + "loss": 2.0438, + "step": 9048 + }, + { + "epoch": 0.3, + "grad_norm": 0.7282428741455078, + "learning_rate": 1.6147974769232325e-05, + "loss": 2.201, + "step": 9049 + }, + { + "epoch": 0.3, + "grad_norm": 0.7181493639945984, + "learning_rate": 1.6147136446767523e-05, + "loss": 2.1295, + "step": 9050 + }, + { + "epoch": 0.3, + "grad_norm": 0.7265649437904358, + "learning_rate": 1.6146298054855977e-05, + "loss": 2.2017, + "step": 9051 + }, + { + "epoch": 0.3, + "grad_norm": 0.7228426337242126, + "learning_rate": 1.6145459593507158e-05, + "loss": 2.1885, + "step": 9052 + }, + { + "epoch": 0.3, + "grad_norm": 0.6979672908782959, + "learning_rate": 1.6144621062730537e-05, + "loss": 2.1016, + "step": 9053 + }, + { + "epoch": 0.3, + "grad_norm": 0.7314849495887756, + "learning_rate": 1.614378246253559e-05, + "loss": 2.1651, + "step": 9054 + }, + { + "epoch": 0.3, + "grad_norm": 0.7438293099403381, + "learning_rate": 1.614294379293179e-05, + "loss": 2.1504, + "step": 9055 + }, + { + "epoch": 0.3, + "grad_norm": 0.723396360874176, + "learning_rate": 1.614210505392861e-05, + "loss": 2.1252, + "step": 9056 + }, + { + "epoch": 0.3, + "grad_norm": 0.6903195381164551, + "learning_rate": 1.6141266245535527e-05, + "loss": 2.1004, + "step": 9057 + }, + { + "epoch": 0.3, + "grad_norm": 0.7252946496009827, + "learning_rate": 1.6140427367762013e-05, + "loss": 2.1212, + "step": 9058 + }, + { + "epoch": 0.3, + "grad_norm": 0.728418231010437, + "learning_rate": 1.6139588420617555e-05, + "loss": 2.1324, + "step": 9059 + }, + { + "epoch": 0.3, + "grad_norm": 0.7290331125259399, + "learning_rate": 1.6138749404111626e-05, + "loss": 2.0888, + "step": 9060 + }, + { + "epoch": 0.3, + "grad_norm": 0.7248278856277466, + "learning_rate": 1.61379103182537e-05, + "loss": 2.1773, + "step": 9061 + }, + { + "epoch": 0.3, + "grad_norm": 0.7369230389595032, + "learning_rate": 1.6137071163053262e-05, + "loss": 2.1476, + "step": 9062 + }, + { + "epoch": 0.3, + "grad_norm": 0.7253776788711548, + "learning_rate": 1.613623193851979e-05, + "loss": 2.0841, + "step": 9063 + }, + { + "epoch": 0.3, + "grad_norm": 0.7122920155525208, + "learning_rate": 1.6135392644662762e-05, + "loss": 2.1002, + "step": 9064 + }, + { + "epoch": 0.3, + "grad_norm": 0.7598580121994019, + "learning_rate": 1.613455328149167e-05, + "loss": 2.1341, + "step": 9065 + }, + { + "epoch": 0.3, + "grad_norm": 0.7112950086593628, + "learning_rate": 1.6133713849015987e-05, + "loss": 2.0898, + "step": 9066 + }, + { + "epoch": 0.3, + "grad_norm": 0.7106995582580566, + "learning_rate": 1.6132874347245204e-05, + "loss": 2.1188, + "step": 9067 + }, + { + "epoch": 0.3, + "grad_norm": 0.7363333702087402, + "learning_rate": 1.6132034776188796e-05, + "loss": 2.1318, + "step": 9068 + }, + { + "epoch": 0.3, + "grad_norm": 0.7098732590675354, + "learning_rate": 1.6131195135856253e-05, + "loss": 2.1089, + "step": 9069 + }, + { + "epoch": 0.3, + "grad_norm": 0.7206540107727051, + "learning_rate": 1.6130355426257063e-05, + "loss": 2.1553, + "step": 9070 + }, + { + "epoch": 0.3, + "grad_norm": 0.697512149810791, + "learning_rate": 1.6129515647400705e-05, + "loss": 2.1651, + "step": 9071 + }, + { + "epoch": 0.3, + "grad_norm": 0.700214684009552, + "learning_rate": 1.612867579929668e-05, + "loss": 2.0557, + "step": 9072 + }, + { + "epoch": 0.3, + "grad_norm": 0.7461444139480591, + "learning_rate": 1.612783588195446e-05, + "loss": 2.1188, + "step": 9073 + }, + { + "epoch": 0.3, + "grad_norm": 0.6864328980445862, + "learning_rate": 1.6126995895383542e-05, + "loss": 2.031, + "step": 9074 + }, + { + "epoch": 0.3, + "grad_norm": 0.6722150444984436, + "learning_rate": 1.612615583959342e-05, + "loss": 2.1271, + "step": 9075 + }, + { + "epoch": 0.3, + "grad_norm": 0.7236228585243225, + "learning_rate": 1.6125315714593573e-05, + "loss": 2.141, + "step": 9076 + }, + { + "epoch": 0.3, + "grad_norm": 0.6886546611785889, + "learning_rate": 1.6124475520393508e-05, + "loss": 2.0355, + "step": 9077 + }, + { + "epoch": 0.3, + "grad_norm": 0.7419759035110474, + "learning_rate": 1.61236352570027e-05, + "loss": 2.1241, + "step": 9078 + }, + { + "epoch": 0.3, + "grad_norm": 0.7256042957305908, + "learning_rate": 1.6122794924430655e-05, + "loss": 2.1811, + "step": 9079 + }, + { + "epoch": 0.3, + "grad_norm": 0.7233948707580566, + "learning_rate": 1.6121954522686857e-05, + "loss": 2.1728, + "step": 9080 + }, + { + "epoch": 0.3, + "grad_norm": 0.7298876047134399, + "learning_rate": 1.6121114051780807e-05, + "loss": 2.0923, + "step": 9081 + }, + { + "epoch": 0.3, + "grad_norm": 0.6840257048606873, + "learning_rate": 1.6120273511721997e-05, + "loss": 2.1553, + "step": 9082 + }, + { + "epoch": 0.3, + "grad_norm": 0.740852415561676, + "learning_rate": 1.6119432902519925e-05, + "loss": 2.1631, + "step": 9083 + }, + { + "epoch": 0.3, + "grad_norm": 0.7222468852996826, + "learning_rate": 1.6118592224184085e-05, + "loss": 2.1989, + "step": 9084 + }, + { + "epoch": 0.3, + "grad_norm": 0.7189435362815857, + "learning_rate": 1.6117751476723972e-05, + "loss": 2.162, + "step": 9085 + }, + { + "epoch": 0.3, + "grad_norm": 0.7160603404045105, + "learning_rate": 1.6116910660149096e-05, + "loss": 2.1591, + "step": 9086 + }, + { + "epoch": 0.3, + "grad_norm": 0.729033887386322, + "learning_rate": 1.6116069774468944e-05, + "loss": 2.1807, + "step": 9087 + }, + { + "epoch": 0.3, + "grad_norm": 0.7417169213294983, + "learning_rate": 1.6115228819693022e-05, + "loss": 2.1222, + "step": 9088 + }, + { + "epoch": 0.3, + "grad_norm": 0.7592983841896057, + "learning_rate": 1.6114387795830823e-05, + "loss": 2.1079, + "step": 9089 + }, + { + "epoch": 0.3, + "grad_norm": 0.7269306778907776, + "learning_rate": 1.611354670289186e-05, + "loss": 2.1275, + "step": 9090 + }, + { + "epoch": 0.3, + "grad_norm": 0.7094120979309082, + "learning_rate": 1.611270554088563e-05, + "loss": 2.095, + "step": 9091 + }, + { + "epoch": 0.3, + "grad_norm": 0.6961520910263062, + "learning_rate": 1.611186430982163e-05, + "loss": 2.1044, + "step": 9092 + }, + { + "epoch": 0.3, + "grad_norm": 0.7210471630096436, + "learning_rate": 1.611102300970937e-05, + "loss": 2.1773, + "step": 9093 + }, + { + "epoch": 0.3, + "grad_norm": 0.7415155172348022, + "learning_rate": 1.6110181640558358e-05, + "loss": 2.1331, + "step": 9094 + }, + { + "epoch": 0.3, + "grad_norm": 0.7176220417022705, + "learning_rate": 1.6109340202378093e-05, + "loss": 2.1611, + "step": 9095 + }, + { + "epoch": 0.3, + "grad_norm": 0.7124950885772705, + "learning_rate": 1.6108498695178078e-05, + "loss": 2.1661, + "step": 9096 + }, + { + "epoch": 0.3, + "grad_norm": 0.7172918319702148, + "learning_rate": 1.610765711896783e-05, + "loss": 2.0726, + "step": 9097 + }, + { + "epoch": 0.3, + "grad_norm": 0.7279451489448547, + "learning_rate": 1.6106815473756848e-05, + "loss": 2.0828, + "step": 9098 + }, + { + "epoch": 0.3, + "grad_norm": 0.7020747661590576, + "learning_rate": 1.6105973759554648e-05, + "loss": 2.0909, + "step": 9099 + }, + { + "epoch": 0.3, + "grad_norm": 0.674650251865387, + "learning_rate": 1.6105131976370732e-05, + "loss": 2.1026, + "step": 9100 + }, + { + "epoch": 0.3, + "grad_norm": 0.7160528302192688, + "learning_rate": 1.610429012421461e-05, + "loss": 2.1424, + "step": 9101 + }, + { + "epoch": 0.3, + "grad_norm": 0.7222595810890198, + "learning_rate": 1.61034482030958e-05, + "loss": 2.1229, + "step": 9102 + }, + { + "epoch": 0.3, + "grad_norm": 0.7355678677558899, + "learning_rate": 1.610260621302381e-05, + "loss": 2.1477, + "step": 9103 + }, + { + "epoch": 0.3, + "grad_norm": 0.7936947345733643, + "learning_rate": 1.6101764154008148e-05, + "loss": 2.174, + "step": 9104 + }, + { + "epoch": 0.3, + "grad_norm": 0.7312738299369812, + "learning_rate": 1.6100922026058336e-05, + "loss": 2.1422, + "step": 9105 + }, + { + "epoch": 0.3, + "grad_norm": 0.7081700563430786, + "learning_rate": 1.6100079829183877e-05, + "loss": 2.1572, + "step": 9106 + }, + { + "epoch": 0.3, + "grad_norm": 0.728241503238678, + "learning_rate": 1.609923756339429e-05, + "loss": 2.1513, + "step": 9107 + }, + { + "epoch": 0.3, + "grad_norm": 0.72907954454422, + "learning_rate": 1.6098395228699094e-05, + "loss": 2.1665, + "step": 9108 + }, + { + "epoch": 0.3, + "grad_norm": 0.6978530883789062, + "learning_rate": 1.6097552825107803e-05, + "loss": 2.1052, + "step": 9109 + }, + { + "epoch": 0.3, + "grad_norm": 0.7283750772476196, + "learning_rate": 1.6096710352629935e-05, + "loss": 2.1299, + "step": 9110 + }, + { + "epoch": 0.3, + "grad_norm": 0.7288293242454529, + "learning_rate": 1.6095867811275003e-05, + "loss": 2.1771, + "step": 9111 + }, + { + "epoch": 0.3, + "grad_norm": 0.7400304079055786, + "learning_rate": 1.6095025201052532e-05, + "loss": 2.1902, + "step": 9112 + }, + { + "epoch": 0.3, + "grad_norm": 0.7296028137207031, + "learning_rate": 1.6094182521972036e-05, + "loss": 2.1115, + "step": 9113 + }, + { + "epoch": 0.3, + "grad_norm": 0.7431108951568604, + "learning_rate": 1.609333977404304e-05, + "loss": 2.0976, + "step": 9114 + }, + { + "epoch": 0.3, + "grad_norm": 0.7112067341804504, + "learning_rate": 1.6092496957275062e-05, + "loss": 2.1574, + "step": 9115 + }, + { + "epoch": 0.3, + "grad_norm": 0.7418813109397888, + "learning_rate": 1.6091654071677623e-05, + "loss": 2.1188, + "step": 9116 + }, + { + "epoch": 0.3, + "grad_norm": 0.6986328959465027, + "learning_rate": 1.6090811117260245e-05, + "loss": 2.1436, + "step": 9117 + }, + { + "epoch": 0.3, + "grad_norm": 0.6848977208137512, + "learning_rate": 1.6089968094032454e-05, + "loss": 2.148, + "step": 9118 + }, + { + "epoch": 0.3, + "grad_norm": 0.7670209407806396, + "learning_rate": 1.608912500200377e-05, + "loss": 2.1364, + "step": 9119 + }, + { + "epoch": 0.3, + "grad_norm": 0.7172484397888184, + "learning_rate": 1.6088281841183725e-05, + "loss": 2.1509, + "step": 9120 + }, + { + "epoch": 0.3, + "grad_norm": 0.728642463684082, + "learning_rate": 1.6087438611581835e-05, + "loss": 2.0701, + "step": 9121 + }, + { + "epoch": 0.3, + "grad_norm": 0.6924048662185669, + "learning_rate": 1.6086595313207633e-05, + "loss": 2.0979, + "step": 9122 + }, + { + "epoch": 0.3, + "grad_norm": 0.7285665273666382, + "learning_rate": 1.608575194607065e-05, + "loss": 2.1435, + "step": 9123 + }, + { + "epoch": 0.3, + "grad_norm": 0.6986537575721741, + "learning_rate": 1.60849085101804e-05, + "loss": 2.0778, + "step": 9124 + }, + { + "epoch": 0.3, + "grad_norm": 0.7152937650680542, + "learning_rate": 1.6084065005546424e-05, + "loss": 2.1787, + "step": 9125 + }, + { + "epoch": 0.3, + "grad_norm": 0.7102826237678528, + "learning_rate": 1.6083221432178246e-05, + "loss": 2.1672, + "step": 9126 + }, + { + "epoch": 0.3, + "grad_norm": 0.7468326091766357, + "learning_rate": 1.6082377790085395e-05, + "loss": 2.1497, + "step": 9127 + }, + { + "epoch": 0.3, + "grad_norm": 0.702418863773346, + "learning_rate": 1.6081534079277406e-05, + "loss": 2.1326, + "step": 9128 + }, + { + "epoch": 0.3, + "grad_norm": 0.7368364930152893, + "learning_rate": 1.608069029976381e-05, + "loss": 2.1836, + "step": 9129 + }, + { + "epoch": 0.3, + "grad_norm": 0.7184275388717651, + "learning_rate": 1.6079846451554142e-05, + "loss": 2.1719, + "step": 9130 + }, + { + "epoch": 0.3, + "grad_norm": 0.7169089317321777, + "learning_rate": 1.607900253465793e-05, + "loss": 2.1915, + "step": 9131 + }, + { + "epoch": 0.3, + "grad_norm": 0.6985670924186707, + "learning_rate": 1.6078158549084707e-05, + "loss": 2.1547, + "step": 9132 + }, + { + "epoch": 0.3, + "grad_norm": 0.7136695384979248, + "learning_rate": 1.607731449484401e-05, + "loss": 2.0828, + "step": 9133 + }, + { + "epoch": 0.3, + "grad_norm": 0.7812572121620178, + "learning_rate": 1.607647037194538e-05, + "loss": 2.2155, + "step": 9134 + }, + { + "epoch": 0.3, + "grad_norm": 0.7029160261154175, + "learning_rate": 1.6075626180398346e-05, + "loss": 2.1264, + "step": 9135 + }, + { + "epoch": 0.3, + "grad_norm": 0.7137871384620667, + "learning_rate": 1.607478192021245e-05, + "loss": 2.117, + "step": 9136 + }, + { + "epoch": 0.3, + "grad_norm": 0.6959608793258667, + "learning_rate": 1.6073937591397225e-05, + "loss": 2.0638, + "step": 9137 + }, + { + "epoch": 0.3, + "grad_norm": 0.7332161068916321, + "learning_rate": 1.607309319396221e-05, + "loss": 2.1515, + "step": 9138 + }, + { + "epoch": 0.3, + "grad_norm": 0.7073734402656555, + "learning_rate": 1.6072248727916953e-05, + "loss": 2.0645, + "step": 9139 + }, + { + "epoch": 0.3, + "grad_norm": 0.7115029692649841, + "learning_rate": 1.6071404193270988e-05, + "loss": 2.1261, + "step": 9140 + }, + { + "epoch": 0.3, + "grad_norm": 0.6940522789955139, + "learning_rate": 1.6070559590033857e-05, + "loss": 2.1456, + "step": 9141 + }, + { + "epoch": 0.3, + "grad_norm": 0.7211481928825378, + "learning_rate": 1.60697149182151e-05, + "loss": 2.1263, + "step": 9142 + }, + { + "epoch": 0.3, + "grad_norm": 0.719411313533783, + "learning_rate": 1.6068870177824263e-05, + "loss": 2.1389, + "step": 9143 + }, + { + "epoch": 0.3, + "grad_norm": 0.6907497644424438, + "learning_rate": 1.6068025368870883e-05, + "loss": 2.1278, + "step": 9144 + }, + { + "epoch": 0.3, + "grad_norm": 0.720475435256958, + "learning_rate": 1.6067180491364514e-05, + "loss": 2.0869, + "step": 9145 + }, + { + "epoch": 0.3, + "grad_norm": 0.7141836285591125, + "learning_rate": 1.6066335545314693e-05, + "loss": 2.1806, + "step": 9146 + }, + { + "epoch": 0.3, + "grad_norm": 0.7256964445114136, + "learning_rate": 1.606549053073097e-05, + "loss": 2.1216, + "step": 9147 + }, + { + "epoch": 0.3, + "grad_norm": 0.7165928483009338, + "learning_rate": 1.6064645447622887e-05, + "loss": 2.1144, + "step": 9148 + }, + { + "epoch": 0.3, + "grad_norm": 0.7332196831703186, + "learning_rate": 1.606380029599999e-05, + "loss": 2.1664, + "step": 9149 + }, + { + "epoch": 0.3, + "grad_norm": 0.7206238508224487, + "learning_rate": 1.606295507587184e-05, + "loss": 2.1206, + "step": 9150 + }, + { + "epoch": 0.3, + "grad_norm": 0.7073818445205688, + "learning_rate": 1.6062109787247974e-05, + "loss": 2.1009, + "step": 9151 + }, + { + "epoch": 0.3, + "grad_norm": 0.7226938009262085, + "learning_rate": 1.6061264430137943e-05, + "loss": 2.1238, + "step": 9152 + }, + { + "epoch": 0.3, + "grad_norm": 0.7461568117141724, + "learning_rate": 1.6060419004551296e-05, + "loss": 2.1419, + "step": 9153 + }, + { + "epoch": 0.3, + "grad_norm": 0.7064507603645325, + "learning_rate": 1.6059573510497593e-05, + "loss": 2.1393, + "step": 9154 + }, + { + "epoch": 0.3, + "grad_norm": 0.7169920206069946, + "learning_rate": 1.6058727947986376e-05, + "loss": 2.2082, + "step": 9155 + }, + { + "epoch": 0.3, + "grad_norm": 0.7013477087020874, + "learning_rate": 1.6057882317027202e-05, + "loss": 2.0941, + "step": 9156 + }, + { + "epoch": 0.3, + "grad_norm": 0.6975902318954468, + "learning_rate": 1.605703661762962e-05, + "loss": 2.113, + "step": 9157 + }, + { + "epoch": 0.3, + "grad_norm": 0.7152775526046753, + "learning_rate": 1.6056190849803192e-05, + "loss": 2.119, + "step": 9158 + }, + { + "epoch": 0.3, + "grad_norm": 0.7113847136497498, + "learning_rate": 1.605534501355747e-05, + "loss": 2.1438, + "step": 9159 + }, + { + "epoch": 0.3, + "grad_norm": 0.6999005079269409, + "learning_rate": 1.6054499108902007e-05, + "loss": 2.1607, + "step": 9160 + }, + { + "epoch": 0.3, + "grad_norm": 0.7313960194587708, + "learning_rate": 1.605365313584636e-05, + "loss": 2.1366, + "step": 9161 + }, + { + "epoch": 0.3, + "grad_norm": 0.7339703440666199, + "learning_rate": 1.6052807094400093e-05, + "loss": 2.1109, + "step": 9162 + }, + { + "epoch": 0.3, + "grad_norm": 0.7059674263000488, + "learning_rate": 1.6051960984572753e-05, + "loss": 2.1883, + "step": 9163 + }, + { + "epoch": 0.3, + "grad_norm": 0.722870945930481, + "learning_rate": 1.605111480637391e-05, + "loss": 2.1347, + "step": 9164 + }, + { + "epoch": 0.3, + "grad_norm": 0.7093409895896912, + "learning_rate": 1.6050268559813115e-05, + "loss": 2.1132, + "step": 9165 + }, + { + "epoch": 0.3, + "grad_norm": 0.6932336091995239, + "learning_rate": 1.6049422244899928e-05, + "loss": 2.1067, + "step": 9166 + }, + { + "epoch": 0.3, + "grad_norm": 0.7209358811378479, + "learning_rate": 1.604857586164392e-05, + "loss": 2.0301, + "step": 9167 + }, + { + "epoch": 0.31, + "grad_norm": 0.7585920691490173, + "learning_rate": 1.6047729410054645e-05, + "loss": 2.1395, + "step": 9168 + }, + { + "epoch": 0.31, + "grad_norm": 0.7451275587081909, + "learning_rate": 1.6046882890141664e-05, + "loss": 2.0805, + "step": 9169 + }, + { + "epoch": 0.31, + "grad_norm": 0.7101922631263733, + "learning_rate": 1.6046036301914547e-05, + "loss": 2.1187, + "step": 9170 + }, + { + "epoch": 0.31, + "grad_norm": 0.7058566212654114, + "learning_rate": 1.6045189645382856e-05, + "loss": 2.0607, + "step": 9171 + }, + { + "epoch": 0.31, + "grad_norm": 0.7104160785675049, + "learning_rate": 1.6044342920556152e-05, + "loss": 2.0338, + "step": 9172 + }, + { + "epoch": 0.31, + "grad_norm": 0.7436968088150024, + "learning_rate": 1.6043496127444007e-05, + "loss": 2.0851, + "step": 9173 + }, + { + "epoch": 0.31, + "grad_norm": 0.7205508351325989, + "learning_rate": 1.604264926605598e-05, + "loss": 2.1315, + "step": 9174 + }, + { + "epoch": 0.31, + "grad_norm": 0.7143407464027405, + "learning_rate": 1.604180233640165e-05, + "loss": 2.1885, + "step": 9175 + }, + { + "epoch": 0.31, + "grad_norm": 0.6849372386932373, + "learning_rate": 1.6040955338490576e-05, + "loss": 2.1422, + "step": 9176 + }, + { + "epoch": 0.31, + "grad_norm": 0.7259867787361145, + "learning_rate": 1.6040108272332325e-05, + "loss": 2.1147, + "step": 9177 + }, + { + "epoch": 0.31, + "grad_norm": 0.7319706678390503, + "learning_rate": 1.6039261137936474e-05, + "loss": 2.1421, + "step": 9178 + }, + { + "epoch": 0.31, + "grad_norm": 0.713388204574585, + "learning_rate": 1.603841393531259e-05, + "loss": 2.09, + "step": 9179 + }, + { + "epoch": 0.31, + "grad_norm": 0.6804453730583191, + "learning_rate": 1.603756666447024e-05, + "loss": 2.1083, + "step": 9180 + }, + { + "epoch": 0.31, + "grad_norm": 0.7508741021156311, + "learning_rate": 1.6036719325419007e-05, + "loss": 2.1244, + "step": 9181 + }, + { + "epoch": 0.31, + "grad_norm": 0.6907243728637695, + "learning_rate": 1.6035871918168452e-05, + "loss": 2.0895, + "step": 9182 + }, + { + "epoch": 0.31, + "grad_norm": 0.7081133723258972, + "learning_rate": 1.6035024442728157e-05, + "loss": 2.0405, + "step": 9183 + }, + { + "epoch": 0.31, + "grad_norm": 0.6962268352508545, + "learning_rate": 1.603417689910769e-05, + "loss": 2.1157, + "step": 9184 + }, + { + "epoch": 0.31, + "grad_norm": 0.6961023211479187, + "learning_rate": 1.603332928731663e-05, + "loss": 2.1785, + "step": 9185 + }, + { + "epoch": 0.31, + "grad_norm": 0.7001343369483948, + "learning_rate": 1.603248160736455e-05, + "loss": 2.1178, + "step": 9186 + }, + { + "epoch": 0.31, + "grad_norm": 0.7052466869354248, + "learning_rate": 1.603163385926103e-05, + "loss": 2.1258, + "step": 9187 + }, + { + "epoch": 0.31, + "grad_norm": 0.6974732875823975, + "learning_rate": 1.6030786043015644e-05, + "loss": 2.1652, + "step": 9188 + }, + { + "epoch": 0.31, + "grad_norm": 0.7125571370124817, + "learning_rate": 1.6029938158637973e-05, + "loss": 2.1205, + "step": 9189 + }, + { + "epoch": 0.31, + "grad_norm": 0.722271740436554, + "learning_rate": 1.6029090206137592e-05, + "loss": 2.1224, + "step": 9190 + }, + { + "epoch": 0.31, + "grad_norm": 0.7402487397193909, + "learning_rate": 1.6028242185524087e-05, + "loss": 2.1522, + "step": 9191 + }, + { + "epoch": 0.31, + "grad_norm": 0.6940657496452332, + "learning_rate": 1.6027394096807035e-05, + "loss": 2.179, + "step": 9192 + }, + { + "epoch": 0.31, + "grad_norm": 0.6938674449920654, + "learning_rate": 1.602654593999601e-05, + "loss": 2.1405, + "step": 9193 + }, + { + "epoch": 0.31, + "grad_norm": 0.7211888432502747, + "learning_rate": 1.6025697715100606e-05, + "loss": 2.1961, + "step": 9194 + }, + { + "epoch": 0.31, + "grad_norm": 0.6818050146102905, + "learning_rate": 1.6024849422130403e-05, + "loss": 2.0702, + "step": 9195 + }, + { + "epoch": 0.31, + "grad_norm": 0.7188803553581238, + "learning_rate": 1.602400106109498e-05, + "loss": 2.1377, + "step": 9196 + }, + { + "epoch": 0.31, + "grad_norm": 0.7143694758415222, + "learning_rate": 1.602315263200392e-05, + "loss": 2.1284, + "step": 9197 + }, + { + "epoch": 0.31, + "grad_norm": 0.7232929468154907, + "learning_rate": 1.6022304134866814e-05, + "loss": 2.1792, + "step": 9198 + }, + { + "epoch": 0.31, + "grad_norm": 0.6955771446228027, + "learning_rate": 1.6021455569693248e-05, + "loss": 2.1351, + "step": 9199 + }, + { + "epoch": 0.31, + "grad_norm": 0.7464007139205933, + "learning_rate": 1.6020606936492803e-05, + "loss": 2.1728, + "step": 9200 + }, + { + "epoch": 0.31, + "grad_norm": 0.7254986763000488, + "learning_rate": 1.601975823527507e-05, + "loss": 2.2202, + "step": 9201 + }, + { + "epoch": 0.31, + "grad_norm": 0.7056158781051636, + "learning_rate": 1.6018909466049635e-05, + "loss": 2.1285, + "step": 9202 + }, + { + "epoch": 0.31, + "grad_norm": 0.7082623839378357, + "learning_rate": 1.601806062882609e-05, + "loss": 2.1202, + "step": 9203 + }, + { + "epoch": 0.31, + "grad_norm": 0.7204388976097107, + "learning_rate": 1.6017211723614023e-05, + "loss": 2.1415, + "step": 9204 + }, + { + "epoch": 0.31, + "grad_norm": 0.7447026968002319, + "learning_rate": 1.6016362750423023e-05, + "loss": 2.1626, + "step": 9205 + }, + { + "epoch": 0.31, + "grad_norm": 0.691584050655365, + "learning_rate": 1.601551370926268e-05, + "loss": 2.1264, + "step": 9206 + }, + { + "epoch": 0.31, + "grad_norm": 0.7081149220466614, + "learning_rate": 1.6014664600142595e-05, + "loss": 2.0592, + "step": 9207 + }, + { + "epoch": 0.31, + "grad_norm": 0.7025768756866455, + "learning_rate": 1.601381542307235e-05, + "loss": 2.1791, + "step": 9208 + }, + { + "epoch": 0.31, + "grad_norm": 0.7228710651397705, + "learning_rate": 1.6012966178061543e-05, + "loss": 2.1401, + "step": 9209 + }, + { + "epoch": 0.31, + "grad_norm": 0.7501245141029358, + "learning_rate": 1.601211686511977e-05, + "loss": 2.2134, + "step": 9210 + }, + { + "epoch": 0.31, + "grad_norm": 0.7173078060150146, + "learning_rate": 1.6011267484256624e-05, + "loss": 2.1123, + "step": 9211 + }, + { + "epoch": 0.31, + "grad_norm": 0.7497999668121338, + "learning_rate": 1.60104180354817e-05, + "loss": 2.122, + "step": 9212 + }, + { + "epoch": 0.31, + "grad_norm": 0.6962926983833313, + "learning_rate": 1.6009568518804596e-05, + "loss": 2.1564, + "step": 9213 + }, + { + "epoch": 0.31, + "grad_norm": 0.7197548151016235, + "learning_rate": 1.6008718934234912e-05, + "loss": 2.1983, + "step": 9214 + }, + { + "epoch": 0.31, + "grad_norm": 0.7410464882850647, + "learning_rate": 1.6007869281782235e-05, + "loss": 2.1191, + "step": 9215 + }, + { + "epoch": 0.31, + "grad_norm": 0.7107625603675842, + "learning_rate": 1.600701956145618e-05, + "loss": 2.1727, + "step": 9216 + }, + { + "epoch": 0.31, + "grad_norm": 0.7562989592552185, + "learning_rate": 1.600616977326633e-05, + "loss": 2.1419, + "step": 9217 + }, + { + "epoch": 0.31, + "grad_norm": 0.7248507142066956, + "learning_rate": 1.6005319917222298e-05, + "loss": 2.1126, + "step": 9218 + }, + { + "epoch": 0.31, + "grad_norm": 0.7140735983848572, + "learning_rate": 1.6004469993333685e-05, + "loss": 2.1175, + "step": 9219 + }, + { + "epoch": 0.31, + "grad_norm": 0.7225379943847656, + "learning_rate": 1.6003620001610083e-05, + "loss": 2.124, + "step": 9220 + }, + { + "epoch": 0.31, + "grad_norm": 0.7579736113548279, + "learning_rate": 1.6002769942061107e-05, + "loss": 2.1667, + "step": 9221 + }, + { + "epoch": 0.31, + "grad_norm": 0.7073781490325928, + "learning_rate": 1.600191981469635e-05, + "loss": 2.1004, + "step": 9222 + }, + { + "epoch": 0.31, + "grad_norm": 0.7554293870925903, + "learning_rate": 1.6001069619525417e-05, + "loss": 2.0418, + "step": 9223 + }, + { + "epoch": 0.31, + "grad_norm": 0.8300036787986755, + "learning_rate": 1.600021935655792e-05, + "loss": 2.1243, + "step": 9224 + }, + { + "epoch": 0.31, + "grad_norm": 0.7187107801437378, + "learning_rate": 1.5999369025803463e-05, + "loss": 2.1137, + "step": 9225 + }, + { + "epoch": 0.31, + "grad_norm": 0.7158005237579346, + "learning_rate": 1.5998518627271647e-05, + "loss": 2.1661, + "step": 9226 + }, + { + "epoch": 0.31, + "grad_norm": 0.7382926940917969, + "learning_rate": 1.5997668160972086e-05, + "loss": 2.1454, + "step": 9227 + }, + { + "epoch": 0.31, + "grad_norm": 0.7160718441009521, + "learning_rate": 1.5996817626914386e-05, + "loss": 2.0932, + "step": 9228 + }, + { + "epoch": 0.31, + "grad_norm": 0.7767157554626465, + "learning_rate": 1.599596702510815e-05, + "loss": 2.1123, + "step": 9229 + }, + { + "epoch": 0.31, + "grad_norm": 0.7325469851493835, + "learning_rate": 1.5995116355562997e-05, + "loss": 2.1483, + "step": 9230 + }, + { + "epoch": 0.31, + "grad_norm": 0.7318035960197449, + "learning_rate": 1.599426561828853e-05, + "loss": 2.1002, + "step": 9231 + }, + { + "epoch": 0.31, + "grad_norm": 0.6890215873718262, + "learning_rate": 1.5993414813294368e-05, + "loss": 2.0897, + "step": 9232 + }, + { + "epoch": 0.31, + "grad_norm": 0.7208920121192932, + "learning_rate": 1.5992563940590114e-05, + "loss": 2.0999, + "step": 9233 + }, + { + "epoch": 0.31, + "grad_norm": 0.7177322506904602, + "learning_rate": 1.5991713000185382e-05, + "loss": 2.1628, + "step": 9234 + }, + { + "epoch": 0.31, + "grad_norm": 0.7050248980522156, + "learning_rate": 1.5990861992089792e-05, + "loss": 2.079, + "step": 9235 + }, + { + "epoch": 0.31, + "grad_norm": 0.7299240827560425, + "learning_rate": 1.5990010916312956e-05, + "loss": 2.1838, + "step": 9236 + }, + { + "epoch": 0.31, + "grad_norm": 0.73721843957901, + "learning_rate": 1.5989159772864483e-05, + "loss": 2.1084, + "step": 9237 + }, + { + "epoch": 0.31, + "grad_norm": 0.7857838869094849, + "learning_rate": 1.5988308561753995e-05, + "loss": 2.1617, + "step": 9238 + }, + { + "epoch": 0.31, + "grad_norm": 0.7269307374954224, + "learning_rate": 1.5987457282991107e-05, + "loss": 2.1562, + "step": 9239 + }, + { + "epoch": 0.31, + "grad_norm": 0.7437485456466675, + "learning_rate": 1.5986605936585432e-05, + "loss": 2.1184, + "step": 9240 + }, + { + "epoch": 0.31, + "grad_norm": 0.7098572254180908, + "learning_rate": 1.598575452254659e-05, + "loss": 2.1512, + "step": 9241 + }, + { + "epoch": 0.31, + "grad_norm": 0.7172836065292358, + "learning_rate": 1.598490304088421e-05, + "loss": 2.1955, + "step": 9242 + }, + { + "epoch": 0.31, + "grad_norm": 0.7143498063087463, + "learning_rate": 1.5984051491607898e-05, + "loss": 2.1556, + "step": 9243 + }, + { + "epoch": 0.31, + "grad_norm": 0.6794418096542358, + "learning_rate": 1.598319987472728e-05, + "loss": 2.1414, + "step": 9244 + }, + { + "epoch": 0.31, + "grad_norm": 0.7081068754196167, + "learning_rate": 1.5982348190251974e-05, + "loss": 2.1379, + "step": 9245 + }, + { + "epoch": 0.31, + "grad_norm": 0.7376105785369873, + "learning_rate": 1.5981496438191606e-05, + "loss": 2.0912, + "step": 9246 + }, + { + "epoch": 0.31, + "grad_norm": 0.7500844597816467, + "learning_rate": 1.5980644618555796e-05, + "loss": 2.1541, + "step": 9247 + }, + { + "epoch": 0.31, + "grad_norm": 0.6969217658042908, + "learning_rate": 1.597979273135417e-05, + "loss": 2.1342, + "step": 9248 + }, + { + "epoch": 0.31, + "grad_norm": 0.6875256896018982, + "learning_rate": 1.597894077659635e-05, + "loss": 2.0596, + "step": 9249 + }, + { + "epoch": 0.31, + "grad_norm": 0.732375979423523, + "learning_rate": 1.597808875429196e-05, + "loss": 2.1545, + "step": 9250 + }, + { + "epoch": 0.31, + "grad_norm": 0.7435402274131775, + "learning_rate": 1.5977236664450627e-05, + "loss": 2.0905, + "step": 9251 + }, + { + "epoch": 0.31, + "grad_norm": 0.7759851217269897, + "learning_rate": 1.5976384507081974e-05, + "loss": 2.1992, + "step": 9252 + }, + { + "epoch": 0.31, + "grad_norm": 0.7100881338119507, + "learning_rate": 1.5975532282195637e-05, + "loss": 2.1343, + "step": 9253 + }, + { + "epoch": 0.31, + "grad_norm": 0.723523736000061, + "learning_rate": 1.5974679989801235e-05, + "loss": 2.152, + "step": 9254 + }, + { + "epoch": 0.31, + "grad_norm": 0.7145118713378906, + "learning_rate": 1.59738276299084e-05, + "loss": 2.1073, + "step": 9255 + }, + { + "epoch": 0.31, + "grad_norm": 0.746797502040863, + "learning_rate": 1.5972975202526763e-05, + "loss": 2.1959, + "step": 9256 + }, + { + "epoch": 0.31, + "grad_norm": 0.7300235629081726, + "learning_rate": 1.597212270766595e-05, + "loss": 2.1183, + "step": 9257 + }, + { + "epoch": 0.31, + "grad_norm": 0.7386276721954346, + "learning_rate": 1.59712701453356e-05, + "loss": 2.1796, + "step": 9258 + }, + { + "epoch": 0.31, + "grad_norm": 0.7125728726387024, + "learning_rate": 1.5970417515545335e-05, + "loss": 2.0942, + "step": 9259 + }, + { + "epoch": 0.31, + "grad_norm": 0.6984363794326782, + "learning_rate": 1.5969564818304793e-05, + "loss": 2.1497, + "step": 9260 + }, + { + "epoch": 0.31, + "grad_norm": 0.7038816213607788, + "learning_rate": 1.5968712053623604e-05, + "loss": 2.1059, + "step": 9261 + }, + { + "epoch": 0.31, + "grad_norm": 0.7059393525123596, + "learning_rate": 1.5967859221511406e-05, + "loss": 2.1676, + "step": 9262 + }, + { + "epoch": 0.31, + "grad_norm": 0.718723475933075, + "learning_rate": 1.5967006321977834e-05, + "loss": 2.1623, + "step": 9263 + }, + { + "epoch": 0.31, + "grad_norm": 0.7173232436180115, + "learning_rate": 1.596615335503252e-05, + "loss": 2.1257, + "step": 9264 + }, + { + "epoch": 0.31, + "grad_norm": 0.7239296436309814, + "learning_rate": 1.59653003206851e-05, + "loss": 2.0621, + "step": 9265 + }, + { + "epoch": 0.31, + "grad_norm": 0.7134047150611877, + "learning_rate": 1.5964447218945214e-05, + "loss": 2.1374, + "step": 9266 + }, + { + "epoch": 0.31, + "grad_norm": 0.7569999098777771, + "learning_rate": 1.59635940498225e-05, + "loss": 2.1341, + "step": 9267 + }, + { + "epoch": 0.31, + "grad_norm": 0.7099441885948181, + "learning_rate": 1.5962740813326597e-05, + "loss": 2.1652, + "step": 9268 + }, + { + "epoch": 0.31, + "grad_norm": 0.7193514108657837, + "learning_rate": 1.596188750946714e-05, + "loss": 2.1912, + "step": 9269 + }, + { + "epoch": 0.31, + "grad_norm": 0.7716888189315796, + "learning_rate": 1.596103413825377e-05, + "loss": 2.1129, + "step": 9270 + }, + { + "epoch": 0.31, + "grad_norm": 0.7170916795730591, + "learning_rate": 1.5960180699696132e-05, + "loss": 2.1951, + "step": 9271 + }, + { + "epoch": 0.31, + "grad_norm": 0.7163340449333191, + "learning_rate": 1.5959327193803868e-05, + "loss": 2.1645, + "step": 9272 + }, + { + "epoch": 0.31, + "grad_norm": 0.7287588715553284, + "learning_rate": 1.5958473620586613e-05, + "loss": 2.1258, + "step": 9273 + }, + { + "epoch": 0.31, + "grad_norm": 0.7344598770141602, + "learning_rate": 1.595761998005402e-05, + "loss": 2.1368, + "step": 9274 + }, + { + "epoch": 0.31, + "grad_norm": 0.7135640978813171, + "learning_rate": 1.5956766272215725e-05, + "loss": 2.1053, + "step": 9275 + }, + { + "epoch": 0.31, + "grad_norm": 0.7311667203903198, + "learning_rate": 1.5955912497081377e-05, + "loss": 2.1348, + "step": 9276 + }, + { + "epoch": 0.31, + "grad_norm": 0.7427075505256653, + "learning_rate": 1.595505865466062e-05, + "loss": 2.099, + "step": 9277 + }, + { + "epoch": 0.31, + "grad_norm": 0.7743737101554871, + "learning_rate": 1.59542047449631e-05, + "loss": 2.0539, + "step": 9278 + }, + { + "epoch": 0.31, + "grad_norm": 0.7061465978622437, + "learning_rate": 1.5953350767998463e-05, + "loss": 2.1601, + "step": 9279 + }, + { + "epoch": 0.31, + "grad_norm": 0.7167195081710815, + "learning_rate": 1.5952496723776364e-05, + "loss": 2.1161, + "step": 9280 + }, + { + "epoch": 0.31, + "grad_norm": 0.6850878596305847, + "learning_rate": 1.5951642612306436e-05, + "loss": 2.1017, + "step": 9281 + }, + { + "epoch": 0.31, + "grad_norm": 0.713681697845459, + "learning_rate": 1.5950788433598345e-05, + "loss": 2.0973, + "step": 9282 + }, + { + "epoch": 0.31, + "grad_norm": 0.6899462938308716, + "learning_rate": 1.594993418766173e-05, + "loss": 2.0692, + "step": 9283 + }, + { + "epoch": 0.31, + "grad_norm": 0.7102536559104919, + "learning_rate": 1.5949079874506246e-05, + "loss": 2.1644, + "step": 9284 + }, + { + "epoch": 0.31, + "grad_norm": 0.7040597200393677, + "learning_rate": 1.5948225494141545e-05, + "loss": 2.1001, + "step": 9285 + }, + { + "epoch": 0.31, + "grad_norm": 0.7178003787994385, + "learning_rate": 1.5947371046577278e-05, + "loss": 2.1227, + "step": 9286 + }, + { + "epoch": 0.31, + "grad_norm": 0.7293294072151184, + "learning_rate": 1.5946516531823098e-05, + "loss": 2.1198, + "step": 9287 + }, + { + "epoch": 0.31, + "grad_norm": 0.7625541687011719, + "learning_rate": 1.594566194988866e-05, + "loss": 2.1493, + "step": 9288 + }, + { + "epoch": 0.31, + "grad_norm": 0.7138282656669617, + "learning_rate": 1.594480730078362e-05, + "loss": 2.1141, + "step": 9289 + }, + { + "epoch": 0.31, + "grad_norm": 0.7239620685577393, + "learning_rate": 1.5943952584517627e-05, + "loss": 2.1815, + "step": 9290 + }, + { + "epoch": 0.31, + "grad_norm": 0.7259666323661804, + "learning_rate": 1.5943097801100344e-05, + "loss": 2.0702, + "step": 9291 + }, + { + "epoch": 0.31, + "grad_norm": 0.7165936231613159, + "learning_rate": 1.5942242950541423e-05, + "loss": 2.1533, + "step": 9292 + }, + { + "epoch": 0.31, + "grad_norm": 0.7234683632850647, + "learning_rate": 1.5941388032850526e-05, + "loss": 2.1468, + "step": 9293 + }, + { + "epoch": 0.31, + "grad_norm": 0.7195634245872498, + "learning_rate": 1.5940533048037306e-05, + "loss": 2.1009, + "step": 9294 + }, + { + "epoch": 0.31, + "grad_norm": 0.7426090836524963, + "learning_rate": 1.593967799611143e-05, + "loss": 2.1868, + "step": 9295 + }, + { + "epoch": 0.31, + "grad_norm": 0.735403299331665, + "learning_rate": 1.593882287708255e-05, + "loss": 2.0949, + "step": 9296 + }, + { + "epoch": 0.31, + "grad_norm": 0.699145495891571, + "learning_rate": 1.593796769096033e-05, + "loss": 2.0949, + "step": 9297 + }, + { + "epoch": 0.31, + "grad_norm": 0.7040101885795593, + "learning_rate": 1.593711243775443e-05, + "loss": 2.1892, + "step": 9298 + }, + { + "epoch": 0.31, + "grad_norm": 0.7026491761207581, + "learning_rate": 1.5936257117474512e-05, + "loss": 2.0946, + "step": 9299 + }, + { + "epoch": 0.31, + "grad_norm": 0.7546688914299011, + "learning_rate": 1.5935401730130243e-05, + "loss": 2.1376, + "step": 9300 + }, + { + "epoch": 0.31, + "grad_norm": 0.706497073173523, + "learning_rate": 1.593454627573128e-05, + "loss": 2.1687, + "step": 9301 + }, + { + "epoch": 0.31, + "grad_norm": 0.711330771446228, + "learning_rate": 1.5933690754287297e-05, + "loss": 2.1293, + "step": 9302 + }, + { + "epoch": 0.31, + "grad_norm": 0.6929452419281006, + "learning_rate": 1.5932835165807946e-05, + "loss": 2.1038, + "step": 9303 + }, + { + "epoch": 0.31, + "grad_norm": 0.749926745891571, + "learning_rate": 1.5931979510302905e-05, + "loss": 2.2167, + "step": 9304 + }, + { + "epoch": 0.31, + "grad_norm": 0.7023991942405701, + "learning_rate": 1.5931123787781837e-05, + "loss": 2.1228, + "step": 9305 + }, + { + "epoch": 0.31, + "grad_norm": 0.7499391436576843, + "learning_rate": 1.593026799825441e-05, + "loss": 2.1255, + "step": 9306 + }, + { + "epoch": 0.31, + "grad_norm": 0.7123939990997314, + "learning_rate": 1.5929412141730286e-05, + "loss": 2.1516, + "step": 9307 + }, + { + "epoch": 0.31, + "grad_norm": 0.699347198009491, + "learning_rate": 1.5928556218219138e-05, + "loss": 2.0914, + "step": 9308 + }, + { + "epoch": 0.31, + "grad_norm": 0.6875589489936829, + "learning_rate": 1.592770022773064e-05, + "loss": 2.1464, + "step": 9309 + }, + { + "epoch": 0.31, + "grad_norm": 0.7070686221122742, + "learning_rate": 1.5926844170274454e-05, + "loss": 2.1166, + "step": 9310 + }, + { + "epoch": 0.31, + "grad_norm": 0.7435624599456787, + "learning_rate": 1.592598804586026e-05, + "loss": 2.0973, + "step": 9311 + }, + { + "epoch": 0.31, + "grad_norm": 0.7203469276428223, + "learning_rate": 1.5925131854497722e-05, + "loss": 2.1745, + "step": 9312 + }, + { + "epoch": 0.31, + "grad_norm": 0.7259249687194824, + "learning_rate": 1.5924275596196524e-05, + "loss": 2.0451, + "step": 9313 + }, + { + "epoch": 0.31, + "grad_norm": 0.7620490789413452, + "learning_rate": 1.5923419270966327e-05, + "loss": 2.1113, + "step": 9314 + }, + { + "epoch": 0.31, + "grad_norm": 0.7011623382568359, + "learning_rate": 1.5922562878816813e-05, + "loss": 2.0573, + "step": 9315 + }, + { + "epoch": 0.31, + "grad_norm": 0.7292693853378296, + "learning_rate": 1.5921706419757653e-05, + "loss": 2.0924, + "step": 9316 + }, + { + "epoch": 0.31, + "grad_norm": 0.7191295623779297, + "learning_rate": 1.5920849893798523e-05, + "loss": 2.1491, + "step": 9317 + }, + { + "epoch": 0.31, + "grad_norm": 0.6986382603645325, + "learning_rate": 1.5919993300949103e-05, + "loss": 2.1228, + "step": 9318 + }, + { + "epoch": 0.31, + "grad_norm": 0.7097615003585815, + "learning_rate": 1.591913664121907e-05, + "loss": 2.1283, + "step": 9319 + }, + { + "epoch": 0.31, + "grad_norm": 0.72479647397995, + "learning_rate": 1.5918279914618095e-05, + "loss": 2.1574, + "step": 9320 + }, + { + "epoch": 0.31, + "grad_norm": 0.6991228461265564, + "learning_rate": 1.5917423121155863e-05, + "loss": 2.0836, + "step": 9321 + }, + { + "epoch": 0.31, + "grad_norm": 0.6978242993354797, + "learning_rate": 1.5916566260842058e-05, + "loss": 2.1033, + "step": 9322 + }, + { + "epoch": 0.31, + "grad_norm": 0.72382652759552, + "learning_rate": 1.5915709333686348e-05, + "loss": 2.1684, + "step": 9323 + }, + { + "epoch": 0.31, + "grad_norm": 0.711806058883667, + "learning_rate": 1.5914852339698424e-05, + "loss": 2.0954, + "step": 9324 + }, + { + "epoch": 0.31, + "grad_norm": 0.6966581344604492, + "learning_rate": 1.591399527888797e-05, + "loss": 2.0669, + "step": 9325 + }, + { + "epoch": 0.31, + "grad_norm": 0.7167718410491943, + "learning_rate": 1.5913138151264654e-05, + "loss": 2.1015, + "step": 9326 + }, + { + "epoch": 0.31, + "grad_norm": 0.7108865976333618, + "learning_rate": 1.5912280956838174e-05, + "loss": 2.0901, + "step": 9327 + }, + { + "epoch": 0.31, + "grad_norm": 0.7295517325401306, + "learning_rate": 1.5911423695618206e-05, + "loss": 2.1619, + "step": 9328 + }, + { + "epoch": 0.31, + "grad_norm": 0.6814781427383423, + "learning_rate": 1.591056636761444e-05, + "loss": 2.1046, + "step": 9329 + }, + { + "epoch": 0.31, + "grad_norm": 0.7195289134979248, + "learning_rate": 1.5909708972836554e-05, + "loss": 2.1368, + "step": 9330 + }, + { + "epoch": 0.31, + "grad_norm": 0.7272166013717651, + "learning_rate": 1.590885151129425e-05, + "loss": 2.135, + "step": 9331 + }, + { + "epoch": 0.31, + "grad_norm": 0.7215157747268677, + "learning_rate": 1.5907993982997194e-05, + "loss": 2.2216, + "step": 9332 + }, + { + "epoch": 0.31, + "grad_norm": 0.7225114703178406, + "learning_rate": 1.590713638795509e-05, + "loss": 2.1911, + "step": 9333 + }, + { + "epoch": 0.31, + "grad_norm": 0.7003817558288574, + "learning_rate": 1.5906278726177616e-05, + "loss": 2.1757, + "step": 9334 + }, + { + "epoch": 0.31, + "grad_norm": 0.7157624959945679, + "learning_rate": 1.590542099767447e-05, + "loss": 2.1128, + "step": 9335 + }, + { + "epoch": 0.31, + "grad_norm": 0.7406724691390991, + "learning_rate": 1.5904563202455337e-05, + "loss": 2.2628, + "step": 9336 + }, + { + "epoch": 0.31, + "grad_norm": 0.7400164008140564, + "learning_rate": 1.5903705340529913e-05, + "loss": 2.2008, + "step": 9337 + }, + { + "epoch": 0.31, + "grad_norm": 0.7527457475662231, + "learning_rate": 1.5902847411907885e-05, + "loss": 2.142, + "step": 9338 + }, + { + "epoch": 0.31, + "grad_norm": 0.7302672863006592, + "learning_rate": 1.5901989416598943e-05, + "loss": 2.0443, + "step": 9339 + }, + { + "epoch": 0.31, + "grad_norm": 0.713763952255249, + "learning_rate": 1.5901131354612787e-05, + "loss": 2.1518, + "step": 9340 + }, + { + "epoch": 0.31, + "grad_norm": 0.7410566806793213, + "learning_rate": 1.5900273225959104e-05, + "loss": 2.124, + "step": 9341 + }, + { + "epoch": 0.31, + "grad_norm": 0.7286756634712219, + "learning_rate": 1.5899415030647593e-05, + "loss": 2.1996, + "step": 9342 + }, + { + "epoch": 0.31, + "grad_norm": 0.7066284418106079, + "learning_rate": 1.589855676868795e-05, + "loss": 2.1599, + "step": 9343 + }, + { + "epoch": 0.31, + "grad_norm": 0.7200791239738464, + "learning_rate": 1.589769844008987e-05, + "loss": 2.1145, + "step": 9344 + }, + { + "epoch": 0.31, + "grad_norm": 0.7457669973373413, + "learning_rate": 1.5896840044863045e-05, + "loss": 2.2582, + "step": 9345 + }, + { + "epoch": 0.31, + "grad_norm": 0.694642961025238, + "learning_rate": 1.5895981583017182e-05, + "loss": 2.0973, + "step": 9346 + }, + { + "epoch": 0.31, + "grad_norm": 0.7158113718032837, + "learning_rate": 1.5895123054561976e-05, + "loss": 2.1097, + "step": 9347 + }, + { + "epoch": 0.31, + "grad_norm": 0.7374735474586487, + "learning_rate": 1.5894264459507116e-05, + "loss": 2.054, + "step": 9348 + }, + { + "epoch": 0.31, + "grad_norm": 0.7377456426620483, + "learning_rate": 1.589340579786232e-05, + "loss": 2.1702, + "step": 9349 + }, + { + "epoch": 0.31, + "grad_norm": 0.7287301421165466, + "learning_rate": 1.589254706963727e-05, + "loss": 2.0944, + "step": 9350 + }, + { + "epoch": 0.31, + "grad_norm": 0.7183813452720642, + "learning_rate": 1.5891688274841683e-05, + "loss": 2.1142, + "step": 9351 + }, + { + "epoch": 0.31, + "grad_norm": 0.7221638560295105, + "learning_rate": 1.5890829413485255e-05, + "loss": 2.1353, + "step": 9352 + }, + { + "epoch": 0.31, + "grad_norm": 0.699821412563324, + "learning_rate": 1.5889970485577687e-05, + "loss": 2.1422, + "step": 9353 + }, + { + "epoch": 0.31, + "grad_norm": 0.7150502800941467, + "learning_rate": 1.5889111491128683e-05, + "loss": 2.1305, + "step": 9354 + }, + { + "epoch": 0.31, + "grad_norm": 0.6975569725036621, + "learning_rate": 1.588825243014795e-05, + "loss": 2.0958, + "step": 9355 + }, + { + "epoch": 0.31, + "grad_norm": 0.7854176759719849, + "learning_rate": 1.588739330264519e-05, + "loss": 2.1754, + "step": 9356 + }, + { + "epoch": 0.31, + "grad_norm": 0.7363340854644775, + "learning_rate": 1.5886534108630113e-05, + "loss": 2.1417, + "step": 9357 + }, + { + "epoch": 0.31, + "grad_norm": 0.7523507475852966, + "learning_rate": 1.5885674848112425e-05, + "loss": 2.1669, + "step": 9358 + }, + { + "epoch": 0.31, + "grad_norm": 0.709601104259491, + "learning_rate": 1.588481552110183e-05, + "loss": 2.11, + "step": 9359 + }, + { + "epoch": 0.31, + "grad_norm": 0.7014585733413696, + "learning_rate": 1.5883956127608036e-05, + "loss": 2.0255, + "step": 9360 + }, + { + "epoch": 0.31, + "grad_norm": 0.7283008098602295, + "learning_rate": 1.5883096667640757e-05, + "loss": 2.1471, + "step": 9361 + }, + { + "epoch": 0.31, + "grad_norm": 0.7762390971183777, + "learning_rate": 1.58822371412097e-05, + "loss": 2.192, + "step": 9362 + }, + { + "epoch": 0.31, + "grad_norm": 0.7231695055961609, + "learning_rate": 1.5881377548324573e-05, + "loss": 2.1392, + "step": 9363 + }, + { + "epoch": 0.31, + "grad_norm": 0.7393805384635925, + "learning_rate": 1.588051788899509e-05, + "loss": 2.1102, + "step": 9364 + }, + { + "epoch": 0.31, + "grad_norm": 0.7337321639060974, + "learning_rate": 1.5879658163230962e-05, + "loss": 2.1958, + "step": 9365 + }, + { + "epoch": 0.31, + "grad_norm": 0.7064790725708008, + "learning_rate": 1.5878798371041904e-05, + "loss": 2.135, + "step": 9366 + }, + { + "epoch": 0.31, + "grad_norm": 0.6932360529899597, + "learning_rate": 1.5877938512437623e-05, + "loss": 2.0814, + "step": 9367 + }, + { + "epoch": 0.31, + "grad_norm": 0.7264076471328735, + "learning_rate": 1.5877078587427843e-05, + "loss": 2.1409, + "step": 9368 + }, + { + "epoch": 0.31, + "grad_norm": 0.7129030227661133, + "learning_rate": 1.5876218596022267e-05, + "loss": 2.1248, + "step": 9369 + }, + { + "epoch": 0.31, + "grad_norm": 0.7282942533493042, + "learning_rate": 1.5875358538230622e-05, + "loss": 2.1388, + "step": 9370 + }, + { + "epoch": 0.31, + "grad_norm": 0.744226336479187, + "learning_rate": 1.5874498414062617e-05, + "loss": 2.1115, + "step": 9371 + }, + { + "epoch": 0.31, + "grad_norm": 0.7208715677261353, + "learning_rate": 1.5873638223527974e-05, + "loss": 2.0923, + "step": 9372 + }, + { + "epoch": 0.31, + "grad_norm": 0.742484986782074, + "learning_rate": 1.5872777966636407e-05, + "loss": 2.1057, + "step": 9373 + }, + { + "epoch": 0.31, + "grad_norm": 0.7470643520355225, + "learning_rate": 1.5871917643397637e-05, + "loss": 2.1095, + "step": 9374 + }, + { + "epoch": 0.31, + "grad_norm": 0.7380080819129944, + "learning_rate": 1.587105725382138e-05, + "loss": 2.1459, + "step": 9375 + }, + { + "epoch": 0.31, + "grad_norm": 0.7264449596405029, + "learning_rate": 1.5870196797917364e-05, + "loss": 2.1382, + "step": 9376 + }, + { + "epoch": 0.31, + "grad_norm": 0.6858323812484741, + "learning_rate": 1.58693362756953e-05, + "loss": 2.0887, + "step": 9377 + }, + { + "epoch": 0.31, + "grad_norm": 0.7045590877532959, + "learning_rate": 1.586847568716492e-05, + "loss": 2.0999, + "step": 9378 + }, + { + "epoch": 0.31, + "grad_norm": 0.7243896126747131, + "learning_rate": 1.5867615032335938e-05, + "loss": 2.1392, + "step": 9379 + }, + { + "epoch": 0.31, + "grad_norm": 0.709012508392334, + "learning_rate": 1.5866754311218078e-05, + "loss": 2.1841, + "step": 9380 + }, + { + "epoch": 0.31, + "grad_norm": 0.7338648438453674, + "learning_rate": 1.586589352382107e-05, + "loss": 2.1355, + "step": 9381 + }, + { + "epoch": 0.31, + "grad_norm": 0.7011011242866516, + "learning_rate": 1.586503267015463e-05, + "loss": 2.0781, + "step": 9382 + }, + { + "epoch": 0.31, + "grad_norm": 0.7398859858512878, + "learning_rate": 1.5864171750228493e-05, + "loss": 2.1129, + "step": 9383 + }, + { + "epoch": 0.31, + "grad_norm": 0.7238042950630188, + "learning_rate": 1.5863310764052377e-05, + "loss": 2.0833, + "step": 9384 + }, + { + "epoch": 0.31, + "grad_norm": 0.748180627822876, + "learning_rate": 1.5862449711636015e-05, + "loss": 2.0895, + "step": 9385 + }, + { + "epoch": 0.31, + "grad_norm": 0.7103529572486877, + "learning_rate": 1.586158859298913e-05, + "loss": 2.0477, + "step": 9386 + }, + { + "epoch": 0.31, + "grad_norm": 0.7032710313796997, + "learning_rate": 1.5860727408121453e-05, + "loss": 2.1162, + "step": 9387 + }, + { + "epoch": 0.31, + "grad_norm": 0.7389243245124817, + "learning_rate": 1.5859866157042708e-05, + "loss": 2.1359, + "step": 9388 + }, + { + "epoch": 0.31, + "grad_norm": 0.7248475551605225, + "learning_rate": 1.5859004839762636e-05, + "loss": 2.1971, + "step": 9389 + }, + { + "epoch": 0.31, + "grad_norm": 0.7445863485336304, + "learning_rate": 1.5858143456290957e-05, + "loss": 2.0935, + "step": 9390 + }, + { + "epoch": 0.31, + "grad_norm": 0.7575012445449829, + "learning_rate": 1.585728200663741e-05, + "loss": 2.1435, + "step": 9391 + }, + { + "epoch": 0.31, + "grad_norm": 0.7120488286018372, + "learning_rate": 1.585642049081172e-05, + "loss": 2.069, + "step": 9392 + }, + { + "epoch": 0.31, + "grad_norm": 0.7273603081703186, + "learning_rate": 1.5855558908823624e-05, + "loss": 2.1504, + "step": 9393 + }, + { + "epoch": 0.31, + "grad_norm": 0.6936109066009521, + "learning_rate": 1.5854697260682857e-05, + "loss": 2.1565, + "step": 9394 + }, + { + "epoch": 0.31, + "grad_norm": 0.7172167301177979, + "learning_rate": 1.585383554639915e-05, + "loss": 2.2064, + "step": 9395 + }, + { + "epoch": 0.31, + "grad_norm": 0.7418685555458069, + "learning_rate": 1.585297376598224e-05, + "loss": 2.022, + "step": 9396 + }, + { + "epoch": 0.31, + "grad_norm": 0.7625650763511658, + "learning_rate": 1.5852111919441867e-05, + "loss": 2.1119, + "step": 9397 + }, + { + "epoch": 0.31, + "grad_norm": 0.7353043556213379, + "learning_rate": 1.5851250006787754e-05, + "loss": 2.1042, + "step": 9398 + }, + { + "epoch": 0.31, + "grad_norm": 0.741849422454834, + "learning_rate": 1.5850388028029656e-05, + "loss": 2.0738, + "step": 9399 + }, + { + "epoch": 0.31, + "grad_norm": 0.6942260265350342, + "learning_rate": 1.5849525983177298e-05, + "loss": 2.1395, + "step": 9400 + }, + { + "epoch": 0.31, + "grad_norm": 0.7396000027656555, + "learning_rate": 1.5848663872240426e-05, + "loss": 2.1977, + "step": 9401 + }, + { + "epoch": 0.31, + "grad_norm": 0.7268931269645691, + "learning_rate": 1.5847801695228773e-05, + "loss": 2.1354, + "step": 9402 + }, + { + "epoch": 0.31, + "grad_norm": 0.6917301416397095, + "learning_rate": 1.5846939452152092e-05, + "loss": 2.0879, + "step": 9403 + }, + { + "epoch": 0.31, + "grad_norm": 0.7451241612434387, + "learning_rate": 1.5846077143020108e-05, + "loss": 2.0965, + "step": 9404 + }, + { + "epoch": 0.31, + "grad_norm": 0.7706646919250488, + "learning_rate": 1.584521476784257e-05, + "loss": 2.0862, + "step": 9405 + }, + { + "epoch": 0.31, + "grad_norm": 0.6801823377609253, + "learning_rate": 1.584435232662923e-05, + "loss": 2.0701, + "step": 9406 + }, + { + "epoch": 0.31, + "grad_norm": 0.7284613847732544, + "learning_rate": 1.5843489819389815e-05, + "loss": 2.1161, + "step": 9407 + }, + { + "epoch": 0.31, + "grad_norm": 0.7244563698768616, + "learning_rate": 1.5842627246134077e-05, + "loss": 2.1431, + "step": 9408 + }, + { + "epoch": 0.31, + "grad_norm": 0.7015225887298584, + "learning_rate": 1.5841764606871764e-05, + "loss": 2.1401, + "step": 9409 + }, + { + "epoch": 0.31, + "grad_norm": 0.7111163139343262, + "learning_rate": 1.5840901901612612e-05, + "loss": 2.1635, + "step": 9410 + }, + { + "epoch": 0.31, + "grad_norm": 0.7268586158752441, + "learning_rate": 1.584003913036638e-05, + "loss": 2.0753, + "step": 9411 + }, + { + "epoch": 0.31, + "grad_norm": 0.7158044576644897, + "learning_rate": 1.5839176293142802e-05, + "loss": 2.1375, + "step": 9412 + }, + { + "epoch": 0.31, + "grad_norm": 0.6889114379882812, + "learning_rate": 1.583831338995164e-05, + "loss": 2.108, + "step": 9413 + }, + { + "epoch": 0.31, + "grad_norm": 0.6827769875526428, + "learning_rate": 1.5837450420802633e-05, + "loss": 2.0551, + "step": 9414 + }, + { + "epoch": 0.31, + "grad_norm": 0.6834708452224731, + "learning_rate": 1.583658738570553e-05, + "loss": 2.1315, + "step": 9415 + }, + { + "epoch": 0.31, + "grad_norm": 0.7518619894981384, + "learning_rate": 1.583572428467008e-05, + "loss": 2.0399, + "step": 9416 + }, + { + "epoch": 0.31, + "grad_norm": 0.7287269234657288, + "learning_rate": 1.5834861117706043e-05, + "loss": 2.2061, + "step": 9417 + }, + { + "epoch": 0.31, + "grad_norm": 0.6860605478286743, + "learning_rate": 1.5833997884823166e-05, + "loss": 2.0921, + "step": 9418 + }, + { + "epoch": 0.31, + "grad_norm": 0.6967748999595642, + "learning_rate": 1.5833134586031192e-05, + "loss": 2.0759, + "step": 9419 + }, + { + "epoch": 0.31, + "grad_norm": 0.7458381056785583, + "learning_rate": 1.583227122133989e-05, + "loss": 2.176, + "step": 9420 + }, + { + "epoch": 0.31, + "grad_norm": 0.7498286962509155, + "learning_rate": 1.5831407790759e-05, + "loss": 2.1065, + "step": 9421 + }, + { + "epoch": 0.31, + "grad_norm": 0.7366600036621094, + "learning_rate": 1.5830544294298285e-05, + "loss": 2.1394, + "step": 9422 + }, + { + "epoch": 0.31, + "grad_norm": 0.7680703401565552, + "learning_rate": 1.5829680731967496e-05, + "loss": 2.1454, + "step": 9423 + }, + { + "epoch": 0.31, + "grad_norm": 0.7394464612007141, + "learning_rate": 1.5828817103776393e-05, + "loss": 2.1434, + "step": 9424 + }, + { + "epoch": 0.31, + "grad_norm": 0.7061243653297424, + "learning_rate": 1.582795340973473e-05, + "loss": 2.1018, + "step": 9425 + }, + { + "epoch": 0.31, + "grad_norm": 0.7081742882728577, + "learning_rate": 1.5827089649852262e-05, + "loss": 2.1638, + "step": 9426 + }, + { + "epoch": 0.31, + "grad_norm": 0.7141790986061096, + "learning_rate": 1.582622582413875e-05, + "loss": 2.0311, + "step": 9427 + }, + { + "epoch": 0.31, + "grad_norm": 0.7654913067817688, + "learning_rate": 1.582536193260396e-05, + "loss": 2.058, + "step": 9428 + }, + { + "epoch": 0.31, + "grad_norm": 0.7559537887573242, + "learning_rate": 1.5824497975257638e-05, + "loss": 2.0857, + "step": 9429 + }, + { + "epoch": 0.31, + "grad_norm": 0.7193070650100708, + "learning_rate": 1.5823633952109555e-05, + "loss": 2.1137, + "step": 9430 + }, + { + "epoch": 0.31, + "grad_norm": 0.6875612735748291, + "learning_rate": 1.5822769863169466e-05, + "loss": 2.1259, + "step": 9431 + }, + { + "epoch": 0.31, + "grad_norm": 0.7018385529518127, + "learning_rate": 1.5821905708447138e-05, + "loss": 2.1033, + "step": 9432 + }, + { + "epoch": 0.31, + "grad_norm": 0.736066997051239, + "learning_rate": 1.582104148795233e-05, + "loss": 2.1127, + "step": 9433 + }, + { + "epoch": 0.31, + "grad_norm": 0.7211173176765442, + "learning_rate": 1.5820177201694806e-05, + "loss": 2.1441, + "step": 9434 + }, + { + "epoch": 0.31, + "grad_norm": 0.7701630592346191, + "learning_rate": 1.5819312849684332e-05, + "loss": 2.2309, + "step": 9435 + }, + { + "epoch": 0.31, + "grad_norm": 0.7922909259796143, + "learning_rate": 1.5818448431930674e-05, + "loss": 2.1249, + "step": 9436 + }, + { + "epoch": 0.31, + "grad_norm": 0.718798816204071, + "learning_rate": 1.581758394844359e-05, + "loss": 2.149, + "step": 9437 + }, + { + "epoch": 0.31, + "grad_norm": 0.7408843040466309, + "learning_rate": 1.5816719399232855e-05, + "loss": 2.1312, + "step": 9438 + }, + { + "epoch": 0.31, + "grad_norm": 0.7309269309043884, + "learning_rate": 1.5815854784308233e-05, + "loss": 2.0993, + "step": 9439 + }, + { + "epoch": 0.31, + "grad_norm": 0.7137578129768372, + "learning_rate": 1.5814990103679492e-05, + "loss": 2.129, + "step": 9440 + }, + { + "epoch": 0.31, + "grad_norm": 0.7637180685997009, + "learning_rate": 1.5814125357356404e-05, + "loss": 2.1656, + "step": 9441 + }, + { + "epoch": 0.31, + "grad_norm": 0.6977443695068359, + "learning_rate": 1.581326054534873e-05, + "loss": 2.1297, + "step": 9442 + }, + { + "epoch": 0.31, + "grad_norm": 0.7001411318778992, + "learning_rate": 1.581239566766625e-05, + "loss": 2.1991, + "step": 9443 + }, + { + "epoch": 0.31, + "grad_norm": 0.7499983906745911, + "learning_rate": 1.5811530724318725e-05, + "loss": 2.0963, + "step": 9444 + }, + { + "epoch": 0.31, + "grad_norm": 0.7667697072029114, + "learning_rate": 1.5810665715315934e-05, + "loss": 2.1679, + "step": 9445 + }, + { + "epoch": 0.31, + "grad_norm": 0.7213220000267029, + "learning_rate": 1.5809800640667646e-05, + "loss": 2.0904, + "step": 9446 + }, + { + "epoch": 0.31, + "grad_norm": 0.7774108648300171, + "learning_rate": 1.580893550038364e-05, + "loss": 2.1252, + "step": 9447 + }, + { + "epoch": 0.31, + "grad_norm": 0.757612943649292, + "learning_rate": 1.5808070294473684e-05, + "loss": 2.1299, + "step": 9448 + }, + { + "epoch": 0.31, + "grad_norm": 0.726270854473114, + "learning_rate": 1.5807205022947546e-05, + "loss": 2.1325, + "step": 9449 + }, + { + "epoch": 0.31, + "grad_norm": 0.7092653512954712, + "learning_rate": 1.5806339685815016e-05, + "loss": 2.096, + "step": 9450 + }, + { + "epoch": 0.31, + "grad_norm": 0.7162549495697021, + "learning_rate": 1.5805474283085863e-05, + "loss": 2.1713, + "step": 9451 + }, + { + "epoch": 0.31, + "grad_norm": 0.7511173486709595, + "learning_rate": 1.5804608814769862e-05, + "loss": 2.1674, + "step": 9452 + }, + { + "epoch": 0.31, + "grad_norm": 0.7152876853942871, + "learning_rate": 1.5803743280876793e-05, + "loss": 2.1214, + "step": 9453 + }, + { + "epoch": 0.31, + "grad_norm": 0.742279052734375, + "learning_rate": 1.5802877681416435e-05, + "loss": 2.2081, + "step": 9454 + }, + { + "epoch": 0.31, + "grad_norm": 0.7373721599578857, + "learning_rate": 1.5802012016398563e-05, + "loss": 2.1039, + "step": 9455 + }, + { + "epoch": 0.31, + "grad_norm": 0.7521692514419556, + "learning_rate": 1.580114628583296e-05, + "loss": 2.1895, + "step": 9456 + }, + { + "epoch": 0.31, + "grad_norm": 0.7203809022903442, + "learning_rate": 1.580028048972941e-05, + "loss": 2.1184, + "step": 9457 + }, + { + "epoch": 0.31, + "grad_norm": 0.6909192800521851, + "learning_rate": 1.5799414628097687e-05, + "loss": 2.1398, + "step": 9458 + }, + { + "epoch": 0.31, + "grad_norm": 0.7108280658721924, + "learning_rate": 1.579854870094758e-05, + "loss": 2.1717, + "step": 9459 + }, + { + "epoch": 0.31, + "grad_norm": 0.7120805978775024, + "learning_rate": 1.5797682708288863e-05, + "loss": 2.1169, + "step": 9460 + }, + { + "epoch": 0.31, + "grad_norm": 0.7278050780296326, + "learning_rate": 1.5796816650131324e-05, + "loss": 2.2593, + "step": 9461 + }, + { + "epoch": 0.31, + "grad_norm": 0.7110636830329895, + "learning_rate": 1.579595052648475e-05, + "loss": 2.1484, + "step": 9462 + }, + { + "epoch": 0.31, + "grad_norm": 0.7246778011322021, + "learning_rate": 1.5795084337358925e-05, + "loss": 2.1747, + "step": 9463 + }, + { + "epoch": 0.31, + "grad_norm": 0.7256181836128235, + "learning_rate": 1.579421808276363e-05, + "loss": 2.151, + "step": 9464 + }, + { + "epoch": 0.31, + "grad_norm": 0.7386330366134644, + "learning_rate": 1.579335176270866e-05, + "loss": 2.1752, + "step": 9465 + }, + { + "epoch": 0.31, + "grad_norm": 0.7566443681716919, + "learning_rate": 1.5792485377203793e-05, + "loss": 2.1853, + "step": 9466 + }, + { + "epoch": 0.31, + "grad_norm": 0.7201268076896667, + "learning_rate": 1.579161892625882e-05, + "loss": 2.0811, + "step": 9467 + }, + { + "epoch": 0.32, + "grad_norm": 0.7124336361885071, + "learning_rate": 1.5790752409883533e-05, + "loss": 2.0542, + "step": 9468 + }, + { + "epoch": 0.32, + "grad_norm": 0.7368142008781433, + "learning_rate": 1.578988582808772e-05, + "loss": 2.1207, + "step": 9469 + }, + { + "epoch": 0.32, + "grad_norm": 0.7141274213790894, + "learning_rate": 1.5789019180881168e-05, + "loss": 2.1098, + "step": 9470 + }, + { + "epoch": 0.32, + "grad_norm": 0.7566022872924805, + "learning_rate": 1.578815246827367e-05, + "loss": 2.1009, + "step": 9471 + }, + { + "epoch": 0.32, + "grad_norm": 0.7603344321250916, + "learning_rate": 1.5787285690275016e-05, + "loss": 2.1171, + "step": 9472 + }, + { + "epoch": 0.32, + "grad_norm": 0.733277440071106, + "learning_rate": 1.5786418846895007e-05, + "loss": 2.1771, + "step": 9473 + }, + { + "epoch": 0.32, + "grad_norm": 0.6996964812278748, + "learning_rate": 1.5785551938143422e-05, + "loss": 2.1505, + "step": 9474 + }, + { + "epoch": 0.32, + "grad_norm": 0.7126699686050415, + "learning_rate": 1.5784684964030065e-05, + "loss": 2.1401, + "step": 9475 + }, + { + "epoch": 0.32, + "grad_norm": 0.7585956454277039, + "learning_rate": 1.5783817924564725e-05, + "loss": 2.1734, + "step": 9476 + }, + { + "epoch": 0.32, + "grad_norm": 0.7639651894569397, + "learning_rate": 1.5782950819757203e-05, + "loss": 2.1451, + "step": 9477 + }, + { + "epoch": 0.32, + "grad_norm": 0.7288308143615723, + "learning_rate": 1.5782083649617292e-05, + "loss": 2.1835, + "step": 9478 + }, + { + "epoch": 0.32, + "grad_norm": 0.7138044238090515, + "learning_rate": 1.5781216414154785e-05, + "loss": 2.1379, + "step": 9479 + }, + { + "epoch": 0.32, + "grad_norm": 0.7208316922187805, + "learning_rate": 1.5780349113379483e-05, + "loss": 2.0688, + "step": 9480 + }, + { + "epoch": 0.32, + "grad_norm": 0.7289469242095947, + "learning_rate": 1.5779481747301186e-05, + "loss": 2.1293, + "step": 9481 + }, + { + "epoch": 0.32, + "grad_norm": 0.7546771168708801, + "learning_rate": 1.5778614315929693e-05, + "loss": 2.2088, + "step": 9482 + }, + { + "epoch": 0.32, + "grad_norm": 0.720140278339386, + "learning_rate": 1.57777468192748e-05, + "loss": 2.1878, + "step": 9483 + }, + { + "epoch": 0.32, + "grad_norm": 0.7224448323249817, + "learning_rate": 1.577687925734631e-05, + "loss": 2.177, + "step": 9484 + }, + { + "epoch": 0.32, + "grad_norm": 0.7290453314781189, + "learning_rate": 1.5776011630154023e-05, + "loss": 2.1306, + "step": 9485 + }, + { + "epoch": 0.32, + "grad_norm": 0.7161919474601746, + "learning_rate": 1.577514393770774e-05, + "loss": 2.2324, + "step": 9486 + }, + { + "epoch": 0.32, + "grad_norm": 0.7317053079605103, + "learning_rate": 1.577427618001727e-05, + "loss": 2.0505, + "step": 9487 + }, + { + "epoch": 0.32, + "grad_norm": 0.7158587574958801, + "learning_rate": 1.577340835709241e-05, + "loss": 2.1329, + "step": 9488 + }, + { + "epoch": 0.32, + "grad_norm": 0.7073089480400085, + "learning_rate": 1.5772540468942964e-05, + "loss": 2.1659, + "step": 9489 + }, + { + "epoch": 0.32, + "grad_norm": 0.7054910063743591, + "learning_rate": 1.5771672515578743e-05, + "loss": 2.1204, + "step": 9490 + }, + { + "epoch": 0.32, + "grad_norm": 0.715887188911438, + "learning_rate": 1.5770804497009544e-05, + "loss": 2.0894, + "step": 9491 + }, + { + "epoch": 0.32, + "grad_norm": 0.7075126767158508, + "learning_rate": 1.576993641324518e-05, + "loss": 2.1834, + "step": 9492 + }, + { + "epoch": 0.32, + "grad_norm": 0.7366071939468384, + "learning_rate": 1.5769068264295456e-05, + "loss": 2.1915, + "step": 9493 + }, + { + "epoch": 0.32, + "grad_norm": 0.7239155173301697, + "learning_rate": 1.5768200050170177e-05, + "loss": 2.1354, + "step": 9494 + }, + { + "epoch": 0.32, + "grad_norm": 0.7025246024131775, + "learning_rate": 1.5767331770879158e-05, + "loss": 2.1724, + "step": 9495 + }, + { + "epoch": 0.32, + "grad_norm": 0.7343201041221619, + "learning_rate": 1.5766463426432203e-05, + "loss": 2.131, + "step": 9496 + }, + { + "epoch": 0.32, + "grad_norm": 0.75413978099823, + "learning_rate": 1.5765595016839127e-05, + "loss": 2.1168, + "step": 9497 + }, + { + "epoch": 0.32, + "grad_norm": 0.7159507274627686, + "learning_rate": 1.5764726542109732e-05, + "loss": 2.2244, + "step": 9498 + }, + { + "epoch": 0.32, + "grad_norm": 0.717211127281189, + "learning_rate": 1.5763858002253838e-05, + "loss": 2.1286, + "step": 9499 + }, + { + "epoch": 0.32, + "grad_norm": 0.7077893614768982, + "learning_rate": 1.5762989397281252e-05, + "loss": 2.1646, + "step": 9500 + }, + { + "epoch": 0.32, + "grad_norm": 0.7396695017814636, + "learning_rate": 1.5762120727201794e-05, + "loss": 2.157, + "step": 9501 + }, + { + "epoch": 0.32, + "grad_norm": 0.7099971175193787, + "learning_rate": 1.576125199202527e-05, + "loss": 2.0659, + "step": 9502 + }, + { + "epoch": 0.32, + "grad_norm": 0.7169786095619202, + "learning_rate": 1.5760383191761496e-05, + "loss": 2.1055, + "step": 9503 + }, + { + "epoch": 0.32, + "grad_norm": 0.6997612714767456, + "learning_rate": 1.575951432642029e-05, + "loss": 2.1445, + "step": 9504 + }, + { + "epoch": 0.32, + "grad_norm": 0.7236524820327759, + "learning_rate": 1.5758645396011466e-05, + "loss": 2.1581, + "step": 9505 + }, + { + "epoch": 0.32, + "grad_norm": 0.710495114326477, + "learning_rate": 1.5757776400544843e-05, + "loss": 2.1879, + "step": 9506 + }, + { + "epoch": 0.32, + "grad_norm": 0.702593982219696, + "learning_rate": 1.575690734003023e-05, + "loss": 2.0816, + "step": 9507 + }, + { + "epoch": 0.32, + "grad_norm": 0.7106673121452332, + "learning_rate": 1.575603821447746e-05, + "loss": 2.0962, + "step": 9508 + }, + { + "epoch": 0.32, + "grad_norm": 0.6896476745605469, + "learning_rate": 1.575516902389634e-05, + "loss": 2.1703, + "step": 9509 + }, + { + "epoch": 0.32, + "grad_norm": 0.6999567747116089, + "learning_rate": 1.5754299768296695e-05, + "loss": 2.1081, + "step": 9510 + }, + { + "epoch": 0.32, + "grad_norm": 0.7676829695701599, + "learning_rate": 1.5753430447688344e-05, + "loss": 2.1694, + "step": 9511 + }, + { + "epoch": 0.32, + "grad_norm": 0.7136418223381042, + "learning_rate": 1.5752561062081104e-05, + "loss": 2.1243, + "step": 9512 + }, + { + "epoch": 0.32, + "grad_norm": 0.7588100433349609, + "learning_rate": 1.5751691611484803e-05, + "loss": 2.1602, + "step": 9513 + }, + { + "epoch": 0.32, + "grad_norm": 0.7047775387763977, + "learning_rate": 1.5750822095909258e-05, + "loss": 2.1807, + "step": 9514 + }, + { + "epoch": 0.32, + "grad_norm": 0.7533066272735596, + "learning_rate": 1.57499525153643e-05, + "loss": 2.1384, + "step": 9515 + }, + { + "epoch": 0.32, + "grad_norm": 0.7027696967124939, + "learning_rate": 1.5749082869859745e-05, + "loss": 2.0984, + "step": 9516 + }, + { + "epoch": 0.32, + "grad_norm": 0.6989158987998962, + "learning_rate": 1.5748213159405422e-05, + "loss": 2.1355, + "step": 9517 + }, + { + "epoch": 0.32, + "grad_norm": 0.7062950134277344, + "learning_rate": 1.5747343384011157e-05, + "loss": 2.1985, + "step": 9518 + }, + { + "epoch": 0.32, + "grad_norm": 0.6866962313652039, + "learning_rate": 1.5746473543686775e-05, + "loss": 2.145, + "step": 9519 + }, + { + "epoch": 0.32, + "grad_norm": 0.7092107534408569, + "learning_rate": 1.57456036384421e-05, + "loss": 2.1825, + "step": 9520 + }, + { + "epoch": 0.32, + "grad_norm": 0.7087661623954773, + "learning_rate": 1.574473366828696e-05, + "loss": 2.1107, + "step": 9521 + }, + { + "epoch": 0.32, + "grad_norm": 0.7195688486099243, + "learning_rate": 1.574386363323119e-05, + "loss": 2.0866, + "step": 9522 + }, + { + "epoch": 0.32, + "grad_norm": 0.6997634172439575, + "learning_rate": 1.5742993533284614e-05, + "loss": 2.145, + "step": 9523 + }, + { + "epoch": 0.32, + "grad_norm": 0.7220474481582642, + "learning_rate": 1.5742123368457065e-05, + "loss": 2.1756, + "step": 9524 + }, + { + "epoch": 0.32, + "grad_norm": 0.7579584717750549, + "learning_rate": 1.574125313875837e-05, + "loss": 2.0839, + "step": 9525 + }, + { + "epoch": 0.32, + "grad_norm": 0.7131385803222656, + "learning_rate": 1.574038284419836e-05, + "loss": 2.1274, + "step": 9526 + }, + { + "epoch": 0.32, + "grad_norm": 0.7348940968513489, + "learning_rate": 1.5739512484786866e-05, + "loss": 2.0994, + "step": 9527 + }, + { + "epoch": 0.32, + "grad_norm": 0.749491810798645, + "learning_rate": 1.573864206053373e-05, + "loss": 2.1788, + "step": 9528 + }, + { + "epoch": 0.32, + "grad_norm": 0.7153084874153137, + "learning_rate": 1.5737771571448776e-05, + "loss": 2.1363, + "step": 9529 + }, + { + "epoch": 0.32, + "grad_norm": 0.7272295355796814, + "learning_rate": 1.5736901017541843e-05, + "loss": 2.0987, + "step": 9530 + }, + { + "epoch": 0.32, + "grad_norm": 0.7216790318489075, + "learning_rate": 1.573603039882276e-05, + "loss": 2.2127, + "step": 9531 + }, + { + "epoch": 0.32, + "grad_norm": 0.7159500122070312, + "learning_rate": 1.5735159715301373e-05, + "loss": 2.0835, + "step": 9532 + }, + { + "epoch": 0.32, + "grad_norm": 0.7494335770606995, + "learning_rate": 1.5734288966987514e-05, + "loss": 2.0973, + "step": 9533 + }, + { + "epoch": 0.32, + "grad_norm": 0.7109895348548889, + "learning_rate": 1.5733418153891013e-05, + "loss": 2.0859, + "step": 9534 + }, + { + "epoch": 0.32, + "grad_norm": 0.7220141291618347, + "learning_rate": 1.5732547276021716e-05, + "loss": 2.1052, + "step": 9535 + }, + { + "epoch": 0.32, + "grad_norm": 0.7249950766563416, + "learning_rate": 1.573167633338946e-05, + "loss": 2.1341, + "step": 9536 + }, + { + "epoch": 0.32, + "grad_norm": 0.7163698077201843, + "learning_rate": 1.5730805326004085e-05, + "loss": 2.1211, + "step": 9537 + }, + { + "epoch": 0.32, + "grad_norm": 0.7331742644309998, + "learning_rate": 1.5729934253875425e-05, + "loss": 2.0906, + "step": 9538 + }, + { + "epoch": 0.32, + "grad_norm": 0.7258093953132629, + "learning_rate": 1.572906311701333e-05, + "loss": 2.1952, + "step": 9539 + }, + { + "epoch": 0.32, + "grad_norm": 0.7199559807777405, + "learning_rate": 1.5728191915427634e-05, + "loss": 2.1366, + "step": 9540 + }, + { + "epoch": 0.32, + "grad_norm": 0.7624881267547607, + "learning_rate": 1.5727320649128187e-05, + "loss": 2.1805, + "step": 9541 + }, + { + "epoch": 0.32, + "grad_norm": 0.7302795052528381, + "learning_rate": 1.572644931812483e-05, + "loss": 2.1467, + "step": 9542 + }, + { + "epoch": 0.32, + "grad_norm": 0.7440550923347473, + "learning_rate": 1.57255779224274e-05, + "loss": 2.2007, + "step": 9543 + }, + { + "epoch": 0.32, + "grad_norm": 0.7277077436447144, + "learning_rate": 1.5724706462045744e-05, + "loss": 2.1112, + "step": 9544 + }, + { + "epoch": 0.32, + "grad_norm": 0.7130835652351379, + "learning_rate": 1.5723834936989713e-05, + "loss": 2.148, + "step": 9545 + }, + { + "epoch": 0.32, + "grad_norm": 0.7161419987678528, + "learning_rate": 1.572296334726915e-05, + "loss": 2.1175, + "step": 9546 + }, + { + "epoch": 0.32, + "grad_norm": 0.7154002785682678, + "learning_rate": 1.5722091692893898e-05, + "loss": 2.1295, + "step": 9547 + }, + { + "epoch": 0.32, + "grad_norm": 0.7618816494941711, + "learning_rate": 1.572121997387381e-05, + "loss": 2.0115, + "step": 9548 + }, + { + "epoch": 0.32, + "grad_norm": 0.7276881337165833, + "learning_rate": 1.572034819021873e-05, + "loss": 2.211, + "step": 9549 + }, + { + "epoch": 0.32, + "grad_norm": 0.7261018753051758, + "learning_rate": 1.5719476341938512e-05, + "loss": 2.1398, + "step": 9550 + }, + { + "epoch": 0.32, + "grad_norm": 0.749725341796875, + "learning_rate": 1.5718604429043e-05, + "loss": 2.1149, + "step": 9551 + }, + { + "epoch": 0.32, + "grad_norm": 0.6969484090805054, + "learning_rate": 1.5717732451542047e-05, + "loss": 2.1031, + "step": 9552 + }, + { + "epoch": 0.32, + "grad_norm": 0.7233068346977234, + "learning_rate": 1.5716860409445504e-05, + "loss": 2.1048, + "step": 9553 + }, + { + "epoch": 0.32, + "grad_norm": 0.6973428726196289, + "learning_rate": 1.571598830276322e-05, + "loss": 2.1798, + "step": 9554 + }, + { + "epoch": 0.32, + "grad_norm": 0.7416917085647583, + "learning_rate": 1.5715116131505052e-05, + "loss": 2.0262, + "step": 9555 + }, + { + "epoch": 0.32, + "grad_norm": 0.7377226948738098, + "learning_rate": 1.571424389568085e-05, + "loss": 2.0619, + "step": 9556 + }, + { + "epoch": 0.32, + "grad_norm": 0.7081400156021118, + "learning_rate": 1.5713371595300473e-05, + "loss": 2.0919, + "step": 9557 + }, + { + "epoch": 0.32, + "grad_norm": 0.6876232028007507, + "learning_rate": 1.5712499230373768e-05, + "loss": 2.0479, + "step": 9558 + }, + { + "epoch": 0.32, + "grad_norm": 0.7329896092414856, + "learning_rate": 1.57116268009106e-05, + "loss": 2.1534, + "step": 9559 + }, + { + "epoch": 0.32, + "grad_norm": 0.7370933294296265, + "learning_rate": 1.5710754306920813e-05, + "loss": 2.1273, + "step": 9560 + }, + { + "epoch": 0.32, + "grad_norm": 0.742594301700592, + "learning_rate": 1.5709881748414275e-05, + "loss": 2.0766, + "step": 9561 + }, + { + "epoch": 0.32, + "grad_norm": 0.7008952498435974, + "learning_rate": 1.5709009125400835e-05, + "loss": 2.0758, + "step": 9562 + }, + { + "epoch": 0.32, + "grad_norm": 0.7532712817192078, + "learning_rate": 1.5708136437890363e-05, + "loss": 2.1701, + "step": 9563 + }, + { + "epoch": 0.32, + "grad_norm": 0.7653273344039917, + "learning_rate": 1.5707263685892705e-05, + "loss": 2.1042, + "step": 9564 + }, + { + "epoch": 0.32, + "grad_norm": 0.6916401386260986, + "learning_rate": 1.5706390869417726e-05, + "loss": 2.1362, + "step": 9565 + }, + { + "epoch": 0.32, + "grad_norm": 0.6997584104537964, + "learning_rate": 1.5705517988475293e-05, + "loss": 2.0919, + "step": 9566 + }, + { + "epoch": 0.32, + "grad_norm": 0.7480838298797607, + "learning_rate": 1.5704645043075256e-05, + "loss": 2.0627, + "step": 9567 + }, + { + "epoch": 0.32, + "grad_norm": 0.7022796273231506, + "learning_rate": 1.5703772033227483e-05, + "loss": 2.08, + "step": 9568 + }, + { + "epoch": 0.32, + "grad_norm": 0.70134437084198, + "learning_rate": 1.5702898958941835e-05, + "loss": 2.2027, + "step": 9569 + }, + { + "epoch": 0.32, + "grad_norm": 0.6919594407081604, + "learning_rate": 1.5702025820228185e-05, + "loss": 2.1394, + "step": 9570 + }, + { + "epoch": 0.32, + "grad_norm": 0.699573814868927, + "learning_rate": 1.570115261709638e-05, + "loss": 2.0919, + "step": 9571 + }, + { + "epoch": 0.32, + "grad_norm": 0.718121349811554, + "learning_rate": 1.57002793495563e-05, + "loss": 2.0955, + "step": 9572 + }, + { + "epoch": 0.32, + "grad_norm": 0.7320795655250549, + "learning_rate": 1.56994060176178e-05, + "loss": 2.1064, + "step": 9573 + }, + { + "epoch": 0.32, + "grad_norm": 0.7241496443748474, + "learning_rate": 1.5698532621290755e-05, + "loss": 2.1564, + "step": 9574 + }, + { + "epoch": 0.32, + "grad_norm": 0.7316837310791016, + "learning_rate": 1.5697659160585024e-05, + "loss": 2.12, + "step": 9575 + }, + { + "epoch": 0.32, + "grad_norm": 0.7229344248771667, + "learning_rate": 1.569678563551048e-05, + "loss": 2.1038, + "step": 9576 + }, + { + "epoch": 0.32, + "grad_norm": 0.7267295718193054, + "learning_rate": 1.5695912046076993e-05, + "loss": 2.1607, + "step": 9577 + }, + { + "epoch": 0.32, + "grad_norm": 0.6990330219268799, + "learning_rate": 1.5695038392294428e-05, + "loss": 2.1009, + "step": 9578 + }, + { + "epoch": 0.32, + "grad_norm": 0.7229902744293213, + "learning_rate": 1.5694164674172657e-05, + "loss": 2.062, + "step": 9579 + }, + { + "epoch": 0.32, + "grad_norm": 0.7202978730201721, + "learning_rate": 1.5693290891721552e-05, + "loss": 2.1183, + "step": 9580 + }, + { + "epoch": 0.32, + "grad_norm": 0.722460150718689, + "learning_rate": 1.5692417044950984e-05, + "loss": 2.0628, + "step": 9581 + }, + { + "epoch": 0.32, + "grad_norm": 0.7395266890525818, + "learning_rate": 1.5691543133870818e-05, + "loss": 2.1198, + "step": 9582 + }, + { + "epoch": 0.32, + "grad_norm": 0.7214912176132202, + "learning_rate": 1.569066915849094e-05, + "loss": 2.2318, + "step": 9583 + }, + { + "epoch": 0.32, + "grad_norm": 0.734996497631073, + "learning_rate": 1.5689795118821212e-05, + "loss": 2.1246, + "step": 9584 + }, + { + "epoch": 0.32, + "grad_norm": 0.7530327439308167, + "learning_rate": 1.5688921014871516e-05, + "loss": 2.1565, + "step": 9585 + }, + { + "epoch": 0.32, + "grad_norm": 0.7424435019493103, + "learning_rate": 1.5688046846651727e-05, + "loss": 2.0547, + "step": 9586 + }, + { + "epoch": 0.32, + "grad_norm": 0.7094511985778809, + "learning_rate": 1.5687172614171716e-05, + "loss": 2.1595, + "step": 9587 + }, + { + "epoch": 0.32, + "grad_norm": 0.7107856273651123, + "learning_rate": 1.568629831744136e-05, + "loss": 2.1035, + "step": 9588 + }, + { + "epoch": 0.32, + "grad_norm": 0.7119570374488831, + "learning_rate": 1.568542395647054e-05, + "loss": 2.1014, + "step": 9589 + }, + { + "epoch": 0.32, + "grad_norm": 0.6754194498062134, + "learning_rate": 1.568454953126913e-05, + "loss": 2.1741, + "step": 9590 + }, + { + "epoch": 0.32, + "grad_norm": 0.7398413419723511, + "learning_rate": 1.5683675041847014e-05, + "loss": 2.1005, + "step": 9591 + }, + { + "epoch": 0.32, + "grad_norm": 0.721780002117157, + "learning_rate": 1.5682800488214068e-05, + "loss": 2.0811, + "step": 9592 + }, + { + "epoch": 0.32, + "grad_norm": 0.7383992075920105, + "learning_rate": 1.5681925870380172e-05, + "loss": 2.1526, + "step": 9593 + }, + { + "epoch": 0.32, + "grad_norm": 0.7144381403923035, + "learning_rate": 1.5681051188355205e-05, + "loss": 2.0615, + "step": 9594 + }, + { + "epoch": 0.32, + "grad_norm": 0.7038764357566833, + "learning_rate": 1.5680176442149054e-05, + "loss": 2.0843, + "step": 9595 + }, + { + "epoch": 0.32, + "grad_norm": 0.7094604969024658, + "learning_rate": 1.5679301631771596e-05, + "loss": 2.1016, + "step": 9596 + }, + { + "epoch": 0.32, + "grad_norm": 0.7397826910018921, + "learning_rate": 1.567842675723272e-05, + "loss": 2.1635, + "step": 9597 + }, + { + "epoch": 0.32, + "grad_norm": 0.7061549425125122, + "learning_rate": 1.56775518185423e-05, + "loss": 2.0496, + "step": 9598 + }, + { + "epoch": 0.32, + "grad_norm": 0.7178221344947815, + "learning_rate": 1.5676676815710234e-05, + "loss": 2.0624, + "step": 9599 + }, + { + "epoch": 0.32, + "grad_norm": 0.708868145942688, + "learning_rate": 1.5675801748746398e-05, + "loss": 2.1848, + "step": 9600 + }, + { + "epoch": 0.32, + "grad_norm": 0.7420101761817932, + "learning_rate": 1.567492661766068e-05, + "loss": 2.0407, + "step": 9601 + }, + { + "epoch": 0.32, + "grad_norm": 0.7334026098251343, + "learning_rate": 1.5674051422462963e-05, + "loss": 2.1414, + "step": 9602 + }, + { + "epoch": 0.32, + "grad_norm": 0.7109665870666504, + "learning_rate": 1.5673176163163145e-05, + "loss": 2.0832, + "step": 9603 + }, + { + "epoch": 0.32, + "grad_norm": 0.7079555988311768, + "learning_rate": 1.5672300839771102e-05, + "loss": 2.0636, + "step": 9604 + }, + { + "epoch": 0.32, + "grad_norm": 0.7286210060119629, + "learning_rate": 1.567142545229673e-05, + "loss": 2.1746, + "step": 9605 + }, + { + "epoch": 0.32, + "grad_norm": 0.741376519203186, + "learning_rate": 1.567055000074992e-05, + "loss": 2.1565, + "step": 9606 + }, + { + "epoch": 0.32, + "grad_norm": 0.695118248462677, + "learning_rate": 1.5669674485140556e-05, + "loss": 2.1242, + "step": 9607 + }, + { + "epoch": 0.32, + "grad_norm": 0.7205113768577576, + "learning_rate": 1.5668798905478535e-05, + "loss": 2.1159, + "step": 9608 + }, + { + "epoch": 0.32, + "grad_norm": 0.7460171580314636, + "learning_rate": 1.5667923261773743e-05, + "loss": 2.1477, + "step": 9609 + }, + { + "epoch": 0.32, + "grad_norm": 0.7187833189964294, + "learning_rate": 1.5667047554036078e-05, + "loss": 2.1223, + "step": 9610 + }, + { + "epoch": 0.32, + "grad_norm": 0.7472878098487854, + "learning_rate": 1.566617178227543e-05, + "loss": 2.1371, + "step": 9611 + }, + { + "epoch": 0.32, + "grad_norm": 0.7552516460418701, + "learning_rate": 1.5665295946501697e-05, + "loss": 2.1796, + "step": 9612 + }, + { + "epoch": 0.32, + "grad_norm": 0.7284395098686218, + "learning_rate": 1.5664420046724767e-05, + "loss": 2.1306, + "step": 9613 + }, + { + "epoch": 0.32, + "grad_norm": 0.7214163541793823, + "learning_rate": 1.566354408295454e-05, + "loss": 2.1494, + "step": 9614 + }, + { + "epoch": 0.32, + "grad_norm": 0.7119484543800354, + "learning_rate": 1.5662668055200908e-05, + "loss": 2.1303, + "step": 9615 + }, + { + "epoch": 0.32, + "grad_norm": 0.7451114058494568, + "learning_rate": 1.5661791963473775e-05, + "loss": 2.1109, + "step": 9616 + }, + { + "epoch": 0.32, + "grad_norm": 0.707645058631897, + "learning_rate": 1.5660915807783035e-05, + "loss": 2.0225, + "step": 9617 + }, + { + "epoch": 0.32, + "grad_norm": 0.7205560207366943, + "learning_rate": 1.5660039588138583e-05, + "loss": 2.0906, + "step": 9618 + }, + { + "epoch": 0.32, + "grad_norm": 0.7707234621047974, + "learning_rate": 1.5659163304550323e-05, + "loss": 2.1227, + "step": 9619 + }, + { + "epoch": 0.32, + "grad_norm": 0.7582520842552185, + "learning_rate": 1.565828695702815e-05, + "loss": 2.1891, + "step": 9620 + }, + { + "epoch": 0.32, + "grad_norm": 0.7272468209266663, + "learning_rate": 1.565741054558197e-05, + "loss": 2.134, + "step": 9621 + }, + { + "epoch": 0.32, + "grad_norm": 0.7442682981491089, + "learning_rate": 1.565653407022168e-05, + "loss": 2.1559, + "step": 9622 + }, + { + "epoch": 0.32, + "grad_norm": 0.744335412979126, + "learning_rate": 1.565565753095718e-05, + "loss": 2.114, + "step": 9623 + }, + { + "epoch": 0.32, + "grad_norm": 0.7005484700202942, + "learning_rate": 1.5654780927798382e-05, + "loss": 2.1597, + "step": 9624 + }, + { + "epoch": 0.32, + "grad_norm": 0.7300022840499878, + "learning_rate": 1.565390426075518e-05, + "loss": 2.0513, + "step": 9625 + }, + { + "epoch": 0.32, + "grad_norm": 0.7206262350082397, + "learning_rate": 1.565302752983748e-05, + "loss": 2.1894, + "step": 9626 + }, + { + "epoch": 0.32, + "grad_norm": 0.7290781736373901, + "learning_rate": 1.565215073505519e-05, + "loss": 2.1122, + "step": 9627 + }, + { + "epoch": 0.32, + "grad_norm": 0.7283538579940796, + "learning_rate": 1.5651273876418214e-05, + "loss": 2.1367, + "step": 9628 + }, + { + "epoch": 0.32, + "grad_norm": 0.698457658290863, + "learning_rate": 1.5650396953936455e-05, + "loss": 2.1004, + "step": 9629 + }, + { + "epoch": 0.32, + "grad_norm": 0.7498002648353577, + "learning_rate": 1.5649519967619826e-05, + "loss": 2.1383, + "step": 9630 + }, + { + "epoch": 0.32, + "grad_norm": 0.7227504849433899, + "learning_rate": 1.5648642917478227e-05, + "loss": 2.0743, + "step": 9631 + }, + { + "epoch": 0.32, + "grad_norm": 0.7126960158348083, + "learning_rate": 1.5647765803521573e-05, + "loss": 2.1988, + "step": 9632 + }, + { + "epoch": 0.32, + "grad_norm": 0.715067982673645, + "learning_rate": 1.5646888625759774e-05, + "loss": 2.1768, + "step": 9633 + }, + { + "epoch": 0.32, + "grad_norm": 0.7235296368598938, + "learning_rate": 1.5646011384202733e-05, + "loss": 2.2027, + "step": 9634 + }, + { + "epoch": 0.32, + "grad_norm": 0.7500407099723816, + "learning_rate": 1.5645134078860363e-05, + "loss": 2.1222, + "step": 9635 + }, + { + "epoch": 0.32, + "grad_norm": 0.7103623151779175, + "learning_rate": 1.564425670974258e-05, + "loss": 2.1598, + "step": 9636 + }, + { + "epoch": 0.32, + "grad_norm": 0.7132090330123901, + "learning_rate": 1.564337927685929e-05, + "loss": 2.1487, + "step": 9637 + }, + { + "epoch": 0.32, + "grad_norm": 0.7143595814704895, + "learning_rate": 1.5642501780220405e-05, + "loss": 2.0656, + "step": 9638 + }, + { + "epoch": 0.32, + "grad_norm": 0.721831202507019, + "learning_rate": 1.5641624219835853e-05, + "loss": 2.1336, + "step": 9639 + }, + { + "epoch": 0.32, + "grad_norm": 0.7576766014099121, + "learning_rate": 1.5640746595715527e-05, + "loss": 2.1204, + "step": 9640 + }, + { + "epoch": 0.32, + "grad_norm": 0.704969584941864, + "learning_rate": 1.5639868907869356e-05, + "loss": 2.0931, + "step": 9641 + }, + { + "epoch": 0.32, + "grad_norm": 0.698978066444397, + "learning_rate": 1.5638991156307248e-05, + "loss": 2.1002, + "step": 9642 + }, + { + "epoch": 0.32, + "grad_norm": 0.733176589012146, + "learning_rate": 1.5638113341039125e-05, + "loss": 2.1458, + "step": 9643 + }, + { + "epoch": 0.32, + "grad_norm": 0.7193590402603149, + "learning_rate": 1.5637235462074903e-05, + "loss": 2.1283, + "step": 9644 + }, + { + "epoch": 0.32, + "grad_norm": 0.7169396281242371, + "learning_rate": 1.56363575194245e-05, + "loss": 2.1778, + "step": 9645 + }, + { + "epoch": 0.32, + "grad_norm": 0.7564206123352051, + "learning_rate": 1.5635479513097827e-05, + "loss": 2.2108, + "step": 9646 + }, + { + "epoch": 0.32, + "grad_norm": 0.7212381958961487, + "learning_rate": 1.5634601443104812e-05, + "loss": 2.1225, + "step": 9647 + }, + { + "epoch": 0.32, + "grad_norm": 0.7097349762916565, + "learning_rate": 1.563372330945537e-05, + "loss": 2.1498, + "step": 9648 + }, + { + "epoch": 0.32, + "grad_norm": 0.6714023351669312, + "learning_rate": 1.5632845112159428e-05, + "loss": 2.1314, + "step": 9649 + }, + { + "epoch": 0.32, + "grad_norm": 0.7411195635795593, + "learning_rate": 1.56319668512269e-05, + "loss": 2.1672, + "step": 9650 + }, + { + "epoch": 0.32, + "grad_norm": 0.703231930732727, + "learning_rate": 1.5631088526667713e-05, + "loss": 2.1897, + "step": 9651 + }, + { + "epoch": 0.32, + "grad_norm": 0.6935396790504456, + "learning_rate": 1.5630210138491786e-05, + "loss": 2.147, + "step": 9652 + }, + { + "epoch": 0.32, + "grad_norm": 0.7113943099975586, + "learning_rate": 1.5629331686709046e-05, + "loss": 2.1304, + "step": 9653 + }, + { + "epoch": 0.32, + "grad_norm": 0.7628820538520813, + "learning_rate": 1.5628453171329418e-05, + "loss": 2.1103, + "step": 9654 + }, + { + "epoch": 0.32, + "grad_norm": 0.736619234085083, + "learning_rate": 1.562757459236282e-05, + "loss": 2.1834, + "step": 9655 + }, + { + "epoch": 0.32, + "grad_norm": 0.7182301878929138, + "learning_rate": 1.5626695949819186e-05, + "loss": 2.1551, + "step": 9656 + }, + { + "epoch": 0.32, + "grad_norm": 0.7071012854576111, + "learning_rate": 1.5625817243708437e-05, + "loss": 2.1194, + "step": 9657 + }, + { + "epoch": 0.32, + "grad_norm": 0.732288658618927, + "learning_rate": 1.5624938474040502e-05, + "loss": 2.1598, + "step": 9658 + }, + { + "epoch": 0.32, + "grad_norm": 0.7055097818374634, + "learning_rate": 1.562405964082531e-05, + "loss": 2.1195, + "step": 9659 + }, + { + "epoch": 0.32, + "grad_norm": 0.7581003904342651, + "learning_rate": 1.5623180744072784e-05, + "loss": 2.1182, + "step": 9660 + }, + { + "epoch": 0.32, + "grad_norm": 0.7259427905082703, + "learning_rate": 1.562230178379286e-05, + "loss": 2.2026, + "step": 9661 + }, + { + "epoch": 0.32, + "grad_norm": 0.7154667377471924, + "learning_rate": 1.5621422759995466e-05, + "loss": 2.1247, + "step": 9662 + }, + { + "epoch": 0.32, + "grad_norm": 0.7540830373764038, + "learning_rate": 1.5620543672690532e-05, + "loss": 2.1474, + "step": 9663 + }, + { + "epoch": 0.32, + "grad_norm": 0.7043972015380859, + "learning_rate": 1.561966452188799e-05, + "loss": 2.1137, + "step": 9664 + }, + { + "epoch": 0.32, + "grad_norm": 0.7038115859031677, + "learning_rate": 1.561878530759777e-05, + "loss": 2.1321, + "step": 9665 + }, + { + "epoch": 0.32, + "grad_norm": 0.7378524541854858, + "learning_rate": 1.5617906029829808e-05, + "loss": 2.1209, + "step": 9666 + }, + { + "epoch": 0.32, + "grad_norm": 0.7201337218284607, + "learning_rate": 1.5617026688594036e-05, + "loss": 2.0737, + "step": 9667 + }, + { + "epoch": 0.32, + "grad_norm": 0.7516124248504639, + "learning_rate": 1.561614728390039e-05, + "loss": 2.075, + "step": 9668 + }, + { + "epoch": 0.32, + "grad_norm": 0.6917712688446045, + "learning_rate": 1.56152678157588e-05, + "loss": 2.1478, + "step": 9669 + }, + { + "epoch": 0.32, + "grad_norm": 0.725277304649353, + "learning_rate": 1.5614388284179206e-05, + "loss": 2.0715, + "step": 9670 + }, + { + "epoch": 0.32, + "grad_norm": 0.7047340869903564, + "learning_rate": 1.5613508689171543e-05, + "loss": 2.1364, + "step": 9671 + }, + { + "epoch": 0.32, + "grad_norm": 0.7251754999160767, + "learning_rate": 1.561262903074575e-05, + "loss": 2.0938, + "step": 9672 + }, + { + "epoch": 0.32, + "grad_norm": 0.7610461115837097, + "learning_rate": 1.5611749308911763e-05, + "loss": 2.1606, + "step": 9673 + }, + { + "epoch": 0.32, + "grad_norm": 0.7318379282951355, + "learning_rate": 1.561086952367952e-05, + "loss": 2.124, + "step": 9674 + }, + { + "epoch": 0.32, + "grad_norm": 0.7205722332000732, + "learning_rate": 1.5609989675058965e-05, + "loss": 2.0929, + "step": 9675 + }, + { + "epoch": 0.32, + "grad_norm": 0.7249205112457275, + "learning_rate": 1.5609109763060033e-05, + "loss": 2.1646, + "step": 9676 + }, + { + "epoch": 0.32, + "grad_norm": 0.727459192276001, + "learning_rate": 1.5608229787692665e-05, + "loss": 2.2156, + "step": 9677 + }, + { + "epoch": 0.32, + "grad_norm": 0.7697524428367615, + "learning_rate": 1.5607349748966806e-05, + "loss": 2.2548, + "step": 9678 + }, + { + "epoch": 0.32, + "grad_norm": 0.7445123791694641, + "learning_rate": 1.560646964689239e-05, + "loss": 2.1198, + "step": 9679 + }, + { + "epoch": 0.32, + "grad_norm": 0.7075611352920532, + "learning_rate": 1.560558948147937e-05, + "loss": 2.1079, + "step": 9680 + }, + { + "epoch": 0.32, + "grad_norm": 0.7296414375305176, + "learning_rate": 1.5604709252737685e-05, + "loss": 2.074, + "step": 9681 + }, + { + "epoch": 0.32, + "grad_norm": 0.7431594133377075, + "learning_rate": 1.560382896067728e-05, + "loss": 2.0554, + "step": 9682 + }, + { + "epoch": 0.32, + "grad_norm": 0.7352239489555359, + "learning_rate": 1.5602948605308098e-05, + "loss": 2.1339, + "step": 9683 + }, + { + "epoch": 0.32, + "grad_norm": 0.7060679793357849, + "learning_rate": 1.5602068186640088e-05, + "loss": 2.0358, + "step": 9684 + }, + { + "epoch": 0.32, + "grad_norm": 0.7050802111625671, + "learning_rate": 1.5601187704683192e-05, + "loss": 2.0464, + "step": 9685 + }, + { + "epoch": 0.32, + "grad_norm": 0.7456352710723877, + "learning_rate": 1.5600307159447362e-05, + "loss": 2.191, + "step": 9686 + }, + { + "epoch": 0.32, + "grad_norm": 0.690403163433075, + "learning_rate": 1.5599426550942544e-05, + "loss": 2.1071, + "step": 9687 + }, + { + "epoch": 0.32, + "grad_norm": 0.7531574368476868, + "learning_rate": 1.5598545879178687e-05, + "loss": 2.0715, + "step": 9688 + }, + { + "epoch": 0.32, + "grad_norm": 0.7362563014030457, + "learning_rate": 1.5597665144165735e-05, + "loss": 2.144, + "step": 9689 + }, + { + "epoch": 0.32, + "grad_norm": 0.721060574054718, + "learning_rate": 1.559678434591365e-05, + "loss": 2.1294, + "step": 9690 + }, + { + "epoch": 0.32, + "grad_norm": 0.7231608629226685, + "learning_rate": 1.5595903484432372e-05, + "loss": 2.1079, + "step": 9691 + }, + { + "epoch": 0.32, + "grad_norm": 0.7737318873405457, + "learning_rate": 1.5595022559731856e-05, + "loss": 2.0954, + "step": 9692 + }, + { + "epoch": 0.32, + "grad_norm": 0.7315420508384705, + "learning_rate": 1.5594141571822055e-05, + "loss": 2.173, + "step": 9693 + }, + { + "epoch": 0.32, + "grad_norm": 0.7234458327293396, + "learning_rate": 1.5593260520712917e-05, + "loss": 2.1957, + "step": 9694 + }, + { + "epoch": 0.32, + "grad_norm": 0.706051230430603, + "learning_rate": 1.5592379406414403e-05, + "loss": 2.171, + "step": 9695 + }, + { + "epoch": 0.32, + "grad_norm": 0.7145938873291016, + "learning_rate": 1.5591498228936464e-05, + "loss": 2.0657, + "step": 9696 + }, + { + "epoch": 0.32, + "grad_norm": 0.7288222312927246, + "learning_rate": 1.5590616988289054e-05, + "loss": 2.1432, + "step": 9697 + }, + { + "epoch": 0.32, + "grad_norm": 0.6874464750289917, + "learning_rate": 1.5589735684482134e-05, + "loss": 2.1252, + "step": 9698 + }, + { + "epoch": 0.32, + "grad_norm": 0.7213895320892334, + "learning_rate": 1.558885431752565e-05, + "loss": 2.0905, + "step": 9699 + }, + { + "epoch": 0.32, + "grad_norm": 0.7034483551979065, + "learning_rate": 1.5587972887429565e-05, + "loss": 2.1318, + "step": 9700 + }, + { + "epoch": 0.32, + "grad_norm": 0.7301508784294128, + "learning_rate": 1.5587091394203842e-05, + "loss": 2.0836, + "step": 9701 + }, + { + "epoch": 0.32, + "grad_norm": 0.7120941877365112, + "learning_rate": 1.558620983785843e-05, + "loss": 2.1018, + "step": 9702 + }, + { + "epoch": 0.32, + "grad_norm": 0.7034561634063721, + "learning_rate": 1.5585328218403296e-05, + "loss": 2.11, + "step": 9703 + }, + { + "epoch": 0.32, + "grad_norm": 0.7046024203300476, + "learning_rate": 1.55844465358484e-05, + "loss": 2.0964, + "step": 9704 + }, + { + "epoch": 0.32, + "grad_norm": 0.7175389528274536, + "learning_rate": 1.5583564790203697e-05, + "loss": 2.1949, + "step": 9705 + }, + { + "epoch": 0.32, + "grad_norm": 0.7190459966659546, + "learning_rate": 1.558268298147915e-05, + "loss": 2.1111, + "step": 9706 + }, + { + "epoch": 0.32, + "grad_norm": 0.7264535427093506, + "learning_rate": 1.5581801109684725e-05, + "loss": 2.0915, + "step": 9707 + }, + { + "epoch": 0.32, + "grad_norm": 0.73790043592453, + "learning_rate": 1.558091917483038e-05, + "loss": 2.1259, + "step": 9708 + }, + { + "epoch": 0.32, + "grad_norm": 0.7432808876037598, + "learning_rate": 1.558003717692608e-05, + "loss": 2.2039, + "step": 9709 + }, + { + "epoch": 0.32, + "grad_norm": 0.7579568028450012, + "learning_rate": 1.557915511598179e-05, + "loss": 2.0407, + "step": 9710 + }, + { + "epoch": 0.32, + "grad_norm": 0.7072755098342896, + "learning_rate": 1.557827299200748e-05, + "loss": 2.1199, + "step": 9711 + }, + { + "epoch": 0.32, + "grad_norm": 0.6925066113471985, + "learning_rate": 1.557739080501311e-05, + "loss": 2.0794, + "step": 9712 + }, + { + "epoch": 0.32, + "grad_norm": 0.7213560342788696, + "learning_rate": 1.5576508555008643e-05, + "loss": 2.1352, + "step": 9713 + }, + { + "epoch": 0.32, + "grad_norm": 0.7008641362190247, + "learning_rate": 1.557562624200405e-05, + "loss": 2.1148, + "step": 9714 + }, + { + "epoch": 0.32, + "grad_norm": 0.7303087711334229, + "learning_rate": 1.5574743866009307e-05, + "loss": 2.0961, + "step": 9715 + }, + { + "epoch": 0.32, + "grad_norm": 0.7058576941490173, + "learning_rate": 1.5573861427034368e-05, + "loss": 2.1023, + "step": 9716 + }, + { + "epoch": 0.32, + "grad_norm": 0.745682954788208, + "learning_rate": 1.557297892508921e-05, + "loss": 2.0735, + "step": 9717 + }, + { + "epoch": 0.32, + "grad_norm": 0.763796865940094, + "learning_rate": 1.5572096360183805e-05, + "loss": 2.1054, + "step": 9718 + }, + { + "epoch": 0.32, + "grad_norm": 0.7220934629440308, + "learning_rate": 1.5571213732328118e-05, + "loss": 2.1797, + "step": 9719 + }, + { + "epoch": 0.32, + "grad_norm": 0.7159127593040466, + "learning_rate": 1.5570331041532127e-05, + "loss": 2.1119, + "step": 9720 + }, + { + "epoch": 0.32, + "grad_norm": 0.7099900841712952, + "learning_rate": 1.55694482878058e-05, + "loss": 2.1781, + "step": 9721 + }, + { + "epoch": 0.32, + "grad_norm": 0.7351802587509155, + "learning_rate": 1.5568565471159105e-05, + "loss": 2.0964, + "step": 9722 + }, + { + "epoch": 0.32, + "grad_norm": 0.7681098580360413, + "learning_rate": 1.5567682591602025e-05, + "loss": 2.1356, + "step": 9723 + }, + { + "epoch": 0.32, + "grad_norm": 0.7014427781105042, + "learning_rate": 1.5566799649144528e-05, + "loss": 2.1336, + "step": 9724 + }, + { + "epoch": 0.32, + "grad_norm": 0.6984763145446777, + "learning_rate": 1.5565916643796594e-05, + "loss": 2.0458, + "step": 9725 + }, + { + "epoch": 0.32, + "grad_norm": 0.7065165042877197, + "learning_rate": 1.5565033575568193e-05, + "loss": 2.1095, + "step": 9726 + }, + { + "epoch": 0.32, + "grad_norm": 0.7118334174156189, + "learning_rate": 1.5564150444469306e-05, + "loss": 2.1977, + "step": 9727 + }, + { + "epoch": 0.32, + "grad_norm": 0.7149691581726074, + "learning_rate": 1.5563267250509906e-05, + "loss": 2.164, + "step": 9728 + }, + { + "epoch": 0.32, + "grad_norm": 0.7476349472999573, + "learning_rate": 1.5562383993699977e-05, + "loss": 2.0644, + "step": 9729 + }, + { + "epoch": 0.32, + "grad_norm": 0.7497175931930542, + "learning_rate": 1.556150067404949e-05, + "loss": 2.1512, + "step": 9730 + }, + { + "epoch": 0.32, + "grad_norm": 0.7219561338424683, + "learning_rate": 1.5560617291568427e-05, + "loss": 2.1152, + "step": 9731 + }, + { + "epoch": 0.32, + "grad_norm": 0.7257176637649536, + "learning_rate": 1.5559733846266772e-05, + "loss": 2.1317, + "step": 9732 + }, + { + "epoch": 0.32, + "grad_norm": 0.7180116772651672, + "learning_rate": 1.55588503381545e-05, + "loss": 2.11, + "step": 9733 + }, + { + "epoch": 0.32, + "grad_norm": 0.7305561900138855, + "learning_rate": 1.5557966767241596e-05, + "loss": 2.0841, + "step": 9734 + }, + { + "epoch": 0.32, + "grad_norm": 0.7213141918182373, + "learning_rate": 1.5557083133538038e-05, + "loss": 2.1015, + "step": 9735 + }, + { + "epoch": 0.32, + "grad_norm": 0.7244415283203125, + "learning_rate": 1.5556199437053814e-05, + "loss": 2.1909, + "step": 9736 + }, + { + "epoch": 0.32, + "grad_norm": 0.7275545597076416, + "learning_rate": 1.5555315677798906e-05, + "loss": 2.1289, + "step": 9737 + }, + { + "epoch": 0.32, + "grad_norm": 0.7024979591369629, + "learning_rate": 1.5554431855783295e-05, + "loss": 2.1182, + "step": 9738 + }, + { + "epoch": 0.32, + "grad_norm": 0.6924365162849426, + "learning_rate": 1.5553547971016966e-05, + "loss": 2.1226, + "step": 9739 + }, + { + "epoch": 0.32, + "grad_norm": 0.7123757600784302, + "learning_rate": 1.555266402350991e-05, + "loss": 2.1115, + "step": 9740 + }, + { + "epoch": 0.32, + "grad_norm": 0.7232630848884583, + "learning_rate": 1.555178001327211e-05, + "loss": 2.1744, + "step": 9741 + }, + { + "epoch": 0.32, + "grad_norm": 0.7116715908050537, + "learning_rate": 1.5550895940313552e-05, + "loss": 2.1361, + "step": 9742 + }, + { + "epoch": 0.32, + "grad_norm": 0.6923726201057434, + "learning_rate": 1.5550011804644226e-05, + "loss": 2.0649, + "step": 9743 + }, + { + "epoch": 0.32, + "grad_norm": 0.7449272871017456, + "learning_rate": 1.554912760627412e-05, + "loss": 2.1537, + "step": 9744 + }, + { + "epoch": 0.32, + "grad_norm": 0.6884171962738037, + "learning_rate": 1.5548243345213223e-05, + "loss": 2.1485, + "step": 9745 + }, + { + "epoch": 0.32, + "grad_norm": 0.7247269153594971, + "learning_rate": 1.554735902147152e-05, + "loss": 2.1648, + "step": 9746 + }, + { + "epoch": 0.32, + "grad_norm": 0.7230300903320312, + "learning_rate": 1.554647463505901e-05, + "loss": 2.1397, + "step": 9747 + }, + { + "epoch": 0.32, + "grad_norm": 0.740444004535675, + "learning_rate": 1.554559018598568e-05, + "loss": 2.1322, + "step": 9748 + }, + { + "epoch": 0.32, + "grad_norm": 0.7256143093109131, + "learning_rate": 1.5544705674261517e-05, + "loss": 2.1217, + "step": 9749 + }, + { + "epoch": 0.32, + "grad_norm": 0.7350302934646606, + "learning_rate": 1.554382109989652e-05, + "loss": 2.164, + "step": 9750 + }, + { + "epoch": 0.32, + "grad_norm": 0.6851370334625244, + "learning_rate": 1.5542936462900685e-05, + "loss": 2.0827, + "step": 9751 + }, + { + "epoch": 0.32, + "grad_norm": 0.7035511136054993, + "learning_rate": 1.5542051763284002e-05, + "loss": 2.0926, + "step": 9752 + }, + { + "epoch": 0.32, + "grad_norm": 0.7148852944374084, + "learning_rate": 1.5541167001056466e-05, + "loss": 2.1065, + "step": 9753 + }, + { + "epoch": 0.32, + "grad_norm": 0.7390248775482178, + "learning_rate": 1.5540282176228073e-05, + "loss": 2.0664, + "step": 9754 + }, + { + "epoch": 0.32, + "grad_norm": 0.7371506094932556, + "learning_rate": 1.5539397288808817e-05, + "loss": 2.1897, + "step": 9755 + }, + { + "epoch": 0.32, + "grad_norm": 0.7095745801925659, + "learning_rate": 1.5538512338808696e-05, + "loss": 2.1245, + "step": 9756 + }, + { + "epoch": 0.32, + "grad_norm": 0.7180996537208557, + "learning_rate": 1.553762732623771e-05, + "loss": 2.121, + "step": 9757 + }, + { + "epoch": 0.32, + "grad_norm": 0.71940016746521, + "learning_rate": 1.5536742251105856e-05, + "loss": 2.0846, + "step": 9758 + }, + { + "epoch": 0.32, + "grad_norm": 0.7432024478912354, + "learning_rate": 1.5535857113423133e-05, + "loss": 2.1328, + "step": 9759 + }, + { + "epoch": 0.32, + "grad_norm": 0.7416552305221558, + "learning_rate": 1.553497191319954e-05, + "loss": 2.2062, + "step": 9760 + }, + { + "epoch": 0.32, + "grad_norm": 0.7160609364509583, + "learning_rate": 1.5534086650445077e-05, + "loss": 2.1748, + "step": 9761 + }, + { + "epoch": 0.32, + "grad_norm": 0.7547546625137329, + "learning_rate": 1.5533201325169746e-05, + "loss": 2.1229, + "step": 9762 + }, + { + "epoch": 0.32, + "grad_norm": 0.7350295782089233, + "learning_rate": 1.5532315937383554e-05, + "loss": 2.1763, + "step": 9763 + }, + { + "epoch": 0.32, + "grad_norm": 0.7067313194274902, + "learning_rate": 1.5531430487096495e-05, + "loss": 2.1024, + "step": 9764 + }, + { + "epoch": 0.32, + "grad_norm": 0.7448184490203857, + "learning_rate": 1.5530544974318577e-05, + "loss": 2.1582, + "step": 9765 + }, + { + "epoch": 0.32, + "grad_norm": 0.6961895227432251, + "learning_rate": 1.5529659399059803e-05, + "loss": 2.1542, + "step": 9766 + }, + { + "epoch": 0.32, + "grad_norm": 0.7170191407203674, + "learning_rate": 1.552877376133018e-05, + "loss": 2.0092, + "step": 9767 + }, + { + "epoch": 0.32, + "grad_norm": 0.7149031758308411, + "learning_rate": 1.5527888061139707e-05, + "loss": 2.1091, + "step": 9768 + }, + { + "epoch": 0.33, + "grad_norm": 0.7349783182144165, + "learning_rate": 1.5527002298498396e-05, + "loss": 2.1104, + "step": 9769 + }, + { + "epoch": 0.33, + "grad_norm": 0.7329489588737488, + "learning_rate": 1.552611647341625e-05, + "loss": 2.1871, + "step": 9770 + }, + { + "epoch": 0.33, + "grad_norm": 0.7452016472816467, + "learning_rate": 1.5525230585903285e-05, + "loss": 2.0818, + "step": 9771 + }, + { + "epoch": 0.33, + "grad_norm": 0.7211071252822876, + "learning_rate": 1.5524344635969498e-05, + "loss": 2.1826, + "step": 9772 + }, + { + "epoch": 0.33, + "grad_norm": 0.7095054984092712, + "learning_rate": 1.5523458623624904e-05, + "loss": 2.0897, + "step": 9773 + }, + { + "epoch": 0.33, + "grad_norm": 0.7175541520118713, + "learning_rate": 1.552257254887951e-05, + "loss": 2.1136, + "step": 9774 + }, + { + "epoch": 0.33, + "grad_norm": 0.7369424104690552, + "learning_rate": 1.552168641174333e-05, + "loss": 2.2051, + "step": 9775 + }, + { + "epoch": 0.33, + "grad_norm": 0.6945841312408447, + "learning_rate": 1.5520800212226374e-05, + "loss": 2.1022, + "step": 9776 + }, + { + "epoch": 0.33, + "grad_norm": 0.7432894706726074, + "learning_rate": 1.551991395033865e-05, + "loss": 2.1468, + "step": 9777 + }, + { + "epoch": 0.33, + "grad_norm": 0.7032977938652039, + "learning_rate": 1.5519027626090175e-05, + "loss": 2.1469, + "step": 9778 + }, + { + "epoch": 0.33, + "grad_norm": 0.7035618424415588, + "learning_rate": 1.5518141239490958e-05, + "loss": 2.1076, + "step": 9779 + }, + { + "epoch": 0.33, + "grad_norm": 0.7174363136291504, + "learning_rate": 1.5517254790551017e-05, + "loss": 2.1474, + "step": 9780 + }, + { + "epoch": 0.33, + "grad_norm": 0.7240395545959473, + "learning_rate": 1.5516368279280365e-05, + "loss": 2.1265, + "step": 9781 + }, + { + "epoch": 0.33, + "grad_norm": 0.730636477470398, + "learning_rate": 1.551548170568902e-05, + "loss": 2.1643, + "step": 9782 + }, + { + "epoch": 0.33, + "grad_norm": 0.7373067736625671, + "learning_rate": 1.5514595069786992e-05, + "loss": 2.085, + "step": 9783 + }, + { + "epoch": 0.33, + "grad_norm": 0.736629068851471, + "learning_rate": 1.5513708371584296e-05, + "loss": 2.0876, + "step": 9784 + }, + { + "epoch": 0.33, + "grad_norm": 0.7060760855674744, + "learning_rate": 1.551282161109096e-05, + "loss": 2.0933, + "step": 9785 + }, + { + "epoch": 0.33, + "grad_norm": 0.7124506831169128, + "learning_rate": 1.5511934788316995e-05, + "loss": 2.1651, + "step": 9786 + }, + { + "epoch": 0.33, + "grad_norm": 0.7154838442802429, + "learning_rate": 1.551104790327242e-05, + "loss": 2.2181, + "step": 9787 + }, + { + "epoch": 0.33, + "grad_norm": 0.7429937720298767, + "learning_rate": 1.5510160955967256e-05, + "loss": 2.1809, + "step": 9788 + }, + { + "epoch": 0.33, + "grad_norm": 0.7234243750572205, + "learning_rate": 1.5509273946411525e-05, + "loss": 2.1139, + "step": 9789 + }, + { + "epoch": 0.33, + "grad_norm": 0.691798746585846, + "learning_rate": 1.5508386874615244e-05, + "loss": 2.0765, + "step": 9790 + }, + { + "epoch": 0.33, + "grad_norm": 0.7239765524864197, + "learning_rate": 1.550749974058844e-05, + "loss": 2.1571, + "step": 9791 + }, + { + "epoch": 0.33, + "grad_norm": 0.713736891746521, + "learning_rate": 1.5506612544341124e-05, + "loss": 2.1501, + "step": 9792 + }, + { + "epoch": 0.33, + "grad_norm": 0.7478756308555603, + "learning_rate": 1.550572528588333e-05, + "loss": 2.1259, + "step": 9793 + }, + { + "epoch": 0.33, + "grad_norm": 0.6878976225852966, + "learning_rate": 1.550483796522508e-05, + "loss": 2.0555, + "step": 9794 + }, + { + "epoch": 0.33, + "grad_norm": 0.7342193722724915, + "learning_rate": 1.5503950582376398e-05, + "loss": 2.1753, + "step": 9795 + }, + { + "epoch": 0.33, + "grad_norm": 0.7443256378173828, + "learning_rate": 1.5503063137347307e-05, + "loss": 2.1324, + "step": 9796 + }, + { + "epoch": 0.33, + "grad_norm": 0.7581925988197327, + "learning_rate": 1.550217563014783e-05, + "loss": 2.0882, + "step": 9797 + }, + { + "epoch": 0.33, + "grad_norm": 0.7454391121864319, + "learning_rate": 1.5501288060788e-05, + "loss": 2.1966, + "step": 9798 + }, + { + "epoch": 0.33, + "grad_norm": 0.7362740635871887, + "learning_rate": 1.550040042927784e-05, + "loss": 2.1479, + "step": 9799 + }, + { + "epoch": 0.33, + "grad_norm": 0.7669014930725098, + "learning_rate": 1.5499512735627385e-05, + "loss": 2.1624, + "step": 9800 + }, + { + "epoch": 0.33, + "grad_norm": 0.7412266731262207, + "learning_rate": 1.5498624979846653e-05, + "loss": 2.1285, + "step": 9801 + }, + { + "epoch": 0.33, + "grad_norm": 0.724396288394928, + "learning_rate": 1.549773716194568e-05, + "loss": 2.1935, + "step": 9802 + }, + { + "epoch": 0.33, + "grad_norm": 0.7174577116966248, + "learning_rate": 1.5496849281934494e-05, + "loss": 2.089, + "step": 9803 + }, + { + "epoch": 0.33, + "grad_norm": 0.7296112179756165, + "learning_rate": 1.5495961339823125e-05, + "loss": 2.1034, + "step": 9804 + }, + { + "epoch": 0.33, + "grad_norm": 0.7373788356781006, + "learning_rate": 1.549507333562161e-05, + "loss": 2.1389, + "step": 9805 + }, + { + "epoch": 0.33, + "grad_norm": 0.7346022129058838, + "learning_rate": 1.549418526933997e-05, + "loss": 2.0909, + "step": 9806 + }, + { + "epoch": 0.33, + "grad_norm": 0.7354152798652649, + "learning_rate": 1.5493297140988253e-05, + "loss": 2.1904, + "step": 9807 + }, + { + "epoch": 0.33, + "grad_norm": 0.6868494749069214, + "learning_rate": 1.549240895057648e-05, + "loss": 2.1085, + "step": 9808 + }, + { + "epoch": 0.33, + "grad_norm": 0.7333486080169678, + "learning_rate": 1.549152069811469e-05, + "loss": 2.0694, + "step": 9809 + }, + { + "epoch": 0.33, + "grad_norm": 0.7068595290184021, + "learning_rate": 1.5490632383612915e-05, + "loss": 2.0819, + "step": 9810 + }, + { + "epoch": 0.33, + "grad_norm": 0.7222439646720886, + "learning_rate": 1.5489744007081198e-05, + "loss": 2.0458, + "step": 9811 + }, + { + "epoch": 0.33, + "grad_norm": 0.7160742878913879, + "learning_rate": 1.5488855568529565e-05, + "loss": 2.1086, + "step": 9812 + }, + { + "epoch": 0.33, + "grad_norm": 0.717342734336853, + "learning_rate": 1.5487967067968063e-05, + "loss": 2.1249, + "step": 9813 + }, + { + "epoch": 0.33, + "grad_norm": 0.747526228427887, + "learning_rate": 1.5487078505406724e-05, + "loss": 2.0738, + "step": 9814 + }, + { + "epoch": 0.33, + "grad_norm": 0.6963484287261963, + "learning_rate": 1.5486189880855587e-05, + "loss": 2.1135, + "step": 9815 + }, + { + "epoch": 0.33, + "grad_norm": 0.7524906992912292, + "learning_rate": 1.5485301194324695e-05, + "loss": 2.1046, + "step": 9816 + }, + { + "epoch": 0.33, + "grad_norm": 0.7156223058700562, + "learning_rate": 1.5484412445824082e-05, + "loss": 2.137, + "step": 9817 + }, + { + "epoch": 0.33, + "grad_norm": 0.7152125239372253, + "learning_rate": 1.548352363536379e-05, + "loss": 2.0648, + "step": 9818 + }, + { + "epoch": 0.33, + "grad_norm": 0.758443295955658, + "learning_rate": 1.5482634762953864e-05, + "loss": 2.1882, + "step": 9819 + }, + { + "epoch": 0.33, + "grad_norm": 0.7346198558807373, + "learning_rate": 1.5481745828604344e-05, + "loss": 2.1159, + "step": 9820 + }, + { + "epoch": 0.33, + "grad_norm": 0.7042797803878784, + "learning_rate": 1.548085683232527e-05, + "loss": 2.1502, + "step": 9821 + }, + { + "epoch": 0.33, + "grad_norm": 0.7387520670890808, + "learning_rate": 1.547996777412669e-05, + "loss": 2.1844, + "step": 9822 + }, + { + "epoch": 0.33, + "grad_norm": 0.7866591811180115, + "learning_rate": 1.5479078654018644e-05, + "loss": 2.2013, + "step": 9823 + }, + { + "epoch": 0.33, + "grad_norm": 0.7142841219902039, + "learning_rate": 1.547818947201118e-05, + "loss": 2.1571, + "step": 9824 + }, + { + "epoch": 0.33, + "grad_norm": 0.7088612914085388, + "learning_rate": 1.547730022811434e-05, + "loss": 2.1592, + "step": 9825 + }, + { + "epoch": 0.33, + "grad_norm": 0.7097257971763611, + "learning_rate": 1.547641092233817e-05, + "loss": 2.1393, + "step": 9826 + }, + { + "epoch": 0.33, + "grad_norm": 0.6985494494438171, + "learning_rate": 1.5475521554692724e-05, + "loss": 2.0859, + "step": 9827 + }, + { + "epoch": 0.33, + "grad_norm": 0.7373705506324768, + "learning_rate": 1.547463212518804e-05, + "loss": 2.0419, + "step": 9828 + }, + { + "epoch": 0.33, + "grad_norm": 0.7425941824913025, + "learning_rate": 1.5473742633834174e-05, + "loss": 2.0636, + "step": 9829 + }, + { + "epoch": 0.33, + "grad_norm": 0.704028844833374, + "learning_rate": 1.547285308064117e-05, + "loss": 2.1561, + "step": 9830 + }, + { + "epoch": 0.33, + "grad_norm": 0.6992152333259583, + "learning_rate": 1.5471963465619082e-05, + "loss": 2.1074, + "step": 9831 + }, + { + "epoch": 0.33, + "grad_norm": 0.7251248359680176, + "learning_rate": 1.5471073788777956e-05, + "loss": 2.1368, + "step": 9832 + }, + { + "epoch": 0.33, + "grad_norm": 0.7286407351493835, + "learning_rate": 1.5470184050127843e-05, + "loss": 2.1674, + "step": 9833 + }, + { + "epoch": 0.33, + "grad_norm": 0.7300108671188354, + "learning_rate": 1.5469294249678795e-05, + "loss": 2.1612, + "step": 9834 + }, + { + "epoch": 0.33, + "grad_norm": 0.7284534573554993, + "learning_rate": 1.546840438744087e-05, + "loss": 2.1442, + "step": 9835 + }, + { + "epoch": 0.33, + "grad_norm": 0.7208967208862305, + "learning_rate": 1.5467514463424115e-05, + "loss": 2.0659, + "step": 9836 + }, + { + "epoch": 0.33, + "grad_norm": 0.7511195540428162, + "learning_rate": 1.5466624477638587e-05, + "loss": 2.0932, + "step": 9837 + }, + { + "epoch": 0.33, + "grad_norm": 0.7420071959495544, + "learning_rate": 1.546573443009434e-05, + "loss": 2.0748, + "step": 9838 + }, + { + "epoch": 0.33, + "grad_norm": 0.7343674302101135, + "learning_rate": 1.5464844320801425e-05, + "loss": 2.1689, + "step": 9839 + }, + { + "epoch": 0.33, + "grad_norm": 0.7268196940422058, + "learning_rate": 1.5463954149769906e-05, + "loss": 2.1533, + "step": 9840 + }, + { + "epoch": 0.33, + "grad_norm": 0.7534284591674805, + "learning_rate": 1.5463063917009832e-05, + "loss": 2.2125, + "step": 9841 + }, + { + "epoch": 0.33, + "grad_norm": 0.7615901231765747, + "learning_rate": 1.5462173622531268e-05, + "loss": 2.172, + "step": 9842 + }, + { + "epoch": 0.33, + "grad_norm": 0.756576657295227, + "learning_rate": 1.5461283266344263e-05, + "loss": 2.1248, + "step": 9843 + }, + { + "epoch": 0.33, + "grad_norm": 0.7452014088630676, + "learning_rate": 1.5460392848458883e-05, + "loss": 2.1654, + "step": 9844 + }, + { + "epoch": 0.33, + "grad_norm": 0.7113962769508362, + "learning_rate": 1.5459502368885182e-05, + "loss": 2.0892, + "step": 9845 + }, + { + "epoch": 0.33, + "grad_norm": 0.7467913627624512, + "learning_rate": 1.5458611827633224e-05, + "loss": 2.2005, + "step": 9846 + }, + { + "epoch": 0.33, + "grad_norm": 0.714866042137146, + "learning_rate": 1.545772122471307e-05, + "loss": 2.1368, + "step": 9847 + }, + { + "epoch": 0.33, + "grad_norm": 0.7434927225112915, + "learning_rate": 1.545683056013478e-05, + "loss": 2.1496, + "step": 9848 + }, + { + "epoch": 0.33, + "grad_norm": 0.704870343208313, + "learning_rate": 1.5455939833908415e-05, + "loss": 2.0809, + "step": 9849 + }, + { + "epoch": 0.33, + "grad_norm": 0.7373092174530029, + "learning_rate": 1.5455049046044038e-05, + "loss": 2.1198, + "step": 9850 + }, + { + "epoch": 0.33, + "grad_norm": 0.7553548216819763, + "learning_rate": 1.5454158196551716e-05, + "loss": 2.0814, + "step": 9851 + }, + { + "epoch": 0.33, + "grad_norm": 0.7285803556442261, + "learning_rate": 1.545326728544151e-05, + "loss": 2.1672, + "step": 9852 + }, + { + "epoch": 0.33, + "grad_norm": 0.6988288164138794, + "learning_rate": 1.5452376312723486e-05, + "loss": 2.1192, + "step": 9853 + }, + { + "epoch": 0.33, + "grad_norm": 0.7022321224212646, + "learning_rate": 1.545148527840771e-05, + "loss": 2.0755, + "step": 9854 + }, + { + "epoch": 0.33, + "grad_norm": 0.7328547835350037, + "learning_rate": 1.5450594182504247e-05, + "loss": 2.1475, + "step": 9855 + }, + { + "epoch": 0.33, + "grad_norm": 0.7344414591789246, + "learning_rate": 1.544970302502317e-05, + "loss": 2.1254, + "step": 9856 + }, + { + "epoch": 0.33, + "grad_norm": 0.7214395403862, + "learning_rate": 1.5448811805974537e-05, + "loss": 2.1237, + "step": 9857 + }, + { + "epoch": 0.33, + "grad_norm": 0.7196457386016846, + "learning_rate": 1.5447920525368424e-05, + "loss": 2.1504, + "step": 9858 + }, + { + "epoch": 0.33, + "grad_norm": 0.7674043774604797, + "learning_rate": 1.5447029183214896e-05, + "loss": 2.1422, + "step": 9859 + }, + { + "epoch": 0.33, + "grad_norm": 0.7161842584609985, + "learning_rate": 1.5446137779524027e-05, + "loss": 2.1256, + "step": 9860 + }, + { + "epoch": 0.33, + "grad_norm": 0.7090122103691101, + "learning_rate": 1.5445246314305885e-05, + "loss": 2.0829, + "step": 9861 + }, + { + "epoch": 0.33, + "grad_norm": 0.7219876646995544, + "learning_rate": 1.544435478757054e-05, + "loss": 2.1002, + "step": 9862 + }, + { + "epoch": 0.33, + "grad_norm": 0.7130935788154602, + "learning_rate": 1.5443463199328066e-05, + "loss": 2.0865, + "step": 9863 + }, + { + "epoch": 0.33, + "grad_norm": 0.7210922837257385, + "learning_rate": 1.5442571549588533e-05, + "loss": 2.1697, + "step": 9864 + }, + { + "epoch": 0.33, + "grad_norm": 0.7407726049423218, + "learning_rate": 1.5441679838362017e-05, + "loss": 2.1573, + "step": 9865 + }, + { + "epoch": 0.33, + "grad_norm": 0.7217987179756165, + "learning_rate": 1.5440788065658593e-05, + "loss": 2.1135, + "step": 9866 + }, + { + "epoch": 0.33, + "grad_norm": 0.7374716997146606, + "learning_rate": 1.5439896231488335e-05, + "loss": 2.1197, + "step": 9867 + }, + { + "epoch": 0.33, + "grad_norm": 0.7338510751724243, + "learning_rate": 1.5439004335861313e-05, + "loss": 2.0922, + "step": 9868 + }, + { + "epoch": 0.33, + "grad_norm": 0.7532132863998413, + "learning_rate": 1.5438112378787615e-05, + "loss": 2.0739, + "step": 9869 + }, + { + "epoch": 0.33, + "grad_norm": 0.7063016891479492, + "learning_rate": 1.5437220360277302e-05, + "loss": 2.1581, + "step": 9870 + }, + { + "epoch": 0.33, + "grad_norm": 0.7058444023132324, + "learning_rate": 1.5436328280340465e-05, + "loss": 2.0959, + "step": 9871 + }, + { + "epoch": 0.33, + "grad_norm": 0.7520977854728699, + "learning_rate": 1.5435436138987173e-05, + "loss": 2.1477, + "step": 9872 + }, + { + "epoch": 0.33, + "grad_norm": 0.6891581416130066, + "learning_rate": 1.5434543936227516e-05, + "loss": 2.0731, + "step": 9873 + }, + { + "epoch": 0.33, + "grad_norm": 0.7480459809303284, + "learning_rate": 1.543365167207156e-05, + "loss": 2.1227, + "step": 9874 + }, + { + "epoch": 0.33, + "grad_norm": 0.789496898651123, + "learning_rate": 1.5432759346529395e-05, + "loss": 2.1305, + "step": 9875 + }, + { + "epoch": 0.33, + "grad_norm": 0.7157832384109497, + "learning_rate": 1.5431866959611098e-05, + "loss": 2.0395, + "step": 9876 + }, + { + "epoch": 0.33, + "grad_norm": 0.6972479224205017, + "learning_rate": 1.5430974511326747e-05, + "loss": 2.1008, + "step": 9877 + }, + { + "epoch": 0.33, + "grad_norm": 0.6888520121574402, + "learning_rate": 1.5430082001686436e-05, + "loss": 2.1229, + "step": 9878 + }, + { + "epoch": 0.33, + "grad_norm": 0.702393114566803, + "learning_rate": 1.542918943070024e-05, + "loss": 2.1191, + "step": 9879 + }, + { + "epoch": 0.33, + "grad_norm": 0.7181735634803772, + "learning_rate": 1.542829679837824e-05, + "loss": 2.0763, + "step": 9880 + }, + { + "epoch": 0.33, + "grad_norm": 0.7311133146286011, + "learning_rate": 1.5427404104730526e-05, + "loss": 2.1268, + "step": 9881 + }, + { + "epoch": 0.33, + "grad_norm": 0.7658363580703735, + "learning_rate": 1.542651134976718e-05, + "loss": 2.1161, + "step": 9882 + }, + { + "epoch": 0.33, + "grad_norm": 0.7795351147651672, + "learning_rate": 1.5425618533498294e-05, + "loss": 2.0841, + "step": 9883 + }, + { + "epoch": 0.33, + "grad_norm": 0.7189720869064331, + "learning_rate": 1.5424725655933942e-05, + "loss": 2.1377, + "step": 9884 + }, + { + "epoch": 0.33, + "grad_norm": 0.7353165745735168, + "learning_rate": 1.5423832717084223e-05, + "loss": 2.1416, + "step": 9885 + }, + { + "epoch": 0.33, + "grad_norm": 0.730982780456543, + "learning_rate": 1.542293971695922e-05, + "loss": 2.1549, + "step": 9886 + }, + { + "epoch": 0.33, + "grad_norm": 0.769044816493988, + "learning_rate": 1.5422046655569022e-05, + "loss": 2.1356, + "step": 9887 + }, + { + "epoch": 0.33, + "grad_norm": 0.7480217218399048, + "learning_rate": 1.5421153532923717e-05, + "loss": 2.1525, + "step": 9888 + }, + { + "epoch": 0.33, + "grad_norm": 0.7241945266723633, + "learning_rate": 1.54202603490334e-05, + "loss": 2.0659, + "step": 9889 + }, + { + "epoch": 0.33, + "grad_norm": 0.6988205313682556, + "learning_rate": 1.5419367103908157e-05, + "loss": 2.1106, + "step": 9890 + }, + { + "epoch": 0.33, + "grad_norm": 0.7037733197212219, + "learning_rate": 1.541847379755808e-05, + "loss": 2.1116, + "step": 9891 + }, + { + "epoch": 0.33, + "grad_norm": 0.7654491066932678, + "learning_rate": 1.541758042999326e-05, + "loss": 2.1954, + "step": 9892 + }, + { + "epoch": 0.33, + "grad_norm": 0.8013147711753845, + "learning_rate": 1.5416687001223792e-05, + "loss": 2.1497, + "step": 9893 + }, + { + "epoch": 0.33, + "grad_norm": 0.7342007756233215, + "learning_rate": 1.5415793511259773e-05, + "loss": 2.1371, + "step": 9894 + }, + { + "epoch": 0.33, + "grad_norm": 0.7167657613754272, + "learning_rate": 1.5414899960111288e-05, + "loss": 2.1428, + "step": 9895 + }, + { + "epoch": 0.33, + "grad_norm": 0.7281323075294495, + "learning_rate": 1.5414006347788436e-05, + "loss": 2.0863, + "step": 9896 + }, + { + "epoch": 0.33, + "grad_norm": 0.7209944128990173, + "learning_rate": 1.541311267430132e-05, + "loss": 2.135, + "step": 9897 + }, + { + "epoch": 0.33, + "grad_norm": 0.7091156244277954, + "learning_rate": 1.541221893966002e-05, + "loss": 2.2141, + "step": 9898 + }, + { + "epoch": 0.33, + "grad_norm": 0.7492038011550903, + "learning_rate": 1.5411325143874646e-05, + "loss": 2.0966, + "step": 9899 + }, + { + "epoch": 0.33, + "grad_norm": 0.7224010825157166, + "learning_rate": 1.5410431286955293e-05, + "loss": 2.1006, + "step": 9900 + }, + { + "epoch": 0.33, + "grad_norm": 0.7136358022689819, + "learning_rate": 1.540953736891206e-05, + "loss": 2.1388, + "step": 9901 + }, + { + "epoch": 0.33, + "grad_norm": 0.7419625520706177, + "learning_rate": 1.5408643389755043e-05, + "loss": 2.1652, + "step": 9902 + }, + { + "epoch": 0.33, + "grad_norm": 0.7292348742485046, + "learning_rate": 1.5407749349494338e-05, + "loss": 2.1878, + "step": 9903 + }, + { + "epoch": 0.33, + "grad_norm": 0.6981943249702454, + "learning_rate": 1.5406855248140057e-05, + "loss": 2.0896, + "step": 9904 + }, + { + "epoch": 0.33, + "grad_norm": 0.7167201042175293, + "learning_rate": 1.540596108570229e-05, + "loss": 2.1466, + "step": 9905 + }, + { + "epoch": 0.33, + "grad_norm": 0.7755454778671265, + "learning_rate": 1.5405066862191144e-05, + "loss": 2.1449, + "step": 9906 + }, + { + "epoch": 0.33, + "grad_norm": 0.7494082450866699, + "learning_rate": 1.540417257761672e-05, + "loss": 2.0588, + "step": 9907 + }, + { + "epoch": 0.33, + "grad_norm": 0.7106969952583313, + "learning_rate": 1.5403278231989123e-05, + "loss": 2.0879, + "step": 9908 + }, + { + "epoch": 0.33, + "grad_norm": 0.7271022200584412, + "learning_rate": 1.540238382531845e-05, + "loss": 2.0686, + "step": 9909 + }, + { + "epoch": 0.33, + "grad_norm": 0.7402494549751282, + "learning_rate": 1.5401489357614815e-05, + "loss": 2.0353, + "step": 9910 + }, + { + "epoch": 0.33, + "grad_norm": 0.7261745929718018, + "learning_rate": 1.540059482888832e-05, + "loss": 2.1017, + "step": 9911 + }, + { + "epoch": 0.33, + "grad_norm": 0.7259093523025513, + "learning_rate": 1.5399700239149067e-05, + "loss": 2.1564, + "step": 9912 + }, + { + "epoch": 0.33, + "grad_norm": 0.7282025218009949, + "learning_rate": 1.5398805588407167e-05, + "loss": 2.188, + "step": 9913 + }, + { + "epoch": 0.33, + "grad_norm": 0.7093106508255005, + "learning_rate": 1.5397910876672725e-05, + "loss": 2.0741, + "step": 9914 + }, + { + "epoch": 0.33, + "grad_norm": 0.7277908325195312, + "learning_rate": 1.5397016103955848e-05, + "loss": 2.0758, + "step": 9915 + }, + { + "epoch": 0.33, + "grad_norm": 0.7237734198570251, + "learning_rate": 1.539612127026665e-05, + "loss": 2.1128, + "step": 9916 + }, + { + "epoch": 0.33, + "grad_norm": 0.7731038331985474, + "learning_rate": 1.539522637561523e-05, + "loss": 2.1854, + "step": 9917 + }, + { + "epoch": 0.33, + "grad_norm": 0.6891342997550964, + "learning_rate": 1.5394331420011706e-05, + "loss": 2.0909, + "step": 9918 + }, + { + "epoch": 0.33, + "grad_norm": 0.7118052244186401, + "learning_rate": 1.539343640346619e-05, + "loss": 2.0269, + "step": 9919 + }, + { + "epoch": 0.33, + "grad_norm": 0.7081072926521301, + "learning_rate": 1.539254132598879e-05, + "loss": 2.1136, + "step": 9920 + }, + { + "epoch": 0.33, + "grad_norm": 0.7083466053009033, + "learning_rate": 1.5391646187589618e-05, + "loss": 2.1611, + "step": 9921 + }, + { + "epoch": 0.33, + "grad_norm": 0.7518587708473206, + "learning_rate": 1.539075098827879e-05, + "loss": 2.1501, + "step": 9922 + }, + { + "epoch": 0.33, + "grad_norm": 0.7448289394378662, + "learning_rate": 1.5389855728066408e-05, + "loss": 2.1538, + "step": 9923 + }, + { + "epoch": 0.33, + "grad_norm": 0.7354655265808105, + "learning_rate": 1.5388960406962602e-05, + "loss": 2.1285, + "step": 9924 + }, + { + "epoch": 0.33, + "grad_norm": 0.7223306894302368, + "learning_rate": 1.5388065024977477e-05, + "loss": 2.1777, + "step": 9925 + }, + { + "epoch": 0.33, + "grad_norm": 0.7092664837837219, + "learning_rate": 1.5387169582121153e-05, + "loss": 2.1183, + "step": 9926 + }, + { + "epoch": 0.33, + "grad_norm": 0.7027783989906311, + "learning_rate": 1.5386274078403742e-05, + "loss": 2.2018, + "step": 9927 + }, + { + "epoch": 0.33, + "grad_norm": 0.7217532992362976, + "learning_rate": 1.5385378513835366e-05, + "loss": 2.1276, + "step": 9928 + }, + { + "epoch": 0.33, + "grad_norm": 0.7172870635986328, + "learning_rate": 1.5384482888426135e-05, + "loss": 2.0906, + "step": 9929 + }, + { + "epoch": 0.33, + "grad_norm": 0.751054584980011, + "learning_rate": 1.5383587202186176e-05, + "loss": 2.0431, + "step": 9930 + }, + { + "epoch": 0.33, + "grad_norm": 0.7331206798553467, + "learning_rate": 1.53826914551256e-05, + "loss": 2.171, + "step": 9931 + }, + { + "epoch": 0.33, + "grad_norm": 0.7286101579666138, + "learning_rate": 1.5381795647254537e-05, + "loss": 2.1675, + "step": 9932 + }, + { + "epoch": 0.33, + "grad_norm": 0.7480999231338501, + "learning_rate": 1.5380899778583094e-05, + "loss": 2.0751, + "step": 9933 + }, + { + "epoch": 0.33, + "grad_norm": 0.7488501071929932, + "learning_rate": 1.5380003849121402e-05, + "loss": 2.1048, + "step": 9934 + }, + { + "epoch": 0.33, + "grad_norm": 0.7640261650085449, + "learning_rate": 1.537910785887958e-05, + "loss": 2.0773, + "step": 9935 + }, + { + "epoch": 0.33, + "grad_norm": 0.732803463935852, + "learning_rate": 1.537821180786775e-05, + "loss": 2.1771, + "step": 9936 + }, + { + "epoch": 0.33, + "grad_norm": 0.7260231971740723, + "learning_rate": 1.5377315696096034e-05, + "loss": 2.191, + "step": 9937 + }, + { + "epoch": 0.33, + "grad_norm": 0.7418771982192993, + "learning_rate": 1.5376419523574554e-05, + "loss": 2.1695, + "step": 9938 + }, + { + "epoch": 0.33, + "grad_norm": 0.7330632209777832, + "learning_rate": 1.5375523290313443e-05, + "loss": 2.1246, + "step": 9939 + }, + { + "epoch": 0.33, + "grad_norm": 0.7042512893676758, + "learning_rate": 1.5374626996322817e-05, + "loss": 2.1056, + "step": 9940 + }, + { + "epoch": 0.33, + "grad_norm": 0.7159243822097778, + "learning_rate": 1.5373730641612804e-05, + "loss": 2.1369, + "step": 9941 + }, + { + "epoch": 0.33, + "grad_norm": 0.705363392829895, + "learning_rate": 1.537283422619353e-05, + "loss": 2.1225, + "step": 9942 + }, + { + "epoch": 0.33, + "grad_norm": 0.8177266716957092, + "learning_rate": 1.5371937750075128e-05, + "loss": 2.0374, + "step": 9943 + }, + { + "epoch": 0.33, + "grad_norm": 0.7357922792434692, + "learning_rate": 1.5371041213267722e-05, + "loss": 2.1691, + "step": 9944 + }, + { + "epoch": 0.33, + "grad_norm": 0.7095556855201721, + "learning_rate": 1.5370144615781434e-05, + "loss": 2.1771, + "step": 9945 + }, + { + "epoch": 0.33, + "grad_norm": 0.7182725667953491, + "learning_rate": 1.5369247957626408e-05, + "loss": 2.0836, + "step": 9946 + }, + { + "epoch": 0.33, + "grad_norm": 0.7115985751152039, + "learning_rate": 1.536835123881276e-05, + "loss": 2.0627, + "step": 9947 + }, + { + "epoch": 0.33, + "grad_norm": 0.7677529454231262, + "learning_rate": 1.5367454459350625e-05, + "loss": 2.1948, + "step": 9948 + }, + { + "epoch": 0.33, + "grad_norm": 0.7491755485534668, + "learning_rate": 1.5366557619250137e-05, + "loss": 2.1202, + "step": 9949 + }, + { + "epoch": 0.33, + "grad_norm": 0.7151673436164856, + "learning_rate": 1.5365660718521425e-05, + "loss": 2.1107, + "step": 9950 + }, + { + "epoch": 0.33, + "grad_norm": 0.7090498805046082, + "learning_rate": 1.5364763757174625e-05, + "loss": 2.1193, + "step": 9951 + }, + { + "epoch": 0.33, + "grad_norm": 0.7554393410682678, + "learning_rate": 1.5363866735219866e-05, + "loss": 2.1116, + "step": 9952 + }, + { + "epoch": 0.33, + "grad_norm": 0.7136741280555725, + "learning_rate": 1.5362969652667286e-05, + "loss": 2.1138, + "step": 9953 + }, + { + "epoch": 0.33, + "grad_norm": 0.7398972511291504, + "learning_rate": 1.5362072509527015e-05, + "loss": 2.1595, + "step": 9954 + }, + { + "epoch": 0.33, + "grad_norm": 0.7205328941345215, + "learning_rate": 1.5361175305809194e-05, + "loss": 2.1002, + "step": 9955 + }, + { + "epoch": 0.33, + "grad_norm": 0.7061589360237122, + "learning_rate": 1.5360278041523953e-05, + "loss": 2.1428, + "step": 9956 + }, + { + "epoch": 0.33, + "grad_norm": 0.703381359577179, + "learning_rate": 1.5359380716681437e-05, + "loss": 2.111, + "step": 9957 + }, + { + "epoch": 0.33, + "grad_norm": 0.7750102281570435, + "learning_rate": 1.5358483331291776e-05, + "loss": 2.111, + "step": 9958 + }, + { + "epoch": 0.33, + "grad_norm": 0.7327236533164978, + "learning_rate": 1.535758588536511e-05, + "loss": 2.0984, + "step": 9959 + }, + { + "epoch": 0.33, + "grad_norm": 0.7597827911376953, + "learning_rate": 1.535668837891158e-05, + "loss": 2.1211, + "step": 9960 + }, + { + "epoch": 0.33, + "grad_norm": 0.7324711084365845, + "learning_rate": 1.535579081194132e-05, + "loss": 2.1065, + "step": 9961 + }, + { + "epoch": 0.33, + "grad_norm": 0.7440337538719177, + "learning_rate": 1.5354893184464482e-05, + "loss": 2.1782, + "step": 9962 + }, + { + "epoch": 0.33, + "grad_norm": 0.7318512201309204, + "learning_rate": 1.5353995496491193e-05, + "loss": 2.1907, + "step": 9963 + }, + { + "epoch": 0.33, + "grad_norm": 0.6927915215492249, + "learning_rate": 1.5353097748031603e-05, + "loss": 2.1267, + "step": 9964 + }, + { + "epoch": 0.33, + "grad_norm": 0.7288481593132019, + "learning_rate": 1.535219993909585e-05, + "loss": 2.0988, + "step": 9965 + }, + { + "epoch": 0.33, + "grad_norm": 0.7203844785690308, + "learning_rate": 1.535130206969408e-05, + "loss": 2.0986, + "step": 9966 + }, + { + "epoch": 0.33, + "grad_norm": 0.7276561260223389, + "learning_rate": 1.5350404139836434e-05, + "loss": 2.1159, + "step": 9967 + }, + { + "epoch": 0.33, + "grad_norm": 0.7588036060333252, + "learning_rate": 1.534950614953306e-05, + "loss": 2.1008, + "step": 9968 + }, + { + "epoch": 0.33, + "grad_norm": 0.7034308910369873, + "learning_rate": 1.5348608098794097e-05, + "loss": 2.0805, + "step": 9969 + }, + { + "epoch": 0.33, + "grad_norm": 0.7161398530006409, + "learning_rate": 1.53477099876297e-05, + "loss": 2.0332, + "step": 9970 + }, + { + "epoch": 0.33, + "grad_norm": 0.7189461588859558, + "learning_rate": 1.5346811816050004e-05, + "loss": 2.0768, + "step": 9971 + }, + { + "epoch": 0.33, + "grad_norm": 0.7164156436920166, + "learning_rate": 1.5345913584065166e-05, + "loss": 2.1495, + "step": 9972 + }, + { + "epoch": 0.33, + "grad_norm": 0.77699875831604, + "learning_rate": 1.5345015291685327e-05, + "loss": 2.2366, + "step": 9973 + }, + { + "epoch": 0.33, + "grad_norm": 0.7561855912208557, + "learning_rate": 1.5344116938920638e-05, + "loss": 2.1528, + "step": 9974 + }, + { + "epoch": 0.33, + "grad_norm": 0.7107703685760498, + "learning_rate": 1.5343218525781247e-05, + "loss": 2.1251, + "step": 9975 + }, + { + "epoch": 0.33, + "grad_norm": 0.7027930617332458, + "learning_rate": 1.5342320052277307e-05, + "loss": 2.0786, + "step": 9976 + }, + { + "epoch": 0.33, + "grad_norm": 0.6941022872924805, + "learning_rate": 1.5341421518418963e-05, + "loss": 2.0993, + "step": 9977 + }, + { + "epoch": 0.33, + "grad_norm": 0.7118350863456726, + "learning_rate": 1.534052292421637e-05, + "loss": 2.0717, + "step": 9978 + }, + { + "epoch": 0.33, + "grad_norm": 0.7233568429946899, + "learning_rate": 1.533962426967968e-05, + "loss": 2.1068, + "step": 9979 + }, + { + "epoch": 0.33, + "grad_norm": 0.7118363380432129, + "learning_rate": 1.5338725554819043e-05, + "loss": 2.0943, + "step": 9980 + }, + { + "epoch": 0.33, + "grad_norm": 0.7006356120109558, + "learning_rate": 1.5337826779644617e-05, + "loss": 2.0703, + "step": 9981 + }, + { + "epoch": 0.33, + "grad_norm": 0.7241905331611633, + "learning_rate": 1.5336927944166548e-05, + "loss": 2.162, + "step": 9982 + }, + { + "epoch": 0.33, + "grad_norm": 0.7173171043395996, + "learning_rate": 1.5336029048394997e-05, + "loss": 2.1389, + "step": 9983 + }, + { + "epoch": 0.33, + "grad_norm": 0.7141885757446289, + "learning_rate": 1.5335130092340117e-05, + "loss": 2.1684, + "step": 9984 + }, + { + "epoch": 0.33, + "grad_norm": 0.7098783254623413, + "learning_rate": 1.5334231076012064e-05, + "loss": 2.1473, + "step": 9985 + }, + { + "epoch": 0.33, + "grad_norm": 0.7285926342010498, + "learning_rate": 1.533333199942099e-05, + "loss": 2.0833, + "step": 9986 + }, + { + "epoch": 0.33, + "grad_norm": 0.7388181090354919, + "learning_rate": 1.5332432862577062e-05, + "loss": 2.1173, + "step": 9987 + }, + { + "epoch": 0.33, + "grad_norm": 0.7213814854621887, + "learning_rate": 1.533153366549043e-05, + "loss": 2.1227, + "step": 9988 + }, + { + "epoch": 0.33, + "grad_norm": 0.7296930551528931, + "learning_rate": 1.5330634408171257e-05, + "loss": 2.0856, + "step": 9989 + }, + { + "epoch": 0.33, + "grad_norm": 0.7041009068489075, + "learning_rate": 1.5329735090629702e-05, + "loss": 2.1039, + "step": 9990 + }, + { + "epoch": 0.33, + "grad_norm": 0.7245975732803345, + "learning_rate": 1.532883571287592e-05, + "loss": 2.1567, + "step": 9991 + }, + { + "epoch": 0.33, + "grad_norm": 0.7275087833404541, + "learning_rate": 1.5327936274920075e-05, + "loss": 2.1653, + "step": 9992 + }, + { + "epoch": 0.33, + "grad_norm": 0.7535735368728638, + "learning_rate": 1.532703677677233e-05, + "loss": 2.14, + "step": 9993 + }, + { + "epoch": 0.33, + "grad_norm": 0.722087562084198, + "learning_rate": 1.5326137218442845e-05, + "loss": 2.1575, + "step": 9994 + }, + { + "epoch": 0.33, + "grad_norm": 0.7065912485122681, + "learning_rate": 1.5325237599941786e-05, + "loss": 2.1354, + "step": 9995 + }, + { + "epoch": 0.33, + "grad_norm": 0.6966012120246887, + "learning_rate": 1.5324337921279308e-05, + "loss": 2.1188, + "step": 9996 + }, + { + "epoch": 0.33, + "grad_norm": 0.6988899111747742, + "learning_rate": 1.5323438182465585e-05, + "loss": 2.1296, + "step": 9997 + }, + { + "epoch": 0.33, + "grad_norm": 0.7006117701530457, + "learning_rate": 1.5322538383510774e-05, + "loss": 2.0668, + "step": 9998 + }, + { + "epoch": 0.33, + "grad_norm": 0.7583548426628113, + "learning_rate": 1.5321638524425047e-05, + "loss": 2.1825, + "step": 9999 + }, + { + "epoch": 0.33, + "grad_norm": 0.7304947972297668, + "learning_rate": 1.5320738605218564e-05, + "loss": 2.0628, + "step": 10000 + }, + { + "epoch": 0.33, + "grad_norm": 0.717784583568573, + "learning_rate": 1.5319838625901497e-05, + "loss": 2.0766, + "step": 10001 + }, + { + "epoch": 0.33, + "grad_norm": 0.7098894119262695, + "learning_rate": 1.5318938586484007e-05, + "loss": 2.1641, + "step": 10002 + }, + { + "epoch": 0.33, + "grad_norm": 0.7608881592750549, + "learning_rate": 1.531803848697627e-05, + "loss": 2.1629, + "step": 10003 + }, + { + "epoch": 0.33, + "grad_norm": 0.7275397777557373, + "learning_rate": 1.531713832738845e-05, + "loss": 2.1159, + "step": 10004 + }, + { + "epoch": 0.33, + "grad_norm": 0.7441413998603821, + "learning_rate": 1.5316238107730717e-05, + "loss": 2.1733, + "step": 10005 + }, + { + "epoch": 0.33, + "grad_norm": 0.7365608811378479, + "learning_rate": 1.5315337828013243e-05, + "loss": 2.136, + "step": 10006 + }, + { + "epoch": 0.33, + "grad_norm": 0.7113382816314697, + "learning_rate": 1.531443748824619e-05, + "loss": 2.1169, + "step": 10007 + }, + { + "epoch": 0.33, + "grad_norm": 0.7332295179367065, + "learning_rate": 1.5313537088439746e-05, + "loss": 2.1024, + "step": 10008 + }, + { + "epoch": 0.33, + "grad_norm": 0.7116252183914185, + "learning_rate": 1.531263662860407e-05, + "loss": 2.0703, + "step": 10009 + }, + { + "epoch": 0.33, + "grad_norm": 0.7264032959938049, + "learning_rate": 1.5311736108749337e-05, + "loss": 2.1368, + "step": 10010 + }, + { + "epoch": 0.33, + "grad_norm": 0.7282747030258179, + "learning_rate": 1.5310835528885727e-05, + "loss": 2.0475, + "step": 10011 + }, + { + "epoch": 0.33, + "grad_norm": 0.7228419780731201, + "learning_rate": 1.5309934889023406e-05, + "loss": 2.1583, + "step": 10012 + }, + { + "epoch": 0.33, + "grad_norm": 0.7247503399848938, + "learning_rate": 1.5309034189172556e-05, + "loss": 2.174, + "step": 10013 + }, + { + "epoch": 0.33, + "grad_norm": 0.7345393896102905, + "learning_rate": 1.5308133429343346e-05, + "loss": 2.0899, + "step": 10014 + }, + { + "epoch": 0.33, + "grad_norm": 0.7341177463531494, + "learning_rate": 1.5307232609545958e-05, + "loss": 2.1062, + "step": 10015 + }, + { + "epoch": 0.33, + "grad_norm": 0.7032195329666138, + "learning_rate": 1.530633172979056e-05, + "loss": 2.0295, + "step": 10016 + }, + { + "epoch": 0.33, + "grad_norm": 0.693708598613739, + "learning_rate": 1.5305430790087345e-05, + "loss": 2.0597, + "step": 10017 + }, + { + "epoch": 0.33, + "grad_norm": 0.7378243803977966, + "learning_rate": 1.5304529790446476e-05, + "loss": 2.1099, + "step": 10018 + }, + { + "epoch": 0.33, + "grad_norm": 0.7657470703125, + "learning_rate": 1.530362873087814e-05, + "loss": 2.14, + "step": 10019 + }, + { + "epoch": 0.33, + "grad_norm": 0.7148919701576233, + "learning_rate": 1.5302727611392517e-05, + "loss": 2.0718, + "step": 10020 + }, + { + "epoch": 0.33, + "grad_norm": 0.6995805501937866, + "learning_rate": 1.530182643199978e-05, + "loss": 2.1045, + "step": 10021 + }, + { + "epoch": 0.33, + "grad_norm": 0.7176634669303894, + "learning_rate": 1.530092519271012e-05, + "loss": 2.0682, + "step": 10022 + }, + { + "epoch": 0.33, + "grad_norm": 0.7703898549079895, + "learning_rate": 1.530002389353371e-05, + "loss": 2.1773, + "step": 10023 + }, + { + "epoch": 0.33, + "grad_norm": 0.7362306118011475, + "learning_rate": 1.5299122534480738e-05, + "loss": 2.287, + "step": 10024 + }, + { + "epoch": 0.33, + "grad_norm": 0.7134345173835754, + "learning_rate": 1.5298221115561385e-05, + "loss": 2.1785, + "step": 10025 + }, + { + "epoch": 0.33, + "grad_norm": 0.7660397291183472, + "learning_rate": 1.529731963678584e-05, + "loss": 2.1399, + "step": 10026 + }, + { + "epoch": 0.33, + "grad_norm": 0.7364994883537292, + "learning_rate": 1.5296418098164275e-05, + "loss": 2.2059, + "step": 10027 + }, + { + "epoch": 0.33, + "grad_norm": 0.7345845103263855, + "learning_rate": 1.5295516499706887e-05, + "loss": 2.1169, + "step": 10028 + }, + { + "epoch": 0.33, + "grad_norm": 0.7142937779426575, + "learning_rate": 1.5294614841423854e-05, + "loss": 2.0992, + "step": 10029 + }, + { + "epoch": 0.33, + "grad_norm": 0.6972905993461609, + "learning_rate": 1.5293713123325366e-05, + "loss": 2.0863, + "step": 10030 + }, + { + "epoch": 0.33, + "grad_norm": 0.7371915578842163, + "learning_rate": 1.529281134542161e-05, + "loss": 2.2024, + "step": 10031 + }, + { + "epoch": 0.33, + "grad_norm": 0.7531124353408813, + "learning_rate": 1.5291909507722773e-05, + "loss": 2.1074, + "step": 10032 + }, + { + "epoch": 0.33, + "grad_norm": 0.7074714303016663, + "learning_rate": 1.5291007610239045e-05, + "loss": 2.1263, + "step": 10033 + }, + { + "epoch": 0.33, + "grad_norm": 0.7162356376647949, + "learning_rate": 1.529010565298061e-05, + "loss": 2.1638, + "step": 10034 + }, + { + "epoch": 0.33, + "grad_norm": 0.7348289489746094, + "learning_rate": 1.5289203635957667e-05, + "loss": 2.0836, + "step": 10035 + }, + { + "epoch": 0.33, + "grad_norm": 0.7351411581039429, + "learning_rate": 1.5288301559180398e-05, + "loss": 2.1134, + "step": 10036 + }, + { + "epoch": 0.33, + "grad_norm": 0.7018687129020691, + "learning_rate": 1.5287399422659e-05, + "loss": 2.1286, + "step": 10037 + }, + { + "epoch": 0.33, + "grad_norm": 0.7249342799186707, + "learning_rate": 1.5286497226403655e-05, + "loss": 2.1089, + "step": 10038 + }, + { + "epoch": 0.33, + "grad_norm": 0.6925143003463745, + "learning_rate": 1.5285594970424572e-05, + "loss": 2.0753, + "step": 10039 + }, + { + "epoch": 0.33, + "grad_norm": 0.7001458406448364, + "learning_rate": 1.528469265473193e-05, + "loss": 2.112, + "step": 10040 + }, + { + "epoch": 0.33, + "grad_norm": 0.7512720227241516, + "learning_rate": 1.5283790279335925e-05, + "loss": 2.0943, + "step": 10041 + }, + { + "epoch": 0.33, + "grad_norm": 0.7175402045249939, + "learning_rate": 1.5282887844246758e-05, + "loss": 2.1508, + "step": 10042 + }, + { + "epoch": 0.33, + "grad_norm": 0.7568737268447876, + "learning_rate": 1.5281985349474616e-05, + "loss": 2.171, + "step": 10043 + }, + { + "epoch": 0.33, + "grad_norm": 0.7141623497009277, + "learning_rate": 1.5281082795029704e-05, + "loss": 2.1291, + "step": 10044 + }, + { + "epoch": 0.33, + "grad_norm": 0.7126940488815308, + "learning_rate": 1.528018018092221e-05, + "loss": 2.0879, + "step": 10045 + }, + { + "epoch": 0.33, + "grad_norm": 0.7380043864250183, + "learning_rate": 1.5279277507162337e-05, + "loss": 2.1142, + "step": 10046 + }, + { + "epoch": 0.33, + "grad_norm": 0.7129105925559998, + "learning_rate": 1.527837477376028e-05, + "loss": 2.1489, + "step": 10047 + }, + { + "epoch": 0.33, + "grad_norm": 0.7440321445465088, + "learning_rate": 1.527747198072624e-05, + "loss": 2.1598, + "step": 10048 + }, + { + "epoch": 0.33, + "grad_norm": 0.7014553546905518, + "learning_rate": 1.527656912807041e-05, + "loss": 2.1405, + "step": 10049 + }, + { + "epoch": 0.33, + "grad_norm": 0.7701312303543091, + "learning_rate": 1.5275666215803e-05, + "loss": 2.1116, + "step": 10050 + }, + { + "epoch": 0.33, + "grad_norm": 0.7119162678718567, + "learning_rate": 1.5274763243934203e-05, + "loss": 2.113, + "step": 10051 + }, + { + "epoch": 0.33, + "grad_norm": 0.7237569689750671, + "learning_rate": 1.527386021247422e-05, + "loss": 2.0938, + "step": 10052 + }, + { + "epoch": 0.33, + "grad_norm": 0.7306511998176575, + "learning_rate": 1.527295712143326e-05, + "loss": 2.0881, + "step": 10053 + }, + { + "epoch": 0.33, + "grad_norm": 0.7547991871833801, + "learning_rate": 1.527205397082151e-05, + "loss": 2.1578, + "step": 10054 + }, + { + "epoch": 0.33, + "grad_norm": 0.7612103223800659, + "learning_rate": 1.5271150760649197e-05, + "loss": 2.1034, + "step": 10055 + }, + { + "epoch": 0.33, + "grad_norm": 0.7557827830314636, + "learning_rate": 1.5270247490926503e-05, + "loss": 2.0853, + "step": 10056 + }, + { + "epoch": 0.33, + "grad_norm": 0.7160493731498718, + "learning_rate": 1.5269344161663644e-05, + "loss": 2.199, + "step": 10057 + }, + { + "epoch": 0.33, + "grad_norm": 0.7116184830665588, + "learning_rate": 1.5268440772870822e-05, + "loss": 2.1476, + "step": 10058 + }, + { + "epoch": 0.33, + "grad_norm": 0.7349880933761597, + "learning_rate": 1.5267537324558248e-05, + "loss": 2.1842, + "step": 10059 + }, + { + "epoch": 0.33, + "grad_norm": 0.7381742596626282, + "learning_rate": 1.526663381673612e-05, + "loss": 2.1727, + "step": 10060 + }, + { + "epoch": 0.33, + "grad_norm": 0.7480927109718323, + "learning_rate": 1.5265730249414652e-05, + "loss": 1.9984, + "step": 10061 + }, + { + "epoch": 0.33, + "grad_norm": 0.7254616618156433, + "learning_rate": 1.5264826622604047e-05, + "loss": 2.1766, + "step": 10062 + }, + { + "epoch": 0.33, + "grad_norm": 0.769919216632843, + "learning_rate": 1.526392293631452e-05, + "loss": 2.1294, + "step": 10063 + }, + { + "epoch": 0.33, + "grad_norm": 0.7038577198982239, + "learning_rate": 1.5263019190556275e-05, + "loss": 2.061, + "step": 10064 + }, + { + "epoch": 0.33, + "grad_norm": 0.7674484848976135, + "learning_rate": 1.526211538533952e-05, + "loss": 2.0606, + "step": 10065 + }, + { + "epoch": 0.33, + "grad_norm": 0.7276782989501953, + "learning_rate": 1.5261211520674475e-05, + "loss": 2.0537, + "step": 10066 + }, + { + "epoch": 0.33, + "grad_norm": 0.693509578704834, + "learning_rate": 1.5260307596571342e-05, + "loss": 2.0649, + "step": 10067 + }, + { + "epoch": 0.33, + "grad_norm": 0.7392958402633667, + "learning_rate": 1.525940361304034e-05, + "loss": 2.1051, + "step": 10068 + }, + { + "epoch": 0.33, + "grad_norm": 0.7127280235290527, + "learning_rate": 1.5258499570091673e-05, + "loss": 2.0899, + "step": 10069 + }, + { + "epoch": 0.34, + "grad_norm": 0.73350590467453, + "learning_rate": 1.5257595467735563e-05, + "loss": 2.1438, + "step": 10070 + }, + { + "epoch": 0.34, + "grad_norm": 0.6800039410591125, + "learning_rate": 1.525669130598222e-05, + "loss": 2.1154, + "step": 10071 + }, + { + "epoch": 0.34, + "grad_norm": 0.7113328576087952, + "learning_rate": 1.5255787084841863e-05, + "loss": 2.0811, + "step": 10072 + }, + { + "epoch": 0.34, + "grad_norm": 0.7271084189414978, + "learning_rate": 1.5254882804324698e-05, + "loss": 2.1298, + "step": 10073 + }, + { + "epoch": 0.34, + "grad_norm": 0.7235487699508667, + "learning_rate": 1.525397846444095e-05, + "loss": 2.1552, + "step": 10074 + }, + { + "epoch": 0.34, + "grad_norm": 0.7181670069694519, + "learning_rate": 1.5253074065200832e-05, + "loss": 2.1006, + "step": 10075 + }, + { + "epoch": 0.34, + "grad_norm": 0.7539389729499817, + "learning_rate": 1.525216960661456e-05, + "loss": 2.199, + "step": 10076 + }, + { + "epoch": 0.34, + "grad_norm": 0.715266227722168, + "learning_rate": 1.5251265088692356e-05, + "loss": 2.1355, + "step": 10077 + }, + { + "epoch": 0.34, + "grad_norm": 0.6826164722442627, + "learning_rate": 1.5250360511444436e-05, + "loss": 2.0438, + "step": 10078 + }, + { + "epoch": 0.34, + "grad_norm": 0.7137704491615295, + "learning_rate": 1.5249455874881021e-05, + "loss": 2.1742, + "step": 10079 + }, + { + "epoch": 0.34, + "grad_norm": 0.6959431171417236, + "learning_rate": 1.5248551179012327e-05, + "loss": 2.1222, + "step": 10080 + }, + { + "epoch": 0.34, + "grad_norm": 0.7458562850952148, + "learning_rate": 1.524764642384858e-05, + "loss": 2.1365, + "step": 10081 + }, + { + "epoch": 0.34, + "grad_norm": 0.7086814641952515, + "learning_rate": 1.5246741609399998e-05, + "loss": 2.1047, + "step": 10082 + }, + { + "epoch": 0.34, + "grad_norm": 0.7142043709754944, + "learning_rate": 1.5245836735676806e-05, + "loss": 2.0732, + "step": 10083 + }, + { + "epoch": 0.34, + "grad_norm": 0.7026371955871582, + "learning_rate": 1.5244931802689223e-05, + "loss": 2.12, + "step": 10084 + }, + { + "epoch": 0.34, + "grad_norm": 0.7161905169487, + "learning_rate": 1.5244026810447476e-05, + "loss": 2.151, + "step": 10085 + }, + { + "epoch": 0.34, + "grad_norm": 0.7201579213142395, + "learning_rate": 1.5243121758961787e-05, + "loss": 2.0986, + "step": 10086 + }, + { + "epoch": 0.34, + "grad_norm": 0.7146960496902466, + "learning_rate": 1.5242216648242378e-05, + "loss": 2.0986, + "step": 10087 + }, + { + "epoch": 0.34, + "grad_norm": 0.7470773458480835, + "learning_rate": 1.5241311478299482e-05, + "loss": 2.0586, + "step": 10088 + }, + { + "epoch": 0.34, + "grad_norm": 0.7542617917060852, + "learning_rate": 1.5240406249143319e-05, + "loss": 2.1768, + "step": 10089 + }, + { + "epoch": 0.34, + "grad_norm": 0.7179585099220276, + "learning_rate": 1.5239500960784118e-05, + "loss": 2.1645, + "step": 10090 + }, + { + "epoch": 0.34, + "grad_norm": 0.7630740404129028, + "learning_rate": 1.5238595613232106e-05, + "loss": 2.1987, + "step": 10091 + }, + { + "epoch": 0.34, + "grad_norm": 0.7415053844451904, + "learning_rate": 1.5237690206497509e-05, + "loss": 2.1573, + "step": 10092 + }, + { + "epoch": 0.34, + "grad_norm": 0.7079194188117981, + "learning_rate": 1.5236784740590558e-05, + "loss": 2.1964, + "step": 10093 + }, + { + "epoch": 0.34, + "grad_norm": 0.7079851031303406, + "learning_rate": 1.5235879215521486e-05, + "loss": 2.0738, + "step": 10094 + }, + { + "epoch": 0.34, + "grad_norm": 0.7330510020256042, + "learning_rate": 1.5234973631300512e-05, + "loss": 2.0947, + "step": 10095 + }, + { + "epoch": 0.34, + "grad_norm": 0.7014070153236389, + "learning_rate": 1.5234067987937878e-05, + "loss": 2.1612, + "step": 10096 + }, + { + "epoch": 0.34, + "grad_norm": 0.72809898853302, + "learning_rate": 1.5233162285443813e-05, + "loss": 2.096, + "step": 10097 + }, + { + "epoch": 0.34, + "grad_norm": 0.7351396679878235, + "learning_rate": 1.5232256523828542e-05, + "loss": 2.1379, + "step": 10098 + }, + { + "epoch": 0.34, + "grad_norm": 0.7281804084777832, + "learning_rate": 1.5231350703102308e-05, + "loss": 2.1177, + "step": 10099 + }, + { + "epoch": 0.34, + "grad_norm": 0.7390543818473816, + "learning_rate": 1.523044482327534e-05, + "loss": 2.1655, + "step": 10100 + }, + { + "epoch": 0.34, + "grad_norm": 0.7415804862976074, + "learning_rate": 1.5229538884357869e-05, + "loss": 2.1151, + "step": 10101 + }, + { + "epoch": 0.34, + "grad_norm": 0.7407842874526978, + "learning_rate": 1.522863288636013e-05, + "loss": 2.0566, + "step": 10102 + }, + { + "epoch": 0.34, + "grad_norm": 0.7362062335014343, + "learning_rate": 1.5227726829292367e-05, + "loss": 2.0746, + "step": 10103 + }, + { + "epoch": 0.34, + "grad_norm": 0.7202370762825012, + "learning_rate": 1.5226820713164807e-05, + "loss": 2.1043, + "step": 10104 + }, + { + "epoch": 0.34, + "grad_norm": 0.7315031886100769, + "learning_rate": 1.5225914537987692e-05, + "loss": 2.1437, + "step": 10105 + }, + { + "epoch": 0.34, + "grad_norm": 0.7333627343177795, + "learning_rate": 1.5225008303771254e-05, + "loss": 2.1346, + "step": 10106 + }, + { + "epoch": 0.34, + "grad_norm": 0.7167739272117615, + "learning_rate": 1.5224102010525737e-05, + "loss": 2.1236, + "step": 10107 + }, + { + "epoch": 0.34, + "grad_norm": 0.7488238215446472, + "learning_rate": 1.5223195658261375e-05, + "loss": 2.1451, + "step": 10108 + }, + { + "epoch": 0.34, + "grad_norm": 0.7118038535118103, + "learning_rate": 1.5222289246988409e-05, + "loss": 2.1629, + "step": 10109 + }, + { + "epoch": 0.34, + "grad_norm": 0.7536574006080627, + "learning_rate": 1.5221382776717084e-05, + "loss": 2.1372, + "step": 10110 + }, + { + "epoch": 0.34, + "grad_norm": 0.6996644139289856, + "learning_rate": 1.5220476247457632e-05, + "loss": 2.1374, + "step": 10111 + }, + { + "epoch": 0.34, + "grad_norm": 0.7120413780212402, + "learning_rate": 1.5219569659220299e-05, + "loss": 2.171, + "step": 10112 + }, + { + "epoch": 0.34, + "grad_norm": 0.7156165242195129, + "learning_rate": 1.521866301201533e-05, + "loss": 2.0741, + "step": 10113 + }, + { + "epoch": 0.34, + "grad_norm": 0.7156989574432373, + "learning_rate": 1.5217756305852962e-05, + "loss": 2.1327, + "step": 10114 + }, + { + "epoch": 0.34, + "grad_norm": 0.731597900390625, + "learning_rate": 1.5216849540743442e-05, + "loss": 2.2012, + "step": 10115 + }, + { + "epoch": 0.34, + "grad_norm": 0.7137221693992615, + "learning_rate": 1.5215942716697014e-05, + "loss": 2.1428, + "step": 10116 + }, + { + "epoch": 0.34, + "grad_norm": 0.7302429676055908, + "learning_rate": 1.5215035833723922e-05, + "loss": 2.1457, + "step": 10117 + }, + { + "epoch": 0.34, + "grad_norm": 0.7069265246391296, + "learning_rate": 1.521412889183441e-05, + "loss": 2.1095, + "step": 10118 + }, + { + "epoch": 0.34, + "grad_norm": 0.7193915843963623, + "learning_rate": 1.5213221891038727e-05, + "loss": 2.13, + "step": 10119 + }, + { + "epoch": 0.34, + "grad_norm": 0.775750994682312, + "learning_rate": 1.5212314831347117e-05, + "loss": 2.0978, + "step": 10120 + }, + { + "epoch": 0.34, + "grad_norm": 0.695470929145813, + "learning_rate": 1.5211407712769832e-05, + "loss": 2.1209, + "step": 10121 + }, + { + "epoch": 0.34, + "grad_norm": 0.7211986780166626, + "learning_rate": 1.5210500535317114e-05, + "loss": 2.0966, + "step": 10122 + }, + { + "epoch": 0.34, + "grad_norm": 0.7539097666740417, + "learning_rate": 1.5209593298999215e-05, + "loss": 2.1879, + "step": 10123 + }, + { + "epoch": 0.34, + "grad_norm": 0.7180412411689758, + "learning_rate": 1.5208686003826386e-05, + "loss": 2.1687, + "step": 10124 + }, + { + "epoch": 0.34, + "grad_norm": 0.7558460235595703, + "learning_rate": 1.5207778649808871e-05, + "loss": 2.0185, + "step": 10125 + }, + { + "epoch": 0.34, + "grad_norm": 0.7342061400413513, + "learning_rate": 1.5206871236956926e-05, + "loss": 2.1148, + "step": 10126 + }, + { + "epoch": 0.34, + "grad_norm": 0.7195091247558594, + "learning_rate": 1.5205963765280802e-05, + "loss": 2.1366, + "step": 10127 + }, + { + "epoch": 0.34, + "grad_norm": 0.736455500125885, + "learning_rate": 1.520505623479075e-05, + "loss": 2.1394, + "step": 10128 + }, + { + "epoch": 0.34, + "grad_norm": 0.7169816493988037, + "learning_rate": 1.5204148645497023e-05, + "loss": 2.1111, + "step": 10129 + }, + { + "epoch": 0.34, + "grad_norm": 0.719291090965271, + "learning_rate": 1.5203240997409881e-05, + "loss": 2.055, + "step": 10130 + }, + { + "epoch": 0.34, + "grad_norm": 0.7686059474945068, + "learning_rate": 1.5202333290539562e-05, + "loss": 2.1627, + "step": 10131 + }, + { + "epoch": 0.34, + "grad_norm": 0.6843876838684082, + "learning_rate": 1.5201425524896336e-05, + "loss": 2.0869, + "step": 10132 + }, + { + "epoch": 0.34, + "grad_norm": 0.6983750462532043, + "learning_rate": 1.5200517700490451e-05, + "loss": 2.1436, + "step": 10133 + }, + { + "epoch": 0.34, + "grad_norm": 0.7205191850662231, + "learning_rate": 1.5199609817332164e-05, + "loss": 2.1617, + "step": 10134 + }, + { + "epoch": 0.34, + "grad_norm": 0.7300103306770325, + "learning_rate": 1.5198701875431734e-05, + "loss": 2.1396, + "step": 10135 + }, + { + "epoch": 0.34, + "grad_norm": 0.7241202592849731, + "learning_rate": 1.5197793874799419e-05, + "loss": 2.1648, + "step": 10136 + }, + { + "epoch": 0.34, + "grad_norm": 0.7154372334480286, + "learning_rate": 1.519688581544547e-05, + "loss": 2.1443, + "step": 10137 + }, + { + "epoch": 0.34, + "grad_norm": 0.7359454035758972, + "learning_rate": 1.5195977697380152e-05, + "loss": 2.1352, + "step": 10138 + }, + { + "epoch": 0.34, + "grad_norm": 0.6980564594268799, + "learning_rate": 1.5195069520613724e-05, + "loss": 2.1059, + "step": 10139 + }, + { + "epoch": 0.34, + "grad_norm": 0.7291387915611267, + "learning_rate": 1.5194161285156446e-05, + "loss": 2.116, + "step": 10140 + }, + { + "epoch": 0.34, + "grad_norm": 0.7412888407707214, + "learning_rate": 1.5193252991018578e-05, + "loss": 2.0577, + "step": 10141 + }, + { + "epoch": 0.34, + "grad_norm": 0.712284505367279, + "learning_rate": 1.5192344638210382e-05, + "loss": 2.1597, + "step": 10142 + }, + { + "epoch": 0.34, + "grad_norm": 0.7140550017356873, + "learning_rate": 1.5191436226742118e-05, + "loss": 2.189, + "step": 10143 + }, + { + "epoch": 0.34, + "grad_norm": 0.7818500995635986, + "learning_rate": 1.5190527756624049e-05, + "loss": 2.1182, + "step": 10144 + }, + { + "epoch": 0.34, + "grad_norm": 0.7108513712882996, + "learning_rate": 1.5189619227866442e-05, + "loss": 2.1707, + "step": 10145 + }, + { + "epoch": 0.34, + "grad_norm": 0.7283231019973755, + "learning_rate": 1.5188710640479553e-05, + "loss": 2.0942, + "step": 10146 + }, + { + "epoch": 0.34, + "grad_norm": 0.7268214821815491, + "learning_rate": 1.5187801994473658e-05, + "loss": 2.1274, + "step": 10147 + }, + { + "epoch": 0.34, + "grad_norm": 0.7127975821495056, + "learning_rate": 1.5186893289859012e-05, + "loss": 2.1966, + "step": 10148 + }, + { + "epoch": 0.34, + "grad_norm": 0.7417986392974854, + "learning_rate": 1.5185984526645889e-05, + "loss": 2.0895, + "step": 10149 + }, + { + "epoch": 0.34, + "grad_norm": 0.7573621869087219, + "learning_rate": 1.518507570484455e-05, + "loss": 2.0942, + "step": 10150 + }, + { + "epoch": 0.34, + "grad_norm": 0.8116528987884521, + "learning_rate": 1.5184166824465265e-05, + "loss": 2.0429, + "step": 10151 + }, + { + "epoch": 0.34, + "grad_norm": 0.7381929755210876, + "learning_rate": 1.5183257885518304e-05, + "loss": 2.0743, + "step": 10152 + }, + { + "epoch": 0.34, + "grad_norm": 0.7514052987098694, + "learning_rate": 1.5182348888013928e-05, + "loss": 2.1545, + "step": 10153 + }, + { + "epoch": 0.34, + "grad_norm": 0.7120821475982666, + "learning_rate": 1.5181439831962417e-05, + "loss": 2.0973, + "step": 10154 + }, + { + "epoch": 0.34, + "grad_norm": 0.7056170701980591, + "learning_rate": 1.518053071737403e-05, + "loss": 2.1477, + "step": 10155 + }, + { + "epoch": 0.34, + "grad_norm": 0.7355800271034241, + "learning_rate": 1.5179621544259045e-05, + "loss": 2.1324, + "step": 10156 + }, + { + "epoch": 0.34, + "grad_norm": 0.7193593382835388, + "learning_rate": 1.5178712312627732e-05, + "loss": 2.0879, + "step": 10157 + }, + { + "epoch": 0.34, + "grad_norm": 0.7022790312767029, + "learning_rate": 1.517780302249036e-05, + "loss": 2.0873, + "step": 10158 + }, + { + "epoch": 0.34, + "grad_norm": 0.7303242087364197, + "learning_rate": 1.517689367385721e-05, + "loss": 2.1087, + "step": 10159 + }, + { + "epoch": 0.34, + "grad_norm": 0.7229496240615845, + "learning_rate": 1.5175984266738544e-05, + "loss": 2.0533, + "step": 10160 + }, + { + "epoch": 0.34, + "grad_norm": 0.7070238590240479, + "learning_rate": 1.5175074801144643e-05, + "loss": 2.0632, + "step": 10161 + }, + { + "epoch": 0.34, + "grad_norm": 0.7178279161453247, + "learning_rate": 1.5174165277085777e-05, + "loss": 2.1639, + "step": 10162 + }, + { + "epoch": 0.34, + "grad_norm": 0.7114453315734863, + "learning_rate": 1.5173255694572229e-05, + "loss": 2.1478, + "step": 10163 + }, + { + "epoch": 0.34, + "grad_norm": 0.7243159413337708, + "learning_rate": 1.5172346053614266e-05, + "loss": 2.1299, + "step": 10164 + }, + { + "epoch": 0.34, + "grad_norm": 0.7140703201293945, + "learning_rate": 1.517143635422217e-05, + "loss": 2.1942, + "step": 10165 + }, + { + "epoch": 0.34, + "grad_norm": 0.7631360292434692, + "learning_rate": 1.5170526596406214e-05, + "loss": 2.1439, + "step": 10166 + }, + { + "epoch": 0.34, + "grad_norm": 0.7161300778388977, + "learning_rate": 1.5169616780176686e-05, + "loss": 2.0916, + "step": 10167 + }, + { + "epoch": 0.34, + "grad_norm": 0.7268704175949097, + "learning_rate": 1.5168706905543853e-05, + "loss": 2.0673, + "step": 10168 + }, + { + "epoch": 0.34, + "grad_norm": 0.7197861075401306, + "learning_rate": 1.5167796972517997e-05, + "loss": 2.1099, + "step": 10169 + }, + { + "epoch": 0.34, + "grad_norm": 0.7375487685203552, + "learning_rate": 1.5166886981109402e-05, + "loss": 2.1715, + "step": 10170 + }, + { + "epoch": 0.34, + "grad_norm": 0.7384415864944458, + "learning_rate": 1.5165976931328343e-05, + "loss": 2.1104, + "step": 10171 + }, + { + "epoch": 0.34, + "grad_norm": 0.7145796418190002, + "learning_rate": 1.5165066823185106e-05, + "loss": 2.1192, + "step": 10172 + }, + { + "epoch": 0.34, + "grad_norm": 0.7070500254631042, + "learning_rate": 1.516415665668997e-05, + "loss": 2.2016, + "step": 10173 + }, + { + "epoch": 0.34, + "grad_norm": 0.7318699359893799, + "learning_rate": 1.5163246431853221e-05, + "loss": 2.0483, + "step": 10174 + }, + { + "epoch": 0.34, + "grad_norm": 0.7059455513954163, + "learning_rate": 1.5162336148685137e-05, + "loss": 2.0838, + "step": 10175 + }, + { + "epoch": 0.34, + "grad_norm": 0.7218620181083679, + "learning_rate": 1.5161425807196009e-05, + "loss": 2.127, + "step": 10176 + }, + { + "epoch": 0.34, + "grad_norm": 0.7254737615585327, + "learning_rate": 1.516051540739611e-05, + "loss": 2.1323, + "step": 10177 + }, + { + "epoch": 0.34, + "grad_norm": 0.7498472929000854, + "learning_rate": 1.5159604949295739e-05, + "loss": 2.1462, + "step": 10178 + }, + { + "epoch": 0.34, + "grad_norm": 0.7851585149765015, + "learning_rate": 1.5158694432905173e-05, + "loss": 2.1289, + "step": 10179 + }, + { + "epoch": 0.34, + "grad_norm": 0.7177717685699463, + "learning_rate": 1.5157783858234698e-05, + "loss": 2.1086, + "step": 10180 + }, + { + "epoch": 0.34, + "grad_norm": 0.7086904644966125, + "learning_rate": 1.5156873225294608e-05, + "loss": 2.1469, + "step": 10181 + }, + { + "epoch": 0.34, + "grad_norm": 0.7533708810806274, + "learning_rate": 1.5155962534095183e-05, + "loss": 2.1736, + "step": 10182 + }, + { + "epoch": 0.34, + "grad_norm": 0.7106229662895203, + "learning_rate": 1.5155051784646717e-05, + "loss": 2.1236, + "step": 10183 + }, + { + "epoch": 0.34, + "grad_norm": 0.7571940422058105, + "learning_rate": 1.5154140976959494e-05, + "loss": 2.1568, + "step": 10184 + }, + { + "epoch": 0.34, + "grad_norm": 0.7139760851860046, + "learning_rate": 1.515323011104381e-05, + "loss": 2.1698, + "step": 10185 + }, + { + "epoch": 0.34, + "grad_norm": 0.7233843207359314, + "learning_rate": 1.5152319186909952e-05, + "loss": 2.1466, + "step": 10186 + }, + { + "epoch": 0.34, + "grad_norm": 0.7144239544868469, + "learning_rate": 1.5151408204568212e-05, + "loss": 2.1446, + "step": 10187 + }, + { + "epoch": 0.34, + "grad_norm": 0.7265833616256714, + "learning_rate": 1.5150497164028877e-05, + "loss": 2.0779, + "step": 10188 + }, + { + "epoch": 0.34, + "grad_norm": 0.7402101159095764, + "learning_rate": 1.5149586065302249e-05, + "loss": 2.1751, + "step": 10189 + }, + { + "epoch": 0.34, + "grad_norm": 0.7012652158737183, + "learning_rate": 1.5148674908398616e-05, + "loss": 2.0595, + "step": 10190 + }, + { + "epoch": 0.34, + "grad_norm": 0.7403795123100281, + "learning_rate": 1.5147763693328263e-05, + "loss": 2.159, + "step": 10191 + }, + { + "epoch": 0.34, + "grad_norm": 0.7063884735107422, + "learning_rate": 1.51468524201015e-05, + "loss": 2.0006, + "step": 10192 + }, + { + "epoch": 0.34, + "grad_norm": 0.7251092195510864, + "learning_rate": 1.5145941088728613e-05, + "loss": 2.1587, + "step": 10193 + }, + { + "epoch": 0.34, + "grad_norm": 0.7288483381271362, + "learning_rate": 1.51450296992199e-05, + "loss": 2.0203, + "step": 10194 + }, + { + "epoch": 0.34, + "grad_norm": 0.7674341201782227, + "learning_rate": 1.5144118251585655e-05, + "loss": 2.1435, + "step": 10195 + }, + { + "epoch": 0.34, + "grad_norm": 0.7153727412223816, + "learning_rate": 1.5143206745836179e-05, + "loss": 2.0925, + "step": 10196 + }, + { + "epoch": 0.34, + "grad_norm": 0.7176985740661621, + "learning_rate": 1.5142295181981765e-05, + "loss": 2.1629, + "step": 10197 + }, + { + "epoch": 0.34, + "grad_norm": 0.732745349407196, + "learning_rate": 1.5141383560032717e-05, + "loss": 2.1163, + "step": 10198 + }, + { + "epoch": 0.34, + "grad_norm": 0.7387077212333679, + "learning_rate": 1.5140471879999328e-05, + "loss": 2.1483, + "step": 10199 + }, + { + "epoch": 0.34, + "grad_norm": 0.714963972568512, + "learning_rate": 1.5139560141891899e-05, + "loss": 2.0191, + "step": 10200 + }, + { + "epoch": 0.34, + "grad_norm": 0.7100968360900879, + "learning_rate": 1.5138648345720737e-05, + "loss": 2.0694, + "step": 10201 + }, + { + "epoch": 0.34, + "grad_norm": 0.7089889049530029, + "learning_rate": 1.5137736491496129e-05, + "loss": 2.0732, + "step": 10202 + }, + { + "epoch": 0.34, + "grad_norm": 0.7154504060745239, + "learning_rate": 1.5136824579228393e-05, + "loss": 2.1097, + "step": 10203 + }, + { + "epoch": 0.34, + "grad_norm": 0.7036659121513367, + "learning_rate": 1.5135912608927821e-05, + "loss": 2.1226, + "step": 10204 + }, + { + "epoch": 0.34, + "grad_norm": 0.7435508370399475, + "learning_rate": 1.5135000580604717e-05, + "loss": 2.0424, + "step": 10205 + }, + { + "epoch": 0.34, + "grad_norm": 0.7407211661338806, + "learning_rate": 1.5134088494269388e-05, + "loss": 2.1433, + "step": 10206 + }, + { + "epoch": 0.34, + "grad_norm": 0.7177254557609558, + "learning_rate": 1.5133176349932133e-05, + "loss": 2.0809, + "step": 10207 + }, + { + "epoch": 0.34, + "grad_norm": 0.7436890006065369, + "learning_rate": 1.5132264147603262e-05, + "loss": 2.0775, + "step": 10208 + }, + { + "epoch": 0.34, + "grad_norm": 0.7249823808670044, + "learning_rate": 1.513135188729308e-05, + "loss": 2.1762, + "step": 10209 + }, + { + "epoch": 0.34, + "grad_norm": 0.7352937459945679, + "learning_rate": 1.5130439569011887e-05, + "loss": 2.0668, + "step": 10210 + }, + { + "epoch": 0.34, + "grad_norm": 0.7332491278648376, + "learning_rate": 1.5129527192769998e-05, + "loss": 2.1216, + "step": 10211 + }, + { + "epoch": 0.34, + "grad_norm": 0.8026663661003113, + "learning_rate": 1.5128614758577716e-05, + "loss": 2.1369, + "step": 10212 + }, + { + "epoch": 0.34, + "grad_norm": 0.746737003326416, + "learning_rate": 1.512770226644535e-05, + "loss": 2.0801, + "step": 10213 + }, + { + "epoch": 0.34, + "grad_norm": 0.7212234139442444, + "learning_rate": 1.512678971638321e-05, + "loss": 2.0859, + "step": 10214 + }, + { + "epoch": 0.34, + "grad_norm": 0.7143864035606384, + "learning_rate": 1.5125877108401604e-05, + "loss": 2.0687, + "step": 10215 + }, + { + "epoch": 0.34, + "grad_norm": 0.7290762066841125, + "learning_rate": 1.5124964442510842e-05, + "loss": 2.1229, + "step": 10216 + }, + { + "epoch": 0.34, + "grad_norm": 0.7154545783996582, + "learning_rate": 1.5124051718721236e-05, + "loss": 2.0922, + "step": 10217 + }, + { + "epoch": 0.34, + "grad_norm": 0.6948208808898926, + "learning_rate": 1.5123138937043096e-05, + "loss": 2.068, + "step": 10218 + }, + { + "epoch": 0.34, + "grad_norm": 0.7190092206001282, + "learning_rate": 1.5122226097486735e-05, + "loss": 2.1497, + "step": 10219 + }, + { + "epoch": 0.34, + "grad_norm": 0.7374728918075562, + "learning_rate": 1.5121313200062466e-05, + "loss": 2.102, + "step": 10220 + }, + { + "epoch": 0.34, + "grad_norm": 0.7338109612464905, + "learning_rate": 1.5120400244780597e-05, + "loss": 2.1218, + "step": 10221 + }, + { + "epoch": 0.34, + "grad_norm": 0.719439685344696, + "learning_rate": 1.511948723165145e-05, + "loss": 2.1292, + "step": 10222 + }, + { + "epoch": 0.34, + "grad_norm": 0.7246389985084534, + "learning_rate": 1.511857416068534e-05, + "loss": 2.1318, + "step": 10223 + }, + { + "epoch": 0.34, + "grad_norm": 0.735849142074585, + "learning_rate": 1.5117661031892574e-05, + "loss": 2.077, + "step": 10224 + }, + { + "epoch": 0.34, + "grad_norm": 0.7003060579299927, + "learning_rate": 1.511674784528348e-05, + "loss": 2.0615, + "step": 10225 + }, + { + "epoch": 0.34, + "grad_norm": 0.7314756512641907, + "learning_rate": 1.5115834600868362e-05, + "loss": 2.1126, + "step": 10226 + }, + { + "epoch": 0.34, + "grad_norm": 0.7211246490478516, + "learning_rate": 1.5114921298657543e-05, + "loss": 2.123, + "step": 10227 + }, + { + "epoch": 0.34, + "grad_norm": 0.7200993299484253, + "learning_rate": 1.5114007938661342e-05, + "loss": 2.0411, + "step": 10228 + }, + { + "epoch": 0.34, + "grad_norm": 0.7090663909912109, + "learning_rate": 1.5113094520890075e-05, + "loss": 2.0869, + "step": 10229 + }, + { + "epoch": 0.34, + "grad_norm": 0.7302254438400269, + "learning_rate": 1.5112181045354065e-05, + "loss": 2.1559, + "step": 10230 + }, + { + "epoch": 0.34, + "grad_norm": 0.7411405444145203, + "learning_rate": 1.5111267512063626e-05, + "loss": 2.0763, + "step": 10231 + }, + { + "epoch": 0.34, + "grad_norm": 0.7253177762031555, + "learning_rate": 1.5110353921029087e-05, + "loss": 2.1202, + "step": 10232 + }, + { + "epoch": 0.34, + "grad_norm": 0.7203285098075867, + "learning_rate": 1.5109440272260761e-05, + "loss": 2.0994, + "step": 10233 + }, + { + "epoch": 0.34, + "grad_norm": 0.731164276599884, + "learning_rate": 1.5108526565768973e-05, + "loss": 2.1118, + "step": 10234 + }, + { + "epoch": 0.34, + "grad_norm": 0.7556511759757996, + "learning_rate": 1.5107612801564044e-05, + "loss": 2.1148, + "step": 10235 + }, + { + "epoch": 0.34, + "grad_norm": 0.7315865755081177, + "learning_rate": 1.5106698979656304e-05, + "loss": 2.1263, + "step": 10236 + }, + { + "epoch": 0.34, + "grad_norm": 0.730632483959198, + "learning_rate": 1.510578510005607e-05, + "loss": 2.0696, + "step": 10237 + }, + { + "epoch": 0.34, + "grad_norm": 0.7701296806335449, + "learning_rate": 1.5104871162773668e-05, + "loss": 2.0765, + "step": 10238 + }, + { + "epoch": 0.34, + "grad_norm": 0.6920897960662842, + "learning_rate": 1.5103957167819423e-05, + "loss": 2.0837, + "step": 10239 + }, + { + "epoch": 0.34, + "grad_norm": 0.7125385403633118, + "learning_rate": 1.5103043115203662e-05, + "loss": 2.1019, + "step": 10240 + }, + { + "epoch": 0.34, + "grad_norm": 0.7424910664558411, + "learning_rate": 1.510212900493671e-05, + "loss": 2.1277, + "step": 10241 + }, + { + "epoch": 0.34, + "grad_norm": 0.7422478795051575, + "learning_rate": 1.5101214837028892e-05, + "loss": 2.158, + "step": 10242 + }, + { + "epoch": 0.34, + "grad_norm": 0.7804239392280579, + "learning_rate": 1.5100300611490539e-05, + "loss": 2.0967, + "step": 10243 + }, + { + "epoch": 0.34, + "grad_norm": 0.7511236667633057, + "learning_rate": 1.5099386328331981e-05, + "loss": 2.0986, + "step": 10244 + }, + { + "epoch": 0.34, + "grad_norm": 0.7510626316070557, + "learning_rate": 1.5098471987563548e-05, + "loss": 2.048, + "step": 10245 + }, + { + "epoch": 0.34, + "grad_norm": 0.7645931839942932, + "learning_rate": 1.5097557589195559e-05, + "loss": 2.1135, + "step": 10246 + }, + { + "epoch": 0.34, + "grad_norm": 0.7372564673423767, + "learning_rate": 1.5096643133238359e-05, + "loss": 2.1448, + "step": 10247 + }, + { + "epoch": 0.34, + "grad_norm": 0.7620286345481873, + "learning_rate": 1.5095728619702267e-05, + "loss": 2.0992, + "step": 10248 + }, + { + "epoch": 0.34, + "grad_norm": 1.0034486055374146, + "learning_rate": 1.509481404859762e-05, + "loss": 2.021, + "step": 10249 + }, + { + "epoch": 0.34, + "grad_norm": 0.7202479243278503, + "learning_rate": 1.509389941993475e-05, + "loss": 2.1456, + "step": 10250 + }, + { + "epoch": 0.34, + "grad_norm": 0.7199007868766785, + "learning_rate": 1.509298473372399e-05, + "loss": 2.1081, + "step": 10251 + }, + { + "epoch": 0.34, + "grad_norm": 0.7580626606941223, + "learning_rate": 1.5092069989975675e-05, + "loss": 2.0434, + "step": 10252 + }, + { + "epoch": 0.34, + "grad_norm": 0.7029133439064026, + "learning_rate": 1.5091155188700136e-05, + "loss": 2.1337, + "step": 10253 + }, + { + "epoch": 0.34, + "grad_norm": 0.739310085773468, + "learning_rate": 1.5090240329907708e-05, + "loss": 2.2218, + "step": 10254 + }, + { + "epoch": 0.34, + "grad_norm": 0.7283275723457336, + "learning_rate": 1.5089325413608729e-05, + "loss": 2.1131, + "step": 10255 + }, + { + "epoch": 0.34, + "grad_norm": 0.7090397477149963, + "learning_rate": 1.5088410439813537e-05, + "loss": 2.1414, + "step": 10256 + }, + { + "epoch": 0.34, + "grad_norm": 0.7785203456878662, + "learning_rate": 1.5087495408532461e-05, + "loss": 2.1726, + "step": 10257 + }, + { + "epoch": 0.34, + "grad_norm": 0.7300161719322205, + "learning_rate": 1.5086580319775848e-05, + "loss": 2.0917, + "step": 10258 + }, + { + "epoch": 0.34, + "grad_norm": 0.7645954489707947, + "learning_rate": 1.5085665173554026e-05, + "loss": 2.1428, + "step": 10259 + }, + { + "epoch": 0.34, + "grad_norm": 0.7352378368377686, + "learning_rate": 1.5084749969877347e-05, + "loss": 2.0755, + "step": 10260 + }, + { + "epoch": 0.34, + "grad_norm": 0.7733477354049683, + "learning_rate": 1.5083834708756136e-05, + "loss": 2.0918, + "step": 10261 + }, + { + "epoch": 0.34, + "grad_norm": 0.7641263008117676, + "learning_rate": 1.5082919390200743e-05, + "loss": 2.1255, + "step": 10262 + }, + { + "epoch": 0.34, + "grad_norm": 0.7576525211334229, + "learning_rate": 1.5082004014221505e-05, + "loss": 2.1548, + "step": 10263 + }, + { + "epoch": 0.34, + "grad_norm": 0.7089620232582092, + "learning_rate": 1.5081088580828765e-05, + "loss": 2.0619, + "step": 10264 + }, + { + "epoch": 0.34, + "grad_norm": 0.7123631238937378, + "learning_rate": 1.5080173090032861e-05, + "loss": 2.1571, + "step": 10265 + }, + { + "epoch": 0.34, + "grad_norm": 0.7292981743812561, + "learning_rate": 1.5079257541844141e-05, + "loss": 2.1569, + "step": 10266 + }, + { + "epoch": 0.34, + "grad_norm": 0.6959831118583679, + "learning_rate": 1.5078341936272947e-05, + "loss": 2.1106, + "step": 10267 + }, + { + "epoch": 0.34, + "grad_norm": 0.7832688689231873, + "learning_rate": 1.5077426273329621e-05, + "loss": 2.1802, + "step": 10268 + }, + { + "epoch": 0.34, + "grad_norm": 0.7376165986061096, + "learning_rate": 1.5076510553024509e-05, + "loss": 2.0988, + "step": 10269 + }, + { + "epoch": 0.34, + "grad_norm": 0.7325858473777771, + "learning_rate": 1.5075594775367951e-05, + "loss": 2.08, + "step": 10270 + }, + { + "epoch": 0.34, + "grad_norm": 0.7741001844406128, + "learning_rate": 1.5074678940370304e-05, + "loss": 2.1681, + "step": 10271 + }, + { + "epoch": 0.34, + "grad_norm": 0.7445299625396729, + "learning_rate": 1.5073763048041908e-05, + "loss": 2.1172, + "step": 10272 + }, + { + "epoch": 0.34, + "grad_norm": 0.7177271246910095, + "learning_rate": 1.5072847098393107e-05, + "loss": 2.1041, + "step": 10273 + }, + { + "epoch": 0.34, + "grad_norm": 0.7253889441490173, + "learning_rate": 1.5071931091434256e-05, + "loss": 2.1462, + "step": 10274 + }, + { + "epoch": 0.34, + "grad_norm": 0.7408947348594666, + "learning_rate": 1.50710150271757e-05, + "loss": 2.1264, + "step": 10275 + }, + { + "epoch": 0.34, + "grad_norm": 0.7105762362480164, + "learning_rate": 1.5070098905627784e-05, + "loss": 2.1616, + "step": 10276 + }, + { + "epoch": 0.34, + "grad_norm": 0.7541555762290955, + "learning_rate": 1.5069182726800864e-05, + "loss": 2.0972, + "step": 10277 + }, + { + "epoch": 0.34, + "grad_norm": 0.709560751914978, + "learning_rate": 1.506826649070529e-05, + "loss": 2.1854, + "step": 10278 + }, + { + "epoch": 0.34, + "grad_norm": 0.7341075539588928, + "learning_rate": 1.5067350197351412e-05, + "loss": 2.112, + "step": 10279 + }, + { + "epoch": 0.34, + "grad_norm": 0.7066092491149902, + "learning_rate": 1.506643384674958e-05, + "loss": 2.113, + "step": 10280 + }, + { + "epoch": 0.34, + "grad_norm": 0.732146680355072, + "learning_rate": 1.5065517438910147e-05, + "loss": 2.0779, + "step": 10281 + }, + { + "epoch": 0.34, + "grad_norm": 0.7378636002540588, + "learning_rate": 1.5064600973843466e-05, + "loss": 2.1643, + "step": 10282 + }, + { + "epoch": 0.34, + "grad_norm": 0.6972429156303406, + "learning_rate": 1.5063684451559892e-05, + "loss": 2.1123, + "step": 10283 + }, + { + "epoch": 0.34, + "grad_norm": 0.7225801944732666, + "learning_rate": 1.5062767872069776e-05, + "loss": 2.1024, + "step": 10284 + }, + { + "epoch": 0.34, + "grad_norm": 0.7435677647590637, + "learning_rate": 1.5061851235383481e-05, + "loss": 2.1075, + "step": 10285 + }, + { + "epoch": 0.34, + "grad_norm": 0.7366815209388733, + "learning_rate": 1.5060934541511356e-05, + "loss": 2.1467, + "step": 10286 + }, + { + "epoch": 0.34, + "grad_norm": 0.7169926166534424, + "learning_rate": 1.5060017790463758e-05, + "loss": 2.1452, + "step": 10287 + }, + { + "epoch": 0.34, + "grad_norm": 0.7394550442695618, + "learning_rate": 1.5059100982251042e-05, + "loss": 2.0776, + "step": 10288 + }, + { + "epoch": 0.34, + "grad_norm": 0.7188639640808105, + "learning_rate": 1.505818411688357e-05, + "loss": 2.0668, + "step": 10289 + }, + { + "epoch": 0.34, + "grad_norm": 0.7361927032470703, + "learning_rate": 1.5057267194371698e-05, + "loss": 2.0387, + "step": 10290 + }, + { + "epoch": 0.34, + "grad_norm": 0.6998142004013062, + "learning_rate": 1.5056350214725788e-05, + "loss": 2.0848, + "step": 10291 + }, + { + "epoch": 0.34, + "grad_norm": 0.734306812286377, + "learning_rate": 1.5055433177956191e-05, + "loss": 2.1236, + "step": 10292 + }, + { + "epoch": 0.34, + "grad_norm": 0.7162696719169617, + "learning_rate": 1.5054516084073278e-05, + "loss": 2.1153, + "step": 10293 + }, + { + "epoch": 0.34, + "grad_norm": 0.7276742458343506, + "learning_rate": 1.5053598933087404e-05, + "loss": 2.1398, + "step": 10294 + }, + { + "epoch": 0.34, + "grad_norm": 0.7033008933067322, + "learning_rate": 1.5052681725008924e-05, + "loss": 2.0563, + "step": 10295 + }, + { + "epoch": 0.34, + "grad_norm": 0.7212196588516235, + "learning_rate": 1.5051764459848214e-05, + "loss": 2.1485, + "step": 10296 + }, + { + "epoch": 0.34, + "grad_norm": 0.7071161270141602, + "learning_rate": 1.5050847137615629e-05, + "loss": 2.145, + "step": 10297 + }, + { + "epoch": 0.34, + "grad_norm": 0.7241649031639099, + "learning_rate": 1.504992975832153e-05, + "loss": 2.122, + "step": 10298 + }, + { + "epoch": 0.34, + "grad_norm": 0.73282790184021, + "learning_rate": 1.5049012321976285e-05, + "loss": 2.1464, + "step": 10299 + }, + { + "epoch": 0.34, + "grad_norm": 0.6962893009185791, + "learning_rate": 1.5048094828590261e-05, + "loss": 2.0795, + "step": 10300 + }, + { + "epoch": 0.34, + "grad_norm": 0.7504282593727112, + "learning_rate": 1.5047177278173818e-05, + "loss": 2.1502, + "step": 10301 + }, + { + "epoch": 0.34, + "grad_norm": 0.7163746953010559, + "learning_rate": 1.5046259670737327e-05, + "loss": 2.0661, + "step": 10302 + }, + { + "epoch": 0.34, + "grad_norm": 0.7137919068336487, + "learning_rate": 1.5045342006291145e-05, + "loss": 2.0882, + "step": 10303 + }, + { + "epoch": 0.34, + "grad_norm": 0.7386662364006042, + "learning_rate": 1.5044424284845649e-05, + "loss": 2.0709, + "step": 10304 + }, + { + "epoch": 0.34, + "grad_norm": 0.7412835955619812, + "learning_rate": 1.5043506506411208e-05, + "loss": 2.0494, + "step": 10305 + }, + { + "epoch": 0.34, + "grad_norm": 0.7208580374717712, + "learning_rate": 1.5042588670998179e-05, + "loss": 2.1164, + "step": 10306 + }, + { + "epoch": 0.34, + "grad_norm": 0.7306938171386719, + "learning_rate": 1.5041670778616944e-05, + "loss": 2.1731, + "step": 10307 + }, + { + "epoch": 0.34, + "grad_norm": 0.7092154622077942, + "learning_rate": 1.5040752829277865e-05, + "loss": 2.0894, + "step": 10308 + }, + { + "epoch": 0.34, + "grad_norm": 0.7602466344833374, + "learning_rate": 1.5039834822991317e-05, + "loss": 2.1554, + "step": 10309 + }, + { + "epoch": 0.34, + "grad_norm": 0.7629290223121643, + "learning_rate": 1.5038916759767666e-05, + "loss": 2.1429, + "step": 10310 + }, + { + "epoch": 0.34, + "grad_norm": 0.7038528323173523, + "learning_rate": 1.5037998639617286e-05, + "loss": 2.0687, + "step": 10311 + }, + { + "epoch": 0.34, + "grad_norm": 0.7083216905593872, + "learning_rate": 1.5037080462550551e-05, + "loss": 2.1193, + "step": 10312 + }, + { + "epoch": 0.34, + "grad_norm": 0.7246485352516174, + "learning_rate": 1.5036162228577836e-05, + "loss": 2.1635, + "step": 10313 + }, + { + "epoch": 0.34, + "grad_norm": 0.7515565156936646, + "learning_rate": 1.5035243937709507e-05, + "loss": 2.0849, + "step": 10314 + }, + { + "epoch": 0.34, + "grad_norm": 0.7317960262298584, + "learning_rate": 1.5034325589955945e-05, + "loss": 2.1035, + "step": 10315 + }, + { + "epoch": 0.34, + "grad_norm": 0.6860764622688293, + "learning_rate": 1.5033407185327522e-05, + "loss": 2.1132, + "step": 10316 + }, + { + "epoch": 0.34, + "grad_norm": 0.7260316014289856, + "learning_rate": 1.5032488723834613e-05, + "loss": 2.1487, + "step": 10317 + }, + { + "epoch": 0.34, + "grad_norm": 0.7630167007446289, + "learning_rate": 1.50315702054876e-05, + "loss": 2.1187, + "step": 10318 + }, + { + "epoch": 0.34, + "grad_norm": 0.7175998687744141, + "learning_rate": 1.5030651630296853e-05, + "loss": 2.1117, + "step": 10319 + }, + { + "epoch": 0.34, + "grad_norm": 0.7112496495246887, + "learning_rate": 1.5029732998272754e-05, + "loss": 2.1334, + "step": 10320 + }, + { + "epoch": 0.34, + "grad_norm": 0.705259382724762, + "learning_rate": 1.5028814309425678e-05, + "loss": 2.1495, + "step": 10321 + }, + { + "epoch": 0.34, + "grad_norm": 0.7591829299926758, + "learning_rate": 1.5027895563766004e-05, + "loss": 2.0832, + "step": 10322 + }, + { + "epoch": 0.34, + "grad_norm": 0.7291409969329834, + "learning_rate": 1.5026976761304112e-05, + "loss": 2.1422, + "step": 10323 + }, + { + "epoch": 0.34, + "grad_norm": 0.715740978717804, + "learning_rate": 1.5026057902050389e-05, + "loss": 2.1182, + "step": 10324 + }, + { + "epoch": 0.34, + "grad_norm": 0.7589407563209534, + "learning_rate": 1.5025138986015203e-05, + "loss": 2.2218, + "step": 10325 + }, + { + "epoch": 0.34, + "grad_norm": 0.702508807182312, + "learning_rate": 1.5024220013208944e-05, + "loss": 2.1066, + "step": 10326 + }, + { + "epoch": 0.34, + "grad_norm": 0.7366430759429932, + "learning_rate": 1.5023300983641991e-05, + "loss": 2.1699, + "step": 10327 + }, + { + "epoch": 0.34, + "grad_norm": 0.714024543762207, + "learning_rate": 1.5022381897324726e-05, + "loss": 2.1521, + "step": 10328 + }, + { + "epoch": 0.34, + "grad_norm": 0.7490982413291931, + "learning_rate": 1.5021462754267536e-05, + "loss": 2.1693, + "step": 10329 + }, + { + "epoch": 0.34, + "grad_norm": 0.7185238003730774, + "learning_rate": 1.5020543554480802e-05, + "loss": 2.1678, + "step": 10330 + }, + { + "epoch": 0.34, + "grad_norm": 0.7415304780006409, + "learning_rate": 1.5019624297974908e-05, + "loss": 2.1903, + "step": 10331 + }, + { + "epoch": 0.34, + "grad_norm": 0.714748203754425, + "learning_rate": 1.5018704984760244e-05, + "loss": 2.1323, + "step": 10332 + }, + { + "epoch": 0.34, + "grad_norm": 0.7412862777709961, + "learning_rate": 1.5017785614847189e-05, + "loss": 2.1295, + "step": 10333 + }, + { + "epoch": 0.34, + "grad_norm": 0.729502260684967, + "learning_rate": 1.5016866188246132e-05, + "loss": 2.1134, + "step": 10334 + }, + { + "epoch": 0.34, + "grad_norm": 0.7605142593383789, + "learning_rate": 1.5015946704967465e-05, + "loss": 2.1107, + "step": 10335 + }, + { + "epoch": 0.34, + "grad_norm": 0.7379709482192993, + "learning_rate": 1.5015027165021568e-05, + "loss": 2.1928, + "step": 10336 + }, + { + "epoch": 0.34, + "grad_norm": 0.7751691341400146, + "learning_rate": 1.5014107568418834e-05, + "loss": 2.1, + "step": 10337 + }, + { + "epoch": 0.34, + "grad_norm": 0.7118521928787231, + "learning_rate": 1.5013187915169653e-05, + "loss": 2.084, + "step": 10338 + }, + { + "epoch": 0.34, + "grad_norm": 0.6936752796173096, + "learning_rate": 1.5012268205284412e-05, + "loss": 2.1154, + "step": 10339 + }, + { + "epoch": 0.34, + "grad_norm": 0.7451189756393433, + "learning_rate": 1.5011348438773503e-05, + "loss": 2.1776, + "step": 10340 + }, + { + "epoch": 0.34, + "grad_norm": 0.7534626722335815, + "learning_rate": 1.5010428615647317e-05, + "loss": 2.1622, + "step": 10341 + }, + { + "epoch": 0.34, + "grad_norm": 0.726686418056488, + "learning_rate": 1.5009508735916242e-05, + "loss": 2.1138, + "step": 10342 + }, + { + "epoch": 0.34, + "grad_norm": 0.7750912308692932, + "learning_rate": 1.5008588799590675e-05, + "loss": 2.0848, + "step": 10343 + }, + { + "epoch": 0.34, + "grad_norm": 0.7680246829986572, + "learning_rate": 1.5007668806681009e-05, + "loss": 2.0902, + "step": 10344 + }, + { + "epoch": 0.34, + "grad_norm": 0.7062492370605469, + "learning_rate": 1.5006748757197632e-05, + "loss": 2.0987, + "step": 10345 + }, + { + "epoch": 0.34, + "grad_norm": 0.7390596866607666, + "learning_rate": 1.5005828651150942e-05, + "loss": 2.053, + "step": 10346 + }, + { + "epoch": 0.34, + "grad_norm": 0.7019400000572205, + "learning_rate": 1.5004908488551336e-05, + "loss": 2.1106, + "step": 10347 + }, + { + "epoch": 0.34, + "grad_norm": 0.7123686671257019, + "learning_rate": 1.5003988269409205e-05, + "loss": 2.1459, + "step": 10348 + }, + { + "epoch": 0.34, + "grad_norm": 0.7197229862213135, + "learning_rate": 1.5003067993734947e-05, + "loss": 2.1655, + "step": 10349 + }, + { + "epoch": 0.34, + "grad_norm": 0.7065096497535706, + "learning_rate": 1.500214766153896e-05, + "loss": 2.1101, + "step": 10350 + }, + { + "epoch": 0.34, + "grad_norm": 0.6985183358192444, + "learning_rate": 1.5001227272831642e-05, + "loss": 2.1329, + "step": 10351 + }, + { + "epoch": 0.34, + "grad_norm": 0.7091954946517944, + "learning_rate": 1.5000306827623386e-05, + "loss": 2.0944, + "step": 10352 + }, + { + "epoch": 0.34, + "grad_norm": 0.7143648862838745, + "learning_rate": 1.4999386325924596e-05, + "loss": 2.0991, + "step": 10353 + }, + { + "epoch": 0.34, + "grad_norm": 0.7221906781196594, + "learning_rate": 1.4998465767745667e-05, + "loss": 2.125, + "step": 10354 + }, + { + "epoch": 0.34, + "grad_norm": 0.6930164098739624, + "learning_rate": 1.4997545153097004e-05, + "loss": 2.0814, + "step": 10355 + }, + { + "epoch": 0.34, + "grad_norm": 0.7225677967071533, + "learning_rate": 1.4996624481989001e-05, + "loss": 2.0677, + "step": 10356 + }, + { + "epoch": 0.34, + "grad_norm": 0.7153261303901672, + "learning_rate": 1.4995703754432067e-05, + "loss": 2.0675, + "step": 10357 + }, + { + "epoch": 0.34, + "grad_norm": 0.6866571307182312, + "learning_rate": 1.49947829704366e-05, + "loss": 2.0981, + "step": 10358 + }, + { + "epoch": 0.34, + "grad_norm": 0.7465630769729614, + "learning_rate": 1.4993862130012998e-05, + "loss": 2.1825, + "step": 10359 + }, + { + "epoch": 0.34, + "grad_norm": 0.7296932339668274, + "learning_rate": 1.4992941233171669e-05, + "loss": 2.1167, + "step": 10360 + }, + { + "epoch": 0.34, + "grad_norm": 0.7211138606071472, + "learning_rate": 1.4992020279923018e-05, + "loss": 2.0485, + "step": 10361 + }, + { + "epoch": 0.34, + "grad_norm": 0.7493512034416199, + "learning_rate": 1.499109927027745e-05, + "loss": 2.059, + "step": 10362 + }, + { + "epoch": 0.34, + "grad_norm": 0.7193006873130798, + "learning_rate": 1.4990178204245362e-05, + "loss": 2.165, + "step": 10363 + }, + { + "epoch": 0.34, + "grad_norm": 0.6909987926483154, + "learning_rate": 1.498925708183717e-05, + "loss": 2.0426, + "step": 10364 + }, + { + "epoch": 0.34, + "grad_norm": 0.7321874499320984, + "learning_rate": 1.4988335903063273e-05, + "loss": 2.1224, + "step": 10365 + }, + { + "epoch": 0.34, + "grad_norm": 0.7004453539848328, + "learning_rate": 1.498741466793408e-05, + "loss": 2.0681, + "step": 10366 + }, + { + "epoch": 0.34, + "grad_norm": 0.7139745354652405, + "learning_rate": 1.498649337646e-05, + "loss": 2.087, + "step": 10367 + }, + { + "epoch": 0.34, + "grad_norm": 0.7002428770065308, + "learning_rate": 1.4985572028651441e-05, + "loss": 2.0921, + "step": 10368 + }, + { + "epoch": 0.34, + "grad_norm": 0.7547236084938049, + "learning_rate": 1.4984650624518809e-05, + "loss": 2.1227, + "step": 10369 + }, + { + "epoch": 0.35, + "grad_norm": 0.7229976654052734, + "learning_rate": 1.4983729164072516e-05, + "loss": 2.1896, + "step": 10370 + }, + { + "epoch": 0.35, + "grad_norm": 0.7958829998970032, + "learning_rate": 1.4982807647322972e-05, + "loss": 2.1319, + "step": 10371 + }, + { + "epoch": 0.35, + "grad_norm": 0.754949152469635, + "learning_rate": 1.4981886074280588e-05, + "loss": 2.1197, + "step": 10372 + }, + { + "epoch": 0.35, + "grad_norm": 0.7111319899559021, + "learning_rate": 1.4980964444955776e-05, + "loss": 2.1018, + "step": 10373 + }, + { + "epoch": 0.35, + "grad_norm": 0.748814046382904, + "learning_rate": 1.4980042759358944e-05, + "loss": 2.1299, + "step": 10374 + }, + { + "epoch": 0.35, + "grad_norm": 0.7135460376739502, + "learning_rate": 1.4979121017500512e-05, + "loss": 2.1592, + "step": 10375 + }, + { + "epoch": 0.35, + "grad_norm": 0.731593668460846, + "learning_rate": 1.4978199219390884e-05, + "loss": 2.1546, + "step": 10376 + }, + { + "epoch": 0.35, + "grad_norm": 0.7230343818664551, + "learning_rate": 1.4977277365040477e-05, + "loss": 2.1055, + "step": 10377 + }, + { + "epoch": 0.35, + "grad_norm": 0.7991068363189697, + "learning_rate": 1.4976355454459712e-05, + "loss": 2.133, + "step": 10378 + }, + { + "epoch": 0.35, + "grad_norm": 0.7679474353790283, + "learning_rate": 1.4975433487658998e-05, + "loss": 2.0911, + "step": 10379 + }, + { + "epoch": 0.35, + "grad_norm": 0.7250180840492249, + "learning_rate": 1.4974511464648753e-05, + "loss": 2.1292, + "step": 10380 + }, + { + "epoch": 0.35, + "grad_norm": 0.736171543598175, + "learning_rate": 1.4973589385439393e-05, + "loss": 2.0646, + "step": 10381 + }, + { + "epoch": 0.35, + "grad_norm": 0.7305517792701721, + "learning_rate": 1.4972667250041332e-05, + "loss": 2.0894, + "step": 10382 + }, + { + "epoch": 0.35, + "grad_norm": 0.7441668510437012, + "learning_rate": 1.4971745058464993e-05, + "loss": 2.0548, + "step": 10383 + }, + { + "epoch": 0.35, + "grad_norm": 0.7640621066093445, + "learning_rate": 1.4970822810720793e-05, + "loss": 2.1489, + "step": 10384 + }, + { + "epoch": 0.35, + "grad_norm": 0.6901702880859375, + "learning_rate": 1.4969900506819149e-05, + "loss": 2.0988, + "step": 10385 + }, + { + "epoch": 0.35, + "grad_norm": 0.7494155764579773, + "learning_rate": 1.4968978146770483e-05, + "loss": 2.0859, + "step": 10386 + }, + { + "epoch": 0.35, + "grad_norm": 0.7437639832496643, + "learning_rate": 1.4968055730585214e-05, + "loss": 2.0612, + "step": 10387 + }, + { + "epoch": 0.35, + "grad_norm": 0.716599702835083, + "learning_rate": 1.4967133258273759e-05, + "loss": 2.1612, + "step": 10388 + }, + { + "epoch": 0.35, + "grad_norm": 0.7363559603691101, + "learning_rate": 1.4966210729846547e-05, + "loss": 2.115, + "step": 10389 + }, + { + "epoch": 0.35, + "grad_norm": 0.7077569961547852, + "learning_rate": 1.4965288145313994e-05, + "loss": 2.0629, + "step": 10390 + }, + { + "epoch": 0.35, + "grad_norm": 0.7086566090583801, + "learning_rate": 1.4964365504686529e-05, + "loss": 2.1083, + "step": 10391 + }, + { + "epoch": 0.35, + "grad_norm": 0.698201060295105, + "learning_rate": 1.496344280797457e-05, + "loss": 2.1248, + "step": 10392 + }, + { + "epoch": 0.35, + "grad_norm": 0.7233189940452576, + "learning_rate": 1.4962520055188543e-05, + "loss": 2.1615, + "step": 10393 + }, + { + "epoch": 0.35, + "grad_norm": 0.7760753035545349, + "learning_rate": 1.4961597246338873e-05, + "loss": 2.2683, + "step": 10394 + }, + { + "epoch": 0.35, + "grad_norm": 0.7282745242118835, + "learning_rate": 1.4960674381435986e-05, + "loss": 2.1014, + "step": 10395 + }, + { + "epoch": 0.35, + "grad_norm": 0.7469928860664368, + "learning_rate": 1.4959751460490303e-05, + "loss": 2.0985, + "step": 10396 + }, + { + "epoch": 0.35, + "grad_norm": 0.7423644661903381, + "learning_rate": 1.4958828483512263e-05, + "loss": 2.0822, + "step": 10397 + }, + { + "epoch": 0.35, + "grad_norm": 0.6997212171554565, + "learning_rate": 1.4957905450512278e-05, + "loss": 2.0564, + "step": 10398 + }, + { + "epoch": 0.35, + "grad_norm": 0.7488000988960266, + "learning_rate": 1.4956982361500782e-05, + "loss": 2.1356, + "step": 10399 + }, + { + "epoch": 0.35, + "grad_norm": 0.6968668103218079, + "learning_rate": 1.4956059216488208e-05, + "loss": 2.0392, + "step": 10400 + }, + { + "epoch": 0.35, + "grad_norm": 0.7572245597839355, + "learning_rate": 1.495513601548498e-05, + "loss": 2.1186, + "step": 10401 + }, + { + "epoch": 0.35, + "grad_norm": 0.7399706840515137, + "learning_rate": 1.4954212758501529e-05, + "loss": 2.2061, + "step": 10402 + }, + { + "epoch": 0.35, + "grad_norm": 0.7279837727546692, + "learning_rate": 1.495328944554829e-05, + "loss": 2.1012, + "step": 10403 + }, + { + "epoch": 0.35, + "grad_norm": 0.7321770787239075, + "learning_rate": 1.4952366076635685e-05, + "loss": 2.1344, + "step": 10404 + }, + { + "epoch": 0.35, + "grad_norm": 0.724321186542511, + "learning_rate": 1.495144265177415e-05, + "loss": 2.0893, + "step": 10405 + }, + { + "epoch": 0.35, + "grad_norm": 0.7022022008895874, + "learning_rate": 1.4950519170974123e-05, + "loss": 2.1125, + "step": 10406 + }, + { + "epoch": 0.35, + "grad_norm": 0.7245703339576721, + "learning_rate": 1.4949595634246024e-05, + "loss": 2.0896, + "step": 10407 + }, + { + "epoch": 0.35, + "grad_norm": 0.7087785601615906, + "learning_rate": 1.4948672041600298e-05, + "loss": 2.1412, + "step": 10408 + }, + { + "epoch": 0.35, + "grad_norm": 0.7188130617141724, + "learning_rate": 1.494774839304738e-05, + "loss": 2.0717, + "step": 10409 + }, + { + "epoch": 0.35, + "grad_norm": 0.7065389752388, + "learning_rate": 1.4946824688597694e-05, + "loss": 2.0767, + "step": 10410 + }, + { + "epoch": 0.35, + "grad_norm": 0.7327172756195068, + "learning_rate": 1.4945900928261684e-05, + "loss": 2.0997, + "step": 10411 + }, + { + "epoch": 0.35, + "grad_norm": 0.7502834796905518, + "learning_rate": 1.4944977112049786e-05, + "loss": 2.089, + "step": 10412 + }, + { + "epoch": 0.35, + "grad_norm": 0.7189309000968933, + "learning_rate": 1.4944053239972431e-05, + "loss": 2.0768, + "step": 10413 + }, + { + "epoch": 0.35, + "grad_norm": 0.7220255732536316, + "learning_rate": 1.4943129312040062e-05, + "loss": 2.1049, + "step": 10414 + }, + { + "epoch": 0.35, + "grad_norm": 0.7026408314704895, + "learning_rate": 1.4942205328263114e-05, + "loss": 2.1123, + "step": 10415 + }, + { + "epoch": 0.35, + "grad_norm": 0.7300592064857483, + "learning_rate": 1.4941281288652025e-05, + "loss": 2.1302, + "step": 10416 + }, + { + "epoch": 0.35, + "grad_norm": 0.7076727747917175, + "learning_rate": 1.494035719321724e-05, + "loss": 2.1603, + "step": 10417 + }, + { + "epoch": 0.35, + "grad_norm": 0.7582526206970215, + "learning_rate": 1.4939433041969192e-05, + "loss": 2.2236, + "step": 10418 + }, + { + "epoch": 0.35, + "grad_norm": 0.7354879975318909, + "learning_rate": 1.4938508834918324e-05, + "loss": 2.1324, + "step": 10419 + }, + { + "epoch": 0.35, + "grad_norm": 0.7018381953239441, + "learning_rate": 1.493758457207508e-05, + "loss": 2.0883, + "step": 10420 + }, + { + "epoch": 0.35, + "grad_norm": 0.7061810493469238, + "learning_rate": 1.4936660253449898e-05, + "loss": 2.1218, + "step": 10421 + }, + { + "epoch": 0.35, + "grad_norm": 0.7005699872970581, + "learning_rate": 1.4935735879053221e-05, + "loss": 2.0894, + "step": 10422 + }, + { + "epoch": 0.35, + "grad_norm": 0.7697465419769287, + "learning_rate": 1.4934811448895495e-05, + "loss": 2.1483, + "step": 10423 + }, + { + "epoch": 0.35, + "grad_norm": 0.7204517722129822, + "learning_rate": 1.4933886962987156e-05, + "loss": 2.1694, + "step": 10424 + }, + { + "epoch": 0.35, + "grad_norm": 0.722022294998169, + "learning_rate": 1.4932962421338659e-05, + "loss": 2.165, + "step": 10425 + }, + { + "epoch": 0.35, + "grad_norm": 0.7072915434837341, + "learning_rate": 1.493203782396044e-05, + "loss": 2.1159, + "step": 10426 + }, + { + "epoch": 0.35, + "grad_norm": 0.717031717300415, + "learning_rate": 1.493111317086295e-05, + "loss": 2.044, + "step": 10427 + }, + { + "epoch": 0.35, + "grad_norm": 0.7328306436538696, + "learning_rate": 1.4930188462056636e-05, + "loss": 2.0956, + "step": 10428 + }, + { + "epoch": 0.35, + "grad_norm": 0.7058218121528625, + "learning_rate": 1.4929263697551937e-05, + "loss": 2.134, + "step": 10429 + }, + { + "epoch": 0.35, + "grad_norm": 0.7317168116569519, + "learning_rate": 1.4928338877359308e-05, + "loss": 2.1048, + "step": 10430 + }, + { + "epoch": 0.35, + "grad_norm": 0.7269558906555176, + "learning_rate": 1.4927414001489196e-05, + "loss": 2.1082, + "step": 10431 + }, + { + "epoch": 0.35, + "grad_norm": 0.7535553574562073, + "learning_rate": 1.4926489069952043e-05, + "loss": 2.0856, + "step": 10432 + }, + { + "epoch": 0.35, + "grad_norm": 0.7451537251472473, + "learning_rate": 1.492556408275831e-05, + "loss": 2.1492, + "step": 10433 + }, + { + "epoch": 0.35, + "grad_norm": 0.714319109916687, + "learning_rate": 1.4924639039918436e-05, + "loss": 2.0898, + "step": 10434 + }, + { + "epoch": 0.35, + "grad_norm": 0.697678804397583, + "learning_rate": 1.4923713941442878e-05, + "loss": 2.1035, + "step": 10435 + }, + { + "epoch": 0.35, + "grad_norm": 0.7103835940361023, + "learning_rate": 1.4922788787342086e-05, + "loss": 2.0643, + "step": 10436 + }, + { + "epoch": 0.35, + "grad_norm": 0.7077265977859497, + "learning_rate": 1.4921863577626513e-05, + "loss": 2.1212, + "step": 10437 + }, + { + "epoch": 0.35, + "grad_norm": 0.7205677032470703, + "learning_rate": 1.4920938312306605e-05, + "loss": 2.1544, + "step": 10438 + }, + { + "epoch": 0.35, + "grad_norm": 0.7194802165031433, + "learning_rate": 1.4920012991392826e-05, + "loss": 2.0831, + "step": 10439 + }, + { + "epoch": 0.35, + "grad_norm": 0.717306911945343, + "learning_rate": 1.4919087614895618e-05, + "loss": 2.1347, + "step": 10440 + }, + { + "epoch": 0.35, + "grad_norm": 0.7321506142616272, + "learning_rate": 1.4918162182825441e-05, + "loss": 2.1455, + "step": 10441 + }, + { + "epoch": 0.35, + "grad_norm": 0.7375169992446899, + "learning_rate": 1.4917236695192756e-05, + "loss": 2.0854, + "step": 10442 + }, + { + "epoch": 0.35, + "grad_norm": 0.7321113348007202, + "learning_rate": 1.4916311152008007e-05, + "loss": 2.0741, + "step": 10443 + }, + { + "epoch": 0.35, + "grad_norm": 0.7229140400886536, + "learning_rate": 1.491538555328166e-05, + "loss": 2.1295, + "step": 10444 + }, + { + "epoch": 0.35, + "grad_norm": 0.6926165819168091, + "learning_rate": 1.4914459899024165e-05, + "loss": 2.0971, + "step": 10445 + }, + { + "epoch": 0.35, + "grad_norm": 0.7356226444244385, + "learning_rate": 1.4913534189245983e-05, + "loss": 2.0913, + "step": 10446 + }, + { + "epoch": 0.35, + "grad_norm": 0.6954305768013, + "learning_rate": 1.4912608423957572e-05, + "loss": 2.1465, + "step": 10447 + }, + { + "epoch": 0.35, + "grad_norm": 0.7343324422836304, + "learning_rate": 1.491168260316939e-05, + "loss": 2.0841, + "step": 10448 + }, + { + "epoch": 0.35, + "grad_norm": 0.7240501642227173, + "learning_rate": 1.4910756726891892e-05, + "loss": 2.0793, + "step": 10449 + }, + { + "epoch": 0.35, + "grad_norm": 0.7118881344795227, + "learning_rate": 1.4909830795135546e-05, + "loss": 2.0981, + "step": 10450 + }, + { + "epoch": 0.35, + "grad_norm": 0.7070509195327759, + "learning_rate": 1.4908904807910811e-05, + "loss": 2.0921, + "step": 10451 + }, + { + "epoch": 0.35, + "grad_norm": 0.7430326342582703, + "learning_rate": 1.4907978765228143e-05, + "loss": 2.1043, + "step": 10452 + }, + { + "epoch": 0.35, + "grad_norm": 0.7531911134719849, + "learning_rate": 1.490705266709801e-05, + "loss": 2.0278, + "step": 10453 + }, + { + "epoch": 0.35, + "grad_norm": 0.7078771591186523, + "learning_rate": 1.490612651353087e-05, + "loss": 2.0929, + "step": 10454 + }, + { + "epoch": 0.35, + "grad_norm": 0.7775567770004272, + "learning_rate": 1.490520030453719e-05, + "loss": 2.0864, + "step": 10455 + }, + { + "epoch": 0.35, + "grad_norm": 0.7243703007698059, + "learning_rate": 1.4904274040127428e-05, + "loss": 2.0745, + "step": 10456 + }, + { + "epoch": 0.35, + "grad_norm": 0.7435544729232788, + "learning_rate": 1.4903347720312055e-05, + "loss": 2.0954, + "step": 10457 + }, + { + "epoch": 0.35, + "grad_norm": 0.7295030355453491, + "learning_rate": 1.4902421345101533e-05, + "loss": 2.0956, + "step": 10458 + }, + { + "epoch": 0.35, + "grad_norm": 0.7206933498382568, + "learning_rate": 1.490149491450633e-05, + "loss": 2.1157, + "step": 10459 + }, + { + "epoch": 0.35, + "grad_norm": 0.7240864634513855, + "learning_rate": 1.4900568428536906e-05, + "loss": 2.1242, + "step": 10460 + }, + { + "epoch": 0.35, + "grad_norm": 0.7165495753288269, + "learning_rate": 1.4899641887203733e-05, + "loss": 2.128, + "step": 10461 + }, + { + "epoch": 0.35, + "grad_norm": 0.7247999906539917, + "learning_rate": 1.4898715290517276e-05, + "loss": 2.1085, + "step": 10462 + }, + { + "epoch": 0.35, + "grad_norm": 0.7359534502029419, + "learning_rate": 1.4897788638488007e-05, + "loss": 2.0735, + "step": 10463 + }, + { + "epoch": 0.35, + "grad_norm": 0.7117490768432617, + "learning_rate": 1.4896861931126391e-05, + "loss": 2.0801, + "step": 10464 + }, + { + "epoch": 0.35, + "grad_norm": 0.7139006853103638, + "learning_rate": 1.48959351684429e-05, + "loss": 2.1382, + "step": 10465 + }, + { + "epoch": 0.35, + "grad_norm": 0.7375780344009399, + "learning_rate": 1.4895008350448004e-05, + "loss": 2.158, + "step": 10466 + }, + { + "epoch": 0.35, + "grad_norm": 0.7131018042564392, + "learning_rate": 1.4894081477152167e-05, + "loss": 2.0598, + "step": 10467 + }, + { + "epoch": 0.35, + "grad_norm": 0.7269315123558044, + "learning_rate": 1.4893154548565871e-05, + "loss": 2.1019, + "step": 10468 + }, + { + "epoch": 0.35, + "grad_norm": 0.6978451609611511, + "learning_rate": 1.4892227564699581e-05, + "loss": 2.0587, + "step": 10469 + }, + { + "epoch": 0.35, + "grad_norm": 0.7096911668777466, + "learning_rate": 1.489130052556377e-05, + "loss": 2.13, + "step": 10470 + }, + { + "epoch": 0.35, + "grad_norm": 0.719523549079895, + "learning_rate": 1.4890373431168913e-05, + "loss": 2.0238, + "step": 10471 + }, + { + "epoch": 0.35, + "grad_norm": 0.6851069927215576, + "learning_rate": 1.4889446281525484e-05, + "loss": 2.0578, + "step": 10472 + }, + { + "epoch": 0.35, + "grad_norm": 0.7608821988105774, + "learning_rate": 1.4888519076643954e-05, + "loss": 2.1292, + "step": 10473 + }, + { + "epoch": 0.35, + "grad_norm": 0.7138228416442871, + "learning_rate": 1.4887591816534803e-05, + "loss": 2.0917, + "step": 10474 + }, + { + "epoch": 0.35, + "grad_norm": 0.7031155824661255, + "learning_rate": 1.4886664501208503e-05, + "loss": 2.1274, + "step": 10475 + }, + { + "epoch": 0.35, + "grad_norm": 0.6930181980133057, + "learning_rate": 1.488573713067553e-05, + "loss": 2.0519, + "step": 10476 + }, + { + "epoch": 0.35, + "grad_norm": 0.7102878093719482, + "learning_rate": 1.4884809704946365e-05, + "loss": 2.1132, + "step": 10477 + }, + { + "epoch": 0.35, + "grad_norm": 0.7405447959899902, + "learning_rate": 1.488388222403148e-05, + "loss": 2.1209, + "step": 10478 + }, + { + "epoch": 0.35, + "grad_norm": 0.7276825904846191, + "learning_rate": 1.488295468794136e-05, + "loss": 2.0838, + "step": 10479 + }, + { + "epoch": 0.35, + "grad_norm": 0.7264253497123718, + "learning_rate": 1.4882027096686476e-05, + "loss": 2.1321, + "step": 10480 + }, + { + "epoch": 0.35, + "grad_norm": 0.7306627035140991, + "learning_rate": 1.488109945027731e-05, + "loss": 2.1571, + "step": 10481 + }, + { + "epoch": 0.35, + "grad_norm": 0.7229866981506348, + "learning_rate": 1.4880171748724346e-05, + "loss": 2.0787, + "step": 10482 + }, + { + "epoch": 0.35, + "grad_norm": 0.7677871584892273, + "learning_rate": 1.487924399203806e-05, + "loss": 2.101, + "step": 10483 + }, + { + "epoch": 0.35, + "grad_norm": 0.7415304183959961, + "learning_rate": 1.4878316180228936e-05, + "loss": 2.1051, + "step": 10484 + }, + { + "epoch": 0.35, + "grad_norm": 0.7222515940666199, + "learning_rate": 1.4877388313307453e-05, + "loss": 2.0475, + "step": 10485 + }, + { + "epoch": 0.35, + "grad_norm": 0.7173749804496765, + "learning_rate": 1.4876460391284095e-05, + "loss": 2.1455, + "step": 10486 + }, + { + "epoch": 0.35, + "grad_norm": 0.7043889760971069, + "learning_rate": 1.4875532414169347e-05, + "loss": 2.1354, + "step": 10487 + }, + { + "epoch": 0.35, + "grad_norm": 0.7284137606620789, + "learning_rate": 1.4874604381973694e-05, + "loss": 2.0635, + "step": 10488 + }, + { + "epoch": 0.35, + "grad_norm": 0.7181965112686157, + "learning_rate": 1.4873676294707612e-05, + "loss": 2.0734, + "step": 10489 + }, + { + "epoch": 0.35, + "grad_norm": 0.7312502861022949, + "learning_rate": 1.4872748152381595e-05, + "loss": 2.1388, + "step": 10490 + }, + { + "epoch": 0.35, + "grad_norm": 0.7134490609169006, + "learning_rate": 1.4871819955006123e-05, + "loss": 2.0798, + "step": 10491 + }, + { + "epoch": 0.35, + "grad_norm": 0.728509247303009, + "learning_rate": 1.4870891702591684e-05, + "loss": 2.104, + "step": 10492 + }, + { + "epoch": 0.35, + "grad_norm": 0.7252039909362793, + "learning_rate": 1.4869963395148768e-05, + "loss": 2.1094, + "step": 10493 + }, + { + "epoch": 0.35, + "grad_norm": 0.7166321277618408, + "learning_rate": 1.4869035032687857e-05, + "loss": 2.1168, + "step": 10494 + }, + { + "epoch": 0.35, + "grad_norm": 0.7361246347427368, + "learning_rate": 1.4868106615219444e-05, + "loss": 2.1154, + "step": 10495 + }, + { + "epoch": 0.35, + "grad_norm": 0.7485746741294861, + "learning_rate": 1.4867178142754015e-05, + "loss": 2.0781, + "step": 10496 + }, + { + "epoch": 0.35, + "grad_norm": 0.7183189988136292, + "learning_rate": 1.4866249615302057e-05, + "loss": 2.1706, + "step": 10497 + }, + { + "epoch": 0.35, + "grad_norm": 0.7119051218032837, + "learning_rate": 1.4865321032874064e-05, + "loss": 2.166, + "step": 10498 + }, + { + "epoch": 0.35, + "grad_norm": 0.7493982315063477, + "learning_rate": 1.486439239548053e-05, + "loss": 2.0672, + "step": 10499 + }, + { + "epoch": 0.35, + "grad_norm": 0.7383939623832703, + "learning_rate": 1.4863463703131936e-05, + "loss": 2.1507, + "step": 10500 + }, + { + "epoch": 0.35, + "grad_norm": 0.7388848662376404, + "learning_rate": 1.4862534955838781e-05, + "loss": 2.1251, + "step": 10501 + }, + { + "epoch": 0.35, + "grad_norm": 0.7299036383628845, + "learning_rate": 1.4861606153611556e-05, + "loss": 2.1879, + "step": 10502 + }, + { + "epoch": 0.35, + "grad_norm": 0.7220633029937744, + "learning_rate": 1.4860677296460751e-05, + "loss": 2.1471, + "step": 10503 + }, + { + "epoch": 0.35, + "grad_norm": 0.7154396176338196, + "learning_rate": 1.4859748384396868e-05, + "loss": 2.1351, + "step": 10504 + }, + { + "epoch": 0.35, + "grad_norm": 0.7401567697525024, + "learning_rate": 1.4858819417430392e-05, + "loss": 2.0747, + "step": 10505 + }, + { + "epoch": 0.35, + "grad_norm": 0.7309392690658569, + "learning_rate": 1.4857890395571823e-05, + "loss": 2.1357, + "step": 10506 + }, + { + "epoch": 0.35, + "grad_norm": 0.7369203567504883, + "learning_rate": 1.4856961318831655e-05, + "loss": 2.1082, + "step": 10507 + }, + { + "epoch": 0.35, + "grad_norm": 0.7487432956695557, + "learning_rate": 1.4856032187220384e-05, + "loss": 2.0868, + "step": 10508 + }, + { + "epoch": 0.35, + "grad_norm": 0.7392724752426147, + "learning_rate": 1.4855103000748507e-05, + "loss": 2.0824, + "step": 10509 + }, + { + "epoch": 0.35, + "grad_norm": 0.6983421444892883, + "learning_rate": 1.4854173759426527e-05, + "loss": 2.1034, + "step": 10510 + }, + { + "epoch": 0.35, + "grad_norm": 0.7250156998634338, + "learning_rate": 1.4853244463264929e-05, + "loss": 2.184, + "step": 10511 + }, + { + "epoch": 0.35, + "grad_norm": 0.7094531655311584, + "learning_rate": 1.4852315112274225e-05, + "loss": 2.1054, + "step": 10512 + }, + { + "epoch": 0.35, + "grad_norm": 0.7636260390281677, + "learning_rate": 1.4851385706464904e-05, + "loss": 2.1546, + "step": 10513 + }, + { + "epoch": 0.35, + "grad_norm": 0.7198039889335632, + "learning_rate": 1.4850456245847472e-05, + "loss": 2.0479, + "step": 10514 + }, + { + "epoch": 0.35, + "grad_norm": 0.7443731427192688, + "learning_rate": 1.4849526730432428e-05, + "loss": 2.106, + "step": 10515 + }, + { + "epoch": 0.35, + "grad_norm": 0.7250052690505981, + "learning_rate": 1.4848597160230276e-05, + "loss": 2.1323, + "step": 10516 + }, + { + "epoch": 0.35, + "grad_norm": 0.7147414088249207, + "learning_rate": 1.484766753525151e-05, + "loss": 2.1115, + "step": 10517 + }, + { + "epoch": 0.35, + "grad_norm": 0.7056184411048889, + "learning_rate": 1.484673785550664e-05, + "loss": 2.1591, + "step": 10518 + }, + { + "epoch": 0.35, + "grad_norm": 0.7206652760505676, + "learning_rate": 1.4845808121006166e-05, + "loss": 2.0536, + "step": 10519 + }, + { + "epoch": 0.35, + "grad_norm": 0.7376263737678528, + "learning_rate": 1.4844878331760593e-05, + "loss": 2.1601, + "step": 10520 + }, + { + "epoch": 0.35, + "grad_norm": 0.7320221066474915, + "learning_rate": 1.4843948487780425e-05, + "loss": 2.0822, + "step": 10521 + }, + { + "epoch": 0.35, + "grad_norm": 0.7012993693351746, + "learning_rate": 1.484301858907616e-05, + "loss": 2.1, + "step": 10522 + }, + { + "epoch": 0.35, + "grad_norm": 0.7236185073852539, + "learning_rate": 1.4842088635658312e-05, + "loss": 2.0744, + "step": 10523 + }, + { + "epoch": 0.35, + "grad_norm": 0.7262057065963745, + "learning_rate": 1.484115862753739e-05, + "loss": 2.102, + "step": 10524 + }, + { + "epoch": 0.35, + "grad_norm": 0.7356593608856201, + "learning_rate": 1.4840228564723887e-05, + "loss": 2.1163, + "step": 10525 + }, + { + "epoch": 0.35, + "grad_norm": 0.7310892939567566, + "learning_rate": 1.483929844722832e-05, + "loss": 2.1092, + "step": 10526 + }, + { + "epoch": 0.35, + "grad_norm": 0.7332732081413269, + "learning_rate": 1.48383682750612e-05, + "loss": 2.1135, + "step": 10527 + }, + { + "epoch": 0.35, + "grad_norm": 0.7348324060440063, + "learning_rate": 1.4837438048233026e-05, + "loss": 2.098, + "step": 10528 + }, + { + "epoch": 0.35, + "grad_norm": 0.7708093523979187, + "learning_rate": 1.4836507766754314e-05, + "loss": 2.1329, + "step": 10529 + }, + { + "epoch": 0.35, + "grad_norm": 0.7241346836090088, + "learning_rate": 1.483557743063557e-05, + "loss": 2.1412, + "step": 10530 + }, + { + "epoch": 0.35, + "grad_norm": 0.7094256281852722, + "learning_rate": 1.4834647039887307e-05, + "loss": 2.1273, + "step": 10531 + }, + { + "epoch": 0.35, + "grad_norm": 0.7067854404449463, + "learning_rate": 1.4833716594520037e-05, + "loss": 2.0856, + "step": 10532 + }, + { + "epoch": 0.35, + "grad_norm": 0.7352094054222107, + "learning_rate": 1.4832786094544267e-05, + "loss": 2.1571, + "step": 10533 + }, + { + "epoch": 0.35, + "grad_norm": 0.7399184107780457, + "learning_rate": 1.4831855539970512e-05, + "loss": 2.0595, + "step": 10534 + }, + { + "epoch": 0.35, + "grad_norm": 0.7360782027244568, + "learning_rate": 1.4830924930809288e-05, + "loss": 2.1515, + "step": 10535 + }, + { + "epoch": 0.35, + "grad_norm": 0.7636145949363708, + "learning_rate": 1.4829994267071102e-05, + "loss": 2.0853, + "step": 10536 + }, + { + "epoch": 0.35, + "grad_norm": 0.7282500267028809, + "learning_rate": 1.4829063548766474e-05, + "loss": 2.1467, + "step": 10537 + }, + { + "epoch": 0.35, + "grad_norm": 0.7503781318664551, + "learning_rate": 1.4828132775905914e-05, + "loss": 2.1371, + "step": 10538 + }, + { + "epoch": 0.35, + "grad_norm": 0.7393903732299805, + "learning_rate": 1.482720194849994e-05, + "loss": 2.1011, + "step": 10539 + }, + { + "epoch": 0.35, + "grad_norm": 0.733180046081543, + "learning_rate": 1.4826271066559068e-05, + "loss": 2.1936, + "step": 10540 + }, + { + "epoch": 0.35, + "grad_norm": 0.7636940479278564, + "learning_rate": 1.4825340130093815e-05, + "loss": 2.1331, + "step": 10541 + }, + { + "epoch": 0.35, + "grad_norm": 0.7593944072723389, + "learning_rate": 1.4824409139114695e-05, + "loss": 2.1394, + "step": 10542 + }, + { + "epoch": 0.35, + "grad_norm": 0.7695443034172058, + "learning_rate": 1.4823478093632231e-05, + "loss": 2.1357, + "step": 10543 + }, + { + "epoch": 0.35, + "grad_norm": 0.7230416536331177, + "learning_rate": 1.4822546993656932e-05, + "loss": 2.1202, + "step": 10544 + }, + { + "epoch": 0.35, + "grad_norm": 0.722801923751831, + "learning_rate": 1.4821615839199329e-05, + "loss": 2.1665, + "step": 10545 + }, + { + "epoch": 0.35, + "grad_norm": 0.7306049466133118, + "learning_rate": 1.4820684630269936e-05, + "loss": 2.1299, + "step": 10546 + }, + { + "epoch": 0.35, + "grad_norm": 0.745830237865448, + "learning_rate": 1.4819753366879268e-05, + "loss": 2.1465, + "step": 10547 + }, + { + "epoch": 0.35, + "grad_norm": 0.7438483834266663, + "learning_rate": 1.4818822049037857e-05, + "loss": 2.1491, + "step": 10548 + }, + { + "epoch": 0.35, + "grad_norm": 0.7464824318885803, + "learning_rate": 1.4817890676756215e-05, + "loss": 2.0877, + "step": 10549 + }, + { + "epoch": 0.35, + "grad_norm": 0.7562946081161499, + "learning_rate": 1.4816959250044872e-05, + "loss": 2.136, + "step": 10550 + }, + { + "epoch": 0.35, + "grad_norm": 0.6988404989242554, + "learning_rate": 1.4816027768914343e-05, + "loss": 2.1708, + "step": 10551 + }, + { + "epoch": 0.35, + "grad_norm": 0.7500683665275574, + "learning_rate": 1.4815096233375155e-05, + "loss": 2.1226, + "step": 10552 + }, + { + "epoch": 0.35, + "grad_norm": 0.746777355670929, + "learning_rate": 1.4814164643437832e-05, + "loss": 2.0321, + "step": 10553 + }, + { + "epoch": 0.35, + "grad_norm": 0.715187132358551, + "learning_rate": 1.4813232999112898e-05, + "loss": 2.1128, + "step": 10554 + }, + { + "epoch": 0.35, + "grad_norm": 0.7032173871994019, + "learning_rate": 1.4812301300410878e-05, + "loss": 2.0912, + "step": 10555 + }, + { + "epoch": 0.35, + "grad_norm": 0.7155032157897949, + "learning_rate": 1.4811369547342298e-05, + "loss": 2.1007, + "step": 10556 + }, + { + "epoch": 0.35, + "grad_norm": 0.737968385219574, + "learning_rate": 1.4810437739917687e-05, + "loss": 2.0715, + "step": 10557 + }, + { + "epoch": 0.35, + "grad_norm": 0.7057023644447327, + "learning_rate": 1.4809505878147566e-05, + "loss": 2.0921, + "step": 10558 + }, + { + "epoch": 0.35, + "grad_norm": 0.7230681777000427, + "learning_rate": 1.4808573962042471e-05, + "loss": 2.0197, + "step": 10559 + }, + { + "epoch": 0.35, + "grad_norm": 0.7395954132080078, + "learning_rate": 1.480764199161292e-05, + "loss": 2.0976, + "step": 10560 + }, + { + "epoch": 0.35, + "grad_norm": 0.7191762328147888, + "learning_rate": 1.4806709966869452e-05, + "loss": 2.1201, + "step": 10561 + }, + { + "epoch": 0.35, + "grad_norm": 0.7204675674438477, + "learning_rate": 1.480577788782259e-05, + "loss": 2.1215, + "step": 10562 + }, + { + "epoch": 0.35, + "grad_norm": 0.7214027643203735, + "learning_rate": 1.4804845754482865e-05, + "loss": 2.124, + "step": 10563 + }, + { + "epoch": 0.35, + "grad_norm": 0.754606306552887, + "learning_rate": 1.4803913566860808e-05, + "loss": 2.0976, + "step": 10564 + }, + { + "epoch": 0.35, + "grad_norm": 0.7406073212623596, + "learning_rate": 1.4802981324966953e-05, + "loss": 2.1338, + "step": 10565 + }, + { + "epoch": 0.35, + "grad_norm": 0.7012860178947449, + "learning_rate": 1.480204902881183e-05, + "loss": 2.0892, + "step": 10566 + }, + { + "epoch": 0.35, + "grad_norm": 0.7526813745498657, + "learning_rate": 1.4801116678405969e-05, + "loss": 2.1355, + "step": 10567 + }, + { + "epoch": 0.35, + "grad_norm": 0.7804391980171204, + "learning_rate": 1.4800184273759907e-05, + "loss": 2.1392, + "step": 10568 + }, + { + "epoch": 0.35, + "grad_norm": 0.733037531375885, + "learning_rate": 1.4799251814884176e-05, + "loss": 2.217, + "step": 10569 + }, + { + "epoch": 0.35, + "grad_norm": 0.7024804353713989, + "learning_rate": 1.4798319301789312e-05, + "loss": 2.1368, + "step": 10570 + }, + { + "epoch": 0.35, + "grad_norm": 0.7046234607696533, + "learning_rate": 1.4797386734485845e-05, + "loss": 2.133, + "step": 10571 + }, + { + "epoch": 0.35, + "grad_norm": 0.7400580644607544, + "learning_rate": 1.479645411298432e-05, + "loss": 2.1856, + "step": 10572 + }, + { + "epoch": 0.35, + "grad_norm": 0.724421501159668, + "learning_rate": 1.4795521437295264e-05, + "loss": 2.171, + "step": 10573 + }, + { + "epoch": 0.35, + "grad_norm": 0.7011397480964661, + "learning_rate": 1.4794588707429217e-05, + "loss": 2.0649, + "step": 10574 + }, + { + "epoch": 0.35, + "grad_norm": 0.72963947057724, + "learning_rate": 1.4793655923396717e-05, + "loss": 2.1113, + "step": 10575 + }, + { + "epoch": 0.35, + "grad_norm": 0.7208470702171326, + "learning_rate": 1.47927230852083e-05, + "loss": 2.183, + "step": 10576 + }, + { + "epoch": 0.35, + "grad_norm": 0.743577778339386, + "learning_rate": 1.4791790192874509e-05, + "loss": 2.1312, + "step": 10577 + }, + { + "epoch": 0.35, + "grad_norm": 0.7263944149017334, + "learning_rate": 1.4790857246405879e-05, + "loss": 2.1037, + "step": 10578 + }, + { + "epoch": 0.35, + "grad_norm": 0.7544155716896057, + "learning_rate": 1.4789924245812952e-05, + "loss": 2.1417, + "step": 10579 + }, + { + "epoch": 0.35, + "grad_norm": 0.7245422005653381, + "learning_rate": 1.4788991191106268e-05, + "loss": 2.1138, + "step": 10580 + }, + { + "epoch": 0.35, + "grad_norm": 0.7459631562232971, + "learning_rate": 1.4788058082296371e-05, + "loss": 2.1463, + "step": 10581 + }, + { + "epoch": 0.35, + "grad_norm": 0.716444730758667, + "learning_rate": 1.4787124919393793e-05, + "loss": 2.1114, + "step": 10582 + }, + { + "epoch": 0.35, + "grad_norm": 0.7301832437515259, + "learning_rate": 1.4786191702409091e-05, + "loss": 2.0493, + "step": 10583 + }, + { + "epoch": 0.35, + "grad_norm": 0.7089590430259705, + "learning_rate": 1.4785258431352798e-05, + "loss": 2.1109, + "step": 10584 + }, + { + "epoch": 0.35, + "grad_norm": 0.7470178008079529, + "learning_rate": 1.4784325106235457e-05, + "loss": 2.1053, + "step": 10585 + }, + { + "epoch": 0.35, + "grad_norm": 0.7108622789382935, + "learning_rate": 1.4783391727067615e-05, + "loss": 2.026, + "step": 10586 + }, + { + "epoch": 0.35, + "grad_norm": 0.7535647749900818, + "learning_rate": 1.4782458293859819e-05, + "loss": 2.1831, + "step": 10587 + }, + { + "epoch": 0.35, + "grad_norm": 0.6880736947059631, + "learning_rate": 1.478152480662261e-05, + "loss": 2.1105, + "step": 10588 + }, + { + "epoch": 0.35, + "grad_norm": 0.7120999693870544, + "learning_rate": 1.4780591265366538e-05, + "loss": 2.1359, + "step": 10589 + }, + { + "epoch": 0.35, + "grad_norm": 0.7136363983154297, + "learning_rate": 1.4779657670102145e-05, + "loss": 2.126, + "step": 10590 + }, + { + "epoch": 0.35, + "grad_norm": 0.7094338536262512, + "learning_rate": 1.4778724020839982e-05, + "loss": 2.1197, + "step": 10591 + }, + { + "epoch": 0.35, + "grad_norm": 0.7233152389526367, + "learning_rate": 1.4777790317590596e-05, + "loss": 2.116, + "step": 10592 + }, + { + "epoch": 0.35, + "grad_norm": 0.7566646933555603, + "learning_rate": 1.4776856560364535e-05, + "loss": 2.0689, + "step": 10593 + }, + { + "epoch": 0.35, + "grad_norm": 0.7554349899291992, + "learning_rate": 1.477592274917235e-05, + "loss": 2.1131, + "step": 10594 + }, + { + "epoch": 0.35, + "grad_norm": 0.7407272458076477, + "learning_rate": 1.4774988884024586e-05, + "loss": 2.1729, + "step": 10595 + }, + { + "epoch": 0.35, + "grad_norm": 0.7396735548973083, + "learning_rate": 1.4774054964931794e-05, + "loss": 2.129, + "step": 10596 + }, + { + "epoch": 0.35, + "grad_norm": 0.7124592065811157, + "learning_rate": 1.4773120991904533e-05, + "loss": 2.1109, + "step": 10597 + }, + { + "epoch": 0.35, + "grad_norm": 0.7445746064186096, + "learning_rate": 1.4772186964953343e-05, + "loss": 2.0262, + "step": 10598 + }, + { + "epoch": 0.35, + "grad_norm": 0.7359894514083862, + "learning_rate": 1.4771252884088783e-05, + "loss": 2.1211, + "step": 10599 + }, + { + "epoch": 0.35, + "grad_norm": 0.7731152176856995, + "learning_rate": 1.4770318749321404e-05, + "loss": 2.0924, + "step": 10600 + }, + { + "epoch": 0.35, + "grad_norm": 0.6916806697845459, + "learning_rate": 1.4769384560661761e-05, + "loss": 2.1023, + "step": 10601 + }, + { + "epoch": 0.35, + "grad_norm": 0.7274885177612305, + "learning_rate": 1.4768450318120404e-05, + "loss": 2.1491, + "step": 10602 + }, + { + "epoch": 0.35, + "grad_norm": 0.7249237895011902, + "learning_rate": 1.4767516021707895e-05, + "loss": 2.1291, + "step": 10603 + }, + { + "epoch": 0.35, + "grad_norm": 0.7154503464698792, + "learning_rate": 1.4766581671434777e-05, + "loss": 2.1047, + "step": 10604 + }, + { + "epoch": 0.35, + "grad_norm": 0.7300462126731873, + "learning_rate": 1.4765647267311618e-05, + "loss": 2.1929, + "step": 10605 + }, + { + "epoch": 0.35, + "grad_norm": 0.7393168210983276, + "learning_rate": 1.4764712809348967e-05, + "loss": 2.1085, + "step": 10606 + }, + { + "epoch": 0.35, + "grad_norm": 0.7266584634780884, + "learning_rate": 1.4763778297557381e-05, + "loss": 2.1526, + "step": 10607 + }, + { + "epoch": 0.35, + "grad_norm": 0.7454675436019897, + "learning_rate": 1.4762843731947422e-05, + "loss": 2.0567, + "step": 10608 + }, + { + "epoch": 0.35, + "grad_norm": 0.7402105331420898, + "learning_rate": 1.4761909112529646e-05, + "loss": 2.0699, + "step": 10609 + }, + { + "epoch": 0.35, + "grad_norm": 0.7279614210128784, + "learning_rate": 1.4760974439314613e-05, + "loss": 2.1363, + "step": 10610 + }, + { + "epoch": 0.35, + "grad_norm": 0.719496488571167, + "learning_rate": 1.476003971231288e-05, + "loss": 2.1142, + "step": 10611 + }, + { + "epoch": 0.35, + "grad_norm": 0.7673267126083374, + "learning_rate": 1.4759104931535004e-05, + "loss": 2.0291, + "step": 10612 + }, + { + "epoch": 0.35, + "grad_norm": 0.7183021903038025, + "learning_rate": 1.4758170096991552e-05, + "loss": 2.1576, + "step": 10613 + }, + { + "epoch": 0.35, + "grad_norm": 0.7354138493537903, + "learning_rate": 1.4757235208693086e-05, + "loss": 2.132, + "step": 10614 + }, + { + "epoch": 0.35, + "grad_norm": 0.6772012710571289, + "learning_rate": 1.4756300266650161e-05, + "loss": 2.0775, + "step": 10615 + }, + { + "epoch": 0.35, + "grad_norm": 0.7167621850967407, + "learning_rate": 1.4755365270873348e-05, + "loss": 2.1945, + "step": 10616 + }, + { + "epoch": 0.35, + "grad_norm": 0.7289667725563049, + "learning_rate": 1.47544302213732e-05, + "loss": 2.1157, + "step": 10617 + }, + { + "epoch": 0.35, + "grad_norm": 0.7327138781547546, + "learning_rate": 1.4753495118160283e-05, + "loss": 2.0014, + "step": 10618 + }, + { + "epoch": 0.35, + "grad_norm": 0.726081371307373, + "learning_rate": 1.4752559961245172e-05, + "loss": 2.136, + "step": 10619 + }, + { + "epoch": 0.35, + "grad_norm": 0.6932653188705444, + "learning_rate": 1.4751624750638418e-05, + "loss": 2.1021, + "step": 10620 + }, + { + "epoch": 0.35, + "grad_norm": 0.738296389579773, + "learning_rate": 1.4750689486350595e-05, + "loss": 2.1061, + "step": 10621 + }, + { + "epoch": 0.35, + "grad_norm": 0.6972613334655762, + "learning_rate": 1.4749754168392266e-05, + "loss": 2.1224, + "step": 10622 + }, + { + "epoch": 0.35, + "grad_norm": 0.7374845743179321, + "learning_rate": 1.4748818796773998e-05, + "loss": 2.0677, + "step": 10623 + }, + { + "epoch": 0.35, + "grad_norm": 0.7273114323616028, + "learning_rate": 1.4747883371506357e-05, + "loss": 2.1475, + "step": 10624 + }, + { + "epoch": 0.35, + "grad_norm": 0.7235073447227478, + "learning_rate": 1.4746947892599914e-05, + "loss": 2.0495, + "step": 10625 + }, + { + "epoch": 0.35, + "grad_norm": 0.6947516798973083, + "learning_rate": 1.4746012360065234e-05, + "loss": 2.1637, + "step": 10626 + }, + { + "epoch": 0.35, + "grad_norm": 0.7210212349891663, + "learning_rate": 1.4745076773912887e-05, + "loss": 2.1589, + "step": 10627 + }, + { + "epoch": 0.35, + "grad_norm": 0.741195559501648, + "learning_rate": 1.4744141134153446e-05, + "loss": 2.0898, + "step": 10628 + }, + { + "epoch": 0.35, + "grad_norm": 0.7275981903076172, + "learning_rate": 1.4743205440797477e-05, + "loss": 2.141, + "step": 10629 + }, + { + "epoch": 0.35, + "grad_norm": 0.733950138092041, + "learning_rate": 1.4742269693855558e-05, + "loss": 2.0787, + "step": 10630 + }, + { + "epoch": 0.35, + "grad_norm": 0.7199733257293701, + "learning_rate": 1.474133389333825e-05, + "loss": 2.1381, + "step": 10631 + }, + { + "epoch": 0.35, + "grad_norm": 0.7123814821243286, + "learning_rate": 1.474039803925613e-05, + "loss": 2.1302, + "step": 10632 + }, + { + "epoch": 0.35, + "grad_norm": 0.7444413304328918, + "learning_rate": 1.4739462131619773e-05, + "loss": 2.1569, + "step": 10633 + }, + { + "epoch": 0.35, + "grad_norm": 0.7282716631889343, + "learning_rate": 1.4738526170439748e-05, + "loss": 2.0456, + "step": 10634 + }, + { + "epoch": 0.35, + "grad_norm": 0.7091349959373474, + "learning_rate": 1.4737590155726633e-05, + "loss": 2.1012, + "step": 10635 + }, + { + "epoch": 0.35, + "grad_norm": 0.7262563705444336, + "learning_rate": 1.4736654087491005e-05, + "loss": 2.1402, + "step": 10636 + }, + { + "epoch": 0.35, + "grad_norm": 0.7245771884918213, + "learning_rate": 1.4735717965743427e-05, + "loss": 2.1075, + "step": 10637 + }, + { + "epoch": 0.35, + "grad_norm": 0.7241370677947998, + "learning_rate": 1.4734781790494489e-05, + "loss": 2.1261, + "step": 10638 + }, + { + "epoch": 0.35, + "grad_norm": 0.7440412044525146, + "learning_rate": 1.473384556175476e-05, + "loss": 2.0871, + "step": 10639 + }, + { + "epoch": 0.35, + "grad_norm": 0.70985347032547, + "learning_rate": 1.4732909279534815e-05, + "loss": 2.0531, + "step": 10640 + }, + { + "epoch": 0.35, + "grad_norm": 0.727592408657074, + "learning_rate": 1.473197294384524e-05, + "loss": 2.1026, + "step": 10641 + }, + { + "epoch": 0.35, + "grad_norm": 0.7280829548835754, + "learning_rate": 1.47310365546966e-05, + "loss": 2.153, + "step": 10642 + }, + { + "epoch": 0.35, + "grad_norm": 0.7090317606925964, + "learning_rate": 1.4730100112099491e-05, + "loss": 2.0744, + "step": 10643 + }, + { + "epoch": 0.35, + "grad_norm": 0.7340937256813049, + "learning_rate": 1.4729163616064478e-05, + "loss": 2.1438, + "step": 10644 + }, + { + "epoch": 0.35, + "grad_norm": 0.7132500410079956, + "learning_rate": 1.4728227066602146e-05, + "loss": 2.1138, + "step": 10645 + }, + { + "epoch": 0.35, + "grad_norm": 0.7084729075431824, + "learning_rate": 1.4727290463723076e-05, + "loss": 2.1024, + "step": 10646 + }, + { + "epoch": 0.35, + "grad_norm": 0.7285545468330383, + "learning_rate": 1.472635380743785e-05, + "loss": 2.1535, + "step": 10647 + }, + { + "epoch": 0.35, + "grad_norm": 0.7258221507072449, + "learning_rate": 1.4725417097757046e-05, + "loss": 2.0476, + "step": 10648 + }, + { + "epoch": 0.35, + "grad_norm": 0.7319920063018799, + "learning_rate": 1.4724480334691252e-05, + "loss": 2.1264, + "step": 10649 + }, + { + "epoch": 0.35, + "grad_norm": 0.7296038866043091, + "learning_rate": 1.4723543518251044e-05, + "loss": 2.1207, + "step": 10650 + }, + { + "epoch": 0.35, + "grad_norm": 0.7396849989891052, + "learning_rate": 1.4722606648447013e-05, + "loss": 2.1328, + "step": 10651 + }, + { + "epoch": 0.35, + "grad_norm": 0.7211018800735474, + "learning_rate": 1.4721669725289742e-05, + "loss": 2.0991, + "step": 10652 + }, + { + "epoch": 0.35, + "grad_norm": 0.7875257730484009, + "learning_rate": 1.4720732748789808e-05, + "loss": 2.1144, + "step": 10653 + }, + { + "epoch": 0.35, + "grad_norm": 0.7283000349998474, + "learning_rate": 1.4719795718957807e-05, + "loss": 2.0817, + "step": 10654 + }, + { + "epoch": 0.35, + "grad_norm": 0.711953341960907, + "learning_rate": 1.4718858635804317e-05, + "loss": 2.1122, + "step": 10655 + }, + { + "epoch": 0.35, + "grad_norm": 0.7097283601760864, + "learning_rate": 1.4717921499339928e-05, + "loss": 2.1411, + "step": 10656 + }, + { + "epoch": 0.35, + "grad_norm": 0.7133876085281372, + "learning_rate": 1.4716984309575229e-05, + "loss": 2.0972, + "step": 10657 + }, + { + "epoch": 0.35, + "grad_norm": 0.7025846838951111, + "learning_rate": 1.4716047066520802e-05, + "loss": 2.1515, + "step": 10658 + }, + { + "epoch": 0.35, + "grad_norm": 0.694673478603363, + "learning_rate": 1.4715109770187242e-05, + "loss": 2.1083, + "step": 10659 + }, + { + "epoch": 0.35, + "grad_norm": 0.7403780221939087, + "learning_rate": 1.4714172420585134e-05, + "loss": 2.1976, + "step": 10660 + }, + { + "epoch": 0.35, + "grad_norm": 0.7139356136322021, + "learning_rate": 1.4713235017725066e-05, + "loss": 2.1342, + "step": 10661 + }, + { + "epoch": 0.35, + "grad_norm": 0.740252673625946, + "learning_rate": 1.4712297561617637e-05, + "loss": 2.1538, + "step": 10662 + }, + { + "epoch": 0.35, + "grad_norm": 0.7596855759620667, + "learning_rate": 1.471136005227343e-05, + "loss": 2.1501, + "step": 10663 + }, + { + "epoch": 0.35, + "grad_norm": 0.7212527990341187, + "learning_rate": 1.4710422489703036e-05, + "loss": 2.1039, + "step": 10664 + }, + { + "epoch": 0.35, + "grad_norm": 0.6830949783325195, + "learning_rate": 1.470948487391705e-05, + "loss": 2.1044, + "step": 10665 + }, + { + "epoch": 0.35, + "grad_norm": 0.6985042691230774, + "learning_rate": 1.4708547204926064e-05, + "loss": 2.1154, + "step": 10666 + }, + { + "epoch": 0.35, + "grad_norm": 0.7282301783561707, + "learning_rate": 1.4707609482740673e-05, + "loss": 2.0644, + "step": 10667 + }, + { + "epoch": 0.35, + "grad_norm": 0.7447898387908936, + "learning_rate": 1.4706671707371466e-05, + "loss": 2.0883, + "step": 10668 + }, + { + "epoch": 0.35, + "grad_norm": 0.7376669049263, + "learning_rate": 1.4705733878829042e-05, + "loss": 2.1139, + "step": 10669 + }, + { + "epoch": 0.35, + "grad_norm": 0.7449215054512024, + "learning_rate": 1.4704795997123995e-05, + "loss": 2.0451, + "step": 10670 + }, + { + "epoch": 0.36, + "grad_norm": 0.7168874740600586, + "learning_rate": 1.470385806226692e-05, + "loss": 2.0914, + "step": 10671 + }, + { + "epoch": 0.36, + "grad_norm": 0.7354474663734436, + "learning_rate": 1.4702920074268412e-05, + "loss": 2.1448, + "step": 10672 + }, + { + "epoch": 0.36, + "grad_norm": 0.7651484608650208, + "learning_rate": 1.4701982033139071e-05, + "loss": 2.1086, + "step": 10673 + }, + { + "epoch": 0.36, + "grad_norm": 0.7756704688072205, + "learning_rate": 1.4701043938889492e-05, + "loss": 2.1092, + "step": 10674 + }, + { + "epoch": 0.36, + "grad_norm": 0.7392194271087646, + "learning_rate": 1.470010579153027e-05, + "loss": 2.1041, + "step": 10675 + }, + { + "epoch": 0.36, + "grad_norm": 0.7607390284538269, + "learning_rate": 1.4699167591072013e-05, + "loss": 2.1497, + "step": 10676 + }, + { + "epoch": 0.36, + "grad_norm": 0.8281517624855042, + "learning_rate": 1.4698229337525311e-05, + "loss": 2.0652, + "step": 10677 + }, + { + "epoch": 0.36, + "grad_norm": 0.7029891014099121, + "learning_rate": 1.4697291030900771e-05, + "loss": 2.2133, + "step": 10678 + }, + { + "epoch": 0.36, + "grad_norm": 0.6847342848777771, + "learning_rate": 1.4696352671208986e-05, + "loss": 2.0701, + "step": 10679 + }, + { + "epoch": 0.36, + "grad_norm": 0.714811384677887, + "learning_rate": 1.4695414258460564e-05, + "loss": 2.125, + "step": 10680 + }, + { + "epoch": 0.36, + "grad_norm": 0.7215642929077148, + "learning_rate": 1.4694475792666098e-05, + "loss": 2.0745, + "step": 10681 + }, + { + "epoch": 0.36, + "grad_norm": 0.7549083232879639, + "learning_rate": 1.4693537273836201e-05, + "loss": 2.0591, + "step": 10682 + }, + { + "epoch": 0.36, + "grad_norm": 0.7193545699119568, + "learning_rate": 1.4692598701981469e-05, + "loss": 2.1202, + "step": 10683 + }, + { + "epoch": 0.36, + "grad_norm": 0.7159538865089417, + "learning_rate": 1.4691660077112507e-05, + "loss": 2.1022, + "step": 10684 + }, + { + "epoch": 0.36, + "grad_norm": 0.7144292593002319, + "learning_rate": 1.469072139923992e-05, + "loss": 2.1217, + "step": 10685 + }, + { + "epoch": 0.36, + "grad_norm": 0.7088968753814697, + "learning_rate": 1.4689782668374309e-05, + "loss": 2.1863, + "step": 10686 + }, + { + "epoch": 0.36, + "grad_norm": 0.7178087830543518, + "learning_rate": 1.4688843884526285e-05, + "loss": 2.0307, + "step": 10687 + }, + { + "epoch": 0.36, + "grad_norm": 0.7296062707901001, + "learning_rate": 1.4687905047706448e-05, + "loss": 2.0879, + "step": 10688 + }, + { + "epoch": 0.36, + "grad_norm": 0.699737548828125, + "learning_rate": 1.4686966157925409e-05, + "loss": 2.0873, + "step": 10689 + }, + { + "epoch": 0.36, + "grad_norm": 0.7272664308547974, + "learning_rate": 1.4686027215193772e-05, + "loss": 2.075, + "step": 10690 + }, + { + "epoch": 0.36, + "grad_norm": 0.7221621870994568, + "learning_rate": 1.4685088219522147e-05, + "loss": 2.0958, + "step": 10691 + }, + { + "epoch": 0.36, + "grad_norm": 0.7861818671226501, + "learning_rate": 1.468414917092114e-05, + "loss": 2.1617, + "step": 10692 + }, + { + "epoch": 0.36, + "grad_norm": 0.7191817164421082, + "learning_rate": 1.4683210069401361e-05, + "loss": 2.0327, + "step": 10693 + }, + { + "epoch": 0.36, + "grad_norm": 0.711847722530365, + "learning_rate": 1.468227091497342e-05, + "loss": 2.0676, + "step": 10694 + }, + { + "epoch": 0.36, + "grad_norm": 0.7502296566963196, + "learning_rate": 1.4681331707647925e-05, + "loss": 2.1205, + "step": 10695 + }, + { + "epoch": 0.36, + "grad_norm": 0.7163909673690796, + "learning_rate": 1.4680392447435492e-05, + "loss": 2.0897, + "step": 10696 + }, + { + "epoch": 0.36, + "grad_norm": 0.7378166913986206, + "learning_rate": 1.4679453134346722e-05, + "loss": 2.1559, + "step": 10697 + }, + { + "epoch": 0.36, + "grad_norm": 0.760080873966217, + "learning_rate": 1.4678513768392237e-05, + "loss": 2.0706, + "step": 10698 + }, + { + "epoch": 0.36, + "grad_norm": 0.7390359044075012, + "learning_rate": 1.4677574349582644e-05, + "loss": 2.1519, + "step": 10699 + }, + { + "epoch": 0.36, + "grad_norm": 0.7275302410125732, + "learning_rate": 1.4676634877928554e-05, + "loss": 2.1615, + "step": 10700 + }, + { + "epoch": 0.36, + "grad_norm": 0.715356707572937, + "learning_rate": 1.4675695353440591e-05, + "loss": 2.0973, + "step": 10701 + }, + { + "epoch": 0.36, + "grad_norm": 0.7154173254966736, + "learning_rate": 1.4674755776129357e-05, + "loss": 2.0608, + "step": 10702 + }, + { + "epoch": 0.36, + "grad_norm": 0.7507113218307495, + "learning_rate": 1.4673816146005473e-05, + "loss": 2.0811, + "step": 10703 + }, + { + "epoch": 0.36, + "grad_norm": 0.7092428207397461, + "learning_rate": 1.4672876463079555e-05, + "loss": 2.11, + "step": 10704 + }, + { + "epoch": 0.36, + "grad_norm": 0.7321333289146423, + "learning_rate": 1.4671936727362214e-05, + "loss": 2.1619, + "step": 10705 + }, + { + "epoch": 0.36, + "grad_norm": 0.7313870191574097, + "learning_rate": 1.4670996938864073e-05, + "loss": 2.1284, + "step": 10706 + }, + { + "epoch": 0.36, + "grad_norm": 0.7189551591873169, + "learning_rate": 1.4670057097595747e-05, + "loss": 2.1673, + "step": 10707 + }, + { + "epoch": 0.36, + "grad_norm": 0.7239701747894287, + "learning_rate": 1.4669117203567848e-05, + "loss": 2.1397, + "step": 10708 + }, + { + "epoch": 0.36, + "grad_norm": 0.7288504838943481, + "learning_rate": 1.4668177256791003e-05, + "loss": 2.1879, + "step": 10709 + }, + { + "epoch": 0.36, + "grad_norm": 0.7403784394264221, + "learning_rate": 1.4667237257275826e-05, + "loss": 2.1761, + "step": 10710 + }, + { + "epoch": 0.36, + "grad_norm": 0.6891387104988098, + "learning_rate": 1.4666297205032935e-05, + "loss": 2.0966, + "step": 10711 + }, + { + "epoch": 0.36, + "grad_norm": 0.7422135472297668, + "learning_rate": 1.466535710007296e-05, + "loss": 2.1045, + "step": 10712 + }, + { + "epoch": 0.36, + "grad_norm": 0.7195985913276672, + "learning_rate": 1.466441694240651e-05, + "loss": 2.0519, + "step": 10713 + }, + { + "epoch": 0.36, + "grad_norm": 0.7184195518493652, + "learning_rate": 1.466347673204421e-05, + "loss": 2.1365, + "step": 10714 + }, + { + "epoch": 0.36, + "grad_norm": 0.7611271142959595, + "learning_rate": 1.4662536468996684e-05, + "loss": 2.1449, + "step": 10715 + }, + { + "epoch": 0.36, + "grad_norm": 0.7371851205825806, + "learning_rate": 1.4661596153274555e-05, + "loss": 2.1129, + "step": 10716 + }, + { + "epoch": 0.36, + "grad_norm": 0.7106055021286011, + "learning_rate": 1.4660655784888442e-05, + "loss": 2.1347, + "step": 10717 + }, + { + "epoch": 0.36, + "grad_norm": 0.7725675702095032, + "learning_rate": 1.4659715363848976e-05, + "loss": 2.1875, + "step": 10718 + }, + { + "epoch": 0.36, + "grad_norm": 0.6984225511550903, + "learning_rate": 1.4658774890166768e-05, + "loss": 2.169, + "step": 10719 + }, + { + "epoch": 0.36, + "grad_norm": 0.7144049406051636, + "learning_rate": 1.465783436385246e-05, + "loss": 2.0814, + "step": 10720 + }, + { + "epoch": 0.36, + "grad_norm": 0.7294212579727173, + "learning_rate": 1.4656893784916668e-05, + "loss": 2.0594, + "step": 10721 + }, + { + "epoch": 0.36, + "grad_norm": 0.7128740549087524, + "learning_rate": 1.4655953153370014e-05, + "loss": 2.1168, + "step": 10722 + }, + { + "epoch": 0.36, + "grad_norm": 0.7078655362129211, + "learning_rate": 1.4655012469223136e-05, + "loss": 2.0562, + "step": 10723 + }, + { + "epoch": 0.36, + "grad_norm": 0.7319775819778442, + "learning_rate": 1.4654071732486652e-05, + "loss": 2.1196, + "step": 10724 + }, + { + "epoch": 0.36, + "grad_norm": 0.7235652804374695, + "learning_rate": 1.4653130943171194e-05, + "loss": 2.1462, + "step": 10725 + }, + { + "epoch": 0.36, + "grad_norm": 0.7009215950965881, + "learning_rate": 1.465219010128739e-05, + "loss": 2.1263, + "step": 10726 + }, + { + "epoch": 0.36, + "grad_norm": 0.7108410596847534, + "learning_rate": 1.4651249206845868e-05, + "loss": 2.0909, + "step": 10727 + }, + { + "epoch": 0.36, + "grad_norm": 0.7604007124900818, + "learning_rate": 1.4650308259857258e-05, + "loss": 2.1175, + "step": 10728 + }, + { + "epoch": 0.36, + "grad_norm": 0.746516227722168, + "learning_rate": 1.4649367260332192e-05, + "loss": 2.111, + "step": 10729 + }, + { + "epoch": 0.36, + "grad_norm": 0.7076823711395264, + "learning_rate": 1.4648426208281297e-05, + "loss": 2.0996, + "step": 10730 + }, + { + "epoch": 0.36, + "grad_norm": 0.7431108951568604, + "learning_rate": 1.4647485103715207e-05, + "loss": 2.0979, + "step": 10731 + }, + { + "epoch": 0.36, + "grad_norm": 0.7201970219612122, + "learning_rate": 1.4646543946644558e-05, + "loss": 2.0842, + "step": 10732 + }, + { + "epoch": 0.36, + "grad_norm": 0.7544375658035278, + "learning_rate": 1.4645602737079974e-05, + "loss": 2.1014, + "step": 10733 + }, + { + "epoch": 0.36, + "grad_norm": 0.7263185381889343, + "learning_rate": 1.4644661475032096e-05, + "loss": 2.1331, + "step": 10734 + }, + { + "epoch": 0.36, + "grad_norm": 0.730281412601471, + "learning_rate": 1.4643720160511554e-05, + "loss": 2.1958, + "step": 10735 + }, + { + "epoch": 0.36, + "grad_norm": 0.7261035442352295, + "learning_rate": 1.4642778793528983e-05, + "loss": 2.2242, + "step": 10736 + }, + { + "epoch": 0.36, + "grad_norm": 0.7320296168327332, + "learning_rate": 1.4641837374095019e-05, + "loss": 2.0946, + "step": 10737 + }, + { + "epoch": 0.36, + "grad_norm": 0.7540250420570374, + "learning_rate": 1.4640895902220293e-05, + "loss": 2.1462, + "step": 10738 + }, + { + "epoch": 0.36, + "grad_norm": 0.7177721261978149, + "learning_rate": 1.4639954377915448e-05, + "loss": 2.1154, + "step": 10739 + }, + { + "epoch": 0.36, + "grad_norm": 0.703453004360199, + "learning_rate": 1.4639012801191119e-05, + "loss": 2.1231, + "step": 10740 + }, + { + "epoch": 0.36, + "grad_norm": 0.7644084692001343, + "learning_rate": 1.4638071172057937e-05, + "loss": 2.1142, + "step": 10741 + }, + { + "epoch": 0.36, + "grad_norm": 0.7115994691848755, + "learning_rate": 1.463712949052655e-05, + "loss": 2.0805, + "step": 10742 + }, + { + "epoch": 0.36, + "grad_norm": 0.7387803792953491, + "learning_rate": 1.4636187756607591e-05, + "loss": 2.1379, + "step": 10743 + }, + { + "epoch": 0.36, + "grad_norm": 0.7143697142601013, + "learning_rate": 1.4635245970311696e-05, + "loss": 2.0551, + "step": 10744 + }, + { + "epoch": 0.36, + "grad_norm": 0.7333706021308899, + "learning_rate": 1.4634304131649516e-05, + "loss": 2.1594, + "step": 10745 + }, + { + "epoch": 0.36, + "grad_norm": 0.7061779499053955, + "learning_rate": 1.4633362240631675e-05, + "loss": 2.0513, + "step": 10746 + }, + { + "epoch": 0.36, + "grad_norm": 0.7650803327560425, + "learning_rate": 1.463242029726883e-05, + "loss": 2.1049, + "step": 10747 + }, + { + "epoch": 0.36, + "grad_norm": 0.7329779267311096, + "learning_rate": 1.463147830157161e-05, + "loss": 2.156, + "step": 10748 + }, + { + "epoch": 0.36, + "grad_norm": 0.7555022835731506, + "learning_rate": 1.4630536253550666e-05, + "loss": 2.1121, + "step": 10749 + }, + { + "epoch": 0.36, + "grad_norm": 0.7738577723503113, + "learning_rate": 1.4629594153216635e-05, + "loss": 2.1724, + "step": 10750 + }, + { + "epoch": 0.36, + "grad_norm": 0.7368938326835632, + "learning_rate": 1.4628652000580163e-05, + "loss": 2.1247, + "step": 10751 + }, + { + "epoch": 0.36, + "grad_norm": 0.7199434041976929, + "learning_rate": 1.4627709795651895e-05, + "loss": 2.0774, + "step": 10752 + }, + { + "epoch": 0.36, + "grad_norm": 0.7110752463340759, + "learning_rate": 1.4626767538442473e-05, + "loss": 2.1559, + "step": 10753 + }, + { + "epoch": 0.36, + "grad_norm": 0.7562914490699768, + "learning_rate": 1.4625825228962543e-05, + "loss": 2.1231, + "step": 10754 + }, + { + "epoch": 0.36, + "grad_norm": 0.7123403549194336, + "learning_rate": 1.4624882867222749e-05, + "loss": 2.0972, + "step": 10755 + }, + { + "epoch": 0.36, + "grad_norm": 0.7152132391929626, + "learning_rate": 1.4623940453233742e-05, + "loss": 2.084, + "step": 10756 + }, + { + "epoch": 0.36, + "grad_norm": 0.7218719124794006, + "learning_rate": 1.4622997987006162e-05, + "loss": 2.2151, + "step": 10757 + }, + { + "epoch": 0.36, + "grad_norm": 0.7454134821891785, + "learning_rate": 1.4622055468550663e-05, + "loss": 2.1839, + "step": 10758 + }, + { + "epoch": 0.36, + "grad_norm": 0.751155436038971, + "learning_rate": 1.462111289787789e-05, + "loss": 2.0662, + "step": 10759 + }, + { + "epoch": 0.36, + "grad_norm": 0.7157754898071289, + "learning_rate": 1.4620170274998488e-05, + "loss": 2.1229, + "step": 10760 + }, + { + "epoch": 0.36, + "grad_norm": 0.7539533972740173, + "learning_rate": 1.4619227599923114e-05, + "loss": 2.0677, + "step": 10761 + }, + { + "epoch": 0.36, + "grad_norm": 0.7011374235153198, + "learning_rate": 1.4618284872662412e-05, + "loss": 2.1359, + "step": 10762 + }, + { + "epoch": 0.36, + "grad_norm": 0.7207832336425781, + "learning_rate": 1.4617342093227034e-05, + "loss": 2.1337, + "step": 10763 + }, + { + "epoch": 0.36, + "grad_norm": 0.7410914897918701, + "learning_rate": 1.461639926162763e-05, + "loss": 2.1402, + "step": 10764 + }, + { + "epoch": 0.36, + "grad_norm": 0.7477924227714539, + "learning_rate": 1.4615456377874854e-05, + "loss": 2.1298, + "step": 10765 + }, + { + "epoch": 0.36, + "grad_norm": 0.7417619824409485, + "learning_rate": 1.4614513441979357e-05, + "loss": 2.0995, + "step": 10766 + }, + { + "epoch": 0.36, + "grad_norm": 0.7469790577888489, + "learning_rate": 1.4613570453951794e-05, + "loss": 2.0885, + "step": 10767 + }, + { + "epoch": 0.36, + "grad_norm": 0.6892023086547852, + "learning_rate": 1.4612627413802808e-05, + "loss": 2.1173, + "step": 10768 + }, + { + "epoch": 0.36, + "grad_norm": 0.7399068474769592, + "learning_rate": 1.4611684321543069e-05, + "loss": 2.1545, + "step": 10769 + }, + { + "epoch": 0.36, + "grad_norm": 0.7480810880661011, + "learning_rate": 1.461074117718322e-05, + "loss": 2.0427, + "step": 10770 + }, + { + "epoch": 0.36, + "grad_norm": 0.7500886917114258, + "learning_rate": 1.460979798073392e-05, + "loss": 2.1223, + "step": 10771 + }, + { + "epoch": 0.36, + "grad_norm": 0.6938839554786682, + "learning_rate": 1.4608854732205824e-05, + "loss": 2.0547, + "step": 10772 + }, + { + "epoch": 0.36, + "grad_norm": 0.7379195690155029, + "learning_rate": 1.4607911431609587e-05, + "loss": 2.139, + "step": 10773 + }, + { + "epoch": 0.36, + "grad_norm": 0.7262565493583679, + "learning_rate": 1.4606968078955868e-05, + "loss": 2.0891, + "step": 10774 + }, + { + "epoch": 0.36, + "grad_norm": 0.7446302771568298, + "learning_rate": 1.4606024674255324e-05, + "loss": 2.1435, + "step": 10775 + }, + { + "epoch": 0.36, + "grad_norm": 0.749920666217804, + "learning_rate": 1.4605081217518611e-05, + "loss": 2.1642, + "step": 10776 + }, + { + "epoch": 0.36, + "grad_norm": 0.7277040481567383, + "learning_rate": 1.460413770875639e-05, + "loss": 2.13, + "step": 10777 + }, + { + "epoch": 0.36, + "grad_norm": 0.6896201968193054, + "learning_rate": 1.4603194147979322e-05, + "loss": 2.1219, + "step": 10778 + }, + { + "epoch": 0.36, + "grad_norm": 0.706484317779541, + "learning_rate": 1.460225053519806e-05, + "loss": 2.1169, + "step": 10779 + }, + { + "epoch": 0.36, + "grad_norm": 0.743334174156189, + "learning_rate": 1.460130687042327e-05, + "loss": 2.1258, + "step": 10780 + }, + { + "epoch": 0.36, + "grad_norm": 0.7265442609786987, + "learning_rate": 1.4600363153665613e-05, + "loss": 2.1119, + "step": 10781 + }, + { + "epoch": 0.36, + "grad_norm": 0.7329130172729492, + "learning_rate": 1.4599419384935749e-05, + "loss": 2.0479, + "step": 10782 + }, + { + "epoch": 0.36, + "grad_norm": 0.7100732326507568, + "learning_rate": 1.4598475564244341e-05, + "loss": 2.1189, + "step": 10783 + }, + { + "epoch": 0.36, + "grad_norm": 0.7102295160293579, + "learning_rate": 1.459753169160205e-05, + "loss": 2.0738, + "step": 10784 + }, + { + "epoch": 0.36, + "grad_norm": 0.7193012833595276, + "learning_rate": 1.4596587767019538e-05, + "loss": 2.0452, + "step": 10785 + }, + { + "epoch": 0.36, + "grad_norm": 0.7266778349876404, + "learning_rate": 1.4595643790507475e-05, + "loss": 2.1064, + "step": 10786 + }, + { + "epoch": 0.36, + "grad_norm": 0.7419440150260925, + "learning_rate": 1.4594699762076518e-05, + "loss": 2.0794, + "step": 10787 + }, + { + "epoch": 0.36, + "grad_norm": 0.7537574172019958, + "learning_rate": 1.4593755681737338e-05, + "loss": 2.0985, + "step": 10788 + }, + { + "epoch": 0.36, + "grad_norm": 0.724121630191803, + "learning_rate": 1.45928115495006e-05, + "loss": 2.1709, + "step": 10789 + }, + { + "epoch": 0.36, + "grad_norm": 0.7019773721694946, + "learning_rate": 1.4591867365376965e-05, + "loss": 2.164, + "step": 10790 + }, + { + "epoch": 0.36, + "grad_norm": 0.7442371249198914, + "learning_rate": 1.459092312937711e-05, + "loss": 2.0974, + "step": 10791 + }, + { + "epoch": 0.36, + "grad_norm": 0.7568175792694092, + "learning_rate": 1.4589978841511692e-05, + "loss": 2.1592, + "step": 10792 + }, + { + "epoch": 0.36, + "grad_norm": 0.7406652569770813, + "learning_rate": 1.4589034501791381e-05, + "loss": 2.1429, + "step": 10793 + }, + { + "epoch": 0.36, + "grad_norm": 0.7298339605331421, + "learning_rate": 1.4588090110226852e-05, + "loss": 2.1166, + "step": 10794 + }, + { + "epoch": 0.36, + "grad_norm": 0.6889939308166504, + "learning_rate": 1.4587145666828766e-05, + "loss": 2.1064, + "step": 10795 + }, + { + "epoch": 0.36, + "grad_norm": 0.7417524456977844, + "learning_rate": 1.4586201171607796e-05, + "loss": 2.1753, + "step": 10796 + }, + { + "epoch": 0.36, + "grad_norm": 0.7101824283599854, + "learning_rate": 1.4585256624574618e-05, + "loss": 2.0869, + "step": 10797 + }, + { + "epoch": 0.36, + "grad_norm": 0.7211170792579651, + "learning_rate": 1.4584312025739893e-05, + "loss": 2.1187, + "step": 10798 + }, + { + "epoch": 0.36, + "grad_norm": 0.7860206961631775, + "learning_rate": 1.4583367375114299e-05, + "loss": 2.1442, + "step": 10799 + }, + { + "epoch": 0.36, + "grad_norm": 0.7377907037734985, + "learning_rate": 1.458242267270851e-05, + "loss": 2.2027, + "step": 10800 + }, + { + "epoch": 0.36, + "grad_norm": 0.6976315379142761, + "learning_rate": 1.4581477918533188e-05, + "loss": 2.08, + "step": 10801 + }, + { + "epoch": 0.36, + "grad_norm": 0.7417261600494385, + "learning_rate": 1.458053311259902e-05, + "loss": 2.0475, + "step": 10802 + }, + { + "epoch": 0.36, + "grad_norm": 0.7454881072044373, + "learning_rate": 1.457958825491667e-05, + "loss": 2.0978, + "step": 10803 + }, + { + "epoch": 0.36, + "grad_norm": 0.7373024821281433, + "learning_rate": 1.4578643345496815e-05, + "loss": 2.1205, + "step": 10804 + }, + { + "epoch": 0.36, + "grad_norm": 0.749143123626709, + "learning_rate": 1.4577698384350135e-05, + "loss": 2.1054, + "step": 10805 + }, + { + "epoch": 0.36, + "grad_norm": 0.7391125559806824, + "learning_rate": 1.4576753371487297e-05, + "loss": 2.1247, + "step": 10806 + }, + { + "epoch": 0.36, + "grad_norm": 0.8038753867149353, + "learning_rate": 1.4575808306918984e-05, + "loss": 2.063, + "step": 10807 + }, + { + "epoch": 0.36, + "grad_norm": 0.7392237186431885, + "learning_rate": 1.457486319065587e-05, + "loss": 2.0919, + "step": 10808 + }, + { + "epoch": 0.36, + "grad_norm": 0.7688574194908142, + "learning_rate": 1.4573918022708634e-05, + "loss": 2.0183, + "step": 10809 + }, + { + "epoch": 0.36, + "grad_norm": 0.7133106589317322, + "learning_rate": 1.457297280308795e-05, + "loss": 2.0748, + "step": 10810 + }, + { + "epoch": 0.36, + "grad_norm": 0.728495180606842, + "learning_rate": 1.4572027531804502e-05, + "loss": 2.1385, + "step": 10811 + }, + { + "epoch": 0.36, + "grad_norm": 0.707040011882782, + "learning_rate": 1.4571082208868963e-05, + "loss": 2.1198, + "step": 10812 + }, + { + "epoch": 0.36, + "grad_norm": 0.7182496190071106, + "learning_rate": 1.4570136834292021e-05, + "loss": 2.1119, + "step": 10813 + }, + { + "epoch": 0.36, + "grad_norm": 0.7041711211204529, + "learning_rate": 1.456919140808435e-05, + "loss": 2.081, + "step": 10814 + }, + { + "epoch": 0.36, + "grad_norm": 0.7287329435348511, + "learning_rate": 1.4568245930256628e-05, + "loss": 2.1193, + "step": 10815 + }, + { + "epoch": 0.36, + "grad_norm": 0.7133411765098572, + "learning_rate": 1.4567300400819547e-05, + "loss": 2.1259, + "step": 10816 + }, + { + "epoch": 0.36, + "grad_norm": 0.727554976940155, + "learning_rate": 1.4566354819783776e-05, + "loss": 2.0984, + "step": 10817 + }, + { + "epoch": 0.36, + "grad_norm": 0.688666582107544, + "learning_rate": 1.4565409187160011e-05, + "loss": 2.0431, + "step": 10818 + }, + { + "epoch": 0.36, + "grad_norm": 0.69767165184021, + "learning_rate": 1.4564463502958926e-05, + "loss": 2.0929, + "step": 10819 + }, + { + "epoch": 0.36, + "grad_norm": 0.6941933631896973, + "learning_rate": 1.4563517767191203e-05, + "loss": 2.0815, + "step": 10820 + }, + { + "epoch": 0.36, + "grad_norm": 0.7267068028450012, + "learning_rate": 1.4562571979867536e-05, + "loss": 2.0718, + "step": 10821 + }, + { + "epoch": 0.36, + "grad_norm": 0.7111235857009888, + "learning_rate": 1.4561626140998606e-05, + "loss": 2.1077, + "step": 10822 + }, + { + "epoch": 0.36, + "grad_norm": 0.779595136642456, + "learning_rate": 1.4560680250595092e-05, + "loss": 2.1109, + "step": 10823 + }, + { + "epoch": 0.36, + "grad_norm": 0.7360061407089233, + "learning_rate": 1.455973430866769e-05, + "loss": 2.1407, + "step": 10824 + }, + { + "epoch": 0.36, + "grad_norm": 0.745019257068634, + "learning_rate": 1.455878831522708e-05, + "loss": 2.1865, + "step": 10825 + }, + { + "epoch": 0.36, + "grad_norm": 0.7326734066009521, + "learning_rate": 1.455784227028395e-05, + "loss": 2.135, + "step": 10826 + }, + { + "epoch": 0.36, + "grad_norm": 0.725932776927948, + "learning_rate": 1.4556896173848991e-05, + "loss": 2.0618, + "step": 10827 + }, + { + "epoch": 0.36, + "grad_norm": 0.7488042712211609, + "learning_rate": 1.455595002593289e-05, + "loss": 2.1003, + "step": 10828 + }, + { + "epoch": 0.36, + "grad_norm": 0.7197643518447876, + "learning_rate": 1.4555003826546334e-05, + "loss": 2.0368, + "step": 10829 + }, + { + "epoch": 0.36, + "grad_norm": 0.7350213527679443, + "learning_rate": 1.4554057575700016e-05, + "loss": 2.1598, + "step": 10830 + }, + { + "epoch": 0.36, + "grad_norm": 0.7123099565505981, + "learning_rate": 1.4553111273404624e-05, + "loss": 2.1037, + "step": 10831 + }, + { + "epoch": 0.36, + "grad_norm": 0.7257305383682251, + "learning_rate": 1.4552164919670848e-05, + "loss": 2.1229, + "step": 10832 + }, + { + "epoch": 0.36, + "grad_norm": 0.7463681101799011, + "learning_rate": 1.4551218514509382e-05, + "loss": 2.1104, + "step": 10833 + }, + { + "epoch": 0.36, + "grad_norm": 0.7100160121917725, + "learning_rate": 1.4550272057930915e-05, + "loss": 2.1429, + "step": 10834 + }, + { + "epoch": 0.36, + "grad_norm": 0.7083027958869934, + "learning_rate": 1.4549325549946143e-05, + "loss": 2.0732, + "step": 10835 + }, + { + "epoch": 0.36, + "grad_norm": 0.7189925312995911, + "learning_rate": 1.4548378990565754e-05, + "loss": 2.0574, + "step": 10836 + }, + { + "epoch": 0.36, + "grad_norm": 0.7410835027694702, + "learning_rate": 1.4547432379800446e-05, + "loss": 2.1301, + "step": 10837 + }, + { + "epoch": 0.36, + "grad_norm": 0.8349276781082153, + "learning_rate": 1.4546485717660914e-05, + "loss": 2.1316, + "step": 10838 + }, + { + "epoch": 0.36, + "grad_norm": 0.7540861368179321, + "learning_rate": 1.454553900415785e-05, + "loss": 2.1142, + "step": 10839 + }, + { + "epoch": 0.36, + "grad_norm": 0.71322101354599, + "learning_rate": 1.4544592239301949e-05, + "loss": 2.0422, + "step": 10840 + }, + { + "epoch": 0.36, + "grad_norm": 0.7297841310501099, + "learning_rate": 1.454364542310391e-05, + "loss": 2.0814, + "step": 10841 + }, + { + "epoch": 0.36, + "grad_norm": 0.7599559426307678, + "learning_rate": 1.4542698555574428e-05, + "loss": 2.0643, + "step": 10842 + }, + { + "epoch": 0.36, + "grad_norm": 0.7415245175361633, + "learning_rate": 1.4541751636724197e-05, + "loss": 2.1242, + "step": 10843 + }, + { + "epoch": 0.36, + "grad_norm": 0.7150170803070068, + "learning_rate": 1.4540804666563922e-05, + "loss": 2.1166, + "step": 10844 + }, + { + "epoch": 0.36, + "grad_norm": 0.6866410374641418, + "learning_rate": 1.4539857645104294e-05, + "loss": 2.0342, + "step": 10845 + }, + { + "epoch": 0.36, + "grad_norm": 0.7315654754638672, + "learning_rate": 1.4538910572356015e-05, + "loss": 2.1193, + "step": 10846 + }, + { + "epoch": 0.36, + "grad_norm": 0.7464458346366882, + "learning_rate": 1.4537963448329785e-05, + "loss": 2.147, + "step": 10847 + }, + { + "epoch": 0.36, + "grad_norm": 0.7058669328689575, + "learning_rate": 1.4537016273036304e-05, + "loss": 2.1424, + "step": 10848 + }, + { + "epoch": 0.36, + "grad_norm": 0.7047792077064514, + "learning_rate": 1.4536069046486274e-05, + "loss": 2.061, + "step": 10849 + }, + { + "epoch": 0.36, + "grad_norm": 0.7293535470962524, + "learning_rate": 1.453512176869039e-05, + "loss": 2.1038, + "step": 10850 + }, + { + "epoch": 0.36, + "grad_norm": 0.7221577167510986, + "learning_rate": 1.453417443965936e-05, + "loss": 2.0948, + "step": 10851 + }, + { + "epoch": 0.36, + "grad_norm": 0.708378255367279, + "learning_rate": 1.4533227059403885e-05, + "loss": 2.0588, + "step": 10852 + }, + { + "epoch": 0.36, + "grad_norm": 0.7083930969238281, + "learning_rate": 1.4532279627934667e-05, + "loss": 2.1745, + "step": 10853 + }, + { + "epoch": 0.36, + "grad_norm": 0.7348835468292236, + "learning_rate": 1.4531332145262412e-05, + "loss": 2.1056, + "step": 10854 + }, + { + "epoch": 0.36, + "grad_norm": 0.7022838592529297, + "learning_rate": 1.453038461139782e-05, + "loss": 2.0842, + "step": 10855 + }, + { + "epoch": 0.36, + "grad_norm": 0.6926270723342896, + "learning_rate": 1.4529437026351596e-05, + "loss": 2.1476, + "step": 10856 + }, + { + "epoch": 0.36, + "grad_norm": 0.7325962781906128, + "learning_rate": 1.4528489390134448e-05, + "loss": 2.1105, + "step": 10857 + }, + { + "epoch": 0.36, + "grad_norm": 0.7328006029129028, + "learning_rate": 1.4527541702757082e-05, + "loss": 2.1299, + "step": 10858 + }, + { + "epoch": 0.36, + "grad_norm": 0.6998463273048401, + "learning_rate": 1.4526593964230203e-05, + "loss": 2.0754, + "step": 10859 + }, + { + "epoch": 0.36, + "grad_norm": 0.7028898596763611, + "learning_rate": 1.4525646174564518e-05, + "loss": 2.1532, + "step": 10860 + }, + { + "epoch": 0.36, + "grad_norm": 0.7503082752227783, + "learning_rate": 1.4524698333770735e-05, + "loss": 2.1873, + "step": 10861 + }, + { + "epoch": 0.36, + "grad_norm": 0.7635589838027954, + "learning_rate": 1.4523750441859563e-05, + "loss": 2.1596, + "step": 10862 + }, + { + "epoch": 0.36, + "grad_norm": 0.7186018228530884, + "learning_rate": 1.4522802498841709e-05, + "loss": 2.1475, + "step": 10863 + }, + { + "epoch": 0.36, + "grad_norm": 0.7263177633285522, + "learning_rate": 1.4521854504727882e-05, + "loss": 2.115, + "step": 10864 + }, + { + "epoch": 0.36, + "grad_norm": 0.7370553016662598, + "learning_rate": 1.4520906459528792e-05, + "loss": 2.1552, + "step": 10865 + }, + { + "epoch": 0.36, + "grad_norm": 0.7102876901626587, + "learning_rate": 1.4519958363255151e-05, + "loss": 2.1163, + "step": 10866 + }, + { + "epoch": 0.36, + "grad_norm": 0.7208355665206909, + "learning_rate": 1.451901021591767e-05, + "loss": 2.0658, + "step": 10867 + }, + { + "epoch": 0.36, + "grad_norm": 0.7425570487976074, + "learning_rate": 1.4518062017527062e-05, + "loss": 2.1783, + "step": 10868 + }, + { + "epoch": 0.36, + "grad_norm": 0.7198006510734558, + "learning_rate": 1.4517113768094033e-05, + "loss": 2.172, + "step": 10869 + }, + { + "epoch": 0.36, + "grad_norm": 0.7048671245574951, + "learning_rate": 1.4516165467629302e-05, + "loss": 2.1586, + "step": 10870 + }, + { + "epoch": 0.36, + "grad_norm": 0.7216575145721436, + "learning_rate": 1.4515217116143583e-05, + "loss": 2.1169, + "step": 10871 + }, + { + "epoch": 0.36, + "grad_norm": 0.7407144904136658, + "learning_rate": 1.4514268713647584e-05, + "loss": 2.1412, + "step": 10872 + }, + { + "epoch": 0.36, + "grad_norm": 0.7324246168136597, + "learning_rate": 1.4513320260152024e-05, + "loss": 2.1501, + "step": 10873 + }, + { + "epoch": 0.36, + "grad_norm": 0.7167472243309021, + "learning_rate": 1.4512371755667616e-05, + "loss": 2.1128, + "step": 10874 + }, + { + "epoch": 0.36, + "grad_norm": 0.7325664162635803, + "learning_rate": 1.4511423200205074e-05, + "loss": 2.0935, + "step": 10875 + }, + { + "epoch": 0.36, + "grad_norm": 0.718949556350708, + "learning_rate": 1.451047459377512e-05, + "loss": 2.1462, + "step": 10876 + }, + { + "epoch": 0.36, + "grad_norm": 0.7398171424865723, + "learning_rate": 1.4509525936388466e-05, + "loss": 2.1264, + "step": 10877 + }, + { + "epoch": 0.36, + "grad_norm": 0.7043357491493225, + "learning_rate": 1.450857722805583e-05, + "loss": 2.1135, + "step": 10878 + }, + { + "epoch": 0.36, + "grad_norm": 0.7486860752105713, + "learning_rate": 1.4507628468787935e-05, + "loss": 2.0986, + "step": 10879 + }, + { + "epoch": 0.36, + "grad_norm": 0.7363168597221375, + "learning_rate": 1.4506679658595491e-05, + "loss": 2.1759, + "step": 10880 + }, + { + "epoch": 0.36, + "grad_norm": 0.7130545973777771, + "learning_rate": 1.4505730797489224e-05, + "loss": 2.0961, + "step": 10881 + }, + { + "epoch": 0.36, + "grad_norm": 0.7040376663208008, + "learning_rate": 1.450478188547985e-05, + "loss": 2.1021, + "step": 10882 + }, + { + "epoch": 0.36, + "grad_norm": 0.7261214852333069, + "learning_rate": 1.4503832922578089e-05, + "loss": 2.1328, + "step": 10883 + }, + { + "epoch": 0.36, + "grad_norm": 0.7242673635482788, + "learning_rate": 1.4502883908794668e-05, + "loss": 2.1511, + "step": 10884 + }, + { + "epoch": 0.36, + "grad_norm": 0.7208804488182068, + "learning_rate": 1.4501934844140298e-05, + "loss": 2.0874, + "step": 10885 + }, + { + "epoch": 0.36, + "grad_norm": 0.7144356369972229, + "learning_rate": 1.450098572862571e-05, + "loss": 2.1028, + "step": 10886 + }, + { + "epoch": 0.36, + "grad_norm": 0.7056716084480286, + "learning_rate": 1.450003656226162e-05, + "loss": 2.1162, + "step": 10887 + }, + { + "epoch": 0.36, + "grad_norm": 0.7253352403640747, + "learning_rate": 1.4499087345058755e-05, + "loss": 2.1318, + "step": 10888 + }, + { + "epoch": 0.36, + "grad_norm": 0.7326635718345642, + "learning_rate": 1.449813807702784e-05, + "loss": 2.1288, + "step": 10889 + }, + { + "epoch": 0.36, + "grad_norm": 0.7527998685836792, + "learning_rate": 1.4497188758179594e-05, + "loss": 2.0779, + "step": 10890 + }, + { + "epoch": 0.36, + "grad_norm": 0.7145019769668579, + "learning_rate": 1.4496239388524747e-05, + "loss": 2.0462, + "step": 10891 + }, + { + "epoch": 0.36, + "grad_norm": 0.7095299363136292, + "learning_rate": 1.4495289968074023e-05, + "loss": 2.0985, + "step": 10892 + }, + { + "epoch": 0.36, + "grad_norm": 0.7044367790222168, + "learning_rate": 1.4494340496838147e-05, + "loss": 2.11, + "step": 10893 + }, + { + "epoch": 0.36, + "grad_norm": 0.7180089354515076, + "learning_rate": 1.4493390974827844e-05, + "loss": 2.065, + "step": 10894 + }, + { + "epoch": 0.36, + "grad_norm": 0.7672885060310364, + "learning_rate": 1.4492441402053847e-05, + "loss": 2.0492, + "step": 10895 + }, + { + "epoch": 0.36, + "grad_norm": 0.7067762017250061, + "learning_rate": 1.4491491778526876e-05, + "loss": 2.1265, + "step": 10896 + }, + { + "epoch": 0.36, + "grad_norm": 0.7342500686645508, + "learning_rate": 1.4490542104257665e-05, + "loss": 2.0786, + "step": 10897 + }, + { + "epoch": 0.36, + "grad_norm": 0.7158714532852173, + "learning_rate": 1.4489592379256943e-05, + "loss": 2.1077, + "step": 10898 + }, + { + "epoch": 0.36, + "grad_norm": 0.7045587301254272, + "learning_rate": 1.4488642603535434e-05, + "loss": 2.0991, + "step": 10899 + }, + { + "epoch": 0.36, + "grad_norm": 0.7314188480377197, + "learning_rate": 1.4487692777103872e-05, + "loss": 2.0476, + "step": 10900 + }, + { + "epoch": 0.36, + "grad_norm": 0.7130773663520813, + "learning_rate": 1.4486742899972987e-05, + "loss": 2.0436, + "step": 10901 + }, + { + "epoch": 0.36, + "grad_norm": 0.713201642036438, + "learning_rate": 1.4485792972153511e-05, + "loss": 2.1237, + "step": 10902 + }, + { + "epoch": 0.36, + "grad_norm": 0.6957569718360901, + "learning_rate": 1.4484842993656175e-05, + "loss": 2.0787, + "step": 10903 + }, + { + "epoch": 0.36, + "grad_norm": 0.7366880178451538, + "learning_rate": 1.4483892964491714e-05, + "loss": 2.0914, + "step": 10904 + }, + { + "epoch": 0.36, + "grad_norm": 0.7480309009552002, + "learning_rate": 1.4482942884670853e-05, + "loss": 2.1052, + "step": 10905 + }, + { + "epoch": 0.36, + "grad_norm": 0.7176265120506287, + "learning_rate": 1.4481992754204334e-05, + "loss": 2.1913, + "step": 10906 + }, + { + "epoch": 0.36, + "grad_norm": 0.7319594621658325, + "learning_rate": 1.4481042573102887e-05, + "loss": 2.1537, + "step": 10907 + }, + { + "epoch": 0.36, + "grad_norm": 0.7314065098762512, + "learning_rate": 1.4480092341377246e-05, + "loss": 2.1417, + "step": 10908 + }, + { + "epoch": 0.36, + "grad_norm": 0.726470410823822, + "learning_rate": 1.4479142059038149e-05, + "loss": 2.0456, + "step": 10909 + }, + { + "epoch": 0.36, + "grad_norm": 0.7410440444946289, + "learning_rate": 1.447819172609633e-05, + "loss": 2.1008, + "step": 10910 + }, + { + "epoch": 0.36, + "grad_norm": 0.7370637655258179, + "learning_rate": 1.4477241342562523e-05, + "loss": 2.1506, + "step": 10911 + }, + { + "epoch": 0.36, + "grad_norm": 0.7347295880317688, + "learning_rate": 1.4476290908447467e-05, + "loss": 2.1154, + "step": 10912 + }, + { + "epoch": 0.36, + "grad_norm": 0.7187429666519165, + "learning_rate": 1.4475340423761903e-05, + "loss": 2.1125, + "step": 10913 + }, + { + "epoch": 0.36, + "grad_norm": 0.7010026574134827, + "learning_rate": 1.4474389888516562e-05, + "loss": 2.0997, + "step": 10914 + }, + { + "epoch": 0.36, + "grad_norm": 0.7269919514656067, + "learning_rate": 1.447343930272219e-05, + "loss": 2.0665, + "step": 10915 + }, + { + "epoch": 0.36, + "grad_norm": 0.7168606519699097, + "learning_rate": 1.4472488666389518e-05, + "loss": 2.0581, + "step": 10916 + }, + { + "epoch": 0.36, + "grad_norm": 0.7325286269187927, + "learning_rate": 1.4471537979529298e-05, + "loss": 2.0813, + "step": 10917 + }, + { + "epoch": 0.36, + "grad_norm": 0.7726890444755554, + "learning_rate": 1.4470587242152257e-05, + "loss": 2.0679, + "step": 10918 + }, + { + "epoch": 0.36, + "grad_norm": 0.742205798625946, + "learning_rate": 1.4469636454269138e-05, + "loss": 2.1163, + "step": 10919 + }, + { + "epoch": 0.36, + "grad_norm": 0.7359945774078369, + "learning_rate": 1.4468685615890692e-05, + "loss": 2.1488, + "step": 10920 + }, + { + "epoch": 0.36, + "grad_norm": 0.7210713028907776, + "learning_rate": 1.4467734727027652e-05, + "loss": 2.0359, + "step": 10921 + }, + { + "epoch": 0.36, + "grad_norm": 0.7315620183944702, + "learning_rate": 1.4466783787690763e-05, + "loss": 2.1334, + "step": 10922 + }, + { + "epoch": 0.36, + "grad_norm": 0.7648641467094421, + "learning_rate": 1.446583279789077e-05, + "loss": 2.0994, + "step": 10923 + }, + { + "epoch": 0.36, + "grad_norm": 0.787000298500061, + "learning_rate": 1.4464881757638414e-05, + "loss": 2.1573, + "step": 10924 + }, + { + "epoch": 0.36, + "grad_norm": 0.7458527684211731, + "learning_rate": 1.4463930666944442e-05, + "loss": 2.1377, + "step": 10925 + }, + { + "epoch": 0.36, + "grad_norm": 0.8063718676567078, + "learning_rate": 1.4462979525819598e-05, + "loss": 2.1465, + "step": 10926 + }, + { + "epoch": 0.36, + "grad_norm": 0.7354551553726196, + "learning_rate": 1.4462028334274622e-05, + "loss": 2.0684, + "step": 10927 + }, + { + "epoch": 0.36, + "grad_norm": 0.7330908179283142, + "learning_rate": 1.446107709232027e-05, + "loss": 2.1185, + "step": 10928 + }, + { + "epoch": 0.36, + "grad_norm": 0.7285526990890503, + "learning_rate": 1.4460125799967281e-05, + "loss": 2.139, + "step": 10929 + }, + { + "epoch": 0.36, + "grad_norm": 0.7355754971504211, + "learning_rate": 1.4459174457226406e-05, + "loss": 2.1123, + "step": 10930 + }, + { + "epoch": 0.36, + "grad_norm": 0.7244279980659485, + "learning_rate": 1.4458223064108393e-05, + "loss": 2.0825, + "step": 10931 + }, + { + "epoch": 0.36, + "grad_norm": 0.6986587643623352, + "learning_rate": 1.4457271620623985e-05, + "loss": 2.0643, + "step": 10932 + }, + { + "epoch": 0.36, + "grad_norm": 0.7295422554016113, + "learning_rate": 1.4456320126783936e-05, + "loss": 2.1732, + "step": 10933 + }, + { + "epoch": 0.36, + "grad_norm": 0.72148197889328, + "learning_rate": 1.4455368582598995e-05, + "loss": 2.0792, + "step": 10934 + }, + { + "epoch": 0.36, + "grad_norm": 0.7096263766288757, + "learning_rate": 1.4454416988079907e-05, + "loss": 2.0761, + "step": 10935 + }, + { + "epoch": 0.36, + "grad_norm": 0.7505578994750977, + "learning_rate": 1.445346534323743e-05, + "loss": 2.1351, + "step": 10936 + }, + { + "epoch": 0.36, + "grad_norm": 0.7266875505447388, + "learning_rate": 1.4452513648082311e-05, + "loss": 2.0368, + "step": 10937 + }, + { + "epoch": 0.36, + "grad_norm": 0.6972452402114868, + "learning_rate": 1.4451561902625299e-05, + "loss": 2.1115, + "step": 10938 + }, + { + "epoch": 0.36, + "grad_norm": 0.740939736366272, + "learning_rate": 1.4450610106877156e-05, + "loss": 2.0487, + "step": 10939 + }, + { + "epoch": 0.36, + "grad_norm": 0.7329394221305847, + "learning_rate": 1.4449658260848623e-05, + "loss": 2.1026, + "step": 10940 + }, + { + "epoch": 0.36, + "grad_norm": 0.7096993923187256, + "learning_rate": 1.444870636455046e-05, + "loss": 2.1233, + "step": 10941 + }, + { + "epoch": 0.36, + "grad_norm": 0.7099723815917969, + "learning_rate": 1.444775441799342e-05, + "loss": 2.1275, + "step": 10942 + }, + { + "epoch": 0.36, + "grad_norm": 0.725796103477478, + "learning_rate": 1.4446802421188255e-05, + "loss": 2.0754, + "step": 10943 + }, + { + "epoch": 0.36, + "grad_norm": 0.7467756271362305, + "learning_rate": 1.4445850374145727e-05, + "loss": 2.1314, + "step": 10944 + }, + { + "epoch": 0.36, + "grad_norm": 0.7312129139900208, + "learning_rate": 1.4444898276876584e-05, + "loss": 2.1195, + "step": 10945 + }, + { + "epoch": 0.36, + "grad_norm": 0.737076997756958, + "learning_rate": 1.4443946129391586e-05, + "loss": 2.081, + "step": 10946 + }, + { + "epoch": 0.36, + "grad_norm": 0.7481940388679504, + "learning_rate": 1.4442993931701488e-05, + "loss": 2.1332, + "step": 10947 + }, + { + "epoch": 0.36, + "grad_norm": 0.7531542181968689, + "learning_rate": 1.444204168381705e-05, + "loss": 2.0704, + "step": 10948 + }, + { + "epoch": 0.36, + "grad_norm": 0.7210349440574646, + "learning_rate": 1.4441089385749027e-05, + "loss": 2.1961, + "step": 10949 + }, + { + "epoch": 0.36, + "grad_norm": 0.7450878024101257, + "learning_rate": 1.444013703750818e-05, + "loss": 2.1039, + "step": 10950 + }, + { + "epoch": 0.36, + "grad_norm": 0.7782377004623413, + "learning_rate": 1.4439184639105266e-05, + "loss": 2.1753, + "step": 10951 + }, + { + "epoch": 0.36, + "grad_norm": 0.7599091529846191, + "learning_rate": 1.4438232190551045e-05, + "loss": 2.1296, + "step": 10952 + }, + { + "epoch": 0.36, + "grad_norm": 0.7225616574287415, + "learning_rate": 1.443727969185628e-05, + "loss": 2.0826, + "step": 10953 + }, + { + "epoch": 0.36, + "grad_norm": 0.7230786085128784, + "learning_rate": 1.4436327143031728e-05, + "loss": 2.106, + "step": 10954 + }, + { + "epoch": 0.36, + "grad_norm": 0.8053635358810425, + "learning_rate": 1.4435374544088152e-05, + "loss": 2.0593, + "step": 10955 + }, + { + "epoch": 0.36, + "grad_norm": 0.7252182960510254, + "learning_rate": 1.4434421895036315e-05, + "loss": 2.111, + "step": 10956 + }, + { + "epoch": 0.36, + "grad_norm": 0.7089612483978271, + "learning_rate": 1.4433469195886977e-05, + "loss": 2.1092, + "step": 10957 + }, + { + "epoch": 0.36, + "grad_norm": 0.7707215547561646, + "learning_rate": 1.4432516446650902e-05, + "loss": 2.1239, + "step": 10958 + }, + { + "epoch": 0.36, + "grad_norm": 0.7279652953147888, + "learning_rate": 1.4431563647338853e-05, + "loss": 2.1468, + "step": 10959 + }, + { + "epoch": 0.36, + "grad_norm": 0.7217979431152344, + "learning_rate": 1.4430610797961597e-05, + "loss": 2.1101, + "step": 10960 + }, + { + "epoch": 0.36, + "grad_norm": 0.7599738836288452, + "learning_rate": 1.4429657898529897e-05, + "loss": 2.1634, + "step": 10961 + }, + { + "epoch": 0.36, + "grad_norm": 0.7474250197410583, + "learning_rate": 1.4428704949054516e-05, + "loss": 2.169, + "step": 10962 + }, + { + "epoch": 0.36, + "grad_norm": 0.737455427646637, + "learning_rate": 1.4427751949546223e-05, + "loss": 2.1138, + "step": 10963 + }, + { + "epoch": 0.36, + "grad_norm": 0.723259449005127, + "learning_rate": 1.4426798900015786e-05, + "loss": 2.074, + "step": 10964 + }, + { + "epoch": 0.36, + "grad_norm": 0.6938008666038513, + "learning_rate": 1.4425845800473965e-05, + "loss": 2.1703, + "step": 10965 + }, + { + "epoch": 0.36, + "grad_norm": 0.7192168235778809, + "learning_rate": 1.4424892650931535e-05, + "loss": 2.1419, + "step": 10966 + }, + { + "epoch": 0.36, + "grad_norm": 0.7198514938354492, + "learning_rate": 1.4423939451399261e-05, + "loss": 2.1041, + "step": 10967 + }, + { + "epoch": 0.36, + "grad_norm": 0.7028201818466187, + "learning_rate": 1.442298620188791e-05, + "loss": 2.0459, + "step": 10968 + }, + { + "epoch": 0.36, + "grad_norm": 0.7281674742698669, + "learning_rate": 1.4422032902408254e-05, + "loss": 2.0467, + "step": 10969 + }, + { + "epoch": 0.36, + "grad_norm": 0.7188822031021118, + "learning_rate": 1.442107955297106e-05, + "loss": 2.1042, + "step": 10970 + }, + { + "epoch": 0.37, + "grad_norm": 0.7240476012229919, + "learning_rate": 1.4420126153587104e-05, + "loss": 2.0655, + "step": 10971 + }, + { + "epoch": 0.37, + "grad_norm": 0.7237669229507446, + "learning_rate": 1.441917270426715e-05, + "loss": 2.0923, + "step": 10972 + }, + { + "epoch": 0.37, + "grad_norm": 0.7235566973686218, + "learning_rate": 1.4418219205021972e-05, + "loss": 2.1215, + "step": 10973 + }, + { + "epoch": 0.37, + "grad_norm": 0.7344925403594971, + "learning_rate": 1.4417265655862345e-05, + "loss": 2.1361, + "step": 10974 + }, + { + "epoch": 0.37, + "grad_norm": 0.746641218662262, + "learning_rate": 1.441631205679904e-05, + "loss": 2.1247, + "step": 10975 + }, + { + "epoch": 0.37, + "grad_norm": 0.7065014839172363, + "learning_rate": 1.4415358407842828e-05, + "loss": 2.1162, + "step": 10976 + }, + { + "epoch": 0.37, + "grad_norm": 0.7090362906455994, + "learning_rate": 1.4414404709004486e-05, + "loss": 2.1161, + "step": 10977 + }, + { + "epoch": 0.37, + "grad_norm": 0.7639482021331787, + "learning_rate": 1.4413450960294786e-05, + "loss": 2.076, + "step": 10978 + }, + { + "epoch": 0.37, + "grad_norm": 0.7675634622573853, + "learning_rate": 1.4412497161724504e-05, + "loss": 1.9846, + "step": 10979 + }, + { + "epoch": 0.37, + "grad_norm": 0.723737359046936, + "learning_rate": 1.4411543313304413e-05, + "loss": 2.1005, + "step": 10980 + }, + { + "epoch": 0.37, + "grad_norm": 0.7130969166755676, + "learning_rate": 1.441058941504529e-05, + "loss": 2.167, + "step": 10981 + }, + { + "epoch": 0.37, + "grad_norm": 0.7220802307128906, + "learning_rate": 1.4409635466957916e-05, + "loss": 2.0338, + "step": 10982 + }, + { + "epoch": 0.37, + "grad_norm": 0.7066482901573181, + "learning_rate": 1.4408681469053063e-05, + "loss": 2.0862, + "step": 10983 + }, + { + "epoch": 0.37, + "grad_norm": 0.7124679088592529, + "learning_rate": 1.4407727421341511e-05, + "loss": 2.1052, + "step": 10984 + }, + { + "epoch": 0.37, + "grad_norm": 0.6884941458702087, + "learning_rate": 1.4406773323834038e-05, + "loss": 2.0652, + "step": 10985 + }, + { + "epoch": 0.37, + "grad_norm": 0.7236327528953552, + "learning_rate": 1.4405819176541425e-05, + "loss": 2.1254, + "step": 10986 + }, + { + "epoch": 0.37, + "grad_norm": 0.7210729718208313, + "learning_rate": 1.4404864979474444e-05, + "loss": 2.072, + "step": 10987 + }, + { + "epoch": 0.37, + "grad_norm": 0.7433964014053345, + "learning_rate": 1.4403910732643884e-05, + "loss": 2.1967, + "step": 10988 + }, + { + "epoch": 0.37, + "grad_norm": 0.7166556715965271, + "learning_rate": 1.440295643606052e-05, + "loss": 2.0949, + "step": 10989 + }, + { + "epoch": 0.37, + "grad_norm": 0.7392034530639648, + "learning_rate": 1.4402002089735134e-05, + "loss": 2.1939, + "step": 10990 + }, + { + "epoch": 0.37, + "grad_norm": 0.7213878035545349, + "learning_rate": 1.440104769367851e-05, + "loss": 2.0616, + "step": 10991 + }, + { + "epoch": 0.37, + "grad_norm": 0.7424634099006653, + "learning_rate": 1.4400093247901426e-05, + "loss": 2.0829, + "step": 10992 + }, + { + "epoch": 0.37, + "grad_norm": 0.7225481867790222, + "learning_rate": 1.4399138752414667e-05, + "loss": 2.1214, + "step": 10993 + }, + { + "epoch": 0.37, + "grad_norm": 0.7312317490577698, + "learning_rate": 1.4398184207229018e-05, + "loss": 2.1228, + "step": 10994 + }, + { + "epoch": 0.37, + "grad_norm": 0.72078537940979, + "learning_rate": 1.4397229612355262e-05, + "loss": 2.126, + "step": 10995 + }, + { + "epoch": 0.37, + "grad_norm": 0.7222729325294495, + "learning_rate": 1.4396274967804182e-05, + "loss": 2.0879, + "step": 10996 + }, + { + "epoch": 0.37, + "grad_norm": 0.7208762168884277, + "learning_rate": 1.4395320273586565e-05, + "loss": 2.0235, + "step": 10997 + }, + { + "epoch": 0.37, + "grad_norm": 0.7261767387390137, + "learning_rate": 1.4394365529713195e-05, + "loss": 2.0774, + "step": 10998 + }, + { + "epoch": 0.37, + "grad_norm": 0.7472676038742065, + "learning_rate": 1.439341073619486e-05, + "loss": 2.1451, + "step": 10999 + }, + { + "epoch": 0.37, + "grad_norm": 0.7569315433502197, + "learning_rate": 1.439245589304234e-05, + "loss": 2.0944, + "step": 11000 + }, + { + "epoch": 0.37, + "grad_norm": 0.7565671801567078, + "learning_rate": 1.4391501000266433e-05, + "loss": 2.0761, + "step": 11001 + }, + { + "epoch": 0.37, + "grad_norm": 0.7336362600326538, + "learning_rate": 1.4390546057877919e-05, + "loss": 2.0928, + "step": 11002 + }, + { + "epoch": 0.37, + "grad_norm": 0.7405349016189575, + "learning_rate": 1.438959106588759e-05, + "loss": 2.1121, + "step": 11003 + }, + { + "epoch": 0.37, + "grad_norm": 0.7202802896499634, + "learning_rate": 1.4388636024306232e-05, + "loss": 2.1496, + "step": 11004 + }, + { + "epoch": 0.37, + "grad_norm": 0.7333552837371826, + "learning_rate": 1.4387680933144637e-05, + "loss": 2.0726, + "step": 11005 + }, + { + "epoch": 0.37, + "grad_norm": 0.7481369972229004, + "learning_rate": 1.4386725792413594e-05, + "loss": 2.0554, + "step": 11006 + }, + { + "epoch": 0.37, + "grad_norm": 0.7050867676734924, + "learning_rate": 1.4385770602123894e-05, + "loss": 2.0215, + "step": 11007 + }, + { + "epoch": 0.37, + "grad_norm": 0.7272744178771973, + "learning_rate": 1.4384815362286331e-05, + "loss": 2.1404, + "step": 11008 + }, + { + "epoch": 0.37, + "grad_norm": 0.7782060503959656, + "learning_rate": 1.4383860072911687e-05, + "loss": 2.1032, + "step": 11009 + }, + { + "epoch": 0.37, + "grad_norm": 0.7342901825904846, + "learning_rate": 1.4382904734010768e-05, + "loss": 2.0913, + "step": 11010 + }, + { + "epoch": 0.37, + "grad_norm": 0.7254053950309753, + "learning_rate": 1.4381949345594355e-05, + "loss": 2.1383, + "step": 11011 + }, + { + "epoch": 0.37, + "grad_norm": 0.7419200539588928, + "learning_rate": 1.4380993907673248e-05, + "loss": 2.0881, + "step": 11012 + }, + { + "epoch": 0.37, + "grad_norm": 0.7126465439796448, + "learning_rate": 1.438003842025824e-05, + "loss": 2.0379, + "step": 11013 + }, + { + "epoch": 0.37, + "grad_norm": 0.738862156867981, + "learning_rate": 1.4379082883360125e-05, + "loss": 2.0451, + "step": 11014 + }, + { + "epoch": 0.37, + "grad_norm": 0.7291884422302246, + "learning_rate": 1.4378127296989698e-05, + "loss": 2.1312, + "step": 11015 + }, + { + "epoch": 0.37, + "grad_norm": 0.7657714486122131, + "learning_rate": 1.4377171661157755e-05, + "loss": 2.1354, + "step": 11016 + }, + { + "epoch": 0.37, + "grad_norm": 0.7402278780937195, + "learning_rate": 1.4376215975875089e-05, + "loss": 2.097, + "step": 11017 + }, + { + "epoch": 0.37, + "grad_norm": 0.7361637949943542, + "learning_rate": 1.4375260241152503e-05, + "loss": 2.0977, + "step": 11018 + }, + { + "epoch": 0.37, + "grad_norm": 0.7245307564735413, + "learning_rate": 1.4374304457000792e-05, + "loss": 2.072, + "step": 11019 + }, + { + "epoch": 0.37, + "grad_norm": 0.7223939895629883, + "learning_rate": 1.4373348623430747e-05, + "loss": 2.1585, + "step": 11020 + }, + { + "epoch": 0.37, + "grad_norm": 0.6904551386833191, + "learning_rate": 1.4372392740453178e-05, + "loss": 2.0594, + "step": 11021 + }, + { + "epoch": 0.37, + "grad_norm": 0.6923089623451233, + "learning_rate": 1.4371436808078876e-05, + "loss": 2.1211, + "step": 11022 + }, + { + "epoch": 0.37, + "grad_norm": 0.7443510890007019, + "learning_rate": 1.437048082631864e-05, + "loss": 2.0624, + "step": 11023 + }, + { + "epoch": 0.37, + "grad_norm": 0.7004593014717102, + "learning_rate": 1.4369524795183278e-05, + "loss": 2.0124, + "step": 11024 + }, + { + "epoch": 0.37, + "grad_norm": 0.7271811962127686, + "learning_rate": 1.4368568714683584e-05, + "loss": 2.1124, + "step": 11025 + }, + { + "epoch": 0.37, + "grad_norm": 0.7128680348396301, + "learning_rate": 1.436761258483036e-05, + "loss": 2.0778, + "step": 11026 + }, + { + "epoch": 0.37, + "grad_norm": 0.7634636163711548, + "learning_rate": 1.4366656405634408e-05, + "loss": 2.1213, + "step": 11027 + }, + { + "epoch": 0.37, + "grad_norm": 0.7193471193313599, + "learning_rate": 1.4365700177106533e-05, + "loss": 2.0803, + "step": 11028 + }, + { + "epoch": 0.37, + "grad_norm": 0.7342803478240967, + "learning_rate": 1.4364743899257534e-05, + "loss": 2.1395, + "step": 11029 + }, + { + "epoch": 0.37, + "grad_norm": 0.7173555493354797, + "learning_rate": 1.436378757209822e-05, + "loss": 2.1625, + "step": 11030 + }, + { + "epoch": 0.37, + "grad_norm": 0.7119832038879395, + "learning_rate": 1.4362831195639387e-05, + "loss": 2.1487, + "step": 11031 + }, + { + "epoch": 0.37, + "grad_norm": 0.7320874333381653, + "learning_rate": 1.4361874769891849e-05, + "loss": 2.0987, + "step": 11032 + }, + { + "epoch": 0.37, + "grad_norm": 0.722071647644043, + "learning_rate": 1.4360918294866402e-05, + "loss": 2.084, + "step": 11033 + }, + { + "epoch": 0.37, + "grad_norm": 0.7141579985618591, + "learning_rate": 1.4359961770573854e-05, + "loss": 2.1022, + "step": 11034 + }, + { + "epoch": 0.37, + "grad_norm": 0.6977395415306091, + "learning_rate": 1.4359005197025018e-05, + "loss": 2.13, + "step": 11035 + }, + { + "epoch": 0.37, + "grad_norm": 0.73216712474823, + "learning_rate": 1.4358048574230693e-05, + "loss": 2.1278, + "step": 11036 + }, + { + "epoch": 0.37, + "grad_norm": 0.7248515486717224, + "learning_rate": 1.4357091902201693e-05, + "loss": 2.1612, + "step": 11037 + }, + { + "epoch": 0.37, + "grad_norm": 0.7488477230072021, + "learning_rate": 1.4356135180948818e-05, + "loss": 2.1416, + "step": 11038 + }, + { + "epoch": 0.37, + "grad_norm": 0.7104046940803528, + "learning_rate": 1.4355178410482882e-05, + "loss": 2.1407, + "step": 11039 + }, + { + "epoch": 0.37, + "grad_norm": 0.7590029835700989, + "learning_rate": 1.4354221590814693e-05, + "loss": 2.1583, + "step": 11040 + }, + { + "epoch": 0.37, + "grad_norm": 0.7558960318565369, + "learning_rate": 1.4353264721955058e-05, + "loss": 2.0891, + "step": 11041 + }, + { + "epoch": 0.37, + "grad_norm": 0.7772329449653625, + "learning_rate": 1.4352307803914793e-05, + "loss": 2.0879, + "step": 11042 + }, + { + "epoch": 0.37, + "grad_norm": 0.7465712428092957, + "learning_rate": 1.4351350836704705e-05, + "loss": 2.0408, + "step": 11043 + }, + { + "epoch": 0.37, + "grad_norm": 0.732805073261261, + "learning_rate": 1.4350393820335602e-05, + "loss": 2.1925, + "step": 11044 + }, + { + "epoch": 0.37, + "grad_norm": 0.6990310549736023, + "learning_rate": 1.4349436754818302e-05, + "loss": 2.1764, + "step": 11045 + }, + { + "epoch": 0.37, + "grad_norm": 0.7209809422492981, + "learning_rate": 1.4348479640163614e-05, + "loss": 2.2235, + "step": 11046 + }, + { + "epoch": 0.37, + "grad_norm": 0.7356286644935608, + "learning_rate": 1.4347522476382349e-05, + "loss": 2.1074, + "step": 11047 + }, + { + "epoch": 0.37, + "grad_norm": 0.7203242182731628, + "learning_rate": 1.4346565263485328e-05, + "loss": 2.1255, + "step": 11048 + }, + { + "epoch": 0.37, + "grad_norm": 0.7170785069465637, + "learning_rate": 1.4345608001483357e-05, + "loss": 2.0362, + "step": 11049 + }, + { + "epoch": 0.37, + "grad_norm": 0.7259039282798767, + "learning_rate": 1.4344650690387255e-05, + "loss": 2.1136, + "step": 11050 + }, + { + "epoch": 0.37, + "grad_norm": 0.739266574382782, + "learning_rate": 1.4343693330207833e-05, + "loss": 2.0615, + "step": 11051 + }, + { + "epoch": 0.37, + "grad_norm": 0.7370533347129822, + "learning_rate": 1.434273592095591e-05, + "loss": 2.0629, + "step": 11052 + }, + { + "epoch": 0.37, + "grad_norm": 0.7941794991493225, + "learning_rate": 1.4341778462642305e-05, + "loss": 2.1063, + "step": 11053 + }, + { + "epoch": 0.37, + "grad_norm": 0.7523413300514221, + "learning_rate": 1.4340820955277828e-05, + "loss": 2.0863, + "step": 11054 + }, + { + "epoch": 0.37, + "grad_norm": 0.7648608088493347, + "learning_rate": 1.4339863398873302e-05, + "loss": 2.1059, + "step": 11055 + }, + { + "epoch": 0.37, + "grad_norm": 0.7154421806335449, + "learning_rate": 1.4338905793439541e-05, + "loss": 2.1347, + "step": 11056 + }, + { + "epoch": 0.37, + "grad_norm": 0.7509371042251587, + "learning_rate": 1.4337948138987368e-05, + "loss": 2.116, + "step": 11057 + }, + { + "epoch": 0.37, + "grad_norm": 0.7362762093544006, + "learning_rate": 1.4336990435527593e-05, + "loss": 2.0682, + "step": 11058 + }, + { + "epoch": 0.37, + "grad_norm": 0.7995498180389404, + "learning_rate": 1.4336032683071049e-05, + "loss": 2.0501, + "step": 11059 + }, + { + "epoch": 0.37, + "grad_norm": 0.7454381585121155, + "learning_rate": 1.4335074881628546e-05, + "loss": 2.0681, + "step": 11060 + }, + { + "epoch": 0.37, + "grad_norm": 0.7226183414459229, + "learning_rate": 1.4334117031210906e-05, + "loss": 2.1514, + "step": 11061 + }, + { + "epoch": 0.37, + "grad_norm": 0.7082606554031372, + "learning_rate": 1.4333159131828953e-05, + "loss": 2.0745, + "step": 11062 + }, + { + "epoch": 0.37, + "grad_norm": 0.7274426221847534, + "learning_rate": 1.4332201183493509e-05, + "loss": 2.0408, + "step": 11063 + }, + { + "epoch": 0.37, + "grad_norm": 0.7160465717315674, + "learning_rate": 1.4331243186215393e-05, + "loss": 2.0713, + "step": 11064 + }, + { + "epoch": 0.37, + "grad_norm": 0.7047940492630005, + "learning_rate": 1.4330285140005432e-05, + "loss": 2.134, + "step": 11065 + }, + { + "epoch": 0.37, + "grad_norm": 0.705583393573761, + "learning_rate": 1.4329327044874444e-05, + "loss": 2.0535, + "step": 11066 + }, + { + "epoch": 0.37, + "grad_norm": 0.7313485145568848, + "learning_rate": 1.4328368900833259e-05, + "loss": 2.0484, + "step": 11067 + }, + { + "epoch": 0.37, + "grad_norm": 0.7553391456604004, + "learning_rate": 1.43274107078927e-05, + "loss": 2.0883, + "step": 11068 + }, + { + "epoch": 0.37, + "grad_norm": 0.7202082276344299, + "learning_rate": 1.4326452466063585e-05, + "loss": 2.0871, + "step": 11069 + }, + { + "epoch": 0.37, + "grad_norm": 0.7087470889091492, + "learning_rate": 1.432549417535675e-05, + "loss": 2.082, + "step": 11070 + }, + { + "epoch": 0.37, + "grad_norm": 0.7409905791282654, + "learning_rate": 1.4324535835783017e-05, + "loss": 2.1651, + "step": 11071 + }, + { + "epoch": 0.37, + "grad_norm": 0.7737860083580017, + "learning_rate": 1.4323577447353212e-05, + "loss": 2.105, + "step": 11072 + }, + { + "epoch": 0.37, + "grad_norm": 0.739077091217041, + "learning_rate": 1.4322619010078163e-05, + "loss": 2.1639, + "step": 11073 + }, + { + "epoch": 0.37, + "grad_norm": 0.7896743416786194, + "learning_rate": 1.4321660523968697e-05, + "loss": 2.0989, + "step": 11074 + }, + { + "epoch": 0.37, + "grad_norm": 0.7045981287956238, + "learning_rate": 1.4320701989035643e-05, + "loss": 2.0502, + "step": 11075 + }, + { + "epoch": 0.37, + "grad_norm": 0.7547368407249451, + "learning_rate": 1.4319743405289833e-05, + "loss": 2.0714, + "step": 11076 + }, + { + "epoch": 0.37, + "grad_norm": 0.7796988487243652, + "learning_rate": 1.431878477274209e-05, + "loss": 2.2137, + "step": 11077 + }, + { + "epoch": 0.37, + "grad_norm": 0.7328726053237915, + "learning_rate": 1.4317826091403249e-05, + "loss": 2.0675, + "step": 11078 + }, + { + "epoch": 0.37, + "grad_norm": 0.7560614347457886, + "learning_rate": 1.4316867361284141e-05, + "loss": 2.194, + "step": 11079 + }, + { + "epoch": 0.37, + "grad_norm": 0.7340458631515503, + "learning_rate": 1.431590858239559e-05, + "loss": 2.0681, + "step": 11080 + }, + { + "epoch": 0.37, + "grad_norm": 0.7251687049865723, + "learning_rate": 1.431494975474844e-05, + "loss": 2.1052, + "step": 11081 + }, + { + "epoch": 0.37, + "grad_norm": 0.7459153532981873, + "learning_rate": 1.4313990878353515e-05, + "loss": 2.1627, + "step": 11082 + }, + { + "epoch": 0.37, + "grad_norm": 0.7301740050315857, + "learning_rate": 1.4313031953221646e-05, + "loss": 2.0577, + "step": 11083 + }, + { + "epoch": 0.37, + "grad_norm": 0.7670778632164001, + "learning_rate": 1.4312072979363671e-05, + "loss": 2.1043, + "step": 11084 + }, + { + "epoch": 0.37, + "grad_norm": 0.7218716740608215, + "learning_rate": 1.4311113956790423e-05, + "loss": 2.1657, + "step": 11085 + }, + { + "epoch": 0.37, + "grad_norm": 0.7021493315696716, + "learning_rate": 1.4310154885512737e-05, + "loss": 2.0981, + "step": 11086 + }, + { + "epoch": 0.37, + "grad_norm": 0.7529250979423523, + "learning_rate": 1.4309195765541445e-05, + "loss": 2.0664, + "step": 11087 + }, + { + "epoch": 0.37, + "grad_norm": 0.7032921314239502, + "learning_rate": 1.4308236596887385e-05, + "loss": 2.1013, + "step": 11088 + }, + { + "epoch": 0.37, + "grad_norm": 0.7842296957969666, + "learning_rate": 1.4307277379561394e-05, + "loss": 2.0878, + "step": 11089 + }, + { + "epoch": 0.37, + "grad_norm": 0.7105204463005066, + "learning_rate": 1.4306318113574311e-05, + "loss": 2.1299, + "step": 11090 + }, + { + "epoch": 0.37, + "grad_norm": 0.7196194529533386, + "learning_rate": 1.4305358798936963e-05, + "loss": 2.0936, + "step": 11091 + }, + { + "epoch": 0.37, + "grad_norm": 0.7477278709411621, + "learning_rate": 1.4304399435660199e-05, + "loss": 2.0721, + "step": 11092 + }, + { + "epoch": 0.37, + "grad_norm": 0.7170090675354004, + "learning_rate": 1.4303440023754851e-05, + "loss": 2.1958, + "step": 11093 + }, + { + "epoch": 0.37, + "grad_norm": 0.7261789441108704, + "learning_rate": 1.4302480563231759e-05, + "loss": 2.14, + "step": 11094 + }, + { + "epoch": 0.37, + "grad_norm": 0.7644128203392029, + "learning_rate": 1.4301521054101764e-05, + "loss": 2.0635, + "step": 11095 + }, + { + "epoch": 0.37, + "grad_norm": 0.7188597321510315, + "learning_rate": 1.4300561496375705e-05, + "loss": 2.1334, + "step": 11096 + }, + { + "epoch": 0.37, + "grad_norm": 0.7074153423309326, + "learning_rate": 1.4299601890064422e-05, + "loss": 2.1319, + "step": 11097 + }, + { + "epoch": 0.37, + "grad_norm": 0.7150753140449524, + "learning_rate": 1.429864223517876e-05, + "loss": 2.1304, + "step": 11098 + }, + { + "epoch": 0.37, + "grad_norm": 0.7301631569862366, + "learning_rate": 1.4297682531729553e-05, + "loss": 2.1902, + "step": 11099 + }, + { + "epoch": 0.37, + "grad_norm": 0.7252650856971741, + "learning_rate": 1.429672277972765e-05, + "loss": 2.1327, + "step": 11100 + }, + { + "epoch": 0.37, + "grad_norm": 0.7567165493965149, + "learning_rate": 1.4295762979183891e-05, + "loss": 2.1784, + "step": 11101 + }, + { + "epoch": 0.37, + "grad_norm": 0.7652906775474548, + "learning_rate": 1.4294803130109117e-05, + "loss": 2.1697, + "step": 11102 + }, + { + "epoch": 0.37, + "grad_norm": 0.7174903750419617, + "learning_rate": 1.4293843232514177e-05, + "loss": 2.0361, + "step": 11103 + }, + { + "epoch": 0.37, + "grad_norm": 0.7215195894241333, + "learning_rate": 1.429288328640991e-05, + "loss": 2.0586, + "step": 11104 + }, + { + "epoch": 0.37, + "grad_norm": 0.7259606719017029, + "learning_rate": 1.4291923291807166e-05, + "loss": 2.0784, + "step": 11105 + }, + { + "epoch": 0.37, + "grad_norm": 0.739876389503479, + "learning_rate": 1.4290963248716788e-05, + "loss": 2.0641, + "step": 11106 + }, + { + "epoch": 0.37, + "grad_norm": 0.7501680850982666, + "learning_rate": 1.429000315714962e-05, + "loss": 2.1648, + "step": 11107 + }, + { + "epoch": 0.37, + "grad_norm": 0.7290095686912537, + "learning_rate": 1.428904301711651e-05, + "loss": 2.0177, + "step": 11108 + }, + { + "epoch": 0.37, + "grad_norm": 0.7456393837928772, + "learning_rate": 1.4288082828628307e-05, + "loss": 2.0828, + "step": 11109 + }, + { + "epoch": 0.37, + "grad_norm": 0.7451617121696472, + "learning_rate": 1.4287122591695859e-05, + "loss": 2.1659, + "step": 11110 + }, + { + "epoch": 0.37, + "grad_norm": 0.7310173511505127, + "learning_rate": 1.4286162306330009e-05, + "loss": 2.0502, + "step": 11111 + }, + { + "epoch": 0.37, + "grad_norm": 0.7474257946014404, + "learning_rate": 1.4285201972541613e-05, + "loss": 2.0621, + "step": 11112 + }, + { + "epoch": 0.37, + "grad_norm": 0.7389144897460938, + "learning_rate": 1.4284241590341512e-05, + "loss": 2.1135, + "step": 11113 + }, + { + "epoch": 0.37, + "grad_norm": 0.7029172778129578, + "learning_rate": 1.4283281159740565e-05, + "loss": 2.1263, + "step": 11114 + }, + { + "epoch": 0.37, + "grad_norm": 0.7077636122703552, + "learning_rate": 1.4282320680749616e-05, + "loss": 2.0843, + "step": 11115 + }, + { + "epoch": 0.37, + "grad_norm": 0.7399885058403015, + "learning_rate": 1.4281360153379517e-05, + "loss": 2.1341, + "step": 11116 + }, + { + "epoch": 0.37, + "grad_norm": 0.735608696937561, + "learning_rate": 1.428039957764112e-05, + "loss": 2.1334, + "step": 11117 + }, + { + "epoch": 0.37, + "grad_norm": 0.735717236995697, + "learning_rate": 1.4279438953545277e-05, + "loss": 2.0716, + "step": 11118 + }, + { + "epoch": 0.37, + "grad_norm": 0.7126065492630005, + "learning_rate": 1.4278478281102841e-05, + "loss": 2.1087, + "step": 11119 + }, + { + "epoch": 0.37, + "grad_norm": 0.6903116106987, + "learning_rate": 1.4277517560324663e-05, + "loss": 2.0883, + "step": 11120 + }, + { + "epoch": 0.37, + "grad_norm": 0.7217884659767151, + "learning_rate": 1.42765567912216e-05, + "loss": 2.1188, + "step": 11121 + }, + { + "epoch": 0.37, + "grad_norm": 0.7285078763961792, + "learning_rate": 1.4275595973804505e-05, + "loss": 2.1336, + "step": 11122 + }, + { + "epoch": 0.37, + "grad_norm": 0.7281102538108826, + "learning_rate": 1.4274635108084236e-05, + "loss": 2.071, + "step": 11123 + }, + { + "epoch": 0.37, + "grad_norm": 0.7387408018112183, + "learning_rate": 1.4273674194071639e-05, + "loss": 2.0707, + "step": 11124 + }, + { + "epoch": 0.37, + "grad_norm": 0.7337959408760071, + "learning_rate": 1.427271323177758e-05, + "loss": 2.0836, + "step": 11125 + }, + { + "epoch": 0.37, + "grad_norm": 0.7263737320899963, + "learning_rate": 1.4271752221212908e-05, + "loss": 2.0969, + "step": 11126 + }, + { + "epoch": 0.37, + "grad_norm": 0.718824565410614, + "learning_rate": 1.427079116238848e-05, + "loss": 2.1588, + "step": 11127 + }, + { + "epoch": 0.37, + "grad_norm": 0.7114875912666321, + "learning_rate": 1.4269830055315164e-05, + "loss": 2.1052, + "step": 11128 + }, + { + "epoch": 0.37, + "grad_norm": 0.6948983073234558, + "learning_rate": 1.4268868900003803e-05, + "loss": 2.0783, + "step": 11129 + }, + { + "epoch": 0.37, + "grad_norm": 0.7300153374671936, + "learning_rate": 1.4267907696465269e-05, + "loss": 2.1526, + "step": 11130 + }, + { + "epoch": 0.37, + "grad_norm": 0.6916389465332031, + "learning_rate": 1.4266946444710412e-05, + "loss": 2.1058, + "step": 11131 + }, + { + "epoch": 0.37, + "grad_norm": 0.7249911427497864, + "learning_rate": 1.4265985144750095e-05, + "loss": 2.0656, + "step": 11132 + }, + { + "epoch": 0.37, + "grad_norm": 0.6803973913192749, + "learning_rate": 1.4265023796595179e-05, + "loss": 2.0699, + "step": 11133 + }, + { + "epoch": 0.37, + "grad_norm": 0.7414565086364746, + "learning_rate": 1.4264062400256524e-05, + "loss": 2.134, + "step": 11134 + }, + { + "epoch": 0.37, + "grad_norm": 0.7456116080284119, + "learning_rate": 1.4263100955744987e-05, + "loss": 2.1763, + "step": 11135 + }, + { + "epoch": 0.37, + "grad_norm": 0.7009689807891846, + "learning_rate": 1.4262139463071439e-05, + "loss": 2.1134, + "step": 11136 + }, + { + "epoch": 0.37, + "grad_norm": 0.7296027541160583, + "learning_rate": 1.4261177922246732e-05, + "loss": 2.1365, + "step": 11137 + }, + { + "epoch": 0.37, + "grad_norm": 0.6952452063560486, + "learning_rate": 1.4260216333281739e-05, + "loss": 2.1519, + "step": 11138 + }, + { + "epoch": 0.37, + "grad_norm": 0.7457868456840515, + "learning_rate": 1.4259254696187316e-05, + "loss": 2.0657, + "step": 11139 + }, + { + "epoch": 0.37, + "grad_norm": 0.7168306708335876, + "learning_rate": 1.425829301097433e-05, + "loss": 2.0244, + "step": 11140 + }, + { + "epoch": 0.37, + "grad_norm": 0.767706036567688, + "learning_rate": 1.4257331277653646e-05, + "loss": 2.1113, + "step": 11141 + }, + { + "epoch": 0.37, + "grad_norm": 0.710503876209259, + "learning_rate": 1.4256369496236127e-05, + "loss": 2.0871, + "step": 11142 + }, + { + "epoch": 0.37, + "grad_norm": 0.7095200419425964, + "learning_rate": 1.4255407666732642e-05, + "loss": 2.1485, + "step": 11143 + }, + { + "epoch": 0.37, + "grad_norm": 0.7167171835899353, + "learning_rate": 1.4254445789154053e-05, + "loss": 2.1488, + "step": 11144 + }, + { + "epoch": 0.37, + "grad_norm": 0.7402951121330261, + "learning_rate": 1.4253483863511228e-05, + "loss": 2.1512, + "step": 11145 + }, + { + "epoch": 0.37, + "grad_norm": 0.7066875100135803, + "learning_rate": 1.4252521889815037e-05, + "loss": 2.0603, + "step": 11146 + }, + { + "epoch": 0.37, + "grad_norm": 0.7391627430915833, + "learning_rate": 1.4251559868076348e-05, + "loss": 2.1808, + "step": 11147 + }, + { + "epoch": 0.37, + "grad_norm": 0.823256254196167, + "learning_rate": 1.425059779830602e-05, + "loss": 2.0949, + "step": 11148 + }, + { + "epoch": 0.37, + "grad_norm": 0.6952913403511047, + "learning_rate": 1.4249635680514936e-05, + "loss": 2.0154, + "step": 11149 + }, + { + "epoch": 0.37, + "grad_norm": 0.7182970643043518, + "learning_rate": 1.4248673514713957e-05, + "loss": 2.0283, + "step": 11150 + }, + { + "epoch": 0.37, + "grad_norm": 0.7535386681556702, + "learning_rate": 1.4247711300913953e-05, + "loss": 2.0722, + "step": 11151 + }, + { + "epoch": 0.37, + "grad_norm": 0.7249448299407959, + "learning_rate": 1.42467490391258e-05, + "loss": 2.0991, + "step": 11152 + }, + { + "epoch": 0.37, + "grad_norm": 0.7416818737983704, + "learning_rate": 1.4245786729360362e-05, + "loss": 2.077, + "step": 11153 + }, + { + "epoch": 0.37, + "grad_norm": 0.7161725163459778, + "learning_rate": 1.4244824371628515e-05, + "loss": 2.1532, + "step": 11154 + }, + { + "epoch": 0.37, + "grad_norm": 0.7586443424224854, + "learning_rate": 1.4243861965941127e-05, + "loss": 2.0419, + "step": 11155 + }, + { + "epoch": 0.37, + "grad_norm": 0.7333353757858276, + "learning_rate": 1.4242899512309077e-05, + "loss": 2.045, + "step": 11156 + }, + { + "epoch": 0.37, + "grad_norm": 0.7093413472175598, + "learning_rate": 1.4241937010743232e-05, + "loss": 2.101, + "step": 11157 + }, + { + "epoch": 0.37, + "grad_norm": 0.6973031759262085, + "learning_rate": 1.4240974461254473e-05, + "loss": 2.1135, + "step": 11158 + }, + { + "epoch": 0.37, + "grad_norm": 0.7134810090065002, + "learning_rate": 1.4240011863853664e-05, + "loss": 2.1313, + "step": 11159 + }, + { + "epoch": 0.37, + "grad_norm": 0.7472101449966431, + "learning_rate": 1.4239049218551691e-05, + "loss": 2.1091, + "step": 11160 + }, + { + "epoch": 0.37, + "grad_norm": 0.7165377140045166, + "learning_rate": 1.4238086525359424e-05, + "loss": 2.1667, + "step": 11161 + }, + { + "epoch": 0.37, + "grad_norm": 0.749644935131073, + "learning_rate": 1.4237123784287734e-05, + "loss": 2.1844, + "step": 11162 + }, + { + "epoch": 0.37, + "grad_norm": 0.7497105002403259, + "learning_rate": 1.423616099534751e-05, + "loss": 2.1359, + "step": 11163 + }, + { + "epoch": 0.37, + "grad_norm": 0.733754575252533, + "learning_rate": 1.4235198158549618e-05, + "loss": 2.127, + "step": 11164 + }, + { + "epoch": 0.37, + "grad_norm": 0.7308863997459412, + "learning_rate": 1.423423527390494e-05, + "loss": 2.087, + "step": 11165 + }, + { + "epoch": 0.37, + "grad_norm": 0.7337420582771301, + "learning_rate": 1.4233272341424351e-05, + "loss": 2.1412, + "step": 11166 + }, + { + "epoch": 0.37, + "grad_norm": 0.727179765701294, + "learning_rate": 1.4232309361118735e-05, + "loss": 2.1399, + "step": 11167 + }, + { + "epoch": 0.37, + "grad_norm": 0.7389503121376038, + "learning_rate": 1.4231346332998965e-05, + "loss": 2.1281, + "step": 11168 + }, + { + "epoch": 0.37, + "grad_norm": 0.7143319249153137, + "learning_rate": 1.4230383257075926e-05, + "loss": 2.1064, + "step": 11169 + }, + { + "epoch": 0.37, + "grad_norm": 0.7208426594734192, + "learning_rate": 1.4229420133360497e-05, + "loss": 2.0868, + "step": 11170 + }, + { + "epoch": 0.37, + "grad_norm": 0.7088901996612549, + "learning_rate": 1.4228456961863556e-05, + "loss": 2.0777, + "step": 11171 + }, + { + "epoch": 0.37, + "grad_norm": 0.7331553101539612, + "learning_rate": 1.422749374259599e-05, + "loss": 2.0266, + "step": 11172 + }, + { + "epoch": 0.37, + "grad_norm": 0.7248064279556274, + "learning_rate": 1.4226530475568673e-05, + "loss": 2.0646, + "step": 11173 + }, + { + "epoch": 0.37, + "grad_norm": 0.7564190626144409, + "learning_rate": 1.4225567160792494e-05, + "loss": 2.0066, + "step": 11174 + }, + { + "epoch": 0.37, + "grad_norm": 0.7105141282081604, + "learning_rate": 1.4224603798278333e-05, + "loss": 2.0284, + "step": 11175 + }, + { + "epoch": 0.37, + "grad_norm": 0.7353984713554382, + "learning_rate": 1.4223640388037074e-05, + "loss": 2.1149, + "step": 11176 + }, + { + "epoch": 0.37, + "grad_norm": 0.7325558662414551, + "learning_rate": 1.4222676930079603e-05, + "loss": 2.1431, + "step": 11177 + }, + { + "epoch": 0.37, + "grad_norm": 0.7351787686347961, + "learning_rate": 1.42217134244168e-05, + "loss": 2.1375, + "step": 11178 + }, + { + "epoch": 0.37, + "grad_norm": 0.7309549450874329, + "learning_rate": 1.4220749871059555e-05, + "loss": 2.0433, + "step": 11179 + }, + { + "epoch": 0.37, + "grad_norm": 0.7133582830429077, + "learning_rate": 1.421978627001875e-05, + "loss": 2.1523, + "step": 11180 + }, + { + "epoch": 0.37, + "grad_norm": 0.740746021270752, + "learning_rate": 1.4218822621305272e-05, + "loss": 2.1162, + "step": 11181 + }, + { + "epoch": 0.37, + "grad_norm": 0.749692440032959, + "learning_rate": 1.4217858924930012e-05, + "loss": 2.1137, + "step": 11182 + }, + { + "epoch": 0.37, + "grad_norm": 0.7487459182739258, + "learning_rate": 1.4216895180903853e-05, + "loss": 2.127, + "step": 11183 + }, + { + "epoch": 0.37, + "grad_norm": 0.7453940510749817, + "learning_rate": 1.421593138923768e-05, + "loss": 2.1659, + "step": 11184 + }, + { + "epoch": 0.37, + "grad_norm": 0.6963984370231628, + "learning_rate": 1.4214967549942389e-05, + "loss": 2.0389, + "step": 11185 + }, + { + "epoch": 0.37, + "grad_norm": 0.722974956035614, + "learning_rate": 1.4214003663028864e-05, + "loss": 2.0811, + "step": 11186 + }, + { + "epoch": 0.37, + "grad_norm": 0.7186281085014343, + "learning_rate": 1.4213039728507994e-05, + "loss": 2.168, + "step": 11187 + }, + { + "epoch": 0.37, + "grad_norm": 0.7200491428375244, + "learning_rate": 1.421207574639067e-05, + "loss": 2.0644, + "step": 11188 + }, + { + "epoch": 0.37, + "grad_norm": 0.711393415927887, + "learning_rate": 1.4211111716687782e-05, + "loss": 2.1257, + "step": 11189 + }, + { + "epoch": 0.37, + "grad_norm": 0.7236936688423157, + "learning_rate": 1.4210147639410223e-05, + "loss": 2.173, + "step": 11190 + }, + { + "epoch": 0.37, + "grad_norm": 0.7565777897834778, + "learning_rate": 1.4209183514568885e-05, + "loss": 2.1224, + "step": 11191 + }, + { + "epoch": 0.37, + "grad_norm": 0.7428798675537109, + "learning_rate": 1.4208219342174655e-05, + "loss": 2.1587, + "step": 11192 + }, + { + "epoch": 0.37, + "grad_norm": 0.7308961153030396, + "learning_rate": 1.4207255122238432e-05, + "loss": 2.1311, + "step": 11193 + }, + { + "epoch": 0.37, + "grad_norm": 0.7124369144439697, + "learning_rate": 1.4206290854771107e-05, + "loss": 2.1172, + "step": 11194 + }, + { + "epoch": 0.37, + "grad_norm": 0.7443590760231018, + "learning_rate": 1.420532653978357e-05, + "loss": 2.1043, + "step": 11195 + }, + { + "epoch": 0.37, + "grad_norm": 0.7128584980964661, + "learning_rate": 1.4204362177286722e-05, + "loss": 2.1073, + "step": 11196 + }, + { + "epoch": 0.37, + "grad_norm": 0.7255983948707581, + "learning_rate": 1.4203397767291452e-05, + "loss": 2.1244, + "step": 11197 + }, + { + "epoch": 0.37, + "grad_norm": 0.7299162149429321, + "learning_rate": 1.4202433309808657e-05, + "loss": 2.1315, + "step": 11198 + }, + { + "epoch": 0.37, + "grad_norm": 0.7553777098655701, + "learning_rate": 1.4201468804849233e-05, + "loss": 2.0709, + "step": 11199 + }, + { + "epoch": 0.37, + "grad_norm": 0.7665624618530273, + "learning_rate": 1.4200504252424077e-05, + "loss": 2.1495, + "step": 11200 + }, + { + "epoch": 0.37, + "grad_norm": 0.7205923795700073, + "learning_rate": 1.4199539652544086e-05, + "loss": 2.1178, + "step": 11201 + }, + { + "epoch": 0.37, + "grad_norm": 0.7153617143630981, + "learning_rate": 1.4198575005220157e-05, + "loss": 2.0675, + "step": 11202 + }, + { + "epoch": 0.37, + "grad_norm": 0.7182059288024902, + "learning_rate": 1.4197610310463188e-05, + "loss": 2.117, + "step": 11203 + }, + { + "epoch": 0.37, + "grad_norm": 0.7073665857315063, + "learning_rate": 1.4196645568284078e-05, + "loss": 2.1047, + "step": 11204 + }, + { + "epoch": 0.37, + "grad_norm": 0.7404348254203796, + "learning_rate": 1.4195680778693728e-05, + "loss": 2.0887, + "step": 11205 + }, + { + "epoch": 0.37, + "grad_norm": 0.766862690448761, + "learning_rate": 1.4194715941703032e-05, + "loss": 2.1858, + "step": 11206 + }, + { + "epoch": 0.37, + "grad_norm": 0.719981849193573, + "learning_rate": 1.4193751057322898e-05, + "loss": 2.102, + "step": 11207 + }, + { + "epoch": 0.37, + "grad_norm": 0.7175014019012451, + "learning_rate": 1.4192786125564217e-05, + "loss": 2.1391, + "step": 11208 + }, + { + "epoch": 0.37, + "grad_norm": 0.7436515688896179, + "learning_rate": 1.4191821146437898e-05, + "loss": 2.1259, + "step": 11209 + }, + { + "epoch": 0.37, + "grad_norm": 0.73243248462677, + "learning_rate": 1.419085611995484e-05, + "loss": 2.0753, + "step": 11210 + }, + { + "epoch": 0.37, + "grad_norm": 0.7517950534820557, + "learning_rate": 1.4189891046125948e-05, + "loss": 2.0716, + "step": 11211 + }, + { + "epoch": 0.37, + "grad_norm": 0.771217942237854, + "learning_rate": 1.4188925924962119e-05, + "loss": 2.1326, + "step": 11212 + }, + { + "epoch": 0.37, + "grad_norm": 0.7113727331161499, + "learning_rate": 1.4187960756474262e-05, + "loss": 2.0873, + "step": 11213 + }, + { + "epoch": 0.37, + "grad_norm": 0.7072215676307678, + "learning_rate": 1.4186995540673278e-05, + "loss": 2.1199, + "step": 11214 + }, + { + "epoch": 0.37, + "grad_norm": 0.7198404669761658, + "learning_rate": 1.418603027757007e-05, + "loss": 2.136, + "step": 11215 + }, + { + "epoch": 0.37, + "grad_norm": 0.7018731236457825, + "learning_rate": 1.418506496717555e-05, + "loss": 2.0726, + "step": 11216 + }, + { + "epoch": 0.37, + "grad_norm": 0.732787013053894, + "learning_rate": 1.4184099609500615e-05, + "loss": 2.1254, + "step": 11217 + }, + { + "epoch": 0.37, + "grad_norm": 0.713353157043457, + "learning_rate": 1.418313420455618e-05, + "loss": 2.1718, + "step": 11218 + }, + { + "epoch": 0.37, + "grad_norm": 0.738911509513855, + "learning_rate": 1.418216875235314e-05, + "loss": 2.1517, + "step": 11219 + }, + { + "epoch": 0.37, + "grad_norm": 0.7707206606864929, + "learning_rate": 1.418120325290241e-05, + "loss": 2.0662, + "step": 11220 + }, + { + "epoch": 0.37, + "grad_norm": 0.6887251734733582, + "learning_rate": 1.4180237706214897e-05, + "loss": 2.0978, + "step": 11221 + }, + { + "epoch": 0.37, + "grad_norm": 0.7137007117271423, + "learning_rate": 1.4179272112301507e-05, + "loss": 2.1184, + "step": 11222 + }, + { + "epoch": 0.37, + "grad_norm": 0.7035083770751953, + "learning_rate": 1.4178306471173154e-05, + "loss": 2.0873, + "step": 11223 + }, + { + "epoch": 0.37, + "grad_norm": 0.7317951321601868, + "learning_rate": 1.4177340782840738e-05, + "loss": 2.0584, + "step": 11224 + }, + { + "epoch": 0.37, + "grad_norm": 0.7241039872169495, + "learning_rate": 1.4176375047315176e-05, + "loss": 2.1039, + "step": 11225 + }, + { + "epoch": 0.37, + "grad_norm": 0.7058163285255432, + "learning_rate": 1.4175409264607376e-05, + "loss": 2.1532, + "step": 11226 + }, + { + "epoch": 0.37, + "grad_norm": 0.7255133390426636, + "learning_rate": 1.4174443434728251e-05, + "loss": 2.1352, + "step": 11227 + }, + { + "epoch": 0.37, + "grad_norm": 0.7064658403396606, + "learning_rate": 1.4173477557688708e-05, + "loss": 2.0906, + "step": 11228 + }, + { + "epoch": 0.37, + "grad_norm": 0.7195213437080383, + "learning_rate": 1.4172511633499663e-05, + "loss": 2.0963, + "step": 11229 + }, + { + "epoch": 0.37, + "grad_norm": 0.7163783311843872, + "learning_rate": 1.4171545662172026e-05, + "loss": 2.1103, + "step": 11230 + }, + { + "epoch": 0.37, + "grad_norm": 0.7199503183364868, + "learning_rate": 1.417057964371671e-05, + "loss": 2.1444, + "step": 11231 + }, + { + "epoch": 0.37, + "grad_norm": 0.7062727808952332, + "learning_rate": 1.4169613578144635e-05, + "loss": 2.0899, + "step": 11232 + }, + { + "epoch": 0.37, + "grad_norm": 0.7281644344329834, + "learning_rate": 1.4168647465466702e-05, + "loss": 2.1675, + "step": 11233 + }, + { + "epoch": 0.37, + "grad_norm": 0.740689754486084, + "learning_rate": 1.416768130569384e-05, + "loss": 2.014, + "step": 11234 + }, + { + "epoch": 0.37, + "grad_norm": 0.7241379022598267, + "learning_rate": 1.4166715098836951e-05, + "loss": 2.0267, + "step": 11235 + }, + { + "epoch": 0.37, + "grad_norm": 0.6982423663139343, + "learning_rate": 1.416574884490696e-05, + "loss": 2.1057, + "step": 11236 + }, + { + "epoch": 0.37, + "grad_norm": 0.7420019507408142, + "learning_rate": 1.416478254391478e-05, + "loss": 2.2113, + "step": 11237 + }, + { + "epoch": 0.37, + "grad_norm": 0.6994154453277588, + "learning_rate": 1.4163816195871324e-05, + "loss": 2.1112, + "step": 11238 + }, + { + "epoch": 0.37, + "grad_norm": 0.757770299911499, + "learning_rate": 1.4162849800787517e-05, + "loss": 2.0877, + "step": 11239 + }, + { + "epoch": 0.37, + "grad_norm": 0.7443302869796753, + "learning_rate": 1.4161883358674273e-05, + "loss": 2.086, + "step": 11240 + }, + { + "epoch": 0.37, + "grad_norm": 0.7188851833343506, + "learning_rate": 1.4160916869542505e-05, + "loss": 2.1156, + "step": 11241 + }, + { + "epoch": 0.37, + "grad_norm": 0.72759610414505, + "learning_rate": 1.4159950333403139e-05, + "loss": 2.1929, + "step": 11242 + }, + { + "epoch": 0.37, + "grad_norm": 0.7391787171363831, + "learning_rate": 1.4158983750267097e-05, + "loss": 2.1368, + "step": 11243 + }, + { + "epoch": 0.37, + "grad_norm": 0.722474217414856, + "learning_rate": 1.4158017120145287e-05, + "loss": 2.0946, + "step": 11244 + }, + { + "epoch": 0.37, + "grad_norm": 0.7508598566055298, + "learning_rate": 1.415705044304864e-05, + "loss": 2.0879, + "step": 11245 + }, + { + "epoch": 0.37, + "grad_norm": 0.7179402709007263, + "learning_rate": 1.4156083718988071e-05, + "loss": 2.1025, + "step": 11246 + }, + { + "epoch": 0.37, + "grad_norm": 0.7372363805770874, + "learning_rate": 1.4155116947974507e-05, + "loss": 2.0202, + "step": 11247 + }, + { + "epoch": 0.37, + "grad_norm": 0.7209445834159851, + "learning_rate": 1.4154150130018867e-05, + "loss": 2.1265, + "step": 11248 + }, + { + "epoch": 0.37, + "grad_norm": 0.7012056708335876, + "learning_rate": 1.415318326513207e-05, + "loss": 2.0507, + "step": 11249 + }, + { + "epoch": 0.37, + "grad_norm": 0.7258504629135132, + "learning_rate": 1.4152216353325044e-05, + "loss": 2.0767, + "step": 11250 + }, + { + "epoch": 0.37, + "grad_norm": 0.7148690819740295, + "learning_rate": 1.4151249394608715e-05, + "loss": 2.0766, + "step": 11251 + }, + { + "epoch": 0.37, + "grad_norm": 0.7151157855987549, + "learning_rate": 1.4150282388993997e-05, + "loss": 2.12, + "step": 11252 + }, + { + "epoch": 0.37, + "grad_norm": 0.7250333428382874, + "learning_rate": 1.4149315336491822e-05, + "loss": 2.1482, + "step": 11253 + }, + { + "epoch": 0.37, + "grad_norm": 0.7033488154411316, + "learning_rate": 1.4148348237113117e-05, + "loss": 2.158, + "step": 11254 + }, + { + "epoch": 0.37, + "grad_norm": 0.7513361573219299, + "learning_rate": 1.4147381090868801e-05, + "loss": 2.1476, + "step": 11255 + }, + { + "epoch": 0.37, + "grad_norm": 0.7150612473487854, + "learning_rate": 1.4146413897769809e-05, + "loss": 2.0776, + "step": 11256 + }, + { + "epoch": 0.37, + "grad_norm": 0.7760404348373413, + "learning_rate": 1.414544665782706e-05, + "loss": 2.1538, + "step": 11257 + }, + { + "epoch": 0.37, + "grad_norm": 0.7502290606498718, + "learning_rate": 1.4144479371051482e-05, + "loss": 2.1843, + "step": 11258 + }, + { + "epoch": 0.37, + "grad_norm": 0.7119926810264587, + "learning_rate": 1.414351203745401e-05, + "loss": 2.1012, + "step": 11259 + }, + { + "epoch": 0.37, + "grad_norm": 0.7729415893554688, + "learning_rate": 1.4142544657045562e-05, + "loss": 2.0562, + "step": 11260 + }, + { + "epoch": 0.37, + "grad_norm": 0.7658978700637817, + "learning_rate": 1.4141577229837075e-05, + "loss": 2.098, + "step": 11261 + }, + { + "epoch": 0.37, + "grad_norm": 0.7574605941772461, + "learning_rate": 1.4140609755839479e-05, + "loss": 2.1429, + "step": 11262 + }, + { + "epoch": 0.37, + "grad_norm": 0.7060720324516296, + "learning_rate": 1.4139642235063695e-05, + "loss": 2.0942, + "step": 11263 + }, + { + "epoch": 0.37, + "grad_norm": 0.7192293405532837, + "learning_rate": 1.413867466752066e-05, + "loss": 2.0803, + "step": 11264 + }, + { + "epoch": 0.37, + "grad_norm": 0.7126860618591309, + "learning_rate": 1.4137707053221307e-05, + "loss": 2.0107, + "step": 11265 + }, + { + "epoch": 0.37, + "grad_norm": 0.7545983195304871, + "learning_rate": 1.4136739392176561e-05, + "loss": 2.1046, + "step": 11266 + }, + { + "epoch": 0.37, + "grad_norm": 0.7372099161148071, + "learning_rate": 1.4135771684397361e-05, + "loss": 2.182, + "step": 11267 + }, + { + "epoch": 0.37, + "grad_norm": 0.7328610420227051, + "learning_rate": 1.4134803929894635e-05, + "loss": 2.161, + "step": 11268 + }, + { + "epoch": 0.37, + "grad_norm": 0.697360634803772, + "learning_rate": 1.4133836128679316e-05, + "loss": 2.0497, + "step": 11269 + }, + { + "epoch": 0.37, + "grad_norm": 0.7492145299911499, + "learning_rate": 1.4132868280762342e-05, + "loss": 2.1161, + "step": 11270 + }, + { + "epoch": 0.37, + "grad_norm": 0.7282615900039673, + "learning_rate": 1.4131900386154642e-05, + "loss": 2.0528, + "step": 11271 + }, + { + "epoch": 0.38, + "grad_norm": 0.7174212336540222, + "learning_rate": 1.4130932444867153e-05, + "loss": 2.1017, + "step": 11272 + }, + { + "epoch": 0.38, + "grad_norm": 0.7297884821891785, + "learning_rate": 1.4129964456910812e-05, + "loss": 2.1593, + "step": 11273 + }, + { + "epoch": 0.38, + "grad_norm": 0.7081218957901001, + "learning_rate": 1.4128996422296552e-05, + "loss": 2.1565, + "step": 11274 + }, + { + "epoch": 0.38, + "grad_norm": 0.6938362121582031, + "learning_rate": 1.412802834103531e-05, + "loss": 2.0969, + "step": 11275 + }, + { + "epoch": 0.38, + "grad_norm": 0.7617660164833069, + "learning_rate": 1.4127060213138024e-05, + "loss": 2.0674, + "step": 11276 + }, + { + "epoch": 0.38, + "grad_norm": 0.721051812171936, + "learning_rate": 1.4126092038615629e-05, + "loss": 2.0496, + "step": 11277 + }, + { + "epoch": 0.38, + "grad_norm": 0.7314443588256836, + "learning_rate": 1.4125123817479066e-05, + "loss": 2.0641, + "step": 11278 + }, + { + "epoch": 0.38, + "grad_norm": 0.7625236511230469, + "learning_rate": 1.4124155549739272e-05, + "loss": 2.1138, + "step": 11279 + }, + { + "epoch": 0.38, + "grad_norm": 0.7367079257965088, + "learning_rate": 1.4123187235407182e-05, + "loss": 2.0346, + "step": 11280 + }, + { + "epoch": 0.38, + "grad_norm": 0.7756535410881042, + "learning_rate": 1.4122218874493741e-05, + "loss": 2.1311, + "step": 11281 + }, + { + "epoch": 0.38, + "grad_norm": 0.7690381407737732, + "learning_rate": 1.4121250467009886e-05, + "loss": 2.1323, + "step": 11282 + }, + { + "epoch": 0.38, + "grad_norm": 0.7657479643821716, + "learning_rate": 1.4120282012966562e-05, + "loss": 2.1407, + "step": 11283 + }, + { + "epoch": 0.38, + "grad_norm": 0.6905273199081421, + "learning_rate": 1.4119313512374703e-05, + "loss": 2.0533, + "step": 11284 + }, + { + "epoch": 0.38, + "grad_norm": 0.6848394870758057, + "learning_rate": 1.4118344965245254e-05, + "loss": 2.097, + "step": 11285 + }, + { + "epoch": 0.38, + "grad_norm": 0.7616974115371704, + "learning_rate": 1.411737637158916e-05, + "loss": 2.1164, + "step": 11286 + }, + { + "epoch": 0.38, + "grad_norm": 0.7437761425971985, + "learning_rate": 1.4116407731417361e-05, + "loss": 2.0913, + "step": 11287 + }, + { + "epoch": 0.38, + "grad_norm": 0.737551748752594, + "learning_rate": 1.4115439044740796e-05, + "loss": 2.1029, + "step": 11288 + }, + { + "epoch": 0.38, + "grad_norm": 0.8175996541976929, + "learning_rate": 1.4114470311570417e-05, + "loss": 2.1222, + "step": 11289 + }, + { + "epoch": 0.38, + "grad_norm": 0.7244274020195007, + "learning_rate": 1.4113501531917159e-05, + "loss": 2.0865, + "step": 11290 + }, + { + "epoch": 0.38, + "grad_norm": 0.7473612427711487, + "learning_rate": 1.4112532705791975e-05, + "loss": 2.0965, + "step": 11291 + }, + { + "epoch": 0.38, + "grad_norm": 0.7113406658172607, + "learning_rate": 1.4111563833205806e-05, + "loss": 2.1322, + "step": 11292 + }, + { + "epoch": 0.38, + "grad_norm": 0.7082083821296692, + "learning_rate": 1.4110594914169597e-05, + "loss": 2.1156, + "step": 11293 + }, + { + "epoch": 0.38, + "grad_norm": 0.7392251491546631, + "learning_rate": 1.4109625948694297e-05, + "loss": 2.1376, + "step": 11294 + }, + { + "epoch": 0.38, + "grad_norm": 0.7428208589553833, + "learning_rate": 1.4108656936790852e-05, + "loss": 2.0954, + "step": 11295 + }, + { + "epoch": 0.38, + "grad_norm": 0.7477293014526367, + "learning_rate": 1.4107687878470206e-05, + "loss": 2.1105, + "step": 11296 + }, + { + "epoch": 0.38, + "grad_norm": 0.7450971007347107, + "learning_rate": 1.4106718773743312e-05, + "loss": 2.192, + "step": 11297 + }, + { + "epoch": 0.38, + "grad_norm": 0.7245600819587708, + "learning_rate": 1.410574962262112e-05, + "loss": 2.1028, + "step": 11298 + }, + { + "epoch": 0.38, + "grad_norm": 0.7093858122825623, + "learning_rate": 1.4104780425114565e-05, + "loss": 2.1175, + "step": 11299 + }, + { + "epoch": 0.38, + "grad_norm": 0.6904973387718201, + "learning_rate": 1.4103811181234614e-05, + "loss": 2.0277, + "step": 11300 + }, + { + "epoch": 0.38, + "grad_norm": 0.7340632677078247, + "learning_rate": 1.410284189099221e-05, + "loss": 2.1226, + "step": 11301 + }, + { + "epoch": 0.38, + "grad_norm": 0.7565782070159912, + "learning_rate": 1.41018725543983e-05, + "loss": 2.1351, + "step": 11302 + }, + { + "epoch": 0.38, + "grad_norm": 0.7128376960754395, + "learning_rate": 1.4100903171463838e-05, + "loss": 2.2219, + "step": 11303 + }, + { + "epoch": 0.38, + "grad_norm": 0.7159379720687866, + "learning_rate": 1.4099933742199775e-05, + "loss": 2.1717, + "step": 11304 + }, + { + "epoch": 0.38, + "grad_norm": 0.7225620746612549, + "learning_rate": 1.4098964266617064e-05, + "loss": 2.0403, + "step": 11305 + }, + { + "epoch": 0.38, + "grad_norm": 0.7462195158004761, + "learning_rate": 1.4097994744726658e-05, + "loss": 2.0894, + "step": 11306 + }, + { + "epoch": 0.38, + "grad_norm": 0.7404658198356628, + "learning_rate": 1.4097025176539506e-05, + "loss": 2.0947, + "step": 11307 + }, + { + "epoch": 0.38, + "grad_norm": 0.7252629995346069, + "learning_rate": 1.4096055562066567e-05, + "loss": 2.0514, + "step": 11308 + }, + { + "epoch": 0.38, + "grad_norm": 0.7713181376457214, + "learning_rate": 1.4095085901318793e-05, + "loss": 2.1531, + "step": 11309 + }, + { + "epoch": 0.38, + "grad_norm": 0.7235509157180786, + "learning_rate": 1.4094116194307135e-05, + "loss": 2.1111, + "step": 11310 + }, + { + "epoch": 0.38, + "grad_norm": 0.7227891087532043, + "learning_rate": 1.4093146441042556e-05, + "loss": 2.1519, + "step": 11311 + }, + { + "epoch": 0.38, + "grad_norm": 0.7395390868186951, + "learning_rate": 1.4092176641536006e-05, + "loss": 2.1223, + "step": 11312 + }, + { + "epoch": 0.38, + "grad_norm": 0.7327308058738708, + "learning_rate": 1.4091206795798443e-05, + "loss": 2.141, + "step": 11313 + }, + { + "epoch": 0.38, + "grad_norm": 0.7350775003433228, + "learning_rate": 1.409023690384082e-05, + "loss": 2.1248, + "step": 11314 + }, + { + "epoch": 0.38, + "grad_norm": 0.7206475734710693, + "learning_rate": 1.4089266965674102e-05, + "loss": 2.0807, + "step": 11315 + }, + { + "epoch": 0.38, + "grad_norm": 0.709550678730011, + "learning_rate": 1.408829698130924e-05, + "loss": 2.0873, + "step": 11316 + }, + { + "epoch": 0.38, + "grad_norm": 0.6950903534889221, + "learning_rate": 1.4087326950757194e-05, + "loss": 2.1174, + "step": 11317 + }, + { + "epoch": 0.38, + "grad_norm": 0.7297247052192688, + "learning_rate": 1.4086356874028927e-05, + "loss": 2.2129, + "step": 11318 + }, + { + "epoch": 0.38, + "grad_norm": 0.7027513980865479, + "learning_rate": 1.408538675113539e-05, + "loss": 2.1113, + "step": 11319 + }, + { + "epoch": 0.38, + "grad_norm": 0.7393156290054321, + "learning_rate": 1.4084416582087554e-05, + "loss": 2.1598, + "step": 11320 + }, + { + "epoch": 0.38, + "grad_norm": 0.7525022625923157, + "learning_rate": 1.4083446366896367e-05, + "loss": 2.1294, + "step": 11321 + }, + { + "epoch": 0.38, + "grad_norm": 0.7394075989723206, + "learning_rate": 1.40824761055728e-05, + "loss": 2.1739, + "step": 11322 + }, + { + "epoch": 0.38, + "grad_norm": 0.7208836674690247, + "learning_rate": 1.4081505798127809e-05, + "loss": 2.1031, + "step": 11323 + }, + { + "epoch": 0.38, + "grad_norm": 0.7147702574729919, + "learning_rate": 1.4080535444572357e-05, + "loss": 2.2089, + "step": 11324 + }, + { + "epoch": 0.38, + "grad_norm": 0.7271811366081238, + "learning_rate": 1.4079565044917408e-05, + "loss": 2.0441, + "step": 11325 + }, + { + "epoch": 0.38, + "grad_norm": 0.7279956340789795, + "learning_rate": 1.407859459917392e-05, + "loss": 2.1404, + "step": 11326 + }, + { + "epoch": 0.38, + "grad_norm": 0.7437514662742615, + "learning_rate": 1.4077624107352867e-05, + "loss": 2.0868, + "step": 11327 + }, + { + "epoch": 0.38, + "grad_norm": 0.7344135642051697, + "learning_rate": 1.4076653569465204e-05, + "loss": 2.1298, + "step": 11328 + }, + { + "epoch": 0.38, + "grad_norm": 0.7050654292106628, + "learning_rate": 1.4075682985521896e-05, + "loss": 2.0802, + "step": 11329 + }, + { + "epoch": 0.38, + "grad_norm": 0.727127730846405, + "learning_rate": 1.4074712355533913e-05, + "loss": 2.0194, + "step": 11330 + }, + { + "epoch": 0.38, + "grad_norm": 0.7598231434822083, + "learning_rate": 1.4073741679512218e-05, + "loss": 2.106, + "step": 11331 + }, + { + "epoch": 0.38, + "grad_norm": 0.755497395992279, + "learning_rate": 1.4072770957467775e-05, + "loss": 2.0838, + "step": 11332 + }, + { + "epoch": 0.38, + "grad_norm": 0.7339121699333191, + "learning_rate": 1.4071800189411552e-05, + "loss": 2.1129, + "step": 11333 + }, + { + "epoch": 0.38, + "grad_norm": 0.7392110228538513, + "learning_rate": 1.4070829375354518e-05, + "loss": 2.1172, + "step": 11334 + }, + { + "epoch": 0.38, + "grad_norm": 0.739726185798645, + "learning_rate": 1.4069858515307643e-05, + "loss": 2.1291, + "step": 11335 + }, + { + "epoch": 0.38, + "grad_norm": 0.7599457502365112, + "learning_rate": 1.4068887609281888e-05, + "loss": 2.0738, + "step": 11336 + }, + { + "epoch": 0.38, + "grad_norm": 0.703784167766571, + "learning_rate": 1.4067916657288221e-05, + "loss": 2.1631, + "step": 11337 + }, + { + "epoch": 0.38, + "grad_norm": 0.6943023204803467, + "learning_rate": 1.4066945659337623e-05, + "loss": 2.0617, + "step": 11338 + }, + { + "epoch": 0.38, + "grad_norm": 0.7323131561279297, + "learning_rate": 1.4065974615441054e-05, + "loss": 2.0414, + "step": 11339 + }, + { + "epoch": 0.38, + "grad_norm": 0.7876615524291992, + "learning_rate": 1.4065003525609483e-05, + "loss": 2.1557, + "step": 11340 + }, + { + "epoch": 0.38, + "grad_norm": 0.739802360534668, + "learning_rate": 1.406403238985389e-05, + "loss": 2.1271, + "step": 11341 + }, + { + "epoch": 0.38, + "grad_norm": 0.7265348434448242, + "learning_rate": 1.4063061208185238e-05, + "loss": 2.1282, + "step": 11342 + }, + { + "epoch": 0.38, + "grad_norm": 0.7415464520454407, + "learning_rate": 1.4062089980614504e-05, + "loss": 2.0651, + "step": 11343 + }, + { + "epoch": 0.38, + "grad_norm": 0.7040876150131226, + "learning_rate": 1.4061118707152657e-05, + "loss": 2.0781, + "step": 11344 + }, + { + "epoch": 0.38, + "grad_norm": 0.732926607131958, + "learning_rate": 1.4060147387810667e-05, + "loss": 1.9949, + "step": 11345 + }, + { + "epoch": 0.38, + "grad_norm": 0.7381318807601929, + "learning_rate": 1.4059176022599515e-05, + "loss": 2.1105, + "step": 11346 + }, + { + "epoch": 0.38, + "grad_norm": 0.7351718544960022, + "learning_rate": 1.4058204611530173e-05, + "loss": 2.0902, + "step": 11347 + }, + { + "epoch": 0.38, + "grad_norm": 0.7441126108169556, + "learning_rate": 1.405723315461361e-05, + "loss": 2.1913, + "step": 11348 + }, + { + "epoch": 0.38, + "grad_norm": 0.7330016493797302, + "learning_rate": 1.4056261651860808e-05, + "loss": 2.0643, + "step": 11349 + }, + { + "epoch": 0.38, + "grad_norm": 0.7363520264625549, + "learning_rate": 1.4055290103282735e-05, + "loss": 2.062, + "step": 11350 + }, + { + "epoch": 0.38, + "grad_norm": 0.7020858526229858, + "learning_rate": 1.4054318508890372e-05, + "loss": 2.1009, + "step": 11351 + }, + { + "epoch": 0.38, + "grad_norm": 0.7362831234931946, + "learning_rate": 1.4053346868694697e-05, + "loss": 2.0614, + "step": 11352 + }, + { + "epoch": 0.38, + "grad_norm": 0.6958045959472656, + "learning_rate": 1.4052375182706683e-05, + "loss": 2.065, + "step": 11353 + }, + { + "epoch": 0.38, + "grad_norm": 0.760219931602478, + "learning_rate": 1.405140345093731e-05, + "loss": 2.0969, + "step": 11354 + }, + { + "epoch": 0.38, + "grad_norm": 0.7303688526153564, + "learning_rate": 1.4050431673397556e-05, + "loss": 2.1052, + "step": 11355 + }, + { + "epoch": 0.38, + "grad_norm": 0.7132928371429443, + "learning_rate": 1.4049459850098394e-05, + "loss": 2.078, + "step": 11356 + }, + { + "epoch": 0.38, + "grad_norm": 0.7101141214370728, + "learning_rate": 1.404848798105081e-05, + "loss": 2.0918, + "step": 11357 + }, + { + "epoch": 0.38, + "grad_norm": 0.7626948952674866, + "learning_rate": 1.4047516066265786e-05, + "loss": 2.1307, + "step": 11358 + }, + { + "epoch": 0.38, + "grad_norm": 0.7110300064086914, + "learning_rate": 1.4046544105754292e-05, + "loss": 2.0222, + "step": 11359 + }, + { + "epoch": 0.38, + "grad_norm": 0.7119615077972412, + "learning_rate": 1.4045572099527318e-05, + "loss": 2.1136, + "step": 11360 + }, + { + "epoch": 0.38, + "grad_norm": 0.7320970296859741, + "learning_rate": 1.4044600047595838e-05, + "loss": 2.1013, + "step": 11361 + }, + { + "epoch": 0.38, + "grad_norm": 0.7220376133918762, + "learning_rate": 1.404362794997084e-05, + "loss": 2.1056, + "step": 11362 + }, + { + "epoch": 0.38, + "grad_norm": 0.7294628024101257, + "learning_rate": 1.4042655806663302e-05, + "loss": 2.1514, + "step": 11363 + }, + { + "epoch": 0.38, + "grad_norm": 0.7361676096916199, + "learning_rate": 1.4041683617684206e-05, + "loss": 2.1134, + "step": 11364 + }, + { + "epoch": 0.38, + "grad_norm": 0.7277711033821106, + "learning_rate": 1.4040711383044539e-05, + "loss": 2.0383, + "step": 11365 + }, + { + "epoch": 0.38, + "grad_norm": 0.7244346141815186, + "learning_rate": 1.4039739102755285e-05, + "loss": 2.1375, + "step": 11366 + }, + { + "epoch": 0.38, + "grad_norm": 0.7274999618530273, + "learning_rate": 1.4038766776827422e-05, + "loss": 2.1807, + "step": 11367 + }, + { + "epoch": 0.38, + "grad_norm": 0.756253719329834, + "learning_rate": 1.4037794405271941e-05, + "loss": 2.131, + "step": 11368 + }, + { + "epoch": 0.38, + "grad_norm": 0.7286421060562134, + "learning_rate": 1.4036821988099827e-05, + "loss": 2.0942, + "step": 11369 + }, + { + "epoch": 0.38, + "grad_norm": 0.7169977426528931, + "learning_rate": 1.4035849525322058e-05, + "loss": 2.0811, + "step": 11370 + }, + { + "epoch": 0.38, + "grad_norm": 0.7374930381774902, + "learning_rate": 1.4034877016949633e-05, + "loss": 2.1252, + "step": 11371 + }, + { + "epoch": 0.38, + "grad_norm": 0.7497426867485046, + "learning_rate": 1.403390446299353e-05, + "loss": 2.1562, + "step": 11372 + }, + { + "epoch": 0.38, + "grad_norm": 0.7529228329658508, + "learning_rate": 1.4032931863464736e-05, + "loss": 2.1309, + "step": 11373 + }, + { + "epoch": 0.38, + "grad_norm": 0.7152889370918274, + "learning_rate": 1.4031959218374245e-05, + "loss": 2.0716, + "step": 11374 + }, + { + "epoch": 0.38, + "grad_norm": 0.7394862174987793, + "learning_rate": 1.403098652773304e-05, + "loss": 2.1411, + "step": 11375 + }, + { + "epoch": 0.38, + "grad_norm": 0.7230199575424194, + "learning_rate": 1.4030013791552111e-05, + "loss": 2.0813, + "step": 11376 + }, + { + "epoch": 0.38, + "grad_norm": 0.7785342335700989, + "learning_rate": 1.4029041009842448e-05, + "loss": 2.1173, + "step": 11377 + }, + { + "epoch": 0.38, + "grad_norm": 0.7200108170509338, + "learning_rate": 1.402806818261504e-05, + "loss": 2.091, + "step": 11378 + }, + { + "epoch": 0.38, + "grad_norm": 0.7376548051834106, + "learning_rate": 1.4027095309880881e-05, + "loss": 2.1798, + "step": 11379 + }, + { + "epoch": 0.38, + "grad_norm": 0.7292722463607788, + "learning_rate": 1.402612239165096e-05, + "loss": 2.0076, + "step": 11380 + }, + { + "epoch": 0.38, + "grad_norm": 0.7084988951683044, + "learning_rate": 1.4025149427936264e-05, + "loss": 2.072, + "step": 11381 + }, + { + "epoch": 0.38, + "grad_norm": 0.745607316493988, + "learning_rate": 1.4024176418747793e-05, + "loss": 2.0998, + "step": 11382 + }, + { + "epoch": 0.38, + "grad_norm": 0.7558834552764893, + "learning_rate": 1.4023203364096531e-05, + "loss": 2.1665, + "step": 11383 + }, + { + "epoch": 0.38, + "grad_norm": 0.6955224275588989, + "learning_rate": 1.4022230263993478e-05, + "loss": 2.0801, + "step": 11384 + }, + { + "epoch": 0.38, + "grad_norm": 0.7393018007278442, + "learning_rate": 1.4021257118449622e-05, + "loss": 2.1223, + "step": 11385 + }, + { + "epoch": 0.38, + "grad_norm": 0.7001378536224365, + "learning_rate": 1.4020283927475963e-05, + "loss": 2.1294, + "step": 11386 + }, + { + "epoch": 0.38, + "grad_norm": 0.728144109249115, + "learning_rate": 1.401931069108349e-05, + "loss": 2.0869, + "step": 11387 + }, + { + "epoch": 0.38, + "grad_norm": 0.7182221412658691, + "learning_rate": 1.40183374092832e-05, + "loss": 2.0966, + "step": 11388 + }, + { + "epoch": 0.38, + "grad_norm": 0.694695770740509, + "learning_rate": 1.401736408208609e-05, + "loss": 2.0954, + "step": 11389 + }, + { + "epoch": 0.38, + "grad_norm": 0.7392402291297913, + "learning_rate": 1.4016390709503156e-05, + "loss": 2.0801, + "step": 11390 + }, + { + "epoch": 0.38, + "grad_norm": 0.7085093855857849, + "learning_rate": 1.4015417291545394e-05, + "loss": 2.1285, + "step": 11391 + }, + { + "epoch": 0.38, + "grad_norm": 0.7328885793685913, + "learning_rate": 1.4014443828223798e-05, + "loss": 2.1352, + "step": 11392 + }, + { + "epoch": 0.38, + "grad_norm": 0.7181127071380615, + "learning_rate": 1.4013470319549371e-05, + "loss": 2.1056, + "step": 11393 + }, + { + "epoch": 0.38, + "grad_norm": 0.701712965965271, + "learning_rate": 1.4012496765533108e-05, + "loss": 2.0286, + "step": 11394 + }, + { + "epoch": 0.38, + "grad_norm": 0.7256366014480591, + "learning_rate": 1.4011523166186007e-05, + "loss": 2.1188, + "step": 11395 + }, + { + "epoch": 0.38, + "grad_norm": 0.7266802787780762, + "learning_rate": 1.401054952151907e-05, + "loss": 2.1754, + "step": 11396 + }, + { + "epoch": 0.38, + "grad_norm": 0.7212596535682678, + "learning_rate": 1.4009575831543292e-05, + "loss": 2.1239, + "step": 11397 + }, + { + "epoch": 0.38, + "grad_norm": 0.7530649304389954, + "learning_rate": 1.4008602096269677e-05, + "loss": 2.1331, + "step": 11398 + }, + { + "epoch": 0.38, + "grad_norm": 0.7259551882743835, + "learning_rate": 1.4007628315709225e-05, + "loss": 2.0715, + "step": 11399 + }, + { + "epoch": 0.38, + "grad_norm": 0.7623724937438965, + "learning_rate": 1.4006654489872937e-05, + "loss": 2.1195, + "step": 11400 + }, + { + "epoch": 0.38, + "grad_norm": 0.7286744713783264, + "learning_rate": 1.4005680618771816e-05, + "loss": 2.089, + "step": 11401 + }, + { + "epoch": 0.38, + "grad_norm": 0.7333736419677734, + "learning_rate": 1.4004706702416864e-05, + "loss": 2.1449, + "step": 11402 + }, + { + "epoch": 0.38, + "grad_norm": 0.7168421149253845, + "learning_rate": 1.4003732740819078e-05, + "loss": 2.0961, + "step": 11403 + }, + { + "epoch": 0.38, + "grad_norm": 0.74763423204422, + "learning_rate": 1.4002758733989473e-05, + "loss": 2.0197, + "step": 11404 + }, + { + "epoch": 0.38, + "grad_norm": 0.7334607243537903, + "learning_rate": 1.400178468193904e-05, + "loss": 2.1822, + "step": 11405 + }, + { + "epoch": 0.38, + "grad_norm": 0.7560888528823853, + "learning_rate": 1.4000810584678789e-05, + "loss": 2.188, + "step": 11406 + }, + { + "epoch": 0.38, + "grad_norm": 0.7041736841201782, + "learning_rate": 1.3999836442219725e-05, + "loss": 2.1567, + "step": 11407 + }, + { + "epoch": 0.38, + "grad_norm": 0.7709736227989197, + "learning_rate": 1.3998862254572854e-05, + "loss": 2.1027, + "step": 11408 + }, + { + "epoch": 0.38, + "grad_norm": 0.7523634433746338, + "learning_rate": 1.3997888021749183e-05, + "loss": 2.0818, + "step": 11409 + }, + { + "epoch": 0.38, + "grad_norm": 0.7626747488975525, + "learning_rate": 1.3996913743759712e-05, + "loss": 2.1527, + "step": 11410 + }, + { + "epoch": 0.38, + "grad_norm": 0.748218834400177, + "learning_rate": 1.3995939420615455e-05, + "loss": 2.1156, + "step": 11411 + }, + { + "epoch": 0.38, + "grad_norm": 0.7393519282341003, + "learning_rate": 1.3994965052327414e-05, + "loss": 2.1081, + "step": 11412 + }, + { + "epoch": 0.38, + "grad_norm": 0.7613183259963989, + "learning_rate": 1.3993990638906603e-05, + "loss": 2.1247, + "step": 11413 + }, + { + "epoch": 0.38, + "grad_norm": 0.7168604731559753, + "learning_rate": 1.3993016180364021e-05, + "loss": 2.1126, + "step": 11414 + }, + { + "epoch": 0.38, + "grad_norm": 0.7124350666999817, + "learning_rate": 1.3992041676710686e-05, + "loss": 2.0649, + "step": 11415 + }, + { + "epoch": 0.38, + "grad_norm": 0.7400722503662109, + "learning_rate": 1.3991067127957603e-05, + "loss": 2.1009, + "step": 11416 + }, + { + "epoch": 0.38, + "grad_norm": 0.7381260395050049, + "learning_rate": 1.3990092534115783e-05, + "loss": 2.0644, + "step": 11417 + }, + { + "epoch": 0.38, + "grad_norm": 0.7363085746765137, + "learning_rate": 1.3989117895196233e-05, + "loss": 2.0769, + "step": 11418 + }, + { + "epoch": 0.38, + "grad_norm": 0.7187110781669617, + "learning_rate": 1.3988143211209968e-05, + "loss": 2.0663, + "step": 11419 + }, + { + "epoch": 0.38, + "grad_norm": 0.7486928105354309, + "learning_rate": 1.3987168482168e-05, + "loss": 2.1658, + "step": 11420 + }, + { + "epoch": 0.38, + "grad_norm": 0.7433101534843445, + "learning_rate": 1.3986193708081339e-05, + "loss": 2.1435, + "step": 11421 + }, + { + "epoch": 0.38, + "grad_norm": 0.7400829195976257, + "learning_rate": 1.3985218888960999e-05, + "loss": 2.0912, + "step": 11422 + }, + { + "epoch": 0.38, + "grad_norm": 0.7167808413505554, + "learning_rate": 1.3984244024817989e-05, + "loss": 2.0938, + "step": 11423 + }, + { + "epoch": 0.38, + "grad_norm": 0.7212663888931274, + "learning_rate": 1.3983269115663327e-05, + "loss": 2.0599, + "step": 11424 + }, + { + "epoch": 0.38, + "grad_norm": 0.7064757943153381, + "learning_rate": 1.3982294161508021e-05, + "loss": 2.1403, + "step": 11425 + }, + { + "epoch": 0.38, + "grad_norm": 0.7244797348976135, + "learning_rate": 1.3981319162363094e-05, + "loss": 2.087, + "step": 11426 + }, + { + "epoch": 0.38, + "grad_norm": 0.726233184337616, + "learning_rate": 1.3980344118239553e-05, + "loss": 2.0482, + "step": 11427 + }, + { + "epoch": 0.38, + "grad_norm": 0.7073317170143127, + "learning_rate": 1.397936902914842e-05, + "loss": 2.1363, + "step": 11428 + }, + { + "epoch": 0.38, + "grad_norm": 0.7302148342132568, + "learning_rate": 1.3978393895100706e-05, + "loss": 2.1555, + "step": 11429 + }, + { + "epoch": 0.38, + "grad_norm": 0.7258449792861938, + "learning_rate": 1.3977418716107427e-05, + "loss": 2.1651, + "step": 11430 + }, + { + "epoch": 0.38, + "grad_norm": 0.7266464233398438, + "learning_rate": 1.3976443492179607e-05, + "loss": 2.0379, + "step": 11431 + }, + { + "epoch": 0.38, + "grad_norm": 0.8782541751861572, + "learning_rate": 1.3975468223328257e-05, + "loss": 2.1166, + "step": 11432 + }, + { + "epoch": 0.38, + "grad_norm": 0.7109593152999878, + "learning_rate": 1.3974492909564397e-05, + "loss": 2.0765, + "step": 11433 + }, + { + "epoch": 0.38, + "grad_norm": 0.7213874459266663, + "learning_rate": 1.3973517550899045e-05, + "loss": 2.1173, + "step": 11434 + }, + { + "epoch": 0.38, + "grad_norm": 0.7201820015907288, + "learning_rate": 1.397254214734322e-05, + "loss": 2.0872, + "step": 11435 + }, + { + "epoch": 0.38, + "grad_norm": 0.719819962978363, + "learning_rate": 1.3971566698907944e-05, + "loss": 2.1451, + "step": 11436 + }, + { + "epoch": 0.38, + "grad_norm": 0.7373226881027222, + "learning_rate": 1.3970591205604234e-05, + "loss": 2.0531, + "step": 11437 + }, + { + "epoch": 0.38, + "grad_norm": 0.7376651763916016, + "learning_rate": 1.3969615667443108e-05, + "loss": 2.1365, + "step": 11438 + }, + { + "epoch": 0.38, + "grad_norm": 0.7356284856796265, + "learning_rate": 1.3968640084435597e-05, + "loss": 2.1581, + "step": 11439 + }, + { + "epoch": 0.38, + "grad_norm": 0.744534432888031, + "learning_rate": 1.396766445659271e-05, + "loss": 2.2055, + "step": 11440 + }, + { + "epoch": 0.38, + "grad_norm": 0.7165676951408386, + "learning_rate": 1.3966688783925478e-05, + "loss": 2.1019, + "step": 11441 + }, + { + "epoch": 0.38, + "grad_norm": 0.7214135527610779, + "learning_rate": 1.3965713066444923e-05, + "loss": 2.1822, + "step": 11442 + }, + { + "epoch": 0.38, + "grad_norm": 0.7336829900741577, + "learning_rate": 1.3964737304162064e-05, + "loss": 2.1272, + "step": 11443 + }, + { + "epoch": 0.38, + "grad_norm": 0.7009656429290771, + "learning_rate": 1.3963761497087926e-05, + "loss": 2.0848, + "step": 11444 + }, + { + "epoch": 0.38, + "grad_norm": 0.7161441445350647, + "learning_rate": 1.3962785645233533e-05, + "loss": 2.0877, + "step": 11445 + }, + { + "epoch": 0.38, + "grad_norm": 0.7274609208106995, + "learning_rate": 1.3961809748609911e-05, + "loss": 2.0588, + "step": 11446 + }, + { + "epoch": 0.38, + "grad_norm": 0.7047138810157776, + "learning_rate": 1.3960833807228085e-05, + "loss": 2.1, + "step": 11447 + }, + { + "epoch": 0.38, + "grad_norm": 0.7163825035095215, + "learning_rate": 1.3959857821099081e-05, + "loss": 2.1135, + "step": 11448 + }, + { + "epoch": 0.38, + "grad_norm": 0.7072292566299438, + "learning_rate": 1.395888179023392e-05, + "loss": 2.1682, + "step": 11449 + }, + { + "epoch": 0.38, + "grad_norm": 0.7029929161071777, + "learning_rate": 1.3957905714643633e-05, + "loss": 2.144, + "step": 11450 + }, + { + "epoch": 0.38, + "grad_norm": 0.7170625329017639, + "learning_rate": 1.3956929594339253e-05, + "loss": 2.158, + "step": 11451 + }, + { + "epoch": 0.38, + "grad_norm": 0.7440356612205505, + "learning_rate": 1.3955953429331795e-05, + "loss": 2.1039, + "step": 11452 + }, + { + "epoch": 0.38, + "grad_norm": 0.7330176830291748, + "learning_rate": 1.3954977219632297e-05, + "loss": 2.0907, + "step": 11453 + }, + { + "epoch": 0.38, + "grad_norm": 0.7226961255073547, + "learning_rate": 1.3954000965251781e-05, + "loss": 2.1209, + "step": 11454 + }, + { + "epoch": 0.38, + "grad_norm": 0.7116376161575317, + "learning_rate": 1.3953024666201282e-05, + "loss": 2.1473, + "step": 11455 + }, + { + "epoch": 0.38, + "grad_norm": 0.8063004016876221, + "learning_rate": 1.3952048322491827e-05, + "loss": 2.0179, + "step": 11456 + }, + { + "epoch": 0.38, + "grad_norm": 0.7078288197517395, + "learning_rate": 1.3951071934134446e-05, + "loss": 2.0711, + "step": 11457 + }, + { + "epoch": 0.38, + "grad_norm": 0.7183838486671448, + "learning_rate": 1.3950095501140169e-05, + "loss": 2.142, + "step": 11458 + }, + { + "epoch": 0.38, + "grad_norm": 0.7276841402053833, + "learning_rate": 1.394911902352003e-05, + "loss": 2.1224, + "step": 11459 + }, + { + "epoch": 0.38, + "grad_norm": 0.718867838382721, + "learning_rate": 1.3948142501285056e-05, + "loss": 2.1073, + "step": 11460 + }, + { + "epoch": 0.38, + "grad_norm": 0.732370913028717, + "learning_rate": 1.3947165934446283e-05, + "loss": 2.1215, + "step": 11461 + }, + { + "epoch": 0.38, + "grad_norm": 0.7546433806419373, + "learning_rate": 1.3946189323014743e-05, + "loss": 2.0714, + "step": 11462 + }, + { + "epoch": 0.38, + "grad_norm": 0.7155033349990845, + "learning_rate": 1.3945212667001467e-05, + "loss": 2.044, + "step": 11463 + }, + { + "epoch": 0.38, + "grad_norm": 0.7212733030319214, + "learning_rate": 1.3944235966417491e-05, + "loss": 2.0661, + "step": 11464 + }, + { + "epoch": 0.38, + "grad_norm": 0.7089389562606812, + "learning_rate": 1.3943259221273851e-05, + "loss": 2.1174, + "step": 11465 + }, + { + "epoch": 0.38, + "grad_norm": 0.6950744390487671, + "learning_rate": 1.3942282431581577e-05, + "loss": 2.082, + "step": 11466 + }, + { + "epoch": 0.38, + "grad_norm": 0.7404822111129761, + "learning_rate": 1.3941305597351709e-05, + "loss": 2.1128, + "step": 11467 + }, + { + "epoch": 0.38, + "grad_norm": 0.768186092376709, + "learning_rate": 1.3940328718595277e-05, + "loss": 2.1688, + "step": 11468 + }, + { + "epoch": 0.38, + "grad_norm": 0.7197774648666382, + "learning_rate": 1.3939351795323322e-05, + "loss": 2.0765, + "step": 11469 + }, + { + "epoch": 0.38, + "grad_norm": 0.7011984586715698, + "learning_rate": 1.3938374827546879e-05, + "loss": 2.0411, + "step": 11470 + }, + { + "epoch": 0.38, + "grad_norm": 0.7435581088066101, + "learning_rate": 1.3937397815276982e-05, + "loss": 2.1283, + "step": 11471 + }, + { + "epoch": 0.38, + "grad_norm": 0.7341912388801575, + "learning_rate": 1.3936420758524678e-05, + "loss": 2.0971, + "step": 11472 + }, + { + "epoch": 0.38, + "grad_norm": 0.7510620355606079, + "learning_rate": 1.3935443657300997e-05, + "loss": 2.1302, + "step": 11473 + }, + { + "epoch": 0.38, + "grad_norm": 0.7260511517524719, + "learning_rate": 1.3934466511616977e-05, + "loss": 2.2008, + "step": 11474 + }, + { + "epoch": 0.38, + "grad_norm": 0.7165123224258423, + "learning_rate": 1.3933489321483664e-05, + "loss": 2.1922, + "step": 11475 + }, + { + "epoch": 0.38, + "grad_norm": 0.7333325147628784, + "learning_rate": 1.3932512086912093e-05, + "loss": 2.0791, + "step": 11476 + }, + { + "epoch": 0.38, + "grad_norm": 0.7265769243240356, + "learning_rate": 1.3931534807913303e-05, + "loss": 2.1234, + "step": 11477 + }, + { + "epoch": 0.38, + "grad_norm": 0.718147873878479, + "learning_rate": 1.3930557484498339e-05, + "loss": 2.152, + "step": 11478 + }, + { + "epoch": 0.38, + "grad_norm": 0.7242167592048645, + "learning_rate": 1.3929580116678238e-05, + "loss": 2.1463, + "step": 11479 + }, + { + "epoch": 0.38, + "grad_norm": 0.7438389658927917, + "learning_rate": 1.3928602704464047e-05, + "loss": 2.09, + "step": 11480 + }, + { + "epoch": 0.38, + "grad_norm": 0.7340410351753235, + "learning_rate": 1.3927625247866803e-05, + "loss": 2.1176, + "step": 11481 + }, + { + "epoch": 0.38, + "grad_norm": 0.727824330329895, + "learning_rate": 1.392664774689755e-05, + "loss": 2.1639, + "step": 11482 + }, + { + "epoch": 0.38, + "grad_norm": 0.7227712869644165, + "learning_rate": 1.3925670201567333e-05, + "loss": 2.1331, + "step": 11483 + }, + { + "epoch": 0.38, + "grad_norm": 0.7353600263595581, + "learning_rate": 1.3924692611887196e-05, + "loss": 2.1199, + "step": 11484 + }, + { + "epoch": 0.38, + "grad_norm": 0.7445943355560303, + "learning_rate": 1.3923714977868179e-05, + "loss": 2.0896, + "step": 11485 + }, + { + "epoch": 0.38, + "grad_norm": 0.7453267574310303, + "learning_rate": 1.3922737299521334e-05, + "loss": 2.0772, + "step": 11486 + }, + { + "epoch": 0.38, + "grad_norm": 0.7079179883003235, + "learning_rate": 1.3921759576857698e-05, + "loss": 2.085, + "step": 11487 + }, + { + "epoch": 0.38, + "grad_norm": 0.726108193397522, + "learning_rate": 1.3920781809888322e-05, + "loss": 2.1079, + "step": 11488 + }, + { + "epoch": 0.38, + "grad_norm": 0.7410142421722412, + "learning_rate": 1.391980399862425e-05, + "loss": 2.0856, + "step": 11489 + }, + { + "epoch": 0.38, + "grad_norm": 0.7477012872695923, + "learning_rate": 1.3918826143076532e-05, + "loss": 2.1499, + "step": 11490 + }, + { + "epoch": 0.38, + "grad_norm": 0.7122055292129517, + "learning_rate": 1.3917848243256211e-05, + "loss": 2.0947, + "step": 11491 + }, + { + "epoch": 0.38, + "grad_norm": 0.7292513847351074, + "learning_rate": 1.3916870299174336e-05, + "loss": 2.1374, + "step": 11492 + }, + { + "epoch": 0.38, + "grad_norm": 0.7271727323532104, + "learning_rate": 1.3915892310841959e-05, + "loss": 2.1032, + "step": 11493 + }, + { + "epoch": 0.38, + "grad_norm": 0.7012867331504822, + "learning_rate": 1.3914914278270121e-05, + "loss": 2.1032, + "step": 11494 + }, + { + "epoch": 0.38, + "grad_norm": 0.7849199175834656, + "learning_rate": 1.3913936201469882e-05, + "loss": 2.0916, + "step": 11495 + }, + { + "epoch": 0.38, + "grad_norm": 0.7315781116485596, + "learning_rate": 1.391295808045228e-05, + "loss": 2.1162, + "step": 11496 + }, + { + "epoch": 0.38, + "grad_norm": 0.7250478863716125, + "learning_rate": 1.3911979915228374e-05, + "loss": 2.1189, + "step": 11497 + }, + { + "epoch": 0.38, + "grad_norm": 0.7302700281143188, + "learning_rate": 1.3911001705809211e-05, + "loss": 2.0142, + "step": 11498 + }, + { + "epoch": 0.38, + "grad_norm": 0.7002418041229248, + "learning_rate": 1.391002345220584e-05, + "loss": 2.1169, + "step": 11499 + }, + { + "epoch": 0.38, + "grad_norm": 0.7220374345779419, + "learning_rate": 1.3909045154429319e-05, + "loss": 2.1254, + "step": 11500 + }, + { + "epoch": 0.38, + "grad_norm": 0.727595865726471, + "learning_rate": 1.3908066812490695e-05, + "loss": 2.0756, + "step": 11501 + }, + { + "epoch": 0.38, + "grad_norm": 0.712340772151947, + "learning_rate": 1.3907088426401024e-05, + "loss": 2.1615, + "step": 11502 + }, + { + "epoch": 0.38, + "grad_norm": 0.7316076755523682, + "learning_rate": 1.3906109996171355e-05, + "loss": 2.1092, + "step": 11503 + }, + { + "epoch": 0.38, + "grad_norm": 0.7226830720901489, + "learning_rate": 1.3905131521812746e-05, + "loss": 2.136, + "step": 11504 + }, + { + "epoch": 0.38, + "grad_norm": 0.7411084175109863, + "learning_rate": 1.3904153003336252e-05, + "loss": 2.0613, + "step": 11505 + }, + { + "epoch": 0.38, + "grad_norm": 0.71602463722229, + "learning_rate": 1.3903174440752925e-05, + "loss": 2.1099, + "step": 11506 + }, + { + "epoch": 0.38, + "grad_norm": 0.7252110242843628, + "learning_rate": 1.3902195834073818e-05, + "loss": 2.1546, + "step": 11507 + }, + { + "epoch": 0.38, + "grad_norm": 0.7378067970275879, + "learning_rate": 1.390121718330999e-05, + "loss": 2.0927, + "step": 11508 + }, + { + "epoch": 0.38, + "grad_norm": 0.7067298293113708, + "learning_rate": 1.3900238488472498e-05, + "loss": 2.0794, + "step": 11509 + }, + { + "epoch": 0.38, + "grad_norm": 0.768795371055603, + "learning_rate": 1.3899259749572397e-05, + "loss": 2.0918, + "step": 11510 + }, + { + "epoch": 0.38, + "grad_norm": 0.7537060976028442, + "learning_rate": 1.389828096662074e-05, + "loss": 2.1635, + "step": 11511 + }, + { + "epoch": 0.38, + "grad_norm": 0.7030593156814575, + "learning_rate": 1.3897302139628594e-05, + "loss": 2.044, + "step": 11512 + }, + { + "epoch": 0.38, + "grad_norm": 0.727529764175415, + "learning_rate": 1.3896323268607013e-05, + "loss": 2.0999, + "step": 11513 + }, + { + "epoch": 0.38, + "grad_norm": 0.7152175903320312, + "learning_rate": 1.3895344353567052e-05, + "loss": 2.1882, + "step": 11514 + }, + { + "epoch": 0.38, + "grad_norm": 0.7171653509140015, + "learning_rate": 1.3894365394519776e-05, + "loss": 2.1376, + "step": 11515 + }, + { + "epoch": 0.38, + "grad_norm": 0.749289870262146, + "learning_rate": 1.3893386391476243e-05, + "loss": 2.0859, + "step": 11516 + }, + { + "epoch": 0.38, + "grad_norm": 0.7324069142341614, + "learning_rate": 1.389240734444751e-05, + "loss": 2.0636, + "step": 11517 + }, + { + "epoch": 0.38, + "grad_norm": 0.7270019054412842, + "learning_rate": 1.3891428253444639e-05, + "loss": 2.1318, + "step": 11518 + }, + { + "epoch": 0.38, + "grad_norm": 0.7595897316932678, + "learning_rate": 1.3890449118478695e-05, + "loss": 2.0793, + "step": 11519 + }, + { + "epoch": 0.38, + "grad_norm": 0.7341136336326599, + "learning_rate": 1.3889469939560736e-05, + "loss": 2.1393, + "step": 11520 + }, + { + "epoch": 0.38, + "grad_norm": 0.7345200777053833, + "learning_rate": 1.3888490716701824e-05, + "loss": 2.1797, + "step": 11521 + }, + { + "epoch": 0.38, + "grad_norm": 0.7249011397361755, + "learning_rate": 1.3887511449913023e-05, + "loss": 2.0732, + "step": 11522 + }, + { + "epoch": 0.38, + "grad_norm": 0.6892530918121338, + "learning_rate": 1.3886532139205395e-05, + "loss": 2.0747, + "step": 11523 + }, + { + "epoch": 0.38, + "grad_norm": 0.7325267195701599, + "learning_rate": 1.3885552784590009e-05, + "loss": 2.0627, + "step": 11524 + }, + { + "epoch": 0.38, + "grad_norm": 0.732509195804596, + "learning_rate": 1.388457338607792e-05, + "loss": 2.154, + "step": 11525 + }, + { + "epoch": 0.38, + "grad_norm": 0.7337095141410828, + "learning_rate": 1.38835939436802e-05, + "loss": 2.1187, + "step": 11526 + }, + { + "epoch": 0.38, + "grad_norm": 0.7032434940338135, + "learning_rate": 1.388261445740791e-05, + "loss": 2.1416, + "step": 11527 + }, + { + "epoch": 0.38, + "grad_norm": 0.7411068081855774, + "learning_rate": 1.3881634927272118e-05, + "loss": 2.0621, + "step": 11528 + }, + { + "epoch": 0.38, + "grad_norm": 0.7256404161453247, + "learning_rate": 1.3880655353283889e-05, + "loss": 2.1571, + "step": 11529 + }, + { + "epoch": 0.38, + "grad_norm": 0.7578774690628052, + "learning_rate": 1.3879675735454295e-05, + "loss": 2.1516, + "step": 11530 + }, + { + "epoch": 0.38, + "grad_norm": 0.7206849455833435, + "learning_rate": 1.3878696073794392e-05, + "loss": 2.0701, + "step": 11531 + }, + { + "epoch": 0.38, + "grad_norm": 0.7665100693702698, + "learning_rate": 1.3877716368315257e-05, + "loss": 2.0497, + "step": 11532 + }, + { + "epoch": 0.38, + "grad_norm": 0.7228631377220154, + "learning_rate": 1.3876736619027953e-05, + "loss": 2.0951, + "step": 11533 + }, + { + "epoch": 0.38, + "grad_norm": 0.740211546421051, + "learning_rate": 1.387575682594355e-05, + "loss": 2.0692, + "step": 11534 + }, + { + "epoch": 0.38, + "grad_norm": 0.7295033931732178, + "learning_rate": 1.3874776989073123e-05, + "loss": 2.0018, + "step": 11535 + }, + { + "epoch": 0.38, + "grad_norm": 0.7313127517700195, + "learning_rate": 1.3873797108427731e-05, + "loss": 2.1086, + "step": 11536 + }, + { + "epoch": 0.38, + "grad_norm": 0.721352219581604, + "learning_rate": 1.387281718401845e-05, + "loss": 2.0851, + "step": 11537 + }, + { + "epoch": 0.38, + "grad_norm": 0.7298734784126282, + "learning_rate": 1.3871837215856351e-05, + "loss": 2.1828, + "step": 11538 + }, + { + "epoch": 0.38, + "grad_norm": 0.7168005108833313, + "learning_rate": 1.3870857203952505e-05, + "loss": 2.068, + "step": 11539 + }, + { + "epoch": 0.38, + "grad_norm": 0.756931722164154, + "learning_rate": 1.3869877148317983e-05, + "loss": 2.1316, + "step": 11540 + }, + { + "epoch": 0.38, + "grad_norm": 0.6979550123214722, + "learning_rate": 1.3868897048963858e-05, + "loss": 2.1556, + "step": 11541 + }, + { + "epoch": 0.38, + "grad_norm": 0.7059472799301147, + "learning_rate": 1.38679169059012e-05, + "loss": 2.1597, + "step": 11542 + }, + { + "epoch": 0.38, + "grad_norm": 0.7303836941719055, + "learning_rate": 1.3866936719141084e-05, + "loss": 2.0681, + "step": 11543 + }, + { + "epoch": 0.38, + "grad_norm": 0.7224733233451843, + "learning_rate": 1.3865956488694584e-05, + "loss": 2.0979, + "step": 11544 + }, + { + "epoch": 0.38, + "grad_norm": 0.7030598521232605, + "learning_rate": 1.386497621457277e-05, + "loss": 2.1062, + "step": 11545 + }, + { + "epoch": 0.38, + "grad_norm": 0.7102269530296326, + "learning_rate": 1.3863995896786723e-05, + "loss": 2.074, + "step": 11546 + }, + { + "epoch": 0.38, + "grad_norm": 0.7306323647499084, + "learning_rate": 1.3863015535347515e-05, + "loss": 2.0859, + "step": 11547 + }, + { + "epoch": 0.38, + "grad_norm": 0.7136075496673584, + "learning_rate": 1.3862035130266218e-05, + "loss": 2.028, + "step": 11548 + }, + { + "epoch": 0.38, + "grad_norm": 0.7416447401046753, + "learning_rate": 1.3861054681553917e-05, + "loss": 2.1362, + "step": 11549 + }, + { + "epoch": 0.38, + "grad_norm": 0.7210537195205688, + "learning_rate": 1.3860074189221677e-05, + "loss": 2.1254, + "step": 11550 + }, + { + "epoch": 0.38, + "grad_norm": 0.7629061937332153, + "learning_rate": 1.3859093653280584e-05, + "loss": 2.1234, + "step": 11551 + }, + { + "epoch": 0.38, + "grad_norm": 0.7179463505744934, + "learning_rate": 1.3858113073741715e-05, + "loss": 2.1187, + "step": 11552 + }, + { + "epoch": 0.38, + "grad_norm": 0.7118474841117859, + "learning_rate": 1.385713245061614e-05, + "loss": 2.0743, + "step": 11553 + }, + { + "epoch": 0.38, + "grad_norm": 0.7272747755050659, + "learning_rate": 1.3856151783914946e-05, + "loss": 2.006, + "step": 11554 + }, + { + "epoch": 0.38, + "grad_norm": 0.710779070854187, + "learning_rate": 1.3855171073649211e-05, + "loss": 2.1044, + "step": 11555 + }, + { + "epoch": 0.38, + "grad_norm": 0.7099700570106506, + "learning_rate": 1.385419031983001e-05, + "loss": 2.0576, + "step": 11556 + }, + { + "epoch": 0.38, + "grad_norm": 0.7408515810966492, + "learning_rate": 1.3853209522468427e-05, + "loss": 2.0831, + "step": 11557 + }, + { + "epoch": 0.38, + "grad_norm": 0.6994704008102417, + "learning_rate": 1.385222868157554e-05, + "loss": 2.0659, + "step": 11558 + }, + { + "epoch": 0.38, + "grad_norm": 0.7266060709953308, + "learning_rate": 1.385124779716243e-05, + "loss": 2.0474, + "step": 11559 + }, + { + "epoch": 0.38, + "grad_norm": 0.7341500520706177, + "learning_rate": 1.3850266869240181e-05, + "loss": 2.127, + "step": 11560 + }, + { + "epoch": 0.38, + "grad_norm": 0.7326970100402832, + "learning_rate": 1.3849285897819873e-05, + "loss": 2.0466, + "step": 11561 + }, + { + "epoch": 0.38, + "grad_norm": 0.7227213978767395, + "learning_rate": 1.3848304882912589e-05, + "loss": 2.067, + "step": 11562 + }, + { + "epoch": 0.38, + "grad_norm": 0.7275463938713074, + "learning_rate": 1.3847323824529415e-05, + "loss": 2.1088, + "step": 11563 + }, + { + "epoch": 0.38, + "grad_norm": 0.7263650298118591, + "learning_rate": 1.3846342722681426e-05, + "loss": 2.08, + "step": 11564 + }, + { + "epoch": 0.38, + "grad_norm": 0.7303856015205383, + "learning_rate": 1.384536157737971e-05, + "loss": 2.0516, + "step": 11565 + }, + { + "epoch": 0.38, + "grad_norm": 0.696685254573822, + "learning_rate": 1.3844380388635358e-05, + "loss": 2.0779, + "step": 11566 + }, + { + "epoch": 0.38, + "grad_norm": 0.7021346688270569, + "learning_rate": 1.3843399156459444e-05, + "loss": 2.068, + "step": 11567 + }, + { + "epoch": 0.38, + "grad_norm": 0.7369007468223572, + "learning_rate": 1.3842417880863063e-05, + "loss": 2.082, + "step": 11568 + }, + { + "epoch": 0.38, + "grad_norm": 0.7505294680595398, + "learning_rate": 1.3841436561857297e-05, + "loss": 2.1907, + "step": 11569 + }, + { + "epoch": 0.38, + "grad_norm": 0.7150418162345886, + "learning_rate": 1.384045519945323e-05, + "loss": 2.0478, + "step": 11570 + }, + { + "epoch": 0.38, + "grad_norm": 0.7252741456031799, + "learning_rate": 1.383947379366195e-05, + "loss": 2.0921, + "step": 11571 + }, + { + "epoch": 0.39, + "grad_norm": 0.7577261924743652, + "learning_rate": 1.3838492344494547e-05, + "loss": 2.1021, + "step": 11572 + }, + { + "epoch": 0.39, + "grad_norm": 0.6852173805236816, + "learning_rate": 1.3837510851962106e-05, + "loss": 2.0974, + "step": 11573 + }, + { + "epoch": 0.39, + "grad_norm": 0.7020335793495178, + "learning_rate": 1.383652931607572e-05, + "loss": 2.0654, + "step": 11574 + }, + { + "epoch": 0.39, + "grad_norm": 0.7773502469062805, + "learning_rate": 1.3835547736846468e-05, + "loss": 2.0196, + "step": 11575 + }, + { + "epoch": 0.39, + "grad_norm": 0.7273013591766357, + "learning_rate": 1.3834566114285448e-05, + "loss": 2.1288, + "step": 11576 + }, + { + "epoch": 0.39, + "grad_norm": 0.7291594743728638, + "learning_rate": 1.3833584448403752e-05, + "loss": 2.1428, + "step": 11577 + }, + { + "epoch": 0.39, + "grad_norm": 0.7520511150360107, + "learning_rate": 1.3832602739212457e-05, + "loss": 2.111, + "step": 11578 + }, + { + "epoch": 0.39, + "grad_norm": 0.704126238822937, + "learning_rate": 1.383162098672267e-05, + "loss": 2.1322, + "step": 11579 + }, + { + "epoch": 0.39, + "grad_norm": 0.7477869391441345, + "learning_rate": 1.3830639190945471e-05, + "loss": 2.1438, + "step": 11580 + }, + { + "epoch": 0.39, + "grad_norm": 0.7056574821472168, + "learning_rate": 1.3829657351891957e-05, + "loss": 2.035, + "step": 11581 + }, + { + "epoch": 0.39, + "grad_norm": 0.7050648331642151, + "learning_rate": 1.3828675469573216e-05, + "loss": 2.0526, + "step": 11582 + }, + { + "epoch": 0.39, + "grad_norm": 0.7426525354385376, + "learning_rate": 1.3827693544000345e-05, + "loss": 2.1328, + "step": 11583 + }, + { + "epoch": 0.39, + "grad_norm": 0.7540075182914734, + "learning_rate": 1.3826711575184436e-05, + "loss": 2.0373, + "step": 11584 + }, + { + "epoch": 0.39, + "grad_norm": 0.7662214040756226, + "learning_rate": 1.3825729563136583e-05, + "loss": 2.0748, + "step": 11585 + }, + { + "epoch": 0.39, + "grad_norm": 0.75132155418396, + "learning_rate": 1.3824747507867877e-05, + "loss": 2.1016, + "step": 11586 + }, + { + "epoch": 0.39, + "grad_norm": 0.7163492441177368, + "learning_rate": 1.3823765409389417e-05, + "loss": 2.0613, + "step": 11587 + }, + { + "epoch": 0.39, + "grad_norm": 0.7279643416404724, + "learning_rate": 1.3822783267712296e-05, + "loss": 2.0861, + "step": 11588 + }, + { + "epoch": 0.39, + "grad_norm": 0.7701489925384521, + "learning_rate": 1.3821801082847608e-05, + "loss": 2.0789, + "step": 11589 + }, + { + "epoch": 0.39, + "grad_norm": 0.7818331122398376, + "learning_rate": 1.3820818854806454e-05, + "loss": 2.0695, + "step": 11590 + }, + { + "epoch": 0.39, + "grad_norm": 0.7220224142074585, + "learning_rate": 1.3819836583599929e-05, + "loss": 2.0912, + "step": 11591 + }, + { + "epoch": 0.39, + "grad_norm": 0.7788681387901306, + "learning_rate": 1.3818854269239126e-05, + "loss": 2.097, + "step": 11592 + }, + { + "epoch": 0.39, + "grad_norm": 0.7736840844154358, + "learning_rate": 1.3817871911735146e-05, + "loss": 2.1502, + "step": 11593 + }, + { + "epoch": 0.39, + "grad_norm": 0.7439195513725281, + "learning_rate": 1.3816889511099087e-05, + "loss": 2.1364, + "step": 11594 + }, + { + "epoch": 0.39, + "grad_norm": 0.7504758238792419, + "learning_rate": 1.3815907067342047e-05, + "loss": 2.0641, + "step": 11595 + }, + { + "epoch": 0.39, + "grad_norm": 0.7239766120910645, + "learning_rate": 1.3814924580475126e-05, + "loss": 2.1144, + "step": 11596 + }, + { + "epoch": 0.39, + "grad_norm": 0.7589836120605469, + "learning_rate": 1.3813942050509422e-05, + "loss": 2.0981, + "step": 11597 + }, + { + "epoch": 0.39, + "grad_norm": 0.754057765007019, + "learning_rate": 1.3812959477456036e-05, + "loss": 2.1734, + "step": 11598 + }, + { + "epoch": 0.39, + "grad_norm": 0.7073413133621216, + "learning_rate": 1.3811976861326071e-05, + "loss": 2.0691, + "step": 11599 + }, + { + "epoch": 0.39, + "grad_norm": 0.7381290793418884, + "learning_rate": 1.381099420213062e-05, + "loss": 2.088, + "step": 11600 + }, + { + "epoch": 0.39, + "grad_norm": 0.7068790793418884, + "learning_rate": 1.3810011499880795e-05, + "loss": 2.1446, + "step": 11601 + }, + { + "epoch": 0.39, + "grad_norm": 0.7349498867988586, + "learning_rate": 1.380902875458769e-05, + "loss": 2.1278, + "step": 11602 + }, + { + "epoch": 0.39, + "grad_norm": 0.743887186050415, + "learning_rate": 1.3808045966262413e-05, + "loss": 2.1066, + "step": 11603 + }, + { + "epoch": 0.39, + "grad_norm": 0.7581366300582886, + "learning_rate": 1.3807063134916061e-05, + "loss": 2.125, + "step": 11604 + }, + { + "epoch": 0.39, + "grad_norm": 0.7287168502807617, + "learning_rate": 1.3806080260559744e-05, + "loss": 2.0938, + "step": 11605 + }, + { + "epoch": 0.39, + "grad_norm": 0.7191795706748962, + "learning_rate": 1.3805097343204561e-05, + "loss": 2.1182, + "step": 11606 + }, + { + "epoch": 0.39, + "grad_norm": 0.755765974521637, + "learning_rate": 1.3804114382861617e-05, + "loss": 2.11, + "step": 11607 + }, + { + "epoch": 0.39, + "grad_norm": 0.7350980043411255, + "learning_rate": 1.3803131379542021e-05, + "loss": 2.0764, + "step": 11608 + }, + { + "epoch": 0.39, + "grad_norm": 0.7042651772499084, + "learning_rate": 1.3802148333256876e-05, + "loss": 2.1245, + "step": 11609 + }, + { + "epoch": 0.39, + "grad_norm": 0.7305948138237, + "learning_rate": 1.3801165244017285e-05, + "loss": 2.0893, + "step": 11610 + }, + { + "epoch": 0.39, + "grad_norm": 0.7541106939315796, + "learning_rate": 1.3800182111834358e-05, + "loss": 2.1493, + "step": 11611 + }, + { + "epoch": 0.39, + "grad_norm": 0.7800107598304749, + "learning_rate": 1.37991989367192e-05, + "loss": 2.1469, + "step": 11612 + }, + { + "epoch": 0.39, + "grad_norm": 0.7313702702522278, + "learning_rate": 1.379821571868292e-05, + "loss": 2.051, + "step": 11613 + }, + { + "epoch": 0.39, + "grad_norm": 0.7433415055274963, + "learning_rate": 1.3797232457736623e-05, + "loss": 2.1306, + "step": 11614 + }, + { + "epoch": 0.39, + "grad_norm": 0.7259772419929504, + "learning_rate": 1.3796249153891422e-05, + "loss": 2.1154, + "step": 11615 + }, + { + "epoch": 0.39, + "grad_norm": 0.7363418936729431, + "learning_rate": 1.379526580715842e-05, + "loss": 2.0794, + "step": 11616 + }, + { + "epoch": 0.39, + "grad_norm": 0.7201186418533325, + "learning_rate": 1.379428241754873e-05, + "loss": 2.0937, + "step": 11617 + }, + { + "epoch": 0.39, + "grad_norm": 0.7100079655647278, + "learning_rate": 1.379329898507346e-05, + "loss": 2.088, + "step": 11618 + }, + { + "epoch": 0.39, + "grad_norm": 0.7562648057937622, + "learning_rate": 1.3792315509743724e-05, + "loss": 2.1664, + "step": 11619 + }, + { + "epoch": 0.39, + "grad_norm": 0.7121405005455017, + "learning_rate": 1.3791331991570627e-05, + "loss": 2.0768, + "step": 11620 + }, + { + "epoch": 0.39, + "grad_norm": 0.7101187705993652, + "learning_rate": 1.3790348430565286e-05, + "loss": 2.0737, + "step": 11621 + }, + { + "epoch": 0.39, + "grad_norm": 0.7118765711784363, + "learning_rate": 1.3789364826738804e-05, + "loss": 2.1091, + "step": 11622 + }, + { + "epoch": 0.39, + "grad_norm": 0.7277964949607849, + "learning_rate": 1.3788381180102306e-05, + "loss": 2.0694, + "step": 11623 + }, + { + "epoch": 0.39, + "grad_norm": 0.7176958918571472, + "learning_rate": 1.3787397490666892e-05, + "loss": 2.0614, + "step": 11624 + }, + { + "epoch": 0.39, + "grad_norm": 0.7238707542419434, + "learning_rate": 1.3786413758443687e-05, + "loss": 2.0781, + "step": 11625 + }, + { + "epoch": 0.39, + "grad_norm": 0.7206467390060425, + "learning_rate": 1.3785429983443793e-05, + "loss": 2.1317, + "step": 11626 + }, + { + "epoch": 0.39, + "grad_norm": 0.7189276218414307, + "learning_rate": 1.3784446165678329e-05, + "loss": 2.0996, + "step": 11627 + }, + { + "epoch": 0.39, + "grad_norm": 0.7252731323242188, + "learning_rate": 1.3783462305158416e-05, + "loss": 2.1325, + "step": 11628 + }, + { + "epoch": 0.39, + "grad_norm": 0.7328822016716003, + "learning_rate": 1.3782478401895158e-05, + "loss": 2.0293, + "step": 11629 + }, + { + "epoch": 0.39, + "grad_norm": 0.7346433401107788, + "learning_rate": 1.3781494455899677e-05, + "loss": 2.1235, + "step": 11630 + }, + { + "epoch": 0.39, + "grad_norm": 0.7207636833190918, + "learning_rate": 1.3780510467183087e-05, + "loss": 2.0963, + "step": 11631 + }, + { + "epoch": 0.39, + "grad_norm": 0.7483453154563904, + "learning_rate": 1.3779526435756507e-05, + "loss": 2.1121, + "step": 11632 + }, + { + "epoch": 0.39, + "grad_norm": 0.7184513807296753, + "learning_rate": 1.3778542361631052e-05, + "loss": 2.1231, + "step": 11633 + }, + { + "epoch": 0.39, + "grad_norm": 0.7262873649597168, + "learning_rate": 1.3777558244817841e-05, + "loss": 2.0617, + "step": 11634 + }, + { + "epoch": 0.39, + "grad_norm": 0.737218976020813, + "learning_rate": 1.377657408532799e-05, + "loss": 2.1204, + "step": 11635 + }, + { + "epoch": 0.39, + "grad_norm": 0.7416977882385254, + "learning_rate": 1.3775589883172618e-05, + "loss": 2.1408, + "step": 11636 + }, + { + "epoch": 0.39, + "grad_norm": 0.7427653670310974, + "learning_rate": 1.3774605638362844e-05, + "loss": 1.9906, + "step": 11637 + }, + { + "epoch": 0.39, + "grad_norm": 0.73930823802948, + "learning_rate": 1.3773621350909786e-05, + "loss": 2.1392, + "step": 11638 + }, + { + "epoch": 0.39, + "grad_norm": 0.7166590094566345, + "learning_rate": 1.3772637020824568e-05, + "loss": 2.1509, + "step": 11639 + }, + { + "epoch": 0.39, + "grad_norm": 0.7091129422187805, + "learning_rate": 1.3771652648118308e-05, + "loss": 2.1837, + "step": 11640 + }, + { + "epoch": 0.39, + "grad_norm": 0.775908350944519, + "learning_rate": 1.3770668232802124e-05, + "loss": 2.1563, + "step": 11641 + }, + { + "epoch": 0.39, + "grad_norm": 0.7425082921981812, + "learning_rate": 1.3769683774887144e-05, + "loss": 2.1261, + "step": 11642 + }, + { + "epoch": 0.39, + "grad_norm": 0.7394794821739197, + "learning_rate": 1.3768699274384483e-05, + "loss": 2.0882, + "step": 11643 + }, + { + "epoch": 0.39, + "grad_norm": 0.725896954536438, + "learning_rate": 1.3767714731305267e-05, + "loss": 2.1013, + "step": 11644 + }, + { + "epoch": 0.39, + "grad_norm": 0.7525447010993958, + "learning_rate": 1.376673014566062e-05, + "loss": 2.04, + "step": 11645 + }, + { + "epoch": 0.39, + "grad_norm": 0.7347779273986816, + "learning_rate": 1.376574551746166e-05, + "loss": 2.1101, + "step": 11646 + }, + { + "epoch": 0.39, + "grad_norm": 0.777912974357605, + "learning_rate": 1.3764760846719518e-05, + "loss": 2.0888, + "step": 11647 + }, + { + "epoch": 0.39, + "grad_norm": 0.7194976806640625, + "learning_rate": 1.376377613344531e-05, + "loss": 2.0507, + "step": 11648 + }, + { + "epoch": 0.39, + "grad_norm": 0.7134403586387634, + "learning_rate": 1.3762791377650167e-05, + "loss": 2.1727, + "step": 11649 + }, + { + "epoch": 0.39, + "grad_norm": 0.72414630651474, + "learning_rate": 1.3761806579345214e-05, + "loss": 2.1516, + "step": 11650 + }, + { + "epoch": 0.39, + "grad_norm": 0.7293637990951538, + "learning_rate": 1.3760821738541572e-05, + "loss": 2.0929, + "step": 11651 + }, + { + "epoch": 0.39, + "grad_norm": 0.7099736332893372, + "learning_rate": 1.3759836855250373e-05, + "loss": 2.0895, + "step": 11652 + }, + { + "epoch": 0.39, + "grad_norm": 0.7444831132888794, + "learning_rate": 1.3758851929482737e-05, + "loss": 2.1395, + "step": 11653 + }, + { + "epoch": 0.39, + "grad_norm": 0.7232000827789307, + "learning_rate": 1.3757866961249797e-05, + "loss": 2.1274, + "step": 11654 + }, + { + "epoch": 0.39, + "grad_norm": 0.7381769418716431, + "learning_rate": 1.375688195056268e-05, + "loss": 2.125, + "step": 11655 + }, + { + "epoch": 0.39, + "grad_norm": 0.7343809604644775, + "learning_rate": 1.3755896897432511e-05, + "loss": 2.1176, + "step": 11656 + }, + { + "epoch": 0.39, + "grad_norm": 0.7515524625778198, + "learning_rate": 1.3754911801870417e-05, + "loss": 2.0074, + "step": 11657 + }, + { + "epoch": 0.39, + "grad_norm": 0.7542002201080322, + "learning_rate": 1.3753926663887536e-05, + "loss": 2.0765, + "step": 11658 + }, + { + "epoch": 0.39, + "grad_norm": 0.7247751951217651, + "learning_rate": 1.3752941483494987e-05, + "loss": 2.1592, + "step": 11659 + }, + { + "epoch": 0.39, + "grad_norm": 0.7217437624931335, + "learning_rate": 1.3751956260703905e-05, + "loss": 2.0725, + "step": 11660 + }, + { + "epoch": 0.39, + "grad_norm": 0.7486657500267029, + "learning_rate": 1.3750970995525421e-05, + "loss": 2.0966, + "step": 11661 + }, + { + "epoch": 0.39, + "grad_norm": 0.7318110466003418, + "learning_rate": 1.3749985687970666e-05, + "loss": 2.1677, + "step": 11662 + }, + { + "epoch": 0.39, + "grad_norm": 0.7041610479354858, + "learning_rate": 1.3749000338050768e-05, + "loss": 2.0176, + "step": 11663 + }, + { + "epoch": 0.39, + "grad_norm": 0.7950376868247986, + "learning_rate": 1.3748014945776861e-05, + "loss": 2.112, + "step": 11664 + }, + { + "epoch": 0.39, + "grad_norm": 0.7559768557548523, + "learning_rate": 1.3747029511160082e-05, + "loss": 2.1106, + "step": 11665 + }, + { + "epoch": 0.39, + "grad_norm": 0.7226019501686096, + "learning_rate": 1.3746044034211556e-05, + "loss": 2.1685, + "step": 11666 + }, + { + "epoch": 0.39, + "grad_norm": 0.7206690311431885, + "learning_rate": 1.3745058514942422e-05, + "loss": 2.0802, + "step": 11667 + }, + { + "epoch": 0.39, + "grad_norm": 0.7013974785804749, + "learning_rate": 1.374407295336381e-05, + "loss": 2.1606, + "step": 11668 + }, + { + "epoch": 0.39, + "grad_norm": 0.7075404524803162, + "learning_rate": 1.3743087349486855e-05, + "loss": 2.0479, + "step": 11669 + }, + { + "epoch": 0.39, + "grad_norm": 0.7521916031837463, + "learning_rate": 1.3742101703322696e-05, + "loss": 2.0927, + "step": 11670 + }, + { + "epoch": 0.39, + "grad_norm": 0.774567186832428, + "learning_rate": 1.374111601488246e-05, + "loss": 2.0388, + "step": 11671 + }, + { + "epoch": 0.39, + "grad_norm": 0.7391560673713684, + "learning_rate": 1.3740130284177293e-05, + "loss": 2.0977, + "step": 11672 + }, + { + "epoch": 0.39, + "grad_norm": 0.6954547166824341, + "learning_rate": 1.3739144511218325e-05, + "loss": 2.0325, + "step": 11673 + }, + { + "epoch": 0.39, + "grad_norm": 0.7475699186325073, + "learning_rate": 1.3738158696016692e-05, + "loss": 2.1308, + "step": 11674 + }, + { + "epoch": 0.39, + "grad_norm": 0.7774897813796997, + "learning_rate": 1.3737172838583534e-05, + "loss": 2.0479, + "step": 11675 + }, + { + "epoch": 0.39, + "grad_norm": 0.704092800617218, + "learning_rate": 1.3736186938929987e-05, + "loss": 2.1271, + "step": 11676 + }, + { + "epoch": 0.39, + "grad_norm": 0.7230117917060852, + "learning_rate": 1.373520099706719e-05, + "loss": 2.1087, + "step": 11677 + }, + { + "epoch": 0.39, + "grad_norm": 0.7377476692199707, + "learning_rate": 1.3734215013006283e-05, + "loss": 2.1425, + "step": 11678 + }, + { + "epoch": 0.39, + "grad_norm": 0.737406313419342, + "learning_rate": 1.3733228986758399e-05, + "loss": 2.0811, + "step": 11679 + }, + { + "epoch": 0.39, + "grad_norm": 0.7015998363494873, + "learning_rate": 1.3732242918334682e-05, + "loss": 2.0914, + "step": 11680 + }, + { + "epoch": 0.39, + "grad_norm": 0.7315475940704346, + "learning_rate": 1.3731256807746277e-05, + "loss": 2.0347, + "step": 11681 + }, + { + "epoch": 0.39, + "grad_norm": 0.7496551275253296, + "learning_rate": 1.3730270655004314e-05, + "loss": 2.1345, + "step": 11682 + }, + { + "epoch": 0.39, + "grad_norm": 0.7160812020301819, + "learning_rate": 1.3729284460119942e-05, + "loss": 2.0269, + "step": 11683 + }, + { + "epoch": 0.39, + "grad_norm": 0.7383368611335754, + "learning_rate": 1.3728298223104299e-05, + "loss": 2.1474, + "step": 11684 + }, + { + "epoch": 0.39, + "grad_norm": 0.7193183898925781, + "learning_rate": 1.372731194396853e-05, + "loss": 2.0504, + "step": 11685 + }, + { + "epoch": 0.39, + "grad_norm": 0.7306458950042725, + "learning_rate": 1.372632562272377e-05, + "loss": 2.0681, + "step": 11686 + }, + { + "epoch": 0.39, + "grad_norm": 0.7010766863822937, + "learning_rate": 1.3725339259381169e-05, + "loss": 2.1072, + "step": 11687 + }, + { + "epoch": 0.39, + "grad_norm": 0.7116683125495911, + "learning_rate": 1.372435285395187e-05, + "loss": 2.0939, + "step": 11688 + }, + { + "epoch": 0.39, + "grad_norm": 0.7291589379310608, + "learning_rate": 1.3723366406447017e-05, + "loss": 2.0564, + "step": 11689 + }, + { + "epoch": 0.39, + "grad_norm": 0.7191392183303833, + "learning_rate": 1.3722379916877746e-05, + "loss": 2.1154, + "step": 11690 + }, + { + "epoch": 0.39, + "grad_norm": 0.7248224020004272, + "learning_rate": 1.3721393385255212e-05, + "loss": 2.1274, + "step": 11691 + }, + { + "epoch": 0.39, + "grad_norm": 0.7120988965034485, + "learning_rate": 1.372040681159056e-05, + "loss": 2.0742, + "step": 11692 + }, + { + "epoch": 0.39, + "grad_norm": 0.7146470546722412, + "learning_rate": 1.3719420195894925e-05, + "loss": 2.0074, + "step": 11693 + }, + { + "epoch": 0.39, + "grad_norm": 0.734995424747467, + "learning_rate": 1.3718433538179465e-05, + "loss": 2.0324, + "step": 11694 + }, + { + "epoch": 0.39, + "grad_norm": 0.7181507349014282, + "learning_rate": 1.3717446838455322e-05, + "loss": 2.053, + "step": 11695 + }, + { + "epoch": 0.39, + "grad_norm": 0.7228471636772156, + "learning_rate": 1.3716460096733641e-05, + "loss": 2.0576, + "step": 11696 + }, + { + "epoch": 0.39, + "grad_norm": 0.7376711964607239, + "learning_rate": 1.3715473313025573e-05, + "loss": 2.081, + "step": 11697 + }, + { + "epoch": 0.39, + "grad_norm": 0.7009484171867371, + "learning_rate": 1.3714486487342265e-05, + "loss": 2.1181, + "step": 11698 + }, + { + "epoch": 0.39, + "grad_norm": 0.7111235857009888, + "learning_rate": 1.3713499619694865e-05, + "loss": 2.11, + "step": 11699 + }, + { + "epoch": 0.39, + "grad_norm": 0.7466190457344055, + "learning_rate": 1.3712512710094524e-05, + "loss": 2.179, + "step": 11700 + }, + { + "epoch": 0.39, + "grad_norm": 0.6936691403388977, + "learning_rate": 1.3711525758552391e-05, + "loss": 2.0376, + "step": 11701 + }, + { + "epoch": 0.39, + "grad_norm": 0.7580778002738953, + "learning_rate": 1.3710538765079614e-05, + "loss": 2.1115, + "step": 11702 + }, + { + "epoch": 0.39, + "grad_norm": 0.7091009020805359, + "learning_rate": 1.3709551729687345e-05, + "loss": 2.1275, + "step": 11703 + }, + { + "epoch": 0.39, + "grad_norm": 0.7427220344543457, + "learning_rate": 1.370856465238673e-05, + "loss": 2.0868, + "step": 11704 + }, + { + "epoch": 0.39, + "grad_norm": 0.7189304828643799, + "learning_rate": 1.3707577533188932e-05, + "loss": 2.0795, + "step": 11705 + }, + { + "epoch": 0.39, + "grad_norm": 0.680560827255249, + "learning_rate": 1.3706590372105093e-05, + "loss": 2.0343, + "step": 11706 + }, + { + "epoch": 0.39, + "grad_norm": 0.7574985027313232, + "learning_rate": 1.3705603169146367e-05, + "loss": 2.1341, + "step": 11707 + }, + { + "epoch": 0.39, + "grad_norm": 0.7180473208427429, + "learning_rate": 1.370461592432391e-05, + "loss": 2.0851, + "step": 11708 + }, + { + "epoch": 0.39, + "grad_norm": 0.7354133725166321, + "learning_rate": 1.3703628637648871e-05, + "loss": 2.0651, + "step": 11709 + }, + { + "epoch": 0.39, + "grad_norm": 0.7554914355278015, + "learning_rate": 1.370264130913241e-05, + "loss": 2.1047, + "step": 11710 + }, + { + "epoch": 0.39, + "grad_norm": 0.7154842019081116, + "learning_rate": 1.3701653938785673e-05, + "loss": 2.085, + "step": 11711 + }, + { + "epoch": 0.39, + "grad_norm": 0.7353411316871643, + "learning_rate": 1.3700666526619822e-05, + "loss": 2.1355, + "step": 11712 + }, + { + "epoch": 0.39, + "grad_norm": 0.7104331851005554, + "learning_rate": 1.3699679072646009e-05, + "loss": 2.0757, + "step": 11713 + }, + { + "epoch": 0.39, + "grad_norm": 0.7844958901405334, + "learning_rate": 1.3698691576875392e-05, + "loss": 2.1545, + "step": 11714 + }, + { + "epoch": 0.39, + "grad_norm": 0.7634601593017578, + "learning_rate": 1.3697704039319119e-05, + "loss": 2.0961, + "step": 11715 + }, + { + "epoch": 0.39, + "grad_norm": 0.7031099200248718, + "learning_rate": 1.369671645998836e-05, + "loss": 2.1251, + "step": 11716 + }, + { + "epoch": 0.39, + "grad_norm": 0.7315368056297302, + "learning_rate": 1.3695728838894258e-05, + "loss": 2.0888, + "step": 11717 + }, + { + "epoch": 0.39, + "grad_norm": 0.7246574759483337, + "learning_rate": 1.3694741176047983e-05, + "loss": 2.1324, + "step": 11718 + }, + { + "epoch": 0.39, + "grad_norm": 0.733501672744751, + "learning_rate": 1.3693753471460687e-05, + "loss": 2.1518, + "step": 11719 + }, + { + "epoch": 0.39, + "grad_norm": 0.7272926568984985, + "learning_rate": 1.3692765725143527e-05, + "loss": 2.1252, + "step": 11720 + }, + { + "epoch": 0.39, + "grad_norm": 0.7733513116836548, + "learning_rate": 1.3691777937107664e-05, + "loss": 2.1165, + "step": 11721 + }, + { + "epoch": 0.39, + "grad_norm": 0.8041664361953735, + "learning_rate": 1.3690790107364257e-05, + "loss": 2.1638, + "step": 11722 + }, + { + "epoch": 0.39, + "grad_norm": 0.7449365258216858, + "learning_rate": 1.3689802235924469e-05, + "loss": 2.1418, + "step": 11723 + }, + { + "epoch": 0.39, + "grad_norm": 0.7313663363456726, + "learning_rate": 1.3688814322799457e-05, + "loss": 2.1502, + "step": 11724 + }, + { + "epoch": 0.39, + "grad_norm": 0.7385975122451782, + "learning_rate": 1.3687826368000382e-05, + "loss": 2.125, + "step": 11725 + }, + { + "epoch": 0.39, + "grad_norm": 0.7681602239608765, + "learning_rate": 1.3686838371538405e-05, + "loss": 2.0685, + "step": 11726 + }, + { + "epoch": 0.39, + "grad_norm": 0.733538031578064, + "learning_rate": 1.3685850333424693e-05, + "loss": 2.1005, + "step": 11727 + }, + { + "epoch": 0.39, + "grad_norm": 0.7743049263954163, + "learning_rate": 1.3684862253670397e-05, + "loss": 2.0788, + "step": 11728 + }, + { + "epoch": 0.39, + "grad_norm": 0.7338125705718994, + "learning_rate": 1.3683874132286694e-05, + "loss": 2.0682, + "step": 11729 + }, + { + "epoch": 0.39, + "grad_norm": 0.7306209802627563, + "learning_rate": 1.3682885969284736e-05, + "loss": 2.1116, + "step": 11730 + }, + { + "epoch": 0.39, + "grad_norm": 0.7301396727561951, + "learning_rate": 1.3681897764675692e-05, + "loss": 2.1278, + "step": 11731 + }, + { + "epoch": 0.39, + "grad_norm": 0.7581589818000793, + "learning_rate": 1.3680909518470725e-05, + "loss": 2.132, + "step": 11732 + }, + { + "epoch": 0.39, + "grad_norm": 0.7395617365837097, + "learning_rate": 1.3679921230680997e-05, + "loss": 2.1896, + "step": 11733 + }, + { + "epoch": 0.39, + "grad_norm": 0.7247474193572998, + "learning_rate": 1.3678932901317676e-05, + "loss": 2.0744, + "step": 11734 + }, + { + "epoch": 0.39, + "grad_norm": 0.727653443813324, + "learning_rate": 1.3677944530391928e-05, + "loss": 2.0326, + "step": 11735 + }, + { + "epoch": 0.39, + "grad_norm": 0.7075906991958618, + "learning_rate": 1.3676956117914919e-05, + "loss": 2.0534, + "step": 11736 + }, + { + "epoch": 0.39, + "grad_norm": 0.7425323724746704, + "learning_rate": 1.3675967663897813e-05, + "loss": 2.0904, + "step": 11737 + }, + { + "epoch": 0.39, + "grad_norm": 0.7643874287605286, + "learning_rate": 1.3674979168351782e-05, + "loss": 2.0633, + "step": 11738 + }, + { + "epoch": 0.39, + "grad_norm": 0.7117298245429993, + "learning_rate": 1.3673990631287988e-05, + "loss": 2.0662, + "step": 11739 + }, + { + "epoch": 0.39, + "grad_norm": 0.6828161478042603, + "learning_rate": 1.3673002052717603e-05, + "loss": 2.0638, + "step": 11740 + }, + { + "epoch": 0.39, + "grad_norm": 0.7172871232032776, + "learning_rate": 1.3672013432651791e-05, + "loss": 2.1276, + "step": 11741 + }, + { + "epoch": 0.39, + "grad_norm": 0.6878655552864075, + "learning_rate": 1.367102477110172e-05, + "loss": 2.0929, + "step": 11742 + }, + { + "epoch": 0.39, + "grad_norm": 0.7642264366149902, + "learning_rate": 1.3670036068078569e-05, + "loss": 2.1102, + "step": 11743 + }, + { + "epoch": 0.39, + "grad_norm": 0.7359413504600525, + "learning_rate": 1.3669047323593499e-05, + "loss": 2.1085, + "step": 11744 + }, + { + "epoch": 0.39, + "grad_norm": 0.6921376585960388, + "learning_rate": 1.3668058537657682e-05, + "loss": 2.1135, + "step": 11745 + }, + { + "epoch": 0.39, + "grad_norm": 0.7098267078399658, + "learning_rate": 1.3667069710282289e-05, + "loss": 2.0348, + "step": 11746 + }, + { + "epoch": 0.39, + "grad_norm": 0.7138068079948425, + "learning_rate": 1.3666080841478493e-05, + "loss": 2.1003, + "step": 11747 + }, + { + "epoch": 0.39, + "grad_norm": 0.7155347466468811, + "learning_rate": 1.3665091931257464e-05, + "loss": 2.1124, + "step": 11748 + }, + { + "epoch": 0.39, + "grad_norm": 0.7511319518089294, + "learning_rate": 1.3664102979630374e-05, + "loss": 2.1371, + "step": 11749 + }, + { + "epoch": 0.39, + "grad_norm": 0.7296749353408813, + "learning_rate": 1.3663113986608393e-05, + "loss": 2.1696, + "step": 11750 + }, + { + "epoch": 0.39, + "grad_norm": 0.7460076808929443, + "learning_rate": 1.3662124952202703e-05, + "loss": 2.1298, + "step": 11751 + }, + { + "epoch": 0.39, + "grad_norm": 0.7265193462371826, + "learning_rate": 1.3661135876424466e-05, + "loss": 2.0483, + "step": 11752 + }, + { + "epoch": 0.39, + "grad_norm": 0.7461183667182922, + "learning_rate": 1.3660146759284863e-05, + "loss": 2.0551, + "step": 11753 + }, + { + "epoch": 0.39, + "grad_norm": 0.7553591728210449, + "learning_rate": 1.3659157600795067e-05, + "loss": 2.1206, + "step": 11754 + }, + { + "epoch": 0.39, + "grad_norm": 0.7180603742599487, + "learning_rate": 1.3658168400966254e-05, + "loss": 2.0343, + "step": 11755 + }, + { + "epoch": 0.39, + "grad_norm": 0.7250557541847229, + "learning_rate": 1.3657179159809597e-05, + "loss": 2.1298, + "step": 11756 + }, + { + "epoch": 0.39, + "grad_norm": 0.7045844197273254, + "learning_rate": 1.3656189877336274e-05, + "loss": 2.0182, + "step": 11757 + }, + { + "epoch": 0.39, + "grad_norm": 0.7270033359527588, + "learning_rate": 1.3655200553557462e-05, + "loss": 2.1452, + "step": 11758 + }, + { + "epoch": 0.39, + "grad_norm": 0.7578840255737305, + "learning_rate": 1.3654211188484333e-05, + "loss": 2.1128, + "step": 11759 + }, + { + "epoch": 0.39, + "grad_norm": 0.7205632925033569, + "learning_rate": 1.3653221782128073e-05, + "loss": 2.0798, + "step": 11760 + }, + { + "epoch": 0.39, + "grad_norm": 0.7442613840103149, + "learning_rate": 1.3652232334499847e-05, + "loss": 2.0643, + "step": 11761 + }, + { + "epoch": 0.39, + "grad_norm": 0.7242169380187988, + "learning_rate": 1.3651242845610846e-05, + "loss": 2.0407, + "step": 11762 + }, + { + "epoch": 0.39, + "grad_norm": 0.7313669323921204, + "learning_rate": 1.3650253315472241e-05, + "loss": 2.1489, + "step": 11763 + }, + { + "epoch": 0.39, + "grad_norm": 0.7545270323753357, + "learning_rate": 1.3649263744095212e-05, + "loss": 2.1099, + "step": 11764 + }, + { + "epoch": 0.39, + "grad_norm": 0.7050315141677856, + "learning_rate": 1.3648274131490942e-05, + "loss": 2.0566, + "step": 11765 + }, + { + "epoch": 0.39, + "grad_norm": 0.7374283075332642, + "learning_rate": 1.3647284477670609e-05, + "loss": 2.0849, + "step": 11766 + }, + { + "epoch": 0.39, + "grad_norm": 0.7493853569030762, + "learning_rate": 1.3646294782645393e-05, + "loss": 2.1557, + "step": 11767 + }, + { + "epoch": 0.39, + "grad_norm": 0.7397019267082214, + "learning_rate": 1.3645305046426475e-05, + "loss": 2.0967, + "step": 11768 + }, + { + "epoch": 0.39, + "grad_norm": 0.7131369113922119, + "learning_rate": 1.3644315269025037e-05, + "loss": 2.0812, + "step": 11769 + }, + { + "epoch": 0.39, + "grad_norm": 0.7269879579544067, + "learning_rate": 1.364332545045226e-05, + "loss": 2.1027, + "step": 11770 + }, + { + "epoch": 0.39, + "grad_norm": 0.7159155011177063, + "learning_rate": 1.3642335590719331e-05, + "loss": 2.1522, + "step": 11771 + }, + { + "epoch": 0.39, + "grad_norm": 0.7585508823394775, + "learning_rate": 1.3641345689837424e-05, + "loss": 2.0823, + "step": 11772 + }, + { + "epoch": 0.39, + "grad_norm": 0.732868492603302, + "learning_rate": 1.3640355747817731e-05, + "loss": 2.0949, + "step": 11773 + }, + { + "epoch": 0.39, + "grad_norm": 0.7338318228721619, + "learning_rate": 1.3639365764671432e-05, + "loss": 2.1353, + "step": 11774 + }, + { + "epoch": 0.39, + "grad_norm": 0.7466054558753967, + "learning_rate": 1.363837574040971e-05, + "loss": 2.1409, + "step": 11775 + }, + { + "epoch": 0.39, + "grad_norm": 0.7179787755012512, + "learning_rate": 1.3637385675043753e-05, + "loss": 2.0827, + "step": 11776 + }, + { + "epoch": 0.39, + "grad_norm": 0.733527421951294, + "learning_rate": 1.3636395568584744e-05, + "loss": 2.153, + "step": 11777 + }, + { + "epoch": 0.39, + "grad_norm": 0.7135565876960754, + "learning_rate": 1.3635405421043868e-05, + "loss": 2.0811, + "step": 11778 + }, + { + "epoch": 0.39, + "grad_norm": 0.7211730480194092, + "learning_rate": 1.3634415232432313e-05, + "loss": 2.1196, + "step": 11779 + }, + { + "epoch": 0.39, + "grad_norm": 0.7162937521934509, + "learning_rate": 1.3633425002761262e-05, + "loss": 2.0789, + "step": 11780 + }, + { + "epoch": 0.39, + "grad_norm": 0.7458769679069519, + "learning_rate": 1.3632434732041909e-05, + "loss": 2.1288, + "step": 11781 + }, + { + "epoch": 0.39, + "grad_norm": 0.7141501903533936, + "learning_rate": 1.3631444420285436e-05, + "loss": 2.1043, + "step": 11782 + }, + { + "epoch": 0.39, + "grad_norm": 0.7233158946037292, + "learning_rate": 1.3630454067503027e-05, + "loss": 2.147, + "step": 11783 + }, + { + "epoch": 0.39, + "grad_norm": 0.7284120917320251, + "learning_rate": 1.362946367370588e-05, + "loss": 2.1314, + "step": 11784 + }, + { + "epoch": 0.39, + "grad_norm": 0.7159897685050964, + "learning_rate": 1.3628473238905181e-05, + "loss": 2.0526, + "step": 11785 + }, + { + "epoch": 0.39, + "grad_norm": 0.7793608903884888, + "learning_rate": 1.3627482763112115e-05, + "loss": 2.0951, + "step": 11786 + }, + { + "epoch": 0.39, + "grad_norm": 0.7869195938110352, + "learning_rate": 1.3626492246337876e-05, + "loss": 2.0777, + "step": 11787 + }, + { + "epoch": 0.39, + "grad_norm": 0.8092279434204102, + "learning_rate": 1.3625501688593654e-05, + "loss": 2.0745, + "step": 11788 + }, + { + "epoch": 0.39, + "grad_norm": 0.7590587735176086, + "learning_rate": 1.3624511089890638e-05, + "loss": 2.0162, + "step": 11789 + }, + { + "epoch": 0.39, + "grad_norm": 0.7630215287208557, + "learning_rate": 1.3623520450240018e-05, + "loss": 2.1698, + "step": 11790 + }, + { + "epoch": 0.39, + "grad_norm": 0.7258909344673157, + "learning_rate": 1.3622529769652987e-05, + "loss": 2.0457, + "step": 11791 + }, + { + "epoch": 0.39, + "grad_norm": 0.7465696334838867, + "learning_rate": 1.3621539048140741e-05, + "loss": 2.1494, + "step": 11792 + }, + { + "epoch": 0.39, + "grad_norm": 0.7444326877593994, + "learning_rate": 1.3620548285714468e-05, + "loss": 2.0528, + "step": 11793 + }, + { + "epoch": 0.39, + "grad_norm": 0.7123231291770935, + "learning_rate": 1.3619557482385362e-05, + "loss": 2.0794, + "step": 11794 + }, + { + "epoch": 0.39, + "grad_norm": 0.7381765246391296, + "learning_rate": 1.3618566638164615e-05, + "loss": 2.134, + "step": 11795 + }, + { + "epoch": 0.39, + "grad_norm": 0.8029764890670776, + "learning_rate": 1.3617575753063427e-05, + "loss": 2.1034, + "step": 11796 + }, + { + "epoch": 0.39, + "grad_norm": 0.7212918400764465, + "learning_rate": 1.3616584827092984e-05, + "loss": 2.0822, + "step": 11797 + }, + { + "epoch": 0.39, + "grad_norm": 0.6852489113807678, + "learning_rate": 1.361559386026449e-05, + "loss": 2.0727, + "step": 11798 + }, + { + "epoch": 0.39, + "grad_norm": 0.7195196747779846, + "learning_rate": 1.3614602852589132e-05, + "loss": 2.128, + "step": 11799 + }, + { + "epoch": 0.39, + "grad_norm": 0.7447269558906555, + "learning_rate": 1.3613611804078109e-05, + "loss": 2.0551, + "step": 11800 + }, + { + "epoch": 0.39, + "grad_norm": 0.70705646276474, + "learning_rate": 1.3612620714742617e-05, + "loss": 2.075, + "step": 11801 + }, + { + "epoch": 0.39, + "grad_norm": 0.721500039100647, + "learning_rate": 1.3611629584593856e-05, + "loss": 2.0944, + "step": 11802 + }, + { + "epoch": 0.39, + "grad_norm": 0.7489003539085388, + "learning_rate": 1.3610638413643021e-05, + "loss": 2.0642, + "step": 11803 + }, + { + "epoch": 0.39, + "grad_norm": 0.7121352553367615, + "learning_rate": 1.3609647201901305e-05, + "loss": 2.1007, + "step": 11804 + }, + { + "epoch": 0.39, + "grad_norm": 0.7280601859092712, + "learning_rate": 1.3608655949379914e-05, + "loss": 2.094, + "step": 11805 + }, + { + "epoch": 0.39, + "grad_norm": 0.7160506844520569, + "learning_rate": 1.3607664656090042e-05, + "loss": 2.0419, + "step": 11806 + }, + { + "epoch": 0.39, + "grad_norm": 0.7363958954811096, + "learning_rate": 1.360667332204289e-05, + "loss": 2.1453, + "step": 11807 + }, + { + "epoch": 0.39, + "grad_norm": 0.7282476425170898, + "learning_rate": 1.3605681947249654e-05, + "loss": 2.131, + "step": 11808 + }, + { + "epoch": 0.39, + "grad_norm": 0.717846155166626, + "learning_rate": 1.360469053172154e-05, + "loss": 2.1376, + "step": 11809 + }, + { + "epoch": 0.39, + "grad_norm": 0.7346615195274353, + "learning_rate": 1.3603699075469743e-05, + "loss": 2.078, + "step": 11810 + }, + { + "epoch": 0.39, + "grad_norm": 0.7356274127960205, + "learning_rate": 1.3602707578505465e-05, + "loss": 2.0375, + "step": 11811 + }, + { + "epoch": 0.39, + "grad_norm": 0.750554621219635, + "learning_rate": 1.360171604083991e-05, + "loss": 2.149, + "step": 11812 + }, + { + "epoch": 0.39, + "grad_norm": 0.7154950499534607, + "learning_rate": 1.3600724462484276e-05, + "loss": 2.0947, + "step": 11813 + }, + { + "epoch": 0.39, + "grad_norm": 0.7440913915634155, + "learning_rate": 1.359973284344977e-05, + "loss": 2.1504, + "step": 11814 + }, + { + "epoch": 0.39, + "grad_norm": 0.6969454884529114, + "learning_rate": 1.3598741183747589e-05, + "loss": 2.0589, + "step": 11815 + }, + { + "epoch": 0.39, + "grad_norm": 0.7188594341278076, + "learning_rate": 1.3597749483388941e-05, + "loss": 2.1022, + "step": 11816 + }, + { + "epoch": 0.39, + "grad_norm": 0.7729142904281616, + "learning_rate": 1.3596757742385026e-05, + "loss": 2.1188, + "step": 11817 + }, + { + "epoch": 0.39, + "grad_norm": 0.7737226486206055, + "learning_rate": 1.359576596074705e-05, + "loss": 2.0984, + "step": 11818 + }, + { + "epoch": 0.39, + "grad_norm": 0.7376880049705505, + "learning_rate": 1.359477413848622e-05, + "loss": 2.1221, + "step": 11819 + }, + { + "epoch": 0.39, + "grad_norm": 0.7260102033615112, + "learning_rate": 1.3593782275613738e-05, + "loss": 2.0636, + "step": 11820 + }, + { + "epoch": 0.39, + "grad_norm": 0.712247371673584, + "learning_rate": 1.3592790372140807e-05, + "loss": 2.1116, + "step": 11821 + }, + { + "epoch": 0.39, + "grad_norm": 0.7451822757720947, + "learning_rate": 1.359179842807864e-05, + "loss": 2.0608, + "step": 11822 + }, + { + "epoch": 0.39, + "grad_norm": 0.7092812061309814, + "learning_rate": 1.3590806443438438e-05, + "loss": 2.1458, + "step": 11823 + }, + { + "epoch": 0.39, + "grad_norm": 0.7245237231254578, + "learning_rate": 1.3589814418231408e-05, + "loss": 2.0969, + "step": 11824 + }, + { + "epoch": 0.39, + "grad_norm": 0.7172369956970215, + "learning_rate": 1.3588822352468757e-05, + "loss": 2.1557, + "step": 11825 + }, + { + "epoch": 0.39, + "grad_norm": 0.7550198435783386, + "learning_rate": 1.3587830246161699e-05, + "loss": 2.1188, + "step": 11826 + }, + { + "epoch": 0.39, + "grad_norm": 0.7420009970664978, + "learning_rate": 1.3586838099321436e-05, + "loss": 2.165, + "step": 11827 + }, + { + "epoch": 0.39, + "grad_norm": 0.7174752354621887, + "learning_rate": 1.3585845911959177e-05, + "loss": 2.0657, + "step": 11828 + }, + { + "epoch": 0.39, + "grad_norm": 0.7367106676101685, + "learning_rate": 1.3584853684086132e-05, + "loss": 2.1422, + "step": 11829 + }, + { + "epoch": 0.39, + "grad_norm": 0.7473825812339783, + "learning_rate": 1.358386141571351e-05, + "loss": 2.1102, + "step": 11830 + }, + { + "epoch": 0.39, + "grad_norm": 0.6973622441291809, + "learning_rate": 1.3582869106852527e-05, + "loss": 2.1118, + "step": 11831 + }, + { + "epoch": 0.39, + "grad_norm": 0.7323245406150818, + "learning_rate": 1.3581876757514383e-05, + "loss": 2.0888, + "step": 11832 + }, + { + "epoch": 0.39, + "grad_norm": 0.7049663066864014, + "learning_rate": 1.35808843677103e-05, + "loss": 2.099, + "step": 11833 + }, + { + "epoch": 0.39, + "grad_norm": 0.7374347448348999, + "learning_rate": 1.3579891937451479e-05, + "loss": 2.0756, + "step": 11834 + }, + { + "epoch": 0.39, + "grad_norm": 0.7212903499603271, + "learning_rate": 1.357889946674914e-05, + "loss": 2.1041, + "step": 11835 + }, + { + "epoch": 0.39, + "grad_norm": 0.7214891314506531, + "learning_rate": 1.3577906955614491e-05, + "loss": 2.108, + "step": 11836 + }, + { + "epoch": 0.39, + "grad_norm": 0.7283778190612793, + "learning_rate": 1.3576914404058745e-05, + "loss": 2.1127, + "step": 11837 + }, + { + "epoch": 0.39, + "grad_norm": 0.7171215415000916, + "learning_rate": 1.3575921812093118e-05, + "loss": 2.0911, + "step": 11838 + }, + { + "epoch": 0.39, + "grad_norm": 0.7304274439811707, + "learning_rate": 1.3574929179728821e-05, + "loss": 2.0783, + "step": 11839 + }, + { + "epoch": 0.39, + "grad_norm": 0.7523946166038513, + "learning_rate": 1.357393650697707e-05, + "loss": 2.0963, + "step": 11840 + }, + { + "epoch": 0.39, + "grad_norm": 0.7120776176452637, + "learning_rate": 1.3572943793849081e-05, + "loss": 2.0881, + "step": 11841 + }, + { + "epoch": 0.39, + "grad_norm": 0.7136206030845642, + "learning_rate": 1.3571951040356066e-05, + "loss": 2.1311, + "step": 11842 + }, + { + "epoch": 0.39, + "grad_norm": 0.7349733114242554, + "learning_rate": 1.3570958246509238e-05, + "loss": 2.0704, + "step": 11843 + }, + { + "epoch": 0.39, + "grad_norm": 0.7401798367500305, + "learning_rate": 1.3569965412319821e-05, + "loss": 2.1256, + "step": 11844 + }, + { + "epoch": 0.39, + "grad_norm": 0.7279854416847229, + "learning_rate": 1.3568972537799023e-05, + "loss": 2.0348, + "step": 11845 + }, + { + "epoch": 0.39, + "grad_norm": 0.7442956566810608, + "learning_rate": 1.3567979622958066e-05, + "loss": 2.0971, + "step": 11846 + }, + { + "epoch": 0.39, + "grad_norm": 0.7395093441009521, + "learning_rate": 1.356698666780817e-05, + "loss": 2.0695, + "step": 11847 + }, + { + "epoch": 0.39, + "grad_norm": 0.7076857686042786, + "learning_rate": 1.3565993672360545e-05, + "loss": 2.0809, + "step": 11848 + }, + { + "epoch": 0.39, + "grad_norm": 0.7494702339172363, + "learning_rate": 1.3565000636626414e-05, + "loss": 2.0849, + "step": 11849 + }, + { + "epoch": 0.39, + "grad_norm": 0.7329906821250916, + "learning_rate": 1.3564007560616994e-05, + "loss": 2.1433, + "step": 11850 + }, + { + "epoch": 0.39, + "grad_norm": 0.7262210845947266, + "learning_rate": 1.3563014444343508e-05, + "loss": 2.0718, + "step": 11851 + }, + { + "epoch": 0.39, + "grad_norm": 0.7258151769638062, + "learning_rate": 1.3562021287817171e-05, + "loss": 2.1178, + "step": 11852 + }, + { + "epoch": 0.39, + "grad_norm": 0.7193211317062378, + "learning_rate": 1.3561028091049209e-05, + "loss": 2.134, + "step": 11853 + }, + { + "epoch": 0.39, + "grad_norm": 0.7477589249610901, + "learning_rate": 1.3560034854050832e-05, + "loss": 2.1748, + "step": 11854 + }, + { + "epoch": 0.39, + "grad_norm": 0.7304595112800598, + "learning_rate": 1.3559041576833275e-05, + "loss": 2.1029, + "step": 11855 + }, + { + "epoch": 0.39, + "grad_norm": 0.7145698070526123, + "learning_rate": 1.3558048259407746e-05, + "loss": 2.1139, + "step": 11856 + }, + { + "epoch": 0.39, + "grad_norm": 0.7470434904098511, + "learning_rate": 1.3557054901785474e-05, + "loss": 2.0705, + "step": 11857 + }, + { + "epoch": 0.39, + "grad_norm": 0.7447194457054138, + "learning_rate": 1.3556061503977682e-05, + "loss": 2.141, + "step": 11858 + }, + { + "epoch": 0.39, + "grad_norm": 0.7773707509040833, + "learning_rate": 1.3555068065995592e-05, + "loss": 2.0955, + "step": 11859 + }, + { + "epoch": 0.39, + "grad_norm": 0.7411269545555115, + "learning_rate": 1.3554074587850423e-05, + "loss": 2.1382, + "step": 11860 + }, + { + "epoch": 0.39, + "grad_norm": 0.718757152557373, + "learning_rate": 1.3553081069553404e-05, + "loss": 2.131, + "step": 11861 + }, + { + "epoch": 0.39, + "grad_norm": 0.7610073685646057, + "learning_rate": 1.3552087511115758e-05, + "loss": 2.1214, + "step": 11862 + }, + { + "epoch": 0.39, + "grad_norm": 0.7159537076950073, + "learning_rate": 1.355109391254871e-05, + "loss": 2.086, + "step": 11863 + }, + { + "epoch": 0.39, + "grad_norm": 0.723526120185852, + "learning_rate": 1.3550100273863485e-05, + "loss": 2.1387, + "step": 11864 + }, + { + "epoch": 0.39, + "grad_norm": 0.7085689306259155, + "learning_rate": 1.3549106595071301e-05, + "loss": 2.1302, + "step": 11865 + }, + { + "epoch": 0.39, + "grad_norm": 0.7286121845245361, + "learning_rate": 1.3548112876183398e-05, + "loss": 2.0922, + "step": 11866 + }, + { + "epoch": 0.39, + "grad_norm": 0.7174853682518005, + "learning_rate": 1.3547119117210991e-05, + "loss": 2.1488, + "step": 11867 + }, + { + "epoch": 0.39, + "grad_norm": 0.7198792695999146, + "learning_rate": 1.3546125318165312e-05, + "loss": 2.0757, + "step": 11868 + }, + { + "epoch": 0.39, + "grad_norm": 0.7292694449424744, + "learning_rate": 1.3545131479057588e-05, + "loss": 2.0017, + "step": 11869 + }, + { + "epoch": 0.39, + "grad_norm": 0.7365686893463135, + "learning_rate": 1.3544137599899046e-05, + "loss": 2.1266, + "step": 11870 + }, + { + "epoch": 0.39, + "grad_norm": 0.7388505935668945, + "learning_rate": 1.3543143680700915e-05, + "loss": 2.1003, + "step": 11871 + }, + { + "epoch": 0.39, + "grad_norm": 0.7343797087669373, + "learning_rate": 1.3542149721474422e-05, + "loss": 2.0624, + "step": 11872 + }, + { + "epoch": 0.4, + "grad_norm": 0.7326977849006653, + "learning_rate": 1.3541155722230798e-05, + "loss": 2.1209, + "step": 11873 + }, + { + "epoch": 0.4, + "grad_norm": 0.7104066610336304, + "learning_rate": 1.3540161682981273e-05, + "loss": 2.0663, + "step": 11874 + }, + { + "epoch": 0.4, + "grad_norm": 0.7365987300872803, + "learning_rate": 1.3539167603737076e-05, + "loss": 2.1019, + "step": 11875 + }, + { + "epoch": 0.4, + "grad_norm": 0.7244292497634888, + "learning_rate": 1.3538173484509434e-05, + "loss": 2.1085, + "step": 11876 + }, + { + "epoch": 0.4, + "grad_norm": 0.7400307655334473, + "learning_rate": 1.3537179325309584e-05, + "loss": 2.1224, + "step": 11877 + }, + { + "epoch": 0.4, + "grad_norm": 0.7349416017532349, + "learning_rate": 1.3536185126148756e-05, + "loss": 2.1256, + "step": 11878 + }, + { + "epoch": 0.4, + "grad_norm": 0.7294501066207886, + "learning_rate": 1.3535190887038176e-05, + "loss": 2.1478, + "step": 11879 + }, + { + "epoch": 0.4, + "grad_norm": 0.6963375806808472, + "learning_rate": 1.3534196607989088e-05, + "loss": 2.1318, + "step": 11880 + }, + { + "epoch": 0.4, + "grad_norm": 0.7168729305267334, + "learning_rate": 1.3533202289012715e-05, + "loss": 2.1097, + "step": 11881 + }, + { + "epoch": 0.4, + "grad_norm": 0.7568763494491577, + "learning_rate": 1.3532207930120294e-05, + "loss": 2.1822, + "step": 11882 + }, + { + "epoch": 0.4, + "grad_norm": 0.7257993221282959, + "learning_rate": 1.3531213531323055e-05, + "loss": 2.0488, + "step": 11883 + }, + { + "epoch": 0.4, + "grad_norm": 0.754767119884491, + "learning_rate": 1.353021909263224e-05, + "loss": 2.149, + "step": 11884 + }, + { + "epoch": 0.4, + "grad_norm": 0.7038937211036682, + "learning_rate": 1.3529224614059073e-05, + "loss": 2.0851, + "step": 11885 + }, + { + "epoch": 0.4, + "grad_norm": 0.7204558253288269, + "learning_rate": 1.3528230095614801e-05, + "loss": 2.0242, + "step": 11886 + }, + { + "epoch": 0.4, + "grad_norm": 0.7038654088973999, + "learning_rate": 1.3527235537310646e-05, + "loss": 2.0865, + "step": 11887 + }, + { + "epoch": 0.4, + "grad_norm": 0.7367478013038635, + "learning_rate": 1.3526240939157855e-05, + "loss": 2.1776, + "step": 11888 + }, + { + "epoch": 0.4, + "grad_norm": 0.7200941443443298, + "learning_rate": 1.3525246301167663e-05, + "loss": 2.0742, + "step": 11889 + }, + { + "epoch": 0.4, + "grad_norm": 0.7723875045776367, + "learning_rate": 1.3524251623351297e-05, + "loss": 2.1452, + "step": 11890 + }, + { + "epoch": 0.4, + "grad_norm": 0.7163500785827637, + "learning_rate": 1.3523256905720007e-05, + "loss": 2.0615, + "step": 11891 + }, + { + "epoch": 0.4, + "grad_norm": 0.7262644171714783, + "learning_rate": 1.3522262148285025e-05, + "loss": 2.0837, + "step": 11892 + }, + { + "epoch": 0.4, + "grad_norm": 0.7281244993209839, + "learning_rate": 1.3521267351057588e-05, + "loss": 2.1059, + "step": 11893 + }, + { + "epoch": 0.4, + "grad_norm": 0.7241297960281372, + "learning_rate": 1.3520272514048937e-05, + "loss": 2.147, + "step": 11894 + }, + { + "epoch": 0.4, + "grad_norm": 0.7133578062057495, + "learning_rate": 1.351927763727031e-05, + "loss": 2.1647, + "step": 11895 + }, + { + "epoch": 0.4, + "grad_norm": 0.7305067777633667, + "learning_rate": 1.3518282720732947e-05, + "loss": 2.095, + "step": 11896 + }, + { + "epoch": 0.4, + "grad_norm": 0.7288134694099426, + "learning_rate": 1.351728776444809e-05, + "loss": 2.1072, + "step": 11897 + }, + { + "epoch": 0.4, + "grad_norm": 0.723741888999939, + "learning_rate": 1.3516292768426973e-05, + "loss": 2.0583, + "step": 11898 + }, + { + "epoch": 0.4, + "grad_norm": 0.7667611241340637, + "learning_rate": 1.351529773268084e-05, + "loss": 2.1075, + "step": 11899 + }, + { + "epoch": 0.4, + "grad_norm": 0.7788020372390747, + "learning_rate": 1.3514302657220939e-05, + "loss": 2.0886, + "step": 11900 + }, + { + "epoch": 0.4, + "grad_norm": 0.7150760293006897, + "learning_rate": 1.3513307542058503e-05, + "loss": 2.0834, + "step": 11901 + }, + { + "epoch": 0.4, + "grad_norm": 0.7030854821205139, + "learning_rate": 1.3512312387204778e-05, + "loss": 2.1086, + "step": 11902 + }, + { + "epoch": 0.4, + "grad_norm": 0.7624700665473938, + "learning_rate": 1.3511317192671005e-05, + "loss": 2.1323, + "step": 11903 + }, + { + "epoch": 0.4, + "grad_norm": 0.7344890832901001, + "learning_rate": 1.3510321958468428e-05, + "loss": 2.106, + "step": 11904 + }, + { + "epoch": 0.4, + "grad_norm": 0.7027426958084106, + "learning_rate": 1.3509326684608292e-05, + "loss": 2.1249, + "step": 11905 + }, + { + "epoch": 0.4, + "grad_norm": 0.7222638726234436, + "learning_rate": 1.350833137110184e-05, + "loss": 2.1572, + "step": 11906 + }, + { + "epoch": 0.4, + "grad_norm": 0.7451738715171814, + "learning_rate": 1.3507336017960316e-05, + "loss": 2.1378, + "step": 11907 + }, + { + "epoch": 0.4, + "grad_norm": 0.7626450061798096, + "learning_rate": 1.3506340625194967e-05, + "loss": 2.1169, + "step": 11908 + }, + { + "epoch": 0.4, + "grad_norm": 0.7147843837738037, + "learning_rate": 1.3505345192817032e-05, + "loss": 2.0305, + "step": 11909 + }, + { + "epoch": 0.4, + "grad_norm": 0.7145608067512512, + "learning_rate": 1.3504349720837762e-05, + "loss": 2.0765, + "step": 11910 + }, + { + "epoch": 0.4, + "grad_norm": 0.7192610502243042, + "learning_rate": 1.3503354209268407e-05, + "loss": 2.0684, + "step": 11911 + }, + { + "epoch": 0.4, + "grad_norm": 0.7346543669700623, + "learning_rate": 1.3502358658120205e-05, + "loss": 2.1268, + "step": 11912 + }, + { + "epoch": 0.4, + "grad_norm": 0.7392002940177917, + "learning_rate": 1.3501363067404412e-05, + "loss": 2.1106, + "step": 11913 + }, + { + "epoch": 0.4, + "grad_norm": 0.7130147814750671, + "learning_rate": 1.3500367437132264e-05, + "loss": 2.0244, + "step": 11914 + }, + { + "epoch": 0.4, + "grad_norm": 0.7438791990280151, + "learning_rate": 1.3499371767315021e-05, + "loss": 2.0854, + "step": 11915 + }, + { + "epoch": 0.4, + "grad_norm": 0.7050479054450989, + "learning_rate": 1.3498376057963927e-05, + "loss": 2.1197, + "step": 11916 + }, + { + "epoch": 0.4, + "grad_norm": 0.7010165452957153, + "learning_rate": 1.349738030909023e-05, + "loss": 2.1193, + "step": 11917 + }, + { + "epoch": 0.4, + "grad_norm": 0.7917079329490662, + "learning_rate": 1.349638452070518e-05, + "loss": 2.1634, + "step": 11918 + }, + { + "epoch": 0.4, + "grad_norm": 0.7302039265632629, + "learning_rate": 1.3495388692820023e-05, + "loss": 2.102, + "step": 11919 + }, + { + "epoch": 0.4, + "grad_norm": 0.6980583667755127, + "learning_rate": 1.3494392825446018e-05, + "loss": 2.0532, + "step": 11920 + }, + { + "epoch": 0.4, + "grad_norm": 0.7171124815940857, + "learning_rate": 1.349339691859441e-05, + "loss": 2.1219, + "step": 11921 + }, + { + "epoch": 0.4, + "grad_norm": 0.7243660092353821, + "learning_rate": 1.349240097227645e-05, + "loss": 2.0254, + "step": 11922 + }, + { + "epoch": 0.4, + "grad_norm": 0.7567772269248962, + "learning_rate": 1.3491404986503388e-05, + "loss": 2.0524, + "step": 11923 + }, + { + "epoch": 0.4, + "grad_norm": 0.6997441649436951, + "learning_rate": 1.3490408961286483e-05, + "loss": 2.1235, + "step": 11924 + }, + { + "epoch": 0.4, + "grad_norm": 0.7804303169250488, + "learning_rate": 1.348941289663698e-05, + "loss": 2.0649, + "step": 11925 + }, + { + "epoch": 0.4, + "grad_norm": 0.716002881526947, + "learning_rate": 1.3488416792566138e-05, + "loss": 2.1837, + "step": 11926 + }, + { + "epoch": 0.4, + "grad_norm": 0.7299790978431702, + "learning_rate": 1.3487420649085205e-05, + "loss": 2.1671, + "step": 11927 + }, + { + "epoch": 0.4, + "grad_norm": 0.7241437435150146, + "learning_rate": 1.3486424466205439e-05, + "loss": 2.1574, + "step": 11928 + }, + { + "epoch": 0.4, + "grad_norm": 0.7270132899284363, + "learning_rate": 1.3485428243938092e-05, + "loss": 2.1026, + "step": 11929 + }, + { + "epoch": 0.4, + "grad_norm": 0.7270392775535583, + "learning_rate": 1.348443198229442e-05, + "loss": 2.1644, + "step": 11930 + }, + { + "epoch": 0.4, + "grad_norm": 0.7319567799568176, + "learning_rate": 1.3483435681285675e-05, + "loss": 2.1369, + "step": 11931 + }, + { + "epoch": 0.4, + "grad_norm": 0.7070012092590332, + "learning_rate": 1.3482439340923118e-05, + "loss": 2.0983, + "step": 11932 + }, + { + "epoch": 0.4, + "grad_norm": 0.712510883808136, + "learning_rate": 1.3481442961218e-05, + "loss": 2.0888, + "step": 11933 + }, + { + "epoch": 0.4, + "grad_norm": 0.7225126028060913, + "learning_rate": 1.348044654218158e-05, + "loss": 2.0894, + "step": 11934 + }, + { + "epoch": 0.4, + "grad_norm": 0.7503625750541687, + "learning_rate": 1.3479450083825118e-05, + "loss": 2.1406, + "step": 11935 + }, + { + "epoch": 0.4, + "grad_norm": 0.7262725830078125, + "learning_rate": 1.3478453586159864e-05, + "loss": 2.1001, + "step": 11936 + }, + { + "epoch": 0.4, + "grad_norm": 0.7298868894577026, + "learning_rate": 1.3477457049197083e-05, + "loss": 2.122, + "step": 11937 + }, + { + "epoch": 0.4, + "grad_norm": 0.6974429488182068, + "learning_rate": 1.347646047294803e-05, + "loss": 2.0752, + "step": 11938 + }, + { + "epoch": 0.4, + "grad_norm": 0.7239841818809509, + "learning_rate": 1.3475463857423963e-05, + "loss": 2.1476, + "step": 11939 + }, + { + "epoch": 0.4, + "grad_norm": 0.7338855862617493, + "learning_rate": 1.3474467202636138e-05, + "loss": 2.0711, + "step": 11940 + }, + { + "epoch": 0.4, + "grad_norm": 0.7322626113891602, + "learning_rate": 1.3473470508595826e-05, + "loss": 1.9713, + "step": 11941 + }, + { + "epoch": 0.4, + "grad_norm": 0.6999083161354065, + "learning_rate": 1.3472473775314274e-05, + "loss": 2.0507, + "step": 11942 + }, + { + "epoch": 0.4, + "grad_norm": 0.7115495204925537, + "learning_rate": 1.3471477002802753e-05, + "loss": 2.059, + "step": 11943 + }, + { + "epoch": 0.4, + "grad_norm": 0.7371779084205627, + "learning_rate": 1.3470480191072518e-05, + "loss": 2.0217, + "step": 11944 + }, + { + "epoch": 0.4, + "grad_norm": 0.707308828830719, + "learning_rate": 1.346948334013483e-05, + "loss": 2.0485, + "step": 11945 + }, + { + "epoch": 0.4, + "grad_norm": 0.735703706741333, + "learning_rate": 1.3468486450000954e-05, + "loss": 2.1268, + "step": 11946 + }, + { + "epoch": 0.4, + "grad_norm": 0.7145293354988098, + "learning_rate": 1.3467489520682148e-05, + "loss": 2.0691, + "step": 11947 + }, + { + "epoch": 0.4, + "grad_norm": 0.7297286987304688, + "learning_rate": 1.3466492552189683e-05, + "loss": 2.1146, + "step": 11948 + }, + { + "epoch": 0.4, + "grad_norm": 0.7017959356307983, + "learning_rate": 1.3465495544534812e-05, + "loss": 2.0913, + "step": 11949 + }, + { + "epoch": 0.4, + "grad_norm": 0.7871559858322144, + "learning_rate": 1.3464498497728804e-05, + "loss": 2.1557, + "step": 11950 + }, + { + "epoch": 0.4, + "grad_norm": 0.7523760795593262, + "learning_rate": 1.3463501411782924e-05, + "loss": 2.1079, + "step": 11951 + }, + { + "epoch": 0.4, + "grad_norm": 0.7428274750709534, + "learning_rate": 1.3462504286708436e-05, + "loss": 2.1154, + "step": 11952 + }, + { + "epoch": 0.4, + "grad_norm": 0.7221001386642456, + "learning_rate": 1.3461507122516601e-05, + "loss": 2.1259, + "step": 11953 + }, + { + "epoch": 0.4, + "grad_norm": 0.7462368011474609, + "learning_rate": 1.3460509919218688e-05, + "loss": 2.1448, + "step": 11954 + }, + { + "epoch": 0.4, + "grad_norm": 0.7348706722259521, + "learning_rate": 1.345951267682596e-05, + "loss": 2.1367, + "step": 11955 + }, + { + "epoch": 0.4, + "grad_norm": 0.7168831825256348, + "learning_rate": 1.345851539534969e-05, + "loss": 2.1278, + "step": 11956 + }, + { + "epoch": 0.4, + "grad_norm": 0.7091690301895142, + "learning_rate": 1.345751807480114e-05, + "loss": 2.0625, + "step": 11957 + }, + { + "epoch": 0.4, + "grad_norm": 0.749040424823761, + "learning_rate": 1.345652071519157e-05, + "loss": 2.1614, + "step": 11958 + }, + { + "epoch": 0.4, + "grad_norm": 0.7181079387664795, + "learning_rate": 1.3455523316532262e-05, + "loss": 2.1376, + "step": 11959 + }, + { + "epoch": 0.4, + "grad_norm": 0.7624078392982483, + "learning_rate": 1.3454525878834472e-05, + "loss": 2.0957, + "step": 11960 + }, + { + "epoch": 0.4, + "grad_norm": 0.7558721303939819, + "learning_rate": 1.3453528402109475e-05, + "loss": 2.1243, + "step": 11961 + }, + { + "epoch": 0.4, + "grad_norm": 0.7175999283790588, + "learning_rate": 1.345253088636854e-05, + "loss": 2.1014, + "step": 11962 + }, + { + "epoch": 0.4, + "grad_norm": 0.7405567169189453, + "learning_rate": 1.3451533331622933e-05, + "loss": 2.1304, + "step": 11963 + }, + { + "epoch": 0.4, + "grad_norm": 0.747665524482727, + "learning_rate": 1.3450535737883923e-05, + "loss": 2.0849, + "step": 11964 + }, + { + "epoch": 0.4, + "grad_norm": 0.7248052954673767, + "learning_rate": 1.3449538105162786e-05, + "loss": 2.0836, + "step": 11965 + }, + { + "epoch": 0.4, + "grad_norm": 0.7160945534706116, + "learning_rate": 1.3448540433470784e-05, + "loss": 2.0646, + "step": 11966 + }, + { + "epoch": 0.4, + "grad_norm": 0.7389582395553589, + "learning_rate": 1.3447542722819197e-05, + "loss": 2.0902, + "step": 11967 + }, + { + "epoch": 0.4, + "grad_norm": 0.7161489129066467, + "learning_rate": 1.3446544973219294e-05, + "loss": 2.1447, + "step": 11968 + }, + { + "epoch": 0.4, + "grad_norm": 0.729282557964325, + "learning_rate": 1.3445547184682341e-05, + "loss": 2.0559, + "step": 11969 + }, + { + "epoch": 0.4, + "grad_norm": 0.7201874256134033, + "learning_rate": 1.344454935721962e-05, + "loss": 2.0604, + "step": 11970 + }, + { + "epoch": 0.4, + "grad_norm": 0.7166064381599426, + "learning_rate": 1.34435514908424e-05, + "loss": 2.0367, + "step": 11971 + }, + { + "epoch": 0.4, + "grad_norm": 0.7325381636619568, + "learning_rate": 1.3442553585561946e-05, + "loss": 2.1063, + "step": 11972 + }, + { + "epoch": 0.4, + "grad_norm": 0.7519670724868774, + "learning_rate": 1.3441555641389548e-05, + "loss": 2.1117, + "step": 11973 + }, + { + "epoch": 0.4, + "grad_norm": 0.7265070080757141, + "learning_rate": 1.3440557658336465e-05, + "loss": 2.117, + "step": 11974 + }, + { + "epoch": 0.4, + "grad_norm": 0.720403254032135, + "learning_rate": 1.3439559636413982e-05, + "loss": 1.9995, + "step": 11975 + }, + { + "epoch": 0.4, + "grad_norm": 0.7042388916015625, + "learning_rate": 1.3438561575633366e-05, + "loss": 2.0648, + "step": 11976 + }, + { + "epoch": 0.4, + "grad_norm": 0.7401152849197388, + "learning_rate": 1.34375634760059e-05, + "loss": 2.0547, + "step": 11977 + }, + { + "epoch": 0.4, + "grad_norm": 0.7344746589660645, + "learning_rate": 1.3436565337542855e-05, + "loss": 2.1087, + "step": 11978 + }, + { + "epoch": 0.4, + "grad_norm": 0.6882182359695435, + "learning_rate": 1.343556716025551e-05, + "loss": 2.0829, + "step": 11979 + }, + { + "epoch": 0.4, + "grad_norm": 0.7547155618667603, + "learning_rate": 1.3434568944155137e-05, + "loss": 2.0933, + "step": 11980 + }, + { + "epoch": 0.4, + "grad_norm": 0.7404521703720093, + "learning_rate": 1.3433570689253021e-05, + "loss": 2.1393, + "step": 11981 + }, + { + "epoch": 0.4, + "grad_norm": 0.7344704270362854, + "learning_rate": 1.3432572395560435e-05, + "loss": 2.1244, + "step": 11982 + }, + { + "epoch": 0.4, + "grad_norm": 0.736875593662262, + "learning_rate": 1.3431574063088655e-05, + "loss": 2.0785, + "step": 11983 + }, + { + "epoch": 0.4, + "grad_norm": 0.736531674861908, + "learning_rate": 1.3430575691848966e-05, + "loss": 2.1712, + "step": 11984 + }, + { + "epoch": 0.4, + "grad_norm": 0.732238233089447, + "learning_rate": 1.3429577281852639e-05, + "loss": 2.0907, + "step": 11985 + }, + { + "epoch": 0.4, + "grad_norm": 0.7236172556877136, + "learning_rate": 1.3428578833110962e-05, + "loss": 2.1248, + "step": 11986 + }, + { + "epoch": 0.4, + "grad_norm": 0.7297419905662537, + "learning_rate": 1.342758034563521e-05, + "loss": 2.0684, + "step": 11987 + }, + { + "epoch": 0.4, + "grad_norm": 0.7087078094482422, + "learning_rate": 1.3426581819436662e-05, + "loss": 2.1134, + "step": 11988 + }, + { + "epoch": 0.4, + "grad_norm": 0.7172901630401611, + "learning_rate": 1.34255832545266e-05, + "loss": 2.0963, + "step": 11989 + }, + { + "epoch": 0.4, + "grad_norm": 0.7438780665397644, + "learning_rate": 1.342458465091631e-05, + "loss": 2.1603, + "step": 11990 + }, + { + "epoch": 0.4, + "grad_norm": 0.6966897249221802, + "learning_rate": 1.3423586008617066e-05, + "loss": 2.144, + "step": 11991 + }, + { + "epoch": 0.4, + "grad_norm": 0.7477492094039917, + "learning_rate": 1.3422587327640156e-05, + "loss": 2.1948, + "step": 11992 + }, + { + "epoch": 0.4, + "grad_norm": 0.7102459669113159, + "learning_rate": 1.342158860799686e-05, + "loss": 2.044, + "step": 11993 + }, + { + "epoch": 0.4, + "grad_norm": 0.6995379328727722, + "learning_rate": 1.3420589849698458e-05, + "loss": 2.0633, + "step": 11994 + }, + { + "epoch": 0.4, + "grad_norm": 0.7036551833152771, + "learning_rate": 1.341959105275624e-05, + "loss": 2.1438, + "step": 11995 + }, + { + "epoch": 0.4, + "grad_norm": 0.7439143657684326, + "learning_rate": 1.3418592217181488e-05, + "loss": 2.1255, + "step": 11996 + }, + { + "epoch": 0.4, + "grad_norm": 0.7276679873466492, + "learning_rate": 1.341759334298548e-05, + "loss": 2.067, + "step": 11997 + }, + { + "epoch": 0.4, + "grad_norm": 0.7191269397735596, + "learning_rate": 1.3416594430179507e-05, + "loss": 2.1727, + "step": 11998 + }, + { + "epoch": 0.4, + "grad_norm": 0.7358136177062988, + "learning_rate": 1.3415595478774852e-05, + "loss": 2.0976, + "step": 11999 + }, + { + "epoch": 0.4, + "grad_norm": 0.7344049215316772, + "learning_rate": 1.3414596488782803e-05, + "loss": 2.0877, + "step": 12000 + }, + { + "epoch": 0.4, + "grad_norm": 0.7179331183433533, + "learning_rate": 1.3413597460214645e-05, + "loss": 2.0674, + "step": 12001 + }, + { + "epoch": 0.4, + "grad_norm": 0.7403781414031982, + "learning_rate": 1.3412598393081657e-05, + "loss": 2.0537, + "step": 12002 + }, + { + "epoch": 0.4, + "grad_norm": 0.7281312942504883, + "learning_rate": 1.3411599287395138e-05, + "loss": 2.0579, + "step": 12003 + }, + { + "epoch": 0.4, + "grad_norm": 0.7021412253379822, + "learning_rate": 1.341060014316637e-05, + "loss": 2.1481, + "step": 12004 + }, + { + "epoch": 0.4, + "grad_norm": 0.6905468106269836, + "learning_rate": 1.3409600960406636e-05, + "loss": 2.1128, + "step": 12005 + }, + { + "epoch": 0.4, + "grad_norm": 0.764636754989624, + "learning_rate": 1.3408601739127233e-05, + "loss": 2.0651, + "step": 12006 + }, + { + "epoch": 0.4, + "grad_norm": 0.7115615606307983, + "learning_rate": 1.3407602479339444e-05, + "loss": 2.1367, + "step": 12007 + }, + { + "epoch": 0.4, + "grad_norm": 0.7363144755363464, + "learning_rate": 1.3406603181054557e-05, + "loss": 2.1003, + "step": 12008 + }, + { + "epoch": 0.4, + "grad_norm": 0.7762720584869385, + "learning_rate": 1.3405603844283865e-05, + "loss": 2.191, + "step": 12009 + }, + { + "epoch": 0.4, + "grad_norm": 0.7243630886077881, + "learning_rate": 1.3404604469038658e-05, + "loss": 2.1183, + "step": 12010 + }, + { + "epoch": 0.4, + "grad_norm": 0.7665592432022095, + "learning_rate": 1.3403605055330225e-05, + "loss": 2.0819, + "step": 12011 + }, + { + "epoch": 0.4, + "grad_norm": 0.7728646397590637, + "learning_rate": 1.3402605603169856e-05, + "loss": 2.1356, + "step": 12012 + }, + { + "epoch": 0.4, + "grad_norm": 0.7162576913833618, + "learning_rate": 1.3401606112568841e-05, + "loss": 2.0983, + "step": 12013 + }, + { + "epoch": 0.4, + "grad_norm": 0.7321494221687317, + "learning_rate": 1.3400606583538477e-05, + "loss": 2.1502, + "step": 12014 + }, + { + "epoch": 0.4, + "grad_norm": 0.7294401526451111, + "learning_rate": 1.3399607016090049e-05, + "loss": 2.1794, + "step": 12015 + }, + { + "epoch": 0.4, + "grad_norm": 0.7285364270210266, + "learning_rate": 1.3398607410234856e-05, + "loss": 2.0643, + "step": 12016 + }, + { + "epoch": 0.4, + "grad_norm": 0.7331371903419495, + "learning_rate": 1.339760776598419e-05, + "loss": 2.1587, + "step": 12017 + }, + { + "epoch": 0.4, + "grad_norm": 0.7316774129867554, + "learning_rate": 1.3396608083349338e-05, + "loss": 2.089, + "step": 12018 + }, + { + "epoch": 0.4, + "grad_norm": 0.7160951495170593, + "learning_rate": 1.3395608362341604e-05, + "loss": 1.9923, + "step": 12019 + }, + { + "epoch": 0.4, + "grad_norm": 0.7584801316261292, + "learning_rate": 1.3394608602972273e-05, + "loss": 2.1518, + "step": 12020 + }, + { + "epoch": 0.4, + "grad_norm": 0.731639564037323, + "learning_rate": 1.3393608805252642e-05, + "loss": 2.1022, + "step": 12021 + }, + { + "epoch": 0.4, + "grad_norm": 0.7300428748130798, + "learning_rate": 1.339260896919401e-05, + "loss": 2.0602, + "step": 12022 + }, + { + "epoch": 0.4, + "grad_norm": 0.7085554003715515, + "learning_rate": 1.339160909480767e-05, + "loss": 2.1673, + "step": 12023 + }, + { + "epoch": 0.4, + "grad_norm": 0.7818900942802429, + "learning_rate": 1.339060918210492e-05, + "loss": 2.0619, + "step": 12024 + }, + { + "epoch": 0.4, + "grad_norm": 0.718970000743866, + "learning_rate": 1.3389609231097049e-05, + "loss": 2.0802, + "step": 12025 + }, + { + "epoch": 0.4, + "grad_norm": 0.700148344039917, + "learning_rate": 1.3388609241795366e-05, + "loss": 2.0414, + "step": 12026 + }, + { + "epoch": 0.4, + "grad_norm": 0.7287668585777283, + "learning_rate": 1.3387609214211156e-05, + "loss": 2.1654, + "step": 12027 + }, + { + "epoch": 0.4, + "grad_norm": 0.7149532437324524, + "learning_rate": 1.3386609148355726e-05, + "loss": 2.0606, + "step": 12028 + }, + { + "epoch": 0.4, + "grad_norm": 0.7514705657958984, + "learning_rate": 1.3385609044240369e-05, + "loss": 2.0765, + "step": 12029 + }, + { + "epoch": 0.4, + "grad_norm": 0.7484687566757202, + "learning_rate": 1.3384608901876388e-05, + "loss": 2.2026, + "step": 12030 + }, + { + "epoch": 0.4, + "grad_norm": 0.726549506187439, + "learning_rate": 1.3383608721275077e-05, + "loss": 2.141, + "step": 12031 + }, + { + "epoch": 0.4, + "grad_norm": 0.7661895751953125, + "learning_rate": 1.3382608502447738e-05, + "loss": 2.0832, + "step": 12032 + }, + { + "epoch": 0.4, + "grad_norm": 0.7306628227233887, + "learning_rate": 1.3381608245405668e-05, + "loss": 2.1097, + "step": 12033 + }, + { + "epoch": 0.4, + "grad_norm": 0.7323367595672607, + "learning_rate": 1.3380607950160174e-05, + "loss": 2.0683, + "step": 12034 + }, + { + "epoch": 0.4, + "grad_norm": 0.7466384172439575, + "learning_rate": 1.337960761672255e-05, + "loss": 2.041, + "step": 12035 + }, + { + "epoch": 0.4, + "grad_norm": 0.7382234930992126, + "learning_rate": 1.3378607245104101e-05, + "loss": 2.1145, + "step": 12036 + }, + { + "epoch": 0.4, + "grad_norm": 0.7467278838157654, + "learning_rate": 1.3377606835316125e-05, + "loss": 2.1153, + "step": 12037 + }, + { + "epoch": 0.4, + "grad_norm": 0.7356166243553162, + "learning_rate": 1.3376606387369928e-05, + "loss": 1.9944, + "step": 12038 + }, + { + "epoch": 0.4, + "grad_norm": 0.7498576045036316, + "learning_rate": 1.3375605901276813e-05, + "loss": 2.1375, + "step": 12039 + }, + { + "epoch": 0.4, + "grad_norm": 0.7336161732673645, + "learning_rate": 1.3374605377048078e-05, + "loss": 2.0669, + "step": 12040 + }, + { + "epoch": 0.4, + "grad_norm": 0.7627463340759277, + "learning_rate": 1.3373604814695033e-05, + "loss": 2.1358, + "step": 12041 + }, + { + "epoch": 0.4, + "grad_norm": 0.715947687625885, + "learning_rate": 1.3372604214228975e-05, + "loss": 2.0842, + "step": 12042 + }, + { + "epoch": 0.4, + "grad_norm": 0.7084338068962097, + "learning_rate": 1.3371603575661212e-05, + "loss": 2.1022, + "step": 12043 + }, + { + "epoch": 0.4, + "grad_norm": 0.7530382871627808, + "learning_rate": 1.3370602899003044e-05, + "loss": 2.1372, + "step": 12044 + }, + { + "epoch": 0.4, + "grad_norm": 0.7993972301483154, + "learning_rate": 1.3369602184265784e-05, + "loss": 2.1295, + "step": 12045 + }, + { + "epoch": 0.4, + "grad_norm": 0.7330203652381897, + "learning_rate": 1.3368601431460733e-05, + "loss": 2.1008, + "step": 12046 + }, + { + "epoch": 0.4, + "grad_norm": 0.7318942546844482, + "learning_rate": 1.3367600640599196e-05, + "loss": 2.0997, + "step": 12047 + }, + { + "epoch": 0.4, + "grad_norm": 0.7715089917182922, + "learning_rate": 1.336659981169248e-05, + "loss": 2.1062, + "step": 12048 + }, + { + "epoch": 0.4, + "grad_norm": 0.7368652820587158, + "learning_rate": 1.3365598944751893e-05, + "loss": 2.0819, + "step": 12049 + }, + { + "epoch": 0.4, + "grad_norm": 0.7263789772987366, + "learning_rate": 1.3364598039788743e-05, + "loss": 2.0323, + "step": 12050 + }, + { + "epoch": 0.4, + "grad_norm": 0.7159261703491211, + "learning_rate": 1.3363597096814333e-05, + "loss": 2.1078, + "step": 12051 + }, + { + "epoch": 0.4, + "grad_norm": 0.7335701584815979, + "learning_rate": 1.3362596115839979e-05, + "loss": 2.0681, + "step": 12052 + }, + { + "epoch": 0.4, + "grad_norm": 0.7015700340270996, + "learning_rate": 1.3361595096876983e-05, + "loss": 2.0875, + "step": 12053 + }, + { + "epoch": 0.4, + "grad_norm": 0.704871654510498, + "learning_rate": 1.3360594039936654e-05, + "loss": 2.1055, + "step": 12054 + }, + { + "epoch": 0.4, + "grad_norm": 0.7387040853500366, + "learning_rate": 1.3359592945030304e-05, + "loss": 2.0598, + "step": 12055 + }, + { + "epoch": 0.4, + "grad_norm": 0.708371639251709, + "learning_rate": 1.3358591812169242e-05, + "loss": 2.1104, + "step": 12056 + }, + { + "epoch": 0.4, + "grad_norm": 0.7675510048866272, + "learning_rate": 1.3357590641364777e-05, + "loss": 2.1269, + "step": 12057 + }, + { + "epoch": 0.4, + "grad_norm": 0.7276513576507568, + "learning_rate": 1.3356589432628222e-05, + "loss": 2.0552, + "step": 12058 + }, + { + "epoch": 0.4, + "grad_norm": 0.7227287292480469, + "learning_rate": 1.3355588185970885e-05, + "loss": 2.1679, + "step": 12059 + }, + { + "epoch": 0.4, + "grad_norm": 0.7557696104049683, + "learning_rate": 1.3354586901404081e-05, + "loss": 2.1392, + "step": 12060 + }, + { + "epoch": 0.4, + "grad_norm": 0.7497695088386536, + "learning_rate": 1.3353585578939121e-05, + "loss": 2.1092, + "step": 12061 + }, + { + "epoch": 0.4, + "grad_norm": 0.742592453956604, + "learning_rate": 1.3352584218587312e-05, + "loss": 2.0385, + "step": 12062 + }, + { + "epoch": 0.4, + "grad_norm": 0.6988925337791443, + "learning_rate": 1.3351582820359976e-05, + "loss": 2.0614, + "step": 12063 + }, + { + "epoch": 0.4, + "grad_norm": 0.7404292225837708, + "learning_rate": 1.335058138426842e-05, + "loss": 2.0912, + "step": 12064 + }, + { + "epoch": 0.4, + "grad_norm": 0.7507045269012451, + "learning_rate": 1.3349579910323958e-05, + "loss": 2.0967, + "step": 12065 + }, + { + "epoch": 0.4, + "grad_norm": 0.7688963413238525, + "learning_rate": 1.3348578398537905e-05, + "loss": 2.1536, + "step": 12066 + }, + { + "epoch": 0.4, + "grad_norm": 0.7715846300125122, + "learning_rate": 1.334757684892158e-05, + "loss": 2.0161, + "step": 12067 + }, + { + "epoch": 0.4, + "grad_norm": 0.7235500812530518, + "learning_rate": 1.334657526148629e-05, + "loss": 2.0715, + "step": 12068 + }, + { + "epoch": 0.4, + "grad_norm": 0.734069287776947, + "learning_rate": 1.3345573636243355e-05, + "loss": 2.1677, + "step": 12069 + }, + { + "epoch": 0.4, + "grad_norm": 0.7506744861602783, + "learning_rate": 1.3344571973204088e-05, + "loss": 2.1195, + "step": 12070 + }, + { + "epoch": 0.4, + "grad_norm": 0.7169836163520813, + "learning_rate": 1.3343570272379807e-05, + "loss": 2.1327, + "step": 12071 + }, + { + "epoch": 0.4, + "grad_norm": 0.7448582649230957, + "learning_rate": 1.3342568533781833e-05, + "loss": 2.0265, + "step": 12072 + }, + { + "epoch": 0.4, + "grad_norm": 0.7182772159576416, + "learning_rate": 1.3341566757421474e-05, + "loss": 2.0622, + "step": 12073 + }, + { + "epoch": 0.4, + "grad_norm": 0.7488613128662109, + "learning_rate": 1.3340564943310055e-05, + "loss": 2.0701, + "step": 12074 + }, + { + "epoch": 0.4, + "grad_norm": 0.7862052321434021, + "learning_rate": 1.333956309145889e-05, + "loss": 2.0725, + "step": 12075 + }, + { + "epoch": 0.4, + "grad_norm": 0.7152290940284729, + "learning_rate": 1.3338561201879295e-05, + "loss": 2.0119, + "step": 12076 + }, + { + "epoch": 0.4, + "grad_norm": 0.7075240612030029, + "learning_rate": 1.3337559274582596e-05, + "loss": 2.0566, + "step": 12077 + }, + { + "epoch": 0.4, + "grad_norm": 0.7444201111793518, + "learning_rate": 1.333655730958011e-05, + "loss": 2.1379, + "step": 12078 + }, + { + "epoch": 0.4, + "grad_norm": 0.7348806262016296, + "learning_rate": 1.333555530688315e-05, + "loss": 2.1088, + "step": 12079 + }, + { + "epoch": 0.4, + "grad_norm": 0.757260799407959, + "learning_rate": 1.3334553266503043e-05, + "loss": 2.1445, + "step": 12080 + }, + { + "epoch": 0.4, + "grad_norm": 0.7476964592933655, + "learning_rate": 1.3333551188451109e-05, + "loss": 2.1272, + "step": 12081 + }, + { + "epoch": 0.4, + "grad_norm": 0.7153235077857971, + "learning_rate": 1.3332549072738665e-05, + "loss": 2.042, + "step": 12082 + }, + { + "epoch": 0.4, + "grad_norm": 0.718843936920166, + "learning_rate": 1.3331546919377039e-05, + "loss": 2.0495, + "step": 12083 + }, + { + "epoch": 0.4, + "grad_norm": 0.7286083698272705, + "learning_rate": 1.333054472837754e-05, + "loss": 2.1144, + "step": 12084 + }, + { + "epoch": 0.4, + "grad_norm": 0.7480261325836182, + "learning_rate": 1.3329542499751507e-05, + "loss": 2.0668, + "step": 12085 + }, + { + "epoch": 0.4, + "grad_norm": 0.7608886361122131, + "learning_rate": 1.332854023351025e-05, + "loss": 2.0802, + "step": 12086 + }, + { + "epoch": 0.4, + "grad_norm": 0.7223628759384155, + "learning_rate": 1.3327537929665096e-05, + "loss": 2.0372, + "step": 12087 + }, + { + "epoch": 0.4, + "grad_norm": 0.7423355579376221, + "learning_rate": 1.3326535588227371e-05, + "loss": 2.0686, + "step": 12088 + }, + { + "epoch": 0.4, + "grad_norm": 0.7143242359161377, + "learning_rate": 1.3325533209208395e-05, + "loss": 2.0487, + "step": 12089 + }, + { + "epoch": 0.4, + "grad_norm": 0.729502260684967, + "learning_rate": 1.3324530792619494e-05, + "loss": 2.1068, + "step": 12090 + }, + { + "epoch": 0.4, + "grad_norm": 0.724786102771759, + "learning_rate": 1.3323528338471991e-05, + "loss": 2.1124, + "step": 12091 + }, + { + "epoch": 0.4, + "grad_norm": 0.733322262763977, + "learning_rate": 1.3322525846777214e-05, + "loss": 2.0895, + "step": 12092 + }, + { + "epoch": 0.4, + "grad_norm": 0.7134036421775818, + "learning_rate": 1.3321523317546488e-05, + "loss": 2.0739, + "step": 12093 + }, + { + "epoch": 0.4, + "grad_norm": 0.7441741228103638, + "learning_rate": 1.332052075079114e-05, + "loss": 2.0501, + "step": 12094 + }, + { + "epoch": 0.4, + "grad_norm": 0.7421995401382446, + "learning_rate": 1.331951814652249e-05, + "loss": 2.1202, + "step": 12095 + }, + { + "epoch": 0.4, + "grad_norm": 0.7337427139282227, + "learning_rate": 1.331851550475187e-05, + "loss": 2.116, + "step": 12096 + }, + { + "epoch": 0.4, + "grad_norm": 0.7612642049789429, + "learning_rate": 1.3317512825490608e-05, + "loss": 2.1247, + "step": 12097 + }, + { + "epoch": 0.4, + "grad_norm": 0.7377074956893921, + "learning_rate": 1.331651010875003e-05, + "loss": 2.1202, + "step": 12098 + }, + { + "epoch": 0.4, + "grad_norm": 0.7289406061172485, + "learning_rate": 1.3315507354541467e-05, + "loss": 2.0346, + "step": 12099 + }, + { + "epoch": 0.4, + "grad_norm": 0.7488502860069275, + "learning_rate": 1.331450456287624e-05, + "loss": 2.0708, + "step": 12100 + }, + { + "epoch": 0.4, + "grad_norm": 0.7228421568870544, + "learning_rate": 1.3313501733765687e-05, + "loss": 2.0734, + "step": 12101 + }, + { + "epoch": 0.4, + "grad_norm": 0.7770979404449463, + "learning_rate": 1.3312498867221133e-05, + "loss": 2.1616, + "step": 12102 + }, + { + "epoch": 0.4, + "grad_norm": 0.7226687669754028, + "learning_rate": 1.3311495963253907e-05, + "loss": 2.0697, + "step": 12103 + }, + { + "epoch": 0.4, + "grad_norm": 0.7017569541931152, + "learning_rate": 1.3310493021875339e-05, + "loss": 2.0903, + "step": 12104 + }, + { + "epoch": 0.4, + "grad_norm": 0.7225120067596436, + "learning_rate": 1.3309490043096765e-05, + "loss": 1.9901, + "step": 12105 + }, + { + "epoch": 0.4, + "grad_norm": 0.7058452367782593, + "learning_rate": 1.3308487026929506e-05, + "loss": 2.0884, + "step": 12106 + }, + { + "epoch": 0.4, + "grad_norm": 0.7359464168548584, + "learning_rate": 1.3307483973384902e-05, + "loss": 2.1107, + "step": 12107 + }, + { + "epoch": 0.4, + "grad_norm": 0.7240065932273865, + "learning_rate": 1.3306480882474287e-05, + "loss": 2.1334, + "step": 12108 + }, + { + "epoch": 0.4, + "grad_norm": 0.7140375375747681, + "learning_rate": 1.3305477754208982e-05, + "loss": 2.1181, + "step": 12109 + }, + { + "epoch": 0.4, + "grad_norm": 0.7515427470207214, + "learning_rate": 1.3304474588600332e-05, + "loss": 2.1244, + "step": 12110 + }, + { + "epoch": 0.4, + "grad_norm": 0.7220250368118286, + "learning_rate": 1.330347138565966e-05, + "loss": 2.0481, + "step": 12111 + }, + { + "epoch": 0.4, + "grad_norm": 0.7371188402175903, + "learning_rate": 1.3302468145398309e-05, + "loss": 2.0658, + "step": 12112 + }, + { + "epoch": 0.4, + "grad_norm": 0.7189971804618835, + "learning_rate": 1.3301464867827606e-05, + "loss": 2.0647, + "step": 12113 + }, + { + "epoch": 0.4, + "grad_norm": 0.7000138163566589, + "learning_rate": 1.3300461552958887e-05, + "loss": 2.0953, + "step": 12114 + }, + { + "epoch": 0.4, + "grad_norm": 0.7683405876159668, + "learning_rate": 1.329945820080349e-05, + "loss": 2.0594, + "step": 12115 + }, + { + "epoch": 0.4, + "grad_norm": 0.7733820080757141, + "learning_rate": 1.3298454811372746e-05, + "loss": 2.1801, + "step": 12116 + }, + { + "epoch": 0.4, + "grad_norm": 0.7567668557167053, + "learning_rate": 1.329745138467799e-05, + "loss": 2.1038, + "step": 12117 + }, + { + "epoch": 0.4, + "grad_norm": 0.7354212999343872, + "learning_rate": 1.3296447920730566e-05, + "loss": 2.0977, + "step": 12118 + }, + { + "epoch": 0.4, + "grad_norm": 0.7005856037139893, + "learning_rate": 1.3295444419541804e-05, + "loss": 2.0696, + "step": 12119 + }, + { + "epoch": 0.4, + "grad_norm": 0.7595085501670837, + "learning_rate": 1.3294440881123039e-05, + "loss": 2.0424, + "step": 12120 + }, + { + "epoch": 0.4, + "grad_norm": 0.7395510077476501, + "learning_rate": 1.3293437305485617e-05, + "loss": 2.0878, + "step": 12121 + }, + { + "epoch": 0.4, + "grad_norm": 0.7587229013442993, + "learning_rate": 1.3292433692640864e-05, + "loss": 2.1559, + "step": 12122 + }, + { + "epoch": 0.4, + "grad_norm": 0.7649913430213928, + "learning_rate": 1.3291430042600131e-05, + "loss": 2.157, + "step": 12123 + }, + { + "epoch": 0.4, + "grad_norm": 0.7135988473892212, + "learning_rate": 1.3290426355374747e-05, + "loss": 2.0855, + "step": 12124 + }, + { + "epoch": 0.4, + "grad_norm": 0.7831515073776245, + "learning_rate": 1.3289422630976054e-05, + "loss": 2.1327, + "step": 12125 + }, + { + "epoch": 0.4, + "grad_norm": 0.7143268585205078, + "learning_rate": 1.3288418869415394e-05, + "loss": 2.0556, + "step": 12126 + }, + { + "epoch": 0.4, + "grad_norm": 0.757780909538269, + "learning_rate": 1.3287415070704102e-05, + "loss": 2.0898, + "step": 12127 + }, + { + "epoch": 0.4, + "grad_norm": 0.7206072211265564, + "learning_rate": 1.3286411234853524e-05, + "loss": 2.1595, + "step": 12128 + }, + { + "epoch": 0.4, + "grad_norm": 0.758520781993866, + "learning_rate": 1.3285407361874995e-05, + "loss": 2.1164, + "step": 12129 + }, + { + "epoch": 0.4, + "grad_norm": 0.7994642853736877, + "learning_rate": 1.3284403451779862e-05, + "loss": 2.1227, + "step": 12130 + }, + { + "epoch": 0.4, + "grad_norm": 0.7674486637115479, + "learning_rate": 1.3283399504579462e-05, + "loss": 2.1092, + "step": 12131 + }, + { + "epoch": 0.4, + "grad_norm": 0.730057954788208, + "learning_rate": 1.3282395520285141e-05, + "loss": 2.1024, + "step": 12132 + }, + { + "epoch": 0.4, + "grad_norm": 0.7449609041213989, + "learning_rate": 1.3281391498908235e-05, + "loss": 2.0773, + "step": 12133 + }, + { + "epoch": 0.4, + "grad_norm": 0.7412946224212646, + "learning_rate": 1.3280387440460094e-05, + "loss": 2.0648, + "step": 12134 + }, + { + "epoch": 0.4, + "grad_norm": 0.7678388953208923, + "learning_rate": 1.3279383344952057e-05, + "loss": 2.119, + "step": 12135 + }, + { + "epoch": 0.4, + "grad_norm": 0.7435850501060486, + "learning_rate": 1.3278379212395469e-05, + "loss": 2.1387, + "step": 12136 + }, + { + "epoch": 0.4, + "grad_norm": 0.7180525660514832, + "learning_rate": 1.3277375042801675e-05, + "loss": 2.0609, + "step": 12137 + }, + { + "epoch": 0.4, + "grad_norm": 0.7182612419128418, + "learning_rate": 1.3276370836182018e-05, + "loss": 2.0757, + "step": 12138 + }, + { + "epoch": 0.4, + "grad_norm": 0.738795280456543, + "learning_rate": 1.3275366592547843e-05, + "loss": 2.1215, + "step": 12139 + }, + { + "epoch": 0.4, + "grad_norm": 0.740969717502594, + "learning_rate": 1.3274362311910497e-05, + "loss": 2.081, + "step": 12140 + }, + { + "epoch": 0.4, + "grad_norm": 0.7093883752822876, + "learning_rate": 1.3273357994281322e-05, + "loss": 2.0255, + "step": 12141 + }, + { + "epoch": 0.4, + "grad_norm": 0.7246757745742798, + "learning_rate": 1.3272353639671667e-05, + "loss": 2.0168, + "step": 12142 + }, + { + "epoch": 0.4, + "grad_norm": 0.7177174091339111, + "learning_rate": 1.3271349248092882e-05, + "loss": 2.0693, + "step": 12143 + }, + { + "epoch": 0.4, + "grad_norm": 0.7357671856880188, + "learning_rate": 1.3270344819556304e-05, + "loss": 2.0559, + "step": 12144 + }, + { + "epoch": 0.4, + "grad_norm": 0.7713660001754761, + "learning_rate": 1.3269340354073295e-05, + "loss": 2.0531, + "step": 12145 + }, + { + "epoch": 0.4, + "grad_norm": 0.7783529162406921, + "learning_rate": 1.326833585165519e-05, + "loss": 2.1948, + "step": 12146 + }, + { + "epoch": 0.4, + "grad_norm": 0.7201218008995056, + "learning_rate": 1.326733131231334e-05, + "loss": 2.1798, + "step": 12147 + }, + { + "epoch": 0.4, + "grad_norm": 0.771898627281189, + "learning_rate": 1.3266326736059098e-05, + "loss": 2.1223, + "step": 12148 + }, + { + "epoch": 0.4, + "grad_norm": 0.7134940028190613, + "learning_rate": 1.3265322122903812e-05, + "loss": 2.0542, + "step": 12149 + }, + { + "epoch": 0.4, + "grad_norm": 0.7506200075149536, + "learning_rate": 1.326431747285883e-05, + "loss": 2.1791, + "step": 12150 + }, + { + "epoch": 0.4, + "grad_norm": 0.7342522144317627, + "learning_rate": 1.32633127859355e-05, + "loss": 2.0727, + "step": 12151 + }, + { + "epoch": 0.4, + "grad_norm": 0.7490595579147339, + "learning_rate": 1.3262308062145178e-05, + "loss": 2.0272, + "step": 12152 + }, + { + "epoch": 0.4, + "grad_norm": 0.7310972809791565, + "learning_rate": 1.3261303301499209e-05, + "loss": 2.0318, + "step": 12153 + }, + { + "epoch": 0.4, + "grad_norm": 0.7424598336219788, + "learning_rate": 1.3260298504008948e-05, + "loss": 2.1796, + "step": 12154 + }, + { + "epoch": 0.4, + "grad_norm": 0.7339668869972229, + "learning_rate": 1.3259293669685742e-05, + "loss": 2.1373, + "step": 12155 + }, + { + "epoch": 0.4, + "grad_norm": 0.7330799698829651, + "learning_rate": 1.325828879854095e-05, + "loss": 2.0477, + "step": 12156 + }, + { + "epoch": 0.4, + "grad_norm": 0.7548856735229492, + "learning_rate": 1.3257283890585919e-05, + "loss": 2.1257, + "step": 12157 + }, + { + "epoch": 0.4, + "grad_norm": 0.7211975455284119, + "learning_rate": 1.3256278945832004e-05, + "loss": 2.0301, + "step": 12158 + }, + { + "epoch": 0.4, + "grad_norm": 0.7330728769302368, + "learning_rate": 1.3255273964290559e-05, + "loss": 2.0662, + "step": 12159 + }, + { + "epoch": 0.4, + "grad_norm": 0.7182918190956116, + "learning_rate": 1.3254268945972935e-05, + "loss": 2.0601, + "step": 12160 + }, + { + "epoch": 0.4, + "grad_norm": 0.7329309582710266, + "learning_rate": 1.3253263890890486e-05, + "loss": 2.144, + "step": 12161 + }, + { + "epoch": 0.4, + "grad_norm": 0.7323415875434875, + "learning_rate": 1.3252258799054572e-05, + "loss": 2.1493, + "step": 12162 + }, + { + "epoch": 0.4, + "grad_norm": 0.7347413897514343, + "learning_rate": 1.325125367047654e-05, + "loss": 2.0207, + "step": 12163 + }, + { + "epoch": 0.4, + "grad_norm": 0.7081376910209656, + "learning_rate": 1.3250248505167753e-05, + "loss": 2.161, + "step": 12164 + }, + { + "epoch": 0.4, + "grad_norm": 0.7458366751670837, + "learning_rate": 1.3249243303139562e-05, + "loss": 2.0663, + "step": 12165 + }, + { + "epoch": 0.4, + "grad_norm": 0.7380081415176392, + "learning_rate": 1.3248238064403322e-05, + "loss": 2.149, + "step": 12166 + }, + { + "epoch": 0.4, + "grad_norm": 0.739449143409729, + "learning_rate": 1.3247232788970397e-05, + "loss": 2.0985, + "step": 12167 + }, + { + "epoch": 0.4, + "grad_norm": 0.737318754196167, + "learning_rate": 1.3246227476852135e-05, + "loss": 2.1306, + "step": 12168 + }, + { + "epoch": 0.4, + "grad_norm": 0.7357999682426453, + "learning_rate": 1.3245222128059898e-05, + "loss": 2.1393, + "step": 12169 + }, + { + "epoch": 0.4, + "grad_norm": 0.7420468926429749, + "learning_rate": 1.3244216742605045e-05, + "loss": 2.1582, + "step": 12170 + }, + { + "epoch": 0.4, + "grad_norm": 0.7335938811302185, + "learning_rate": 1.3243211320498931e-05, + "loss": 2.0893, + "step": 12171 + }, + { + "epoch": 0.4, + "grad_norm": 0.7411210536956787, + "learning_rate": 1.3242205861752917e-05, + "loss": 2.0188, + "step": 12172 + }, + { + "epoch": 0.4, + "grad_norm": 0.734242856502533, + "learning_rate": 1.3241200366378361e-05, + "loss": 2.0853, + "step": 12173 + }, + { + "epoch": 0.41, + "grad_norm": 0.761043131351471, + "learning_rate": 1.3240194834386624e-05, + "loss": 2.1214, + "step": 12174 + }, + { + "epoch": 0.41, + "grad_norm": 0.7509834170341492, + "learning_rate": 1.3239189265789063e-05, + "loss": 2.1148, + "step": 12175 + }, + { + "epoch": 0.41, + "grad_norm": 0.699530303478241, + "learning_rate": 1.3238183660597042e-05, + "loss": 2.1627, + "step": 12176 + }, + { + "epoch": 0.41, + "grad_norm": 0.7220861911773682, + "learning_rate": 1.3237178018821916e-05, + "loss": 2.0795, + "step": 12177 + }, + { + "epoch": 0.41, + "grad_norm": 0.7164317965507507, + "learning_rate": 1.3236172340475055e-05, + "loss": 2.0654, + "step": 12178 + }, + { + "epoch": 0.41, + "grad_norm": 0.7442899346351624, + "learning_rate": 1.3235166625567813e-05, + "loss": 2.1254, + "step": 12179 + }, + { + "epoch": 0.41, + "grad_norm": 0.7266655564308167, + "learning_rate": 1.323416087411155e-05, + "loss": 2.1311, + "step": 12180 + }, + { + "epoch": 0.41, + "grad_norm": 0.7420675158500671, + "learning_rate": 1.3233155086117639e-05, + "loss": 2.1089, + "step": 12181 + }, + { + "epoch": 0.41, + "grad_norm": 0.7587383389472961, + "learning_rate": 1.3232149261597436e-05, + "loss": 2.1242, + "step": 12182 + }, + { + "epoch": 0.41, + "grad_norm": 0.7147238254547119, + "learning_rate": 1.3231143400562303e-05, + "loss": 2.1317, + "step": 12183 + }, + { + "epoch": 0.41, + "grad_norm": 0.7093467116355896, + "learning_rate": 1.3230137503023606e-05, + "loss": 2.0587, + "step": 12184 + }, + { + "epoch": 0.41, + "grad_norm": 0.7572020292282104, + "learning_rate": 1.3229131568992708e-05, + "loss": 2.1475, + "step": 12185 + }, + { + "epoch": 0.41, + "grad_norm": 0.7225651741027832, + "learning_rate": 1.3228125598480974e-05, + "loss": 2.0567, + "step": 12186 + }, + { + "epoch": 0.41, + "grad_norm": 0.7124921679496765, + "learning_rate": 1.3227119591499771e-05, + "loss": 2.0968, + "step": 12187 + }, + { + "epoch": 0.41, + "grad_norm": 0.7775607705116272, + "learning_rate": 1.3226113548060457e-05, + "loss": 2.1322, + "step": 12188 + }, + { + "epoch": 0.41, + "grad_norm": 0.7556065320968628, + "learning_rate": 1.322510746817441e-05, + "loss": 2.1538, + "step": 12189 + }, + { + "epoch": 0.41, + "grad_norm": 0.7152615785598755, + "learning_rate": 1.3224101351852986e-05, + "loss": 2.1077, + "step": 12190 + }, + { + "epoch": 0.41, + "grad_norm": 0.7348170280456543, + "learning_rate": 1.3223095199107553e-05, + "loss": 2.1235, + "step": 12191 + }, + { + "epoch": 0.41, + "grad_norm": 0.7330371737480164, + "learning_rate": 1.322208900994948e-05, + "loss": 2.0824, + "step": 12192 + }, + { + "epoch": 0.41, + "grad_norm": 0.752933144569397, + "learning_rate": 1.3221082784390133e-05, + "loss": 2.1588, + "step": 12193 + }, + { + "epoch": 0.41, + "grad_norm": 0.7272346019744873, + "learning_rate": 1.322007652244088e-05, + "loss": 2.0777, + "step": 12194 + }, + { + "epoch": 0.41, + "grad_norm": 0.7402298450469971, + "learning_rate": 1.3219070224113093e-05, + "loss": 2.0758, + "step": 12195 + }, + { + "epoch": 0.41, + "grad_norm": 0.7377109527587891, + "learning_rate": 1.3218063889418134e-05, + "loss": 2.1124, + "step": 12196 + }, + { + "epoch": 0.41, + "grad_norm": 0.7469210028648376, + "learning_rate": 1.3217057518367375e-05, + "loss": 2.0467, + "step": 12197 + }, + { + "epoch": 0.41, + "grad_norm": 0.7463890314102173, + "learning_rate": 1.321605111097219e-05, + "loss": 2.1604, + "step": 12198 + }, + { + "epoch": 0.41, + "grad_norm": 0.710898756980896, + "learning_rate": 1.321504466724394e-05, + "loss": 2.0383, + "step": 12199 + }, + { + "epoch": 0.41, + "grad_norm": 0.7388975620269775, + "learning_rate": 1.3214038187193998e-05, + "loss": 2.0603, + "step": 12200 + }, + { + "epoch": 0.41, + "grad_norm": 0.7719639539718628, + "learning_rate": 1.321303167083374e-05, + "loss": 2.125, + "step": 12201 + }, + { + "epoch": 0.41, + "grad_norm": 0.7088715434074402, + "learning_rate": 1.321202511817453e-05, + "loss": 2.0342, + "step": 12202 + }, + { + "epoch": 0.41, + "grad_norm": 0.7219780087471008, + "learning_rate": 1.3211018529227748e-05, + "loss": 2.0137, + "step": 12203 + }, + { + "epoch": 0.41, + "grad_norm": 0.7552051544189453, + "learning_rate": 1.3210011904004753e-05, + "loss": 2.0909, + "step": 12204 + }, + { + "epoch": 0.41, + "grad_norm": 0.7172917127609253, + "learning_rate": 1.3209005242516933e-05, + "loss": 2.0871, + "step": 12205 + }, + { + "epoch": 0.41, + "grad_norm": 0.7143824100494385, + "learning_rate": 1.3207998544775646e-05, + "loss": 2.1013, + "step": 12206 + }, + { + "epoch": 0.41, + "grad_norm": 0.7500854134559631, + "learning_rate": 1.3206991810792276e-05, + "loss": 2.0933, + "step": 12207 + }, + { + "epoch": 0.41, + "grad_norm": 0.7154642343521118, + "learning_rate": 1.3205985040578188e-05, + "loss": 2.0465, + "step": 12208 + }, + { + "epoch": 0.41, + "grad_norm": 0.7345568537712097, + "learning_rate": 1.3204978234144763e-05, + "loss": 2.1788, + "step": 12209 + }, + { + "epoch": 0.41, + "grad_norm": 0.733279287815094, + "learning_rate": 1.320397139150337e-05, + "loss": 2.135, + "step": 12210 + }, + { + "epoch": 0.41, + "grad_norm": 0.7153021693229675, + "learning_rate": 1.3202964512665386e-05, + "loss": 2.0769, + "step": 12211 + }, + { + "epoch": 0.41, + "grad_norm": 0.7709648013114929, + "learning_rate": 1.3201957597642188e-05, + "loss": 2.132, + "step": 12212 + }, + { + "epoch": 0.41, + "grad_norm": 0.7457278966903687, + "learning_rate": 1.3200950646445148e-05, + "loss": 2.0645, + "step": 12213 + }, + { + "epoch": 0.41, + "grad_norm": 0.7288813591003418, + "learning_rate": 1.3199943659085645e-05, + "loss": 2.0466, + "step": 12214 + }, + { + "epoch": 0.41, + "grad_norm": 0.7319962382316589, + "learning_rate": 1.319893663557505e-05, + "loss": 2.1492, + "step": 12215 + }, + { + "epoch": 0.41, + "grad_norm": 0.7162802219390869, + "learning_rate": 1.3197929575924749e-05, + "loss": 2.168, + "step": 12216 + }, + { + "epoch": 0.41, + "grad_norm": 0.7164115905761719, + "learning_rate": 1.319692248014611e-05, + "loss": 2.1021, + "step": 12217 + }, + { + "epoch": 0.41, + "grad_norm": 0.7337713241577148, + "learning_rate": 1.3195915348250516e-05, + "loss": 2.0957, + "step": 12218 + }, + { + "epoch": 0.41, + "grad_norm": 0.7305305600166321, + "learning_rate": 1.3194908180249345e-05, + "loss": 2.0436, + "step": 12219 + }, + { + "epoch": 0.41, + "grad_norm": 0.7298184037208557, + "learning_rate": 1.3193900976153971e-05, + "loss": 2.1199, + "step": 12220 + }, + { + "epoch": 0.41, + "grad_norm": 0.6861364841461182, + "learning_rate": 1.3192893735975775e-05, + "loss": 2.1148, + "step": 12221 + }, + { + "epoch": 0.41, + "grad_norm": 0.745052695274353, + "learning_rate": 1.319188645972614e-05, + "loss": 2.0689, + "step": 12222 + }, + { + "epoch": 0.41, + "grad_norm": 0.7128831744194031, + "learning_rate": 1.319087914741644e-05, + "loss": 2.0969, + "step": 12223 + }, + { + "epoch": 0.41, + "grad_norm": 0.7533825039863586, + "learning_rate": 1.3189871799058058e-05, + "loss": 2.0121, + "step": 12224 + }, + { + "epoch": 0.41, + "grad_norm": 0.764360785484314, + "learning_rate": 1.3188864414662377e-05, + "loss": 2.0778, + "step": 12225 + }, + { + "epoch": 0.41, + "grad_norm": 0.7718948125839233, + "learning_rate": 1.318785699424077e-05, + "loss": 2.0817, + "step": 12226 + }, + { + "epoch": 0.41, + "grad_norm": 0.7558844685554504, + "learning_rate": 1.3186849537804625e-05, + "loss": 2.091, + "step": 12227 + }, + { + "epoch": 0.41, + "grad_norm": 0.7332499623298645, + "learning_rate": 1.3185842045365323e-05, + "loss": 2.0769, + "step": 12228 + }, + { + "epoch": 0.41, + "grad_norm": 0.7730721235275269, + "learning_rate": 1.3184834516934242e-05, + "loss": 2.1087, + "step": 12229 + }, + { + "epoch": 0.41, + "grad_norm": 0.7223451733589172, + "learning_rate": 1.3183826952522766e-05, + "loss": 2.052, + "step": 12230 + }, + { + "epoch": 0.41, + "grad_norm": 0.7387200593948364, + "learning_rate": 1.3182819352142281e-05, + "loss": 2.1251, + "step": 12231 + }, + { + "epoch": 0.41, + "grad_norm": 0.7380063533782959, + "learning_rate": 1.3181811715804169e-05, + "loss": 2.1268, + "step": 12232 + }, + { + "epoch": 0.41, + "grad_norm": 0.7524533867835999, + "learning_rate": 1.3180804043519813e-05, + "loss": 2.1285, + "step": 12233 + }, + { + "epoch": 0.41, + "grad_norm": 0.733268141746521, + "learning_rate": 1.3179796335300594e-05, + "loss": 2.1486, + "step": 12234 + }, + { + "epoch": 0.41, + "grad_norm": 0.711117148399353, + "learning_rate": 1.3178788591157903e-05, + "loss": 2.0419, + "step": 12235 + }, + { + "epoch": 0.41, + "grad_norm": 0.7309277653694153, + "learning_rate": 1.3177780811103122e-05, + "loss": 2.0573, + "step": 12236 + }, + { + "epoch": 0.41, + "grad_norm": 0.7389529943466187, + "learning_rate": 1.3176772995147631e-05, + "loss": 2.0573, + "step": 12237 + }, + { + "epoch": 0.41, + "grad_norm": 0.7231329083442688, + "learning_rate": 1.3175765143302827e-05, + "loss": 2.1174, + "step": 12238 + }, + { + "epoch": 0.41, + "grad_norm": 0.7504206895828247, + "learning_rate": 1.3174757255580086e-05, + "loss": 2.104, + "step": 12239 + }, + { + "epoch": 0.41, + "grad_norm": 0.7122098207473755, + "learning_rate": 1.3173749331990797e-05, + "loss": 2.0573, + "step": 12240 + }, + { + "epoch": 0.41, + "grad_norm": 0.738332211971283, + "learning_rate": 1.317274137254635e-05, + "loss": 2.0993, + "step": 12241 + }, + { + "epoch": 0.41, + "grad_norm": 0.7200453877449036, + "learning_rate": 1.3171733377258129e-05, + "loss": 2.0298, + "step": 12242 + }, + { + "epoch": 0.41, + "grad_norm": 0.7603911757469177, + "learning_rate": 1.3170725346137524e-05, + "loss": 2.0486, + "step": 12243 + }, + { + "epoch": 0.41, + "grad_norm": 0.6965784430503845, + "learning_rate": 1.3169717279195922e-05, + "loss": 2.081, + "step": 12244 + }, + { + "epoch": 0.41, + "grad_norm": 0.728667140007019, + "learning_rate": 1.3168709176444711e-05, + "loss": 2.0689, + "step": 12245 + }, + { + "epoch": 0.41, + "grad_norm": 0.7706298232078552, + "learning_rate": 1.3167701037895282e-05, + "loss": 2.0656, + "step": 12246 + }, + { + "epoch": 0.41, + "grad_norm": 0.8332446217536926, + "learning_rate": 1.3166692863559025e-05, + "loss": 2.1349, + "step": 12247 + }, + { + "epoch": 0.41, + "grad_norm": 0.7225379347801208, + "learning_rate": 1.3165684653447324e-05, + "loss": 2.1644, + "step": 12248 + }, + { + "epoch": 0.41, + "grad_norm": 0.7151829600334167, + "learning_rate": 1.3164676407571577e-05, + "loss": 2.0665, + "step": 12249 + }, + { + "epoch": 0.41, + "grad_norm": 0.7026948928833008, + "learning_rate": 1.3163668125943167e-05, + "loss": 2.0818, + "step": 12250 + }, + { + "epoch": 0.41, + "grad_norm": 0.7538250684738159, + "learning_rate": 1.316265980857349e-05, + "loss": 2.0982, + "step": 12251 + }, + { + "epoch": 0.41, + "grad_norm": 0.7229146957397461, + "learning_rate": 1.3161651455473936e-05, + "loss": 2.1051, + "step": 12252 + }, + { + "epoch": 0.41, + "grad_norm": 0.7220286726951599, + "learning_rate": 1.3160643066655897e-05, + "loss": 2.1054, + "step": 12253 + }, + { + "epoch": 0.41, + "grad_norm": 0.7010679841041565, + "learning_rate": 1.3159634642130766e-05, + "loss": 2.0999, + "step": 12254 + }, + { + "epoch": 0.41, + "grad_norm": 0.709977924823761, + "learning_rate": 1.3158626181909933e-05, + "loss": 2.0737, + "step": 12255 + }, + { + "epoch": 0.41, + "grad_norm": 0.7584277391433716, + "learning_rate": 1.3157617686004792e-05, + "loss": 2.1818, + "step": 12256 + }, + { + "epoch": 0.41, + "grad_norm": 0.7256103157997131, + "learning_rate": 1.3156609154426736e-05, + "loss": 2.0949, + "step": 12257 + }, + { + "epoch": 0.41, + "grad_norm": 0.6991605758666992, + "learning_rate": 1.3155600587187163e-05, + "loss": 2.1322, + "step": 12258 + }, + { + "epoch": 0.41, + "grad_norm": 0.7396019101142883, + "learning_rate": 1.3154591984297462e-05, + "loss": 2.0982, + "step": 12259 + }, + { + "epoch": 0.41, + "grad_norm": 0.7315061092376709, + "learning_rate": 1.315358334576903e-05, + "loss": 2.2019, + "step": 12260 + }, + { + "epoch": 0.41, + "grad_norm": 0.7197700142860413, + "learning_rate": 1.3152574671613263e-05, + "loss": 2.114, + "step": 12261 + }, + { + "epoch": 0.41, + "grad_norm": 0.7214488387107849, + "learning_rate": 1.3151565961841552e-05, + "loss": 2.0674, + "step": 12262 + }, + { + "epoch": 0.41, + "grad_norm": 0.7106820940971375, + "learning_rate": 1.3150557216465299e-05, + "loss": 2.0424, + "step": 12263 + }, + { + "epoch": 0.41, + "grad_norm": 0.7551635503768921, + "learning_rate": 1.3149548435495894e-05, + "loss": 2.1015, + "step": 12264 + }, + { + "epoch": 0.41, + "grad_norm": 0.7381037473678589, + "learning_rate": 1.3148539618944738e-05, + "loss": 2.1013, + "step": 12265 + }, + { + "epoch": 0.41, + "grad_norm": 0.7137131094932556, + "learning_rate": 1.3147530766823224e-05, + "loss": 2.1085, + "step": 12266 + }, + { + "epoch": 0.41, + "grad_norm": 0.7035143971443176, + "learning_rate": 1.3146521879142754e-05, + "loss": 2.1048, + "step": 12267 + }, + { + "epoch": 0.41, + "grad_norm": 0.7187929749488831, + "learning_rate": 1.3145512955914726e-05, + "loss": 2.0852, + "step": 12268 + }, + { + "epoch": 0.41, + "grad_norm": 0.7122571468353271, + "learning_rate": 1.3144503997150536e-05, + "loss": 2.0894, + "step": 12269 + }, + { + "epoch": 0.41, + "grad_norm": 0.7035654187202454, + "learning_rate": 1.3143495002861579e-05, + "loss": 2.0255, + "step": 12270 + }, + { + "epoch": 0.41, + "grad_norm": 0.6940398216247559, + "learning_rate": 1.314248597305926e-05, + "loss": 2.0829, + "step": 12271 + }, + { + "epoch": 0.41, + "grad_norm": 0.7191868424415588, + "learning_rate": 1.3141476907754975e-05, + "loss": 2.0492, + "step": 12272 + }, + { + "epoch": 0.41, + "grad_norm": 0.770883321762085, + "learning_rate": 1.3140467806960127e-05, + "loss": 2.0887, + "step": 12273 + }, + { + "epoch": 0.41, + "grad_norm": 0.7466403841972351, + "learning_rate": 1.3139458670686114e-05, + "loss": 2.0788, + "step": 12274 + }, + { + "epoch": 0.41, + "grad_norm": 0.7153128385543823, + "learning_rate": 1.3138449498944334e-05, + "loss": 2.0958, + "step": 12275 + }, + { + "epoch": 0.41, + "grad_norm": 0.7169919013977051, + "learning_rate": 1.3137440291746192e-05, + "loss": 2.047, + "step": 12276 + }, + { + "epoch": 0.41, + "grad_norm": 0.7219117283821106, + "learning_rate": 1.3136431049103091e-05, + "loss": 2.1615, + "step": 12277 + }, + { + "epoch": 0.41, + "grad_norm": 0.7174742817878723, + "learning_rate": 1.3135421771026428e-05, + "loss": 2.0657, + "step": 12278 + }, + { + "epoch": 0.41, + "grad_norm": 0.7144287824630737, + "learning_rate": 1.3134412457527608e-05, + "loss": 2.0515, + "step": 12279 + }, + { + "epoch": 0.41, + "grad_norm": 0.7230450510978699, + "learning_rate": 1.3133403108618032e-05, + "loss": 2.0, + "step": 12280 + }, + { + "epoch": 0.41, + "grad_norm": 0.7241331338882446, + "learning_rate": 1.3132393724309106e-05, + "loss": 2.0619, + "step": 12281 + }, + { + "epoch": 0.41, + "grad_norm": 0.7462328672409058, + "learning_rate": 1.3131384304612229e-05, + "loss": 2.1105, + "step": 12282 + }, + { + "epoch": 0.41, + "grad_norm": 0.7317826747894287, + "learning_rate": 1.3130374849538809e-05, + "loss": 2.1196, + "step": 12283 + }, + { + "epoch": 0.41, + "grad_norm": 0.719585120677948, + "learning_rate": 1.3129365359100246e-05, + "loss": 2.0432, + "step": 12284 + }, + { + "epoch": 0.41, + "grad_norm": 0.7141981720924377, + "learning_rate": 1.312835583330795e-05, + "loss": 2.0894, + "step": 12285 + }, + { + "epoch": 0.41, + "grad_norm": 0.7380064725875854, + "learning_rate": 1.3127346272173322e-05, + "loss": 2.1083, + "step": 12286 + }, + { + "epoch": 0.41, + "grad_norm": 0.7349861264228821, + "learning_rate": 1.3126336675707769e-05, + "loss": 2.14, + "step": 12287 + }, + { + "epoch": 0.41, + "grad_norm": 0.7635281085968018, + "learning_rate": 1.3125327043922695e-05, + "loss": 2.1023, + "step": 12288 + }, + { + "epoch": 0.41, + "grad_norm": 0.7234963178634644, + "learning_rate": 1.3124317376829509e-05, + "loss": 2.1169, + "step": 12289 + }, + { + "epoch": 0.41, + "grad_norm": 0.7389782071113586, + "learning_rate": 1.3123307674439618e-05, + "loss": 2.0732, + "step": 12290 + }, + { + "epoch": 0.41, + "grad_norm": 0.7262029647827148, + "learning_rate": 1.3122297936764428e-05, + "loss": 2.0917, + "step": 12291 + }, + { + "epoch": 0.41, + "grad_norm": 0.7388591170310974, + "learning_rate": 1.312128816381534e-05, + "loss": 2.178, + "step": 12292 + }, + { + "epoch": 0.41, + "grad_norm": 0.7062615156173706, + "learning_rate": 1.3120278355603773e-05, + "loss": 2.0834, + "step": 12293 + }, + { + "epoch": 0.41, + "grad_norm": 0.7468334436416626, + "learning_rate": 1.3119268512141128e-05, + "loss": 2.1267, + "step": 12294 + }, + { + "epoch": 0.41, + "grad_norm": 0.730571985244751, + "learning_rate": 1.3118258633438812e-05, + "loss": 2.0835, + "step": 12295 + }, + { + "epoch": 0.41, + "grad_norm": 0.7390456795692444, + "learning_rate": 1.3117248719508244e-05, + "loss": 2.0981, + "step": 12296 + }, + { + "epoch": 0.41, + "grad_norm": 0.7342653274536133, + "learning_rate": 1.3116238770360823e-05, + "loss": 2.1061, + "step": 12297 + }, + { + "epoch": 0.41, + "grad_norm": 0.7776727080345154, + "learning_rate": 1.3115228786007963e-05, + "loss": 2.1455, + "step": 12298 + }, + { + "epoch": 0.41, + "grad_norm": 0.7341132164001465, + "learning_rate": 1.3114218766461072e-05, + "loss": 2.0983, + "step": 12299 + }, + { + "epoch": 0.41, + "grad_norm": 0.7398295402526855, + "learning_rate": 1.3113208711731567e-05, + "loss": 2.1011, + "step": 12300 + }, + { + "epoch": 0.41, + "grad_norm": 0.7450608015060425, + "learning_rate": 1.3112198621830852e-05, + "loss": 1.9763, + "step": 12301 + }, + { + "epoch": 0.41, + "grad_norm": 0.7357621788978577, + "learning_rate": 1.3111188496770342e-05, + "loss": 2.1598, + "step": 12302 + }, + { + "epoch": 0.41, + "grad_norm": 0.7268281579017639, + "learning_rate": 1.3110178336561444e-05, + "loss": 2.0508, + "step": 12303 + }, + { + "epoch": 0.41, + "grad_norm": 0.7181071639060974, + "learning_rate": 1.310916814121558e-05, + "loss": 2.0474, + "step": 12304 + }, + { + "epoch": 0.41, + "grad_norm": 0.7495671510696411, + "learning_rate": 1.310815791074415e-05, + "loss": 2.106, + "step": 12305 + }, + { + "epoch": 0.41, + "grad_norm": 0.7158242464065552, + "learning_rate": 1.3107147645158576e-05, + "loss": 2.1299, + "step": 12306 + }, + { + "epoch": 0.41, + "grad_norm": 0.7315487861633301, + "learning_rate": 1.310613734447027e-05, + "loss": 2.0266, + "step": 12307 + }, + { + "epoch": 0.41, + "grad_norm": 0.7496597766876221, + "learning_rate": 1.3105127008690644e-05, + "loss": 2.1436, + "step": 12308 + }, + { + "epoch": 0.41, + "grad_norm": 0.7076642513275146, + "learning_rate": 1.3104116637831114e-05, + "loss": 2.1448, + "step": 12309 + }, + { + "epoch": 0.41, + "grad_norm": 0.7664644122123718, + "learning_rate": 1.310310623190309e-05, + "loss": 2.1678, + "step": 12310 + }, + { + "epoch": 0.41, + "grad_norm": 0.7084061503410339, + "learning_rate": 1.3102095790917994e-05, + "loss": 2.0676, + "step": 12311 + }, + { + "epoch": 0.41, + "grad_norm": 0.72226881980896, + "learning_rate": 1.3101085314887237e-05, + "loss": 2.0899, + "step": 12312 + }, + { + "epoch": 0.41, + "grad_norm": 0.8006677031517029, + "learning_rate": 1.3100074803822236e-05, + "loss": 2.0548, + "step": 12313 + }, + { + "epoch": 0.41, + "grad_norm": 0.715962290763855, + "learning_rate": 1.3099064257734404e-05, + "loss": 2.1122, + "step": 12314 + }, + { + "epoch": 0.41, + "grad_norm": 0.7328554391860962, + "learning_rate": 1.309805367663516e-05, + "loss": 2.0859, + "step": 12315 + }, + { + "epoch": 0.41, + "grad_norm": 0.7648824453353882, + "learning_rate": 1.3097043060535926e-05, + "loss": 2.13, + "step": 12316 + }, + { + "epoch": 0.41, + "grad_norm": 0.7866998314857483, + "learning_rate": 1.3096032409448109e-05, + "loss": 1.9937, + "step": 12317 + }, + { + "epoch": 0.41, + "grad_norm": 0.7532053589820862, + "learning_rate": 1.3095021723383139e-05, + "loss": 2.0001, + "step": 12318 + }, + { + "epoch": 0.41, + "grad_norm": 0.7566047310829163, + "learning_rate": 1.3094011002352421e-05, + "loss": 2.1032, + "step": 12319 + }, + { + "epoch": 0.41, + "grad_norm": 0.7170406579971313, + "learning_rate": 1.3093000246367385e-05, + "loss": 2.061, + "step": 12320 + }, + { + "epoch": 0.41, + "grad_norm": 0.7680877447128296, + "learning_rate": 1.309198945543944e-05, + "loss": 2.0961, + "step": 12321 + }, + { + "epoch": 0.41, + "grad_norm": 0.7140366435050964, + "learning_rate": 1.3090978629580013e-05, + "loss": 2.1093, + "step": 12322 + }, + { + "epoch": 0.41, + "grad_norm": 0.7203481793403625, + "learning_rate": 1.308996776880052e-05, + "loss": 2.0624, + "step": 12323 + }, + { + "epoch": 0.41, + "grad_norm": 0.7167226076126099, + "learning_rate": 1.3088956873112381e-05, + "loss": 2.0504, + "step": 12324 + }, + { + "epoch": 0.41, + "grad_norm": 0.7650008201599121, + "learning_rate": 1.308794594252702e-05, + "loss": 2.0954, + "step": 12325 + }, + { + "epoch": 0.41, + "grad_norm": 0.7596518397331238, + "learning_rate": 1.3086934977055854e-05, + "loss": 2.0559, + "step": 12326 + }, + { + "epoch": 0.41, + "grad_norm": 0.7558705806732178, + "learning_rate": 1.3085923976710306e-05, + "loss": 2.1087, + "step": 12327 + }, + { + "epoch": 0.41, + "grad_norm": 0.7355661392211914, + "learning_rate": 1.3084912941501797e-05, + "loss": 2.0683, + "step": 12328 + }, + { + "epoch": 0.41, + "grad_norm": 0.7271652221679688, + "learning_rate": 1.3083901871441753e-05, + "loss": 2.1263, + "step": 12329 + }, + { + "epoch": 0.41, + "grad_norm": 0.7198430895805359, + "learning_rate": 1.3082890766541587e-05, + "loss": 2.1222, + "step": 12330 + }, + { + "epoch": 0.41, + "grad_norm": 0.7599000930786133, + "learning_rate": 1.3081879626812735e-05, + "loss": 2.1045, + "step": 12331 + }, + { + "epoch": 0.41, + "grad_norm": 0.7240828275680542, + "learning_rate": 1.3080868452266608e-05, + "loss": 2.0743, + "step": 12332 + }, + { + "epoch": 0.41, + "grad_norm": 0.7350494265556335, + "learning_rate": 1.3079857242914635e-05, + "loss": 2.1942, + "step": 12333 + }, + { + "epoch": 0.41, + "grad_norm": 0.7203738689422607, + "learning_rate": 1.3078845998768241e-05, + "loss": 2.0572, + "step": 12334 + }, + { + "epoch": 0.41, + "grad_norm": 0.7287986278533936, + "learning_rate": 1.307783471983885e-05, + "loss": 2.0391, + "step": 12335 + }, + { + "epoch": 0.41, + "grad_norm": 0.7205066680908203, + "learning_rate": 1.3076823406137885e-05, + "loss": 2.0729, + "step": 12336 + }, + { + "epoch": 0.41, + "grad_norm": 0.7623180150985718, + "learning_rate": 1.3075812057676771e-05, + "loss": 2.0729, + "step": 12337 + }, + { + "epoch": 0.41, + "grad_norm": 0.7264131903648376, + "learning_rate": 1.3074800674466936e-05, + "loss": 2.0602, + "step": 12338 + }, + { + "epoch": 0.41, + "grad_norm": 0.7383767366409302, + "learning_rate": 1.3073789256519806e-05, + "loss": 2.1407, + "step": 12339 + }, + { + "epoch": 0.41, + "grad_norm": 0.7553867697715759, + "learning_rate": 1.3072777803846808e-05, + "loss": 2.171, + "step": 12340 + }, + { + "epoch": 0.41, + "grad_norm": 0.7256442308425903, + "learning_rate": 1.3071766316459362e-05, + "loss": 2.1121, + "step": 12341 + }, + { + "epoch": 0.41, + "grad_norm": 0.7372667193412781, + "learning_rate": 1.3070754794368907e-05, + "loss": 2.0319, + "step": 12342 + }, + { + "epoch": 0.41, + "grad_norm": 0.7469893097877502, + "learning_rate": 1.306974323758686e-05, + "loss": 2.0926, + "step": 12343 + }, + { + "epoch": 0.41, + "grad_norm": 0.7320919036865234, + "learning_rate": 1.3068731646124652e-05, + "loss": 2.0884, + "step": 12344 + }, + { + "epoch": 0.41, + "grad_norm": 0.743634819984436, + "learning_rate": 1.3067720019993715e-05, + "loss": 2.0944, + "step": 12345 + }, + { + "epoch": 0.41, + "grad_norm": 0.7345340847969055, + "learning_rate": 1.3066708359205473e-05, + "loss": 2.0338, + "step": 12346 + }, + { + "epoch": 0.41, + "grad_norm": 0.7416863441467285, + "learning_rate": 1.3065696663771359e-05, + "loss": 2.1609, + "step": 12347 + }, + { + "epoch": 0.41, + "grad_norm": 0.7155302166938782, + "learning_rate": 1.30646849337028e-05, + "loss": 2.1, + "step": 12348 + }, + { + "epoch": 0.41, + "grad_norm": 0.7079076766967773, + "learning_rate": 1.3063673169011226e-05, + "loss": 2.0907, + "step": 12349 + }, + { + "epoch": 0.41, + "grad_norm": 0.728448212146759, + "learning_rate": 1.3062661369708068e-05, + "loss": 2.0345, + "step": 12350 + }, + { + "epoch": 0.41, + "grad_norm": 0.7596637010574341, + "learning_rate": 1.306164953580476e-05, + "loss": 2.0966, + "step": 12351 + }, + { + "epoch": 0.41, + "grad_norm": 0.7685812711715698, + "learning_rate": 1.3060637667312723e-05, + "loss": 2.1784, + "step": 12352 + }, + { + "epoch": 0.41, + "grad_norm": 0.7350772023200989, + "learning_rate": 1.3059625764243403e-05, + "loss": 2.0675, + "step": 12353 + }, + { + "epoch": 0.41, + "grad_norm": 0.7163321375846863, + "learning_rate": 1.305861382660822e-05, + "loss": 2.0769, + "step": 12354 + }, + { + "epoch": 0.41, + "grad_norm": 0.7666007280349731, + "learning_rate": 1.3057601854418612e-05, + "loss": 2.0864, + "step": 12355 + }, + { + "epoch": 0.41, + "grad_norm": 0.7470929026603699, + "learning_rate": 1.3056589847686008e-05, + "loss": 2.1617, + "step": 12356 + }, + { + "epoch": 0.41, + "grad_norm": 0.7122946381568909, + "learning_rate": 1.3055577806421844e-05, + "loss": 2.1232, + "step": 12357 + }, + { + "epoch": 0.41, + "grad_norm": 0.7549747824668884, + "learning_rate": 1.3054565730637554e-05, + "loss": 2.1978, + "step": 12358 + }, + { + "epoch": 0.41, + "grad_norm": 0.7279300689697266, + "learning_rate": 1.305355362034457e-05, + "loss": 2.0166, + "step": 12359 + }, + { + "epoch": 0.41, + "grad_norm": 0.7320740818977356, + "learning_rate": 1.3052541475554325e-05, + "loss": 2.1422, + "step": 12360 + }, + { + "epoch": 0.41, + "grad_norm": 0.7508188486099243, + "learning_rate": 1.3051529296278259e-05, + "loss": 1.9954, + "step": 12361 + }, + { + "epoch": 0.41, + "grad_norm": 0.7478777170181274, + "learning_rate": 1.3050517082527802e-05, + "loss": 2.0834, + "step": 12362 + }, + { + "epoch": 0.41, + "grad_norm": 0.722675621509552, + "learning_rate": 1.3049504834314389e-05, + "loss": 2.0601, + "step": 12363 + }, + { + "epoch": 0.41, + "grad_norm": 0.7712547779083252, + "learning_rate": 1.3048492551649459e-05, + "loss": 2.0442, + "step": 12364 + }, + { + "epoch": 0.41, + "grad_norm": 0.7088399529457092, + "learning_rate": 1.3047480234544447e-05, + "loss": 2.127, + "step": 12365 + }, + { + "epoch": 0.41, + "grad_norm": 0.7515777945518494, + "learning_rate": 1.3046467883010788e-05, + "loss": 2.0906, + "step": 12366 + }, + { + "epoch": 0.41, + "grad_norm": 0.7725820541381836, + "learning_rate": 1.304545549705992e-05, + "loss": 2.1274, + "step": 12367 + }, + { + "epoch": 0.41, + "grad_norm": 0.732892632484436, + "learning_rate": 1.3044443076703281e-05, + "loss": 2.1339, + "step": 12368 + }, + { + "epoch": 0.41, + "grad_norm": 0.739269495010376, + "learning_rate": 1.304343062195231e-05, + "loss": 2.066, + "step": 12369 + }, + { + "epoch": 0.41, + "grad_norm": 0.757472813129425, + "learning_rate": 1.3042418132818442e-05, + "loss": 2.121, + "step": 12370 + }, + { + "epoch": 0.41, + "grad_norm": 0.7038738131523132, + "learning_rate": 1.3041405609313117e-05, + "loss": 2.0845, + "step": 12371 + }, + { + "epoch": 0.41, + "grad_norm": 0.7386584877967834, + "learning_rate": 1.3040393051447776e-05, + "loss": 2.1328, + "step": 12372 + }, + { + "epoch": 0.41, + "grad_norm": 0.771976888179779, + "learning_rate": 1.3039380459233855e-05, + "loss": 2.0488, + "step": 12373 + }, + { + "epoch": 0.41, + "grad_norm": 0.7317124009132385, + "learning_rate": 1.3038367832682793e-05, + "loss": 2.0415, + "step": 12374 + }, + { + "epoch": 0.41, + "grad_norm": 0.7550515532493591, + "learning_rate": 1.3037355171806034e-05, + "loss": 2.1163, + "step": 12375 + }, + { + "epoch": 0.41, + "grad_norm": 0.7270771265029907, + "learning_rate": 1.3036342476615017e-05, + "loss": 2.0651, + "step": 12376 + }, + { + "epoch": 0.41, + "grad_norm": 0.719088613986969, + "learning_rate": 1.3035329747121177e-05, + "loss": 2.0914, + "step": 12377 + }, + { + "epoch": 0.41, + "grad_norm": 0.7338764667510986, + "learning_rate": 1.3034316983335966e-05, + "loss": 2.0441, + "step": 12378 + }, + { + "epoch": 0.41, + "grad_norm": 0.7195775508880615, + "learning_rate": 1.3033304185270819e-05, + "loss": 2.1247, + "step": 12379 + }, + { + "epoch": 0.41, + "grad_norm": 0.7105481028556824, + "learning_rate": 1.3032291352937176e-05, + "loss": 2.115, + "step": 12380 + }, + { + "epoch": 0.41, + "grad_norm": 0.7122864723205566, + "learning_rate": 1.3031278486346484e-05, + "loss": 2.1242, + "step": 12381 + }, + { + "epoch": 0.41, + "grad_norm": 0.7632964253425598, + "learning_rate": 1.3030265585510185e-05, + "loss": 2.0858, + "step": 12382 + }, + { + "epoch": 0.41, + "grad_norm": 0.7272965312004089, + "learning_rate": 1.3029252650439722e-05, + "loss": 2.1293, + "step": 12383 + }, + { + "epoch": 0.41, + "grad_norm": 0.7279636263847351, + "learning_rate": 1.3028239681146537e-05, + "loss": 2.1043, + "step": 12384 + }, + { + "epoch": 0.41, + "grad_norm": 0.7214242815971375, + "learning_rate": 1.3027226677642073e-05, + "loss": 2.1085, + "step": 12385 + }, + { + "epoch": 0.41, + "grad_norm": 0.7254440784454346, + "learning_rate": 1.3026213639937779e-05, + "loss": 2.0777, + "step": 12386 + }, + { + "epoch": 0.41, + "grad_norm": 0.8031516075134277, + "learning_rate": 1.3025200568045096e-05, + "loss": 2.1154, + "step": 12387 + }, + { + "epoch": 0.41, + "grad_norm": 0.7618164420127869, + "learning_rate": 1.3024187461975468e-05, + "loss": 2.1237, + "step": 12388 + }, + { + "epoch": 0.41, + "grad_norm": 0.7552929520606995, + "learning_rate": 1.3023174321740344e-05, + "loss": 2.044, + "step": 12389 + }, + { + "epoch": 0.41, + "grad_norm": 0.7681983113288879, + "learning_rate": 1.3022161147351168e-05, + "loss": 2.0774, + "step": 12390 + }, + { + "epoch": 0.41, + "grad_norm": 0.7132359147071838, + "learning_rate": 1.3021147938819387e-05, + "loss": 2.0364, + "step": 12391 + }, + { + "epoch": 0.41, + "grad_norm": 0.75300133228302, + "learning_rate": 1.3020134696156448e-05, + "loss": 2.0942, + "step": 12392 + }, + { + "epoch": 0.41, + "grad_norm": 0.7216118574142456, + "learning_rate": 1.3019121419373794e-05, + "loss": 2.1265, + "step": 12393 + }, + { + "epoch": 0.41, + "grad_norm": 0.7107651829719543, + "learning_rate": 1.301810810848288e-05, + "loss": 2.1309, + "step": 12394 + }, + { + "epoch": 0.41, + "grad_norm": 0.7452234625816345, + "learning_rate": 1.3017094763495148e-05, + "loss": 2.0988, + "step": 12395 + }, + { + "epoch": 0.41, + "grad_norm": 0.7183559536933899, + "learning_rate": 1.3016081384422044e-05, + "loss": 2.0499, + "step": 12396 + }, + { + "epoch": 0.41, + "grad_norm": 0.7661429643630981, + "learning_rate": 1.3015067971275023e-05, + "loss": 2.1108, + "step": 12397 + }, + { + "epoch": 0.41, + "grad_norm": 0.7464975714683533, + "learning_rate": 1.3014054524065531e-05, + "loss": 2.1354, + "step": 12398 + }, + { + "epoch": 0.41, + "grad_norm": 0.7326420545578003, + "learning_rate": 1.3013041042805014e-05, + "loss": 2.0899, + "step": 12399 + }, + { + "epoch": 0.41, + "grad_norm": 0.7147065997123718, + "learning_rate": 1.301202752750493e-05, + "loss": 2.0265, + "step": 12400 + }, + { + "epoch": 0.41, + "grad_norm": 0.7486024498939514, + "learning_rate": 1.301101397817672e-05, + "loss": 2.1461, + "step": 12401 + }, + { + "epoch": 0.41, + "grad_norm": 0.739072322845459, + "learning_rate": 1.3010000394831846e-05, + "loss": 2.0902, + "step": 12402 + }, + { + "epoch": 0.41, + "grad_norm": 0.7615841627120972, + "learning_rate": 1.3008986777481744e-05, + "loss": 2.131, + "step": 12403 + }, + { + "epoch": 0.41, + "grad_norm": 0.7553632259368896, + "learning_rate": 1.3007973126137876e-05, + "loss": 2.1382, + "step": 12404 + }, + { + "epoch": 0.41, + "grad_norm": 0.7141990065574646, + "learning_rate": 1.3006959440811691e-05, + "loss": 2.0961, + "step": 12405 + }, + { + "epoch": 0.41, + "grad_norm": 0.721104085445404, + "learning_rate": 1.3005945721514642e-05, + "loss": 2.1062, + "step": 12406 + }, + { + "epoch": 0.41, + "grad_norm": 0.7564157247543335, + "learning_rate": 1.3004931968258174e-05, + "loss": 2.0973, + "step": 12407 + }, + { + "epoch": 0.41, + "grad_norm": 0.7303712964057922, + "learning_rate": 1.300391818105375e-05, + "loss": 2.0641, + "step": 12408 + }, + { + "epoch": 0.41, + "grad_norm": 0.7411707043647766, + "learning_rate": 1.3002904359912815e-05, + "loss": 2.0741, + "step": 12409 + }, + { + "epoch": 0.41, + "grad_norm": 0.7584968209266663, + "learning_rate": 1.300189050484683e-05, + "loss": 2.0971, + "step": 12410 + }, + { + "epoch": 0.41, + "grad_norm": 0.7540577054023743, + "learning_rate": 1.3000876615867246e-05, + "loss": 2.0445, + "step": 12411 + }, + { + "epoch": 0.41, + "grad_norm": 0.7482898831367493, + "learning_rate": 1.2999862692985515e-05, + "loss": 2.0558, + "step": 12412 + }, + { + "epoch": 0.41, + "grad_norm": 0.7182303667068481, + "learning_rate": 1.2998848736213094e-05, + "loss": 2.0866, + "step": 12413 + }, + { + "epoch": 0.41, + "grad_norm": 0.7315609455108643, + "learning_rate": 1.2997834745561437e-05, + "loss": 2.0639, + "step": 12414 + }, + { + "epoch": 0.41, + "grad_norm": 0.7472220063209534, + "learning_rate": 1.2996820721042001e-05, + "loss": 2.1198, + "step": 12415 + }, + { + "epoch": 0.41, + "grad_norm": 0.7642606496810913, + "learning_rate": 1.299580666266624e-05, + "loss": 2.1476, + "step": 12416 + }, + { + "epoch": 0.41, + "grad_norm": 0.7121624946594238, + "learning_rate": 1.2994792570445612e-05, + "loss": 2.1061, + "step": 12417 + }, + { + "epoch": 0.41, + "grad_norm": 0.7423768639564514, + "learning_rate": 1.299377844439157e-05, + "loss": 2.1004, + "step": 12418 + }, + { + "epoch": 0.41, + "grad_norm": 0.7726824283599854, + "learning_rate": 1.2992764284515575e-05, + "loss": 2.0773, + "step": 12419 + }, + { + "epoch": 0.41, + "grad_norm": 0.7338455319404602, + "learning_rate": 1.2991750090829082e-05, + "loss": 2.1518, + "step": 12420 + }, + { + "epoch": 0.41, + "grad_norm": 0.7193387150764465, + "learning_rate": 1.2990735863343552e-05, + "loss": 2.1054, + "step": 12421 + }, + { + "epoch": 0.41, + "grad_norm": 0.7375040054321289, + "learning_rate": 1.2989721602070442e-05, + "loss": 2.1476, + "step": 12422 + }, + { + "epoch": 0.41, + "grad_norm": 0.7037714719772339, + "learning_rate": 1.2988707307021207e-05, + "loss": 2.0779, + "step": 12423 + }, + { + "epoch": 0.41, + "grad_norm": 0.722993791103363, + "learning_rate": 1.298769297820731e-05, + "loss": 2.0913, + "step": 12424 + }, + { + "epoch": 0.41, + "grad_norm": 0.7318497896194458, + "learning_rate": 1.2986678615640208e-05, + "loss": 2.1353, + "step": 12425 + }, + { + "epoch": 0.41, + "grad_norm": 0.7400106191635132, + "learning_rate": 1.298566421933136e-05, + "loss": 2.107, + "step": 12426 + }, + { + "epoch": 0.41, + "grad_norm": 0.7406516075134277, + "learning_rate": 1.298464978929223e-05, + "loss": 2.1031, + "step": 12427 + }, + { + "epoch": 0.41, + "grad_norm": 0.7219787836074829, + "learning_rate": 1.2983635325534273e-05, + "loss": 2.1066, + "step": 12428 + }, + { + "epoch": 0.41, + "grad_norm": 0.7212991714477539, + "learning_rate": 1.2982620828068955e-05, + "loss": 2.1884, + "step": 12429 + }, + { + "epoch": 0.41, + "grad_norm": 0.7395643591880798, + "learning_rate": 1.2981606296907733e-05, + "loss": 2.0887, + "step": 12430 + }, + { + "epoch": 0.41, + "grad_norm": 0.7339820861816406, + "learning_rate": 1.2980591732062071e-05, + "loss": 2.1066, + "step": 12431 + }, + { + "epoch": 0.41, + "grad_norm": 0.7472740411758423, + "learning_rate": 1.297957713354343e-05, + "loss": 2.1136, + "step": 12432 + }, + { + "epoch": 0.41, + "grad_norm": 0.7241846323013306, + "learning_rate": 1.2978562501363276e-05, + "loss": 2.0129, + "step": 12433 + }, + { + "epoch": 0.41, + "grad_norm": 0.7235549092292786, + "learning_rate": 1.2977547835533065e-05, + "loss": 2.0751, + "step": 12434 + }, + { + "epoch": 0.41, + "grad_norm": 0.7627942562103271, + "learning_rate": 1.2976533136064266e-05, + "loss": 2.0619, + "step": 12435 + }, + { + "epoch": 0.41, + "grad_norm": 0.7333987355232239, + "learning_rate": 1.297551840296834e-05, + "loss": 2.1492, + "step": 12436 + }, + { + "epoch": 0.41, + "grad_norm": 0.7314045429229736, + "learning_rate": 1.2974503636256748e-05, + "loss": 2.0264, + "step": 12437 + }, + { + "epoch": 0.41, + "grad_norm": 0.7258691787719727, + "learning_rate": 1.2973488835940957e-05, + "loss": 2.0911, + "step": 12438 + }, + { + "epoch": 0.41, + "grad_norm": 0.7175994515419006, + "learning_rate": 1.2972474002032434e-05, + "loss": 2.0402, + "step": 12439 + }, + { + "epoch": 0.41, + "grad_norm": 0.7267522215843201, + "learning_rate": 1.2971459134542641e-05, + "loss": 2.1262, + "step": 12440 + }, + { + "epoch": 0.41, + "grad_norm": 0.7042745351791382, + "learning_rate": 1.2970444233483044e-05, + "loss": 2.1337, + "step": 12441 + }, + { + "epoch": 0.41, + "grad_norm": 0.7178748846054077, + "learning_rate": 1.2969429298865109e-05, + "loss": 2.0386, + "step": 12442 + }, + { + "epoch": 0.41, + "grad_norm": 0.7408658266067505, + "learning_rate": 1.2968414330700303e-05, + "loss": 2.0807, + "step": 12443 + }, + { + "epoch": 0.41, + "grad_norm": 0.7536001205444336, + "learning_rate": 1.2967399329000092e-05, + "loss": 2.0797, + "step": 12444 + }, + { + "epoch": 0.41, + "grad_norm": 0.7605969309806824, + "learning_rate": 1.2966384293775937e-05, + "loss": 2.0311, + "step": 12445 + }, + { + "epoch": 0.41, + "grad_norm": 0.7548078298568726, + "learning_rate": 1.2965369225039318e-05, + "loss": 2.1059, + "step": 12446 + }, + { + "epoch": 0.41, + "grad_norm": 0.7265551686286926, + "learning_rate": 1.2964354122801695e-05, + "loss": 2.0705, + "step": 12447 + }, + { + "epoch": 0.41, + "grad_norm": 0.7274618148803711, + "learning_rate": 1.2963338987074531e-05, + "loss": 2.0621, + "step": 12448 + }, + { + "epoch": 0.41, + "grad_norm": 0.7528622150421143, + "learning_rate": 1.2962323817869304e-05, + "loss": 2.1558, + "step": 12449 + }, + { + "epoch": 0.41, + "grad_norm": 0.7333722114562988, + "learning_rate": 1.2961308615197476e-05, + "loss": 2.0447, + "step": 12450 + }, + { + "epoch": 0.41, + "grad_norm": 0.7431352138519287, + "learning_rate": 1.2960293379070521e-05, + "loss": 2.096, + "step": 12451 + }, + { + "epoch": 0.41, + "grad_norm": 0.7008032202720642, + "learning_rate": 1.2959278109499904e-05, + "loss": 2.0824, + "step": 12452 + }, + { + "epoch": 0.41, + "grad_norm": 0.7102967500686646, + "learning_rate": 1.2958262806497097e-05, + "loss": 2.0613, + "step": 12453 + }, + { + "epoch": 0.41, + "grad_norm": 0.7483636736869812, + "learning_rate": 1.2957247470073572e-05, + "loss": 2.1896, + "step": 12454 + }, + { + "epoch": 0.41, + "grad_norm": 0.7122631669044495, + "learning_rate": 1.2956232100240802e-05, + "loss": 2.1096, + "step": 12455 + }, + { + "epoch": 0.41, + "grad_norm": 0.7482119798660278, + "learning_rate": 1.2955216697010249e-05, + "loss": 2.0603, + "step": 12456 + }, + { + "epoch": 0.41, + "grad_norm": 0.7347743511199951, + "learning_rate": 1.2954201260393391e-05, + "loss": 2.0582, + "step": 12457 + }, + { + "epoch": 0.41, + "grad_norm": 0.7447862029075623, + "learning_rate": 1.2953185790401699e-05, + "loss": 2.0154, + "step": 12458 + }, + { + "epoch": 0.41, + "grad_norm": 0.7481386065483093, + "learning_rate": 1.2952170287046644e-05, + "loss": 2.101, + "step": 12459 + }, + { + "epoch": 0.41, + "grad_norm": 0.7137046456336975, + "learning_rate": 1.2951154750339701e-05, + "loss": 2.0875, + "step": 12460 + }, + { + "epoch": 0.41, + "grad_norm": 0.7311532497406006, + "learning_rate": 1.2950139180292338e-05, + "loss": 2.0994, + "step": 12461 + }, + { + "epoch": 0.41, + "grad_norm": 0.7358998656272888, + "learning_rate": 1.2949123576916033e-05, + "loss": 2.1244, + "step": 12462 + }, + { + "epoch": 0.41, + "grad_norm": 0.7009521126747131, + "learning_rate": 1.2948107940222258e-05, + "loss": 2.0667, + "step": 12463 + }, + { + "epoch": 0.41, + "grad_norm": 0.7382879257202148, + "learning_rate": 1.2947092270222487e-05, + "loss": 2.1183, + "step": 12464 + }, + { + "epoch": 0.41, + "grad_norm": 0.7537720203399658, + "learning_rate": 1.2946076566928197e-05, + "loss": 2.089, + "step": 12465 + }, + { + "epoch": 0.41, + "grad_norm": 0.7354782223701477, + "learning_rate": 1.294506083035086e-05, + "loss": 2.0513, + "step": 12466 + }, + { + "epoch": 0.41, + "grad_norm": 0.7709543704986572, + "learning_rate": 1.2944045060501949e-05, + "loss": 2.1218, + "step": 12467 + }, + { + "epoch": 0.41, + "grad_norm": 0.7562302947044373, + "learning_rate": 1.2943029257392946e-05, + "loss": 2.1225, + "step": 12468 + }, + { + "epoch": 0.41, + "grad_norm": 0.7235966324806213, + "learning_rate": 1.294201342103532e-05, + "loss": 2.0557, + "step": 12469 + }, + { + "epoch": 0.41, + "grad_norm": 0.7397311329841614, + "learning_rate": 1.294099755144055e-05, + "loss": 2.1216, + "step": 12470 + }, + { + "epoch": 0.41, + "grad_norm": 0.7242834568023682, + "learning_rate": 1.2939981648620117e-05, + "loss": 2.0255, + "step": 12471 + }, + { + "epoch": 0.41, + "grad_norm": 0.7577300071716309, + "learning_rate": 1.2938965712585491e-05, + "loss": 2.1199, + "step": 12472 + }, + { + "epoch": 0.41, + "grad_norm": 0.7869002819061279, + "learning_rate": 1.2937949743348155e-05, + "loss": 2.0693, + "step": 12473 + }, + { + "epoch": 0.42, + "grad_norm": 0.772146463394165, + "learning_rate": 1.2936933740919582e-05, + "loss": 2.0179, + "step": 12474 + }, + { + "epoch": 0.42, + "grad_norm": 0.7702578902244568, + "learning_rate": 1.2935917705311254e-05, + "loss": 2.0312, + "step": 12475 + }, + { + "epoch": 0.42, + "grad_norm": 0.7136684060096741, + "learning_rate": 1.293490163653465e-05, + "loss": 2.0224, + "step": 12476 + }, + { + "epoch": 0.42, + "grad_norm": 0.7049131393432617, + "learning_rate": 1.2933885534601247e-05, + "loss": 2.1108, + "step": 12477 + }, + { + "epoch": 0.42, + "grad_norm": 0.7300074696540833, + "learning_rate": 1.2932869399522524e-05, + "loss": 2.0606, + "step": 12478 + }, + { + "epoch": 0.42, + "grad_norm": 0.7547006607055664, + "learning_rate": 1.2931853231309962e-05, + "loss": 2.1243, + "step": 12479 + }, + { + "epoch": 0.42, + "grad_norm": 0.7452551126480103, + "learning_rate": 1.2930837029975039e-05, + "loss": 2.1121, + "step": 12480 + }, + { + "epoch": 0.42, + "grad_norm": 0.7739254236221313, + "learning_rate": 1.2929820795529238e-05, + "loss": 2.135, + "step": 12481 + }, + { + "epoch": 0.42, + "grad_norm": 0.7449448108673096, + "learning_rate": 1.2928804527984039e-05, + "loss": 2.0873, + "step": 12482 + }, + { + "epoch": 0.42, + "grad_norm": 0.7372896075248718, + "learning_rate": 1.292778822735092e-05, + "loss": 2.0538, + "step": 12483 + }, + { + "epoch": 0.42, + "grad_norm": 0.7410143613815308, + "learning_rate": 1.2926771893641367e-05, + "loss": 2.0869, + "step": 12484 + }, + { + "epoch": 0.42, + "grad_norm": 0.7250874042510986, + "learning_rate": 1.2925755526866861e-05, + "loss": 2.1315, + "step": 12485 + }, + { + "epoch": 0.42, + "grad_norm": 0.7346044778823853, + "learning_rate": 1.2924739127038884e-05, + "loss": 2.0274, + "step": 12486 + }, + { + "epoch": 0.42, + "grad_norm": 0.7323034405708313, + "learning_rate": 1.2923722694168918e-05, + "loss": 2.1207, + "step": 12487 + }, + { + "epoch": 0.42, + "grad_norm": 0.7275946140289307, + "learning_rate": 1.292270622826845e-05, + "loss": 2.0034, + "step": 12488 + }, + { + "epoch": 0.42, + "grad_norm": 0.7204136848449707, + "learning_rate": 1.2921689729348951e-05, + "loss": 2.0823, + "step": 12489 + }, + { + "epoch": 0.42, + "grad_norm": 0.7274395823478699, + "learning_rate": 1.2920673197421922e-05, + "loss": 2.075, + "step": 12490 + }, + { + "epoch": 0.42, + "grad_norm": 0.7710362672805786, + "learning_rate": 1.2919656632498837e-05, + "loss": 2.0882, + "step": 12491 + }, + { + "epoch": 0.42, + "grad_norm": 0.752733051776886, + "learning_rate": 1.2918640034591179e-05, + "loss": 2.0999, + "step": 12492 + }, + { + "epoch": 0.42, + "grad_norm": 0.7112500071525574, + "learning_rate": 1.291762340371044e-05, + "loss": 2.0026, + "step": 12493 + }, + { + "epoch": 0.42, + "grad_norm": 0.774886965751648, + "learning_rate": 1.2916606739868098e-05, + "loss": 2.1162, + "step": 12494 + }, + { + "epoch": 0.42, + "grad_norm": 0.7454021573066711, + "learning_rate": 1.2915590043075647e-05, + "loss": 2.0428, + "step": 12495 + }, + { + "epoch": 0.42, + "grad_norm": 0.7235939502716064, + "learning_rate": 1.2914573313344568e-05, + "loss": 2.1094, + "step": 12496 + }, + { + "epoch": 0.42, + "grad_norm": 0.7358099818229675, + "learning_rate": 1.2913556550686344e-05, + "loss": 2.1168, + "step": 12497 + }, + { + "epoch": 0.42, + "grad_norm": 0.7191984057426453, + "learning_rate": 1.2912539755112468e-05, + "loss": 2.0134, + "step": 12498 + }, + { + "epoch": 0.42, + "grad_norm": 0.7245826125144958, + "learning_rate": 1.2911522926634427e-05, + "loss": 2.1307, + "step": 12499 + }, + { + "epoch": 0.42, + "grad_norm": 0.749428391456604, + "learning_rate": 1.2910506065263701e-05, + "loss": 2.1134, + "step": 12500 + }, + { + "epoch": 0.42, + "grad_norm": 0.7430476546287537, + "learning_rate": 1.2909489171011789e-05, + "loss": 2.0918, + "step": 12501 + }, + { + "epoch": 0.42, + "grad_norm": 0.7384743690490723, + "learning_rate": 1.2908472243890166e-05, + "loss": 2.0688, + "step": 12502 + }, + { + "epoch": 0.42, + "grad_norm": 0.7587735652923584, + "learning_rate": 1.2907455283910333e-05, + "loss": 2.0363, + "step": 12503 + }, + { + "epoch": 0.42, + "grad_norm": 0.7803107500076294, + "learning_rate": 1.2906438291083777e-05, + "loss": 1.9772, + "step": 12504 + }, + { + "epoch": 0.42, + "grad_norm": 0.7342994213104248, + "learning_rate": 1.2905421265421978e-05, + "loss": 2.1213, + "step": 12505 + }, + { + "epoch": 0.42, + "grad_norm": 0.7121531963348389, + "learning_rate": 1.2904404206936438e-05, + "loss": 2.0547, + "step": 12506 + }, + { + "epoch": 0.42, + "grad_norm": 0.7410596013069153, + "learning_rate": 1.2903387115638639e-05, + "loss": 2.1044, + "step": 12507 + }, + { + "epoch": 0.42, + "grad_norm": 0.7894978523254395, + "learning_rate": 1.2902369991540074e-05, + "loss": 2.0863, + "step": 12508 + }, + { + "epoch": 0.42, + "grad_norm": 0.7759518027305603, + "learning_rate": 1.2901352834652233e-05, + "loss": 2.0598, + "step": 12509 + }, + { + "epoch": 0.42, + "grad_norm": 0.6977453827857971, + "learning_rate": 1.2900335644986607e-05, + "loss": 2.0912, + "step": 12510 + }, + { + "epoch": 0.42, + "grad_norm": 0.7241869568824768, + "learning_rate": 1.2899318422554693e-05, + "loss": 2.0525, + "step": 12511 + }, + { + "epoch": 0.42, + "grad_norm": 0.7618778944015503, + "learning_rate": 1.2898301167367977e-05, + "loss": 2.1334, + "step": 12512 + }, + { + "epoch": 0.42, + "grad_norm": 0.7546077370643616, + "learning_rate": 1.2897283879437949e-05, + "loss": 2.1207, + "step": 12513 + }, + { + "epoch": 0.42, + "grad_norm": 0.7609128355979919, + "learning_rate": 1.289626655877611e-05, + "loss": 2.1855, + "step": 12514 + }, + { + "epoch": 0.42, + "grad_norm": 0.7545419335365295, + "learning_rate": 1.2895249205393947e-05, + "loss": 2.07, + "step": 12515 + }, + { + "epoch": 0.42, + "grad_norm": 0.7712535858154297, + "learning_rate": 1.2894231819302953e-05, + "loss": 2.0833, + "step": 12516 + }, + { + "epoch": 0.42, + "grad_norm": 0.7289283275604248, + "learning_rate": 1.289321440051463e-05, + "loss": 2.078, + "step": 12517 + }, + { + "epoch": 0.42, + "grad_norm": 0.7323263883590698, + "learning_rate": 1.2892196949040463e-05, + "loss": 2.126, + "step": 12518 + }, + { + "epoch": 0.42, + "grad_norm": 0.7540423274040222, + "learning_rate": 1.289117946489195e-05, + "loss": 2.1426, + "step": 12519 + }, + { + "epoch": 0.42, + "grad_norm": 0.7753336429595947, + "learning_rate": 1.2890161948080587e-05, + "loss": 2.1253, + "step": 12520 + }, + { + "epoch": 0.42, + "grad_norm": 0.7441282272338867, + "learning_rate": 1.2889144398617866e-05, + "loss": 2.0572, + "step": 12521 + }, + { + "epoch": 0.42, + "grad_norm": 0.8003175854682922, + "learning_rate": 1.2888126816515286e-05, + "loss": 2.1587, + "step": 12522 + }, + { + "epoch": 0.42, + "grad_norm": 0.7630079984664917, + "learning_rate": 1.288710920178434e-05, + "loss": 2.1268, + "step": 12523 + }, + { + "epoch": 0.42, + "grad_norm": 0.7639109492301941, + "learning_rate": 1.2886091554436528e-05, + "loss": 2.1387, + "step": 12524 + }, + { + "epoch": 0.42, + "grad_norm": 0.7448859810829163, + "learning_rate": 1.2885073874483345e-05, + "loss": 2.069, + "step": 12525 + }, + { + "epoch": 0.42, + "grad_norm": 0.7317858338356018, + "learning_rate": 1.288405616193629e-05, + "loss": 2.1252, + "step": 12526 + }, + { + "epoch": 0.42, + "grad_norm": 0.7437266707420349, + "learning_rate": 1.2883038416806852e-05, + "loss": 2.1007, + "step": 12527 + }, + { + "epoch": 0.42, + "grad_norm": 0.7373574376106262, + "learning_rate": 1.2882020639106543e-05, + "loss": 2.0574, + "step": 12528 + }, + { + "epoch": 0.42, + "grad_norm": 0.7733877301216125, + "learning_rate": 1.2881002828846851e-05, + "loss": 2.1218, + "step": 12529 + }, + { + "epoch": 0.42, + "grad_norm": 0.7624263763427734, + "learning_rate": 1.2879984986039278e-05, + "loss": 2.1774, + "step": 12530 + }, + { + "epoch": 0.42, + "grad_norm": 0.7640353441238403, + "learning_rate": 1.2878967110695322e-05, + "loss": 2.0998, + "step": 12531 + }, + { + "epoch": 0.42, + "grad_norm": 0.7497876286506653, + "learning_rate": 1.2877949202826483e-05, + "loss": 2.1312, + "step": 12532 + }, + { + "epoch": 0.42, + "grad_norm": 0.7114462852478027, + "learning_rate": 1.2876931262444262e-05, + "loss": 2.0907, + "step": 12533 + }, + { + "epoch": 0.42, + "grad_norm": 0.7434590458869934, + "learning_rate": 1.2875913289560153e-05, + "loss": 2.125, + "step": 12534 + }, + { + "epoch": 0.42, + "grad_norm": 0.7647614479064941, + "learning_rate": 1.2874895284185665e-05, + "loss": 2.1343, + "step": 12535 + }, + { + "epoch": 0.42, + "grad_norm": 0.7234348654747009, + "learning_rate": 1.2873877246332293e-05, + "loss": 2.1097, + "step": 12536 + }, + { + "epoch": 0.42, + "grad_norm": 0.7357848286628723, + "learning_rate": 1.2872859176011545e-05, + "loss": 2.0357, + "step": 12537 + }, + { + "epoch": 0.42, + "grad_norm": 0.7342125177383423, + "learning_rate": 1.2871841073234909e-05, + "loss": 2.0857, + "step": 12538 + }, + { + "epoch": 0.42, + "grad_norm": 0.7367901802062988, + "learning_rate": 1.2870822938013905e-05, + "loss": 2.0725, + "step": 12539 + }, + { + "epoch": 0.42, + "grad_norm": 0.7597434520721436, + "learning_rate": 1.2869804770360022e-05, + "loss": 2.0708, + "step": 12540 + }, + { + "epoch": 0.42, + "grad_norm": 0.7530860900878906, + "learning_rate": 1.2868786570284764e-05, + "loss": 2.1025, + "step": 12541 + }, + { + "epoch": 0.42, + "grad_norm": 0.7283613681793213, + "learning_rate": 1.286776833779964e-05, + "loss": 2.0568, + "step": 12542 + }, + { + "epoch": 0.42, + "grad_norm": 0.7463346719741821, + "learning_rate": 1.2866750072916147e-05, + "loss": 2.1213, + "step": 12543 + }, + { + "epoch": 0.42, + "grad_norm": 0.7241556644439697, + "learning_rate": 1.2865731775645794e-05, + "loss": 2.0233, + "step": 12544 + }, + { + "epoch": 0.42, + "grad_norm": 0.7484334111213684, + "learning_rate": 1.2864713446000082e-05, + "loss": 2.1057, + "step": 12545 + }, + { + "epoch": 0.42, + "grad_norm": 0.7291292548179626, + "learning_rate": 1.2863695083990515e-05, + "loss": 2.0629, + "step": 12546 + }, + { + "epoch": 0.42, + "grad_norm": 0.7417165637016296, + "learning_rate": 1.2862676689628602e-05, + "loss": 2.0462, + "step": 12547 + }, + { + "epoch": 0.42, + "grad_norm": 0.7339032292366028, + "learning_rate": 1.2861658262925846e-05, + "loss": 2.0988, + "step": 12548 + }, + { + "epoch": 0.42, + "grad_norm": 0.7546550035476685, + "learning_rate": 1.286063980389375e-05, + "loss": 2.074, + "step": 12549 + }, + { + "epoch": 0.42, + "grad_norm": 0.7336243391036987, + "learning_rate": 1.2859621312543821e-05, + "loss": 2.0354, + "step": 12550 + }, + { + "epoch": 0.42, + "grad_norm": 0.7206490635871887, + "learning_rate": 1.2858602788887569e-05, + "loss": 2.0985, + "step": 12551 + }, + { + "epoch": 0.42, + "grad_norm": 0.7196187973022461, + "learning_rate": 1.2857584232936498e-05, + "loss": 2.1298, + "step": 12552 + }, + { + "epoch": 0.42, + "grad_norm": 0.7507184743881226, + "learning_rate": 1.2856565644702112e-05, + "loss": 2.0732, + "step": 12553 + }, + { + "epoch": 0.42, + "grad_norm": 0.7262274026870728, + "learning_rate": 1.2855547024195922e-05, + "loss": 2.0221, + "step": 12554 + }, + { + "epoch": 0.42, + "grad_norm": 0.703575849533081, + "learning_rate": 1.2854528371429438e-05, + "loss": 2.0877, + "step": 12555 + }, + { + "epoch": 0.42, + "grad_norm": 0.7684732675552368, + "learning_rate": 1.2853509686414163e-05, + "loss": 2.1358, + "step": 12556 + }, + { + "epoch": 0.42, + "grad_norm": 0.7479212284088135, + "learning_rate": 1.2852490969161609e-05, + "loss": 2.0925, + "step": 12557 + }, + { + "epoch": 0.42, + "grad_norm": 0.7670333385467529, + "learning_rate": 1.2851472219683283e-05, + "loss": 2.1338, + "step": 12558 + }, + { + "epoch": 0.42, + "grad_norm": 0.7229040861129761, + "learning_rate": 1.2850453437990698e-05, + "loss": 2.1174, + "step": 12559 + }, + { + "epoch": 0.42, + "grad_norm": 0.7548657655715942, + "learning_rate": 1.2849434624095357e-05, + "loss": 2.0903, + "step": 12560 + }, + { + "epoch": 0.42, + "grad_norm": 0.7260783314704895, + "learning_rate": 1.2848415778008776e-05, + "loss": 2.1035, + "step": 12561 + }, + { + "epoch": 0.42, + "grad_norm": 0.767126202583313, + "learning_rate": 1.284739689974246e-05, + "loss": 2.1313, + "step": 12562 + }, + { + "epoch": 0.42, + "grad_norm": 0.7501829862594604, + "learning_rate": 1.2846377989307923e-05, + "loss": 2.0471, + "step": 12563 + }, + { + "epoch": 0.42, + "grad_norm": 0.7142001986503601, + "learning_rate": 1.2845359046716676e-05, + "loss": 2.1107, + "step": 12564 + }, + { + "epoch": 0.42, + "grad_norm": 0.7691676616668701, + "learning_rate": 1.2844340071980232e-05, + "loss": 2.124, + "step": 12565 + }, + { + "epoch": 0.42, + "grad_norm": 0.7425621747970581, + "learning_rate": 1.28433210651101e-05, + "loss": 2.0082, + "step": 12566 + }, + { + "epoch": 0.42, + "grad_norm": 0.7841475605964661, + "learning_rate": 1.2842302026117793e-05, + "loss": 2.0725, + "step": 12567 + }, + { + "epoch": 0.42, + "grad_norm": 0.726830244064331, + "learning_rate": 1.2841282955014819e-05, + "loss": 2.1484, + "step": 12568 + }, + { + "epoch": 0.42, + "grad_norm": 0.717081606388092, + "learning_rate": 1.28402638518127e-05, + "loss": 2.1174, + "step": 12569 + }, + { + "epoch": 0.42, + "grad_norm": 0.7229527831077576, + "learning_rate": 1.2839244716522947e-05, + "loss": 2.1188, + "step": 12570 + }, + { + "epoch": 0.42, + "grad_norm": 0.7493440508842468, + "learning_rate": 1.2838225549157066e-05, + "loss": 2.022, + "step": 12571 + }, + { + "epoch": 0.42, + "grad_norm": 0.7204367518424988, + "learning_rate": 1.2837206349726578e-05, + "loss": 2.0566, + "step": 12572 + }, + { + "epoch": 0.42, + "grad_norm": 0.7157355546951294, + "learning_rate": 1.2836187118242998e-05, + "loss": 2.0886, + "step": 12573 + }, + { + "epoch": 0.42, + "grad_norm": 0.7666046619415283, + "learning_rate": 1.2835167854717833e-05, + "loss": 2.1545, + "step": 12574 + }, + { + "epoch": 0.42, + "grad_norm": 0.7181825041770935, + "learning_rate": 1.2834148559162608e-05, + "loss": 2.0887, + "step": 12575 + }, + { + "epoch": 0.42, + "grad_norm": 0.7197605967521667, + "learning_rate": 1.283312923158883e-05, + "loss": 2.1795, + "step": 12576 + }, + { + "epoch": 0.42, + "grad_norm": 0.7486099004745483, + "learning_rate": 1.283210987200802e-05, + "loss": 2.107, + "step": 12577 + }, + { + "epoch": 0.42, + "grad_norm": 0.7456390857696533, + "learning_rate": 1.2831090480431691e-05, + "loss": 2.1254, + "step": 12578 + }, + { + "epoch": 0.42, + "grad_norm": 0.7212749719619751, + "learning_rate": 1.2830071056871363e-05, + "loss": 2.1212, + "step": 12579 + }, + { + "epoch": 0.42, + "grad_norm": 0.7251642942428589, + "learning_rate": 1.2829051601338549e-05, + "loss": 2.0992, + "step": 12580 + }, + { + "epoch": 0.42, + "grad_norm": 0.7159357666969299, + "learning_rate": 1.2828032113844771e-05, + "loss": 2.0818, + "step": 12581 + }, + { + "epoch": 0.42, + "grad_norm": 0.7305713295936584, + "learning_rate": 1.2827012594401538e-05, + "loss": 2.1023, + "step": 12582 + }, + { + "epoch": 0.42, + "grad_norm": 0.7207646369934082, + "learning_rate": 1.282599304302038e-05, + "loss": 2.0929, + "step": 12583 + }, + { + "epoch": 0.42, + "grad_norm": 0.7722252607345581, + "learning_rate": 1.2824973459712803e-05, + "loss": 2.1228, + "step": 12584 + }, + { + "epoch": 0.42, + "grad_norm": 0.6945728659629822, + "learning_rate": 1.2823953844490335e-05, + "loss": 2.123, + "step": 12585 + }, + { + "epoch": 0.42, + "grad_norm": 0.7388607859611511, + "learning_rate": 1.2822934197364491e-05, + "loss": 2.0776, + "step": 12586 + }, + { + "epoch": 0.42, + "grad_norm": 0.7294972538948059, + "learning_rate": 1.282191451834679e-05, + "loss": 2.0525, + "step": 12587 + }, + { + "epoch": 0.42, + "grad_norm": 0.7348666191101074, + "learning_rate": 1.2820894807448751e-05, + "loss": 2.1468, + "step": 12588 + }, + { + "epoch": 0.42, + "grad_norm": 0.6959039568901062, + "learning_rate": 1.28198750646819e-05, + "loss": 2.1045, + "step": 12589 + }, + { + "epoch": 0.42, + "grad_norm": 0.753452479839325, + "learning_rate": 1.281885529005775e-05, + "loss": 2.1281, + "step": 12590 + }, + { + "epoch": 0.42, + "grad_norm": 0.744562566280365, + "learning_rate": 1.2817835483587827e-05, + "loss": 2.1277, + "step": 12591 + }, + { + "epoch": 0.42, + "grad_norm": 0.7442547678947449, + "learning_rate": 1.2816815645283648e-05, + "loss": 2.0969, + "step": 12592 + }, + { + "epoch": 0.42, + "grad_norm": 0.735022783279419, + "learning_rate": 1.2815795775156736e-05, + "loss": 2.0781, + "step": 12593 + }, + { + "epoch": 0.42, + "grad_norm": 0.7144117951393127, + "learning_rate": 1.2814775873218616e-05, + "loss": 2.1296, + "step": 12594 + }, + { + "epoch": 0.42, + "grad_norm": 0.7010131478309631, + "learning_rate": 1.2813755939480808e-05, + "loss": 2.1471, + "step": 12595 + }, + { + "epoch": 0.42, + "grad_norm": 0.6953817009925842, + "learning_rate": 1.2812735973954832e-05, + "loss": 2.0686, + "step": 12596 + }, + { + "epoch": 0.42, + "grad_norm": 0.758838951587677, + "learning_rate": 1.2811715976652215e-05, + "loss": 2.117, + "step": 12597 + }, + { + "epoch": 0.42, + "grad_norm": 0.7543825507164001, + "learning_rate": 1.2810695947584478e-05, + "loss": 2.0829, + "step": 12598 + }, + { + "epoch": 0.42, + "grad_norm": 0.7288152575492859, + "learning_rate": 1.2809675886763147e-05, + "loss": 2.1239, + "step": 12599 + }, + { + "epoch": 0.42, + "grad_norm": 0.8003158569335938, + "learning_rate": 1.2808655794199743e-05, + "loss": 2.0807, + "step": 12600 + }, + { + "epoch": 0.42, + "grad_norm": 0.7143562436103821, + "learning_rate": 1.2807635669905791e-05, + "loss": 2.0931, + "step": 12601 + }, + { + "epoch": 0.42, + "grad_norm": 0.7711488604545593, + "learning_rate": 1.2806615513892817e-05, + "loss": 2.1051, + "step": 12602 + }, + { + "epoch": 0.42, + "grad_norm": 0.7360863089561462, + "learning_rate": 1.2805595326172347e-05, + "loss": 2.1104, + "step": 12603 + }, + { + "epoch": 0.42, + "grad_norm": 0.7283881306648254, + "learning_rate": 1.2804575106755905e-05, + "loss": 2.1218, + "step": 12604 + }, + { + "epoch": 0.42, + "grad_norm": 0.7559407353401184, + "learning_rate": 1.2803554855655019e-05, + "loss": 2.1171, + "step": 12605 + }, + { + "epoch": 0.42, + "grad_norm": 0.742595911026001, + "learning_rate": 1.280253457288121e-05, + "loss": 2.1065, + "step": 12606 + }, + { + "epoch": 0.42, + "grad_norm": 0.759644627571106, + "learning_rate": 1.2801514258446006e-05, + "loss": 2.0869, + "step": 12607 + }, + { + "epoch": 0.42, + "grad_norm": 0.7000303268432617, + "learning_rate": 1.2800493912360942e-05, + "loss": 2.052, + "step": 12608 + }, + { + "epoch": 0.42, + "grad_norm": 0.7211377620697021, + "learning_rate": 1.2799473534637535e-05, + "loss": 2.1428, + "step": 12609 + }, + { + "epoch": 0.42, + "grad_norm": 0.73008131980896, + "learning_rate": 1.2798453125287317e-05, + "loss": 2.1007, + "step": 12610 + }, + { + "epoch": 0.42, + "grad_norm": 0.7594427466392517, + "learning_rate": 1.2797432684321818e-05, + "loss": 2.1128, + "step": 12611 + }, + { + "epoch": 0.42, + "grad_norm": 0.7525646686553955, + "learning_rate": 1.279641221175256e-05, + "loss": 2.1618, + "step": 12612 + }, + { + "epoch": 0.42, + "grad_norm": 0.7373979687690735, + "learning_rate": 1.2795391707591078e-05, + "loss": 2.1091, + "step": 12613 + }, + { + "epoch": 0.42, + "grad_norm": 0.7182112336158752, + "learning_rate": 1.2794371171848899e-05, + "loss": 2.1101, + "step": 12614 + }, + { + "epoch": 0.42, + "grad_norm": 0.7231495380401611, + "learning_rate": 1.2793350604537552e-05, + "loss": 2.1376, + "step": 12615 + }, + { + "epoch": 0.42, + "grad_norm": 0.7288999557495117, + "learning_rate": 1.2792330005668568e-05, + "loss": 2.0933, + "step": 12616 + }, + { + "epoch": 0.42, + "grad_norm": 0.717610239982605, + "learning_rate": 1.2791309375253472e-05, + "loss": 2.0764, + "step": 12617 + }, + { + "epoch": 0.42, + "grad_norm": 0.7496227025985718, + "learning_rate": 1.2790288713303803e-05, + "loss": 2.1384, + "step": 12618 + }, + { + "epoch": 0.42, + "grad_norm": 0.7234798073768616, + "learning_rate": 1.2789268019831085e-05, + "loss": 2.0489, + "step": 12619 + }, + { + "epoch": 0.42, + "grad_norm": 0.7556368708610535, + "learning_rate": 1.2788247294846849e-05, + "loss": 2.1336, + "step": 12620 + }, + { + "epoch": 0.42, + "grad_norm": 0.7156413793563843, + "learning_rate": 1.2787226538362636e-05, + "loss": 2.1565, + "step": 12621 + }, + { + "epoch": 0.42, + "grad_norm": 0.6893162727355957, + "learning_rate": 1.2786205750389966e-05, + "loss": 2.0671, + "step": 12622 + }, + { + "epoch": 0.42, + "grad_norm": 0.7370979189872742, + "learning_rate": 1.2785184930940377e-05, + "loss": 2.095, + "step": 12623 + }, + { + "epoch": 0.42, + "grad_norm": 0.7006845474243164, + "learning_rate": 1.2784164080025403e-05, + "loss": 2.0302, + "step": 12624 + }, + { + "epoch": 0.42, + "grad_norm": 0.7243999242782593, + "learning_rate": 1.2783143197656574e-05, + "loss": 2.1083, + "step": 12625 + }, + { + "epoch": 0.42, + "grad_norm": 0.7104041576385498, + "learning_rate": 1.2782122283845424e-05, + "loss": 2.0912, + "step": 12626 + }, + { + "epoch": 0.42, + "grad_norm": 0.7446256279945374, + "learning_rate": 1.2781101338603487e-05, + "loss": 2.1065, + "step": 12627 + }, + { + "epoch": 0.42, + "grad_norm": 0.7413390874862671, + "learning_rate": 1.2780080361942295e-05, + "loss": 2.1584, + "step": 12628 + }, + { + "epoch": 0.42, + "grad_norm": 0.7150620818138123, + "learning_rate": 1.2779059353873385e-05, + "loss": 2.0622, + "step": 12629 + }, + { + "epoch": 0.42, + "grad_norm": 0.7796303033828735, + "learning_rate": 1.2778038314408294e-05, + "loss": 2.1759, + "step": 12630 + }, + { + "epoch": 0.42, + "grad_norm": 0.7301186919212341, + "learning_rate": 1.2777017243558549e-05, + "loss": 2.0812, + "step": 12631 + }, + { + "epoch": 0.42, + "grad_norm": 0.7088884711265564, + "learning_rate": 1.2775996141335697e-05, + "loss": 2.0692, + "step": 12632 + }, + { + "epoch": 0.42, + "grad_norm": 0.7162004113197327, + "learning_rate": 1.2774975007751265e-05, + "loss": 2.0777, + "step": 12633 + }, + { + "epoch": 0.42, + "grad_norm": 0.712647557258606, + "learning_rate": 1.2773953842816791e-05, + "loss": 2.042, + "step": 12634 + }, + { + "epoch": 0.42, + "grad_norm": 0.7210012078285217, + "learning_rate": 1.2772932646543811e-05, + "loss": 2.0795, + "step": 12635 + }, + { + "epoch": 0.42, + "grad_norm": 0.7615554928779602, + "learning_rate": 1.2771911418943865e-05, + "loss": 2.101, + "step": 12636 + }, + { + "epoch": 0.42, + "grad_norm": 0.7578179240226746, + "learning_rate": 1.2770890160028486e-05, + "loss": 2.0935, + "step": 12637 + }, + { + "epoch": 0.42, + "grad_norm": 0.7398737072944641, + "learning_rate": 1.2769868869809216e-05, + "loss": 2.0496, + "step": 12638 + }, + { + "epoch": 0.42, + "grad_norm": 0.7361347675323486, + "learning_rate": 1.2768847548297592e-05, + "loss": 2.1154, + "step": 12639 + }, + { + "epoch": 0.42, + "grad_norm": 0.7262876629829407, + "learning_rate": 1.2767826195505148e-05, + "loss": 2.0848, + "step": 12640 + }, + { + "epoch": 0.42, + "grad_norm": 0.7332879900932312, + "learning_rate": 1.276680481144343e-05, + "loss": 2.0867, + "step": 12641 + }, + { + "epoch": 0.42, + "grad_norm": 0.7368112802505493, + "learning_rate": 1.2765783396123968e-05, + "loss": 2.0892, + "step": 12642 + }, + { + "epoch": 0.42, + "grad_norm": 0.7189192175865173, + "learning_rate": 1.2764761949558308e-05, + "loss": 2.0955, + "step": 12643 + }, + { + "epoch": 0.42, + "grad_norm": 0.7539229989051819, + "learning_rate": 1.2763740471757989e-05, + "loss": 2.0989, + "step": 12644 + }, + { + "epoch": 0.42, + "grad_norm": 0.748955488204956, + "learning_rate": 1.2762718962734548e-05, + "loss": 2.07, + "step": 12645 + }, + { + "epoch": 0.42, + "grad_norm": 0.7497884035110474, + "learning_rate": 1.2761697422499528e-05, + "loss": 2.0855, + "step": 12646 + }, + { + "epoch": 0.42, + "grad_norm": 0.7217252254486084, + "learning_rate": 1.2760675851064468e-05, + "loss": 2.0618, + "step": 12647 + }, + { + "epoch": 0.42, + "grad_norm": 0.7172107696533203, + "learning_rate": 1.2759654248440911e-05, + "loss": 2.0552, + "step": 12648 + }, + { + "epoch": 0.42, + "grad_norm": 0.7179107069969177, + "learning_rate": 1.2758632614640398e-05, + "loss": 2.1127, + "step": 12649 + }, + { + "epoch": 0.42, + "grad_norm": 0.7248226404190063, + "learning_rate": 1.275761094967447e-05, + "loss": 2.0177, + "step": 12650 + }, + { + "epoch": 0.42, + "grad_norm": 0.7487012147903442, + "learning_rate": 1.275658925355467e-05, + "loss": 2.1302, + "step": 12651 + }, + { + "epoch": 0.42, + "grad_norm": 0.7389917969703674, + "learning_rate": 1.2755567526292541e-05, + "loss": 2.1286, + "step": 12652 + }, + { + "epoch": 0.42, + "grad_norm": 0.7343252897262573, + "learning_rate": 1.2754545767899622e-05, + "loss": 2.0585, + "step": 12653 + }, + { + "epoch": 0.42, + "grad_norm": 0.7418295741081238, + "learning_rate": 1.2753523978387463e-05, + "loss": 2.1479, + "step": 12654 + }, + { + "epoch": 0.42, + "grad_norm": 0.7431178689002991, + "learning_rate": 1.2752502157767604e-05, + "loss": 2.1084, + "step": 12655 + }, + { + "epoch": 0.42, + "grad_norm": 0.7074499130249023, + "learning_rate": 1.2751480306051584e-05, + "loss": 2.1276, + "step": 12656 + }, + { + "epoch": 0.42, + "grad_norm": 0.712332546710968, + "learning_rate": 1.2750458423250955e-05, + "loss": 2.1186, + "step": 12657 + }, + { + "epoch": 0.42, + "grad_norm": 0.7484898567199707, + "learning_rate": 1.274943650937726e-05, + "loss": 2.012, + "step": 12658 + }, + { + "epoch": 0.42, + "grad_norm": 0.7371085286140442, + "learning_rate": 1.2748414564442039e-05, + "loss": 2.0341, + "step": 12659 + }, + { + "epoch": 0.42, + "grad_norm": 0.7543914318084717, + "learning_rate": 1.2747392588456844e-05, + "loss": 2.1985, + "step": 12660 + }, + { + "epoch": 0.42, + "grad_norm": 0.7225934863090515, + "learning_rate": 1.2746370581433215e-05, + "loss": 2.0547, + "step": 12661 + }, + { + "epoch": 0.42, + "grad_norm": 0.7660501599311829, + "learning_rate": 1.2745348543382702e-05, + "loss": 2.0575, + "step": 12662 + }, + { + "epoch": 0.42, + "grad_norm": 0.7421630024909973, + "learning_rate": 1.2744326474316853e-05, + "loss": 2.1751, + "step": 12663 + }, + { + "epoch": 0.42, + "grad_norm": 0.7121289372444153, + "learning_rate": 1.2743304374247208e-05, + "loss": 2.0713, + "step": 12664 + }, + { + "epoch": 0.42, + "grad_norm": 0.737197995185852, + "learning_rate": 1.2742282243185322e-05, + "loss": 2.1301, + "step": 12665 + }, + { + "epoch": 0.42, + "grad_norm": 0.740676760673523, + "learning_rate": 1.2741260081142734e-05, + "loss": 2.1065, + "step": 12666 + }, + { + "epoch": 0.42, + "grad_norm": 0.7201475501060486, + "learning_rate": 1.2740237888130998e-05, + "loss": 2.0979, + "step": 12667 + }, + { + "epoch": 0.42, + "grad_norm": 0.7298979759216309, + "learning_rate": 1.2739215664161659e-05, + "loss": 2.0547, + "step": 12668 + }, + { + "epoch": 0.42, + "grad_norm": 0.7261883020401001, + "learning_rate": 1.273819340924627e-05, + "loss": 2.0599, + "step": 12669 + }, + { + "epoch": 0.42, + "grad_norm": 0.7303823232650757, + "learning_rate": 1.2737171123396373e-05, + "loss": 2.0918, + "step": 12670 + }, + { + "epoch": 0.42, + "grad_norm": 0.7302249073982239, + "learning_rate": 1.2736148806623522e-05, + "loss": 2.0722, + "step": 12671 + }, + { + "epoch": 0.42, + "grad_norm": 0.7708373069763184, + "learning_rate": 1.2735126458939265e-05, + "loss": 2.138, + "step": 12672 + }, + { + "epoch": 0.42, + "grad_norm": 0.720122754573822, + "learning_rate": 1.2734104080355153e-05, + "loss": 2.1061, + "step": 12673 + }, + { + "epoch": 0.42, + "grad_norm": 0.7400721311569214, + "learning_rate": 1.2733081670882737e-05, + "loss": 2.0741, + "step": 12674 + }, + { + "epoch": 0.42, + "grad_norm": 0.7440530061721802, + "learning_rate": 1.2732059230533561e-05, + "loss": 2.0882, + "step": 12675 + }, + { + "epoch": 0.42, + "grad_norm": 0.754288911819458, + "learning_rate": 1.2731036759319186e-05, + "loss": 2.0258, + "step": 12676 + }, + { + "epoch": 0.42, + "grad_norm": 0.7512241005897522, + "learning_rate": 1.2730014257251155e-05, + "loss": 2.0688, + "step": 12677 + }, + { + "epoch": 0.42, + "grad_norm": 0.7493742108345032, + "learning_rate": 1.2728991724341024e-05, + "loss": 2.0567, + "step": 12678 + }, + { + "epoch": 0.42, + "grad_norm": 0.7561684846878052, + "learning_rate": 1.2727969160600346e-05, + "loss": 2.0758, + "step": 12679 + }, + { + "epoch": 0.42, + "grad_norm": 0.7259445190429688, + "learning_rate": 1.2726946566040668e-05, + "loss": 2.1686, + "step": 12680 + }, + { + "epoch": 0.42, + "grad_norm": 0.7286955714225769, + "learning_rate": 1.2725923940673548e-05, + "loss": 2.0844, + "step": 12681 + }, + { + "epoch": 0.42, + "grad_norm": 0.7230685353279114, + "learning_rate": 1.2724901284510535e-05, + "loss": 2.016, + "step": 12682 + }, + { + "epoch": 0.42, + "grad_norm": 0.7834388613700867, + "learning_rate": 1.2723878597563186e-05, + "loss": 2.0878, + "step": 12683 + }, + { + "epoch": 0.42, + "grad_norm": 0.6985279321670532, + "learning_rate": 1.272285587984305e-05, + "loss": 2.0895, + "step": 12684 + }, + { + "epoch": 0.42, + "grad_norm": 0.7152412533760071, + "learning_rate": 1.2721833131361689e-05, + "loss": 2.0616, + "step": 12685 + }, + { + "epoch": 0.42, + "grad_norm": 0.7252992987632751, + "learning_rate": 1.2720810352130649e-05, + "loss": 2.1278, + "step": 12686 + }, + { + "epoch": 0.42, + "grad_norm": 0.7453857064247131, + "learning_rate": 1.271978754216149e-05, + "loss": 2.1403, + "step": 12687 + }, + { + "epoch": 0.42, + "grad_norm": 0.7167844176292419, + "learning_rate": 1.2718764701465762e-05, + "loss": 2.0571, + "step": 12688 + }, + { + "epoch": 0.42, + "grad_norm": 0.6989026665687561, + "learning_rate": 1.2717741830055026e-05, + "loss": 2.0639, + "step": 12689 + }, + { + "epoch": 0.42, + "grad_norm": 0.7144736647605896, + "learning_rate": 1.2716718927940837e-05, + "loss": 2.0657, + "step": 12690 + }, + { + "epoch": 0.42, + "grad_norm": 0.7150135636329651, + "learning_rate": 1.2715695995134744e-05, + "loss": 2.0971, + "step": 12691 + }, + { + "epoch": 0.42, + "grad_norm": 0.7528398633003235, + "learning_rate": 1.2714673031648317e-05, + "loss": 2.0996, + "step": 12692 + }, + { + "epoch": 0.42, + "grad_norm": 0.7265036702156067, + "learning_rate": 1.2713650037493102e-05, + "loss": 2.0359, + "step": 12693 + }, + { + "epoch": 0.42, + "grad_norm": 0.7068057656288147, + "learning_rate": 1.2712627012680656e-05, + "loss": 2.0662, + "step": 12694 + }, + { + "epoch": 0.42, + "grad_norm": 0.7649387121200562, + "learning_rate": 1.2711603957222542e-05, + "loss": 2.0895, + "step": 12695 + }, + { + "epoch": 0.42, + "grad_norm": 0.722756564617157, + "learning_rate": 1.271058087113032e-05, + "loss": 2.0955, + "step": 12696 + }, + { + "epoch": 0.42, + "grad_norm": 0.7174360752105713, + "learning_rate": 1.2709557754415536e-05, + "loss": 2.1431, + "step": 12697 + }, + { + "epoch": 0.42, + "grad_norm": 0.7663110494613647, + "learning_rate": 1.2708534607089762e-05, + "loss": 2.1089, + "step": 12698 + }, + { + "epoch": 0.42, + "grad_norm": 0.7197758555412292, + "learning_rate": 1.270751142916455e-05, + "loss": 2.101, + "step": 12699 + }, + { + "epoch": 0.42, + "grad_norm": 0.7626747488975525, + "learning_rate": 1.2706488220651458e-05, + "loss": 2.115, + "step": 12700 + }, + { + "epoch": 0.42, + "grad_norm": 0.7441595792770386, + "learning_rate": 1.2705464981562053e-05, + "loss": 2.1371, + "step": 12701 + }, + { + "epoch": 0.42, + "grad_norm": 0.7302168011665344, + "learning_rate": 1.2704441711907887e-05, + "loss": 2.0224, + "step": 12702 + }, + { + "epoch": 0.42, + "grad_norm": 0.7157600522041321, + "learning_rate": 1.2703418411700525e-05, + "loss": 2.0739, + "step": 12703 + }, + { + "epoch": 0.42, + "grad_norm": 0.7158370614051819, + "learning_rate": 1.2702395080951523e-05, + "loss": 2.1041, + "step": 12704 + }, + { + "epoch": 0.42, + "grad_norm": 0.7379342913627625, + "learning_rate": 1.270137171967245e-05, + "loss": 2.056, + "step": 12705 + }, + { + "epoch": 0.42, + "grad_norm": 0.7154335379600525, + "learning_rate": 1.270034832787486e-05, + "loss": 2.1051, + "step": 12706 + }, + { + "epoch": 0.42, + "grad_norm": 0.7598645091056824, + "learning_rate": 1.2699324905570316e-05, + "loss": 2.0422, + "step": 12707 + }, + { + "epoch": 0.42, + "grad_norm": 0.7315258979797363, + "learning_rate": 1.2698301452770382e-05, + "loss": 2.1084, + "step": 12708 + }, + { + "epoch": 0.42, + "grad_norm": 0.722845733165741, + "learning_rate": 1.2697277969486623e-05, + "loss": 2.1356, + "step": 12709 + }, + { + "epoch": 0.42, + "grad_norm": 0.7215485572814941, + "learning_rate": 1.2696254455730592e-05, + "loss": 2.0826, + "step": 12710 + }, + { + "epoch": 0.42, + "grad_norm": 0.711340069770813, + "learning_rate": 1.2695230911513861e-05, + "loss": 2.1298, + "step": 12711 + }, + { + "epoch": 0.42, + "grad_norm": 0.7505236268043518, + "learning_rate": 1.2694207336847995e-05, + "loss": 2.0924, + "step": 12712 + }, + { + "epoch": 0.42, + "grad_norm": 0.7219468355178833, + "learning_rate": 1.2693183731744547e-05, + "loss": 2.1006, + "step": 12713 + }, + { + "epoch": 0.42, + "grad_norm": 0.720144510269165, + "learning_rate": 1.2692160096215092e-05, + "loss": 2.1187, + "step": 12714 + }, + { + "epoch": 0.42, + "grad_norm": 0.7063847780227661, + "learning_rate": 1.2691136430271187e-05, + "loss": 2.1038, + "step": 12715 + }, + { + "epoch": 0.42, + "grad_norm": 0.7389441132545471, + "learning_rate": 1.2690112733924403e-05, + "loss": 2.1456, + "step": 12716 + }, + { + "epoch": 0.42, + "grad_norm": 0.7234571576118469, + "learning_rate": 1.2689089007186297e-05, + "loss": 2.1394, + "step": 12717 + }, + { + "epoch": 0.42, + "grad_norm": 0.7167448401451111, + "learning_rate": 1.2688065250068442e-05, + "loss": 2.0266, + "step": 12718 + }, + { + "epoch": 0.42, + "grad_norm": 0.7269853949546814, + "learning_rate": 1.2687041462582402e-05, + "loss": 2.0827, + "step": 12719 + }, + { + "epoch": 0.42, + "grad_norm": 0.7125935554504395, + "learning_rate": 1.2686017644739743e-05, + "loss": 2.087, + "step": 12720 + }, + { + "epoch": 0.42, + "grad_norm": 0.7153884172439575, + "learning_rate": 1.2684993796552027e-05, + "loss": 2.0965, + "step": 12721 + }, + { + "epoch": 0.42, + "grad_norm": 0.7356677651405334, + "learning_rate": 1.2683969918030828e-05, + "loss": 2.0973, + "step": 12722 + }, + { + "epoch": 0.42, + "grad_norm": 0.7685341835021973, + "learning_rate": 1.2682946009187711e-05, + "loss": 2.0651, + "step": 12723 + }, + { + "epoch": 0.42, + "grad_norm": 0.7065647840499878, + "learning_rate": 1.2681922070034239e-05, + "loss": 2.0572, + "step": 12724 + }, + { + "epoch": 0.42, + "grad_norm": 0.7394136786460876, + "learning_rate": 1.2680898100581986e-05, + "loss": 2.0904, + "step": 12725 + }, + { + "epoch": 0.42, + "grad_norm": 0.7436098456382751, + "learning_rate": 1.2679874100842516e-05, + "loss": 2.084, + "step": 12726 + }, + { + "epoch": 0.42, + "grad_norm": 0.729438841342926, + "learning_rate": 1.2678850070827397e-05, + "loss": 2.0987, + "step": 12727 + }, + { + "epoch": 0.42, + "grad_norm": 0.7214196920394897, + "learning_rate": 1.2677826010548202e-05, + "loss": 2.0792, + "step": 12728 + }, + { + "epoch": 0.42, + "grad_norm": 0.7163324356079102, + "learning_rate": 1.2676801920016497e-05, + "loss": 2.0815, + "step": 12729 + }, + { + "epoch": 0.42, + "grad_norm": 0.7306609749794006, + "learning_rate": 1.2675777799243853e-05, + "loss": 2.1288, + "step": 12730 + }, + { + "epoch": 0.42, + "grad_norm": 0.7139286994934082, + "learning_rate": 1.2674753648241844e-05, + "loss": 2.0556, + "step": 12731 + }, + { + "epoch": 0.42, + "grad_norm": 0.7436847686767578, + "learning_rate": 1.2673729467022029e-05, + "loss": 2.058, + "step": 12732 + }, + { + "epoch": 0.42, + "grad_norm": 0.7410436272621155, + "learning_rate": 1.267270525559599e-05, + "loss": 2.0728, + "step": 12733 + }, + { + "epoch": 0.42, + "grad_norm": 0.7239573001861572, + "learning_rate": 1.2671681013975292e-05, + "loss": 2.0907, + "step": 12734 + }, + { + "epoch": 0.42, + "grad_norm": 0.7278544306755066, + "learning_rate": 1.2670656742171505e-05, + "loss": 2.1209, + "step": 12735 + }, + { + "epoch": 0.42, + "grad_norm": 0.7375119924545288, + "learning_rate": 1.2669632440196208e-05, + "loss": 2.0564, + "step": 12736 + }, + { + "epoch": 0.42, + "grad_norm": 0.7421109080314636, + "learning_rate": 1.2668608108060966e-05, + "loss": 2.0524, + "step": 12737 + }, + { + "epoch": 0.42, + "grad_norm": 0.765608549118042, + "learning_rate": 1.2667583745777354e-05, + "loss": 2.0263, + "step": 12738 + }, + { + "epoch": 0.42, + "grad_norm": 0.7103093266487122, + "learning_rate": 1.2666559353356944e-05, + "loss": 2.004, + "step": 12739 + }, + { + "epoch": 0.42, + "grad_norm": 0.7310738563537598, + "learning_rate": 1.2665534930811308e-05, + "loss": 2.1353, + "step": 12740 + }, + { + "epoch": 0.42, + "grad_norm": 0.7467474341392517, + "learning_rate": 1.2664510478152021e-05, + "loss": 2.0687, + "step": 12741 + }, + { + "epoch": 0.42, + "grad_norm": 0.7339344620704651, + "learning_rate": 1.2663485995390657e-05, + "loss": 2.1003, + "step": 12742 + }, + { + "epoch": 0.42, + "grad_norm": 0.7507628798484802, + "learning_rate": 1.2662461482538788e-05, + "loss": 2.0869, + "step": 12743 + }, + { + "epoch": 0.42, + "grad_norm": 0.7471380233764648, + "learning_rate": 1.2661436939607992e-05, + "loss": 2.0552, + "step": 12744 + }, + { + "epoch": 0.42, + "grad_norm": 0.7467623353004456, + "learning_rate": 1.266041236660984e-05, + "loss": 2.0478, + "step": 12745 + }, + { + "epoch": 0.42, + "grad_norm": 0.7515903115272522, + "learning_rate": 1.2659387763555906e-05, + "loss": 1.9875, + "step": 12746 + }, + { + "epoch": 0.42, + "grad_norm": 0.7452664971351624, + "learning_rate": 1.2658363130457771e-05, + "loss": 2.098, + "step": 12747 + }, + { + "epoch": 0.42, + "grad_norm": 0.7219454050064087, + "learning_rate": 1.2657338467327005e-05, + "loss": 2.1158, + "step": 12748 + }, + { + "epoch": 0.42, + "grad_norm": 0.7090778946876526, + "learning_rate": 1.2656313774175186e-05, + "loss": 2.0913, + "step": 12749 + }, + { + "epoch": 0.42, + "grad_norm": 0.7454358339309692, + "learning_rate": 1.2655289051013893e-05, + "loss": 2.0386, + "step": 12750 + }, + { + "epoch": 0.42, + "grad_norm": 0.7435495257377625, + "learning_rate": 1.2654264297854699e-05, + "loss": 2.132, + "step": 12751 + }, + { + "epoch": 0.42, + "grad_norm": 0.7354375123977661, + "learning_rate": 1.2653239514709184e-05, + "loss": 2.0563, + "step": 12752 + }, + { + "epoch": 0.42, + "grad_norm": 0.7272806763648987, + "learning_rate": 1.265221470158892e-05, + "loss": 2.0682, + "step": 12753 + }, + { + "epoch": 0.42, + "grad_norm": 0.7368921041488647, + "learning_rate": 1.2651189858505492e-05, + "loss": 2.1298, + "step": 12754 + }, + { + "epoch": 0.42, + "grad_norm": 0.7720046043395996, + "learning_rate": 1.2650164985470475e-05, + "loss": 2.0695, + "step": 12755 + }, + { + "epoch": 0.42, + "grad_norm": 0.7313575744628906, + "learning_rate": 1.2649140082495447e-05, + "loss": 2.0726, + "step": 12756 + }, + { + "epoch": 0.42, + "grad_norm": 0.7854654788970947, + "learning_rate": 1.2648115149591984e-05, + "loss": 2.15, + "step": 12757 + }, + { + "epoch": 0.42, + "grad_norm": 0.7268454432487488, + "learning_rate": 1.2647090186771673e-05, + "loss": 2.0519, + "step": 12758 + }, + { + "epoch": 0.42, + "grad_norm": 0.7306022047996521, + "learning_rate": 1.2646065194046085e-05, + "loss": 2.0747, + "step": 12759 + }, + { + "epoch": 0.42, + "grad_norm": 0.7409251928329468, + "learning_rate": 1.2645040171426803e-05, + "loss": 2.0599, + "step": 12760 + }, + { + "epoch": 0.42, + "grad_norm": 0.7362368702888489, + "learning_rate": 1.2644015118925408e-05, + "loss": 2.0861, + "step": 12761 + }, + { + "epoch": 0.42, + "grad_norm": 0.7322105169296265, + "learning_rate": 1.264299003655348e-05, + "loss": 2.1156, + "step": 12762 + }, + { + "epoch": 0.42, + "grad_norm": 0.7246610522270203, + "learning_rate": 1.2641964924322598e-05, + "loss": 2.1398, + "step": 12763 + }, + { + "epoch": 0.42, + "grad_norm": 0.7239896655082703, + "learning_rate": 1.2640939782244345e-05, + "loss": 2.106, + "step": 12764 + }, + { + "epoch": 0.42, + "grad_norm": 0.7168049216270447, + "learning_rate": 1.2639914610330304e-05, + "loss": 2.0629, + "step": 12765 + }, + { + "epoch": 0.42, + "grad_norm": 0.7442251443862915, + "learning_rate": 1.2638889408592055e-05, + "loss": 2.061, + "step": 12766 + }, + { + "epoch": 0.42, + "grad_norm": 0.7725180387496948, + "learning_rate": 1.263786417704118e-05, + "loss": 2.138, + "step": 12767 + }, + { + "epoch": 0.42, + "grad_norm": 0.7543867230415344, + "learning_rate": 1.2636838915689258e-05, + "loss": 2.0952, + "step": 12768 + }, + { + "epoch": 0.42, + "grad_norm": 0.7561289072036743, + "learning_rate": 1.263581362454788e-05, + "loss": 2.1423, + "step": 12769 + }, + { + "epoch": 0.42, + "grad_norm": 0.7409908771514893, + "learning_rate": 1.263478830362862e-05, + "loss": 2.0512, + "step": 12770 + }, + { + "epoch": 0.42, + "grad_norm": 0.7405576109886169, + "learning_rate": 1.2633762952943067e-05, + "loss": 2.1191, + "step": 12771 + }, + { + "epoch": 0.42, + "grad_norm": 0.7161704897880554, + "learning_rate": 1.2632737572502804e-05, + "loss": 1.996, + "step": 12772 + }, + { + "epoch": 0.42, + "grad_norm": 0.7001577019691467, + "learning_rate": 1.2631712162319417e-05, + "loss": 2.0985, + "step": 12773 + }, + { + "epoch": 0.42, + "grad_norm": 0.8032430410385132, + "learning_rate": 1.2630686722404486e-05, + "loss": 2.0641, + "step": 12774 + }, + { + "epoch": 0.43, + "grad_norm": 0.7699704170227051, + "learning_rate": 1.2629661252769599e-05, + "loss": 2.1456, + "step": 12775 + }, + { + "epoch": 0.43, + "grad_norm": 0.7268160581588745, + "learning_rate": 1.2628635753426339e-05, + "loss": 2.0595, + "step": 12776 + }, + { + "epoch": 0.43, + "grad_norm": 0.7381707429885864, + "learning_rate": 1.2627610224386295e-05, + "loss": 2.0731, + "step": 12777 + }, + { + "epoch": 0.43, + "grad_norm": 0.7395838499069214, + "learning_rate": 1.262658466566105e-05, + "loss": 2.0963, + "step": 12778 + }, + { + "epoch": 0.43, + "grad_norm": 0.7667689919471741, + "learning_rate": 1.2625559077262188e-05, + "loss": 2.1016, + "step": 12779 + }, + { + "epoch": 0.43, + "grad_norm": 0.7195752859115601, + "learning_rate": 1.2624533459201302e-05, + "loss": 2.1036, + "step": 12780 + }, + { + "epoch": 0.43, + "grad_norm": 0.7402536869049072, + "learning_rate": 1.2623507811489974e-05, + "loss": 2.0956, + "step": 12781 + }, + { + "epoch": 0.43, + "grad_norm": 0.7418485283851624, + "learning_rate": 1.2622482134139792e-05, + "loss": 2.2098, + "step": 12782 + }, + { + "epoch": 0.43, + "grad_norm": 0.7422534823417664, + "learning_rate": 1.2621456427162345e-05, + "loss": 2.1005, + "step": 12783 + }, + { + "epoch": 0.43, + "grad_norm": 0.7154671549797058, + "learning_rate": 1.2620430690569218e-05, + "loss": 2.0454, + "step": 12784 + }, + { + "epoch": 0.43, + "grad_norm": 0.7359423637390137, + "learning_rate": 1.2619404924372001e-05, + "loss": 2.0536, + "step": 12785 + }, + { + "epoch": 0.43, + "grad_norm": 0.7370131015777588, + "learning_rate": 1.2618379128582282e-05, + "loss": 2.1466, + "step": 12786 + }, + { + "epoch": 0.43, + "grad_norm": 0.7296870946884155, + "learning_rate": 1.2617353303211651e-05, + "loss": 2.1024, + "step": 12787 + }, + { + "epoch": 0.43, + "grad_norm": 0.7170761227607727, + "learning_rate": 1.2616327448271695e-05, + "loss": 1.9707, + "step": 12788 + }, + { + "epoch": 0.43, + "grad_norm": 0.7660859823226929, + "learning_rate": 1.2615301563774007e-05, + "loss": 2.1363, + "step": 12789 + }, + { + "epoch": 0.43, + "grad_norm": 0.7266695499420166, + "learning_rate": 1.2614275649730172e-05, + "loss": 2.063, + "step": 12790 + }, + { + "epoch": 0.43, + "grad_norm": 0.7092355489730835, + "learning_rate": 1.2613249706151785e-05, + "loss": 2.0569, + "step": 12791 + }, + { + "epoch": 0.43, + "grad_norm": 0.7087313532829285, + "learning_rate": 1.2612223733050431e-05, + "loss": 2.067, + "step": 12792 + }, + { + "epoch": 0.43, + "grad_norm": 0.7416631579399109, + "learning_rate": 1.2611197730437709e-05, + "loss": 2.0261, + "step": 12793 + }, + { + "epoch": 0.43, + "grad_norm": 0.7330492734909058, + "learning_rate": 1.2610171698325203e-05, + "loss": 2.1244, + "step": 12794 + }, + { + "epoch": 0.43, + "grad_norm": 0.7242865562438965, + "learning_rate": 1.2609145636724505e-05, + "loss": 2.17, + "step": 12795 + }, + { + "epoch": 0.43, + "grad_norm": 0.7279788851737976, + "learning_rate": 1.260811954564721e-05, + "loss": 2.0504, + "step": 12796 + }, + { + "epoch": 0.43, + "grad_norm": 0.7238636016845703, + "learning_rate": 1.260709342510491e-05, + "loss": 2.0592, + "step": 12797 + }, + { + "epoch": 0.43, + "grad_norm": 0.738256573677063, + "learning_rate": 1.2606067275109197e-05, + "loss": 2.0793, + "step": 12798 + }, + { + "epoch": 0.43, + "grad_norm": 0.7343834638595581, + "learning_rate": 1.2605041095671663e-05, + "loss": 2.1069, + "step": 12799 + }, + { + "epoch": 0.43, + "grad_norm": 0.7321343421936035, + "learning_rate": 1.2604014886803899e-05, + "loss": 2.0975, + "step": 12800 + }, + { + "epoch": 0.43, + "grad_norm": 0.7291301488876343, + "learning_rate": 1.2602988648517503e-05, + "loss": 2.109, + "step": 12801 + }, + { + "epoch": 0.43, + "grad_norm": 0.7361016273498535, + "learning_rate": 1.2601962380824067e-05, + "loss": 2.0713, + "step": 12802 + }, + { + "epoch": 0.43, + "grad_norm": 0.7367565631866455, + "learning_rate": 1.2600936083735182e-05, + "loss": 2.1107, + "step": 12803 + }, + { + "epoch": 0.43, + "grad_norm": 0.7395138740539551, + "learning_rate": 1.259990975726245e-05, + "loss": 1.9857, + "step": 12804 + }, + { + "epoch": 0.43, + "grad_norm": 0.7349457740783691, + "learning_rate": 1.2598883401417456e-05, + "loss": 2.0998, + "step": 12805 + }, + { + "epoch": 0.43, + "grad_norm": 0.7453685402870178, + "learning_rate": 1.2597857016211803e-05, + "loss": 2.1413, + "step": 12806 + }, + { + "epoch": 0.43, + "grad_norm": 0.7263466119766235, + "learning_rate": 1.2596830601657086e-05, + "loss": 2.1033, + "step": 12807 + }, + { + "epoch": 0.43, + "grad_norm": 0.7682474851608276, + "learning_rate": 1.2595804157764897e-05, + "loss": 2.0659, + "step": 12808 + }, + { + "epoch": 0.43, + "grad_norm": 0.7367715239524841, + "learning_rate": 1.2594777684546833e-05, + "loss": 2.1067, + "step": 12809 + }, + { + "epoch": 0.43, + "grad_norm": 0.7507600784301758, + "learning_rate": 1.259375118201449e-05, + "loss": 2.1425, + "step": 12810 + }, + { + "epoch": 0.43, + "grad_norm": 0.7448073029518127, + "learning_rate": 1.2592724650179471e-05, + "loss": 2.1449, + "step": 12811 + }, + { + "epoch": 0.43, + "grad_norm": 0.7340902090072632, + "learning_rate": 1.2591698089053366e-05, + "loss": 2.0812, + "step": 12812 + }, + { + "epoch": 0.43, + "grad_norm": 0.7985180616378784, + "learning_rate": 1.2590671498647775e-05, + "loss": 2.0712, + "step": 12813 + }, + { + "epoch": 0.43, + "grad_norm": 0.7169725298881531, + "learning_rate": 1.2589644878974295e-05, + "loss": 2.0652, + "step": 12814 + }, + { + "epoch": 0.43, + "grad_norm": 0.709062933921814, + "learning_rate": 1.2588618230044522e-05, + "loss": 2.0729, + "step": 12815 + }, + { + "epoch": 0.43, + "grad_norm": 0.7615477442741394, + "learning_rate": 1.2587591551870065e-05, + "loss": 2.1306, + "step": 12816 + }, + { + "epoch": 0.43, + "grad_norm": 0.7726173996925354, + "learning_rate": 1.2586564844462508e-05, + "loss": 2.1059, + "step": 12817 + }, + { + "epoch": 0.43, + "grad_norm": 0.7393383979797363, + "learning_rate": 1.2585538107833461e-05, + "loss": 2.0618, + "step": 12818 + }, + { + "epoch": 0.43, + "grad_norm": 0.746898889541626, + "learning_rate": 1.2584511341994519e-05, + "loss": 2.0532, + "step": 12819 + }, + { + "epoch": 0.43, + "grad_norm": 0.743028998374939, + "learning_rate": 1.258348454695728e-05, + "loss": 2.0486, + "step": 12820 + }, + { + "epoch": 0.43, + "grad_norm": 0.7561843991279602, + "learning_rate": 1.258245772273335e-05, + "loss": 2.087, + "step": 12821 + }, + { + "epoch": 0.43, + "grad_norm": 0.711010217666626, + "learning_rate": 1.2581430869334325e-05, + "loss": 2.077, + "step": 12822 + }, + { + "epoch": 0.43, + "grad_norm": 0.7202813029289246, + "learning_rate": 1.2580403986771805e-05, + "loss": 2.049, + "step": 12823 + }, + { + "epoch": 0.43, + "grad_norm": 0.7540672421455383, + "learning_rate": 1.25793770750574e-05, + "loss": 2.0742, + "step": 12824 + }, + { + "epoch": 0.43, + "grad_norm": 0.7332183718681335, + "learning_rate": 1.2578350134202695e-05, + "loss": 2.1099, + "step": 12825 + }, + { + "epoch": 0.43, + "grad_norm": 0.7302794456481934, + "learning_rate": 1.2577323164219306e-05, + "loss": 2.0679, + "step": 12826 + }, + { + "epoch": 0.43, + "grad_norm": 0.7282748818397522, + "learning_rate": 1.2576296165118832e-05, + "loss": 2.0665, + "step": 12827 + }, + { + "epoch": 0.43, + "grad_norm": 0.7057153582572937, + "learning_rate": 1.2575269136912869e-05, + "loss": 2.0825, + "step": 12828 + }, + { + "epoch": 0.43, + "grad_norm": 0.7553389072418213, + "learning_rate": 1.2574242079613029e-05, + "loss": 2.1098, + "step": 12829 + }, + { + "epoch": 0.43, + "grad_norm": 0.7073999047279358, + "learning_rate": 1.2573214993230909e-05, + "loss": 2.0818, + "step": 12830 + }, + { + "epoch": 0.43, + "grad_norm": 0.7208580374717712, + "learning_rate": 1.2572187877778115e-05, + "loss": 2.0355, + "step": 12831 + }, + { + "epoch": 0.43, + "grad_norm": 0.7796158194541931, + "learning_rate": 1.2571160733266248e-05, + "loss": 2.1315, + "step": 12832 + }, + { + "epoch": 0.43, + "grad_norm": 0.720184862613678, + "learning_rate": 1.2570133559706914e-05, + "loss": 2.0471, + "step": 12833 + }, + { + "epoch": 0.43, + "grad_norm": 0.7150482535362244, + "learning_rate": 1.2569106357111717e-05, + "loss": 2.1031, + "step": 12834 + }, + { + "epoch": 0.43, + "grad_norm": 0.7584150433540344, + "learning_rate": 1.2568079125492265e-05, + "loss": 2.1448, + "step": 12835 + }, + { + "epoch": 0.43, + "grad_norm": 0.728193998336792, + "learning_rate": 1.2567051864860156e-05, + "loss": 2.1453, + "step": 12836 + }, + { + "epoch": 0.43, + "grad_norm": 0.7228482961654663, + "learning_rate": 1.2566024575227004e-05, + "loss": 2.0512, + "step": 12837 + }, + { + "epoch": 0.43, + "grad_norm": 0.7330294251441956, + "learning_rate": 1.2564997256604407e-05, + "loss": 2.1627, + "step": 12838 + }, + { + "epoch": 0.43, + "grad_norm": 0.7319846749305725, + "learning_rate": 1.2563969909003972e-05, + "loss": 2.0857, + "step": 12839 + }, + { + "epoch": 0.43, + "grad_norm": 0.7598676681518555, + "learning_rate": 1.2562942532437314e-05, + "loss": 2.103, + "step": 12840 + }, + { + "epoch": 0.43, + "grad_norm": 0.7345932722091675, + "learning_rate": 1.2561915126916029e-05, + "loss": 2.0929, + "step": 12841 + }, + { + "epoch": 0.43, + "grad_norm": 0.7150911688804626, + "learning_rate": 1.2560887692451728e-05, + "loss": 2.1287, + "step": 12842 + }, + { + "epoch": 0.43, + "grad_norm": 0.7236700654029846, + "learning_rate": 1.255986022905602e-05, + "loss": 2.0395, + "step": 12843 + }, + { + "epoch": 0.43, + "grad_norm": 0.7608015537261963, + "learning_rate": 1.2558832736740511e-05, + "loss": 2.087, + "step": 12844 + }, + { + "epoch": 0.43, + "grad_norm": 0.7441733479499817, + "learning_rate": 1.2557805215516809e-05, + "loss": 2.1509, + "step": 12845 + }, + { + "epoch": 0.43, + "grad_norm": 0.7206248641014099, + "learning_rate": 1.2556777665396525e-05, + "loss": 2.1258, + "step": 12846 + }, + { + "epoch": 0.43, + "grad_norm": 0.7422816157341003, + "learning_rate": 1.2555750086391263e-05, + "loss": 2.1369, + "step": 12847 + }, + { + "epoch": 0.43, + "grad_norm": 0.7217992544174194, + "learning_rate": 1.2554722478512633e-05, + "loss": 2.0931, + "step": 12848 + }, + { + "epoch": 0.43, + "grad_norm": 0.7503949403762817, + "learning_rate": 1.2553694841772251e-05, + "loss": 2.1338, + "step": 12849 + }, + { + "epoch": 0.43, + "grad_norm": 0.7418839335441589, + "learning_rate": 1.2552667176181714e-05, + "loss": 2.0373, + "step": 12850 + }, + { + "epoch": 0.43, + "grad_norm": 0.7647033929824829, + "learning_rate": 1.2551639481752647e-05, + "loss": 2.1165, + "step": 12851 + }, + { + "epoch": 0.43, + "grad_norm": 0.753045380115509, + "learning_rate": 1.2550611758496647e-05, + "loss": 2.1189, + "step": 12852 + }, + { + "epoch": 0.43, + "grad_norm": 0.7317849397659302, + "learning_rate": 1.2549584006425333e-05, + "loss": 2.0848, + "step": 12853 + }, + { + "epoch": 0.43, + "grad_norm": 0.7443662285804749, + "learning_rate": 1.2548556225550312e-05, + "loss": 2.1085, + "step": 12854 + }, + { + "epoch": 0.43, + "grad_norm": 0.767545223236084, + "learning_rate": 1.2547528415883195e-05, + "loss": 2.0834, + "step": 12855 + }, + { + "epoch": 0.43, + "grad_norm": 0.7341024279594421, + "learning_rate": 1.2546500577435597e-05, + "loss": 2.142, + "step": 12856 + }, + { + "epoch": 0.43, + "grad_norm": 0.7362480163574219, + "learning_rate": 1.2545472710219127e-05, + "loss": 2.1704, + "step": 12857 + }, + { + "epoch": 0.43, + "grad_norm": 0.7114051580429077, + "learning_rate": 1.2544444814245398e-05, + "loss": 2.0947, + "step": 12858 + }, + { + "epoch": 0.43, + "grad_norm": 0.7444718480110168, + "learning_rate": 1.2543416889526023e-05, + "loss": 2.0388, + "step": 12859 + }, + { + "epoch": 0.43, + "grad_norm": 0.7749068737030029, + "learning_rate": 1.2542388936072616e-05, + "loss": 2.0959, + "step": 12860 + }, + { + "epoch": 0.43, + "grad_norm": 0.7396832704544067, + "learning_rate": 1.2541360953896784e-05, + "loss": 2.1142, + "step": 12861 + }, + { + "epoch": 0.43, + "grad_norm": 0.7276213765144348, + "learning_rate": 1.2540332943010152e-05, + "loss": 2.1054, + "step": 12862 + }, + { + "epoch": 0.43, + "grad_norm": 0.7538001537322998, + "learning_rate": 1.2539304903424323e-05, + "loss": 2.1327, + "step": 12863 + }, + { + "epoch": 0.43, + "grad_norm": 0.7564075589179993, + "learning_rate": 1.2538276835150916e-05, + "loss": 2.0927, + "step": 12864 + }, + { + "epoch": 0.43, + "grad_norm": 0.7238653302192688, + "learning_rate": 1.2537248738201547e-05, + "loss": 2.1118, + "step": 12865 + }, + { + "epoch": 0.43, + "grad_norm": 0.7505905628204346, + "learning_rate": 1.2536220612587824e-05, + "loss": 2.0343, + "step": 12866 + }, + { + "epoch": 0.43, + "grad_norm": 0.7649300694465637, + "learning_rate": 1.253519245832137e-05, + "loss": 2.0728, + "step": 12867 + }, + { + "epoch": 0.43, + "grad_norm": 0.717523455619812, + "learning_rate": 1.2534164275413799e-05, + "loss": 2.0953, + "step": 12868 + }, + { + "epoch": 0.43, + "grad_norm": 0.7451853156089783, + "learning_rate": 1.253313606387672e-05, + "loss": 2.0617, + "step": 12869 + }, + { + "epoch": 0.43, + "grad_norm": 0.6994777321815491, + "learning_rate": 1.2532107823721758e-05, + "loss": 2.0686, + "step": 12870 + }, + { + "epoch": 0.43, + "grad_norm": 0.7746158242225647, + "learning_rate": 1.2531079554960527e-05, + "loss": 2.0949, + "step": 12871 + }, + { + "epoch": 0.43, + "grad_norm": 0.7163287401199341, + "learning_rate": 1.2530051257604639e-05, + "loss": 2.1098, + "step": 12872 + }, + { + "epoch": 0.43, + "grad_norm": 0.7302113771438599, + "learning_rate": 1.2529022931665718e-05, + "loss": 2.1206, + "step": 12873 + }, + { + "epoch": 0.43, + "grad_norm": 0.7289221286773682, + "learning_rate": 1.2527994577155375e-05, + "loss": 2.0449, + "step": 12874 + }, + { + "epoch": 0.43, + "grad_norm": 0.7118273973464966, + "learning_rate": 1.2526966194085236e-05, + "loss": 2.0733, + "step": 12875 + }, + { + "epoch": 0.43, + "grad_norm": 0.7065019607543945, + "learning_rate": 1.2525937782466908e-05, + "loss": 2.0951, + "step": 12876 + }, + { + "epoch": 0.43, + "grad_norm": 0.7398509979248047, + "learning_rate": 1.2524909342312017e-05, + "loss": 2.1081, + "step": 12877 + }, + { + "epoch": 0.43, + "grad_norm": 0.7744411826133728, + "learning_rate": 1.2523880873632181e-05, + "loss": 2.1407, + "step": 12878 + }, + { + "epoch": 0.43, + "grad_norm": 0.7451678514480591, + "learning_rate": 1.2522852376439016e-05, + "loss": 2.0842, + "step": 12879 + }, + { + "epoch": 0.43, + "grad_norm": 0.7644221782684326, + "learning_rate": 1.2521823850744146e-05, + "loss": 2.0714, + "step": 12880 + }, + { + "epoch": 0.43, + "grad_norm": 0.7232552170753479, + "learning_rate": 1.2520795296559188e-05, + "loss": 2.0825, + "step": 12881 + }, + { + "epoch": 0.43, + "grad_norm": 0.7300330996513367, + "learning_rate": 1.2519766713895762e-05, + "loss": 2.0915, + "step": 12882 + }, + { + "epoch": 0.43, + "grad_norm": 0.700222373008728, + "learning_rate": 1.2518738102765486e-05, + "loss": 2.0966, + "step": 12883 + }, + { + "epoch": 0.43, + "grad_norm": 0.709098756313324, + "learning_rate": 1.2517709463179986e-05, + "loss": 2.1471, + "step": 12884 + }, + { + "epoch": 0.43, + "grad_norm": 0.7355266213417053, + "learning_rate": 1.251668079515088e-05, + "loss": 2.1091, + "step": 12885 + }, + { + "epoch": 0.43, + "grad_norm": 0.7305822372436523, + "learning_rate": 1.2515652098689785e-05, + "loss": 2.1556, + "step": 12886 + }, + { + "epoch": 0.43, + "grad_norm": 0.776351809501648, + "learning_rate": 1.251462337380833e-05, + "loss": 2.1657, + "step": 12887 + }, + { + "epoch": 0.43, + "grad_norm": 0.7186362743377686, + "learning_rate": 1.2513594620518133e-05, + "loss": 2.1161, + "step": 12888 + }, + { + "epoch": 0.43, + "grad_norm": 0.7281479239463806, + "learning_rate": 1.251256583883082e-05, + "loss": 2.048, + "step": 12889 + }, + { + "epoch": 0.43, + "grad_norm": 0.728370726108551, + "learning_rate": 1.2511537028758007e-05, + "loss": 2.0607, + "step": 12890 + }, + { + "epoch": 0.43, + "grad_norm": 0.7770498394966125, + "learning_rate": 1.2510508190311322e-05, + "loss": 2.1291, + "step": 12891 + }, + { + "epoch": 0.43, + "grad_norm": 0.7721983194351196, + "learning_rate": 1.2509479323502384e-05, + "loss": 2.0398, + "step": 12892 + }, + { + "epoch": 0.43, + "grad_norm": 0.7382127642631531, + "learning_rate": 1.2508450428342824e-05, + "loss": 2.0664, + "step": 12893 + }, + { + "epoch": 0.43, + "grad_norm": 0.718222439289093, + "learning_rate": 1.250742150484426e-05, + "loss": 2.0573, + "step": 12894 + }, + { + "epoch": 0.43, + "grad_norm": 0.7147057056427002, + "learning_rate": 1.2506392553018319e-05, + "loss": 2.0341, + "step": 12895 + }, + { + "epoch": 0.43, + "grad_norm": 0.7591274976730347, + "learning_rate": 1.2505363572876617e-05, + "loss": 2.0766, + "step": 12896 + }, + { + "epoch": 0.43, + "grad_norm": 0.747122049331665, + "learning_rate": 1.2504334564430792e-05, + "loss": 2.0198, + "step": 12897 + }, + { + "epoch": 0.43, + "grad_norm": 0.7370492815971375, + "learning_rate": 1.250330552769246e-05, + "loss": 2.0957, + "step": 12898 + }, + { + "epoch": 0.43, + "grad_norm": 0.7714354395866394, + "learning_rate": 1.2502276462673248e-05, + "loss": 2.078, + "step": 12899 + }, + { + "epoch": 0.43, + "grad_norm": 0.7045102119445801, + "learning_rate": 1.2501247369384788e-05, + "loss": 2.0842, + "step": 12900 + }, + { + "epoch": 0.43, + "grad_norm": 0.7832626700401306, + "learning_rate": 1.2500218247838698e-05, + "loss": 2.0752, + "step": 12901 + }, + { + "epoch": 0.43, + "grad_norm": 0.7434380650520325, + "learning_rate": 1.2499189098046607e-05, + "loss": 2.1218, + "step": 12902 + }, + { + "epoch": 0.43, + "grad_norm": 0.7311598658561707, + "learning_rate": 1.2498159920020143e-05, + "loss": 2.1231, + "step": 12903 + }, + { + "epoch": 0.43, + "grad_norm": 0.7336952090263367, + "learning_rate": 1.249713071377093e-05, + "loss": 2.0299, + "step": 12904 + }, + { + "epoch": 0.43, + "grad_norm": 0.7524884343147278, + "learning_rate": 1.24961014793106e-05, + "loss": 2.0857, + "step": 12905 + }, + { + "epoch": 0.43, + "grad_norm": 0.7341942191123962, + "learning_rate": 1.249507221665078e-05, + "loss": 2.0875, + "step": 12906 + }, + { + "epoch": 0.43, + "grad_norm": 0.7251594066619873, + "learning_rate": 1.2494042925803092e-05, + "loss": 2.1256, + "step": 12907 + }, + { + "epoch": 0.43, + "grad_norm": 0.7407419681549072, + "learning_rate": 1.2493013606779175e-05, + "loss": 2.0983, + "step": 12908 + }, + { + "epoch": 0.43, + "grad_norm": 0.7236939668655396, + "learning_rate": 1.2491984259590646e-05, + "loss": 2.0536, + "step": 12909 + }, + { + "epoch": 0.43, + "grad_norm": 0.7367979288101196, + "learning_rate": 1.2490954884249138e-05, + "loss": 2.0309, + "step": 12910 + }, + { + "epoch": 0.43, + "grad_norm": 0.7514662146568298, + "learning_rate": 1.2489925480766288e-05, + "loss": 2.1448, + "step": 12911 + }, + { + "epoch": 0.43, + "grad_norm": 0.7420753240585327, + "learning_rate": 1.2488896049153714e-05, + "loss": 2.1275, + "step": 12912 + }, + { + "epoch": 0.43, + "grad_norm": 0.736609935760498, + "learning_rate": 1.2487866589423052e-05, + "loss": 2.1294, + "step": 12913 + }, + { + "epoch": 0.43, + "grad_norm": 0.7688855528831482, + "learning_rate": 1.2486837101585932e-05, + "loss": 2.0282, + "step": 12914 + }, + { + "epoch": 0.43, + "grad_norm": 0.7054730653762817, + "learning_rate": 1.2485807585653985e-05, + "loss": 2.1025, + "step": 12915 + }, + { + "epoch": 0.43, + "grad_norm": 0.7414841651916504, + "learning_rate": 1.2484778041638838e-05, + "loss": 2.1521, + "step": 12916 + }, + { + "epoch": 0.43, + "grad_norm": 0.7420551776885986, + "learning_rate": 1.2483748469552128e-05, + "loss": 2.1072, + "step": 12917 + }, + { + "epoch": 0.43, + "grad_norm": 0.7364272475242615, + "learning_rate": 1.2482718869405479e-05, + "loss": 2.11, + "step": 12918 + }, + { + "epoch": 0.43, + "grad_norm": 0.7953136563301086, + "learning_rate": 1.248168924121053e-05, + "loss": 2.0976, + "step": 12919 + }, + { + "epoch": 0.43, + "grad_norm": 0.7382527589797974, + "learning_rate": 1.248065958497891e-05, + "loss": 2.1382, + "step": 12920 + }, + { + "epoch": 0.43, + "grad_norm": 0.7572237849235535, + "learning_rate": 1.2479629900722252e-05, + "loss": 2.1144, + "step": 12921 + }, + { + "epoch": 0.43, + "grad_norm": 0.7328273057937622, + "learning_rate": 1.247860018845219e-05, + "loss": 2.0848, + "step": 12922 + }, + { + "epoch": 0.43, + "grad_norm": 0.7206224203109741, + "learning_rate": 1.2477570448180355e-05, + "loss": 2.085, + "step": 12923 + }, + { + "epoch": 0.43, + "grad_norm": 0.7551538944244385, + "learning_rate": 1.247654067991838e-05, + "loss": 2.1005, + "step": 12924 + }, + { + "epoch": 0.43, + "grad_norm": 0.7376437187194824, + "learning_rate": 1.2475510883677902e-05, + "loss": 2.169, + "step": 12925 + }, + { + "epoch": 0.43, + "grad_norm": 0.722365140914917, + "learning_rate": 1.247448105947055e-05, + "loss": 2.1536, + "step": 12926 + }, + { + "epoch": 0.43, + "grad_norm": 0.7430427670478821, + "learning_rate": 1.247345120730796e-05, + "loss": 2.0885, + "step": 12927 + }, + { + "epoch": 0.43, + "grad_norm": 0.7104294300079346, + "learning_rate": 1.2472421327201774e-05, + "loss": 2.135, + "step": 12928 + }, + { + "epoch": 0.43, + "grad_norm": 0.7420622110366821, + "learning_rate": 1.2471391419163615e-05, + "loss": 2.1251, + "step": 12929 + }, + { + "epoch": 0.43, + "grad_norm": 0.7666173577308655, + "learning_rate": 1.2470361483205125e-05, + "loss": 2.0975, + "step": 12930 + }, + { + "epoch": 0.43, + "grad_norm": 0.7457684874534607, + "learning_rate": 1.2469331519337942e-05, + "loss": 2.188, + "step": 12931 + }, + { + "epoch": 0.43, + "grad_norm": 0.7165634036064148, + "learning_rate": 1.2468301527573694e-05, + "loss": 2.1345, + "step": 12932 + }, + { + "epoch": 0.43, + "grad_norm": 0.708013117313385, + "learning_rate": 1.2467271507924026e-05, + "loss": 2.1187, + "step": 12933 + }, + { + "epoch": 0.43, + "grad_norm": 0.7162837386131287, + "learning_rate": 1.2466241460400567e-05, + "loss": 2.0705, + "step": 12934 + }, + { + "epoch": 0.43, + "grad_norm": 0.7297375798225403, + "learning_rate": 1.2465211385014961e-05, + "loss": 2.1602, + "step": 12935 + }, + { + "epoch": 0.43, + "grad_norm": 0.7404950857162476, + "learning_rate": 1.246418128177884e-05, + "loss": 2.1462, + "step": 12936 + }, + { + "epoch": 0.43, + "grad_norm": 0.7483963370323181, + "learning_rate": 1.246315115070384e-05, + "loss": 2.0395, + "step": 12937 + }, + { + "epoch": 0.43, + "grad_norm": 0.744726300239563, + "learning_rate": 1.2462120991801605e-05, + "loss": 2.0548, + "step": 12938 + }, + { + "epoch": 0.43, + "grad_norm": 0.7292644381523132, + "learning_rate": 1.2461090805083774e-05, + "loss": 2.0595, + "step": 12939 + }, + { + "epoch": 0.43, + "grad_norm": 0.7418462634086609, + "learning_rate": 1.2460060590561972e-05, + "loss": 2.122, + "step": 12940 + }, + { + "epoch": 0.43, + "grad_norm": 0.7440269589424133, + "learning_rate": 1.2459030348247853e-05, + "loss": 2.1191, + "step": 12941 + }, + { + "epoch": 0.43, + "grad_norm": 0.7234408259391785, + "learning_rate": 1.2458000078153052e-05, + "loss": 2.0026, + "step": 12942 + }, + { + "epoch": 0.43, + "grad_norm": 0.7557561993598938, + "learning_rate": 1.24569697802892e-05, + "loss": 2.0467, + "step": 12943 + }, + { + "epoch": 0.43, + "grad_norm": 0.734746515750885, + "learning_rate": 1.245593945466795e-05, + "loss": 2.1099, + "step": 12944 + }, + { + "epoch": 0.43, + "grad_norm": 0.742408275604248, + "learning_rate": 1.2454909101300934e-05, + "loss": 2.1039, + "step": 12945 + }, + { + "epoch": 0.43, + "grad_norm": 0.7103086113929749, + "learning_rate": 1.2453878720199792e-05, + "loss": 2.0695, + "step": 12946 + }, + { + "epoch": 0.43, + "grad_norm": 0.7477011680603027, + "learning_rate": 1.2452848311376167e-05, + "loss": 2.0753, + "step": 12947 + }, + { + "epoch": 0.43, + "grad_norm": 0.743648111820221, + "learning_rate": 1.2451817874841698e-05, + "loss": 2.0901, + "step": 12948 + }, + { + "epoch": 0.43, + "grad_norm": 0.709757924079895, + "learning_rate": 1.245078741060803e-05, + "loss": 2.0919, + "step": 12949 + }, + { + "epoch": 0.43, + "grad_norm": 0.7295517325401306, + "learning_rate": 1.24497569186868e-05, + "loss": 2.1181, + "step": 12950 + }, + { + "epoch": 0.43, + "grad_norm": 0.7430495023727417, + "learning_rate": 1.2448726399089652e-05, + "loss": 2.0155, + "step": 12951 + }, + { + "epoch": 0.43, + "grad_norm": 0.7456165552139282, + "learning_rate": 1.244769585182823e-05, + "loss": 2.0471, + "step": 12952 + }, + { + "epoch": 0.43, + "grad_norm": 0.75956791639328, + "learning_rate": 1.2446665276914174e-05, + "loss": 2.1012, + "step": 12953 + }, + { + "epoch": 0.43, + "grad_norm": 0.7669713497161865, + "learning_rate": 1.2445634674359126e-05, + "loss": 2.1117, + "step": 12954 + }, + { + "epoch": 0.43, + "grad_norm": 0.7304967641830444, + "learning_rate": 1.2444604044174734e-05, + "loss": 2.1065, + "step": 12955 + }, + { + "epoch": 0.43, + "grad_norm": 0.7389522790908813, + "learning_rate": 1.2443573386372636e-05, + "loss": 2.0986, + "step": 12956 + }, + { + "epoch": 0.43, + "grad_norm": 0.7395791411399841, + "learning_rate": 1.2442542700964477e-05, + "loss": 2.1404, + "step": 12957 + }, + { + "epoch": 0.43, + "grad_norm": 0.730171263217926, + "learning_rate": 1.2441511987961906e-05, + "loss": 2.0623, + "step": 12958 + }, + { + "epoch": 0.43, + "grad_norm": 0.7415866851806641, + "learning_rate": 1.2440481247376559e-05, + "loss": 2.0379, + "step": 12959 + }, + { + "epoch": 0.43, + "grad_norm": 0.741508960723877, + "learning_rate": 1.2439450479220088e-05, + "loss": 2.1165, + "step": 12960 + }, + { + "epoch": 0.43, + "grad_norm": 0.7084594368934631, + "learning_rate": 1.2438419683504133e-05, + "loss": 2.0718, + "step": 12961 + }, + { + "epoch": 0.43, + "grad_norm": 0.7417922019958496, + "learning_rate": 1.2437388860240342e-05, + "loss": 2.0931, + "step": 12962 + }, + { + "epoch": 0.43, + "grad_norm": 0.733667254447937, + "learning_rate": 1.2436358009440359e-05, + "loss": 2.0493, + "step": 12963 + }, + { + "epoch": 0.43, + "grad_norm": 0.742735743522644, + "learning_rate": 1.2435327131115835e-05, + "loss": 2.0443, + "step": 12964 + }, + { + "epoch": 0.43, + "grad_norm": 0.7243116497993469, + "learning_rate": 1.2434296225278409e-05, + "loss": 2.0384, + "step": 12965 + }, + { + "epoch": 0.43, + "grad_norm": 0.737555980682373, + "learning_rate": 1.2433265291939734e-05, + "loss": 2.073, + "step": 12966 + }, + { + "epoch": 0.43, + "grad_norm": 0.7015740275382996, + "learning_rate": 1.243223433111145e-05, + "loss": 2.0985, + "step": 12967 + }, + { + "epoch": 0.43, + "grad_norm": 0.7190524339675903, + "learning_rate": 1.243120334280521e-05, + "loss": 2.1368, + "step": 12968 + }, + { + "epoch": 0.43, + "grad_norm": 0.7631261348724365, + "learning_rate": 1.2430172327032658e-05, + "loss": 2.0654, + "step": 12969 + }, + { + "epoch": 0.43, + "grad_norm": 0.7269861102104187, + "learning_rate": 1.2429141283805445e-05, + "loss": 2.0432, + "step": 12970 + }, + { + "epoch": 0.43, + "grad_norm": 0.7421794533729553, + "learning_rate": 1.2428110213135215e-05, + "loss": 2.1662, + "step": 12971 + }, + { + "epoch": 0.43, + "grad_norm": 0.7735297679901123, + "learning_rate": 1.2427079115033623e-05, + "loss": 2.0923, + "step": 12972 + }, + { + "epoch": 0.43, + "grad_norm": 0.7318286299705505, + "learning_rate": 1.2426047989512308e-05, + "loss": 2.0134, + "step": 12973 + }, + { + "epoch": 0.43, + "grad_norm": 0.7278672456741333, + "learning_rate": 1.2425016836582928e-05, + "loss": 2.0582, + "step": 12974 + }, + { + "epoch": 0.43, + "grad_norm": 0.7466635704040527, + "learning_rate": 1.2423985656257132e-05, + "loss": 2.0925, + "step": 12975 + }, + { + "epoch": 0.43, + "grad_norm": 0.7345931529998779, + "learning_rate": 1.242295444854656e-05, + "loss": 2.1243, + "step": 12976 + }, + { + "epoch": 0.43, + "grad_norm": 0.7147917151451111, + "learning_rate": 1.2421923213462874e-05, + "loss": 2.0957, + "step": 12977 + }, + { + "epoch": 0.43, + "grad_norm": 0.7575796246528625, + "learning_rate": 1.2420891951017717e-05, + "loss": 2.1344, + "step": 12978 + }, + { + "epoch": 0.43, + "grad_norm": 0.730445921421051, + "learning_rate": 1.2419860661222743e-05, + "loss": 2.1346, + "step": 12979 + }, + { + "epoch": 0.43, + "grad_norm": 0.7233250737190247, + "learning_rate": 1.2418829344089598e-05, + "loss": 2.0304, + "step": 12980 + }, + { + "epoch": 0.43, + "grad_norm": 0.7203359603881836, + "learning_rate": 1.241779799962994e-05, + "loss": 2.082, + "step": 12981 + }, + { + "epoch": 0.43, + "grad_norm": 0.7810947299003601, + "learning_rate": 1.2416766627855415e-05, + "loss": 2.1888, + "step": 12982 + }, + { + "epoch": 0.43, + "grad_norm": 0.7359386682510376, + "learning_rate": 1.2415735228777676e-05, + "loss": 1.9999, + "step": 12983 + }, + { + "epoch": 0.43, + "grad_norm": 0.711155354976654, + "learning_rate": 1.2414703802408378e-05, + "loss": 2.0707, + "step": 12984 + }, + { + "epoch": 0.43, + "grad_norm": 0.7029881477355957, + "learning_rate": 1.2413672348759171e-05, + "loss": 2.0587, + "step": 12985 + }, + { + "epoch": 0.43, + "grad_norm": 0.7210840582847595, + "learning_rate": 1.2412640867841712e-05, + "loss": 2.0736, + "step": 12986 + }, + { + "epoch": 0.43, + "grad_norm": 0.7426431179046631, + "learning_rate": 1.2411609359667645e-05, + "loss": 2.1838, + "step": 12987 + }, + { + "epoch": 0.43, + "grad_norm": 0.7290809750556946, + "learning_rate": 1.2410577824248632e-05, + "loss": 2.0765, + "step": 12988 + }, + { + "epoch": 0.43, + "grad_norm": 0.7348223328590393, + "learning_rate": 1.2409546261596318e-05, + "loss": 2.0899, + "step": 12989 + }, + { + "epoch": 0.43, + "grad_norm": 0.7185832262039185, + "learning_rate": 1.2408514671722372e-05, + "loss": 2.0783, + "step": 12990 + }, + { + "epoch": 0.43, + "grad_norm": 0.7139955759048462, + "learning_rate": 1.2407483054638432e-05, + "loss": 2.1295, + "step": 12991 + }, + { + "epoch": 0.43, + "grad_norm": 0.744611918926239, + "learning_rate": 1.240645141035616e-05, + "loss": 2.081, + "step": 12992 + }, + { + "epoch": 0.43, + "grad_norm": 0.7355293035507202, + "learning_rate": 1.2405419738887213e-05, + "loss": 2.089, + "step": 12993 + }, + { + "epoch": 0.43, + "grad_norm": 0.7547792196273804, + "learning_rate": 1.240438804024324e-05, + "loss": 2.0476, + "step": 12994 + }, + { + "epoch": 0.43, + "grad_norm": 0.7759978771209717, + "learning_rate": 1.2403356314435901e-05, + "loss": 2.1206, + "step": 12995 + }, + { + "epoch": 0.43, + "grad_norm": 0.7384457588195801, + "learning_rate": 1.2402324561476854e-05, + "loss": 2.1033, + "step": 12996 + }, + { + "epoch": 0.43, + "grad_norm": 0.7192955613136292, + "learning_rate": 1.2401292781377749e-05, + "loss": 2.1075, + "step": 12997 + }, + { + "epoch": 0.43, + "grad_norm": 0.7396403551101685, + "learning_rate": 1.2400260974150244e-05, + "loss": 2.1064, + "step": 12998 + }, + { + "epoch": 0.43, + "grad_norm": 0.7467607855796814, + "learning_rate": 1.2399229139806004e-05, + "loss": 2.0501, + "step": 12999 + }, + { + "epoch": 0.43, + "grad_norm": 0.7314237952232361, + "learning_rate": 1.2398197278356671e-05, + "loss": 2.094, + "step": 13000 + }, + { + "epoch": 0.43, + "grad_norm": 0.7604562044143677, + "learning_rate": 1.2397165389813917e-05, + "loss": 2.05, + "step": 13001 + }, + { + "epoch": 0.43, + "grad_norm": 0.7421305775642395, + "learning_rate": 1.239613347418939e-05, + "loss": 2.0682, + "step": 13002 + }, + { + "epoch": 0.43, + "grad_norm": 0.7043124437332153, + "learning_rate": 1.2395101531494748e-05, + "loss": 2.0826, + "step": 13003 + }, + { + "epoch": 0.43, + "grad_norm": 0.7208982110023499, + "learning_rate": 1.239406956174166e-05, + "loss": 2.1075, + "step": 13004 + }, + { + "epoch": 0.43, + "grad_norm": 0.7237157821655273, + "learning_rate": 1.2393037564941773e-05, + "loss": 2.1099, + "step": 13005 + }, + { + "epoch": 0.43, + "grad_norm": 0.7213141918182373, + "learning_rate": 1.239200554110675e-05, + "loss": 2.0778, + "step": 13006 + }, + { + "epoch": 0.43, + "grad_norm": 0.740760326385498, + "learning_rate": 1.2390973490248253e-05, + "loss": 2.0622, + "step": 13007 + }, + { + "epoch": 0.43, + "grad_norm": 0.7101807594299316, + "learning_rate": 1.2389941412377935e-05, + "loss": 2.0304, + "step": 13008 + }, + { + "epoch": 0.43, + "grad_norm": 0.7369447946548462, + "learning_rate": 1.2388909307507462e-05, + "loss": 2.0873, + "step": 13009 + }, + { + "epoch": 0.43, + "grad_norm": 0.7201719880104065, + "learning_rate": 1.2387877175648493e-05, + "loss": 2.0637, + "step": 13010 + }, + { + "epoch": 0.43, + "grad_norm": 0.735668420791626, + "learning_rate": 1.238684501681268e-05, + "loss": 2.0916, + "step": 13011 + }, + { + "epoch": 0.43, + "grad_norm": 0.7282198667526245, + "learning_rate": 1.2385812831011699e-05, + "loss": 2.0079, + "step": 13012 + }, + { + "epoch": 0.43, + "grad_norm": 0.733134925365448, + "learning_rate": 1.23847806182572e-05, + "loss": 2.0684, + "step": 13013 + }, + { + "epoch": 0.43, + "grad_norm": 0.7027309536933899, + "learning_rate": 1.2383748378560845e-05, + "loss": 2.0627, + "step": 13014 + }, + { + "epoch": 0.43, + "grad_norm": 0.7507836818695068, + "learning_rate": 1.23827161119343e-05, + "loss": 2.1422, + "step": 13015 + }, + { + "epoch": 0.43, + "grad_norm": 0.7230039238929749, + "learning_rate": 1.2381683818389226e-05, + "loss": 2.0719, + "step": 13016 + }, + { + "epoch": 0.43, + "grad_norm": 0.7450369000434875, + "learning_rate": 1.2380651497937282e-05, + "loss": 2.1228, + "step": 13017 + }, + { + "epoch": 0.43, + "grad_norm": 0.7306612133979797, + "learning_rate": 1.2379619150590131e-05, + "loss": 2.0863, + "step": 13018 + }, + { + "epoch": 0.43, + "grad_norm": 0.7152718901634216, + "learning_rate": 1.237858677635944e-05, + "loss": 2.0963, + "step": 13019 + }, + { + "epoch": 0.43, + "grad_norm": 0.7400249242782593, + "learning_rate": 1.237755437525687e-05, + "loss": 2.1085, + "step": 13020 + }, + { + "epoch": 0.43, + "grad_norm": 0.7396306991577148, + "learning_rate": 1.2376521947294084e-05, + "loss": 2.102, + "step": 13021 + }, + { + "epoch": 0.43, + "grad_norm": 0.721130907535553, + "learning_rate": 1.2375489492482743e-05, + "loss": 2.0543, + "step": 13022 + }, + { + "epoch": 0.43, + "grad_norm": 0.7092298269271851, + "learning_rate": 1.2374457010834516e-05, + "loss": 2.1064, + "step": 13023 + }, + { + "epoch": 0.43, + "grad_norm": 0.7501256465911865, + "learning_rate": 1.2373424502361067e-05, + "loss": 2.1203, + "step": 13024 + }, + { + "epoch": 0.43, + "grad_norm": 0.7504392862319946, + "learning_rate": 1.2372391967074056e-05, + "loss": 2.1429, + "step": 13025 + }, + { + "epoch": 0.43, + "grad_norm": 0.7366483807563782, + "learning_rate": 1.2371359404985152e-05, + "loss": 2.1198, + "step": 13026 + }, + { + "epoch": 0.43, + "grad_norm": 0.7139244079589844, + "learning_rate": 1.237032681610602e-05, + "loss": 2.0325, + "step": 13027 + }, + { + "epoch": 0.43, + "grad_norm": 0.7296825051307678, + "learning_rate": 1.2369294200448323e-05, + "loss": 2.0789, + "step": 13028 + }, + { + "epoch": 0.43, + "grad_norm": 0.7670575380325317, + "learning_rate": 1.2368261558023732e-05, + "loss": 2.0786, + "step": 13029 + }, + { + "epoch": 0.43, + "grad_norm": 0.7251813411712646, + "learning_rate": 1.2367228888843905e-05, + "loss": 2.0218, + "step": 13030 + }, + { + "epoch": 0.43, + "grad_norm": 0.7382007837295532, + "learning_rate": 1.2366196192920516e-05, + "loss": 2.122, + "step": 13031 + }, + { + "epoch": 0.43, + "grad_norm": 0.7294771075248718, + "learning_rate": 1.2365163470265232e-05, + "loss": 2.1225, + "step": 13032 + }, + { + "epoch": 0.43, + "grad_norm": 0.7454811334609985, + "learning_rate": 1.236413072088971e-05, + "loss": 2.1003, + "step": 13033 + }, + { + "epoch": 0.43, + "grad_norm": 0.7262408137321472, + "learning_rate": 1.2363097944805629e-05, + "loss": 2.0271, + "step": 13034 + }, + { + "epoch": 0.43, + "grad_norm": 0.7219258546829224, + "learning_rate": 1.2362065142024654e-05, + "loss": 2.1094, + "step": 13035 + }, + { + "epoch": 0.43, + "grad_norm": 0.7461423277854919, + "learning_rate": 1.2361032312558447e-05, + "loss": 2.0645, + "step": 13036 + }, + { + "epoch": 0.43, + "grad_norm": 0.759750485420227, + "learning_rate": 1.2359999456418685e-05, + "loss": 2.1249, + "step": 13037 + }, + { + "epoch": 0.43, + "grad_norm": 0.7096912860870361, + "learning_rate": 1.235896657361703e-05, + "loss": 2.0923, + "step": 13038 + }, + { + "epoch": 0.43, + "grad_norm": 0.7406792640686035, + "learning_rate": 1.2357933664165154e-05, + "loss": 2.0667, + "step": 13039 + }, + { + "epoch": 0.43, + "grad_norm": 0.7547255754470825, + "learning_rate": 1.2356900728074726e-05, + "loss": 2.0844, + "step": 13040 + }, + { + "epoch": 0.43, + "grad_norm": 0.7590304017066956, + "learning_rate": 1.2355867765357412e-05, + "loss": 2.1176, + "step": 13041 + }, + { + "epoch": 0.43, + "grad_norm": 0.7223701477050781, + "learning_rate": 1.2354834776024886e-05, + "loss": 2.1008, + "step": 13042 + }, + { + "epoch": 0.43, + "grad_norm": 0.7323155999183655, + "learning_rate": 1.235380176008882e-05, + "loss": 2.1232, + "step": 13043 + }, + { + "epoch": 0.43, + "grad_norm": 0.6999065279960632, + "learning_rate": 1.2352768717560875e-05, + "loss": 2.102, + "step": 13044 + }, + { + "epoch": 0.43, + "grad_norm": 0.7324925661087036, + "learning_rate": 1.2351735648452732e-05, + "loss": 2.0873, + "step": 13045 + }, + { + "epoch": 0.43, + "grad_norm": 0.7423679232597351, + "learning_rate": 1.235070255277606e-05, + "loss": 2.0432, + "step": 13046 + }, + { + "epoch": 0.43, + "grad_norm": 0.7264320254325867, + "learning_rate": 1.2349669430542522e-05, + "loss": 2.028, + "step": 13047 + }, + { + "epoch": 0.43, + "grad_norm": 0.7267599701881409, + "learning_rate": 1.2348636281763801e-05, + "loss": 1.9875, + "step": 13048 + }, + { + "epoch": 0.43, + "grad_norm": 0.7498978972434998, + "learning_rate": 1.234760310645156e-05, + "loss": 2.1167, + "step": 13049 + }, + { + "epoch": 0.43, + "grad_norm": 0.710991621017456, + "learning_rate": 1.2346569904617477e-05, + "loss": 2.1737, + "step": 13050 + }, + { + "epoch": 0.43, + "grad_norm": 0.7034395337104797, + "learning_rate": 1.2345536676273222e-05, + "loss": 2.0488, + "step": 13051 + }, + { + "epoch": 0.43, + "grad_norm": 0.72297602891922, + "learning_rate": 1.2344503421430469e-05, + "loss": 2.0898, + "step": 13052 + }, + { + "epoch": 0.43, + "grad_norm": 0.7451843619346619, + "learning_rate": 1.234347014010089e-05, + "loss": 2.0423, + "step": 13053 + }, + { + "epoch": 0.43, + "grad_norm": 0.7730289697647095, + "learning_rate": 1.234243683229616e-05, + "loss": 2.0815, + "step": 13054 + }, + { + "epoch": 0.43, + "grad_norm": 0.733791708946228, + "learning_rate": 1.2341403498027948e-05, + "loss": 2.1385, + "step": 13055 + }, + { + "epoch": 0.43, + "grad_norm": 0.7163063287734985, + "learning_rate": 1.2340370137307933e-05, + "loss": 2.0514, + "step": 13056 + }, + { + "epoch": 0.43, + "grad_norm": 0.7582456469535828, + "learning_rate": 1.233933675014779e-05, + "loss": 2.1049, + "step": 13057 + }, + { + "epoch": 0.43, + "grad_norm": 0.7381325364112854, + "learning_rate": 1.2338303336559187e-05, + "loss": 2.1334, + "step": 13058 + }, + { + "epoch": 0.43, + "grad_norm": 0.7408602833747864, + "learning_rate": 1.233726989655381e-05, + "loss": 2.0824, + "step": 13059 + }, + { + "epoch": 0.43, + "grad_norm": 0.7487793564796448, + "learning_rate": 1.2336236430143323e-05, + "loss": 2.027, + "step": 13060 + }, + { + "epoch": 0.43, + "grad_norm": 0.7571076154708862, + "learning_rate": 1.2335202937339406e-05, + "loss": 2.0531, + "step": 13061 + }, + { + "epoch": 0.43, + "grad_norm": 0.7500000596046448, + "learning_rate": 1.2334169418153733e-05, + "loss": 2.0554, + "step": 13062 + }, + { + "epoch": 0.43, + "grad_norm": 0.7484603524208069, + "learning_rate": 1.2333135872597985e-05, + "loss": 2.132, + "step": 13063 + }, + { + "epoch": 0.43, + "grad_norm": 0.7276855111122131, + "learning_rate": 1.2332102300683835e-05, + "loss": 1.9919, + "step": 13064 + }, + { + "epoch": 0.43, + "grad_norm": 0.7594392895698547, + "learning_rate": 1.233106870242296e-05, + "loss": 2.0513, + "step": 13065 + }, + { + "epoch": 0.43, + "grad_norm": 0.7225670218467712, + "learning_rate": 1.2330035077827033e-05, + "loss": 2.0747, + "step": 13066 + }, + { + "epoch": 0.43, + "grad_norm": 0.7556294202804565, + "learning_rate": 1.232900142690774e-05, + "loss": 2.0421, + "step": 13067 + }, + { + "epoch": 0.43, + "grad_norm": 0.791875958442688, + "learning_rate": 1.2327967749676755e-05, + "loss": 2.1779, + "step": 13068 + }, + { + "epoch": 0.43, + "grad_norm": 0.7489445209503174, + "learning_rate": 1.2326934046145749e-05, + "loss": 2.0885, + "step": 13069 + }, + { + "epoch": 0.43, + "grad_norm": 0.7299328446388245, + "learning_rate": 1.232590031632641e-05, + "loss": 2.1004, + "step": 13070 + }, + { + "epoch": 0.43, + "grad_norm": 0.7820027470588684, + "learning_rate": 1.232486656023041e-05, + "loss": 2.1207, + "step": 13071 + }, + { + "epoch": 0.43, + "grad_norm": 0.7115002274513245, + "learning_rate": 1.2323832777869434e-05, + "loss": 2.0616, + "step": 13072 + }, + { + "epoch": 0.43, + "grad_norm": 0.7216504216194153, + "learning_rate": 1.2322798969255154e-05, + "loss": 2.1014, + "step": 13073 + }, + { + "epoch": 0.43, + "grad_norm": 0.7520599961280823, + "learning_rate": 1.2321765134399253e-05, + "loss": 2.0291, + "step": 13074 + }, + { + "epoch": 0.44, + "grad_norm": 0.7540664076805115, + "learning_rate": 1.2320731273313409e-05, + "loss": 2.086, + "step": 13075 + }, + { + "epoch": 0.44, + "grad_norm": 0.6993480324745178, + "learning_rate": 1.2319697386009304e-05, + "loss": 2.0171, + "step": 13076 + }, + { + "epoch": 0.44, + "grad_norm": 0.7562140226364136, + "learning_rate": 1.2318663472498617e-05, + "loss": 2.1042, + "step": 13077 + }, + { + "epoch": 0.44, + "grad_norm": 0.7461341023445129, + "learning_rate": 1.2317629532793029e-05, + "loss": 2.1262, + "step": 13078 + }, + { + "epoch": 0.44, + "grad_norm": 0.7607473731040955, + "learning_rate": 1.2316595566904223e-05, + "loss": 2.1007, + "step": 13079 + }, + { + "epoch": 0.44, + "grad_norm": 0.7403892278671265, + "learning_rate": 1.2315561574843874e-05, + "loss": 2.0591, + "step": 13080 + }, + { + "epoch": 0.44, + "grad_norm": 0.7353152632713318, + "learning_rate": 1.2314527556623674e-05, + "loss": 2.0821, + "step": 13081 + }, + { + "epoch": 0.44, + "grad_norm": 0.7451971173286438, + "learning_rate": 1.231349351225529e-05, + "loss": 1.9626, + "step": 13082 + }, + { + "epoch": 0.44, + "grad_norm": 0.7552878856658936, + "learning_rate": 1.2312459441750419e-05, + "loss": 2.0459, + "step": 13083 + }, + { + "epoch": 0.44, + "grad_norm": 0.7366985082626343, + "learning_rate": 1.2311425345120734e-05, + "loss": 2.1556, + "step": 13084 + }, + { + "epoch": 0.44, + "grad_norm": 0.7105645537376404, + "learning_rate": 1.231039122237792e-05, + "loss": 2.1362, + "step": 13085 + }, + { + "epoch": 0.44, + "grad_norm": 0.7527480125427246, + "learning_rate": 1.230935707353366e-05, + "loss": 2.1095, + "step": 13086 + }, + { + "epoch": 0.44, + "grad_norm": 0.7542600631713867, + "learning_rate": 1.2308322898599636e-05, + "loss": 2.0581, + "step": 13087 + }, + { + "epoch": 0.44, + "grad_norm": 0.8106100559234619, + "learning_rate": 1.2307288697587532e-05, + "loss": 2.0799, + "step": 13088 + }, + { + "epoch": 0.44, + "grad_norm": 0.7616980075836182, + "learning_rate": 1.2306254470509033e-05, + "loss": 2.1161, + "step": 13089 + }, + { + "epoch": 0.44, + "grad_norm": 0.7653668522834778, + "learning_rate": 1.2305220217375824e-05, + "loss": 2.1513, + "step": 13090 + }, + { + "epoch": 0.44, + "grad_norm": 0.7590305805206299, + "learning_rate": 1.2304185938199588e-05, + "loss": 2.0824, + "step": 13091 + }, + { + "epoch": 0.44, + "grad_norm": 0.7175247073173523, + "learning_rate": 1.2303151632992011e-05, + "loss": 2.0704, + "step": 13092 + }, + { + "epoch": 0.44, + "grad_norm": 0.7056856155395508, + "learning_rate": 1.2302117301764773e-05, + "loss": 2.0668, + "step": 13093 + }, + { + "epoch": 0.44, + "grad_norm": 0.7867297530174255, + "learning_rate": 1.2301082944529567e-05, + "loss": 2.0789, + "step": 13094 + }, + { + "epoch": 0.44, + "grad_norm": 0.7226760387420654, + "learning_rate": 1.2300048561298072e-05, + "loss": 2.1711, + "step": 13095 + }, + { + "epoch": 0.44, + "grad_norm": 0.7539404630661011, + "learning_rate": 1.2299014152081974e-05, + "loss": 2.0695, + "step": 13096 + }, + { + "epoch": 0.44, + "grad_norm": 0.7063410878181458, + "learning_rate": 1.2297979716892968e-05, + "loss": 2.0958, + "step": 13097 + }, + { + "epoch": 0.44, + "grad_norm": 0.7316232919692993, + "learning_rate": 1.2296945255742727e-05, + "loss": 2.092, + "step": 13098 + }, + { + "epoch": 0.44, + "grad_norm": 0.7146007418632507, + "learning_rate": 1.2295910768642948e-05, + "loss": 2.1059, + "step": 13099 + }, + { + "epoch": 0.44, + "grad_norm": 0.7345675230026245, + "learning_rate": 1.2294876255605315e-05, + "loss": 2.0916, + "step": 13100 + }, + { + "epoch": 0.44, + "grad_norm": 0.7280321717262268, + "learning_rate": 1.2293841716641513e-05, + "loss": 2.0848, + "step": 13101 + }, + { + "epoch": 0.44, + "grad_norm": 0.7396214604377747, + "learning_rate": 1.2292807151763233e-05, + "loss": 2.1429, + "step": 13102 + }, + { + "epoch": 0.44, + "grad_norm": 0.733751654624939, + "learning_rate": 1.2291772560982162e-05, + "loss": 2.029, + "step": 13103 + }, + { + "epoch": 0.44, + "grad_norm": 0.7273739576339722, + "learning_rate": 1.2290737944309986e-05, + "loss": 2.1023, + "step": 13104 + }, + { + "epoch": 0.44, + "grad_norm": 0.7427027821540833, + "learning_rate": 1.2289703301758398e-05, + "loss": 2.1286, + "step": 13105 + }, + { + "epoch": 0.44, + "grad_norm": 0.6845645904541016, + "learning_rate": 1.2288668633339083e-05, + "loss": 2.0917, + "step": 13106 + }, + { + "epoch": 0.44, + "grad_norm": 0.7375481128692627, + "learning_rate": 1.2287633939063729e-05, + "loss": 2.0495, + "step": 13107 + }, + { + "epoch": 0.44, + "grad_norm": 0.7013950943946838, + "learning_rate": 1.2286599218944028e-05, + "loss": 2.0518, + "step": 13108 + }, + { + "epoch": 0.44, + "grad_norm": 0.749984085559845, + "learning_rate": 1.2285564472991671e-05, + "loss": 2.1493, + "step": 13109 + }, + { + "epoch": 0.44, + "grad_norm": 0.7206103801727295, + "learning_rate": 1.2284529701218345e-05, + "loss": 2.1019, + "step": 13110 + }, + { + "epoch": 0.44, + "grad_norm": 0.7308148741722107, + "learning_rate": 1.228349490363574e-05, + "loss": 2.0938, + "step": 13111 + }, + { + "epoch": 0.44, + "grad_norm": 0.7367468476295471, + "learning_rate": 1.2282460080255549e-05, + "loss": 2.0537, + "step": 13112 + }, + { + "epoch": 0.44, + "grad_norm": 0.7396413683891296, + "learning_rate": 1.228142523108946e-05, + "loss": 2.077, + "step": 13113 + }, + { + "epoch": 0.44, + "grad_norm": 0.7315376996994019, + "learning_rate": 1.2280390356149173e-05, + "loss": 2.0684, + "step": 13114 + }, + { + "epoch": 0.44, + "grad_norm": 0.7459783554077148, + "learning_rate": 1.2279355455446363e-05, + "loss": 2.147, + "step": 13115 + }, + { + "epoch": 0.44, + "grad_norm": 0.7214423418045044, + "learning_rate": 1.2278320528992738e-05, + "loss": 2.0684, + "step": 13116 + }, + { + "epoch": 0.44, + "grad_norm": 0.7454709410667419, + "learning_rate": 1.2277285576799978e-05, + "loss": 2.1455, + "step": 13117 + }, + { + "epoch": 0.44, + "grad_norm": 0.7578288912773132, + "learning_rate": 1.227625059887978e-05, + "loss": 2.0969, + "step": 13118 + }, + { + "epoch": 0.44, + "grad_norm": 0.7165650129318237, + "learning_rate": 1.2275215595243838e-05, + "loss": 2.0942, + "step": 13119 + }, + { + "epoch": 0.44, + "grad_norm": 0.7278130650520325, + "learning_rate": 1.2274180565903846e-05, + "loss": 2.0724, + "step": 13120 + }, + { + "epoch": 0.44, + "grad_norm": 0.7341870665550232, + "learning_rate": 1.227314551087149e-05, + "loss": 2.0838, + "step": 13121 + }, + { + "epoch": 0.44, + "grad_norm": 0.7364649176597595, + "learning_rate": 1.2272110430158472e-05, + "loss": 2.0031, + "step": 13122 + }, + { + "epoch": 0.44, + "grad_norm": 0.7159063220024109, + "learning_rate": 1.2271075323776482e-05, + "loss": 2.0438, + "step": 13123 + }, + { + "epoch": 0.44, + "grad_norm": 0.7325496077537537, + "learning_rate": 1.2270040191737214e-05, + "loss": 2.1087, + "step": 13124 + }, + { + "epoch": 0.44, + "grad_norm": 0.7154690623283386, + "learning_rate": 1.2269005034052362e-05, + "loss": 2.0633, + "step": 13125 + }, + { + "epoch": 0.44, + "grad_norm": 0.6962517499923706, + "learning_rate": 1.226796985073362e-05, + "loss": 2.0617, + "step": 13126 + }, + { + "epoch": 0.44, + "grad_norm": 0.7379069924354553, + "learning_rate": 1.2266934641792682e-05, + "loss": 2.1081, + "step": 13127 + }, + { + "epoch": 0.44, + "grad_norm": 0.7530523538589478, + "learning_rate": 1.2265899407241248e-05, + "loss": 2.0473, + "step": 13128 + }, + { + "epoch": 0.44, + "grad_norm": 0.7387613654136658, + "learning_rate": 1.2264864147091007e-05, + "loss": 2.1031, + "step": 13129 + }, + { + "epoch": 0.44, + "grad_norm": 0.7239238023757935, + "learning_rate": 1.2263828861353661e-05, + "loss": 2.0882, + "step": 13130 + }, + { + "epoch": 0.44, + "grad_norm": 0.7356334328651428, + "learning_rate": 1.2262793550040903e-05, + "loss": 2.044, + "step": 13131 + }, + { + "epoch": 0.44, + "grad_norm": 0.7270652651786804, + "learning_rate": 1.226175821316443e-05, + "loss": 2.0914, + "step": 13132 + }, + { + "epoch": 0.44, + "grad_norm": 0.7258672118186951, + "learning_rate": 1.2260722850735938e-05, + "loss": 2.079, + "step": 13133 + }, + { + "epoch": 0.44, + "grad_norm": 0.7671827077865601, + "learning_rate": 1.2259687462767125e-05, + "loss": 2.1014, + "step": 13134 + }, + { + "epoch": 0.44, + "grad_norm": 0.7275815606117249, + "learning_rate": 1.2258652049269684e-05, + "loss": 2.1284, + "step": 13135 + }, + { + "epoch": 0.44, + "grad_norm": 0.7642878293991089, + "learning_rate": 1.225761661025532e-05, + "loss": 2.154, + "step": 13136 + }, + { + "epoch": 0.44, + "grad_norm": 0.7268238067626953, + "learning_rate": 1.2256581145735722e-05, + "loss": 2.1164, + "step": 13137 + }, + { + "epoch": 0.44, + "grad_norm": 0.7394101619720459, + "learning_rate": 1.2255545655722595e-05, + "loss": 2.069, + "step": 13138 + }, + { + "epoch": 0.44, + "grad_norm": 0.7537820935249329, + "learning_rate": 1.2254510140227638e-05, + "loss": 2.0637, + "step": 13139 + }, + { + "epoch": 0.44, + "grad_norm": 0.7274131774902344, + "learning_rate": 1.2253474599262543e-05, + "loss": 2.0899, + "step": 13140 + }, + { + "epoch": 0.44, + "grad_norm": 0.7554284334182739, + "learning_rate": 1.2252439032839015e-05, + "loss": 2.1211, + "step": 13141 + }, + { + "epoch": 0.44, + "grad_norm": 0.7334622144699097, + "learning_rate": 1.225140344096875e-05, + "loss": 2.0343, + "step": 13142 + }, + { + "epoch": 0.44, + "grad_norm": 0.7156754732131958, + "learning_rate": 1.2250367823663447e-05, + "loss": 2.1048, + "step": 13143 + }, + { + "epoch": 0.44, + "grad_norm": 0.7202135324478149, + "learning_rate": 1.2249332180934807e-05, + "loss": 2.0942, + "step": 13144 + }, + { + "epoch": 0.44, + "grad_norm": 0.7382623553276062, + "learning_rate": 1.2248296512794532e-05, + "loss": 2.0994, + "step": 13145 + }, + { + "epoch": 0.44, + "grad_norm": 0.7413188219070435, + "learning_rate": 1.2247260819254322e-05, + "loss": 2.093, + "step": 13146 + }, + { + "epoch": 0.44, + "grad_norm": 0.7307173609733582, + "learning_rate": 1.2246225100325875e-05, + "loss": 2.0524, + "step": 13147 + }, + { + "epoch": 0.44, + "grad_norm": 0.7691468000411987, + "learning_rate": 1.224518935602089e-05, + "loss": 2.1094, + "step": 13148 + }, + { + "epoch": 0.44, + "grad_norm": 0.731715738773346, + "learning_rate": 1.2244153586351074e-05, + "loss": 2.1303, + "step": 13149 + }, + { + "epoch": 0.44, + "grad_norm": 0.714421272277832, + "learning_rate": 1.2243117791328127e-05, + "loss": 2.0974, + "step": 13150 + }, + { + "epoch": 0.44, + "grad_norm": 0.72210693359375, + "learning_rate": 1.2242081970963746e-05, + "loss": 2.0474, + "step": 13151 + }, + { + "epoch": 0.44, + "grad_norm": 0.7014721632003784, + "learning_rate": 1.2241046125269642e-05, + "loss": 2.0614, + "step": 13152 + }, + { + "epoch": 0.44, + "grad_norm": 0.7401075959205627, + "learning_rate": 1.224001025425751e-05, + "loss": 1.9836, + "step": 13153 + }, + { + "epoch": 0.44, + "grad_norm": 0.7199180126190186, + "learning_rate": 1.2238974357939056e-05, + "loss": 2.1379, + "step": 13154 + }, + { + "epoch": 0.44, + "grad_norm": 0.762093186378479, + "learning_rate": 1.2237938436325978e-05, + "loss": 2.0546, + "step": 13155 + }, + { + "epoch": 0.44, + "grad_norm": 0.7383498549461365, + "learning_rate": 1.2236902489429988e-05, + "loss": 2.0278, + "step": 13156 + }, + { + "epoch": 0.44, + "grad_norm": 0.7315771579742432, + "learning_rate": 1.223586651726278e-05, + "loss": 2.0895, + "step": 13157 + }, + { + "epoch": 0.44, + "grad_norm": 0.7298175096511841, + "learning_rate": 1.2234830519836067e-05, + "loss": 2.0403, + "step": 13158 + }, + { + "epoch": 0.44, + "grad_norm": 0.7435483932495117, + "learning_rate": 1.2233794497161545e-05, + "loss": 2.0874, + "step": 13159 + }, + { + "epoch": 0.44, + "grad_norm": 0.7397019267082214, + "learning_rate": 1.2232758449250923e-05, + "loss": 2.1538, + "step": 13160 + }, + { + "epoch": 0.44, + "grad_norm": 0.7528139352798462, + "learning_rate": 1.2231722376115907e-05, + "loss": 2.1492, + "step": 13161 + }, + { + "epoch": 0.44, + "grad_norm": 0.7568425536155701, + "learning_rate": 1.2230686277768195e-05, + "loss": 2.1088, + "step": 13162 + }, + { + "epoch": 0.44, + "grad_norm": 0.7638435363769531, + "learning_rate": 1.2229650154219501e-05, + "loss": 2.068, + "step": 13163 + }, + { + "epoch": 0.44, + "grad_norm": 0.7074065208435059, + "learning_rate": 1.2228614005481525e-05, + "loss": 2.1211, + "step": 13164 + }, + { + "epoch": 0.44, + "grad_norm": 0.7352027297019958, + "learning_rate": 1.2227577831565973e-05, + "loss": 2.1219, + "step": 13165 + }, + { + "epoch": 0.44, + "grad_norm": 0.7174952030181885, + "learning_rate": 1.2226541632484553e-05, + "loss": 2.0801, + "step": 13166 + }, + { + "epoch": 0.44, + "grad_norm": 0.7294105291366577, + "learning_rate": 1.222550540824897e-05, + "loss": 2.0844, + "step": 13167 + }, + { + "epoch": 0.44, + "grad_norm": 0.7511965036392212, + "learning_rate": 1.2224469158870931e-05, + "loss": 2.1647, + "step": 13168 + }, + { + "epoch": 0.44, + "grad_norm": 0.722353458404541, + "learning_rate": 1.2223432884362143e-05, + "loss": 2.0466, + "step": 13169 + }, + { + "epoch": 0.44, + "grad_norm": 0.7366239428520203, + "learning_rate": 1.2222396584734314e-05, + "loss": 2.1238, + "step": 13170 + }, + { + "epoch": 0.44, + "grad_norm": 0.7566250562667847, + "learning_rate": 1.2221360259999149e-05, + "loss": 2.1302, + "step": 13171 + }, + { + "epoch": 0.44, + "grad_norm": 0.75490802526474, + "learning_rate": 1.222032391016836e-05, + "loss": 2.1209, + "step": 13172 + }, + { + "epoch": 0.44, + "grad_norm": 0.7112650275230408, + "learning_rate": 1.221928753525365e-05, + "loss": 2.1355, + "step": 13173 + }, + { + "epoch": 0.44, + "grad_norm": 0.7453727126121521, + "learning_rate": 1.2218251135266734e-05, + "loss": 2.1415, + "step": 13174 + }, + { + "epoch": 0.44, + "grad_norm": 0.7279874086380005, + "learning_rate": 1.2217214710219315e-05, + "loss": 2.0881, + "step": 13175 + }, + { + "epoch": 0.44, + "grad_norm": 0.683152437210083, + "learning_rate": 1.2216178260123103e-05, + "loss": 2.009, + "step": 13176 + }, + { + "epoch": 0.44, + "grad_norm": 0.7088492512702942, + "learning_rate": 1.2215141784989808e-05, + "loss": 2.1278, + "step": 13177 + }, + { + "epoch": 0.44, + "grad_norm": 0.709507167339325, + "learning_rate": 1.221410528483114e-05, + "loss": 2.0926, + "step": 13178 + }, + { + "epoch": 0.44, + "grad_norm": 0.7219312787055969, + "learning_rate": 1.2213068759658806e-05, + "loss": 2.0901, + "step": 13179 + }, + { + "epoch": 0.44, + "grad_norm": 0.7445300817489624, + "learning_rate": 1.2212032209484521e-05, + "loss": 2.1051, + "step": 13180 + }, + { + "epoch": 0.44, + "grad_norm": 0.7414700984954834, + "learning_rate": 1.2210995634319991e-05, + "loss": 2.089, + "step": 13181 + }, + { + "epoch": 0.44, + "grad_norm": 0.7115605473518372, + "learning_rate": 1.2209959034176928e-05, + "loss": 2.1008, + "step": 13182 + }, + { + "epoch": 0.44, + "grad_norm": 0.7357711791992188, + "learning_rate": 1.2208922409067045e-05, + "loss": 2.0417, + "step": 13183 + }, + { + "epoch": 0.44, + "grad_norm": 0.7760270833969116, + "learning_rate": 1.2207885759002047e-05, + "loss": 2.139, + "step": 13184 + }, + { + "epoch": 0.44, + "grad_norm": 0.7291125059127808, + "learning_rate": 1.2206849083993654e-05, + "loss": 2.0478, + "step": 13185 + }, + { + "epoch": 0.44, + "grad_norm": 0.7175349593162537, + "learning_rate": 1.2205812384053568e-05, + "loss": 2.0609, + "step": 13186 + }, + { + "epoch": 0.44, + "grad_norm": 0.7666062116622925, + "learning_rate": 1.2204775659193513e-05, + "loss": 2.0536, + "step": 13187 + }, + { + "epoch": 0.44, + "grad_norm": 0.7302495241165161, + "learning_rate": 1.2203738909425191e-05, + "loss": 2.0662, + "step": 13188 + }, + { + "epoch": 0.44, + "grad_norm": 0.7195479869842529, + "learning_rate": 1.2202702134760318e-05, + "loss": 2.0583, + "step": 13189 + }, + { + "epoch": 0.44, + "grad_norm": 0.7674867510795593, + "learning_rate": 1.2201665335210609e-05, + "loss": 2.0538, + "step": 13190 + }, + { + "epoch": 0.44, + "grad_norm": 0.7009322047233582, + "learning_rate": 1.2200628510787774e-05, + "loss": 2.0639, + "step": 13191 + }, + { + "epoch": 0.44, + "grad_norm": 0.7210790514945984, + "learning_rate": 1.2199591661503527e-05, + "loss": 2.057, + "step": 13192 + }, + { + "epoch": 0.44, + "grad_norm": 0.7276585102081299, + "learning_rate": 1.2198554787369586e-05, + "loss": 2.0125, + "step": 13193 + }, + { + "epoch": 0.44, + "grad_norm": 0.751298189163208, + "learning_rate": 1.2197517888397657e-05, + "loss": 2.1108, + "step": 13194 + }, + { + "epoch": 0.44, + "grad_norm": 0.7229934334754944, + "learning_rate": 1.2196480964599461e-05, + "loss": 2.1031, + "step": 13195 + }, + { + "epoch": 0.44, + "grad_norm": 0.7401496767997742, + "learning_rate": 1.2195444015986712e-05, + "loss": 2.1331, + "step": 13196 + }, + { + "epoch": 0.44, + "grad_norm": 0.7509361505508423, + "learning_rate": 1.219440704257112e-05, + "loss": 2.1087, + "step": 13197 + }, + { + "epoch": 0.44, + "grad_norm": 0.7497627139091492, + "learning_rate": 1.2193370044364405e-05, + "loss": 2.1031, + "step": 13198 + }, + { + "epoch": 0.44, + "grad_norm": 0.7191805243492126, + "learning_rate": 1.2192333021378282e-05, + "loss": 2.0454, + "step": 13199 + }, + { + "epoch": 0.44, + "grad_norm": 0.748418390750885, + "learning_rate": 1.2191295973624463e-05, + "loss": 2.1085, + "step": 13200 + }, + { + "epoch": 0.44, + "grad_norm": 0.712529182434082, + "learning_rate": 1.2190258901114667e-05, + "loss": 2.1191, + "step": 13201 + }, + { + "epoch": 0.44, + "grad_norm": 0.7392669320106506, + "learning_rate": 1.2189221803860609e-05, + "loss": 2.1181, + "step": 13202 + }, + { + "epoch": 0.44, + "grad_norm": 0.7241291403770447, + "learning_rate": 1.2188184681874007e-05, + "loss": 2.129, + "step": 13203 + }, + { + "epoch": 0.44, + "grad_norm": 0.769792914390564, + "learning_rate": 1.2187147535166577e-05, + "loss": 2.097, + "step": 13204 + }, + { + "epoch": 0.44, + "grad_norm": 0.7727922797203064, + "learning_rate": 1.2186110363750035e-05, + "loss": 2.1007, + "step": 13205 + }, + { + "epoch": 0.44, + "grad_norm": 0.7304590940475464, + "learning_rate": 1.2185073167636098e-05, + "loss": 2.0325, + "step": 13206 + }, + { + "epoch": 0.44, + "grad_norm": 0.7333456873893738, + "learning_rate": 1.218403594683649e-05, + "loss": 2.0828, + "step": 13207 + }, + { + "epoch": 0.44, + "grad_norm": 0.7269657850265503, + "learning_rate": 1.2182998701362918e-05, + "loss": 2.0368, + "step": 13208 + }, + { + "epoch": 0.44, + "grad_norm": 0.7209774851799011, + "learning_rate": 1.2181961431227112e-05, + "loss": 2.0632, + "step": 13209 + }, + { + "epoch": 0.44, + "grad_norm": 0.7555052042007446, + "learning_rate": 1.218092413644078e-05, + "loss": 2.0664, + "step": 13210 + }, + { + "epoch": 0.44, + "grad_norm": 0.7058494091033936, + "learning_rate": 1.2179886817015645e-05, + "loss": 2.0956, + "step": 13211 + }, + { + "epoch": 0.44, + "grad_norm": 0.8538835048675537, + "learning_rate": 1.2178849472963428e-05, + "loss": 2.1221, + "step": 13212 + }, + { + "epoch": 0.44, + "grad_norm": 0.7394633889198303, + "learning_rate": 1.2177812104295848e-05, + "loss": 2.0115, + "step": 13213 + }, + { + "epoch": 0.44, + "grad_norm": 0.7214871644973755, + "learning_rate": 1.2176774711024618e-05, + "loss": 2.0205, + "step": 13214 + }, + { + "epoch": 0.44, + "grad_norm": 0.7459919452667236, + "learning_rate": 1.217573729316147e-05, + "loss": 2.1065, + "step": 13215 + }, + { + "epoch": 0.44, + "grad_norm": 0.7378609776496887, + "learning_rate": 1.2174699850718113e-05, + "loss": 2.1565, + "step": 13216 + }, + { + "epoch": 0.44, + "grad_norm": 0.7099727988243103, + "learning_rate": 1.217366238370627e-05, + "loss": 2.0685, + "step": 13217 + }, + { + "epoch": 0.44, + "grad_norm": 0.7468857169151306, + "learning_rate": 1.2172624892137668e-05, + "loss": 2.1075, + "step": 13218 + }, + { + "epoch": 0.44, + "grad_norm": 0.7311460971832275, + "learning_rate": 1.2171587376024019e-05, + "loss": 2.0701, + "step": 13219 + }, + { + "epoch": 0.44, + "grad_norm": 0.7185695171356201, + "learning_rate": 1.2170549835377053e-05, + "loss": 2.1327, + "step": 13220 + }, + { + "epoch": 0.44, + "grad_norm": 0.7469179630279541, + "learning_rate": 1.2169512270208483e-05, + "loss": 2.1314, + "step": 13221 + }, + { + "epoch": 0.44, + "grad_norm": 0.7485544681549072, + "learning_rate": 1.2168474680530034e-05, + "loss": 2.1216, + "step": 13222 + }, + { + "epoch": 0.44, + "grad_norm": 0.7138108015060425, + "learning_rate": 1.2167437066353433e-05, + "loss": 2.0641, + "step": 13223 + }, + { + "epoch": 0.44, + "grad_norm": 0.7264711856842041, + "learning_rate": 1.2166399427690396e-05, + "loss": 2.0888, + "step": 13224 + }, + { + "epoch": 0.44, + "grad_norm": 0.7644013166427612, + "learning_rate": 1.2165361764552649e-05, + "loss": 2.1206, + "step": 13225 + }, + { + "epoch": 0.44, + "grad_norm": 0.7458553314208984, + "learning_rate": 1.2164324076951912e-05, + "loss": 2.0934, + "step": 13226 + }, + { + "epoch": 0.44, + "grad_norm": 0.7324727177619934, + "learning_rate": 1.216328636489991e-05, + "loss": 2.0704, + "step": 13227 + }, + { + "epoch": 0.44, + "grad_norm": 0.7355753779411316, + "learning_rate": 1.2162248628408366e-05, + "loss": 2.0964, + "step": 13228 + }, + { + "epoch": 0.44, + "grad_norm": 0.7430371046066284, + "learning_rate": 1.2161210867489008e-05, + "loss": 2.0361, + "step": 13229 + }, + { + "epoch": 0.44, + "grad_norm": 0.7671518325805664, + "learning_rate": 1.2160173082153553e-05, + "loss": 2.0983, + "step": 13230 + }, + { + "epoch": 0.44, + "grad_norm": 0.7220532894134521, + "learning_rate": 1.2159135272413732e-05, + "loss": 2.1486, + "step": 13231 + }, + { + "epoch": 0.44, + "grad_norm": 0.7248452305793762, + "learning_rate": 1.2158097438281262e-05, + "loss": 2.081, + "step": 13232 + }, + { + "epoch": 0.44, + "grad_norm": 0.70583176612854, + "learning_rate": 1.2157059579767871e-05, + "loss": 2.0682, + "step": 13233 + }, + { + "epoch": 0.44, + "grad_norm": 0.717170000076294, + "learning_rate": 1.215602169688529e-05, + "loss": 2.1011, + "step": 13234 + }, + { + "epoch": 0.44, + "grad_norm": 0.735986053943634, + "learning_rate": 1.2154983789645237e-05, + "loss": 1.9876, + "step": 13235 + }, + { + "epoch": 0.44, + "grad_norm": 0.790681004524231, + "learning_rate": 1.215394585805944e-05, + "loss": 2.0056, + "step": 13236 + }, + { + "epoch": 0.44, + "grad_norm": 0.7352068424224854, + "learning_rate": 1.2152907902139624e-05, + "loss": 2.1499, + "step": 13237 + }, + { + "epoch": 0.44, + "grad_norm": 0.7338637113571167, + "learning_rate": 1.2151869921897517e-05, + "loss": 2.0664, + "step": 13238 + }, + { + "epoch": 0.44, + "grad_norm": 0.724999189376831, + "learning_rate": 1.2150831917344843e-05, + "loss": 2.1252, + "step": 13239 + }, + { + "epoch": 0.44, + "grad_norm": 0.725937008857727, + "learning_rate": 1.2149793888493336e-05, + "loss": 2.0532, + "step": 13240 + }, + { + "epoch": 0.44, + "grad_norm": 0.7111504077911377, + "learning_rate": 1.214875583535471e-05, + "loss": 2.0967, + "step": 13241 + }, + { + "epoch": 0.44, + "grad_norm": 0.738692045211792, + "learning_rate": 1.2147717757940704e-05, + "loss": 2.116, + "step": 13242 + }, + { + "epoch": 0.44, + "grad_norm": 0.7376387715339661, + "learning_rate": 1.2146679656263043e-05, + "loss": 2.1008, + "step": 13243 + }, + { + "epoch": 0.44, + "grad_norm": 0.7186775207519531, + "learning_rate": 1.2145641530333449e-05, + "loss": 2.0921, + "step": 13244 + }, + { + "epoch": 0.44, + "grad_norm": 0.7621871829032898, + "learning_rate": 1.214460338016366e-05, + "loss": 2.0266, + "step": 13245 + }, + { + "epoch": 0.44, + "grad_norm": 0.7311782240867615, + "learning_rate": 1.2143565205765395e-05, + "loss": 2.094, + "step": 13246 + }, + { + "epoch": 0.44, + "grad_norm": 0.7438217401504517, + "learning_rate": 1.214252700715039e-05, + "loss": 2.0656, + "step": 13247 + }, + { + "epoch": 0.44, + "grad_norm": 0.7314299941062927, + "learning_rate": 1.2141488784330367e-05, + "loss": 2.0232, + "step": 13248 + }, + { + "epoch": 0.44, + "grad_norm": 0.7191365361213684, + "learning_rate": 1.214045053731706e-05, + "loss": 2.0018, + "step": 13249 + }, + { + "epoch": 0.44, + "grad_norm": 0.7546627521514893, + "learning_rate": 1.2139412266122194e-05, + "loss": 2.0866, + "step": 13250 + }, + { + "epoch": 0.44, + "grad_norm": 0.7303929924964905, + "learning_rate": 1.2138373970757508e-05, + "loss": 2.0726, + "step": 13251 + }, + { + "epoch": 0.44, + "grad_norm": 0.7280935645103455, + "learning_rate": 1.2137335651234721e-05, + "loss": 2.1202, + "step": 13252 + }, + { + "epoch": 0.44, + "grad_norm": 0.7485957741737366, + "learning_rate": 1.213629730756557e-05, + "loss": 2.1531, + "step": 13253 + }, + { + "epoch": 0.44, + "grad_norm": 0.7577044367790222, + "learning_rate": 1.2135258939761787e-05, + "loss": 2.1567, + "step": 13254 + }, + { + "epoch": 0.44, + "grad_norm": 0.732736349105835, + "learning_rate": 1.2134220547835096e-05, + "loss": 2.1081, + "step": 13255 + }, + { + "epoch": 0.44, + "grad_norm": 0.747572660446167, + "learning_rate": 1.2133182131797234e-05, + "loss": 2.0604, + "step": 13256 + }, + { + "epoch": 0.44, + "grad_norm": 0.779145359992981, + "learning_rate": 1.213214369165993e-05, + "loss": 2.097, + "step": 13257 + }, + { + "epoch": 0.44, + "grad_norm": 0.728493869304657, + "learning_rate": 1.2131105227434916e-05, + "loss": 2.0945, + "step": 13258 + }, + { + "epoch": 0.44, + "grad_norm": 0.7282928228378296, + "learning_rate": 1.2130066739133923e-05, + "loss": 2.0741, + "step": 13259 + }, + { + "epoch": 0.44, + "grad_norm": 0.7217252850532532, + "learning_rate": 1.2129028226768686e-05, + "loss": 2.0593, + "step": 13260 + }, + { + "epoch": 0.44, + "grad_norm": 0.7185680866241455, + "learning_rate": 1.2127989690350937e-05, + "loss": 2.0446, + "step": 13261 + }, + { + "epoch": 0.44, + "grad_norm": 0.7479565143585205, + "learning_rate": 1.2126951129892406e-05, + "loss": 2.1234, + "step": 13262 + }, + { + "epoch": 0.44, + "grad_norm": 0.7399598956108093, + "learning_rate": 1.2125912545404826e-05, + "loss": 2.0747, + "step": 13263 + }, + { + "epoch": 0.44, + "grad_norm": 0.794170081615448, + "learning_rate": 1.2124873936899932e-05, + "loss": 2.0976, + "step": 13264 + }, + { + "epoch": 0.44, + "grad_norm": 0.7387720942497253, + "learning_rate": 1.2123835304389462e-05, + "loss": 2.0657, + "step": 13265 + }, + { + "epoch": 0.44, + "grad_norm": 0.7227472066879272, + "learning_rate": 1.212279664788514e-05, + "loss": 2.1054, + "step": 13266 + }, + { + "epoch": 0.44, + "grad_norm": 0.7274028658866882, + "learning_rate": 1.2121757967398711e-05, + "loss": 2.1271, + "step": 13267 + }, + { + "epoch": 0.44, + "grad_norm": 0.8054218888282776, + "learning_rate": 1.21207192629419e-05, + "loss": 2.2246, + "step": 13268 + }, + { + "epoch": 0.44, + "grad_norm": 0.7277166843414307, + "learning_rate": 1.2119680534526447e-05, + "loss": 2.1673, + "step": 13269 + }, + { + "epoch": 0.44, + "grad_norm": 0.7338177561759949, + "learning_rate": 1.2118641782164084e-05, + "loss": 2.0853, + "step": 13270 + }, + { + "epoch": 0.44, + "grad_norm": 0.7415596842765808, + "learning_rate": 1.2117603005866549e-05, + "loss": 2.1669, + "step": 13271 + }, + { + "epoch": 0.44, + "grad_norm": 0.7345831990242004, + "learning_rate": 1.2116564205645576e-05, + "loss": 2.1641, + "step": 13272 + }, + { + "epoch": 0.44, + "grad_norm": 0.7320764064788818, + "learning_rate": 1.2115525381512902e-05, + "loss": 1.9925, + "step": 13273 + }, + { + "epoch": 0.44, + "grad_norm": 0.7361744046211243, + "learning_rate": 1.211448653348026e-05, + "loss": 2.0757, + "step": 13274 + }, + { + "epoch": 0.44, + "grad_norm": 0.7120850086212158, + "learning_rate": 1.211344766155939e-05, + "loss": 2.0971, + "step": 13275 + }, + { + "epoch": 0.44, + "grad_norm": 0.7386009097099304, + "learning_rate": 1.211240876576203e-05, + "loss": 2.0218, + "step": 13276 + }, + { + "epoch": 0.44, + "grad_norm": 0.7775686383247375, + "learning_rate": 1.2111369846099907e-05, + "loss": 2.1285, + "step": 13277 + }, + { + "epoch": 0.44, + "grad_norm": 0.7459964156150818, + "learning_rate": 1.211033090258477e-05, + "loss": 1.9862, + "step": 13278 + }, + { + "epoch": 0.44, + "grad_norm": 0.743541955947876, + "learning_rate": 1.2109291935228347e-05, + "loss": 2.0848, + "step": 13279 + }, + { + "epoch": 0.44, + "grad_norm": 0.7040044665336609, + "learning_rate": 1.2108252944042385e-05, + "loss": 2.13, + "step": 13280 + }, + { + "epoch": 0.44, + "grad_norm": 0.7124125361442566, + "learning_rate": 1.2107213929038615e-05, + "loss": 2.0474, + "step": 13281 + }, + { + "epoch": 0.44, + "grad_norm": 0.712355375289917, + "learning_rate": 1.2106174890228775e-05, + "loss": 1.9857, + "step": 13282 + }, + { + "epoch": 0.44, + "grad_norm": 0.7673336863517761, + "learning_rate": 1.2105135827624606e-05, + "loss": 2.0899, + "step": 13283 + }, + { + "epoch": 0.44, + "grad_norm": 0.7353999614715576, + "learning_rate": 1.2104096741237847e-05, + "loss": 2.1287, + "step": 13284 + }, + { + "epoch": 0.44, + "grad_norm": 0.733677327632904, + "learning_rate": 1.2103057631080236e-05, + "loss": 2.0848, + "step": 13285 + }, + { + "epoch": 0.44, + "grad_norm": 0.7404350638389587, + "learning_rate": 1.2102018497163513e-05, + "loss": 2.0185, + "step": 13286 + }, + { + "epoch": 0.44, + "grad_norm": 0.7109466195106506, + "learning_rate": 1.2100979339499415e-05, + "loss": 2.0855, + "step": 13287 + }, + { + "epoch": 0.44, + "grad_norm": 0.7178806066513062, + "learning_rate": 1.2099940158099686e-05, + "loss": 2.0765, + "step": 13288 + }, + { + "epoch": 0.44, + "grad_norm": 0.728897750377655, + "learning_rate": 1.2098900952976063e-05, + "loss": 2.0555, + "step": 13289 + }, + { + "epoch": 0.44, + "grad_norm": 0.7291553020477295, + "learning_rate": 1.2097861724140286e-05, + "loss": 2.0982, + "step": 13290 + }, + { + "epoch": 0.44, + "grad_norm": 0.7267939448356628, + "learning_rate": 1.2096822471604097e-05, + "loss": 2.0674, + "step": 13291 + }, + { + "epoch": 0.44, + "grad_norm": 0.7231267690658569, + "learning_rate": 1.2095783195379237e-05, + "loss": 2.1678, + "step": 13292 + }, + { + "epoch": 0.44, + "grad_norm": 0.7178898453712463, + "learning_rate": 1.2094743895477447e-05, + "loss": 2.0928, + "step": 13293 + }, + { + "epoch": 0.44, + "grad_norm": 0.7198132872581482, + "learning_rate": 1.2093704571910468e-05, + "loss": 2.122, + "step": 13294 + }, + { + "epoch": 0.44, + "grad_norm": 0.7055746912956238, + "learning_rate": 1.209266522469004e-05, + "loss": 2.1212, + "step": 13295 + }, + { + "epoch": 0.44, + "grad_norm": 0.7428348064422607, + "learning_rate": 1.2091625853827911e-05, + "loss": 2.0449, + "step": 13296 + }, + { + "epoch": 0.44, + "grad_norm": 0.7168387174606323, + "learning_rate": 1.2090586459335816e-05, + "loss": 2.0194, + "step": 13297 + }, + { + "epoch": 0.44, + "grad_norm": 0.7541288733482361, + "learning_rate": 1.20895470412255e-05, + "loss": 2.1057, + "step": 13298 + }, + { + "epoch": 0.44, + "grad_norm": 0.7282351851463318, + "learning_rate": 1.2088507599508707e-05, + "loss": 2.0268, + "step": 13299 + }, + { + "epoch": 0.44, + "grad_norm": 0.7244184017181396, + "learning_rate": 1.208746813419718e-05, + "loss": 2.1372, + "step": 13300 + }, + { + "epoch": 0.44, + "grad_norm": 0.7184075713157654, + "learning_rate": 1.2086428645302659e-05, + "loss": 2.1176, + "step": 13301 + }, + { + "epoch": 0.44, + "grad_norm": 0.7376933693885803, + "learning_rate": 1.2085389132836893e-05, + "loss": 2.1075, + "step": 13302 + }, + { + "epoch": 0.44, + "grad_norm": 0.7401725053787231, + "learning_rate": 1.208434959681162e-05, + "loss": 2.0628, + "step": 13303 + }, + { + "epoch": 0.44, + "grad_norm": 0.7182230949401855, + "learning_rate": 1.208331003723859e-05, + "loss": 2.0199, + "step": 13304 + }, + { + "epoch": 0.44, + "grad_norm": 0.6960762143135071, + "learning_rate": 1.208227045412954e-05, + "loss": 2.0676, + "step": 13305 + }, + { + "epoch": 0.44, + "grad_norm": 0.7502022981643677, + "learning_rate": 1.2081230847496221e-05, + "loss": 2.0893, + "step": 13306 + }, + { + "epoch": 0.44, + "grad_norm": 0.7258266806602478, + "learning_rate": 1.2080191217350374e-05, + "loss": 2.0767, + "step": 13307 + }, + { + "epoch": 0.44, + "grad_norm": 0.7505900859832764, + "learning_rate": 1.2079151563703749e-05, + "loss": 2.0079, + "step": 13308 + }, + { + "epoch": 0.44, + "grad_norm": 0.7215785384178162, + "learning_rate": 1.2078111886568085e-05, + "loss": 2.0666, + "step": 13309 + }, + { + "epoch": 0.44, + "grad_norm": 0.7636035084724426, + "learning_rate": 1.2077072185955131e-05, + "loss": 2.0765, + "step": 13310 + }, + { + "epoch": 0.44, + "grad_norm": 0.7186139225959778, + "learning_rate": 1.2076032461876636e-05, + "loss": 2.1178, + "step": 13311 + }, + { + "epoch": 0.44, + "grad_norm": 0.713874340057373, + "learning_rate": 1.2074992714344338e-05, + "loss": 2.0386, + "step": 13312 + }, + { + "epoch": 0.44, + "grad_norm": 0.7320047616958618, + "learning_rate": 1.2073952943369992e-05, + "loss": 2.0103, + "step": 13313 + }, + { + "epoch": 0.44, + "grad_norm": 0.7288681268692017, + "learning_rate": 1.2072913148965341e-05, + "loss": 2.0505, + "step": 13314 + }, + { + "epoch": 0.44, + "grad_norm": 0.7776504158973694, + "learning_rate": 1.207187333114213e-05, + "loss": 2.0223, + "step": 13315 + }, + { + "epoch": 0.44, + "grad_norm": 0.7459606528282166, + "learning_rate": 1.207083348991211e-05, + "loss": 2.2105, + "step": 13316 + }, + { + "epoch": 0.44, + "grad_norm": 0.7721802592277527, + "learning_rate": 1.2069793625287027e-05, + "loss": 2.026, + "step": 13317 + }, + { + "epoch": 0.44, + "grad_norm": 0.701431930065155, + "learning_rate": 1.2068753737278626e-05, + "loss": 2.0211, + "step": 13318 + }, + { + "epoch": 0.44, + "grad_norm": 0.7196957468986511, + "learning_rate": 1.2067713825898662e-05, + "loss": 1.9918, + "step": 13319 + }, + { + "epoch": 0.44, + "grad_norm": 0.7388178110122681, + "learning_rate": 1.2066673891158875e-05, + "loss": 2.058, + "step": 13320 + }, + { + "epoch": 0.44, + "grad_norm": 0.7367233633995056, + "learning_rate": 1.2065633933071019e-05, + "loss": 2.099, + "step": 13321 + }, + { + "epoch": 0.44, + "grad_norm": 0.7038823962211609, + "learning_rate": 1.2064593951646843e-05, + "loss": 2.0612, + "step": 13322 + }, + { + "epoch": 0.44, + "grad_norm": 0.730548083782196, + "learning_rate": 1.2063553946898092e-05, + "loss": 2.0726, + "step": 13323 + }, + { + "epoch": 0.44, + "grad_norm": 0.7345361709594727, + "learning_rate": 1.206251391883652e-05, + "loss": 2.088, + "step": 13324 + }, + { + "epoch": 0.44, + "grad_norm": 0.7355608940124512, + "learning_rate": 1.2061473867473874e-05, + "loss": 2.0906, + "step": 13325 + }, + { + "epoch": 0.44, + "grad_norm": 0.7406070232391357, + "learning_rate": 1.2060433792821901e-05, + "loss": 2.0383, + "step": 13326 + }, + { + "epoch": 0.44, + "grad_norm": 0.7416023015975952, + "learning_rate": 1.2059393694892361e-05, + "loss": 2.013, + "step": 13327 + }, + { + "epoch": 0.44, + "grad_norm": 0.7365932464599609, + "learning_rate": 1.2058353573696995e-05, + "loss": 2.0441, + "step": 13328 + }, + { + "epoch": 0.44, + "grad_norm": 0.7340754270553589, + "learning_rate": 1.2057313429247554e-05, + "loss": 2.1135, + "step": 13329 + }, + { + "epoch": 0.44, + "grad_norm": 0.7222080826759338, + "learning_rate": 1.2056273261555793e-05, + "loss": 2.179, + "step": 13330 + }, + { + "epoch": 0.44, + "grad_norm": 0.7258617281913757, + "learning_rate": 1.2055233070633464e-05, + "loss": 2.0164, + "step": 13331 + }, + { + "epoch": 0.44, + "grad_norm": 0.7366630434989929, + "learning_rate": 1.2054192856492315e-05, + "loss": 2.0369, + "step": 13332 + }, + { + "epoch": 0.44, + "grad_norm": 0.7424752712249756, + "learning_rate": 1.20531526191441e-05, + "loss": 2.0194, + "step": 13333 + }, + { + "epoch": 0.44, + "grad_norm": 0.7324604392051697, + "learning_rate": 1.2052112358600565e-05, + "loss": 2.2096, + "step": 13334 + }, + { + "epoch": 0.44, + "grad_norm": 0.7110586166381836, + "learning_rate": 1.2051072074873473e-05, + "loss": 2.1446, + "step": 13335 + }, + { + "epoch": 0.44, + "grad_norm": 0.7497795820236206, + "learning_rate": 1.2050031767974568e-05, + "loss": 2.11, + "step": 13336 + }, + { + "epoch": 0.44, + "grad_norm": 0.7348743677139282, + "learning_rate": 1.2048991437915605e-05, + "loss": 2.0414, + "step": 13337 + }, + { + "epoch": 0.44, + "grad_norm": 0.7378973960876465, + "learning_rate": 1.204795108470834e-05, + "loss": 2.0422, + "step": 13338 + }, + { + "epoch": 0.44, + "grad_norm": 0.7241606712341309, + "learning_rate": 1.2046910708364523e-05, + "loss": 2.0843, + "step": 13339 + }, + { + "epoch": 0.44, + "grad_norm": 0.704196572303772, + "learning_rate": 1.2045870308895908e-05, + "loss": 2.1158, + "step": 13340 + }, + { + "epoch": 0.44, + "grad_norm": 0.7024517059326172, + "learning_rate": 1.2044829886314249e-05, + "loss": 2.0759, + "step": 13341 + }, + { + "epoch": 0.44, + "grad_norm": 0.7401084899902344, + "learning_rate": 1.2043789440631301e-05, + "loss": 2.0879, + "step": 13342 + }, + { + "epoch": 0.44, + "grad_norm": 0.7075070738792419, + "learning_rate": 1.2042748971858816e-05, + "loss": 2.0724, + "step": 13343 + }, + { + "epoch": 0.44, + "grad_norm": 0.7374077439308167, + "learning_rate": 1.2041708480008554e-05, + "loss": 2.1015, + "step": 13344 + }, + { + "epoch": 0.44, + "grad_norm": 0.7080702185630798, + "learning_rate": 1.2040667965092262e-05, + "loss": 2.1689, + "step": 13345 + }, + { + "epoch": 0.44, + "grad_norm": 0.7485519647598267, + "learning_rate": 1.2039627427121701e-05, + "loss": 2.067, + "step": 13346 + }, + { + "epoch": 0.44, + "grad_norm": 0.7322629690170288, + "learning_rate": 1.2038586866108626e-05, + "loss": 2.0579, + "step": 13347 + }, + { + "epoch": 0.44, + "grad_norm": 0.7240492105484009, + "learning_rate": 1.2037546282064787e-05, + "loss": 2.0415, + "step": 13348 + }, + { + "epoch": 0.44, + "grad_norm": 0.7376251816749573, + "learning_rate": 1.2036505675001951e-05, + "loss": 2.1102, + "step": 13349 + }, + { + "epoch": 0.44, + "grad_norm": 0.7244545817375183, + "learning_rate": 1.2035465044931862e-05, + "loss": 2.0624, + "step": 13350 + }, + { + "epoch": 0.44, + "grad_norm": 0.6985520720481873, + "learning_rate": 1.2034424391866285e-05, + "loss": 2.1079, + "step": 13351 + }, + { + "epoch": 0.44, + "grad_norm": 0.7272088527679443, + "learning_rate": 1.203338371581697e-05, + "loss": 2.0419, + "step": 13352 + }, + { + "epoch": 0.44, + "grad_norm": 0.7849928736686707, + "learning_rate": 1.203234301679568e-05, + "loss": 2.0514, + "step": 13353 + }, + { + "epoch": 0.44, + "grad_norm": 0.6937972903251648, + "learning_rate": 1.203130229481417e-05, + "loss": 2.0466, + "step": 13354 + }, + { + "epoch": 0.44, + "grad_norm": 0.7293304800987244, + "learning_rate": 1.2030261549884197e-05, + "loss": 2.1136, + "step": 13355 + }, + { + "epoch": 0.44, + "grad_norm": 0.7527003288269043, + "learning_rate": 1.2029220782017515e-05, + "loss": 2.0399, + "step": 13356 + }, + { + "epoch": 0.44, + "grad_norm": 0.731359601020813, + "learning_rate": 1.2028179991225889e-05, + "loss": 2.1017, + "step": 13357 + }, + { + "epoch": 0.44, + "grad_norm": 0.7181694507598877, + "learning_rate": 1.2027139177521074e-05, + "loss": 2.0404, + "step": 13358 + }, + { + "epoch": 0.44, + "grad_norm": 0.7249829173088074, + "learning_rate": 1.2026098340914826e-05, + "loss": 2.1439, + "step": 13359 + }, + { + "epoch": 0.44, + "grad_norm": 0.7250382304191589, + "learning_rate": 1.202505748141891e-05, + "loss": 2.0003, + "step": 13360 + }, + { + "epoch": 0.44, + "grad_norm": 0.7194114923477173, + "learning_rate": 1.2024016599045083e-05, + "loss": 2.032, + "step": 13361 + }, + { + "epoch": 0.44, + "grad_norm": 0.7364223599433899, + "learning_rate": 1.2022975693805099e-05, + "loss": 2.0746, + "step": 13362 + }, + { + "epoch": 0.44, + "grad_norm": 0.7428044080734253, + "learning_rate": 1.2021934765710724e-05, + "loss": 2.0517, + "step": 13363 + }, + { + "epoch": 0.44, + "grad_norm": 0.7028313279151917, + "learning_rate": 1.202089381477371e-05, + "loss": 2.0246, + "step": 13364 + }, + { + "epoch": 0.44, + "grad_norm": 0.7533397078514099, + "learning_rate": 1.2019852841005825e-05, + "loss": 2.0668, + "step": 13365 + }, + { + "epoch": 0.44, + "grad_norm": 0.743570864200592, + "learning_rate": 1.201881184441883e-05, + "loss": 2.0481, + "step": 13366 + }, + { + "epoch": 0.44, + "grad_norm": 0.7564150094985962, + "learning_rate": 1.2017770825024475e-05, + "loss": 2.103, + "step": 13367 + }, + { + "epoch": 0.44, + "grad_norm": 0.6989673972129822, + "learning_rate": 1.201672978283453e-05, + "loss": 2.0965, + "step": 13368 + }, + { + "epoch": 0.44, + "grad_norm": 0.7528740763664246, + "learning_rate": 1.2015688717860758e-05, + "loss": 2.0254, + "step": 13369 + }, + { + "epoch": 0.44, + "grad_norm": 0.738219141960144, + "learning_rate": 1.2014647630114911e-05, + "loss": 2.0191, + "step": 13370 + }, + { + "epoch": 0.44, + "grad_norm": 0.7300574779510498, + "learning_rate": 1.2013606519608761e-05, + "loss": 2.0921, + "step": 13371 + }, + { + "epoch": 0.44, + "grad_norm": 0.7272834777832031, + "learning_rate": 1.201256538635406e-05, + "loss": 2.1256, + "step": 13372 + }, + { + "epoch": 0.44, + "grad_norm": 0.7309862971305847, + "learning_rate": 1.2011524230362576e-05, + "loss": 2.052, + "step": 13373 + }, + { + "epoch": 0.44, + "grad_norm": 0.7109898924827576, + "learning_rate": 1.2010483051646072e-05, + "loss": 2.0412, + "step": 13374 + }, + { + "epoch": 0.44, + "grad_norm": 0.7418531179428101, + "learning_rate": 1.2009441850216307e-05, + "loss": 2.0788, + "step": 13375 + }, + { + "epoch": 0.45, + "grad_norm": 0.7676542401313782, + "learning_rate": 1.2008400626085047e-05, + "loss": 2.083, + "step": 13376 + }, + { + "epoch": 0.45, + "grad_norm": 0.7033101320266724, + "learning_rate": 1.2007359379264051e-05, + "loss": 2.1281, + "step": 13377 + }, + { + "epoch": 0.45, + "grad_norm": 0.7425255179405212, + "learning_rate": 1.2006318109765087e-05, + "loss": 2.0863, + "step": 13378 + }, + { + "epoch": 0.45, + "grad_norm": 0.7479313611984253, + "learning_rate": 1.2005276817599915e-05, + "loss": 2.1083, + "step": 13379 + }, + { + "epoch": 0.45, + "grad_norm": 0.7301887273788452, + "learning_rate": 1.20042355027803e-05, + "loss": 2.0251, + "step": 13380 + }, + { + "epoch": 0.45, + "grad_norm": 0.7358429431915283, + "learning_rate": 1.2003194165318011e-05, + "loss": 2.1119, + "step": 13381 + }, + { + "epoch": 0.45, + "grad_norm": 0.7144620418548584, + "learning_rate": 1.200215280522481e-05, + "loss": 2.0898, + "step": 13382 + }, + { + "epoch": 0.45, + "grad_norm": 0.7465035915374756, + "learning_rate": 1.2001111422512453e-05, + "loss": 2.1242, + "step": 13383 + }, + { + "epoch": 0.45, + "grad_norm": 0.7254433631896973, + "learning_rate": 1.2000070017192717e-05, + "loss": 2.0923, + "step": 13384 + }, + { + "epoch": 0.45, + "grad_norm": 0.7439807057380676, + "learning_rate": 1.199902858927736e-05, + "loss": 2.1265, + "step": 13385 + }, + { + "epoch": 0.45, + "grad_norm": 0.7531620860099792, + "learning_rate": 1.1997987138778151e-05, + "loss": 2.1412, + "step": 13386 + }, + { + "epoch": 0.45, + "grad_norm": 0.7350923418998718, + "learning_rate": 1.1996945665706851e-05, + "loss": 2.0927, + "step": 13387 + }, + { + "epoch": 0.45, + "grad_norm": 0.7015329003334045, + "learning_rate": 1.1995904170075233e-05, + "loss": 2.0502, + "step": 13388 + }, + { + "epoch": 0.45, + "grad_norm": 0.7923730611801147, + "learning_rate": 1.1994862651895059e-05, + "loss": 2.0946, + "step": 13389 + }, + { + "epoch": 0.45, + "grad_norm": 0.7369385361671448, + "learning_rate": 1.1993821111178092e-05, + "loss": 2.0647, + "step": 13390 + }, + { + "epoch": 0.45, + "grad_norm": 0.750055730342865, + "learning_rate": 1.1992779547936107e-05, + "loss": 2.107, + "step": 13391 + }, + { + "epoch": 0.45, + "grad_norm": 0.7802562117576599, + "learning_rate": 1.1991737962180863e-05, + "loss": 2.1582, + "step": 13392 + }, + { + "epoch": 0.45, + "grad_norm": 0.7317141890525818, + "learning_rate": 1.1990696353924136e-05, + "loss": 2.0508, + "step": 13393 + }, + { + "epoch": 0.45, + "grad_norm": 0.7592988610267639, + "learning_rate": 1.1989654723177681e-05, + "loss": 2.0541, + "step": 13394 + }, + { + "epoch": 0.45, + "grad_norm": 0.7209230661392212, + "learning_rate": 1.198861306995328e-05, + "loss": 2.1029, + "step": 13395 + }, + { + "epoch": 0.45, + "grad_norm": 0.7682515382766724, + "learning_rate": 1.198757139426269e-05, + "loss": 2.1503, + "step": 13396 + }, + { + "epoch": 0.45, + "grad_norm": 0.7309234142303467, + "learning_rate": 1.1986529696117684e-05, + "loss": 2.1562, + "step": 13397 + }, + { + "epoch": 0.45, + "grad_norm": 0.7398276329040527, + "learning_rate": 1.198548797553003e-05, + "loss": 2.1394, + "step": 13398 + }, + { + "epoch": 0.45, + "grad_norm": 0.7231976985931396, + "learning_rate": 1.1984446232511495e-05, + "loss": 2.0708, + "step": 13399 + }, + { + "epoch": 0.45, + "grad_norm": 0.7282794117927551, + "learning_rate": 1.198340446707385e-05, + "loss": 2.0774, + "step": 13400 + }, + { + "epoch": 0.45, + "grad_norm": 0.7439401745796204, + "learning_rate": 1.1982362679228865e-05, + "loss": 2.1004, + "step": 13401 + }, + { + "epoch": 0.45, + "grad_norm": 0.7311477065086365, + "learning_rate": 1.1981320868988309e-05, + "loss": 2.098, + "step": 13402 + }, + { + "epoch": 0.45, + "grad_norm": 0.7431143522262573, + "learning_rate": 1.1980279036363948e-05, + "loss": 2.1662, + "step": 13403 + }, + { + "epoch": 0.45, + "grad_norm": 0.7629624605178833, + "learning_rate": 1.197923718136756e-05, + "loss": 2.0967, + "step": 13404 + }, + { + "epoch": 0.45, + "grad_norm": 0.7716542482376099, + "learning_rate": 1.1978195304010904e-05, + "loss": 2.0398, + "step": 13405 + }, + { + "epoch": 0.45, + "grad_norm": 0.7417488694190979, + "learning_rate": 1.197715340430576e-05, + "loss": 2.1021, + "step": 13406 + }, + { + "epoch": 0.45, + "grad_norm": 0.7302343845367432, + "learning_rate": 1.1976111482263898e-05, + "loss": 2.0789, + "step": 13407 + }, + { + "epoch": 0.45, + "grad_norm": 0.7542890310287476, + "learning_rate": 1.1975069537897082e-05, + "loss": 2.1509, + "step": 13408 + }, + { + "epoch": 0.45, + "grad_norm": 0.7298617959022522, + "learning_rate": 1.1974027571217091e-05, + "loss": 2.0961, + "step": 13409 + }, + { + "epoch": 0.45, + "grad_norm": 0.728975236415863, + "learning_rate": 1.1972985582235692e-05, + "loss": 2.0823, + "step": 13410 + }, + { + "epoch": 0.45, + "grad_norm": 0.734054684638977, + "learning_rate": 1.1971943570964656e-05, + "loss": 2.155, + "step": 13411 + }, + { + "epoch": 0.45, + "grad_norm": 0.7165126800537109, + "learning_rate": 1.197090153741576e-05, + "loss": 2.0734, + "step": 13412 + }, + { + "epoch": 0.45, + "grad_norm": 0.7485402822494507, + "learning_rate": 1.1969859481600774e-05, + "loss": 2.1955, + "step": 13413 + }, + { + "epoch": 0.45, + "grad_norm": 0.75748211145401, + "learning_rate": 1.1968817403531468e-05, + "loss": 2.0924, + "step": 13414 + }, + { + "epoch": 0.45, + "grad_norm": 0.707868218421936, + "learning_rate": 1.196777530321962e-05, + "loss": 2.1114, + "step": 13415 + }, + { + "epoch": 0.45, + "grad_norm": 0.7354319095611572, + "learning_rate": 1.1966733180676995e-05, + "loss": 2.102, + "step": 13416 + }, + { + "epoch": 0.45, + "grad_norm": 0.735072910785675, + "learning_rate": 1.1965691035915376e-05, + "loss": 2.0612, + "step": 13417 + }, + { + "epoch": 0.45, + "grad_norm": 0.705896258354187, + "learning_rate": 1.1964648868946528e-05, + "loss": 1.9996, + "step": 13418 + }, + { + "epoch": 0.45, + "grad_norm": 0.7545985579490662, + "learning_rate": 1.196360667978223e-05, + "loss": 2.112, + "step": 13419 + }, + { + "epoch": 0.45, + "grad_norm": 0.7155085802078247, + "learning_rate": 1.1962564468434254e-05, + "loss": 2.1007, + "step": 13420 + }, + { + "epoch": 0.45, + "grad_norm": 0.7386749982833862, + "learning_rate": 1.1961522234914375e-05, + "loss": 2.1554, + "step": 13421 + }, + { + "epoch": 0.45, + "grad_norm": 0.7164409160614014, + "learning_rate": 1.196047997923437e-05, + "loss": 2.0849, + "step": 13422 + }, + { + "epoch": 0.45, + "grad_norm": 0.6822673678398132, + "learning_rate": 1.1959437701406007e-05, + "loss": 2.1013, + "step": 13423 + }, + { + "epoch": 0.45, + "grad_norm": 0.74634850025177, + "learning_rate": 1.1958395401441067e-05, + "loss": 2.124, + "step": 13424 + }, + { + "epoch": 0.45, + "grad_norm": 0.7154432535171509, + "learning_rate": 1.1957353079351324e-05, + "loss": 2.0637, + "step": 13425 + }, + { + "epoch": 0.45, + "grad_norm": 0.7406798601150513, + "learning_rate": 1.1956310735148555e-05, + "loss": 2.1231, + "step": 13426 + }, + { + "epoch": 0.45, + "grad_norm": 0.7359046339988708, + "learning_rate": 1.1955268368844528e-05, + "loss": 2.1022, + "step": 13427 + }, + { + "epoch": 0.45, + "grad_norm": 0.7135004997253418, + "learning_rate": 1.1954225980451031e-05, + "loss": 2.0329, + "step": 13428 + }, + { + "epoch": 0.45, + "grad_norm": 0.7093487977981567, + "learning_rate": 1.1953183569979832e-05, + "loss": 2.075, + "step": 13429 + }, + { + "epoch": 0.45, + "grad_norm": 0.7442511320114136, + "learning_rate": 1.1952141137442706e-05, + "loss": 2.1143, + "step": 13430 + }, + { + "epoch": 0.45, + "grad_norm": 0.7548267245292664, + "learning_rate": 1.1951098682851439e-05, + "loss": 2.1139, + "step": 13431 + }, + { + "epoch": 0.45, + "grad_norm": 0.7108021974563599, + "learning_rate": 1.19500562062178e-05, + "loss": 2.1115, + "step": 13432 + }, + { + "epoch": 0.45, + "grad_norm": 0.7433215379714966, + "learning_rate": 1.1949013707553568e-05, + "loss": 2.0845, + "step": 13433 + }, + { + "epoch": 0.45, + "grad_norm": 0.7358103394508362, + "learning_rate": 1.1947971186870522e-05, + "loss": 2.0432, + "step": 13434 + }, + { + "epoch": 0.45, + "grad_norm": 0.7236347198486328, + "learning_rate": 1.194692864418044e-05, + "loss": 2.1276, + "step": 13435 + }, + { + "epoch": 0.45, + "grad_norm": 0.7393098473548889, + "learning_rate": 1.19458860794951e-05, + "loss": 2.0207, + "step": 13436 + }, + { + "epoch": 0.45, + "grad_norm": 0.7302792072296143, + "learning_rate": 1.1944843492826278e-05, + "loss": 2.1426, + "step": 13437 + }, + { + "epoch": 0.45, + "grad_norm": 0.7330463528633118, + "learning_rate": 1.1943800884185753e-05, + "loss": 2.0552, + "step": 13438 + }, + { + "epoch": 0.45, + "grad_norm": 0.7458175420761108, + "learning_rate": 1.1942758253585307e-05, + "loss": 2.0665, + "step": 13439 + }, + { + "epoch": 0.45, + "grad_norm": 0.7964898943901062, + "learning_rate": 1.1941715601036716e-05, + "loss": 2.1928, + "step": 13440 + }, + { + "epoch": 0.45, + "grad_norm": 0.7129392623901367, + "learning_rate": 1.1940672926551757e-05, + "loss": 2.1013, + "step": 13441 + }, + { + "epoch": 0.45, + "grad_norm": 0.7619794607162476, + "learning_rate": 1.1939630230142218e-05, + "loss": 2.0543, + "step": 13442 + }, + { + "epoch": 0.45, + "grad_norm": 0.718312680721283, + "learning_rate": 1.193858751181987e-05, + "loss": 2.0593, + "step": 13443 + }, + { + "epoch": 0.45, + "grad_norm": 0.6910481452941895, + "learning_rate": 1.1937544771596497e-05, + "loss": 2.0546, + "step": 13444 + }, + { + "epoch": 0.45, + "grad_norm": 0.7477357983589172, + "learning_rate": 1.193650200948388e-05, + "loss": 2.1146, + "step": 13445 + }, + { + "epoch": 0.45, + "grad_norm": 0.7275376915931702, + "learning_rate": 1.1935459225493795e-05, + "loss": 2.1487, + "step": 13446 + }, + { + "epoch": 0.45, + "grad_norm": 0.7672938704490662, + "learning_rate": 1.193441641963803e-05, + "loss": 2.1407, + "step": 13447 + }, + { + "epoch": 0.45, + "grad_norm": 0.730105459690094, + "learning_rate": 1.1933373591928361e-05, + "loss": 2.059, + "step": 13448 + }, + { + "epoch": 0.45, + "grad_norm": 0.7491620182991028, + "learning_rate": 1.1932330742376568e-05, + "loss": 2.0867, + "step": 13449 + }, + { + "epoch": 0.45, + "grad_norm": 0.7420753836631775, + "learning_rate": 1.1931287870994437e-05, + "loss": 2.1474, + "step": 13450 + }, + { + "epoch": 0.45, + "grad_norm": 0.7357080578804016, + "learning_rate": 1.1930244977793745e-05, + "loss": 2.0952, + "step": 13451 + }, + { + "epoch": 0.45, + "grad_norm": 0.7083612084388733, + "learning_rate": 1.1929202062786278e-05, + "loss": 2.0772, + "step": 13452 + }, + { + "epoch": 0.45, + "grad_norm": 0.7223063707351685, + "learning_rate": 1.1928159125983818e-05, + "loss": 2.1145, + "step": 13453 + }, + { + "epoch": 0.45, + "grad_norm": 0.7823801040649414, + "learning_rate": 1.1927116167398146e-05, + "loss": 2.0906, + "step": 13454 + }, + { + "epoch": 0.45, + "grad_norm": 0.7294846177101135, + "learning_rate": 1.1926073187041043e-05, + "loss": 1.9763, + "step": 13455 + }, + { + "epoch": 0.45, + "grad_norm": 0.7259325981140137, + "learning_rate": 1.1925030184924293e-05, + "loss": 2.1163, + "step": 13456 + }, + { + "epoch": 0.45, + "grad_norm": 0.7335636615753174, + "learning_rate": 1.1923987161059682e-05, + "loss": 2.1241, + "step": 13457 + }, + { + "epoch": 0.45, + "grad_norm": 0.7183225750923157, + "learning_rate": 1.1922944115458993e-05, + "loss": 2.0826, + "step": 13458 + }, + { + "epoch": 0.45, + "grad_norm": 0.7281363606452942, + "learning_rate": 1.1921901048134009e-05, + "loss": 2.0605, + "step": 13459 + }, + { + "epoch": 0.45, + "grad_norm": 0.7072030305862427, + "learning_rate": 1.1920857959096508e-05, + "loss": 2.0597, + "step": 13460 + }, + { + "epoch": 0.45, + "grad_norm": 0.7302013635635376, + "learning_rate": 1.1919814848358282e-05, + "loss": 2.1012, + "step": 13461 + }, + { + "epoch": 0.45, + "grad_norm": 0.7271750569343567, + "learning_rate": 1.1918771715931116e-05, + "loss": 2.0882, + "step": 13462 + }, + { + "epoch": 0.45, + "grad_norm": 0.715904712677002, + "learning_rate": 1.1917728561826787e-05, + "loss": 2.1002, + "step": 13463 + }, + { + "epoch": 0.45, + "grad_norm": 0.7084422707557678, + "learning_rate": 1.191668538605709e-05, + "loss": 2.0494, + "step": 13464 + }, + { + "epoch": 0.45, + "grad_norm": 0.7305770516395569, + "learning_rate": 1.19156421886338e-05, + "loss": 2.0841, + "step": 13465 + }, + { + "epoch": 0.45, + "grad_norm": 0.7397257089614868, + "learning_rate": 1.191459896956871e-05, + "loss": 2.1024, + "step": 13466 + }, + { + "epoch": 0.45, + "grad_norm": 0.7399925589561462, + "learning_rate": 1.19135557288736e-05, + "loss": 2.088, + "step": 13467 + }, + { + "epoch": 0.45, + "grad_norm": 0.761913001537323, + "learning_rate": 1.1912512466560261e-05, + "loss": 2.0867, + "step": 13468 + }, + { + "epoch": 0.45, + "grad_norm": 0.7388766407966614, + "learning_rate": 1.1911469182640478e-05, + "loss": 2.0689, + "step": 13469 + }, + { + "epoch": 0.45, + "grad_norm": 0.7390720248222351, + "learning_rate": 1.1910425877126038e-05, + "loss": 2.0879, + "step": 13470 + }, + { + "epoch": 0.45, + "grad_norm": 0.7065528631210327, + "learning_rate": 1.1909382550028719e-05, + "loss": 2.0785, + "step": 13471 + }, + { + "epoch": 0.45, + "grad_norm": 0.7561423182487488, + "learning_rate": 1.1908339201360319e-05, + "loss": 2.1433, + "step": 13472 + }, + { + "epoch": 0.45, + "grad_norm": 0.7405491471290588, + "learning_rate": 1.1907295831132624e-05, + "loss": 2.1288, + "step": 13473 + }, + { + "epoch": 0.45, + "grad_norm": 0.7585501670837402, + "learning_rate": 1.1906252439357413e-05, + "loss": 2.0703, + "step": 13474 + }, + { + "epoch": 0.45, + "grad_norm": 0.734179675579071, + "learning_rate": 1.1905209026046485e-05, + "loss": 2.0892, + "step": 13475 + }, + { + "epoch": 0.45, + "grad_norm": 0.7396822571754456, + "learning_rate": 1.1904165591211616e-05, + "loss": 2.0744, + "step": 13476 + }, + { + "epoch": 0.45, + "grad_norm": 0.7383284568786621, + "learning_rate": 1.1903122134864604e-05, + "loss": 2.0331, + "step": 13477 + }, + { + "epoch": 0.45, + "grad_norm": 0.7291897535324097, + "learning_rate": 1.1902078657017234e-05, + "loss": 2.0848, + "step": 13478 + }, + { + "epoch": 0.45, + "grad_norm": 0.739812970161438, + "learning_rate": 1.1901035157681291e-05, + "loss": 2.0657, + "step": 13479 + }, + { + "epoch": 0.45, + "grad_norm": 0.7075086832046509, + "learning_rate": 1.1899991636868569e-05, + "loss": 2.0577, + "step": 13480 + }, + { + "epoch": 0.45, + "grad_norm": 0.6971482634544373, + "learning_rate": 1.1898948094590854e-05, + "loss": 2.0405, + "step": 13481 + }, + { + "epoch": 0.45, + "grad_norm": 0.7240017056465149, + "learning_rate": 1.1897904530859937e-05, + "loss": 2.116, + "step": 13482 + }, + { + "epoch": 0.45, + "grad_norm": 0.7471370100975037, + "learning_rate": 1.1896860945687605e-05, + "loss": 2.1012, + "step": 13483 + }, + { + "epoch": 0.45, + "grad_norm": 0.7188327312469482, + "learning_rate": 1.1895817339085651e-05, + "loss": 2.0337, + "step": 13484 + }, + { + "epoch": 0.45, + "grad_norm": 0.7193142175674438, + "learning_rate": 1.1894773711065863e-05, + "loss": 2.0526, + "step": 13485 + }, + { + "epoch": 0.45, + "grad_norm": 0.7240602970123291, + "learning_rate": 1.1893730061640036e-05, + "loss": 2.0533, + "step": 13486 + }, + { + "epoch": 0.45, + "grad_norm": 0.7409474849700928, + "learning_rate": 1.1892686390819952e-05, + "loss": 2.1813, + "step": 13487 + }, + { + "epoch": 0.45, + "grad_norm": 0.7168980240821838, + "learning_rate": 1.189164269861741e-05, + "loss": 2.1133, + "step": 13488 + }, + { + "epoch": 0.45, + "grad_norm": 0.7582558393478394, + "learning_rate": 1.1890598985044195e-05, + "loss": 2.1501, + "step": 13489 + }, + { + "epoch": 0.45, + "grad_norm": 0.7557507157325745, + "learning_rate": 1.1889555250112101e-05, + "loss": 2.0906, + "step": 13490 + }, + { + "epoch": 0.45, + "grad_norm": 0.7242085933685303, + "learning_rate": 1.1888511493832919e-05, + "loss": 2.1029, + "step": 13491 + }, + { + "epoch": 0.45, + "grad_norm": 0.7394744157791138, + "learning_rate": 1.1887467716218442e-05, + "loss": 2.0242, + "step": 13492 + }, + { + "epoch": 0.45, + "grad_norm": 0.7168545722961426, + "learning_rate": 1.1886423917280459e-05, + "loss": 1.9949, + "step": 13493 + }, + { + "epoch": 0.45, + "grad_norm": 0.7620472311973572, + "learning_rate": 1.1885380097030765e-05, + "loss": 2.1004, + "step": 13494 + }, + { + "epoch": 0.45, + "grad_norm": 0.7373746037483215, + "learning_rate": 1.1884336255481152e-05, + "loss": 2.0368, + "step": 13495 + }, + { + "epoch": 0.45, + "grad_norm": 0.7282609939575195, + "learning_rate": 1.188329239264341e-05, + "loss": 2.1139, + "step": 13496 + }, + { + "epoch": 0.45, + "grad_norm": 0.773597002029419, + "learning_rate": 1.188224850852934e-05, + "loss": 2.0988, + "step": 13497 + }, + { + "epoch": 0.45, + "grad_norm": 0.7168407440185547, + "learning_rate": 1.1881204603150725e-05, + "loss": 2.0062, + "step": 13498 + }, + { + "epoch": 0.45, + "grad_norm": 0.7512868046760559, + "learning_rate": 1.1880160676519363e-05, + "loss": 2.057, + "step": 13499 + }, + { + "epoch": 0.45, + "grad_norm": 0.7379336357116699, + "learning_rate": 1.1879116728647048e-05, + "loss": 2.0415, + "step": 13500 + }, + { + "epoch": 0.45, + "grad_norm": 0.7391049861907959, + "learning_rate": 1.1878072759545576e-05, + "loss": 2.1142, + "step": 13501 + }, + { + "epoch": 0.45, + "grad_norm": 0.7618208527565002, + "learning_rate": 1.1877028769226735e-05, + "loss": 2.081, + "step": 13502 + }, + { + "epoch": 0.45, + "grad_norm": 0.7453198432922363, + "learning_rate": 1.1875984757702326e-05, + "loss": 2.105, + "step": 13503 + }, + { + "epoch": 0.45, + "grad_norm": 0.7550119161605835, + "learning_rate": 1.1874940724984139e-05, + "loss": 2.0907, + "step": 13504 + }, + { + "epoch": 0.45, + "grad_norm": 0.732653021812439, + "learning_rate": 1.187389667108397e-05, + "loss": 2.0965, + "step": 13505 + }, + { + "epoch": 0.45, + "grad_norm": 0.7220315337181091, + "learning_rate": 1.1872852596013615e-05, + "loss": 2.0584, + "step": 13506 + }, + { + "epoch": 0.45, + "grad_norm": 0.7149839401245117, + "learning_rate": 1.187180849978487e-05, + "loss": 2.0255, + "step": 13507 + }, + { + "epoch": 0.45, + "grad_norm": 0.7610925436019897, + "learning_rate": 1.1870764382409529e-05, + "loss": 2.0962, + "step": 13508 + }, + { + "epoch": 0.45, + "grad_norm": 0.7505216598510742, + "learning_rate": 1.1869720243899385e-05, + "loss": 2.0925, + "step": 13509 + }, + { + "epoch": 0.45, + "grad_norm": 0.7339593172073364, + "learning_rate": 1.1868676084266244e-05, + "loss": 2.0386, + "step": 13510 + }, + { + "epoch": 0.45, + "grad_norm": 0.7054505348205566, + "learning_rate": 1.1867631903521892e-05, + "loss": 2.0927, + "step": 13511 + }, + { + "epoch": 0.45, + "grad_norm": 0.730927050113678, + "learning_rate": 1.186658770167813e-05, + "loss": 2.1538, + "step": 13512 + }, + { + "epoch": 0.45, + "grad_norm": 0.7303416728973389, + "learning_rate": 1.1865543478746753e-05, + "loss": 2.0691, + "step": 13513 + }, + { + "epoch": 0.45, + "grad_norm": 0.7300906181335449, + "learning_rate": 1.1864499234739559e-05, + "loss": 2.1353, + "step": 13514 + }, + { + "epoch": 0.45, + "grad_norm": 0.7153344750404358, + "learning_rate": 1.1863454969668346e-05, + "loss": 2.0678, + "step": 13515 + }, + { + "epoch": 0.45, + "grad_norm": 0.7668852806091309, + "learning_rate": 1.1862410683544912e-05, + "loss": 2.0774, + "step": 13516 + }, + { + "epoch": 0.45, + "grad_norm": 0.6971147656440735, + "learning_rate": 1.1861366376381052e-05, + "loss": 2.1003, + "step": 13517 + }, + { + "epoch": 0.45, + "grad_norm": 0.7274404168128967, + "learning_rate": 1.1860322048188566e-05, + "loss": 2.0974, + "step": 13518 + }, + { + "epoch": 0.45, + "grad_norm": 0.7207568883895874, + "learning_rate": 1.1859277698979253e-05, + "loss": 2.1015, + "step": 13519 + }, + { + "epoch": 0.45, + "grad_norm": 0.7528194189071655, + "learning_rate": 1.1858233328764908e-05, + "loss": 2.0512, + "step": 13520 + }, + { + "epoch": 0.45, + "grad_norm": 0.7192557454109192, + "learning_rate": 1.1857188937557333e-05, + "loss": 2.0556, + "step": 13521 + }, + { + "epoch": 0.45, + "grad_norm": 0.7366397976875305, + "learning_rate": 1.1856144525368327e-05, + "loss": 2.056, + "step": 13522 + }, + { + "epoch": 0.45, + "grad_norm": 0.7384868264198303, + "learning_rate": 1.1855100092209683e-05, + "loss": 2.0505, + "step": 13523 + }, + { + "epoch": 0.45, + "grad_norm": 0.7295539379119873, + "learning_rate": 1.1854055638093212e-05, + "loss": 2.0907, + "step": 13524 + }, + { + "epoch": 0.45, + "grad_norm": 0.7075151801109314, + "learning_rate": 1.1853011163030703e-05, + "loss": 2.081, + "step": 13525 + }, + { + "epoch": 0.45, + "grad_norm": 0.7750862240791321, + "learning_rate": 1.185196666703396e-05, + "loss": 2.0959, + "step": 13526 + }, + { + "epoch": 0.45, + "grad_norm": 0.698559045791626, + "learning_rate": 1.1850922150114786e-05, + "loss": 2.0639, + "step": 13527 + }, + { + "epoch": 0.45, + "grad_norm": 0.7167655229568481, + "learning_rate": 1.1849877612284974e-05, + "loss": 2.0244, + "step": 13528 + }, + { + "epoch": 0.45, + "grad_norm": 0.7359040975570679, + "learning_rate": 1.1848833053556332e-05, + "loss": 2.0873, + "step": 13529 + }, + { + "epoch": 0.45, + "grad_norm": 0.7603029608726501, + "learning_rate": 1.1847788473940658e-05, + "loss": 2.1706, + "step": 13530 + }, + { + "epoch": 0.45, + "grad_norm": 0.7462285161018372, + "learning_rate": 1.184674387344975e-05, + "loss": 2.0498, + "step": 13531 + }, + { + "epoch": 0.45, + "grad_norm": 0.7745400071144104, + "learning_rate": 1.1845699252095414e-05, + "loss": 2.1679, + "step": 13532 + }, + { + "epoch": 0.45, + "grad_norm": 0.791156530380249, + "learning_rate": 1.184465460988945e-05, + "loss": 2.1602, + "step": 13533 + }, + { + "epoch": 0.45, + "grad_norm": 0.7173791527748108, + "learning_rate": 1.1843609946843655e-05, + "loss": 2.0552, + "step": 13534 + }, + { + "epoch": 0.45, + "grad_norm": 0.7484079599380493, + "learning_rate": 1.1842565262969842e-05, + "loss": 2.05, + "step": 13535 + }, + { + "epoch": 0.45, + "grad_norm": 0.7357771396636963, + "learning_rate": 1.1841520558279802e-05, + "loss": 2.1432, + "step": 13536 + }, + { + "epoch": 0.45, + "grad_norm": 0.7360095977783203, + "learning_rate": 1.1840475832785343e-05, + "loss": 2.0778, + "step": 13537 + }, + { + "epoch": 0.45, + "grad_norm": 0.767088770866394, + "learning_rate": 1.1839431086498268e-05, + "loss": 2.0112, + "step": 13538 + }, + { + "epoch": 0.45, + "grad_norm": 0.7236528396606445, + "learning_rate": 1.1838386319430377e-05, + "loss": 2.0528, + "step": 13539 + }, + { + "epoch": 0.45, + "grad_norm": 0.727060079574585, + "learning_rate": 1.1837341531593473e-05, + "loss": 2.092, + "step": 13540 + }, + { + "epoch": 0.45, + "grad_norm": 0.7364929914474487, + "learning_rate": 1.1836296722999364e-05, + "loss": 2.1365, + "step": 13541 + }, + { + "epoch": 0.45, + "grad_norm": 0.7439523339271545, + "learning_rate": 1.1835251893659849e-05, + "loss": 2.0503, + "step": 13542 + }, + { + "epoch": 0.45, + "grad_norm": 0.7160428762435913, + "learning_rate": 1.1834207043586738e-05, + "loss": 2.1407, + "step": 13543 + }, + { + "epoch": 0.45, + "grad_norm": 0.7623382210731506, + "learning_rate": 1.1833162172791828e-05, + "loss": 2.1538, + "step": 13544 + }, + { + "epoch": 0.45, + "grad_norm": 0.7711104154586792, + "learning_rate": 1.183211728128692e-05, + "loss": 2.077, + "step": 13545 + }, + { + "epoch": 0.45, + "grad_norm": 0.7471561431884766, + "learning_rate": 1.1831072369083834e-05, + "loss": 2.0149, + "step": 13546 + }, + { + "epoch": 0.45, + "grad_norm": 0.7303157448768616, + "learning_rate": 1.1830027436194362e-05, + "loss": 2.0109, + "step": 13547 + }, + { + "epoch": 0.45, + "grad_norm": 0.7227977514266968, + "learning_rate": 1.1828982482630314e-05, + "loss": 2.1047, + "step": 13548 + }, + { + "epoch": 0.45, + "grad_norm": 0.744614839553833, + "learning_rate": 1.182793750840349e-05, + "loss": 2.1748, + "step": 13549 + }, + { + "epoch": 0.45, + "grad_norm": 0.7837855815887451, + "learning_rate": 1.1826892513525701e-05, + "loss": 2.1153, + "step": 13550 + }, + { + "epoch": 0.45, + "grad_norm": 0.7416403889656067, + "learning_rate": 1.1825847498008752e-05, + "loss": 2.0078, + "step": 13551 + }, + { + "epoch": 0.45, + "grad_norm": 0.7315610647201538, + "learning_rate": 1.1824802461864448e-05, + "loss": 2.0833, + "step": 13552 + }, + { + "epoch": 0.45, + "grad_norm": 0.7420401573181152, + "learning_rate": 1.1823757405104594e-05, + "loss": 2.1042, + "step": 13553 + }, + { + "epoch": 0.45, + "grad_norm": 0.7715992331504822, + "learning_rate": 1.1822712327740999e-05, + "loss": 2.122, + "step": 13554 + }, + { + "epoch": 0.45, + "grad_norm": 0.7241045236587524, + "learning_rate": 1.182166722978547e-05, + "loss": 2.0218, + "step": 13555 + }, + { + "epoch": 0.45, + "grad_norm": 0.7550198435783386, + "learning_rate": 1.1820622111249807e-05, + "loss": 2.1166, + "step": 13556 + }, + { + "epoch": 0.45, + "grad_norm": 0.7314906716346741, + "learning_rate": 1.1819576972145828e-05, + "loss": 2.0638, + "step": 13557 + }, + { + "epoch": 0.45, + "grad_norm": 0.7201056480407715, + "learning_rate": 1.181853181248533e-05, + "loss": 2.0344, + "step": 13558 + }, + { + "epoch": 0.45, + "grad_norm": 0.7708947062492371, + "learning_rate": 1.1817486632280129e-05, + "loss": 2.0434, + "step": 13559 + }, + { + "epoch": 0.45, + "grad_norm": 0.8097667694091797, + "learning_rate": 1.1816441431542026e-05, + "loss": 2.0571, + "step": 13560 + }, + { + "epoch": 0.45, + "grad_norm": 0.7036840915679932, + "learning_rate": 1.1815396210282835e-05, + "loss": 2.0917, + "step": 13561 + }, + { + "epoch": 0.45, + "grad_norm": 0.7495243549346924, + "learning_rate": 1.1814350968514358e-05, + "loss": 2.1614, + "step": 13562 + }, + { + "epoch": 0.45, + "grad_norm": 0.7221084833145142, + "learning_rate": 1.1813305706248412e-05, + "loss": 2.0914, + "step": 13563 + }, + { + "epoch": 0.45, + "grad_norm": 0.7237715125083923, + "learning_rate": 1.1812260423496795e-05, + "loss": 2.0879, + "step": 13564 + }, + { + "epoch": 0.45, + "grad_norm": 0.7475821375846863, + "learning_rate": 1.1811215120271327e-05, + "loss": 2.1245, + "step": 13565 + }, + { + "epoch": 0.45, + "grad_norm": 0.7633784413337708, + "learning_rate": 1.181016979658381e-05, + "loss": 2.1713, + "step": 13566 + }, + { + "epoch": 0.45, + "grad_norm": 0.7213771939277649, + "learning_rate": 1.1809124452446055e-05, + "loss": 2.1263, + "step": 13567 + }, + { + "epoch": 0.45, + "grad_norm": 0.7430083155632019, + "learning_rate": 1.1808079087869875e-05, + "loss": 2.0524, + "step": 13568 + }, + { + "epoch": 0.45, + "grad_norm": 0.7704249024391174, + "learning_rate": 1.1807033702867071e-05, + "loss": 2.0629, + "step": 13569 + }, + { + "epoch": 0.45, + "grad_norm": 0.7276620268821716, + "learning_rate": 1.1805988297449467e-05, + "loss": 2.0724, + "step": 13570 + }, + { + "epoch": 0.45, + "grad_norm": 0.736805260181427, + "learning_rate": 1.1804942871628859e-05, + "loss": 2.0693, + "step": 13571 + }, + { + "epoch": 0.45, + "grad_norm": 0.7313312292098999, + "learning_rate": 1.1803897425417067e-05, + "loss": 2.067, + "step": 13572 + }, + { + "epoch": 0.45, + "grad_norm": 0.7328804731369019, + "learning_rate": 1.18028519588259e-05, + "loss": 2.0928, + "step": 13573 + }, + { + "epoch": 0.45, + "grad_norm": 0.7619591355323792, + "learning_rate": 1.180180647186717e-05, + "loss": 2.0579, + "step": 13574 + }, + { + "epoch": 0.45, + "grad_norm": 0.7512155771255493, + "learning_rate": 1.180076096455268e-05, + "loss": 2.1167, + "step": 13575 + }, + { + "epoch": 0.45, + "grad_norm": 0.7085748314857483, + "learning_rate": 1.1799715436894252e-05, + "loss": 2.0645, + "step": 13576 + }, + { + "epoch": 0.45, + "grad_norm": 0.7317373156547546, + "learning_rate": 1.1798669888903693e-05, + "loss": 2.1281, + "step": 13577 + }, + { + "epoch": 0.45, + "grad_norm": 0.7313659191131592, + "learning_rate": 1.1797624320592817e-05, + "loss": 2.0699, + "step": 13578 + }, + { + "epoch": 0.45, + "grad_norm": 0.7449996471405029, + "learning_rate": 1.1796578731973436e-05, + "loss": 2.0624, + "step": 13579 + }, + { + "epoch": 0.45, + "grad_norm": 0.7183303236961365, + "learning_rate": 1.1795533123057356e-05, + "loss": 2.0716, + "step": 13580 + }, + { + "epoch": 0.45, + "grad_norm": 0.7231505513191223, + "learning_rate": 1.1794487493856402e-05, + "loss": 2.1245, + "step": 13581 + }, + { + "epoch": 0.45, + "grad_norm": 0.704030454158783, + "learning_rate": 1.1793441844382376e-05, + "loss": 2.0786, + "step": 13582 + }, + { + "epoch": 0.45, + "grad_norm": 0.7091351747512817, + "learning_rate": 1.1792396174647096e-05, + "loss": 2.0382, + "step": 13583 + }, + { + "epoch": 0.45, + "grad_norm": 0.7370789647102356, + "learning_rate": 1.1791350484662375e-05, + "loss": 2.1172, + "step": 13584 + }, + { + "epoch": 0.45, + "grad_norm": 0.7186959385871887, + "learning_rate": 1.1790304774440022e-05, + "loss": 2.093, + "step": 13585 + }, + { + "epoch": 0.45, + "grad_norm": 0.7489336133003235, + "learning_rate": 1.178925904399186e-05, + "loss": 2.068, + "step": 13586 + }, + { + "epoch": 0.45, + "grad_norm": 0.7123768329620361, + "learning_rate": 1.1788213293329696e-05, + "loss": 2.057, + "step": 13587 + }, + { + "epoch": 0.45, + "grad_norm": 0.7339223623275757, + "learning_rate": 1.1787167522465344e-05, + "loss": 2.0773, + "step": 13588 + }, + { + "epoch": 0.45, + "grad_norm": 0.7449429631233215, + "learning_rate": 1.1786121731410622e-05, + "loss": 2.0256, + "step": 13589 + }, + { + "epoch": 0.45, + "grad_norm": 0.7440928220748901, + "learning_rate": 1.1785075920177347e-05, + "loss": 2.1163, + "step": 13590 + }, + { + "epoch": 0.45, + "grad_norm": 0.7177662253379822, + "learning_rate": 1.1784030088777325e-05, + "loss": 1.9806, + "step": 13591 + }, + { + "epoch": 0.45, + "grad_norm": 0.7182180881500244, + "learning_rate": 1.1782984237222382e-05, + "loss": 2.0412, + "step": 13592 + }, + { + "epoch": 0.45, + "grad_norm": 0.6961660385131836, + "learning_rate": 1.1781938365524326e-05, + "loss": 2.0749, + "step": 13593 + }, + { + "epoch": 0.45, + "grad_norm": 0.7375578284263611, + "learning_rate": 1.1780892473694974e-05, + "loss": 2.0391, + "step": 13594 + }, + { + "epoch": 0.45, + "grad_norm": 0.7078100442886353, + "learning_rate": 1.1779846561746143e-05, + "loss": 2.0301, + "step": 13595 + }, + { + "epoch": 0.45, + "grad_norm": 0.7513500452041626, + "learning_rate": 1.1778800629689646e-05, + "loss": 2.0775, + "step": 13596 + }, + { + "epoch": 0.45, + "grad_norm": 0.8137655258178711, + "learning_rate": 1.1777754677537306e-05, + "loss": 2.1367, + "step": 13597 + }, + { + "epoch": 0.45, + "grad_norm": 0.7182844877243042, + "learning_rate": 1.1776708705300932e-05, + "loss": 2.0454, + "step": 13598 + }, + { + "epoch": 0.45, + "grad_norm": 0.7568352222442627, + "learning_rate": 1.1775662712992346e-05, + "loss": 2.1431, + "step": 13599 + }, + { + "epoch": 0.45, + "grad_norm": 0.7061640024185181, + "learning_rate": 1.1774616700623363e-05, + "loss": 2.0685, + "step": 13600 + }, + { + "epoch": 0.45, + "grad_norm": 0.739234209060669, + "learning_rate": 1.1773570668205803e-05, + "loss": 2.0834, + "step": 13601 + }, + { + "epoch": 0.45, + "grad_norm": 0.7545337080955505, + "learning_rate": 1.1772524615751477e-05, + "loss": 2.0877, + "step": 13602 + }, + { + "epoch": 0.45, + "grad_norm": 0.7233408093452454, + "learning_rate": 1.177147854327221e-05, + "loss": 2.0459, + "step": 13603 + }, + { + "epoch": 0.45, + "grad_norm": 0.7200063467025757, + "learning_rate": 1.1770432450779814e-05, + "loss": 2.0533, + "step": 13604 + }, + { + "epoch": 0.45, + "grad_norm": 0.7366318702697754, + "learning_rate": 1.176938633828611e-05, + "loss": 2.0898, + "step": 13605 + }, + { + "epoch": 0.45, + "grad_norm": 0.740839421749115, + "learning_rate": 1.1768340205802917e-05, + "loss": 2.1812, + "step": 13606 + }, + { + "epoch": 0.45, + "grad_norm": 0.7220223546028137, + "learning_rate": 1.1767294053342053e-05, + "loss": 2.0054, + "step": 13607 + }, + { + "epoch": 0.45, + "grad_norm": 0.72603839635849, + "learning_rate": 1.1766247880915335e-05, + "loss": 2.0762, + "step": 13608 + }, + { + "epoch": 0.45, + "grad_norm": 0.7728837728500366, + "learning_rate": 1.1765201688534587e-05, + "loss": 2.0701, + "step": 13609 + }, + { + "epoch": 0.45, + "grad_norm": 0.7520168423652649, + "learning_rate": 1.176415547621162e-05, + "loss": 2.0575, + "step": 13610 + }, + { + "epoch": 0.45, + "grad_norm": 0.7832136154174805, + "learning_rate": 1.1763109243958261e-05, + "loss": 2.1359, + "step": 13611 + }, + { + "epoch": 0.45, + "grad_norm": 0.7365365028381348, + "learning_rate": 1.176206299178633e-05, + "loss": 2.0446, + "step": 13612 + }, + { + "epoch": 0.45, + "grad_norm": 0.7437178492546082, + "learning_rate": 1.176101671970764e-05, + "loss": 2.112, + "step": 13613 + }, + { + "epoch": 0.45, + "grad_norm": 0.7158846855163574, + "learning_rate": 1.1759970427734017e-05, + "loss": 2.0736, + "step": 13614 + }, + { + "epoch": 0.45, + "grad_norm": 0.7176305651664734, + "learning_rate": 1.175892411587728e-05, + "loss": 2.112, + "step": 13615 + }, + { + "epoch": 0.45, + "grad_norm": 0.7492138147354126, + "learning_rate": 1.1757877784149245e-05, + "loss": 2.1162, + "step": 13616 + }, + { + "epoch": 0.45, + "grad_norm": 0.7546277046203613, + "learning_rate": 1.1756831432561742e-05, + "loss": 2.06, + "step": 13617 + }, + { + "epoch": 0.45, + "grad_norm": 0.7384843826293945, + "learning_rate": 1.1755785061126584e-05, + "loss": 2.097, + "step": 13618 + }, + { + "epoch": 0.45, + "grad_norm": 0.7283943295478821, + "learning_rate": 1.1754738669855596e-05, + "loss": 2.0687, + "step": 13619 + }, + { + "epoch": 0.45, + "grad_norm": 0.7531116604804993, + "learning_rate": 1.1753692258760599e-05, + "loss": 2.1363, + "step": 13620 + }, + { + "epoch": 0.45, + "grad_norm": 0.7325713038444519, + "learning_rate": 1.1752645827853416e-05, + "loss": 2.102, + "step": 13621 + }, + { + "epoch": 0.45, + "grad_norm": 0.7368161082267761, + "learning_rate": 1.1751599377145863e-05, + "loss": 2.0616, + "step": 13622 + }, + { + "epoch": 0.45, + "grad_norm": 0.741741955280304, + "learning_rate": 1.1750552906649775e-05, + "loss": 2.0974, + "step": 13623 + }, + { + "epoch": 0.45, + "grad_norm": 0.7672288417816162, + "learning_rate": 1.1749506416376956e-05, + "loss": 2.0957, + "step": 13624 + }, + { + "epoch": 0.45, + "grad_norm": 0.7456240653991699, + "learning_rate": 1.1748459906339247e-05, + "loss": 2.0949, + "step": 13625 + }, + { + "epoch": 0.45, + "grad_norm": 0.7599456310272217, + "learning_rate": 1.174741337654846e-05, + "loss": 2.0673, + "step": 13626 + }, + { + "epoch": 0.45, + "grad_norm": 0.7436365485191345, + "learning_rate": 1.174636682701642e-05, + "loss": 2.052, + "step": 13627 + }, + { + "epoch": 0.45, + "grad_norm": 0.7147632241249084, + "learning_rate": 1.1745320257754954e-05, + "loss": 2.1098, + "step": 13628 + }, + { + "epoch": 0.45, + "grad_norm": 0.7553607821464539, + "learning_rate": 1.1744273668775878e-05, + "loss": 2.0852, + "step": 13629 + }, + { + "epoch": 0.45, + "grad_norm": 0.7371150255203247, + "learning_rate": 1.1743227060091023e-05, + "loss": 2.1183, + "step": 13630 + }, + { + "epoch": 0.45, + "grad_norm": 0.7260974645614624, + "learning_rate": 1.174218043171221e-05, + "loss": 2.0854, + "step": 13631 + }, + { + "epoch": 0.45, + "grad_norm": 0.7527048587799072, + "learning_rate": 1.1741133783651261e-05, + "loss": 2.0101, + "step": 13632 + }, + { + "epoch": 0.45, + "grad_norm": 0.6959316730499268, + "learning_rate": 1.1740087115920007e-05, + "loss": 2.0863, + "step": 13633 + }, + { + "epoch": 0.45, + "grad_norm": 0.719954252243042, + "learning_rate": 1.1739040428530268e-05, + "loss": 2.059, + "step": 13634 + }, + { + "epoch": 0.45, + "grad_norm": 0.7341354489326477, + "learning_rate": 1.1737993721493868e-05, + "loss": 2.0851, + "step": 13635 + }, + { + "epoch": 0.45, + "grad_norm": 0.7512224912643433, + "learning_rate": 1.1736946994822636e-05, + "loss": 2.0598, + "step": 13636 + }, + { + "epoch": 0.45, + "grad_norm": 0.7037633657455444, + "learning_rate": 1.1735900248528392e-05, + "loss": 2.0953, + "step": 13637 + }, + { + "epoch": 0.45, + "grad_norm": 0.7858691215515137, + "learning_rate": 1.1734853482622961e-05, + "loss": 2.1482, + "step": 13638 + }, + { + "epoch": 0.45, + "grad_norm": 0.7320055365562439, + "learning_rate": 1.1733806697118179e-05, + "loss": 2.0523, + "step": 13639 + }, + { + "epoch": 0.45, + "grad_norm": 0.7900111079216003, + "learning_rate": 1.1732759892025862e-05, + "loss": 2.1107, + "step": 13640 + }, + { + "epoch": 0.45, + "grad_norm": 0.709962010383606, + "learning_rate": 1.1731713067357839e-05, + "loss": 2.05, + "step": 13641 + }, + { + "epoch": 0.45, + "grad_norm": 0.7873455882072449, + "learning_rate": 1.1730666223125937e-05, + "loss": 2.0806, + "step": 13642 + }, + { + "epoch": 0.45, + "grad_norm": 0.7152709364891052, + "learning_rate": 1.1729619359341982e-05, + "loss": 2.0529, + "step": 13643 + }, + { + "epoch": 0.45, + "grad_norm": 0.7855849266052246, + "learning_rate": 1.1728572476017802e-05, + "loss": 2.1003, + "step": 13644 + }, + { + "epoch": 0.45, + "grad_norm": 0.7602287530899048, + "learning_rate": 1.1727525573165224e-05, + "loss": 2.0402, + "step": 13645 + }, + { + "epoch": 0.45, + "grad_norm": 0.76436448097229, + "learning_rate": 1.172647865079607e-05, + "loss": 2.1372, + "step": 13646 + }, + { + "epoch": 0.45, + "grad_norm": 0.8220353126525879, + "learning_rate": 1.1725431708922176e-05, + "loss": 2.0536, + "step": 13647 + }, + { + "epoch": 0.45, + "grad_norm": 0.7804812788963318, + "learning_rate": 1.1724384747555367e-05, + "loss": 2.0916, + "step": 13648 + }, + { + "epoch": 0.45, + "grad_norm": 0.7357838153839111, + "learning_rate": 1.1723337766707464e-05, + "loss": 2.1634, + "step": 13649 + }, + { + "epoch": 0.45, + "grad_norm": 0.7195533514022827, + "learning_rate": 1.1722290766390305e-05, + "loss": 2.1455, + "step": 13650 + }, + { + "epoch": 0.45, + "grad_norm": 0.7323089241981506, + "learning_rate": 1.1721243746615714e-05, + "loss": 2.0436, + "step": 13651 + }, + { + "epoch": 0.45, + "grad_norm": 0.753634512424469, + "learning_rate": 1.172019670739552e-05, + "loss": 2.0273, + "step": 13652 + }, + { + "epoch": 0.45, + "grad_norm": 0.7407516837120056, + "learning_rate": 1.171914964874155e-05, + "loss": 2.1384, + "step": 13653 + }, + { + "epoch": 0.45, + "grad_norm": 0.7272430658340454, + "learning_rate": 1.1718102570665637e-05, + "loss": 2.0942, + "step": 13654 + }, + { + "epoch": 0.45, + "grad_norm": 0.7293431758880615, + "learning_rate": 1.1717055473179606e-05, + "loss": 2.0504, + "step": 13655 + }, + { + "epoch": 0.45, + "grad_norm": 0.8003043532371521, + "learning_rate": 1.1716008356295291e-05, + "loss": 2.0668, + "step": 13656 + }, + { + "epoch": 0.45, + "grad_norm": 0.7484285831451416, + "learning_rate": 1.1714961220024517e-05, + "loss": 2.112, + "step": 13657 + }, + { + "epoch": 0.45, + "grad_norm": 0.7347952127456665, + "learning_rate": 1.171391406437912e-05, + "loss": 2.0792, + "step": 13658 + }, + { + "epoch": 0.45, + "grad_norm": 0.7198622822761536, + "learning_rate": 1.1712866889370921e-05, + "loss": 2.0725, + "step": 13659 + }, + { + "epoch": 0.45, + "grad_norm": 0.7431581020355225, + "learning_rate": 1.1711819695011757e-05, + "loss": 2.165, + "step": 13660 + }, + { + "epoch": 0.45, + "grad_norm": 0.7139117121696472, + "learning_rate": 1.1710772481313462e-05, + "loss": 2.0748, + "step": 13661 + }, + { + "epoch": 0.45, + "grad_norm": 0.7634978294372559, + "learning_rate": 1.1709725248287858e-05, + "loss": 2.0433, + "step": 13662 + }, + { + "epoch": 0.45, + "grad_norm": 0.7221102714538574, + "learning_rate": 1.170867799594678e-05, + "loss": 2.1047, + "step": 13663 + }, + { + "epoch": 0.45, + "grad_norm": 0.746594250202179, + "learning_rate": 1.170763072430206e-05, + "loss": 2.0648, + "step": 13664 + }, + { + "epoch": 0.45, + "grad_norm": 0.7177837491035461, + "learning_rate": 1.170658343336553e-05, + "loss": 2.0794, + "step": 13665 + }, + { + "epoch": 0.45, + "grad_norm": 0.7404343485832214, + "learning_rate": 1.170553612314902e-05, + "loss": 2.129, + "step": 13666 + }, + { + "epoch": 0.45, + "grad_norm": 0.7583587169647217, + "learning_rate": 1.1704488793664364e-05, + "loss": 2.0697, + "step": 13667 + }, + { + "epoch": 0.45, + "grad_norm": 0.7130071520805359, + "learning_rate": 1.1703441444923387e-05, + "loss": 2.0525, + "step": 13668 + }, + { + "epoch": 0.45, + "grad_norm": 0.713004469871521, + "learning_rate": 1.170239407693793e-05, + "loss": 2.0594, + "step": 13669 + }, + { + "epoch": 0.45, + "grad_norm": 0.7370579838752747, + "learning_rate": 1.1701346689719823e-05, + "loss": 2.138, + "step": 13670 + }, + { + "epoch": 0.45, + "grad_norm": 0.7253836393356323, + "learning_rate": 1.1700299283280899e-05, + "loss": 2.0853, + "step": 13671 + }, + { + "epoch": 0.45, + "grad_norm": 0.7241047024726868, + "learning_rate": 1.1699251857632991e-05, + "loss": 2.1016, + "step": 13672 + }, + { + "epoch": 0.45, + "grad_norm": 0.7888206839561462, + "learning_rate": 1.1698204412787927e-05, + "loss": 2.1041, + "step": 13673 + }, + { + "epoch": 0.45, + "grad_norm": 0.7230753898620605, + "learning_rate": 1.1697156948757549e-05, + "loss": 2.1162, + "step": 13674 + }, + { + "epoch": 0.45, + "grad_norm": 0.7181246876716614, + "learning_rate": 1.1696109465553685e-05, + "loss": 2.0219, + "step": 13675 + }, + { + "epoch": 0.46, + "grad_norm": 0.7416624426841736, + "learning_rate": 1.169506196318817e-05, + "loss": 2.0487, + "step": 13676 + }, + { + "epoch": 0.46, + "grad_norm": 0.72171950340271, + "learning_rate": 1.1694014441672838e-05, + "loss": 1.9841, + "step": 13677 + }, + { + "epoch": 0.46, + "grad_norm": 0.7187781929969788, + "learning_rate": 1.1692966901019524e-05, + "loss": 2.095, + "step": 13678 + }, + { + "epoch": 0.46, + "grad_norm": 0.7483463883399963, + "learning_rate": 1.1691919341240063e-05, + "loss": 2.1678, + "step": 13679 + }, + { + "epoch": 0.46, + "grad_norm": 0.7612974643707275, + "learning_rate": 1.1690871762346287e-05, + "loss": 2.0787, + "step": 13680 + }, + { + "epoch": 0.46, + "grad_norm": 0.7629601955413818, + "learning_rate": 1.1689824164350035e-05, + "loss": 2.0825, + "step": 13681 + }, + { + "epoch": 0.46, + "grad_norm": 0.7431178689002991, + "learning_rate": 1.1688776547263137e-05, + "loss": 2.0248, + "step": 13682 + }, + { + "epoch": 0.46, + "grad_norm": 0.7509763240814209, + "learning_rate": 1.1687728911097435e-05, + "loss": 2.0657, + "step": 13683 + }, + { + "epoch": 0.46, + "grad_norm": 0.7365239858627319, + "learning_rate": 1.1686681255864758e-05, + "loss": 2.0528, + "step": 13684 + }, + { + "epoch": 0.46, + "grad_norm": 0.7138425707817078, + "learning_rate": 1.1685633581576947e-05, + "loss": 2.1661, + "step": 13685 + }, + { + "epoch": 0.46, + "grad_norm": 0.7321451902389526, + "learning_rate": 1.1684585888245834e-05, + "loss": 2.0905, + "step": 13686 + }, + { + "epoch": 0.46, + "grad_norm": 0.7448704838752747, + "learning_rate": 1.1683538175883256e-05, + "loss": 2.081, + "step": 13687 + }, + { + "epoch": 0.46, + "grad_norm": 0.764478325843811, + "learning_rate": 1.168249044450105e-05, + "loss": 2.0542, + "step": 13688 + }, + { + "epoch": 0.46, + "grad_norm": 0.7574368119239807, + "learning_rate": 1.1681442694111055e-05, + "loss": 2.0596, + "step": 13689 + }, + { + "epoch": 0.46, + "grad_norm": 0.7530555725097656, + "learning_rate": 1.1680394924725107e-05, + "loss": 2.15, + "step": 13690 + }, + { + "epoch": 0.46, + "grad_norm": 0.69953852891922, + "learning_rate": 1.1679347136355039e-05, + "loss": 2.0633, + "step": 13691 + }, + { + "epoch": 0.46, + "grad_norm": 0.763239860534668, + "learning_rate": 1.1678299329012693e-05, + "loss": 2.0256, + "step": 13692 + }, + { + "epoch": 0.46, + "grad_norm": 0.7498131394386292, + "learning_rate": 1.1677251502709904e-05, + "loss": 2.0631, + "step": 13693 + }, + { + "epoch": 0.46, + "grad_norm": 0.7580260038375854, + "learning_rate": 1.1676203657458513e-05, + "loss": 2.1257, + "step": 13694 + }, + { + "epoch": 0.46, + "grad_norm": 0.7232735753059387, + "learning_rate": 1.167515579327035e-05, + "loss": 2.1499, + "step": 13695 + }, + { + "epoch": 0.46, + "grad_norm": 0.7194355130195618, + "learning_rate": 1.1674107910157264e-05, + "loss": 2.0418, + "step": 13696 + }, + { + "epoch": 0.46, + "grad_norm": 0.7269057631492615, + "learning_rate": 1.1673060008131085e-05, + "loss": 2.0864, + "step": 13697 + }, + { + "epoch": 0.46, + "grad_norm": 0.7373828887939453, + "learning_rate": 1.1672012087203655e-05, + "loss": 2.1178, + "step": 13698 + }, + { + "epoch": 0.46, + "grad_norm": 0.7113621234893799, + "learning_rate": 1.1670964147386815e-05, + "loss": 2.0994, + "step": 13699 + }, + { + "epoch": 0.46, + "grad_norm": 0.7409968376159668, + "learning_rate": 1.1669916188692397e-05, + "loss": 2.0597, + "step": 13700 + }, + { + "epoch": 0.46, + "grad_norm": 0.7060233950614929, + "learning_rate": 1.1668868211132247e-05, + "loss": 2.0928, + "step": 13701 + }, + { + "epoch": 0.46, + "grad_norm": 0.7202461957931519, + "learning_rate": 1.16678202147182e-05, + "loss": 2.0582, + "step": 13702 + }, + { + "epoch": 0.46, + "grad_norm": 0.7405433654785156, + "learning_rate": 1.16667721994621e-05, + "loss": 2.1196, + "step": 13703 + }, + { + "epoch": 0.46, + "grad_norm": 0.7499316334724426, + "learning_rate": 1.1665724165375783e-05, + "loss": 2.0728, + "step": 13704 + }, + { + "epoch": 0.46, + "grad_norm": 0.7555602192878723, + "learning_rate": 1.1664676112471094e-05, + "loss": 2.0208, + "step": 13705 + }, + { + "epoch": 0.46, + "grad_norm": 0.7762464284896851, + "learning_rate": 1.1663628040759865e-05, + "loss": 2.0387, + "step": 13706 + }, + { + "epoch": 0.46, + "grad_norm": 0.7289209365844727, + "learning_rate": 1.1662579950253944e-05, + "loss": 2.0078, + "step": 13707 + }, + { + "epoch": 0.46, + "grad_norm": 0.7297747731208801, + "learning_rate": 1.166153184096517e-05, + "loss": 2.137, + "step": 13708 + }, + { + "epoch": 0.46, + "grad_norm": 0.7750729918479919, + "learning_rate": 1.1660483712905381e-05, + "loss": 2.1189, + "step": 13709 + }, + { + "epoch": 0.46, + "grad_norm": 0.8182358145713806, + "learning_rate": 1.1659435566086421e-05, + "loss": 2.1063, + "step": 13710 + }, + { + "epoch": 0.46, + "grad_norm": 0.7671381235122681, + "learning_rate": 1.165838740052013e-05, + "loss": 2.1015, + "step": 13711 + }, + { + "epoch": 0.46, + "grad_norm": 0.7501659393310547, + "learning_rate": 1.165733921621835e-05, + "loss": 2.047, + "step": 13712 + }, + { + "epoch": 0.46, + "grad_norm": 0.7266820669174194, + "learning_rate": 1.1656291013192922e-05, + "loss": 1.9885, + "step": 13713 + }, + { + "epoch": 0.46, + "grad_norm": 0.7108304500579834, + "learning_rate": 1.165524279145569e-05, + "loss": 2.1447, + "step": 13714 + }, + { + "epoch": 0.46, + "grad_norm": 0.7451035380363464, + "learning_rate": 1.1654194551018496e-05, + "loss": 2.1282, + "step": 13715 + }, + { + "epoch": 0.46, + "grad_norm": 0.7590197920799255, + "learning_rate": 1.1653146291893182e-05, + "loss": 2.0399, + "step": 13716 + }, + { + "epoch": 0.46, + "grad_norm": 0.743858277797699, + "learning_rate": 1.1652098014091587e-05, + "loss": 2.1689, + "step": 13717 + }, + { + "epoch": 0.46, + "grad_norm": 0.7178841829299927, + "learning_rate": 1.165104971762556e-05, + "loss": 2.0946, + "step": 13718 + }, + { + "epoch": 0.46, + "grad_norm": 0.7296066284179688, + "learning_rate": 1.1650001402506939e-05, + "loss": 2.0716, + "step": 13719 + }, + { + "epoch": 0.46, + "grad_norm": 0.7324082255363464, + "learning_rate": 1.1648953068747569e-05, + "loss": 2.0448, + "step": 13720 + }, + { + "epoch": 0.46, + "grad_norm": 0.7605100274085999, + "learning_rate": 1.1647904716359292e-05, + "loss": 2.015, + "step": 13721 + }, + { + "epoch": 0.46, + "grad_norm": 0.768844485282898, + "learning_rate": 1.1646856345353957e-05, + "loss": 2.0263, + "step": 13722 + }, + { + "epoch": 0.46, + "grad_norm": 0.7715038657188416, + "learning_rate": 1.1645807955743402e-05, + "loss": 2.1304, + "step": 13723 + }, + { + "epoch": 0.46, + "grad_norm": 0.7381030321121216, + "learning_rate": 1.1644759547539473e-05, + "loss": 2.0975, + "step": 13724 + }, + { + "epoch": 0.46, + "grad_norm": 0.751333475112915, + "learning_rate": 1.1643711120754015e-05, + "loss": 2.0958, + "step": 13725 + }, + { + "epoch": 0.46, + "grad_norm": 0.7442734837532043, + "learning_rate": 1.1642662675398872e-05, + "loss": 1.9951, + "step": 13726 + }, + { + "epoch": 0.46, + "grad_norm": 0.7160650491714478, + "learning_rate": 1.1641614211485892e-05, + "loss": 2.0888, + "step": 13727 + }, + { + "epoch": 0.46, + "grad_norm": 0.7498159408569336, + "learning_rate": 1.1640565729026912e-05, + "loss": 2.1579, + "step": 13728 + }, + { + "epoch": 0.46, + "grad_norm": 0.731899082660675, + "learning_rate": 1.1639517228033786e-05, + "loss": 2.136, + "step": 13729 + }, + { + "epoch": 0.46, + "grad_norm": 0.7222269177436829, + "learning_rate": 1.1638468708518352e-05, + "loss": 2.0882, + "step": 13730 + }, + { + "epoch": 0.46, + "grad_norm": 0.7352545857429504, + "learning_rate": 1.163742017049246e-05, + "loss": 2.1528, + "step": 13731 + }, + { + "epoch": 0.46, + "grad_norm": 0.7229012846946716, + "learning_rate": 1.1636371613967954e-05, + "loss": 2.0229, + "step": 13732 + }, + { + "epoch": 0.46, + "grad_norm": 0.7431833744049072, + "learning_rate": 1.1635323038956678e-05, + "loss": 2.1452, + "step": 13733 + }, + { + "epoch": 0.46, + "grad_norm": 0.7188732028007507, + "learning_rate": 1.1634274445470485e-05, + "loss": 2.1309, + "step": 13734 + }, + { + "epoch": 0.46, + "grad_norm": 0.7330019474029541, + "learning_rate": 1.1633225833521216e-05, + "loss": 2.0301, + "step": 13735 + }, + { + "epoch": 0.46, + "grad_norm": 0.7540656924247742, + "learning_rate": 1.1632177203120719e-05, + "loss": 2.0855, + "step": 13736 + }, + { + "epoch": 0.46, + "grad_norm": 0.6982877254486084, + "learning_rate": 1.1631128554280837e-05, + "loss": 2.074, + "step": 13737 + }, + { + "epoch": 0.46, + "grad_norm": 0.7802829742431641, + "learning_rate": 1.1630079887013426e-05, + "loss": 2.0758, + "step": 13738 + }, + { + "epoch": 0.46, + "grad_norm": 0.7080307006835938, + "learning_rate": 1.1629031201330322e-05, + "loss": 2.0532, + "step": 13739 + }, + { + "epoch": 0.46, + "grad_norm": 0.7211573719978333, + "learning_rate": 1.1627982497243384e-05, + "loss": 1.9945, + "step": 13740 + }, + { + "epoch": 0.46, + "grad_norm": 0.7203699350357056, + "learning_rate": 1.1626933774764451e-05, + "loss": 2.1082, + "step": 13741 + }, + { + "epoch": 0.46, + "grad_norm": 0.6971467733383179, + "learning_rate": 1.162588503390537e-05, + "loss": 2.0702, + "step": 13742 + }, + { + "epoch": 0.46, + "grad_norm": 0.7233619689941406, + "learning_rate": 1.1624836274678e-05, + "loss": 2.1116, + "step": 13743 + }, + { + "epoch": 0.46, + "grad_norm": 0.7606395483016968, + "learning_rate": 1.1623787497094177e-05, + "loss": 2.0618, + "step": 13744 + }, + { + "epoch": 0.46, + "grad_norm": 0.7250732779502869, + "learning_rate": 1.1622738701165756e-05, + "loss": 2.119, + "step": 13745 + }, + { + "epoch": 0.46, + "grad_norm": 0.7453100085258484, + "learning_rate": 1.1621689886904583e-05, + "loss": 2.0983, + "step": 13746 + }, + { + "epoch": 0.46, + "grad_norm": 0.757234513759613, + "learning_rate": 1.162064105432251e-05, + "loss": 2.1804, + "step": 13747 + }, + { + "epoch": 0.46, + "grad_norm": 0.7453684210777283, + "learning_rate": 1.1619592203431384e-05, + "loss": 2.055, + "step": 13748 + }, + { + "epoch": 0.46, + "grad_norm": 0.7019809484481812, + "learning_rate": 1.1618543334243055e-05, + "loss": 2.0598, + "step": 13749 + }, + { + "epoch": 0.46, + "grad_norm": 0.7220568060874939, + "learning_rate": 1.1617494446769368e-05, + "loss": 2.0768, + "step": 13750 + }, + { + "epoch": 0.46, + "grad_norm": 0.7256587743759155, + "learning_rate": 1.1616445541022184e-05, + "loss": 2.1179, + "step": 13751 + }, + { + "epoch": 0.46, + "grad_norm": 0.7081303596496582, + "learning_rate": 1.1615396617013339e-05, + "loss": 2.0777, + "step": 13752 + }, + { + "epoch": 0.46, + "grad_norm": 0.7434557676315308, + "learning_rate": 1.161434767475469e-05, + "loss": 2.1268, + "step": 13753 + }, + { + "epoch": 0.46, + "grad_norm": 0.7596583366394043, + "learning_rate": 1.161329871425809e-05, + "loss": 2.1072, + "step": 13754 + }, + { + "epoch": 0.46, + "grad_norm": 1.0023049116134644, + "learning_rate": 1.1612249735535386e-05, + "loss": 2.0781, + "step": 13755 + }, + { + "epoch": 0.46, + "grad_norm": 0.7322414517402649, + "learning_rate": 1.1611200738598429e-05, + "loss": 2.2143, + "step": 13756 + }, + { + "epoch": 0.46, + "grad_norm": 0.7430794835090637, + "learning_rate": 1.1610151723459069e-05, + "loss": 2.1117, + "step": 13757 + }, + { + "epoch": 0.46, + "grad_norm": 0.7875491380691528, + "learning_rate": 1.160910269012916e-05, + "loss": 2.2122, + "step": 13758 + }, + { + "epoch": 0.46, + "grad_norm": 0.7219997644424438, + "learning_rate": 1.1608053638620551e-05, + "loss": 2.0634, + "step": 13759 + }, + { + "epoch": 0.46, + "grad_norm": 0.732057511806488, + "learning_rate": 1.1607004568945097e-05, + "loss": 2.0851, + "step": 13760 + }, + { + "epoch": 0.46, + "grad_norm": 0.7197027802467346, + "learning_rate": 1.1605955481114643e-05, + "loss": 2.1158, + "step": 13761 + }, + { + "epoch": 0.46, + "grad_norm": 0.7384375929832458, + "learning_rate": 1.160490637514105e-05, + "loss": 2.0889, + "step": 13762 + }, + { + "epoch": 0.46, + "grad_norm": 0.7573885917663574, + "learning_rate": 1.160385725103616e-05, + "loss": 2.0544, + "step": 13763 + }, + { + "epoch": 0.46, + "grad_norm": 0.726362407207489, + "learning_rate": 1.1602808108811831e-05, + "loss": 2.0627, + "step": 13764 + }, + { + "epoch": 0.46, + "grad_norm": 0.7642941474914551, + "learning_rate": 1.160175894847992e-05, + "loss": 2.1119, + "step": 13765 + }, + { + "epoch": 0.46, + "grad_norm": 0.7677561640739441, + "learning_rate": 1.1600709770052272e-05, + "loss": 2.0845, + "step": 13766 + }, + { + "epoch": 0.46, + "grad_norm": 0.6975741982460022, + "learning_rate": 1.1599660573540746e-05, + "loss": 2.0672, + "step": 13767 + }, + { + "epoch": 0.46, + "grad_norm": 0.7311582565307617, + "learning_rate": 1.159861135895719e-05, + "loss": 2.0461, + "step": 13768 + }, + { + "epoch": 0.46, + "grad_norm": 0.7537286281585693, + "learning_rate": 1.159756212631346e-05, + "loss": 2.0727, + "step": 13769 + }, + { + "epoch": 0.46, + "grad_norm": 0.7318214774131775, + "learning_rate": 1.1596512875621408e-05, + "loss": 2.0891, + "step": 13770 + }, + { + "epoch": 0.46, + "grad_norm": 0.7135664224624634, + "learning_rate": 1.1595463606892891e-05, + "loss": 2.1078, + "step": 13771 + }, + { + "epoch": 0.46, + "grad_norm": 0.7326236963272095, + "learning_rate": 1.159441432013976e-05, + "loss": 2.0932, + "step": 13772 + }, + { + "epoch": 0.46, + "grad_norm": 0.726948082447052, + "learning_rate": 1.1593365015373874e-05, + "loss": 2.0788, + "step": 13773 + }, + { + "epoch": 0.46, + "grad_norm": 0.7320414185523987, + "learning_rate": 1.1592315692607078e-05, + "loss": 2.0545, + "step": 13774 + }, + { + "epoch": 0.46, + "grad_norm": 0.7315406203269958, + "learning_rate": 1.1591266351851234e-05, + "loss": 2.0611, + "step": 13775 + }, + { + "epoch": 0.46, + "grad_norm": 0.7235338687896729, + "learning_rate": 1.1590216993118199e-05, + "loss": 2.1023, + "step": 13776 + }, + { + "epoch": 0.46, + "grad_norm": 0.7381092309951782, + "learning_rate": 1.158916761641982e-05, + "loss": 2.0963, + "step": 13777 + }, + { + "epoch": 0.46, + "grad_norm": 0.772896409034729, + "learning_rate": 1.1588118221767962e-05, + "loss": 2.1372, + "step": 13778 + }, + { + "epoch": 0.46, + "grad_norm": 0.7314771413803101, + "learning_rate": 1.1587068809174471e-05, + "loss": 2.0575, + "step": 13779 + }, + { + "epoch": 0.46, + "grad_norm": 0.7316055297851562, + "learning_rate": 1.1586019378651208e-05, + "loss": 2.1004, + "step": 13780 + }, + { + "epoch": 0.46, + "grad_norm": 0.7309486865997314, + "learning_rate": 1.1584969930210026e-05, + "loss": 2.0272, + "step": 13781 + }, + { + "epoch": 0.46, + "grad_norm": 0.7354961633682251, + "learning_rate": 1.1583920463862783e-05, + "loss": 2.0628, + "step": 13782 + }, + { + "epoch": 0.46, + "grad_norm": 0.7436493039131165, + "learning_rate": 1.1582870979621337e-05, + "loss": 2.1886, + "step": 13783 + }, + { + "epoch": 0.46, + "grad_norm": 0.7101261615753174, + "learning_rate": 1.1581821477497538e-05, + "loss": 2.0419, + "step": 13784 + }, + { + "epoch": 0.46, + "grad_norm": 0.7517040371894836, + "learning_rate": 1.1580771957503252e-05, + "loss": 2.1498, + "step": 13785 + }, + { + "epoch": 0.46, + "grad_norm": 0.7366485595703125, + "learning_rate": 1.1579722419650328e-05, + "loss": 2.0679, + "step": 13786 + }, + { + "epoch": 0.46, + "grad_norm": 0.7682188153266907, + "learning_rate": 1.1578672863950628e-05, + "loss": 2.082, + "step": 13787 + }, + { + "epoch": 0.46, + "grad_norm": 0.7242255806922913, + "learning_rate": 1.1577623290416005e-05, + "loss": 2.0692, + "step": 13788 + }, + { + "epoch": 0.46, + "grad_norm": 0.7442821264266968, + "learning_rate": 1.157657369905832e-05, + "loss": 2.0902, + "step": 13789 + }, + { + "epoch": 0.46, + "grad_norm": 0.7589248418807983, + "learning_rate": 1.1575524089889429e-05, + "loss": 2.0905, + "step": 13790 + }, + { + "epoch": 0.46, + "grad_norm": 0.7484375834465027, + "learning_rate": 1.157447446292119e-05, + "loss": 2.069, + "step": 13791 + }, + { + "epoch": 0.46, + "grad_norm": 0.7687254548072815, + "learning_rate": 1.1573424818165462e-05, + "loss": 2.1245, + "step": 13792 + }, + { + "epoch": 0.46, + "grad_norm": 0.7781221866607666, + "learning_rate": 1.1572375155634101e-05, + "loss": 2.1161, + "step": 13793 + }, + { + "epoch": 0.46, + "grad_norm": 0.724916934967041, + "learning_rate": 1.1571325475338968e-05, + "loss": 2.1169, + "step": 13794 + }, + { + "epoch": 0.46, + "grad_norm": 0.7341775298118591, + "learning_rate": 1.1570275777291919e-05, + "loss": 2.076, + "step": 13795 + }, + { + "epoch": 0.46, + "grad_norm": 0.710292398929596, + "learning_rate": 1.1569226061504816e-05, + "loss": 1.9933, + "step": 13796 + }, + { + "epoch": 0.46, + "grad_norm": 0.7598239183425903, + "learning_rate": 1.1568176327989517e-05, + "loss": 2.1007, + "step": 13797 + }, + { + "epoch": 0.46, + "grad_norm": 0.755302369594574, + "learning_rate": 1.156712657675788e-05, + "loss": 2.1054, + "step": 13798 + }, + { + "epoch": 0.46, + "grad_norm": 0.7612534761428833, + "learning_rate": 1.1566076807821766e-05, + "loss": 2.0651, + "step": 13799 + }, + { + "epoch": 0.46, + "grad_norm": 0.7229418754577637, + "learning_rate": 1.1565027021193036e-05, + "loss": 2.1513, + "step": 13800 + }, + { + "epoch": 0.46, + "grad_norm": 0.7178006172180176, + "learning_rate": 1.1563977216883544e-05, + "loss": 2.0463, + "step": 13801 + }, + { + "epoch": 0.46, + "grad_norm": 0.7258201241493225, + "learning_rate": 1.1562927394905157e-05, + "loss": 2.1088, + "step": 13802 + }, + { + "epoch": 0.46, + "grad_norm": 0.6954020261764526, + "learning_rate": 1.1561877555269729e-05, + "loss": 2.0157, + "step": 13803 + }, + { + "epoch": 0.46, + "grad_norm": 0.7243265509605408, + "learning_rate": 1.1560827697989128e-05, + "loss": 2.0818, + "step": 13804 + }, + { + "epoch": 0.46, + "grad_norm": 0.7162607312202454, + "learning_rate": 1.1559777823075206e-05, + "loss": 2.0569, + "step": 13805 + }, + { + "epoch": 0.46, + "grad_norm": 0.7211546897888184, + "learning_rate": 1.155872793053983e-05, + "loss": 2.0529, + "step": 13806 + }, + { + "epoch": 0.46, + "grad_norm": 0.7309221029281616, + "learning_rate": 1.155767802039486e-05, + "loss": 2.0982, + "step": 13807 + }, + { + "epoch": 0.46, + "grad_norm": 0.7461057901382446, + "learning_rate": 1.1556628092652156e-05, + "loss": 2.097, + "step": 13808 + }, + { + "epoch": 0.46, + "grad_norm": 0.7538158297538757, + "learning_rate": 1.1555578147323583e-05, + "loss": 2.0875, + "step": 13809 + }, + { + "epoch": 0.46, + "grad_norm": 0.727258563041687, + "learning_rate": 1.1554528184420995e-05, + "loss": 2.1033, + "step": 13810 + }, + { + "epoch": 0.46, + "grad_norm": 0.7411683797836304, + "learning_rate": 1.1553478203956264e-05, + "loss": 2.017, + "step": 13811 + }, + { + "epoch": 0.46, + "grad_norm": 0.7461369633674622, + "learning_rate": 1.1552428205941241e-05, + "loss": 2.0472, + "step": 13812 + }, + { + "epoch": 0.46, + "grad_norm": 0.7368441820144653, + "learning_rate": 1.1551378190387796e-05, + "loss": 2.0617, + "step": 13813 + }, + { + "epoch": 0.46, + "grad_norm": 0.7620907425880432, + "learning_rate": 1.1550328157307791e-05, + "loss": 2.1029, + "step": 13814 + }, + { + "epoch": 0.46, + "grad_norm": 0.7447356581687927, + "learning_rate": 1.1549278106713086e-05, + "loss": 2.0507, + "step": 13815 + }, + { + "epoch": 0.46, + "grad_norm": 0.7286354899406433, + "learning_rate": 1.1548228038615545e-05, + "loss": 2.0656, + "step": 13816 + }, + { + "epoch": 0.46, + "grad_norm": 0.7376671433448792, + "learning_rate": 1.1547177953027029e-05, + "loss": 2.0964, + "step": 13817 + }, + { + "epoch": 0.46, + "grad_norm": 0.7543604969978333, + "learning_rate": 1.1546127849959405e-05, + "loss": 2.1194, + "step": 13818 + }, + { + "epoch": 0.46, + "grad_norm": 0.7598224878311157, + "learning_rate": 1.1545077729424534e-05, + "loss": 2.0952, + "step": 13819 + }, + { + "epoch": 0.46, + "grad_norm": 0.7582297325134277, + "learning_rate": 1.1544027591434283e-05, + "loss": 2.0438, + "step": 13820 + }, + { + "epoch": 0.46, + "grad_norm": 0.7243918776512146, + "learning_rate": 1.1542977436000511e-05, + "loss": 2.0731, + "step": 13821 + }, + { + "epoch": 0.46, + "grad_norm": 0.7117231488227844, + "learning_rate": 1.1541927263135087e-05, + "loss": 2.0732, + "step": 13822 + }, + { + "epoch": 0.46, + "grad_norm": 0.7152557969093323, + "learning_rate": 1.1540877072849867e-05, + "loss": 1.9554, + "step": 13823 + }, + { + "epoch": 0.46, + "grad_norm": 0.7396102547645569, + "learning_rate": 1.1539826865156725e-05, + "loss": 2.0946, + "step": 13824 + }, + { + "epoch": 0.46, + "grad_norm": 0.7315008640289307, + "learning_rate": 1.1538776640067519e-05, + "loss": 2.0017, + "step": 13825 + }, + { + "epoch": 0.46, + "grad_norm": 0.7583364248275757, + "learning_rate": 1.1537726397594119e-05, + "loss": 2.1846, + "step": 13826 + }, + { + "epoch": 0.46, + "grad_norm": 0.7367548942565918, + "learning_rate": 1.1536676137748384e-05, + "loss": 1.9949, + "step": 13827 + }, + { + "epoch": 0.46, + "grad_norm": 0.7530710697174072, + "learning_rate": 1.1535625860542186e-05, + "loss": 2.1036, + "step": 13828 + }, + { + "epoch": 0.46, + "grad_norm": 0.7300674319267273, + "learning_rate": 1.1534575565987383e-05, + "loss": 2.0043, + "step": 13829 + }, + { + "epoch": 0.46, + "grad_norm": 0.7651640772819519, + "learning_rate": 1.1533525254095848e-05, + "loss": 2.0677, + "step": 13830 + }, + { + "epoch": 0.46, + "grad_norm": 0.7363502383232117, + "learning_rate": 1.1532474924879445e-05, + "loss": 2.0626, + "step": 13831 + }, + { + "epoch": 0.46, + "grad_norm": 0.7527604699134827, + "learning_rate": 1.1531424578350032e-05, + "loss": 2.116, + "step": 13832 + }, + { + "epoch": 0.46, + "grad_norm": 0.7592442035675049, + "learning_rate": 1.1530374214519489e-05, + "loss": 2.1137, + "step": 13833 + }, + { + "epoch": 0.46, + "grad_norm": 0.7953033447265625, + "learning_rate": 1.152932383339967e-05, + "loss": 2.0978, + "step": 13834 + }, + { + "epoch": 0.46, + "grad_norm": 0.7347846031188965, + "learning_rate": 1.1528273435002448e-05, + "loss": 2.078, + "step": 13835 + }, + { + "epoch": 0.46, + "grad_norm": 0.7502967715263367, + "learning_rate": 1.1527223019339688e-05, + "loss": 2.0442, + "step": 13836 + }, + { + "epoch": 0.46, + "grad_norm": 0.7213720083236694, + "learning_rate": 1.1526172586423259e-05, + "loss": 2.0875, + "step": 13837 + }, + { + "epoch": 0.46, + "grad_norm": 0.7581600546836853, + "learning_rate": 1.1525122136265025e-05, + "loss": 2.1345, + "step": 13838 + }, + { + "epoch": 0.46, + "grad_norm": 0.7445322275161743, + "learning_rate": 1.1524071668876856e-05, + "loss": 2.1047, + "step": 13839 + }, + { + "epoch": 0.46, + "grad_norm": 0.7370960712432861, + "learning_rate": 1.1523021184270615e-05, + "loss": 2.064, + "step": 13840 + }, + { + "epoch": 0.46, + "grad_norm": 0.774368166923523, + "learning_rate": 1.1521970682458176e-05, + "loss": 2.1606, + "step": 13841 + }, + { + "epoch": 0.46, + "grad_norm": 0.7363104820251465, + "learning_rate": 1.1520920163451407e-05, + "loss": 2.0227, + "step": 13842 + }, + { + "epoch": 0.46, + "grad_norm": 0.7620216012001038, + "learning_rate": 1.1519869627262168e-05, + "loss": 2.1349, + "step": 13843 + }, + { + "epoch": 0.46, + "grad_norm": 0.7250754237174988, + "learning_rate": 1.1518819073902336e-05, + "loss": 2.1687, + "step": 13844 + }, + { + "epoch": 0.46, + "grad_norm": 0.73106849193573, + "learning_rate": 1.1517768503383777e-05, + "loss": 2.0818, + "step": 13845 + }, + { + "epoch": 0.46, + "grad_norm": 0.74542236328125, + "learning_rate": 1.1516717915718357e-05, + "loss": 2.0703, + "step": 13846 + }, + { + "epoch": 0.46, + "grad_norm": 0.7665259838104248, + "learning_rate": 1.1515667310917946e-05, + "loss": 2.0592, + "step": 13847 + }, + { + "epoch": 0.46, + "grad_norm": 0.7540133595466614, + "learning_rate": 1.1514616688994416e-05, + "loss": 2.0475, + "step": 13848 + }, + { + "epoch": 0.46, + "grad_norm": 0.7784125804901123, + "learning_rate": 1.1513566049959634e-05, + "loss": 2.1381, + "step": 13849 + }, + { + "epoch": 0.46, + "grad_norm": 0.7082016468048096, + "learning_rate": 1.151251539382547e-05, + "loss": 2.0818, + "step": 13850 + }, + { + "epoch": 0.46, + "grad_norm": 0.7291967272758484, + "learning_rate": 1.1511464720603791e-05, + "loss": 2.1883, + "step": 13851 + }, + { + "epoch": 0.46, + "grad_norm": 0.714471161365509, + "learning_rate": 1.1510414030306472e-05, + "loss": 2.0319, + "step": 13852 + }, + { + "epoch": 0.46, + "grad_norm": 0.7395991086959839, + "learning_rate": 1.1509363322945381e-05, + "loss": 1.9846, + "step": 13853 + }, + { + "epoch": 0.46, + "grad_norm": 0.7501673698425293, + "learning_rate": 1.1508312598532385e-05, + "loss": 2.098, + "step": 13854 + }, + { + "epoch": 0.46, + "grad_norm": 0.7246443033218384, + "learning_rate": 1.1507261857079358e-05, + "loss": 2.0764, + "step": 13855 + }, + { + "epoch": 0.46, + "grad_norm": 0.7416405081748962, + "learning_rate": 1.1506211098598172e-05, + "loss": 2.0618, + "step": 13856 + }, + { + "epoch": 0.46, + "grad_norm": 0.7348794341087341, + "learning_rate": 1.1505160323100692e-05, + "loss": 2.1011, + "step": 13857 + }, + { + "epoch": 0.46, + "grad_norm": 0.7050432562828064, + "learning_rate": 1.1504109530598797e-05, + "loss": 2.0575, + "step": 13858 + }, + { + "epoch": 0.46, + "grad_norm": 0.7333235740661621, + "learning_rate": 1.1503058721104349e-05, + "loss": 2.0726, + "step": 13859 + }, + { + "epoch": 0.46, + "grad_norm": 0.7256713509559631, + "learning_rate": 1.150200789462923e-05, + "loss": 2.0544, + "step": 13860 + }, + { + "epoch": 0.46, + "grad_norm": 0.7469145059585571, + "learning_rate": 1.1500957051185304e-05, + "loss": 2.1184, + "step": 13861 + }, + { + "epoch": 0.46, + "grad_norm": 0.7442758679389954, + "learning_rate": 1.1499906190784445e-05, + "loss": 2.1784, + "step": 13862 + }, + { + "epoch": 0.46, + "grad_norm": 0.7319467663764954, + "learning_rate": 1.1498855313438524e-05, + "loss": 2.0443, + "step": 13863 + }, + { + "epoch": 0.46, + "grad_norm": 0.8317609429359436, + "learning_rate": 1.1497804419159417e-05, + "loss": 2.1619, + "step": 13864 + }, + { + "epoch": 0.46, + "grad_norm": 0.7429776191711426, + "learning_rate": 1.1496753507958988e-05, + "loss": 2.1153, + "step": 13865 + }, + { + "epoch": 0.46, + "grad_norm": 0.7224605679512024, + "learning_rate": 1.149570257984912e-05, + "loss": 2.0132, + "step": 13866 + }, + { + "epoch": 0.46, + "grad_norm": 0.7159321904182434, + "learning_rate": 1.1494651634841676e-05, + "loss": 2.0076, + "step": 13867 + }, + { + "epoch": 0.46, + "grad_norm": 0.7364002466201782, + "learning_rate": 1.1493600672948537e-05, + "loss": 2.0055, + "step": 13868 + }, + { + "epoch": 0.46, + "grad_norm": 0.7353820204734802, + "learning_rate": 1.1492549694181574e-05, + "loss": 2.0486, + "step": 13869 + }, + { + "epoch": 0.46, + "grad_norm": 0.7276679277420044, + "learning_rate": 1.1491498698552657e-05, + "loss": 2.067, + "step": 13870 + }, + { + "epoch": 0.46, + "grad_norm": 0.711052656173706, + "learning_rate": 1.1490447686073663e-05, + "loss": 2.1002, + "step": 13871 + }, + { + "epoch": 0.46, + "grad_norm": 0.7470738887786865, + "learning_rate": 1.1489396656756463e-05, + "loss": 2.1044, + "step": 13872 + }, + { + "epoch": 0.46, + "grad_norm": 0.7476822137832642, + "learning_rate": 1.1488345610612934e-05, + "loss": 2.1049, + "step": 13873 + }, + { + "epoch": 0.46, + "grad_norm": 0.7425014972686768, + "learning_rate": 1.148729454765495e-05, + "loss": 2.1098, + "step": 13874 + }, + { + "epoch": 0.46, + "grad_norm": 0.7206026911735535, + "learning_rate": 1.1486243467894381e-05, + "loss": 2.0387, + "step": 13875 + }, + { + "epoch": 0.46, + "grad_norm": 0.7091426849365234, + "learning_rate": 1.1485192371343106e-05, + "loss": 2.0376, + "step": 13876 + }, + { + "epoch": 0.46, + "grad_norm": 0.715815007686615, + "learning_rate": 1.1484141258012998e-05, + "loss": 2.0714, + "step": 13877 + }, + { + "epoch": 0.46, + "grad_norm": 0.7546079158782959, + "learning_rate": 1.148309012791593e-05, + "loss": 2.0836, + "step": 13878 + }, + { + "epoch": 0.46, + "grad_norm": 0.7702943682670593, + "learning_rate": 1.1482038981063778e-05, + "loss": 2.1156, + "step": 13879 + }, + { + "epoch": 0.46, + "grad_norm": 0.7444843649864197, + "learning_rate": 1.1480987817468423e-05, + "loss": 2.1323, + "step": 13880 + }, + { + "epoch": 0.46, + "grad_norm": 0.7514033317565918, + "learning_rate": 1.147993663714173e-05, + "loss": 2.061, + "step": 13881 + }, + { + "epoch": 0.46, + "grad_norm": 0.7395638823509216, + "learning_rate": 1.1478885440095587e-05, + "loss": 2.1083, + "step": 13882 + }, + { + "epoch": 0.46, + "grad_norm": 0.758599042892456, + "learning_rate": 1.1477834226341857e-05, + "loss": 2.1821, + "step": 13883 + }, + { + "epoch": 0.46, + "grad_norm": 0.7210294008255005, + "learning_rate": 1.1476782995892424e-05, + "loss": 2.0166, + "step": 13884 + }, + { + "epoch": 0.46, + "grad_norm": 0.7398263216018677, + "learning_rate": 1.1475731748759162e-05, + "loss": 2.1801, + "step": 13885 + }, + { + "epoch": 0.46, + "grad_norm": 0.764863908290863, + "learning_rate": 1.147468048495395e-05, + "loss": 2.0518, + "step": 13886 + }, + { + "epoch": 0.46, + "grad_norm": 0.7324207425117493, + "learning_rate": 1.1473629204488659e-05, + "loss": 2.0582, + "step": 13887 + }, + { + "epoch": 0.46, + "grad_norm": 0.7663314938545227, + "learning_rate": 1.147257790737517e-05, + "loss": 2.1097, + "step": 13888 + }, + { + "epoch": 0.46, + "grad_norm": 0.7337262630462646, + "learning_rate": 1.1471526593625358e-05, + "loss": 2.0108, + "step": 13889 + }, + { + "epoch": 0.46, + "grad_norm": 0.7179603576660156, + "learning_rate": 1.1470475263251099e-05, + "loss": 2.0722, + "step": 13890 + }, + { + "epoch": 0.46, + "grad_norm": 0.7415421009063721, + "learning_rate": 1.1469423916264277e-05, + "loss": 2.066, + "step": 13891 + }, + { + "epoch": 0.46, + "grad_norm": 0.7523535490036011, + "learning_rate": 1.146837255267676e-05, + "loss": 2.0712, + "step": 13892 + }, + { + "epoch": 0.46, + "grad_norm": 0.7425236105918884, + "learning_rate": 1.1467321172500437e-05, + "loss": 2.086, + "step": 13893 + }, + { + "epoch": 0.46, + "grad_norm": 0.777191698551178, + "learning_rate": 1.1466269775747174e-05, + "loss": 2.0477, + "step": 13894 + }, + { + "epoch": 0.46, + "grad_norm": 0.7317134737968445, + "learning_rate": 1.1465218362428856e-05, + "loss": 2.0937, + "step": 13895 + }, + { + "epoch": 0.46, + "grad_norm": 0.7797331809997559, + "learning_rate": 1.1464166932557359e-05, + "loss": 2.1333, + "step": 13896 + }, + { + "epoch": 0.46, + "grad_norm": 0.751956045627594, + "learning_rate": 1.1463115486144563e-05, + "loss": 2.0396, + "step": 13897 + }, + { + "epoch": 0.46, + "grad_norm": 0.7568763494491577, + "learning_rate": 1.1462064023202345e-05, + "loss": 2.0919, + "step": 13898 + }, + { + "epoch": 0.46, + "grad_norm": 0.7560786604881287, + "learning_rate": 1.1461012543742584e-05, + "loss": 2.0439, + "step": 13899 + }, + { + "epoch": 0.46, + "grad_norm": 0.7259364724159241, + "learning_rate": 1.145996104777716e-05, + "loss": 2.0688, + "step": 13900 + }, + { + "epoch": 0.46, + "grad_norm": 0.7402641177177429, + "learning_rate": 1.1458909535317953e-05, + "loss": 2.1444, + "step": 13901 + }, + { + "epoch": 0.46, + "grad_norm": 0.718134343624115, + "learning_rate": 1.1457858006376841e-05, + "loss": 2.086, + "step": 13902 + }, + { + "epoch": 0.46, + "grad_norm": 0.7434369325637817, + "learning_rate": 1.1456806460965701e-05, + "loss": 2.059, + "step": 13903 + }, + { + "epoch": 0.46, + "grad_norm": 0.7297135591506958, + "learning_rate": 1.1455754899096419e-05, + "loss": 2.157, + "step": 13904 + }, + { + "epoch": 0.46, + "grad_norm": 0.7373976707458496, + "learning_rate": 1.145470332078087e-05, + "loss": 2.094, + "step": 13905 + }, + { + "epoch": 0.46, + "grad_norm": 0.7307386994361877, + "learning_rate": 1.1453651726030933e-05, + "loss": 2.1151, + "step": 13906 + }, + { + "epoch": 0.46, + "grad_norm": 0.7358463406562805, + "learning_rate": 1.1452600114858492e-05, + "loss": 2.0761, + "step": 13907 + }, + { + "epoch": 0.46, + "grad_norm": 0.7153708934783936, + "learning_rate": 1.1451548487275429e-05, + "loss": 2.0131, + "step": 13908 + }, + { + "epoch": 0.46, + "grad_norm": 0.7238354086875916, + "learning_rate": 1.1450496843293618e-05, + "loss": 2.0723, + "step": 13909 + }, + { + "epoch": 0.46, + "grad_norm": 0.7462184429168701, + "learning_rate": 1.1449445182924945e-05, + "loss": 2.0467, + "step": 13910 + }, + { + "epoch": 0.46, + "grad_norm": 0.7478886246681213, + "learning_rate": 1.144839350618129e-05, + "loss": 2.1195, + "step": 13911 + }, + { + "epoch": 0.46, + "grad_norm": 0.7192968726158142, + "learning_rate": 1.1447341813074533e-05, + "loss": 2.0741, + "step": 13912 + }, + { + "epoch": 0.46, + "grad_norm": 0.7380467653274536, + "learning_rate": 1.144629010361656e-05, + "loss": 2.0996, + "step": 13913 + }, + { + "epoch": 0.46, + "grad_norm": 0.7404887080192566, + "learning_rate": 1.1445238377819243e-05, + "loss": 2.0496, + "step": 13914 + }, + { + "epoch": 0.46, + "grad_norm": 0.7744516134262085, + "learning_rate": 1.1444186635694476e-05, + "loss": 2.0126, + "step": 13915 + }, + { + "epoch": 0.46, + "grad_norm": 0.7374155521392822, + "learning_rate": 1.1443134877254131e-05, + "loss": 2.1088, + "step": 13916 + }, + { + "epoch": 0.46, + "grad_norm": 0.7409254312515259, + "learning_rate": 1.1442083102510096e-05, + "loss": 2.0647, + "step": 13917 + }, + { + "epoch": 0.46, + "grad_norm": 0.7647099494934082, + "learning_rate": 1.144103131147425e-05, + "loss": 2.1031, + "step": 13918 + }, + { + "epoch": 0.46, + "grad_norm": 0.741031289100647, + "learning_rate": 1.1439979504158476e-05, + "loss": 2.0965, + "step": 13919 + }, + { + "epoch": 0.46, + "grad_norm": 0.7317578792572021, + "learning_rate": 1.1438927680574658e-05, + "loss": 2.0238, + "step": 13920 + }, + { + "epoch": 0.46, + "grad_norm": 0.7393238544464111, + "learning_rate": 1.143787584073468e-05, + "loss": 2.0677, + "step": 13921 + }, + { + "epoch": 0.46, + "grad_norm": 0.7135379910469055, + "learning_rate": 1.1436823984650422e-05, + "loss": 2.051, + "step": 13922 + }, + { + "epoch": 0.46, + "grad_norm": 0.7402812242507935, + "learning_rate": 1.143577211233377e-05, + "loss": 2.161, + "step": 13923 + }, + { + "epoch": 0.46, + "grad_norm": 0.7004898190498352, + "learning_rate": 1.1434720223796605e-05, + "loss": 2.0466, + "step": 13924 + }, + { + "epoch": 0.46, + "grad_norm": 0.7115724682807922, + "learning_rate": 1.143366831905081e-05, + "loss": 2.0585, + "step": 13925 + }, + { + "epoch": 0.46, + "grad_norm": 0.7690955400466919, + "learning_rate": 1.1432616398108274e-05, + "loss": 2.1698, + "step": 13926 + }, + { + "epoch": 0.46, + "grad_norm": 0.733790934085846, + "learning_rate": 1.1431564460980877e-05, + "loss": 2.0965, + "step": 13927 + }, + { + "epoch": 0.46, + "grad_norm": 0.687777042388916, + "learning_rate": 1.1430512507680503e-05, + "loss": 2.0732, + "step": 13928 + }, + { + "epoch": 0.46, + "grad_norm": 0.7394942045211792, + "learning_rate": 1.1429460538219034e-05, + "loss": 2.0993, + "step": 13929 + }, + { + "epoch": 0.46, + "grad_norm": 0.7259914875030518, + "learning_rate": 1.1428408552608361e-05, + "loss": 2.0541, + "step": 13930 + }, + { + "epoch": 0.46, + "grad_norm": 0.7330652475357056, + "learning_rate": 1.1427356550860364e-05, + "loss": 2.1504, + "step": 13931 + }, + { + "epoch": 0.46, + "grad_norm": 0.7698226571083069, + "learning_rate": 1.1426304532986929e-05, + "loss": 2.118, + "step": 13932 + }, + { + "epoch": 0.46, + "grad_norm": 0.7498550415039062, + "learning_rate": 1.1425252498999944e-05, + "loss": 2.1065, + "step": 13933 + }, + { + "epoch": 0.46, + "grad_norm": 0.7705021500587463, + "learning_rate": 1.1424200448911289e-05, + "loss": 2.034, + "step": 13934 + }, + { + "epoch": 0.46, + "grad_norm": 0.7328606247901917, + "learning_rate": 1.1423148382732854e-05, + "loss": 2.0827, + "step": 13935 + }, + { + "epoch": 0.46, + "grad_norm": 0.7137869596481323, + "learning_rate": 1.142209630047652e-05, + "loss": 2.0482, + "step": 13936 + }, + { + "epoch": 0.46, + "grad_norm": 0.7185385823249817, + "learning_rate": 1.1421044202154179e-05, + "loss": 2.1659, + "step": 13937 + }, + { + "epoch": 0.46, + "grad_norm": 0.7574206590652466, + "learning_rate": 1.141999208777771e-05, + "loss": 2.1053, + "step": 13938 + }, + { + "epoch": 0.46, + "grad_norm": 0.7163469195365906, + "learning_rate": 1.1418939957359004e-05, + "loss": 2.1074, + "step": 13939 + }, + { + "epoch": 0.46, + "grad_norm": 0.7385785579681396, + "learning_rate": 1.1417887810909944e-05, + "loss": 2.1101, + "step": 13940 + }, + { + "epoch": 0.46, + "grad_norm": 0.7270582914352417, + "learning_rate": 1.1416835648442422e-05, + "loss": 2.0727, + "step": 13941 + }, + { + "epoch": 0.46, + "grad_norm": 0.7220864295959473, + "learning_rate": 1.1415783469968318e-05, + "loss": 2.0929, + "step": 13942 + }, + { + "epoch": 0.46, + "grad_norm": 0.7149447798728943, + "learning_rate": 1.1414731275499522e-05, + "loss": 2.0499, + "step": 13943 + }, + { + "epoch": 0.46, + "grad_norm": 0.72572261095047, + "learning_rate": 1.1413679065047922e-05, + "loss": 2.1075, + "step": 13944 + }, + { + "epoch": 0.46, + "grad_norm": 0.762215256690979, + "learning_rate": 1.1412626838625404e-05, + "loss": 2.0324, + "step": 13945 + }, + { + "epoch": 0.46, + "grad_norm": 0.7542533874511719, + "learning_rate": 1.1411574596243859e-05, + "loss": 2.015, + "step": 13946 + }, + { + "epoch": 0.46, + "grad_norm": 0.7126274704933167, + "learning_rate": 1.1410522337915169e-05, + "loss": 2.1059, + "step": 13947 + }, + { + "epoch": 0.46, + "grad_norm": 0.7149423956871033, + "learning_rate": 1.1409470063651225e-05, + "loss": 2.0672, + "step": 13948 + }, + { + "epoch": 0.46, + "grad_norm": 0.748698353767395, + "learning_rate": 1.1408417773463913e-05, + "loss": 1.9922, + "step": 13949 + }, + { + "epoch": 0.46, + "grad_norm": 0.7311465740203857, + "learning_rate": 1.1407365467365124e-05, + "loss": 2.0944, + "step": 13950 + }, + { + "epoch": 0.46, + "grad_norm": 0.732067883014679, + "learning_rate": 1.1406313145366742e-05, + "loss": 2.1242, + "step": 13951 + }, + { + "epoch": 0.46, + "grad_norm": 0.71977299451828, + "learning_rate": 1.1405260807480662e-05, + "loss": 2.0736, + "step": 13952 + }, + { + "epoch": 0.46, + "grad_norm": 0.7463904619216919, + "learning_rate": 1.1404208453718769e-05, + "loss": 2.0105, + "step": 13953 + }, + { + "epoch": 0.46, + "grad_norm": 0.7443024516105652, + "learning_rate": 1.1403156084092947e-05, + "loss": 2.0538, + "step": 13954 + }, + { + "epoch": 0.46, + "grad_norm": 0.7316613793373108, + "learning_rate": 1.1402103698615093e-05, + "loss": 1.9684, + "step": 13955 + }, + { + "epoch": 0.46, + "grad_norm": 0.7614690065383911, + "learning_rate": 1.1401051297297095e-05, + "loss": 2.1533, + "step": 13956 + }, + { + "epoch": 0.46, + "grad_norm": 0.7414304614067078, + "learning_rate": 1.1399998880150838e-05, + "loss": 2.0698, + "step": 13957 + }, + { + "epoch": 0.46, + "grad_norm": 0.754650890827179, + "learning_rate": 1.1398946447188213e-05, + "loss": 2.1399, + "step": 13958 + }, + { + "epoch": 0.46, + "grad_norm": 0.7636027932167053, + "learning_rate": 1.1397893998421115e-05, + "loss": 2.0127, + "step": 13959 + }, + { + "epoch": 0.46, + "grad_norm": 0.7040846347808838, + "learning_rate": 1.1396841533861427e-05, + "loss": 2.1173, + "step": 13960 + }, + { + "epoch": 0.46, + "grad_norm": 0.7160289287567139, + "learning_rate": 1.139578905352104e-05, + "loss": 2.0297, + "step": 13961 + }, + { + "epoch": 0.46, + "grad_norm": 0.7494525909423828, + "learning_rate": 1.1394736557411852e-05, + "loss": 2.1559, + "step": 13962 + }, + { + "epoch": 0.46, + "grad_norm": 0.7337530255317688, + "learning_rate": 1.1393684045545741e-05, + "loss": 2.0356, + "step": 13963 + }, + { + "epoch": 0.46, + "grad_norm": 0.7371634244918823, + "learning_rate": 1.139263151793461e-05, + "loss": 2.0973, + "step": 13964 + }, + { + "epoch": 0.46, + "grad_norm": 0.767296552658081, + "learning_rate": 1.1391578974590344e-05, + "loss": 2.1593, + "step": 13965 + }, + { + "epoch": 0.46, + "grad_norm": 0.7549008131027222, + "learning_rate": 1.139052641552483e-05, + "loss": 2.0054, + "step": 13966 + }, + { + "epoch": 0.46, + "grad_norm": 0.7386720776557922, + "learning_rate": 1.1389473840749965e-05, + "loss": 2.0924, + "step": 13967 + }, + { + "epoch": 0.46, + "grad_norm": 0.7300058603286743, + "learning_rate": 1.1388421250277641e-05, + "loss": 2.0955, + "step": 13968 + }, + { + "epoch": 0.46, + "grad_norm": 0.7234599590301514, + "learning_rate": 1.1387368644119745e-05, + "loss": 2.1453, + "step": 13969 + }, + { + "epoch": 0.46, + "grad_norm": 0.7075248956680298, + "learning_rate": 1.1386316022288175e-05, + "loss": 2.0352, + "step": 13970 + }, + { + "epoch": 0.46, + "grad_norm": 0.7367042899131775, + "learning_rate": 1.1385263384794813e-05, + "loss": 2.131, + "step": 13971 + }, + { + "epoch": 0.46, + "grad_norm": 0.7205585241317749, + "learning_rate": 1.1384210731651562e-05, + "loss": 2.1128, + "step": 13972 + }, + { + "epoch": 0.46, + "grad_norm": 0.6995275616645813, + "learning_rate": 1.138315806287031e-05, + "loss": 2.0532, + "step": 13973 + }, + { + "epoch": 0.46, + "grad_norm": 0.7629989385604858, + "learning_rate": 1.1382105378462945e-05, + "loss": 2.0868, + "step": 13974 + }, + { + "epoch": 0.46, + "grad_norm": 0.740467369556427, + "learning_rate": 1.1381052678441367e-05, + "loss": 2.0842, + "step": 13975 + }, + { + "epoch": 0.46, + "grad_norm": 0.712752640247345, + "learning_rate": 1.1379999962817462e-05, + "loss": 2.1317, + "step": 13976 + }, + { + "epoch": 0.47, + "grad_norm": 0.7488521933555603, + "learning_rate": 1.1378947231603128e-05, + "loss": 2.1505, + "step": 13977 + }, + { + "epoch": 0.47, + "grad_norm": 0.7209734320640564, + "learning_rate": 1.1377894484810255e-05, + "loss": 2.0598, + "step": 13978 + }, + { + "epoch": 0.47, + "grad_norm": 0.7276514768600464, + "learning_rate": 1.1376841722450738e-05, + "loss": 2.1052, + "step": 13979 + }, + { + "epoch": 0.47, + "grad_norm": 0.7672101855278015, + "learning_rate": 1.137578894453647e-05, + "loss": 1.9813, + "step": 13980 + }, + { + "epoch": 0.47, + "grad_norm": 0.781242847442627, + "learning_rate": 1.137473615107935e-05, + "loss": 2.103, + "step": 13981 + }, + { + "epoch": 0.47, + "grad_norm": 0.7273551821708679, + "learning_rate": 1.1373683342091257e-05, + "loss": 2.0676, + "step": 13982 + }, + { + "epoch": 0.47, + "grad_norm": 0.7384310960769653, + "learning_rate": 1.13726305175841e-05, + "loss": 2.1079, + "step": 13983 + }, + { + "epoch": 0.47, + "grad_norm": 0.7347680330276489, + "learning_rate": 1.137157767756977e-05, + "loss": 2.0556, + "step": 13984 + }, + { + "epoch": 0.47, + "grad_norm": 0.7486788034439087, + "learning_rate": 1.1370524822060154e-05, + "loss": 2.0971, + "step": 13985 + }, + { + "epoch": 0.47, + "grad_norm": 0.7360063791275024, + "learning_rate": 1.1369471951067158e-05, + "loss": 2.0756, + "step": 13986 + }, + { + "epoch": 0.47, + "grad_norm": 0.7113722562789917, + "learning_rate": 1.1368419064602666e-05, + "loss": 2.0873, + "step": 13987 + }, + { + "epoch": 0.47, + "grad_norm": 0.7143206000328064, + "learning_rate": 1.1367366162678577e-05, + "loss": 2.0529, + "step": 13988 + }, + { + "epoch": 0.47, + "grad_norm": 0.7286527156829834, + "learning_rate": 1.1366313245306788e-05, + "loss": 2.0539, + "step": 13989 + }, + { + "epoch": 0.47, + "grad_norm": 0.7297046184539795, + "learning_rate": 1.1365260312499194e-05, + "loss": 2.105, + "step": 13990 + }, + { + "epoch": 0.47, + "grad_norm": 0.7451215982437134, + "learning_rate": 1.1364207364267686e-05, + "loss": 2.0773, + "step": 13991 + }, + { + "epoch": 0.47, + "grad_norm": 0.7240462303161621, + "learning_rate": 1.1363154400624166e-05, + "loss": 2.0693, + "step": 13992 + }, + { + "epoch": 0.47, + "grad_norm": 0.7205730080604553, + "learning_rate": 1.1362101421580522e-05, + "loss": 2.0684, + "step": 13993 + }, + { + "epoch": 0.47, + "grad_norm": 0.7240166068077087, + "learning_rate": 1.1361048427148657e-05, + "loss": 2.0661, + "step": 13994 + }, + { + "epoch": 0.47, + "grad_norm": 0.7666645050048828, + "learning_rate": 1.1359995417340468e-05, + "loss": 2.1364, + "step": 13995 + }, + { + "epoch": 0.47, + "grad_norm": 0.7451803684234619, + "learning_rate": 1.1358942392167842e-05, + "loss": 2.0688, + "step": 13996 + }, + { + "epoch": 0.47, + "grad_norm": 0.7014675140380859, + "learning_rate": 1.1357889351642686e-05, + "loss": 2.0438, + "step": 13997 + }, + { + "epoch": 0.47, + "grad_norm": 0.7625283002853394, + "learning_rate": 1.135683629577689e-05, + "loss": 2.1134, + "step": 13998 + }, + { + "epoch": 0.47, + "grad_norm": 0.7011498808860779, + "learning_rate": 1.135578322458235e-05, + "loss": 2.0442, + "step": 13999 + }, + { + "epoch": 0.47, + "grad_norm": 0.719828724861145, + "learning_rate": 1.135473013807097e-05, + "loss": 2.103, + "step": 14000 + }, + { + "epoch": 0.47, + "grad_norm": 0.7221359610557556, + "learning_rate": 1.135367703625464e-05, + "loss": 2.0582, + "step": 14001 + }, + { + "epoch": 0.47, + "grad_norm": 0.7149716019630432, + "learning_rate": 1.1352623919145263e-05, + "loss": 2.0814, + "step": 14002 + }, + { + "epoch": 0.47, + "grad_norm": 0.7294800877571106, + "learning_rate": 1.1351570786754731e-05, + "loss": 2.1489, + "step": 14003 + }, + { + "epoch": 0.47, + "grad_norm": 0.7244287729263306, + "learning_rate": 1.1350517639094945e-05, + "loss": 2.0893, + "step": 14004 + }, + { + "epoch": 0.47, + "grad_norm": 0.748668909072876, + "learning_rate": 1.1349464476177801e-05, + "loss": 2.1098, + "step": 14005 + }, + { + "epoch": 0.47, + "grad_norm": 0.740935206413269, + "learning_rate": 1.1348411298015203e-05, + "loss": 2.0779, + "step": 14006 + }, + { + "epoch": 0.47, + "grad_norm": 0.7441394329071045, + "learning_rate": 1.1347358104619038e-05, + "loss": 2.052, + "step": 14007 + }, + { + "epoch": 0.47, + "grad_norm": 0.7490922808647156, + "learning_rate": 1.1346304896001217e-05, + "loss": 2.1348, + "step": 14008 + }, + { + "epoch": 0.47, + "grad_norm": 0.7414029240608215, + "learning_rate": 1.1345251672173628e-05, + "loss": 2.0878, + "step": 14009 + }, + { + "epoch": 0.47, + "grad_norm": 0.7330541610717773, + "learning_rate": 1.1344198433148175e-05, + "loss": 1.9649, + "step": 14010 + }, + { + "epoch": 0.47, + "grad_norm": 0.7368197441101074, + "learning_rate": 1.1343145178936757e-05, + "loss": 2.0267, + "step": 14011 + }, + { + "epoch": 0.47, + "grad_norm": 0.7594302296638489, + "learning_rate": 1.134209190955127e-05, + "loss": 2.1157, + "step": 14012 + }, + { + "epoch": 0.47, + "grad_norm": 0.7266318202018738, + "learning_rate": 1.1341038625003615e-05, + "loss": 2.0607, + "step": 14013 + }, + { + "epoch": 0.47, + "grad_norm": 0.7155597805976868, + "learning_rate": 1.133998532530569e-05, + "loss": 2.108, + "step": 14014 + }, + { + "epoch": 0.47, + "grad_norm": 0.7481585144996643, + "learning_rate": 1.1338932010469402e-05, + "loss": 2.1388, + "step": 14015 + }, + { + "epoch": 0.47, + "grad_norm": 0.7479772567749023, + "learning_rate": 1.133787868050664e-05, + "loss": 2.0908, + "step": 14016 + }, + { + "epoch": 0.47, + "grad_norm": 0.7307004332542419, + "learning_rate": 1.1336825335429314e-05, + "loss": 2.1405, + "step": 14017 + }, + { + "epoch": 0.47, + "grad_norm": 0.7523561716079712, + "learning_rate": 1.1335771975249312e-05, + "loss": 2.158, + "step": 14018 + }, + { + "epoch": 0.47, + "grad_norm": 0.7081090211868286, + "learning_rate": 1.1334718599978545e-05, + "loss": 2.0943, + "step": 14019 + }, + { + "epoch": 0.47, + "grad_norm": 0.7477535605430603, + "learning_rate": 1.1333665209628908e-05, + "loss": 2.1125, + "step": 14020 + }, + { + "epoch": 0.47, + "grad_norm": 0.7332716584205627, + "learning_rate": 1.1332611804212305e-05, + "loss": 2.0454, + "step": 14021 + }, + { + "epoch": 0.47, + "grad_norm": 0.767056941986084, + "learning_rate": 1.1331558383740633e-05, + "loss": 2.113, + "step": 14022 + }, + { + "epoch": 0.47, + "grad_norm": 0.7267517447471619, + "learning_rate": 1.1330504948225795e-05, + "loss": 2.0748, + "step": 14023 + }, + { + "epoch": 0.47, + "grad_norm": 0.7462465763092041, + "learning_rate": 1.1329451497679692e-05, + "loss": 2.0513, + "step": 14024 + }, + { + "epoch": 0.47, + "grad_norm": 0.7530091404914856, + "learning_rate": 1.1328398032114225e-05, + "loss": 2.0737, + "step": 14025 + }, + { + "epoch": 0.47, + "grad_norm": 0.7244776487350464, + "learning_rate": 1.1327344551541295e-05, + "loss": 2.1615, + "step": 14026 + }, + { + "epoch": 0.47, + "grad_norm": 0.7228409647941589, + "learning_rate": 1.1326291055972805e-05, + "loss": 2.1467, + "step": 14027 + }, + { + "epoch": 0.47, + "grad_norm": 0.7332518696784973, + "learning_rate": 1.1325237545420658e-05, + "loss": 2.1216, + "step": 14028 + }, + { + "epoch": 0.47, + "grad_norm": 0.7250930666923523, + "learning_rate": 1.1324184019896748e-05, + "loss": 2.0935, + "step": 14029 + }, + { + "epoch": 0.47, + "grad_norm": 0.7490986585617065, + "learning_rate": 1.132313047941299e-05, + "loss": 2.187, + "step": 14030 + }, + { + "epoch": 0.47, + "grad_norm": 0.7438898086547852, + "learning_rate": 1.1322076923981275e-05, + "loss": 2.023, + "step": 14031 + }, + { + "epoch": 0.47, + "grad_norm": 0.743503749370575, + "learning_rate": 1.1321023353613511e-05, + "loss": 2.1319, + "step": 14032 + }, + { + "epoch": 0.47, + "grad_norm": 0.7419090270996094, + "learning_rate": 1.13199697683216e-05, + "loss": 2.1351, + "step": 14033 + }, + { + "epoch": 0.47, + "grad_norm": 0.7308027148246765, + "learning_rate": 1.1318916168117442e-05, + "loss": 2.0453, + "step": 14034 + }, + { + "epoch": 0.47, + "grad_norm": 0.7740827202796936, + "learning_rate": 1.1317862553012944e-05, + "loss": 2.1055, + "step": 14035 + }, + { + "epoch": 0.47, + "grad_norm": 0.7577747106552124, + "learning_rate": 1.1316808923020007e-05, + "loss": 2.0984, + "step": 14036 + }, + { + "epoch": 0.47, + "grad_norm": 0.742768406867981, + "learning_rate": 1.1315755278150534e-05, + "loss": 2.0944, + "step": 14037 + }, + { + "epoch": 0.47, + "grad_norm": 0.7283887267112732, + "learning_rate": 1.1314701618416429e-05, + "loss": 2.0895, + "step": 14038 + }, + { + "epoch": 0.47, + "grad_norm": 0.7511473298072815, + "learning_rate": 1.1313647943829597e-05, + "loss": 2.1151, + "step": 14039 + }, + { + "epoch": 0.47, + "grad_norm": 0.7397477626800537, + "learning_rate": 1.1312594254401937e-05, + "loss": 1.961, + "step": 14040 + }, + { + "epoch": 0.47, + "grad_norm": 0.73041170835495, + "learning_rate": 1.131154055014536e-05, + "loss": 2.1296, + "step": 14041 + }, + { + "epoch": 0.47, + "grad_norm": 0.7607660889625549, + "learning_rate": 1.1310486831071765e-05, + "loss": 2.1117, + "step": 14042 + }, + { + "epoch": 0.47, + "grad_norm": 0.7276760935783386, + "learning_rate": 1.1309433097193057e-05, + "loss": 2.0849, + "step": 14043 + }, + { + "epoch": 0.47, + "grad_norm": 0.7549551129341125, + "learning_rate": 1.1308379348521141e-05, + "loss": 2.1102, + "step": 14044 + }, + { + "epoch": 0.47, + "grad_norm": 0.7305091023445129, + "learning_rate": 1.1307325585067923e-05, + "loss": 2.1077, + "step": 14045 + }, + { + "epoch": 0.47, + "grad_norm": 0.7362569570541382, + "learning_rate": 1.1306271806845306e-05, + "loss": 2.0761, + "step": 14046 + }, + { + "epoch": 0.47, + "grad_norm": 0.7306249737739563, + "learning_rate": 1.1305218013865198e-05, + "loss": 1.9843, + "step": 14047 + }, + { + "epoch": 0.47, + "grad_norm": 0.7770626544952393, + "learning_rate": 1.1304164206139499e-05, + "loss": 2.1324, + "step": 14048 + }, + { + "epoch": 0.47, + "grad_norm": 0.7392917275428772, + "learning_rate": 1.1303110383680119e-05, + "loss": 2.0991, + "step": 14049 + }, + { + "epoch": 0.47, + "grad_norm": 0.741791844367981, + "learning_rate": 1.1302056546498964e-05, + "loss": 2.0201, + "step": 14050 + }, + { + "epoch": 0.47, + "grad_norm": 0.7629430294036865, + "learning_rate": 1.130100269460793e-05, + "loss": 2.0694, + "step": 14051 + }, + { + "epoch": 0.47, + "grad_norm": 0.7539768218994141, + "learning_rate": 1.1299948828018936e-05, + "loss": 2.1713, + "step": 14052 + }, + { + "epoch": 0.47, + "grad_norm": 0.7455495595932007, + "learning_rate": 1.1298894946743878e-05, + "loss": 2.0637, + "step": 14053 + }, + { + "epoch": 0.47, + "grad_norm": 0.735785961151123, + "learning_rate": 1.129784105079467e-05, + "loss": 2.0646, + "step": 14054 + }, + { + "epoch": 0.47, + "grad_norm": 0.6989507079124451, + "learning_rate": 1.1296787140183212e-05, + "loss": 2.0175, + "step": 14055 + }, + { + "epoch": 0.47, + "grad_norm": 0.7415017485618591, + "learning_rate": 1.1295733214921411e-05, + "loss": 2.0424, + "step": 14056 + }, + { + "epoch": 0.47, + "grad_norm": 0.6982277631759644, + "learning_rate": 1.1294679275021179e-05, + "loss": 1.9993, + "step": 14057 + }, + { + "epoch": 0.47, + "grad_norm": 0.7305813431739807, + "learning_rate": 1.129362532049442e-05, + "loss": 2.109, + "step": 14058 + }, + { + "epoch": 0.47, + "grad_norm": 0.7424694299697876, + "learning_rate": 1.1292571351353037e-05, + "loss": 2.1005, + "step": 14059 + }, + { + "epoch": 0.47, + "grad_norm": 0.7935014963150024, + "learning_rate": 1.1291517367608942e-05, + "loss": 2.1004, + "step": 14060 + }, + { + "epoch": 0.47, + "grad_norm": 0.727861225605011, + "learning_rate": 1.1290463369274042e-05, + "loss": 2.0406, + "step": 14061 + }, + { + "epoch": 0.47, + "grad_norm": 0.7031880021095276, + "learning_rate": 1.128940935636024e-05, + "loss": 2.0641, + "step": 14062 + }, + { + "epoch": 0.47, + "grad_norm": 0.7515766620635986, + "learning_rate": 1.128835532887945e-05, + "loss": 2.1094, + "step": 14063 + }, + { + "epoch": 0.47, + "grad_norm": 0.7242578864097595, + "learning_rate": 1.1287301286843573e-05, + "loss": 2.1454, + "step": 14064 + }, + { + "epoch": 0.47, + "grad_norm": 0.7245280146598816, + "learning_rate": 1.1286247230264522e-05, + "loss": 2.058, + "step": 14065 + }, + { + "epoch": 0.47, + "grad_norm": 0.7270819544792175, + "learning_rate": 1.1285193159154207e-05, + "loss": 2.0514, + "step": 14066 + }, + { + "epoch": 0.47, + "grad_norm": 0.7513290643692017, + "learning_rate": 1.1284139073524528e-05, + "loss": 2.0748, + "step": 14067 + }, + { + "epoch": 0.47, + "grad_norm": 0.7444294691085815, + "learning_rate": 1.1283084973387402e-05, + "loss": 2.0319, + "step": 14068 + }, + { + "epoch": 0.47, + "grad_norm": 0.7484315633773804, + "learning_rate": 1.1282030858754731e-05, + "loss": 2.0395, + "step": 14069 + }, + { + "epoch": 0.47, + "grad_norm": 0.6898239850997925, + "learning_rate": 1.1280976729638428e-05, + "loss": 2.0262, + "step": 14070 + }, + { + "epoch": 0.47, + "grad_norm": 0.723848283290863, + "learning_rate": 1.12799225860504e-05, + "loss": 2.0494, + "step": 14071 + }, + { + "epoch": 0.47, + "grad_norm": 0.736810028553009, + "learning_rate": 1.1278868428002559e-05, + "loss": 2.0754, + "step": 14072 + }, + { + "epoch": 0.47, + "grad_norm": 0.7575604319572449, + "learning_rate": 1.127781425550681e-05, + "loss": 2.0905, + "step": 14073 + }, + { + "epoch": 0.47, + "grad_norm": 0.7576225399971008, + "learning_rate": 1.1276760068575065e-05, + "loss": 1.978, + "step": 14074 + }, + { + "epoch": 0.47, + "grad_norm": 0.7554639577865601, + "learning_rate": 1.1275705867219231e-05, + "loss": 2.1882, + "step": 14075 + }, + { + "epoch": 0.47, + "grad_norm": 0.7662122249603271, + "learning_rate": 1.127465165145122e-05, + "loss": 2.0803, + "step": 14076 + }, + { + "epoch": 0.47, + "grad_norm": 0.7734922766685486, + "learning_rate": 1.1273597421282946e-05, + "loss": 2.0119, + "step": 14077 + }, + { + "epoch": 0.47, + "grad_norm": 0.7401114702224731, + "learning_rate": 1.127254317672631e-05, + "loss": 2.0387, + "step": 14078 + }, + { + "epoch": 0.47, + "grad_norm": 0.7506241202354431, + "learning_rate": 1.1271488917793232e-05, + "loss": 2.0169, + "step": 14079 + }, + { + "epoch": 0.47, + "grad_norm": 0.7795040607452393, + "learning_rate": 1.1270434644495614e-05, + "loss": 2.1069, + "step": 14080 + }, + { + "epoch": 0.47, + "grad_norm": 0.753847599029541, + "learning_rate": 1.126938035684537e-05, + "loss": 2.0981, + "step": 14081 + }, + { + "epoch": 0.47, + "grad_norm": 0.7214574813842773, + "learning_rate": 1.126832605485441e-05, + "loss": 2.0556, + "step": 14082 + }, + { + "epoch": 0.47, + "grad_norm": 0.711821973323822, + "learning_rate": 1.1267271738534646e-05, + "loss": 2.056, + "step": 14083 + }, + { + "epoch": 0.47, + "grad_norm": 0.7408535480499268, + "learning_rate": 1.1266217407897988e-05, + "loss": 2.1032, + "step": 14084 + }, + { + "epoch": 0.47, + "grad_norm": 0.7385123372077942, + "learning_rate": 1.1265163062956353e-05, + "loss": 2.054, + "step": 14085 + }, + { + "epoch": 0.47, + "grad_norm": 0.7447160482406616, + "learning_rate": 1.1264108703721638e-05, + "loss": 2.1132, + "step": 14086 + }, + { + "epoch": 0.47, + "grad_norm": 0.7687646746635437, + "learning_rate": 1.1263054330205769e-05, + "loss": 2.0824, + "step": 14087 + }, + { + "epoch": 0.47, + "grad_norm": 0.7291406989097595, + "learning_rate": 1.1261999942420653e-05, + "loss": 2.0499, + "step": 14088 + }, + { + "epoch": 0.47, + "grad_norm": 0.7002138495445251, + "learning_rate": 1.1260945540378198e-05, + "loss": 2.0523, + "step": 14089 + }, + { + "epoch": 0.47, + "grad_norm": 0.7312231659889221, + "learning_rate": 1.1259891124090326e-05, + "loss": 2.083, + "step": 14090 + }, + { + "epoch": 0.47, + "grad_norm": 0.7296798825263977, + "learning_rate": 1.1258836693568937e-05, + "loss": 2.1295, + "step": 14091 + }, + { + "epoch": 0.47, + "grad_norm": 0.7545778155326843, + "learning_rate": 1.125778224882595e-05, + "loss": 2.0522, + "step": 14092 + }, + { + "epoch": 0.47, + "grad_norm": 0.7274122834205627, + "learning_rate": 1.1256727789873276e-05, + "loss": 2.0464, + "step": 14093 + }, + { + "epoch": 0.47, + "grad_norm": 0.7647008895874023, + "learning_rate": 1.1255673316722828e-05, + "loss": 2.1172, + "step": 14094 + }, + { + "epoch": 0.47, + "grad_norm": 0.715890109539032, + "learning_rate": 1.1254618829386518e-05, + "loss": 2.0993, + "step": 14095 + }, + { + "epoch": 0.47, + "grad_norm": 0.7222809791564941, + "learning_rate": 1.1253564327876262e-05, + "loss": 2.087, + "step": 14096 + }, + { + "epoch": 0.47, + "grad_norm": 0.7518162727355957, + "learning_rate": 1.125250981220397e-05, + "loss": 2.0916, + "step": 14097 + }, + { + "epoch": 0.47, + "grad_norm": 0.7373457551002502, + "learning_rate": 1.1251455282381554e-05, + "loss": 2.0645, + "step": 14098 + }, + { + "epoch": 0.47, + "grad_norm": 0.7541444897651672, + "learning_rate": 1.1250400738420933e-05, + "loss": 2.08, + "step": 14099 + }, + { + "epoch": 0.47, + "grad_norm": 0.7496023178100586, + "learning_rate": 1.1249346180334012e-05, + "loss": 2.1125, + "step": 14100 + }, + { + "epoch": 0.47, + "grad_norm": 0.7495477795600891, + "learning_rate": 1.1248291608132718e-05, + "loss": 2.0757, + "step": 14101 + }, + { + "epoch": 0.47, + "grad_norm": 0.7495827674865723, + "learning_rate": 1.1247237021828951e-05, + "loss": 2.0812, + "step": 14102 + }, + { + "epoch": 0.47, + "grad_norm": 0.7537965178489685, + "learning_rate": 1.1246182421434633e-05, + "loss": 2.1164, + "step": 14103 + }, + { + "epoch": 0.47, + "grad_norm": 0.7331140637397766, + "learning_rate": 1.1245127806961676e-05, + "loss": 2.0276, + "step": 14104 + }, + { + "epoch": 0.47, + "grad_norm": 0.76673823595047, + "learning_rate": 1.1244073178421996e-05, + "loss": 2.082, + "step": 14105 + }, + { + "epoch": 0.47, + "grad_norm": 0.7465325593948364, + "learning_rate": 1.1243018535827503e-05, + "loss": 2.0999, + "step": 14106 + }, + { + "epoch": 0.47, + "grad_norm": 0.7604457139968872, + "learning_rate": 1.1241963879190117e-05, + "loss": 2.1277, + "step": 14107 + }, + { + "epoch": 0.47, + "grad_norm": 0.7472529411315918, + "learning_rate": 1.124090920852175e-05, + "loss": 2.0806, + "step": 14108 + }, + { + "epoch": 0.47, + "grad_norm": 0.7689916491508484, + "learning_rate": 1.1239854523834319e-05, + "loss": 2.147, + "step": 14109 + }, + { + "epoch": 0.47, + "grad_norm": 0.7152977585792542, + "learning_rate": 1.1238799825139741e-05, + "loss": 2.0955, + "step": 14110 + }, + { + "epoch": 0.47, + "grad_norm": 0.7335329055786133, + "learning_rate": 1.1237745112449923e-05, + "loss": 2.1087, + "step": 14111 + }, + { + "epoch": 0.47, + "grad_norm": 0.7496398091316223, + "learning_rate": 1.1236690385776792e-05, + "loss": 2.0839, + "step": 14112 + }, + { + "epoch": 0.47, + "grad_norm": 0.7283348441123962, + "learning_rate": 1.1235635645132255e-05, + "loss": 2.0566, + "step": 14113 + }, + { + "epoch": 0.47, + "grad_norm": 0.7165517210960388, + "learning_rate": 1.1234580890528229e-05, + "loss": 2.1336, + "step": 14114 + }, + { + "epoch": 0.47, + "grad_norm": 0.7048013210296631, + "learning_rate": 1.1233526121976632e-05, + "loss": 2.1134, + "step": 14115 + }, + { + "epoch": 0.47, + "grad_norm": 0.7458078265190125, + "learning_rate": 1.123247133948938e-05, + "loss": 2.0285, + "step": 14116 + }, + { + "epoch": 0.47, + "grad_norm": 0.7379661798477173, + "learning_rate": 1.123141654307839e-05, + "loss": 2.1385, + "step": 14117 + }, + { + "epoch": 0.47, + "grad_norm": 0.7375317811965942, + "learning_rate": 1.1230361732755579e-05, + "loss": 2.1057, + "step": 14118 + }, + { + "epoch": 0.47, + "grad_norm": 0.7448461651802063, + "learning_rate": 1.122930690853286e-05, + "loss": 2.062, + "step": 14119 + }, + { + "epoch": 0.47, + "grad_norm": 0.7793803811073303, + "learning_rate": 1.1228252070422152e-05, + "loss": 2.1329, + "step": 14120 + }, + { + "epoch": 0.47, + "grad_norm": 0.7436450719833374, + "learning_rate": 1.1227197218435375e-05, + "loss": 2.1152, + "step": 14121 + }, + { + "epoch": 0.47, + "grad_norm": 0.7249694466590881, + "learning_rate": 1.122614235258444e-05, + "loss": 2.0756, + "step": 14122 + }, + { + "epoch": 0.47, + "grad_norm": 0.7256861925125122, + "learning_rate": 1.1225087472881269e-05, + "loss": 2.0855, + "step": 14123 + }, + { + "epoch": 0.47, + "grad_norm": 0.7595627903938293, + "learning_rate": 1.1224032579337777e-05, + "loss": 2.1777, + "step": 14124 + }, + { + "epoch": 0.47, + "grad_norm": 0.7674539685249329, + "learning_rate": 1.1222977671965882e-05, + "loss": 2.0858, + "step": 14125 + }, + { + "epoch": 0.47, + "grad_norm": 0.7489127516746521, + "learning_rate": 1.12219227507775e-05, + "loss": 2.0329, + "step": 14126 + }, + { + "epoch": 0.47, + "grad_norm": 0.6948639750480652, + "learning_rate": 1.1220867815784553e-05, + "loss": 2.0531, + "step": 14127 + }, + { + "epoch": 0.47, + "grad_norm": 0.7794910669326782, + "learning_rate": 1.1219812866998959e-05, + "loss": 2.1326, + "step": 14128 + }, + { + "epoch": 0.47, + "grad_norm": 0.7348852753639221, + "learning_rate": 1.1218757904432629e-05, + "loss": 2.0992, + "step": 14129 + }, + { + "epoch": 0.47, + "grad_norm": 0.7510849833488464, + "learning_rate": 1.121770292809749e-05, + "loss": 2.0313, + "step": 14130 + }, + { + "epoch": 0.47, + "grad_norm": 0.7234457731246948, + "learning_rate": 1.1216647938005455e-05, + "loss": 2.0655, + "step": 14131 + }, + { + "epoch": 0.47, + "grad_norm": 0.7223379015922546, + "learning_rate": 1.121559293416845e-05, + "loss": 2.0913, + "step": 14132 + }, + { + "epoch": 0.47, + "grad_norm": 0.7217646837234497, + "learning_rate": 1.121453791659838e-05, + "loss": 2.0782, + "step": 14133 + }, + { + "epoch": 0.47, + "grad_norm": 0.7575363516807556, + "learning_rate": 1.1213482885307179e-05, + "loss": 2.057, + "step": 14134 + }, + { + "epoch": 0.47, + "grad_norm": 0.7553440928459167, + "learning_rate": 1.1212427840306759e-05, + "loss": 2.1164, + "step": 14135 + }, + { + "epoch": 0.47, + "grad_norm": 0.7449776530265808, + "learning_rate": 1.1211372781609038e-05, + "loss": 2.0241, + "step": 14136 + }, + { + "epoch": 0.47, + "grad_norm": 0.7748123407363892, + "learning_rate": 1.1210317709225935e-05, + "loss": 2.0277, + "step": 14137 + }, + { + "epoch": 0.47, + "grad_norm": 0.7384259700775146, + "learning_rate": 1.1209262623169375e-05, + "loss": 2.0691, + "step": 14138 + }, + { + "epoch": 0.47, + "grad_norm": 0.6911337375640869, + "learning_rate": 1.1208207523451274e-05, + "loss": 2.032, + "step": 14139 + }, + { + "epoch": 0.47, + "grad_norm": 0.7353290915489197, + "learning_rate": 1.1207152410083553e-05, + "loss": 2.0862, + "step": 14140 + }, + { + "epoch": 0.47, + "grad_norm": 0.7301750183105469, + "learning_rate": 1.1206097283078131e-05, + "loss": 2.1174, + "step": 14141 + }, + { + "epoch": 0.47, + "grad_norm": 0.7042744159698486, + "learning_rate": 1.1205042142446927e-05, + "loss": 2.0093, + "step": 14142 + }, + { + "epoch": 0.47, + "grad_norm": 0.7420377731323242, + "learning_rate": 1.1203986988201867e-05, + "loss": 2.0163, + "step": 14143 + }, + { + "epoch": 0.47, + "grad_norm": 0.728172242641449, + "learning_rate": 1.120293182035486e-05, + "loss": 2.0896, + "step": 14144 + }, + { + "epoch": 0.47, + "grad_norm": 0.7490823268890381, + "learning_rate": 1.1201876638917843e-05, + "loss": 2.1335, + "step": 14145 + }, + { + "epoch": 0.47, + "grad_norm": 0.7358691096305847, + "learning_rate": 1.1200821443902726e-05, + "loss": 2.1055, + "step": 14146 + }, + { + "epoch": 0.47, + "grad_norm": 0.7314212322235107, + "learning_rate": 1.1199766235321429e-05, + "loss": 2.1365, + "step": 14147 + }, + { + "epoch": 0.47, + "grad_norm": 0.7496983408927917, + "learning_rate": 1.1198711013185879e-05, + "loss": 2.0748, + "step": 14148 + }, + { + "epoch": 0.47, + "grad_norm": 0.7331315875053406, + "learning_rate": 1.1197655777507991e-05, + "loss": 2.0567, + "step": 14149 + }, + { + "epoch": 0.47, + "grad_norm": 0.7236708998680115, + "learning_rate": 1.1196600528299693e-05, + "loss": 2.1185, + "step": 14150 + }, + { + "epoch": 0.47, + "grad_norm": 0.7511530518531799, + "learning_rate": 1.1195545265572903e-05, + "loss": 2.1002, + "step": 14151 + }, + { + "epoch": 0.47, + "grad_norm": 0.7367854714393616, + "learning_rate": 1.1194489989339543e-05, + "loss": 2.0731, + "step": 14152 + }, + { + "epoch": 0.47, + "grad_norm": 0.7246856093406677, + "learning_rate": 1.1193434699611533e-05, + "loss": 2.0118, + "step": 14153 + }, + { + "epoch": 0.47, + "grad_norm": 0.7418739199638367, + "learning_rate": 1.1192379396400803e-05, + "loss": 2.0615, + "step": 14154 + }, + { + "epoch": 0.47, + "grad_norm": 0.773021399974823, + "learning_rate": 1.1191324079719263e-05, + "loss": 2.1079, + "step": 14155 + }, + { + "epoch": 0.47, + "grad_norm": 0.734575092792511, + "learning_rate": 1.1190268749578848e-05, + "loss": 2.1141, + "step": 14156 + }, + { + "epoch": 0.47, + "grad_norm": 0.712656557559967, + "learning_rate": 1.1189213405991469e-05, + "loss": 2.106, + "step": 14157 + }, + { + "epoch": 0.47, + "grad_norm": 0.7313783764839172, + "learning_rate": 1.1188158048969056e-05, + "loss": 2.079, + "step": 14158 + }, + { + "epoch": 0.47, + "grad_norm": 0.7257425785064697, + "learning_rate": 1.1187102678523529e-05, + "loss": 2.1187, + "step": 14159 + }, + { + "epoch": 0.47, + "grad_norm": 0.7388837933540344, + "learning_rate": 1.118604729466681e-05, + "loss": 2.1224, + "step": 14160 + }, + { + "epoch": 0.47, + "grad_norm": 0.771956741809845, + "learning_rate": 1.1184991897410829e-05, + "loss": 2.0956, + "step": 14161 + }, + { + "epoch": 0.47, + "grad_norm": 0.7762402892112732, + "learning_rate": 1.11839364867675e-05, + "loss": 2.0883, + "step": 14162 + }, + { + "epoch": 0.47, + "grad_norm": 0.7367829084396362, + "learning_rate": 1.1182881062748749e-05, + "loss": 2.0575, + "step": 14163 + }, + { + "epoch": 0.47, + "grad_norm": 0.7327202558517456, + "learning_rate": 1.1181825625366506e-05, + "loss": 2.1224, + "step": 14164 + }, + { + "epoch": 0.47, + "grad_norm": 0.7169814109802246, + "learning_rate": 1.1180770174632684e-05, + "loss": 2.0279, + "step": 14165 + }, + { + "epoch": 0.47, + "grad_norm": 0.7503306269645691, + "learning_rate": 1.1179714710559215e-05, + "loss": 2.0068, + "step": 14166 + }, + { + "epoch": 0.47, + "grad_norm": 0.7357821464538574, + "learning_rate": 1.1178659233158024e-05, + "loss": 2.0596, + "step": 14167 + }, + { + "epoch": 0.47, + "grad_norm": 0.7679687738418579, + "learning_rate": 1.1177603742441025e-05, + "loss": 2.1336, + "step": 14168 + }, + { + "epoch": 0.47, + "grad_norm": 0.745258629322052, + "learning_rate": 1.1176548238420156e-05, + "loss": 2.0968, + "step": 14169 + }, + { + "epoch": 0.47, + "grad_norm": 0.7397328615188599, + "learning_rate": 1.117549272110733e-05, + "loss": 2.0678, + "step": 14170 + }, + { + "epoch": 0.47, + "grad_norm": 0.7299311757087708, + "learning_rate": 1.1174437190514475e-05, + "loss": 2.0712, + "step": 14171 + }, + { + "epoch": 0.47, + "grad_norm": 0.740551233291626, + "learning_rate": 1.1173381646653523e-05, + "loss": 2.109, + "step": 14172 + }, + { + "epoch": 0.47, + "grad_norm": 0.7318323850631714, + "learning_rate": 1.117232608953639e-05, + "loss": 2.0391, + "step": 14173 + }, + { + "epoch": 0.47, + "grad_norm": 0.749868631362915, + "learning_rate": 1.1171270519175002e-05, + "loss": 2.0989, + "step": 14174 + }, + { + "epoch": 0.47, + "grad_norm": 0.7104763388633728, + "learning_rate": 1.1170214935581287e-05, + "loss": 2.1373, + "step": 14175 + }, + { + "epoch": 0.47, + "grad_norm": 0.8010764122009277, + "learning_rate": 1.116915933876717e-05, + "loss": 2.0425, + "step": 14176 + }, + { + "epoch": 0.47, + "grad_norm": 0.7307770252227783, + "learning_rate": 1.1168103728744575e-05, + "loss": 2.0634, + "step": 14177 + }, + { + "epoch": 0.47, + "grad_norm": 0.7643880844116211, + "learning_rate": 1.1167048105525431e-05, + "loss": 2.0882, + "step": 14178 + }, + { + "epoch": 0.47, + "grad_norm": 0.753108561038971, + "learning_rate": 1.1165992469121659e-05, + "loss": 2.1011, + "step": 14179 + }, + { + "epoch": 0.47, + "grad_norm": 0.7784432172775269, + "learning_rate": 1.1164936819545187e-05, + "loss": 2.0803, + "step": 14180 + }, + { + "epoch": 0.47, + "grad_norm": 0.7116439342498779, + "learning_rate": 1.1163881156807945e-05, + "loss": 2.0969, + "step": 14181 + }, + { + "epoch": 0.47, + "grad_norm": 0.7272990942001343, + "learning_rate": 1.116282548092185e-05, + "loss": 2.0168, + "step": 14182 + }, + { + "epoch": 0.47, + "grad_norm": 0.8080035448074341, + "learning_rate": 1.116176979189884e-05, + "loss": 2.1166, + "step": 14183 + }, + { + "epoch": 0.47, + "grad_norm": 0.7456568479537964, + "learning_rate": 1.1160714089750833e-05, + "loss": 2.1534, + "step": 14184 + }, + { + "epoch": 0.47, + "grad_norm": 0.7111576199531555, + "learning_rate": 1.1159658374489759e-05, + "loss": 2.1103, + "step": 14185 + }, + { + "epoch": 0.47, + "grad_norm": 0.7288686633110046, + "learning_rate": 1.1158602646127546e-05, + "loss": 2.0632, + "step": 14186 + }, + { + "epoch": 0.47, + "grad_norm": 0.7175378203392029, + "learning_rate": 1.1157546904676115e-05, + "loss": 2.0521, + "step": 14187 + }, + { + "epoch": 0.47, + "grad_norm": 0.7293234467506409, + "learning_rate": 1.1156491150147399e-05, + "loss": 2.1176, + "step": 14188 + }, + { + "epoch": 0.47, + "grad_norm": 0.7512077689170837, + "learning_rate": 1.1155435382553327e-05, + "loss": 2.0551, + "step": 14189 + }, + { + "epoch": 0.47, + "grad_norm": 0.7169491052627563, + "learning_rate": 1.115437960190582e-05, + "loss": 2.0045, + "step": 14190 + }, + { + "epoch": 0.47, + "grad_norm": 0.741670548915863, + "learning_rate": 1.1153323808216809e-05, + "loss": 2.0671, + "step": 14191 + }, + { + "epoch": 0.47, + "grad_norm": 0.7526654005050659, + "learning_rate": 1.1152268001498226e-05, + "loss": 2.1611, + "step": 14192 + }, + { + "epoch": 0.47, + "grad_norm": 0.7536620497703552, + "learning_rate": 1.1151212181761988e-05, + "loss": 2.0351, + "step": 14193 + }, + { + "epoch": 0.47, + "grad_norm": 0.7254747152328491, + "learning_rate": 1.1150156349020034e-05, + "loss": 2.0792, + "step": 14194 + }, + { + "epoch": 0.47, + "grad_norm": 0.7223042249679565, + "learning_rate": 1.1149100503284285e-05, + "loss": 1.9875, + "step": 14195 + }, + { + "epoch": 0.47, + "grad_norm": 0.7163583636283875, + "learning_rate": 1.1148044644566673e-05, + "loss": 2.0846, + "step": 14196 + }, + { + "epoch": 0.47, + "grad_norm": 0.7543470859527588, + "learning_rate": 1.1146988772879123e-05, + "loss": 1.9927, + "step": 14197 + }, + { + "epoch": 0.47, + "grad_norm": 0.711875855922699, + "learning_rate": 1.114593288823357e-05, + "loss": 1.9932, + "step": 14198 + }, + { + "epoch": 0.47, + "grad_norm": 0.7417489886283875, + "learning_rate": 1.1144876990641937e-05, + "loss": 2.0592, + "step": 14199 + }, + { + "epoch": 0.47, + "grad_norm": 0.7545101642608643, + "learning_rate": 1.1143821080116155e-05, + "loss": 2.0807, + "step": 14200 + }, + { + "epoch": 0.47, + "grad_norm": 0.749754011631012, + "learning_rate": 1.1142765156668151e-05, + "loss": 2.1095, + "step": 14201 + }, + { + "epoch": 0.47, + "grad_norm": 0.7475979924201965, + "learning_rate": 1.1141709220309858e-05, + "loss": 2.1227, + "step": 14202 + }, + { + "epoch": 0.47, + "grad_norm": 0.7298274040222168, + "learning_rate": 1.1140653271053204e-05, + "loss": 2.0631, + "step": 14203 + }, + { + "epoch": 0.47, + "grad_norm": 0.7125329971313477, + "learning_rate": 1.1139597308910115e-05, + "loss": 2.0651, + "step": 14204 + }, + { + "epoch": 0.47, + "grad_norm": 0.7100964784622192, + "learning_rate": 1.1138541333892528e-05, + "loss": 2.1247, + "step": 14205 + }, + { + "epoch": 0.47, + "grad_norm": 0.749793529510498, + "learning_rate": 1.1137485346012365e-05, + "loss": 2.0522, + "step": 14206 + }, + { + "epoch": 0.47, + "grad_norm": 0.7549937963485718, + "learning_rate": 1.1136429345281558e-05, + "loss": 2.0706, + "step": 14207 + }, + { + "epoch": 0.47, + "grad_norm": 0.7523854970932007, + "learning_rate": 1.113537333171204e-05, + "loss": 2.1121, + "step": 14208 + }, + { + "epoch": 0.47, + "grad_norm": 0.7278813719749451, + "learning_rate": 1.1134317305315739e-05, + "loss": 2.0391, + "step": 14209 + }, + { + "epoch": 0.47, + "grad_norm": 0.741080641746521, + "learning_rate": 1.1133261266104586e-05, + "loss": 2.0419, + "step": 14210 + }, + { + "epoch": 0.47, + "grad_norm": 0.722870409488678, + "learning_rate": 1.1132205214090512e-05, + "loss": 2.0673, + "step": 14211 + }, + { + "epoch": 0.47, + "grad_norm": 0.779769778251648, + "learning_rate": 1.1131149149285445e-05, + "loss": 2.0892, + "step": 14212 + }, + { + "epoch": 0.47, + "grad_norm": 0.7490190863609314, + "learning_rate": 1.113009307170132e-05, + "loss": 2.0157, + "step": 14213 + }, + { + "epoch": 0.47, + "grad_norm": 0.7290025949478149, + "learning_rate": 1.1129036981350068e-05, + "loss": 2.0793, + "step": 14214 + }, + { + "epoch": 0.47, + "grad_norm": 0.7280087471008301, + "learning_rate": 1.1127980878243612e-05, + "loss": 2.0995, + "step": 14215 + }, + { + "epoch": 0.47, + "grad_norm": 0.7295913100242615, + "learning_rate": 1.1126924762393894e-05, + "loss": 2.0829, + "step": 14216 + }, + { + "epoch": 0.47, + "grad_norm": 0.7469884753227234, + "learning_rate": 1.112586863381284e-05, + "loss": 2.1549, + "step": 14217 + }, + { + "epoch": 0.47, + "grad_norm": 0.7080041170120239, + "learning_rate": 1.112481249251238e-05, + "loss": 2.0709, + "step": 14218 + }, + { + "epoch": 0.47, + "grad_norm": 0.7416033148765564, + "learning_rate": 1.1123756338504447e-05, + "loss": 2.0324, + "step": 14219 + }, + { + "epoch": 0.47, + "grad_norm": 0.7677333950996399, + "learning_rate": 1.1122700171800975e-05, + "loss": 2.0431, + "step": 14220 + }, + { + "epoch": 0.47, + "grad_norm": 0.7225643396377563, + "learning_rate": 1.1121643992413896e-05, + "loss": 2.0588, + "step": 14221 + }, + { + "epoch": 0.47, + "grad_norm": 0.7581294178962708, + "learning_rate": 1.1120587800355138e-05, + "loss": 2.1167, + "step": 14222 + }, + { + "epoch": 0.47, + "grad_norm": 0.7548710703849792, + "learning_rate": 1.1119531595636634e-05, + "loss": 2.0471, + "step": 14223 + }, + { + "epoch": 0.47, + "grad_norm": 0.7461514472961426, + "learning_rate": 1.1118475378270322e-05, + "loss": 2.0981, + "step": 14224 + }, + { + "epoch": 0.47, + "grad_norm": 0.7237613797187805, + "learning_rate": 1.1117419148268132e-05, + "loss": 2.0392, + "step": 14225 + }, + { + "epoch": 0.47, + "grad_norm": 0.7099583148956299, + "learning_rate": 1.1116362905641988e-05, + "loss": 2.0895, + "step": 14226 + }, + { + "epoch": 0.47, + "grad_norm": 0.738778293132782, + "learning_rate": 1.1115306650403839e-05, + "loss": 2.1059, + "step": 14227 + }, + { + "epoch": 0.47, + "grad_norm": 0.7312893867492676, + "learning_rate": 1.1114250382565603e-05, + "loss": 2.0993, + "step": 14228 + }, + { + "epoch": 0.47, + "grad_norm": 0.748782753944397, + "learning_rate": 1.1113194102139221e-05, + "loss": 2.1583, + "step": 14229 + }, + { + "epoch": 0.47, + "grad_norm": 0.7461559176445007, + "learning_rate": 1.1112137809136625e-05, + "loss": 2.1165, + "step": 14230 + }, + { + "epoch": 0.47, + "grad_norm": 0.7195150852203369, + "learning_rate": 1.1111081503569748e-05, + "loss": 2.092, + "step": 14231 + }, + { + "epoch": 0.47, + "grad_norm": 0.7435267567634583, + "learning_rate": 1.1110025185450522e-05, + "loss": 2.1278, + "step": 14232 + }, + { + "epoch": 0.47, + "grad_norm": 0.7279480695724487, + "learning_rate": 1.1108968854790882e-05, + "loss": 2.0468, + "step": 14233 + }, + { + "epoch": 0.47, + "grad_norm": 0.727445125579834, + "learning_rate": 1.1107912511602762e-05, + "loss": 2.0994, + "step": 14234 + }, + { + "epoch": 0.47, + "grad_norm": 0.779285728931427, + "learning_rate": 1.1106856155898096e-05, + "loss": 2.1178, + "step": 14235 + }, + { + "epoch": 0.47, + "grad_norm": 0.7448298931121826, + "learning_rate": 1.110579978768882e-05, + "loss": 2.0478, + "step": 14236 + }, + { + "epoch": 0.47, + "grad_norm": 0.7249873876571655, + "learning_rate": 1.1104743406986863e-05, + "loss": 2.0938, + "step": 14237 + }, + { + "epoch": 0.47, + "grad_norm": 0.7359350919723511, + "learning_rate": 1.1103687013804165e-05, + "loss": 2.0596, + "step": 14238 + }, + { + "epoch": 0.47, + "grad_norm": 0.7265920042991638, + "learning_rate": 1.1102630608152656e-05, + "loss": 2.0913, + "step": 14239 + }, + { + "epoch": 0.47, + "grad_norm": 0.7173481583595276, + "learning_rate": 1.1101574190044274e-05, + "loss": 2.0293, + "step": 14240 + }, + { + "epoch": 0.47, + "grad_norm": 0.7362083792686462, + "learning_rate": 1.110051775949095e-05, + "loss": 2.0835, + "step": 14241 + }, + { + "epoch": 0.47, + "grad_norm": 0.7416704893112183, + "learning_rate": 1.1099461316504622e-05, + "loss": 2.0422, + "step": 14242 + }, + { + "epoch": 0.47, + "grad_norm": 0.729263424873352, + "learning_rate": 1.1098404861097224e-05, + "loss": 2.0986, + "step": 14243 + }, + { + "epoch": 0.47, + "grad_norm": 0.7417550683021545, + "learning_rate": 1.1097348393280693e-05, + "loss": 2.0579, + "step": 14244 + }, + { + "epoch": 0.47, + "grad_norm": 0.7671549320220947, + "learning_rate": 1.1096291913066963e-05, + "loss": 2.0377, + "step": 14245 + }, + { + "epoch": 0.47, + "grad_norm": 0.781024694442749, + "learning_rate": 1.1095235420467969e-05, + "loss": 2.0717, + "step": 14246 + }, + { + "epoch": 0.47, + "grad_norm": 0.7438501119613647, + "learning_rate": 1.109417891549565e-05, + "loss": 2.0973, + "step": 14247 + }, + { + "epoch": 0.47, + "grad_norm": 0.7039564251899719, + "learning_rate": 1.1093122398161936e-05, + "loss": 2.0365, + "step": 14248 + }, + { + "epoch": 0.47, + "grad_norm": 0.7803378105163574, + "learning_rate": 1.1092065868478765e-05, + "loss": 2.1019, + "step": 14249 + }, + { + "epoch": 0.47, + "grad_norm": 0.772740364074707, + "learning_rate": 1.1091009326458076e-05, + "loss": 2.0728, + "step": 14250 + }, + { + "epoch": 0.47, + "grad_norm": 0.7509992122650146, + "learning_rate": 1.10899527721118e-05, + "loss": 2.0917, + "step": 14251 + }, + { + "epoch": 0.47, + "grad_norm": 0.7196308374404907, + "learning_rate": 1.108889620545188e-05, + "loss": 2.0937, + "step": 14252 + }, + { + "epoch": 0.47, + "grad_norm": 0.7355828285217285, + "learning_rate": 1.1087839626490244e-05, + "loss": 2.1054, + "step": 14253 + }, + { + "epoch": 0.47, + "grad_norm": 0.7692351937294006, + "learning_rate": 1.1086783035238838e-05, + "loss": 2.0726, + "step": 14254 + }, + { + "epoch": 0.47, + "grad_norm": 0.7383742928504944, + "learning_rate": 1.1085726431709594e-05, + "loss": 2.1103, + "step": 14255 + }, + { + "epoch": 0.47, + "grad_norm": 0.7565069198608398, + "learning_rate": 1.1084669815914447e-05, + "loss": 2.0934, + "step": 14256 + }, + { + "epoch": 0.47, + "grad_norm": 0.7379568219184875, + "learning_rate": 1.1083613187865337e-05, + "loss": 2.0754, + "step": 14257 + }, + { + "epoch": 0.47, + "grad_norm": 0.7448661923408508, + "learning_rate": 1.10825565475742e-05, + "loss": 2.0382, + "step": 14258 + }, + { + "epoch": 0.47, + "grad_norm": 0.7181591391563416, + "learning_rate": 1.1081499895052972e-05, + "loss": 2.0113, + "step": 14259 + }, + { + "epoch": 0.47, + "grad_norm": 0.7368479371070862, + "learning_rate": 1.1080443230313595e-05, + "loss": 2.076, + "step": 14260 + }, + { + "epoch": 0.47, + "grad_norm": 0.7352670431137085, + "learning_rate": 1.1079386553368001e-05, + "loss": 2.1059, + "step": 14261 + }, + { + "epoch": 0.47, + "grad_norm": 0.7263641953468323, + "learning_rate": 1.1078329864228132e-05, + "loss": 1.9485, + "step": 14262 + }, + { + "epoch": 0.47, + "grad_norm": 0.7289211750030518, + "learning_rate": 1.1077273162905924e-05, + "loss": 2.1017, + "step": 14263 + }, + { + "epoch": 0.47, + "grad_norm": 0.7218477129936218, + "learning_rate": 1.1076216449413313e-05, + "loss": 2.1144, + "step": 14264 + }, + { + "epoch": 0.47, + "grad_norm": 0.72855144739151, + "learning_rate": 1.1075159723762243e-05, + "loss": 2.0301, + "step": 14265 + }, + { + "epoch": 0.47, + "grad_norm": 0.7453488707542419, + "learning_rate": 1.1074102985964645e-05, + "loss": 2.1004, + "step": 14266 + }, + { + "epoch": 0.47, + "grad_norm": 0.7261212468147278, + "learning_rate": 1.1073046236032463e-05, + "loss": 2.0811, + "step": 14267 + }, + { + "epoch": 0.47, + "grad_norm": 0.7659101486206055, + "learning_rate": 1.1071989473977633e-05, + "loss": 2.1092, + "step": 14268 + }, + { + "epoch": 0.47, + "grad_norm": 0.7164590358734131, + "learning_rate": 1.1070932699812094e-05, + "loss": 2.1276, + "step": 14269 + }, + { + "epoch": 0.47, + "grad_norm": 0.7432991862297058, + "learning_rate": 1.1069875913547785e-05, + "loss": 1.9513, + "step": 14270 + }, + { + "epoch": 0.47, + "grad_norm": 0.7367212772369385, + "learning_rate": 1.1068819115196647e-05, + "loss": 2.1209, + "step": 14271 + }, + { + "epoch": 0.47, + "grad_norm": 0.753699779510498, + "learning_rate": 1.1067762304770613e-05, + "loss": 2.0827, + "step": 14272 + }, + { + "epoch": 0.47, + "grad_norm": 0.7014368176460266, + "learning_rate": 1.106670548228163e-05, + "loss": 2.0153, + "step": 14273 + }, + { + "epoch": 0.47, + "grad_norm": 0.7370536923408508, + "learning_rate": 1.1065648647741634e-05, + "loss": 2.0971, + "step": 14274 + }, + { + "epoch": 0.47, + "grad_norm": 0.7757735252380371, + "learning_rate": 1.1064591801162558e-05, + "loss": 2.1051, + "step": 14275 + }, + { + "epoch": 0.47, + "grad_norm": 0.7629368901252747, + "learning_rate": 1.1063534942556356e-05, + "loss": 2.0872, + "step": 14276 + }, + { + "epoch": 0.47, + "grad_norm": 0.7460831999778748, + "learning_rate": 1.1062478071934955e-05, + "loss": 2.0856, + "step": 14277 + }, + { + "epoch": 0.48, + "grad_norm": 0.7188703417778015, + "learning_rate": 1.1061421189310298e-05, + "loss": 2.0073, + "step": 14278 + }, + { + "epoch": 0.48, + "grad_norm": 0.7412698268890381, + "learning_rate": 1.1060364294694328e-05, + "loss": 2.0473, + "step": 14279 + }, + { + "epoch": 0.48, + "grad_norm": 0.7346656918525696, + "learning_rate": 1.1059307388098984e-05, + "loss": 2.0386, + "step": 14280 + }, + { + "epoch": 0.48, + "grad_norm": 0.7299883961677551, + "learning_rate": 1.1058250469536204e-05, + "loss": 2.102, + "step": 14281 + }, + { + "epoch": 0.48, + "grad_norm": 0.7499776482582092, + "learning_rate": 1.1057193539017936e-05, + "loss": 2.0454, + "step": 14282 + }, + { + "epoch": 0.48, + "grad_norm": 0.7022654414176941, + "learning_rate": 1.1056136596556109e-05, + "loss": 2.1104, + "step": 14283 + }, + { + "epoch": 0.48, + "grad_norm": 0.7361116409301758, + "learning_rate": 1.1055079642162672e-05, + "loss": 2.0841, + "step": 14284 + }, + { + "epoch": 0.48, + "grad_norm": 0.727783739566803, + "learning_rate": 1.1054022675849565e-05, + "loss": 2.0352, + "step": 14285 + }, + { + "epoch": 0.48, + "grad_norm": 0.758216142654419, + "learning_rate": 1.1052965697628724e-05, + "loss": 2.1149, + "step": 14286 + }, + { + "epoch": 0.48, + "grad_norm": 0.6951219439506531, + "learning_rate": 1.1051908707512098e-05, + "loss": 2.0474, + "step": 14287 + }, + { + "epoch": 0.48, + "grad_norm": 0.718856692314148, + "learning_rate": 1.105085170551162e-05, + "loss": 2.0779, + "step": 14288 + }, + { + "epoch": 0.48, + "grad_norm": 0.723412036895752, + "learning_rate": 1.1049794691639238e-05, + "loss": 2.0386, + "step": 14289 + }, + { + "epoch": 0.48, + "grad_norm": 0.7316255569458008, + "learning_rate": 1.1048737665906887e-05, + "loss": 2.108, + "step": 14290 + }, + { + "epoch": 0.48, + "grad_norm": 0.7689874172210693, + "learning_rate": 1.1047680628326516e-05, + "loss": 2.1273, + "step": 14291 + }, + { + "epoch": 0.48, + "grad_norm": 0.7342373132705688, + "learning_rate": 1.1046623578910063e-05, + "loss": 2.1002, + "step": 14292 + }, + { + "epoch": 0.48, + "grad_norm": 0.7026596665382385, + "learning_rate": 1.104556651766947e-05, + "loss": 2.0714, + "step": 14293 + }, + { + "epoch": 0.48, + "grad_norm": 0.7184266448020935, + "learning_rate": 1.1044509444616678e-05, + "loss": 2.0685, + "step": 14294 + }, + { + "epoch": 0.48, + "grad_norm": 0.7516776919364929, + "learning_rate": 1.1043452359763631e-05, + "loss": 2.1051, + "step": 14295 + }, + { + "epoch": 0.48, + "grad_norm": 0.7233201265335083, + "learning_rate": 1.1042395263122272e-05, + "loss": 2.1263, + "step": 14296 + }, + { + "epoch": 0.48, + "grad_norm": 0.7389164566993713, + "learning_rate": 1.104133815470454e-05, + "loss": 2.0444, + "step": 14297 + }, + { + "epoch": 0.48, + "grad_norm": 0.7377763986587524, + "learning_rate": 1.1040281034522383e-05, + "loss": 2.0929, + "step": 14298 + }, + { + "epoch": 0.48, + "grad_norm": 0.7246180772781372, + "learning_rate": 1.1039223902587736e-05, + "loss": 2.0888, + "step": 14299 + }, + { + "epoch": 0.48, + "grad_norm": 0.7131818532943726, + "learning_rate": 1.1038166758912551e-05, + "loss": 2.0968, + "step": 14300 + }, + { + "epoch": 0.48, + "grad_norm": 0.7286971807479858, + "learning_rate": 1.1037109603508764e-05, + "loss": 2.1462, + "step": 14301 + }, + { + "epoch": 0.48, + "grad_norm": 0.7298083901405334, + "learning_rate": 1.103605243638832e-05, + "loss": 2.0474, + "step": 14302 + }, + { + "epoch": 0.48, + "grad_norm": 0.7260264158248901, + "learning_rate": 1.1034995257563163e-05, + "loss": 2.0769, + "step": 14303 + }, + { + "epoch": 0.48, + "grad_norm": 0.7699926495552063, + "learning_rate": 1.103393806704524e-05, + "loss": 2.1027, + "step": 14304 + }, + { + "epoch": 0.48, + "grad_norm": 0.7546269297599792, + "learning_rate": 1.1032880864846485e-05, + "loss": 2.107, + "step": 14305 + }, + { + "epoch": 0.48, + "grad_norm": 0.7314380407333374, + "learning_rate": 1.103182365097885e-05, + "loss": 2.0534, + "step": 14306 + }, + { + "epoch": 0.48, + "grad_norm": 0.7109289765357971, + "learning_rate": 1.1030766425454278e-05, + "loss": 2.1013, + "step": 14307 + }, + { + "epoch": 0.48, + "grad_norm": 0.7165745496749878, + "learning_rate": 1.1029709188284706e-05, + "loss": 2.0598, + "step": 14308 + }, + { + "epoch": 0.48, + "grad_norm": 0.7367250323295593, + "learning_rate": 1.1028651939482086e-05, + "loss": 2.0832, + "step": 14309 + }, + { + "epoch": 0.48, + "grad_norm": 0.7513577342033386, + "learning_rate": 1.102759467905836e-05, + "loss": 2.0393, + "step": 14310 + }, + { + "epoch": 0.48, + "grad_norm": 0.7990437150001526, + "learning_rate": 1.102653740702547e-05, + "loss": 2.087, + "step": 14311 + }, + { + "epoch": 0.48, + "grad_norm": 0.7362295389175415, + "learning_rate": 1.1025480123395362e-05, + "loss": 2.0531, + "step": 14312 + }, + { + "epoch": 0.48, + "grad_norm": 0.7576819062232971, + "learning_rate": 1.1024422828179984e-05, + "loss": 2.0929, + "step": 14313 + }, + { + "epoch": 0.48, + "grad_norm": 0.7159005999565125, + "learning_rate": 1.1023365521391274e-05, + "loss": 2.0763, + "step": 14314 + }, + { + "epoch": 0.48, + "grad_norm": 0.71181720495224, + "learning_rate": 1.1022308203041178e-05, + "loss": 2.0437, + "step": 14315 + }, + { + "epoch": 0.48, + "grad_norm": 0.7493017315864563, + "learning_rate": 1.1021250873141647e-05, + "loss": 2.0566, + "step": 14316 + }, + { + "epoch": 0.48, + "grad_norm": 0.704941987991333, + "learning_rate": 1.102019353170462e-05, + "loss": 2.1047, + "step": 14317 + }, + { + "epoch": 0.48, + "grad_norm": 0.7419793009757996, + "learning_rate": 1.1019136178742047e-05, + "loss": 2.1184, + "step": 14318 + }, + { + "epoch": 0.48, + "grad_norm": 0.7151005268096924, + "learning_rate": 1.1018078814265868e-05, + "loss": 2.0089, + "step": 14319 + }, + { + "epoch": 0.48, + "grad_norm": 0.7453631162643433, + "learning_rate": 1.1017021438288034e-05, + "loss": 2.0218, + "step": 14320 + }, + { + "epoch": 0.48, + "grad_norm": 0.7348095774650574, + "learning_rate": 1.1015964050820485e-05, + "loss": 2.1215, + "step": 14321 + }, + { + "epoch": 0.48, + "grad_norm": 0.7228155732154846, + "learning_rate": 1.101490665187517e-05, + "loss": 2.0924, + "step": 14322 + }, + { + "epoch": 0.48, + "grad_norm": 0.7173928618431091, + "learning_rate": 1.1013849241464035e-05, + "loss": 2.0314, + "step": 14323 + }, + { + "epoch": 0.48, + "grad_norm": 0.7199174165725708, + "learning_rate": 1.1012791819599025e-05, + "loss": 2.0794, + "step": 14324 + }, + { + "epoch": 0.48, + "grad_norm": 0.7311334013938904, + "learning_rate": 1.1011734386292087e-05, + "loss": 2.0624, + "step": 14325 + }, + { + "epoch": 0.48, + "grad_norm": 0.749911367893219, + "learning_rate": 1.1010676941555167e-05, + "loss": 2.0884, + "step": 14326 + }, + { + "epoch": 0.48, + "grad_norm": 0.7528603076934814, + "learning_rate": 1.1009619485400209e-05, + "loss": 2.1063, + "step": 14327 + }, + { + "epoch": 0.48, + "grad_norm": 0.7235192060470581, + "learning_rate": 1.1008562017839164e-05, + "loss": 2.0645, + "step": 14328 + }, + { + "epoch": 0.48, + "grad_norm": 0.7137499451637268, + "learning_rate": 1.1007504538883979e-05, + "loss": 2.0096, + "step": 14329 + }, + { + "epoch": 0.48, + "grad_norm": 0.7254073023796082, + "learning_rate": 1.1006447048546594e-05, + "loss": 2.0687, + "step": 14330 + }, + { + "epoch": 0.48, + "grad_norm": 0.7711024284362793, + "learning_rate": 1.1005389546838963e-05, + "loss": 2.0943, + "step": 14331 + }, + { + "epoch": 0.48, + "grad_norm": 0.751362681388855, + "learning_rate": 1.1004332033773028e-05, + "loss": 2.0817, + "step": 14332 + }, + { + "epoch": 0.48, + "grad_norm": 0.7168970108032227, + "learning_rate": 1.1003274509360738e-05, + "loss": 2.0688, + "step": 14333 + }, + { + "epoch": 0.48, + "grad_norm": 0.7737463712692261, + "learning_rate": 1.1002216973614042e-05, + "loss": 2.1569, + "step": 14334 + }, + { + "epoch": 0.48, + "grad_norm": 0.7361810803413391, + "learning_rate": 1.1001159426544886e-05, + "loss": 2.0896, + "step": 14335 + }, + { + "epoch": 0.48, + "grad_norm": 0.7197995185852051, + "learning_rate": 1.1000101868165216e-05, + "loss": 2.1391, + "step": 14336 + }, + { + "epoch": 0.48, + "grad_norm": 0.751809298992157, + "learning_rate": 1.099904429848698e-05, + "loss": 2.1495, + "step": 14337 + }, + { + "epoch": 0.48, + "grad_norm": 0.7163493037223816, + "learning_rate": 1.0997986717522128e-05, + "loss": 2.0216, + "step": 14338 + }, + { + "epoch": 0.48, + "grad_norm": 0.7354874610900879, + "learning_rate": 1.099692912528261e-05, + "loss": 1.9987, + "step": 14339 + }, + { + "epoch": 0.48, + "grad_norm": 0.7600633502006531, + "learning_rate": 1.0995871521780371e-05, + "loss": 2.0085, + "step": 14340 + }, + { + "epoch": 0.48, + "grad_norm": 0.7319091558456421, + "learning_rate": 1.0994813907027355e-05, + "loss": 2.0032, + "step": 14341 + }, + { + "epoch": 0.48, + "grad_norm": 0.7334975004196167, + "learning_rate": 1.099375628103552e-05, + "loss": 1.9702, + "step": 14342 + }, + { + "epoch": 0.48, + "grad_norm": 0.7139559388160706, + "learning_rate": 1.0992698643816804e-05, + "loss": 2.005, + "step": 14343 + }, + { + "epoch": 0.48, + "grad_norm": 0.7769381999969482, + "learning_rate": 1.0991640995383161e-05, + "loss": 2.1367, + "step": 14344 + }, + { + "epoch": 0.48, + "grad_norm": 0.716027557849884, + "learning_rate": 1.0990583335746541e-05, + "loss": 2.0542, + "step": 14345 + }, + { + "epoch": 0.48, + "grad_norm": 0.7761984467506409, + "learning_rate": 1.098952566491889e-05, + "loss": 2.0923, + "step": 14346 + }, + { + "epoch": 0.48, + "grad_norm": 0.739981472492218, + "learning_rate": 1.0988467982912158e-05, + "loss": 2.0699, + "step": 14347 + }, + { + "epoch": 0.48, + "grad_norm": 0.7462652325630188, + "learning_rate": 1.0987410289738294e-05, + "loss": 2.0871, + "step": 14348 + }, + { + "epoch": 0.48, + "grad_norm": 0.7528339624404907, + "learning_rate": 1.0986352585409248e-05, + "loss": 2.0772, + "step": 14349 + }, + { + "epoch": 0.48, + "grad_norm": 0.7492295503616333, + "learning_rate": 1.0985294869936969e-05, + "loss": 2.0549, + "step": 14350 + }, + { + "epoch": 0.48, + "grad_norm": 0.7841691374778748, + "learning_rate": 1.0984237143333408e-05, + "loss": 2.1091, + "step": 14351 + }, + { + "epoch": 0.48, + "grad_norm": 0.7653245329856873, + "learning_rate": 1.0983179405610506e-05, + "loss": 2.1358, + "step": 14352 + }, + { + "epoch": 0.48, + "grad_norm": 0.7500335574150085, + "learning_rate": 1.0982121656780225e-05, + "loss": 2.1296, + "step": 14353 + }, + { + "epoch": 0.48, + "grad_norm": 0.7445665597915649, + "learning_rate": 1.0981063896854505e-05, + "loss": 1.9956, + "step": 14354 + }, + { + "epoch": 0.48, + "grad_norm": 0.7375607490539551, + "learning_rate": 1.0980006125845304e-05, + "loss": 2.1364, + "step": 14355 + }, + { + "epoch": 0.48, + "grad_norm": 0.7358099818229675, + "learning_rate": 1.0978948343764566e-05, + "loss": 2.0695, + "step": 14356 + }, + { + "epoch": 0.48, + "grad_norm": 0.7630209922790527, + "learning_rate": 1.0977890550624241e-05, + "loss": 2.1038, + "step": 14357 + }, + { + "epoch": 0.48, + "grad_norm": 0.7237311005592346, + "learning_rate": 1.0976832746436286e-05, + "loss": 2.0676, + "step": 14358 + }, + { + "epoch": 0.48, + "grad_norm": 0.7421792149543762, + "learning_rate": 1.0975774931212647e-05, + "loss": 2.0875, + "step": 14359 + }, + { + "epoch": 0.48, + "grad_norm": 0.7438628673553467, + "learning_rate": 1.0974717104965271e-05, + "loss": 2.1439, + "step": 14360 + }, + { + "epoch": 0.48, + "grad_norm": 0.7470522522926331, + "learning_rate": 1.0973659267706114e-05, + "loss": 2.1254, + "step": 14361 + }, + { + "epoch": 0.48, + "grad_norm": 0.7259917259216309, + "learning_rate": 1.0972601419447126e-05, + "loss": 2.049, + "step": 14362 + }, + { + "epoch": 0.48, + "grad_norm": 0.7716514468193054, + "learning_rate": 1.0971543560200257e-05, + "loss": 2.0957, + "step": 14363 + }, + { + "epoch": 0.48, + "grad_norm": 0.7350912690162659, + "learning_rate": 1.0970485689977459e-05, + "loss": 2.0722, + "step": 14364 + }, + { + "epoch": 0.48, + "grad_norm": 0.7682210206985474, + "learning_rate": 1.096942780879068e-05, + "loss": 2.1348, + "step": 14365 + }, + { + "epoch": 0.48, + "grad_norm": 0.744215726852417, + "learning_rate": 1.0968369916651878e-05, + "loss": 2.0874, + "step": 14366 + }, + { + "epoch": 0.48, + "grad_norm": 0.7271391749382019, + "learning_rate": 1.0967312013572997e-05, + "loss": 1.9712, + "step": 14367 + }, + { + "epoch": 0.48, + "grad_norm": 0.7200261950492859, + "learning_rate": 1.096625409956599e-05, + "loss": 2.1073, + "step": 14368 + }, + { + "epoch": 0.48, + "grad_norm": 0.77424556016922, + "learning_rate": 1.0965196174642814e-05, + "loss": 2.0051, + "step": 14369 + }, + { + "epoch": 0.48, + "grad_norm": 0.7318762540817261, + "learning_rate": 1.0964138238815416e-05, + "loss": 2.0531, + "step": 14370 + }, + { + "epoch": 0.48, + "grad_norm": 0.730255663394928, + "learning_rate": 1.096308029209575e-05, + "loss": 2.1758, + "step": 14371 + }, + { + "epoch": 0.48, + "grad_norm": 0.7182950377464294, + "learning_rate": 1.0962022334495765e-05, + "loss": 2.0916, + "step": 14372 + }, + { + "epoch": 0.48, + "grad_norm": 0.7538824677467346, + "learning_rate": 1.0960964366027418e-05, + "loss": 2.1205, + "step": 14373 + }, + { + "epoch": 0.48, + "grad_norm": 0.7706205248832703, + "learning_rate": 1.0959906386702656e-05, + "loss": 2.0821, + "step": 14374 + }, + { + "epoch": 0.48, + "grad_norm": 0.7516716718673706, + "learning_rate": 1.0958848396533438e-05, + "loss": 2.0682, + "step": 14375 + }, + { + "epoch": 0.48, + "grad_norm": 0.6984663009643555, + "learning_rate": 1.095779039553171e-05, + "loss": 2.0458, + "step": 14376 + }, + { + "epoch": 0.48, + "grad_norm": 0.7279420495033264, + "learning_rate": 1.0956732383709429e-05, + "loss": 2.1155, + "step": 14377 + }, + { + "epoch": 0.48, + "grad_norm": 0.7458705306053162, + "learning_rate": 1.0955674361078546e-05, + "loss": 1.9973, + "step": 14378 + }, + { + "epoch": 0.48, + "grad_norm": 0.7171550393104553, + "learning_rate": 1.0954616327651011e-05, + "loss": 2.0765, + "step": 14379 + }, + { + "epoch": 0.48, + "grad_norm": 0.7854146361351013, + "learning_rate": 1.0953558283438783e-05, + "loss": 2.1844, + "step": 14380 + }, + { + "epoch": 0.48, + "grad_norm": 0.7266553640365601, + "learning_rate": 1.0952500228453812e-05, + "loss": 2.1075, + "step": 14381 + }, + { + "epoch": 0.48, + "grad_norm": 0.7142643332481384, + "learning_rate": 1.0951442162708053e-05, + "loss": 2.0437, + "step": 14382 + }, + { + "epoch": 0.48, + "grad_norm": 0.7138341069221497, + "learning_rate": 1.0950384086213455e-05, + "loss": 2.116, + "step": 14383 + }, + { + "epoch": 0.48, + "grad_norm": 0.7354432940483093, + "learning_rate": 1.0949325998981976e-05, + "loss": 2.0782, + "step": 14384 + }, + { + "epoch": 0.48, + "grad_norm": 0.7139890789985657, + "learning_rate": 1.094826790102557e-05, + "loss": 2.0422, + "step": 14385 + }, + { + "epoch": 0.48, + "grad_norm": 0.7205191850662231, + "learning_rate": 1.0947209792356188e-05, + "loss": 2.0512, + "step": 14386 + }, + { + "epoch": 0.48, + "grad_norm": 0.7609856128692627, + "learning_rate": 1.0946151672985782e-05, + "loss": 2.0829, + "step": 14387 + }, + { + "epoch": 0.48, + "grad_norm": 0.7177435755729675, + "learning_rate": 1.0945093542926312e-05, + "loss": 2.1275, + "step": 14388 + }, + { + "epoch": 0.48, + "grad_norm": 0.7493702173233032, + "learning_rate": 1.094403540218973e-05, + "loss": 2.0845, + "step": 14389 + }, + { + "epoch": 0.48, + "grad_norm": 0.7312853336334229, + "learning_rate": 1.0942977250787986e-05, + "loss": 2.0646, + "step": 14390 + }, + { + "epoch": 0.48, + "grad_norm": 0.793751060962677, + "learning_rate": 1.094191908873304e-05, + "loss": 2.088, + "step": 14391 + }, + { + "epoch": 0.48, + "grad_norm": 0.7416241765022278, + "learning_rate": 1.0940860916036843e-05, + "loss": 2.0813, + "step": 14392 + }, + { + "epoch": 0.48, + "grad_norm": 0.7198772430419922, + "learning_rate": 1.093980273271135e-05, + "loss": 2.101, + "step": 14393 + }, + { + "epoch": 0.48, + "grad_norm": 0.7386717200279236, + "learning_rate": 1.093874453876852e-05, + "loss": 2.124, + "step": 14394 + }, + { + "epoch": 0.48, + "grad_norm": 0.771170973777771, + "learning_rate": 1.09376863342203e-05, + "loss": 2.0349, + "step": 14395 + }, + { + "epoch": 0.48, + "grad_norm": 0.7241373658180237, + "learning_rate": 1.0936628119078651e-05, + "loss": 2.0342, + "step": 14396 + }, + { + "epoch": 0.48, + "grad_norm": 0.7185758352279663, + "learning_rate": 1.0935569893355532e-05, + "loss": 2.0642, + "step": 14397 + }, + { + "epoch": 0.48, + "grad_norm": 0.7752938270568848, + "learning_rate": 1.0934511657062883e-05, + "loss": 2.0667, + "step": 14398 + }, + { + "epoch": 0.48, + "grad_norm": 0.7349656224250793, + "learning_rate": 1.0933453410212674e-05, + "loss": 2.0726, + "step": 14399 + }, + { + "epoch": 0.48, + "grad_norm": 0.7349641919136047, + "learning_rate": 1.0932395152816856e-05, + "loss": 2.079, + "step": 14400 + }, + { + "epoch": 0.48, + "grad_norm": 0.7453962564468384, + "learning_rate": 1.093133688488738e-05, + "loss": 2.1127, + "step": 14401 + }, + { + "epoch": 0.48, + "grad_norm": 0.7331302165985107, + "learning_rate": 1.093027860643621e-05, + "loss": 2.092, + "step": 14402 + }, + { + "epoch": 0.48, + "grad_norm": 0.7489010095596313, + "learning_rate": 1.0929220317475298e-05, + "loss": 2.1371, + "step": 14403 + }, + { + "epoch": 0.48, + "grad_norm": 0.7274237275123596, + "learning_rate": 1.0928162018016597e-05, + "loss": 2.0997, + "step": 14404 + }, + { + "epoch": 0.48, + "grad_norm": 0.7037672996520996, + "learning_rate": 1.0927103708072065e-05, + "loss": 2.071, + "step": 14405 + }, + { + "epoch": 0.48, + "grad_norm": 0.7324825525283813, + "learning_rate": 1.092604538765366e-05, + "loss": 2.0293, + "step": 14406 + }, + { + "epoch": 0.48, + "grad_norm": 0.7871751189231873, + "learning_rate": 1.0924987056773334e-05, + "loss": 2.0793, + "step": 14407 + }, + { + "epoch": 0.48, + "grad_norm": 0.7060885429382324, + "learning_rate": 1.0923928715443052e-05, + "loss": 2.0876, + "step": 14408 + }, + { + "epoch": 0.48, + "grad_norm": 0.7304544448852539, + "learning_rate": 1.092287036367476e-05, + "loss": 2.0895, + "step": 14409 + }, + { + "epoch": 0.48, + "grad_norm": 0.7364305853843689, + "learning_rate": 1.0921812001480421e-05, + "loss": 2.0912, + "step": 14410 + }, + { + "epoch": 0.48, + "grad_norm": 0.7127228379249573, + "learning_rate": 1.0920753628871992e-05, + "loss": 2.022, + "step": 14411 + }, + { + "epoch": 0.48, + "grad_norm": 0.7321669459342957, + "learning_rate": 1.0919695245861426e-05, + "loss": 2.1161, + "step": 14412 + }, + { + "epoch": 0.48, + "grad_norm": 0.7178621292114258, + "learning_rate": 1.0918636852460685e-05, + "loss": 2.0904, + "step": 14413 + }, + { + "epoch": 0.48, + "grad_norm": 0.7229382395744324, + "learning_rate": 1.0917578448681718e-05, + "loss": 2.0731, + "step": 14414 + }, + { + "epoch": 0.48, + "grad_norm": 0.739997386932373, + "learning_rate": 1.091652003453649e-05, + "loss": 2.0058, + "step": 14415 + }, + { + "epoch": 0.48, + "grad_norm": 0.7560994029045105, + "learning_rate": 1.0915461610036956e-05, + "loss": 2.0961, + "step": 14416 + }, + { + "epoch": 0.48, + "grad_norm": 0.7425842881202698, + "learning_rate": 1.0914403175195074e-05, + "loss": 2.0943, + "step": 14417 + }, + { + "epoch": 0.48, + "grad_norm": 0.7661288380622864, + "learning_rate": 1.0913344730022801e-05, + "loss": 2.1376, + "step": 14418 + }, + { + "epoch": 0.48, + "grad_norm": 0.7817131876945496, + "learning_rate": 1.0912286274532093e-05, + "loss": 2.1068, + "step": 14419 + }, + { + "epoch": 0.48, + "grad_norm": 0.733586847782135, + "learning_rate": 1.091122780873491e-05, + "loss": 2.0932, + "step": 14420 + }, + { + "epoch": 0.48, + "grad_norm": 0.7024655938148499, + "learning_rate": 1.0910169332643207e-05, + "loss": 2.0376, + "step": 14421 + }, + { + "epoch": 0.48, + "grad_norm": 0.7409464716911316, + "learning_rate": 1.0909110846268952e-05, + "loss": 2.139, + "step": 14422 + }, + { + "epoch": 0.48, + "grad_norm": 0.7436284422874451, + "learning_rate": 1.0908052349624086e-05, + "loss": 2.0371, + "step": 14423 + }, + { + "epoch": 0.48, + "grad_norm": 0.7409657835960388, + "learning_rate": 1.0906993842720584e-05, + "loss": 2.0584, + "step": 14424 + }, + { + "epoch": 0.48, + "grad_norm": 0.7211195230484009, + "learning_rate": 1.0905935325570392e-05, + "loss": 2.1267, + "step": 14425 + }, + { + "epoch": 0.48, + "grad_norm": 0.7863349914550781, + "learning_rate": 1.0904876798185476e-05, + "loss": 2.1386, + "step": 14426 + }, + { + "epoch": 0.48, + "grad_norm": 0.735988974571228, + "learning_rate": 1.0903818260577792e-05, + "loss": 2.0587, + "step": 14427 + }, + { + "epoch": 0.48, + "grad_norm": 0.7491430044174194, + "learning_rate": 1.0902759712759297e-05, + "loss": 2.0627, + "step": 14428 + }, + { + "epoch": 0.48, + "grad_norm": 0.7436647415161133, + "learning_rate": 1.0901701154741952e-05, + "loss": 2.102, + "step": 14429 + }, + { + "epoch": 0.48, + "grad_norm": 0.7171557545661926, + "learning_rate": 1.0900642586537719e-05, + "loss": 2.1349, + "step": 14430 + }, + { + "epoch": 0.48, + "grad_norm": 0.7371664643287659, + "learning_rate": 1.0899584008158553e-05, + "loss": 2.0691, + "step": 14431 + }, + { + "epoch": 0.48, + "grad_norm": 0.7271640300750732, + "learning_rate": 1.0898525419616413e-05, + "loss": 2.0522, + "step": 14432 + }, + { + "epoch": 0.48, + "grad_norm": 0.7158271670341492, + "learning_rate": 1.0897466820923262e-05, + "loss": 2.029, + "step": 14433 + }, + { + "epoch": 0.48, + "grad_norm": 0.7423985600471497, + "learning_rate": 1.089640821209105e-05, + "loss": 2.1821, + "step": 14434 + }, + { + "epoch": 0.48, + "grad_norm": 0.7722052931785583, + "learning_rate": 1.089534959313175e-05, + "loss": 2.0516, + "step": 14435 + }, + { + "epoch": 0.48, + "grad_norm": 0.7033466696739197, + "learning_rate": 1.0894290964057313e-05, + "loss": 2.047, + "step": 14436 + }, + { + "epoch": 0.48, + "grad_norm": 0.7261670827865601, + "learning_rate": 1.08932323248797e-05, + "loss": 2.0294, + "step": 14437 + }, + { + "epoch": 0.48, + "grad_norm": 0.7561299800872803, + "learning_rate": 1.0892173675610873e-05, + "loss": 2.0643, + "step": 14438 + }, + { + "epoch": 0.48, + "grad_norm": 0.7200880646705627, + "learning_rate": 1.089111501626279e-05, + "loss": 1.9993, + "step": 14439 + }, + { + "epoch": 0.48, + "grad_norm": 0.7535219192504883, + "learning_rate": 1.089005634684741e-05, + "loss": 2.0563, + "step": 14440 + }, + { + "epoch": 0.48, + "grad_norm": 0.716543436050415, + "learning_rate": 1.0888997667376697e-05, + "loss": 2.1138, + "step": 14441 + }, + { + "epoch": 0.48, + "grad_norm": 0.7296412587165833, + "learning_rate": 1.088793897786261e-05, + "loss": 2.077, + "step": 14442 + }, + { + "epoch": 0.48, + "grad_norm": 0.7608610391616821, + "learning_rate": 1.088688027831711e-05, + "loss": 2.0565, + "step": 14443 + }, + { + "epoch": 0.48, + "grad_norm": 0.7232423424720764, + "learning_rate": 1.0885821568752155e-05, + "loss": 2.0926, + "step": 14444 + }, + { + "epoch": 0.48, + "grad_norm": 0.7583892345428467, + "learning_rate": 1.0884762849179704e-05, + "loss": 2.1186, + "step": 14445 + }, + { + "epoch": 0.48, + "grad_norm": 0.7035171389579773, + "learning_rate": 1.0883704119611725e-05, + "loss": 2.0165, + "step": 14446 + }, + { + "epoch": 0.48, + "grad_norm": 0.7235673069953918, + "learning_rate": 1.0882645380060173e-05, + "loss": 2.1529, + "step": 14447 + }, + { + "epoch": 0.48, + "grad_norm": 0.7160201072692871, + "learning_rate": 1.088158663053701e-05, + "loss": 2.0459, + "step": 14448 + }, + { + "epoch": 0.48, + "grad_norm": 0.7110678553581238, + "learning_rate": 1.0880527871054198e-05, + "loss": 2.0787, + "step": 14449 + }, + { + "epoch": 0.48, + "grad_norm": 0.746077299118042, + "learning_rate": 1.0879469101623701e-05, + "loss": 2.1206, + "step": 14450 + }, + { + "epoch": 0.48, + "grad_norm": 0.7280842661857605, + "learning_rate": 1.0878410322257472e-05, + "loss": 2.0939, + "step": 14451 + }, + { + "epoch": 0.48, + "grad_norm": 0.7563570141792297, + "learning_rate": 1.0877351532967484e-05, + "loss": 2.1091, + "step": 14452 + }, + { + "epoch": 0.48, + "grad_norm": 0.7215373516082764, + "learning_rate": 1.087629273376569e-05, + "loss": 2.0472, + "step": 14453 + }, + { + "epoch": 0.48, + "grad_norm": 0.7527751326560974, + "learning_rate": 1.0875233924664053e-05, + "loss": 2.1465, + "step": 14454 + }, + { + "epoch": 0.48, + "grad_norm": 0.7293099761009216, + "learning_rate": 1.0874175105674536e-05, + "loss": 2.1375, + "step": 14455 + }, + { + "epoch": 0.48, + "grad_norm": 0.7809600234031677, + "learning_rate": 1.0873116276809103e-05, + "loss": 2.0034, + "step": 14456 + }, + { + "epoch": 0.48, + "grad_norm": 0.7228170037269592, + "learning_rate": 1.0872057438079716e-05, + "loss": 2.0187, + "step": 14457 + }, + { + "epoch": 0.48, + "grad_norm": 0.7572272419929504, + "learning_rate": 1.0870998589498329e-05, + "loss": 2.0769, + "step": 14458 + }, + { + "epoch": 0.48, + "grad_norm": 0.7572263479232788, + "learning_rate": 1.0869939731076916e-05, + "loss": 2.1412, + "step": 14459 + }, + { + "epoch": 0.48, + "grad_norm": 0.7432723641395569, + "learning_rate": 1.086888086282743e-05, + "loss": 2.0379, + "step": 14460 + }, + { + "epoch": 0.48, + "grad_norm": 0.7562572956085205, + "learning_rate": 1.0867821984761836e-05, + "loss": 2.0951, + "step": 14461 + }, + { + "epoch": 0.48, + "grad_norm": 0.7595495581626892, + "learning_rate": 1.0866763096892103e-05, + "loss": 2.063, + "step": 14462 + }, + { + "epoch": 0.48, + "grad_norm": 0.7275890111923218, + "learning_rate": 1.0865704199230187e-05, + "loss": 2.0746, + "step": 14463 + }, + { + "epoch": 0.48, + "grad_norm": 0.7534134984016418, + "learning_rate": 1.0864645291788052e-05, + "loss": 2.0974, + "step": 14464 + }, + { + "epoch": 0.48, + "grad_norm": 0.724084198474884, + "learning_rate": 1.086358637457766e-05, + "loss": 2.1156, + "step": 14465 + }, + { + "epoch": 0.48, + "grad_norm": 0.7541021704673767, + "learning_rate": 1.0862527447610975e-05, + "loss": 2.0722, + "step": 14466 + }, + { + "epoch": 0.48, + "grad_norm": 0.753352165222168, + "learning_rate": 1.0861468510899962e-05, + "loss": 2.1408, + "step": 14467 + }, + { + "epoch": 0.48, + "grad_norm": 0.7354586124420166, + "learning_rate": 1.0860409564456584e-05, + "loss": 2.064, + "step": 14468 + }, + { + "epoch": 0.48, + "grad_norm": 0.7099581956863403, + "learning_rate": 1.0859350608292797e-05, + "loss": 1.9769, + "step": 14469 + }, + { + "epoch": 0.48, + "grad_norm": 0.7177425622940063, + "learning_rate": 1.0858291642420578e-05, + "loss": 2.113, + "step": 14470 + }, + { + "epoch": 0.48, + "grad_norm": 0.7127478122711182, + "learning_rate": 1.0857232666851878e-05, + "loss": 2.0088, + "step": 14471 + }, + { + "epoch": 0.48, + "grad_norm": 0.7607955932617188, + "learning_rate": 1.0856173681598665e-05, + "loss": 2.0841, + "step": 14472 + }, + { + "epoch": 0.48, + "grad_norm": 0.7667393684387207, + "learning_rate": 1.0855114686672907e-05, + "loss": 2.0871, + "step": 14473 + }, + { + "epoch": 0.48, + "grad_norm": 0.7473248839378357, + "learning_rate": 1.0854055682086564e-05, + "loss": 2.1569, + "step": 14474 + }, + { + "epoch": 0.48, + "grad_norm": 0.7220759987831116, + "learning_rate": 1.08529966678516e-05, + "loss": 2.144, + "step": 14475 + }, + { + "epoch": 0.48, + "grad_norm": 0.7408268451690674, + "learning_rate": 1.0851937643979978e-05, + "loss": 2.1088, + "step": 14476 + }, + { + "epoch": 0.48, + "grad_norm": 0.7305794954299927, + "learning_rate": 1.0850878610483663e-05, + "loss": 2.0707, + "step": 14477 + }, + { + "epoch": 0.48, + "grad_norm": 0.7127835750579834, + "learning_rate": 1.0849819567374623e-05, + "loss": 1.9797, + "step": 14478 + }, + { + "epoch": 0.48, + "grad_norm": 0.7302347421646118, + "learning_rate": 1.084876051466482e-05, + "loss": 2.0681, + "step": 14479 + }, + { + "epoch": 0.48, + "grad_norm": 0.7055133581161499, + "learning_rate": 1.0847701452366215e-05, + "loss": 2.0967, + "step": 14480 + }, + { + "epoch": 0.48, + "grad_norm": 0.7589823603630066, + "learning_rate": 1.084664238049078e-05, + "loss": 2.0769, + "step": 14481 + }, + { + "epoch": 0.48, + "grad_norm": 0.7595403790473938, + "learning_rate": 1.0845583299050471e-05, + "loss": 2.0888, + "step": 14482 + }, + { + "epoch": 0.48, + "grad_norm": 0.7433275580406189, + "learning_rate": 1.0844524208057257e-05, + "loss": 2.0595, + "step": 14483 + }, + { + "epoch": 0.48, + "grad_norm": 0.7364020943641663, + "learning_rate": 1.0843465107523107e-05, + "loss": 2.1058, + "step": 14484 + }, + { + "epoch": 0.48, + "grad_norm": 0.7449548840522766, + "learning_rate": 1.0842405997459981e-05, + "loss": 2.1129, + "step": 14485 + }, + { + "epoch": 0.48, + "grad_norm": 0.7430303692817688, + "learning_rate": 1.0841346877879847e-05, + "loss": 2.0789, + "step": 14486 + }, + { + "epoch": 0.48, + "grad_norm": 0.7459086179733276, + "learning_rate": 1.0840287748794669e-05, + "loss": 2.0754, + "step": 14487 + }, + { + "epoch": 0.48, + "grad_norm": 0.7378416657447815, + "learning_rate": 1.083922861021641e-05, + "loss": 2.108, + "step": 14488 + }, + { + "epoch": 0.48, + "grad_norm": 0.7368146777153015, + "learning_rate": 1.083816946215704e-05, + "loss": 2.1181, + "step": 14489 + }, + { + "epoch": 0.48, + "grad_norm": 0.745969831943512, + "learning_rate": 1.0837110304628524e-05, + "loss": 2.077, + "step": 14490 + }, + { + "epoch": 0.48, + "grad_norm": 0.7739499807357788, + "learning_rate": 1.0836051137642822e-05, + "loss": 2.1226, + "step": 14491 + }, + { + "epoch": 0.48, + "grad_norm": 0.7658153772354126, + "learning_rate": 1.0834991961211907e-05, + "loss": 2.0104, + "step": 14492 + }, + { + "epoch": 0.48, + "grad_norm": 0.7268418669700623, + "learning_rate": 1.0833932775347745e-05, + "loss": 2.0734, + "step": 14493 + }, + { + "epoch": 0.48, + "grad_norm": 0.7200512886047363, + "learning_rate": 1.0832873580062293e-05, + "loss": 1.987, + "step": 14494 + }, + { + "epoch": 0.48, + "grad_norm": 0.7354490756988525, + "learning_rate": 1.0831814375367528e-05, + "loss": 2.0619, + "step": 14495 + }, + { + "epoch": 0.48, + "grad_norm": 0.7385833859443665, + "learning_rate": 1.083075516127541e-05, + "loss": 2.032, + "step": 14496 + }, + { + "epoch": 0.48, + "grad_norm": 0.7234517931938171, + "learning_rate": 1.0829695937797908e-05, + "loss": 2.0088, + "step": 14497 + }, + { + "epoch": 0.48, + "grad_norm": 0.7308991551399231, + "learning_rate": 1.0828636704946987e-05, + "loss": 2.078, + "step": 14498 + }, + { + "epoch": 0.48, + "grad_norm": 0.72681725025177, + "learning_rate": 1.0827577462734612e-05, + "loss": 2.0715, + "step": 14499 + }, + { + "epoch": 0.48, + "grad_norm": 0.7563591003417969, + "learning_rate": 1.0826518211172755e-05, + "loss": 2.0677, + "step": 14500 + }, + { + "epoch": 0.48, + "grad_norm": 0.741085410118103, + "learning_rate": 1.0825458950273382e-05, + "loss": 2.1019, + "step": 14501 + }, + { + "epoch": 0.48, + "grad_norm": 0.7280721068382263, + "learning_rate": 1.0824399680048452e-05, + "loss": 2.0044, + "step": 14502 + }, + { + "epoch": 0.48, + "grad_norm": 0.7590895295143127, + "learning_rate": 1.0823340400509939e-05, + "loss": 2.0291, + "step": 14503 + }, + { + "epoch": 0.48, + "grad_norm": 0.7302896976470947, + "learning_rate": 1.082228111166981e-05, + "loss": 2.0569, + "step": 14504 + }, + { + "epoch": 0.48, + "grad_norm": 0.7266169190406799, + "learning_rate": 1.0821221813540028e-05, + "loss": 2.0741, + "step": 14505 + }, + { + "epoch": 0.48, + "grad_norm": 0.7605879306793213, + "learning_rate": 1.0820162506132567e-05, + "loss": 2.1122, + "step": 14506 + }, + { + "epoch": 0.48, + "grad_norm": 0.7141992449760437, + "learning_rate": 1.081910318945939e-05, + "loss": 1.9808, + "step": 14507 + }, + { + "epoch": 0.48, + "grad_norm": 0.7256706953048706, + "learning_rate": 1.0818043863532464e-05, + "loss": 2.0349, + "step": 14508 + }, + { + "epoch": 0.48, + "grad_norm": 0.7446910738945007, + "learning_rate": 1.0816984528363758e-05, + "loss": 2.1489, + "step": 14509 + }, + { + "epoch": 0.48, + "grad_norm": 0.7351961731910706, + "learning_rate": 1.0815925183965239e-05, + "loss": 2.1343, + "step": 14510 + }, + { + "epoch": 0.48, + "grad_norm": 0.7672762870788574, + "learning_rate": 1.0814865830348878e-05, + "loss": 2.0792, + "step": 14511 + }, + { + "epoch": 0.48, + "grad_norm": 0.7373375296592712, + "learning_rate": 1.081380646752664e-05, + "loss": 2.1457, + "step": 14512 + }, + { + "epoch": 0.48, + "grad_norm": 0.7743820548057556, + "learning_rate": 1.0812747095510489e-05, + "loss": 2.1224, + "step": 14513 + }, + { + "epoch": 0.48, + "grad_norm": 0.7648014426231384, + "learning_rate": 1.08116877143124e-05, + "loss": 1.9845, + "step": 14514 + }, + { + "epoch": 0.48, + "grad_norm": 0.7586275935173035, + "learning_rate": 1.0810628323944343e-05, + "loss": 2.0666, + "step": 14515 + }, + { + "epoch": 0.48, + "grad_norm": 0.7430000305175781, + "learning_rate": 1.0809568924418277e-05, + "loss": 2.0849, + "step": 14516 + }, + { + "epoch": 0.48, + "grad_norm": 0.7236108183860779, + "learning_rate": 1.0808509515746179e-05, + "loss": 2.0875, + "step": 14517 + }, + { + "epoch": 0.48, + "grad_norm": 0.7681379318237305, + "learning_rate": 1.0807450097940013e-05, + "loss": 2.1311, + "step": 14518 + }, + { + "epoch": 0.48, + "grad_norm": 0.7555314302444458, + "learning_rate": 1.0806390671011748e-05, + "loss": 2.021, + "step": 14519 + }, + { + "epoch": 0.48, + "grad_norm": 0.7460755705833435, + "learning_rate": 1.0805331234973354e-05, + "loss": 2.1327, + "step": 14520 + }, + { + "epoch": 0.48, + "grad_norm": 0.7329644560813904, + "learning_rate": 1.0804271789836802e-05, + "loss": 2.0831, + "step": 14521 + }, + { + "epoch": 0.48, + "grad_norm": 0.7432982325553894, + "learning_rate": 1.0803212335614057e-05, + "loss": 2.0984, + "step": 14522 + }, + { + "epoch": 0.48, + "grad_norm": 0.7142213582992554, + "learning_rate": 1.080215287231709e-05, + "loss": 2.0855, + "step": 14523 + }, + { + "epoch": 0.48, + "grad_norm": 0.7374944090843201, + "learning_rate": 1.080109339995787e-05, + "loss": 2.1162, + "step": 14524 + }, + { + "epoch": 0.48, + "grad_norm": 0.725470781326294, + "learning_rate": 1.0800033918548364e-05, + "loss": 2.1443, + "step": 14525 + }, + { + "epoch": 0.48, + "grad_norm": 0.738006591796875, + "learning_rate": 1.0798974428100548e-05, + "loss": 2.0391, + "step": 14526 + }, + { + "epoch": 0.48, + "grad_norm": 0.7326415777206421, + "learning_rate": 1.0797914928626383e-05, + "loss": 2.0639, + "step": 14527 + }, + { + "epoch": 0.48, + "grad_norm": 0.7425469756126404, + "learning_rate": 1.0796855420137846e-05, + "loss": 2.1035, + "step": 14528 + }, + { + "epoch": 0.48, + "grad_norm": 0.7605542540550232, + "learning_rate": 1.0795795902646902e-05, + "loss": 2.1137, + "step": 14529 + }, + { + "epoch": 0.48, + "grad_norm": 0.7297346591949463, + "learning_rate": 1.0794736376165523e-05, + "loss": 2.0356, + "step": 14530 + }, + { + "epoch": 0.48, + "grad_norm": 0.718613862991333, + "learning_rate": 1.0793676840705678e-05, + "loss": 2.0344, + "step": 14531 + }, + { + "epoch": 0.48, + "grad_norm": 0.7361851334571838, + "learning_rate": 1.0792617296279335e-05, + "loss": 2.0215, + "step": 14532 + }, + { + "epoch": 0.48, + "grad_norm": 0.7202955484390259, + "learning_rate": 1.0791557742898469e-05, + "loss": 2.0593, + "step": 14533 + }, + { + "epoch": 0.48, + "grad_norm": 0.7353624701499939, + "learning_rate": 1.0790498180575046e-05, + "loss": 2.0801, + "step": 14534 + }, + { + "epoch": 0.48, + "grad_norm": 0.7539271712303162, + "learning_rate": 1.078943860932104e-05, + "loss": 2.0801, + "step": 14535 + }, + { + "epoch": 0.48, + "grad_norm": 0.7168089151382446, + "learning_rate": 1.0788379029148417e-05, + "loss": 2.1141, + "step": 14536 + }, + { + "epoch": 0.48, + "grad_norm": 0.7230263948440552, + "learning_rate": 1.078731944006915e-05, + "loss": 2.0702, + "step": 14537 + }, + { + "epoch": 0.48, + "grad_norm": 0.753931999206543, + "learning_rate": 1.0786259842095208e-05, + "loss": 2.1037, + "step": 14538 + }, + { + "epoch": 0.48, + "grad_norm": 0.7379437685012817, + "learning_rate": 1.0785200235238569e-05, + "loss": 2.0617, + "step": 14539 + }, + { + "epoch": 0.48, + "grad_norm": 0.7212793231010437, + "learning_rate": 1.0784140619511192e-05, + "loss": 2.0534, + "step": 14540 + }, + { + "epoch": 0.48, + "grad_norm": 0.7412114143371582, + "learning_rate": 1.0783080994925058e-05, + "loss": 2.0414, + "step": 14541 + }, + { + "epoch": 0.48, + "grad_norm": 0.7415284514427185, + "learning_rate": 1.078202136149213e-05, + "loss": 2.1183, + "step": 14542 + }, + { + "epoch": 0.48, + "grad_norm": 0.7655936479568481, + "learning_rate": 1.0780961719224384e-05, + "loss": 2.0047, + "step": 14543 + }, + { + "epoch": 0.48, + "grad_norm": 0.7588881850242615, + "learning_rate": 1.0779902068133789e-05, + "loss": 2.095, + "step": 14544 + }, + { + "epoch": 0.48, + "grad_norm": 0.7264940142631531, + "learning_rate": 1.0778842408232319e-05, + "loss": 2.1196, + "step": 14545 + }, + { + "epoch": 0.48, + "grad_norm": 0.7662963271141052, + "learning_rate": 1.0777782739531945e-05, + "loss": 2.1363, + "step": 14546 + }, + { + "epoch": 0.48, + "grad_norm": 0.7407233119010925, + "learning_rate": 1.0776723062044636e-05, + "loss": 2.0421, + "step": 14547 + }, + { + "epoch": 0.48, + "grad_norm": 0.7734267711639404, + "learning_rate": 1.0775663375782369e-05, + "loss": 2.1171, + "step": 14548 + }, + { + "epoch": 0.48, + "grad_norm": 0.709658145904541, + "learning_rate": 1.0774603680757105e-05, + "loss": 2.1365, + "step": 14549 + }, + { + "epoch": 0.48, + "grad_norm": 0.7338653802871704, + "learning_rate": 1.0773543976980827e-05, + "loss": 2.0883, + "step": 14550 + }, + { + "epoch": 0.48, + "grad_norm": 0.7757417559623718, + "learning_rate": 1.0772484264465499e-05, + "loss": 2.0663, + "step": 14551 + }, + { + "epoch": 0.48, + "grad_norm": 0.7110576033592224, + "learning_rate": 1.07714245432231e-05, + "loss": 2.1025, + "step": 14552 + }, + { + "epoch": 0.48, + "grad_norm": 0.7363674640655518, + "learning_rate": 1.0770364813265599e-05, + "loss": 2.0375, + "step": 14553 + }, + { + "epoch": 0.48, + "grad_norm": 0.7321013808250427, + "learning_rate": 1.0769305074604967e-05, + "loss": 2.041, + "step": 14554 + }, + { + "epoch": 0.48, + "grad_norm": 0.7813422679901123, + "learning_rate": 1.0768245327253176e-05, + "loss": 2.0472, + "step": 14555 + }, + { + "epoch": 0.48, + "grad_norm": 0.7532978653907776, + "learning_rate": 1.07671855712222e-05, + "loss": 2.1504, + "step": 14556 + }, + { + "epoch": 0.48, + "grad_norm": 0.7305266857147217, + "learning_rate": 1.076612580652401e-05, + "loss": 1.9771, + "step": 14557 + }, + { + "epoch": 0.48, + "grad_norm": 0.7696027159690857, + "learning_rate": 1.0765066033170581e-05, + "loss": 2.0586, + "step": 14558 + }, + { + "epoch": 0.48, + "grad_norm": 0.7136980295181274, + "learning_rate": 1.0764006251173884e-05, + "loss": 2.0457, + "step": 14559 + }, + { + "epoch": 0.48, + "grad_norm": 0.7092509865760803, + "learning_rate": 1.0762946460545892e-05, + "loss": 2.0526, + "step": 14560 + }, + { + "epoch": 0.48, + "grad_norm": 0.7640079855918884, + "learning_rate": 1.0761886661298579e-05, + "loss": 2.1239, + "step": 14561 + }, + { + "epoch": 0.48, + "grad_norm": 0.7479318976402283, + "learning_rate": 1.0760826853443916e-05, + "loss": 2.0975, + "step": 14562 + }, + { + "epoch": 0.48, + "grad_norm": 0.7171921730041504, + "learning_rate": 1.0759767036993877e-05, + "loss": 2.0863, + "step": 14563 + }, + { + "epoch": 0.48, + "grad_norm": 0.7298987507820129, + "learning_rate": 1.0758707211960435e-05, + "loss": 2.0352, + "step": 14564 + }, + { + "epoch": 0.48, + "grad_norm": 0.7785048484802246, + "learning_rate": 1.0757647378355562e-05, + "loss": 2.1103, + "step": 14565 + }, + { + "epoch": 0.48, + "grad_norm": 0.7254499793052673, + "learning_rate": 1.0756587536191238e-05, + "loss": 2.1137, + "step": 14566 + }, + { + "epoch": 0.48, + "grad_norm": 0.748076319694519, + "learning_rate": 1.0755527685479428e-05, + "loss": 2.1113, + "step": 14567 + }, + { + "epoch": 0.48, + "grad_norm": 0.7399908900260925, + "learning_rate": 1.0754467826232108e-05, + "loss": 2.1108, + "step": 14568 + }, + { + "epoch": 0.48, + "grad_norm": 0.7155466079711914, + "learning_rate": 1.0753407958461255e-05, + "loss": 2.0481, + "step": 14569 + }, + { + "epoch": 0.48, + "grad_norm": 0.7140225768089294, + "learning_rate": 1.075234808217884e-05, + "loss": 2.0565, + "step": 14570 + }, + { + "epoch": 0.48, + "grad_norm": 0.7307649850845337, + "learning_rate": 1.0751288197396836e-05, + "loss": 2.0242, + "step": 14571 + }, + { + "epoch": 0.48, + "grad_norm": 0.7213634848594666, + "learning_rate": 1.0750228304127221e-05, + "loss": 2.1171, + "step": 14572 + }, + { + "epoch": 0.48, + "grad_norm": 0.7209476232528687, + "learning_rate": 1.0749168402381963e-05, + "loss": 2.1208, + "step": 14573 + }, + { + "epoch": 0.48, + "grad_norm": 0.7196819186210632, + "learning_rate": 1.0748108492173042e-05, + "loss": 2.1133, + "step": 14574 + }, + { + "epoch": 0.48, + "grad_norm": 0.7658464312553406, + "learning_rate": 1.074704857351243e-05, + "loss": 2.1002, + "step": 14575 + }, + { + "epoch": 0.48, + "grad_norm": 0.7282299995422363, + "learning_rate": 1.0745988646412095e-05, + "loss": 2.0259, + "step": 14576 + }, + { + "epoch": 0.48, + "grad_norm": 0.7416151165962219, + "learning_rate": 1.0744928710884027e-05, + "loss": 2.0864, + "step": 14577 + }, + { + "epoch": 0.49, + "grad_norm": 0.7469478845596313, + "learning_rate": 1.0743868766940185e-05, + "loss": 2.1459, + "step": 14578 + }, + { + "epoch": 0.49, + "grad_norm": 0.7472251057624817, + "learning_rate": 1.074280881459255e-05, + "loss": 1.9939, + "step": 14579 + }, + { + "epoch": 0.49, + "grad_norm": 0.7251909375190735, + "learning_rate": 1.0741748853853096e-05, + "loss": 2.0804, + "step": 14580 + }, + { + "epoch": 0.49, + "grad_norm": 0.7327628135681152, + "learning_rate": 1.0740688884733798e-05, + "loss": 2.1516, + "step": 14581 + }, + { + "epoch": 0.49, + "grad_norm": 0.7315933108329773, + "learning_rate": 1.0739628907246634e-05, + "loss": 2.1176, + "step": 14582 + }, + { + "epoch": 0.49, + "grad_norm": 0.7169124484062195, + "learning_rate": 1.0738568921403577e-05, + "loss": 2.0604, + "step": 14583 + }, + { + "epoch": 0.49, + "grad_norm": 0.72947758436203, + "learning_rate": 1.0737508927216593e-05, + "loss": 2.0901, + "step": 14584 + }, + { + "epoch": 0.49, + "grad_norm": 0.7218141555786133, + "learning_rate": 1.0736448924697673e-05, + "loss": 2.1156, + "step": 14585 + }, + { + "epoch": 0.49, + "grad_norm": 0.7156152129173279, + "learning_rate": 1.0735388913858784e-05, + "loss": 2.0811, + "step": 14586 + }, + { + "epoch": 0.49, + "grad_norm": 0.7359183430671692, + "learning_rate": 1.0734328894711898e-05, + "loss": 2.0208, + "step": 14587 + }, + { + "epoch": 0.49, + "grad_norm": 0.6928489804267883, + "learning_rate": 1.0733268867268998e-05, + "loss": 2.0554, + "step": 14588 + }, + { + "epoch": 0.49, + "grad_norm": 0.743375837802887, + "learning_rate": 1.0732208831542055e-05, + "loss": 2.0936, + "step": 14589 + }, + { + "epoch": 0.49, + "grad_norm": 0.7146577835083008, + "learning_rate": 1.0731148787543046e-05, + "loss": 2.0956, + "step": 14590 + }, + { + "epoch": 0.49, + "grad_norm": 0.741669774055481, + "learning_rate": 1.0730088735283946e-05, + "loss": 2.0339, + "step": 14591 + }, + { + "epoch": 0.49, + "grad_norm": 0.7593550682067871, + "learning_rate": 1.072902867477673e-05, + "loss": 2.1198, + "step": 14592 + }, + { + "epoch": 0.49, + "grad_norm": 0.750761091709137, + "learning_rate": 1.0727968606033378e-05, + "loss": 2.0423, + "step": 14593 + }, + { + "epoch": 0.49, + "grad_norm": 0.7350996732711792, + "learning_rate": 1.0726908529065865e-05, + "loss": 2.1502, + "step": 14594 + }, + { + "epoch": 0.49, + "grad_norm": 0.7488811612129211, + "learning_rate": 1.072584844388616e-05, + "loss": 2.0394, + "step": 14595 + }, + { + "epoch": 0.49, + "grad_norm": 0.7655408978462219, + "learning_rate": 1.072478835050625e-05, + "loss": 2.0863, + "step": 14596 + }, + { + "epoch": 0.49, + "grad_norm": 0.7548730373382568, + "learning_rate": 1.0723728248938103e-05, + "loss": 2.0056, + "step": 14597 + }, + { + "epoch": 0.49, + "grad_norm": 0.7081469893455505, + "learning_rate": 1.0722668139193696e-05, + "loss": 2.0936, + "step": 14598 + }, + { + "epoch": 0.49, + "grad_norm": 0.766166090965271, + "learning_rate": 1.0721608021285012e-05, + "loss": 2.0785, + "step": 14599 + }, + { + "epoch": 0.49, + "grad_norm": 0.774308979511261, + "learning_rate": 1.0720547895224023e-05, + "loss": 2.0606, + "step": 14600 + }, + { + "epoch": 0.49, + "grad_norm": 0.7536383271217346, + "learning_rate": 1.0719487761022705e-05, + "loss": 2.0379, + "step": 14601 + }, + { + "epoch": 0.49, + "grad_norm": 0.7396448254585266, + "learning_rate": 1.0718427618693035e-05, + "loss": 2.0943, + "step": 14602 + }, + { + "epoch": 0.49, + "grad_norm": 0.7359600067138672, + "learning_rate": 1.0717367468246992e-05, + "loss": 2.0329, + "step": 14603 + }, + { + "epoch": 0.49, + "grad_norm": 0.7345253229141235, + "learning_rate": 1.0716307309696552e-05, + "loss": 2.0687, + "step": 14604 + }, + { + "epoch": 0.49, + "grad_norm": 0.7935307621955872, + "learning_rate": 1.0715247143053693e-05, + "loss": 2.0726, + "step": 14605 + }, + { + "epoch": 0.49, + "grad_norm": 0.7525728940963745, + "learning_rate": 1.0714186968330386e-05, + "loss": 2.0839, + "step": 14606 + }, + { + "epoch": 0.49, + "grad_norm": 0.8106819987297058, + "learning_rate": 1.0713126785538618e-05, + "loss": 2.0749, + "step": 14607 + }, + { + "epoch": 0.49, + "grad_norm": 0.7426064610481262, + "learning_rate": 1.0712066594690362e-05, + "loss": 2.1124, + "step": 14608 + }, + { + "epoch": 0.49, + "grad_norm": 0.725161612033844, + "learning_rate": 1.0711006395797593e-05, + "loss": 2.0218, + "step": 14609 + }, + { + "epoch": 0.49, + "grad_norm": 0.7419837117195129, + "learning_rate": 1.070994618887229e-05, + "loss": 2.0605, + "step": 14610 + }, + { + "epoch": 0.49, + "grad_norm": 0.7481278777122498, + "learning_rate": 1.0708885973926432e-05, + "loss": 2.0176, + "step": 14611 + }, + { + "epoch": 0.49, + "grad_norm": 0.7455757856369019, + "learning_rate": 1.0707825750971994e-05, + "loss": 2.1241, + "step": 14612 + }, + { + "epoch": 0.49, + "grad_norm": 0.7478470206260681, + "learning_rate": 1.0706765520020958e-05, + "loss": 2.0784, + "step": 14613 + }, + { + "epoch": 0.49, + "grad_norm": 0.7379599213600159, + "learning_rate": 1.0705705281085297e-05, + "loss": 2.1277, + "step": 14614 + }, + { + "epoch": 0.49, + "grad_norm": 0.7394887804985046, + "learning_rate": 1.0704645034176992e-05, + "loss": 2.0614, + "step": 14615 + }, + { + "epoch": 0.49, + "grad_norm": 0.7471224069595337, + "learning_rate": 1.0703584779308023e-05, + "loss": 2.1958, + "step": 14616 + }, + { + "epoch": 0.49, + "grad_norm": 0.7275299429893494, + "learning_rate": 1.0702524516490363e-05, + "loss": 2.0863, + "step": 14617 + }, + { + "epoch": 0.49, + "grad_norm": 0.7513114213943481, + "learning_rate": 1.070146424573599e-05, + "loss": 2.1102, + "step": 14618 + }, + { + "epoch": 0.49, + "grad_norm": 0.7537183165550232, + "learning_rate": 1.070040396705689e-05, + "loss": 2.083, + "step": 14619 + }, + { + "epoch": 0.49, + "grad_norm": 0.7381898760795593, + "learning_rate": 1.0699343680465035e-05, + "loss": 2.0606, + "step": 14620 + }, + { + "epoch": 0.49, + "grad_norm": 0.7517684102058411, + "learning_rate": 1.0698283385972405e-05, + "loss": 2.1284, + "step": 14621 + }, + { + "epoch": 0.49, + "grad_norm": 0.7099905014038086, + "learning_rate": 1.0697223083590979e-05, + "loss": 1.9619, + "step": 14622 + }, + { + "epoch": 0.49, + "grad_norm": 0.7577435970306396, + "learning_rate": 1.0696162773332735e-05, + "loss": 2.0517, + "step": 14623 + }, + { + "epoch": 0.49, + "grad_norm": 0.7741847634315491, + "learning_rate": 1.0695102455209649e-05, + "loss": 2.0292, + "step": 14624 + }, + { + "epoch": 0.49, + "grad_norm": 0.745650053024292, + "learning_rate": 1.0694042129233707e-05, + "loss": 2.091, + "step": 14625 + }, + { + "epoch": 0.49, + "grad_norm": 0.7720785737037659, + "learning_rate": 1.0692981795416881e-05, + "loss": 2.1599, + "step": 14626 + }, + { + "epoch": 0.49, + "grad_norm": 0.7433773875236511, + "learning_rate": 1.0691921453771155e-05, + "loss": 2.1176, + "step": 14627 + }, + { + "epoch": 0.49, + "grad_norm": 0.7450385093688965, + "learning_rate": 1.0690861104308504e-05, + "loss": 2.0064, + "step": 14628 + }, + { + "epoch": 0.49, + "grad_norm": 0.7526825666427612, + "learning_rate": 1.0689800747040908e-05, + "loss": 2.1196, + "step": 14629 + }, + { + "epoch": 0.49, + "grad_norm": 0.7309104800224304, + "learning_rate": 1.0688740381980352e-05, + "loss": 2.055, + "step": 14630 + }, + { + "epoch": 0.49, + "grad_norm": 0.7464230060577393, + "learning_rate": 1.0687680009138806e-05, + "loss": 2.0825, + "step": 14631 + }, + { + "epoch": 0.49, + "grad_norm": 0.7237047553062439, + "learning_rate": 1.0686619628528256e-05, + "loss": 2.0883, + "step": 14632 + }, + { + "epoch": 0.49, + "grad_norm": 0.7204283475875854, + "learning_rate": 1.068555924016068e-05, + "loss": 2.0997, + "step": 14633 + }, + { + "epoch": 0.49, + "grad_norm": 0.7293440699577332, + "learning_rate": 1.068449884404806e-05, + "loss": 2.0959, + "step": 14634 + }, + { + "epoch": 0.49, + "grad_norm": 0.7542620301246643, + "learning_rate": 1.068343844020237e-05, + "loss": 2.0791, + "step": 14635 + }, + { + "epoch": 0.49, + "grad_norm": 0.7173818945884705, + "learning_rate": 1.0682378028635591e-05, + "loss": 2.0866, + "step": 14636 + }, + { + "epoch": 0.49, + "grad_norm": 0.7198214530944824, + "learning_rate": 1.0681317609359709e-05, + "loss": 2.0463, + "step": 14637 + }, + { + "epoch": 0.49, + "grad_norm": 0.7311445474624634, + "learning_rate": 1.06802571823867e-05, + "loss": 2.0898, + "step": 14638 + }, + { + "epoch": 0.49, + "grad_norm": 0.7399665117263794, + "learning_rate": 1.0679196747728543e-05, + "loss": 2.1081, + "step": 14639 + }, + { + "epoch": 0.49, + "grad_norm": 0.7373688817024231, + "learning_rate": 1.0678136305397218e-05, + "loss": 2.0918, + "step": 14640 + }, + { + "epoch": 0.49, + "grad_norm": 0.7440222501754761, + "learning_rate": 1.0677075855404709e-05, + "loss": 2.1241, + "step": 14641 + }, + { + "epoch": 0.49, + "grad_norm": 0.7079523205757141, + "learning_rate": 1.067601539776299e-05, + "loss": 2.09, + "step": 14642 + }, + { + "epoch": 0.49, + "grad_norm": 0.7419347763061523, + "learning_rate": 1.067495493248405e-05, + "loss": 2.151, + "step": 14643 + }, + { + "epoch": 0.49, + "grad_norm": 0.740949809551239, + "learning_rate": 1.0673894459579858e-05, + "loss": 2.0791, + "step": 14644 + }, + { + "epoch": 0.49, + "grad_norm": 0.7136651277542114, + "learning_rate": 1.0672833979062406e-05, + "loss": 2.0997, + "step": 14645 + }, + { + "epoch": 0.49, + "grad_norm": 0.7348021864891052, + "learning_rate": 1.067177349094367e-05, + "loss": 2.134, + "step": 14646 + }, + { + "epoch": 0.49, + "grad_norm": 0.7171558737754822, + "learning_rate": 1.0670712995235631e-05, + "loss": 2.0722, + "step": 14647 + }, + { + "epoch": 0.49, + "grad_norm": 0.7304102182388306, + "learning_rate": 1.0669652491950269e-05, + "loss": 2.0041, + "step": 14648 + }, + { + "epoch": 0.49, + "grad_norm": 0.7185641527175903, + "learning_rate": 1.0668591981099566e-05, + "loss": 2.1098, + "step": 14649 + }, + { + "epoch": 0.49, + "grad_norm": 0.7144058346748352, + "learning_rate": 1.0667531462695502e-05, + "loss": 2.1243, + "step": 14650 + }, + { + "epoch": 0.49, + "grad_norm": 0.7622905373573303, + "learning_rate": 1.0666470936750057e-05, + "loss": 2.0879, + "step": 14651 + }, + { + "epoch": 0.49, + "grad_norm": 0.7399274110794067, + "learning_rate": 1.0665410403275216e-05, + "loss": 2.0914, + "step": 14652 + }, + { + "epoch": 0.49, + "grad_norm": 0.7643499970436096, + "learning_rate": 1.0664349862282958e-05, + "loss": 2.1075, + "step": 14653 + }, + { + "epoch": 0.49, + "grad_norm": 0.7139347195625305, + "learning_rate": 1.0663289313785268e-05, + "loss": 1.9994, + "step": 14654 + }, + { + "epoch": 0.49, + "grad_norm": 0.7429953813552856, + "learning_rate": 1.0662228757794117e-05, + "loss": 2.085, + "step": 14655 + }, + { + "epoch": 0.49, + "grad_norm": 0.7331598401069641, + "learning_rate": 1.06611681943215e-05, + "loss": 2.0623, + "step": 14656 + }, + { + "epoch": 0.49, + "grad_norm": 0.7400999665260315, + "learning_rate": 1.066010762337939e-05, + "loss": 2.0318, + "step": 14657 + }, + { + "epoch": 0.49, + "grad_norm": 0.7368826866149902, + "learning_rate": 1.065904704497977e-05, + "loss": 2.1126, + "step": 14658 + }, + { + "epoch": 0.49, + "grad_norm": 0.7501544952392578, + "learning_rate": 1.0657986459134621e-05, + "loss": 2.133, + "step": 14659 + }, + { + "epoch": 0.49, + "grad_norm": 0.7455234527587891, + "learning_rate": 1.065692586585593e-05, + "loss": 2.1171, + "step": 14660 + }, + { + "epoch": 0.49, + "grad_norm": 0.7894590497016907, + "learning_rate": 1.0655865265155673e-05, + "loss": 2.1526, + "step": 14661 + }, + { + "epoch": 0.49, + "grad_norm": 0.7913345098495483, + "learning_rate": 1.0654804657045837e-05, + "loss": 2.1424, + "step": 14662 + }, + { + "epoch": 0.49, + "grad_norm": 0.7647777199745178, + "learning_rate": 1.06537440415384e-05, + "loss": 2.146, + "step": 14663 + }, + { + "epoch": 0.49, + "grad_norm": 0.7644203305244446, + "learning_rate": 1.0652683418645347e-05, + "loss": 2.0871, + "step": 14664 + }, + { + "epoch": 0.49, + "grad_norm": 0.7443122267723083, + "learning_rate": 1.0651622788378662e-05, + "loss": 2.058, + "step": 14665 + }, + { + "epoch": 0.49, + "grad_norm": 0.754377543926239, + "learning_rate": 1.0650562150750318e-05, + "loss": 2.0506, + "step": 14666 + }, + { + "epoch": 0.49, + "grad_norm": 0.7406039834022522, + "learning_rate": 1.064950150577231e-05, + "loss": 2.0777, + "step": 14667 + }, + { + "epoch": 0.49, + "grad_norm": 0.7436974048614502, + "learning_rate": 1.0648440853456612e-05, + "loss": 2.1324, + "step": 14668 + }, + { + "epoch": 0.49, + "grad_norm": 0.7229152917861938, + "learning_rate": 1.0647380193815209e-05, + "loss": 2.0307, + "step": 14669 + }, + { + "epoch": 0.49, + "grad_norm": 0.7415510416030884, + "learning_rate": 1.0646319526860086e-05, + "loss": 2.067, + "step": 14670 + }, + { + "epoch": 0.49, + "grad_norm": 0.7377073168754578, + "learning_rate": 1.0645258852603222e-05, + "loss": 2.102, + "step": 14671 + }, + { + "epoch": 0.49, + "grad_norm": 0.7496103048324585, + "learning_rate": 1.0644198171056601e-05, + "loss": 2.1657, + "step": 14672 + }, + { + "epoch": 0.49, + "grad_norm": 0.7456011772155762, + "learning_rate": 1.0643137482232206e-05, + "loss": 2.0973, + "step": 14673 + }, + { + "epoch": 0.49, + "grad_norm": 0.7341430187225342, + "learning_rate": 1.0642076786142024e-05, + "loss": 2.1307, + "step": 14674 + }, + { + "epoch": 0.49, + "grad_norm": 0.7097193002700806, + "learning_rate": 1.0641016082798032e-05, + "loss": 2.0745, + "step": 14675 + }, + { + "epoch": 0.49, + "grad_norm": 0.7230064868927002, + "learning_rate": 1.0639955372212218e-05, + "loss": 2.0991, + "step": 14676 + }, + { + "epoch": 0.49, + "grad_norm": 0.735785961151123, + "learning_rate": 1.0638894654396562e-05, + "loss": 2.0265, + "step": 14677 + }, + { + "epoch": 0.49, + "grad_norm": 0.7457605004310608, + "learning_rate": 1.0637833929363049e-05, + "loss": 2.1327, + "step": 14678 + }, + { + "epoch": 0.49, + "grad_norm": 0.7232545018196106, + "learning_rate": 1.0636773197123661e-05, + "loss": 2.071, + "step": 14679 + }, + { + "epoch": 0.49, + "grad_norm": 0.7175222039222717, + "learning_rate": 1.0635712457690382e-05, + "loss": 2.1216, + "step": 14680 + }, + { + "epoch": 0.49, + "grad_norm": 0.7136391997337341, + "learning_rate": 1.0634651711075199e-05, + "loss": 2.056, + "step": 14681 + }, + { + "epoch": 0.49, + "grad_norm": 0.7620527148246765, + "learning_rate": 1.0633590957290091e-05, + "loss": 2.0689, + "step": 14682 + }, + { + "epoch": 0.49, + "grad_norm": 0.7666252851486206, + "learning_rate": 1.0632530196347046e-05, + "loss": 2.1107, + "step": 14683 + }, + { + "epoch": 0.49, + "grad_norm": 0.7668177485466003, + "learning_rate": 1.0631469428258044e-05, + "loss": 2.0176, + "step": 14684 + }, + { + "epoch": 0.49, + "grad_norm": 0.7574915885925293, + "learning_rate": 1.063040865303507e-05, + "loss": 2.0771, + "step": 14685 + }, + { + "epoch": 0.49, + "grad_norm": 0.7369412779808044, + "learning_rate": 1.0629347870690108e-05, + "loss": 2.0537, + "step": 14686 + }, + { + "epoch": 0.49, + "grad_norm": 0.7470325827598572, + "learning_rate": 1.0628287081235144e-05, + "loss": 2.0076, + "step": 14687 + }, + { + "epoch": 0.49, + "grad_norm": 0.7449122667312622, + "learning_rate": 1.062722628468216e-05, + "loss": 2.124, + "step": 14688 + }, + { + "epoch": 0.49, + "grad_norm": 0.7190289497375488, + "learning_rate": 1.0626165481043142e-05, + "loss": 2.0568, + "step": 14689 + }, + { + "epoch": 0.49, + "grad_norm": 0.7241438031196594, + "learning_rate": 1.0625104670330074e-05, + "loss": 2.1299, + "step": 14690 + }, + { + "epoch": 0.49, + "grad_norm": 0.742878258228302, + "learning_rate": 1.0624043852554934e-05, + "loss": 2.1139, + "step": 14691 + }, + { + "epoch": 0.49, + "grad_norm": 0.7262085676193237, + "learning_rate": 1.0622983027729719e-05, + "loss": 2.0705, + "step": 14692 + }, + { + "epoch": 0.49, + "grad_norm": 0.7183705568313599, + "learning_rate": 1.0621922195866404e-05, + "loss": 2.0801, + "step": 14693 + }, + { + "epoch": 0.49, + "grad_norm": 0.731928288936615, + "learning_rate": 1.0620861356976977e-05, + "loss": 2.067, + "step": 14694 + }, + { + "epoch": 0.49, + "grad_norm": 0.7484527230262756, + "learning_rate": 1.0619800511073422e-05, + "loss": 2.1193, + "step": 14695 + }, + { + "epoch": 0.49, + "grad_norm": 0.7143909335136414, + "learning_rate": 1.0618739658167725e-05, + "loss": 2.1202, + "step": 14696 + }, + { + "epoch": 0.49, + "grad_norm": 0.7312271595001221, + "learning_rate": 1.061767879827187e-05, + "loss": 2.0424, + "step": 14697 + }, + { + "epoch": 0.49, + "grad_norm": 0.738841712474823, + "learning_rate": 1.0616617931397841e-05, + "loss": 2.077, + "step": 14698 + }, + { + "epoch": 0.49, + "grad_norm": 0.754411518573761, + "learning_rate": 1.0615557057557621e-05, + "loss": 2.1127, + "step": 14699 + }, + { + "epoch": 0.49, + "grad_norm": 0.7421163320541382, + "learning_rate": 1.0614496176763205e-05, + "loss": 2.1467, + "step": 14700 + }, + { + "epoch": 0.49, + "grad_norm": 0.7563844919204712, + "learning_rate": 1.0613435289026566e-05, + "loss": 2.0843, + "step": 14701 + }, + { + "epoch": 0.49, + "grad_norm": 0.7339263558387756, + "learning_rate": 1.0612374394359695e-05, + "loss": 2.1495, + "step": 14702 + }, + { + "epoch": 0.49, + "grad_norm": 0.7737752199172974, + "learning_rate": 1.061131349277458e-05, + "loss": 2.052, + "step": 14703 + }, + { + "epoch": 0.49, + "grad_norm": 0.7618801593780518, + "learning_rate": 1.0610252584283201e-05, + "loss": 2.0892, + "step": 14704 + }, + { + "epoch": 0.49, + "grad_norm": 0.785929262638092, + "learning_rate": 1.0609191668897546e-05, + "loss": 2.0841, + "step": 14705 + }, + { + "epoch": 0.49, + "grad_norm": 0.723013162612915, + "learning_rate": 1.0608130746629602e-05, + "loss": 2.0553, + "step": 14706 + }, + { + "epoch": 0.49, + "grad_norm": 0.7250922918319702, + "learning_rate": 1.060706981749135e-05, + "loss": 2.0306, + "step": 14707 + }, + { + "epoch": 0.49, + "grad_norm": 0.722086489200592, + "learning_rate": 1.0606008881494783e-05, + "loss": 2.0614, + "step": 14708 + }, + { + "epoch": 0.49, + "grad_norm": 0.7287055850028992, + "learning_rate": 1.0604947938651882e-05, + "loss": 2.0793, + "step": 14709 + }, + { + "epoch": 0.49, + "grad_norm": 0.7173362970352173, + "learning_rate": 1.0603886988974633e-05, + "loss": 2.0832, + "step": 14710 + }, + { + "epoch": 0.49, + "grad_norm": 0.727529764175415, + "learning_rate": 1.060282603247502e-05, + "loss": 2.0841, + "step": 14711 + }, + { + "epoch": 0.49, + "grad_norm": 0.7330241203308105, + "learning_rate": 1.0601765069165038e-05, + "loss": 2.1248, + "step": 14712 + }, + { + "epoch": 0.49, + "grad_norm": 0.6999135613441467, + "learning_rate": 1.060070409905666e-05, + "loss": 2.0929, + "step": 14713 + }, + { + "epoch": 0.49, + "grad_norm": 0.7290116548538208, + "learning_rate": 1.0599643122161884e-05, + "loss": 2.1478, + "step": 14714 + }, + { + "epoch": 0.49, + "grad_norm": 0.7333020567893982, + "learning_rate": 1.059858213849269e-05, + "loss": 2.0858, + "step": 14715 + }, + { + "epoch": 0.49, + "grad_norm": 0.7391338348388672, + "learning_rate": 1.0597521148061065e-05, + "loss": 2.0842, + "step": 14716 + }, + { + "epoch": 0.49, + "grad_norm": 0.7238151431083679, + "learning_rate": 1.0596460150878997e-05, + "loss": 2.0709, + "step": 14717 + }, + { + "epoch": 0.49, + "grad_norm": 0.747388482093811, + "learning_rate": 1.0595399146958472e-05, + "loss": 2.0572, + "step": 14718 + }, + { + "epoch": 0.49, + "grad_norm": 0.7397347092628479, + "learning_rate": 1.0594338136311476e-05, + "loss": 2.0958, + "step": 14719 + }, + { + "epoch": 0.49, + "grad_norm": 0.7746142148971558, + "learning_rate": 1.0593277118949997e-05, + "loss": 2.1318, + "step": 14720 + }, + { + "epoch": 0.49, + "grad_norm": 0.718159556388855, + "learning_rate": 1.0592216094886019e-05, + "loss": 2.0858, + "step": 14721 + }, + { + "epoch": 0.49, + "grad_norm": 0.7231830358505249, + "learning_rate": 1.059115506413153e-05, + "loss": 2.0669, + "step": 14722 + }, + { + "epoch": 0.49, + "grad_norm": 0.730810284614563, + "learning_rate": 1.0590094026698522e-05, + "loss": 2.0586, + "step": 14723 + }, + { + "epoch": 0.49, + "grad_norm": 0.7129188179969788, + "learning_rate": 1.0589032982598972e-05, + "loss": 2.0543, + "step": 14724 + }, + { + "epoch": 0.49, + "grad_norm": 0.7510318160057068, + "learning_rate": 1.0587971931844876e-05, + "loss": 2.1121, + "step": 14725 + }, + { + "epoch": 0.49, + "grad_norm": 0.720358669757843, + "learning_rate": 1.0586910874448218e-05, + "loss": 2.1159, + "step": 14726 + }, + { + "epoch": 0.49, + "grad_norm": 0.7271606922149658, + "learning_rate": 1.0585849810420983e-05, + "loss": 2.0892, + "step": 14727 + }, + { + "epoch": 0.49, + "grad_norm": 0.757247269153595, + "learning_rate": 1.058478873977516e-05, + "loss": 2.0705, + "step": 14728 + }, + { + "epoch": 0.49, + "grad_norm": 0.7254404425621033, + "learning_rate": 1.0583727662522738e-05, + "loss": 2.0867, + "step": 14729 + }, + { + "epoch": 0.49, + "grad_norm": 0.7303285002708435, + "learning_rate": 1.0582666578675703e-05, + "loss": 2.0723, + "step": 14730 + }, + { + "epoch": 0.49, + "grad_norm": 0.7081171870231628, + "learning_rate": 1.0581605488246043e-05, + "loss": 2.0343, + "step": 14731 + }, + { + "epoch": 0.49, + "grad_norm": 0.7264071702957153, + "learning_rate": 1.0580544391245742e-05, + "loss": 2.1134, + "step": 14732 + }, + { + "epoch": 0.49, + "grad_norm": 0.7303284406661987, + "learning_rate": 1.0579483287686791e-05, + "loss": 2.0114, + "step": 14733 + }, + { + "epoch": 0.49, + "grad_norm": 0.7209147214889526, + "learning_rate": 1.0578422177581183e-05, + "loss": 2.0332, + "step": 14734 + }, + { + "epoch": 0.49, + "grad_norm": 0.7522361278533936, + "learning_rate": 1.0577361060940895e-05, + "loss": 2.0763, + "step": 14735 + }, + { + "epoch": 0.49, + "grad_norm": 0.748513400554657, + "learning_rate": 1.0576299937777921e-05, + "loss": 2.076, + "step": 14736 + }, + { + "epoch": 0.49, + "grad_norm": 0.7712220549583435, + "learning_rate": 1.0575238808104249e-05, + "loss": 2.1357, + "step": 14737 + }, + { + "epoch": 0.49, + "grad_norm": 0.7355931401252747, + "learning_rate": 1.0574177671931865e-05, + "loss": 2.1153, + "step": 14738 + }, + { + "epoch": 0.49, + "grad_norm": 0.767103374004364, + "learning_rate": 1.0573116529272758e-05, + "loss": 2.0697, + "step": 14739 + }, + { + "epoch": 0.49, + "grad_norm": 0.7569059133529663, + "learning_rate": 1.0572055380138917e-05, + "loss": 2.0828, + "step": 14740 + }, + { + "epoch": 0.49, + "grad_norm": 0.7460962533950806, + "learning_rate": 1.057099422454233e-05, + "loss": 2.0737, + "step": 14741 + }, + { + "epoch": 0.49, + "grad_norm": 0.8008043766021729, + "learning_rate": 1.0569933062494984e-05, + "loss": 2.0467, + "step": 14742 + }, + { + "epoch": 0.49, + "grad_norm": 0.7106244564056396, + "learning_rate": 1.0568871894008868e-05, + "loss": 2.0353, + "step": 14743 + }, + { + "epoch": 0.49, + "grad_norm": 0.7089235186576843, + "learning_rate": 1.0567810719095973e-05, + "loss": 2.053, + "step": 14744 + }, + { + "epoch": 0.49, + "grad_norm": 0.7956158518791199, + "learning_rate": 1.0566749537768281e-05, + "loss": 2.1527, + "step": 14745 + }, + { + "epoch": 0.49, + "grad_norm": 0.7136249542236328, + "learning_rate": 1.0565688350037788e-05, + "loss": 2.0314, + "step": 14746 + }, + { + "epoch": 0.49, + "grad_norm": 0.7562602758407593, + "learning_rate": 1.0564627155916483e-05, + "loss": 2.0599, + "step": 14747 + }, + { + "epoch": 0.49, + "grad_norm": 0.7685193419456482, + "learning_rate": 1.0563565955416343e-05, + "loss": 2.1565, + "step": 14748 + }, + { + "epoch": 0.49, + "grad_norm": 0.8187074065208435, + "learning_rate": 1.0562504748549372e-05, + "loss": 2.1812, + "step": 14749 + }, + { + "epoch": 0.49, + "grad_norm": 0.7400255799293518, + "learning_rate": 1.056144353532755e-05, + "loss": 2.081, + "step": 14750 + }, + { + "epoch": 0.49, + "grad_norm": 0.7657455205917358, + "learning_rate": 1.0560382315762867e-05, + "loss": 2.1556, + "step": 14751 + }, + { + "epoch": 0.49, + "grad_norm": 0.7442103624343872, + "learning_rate": 1.0559321089867314e-05, + "loss": 2.046, + "step": 14752 + }, + { + "epoch": 0.49, + "grad_norm": 0.7319579720497131, + "learning_rate": 1.0558259857652877e-05, + "loss": 2.0833, + "step": 14753 + }, + { + "epoch": 0.49, + "grad_norm": 0.754719614982605, + "learning_rate": 1.055719861913155e-05, + "loss": 2.0554, + "step": 14754 + }, + { + "epoch": 0.49, + "grad_norm": 0.740059494972229, + "learning_rate": 1.0556137374315318e-05, + "loss": 2.0894, + "step": 14755 + }, + { + "epoch": 0.49, + "grad_norm": 0.7332885265350342, + "learning_rate": 1.0555076123216173e-05, + "loss": 2.1334, + "step": 14756 + }, + { + "epoch": 0.49, + "grad_norm": 0.7479389309883118, + "learning_rate": 1.0554014865846102e-05, + "loss": 2.0968, + "step": 14757 + }, + { + "epoch": 0.49, + "grad_norm": 0.764297604560852, + "learning_rate": 1.0552953602217097e-05, + "loss": 2.0348, + "step": 14758 + }, + { + "epoch": 0.49, + "grad_norm": 0.7222644090652466, + "learning_rate": 1.0551892332341145e-05, + "loss": 2.095, + "step": 14759 + }, + { + "epoch": 0.49, + "grad_norm": 0.7213262915611267, + "learning_rate": 1.055083105623024e-05, + "loss": 2.0729, + "step": 14760 + }, + { + "epoch": 0.49, + "grad_norm": 0.7323314547538757, + "learning_rate": 1.0549769773896366e-05, + "loss": 2.0795, + "step": 14761 + }, + { + "epoch": 0.49, + "grad_norm": 0.7494660019874573, + "learning_rate": 1.0548708485351515e-05, + "loss": 2.1009, + "step": 14762 + }, + { + "epoch": 0.49, + "grad_norm": 0.7454178333282471, + "learning_rate": 1.0547647190607677e-05, + "loss": 2.1049, + "step": 14763 + }, + { + "epoch": 0.49, + "grad_norm": 0.7230193018913269, + "learning_rate": 1.0546585889676842e-05, + "loss": 1.9667, + "step": 14764 + }, + { + "epoch": 0.49, + "grad_norm": 0.7430580854415894, + "learning_rate": 1.0545524582571e-05, + "loss": 2.0633, + "step": 14765 + }, + { + "epoch": 0.49, + "grad_norm": 0.7117336392402649, + "learning_rate": 1.0544463269302141e-05, + "loss": 2.0982, + "step": 14766 + }, + { + "epoch": 0.49, + "grad_norm": 0.7224259376525879, + "learning_rate": 1.0543401949882255e-05, + "loss": 2.0417, + "step": 14767 + }, + { + "epoch": 0.49, + "grad_norm": 0.7265010476112366, + "learning_rate": 1.0542340624323333e-05, + "loss": 2.1235, + "step": 14768 + }, + { + "epoch": 0.49, + "grad_norm": 0.7292789220809937, + "learning_rate": 1.0541279292637365e-05, + "loss": 2.109, + "step": 14769 + }, + { + "epoch": 0.49, + "grad_norm": 0.7527442574501038, + "learning_rate": 1.0540217954836337e-05, + "loss": 2.1187, + "step": 14770 + }, + { + "epoch": 0.49, + "grad_norm": 0.712405264377594, + "learning_rate": 1.0539156610932251e-05, + "loss": 2.0323, + "step": 14771 + }, + { + "epoch": 0.49, + "grad_norm": 0.7256230115890503, + "learning_rate": 1.0538095260937085e-05, + "loss": 2.1081, + "step": 14772 + }, + { + "epoch": 0.49, + "grad_norm": 0.7343063354492188, + "learning_rate": 1.0537033904862832e-05, + "loss": 2.1318, + "step": 14773 + }, + { + "epoch": 0.49, + "grad_norm": 0.7543482184410095, + "learning_rate": 1.0535972542721486e-05, + "loss": 2.0812, + "step": 14774 + }, + { + "epoch": 0.49, + "grad_norm": 0.7285898327827454, + "learning_rate": 1.0534911174525038e-05, + "loss": 2.025, + "step": 14775 + }, + { + "epoch": 0.49, + "grad_norm": 0.7087196111679077, + "learning_rate": 1.0533849800285473e-05, + "loss": 2.0329, + "step": 14776 + }, + { + "epoch": 0.49, + "grad_norm": 0.7079684734344482, + "learning_rate": 1.053278842001479e-05, + "loss": 2.0988, + "step": 14777 + }, + { + "epoch": 0.49, + "grad_norm": 0.742074728012085, + "learning_rate": 1.0531727033724974e-05, + "loss": 2.1, + "step": 14778 + }, + { + "epoch": 0.49, + "grad_norm": 0.7521358728408813, + "learning_rate": 1.0530665641428017e-05, + "loss": 2.0587, + "step": 14779 + }, + { + "epoch": 0.49, + "grad_norm": 0.732659637928009, + "learning_rate": 1.0529604243135914e-05, + "loss": 2.1874, + "step": 14780 + }, + { + "epoch": 0.49, + "grad_norm": 0.8465365767478943, + "learning_rate": 1.0528542838860649e-05, + "loss": 2.1838, + "step": 14781 + }, + { + "epoch": 0.49, + "grad_norm": 0.7363940477371216, + "learning_rate": 1.052748142861422e-05, + "loss": 2.03, + "step": 14782 + }, + { + "epoch": 0.49, + "grad_norm": 0.7460628151893616, + "learning_rate": 1.0526420012408612e-05, + "loss": 2.0515, + "step": 14783 + }, + { + "epoch": 0.49, + "grad_norm": 0.7049014568328857, + "learning_rate": 1.0525358590255817e-05, + "loss": 2.025, + "step": 14784 + }, + { + "epoch": 0.49, + "grad_norm": 0.7407488226890564, + "learning_rate": 1.0524297162167834e-05, + "loss": 2.0535, + "step": 14785 + }, + { + "epoch": 0.49, + "grad_norm": 0.723631739616394, + "learning_rate": 1.0523235728156647e-05, + "loss": 2.0119, + "step": 14786 + }, + { + "epoch": 0.49, + "grad_norm": 0.7573441863059998, + "learning_rate": 1.0522174288234248e-05, + "loss": 2.0564, + "step": 14787 + }, + { + "epoch": 0.49, + "grad_norm": 0.7188177704811096, + "learning_rate": 1.0521112842412631e-05, + "loss": 2.0811, + "step": 14788 + }, + { + "epoch": 0.49, + "grad_norm": 0.7122758030891418, + "learning_rate": 1.0520051390703786e-05, + "loss": 2.1229, + "step": 14789 + }, + { + "epoch": 0.49, + "grad_norm": 0.7398770451545715, + "learning_rate": 1.0518989933119705e-05, + "loss": 2.1362, + "step": 14790 + }, + { + "epoch": 0.49, + "grad_norm": 0.731458842754364, + "learning_rate": 1.0517928469672383e-05, + "loss": 2.0919, + "step": 14791 + }, + { + "epoch": 0.49, + "grad_norm": 0.7129913568496704, + "learning_rate": 1.0516867000373803e-05, + "loss": 2.0775, + "step": 14792 + }, + { + "epoch": 0.49, + "grad_norm": 0.7778252363204956, + "learning_rate": 1.051580552523597e-05, + "loss": 2.1144, + "step": 14793 + }, + { + "epoch": 0.49, + "grad_norm": 0.7551724910736084, + "learning_rate": 1.0514744044270861e-05, + "loss": 2.0782, + "step": 14794 + }, + { + "epoch": 0.49, + "grad_norm": 0.71817547082901, + "learning_rate": 1.0513682557490477e-05, + "loss": 2.0851, + "step": 14795 + }, + { + "epoch": 0.49, + "grad_norm": 0.7446401715278625, + "learning_rate": 1.0512621064906812e-05, + "loss": 2.1078, + "step": 14796 + }, + { + "epoch": 0.49, + "grad_norm": 0.7497610449790955, + "learning_rate": 1.0511559566531853e-05, + "loss": 2.0912, + "step": 14797 + }, + { + "epoch": 0.49, + "grad_norm": 0.739708662033081, + "learning_rate": 1.0510498062377595e-05, + "loss": 2.1659, + "step": 14798 + }, + { + "epoch": 0.49, + "grad_norm": 0.7608699798583984, + "learning_rate": 1.0509436552456025e-05, + "loss": 1.9546, + "step": 14799 + }, + { + "epoch": 0.49, + "grad_norm": 0.7054779529571533, + "learning_rate": 1.0508375036779142e-05, + "loss": 2.0285, + "step": 14800 + }, + { + "epoch": 0.49, + "grad_norm": 0.732172966003418, + "learning_rate": 1.0507313515358937e-05, + "loss": 2.1021, + "step": 14801 + }, + { + "epoch": 0.49, + "grad_norm": 0.740344762802124, + "learning_rate": 1.05062519882074e-05, + "loss": 2.0766, + "step": 14802 + }, + { + "epoch": 0.49, + "grad_norm": 0.746663510799408, + "learning_rate": 1.0505190455336523e-05, + "loss": 2.0438, + "step": 14803 + }, + { + "epoch": 0.49, + "grad_norm": 0.733219563961029, + "learning_rate": 1.0504128916758306e-05, + "loss": 2.0896, + "step": 14804 + }, + { + "epoch": 0.49, + "grad_norm": 0.7470428943634033, + "learning_rate": 1.050306737248473e-05, + "loss": 2.1443, + "step": 14805 + }, + { + "epoch": 0.49, + "grad_norm": 0.682278037071228, + "learning_rate": 1.0502005822527794e-05, + "loss": 2.077, + "step": 14806 + }, + { + "epoch": 0.49, + "grad_norm": 0.7123978137969971, + "learning_rate": 1.0500944266899494e-05, + "loss": 2.0967, + "step": 14807 + }, + { + "epoch": 0.49, + "grad_norm": 0.7374909520149231, + "learning_rate": 1.0499882705611816e-05, + "loss": 2.0918, + "step": 14808 + }, + { + "epoch": 0.49, + "grad_norm": 0.7331507205963135, + "learning_rate": 1.0498821138676759e-05, + "loss": 2.0707, + "step": 14809 + }, + { + "epoch": 0.49, + "grad_norm": 0.7418730854988098, + "learning_rate": 1.0497759566106311e-05, + "loss": 2.075, + "step": 14810 + }, + { + "epoch": 0.49, + "grad_norm": 0.7236738801002502, + "learning_rate": 1.0496697987912467e-05, + "loss": 2.1086, + "step": 14811 + }, + { + "epoch": 0.49, + "grad_norm": 0.7181369662284851, + "learning_rate": 1.0495636404107222e-05, + "loss": 2.1007, + "step": 14812 + }, + { + "epoch": 0.49, + "grad_norm": 0.7514585852622986, + "learning_rate": 1.0494574814702567e-05, + "loss": 2.0852, + "step": 14813 + }, + { + "epoch": 0.49, + "grad_norm": 0.7502102851867676, + "learning_rate": 1.0493513219710491e-05, + "loss": 2.0993, + "step": 14814 + }, + { + "epoch": 0.49, + "grad_norm": 0.7513480186462402, + "learning_rate": 1.0492451619142996e-05, + "loss": 2.0501, + "step": 14815 + }, + { + "epoch": 0.49, + "grad_norm": 0.746101438999176, + "learning_rate": 1.0491390013012075e-05, + "loss": 2.0503, + "step": 14816 + }, + { + "epoch": 0.49, + "grad_norm": 0.7273973226547241, + "learning_rate": 1.049032840132971e-05, + "loss": 1.9748, + "step": 14817 + }, + { + "epoch": 0.49, + "grad_norm": 0.7350636720657349, + "learning_rate": 1.0489266784107908e-05, + "loss": 2.0512, + "step": 14818 + }, + { + "epoch": 0.49, + "grad_norm": 0.7424909472465515, + "learning_rate": 1.0488205161358653e-05, + "loss": 2.0722, + "step": 14819 + }, + { + "epoch": 0.49, + "grad_norm": 0.7231415510177612, + "learning_rate": 1.0487143533093944e-05, + "loss": 2.0642, + "step": 14820 + }, + { + "epoch": 0.49, + "grad_norm": 0.7148972749710083, + "learning_rate": 1.0486081899325772e-05, + "loss": 2.0375, + "step": 14821 + }, + { + "epoch": 0.49, + "grad_norm": 0.7788691520690918, + "learning_rate": 1.0485020260066132e-05, + "loss": 2.1145, + "step": 14822 + }, + { + "epoch": 0.49, + "grad_norm": 0.7397133111953735, + "learning_rate": 1.0483958615327017e-05, + "loss": 2.0871, + "step": 14823 + }, + { + "epoch": 0.49, + "grad_norm": 0.7532609701156616, + "learning_rate": 1.0482896965120424e-05, + "loss": 2.109, + "step": 14824 + }, + { + "epoch": 0.49, + "grad_norm": 0.7499014735221863, + "learning_rate": 1.048183530945834e-05, + "loss": 2.1293, + "step": 14825 + }, + { + "epoch": 0.49, + "grad_norm": 0.7129559516906738, + "learning_rate": 1.0480773648352764e-05, + "loss": 2.0304, + "step": 14826 + }, + { + "epoch": 0.49, + "grad_norm": 0.7490065097808838, + "learning_rate": 1.047971198181569e-05, + "loss": 2.0992, + "step": 14827 + }, + { + "epoch": 0.49, + "grad_norm": 0.751141369342804, + "learning_rate": 1.047865030985911e-05, + "loss": 2.0212, + "step": 14828 + }, + { + "epoch": 0.49, + "grad_norm": 0.7428528070449829, + "learning_rate": 1.0477588632495021e-05, + "loss": 2.0472, + "step": 14829 + }, + { + "epoch": 0.49, + "grad_norm": 0.7626310586929321, + "learning_rate": 1.0476526949735414e-05, + "loss": 2.0668, + "step": 14830 + }, + { + "epoch": 0.49, + "grad_norm": 0.7268064618110657, + "learning_rate": 1.0475465261592286e-05, + "loss": 2.0644, + "step": 14831 + }, + { + "epoch": 0.49, + "grad_norm": 0.7549756169319153, + "learning_rate": 1.0474403568077629e-05, + "loss": 2.068, + "step": 14832 + }, + { + "epoch": 0.49, + "grad_norm": 0.7479764819145203, + "learning_rate": 1.0473341869203439e-05, + "loss": 2.07, + "step": 14833 + }, + { + "epoch": 0.49, + "grad_norm": 0.7389526963233948, + "learning_rate": 1.0472280164981711e-05, + "loss": 2.0545, + "step": 14834 + }, + { + "epoch": 0.49, + "grad_norm": 0.7164120078086853, + "learning_rate": 1.0471218455424438e-05, + "loss": 2.0708, + "step": 14835 + }, + { + "epoch": 0.49, + "grad_norm": 0.7190315127372742, + "learning_rate": 1.0470156740543613e-05, + "loss": 2.1056, + "step": 14836 + }, + { + "epoch": 0.49, + "grad_norm": 0.7541943192481995, + "learning_rate": 1.0469095020351234e-05, + "loss": 2.1135, + "step": 14837 + }, + { + "epoch": 0.49, + "grad_norm": 0.7683500647544861, + "learning_rate": 1.0468033294859297e-05, + "loss": 2.0597, + "step": 14838 + }, + { + "epoch": 0.49, + "grad_norm": 0.7602309584617615, + "learning_rate": 1.0466971564079791e-05, + "loss": 2.1157, + "step": 14839 + }, + { + "epoch": 0.49, + "grad_norm": 0.7340739369392395, + "learning_rate": 1.0465909828024717e-05, + "loss": 2.0805, + "step": 14840 + }, + { + "epoch": 0.49, + "grad_norm": 0.7412785887718201, + "learning_rate": 1.0464848086706062e-05, + "loss": 2.0509, + "step": 14841 + }, + { + "epoch": 0.49, + "grad_norm": 0.7308230996131897, + "learning_rate": 1.0463786340135829e-05, + "loss": 2.0538, + "step": 14842 + }, + { + "epoch": 0.49, + "grad_norm": 0.729729413986206, + "learning_rate": 1.046272458832601e-05, + "loss": 2.0844, + "step": 14843 + }, + { + "epoch": 0.49, + "grad_norm": 0.7556081414222717, + "learning_rate": 1.0461662831288597e-05, + "loss": 1.9887, + "step": 14844 + }, + { + "epoch": 0.49, + "grad_norm": 0.7030629515647888, + "learning_rate": 1.046060106903559e-05, + "loss": 2.0412, + "step": 14845 + }, + { + "epoch": 0.49, + "grad_norm": 0.7378942966461182, + "learning_rate": 1.0459539301578985e-05, + "loss": 2.065, + "step": 14846 + }, + { + "epoch": 0.49, + "grad_norm": 0.7397748827934265, + "learning_rate": 1.0458477528930768e-05, + "loss": 2.0969, + "step": 14847 + }, + { + "epoch": 0.49, + "grad_norm": 0.72629714012146, + "learning_rate": 1.0457415751102944e-05, + "loss": 1.9918, + "step": 14848 + }, + { + "epoch": 0.49, + "grad_norm": 0.7250366806983948, + "learning_rate": 1.0456353968107505e-05, + "loss": 2.0717, + "step": 14849 + }, + { + "epoch": 0.49, + "grad_norm": 0.7371429800987244, + "learning_rate": 1.0455292179956445e-05, + "loss": 2.1163, + "step": 14850 + }, + { + "epoch": 0.49, + "grad_norm": 0.7485713362693787, + "learning_rate": 1.0454230386661763e-05, + "loss": 2.0558, + "step": 14851 + }, + { + "epoch": 0.49, + "grad_norm": 0.7368359565734863, + "learning_rate": 1.045316858823545e-05, + "loss": 2.0922, + "step": 14852 + }, + { + "epoch": 0.49, + "grad_norm": 0.7014850378036499, + "learning_rate": 1.0452106784689507e-05, + "loss": 2.1043, + "step": 14853 + }, + { + "epoch": 0.49, + "grad_norm": 0.7132440209388733, + "learning_rate": 1.0451044976035922e-05, + "loss": 2.0933, + "step": 14854 + }, + { + "epoch": 0.49, + "grad_norm": 0.7075067162513733, + "learning_rate": 1.0449983162286698e-05, + "loss": 2.1263, + "step": 14855 + }, + { + "epoch": 0.49, + "grad_norm": 0.7589308023452759, + "learning_rate": 1.0448921343453828e-05, + "loss": 2.1144, + "step": 14856 + }, + { + "epoch": 0.49, + "grad_norm": 0.7201362252235413, + "learning_rate": 1.0447859519549307e-05, + "loss": 2.0986, + "step": 14857 + }, + { + "epoch": 0.49, + "grad_norm": 0.7554084658622742, + "learning_rate": 1.0446797690585132e-05, + "loss": 1.9713, + "step": 14858 + }, + { + "epoch": 0.49, + "grad_norm": 0.7545449733734131, + "learning_rate": 1.0445735856573298e-05, + "loss": 2.1074, + "step": 14859 + }, + { + "epoch": 0.49, + "grad_norm": 0.7172949910163879, + "learning_rate": 1.0444674017525802e-05, + "loss": 2.1635, + "step": 14860 + }, + { + "epoch": 0.49, + "grad_norm": 0.7619054317474365, + "learning_rate": 1.0443612173454638e-05, + "loss": 2.1584, + "step": 14861 + }, + { + "epoch": 0.49, + "grad_norm": 0.7586904764175415, + "learning_rate": 1.0442550324371808e-05, + "loss": 2.0511, + "step": 14862 + }, + { + "epoch": 0.49, + "grad_norm": 0.7278960347175598, + "learning_rate": 1.0441488470289298e-05, + "loss": 2.0518, + "step": 14863 + }, + { + "epoch": 0.49, + "grad_norm": 0.7224172353744507, + "learning_rate": 1.0440426611219114e-05, + "loss": 2.0955, + "step": 14864 + }, + { + "epoch": 0.49, + "grad_norm": 0.7188601493835449, + "learning_rate": 1.0439364747173248e-05, + "loss": 2.1007, + "step": 14865 + }, + { + "epoch": 0.49, + "grad_norm": 0.7485142350196838, + "learning_rate": 1.0438302878163695e-05, + "loss": 2.0162, + "step": 14866 + }, + { + "epoch": 0.49, + "grad_norm": 0.7297865152359009, + "learning_rate": 1.0437241004202453e-05, + "loss": 2.0578, + "step": 14867 + }, + { + "epoch": 0.49, + "grad_norm": 0.7471247315406799, + "learning_rate": 1.043617912530152e-05, + "loss": 2.0313, + "step": 14868 + }, + { + "epoch": 0.49, + "grad_norm": 0.7283530235290527, + "learning_rate": 1.043511724147289e-05, + "loss": 2.0541, + "step": 14869 + }, + { + "epoch": 0.49, + "grad_norm": 0.7534549236297607, + "learning_rate": 1.043405535272856e-05, + "loss": 2.1377, + "step": 14870 + }, + { + "epoch": 0.49, + "grad_norm": 0.7159428000450134, + "learning_rate": 1.0432993459080527e-05, + "loss": 2.1281, + "step": 14871 + }, + { + "epoch": 0.49, + "grad_norm": 0.7389901280403137, + "learning_rate": 1.043193156054079e-05, + "loss": 2.1387, + "step": 14872 + }, + { + "epoch": 0.49, + "grad_norm": 0.7569697499275208, + "learning_rate": 1.043086965712134e-05, + "loss": 2.0704, + "step": 14873 + }, + { + "epoch": 0.49, + "grad_norm": 0.7161557078361511, + "learning_rate": 1.0429807748834177e-05, + "loss": 2.0791, + "step": 14874 + }, + { + "epoch": 0.49, + "grad_norm": 0.7219780087471008, + "learning_rate": 1.0428745835691304e-05, + "loss": 2.0284, + "step": 14875 + }, + { + "epoch": 0.49, + "grad_norm": 0.7340202927589417, + "learning_rate": 1.0427683917704704e-05, + "loss": 2.0737, + "step": 14876 + }, + { + "epoch": 0.49, + "grad_norm": 0.7395620942115784, + "learning_rate": 1.0426621994886385e-05, + "loss": 2.076, + "step": 14877 + }, + { + "epoch": 0.49, + "grad_norm": 0.7204892039299011, + "learning_rate": 1.0425560067248342e-05, + "loss": 2.0223, + "step": 14878 + }, + { + "epoch": 0.5, + "grad_norm": 0.7494797706604004, + "learning_rate": 1.042449813480257e-05, + "loss": 2.1426, + "step": 14879 + }, + { + "epoch": 0.5, + "grad_norm": 0.7590444684028625, + "learning_rate": 1.0423436197561066e-05, + "loss": 2.1084, + "step": 14880 + }, + { + "epoch": 0.5, + "grad_norm": 0.7462781667709351, + "learning_rate": 1.0422374255535828e-05, + "loss": 2.0546, + "step": 14881 + }, + { + "epoch": 0.5, + "grad_norm": 0.7948204874992371, + "learning_rate": 1.0421312308738853e-05, + "loss": 2.1134, + "step": 14882 + }, + { + "epoch": 0.5, + "grad_norm": 0.7597406506538391, + "learning_rate": 1.042025035718214e-05, + "loss": 2.0733, + "step": 14883 + }, + { + "epoch": 0.5, + "grad_norm": 0.7587404251098633, + "learning_rate": 1.0419188400877684e-05, + "loss": 2.0537, + "step": 14884 + }, + { + "epoch": 0.5, + "grad_norm": 0.7504622340202332, + "learning_rate": 1.0418126439837481e-05, + "loss": 2.0388, + "step": 14885 + }, + { + "epoch": 0.5, + "grad_norm": 0.749716579914093, + "learning_rate": 1.0417064474073535e-05, + "loss": 2.1063, + "step": 14886 + }, + { + "epoch": 0.5, + "grad_norm": 0.7219901084899902, + "learning_rate": 1.0416002503597835e-05, + "loss": 2.0664, + "step": 14887 + }, + { + "epoch": 0.5, + "grad_norm": 0.7423126697540283, + "learning_rate": 1.0414940528422384e-05, + "loss": 2.0935, + "step": 14888 + }, + { + "epoch": 0.5, + "grad_norm": 0.7399823069572449, + "learning_rate": 1.0413878548559179e-05, + "loss": 2.0848, + "step": 14889 + }, + { + "epoch": 0.5, + "grad_norm": 0.7641533613204956, + "learning_rate": 1.0412816564020215e-05, + "loss": 2.0497, + "step": 14890 + }, + { + "epoch": 0.5, + "grad_norm": 0.7240563631057739, + "learning_rate": 1.0411754574817492e-05, + "loss": 2.0497, + "step": 14891 + }, + { + "epoch": 0.5, + "grad_norm": 0.748987078666687, + "learning_rate": 1.0410692580963007e-05, + "loss": 2.0936, + "step": 14892 + }, + { + "epoch": 0.5, + "grad_norm": 0.7537991404533386, + "learning_rate": 1.0409630582468759e-05, + "loss": 2.0376, + "step": 14893 + }, + { + "epoch": 0.5, + "grad_norm": 0.753716766834259, + "learning_rate": 1.0408568579346742e-05, + "loss": 2.0471, + "step": 14894 + }, + { + "epoch": 0.5, + "grad_norm": 0.7071313261985779, + "learning_rate": 1.0407506571608961e-05, + "loss": 2.0454, + "step": 14895 + }, + { + "epoch": 0.5, + "grad_norm": 0.7359317541122437, + "learning_rate": 1.0406444559267406e-05, + "loss": 2.1734, + "step": 14896 + }, + { + "epoch": 0.5, + "grad_norm": 0.7349830865859985, + "learning_rate": 1.040538254233408e-05, + "loss": 2.0682, + "step": 14897 + }, + { + "epoch": 0.5, + "grad_norm": 0.7482216358184814, + "learning_rate": 1.040432052082098e-05, + "loss": 2.1106, + "step": 14898 + }, + { + "epoch": 0.5, + "grad_norm": 0.7701519131660461, + "learning_rate": 1.04032584947401e-05, + "loss": 2.1622, + "step": 14899 + }, + { + "epoch": 0.5, + "grad_norm": 0.7353580594062805, + "learning_rate": 1.0402196464103449e-05, + "loss": 2.0583, + "step": 14900 + }, + { + "epoch": 0.5, + "grad_norm": 0.7098243236541748, + "learning_rate": 1.0401134428923013e-05, + "loss": 2.1072, + "step": 14901 + }, + { + "epoch": 0.5, + "grad_norm": 0.7634531855583191, + "learning_rate": 1.0400072389210796e-05, + "loss": 2.1195, + "step": 14902 + }, + { + "epoch": 0.5, + "grad_norm": 0.7568991184234619, + "learning_rate": 1.0399010344978795e-05, + "loss": 2.0333, + "step": 14903 + }, + { + "epoch": 0.5, + "grad_norm": 0.7366955280303955, + "learning_rate": 1.0397948296239011e-05, + "loss": 2.0631, + "step": 14904 + }, + { + "epoch": 0.5, + "grad_norm": 0.7325473427772522, + "learning_rate": 1.039688624300344e-05, + "loss": 2.1169, + "step": 14905 + }, + { + "epoch": 0.5, + "grad_norm": 0.7596264481544495, + "learning_rate": 1.039582418528408e-05, + "loss": 2.1066, + "step": 14906 + }, + { + "epoch": 0.5, + "grad_norm": 0.7336369752883911, + "learning_rate": 1.0394762123092927e-05, + "loss": 2.001, + "step": 14907 + }, + { + "epoch": 0.5, + "grad_norm": 0.7136302590370178, + "learning_rate": 1.0393700056441988e-05, + "loss": 1.9942, + "step": 14908 + }, + { + "epoch": 0.5, + "grad_norm": 0.7119197845458984, + "learning_rate": 1.0392637985343257e-05, + "loss": 2.1342, + "step": 14909 + }, + { + "epoch": 0.5, + "grad_norm": 0.7322482466697693, + "learning_rate": 1.0391575909808726e-05, + "loss": 2.0638, + "step": 14910 + }, + { + "epoch": 0.5, + "grad_norm": 0.7553600668907166, + "learning_rate": 1.0390513829850407e-05, + "loss": 2.0723, + "step": 14911 + }, + { + "epoch": 0.5, + "grad_norm": 0.7396270632743835, + "learning_rate": 1.0389451745480287e-05, + "loss": 2.0935, + "step": 14912 + }, + { + "epoch": 0.5, + "grad_norm": 0.7652947902679443, + "learning_rate": 1.0388389656710372e-05, + "loss": 2.0409, + "step": 14913 + }, + { + "epoch": 0.5, + "grad_norm": 0.7241201996803284, + "learning_rate": 1.0387327563552657e-05, + "loss": 1.9566, + "step": 14914 + }, + { + "epoch": 0.5, + "grad_norm": 0.7488453984260559, + "learning_rate": 1.038626546601914e-05, + "loss": 2.1189, + "step": 14915 + }, + { + "epoch": 0.5, + "grad_norm": 0.7362661957740784, + "learning_rate": 1.0385203364121825e-05, + "loss": 2.1046, + "step": 14916 + }, + { + "epoch": 0.5, + "grad_norm": 0.760785698890686, + "learning_rate": 1.0384141257872711e-05, + "loss": 2.0809, + "step": 14917 + }, + { + "epoch": 0.5, + "grad_norm": 0.7182925343513489, + "learning_rate": 1.0383079147283788e-05, + "loss": 2.0664, + "step": 14918 + }, + { + "epoch": 0.5, + "grad_norm": 0.8115835785865784, + "learning_rate": 1.0382017032367065e-05, + "loss": 2.1025, + "step": 14919 + }, + { + "epoch": 0.5, + "grad_norm": 0.7542586326599121, + "learning_rate": 1.0380954913134535e-05, + "loss": 2.061, + "step": 14920 + }, + { + "epoch": 0.5, + "grad_norm": 0.734032392501831, + "learning_rate": 1.0379892789598201e-05, + "loss": 2.1074, + "step": 14921 + }, + { + "epoch": 0.5, + "grad_norm": 0.7570006251335144, + "learning_rate": 1.0378830661770064e-05, + "loss": 2.1267, + "step": 14922 + }, + { + "epoch": 0.5, + "grad_norm": 0.7115615606307983, + "learning_rate": 1.0377768529662116e-05, + "loss": 2.005, + "step": 14923 + }, + { + "epoch": 0.5, + "grad_norm": 0.7267482280731201, + "learning_rate": 1.037670639328636e-05, + "loss": 2.1589, + "step": 14924 + }, + { + "epoch": 0.5, + "grad_norm": 0.7609515190124512, + "learning_rate": 1.0375644252654797e-05, + "loss": 2.0504, + "step": 14925 + }, + { + "epoch": 0.5, + "grad_norm": 0.7393014430999756, + "learning_rate": 1.0374582107779428e-05, + "loss": 2.0218, + "step": 14926 + }, + { + "epoch": 0.5, + "grad_norm": 0.7563897967338562, + "learning_rate": 1.0373519958672247e-05, + "loss": 2.0477, + "step": 14927 + }, + { + "epoch": 0.5, + "grad_norm": 0.7553216814994812, + "learning_rate": 1.037245780534526e-05, + "loss": 2.0472, + "step": 14928 + }, + { + "epoch": 0.5, + "grad_norm": 0.7543018460273743, + "learning_rate": 1.0371395647810458e-05, + "loss": 2.0794, + "step": 14929 + }, + { + "epoch": 0.5, + "grad_norm": 0.7642726898193359, + "learning_rate": 1.0370333486079847e-05, + "loss": 2.0482, + "step": 14930 + }, + { + "epoch": 0.5, + "grad_norm": 0.7337827682495117, + "learning_rate": 1.0369271320165428e-05, + "loss": 2.0773, + "step": 14931 + }, + { + "epoch": 0.5, + "grad_norm": 0.7327796220779419, + "learning_rate": 1.0368209150079193e-05, + "loss": 2.0768, + "step": 14932 + }, + { + "epoch": 0.5, + "grad_norm": 0.7250407934188843, + "learning_rate": 1.0367146975833154e-05, + "loss": 2.0812, + "step": 14933 + }, + { + "epoch": 0.5, + "grad_norm": 0.7569813132286072, + "learning_rate": 1.0366084797439297e-05, + "loss": 2.0398, + "step": 14934 + }, + { + "epoch": 0.5, + "grad_norm": 0.7283141016960144, + "learning_rate": 1.0365022614909635e-05, + "loss": 2.0673, + "step": 14935 + }, + { + "epoch": 0.5, + "grad_norm": 0.7430052757263184, + "learning_rate": 1.0363960428256157e-05, + "loss": 2.1804, + "step": 14936 + }, + { + "epoch": 0.5, + "grad_norm": 0.7416808009147644, + "learning_rate": 1.0362898237490869e-05, + "loss": 2.0851, + "step": 14937 + }, + { + "epoch": 0.5, + "grad_norm": 0.7201119065284729, + "learning_rate": 1.0361836042625766e-05, + "loss": 1.9295, + "step": 14938 + }, + { + "epoch": 0.5, + "grad_norm": 0.7499935626983643, + "learning_rate": 1.0360773843672856e-05, + "loss": 2.0678, + "step": 14939 + }, + { + "epoch": 0.5, + "grad_norm": 0.7393625378608704, + "learning_rate": 1.035971164064413e-05, + "loss": 2.0804, + "step": 14940 + }, + { + "epoch": 0.5, + "grad_norm": 0.7280915975570679, + "learning_rate": 1.0358649433551595e-05, + "loss": 2.1508, + "step": 14941 + }, + { + "epoch": 0.5, + "grad_norm": 0.741487443447113, + "learning_rate": 1.035758722240725e-05, + "loss": 2.1156, + "step": 14942 + }, + { + "epoch": 0.5, + "grad_norm": 0.7153503894805908, + "learning_rate": 1.0356525007223092e-05, + "loss": 2.1264, + "step": 14943 + }, + { + "epoch": 0.5, + "grad_norm": 0.7580947875976562, + "learning_rate": 1.0355462788011128e-05, + "loss": 2.0357, + "step": 14944 + }, + { + "epoch": 0.5, + "grad_norm": 0.7380011081695557, + "learning_rate": 1.0354400564783347e-05, + "loss": 2.0876, + "step": 14945 + }, + { + "epoch": 0.5, + "grad_norm": 0.73245769739151, + "learning_rate": 1.035333833755176e-05, + "loss": 2.0588, + "step": 14946 + }, + { + "epoch": 0.5, + "grad_norm": 0.7557364106178284, + "learning_rate": 1.0352276106328365e-05, + "loss": 2.0769, + "step": 14947 + }, + { + "epoch": 0.5, + "grad_norm": 0.7831631302833557, + "learning_rate": 1.0351213871125159e-05, + "loss": 2.043, + "step": 14948 + }, + { + "epoch": 0.5, + "grad_norm": 0.7449649572372437, + "learning_rate": 1.0350151631954144e-05, + "loss": 2.1206, + "step": 14949 + }, + { + "epoch": 0.5, + "grad_norm": 0.7104489803314209, + "learning_rate": 1.034908938882732e-05, + "loss": 1.9836, + "step": 14950 + }, + { + "epoch": 0.5, + "grad_norm": 0.7336370348930359, + "learning_rate": 1.0348027141756692e-05, + "loss": 2.1277, + "step": 14951 + }, + { + "epoch": 0.5, + "grad_norm": 0.723856508731842, + "learning_rate": 1.0346964890754255e-05, + "loss": 2.0523, + "step": 14952 + }, + { + "epoch": 0.5, + "grad_norm": 0.7445895075798035, + "learning_rate": 1.0345902635832013e-05, + "loss": 2.1037, + "step": 14953 + }, + { + "epoch": 0.5, + "grad_norm": 0.7457610368728638, + "learning_rate": 1.0344840377001963e-05, + "loss": 2.052, + "step": 14954 + }, + { + "epoch": 0.5, + "grad_norm": 0.7634707093238831, + "learning_rate": 1.0343778114276116e-05, + "loss": 2.0986, + "step": 14955 + }, + { + "epoch": 0.5, + "grad_norm": 0.7595989108085632, + "learning_rate": 1.0342715847666456e-05, + "loss": 2.1089, + "step": 14956 + }, + { + "epoch": 0.5, + "grad_norm": 0.7370497584342957, + "learning_rate": 1.0341653577185e-05, + "loss": 2.1221, + "step": 14957 + }, + { + "epoch": 0.5, + "grad_norm": 0.7734804749488831, + "learning_rate": 1.034059130284374e-05, + "loss": 2.0986, + "step": 14958 + }, + { + "epoch": 0.5, + "grad_norm": 0.7678103446960449, + "learning_rate": 1.0339529024654677e-05, + "loss": 2.0241, + "step": 14959 + }, + { + "epoch": 0.5, + "grad_norm": 0.7381793856620789, + "learning_rate": 1.0338466742629816e-05, + "loss": 1.9941, + "step": 14960 + }, + { + "epoch": 0.5, + "grad_norm": 0.7295925617218018, + "learning_rate": 1.0337404456781155e-05, + "loss": 2.0989, + "step": 14961 + }, + { + "epoch": 0.5, + "grad_norm": 0.7533796429634094, + "learning_rate": 1.0336342167120696e-05, + "loss": 2.1011, + "step": 14962 + }, + { + "epoch": 0.5, + "grad_norm": 0.7278109788894653, + "learning_rate": 1.0335279873660443e-05, + "loss": 2.0117, + "step": 14963 + }, + { + "epoch": 0.5, + "grad_norm": 0.7220146059989929, + "learning_rate": 1.033421757641239e-05, + "loss": 2.0747, + "step": 14964 + }, + { + "epoch": 0.5, + "grad_norm": 0.7226706147193909, + "learning_rate": 1.0333155275388546e-05, + "loss": 2.0671, + "step": 14965 + }, + { + "epoch": 0.5, + "grad_norm": 0.7375779151916504, + "learning_rate": 1.0332092970600911e-05, + "loss": 2.0097, + "step": 14966 + }, + { + "epoch": 0.5, + "grad_norm": 0.7444968819618225, + "learning_rate": 1.0331030662061479e-05, + "loss": 2.1171, + "step": 14967 + }, + { + "epoch": 0.5, + "grad_norm": 0.73470139503479, + "learning_rate": 1.0329968349782263e-05, + "loss": 2.1067, + "step": 14968 + }, + { + "epoch": 0.5, + "grad_norm": 0.7416480183601379, + "learning_rate": 1.0328906033775252e-05, + "loss": 2.0331, + "step": 14969 + }, + { + "epoch": 0.5, + "grad_norm": 0.7530362010002136, + "learning_rate": 1.0327843714052456e-05, + "loss": 2.0737, + "step": 14970 + }, + { + "epoch": 0.5, + "grad_norm": 0.7495905756950378, + "learning_rate": 1.0326781390625873e-05, + "loss": 2.0987, + "step": 14971 + }, + { + "epoch": 0.5, + "grad_norm": 0.7729963064193726, + "learning_rate": 1.0325719063507507e-05, + "loss": 2.1022, + "step": 14972 + }, + { + "epoch": 0.5, + "grad_norm": 0.77906733751297, + "learning_rate": 1.0324656732709355e-05, + "loss": 2.1234, + "step": 14973 + }, + { + "epoch": 0.5, + "grad_norm": 0.7431879043579102, + "learning_rate": 1.0323594398243424e-05, + "loss": 2.0721, + "step": 14974 + }, + { + "epoch": 0.5, + "grad_norm": 0.78221595287323, + "learning_rate": 1.0322532060121713e-05, + "loss": 2.0817, + "step": 14975 + }, + { + "epoch": 0.5, + "grad_norm": 0.7753394842147827, + "learning_rate": 1.0321469718356221e-05, + "loss": 2.0719, + "step": 14976 + }, + { + "epoch": 0.5, + "grad_norm": 0.7251288294792175, + "learning_rate": 1.0320407372958959e-05, + "loss": 2.128, + "step": 14977 + }, + { + "epoch": 0.5, + "grad_norm": 0.73030686378479, + "learning_rate": 1.0319345023941914e-05, + "loss": 2.0274, + "step": 14978 + }, + { + "epoch": 0.5, + "grad_norm": 0.7386972308158875, + "learning_rate": 1.03182826713171e-05, + "loss": 2.0903, + "step": 14979 + }, + { + "epoch": 0.5, + "grad_norm": 0.7418166399002075, + "learning_rate": 1.0317220315096517e-05, + "loss": 2.0638, + "step": 14980 + }, + { + "epoch": 0.5, + "grad_norm": 0.7402293682098389, + "learning_rate": 1.0316157955292162e-05, + "loss": 2.0901, + "step": 14981 + }, + { + "epoch": 0.5, + "grad_norm": 0.7058587074279785, + "learning_rate": 1.031509559191604e-05, + "loss": 2.068, + "step": 14982 + }, + { + "epoch": 0.5, + "grad_norm": 0.7270570397377014, + "learning_rate": 1.0314033224980154e-05, + "loss": 2.0533, + "step": 14983 + }, + { + "epoch": 0.5, + "grad_norm": 0.743494987487793, + "learning_rate": 1.0312970854496502e-05, + "loss": 2.0321, + "step": 14984 + }, + { + "epoch": 0.5, + "grad_norm": 0.7592753171920776, + "learning_rate": 1.031190848047709e-05, + "loss": 2.016, + "step": 14985 + }, + { + "epoch": 0.5, + "grad_norm": 0.772149384021759, + "learning_rate": 1.0310846102933921e-05, + "loss": 2.1343, + "step": 14986 + }, + { + "epoch": 0.5, + "grad_norm": 0.7260019183158875, + "learning_rate": 1.0309783721878992e-05, + "loss": 2.0509, + "step": 14987 + }, + { + "epoch": 0.5, + "grad_norm": 0.7711973786354065, + "learning_rate": 1.0308721337324313e-05, + "loss": 2.1454, + "step": 14988 + }, + { + "epoch": 0.5, + "grad_norm": 0.7398608922958374, + "learning_rate": 1.0307658949281874e-05, + "loss": 2.1267, + "step": 14989 + }, + { + "epoch": 0.5, + "grad_norm": 0.7645648717880249, + "learning_rate": 1.0306596557763689e-05, + "loss": 2.074, + "step": 14990 + }, + { + "epoch": 0.5, + "grad_norm": 0.7492560148239136, + "learning_rate": 1.0305534162781755e-05, + "loss": 2.0244, + "step": 14991 + }, + { + "epoch": 0.5, + "grad_norm": 0.7266944646835327, + "learning_rate": 1.0304471764348071e-05, + "loss": 2.1695, + "step": 14992 + }, + { + "epoch": 0.5, + "grad_norm": 0.7242462635040283, + "learning_rate": 1.030340936247465e-05, + "loss": 2.0263, + "step": 14993 + }, + { + "epoch": 0.5, + "grad_norm": 0.7420892119407654, + "learning_rate": 1.0302346957173485e-05, + "loss": 2.054, + "step": 14994 + }, + { + "epoch": 0.5, + "grad_norm": 0.749206006526947, + "learning_rate": 1.0301284548456583e-05, + "loss": 2.0831, + "step": 14995 + }, + { + "epoch": 0.5, + "grad_norm": 0.7119378447532654, + "learning_rate": 1.0300222136335942e-05, + "loss": 2.0203, + "step": 14996 + }, + { + "epoch": 0.5, + "grad_norm": 0.7122659683227539, + "learning_rate": 1.0299159720823568e-05, + "loss": 2.118, + "step": 14997 + }, + { + "epoch": 0.5, + "grad_norm": 0.7372711896896362, + "learning_rate": 1.0298097301931463e-05, + "loss": 2.0184, + "step": 14998 + }, + { + "epoch": 0.5, + "grad_norm": 0.7319211959838867, + "learning_rate": 1.0297034879671632e-05, + "loss": 2.1168, + "step": 14999 + }, + { + "epoch": 0.5, + "grad_norm": 0.7794976830482483, + "learning_rate": 1.029597245405607e-05, + "loss": 2.1424, + "step": 15000 + }, + { + "epoch": 0.5, + "grad_norm": 0.7259954810142517, + "learning_rate": 1.029491002509679e-05, + "loss": 2.1461, + "step": 15001 + }, + { + "epoch": 0.5, + "grad_norm": 0.7392929792404175, + "learning_rate": 1.0293847592805786e-05, + "loss": 2.0404, + "step": 15002 + }, + { + "epoch": 0.5, + "grad_norm": 0.7564783692359924, + "learning_rate": 1.0292785157195063e-05, + "loss": 2.1237, + "step": 15003 + }, + { + "epoch": 0.5, + "grad_norm": 0.7409192323684692, + "learning_rate": 1.0291722718276626e-05, + "loss": 2.0812, + "step": 15004 + }, + { + "epoch": 0.5, + "grad_norm": 0.7153927087783813, + "learning_rate": 1.0290660276062478e-05, + "loss": 2.1209, + "step": 15005 + }, + { + "epoch": 0.5, + "grad_norm": 0.7136696577072144, + "learning_rate": 1.028959783056462e-05, + "loss": 1.9567, + "step": 15006 + }, + { + "epoch": 0.5, + "grad_norm": 0.7844505906105042, + "learning_rate": 1.0288535381795055e-05, + "loss": 2.0603, + "step": 15007 + }, + { + "epoch": 0.5, + "grad_norm": 0.7610125541687012, + "learning_rate": 1.0287472929765787e-05, + "loss": 2.0848, + "step": 15008 + }, + { + "epoch": 0.5, + "grad_norm": 0.7249979376792908, + "learning_rate": 1.0286410474488817e-05, + "loss": 2.1011, + "step": 15009 + }, + { + "epoch": 0.5, + "grad_norm": 0.7571722865104675, + "learning_rate": 1.0285348015976154e-05, + "loss": 2.0825, + "step": 15010 + }, + { + "epoch": 0.5, + "grad_norm": 0.7320692539215088, + "learning_rate": 1.0284285554239788e-05, + "loss": 2.0592, + "step": 15011 + }, + { + "epoch": 0.5, + "grad_norm": 0.735034167766571, + "learning_rate": 1.0283223089291738e-05, + "loss": 2.0401, + "step": 15012 + }, + { + "epoch": 0.5, + "grad_norm": 0.7413105368614197, + "learning_rate": 1.0282160621143995e-05, + "loss": 2.0998, + "step": 15013 + }, + { + "epoch": 0.5, + "grad_norm": 0.7590155005455017, + "learning_rate": 1.0281098149808566e-05, + "loss": 2.0922, + "step": 15014 + }, + { + "epoch": 0.5, + "grad_norm": 0.7385783195495605, + "learning_rate": 1.028003567529746e-05, + "loss": 2.1231, + "step": 15015 + }, + { + "epoch": 0.5, + "grad_norm": 0.7663057446479797, + "learning_rate": 1.0278973197622672e-05, + "loss": 2.12, + "step": 15016 + }, + { + "epoch": 0.5, + "grad_norm": 0.7794922590255737, + "learning_rate": 1.0277910716796208e-05, + "loss": 2.0075, + "step": 15017 + }, + { + "epoch": 0.5, + "grad_norm": 0.8342970609664917, + "learning_rate": 1.0276848232830073e-05, + "loss": 2.0844, + "step": 15018 + }, + { + "epoch": 0.5, + "grad_norm": 0.730302095413208, + "learning_rate": 1.0275785745736267e-05, + "loss": 2.0895, + "step": 15019 + }, + { + "epoch": 0.5, + "grad_norm": 0.7153183817863464, + "learning_rate": 1.0274723255526795e-05, + "loss": 1.9688, + "step": 15020 + }, + { + "epoch": 0.5, + "grad_norm": 0.7469394207000732, + "learning_rate": 1.0273660762213663e-05, + "loss": 2.0676, + "step": 15021 + }, + { + "epoch": 0.5, + "grad_norm": 0.7591396570205688, + "learning_rate": 1.0272598265808871e-05, + "loss": 2.0814, + "step": 15022 + }, + { + "epoch": 0.5, + "grad_norm": 0.789517343044281, + "learning_rate": 1.0271535766324425e-05, + "loss": 2.0854, + "step": 15023 + }, + { + "epoch": 0.5, + "grad_norm": 0.7499868869781494, + "learning_rate": 1.0270473263772325e-05, + "loss": 2.1533, + "step": 15024 + }, + { + "epoch": 0.5, + "grad_norm": 0.7245014309883118, + "learning_rate": 1.0269410758164576e-05, + "loss": 2.0434, + "step": 15025 + }, + { + "epoch": 0.5, + "grad_norm": 0.7627546787261963, + "learning_rate": 1.0268348249513185e-05, + "loss": 2.0336, + "step": 15026 + }, + { + "epoch": 0.5, + "grad_norm": 0.7713609337806702, + "learning_rate": 1.0267285737830151e-05, + "loss": 2.0851, + "step": 15027 + }, + { + "epoch": 0.5, + "grad_norm": 0.7164605855941772, + "learning_rate": 1.0266223223127479e-05, + "loss": 2.0451, + "step": 15028 + }, + { + "epoch": 0.5, + "grad_norm": 0.7042728662490845, + "learning_rate": 1.0265160705417172e-05, + "loss": 2.0453, + "step": 15029 + }, + { + "epoch": 0.5, + "grad_norm": 0.7053623795509338, + "learning_rate": 1.0264098184711235e-05, + "loss": 2.1062, + "step": 15030 + }, + { + "epoch": 0.5, + "grad_norm": 0.7514421343803406, + "learning_rate": 1.0263035661021673e-05, + "loss": 2.0617, + "step": 15031 + }, + { + "epoch": 0.5, + "grad_norm": 0.6990146040916443, + "learning_rate": 1.026197313436049e-05, + "loss": 2.0726, + "step": 15032 + }, + { + "epoch": 0.5, + "grad_norm": 0.7161879539489746, + "learning_rate": 1.0260910604739685e-05, + "loss": 2.0757, + "step": 15033 + }, + { + "epoch": 0.5, + "grad_norm": 0.7399678230285645, + "learning_rate": 1.0259848072171265e-05, + "loss": 2.0967, + "step": 15034 + }, + { + "epoch": 0.5, + "grad_norm": 0.714238703250885, + "learning_rate": 1.0258785536667237e-05, + "loss": 2.0928, + "step": 15035 + }, + { + "epoch": 0.5, + "grad_norm": 0.7681207656860352, + "learning_rate": 1.0257722998239596e-05, + "loss": 2.002, + "step": 15036 + }, + { + "epoch": 0.5, + "grad_norm": 0.7067738771438599, + "learning_rate": 1.0256660456900358e-05, + "loss": 2.1206, + "step": 15037 + }, + { + "epoch": 0.5, + "grad_norm": 0.7332746982574463, + "learning_rate": 1.0255597912661515e-05, + "loss": 1.9893, + "step": 15038 + }, + { + "epoch": 0.5, + "grad_norm": 0.7434923648834229, + "learning_rate": 1.0254535365535082e-05, + "loss": 2.0891, + "step": 15039 + }, + { + "epoch": 0.5, + "grad_norm": 0.7306588292121887, + "learning_rate": 1.0253472815533052e-05, + "loss": 2.064, + "step": 15040 + }, + { + "epoch": 0.5, + "grad_norm": 0.6879561543464661, + "learning_rate": 1.0252410262667439e-05, + "loss": 2.0323, + "step": 15041 + }, + { + "epoch": 0.5, + "grad_norm": 0.7288201451301575, + "learning_rate": 1.025134770695024e-05, + "loss": 2.0726, + "step": 15042 + }, + { + "epoch": 0.5, + "grad_norm": 0.7382511496543884, + "learning_rate": 1.0250285148393464e-05, + "loss": 2.0406, + "step": 15043 + }, + { + "epoch": 0.5, + "grad_norm": 0.734129786491394, + "learning_rate": 1.0249222587009111e-05, + "loss": 1.9874, + "step": 15044 + }, + { + "epoch": 0.5, + "grad_norm": 0.7282660603523254, + "learning_rate": 1.0248160022809188e-05, + "loss": 2.1071, + "step": 15045 + }, + { + "epoch": 0.5, + "grad_norm": 0.742565929889679, + "learning_rate": 1.0247097455805699e-05, + "loss": 2.1104, + "step": 15046 + }, + { + "epoch": 0.5, + "grad_norm": 0.7477833032608032, + "learning_rate": 1.0246034886010647e-05, + "loss": 2.0561, + "step": 15047 + }, + { + "epoch": 0.5, + "grad_norm": 0.7254829406738281, + "learning_rate": 1.0244972313436039e-05, + "loss": 2.03, + "step": 15048 + }, + { + "epoch": 0.5, + "grad_norm": 0.7322990894317627, + "learning_rate": 1.0243909738093876e-05, + "loss": 2.0664, + "step": 15049 + }, + { + "epoch": 0.5, + "grad_norm": 0.7462686896324158, + "learning_rate": 1.0242847159996165e-05, + "loss": 2.0078, + "step": 15050 + }, + { + "epoch": 0.5, + "grad_norm": 0.7335122227668762, + "learning_rate": 1.0241784579154907e-05, + "loss": 2.1686, + "step": 15051 + }, + { + "epoch": 0.5, + "grad_norm": 0.7716194987297058, + "learning_rate": 1.024072199558211e-05, + "loss": 2.059, + "step": 15052 + }, + { + "epoch": 0.5, + "grad_norm": 0.7603804469108582, + "learning_rate": 1.0239659409289775e-05, + "loss": 2.0512, + "step": 15053 + }, + { + "epoch": 0.5, + "grad_norm": 0.7174856662750244, + "learning_rate": 1.023859682028991e-05, + "loss": 2.0278, + "step": 15054 + }, + { + "epoch": 0.5, + "grad_norm": 0.7241097688674927, + "learning_rate": 1.0237534228594519e-05, + "loss": 2.0836, + "step": 15055 + }, + { + "epoch": 0.5, + "grad_norm": 0.7563875317573547, + "learning_rate": 1.0236471634215604e-05, + "loss": 2.0814, + "step": 15056 + }, + { + "epoch": 0.5, + "grad_norm": 0.7260016202926636, + "learning_rate": 1.023540903716517e-05, + "loss": 2.0738, + "step": 15057 + }, + { + "epoch": 0.5, + "grad_norm": 0.7824963331222534, + "learning_rate": 1.0234346437455225e-05, + "loss": 2.0687, + "step": 15058 + }, + { + "epoch": 0.5, + "grad_norm": 0.7510564923286438, + "learning_rate": 1.0233283835097771e-05, + "loss": 2.0284, + "step": 15059 + }, + { + "epoch": 0.5, + "grad_norm": 0.751591682434082, + "learning_rate": 1.023222123010481e-05, + "loss": 2.0995, + "step": 15060 + }, + { + "epoch": 0.5, + "grad_norm": 0.7454843521118164, + "learning_rate": 1.0231158622488355e-05, + "loss": 2.0902, + "step": 15061 + }, + { + "epoch": 0.5, + "grad_norm": 0.7171697616577148, + "learning_rate": 1.0230096012260402e-05, + "loss": 2.0762, + "step": 15062 + }, + { + "epoch": 0.5, + "grad_norm": 0.7329590916633606, + "learning_rate": 1.0229033399432959e-05, + "loss": 2.046, + "step": 15063 + }, + { + "epoch": 0.5, + "grad_norm": 0.7426868677139282, + "learning_rate": 1.0227970784018032e-05, + "loss": 2.1133, + "step": 15064 + }, + { + "epoch": 0.5, + "grad_norm": 0.7365871071815491, + "learning_rate": 1.0226908166027623e-05, + "loss": 2.0647, + "step": 15065 + }, + { + "epoch": 0.5, + "grad_norm": 0.7472206950187683, + "learning_rate": 1.0225845545473739e-05, + "loss": 2.0433, + "step": 15066 + }, + { + "epoch": 0.5, + "grad_norm": 0.7443026900291443, + "learning_rate": 1.0224782922368384e-05, + "loss": 2.098, + "step": 15067 + }, + { + "epoch": 0.5, + "grad_norm": 0.7591605186462402, + "learning_rate": 1.0223720296723564e-05, + "loss": 2.1284, + "step": 15068 + }, + { + "epoch": 0.5, + "grad_norm": 0.7183951735496521, + "learning_rate": 1.0222657668551284e-05, + "loss": 2.104, + "step": 15069 + }, + { + "epoch": 0.5, + "grad_norm": 0.7175899147987366, + "learning_rate": 1.022159503786355e-05, + "loss": 2.0853, + "step": 15070 + }, + { + "epoch": 0.5, + "grad_norm": 0.7538208365440369, + "learning_rate": 1.0220532404672358e-05, + "loss": 2.053, + "step": 15071 + }, + { + "epoch": 0.5, + "grad_norm": 0.7765169739723206, + "learning_rate": 1.0219469768989726e-05, + "loss": 2.0836, + "step": 15072 + }, + { + "epoch": 0.5, + "grad_norm": 0.7378482222557068, + "learning_rate": 1.0218407130827655e-05, + "loss": 2.0727, + "step": 15073 + }, + { + "epoch": 0.5, + "grad_norm": 0.7468685507774353, + "learning_rate": 1.0217344490198143e-05, + "loss": 2.1315, + "step": 15074 + }, + { + "epoch": 0.5, + "grad_norm": 0.7393693327903748, + "learning_rate": 1.0216281847113202e-05, + "loss": 2.0738, + "step": 15075 + }, + { + "epoch": 0.5, + "grad_norm": 0.7757687568664551, + "learning_rate": 1.0215219201584836e-05, + "loss": 2.1222, + "step": 15076 + }, + { + "epoch": 0.5, + "grad_norm": 0.7412993311882019, + "learning_rate": 1.021415655362505e-05, + "loss": 2.113, + "step": 15077 + }, + { + "epoch": 0.5, + "grad_norm": 0.7452883124351501, + "learning_rate": 1.0213093903245848e-05, + "loss": 2.0894, + "step": 15078 + }, + { + "epoch": 0.5, + "grad_norm": 0.7222248315811157, + "learning_rate": 1.0212031250459236e-05, + "loss": 2.1022, + "step": 15079 + }, + { + "epoch": 0.5, + "grad_norm": 0.7397889494895935, + "learning_rate": 1.021096859527722e-05, + "loss": 2.0902, + "step": 15080 + }, + { + "epoch": 0.5, + "grad_norm": 0.7630834579467773, + "learning_rate": 1.0209905937711806e-05, + "loss": 2.0848, + "step": 15081 + }, + { + "epoch": 0.5, + "grad_norm": 0.7141994833946228, + "learning_rate": 1.020884327777499e-05, + "loss": 2.0776, + "step": 15082 + }, + { + "epoch": 0.5, + "grad_norm": 0.7081262469291687, + "learning_rate": 1.0207780615478794e-05, + "loss": 2.1083, + "step": 15083 + }, + { + "epoch": 0.5, + "grad_norm": 0.7765079736709595, + "learning_rate": 1.0206717950835212e-05, + "loss": 2.0279, + "step": 15084 + }, + { + "epoch": 0.5, + "grad_norm": 0.7433781623840332, + "learning_rate": 1.0205655283856251e-05, + "loss": 2.0705, + "step": 15085 + }, + { + "epoch": 0.5, + "grad_norm": 0.7235702276229858, + "learning_rate": 1.0204592614553917e-05, + "loss": 2.0715, + "step": 15086 + }, + { + "epoch": 0.5, + "grad_norm": 0.7098559141159058, + "learning_rate": 1.0203529942940214e-05, + "loss": 2.0702, + "step": 15087 + }, + { + "epoch": 0.5, + "grad_norm": 0.7294248342514038, + "learning_rate": 1.020246726902715e-05, + "loss": 2.0861, + "step": 15088 + }, + { + "epoch": 0.5, + "grad_norm": 0.7264123558998108, + "learning_rate": 1.020140459282673e-05, + "loss": 2.0442, + "step": 15089 + }, + { + "epoch": 0.5, + "grad_norm": 0.7129672169685364, + "learning_rate": 1.020034191435096e-05, + "loss": 2.0229, + "step": 15090 + }, + { + "epoch": 0.5, + "grad_norm": 0.7302761673927307, + "learning_rate": 1.0199279233611843e-05, + "loss": 2.0656, + "step": 15091 + }, + { + "epoch": 0.5, + "grad_norm": 0.7293055653572083, + "learning_rate": 1.0198216550621388e-05, + "loss": 2.0566, + "step": 15092 + }, + { + "epoch": 0.5, + "grad_norm": 0.7157041430473328, + "learning_rate": 1.0197153865391593e-05, + "loss": 2.1004, + "step": 15093 + }, + { + "epoch": 0.5, + "grad_norm": 0.765071451663971, + "learning_rate": 1.0196091177934476e-05, + "loss": 2.1405, + "step": 15094 + }, + { + "epoch": 0.5, + "grad_norm": 0.7433205246925354, + "learning_rate": 1.0195028488262034e-05, + "loss": 2.0846, + "step": 15095 + }, + { + "epoch": 0.5, + "grad_norm": 0.7083059549331665, + "learning_rate": 1.0193965796386271e-05, + "loss": 2.0481, + "step": 15096 + }, + { + "epoch": 0.5, + "grad_norm": 0.7131375670433044, + "learning_rate": 1.0192903102319198e-05, + "loss": 2.0603, + "step": 15097 + }, + { + "epoch": 0.5, + "grad_norm": 0.7157630920410156, + "learning_rate": 1.019184040607282e-05, + "loss": 2.0362, + "step": 15098 + }, + { + "epoch": 0.5, + "grad_norm": 0.7088899612426758, + "learning_rate": 1.019077770765914e-05, + "loss": 2.0728, + "step": 15099 + }, + { + "epoch": 0.5, + "grad_norm": 0.733696460723877, + "learning_rate": 1.0189715007090167e-05, + "loss": 2.0708, + "step": 15100 + }, + { + "epoch": 0.5, + "grad_norm": 0.758601725101471, + "learning_rate": 1.0188652304377901e-05, + "loss": 2.0514, + "step": 15101 + }, + { + "epoch": 0.5, + "grad_norm": 0.7484540939331055, + "learning_rate": 1.0187589599534356e-05, + "loss": 2.0157, + "step": 15102 + }, + { + "epoch": 0.5, + "grad_norm": 0.7373932003974915, + "learning_rate": 1.0186526892571535e-05, + "loss": 2.0475, + "step": 15103 + }, + { + "epoch": 0.5, + "grad_norm": 0.7213883399963379, + "learning_rate": 1.0185464183501437e-05, + "loss": 2.1162, + "step": 15104 + }, + { + "epoch": 0.5, + "grad_norm": 0.7763431072235107, + "learning_rate": 1.0184401472336078e-05, + "loss": 2.0045, + "step": 15105 + }, + { + "epoch": 0.5, + "grad_norm": 0.7215366363525391, + "learning_rate": 1.0183338759087458e-05, + "loss": 2.0769, + "step": 15106 + }, + { + "epoch": 0.5, + "grad_norm": 0.7565811276435852, + "learning_rate": 1.018227604376758e-05, + "loss": 2.1016, + "step": 15107 + }, + { + "epoch": 0.5, + "grad_norm": 0.7278676629066467, + "learning_rate": 1.0181213326388461e-05, + "loss": 2.1173, + "step": 15108 + }, + { + "epoch": 0.5, + "grad_norm": 0.7293820381164551, + "learning_rate": 1.0180150606962097e-05, + "loss": 2.0596, + "step": 15109 + }, + { + "epoch": 0.5, + "grad_norm": 0.7558550834655762, + "learning_rate": 1.0179087885500496e-05, + "loss": 2.1054, + "step": 15110 + }, + { + "epoch": 0.5, + "grad_norm": 0.7702699303627014, + "learning_rate": 1.0178025162015666e-05, + "loss": 2.0019, + "step": 15111 + }, + { + "epoch": 0.5, + "grad_norm": 0.734424889087677, + "learning_rate": 1.0176962436519612e-05, + "loss": 2.0816, + "step": 15112 + }, + { + "epoch": 0.5, + "grad_norm": 0.7536603212356567, + "learning_rate": 1.0175899709024339e-05, + "loss": 2.1009, + "step": 15113 + }, + { + "epoch": 0.5, + "grad_norm": 0.7323282957077026, + "learning_rate": 1.0174836979541858e-05, + "loss": 2.1001, + "step": 15114 + }, + { + "epoch": 0.5, + "grad_norm": 0.7491731643676758, + "learning_rate": 1.0173774248084164e-05, + "loss": 2.101, + "step": 15115 + }, + { + "epoch": 0.5, + "grad_norm": 0.7597584128379822, + "learning_rate": 1.0172711514663279e-05, + "loss": 2.0307, + "step": 15116 + }, + { + "epoch": 0.5, + "grad_norm": 0.7628453969955444, + "learning_rate": 1.0171648779291197e-05, + "loss": 2.1231, + "step": 15117 + }, + { + "epoch": 0.5, + "grad_norm": 0.7514697909355164, + "learning_rate": 1.0170586041979924e-05, + "loss": 2.1547, + "step": 15118 + }, + { + "epoch": 0.5, + "grad_norm": 0.7454550862312317, + "learning_rate": 1.0169523302741476e-05, + "loss": 2.0855, + "step": 15119 + }, + { + "epoch": 0.5, + "grad_norm": 0.7269362211227417, + "learning_rate": 1.0168460561587848e-05, + "loss": 2.046, + "step": 15120 + }, + { + "epoch": 0.5, + "grad_norm": 0.7522753477096558, + "learning_rate": 1.0167397818531053e-05, + "loss": 2.088, + "step": 15121 + }, + { + "epoch": 0.5, + "grad_norm": 0.721161961555481, + "learning_rate": 1.0166335073583096e-05, + "loss": 2.0585, + "step": 15122 + }, + { + "epoch": 0.5, + "grad_norm": 0.7396345138549805, + "learning_rate": 1.016527232675598e-05, + "loss": 2.0434, + "step": 15123 + }, + { + "epoch": 0.5, + "grad_norm": 0.7660729885101318, + "learning_rate": 1.0164209578061719e-05, + "loss": 2.1389, + "step": 15124 + }, + { + "epoch": 0.5, + "grad_norm": 0.7383466362953186, + "learning_rate": 1.0163146827512314e-05, + "loss": 2.0484, + "step": 15125 + }, + { + "epoch": 0.5, + "grad_norm": 0.7397867441177368, + "learning_rate": 1.016208407511977e-05, + "loss": 2.1103, + "step": 15126 + }, + { + "epoch": 0.5, + "grad_norm": 0.7015363574028015, + "learning_rate": 1.0161021320896097e-05, + "loss": 2.0816, + "step": 15127 + }, + { + "epoch": 0.5, + "grad_norm": 0.7294539213180542, + "learning_rate": 1.0159958564853297e-05, + "loss": 2.0564, + "step": 15128 + }, + { + "epoch": 0.5, + "grad_norm": 0.7421606183052063, + "learning_rate": 1.0158895807003375e-05, + "loss": 2.0636, + "step": 15129 + }, + { + "epoch": 0.5, + "grad_norm": 0.7420483827590942, + "learning_rate": 1.0157833047358347e-05, + "loss": 2.1263, + "step": 15130 + }, + { + "epoch": 0.5, + "grad_norm": 0.738120436668396, + "learning_rate": 1.015677028593021e-05, + "loss": 2.0523, + "step": 15131 + }, + { + "epoch": 0.5, + "grad_norm": 0.732755184173584, + "learning_rate": 1.015570752273098e-05, + "loss": 2.0664, + "step": 15132 + }, + { + "epoch": 0.5, + "grad_norm": 0.7340903282165527, + "learning_rate": 1.0154644757772654e-05, + "loss": 2.1405, + "step": 15133 + }, + { + "epoch": 0.5, + "grad_norm": 0.7227667570114136, + "learning_rate": 1.0153581991067243e-05, + "loss": 2.0689, + "step": 15134 + }, + { + "epoch": 0.5, + "grad_norm": 0.7309543490409851, + "learning_rate": 1.015251922262675e-05, + "loss": 2.0788, + "step": 15135 + }, + { + "epoch": 0.5, + "grad_norm": 0.7164747714996338, + "learning_rate": 1.0151456452463192e-05, + "loss": 2.0419, + "step": 15136 + }, + { + "epoch": 0.5, + "grad_norm": 0.773080587387085, + "learning_rate": 1.0150393680588557e-05, + "loss": 2.025, + "step": 15137 + }, + { + "epoch": 0.5, + "grad_norm": 0.758573591709137, + "learning_rate": 1.0149330907014867e-05, + "loss": 2.0936, + "step": 15138 + }, + { + "epoch": 0.5, + "grad_norm": 0.7612324357032776, + "learning_rate": 1.0148268131754125e-05, + "loss": 2.0881, + "step": 15139 + }, + { + "epoch": 0.5, + "grad_norm": 0.7490122318267822, + "learning_rate": 1.0147205354818334e-05, + "loss": 1.9939, + "step": 15140 + }, + { + "epoch": 0.5, + "grad_norm": 0.7673269510269165, + "learning_rate": 1.0146142576219508e-05, + "loss": 2.0337, + "step": 15141 + }, + { + "epoch": 0.5, + "grad_norm": 0.7624891400337219, + "learning_rate": 1.0145079795969644e-05, + "loss": 2.0566, + "step": 15142 + }, + { + "epoch": 0.5, + "grad_norm": 0.718422532081604, + "learning_rate": 1.0144017014080759e-05, + "loss": 2.0209, + "step": 15143 + }, + { + "epoch": 0.5, + "grad_norm": 0.728847086429596, + "learning_rate": 1.014295423056485e-05, + "loss": 2.0575, + "step": 15144 + }, + { + "epoch": 0.5, + "grad_norm": 0.7390760779380798, + "learning_rate": 1.0141891445433926e-05, + "loss": 2.0314, + "step": 15145 + }, + { + "epoch": 0.5, + "grad_norm": 0.7592571973800659, + "learning_rate": 1.0140828658699999e-05, + "loss": 1.996, + "step": 15146 + }, + { + "epoch": 0.5, + "grad_norm": 0.7184703350067139, + "learning_rate": 1.0139765870375071e-05, + "loss": 2.061, + "step": 15147 + }, + { + "epoch": 0.5, + "grad_norm": 0.7111548781394958, + "learning_rate": 1.013870308047115e-05, + "loss": 2.0621, + "step": 15148 + }, + { + "epoch": 0.5, + "grad_norm": 0.7391053438186646, + "learning_rate": 1.0137640289000244e-05, + "loss": 2.0417, + "step": 15149 + }, + { + "epoch": 0.5, + "grad_norm": 0.7320195436477661, + "learning_rate": 1.0136577495974358e-05, + "loss": 2.0852, + "step": 15150 + }, + { + "epoch": 0.5, + "grad_norm": 0.7583122849464417, + "learning_rate": 1.01355147014055e-05, + "loss": 2.0752, + "step": 15151 + }, + { + "epoch": 0.5, + "grad_norm": 0.7353431582450867, + "learning_rate": 1.0134451905305679e-05, + "loss": 2.0692, + "step": 15152 + }, + { + "epoch": 0.5, + "grad_norm": 0.7522952556610107, + "learning_rate": 1.0133389107686894e-05, + "loss": 2.0474, + "step": 15153 + }, + { + "epoch": 0.5, + "grad_norm": 0.7363624572753906, + "learning_rate": 1.013232630856116e-05, + "loss": 2.0796, + "step": 15154 + }, + { + "epoch": 0.5, + "grad_norm": 0.7328789234161377, + "learning_rate": 1.0131263507940479e-05, + "loss": 2.0889, + "step": 15155 + }, + { + "epoch": 0.5, + "grad_norm": 0.7326347231864929, + "learning_rate": 1.0130200705836861e-05, + "loss": 2.1141, + "step": 15156 + }, + { + "epoch": 0.5, + "grad_norm": 0.7435540556907654, + "learning_rate": 1.0129137902262311e-05, + "loss": 2.0389, + "step": 15157 + }, + { + "epoch": 0.5, + "grad_norm": 0.719172477722168, + "learning_rate": 1.0128075097228837e-05, + "loss": 2.1025, + "step": 15158 + }, + { + "epoch": 0.5, + "grad_norm": 0.7488683462142944, + "learning_rate": 1.0127012290748446e-05, + "loss": 2.1098, + "step": 15159 + }, + { + "epoch": 0.5, + "grad_norm": 0.7347745299339294, + "learning_rate": 1.0125949482833144e-05, + "loss": 2.0382, + "step": 15160 + }, + { + "epoch": 0.5, + "grad_norm": 0.718937337398529, + "learning_rate": 1.0124886673494938e-05, + "loss": 2.0751, + "step": 15161 + }, + { + "epoch": 0.5, + "grad_norm": 0.7688394784927368, + "learning_rate": 1.0123823862745836e-05, + "loss": 2.0577, + "step": 15162 + }, + { + "epoch": 0.5, + "grad_norm": 0.7382035851478577, + "learning_rate": 1.012276105059785e-05, + "loss": 2.1334, + "step": 15163 + }, + { + "epoch": 0.5, + "grad_norm": 0.7622292041778564, + "learning_rate": 1.0121698237062973e-05, + "loss": 2.0474, + "step": 15164 + }, + { + "epoch": 0.5, + "grad_norm": 0.7671157717704773, + "learning_rate": 1.0120635422153227e-05, + "loss": 2.1028, + "step": 15165 + }, + { + "epoch": 0.5, + "grad_norm": 0.7590948343276978, + "learning_rate": 1.0119572605880608e-05, + "loss": 2.0489, + "step": 15166 + }, + { + "epoch": 0.5, + "grad_norm": 0.7478145360946655, + "learning_rate": 1.0118509788257129e-05, + "loss": 2.0816, + "step": 15167 + }, + { + "epoch": 0.5, + "grad_norm": 0.7441771626472473, + "learning_rate": 1.0117446969294797e-05, + "loss": 2.1215, + "step": 15168 + }, + { + "epoch": 0.5, + "grad_norm": 0.7434021234512329, + "learning_rate": 1.0116384149005618e-05, + "loss": 2.0326, + "step": 15169 + }, + { + "epoch": 0.5, + "grad_norm": 0.7349259853363037, + "learning_rate": 1.0115321327401599e-05, + "loss": 2.0524, + "step": 15170 + }, + { + "epoch": 0.5, + "grad_norm": 0.7186653017997742, + "learning_rate": 1.0114258504494747e-05, + "loss": 2.0354, + "step": 15171 + }, + { + "epoch": 0.5, + "grad_norm": 0.7231743931770325, + "learning_rate": 1.0113195680297068e-05, + "loss": 2.1182, + "step": 15172 + }, + { + "epoch": 0.5, + "grad_norm": 0.7351239919662476, + "learning_rate": 1.0112132854820573e-05, + "loss": 2.1352, + "step": 15173 + }, + { + "epoch": 0.5, + "grad_norm": 0.7919723391532898, + "learning_rate": 1.0111070028077267e-05, + "loss": 2.0923, + "step": 15174 + }, + { + "epoch": 0.5, + "grad_norm": 0.7534822225570679, + "learning_rate": 1.0110007200079152e-05, + "loss": 2.0735, + "step": 15175 + }, + { + "epoch": 0.5, + "grad_norm": 0.7378916144371033, + "learning_rate": 1.0108944370838247e-05, + "loss": 2.0864, + "step": 15176 + }, + { + "epoch": 0.5, + "grad_norm": 0.7237681150436401, + "learning_rate": 1.0107881540366549e-05, + "loss": 2.0669, + "step": 15177 + }, + { + "epoch": 0.5, + "grad_norm": 0.7312081456184387, + "learning_rate": 1.0106818708676067e-05, + "loss": 1.9855, + "step": 15178 + }, + { + "epoch": 0.51, + "grad_norm": 0.7438679933547974, + "learning_rate": 1.0105755875778814e-05, + "loss": 2.0171, + "step": 15179 + }, + { + "epoch": 0.51, + "grad_norm": 0.7269994616508484, + "learning_rate": 1.0104693041686788e-05, + "loss": 2.0821, + "step": 15180 + }, + { + "epoch": 0.51, + "grad_norm": 0.7123048901557922, + "learning_rate": 1.0103630206412005e-05, + "loss": 2.1417, + "step": 15181 + }, + { + "epoch": 0.51, + "grad_norm": 0.7697892189025879, + "learning_rate": 1.0102567369966466e-05, + "loss": 1.9883, + "step": 15182 + }, + { + "epoch": 0.51, + "grad_norm": 0.7447414398193359, + "learning_rate": 1.0101504532362183e-05, + "loss": 2.0633, + "step": 15183 + }, + { + "epoch": 0.51, + "grad_norm": 0.7448007464408875, + "learning_rate": 1.010044169361116e-05, + "loss": 2.0827, + "step": 15184 + }, + { + "epoch": 0.51, + "grad_norm": 0.7365018725395203, + "learning_rate": 1.009937885372541e-05, + "loss": 2.0613, + "step": 15185 + }, + { + "epoch": 0.51, + "grad_norm": 0.7608380317687988, + "learning_rate": 1.0098316012716929e-05, + "loss": 2.0395, + "step": 15186 + }, + { + "epoch": 0.51, + "grad_norm": 0.7518879771232605, + "learning_rate": 1.0097253170597737e-05, + "loss": 1.9853, + "step": 15187 + }, + { + "epoch": 0.51, + "grad_norm": 0.7416049242019653, + "learning_rate": 1.0096190327379833e-05, + "loss": 2.1062, + "step": 15188 + }, + { + "epoch": 0.51, + "grad_norm": 0.721646249294281, + "learning_rate": 1.0095127483075226e-05, + "loss": 2.1055, + "step": 15189 + }, + { + "epoch": 0.51, + "grad_norm": 0.7360051870346069, + "learning_rate": 1.0094064637695926e-05, + "loss": 2.1329, + "step": 15190 + }, + { + "epoch": 0.51, + "grad_norm": 0.7046104669570923, + "learning_rate": 1.0093001791253938e-05, + "loss": 2.057, + "step": 15191 + }, + { + "epoch": 0.51, + "grad_norm": 0.7525282502174377, + "learning_rate": 1.0091938943761272e-05, + "loss": 2.0985, + "step": 15192 + }, + { + "epoch": 0.51, + "grad_norm": 0.7234799861907959, + "learning_rate": 1.0090876095229932e-05, + "loss": 2.0857, + "step": 15193 + }, + { + "epoch": 0.51, + "grad_norm": 0.7559448480606079, + "learning_rate": 1.0089813245671928e-05, + "loss": 2.0604, + "step": 15194 + }, + { + "epoch": 0.51, + "grad_norm": 0.7281912565231323, + "learning_rate": 1.0088750395099268e-05, + "loss": 2.0901, + "step": 15195 + }, + { + "epoch": 0.51, + "grad_norm": 0.7190815806388855, + "learning_rate": 1.0087687543523957e-05, + "loss": 2.0004, + "step": 15196 + }, + { + "epoch": 0.51, + "grad_norm": 0.7349309921264648, + "learning_rate": 1.0086624690958e-05, + "loss": 2.0891, + "step": 15197 + }, + { + "epoch": 0.51, + "grad_norm": 0.7298219799995422, + "learning_rate": 1.0085561837413413e-05, + "loss": 2.0898, + "step": 15198 + }, + { + "epoch": 0.51, + "grad_norm": 0.7425423264503479, + "learning_rate": 1.0084498982902195e-05, + "loss": 2.0662, + "step": 15199 + }, + { + "epoch": 0.51, + "grad_norm": 0.7731768488883972, + "learning_rate": 1.0083436127436359e-05, + "loss": 2.0929, + "step": 15200 + }, + { + "epoch": 0.51, + "grad_norm": 0.7267821431159973, + "learning_rate": 1.008237327102791e-05, + "loss": 2.1465, + "step": 15201 + }, + { + "epoch": 0.51, + "grad_norm": 0.7411586046218872, + "learning_rate": 1.0081310413688855e-05, + "loss": 1.9804, + "step": 15202 + }, + { + "epoch": 0.51, + "grad_norm": 0.7393783926963806, + "learning_rate": 1.0080247555431204e-05, + "loss": 2.075, + "step": 15203 + }, + { + "epoch": 0.51, + "grad_norm": 0.7609144449234009, + "learning_rate": 1.0079184696266964e-05, + "loss": 2.0835, + "step": 15204 + }, + { + "epoch": 0.51, + "grad_norm": 0.7264783382415771, + "learning_rate": 1.0078121836208139e-05, + "loss": 2.0801, + "step": 15205 + }, + { + "epoch": 0.51, + "grad_norm": 0.7539568543434143, + "learning_rate": 1.007705897526674e-05, + "loss": 2.0123, + "step": 15206 + }, + { + "epoch": 0.51, + "grad_norm": 0.7428175210952759, + "learning_rate": 1.0075996113454778e-05, + "loss": 2.0255, + "step": 15207 + }, + { + "epoch": 0.51, + "grad_norm": 0.7531280517578125, + "learning_rate": 1.0074933250784251e-05, + "loss": 2.1658, + "step": 15208 + }, + { + "epoch": 0.51, + "grad_norm": 0.7306881546974182, + "learning_rate": 1.0073870387267175e-05, + "loss": 2.1391, + "step": 15209 + }, + { + "epoch": 0.51, + "grad_norm": 0.7184191942214966, + "learning_rate": 1.0072807522915555e-05, + "loss": 2.0463, + "step": 15210 + }, + { + "epoch": 0.51, + "grad_norm": 0.7459871172904968, + "learning_rate": 1.0071744657741393e-05, + "loss": 2.0882, + "step": 15211 + }, + { + "epoch": 0.51, + "grad_norm": 0.7540646195411682, + "learning_rate": 1.0070681791756708e-05, + "loss": 2.1714, + "step": 15212 + }, + { + "epoch": 0.51, + "grad_norm": 0.7392327189445496, + "learning_rate": 1.00696189249735e-05, + "loss": 2.035, + "step": 15213 + }, + { + "epoch": 0.51, + "grad_norm": 0.7537505626678467, + "learning_rate": 1.0068556057403777e-05, + "loss": 2.0894, + "step": 15214 + }, + { + "epoch": 0.51, + "grad_norm": 0.7503833174705505, + "learning_rate": 1.006749318905955e-05, + "loss": 2.05, + "step": 15215 + }, + { + "epoch": 0.51, + "grad_norm": 0.7317917346954346, + "learning_rate": 1.0066430319952823e-05, + "loss": 2.0767, + "step": 15216 + }, + { + "epoch": 0.51, + "grad_norm": 0.7461367845535278, + "learning_rate": 1.0065367450095605e-05, + "loss": 2.0301, + "step": 15217 + }, + { + "epoch": 0.51, + "grad_norm": 0.740315318107605, + "learning_rate": 1.0064304579499905e-05, + "loss": 2.0266, + "step": 15218 + }, + { + "epoch": 0.51, + "grad_norm": 0.742792010307312, + "learning_rate": 1.0063241708177726e-05, + "loss": 2.115, + "step": 15219 + }, + { + "epoch": 0.51, + "grad_norm": 0.7185328006744385, + "learning_rate": 1.0062178836141083e-05, + "loss": 2.1014, + "step": 15220 + }, + { + "epoch": 0.51, + "grad_norm": 0.7664076685905457, + "learning_rate": 1.006111596340198e-05, + "loss": 2.0612, + "step": 15221 + }, + { + "epoch": 0.51, + "grad_norm": 0.7740065455436707, + "learning_rate": 1.0060053089972421e-05, + "loss": 2.0792, + "step": 15222 + }, + { + "epoch": 0.51, + "grad_norm": 0.732048511505127, + "learning_rate": 1.0058990215864421e-05, + "loss": 2.1611, + "step": 15223 + }, + { + "epoch": 0.51, + "grad_norm": 0.72318434715271, + "learning_rate": 1.0057927341089984e-05, + "loss": 2.0056, + "step": 15224 + }, + { + "epoch": 0.51, + "grad_norm": 0.754892885684967, + "learning_rate": 1.0056864465661116e-05, + "loss": 2.0849, + "step": 15225 + }, + { + "epoch": 0.51, + "grad_norm": 0.7374746203422546, + "learning_rate": 1.0055801589589826e-05, + "loss": 2.0613, + "step": 15226 + }, + { + "epoch": 0.51, + "grad_norm": 0.7219117283821106, + "learning_rate": 1.0054738712888125e-05, + "loss": 2.0746, + "step": 15227 + }, + { + "epoch": 0.51, + "grad_norm": 0.7901682257652283, + "learning_rate": 1.0053675835568017e-05, + "loss": 2.098, + "step": 15228 + }, + { + "epoch": 0.51, + "grad_norm": 0.7302582859992981, + "learning_rate": 1.0052612957641512e-05, + "loss": 2.0974, + "step": 15229 + }, + { + "epoch": 0.51, + "grad_norm": 0.7356825470924377, + "learning_rate": 1.0051550079120613e-05, + "loss": 2.0163, + "step": 15230 + }, + { + "epoch": 0.51, + "grad_norm": 0.7401964664459229, + "learning_rate": 1.0050487200017336e-05, + "loss": 2.0496, + "step": 15231 + }, + { + "epoch": 0.51, + "grad_norm": 0.7328833937644958, + "learning_rate": 1.004942432034368e-05, + "loss": 2.0699, + "step": 15232 + }, + { + "epoch": 0.51, + "grad_norm": 0.739920973777771, + "learning_rate": 1.0048361440111659e-05, + "loss": 2.1049, + "step": 15233 + }, + { + "epoch": 0.51, + "grad_norm": 0.7351012825965881, + "learning_rate": 1.0047298559333281e-05, + "loss": 2.0481, + "step": 15234 + }, + { + "epoch": 0.51, + "grad_norm": 0.7253711819648743, + "learning_rate": 1.0046235678020546e-05, + "loss": 2.0576, + "step": 15235 + }, + { + "epoch": 0.51, + "grad_norm": 0.7943901419639587, + "learning_rate": 1.0045172796185473e-05, + "loss": 2.0545, + "step": 15236 + }, + { + "epoch": 0.51, + "grad_norm": 0.7487738132476807, + "learning_rate": 1.0044109913840061e-05, + "loss": 2.0827, + "step": 15237 + }, + { + "epoch": 0.51, + "grad_norm": 0.7283720970153809, + "learning_rate": 1.0043047030996322e-05, + "loss": 2.0863, + "step": 15238 + }, + { + "epoch": 0.51, + "grad_norm": 0.7430052161216736, + "learning_rate": 1.0041984147666263e-05, + "loss": 2.0745, + "step": 15239 + }, + { + "epoch": 0.51, + "grad_norm": 0.7278792262077332, + "learning_rate": 1.0040921263861891e-05, + "loss": 2.1126, + "step": 15240 + }, + { + "epoch": 0.51, + "grad_norm": 0.7549926042556763, + "learning_rate": 1.0039858379595215e-05, + "loss": 2.0238, + "step": 15241 + }, + { + "epoch": 0.51, + "grad_norm": 0.7231711745262146, + "learning_rate": 1.0038795494878246e-05, + "loss": 2.0809, + "step": 15242 + }, + { + "epoch": 0.51, + "grad_norm": 0.7544825077056885, + "learning_rate": 1.003773260972298e-05, + "loss": 2.1058, + "step": 15243 + }, + { + "epoch": 0.51, + "grad_norm": 0.7193359136581421, + "learning_rate": 1.0036669724141438e-05, + "loss": 2.0645, + "step": 15244 + }, + { + "epoch": 0.51, + "grad_norm": 0.724247395992279, + "learning_rate": 1.0035606838145626e-05, + "loss": 2.0784, + "step": 15245 + }, + { + "epoch": 0.51, + "grad_norm": 0.7380356788635254, + "learning_rate": 1.0034543951747544e-05, + "loss": 2.0535, + "step": 15246 + }, + { + "epoch": 0.51, + "grad_norm": 0.7263193130493164, + "learning_rate": 1.0033481064959207e-05, + "loss": 2.0935, + "step": 15247 + }, + { + "epoch": 0.51, + "grad_norm": 0.8255354166030884, + "learning_rate": 1.003241817779262e-05, + "loss": 2.1038, + "step": 15248 + }, + { + "epoch": 0.51, + "grad_norm": 0.7594192624092102, + "learning_rate": 1.0031355290259792e-05, + "loss": 2.0508, + "step": 15249 + }, + { + "epoch": 0.51, + "grad_norm": 0.7299487590789795, + "learning_rate": 1.003029240237273e-05, + "loss": 2.1225, + "step": 15250 + }, + { + "epoch": 0.51, + "grad_norm": 0.7273380160331726, + "learning_rate": 1.0029229514143442e-05, + "loss": 2.072, + "step": 15251 + }, + { + "epoch": 0.51, + "grad_norm": 0.7191823124885559, + "learning_rate": 1.0028166625583936e-05, + "loss": 2.0998, + "step": 15252 + }, + { + "epoch": 0.51, + "grad_norm": 0.7502151727676392, + "learning_rate": 1.0027103736706219e-05, + "loss": 2.0606, + "step": 15253 + }, + { + "epoch": 0.51, + "grad_norm": 0.7763063311576843, + "learning_rate": 1.00260408475223e-05, + "loss": 2.0743, + "step": 15254 + }, + { + "epoch": 0.51, + "grad_norm": 0.7186775207519531, + "learning_rate": 1.0024977958044186e-05, + "loss": 2.088, + "step": 15255 + }, + { + "epoch": 0.51, + "grad_norm": 0.7211713194847107, + "learning_rate": 1.002391506828389e-05, + "loss": 2.0633, + "step": 15256 + }, + { + "epoch": 0.51, + "grad_norm": 0.7242795825004578, + "learning_rate": 1.002285217825341e-05, + "loss": 2.0531, + "step": 15257 + }, + { + "epoch": 0.51, + "grad_norm": 0.743888795375824, + "learning_rate": 1.0021789287964766e-05, + "loss": 2.1296, + "step": 15258 + }, + { + "epoch": 0.51, + "grad_norm": 0.7710480093955994, + "learning_rate": 1.0020726397429954e-05, + "loss": 2.103, + "step": 15259 + }, + { + "epoch": 0.51, + "grad_norm": 0.7453544735908508, + "learning_rate": 1.0019663506660988e-05, + "loss": 2.1141, + "step": 15260 + }, + { + "epoch": 0.51, + "grad_norm": 0.7427123785018921, + "learning_rate": 1.0018600615669878e-05, + "loss": 2.0837, + "step": 15261 + }, + { + "epoch": 0.51, + "grad_norm": 0.7413340210914612, + "learning_rate": 1.0017537724468626e-05, + "loss": 2.0458, + "step": 15262 + }, + { + "epoch": 0.51, + "grad_norm": 0.7319753766059875, + "learning_rate": 1.0016474833069245e-05, + "loss": 2.0056, + "step": 15263 + }, + { + "epoch": 0.51, + "grad_norm": 0.7666097283363342, + "learning_rate": 1.0015411941483739e-05, + "loss": 2.0842, + "step": 15264 + }, + { + "epoch": 0.51, + "grad_norm": 0.7362604737281799, + "learning_rate": 1.001434904972412e-05, + "loss": 2.0875, + "step": 15265 + }, + { + "epoch": 0.51, + "grad_norm": 0.7272161841392517, + "learning_rate": 1.0013286157802393e-05, + "loss": 2.0756, + "step": 15266 + }, + { + "epoch": 0.51, + "grad_norm": 0.7550996541976929, + "learning_rate": 1.0012223265730568e-05, + "loss": 2.0467, + "step": 15267 + }, + { + "epoch": 0.51, + "grad_norm": 0.7245338559150696, + "learning_rate": 1.0011160373520648e-05, + "loss": 2.0044, + "step": 15268 + }, + { + "epoch": 0.51, + "grad_norm": 0.7105261087417603, + "learning_rate": 1.0010097481184648e-05, + "loss": 2.0823, + "step": 15269 + }, + { + "epoch": 0.51, + "grad_norm": 0.7497519254684448, + "learning_rate": 1.0009034588734575e-05, + "loss": 2.0664, + "step": 15270 + }, + { + "epoch": 0.51, + "grad_norm": 0.724078357219696, + "learning_rate": 1.0007971696182431e-05, + "loss": 2.0628, + "step": 15271 + }, + { + "epoch": 0.51, + "grad_norm": 0.7513481378555298, + "learning_rate": 1.0006908803540225e-05, + "loss": 2.0779, + "step": 15272 + }, + { + "epoch": 0.51, + "grad_norm": 0.7692030072212219, + "learning_rate": 1.0005845910819971e-05, + "loss": 2.0697, + "step": 15273 + }, + { + "epoch": 0.51, + "grad_norm": 0.7543445825576782, + "learning_rate": 1.0004783018033673e-05, + "loss": 2.1127, + "step": 15274 + }, + { + "epoch": 0.51, + "grad_norm": 0.7230091094970703, + "learning_rate": 1.0003720125193337e-05, + "loss": 2.1016, + "step": 15275 + }, + { + "epoch": 0.51, + "grad_norm": 0.6973388195037842, + "learning_rate": 1.0002657232310975e-05, + "loss": 2.0636, + "step": 15276 + }, + { + "epoch": 0.51, + "grad_norm": 0.7234853506088257, + "learning_rate": 1.0001594339398593e-05, + "loss": 2.058, + "step": 15277 + }, + { + "epoch": 0.51, + "grad_norm": 0.7300195097923279, + "learning_rate": 1.0000531446468202e-05, + "loss": 1.999, + "step": 15278 + }, + { + "epoch": 0.51, + "grad_norm": 0.7298647165298462, + "learning_rate": 9.999468553531801e-06, + "loss": 2.0329, + "step": 15279 + }, + { + "epoch": 0.51, + "grad_norm": 0.7457165718078613, + "learning_rate": 9.998405660601407e-06, + "loss": 2.0123, + "step": 15280 + }, + { + "epoch": 0.51, + "grad_norm": 0.7942795157432556, + "learning_rate": 9.997342767689028e-06, + "loss": 2.0454, + "step": 15281 + }, + { + "epoch": 0.51, + "grad_norm": 0.7216944694519043, + "learning_rate": 9.996279874806665e-06, + "loss": 2.0531, + "step": 15282 + }, + { + "epoch": 0.51, + "grad_norm": 0.7550262808799744, + "learning_rate": 9.99521698196633e-06, + "loss": 2.1393, + "step": 15283 + }, + { + "epoch": 0.51, + "grad_norm": 0.732352614402771, + "learning_rate": 9.99415408918003e-06, + "loss": 2.0892, + "step": 15284 + }, + { + "epoch": 0.51, + "grad_norm": 0.7159614562988281, + "learning_rate": 9.993091196459774e-06, + "loss": 2.0502, + "step": 15285 + }, + { + "epoch": 0.51, + "grad_norm": 0.7350059747695923, + "learning_rate": 9.992028303817576e-06, + "loss": 2.0457, + "step": 15286 + }, + { + "epoch": 0.51, + "grad_norm": 0.7518167495727539, + "learning_rate": 9.99096541126543e-06, + "loss": 2.0672, + "step": 15287 + }, + { + "epoch": 0.51, + "grad_norm": 0.7341758012771606, + "learning_rate": 9.989902518815354e-06, + "loss": 2.0402, + "step": 15288 + }, + { + "epoch": 0.51, + "grad_norm": 0.7129610180854797, + "learning_rate": 9.988839626479352e-06, + "loss": 2.0887, + "step": 15289 + }, + { + "epoch": 0.51, + "grad_norm": 0.7049269676208496, + "learning_rate": 9.987776734269437e-06, + "loss": 1.9986, + "step": 15290 + }, + { + "epoch": 0.51, + "grad_norm": 0.7403312921524048, + "learning_rate": 9.98671384219761e-06, + "loss": 2.0856, + "step": 15291 + }, + { + "epoch": 0.51, + "grad_norm": 0.7347792387008667, + "learning_rate": 9.985650950275884e-06, + "loss": 2.0887, + "step": 15292 + }, + { + "epoch": 0.51, + "grad_norm": 0.7298508882522583, + "learning_rate": 9.984588058516261e-06, + "loss": 2.0803, + "step": 15293 + }, + { + "epoch": 0.51, + "grad_norm": 0.7205401659011841, + "learning_rate": 9.983525166930762e-06, + "loss": 2.0711, + "step": 15294 + }, + { + "epoch": 0.51, + "grad_norm": 0.7332109808921814, + "learning_rate": 9.982462275531377e-06, + "loss": 2.013, + "step": 15295 + }, + { + "epoch": 0.51, + "grad_norm": 0.7301912307739258, + "learning_rate": 9.981399384330125e-06, + "loss": 2.0295, + "step": 15296 + }, + { + "epoch": 0.51, + "grad_norm": 0.7130862474441528, + "learning_rate": 9.980336493339014e-06, + "loss": 2.0465, + "step": 15297 + }, + { + "epoch": 0.51, + "grad_norm": 0.7575035095214844, + "learning_rate": 9.979273602570049e-06, + "loss": 2.1012, + "step": 15298 + }, + { + "epoch": 0.51, + "grad_norm": 0.7416663765907288, + "learning_rate": 9.97821071203524e-06, + "loss": 1.9937, + "step": 15299 + }, + { + "epoch": 0.51, + "grad_norm": 0.7405804395675659, + "learning_rate": 9.977147821746593e-06, + "loss": 2.1182, + "step": 15300 + }, + { + "epoch": 0.51, + "grad_norm": 0.7277283072471619, + "learning_rate": 9.976084931716112e-06, + "loss": 2.1212, + "step": 15301 + }, + { + "epoch": 0.51, + "grad_norm": 0.7182308435440063, + "learning_rate": 9.975022041955812e-06, + "loss": 2.0121, + "step": 15302 + }, + { + "epoch": 0.51, + "grad_norm": 0.719869077205658, + "learning_rate": 9.973959152477703e-06, + "loss": 2.1158, + "step": 15303 + }, + { + "epoch": 0.51, + "grad_norm": 0.7614814043045044, + "learning_rate": 9.972896263293784e-06, + "loss": 2.0855, + "step": 15304 + }, + { + "epoch": 0.51, + "grad_norm": 0.7154949903488159, + "learning_rate": 9.971833374416068e-06, + "loss": 2.0485, + "step": 15305 + }, + { + "epoch": 0.51, + "grad_norm": 0.7173162698745728, + "learning_rate": 9.970770485856563e-06, + "loss": 2.0618, + "step": 15306 + }, + { + "epoch": 0.51, + "grad_norm": 0.7708456516265869, + "learning_rate": 9.969707597627272e-06, + "loss": 2.1038, + "step": 15307 + }, + { + "epoch": 0.51, + "grad_norm": 0.7407368421554565, + "learning_rate": 9.968644709740213e-06, + "loss": 2.0913, + "step": 15308 + }, + { + "epoch": 0.51, + "grad_norm": 0.7399535179138184, + "learning_rate": 9.967581822207381e-06, + "loss": 2.0952, + "step": 15309 + }, + { + "epoch": 0.51, + "grad_norm": 0.7314403653144836, + "learning_rate": 9.966518935040795e-06, + "loss": 2.1047, + "step": 15310 + }, + { + "epoch": 0.51, + "grad_norm": 0.7201424241065979, + "learning_rate": 9.965456048252456e-06, + "loss": 2.0688, + "step": 15311 + }, + { + "epoch": 0.51, + "grad_norm": 0.7334617972373962, + "learning_rate": 9.96439316185438e-06, + "loss": 2.0652, + "step": 15312 + }, + { + "epoch": 0.51, + "grad_norm": 0.7077397704124451, + "learning_rate": 9.963330275858563e-06, + "loss": 2.1227, + "step": 15313 + }, + { + "epoch": 0.51, + "grad_norm": 0.744537353515625, + "learning_rate": 9.962267390277021e-06, + "loss": 2.1078, + "step": 15314 + }, + { + "epoch": 0.51, + "grad_norm": 0.7356935739517212, + "learning_rate": 9.961204505121757e-06, + "loss": 2.0835, + "step": 15315 + }, + { + "epoch": 0.51, + "grad_norm": 0.7220970392227173, + "learning_rate": 9.960141620404785e-06, + "loss": 2.1011, + "step": 15316 + }, + { + "epoch": 0.51, + "grad_norm": 0.7231786847114563, + "learning_rate": 9.95907873613811e-06, + "loss": 2.0847, + "step": 15317 + }, + { + "epoch": 0.51, + "grad_norm": 0.7488526701927185, + "learning_rate": 9.958015852333738e-06, + "loss": 2.077, + "step": 15318 + }, + { + "epoch": 0.51, + "grad_norm": 0.715393602848053, + "learning_rate": 9.956952969003681e-06, + "loss": 1.974, + "step": 15319 + }, + { + "epoch": 0.51, + "grad_norm": 0.768552839756012, + "learning_rate": 9.955890086159939e-06, + "loss": 2.0354, + "step": 15320 + }, + { + "epoch": 0.51, + "grad_norm": 0.7585130929946899, + "learning_rate": 9.954827203814532e-06, + "loss": 2.0856, + "step": 15321 + }, + { + "epoch": 0.51, + "grad_norm": 0.7729237079620361, + "learning_rate": 9.953764321979457e-06, + "loss": 2.1393, + "step": 15322 + }, + { + "epoch": 0.51, + "grad_norm": 0.7574151754379272, + "learning_rate": 9.952701440666722e-06, + "loss": 2.0875, + "step": 15323 + }, + { + "epoch": 0.51, + "grad_norm": 0.7221570014953613, + "learning_rate": 9.951638559888341e-06, + "loss": 2.1037, + "step": 15324 + }, + { + "epoch": 0.51, + "grad_norm": 0.7477939128875732, + "learning_rate": 9.950575679656322e-06, + "loss": 2.0826, + "step": 15325 + }, + { + "epoch": 0.51, + "grad_norm": 0.7464163899421692, + "learning_rate": 9.949512799982669e-06, + "loss": 2.1224, + "step": 15326 + }, + { + "epoch": 0.51, + "grad_norm": 0.7142251133918762, + "learning_rate": 9.948449920879389e-06, + "loss": 2.0919, + "step": 15327 + }, + { + "epoch": 0.51, + "grad_norm": 0.7431299686431885, + "learning_rate": 9.94738704235849e-06, + "loss": 2.0833, + "step": 15328 + }, + { + "epoch": 0.51, + "grad_norm": 0.7188173532485962, + "learning_rate": 9.946324164431984e-06, + "loss": 1.9877, + "step": 15329 + }, + { + "epoch": 0.51, + "grad_norm": 0.744729220867157, + "learning_rate": 9.94526128711188e-06, + "loss": 2.0454, + "step": 15330 + }, + { + "epoch": 0.51, + "grad_norm": 0.7507574558258057, + "learning_rate": 9.944198410410175e-06, + "loss": 2.0925, + "step": 15331 + }, + { + "epoch": 0.51, + "grad_norm": 0.7460050582885742, + "learning_rate": 9.943135534338887e-06, + "loss": 2.0664, + "step": 15332 + }, + { + "epoch": 0.51, + "grad_norm": 0.7492886781692505, + "learning_rate": 9.942072658910019e-06, + "loss": 2.1096, + "step": 15333 + }, + { + "epoch": 0.51, + "grad_norm": 0.7262297868728638, + "learning_rate": 9.941009784135584e-06, + "loss": 2.1055, + "step": 15334 + }, + { + "epoch": 0.51, + "grad_norm": 0.7422386407852173, + "learning_rate": 9.93994691002758e-06, + "loss": 2.1267, + "step": 15335 + }, + { + "epoch": 0.51, + "grad_norm": 0.7256451845169067, + "learning_rate": 9.938884036598024e-06, + "loss": 2.1021, + "step": 15336 + }, + { + "epoch": 0.51, + "grad_norm": 0.7455830574035645, + "learning_rate": 9.937821163858919e-06, + "loss": 2.1173, + "step": 15337 + }, + { + "epoch": 0.51, + "grad_norm": 0.7465831637382507, + "learning_rate": 9.936758291822274e-06, + "loss": 2.0572, + "step": 15338 + }, + { + "epoch": 0.51, + "grad_norm": 0.7223896384239197, + "learning_rate": 9.9356954205001e-06, + "loss": 2.0221, + "step": 15339 + }, + { + "epoch": 0.51, + "grad_norm": 0.74922776222229, + "learning_rate": 9.9346325499044e-06, + "loss": 2.1266, + "step": 15340 + }, + { + "epoch": 0.51, + "grad_norm": 0.7232711911201477, + "learning_rate": 9.93356968004718e-06, + "loss": 2.0403, + "step": 15341 + }, + { + "epoch": 0.51, + "grad_norm": 0.7061301469802856, + "learning_rate": 9.932506810940451e-06, + "loss": 2.0142, + "step": 15342 + }, + { + "epoch": 0.51, + "grad_norm": 0.7734557390213013, + "learning_rate": 9.931443942596228e-06, + "loss": 2.1026, + "step": 15343 + }, + { + "epoch": 0.51, + "grad_norm": 0.7426832318305969, + "learning_rate": 9.930381075026503e-06, + "loss": 1.9838, + "step": 15344 + }, + { + "epoch": 0.51, + "grad_norm": 0.7451736927032471, + "learning_rate": 9.929318208243293e-06, + "loss": 2.0332, + "step": 15345 + }, + { + "epoch": 0.51, + "grad_norm": 0.7048302888870239, + "learning_rate": 9.928255342258607e-06, + "loss": 2.0318, + "step": 15346 + }, + { + "epoch": 0.51, + "grad_norm": 0.7207559943199158, + "learning_rate": 9.927192477084448e-06, + "loss": 2.1094, + "step": 15347 + }, + { + "epoch": 0.51, + "grad_norm": 0.7534931898117065, + "learning_rate": 9.92612961273283e-06, + "loss": 2.0471, + "step": 15348 + }, + { + "epoch": 0.51, + "grad_norm": 0.723344624042511, + "learning_rate": 9.925066749215752e-06, + "loss": 2.0498, + "step": 15349 + }, + { + "epoch": 0.51, + "grad_norm": 0.7234449982643127, + "learning_rate": 9.924003886545225e-06, + "loss": 2.0775, + "step": 15350 + }, + { + "epoch": 0.51, + "grad_norm": 0.7601335644721985, + "learning_rate": 9.922941024733259e-06, + "loss": 2.0446, + "step": 15351 + }, + { + "epoch": 0.51, + "grad_norm": 0.7248831391334534, + "learning_rate": 9.921878163791864e-06, + "loss": 2.0859, + "step": 15352 + }, + { + "epoch": 0.51, + "grad_norm": 0.7175817489624023, + "learning_rate": 9.92081530373304e-06, + "loss": 2.0436, + "step": 15353 + }, + { + "epoch": 0.51, + "grad_norm": 0.7386490106582642, + "learning_rate": 9.919752444568798e-06, + "loss": 2.0243, + "step": 15354 + }, + { + "epoch": 0.51, + "grad_norm": 0.7008812427520752, + "learning_rate": 9.918689586311146e-06, + "loss": 2.0074, + "step": 15355 + }, + { + "epoch": 0.51, + "grad_norm": 0.740475594997406, + "learning_rate": 9.917626728972095e-06, + "loss": 2.068, + "step": 15356 + }, + { + "epoch": 0.51, + "grad_norm": 0.7467914819717407, + "learning_rate": 9.916563872563647e-06, + "loss": 2.1123, + "step": 15357 + }, + { + "epoch": 0.51, + "grad_norm": 0.719474196434021, + "learning_rate": 9.915501017097807e-06, + "loss": 2.0098, + "step": 15358 + }, + { + "epoch": 0.51, + "grad_norm": 0.7158430814743042, + "learning_rate": 9.91443816258659e-06, + "loss": 2.0239, + "step": 15359 + }, + { + "epoch": 0.51, + "grad_norm": 0.7373977899551392, + "learning_rate": 9.913375309042001e-06, + "loss": 2.0632, + "step": 15360 + }, + { + "epoch": 0.51, + "grad_norm": 0.752083957195282, + "learning_rate": 9.912312456476048e-06, + "loss": 2.1362, + "step": 15361 + }, + { + "epoch": 0.51, + "grad_norm": 0.7436091303825378, + "learning_rate": 9.911249604900737e-06, + "loss": 2.0635, + "step": 15362 + }, + { + "epoch": 0.51, + "grad_norm": 0.7555418014526367, + "learning_rate": 9.910186754328075e-06, + "loss": 2.0264, + "step": 15363 + }, + { + "epoch": 0.51, + "grad_norm": 0.7400103211402893, + "learning_rate": 9.909123904770068e-06, + "loss": 2.0187, + "step": 15364 + }, + { + "epoch": 0.51, + "grad_norm": 0.712190568447113, + "learning_rate": 9.908061056238733e-06, + "loss": 2.0523, + "step": 15365 + }, + { + "epoch": 0.51, + "grad_norm": 0.7998560070991516, + "learning_rate": 9.906998208746064e-06, + "loss": 2.1282, + "step": 15366 + }, + { + "epoch": 0.51, + "grad_norm": 0.7256661057472229, + "learning_rate": 9.905935362304076e-06, + "loss": 2.1391, + "step": 15367 + }, + { + "epoch": 0.51, + "grad_norm": 0.726402223110199, + "learning_rate": 9.904872516924776e-06, + "loss": 2.0804, + "step": 15368 + }, + { + "epoch": 0.51, + "grad_norm": 0.7185222506523132, + "learning_rate": 9.90380967262017e-06, + "loss": 2.0465, + "step": 15369 + }, + { + "epoch": 0.51, + "grad_norm": 0.7617346048355103, + "learning_rate": 9.902746829402268e-06, + "loss": 2.0661, + "step": 15370 + }, + { + "epoch": 0.51, + "grad_norm": 0.7034574151039124, + "learning_rate": 9.901683987283074e-06, + "loss": 1.9937, + "step": 15371 + }, + { + "epoch": 0.51, + "grad_norm": 0.7556415796279907, + "learning_rate": 9.900621146274594e-06, + "loss": 2.0638, + "step": 15372 + }, + { + "epoch": 0.51, + "grad_norm": 0.7601121664047241, + "learning_rate": 9.89955830638884e-06, + "loss": 2.1071, + "step": 15373 + }, + { + "epoch": 0.51, + "grad_norm": 0.7391757965087891, + "learning_rate": 9.89849546763782e-06, + "loss": 2.0533, + "step": 15374 + }, + { + "epoch": 0.51, + "grad_norm": 0.7098444700241089, + "learning_rate": 9.897432630033537e-06, + "loss": 2.1017, + "step": 15375 + }, + { + "epoch": 0.51, + "grad_norm": 0.7534666061401367, + "learning_rate": 9.896369793587998e-06, + "loss": 2.1028, + "step": 15376 + }, + { + "epoch": 0.51, + "grad_norm": 0.7309271097183228, + "learning_rate": 9.895306958313215e-06, + "loss": 2.031, + "step": 15377 + }, + { + "epoch": 0.51, + "grad_norm": 0.7496236562728882, + "learning_rate": 9.894244124221188e-06, + "loss": 2.0072, + "step": 15378 + }, + { + "epoch": 0.51, + "grad_norm": 0.7571414709091187, + "learning_rate": 9.893181291323936e-06, + "loss": 2.0447, + "step": 15379 + }, + { + "epoch": 0.51, + "grad_norm": 0.7489757537841797, + "learning_rate": 9.892118459633454e-06, + "loss": 2.0185, + "step": 15380 + }, + { + "epoch": 0.51, + "grad_norm": 0.7407941222190857, + "learning_rate": 9.891055629161756e-06, + "loss": 2.0663, + "step": 15381 + }, + { + "epoch": 0.51, + "grad_norm": 0.7439061999320984, + "learning_rate": 9.889992799920848e-06, + "loss": 2.0648, + "step": 15382 + }, + { + "epoch": 0.51, + "grad_norm": 0.7580878138542175, + "learning_rate": 9.888929971922738e-06, + "loss": 2.0232, + "step": 15383 + }, + { + "epoch": 0.51, + "grad_norm": 0.7839977145195007, + "learning_rate": 9.88786714517943e-06, + "loss": 2.0624, + "step": 15384 + }, + { + "epoch": 0.51, + "grad_norm": 0.760627269744873, + "learning_rate": 9.886804319702934e-06, + "loss": 2.0722, + "step": 15385 + }, + { + "epoch": 0.51, + "grad_norm": 0.7217568159103394, + "learning_rate": 9.885741495505255e-06, + "loss": 2.0922, + "step": 15386 + }, + { + "epoch": 0.51, + "grad_norm": 0.7457327842712402, + "learning_rate": 9.884678672598406e-06, + "loss": 2.1363, + "step": 15387 + }, + { + "epoch": 0.51, + "grad_norm": 0.7632504105567932, + "learning_rate": 9.883615850994384e-06, + "loss": 2.1028, + "step": 15388 + }, + { + "epoch": 0.51, + "grad_norm": 0.7467462420463562, + "learning_rate": 9.882553030705206e-06, + "loss": 2.0979, + "step": 15389 + }, + { + "epoch": 0.51, + "grad_norm": 0.7233224511146545, + "learning_rate": 9.881490211742873e-06, + "loss": 2.0171, + "step": 15390 + }, + { + "epoch": 0.51, + "grad_norm": 0.7475437521934509, + "learning_rate": 9.880427394119394e-06, + "loss": 2.0718, + "step": 15391 + }, + { + "epoch": 0.51, + "grad_norm": 0.7187190055847168, + "learning_rate": 9.87936457784678e-06, + "loss": 2.1157, + "step": 15392 + }, + { + "epoch": 0.51, + "grad_norm": 0.7324836254119873, + "learning_rate": 9.87830176293703e-06, + "loss": 2.0941, + "step": 15393 + }, + { + "epoch": 0.51, + "grad_norm": 0.7730200886726379, + "learning_rate": 9.877238949402154e-06, + "loss": 2.0695, + "step": 15394 + }, + { + "epoch": 0.51, + "grad_norm": 0.7878044843673706, + "learning_rate": 9.876176137254164e-06, + "loss": 2.0871, + "step": 15395 + }, + { + "epoch": 0.51, + "grad_norm": 0.7498047351837158, + "learning_rate": 9.875113326505064e-06, + "loss": 2.0561, + "step": 15396 + }, + { + "epoch": 0.51, + "grad_norm": 0.7387615442276001, + "learning_rate": 9.87405051716686e-06, + "loss": 2.0795, + "step": 15397 + }, + { + "epoch": 0.51, + "grad_norm": 0.7281858921051025, + "learning_rate": 9.872987709251557e-06, + "loss": 2.0904, + "step": 15398 + }, + { + "epoch": 0.51, + "grad_norm": 0.7290946841239929, + "learning_rate": 9.871924902771166e-06, + "loss": 2.0523, + "step": 15399 + }, + { + "epoch": 0.51, + "grad_norm": 0.7390920519828796, + "learning_rate": 9.87086209773769e-06, + "loss": 2.0774, + "step": 15400 + }, + { + "epoch": 0.51, + "grad_norm": 0.7358347773551941, + "learning_rate": 9.869799294163145e-06, + "loss": 2.0841, + "step": 15401 + }, + { + "epoch": 0.51, + "grad_norm": 0.7207438945770264, + "learning_rate": 9.868736492059524e-06, + "loss": 2.0145, + "step": 15402 + }, + { + "epoch": 0.51, + "grad_norm": 0.7457445859909058, + "learning_rate": 9.867673691438844e-06, + "loss": 2.0136, + "step": 15403 + }, + { + "epoch": 0.51, + "grad_norm": 0.7311504483222961, + "learning_rate": 9.866610892313108e-06, + "loss": 2.0981, + "step": 15404 + }, + { + "epoch": 0.51, + "grad_norm": 0.7590753436088562, + "learning_rate": 9.865548094694328e-06, + "loss": 2.0612, + "step": 15405 + }, + { + "epoch": 0.51, + "grad_norm": 0.7475900650024414, + "learning_rate": 9.864485298594504e-06, + "loss": 2.0596, + "step": 15406 + }, + { + "epoch": 0.51, + "grad_norm": 0.7344780564308167, + "learning_rate": 9.863422504025645e-06, + "loss": 2.0409, + "step": 15407 + }, + { + "epoch": 0.51, + "grad_norm": 0.7245326042175293, + "learning_rate": 9.862359710999758e-06, + "loss": 2.0726, + "step": 15408 + }, + { + "epoch": 0.51, + "grad_norm": 0.7149578928947449, + "learning_rate": 9.86129691952885e-06, + "loss": 2.1089, + "step": 15409 + }, + { + "epoch": 0.51, + "grad_norm": 0.7664355635643005, + "learning_rate": 9.860234129624932e-06, + "loss": 2.053, + "step": 15410 + }, + { + "epoch": 0.51, + "grad_norm": 0.7224113941192627, + "learning_rate": 9.859171341300003e-06, + "loss": 2.1228, + "step": 15411 + }, + { + "epoch": 0.51, + "grad_norm": 0.8034083247184753, + "learning_rate": 9.858108554566076e-06, + "loss": 2.0027, + "step": 15412 + }, + { + "epoch": 0.51, + "grad_norm": 0.7370339632034302, + "learning_rate": 9.857045769435155e-06, + "loss": 2.0495, + "step": 15413 + }, + { + "epoch": 0.51, + "grad_norm": 0.6925652623176575, + "learning_rate": 9.855982985919246e-06, + "loss": 2.0543, + "step": 15414 + }, + { + "epoch": 0.51, + "grad_norm": 0.7595086097717285, + "learning_rate": 9.854920204030358e-06, + "loss": 2.0787, + "step": 15415 + }, + { + "epoch": 0.51, + "grad_norm": 0.7260364294052124, + "learning_rate": 9.853857423780493e-06, + "loss": 2.0787, + "step": 15416 + }, + { + "epoch": 0.51, + "grad_norm": 0.7198963761329651, + "learning_rate": 9.852794645181666e-06, + "loss": 2.0078, + "step": 15417 + }, + { + "epoch": 0.51, + "grad_norm": 0.7344509363174438, + "learning_rate": 9.851731868245877e-06, + "loss": 2.0207, + "step": 15418 + }, + { + "epoch": 0.51, + "grad_norm": 0.7224626541137695, + "learning_rate": 9.850669092985136e-06, + "loss": 2.1186, + "step": 15419 + }, + { + "epoch": 0.51, + "grad_norm": 0.7282788157463074, + "learning_rate": 9.849606319411445e-06, + "loss": 2.1431, + "step": 15420 + }, + { + "epoch": 0.51, + "grad_norm": 0.7216843962669373, + "learning_rate": 9.848543547536813e-06, + "loss": 2.0717, + "step": 15421 + }, + { + "epoch": 0.51, + "grad_norm": 0.7225651144981384, + "learning_rate": 9.84748077737325e-06, + "loss": 2.0695, + "step": 15422 + }, + { + "epoch": 0.51, + "grad_norm": 0.7397006154060364, + "learning_rate": 9.846418008932762e-06, + "loss": 2.0807, + "step": 15423 + }, + { + "epoch": 0.51, + "grad_norm": 0.7230384349822998, + "learning_rate": 9.84535524222735e-06, + "loss": 1.9347, + "step": 15424 + }, + { + "epoch": 0.51, + "grad_norm": 0.7308531403541565, + "learning_rate": 9.844292477269023e-06, + "loss": 2.003, + "step": 15425 + }, + { + "epoch": 0.51, + "grad_norm": 0.7896338701248169, + "learning_rate": 9.84322971406979e-06, + "loss": 2.0688, + "step": 15426 + }, + { + "epoch": 0.51, + "grad_norm": 0.74184650182724, + "learning_rate": 9.842166952641656e-06, + "loss": 2.0828, + "step": 15427 + }, + { + "epoch": 0.51, + "grad_norm": 0.7163426280021667, + "learning_rate": 9.841104192996627e-06, + "loss": 2.0951, + "step": 15428 + }, + { + "epoch": 0.51, + "grad_norm": 0.7500051259994507, + "learning_rate": 9.840041435146708e-06, + "loss": 2.0631, + "step": 15429 + }, + { + "epoch": 0.51, + "grad_norm": 0.7333345413208008, + "learning_rate": 9.838978679103908e-06, + "loss": 1.9789, + "step": 15430 + }, + { + "epoch": 0.51, + "grad_norm": 0.7438106536865234, + "learning_rate": 9.837915924880232e-06, + "loss": 2.0958, + "step": 15431 + }, + { + "epoch": 0.51, + "grad_norm": 0.7173997163772583, + "learning_rate": 9.83685317248769e-06, + "loss": 2.0991, + "step": 15432 + }, + { + "epoch": 0.51, + "grad_norm": 0.710135817527771, + "learning_rate": 9.835790421938284e-06, + "loss": 2.0511, + "step": 15433 + }, + { + "epoch": 0.51, + "grad_norm": 0.7174953818321228, + "learning_rate": 9.83472767324402e-06, + "loss": 2.1526, + "step": 15434 + }, + { + "epoch": 0.51, + "grad_norm": 0.7140005230903625, + "learning_rate": 9.833664926416904e-06, + "loss": 2.1002, + "step": 15435 + }, + { + "epoch": 0.51, + "grad_norm": 0.7471922636032104, + "learning_rate": 9.832602181468952e-06, + "loss": 2.1362, + "step": 15436 + }, + { + "epoch": 0.51, + "grad_norm": 0.7396267652511597, + "learning_rate": 9.831539438412153e-06, + "loss": 2.0854, + "step": 15437 + }, + { + "epoch": 0.51, + "grad_norm": 0.7174857258796692, + "learning_rate": 9.830476697258528e-06, + "loss": 2.0772, + "step": 15438 + }, + { + "epoch": 0.51, + "grad_norm": 0.752330482006073, + "learning_rate": 9.829413958020078e-06, + "loss": 2.1341, + "step": 15439 + }, + { + "epoch": 0.51, + "grad_norm": 0.7354570031166077, + "learning_rate": 9.828351220708807e-06, + "loss": 2.1309, + "step": 15440 + }, + { + "epoch": 0.51, + "grad_norm": 0.7684403657913208, + "learning_rate": 9.827288485336726e-06, + "loss": 2.0429, + "step": 15441 + }, + { + "epoch": 0.51, + "grad_norm": 0.746667206287384, + "learning_rate": 9.826225751915837e-06, + "loss": 2.0474, + "step": 15442 + }, + { + "epoch": 0.51, + "grad_norm": 0.7403775453567505, + "learning_rate": 9.825163020458145e-06, + "loss": 2.113, + "step": 15443 + }, + { + "epoch": 0.51, + "grad_norm": 0.7618185877799988, + "learning_rate": 9.824100290975661e-06, + "loss": 2.0831, + "step": 15444 + }, + { + "epoch": 0.51, + "grad_norm": 0.7202662229537964, + "learning_rate": 9.823037563480391e-06, + "loss": 2.0557, + "step": 15445 + }, + { + "epoch": 0.51, + "grad_norm": 0.7305876016616821, + "learning_rate": 9.821974837984337e-06, + "loss": 2.028, + "step": 15446 + }, + { + "epoch": 0.51, + "grad_norm": 0.7291291356086731, + "learning_rate": 9.820912114499507e-06, + "loss": 2.1397, + "step": 15447 + }, + { + "epoch": 0.51, + "grad_norm": 0.7440987825393677, + "learning_rate": 9.819849393037905e-06, + "loss": 2.1266, + "step": 15448 + }, + { + "epoch": 0.51, + "grad_norm": 0.7154314517974854, + "learning_rate": 9.818786673611545e-06, + "loss": 2.0856, + "step": 15449 + }, + { + "epoch": 0.51, + "grad_norm": 0.7714183330535889, + "learning_rate": 9.817723956232422e-06, + "loss": 2.0478, + "step": 15450 + }, + { + "epoch": 0.51, + "grad_norm": 0.7148356437683105, + "learning_rate": 9.816661240912545e-06, + "loss": 2.0562, + "step": 15451 + }, + { + "epoch": 0.51, + "grad_norm": 0.7106906771659851, + "learning_rate": 9.815598527663924e-06, + "loss": 2.0265, + "step": 15452 + }, + { + "epoch": 0.51, + "grad_norm": 0.7191160321235657, + "learning_rate": 9.814535816498563e-06, + "loss": 2.1333, + "step": 15453 + }, + { + "epoch": 0.51, + "grad_norm": 0.7423055768013, + "learning_rate": 9.81347310742847e-06, + "loss": 2.1441, + "step": 15454 + }, + { + "epoch": 0.51, + "grad_norm": 0.719419538974762, + "learning_rate": 9.812410400465646e-06, + "loss": 2.0527, + "step": 15455 + }, + { + "epoch": 0.51, + "grad_norm": 0.7294454574584961, + "learning_rate": 9.8113476956221e-06, + "loss": 1.9811, + "step": 15456 + }, + { + "epoch": 0.51, + "grad_norm": 0.7310861349105835, + "learning_rate": 9.810284992909835e-06, + "loss": 2.0688, + "step": 15457 + }, + { + "epoch": 0.51, + "grad_norm": 0.7448965907096863, + "learning_rate": 9.809222292340865e-06, + "loss": 2.0857, + "step": 15458 + }, + { + "epoch": 0.51, + "grad_norm": 0.6905385851860046, + "learning_rate": 9.808159593927183e-06, + "loss": 2.0013, + "step": 15459 + }, + { + "epoch": 0.51, + "grad_norm": 0.7185955047607422, + "learning_rate": 9.807096897680805e-06, + "loss": 2.0877, + "step": 15460 + }, + { + "epoch": 0.51, + "grad_norm": 0.735254168510437, + "learning_rate": 9.80603420361373e-06, + "loss": 2.0117, + "step": 15461 + }, + { + "epoch": 0.51, + "grad_norm": 0.7139628529548645, + "learning_rate": 9.804971511737971e-06, + "loss": 2.0588, + "step": 15462 + }, + { + "epoch": 0.51, + "grad_norm": 0.7532000541687012, + "learning_rate": 9.80390882206553e-06, + "loss": 1.9247, + "step": 15463 + }, + { + "epoch": 0.51, + "grad_norm": 0.7532366514205933, + "learning_rate": 9.802846134608409e-06, + "loss": 2.1071, + "step": 15464 + }, + { + "epoch": 0.51, + "grad_norm": 0.740577757358551, + "learning_rate": 9.801783449378616e-06, + "loss": 2.13, + "step": 15465 + }, + { + "epoch": 0.51, + "grad_norm": 0.7298146486282349, + "learning_rate": 9.800720766388159e-06, + "loss": 2.0301, + "step": 15466 + }, + { + "epoch": 0.51, + "grad_norm": 0.7567242383956909, + "learning_rate": 9.799658085649045e-06, + "loss": 2.1368, + "step": 15467 + }, + { + "epoch": 0.51, + "grad_norm": 0.7340618371963501, + "learning_rate": 9.798595407173272e-06, + "loss": 1.9951, + "step": 15468 + }, + { + "epoch": 0.51, + "grad_norm": 0.737190306186676, + "learning_rate": 9.797532730972853e-06, + "loss": 2.1096, + "step": 15469 + }, + { + "epoch": 0.51, + "grad_norm": 0.7751051783561707, + "learning_rate": 9.796470057059788e-06, + "loss": 2.1154, + "step": 15470 + }, + { + "epoch": 0.51, + "grad_norm": 0.7434642314910889, + "learning_rate": 9.79540738544609e-06, + "loss": 2.0401, + "step": 15471 + }, + { + "epoch": 0.51, + "grad_norm": 0.7171930074691772, + "learning_rate": 9.794344716143754e-06, + "loss": 2.0856, + "step": 15472 + }, + { + "epoch": 0.51, + "grad_norm": 0.737261950969696, + "learning_rate": 9.793282049164791e-06, + "loss": 2.1137, + "step": 15473 + }, + { + "epoch": 0.51, + "grad_norm": 0.7464218735694885, + "learning_rate": 9.792219384521207e-06, + "loss": 2.117, + "step": 15474 + }, + { + "epoch": 0.51, + "grad_norm": 0.7863865494728088, + "learning_rate": 9.791156722225007e-06, + "loss": 2.0856, + "step": 15475 + }, + { + "epoch": 0.51, + "grad_norm": 0.7412142753601074, + "learning_rate": 9.7900940622882e-06, + "loss": 2.0507, + "step": 15476 + }, + { + "epoch": 0.51, + "grad_norm": 0.7396461367607117, + "learning_rate": 9.789031404722785e-06, + "loss": 2.0501, + "step": 15477 + }, + { + "epoch": 0.51, + "grad_norm": 0.7308434844017029, + "learning_rate": 9.787968749540767e-06, + "loss": 2.076, + "step": 15478 + }, + { + "epoch": 0.51, + "grad_norm": 0.7044723629951477, + "learning_rate": 9.786906096754154e-06, + "loss": 1.9995, + "step": 15479 + }, + { + "epoch": 0.52, + "grad_norm": 0.7285619378089905, + "learning_rate": 9.785843446374955e-06, + "loss": 2.1147, + "step": 15480 + }, + { + "epoch": 0.52, + "grad_norm": 0.7292072772979736, + "learning_rate": 9.784780798415167e-06, + "loss": 2.027, + "step": 15481 + }, + { + "epoch": 0.52, + "grad_norm": 0.7500810623168945, + "learning_rate": 9.7837181528868e-06, + "loss": 2.1657, + "step": 15482 + }, + { + "epoch": 0.52, + "grad_norm": 0.7479604482650757, + "learning_rate": 9.782655509801859e-06, + "loss": 2.0836, + "step": 15483 + }, + { + "epoch": 0.52, + "grad_norm": 0.7441113591194153, + "learning_rate": 9.78159286917235e-06, + "loss": 2.0057, + "step": 15484 + }, + { + "epoch": 0.52, + "grad_norm": 0.6986281275749207, + "learning_rate": 9.780530231010279e-06, + "loss": 1.9845, + "step": 15485 + }, + { + "epoch": 0.52, + "grad_norm": 0.7486786246299744, + "learning_rate": 9.779467595327644e-06, + "loss": 2.0646, + "step": 15486 + }, + { + "epoch": 0.52, + "grad_norm": 0.7273546457290649, + "learning_rate": 9.778404962136455e-06, + "loss": 2.0678, + "step": 15487 + }, + { + "epoch": 0.52, + "grad_norm": 0.7597822546958923, + "learning_rate": 9.777342331448717e-06, + "loss": 2.0996, + "step": 15488 + }, + { + "epoch": 0.52, + "grad_norm": 0.7094641923904419, + "learning_rate": 9.776279703276439e-06, + "loss": 2.0453, + "step": 15489 + }, + { + "epoch": 0.52, + "grad_norm": 0.7305936813354492, + "learning_rate": 9.775217077631619e-06, + "loss": 2.0448, + "step": 15490 + }, + { + "epoch": 0.52, + "grad_norm": 0.7562252879142761, + "learning_rate": 9.774154454526265e-06, + "loss": 2.0429, + "step": 15491 + }, + { + "epoch": 0.52, + "grad_norm": 0.7288805246353149, + "learning_rate": 9.77309183397238e-06, + "loss": 2.0711, + "step": 15492 + }, + { + "epoch": 0.52, + "grad_norm": 0.7238330245018005, + "learning_rate": 9.77202921598197e-06, + "loss": 2.0819, + "step": 15493 + }, + { + "epoch": 0.52, + "grad_norm": 0.7518173456192017, + "learning_rate": 9.770966600567046e-06, + "loss": 2.0907, + "step": 15494 + }, + { + "epoch": 0.52, + "grad_norm": 0.7401343584060669, + "learning_rate": 9.769903987739602e-06, + "loss": 2.0953, + "step": 15495 + }, + { + "epoch": 0.52, + "grad_norm": 0.7601081728935242, + "learning_rate": 9.768841377511649e-06, + "loss": 2.098, + "step": 15496 + }, + { + "epoch": 0.52, + "grad_norm": 0.7254143953323364, + "learning_rate": 9.76777876989519e-06, + "loss": 2.1037, + "step": 15497 + }, + { + "epoch": 0.52, + "grad_norm": 0.754649817943573, + "learning_rate": 9.766716164902234e-06, + "loss": 2.076, + "step": 15498 + }, + { + "epoch": 0.52, + "grad_norm": 0.7536258697509766, + "learning_rate": 9.765653562544779e-06, + "loss": 2.0219, + "step": 15499 + }, + { + "epoch": 0.52, + "grad_norm": 0.7242909669876099, + "learning_rate": 9.764590962834832e-06, + "loss": 2.0932, + "step": 15500 + }, + { + "epoch": 0.52, + "grad_norm": 0.7305471301078796, + "learning_rate": 9.763528365784397e-06, + "loss": 2.0499, + "step": 15501 + }, + { + "epoch": 0.52, + "grad_norm": 0.7612824440002441, + "learning_rate": 9.762465771405488e-06, + "loss": 2.0851, + "step": 15502 + }, + { + "epoch": 0.52, + "grad_norm": 0.7543918490409851, + "learning_rate": 9.761403179710092e-06, + "loss": 2.0588, + "step": 15503 + }, + { + "epoch": 0.52, + "grad_norm": 0.7554641962051392, + "learning_rate": 9.760340590710227e-06, + "loss": 2.0982, + "step": 15504 + }, + { + "epoch": 0.52, + "grad_norm": 0.7310226559638977, + "learning_rate": 9.759278004417893e-06, + "loss": 1.9929, + "step": 15505 + }, + { + "epoch": 0.52, + "grad_norm": 0.7380367517471313, + "learning_rate": 9.758215420845095e-06, + "loss": 2.0211, + "step": 15506 + }, + { + "epoch": 0.52, + "grad_norm": 0.7349383234977722, + "learning_rate": 9.75715284000384e-06, + "loss": 1.9907, + "step": 15507 + }, + { + "epoch": 0.52, + "grad_norm": 0.7183637022972107, + "learning_rate": 9.756090261906129e-06, + "loss": 2.0757, + "step": 15508 + }, + { + "epoch": 0.52, + "grad_norm": 0.7491658926010132, + "learning_rate": 9.755027686563963e-06, + "loss": 2.103, + "step": 15509 + }, + { + "epoch": 0.52, + "grad_norm": 0.7532989978790283, + "learning_rate": 9.753965113989353e-06, + "loss": 2.1514, + "step": 15510 + }, + { + "epoch": 0.52, + "grad_norm": 0.7262265682220459, + "learning_rate": 9.752902544194304e-06, + "loss": 2.0946, + "step": 15511 + }, + { + "epoch": 0.52, + "grad_norm": 0.7389784455299377, + "learning_rate": 9.751839977190815e-06, + "loss": 2.1384, + "step": 15512 + }, + { + "epoch": 0.52, + "grad_norm": 0.7374013066291809, + "learning_rate": 9.750777412990892e-06, + "loss": 2.0292, + "step": 15513 + }, + { + "epoch": 0.52, + "grad_norm": 0.7303351759910583, + "learning_rate": 9.74971485160654e-06, + "loss": 2.0003, + "step": 15514 + }, + { + "epoch": 0.52, + "grad_norm": 0.7219321727752686, + "learning_rate": 9.748652293049761e-06, + "loss": 2.0752, + "step": 15515 + }, + { + "epoch": 0.52, + "grad_norm": 0.7591698169708252, + "learning_rate": 9.747589737332566e-06, + "loss": 2.1115, + "step": 15516 + }, + { + "epoch": 0.52, + "grad_norm": 0.7176534533500671, + "learning_rate": 9.74652718446695e-06, + "loss": 1.9945, + "step": 15517 + }, + { + "epoch": 0.52, + "grad_norm": 0.7573475241661072, + "learning_rate": 9.745464634464923e-06, + "loss": 2.0965, + "step": 15518 + }, + { + "epoch": 0.52, + "grad_norm": 0.7309889793395996, + "learning_rate": 9.744402087338485e-06, + "loss": 2.1003, + "step": 15519 + }, + { + "epoch": 0.52, + "grad_norm": 0.7442038059234619, + "learning_rate": 9.743339543099649e-06, + "loss": 2.0572, + "step": 15520 + }, + { + "epoch": 0.52, + "grad_norm": 0.7452618479728699, + "learning_rate": 9.742277001760406e-06, + "loss": 2.0766, + "step": 15521 + }, + { + "epoch": 0.52, + "grad_norm": 0.7495506405830383, + "learning_rate": 9.741214463332766e-06, + "loss": 2.1218, + "step": 15522 + }, + { + "epoch": 0.52, + "grad_norm": 0.7246361374855042, + "learning_rate": 9.740151927828736e-06, + "loss": 2.0471, + "step": 15523 + }, + { + "epoch": 0.52, + "grad_norm": 0.7134158611297607, + "learning_rate": 9.739089395260316e-06, + "loss": 2.0002, + "step": 15524 + }, + { + "epoch": 0.52, + "grad_norm": 0.7300854325294495, + "learning_rate": 9.738026865639515e-06, + "loss": 2.0589, + "step": 15525 + }, + { + "epoch": 0.52, + "grad_norm": 0.7746339440345764, + "learning_rate": 9.73696433897833e-06, + "loss": 2.0503, + "step": 15526 + }, + { + "epoch": 0.52, + "grad_norm": 0.7450358271598816, + "learning_rate": 9.735901815288767e-06, + "loss": 2.1289, + "step": 15527 + }, + { + "epoch": 0.52, + "grad_norm": 0.7547447681427002, + "learning_rate": 9.734839294582828e-06, + "loss": 2.0906, + "step": 15528 + }, + { + "epoch": 0.52, + "grad_norm": 0.7442801594734192, + "learning_rate": 9.733776776872528e-06, + "loss": 2.1284, + "step": 15529 + }, + { + "epoch": 0.52, + "grad_norm": 0.7449796795845032, + "learning_rate": 9.732714262169854e-06, + "loss": 1.9984, + "step": 15530 + }, + { + "epoch": 0.52, + "grad_norm": 0.7429680824279785, + "learning_rate": 9.731651750486819e-06, + "loss": 2.107, + "step": 15531 + }, + { + "epoch": 0.52, + "grad_norm": 0.7331522107124329, + "learning_rate": 9.730589241835426e-06, + "loss": 2.0447, + "step": 15532 + }, + { + "epoch": 0.52, + "grad_norm": 0.7749835848808289, + "learning_rate": 9.72952673622768e-06, + "loss": 2.0516, + "step": 15533 + }, + { + "epoch": 0.52, + "grad_norm": 0.7580354809761047, + "learning_rate": 9.72846423367558e-06, + "loss": 2.0497, + "step": 15534 + }, + { + "epoch": 0.52, + "grad_norm": 0.7220734357833862, + "learning_rate": 9.727401734191134e-06, + "loss": 2.0757, + "step": 15535 + }, + { + "epoch": 0.52, + "grad_norm": 0.766269326210022, + "learning_rate": 9.726339237786338e-06, + "loss": 2.0983, + "step": 15536 + }, + { + "epoch": 0.52, + "grad_norm": 0.7298220992088318, + "learning_rate": 9.725276744473206e-06, + "loss": 2.0778, + "step": 15537 + }, + { + "epoch": 0.52, + "grad_norm": 0.7452158331871033, + "learning_rate": 9.724214254263737e-06, + "loss": 2.0963, + "step": 15538 + }, + { + "epoch": 0.52, + "grad_norm": 0.7327550649642944, + "learning_rate": 9.72315176716993e-06, + "loss": 2.0423, + "step": 15539 + }, + { + "epoch": 0.52, + "grad_norm": 0.7246344089508057, + "learning_rate": 9.722089283203795e-06, + "loss": 2.0486, + "step": 15540 + }, + { + "epoch": 0.52, + "grad_norm": 0.7150371670722961, + "learning_rate": 9.721026802377331e-06, + "loss": 2.071, + "step": 15541 + }, + { + "epoch": 0.52, + "grad_norm": 0.7200292944908142, + "learning_rate": 9.719964324702545e-06, + "loss": 2.0106, + "step": 15542 + }, + { + "epoch": 0.52, + "grad_norm": 0.7637087106704712, + "learning_rate": 9.718901850191435e-06, + "loss": 2.0936, + "step": 15543 + }, + { + "epoch": 0.52, + "grad_norm": 0.7801696062088013, + "learning_rate": 9.717839378856006e-06, + "loss": 2.1, + "step": 15544 + }, + { + "epoch": 0.52, + "grad_norm": 0.7393117547035217, + "learning_rate": 9.716776910708265e-06, + "loss": 2.0983, + "step": 15545 + }, + { + "epoch": 0.52, + "grad_norm": 0.7470100522041321, + "learning_rate": 9.715714445760212e-06, + "loss": 2.0617, + "step": 15546 + }, + { + "epoch": 0.52, + "grad_norm": 0.7474226951599121, + "learning_rate": 9.714651984023853e-06, + "loss": 2.0158, + "step": 15547 + }, + { + "epoch": 0.52, + "grad_norm": 0.7845041751861572, + "learning_rate": 9.713589525511185e-06, + "loss": 2.0418, + "step": 15548 + }, + { + "epoch": 0.52, + "grad_norm": 0.7198737859725952, + "learning_rate": 9.712527070234216e-06, + "loss": 2.022, + "step": 15549 + }, + { + "epoch": 0.52, + "grad_norm": 0.7450085282325745, + "learning_rate": 9.711464618204945e-06, + "loss": 2.0532, + "step": 15550 + }, + { + "epoch": 0.52, + "grad_norm": 0.7316471338272095, + "learning_rate": 9.710402169435385e-06, + "loss": 2.1293, + "step": 15551 + }, + { + "epoch": 0.52, + "grad_norm": 0.7153242826461792, + "learning_rate": 9.709339723937524e-06, + "loss": 2.1143, + "step": 15552 + }, + { + "epoch": 0.52, + "grad_norm": 0.7542601823806763, + "learning_rate": 9.708277281723375e-06, + "loss": 2.1353, + "step": 15553 + }, + { + "epoch": 0.52, + "grad_norm": 0.7349059581756592, + "learning_rate": 9.707214842804939e-06, + "loss": 2.1952, + "step": 15554 + }, + { + "epoch": 0.52, + "grad_norm": 0.7501107454299927, + "learning_rate": 9.706152407194217e-06, + "loss": 2.105, + "step": 15555 + }, + { + "epoch": 0.52, + "grad_norm": 0.727715253829956, + "learning_rate": 9.705089974903216e-06, + "loss": 2.0784, + "step": 15556 + }, + { + "epoch": 0.52, + "grad_norm": 0.744004487991333, + "learning_rate": 9.704027545943934e-06, + "loss": 2.1167, + "step": 15557 + }, + { + "epoch": 0.52, + "grad_norm": 0.7010319828987122, + "learning_rate": 9.702965120328372e-06, + "loss": 1.9665, + "step": 15558 + }, + { + "epoch": 0.52, + "grad_norm": 0.731033444404602, + "learning_rate": 9.701902698068539e-06, + "loss": 2.0776, + "step": 15559 + }, + { + "epoch": 0.52, + "grad_norm": 0.7611697912216187, + "learning_rate": 9.700840279176435e-06, + "loss": 2.1183, + "step": 15560 + }, + { + "epoch": 0.52, + "grad_norm": 0.7328007817268372, + "learning_rate": 9.699777863664062e-06, + "loss": 2.0478, + "step": 15561 + }, + { + "epoch": 0.52, + "grad_norm": 0.7671434283256531, + "learning_rate": 9.698715451543422e-06, + "loss": 2.1109, + "step": 15562 + }, + { + "epoch": 0.52, + "grad_norm": 0.7581314444541931, + "learning_rate": 9.697653042826516e-06, + "loss": 2.0886, + "step": 15563 + }, + { + "epoch": 0.52, + "grad_norm": 0.764369785785675, + "learning_rate": 9.696590637525354e-06, + "loss": 2.0315, + "step": 15564 + }, + { + "epoch": 0.52, + "grad_norm": 0.731525719165802, + "learning_rate": 9.69552823565193e-06, + "loss": 2.0346, + "step": 15565 + }, + { + "epoch": 0.52, + "grad_norm": 0.7416642308235168, + "learning_rate": 9.694465837218247e-06, + "loss": 2.0398, + "step": 15566 + }, + { + "epoch": 0.52, + "grad_norm": 0.7409846782684326, + "learning_rate": 9.693403442236313e-06, + "loss": 2.0868, + "step": 15567 + }, + { + "epoch": 0.52, + "grad_norm": 0.7339968681335449, + "learning_rate": 9.692341050718126e-06, + "loss": 2.1072, + "step": 15568 + }, + { + "epoch": 0.52, + "grad_norm": 0.7538873553276062, + "learning_rate": 9.691278662675694e-06, + "loss": 2.0655, + "step": 15569 + }, + { + "epoch": 0.52, + "grad_norm": 0.727290689945221, + "learning_rate": 9.690216278121011e-06, + "loss": 2.0388, + "step": 15570 + }, + { + "epoch": 0.52, + "grad_norm": 0.7230735421180725, + "learning_rate": 9.689153897066082e-06, + "loss": 2.1174, + "step": 15571 + }, + { + "epoch": 0.52, + "grad_norm": 0.7105750441551208, + "learning_rate": 9.68809151952291e-06, + "loss": 2.0501, + "step": 15572 + }, + { + "epoch": 0.52, + "grad_norm": 0.7411559224128723, + "learning_rate": 9.687029145503503e-06, + "loss": 2.0669, + "step": 15573 + }, + { + "epoch": 0.52, + "grad_norm": 0.7452191710472107, + "learning_rate": 9.68596677501985e-06, + "loss": 2.0551, + "step": 15574 + }, + { + "epoch": 0.52, + "grad_norm": 0.7293837666511536, + "learning_rate": 9.684904408083962e-06, + "loss": 2.0482, + "step": 15575 + }, + { + "epoch": 0.52, + "grad_norm": 0.7480900287628174, + "learning_rate": 9.683842044707841e-06, + "loss": 2.0255, + "step": 15576 + }, + { + "epoch": 0.52, + "grad_norm": 0.7426881194114685, + "learning_rate": 9.682779684903486e-06, + "loss": 2.087, + "step": 15577 + }, + { + "epoch": 0.52, + "grad_norm": 0.7375836968421936, + "learning_rate": 9.681717328682904e-06, + "loss": 2.0823, + "step": 15578 + }, + { + "epoch": 0.52, + "grad_norm": 0.7787740230560303, + "learning_rate": 9.68065497605809e-06, + "loss": 2.1588, + "step": 15579 + }, + { + "epoch": 0.52, + "grad_norm": 0.7724153399467468, + "learning_rate": 9.679592627041046e-06, + "loss": 2.1198, + "step": 15580 + }, + { + "epoch": 0.52, + "grad_norm": 0.7504175901412964, + "learning_rate": 9.678530281643779e-06, + "loss": 2.0601, + "step": 15581 + }, + { + "epoch": 0.52, + "grad_norm": 0.7513518929481506, + "learning_rate": 9.677467939878292e-06, + "loss": 2.0904, + "step": 15582 + }, + { + "epoch": 0.52, + "grad_norm": 0.7302078604698181, + "learning_rate": 9.67640560175658e-06, + "loss": 2.096, + "step": 15583 + }, + { + "epoch": 0.52, + "grad_norm": 0.7388531565666199, + "learning_rate": 9.675343267290647e-06, + "loss": 2.0967, + "step": 15584 + }, + { + "epoch": 0.52, + "grad_norm": 0.7590838670730591, + "learning_rate": 9.674280936492496e-06, + "loss": 2.0323, + "step": 15585 + }, + { + "epoch": 0.52, + "grad_norm": 0.75392085313797, + "learning_rate": 9.673218609374127e-06, + "loss": 2.0687, + "step": 15586 + }, + { + "epoch": 0.52, + "grad_norm": 0.7369232177734375, + "learning_rate": 9.672156285947549e-06, + "loss": 2.0584, + "step": 15587 + }, + { + "epoch": 0.52, + "grad_norm": 0.7518595457077026, + "learning_rate": 9.671093966224749e-06, + "loss": 2.1102, + "step": 15588 + }, + { + "epoch": 0.52, + "grad_norm": 0.795049250125885, + "learning_rate": 9.67003165021774e-06, + "loss": 2.065, + "step": 15589 + }, + { + "epoch": 0.52, + "grad_norm": 0.719650387763977, + "learning_rate": 9.668969337938521e-06, + "loss": 2.0489, + "step": 15590 + }, + { + "epoch": 0.52, + "grad_norm": 0.7470910549163818, + "learning_rate": 9.667907029399094e-06, + "loss": 2.0173, + "step": 15591 + }, + { + "epoch": 0.52, + "grad_norm": 0.7344533801078796, + "learning_rate": 9.666844724611456e-06, + "loss": 2.1142, + "step": 15592 + }, + { + "epoch": 0.52, + "grad_norm": 0.7749903202056885, + "learning_rate": 9.665782423587611e-06, + "loss": 2.0897, + "step": 15593 + }, + { + "epoch": 0.52, + "grad_norm": 0.7238495945930481, + "learning_rate": 9.664720126339559e-06, + "loss": 2.0848, + "step": 15594 + }, + { + "epoch": 0.52, + "grad_norm": 0.7621783018112183, + "learning_rate": 9.663657832879307e-06, + "loss": 2.0491, + "step": 15595 + }, + { + "epoch": 0.52, + "grad_norm": 0.8091264367103577, + "learning_rate": 9.662595543218847e-06, + "loss": 2.1907, + "step": 15596 + }, + { + "epoch": 0.52, + "grad_norm": 0.7234330177307129, + "learning_rate": 9.661533257370188e-06, + "loss": 2.0327, + "step": 15597 + }, + { + "epoch": 0.52, + "grad_norm": 0.7885359525680542, + "learning_rate": 9.660470975345325e-06, + "loss": 1.9826, + "step": 15598 + }, + { + "epoch": 0.52, + "grad_norm": 0.822218656539917, + "learning_rate": 9.659408697156264e-06, + "loss": 2.0889, + "step": 15599 + }, + { + "epoch": 0.52, + "grad_norm": 0.7173131704330444, + "learning_rate": 9.658346422815006e-06, + "loss": 2.0554, + "step": 15600 + }, + { + "epoch": 0.52, + "grad_norm": 0.7182657122612, + "learning_rate": 9.657284152333547e-06, + "loss": 2.0971, + "step": 15601 + }, + { + "epoch": 0.52, + "grad_norm": 0.8015856146812439, + "learning_rate": 9.65622188572389e-06, + "loss": 2.0837, + "step": 15602 + }, + { + "epoch": 0.52, + "grad_norm": 0.7591501474380493, + "learning_rate": 9.655159622998037e-06, + "loss": 2.0357, + "step": 15603 + }, + { + "epoch": 0.52, + "grad_norm": 0.7415878176689148, + "learning_rate": 9.65409736416799e-06, + "loss": 2.0732, + "step": 15604 + }, + { + "epoch": 0.52, + "grad_norm": 0.7339306473731995, + "learning_rate": 9.653035109245749e-06, + "loss": 1.9814, + "step": 15605 + }, + { + "epoch": 0.52, + "grad_norm": 0.7654830813407898, + "learning_rate": 9.651972858243312e-06, + "loss": 2.0572, + "step": 15606 + }, + { + "epoch": 0.52, + "grad_norm": 0.7673883438110352, + "learning_rate": 9.650910611172681e-06, + "loss": 2.0857, + "step": 15607 + }, + { + "epoch": 0.52, + "grad_norm": 0.7576623558998108, + "learning_rate": 9.649848368045856e-06, + "loss": 2.142, + "step": 15608 + }, + { + "epoch": 0.52, + "grad_norm": 0.7089091539382935, + "learning_rate": 9.648786128874846e-06, + "loss": 2.0497, + "step": 15609 + }, + { + "epoch": 0.52, + "grad_norm": 0.7996172308921814, + "learning_rate": 9.647723893671639e-06, + "loss": 2.0808, + "step": 15610 + }, + { + "epoch": 0.52, + "grad_norm": 0.7176097631454468, + "learning_rate": 9.646661662448241e-06, + "loss": 2.0339, + "step": 15611 + }, + { + "epoch": 0.52, + "grad_norm": 0.7384870052337646, + "learning_rate": 9.645599435216653e-06, + "loss": 2.0507, + "step": 15612 + }, + { + "epoch": 0.52, + "grad_norm": 0.7355840802192688, + "learning_rate": 9.644537211988877e-06, + "loss": 2.0441, + "step": 15613 + }, + { + "epoch": 0.52, + "grad_norm": 0.7313699126243591, + "learning_rate": 9.64347499277691e-06, + "loss": 2.0774, + "step": 15614 + }, + { + "epoch": 0.52, + "grad_norm": 0.728209376335144, + "learning_rate": 9.642412777592753e-06, + "loss": 2.0528, + "step": 15615 + }, + { + "epoch": 0.52, + "grad_norm": 0.7632866501808167, + "learning_rate": 9.641350566448405e-06, + "loss": 2.0603, + "step": 15616 + }, + { + "epoch": 0.52, + "grad_norm": 0.7017009854316711, + "learning_rate": 9.640288359355875e-06, + "loss": 2.0926, + "step": 15617 + }, + { + "epoch": 0.52, + "grad_norm": 0.729819118976593, + "learning_rate": 9.639226156327149e-06, + "loss": 2.0396, + "step": 15618 + }, + { + "epoch": 0.52, + "grad_norm": 0.7357673645019531, + "learning_rate": 9.638163957374238e-06, + "loss": 2.0588, + "step": 15619 + }, + { + "epoch": 0.52, + "grad_norm": 0.7222998738288879, + "learning_rate": 9.637101762509136e-06, + "loss": 2.0495, + "step": 15620 + }, + { + "epoch": 0.52, + "grad_norm": 0.7392330765724182, + "learning_rate": 9.636039571743845e-06, + "loss": 2.1075, + "step": 15621 + }, + { + "epoch": 0.52, + "grad_norm": 0.7758607864379883, + "learning_rate": 9.634977385090372e-06, + "loss": 2.0415, + "step": 15622 + }, + { + "epoch": 0.52, + "grad_norm": 0.7159187197685242, + "learning_rate": 9.633915202560705e-06, + "loss": 2.029, + "step": 15623 + }, + { + "epoch": 0.52, + "grad_norm": 0.7334617972373962, + "learning_rate": 9.63285302416685e-06, + "loss": 2.0291, + "step": 15624 + }, + { + "epoch": 0.52, + "grad_norm": 0.7247999310493469, + "learning_rate": 9.631790849920805e-06, + "loss": 2.0225, + "step": 15625 + }, + { + "epoch": 0.52, + "grad_norm": 0.7163276076316833, + "learning_rate": 9.630728679834577e-06, + "loss": 2.0576, + "step": 15626 + }, + { + "epoch": 0.52, + "grad_norm": 0.7445126175880432, + "learning_rate": 9.629666513920155e-06, + "loss": 2.0651, + "step": 15627 + }, + { + "epoch": 0.52, + "grad_norm": 0.776853621006012, + "learning_rate": 9.628604352189546e-06, + "loss": 2.086, + "step": 15628 + }, + { + "epoch": 0.52, + "grad_norm": 0.738005518913269, + "learning_rate": 9.627542194654743e-06, + "loss": 2.079, + "step": 15629 + }, + { + "epoch": 0.52, + "grad_norm": 0.7269394397735596, + "learning_rate": 9.626480041327754e-06, + "loss": 2.0773, + "step": 15630 + }, + { + "epoch": 0.52, + "grad_norm": 0.7316299676895142, + "learning_rate": 9.625417892220575e-06, + "loss": 2.1224, + "step": 15631 + }, + { + "epoch": 0.52, + "grad_norm": 0.7305302023887634, + "learning_rate": 9.624355747345205e-06, + "loss": 2.0894, + "step": 15632 + }, + { + "epoch": 0.52, + "grad_norm": 0.7339333891868591, + "learning_rate": 9.623293606713641e-06, + "loss": 2.0949, + "step": 15633 + }, + { + "epoch": 0.52, + "grad_norm": 0.7231512665748596, + "learning_rate": 9.622231470337887e-06, + "loss": 2.0842, + "step": 15634 + }, + { + "epoch": 0.52, + "grad_norm": 0.7310609221458435, + "learning_rate": 9.621169338229943e-06, + "loss": 2.1719, + "step": 15635 + }, + { + "epoch": 0.52, + "grad_norm": 0.7382004857063293, + "learning_rate": 9.620107210401802e-06, + "loss": 2.1282, + "step": 15636 + }, + { + "epoch": 0.52, + "grad_norm": 0.7339346408843994, + "learning_rate": 9.619045086865467e-06, + "loss": 2.0786, + "step": 15637 + }, + { + "epoch": 0.52, + "grad_norm": 0.7268326878547668, + "learning_rate": 9.617982967632937e-06, + "loss": 2.0701, + "step": 15638 + }, + { + "epoch": 0.52, + "grad_norm": 0.7250897884368896, + "learning_rate": 9.616920852716214e-06, + "loss": 2.0398, + "step": 15639 + }, + { + "epoch": 0.52, + "grad_norm": 0.7248212099075317, + "learning_rate": 9.615858742127296e-06, + "loss": 2.1378, + "step": 15640 + }, + { + "epoch": 0.52, + "grad_norm": 0.725259006023407, + "learning_rate": 9.614796635878178e-06, + "loss": 2.0715, + "step": 15641 + }, + { + "epoch": 0.52, + "grad_norm": 0.7079594731330872, + "learning_rate": 9.613734533980861e-06, + "loss": 2.1301, + "step": 15642 + }, + { + "epoch": 0.52, + "grad_norm": 0.7052373886108398, + "learning_rate": 9.612672436447345e-06, + "loss": 2.071, + "step": 15643 + }, + { + "epoch": 0.52, + "grad_norm": 0.7427425384521484, + "learning_rate": 9.611610343289635e-06, + "loss": 2.0805, + "step": 15644 + }, + { + "epoch": 0.52, + "grad_norm": 0.7867558002471924, + "learning_rate": 9.610548254519716e-06, + "loss": 2.0804, + "step": 15645 + }, + { + "epoch": 0.52, + "grad_norm": 0.7661941647529602, + "learning_rate": 9.609486170149597e-06, + "loss": 2.1064, + "step": 15646 + }, + { + "epoch": 0.52, + "grad_norm": 0.7181205749511719, + "learning_rate": 9.608424090191274e-06, + "loss": 2.063, + "step": 15647 + }, + { + "epoch": 0.52, + "grad_norm": 0.7304525375366211, + "learning_rate": 9.60736201465675e-06, + "loss": 2.105, + "step": 15648 + }, + { + "epoch": 0.52, + "grad_norm": 0.7100653052330017, + "learning_rate": 9.606299943558017e-06, + "loss": 2.0914, + "step": 15649 + }, + { + "epoch": 0.52, + "grad_norm": 0.7128437757492065, + "learning_rate": 9.605237876907075e-06, + "loss": 1.9821, + "step": 15650 + }, + { + "epoch": 0.52, + "grad_norm": 0.7711062431335449, + "learning_rate": 9.604175814715923e-06, + "loss": 2.076, + "step": 15651 + }, + { + "epoch": 0.52, + "grad_norm": 0.7840140461921692, + "learning_rate": 9.603113756996562e-06, + "loss": 2.0621, + "step": 15652 + }, + { + "epoch": 0.52, + "grad_norm": 0.7471088767051697, + "learning_rate": 9.602051703760994e-06, + "loss": 2.0794, + "step": 15653 + }, + { + "epoch": 0.52, + "grad_norm": 0.7418235540390015, + "learning_rate": 9.600989655021208e-06, + "loss": 2.0833, + "step": 15654 + }, + { + "epoch": 0.52, + "grad_norm": 0.7122145891189575, + "learning_rate": 9.599927610789207e-06, + "loss": 2.0023, + "step": 15655 + }, + { + "epoch": 0.52, + "grad_norm": 0.7504203915596008, + "learning_rate": 9.59886557107699e-06, + "loss": 2.0605, + "step": 15656 + }, + { + "epoch": 0.52, + "grad_norm": 0.7434459924697876, + "learning_rate": 9.597803535896558e-06, + "loss": 2.0022, + "step": 15657 + }, + { + "epoch": 0.52, + "grad_norm": 0.7607999444007874, + "learning_rate": 9.596741505259903e-06, + "loss": 2.0711, + "step": 15658 + }, + { + "epoch": 0.52, + "grad_norm": 0.7218484282493591, + "learning_rate": 9.595679479179023e-06, + "loss": 2.1075, + "step": 15659 + }, + { + "epoch": 0.52, + "grad_norm": 0.6917795538902283, + "learning_rate": 9.594617457665921e-06, + "loss": 1.9981, + "step": 15660 + }, + { + "epoch": 0.52, + "grad_norm": 0.7257257699966431, + "learning_rate": 9.593555440732595e-06, + "loss": 2.0641, + "step": 15661 + }, + { + "epoch": 0.52, + "grad_norm": 0.7253341674804688, + "learning_rate": 9.592493428391042e-06, + "loss": 2.0578, + "step": 15662 + }, + { + "epoch": 0.52, + "grad_norm": 0.7403224110603333, + "learning_rate": 9.59143142065326e-06, + "loss": 2.0707, + "step": 15663 + }, + { + "epoch": 0.52, + "grad_norm": 0.7456210851669312, + "learning_rate": 9.590369417531245e-06, + "loss": 2.059, + "step": 15664 + }, + { + "epoch": 0.52, + "grad_norm": 0.7223693132400513, + "learning_rate": 9.589307419036993e-06, + "loss": 2.0749, + "step": 15665 + }, + { + "epoch": 0.52, + "grad_norm": 0.7177839875221252, + "learning_rate": 9.588245425182513e-06, + "loss": 2.0554, + "step": 15666 + }, + { + "epoch": 0.52, + "grad_norm": 0.741969645023346, + "learning_rate": 9.587183435979788e-06, + "loss": 2.0956, + "step": 15667 + }, + { + "epoch": 0.52, + "grad_norm": 0.7246341109275818, + "learning_rate": 9.586121451440824e-06, + "loss": 2.1022, + "step": 15668 + }, + { + "epoch": 0.52, + "grad_norm": 0.7434824109077454, + "learning_rate": 9.585059471577618e-06, + "loss": 2.0682, + "step": 15669 + }, + { + "epoch": 0.52, + "grad_norm": 0.7317240834236145, + "learning_rate": 9.583997496402166e-06, + "loss": 2.0692, + "step": 15670 + }, + { + "epoch": 0.52, + "grad_norm": 0.7391767501831055, + "learning_rate": 9.58293552592647e-06, + "loss": 2.0706, + "step": 15671 + }, + { + "epoch": 0.52, + "grad_norm": 0.7675814032554626, + "learning_rate": 9.58187356016252e-06, + "loss": 2.066, + "step": 15672 + }, + { + "epoch": 0.52, + "grad_norm": 0.7880501747131348, + "learning_rate": 9.580811599122318e-06, + "loss": 2.1229, + "step": 15673 + }, + { + "epoch": 0.52, + "grad_norm": 0.8052398562431335, + "learning_rate": 9.579749642817862e-06, + "loss": 2.1123, + "step": 15674 + }, + { + "epoch": 0.52, + "grad_norm": 0.7399470210075378, + "learning_rate": 9.578687691261149e-06, + "loss": 2.0813, + "step": 15675 + }, + { + "epoch": 0.52, + "grad_norm": 0.699654757976532, + "learning_rate": 9.577625744464176e-06, + "loss": 2.0749, + "step": 15676 + }, + { + "epoch": 0.52, + "grad_norm": 0.7200848460197449, + "learning_rate": 9.576563802438937e-06, + "loss": 2.0896, + "step": 15677 + }, + { + "epoch": 0.52, + "grad_norm": 0.7724472284317017, + "learning_rate": 9.575501865197433e-06, + "loss": 2.0847, + "step": 15678 + }, + { + "epoch": 0.52, + "grad_norm": 0.7234969735145569, + "learning_rate": 9.574439932751663e-06, + "loss": 2.1139, + "step": 15679 + }, + { + "epoch": 0.52, + "grad_norm": 0.7368679046630859, + "learning_rate": 9.573378005113618e-06, + "loss": 2.0898, + "step": 15680 + }, + { + "epoch": 0.52, + "grad_norm": 0.7394647002220154, + "learning_rate": 9.572316082295297e-06, + "loss": 2.1129, + "step": 15681 + }, + { + "epoch": 0.52, + "grad_norm": 0.792491614818573, + "learning_rate": 9.571254164308701e-06, + "loss": 1.9983, + "step": 15682 + }, + { + "epoch": 0.52, + "grad_norm": 0.7280409932136536, + "learning_rate": 9.570192251165823e-06, + "loss": 2.0351, + "step": 15683 + }, + { + "epoch": 0.52, + "grad_norm": 0.7441772222518921, + "learning_rate": 9.569130342878663e-06, + "loss": 2.0424, + "step": 15684 + }, + { + "epoch": 0.52, + "grad_norm": 0.7174262404441833, + "learning_rate": 9.568068439459214e-06, + "loss": 2.089, + "step": 15685 + }, + { + "epoch": 0.52, + "grad_norm": 0.7425598502159119, + "learning_rate": 9.567006540919475e-06, + "loss": 2.0367, + "step": 15686 + }, + { + "epoch": 0.52, + "grad_norm": 0.7531047463417053, + "learning_rate": 9.56594464727144e-06, + "loss": 2.1734, + "step": 15687 + }, + { + "epoch": 0.52, + "grad_norm": 0.7682279348373413, + "learning_rate": 9.564882758527115e-06, + "loss": 2.0675, + "step": 15688 + }, + { + "epoch": 0.52, + "grad_norm": 0.751871645450592, + "learning_rate": 9.563820874698483e-06, + "loss": 2.0384, + "step": 15689 + }, + { + "epoch": 0.52, + "grad_norm": 0.7457606792449951, + "learning_rate": 9.562758995797548e-06, + "loss": 2.1188, + "step": 15690 + }, + { + "epoch": 0.52, + "grad_norm": 0.7763810157775879, + "learning_rate": 9.561697121836308e-06, + "loss": 2.0363, + "step": 15691 + }, + { + "epoch": 0.52, + "grad_norm": 0.7256711721420288, + "learning_rate": 9.560635252826755e-06, + "loss": 2.0025, + "step": 15692 + }, + { + "epoch": 0.52, + "grad_norm": 0.7388671636581421, + "learning_rate": 9.55957338878089e-06, + "loss": 2.0253, + "step": 15693 + }, + { + "epoch": 0.52, + "grad_norm": 0.7272011041641235, + "learning_rate": 9.558511529710704e-06, + "loss": 2.0942, + "step": 15694 + }, + { + "epoch": 0.52, + "grad_norm": 0.7340438365936279, + "learning_rate": 9.557449675628195e-06, + "loss": 2.0918, + "step": 15695 + }, + { + "epoch": 0.52, + "grad_norm": 0.7305469512939453, + "learning_rate": 9.556387826545363e-06, + "loss": 2.1108, + "step": 15696 + }, + { + "epoch": 0.52, + "grad_norm": 0.7320858240127563, + "learning_rate": 9.555325982474201e-06, + "loss": 2.0539, + "step": 15697 + }, + { + "epoch": 0.52, + "grad_norm": 0.757124662399292, + "learning_rate": 9.554264143426706e-06, + "loss": 2.0506, + "step": 15698 + }, + { + "epoch": 0.52, + "grad_norm": 0.7323154807090759, + "learning_rate": 9.553202309414872e-06, + "loss": 2.0326, + "step": 15699 + }, + { + "epoch": 0.52, + "grad_norm": 0.7463230490684509, + "learning_rate": 9.552140480450695e-06, + "loss": 2.0583, + "step": 15700 + }, + { + "epoch": 0.52, + "grad_norm": 0.7544329166412354, + "learning_rate": 9.551078656546173e-06, + "loss": 1.9961, + "step": 15701 + }, + { + "epoch": 0.52, + "grad_norm": 0.7124444246292114, + "learning_rate": 9.550016837713307e-06, + "loss": 2.0705, + "step": 15702 + }, + { + "epoch": 0.52, + "grad_norm": 0.7511016726493835, + "learning_rate": 9.54895502396408e-06, + "loss": 2.0319, + "step": 15703 + }, + { + "epoch": 0.52, + "grad_norm": 0.7437857985496521, + "learning_rate": 9.547893215310498e-06, + "loss": 2.1308, + "step": 15704 + }, + { + "epoch": 0.52, + "grad_norm": 0.7489336729049683, + "learning_rate": 9.546831411764552e-06, + "loss": 2.1006, + "step": 15705 + }, + { + "epoch": 0.52, + "grad_norm": 0.7377965450286865, + "learning_rate": 9.545769613338242e-06, + "loss": 2.032, + "step": 15706 + }, + { + "epoch": 0.52, + "grad_norm": 0.7281315326690674, + "learning_rate": 9.544707820043558e-06, + "loss": 2.0589, + "step": 15707 + }, + { + "epoch": 0.52, + "grad_norm": 0.7634711265563965, + "learning_rate": 9.5436460318925e-06, + "loss": 2.1049, + "step": 15708 + }, + { + "epoch": 0.52, + "grad_norm": 0.7188063859939575, + "learning_rate": 9.542584248897057e-06, + "loss": 2.0291, + "step": 15709 + }, + { + "epoch": 0.52, + "grad_norm": 0.7308862209320068, + "learning_rate": 9.541522471069237e-06, + "loss": 2.1017, + "step": 15710 + }, + { + "epoch": 0.52, + "grad_norm": 0.7983438968658447, + "learning_rate": 9.540460698421022e-06, + "loss": 2.103, + "step": 15711 + }, + { + "epoch": 0.52, + "grad_norm": 0.7809951901435852, + "learning_rate": 9.539398930964412e-06, + "loss": 2.0465, + "step": 15712 + }, + { + "epoch": 0.52, + "grad_norm": 0.7638314962387085, + "learning_rate": 9.538337168711405e-06, + "loss": 2.1133, + "step": 15713 + }, + { + "epoch": 0.52, + "grad_norm": 0.7468096017837524, + "learning_rate": 9.537275411673992e-06, + "loss": 2.1609, + "step": 15714 + }, + { + "epoch": 0.52, + "grad_norm": 0.7424658536911011, + "learning_rate": 9.536213659864176e-06, + "loss": 2.0808, + "step": 15715 + }, + { + "epoch": 0.52, + "grad_norm": 0.7466991543769836, + "learning_rate": 9.535151913293942e-06, + "loss": 2.0162, + "step": 15716 + }, + { + "epoch": 0.52, + "grad_norm": 0.7339297533035278, + "learning_rate": 9.534090171975286e-06, + "loss": 2.0988, + "step": 15717 + }, + { + "epoch": 0.52, + "grad_norm": 0.70721834897995, + "learning_rate": 9.53302843592021e-06, + "loss": 2.0169, + "step": 15718 + }, + { + "epoch": 0.52, + "grad_norm": 0.7596811056137085, + "learning_rate": 9.531966705140707e-06, + "loss": 2.0618, + "step": 15719 + }, + { + "epoch": 0.52, + "grad_norm": 0.7654244303703308, + "learning_rate": 9.530904979648767e-06, + "loss": 2.1149, + "step": 15720 + }, + { + "epoch": 0.52, + "grad_norm": 0.7776036262512207, + "learning_rate": 9.529843259456389e-06, + "loss": 2.0832, + "step": 15721 + }, + { + "epoch": 0.52, + "grad_norm": 0.7850261926651001, + "learning_rate": 9.528781544575563e-06, + "loss": 2.1057, + "step": 15722 + }, + { + "epoch": 0.52, + "grad_norm": 0.7335439920425415, + "learning_rate": 9.52771983501829e-06, + "loss": 2.1113, + "step": 15723 + }, + { + "epoch": 0.52, + "grad_norm": 0.7434214353561401, + "learning_rate": 9.526658130796566e-06, + "loss": 2.122, + "step": 15724 + }, + { + "epoch": 0.52, + "grad_norm": 0.7806556820869446, + "learning_rate": 9.525596431922372e-06, + "loss": 2.1181, + "step": 15725 + }, + { + "epoch": 0.52, + "grad_norm": 0.7381771206855774, + "learning_rate": 9.524534738407716e-06, + "loss": 2.091, + "step": 15726 + }, + { + "epoch": 0.52, + "grad_norm": 0.7438307404518127, + "learning_rate": 9.523473050264588e-06, + "loss": 2.1019, + "step": 15727 + }, + { + "epoch": 0.52, + "grad_norm": 0.7604383826255798, + "learning_rate": 9.522411367504984e-06, + "loss": 2.0668, + "step": 15728 + }, + { + "epoch": 0.52, + "grad_norm": 0.7254573702812195, + "learning_rate": 9.521349690140892e-06, + "loss": 2.0621, + "step": 15729 + }, + { + "epoch": 0.52, + "grad_norm": 0.7669029831886292, + "learning_rate": 9.520288018184311e-06, + "loss": 2.0862, + "step": 15730 + }, + { + "epoch": 0.52, + "grad_norm": 0.7311447262763977, + "learning_rate": 9.519226351647237e-06, + "loss": 2.0101, + "step": 15731 + }, + { + "epoch": 0.52, + "grad_norm": 0.8445455431938171, + "learning_rate": 9.518164690541662e-06, + "loss": 2.0962, + "step": 15732 + }, + { + "epoch": 0.52, + "grad_norm": 0.724023699760437, + "learning_rate": 9.51710303487958e-06, + "loss": 2.093, + "step": 15733 + }, + { + "epoch": 0.52, + "grad_norm": 0.7382174730300903, + "learning_rate": 9.516041384672987e-06, + "loss": 2.0085, + "step": 15734 + }, + { + "epoch": 0.52, + "grad_norm": 0.734093964099884, + "learning_rate": 9.51497973993387e-06, + "loss": 1.9806, + "step": 15735 + }, + { + "epoch": 0.52, + "grad_norm": 0.760665237903595, + "learning_rate": 9.51391810067423e-06, + "loss": 2.1102, + "step": 15736 + }, + { + "epoch": 0.52, + "grad_norm": 0.7396479249000549, + "learning_rate": 9.512856466906061e-06, + "loss": 2.0624, + "step": 15737 + }, + { + "epoch": 0.52, + "grad_norm": 0.7344732880592346, + "learning_rate": 9.511794838641349e-06, + "loss": 2.0368, + "step": 15738 + }, + { + "epoch": 0.52, + "grad_norm": 0.7259513735771179, + "learning_rate": 9.510733215892095e-06, + "loss": 2.0857, + "step": 15739 + }, + { + "epoch": 0.52, + "grad_norm": 0.7683669328689575, + "learning_rate": 9.50967159867029e-06, + "loss": 2.059, + "step": 15740 + }, + { + "epoch": 0.52, + "grad_norm": 0.7500695586204529, + "learning_rate": 9.50860998698793e-06, + "loss": 2.1114, + "step": 15741 + }, + { + "epoch": 0.52, + "grad_norm": 0.7788426280021667, + "learning_rate": 9.507548380857006e-06, + "loss": 2.1336, + "step": 15742 + }, + { + "epoch": 0.52, + "grad_norm": 0.7230142951011658, + "learning_rate": 9.50648678028951e-06, + "loss": 2.0642, + "step": 15743 + }, + { + "epoch": 0.52, + "grad_norm": 0.7244279980659485, + "learning_rate": 9.505425185297436e-06, + "loss": 2.1015, + "step": 15744 + }, + { + "epoch": 0.52, + "grad_norm": 0.7430877089500427, + "learning_rate": 9.50436359589278e-06, + "loss": 2.0847, + "step": 15745 + }, + { + "epoch": 0.52, + "grad_norm": 0.7482494711875916, + "learning_rate": 9.503302012087536e-06, + "loss": 2.0845, + "step": 15746 + }, + { + "epoch": 0.52, + "grad_norm": 0.7594310641288757, + "learning_rate": 9.502240433893692e-06, + "loss": 2.0668, + "step": 15747 + }, + { + "epoch": 0.52, + "grad_norm": 0.7254951596260071, + "learning_rate": 9.501178861323245e-06, + "loss": 2.0711, + "step": 15748 + }, + { + "epoch": 0.52, + "grad_norm": 0.7483556270599365, + "learning_rate": 9.500117294388185e-06, + "loss": 2.1087, + "step": 15749 + }, + { + "epoch": 0.52, + "grad_norm": 0.7409572005271912, + "learning_rate": 9.499055733100511e-06, + "loss": 2.0474, + "step": 15750 + }, + { + "epoch": 0.52, + "grad_norm": 0.736400842666626, + "learning_rate": 9.49799417747221e-06, + "loss": 2.062, + "step": 15751 + }, + { + "epoch": 0.52, + "grad_norm": 0.7645689249038696, + "learning_rate": 9.496932627515273e-06, + "loss": 2.0918, + "step": 15752 + }, + { + "epoch": 0.52, + "grad_norm": 0.7306239604949951, + "learning_rate": 9.4958710832417e-06, + "loss": 1.9713, + "step": 15753 + }, + { + "epoch": 0.52, + "grad_norm": 0.7504016160964966, + "learning_rate": 9.494809544663477e-06, + "loss": 2.1383, + "step": 15754 + }, + { + "epoch": 0.52, + "grad_norm": 0.7311137318611145, + "learning_rate": 9.493748011792604e-06, + "loss": 2.0301, + "step": 15755 + }, + { + "epoch": 0.52, + "grad_norm": 0.7563546895980835, + "learning_rate": 9.492686484641068e-06, + "loss": 2.0522, + "step": 15756 + }, + { + "epoch": 0.52, + "grad_norm": 0.7652077674865723, + "learning_rate": 9.49162496322086e-06, + "loss": 2.0855, + "step": 15757 + }, + { + "epoch": 0.52, + "grad_norm": 0.7568408250808716, + "learning_rate": 9.490563447543975e-06, + "loss": 2.0712, + "step": 15758 + }, + { + "epoch": 0.52, + "grad_norm": 0.7727518081665039, + "learning_rate": 9.489501937622412e-06, + "loss": 2.12, + "step": 15759 + }, + { + "epoch": 0.52, + "grad_norm": 0.7362053394317627, + "learning_rate": 9.48844043346815e-06, + "loss": 2.0788, + "step": 15760 + }, + { + "epoch": 0.52, + "grad_norm": 0.742225170135498, + "learning_rate": 9.48737893509319e-06, + "loss": 2.0499, + "step": 15761 + }, + { + "epoch": 0.52, + "grad_norm": 0.7362850904464722, + "learning_rate": 9.486317442509523e-06, + "loss": 2.1266, + "step": 15762 + }, + { + "epoch": 0.52, + "grad_norm": 0.7402482032775879, + "learning_rate": 9.48525595572914e-06, + "loss": 2.1284, + "step": 15763 + }, + { + "epoch": 0.52, + "grad_norm": 0.7439380884170532, + "learning_rate": 9.484194474764037e-06, + "loss": 2.0478, + "step": 15764 + }, + { + "epoch": 0.52, + "grad_norm": 0.7478271722793579, + "learning_rate": 9.483132999626199e-06, + "loss": 2.0321, + "step": 15765 + }, + { + "epoch": 0.52, + "grad_norm": 0.7143625617027283, + "learning_rate": 9.48207153032762e-06, + "loss": 2.0246, + "step": 15766 + }, + { + "epoch": 0.52, + "grad_norm": 0.7555240988731384, + "learning_rate": 9.481010066880295e-06, + "loss": 2.0967, + "step": 15767 + }, + { + "epoch": 0.52, + "grad_norm": 0.7884346842765808, + "learning_rate": 9.479948609296217e-06, + "loss": 2.1578, + "step": 15768 + }, + { + "epoch": 0.52, + "grad_norm": 0.7755685448646545, + "learning_rate": 9.478887157587372e-06, + "loss": 2.1375, + "step": 15769 + }, + { + "epoch": 0.52, + "grad_norm": 0.723800003528595, + "learning_rate": 9.477825711765754e-06, + "loss": 2.069, + "step": 15770 + }, + { + "epoch": 0.52, + "grad_norm": 0.7671294808387756, + "learning_rate": 9.476764271843356e-06, + "loss": 2.0953, + "step": 15771 + }, + { + "epoch": 0.52, + "grad_norm": 0.7486233115196228, + "learning_rate": 9.47570283783217e-06, + "loss": 2.0755, + "step": 15772 + }, + { + "epoch": 0.52, + "grad_norm": 0.7416868209838867, + "learning_rate": 9.474641409744185e-06, + "loss": 2.0736, + "step": 15773 + }, + { + "epoch": 0.52, + "grad_norm": 0.7744722962379456, + "learning_rate": 9.473579987591391e-06, + "loss": 2.0116, + "step": 15774 + }, + { + "epoch": 0.52, + "grad_norm": 0.767002522945404, + "learning_rate": 9.472518571385784e-06, + "loss": 2.1284, + "step": 15775 + }, + { + "epoch": 0.52, + "grad_norm": 0.7750516533851624, + "learning_rate": 9.471457161139353e-06, + "loss": 2.1571, + "step": 15776 + }, + { + "epoch": 0.52, + "grad_norm": 0.7492559552192688, + "learning_rate": 9.47039575686409e-06, + "loss": 2.0626, + "step": 15777 + }, + { + "epoch": 0.52, + "grad_norm": 0.7435364127159119, + "learning_rate": 9.469334358571985e-06, + "loss": 2.0488, + "step": 15778 + }, + { + "epoch": 0.52, + "grad_norm": 0.7209482789039612, + "learning_rate": 9.468272966275029e-06, + "loss": 2.0208, + "step": 15779 + }, + { + "epoch": 0.53, + "grad_norm": 0.7359265685081482, + "learning_rate": 9.46721157998521e-06, + "loss": 1.9891, + "step": 15780 + }, + { + "epoch": 0.53, + "grad_norm": 0.735598087310791, + "learning_rate": 9.46615019971453e-06, + "loss": 2.0773, + "step": 15781 + }, + { + "epoch": 0.53, + "grad_norm": 0.7395017743110657, + "learning_rate": 9.465088825474967e-06, + "loss": 2.0549, + "step": 15782 + }, + { + "epoch": 0.53, + "grad_norm": 0.7272185683250427, + "learning_rate": 9.464027457278516e-06, + "loss": 2.0781, + "step": 15783 + }, + { + "epoch": 0.53, + "grad_norm": 0.750967264175415, + "learning_rate": 9.462966095137171e-06, + "loss": 2.0924, + "step": 15784 + }, + { + "epoch": 0.53, + "grad_norm": 0.7419408559799194, + "learning_rate": 9.461904739062919e-06, + "loss": 2.1077, + "step": 15785 + }, + { + "epoch": 0.53, + "grad_norm": 0.7364767789840698, + "learning_rate": 9.460843389067756e-06, + "loss": 2.033, + "step": 15786 + }, + { + "epoch": 0.53, + "grad_norm": 0.7381106615066528, + "learning_rate": 9.459782045163664e-06, + "loss": 2.0877, + "step": 15787 + }, + { + "epoch": 0.53, + "grad_norm": 0.7438233494758606, + "learning_rate": 9.458720707362636e-06, + "loss": 2.1061, + "step": 15788 + }, + { + "epoch": 0.53, + "grad_norm": 0.7786669731140137, + "learning_rate": 9.457659375676668e-06, + "loss": 2.0593, + "step": 15789 + }, + { + "epoch": 0.53, + "grad_norm": 0.7632341384887695, + "learning_rate": 9.456598050117747e-06, + "loss": 2.132, + "step": 15790 + }, + { + "epoch": 0.53, + "grad_norm": 0.7435451149940491, + "learning_rate": 9.455536730697862e-06, + "loss": 2.1111, + "step": 15791 + }, + { + "epoch": 0.53, + "grad_norm": 0.7558166980743408, + "learning_rate": 9.454475417429003e-06, + "loss": 2.0168, + "step": 15792 + }, + { + "epoch": 0.53, + "grad_norm": 0.7478861212730408, + "learning_rate": 9.453414110323161e-06, + "loss": 2.031, + "step": 15793 + }, + { + "epoch": 0.53, + "grad_norm": 0.7423695921897888, + "learning_rate": 9.45235280939233e-06, + "loss": 2.0486, + "step": 15794 + }, + { + "epoch": 0.53, + "grad_norm": 0.7125422358512878, + "learning_rate": 9.45129151464849e-06, + "loss": 2.0601, + "step": 15795 + }, + { + "epoch": 0.53, + "grad_norm": 0.7318973541259766, + "learning_rate": 9.450230226103638e-06, + "loss": 2.0893, + "step": 15796 + }, + { + "epoch": 0.53, + "grad_norm": 0.7244168519973755, + "learning_rate": 9.449168943769765e-06, + "loss": 2.0447, + "step": 15797 + }, + { + "epoch": 0.53, + "grad_norm": 0.7325756549835205, + "learning_rate": 9.448107667658856e-06, + "loss": 2.0813, + "step": 15798 + }, + { + "epoch": 0.53, + "grad_norm": 0.7265756130218506, + "learning_rate": 9.447046397782907e-06, + "loss": 2.0506, + "step": 15799 + }, + { + "epoch": 0.53, + "grad_norm": 0.7130411267280579, + "learning_rate": 9.445985134153901e-06, + "loss": 2.0992, + "step": 15800 + }, + { + "epoch": 0.53, + "grad_norm": 0.7140209674835205, + "learning_rate": 9.44492387678383e-06, + "loss": 2.065, + "step": 15801 + }, + { + "epoch": 0.53, + "grad_norm": 0.7602997422218323, + "learning_rate": 9.443862625684682e-06, + "loss": 2.0608, + "step": 15802 + }, + { + "epoch": 0.53, + "grad_norm": 0.7834575772285461, + "learning_rate": 9.442801380868454e-06, + "loss": 2.0289, + "step": 15803 + }, + { + "epoch": 0.53, + "grad_norm": 0.7600589394569397, + "learning_rate": 9.441740142347125e-06, + "loss": 2.0771, + "step": 15804 + }, + { + "epoch": 0.53, + "grad_norm": 0.729640543460846, + "learning_rate": 9.44067891013269e-06, + "loss": 2.1011, + "step": 15805 + }, + { + "epoch": 0.53, + "grad_norm": 0.7252516746520996, + "learning_rate": 9.439617684237135e-06, + "loss": 2.0815, + "step": 15806 + }, + { + "epoch": 0.53, + "grad_norm": 0.7101517915725708, + "learning_rate": 9.43855646467245e-06, + "loss": 2.0159, + "step": 15807 + }, + { + "epoch": 0.53, + "grad_norm": 0.7258523106575012, + "learning_rate": 9.437495251450633e-06, + "loss": 2.0312, + "step": 15808 + }, + { + "epoch": 0.53, + "grad_norm": 0.7389060258865356, + "learning_rate": 9.436434044583658e-06, + "loss": 2.1157, + "step": 15809 + }, + { + "epoch": 0.53, + "grad_norm": 0.7136008143424988, + "learning_rate": 9.43537284408352e-06, + "loss": 2.0687, + "step": 15810 + }, + { + "epoch": 0.53, + "grad_norm": 0.7355362176895142, + "learning_rate": 9.434311649962211e-06, + "loss": 2.0547, + "step": 15811 + }, + { + "epoch": 0.53, + "grad_norm": 0.7053923010826111, + "learning_rate": 9.43325046223172e-06, + "loss": 2.0235, + "step": 15812 + }, + { + "epoch": 0.53, + "grad_norm": 0.7544941306114197, + "learning_rate": 9.43218928090403e-06, + "loss": 2.1299, + "step": 15813 + }, + { + "epoch": 0.53, + "grad_norm": 0.7230919599533081, + "learning_rate": 9.431128105991134e-06, + "loss": 2.0619, + "step": 15814 + }, + { + "epoch": 0.53, + "grad_norm": 0.7529556751251221, + "learning_rate": 9.430066937505018e-06, + "loss": 2.1546, + "step": 15815 + }, + { + "epoch": 0.53, + "grad_norm": 0.7836734056472778, + "learning_rate": 9.429005775457672e-06, + "loss": 2.0359, + "step": 15816 + }, + { + "epoch": 0.53, + "grad_norm": 0.7046622633934021, + "learning_rate": 9.427944619861088e-06, + "loss": 2.0602, + "step": 15817 + }, + { + "epoch": 0.53, + "grad_norm": 0.7204525470733643, + "learning_rate": 9.426883470727245e-06, + "loss": 2.0742, + "step": 15818 + }, + { + "epoch": 0.53, + "grad_norm": 0.7767167091369629, + "learning_rate": 9.425822328068138e-06, + "loss": 2.0537, + "step": 15819 + }, + { + "epoch": 0.53, + "grad_norm": 0.7305651307106018, + "learning_rate": 9.424761191895755e-06, + "loss": 2.1283, + "step": 15820 + }, + { + "epoch": 0.53, + "grad_norm": 0.7382656931877136, + "learning_rate": 9.423700062222084e-06, + "loss": 2.1179, + "step": 15821 + }, + { + "epoch": 0.53, + "grad_norm": 0.7248588800430298, + "learning_rate": 9.42263893905911e-06, + "loss": 2.0587, + "step": 15822 + }, + { + "epoch": 0.53, + "grad_norm": 0.7596117854118347, + "learning_rate": 9.421577822418822e-06, + "loss": 2.037, + "step": 15823 + }, + { + "epoch": 0.53, + "grad_norm": 0.7299769520759583, + "learning_rate": 9.420516712313209e-06, + "loss": 2.0938, + "step": 15824 + }, + { + "epoch": 0.53, + "grad_norm": 0.7371147871017456, + "learning_rate": 9.419455608754265e-06, + "loss": 1.9524, + "step": 15825 + }, + { + "epoch": 0.53, + "grad_norm": 0.7545364499092102, + "learning_rate": 9.418394511753962e-06, + "loss": 2.1051, + "step": 15826 + }, + { + "epoch": 0.53, + "grad_norm": 0.7525614500045776, + "learning_rate": 9.4173334213243e-06, + "loss": 2.0947, + "step": 15827 + }, + { + "epoch": 0.53, + "grad_norm": 0.7079751491546631, + "learning_rate": 9.416272337477266e-06, + "loss": 2.0313, + "step": 15828 + }, + { + "epoch": 0.53, + "grad_norm": 0.7101505994796753, + "learning_rate": 9.41521126022484e-06, + "loss": 2.0589, + "step": 15829 + }, + { + "epoch": 0.53, + "grad_norm": 0.698821485042572, + "learning_rate": 9.414150189579022e-06, + "loss": 2.0423, + "step": 15830 + }, + { + "epoch": 0.53, + "grad_norm": 0.7328428030014038, + "learning_rate": 9.413089125551785e-06, + "loss": 2.0214, + "step": 15831 + }, + { + "epoch": 0.53, + "grad_norm": 0.7246461510658264, + "learning_rate": 9.412028068155126e-06, + "loss": 2.1057, + "step": 15832 + }, + { + "epoch": 0.53, + "grad_norm": 0.7135437726974487, + "learning_rate": 9.41096701740103e-06, + "loss": 2.1221, + "step": 15833 + }, + { + "epoch": 0.53, + "grad_norm": 0.7545658349990845, + "learning_rate": 9.409905973301483e-06, + "loss": 2.0592, + "step": 15834 + }, + { + "epoch": 0.53, + "grad_norm": 0.7284616231918335, + "learning_rate": 9.408844935868473e-06, + "loss": 2.1123, + "step": 15835 + }, + { + "epoch": 0.53, + "grad_norm": 0.7177366614341736, + "learning_rate": 9.407783905113985e-06, + "loss": 2.0438, + "step": 15836 + }, + { + "epoch": 0.53, + "grad_norm": 0.7558740377426147, + "learning_rate": 9.406722881050005e-06, + "loss": 2.0508, + "step": 15837 + }, + { + "epoch": 0.53, + "grad_norm": 0.7324455380439758, + "learning_rate": 9.405661863688526e-06, + "loss": 2.0253, + "step": 15838 + }, + { + "epoch": 0.53, + "grad_norm": 0.7695493102073669, + "learning_rate": 9.404600853041531e-06, + "loss": 2.1424, + "step": 15839 + }, + { + "epoch": 0.53, + "grad_norm": 0.7359585165977478, + "learning_rate": 9.403539849121006e-06, + "loss": 2.0983, + "step": 15840 + }, + { + "epoch": 0.53, + "grad_norm": 0.7323364019393921, + "learning_rate": 9.402478851938938e-06, + "loss": 2.0658, + "step": 15841 + }, + { + "epoch": 0.53, + "grad_norm": 0.7378591299057007, + "learning_rate": 9.401417861507313e-06, + "loss": 2.082, + "step": 15842 + }, + { + "epoch": 0.53, + "grad_norm": 0.7363995313644409, + "learning_rate": 9.400356877838121e-06, + "loss": 2.044, + "step": 15843 + }, + { + "epoch": 0.53, + "grad_norm": 0.7286170125007629, + "learning_rate": 9.399295900943343e-06, + "loss": 2.0859, + "step": 15844 + }, + { + "epoch": 0.53, + "grad_norm": 0.7152288556098938, + "learning_rate": 9.398234930834966e-06, + "loss": 2.0839, + "step": 15845 + }, + { + "epoch": 0.53, + "grad_norm": 0.7402381300926208, + "learning_rate": 9.397173967524981e-06, + "loss": 2.1061, + "step": 15846 + }, + { + "epoch": 0.53, + "grad_norm": 0.7478959560394287, + "learning_rate": 9.396113011025369e-06, + "loss": 2.045, + "step": 15847 + }, + { + "epoch": 0.53, + "grad_norm": 0.7314088940620422, + "learning_rate": 9.395052061348123e-06, + "loss": 2.0655, + "step": 15848 + }, + { + "epoch": 0.53, + "grad_norm": 0.7541019916534424, + "learning_rate": 9.39399111850522e-06, + "loss": 2.1193, + "step": 15849 + }, + { + "epoch": 0.53, + "grad_norm": 0.7061830759048462, + "learning_rate": 9.39293018250865e-06, + "loss": 1.999, + "step": 15850 + }, + { + "epoch": 0.53, + "grad_norm": 0.7455294132232666, + "learning_rate": 9.3918692533704e-06, + "loss": 2.1187, + "step": 15851 + }, + { + "epoch": 0.53, + "grad_norm": 0.7494887113571167, + "learning_rate": 9.390808331102459e-06, + "loss": 2.0961, + "step": 15852 + }, + { + "epoch": 0.53, + "grad_norm": 0.7483009099960327, + "learning_rate": 9.389747415716802e-06, + "loss": 2.0741, + "step": 15853 + }, + { + "epoch": 0.53, + "grad_norm": 0.7065709829330444, + "learning_rate": 9.388686507225423e-06, + "loss": 2.1226, + "step": 15854 + }, + { + "epoch": 0.53, + "grad_norm": 0.7431784272193909, + "learning_rate": 9.387625605640305e-06, + "loss": 2.1025, + "step": 15855 + }, + { + "epoch": 0.53, + "grad_norm": 0.7415103316307068, + "learning_rate": 9.386564710973438e-06, + "loss": 2.0317, + "step": 15856 + }, + { + "epoch": 0.53, + "grad_norm": 0.730280339717865, + "learning_rate": 9.385503823236802e-06, + "loss": 2.098, + "step": 15857 + }, + { + "epoch": 0.53, + "grad_norm": 0.7357711791992188, + "learning_rate": 9.38444294244238e-06, + "loss": 2.0468, + "step": 15858 + }, + { + "epoch": 0.53, + "grad_norm": 0.7279675602912903, + "learning_rate": 9.383382068602162e-06, + "loss": 2.0672, + "step": 15859 + }, + { + "epoch": 0.53, + "grad_norm": 0.7332284450531006, + "learning_rate": 9.382321201728132e-06, + "loss": 1.9717, + "step": 15860 + }, + { + "epoch": 0.53, + "grad_norm": 0.7530045509338379, + "learning_rate": 9.381260341832279e-06, + "loss": 2.043, + "step": 15861 + }, + { + "epoch": 0.53, + "grad_norm": 0.7332090139389038, + "learning_rate": 9.380199488926581e-06, + "loss": 2.057, + "step": 15862 + }, + { + "epoch": 0.53, + "grad_norm": 1.1976981163024902, + "learning_rate": 9.379138643023026e-06, + "loss": 2.1329, + "step": 15863 + }, + { + "epoch": 0.53, + "grad_norm": 0.7334766387939453, + "learning_rate": 9.378077804133598e-06, + "loss": 2.0798, + "step": 15864 + }, + { + "epoch": 0.53, + "grad_norm": 0.7234364151954651, + "learning_rate": 9.377016972270286e-06, + "loss": 1.9968, + "step": 15865 + }, + { + "epoch": 0.53, + "grad_norm": 0.7135488986968994, + "learning_rate": 9.37595614744507e-06, + "loss": 2.0541, + "step": 15866 + }, + { + "epoch": 0.53, + "grad_norm": 0.7311728596687317, + "learning_rate": 9.374895329669931e-06, + "loss": 2.0885, + "step": 15867 + }, + { + "epoch": 0.53, + "grad_norm": 0.7306535243988037, + "learning_rate": 9.373834518956861e-06, + "loss": 2.0747, + "step": 15868 + }, + { + "epoch": 0.53, + "grad_norm": 0.733503520488739, + "learning_rate": 9.372773715317842e-06, + "loss": 2.0493, + "step": 15869 + }, + { + "epoch": 0.53, + "grad_norm": 0.7361902594566345, + "learning_rate": 9.37171291876486e-06, + "loss": 2.0461, + "step": 15870 + }, + { + "epoch": 0.53, + "grad_norm": 0.7731547355651855, + "learning_rate": 9.370652129309895e-06, + "loss": 2.065, + "step": 15871 + }, + { + "epoch": 0.53, + "grad_norm": 0.7021054625511169, + "learning_rate": 9.369591346964934e-06, + "loss": 2.0787, + "step": 15872 + }, + { + "epoch": 0.53, + "grad_norm": 0.7471688389778137, + "learning_rate": 9.368530571741958e-06, + "loss": 2.0916, + "step": 15873 + }, + { + "epoch": 0.53, + "grad_norm": 0.7331233024597168, + "learning_rate": 9.36746980365296e-06, + "loss": 2.0663, + "step": 15874 + }, + { + "epoch": 0.53, + "grad_norm": 0.7613646388053894, + "learning_rate": 9.36640904270991e-06, + "loss": 2.0638, + "step": 15875 + }, + { + "epoch": 0.53, + "grad_norm": 0.7453413605690002, + "learning_rate": 9.365348288924803e-06, + "loss": 2.0288, + "step": 15876 + }, + { + "epoch": 0.53, + "grad_norm": 0.7449492812156677, + "learning_rate": 9.36428754230962e-06, + "loss": 1.9543, + "step": 15877 + }, + { + "epoch": 0.53, + "grad_norm": 0.725737988948822, + "learning_rate": 9.36322680287634e-06, + "loss": 1.9944, + "step": 15878 + }, + { + "epoch": 0.53, + "grad_norm": 0.7219983339309692, + "learning_rate": 9.362166070636956e-06, + "loss": 2.1017, + "step": 15879 + }, + { + "epoch": 0.53, + "grad_norm": 0.7992630004882812, + "learning_rate": 9.361105345603443e-06, + "loss": 2.0734, + "step": 15880 + }, + { + "epoch": 0.53, + "grad_norm": 0.7272497415542603, + "learning_rate": 9.360044627787785e-06, + "loss": 2.0692, + "step": 15881 + }, + { + "epoch": 0.53, + "grad_norm": 0.7185360193252563, + "learning_rate": 9.35898391720197e-06, + "loss": 2.0602, + "step": 15882 + }, + { + "epoch": 0.53, + "grad_norm": 0.71562659740448, + "learning_rate": 9.357923213857981e-06, + "loss": 2.1034, + "step": 15883 + }, + { + "epoch": 0.53, + "grad_norm": 0.7642841935157776, + "learning_rate": 9.356862517767796e-06, + "loss": 2.0837, + "step": 15884 + }, + { + "epoch": 0.53, + "grad_norm": 0.7575034499168396, + "learning_rate": 9.355801828943402e-06, + "loss": 2.0419, + "step": 15885 + }, + { + "epoch": 0.53, + "grad_norm": 0.736322283744812, + "learning_rate": 9.354741147396781e-06, + "loss": 2.0731, + "step": 15886 + }, + { + "epoch": 0.53, + "grad_norm": 0.7693501114845276, + "learning_rate": 9.35368047313992e-06, + "loss": 2.0783, + "step": 15887 + }, + { + "epoch": 0.53, + "grad_norm": 0.8010561466217041, + "learning_rate": 9.352619806184795e-06, + "loss": 2.0822, + "step": 15888 + }, + { + "epoch": 0.53, + "grad_norm": 0.7613589763641357, + "learning_rate": 9.351559146543391e-06, + "loss": 2.0257, + "step": 15889 + }, + { + "epoch": 0.53, + "grad_norm": 0.7548536658287048, + "learning_rate": 9.350498494227693e-06, + "loss": 2.0373, + "step": 15890 + }, + { + "epoch": 0.53, + "grad_norm": 0.738605260848999, + "learning_rate": 9.349437849249683e-06, + "loss": 2.0035, + "step": 15891 + }, + { + "epoch": 0.53, + "grad_norm": 0.7517900466918945, + "learning_rate": 9.348377211621345e-06, + "loss": 2.0684, + "step": 15892 + }, + { + "epoch": 0.53, + "grad_norm": 0.7380388379096985, + "learning_rate": 9.347316581354655e-06, + "loss": 2.0322, + "step": 15893 + }, + { + "epoch": 0.53, + "grad_norm": 0.7132270932197571, + "learning_rate": 9.346255958461603e-06, + "loss": 2.0554, + "step": 15894 + }, + { + "epoch": 0.53, + "grad_norm": 0.7483150959014893, + "learning_rate": 9.345195342954163e-06, + "loss": 2.0034, + "step": 15895 + }, + { + "epoch": 0.53, + "grad_norm": 0.7544447183609009, + "learning_rate": 9.344134734844332e-06, + "loss": 2.0262, + "step": 15896 + }, + { + "epoch": 0.53, + "grad_norm": 0.7355642318725586, + "learning_rate": 9.343074134144072e-06, + "loss": 2.0759, + "step": 15897 + }, + { + "epoch": 0.53, + "grad_norm": 0.7268276214599609, + "learning_rate": 9.34201354086538e-06, + "loss": 2.1108, + "step": 15898 + }, + { + "epoch": 0.53, + "grad_norm": 0.7692199945449829, + "learning_rate": 9.340952955020234e-06, + "loss": 2.1444, + "step": 15899 + }, + { + "epoch": 0.53, + "grad_norm": 0.7469164133071899, + "learning_rate": 9.339892376620611e-06, + "loss": 2.0963, + "step": 15900 + }, + { + "epoch": 0.53, + "grad_norm": 0.7846393585205078, + "learning_rate": 9.338831805678506e-06, + "loss": 2.0965, + "step": 15901 + }, + { + "epoch": 0.53, + "grad_norm": 0.7346498966217041, + "learning_rate": 9.337771242205886e-06, + "loss": 2.1197, + "step": 15902 + }, + { + "epoch": 0.53, + "grad_norm": 0.7151606678962708, + "learning_rate": 9.336710686214735e-06, + "loss": 2.0631, + "step": 15903 + }, + { + "epoch": 0.53, + "grad_norm": 0.7369479537010193, + "learning_rate": 9.335650137717043e-06, + "loss": 2.0799, + "step": 15904 + }, + { + "epoch": 0.53, + "grad_norm": 0.7005956768989563, + "learning_rate": 9.334589596724786e-06, + "loss": 2.0627, + "step": 15905 + }, + { + "epoch": 0.53, + "grad_norm": 0.7518231272697449, + "learning_rate": 9.333529063249945e-06, + "loss": 2.0413, + "step": 15906 + }, + { + "epoch": 0.53, + "grad_norm": 0.7471197247505188, + "learning_rate": 9.332468537304501e-06, + "loss": 2.1366, + "step": 15907 + }, + { + "epoch": 0.53, + "grad_norm": 0.7283148169517517, + "learning_rate": 9.331408018900436e-06, + "loss": 2.0184, + "step": 15908 + }, + { + "epoch": 0.53, + "grad_norm": 0.7611947655677795, + "learning_rate": 9.330347508049731e-06, + "loss": 2.122, + "step": 15909 + }, + { + "epoch": 0.53, + "grad_norm": 0.7322114109992981, + "learning_rate": 9.329287004764374e-06, + "loss": 2.1117, + "step": 15910 + }, + { + "epoch": 0.53, + "grad_norm": 0.7239534258842468, + "learning_rate": 9.328226509056331e-06, + "loss": 2.0801, + "step": 15911 + }, + { + "epoch": 0.53, + "grad_norm": 0.7653972506523132, + "learning_rate": 9.327166020937595e-06, + "loss": 2.026, + "step": 15912 + }, + { + "epoch": 0.53, + "grad_norm": 0.7509065270423889, + "learning_rate": 9.326105540420143e-06, + "loss": 2.0544, + "step": 15913 + }, + { + "epoch": 0.53, + "grad_norm": 0.76719731092453, + "learning_rate": 9.325045067515957e-06, + "loss": 2.0404, + "step": 15914 + }, + { + "epoch": 0.53, + "grad_norm": 0.7251100540161133, + "learning_rate": 9.323984602237014e-06, + "loss": 2.0965, + "step": 15915 + }, + { + "epoch": 0.53, + "grad_norm": 0.7555171847343445, + "learning_rate": 9.322924144595294e-06, + "loss": 2.0828, + "step": 15916 + }, + { + "epoch": 0.53, + "grad_norm": 0.7464106678962708, + "learning_rate": 9.321863694602784e-06, + "loss": 2.038, + "step": 15917 + }, + { + "epoch": 0.53, + "grad_norm": 0.7507331967353821, + "learning_rate": 9.320803252271464e-06, + "loss": 2.0619, + "step": 15918 + }, + { + "epoch": 0.53, + "grad_norm": 0.7521811127662659, + "learning_rate": 9.319742817613304e-06, + "loss": 2.0277, + "step": 15919 + }, + { + "epoch": 0.53, + "grad_norm": 0.7191751599311829, + "learning_rate": 9.318682390640293e-06, + "loss": 2.0395, + "step": 15920 + }, + { + "epoch": 0.53, + "grad_norm": 0.7472249865531921, + "learning_rate": 9.31762197136441e-06, + "loss": 2.0462, + "step": 15921 + }, + { + "epoch": 0.53, + "grad_norm": 0.748084306716919, + "learning_rate": 9.316561559797632e-06, + "loss": 2.0716, + "step": 15922 + }, + { + "epoch": 0.53, + "grad_norm": 0.7248259782791138, + "learning_rate": 9.315501155951947e-06, + "loss": 2.0381, + "step": 15923 + }, + { + "epoch": 0.53, + "grad_norm": 0.7302417755126953, + "learning_rate": 9.314440759839321e-06, + "loss": 2.0538, + "step": 15924 + }, + { + "epoch": 0.53, + "grad_norm": 0.7608217597007751, + "learning_rate": 9.313380371471747e-06, + "loss": 2.0259, + "step": 15925 + }, + { + "epoch": 0.53, + "grad_norm": 0.7580153942108154, + "learning_rate": 9.312319990861196e-06, + "loss": 2.1632, + "step": 15926 + }, + { + "epoch": 0.53, + "grad_norm": 0.731648325920105, + "learning_rate": 9.311259618019653e-06, + "loss": 2.1136, + "step": 15927 + }, + { + "epoch": 0.53, + "grad_norm": 0.7302659749984741, + "learning_rate": 9.310199252959095e-06, + "loss": 1.9977, + "step": 15928 + }, + { + "epoch": 0.53, + "grad_norm": 0.7995164394378662, + "learning_rate": 9.309138895691501e-06, + "loss": 2.1677, + "step": 15929 + }, + { + "epoch": 0.53, + "grad_norm": 0.7981265187263489, + "learning_rate": 9.308078546228849e-06, + "loss": 2.0395, + "step": 15930 + }, + { + "epoch": 0.53, + "grad_norm": 0.7587030529975891, + "learning_rate": 9.30701820458312e-06, + "loss": 2.0385, + "step": 15931 + }, + { + "epoch": 0.53, + "grad_norm": 0.7124722003936768, + "learning_rate": 9.305957870766297e-06, + "loss": 2.0262, + "step": 15932 + }, + { + "epoch": 0.53, + "grad_norm": 0.7316484451293945, + "learning_rate": 9.304897544790353e-06, + "loss": 2.1416, + "step": 15933 + }, + { + "epoch": 0.53, + "grad_norm": 0.7431384325027466, + "learning_rate": 9.303837226667269e-06, + "loss": 2.1107, + "step": 15934 + }, + { + "epoch": 0.53, + "grad_norm": 0.7530476450920105, + "learning_rate": 9.302776916409024e-06, + "loss": 2.1362, + "step": 15935 + }, + { + "epoch": 0.53, + "grad_norm": 0.7857641577720642, + "learning_rate": 9.301716614027599e-06, + "loss": 2.1101, + "step": 15936 + }, + { + "epoch": 0.53, + "grad_norm": 0.7001701593399048, + "learning_rate": 9.300656319534968e-06, + "loss": 2.0378, + "step": 15937 + }, + { + "epoch": 0.53, + "grad_norm": 0.7270858883857727, + "learning_rate": 9.299596032943111e-06, + "loss": 2.102, + "step": 15938 + }, + { + "epoch": 0.53, + "grad_norm": 0.7247528433799744, + "learning_rate": 9.298535754264008e-06, + "loss": 2.0579, + "step": 15939 + }, + { + "epoch": 0.53, + "grad_norm": 0.7364323735237122, + "learning_rate": 9.29747548350964e-06, + "loss": 2.0386, + "step": 15940 + }, + { + "epoch": 0.53, + "grad_norm": 0.7209014892578125, + "learning_rate": 9.29641522069198e-06, + "loss": 2.0716, + "step": 15941 + }, + { + "epoch": 0.53, + "grad_norm": 0.7620095014572144, + "learning_rate": 9.29535496582301e-06, + "loss": 2.0582, + "step": 15942 + }, + { + "epoch": 0.53, + "grad_norm": 0.7178986668586731, + "learning_rate": 9.294294718914705e-06, + "loss": 2.0447, + "step": 15943 + }, + { + "epoch": 0.53, + "grad_norm": 0.7734183669090271, + "learning_rate": 9.293234479979044e-06, + "loss": 2.0757, + "step": 15944 + }, + { + "epoch": 0.53, + "grad_norm": 0.7778571248054504, + "learning_rate": 9.29217424902801e-06, + "loss": 2.098, + "step": 15945 + }, + { + "epoch": 0.53, + "grad_norm": 0.7628556489944458, + "learning_rate": 9.291114026073571e-06, + "loss": 2.0508, + "step": 15946 + }, + { + "epoch": 0.53, + "grad_norm": 0.7472501993179321, + "learning_rate": 9.290053811127713e-06, + "loss": 2.0123, + "step": 15947 + }, + { + "epoch": 0.53, + "grad_norm": 0.7359395623207092, + "learning_rate": 9.288993604202409e-06, + "loss": 2.0957, + "step": 15948 + }, + { + "epoch": 0.53, + "grad_norm": 0.7122228741645813, + "learning_rate": 9.287933405309643e-06, + "loss": 2.0294, + "step": 15949 + }, + { + "epoch": 0.53, + "grad_norm": 0.7597297430038452, + "learning_rate": 9.286873214461385e-06, + "loss": 2.0164, + "step": 15950 + }, + { + "epoch": 0.53, + "grad_norm": 0.7028793692588806, + "learning_rate": 9.285813031669616e-06, + "loss": 2.0631, + "step": 15951 + }, + { + "epoch": 0.53, + "grad_norm": 0.7687334418296814, + "learning_rate": 9.284752856946309e-06, + "loss": 2.1056, + "step": 15952 + }, + { + "epoch": 0.53, + "grad_norm": 0.735640287399292, + "learning_rate": 9.283692690303448e-06, + "loss": 2.1401, + "step": 15953 + }, + { + "epoch": 0.53, + "grad_norm": 0.7564795017242432, + "learning_rate": 9.28263253175301e-06, + "loss": 2.1223, + "step": 15954 + }, + { + "epoch": 0.53, + "grad_norm": 0.7364903092384338, + "learning_rate": 9.281572381306968e-06, + "loss": 2.0563, + "step": 15955 + }, + { + "epoch": 0.53, + "grad_norm": 0.7497307062149048, + "learning_rate": 9.280512238977298e-06, + "loss": 2.0999, + "step": 15956 + }, + { + "epoch": 0.53, + "grad_norm": 0.7197167277336121, + "learning_rate": 9.279452104775982e-06, + "loss": 2.0195, + "step": 15957 + }, + { + "epoch": 0.53, + "grad_norm": 0.7526372671127319, + "learning_rate": 9.278391978714993e-06, + "loss": 2.1083, + "step": 15958 + }, + { + "epoch": 0.53, + "grad_norm": 0.7457206845283508, + "learning_rate": 9.277331860806306e-06, + "loss": 2.0435, + "step": 15959 + }, + { + "epoch": 0.53, + "grad_norm": 0.7184691429138184, + "learning_rate": 9.2762717510619e-06, + "loss": 2.0582, + "step": 15960 + }, + { + "epoch": 0.53, + "grad_norm": 0.7513689994812012, + "learning_rate": 9.275211649493753e-06, + "loss": 2.0727, + "step": 15961 + }, + { + "epoch": 0.53, + "grad_norm": 0.7421584129333496, + "learning_rate": 9.274151556113841e-06, + "loss": 2.0529, + "step": 15962 + }, + { + "epoch": 0.53, + "grad_norm": 0.7788191437721252, + "learning_rate": 9.27309147093414e-06, + "loss": 2.0449, + "step": 15963 + }, + { + "epoch": 0.53, + "grad_norm": 0.7572394013404846, + "learning_rate": 9.272031393966625e-06, + "loss": 2.0837, + "step": 15964 + }, + { + "epoch": 0.53, + "grad_norm": 0.7335614562034607, + "learning_rate": 9.270971325223271e-06, + "loss": 2.0115, + "step": 15965 + }, + { + "epoch": 0.53, + "grad_norm": 0.7741708159446716, + "learning_rate": 9.269911264716056e-06, + "loss": 2.0438, + "step": 15966 + }, + { + "epoch": 0.53, + "grad_norm": 0.735787034034729, + "learning_rate": 9.268851212456959e-06, + "loss": 2.0625, + "step": 15967 + }, + { + "epoch": 0.53, + "grad_norm": 0.7941128611564636, + "learning_rate": 9.267791168457946e-06, + "loss": 2.0165, + "step": 15968 + }, + { + "epoch": 0.53, + "grad_norm": 0.7362989187240601, + "learning_rate": 9.266731132731003e-06, + "loss": 2.0431, + "step": 15969 + }, + { + "epoch": 0.53, + "grad_norm": 0.7495849132537842, + "learning_rate": 9.265671105288104e-06, + "loss": 2.006, + "step": 15970 + }, + { + "epoch": 0.53, + "grad_norm": 0.7700591087341309, + "learning_rate": 9.264611086141222e-06, + "loss": 2.0796, + "step": 15971 + }, + { + "epoch": 0.53, + "grad_norm": 0.7365755438804626, + "learning_rate": 9.26355107530233e-06, + "loss": 2.0562, + "step": 15972 + }, + { + "epoch": 0.53, + "grad_norm": 0.7625305652618408, + "learning_rate": 9.262491072783409e-06, + "loss": 2.017, + "step": 15973 + }, + { + "epoch": 0.53, + "grad_norm": 0.7295148968696594, + "learning_rate": 9.261431078596428e-06, + "loss": 2.0731, + "step": 15974 + }, + { + "epoch": 0.53, + "grad_norm": 0.7187147736549377, + "learning_rate": 9.260371092753368e-06, + "loss": 2.0581, + "step": 15975 + }, + { + "epoch": 0.53, + "grad_norm": 0.7318670749664307, + "learning_rate": 9.259311115266205e-06, + "loss": 2.0573, + "step": 15976 + }, + { + "epoch": 0.53, + "grad_norm": 0.7463316321372986, + "learning_rate": 9.258251146146907e-06, + "loss": 2.1557, + "step": 15977 + }, + { + "epoch": 0.53, + "grad_norm": 0.7334805130958557, + "learning_rate": 9.257191185407454e-06, + "loss": 2.0499, + "step": 15978 + }, + { + "epoch": 0.53, + "grad_norm": 0.7716814875602722, + "learning_rate": 9.256131233059819e-06, + "loss": 2.0639, + "step": 15979 + }, + { + "epoch": 0.53, + "grad_norm": 0.7400861978530884, + "learning_rate": 9.25507128911598e-06, + "loss": 2.1087, + "step": 15980 + }, + { + "epoch": 0.53, + "grad_norm": 0.7209727764129639, + "learning_rate": 9.254011353587907e-06, + "loss": 2.1423, + "step": 15981 + }, + { + "epoch": 0.53, + "grad_norm": 0.7437472939491272, + "learning_rate": 9.252951426487574e-06, + "loss": 2.0571, + "step": 15982 + }, + { + "epoch": 0.53, + "grad_norm": 0.7227652668952942, + "learning_rate": 9.25189150782696e-06, + "loss": 2.0668, + "step": 15983 + }, + { + "epoch": 0.53, + "grad_norm": 0.770972728729248, + "learning_rate": 9.250831597618037e-06, + "loss": 2.0623, + "step": 15984 + }, + { + "epoch": 0.53, + "grad_norm": 0.7300300002098083, + "learning_rate": 9.249771695872784e-06, + "loss": 2.1043, + "step": 15985 + }, + { + "epoch": 0.53, + "grad_norm": 0.732370913028717, + "learning_rate": 9.248711802603166e-06, + "loss": 2.1387, + "step": 15986 + }, + { + "epoch": 0.53, + "grad_norm": 0.7470964789390564, + "learning_rate": 9.247651917821163e-06, + "loss": 2.0844, + "step": 15987 + }, + { + "epoch": 0.53, + "grad_norm": 0.746906042098999, + "learning_rate": 9.246592041538745e-06, + "loss": 2.0946, + "step": 15988 + }, + { + "epoch": 0.53, + "grad_norm": 0.7387294769287109, + "learning_rate": 9.245532173767895e-06, + "loss": 2.066, + "step": 15989 + }, + { + "epoch": 0.53, + "grad_norm": 0.7141947746276855, + "learning_rate": 9.244472314520573e-06, + "loss": 2.0267, + "step": 15990 + }, + { + "epoch": 0.53, + "grad_norm": 0.7304352521896362, + "learning_rate": 9.243412463808765e-06, + "loss": 1.9975, + "step": 15991 + }, + { + "epoch": 0.53, + "grad_norm": 0.7589696049690247, + "learning_rate": 9.242352621644438e-06, + "loss": 2.0633, + "step": 15992 + }, + { + "epoch": 0.53, + "grad_norm": 0.8823273777961731, + "learning_rate": 9.241292788039566e-06, + "loss": 2.0972, + "step": 15993 + }, + { + "epoch": 0.53, + "grad_norm": 0.7484263777732849, + "learning_rate": 9.240232963006128e-06, + "loss": 2.0597, + "step": 15994 + }, + { + "epoch": 0.53, + "grad_norm": 0.7398248910903931, + "learning_rate": 9.23917314655609e-06, + "loss": 2.0696, + "step": 15995 + }, + { + "epoch": 0.53, + "grad_norm": 0.7671696543693542, + "learning_rate": 9.238113338701422e-06, + "loss": 2.1627, + "step": 15996 + }, + { + "epoch": 0.53, + "grad_norm": 0.7640106678009033, + "learning_rate": 9.237053539454108e-06, + "loss": 2.0688, + "step": 15997 + }, + { + "epoch": 0.53, + "grad_norm": 0.7214716672897339, + "learning_rate": 9.235993748826118e-06, + "loss": 1.9921, + "step": 15998 + }, + { + "epoch": 0.53, + "grad_norm": 0.7194445133209229, + "learning_rate": 9.23493396682942e-06, + "loss": 1.9789, + "step": 15999 + }, + { + "epoch": 0.53, + "grad_norm": 0.732318103313446, + "learning_rate": 9.233874193475992e-06, + "loss": 1.9512, + "step": 16000 + }, + { + "epoch": 0.53, + "grad_norm": 0.7848606109619141, + "learning_rate": 9.232814428777803e-06, + "loss": 2.1032, + "step": 16001 + }, + { + "epoch": 0.53, + "grad_norm": 0.7297263145446777, + "learning_rate": 9.231754672746829e-06, + "loss": 2.0383, + "step": 16002 + }, + { + "epoch": 0.53, + "grad_norm": 0.7562658190727234, + "learning_rate": 9.230694925395038e-06, + "loss": 2.0156, + "step": 16003 + }, + { + "epoch": 0.53, + "grad_norm": 0.7618056535720825, + "learning_rate": 9.229635186734403e-06, + "loss": 2.0539, + "step": 16004 + }, + { + "epoch": 0.53, + "grad_norm": 0.7276254296302795, + "learning_rate": 9.228575456776901e-06, + "loss": 2.0325, + "step": 16005 + }, + { + "epoch": 0.53, + "grad_norm": 0.752739429473877, + "learning_rate": 9.2275157355345e-06, + "loss": 2.0664, + "step": 16006 + }, + { + "epoch": 0.53, + "grad_norm": 0.7278198599815369, + "learning_rate": 9.226456023019178e-06, + "loss": 2.0488, + "step": 16007 + }, + { + "epoch": 0.53, + "grad_norm": 0.7573243379592896, + "learning_rate": 9.225396319242898e-06, + "loss": 2.0771, + "step": 16008 + }, + { + "epoch": 0.53, + "grad_norm": 0.7025761604309082, + "learning_rate": 9.224336624217635e-06, + "loss": 2.0635, + "step": 16009 + }, + { + "epoch": 0.53, + "grad_norm": 0.7310386300086975, + "learning_rate": 9.223276937955364e-06, + "loss": 2.0162, + "step": 16010 + }, + { + "epoch": 0.53, + "grad_norm": 0.7539940476417542, + "learning_rate": 9.22221726046806e-06, + "loss": 2.1346, + "step": 16011 + }, + { + "epoch": 0.53, + "grad_norm": 0.7575652599334717, + "learning_rate": 9.221157591767683e-06, + "loss": 2.1174, + "step": 16012 + }, + { + "epoch": 0.53, + "grad_norm": 0.7140714526176453, + "learning_rate": 9.220097931866213e-06, + "loss": 2.0418, + "step": 16013 + }, + { + "epoch": 0.53, + "grad_norm": 0.7271680235862732, + "learning_rate": 9.21903828077562e-06, + "loss": 2.0855, + "step": 16014 + }, + { + "epoch": 0.53, + "grad_norm": 0.7224127054214478, + "learning_rate": 9.217978638507871e-06, + "loss": 2.0037, + "step": 16015 + }, + { + "epoch": 0.53, + "grad_norm": 0.7882169485092163, + "learning_rate": 9.216919005074949e-06, + "loss": 2.0628, + "step": 16016 + }, + { + "epoch": 0.53, + "grad_norm": 0.750856339931488, + "learning_rate": 9.21585938048881e-06, + "loss": 2.1081, + "step": 16017 + }, + { + "epoch": 0.53, + "grad_norm": 0.7655377984046936, + "learning_rate": 9.214799764761436e-06, + "loss": 2.0696, + "step": 16018 + }, + { + "epoch": 0.53, + "grad_norm": 0.7266107201576233, + "learning_rate": 9.21374015790479e-06, + "loss": 2.0761, + "step": 16019 + }, + { + "epoch": 0.53, + "grad_norm": 0.7755805253982544, + "learning_rate": 9.212680559930853e-06, + "loss": 2.0336, + "step": 16020 + }, + { + "epoch": 0.53, + "grad_norm": 0.7473196983337402, + "learning_rate": 9.211620970851586e-06, + "loss": 2.052, + "step": 16021 + }, + { + "epoch": 0.53, + "grad_norm": 0.7513112425804138, + "learning_rate": 9.210561390678964e-06, + "loss": 2.1181, + "step": 16022 + }, + { + "epoch": 0.53, + "grad_norm": 0.7416778802871704, + "learning_rate": 9.209501819424953e-06, + "loss": 2.0195, + "step": 16023 + }, + { + "epoch": 0.53, + "grad_norm": 0.7785535454750061, + "learning_rate": 9.208442257101531e-06, + "loss": 2.0946, + "step": 16024 + }, + { + "epoch": 0.53, + "grad_norm": 0.7548454403877258, + "learning_rate": 9.207382703720666e-06, + "loss": 2.0952, + "step": 16025 + }, + { + "epoch": 0.53, + "grad_norm": 0.7580496668815613, + "learning_rate": 9.206323159294325e-06, + "loss": 2.0763, + "step": 16026 + }, + { + "epoch": 0.53, + "grad_norm": 0.7459222078323364, + "learning_rate": 9.205263623834479e-06, + "loss": 2.065, + "step": 16027 + }, + { + "epoch": 0.53, + "grad_norm": 0.7287431359291077, + "learning_rate": 9.2042040973531e-06, + "loss": 1.9973, + "step": 16028 + }, + { + "epoch": 0.53, + "grad_norm": 0.7263702750205994, + "learning_rate": 9.20314457986216e-06, + "loss": 2.0102, + "step": 16029 + }, + { + "epoch": 0.53, + "grad_norm": 0.7440166473388672, + "learning_rate": 9.20208507137362e-06, + "loss": 2.0551, + "step": 16030 + }, + { + "epoch": 0.53, + "grad_norm": 0.720483660697937, + "learning_rate": 9.201025571899455e-06, + "loss": 1.9949, + "step": 16031 + }, + { + "epoch": 0.53, + "grad_norm": 0.7443420886993408, + "learning_rate": 9.199966081451636e-06, + "loss": 2.1093, + "step": 16032 + }, + { + "epoch": 0.53, + "grad_norm": 0.772274374961853, + "learning_rate": 9.198906600042135e-06, + "loss": 2.1204, + "step": 16033 + }, + { + "epoch": 0.53, + "grad_norm": 0.7459427714347839, + "learning_rate": 9.197847127682914e-06, + "loss": 2.0704, + "step": 16034 + }, + { + "epoch": 0.53, + "grad_norm": 0.752036988735199, + "learning_rate": 9.196787664385946e-06, + "loss": 2.0165, + "step": 16035 + }, + { + "epoch": 0.53, + "grad_norm": 0.7689023613929749, + "learning_rate": 9.195728210163202e-06, + "loss": 2.036, + "step": 16036 + }, + { + "epoch": 0.53, + "grad_norm": 0.7844099402427673, + "learning_rate": 9.194668765026645e-06, + "loss": 2.0233, + "step": 16037 + }, + { + "epoch": 0.53, + "grad_norm": 0.7220128774642944, + "learning_rate": 9.193609328988257e-06, + "loss": 1.9683, + "step": 16038 + }, + { + "epoch": 0.53, + "grad_norm": 0.732947826385498, + "learning_rate": 9.19254990205999e-06, + "loss": 2.016, + "step": 16039 + }, + { + "epoch": 0.53, + "grad_norm": 0.7263383269309998, + "learning_rate": 9.191490484253824e-06, + "loss": 2.0834, + "step": 16040 + }, + { + "epoch": 0.53, + "grad_norm": 0.7345051169395447, + "learning_rate": 9.190431075581725e-06, + "loss": 2.0499, + "step": 16041 + }, + { + "epoch": 0.53, + "grad_norm": 0.771159827709198, + "learning_rate": 9.189371676055663e-06, + "loss": 2.001, + "step": 16042 + }, + { + "epoch": 0.53, + "grad_norm": 0.7602362632751465, + "learning_rate": 9.188312285687602e-06, + "loss": 2.0196, + "step": 16043 + }, + { + "epoch": 0.53, + "grad_norm": 0.7179849743843079, + "learning_rate": 9.187252904489513e-06, + "loss": 2.0573, + "step": 16044 + }, + { + "epoch": 0.53, + "grad_norm": 0.7222908735275269, + "learning_rate": 9.186193532473364e-06, + "loss": 2.0258, + "step": 16045 + }, + { + "epoch": 0.53, + "grad_norm": 0.7636646628379822, + "learning_rate": 9.185134169651124e-06, + "loss": 2.1187, + "step": 16046 + }, + { + "epoch": 0.53, + "grad_norm": 0.7379689812660217, + "learning_rate": 9.184074816034763e-06, + "loss": 2.0459, + "step": 16047 + }, + { + "epoch": 0.53, + "grad_norm": 0.7704395055770874, + "learning_rate": 9.183015471636244e-06, + "loss": 2.0527, + "step": 16048 + }, + { + "epoch": 0.53, + "grad_norm": 0.7609572410583496, + "learning_rate": 9.181956136467537e-06, + "loss": 2.1446, + "step": 16049 + }, + { + "epoch": 0.53, + "grad_norm": 0.7350201606750488, + "learning_rate": 9.180896810540613e-06, + "loss": 2.071, + "step": 16050 + }, + { + "epoch": 0.53, + "grad_norm": 0.6975039839744568, + "learning_rate": 9.179837493867438e-06, + "loss": 2.116, + "step": 16051 + }, + { + "epoch": 0.53, + "grad_norm": 0.7201418280601501, + "learning_rate": 9.178778186459974e-06, + "loss": 2.0475, + "step": 16052 + }, + { + "epoch": 0.53, + "grad_norm": 0.7411181330680847, + "learning_rate": 9.177718888330192e-06, + "loss": 2.0896, + "step": 16053 + }, + { + "epoch": 0.53, + "grad_norm": 0.7326714992523193, + "learning_rate": 9.176659599490061e-06, + "loss": 2.0152, + "step": 16054 + }, + { + "epoch": 0.53, + "grad_norm": 0.7135183215141296, + "learning_rate": 9.17560031995155e-06, + "loss": 2.0945, + "step": 16055 + }, + { + "epoch": 0.53, + "grad_norm": 0.7686595916748047, + "learning_rate": 9.174541049726625e-06, + "loss": 2.1613, + "step": 16056 + }, + { + "epoch": 0.53, + "grad_norm": 0.7410893440246582, + "learning_rate": 9.173481788827248e-06, + "loss": 2.0826, + "step": 16057 + }, + { + "epoch": 0.53, + "grad_norm": 0.7164680361747742, + "learning_rate": 9.17242253726539e-06, + "loss": 2.0463, + "step": 16058 + }, + { + "epoch": 0.53, + "grad_norm": 0.7621414065361023, + "learning_rate": 9.171363295053013e-06, + "loss": 2.0786, + "step": 16059 + }, + { + "epoch": 0.53, + "grad_norm": 0.709117591381073, + "learning_rate": 9.170304062202097e-06, + "loss": 2.0697, + "step": 16060 + }, + { + "epoch": 0.53, + "grad_norm": 0.7376083731651306, + "learning_rate": 9.169244838724591e-06, + "loss": 2.0554, + "step": 16061 + }, + { + "epoch": 0.53, + "grad_norm": 0.7514504194259644, + "learning_rate": 9.168185624632474e-06, + "loss": 2.0679, + "step": 16062 + }, + { + "epoch": 0.53, + "grad_norm": 0.7439470291137695, + "learning_rate": 9.167126419937707e-06, + "loss": 2.0757, + "step": 16063 + }, + { + "epoch": 0.53, + "grad_norm": 0.7106485962867737, + "learning_rate": 9.16606722465226e-06, + "loss": 2.099, + "step": 16064 + }, + { + "epoch": 0.53, + "grad_norm": 0.7550050020217896, + "learning_rate": 9.165008038788096e-06, + "loss": 2.0571, + "step": 16065 + }, + { + "epoch": 0.53, + "grad_norm": 0.7180540561676025, + "learning_rate": 9.163948862357181e-06, + "loss": 2.0725, + "step": 16066 + }, + { + "epoch": 0.53, + "grad_norm": 0.7606554627418518, + "learning_rate": 9.162889695371478e-06, + "loss": 2.0486, + "step": 16067 + }, + { + "epoch": 0.53, + "grad_norm": 0.7469570636749268, + "learning_rate": 9.16183053784296e-06, + "loss": 2.1457, + "step": 16068 + }, + { + "epoch": 0.53, + "grad_norm": 0.7186725735664368, + "learning_rate": 9.160771389783591e-06, + "loss": 2.0632, + "step": 16069 + }, + { + "epoch": 0.53, + "grad_norm": 0.7411295771598816, + "learning_rate": 9.159712251205335e-06, + "loss": 2.0708, + "step": 16070 + }, + { + "epoch": 0.53, + "grad_norm": 0.7392745018005371, + "learning_rate": 9.158653122120156e-06, + "loss": 2.0583, + "step": 16071 + }, + { + "epoch": 0.53, + "grad_norm": 0.7609171271324158, + "learning_rate": 9.15759400254002e-06, + "loss": 2.0359, + "step": 16072 + }, + { + "epoch": 0.53, + "grad_norm": 0.7403454780578613, + "learning_rate": 9.156534892476898e-06, + "loss": 2.026, + "step": 16073 + }, + { + "epoch": 0.53, + "grad_norm": 0.7484779953956604, + "learning_rate": 9.155475791942745e-06, + "loss": 2.0465, + "step": 16074 + }, + { + "epoch": 0.53, + "grad_norm": 0.7578922510147095, + "learning_rate": 9.15441670094953e-06, + "loss": 2.1687, + "step": 16075 + }, + { + "epoch": 0.53, + "grad_norm": 0.7346295118331909, + "learning_rate": 9.153357619509225e-06, + "loss": 2.1262, + "step": 16076 + }, + { + "epoch": 0.53, + "grad_norm": 0.7380934953689575, + "learning_rate": 9.152298547633787e-06, + "loss": 2.1445, + "step": 16077 + }, + { + "epoch": 0.53, + "grad_norm": 0.7431432008743286, + "learning_rate": 9.151239485335184e-06, + "loss": 2.1418, + "step": 16078 + }, + { + "epoch": 0.53, + "grad_norm": 0.7498764991760254, + "learning_rate": 9.150180432625379e-06, + "loss": 2.0631, + "step": 16079 + }, + { + "epoch": 0.53, + "grad_norm": 0.7398027181625366, + "learning_rate": 9.149121389516338e-06, + "loss": 2.0563, + "step": 16080 + }, + { + "epoch": 0.54, + "grad_norm": 0.7248805165290833, + "learning_rate": 9.148062356020024e-06, + "loss": 2.0595, + "step": 16081 + }, + { + "epoch": 0.54, + "grad_norm": 0.7447824478149414, + "learning_rate": 9.147003332148406e-06, + "loss": 2.1401, + "step": 16082 + }, + { + "epoch": 0.54, + "grad_norm": 0.733701229095459, + "learning_rate": 9.14594431791344e-06, + "loss": 2.0748, + "step": 16083 + }, + { + "epoch": 0.54, + "grad_norm": 0.7724853754043579, + "learning_rate": 9.144885313327096e-06, + "loss": 2.0811, + "step": 16084 + }, + { + "epoch": 0.54, + "grad_norm": 0.7426878809928894, + "learning_rate": 9.143826318401335e-06, + "loss": 2.0711, + "step": 16085 + }, + { + "epoch": 0.54, + "grad_norm": 0.711362361907959, + "learning_rate": 9.142767333148126e-06, + "loss": 2.0984, + "step": 16086 + }, + { + "epoch": 0.54, + "grad_norm": 0.7430671453475952, + "learning_rate": 9.141708357579427e-06, + "loss": 2.0942, + "step": 16087 + }, + { + "epoch": 0.54, + "grad_norm": 0.7391875982284546, + "learning_rate": 9.140649391707204e-06, + "loss": 2.047, + "step": 16088 + }, + { + "epoch": 0.54, + "grad_norm": 0.7467110753059387, + "learning_rate": 9.139590435543419e-06, + "loss": 2.0372, + "step": 16089 + }, + { + "epoch": 0.54, + "grad_norm": 0.7503875494003296, + "learning_rate": 9.138531489100038e-06, + "loss": 2.0905, + "step": 16090 + }, + { + "epoch": 0.54, + "grad_norm": 0.740982711315155, + "learning_rate": 9.137472552389027e-06, + "loss": 2.0638, + "step": 16091 + }, + { + "epoch": 0.54, + "grad_norm": 0.7608187198638916, + "learning_rate": 9.136413625422343e-06, + "loss": 2.0465, + "step": 16092 + }, + { + "epoch": 0.54, + "grad_norm": 0.7321698665618896, + "learning_rate": 9.135354708211952e-06, + "loss": 2.0898, + "step": 16093 + }, + { + "epoch": 0.54, + "grad_norm": 0.7443631291389465, + "learning_rate": 9.134295800769817e-06, + "loss": 2.0541, + "step": 16094 + }, + { + "epoch": 0.54, + "grad_norm": 0.7146250605583191, + "learning_rate": 9.1332369031079e-06, + "loss": 2.0765, + "step": 16095 + }, + { + "epoch": 0.54, + "grad_norm": 0.7480495572090149, + "learning_rate": 9.132178015238165e-06, + "loss": 2.0841, + "step": 16096 + }, + { + "epoch": 0.54, + "grad_norm": 0.7449318170547485, + "learning_rate": 9.131119137172571e-06, + "loss": 2.0497, + "step": 16097 + }, + { + "epoch": 0.54, + "grad_norm": 0.7293895483016968, + "learning_rate": 9.130060268923086e-06, + "loss": 2.0757, + "step": 16098 + }, + { + "epoch": 0.54, + "grad_norm": 0.746681809425354, + "learning_rate": 9.129001410501671e-06, + "loss": 2.1067, + "step": 16099 + }, + { + "epoch": 0.54, + "grad_norm": 0.7334829568862915, + "learning_rate": 9.12794256192029e-06, + "loss": 2.0069, + "step": 16100 + }, + { + "epoch": 0.54, + "grad_norm": 0.738744854927063, + "learning_rate": 9.1268837231909e-06, + "loss": 2.0933, + "step": 16101 + }, + { + "epoch": 0.54, + "grad_norm": 0.7283045649528503, + "learning_rate": 9.125824894325465e-06, + "loss": 2.0529, + "step": 16102 + }, + { + "epoch": 0.54, + "grad_norm": 0.7260571718215942, + "learning_rate": 9.124766075335949e-06, + "loss": 1.9908, + "step": 16103 + }, + { + "epoch": 0.54, + "grad_norm": 0.758544921875, + "learning_rate": 9.123707266234317e-06, + "loss": 2.0468, + "step": 16104 + }, + { + "epoch": 0.54, + "grad_norm": 0.7505977749824524, + "learning_rate": 9.12264846703252e-06, + "loss": 2.1078, + "step": 16105 + }, + { + "epoch": 0.54, + "grad_norm": 0.7734881043434143, + "learning_rate": 9.12158967774253e-06, + "loss": 2.073, + "step": 16106 + }, + { + "epoch": 0.54, + "grad_norm": 0.7540022134780884, + "learning_rate": 9.120530898376302e-06, + "loss": 2.0999, + "step": 16107 + }, + { + "epoch": 0.54, + "grad_norm": 0.741304874420166, + "learning_rate": 9.1194721289458e-06, + "loss": 2.0651, + "step": 16108 + }, + { + "epoch": 0.54, + "grad_norm": 0.7225976586341858, + "learning_rate": 9.118413369462994e-06, + "loss": 2.1356, + "step": 16109 + }, + { + "epoch": 0.54, + "grad_norm": 0.7370829582214355, + "learning_rate": 9.11735461993983e-06, + "loss": 2.0921, + "step": 16110 + }, + { + "epoch": 0.54, + "grad_norm": 0.7164703607559204, + "learning_rate": 9.116295880388277e-06, + "loss": 2.1013, + "step": 16111 + }, + { + "epoch": 0.54, + "grad_norm": 0.7514081001281738, + "learning_rate": 9.115237150820298e-06, + "loss": 2.0568, + "step": 16112 + }, + { + "epoch": 0.54, + "grad_norm": 0.7405162453651428, + "learning_rate": 9.114178431247851e-06, + "loss": 2.0951, + "step": 16113 + }, + { + "epoch": 0.54, + "grad_norm": 0.7289748787879944, + "learning_rate": 9.113119721682896e-06, + "loss": 2.0838, + "step": 16114 + }, + { + "epoch": 0.54, + "grad_norm": 0.713518500328064, + "learning_rate": 9.112061022137393e-06, + "loss": 2.0805, + "step": 16115 + }, + { + "epoch": 0.54, + "grad_norm": 0.7457652688026428, + "learning_rate": 9.111002332623303e-06, + "loss": 2.0737, + "step": 16116 + }, + { + "epoch": 0.54, + "grad_norm": 0.7515103816986084, + "learning_rate": 9.109943653152593e-06, + "loss": 2.0728, + "step": 16117 + }, + { + "epoch": 0.54, + "grad_norm": 0.7351611852645874, + "learning_rate": 9.108884983737212e-06, + "loss": 2.0241, + "step": 16118 + }, + { + "epoch": 0.54, + "grad_norm": 0.7572402358055115, + "learning_rate": 9.10782632438913e-06, + "loss": 2.0374, + "step": 16119 + }, + { + "epoch": 0.54, + "grad_norm": 0.7435814142227173, + "learning_rate": 9.106767675120301e-06, + "loss": 2.1318, + "step": 16120 + }, + { + "epoch": 0.54, + "grad_norm": 0.7317203283309937, + "learning_rate": 9.105709035942689e-06, + "loss": 2.1107, + "step": 16121 + }, + { + "epoch": 0.54, + "grad_norm": 0.7276882529258728, + "learning_rate": 9.104650406868254e-06, + "loss": 2.0933, + "step": 16122 + }, + { + "epoch": 0.54, + "grad_norm": 0.7675588130950928, + "learning_rate": 9.103591787908952e-06, + "loss": 2.0925, + "step": 16123 + }, + { + "epoch": 0.54, + "grad_norm": 0.731278657913208, + "learning_rate": 9.102533179076743e-06, + "loss": 2.1017, + "step": 16124 + }, + { + "epoch": 0.54, + "grad_norm": 0.7704048156738281, + "learning_rate": 9.101474580383589e-06, + "loss": 2.0069, + "step": 16125 + }, + { + "epoch": 0.54, + "grad_norm": 0.7406962513923645, + "learning_rate": 9.100415991841452e-06, + "loss": 2.0428, + "step": 16126 + }, + { + "epoch": 0.54, + "grad_norm": 0.7625757455825806, + "learning_rate": 9.099357413462284e-06, + "loss": 2.0459, + "step": 16127 + }, + { + "epoch": 0.54, + "grad_norm": 0.7589849829673767, + "learning_rate": 9.098298845258049e-06, + "loss": 2.1383, + "step": 16128 + }, + { + "epoch": 0.54, + "grad_norm": 0.7585662007331848, + "learning_rate": 9.097240287240705e-06, + "loss": 2.05, + "step": 16129 + }, + { + "epoch": 0.54, + "grad_norm": 0.7413753271102905, + "learning_rate": 9.09618173942221e-06, + "loss": 2.0668, + "step": 16130 + }, + { + "epoch": 0.54, + "grad_norm": 0.718809962272644, + "learning_rate": 9.095123201814529e-06, + "loss": 2.0453, + "step": 16131 + }, + { + "epoch": 0.54, + "grad_norm": 0.768153190612793, + "learning_rate": 9.09406467442961e-06, + "loss": 2.0457, + "step": 16132 + }, + { + "epoch": 0.54, + "grad_norm": 0.7476561665534973, + "learning_rate": 9.093006157279421e-06, + "loss": 2.087, + "step": 16133 + }, + { + "epoch": 0.54, + "grad_norm": 0.7554195523262024, + "learning_rate": 9.091947650375916e-06, + "loss": 2.0119, + "step": 16134 + }, + { + "epoch": 0.54, + "grad_norm": 0.8251556754112244, + "learning_rate": 9.090889153731055e-06, + "loss": 2.1137, + "step": 16135 + }, + { + "epoch": 0.54, + "grad_norm": 0.7942646145820618, + "learning_rate": 9.089830667356794e-06, + "loss": 2.0463, + "step": 16136 + }, + { + "epoch": 0.54, + "grad_norm": 0.7700174450874329, + "learning_rate": 9.088772191265093e-06, + "loss": 2.0855, + "step": 16137 + }, + { + "epoch": 0.54, + "grad_norm": 0.7480670809745789, + "learning_rate": 9.087713725467909e-06, + "loss": 2.0943, + "step": 16138 + }, + { + "epoch": 0.54, + "grad_norm": 0.7469131350517273, + "learning_rate": 9.0866552699772e-06, + "loss": 2.0285, + "step": 16139 + }, + { + "epoch": 0.54, + "grad_norm": 0.7369194030761719, + "learning_rate": 9.085596824804928e-06, + "loss": 2.0611, + "step": 16140 + }, + { + "epoch": 0.54, + "grad_norm": 0.7487397193908691, + "learning_rate": 9.084538389963045e-06, + "loss": 2.0866, + "step": 16141 + }, + { + "epoch": 0.54, + "grad_norm": 0.7577124834060669, + "learning_rate": 9.083479965463511e-06, + "loss": 2.1081, + "step": 16142 + }, + { + "epoch": 0.54, + "grad_norm": 0.7707308530807495, + "learning_rate": 9.082421551318284e-06, + "loss": 2.1824, + "step": 16143 + }, + { + "epoch": 0.54, + "grad_norm": 0.7502233386039734, + "learning_rate": 9.081363147539322e-06, + "loss": 2.1968, + "step": 16144 + }, + { + "epoch": 0.54, + "grad_norm": 0.7382599115371704, + "learning_rate": 9.080304754138577e-06, + "loss": 2.0209, + "step": 16145 + }, + { + "epoch": 0.54, + "grad_norm": 0.7302808165550232, + "learning_rate": 9.07924637112801e-06, + "loss": 2.0301, + "step": 16146 + }, + { + "epoch": 0.54, + "grad_norm": 0.7310431599617004, + "learning_rate": 9.078187998519579e-06, + "loss": 2.0952, + "step": 16147 + }, + { + "epoch": 0.54, + "grad_norm": 0.7677083611488342, + "learning_rate": 9.077129636325242e-06, + "loss": 2.1208, + "step": 16148 + }, + { + "epoch": 0.54, + "grad_norm": 0.7403497099876404, + "learning_rate": 9.076071284556953e-06, + "loss": 2.0338, + "step": 16149 + }, + { + "epoch": 0.54, + "grad_norm": 0.7446951866149902, + "learning_rate": 9.075012943226667e-06, + "loss": 2.0691, + "step": 16150 + }, + { + "epoch": 0.54, + "grad_norm": 0.7945897579193115, + "learning_rate": 9.073954612346342e-06, + "loss": 2.111, + "step": 16151 + }, + { + "epoch": 0.54, + "grad_norm": 0.7324787378311157, + "learning_rate": 9.072896291927935e-06, + "loss": 2.0523, + "step": 16152 + }, + { + "epoch": 0.54, + "grad_norm": 0.7656278610229492, + "learning_rate": 9.071837981983408e-06, + "loss": 2.0457, + "step": 16153 + }, + { + "epoch": 0.54, + "grad_norm": 0.7410079836845398, + "learning_rate": 9.070779682524706e-06, + "loss": 2.0802, + "step": 16154 + }, + { + "epoch": 0.54, + "grad_norm": 0.7331940531730652, + "learning_rate": 9.069721393563792e-06, + "loss": 2.0261, + "step": 16155 + }, + { + "epoch": 0.54, + "grad_norm": 0.7268899083137512, + "learning_rate": 9.06866311511262e-06, + "loss": 2.0287, + "step": 16156 + }, + { + "epoch": 0.54, + "grad_norm": 0.7414692640304565, + "learning_rate": 9.067604847183149e-06, + "loss": 2.1066, + "step": 16157 + }, + { + "epoch": 0.54, + "grad_norm": 0.7182986736297607, + "learning_rate": 9.06654658978733e-06, + "loss": 2.0423, + "step": 16158 + }, + { + "epoch": 0.54, + "grad_norm": 0.7330798506736755, + "learning_rate": 9.06548834293712e-06, + "loss": 2.1183, + "step": 16159 + }, + { + "epoch": 0.54, + "grad_norm": 0.7413531541824341, + "learning_rate": 9.064430106644473e-06, + "loss": 2.092, + "step": 16160 + }, + { + "epoch": 0.54, + "grad_norm": 0.7121817469596863, + "learning_rate": 9.063371880921347e-06, + "loss": 2.0294, + "step": 16161 + }, + { + "epoch": 0.54, + "grad_norm": 0.7239598035812378, + "learning_rate": 9.062313665779701e-06, + "loss": 2.0272, + "step": 16162 + }, + { + "epoch": 0.54, + "grad_norm": 0.7395011186599731, + "learning_rate": 9.061255461231484e-06, + "loss": 2.0847, + "step": 16163 + }, + { + "epoch": 0.54, + "grad_norm": 0.7319414019584656, + "learning_rate": 9.060197267288651e-06, + "loss": 2.0674, + "step": 16164 + }, + { + "epoch": 0.54, + "grad_norm": 0.706632673740387, + "learning_rate": 9.059139083963158e-06, + "loss": 1.9886, + "step": 16165 + }, + { + "epoch": 0.54, + "grad_norm": 0.7694850564002991, + "learning_rate": 9.058080911266965e-06, + "loss": 2.0172, + "step": 16166 + }, + { + "epoch": 0.54, + "grad_norm": 0.7421568036079407, + "learning_rate": 9.057022749212018e-06, + "loss": 2.0633, + "step": 16167 + }, + { + "epoch": 0.54, + "grad_norm": 0.7180240750312805, + "learning_rate": 9.055964597810273e-06, + "loss": 2.1059, + "step": 16168 + }, + { + "epoch": 0.54, + "grad_norm": 0.7587441802024841, + "learning_rate": 9.05490645707369e-06, + "loss": 2.0178, + "step": 16169 + }, + { + "epoch": 0.54, + "grad_norm": 0.7535413503646851, + "learning_rate": 9.05384832701422e-06, + "loss": 2.04, + "step": 16170 + }, + { + "epoch": 0.54, + "grad_norm": 0.736610472202301, + "learning_rate": 9.052790207643817e-06, + "loss": 2.0879, + "step": 16171 + }, + { + "epoch": 0.54, + "grad_norm": 0.7255251407623291, + "learning_rate": 9.051732098974434e-06, + "loss": 2.0414, + "step": 16172 + }, + { + "epoch": 0.54, + "grad_norm": 0.7483540773391724, + "learning_rate": 9.050674001018025e-06, + "loss": 2.0733, + "step": 16173 + }, + { + "epoch": 0.54, + "grad_norm": 0.7389726042747498, + "learning_rate": 9.049615913786545e-06, + "loss": 2.0048, + "step": 16174 + }, + { + "epoch": 0.54, + "grad_norm": 0.7407342195510864, + "learning_rate": 9.048557837291954e-06, + "loss": 2.0505, + "step": 16175 + }, + { + "epoch": 0.54, + "grad_norm": 0.7210568189620972, + "learning_rate": 9.04749977154619e-06, + "loss": 2.034, + "step": 16176 + }, + { + "epoch": 0.54, + "grad_norm": 0.7721253037452698, + "learning_rate": 9.046441716561219e-06, + "loss": 2.0911, + "step": 16177 + }, + { + "epoch": 0.54, + "grad_norm": 0.7353857159614563, + "learning_rate": 9.04538367234899e-06, + "loss": 2.0862, + "step": 16178 + }, + { + "epoch": 0.54, + "grad_norm": 0.8708370923995972, + "learning_rate": 9.044325638921461e-06, + "loss": 1.9916, + "step": 16179 + }, + { + "epoch": 0.54, + "grad_norm": 0.7454116940498352, + "learning_rate": 9.043267616290576e-06, + "loss": 2.0955, + "step": 16180 + }, + { + "epoch": 0.54, + "grad_norm": 0.7351784706115723, + "learning_rate": 9.042209604468294e-06, + "loss": 2.0996, + "step": 16181 + }, + { + "epoch": 0.54, + "grad_norm": 0.7427186965942383, + "learning_rate": 9.041151603466565e-06, + "loss": 2.0771, + "step": 16182 + }, + { + "epoch": 0.54, + "grad_norm": 0.7075121402740479, + "learning_rate": 9.040093613297344e-06, + "loss": 2.0639, + "step": 16183 + }, + { + "epoch": 0.54, + "grad_norm": 0.7156421542167664, + "learning_rate": 9.039035633972585e-06, + "loss": 2.0304, + "step": 16184 + }, + { + "epoch": 0.54, + "grad_norm": 0.773158073425293, + "learning_rate": 9.037977665504236e-06, + "loss": 2.126, + "step": 16185 + }, + { + "epoch": 0.54, + "grad_norm": 0.7273066639900208, + "learning_rate": 9.036919707904254e-06, + "loss": 2.0138, + "step": 16186 + }, + { + "epoch": 0.54, + "grad_norm": 0.7115066051483154, + "learning_rate": 9.035861761184587e-06, + "loss": 2.1259, + "step": 16187 + }, + { + "epoch": 0.54, + "grad_norm": 0.7173439860343933, + "learning_rate": 9.034803825357191e-06, + "loss": 2.0793, + "step": 16188 + }, + { + "epoch": 0.54, + "grad_norm": 0.7285423874855042, + "learning_rate": 9.033745900434013e-06, + "loss": 2.0359, + "step": 16189 + }, + { + "epoch": 0.54, + "grad_norm": 0.7512083053588867, + "learning_rate": 9.032687986427006e-06, + "loss": 2.0584, + "step": 16190 + }, + { + "epoch": 0.54, + "grad_norm": 0.7425249814987183, + "learning_rate": 9.031630083348125e-06, + "loss": 2.111, + "step": 16191 + }, + { + "epoch": 0.54, + "grad_norm": 0.7439605593681335, + "learning_rate": 9.030572191209322e-06, + "loss": 2.0396, + "step": 16192 + }, + { + "epoch": 0.54, + "grad_norm": 0.7213541269302368, + "learning_rate": 9.029514310022546e-06, + "loss": 2.0104, + "step": 16193 + }, + { + "epoch": 0.54, + "grad_norm": 0.7300660610198975, + "learning_rate": 9.028456439799747e-06, + "loss": 2.1314, + "step": 16194 + }, + { + "epoch": 0.54, + "grad_norm": 0.7191729545593262, + "learning_rate": 9.027398580552878e-06, + "loss": 1.9987, + "step": 16195 + }, + { + "epoch": 0.54, + "grad_norm": 0.7377094626426697, + "learning_rate": 9.026340732293886e-06, + "loss": 2.0191, + "step": 16196 + }, + { + "epoch": 0.54, + "grad_norm": 0.7304640412330627, + "learning_rate": 9.025282895034734e-06, + "loss": 1.9988, + "step": 16197 + }, + { + "epoch": 0.54, + "grad_norm": 0.7341344952583313, + "learning_rate": 9.024225068787358e-06, + "loss": 1.9537, + "step": 16198 + }, + { + "epoch": 0.54, + "grad_norm": 0.7419582605361938, + "learning_rate": 9.023167253563717e-06, + "loss": 2.0686, + "step": 16199 + }, + { + "epoch": 0.54, + "grad_norm": 0.7607742547988892, + "learning_rate": 9.02210944937576e-06, + "loss": 2.1546, + "step": 16200 + }, + { + "epoch": 0.54, + "grad_norm": 0.7684308886528015, + "learning_rate": 9.021051656235435e-06, + "loss": 2.0936, + "step": 16201 + }, + { + "epoch": 0.54, + "grad_norm": 0.7703185081481934, + "learning_rate": 9.019993874154701e-06, + "loss": 2.0552, + "step": 16202 + }, + { + "epoch": 0.54, + "grad_norm": 0.7494840621948242, + "learning_rate": 9.018936103145499e-06, + "loss": 2.0515, + "step": 16203 + }, + { + "epoch": 0.54, + "grad_norm": 0.7157339453697205, + "learning_rate": 9.017878343219778e-06, + "loss": 2.0611, + "step": 16204 + }, + { + "epoch": 0.54, + "grad_norm": 0.7513536214828491, + "learning_rate": 9.016820594389495e-06, + "loss": 1.967, + "step": 16205 + }, + { + "epoch": 0.54, + "grad_norm": 0.7592445015907288, + "learning_rate": 9.015762856666599e-06, + "loss": 2.0943, + "step": 16206 + }, + { + "epoch": 0.54, + "grad_norm": 0.7481652498245239, + "learning_rate": 9.014705130063035e-06, + "loss": 2.0641, + "step": 16207 + }, + { + "epoch": 0.54, + "grad_norm": 0.7607617974281311, + "learning_rate": 9.013647414590755e-06, + "loss": 2.0, + "step": 16208 + }, + { + "epoch": 0.54, + "grad_norm": 0.7149009704589844, + "learning_rate": 9.012589710261706e-06, + "loss": 2.0335, + "step": 16209 + }, + { + "epoch": 0.54, + "grad_norm": 0.7259677648544312, + "learning_rate": 9.011532017087845e-06, + "loss": 2.0787, + "step": 16210 + }, + { + "epoch": 0.54, + "grad_norm": 0.7185119390487671, + "learning_rate": 9.010474335081115e-06, + "loss": 2.112, + "step": 16211 + }, + { + "epoch": 0.54, + "grad_norm": 0.7556877732276917, + "learning_rate": 9.009416664253462e-06, + "loss": 2.0783, + "step": 16212 + }, + { + "epoch": 0.54, + "grad_norm": 0.7617525458335876, + "learning_rate": 9.00835900461684e-06, + "loss": 2.1198, + "step": 16213 + }, + { + "epoch": 0.54, + "grad_norm": 0.7449831366539001, + "learning_rate": 9.0073013561832e-06, + "loss": 2.0579, + "step": 16214 + }, + { + "epoch": 0.54, + "grad_norm": 0.7478481531143188, + "learning_rate": 9.006243718964487e-06, + "loss": 1.9981, + "step": 16215 + }, + { + "epoch": 0.54, + "grad_norm": 0.7514544725418091, + "learning_rate": 9.005186092972647e-06, + "loss": 2.0855, + "step": 16216 + }, + { + "epoch": 0.54, + "grad_norm": 0.704127848148346, + "learning_rate": 9.00412847821963e-06, + "loss": 2.0071, + "step": 16217 + }, + { + "epoch": 0.54, + "grad_norm": 0.7376036643981934, + "learning_rate": 9.00307087471739e-06, + "loss": 2.0599, + "step": 16218 + }, + { + "epoch": 0.54, + "grad_norm": 0.7440323829650879, + "learning_rate": 9.002013282477873e-06, + "loss": 2.002, + "step": 16219 + }, + { + "epoch": 0.54, + "grad_norm": 0.7418403625488281, + "learning_rate": 9.000955701513022e-06, + "loss": 2.1034, + "step": 16220 + }, + { + "epoch": 0.54, + "grad_norm": 0.7720919251441956, + "learning_rate": 8.999898131834788e-06, + "loss": 2.1118, + "step": 16221 + }, + { + "epoch": 0.54, + "grad_norm": 0.7663979530334473, + "learning_rate": 8.998840573455119e-06, + "loss": 2.0998, + "step": 16222 + }, + { + "epoch": 0.54, + "grad_norm": 0.7163242101669312, + "learning_rate": 8.99778302638596e-06, + "loss": 2.1059, + "step": 16223 + }, + { + "epoch": 0.54, + "grad_norm": 0.7514809966087341, + "learning_rate": 8.996725490639267e-06, + "loss": 2.0792, + "step": 16224 + }, + { + "epoch": 0.54, + "grad_norm": 0.7812800407409668, + "learning_rate": 8.995667966226975e-06, + "loss": 2.0053, + "step": 16225 + }, + { + "epoch": 0.54, + "grad_norm": 0.7364043593406677, + "learning_rate": 8.99461045316104e-06, + "loss": 2.0753, + "step": 16226 + }, + { + "epoch": 0.54, + "grad_norm": 0.7410843372344971, + "learning_rate": 8.993552951453407e-06, + "loss": 2.0659, + "step": 16227 + }, + { + "epoch": 0.54, + "grad_norm": 0.732086181640625, + "learning_rate": 8.992495461116026e-06, + "loss": 2.0275, + "step": 16228 + }, + { + "epoch": 0.54, + "grad_norm": 0.7653730511665344, + "learning_rate": 8.991437982160838e-06, + "loss": 2.0618, + "step": 16229 + }, + { + "epoch": 0.54, + "grad_norm": 0.7546468377113342, + "learning_rate": 8.990380514599793e-06, + "loss": 2.0155, + "step": 16230 + }, + { + "epoch": 0.54, + "grad_norm": 0.787878692150116, + "learning_rate": 8.989323058444835e-06, + "loss": 2.0889, + "step": 16231 + }, + { + "epoch": 0.54, + "grad_norm": 0.7119103074073792, + "learning_rate": 8.988265613707914e-06, + "loss": 2.0806, + "step": 16232 + }, + { + "epoch": 0.54, + "grad_norm": 0.7557964324951172, + "learning_rate": 8.987208180400978e-06, + "loss": 2.1351, + "step": 16233 + }, + { + "epoch": 0.54, + "grad_norm": 0.763425886631012, + "learning_rate": 8.986150758535967e-06, + "loss": 2.0304, + "step": 16234 + }, + { + "epoch": 0.54, + "grad_norm": 0.7396345138549805, + "learning_rate": 8.985093348124833e-06, + "loss": 2.0696, + "step": 16235 + }, + { + "epoch": 0.54, + "grad_norm": 0.7193009257316589, + "learning_rate": 8.984035949179518e-06, + "loss": 2.0742, + "step": 16236 + }, + { + "epoch": 0.54, + "grad_norm": 0.7525489926338196, + "learning_rate": 8.982978561711973e-06, + "loss": 2.078, + "step": 16237 + }, + { + "epoch": 0.54, + "grad_norm": 0.7478075623512268, + "learning_rate": 8.981921185734136e-06, + "loss": 2.1097, + "step": 16238 + }, + { + "epoch": 0.54, + "grad_norm": 0.7698614001274109, + "learning_rate": 8.980863821257956e-06, + "loss": 2.076, + "step": 16239 + }, + { + "epoch": 0.54, + "grad_norm": 0.7490207552909851, + "learning_rate": 8.97980646829538e-06, + "loss": 2.0975, + "step": 16240 + }, + { + "epoch": 0.54, + "grad_norm": 0.709922730922699, + "learning_rate": 8.978749126858356e-06, + "loss": 1.9925, + "step": 16241 + }, + { + "epoch": 0.54, + "grad_norm": 0.7205783724784851, + "learning_rate": 8.977691796958823e-06, + "loss": 2.1036, + "step": 16242 + }, + { + "epoch": 0.54, + "grad_norm": 0.7257975935935974, + "learning_rate": 8.97663447860873e-06, + "loss": 2.0634, + "step": 16243 + }, + { + "epoch": 0.54, + "grad_norm": 0.7788102030754089, + "learning_rate": 8.97557717182002e-06, + "loss": 2.0918, + "step": 16244 + }, + { + "epoch": 0.54, + "grad_norm": 0.700707733631134, + "learning_rate": 8.974519876604637e-06, + "loss": 2.0723, + "step": 16245 + }, + { + "epoch": 0.54, + "grad_norm": 0.7587103247642517, + "learning_rate": 8.973462592974535e-06, + "loss": 2.0106, + "step": 16246 + }, + { + "epoch": 0.54, + "grad_norm": 0.7439965009689331, + "learning_rate": 8.972405320941644e-06, + "loss": 2.0749, + "step": 16247 + }, + { + "epoch": 0.54, + "grad_norm": 0.7697708606719971, + "learning_rate": 8.971348060517915e-06, + "loss": 2.1124, + "step": 16248 + }, + { + "epoch": 0.54, + "grad_norm": 0.7248289585113525, + "learning_rate": 8.970290811715296e-06, + "loss": 2.0574, + "step": 16249 + }, + { + "epoch": 0.54, + "grad_norm": 0.7576391696929932, + "learning_rate": 8.969233574545729e-06, + "loss": 2.0449, + "step": 16250 + }, + { + "epoch": 0.54, + "grad_norm": 0.7459337115287781, + "learning_rate": 8.968176349021153e-06, + "loss": 2.1233, + "step": 16251 + }, + { + "epoch": 0.54, + "grad_norm": 0.7211782932281494, + "learning_rate": 8.967119135153519e-06, + "loss": 2.0372, + "step": 16252 + }, + { + "epoch": 0.54, + "grad_norm": 0.7357760071754456, + "learning_rate": 8.966061932954762e-06, + "loss": 2.1153, + "step": 16253 + }, + { + "epoch": 0.54, + "grad_norm": 0.726985514163971, + "learning_rate": 8.965004742436837e-06, + "loss": 2.069, + "step": 16254 + }, + { + "epoch": 0.54, + "grad_norm": 0.7001355290412903, + "learning_rate": 8.963947563611682e-06, + "loss": 2.0795, + "step": 16255 + }, + { + "epoch": 0.54, + "grad_norm": 0.7488248348236084, + "learning_rate": 8.96289039649124e-06, + "loss": 2.0716, + "step": 16256 + }, + { + "epoch": 0.54, + "grad_norm": 0.7550780773162842, + "learning_rate": 8.961833241087452e-06, + "loss": 2.0653, + "step": 16257 + }, + { + "epoch": 0.54, + "grad_norm": 0.7502903938293457, + "learning_rate": 8.960776097412265e-06, + "loss": 2.1059, + "step": 16258 + }, + { + "epoch": 0.54, + "grad_norm": 0.7185998558998108, + "learning_rate": 8.959718965477622e-06, + "loss": 2.0884, + "step": 16259 + }, + { + "epoch": 0.54, + "grad_norm": 0.7165513038635254, + "learning_rate": 8.958661845295464e-06, + "loss": 2.032, + "step": 16260 + }, + { + "epoch": 0.54, + "grad_norm": 0.7208660244941711, + "learning_rate": 8.95760473687773e-06, + "loss": 2.0507, + "step": 16261 + }, + { + "epoch": 0.54, + "grad_norm": 0.7690922021865845, + "learning_rate": 8.95654764023637e-06, + "loss": 2.079, + "step": 16262 + }, + { + "epoch": 0.54, + "grad_norm": 0.7364292740821838, + "learning_rate": 8.955490555383325e-06, + "loss": 2.0876, + "step": 16263 + }, + { + "epoch": 0.54, + "grad_norm": 0.7338917255401611, + "learning_rate": 8.954433482330534e-06, + "loss": 2.027, + "step": 16264 + }, + { + "epoch": 0.54, + "grad_norm": 0.7267216444015503, + "learning_rate": 8.95337642108994e-06, + "loss": 2.1407, + "step": 16265 + }, + { + "epoch": 0.54, + "grad_norm": 0.7158786058425903, + "learning_rate": 8.952319371673486e-06, + "loss": 2.0967, + "step": 16266 + }, + { + "epoch": 0.54, + "grad_norm": 0.7620810866355896, + "learning_rate": 8.951262334093111e-06, + "loss": 2.0181, + "step": 16267 + }, + { + "epoch": 0.54, + "grad_norm": 0.7380293011665344, + "learning_rate": 8.950205308360767e-06, + "loss": 2.0822, + "step": 16268 + }, + { + "epoch": 0.54, + "grad_norm": 0.7258612513542175, + "learning_rate": 8.949148294488382e-06, + "loss": 2.1114, + "step": 16269 + }, + { + "epoch": 0.54, + "grad_norm": 0.7434398531913757, + "learning_rate": 8.948091292487905e-06, + "loss": 2.0492, + "step": 16270 + }, + { + "epoch": 0.54, + "grad_norm": 0.7498160004615784, + "learning_rate": 8.947034302371278e-06, + "loss": 2.0411, + "step": 16271 + }, + { + "epoch": 0.54, + "grad_norm": 0.75923752784729, + "learning_rate": 8.94597732415044e-06, + "loss": 2.0712, + "step": 16272 + }, + { + "epoch": 0.54, + "grad_norm": 0.7656378746032715, + "learning_rate": 8.944920357837332e-06, + "loss": 2.0592, + "step": 16273 + }, + { + "epoch": 0.54, + "grad_norm": 0.7707496881484985, + "learning_rate": 8.943863403443895e-06, + "loss": 2.0515, + "step": 16274 + }, + { + "epoch": 0.54, + "grad_norm": 0.7154850959777832, + "learning_rate": 8.942806460982066e-06, + "loss": 2.0965, + "step": 16275 + }, + { + "epoch": 0.54, + "grad_norm": 0.7067455649375916, + "learning_rate": 8.941749530463794e-06, + "loss": 2.0236, + "step": 16276 + }, + { + "epoch": 0.54, + "grad_norm": 0.7330570816993713, + "learning_rate": 8.940692611901018e-06, + "loss": 2.0634, + "step": 16277 + }, + { + "epoch": 0.54, + "grad_norm": 0.7584925889968872, + "learning_rate": 8.939635705305674e-06, + "loss": 2.0718, + "step": 16278 + }, + { + "epoch": 0.54, + "grad_norm": 0.7464617490768433, + "learning_rate": 8.938578810689704e-06, + "loss": 2.0745, + "step": 16279 + }, + { + "epoch": 0.54, + "grad_norm": 0.7350733876228333, + "learning_rate": 8.93752192806505e-06, + "loss": 2.0708, + "step": 16280 + }, + { + "epoch": 0.54, + "grad_norm": 0.7704419493675232, + "learning_rate": 8.936465057443651e-06, + "loss": 2.1003, + "step": 16281 + }, + { + "epoch": 0.54, + "grad_norm": 0.7061072587966919, + "learning_rate": 8.935408198837445e-06, + "loss": 1.9995, + "step": 16282 + }, + { + "epoch": 0.54, + "grad_norm": 0.7205577492713928, + "learning_rate": 8.93435135225837e-06, + "loss": 2.1138, + "step": 16283 + }, + { + "epoch": 0.54, + "grad_norm": 0.7163249850273132, + "learning_rate": 8.933294517718372e-06, + "loss": 2.0614, + "step": 16284 + }, + { + "epoch": 0.54, + "grad_norm": 0.7564570307731628, + "learning_rate": 8.932237695229388e-06, + "loss": 2.0772, + "step": 16285 + }, + { + "epoch": 0.54, + "grad_norm": 0.715981125831604, + "learning_rate": 8.931180884803358e-06, + "loss": 2.0685, + "step": 16286 + }, + { + "epoch": 0.54, + "grad_norm": 0.7391754984855652, + "learning_rate": 8.930124086452218e-06, + "loss": 2.0779, + "step": 16287 + }, + { + "epoch": 0.54, + "grad_norm": 0.7761326432228088, + "learning_rate": 8.929067300187908e-06, + "loss": 2.1213, + "step": 16288 + }, + { + "epoch": 0.54, + "grad_norm": 0.7507399916648865, + "learning_rate": 8.928010526022367e-06, + "loss": 2.1064, + "step": 16289 + }, + { + "epoch": 0.54, + "grad_norm": 0.7556437253952026, + "learning_rate": 8.926953763967542e-06, + "loss": 2.0909, + "step": 16290 + }, + { + "epoch": 0.54, + "grad_norm": 0.7838396430015564, + "learning_rate": 8.925897014035357e-06, + "loss": 2.0516, + "step": 16291 + }, + { + "epoch": 0.54, + "grad_norm": 0.7268619537353516, + "learning_rate": 8.92484027623776e-06, + "loss": 2.0941, + "step": 16292 + }, + { + "epoch": 0.54, + "grad_norm": 0.7410727739334106, + "learning_rate": 8.923783550586687e-06, + "loss": 2.0805, + "step": 16293 + }, + { + "epoch": 0.54, + "grad_norm": 0.753060519695282, + "learning_rate": 8.92272683709408e-06, + "loss": 2.0235, + "step": 16294 + }, + { + "epoch": 0.54, + "grad_norm": 0.7532007098197937, + "learning_rate": 8.921670135771873e-06, + "loss": 2.0638, + "step": 16295 + }, + { + "epoch": 0.54, + "grad_norm": 0.7364959120750427, + "learning_rate": 8.920613446632002e-06, + "loss": 2.1032, + "step": 16296 + }, + { + "epoch": 0.54, + "grad_norm": 0.7531649470329285, + "learning_rate": 8.919556769686408e-06, + "loss": 2.0525, + "step": 16297 + }, + { + "epoch": 0.54, + "grad_norm": 0.7300714254379272, + "learning_rate": 8.91850010494703e-06, + "loss": 2.0844, + "step": 16298 + }, + { + "epoch": 0.54, + "grad_norm": 0.7341073155403137, + "learning_rate": 8.917443452425804e-06, + "loss": 2.0725, + "step": 16299 + }, + { + "epoch": 0.54, + "grad_norm": 0.7265954613685608, + "learning_rate": 8.916386812134668e-06, + "loss": 2.0515, + "step": 16300 + }, + { + "epoch": 0.54, + "grad_norm": 0.7453778386116028, + "learning_rate": 8.915330184085556e-06, + "loss": 2.0457, + "step": 16301 + }, + { + "epoch": 0.54, + "grad_norm": 0.760808527469635, + "learning_rate": 8.914273568290408e-06, + "loss": 2.0789, + "step": 16302 + }, + { + "epoch": 0.54, + "grad_norm": 0.7292805910110474, + "learning_rate": 8.913216964761167e-06, + "loss": 2.0363, + "step": 16303 + }, + { + "epoch": 0.54, + "grad_norm": 0.7843396067619324, + "learning_rate": 8.912160373509759e-06, + "loss": 2.0192, + "step": 16304 + }, + { + "epoch": 0.54, + "grad_norm": 0.7307671904563904, + "learning_rate": 8.911103794548124e-06, + "loss": 2.0508, + "step": 16305 + }, + { + "epoch": 0.54, + "grad_norm": 0.7470925450325012, + "learning_rate": 8.910047227888202e-06, + "loss": 2.148, + "step": 16306 + }, + { + "epoch": 0.54, + "grad_norm": 0.7362359762191772, + "learning_rate": 8.908990673541928e-06, + "loss": 2.0578, + "step": 16307 + }, + { + "epoch": 0.54, + "grad_norm": 0.7493798732757568, + "learning_rate": 8.907934131521238e-06, + "loss": 2.0248, + "step": 16308 + }, + { + "epoch": 0.54, + "grad_norm": 0.7416777014732361, + "learning_rate": 8.90687760183807e-06, + "loss": 2.0812, + "step": 16309 + }, + { + "epoch": 0.54, + "grad_norm": 0.7177509069442749, + "learning_rate": 8.905821084504353e-06, + "loss": 2.1011, + "step": 16310 + }, + { + "epoch": 0.54, + "grad_norm": 0.7297325730323792, + "learning_rate": 8.904764579532031e-06, + "loss": 1.9583, + "step": 16311 + }, + { + "epoch": 0.54, + "grad_norm": 0.7191773653030396, + "learning_rate": 8.90370808693304e-06, + "loss": 2.0743, + "step": 16312 + }, + { + "epoch": 0.54, + "grad_norm": 0.7441610097885132, + "learning_rate": 8.902651606719308e-06, + "loss": 2.0935, + "step": 16313 + }, + { + "epoch": 0.54, + "grad_norm": 0.7911031246185303, + "learning_rate": 8.901595138902777e-06, + "loss": 2.1091, + "step": 16314 + }, + { + "epoch": 0.54, + "grad_norm": 0.7798601984977722, + "learning_rate": 8.90053868349538e-06, + "loss": 2.093, + "step": 16315 + }, + { + "epoch": 0.54, + "grad_norm": 0.7286093235015869, + "learning_rate": 8.89948224050905e-06, + "loss": 2.0557, + "step": 16316 + }, + { + "epoch": 0.54, + "grad_norm": 0.7270519137382507, + "learning_rate": 8.898425809955731e-06, + "loss": 2.0851, + "step": 16317 + }, + { + "epoch": 0.54, + "grad_norm": 0.731543779373169, + "learning_rate": 8.897369391847347e-06, + "loss": 2.1046, + "step": 16318 + }, + { + "epoch": 0.54, + "grad_norm": 0.7244449853897095, + "learning_rate": 8.896312986195837e-06, + "loss": 2.0213, + "step": 16319 + }, + { + "epoch": 0.54, + "grad_norm": 0.7579661011695862, + "learning_rate": 8.895256593013139e-06, + "loss": 2.1528, + "step": 16320 + }, + { + "epoch": 0.54, + "grad_norm": 0.7737211585044861, + "learning_rate": 8.894200212311185e-06, + "loss": 2.033, + "step": 16321 + }, + { + "epoch": 0.54, + "grad_norm": 0.7177257537841797, + "learning_rate": 8.893143844101906e-06, + "loss": 2.0686, + "step": 16322 + }, + { + "epoch": 0.54, + "grad_norm": 0.7622888088226318, + "learning_rate": 8.892087488397241e-06, + "loss": 2.0876, + "step": 16323 + }, + { + "epoch": 0.54, + "grad_norm": 0.7203890085220337, + "learning_rate": 8.891031145209118e-06, + "loss": 2.0547, + "step": 16324 + }, + { + "epoch": 0.54, + "grad_norm": 0.7218117117881775, + "learning_rate": 8.889974814549483e-06, + "loss": 1.9708, + "step": 16325 + }, + { + "epoch": 0.54, + "grad_norm": 0.7412674427032471, + "learning_rate": 8.888918496430255e-06, + "loss": 2.1429, + "step": 16326 + }, + { + "epoch": 0.54, + "grad_norm": 0.7095749974250793, + "learning_rate": 8.887862190863378e-06, + "loss": 2.0996, + "step": 16327 + }, + { + "epoch": 0.54, + "grad_norm": 0.7347303032875061, + "learning_rate": 8.88680589786078e-06, + "loss": 2.0658, + "step": 16328 + }, + { + "epoch": 0.54, + "grad_norm": 0.7229824066162109, + "learning_rate": 8.885749617434399e-06, + "loss": 2.0856, + "step": 16329 + }, + { + "epoch": 0.54, + "grad_norm": 0.7854555249214172, + "learning_rate": 8.884693349596168e-06, + "loss": 1.9825, + "step": 16330 + }, + { + "epoch": 0.54, + "grad_norm": 0.7309315800666809, + "learning_rate": 8.883637094358014e-06, + "loss": 2.1037, + "step": 16331 + }, + { + "epoch": 0.54, + "grad_norm": 0.750438392162323, + "learning_rate": 8.882580851731872e-06, + "loss": 2.1121, + "step": 16332 + }, + { + "epoch": 0.54, + "grad_norm": 0.7555505633354187, + "learning_rate": 8.88152462172968e-06, + "loss": 2.0725, + "step": 16333 + }, + { + "epoch": 0.54, + "grad_norm": 0.7532895803451538, + "learning_rate": 8.880468404363368e-06, + "loss": 2.0532, + "step": 16334 + }, + { + "epoch": 0.54, + "grad_norm": 0.7346996068954468, + "learning_rate": 8.879412199644866e-06, + "loss": 2.0612, + "step": 16335 + }, + { + "epoch": 0.54, + "grad_norm": 0.7353106141090393, + "learning_rate": 8.87835600758611e-06, + "loss": 2.0661, + "step": 16336 + }, + { + "epoch": 0.54, + "grad_norm": 0.7091019749641418, + "learning_rate": 8.877299828199027e-06, + "loss": 2.0991, + "step": 16337 + }, + { + "epoch": 0.54, + "grad_norm": 0.7403883934020996, + "learning_rate": 8.876243661495553e-06, + "loss": 2.0793, + "step": 16338 + }, + { + "epoch": 0.54, + "grad_norm": 0.7267485857009888, + "learning_rate": 8.875187507487626e-06, + "loss": 2.0315, + "step": 16339 + }, + { + "epoch": 0.54, + "grad_norm": 0.735567569732666, + "learning_rate": 8.874131366187165e-06, + "loss": 2.0554, + "step": 16340 + }, + { + "epoch": 0.54, + "grad_norm": 0.7663167715072632, + "learning_rate": 8.873075237606108e-06, + "loss": 2.0465, + "step": 16341 + }, + { + "epoch": 0.54, + "grad_norm": 0.7100227475166321, + "learning_rate": 8.872019121756388e-06, + "loss": 2.1102, + "step": 16342 + }, + { + "epoch": 0.54, + "grad_norm": 0.7401384711265564, + "learning_rate": 8.870963018649937e-06, + "loss": 2.0062, + "step": 16343 + }, + { + "epoch": 0.54, + "grad_norm": 0.7168064117431641, + "learning_rate": 8.869906928298683e-06, + "loss": 2.0104, + "step": 16344 + }, + { + "epoch": 0.54, + "grad_norm": 0.7407863736152649, + "learning_rate": 8.868850850714556e-06, + "loss": 2.0572, + "step": 16345 + }, + { + "epoch": 0.54, + "grad_norm": 0.7458263635635376, + "learning_rate": 8.86779478590949e-06, + "loss": 2.0737, + "step": 16346 + }, + { + "epoch": 0.54, + "grad_norm": 0.7497252821922302, + "learning_rate": 8.866738733895415e-06, + "loss": 2.0545, + "step": 16347 + }, + { + "epoch": 0.54, + "grad_norm": 0.7619100213050842, + "learning_rate": 8.865682694684263e-06, + "loss": 2.0858, + "step": 16348 + }, + { + "epoch": 0.54, + "grad_norm": 0.7518855929374695, + "learning_rate": 8.864626668287963e-06, + "loss": 2.0811, + "step": 16349 + }, + { + "epoch": 0.54, + "grad_norm": 0.7021903395652771, + "learning_rate": 8.863570654718444e-06, + "loss": 2.0255, + "step": 16350 + }, + { + "epoch": 0.54, + "grad_norm": 0.7539703845977783, + "learning_rate": 8.862514653987639e-06, + "loss": 1.997, + "step": 16351 + }, + { + "epoch": 0.54, + "grad_norm": 0.7241875529289246, + "learning_rate": 8.861458666107479e-06, + "loss": 2.0855, + "step": 16352 + }, + { + "epoch": 0.54, + "grad_norm": 0.7283573746681213, + "learning_rate": 8.860402691089888e-06, + "loss": 2.0164, + "step": 16353 + }, + { + "epoch": 0.54, + "grad_norm": 0.7650176286697388, + "learning_rate": 8.859346728946797e-06, + "loss": 2.0927, + "step": 16354 + }, + { + "epoch": 0.54, + "grad_norm": 0.7596832513809204, + "learning_rate": 8.858290779690142e-06, + "loss": 2.1711, + "step": 16355 + }, + { + "epoch": 0.54, + "grad_norm": 0.7846308350563049, + "learning_rate": 8.857234843331852e-06, + "loss": 2.0723, + "step": 16356 + }, + { + "epoch": 0.54, + "grad_norm": 0.7709128260612488, + "learning_rate": 8.856178919883849e-06, + "loss": 2.0588, + "step": 16357 + }, + { + "epoch": 0.54, + "grad_norm": 0.7674776315689087, + "learning_rate": 8.855123009358066e-06, + "loss": 2.1107, + "step": 16358 + }, + { + "epoch": 0.54, + "grad_norm": 0.734981894493103, + "learning_rate": 8.854067111766433e-06, + "loss": 2.0562, + "step": 16359 + }, + { + "epoch": 0.54, + "grad_norm": 0.7432228326797485, + "learning_rate": 8.853011227120875e-06, + "loss": 2.0074, + "step": 16360 + }, + { + "epoch": 0.54, + "grad_norm": 0.7195361256599426, + "learning_rate": 8.851955355433332e-06, + "loss": 2.1244, + "step": 16361 + }, + { + "epoch": 0.54, + "grad_norm": 0.7259693741798401, + "learning_rate": 8.850899496715718e-06, + "loss": 2.0712, + "step": 16362 + }, + { + "epoch": 0.54, + "grad_norm": 0.7404330968856812, + "learning_rate": 8.84984365097997e-06, + "loss": 2.0101, + "step": 16363 + }, + { + "epoch": 0.54, + "grad_norm": 0.7575764060020447, + "learning_rate": 8.848787818238013e-06, + "loss": 2.0347, + "step": 16364 + }, + { + "epoch": 0.54, + "grad_norm": 0.7332408428192139, + "learning_rate": 8.847731998501781e-06, + "loss": 2.0897, + "step": 16365 + }, + { + "epoch": 0.54, + "grad_norm": 0.7336429953575134, + "learning_rate": 8.846676191783193e-06, + "loss": 2.1056, + "step": 16366 + }, + { + "epoch": 0.54, + "grad_norm": 0.7381898164749146, + "learning_rate": 8.845620398094184e-06, + "loss": 2.0875, + "step": 16367 + }, + { + "epoch": 0.54, + "grad_norm": 0.7727584838867188, + "learning_rate": 8.844564617446674e-06, + "loss": 2.0834, + "step": 16368 + }, + { + "epoch": 0.54, + "grad_norm": 0.7485928535461426, + "learning_rate": 8.8435088498526e-06, + "loss": 2.0019, + "step": 16369 + }, + { + "epoch": 0.54, + "grad_norm": 0.753943920135498, + "learning_rate": 8.842453095323887e-06, + "loss": 2.0938, + "step": 16370 + }, + { + "epoch": 0.54, + "grad_norm": 0.7791716456413269, + "learning_rate": 8.841397353872459e-06, + "loss": 2.0941, + "step": 16371 + }, + { + "epoch": 0.54, + "grad_norm": 0.7774559259414673, + "learning_rate": 8.840341625510243e-06, + "loss": 2.0225, + "step": 16372 + }, + { + "epoch": 0.54, + "grad_norm": 0.7509011030197144, + "learning_rate": 8.839285910249168e-06, + "loss": 2.0519, + "step": 16373 + }, + { + "epoch": 0.54, + "grad_norm": 0.76008141040802, + "learning_rate": 8.838230208101165e-06, + "loss": 2.0619, + "step": 16374 + }, + { + "epoch": 0.54, + "grad_norm": 0.7372307181358337, + "learning_rate": 8.837174519078153e-06, + "loss": 2.048, + "step": 16375 + }, + { + "epoch": 0.54, + "grad_norm": 0.7336348295211792, + "learning_rate": 8.836118843192059e-06, + "loss": 2.0739, + "step": 16376 + }, + { + "epoch": 0.54, + "grad_norm": 0.731756865978241, + "learning_rate": 8.835063180454815e-06, + "loss": 2.0933, + "step": 16377 + }, + { + "epoch": 0.54, + "grad_norm": 0.726236879825592, + "learning_rate": 8.834007530878343e-06, + "loss": 2.1016, + "step": 16378 + }, + { + "epoch": 0.54, + "grad_norm": 0.7615071535110474, + "learning_rate": 8.832951894474574e-06, + "loss": 1.9864, + "step": 16379 + }, + { + "epoch": 0.54, + "grad_norm": 0.7209869623184204, + "learning_rate": 8.831896271255429e-06, + "loss": 2.1012, + "step": 16380 + }, + { + "epoch": 0.54, + "grad_norm": 0.7409300208091736, + "learning_rate": 8.830840661232833e-06, + "loss": 2.0608, + "step": 16381 + }, + { + "epoch": 0.55, + "grad_norm": 0.7446460127830505, + "learning_rate": 8.829785064418713e-06, + "loss": 2.0783, + "step": 16382 + }, + { + "epoch": 0.55, + "grad_norm": 0.7276361584663391, + "learning_rate": 8.828729480825003e-06, + "loss": 2.0929, + "step": 16383 + }, + { + "epoch": 0.55, + "grad_norm": 0.7155804634094238, + "learning_rate": 8.827673910463614e-06, + "loss": 1.9797, + "step": 16384 + }, + { + "epoch": 0.55, + "grad_norm": 0.7350172400474548, + "learning_rate": 8.82661835334648e-06, + "loss": 2.0749, + "step": 16385 + }, + { + "epoch": 0.55, + "grad_norm": 0.7399376630783081, + "learning_rate": 8.825562809485523e-06, + "loss": 2.0603, + "step": 16386 + }, + { + "epoch": 0.55, + "grad_norm": 0.7186457514762878, + "learning_rate": 8.824507278892673e-06, + "loss": 2.0803, + "step": 16387 + }, + { + "epoch": 0.55, + "grad_norm": 0.7316640019416809, + "learning_rate": 8.82345176157985e-06, + "loss": 2.0311, + "step": 16388 + }, + { + "epoch": 0.55, + "grad_norm": 0.7412613034248352, + "learning_rate": 8.822396257558976e-06, + "loss": 2.1119, + "step": 16389 + }, + { + "epoch": 0.55, + "grad_norm": 0.7315983772277832, + "learning_rate": 8.821340766841979e-06, + "loss": 2.0947, + "step": 16390 + }, + { + "epoch": 0.55, + "grad_norm": 0.7742661237716675, + "learning_rate": 8.820285289440784e-06, + "loss": 2.0993, + "step": 16391 + }, + { + "epoch": 0.55, + "grad_norm": 0.7630462050437927, + "learning_rate": 8.819229825367318e-06, + "loss": 2.1406, + "step": 16392 + }, + { + "epoch": 0.55, + "grad_norm": 0.9673565626144409, + "learning_rate": 8.8181743746335e-06, + "loss": 2.0757, + "step": 16393 + }, + { + "epoch": 0.55, + "grad_norm": 0.7499637007713318, + "learning_rate": 8.817118937251253e-06, + "loss": 2.1382, + "step": 16394 + }, + { + "epoch": 0.55, + "grad_norm": 0.7543171644210815, + "learning_rate": 8.816063513232502e-06, + "loss": 2.01, + "step": 16395 + }, + { + "epoch": 0.55, + "grad_norm": 0.7438796758651733, + "learning_rate": 8.815008102589178e-06, + "loss": 2.0917, + "step": 16396 + }, + { + "epoch": 0.55, + "grad_norm": 0.7333088517189026, + "learning_rate": 8.813952705333193e-06, + "loss": 2.1347, + "step": 16397 + }, + { + "epoch": 0.55, + "grad_norm": 0.7183871865272522, + "learning_rate": 8.812897321476473e-06, + "loss": 2.0638, + "step": 16398 + }, + { + "epoch": 0.55, + "grad_norm": 0.7447047829627991, + "learning_rate": 8.811841951030946e-06, + "loss": 2.0168, + "step": 16399 + }, + { + "epoch": 0.55, + "grad_norm": 0.7352822422981262, + "learning_rate": 8.810786594008533e-06, + "loss": 1.9715, + "step": 16400 + }, + { + "epoch": 0.55, + "grad_norm": 0.7336325645446777, + "learning_rate": 8.809731250421157e-06, + "loss": 2.0754, + "step": 16401 + }, + { + "epoch": 0.55, + "grad_norm": 0.727049708366394, + "learning_rate": 8.808675920280739e-06, + "loss": 2.1251, + "step": 16402 + }, + { + "epoch": 0.55, + "grad_norm": 0.7498889565467834, + "learning_rate": 8.8076206035992e-06, + "loss": 2.0328, + "step": 16403 + }, + { + "epoch": 0.55, + "grad_norm": 0.7203955054283142, + "learning_rate": 8.806565300388465e-06, + "loss": 1.9741, + "step": 16404 + }, + { + "epoch": 0.55, + "grad_norm": 0.7142823934555054, + "learning_rate": 8.805510010660462e-06, + "loss": 2.0366, + "step": 16405 + }, + { + "epoch": 0.55, + "grad_norm": 0.7268344163894653, + "learning_rate": 8.804454734427099e-06, + "loss": 2.0161, + "step": 16406 + }, + { + "epoch": 0.55, + "grad_norm": 0.7361729741096497, + "learning_rate": 8.803399471700309e-06, + "loss": 2.0878, + "step": 16407 + }, + { + "epoch": 0.55, + "grad_norm": 0.7373544573783875, + "learning_rate": 8.80234422249201e-06, + "loss": 2.1055, + "step": 16408 + }, + { + "epoch": 0.55, + "grad_norm": 0.762712836265564, + "learning_rate": 8.801288986814123e-06, + "loss": 2.0677, + "step": 16409 + }, + { + "epoch": 0.55, + "grad_norm": 0.7404637932777405, + "learning_rate": 8.800233764678574e-06, + "loss": 1.9704, + "step": 16410 + }, + { + "epoch": 0.55, + "grad_norm": 0.721305787563324, + "learning_rate": 8.799178556097278e-06, + "loss": 2.0269, + "step": 16411 + }, + { + "epoch": 0.55, + "grad_norm": 0.7558053731918335, + "learning_rate": 8.798123361082159e-06, + "loss": 2.0937, + "step": 16412 + }, + { + "epoch": 0.55, + "grad_norm": 0.7867981195449829, + "learning_rate": 8.797068179645139e-06, + "loss": 2.1392, + "step": 16413 + }, + { + "epoch": 0.55, + "grad_norm": 0.7282512187957764, + "learning_rate": 8.79601301179814e-06, + "loss": 2.0511, + "step": 16414 + }, + { + "epoch": 0.55, + "grad_norm": 0.7018710374832153, + "learning_rate": 8.794957857553076e-06, + "loss": 2.0441, + "step": 16415 + }, + { + "epoch": 0.55, + "grad_norm": 0.7805810570716858, + "learning_rate": 8.793902716921874e-06, + "loss": 2.0117, + "step": 16416 + }, + { + "epoch": 0.55, + "grad_norm": 0.757597804069519, + "learning_rate": 8.792847589916449e-06, + "loss": 2.0694, + "step": 16417 + }, + { + "epoch": 0.55, + "grad_norm": 0.7313772439956665, + "learning_rate": 8.791792476548731e-06, + "loss": 2.0916, + "step": 16418 + }, + { + "epoch": 0.55, + "grad_norm": 0.7542209625244141, + "learning_rate": 8.790737376830628e-06, + "loss": 2.0449, + "step": 16419 + }, + { + "epoch": 0.55, + "grad_norm": 0.7357569932937622, + "learning_rate": 8.789682290774067e-06, + "loss": 2.0325, + "step": 16420 + }, + { + "epoch": 0.55, + "grad_norm": 0.749394953250885, + "learning_rate": 8.788627218390965e-06, + "loss": 2.053, + "step": 16421 + }, + { + "epoch": 0.55, + "grad_norm": 0.7415332794189453, + "learning_rate": 8.787572159693245e-06, + "loss": 2.0296, + "step": 16422 + }, + { + "epoch": 0.55, + "grad_norm": 0.7371897101402283, + "learning_rate": 8.786517114692826e-06, + "loss": 2.0258, + "step": 16423 + }, + { + "epoch": 0.55, + "grad_norm": 0.7630943655967712, + "learning_rate": 8.785462083401622e-06, + "loss": 2.1062, + "step": 16424 + }, + { + "epoch": 0.55, + "grad_norm": 0.7523031830787659, + "learning_rate": 8.784407065831554e-06, + "loss": 2.1208, + "step": 16425 + }, + { + "epoch": 0.55, + "grad_norm": 0.7253754734992981, + "learning_rate": 8.783352061994545e-06, + "loss": 2.0302, + "step": 16426 + }, + { + "epoch": 0.55, + "grad_norm": 0.7486311197280884, + "learning_rate": 8.782297071902512e-06, + "loss": 2.1541, + "step": 16427 + }, + { + "epoch": 0.55, + "grad_norm": 0.7621978521347046, + "learning_rate": 8.781242095567373e-06, + "loss": 2.1069, + "step": 16428 + }, + { + "epoch": 0.55, + "grad_norm": 0.7580153346061707, + "learning_rate": 8.780187133001046e-06, + "loss": 2.085, + "step": 16429 + }, + { + "epoch": 0.55, + "grad_norm": 0.7255485653877258, + "learning_rate": 8.779132184215449e-06, + "loss": 1.9676, + "step": 16430 + }, + { + "epoch": 0.55, + "grad_norm": 0.7207717895507812, + "learning_rate": 8.7780772492225e-06, + "loss": 2.064, + "step": 16431 + }, + { + "epoch": 0.55, + "grad_norm": 0.73396235704422, + "learning_rate": 8.777022328034123e-06, + "loss": 2.1004, + "step": 16432 + }, + { + "epoch": 0.55, + "grad_norm": 0.7451662421226501, + "learning_rate": 8.775967420662227e-06, + "loss": 2.1047, + "step": 16433 + }, + { + "epoch": 0.55, + "grad_norm": 0.7580092549324036, + "learning_rate": 8.774912527118734e-06, + "loss": 2.0853, + "step": 16434 + }, + { + "epoch": 0.55, + "grad_norm": 0.7154918909072876, + "learning_rate": 8.773857647415562e-06, + "loss": 2.0127, + "step": 16435 + }, + { + "epoch": 0.55, + "grad_norm": 0.759900689125061, + "learning_rate": 8.77280278156463e-06, + "loss": 2.0175, + "step": 16436 + }, + { + "epoch": 0.55, + "grad_norm": 0.7003214955329895, + "learning_rate": 8.77174792957785e-06, + "loss": 2.0018, + "step": 16437 + }, + { + "epoch": 0.55, + "grad_norm": 0.7213950753211975, + "learning_rate": 8.770693091467142e-06, + "loss": 2.0408, + "step": 16438 + }, + { + "epoch": 0.55, + "grad_norm": 0.7741171717643738, + "learning_rate": 8.769638267244423e-06, + "loss": 2.0574, + "step": 16439 + }, + { + "epoch": 0.55, + "grad_norm": 0.7192155718803406, + "learning_rate": 8.768583456921613e-06, + "loss": 2.0653, + "step": 16440 + }, + { + "epoch": 0.55, + "grad_norm": 0.7209720611572266, + "learning_rate": 8.76752866051062e-06, + "loss": 1.9983, + "step": 16441 + }, + { + "epoch": 0.55, + "grad_norm": 0.7691717147827148, + "learning_rate": 8.76647387802337e-06, + "loss": 2.1055, + "step": 16442 + }, + { + "epoch": 0.55, + "grad_norm": 0.7311484217643738, + "learning_rate": 8.765419109471773e-06, + "loss": 2.0706, + "step": 16443 + }, + { + "epoch": 0.55, + "grad_norm": 0.7514926195144653, + "learning_rate": 8.76436435486775e-06, + "loss": 2.032, + "step": 16444 + }, + { + "epoch": 0.55, + "grad_norm": 0.7472614645957947, + "learning_rate": 8.763309614223215e-06, + "loss": 1.9625, + "step": 16445 + }, + { + "epoch": 0.55, + "grad_norm": 0.7446077466011047, + "learning_rate": 8.76225488755008e-06, + "loss": 2.0227, + "step": 16446 + }, + { + "epoch": 0.55, + "grad_norm": 0.7139190435409546, + "learning_rate": 8.761200174860262e-06, + "loss": 2.0222, + "step": 16447 + }, + { + "epoch": 0.55, + "grad_norm": 0.7562984228134155, + "learning_rate": 8.760145476165681e-06, + "loss": 2.0708, + "step": 16448 + }, + { + "epoch": 0.55, + "grad_norm": 0.7522115111351013, + "learning_rate": 8.759090791478253e-06, + "loss": 2.0889, + "step": 16449 + }, + { + "epoch": 0.55, + "grad_norm": 0.7712689638137817, + "learning_rate": 8.758036120809887e-06, + "loss": 2.0916, + "step": 16450 + }, + { + "epoch": 0.55, + "grad_norm": 0.7339121699333191, + "learning_rate": 8.7569814641725e-06, + "loss": 2.1025, + "step": 16451 + }, + { + "epoch": 0.55, + "grad_norm": 0.7472110986709595, + "learning_rate": 8.75592682157801e-06, + "loss": 2.0213, + "step": 16452 + }, + { + "epoch": 0.55, + "grad_norm": 0.7393128275871277, + "learning_rate": 8.754872193038326e-06, + "loss": 2.0774, + "step": 16453 + }, + { + "epoch": 0.55, + "grad_norm": 0.7281447649002075, + "learning_rate": 8.753817578565372e-06, + "loss": 2.0659, + "step": 16454 + }, + { + "epoch": 0.55, + "grad_norm": 0.7336798310279846, + "learning_rate": 8.752762978171052e-06, + "loss": 2.0236, + "step": 16455 + }, + { + "epoch": 0.55, + "grad_norm": 0.7374682426452637, + "learning_rate": 8.751708391867286e-06, + "loss": 2.0149, + "step": 16456 + }, + { + "epoch": 0.55, + "grad_norm": 0.8017622828483582, + "learning_rate": 8.750653819665987e-06, + "loss": 2.04, + "step": 16457 + }, + { + "epoch": 0.55, + "grad_norm": 0.7338774800300598, + "learning_rate": 8.749599261579072e-06, + "loss": 2.1439, + "step": 16458 + }, + { + "epoch": 0.55, + "grad_norm": 0.7582857608795166, + "learning_rate": 8.748544717618449e-06, + "loss": 2.1613, + "step": 16459 + }, + { + "epoch": 0.55, + "grad_norm": 0.7346146702766418, + "learning_rate": 8.747490187796035e-06, + "loss": 2.0802, + "step": 16460 + }, + { + "epoch": 0.55, + "grad_norm": 0.7700697183609009, + "learning_rate": 8.74643567212374e-06, + "loss": 2.0834, + "step": 16461 + }, + { + "epoch": 0.55, + "grad_norm": 0.7341357469558716, + "learning_rate": 8.745381170613483e-06, + "loss": 2.0131, + "step": 16462 + }, + { + "epoch": 0.55, + "grad_norm": 0.7856462597846985, + "learning_rate": 8.744326683277175e-06, + "loss": 2.0972, + "step": 16463 + }, + { + "epoch": 0.55, + "grad_norm": 0.7200462818145752, + "learning_rate": 8.743272210126728e-06, + "loss": 2.0518, + "step": 16464 + }, + { + "epoch": 0.55, + "grad_norm": 0.7624671459197998, + "learning_rate": 8.742217751174053e-06, + "loss": 2.1662, + "step": 16465 + }, + { + "epoch": 0.55, + "grad_norm": 0.7486549019813538, + "learning_rate": 8.741163306431066e-06, + "loss": 2.0891, + "step": 16466 + }, + { + "epoch": 0.55, + "grad_norm": 0.7333300113677979, + "learning_rate": 8.74010887590968e-06, + "loss": 2.106, + "step": 16467 + }, + { + "epoch": 0.55, + "grad_norm": 0.7472153306007385, + "learning_rate": 8.739054459621803e-06, + "loss": 2.1254, + "step": 16468 + }, + { + "epoch": 0.55, + "grad_norm": 0.7572233080863953, + "learning_rate": 8.738000057579348e-06, + "loss": 2.0237, + "step": 16469 + }, + { + "epoch": 0.55, + "grad_norm": 0.7770799398422241, + "learning_rate": 8.736945669794231e-06, + "loss": 2.1385, + "step": 16470 + }, + { + "epoch": 0.55, + "grad_norm": 0.7321068048477173, + "learning_rate": 8.735891296278363e-06, + "loss": 2.084, + "step": 16471 + }, + { + "epoch": 0.55, + "grad_norm": 0.7369509339332581, + "learning_rate": 8.734836937043654e-06, + "loss": 2.0601, + "step": 16472 + }, + { + "epoch": 0.55, + "grad_norm": 0.7463334798812866, + "learning_rate": 8.733782592102014e-06, + "loss": 2.0082, + "step": 16473 + }, + { + "epoch": 0.55, + "grad_norm": 0.7468870282173157, + "learning_rate": 8.732728261465356e-06, + "loss": 2.0951, + "step": 16474 + }, + { + "epoch": 0.55, + "grad_norm": 0.7458130717277527, + "learning_rate": 8.731673945145591e-06, + "loss": 2.0892, + "step": 16475 + }, + { + "epoch": 0.55, + "grad_norm": 0.7328843474388123, + "learning_rate": 8.730619643154637e-06, + "loss": 1.9699, + "step": 16476 + }, + { + "epoch": 0.55, + "grad_norm": 0.7309742569923401, + "learning_rate": 8.72956535550439e-06, + "loss": 2.1138, + "step": 16477 + }, + { + "epoch": 0.55, + "grad_norm": 0.761681854724884, + "learning_rate": 8.728511082206771e-06, + "loss": 2.0866, + "step": 16478 + }, + { + "epoch": 0.55, + "grad_norm": 0.7546001076698303, + "learning_rate": 8.72745682327369e-06, + "loss": 2.0924, + "step": 16479 + }, + { + "epoch": 0.55, + "grad_norm": 0.7201979756355286, + "learning_rate": 8.726402578717057e-06, + "loss": 2.068, + "step": 16480 + }, + { + "epoch": 0.55, + "grad_norm": 0.7547780871391296, + "learning_rate": 8.725348348548781e-06, + "loss": 2.0795, + "step": 16481 + }, + { + "epoch": 0.55, + "grad_norm": 0.7751436233520508, + "learning_rate": 8.724294132780772e-06, + "loss": 2.0831, + "step": 16482 + }, + { + "epoch": 0.55, + "grad_norm": 0.7417458295822144, + "learning_rate": 8.723239931424937e-06, + "loss": 2.0504, + "step": 16483 + }, + { + "epoch": 0.55, + "grad_norm": 0.7622350454330444, + "learning_rate": 8.722185744493192e-06, + "loss": 2.031, + "step": 16484 + }, + { + "epoch": 0.55, + "grad_norm": 0.758143424987793, + "learning_rate": 8.721131571997446e-06, + "loss": 2.0416, + "step": 16485 + }, + { + "epoch": 0.55, + "grad_norm": 0.7258217334747314, + "learning_rate": 8.720077413949601e-06, + "loss": 2.1094, + "step": 16486 + }, + { + "epoch": 0.55, + "grad_norm": 0.7180361151695251, + "learning_rate": 8.719023270361575e-06, + "loss": 2.1265, + "step": 16487 + }, + { + "epoch": 0.55, + "grad_norm": 0.7318562865257263, + "learning_rate": 8.717969141245272e-06, + "loss": 2.0622, + "step": 16488 + }, + { + "epoch": 0.55, + "grad_norm": 0.7662108540534973, + "learning_rate": 8.716915026612603e-06, + "loss": 2.0706, + "step": 16489 + }, + { + "epoch": 0.55, + "grad_norm": 0.7394196391105652, + "learning_rate": 8.715860926475476e-06, + "loss": 2.0144, + "step": 16490 + }, + { + "epoch": 0.55, + "grad_norm": 0.7161291241645813, + "learning_rate": 8.714806840845797e-06, + "loss": 2.0527, + "step": 16491 + }, + { + "epoch": 0.55, + "grad_norm": 0.7171561121940613, + "learning_rate": 8.713752769735478e-06, + "loss": 2.0639, + "step": 16492 + }, + { + "epoch": 0.55, + "grad_norm": 0.7340219020843506, + "learning_rate": 8.712698713156427e-06, + "loss": 2.046, + "step": 16493 + }, + { + "epoch": 0.55, + "grad_norm": 0.7242436408996582, + "learning_rate": 8.711644671120555e-06, + "loss": 2.0917, + "step": 16494 + }, + { + "epoch": 0.55, + "grad_norm": 0.7369041442871094, + "learning_rate": 8.710590643639762e-06, + "loss": 1.9866, + "step": 16495 + }, + { + "epoch": 0.55, + "grad_norm": 0.7433868646621704, + "learning_rate": 8.709536630725961e-06, + "loss": 1.9983, + "step": 16496 + }, + { + "epoch": 0.55, + "grad_norm": 0.7507178783416748, + "learning_rate": 8.708482632391058e-06, + "loss": 1.962, + "step": 16497 + }, + { + "epoch": 0.55, + "grad_norm": 0.7221167683601379, + "learning_rate": 8.707428648646968e-06, + "loss": 2.0879, + "step": 16498 + }, + { + "epoch": 0.55, + "grad_norm": 0.718222439289093, + "learning_rate": 8.706374679505584e-06, + "loss": 2.048, + "step": 16499 + }, + { + "epoch": 0.55, + "grad_norm": 0.7409595847129822, + "learning_rate": 8.705320724978822e-06, + "loss": 2.1006, + "step": 16500 + }, + { + "epoch": 0.55, + "grad_norm": 0.7389588952064514, + "learning_rate": 8.70426678507859e-06, + "loss": 2.0059, + "step": 16501 + }, + { + "epoch": 0.55, + "grad_norm": 0.7479934692382812, + "learning_rate": 8.703212859816793e-06, + "loss": 2.1182, + "step": 16502 + }, + { + "epoch": 0.55, + "grad_norm": 0.8031801581382751, + "learning_rate": 8.702158949205336e-06, + "loss": 2.0686, + "step": 16503 + }, + { + "epoch": 0.55, + "grad_norm": 0.7791239023208618, + "learning_rate": 8.701105053256123e-06, + "loss": 2.0796, + "step": 16504 + }, + { + "epoch": 0.55, + "grad_norm": 0.7407656908035278, + "learning_rate": 8.700051171981067e-06, + "loss": 2.0367, + "step": 16505 + }, + { + "epoch": 0.55, + "grad_norm": 0.7432065606117249, + "learning_rate": 8.698997305392072e-06, + "loss": 2.1034, + "step": 16506 + }, + { + "epoch": 0.55, + "grad_norm": 0.7563015818595886, + "learning_rate": 8.697943453501043e-06, + "loss": 2.0432, + "step": 16507 + }, + { + "epoch": 0.55, + "grad_norm": 0.7101261019706726, + "learning_rate": 8.696889616319885e-06, + "loss": 2.0622, + "step": 16508 + }, + { + "epoch": 0.55, + "grad_norm": 0.7513152360916138, + "learning_rate": 8.695835793860505e-06, + "loss": 2.0544, + "step": 16509 + }, + { + "epoch": 0.55, + "grad_norm": 0.7645315527915955, + "learning_rate": 8.694781986134803e-06, + "loss": 2.0479, + "step": 16510 + }, + { + "epoch": 0.55, + "grad_norm": 0.7208690643310547, + "learning_rate": 8.693728193154697e-06, + "loss": 2.0857, + "step": 16511 + }, + { + "epoch": 0.55, + "grad_norm": 0.7870786190032959, + "learning_rate": 8.692674414932079e-06, + "loss": 2.1685, + "step": 16512 + }, + { + "epoch": 0.55, + "grad_norm": 0.7334544062614441, + "learning_rate": 8.69162065147886e-06, + "loss": 2.1058, + "step": 16513 + }, + { + "epoch": 0.55, + "grad_norm": 0.7286974191665649, + "learning_rate": 8.690566902806946e-06, + "loss": 2.0788, + "step": 16514 + }, + { + "epoch": 0.55, + "grad_norm": 0.7775580883026123, + "learning_rate": 8.689513168928239e-06, + "loss": 2.1163, + "step": 16515 + }, + { + "epoch": 0.55, + "grad_norm": 0.7246006727218628, + "learning_rate": 8.688459449854644e-06, + "loss": 2.1215, + "step": 16516 + }, + { + "epoch": 0.55, + "grad_norm": 0.7750433087348938, + "learning_rate": 8.687405745598066e-06, + "loss": 2.0104, + "step": 16517 + }, + { + "epoch": 0.55, + "grad_norm": 0.7462099194526672, + "learning_rate": 8.686352056170406e-06, + "loss": 2.0742, + "step": 16518 + }, + { + "epoch": 0.55, + "grad_norm": 0.729267954826355, + "learning_rate": 8.685298381583573e-06, + "loss": 2.0872, + "step": 16519 + }, + { + "epoch": 0.55, + "grad_norm": 0.7599778175354004, + "learning_rate": 8.68424472184947e-06, + "loss": 2.0934, + "step": 16520 + }, + { + "epoch": 0.55, + "grad_norm": 0.7321904897689819, + "learning_rate": 8.683191076979996e-06, + "loss": 2.0766, + "step": 16521 + }, + { + "epoch": 0.55, + "grad_norm": 0.7425832152366638, + "learning_rate": 8.68213744698706e-06, + "loss": 2.0943, + "step": 16522 + }, + { + "epoch": 0.55, + "grad_norm": 0.7381272912025452, + "learning_rate": 8.68108383188256e-06, + "loss": 1.9724, + "step": 16523 + }, + { + "epoch": 0.55, + "grad_norm": 0.739753007888794, + "learning_rate": 8.6800302316784e-06, + "loss": 2.0667, + "step": 16524 + }, + { + "epoch": 0.55, + "grad_norm": 0.7081009149551392, + "learning_rate": 8.678976646386494e-06, + "loss": 2.0266, + "step": 16525 + }, + { + "epoch": 0.55, + "grad_norm": 0.7296431064605713, + "learning_rate": 8.677923076018727e-06, + "loss": 2.1299, + "step": 16526 + }, + { + "epoch": 0.55, + "grad_norm": 0.7543759346008301, + "learning_rate": 8.676869520587012e-06, + "loss": 2.0355, + "step": 16527 + }, + { + "epoch": 0.55, + "grad_norm": 0.7315911054611206, + "learning_rate": 8.67581598010325e-06, + "loss": 2.0542, + "step": 16528 + }, + { + "epoch": 0.55, + "grad_norm": 0.7469914555549622, + "learning_rate": 8.674762454579347e-06, + "loss": 1.9833, + "step": 16529 + }, + { + "epoch": 0.55, + "grad_norm": 0.7629496455192566, + "learning_rate": 8.673708944027196e-06, + "loss": 2.0736, + "step": 16530 + }, + { + "epoch": 0.55, + "grad_norm": 0.7489063143730164, + "learning_rate": 8.672655448458707e-06, + "loss": 2.0969, + "step": 16531 + }, + { + "epoch": 0.55, + "grad_norm": 0.7569689154624939, + "learning_rate": 8.671601967885775e-06, + "loss": 2.172, + "step": 16532 + }, + { + "epoch": 0.55, + "grad_norm": 0.7368515729904175, + "learning_rate": 8.670548502320313e-06, + "loss": 2.084, + "step": 16533 + }, + { + "epoch": 0.55, + "grad_norm": 0.7274659872055054, + "learning_rate": 8.669495051774208e-06, + "loss": 2.0868, + "step": 16534 + }, + { + "epoch": 0.55, + "grad_norm": 0.7237684726715088, + "learning_rate": 8.66844161625937e-06, + "loss": 2.0621, + "step": 16535 + }, + { + "epoch": 0.55, + "grad_norm": 0.7403201460838318, + "learning_rate": 8.667388195787698e-06, + "loss": 2.0858, + "step": 16536 + }, + { + "epoch": 0.55, + "grad_norm": 0.7313265800476074, + "learning_rate": 8.666334790371093e-06, + "loss": 2.0689, + "step": 16537 + }, + { + "epoch": 0.55, + "grad_norm": 0.7565664649009705, + "learning_rate": 8.66528140002146e-06, + "loss": 2.0156, + "step": 16538 + }, + { + "epoch": 0.55, + "grad_norm": 0.7245146036148071, + "learning_rate": 8.664228024750691e-06, + "loss": 2.1686, + "step": 16539 + }, + { + "epoch": 0.55, + "grad_norm": 0.7347381711006165, + "learning_rate": 8.663174664570691e-06, + "loss": 2.0121, + "step": 16540 + }, + { + "epoch": 0.55, + "grad_norm": 0.7291240692138672, + "learning_rate": 8.662121319493359e-06, + "loss": 2.058, + "step": 16541 + }, + { + "epoch": 0.55, + "grad_norm": 0.7480359077453613, + "learning_rate": 8.661067989530602e-06, + "loss": 2.0085, + "step": 16542 + }, + { + "epoch": 0.55, + "grad_norm": 0.7226346135139465, + "learning_rate": 8.660014674694311e-06, + "loss": 2.0989, + "step": 16543 + }, + { + "epoch": 0.55, + "grad_norm": 0.7640408873558044, + "learning_rate": 8.658961374996388e-06, + "loss": 2.0177, + "step": 16544 + }, + { + "epoch": 0.55, + "grad_norm": 0.7345245480537415, + "learning_rate": 8.657908090448734e-06, + "loss": 2.0824, + "step": 16545 + }, + { + "epoch": 0.55, + "grad_norm": 0.7203503251075745, + "learning_rate": 8.656854821063245e-06, + "loss": 1.9963, + "step": 16546 + }, + { + "epoch": 0.55, + "grad_norm": 0.7438231110572815, + "learning_rate": 8.65580156685183e-06, + "loss": 2.0159, + "step": 16547 + }, + { + "epoch": 0.55, + "grad_norm": 0.7365139126777649, + "learning_rate": 8.654748327826374e-06, + "loss": 2.0422, + "step": 16548 + }, + { + "epoch": 0.55, + "grad_norm": 0.7612596154212952, + "learning_rate": 8.653695103998788e-06, + "loss": 2.1934, + "step": 16549 + }, + { + "epoch": 0.55, + "grad_norm": 0.746025562286377, + "learning_rate": 8.652641895380962e-06, + "loss": 2.034, + "step": 16550 + }, + { + "epoch": 0.55, + "grad_norm": 0.7477189898490906, + "learning_rate": 8.651588701984804e-06, + "loss": 2.1124, + "step": 16551 + }, + { + "epoch": 0.55, + "grad_norm": 0.7265787720680237, + "learning_rate": 8.6505355238222e-06, + "loss": 2.0113, + "step": 16552 + }, + { + "epoch": 0.55, + "grad_norm": 0.7614309191703796, + "learning_rate": 8.649482360905058e-06, + "loss": 2.1932, + "step": 16553 + }, + { + "epoch": 0.55, + "grad_norm": 0.7433967590332031, + "learning_rate": 8.64842921324527e-06, + "loss": 2.019, + "step": 16554 + }, + { + "epoch": 0.55, + "grad_norm": 0.7171213626861572, + "learning_rate": 8.647376080854738e-06, + "loss": 1.9965, + "step": 16555 + }, + { + "epoch": 0.55, + "grad_norm": 0.7529215216636658, + "learning_rate": 8.646322963745362e-06, + "loss": 1.9999, + "step": 16556 + }, + { + "epoch": 0.55, + "grad_norm": 0.7579878568649292, + "learning_rate": 8.645269861929033e-06, + "loss": 1.9896, + "step": 16557 + }, + { + "epoch": 0.55, + "grad_norm": 0.7403212189674377, + "learning_rate": 8.644216775417651e-06, + "loss": 2.021, + "step": 16558 + }, + { + "epoch": 0.55, + "grad_norm": 0.7308504581451416, + "learning_rate": 8.643163704223114e-06, + "loss": 2.0088, + "step": 16559 + }, + { + "epoch": 0.55, + "grad_norm": 0.7473870515823364, + "learning_rate": 8.64211064835732e-06, + "loss": 2.0071, + "step": 16560 + }, + { + "epoch": 0.55, + "grad_norm": 0.7900104522705078, + "learning_rate": 8.641057607832161e-06, + "loss": 2.0624, + "step": 16561 + }, + { + "epoch": 0.55, + "grad_norm": 0.7551008462905884, + "learning_rate": 8.640004582659534e-06, + "loss": 2.0343, + "step": 16562 + }, + { + "epoch": 0.55, + "grad_norm": 0.7325859665870667, + "learning_rate": 8.638951572851343e-06, + "loss": 2.0203, + "step": 16563 + }, + { + "epoch": 0.55, + "grad_norm": 0.740439772605896, + "learning_rate": 8.637898578419479e-06, + "loss": 2.1324, + "step": 16564 + }, + { + "epoch": 0.55, + "grad_norm": 0.7063793540000916, + "learning_rate": 8.636845599375838e-06, + "loss": 1.9648, + "step": 16565 + }, + { + "epoch": 0.55, + "grad_norm": 0.7512066960334778, + "learning_rate": 8.635792635732316e-06, + "loss": 2.0641, + "step": 16566 + }, + { + "epoch": 0.55, + "grad_norm": 0.7407785654067993, + "learning_rate": 8.634739687500811e-06, + "loss": 2.015, + "step": 16567 + }, + { + "epoch": 0.55, + "grad_norm": 0.7464678883552551, + "learning_rate": 8.633686754693212e-06, + "loss": 2.089, + "step": 16568 + }, + { + "epoch": 0.55, + "grad_norm": 0.7471652030944824, + "learning_rate": 8.632633837321426e-06, + "loss": 2.0426, + "step": 16569 + }, + { + "epoch": 0.55, + "grad_norm": 0.7374445796012878, + "learning_rate": 8.631580935397336e-06, + "loss": 2.1322, + "step": 16570 + }, + { + "epoch": 0.55, + "grad_norm": 0.7281451225280762, + "learning_rate": 8.630528048932847e-06, + "loss": 2.1281, + "step": 16571 + }, + { + "epoch": 0.55, + "grad_norm": 0.7499199509620667, + "learning_rate": 8.629475177939846e-06, + "loss": 2.1031, + "step": 16572 + }, + { + "epoch": 0.55, + "grad_norm": 0.7474506497383118, + "learning_rate": 8.628422322430236e-06, + "loss": 2.047, + "step": 16573 + }, + { + "epoch": 0.55, + "grad_norm": 0.7745090126991272, + "learning_rate": 8.627369482415902e-06, + "loss": 2.0617, + "step": 16574 + }, + { + "epoch": 0.55, + "grad_norm": 0.715404212474823, + "learning_rate": 8.626316657908745e-06, + "loss": 2.0683, + "step": 16575 + }, + { + "epoch": 0.55, + "grad_norm": 0.7193914651870728, + "learning_rate": 8.625263848920656e-06, + "loss": 2.0772, + "step": 16576 + }, + { + "epoch": 0.55, + "grad_norm": 0.7458266019821167, + "learning_rate": 8.62421105546353e-06, + "loss": 2.0706, + "step": 16577 + }, + { + "epoch": 0.55, + "grad_norm": 0.7418307065963745, + "learning_rate": 8.623158277549265e-06, + "loss": 2.0929, + "step": 16578 + }, + { + "epoch": 0.55, + "grad_norm": 0.747020423412323, + "learning_rate": 8.622105515189749e-06, + "loss": 2.0343, + "step": 16579 + }, + { + "epoch": 0.55, + "grad_norm": 0.7525926828384399, + "learning_rate": 8.621052768396877e-06, + "loss": 2.0387, + "step": 16580 + }, + { + "epoch": 0.55, + "grad_norm": 0.7497417330741882, + "learning_rate": 8.620000037182541e-06, + "loss": 2.095, + "step": 16581 + }, + { + "epoch": 0.55, + "grad_norm": 0.797443687915802, + "learning_rate": 8.61894732155864e-06, + "loss": 1.9991, + "step": 16582 + }, + { + "epoch": 0.55, + "grad_norm": 0.7296102643013, + "learning_rate": 8.61789462153706e-06, + "loss": 2.0531, + "step": 16583 + }, + { + "epoch": 0.55, + "grad_norm": 0.7121030688285828, + "learning_rate": 8.616841937129695e-06, + "loss": 2.0351, + "step": 16584 + }, + { + "epoch": 0.55, + "grad_norm": 0.736751139163971, + "learning_rate": 8.61578926834844e-06, + "loss": 2.0211, + "step": 16585 + }, + { + "epoch": 0.55, + "grad_norm": 0.7401643395423889, + "learning_rate": 8.614736615205189e-06, + "loss": 2.0125, + "step": 16586 + }, + { + "epoch": 0.55, + "grad_norm": 0.7396181225776672, + "learning_rate": 8.61368397771183e-06, + "loss": 2.0437, + "step": 16587 + }, + { + "epoch": 0.55, + "grad_norm": 0.7332243919372559, + "learning_rate": 8.612631355880259e-06, + "loss": 2.0368, + "step": 16588 + }, + { + "epoch": 0.55, + "grad_norm": 0.741874635219574, + "learning_rate": 8.611578749722362e-06, + "loss": 2.1056, + "step": 16589 + }, + { + "epoch": 0.55, + "grad_norm": 0.7566730380058289, + "learning_rate": 8.610526159250034e-06, + "loss": 2.0775, + "step": 16590 + }, + { + "epoch": 0.55, + "grad_norm": 0.7542139887809753, + "learning_rate": 8.609473584475176e-06, + "loss": 2.0487, + "step": 16591 + }, + { + "epoch": 0.55, + "grad_norm": 0.7411539554595947, + "learning_rate": 8.608421025409662e-06, + "loss": 2.0714, + "step": 16592 + }, + { + "epoch": 0.55, + "grad_norm": 0.7386788129806519, + "learning_rate": 8.607368482065394e-06, + "loss": 2.063, + "step": 16593 + }, + { + "epoch": 0.55, + "grad_norm": 0.7344232201576233, + "learning_rate": 8.606315954454259e-06, + "loss": 2.1, + "step": 16594 + }, + { + "epoch": 0.55, + "grad_norm": 0.7509559392929077, + "learning_rate": 8.605263442588155e-06, + "loss": 2.092, + "step": 16595 + }, + { + "epoch": 0.55, + "grad_norm": 0.7308575510978699, + "learning_rate": 8.604210946478963e-06, + "loss": 2.0769, + "step": 16596 + }, + { + "epoch": 0.55, + "grad_norm": 0.7411999106407166, + "learning_rate": 8.603158466138576e-06, + "loss": 2.0129, + "step": 16597 + }, + { + "epoch": 0.55, + "grad_norm": 0.7192003726959229, + "learning_rate": 8.602106001578888e-06, + "loss": 2.0662, + "step": 16598 + }, + { + "epoch": 0.55, + "grad_norm": 0.711035966873169, + "learning_rate": 8.601053552811789e-06, + "loss": 2.0574, + "step": 16599 + }, + { + "epoch": 0.55, + "grad_norm": 0.7375175952911377, + "learning_rate": 8.600001119849167e-06, + "loss": 2.1135, + "step": 16600 + }, + { + "epoch": 0.55, + "grad_norm": 0.7306444644927979, + "learning_rate": 8.59894870270291e-06, + "loss": 2.0144, + "step": 16601 + }, + { + "epoch": 0.55, + "grad_norm": 0.7241631746292114, + "learning_rate": 8.597896301384909e-06, + "loss": 2.0483, + "step": 16602 + }, + { + "epoch": 0.55, + "grad_norm": 0.7347139716148376, + "learning_rate": 8.596843915907053e-06, + "loss": 2.0448, + "step": 16603 + }, + { + "epoch": 0.55, + "grad_norm": 0.7405506372451782, + "learning_rate": 8.595791546281238e-06, + "loss": 2.0824, + "step": 16604 + }, + { + "epoch": 0.55, + "grad_norm": 0.7392181158065796, + "learning_rate": 8.594739192519341e-06, + "loss": 2.0736, + "step": 16605 + }, + { + "epoch": 0.55, + "grad_norm": 0.7310424447059631, + "learning_rate": 8.593686854633259e-06, + "loss": 2.0808, + "step": 16606 + }, + { + "epoch": 0.55, + "grad_norm": 0.8175680041313171, + "learning_rate": 8.592634532634877e-06, + "loss": 2.0383, + "step": 16607 + }, + { + "epoch": 0.55, + "grad_norm": 0.7483745813369751, + "learning_rate": 8.591582226536089e-06, + "loss": 2.1011, + "step": 16608 + }, + { + "epoch": 0.55, + "grad_norm": 0.713896632194519, + "learning_rate": 8.59052993634878e-06, + "loss": 2.0045, + "step": 16609 + }, + { + "epoch": 0.55, + "grad_norm": 0.741944432258606, + "learning_rate": 8.589477662084835e-06, + "loss": 2.039, + "step": 16610 + }, + { + "epoch": 0.55, + "grad_norm": 0.7125879526138306, + "learning_rate": 8.588425403756143e-06, + "loss": 2.0402, + "step": 16611 + }, + { + "epoch": 0.55, + "grad_norm": 0.7613575458526611, + "learning_rate": 8.587373161374594e-06, + "loss": 2.0654, + "step": 16612 + }, + { + "epoch": 0.55, + "grad_norm": 0.7500796914100647, + "learning_rate": 8.58632093495208e-06, + "loss": 2.0999, + "step": 16613 + }, + { + "epoch": 0.55, + "grad_norm": 0.7684041857719421, + "learning_rate": 8.58526872450048e-06, + "loss": 2.0826, + "step": 16614 + }, + { + "epoch": 0.55, + "grad_norm": 0.7503336668014526, + "learning_rate": 8.584216530031685e-06, + "loss": 2.0957, + "step": 16615 + }, + { + "epoch": 0.55, + "grad_norm": 0.7542755603790283, + "learning_rate": 8.583164351557582e-06, + "loss": 2.1096, + "step": 16616 + }, + { + "epoch": 0.55, + "grad_norm": 0.7351455688476562, + "learning_rate": 8.582112189090061e-06, + "loss": 2.0249, + "step": 16617 + }, + { + "epoch": 0.55, + "grad_norm": 0.7745035886764526, + "learning_rate": 8.581060042641001e-06, + "loss": 2.0259, + "step": 16618 + }, + { + "epoch": 0.55, + "grad_norm": 0.7146393060684204, + "learning_rate": 8.580007912222294e-06, + "loss": 2.0509, + "step": 16619 + }, + { + "epoch": 0.55, + "grad_norm": 0.7604787349700928, + "learning_rate": 8.578955797845824e-06, + "loss": 2.1236, + "step": 16620 + }, + { + "epoch": 0.55, + "grad_norm": 0.7524462938308716, + "learning_rate": 8.577903699523482e-06, + "loss": 2.099, + "step": 16621 + }, + { + "epoch": 0.55, + "grad_norm": 0.7318755388259888, + "learning_rate": 8.576851617267151e-06, + "loss": 2.0673, + "step": 16622 + }, + { + "epoch": 0.55, + "grad_norm": 0.7297762632369995, + "learning_rate": 8.575799551088713e-06, + "loss": 2.0698, + "step": 16623 + }, + { + "epoch": 0.55, + "grad_norm": 0.740635097026825, + "learning_rate": 8.574747501000059e-06, + "loss": 2.0259, + "step": 16624 + }, + { + "epoch": 0.55, + "grad_norm": 0.7482772469520569, + "learning_rate": 8.573695467013071e-06, + "loss": 2.0438, + "step": 16625 + }, + { + "epoch": 0.55, + "grad_norm": 0.743240237236023, + "learning_rate": 8.57264344913964e-06, + "loss": 2.1062, + "step": 16626 + }, + { + "epoch": 0.55, + "grad_norm": 0.7569797039031982, + "learning_rate": 8.571591447391642e-06, + "loss": 2.077, + "step": 16627 + }, + { + "epoch": 0.55, + "grad_norm": 0.7482658624649048, + "learning_rate": 8.570539461780967e-06, + "loss": 2.08, + "step": 16628 + }, + { + "epoch": 0.55, + "grad_norm": 0.7489868998527527, + "learning_rate": 8.569487492319502e-06, + "loss": 2.0798, + "step": 16629 + }, + { + "epoch": 0.55, + "grad_norm": 0.7610114216804504, + "learning_rate": 8.568435539019126e-06, + "loss": 2.1593, + "step": 16630 + }, + { + "epoch": 0.55, + "grad_norm": 0.7612504363059998, + "learning_rate": 8.56738360189173e-06, + "loss": 2.0845, + "step": 16631 + }, + { + "epoch": 0.55, + "grad_norm": 0.7707653045654297, + "learning_rate": 8.566331680949193e-06, + "loss": 1.998, + "step": 16632 + }, + { + "epoch": 0.55, + "grad_norm": 0.7285487651824951, + "learning_rate": 8.565279776203397e-06, + "loss": 2.0798, + "step": 16633 + }, + { + "epoch": 0.55, + "grad_norm": 0.7132567763328552, + "learning_rate": 8.564227887666231e-06, + "loss": 2.0647, + "step": 16634 + }, + { + "epoch": 0.55, + "grad_norm": 0.7683488130569458, + "learning_rate": 8.563176015349581e-06, + "loss": 2.1337, + "step": 16635 + }, + { + "epoch": 0.55, + "grad_norm": 0.7454658150672913, + "learning_rate": 8.562124159265323e-06, + "loss": 2.0341, + "step": 16636 + }, + { + "epoch": 0.55, + "grad_norm": 0.7358244061470032, + "learning_rate": 8.561072319425344e-06, + "loss": 2.0537, + "step": 16637 + }, + { + "epoch": 0.55, + "grad_norm": 0.7353538274765015, + "learning_rate": 8.560020495841526e-06, + "loss": 2.1103, + "step": 16638 + }, + { + "epoch": 0.55, + "grad_norm": 0.7644364237785339, + "learning_rate": 8.55896868852575e-06, + "loss": 2.009, + "step": 16639 + }, + { + "epoch": 0.55, + "grad_norm": 0.7569608092308044, + "learning_rate": 8.557916897489909e-06, + "loss": 2.0949, + "step": 16640 + }, + { + "epoch": 0.55, + "grad_norm": 0.74721759557724, + "learning_rate": 8.55686512274587e-06, + "loss": 2.1578, + "step": 16641 + }, + { + "epoch": 0.55, + "grad_norm": 0.73298579454422, + "learning_rate": 8.555813364305526e-06, + "loss": 2.1098, + "step": 16642 + }, + { + "epoch": 0.55, + "grad_norm": 0.7489504814147949, + "learning_rate": 8.554761622180757e-06, + "loss": 2.0194, + "step": 16643 + }, + { + "epoch": 0.55, + "grad_norm": 0.7614023089408875, + "learning_rate": 8.553709896383445e-06, + "loss": 2.0604, + "step": 16644 + }, + { + "epoch": 0.55, + "grad_norm": 0.7530896067619324, + "learning_rate": 8.552658186925469e-06, + "loss": 2.0376, + "step": 16645 + }, + { + "epoch": 0.55, + "grad_norm": 0.7655874490737915, + "learning_rate": 8.551606493818713e-06, + "loss": 2.0129, + "step": 16646 + }, + { + "epoch": 0.55, + "grad_norm": 0.7612796425819397, + "learning_rate": 8.550554817075057e-06, + "loss": 2.0055, + "step": 16647 + }, + { + "epoch": 0.55, + "grad_norm": 0.7210971117019653, + "learning_rate": 8.549503156706387e-06, + "loss": 2.1089, + "step": 16648 + }, + { + "epoch": 0.55, + "grad_norm": 0.772261917591095, + "learning_rate": 8.548451512724576e-06, + "loss": 2.1435, + "step": 16649 + }, + { + "epoch": 0.55, + "grad_norm": 0.7566694021224976, + "learning_rate": 8.547399885141511e-06, + "loss": 2.0296, + "step": 16650 + }, + { + "epoch": 0.55, + "grad_norm": 0.7337427735328674, + "learning_rate": 8.546348273969069e-06, + "loss": 2.0955, + "step": 16651 + }, + { + "epoch": 0.55, + "grad_norm": 0.7413740158081055, + "learning_rate": 8.545296679219136e-06, + "loss": 2.0966, + "step": 16652 + }, + { + "epoch": 0.55, + "grad_norm": 0.7298580408096313, + "learning_rate": 8.544245100903586e-06, + "loss": 2.0029, + "step": 16653 + }, + { + "epoch": 0.55, + "grad_norm": 0.7241822481155396, + "learning_rate": 8.543193539034302e-06, + "loss": 2.0111, + "step": 16654 + }, + { + "epoch": 0.55, + "grad_norm": 0.7459873557090759, + "learning_rate": 8.542141993623162e-06, + "loss": 2.08, + "step": 16655 + }, + { + "epoch": 0.55, + "grad_norm": 0.736303985118866, + "learning_rate": 8.541090464682049e-06, + "loss": 2.1133, + "step": 16656 + }, + { + "epoch": 0.55, + "grad_norm": 0.7282065153121948, + "learning_rate": 8.540038952222842e-06, + "loss": 2.0373, + "step": 16657 + }, + { + "epoch": 0.55, + "grad_norm": 0.7662880420684814, + "learning_rate": 8.538987456257418e-06, + "loss": 2.1136, + "step": 16658 + }, + { + "epoch": 0.55, + "grad_norm": 0.7325481176376343, + "learning_rate": 8.537935976797657e-06, + "loss": 2.0477, + "step": 16659 + }, + { + "epoch": 0.55, + "grad_norm": 0.7756228446960449, + "learning_rate": 8.53688451385544e-06, + "loss": 2.0828, + "step": 16660 + }, + { + "epoch": 0.55, + "grad_norm": 0.7225051522254944, + "learning_rate": 8.535833067442641e-06, + "loss": 2.0759, + "step": 16661 + }, + { + "epoch": 0.55, + "grad_norm": 0.7147581577301025, + "learning_rate": 8.534781637571149e-06, + "loss": 2.1547, + "step": 16662 + }, + { + "epoch": 0.55, + "grad_norm": 0.7189888954162598, + "learning_rate": 8.533730224252828e-06, + "loss": 2.0441, + "step": 16663 + }, + { + "epoch": 0.55, + "grad_norm": 0.7515743970870972, + "learning_rate": 8.532678827499566e-06, + "loss": 2.1121, + "step": 16664 + }, + { + "epoch": 0.55, + "grad_norm": 0.7584514021873474, + "learning_rate": 8.531627447323238e-06, + "loss": 2.0439, + "step": 16665 + }, + { + "epoch": 0.55, + "grad_norm": 0.763821542263031, + "learning_rate": 8.530576083735726e-06, + "loss": 2.0424, + "step": 16666 + }, + { + "epoch": 0.55, + "grad_norm": 0.7620757818222046, + "learning_rate": 8.529524736748903e-06, + "loss": 2.1903, + "step": 16667 + }, + { + "epoch": 0.55, + "grad_norm": 0.7464105486869812, + "learning_rate": 8.528473406374645e-06, + "loss": 2.0711, + "step": 16668 + }, + { + "epoch": 0.55, + "grad_norm": 0.7380724549293518, + "learning_rate": 8.527422092624832e-06, + "loss": 2.0598, + "step": 16669 + }, + { + "epoch": 0.55, + "grad_norm": 0.7136291265487671, + "learning_rate": 8.526370795511343e-06, + "loss": 2.0428, + "step": 16670 + }, + { + "epoch": 0.55, + "grad_norm": 0.7326642274856567, + "learning_rate": 8.525319515046054e-06, + "loss": 2.0691, + "step": 16671 + }, + { + "epoch": 0.55, + "grad_norm": 0.7272912263870239, + "learning_rate": 8.524268251240841e-06, + "loss": 2.025, + "step": 16672 + }, + { + "epoch": 0.55, + "grad_norm": 0.73006671667099, + "learning_rate": 8.523217004107578e-06, + "loss": 2.0448, + "step": 16673 + }, + { + "epoch": 0.55, + "grad_norm": 0.7180224061012268, + "learning_rate": 8.522165773658146e-06, + "loss": 2.0934, + "step": 16674 + }, + { + "epoch": 0.55, + "grad_norm": 0.7334069609642029, + "learning_rate": 8.52111455990442e-06, + "loss": 2.0006, + "step": 16675 + }, + { + "epoch": 0.55, + "grad_norm": 0.7747009992599487, + "learning_rate": 8.520063362858271e-06, + "loss": 2.0802, + "step": 16676 + }, + { + "epoch": 0.55, + "grad_norm": 0.7535221576690674, + "learning_rate": 8.519012182531579e-06, + "loss": 2.0041, + "step": 16677 + }, + { + "epoch": 0.55, + "grad_norm": 0.7366111278533936, + "learning_rate": 8.517961018936222e-06, + "loss": 2.0318, + "step": 16678 + }, + { + "epoch": 0.55, + "grad_norm": 0.7255215644836426, + "learning_rate": 8.516909872084073e-06, + "loss": 2.1056, + "step": 16679 + }, + { + "epoch": 0.55, + "grad_norm": 0.7402520179748535, + "learning_rate": 8.515858741987007e-06, + "loss": 2.1018, + "step": 16680 + }, + { + "epoch": 0.55, + "grad_norm": 0.7555060386657715, + "learning_rate": 8.514807628656899e-06, + "loss": 2.0613, + "step": 16681 + }, + { + "epoch": 0.56, + "grad_norm": 0.7324411869049072, + "learning_rate": 8.51375653210562e-06, + "loss": 2.0786, + "step": 16682 + }, + { + "epoch": 0.56, + "grad_norm": 0.7216768860816956, + "learning_rate": 8.512705452345051e-06, + "loss": 2.0308, + "step": 16683 + }, + { + "epoch": 0.56, + "grad_norm": 0.7511988282203674, + "learning_rate": 8.51165438938707e-06, + "loss": 2.057, + "step": 16684 + }, + { + "epoch": 0.56, + "grad_norm": 0.7523396611213684, + "learning_rate": 8.510603343243538e-06, + "loss": 2.1045, + "step": 16685 + }, + { + "epoch": 0.56, + "grad_norm": 0.7295357584953308, + "learning_rate": 8.509552313926339e-06, + "loss": 2.0954, + "step": 16686 + }, + { + "epoch": 0.56, + "grad_norm": 0.7326708436012268, + "learning_rate": 8.508501301447345e-06, + "loss": 2.081, + "step": 16687 + }, + { + "epoch": 0.56, + "grad_norm": 0.7313787937164307, + "learning_rate": 8.50745030581843e-06, + "loss": 2.0415, + "step": 16688 + }, + { + "epoch": 0.56, + "grad_norm": 0.744006872177124, + "learning_rate": 8.506399327051465e-06, + "loss": 2.0998, + "step": 16689 + }, + { + "epoch": 0.56, + "grad_norm": 0.7513835430145264, + "learning_rate": 8.505348365158325e-06, + "loss": 2.0719, + "step": 16690 + }, + { + "epoch": 0.56, + "grad_norm": 0.7507015466690063, + "learning_rate": 8.504297420150882e-06, + "loss": 2.0537, + "step": 16691 + }, + { + "epoch": 0.56, + "grad_norm": 0.7489732503890991, + "learning_rate": 8.503246492041013e-06, + "loss": 2.0194, + "step": 16692 + }, + { + "epoch": 0.56, + "grad_norm": 0.772464394569397, + "learning_rate": 8.502195580840588e-06, + "loss": 2.0088, + "step": 16693 + }, + { + "epoch": 0.56, + "grad_norm": 0.7171416878700256, + "learning_rate": 8.501144686561479e-06, + "loss": 2.0699, + "step": 16694 + }, + { + "epoch": 0.56, + "grad_norm": 0.6983522772789001, + "learning_rate": 8.500093809215558e-06, + "loss": 1.9862, + "step": 16695 + }, + { + "epoch": 0.56, + "grad_norm": 0.7744280099868774, + "learning_rate": 8.499042948814696e-06, + "loss": 1.9997, + "step": 16696 + }, + { + "epoch": 0.56, + "grad_norm": 0.7388389706611633, + "learning_rate": 8.497992105370774e-06, + "loss": 1.9827, + "step": 16697 + }, + { + "epoch": 0.56, + "grad_norm": 0.7499402165412903, + "learning_rate": 8.496941278895653e-06, + "loss": 2.0952, + "step": 16698 + }, + { + "epoch": 0.56, + "grad_norm": 0.7866347432136536, + "learning_rate": 8.495890469401204e-06, + "loss": 2.0504, + "step": 16699 + }, + { + "epoch": 0.56, + "grad_norm": 0.7501490116119385, + "learning_rate": 8.494839676899307e-06, + "loss": 2.071, + "step": 16700 + }, + { + "epoch": 0.56, + "grad_norm": 0.7905648350715637, + "learning_rate": 8.493788901401831e-06, + "loss": 2.0114, + "step": 16701 + }, + { + "epoch": 0.56, + "grad_norm": 0.7692969441413879, + "learning_rate": 8.492738142920645e-06, + "loss": 2.0612, + "step": 16702 + }, + { + "epoch": 0.56, + "grad_norm": 0.7460446953773499, + "learning_rate": 8.491687401467618e-06, + "loss": 2.0701, + "step": 16703 + }, + { + "epoch": 0.56, + "grad_norm": 0.7274276614189148, + "learning_rate": 8.49063667705462e-06, + "loss": 2.0593, + "step": 16704 + }, + { + "epoch": 0.56, + "grad_norm": 0.7628127336502075, + "learning_rate": 8.48958596969353e-06, + "loss": 2.0783, + "step": 16705 + }, + { + "epoch": 0.56, + "grad_norm": 0.7190506458282471, + "learning_rate": 8.488535279396212e-06, + "loss": 2.0829, + "step": 16706 + }, + { + "epoch": 0.56, + "grad_norm": 0.7803504467010498, + "learning_rate": 8.487484606174534e-06, + "loss": 2.0335, + "step": 16707 + }, + { + "epoch": 0.56, + "grad_norm": 0.7428827881813049, + "learning_rate": 8.486433950040369e-06, + "loss": 2.0164, + "step": 16708 + }, + { + "epoch": 0.56, + "grad_norm": 0.7503796815872192, + "learning_rate": 8.485383311005586e-06, + "loss": 2.0557, + "step": 16709 + }, + { + "epoch": 0.56, + "grad_norm": 0.7121695280075073, + "learning_rate": 8.484332689082057e-06, + "loss": 2.0376, + "step": 16710 + }, + { + "epoch": 0.56, + "grad_norm": 0.7454421520233154, + "learning_rate": 8.483282084281648e-06, + "loss": 2.1091, + "step": 16711 + }, + { + "epoch": 0.56, + "grad_norm": 0.7335931658744812, + "learning_rate": 8.482231496616226e-06, + "loss": 2.0761, + "step": 16712 + }, + { + "epoch": 0.56, + "grad_norm": 0.7471561431884766, + "learning_rate": 8.481180926097665e-06, + "loss": 2.0228, + "step": 16713 + }, + { + "epoch": 0.56, + "grad_norm": 0.7073904275894165, + "learning_rate": 8.480130372737832e-06, + "loss": 2.0438, + "step": 16714 + }, + { + "epoch": 0.56, + "grad_norm": 0.7536607384681702, + "learning_rate": 8.479079836548598e-06, + "loss": 2.1102, + "step": 16715 + }, + { + "epoch": 0.56, + "grad_norm": 0.7500789761543274, + "learning_rate": 8.478029317541825e-06, + "loss": 2.0857, + "step": 16716 + }, + { + "epoch": 0.56, + "grad_norm": 0.7411984801292419, + "learning_rate": 8.476978815729386e-06, + "loss": 2.0995, + "step": 16717 + }, + { + "epoch": 0.56, + "grad_norm": 0.7154254913330078, + "learning_rate": 8.475928331123146e-06, + "loss": 2.0338, + "step": 16718 + }, + { + "epoch": 0.56, + "grad_norm": 0.7668420076370239, + "learning_rate": 8.47487786373498e-06, + "loss": 2.0844, + "step": 16719 + }, + { + "epoch": 0.56, + "grad_norm": 0.7501354813575745, + "learning_rate": 8.473827413576746e-06, + "loss": 2.1359, + "step": 16720 + }, + { + "epoch": 0.56, + "grad_norm": 0.7358644604682922, + "learning_rate": 8.472776980660315e-06, + "loss": 2.0128, + "step": 16721 + }, + { + "epoch": 0.56, + "grad_norm": 0.8009878396987915, + "learning_rate": 8.471726564997554e-06, + "loss": 2.0536, + "step": 16722 + }, + { + "epoch": 0.56, + "grad_norm": 0.7800041437149048, + "learning_rate": 8.470676166600333e-06, + "loss": 2.0113, + "step": 16723 + }, + { + "epoch": 0.56, + "grad_norm": 0.7271307110786438, + "learning_rate": 8.469625785480518e-06, + "loss": 2.0392, + "step": 16724 + }, + { + "epoch": 0.56, + "grad_norm": 0.76762455701828, + "learning_rate": 8.468575421649971e-06, + "loss": 2.1025, + "step": 16725 + }, + { + "epoch": 0.56, + "grad_norm": 0.7439972162246704, + "learning_rate": 8.467525075120558e-06, + "loss": 2.048, + "step": 16726 + }, + { + "epoch": 0.56, + "grad_norm": 0.8246400356292725, + "learning_rate": 8.466474745904154e-06, + "loss": 2.0461, + "step": 16727 + }, + { + "epoch": 0.56, + "grad_norm": 0.7291239500045776, + "learning_rate": 8.465424434012619e-06, + "loss": 2.0845, + "step": 16728 + }, + { + "epoch": 0.56, + "grad_norm": 0.7072829008102417, + "learning_rate": 8.464374139457819e-06, + "loss": 2.0243, + "step": 16729 + }, + { + "epoch": 0.56, + "grad_norm": 0.7153077721595764, + "learning_rate": 8.463323862251619e-06, + "loss": 2.0363, + "step": 16730 + }, + { + "epoch": 0.56, + "grad_norm": 0.7212857604026794, + "learning_rate": 8.462273602405885e-06, + "loss": 2.0708, + "step": 16731 + }, + { + "epoch": 0.56, + "grad_norm": 0.7523965239524841, + "learning_rate": 8.461223359932481e-06, + "loss": 2.0362, + "step": 16732 + }, + { + "epoch": 0.56, + "grad_norm": 0.8195013999938965, + "learning_rate": 8.460173134843282e-06, + "loss": 2.079, + "step": 16733 + }, + { + "epoch": 0.56, + "grad_norm": 0.7346334457397461, + "learning_rate": 8.459122927150135e-06, + "loss": 2.0538, + "step": 16734 + }, + { + "epoch": 0.56, + "grad_norm": 0.7347535490989685, + "learning_rate": 8.458072736864918e-06, + "loss": 2.0074, + "step": 16735 + }, + { + "epoch": 0.56, + "grad_norm": 0.7356991171836853, + "learning_rate": 8.45702256399949e-06, + "loss": 2.0384, + "step": 16736 + }, + { + "epoch": 0.56, + "grad_norm": 0.763713538646698, + "learning_rate": 8.455972408565722e-06, + "loss": 1.9293, + "step": 16737 + }, + { + "epoch": 0.56, + "grad_norm": 0.7369728684425354, + "learning_rate": 8.454922270575467e-06, + "loss": 2.088, + "step": 16738 + }, + { + "epoch": 0.56, + "grad_norm": 0.7270092964172363, + "learning_rate": 8.453872150040598e-06, + "loss": 2.0698, + "step": 16739 + }, + { + "epoch": 0.56, + "grad_norm": 0.7209954261779785, + "learning_rate": 8.452822046972971e-06, + "loss": 1.9942, + "step": 16740 + }, + { + "epoch": 0.56, + "grad_norm": 0.7575922012329102, + "learning_rate": 8.45177196138446e-06, + "loss": 2.0961, + "step": 16741 + }, + { + "epoch": 0.56, + "grad_norm": 0.7480920553207397, + "learning_rate": 8.450721893286917e-06, + "loss": 2.0809, + "step": 16742 + }, + { + "epoch": 0.56, + "grad_norm": 0.7328463196754456, + "learning_rate": 8.449671842692212e-06, + "loss": 2.0936, + "step": 16743 + }, + { + "epoch": 0.56, + "grad_norm": 0.7560030221939087, + "learning_rate": 8.448621809612205e-06, + "loss": 2.0891, + "step": 16744 + }, + { + "epoch": 0.56, + "grad_norm": 0.7235105037689209, + "learning_rate": 8.44757179405876e-06, + "loss": 1.9919, + "step": 16745 + }, + { + "epoch": 0.56, + "grad_norm": 0.7721200585365295, + "learning_rate": 8.446521796043743e-06, + "loss": 2.1198, + "step": 16746 + }, + { + "epoch": 0.56, + "grad_norm": 0.7190041542053223, + "learning_rate": 8.445471815579009e-06, + "loss": 2.0472, + "step": 16747 + }, + { + "epoch": 0.56, + "grad_norm": 0.7581098079681396, + "learning_rate": 8.44442185267642e-06, + "loss": 2.0574, + "step": 16748 + }, + { + "epoch": 0.56, + "grad_norm": 0.7667680382728577, + "learning_rate": 8.443371907347844e-06, + "loss": 2.0652, + "step": 16749 + }, + { + "epoch": 0.56, + "grad_norm": 0.7571572065353394, + "learning_rate": 8.442321979605143e-06, + "loss": 2.1043, + "step": 16750 + }, + { + "epoch": 0.56, + "grad_norm": 0.7084314227104187, + "learning_rate": 8.441272069460171e-06, + "loss": 2.0475, + "step": 16751 + }, + { + "epoch": 0.56, + "grad_norm": 0.736884355545044, + "learning_rate": 8.440222176924796e-06, + "loss": 2.0691, + "step": 16752 + }, + { + "epoch": 0.56, + "grad_norm": 0.7467279434204102, + "learning_rate": 8.439172302010877e-06, + "loss": 2.121, + "step": 16753 + }, + { + "epoch": 0.56, + "grad_norm": 0.7498335838317871, + "learning_rate": 8.43812244473027e-06, + "loss": 2.0418, + "step": 16754 + }, + { + "epoch": 0.56, + "grad_norm": 0.7791133522987366, + "learning_rate": 8.43707260509485e-06, + "loss": 2.0628, + "step": 16755 + }, + { + "epoch": 0.56, + "grad_norm": 0.7558866739273071, + "learning_rate": 8.436022783116458e-06, + "loss": 2.0166, + "step": 16756 + }, + { + "epoch": 0.56, + "grad_norm": 0.7172505855560303, + "learning_rate": 8.434972978806967e-06, + "loss": 2.0932, + "step": 16757 + }, + { + "epoch": 0.56, + "grad_norm": 0.7278936505317688, + "learning_rate": 8.433923192178235e-06, + "loss": 2.0228, + "step": 16758 + }, + { + "epoch": 0.56, + "grad_norm": 0.7596557140350342, + "learning_rate": 8.432873423242123e-06, + "loss": 2.1009, + "step": 16759 + }, + { + "epoch": 0.56, + "grad_norm": 0.7843927145004272, + "learning_rate": 8.431823672010486e-06, + "loss": 2.1353, + "step": 16760 + }, + { + "epoch": 0.56, + "grad_norm": 0.7333033084869385, + "learning_rate": 8.430773938495187e-06, + "loss": 2.0425, + "step": 16761 + }, + { + "epoch": 0.56, + "grad_norm": 0.740612268447876, + "learning_rate": 8.429724222708081e-06, + "loss": 2.0036, + "step": 16762 + }, + { + "epoch": 0.56, + "grad_norm": 0.7729616761207581, + "learning_rate": 8.428674524661039e-06, + "loss": 2.0484, + "step": 16763 + }, + { + "epoch": 0.56, + "grad_norm": 0.7519281506538391, + "learning_rate": 8.427624844365902e-06, + "loss": 2.0905, + "step": 16764 + }, + { + "epoch": 0.56, + "grad_norm": 0.7422950863838196, + "learning_rate": 8.426575181834541e-06, + "loss": 2.0446, + "step": 16765 + }, + { + "epoch": 0.56, + "grad_norm": 0.7161850929260254, + "learning_rate": 8.425525537078812e-06, + "loss": 2.0582, + "step": 16766 + }, + { + "epoch": 0.56, + "grad_norm": 0.6909325122833252, + "learning_rate": 8.424475910110573e-06, + "loss": 2.0624, + "step": 16767 + }, + { + "epoch": 0.56, + "grad_norm": 0.7443349957466125, + "learning_rate": 8.423426300941684e-06, + "loss": 2.1091, + "step": 16768 + }, + { + "epoch": 0.56, + "grad_norm": 0.7540214657783508, + "learning_rate": 8.422376709584e-06, + "loss": 2.0487, + "step": 16769 + }, + { + "epoch": 0.56, + "grad_norm": 0.7226146459579468, + "learning_rate": 8.421327136049374e-06, + "loss": 2.0376, + "step": 16770 + }, + { + "epoch": 0.56, + "grad_norm": 0.7182632088661194, + "learning_rate": 8.420277580349672e-06, + "loss": 2.0894, + "step": 16771 + }, + { + "epoch": 0.56, + "grad_norm": 0.7491751909255981, + "learning_rate": 8.419228042496751e-06, + "loss": 2.0657, + "step": 16772 + }, + { + "epoch": 0.56, + "grad_norm": 0.7263699769973755, + "learning_rate": 8.418178522502463e-06, + "loss": 2.048, + "step": 16773 + }, + { + "epoch": 0.56, + "grad_norm": 0.7348998188972473, + "learning_rate": 8.417129020378668e-06, + "loss": 2.0409, + "step": 16774 + }, + { + "epoch": 0.56, + "grad_norm": 0.7435441613197327, + "learning_rate": 8.416079536137219e-06, + "loss": 2.0701, + "step": 16775 + }, + { + "epoch": 0.56, + "grad_norm": 0.7647241353988647, + "learning_rate": 8.415030069789974e-06, + "loss": 2.0818, + "step": 16776 + }, + { + "epoch": 0.56, + "grad_norm": 0.721744179725647, + "learning_rate": 8.413980621348799e-06, + "loss": 2.102, + "step": 16777 + }, + { + "epoch": 0.56, + "grad_norm": 0.7339802980422974, + "learning_rate": 8.412931190825532e-06, + "loss": 2.0859, + "step": 16778 + }, + { + "epoch": 0.56, + "grad_norm": 0.7314200401306152, + "learning_rate": 8.411881778232042e-06, + "loss": 2.0393, + "step": 16779 + }, + { + "epoch": 0.56, + "grad_norm": 0.7117515206336975, + "learning_rate": 8.410832383580181e-06, + "loss": 2.1135, + "step": 16780 + }, + { + "epoch": 0.56, + "grad_norm": 0.7433462738990784, + "learning_rate": 8.409783006881806e-06, + "loss": 2.0337, + "step": 16781 + }, + { + "epoch": 0.56, + "grad_norm": 0.7403644323348999, + "learning_rate": 8.408733648148768e-06, + "loss": 2.0422, + "step": 16782 + }, + { + "epoch": 0.56, + "grad_norm": 0.7490825057029724, + "learning_rate": 8.407684307392924e-06, + "loss": 2.0423, + "step": 16783 + }, + { + "epoch": 0.56, + "grad_norm": 0.7862105965614319, + "learning_rate": 8.40663498462613e-06, + "loss": 2.1463, + "step": 16784 + }, + { + "epoch": 0.56, + "grad_norm": 0.701542854309082, + "learning_rate": 8.405585679860241e-06, + "loss": 2.0197, + "step": 16785 + }, + { + "epoch": 0.56, + "grad_norm": 0.7154602408409119, + "learning_rate": 8.404536393107112e-06, + "loss": 2.0021, + "step": 16786 + }, + { + "epoch": 0.56, + "grad_norm": 0.7079073190689087, + "learning_rate": 8.403487124378595e-06, + "loss": 2.0782, + "step": 16787 + }, + { + "epoch": 0.56, + "grad_norm": 0.742972195148468, + "learning_rate": 8.402437873686544e-06, + "loss": 1.9964, + "step": 16788 + }, + { + "epoch": 0.56, + "grad_norm": 0.7355003356933594, + "learning_rate": 8.401388641042811e-06, + "loss": 2.0615, + "step": 16789 + }, + { + "epoch": 0.56, + "grad_norm": 0.748282790184021, + "learning_rate": 8.400339426459259e-06, + "loss": 2.1343, + "step": 16790 + }, + { + "epoch": 0.56, + "grad_norm": 0.7776012420654297, + "learning_rate": 8.399290229947733e-06, + "loss": 2.0817, + "step": 16791 + }, + { + "epoch": 0.56, + "grad_norm": 0.7180848717689514, + "learning_rate": 8.398241051520082e-06, + "loss": 2.1021, + "step": 16792 + }, + { + "epoch": 0.56, + "grad_norm": 0.7320981025695801, + "learning_rate": 8.397191891188169e-06, + "loss": 2.1219, + "step": 16793 + }, + { + "epoch": 0.56, + "grad_norm": 0.7596744298934937, + "learning_rate": 8.396142748963844e-06, + "loss": 2.0453, + "step": 16794 + }, + { + "epoch": 0.56, + "grad_norm": 0.723743200302124, + "learning_rate": 8.395093624858956e-06, + "loss": 2.0344, + "step": 16795 + }, + { + "epoch": 0.56, + "grad_norm": 0.7396324872970581, + "learning_rate": 8.39404451888536e-06, + "loss": 2.1052, + "step": 16796 + }, + { + "epoch": 0.56, + "grad_norm": 0.7423840761184692, + "learning_rate": 8.392995431054906e-06, + "loss": 2.1527, + "step": 16797 + }, + { + "epoch": 0.56, + "grad_norm": 0.7974815368652344, + "learning_rate": 8.391946361379449e-06, + "loss": 2.0576, + "step": 16798 + }, + { + "epoch": 0.56, + "grad_norm": 0.7525286078453064, + "learning_rate": 8.390897309870845e-06, + "loss": 2.1107, + "step": 16799 + }, + { + "epoch": 0.56, + "grad_norm": 0.7498737573623657, + "learning_rate": 8.389848276540933e-06, + "loss": 2.1305, + "step": 16800 + }, + { + "epoch": 0.56, + "grad_norm": 0.7181726694107056, + "learning_rate": 8.388799261401575e-06, + "loss": 2.0903, + "step": 16801 + }, + { + "epoch": 0.56, + "grad_norm": 0.7593972682952881, + "learning_rate": 8.387750264464617e-06, + "loss": 2.1285, + "step": 16802 + }, + { + "epoch": 0.56, + "grad_norm": 0.7410340905189514, + "learning_rate": 8.386701285741914e-06, + "loss": 2.0362, + "step": 16803 + }, + { + "epoch": 0.56, + "grad_norm": 0.7696154117584229, + "learning_rate": 8.385652325245312e-06, + "loss": 2.0687, + "step": 16804 + }, + { + "epoch": 0.56, + "grad_norm": 0.778201699256897, + "learning_rate": 8.384603382986663e-06, + "loss": 2.0366, + "step": 16805 + }, + { + "epoch": 0.56, + "grad_norm": 0.7688080668449402, + "learning_rate": 8.383554458977821e-06, + "loss": 2.0876, + "step": 16806 + }, + { + "epoch": 0.56, + "grad_norm": 0.7319440245628357, + "learning_rate": 8.382505553230632e-06, + "loss": 2.0209, + "step": 16807 + }, + { + "epoch": 0.56, + "grad_norm": 0.7023982405662537, + "learning_rate": 8.38145666575695e-06, + "loss": 2.0602, + "step": 16808 + }, + { + "epoch": 0.56, + "grad_norm": 0.7453293204307556, + "learning_rate": 8.38040779656862e-06, + "loss": 2.0923, + "step": 16809 + }, + { + "epoch": 0.56, + "grad_norm": 0.7665380239486694, + "learning_rate": 8.379358945677493e-06, + "loss": 2.0588, + "step": 16810 + }, + { + "epoch": 0.56, + "grad_norm": 0.7308092713356018, + "learning_rate": 8.378310113095415e-06, + "loss": 2.1175, + "step": 16811 + }, + { + "epoch": 0.56, + "grad_norm": 0.7042902112007141, + "learning_rate": 8.377261298834249e-06, + "loss": 2.0307, + "step": 16812 + }, + { + "epoch": 0.56, + "grad_norm": 0.7634798288345337, + "learning_rate": 8.376212502905825e-06, + "loss": 2.1133, + "step": 16813 + }, + { + "epoch": 0.56, + "grad_norm": 0.7541706562042236, + "learning_rate": 8.375163725322005e-06, + "loss": 2.0484, + "step": 16814 + }, + { + "epoch": 0.56, + "grad_norm": 0.7624711394309998, + "learning_rate": 8.374114966094629e-06, + "loss": 2.0673, + "step": 16815 + }, + { + "epoch": 0.56, + "grad_norm": 0.7456276416778564, + "learning_rate": 8.373066225235552e-06, + "loss": 2.0279, + "step": 16816 + }, + { + "epoch": 0.56, + "grad_norm": 0.7726129293441772, + "learning_rate": 8.372017502756621e-06, + "loss": 2.0446, + "step": 16817 + }, + { + "epoch": 0.56, + "grad_norm": 0.7053190469741821, + "learning_rate": 8.370968798669681e-06, + "loss": 2.0323, + "step": 16818 + }, + { + "epoch": 0.56, + "grad_norm": 0.7628491520881653, + "learning_rate": 8.369920112986578e-06, + "loss": 2.0779, + "step": 16819 + }, + { + "epoch": 0.56, + "grad_norm": 0.7345894575119019, + "learning_rate": 8.368871445719163e-06, + "loss": 2.1608, + "step": 16820 + }, + { + "epoch": 0.56, + "grad_norm": 0.731438159942627, + "learning_rate": 8.367822796879286e-06, + "loss": 2.0797, + "step": 16821 + }, + { + "epoch": 0.56, + "grad_norm": 0.7195285558700562, + "learning_rate": 8.366774166478788e-06, + "loss": 2.0273, + "step": 16822 + }, + { + "epoch": 0.56, + "grad_norm": 0.7461681365966797, + "learning_rate": 8.365725554529518e-06, + "loss": 2.0355, + "step": 16823 + }, + { + "epoch": 0.56, + "grad_norm": 0.7426743507385254, + "learning_rate": 8.364676961043324e-06, + "loss": 2.0694, + "step": 16824 + }, + { + "epoch": 0.56, + "grad_norm": 0.7170631289482117, + "learning_rate": 8.363628386032053e-06, + "loss": 2.1099, + "step": 16825 + }, + { + "epoch": 0.56, + "grad_norm": 0.7615997791290283, + "learning_rate": 8.362579829507547e-06, + "loss": 2.029, + "step": 16826 + }, + { + "epoch": 0.56, + "grad_norm": 0.7627228498458862, + "learning_rate": 8.361531291481651e-06, + "loss": 2.0628, + "step": 16827 + }, + { + "epoch": 0.56, + "grad_norm": 0.7607656717300415, + "learning_rate": 8.360482771966219e-06, + "loss": 2.0749, + "step": 16828 + }, + { + "epoch": 0.56, + "grad_norm": 0.7418903708457947, + "learning_rate": 8.35943427097309e-06, + "loss": 2.0521, + "step": 16829 + }, + { + "epoch": 0.56, + "grad_norm": 0.7557323575019836, + "learning_rate": 8.358385788514114e-06, + "loss": 2.1258, + "step": 16830 + }, + { + "epoch": 0.56, + "grad_norm": 0.7228327989578247, + "learning_rate": 8.35733732460113e-06, + "loss": 2.0049, + "step": 16831 + }, + { + "epoch": 0.56, + "grad_norm": 0.7489004135131836, + "learning_rate": 8.356288879245987e-06, + "loss": 2.079, + "step": 16832 + }, + { + "epoch": 0.56, + "grad_norm": 0.7277175784111023, + "learning_rate": 8.355240452460527e-06, + "loss": 2.0942, + "step": 16833 + }, + { + "epoch": 0.56, + "grad_norm": 0.7584128379821777, + "learning_rate": 8.354192044256604e-06, + "loss": 2.0604, + "step": 16834 + }, + { + "epoch": 0.56, + "grad_norm": 0.7833227515220642, + "learning_rate": 8.353143654646046e-06, + "loss": 2.0542, + "step": 16835 + }, + { + "epoch": 0.56, + "grad_norm": 0.7734096050262451, + "learning_rate": 8.35209528364071e-06, + "loss": 2.0645, + "step": 16836 + }, + { + "epoch": 0.56, + "grad_norm": 0.7233988642692566, + "learning_rate": 8.351046931252435e-06, + "loss": 1.9765, + "step": 16837 + }, + { + "epoch": 0.56, + "grad_norm": 0.7298857569694519, + "learning_rate": 8.349998597493064e-06, + "loss": 2.1032, + "step": 16838 + }, + { + "epoch": 0.56, + "grad_norm": 0.7337982654571533, + "learning_rate": 8.348950282374446e-06, + "loss": 2.0606, + "step": 16839 + }, + { + "epoch": 0.56, + "grad_norm": 0.7481353282928467, + "learning_rate": 8.347901985908417e-06, + "loss": 2.0919, + "step": 16840 + }, + { + "epoch": 0.56, + "grad_norm": 0.7400870323181152, + "learning_rate": 8.346853708106821e-06, + "loss": 2.0091, + "step": 16841 + }, + { + "epoch": 0.56, + "grad_norm": 0.7478057742118835, + "learning_rate": 8.345805448981505e-06, + "loss": 2.0961, + "step": 16842 + }, + { + "epoch": 0.56, + "grad_norm": 0.7515630722045898, + "learning_rate": 8.344757208544312e-06, + "loss": 2.0283, + "step": 16843 + }, + { + "epoch": 0.56, + "grad_norm": 0.7632972002029419, + "learning_rate": 8.34370898680708e-06, + "loss": 2.0906, + "step": 16844 + }, + { + "epoch": 0.56, + "grad_norm": 0.7500972151756287, + "learning_rate": 8.342660783781653e-06, + "loss": 2.0673, + "step": 16845 + }, + { + "epoch": 0.56, + "grad_norm": 0.7498136162757874, + "learning_rate": 8.341612599479873e-06, + "loss": 2.0166, + "step": 16846 + }, + { + "epoch": 0.56, + "grad_norm": 0.7213338613510132, + "learning_rate": 8.34056443391358e-06, + "loss": 2.0521, + "step": 16847 + }, + { + "epoch": 0.56, + "grad_norm": 0.7353786826133728, + "learning_rate": 8.339516287094625e-06, + "loss": 2.0746, + "step": 16848 + }, + { + "epoch": 0.56, + "grad_norm": 0.7712132334709167, + "learning_rate": 8.338468159034834e-06, + "loss": 2.1227, + "step": 16849 + }, + { + "epoch": 0.56, + "grad_norm": 0.7551206946372986, + "learning_rate": 8.337420049746058e-06, + "loss": 2.0359, + "step": 16850 + }, + { + "epoch": 0.56, + "grad_norm": 0.6947364211082458, + "learning_rate": 8.336371959240136e-06, + "loss": 2.0123, + "step": 16851 + }, + { + "epoch": 0.56, + "grad_norm": 0.7283193469047546, + "learning_rate": 8.33532388752891e-06, + "loss": 2.0639, + "step": 16852 + }, + { + "epoch": 0.56, + "grad_norm": 0.7416529059410095, + "learning_rate": 8.334275834624219e-06, + "loss": 2.0277, + "step": 16853 + }, + { + "epoch": 0.56, + "grad_norm": 0.7300173044204712, + "learning_rate": 8.333227800537903e-06, + "loss": 2.0887, + "step": 16854 + }, + { + "epoch": 0.56, + "grad_norm": 0.7377007603645325, + "learning_rate": 8.3321797852818e-06, + "loss": 2.0785, + "step": 16855 + }, + { + "epoch": 0.56, + "grad_norm": 0.7681763172149658, + "learning_rate": 8.33113178886776e-06, + "loss": 2.0793, + "step": 16856 + }, + { + "epoch": 0.56, + "grad_norm": 0.791058361530304, + "learning_rate": 8.330083811307605e-06, + "loss": 2.0324, + "step": 16857 + }, + { + "epoch": 0.56, + "grad_norm": 0.7533602714538574, + "learning_rate": 8.32903585261319e-06, + "loss": 2.0952, + "step": 16858 + }, + { + "epoch": 0.56, + "grad_norm": 0.7043353319168091, + "learning_rate": 8.327987912796346e-06, + "loss": 2.03, + "step": 16859 + }, + { + "epoch": 0.56, + "grad_norm": 0.7328832149505615, + "learning_rate": 8.326939991868918e-06, + "loss": 2.1701, + "step": 16860 + }, + { + "epoch": 0.56, + "grad_norm": 0.7266687154769897, + "learning_rate": 8.32589208984274e-06, + "loss": 2.034, + "step": 16861 + }, + { + "epoch": 0.56, + "grad_norm": 0.7840631008148193, + "learning_rate": 8.324844206729652e-06, + "loss": 2.1154, + "step": 16862 + }, + { + "epoch": 0.56, + "grad_norm": 0.73016756772995, + "learning_rate": 8.32379634254149e-06, + "loss": 2.0607, + "step": 16863 + }, + { + "epoch": 0.56, + "grad_norm": 0.7633053660392761, + "learning_rate": 8.322748497290098e-06, + "loss": 2.0643, + "step": 16864 + }, + { + "epoch": 0.56, + "grad_norm": 0.7259398102760315, + "learning_rate": 8.32170067098731e-06, + "loss": 2.083, + "step": 16865 + }, + { + "epoch": 0.56, + "grad_norm": 0.7380948662757874, + "learning_rate": 8.320652863644963e-06, + "loss": 2.0741, + "step": 16866 + }, + { + "epoch": 0.56, + "grad_norm": 0.7427602410316467, + "learning_rate": 8.319605075274898e-06, + "loss": 2.0679, + "step": 16867 + }, + { + "epoch": 0.56, + "grad_norm": 0.7542269825935364, + "learning_rate": 8.318557305888947e-06, + "loss": 2.0334, + "step": 16868 + }, + { + "epoch": 0.56, + "grad_norm": 0.7096551060676575, + "learning_rate": 8.31750955549895e-06, + "loss": 2.0085, + "step": 16869 + }, + { + "epoch": 0.56, + "grad_norm": 0.7602319717407227, + "learning_rate": 8.316461824116748e-06, + "loss": 2.0604, + "step": 16870 + }, + { + "epoch": 0.56, + "grad_norm": 0.7765876650810242, + "learning_rate": 8.31541411175417e-06, + "loss": 1.9951, + "step": 16871 + }, + { + "epoch": 0.56, + "grad_norm": 0.7706303000450134, + "learning_rate": 8.314366418423056e-06, + "loss": 2.0864, + "step": 16872 + }, + { + "epoch": 0.56, + "grad_norm": 0.7454278469085693, + "learning_rate": 8.313318744135244e-06, + "loss": 2.0434, + "step": 16873 + }, + { + "epoch": 0.56, + "grad_norm": 0.7712646126747131, + "learning_rate": 8.31227108890257e-06, + "loss": 2.1017, + "step": 16874 + }, + { + "epoch": 0.56, + "grad_norm": 0.7353293299674988, + "learning_rate": 8.311223452736865e-06, + "loss": 2.0812, + "step": 16875 + }, + { + "epoch": 0.56, + "grad_norm": 0.739406168460846, + "learning_rate": 8.31017583564997e-06, + "loss": 2.1037, + "step": 16876 + }, + { + "epoch": 0.56, + "grad_norm": 0.7452880144119263, + "learning_rate": 8.309128237653714e-06, + "loss": 2.0022, + "step": 16877 + }, + { + "epoch": 0.56, + "grad_norm": 0.7778764963150024, + "learning_rate": 8.308080658759939e-06, + "loss": 2.0519, + "step": 16878 + }, + { + "epoch": 0.56, + "grad_norm": 0.7428486943244934, + "learning_rate": 8.30703309898048e-06, + "loss": 2.0704, + "step": 16879 + }, + { + "epoch": 0.56, + "grad_norm": 0.7345752120018005, + "learning_rate": 8.305985558327165e-06, + "loss": 2.1008, + "step": 16880 + }, + { + "epoch": 0.56, + "grad_norm": 0.7461683750152588, + "learning_rate": 8.304938036811833e-06, + "loss": 2.0814, + "step": 16881 + }, + { + "epoch": 0.56, + "grad_norm": 0.746198832988739, + "learning_rate": 8.303890534446315e-06, + "loss": 1.9922, + "step": 16882 + }, + { + "epoch": 0.56, + "grad_norm": 0.7936160564422607, + "learning_rate": 8.302843051242455e-06, + "loss": 2.1035, + "step": 16883 + }, + { + "epoch": 0.56, + "grad_norm": 0.7319876551628113, + "learning_rate": 8.301795587212076e-06, + "loss": 2.1279, + "step": 16884 + }, + { + "epoch": 0.56, + "grad_norm": 0.738196074962616, + "learning_rate": 8.300748142367012e-06, + "loss": 2.0397, + "step": 16885 + }, + { + "epoch": 0.56, + "grad_norm": 0.7211881279945374, + "learning_rate": 8.299700716719103e-06, + "loss": 2.0697, + "step": 16886 + }, + { + "epoch": 0.56, + "grad_norm": 0.7151875495910645, + "learning_rate": 8.29865331028018e-06, + "loss": 2.102, + "step": 16887 + }, + { + "epoch": 0.56, + "grad_norm": 0.7350781559944153, + "learning_rate": 8.297605923062073e-06, + "loss": 2.0098, + "step": 16888 + }, + { + "epoch": 0.56, + "grad_norm": 0.7104495763778687, + "learning_rate": 8.296558555076614e-06, + "loss": 2.0207, + "step": 16889 + }, + { + "epoch": 0.56, + "grad_norm": 0.7670384049415588, + "learning_rate": 8.29551120633564e-06, + "loss": 2.037, + "step": 16890 + }, + { + "epoch": 0.56, + "grad_norm": 0.735596776008606, + "learning_rate": 8.29446387685098e-06, + "loss": 2.0576, + "step": 16891 + }, + { + "epoch": 0.56, + "grad_norm": 0.7880738973617554, + "learning_rate": 8.293416566634475e-06, + "loss": 2.0586, + "step": 16892 + }, + { + "epoch": 0.56, + "grad_norm": 0.7320733070373535, + "learning_rate": 8.292369275697942e-06, + "loss": 2.082, + "step": 16893 + }, + { + "epoch": 0.56, + "grad_norm": 0.7433901429176331, + "learning_rate": 8.291322004053222e-06, + "loss": 1.9847, + "step": 16894 + }, + { + "epoch": 0.56, + "grad_norm": 0.766225278377533, + "learning_rate": 8.290274751712143e-06, + "loss": 2.0901, + "step": 16895 + }, + { + "epoch": 0.56, + "grad_norm": 0.7629501223564148, + "learning_rate": 8.289227518686543e-06, + "loss": 2.0475, + "step": 16896 + }, + { + "epoch": 0.56, + "grad_norm": 0.7677200436592102, + "learning_rate": 8.288180304988245e-06, + "loss": 2.155, + "step": 16897 + }, + { + "epoch": 0.56, + "grad_norm": 0.7372588515281677, + "learning_rate": 8.28713311062908e-06, + "loss": 2.1331, + "step": 16898 + }, + { + "epoch": 0.56, + "grad_norm": 0.7421506643295288, + "learning_rate": 8.286085935620883e-06, + "loss": 2.073, + "step": 16899 + }, + { + "epoch": 0.56, + "grad_norm": 0.7920910120010376, + "learning_rate": 8.285038779975483e-06, + "loss": 2.0234, + "step": 16900 + }, + { + "epoch": 0.56, + "grad_norm": 0.723716676235199, + "learning_rate": 8.283991643704712e-06, + "loss": 2.0479, + "step": 16901 + }, + { + "epoch": 0.56, + "grad_norm": 0.7653703093528748, + "learning_rate": 8.282944526820395e-06, + "loss": 2.0149, + "step": 16902 + }, + { + "epoch": 0.56, + "grad_norm": 0.7333273887634277, + "learning_rate": 8.281897429334366e-06, + "loss": 2.0412, + "step": 16903 + }, + { + "epoch": 0.56, + "grad_norm": 0.728277325630188, + "learning_rate": 8.28085035125845e-06, + "loss": 2.0808, + "step": 16904 + }, + { + "epoch": 0.56, + "grad_norm": 0.7482560873031616, + "learning_rate": 8.279803292604485e-06, + "loss": 2.1631, + "step": 16905 + }, + { + "epoch": 0.56, + "grad_norm": 0.7058760523796082, + "learning_rate": 8.278756253384288e-06, + "loss": 1.9954, + "step": 16906 + }, + { + "epoch": 0.56, + "grad_norm": 0.7321876883506775, + "learning_rate": 8.277709233609696e-06, + "loss": 2.0602, + "step": 16907 + }, + { + "epoch": 0.56, + "grad_norm": 0.7453299760818481, + "learning_rate": 8.276662233292538e-06, + "loss": 2.0692, + "step": 16908 + }, + { + "epoch": 0.56, + "grad_norm": 0.7306625843048096, + "learning_rate": 8.275615252444638e-06, + "loss": 2.0439, + "step": 16909 + }, + { + "epoch": 0.56, + "grad_norm": 0.75696861743927, + "learning_rate": 8.274568291077829e-06, + "loss": 2.0065, + "step": 16910 + }, + { + "epoch": 0.56, + "grad_norm": 0.758575439453125, + "learning_rate": 8.273521349203934e-06, + "loss": 2.0737, + "step": 16911 + }, + { + "epoch": 0.56, + "grad_norm": 0.7682310342788696, + "learning_rate": 8.27247442683478e-06, + "loss": 2.1188, + "step": 16912 + }, + { + "epoch": 0.56, + "grad_norm": 0.7318697571754456, + "learning_rate": 8.2714275239822e-06, + "loss": 2.0379, + "step": 16913 + }, + { + "epoch": 0.56, + "grad_norm": 0.7258166670799255, + "learning_rate": 8.270380640658021e-06, + "loss": 2.072, + "step": 16914 + }, + { + "epoch": 0.56, + "grad_norm": 0.7358026504516602, + "learning_rate": 8.269333776874066e-06, + "loss": 2.1335, + "step": 16915 + }, + { + "epoch": 0.56, + "grad_norm": 0.7216898798942566, + "learning_rate": 8.268286932642164e-06, + "loss": 2.062, + "step": 16916 + }, + { + "epoch": 0.56, + "grad_norm": 0.7271497845649719, + "learning_rate": 8.267240107974141e-06, + "loss": 2.0766, + "step": 16917 + }, + { + "epoch": 0.56, + "grad_norm": 0.7113763689994812, + "learning_rate": 8.266193302881826e-06, + "loss": 2.0822, + "step": 16918 + }, + { + "epoch": 0.56, + "grad_norm": 0.7349677681922913, + "learning_rate": 8.26514651737704e-06, + "loss": 2.0254, + "step": 16919 + }, + { + "epoch": 0.56, + "grad_norm": 0.7567934989929199, + "learning_rate": 8.264099751471613e-06, + "loss": 2.0973, + "step": 16920 + }, + { + "epoch": 0.56, + "grad_norm": 0.7469909191131592, + "learning_rate": 8.263053005177369e-06, + "loss": 2.0803, + "step": 16921 + }, + { + "epoch": 0.56, + "grad_norm": 0.7613385915756226, + "learning_rate": 8.262006278506133e-06, + "loss": 2.0349, + "step": 16922 + }, + { + "epoch": 0.56, + "grad_norm": 0.7174393534660339, + "learning_rate": 8.260959571469737e-06, + "loss": 2.0547, + "step": 16923 + }, + { + "epoch": 0.56, + "grad_norm": 0.7459761500358582, + "learning_rate": 8.259912884079996e-06, + "loss": 2.1424, + "step": 16924 + }, + { + "epoch": 0.56, + "grad_norm": 0.740079939365387, + "learning_rate": 8.25886621634874e-06, + "loss": 2.0921, + "step": 16925 + }, + { + "epoch": 0.56, + "grad_norm": 0.7167969346046448, + "learning_rate": 8.25781956828779e-06, + "loss": 2.0584, + "step": 16926 + }, + { + "epoch": 0.56, + "grad_norm": 0.732787013053894, + "learning_rate": 8.256772939908982e-06, + "loss": 2.1102, + "step": 16927 + }, + { + "epoch": 0.56, + "grad_norm": 0.7419447898864746, + "learning_rate": 8.255726331224124e-06, + "loss": 2.068, + "step": 16928 + }, + { + "epoch": 0.56, + "grad_norm": 0.7233572602272034, + "learning_rate": 8.254679742245051e-06, + "loss": 2.0828, + "step": 16929 + }, + { + "epoch": 0.56, + "grad_norm": 0.7445202469825745, + "learning_rate": 8.253633172983581e-06, + "loss": 2.0283, + "step": 16930 + }, + { + "epoch": 0.56, + "grad_norm": 0.7320336103439331, + "learning_rate": 8.252586623451541e-06, + "loss": 2.1411, + "step": 16931 + }, + { + "epoch": 0.56, + "grad_norm": 0.7277160286903381, + "learning_rate": 8.251540093660758e-06, + "loss": 2.0408, + "step": 16932 + }, + { + "epoch": 0.56, + "grad_norm": 0.7278395295143127, + "learning_rate": 8.250493583623045e-06, + "loss": 2.0484, + "step": 16933 + }, + { + "epoch": 0.56, + "grad_norm": 0.7649754285812378, + "learning_rate": 8.249447093350228e-06, + "loss": 2.0965, + "step": 16934 + }, + { + "epoch": 0.56, + "grad_norm": 0.7683017253875732, + "learning_rate": 8.248400622854137e-06, + "loss": 1.9578, + "step": 16935 + }, + { + "epoch": 0.56, + "grad_norm": 0.7508137822151184, + "learning_rate": 8.24735417214659e-06, + "loss": 2.0555, + "step": 16936 + }, + { + "epoch": 0.56, + "grad_norm": 0.7491703033447266, + "learning_rate": 8.246307741239405e-06, + "loss": 2.0967, + "step": 16937 + }, + { + "epoch": 0.56, + "grad_norm": 0.7342215180397034, + "learning_rate": 8.245261330144407e-06, + "loss": 2.0774, + "step": 16938 + }, + { + "epoch": 0.56, + "grad_norm": 0.7476543188095093, + "learning_rate": 8.24421493887342e-06, + "loss": 2.1278, + "step": 16939 + }, + { + "epoch": 0.56, + "grad_norm": 0.7282251119613647, + "learning_rate": 8.243168567438264e-06, + "loss": 2.081, + "step": 16940 + }, + { + "epoch": 0.56, + "grad_norm": 0.7234349846839905, + "learning_rate": 8.242122215850758e-06, + "loss": 2.0022, + "step": 16941 + }, + { + "epoch": 0.56, + "grad_norm": 0.8079465627670288, + "learning_rate": 8.241075884122724e-06, + "loss": 2.0284, + "step": 16942 + }, + { + "epoch": 0.56, + "grad_norm": 0.7177997827529907, + "learning_rate": 8.240029572265986e-06, + "loss": 2.1089, + "step": 16943 + }, + { + "epoch": 0.56, + "grad_norm": 0.7278389930725098, + "learning_rate": 8.238983280292362e-06, + "loss": 2.0877, + "step": 16944 + }, + { + "epoch": 0.56, + "grad_norm": 0.7399302124977112, + "learning_rate": 8.237937008213674e-06, + "loss": 2.0688, + "step": 16945 + }, + { + "epoch": 0.56, + "grad_norm": 0.7312920689582825, + "learning_rate": 8.23689075604174e-06, + "loss": 2.0857, + "step": 16946 + }, + { + "epoch": 0.56, + "grad_norm": 0.7458975315093994, + "learning_rate": 8.235844523788382e-06, + "loss": 2.0802, + "step": 16947 + }, + { + "epoch": 0.56, + "grad_norm": 0.7342796325683594, + "learning_rate": 8.234798311465415e-06, + "loss": 2.1266, + "step": 16948 + }, + { + "epoch": 0.56, + "grad_norm": 0.7531648278236389, + "learning_rate": 8.233752119084668e-06, + "loss": 2.0133, + "step": 16949 + }, + { + "epoch": 0.56, + "grad_norm": 0.7985450029373169, + "learning_rate": 8.232705946657949e-06, + "loss": 2.1012, + "step": 16950 + }, + { + "epoch": 0.56, + "grad_norm": 0.7235446572303772, + "learning_rate": 8.231659794197084e-06, + "loss": 2.1286, + "step": 16951 + }, + { + "epoch": 0.56, + "grad_norm": 0.7931275963783264, + "learning_rate": 8.230613661713891e-06, + "loss": 2.0993, + "step": 16952 + }, + { + "epoch": 0.56, + "grad_norm": 0.7526986598968506, + "learning_rate": 8.229567549220188e-06, + "loss": 2.023, + "step": 16953 + }, + { + "epoch": 0.56, + "grad_norm": 0.7853092551231384, + "learning_rate": 8.228521456727795e-06, + "loss": 1.9906, + "step": 16954 + }, + { + "epoch": 0.56, + "grad_norm": 0.7526300549507141, + "learning_rate": 8.227475384248526e-06, + "loss": 2.0009, + "step": 16955 + }, + { + "epoch": 0.56, + "grad_norm": 0.7730827331542969, + "learning_rate": 8.2264293317942e-06, + "loss": 1.9941, + "step": 16956 + }, + { + "epoch": 0.56, + "grad_norm": 0.7484804391860962, + "learning_rate": 8.225383299376639e-06, + "loss": 2.0655, + "step": 16957 + }, + { + "epoch": 0.56, + "grad_norm": 0.7525172233581543, + "learning_rate": 8.224337287007658e-06, + "loss": 2.0637, + "step": 16958 + }, + { + "epoch": 0.56, + "grad_norm": 0.7320743203163147, + "learning_rate": 8.223291294699071e-06, + "loss": 2.026, + "step": 16959 + }, + { + "epoch": 0.56, + "grad_norm": 0.7296802401542664, + "learning_rate": 8.222245322462699e-06, + "loss": 2.0024, + "step": 16960 + }, + { + "epoch": 0.56, + "grad_norm": 0.7238101959228516, + "learning_rate": 8.221199370310357e-06, + "loss": 2.0389, + "step": 16961 + }, + { + "epoch": 0.56, + "grad_norm": 0.7531610727310181, + "learning_rate": 8.220153438253859e-06, + "loss": 2.0138, + "step": 16962 + }, + { + "epoch": 0.56, + "grad_norm": 0.7388815879821777, + "learning_rate": 8.219107526305033e-06, + "loss": 2.012, + "step": 16963 + }, + { + "epoch": 0.56, + "grad_norm": 0.7233477234840393, + "learning_rate": 8.218061634475677e-06, + "loss": 2.0401, + "step": 16964 + }, + { + "epoch": 0.56, + "grad_norm": 0.729682207107544, + "learning_rate": 8.21701576277762e-06, + "loss": 2.0812, + "step": 16965 + }, + { + "epoch": 0.56, + "grad_norm": 0.7398176193237305, + "learning_rate": 8.215969911222674e-06, + "loss": 2.0782, + "step": 16966 + }, + { + "epoch": 0.56, + "grad_norm": 0.7357902526855469, + "learning_rate": 8.214924079822658e-06, + "loss": 2.0281, + "step": 16967 + }, + { + "epoch": 0.56, + "grad_norm": 0.7481693625450134, + "learning_rate": 8.21387826858938e-06, + "loss": 2.0839, + "step": 16968 + }, + { + "epoch": 0.56, + "grad_norm": 0.7237067222595215, + "learning_rate": 8.212832477534658e-06, + "loss": 2.0631, + "step": 16969 + }, + { + "epoch": 0.56, + "grad_norm": 0.7454030513763428, + "learning_rate": 8.211786706670306e-06, + "loss": 2.1423, + "step": 16970 + }, + { + "epoch": 0.56, + "grad_norm": 0.7432722449302673, + "learning_rate": 8.210740956008146e-06, + "loss": 2.0932, + "step": 16971 + }, + { + "epoch": 0.56, + "grad_norm": 0.7561458349227905, + "learning_rate": 8.20969522555998e-06, + "loss": 2.151, + "step": 16972 + }, + { + "epoch": 0.56, + "grad_norm": 0.7348697185516357, + "learning_rate": 8.20864951533763e-06, + "loss": 2.0763, + "step": 16973 + }, + { + "epoch": 0.56, + "grad_norm": 0.7251679301261902, + "learning_rate": 8.207603825352908e-06, + "loss": 2.1079, + "step": 16974 + }, + { + "epoch": 0.56, + "grad_norm": 0.7202357053756714, + "learning_rate": 8.206558155617625e-06, + "loss": 1.9972, + "step": 16975 + }, + { + "epoch": 0.56, + "grad_norm": 0.7224113941192627, + "learning_rate": 8.205512506143603e-06, + "loss": 2.0432, + "step": 16976 + }, + { + "epoch": 0.56, + "grad_norm": 0.7455394268035889, + "learning_rate": 8.204466876942646e-06, + "loss": 2.0503, + "step": 16977 + }, + { + "epoch": 0.56, + "grad_norm": 0.7600259780883789, + "learning_rate": 8.203421268026567e-06, + "loss": 2.0265, + "step": 16978 + }, + { + "epoch": 0.56, + "grad_norm": 0.732416570186615, + "learning_rate": 8.202375679407185e-06, + "loss": 2.0655, + "step": 16979 + }, + { + "epoch": 0.56, + "grad_norm": 0.7577841281890869, + "learning_rate": 8.201330111096309e-06, + "loss": 2.102, + "step": 16980 + }, + { + "epoch": 0.56, + "grad_norm": 0.7591338157653809, + "learning_rate": 8.20028456310575e-06, + "loss": 2.1058, + "step": 16981 + }, + { + "epoch": 0.56, + "grad_norm": 0.7448843717575073, + "learning_rate": 8.199239035447322e-06, + "loss": 2.0668, + "step": 16982 + }, + { + "epoch": 0.57, + "grad_norm": 0.7480595707893372, + "learning_rate": 8.198193528132833e-06, + "loss": 2.074, + "step": 16983 + }, + { + "epoch": 0.57, + "grad_norm": 0.7089710831642151, + "learning_rate": 8.1971480411741e-06, + "loss": 2.0148, + "step": 16984 + }, + { + "epoch": 0.57, + "grad_norm": 0.707197368144989, + "learning_rate": 8.196102574582936e-06, + "loss": 1.9972, + "step": 16985 + }, + { + "epoch": 0.57, + "grad_norm": 0.7421472072601318, + "learning_rate": 8.195057128371143e-06, + "loss": 2.0908, + "step": 16986 + }, + { + "epoch": 0.57, + "grad_norm": 0.7494401931762695, + "learning_rate": 8.194011702550538e-06, + "loss": 2.0302, + "step": 16987 + }, + { + "epoch": 0.57, + "grad_norm": 0.7262296080589294, + "learning_rate": 8.19296629713293e-06, + "loss": 2.0961, + "step": 16988 + }, + { + "epoch": 0.57, + "grad_norm": 0.7473929524421692, + "learning_rate": 8.191920912130131e-06, + "loss": 2.0867, + "step": 16989 + }, + { + "epoch": 0.57, + "grad_norm": 0.7438567280769348, + "learning_rate": 8.190875547553949e-06, + "loss": 2.0854, + "step": 16990 + }, + { + "epoch": 0.57, + "grad_norm": 0.7884774804115295, + "learning_rate": 8.189830203416192e-06, + "loss": 2.0066, + "step": 16991 + }, + { + "epoch": 0.57, + "grad_norm": 0.7366700172424316, + "learning_rate": 8.188784879728675e-06, + "loss": 2.0468, + "step": 16992 + }, + { + "epoch": 0.57, + "grad_norm": 0.7352626919746399, + "learning_rate": 8.187739576503204e-06, + "loss": 2.007, + "step": 16993 + }, + { + "epoch": 0.57, + "grad_norm": 0.7432831525802612, + "learning_rate": 8.186694293751593e-06, + "loss": 2.1443, + "step": 16994 + }, + { + "epoch": 0.57, + "grad_norm": 0.7388004064559937, + "learning_rate": 8.185649031485643e-06, + "loss": 1.9779, + "step": 16995 + }, + { + "epoch": 0.57, + "grad_norm": 0.802349865436554, + "learning_rate": 8.18460378971717e-06, + "loss": 2.1357, + "step": 16996 + }, + { + "epoch": 0.57, + "grad_norm": 0.7232075929641724, + "learning_rate": 8.183558568457974e-06, + "loss": 2.1164, + "step": 16997 + }, + { + "epoch": 0.57, + "grad_norm": 0.7650680541992188, + "learning_rate": 8.182513367719878e-06, + "loss": 2.06, + "step": 16998 + }, + { + "epoch": 0.57, + "grad_norm": 0.7210886478424072, + "learning_rate": 8.181468187514674e-06, + "loss": 2.041, + "step": 16999 + }, + { + "epoch": 0.57, + "grad_norm": 0.7546193599700928, + "learning_rate": 8.180423027854177e-06, + "loss": 2.0603, + "step": 17000 + }, + { + "epoch": 0.57, + "grad_norm": 0.7500667572021484, + "learning_rate": 8.179377888750194e-06, + "loss": 2.1023, + "step": 17001 + }, + { + "epoch": 0.57, + "grad_norm": 0.7135975956916809, + "learning_rate": 8.178332770214537e-06, + "loss": 2.0534, + "step": 17002 + }, + { + "epoch": 0.57, + "grad_norm": 0.7406973242759705, + "learning_rate": 8.177287672259005e-06, + "loss": 2.1087, + "step": 17003 + }, + { + "epoch": 0.57, + "grad_norm": 0.7706504464149475, + "learning_rate": 8.17624259489541e-06, + "loss": 2.0332, + "step": 17004 + }, + { + "epoch": 0.57, + "grad_norm": 0.7311902642250061, + "learning_rate": 8.175197538135553e-06, + "loss": 2.1176, + "step": 17005 + }, + { + "epoch": 0.57, + "grad_norm": 0.7533340454101562, + "learning_rate": 8.174152501991248e-06, + "loss": 2.0477, + "step": 17006 + }, + { + "epoch": 0.57, + "grad_norm": 0.7406693696975708, + "learning_rate": 8.1731074864743e-06, + "loss": 2.0656, + "step": 17007 + }, + { + "epoch": 0.57, + "grad_norm": 0.7273715138435364, + "learning_rate": 8.172062491596513e-06, + "loss": 2.0113, + "step": 17008 + }, + { + "epoch": 0.57, + "grad_norm": 0.7499680519104004, + "learning_rate": 8.171017517369691e-06, + "loss": 2.01, + "step": 17009 + }, + { + "epoch": 0.57, + "grad_norm": 0.754675567150116, + "learning_rate": 8.169972563805641e-06, + "loss": 2.0534, + "step": 17010 + }, + { + "epoch": 0.57, + "grad_norm": 0.7410624623298645, + "learning_rate": 8.16892763091617e-06, + "loss": 2.0626, + "step": 17011 + }, + { + "epoch": 0.57, + "grad_norm": 0.7312633395195007, + "learning_rate": 8.167882718713081e-06, + "loss": 2.0401, + "step": 17012 + }, + { + "epoch": 0.57, + "grad_norm": 0.755429744720459, + "learning_rate": 8.166837827208176e-06, + "loss": 2.0671, + "step": 17013 + }, + { + "epoch": 0.57, + "grad_norm": 0.7829160094261169, + "learning_rate": 8.165792956413265e-06, + "loss": 2.053, + "step": 17014 + }, + { + "epoch": 0.57, + "grad_norm": 0.7410233020782471, + "learning_rate": 8.16474810634015e-06, + "loss": 2.0348, + "step": 17015 + }, + { + "epoch": 0.57, + "grad_norm": 0.7611442804336548, + "learning_rate": 8.163703277000639e-06, + "loss": 2.0405, + "step": 17016 + }, + { + "epoch": 0.57, + "grad_norm": 0.7449426651000977, + "learning_rate": 8.162658468406529e-06, + "loss": 2.1235, + "step": 17017 + }, + { + "epoch": 0.57, + "grad_norm": 0.7550774812698364, + "learning_rate": 8.161613680569627e-06, + "loss": 2.0518, + "step": 17018 + }, + { + "epoch": 0.57, + "grad_norm": 0.7267982959747314, + "learning_rate": 8.160568913501734e-06, + "loss": 2.0718, + "step": 17019 + }, + { + "epoch": 0.57, + "grad_norm": 0.7500813007354736, + "learning_rate": 8.15952416721466e-06, + "loss": 2.015, + "step": 17020 + }, + { + "epoch": 0.57, + "grad_norm": 0.7406695485115051, + "learning_rate": 8.1584794417202e-06, + "loss": 2.0763, + "step": 17021 + }, + { + "epoch": 0.57, + "grad_norm": 0.7630616426467896, + "learning_rate": 8.157434737030161e-06, + "loss": 2.0833, + "step": 17022 + }, + { + "epoch": 0.57, + "grad_norm": 0.7512649893760681, + "learning_rate": 8.156390053156345e-06, + "loss": 2.0402, + "step": 17023 + }, + { + "epoch": 0.57, + "grad_norm": 0.7367219924926758, + "learning_rate": 8.155345390110552e-06, + "loss": 2.0866, + "step": 17024 + }, + { + "epoch": 0.57, + "grad_norm": 0.7510017156600952, + "learning_rate": 8.154300747904589e-06, + "loss": 2.0197, + "step": 17025 + }, + { + "epoch": 0.57, + "grad_norm": 0.7582144737243652, + "learning_rate": 8.153256126550253e-06, + "loss": 2.059, + "step": 17026 + }, + { + "epoch": 0.57, + "grad_norm": 0.7426238656044006, + "learning_rate": 8.152211526059344e-06, + "loss": 2.0481, + "step": 17027 + }, + { + "epoch": 0.57, + "grad_norm": 0.7472699880599976, + "learning_rate": 8.151166946443668e-06, + "loss": 2.0574, + "step": 17028 + }, + { + "epoch": 0.57, + "grad_norm": 0.7463499307632446, + "learning_rate": 8.150122387715027e-06, + "loss": 2.0878, + "step": 17029 + }, + { + "epoch": 0.57, + "grad_norm": 0.7555307745933533, + "learning_rate": 8.149077849885217e-06, + "loss": 2.0888, + "step": 17030 + }, + { + "epoch": 0.57, + "grad_norm": 0.7193235754966736, + "learning_rate": 8.148033332966042e-06, + "loss": 2.0247, + "step": 17031 + }, + { + "epoch": 0.57, + "grad_norm": 0.75104820728302, + "learning_rate": 8.146988836969299e-06, + "loss": 2.0366, + "step": 17032 + }, + { + "epoch": 0.57, + "grad_norm": 0.7757755517959595, + "learning_rate": 8.145944361906792e-06, + "loss": 2.0643, + "step": 17033 + }, + { + "epoch": 0.57, + "grad_norm": 0.7373749613761902, + "learning_rate": 8.144899907790319e-06, + "loss": 2.0965, + "step": 17034 + }, + { + "epoch": 0.57, + "grad_norm": 0.7438095211982727, + "learning_rate": 8.143855474631677e-06, + "loss": 2.0251, + "step": 17035 + }, + { + "epoch": 0.57, + "grad_norm": 0.7282347083091736, + "learning_rate": 8.142811062442669e-06, + "loss": 2.0224, + "step": 17036 + }, + { + "epoch": 0.57, + "grad_norm": 0.7268370389938354, + "learning_rate": 8.141766671235094e-06, + "loss": 2.115, + "step": 17037 + }, + { + "epoch": 0.57, + "grad_norm": 0.7212538719177246, + "learning_rate": 8.14072230102075e-06, + "loss": 2.0119, + "step": 17038 + }, + { + "epoch": 0.57, + "grad_norm": 0.7663792967796326, + "learning_rate": 8.139677951811438e-06, + "loss": 2.1118, + "step": 17039 + }, + { + "epoch": 0.57, + "grad_norm": 0.7457291483879089, + "learning_rate": 8.138633623618952e-06, + "loss": 1.9713, + "step": 17040 + }, + { + "epoch": 0.57, + "grad_norm": 0.7398581504821777, + "learning_rate": 8.13758931645509e-06, + "loss": 2.0583, + "step": 17041 + }, + { + "epoch": 0.57, + "grad_norm": 0.7091923952102661, + "learning_rate": 8.13654503033166e-06, + "loss": 2.0689, + "step": 17042 + }, + { + "epoch": 0.57, + "grad_norm": 0.7224785089492798, + "learning_rate": 8.135500765260443e-06, + "loss": 1.9712, + "step": 17043 + }, + { + "epoch": 0.57, + "grad_norm": 0.7078338861465454, + "learning_rate": 8.13445652125325e-06, + "loss": 2.0249, + "step": 17044 + }, + { + "epoch": 0.57, + "grad_norm": 0.7576523423194885, + "learning_rate": 8.133412298321874e-06, + "loss": 2.1295, + "step": 17045 + }, + { + "epoch": 0.57, + "grad_norm": 0.7234470248222351, + "learning_rate": 8.132368096478111e-06, + "loss": 2.0369, + "step": 17046 + }, + { + "epoch": 0.57, + "grad_norm": 0.7381924390792847, + "learning_rate": 8.131323915733763e-06, + "loss": 2.0863, + "step": 17047 + }, + { + "epoch": 0.57, + "grad_norm": 0.7391882538795471, + "learning_rate": 8.130279756100616e-06, + "loss": 2.0291, + "step": 17048 + }, + { + "epoch": 0.57, + "grad_norm": 0.7242146730422974, + "learning_rate": 8.129235617590474e-06, + "loss": 1.9969, + "step": 17049 + }, + { + "epoch": 0.57, + "grad_norm": 0.7562114000320435, + "learning_rate": 8.128191500215132e-06, + "loss": 2.0169, + "step": 17050 + }, + { + "epoch": 0.57, + "grad_norm": 0.7361788153648376, + "learning_rate": 8.127147403986388e-06, + "loss": 2.1232, + "step": 17051 + }, + { + "epoch": 0.57, + "grad_norm": 0.7673287987709045, + "learning_rate": 8.126103328916034e-06, + "loss": 2.1629, + "step": 17052 + }, + { + "epoch": 0.57, + "grad_norm": 0.7415579557418823, + "learning_rate": 8.125059275015866e-06, + "loss": 2.118, + "step": 17053 + }, + { + "epoch": 0.57, + "grad_norm": 0.7548667192459106, + "learning_rate": 8.124015242297678e-06, + "loss": 2.1, + "step": 17054 + }, + { + "epoch": 0.57, + "grad_norm": 0.7153352499008179, + "learning_rate": 8.122971230773265e-06, + "loss": 2.0528, + "step": 17055 + }, + { + "epoch": 0.57, + "grad_norm": 0.7338407039642334, + "learning_rate": 8.12192724045443e-06, + "loss": 2.0356, + "step": 17056 + }, + { + "epoch": 0.57, + "grad_norm": 0.7370111346244812, + "learning_rate": 8.120883271352953e-06, + "loss": 2.0492, + "step": 17057 + }, + { + "epoch": 0.57, + "grad_norm": 0.7904776334762573, + "learning_rate": 8.119839323480639e-06, + "loss": 2.1255, + "step": 17058 + }, + { + "epoch": 0.57, + "grad_norm": 0.7730766534805298, + "learning_rate": 8.118795396849277e-06, + "loss": 2.0929, + "step": 17059 + }, + { + "epoch": 0.57, + "grad_norm": 0.7510932087898254, + "learning_rate": 8.117751491470664e-06, + "loss": 2.1155, + "step": 17060 + }, + { + "epoch": 0.57, + "grad_norm": 0.7636374831199646, + "learning_rate": 8.116707607356591e-06, + "loss": 2.0957, + "step": 17061 + }, + { + "epoch": 0.57, + "grad_norm": 0.7976534366607666, + "learning_rate": 8.115663744518852e-06, + "loss": 2.0887, + "step": 17062 + }, + { + "epoch": 0.57, + "grad_norm": 0.7607437372207642, + "learning_rate": 8.114619902969237e-06, + "loss": 2.0364, + "step": 17063 + }, + { + "epoch": 0.57, + "grad_norm": 0.7315516471862793, + "learning_rate": 8.113576082719546e-06, + "loss": 2.0186, + "step": 17064 + }, + { + "epoch": 0.57, + "grad_norm": 0.7578868865966797, + "learning_rate": 8.112532283781562e-06, + "loss": 2.0514, + "step": 17065 + }, + { + "epoch": 0.57, + "grad_norm": 0.7114315629005432, + "learning_rate": 8.111488506167085e-06, + "loss": 2.0574, + "step": 17066 + }, + { + "epoch": 0.57, + "grad_norm": 0.7378893494606018, + "learning_rate": 8.110444749887902e-06, + "loss": 2.0792, + "step": 17067 + }, + { + "epoch": 0.57, + "grad_norm": 0.718105137348175, + "learning_rate": 8.109401014955808e-06, + "loss": 2.0681, + "step": 17068 + }, + { + "epoch": 0.57, + "grad_norm": 0.7147154211997986, + "learning_rate": 8.108357301382596e-06, + "loss": 2.054, + "step": 17069 + }, + { + "epoch": 0.57, + "grad_norm": 0.737736165523529, + "learning_rate": 8.107313609180051e-06, + "loss": 2.0929, + "step": 17070 + }, + { + "epoch": 0.57, + "grad_norm": 0.713158130645752, + "learning_rate": 8.106269938359968e-06, + "loss": 1.9967, + "step": 17071 + }, + { + "epoch": 0.57, + "grad_norm": 0.7607359290122986, + "learning_rate": 8.105226288934135e-06, + "loss": 2.0181, + "step": 17072 + }, + { + "epoch": 0.57, + "grad_norm": 0.6978567838668823, + "learning_rate": 8.104182660914352e-06, + "loss": 2.0368, + "step": 17073 + }, + { + "epoch": 0.57, + "grad_norm": 0.7471758723258972, + "learning_rate": 8.103139054312398e-06, + "loss": 2.0789, + "step": 17074 + }, + { + "epoch": 0.57, + "grad_norm": 0.7648957967758179, + "learning_rate": 8.102095469140068e-06, + "loss": 1.9749, + "step": 17075 + }, + { + "epoch": 0.57, + "grad_norm": 0.8468007445335388, + "learning_rate": 8.101051905409149e-06, + "loss": 2.1233, + "step": 17076 + }, + { + "epoch": 0.57, + "grad_norm": 0.7360994219779968, + "learning_rate": 8.100008363131433e-06, + "loss": 2.0906, + "step": 17077 + }, + { + "epoch": 0.57, + "grad_norm": 0.7190072536468506, + "learning_rate": 8.098964842318714e-06, + "loss": 2.0071, + "step": 17078 + }, + { + "epoch": 0.57, + "grad_norm": 0.7156478762626648, + "learning_rate": 8.097921342982771e-06, + "loss": 2.0615, + "step": 17079 + }, + { + "epoch": 0.57, + "grad_norm": 0.762956440448761, + "learning_rate": 8.096877865135399e-06, + "loss": 2.0186, + "step": 17080 + }, + { + "epoch": 0.57, + "grad_norm": 0.7714542150497437, + "learning_rate": 8.095834408788386e-06, + "loss": 2.0781, + "step": 17081 + }, + { + "epoch": 0.57, + "grad_norm": 0.7219070792198181, + "learning_rate": 8.094790973953521e-06, + "loss": 2.1098, + "step": 17082 + }, + { + "epoch": 0.57, + "grad_norm": 0.7380785942077637, + "learning_rate": 8.09374756064259e-06, + "loss": 2.0571, + "step": 17083 + }, + { + "epoch": 0.57, + "grad_norm": 0.7403053045272827, + "learning_rate": 8.092704168867379e-06, + "loss": 2.1219, + "step": 17084 + }, + { + "epoch": 0.57, + "grad_norm": 0.7409008145332336, + "learning_rate": 8.091660798639681e-06, + "loss": 2.0964, + "step": 17085 + }, + { + "epoch": 0.57, + "grad_norm": 0.7817615866661072, + "learning_rate": 8.090617449971286e-06, + "loss": 2.0765, + "step": 17086 + }, + { + "epoch": 0.57, + "grad_norm": 0.7500510811805725, + "learning_rate": 8.089574122873969e-06, + "loss": 2.1222, + "step": 17087 + }, + { + "epoch": 0.57, + "grad_norm": 0.7383649349212646, + "learning_rate": 8.088530817359526e-06, + "loss": 2.0912, + "step": 17088 + }, + { + "epoch": 0.57, + "grad_norm": 0.7605384588241577, + "learning_rate": 8.08748753343974e-06, + "loss": 2.1023, + "step": 17089 + }, + { + "epoch": 0.57, + "grad_norm": 0.7343368530273438, + "learning_rate": 8.0864442711264e-06, + "loss": 2.0206, + "step": 17090 + }, + { + "epoch": 0.57, + "grad_norm": 0.7432597279548645, + "learning_rate": 8.085401030431295e-06, + "loss": 2.1033, + "step": 17091 + }, + { + "epoch": 0.57, + "grad_norm": 0.7041411399841309, + "learning_rate": 8.084357811366202e-06, + "loss": 2.0642, + "step": 17092 + }, + { + "epoch": 0.57, + "grad_norm": 0.7271472215652466, + "learning_rate": 8.083314613942913e-06, + "loss": 2.0897, + "step": 17093 + }, + { + "epoch": 0.57, + "grad_norm": 0.7213118672370911, + "learning_rate": 8.082271438173214e-06, + "loss": 2.0745, + "step": 17094 + }, + { + "epoch": 0.57, + "grad_norm": 0.7546001076698303, + "learning_rate": 8.081228284068889e-06, + "loss": 2.0743, + "step": 17095 + }, + { + "epoch": 0.57, + "grad_norm": 0.767673134803772, + "learning_rate": 8.08018515164172e-06, + "loss": 2.0704, + "step": 17096 + }, + { + "epoch": 0.57, + "grad_norm": 0.7117115259170532, + "learning_rate": 8.079142040903496e-06, + "loss": 2.048, + "step": 17097 + }, + { + "epoch": 0.57, + "grad_norm": 0.7848190665245056, + "learning_rate": 8.078098951865995e-06, + "loss": 2.0523, + "step": 17098 + }, + { + "epoch": 0.57, + "grad_norm": 0.7393033504486084, + "learning_rate": 8.077055884541009e-06, + "loss": 2.0668, + "step": 17099 + }, + { + "epoch": 0.57, + "grad_norm": 0.7572908401489258, + "learning_rate": 8.07601283894032e-06, + "loss": 2.0313, + "step": 17100 + }, + { + "epoch": 0.57, + "grad_norm": 0.7229450345039368, + "learning_rate": 8.074969815075709e-06, + "loss": 2.051, + "step": 17101 + }, + { + "epoch": 0.57, + "grad_norm": 0.7746517658233643, + "learning_rate": 8.073926812958962e-06, + "loss": 1.9956, + "step": 17102 + }, + { + "epoch": 0.57, + "grad_norm": 0.7412286400794983, + "learning_rate": 8.072883832601858e-06, + "loss": 2.0055, + "step": 17103 + }, + { + "epoch": 0.57, + "grad_norm": 0.7535357475280762, + "learning_rate": 8.071840874016187e-06, + "loss": 2.1502, + "step": 17104 + }, + { + "epoch": 0.57, + "grad_norm": 0.7136909365653992, + "learning_rate": 8.070797937213725e-06, + "loss": 2.1318, + "step": 17105 + }, + { + "epoch": 0.57, + "grad_norm": 0.7250989079475403, + "learning_rate": 8.069755022206257e-06, + "loss": 2.0549, + "step": 17106 + }, + { + "epoch": 0.57, + "grad_norm": 0.7634677886962891, + "learning_rate": 8.068712129005566e-06, + "loss": 2.1405, + "step": 17107 + }, + { + "epoch": 0.57, + "grad_norm": 0.7400010228157043, + "learning_rate": 8.067669257623434e-06, + "loss": 2.0274, + "step": 17108 + }, + { + "epoch": 0.57, + "grad_norm": 0.7430810928344727, + "learning_rate": 8.066626408071644e-06, + "loss": 2.1448, + "step": 17109 + }, + { + "epoch": 0.57, + "grad_norm": 0.7226834893226624, + "learning_rate": 8.065583580361972e-06, + "loss": 2.0399, + "step": 17110 + }, + { + "epoch": 0.57, + "grad_norm": 0.7559519410133362, + "learning_rate": 8.064540774506207e-06, + "loss": 2.0093, + "step": 17111 + }, + { + "epoch": 0.57, + "grad_norm": 0.7325488328933716, + "learning_rate": 8.063497990516122e-06, + "loss": 2.0436, + "step": 17112 + }, + { + "epoch": 0.57, + "grad_norm": 0.7469612956047058, + "learning_rate": 8.062455228403508e-06, + "loss": 2.0521, + "step": 17113 + }, + { + "epoch": 0.57, + "grad_norm": 0.7206214666366577, + "learning_rate": 8.061412488180133e-06, + "loss": 2.062, + "step": 17114 + }, + { + "epoch": 0.57, + "grad_norm": 0.7417789101600647, + "learning_rate": 8.060369769857785e-06, + "loss": 1.9739, + "step": 17115 + }, + { + "epoch": 0.57, + "grad_norm": 0.767518937587738, + "learning_rate": 8.059327073448244e-06, + "loss": 2.0277, + "step": 17116 + }, + { + "epoch": 0.57, + "grad_norm": 0.7396699786186218, + "learning_rate": 8.058284398963289e-06, + "loss": 2.0453, + "step": 17117 + }, + { + "epoch": 0.57, + "grad_norm": 0.736834704875946, + "learning_rate": 8.057241746414698e-06, + "loss": 2.0229, + "step": 17118 + }, + { + "epoch": 0.57, + "grad_norm": 0.7253525257110596, + "learning_rate": 8.05619911581425e-06, + "loss": 2.0052, + "step": 17119 + }, + { + "epoch": 0.57, + "grad_norm": 0.7488128542900085, + "learning_rate": 8.055156507173725e-06, + "loss": 2.0485, + "step": 17120 + }, + { + "epoch": 0.57, + "grad_norm": 0.7512637376785278, + "learning_rate": 8.054113920504902e-06, + "loss": 2.0382, + "step": 17121 + }, + { + "epoch": 0.57, + "grad_norm": 0.7473495602607727, + "learning_rate": 8.053071355819564e-06, + "loss": 1.9607, + "step": 17122 + }, + { + "epoch": 0.57, + "grad_norm": 0.7282321453094482, + "learning_rate": 8.05202881312948e-06, + "loss": 2.0532, + "step": 17123 + }, + { + "epoch": 0.57, + "grad_norm": 0.7317062020301819, + "learning_rate": 8.050986292446434e-06, + "loss": 2.0507, + "step": 17124 + }, + { + "epoch": 0.57, + "grad_norm": 0.734809398651123, + "learning_rate": 8.049943793782203e-06, + "loss": 2.0529, + "step": 17125 + }, + { + "epoch": 0.57, + "grad_norm": 0.7382770776748657, + "learning_rate": 8.048901317148566e-06, + "loss": 2.0896, + "step": 17126 + }, + { + "epoch": 0.57, + "grad_norm": 0.7528815269470215, + "learning_rate": 8.047858862557297e-06, + "loss": 2.0448, + "step": 17127 + }, + { + "epoch": 0.57, + "grad_norm": 0.7539359927177429, + "learning_rate": 8.046816430020172e-06, + "loss": 2.0081, + "step": 17128 + }, + { + "epoch": 0.57, + "grad_norm": 0.7364723086357117, + "learning_rate": 8.045774019548972e-06, + "loss": 2.1004, + "step": 17129 + }, + { + "epoch": 0.57, + "grad_norm": 0.7571067214012146, + "learning_rate": 8.044731631155473e-06, + "loss": 2.0018, + "step": 17130 + }, + { + "epoch": 0.57, + "grad_norm": 0.7279937267303467, + "learning_rate": 8.043689264851452e-06, + "loss": 2.0455, + "step": 17131 + }, + { + "epoch": 0.57, + "grad_norm": 0.7200897932052612, + "learning_rate": 8.04264692064868e-06, + "loss": 2.063, + "step": 17132 + }, + { + "epoch": 0.57, + "grad_norm": 0.7754957675933838, + "learning_rate": 8.041604598558936e-06, + "loss": 2.0831, + "step": 17133 + }, + { + "epoch": 0.57, + "grad_norm": 0.7384926080703735, + "learning_rate": 8.040562298593993e-06, + "loss": 2.0533, + "step": 17134 + }, + { + "epoch": 0.57, + "grad_norm": 0.7484129667282104, + "learning_rate": 8.039520020765636e-06, + "loss": 2.0111, + "step": 17135 + }, + { + "epoch": 0.57, + "grad_norm": 0.7823920249938965, + "learning_rate": 8.038477765085626e-06, + "loss": 1.9974, + "step": 17136 + }, + { + "epoch": 0.57, + "grad_norm": 0.7557188272476196, + "learning_rate": 8.037435531565747e-06, + "loss": 2.1407, + "step": 17137 + }, + { + "epoch": 0.57, + "grad_norm": 0.7303891181945801, + "learning_rate": 8.03639332021777e-06, + "loss": 1.9987, + "step": 17138 + }, + { + "epoch": 0.57, + "grad_norm": 0.7495269179344177, + "learning_rate": 8.035351131053473e-06, + "loss": 2.053, + "step": 17139 + }, + { + "epoch": 0.57, + "grad_norm": 0.7523943185806274, + "learning_rate": 8.034308964084629e-06, + "loss": 2.125, + "step": 17140 + }, + { + "epoch": 0.57, + "grad_norm": 0.7601137757301331, + "learning_rate": 8.033266819323008e-06, + "loss": 1.9618, + "step": 17141 + }, + { + "epoch": 0.57, + "grad_norm": 0.7799620032310486, + "learning_rate": 8.032224696780383e-06, + "loss": 2.0252, + "step": 17142 + }, + { + "epoch": 0.57, + "grad_norm": 0.722940981388092, + "learning_rate": 8.031182596468532e-06, + "loss": 2.0193, + "step": 17143 + }, + { + "epoch": 0.57, + "grad_norm": 0.780859649181366, + "learning_rate": 8.03014051839923e-06, + "loss": 2.0658, + "step": 17144 + }, + { + "epoch": 0.57, + "grad_norm": 0.746974766254425, + "learning_rate": 8.029098462584242e-06, + "loss": 2.0824, + "step": 17145 + }, + { + "epoch": 0.57, + "grad_norm": 0.7359817624092102, + "learning_rate": 8.028056429035345e-06, + "loss": 2.1424, + "step": 17146 + }, + { + "epoch": 0.57, + "grad_norm": 0.7404485940933228, + "learning_rate": 8.027014417764311e-06, + "loss": 2.0712, + "step": 17147 + }, + { + "epoch": 0.57, + "grad_norm": 0.7576310038566589, + "learning_rate": 8.025972428782915e-06, + "loss": 2.0632, + "step": 17148 + }, + { + "epoch": 0.57, + "grad_norm": 0.750542938709259, + "learning_rate": 8.024930462102923e-06, + "loss": 2.0515, + "step": 17149 + }, + { + "epoch": 0.57, + "grad_norm": 0.7453352212905884, + "learning_rate": 8.023888517736106e-06, + "loss": 2.06, + "step": 17150 + }, + { + "epoch": 0.57, + "grad_norm": 0.7599689960479736, + "learning_rate": 8.022846595694241e-06, + "loss": 2.0369, + "step": 17151 + }, + { + "epoch": 0.57, + "grad_norm": 0.7303507328033447, + "learning_rate": 8.021804695989098e-06, + "loss": 2.0151, + "step": 17152 + }, + { + "epoch": 0.57, + "grad_norm": 0.7523336410522461, + "learning_rate": 8.020762818632446e-06, + "loss": 2.0253, + "step": 17153 + }, + { + "epoch": 0.57, + "grad_norm": 0.7349201440811157, + "learning_rate": 8.019720963636054e-06, + "loss": 2.0185, + "step": 17154 + }, + { + "epoch": 0.57, + "grad_norm": 0.7671202421188354, + "learning_rate": 8.018679131011695e-06, + "loss": 2.1099, + "step": 17155 + }, + { + "epoch": 0.57, + "grad_norm": 0.7543513774871826, + "learning_rate": 8.017637320771134e-06, + "loss": 2.0842, + "step": 17156 + }, + { + "epoch": 0.57, + "grad_norm": 0.7141208648681641, + "learning_rate": 8.016595532926154e-06, + "loss": 2.0703, + "step": 17157 + }, + { + "epoch": 0.57, + "grad_norm": 0.7708291411399841, + "learning_rate": 8.015553767488506e-06, + "loss": 2.0913, + "step": 17158 + }, + { + "epoch": 0.57, + "grad_norm": 0.7589324712753296, + "learning_rate": 8.014512024469973e-06, + "loss": 2.0336, + "step": 17159 + }, + { + "epoch": 0.57, + "grad_norm": 0.7440407276153564, + "learning_rate": 8.013470303882318e-06, + "loss": 2.0642, + "step": 17160 + }, + { + "epoch": 0.57, + "grad_norm": 0.7379841804504395, + "learning_rate": 8.012428605737313e-06, + "loss": 2.1036, + "step": 17161 + }, + { + "epoch": 0.57, + "grad_norm": 0.717802882194519, + "learning_rate": 8.011386930046726e-06, + "loss": 2.0691, + "step": 17162 + }, + { + "epoch": 0.57, + "grad_norm": 0.721459150314331, + "learning_rate": 8.01034527682232e-06, + "loss": 2.036, + "step": 17163 + }, + { + "epoch": 0.57, + "grad_norm": 0.7595359683036804, + "learning_rate": 8.009303646075867e-06, + "loss": 2.0756, + "step": 17164 + }, + { + "epoch": 0.57, + "grad_norm": 0.7532103657722473, + "learning_rate": 8.008262037819137e-06, + "loss": 2.1036, + "step": 17165 + }, + { + "epoch": 0.57, + "grad_norm": 0.7328066825866699, + "learning_rate": 8.007220452063896e-06, + "loss": 2.0697, + "step": 17166 + }, + { + "epoch": 0.57, + "grad_norm": 0.7460857033729553, + "learning_rate": 8.00617888882191e-06, + "loss": 2.141, + "step": 17167 + }, + { + "epoch": 0.57, + "grad_norm": 0.7490686774253845, + "learning_rate": 8.005137348104946e-06, + "loss": 2.0613, + "step": 17168 + }, + { + "epoch": 0.57, + "grad_norm": 0.7344531416893005, + "learning_rate": 8.004095829924769e-06, + "loss": 1.9214, + "step": 17169 + }, + { + "epoch": 0.57, + "grad_norm": 0.724797248840332, + "learning_rate": 8.003054334293147e-06, + "loss": 2.099, + "step": 17170 + }, + { + "epoch": 0.57, + "grad_norm": 0.7411088943481445, + "learning_rate": 8.002012861221855e-06, + "loss": 2.0321, + "step": 17171 + }, + { + "epoch": 0.57, + "grad_norm": 0.7376947999000549, + "learning_rate": 8.000971410722641e-06, + "loss": 2.0815, + "step": 17172 + }, + { + "epoch": 0.57, + "grad_norm": 0.7443318963050842, + "learning_rate": 7.999929982807286e-06, + "loss": 2.0821, + "step": 17173 + }, + { + "epoch": 0.57, + "grad_norm": 0.7186601161956787, + "learning_rate": 7.998888577487547e-06, + "loss": 2.0369, + "step": 17174 + }, + { + "epoch": 0.57, + "grad_norm": 0.746322512626648, + "learning_rate": 7.997847194775198e-06, + "loss": 2.0972, + "step": 17175 + }, + { + "epoch": 0.57, + "grad_norm": 0.7303517460823059, + "learning_rate": 7.99680583468199e-06, + "loss": 2.0411, + "step": 17176 + }, + { + "epoch": 0.57, + "grad_norm": 0.7721924185752869, + "learning_rate": 7.995764497219701e-06, + "loss": 2.0556, + "step": 17177 + }, + { + "epoch": 0.57, + "grad_norm": 0.731717586517334, + "learning_rate": 7.994723182400086e-06, + "loss": 2.1038, + "step": 17178 + }, + { + "epoch": 0.57, + "grad_norm": 0.7152645587921143, + "learning_rate": 7.993681890234918e-06, + "loss": 2.0472, + "step": 17179 + }, + { + "epoch": 0.57, + "grad_norm": 0.7476188540458679, + "learning_rate": 7.992640620735952e-06, + "loss": 2.0789, + "step": 17180 + }, + { + "epoch": 0.57, + "grad_norm": 0.7399317622184753, + "learning_rate": 7.991599373914958e-06, + "loss": 2.0509, + "step": 17181 + }, + { + "epoch": 0.57, + "grad_norm": 0.7329614162445068, + "learning_rate": 7.990558149783695e-06, + "loss": 2.0133, + "step": 17182 + }, + { + "epoch": 0.57, + "grad_norm": 0.7275972962379456, + "learning_rate": 7.98951694835393e-06, + "loss": 2.1893, + "step": 17183 + }, + { + "epoch": 0.57, + "grad_norm": 0.7227086424827576, + "learning_rate": 7.988475769637429e-06, + "loss": 2.0503, + "step": 17184 + }, + { + "epoch": 0.57, + "grad_norm": 0.7978876233100891, + "learning_rate": 7.987434613645941e-06, + "loss": 2.0, + "step": 17185 + }, + { + "epoch": 0.57, + "grad_norm": 0.7346686720848083, + "learning_rate": 7.986393480391242e-06, + "loss": 2.0646, + "step": 17186 + }, + { + "epoch": 0.57, + "grad_norm": 0.7568705081939697, + "learning_rate": 7.985352369885089e-06, + "loss": 2.0996, + "step": 17187 + }, + { + "epoch": 0.57, + "grad_norm": 0.7378888130187988, + "learning_rate": 7.984311282139247e-06, + "loss": 2.03, + "step": 17188 + }, + { + "epoch": 0.57, + "grad_norm": 0.7622794508934021, + "learning_rate": 7.983270217165471e-06, + "loss": 2.0184, + "step": 17189 + }, + { + "epoch": 0.57, + "grad_norm": 0.7497325539588928, + "learning_rate": 7.982229174975527e-06, + "loss": 2.0778, + "step": 17190 + }, + { + "epoch": 0.57, + "grad_norm": 0.7458800077438354, + "learning_rate": 7.981188155581173e-06, + "loss": 2.0618, + "step": 17191 + }, + { + "epoch": 0.57, + "grad_norm": 0.7652097344398499, + "learning_rate": 7.980147158994175e-06, + "loss": 2.0157, + "step": 17192 + }, + { + "epoch": 0.57, + "grad_norm": 0.7112547755241394, + "learning_rate": 7.979106185226291e-06, + "loss": 2.0664, + "step": 17193 + }, + { + "epoch": 0.57, + "grad_norm": 0.7529163360595703, + "learning_rate": 7.978065234289281e-06, + "loss": 2.0998, + "step": 17194 + }, + { + "epoch": 0.57, + "grad_norm": 0.7317906022071838, + "learning_rate": 7.977024306194903e-06, + "loss": 1.9694, + "step": 17195 + }, + { + "epoch": 0.57, + "grad_norm": 0.7348636984825134, + "learning_rate": 7.97598340095492e-06, + "loss": 2.0807, + "step": 17196 + }, + { + "epoch": 0.57, + "grad_norm": 0.7501640915870667, + "learning_rate": 7.974942518581092e-06, + "loss": 2.0142, + "step": 17197 + }, + { + "epoch": 0.57, + "grad_norm": 0.7218626141548157, + "learning_rate": 7.973901659085175e-06, + "loss": 2.0152, + "step": 17198 + }, + { + "epoch": 0.57, + "grad_norm": 0.7464247941970825, + "learning_rate": 7.972860822478928e-06, + "loss": 2.0519, + "step": 17199 + }, + { + "epoch": 0.57, + "grad_norm": 0.7218948602676392, + "learning_rate": 7.971820008774111e-06, + "loss": 2.0467, + "step": 17200 + }, + { + "epoch": 0.57, + "grad_norm": 0.7223109602928162, + "learning_rate": 7.970779217982487e-06, + "loss": 2.0423, + "step": 17201 + }, + { + "epoch": 0.57, + "grad_norm": 0.7714511156082153, + "learning_rate": 7.969738450115809e-06, + "loss": 2.0376, + "step": 17202 + }, + { + "epoch": 0.57, + "grad_norm": 0.7560158967971802, + "learning_rate": 7.968697705185836e-06, + "loss": 2.0203, + "step": 17203 + }, + { + "epoch": 0.57, + "grad_norm": 0.7776143550872803, + "learning_rate": 7.967656983204323e-06, + "loss": 1.9891, + "step": 17204 + }, + { + "epoch": 0.57, + "grad_norm": 0.7590973377227783, + "learning_rate": 7.966616284183031e-06, + "loss": 2.0832, + "step": 17205 + }, + { + "epoch": 0.57, + "grad_norm": 0.7555524706840515, + "learning_rate": 7.965575608133722e-06, + "loss": 2.0148, + "step": 17206 + }, + { + "epoch": 0.57, + "grad_norm": 0.7567825317382812, + "learning_rate": 7.96453495506814e-06, + "loss": 2.0232, + "step": 17207 + }, + { + "epoch": 0.57, + "grad_norm": 0.7442529201507568, + "learning_rate": 7.963494324998054e-06, + "loss": 2.0822, + "step": 17208 + }, + { + "epoch": 0.57, + "grad_norm": 0.7443017363548279, + "learning_rate": 7.962453717935214e-06, + "loss": 2.0261, + "step": 17209 + }, + { + "epoch": 0.57, + "grad_norm": 0.7546848058700562, + "learning_rate": 7.96141313389138e-06, + "loss": 2.0079, + "step": 17210 + }, + { + "epoch": 0.57, + "grad_norm": 0.7328857779502869, + "learning_rate": 7.960372572878304e-06, + "loss": 2.0722, + "step": 17211 + }, + { + "epoch": 0.57, + "grad_norm": 0.7243262529373169, + "learning_rate": 7.95933203490774e-06, + "loss": 2.0246, + "step": 17212 + }, + { + "epoch": 0.57, + "grad_norm": 0.7621217370033264, + "learning_rate": 7.95829151999145e-06, + "loss": 2.103, + "step": 17213 + }, + { + "epoch": 0.57, + "grad_norm": 0.7275574207305908, + "learning_rate": 7.957251028141184e-06, + "loss": 2.0062, + "step": 17214 + }, + { + "epoch": 0.57, + "grad_norm": 0.7500011324882507, + "learning_rate": 7.956210559368702e-06, + "loss": 2.0819, + "step": 17215 + }, + { + "epoch": 0.57, + "grad_norm": 0.7568040490150452, + "learning_rate": 7.955170113685754e-06, + "loss": 2.1076, + "step": 17216 + }, + { + "epoch": 0.57, + "grad_norm": 0.7476917505264282, + "learning_rate": 7.954129691104095e-06, + "loss": 2.0371, + "step": 17217 + }, + { + "epoch": 0.57, + "grad_norm": 0.7331631183624268, + "learning_rate": 7.95308929163548e-06, + "loss": 2.0589, + "step": 17218 + }, + { + "epoch": 0.57, + "grad_norm": 0.751981794834137, + "learning_rate": 7.952048915291664e-06, + "loss": 1.9777, + "step": 17219 + }, + { + "epoch": 0.57, + "grad_norm": 0.7637856006622314, + "learning_rate": 7.951008562084398e-06, + "loss": 2.0161, + "step": 17220 + }, + { + "epoch": 0.57, + "grad_norm": 0.7636890411376953, + "learning_rate": 7.949968232025434e-06, + "loss": 2.0182, + "step": 17221 + }, + { + "epoch": 0.57, + "grad_norm": 0.7305976748466492, + "learning_rate": 7.948927925126529e-06, + "loss": 2.0826, + "step": 17222 + }, + { + "epoch": 0.57, + "grad_norm": 0.7499392628669739, + "learning_rate": 7.947887641399435e-06, + "loss": 2.0448, + "step": 17223 + }, + { + "epoch": 0.57, + "grad_norm": 0.7591826915740967, + "learning_rate": 7.946847380855905e-06, + "loss": 2.0338, + "step": 17224 + }, + { + "epoch": 0.57, + "grad_norm": 0.7297161817550659, + "learning_rate": 7.945807143507688e-06, + "loss": 2.0229, + "step": 17225 + }, + { + "epoch": 0.57, + "grad_norm": 0.7471473217010498, + "learning_rate": 7.94476692936654e-06, + "loss": 2.0102, + "step": 17226 + }, + { + "epoch": 0.57, + "grad_norm": 0.7940963506698608, + "learning_rate": 7.943726738444207e-06, + "loss": 2.0328, + "step": 17227 + }, + { + "epoch": 0.57, + "grad_norm": 0.7770169377326965, + "learning_rate": 7.942686570752451e-06, + "loss": 2.0563, + "step": 17228 + }, + { + "epoch": 0.57, + "grad_norm": 0.7700167894363403, + "learning_rate": 7.94164642630301e-06, + "loss": 2.0926, + "step": 17229 + }, + { + "epoch": 0.57, + "grad_norm": 0.7342116236686707, + "learning_rate": 7.940606305107642e-06, + "loss": 2.0641, + "step": 17230 + }, + { + "epoch": 0.57, + "grad_norm": 0.7330238223075867, + "learning_rate": 7.939566207178099e-06, + "loss": 1.9786, + "step": 17231 + }, + { + "epoch": 0.57, + "grad_norm": 0.7703621983528137, + "learning_rate": 7.93852613252613e-06, + "loss": 2.1314, + "step": 17232 + }, + { + "epoch": 0.57, + "grad_norm": 0.7340764999389648, + "learning_rate": 7.937486081163483e-06, + "loss": 2.0845, + "step": 17233 + }, + { + "epoch": 0.57, + "grad_norm": 0.7359911203384399, + "learning_rate": 7.93644605310191e-06, + "loss": 2.0511, + "step": 17234 + }, + { + "epoch": 0.57, + "grad_norm": 0.7481018304824829, + "learning_rate": 7.935406048353159e-06, + "loss": 2.0518, + "step": 17235 + }, + { + "epoch": 0.57, + "grad_norm": 0.7372741103172302, + "learning_rate": 7.934366066928981e-06, + "loss": 2.0256, + "step": 17236 + }, + { + "epoch": 0.57, + "grad_norm": 0.7193230390548706, + "learning_rate": 7.933326108841127e-06, + "loss": 1.97, + "step": 17237 + }, + { + "epoch": 0.57, + "grad_norm": 0.7305843830108643, + "learning_rate": 7.932286174101343e-06, + "loss": 1.9734, + "step": 17238 + }, + { + "epoch": 0.57, + "grad_norm": 0.7576166391372681, + "learning_rate": 7.931246262721375e-06, + "loss": 2.0774, + "step": 17239 + }, + { + "epoch": 0.57, + "grad_norm": 0.7921248078346252, + "learning_rate": 7.930206374712977e-06, + "loss": 2.0521, + "step": 17240 + }, + { + "epoch": 0.57, + "grad_norm": 0.7179931402206421, + "learning_rate": 7.929166510087895e-06, + "loss": 2.0685, + "step": 17241 + }, + { + "epoch": 0.57, + "grad_norm": 0.7550375461578369, + "learning_rate": 7.928126668857873e-06, + "loss": 2.0745, + "step": 17242 + }, + { + "epoch": 0.57, + "grad_norm": 0.7656317949295044, + "learning_rate": 7.927086851034662e-06, + "loss": 2.1207, + "step": 17243 + }, + { + "epoch": 0.57, + "grad_norm": 0.7728816866874695, + "learning_rate": 7.92604705663001e-06, + "loss": 2.0636, + "step": 17244 + }, + { + "epoch": 0.57, + "grad_norm": 0.7134026288986206, + "learning_rate": 7.925007285655663e-06, + "loss": 2.1002, + "step": 17245 + }, + { + "epoch": 0.57, + "grad_norm": 0.7521299719810486, + "learning_rate": 7.923967538123369e-06, + "loss": 2.117, + "step": 17246 + }, + { + "epoch": 0.57, + "grad_norm": 0.741278350353241, + "learning_rate": 7.922927814044872e-06, + "loss": 1.9849, + "step": 17247 + }, + { + "epoch": 0.57, + "grad_norm": 0.787230908870697, + "learning_rate": 7.921888113431918e-06, + "loss": 2.1252, + "step": 17248 + }, + { + "epoch": 0.57, + "grad_norm": 0.769725501537323, + "learning_rate": 7.920848436296253e-06, + "loss": 2.0438, + "step": 17249 + }, + { + "epoch": 0.57, + "grad_norm": 0.7424687743186951, + "learning_rate": 7.919808782649631e-06, + "loss": 2.0882, + "step": 17250 + }, + { + "epoch": 0.57, + "grad_norm": 0.708843469619751, + "learning_rate": 7.918769152503782e-06, + "loss": 1.9519, + "step": 17251 + }, + { + "epoch": 0.57, + "grad_norm": 0.7164061069488525, + "learning_rate": 7.917729545870462e-06, + "loss": 2.0212, + "step": 17252 + }, + { + "epoch": 0.57, + "grad_norm": 0.7577266693115234, + "learning_rate": 7.916689962761415e-06, + "loss": 2.0743, + "step": 17253 + }, + { + "epoch": 0.57, + "grad_norm": 0.7395599484443665, + "learning_rate": 7.915650403188382e-06, + "loss": 2.0556, + "step": 17254 + }, + { + "epoch": 0.57, + "grad_norm": 0.7140969038009644, + "learning_rate": 7.914610867163113e-06, + "loss": 2.0319, + "step": 17255 + }, + { + "epoch": 0.57, + "grad_norm": 0.7289227247238159, + "learning_rate": 7.913571354697344e-06, + "loss": 2.0617, + "step": 17256 + }, + { + "epoch": 0.57, + "grad_norm": 0.7285024523735046, + "learning_rate": 7.912531865802822e-06, + "loss": 2.045, + "step": 17257 + }, + { + "epoch": 0.57, + "grad_norm": 0.7389302849769592, + "learning_rate": 7.911492400491294e-06, + "loss": 2.1072, + "step": 17258 + }, + { + "epoch": 0.57, + "grad_norm": 0.7558693289756775, + "learning_rate": 7.910452958774503e-06, + "loss": 2.0727, + "step": 17259 + }, + { + "epoch": 0.57, + "grad_norm": 0.7440516948699951, + "learning_rate": 7.909413540664188e-06, + "loss": 2.1013, + "step": 17260 + }, + { + "epoch": 0.57, + "grad_norm": 0.7608960270881653, + "learning_rate": 7.908374146172094e-06, + "loss": 2.1101, + "step": 17261 + }, + { + "epoch": 0.57, + "grad_norm": 0.7507086396217346, + "learning_rate": 7.90733477530996e-06, + "loss": 2.0097, + "step": 17262 + }, + { + "epoch": 0.57, + "grad_norm": 0.721200168132782, + "learning_rate": 7.906295428089537e-06, + "loss": 2.0022, + "step": 17263 + }, + { + "epoch": 0.57, + "grad_norm": 0.7818373441696167, + "learning_rate": 7.905256104522558e-06, + "loss": 2.0541, + "step": 17264 + }, + { + "epoch": 0.57, + "grad_norm": 0.7486129999160767, + "learning_rate": 7.904216804620764e-06, + "loss": 2.0303, + "step": 17265 + }, + { + "epoch": 0.57, + "grad_norm": 0.722420871257782, + "learning_rate": 7.903177528395905e-06, + "loss": 2.0451, + "step": 17266 + }, + { + "epoch": 0.57, + "grad_norm": 0.7076942324638367, + "learning_rate": 7.902138275859716e-06, + "loss": 2.0768, + "step": 17267 + }, + { + "epoch": 0.57, + "grad_norm": 0.7330958843231201, + "learning_rate": 7.90109904702394e-06, + "loss": 2.0251, + "step": 17268 + }, + { + "epoch": 0.57, + "grad_norm": 0.7436361312866211, + "learning_rate": 7.900059841900318e-06, + "loss": 2.1124, + "step": 17269 + }, + { + "epoch": 0.57, + "grad_norm": 0.7239755988121033, + "learning_rate": 7.899020660500588e-06, + "loss": 2.0941, + "step": 17270 + }, + { + "epoch": 0.57, + "grad_norm": 0.7130014300346375, + "learning_rate": 7.897981502836489e-06, + "loss": 2.0563, + "step": 17271 + }, + { + "epoch": 0.57, + "grad_norm": 0.753129243850708, + "learning_rate": 7.89694236891977e-06, + "loss": 2.1575, + "step": 17272 + }, + { + "epoch": 0.57, + "grad_norm": 0.7116495370864868, + "learning_rate": 7.895903258762157e-06, + "loss": 2.0237, + "step": 17273 + }, + { + "epoch": 0.57, + "grad_norm": 0.7680467963218689, + "learning_rate": 7.894864172375395e-06, + "loss": 2.0488, + "step": 17274 + }, + { + "epoch": 0.57, + "grad_norm": 0.7454050183296204, + "learning_rate": 7.893825109771229e-06, + "loss": 2.0709, + "step": 17275 + }, + { + "epoch": 0.57, + "grad_norm": 0.755456805229187, + "learning_rate": 7.892786070961386e-06, + "loss": 2.0577, + "step": 17276 + }, + { + "epoch": 0.57, + "grad_norm": 0.7581737637519836, + "learning_rate": 7.89174705595762e-06, + "loss": 2.0711, + "step": 17277 + }, + { + "epoch": 0.57, + "grad_norm": 0.714931309223175, + "learning_rate": 7.890708064771655e-06, + "loss": 2.0207, + "step": 17278 + }, + { + "epoch": 0.57, + "grad_norm": 0.7360647320747375, + "learning_rate": 7.889669097415232e-06, + "loss": 1.9334, + "step": 17279 + }, + { + "epoch": 0.57, + "grad_norm": 0.7530723810195923, + "learning_rate": 7.888630153900093e-06, + "loss": 2.0351, + "step": 17280 + }, + { + "epoch": 0.57, + "grad_norm": 0.7844183444976807, + "learning_rate": 7.887591234237975e-06, + "loss": 2.1454, + "step": 17281 + }, + { + "epoch": 0.57, + "grad_norm": 0.741177499294281, + "learning_rate": 7.886552338440612e-06, + "loss": 2.0692, + "step": 17282 + }, + { + "epoch": 0.58, + "grad_norm": 0.7305881381034851, + "learning_rate": 7.885513466519742e-06, + "loss": 2.0769, + "step": 17283 + }, + { + "epoch": 0.58, + "grad_norm": 0.747803270816803, + "learning_rate": 7.8844746184871e-06, + "loss": 2.1061, + "step": 17284 + }, + { + "epoch": 0.58, + "grad_norm": 0.7842484712600708, + "learning_rate": 7.883435794354424e-06, + "loss": 2.0401, + "step": 17285 + }, + { + "epoch": 0.58, + "grad_norm": 0.7240668535232544, + "learning_rate": 7.882396994133456e-06, + "loss": 1.9817, + "step": 17286 + }, + { + "epoch": 0.58, + "grad_norm": 0.7215327620506287, + "learning_rate": 7.881358217835919e-06, + "loss": 2.0435, + "step": 17287 + }, + { + "epoch": 0.58, + "grad_norm": 0.7056887149810791, + "learning_rate": 7.880319465473556e-06, + "loss": 2.0218, + "step": 17288 + }, + { + "epoch": 0.58, + "grad_norm": 0.7830666899681091, + "learning_rate": 7.879280737058102e-06, + "loss": 2.0978, + "step": 17289 + }, + { + "epoch": 0.58, + "grad_norm": 0.7306040525436401, + "learning_rate": 7.878242032601294e-06, + "loss": 2.0696, + "step": 17290 + }, + { + "epoch": 0.58, + "grad_norm": 0.7575163245201111, + "learning_rate": 7.877203352114862e-06, + "loss": 2.087, + "step": 17291 + }, + { + "epoch": 0.58, + "grad_norm": 0.7359973192214966, + "learning_rate": 7.87616469561054e-06, + "loss": 2.0459, + "step": 17292 + }, + { + "epoch": 0.58, + "grad_norm": 0.7582885026931763, + "learning_rate": 7.875126063100066e-06, + "loss": 2.0867, + "step": 17293 + }, + { + "epoch": 0.58, + "grad_norm": 0.7595769166946411, + "learning_rate": 7.874087454595177e-06, + "loss": 2.0227, + "step": 17294 + }, + { + "epoch": 0.58, + "grad_norm": 0.7331162691116333, + "learning_rate": 7.873048870107597e-06, + "loss": 1.9979, + "step": 17295 + }, + { + "epoch": 0.58, + "grad_norm": 0.7590515613555908, + "learning_rate": 7.872010309649068e-06, + "loss": 2.0663, + "step": 17296 + }, + { + "epoch": 0.58, + "grad_norm": 0.7198009490966797, + "learning_rate": 7.870971773231316e-06, + "loss": 2.0643, + "step": 17297 + }, + { + "epoch": 0.58, + "grad_norm": 0.7331738471984863, + "learning_rate": 7.869933260866076e-06, + "loss": 2.0973, + "step": 17298 + }, + { + "epoch": 0.58, + "grad_norm": 0.7302350997924805, + "learning_rate": 7.868894772565089e-06, + "loss": 2.0316, + "step": 17299 + }, + { + "epoch": 0.58, + "grad_norm": 0.7922964096069336, + "learning_rate": 7.867856308340072e-06, + "loss": 2.0585, + "step": 17300 + }, + { + "epoch": 0.58, + "grad_norm": 0.7391806244850159, + "learning_rate": 7.866817868202768e-06, + "loss": 2.0641, + "step": 17301 + }, + { + "epoch": 0.58, + "grad_norm": 0.7720181345939636, + "learning_rate": 7.865779452164906e-06, + "loss": 2.0592, + "step": 17302 + }, + { + "epoch": 0.58, + "grad_norm": 0.743485689163208, + "learning_rate": 7.864741060238218e-06, + "loss": 2.0459, + "step": 17303 + }, + { + "epoch": 0.58, + "grad_norm": 0.7617506384849548, + "learning_rate": 7.863702692434431e-06, + "loss": 2.0222, + "step": 17304 + }, + { + "epoch": 0.58, + "grad_norm": 0.7448143362998962, + "learning_rate": 7.86266434876528e-06, + "loss": 2.0709, + "step": 17305 + }, + { + "epoch": 0.58, + "grad_norm": 0.7567383646965027, + "learning_rate": 7.861626029242493e-06, + "loss": 2.0234, + "step": 17306 + }, + { + "epoch": 0.58, + "grad_norm": 0.7609814405441284, + "learning_rate": 7.860587733877804e-06, + "loss": 2.1263, + "step": 17307 + }, + { + "epoch": 0.58, + "grad_norm": 0.7350336909294128, + "learning_rate": 7.859549462682944e-06, + "loss": 2.0198, + "step": 17308 + }, + { + "epoch": 0.58, + "grad_norm": 0.7451945543289185, + "learning_rate": 7.858511215669636e-06, + "loss": 2.0743, + "step": 17309 + }, + { + "epoch": 0.58, + "grad_norm": 0.759648859500885, + "learning_rate": 7.857472992849614e-06, + "loss": 2.0535, + "step": 17310 + }, + { + "epoch": 0.58, + "grad_norm": 0.7382601499557495, + "learning_rate": 7.856434794234607e-06, + "loss": 1.9995, + "step": 17311 + }, + { + "epoch": 0.58, + "grad_norm": 0.7237899899482727, + "learning_rate": 7.855396619836344e-06, + "loss": 2.1117, + "step": 17312 + }, + { + "epoch": 0.58, + "grad_norm": 0.7309780120849609, + "learning_rate": 7.854358469666553e-06, + "loss": 2.018, + "step": 17313 + }, + { + "epoch": 0.58, + "grad_norm": 0.7413632273674011, + "learning_rate": 7.853320343736959e-06, + "loss": 2.0949, + "step": 17314 + }, + { + "epoch": 0.58, + "grad_norm": 0.7469753623008728, + "learning_rate": 7.852282242059296e-06, + "loss": 2.023, + "step": 17315 + }, + { + "epoch": 0.58, + "grad_norm": 0.7385715246200562, + "learning_rate": 7.85124416464529e-06, + "loss": 2.0998, + "step": 17316 + }, + { + "epoch": 0.58, + "grad_norm": 0.7525288462638855, + "learning_rate": 7.85020611150667e-06, + "loss": 2.0471, + "step": 17317 + }, + { + "epoch": 0.58, + "grad_norm": 0.747439444065094, + "learning_rate": 7.849168082655159e-06, + "loss": 2.0904, + "step": 17318 + }, + { + "epoch": 0.58, + "grad_norm": 0.7694146037101746, + "learning_rate": 7.848130078102486e-06, + "loss": 2.0681, + "step": 17319 + }, + { + "epoch": 0.58, + "grad_norm": 0.7242448925971985, + "learning_rate": 7.847092097860377e-06, + "loss": 2.0602, + "step": 17320 + }, + { + "epoch": 0.58, + "grad_norm": 0.7295181155204773, + "learning_rate": 7.846054141940567e-06, + "loss": 2.0468, + "step": 17321 + }, + { + "epoch": 0.58, + "grad_norm": 0.7924940586090088, + "learning_rate": 7.845016210354767e-06, + "loss": 2.045, + "step": 17322 + }, + { + "epoch": 0.58, + "grad_norm": 0.7413346171379089, + "learning_rate": 7.843978303114714e-06, + "loss": 2.062, + "step": 17323 + }, + { + "epoch": 0.58, + "grad_norm": 0.7604600191116333, + "learning_rate": 7.84294042023213e-06, + "loss": 2.0935, + "step": 17324 + }, + { + "epoch": 0.58, + "grad_norm": 0.7378384470939636, + "learning_rate": 7.841902561718743e-06, + "loss": 2.0434, + "step": 17325 + }, + { + "epoch": 0.58, + "grad_norm": 0.7457320094108582, + "learning_rate": 7.840864727586275e-06, + "loss": 2.0784, + "step": 17326 + }, + { + "epoch": 0.58, + "grad_norm": 0.7351875901222229, + "learning_rate": 7.83982691784645e-06, + "loss": 2.0, + "step": 17327 + }, + { + "epoch": 0.58, + "grad_norm": 0.7610269784927368, + "learning_rate": 7.838789132510993e-06, + "loss": 2.0687, + "step": 17328 + }, + { + "epoch": 0.58, + "grad_norm": 0.7807416319847107, + "learning_rate": 7.837751371591634e-06, + "loss": 2.1091, + "step": 17329 + }, + { + "epoch": 0.58, + "grad_norm": 0.7713356614112854, + "learning_rate": 7.836713635100092e-06, + "loss": 2.0833, + "step": 17330 + }, + { + "epoch": 0.58, + "grad_norm": 0.728196382522583, + "learning_rate": 7.835675923048091e-06, + "loss": 1.9959, + "step": 17331 + }, + { + "epoch": 0.58, + "grad_norm": 0.7599889636039734, + "learning_rate": 7.834638235447355e-06, + "loss": 2.088, + "step": 17332 + }, + { + "epoch": 0.58, + "grad_norm": 0.7622956037521362, + "learning_rate": 7.833600572309607e-06, + "loss": 2.0768, + "step": 17333 + }, + { + "epoch": 0.58, + "grad_norm": 0.7294535636901855, + "learning_rate": 7.832562933646572e-06, + "loss": 2.0507, + "step": 17334 + }, + { + "epoch": 0.58, + "grad_norm": 0.755134642124176, + "learning_rate": 7.83152531946997e-06, + "loss": 2.098, + "step": 17335 + }, + { + "epoch": 0.58, + "grad_norm": 0.7552882432937622, + "learning_rate": 7.83048772979152e-06, + "loss": 2.062, + "step": 17336 + }, + { + "epoch": 0.58, + "grad_norm": 0.7497100830078125, + "learning_rate": 7.829450164622952e-06, + "loss": 2.1044, + "step": 17337 + }, + { + "epoch": 0.58, + "grad_norm": 0.7370823621749878, + "learning_rate": 7.828412623975983e-06, + "loss": 2.0814, + "step": 17338 + }, + { + "epoch": 0.58, + "grad_norm": 0.7301714420318604, + "learning_rate": 7.827375107862337e-06, + "loss": 2.1188, + "step": 17339 + }, + { + "epoch": 0.58, + "grad_norm": 0.7348575592041016, + "learning_rate": 7.826337616293732e-06, + "loss": 2.0916, + "step": 17340 + }, + { + "epoch": 0.58, + "grad_norm": 0.7609323859214783, + "learning_rate": 7.82530014928189e-06, + "loss": 2.0968, + "step": 17341 + }, + { + "epoch": 0.58, + "grad_norm": 0.7490087747573853, + "learning_rate": 7.824262706838532e-06, + "loss": 2.147, + "step": 17342 + }, + { + "epoch": 0.58, + "grad_norm": 0.749661386013031, + "learning_rate": 7.823225288975385e-06, + "loss": 2.1138, + "step": 17343 + }, + { + "epoch": 0.58, + "grad_norm": 0.7424139380455017, + "learning_rate": 7.822187895704157e-06, + "loss": 2.0773, + "step": 17344 + }, + { + "epoch": 0.58, + "grad_norm": 0.7401151061058044, + "learning_rate": 7.821150527036574e-06, + "loss": 2.0475, + "step": 17345 + }, + { + "epoch": 0.58, + "grad_norm": 0.740336000919342, + "learning_rate": 7.820113182984357e-06, + "loss": 2.1321, + "step": 17346 + }, + { + "epoch": 0.58, + "grad_norm": 0.7437052130699158, + "learning_rate": 7.819075863559222e-06, + "loss": 2.0196, + "step": 17347 + }, + { + "epoch": 0.58, + "grad_norm": 0.7514331936836243, + "learning_rate": 7.818038568772894e-06, + "loss": 2.1593, + "step": 17348 + }, + { + "epoch": 0.58, + "grad_norm": 0.7630886435508728, + "learning_rate": 7.817001298637084e-06, + "loss": 2.0993, + "step": 17349 + }, + { + "epoch": 0.58, + "grad_norm": 0.7463335394859314, + "learning_rate": 7.815964053163512e-06, + "loss": 2.0569, + "step": 17350 + }, + { + "epoch": 0.58, + "grad_norm": 0.7699261903762817, + "learning_rate": 7.814926832363902e-06, + "loss": 2.0599, + "step": 17351 + }, + { + "epoch": 0.58, + "grad_norm": 0.7129340171813965, + "learning_rate": 7.813889636249969e-06, + "loss": 2.0283, + "step": 17352 + }, + { + "epoch": 0.58, + "grad_norm": 0.7399489879608154, + "learning_rate": 7.812852464833428e-06, + "loss": 2.079, + "step": 17353 + }, + { + "epoch": 0.58, + "grad_norm": 0.7603597640991211, + "learning_rate": 7.811815318125996e-06, + "loss": 2.1028, + "step": 17354 + }, + { + "epoch": 0.58, + "grad_norm": 0.7158148884773254, + "learning_rate": 7.810778196139393e-06, + "loss": 2.1289, + "step": 17355 + }, + { + "epoch": 0.58, + "grad_norm": 0.7565138339996338, + "learning_rate": 7.809741098885338e-06, + "loss": 2.1485, + "step": 17356 + }, + { + "epoch": 0.58, + "grad_norm": 0.7400334477424622, + "learning_rate": 7.808704026375542e-06, + "loss": 2.0758, + "step": 17357 + }, + { + "epoch": 0.58, + "grad_norm": 0.734768807888031, + "learning_rate": 7.807666978621721e-06, + "loss": 2.0819, + "step": 17358 + }, + { + "epoch": 0.58, + "grad_norm": 0.7334807515144348, + "learning_rate": 7.806629955635597e-06, + "loss": 2.0198, + "step": 17359 + }, + { + "epoch": 0.58, + "grad_norm": 0.7478862404823303, + "learning_rate": 7.80559295742888e-06, + "loss": 2.1156, + "step": 17360 + }, + { + "epoch": 0.58, + "grad_norm": 0.7472066283226013, + "learning_rate": 7.804555984013293e-06, + "loss": 2.118, + "step": 17361 + }, + { + "epoch": 0.58, + "grad_norm": 0.7464023232460022, + "learning_rate": 7.803519035400542e-06, + "loss": 2.0776, + "step": 17362 + }, + { + "epoch": 0.58, + "grad_norm": 0.7464212775230408, + "learning_rate": 7.802482111602345e-06, + "loss": 2.0976, + "step": 17363 + }, + { + "epoch": 0.58, + "grad_norm": 0.7521456480026245, + "learning_rate": 7.801445212630416e-06, + "loss": 2.1053, + "step": 17364 + }, + { + "epoch": 0.58, + "grad_norm": 0.7686184048652649, + "learning_rate": 7.800408338496478e-06, + "loss": 2.108, + "step": 17365 + }, + { + "epoch": 0.58, + "grad_norm": 0.7243461608886719, + "learning_rate": 7.799371489212228e-06, + "loss": 2.0259, + "step": 17366 + }, + { + "epoch": 0.58, + "grad_norm": 0.7253236174583435, + "learning_rate": 7.798334664789395e-06, + "loss": 2.0684, + "step": 17367 + }, + { + "epoch": 0.58, + "grad_norm": 0.7606551051139832, + "learning_rate": 7.797297865239684e-06, + "loss": 2.0791, + "step": 17368 + }, + { + "epoch": 0.58, + "grad_norm": 0.742860734462738, + "learning_rate": 7.796261090574809e-06, + "loss": 2.0669, + "step": 17369 + }, + { + "epoch": 0.58, + "grad_norm": 0.7606264352798462, + "learning_rate": 7.795224340806492e-06, + "loss": 2.1076, + "step": 17370 + }, + { + "epoch": 0.58, + "grad_norm": 0.7465896010398865, + "learning_rate": 7.794187615946433e-06, + "loss": 1.9987, + "step": 17371 + }, + { + "epoch": 0.58, + "grad_norm": 0.7205922603607178, + "learning_rate": 7.793150916006349e-06, + "loss": 2.0126, + "step": 17372 + }, + { + "epoch": 0.58, + "grad_norm": 0.7139093279838562, + "learning_rate": 7.792114240997954e-06, + "loss": 2.0355, + "step": 17373 + }, + { + "epoch": 0.58, + "grad_norm": 0.7206209301948547, + "learning_rate": 7.79107759093296e-06, + "loss": 2.0557, + "step": 17374 + }, + { + "epoch": 0.58, + "grad_norm": 0.7571203112602234, + "learning_rate": 7.790040965823077e-06, + "loss": 2.0908, + "step": 17375 + }, + { + "epoch": 0.58, + "grad_norm": 0.7445367574691772, + "learning_rate": 7.789004365680012e-06, + "loss": 2.0862, + "step": 17376 + }, + { + "epoch": 0.58, + "grad_norm": 0.7295871376991272, + "learning_rate": 7.78796779051548e-06, + "loss": 2.0564, + "step": 17377 + }, + { + "epoch": 0.58, + "grad_norm": 0.717801570892334, + "learning_rate": 7.786931240341194e-06, + "loss": 2.0298, + "step": 17378 + }, + { + "epoch": 0.58, + "grad_norm": 0.7377458214759827, + "learning_rate": 7.785894715168865e-06, + "loss": 2.0364, + "step": 17379 + }, + { + "epoch": 0.58, + "grad_norm": 0.7790558934211731, + "learning_rate": 7.784858215010194e-06, + "loss": 2.1451, + "step": 17380 + }, + { + "epoch": 0.58, + "grad_norm": 0.7316217422485352, + "learning_rate": 7.783821739876899e-06, + "loss": 2.0655, + "step": 17381 + }, + { + "epoch": 0.58, + "grad_norm": 0.7462796568870544, + "learning_rate": 7.782785289780688e-06, + "loss": 2.0084, + "step": 17382 + }, + { + "epoch": 0.58, + "grad_norm": 0.7543364763259888, + "learning_rate": 7.78174886473327e-06, + "loss": 2.0886, + "step": 17383 + }, + { + "epoch": 0.58, + "grad_norm": 0.7059748768806458, + "learning_rate": 7.780712464746352e-06, + "loss": 2.0643, + "step": 17384 + }, + { + "epoch": 0.58, + "grad_norm": 0.7539125084877014, + "learning_rate": 7.779676089831641e-06, + "loss": 2.0097, + "step": 17385 + }, + { + "epoch": 0.58, + "grad_norm": 0.7451120018959045, + "learning_rate": 7.778639740000851e-06, + "loss": 2.0861, + "step": 17386 + }, + { + "epoch": 0.58, + "grad_norm": 0.7427269220352173, + "learning_rate": 7.777603415265691e-06, + "loss": 2.0159, + "step": 17387 + }, + { + "epoch": 0.58, + "grad_norm": 0.739164412021637, + "learning_rate": 7.77656711563786e-06, + "loss": 2.0692, + "step": 17388 + }, + { + "epoch": 0.58, + "grad_norm": 0.7497209310531616, + "learning_rate": 7.775530841129072e-06, + "loss": 2.0907, + "step": 17389 + }, + { + "epoch": 0.58, + "grad_norm": 0.7496910095214844, + "learning_rate": 7.774494591751034e-06, + "loss": 1.9738, + "step": 17390 + }, + { + "epoch": 0.58, + "grad_norm": 0.7634798884391785, + "learning_rate": 7.773458367515449e-06, + "loss": 2.0627, + "step": 17391 + }, + { + "epoch": 0.58, + "grad_norm": 0.7251195907592773, + "learning_rate": 7.772422168434034e-06, + "loss": 1.9762, + "step": 17392 + }, + { + "epoch": 0.58, + "grad_norm": 0.7290628552436829, + "learning_rate": 7.771385994518479e-06, + "loss": 2.0415, + "step": 17393 + }, + { + "epoch": 0.58, + "grad_norm": 0.7531698346138, + "learning_rate": 7.770349845780502e-06, + "loss": 2.1319, + "step": 17394 + }, + { + "epoch": 0.58, + "grad_norm": 0.7175230979919434, + "learning_rate": 7.769313722231807e-06, + "loss": 2.0439, + "step": 17395 + }, + { + "epoch": 0.58, + "grad_norm": 0.7451719641685486, + "learning_rate": 7.768277623884098e-06, + "loss": 2.056, + "step": 17396 + }, + { + "epoch": 0.58, + "grad_norm": 0.7396416068077087, + "learning_rate": 7.767241550749079e-06, + "loss": 2.0634, + "step": 17397 + }, + { + "epoch": 0.58, + "grad_norm": 0.7493788599967957, + "learning_rate": 7.766205502838457e-06, + "loss": 2.0494, + "step": 17398 + }, + { + "epoch": 0.58, + "grad_norm": 0.7432458996772766, + "learning_rate": 7.765169480163935e-06, + "loss": 2.0414, + "step": 17399 + }, + { + "epoch": 0.58, + "grad_norm": 0.7387503385543823, + "learning_rate": 7.76413348273722e-06, + "loss": 2.0699, + "step": 17400 + }, + { + "epoch": 0.58, + "grad_norm": 0.7569003701210022, + "learning_rate": 7.763097510570016e-06, + "loss": 2.0448, + "step": 17401 + }, + { + "epoch": 0.58, + "grad_norm": 0.7413594722747803, + "learning_rate": 7.762061563674024e-06, + "loss": 2.0878, + "step": 17402 + }, + { + "epoch": 0.58, + "grad_norm": 0.7320802211761475, + "learning_rate": 7.76102564206095e-06, + "loss": 2.0157, + "step": 17403 + }, + { + "epoch": 0.58, + "grad_norm": 0.7352131605148315, + "learning_rate": 7.759989745742493e-06, + "loss": 2.0186, + "step": 17404 + }, + { + "epoch": 0.58, + "grad_norm": 0.7776873707771301, + "learning_rate": 7.758953874730363e-06, + "loss": 2.0641, + "step": 17405 + }, + { + "epoch": 0.58, + "grad_norm": 0.7237109541893005, + "learning_rate": 7.757918029036257e-06, + "loss": 2.039, + "step": 17406 + }, + { + "epoch": 0.58, + "grad_norm": 0.7463359236717224, + "learning_rate": 7.756882208671875e-06, + "loss": 2.0016, + "step": 17407 + }, + { + "epoch": 0.58, + "grad_norm": 0.7474258542060852, + "learning_rate": 7.755846413648928e-06, + "loss": 2.0277, + "step": 17408 + }, + { + "epoch": 0.58, + "grad_norm": 0.7493696212768555, + "learning_rate": 7.754810643979114e-06, + "loss": 2.0492, + "step": 17409 + }, + { + "epoch": 0.58, + "grad_norm": 0.7247046232223511, + "learning_rate": 7.753774899674131e-06, + "loss": 1.9979, + "step": 17410 + }, + { + "epoch": 0.58, + "grad_norm": 0.7688528299331665, + "learning_rate": 7.752739180745683e-06, + "loss": 2.0897, + "step": 17411 + }, + { + "epoch": 0.58, + "grad_norm": 0.7707491517066956, + "learning_rate": 7.751703487205471e-06, + "loss": 2.1069, + "step": 17412 + }, + { + "epoch": 0.58, + "grad_norm": 0.7358784675598145, + "learning_rate": 7.750667819065193e-06, + "loss": 2.083, + "step": 17413 + }, + { + "epoch": 0.58, + "grad_norm": 0.7353017926216125, + "learning_rate": 7.749632176336558e-06, + "loss": 2.1439, + "step": 17414 + }, + { + "epoch": 0.58, + "grad_norm": 0.7381200790405273, + "learning_rate": 7.748596559031254e-06, + "loss": 2.0488, + "step": 17415 + }, + { + "epoch": 0.58, + "grad_norm": 0.7448513507843018, + "learning_rate": 7.747560967160988e-06, + "loss": 2.0676, + "step": 17416 + }, + { + "epoch": 0.58, + "grad_norm": 0.7788548469543457, + "learning_rate": 7.746525400737458e-06, + "loss": 2.0327, + "step": 17417 + }, + { + "epoch": 0.58, + "grad_norm": 0.7512484788894653, + "learning_rate": 7.745489859772367e-06, + "loss": 2.0892, + "step": 17418 + }, + { + "epoch": 0.58, + "grad_norm": 0.733829140663147, + "learning_rate": 7.744454344277406e-06, + "loss": 2.0248, + "step": 17419 + }, + { + "epoch": 0.58, + "grad_norm": 0.7473303079605103, + "learning_rate": 7.74341885426428e-06, + "loss": 2.026, + "step": 17420 + }, + { + "epoch": 0.58, + "grad_norm": 0.7779171466827393, + "learning_rate": 7.742383389744681e-06, + "loss": 2.1039, + "step": 17421 + }, + { + "epoch": 0.58, + "grad_norm": 0.7964597344398499, + "learning_rate": 7.741347950730316e-06, + "loss": 2.0772, + "step": 17422 + }, + { + "epoch": 0.58, + "grad_norm": 0.7253778576850891, + "learning_rate": 7.740312537232878e-06, + "loss": 1.9879, + "step": 17423 + }, + { + "epoch": 0.58, + "grad_norm": 0.7164488434791565, + "learning_rate": 7.739277149264066e-06, + "loss": 2.08, + "step": 17424 + }, + { + "epoch": 0.58, + "grad_norm": 0.7435901761054993, + "learning_rate": 7.738241786835571e-06, + "loss": 2.066, + "step": 17425 + }, + { + "epoch": 0.58, + "grad_norm": 0.7549481987953186, + "learning_rate": 7.737206449959098e-06, + "loss": 2.1115, + "step": 17426 + }, + { + "epoch": 0.58, + "grad_norm": 0.7708032131195068, + "learning_rate": 7.736171138646342e-06, + "loss": 2.1369, + "step": 17427 + }, + { + "epoch": 0.58, + "grad_norm": 0.7282251119613647, + "learning_rate": 7.735135852908997e-06, + "loss": 2.0476, + "step": 17428 + }, + { + "epoch": 0.58, + "grad_norm": 0.7787927985191345, + "learning_rate": 7.734100592758755e-06, + "loss": 2.0771, + "step": 17429 + }, + { + "epoch": 0.58, + "grad_norm": 0.749177873134613, + "learning_rate": 7.73306535820732e-06, + "loss": 2.0528, + "step": 17430 + }, + { + "epoch": 0.58, + "grad_norm": 0.7604622840881348, + "learning_rate": 7.732030149266382e-06, + "loss": 2.0395, + "step": 17431 + }, + { + "epoch": 0.58, + "grad_norm": 0.7528326511383057, + "learning_rate": 7.730994965947643e-06, + "loss": 2.063, + "step": 17432 + }, + { + "epoch": 0.58, + "grad_norm": 0.7310172319412231, + "learning_rate": 7.72995980826279e-06, + "loss": 2.0344, + "step": 17433 + }, + { + "epoch": 0.58, + "grad_norm": 0.7548729181289673, + "learning_rate": 7.728924676223521e-06, + "loss": 2.0424, + "step": 17434 + }, + { + "epoch": 0.58, + "grad_norm": 0.7577201724052429, + "learning_rate": 7.727889569841528e-06, + "loss": 2.0111, + "step": 17435 + }, + { + "epoch": 0.58, + "grad_norm": 0.7365975379943848, + "learning_rate": 7.726854489128513e-06, + "loss": 2.0101, + "step": 17436 + }, + { + "epoch": 0.58, + "grad_norm": 0.7256361842155457, + "learning_rate": 7.725819434096157e-06, + "loss": 2.0285, + "step": 17437 + }, + { + "epoch": 0.58, + "grad_norm": 0.7774661183357239, + "learning_rate": 7.724784404756163e-06, + "loss": 2.0706, + "step": 17438 + }, + { + "epoch": 0.58, + "grad_norm": 0.740241289138794, + "learning_rate": 7.723749401120222e-06, + "loss": 2.0661, + "step": 17439 + }, + { + "epoch": 0.58, + "grad_norm": 0.7365866899490356, + "learning_rate": 7.722714423200027e-06, + "loss": 2.06, + "step": 17440 + }, + { + "epoch": 0.58, + "grad_norm": 0.7518870830535889, + "learning_rate": 7.721679471007268e-06, + "loss": 2.0966, + "step": 17441 + }, + { + "epoch": 0.58, + "grad_norm": 0.7267422080039978, + "learning_rate": 7.720644544553639e-06, + "loss": 2.0597, + "step": 17442 + }, + { + "epoch": 0.58, + "grad_norm": 0.73319411277771, + "learning_rate": 7.719609643850832e-06, + "loss": 2.08, + "step": 17443 + }, + { + "epoch": 0.58, + "grad_norm": 0.722403883934021, + "learning_rate": 7.718574768910538e-06, + "loss": 2.0176, + "step": 17444 + }, + { + "epoch": 0.58, + "grad_norm": 0.7851993441581726, + "learning_rate": 7.717539919744453e-06, + "loss": 2.1465, + "step": 17445 + }, + { + "epoch": 0.58, + "grad_norm": 0.7367965579032898, + "learning_rate": 7.716505096364262e-06, + "loss": 2.0593, + "step": 17446 + }, + { + "epoch": 0.58, + "grad_norm": 0.7510982155799866, + "learning_rate": 7.715470298781659e-06, + "loss": 2.0726, + "step": 17447 + }, + { + "epoch": 0.58, + "grad_norm": 0.7541019916534424, + "learning_rate": 7.714435527008332e-06, + "loss": 2.0664, + "step": 17448 + }, + { + "epoch": 0.58, + "grad_norm": 0.7327848076820374, + "learning_rate": 7.713400781055977e-06, + "loss": 2.0555, + "step": 17449 + }, + { + "epoch": 0.58, + "grad_norm": 0.7458142638206482, + "learning_rate": 7.712366060936277e-06, + "loss": 2.1116, + "step": 17450 + }, + { + "epoch": 0.58, + "grad_norm": 0.7374458909034729, + "learning_rate": 7.711331366660922e-06, + "loss": 2.0663, + "step": 17451 + }, + { + "epoch": 0.58, + "grad_norm": 0.7517438530921936, + "learning_rate": 7.710296698241605e-06, + "loss": 2.0364, + "step": 17452 + }, + { + "epoch": 0.58, + "grad_norm": 0.7169767022132874, + "learning_rate": 7.709262055690014e-06, + "loss": 2.0435, + "step": 17453 + }, + { + "epoch": 0.58, + "grad_norm": 0.7351388931274414, + "learning_rate": 7.708227439017841e-06, + "loss": 2.0745, + "step": 17454 + }, + { + "epoch": 0.58, + "grad_norm": 0.7589265704154968, + "learning_rate": 7.70719284823677e-06, + "loss": 2.0851, + "step": 17455 + }, + { + "epoch": 0.58, + "grad_norm": 0.7409583926200867, + "learning_rate": 7.706158283358488e-06, + "loss": 2.0009, + "step": 17456 + }, + { + "epoch": 0.58, + "grad_norm": 0.746087908744812, + "learning_rate": 7.705123744394687e-06, + "loss": 1.9847, + "step": 17457 + }, + { + "epoch": 0.58, + "grad_norm": 0.743193507194519, + "learning_rate": 7.704089231357057e-06, + "loss": 2.0815, + "step": 17458 + }, + { + "epoch": 0.58, + "grad_norm": 0.7362068295478821, + "learning_rate": 7.703054744257275e-06, + "loss": 2.0004, + "step": 17459 + }, + { + "epoch": 0.58, + "grad_norm": 0.734274685382843, + "learning_rate": 7.702020283107037e-06, + "loss": 2.067, + "step": 17460 + }, + { + "epoch": 0.58, + "grad_norm": 0.733478307723999, + "learning_rate": 7.700985847918026e-06, + "loss": 2.0388, + "step": 17461 + }, + { + "epoch": 0.58, + "grad_norm": 0.782490074634552, + "learning_rate": 7.69995143870193e-06, + "loss": 2.0244, + "step": 17462 + }, + { + "epoch": 0.58, + "grad_norm": 0.7735369205474854, + "learning_rate": 7.698917055470438e-06, + "loss": 2.1589, + "step": 17463 + }, + { + "epoch": 0.58, + "grad_norm": 0.7039288878440857, + "learning_rate": 7.697882698235229e-06, + "loss": 2.06, + "step": 17464 + }, + { + "epoch": 0.58, + "grad_norm": 0.7719813585281372, + "learning_rate": 7.696848367007992e-06, + "loss": 2.0303, + "step": 17465 + }, + { + "epoch": 0.58, + "grad_norm": 0.7700414061546326, + "learning_rate": 7.695814061800413e-06, + "loss": 2.0361, + "step": 17466 + }, + { + "epoch": 0.58, + "grad_norm": 0.7483057379722595, + "learning_rate": 7.694779782624178e-06, + "loss": 2.0922, + "step": 17467 + }, + { + "epoch": 0.58, + "grad_norm": 0.7486280202865601, + "learning_rate": 7.693745529490968e-06, + "loss": 2.0695, + "step": 17468 + }, + { + "epoch": 0.58, + "grad_norm": 0.7262049913406372, + "learning_rate": 7.69271130241247e-06, + "loss": 2.0236, + "step": 17469 + }, + { + "epoch": 0.58, + "grad_norm": 0.7191380262374878, + "learning_rate": 7.691677101400366e-06, + "loss": 2.0721, + "step": 17470 + }, + { + "epoch": 0.58, + "grad_norm": 0.7430191040039062, + "learning_rate": 7.690642926466346e-06, + "loss": 2.0189, + "step": 17471 + }, + { + "epoch": 0.58, + "grad_norm": 0.7590774893760681, + "learning_rate": 7.689608777622086e-06, + "loss": 1.9794, + "step": 17472 + }, + { + "epoch": 0.58, + "grad_norm": 0.7510725259780884, + "learning_rate": 7.68857465487927e-06, + "loss": 2.046, + "step": 17473 + }, + { + "epoch": 0.58, + "grad_norm": 0.7602266669273376, + "learning_rate": 7.687540558249583e-06, + "loss": 2.0653, + "step": 17474 + }, + { + "epoch": 0.58, + "grad_norm": 0.7670218348503113, + "learning_rate": 7.68650648774471e-06, + "loss": 2.0999, + "step": 17475 + }, + { + "epoch": 0.58, + "grad_norm": 0.7479475140571594, + "learning_rate": 7.685472443376331e-06, + "loss": 2.0777, + "step": 17476 + }, + { + "epoch": 0.58, + "grad_norm": 0.7386905550956726, + "learning_rate": 7.684438425156127e-06, + "loss": 2.1075, + "step": 17477 + }, + { + "epoch": 0.58, + "grad_norm": 0.7453323006629944, + "learning_rate": 7.683404433095779e-06, + "loss": 2.0302, + "step": 17478 + }, + { + "epoch": 0.58, + "grad_norm": 0.7332302331924438, + "learning_rate": 7.68237046720697e-06, + "loss": 2.0645, + "step": 17479 + }, + { + "epoch": 0.58, + "grad_norm": 0.7530774474143982, + "learning_rate": 7.681336527501388e-06, + "loss": 2.102, + "step": 17480 + }, + { + "epoch": 0.58, + "grad_norm": 0.745632529258728, + "learning_rate": 7.680302613990699e-06, + "loss": 2.1098, + "step": 17481 + }, + { + "epoch": 0.58, + "grad_norm": 0.7491067051887512, + "learning_rate": 7.679268726686594e-06, + "loss": 1.9696, + "step": 17482 + }, + { + "epoch": 0.58, + "grad_norm": 0.773668110370636, + "learning_rate": 7.678234865600752e-06, + "loss": 2.0798, + "step": 17483 + }, + { + "epoch": 0.58, + "grad_norm": 0.7314655780792236, + "learning_rate": 7.677201030744847e-06, + "loss": 2.134, + "step": 17484 + }, + { + "epoch": 0.58, + "grad_norm": 0.7247464060783386, + "learning_rate": 7.676167222130573e-06, + "loss": 2.0378, + "step": 17485 + }, + { + "epoch": 0.58, + "grad_norm": 0.7148680686950684, + "learning_rate": 7.675133439769592e-06, + "loss": 1.9948, + "step": 17486 + }, + { + "epoch": 0.58, + "grad_norm": 0.7336472272872925, + "learning_rate": 7.674099683673593e-06, + "loss": 2.0247, + "step": 17487 + }, + { + "epoch": 0.58, + "grad_norm": 0.7533717155456543, + "learning_rate": 7.673065953854251e-06, + "loss": 2.0722, + "step": 17488 + }, + { + "epoch": 0.58, + "grad_norm": 0.730694591999054, + "learning_rate": 7.67203225032325e-06, + "loss": 2.0602, + "step": 17489 + }, + { + "epoch": 0.58, + "grad_norm": 0.7377015352249146, + "learning_rate": 7.670998573092263e-06, + "loss": 1.9994, + "step": 17490 + }, + { + "epoch": 0.58, + "grad_norm": 0.7786106467247009, + "learning_rate": 7.669964922172968e-06, + "loss": 2.1018, + "step": 17491 + }, + { + "epoch": 0.58, + "grad_norm": 0.7203280329704285, + "learning_rate": 7.668931297577042e-06, + "loss": 2.031, + "step": 17492 + }, + { + "epoch": 0.58, + "grad_norm": 0.712897002696991, + "learning_rate": 7.667897699316166e-06, + "loss": 2.0726, + "step": 17493 + }, + { + "epoch": 0.58, + "grad_norm": 0.7282452583312988, + "learning_rate": 7.666864127402016e-06, + "loss": 2.025, + "step": 17494 + }, + { + "epoch": 0.58, + "grad_norm": 0.729796290397644, + "learning_rate": 7.665830581846268e-06, + "loss": 1.9463, + "step": 17495 + }, + { + "epoch": 0.58, + "grad_norm": 0.7368864417076111, + "learning_rate": 7.664797062660597e-06, + "loss": 2.064, + "step": 17496 + }, + { + "epoch": 0.58, + "grad_norm": 0.7650624513626099, + "learning_rate": 7.66376356985668e-06, + "loss": 2.0402, + "step": 17497 + }, + { + "epoch": 0.58, + "grad_norm": 0.7281854748725891, + "learning_rate": 7.662730103446197e-06, + "loss": 1.9779, + "step": 17498 + }, + { + "epoch": 0.58, + "grad_norm": 0.7584214806556702, + "learning_rate": 7.661696663440815e-06, + "loss": 2.0546, + "step": 17499 + }, + { + "epoch": 0.58, + "grad_norm": 0.7363767623901367, + "learning_rate": 7.660663249852212e-06, + "loss": 2.0355, + "step": 17500 + }, + { + "epoch": 0.58, + "grad_norm": 0.7221778035163879, + "learning_rate": 7.659629862692067e-06, + "loss": 2.0415, + "step": 17501 + }, + { + "epoch": 0.58, + "grad_norm": 0.7445442080497742, + "learning_rate": 7.658596501972056e-06, + "loss": 2.0745, + "step": 17502 + }, + { + "epoch": 0.58, + "grad_norm": 0.7380548715591431, + "learning_rate": 7.657563167703845e-06, + "loss": 2.0499, + "step": 17503 + }, + { + "epoch": 0.58, + "grad_norm": 0.7469645738601685, + "learning_rate": 7.656529859899113e-06, + "loss": 2.0701, + "step": 17504 + }, + { + "epoch": 0.58, + "grad_norm": 0.7615806460380554, + "learning_rate": 7.655496578569533e-06, + "loss": 2.1273, + "step": 17505 + }, + { + "epoch": 0.58, + "grad_norm": 0.7354166507720947, + "learning_rate": 7.654463323726778e-06, + "loss": 2.0827, + "step": 17506 + }, + { + "epoch": 0.58, + "grad_norm": 0.7513006925582886, + "learning_rate": 7.653430095382528e-06, + "loss": 2.0889, + "step": 17507 + }, + { + "epoch": 0.58, + "grad_norm": 0.7492263913154602, + "learning_rate": 7.652396893548441e-06, + "loss": 2.0597, + "step": 17508 + }, + { + "epoch": 0.58, + "grad_norm": 0.7354400753974915, + "learning_rate": 7.651363718236202e-06, + "loss": 2.0764, + "step": 17509 + }, + { + "epoch": 0.58, + "grad_norm": 0.7112005949020386, + "learning_rate": 7.65033056945748e-06, + "loss": 2.0635, + "step": 17510 + }, + { + "epoch": 0.58, + "grad_norm": 0.7373778223991394, + "learning_rate": 7.649297447223947e-06, + "loss": 2.1214, + "step": 17511 + }, + { + "epoch": 0.58, + "grad_norm": 0.7370812296867371, + "learning_rate": 7.648264351547271e-06, + "loss": 2.074, + "step": 17512 + }, + { + "epoch": 0.58, + "grad_norm": 0.746357798576355, + "learning_rate": 7.647231282439126e-06, + "loss": 2.0507, + "step": 17513 + }, + { + "epoch": 0.58, + "grad_norm": 0.7210960388183594, + "learning_rate": 7.646198239911184e-06, + "loss": 2.0497, + "step": 17514 + }, + { + "epoch": 0.58, + "grad_norm": 0.7284794449806213, + "learning_rate": 7.645165223975113e-06, + "loss": 1.9811, + "step": 17515 + }, + { + "epoch": 0.58, + "grad_norm": 0.7269585728645325, + "learning_rate": 7.64413223464259e-06, + "loss": 2.0341, + "step": 17516 + }, + { + "epoch": 0.58, + "grad_norm": 0.7469526529312134, + "learning_rate": 7.643099271925278e-06, + "loss": 2.0446, + "step": 17517 + }, + { + "epoch": 0.58, + "grad_norm": 0.73759526014328, + "learning_rate": 7.64206633583485e-06, + "loss": 2.1581, + "step": 17518 + }, + { + "epoch": 0.58, + "grad_norm": 0.7635288834571838, + "learning_rate": 7.641033426382973e-06, + "loss": 2.0017, + "step": 17519 + }, + { + "epoch": 0.58, + "grad_norm": 0.729167640209198, + "learning_rate": 7.64000054358132e-06, + "loss": 2.0578, + "step": 17520 + }, + { + "epoch": 0.58, + "grad_norm": 0.7536032795906067, + "learning_rate": 7.638967687441556e-06, + "loss": 2.0881, + "step": 17521 + }, + { + "epoch": 0.58, + "grad_norm": 0.7823000550270081, + "learning_rate": 7.637934857975349e-06, + "loss": 2.0958, + "step": 17522 + }, + { + "epoch": 0.58, + "grad_norm": 0.7444952726364136, + "learning_rate": 7.636902055194371e-06, + "loss": 1.9673, + "step": 17523 + }, + { + "epoch": 0.58, + "grad_norm": 0.7347999215126038, + "learning_rate": 7.635869279110291e-06, + "loss": 1.9993, + "step": 17524 + }, + { + "epoch": 0.58, + "grad_norm": 0.7580734491348267, + "learning_rate": 7.634836529734775e-06, + "loss": 2.0522, + "step": 17525 + }, + { + "epoch": 0.58, + "grad_norm": 0.7444814443588257, + "learning_rate": 7.633803807079487e-06, + "loss": 2.0208, + "step": 17526 + }, + { + "epoch": 0.58, + "grad_norm": 0.7627196311950684, + "learning_rate": 7.632771111156098e-06, + "loss": 2.0861, + "step": 17527 + }, + { + "epoch": 0.58, + "grad_norm": 0.7694302201271057, + "learning_rate": 7.63173844197627e-06, + "loss": 2.0658, + "step": 17528 + }, + { + "epoch": 0.58, + "grad_norm": 0.7179054021835327, + "learning_rate": 7.63070579955168e-06, + "loss": 2.0339, + "step": 17529 + }, + { + "epoch": 0.58, + "grad_norm": 0.7908560633659363, + "learning_rate": 7.629673183893984e-06, + "loss": 2.1161, + "step": 17530 + }, + { + "epoch": 0.58, + "grad_norm": 0.748127281665802, + "learning_rate": 7.62864059501485e-06, + "loss": 2.0924, + "step": 17531 + }, + { + "epoch": 0.58, + "grad_norm": 0.7328124642372131, + "learning_rate": 7.627608032925946e-06, + "loss": 2.0092, + "step": 17532 + }, + { + "epoch": 0.58, + "grad_norm": 0.7364363074302673, + "learning_rate": 7.626575497638938e-06, + "loss": 2.1092, + "step": 17533 + }, + { + "epoch": 0.58, + "grad_norm": 0.7096620798110962, + "learning_rate": 7.625542989165487e-06, + "loss": 2.0657, + "step": 17534 + }, + { + "epoch": 0.58, + "grad_norm": 0.7504627108573914, + "learning_rate": 7.62451050751726e-06, + "loss": 1.9509, + "step": 17535 + }, + { + "epoch": 0.58, + "grad_norm": 0.751465380191803, + "learning_rate": 7.6234780527059185e-06, + "loss": 1.9769, + "step": 17536 + }, + { + "epoch": 0.58, + "grad_norm": 0.7992161512374878, + "learning_rate": 7.622445624743131e-06, + "loss": 2.1251, + "step": 17537 + }, + { + "epoch": 0.58, + "grad_norm": 0.7546550631523132, + "learning_rate": 7.6214132236405625e-06, + "loss": 2.0552, + "step": 17538 + }, + { + "epoch": 0.58, + "grad_norm": 0.717498779296875, + "learning_rate": 7.620380849409871e-06, + "loss": 2.0507, + "step": 17539 + }, + { + "epoch": 0.58, + "grad_norm": 0.7461324334144592, + "learning_rate": 7.619348502062721e-06, + "loss": 2.0598, + "step": 17540 + }, + { + "epoch": 0.58, + "grad_norm": 0.7266407608985901, + "learning_rate": 7.618316181610777e-06, + "loss": 2.0841, + "step": 17541 + }, + { + "epoch": 0.58, + "grad_norm": 0.7460744380950928, + "learning_rate": 7.617283888065704e-06, + "loss": 2.0706, + "step": 17542 + }, + { + "epoch": 0.58, + "grad_norm": 0.7377756237983704, + "learning_rate": 7.6162516214391595e-06, + "loss": 2.0417, + "step": 17543 + }, + { + "epoch": 0.58, + "grad_norm": 0.7448939085006714, + "learning_rate": 7.615219381742803e-06, + "loss": 2.0431, + "step": 17544 + }, + { + "epoch": 0.58, + "grad_norm": 0.713191032409668, + "learning_rate": 7.614187168988304e-06, + "loss": 2.0027, + "step": 17545 + }, + { + "epoch": 0.58, + "grad_norm": 0.7505508065223694, + "learning_rate": 7.61315498318732e-06, + "loss": 2.0809, + "step": 17546 + }, + { + "epoch": 0.58, + "grad_norm": 0.7530224323272705, + "learning_rate": 7.612122824351513e-06, + "loss": 2.0783, + "step": 17547 + }, + { + "epoch": 0.58, + "grad_norm": 0.7626066207885742, + "learning_rate": 7.6110906924925424e-06, + "loss": 2.0813, + "step": 17548 + }, + { + "epoch": 0.58, + "grad_norm": 0.7268019914627075, + "learning_rate": 7.610058587622068e-06, + "loss": 2.0512, + "step": 17549 + }, + { + "epoch": 0.58, + "grad_norm": 0.7149131298065186, + "learning_rate": 7.609026509751749e-06, + "loss": 2.0892, + "step": 17550 + }, + { + "epoch": 0.58, + "grad_norm": 0.7528133988380432, + "learning_rate": 7.6079944588932545e-06, + "loss": 2.0682, + "step": 17551 + }, + { + "epoch": 0.58, + "grad_norm": 0.7297781705856323, + "learning_rate": 7.60696243505823e-06, + "loss": 2.0764, + "step": 17552 + }, + { + "epoch": 0.58, + "grad_norm": 0.749937117099762, + "learning_rate": 7.605930438258343e-06, + "loss": 2.0528, + "step": 17553 + }, + { + "epoch": 0.58, + "grad_norm": 0.7584051489830017, + "learning_rate": 7.604898468505251e-06, + "loss": 2.036, + "step": 17554 + }, + { + "epoch": 0.58, + "grad_norm": 0.7718164324760437, + "learning_rate": 7.603866525810613e-06, + "loss": 2.0591, + "step": 17555 + }, + { + "epoch": 0.58, + "grad_norm": 0.734883725643158, + "learning_rate": 7.602834610186088e-06, + "loss": 2.0629, + "step": 17556 + }, + { + "epoch": 0.58, + "grad_norm": 0.7330256700515747, + "learning_rate": 7.601802721643332e-06, + "loss": 2.1074, + "step": 17557 + }, + { + "epoch": 0.58, + "grad_norm": 0.7398889660835266, + "learning_rate": 7.600770860194e-06, + "loss": 2.0365, + "step": 17558 + }, + { + "epoch": 0.58, + "grad_norm": 0.7155783772468567, + "learning_rate": 7.599739025849755e-06, + "loss": 2.1221, + "step": 17559 + }, + { + "epoch": 0.58, + "grad_norm": 0.7461991310119629, + "learning_rate": 7.5987072186222545e-06, + "loss": 2.0468, + "step": 17560 + }, + { + "epoch": 0.58, + "grad_norm": 0.7397270202636719, + "learning_rate": 7.59767543852315e-06, + "loss": 2.0263, + "step": 17561 + }, + { + "epoch": 0.58, + "grad_norm": 0.7453396320343018, + "learning_rate": 7.5966436855641004e-06, + "loss": 2.0799, + "step": 17562 + }, + { + "epoch": 0.58, + "grad_norm": 0.724539577960968, + "learning_rate": 7.59561195975676e-06, + "loss": 2.0088, + "step": 17563 + }, + { + "epoch": 0.58, + "grad_norm": 0.7381888031959534, + "learning_rate": 7.594580261112793e-06, + "loss": 2.036, + "step": 17564 + }, + { + "epoch": 0.58, + "grad_norm": 0.7535859942436218, + "learning_rate": 7.593548589643844e-06, + "loss": 2.1219, + "step": 17565 + }, + { + "epoch": 0.58, + "grad_norm": 0.7686878442764282, + "learning_rate": 7.592516945361571e-06, + "loss": 2.0897, + "step": 17566 + }, + { + "epoch": 0.58, + "grad_norm": 0.7559021711349487, + "learning_rate": 7.591485328277632e-06, + "loss": 2.0914, + "step": 17567 + }, + { + "epoch": 0.58, + "grad_norm": 0.7427282333374023, + "learning_rate": 7.59045373840368e-06, + "loss": 2.0088, + "step": 17568 + }, + { + "epoch": 0.58, + "grad_norm": 0.7279033660888672, + "learning_rate": 7.5894221757513735e-06, + "loss": 2.05, + "step": 17569 + }, + { + "epoch": 0.58, + "grad_norm": 0.7510033845901489, + "learning_rate": 7.58839064033236e-06, + "loss": 2.0655, + "step": 17570 + }, + { + "epoch": 0.58, + "grad_norm": 0.7204381227493286, + "learning_rate": 7.587359132158292e-06, + "loss": 2.0487, + "step": 17571 + }, + { + "epoch": 0.58, + "grad_norm": 0.7264611124992371, + "learning_rate": 7.58632765124083e-06, + "loss": 2.0611, + "step": 17572 + }, + { + "epoch": 0.58, + "grad_norm": 0.7632628083229065, + "learning_rate": 7.585296197591628e-06, + "loss": 2.0859, + "step": 17573 + }, + { + "epoch": 0.58, + "grad_norm": 0.7134138345718384, + "learning_rate": 7.584264771222326e-06, + "loss": 1.9269, + "step": 17574 + }, + { + "epoch": 0.58, + "grad_norm": 0.726304292678833, + "learning_rate": 7.583233372144589e-06, + "loss": 2.0568, + "step": 17575 + }, + { + "epoch": 0.58, + "grad_norm": 0.7399855256080627, + "learning_rate": 7.582202000370065e-06, + "loss": 2.0152, + "step": 17576 + }, + { + "epoch": 0.58, + "grad_norm": 0.7417418956756592, + "learning_rate": 7.581170655910402e-06, + "loss": 2.0644, + "step": 17577 + }, + { + "epoch": 0.58, + "grad_norm": 0.7679969072341919, + "learning_rate": 7.5801393387772635e-06, + "loss": 2.0816, + "step": 17578 + }, + { + "epoch": 0.58, + "grad_norm": 0.7833313345909119, + "learning_rate": 7.579108048982286e-06, + "loss": 2.093, + "step": 17579 + }, + { + "epoch": 0.58, + "grad_norm": 0.7727939486503601, + "learning_rate": 7.578076786537129e-06, + "loss": 1.9803, + "step": 17580 + }, + { + "epoch": 0.58, + "grad_norm": 0.7487653493881226, + "learning_rate": 7.57704555145344e-06, + "loss": 2.1069, + "step": 17581 + }, + { + "epoch": 0.58, + "grad_norm": 0.7269291877746582, + "learning_rate": 7.576014343742873e-06, + "loss": 2.0744, + "step": 17582 + }, + { + "epoch": 0.58, + "grad_norm": 0.761785089969635, + "learning_rate": 7.5749831634170734e-06, + "loss": 2.0369, + "step": 17583 + }, + { + "epoch": 0.59, + "grad_norm": 0.7442471385002136, + "learning_rate": 7.573952010487693e-06, + "loss": 2.0468, + "step": 17584 + }, + { + "epoch": 0.59, + "grad_norm": 0.7325069904327393, + "learning_rate": 7.572920884966379e-06, + "loss": 2.0338, + "step": 17585 + }, + { + "epoch": 0.59, + "grad_norm": 0.7632442712783813, + "learning_rate": 7.571889786864789e-06, + "loss": 2.0137, + "step": 17586 + }, + { + "epoch": 0.59, + "grad_norm": 0.7422991394996643, + "learning_rate": 7.570858716194558e-06, + "loss": 2.0957, + "step": 17587 + }, + { + "epoch": 0.59, + "grad_norm": 0.7239232063293457, + "learning_rate": 7.569827672967345e-06, + "loss": 2.0533, + "step": 17588 + }, + { + "epoch": 0.59, + "grad_norm": 0.7236452698707581, + "learning_rate": 7.5687966571947925e-06, + "loss": 2.1239, + "step": 17589 + }, + { + "epoch": 0.59, + "grad_norm": 0.7207648754119873, + "learning_rate": 7.567765668888553e-06, + "loss": 2.1178, + "step": 17590 + }, + { + "epoch": 0.59, + "grad_norm": 0.7437989115715027, + "learning_rate": 7.5667347080602715e-06, + "loss": 2.0421, + "step": 17591 + }, + { + "epoch": 0.59, + "grad_norm": 0.7432490587234497, + "learning_rate": 7.565703774721595e-06, + "loss": 2.0596, + "step": 17592 + }, + { + "epoch": 0.59, + "grad_norm": 0.7236190438270569, + "learning_rate": 7.564672868884168e-06, + "loss": 2.0492, + "step": 17593 + }, + { + "epoch": 0.59, + "grad_norm": 0.7365027666091919, + "learning_rate": 7.5636419905596405e-06, + "loss": 2.0672, + "step": 17594 + }, + { + "epoch": 0.59, + "grad_norm": 0.7086513042449951, + "learning_rate": 7.562611139759662e-06, + "loss": 1.9998, + "step": 17595 + }, + { + "epoch": 0.59, + "grad_norm": 0.7439348101615906, + "learning_rate": 7.561580316495872e-06, + "loss": 2.0185, + "step": 17596 + }, + { + "epoch": 0.59, + "grad_norm": 0.7480746507644653, + "learning_rate": 7.5605495207799165e-06, + "loss": 2.056, + "step": 17597 + }, + { + "epoch": 0.59, + "grad_norm": 0.7516438364982605, + "learning_rate": 7.559518752623444e-06, + "loss": 2.1186, + "step": 17598 + }, + { + "epoch": 0.59, + "grad_norm": 0.7640146017074585, + "learning_rate": 7.558488012038097e-06, + "loss": 2.0522, + "step": 17599 + }, + { + "epoch": 0.59, + "grad_norm": 0.7664073705673218, + "learning_rate": 7.5574572990355265e-06, + "loss": 1.994, + "step": 17600 + }, + { + "epoch": 0.59, + "grad_norm": 0.7271788120269775, + "learning_rate": 7.556426613627367e-06, + "loss": 2.0278, + "step": 17601 + }, + { + "epoch": 0.59, + "grad_norm": 0.7477800846099854, + "learning_rate": 7.555395955825269e-06, + "loss": 1.9969, + "step": 17602 + }, + { + "epoch": 0.59, + "grad_norm": 0.7379085421562195, + "learning_rate": 7.554365325640876e-06, + "loss": 2.0085, + "step": 17603 + }, + { + "epoch": 0.59, + "grad_norm": 0.7312513589859009, + "learning_rate": 7.55333472308583e-06, + "loss": 2.0603, + "step": 17604 + }, + { + "epoch": 0.59, + "grad_norm": 0.734138548374176, + "learning_rate": 7.552304148171774e-06, + "loss": 1.9783, + "step": 17605 + }, + { + "epoch": 0.59, + "grad_norm": 0.7805414199829102, + "learning_rate": 7.55127360091035e-06, + "loss": 1.9661, + "step": 17606 + }, + { + "epoch": 0.59, + "grad_norm": 0.7499974370002747, + "learning_rate": 7.550243081313201e-06, + "loss": 2.0952, + "step": 17607 + }, + { + "epoch": 0.59, + "grad_norm": 0.7417061924934387, + "learning_rate": 7.5492125893919724e-06, + "loss": 2.0979, + "step": 17608 + }, + { + "epoch": 0.59, + "grad_norm": 0.7168869376182556, + "learning_rate": 7.548182125158304e-06, + "loss": 2.0711, + "step": 17609 + }, + { + "epoch": 0.59, + "grad_norm": 0.7445345520973206, + "learning_rate": 7.547151688623836e-06, + "loss": 2.052, + "step": 17610 + }, + { + "epoch": 0.59, + "grad_norm": 0.7561436891555786, + "learning_rate": 7.546121279800212e-06, + "loss": 2.0457, + "step": 17611 + }, + { + "epoch": 0.59, + "grad_norm": 0.7429252862930298, + "learning_rate": 7.5450908986990705e-06, + "loss": 2.0711, + "step": 17612 + }, + { + "epoch": 0.59, + "grad_norm": 0.724929928779602, + "learning_rate": 7.5440605453320545e-06, + "loss": 2.06, + "step": 17613 + }, + { + "epoch": 0.59, + "grad_norm": 0.726159930229187, + "learning_rate": 7.5430302197108005e-06, + "loss": 2.024, + "step": 17614 + }, + { + "epoch": 0.59, + "grad_norm": 0.7570730447769165, + "learning_rate": 7.541999921846951e-06, + "loss": 2.0461, + "step": 17615 + }, + { + "epoch": 0.59, + "grad_norm": 0.7577831745147705, + "learning_rate": 7.540969651752148e-06, + "loss": 2.0674, + "step": 17616 + }, + { + "epoch": 0.59, + "grad_norm": 0.7463246583938599, + "learning_rate": 7.539939409438029e-06, + "loss": 1.9972, + "step": 17617 + }, + { + "epoch": 0.59, + "grad_norm": 0.7243469953536987, + "learning_rate": 7.538909194916233e-06, + "loss": 2.1171, + "step": 17618 + }, + { + "epoch": 0.59, + "grad_norm": 0.748498260974884, + "learning_rate": 7.537879008198397e-06, + "loss": 2.0801, + "step": 17619 + }, + { + "epoch": 0.59, + "grad_norm": 0.7632192969322205, + "learning_rate": 7.536848849296161e-06, + "loss": 2.0306, + "step": 17620 + }, + { + "epoch": 0.59, + "grad_norm": 0.7671763300895691, + "learning_rate": 7.535818718221161e-06, + "loss": 2.068, + "step": 17621 + }, + { + "epoch": 0.59, + "grad_norm": 0.7456734776496887, + "learning_rate": 7.534788614985045e-06, + "loss": 2.0889, + "step": 17622 + }, + { + "epoch": 0.59, + "grad_norm": 0.7639561891555786, + "learning_rate": 7.533758539599434e-06, + "loss": 2.1013, + "step": 17623 + }, + { + "epoch": 0.59, + "grad_norm": 0.7272188067436218, + "learning_rate": 7.532728492075977e-06, + "loss": 2.0595, + "step": 17624 + }, + { + "epoch": 0.59, + "grad_norm": 0.7256278991699219, + "learning_rate": 7.531698472426307e-06, + "loss": 2.0916, + "step": 17625 + }, + { + "epoch": 0.59, + "grad_norm": 0.7265766859054565, + "learning_rate": 7.5306684806620636e-06, + "loss": 2.0952, + "step": 17626 + }, + { + "epoch": 0.59, + "grad_norm": 0.7260339260101318, + "learning_rate": 7.529638516794878e-06, + "loss": 2.115, + "step": 17627 + }, + { + "epoch": 0.59, + "grad_norm": 0.7943592667579651, + "learning_rate": 7.528608580836389e-06, + "loss": 2.0663, + "step": 17628 + }, + { + "epoch": 0.59, + "grad_norm": 0.7220326662063599, + "learning_rate": 7.527578672798229e-06, + "loss": 2.0706, + "step": 17629 + }, + { + "epoch": 0.59, + "grad_norm": 0.7528584599494934, + "learning_rate": 7.526548792692039e-06, + "loss": 2.0177, + "step": 17630 + }, + { + "epoch": 0.59, + "grad_norm": 0.7268966436386108, + "learning_rate": 7.525518940529454e-06, + "loss": 2.1183, + "step": 17631 + }, + { + "epoch": 0.59, + "grad_norm": 0.785613477230072, + "learning_rate": 7.5244891163221025e-06, + "loss": 2.0496, + "step": 17632 + }, + { + "epoch": 0.59, + "grad_norm": 0.7384819388389587, + "learning_rate": 7.523459320081623e-06, + "loss": 2.0891, + "step": 17633 + }, + { + "epoch": 0.59, + "grad_norm": 0.7446824312210083, + "learning_rate": 7.522429551819648e-06, + "loss": 2.0876, + "step": 17634 + }, + { + "epoch": 0.59, + "grad_norm": 0.7553513646125793, + "learning_rate": 7.5213998115478134e-06, + "loss": 2.1699, + "step": 17635 + }, + { + "epoch": 0.59, + "grad_norm": 0.7468435168266296, + "learning_rate": 7.520370099277751e-06, + "loss": 2.0711, + "step": 17636 + }, + { + "epoch": 0.59, + "grad_norm": 0.7943869233131409, + "learning_rate": 7.51934041502109e-06, + "loss": 2.0402, + "step": 17637 + }, + { + "epoch": 0.59, + "grad_norm": 0.7574362754821777, + "learning_rate": 7.518310758789471e-06, + "loss": 2.0437, + "step": 17638 + }, + { + "epoch": 0.59, + "grad_norm": 0.7430615425109863, + "learning_rate": 7.517281130594521e-06, + "loss": 2.0554, + "step": 17639 + }, + { + "epoch": 0.59, + "grad_norm": 0.7411746382713318, + "learning_rate": 7.516251530447877e-06, + "loss": 2.0518, + "step": 17640 + }, + { + "epoch": 0.59, + "grad_norm": 0.7515670657157898, + "learning_rate": 7.515221958361165e-06, + "loss": 2.0864, + "step": 17641 + }, + { + "epoch": 0.59, + "grad_norm": 0.7506204843521118, + "learning_rate": 7.5141924143460195e-06, + "loss": 2.081, + "step": 17642 + }, + { + "epoch": 0.59, + "grad_norm": 0.7756949067115784, + "learning_rate": 7.513162898414068e-06, + "loss": 2.1136, + "step": 17643 + }, + { + "epoch": 0.59, + "grad_norm": 0.8023573756217957, + "learning_rate": 7.512133410576953e-06, + "loss": 2.1303, + "step": 17644 + }, + { + "epoch": 0.59, + "grad_norm": 0.7889078259468079, + "learning_rate": 7.511103950846289e-06, + "loss": 2.0352, + "step": 17645 + }, + { + "epoch": 0.59, + "grad_norm": 0.7400040626525879, + "learning_rate": 7.510074519233717e-06, + "loss": 2.0663, + "step": 17646 + }, + { + "epoch": 0.59, + "grad_norm": 0.727421224117279, + "learning_rate": 7.509045115750862e-06, + "loss": 2.1047, + "step": 17647 + }, + { + "epoch": 0.59, + "grad_norm": 0.7777203917503357, + "learning_rate": 7.508015740409359e-06, + "loss": 2.1023, + "step": 17648 + }, + { + "epoch": 0.59, + "grad_norm": 0.7542639374732971, + "learning_rate": 7.506986393220831e-06, + "loss": 2.1508, + "step": 17649 + }, + { + "epoch": 0.59, + "grad_norm": 0.7315654158592224, + "learning_rate": 7.50595707419691e-06, + "loss": 2.0545, + "step": 17650 + }, + { + "epoch": 0.59, + "grad_norm": 0.7466400861740112, + "learning_rate": 7.504927783349222e-06, + "loss": 2.0387, + "step": 17651 + }, + { + "epoch": 0.59, + "grad_norm": 0.7527769207954407, + "learning_rate": 7.5038985206894e-06, + "loss": 2.0194, + "step": 17652 + }, + { + "epoch": 0.59, + "grad_norm": 0.7366842031478882, + "learning_rate": 7.502869286229072e-06, + "loss": 2.0384, + "step": 17653 + }, + { + "epoch": 0.59, + "grad_norm": 0.7356754541397095, + "learning_rate": 7.5018400799798605e-06, + "loss": 2.1044, + "step": 17654 + }, + { + "epoch": 0.59, + "grad_norm": 0.7419320940971375, + "learning_rate": 7.500810901953396e-06, + "loss": 2.099, + "step": 17655 + }, + { + "epoch": 0.59, + "grad_norm": 0.7306846380233765, + "learning_rate": 7.499781752161306e-06, + "loss": 2.0894, + "step": 17656 + }, + { + "epoch": 0.59, + "grad_norm": 0.7321548461914062, + "learning_rate": 7.498752630615218e-06, + "loss": 2.0756, + "step": 17657 + }, + { + "epoch": 0.59, + "grad_norm": 0.7341476678848267, + "learning_rate": 7.497723537326754e-06, + "loss": 2.0728, + "step": 17658 + }, + { + "epoch": 0.59, + "grad_norm": 0.7661813497543335, + "learning_rate": 7.4966944723075416e-06, + "loss": 1.9779, + "step": 17659 + }, + { + "epoch": 0.59, + "grad_norm": 0.7620532512664795, + "learning_rate": 7.4956654355692105e-06, + "loss": 2.1432, + "step": 17660 + }, + { + "epoch": 0.59, + "grad_norm": 0.7839008569717407, + "learning_rate": 7.4946364271233825e-06, + "loss": 2.0541, + "step": 17661 + }, + { + "epoch": 0.59, + "grad_norm": 0.714858889579773, + "learning_rate": 7.493607446981688e-06, + "loss": 2.0674, + "step": 17662 + }, + { + "epoch": 0.59, + "grad_norm": 0.7521733641624451, + "learning_rate": 7.4925784951557445e-06, + "loss": 1.9875, + "step": 17663 + }, + { + "epoch": 0.59, + "grad_norm": 0.7793568968772888, + "learning_rate": 7.4915495716571795e-06, + "loss": 2.1494, + "step": 17664 + }, + { + "epoch": 0.59, + "grad_norm": 0.7606765627861023, + "learning_rate": 7.490520676497615e-06, + "loss": 2.0473, + "step": 17665 + }, + { + "epoch": 0.59, + "grad_norm": 0.7544727921485901, + "learning_rate": 7.489491809688683e-06, + "loss": 2.0289, + "step": 17666 + }, + { + "epoch": 0.59, + "grad_norm": 0.7753239870071411, + "learning_rate": 7.4884629712419965e-06, + "loss": 2.1364, + "step": 17667 + }, + { + "epoch": 0.59, + "grad_norm": 0.741447389125824, + "learning_rate": 7.487434161169185e-06, + "loss": 2.0604, + "step": 17668 + }, + { + "epoch": 0.59, + "grad_norm": 0.7883172035217285, + "learning_rate": 7.4864053794818696e-06, + "loss": 1.9854, + "step": 17669 + }, + { + "epoch": 0.59, + "grad_norm": 0.7314766645431519, + "learning_rate": 7.485376626191669e-06, + "loss": 2.0909, + "step": 17670 + }, + { + "epoch": 0.59, + "grad_norm": 0.7710330486297607, + "learning_rate": 7.484347901310218e-06, + "loss": 2.1969, + "step": 17671 + }, + { + "epoch": 0.59, + "grad_norm": 0.7284137010574341, + "learning_rate": 7.483319204849124e-06, + "loss": 2.0081, + "step": 17672 + }, + { + "epoch": 0.59, + "grad_norm": 0.7630683779716492, + "learning_rate": 7.482290536820016e-06, + "loss": 2.0101, + "step": 17673 + }, + { + "epoch": 0.59, + "grad_norm": 0.7487777471542358, + "learning_rate": 7.4812618972345155e-06, + "loss": 2.082, + "step": 17674 + }, + { + "epoch": 0.59, + "grad_norm": 0.7572994828224182, + "learning_rate": 7.4802332861042425e-06, + "loss": 2.0088, + "step": 17675 + }, + { + "epoch": 0.59, + "grad_norm": 0.7358850240707397, + "learning_rate": 7.4792047034408156e-06, + "loss": 2.1164, + "step": 17676 + }, + { + "epoch": 0.59, + "grad_norm": 0.7252094149589539, + "learning_rate": 7.478176149255856e-06, + "loss": 2.0362, + "step": 17677 + }, + { + "epoch": 0.59, + "grad_norm": 0.7432246804237366, + "learning_rate": 7.477147623560983e-06, + "loss": 2.0799, + "step": 17678 + }, + { + "epoch": 0.59, + "grad_norm": 0.7615903615951538, + "learning_rate": 7.4761191263678245e-06, + "loss": 2.1197, + "step": 17679 + }, + { + "epoch": 0.59, + "grad_norm": 0.7458433508872986, + "learning_rate": 7.475090657687985e-06, + "loss": 2.03, + "step": 17680 + }, + { + "epoch": 0.59, + "grad_norm": 0.7390851974487305, + "learning_rate": 7.474062217533094e-06, + "loss": 2.0554, + "step": 17681 + }, + { + "epoch": 0.59, + "grad_norm": 0.7398363947868347, + "learning_rate": 7.473033805914769e-06, + "loss": 2.0295, + "step": 17682 + }, + { + "epoch": 0.59, + "grad_norm": 0.7734447717666626, + "learning_rate": 7.472005422844626e-06, + "loss": 2.1253, + "step": 17683 + }, + { + "epoch": 0.59, + "grad_norm": 0.76604163646698, + "learning_rate": 7.470977068334286e-06, + "loss": 2.1087, + "step": 17684 + }, + { + "epoch": 0.59, + "grad_norm": 0.7414438128471375, + "learning_rate": 7.469948742395363e-06, + "loss": 2.046, + "step": 17685 + }, + { + "epoch": 0.59, + "grad_norm": 0.7619325518608093, + "learning_rate": 7.468920445039476e-06, + "loss": 2.1187, + "step": 17686 + }, + { + "epoch": 0.59, + "grad_norm": 0.7636239528656006, + "learning_rate": 7.4678921762782415e-06, + "loss": 2.0214, + "step": 17687 + }, + { + "epoch": 0.59, + "grad_norm": 0.7501306533813477, + "learning_rate": 7.466863936123282e-06, + "loss": 2.019, + "step": 17688 + }, + { + "epoch": 0.59, + "grad_norm": 0.7532680630683899, + "learning_rate": 7.465835724586205e-06, + "loss": 2.0572, + "step": 17689 + }, + { + "epoch": 0.59, + "grad_norm": 0.780039370059967, + "learning_rate": 7.464807541678634e-06, + "loss": 2.023, + "step": 17690 + }, + { + "epoch": 0.59, + "grad_norm": 0.7525805234909058, + "learning_rate": 7.4637793874121775e-06, + "loss": 2.0268, + "step": 17691 + }, + { + "epoch": 0.59, + "grad_norm": 0.7352322936058044, + "learning_rate": 7.462751261798456e-06, + "loss": 2.024, + "step": 17692 + }, + { + "epoch": 0.59, + "grad_norm": 0.8357675075531006, + "learning_rate": 7.4617231648490885e-06, + "loss": 2.0668, + "step": 17693 + }, + { + "epoch": 0.59, + "grad_norm": 0.7364036440849304, + "learning_rate": 7.46069509657568e-06, + "loss": 1.9879, + "step": 17694 + }, + { + "epoch": 0.59, + "grad_norm": 0.7361488938331604, + "learning_rate": 7.459667056989852e-06, + "loss": 1.9991, + "step": 17695 + }, + { + "epoch": 0.59, + "grad_norm": 0.733354926109314, + "learning_rate": 7.458639046103216e-06, + "loss": 2.0825, + "step": 17696 + }, + { + "epoch": 0.59, + "grad_norm": 0.770213782787323, + "learning_rate": 7.457611063927389e-06, + "loss": 2.0843, + "step": 17697 + }, + { + "epoch": 0.59, + "grad_norm": 0.7373711466789246, + "learning_rate": 7.456583110473981e-06, + "loss": 2.0511, + "step": 17698 + }, + { + "epoch": 0.59, + "grad_norm": 0.741649329662323, + "learning_rate": 7.455555185754606e-06, + "loss": 2.0196, + "step": 17699 + }, + { + "epoch": 0.59, + "grad_norm": 0.7481525540351868, + "learning_rate": 7.454527289780874e-06, + "loss": 2.0739, + "step": 17700 + }, + { + "epoch": 0.59, + "grad_norm": 0.7365272641181946, + "learning_rate": 7.453499422564404e-06, + "loss": 1.9543, + "step": 17701 + }, + { + "epoch": 0.59, + "grad_norm": 0.7408632040023804, + "learning_rate": 7.452471584116807e-06, + "loss": 2.1058, + "step": 17702 + }, + { + "epoch": 0.59, + "grad_norm": 0.7279224395751953, + "learning_rate": 7.451443774449692e-06, + "loss": 2.0912, + "step": 17703 + }, + { + "epoch": 0.59, + "grad_norm": 0.7461594343185425, + "learning_rate": 7.45041599357467e-06, + "loss": 2.0864, + "step": 17704 + }, + { + "epoch": 0.59, + "grad_norm": 0.7708375453948975, + "learning_rate": 7.449388241503355e-06, + "loss": 2.031, + "step": 17705 + }, + { + "epoch": 0.59, + "grad_norm": 0.7397706508636475, + "learning_rate": 7.448360518247358e-06, + "loss": 1.9936, + "step": 17706 + }, + { + "epoch": 0.59, + "grad_norm": 0.7420316338539124, + "learning_rate": 7.447332823818287e-06, + "loss": 2.0358, + "step": 17707 + }, + { + "epoch": 0.59, + "grad_norm": 0.7284911274909973, + "learning_rate": 7.4463051582277515e-06, + "loss": 2.0527, + "step": 17708 + }, + { + "epoch": 0.59, + "grad_norm": 0.7508965730667114, + "learning_rate": 7.4452775214873664e-06, + "loss": 2.0553, + "step": 17709 + }, + { + "epoch": 0.59, + "grad_norm": 0.7549227476119995, + "learning_rate": 7.444249913608741e-06, + "loss": 2.0681, + "step": 17710 + }, + { + "epoch": 0.59, + "grad_norm": 0.7928520441055298, + "learning_rate": 7.44322233460348e-06, + "loss": 2.047, + "step": 17711 + }, + { + "epoch": 0.59, + "grad_norm": 0.7126542925834656, + "learning_rate": 7.442194784483192e-06, + "loss": 2.0172, + "step": 17712 + }, + { + "epoch": 0.59, + "grad_norm": 0.7781335711479187, + "learning_rate": 7.4411672632594915e-06, + "loss": 2.0736, + "step": 17713 + }, + { + "epoch": 0.59, + "grad_norm": 0.7449811100959778, + "learning_rate": 7.4401397709439795e-06, + "loss": 2.0922, + "step": 17714 + }, + { + "epoch": 0.59, + "grad_norm": 0.726498544216156, + "learning_rate": 7.439112307548276e-06, + "loss": 2.0467, + "step": 17715 + }, + { + "epoch": 0.59, + "grad_norm": 0.7464563846588135, + "learning_rate": 7.438084873083974e-06, + "loss": 2.1294, + "step": 17716 + }, + { + "epoch": 0.59, + "grad_norm": 0.7600895762443542, + "learning_rate": 7.437057467562689e-06, + "loss": 2.0195, + "step": 17717 + }, + { + "epoch": 0.59, + "grad_norm": 0.7245799899101257, + "learning_rate": 7.436030090996028e-06, + "loss": 2.0661, + "step": 17718 + }, + { + "epoch": 0.59, + "grad_norm": 0.7593080401420593, + "learning_rate": 7.4350027433955985e-06, + "loss": 2.1653, + "step": 17719 + }, + { + "epoch": 0.59, + "grad_norm": 0.7599025368690491, + "learning_rate": 7.4339754247730015e-06, + "loss": 2.0982, + "step": 17720 + }, + { + "epoch": 0.59, + "grad_norm": 0.7334645986557007, + "learning_rate": 7.432948135139846e-06, + "loss": 2.0341, + "step": 17721 + }, + { + "epoch": 0.59, + "grad_norm": 0.7520384192466736, + "learning_rate": 7.431920874507738e-06, + "loss": 2.0026, + "step": 17722 + }, + { + "epoch": 0.59, + "grad_norm": 0.7388368844985962, + "learning_rate": 7.430893642888284e-06, + "loss": 2.0531, + "step": 17723 + }, + { + "epoch": 0.59, + "grad_norm": 0.7473230957984924, + "learning_rate": 7.4298664402930895e-06, + "loss": 2.0798, + "step": 17724 + }, + { + "epoch": 0.59, + "grad_norm": 0.7643396854400635, + "learning_rate": 7.428839266733756e-06, + "loss": 2.1012, + "step": 17725 + }, + { + "epoch": 0.59, + "grad_norm": 0.7569268941879272, + "learning_rate": 7.427812122221889e-06, + "loss": 2.0428, + "step": 17726 + }, + { + "epoch": 0.59, + "grad_norm": 0.7806915044784546, + "learning_rate": 7.426785006769094e-06, + "loss": 2.0358, + "step": 17727 + }, + { + "epoch": 0.59, + "grad_norm": 0.7343487739562988, + "learning_rate": 7.425757920386975e-06, + "loss": 2.0736, + "step": 17728 + }, + { + "epoch": 0.59, + "grad_norm": 0.7507137656211853, + "learning_rate": 7.424730863087134e-06, + "loss": 2.0256, + "step": 17729 + }, + { + "epoch": 0.59, + "grad_norm": 0.7378207445144653, + "learning_rate": 7.423703834881171e-06, + "loss": 2.117, + "step": 17730 + }, + { + "epoch": 0.59, + "grad_norm": 0.7440178394317627, + "learning_rate": 7.422676835780696e-06, + "loss": 2.0599, + "step": 17731 + }, + { + "epoch": 0.59, + "grad_norm": 0.7573827505111694, + "learning_rate": 7.421649865797307e-06, + "loss": 2.1147, + "step": 17732 + }, + { + "epoch": 0.59, + "grad_norm": 0.7461088299751282, + "learning_rate": 7.4206229249426065e-06, + "loss": 2.0193, + "step": 17733 + }, + { + "epoch": 0.59, + "grad_norm": 0.7320839762687683, + "learning_rate": 7.4195960132281965e-06, + "loss": 2.0453, + "step": 17734 + }, + { + "epoch": 0.59, + "grad_norm": 0.7362501621246338, + "learning_rate": 7.418569130665678e-06, + "loss": 2.0878, + "step": 17735 + }, + { + "epoch": 0.59, + "grad_norm": 0.7760770320892334, + "learning_rate": 7.417542277266651e-06, + "loss": 2.013, + "step": 17736 + }, + { + "epoch": 0.59, + "grad_norm": 0.7016144394874573, + "learning_rate": 7.416515453042723e-06, + "loss": 2.0121, + "step": 17737 + }, + { + "epoch": 0.59, + "grad_norm": 0.7516626715660095, + "learning_rate": 7.415488658005484e-06, + "loss": 2.0747, + "step": 17738 + }, + { + "epoch": 0.59, + "grad_norm": 0.7502581477165222, + "learning_rate": 7.414461892166542e-06, + "loss": 2.0511, + "step": 17739 + }, + { + "epoch": 0.59, + "grad_norm": 0.7340521216392517, + "learning_rate": 7.413435155537494e-06, + "loss": 2.0665, + "step": 17740 + }, + { + "epoch": 0.59, + "grad_norm": 0.7667483687400818, + "learning_rate": 7.4124084481299405e-06, + "loss": 2.0363, + "step": 17741 + }, + { + "epoch": 0.59, + "grad_norm": 0.7313727736473083, + "learning_rate": 7.411381769955479e-06, + "loss": 2.0601, + "step": 17742 + }, + { + "epoch": 0.59, + "grad_norm": 0.7961923480033875, + "learning_rate": 7.4103551210257095e-06, + "loss": 2.1401, + "step": 17743 + }, + { + "epoch": 0.59, + "grad_norm": 0.7360961437225342, + "learning_rate": 7.409328501352228e-06, + "loss": 2.0199, + "step": 17744 + }, + { + "epoch": 0.59, + "grad_norm": 0.7178511023521423, + "learning_rate": 7.408301910946636e-06, + "loss": 2.043, + "step": 17745 + }, + { + "epoch": 0.59, + "grad_norm": 0.7400296330451965, + "learning_rate": 7.407275349820533e-06, + "loss": 1.9741, + "step": 17746 + }, + { + "epoch": 0.59, + "grad_norm": 0.7485824823379517, + "learning_rate": 7.406248817985511e-06, + "loss": 2.1478, + "step": 17747 + }, + { + "epoch": 0.59, + "grad_norm": 0.7607347369194031, + "learning_rate": 7.40522231545317e-06, + "loss": 1.9981, + "step": 17748 + }, + { + "epoch": 0.59, + "grad_norm": 0.7282645106315613, + "learning_rate": 7.404195842235107e-06, + "loss": 2.0044, + "step": 17749 + }, + { + "epoch": 0.59, + "grad_norm": 0.7420344352722168, + "learning_rate": 7.4031693983429195e-06, + "loss": 2.0906, + "step": 17750 + }, + { + "epoch": 0.59, + "grad_norm": 0.7412364482879639, + "learning_rate": 7.402142983788201e-06, + "loss": 2.0336, + "step": 17751 + }, + { + "epoch": 0.59, + "grad_norm": 0.7402769923210144, + "learning_rate": 7.401116598582545e-06, + "loss": 2.0338, + "step": 17752 + }, + { + "epoch": 0.59, + "grad_norm": 0.7416160106658936, + "learning_rate": 7.4000902427375544e-06, + "loss": 1.9889, + "step": 17753 + }, + { + "epoch": 0.59, + "grad_norm": 0.7586106061935425, + "learning_rate": 7.399063916264819e-06, + "loss": 2.0744, + "step": 17754 + }, + { + "epoch": 0.59, + "grad_norm": 0.7518903017044067, + "learning_rate": 7.3980376191759376e-06, + "loss": 2.1105, + "step": 17755 + }, + { + "epoch": 0.59, + "grad_norm": 0.7239671349525452, + "learning_rate": 7.397011351482501e-06, + "loss": 2.0692, + "step": 17756 + }, + { + "epoch": 0.59, + "grad_norm": 0.7311205863952637, + "learning_rate": 7.395985113196105e-06, + "loss": 2.0476, + "step": 17757 + }, + { + "epoch": 0.59, + "grad_norm": 0.729142963886261, + "learning_rate": 7.39495890432834e-06, + "loss": 2.0722, + "step": 17758 + }, + { + "epoch": 0.59, + "grad_norm": 0.7488257884979248, + "learning_rate": 7.393932724890809e-06, + "loss": 2.0275, + "step": 17759 + }, + { + "epoch": 0.59, + "grad_norm": 0.763679027557373, + "learning_rate": 7.392906574895091e-06, + "loss": 2.009, + "step": 17760 + }, + { + "epoch": 0.59, + "grad_norm": 0.7489261627197266, + "learning_rate": 7.391880454352791e-06, + "loss": 2.1009, + "step": 17761 + }, + { + "epoch": 0.59, + "grad_norm": 0.7624302506446838, + "learning_rate": 7.390854363275497e-06, + "loss": 2.0365, + "step": 17762 + }, + { + "epoch": 0.59, + "grad_norm": 0.7260489463806152, + "learning_rate": 7.3898283016748015e-06, + "loss": 1.9936, + "step": 17763 + }, + { + "epoch": 0.59, + "grad_norm": 0.751052737236023, + "learning_rate": 7.388802269562296e-06, + "loss": 2.05, + "step": 17764 + }, + { + "epoch": 0.59, + "grad_norm": 0.7342755794525146, + "learning_rate": 7.387776266949571e-06, + "loss": 2.0367, + "step": 17765 + }, + { + "epoch": 0.59, + "grad_norm": 0.784067690372467, + "learning_rate": 7.386750293848217e-06, + "loss": 2.0104, + "step": 17766 + }, + { + "epoch": 0.59, + "grad_norm": 0.7388331890106201, + "learning_rate": 7.38572435026983e-06, + "loss": 2.0407, + "step": 17767 + }, + { + "epoch": 0.59, + "grad_norm": 0.7333582639694214, + "learning_rate": 7.384698436225997e-06, + "loss": 2.1001, + "step": 17768 + }, + { + "epoch": 0.59, + "grad_norm": 0.7617678046226501, + "learning_rate": 7.383672551728308e-06, + "loss": 2.1298, + "step": 17769 + }, + { + "epoch": 0.59, + "grad_norm": 0.7139295339584351, + "learning_rate": 7.382646696788353e-06, + "loss": 1.9787, + "step": 17770 + }, + { + "epoch": 0.59, + "grad_norm": 0.7355831265449524, + "learning_rate": 7.381620871417718e-06, + "loss": 2.0699, + "step": 17771 + }, + { + "epoch": 0.59, + "grad_norm": 0.7565092444419861, + "learning_rate": 7.380595075628006e-06, + "loss": 2.0299, + "step": 17772 + }, + { + "epoch": 0.59, + "grad_norm": 0.7425851225852966, + "learning_rate": 7.379569309430785e-06, + "loss": 2.0613, + "step": 17773 + }, + { + "epoch": 0.59, + "grad_norm": 0.7438064813613892, + "learning_rate": 7.378543572837659e-06, + "loss": 2.0475, + "step": 17774 + }, + { + "epoch": 0.59, + "grad_norm": 0.723822832107544, + "learning_rate": 7.377517865860211e-06, + "loss": 2.0264, + "step": 17775 + }, + { + "epoch": 0.59, + "grad_norm": 0.7587792277336121, + "learning_rate": 7.376492188510029e-06, + "loss": 2.1468, + "step": 17776 + }, + { + "epoch": 0.59, + "grad_norm": 0.7258195877075195, + "learning_rate": 7.375466540798701e-06, + "loss": 2.0642, + "step": 17777 + }, + { + "epoch": 0.59, + "grad_norm": 0.727118194103241, + "learning_rate": 7.374440922737813e-06, + "loss": 1.9957, + "step": 17778 + }, + { + "epoch": 0.59, + "grad_norm": 0.7043047547340393, + "learning_rate": 7.373415334338952e-06, + "loss": 2.0089, + "step": 17779 + }, + { + "epoch": 0.59, + "grad_norm": 0.7414527535438538, + "learning_rate": 7.3723897756137065e-06, + "loss": 1.9953, + "step": 17780 + }, + { + "epoch": 0.59, + "grad_norm": 0.7334194183349609, + "learning_rate": 7.371364246573664e-06, + "loss": 2.1033, + "step": 17781 + }, + { + "epoch": 0.59, + "grad_norm": 0.7746586799621582, + "learning_rate": 7.3703387472304044e-06, + "loss": 2.1177, + "step": 17782 + }, + { + "epoch": 0.59, + "grad_norm": 0.752119243144989, + "learning_rate": 7.369313277595516e-06, + "loss": 2.0425, + "step": 17783 + }, + { + "epoch": 0.59, + "grad_norm": 0.7389459013938904, + "learning_rate": 7.368287837680587e-06, + "loss": 2.1192, + "step": 17784 + }, + { + "epoch": 0.59, + "grad_norm": 0.7919149994850159, + "learning_rate": 7.367262427497195e-06, + "loss": 2.0668, + "step": 17785 + }, + { + "epoch": 0.59, + "grad_norm": 0.743135929107666, + "learning_rate": 7.366237047056937e-06, + "loss": 2.0587, + "step": 17786 + }, + { + "epoch": 0.59, + "grad_norm": 0.7341259121894836, + "learning_rate": 7.365211696371383e-06, + "loss": 2.0018, + "step": 17787 + }, + { + "epoch": 0.59, + "grad_norm": 0.7511879205703735, + "learning_rate": 7.364186375452125e-06, + "loss": 2.0657, + "step": 17788 + }, + { + "epoch": 0.59, + "grad_norm": 0.7610931396484375, + "learning_rate": 7.363161084310744e-06, + "loss": 2.0686, + "step": 17789 + }, + { + "epoch": 0.59, + "grad_norm": 0.7377996444702148, + "learning_rate": 7.362135822958826e-06, + "loss": 2.1125, + "step": 17790 + }, + { + "epoch": 0.59, + "grad_norm": 0.7370397448539734, + "learning_rate": 7.361110591407949e-06, + "loss": 2.0606, + "step": 17791 + }, + { + "epoch": 0.59, + "grad_norm": 0.7400810718536377, + "learning_rate": 7.360085389669699e-06, + "loss": 2.0475, + "step": 17792 + }, + { + "epoch": 0.59, + "grad_norm": 0.7479839324951172, + "learning_rate": 7.359060217755655e-06, + "loss": 2.0531, + "step": 17793 + }, + { + "epoch": 0.59, + "grad_norm": 0.7449530959129333, + "learning_rate": 7.358035075677407e-06, + "loss": 2.0178, + "step": 17794 + }, + { + "epoch": 0.59, + "grad_norm": 0.7437887191772461, + "learning_rate": 7.357009963446524e-06, + "loss": 2.0982, + "step": 17795 + }, + { + "epoch": 0.59, + "grad_norm": 0.7237464189529419, + "learning_rate": 7.355984881074595e-06, + "loss": 2.1313, + "step": 17796 + }, + { + "epoch": 0.59, + "grad_norm": 0.7515939474105835, + "learning_rate": 7.3549598285732e-06, + "loss": 2.0629, + "step": 17797 + }, + { + "epoch": 0.59, + "grad_norm": 0.7517289519309998, + "learning_rate": 7.353934805953918e-06, + "loss": 2.0639, + "step": 17798 + }, + { + "epoch": 0.59, + "grad_norm": 0.7352995872497559, + "learning_rate": 7.352909813228332e-06, + "loss": 2.0118, + "step": 17799 + }, + { + "epoch": 0.59, + "grad_norm": 0.7311394214630127, + "learning_rate": 7.351884850408019e-06, + "loss": 2.0324, + "step": 17800 + }, + { + "epoch": 0.59, + "grad_norm": 0.7241724729537964, + "learning_rate": 7.350859917504556e-06, + "loss": 2.0373, + "step": 17801 + }, + { + "epoch": 0.59, + "grad_norm": 0.737528383731842, + "learning_rate": 7.349835014529527e-06, + "loss": 2.1413, + "step": 17802 + }, + { + "epoch": 0.59, + "grad_norm": 0.7196407318115234, + "learning_rate": 7.3488101414945115e-06, + "loss": 2.0368, + "step": 17803 + }, + { + "epoch": 0.59, + "grad_norm": 0.7350615859031677, + "learning_rate": 7.347785298411081e-06, + "loss": 2.0961, + "step": 17804 + }, + { + "epoch": 0.59, + "grad_norm": 0.7256792783737183, + "learning_rate": 7.3467604852908205e-06, + "loss": 2.0103, + "step": 17805 + }, + { + "epoch": 0.59, + "grad_norm": 0.7421337962150574, + "learning_rate": 7.345735702145303e-06, + "loss": 2.0138, + "step": 17806 + }, + { + "epoch": 0.59, + "grad_norm": 0.7479231953620911, + "learning_rate": 7.344710948986107e-06, + "loss": 2.0582, + "step": 17807 + }, + { + "epoch": 0.59, + "grad_norm": 0.7323600053787231, + "learning_rate": 7.343686225824818e-06, + "loss": 2.072, + "step": 17808 + }, + { + "epoch": 0.59, + "grad_norm": 0.7286627888679504, + "learning_rate": 7.342661532672996e-06, + "loss": 2.0709, + "step": 17809 + }, + { + "epoch": 0.59, + "grad_norm": 0.7404723763465881, + "learning_rate": 7.341636869542232e-06, + "loss": 2.0659, + "step": 17810 + }, + { + "epoch": 0.59, + "grad_norm": 0.7348681688308716, + "learning_rate": 7.3406122364440956e-06, + "loss": 2.039, + "step": 17811 + }, + { + "epoch": 0.59, + "grad_norm": 0.7604536414146423, + "learning_rate": 7.339587633390164e-06, + "loss": 2.0932, + "step": 17812 + }, + { + "epoch": 0.59, + "grad_norm": 0.7365281581878662, + "learning_rate": 7.3385630603920125e-06, + "loss": 2.1849, + "step": 17813 + }, + { + "epoch": 0.59, + "grad_norm": 0.7569277882575989, + "learning_rate": 7.337538517461213e-06, + "loss": 2.1177, + "step": 17814 + }, + { + "epoch": 0.59, + "grad_norm": 0.7005457878112793, + "learning_rate": 7.336514004609343e-06, + "loss": 2.0606, + "step": 17815 + }, + { + "epoch": 0.59, + "grad_norm": 0.706281840801239, + "learning_rate": 7.335489521847979e-06, + "loss": 2.0905, + "step": 17816 + }, + { + "epoch": 0.59, + "grad_norm": 0.7244832515716553, + "learning_rate": 7.3344650691886944e-06, + "loss": 2.0739, + "step": 17817 + }, + { + "epoch": 0.59, + "grad_norm": 0.7549883723258972, + "learning_rate": 7.33344064664306e-06, + "loss": 2.1149, + "step": 17818 + }, + { + "epoch": 0.59, + "grad_norm": 0.7273354530334473, + "learning_rate": 7.3324162542226496e-06, + "loss": 2.1087, + "step": 17819 + }, + { + "epoch": 0.59, + "grad_norm": 0.7335820198059082, + "learning_rate": 7.331391891939037e-06, + "loss": 2.0354, + "step": 17820 + }, + { + "epoch": 0.59, + "grad_norm": 0.742071270942688, + "learning_rate": 7.330367559803797e-06, + "loss": 2.0625, + "step": 17821 + }, + { + "epoch": 0.59, + "grad_norm": 0.7222149968147278, + "learning_rate": 7.3293432578284964e-06, + "loss": 2.038, + "step": 17822 + }, + { + "epoch": 0.59, + "grad_norm": 0.7608742117881775, + "learning_rate": 7.3283189860247095e-06, + "loss": 2.0672, + "step": 17823 + }, + { + "epoch": 0.59, + "grad_norm": 0.749167799949646, + "learning_rate": 7.327294744404012e-06, + "loss": 2.0734, + "step": 17824 + }, + { + "epoch": 0.59, + "grad_norm": 0.7248327732086182, + "learning_rate": 7.326270532977972e-06, + "loss": 2.0268, + "step": 17825 + }, + { + "epoch": 0.59, + "grad_norm": 0.8058667182922363, + "learning_rate": 7.325246351758162e-06, + "loss": 2.011, + "step": 17826 + }, + { + "epoch": 0.59, + "grad_norm": 0.7379785776138306, + "learning_rate": 7.324222200756148e-06, + "loss": 2.0337, + "step": 17827 + }, + { + "epoch": 0.59, + "grad_norm": 0.7427505850791931, + "learning_rate": 7.323198079983504e-06, + "loss": 2.0518, + "step": 17828 + }, + { + "epoch": 0.59, + "grad_norm": 0.7466215491294861, + "learning_rate": 7.322173989451798e-06, + "loss": 2.0631, + "step": 17829 + }, + { + "epoch": 0.59, + "grad_norm": 0.7374353408813477, + "learning_rate": 7.321149929172606e-06, + "loss": 2.0333, + "step": 17830 + }, + { + "epoch": 0.59, + "grad_norm": 0.7147338390350342, + "learning_rate": 7.320125899157488e-06, + "loss": 2.0453, + "step": 17831 + }, + { + "epoch": 0.59, + "grad_norm": 0.7602620720863342, + "learning_rate": 7.319101899418018e-06, + "loss": 2.0495, + "step": 17832 + }, + { + "epoch": 0.59, + "grad_norm": 0.756175696849823, + "learning_rate": 7.318077929965763e-06, + "loss": 2.0907, + "step": 17833 + }, + { + "epoch": 0.59, + "grad_norm": 0.7279608249664307, + "learning_rate": 7.3170539908122936e-06, + "loss": 2.0774, + "step": 17834 + }, + { + "epoch": 0.59, + "grad_norm": 0.758105993270874, + "learning_rate": 7.316030081969174e-06, + "loss": 2.0671, + "step": 17835 + }, + { + "epoch": 0.59, + "grad_norm": 0.7391592264175415, + "learning_rate": 7.315006203447974e-06, + "loss": 2.0209, + "step": 17836 + }, + { + "epoch": 0.59, + "grad_norm": 0.7743181586265564, + "learning_rate": 7.313982355260259e-06, + "loss": 2.0719, + "step": 17837 + }, + { + "epoch": 0.59, + "grad_norm": 0.7753261923789978, + "learning_rate": 7.312958537417598e-06, + "loss": 1.9928, + "step": 17838 + }, + { + "epoch": 0.59, + "grad_norm": 0.7553514242172241, + "learning_rate": 7.311934749931559e-06, + "loss": 2.1084, + "step": 17839 + }, + { + "epoch": 0.59, + "grad_norm": 0.762657880783081, + "learning_rate": 7.3109109928137046e-06, + "loss": 2.1374, + "step": 17840 + }, + { + "epoch": 0.59, + "grad_norm": 0.7669672966003418, + "learning_rate": 7.309887266075601e-06, + "loss": 2.0134, + "step": 17841 + }, + { + "epoch": 0.59, + "grad_norm": 0.7367768883705139, + "learning_rate": 7.308863569728816e-06, + "loss": 2.0721, + "step": 17842 + }, + { + "epoch": 0.59, + "grad_norm": 0.7719810009002686, + "learning_rate": 7.307839903784913e-06, + "loss": 2.0269, + "step": 17843 + }, + { + "epoch": 0.59, + "grad_norm": 0.7666747570037842, + "learning_rate": 7.306816268255457e-06, + "loss": 2.0814, + "step": 17844 + }, + { + "epoch": 0.59, + "grad_norm": 0.7137101292610168, + "learning_rate": 7.305792663152009e-06, + "loss": 2.0167, + "step": 17845 + }, + { + "epoch": 0.59, + "grad_norm": 0.7196882367134094, + "learning_rate": 7.304769088486139e-06, + "loss": 2.0055, + "step": 17846 + }, + { + "epoch": 0.59, + "grad_norm": 0.7837442755699158, + "learning_rate": 7.303745544269408e-06, + "loss": 2.0205, + "step": 17847 + }, + { + "epoch": 0.59, + "grad_norm": 0.7269256114959717, + "learning_rate": 7.3027220305133825e-06, + "loss": 2.0144, + "step": 17848 + }, + { + "epoch": 0.59, + "grad_norm": 0.7971704602241516, + "learning_rate": 7.301698547229621e-06, + "loss": 2.0444, + "step": 17849 + }, + { + "epoch": 0.59, + "grad_norm": 0.7280442714691162, + "learning_rate": 7.300675094429687e-06, + "loss": 2.1118, + "step": 17850 + }, + { + "epoch": 0.59, + "grad_norm": 0.7282776832580566, + "learning_rate": 7.299651672125141e-06, + "loss": 2.0868, + "step": 17851 + }, + { + "epoch": 0.59, + "grad_norm": 0.724920928478241, + "learning_rate": 7.298628280327555e-06, + "loss": 2.0116, + "step": 17852 + }, + { + "epoch": 0.59, + "grad_norm": 0.7390250563621521, + "learning_rate": 7.297604919048477e-06, + "loss": 2.0596, + "step": 17853 + }, + { + "epoch": 0.59, + "grad_norm": 0.729977011680603, + "learning_rate": 7.296581588299478e-06, + "loss": 2.0348, + "step": 17854 + }, + { + "epoch": 0.59, + "grad_norm": 0.7874935269355774, + "learning_rate": 7.295558288092115e-06, + "loss": 2.1174, + "step": 17855 + }, + { + "epoch": 0.59, + "grad_norm": 0.7345505952835083, + "learning_rate": 7.294535018437952e-06, + "loss": 2.0861, + "step": 17856 + }, + { + "epoch": 0.59, + "grad_norm": 0.7641611099243164, + "learning_rate": 7.293511779348543e-06, + "loss": 2.1065, + "step": 17857 + }, + { + "epoch": 0.59, + "grad_norm": 0.7234377264976501, + "learning_rate": 7.292488570835454e-06, + "loss": 2.0995, + "step": 17858 + }, + { + "epoch": 0.59, + "grad_norm": 0.7408335208892822, + "learning_rate": 7.29146539291024e-06, + "loss": 2.0377, + "step": 17859 + }, + { + "epoch": 0.59, + "grad_norm": 0.7750611901283264, + "learning_rate": 7.290442245584463e-06, + "loss": 2.0812, + "step": 17860 + }, + { + "epoch": 0.59, + "grad_norm": 0.7305443286895752, + "learning_rate": 7.289419128869686e-06, + "loss": 2.1033, + "step": 17861 + }, + { + "epoch": 0.59, + "grad_norm": 0.7511100769042969, + "learning_rate": 7.2883960427774596e-06, + "loss": 2.0365, + "step": 17862 + }, + { + "epoch": 0.59, + "grad_norm": 0.7300098538398743, + "learning_rate": 7.287372987319345e-06, + "loss": 2.0811, + "step": 17863 + }, + { + "epoch": 0.59, + "grad_norm": 0.7811533212661743, + "learning_rate": 7.2863499625069e-06, + "loss": 2.0463, + "step": 17864 + }, + { + "epoch": 0.59, + "grad_norm": 0.7321670055389404, + "learning_rate": 7.285326968351689e-06, + "loss": 2.0697, + "step": 17865 + }, + { + "epoch": 0.59, + "grad_norm": 0.757247805595398, + "learning_rate": 7.284304004865257e-06, + "loss": 2.0987, + "step": 17866 + }, + { + "epoch": 0.59, + "grad_norm": 0.7395954728126526, + "learning_rate": 7.283281072059166e-06, + "loss": 2.0657, + "step": 17867 + }, + { + "epoch": 0.59, + "grad_norm": 0.7651152014732361, + "learning_rate": 7.282258169944975e-06, + "loss": 2.0651, + "step": 17868 + }, + { + "epoch": 0.59, + "grad_norm": 0.778978168964386, + "learning_rate": 7.2812352985342395e-06, + "loss": 2.0186, + "step": 17869 + }, + { + "epoch": 0.59, + "grad_norm": 0.7850691080093384, + "learning_rate": 7.280212457838516e-06, + "loss": 2.0577, + "step": 17870 + }, + { + "epoch": 0.59, + "grad_norm": 0.7409400343894958, + "learning_rate": 7.279189647869355e-06, + "loss": 2.1197, + "step": 17871 + }, + { + "epoch": 0.59, + "grad_norm": 0.7363841533660889, + "learning_rate": 7.278166868638314e-06, + "loss": 2.0981, + "step": 17872 + }, + { + "epoch": 0.59, + "grad_norm": 0.7663241028785706, + "learning_rate": 7.277144120156949e-06, + "loss": 2.0634, + "step": 17873 + }, + { + "epoch": 0.59, + "grad_norm": 0.7524129748344421, + "learning_rate": 7.27612140243682e-06, + "loss": 2.0284, + "step": 17874 + }, + { + "epoch": 0.59, + "grad_norm": 0.7456262111663818, + "learning_rate": 7.275098715489468e-06, + "loss": 2.0129, + "step": 17875 + }, + { + "epoch": 0.59, + "grad_norm": 0.7135251760482788, + "learning_rate": 7.274076059326456e-06, + "loss": 2.1406, + "step": 17876 + }, + { + "epoch": 0.59, + "grad_norm": 0.7329150438308716, + "learning_rate": 7.273053433959334e-06, + "loss": 2.1283, + "step": 17877 + }, + { + "epoch": 0.59, + "grad_norm": 0.7537440061569214, + "learning_rate": 7.272030839399655e-06, + "loss": 2.0728, + "step": 17878 + }, + { + "epoch": 0.59, + "grad_norm": 0.7616793513298035, + "learning_rate": 7.27100827565898e-06, + "loss": 2.0959, + "step": 17879 + }, + { + "epoch": 0.59, + "grad_norm": 0.7395745515823364, + "learning_rate": 7.269985742748847e-06, + "loss": 2.0758, + "step": 17880 + }, + { + "epoch": 0.59, + "grad_norm": 0.7580609917640686, + "learning_rate": 7.268963240680816e-06, + "loss": 1.9875, + "step": 17881 + }, + { + "epoch": 0.59, + "grad_norm": 0.7498340606689453, + "learning_rate": 7.267940769466439e-06, + "loss": 2.1184, + "step": 17882 + }, + { + "epoch": 0.59, + "grad_norm": 0.7456172704696655, + "learning_rate": 7.266918329117268e-06, + "loss": 2.0155, + "step": 17883 + }, + { + "epoch": 0.6, + "grad_norm": 0.721441924571991, + "learning_rate": 7.26589591964485e-06, + "loss": 2.0668, + "step": 17884 + }, + { + "epoch": 0.6, + "grad_norm": 0.7541223764419556, + "learning_rate": 7.2648735410607375e-06, + "loss": 2.1293, + "step": 17885 + }, + { + "epoch": 0.6, + "grad_norm": 0.733963668346405, + "learning_rate": 7.2638511933764785e-06, + "loss": 2.0249, + "step": 17886 + }, + { + "epoch": 0.6, + "grad_norm": 0.7305247783660889, + "learning_rate": 7.262828876603632e-06, + "loss": 1.9565, + "step": 17887 + }, + { + "epoch": 0.6, + "grad_norm": 0.7469213008880615, + "learning_rate": 7.261806590753735e-06, + "loss": 2.0329, + "step": 17888 + }, + { + "epoch": 0.6, + "grad_norm": 0.7154189944267273, + "learning_rate": 7.260784335838342e-06, + "loss": 2.074, + "step": 17889 + }, + { + "epoch": 0.6, + "grad_norm": 0.7596710324287415, + "learning_rate": 7.259762111869004e-06, + "loss": 2.0388, + "step": 17890 + }, + { + "epoch": 0.6, + "grad_norm": 0.7186411619186401, + "learning_rate": 7.258739918857268e-06, + "loss": 2.0028, + "step": 17891 + }, + { + "epoch": 0.6, + "grad_norm": 0.7785205245018005, + "learning_rate": 7.257717756814684e-06, + "loss": 2.1362, + "step": 17892 + }, + { + "epoch": 0.6, + "grad_norm": 0.744401752948761, + "learning_rate": 7.2566956257527955e-06, + "loss": 2.0834, + "step": 17893 + }, + { + "epoch": 0.6, + "grad_norm": 0.769404947757721, + "learning_rate": 7.255673525683149e-06, + "loss": 2.0605, + "step": 17894 + }, + { + "epoch": 0.6, + "grad_norm": 0.7238067388534546, + "learning_rate": 7.2546514566172976e-06, + "loss": 2.0423, + "step": 17895 + }, + { + "epoch": 0.6, + "grad_norm": 0.7312318682670593, + "learning_rate": 7.253629418566788e-06, + "loss": 2.0546, + "step": 17896 + }, + { + "epoch": 0.6, + "grad_norm": 0.7429822683334351, + "learning_rate": 7.2526074115431596e-06, + "loss": 2.1178, + "step": 17897 + }, + { + "epoch": 0.6, + "grad_norm": 0.760276198387146, + "learning_rate": 7.251585435557964e-06, + "loss": 2.0882, + "step": 17898 + }, + { + "epoch": 0.6, + "grad_norm": 0.7697278261184692, + "learning_rate": 7.250563490622744e-06, + "loss": 2.0796, + "step": 17899 + }, + { + "epoch": 0.6, + "grad_norm": 0.7592955827713013, + "learning_rate": 7.2495415767490455e-06, + "loss": 2.0276, + "step": 17900 + }, + { + "epoch": 0.6, + "grad_norm": 0.7309766411781311, + "learning_rate": 7.2485196939484206e-06, + "loss": 2.0738, + "step": 17901 + }, + { + "epoch": 0.6, + "grad_norm": 0.753669261932373, + "learning_rate": 7.2474978422324005e-06, + "loss": 2.0784, + "step": 17902 + }, + { + "epoch": 0.6, + "grad_norm": 0.7397944927215576, + "learning_rate": 7.24647602161254e-06, + "loss": 2.0599, + "step": 17903 + }, + { + "epoch": 0.6, + "grad_norm": 0.7328388094902039, + "learning_rate": 7.245454232100379e-06, + "loss": 2.0207, + "step": 17904 + }, + { + "epoch": 0.6, + "grad_norm": 0.7648415565490723, + "learning_rate": 7.244432473707463e-06, + "loss": 2.087, + "step": 17905 + }, + { + "epoch": 0.6, + "grad_norm": 0.72069251537323, + "learning_rate": 7.243410746445333e-06, + "loss": 2.0721, + "step": 17906 + }, + { + "epoch": 0.6, + "grad_norm": 0.759331464767456, + "learning_rate": 7.242389050325534e-06, + "loss": 2.0027, + "step": 17907 + }, + { + "epoch": 0.6, + "grad_norm": 0.7365291714668274, + "learning_rate": 7.241367385359603e-06, + "loss": 2.0525, + "step": 17908 + }, + { + "epoch": 0.6, + "grad_norm": 0.7261979579925537, + "learning_rate": 7.240345751559094e-06, + "loss": 2.002, + "step": 17909 + }, + { + "epoch": 0.6, + "grad_norm": 0.730476438999176, + "learning_rate": 7.239324148935534e-06, + "loss": 2.0873, + "step": 17910 + }, + { + "epoch": 0.6, + "grad_norm": 0.7355251312255859, + "learning_rate": 7.2383025775004755e-06, + "loss": 2.0648, + "step": 17911 + }, + { + "epoch": 0.6, + "grad_norm": 0.7314817905426025, + "learning_rate": 7.237281037265456e-06, + "loss": 2.0702, + "step": 17912 + }, + { + "epoch": 0.6, + "grad_norm": 0.758685827255249, + "learning_rate": 7.2362595282420145e-06, + "loss": 2.0887, + "step": 17913 + }, + { + "epoch": 0.6, + "grad_norm": 0.741740345954895, + "learning_rate": 7.235238050441697e-06, + "loss": 2.0969, + "step": 17914 + }, + { + "epoch": 0.6, + "grad_norm": 0.7191538214683533, + "learning_rate": 7.2342166038760365e-06, + "loss": 2.0898, + "step": 17915 + }, + { + "epoch": 0.6, + "grad_norm": 0.7320390343666077, + "learning_rate": 7.233195188556575e-06, + "loss": 2.0545, + "step": 17916 + }, + { + "epoch": 0.6, + "grad_norm": 0.7380185723304749, + "learning_rate": 7.232173804494853e-06, + "loss": 2.0369, + "step": 17917 + }, + { + "epoch": 0.6, + "grad_norm": 0.7580181956291199, + "learning_rate": 7.231152451702412e-06, + "loss": 2.0429, + "step": 17918 + }, + { + "epoch": 0.6, + "grad_norm": 0.7267543077468872, + "learning_rate": 7.230131130190786e-06, + "loss": 2.0553, + "step": 17919 + }, + { + "epoch": 0.6, + "grad_norm": 0.7393495440483093, + "learning_rate": 7.229109839971515e-06, + "loss": 2.099, + "step": 17920 + }, + { + "epoch": 0.6, + "grad_norm": 0.7081518173217773, + "learning_rate": 7.228088581056138e-06, + "loss": 2.0878, + "step": 17921 + }, + { + "epoch": 0.6, + "grad_norm": 0.7415730953216553, + "learning_rate": 7.227067353456189e-06, + "loss": 2.0445, + "step": 17922 + }, + { + "epoch": 0.6, + "grad_norm": 0.753551721572876, + "learning_rate": 7.226046157183215e-06, + "loss": 2.1551, + "step": 17923 + }, + { + "epoch": 0.6, + "grad_norm": 0.7541089653968811, + "learning_rate": 7.225024992248738e-06, + "loss": 2.1355, + "step": 17924 + }, + { + "epoch": 0.6, + "grad_norm": 0.7250111103057861, + "learning_rate": 7.224003858664306e-06, + "loss": 2.0474, + "step": 17925 + }, + { + "epoch": 0.6, + "grad_norm": 0.742510974407196, + "learning_rate": 7.22298275644145e-06, + "loss": 2.1101, + "step": 17926 + }, + { + "epoch": 0.6, + "grad_norm": 0.7271057963371277, + "learning_rate": 7.22196168559171e-06, + "loss": 2.0062, + "step": 17927 + }, + { + "epoch": 0.6, + "grad_norm": 0.7311848402023315, + "learning_rate": 7.220940646126617e-06, + "loss": 2.0787, + "step": 17928 + }, + { + "epoch": 0.6, + "grad_norm": 0.7355474829673767, + "learning_rate": 7.219919638057709e-06, + "loss": 2.103, + "step": 17929 + }, + { + "epoch": 0.6, + "grad_norm": 0.7226027846336365, + "learning_rate": 7.218898661396516e-06, + "loss": 2.0114, + "step": 17930 + }, + { + "epoch": 0.6, + "grad_norm": 0.7263904213905334, + "learning_rate": 7.217877716154578e-06, + "loss": 2.0872, + "step": 17931 + }, + { + "epoch": 0.6, + "grad_norm": 0.7259615063667297, + "learning_rate": 7.2168568023434305e-06, + "loss": 2.0047, + "step": 17932 + }, + { + "epoch": 0.6, + "grad_norm": 0.7554442882537842, + "learning_rate": 7.215835919974601e-06, + "loss": 2.0629, + "step": 17933 + }, + { + "epoch": 0.6, + "grad_norm": 0.7583115696907043, + "learning_rate": 7.214815069059624e-06, + "loss": 2.1538, + "step": 17934 + }, + { + "epoch": 0.6, + "grad_norm": 0.7534477710723877, + "learning_rate": 7.213794249610036e-06, + "loss": 2.0956, + "step": 17935 + }, + { + "epoch": 0.6, + "grad_norm": 0.7588562965393066, + "learning_rate": 7.212773461637369e-06, + "loss": 2.066, + "step": 17936 + }, + { + "epoch": 0.6, + "grad_norm": 0.7516148686408997, + "learning_rate": 7.211752705153152e-06, + "loss": 2.0868, + "step": 17937 + }, + { + "epoch": 0.6, + "grad_norm": 0.7448422908782959, + "learning_rate": 7.210731980168917e-06, + "loss": 2.031, + "step": 17938 + }, + { + "epoch": 0.6, + "grad_norm": 0.7475968599319458, + "learning_rate": 7.2097112866961995e-06, + "loss": 2.021, + "step": 17939 + }, + { + "epoch": 0.6, + "grad_norm": 0.7238162755966187, + "learning_rate": 7.208690624746531e-06, + "loss": 2.0914, + "step": 17940 + }, + { + "epoch": 0.6, + "grad_norm": 0.7308852076530457, + "learning_rate": 7.207669994331436e-06, + "loss": 2.0549, + "step": 17941 + }, + { + "epoch": 0.6, + "grad_norm": 0.7234670519828796, + "learning_rate": 7.2066493954624515e-06, + "loss": 2.0747, + "step": 17942 + }, + { + "epoch": 0.6, + "grad_norm": 0.7665688395500183, + "learning_rate": 7.205628828151105e-06, + "loss": 2.0923, + "step": 17943 + }, + { + "epoch": 0.6, + "grad_norm": 0.7416234612464905, + "learning_rate": 7.2046082924089225e-06, + "loss": 2.104, + "step": 17944 + }, + { + "epoch": 0.6, + "grad_norm": 0.7242316603660583, + "learning_rate": 7.203587788247444e-06, + "loss": 2.0429, + "step": 17945 + }, + { + "epoch": 0.6, + "grad_norm": 0.7783018946647644, + "learning_rate": 7.202567315678186e-06, + "loss": 2.0479, + "step": 17946 + }, + { + "epoch": 0.6, + "grad_norm": 0.7264226078987122, + "learning_rate": 7.201546874712685e-06, + "loss": 2.0087, + "step": 17947 + }, + { + "epoch": 0.6, + "grad_norm": 0.7330355644226074, + "learning_rate": 7.200526465362467e-06, + "loss": 2.0535, + "step": 17948 + }, + { + "epoch": 0.6, + "grad_norm": 0.7609871625900269, + "learning_rate": 7.1995060876390635e-06, + "loss": 2.1133, + "step": 17949 + }, + { + "epoch": 0.6, + "grad_norm": 0.7337692975997925, + "learning_rate": 7.198485741553996e-06, + "loss": 2.034, + "step": 17950 + }, + { + "epoch": 0.6, + "grad_norm": 0.7522493004798889, + "learning_rate": 7.197465427118795e-06, + "loss": 2.0977, + "step": 17951 + }, + { + "epoch": 0.6, + "grad_norm": 0.7586632966995239, + "learning_rate": 7.1964451443449835e-06, + "loss": 2.0329, + "step": 17952 + }, + { + "epoch": 0.6, + "grad_norm": 0.7458822727203369, + "learning_rate": 7.195424893244096e-06, + "loss": 2.0068, + "step": 17953 + }, + { + "epoch": 0.6, + "grad_norm": 0.7102757692337036, + "learning_rate": 7.194404673827655e-06, + "loss": 2.0581, + "step": 17954 + }, + { + "epoch": 0.6, + "grad_norm": 0.770132839679718, + "learning_rate": 7.193384486107185e-06, + "loss": 2.0479, + "step": 17955 + }, + { + "epoch": 0.6, + "grad_norm": 0.7801522016525269, + "learning_rate": 7.192364330094211e-06, + "loss": 2.0888, + "step": 17956 + }, + { + "epoch": 0.6, + "grad_norm": 0.717063307762146, + "learning_rate": 7.191344205800258e-06, + "loss": 2.0847, + "step": 17957 + }, + { + "epoch": 0.6, + "grad_norm": 0.7659975290298462, + "learning_rate": 7.1903241132368585e-06, + "loss": 2.0721, + "step": 17958 + }, + { + "epoch": 0.6, + "grad_norm": 0.7504226565361023, + "learning_rate": 7.189304052415527e-06, + "loss": 2.1044, + "step": 17959 + }, + { + "epoch": 0.6, + "grad_norm": 0.7535562515258789, + "learning_rate": 7.188284023347787e-06, + "loss": 2.033, + "step": 17960 + }, + { + "epoch": 0.6, + "grad_norm": 0.8167473673820496, + "learning_rate": 7.18726402604517e-06, + "loss": 2.0448, + "step": 17961 + }, + { + "epoch": 0.6, + "grad_norm": 0.7546722292900085, + "learning_rate": 7.186244060519194e-06, + "loss": 2.015, + "step": 17962 + }, + { + "epoch": 0.6, + "grad_norm": 0.7364355325698853, + "learning_rate": 7.185224126781387e-06, + "loss": 2.133, + "step": 17963 + }, + { + "epoch": 0.6, + "grad_norm": 0.7394760251045227, + "learning_rate": 7.184204224843266e-06, + "loss": 2.1231, + "step": 17964 + }, + { + "epoch": 0.6, + "grad_norm": 0.7557095885276794, + "learning_rate": 7.183184354716353e-06, + "loss": 2.0895, + "step": 17965 + }, + { + "epoch": 0.6, + "grad_norm": 0.7203226685523987, + "learning_rate": 7.182164516412176e-06, + "loss": 1.9864, + "step": 17966 + }, + { + "epoch": 0.6, + "grad_norm": 0.7318986058235168, + "learning_rate": 7.181144709942255e-06, + "loss": 2.0836, + "step": 17967 + }, + { + "epoch": 0.6, + "grad_norm": 0.7299092411994934, + "learning_rate": 7.180124935318104e-06, + "loss": 2.0121, + "step": 17968 + }, + { + "epoch": 0.6, + "grad_norm": 0.71380215883255, + "learning_rate": 7.17910519255125e-06, + "loss": 2.0794, + "step": 17969 + }, + { + "epoch": 0.6, + "grad_norm": 0.7712464332580566, + "learning_rate": 7.178085481653212e-06, + "loss": 2.0264, + "step": 17970 + }, + { + "epoch": 0.6, + "grad_norm": 0.7579473257064819, + "learning_rate": 7.177065802635514e-06, + "loss": 2.1623, + "step": 17971 + }, + { + "epoch": 0.6, + "grad_norm": 0.7407003045082092, + "learning_rate": 7.176046155509669e-06, + "loss": 2.1105, + "step": 17972 + }, + { + "epoch": 0.6, + "grad_norm": 0.759030282497406, + "learning_rate": 7.175026540287198e-06, + "loss": 2.0729, + "step": 17973 + }, + { + "epoch": 0.6, + "grad_norm": 0.7267532348632812, + "learning_rate": 7.174006956979624e-06, + "loss": 2.0225, + "step": 17974 + }, + { + "epoch": 0.6, + "grad_norm": 0.7460594177246094, + "learning_rate": 7.1729874055984615e-06, + "loss": 2.0585, + "step": 17975 + }, + { + "epoch": 0.6, + "grad_norm": 0.7238723635673523, + "learning_rate": 7.171967886155235e-06, + "loss": 2.0518, + "step": 17976 + }, + { + "epoch": 0.6, + "grad_norm": 0.7584162950515747, + "learning_rate": 7.170948398661455e-06, + "loss": 2.1248, + "step": 17977 + }, + { + "epoch": 0.6, + "grad_norm": 0.7261293530464172, + "learning_rate": 7.169928943128641e-06, + "loss": 2.072, + "step": 17978 + }, + { + "epoch": 0.6, + "grad_norm": 0.733440101146698, + "learning_rate": 7.16890951956831e-06, + "loss": 1.9702, + "step": 17979 + }, + { + "epoch": 0.6, + "grad_norm": 0.7274598479270935, + "learning_rate": 7.167890127991986e-06, + "loss": 2.0399, + "step": 17980 + }, + { + "epoch": 0.6, + "grad_norm": 0.7679572105407715, + "learning_rate": 7.1668707684111724e-06, + "loss": 2.0206, + "step": 17981 + }, + { + "epoch": 0.6, + "grad_norm": 0.7306627035140991, + "learning_rate": 7.165851440837396e-06, + "loss": 2.0372, + "step": 17982 + }, + { + "epoch": 0.6, + "grad_norm": 0.7349066734313965, + "learning_rate": 7.1648321452821675e-06, + "loss": 2.0691, + "step": 17983 + }, + { + "epoch": 0.6, + "grad_norm": 0.7820791006088257, + "learning_rate": 7.1638128817570064e-06, + "loss": 2.0598, + "step": 17984 + }, + { + "epoch": 0.6, + "grad_norm": 0.7475489377975464, + "learning_rate": 7.1627936502734255e-06, + "loss": 2.1282, + "step": 17985 + }, + { + "epoch": 0.6, + "grad_norm": 0.737223744392395, + "learning_rate": 7.161774450842938e-06, + "loss": 2.0838, + "step": 17986 + }, + { + "epoch": 0.6, + "grad_norm": 0.7610255479812622, + "learning_rate": 7.160755283477056e-06, + "loss": 2.0312, + "step": 17987 + }, + { + "epoch": 0.6, + "grad_norm": 0.7358512878417969, + "learning_rate": 7.1597361481873e-06, + "loss": 2.0384, + "step": 17988 + }, + { + "epoch": 0.6, + "grad_norm": 0.7503926157951355, + "learning_rate": 7.158717044985182e-06, + "loss": 2.0614, + "step": 17989 + }, + { + "epoch": 0.6, + "grad_norm": 0.7606817483901978, + "learning_rate": 7.157697973882211e-06, + "loss": 2.0713, + "step": 17990 + }, + { + "epoch": 0.6, + "grad_norm": 0.7520619034767151, + "learning_rate": 7.1566789348899035e-06, + "loss": 2.0496, + "step": 17991 + }, + { + "epoch": 0.6, + "grad_norm": 0.7657539248466492, + "learning_rate": 7.155659928019772e-06, + "loss": 2.0818, + "step": 17992 + }, + { + "epoch": 0.6, + "grad_norm": 0.7440844178199768, + "learning_rate": 7.154640953283323e-06, + "loss": 2.012, + "step": 17993 + }, + { + "epoch": 0.6, + "grad_norm": 0.7560644745826721, + "learning_rate": 7.1536220106920806e-06, + "loss": 2.0935, + "step": 17994 + }, + { + "epoch": 0.6, + "grad_norm": 0.7839361429214478, + "learning_rate": 7.152603100257543e-06, + "loss": 2.1166, + "step": 17995 + }, + { + "epoch": 0.6, + "grad_norm": 0.7554149627685547, + "learning_rate": 7.151584221991228e-06, + "loss": 2.1226, + "step": 17996 + }, + { + "epoch": 0.6, + "grad_norm": 0.7348247766494751, + "learning_rate": 7.150565375904646e-06, + "loss": 2.1048, + "step": 17997 + }, + { + "epoch": 0.6, + "grad_norm": 0.7266359925270081, + "learning_rate": 7.149546562009307e-06, + "loss": 2.0478, + "step": 17998 + }, + { + "epoch": 0.6, + "grad_norm": 0.7353495955467224, + "learning_rate": 7.14852778031672e-06, + "loss": 2.0227, + "step": 17999 + }, + { + "epoch": 0.6, + "grad_norm": 0.7610156536102295, + "learning_rate": 7.147509030838393e-06, + "loss": 2.0514, + "step": 18000 + }, + { + "epoch": 0.6, + "grad_norm": 0.7567755579948425, + "learning_rate": 7.146490313585837e-06, + "loss": 1.9938, + "step": 18001 + }, + { + "epoch": 0.6, + "grad_norm": 0.7315750122070312, + "learning_rate": 7.145471628570567e-06, + "loss": 2.0548, + "step": 18002 + }, + { + "epoch": 0.6, + "grad_norm": 0.7647253274917603, + "learning_rate": 7.144452975804079e-06, + "loss": 2.1238, + "step": 18003 + }, + { + "epoch": 0.6, + "grad_norm": 0.7441285252571106, + "learning_rate": 7.14343435529789e-06, + "loss": 2.0525, + "step": 18004 + }, + { + "epoch": 0.6, + "grad_norm": 0.7529734373092651, + "learning_rate": 7.142415767063506e-06, + "loss": 2.1253, + "step": 18005 + }, + { + "epoch": 0.6, + "grad_norm": 0.7385460138320923, + "learning_rate": 7.141397211112433e-06, + "loss": 2.0226, + "step": 18006 + }, + { + "epoch": 0.6, + "grad_norm": 0.7354551553726196, + "learning_rate": 7.140378687456182e-06, + "loss": 2.0724, + "step": 18007 + }, + { + "epoch": 0.6, + "grad_norm": 0.7536328434944153, + "learning_rate": 7.139360196106254e-06, + "loss": 2.0308, + "step": 18008 + }, + { + "epoch": 0.6, + "grad_norm": 0.7330408692359924, + "learning_rate": 7.138341737074157e-06, + "loss": 2.0781, + "step": 18009 + }, + { + "epoch": 0.6, + "grad_norm": 0.7354621291160583, + "learning_rate": 7.137323310371398e-06, + "loss": 2.0599, + "step": 18010 + }, + { + "epoch": 0.6, + "grad_norm": 0.7453399896621704, + "learning_rate": 7.136304916009487e-06, + "loss": 2.1046, + "step": 18011 + }, + { + "epoch": 0.6, + "grad_norm": 0.7583020329475403, + "learning_rate": 7.135286553999921e-06, + "loss": 2.0924, + "step": 18012 + }, + { + "epoch": 0.6, + "grad_norm": 0.7479957342147827, + "learning_rate": 7.134268224354208e-06, + "loss": 2.0334, + "step": 18013 + }, + { + "epoch": 0.6, + "grad_norm": 0.7251604199409485, + "learning_rate": 7.1332499270838555e-06, + "loss": 2.0588, + "step": 18014 + }, + { + "epoch": 0.6, + "grad_norm": 0.7254505753517151, + "learning_rate": 7.132231662200361e-06, + "loss": 2.0312, + "step": 18015 + }, + { + "epoch": 0.6, + "grad_norm": 0.7298362851142883, + "learning_rate": 7.1312134297152405e-06, + "loss": 2.0419, + "step": 18016 + }, + { + "epoch": 0.6, + "grad_norm": 0.7568021416664124, + "learning_rate": 7.130195229639982e-06, + "loss": 2.0727, + "step": 18017 + }, + { + "epoch": 0.6, + "grad_norm": 0.7288749814033508, + "learning_rate": 7.129177061986099e-06, + "loss": 2.0768, + "step": 18018 + }, + { + "epoch": 0.6, + "grad_norm": 0.7445328235626221, + "learning_rate": 7.128158926765089e-06, + "loss": 2.1317, + "step": 18019 + }, + { + "epoch": 0.6, + "grad_norm": 0.7336205840110779, + "learning_rate": 7.127140823988461e-06, + "loss": 2.0374, + "step": 18020 + }, + { + "epoch": 0.6, + "grad_norm": 0.7346328496932983, + "learning_rate": 7.126122753667709e-06, + "loss": 2.0645, + "step": 18021 + }, + { + "epoch": 0.6, + "grad_norm": 0.7292501926422119, + "learning_rate": 7.1251047158143375e-06, + "loss": 2.0145, + "step": 18022 + }, + { + "epoch": 0.6, + "grad_norm": 0.7757049798965454, + "learning_rate": 7.124086710439846e-06, + "loss": 1.9689, + "step": 18023 + }, + { + "epoch": 0.6, + "grad_norm": 0.7141915559768677, + "learning_rate": 7.123068737555741e-06, + "loss": 2.0621, + "step": 18024 + }, + { + "epoch": 0.6, + "grad_norm": 0.7428039908409119, + "learning_rate": 7.12205079717352e-06, + "loss": 2.05, + "step": 18025 + }, + { + "epoch": 0.6, + "grad_norm": 0.7673541903495789, + "learning_rate": 7.1210328893046806e-06, + "loss": 2.085, + "step": 18026 + }, + { + "epoch": 0.6, + "grad_norm": 0.7300795316696167, + "learning_rate": 7.120015013960724e-06, + "loss": 2.0322, + "step": 18027 + }, + { + "epoch": 0.6, + "grad_norm": 0.7526683807373047, + "learning_rate": 7.11899717115315e-06, + "loss": 2.0871, + "step": 18028 + }, + { + "epoch": 0.6, + "grad_norm": 0.7445402145385742, + "learning_rate": 7.117979360893461e-06, + "loss": 2.0928, + "step": 18029 + }, + { + "epoch": 0.6, + "grad_norm": 0.7442363500595093, + "learning_rate": 7.11696158319315e-06, + "loss": 2.0099, + "step": 18030 + }, + { + "epoch": 0.6, + "grad_norm": 0.7453740239143372, + "learning_rate": 7.115943838063714e-06, + "loss": 2.0634, + "step": 18031 + }, + { + "epoch": 0.6, + "grad_norm": 0.7569308280944824, + "learning_rate": 7.114926125516657e-06, + "loss": 2.0504, + "step": 18032 + }, + { + "epoch": 0.6, + "grad_norm": 0.7178748250007629, + "learning_rate": 7.113908445563476e-06, + "loss": 2.0273, + "step": 18033 + }, + { + "epoch": 0.6, + "grad_norm": 0.756657063961029, + "learning_rate": 7.112890798215662e-06, + "loss": 2.0211, + "step": 18034 + }, + { + "epoch": 0.6, + "grad_norm": 0.7130996584892273, + "learning_rate": 7.111873183484719e-06, + "loss": 2.0833, + "step": 18035 + }, + { + "epoch": 0.6, + "grad_norm": 0.7441720366477966, + "learning_rate": 7.110855601382138e-06, + "loss": 2.1397, + "step": 18036 + }, + { + "epoch": 0.6, + "grad_norm": 0.7583101391792297, + "learning_rate": 7.1098380519194155e-06, + "loss": 1.9749, + "step": 18037 + }, + { + "epoch": 0.6, + "grad_norm": 0.7455106377601624, + "learning_rate": 7.108820535108055e-06, + "loss": 2.0437, + "step": 18038 + }, + { + "epoch": 0.6, + "grad_norm": 0.7399457097053528, + "learning_rate": 7.10780305095954e-06, + "loss": 2.0697, + "step": 18039 + }, + { + "epoch": 0.6, + "grad_norm": 0.718306839466095, + "learning_rate": 7.106785599485373e-06, + "loss": 2.0669, + "step": 18040 + }, + { + "epoch": 0.6, + "grad_norm": 0.7415761947631836, + "learning_rate": 7.1057681806970454e-06, + "loss": 2.0279, + "step": 18041 + }, + { + "epoch": 0.6, + "grad_norm": 0.7630634903907776, + "learning_rate": 7.1047507946060565e-06, + "loss": 2.0171, + "step": 18042 + }, + { + "epoch": 0.6, + "grad_norm": 0.7491014003753662, + "learning_rate": 7.1037334412238945e-06, + "loss": 2.0968, + "step": 18043 + }, + { + "epoch": 0.6, + "grad_norm": 0.7615128755569458, + "learning_rate": 7.1027161205620544e-06, + "loss": 2.0475, + "step": 18044 + }, + { + "epoch": 0.6, + "grad_norm": 0.7651875615119934, + "learning_rate": 7.1016988326320265e-06, + "loss": 2.1358, + "step": 18045 + }, + { + "epoch": 0.6, + "grad_norm": 0.7522626519203186, + "learning_rate": 7.10068157744531e-06, + "loss": 2.058, + "step": 18046 + }, + { + "epoch": 0.6, + "grad_norm": 0.8102654218673706, + "learning_rate": 7.099664355013394e-06, + "loss": 1.9938, + "step": 18047 + }, + { + "epoch": 0.6, + "grad_norm": 0.7514823079109192, + "learning_rate": 7.09864716534777e-06, + "loss": 2.0167, + "step": 18048 + }, + { + "epoch": 0.6, + "grad_norm": 0.7498155832290649, + "learning_rate": 7.0976300084599305e-06, + "loss": 2.0517, + "step": 18049 + }, + { + "epoch": 0.6, + "grad_norm": 0.7520943880081177, + "learning_rate": 7.0966128843613626e-06, + "loss": 1.9845, + "step": 18050 + }, + { + "epoch": 0.6, + "grad_norm": 0.7664876580238342, + "learning_rate": 7.095595793063568e-06, + "loss": 2.0978, + "step": 18051 + }, + { + "epoch": 0.6, + "grad_norm": 0.7374511361122131, + "learning_rate": 7.094578734578024e-06, + "loss": 2.0769, + "step": 18052 + }, + { + "epoch": 0.6, + "grad_norm": 0.7230625748634338, + "learning_rate": 7.093561708916226e-06, + "loss": 2.0929, + "step": 18053 + }, + { + "epoch": 0.6, + "grad_norm": 0.7672091126441956, + "learning_rate": 7.092544716089666e-06, + "loss": 2.1082, + "step": 18054 + }, + { + "epoch": 0.6, + "grad_norm": 0.7308668494224548, + "learning_rate": 7.091527756109835e-06, + "loss": 2.0395, + "step": 18055 + }, + { + "epoch": 0.6, + "grad_norm": 0.7366390824317932, + "learning_rate": 7.090510828988217e-06, + "loss": 2.0047, + "step": 18056 + }, + { + "epoch": 0.6, + "grad_norm": 0.7361094355583191, + "learning_rate": 7.089493934736303e-06, + "loss": 2.0521, + "step": 18057 + }, + { + "epoch": 0.6, + "grad_norm": 0.73790442943573, + "learning_rate": 7.088477073365576e-06, + "loss": 2.0748, + "step": 18058 + }, + { + "epoch": 0.6, + "grad_norm": 0.7527731657028198, + "learning_rate": 7.087460244887532e-06, + "loss": 2.0797, + "step": 18059 + }, + { + "epoch": 0.6, + "grad_norm": 0.7341246604919434, + "learning_rate": 7.0864434493136605e-06, + "loss": 2.0252, + "step": 18060 + }, + { + "epoch": 0.6, + "grad_norm": 0.7160708904266357, + "learning_rate": 7.085426686655437e-06, + "loss": 2.0839, + "step": 18061 + }, + { + "epoch": 0.6, + "grad_norm": 0.7648302912712097, + "learning_rate": 7.084409956924355e-06, + "loss": 2.0859, + "step": 18062 + }, + { + "epoch": 0.6, + "grad_norm": 0.7517544031143188, + "learning_rate": 7.083393260131902e-06, + "loss": 2.0298, + "step": 18063 + }, + { + "epoch": 0.6, + "grad_norm": 0.7727470397949219, + "learning_rate": 7.082376596289564e-06, + "loss": 2.099, + "step": 18064 + }, + { + "epoch": 0.6, + "grad_norm": 0.7306755185127258, + "learning_rate": 7.081359965408825e-06, + "loss": 2.0287, + "step": 18065 + }, + { + "epoch": 0.6, + "grad_norm": 0.7938405871391296, + "learning_rate": 7.080343367501166e-06, + "loss": 2.0735, + "step": 18066 + }, + { + "epoch": 0.6, + "grad_norm": 0.7469201683998108, + "learning_rate": 7.07932680257808e-06, + "loss": 2.0678, + "step": 18067 + }, + { + "epoch": 0.6, + "grad_norm": 0.7460929751396179, + "learning_rate": 7.078310270651048e-06, + "loss": 2.0706, + "step": 18068 + }, + { + "epoch": 0.6, + "grad_norm": 0.7478275299072266, + "learning_rate": 7.077293771731557e-06, + "loss": 2.0209, + "step": 18069 + }, + { + "epoch": 0.6, + "grad_norm": 0.7411018013954163, + "learning_rate": 7.0762773058310855e-06, + "loss": 1.9688, + "step": 18070 + }, + { + "epoch": 0.6, + "grad_norm": 0.7111914753913879, + "learning_rate": 7.075260872961118e-06, + "loss": 2.0359, + "step": 18071 + }, + { + "epoch": 0.6, + "grad_norm": 0.7612175345420837, + "learning_rate": 7.07424447313314e-06, + "loss": 2.1008, + "step": 18072 + }, + { + "epoch": 0.6, + "grad_norm": 0.7553202509880066, + "learning_rate": 7.0732281063586375e-06, + "loss": 2.0742, + "step": 18073 + }, + { + "epoch": 0.6, + "grad_norm": 0.7372733950614929, + "learning_rate": 7.072211772649083e-06, + "loss": 2.0548, + "step": 18074 + }, + { + "epoch": 0.6, + "grad_norm": 0.7415850162506104, + "learning_rate": 7.0711954720159656e-06, + "loss": 2.0382, + "step": 18075 + }, + { + "epoch": 0.6, + "grad_norm": 0.7618895173072815, + "learning_rate": 7.0701792044707665e-06, + "loss": 2.0445, + "step": 18076 + }, + { + "epoch": 0.6, + "grad_norm": 0.7362727522850037, + "learning_rate": 7.069162970024963e-06, + "loss": 2.0706, + "step": 18077 + }, + { + "epoch": 0.6, + "grad_norm": 0.731784462928772, + "learning_rate": 7.0681467686900426e-06, + "loss": 2.0291, + "step": 18078 + }, + { + "epoch": 0.6, + "grad_norm": 0.7363026142120361, + "learning_rate": 7.067130600477481e-06, + "loss": 2.0285, + "step": 18079 + }, + { + "epoch": 0.6, + "grad_norm": 0.734635591506958, + "learning_rate": 7.0661144653987546e-06, + "loss": 1.9913, + "step": 18080 + }, + { + "epoch": 0.6, + "grad_norm": 0.7456371188163757, + "learning_rate": 7.06509836346535e-06, + "loss": 2.0694, + "step": 18081 + }, + { + "epoch": 0.6, + "grad_norm": 0.7284636497497559, + "learning_rate": 7.064082294688748e-06, + "loss": 2.0142, + "step": 18082 + }, + { + "epoch": 0.6, + "grad_norm": 0.8021695613861084, + "learning_rate": 7.06306625908042e-06, + "loss": 2.0865, + "step": 18083 + }, + { + "epoch": 0.6, + "grad_norm": 0.7355048060417175, + "learning_rate": 7.062050256651849e-06, + "loss": 2.0367, + "step": 18084 + }, + { + "epoch": 0.6, + "grad_norm": 0.7409468293190002, + "learning_rate": 7.061034287414512e-06, + "loss": 2.0342, + "step": 18085 + }, + { + "epoch": 0.6, + "grad_norm": 0.7389208078384399, + "learning_rate": 7.0600183513798895e-06, + "loss": 2.0909, + "step": 18086 + }, + { + "epoch": 0.6, + "grad_norm": 0.7692319750785828, + "learning_rate": 7.0590024485594535e-06, + "loss": 2.0929, + "step": 18087 + }, + { + "epoch": 0.6, + "grad_norm": 0.7708553671836853, + "learning_rate": 7.057986578964683e-06, + "loss": 2.0585, + "step": 18088 + }, + { + "epoch": 0.6, + "grad_norm": 0.7319138646125793, + "learning_rate": 7.056970742607058e-06, + "loss": 2.1247, + "step": 18089 + }, + { + "epoch": 0.6, + "grad_norm": 0.7338038682937622, + "learning_rate": 7.055954939498053e-06, + "loss": 2.0439, + "step": 18090 + }, + { + "epoch": 0.6, + "grad_norm": 0.7281038165092468, + "learning_rate": 7.054939169649144e-06, + "loss": 2.002, + "step": 18091 + }, + { + "epoch": 0.6, + "grad_norm": 0.7459545135498047, + "learning_rate": 7.0539234330718065e-06, + "loss": 1.9635, + "step": 18092 + }, + { + "epoch": 0.6, + "grad_norm": 0.7280564308166504, + "learning_rate": 7.052907729777515e-06, + "loss": 1.9998, + "step": 18093 + }, + { + "epoch": 0.6, + "grad_norm": 0.7475546002388, + "learning_rate": 7.0518920597777426e-06, + "loss": 2.0288, + "step": 18094 + }, + { + "epoch": 0.6, + "grad_norm": 0.748587965965271, + "learning_rate": 7.0508764230839726e-06, + "loss": 2.102, + "step": 18095 + }, + { + "epoch": 0.6, + "grad_norm": 0.7370194792747498, + "learning_rate": 7.049860819707664e-06, + "loss": 1.9967, + "step": 18096 + }, + { + "epoch": 0.6, + "grad_norm": 0.874679446220398, + "learning_rate": 7.048845249660303e-06, + "loss": 2.0963, + "step": 18097 + }, + { + "epoch": 0.6, + "grad_norm": 0.7541933655738831, + "learning_rate": 7.047829712953358e-06, + "loss": 2.0522, + "step": 18098 + }, + { + "epoch": 0.6, + "grad_norm": 0.7603085041046143, + "learning_rate": 7.046814209598304e-06, + "loss": 2.0865, + "step": 18099 + }, + { + "epoch": 0.6, + "grad_norm": 0.7144136428833008, + "learning_rate": 7.045798739606613e-06, + "loss": 2.0835, + "step": 18100 + }, + { + "epoch": 0.6, + "grad_norm": 0.7531644701957703, + "learning_rate": 7.044783302989755e-06, + "loss": 1.9642, + "step": 18101 + }, + { + "epoch": 0.6, + "grad_norm": 0.7483905553817749, + "learning_rate": 7.0437678997592015e-06, + "loss": 2.0714, + "step": 18102 + }, + { + "epoch": 0.6, + "grad_norm": 0.725131630897522, + "learning_rate": 7.042752529926428e-06, + "loss": 2.0412, + "step": 18103 + }, + { + "epoch": 0.6, + "grad_norm": 0.7616822719573975, + "learning_rate": 7.041737193502904e-06, + "loss": 2.0768, + "step": 18104 + }, + { + "epoch": 0.6, + "grad_norm": 0.7543684840202332, + "learning_rate": 7.0407218905001e-06, + "loss": 2.0541, + "step": 18105 + }, + { + "epoch": 0.6, + "grad_norm": 0.8712818026542664, + "learning_rate": 7.039706620929483e-06, + "loss": 2.0779, + "step": 18106 + }, + { + "epoch": 0.6, + "grad_norm": 0.742544412612915, + "learning_rate": 7.0386913848025274e-06, + "loss": 2.0098, + "step": 18107 + }, + { + "epoch": 0.6, + "grad_norm": 0.726898193359375, + "learning_rate": 7.037676182130697e-06, + "loss": 2.1157, + "step": 18108 + }, + { + "epoch": 0.6, + "grad_norm": 0.7171651721000671, + "learning_rate": 7.036661012925473e-06, + "loss": 1.9814, + "step": 18109 + }, + { + "epoch": 0.6, + "grad_norm": 0.7455134391784668, + "learning_rate": 7.035645877198309e-06, + "loss": 2.054, + "step": 18110 + }, + { + "epoch": 0.6, + "grad_norm": 0.7424584031105042, + "learning_rate": 7.034630774960685e-06, + "loss": 2.045, + "step": 18111 + }, + { + "epoch": 0.6, + "grad_norm": 0.7152026295661926, + "learning_rate": 7.033615706224062e-06, + "loss": 2.0315, + "step": 18112 + }, + { + "epoch": 0.6, + "grad_norm": 0.7167842388153076, + "learning_rate": 7.0326006709999126e-06, + "loss": 2.0923, + "step": 18113 + }, + { + "epoch": 0.6, + "grad_norm": 0.750282347202301, + "learning_rate": 7.031585669299701e-06, + "loss": 2.0203, + "step": 18114 + }, + { + "epoch": 0.6, + "grad_norm": 0.739466667175293, + "learning_rate": 7.030570701134893e-06, + "loss": 2.0882, + "step": 18115 + }, + { + "epoch": 0.6, + "grad_norm": 0.7285804748535156, + "learning_rate": 7.029555766516956e-06, + "loss": 2.0558, + "step": 18116 + }, + { + "epoch": 0.6, + "grad_norm": 0.7640320658683777, + "learning_rate": 7.028540865457364e-06, + "loss": 2.1033, + "step": 18117 + }, + { + "epoch": 0.6, + "grad_norm": 0.7902219295501709, + "learning_rate": 7.027525997967569e-06, + "loss": 2.0939, + "step": 18118 + }, + { + "epoch": 0.6, + "grad_norm": 0.7541019320487976, + "learning_rate": 7.026511164059046e-06, + "loss": 2.0715, + "step": 18119 + }, + { + "epoch": 0.6, + "grad_norm": 0.734528124332428, + "learning_rate": 7.025496363743256e-06, + "loss": 2.1014, + "step": 18120 + }, + { + "epoch": 0.6, + "grad_norm": 0.7371823787689209, + "learning_rate": 7.024481597031665e-06, + "loss": 2.0798, + "step": 18121 + }, + { + "epoch": 0.6, + "grad_norm": 0.7510143518447876, + "learning_rate": 7.02346686393574e-06, + "loss": 2.034, + "step": 18122 + }, + { + "epoch": 0.6, + "grad_norm": 0.7397037148475647, + "learning_rate": 7.022452164466939e-06, + "loss": 2.0524, + "step": 18123 + }, + { + "epoch": 0.6, + "grad_norm": 0.7526743412017822, + "learning_rate": 7.021437498636727e-06, + "loss": 2.0169, + "step": 18124 + }, + { + "epoch": 0.6, + "grad_norm": 0.7480762600898743, + "learning_rate": 7.020422866456571e-06, + "loss": 2.065, + "step": 18125 + }, + { + "epoch": 0.6, + "grad_norm": 0.749373733997345, + "learning_rate": 7.019408267937933e-06, + "loss": 2.0272, + "step": 18126 + }, + { + "epoch": 0.6, + "grad_norm": 0.7428086400032043, + "learning_rate": 7.01839370309227e-06, + "loss": 2.018, + "step": 18127 + }, + { + "epoch": 0.6, + "grad_norm": 0.7550248503684998, + "learning_rate": 7.017379171931049e-06, + "loss": 2.0974, + "step": 18128 + }, + { + "epoch": 0.6, + "grad_norm": 0.724724292755127, + "learning_rate": 7.01636467446573e-06, + "loss": 2.0284, + "step": 18129 + }, + { + "epoch": 0.6, + "grad_norm": 0.7309322953224182, + "learning_rate": 7.015350210707772e-06, + "loss": 2.0776, + "step": 18130 + }, + { + "epoch": 0.6, + "grad_norm": 0.7560361623764038, + "learning_rate": 7.014335780668644e-06, + "loss": 2.1305, + "step": 18131 + }, + { + "epoch": 0.6, + "grad_norm": 0.7583454251289368, + "learning_rate": 7.013321384359795e-06, + "loss": 2.0581, + "step": 18132 + }, + { + "epoch": 0.6, + "grad_norm": 0.7379392385482788, + "learning_rate": 7.012307021792692e-06, + "loss": 2.0349, + "step": 18133 + }, + { + "epoch": 0.6, + "grad_norm": 0.7524101734161377, + "learning_rate": 7.011292692978795e-06, + "loss": 2.0191, + "step": 18134 + }, + { + "epoch": 0.6, + "grad_norm": 0.7881929874420166, + "learning_rate": 7.010278397929562e-06, + "loss": 2.0165, + "step": 18135 + }, + { + "epoch": 0.6, + "grad_norm": 0.7490965127944946, + "learning_rate": 7.009264136656451e-06, + "loss": 2.1086, + "step": 18136 + }, + { + "epoch": 0.6, + "grad_norm": 0.7501972913742065, + "learning_rate": 7.008249909170919e-06, + "loss": 2.0319, + "step": 18137 + }, + { + "epoch": 0.6, + "grad_norm": 0.7473400831222534, + "learning_rate": 7.007235715484426e-06, + "loss": 2.0092, + "step": 18138 + }, + { + "epoch": 0.6, + "grad_norm": 0.71453857421875, + "learning_rate": 7.00622155560843e-06, + "loss": 1.991, + "step": 18139 + }, + { + "epoch": 0.6, + "grad_norm": 0.7535525560379028, + "learning_rate": 7.005207429554392e-06, + "loss": 2.0664, + "step": 18140 + }, + { + "epoch": 0.6, + "grad_norm": 0.7530552744865417, + "learning_rate": 7.004193337333762e-06, + "loss": 2.0249, + "step": 18141 + }, + { + "epoch": 0.6, + "grad_norm": 0.7480345368385315, + "learning_rate": 7.003179278958001e-06, + "loss": 2.1416, + "step": 18142 + }, + { + "epoch": 0.6, + "grad_norm": 0.7439345121383667, + "learning_rate": 7.002165254438564e-06, + "loss": 2.0591, + "step": 18143 + }, + { + "epoch": 0.6, + "grad_norm": 0.7347488403320312, + "learning_rate": 7.00115126378691e-06, + "loss": 2.0722, + "step": 18144 + }, + { + "epoch": 0.6, + "grad_norm": 0.7574864029884338, + "learning_rate": 7.0001373070144885e-06, + "loss": 2.0725, + "step": 18145 + }, + { + "epoch": 0.6, + "grad_norm": 0.7754042148590088, + "learning_rate": 6.999123384132755e-06, + "loss": 2.1173, + "step": 18146 + }, + { + "epoch": 0.6, + "grad_norm": 0.7491180300712585, + "learning_rate": 6.99810949515317e-06, + "loss": 2.0697, + "step": 18147 + }, + { + "epoch": 0.6, + "grad_norm": 0.7150956392288208, + "learning_rate": 6.997095640087186e-06, + "loss": 2.0872, + "step": 18148 + }, + { + "epoch": 0.6, + "grad_norm": 0.7521947026252747, + "learning_rate": 6.996081818946254e-06, + "loss": 2.16, + "step": 18149 + }, + { + "epoch": 0.6, + "grad_norm": 0.7278939485549927, + "learning_rate": 6.99506803174183e-06, + "loss": 2.0479, + "step": 18150 + }, + { + "epoch": 0.6, + "grad_norm": 0.7365953922271729, + "learning_rate": 6.994054278485363e-06, + "loss": 2.1116, + "step": 18151 + }, + { + "epoch": 0.6, + "grad_norm": 0.7362545132637024, + "learning_rate": 6.993040559188311e-06, + "loss": 2.0437, + "step": 18152 + }, + { + "epoch": 0.6, + "grad_norm": 0.740341067314148, + "learning_rate": 6.992026873862128e-06, + "loss": 2.0602, + "step": 18153 + }, + { + "epoch": 0.6, + "grad_norm": 0.7152746319770813, + "learning_rate": 6.991013222518257e-06, + "loss": 2.0581, + "step": 18154 + }, + { + "epoch": 0.6, + "grad_norm": 0.7657521367073059, + "learning_rate": 6.989999605168158e-06, + "loss": 2.044, + "step": 18155 + }, + { + "epoch": 0.6, + "grad_norm": 0.6964150667190552, + "learning_rate": 6.988986021823278e-06, + "loss": 1.9869, + "step": 18156 + }, + { + "epoch": 0.6, + "grad_norm": 0.7674651145935059, + "learning_rate": 6.987972472495073e-06, + "loss": 2.0541, + "step": 18157 + }, + { + "epoch": 0.6, + "grad_norm": 0.7327138781547546, + "learning_rate": 6.986958957194987e-06, + "loss": 2.0214, + "step": 18158 + }, + { + "epoch": 0.6, + "grad_norm": 0.7678636908531189, + "learning_rate": 6.9859454759344715e-06, + "loss": 2.1532, + "step": 18159 + }, + { + "epoch": 0.6, + "grad_norm": 0.7500786185264587, + "learning_rate": 6.984932028724979e-06, + "loss": 2.0584, + "step": 18160 + }, + { + "epoch": 0.6, + "grad_norm": 0.7126113176345825, + "learning_rate": 6.983918615577957e-06, + "loss": 2.0771, + "step": 18161 + }, + { + "epoch": 0.6, + "grad_norm": 0.7425627708435059, + "learning_rate": 6.9829052365048575e-06, + "loss": 2.0804, + "step": 18162 + }, + { + "epoch": 0.6, + "grad_norm": 0.755148708820343, + "learning_rate": 6.981891891517124e-06, + "loss": 2.1279, + "step": 18163 + }, + { + "epoch": 0.6, + "grad_norm": 0.7634586691856384, + "learning_rate": 6.9808785806262065e-06, + "loss": 2.0576, + "step": 18164 + }, + { + "epoch": 0.6, + "grad_norm": 0.7546915411949158, + "learning_rate": 6.979865303843554e-06, + "loss": 2.1051, + "step": 18165 + }, + { + "epoch": 0.6, + "grad_norm": 0.7502654790878296, + "learning_rate": 6.978852061180617e-06, + "loss": 2.0236, + "step": 18166 + }, + { + "epoch": 0.6, + "grad_norm": 0.742770254611969, + "learning_rate": 6.977838852648834e-06, + "loss": 2.0981, + "step": 18167 + }, + { + "epoch": 0.6, + "grad_norm": 0.7127379179000854, + "learning_rate": 6.976825678259658e-06, + "loss": 2.0388, + "step": 18168 + }, + { + "epoch": 0.6, + "grad_norm": 0.7363996505737305, + "learning_rate": 6.975812538024533e-06, + "loss": 2.0451, + "step": 18169 + }, + { + "epoch": 0.6, + "grad_norm": 0.7417378425598145, + "learning_rate": 6.974799431954906e-06, + "loss": 2.0398, + "step": 18170 + }, + { + "epoch": 0.6, + "grad_norm": 0.7367112636566162, + "learning_rate": 6.9737863600622244e-06, + "loss": 1.9832, + "step": 18171 + }, + { + "epoch": 0.6, + "grad_norm": 0.7370618581771851, + "learning_rate": 6.97277332235793e-06, + "loss": 1.9401, + "step": 18172 + }, + { + "epoch": 0.6, + "grad_norm": 0.789521336555481, + "learning_rate": 6.971760318853465e-06, + "loss": 2.0558, + "step": 18173 + }, + { + "epoch": 0.6, + "grad_norm": 0.7447170615196228, + "learning_rate": 6.97074734956028e-06, + "loss": 2.0702, + "step": 18174 + }, + { + "epoch": 0.6, + "grad_norm": 0.7433381080627441, + "learning_rate": 6.969734414489817e-06, + "loss": 2.0556, + "step": 18175 + }, + { + "epoch": 0.6, + "grad_norm": 0.7597349882125854, + "learning_rate": 6.968721513653518e-06, + "loss": 2.0098, + "step": 18176 + }, + { + "epoch": 0.6, + "grad_norm": 0.720356822013855, + "learning_rate": 6.967708647062826e-06, + "loss": 1.9944, + "step": 18177 + }, + { + "epoch": 0.6, + "grad_norm": 0.7344619035720825, + "learning_rate": 6.966695814729185e-06, + "loss": 2.0315, + "step": 18178 + }, + { + "epoch": 0.6, + "grad_norm": 0.7849770188331604, + "learning_rate": 6.9656830166640386e-06, + "loss": 2.067, + "step": 18179 + }, + { + "epoch": 0.6, + "grad_norm": 0.7438925504684448, + "learning_rate": 6.964670252878826e-06, + "loss": 2.0682, + "step": 18180 + }, + { + "epoch": 0.6, + "grad_norm": 0.7585625648498535, + "learning_rate": 6.963657523384988e-06, + "loss": 2.0561, + "step": 18181 + }, + { + "epoch": 0.6, + "grad_norm": 0.7291232347488403, + "learning_rate": 6.9626448281939685e-06, + "loss": 2.0134, + "step": 18182 + }, + { + "epoch": 0.6, + "grad_norm": 0.7285677790641785, + "learning_rate": 6.961632167317209e-06, + "loss": 2.0304, + "step": 18183 + }, + { + "epoch": 0.6, + "grad_norm": 0.723584771156311, + "learning_rate": 6.960619540766149e-06, + "loss": 2.0925, + "step": 18184 + }, + { + "epoch": 0.61, + "grad_norm": 0.7189376950263977, + "learning_rate": 6.959606948552228e-06, + "loss": 2.0081, + "step": 18185 + }, + { + "epoch": 0.61, + "grad_norm": 0.7332265377044678, + "learning_rate": 6.958594390686884e-06, + "loss": 2.1064, + "step": 18186 + }, + { + "epoch": 0.61, + "grad_norm": 0.7597272396087646, + "learning_rate": 6.957581867181558e-06, + "loss": 2.0967, + "step": 18187 + }, + { + "epoch": 0.61, + "grad_norm": 0.7286507487297058, + "learning_rate": 6.956569378047695e-06, + "loss": 2.0868, + "step": 18188 + }, + { + "epoch": 0.61, + "grad_norm": 0.7588894963264465, + "learning_rate": 6.95555692329672e-06, + "loss": 2.0954, + "step": 18189 + }, + { + "epoch": 0.61, + "grad_norm": 0.7559142708778381, + "learning_rate": 6.954544502940081e-06, + "loss": 2.0429, + "step": 18190 + }, + { + "epoch": 0.61, + "grad_norm": 0.7189953923225403, + "learning_rate": 6.953532116989215e-06, + "loss": 2.0861, + "step": 18191 + }, + { + "epoch": 0.61, + "grad_norm": 0.7446472644805908, + "learning_rate": 6.952519765455557e-06, + "loss": 2.1248, + "step": 18192 + }, + { + "epoch": 0.61, + "grad_norm": 0.7270435690879822, + "learning_rate": 6.951507448350545e-06, + "loss": 2.0325, + "step": 18193 + }, + { + "epoch": 0.61, + "grad_norm": 0.7205339074134827, + "learning_rate": 6.9504951656856155e-06, + "loss": 2.0393, + "step": 18194 + }, + { + "epoch": 0.61, + "grad_norm": 0.7231996059417725, + "learning_rate": 6.949482917472201e-06, + "loss": 2.0385, + "step": 18195 + }, + { + "epoch": 0.61, + "grad_norm": 0.7476510405540466, + "learning_rate": 6.948470703721744e-06, + "loss": 2.0507, + "step": 18196 + }, + { + "epoch": 0.61, + "grad_norm": 0.7621219754219055, + "learning_rate": 6.947458524445677e-06, + "loss": 2.1001, + "step": 18197 + }, + { + "epoch": 0.61, + "grad_norm": 0.7413654923439026, + "learning_rate": 6.946446379655434e-06, + "loss": 2.0137, + "step": 18198 + }, + { + "epoch": 0.61, + "grad_norm": 0.7493494153022766, + "learning_rate": 6.94543426936245e-06, + "loss": 2.0946, + "step": 18199 + }, + { + "epoch": 0.61, + "grad_norm": 0.765190601348877, + "learning_rate": 6.944422193578158e-06, + "loss": 2.032, + "step": 18200 + }, + { + "epoch": 0.61, + "grad_norm": 0.7761021256446838, + "learning_rate": 6.943410152313992e-06, + "loss": 2.0568, + "step": 18201 + }, + { + "epoch": 0.61, + "grad_norm": 0.7578926086425781, + "learning_rate": 6.942398145581395e-06, + "loss": 1.9944, + "step": 18202 + }, + { + "epoch": 0.61, + "grad_norm": 0.7496709227561951, + "learning_rate": 6.941386173391783e-06, + "loss": 2.1391, + "step": 18203 + }, + { + "epoch": 0.61, + "grad_norm": 0.7286618947982788, + "learning_rate": 6.940374235756601e-06, + "loss": 2.063, + "step": 18204 + }, + { + "epoch": 0.61, + "grad_norm": 0.7208502292633057, + "learning_rate": 6.939362332687276e-06, + "loss": 2.1064, + "step": 18205 + }, + { + "epoch": 0.61, + "grad_norm": 0.7326884269714355, + "learning_rate": 6.9383504641952456e-06, + "loss": 2.1525, + "step": 18206 + }, + { + "epoch": 0.61, + "grad_norm": 0.7668668031692505, + "learning_rate": 6.937338630291934e-06, + "loss": 2.1133, + "step": 18207 + }, + { + "epoch": 0.61, + "grad_norm": 0.7831897139549255, + "learning_rate": 6.936326830988778e-06, + "loss": 2.1251, + "step": 18208 + }, + { + "epoch": 0.61, + "grad_norm": 0.7393659949302673, + "learning_rate": 6.9353150662972015e-06, + "loss": 2.1005, + "step": 18209 + }, + { + "epoch": 0.61, + "grad_norm": 0.754098117351532, + "learning_rate": 6.934303336228647e-06, + "loss": 2.0567, + "step": 18210 + }, + { + "epoch": 0.61, + "grad_norm": 0.7291830778121948, + "learning_rate": 6.933291640794529e-06, + "loss": 2.0431, + "step": 18211 + }, + { + "epoch": 0.61, + "grad_norm": 0.7129613757133484, + "learning_rate": 6.932279980006288e-06, + "loss": 2.0157, + "step": 18212 + }, + { + "epoch": 0.61, + "grad_norm": 0.7537611722946167, + "learning_rate": 6.931268353875349e-06, + "loss": 2.0763, + "step": 18213 + }, + { + "epoch": 0.61, + "grad_norm": 0.7093154788017273, + "learning_rate": 6.930256762413144e-06, + "loss": 2.1204, + "step": 18214 + }, + { + "epoch": 0.61, + "grad_norm": 0.785552978515625, + "learning_rate": 6.929245205631099e-06, + "loss": 2.0705, + "step": 18215 + }, + { + "epoch": 0.61, + "grad_norm": 0.7133358120918274, + "learning_rate": 6.9282336835406394e-06, + "loss": 2.0595, + "step": 18216 + }, + { + "epoch": 0.61, + "grad_norm": 0.7240973114967346, + "learning_rate": 6.9272221961531935e-06, + "loss": 2.0806, + "step": 18217 + }, + { + "epoch": 0.61, + "grad_norm": 0.7669894099235535, + "learning_rate": 6.9262107434801935e-06, + "loss": 2.0055, + "step": 18218 + }, + { + "epoch": 0.61, + "grad_norm": 0.7436234354972839, + "learning_rate": 6.925199325533065e-06, + "loss": 2.1167, + "step": 18219 + }, + { + "epoch": 0.61, + "grad_norm": 0.744316041469574, + "learning_rate": 6.9241879423232305e-06, + "loss": 2.0898, + "step": 18220 + }, + { + "epoch": 0.61, + "grad_norm": 0.7304786443710327, + "learning_rate": 6.923176593862119e-06, + "loss": 2.0662, + "step": 18221 + }, + { + "epoch": 0.61, + "grad_norm": 0.7371495962142944, + "learning_rate": 6.922165280161153e-06, + "loss": 2.0637, + "step": 18222 + }, + { + "epoch": 0.61, + "grad_norm": 0.7590890526771545, + "learning_rate": 6.9211540012317595e-06, + "loss": 2.0631, + "step": 18223 + }, + { + "epoch": 0.61, + "grad_norm": 0.7795783877372742, + "learning_rate": 6.920142757085368e-06, + "loss": 2.104, + "step": 18224 + }, + { + "epoch": 0.61, + "grad_norm": 0.718311071395874, + "learning_rate": 6.919131547733396e-06, + "loss": 2.0495, + "step": 18225 + }, + { + "epoch": 0.61, + "grad_norm": 0.7413643002510071, + "learning_rate": 6.91812037318727e-06, + "loss": 2.0638, + "step": 18226 + }, + { + "epoch": 0.61, + "grad_norm": 0.7659221291542053, + "learning_rate": 6.917109233458412e-06, + "loss": 2.078, + "step": 18227 + }, + { + "epoch": 0.61, + "grad_norm": 0.7427729964256287, + "learning_rate": 6.916098128558252e-06, + "loss": 2.0465, + "step": 18228 + }, + { + "epoch": 0.61, + "grad_norm": 0.7672173380851746, + "learning_rate": 6.915087058498205e-06, + "loss": 2.0816, + "step": 18229 + }, + { + "epoch": 0.61, + "grad_norm": 0.7420541644096375, + "learning_rate": 6.914076023289697e-06, + "loss": 2.1033, + "step": 18230 + }, + { + "epoch": 0.61, + "grad_norm": 0.7388933897018433, + "learning_rate": 6.913065022944146e-06, + "loss": 2.0463, + "step": 18231 + }, + { + "epoch": 0.61, + "grad_norm": 0.7369776964187622, + "learning_rate": 6.912054057472984e-06, + "loss": 2.0026, + "step": 18232 + }, + { + "epoch": 0.61, + "grad_norm": 0.7766572833061218, + "learning_rate": 6.91104312688762e-06, + "loss": 2.0494, + "step": 18233 + }, + { + "epoch": 0.61, + "grad_norm": 0.7203308343887329, + "learning_rate": 6.910032231199483e-06, + "loss": 2.08, + "step": 18234 + }, + { + "epoch": 0.61, + "grad_norm": 0.7288216352462769, + "learning_rate": 6.909021370419991e-06, + "loss": 2.0403, + "step": 18235 + }, + { + "epoch": 0.61, + "grad_norm": 0.7403327226638794, + "learning_rate": 6.908010544560562e-06, + "loss": 2.0082, + "step": 18236 + }, + { + "epoch": 0.61, + "grad_norm": 0.7094191312789917, + "learning_rate": 6.906999753632621e-06, + "loss": 2.0973, + "step": 18237 + }, + { + "epoch": 0.61, + "grad_norm": 0.7305642366409302, + "learning_rate": 6.905988997647582e-06, + "loss": 2.0955, + "step": 18238 + }, + { + "epoch": 0.61, + "grad_norm": 0.7384151220321655, + "learning_rate": 6.904978276616865e-06, + "loss": 2.0293, + "step": 18239 + }, + { + "epoch": 0.61, + "grad_norm": 0.8124091029167175, + "learning_rate": 6.90396759055189e-06, + "loss": 2.0888, + "step": 18240 + }, + { + "epoch": 0.61, + "grad_norm": 0.7468953132629395, + "learning_rate": 6.902956939464078e-06, + "loss": 2.0936, + "step": 18241 + }, + { + "epoch": 0.61, + "grad_norm": 0.740871250629425, + "learning_rate": 6.901946323364841e-06, + "loss": 2.1664, + "step": 18242 + }, + { + "epoch": 0.61, + "grad_norm": 0.7360413670539856, + "learning_rate": 6.900935742265599e-06, + "loss": 2.0426, + "step": 18243 + }, + { + "epoch": 0.61, + "grad_norm": 0.7525025010108948, + "learning_rate": 6.899925196177768e-06, + "loss": 2.0686, + "step": 18244 + }, + { + "epoch": 0.61, + "grad_norm": 0.7337868809700012, + "learning_rate": 6.898914685112763e-06, + "loss": 2.0599, + "step": 18245 + }, + { + "epoch": 0.61, + "grad_norm": 0.7474092841148376, + "learning_rate": 6.8979042090820106e-06, + "loss": 2.0192, + "step": 18246 + }, + { + "epoch": 0.61, + "grad_norm": 0.7477457523345947, + "learning_rate": 6.896893768096911e-06, + "loss": 2.0532, + "step": 18247 + }, + { + "epoch": 0.61, + "grad_norm": 0.7334063053131104, + "learning_rate": 6.895883362168889e-06, + "loss": 2.0913, + "step": 18248 + }, + { + "epoch": 0.61, + "grad_norm": 0.7706827521324158, + "learning_rate": 6.894872991309358e-06, + "loss": 2.0169, + "step": 18249 + }, + { + "epoch": 0.61, + "grad_norm": 0.75234454870224, + "learning_rate": 6.893862655529733e-06, + "loss": 2.0462, + "step": 18250 + }, + { + "epoch": 0.61, + "grad_norm": 0.776328980922699, + "learning_rate": 6.892852354841426e-06, + "loss": 2.0801, + "step": 18251 + }, + { + "epoch": 0.61, + "grad_norm": 0.739913821220398, + "learning_rate": 6.891842089255853e-06, + "loss": 1.9989, + "step": 18252 + }, + { + "epoch": 0.61, + "grad_norm": 0.73139888048172, + "learning_rate": 6.890831858784424e-06, + "loss": 2.042, + "step": 18253 + }, + { + "epoch": 0.61, + "grad_norm": 0.7494426369667053, + "learning_rate": 6.889821663438556e-06, + "loss": 2.111, + "step": 18254 + }, + { + "epoch": 0.61, + "grad_norm": 0.7208462953567505, + "learning_rate": 6.888811503229662e-06, + "loss": 2.0243, + "step": 18255 + }, + { + "epoch": 0.61, + "grad_norm": 0.7479475140571594, + "learning_rate": 6.8878013781691525e-06, + "loss": 2.1137, + "step": 18256 + }, + { + "epoch": 0.61, + "grad_norm": 0.732677161693573, + "learning_rate": 6.886791288268436e-06, + "loss": 2.0721, + "step": 18257 + }, + { + "epoch": 0.61, + "grad_norm": 0.7236099243164062, + "learning_rate": 6.885781233538927e-06, + "loss": 2.0554, + "step": 18258 + }, + { + "epoch": 0.61, + "grad_norm": 0.7083085179328918, + "learning_rate": 6.884771213992042e-06, + "loss": 2.0869, + "step": 18259 + }, + { + "epoch": 0.61, + "grad_norm": 0.7463151812553406, + "learning_rate": 6.8837612296391795e-06, + "loss": 2.0546, + "step": 18260 + }, + { + "epoch": 0.61, + "grad_norm": 0.7234214544296265, + "learning_rate": 6.882751280491759e-06, + "loss": 1.9444, + "step": 18261 + }, + { + "epoch": 0.61, + "grad_norm": 0.7817516326904297, + "learning_rate": 6.881741366561187e-06, + "loss": 2.0606, + "step": 18262 + }, + { + "epoch": 0.61, + "grad_norm": 0.7475141882896423, + "learning_rate": 6.880731487858876e-06, + "loss": 2.0575, + "step": 18263 + }, + { + "epoch": 0.61, + "grad_norm": 0.7212839126586914, + "learning_rate": 6.8797216443962316e-06, + "loss": 2.096, + "step": 18264 + }, + { + "epoch": 0.61, + "grad_norm": 0.7221172451972961, + "learning_rate": 6.878711836184662e-06, + "loss": 2.0835, + "step": 18265 + }, + { + "epoch": 0.61, + "grad_norm": 0.7483072280883789, + "learning_rate": 6.877702063235577e-06, + "loss": 2.0605, + "step": 18266 + }, + { + "epoch": 0.61, + "grad_norm": 0.7404163479804993, + "learning_rate": 6.876692325560383e-06, + "loss": 2.1038, + "step": 18267 + }, + { + "epoch": 0.61, + "grad_norm": 0.7535737752914429, + "learning_rate": 6.875682623170492e-06, + "loss": 2.1002, + "step": 18268 + }, + { + "epoch": 0.61, + "grad_norm": 0.7278376221656799, + "learning_rate": 6.874672956077306e-06, + "loss": 2.1425, + "step": 18269 + }, + { + "epoch": 0.61, + "grad_norm": 0.7086727023124695, + "learning_rate": 6.873663324292233e-06, + "loss": 2.016, + "step": 18270 + }, + { + "epoch": 0.61, + "grad_norm": 0.7278428077697754, + "learning_rate": 6.872653727826679e-06, + "loss": 2.0242, + "step": 18271 + }, + { + "epoch": 0.61, + "grad_norm": 0.7262416481971741, + "learning_rate": 6.871644166692054e-06, + "loss": 2.074, + "step": 18272 + }, + { + "epoch": 0.61, + "grad_norm": 0.7270860075950623, + "learning_rate": 6.870634640899757e-06, + "loss": 2.0176, + "step": 18273 + }, + { + "epoch": 0.61, + "grad_norm": 0.7612005472183228, + "learning_rate": 6.869625150461194e-06, + "loss": 2.1016, + "step": 18274 + }, + { + "epoch": 0.61, + "grad_norm": 0.740704357624054, + "learning_rate": 6.868615695387772e-06, + "loss": 2.008, + "step": 18275 + }, + { + "epoch": 0.61, + "grad_norm": 0.7130936980247498, + "learning_rate": 6.867606275690897e-06, + "loss": 2.0525, + "step": 18276 + }, + { + "epoch": 0.61, + "grad_norm": 0.7626101970672607, + "learning_rate": 6.8665968913819715e-06, + "loss": 2.0708, + "step": 18277 + }, + { + "epoch": 0.61, + "grad_norm": 0.7217760682106018, + "learning_rate": 6.865587542472396e-06, + "loss": 2.0619, + "step": 18278 + }, + { + "epoch": 0.61, + "grad_norm": 0.7392137050628662, + "learning_rate": 6.864578228973575e-06, + "loss": 2.0962, + "step": 18279 + }, + { + "epoch": 0.61, + "grad_norm": 0.7717177867889404, + "learning_rate": 6.8635689508969105e-06, + "loss": 2.0839, + "step": 18280 + }, + { + "epoch": 0.61, + "grad_norm": 0.7491602897644043, + "learning_rate": 6.862559708253811e-06, + "loss": 2.0911, + "step": 18281 + }, + { + "epoch": 0.61, + "grad_norm": 0.7501385807991028, + "learning_rate": 6.861550501055667e-06, + "loss": 2.1131, + "step": 18282 + }, + { + "epoch": 0.61, + "grad_norm": 0.7387890815734863, + "learning_rate": 6.86054132931389e-06, + "loss": 1.9938, + "step": 18283 + }, + { + "epoch": 0.61, + "grad_norm": 0.7624941468238831, + "learning_rate": 6.859532193039875e-06, + "loss": 2.0668, + "step": 18284 + }, + { + "epoch": 0.61, + "grad_norm": 0.7394777536392212, + "learning_rate": 6.858523092245026e-06, + "loss": 2.0959, + "step": 18285 + }, + { + "epoch": 0.61, + "grad_norm": 0.7738788723945618, + "learning_rate": 6.8575140269407434e-06, + "loss": 2.0869, + "step": 18286 + }, + { + "epoch": 0.61, + "grad_norm": 0.7552123665809631, + "learning_rate": 6.856504997138424e-06, + "loss": 2.1117, + "step": 18287 + }, + { + "epoch": 0.61, + "grad_norm": 0.7498782277107239, + "learning_rate": 6.855496002849467e-06, + "loss": 2.0699, + "step": 18288 + }, + { + "epoch": 0.61, + "grad_norm": 0.7292712330818176, + "learning_rate": 6.854487044085275e-06, + "loss": 1.9774, + "step": 18289 + }, + { + "epoch": 0.61, + "grad_norm": 0.7111600041389465, + "learning_rate": 6.853478120857247e-06, + "loss": 2.0973, + "step": 18290 + }, + { + "epoch": 0.61, + "grad_norm": 0.7142061591148376, + "learning_rate": 6.852469233176777e-06, + "loss": 2.0028, + "step": 18291 + }, + { + "epoch": 0.61, + "grad_norm": 0.7392668724060059, + "learning_rate": 6.851460381055266e-06, + "loss": 2.1294, + "step": 18292 + }, + { + "epoch": 0.61, + "grad_norm": 0.7337582111358643, + "learning_rate": 6.850451564504109e-06, + "loss": 2.089, + "step": 18293 + }, + { + "epoch": 0.61, + "grad_norm": 0.7718833684921265, + "learning_rate": 6.849442783534708e-06, + "loss": 1.9778, + "step": 18294 + }, + { + "epoch": 0.61, + "grad_norm": 0.7311339378356934, + "learning_rate": 6.848434038158452e-06, + "loss": 2.1452, + "step": 18295 + }, + { + "epoch": 0.61, + "grad_norm": 0.7324917912483215, + "learning_rate": 6.847425328386741e-06, + "loss": 2.0249, + "step": 18296 + }, + { + "epoch": 0.61, + "grad_norm": 0.7163553833961487, + "learning_rate": 6.846416654230972e-06, + "loss": 2.0709, + "step": 18297 + }, + { + "epoch": 0.61, + "grad_norm": 0.7434574961662292, + "learning_rate": 6.845408015702539e-06, + "loss": 2.1334, + "step": 18298 + }, + { + "epoch": 0.61, + "grad_norm": 0.7435803413391113, + "learning_rate": 6.844399412812841e-06, + "loss": 2.0288, + "step": 18299 + }, + { + "epoch": 0.61, + "grad_norm": 0.7465127110481262, + "learning_rate": 6.843390845573265e-06, + "loss": 2.0745, + "step": 18300 + }, + { + "epoch": 0.61, + "grad_norm": 0.7331094741821289, + "learning_rate": 6.842382313995212e-06, + "loss": 2.012, + "step": 18301 + }, + { + "epoch": 0.61, + "grad_norm": 0.7311022281646729, + "learning_rate": 6.841373818090069e-06, + "loss": 2.0705, + "step": 18302 + }, + { + "epoch": 0.61, + "grad_norm": 0.7777676582336426, + "learning_rate": 6.840365357869241e-06, + "loss": 2.0623, + "step": 18303 + }, + { + "epoch": 0.61, + "grad_norm": 0.7453696727752686, + "learning_rate": 6.839356933344106e-06, + "loss": 2.128, + "step": 18304 + }, + { + "epoch": 0.61, + "grad_norm": 0.7418368458747864, + "learning_rate": 6.838348544526067e-06, + "loss": 2.1084, + "step": 18305 + }, + { + "epoch": 0.61, + "grad_norm": 0.75780189037323, + "learning_rate": 6.837340191426513e-06, + "loss": 2.1053, + "step": 18306 + }, + { + "epoch": 0.61, + "grad_norm": 0.7905848622322083, + "learning_rate": 6.836331874056835e-06, + "loss": 2.0577, + "step": 18307 + }, + { + "epoch": 0.61, + "grad_norm": 0.7332119345664978, + "learning_rate": 6.835323592428429e-06, + "loss": 2.0418, + "step": 18308 + }, + { + "epoch": 0.61, + "grad_norm": 0.7225558757781982, + "learning_rate": 6.834315346552679e-06, + "loss": 2.0872, + "step": 18309 + }, + { + "epoch": 0.61, + "grad_norm": 0.7501688003540039, + "learning_rate": 6.833307136440977e-06, + "loss": 2.077, + "step": 18310 + }, + { + "epoch": 0.61, + "grad_norm": 0.7427091598510742, + "learning_rate": 6.832298962104718e-06, + "loss": 2.0343, + "step": 18311 + }, + { + "epoch": 0.61, + "grad_norm": 0.7550827264785767, + "learning_rate": 6.831290823555291e-06, + "loss": 2.0622, + "step": 18312 + }, + { + "epoch": 0.61, + "grad_norm": 0.7726485133171082, + "learning_rate": 6.83028272080408e-06, + "loss": 2.1355, + "step": 18313 + }, + { + "epoch": 0.61, + "grad_norm": 0.7859175205230713, + "learning_rate": 6.829274653862479e-06, + "loss": 2.1358, + "step": 18314 + }, + { + "epoch": 0.61, + "grad_norm": 0.7417984008789062, + "learning_rate": 6.828266622741873e-06, + "loss": 2.0562, + "step": 18315 + }, + { + "epoch": 0.61, + "grad_norm": 0.7341346740722656, + "learning_rate": 6.82725862745365e-06, + "loss": 2.0908, + "step": 18316 + }, + { + "epoch": 0.61, + "grad_norm": 0.7390155792236328, + "learning_rate": 6.826250668009207e-06, + "loss": 2.119, + "step": 18317 + }, + { + "epoch": 0.61, + "grad_norm": 0.7509703040122986, + "learning_rate": 6.825242744419918e-06, + "loss": 2.1016, + "step": 18318 + }, + { + "epoch": 0.61, + "grad_norm": 0.7445874214172363, + "learning_rate": 6.824234856697176e-06, + "loss": 2.0534, + "step": 18319 + }, + { + "epoch": 0.61, + "grad_norm": 0.7243942618370056, + "learning_rate": 6.823227004852369e-06, + "loss": 2.0557, + "step": 18320 + }, + { + "epoch": 0.61, + "grad_norm": 0.745593249797821, + "learning_rate": 6.822219188896883e-06, + "loss": 2.0694, + "step": 18321 + }, + { + "epoch": 0.61, + "grad_norm": 0.8173550963401794, + "learning_rate": 6.821211408842099e-06, + "loss": 2.0261, + "step": 18322 + }, + { + "epoch": 0.61, + "grad_norm": 0.7469823956489563, + "learning_rate": 6.820203664699407e-06, + "loss": 2.07, + "step": 18323 + }, + { + "epoch": 0.61, + "grad_norm": 0.7456266283988953, + "learning_rate": 6.819195956480189e-06, + "loss": 2.0812, + "step": 18324 + }, + { + "epoch": 0.61, + "grad_norm": 0.7292190194129944, + "learning_rate": 6.818188284195836e-06, + "loss": 1.9946, + "step": 18325 + }, + { + "epoch": 0.61, + "grad_norm": 0.7258754968643188, + "learning_rate": 6.81718064785772e-06, + "loss": 2.0829, + "step": 18326 + }, + { + "epoch": 0.61, + "grad_norm": 0.7230064272880554, + "learning_rate": 6.8161730474772355e-06, + "loss": 2.0257, + "step": 18327 + }, + { + "epoch": 0.61, + "grad_norm": 0.7246164083480835, + "learning_rate": 6.815165483065762e-06, + "loss": 2.0067, + "step": 18328 + }, + { + "epoch": 0.61, + "grad_norm": 0.7395140528678894, + "learning_rate": 6.8141579546346814e-06, + "loss": 2.0563, + "step": 18329 + }, + { + "epoch": 0.61, + "grad_norm": 0.7418071627616882, + "learning_rate": 6.8131504621953795e-06, + "loss": 2.0798, + "step": 18330 + }, + { + "epoch": 0.61, + "grad_norm": 0.7509021162986755, + "learning_rate": 6.812143005759234e-06, + "loss": 2.0437, + "step": 18331 + }, + { + "epoch": 0.61, + "grad_norm": 0.763006329536438, + "learning_rate": 6.811135585337627e-06, + "loss": 2.0082, + "step": 18332 + }, + { + "epoch": 0.61, + "grad_norm": 0.7587042450904846, + "learning_rate": 6.810128200941942e-06, + "loss": 2.037, + "step": 18333 + }, + { + "epoch": 0.61, + "grad_norm": 0.7177515029907227, + "learning_rate": 6.809120852583563e-06, + "loss": 2.0849, + "step": 18334 + }, + { + "epoch": 0.61, + "grad_norm": 0.7240169644355774, + "learning_rate": 6.808113540273863e-06, + "loss": 2.0446, + "step": 18335 + }, + { + "epoch": 0.61, + "grad_norm": 0.7504788637161255, + "learning_rate": 6.8071062640242254e-06, + "loss": 2.0324, + "step": 18336 + }, + { + "epoch": 0.61, + "grad_norm": 0.739625871181488, + "learning_rate": 6.806099023846031e-06, + "loss": 2.0158, + "step": 18337 + }, + { + "epoch": 0.61, + "grad_norm": 0.7648923993110657, + "learning_rate": 6.805091819750656e-06, + "loss": 2.1078, + "step": 18338 + }, + { + "epoch": 0.61, + "grad_norm": 0.7783934473991394, + "learning_rate": 6.804084651749487e-06, + "loss": 2.0255, + "step": 18339 + }, + { + "epoch": 0.61, + "grad_norm": 0.7486938834190369, + "learning_rate": 6.803077519853891e-06, + "loss": 2.1277, + "step": 18340 + }, + { + "epoch": 0.61, + "grad_norm": 0.7740841507911682, + "learning_rate": 6.802070424075254e-06, + "loss": 2.1301, + "step": 18341 + }, + { + "epoch": 0.61, + "grad_norm": 0.7448037266731262, + "learning_rate": 6.801063364424949e-06, + "loss": 2.0197, + "step": 18342 + }, + { + "epoch": 0.61, + "grad_norm": 0.7407925724983215, + "learning_rate": 6.80005634091436e-06, + "loss": 2.0038, + "step": 18343 + }, + { + "epoch": 0.61, + "grad_norm": 0.7466814517974854, + "learning_rate": 6.799049353554856e-06, + "loss": 2.0602, + "step": 18344 + }, + { + "epoch": 0.61, + "grad_norm": 0.73466557264328, + "learning_rate": 6.798042402357817e-06, + "loss": 2.0553, + "step": 18345 + }, + { + "epoch": 0.61, + "grad_norm": 0.780166506767273, + "learning_rate": 6.797035487334614e-06, + "loss": 2.1165, + "step": 18346 + }, + { + "epoch": 0.61, + "grad_norm": 0.7334133386611938, + "learning_rate": 6.796028608496631e-06, + "loss": 2.0967, + "step": 18347 + }, + { + "epoch": 0.61, + "grad_norm": 0.7509230375289917, + "learning_rate": 6.795021765855241e-06, + "loss": 2.1019, + "step": 18348 + }, + { + "epoch": 0.61, + "grad_norm": 0.7461289763450623, + "learning_rate": 6.794014959421815e-06, + "loss": 2.0445, + "step": 18349 + }, + { + "epoch": 0.61, + "grad_norm": 0.7383481860160828, + "learning_rate": 6.793008189207728e-06, + "loss": 2.0878, + "step": 18350 + }, + { + "epoch": 0.61, + "grad_norm": 0.7769946455955505, + "learning_rate": 6.792001455224353e-06, + "loss": 2.214, + "step": 18351 + }, + { + "epoch": 0.61, + "grad_norm": 0.7748289108276367, + "learning_rate": 6.790994757483073e-06, + "loss": 2.0995, + "step": 18352 + }, + { + "epoch": 0.61, + "grad_norm": 0.7307443022727966, + "learning_rate": 6.789988095995248e-06, + "loss": 2.0563, + "step": 18353 + }, + { + "epoch": 0.61, + "grad_norm": 0.7601832747459412, + "learning_rate": 6.788981470772256e-06, + "loss": 2.0805, + "step": 18354 + }, + { + "epoch": 0.61, + "grad_norm": 0.7346131801605225, + "learning_rate": 6.78797488182547e-06, + "loss": 2.0322, + "step": 18355 + }, + { + "epoch": 0.61, + "grad_norm": 0.7610582113265991, + "learning_rate": 6.7869683291662634e-06, + "loss": 2.0448, + "step": 18356 + }, + { + "epoch": 0.61, + "grad_norm": 0.7298884391784668, + "learning_rate": 6.785961812806004e-06, + "loss": 2.0795, + "step": 18357 + }, + { + "epoch": 0.61, + "grad_norm": 0.7294670343399048, + "learning_rate": 6.784955332756065e-06, + "loss": 2.0507, + "step": 18358 + }, + { + "epoch": 0.61, + "grad_norm": 0.7797404527664185, + "learning_rate": 6.783948889027814e-06, + "loss": 2.1029, + "step": 18359 + }, + { + "epoch": 0.61, + "grad_norm": 0.707570493221283, + "learning_rate": 6.782942481632625e-06, + "loss": 1.9876, + "step": 18360 + }, + { + "epoch": 0.61, + "grad_norm": 0.7228553891181946, + "learning_rate": 6.7819361105818694e-06, + "loss": 2.0954, + "step": 18361 + }, + { + "epoch": 0.61, + "grad_norm": 0.7360501885414124, + "learning_rate": 6.780929775886911e-06, + "loss": 2.0595, + "step": 18362 + }, + { + "epoch": 0.61, + "grad_norm": 0.7117306590080261, + "learning_rate": 6.779923477559122e-06, + "loss": 2.0799, + "step": 18363 + }, + { + "epoch": 0.61, + "grad_norm": 0.7397478818893433, + "learning_rate": 6.77891721560987e-06, + "loss": 2.08, + "step": 18364 + }, + { + "epoch": 0.61, + "grad_norm": 0.7367069125175476, + "learning_rate": 6.777910990050525e-06, + "loss": 2.0283, + "step": 18365 + }, + { + "epoch": 0.61, + "grad_norm": 0.7536083459854126, + "learning_rate": 6.7769048008924525e-06, + "loss": 2.0385, + "step": 18366 + }, + { + "epoch": 0.61, + "grad_norm": 0.7222318053245544, + "learning_rate": 6.775898648147018e-06, + "loss": 2.0766, + "step": 18367 + }, + { + "epoch": 0.61, + "grad_norm": 0.7275909781455994, + "learning_rate": 6.774892531825593e-06, + "loss": 2.0737, + "step": 18368 + }, + { + "epoch": 0.61, + "grad_norm": 0.7236939072608948, + "learning_rate": 6.773886451939542e-06, + "loss": 2.0485, + "step": 18369 + }, + { + "epoch": 0.61, + "grad_norm": 0.7356298565864563, + "learning_rate": 6.772880408500233e-06, + "loss": 2.1176, + "step": 18370 + }, + { + "epoch": 0.61, + "grad_norm": 0.7407395839691162, + "learning_rate": 6.77187440151903e-06, + "loss": 2.0662, + "step": 18371 + }, + { + "epoch": 0.61, + "grad_norm": 0.7195042371749878, + "learning_rate": 6.770868431007296e-06, + "loss": 1.9862, + "step": 18372 + }, + { + "epoch": 0.61, + "grad_norm": 0.716382622718811, + "learning_rate": 6.769862496976396e-06, + "loss": 2.0134, + "step": 18373 + }, + { + "epoch": 0.61, + "grad_norm": 0.731662929058075, + "learning_rate": 6.768856599437703e-06, + "loss": 2.1402, + "step": 18374 + }, + { + "epoch": 0.61, + "grad_norm": 0.7474837303161621, + "learning_rate": 6.767850738402568e-06, + "loss": 2.0542, + "step": 18375 + }, + { + "epoch": 0.61, + "grad_norm": 0.7314082384109497, + "learning_rate": 6.7668449138823635e-06, + "loss": 2.0537, + "step": 18376 + }, + { + "epoch": 0.61, + "grad_norm": 0.7353610396385193, + "learning_rate": 6.76583912588845e-06, + "loss": 2.043, + "step": 18377 + }, + { + "epoch": 0.61, + "grad_norm": 0.7164190411567688, + "learning_rate": 6.764833374432191e-06, + "loss": 2.0358, + "step": 18378 + }, + { + "epoch": 0.61, + "grad_norm": 0.7325714230537415, + "learning_rate": 6.763827659524951e-06, + "loss": 2.1277, + "step": 18379 + }, + { + "epoch": 0.61, + "grad_norm": 0.7469738125801086, + "learning_rate": 6.762821981178087e-06, + "loss": 2.1015, + "step": 18380 + }, + { + "epoch": 0.61, + "grad_norm": 0.7329541444778442, + "learning_rate": 6.761816339402961e-06, + "loss": 2.0307, + "step": 18381 + }, + { + "epoch": 0.61, + "grad_norm": 0.7335854768753052, + "learning_rate": 6.760810734210938e-06, + "loss": 1.9868, + "step": 18382 + }, + { + "epoch": 0.61, + "grad_norm": 0.6994116902351379, + "learning_rate": 6.75980516561338e-06, + "loss": 2.0982, + "step": 18383 + }, + { + "epoch": 0.61, + "grad_norm": 0.7387685775756836, + "learning_rate": 6.758799633621642e-06, + "loss": 2.0488, + "step": 18384 + }, + { + "epoch": 0.61, + "grad_norm": 0.7489231824874878, + "learning_rate": 6.757794138247085e-06, + "loss": 2.0081, + "step": 18385 + }, + { + "epoch": 0.61, + "grad_norm": 0.7993369698524475, + "learning_rate": 6.75678867950107e-06, + "loss": 2.0628, + "step": 18386 + }, + { + "epoch": 0.61, + "grad_norm": 0.7146592736244202, + "learning_rate": 6.755783257394959e-06, + "loss": 2.0484, + "step": 18387 + }, + { + "epoch": 0.61, + "grad_norm": 0.7356510758399963, + "learning_rate": 6.754777871940106e-06, + "loss": 2.0267, + "step": 18388 + }, + { + "epoch": 0.61, + "grad_norm": 0.7581258416175842, + "learning_rate": 6.7537725231478665e-06, + "loss": 2.0336, + "step": 18389 + }, + { + "epoch": 0.61, + "grad_norm": 0.7559896111488342, + "learning_rate": 6.752767211029605e-06, + "loss": 2.0105, + "step": 18390 + }, + { + "epoch": 0.61, + "grad_norm": 0.7393775582313538, + "learning_rate": 6.751761935596678e-06, + "loss": 2.0674, + "step": 18391 + }, + { + "epoch": 0.61, + "grad_norm": 0.7657954096794128, + "learning_rate": 6.750756696860442e-06, + "loss": 1.9824, + "step": 18392 + }, + { + "epoch": 0.61, + "grad_norm": 0.746306300163269, + "learning_rate": 6.74975149483225e-06, + "loss": 2.043, + "step": 18393 + }, + { + "epoch": 0.61, + "grad_norm": 0.7265415191650391, + "learning_rate": 6.748746329523461e-06, + "loss": 2.0348, + "step": 18394 + }, + { + "epoch": 0.61, + "grad_norm": 0.7116268873214722, + "learning_rate": 6.74774120094543e-06, + "loss": 2.0803, + "step": 18395 + }, + { + "epoch": 0.61, + "grad_norm": 0.7705291509628296, + "learning_rate": 6.746736109109518e-06, + "loss": 2.0998, + "step": 18396 + }, + { + "epoch": 0.61, + "grad_norm": 0.7329710125923157, + "learning_rate": 6.745731054027069e-06, + "loss": 2.0695, + "step": 18397 + }, + { + "epoch": 0.61, + "grad_norm": 0.7447865605354309, + "learning_rate": 6.744726035709445e-06, + "loss": 2.0934, + "step": 18398 + }, + { + "epoch": 0.61, + "grad_norm": 0.7582442760467529, + "learning_rate": 6.743721054167998e-06, + "loss": 2.1285, + "step": 18399 + }, + { + "epoch": 0.61, + "grad_norm": 0.7348441481590271, + "learning_rate": 6.742716109414083e-06, + "loss": 2.1181, + "step": 18400 + }, + { + "epoch": 0.61, + "grad_norm": 0.7339258193969727, + "learning_rate": 6.741711201459053e-06, + "loss": 1.9627, + "step": 18401 + }, + { + "epoch": 0.61, + "grad_norm": 0.748984694480896, + "learning_rate": 6.740706330314261e-06, + "loss": 2.0739, + "step": 18402 + }, + { + "epoch": 0.61, + "grad_norm": 0.7580757737159729, + "learning_rate": 6.739701495991056e-06, + "loss": 2.0685, + "step": 18403 + }, + { + "epoch": 0.61, + "grad_norm": 0.7279966473579407, + "learning_rate": 6.738696698500793e-06, + "loss": 2.058, + "step": 18404 + }, + { + "epoch": 0.61, + "grad_norm": 0.7286447882652283, + "learning_rate": 6.737691937854826e-06, + "loss": 2.0845, + "step": 18405 + }, + { + "epoch": 0.61, + "grad_norm": 0.7269473671913147, + "learning_rate": 6.736687214064502e-06, + "loss": 2.0812, + "step": 18406 + }, + { + "epoch": 0.61, + "grad_norm": 0.7312459349632263, + "learning_rate": 6.735682527141174e-06, + "loss": 2.0512, + "step": 18407 + }, + { + "epoch": 0.61, + "grad_norm": 0.7474531531333923, + "learning_rate": 6.734677877096191e-06, + "loss": 2.1034, + "step": 18408 + }, + { + "epoch": 0.61, + "grad_norm": 0.7328645586967468, + "learning_rate": 6.733673263940905e-06, + "loss": 2.0509, + "step": 18409 + }, + { + "epoch": 0.61, + "grad_norm": 0.7562387585639954, + "learning_rate": 6.732668687686663e-06, + "loss": 2.061, + "step": 18410 + }, + { + "epoch": 0.61, + "grad_norm": 0.7350242733955383, + "learning_rate": 6.731664148344814e-06, + "loss": 1.9607, + "step": 18411 + }, + { + "epoch": 0.61, + "grad_norm": 0.7807022929191589, + "learning_rate": 6.730659645926709e-06, + "loss": 2.1291, + "step": 18412 + }, + { + "epoch": 0.61, + "grad_norm": 0.7233297824859619, + "learning_rate": 6.729655180443695e-06, + "loss": 2.068, + "step": 18413 + }, + { + "epoch": 0.61, + "grad_norm": 0.7439149618148804, + "learning_rate": 6.728650751907124e-06, + "loss": 2.0344, + "step": 18414 + }, + { + "epoch": 0.61, + "grad_norm": 0.7627593874931335, + "learning_rate": 6.727646360328336e-06, + "loss": 1.9976, + "step": 18415 + }, + { + "epoch": 0.61, + "grad_norm": 0.7293951511383057, + "learning_rate": 6.726642005718682e-06, + "loss": 2.0824, + "step": 18416 + }, + { + "epoch": 0.61, + "grad_norm": 0.7175565958023071, + "learning_rate": 6.725637688089506e-06, + "loss": 2.0198, + "step": 18417 + }, + { + "epoch": 0.61, + "grad_norm": 0.7496711611747742, + "learning_rate": 6.724633407452164e-06, + "loss": 2.0684, + "step": 18418 + }, + { + "epoch": 0.61, + "grad_norm": 0.7368891835212708, + "learning_rate": 6.723629163817986e-06, + "loss": 2.1075, + "step": 18419 + }, + { + "epoch": 0.61, + "grad_norm": 0.7421596050262451, + "learning_rate": 6.722624957198328e-06, + "loss": 1.9933, + "step": 18420 + }, + { + "epoch": 0.61, + "grad_norm": 0.7583807110786438, + "learning_rate": 6.721620787604533e-06, + "loss": 2.0893, + "step": 18421 + }, + { + "epoch": 0.61, + "grad_norm": 0.7468669414520264, + "learning_rate": 6.720616655047944e-06, + "loss": 1.9784, + "step": 18422 + }, + { + "epoch": 0.61, + "grad_norm": 0.7592142224311829, + "learning_rate": 6.71961255953991e-06, + "loss": 1.9859, + "step": 18423 + }, + { + "epoch": 0.61, + "grad_norm": 0.7409591674804688, + "learning_rate": 6.718608501091768e-06, + "loss": 2.082, + "step": 18424 + }, + { + "epoch": 0.61, + "grad_norm": 0.7203793525695801, + "learning_rate": 6.7176044797148625e-06, + "loss": 2.0792, + "step": 18425 + }, + { + "epoch": 0.61, + "grad_norm": 0.7320877313613892, + "learning_rate": 6.716600495420539e-06, + "loss": 2.0026, + "step": 18426 + }, + { + "epoch": 0.61, + "grad_norm": 0.7911693453788757, + "learning_rate": 6.715596548220142e-06, + "loss": 2.0704, + "step": 18427 + }, + { + "epoch": 0.61, + "grad_norm": 0.76523756980896, + "learning_rate": 6.714592638125006e-06, + "loss": 2.0687, + "step": 18428 + }, + { + "epoch": 0.61, + "grad_norm": 0.7689847350120544, + "learning_rate": 6.713588765146479e-06, + "loss": 2.079, + "step": 18429 + }, + { + "epoch": 0.61, + "grad_norm": 0.7348833680152893, + "learning_rate": 6.7125849292959e-06, + "loss": 2.0177, + "step": 18430 + }, + { + "epoch": 0.61, + "grad_norm": 0.7341191172599792, + "learning_rate": 6.711581130584608e-06, + "loss": 2.0583, + "step": 18431 + }, + { + "epoch": 0.61, + "grad_norm": 0.7732895612716675, + "learning_rate": 6.710577369023949e-06, + "loss": 2.005, + "step": 18432 + }, + { + "epoch": 0.61, + "grad_norm": 0.742037296295166, + "learning_rate": 6.709573644625256e-06, + "loss": 2.0123, + "step": 18433 + }, + { + "epoch": 0.61, + "grad_norm": 0.7439534068107605, + "learning_rate": 6.708569957399872e-06, + "loss": 2.062, + "step": 18434 + }, + { + "epoch": 0.61, + "grad_norm": 0.7266255021095276, + "learning_rate": 6.707566307359137e-06, + "loss": 2.0489, + "step": 18435 + }, + { + "epoch": 0.61, + "grad_norm": 0.7161381840705872, + "learning_rate": 6.706562694514389e-06, + "loss": 2.041, + "step": 18436 + }, + { + "epoch": 0.61, + "grad_norm": 0.7617234587669373, + "learning_rate": 6.705559118876963e-06, + "loss": 2.0744, + "step": 18437 + }, + { + "epoch": 0.61, + "grad_norm": 0.740382969379425, + "learning_rate": 6.704555580458201e-06, + "loss": 2.0834, + "step": 18438 + }, + { + "epoch": 0.61, + "grad_norm": 0.7304786443710327, + "learning_rate": 6.703552079269435e-06, + "loss": 2.1013, + "step": 18439 + }, + { + "epoch": 0.61, + "grad_norm": 0.768141508102417, + "learning_rate": 6.702548615322013e-06, + "loss": 2.054, + "step": 18440 + }, + { + "epoch": 0.61, + "grad_norm": 0.7651693224906921, + "learning_rate": 6.701545188627258e-06, + "loss": 1.9818, + "step": 18441 + }, + { + "epoch": 0.61, + "grad_norm": 0.7493178248405457, + "learning_rate": 6.7005417991965135e-06, + "loss": 2.0885, + "step": 18442 + }, + { + "epoch": 0.61, + "grad_norm": 0.7126354575157166, + "learning_rate": 6.699538447041115e-06, + "loss": 2.0811, + "step": 18443 + }, + { + "epoch": 0.61, + "grad_norm": 0.7364470958709717, + "learning_rate": 6.698535132172394e-06, + "loss": 2.1081, + "step": 18444 + }, + { + "epoch": 0.61, + "grad_norm": 0.7296072840690613, + "learning_rate": 6.697531854601695e-06, + "loss": 2.0842, + "step": 18445 + }, + { + "epoch": 0.61, + "grad_norm": 0.7903474569320679, + "learning_rate": 6.696528614340343e-06, + "loss": 2.0718, + "step": 18446 + }, + { + "epoch": 0.61, + "grad_norm": 0.7271576523780823, + "learning_rate": 6.6955254113996704e-06, + "loss": 2.0491, + "step": 18447 + }, + { + "epoch": 0.61, + "grad_norm": 0.777874231338501, + "learning_rate": 6.694522245791017e-06, + "loss": 2.1067, + "step": 18448 + }, + { + "epoch": 0.61, + "grad_norm": 0.7434333562850952, + "learning_rate": 6.693519117525719e-06, + "loss": 2.1115, + "step": 18449 + }, + { + "epoch": 0.61, + "grad_norm": 0.7488025426864624, + "learning_rate": 6.6925160266150994e-06, + "loss": 2.0482, + "step": 18450 + }, + { + "epoch": 0.61, + "grad_norm": 0.7355394959449768, + "learning_rate": 6.691512973070497e-06, + "loss": 2.0656, + "step": 18451 + }, + { + "epoch": 0.61, + "grad_norm": 0.7592676877975464, + "learning_rate": 6.6905099569032385e-06, + "loss": 2.0391, + "step": 18452 + }, + { + "epoch": 0.61, + "grad_norm": 0.748758852481842, + "learning_rate": 6.689506978124663e-06, + "loss": 2.0435, + "step": 18453 + }, + { + "epoch": 0.61, + "grad_norm": 0.7562140822410583, + "learning_rate": 6.6885040367461e-06, + "loss": 2.0165, + "step": 18454 + }, + { + "epoch": 0.61, + "grad_norm": 0.7367588877677917, + "learning_rate": 6.687501132778871e-06, + "loss": 2.0504, + "step": 18455 + }, + { + "epoch": 0.61, + "grad_norm": 0.7489113211631775, + "learning_rate": 6.686498266234316e-06, + "loss": 1.9668, + "step": 18456 + }, + { + "epoch": 0.61, + "grad_norm": 0.7488969564437866, + "learning_rate": 6.685495437123761e-06, + "loss": 2.0431, + "step": 18457 + }, + { + "epoch": 0.61, + "grad_norm": 0.7641574144363403, + "learning_rate": 6.684492645458539e-06, + "loss": 2.0341, + "step": 18458 + }, + { + "epoch": 0.61, + "grad_norm": 0.7666091918945312, + "learning_rate": 6.683489891249973e-06, + "loss": 2.102, + "step": 18459 + }, + { + "epoch": 0.61, + "grad_norm": 0.7313241958618164, + "learning_rate": 6.682487174509393e-06, + "loss": 2.0753, + "step": 18460 + }, + { + "epoch": 0.61, + "grad_norm": 0.7338123321533203, + "learning_rate": 6.681484495248132e-06, + "loss": 2.027, + "step": 18461 + }, + { + "epoch": 0.61, + "grad_norm": 0.7557225227355957, + "learning_rate": 6.680481853477511e-06, + "loss": 2.0674, + "step": 18462 + }, + { + "epoch": 0.61, + "grad_norm": 0.7447043657302856, + "learning_rate": 6.679479249208867e-06, + "loss": 2.0064, + "step": 18463 + }, + { + "epoch": 0.61, + "grad_norm": 0.7302720546722412, + "learning_rate": 6.678476682453515e-06, + "loss": 2.0392, + "step": 18464 + }, + { + "epoch": 0.61, + "grad_norm": 0.7419682145118713, + "learning_rate": 6.677474153222787e-06, + "loss": 2.0805, + "step": 18465 + }, + { + "epoch": 0.61, + "grad_norm": 0.7353573441505432, + "learning_rate": 6.676471661528008e-06, + "loss": 2.0408, + "step": 18466 + }, + { + "epoch": 0.61, + "grad_norm": 0.7257627248764038, + "learning_rate": 6.675469207380511e-06, + "loss": 2.0541, + "step": 18467 + }, + { + "epoch": 0.61, + "grad_norm": 0.7347812652587891, + "learning_rate": 6.674466790791608e-06, + "loss": 2.0675, + "step": 18468 + }, + { + "epoch": 0.61, + "grad_norm": 0.7464412450790405, + "learning_rate": 6.6734644117726324e-06, + "loss": 2.0622, + "step": 18469 + }, + { + "epoch": 0.61, + "grad_norm": 0.764533519744873, + "learning_rate": 6.672462070334904e-06, + "loss": 2.1465, + "step": 18470 + }, + { + "epoch": 0.61, + "grad_norm": 0.7607544660568237, + "learning_rate": 6.671459766489754e-06, + "loss": 2.0871, + "step": 18471 + }, + { + "epoch": 0.61, + "grad_norm": 0.7826836109161377, + "learning_rate": 6.670457500248497e-06, + "loss": 2.094, + "step": 18472 + }, + { + "epoch": 0.61, + "grad_norm": 0.8203714489936829, + "learning_rate": 6.66945527162246e-06, + "loss": 2.0204, + "step": 18473 + }, + { + "epoch": 0.61, + "grad_norm": 0.7409281730651855, + "learning_rate": 6.6684530806229654e-06, + "loss": 1.9983, + "step": 18474 + }, + { + "epoch": 0.61, + "grad_norm": 0.7420737147331238, + "learning_rate": 6.6674509272613364e-06, + "loss": 2.047, + "step": 18475 + }, + { + "epoch": 0.61, + "grad_norm": 0.7578684091567993, + "learning_rate": 6.666448811548895e-06, + "loss": 2.0877, + "step": 18476 + }, + { + "epoch": 0.61, + "grad_norm": 0.7497484087944031, + "learning_rate": 6.665446733496958e-06, + "loss": 1.998, + "step": 18477 + }, + { + "epoch": 0.61, + "grad_norm": 0.743179202079773, + "learning_rate": 6.664444693116853e-06, + "loss": 2.1187, + "step": 18478 + }, + { + "epoch": 0.61, + "grad_norm": 0.7174262404441833, + "learning_rate": 6.663442690419895e-06, + "loss": 2.0759, + "step": 18479 + }, + { + "epoch": 0.61, + "grad_norm": 0.74409419298172, + "learning_rate": 6.662440725417409e-06, + "loss": 2.0075, + "step": 18480 + }, + { + "epoch": 0.61, + "grad_norm": 0.711876392364502, + "learning_rate": 6.661438798120707e-06, + "loss": 2.0686, + "step": 18481 + }, + { + "epoch": 0.61, + "grad_norm": 0.7251047492027283, + "learning_rate": 6.6604369085411146e-06, + "loss": 1.9935, + "step": 18482 + }, + { + "epoch": 0.61, + "grad_norm": 0.8006262183189392, + "learning_rate": 6.659435056689949e-06, + "loss": 1.9989, + "step": 18483 + }, + { + "epoch": 0.61, + "grad_norm": 0.7704413533210754, + "learning_rate": 6.658433242578528e-06, + "loss": 2.0244, + "step": 18484 + }, + { + "epoch": 0.61, + "grad_norm": 0.7554044723510742, + "learning_rate": 6.6574314662181715e-06, + "loss": 2.0108, + "step": 18485 + }, + { + "epoch": 0.62, + "grad_norm": 0.7527334094047546, + "learning_rate": 6.656429727620195e-06, + "loss": 2.0894, + "step": 18486 + }, + { + "epoch": 0.62, + "grad_norm": 0.7796379923820496, + "learning_rate": 6.655428026795916e-06, + "loss": 2.0312, + "step": 18487 + }, + { + "epoch": 0.62, + "grad_norm": 0.7374343276023865, + "learning_rate": 6.654426363756647e-06, + "loss": 2.084, + "step": 18488 + }, + { + "epoch": 0.62, + "grad_norm": 0.738320529460907, + "learning_rate": 6.6534247385137164e-06, + "loss": 2.046, + "step": 18489 + }, + { + "epoch": 0.62, + "grad_norm": 0.7060432434082031, + "learning_rate": 6.652423151078424e-06, + "loss": 2.0325, + "step": 18490 + }, + { + "epoch": 0.62, + "grad_norm": 0.8049686551094055, + "learning_rate": 6.651421601462096e-06, + "loss": 2.0477, + "step": 18491 + }, + { + "epoch": 0.62, + "grad_norm": 0.7570608854293823, + "learning_rate": 6.650420089676044e-06, + "loss": 2.0288, + "step": 18492 + }, + { + "epoch": 0.62, + "grad_norm": 0.7236107587814331, + "learning_rate": 6.649418615731582e-06, + "loss": 2.1105, + "step": 18493 + }, + { + "epoch": 0.62, + "grad_norm": 0.7687537670135498, + "learning_rate": 6.648417179640029e-06, + "loss": 2.0354, + "step": 18494 + }, + { + "epoch": 0.62, + "grad_norm": 0.702089250087738, + "learning_rate": 6.64741578141269e-06, + "loss": 2.069, + "step": 18495 + }, + { + "epoch": 0.62, + "grad_norm": 0.7813162207603455, + "learning_rate": 6.646414421060883e-06, + "loss": 2.06, + "step": 18496 + }, + { + "epoch": 0.62, + "grad_norm": 0.7599120140075684, + "learning_rate": 6.6454130985959205e-06, + "loss": 2.1108, + "step": 18497 + }, + { + "epoch": 0.62, + "grad_norm": 0.7184450030326843, + "learning_rate": 6.644411814029118e-06, + "loss": 2.0256, + "step": 18498 + }, + { + "epoch": 0.62, + "grad_norm": 0.748634934425354, + "learning_rate": 6.643410567371782e-06, + "loss": 2.1011, + "step": 18499 + }, + { + "epoch": 0.62, + "grad_norm": 0.7581062316894531, + "learning_rate": 6.642409358635227e-06, + "loss": 2.0223, + "step": 18500 + }, + { + "epoch": 0.62, + "grad_norm": 0.7567332983016968, + "learning_rate": 6.641408187830762e-06, + "loss": 2.051, + "step": 18501 + }, + { + "epoch": 0.62, + "grad_norm": 0.724748432636261, + "learning_rate": 6.640407054969702e-06, + "loss": 2.0302, + "step": 18502 + }, + { + "epoch": 0.62, + "grad_norm": 0.716160774230957, + "learning_rate": 6.639405960063351e-06, + "loss": 2.1197, + "step": 18503 + }, + { + "epoch": 0.62, + "grad_norm": 0.7451995015144348, + "learning_rate": 6.63840490312302e-06, + "loss": 2.1023, + "step": 18504 + }, + { + "epoch": 0.62, + "grad_norm": 0.7319174408912659, + "learning_rate": 6.637403884160023e-06, + "loss": 2.045, + "step": 18505 + }, + { + "epoch": 0.62, + "grad_norm": 0.729198157787323, + "learning_rate": 6.636402903185666e-06, + "loss": 2.0904, + "step": 18506 + }, + { + "epoch": 0.62, + "grad_norm": 0.723279595375061, + "learning_rate": 6.6354019602112605e-06, + "loss": 2.0464, + "step": 18507 + }, + { + "epoch": 0.62, + "grad_norm": 0.755120038986206, + "learning_rate": 6.63440105524811e-06, + "loss": 2.0741, + "step": 18508 + }, + { + "epoch": 0.62, + "grad_norm": 0.7346803545951843, + "learning_rate": 6.633400188307523e-06, + "loss": 2.0764, + "step": 18509 + }, + { + "epoch": 0.62, + "grad_norm": 0.7358288168907166, + "learning_rate": 6.632399359400805e-06, + "loss": 2.0336, + "step": 18510 + }, + { + "epoch": 0.62, + "grad_norm": 0.7369973063468933, + "learning_rate": 6.631398568539273e-06, + "loss": 2.0748, + "step": 18511 + }, + { + "epoch": 0.62, + "grad_norm": 0.7449140548706055, + "learning_rate": 6.630397815734219e-06, + "loss": 2.075, + "step": 18512 + }, + { + "epoch": 0.62, + "grad_norm": 0.7668919563293457, + "learning_rate": 6.6293971009969574e-06, + "loss": 2.089, + "step": 18513 + }, + { + "epoch": 0.62, + "grad_norm": 0.740190863609314, + "learning_rate": 6.6283964243387925e-06, + "loss": 2.0374, + "step": 18514 + }, + { + "epoch": 0.62, + "grad_norm": 0.7501717209815979, + "learning_rate": 6.627395785771029e-06, + "loss": 2.0612, + "step": 18515 + }, + { + "epoch": 0.62, + "grad_norm": 0.7683904767036438, + "learning_rate": 6.626395185304972e-06, + "loss": 2.1385, + "step": 18516 + }, + { + "epoch": 0.62, + "grad_norm": 0.7253684401512146, + "learning_rate": 6.625394622951924e-06, + "loss": 2.0448, + "step": 18517 + }, + { + "epoch": 0.62, + "grad_norm": 0.7418583035469055, + "learning_rate": 6.624394098723188e-06, + "loss": 2.0304, + "step": 18518 + }, + { + "epoch": 0.62, + "grad_norm": 0.738362193107605, + "learning_rate": 6.6233936126300715e-06, + "loss": 2.1297, + "step": 18519 + }, + { + "epoch": 0.62, + "grad_norm": 0.7750780582427979, + "learning_rate": 6.622393164683877e-06, + "loss": 2.0816, + "step": 18520 + }, + { + "epoch": 0.62, + "grad_norm": 0.7565974593162537, + "learning_rate": 6.621392754895902e-06, + "loss": 2.0151, + "step": 18521 + }, + { + "epoch": 0.62, + "grad_norm": 0.7451688051223755, + "learning_rate": 6.6203923832774534e-06, + "loss": 2.0805, + "step": 18522 + }, + { + "epoch": 0.62, + "grad_norm": 0.7435022592544556, + "learning_rate": 6.61939204983983e-06, + "loss": 2.087, + "step": 18523 + }, + { + "epoch": 0.62, + "grad_norm": 0.7271678447723389, + "learning_rate": 6.618391754594331e-06, + "loss": 2.0815, + "step": 18524 + }, + { + "epoch": 0.62, + "grad_norm": 0.7610463500022888, + "learning_rate": 6.617391497552268e-06, + "loss": 2.0309, + "step": 18525 + }, + { + "epoch": 0.62, + "grad_norm": 0.7413537502288818, + "learning_rate": 6.616391278724925e-06, + "loss": 2.1002, + "step": 18526 + }, + { + "epoch": 0.62, + "grad_norm": 0.7352351546287537, + "learning_rate": 6.615391098123615e-06, + "loss": 2.0511, + "step": 18527 + }, + { + "epoch": 0.62, + "grad_norm": 0.7370828986167908, + "learning_rate": 6.614390955759631e-06, + "loss": 2.107, + "step": 18528 + }, + { + "epoch": 0.62, + "grad_norm": 0.7311270833015442, + "learning_rate": 6.613390851644277e-06, + "loss": 2.0287, + "step": 18529 + }, + { + "epoch": 0.62, + "grad_norm": 0.7523303627967834, + "learning_rate": 6.6123907857888455e-06, + "loss": 2.019, + "step": 18530 + }, + { + "epoch": 0.62, + "grad_norm": 0.7598008513450623, + "learning_rate": 6.6113907582046386e-06, + "loss": 2.1078, + "step": 18531 + }, + { + "epoch": 0.62, + "grad_norm": 0.7564973831176758, + "learning_rate": 6.61039076890295e-06, + "loss": 2.0271, + "step": 18532 + }, + { + "epoch": 0.62, + "grad_norm": 0.7365027070045471, + "learning_rate": 6.6093908178950875e-06, + "loss": 2.0824, + "step": 18533 + }, + { + "epoch": 0.62, + "grad_norm": 0.7344740629196167, + "learning_rate": 6.608390905192332e-06, + "loss": 2.0873, + "step": 18534 + }, + { + "epoch": 0.62, + "grad_norm": 0.7407451868057251, + "learning_rate": 6.607391030805992e-06, + "loss": 2.0686, + "step": 18535 + }, + { + "epoch": 0.62, + "grad_norm": 0.7146464586257935, + "learning_rate": 6.606391194747359e-06, + "loss": 2.0567, + "step": 18536 + }, + { + "epoch": 0.62, + "grad_norm": 0.7395581007003784, + "learning_rate": 6.605391397027728e-06, + "loss": 2.046, + "step": 18537 + }, + { + "epoch": 0.62, + "grad_norm": 0.7429301738739014, + "learning_rate": 6.604391637658403e-06, + "loss": 2.1161, + "step": 18538 + }, + { + "epoch": 0.62, + "grad_norm": 0.751114010810852, + "learning_rate": 6.603391916650665e-06, + "loss": 2.1003, + "step": 18539 + }, + { + "epoch": 0.62, + "grad_norm": 0.779699444770813, + "learning_rate": 6.602392234015813e-06, + "loss": 2.1002, + "step": 18540 + }, + { + "epoch": 0.62, + "grad_norm": 0.7516820430755615, + "learning_rate": 6.601392589765145e-06, + "loss": 1.9803, + "step": 18541 + }, + { + "epoch": 0.62, + "grad_norm": 0.764054536819458, + "learning_rate": 6.600392983909953e-06, + "loss": 1.9509, + "step": 18542 + }, + { + "epoch": 0.62, + "grad_norm": 0.7106245160102844, + "learning_rate": 6.5993934164615285e-06, + "loss": 2.0499, + "step": 18543 + }, + { + "epoch": 0.62, + "grad_norm": 0.7369493246078491, + "learning_rate": 6.5983938874311615e-06, + "loss": 2.0224, + "step": 18544 + }, + { + "epoch": 0.62, + "grad_norm": 0.7746315002441406, + "learning_rate": 6.597394396830146e-06, + "loss": 2.1313, + "step": 18545 + }, + { + "epoch": 0.62, + "grad_norm": 0.7222324013710022, + "learning_rate": 6.596394944669777e-06, + "loss": 2.0667, + "step": 18546 + }, + { + "epoch": 0.62, + "grad_norm": 0.7578703165054321, + "learning_rate": 6.595395530961346e-06, + "loss": 2.1307, + "step": 18547 + }, + { + "epoch": 0.62, + "grad_norm": 0.7602728009223938, + "learning_rate": 6.594396155716136e-06, + "loss": 2.0473, + "step": 18548 + }, + { + "epoch": 0.62, + "grad_norm": 0.7471659183502197, + "learning_rate": 6.593396818945444e-06, + "loss": 2.103, + "step": 18549 + }, + { + "epoch": 0.62, + "grad_norm": 0.7322835326194763, + "learning_rate": 6.592397520660559e-06, + "loss": 1.976, + "step": 18550 + }, + { + "epoch": 0.62, + "grad_norm": 0.7507852911949158, + "learning_rate": 6.591398260872772e-06, + "loss": 2.0413, + "step": 18551 + }, + { + "epoch": 0.62, + "grad_norm": 0.7564936280250549, + "learning_rate": 6.590399039593367e-06, + "loss": 2.0494, + "step": 18552 + }, + { + "epoch": 0.62, + "grad_norm": 0.7578199505805969, + "learning_rate": 6.589399856833634e-06, + "loss": 2.0612, + "step": 18553 + }, + { + "epoch": 0.62, + "grad_norm": 0.7551158666610718, + "learning_rate": 6.588400712604863e-06, + "loss": 2.0606, + "step": 18554 + }, + { + "epoch": 0.62, + "grad_norm": 0.7667950987815857, + "learning_rate": 6.587401606918346e-06, + "loss": 1.9682, + "step": 18555 + }, + { + "epoch": 0.62, + "grad_norm": 0.7982816696166992, + "learning_rate": 6.586402539785361e-06, + "loss": 2.1332, + "step": 18556 + }, + { + "epoch": 0.62, + "grad_norm": 0.7543305158615112, + "learning_rate": 6.585403511217201e-06, + "loss": 2.0477, + "step": 18557 + }, + { + "epoch": 0.62, + "grad_norm": 0.7680090069770813, + "learning_rate": 6.58440452122515e-06, + "loss": 2.079, + "step": 18558 + }, + { + "epoch": 0.62, + "grad_norm": 0.7442122101783752, + "learning_rate": 6.583405569820493e-06, + "loss": 2.0377, + "step": 18559 + }, + { + "epoch": 0.62, + "grad_norm": 0.7470167279243469, + "learning_rate": 6.582406657014524e-06, + "loss": 2.1016, + "step": 18560 + }, + { + "epoch": 0.62, + "grad_norm": 0.7974061369895935, + "learning_rate": 6.581407782818517e-06, + "loss": 2.1421, + "step": 18561 + }, + { + "epoch": 0.62, + "grad_norm": 0.7259982228279114, + "learning_rate": 6.580408947243762e-06, + "loss": 2.0858, + "step": 18562 + }, + { + "epoch": 0.62, + "grad_norm": 0.7713364958763123, + "learning_rate": 6.579410150301542e-06, + "loss": 2.0368, + "step": 18563 + }, + { + "epoch": 0.62, + "grad_norm": 0.7410104870796204, + "learning_rate": 6.578411392003145e-06, + "loss": 2.1292, + "step": 18564 + }, + { + "epoch": 0.62, + "grad_norm": 0.7354673147201538, + "learning_rate": 6.5774126723598485e-06, + "loss": 2.0171, + "step": 18565 + }, + { + "epoch": 0.62, + "grad_norm": 0.7544422745704651, + "learning_rate": 6.5764139913829375e-06, + "loss": 2.0134, + "step": 18566 + }, + { + "epoch": 0.62, + "grad_norm": 0.7577709555625916, + "learning_rate": 6.575415349083691e-06, + "loss": 1.9614, + "step": 18567 + }, + { + "epoch": 0.62, + "grad_norm": 0.7345360517501831, + "learning_rate": 6.5744167454734e-06, + "loss": 2.1248, + "step": 18568 + }, + { + "epoch": 0.62, + "grad_norm": 0.7373293042182922, + "learning_rate": 6.573418180563341e-06, + "loss": 2.1422, + "step": 18569 + }, + { + "epoch": 0.62, + "grad_norm": 0.7358099818229675, + "learning_rate": 6.5724196543647945e-06, + "loss": 2.0634, + "step": 18570 + }, + { + "epoch": 0.62, + "grad_norm": 0.7530602812767029, + "learning_rate": 6.57142116688904e-06, + "loss": 2.0213, + "step": 18571 + }, + { + "epoch": 0.62, + "grad_norm": 0.7092524170875549, + "learning_rate": 6.570422718147362e-06, + "loss": 1.9974, + "step": 18572 + }, + { + "epoch": 0.62, + "grad_norm": 0.7512563467025757, + "learning_rate": 6.56942430815104e-06, + "loss": 2.0375, + "step": 18573 + }, + { + "epoch": 0.62, + "grad_norm": 0.7542651295661926, + "learning_rate": 6.568425936911349e-06, + "loss": 2.079, + "step": 18574 + }, + { + "epoch": 0.62, + "grad_norm": 0.7435977458953857, + "learning_rate": 6.567427604439569e-06, + "loss": 2.0053, + "step": 18575 + }, + { + "epoch": 0.62, + "grad_norm": 0.7171084880828857, + "learning_rate": 6.5664293107469816e-06, + "loss": 2.0331, + "step": 18576 + }, + { + "epoch": 0.62, + "grad_norm": 0.7775682210922241, + "learning_rate": 6.565431055844864e-06, + "loss": 2.0395, + "step": 18577 + }, + { + "epoch": 0.62, + "grad_norm": 0.7534427642822266, + "learning_rate": 6.564432839744494e-06, + "loss": 2.0958, + "step": 18578 + }, + { + "epoch": 0.62, + "grad_norm": 0.7390238046646118, + "learning_rate": 6.563434662457149e-06, + "loss": 2.0276, + "step": 18579 + }, + { + "epoch": 0.62, + "grad_norm": 0.7491191029548645, + "learning_rate": 6.562436523994105e-06, + "loss": 2.0514, + "step": 18580 + }, + { + "epoch": 0.62, + "grad_norm": 0.7336969971656799, + "learning_rate": 6.561438424366634e-06, + "loss": 2.0236, + "step": 18581 + }, + { + "epoch": 0.62, + "grad_norm": 0.7265817523002625, + "learning_rate": 6.560440363586025e-06, + "loss": 2.0702, + "step": 18582 + }, + { + "epoch": 0.62, + "grad_norm": 0.7386250495910645, + "learning_rate": 6.559442341663538e-06, + "loss": 2.0736, + "step": 18583 + }, + { + "epoch": 0.62, + "grad_norm": 0.7195847034454346, + "learning_rate": 6.558444358610457e-06, + "loss": 2.0197, + "step": 18584 + }, + { + "epoch": 0.62, + "grad_norm": 0.7293532490730286, + "learning_rate": 6.557446414438053e-06, + "loss": 2.1337, + "step": 18585 + }, + { + "epoch": 0.62, + "grad_norm": 0.7479192018508911, + "learning_rate": 6.556448509157607e-06, + "loss": 2.0234, + "step": 18586 + }, + { + "epoch": 0.62, + "grad_norm": 0.7440563440322876, + "learning_rate": 6.555450642780383e-06, + "loss": 2.0098, + "step": 18587 + }, + { + "epoch": 0.62, + "grad_norm": 0.7220137715339661, + "learning_rate": 6.55445281531766e-06, + "loss": 1.9824, + "step": 18588 + }, + { + "epoch": 0.62, + "grad_norm": 0.7574236392974854, + "learning_rate": 6.553455026780709e-06, + "loss": 2.0997, + "step": 18589 + }, + { + "epoch": 0.62, + "grad_norm": 0.7628411650657654, + "learning_rate": 6.552457277180804e-06, + "loss": 1.9646, + "step": 18590 + }, + { + "epoch": 0.62, + "grad_norm": 0.7317736148834229, + "learning_rate": 6.551459566529218e-06, + "loss": 2.0694, + "step": 18591 + }, + { + "epoch": 0.62, + "grad_norm": 0.7322997450828552, + "learning_rate": 6.550461894837219e-06, + "loss": 2.0876, + "step": 18592 + }, + { + "epoch": 0.62, + "grad_norm": 0.7850541472434998, + "learning_rate": 6.54946426211608e-06, + "loss": 2.1307, + "step": 18593 + }, + { + "epoch": 0.62, + "grad_norm": 0.7619250416755676, + "learning_rate": 6.548466668377072e-06, + "loss": 2.0039, + "step": 18594 + }, + { + "epoch": 0.62, + "grad_norm": 0.7589712738990784, + "learning_rate": 6.547469113631466e-06, + "loss": 2.0307, + "step": 18595 + }, + { + "epoch": 0.62, + "grad_norm": 0.7439185976982117, + "learning_rate": 6.546471597890529e-06, + "loss": 2.0712, + "step": 18596 + }, + { + "epoch": 0.62, + "grad_norm": 0.7332115769386292, + "learning_rate": 6.54547412116553e-06, + "loss": 2.0485, + "step": 18597 + }, + { + "epoch": 0.62, + "grad_norm": 0.7299757599830627, + "learning_rate": 6.544476683467742e-06, + "loss": 2.1653, + "step": 18598 + }, + { + "epoch": 0.62, + "grad_norm": 0.7329026460647583, + "learning_rate": 6.543479284808429e-06, + "loss": 1.9965, + "step": 18599 + }, + { + "epoch": 0.62, + "grad_norm": 0.7692344784736633, + "learning_rate": 6.542481925198867e-06, + "loss": 2.1086, + "step": 18600 + }, + { + "epoch": 0.62, + "grad_norm": 0.7431564331054688, + "learning_rate": 6.541484604650314e-06, + "loss": 2.0737, + "step": 18601 + }, + { + "epoch": 0.62, + "grad_norm": 0.7647882699966431, + "learning_rate": 6.540487323174041e-06, + "loss": 2.1082, + "step": 18602 + }, + { + "epoch": 0.62, + "grad_norm": 0.7459238171577454, + "learning_rate": 6.539490080781312e-06, + "loss": 2.1055, + "step": 18603 + }, + { + "epoch": 0.62, + "grad_norm": 0.7486720085144043, + "learning_rate": 6.538492877483405e-06, + "loss": 2.1142, + "step": 18604 + }, + { + "epoch": 0.62, + "grad_norm": 0.7434862852096558, + "learning_rate": 6.537495713291569e-06, + "loss": 2.09, + "step": 18605 + }, + { + "epoch": 0.62, + "grad_norm": 0.7574500441551208, + "learning_rate": 6.536498588217077e-06, + "loss": 2.0853, + "step": 18606 + }, + { + "epoch": 0.62, + "grad_norm": 0.7360900044441223, + "learning_rate": 6.535501502271198e-06, + "loss": 2.017, + "step": 18607 + }, + { + "epoch": 0.62, + "grad_norm": 0.7576977610588074, + "learning_rate": 6.534504455465189e-06, + "loss": 2.0722, + "step": 18608 + }, + { + "epoch": 0.62, + "grad_norm": 0.7284348607063293, + "learning_rate": 6.533507447810322e-06, + "loss": 1.9841, + "step": 18609 + }, + { + "epoch": 0.62, + "grad_norm": 0.7146414518356323, + "learning_rate": 6.532510479317854e-06, + "loss": 2.0287, + "step": 18610 + }, + { + "epoch": 0.62, + "grad_norm": 0.7438479661941528, + "learning_rate": 6.531513549999048e-06, + "loss": 2.0424, + "step": 18611 + }, + { + "epoch": 0.62, + "grad_norm": 0.7490966320037842, + "learning_rate": 6.530516659865171e-06, + "loss": 2.0965, + "step": 18612 + }, + { + "epoch": 0.62, + "grad_norm": 0.7352954149246216, + "learning_rate": 6.529519808927487e-06, + "loss": 2.1059, + "step": 18613 + }, + { + "epoch": 0.62, + "grad_norm": 0.7383978366851807, + "learning_rate": 6.528522997197251e-06, + "loss": 2.01, + "step": 18614 + }, + { + "epoch": 0.62, + "grad_norm": 0.7254314422607422, + "learning_rate": 6.527526224685727e-06, + "loss": 2.0492, + "step": 18615 + }, + { + "epoch": 0.62, + "grad_norm": 0.7378753423690796, + "learning_rate": 6.5265294914041775e-06, + "loss": 2.043, + "step": 18616 + }, + { + "epoch": 0.62, + "grad_norm": 0.7849466800689697, + "learning_rate": 6.525532797363865e-06, + "loss": 2.1109, + "step": 18617 + }, + { + "epoch": 0.62, + "grad_norm": 0.7531061768531799, + "learning_rate": 6.524536142576043e-06, + "loss": 2.0784, + "step": 18618 + }, + { + "epoch": 0.62, + "grad_norm": 0.7506963610649109, + "learning_rate": 6.523539527051974e-06, + "loss": 2.0142, + "step": 18619 + }, + { + "epoch": 0.62, + "grad_norm": 0.7287465929985046, + "learning_rate": 6.5225429508029194e-06, + "loss": 2.1078, + "step": 18620 + }, + { + "epoch": 0.62, + "grad_norm": 0.7326143383979797, + "learning_rate": 6.521546413840137e-06, + "loss": 2.0994, + "step": 18621 + }, + { + "epoch": 0.62, + "grad_norm": 0.7535800337791443, + "learning_rate": 6.520549916174888e-06, + "loss": 1.9911, + "step": 18622 + }, + { + "epoch": 0.62, + "grad_norm": 0.81272953748703, + "learning_rate": 6.519553457818421e-06, + "loss": 2.0249, + "step": 18623 + }, + { + "epoch": 0.62, + "grad_norm": 0.7339419722557068, + "learning_rate": 6.518557038782003e-06, + "loss": 2.0275, + "step": 18624 + }, + { + "epoch": 0.62, + "grad_norm": 0.8154991865158081, + "learning_rate": 6.517560659076883e-06, + "loss": 2.0438, + "step": 18625 + }, + { + "epoch": 0.62, + "grad_norm": 0.7350296378135681, + "learning_rate": 6.516564318714329e-06, + "loss": 1.9909, + "step": 18626 + }, + { + "epoch": 0.62, + "grad_norm": 0.7530209422111511, + "learning_rate": 6.515568017705585e-06, + "loss": 2.053, + "step": 18627 + }, + { + "epoch": 0.62, + "grad_norm": 0.752149224281311, + "learning_rate": 6.514571756061911e-06, + "loss": 2.1196, + "step": 18628 + }, + { + "epoch": 0.62, + "grad_norm": 0.7609082460403442, + "learning_rate": 6.513575533794564e-06, + "loss": 2.0659, + "step": 18629 + }, + { + "epoch": 0.62, + "grad_norm": 0.7366597652435303, + "learning_rate": 6.512579350914796e-06, + "loss": 2.0386, + "step": 18630 + }, + { + "epoch": 0.62, + "grad_norm": 0.7405533790588379, + "learning_rate": 6.511583207433867e-06, + "loss": 2.0984, + "step": 18631 + }, + { + "epoch": 0.62, + "grad_norm": 0.7394817471504211, + "learning_rate": 6.510587103363022e-06, + "loss": 2.0218, + "step": 18632 + }, + { + "epoch": 0.62, + "grad_norm": 0.7754571437835693, + "learning_rate": 6.509591038713519e-06, + "loss": 2.0161, + "step": 18633 + }, + { + "epoch": 0.62, + "grad_norm": 0.7264158725738525, + "learning_rate": 6.5085950134966114e-06, + "loss": 2.0168, + "step": 18634 + }, + { + "epoch": 0.62, + "grad_norm": 0.7486907839775085, + "learning_rate": 6.507599027723554e-06, + "loss": 2.0733, + "step": 18635 + }, + { + "epoch": 0.62, + "grad_norm": 0.7322481870651245, + "learning_rate": 6.506603081405593e-06, + "loss": 2.0795, + "step": 18636 + }, + { + "epoch": 0.62, + "grad_norm": 0.781262218952179, + "learning_rate": 6.505607174553985e-06, + "loss": 2.0985, + "step": 18637 + }, + { + "epoch": 0.62, + "grad_norm": 0.7223289012908936, + "learning_rate": 6.504611307179976e-06, + "loss": 2.0166, + "step": 18638 + }, + { + "epoch": 0.62, + "grad_norm": 0.7323281764984131, + "learning_rate": 6.5036154792948215e-06, + "loss": 2.0576, + "step": 18639 + }, + { + "epoch": 0.62, + "grad_norm": 0.7427610754966736, + "learning_rate": 6.502619690909775e-06, + "loss": 2.1117, + "step": 18640 + }, + { + "epoch": 0.62, + "grad_norm": 0.7252446413040161, + "learning_rate": 6.501623942036075e-06, + "loss": 2.0444, + "step": 18641 + }, + { + "epoch": 0.62, + "grad_norm": 0.7693911790847778, + "learning_rate": 6.50062823268498e-06, + "loss": 2.0276, + "step": 18642 + }, + { + "epoch": 0.62, + "grad_norm": 0.7529953122138977, + "learning_rate": 6.499632562867736e-06, + "loss": 2.0136, + "step": 18643 + }, + { + "epoch": 0.62, + "grad_norm": 0.7839812636375427, + "learning_rate": 6.498636932595594e-06, + "loss": 1.9861, + "step": 18644 + }, + { + "epoch": 0.62, + "grad_norm": 0.7708064317703247, + "learning_rate": 6.497641341879799e-06, + "loss": 2.0608, + "step": 18645 + }, + { + "epoch": 0.62, + "grad_norm": 0.7637760639190674, + "learning_rate": 6.496645790731595e-06, + "loss": 2.0478, + "step": 18646 + }, + { + "epoch": 0.62, + "grad_norm": 0.7299715280532837, + "learning_rate": 6.495650279162237e-06, + "loss": 2.0025, + "step": 18647 + }, + { + "epoch": 0.62, + "grad_norm": 0.7631213665008545, + "learning_rate": 6.494654807182974e-06, + "loss": 2.0233, + "step": 18648 + }, + { + "epoch": 0.62, + "grad_norm": 0.7270708084106445, + "learning_rate": 6.493659374805039e-06, + "loss": 2.0337, + "step": 18649 + }, + { + "epoch": 0.62, + "grad_norm": 0.7305204272270203, + "learning_rate": 6.492663982039687e-06, + "loss": 2.0174, + "step": 18650 + }, + { + "epoch": 0.62, + "grad_norm": 0.7252962589263916, + "learning_rate": 6.4916686288981626e-06, + "loss": 2.0308, + "step": 18651 + }, + { + "epoch": 0.62, + "grad_norm": 0.7339796423912048, + "learning_rate": 6.490673315391709e-06, + "loss": 2.0419, + "step": 18652 + }, + { + "epoch": 0.62, + "grad_norm": 0.7314265966415405, + "learning_rate": 6.489678041531575e-06, + "loss": 2.0869, + "step": 18653 + }, + { + "epoch": 0.62, + "grad_norm": 0.7639754414558411, + "learning_rate": 6.488682807328998e-06, + "loss": 2.0728, + "step": 18654 + }, + { + "epoch": 0.62, + "grad_norm": 0.7587751746177673, + "learning_rate": 6.487687612795225e-06, + "loss": 2.0064, + "step": 18655 + }, + { + "epoch": 0.62, + "grad_norm": 0.7325262427330017, + "learning_rate": 6.486692457941499e-06, + "loss": 2.0074, + "step": 18656 + }, + { + "epoch": 0.62, + "grad_norm": 0.7487002611160278, + "learning_rate": 6.485697342779066e-06, + "loss": 2.0582, + "step": 18657 + }, + { + "epoch": 0.62, + "grad_norm": 0.7500911355018616, + "learning_rate": 6.484702267319162e-06, + "loss": 2.0243, + "step": 18658 + }, + { + "epoch": 0.62, + "grad_norm": 0.736762285232544, + "learning_rate": 6.483707231573031e-06, + "loss": 2.0751, + "step": 18659 + }, + { + "epoch": 0.62, + "grad_norm": 0.7285959720611572, + "learning_rate": 6.482712235551913e-06, + "loss": 2.0565, + "step": 18660 + }, + { + "epoch": 0.62, + "grad_norm": 0.7394577264785767, + "learning_rate": 6.481717279267055e-06, + "loss": 2.0657, + "step": 18661 + }, + { + "epoch": 0.62, + "grad_norm": 0.7303134202957153, + "learning_rate": 6.480722362729692e-06, + "loss": 2.0865, + "step": 18662 + }, + { + "epoch": 0.62, + "grad_norm": 0.749218761920929, + "learning_rate": 6.479727485951066e-06, + "loss": 2.114, + "step": 18663 + }, + { + "epoch": 0.62, + "grad_norm": 0.759756863117218, + "learning_rate": 6.478732648942414e-06, + "loss": 2.0115, + "step": 18664 + }, + { + "epoch": 0.62, + "grad_norm": 0.7273774147033691, + "learning_rate": 6.477737851714978e-06, + "loss": 2.0354, + "step": 18665 + }, + { + "epoch": 0.62, + "grad_norm": 0.7539119720458984, + "learning_rate": 6.476743094279996e-06, + "loss": 2.0247, + "step": 18666 + }, + { + "epoch": 0.62, + "grad_norm": 0.7333539128303528, + "learning_rate": 6.475748376648705e-06, + "loss": 2.0584, + "step": 18667 + }, + { + "epoch": 0.62, + "grad_norm": 0.7649315595626831, + "learning_rate": 6.474753698832341e-06, + "loss": 2.0007, + "step": 18668 + }, + { + "epoch": 0.62, + "grad_norm": 0.7428023219108582, + "learning_rate": 6.473759060842146e-06, + "loss": 2.0572, + "step": 18669 + }, + { + "epoch": 0.62, + "grad_norm": 0.7401738166809082, + "learning_rate": 6.472764462689354e-06, + "loss": 2.0553, + "step": 18670 + }, + { + "epoch": 0.62, + "grad_norm": 0.7846989035606384, + "learning_rate": 6.471769904385205e-06, + "loss": 2.059, + "step": 18671 + }, + { + "epoch": 0.62, + "grad_norm": 0.7468416094779968, + "learning_rate": 6.470775385940928e-06, + "loss": 2.0536, + "step": 18672 + }, + { + "epoch": 0.62, + "grad_norm": 0.734138548374176, + "learning_rate": 6.4697809073677645e-06, + "loss": 2.0694, + "step": 18673 + }, + { + "epoch": 0.62, + "grad_norm": 0.7326032519340515, + "learning_rate": 6.468786468676944e-06, + "loss": 2.0581, + "step": 18674 + }, + { + "epoch": 0.62, + "grad_norm": 0.7980799674987793, + "learning_rate": 6.4677920698797115e-06, + "loss": 2.0984, + "step": 18675 + }, + { + "epoch": 0.62, + "grad_norm": 0.7337396740913391, + "learning_rate": 6.466797710987287e-06, + "loss": 2.0581, + "step": 18676 + }, + { + "epoch": 0.62, + "grad_norm": 0.7542121410369873, + "learning_rate": 6.4658033920109145e-06, + "loss": 2.0279, + "step": 18677 + }, + { + "epoch": 0.62, + "grad_norm": 0.7093291878700256, + "learning_rate": 6.464809112961823e-06, + "loss": 2.0322, + "step": 18678 + }, + { + "epoch": 0.62, + "grad_norm": 0.7377110719680786, + "learning_rate": 6.4638148738512485e-06, + "loss": 2.1181, + "step": 18679 + }, + { + "epoch": 0.62, + "grad_norm": 0.7264137268066406, + "learning_rate": 6.4628206746904195e-06, + "loss": 2.0391, + "step": 18680 + }, + { + "epoch": 0.62, + "grad_norm": 0.7288026213645935, + "learning_rate": 6.46182651549057e-06, + "loss": 1.9488, + "step": 18681 + }, + { + "epoch": 0.62, + "grad_norm": 0.7632126212120056, + "learning_rate": 6.460832396262929e-06, + "loss": 2.0333, + "step": 18682 + }, + { + "epoch": 0.62, + "grad_norm": 0.707691490650177, + "learning_rate": 6.45983831701873e-06, + "loss": 2.0451, + "step": 18683 + }, + { + "epoch": 0.62, + "grad_norm": 0.7263169884681702, + "learning_rate": 6.458844277769205e-06, + "loss": 2.0973, + "step": 18684 + }, + { + "epoch": 0.62, + "grad_norm": 0.8025210499763489, + "learning_rate": 6.457850278525581e-06, + "loss": 2.1304, + "step": 18685 + }, + { + "epoch": 0.62, + "grad_norm": 0.7579771280288696, + "learning_rate": 6.456856319299089e-06, + "loss": 2.0455, + "step": 18686 + }, + { + "epoch": 0.62, + "grad_norm": 0.7365447282791138, + "learning_rate": 6.4558624001009565e-06, + "loss": 2.0863, + "step": 18687 + }, + { + "epoch": 0.62, + "grad_norm": 0.7418259382247925, + "learning_rate": 6.4548685209424164e-06, + "loss": 2.1442, + "step": 18688 + }, + { + "epoch": 0.62, + "grad_norm": 0.7231990694999695, + "learning_rate": 6.453874681834692e-06, + "loss": 2.0924, + "step": 18689 + }, + { + "epoch": 0.62, + "grad_norm": 0.7420551180839539, + "learning_rate": 6.452880882789011e-06, + "loss": 2.0362, + "step": 18690 + }, + { + "epoch": 0.62, + "grad_norm": 0.7739832401275635, + "learning_rate": 6.451887123816605e-06, + "loss": 2.1134, + "step": 18691 + }, + { + "epoch": 0.62, + "grad_norm": 0.7566604018211365, + "learning_rate": 6.450893404928699e-06, + "loss": 2.0859, + "step": 18692 + }, + { + "epoch": 0.62, + "grad_norm": 0.7582191228866577, + "learning_rate": 6.449899726136521e-06, + "loss": 2.0834, + "step": 18693 + }, + { + "epoch": 0.62, + "grad_norm": 0.7484259605407715, + "learning_rate": 6.448906087451294e-06, + "loss": 2.0583, + "step": 18694 + }, + { + "epoch": 0.62, + "grad_norm": 0.739743173122406, + "learning_rate": 6.447912488884245e-06, + "loss": 2.0309, + "step": 18695 + }, + { + "epoch": 0.62, + "grad_norm": 0.7430228590965271, + "learning_rate": 6.446918930446596e-06, + "loss": 2.0861, + "step": 18696 + }, + { + "epoch": 0.62, + "grad_norm": 0.7674102187156677, + "learning_rate": 6.445925412149581e-06, + "loss": 2.0436, + "step": 18697 + }, + { + "epoch": 0.62, + "grad_norm": 0.7373661398887634, + "learning_rate": 6.444931934004412e-06, + "loss": 2.0017, + "step": 18698 + }, + { + "epoch": 0.62, + "grad_norm": 0.8076265454292297, + "learning_rate": 6.44393849602232e-06, + "loss": 2.0765, + "step": 18699 + }, + { + "epoch": 0.62, + "grad_norm": 0.7347669005393982, + "learning_rate": 6.442945098214528e-06, + "loss": 2.0545, + "step": 18700 + }, + { + "epoch": 0.62, + "grad_norm": 0.7427253723144531, + "learning_rate": 6.441951740592256e-06, + "loss": 2.0403, + "step": 18701 + }, + { + "epoch": 0.62, + "grad_norm": 0.7446337342262268, + "learning_rate": 6.440958423166732e-06, + "loss": 2.017, + "step": 18702 + }, + { + "epoch": 0.62, + "grad_norm": 0.7308014035224915, + "learning_rate": 6.439965145949171e-06, + "loss": 1.9847, + "step": 18703 + }, + { + "epoch": 0.62, + "grad_norm": 0.7425352931022644, + "learning_rate": 6.438971908950795e-06, + "loss": 2.0106, + "step": 18704 + }, + { + "epoch": 0.62, + "grad_norm": 0.7417636513710022, + "learning_rate": 6.4379787121828286e-06, + "loss": 2.1202, + "step": 18705 + }, + { + "epoch": 0.62, + "grad_norm": 0.7295835018157959, + "learning_rate": 6.436985555656495e-06, + "loss": 2.0216, + "step": 18706 + }, + { + "epoch": 0.62, + "grad_norm": 0.7420196533203125, + "learning_rate": 6.435992439383007e-06, + "loss": 2.0603, + "step": 18707 + }, + { + "epoch": 0.62, + "grad_norm": 0.7313871383666992, + "learning_rate": 6.434999363373589e-06, + "loss": 2.0297, + "step": 18708 + }, + { + "epoch": 0.62, + "grad_norm": 0.7531962990760803, + "learning_rate": 6.4340063276394585e-06, + "loss": 2.1336, + "step": 18709 + }, + { + "epoch": 0.62, + "grad_norm": 0.7855445146560669, + "learning_rate": 6.433013332191837e-06, + "loss": 2.0704, + "step": 18710 + }, + { + "epoch": 0.62, + "grad_norm": 0.732207715511322, + "learning_rate": 6.432020377041937e-06, + "loss": 2.0647, + "step": 18711 + }, + { + "epoch": 0.62, + "grad_norm": 0.7460048794746399, + "learning_rate": 6.431027462200979e-06, + "loss": 2.035, + "step": 18712 + }, + { + "epoch": 0.62, + "grad_norm": 0.7749593257904053, + "learning_rate": 6.430034587680183e-06, + "loss": 2.0897, + "step": 18713 + }, + { + "epoch": 0.62, + "grad_norm": 0.7406306862831116, + "learning_rate": 6.429041753490763e-06, + "loss": 2.0204, + "step": 18714 + }, + { + "epoch": 0.62, + "grad_norm": 0.7183247804641724, + "learning_rate": 6.42804895964394e-06, + "loss": 2.0585, + "step": 18715 + }, + { + "epoch": 0.62, + "grad_norm": 0.7433620691299438, + "learning_rate": 6.427056206150923e-06, + "loss": 2.0462, + "step": 18716 + }, + { + "epoch": 0.62, + "grad_norm": 0.7360041737556458, + "learning_rate": 6.4260634930229315e-06, + "loss": 2.1453, + "step": 18717 + }, + { + "epoch": 0.62, + "grad_norm": 0.7492828965187073, + "learning_rate": 6.425070820271178e-06, + "loss": 2.0092, + "step": 18718 + }, + { + "epoch": 0.62, + "grad_norm": 0.7267391681671143, + "learning_rate": 6.424078187906886e-06, + "loss": 2.0884, + "step": 18719 + }, + { + "epoch": 0.62, + "grad_norm": 0.7435503005981445, + "learning_rate": 6.423085595941256e-06, + "loss": 2.0289, + "step": 18720 + }, + { + "epoch": 0.62, + "grad_norm": 0.7530199289321899, + "learning_rate": 6.4220930443855114e-06, + "loss": 2.0705, + "step": 18721 + }, + { + "epoch": 0.62, + "grad_norm": 0.7416803240776062, + "learning_rate": 6.421100533250864e-06, + "loss": 2.0462, + "step": 18722 + }, + { + "epoch": 0.62, + "grad_norm": 0.7259017825126648, + "learning_rate": 6.420108062548522e-06, + "loss": 2.0659, + "step": 18723 + }, + { + "epoch": 0.62, + "grad_norm": 0.743595540523529, + "learning_rate": 6.419115632289706e-06, + "loss": 1.9893, + "step": 18724 + }, + { + "epoch": 0.62, + "grad_norm": 0.7349228262901306, + "learning_rate": 6.418123242485619e-06, + "loss": 2.0662, + "step": 18725 + }, + { + "epoch": 0.62, + "grad_norm": 0.7312232851982117, + "learning_rate": 6.417130893147476e-06, + "loss": 2.0, + "step": 18726 + }, + { + "epoch": 0.62, + "grad_norm": 0.744740903377533, + "learning_rate": 6.416138584286489e-06, + "loss": 2.074, + "step": 18727 + }, + { + "epoch": 0.62, + "grad_norm": 0.7381023168563843, + "learning_rate": 6.415146315913872e-06, + "loss": 2.0204, + "step": 18728 + }, + { + "epoch": 0.62, + "grad_norm": 0.7310817241668701, + "learning_rate": 6.414154088040827e-06, + "loss": 2.1035, + "step": 18729 + }, + { + "epoch": 0.62, + "grad_norm": 0.730907142162323, + "learning_rate": 6.413161900678568e-06, + "loss": 2.0759, + "step": 18730 + }, + { + "epoch": 0.62, + "grad_norm": 0.7264283299446106, + "learning_rate": 6.412169753838304e-06, + "loss": 2.0223, + "step": 18731 + }, + { + "epoch": 0.62, + "grad_norm": 0.7308387756347656, + "learning_rate": 6.411177647531246e-06, + "loss": 2.0935, + "step": 18732 + }, + { + "epoch": 0.62, + "grad_norm": 0.7343766093254089, + "learning_rate": 6.410185581768596e-06, + "loss": 2.0389, + "step": 18733 + }, + { + "epoch": 0.62, + "grad_norm": 0.7109951972961426, + "learning_rate": 6.409193556561566e-06, + "loss": 1.964, + "step": 18734 + }, + { + "epoch": 0.62, + "grad_norm": 0.7619087100028992, + "learning_rate": 6.4082015719213616e-06, + "loss": 2.0308, + "step": 18735 + }, + { + "epoch": 0.62, + "grad_norm": 0.7385216951370239, + "learning_rate": 6.407209627859194e-06, + "loss": 2.045, + "step": 18736 + }, + { + "epoch": 0.62, + "grad_norm": 0.7368202209472656, + "learning_rate": 6.406217724386267e-06, + "loss": 2.0764, + "step": 18737 + }, + { + "epoch": 0.62, + "grad_norm": 0.7617353796958923, + "learning_rate": 6.405225861513784e-06, + "loss": 2.0652, + "step": 18738 + }, + { + "epoch": 0.62, + "grad_norm": 0.7459923624992371, + "learning_rate": 6.404234039252952e-06, + "loss": 2.0529, + "step": 18739 + }, + { + "epoch": 0.62, + "grad_norm": 0.7317358255386353, + "learning_rate": 6.403242257614974e-06, + "loss": 2.0328, + "step": 18740 + }, + { + "epoch": 0.62, + "grad_norm": 0.7320467233657837, + "learning_rate": 6.402250516611066e-06, + "loss": 2.0343, + "step": 18741 + }, + { + "epoch": 0.62, + "grad_norm": 0.7482047080993652, + "learning_rate": 6.401258816252415e-06, + "loss": 2.0868, + "step": 18742 + }, + { + "epoch": 0.62, + "grad_norm": 0.7384339570999146, + "learning_rate": 6.400267156550235e-06, + "loss": 2.0375, + "step": 18743 + }, + { + "epoch": 0.62, + "grad_norm": 0.7386241555213928, + "learning_rate": 6.3992755375157266e-06, + "loss": 2.0985, + "step": 18744 + }, + { + "epoch": 0.62, + "grad_norm": 0.7524778246879578, + "learning_rate": 6.398283959160092e-06, + "loss": 2.1486, + "step": 18745 + }, + { + "epoch": 0.62, + "grad_norm": 0.749251127243042, + "learning_rate": 6.3972924214945386e-06, + "loss": 2.0073, + "step": 18746 + }, + { + "epoch": 0.62, + "grad_norm": 0.7471858859062195, + "learning_rate": 6.39630092453026e-06, + "loss": 2.0451, + "step": 18747 + }, + { + "epoch": 0.62, + "grad_norm": 0.7485433220863342, + "learning_rate": 6.395309468278463e-06, + "loss": 2.1109, + "step": 18748 + }, + { + "epoch": 0.62, + "grad_norm": 0.737152099609375, + "learning_rate": 6.3943180527503464e-06, + "loss": 1.983, + "step": 18749 + }, + { + "epoch": 0.62, + "grad_norm": 0.7454278469085693, + "learning_rate": 6.3933266779571146e-06, + "loss": 2.108, + "step": 18750 + }, + { + "epoch": 0.62, + "grad_norm": 0.7259944081306458, + "learning_rate": 6.392335343909961e-06, + "loss": 1.9763, + "step": 18751 + }, + { + "epoch": 0.62, + "grad_norm": 0.7083936333656311, + "learning_rate": 6.3913440506200896e-06, + "loss": 2.0154, + "step": 18752 + }, + { + "epoch": 0.62, + "grad_norm": 0.7520551085472107, + "learning_rate": 6.390352798098695e-06, + "loss": 2.0173, + "step": 18753 + }, + { + "epoch": 0.62, + "grad_norm": 0.7846324443817139, + "learning_rate": 6.3893615863569816e-06, + "loss": 2.0291, + "step": 18754 + }, + { + "epoch": 0.62, + "grad_norm": 0.7679775357246399, + "learning_rate": 6.388370415406147e-06, + "loss": 2.0628, + "step": 18755 + }, + { + "epoch": 0.62, + "grad_norm": 0.7163739204406738, + "learning_rate": 6.387379285257385e-06, + "loss": 2.044, + "step": 18756 + }, + { + "epoch": 0.62, + "grad_norm": 0.7470616698265076, + "learning_rate": 6.386388195921893e-06, + "loss": 2.0238, + "step": 18757 + }, + { + "epoch": 0.62, + "grad_norm": 0.7423052787780762, + "learning_rate": 6.385397147410873e-06, + "loss": 2.0296, + "step": 18758 + }, + { + "epoch": 0.62, + "grad_norm": 0.7891109585762024, + "learning_rate": 6.3844061397355166e-06, + "loss": 2.0465, + "step": 18759 + }, + { + "epoch": 0.62, + "grad_norm": 0.7684121131896973, + "learning_rate": 6.383415172907019e-06, + "loss": 2.0532, + "step": 18760 + }, + { + "epoch": 0.62, + "grad_norm": 0.7775060534477234, + "learning_rate": 6.382424246936576e-06, + "loss": 2.0173, + "step": 18761 + }, + { + "epoch": 0.62, + "grad_norm": 0.7461726069450378, + "learning_rate": 6.381433361835385e-06, + "loss": 2.0323, + "step": 18762 + }, + { + "epoch": 0.62, + "grad_norm": 0.7559880018234253, + "learning_rate": 6.380442517614643e-06, + "loss": 2.0817, + "step": 18763 + }, + { + "epoch": 0.62, + "grad_norm": 0.7563473582267761, + "learning_rate": 6.379451714285536e-06, + "loss": 2.0937, + "step": 18764 + }, + { + "epoch": 0.62, + "grad_norm": 0.7257256507873535, + "learning_rate": 6.378460951859262e-06, + "loss": 2.0808, + "step": 18765 + }, + { + "epoch": 0.62, + "grad_norm": 0.7032012343406677, + "learning_rate": 6.3774702303470134e-06, + "loss": 2.0326, + "step": 18766 + }, + { + "epoch": 0.62, + "grad_norm": 0.7205162048339844, + "learning_rate": 6.376479549759983e-06, + "loss": 2.0323, + "step": 18767 + }, + { + "epoch": 0.62, + "grad_norm": 0.7723408937454224, + "learning_rate": 6.375488910109369e-06, + "loss": 2.0689, + "step": 18768 + }, + { + "epoch": 0.62, + "grad_norm": 0.7330254912376404, + "learning_rate": 6.37449831140635e-06, + "loss": 2.0783, + "step": 18769 + }, + { + "epoch": 0.62, + "grad_norm": 0.755257785320282, + "learning_rate": 6.373507753662126e-06, + "loss": 2.0329, + "step": 18770 + }, + { + "epoch": 0.62, + "grad_norm": 0.7577824592590332, + "learning_rate": 6.372517236887886e-06, + "loss": 2.1095, + "step": 18771 + }, + { + "epoch": 0.62, + "grad_norm": 0.7358545660972595, + "learning_rate": 6.3715267610948235e-06, + "loss": 2.0193, + "step": 18772 + }, + { + "epoch": 0.62, + "grad_norm": 0.7600029706954956, + "learning_rate": 6.370536326294122e-06, + "loss": 2.052, + "step": 18773 + }, + { + "epoch": 0.62, + "grad_norm": 0.7242233753204346, + "learning_rate": 6.3695459324969745e-06, + "loss": 2.0666, + "step": 18774 + }, + { + "epoch": 0.62, + "grad_norm": 0.7165502309799194, + "learning_rate": 6.368555579714568e-06, + "loss": 2.012, + "step": 18775 + }, + { + "epoch": 0.62, + "grad_norm": 0.7487038969993591, + "learning_rate": 6.3675652679580935e-06, + "loss": 2.0192, + "step": 18776 + }, + { + "epoch": 0.62, + "grad_norm": 0.76726895570755, + "learning_rate": 6.3665749972387405e-06, + "loss": 2.0843, + "step": 18777 + }, + { + "epoch": 0.62, + "grad_norm": 0.7534230351448059, + "learning_rate": 6.365584767567692e-06, + "loss": 2.0355, + "step": 18778 + }, + { + "epoch": 0.62, + "grad_norm": 0.7393602132797241, + "learning_rate": 6.3645945789561355e-06, + "loss": 2.0991, + "step": 18779 + }, + { + "epoch": 0.62, + "grad_norm": 0.7179310321807861, + "learning_rate": 6.3636044314152605e-06, + "loss": 2.0137, + "step": 18780 + }, + { + "epoch": 0.62, + "grad_norm": 0.7253159284591675, + "learning_rate": 6.362614324956251e-06, + "loss": 2.0459, + "step": 18781 + }, + { + "epoch": 0.62, + "grad_norm": 0.7539914846420288, + "learning_rate": 6.361624259590293e-06, + "loss": 2.1153, + "step": 18782 + }, + { + "epoch": 0.62, + "grad_norm": 0.7424477934837341, + "learning_rate": 6.36063423532857e-06, + "loss": 2.0425, + "step": 18783 + }, + { + "epoch": 0.62, + "grad_norm": 0.7200141549110413, + "learning_rate": 6.35964425218227e-06, + "loss": 2.0172, + "step": 18784 + }, + { + "epoch": 0.62, + "grad_norm": 0.7303518652915955, + "learning_rate": 6.3586543101625755e-06, + "loss": 2.0619, + "step": 18785 + }, + { + "epoch": 0.63, + "grad_norm": 0.7125300765037537, + "learning_rate": 6.357664409280673e-06, + "loss": 1.9523, + "step": 18786 + }, + { + "epoch": 0.63, + "grad_norm": 0.758920431137085, + "learning_rate": 6.356674549547741e-06, + "loss": 2.0219, + "step": 18787 + }, + { + "epoch": 0.63, + "grad_norm": 0.7676115036010742, + "learning_rate": 6.355684730974965e-06, + "loss": 2.1011, + "step": 18788 + }, + { + "epoch": 0.63, + "grad_norm": 0.7601562738418579, + "learning_rate": 6.354694953573526e-06, + "loss": 2.0497, + "step": 18789 + }, + { + "epoch": 0.63, + "grad_norm": 0.7763060927391052, + "learning_rate": 6.3537052173546125e-06, + "loss": 2.0567, + "step": 18790 + }, + { + "epoch": 0.63, + "grad_norm": 0.7505967020988464, + "learning_rate": 6.352715522329394e-06, + "loss": 2.1275, + "step": 18791 + }, + { + "epoch": 0.63, + "grad_norm": 0.7501811981201172, + "learning_rate": 6.35172586850906e-06, + "loss": 2.0197, + "step": 18792 + }, + { + "epoch": 0.63, + "grad_norm": 0.7911760807037354, + "learning_rate": 6.3507362559047895e-06, + "loss": 2.1185, + "step": 18793 + }, + { + "epoch": 0.63, + "grad_norm": 0.7416780591011047, + "learning_rate": 6.349746684527763e-06, + "loss": 2.0292, + "step": 18794 + }, + { + "epoch": 0.63, + "grad_norm": 0.7451139092445374, + "learning_rate": 6.348757154389158e-06, + "loss": 2.0702, + "step": 18795 + }, + { + "epoch": 0.63, + "grad_norm": 0.7680156826972961, + "learning_rate": 6.3477676655001555e-06, + "loss": 2.0738, + "step": 18796 + }, + { + "epoch": 0.63, + "grad_norm": 0.7559218406677246, + "learning_rate": 6.346778217871931e-06, + "loss": 2.0396, + "step": 18797 + }, + { + "epoch": 0.63, + "grad_norm": 0.7054615020751953, + "learning_rate": 6.345788811515667e-06, + "loss": 1.9979, + "step": 18798 + }, + { + "epoch": 0.63, + "grad_norm": 0.7205617427825928, + "learning_rate": 6.344799446442542e-06, + "loss": 2.0038, + "step": 18799 + }, + { + "epoch": 0.63, + "grad_norm": 0.7144215703010559, + "learning_rate": 6.343810122663727e-06, + "loss": 2.066, + "step": 18800 + }, + { + "epoch": 0.63, + "grad_norm": 0.7509363889694214, + "learning_rate": 6.342820840190404e-06, + "loss": 2.0233, + "step": 18801 + }, + { + "epoch": 0.63, + "grad_norm": 0.7284533381462097, + "learning_rate": 6.3418315990337474e-06, + "loss": 2.0219, + "step": 18802 + }, + { + "epoch": 0.63, + "grad_norm": 0.7296149730682373, + "learning_rate": 6.340842399204936e-06, + "loss": 2.0583, + "step": 18803 + }, + { + "epoch": 0.63, + "grad_norm": 0.7395448684692383, + "learning_rate": 6.339853240715141e-06, + "loss": 2.0851, + "step": 18804 + }, + { + "epoch": 0.63, + "grad_norm": 0.7388550043106079, + "learning_rate": 6.338864123575537e-06, + "loss": 2.0942, + "step": 18805 + }, + { + "epoch": 0.63, + "grad_norm": 0.7206251621246338, + "learning_rate": 6.337875047797302e-06, + "loss": 2.0826, + "step": 18806 + }, + { + "epoch": 0.63, + "grad_norm": 0.7091691493988037, + "learning_rate": 6.336886013391607e-06, + "loss": 2.0247, + "step": 18807 + }, + { + "epoch": 0.63, + "grad_norm": 0.7675430178642273, + "learning_rate": 6.33589702036963e-06, + "loss": 2.041, + "step": 18808 + }, + { + "epoch": 0.63, + "grad_norm": 0.7665770053863525, + "learning_rate": 6.334908068742541e-06, + "loss": 2.0008, + "step": 18809 + }, + { + "epoch": 0.63, + "grad_norm": 0.7505615949630737, + "learning_rate": 6.333919158521511e-06, + "loss": 2.0334, + "step": 18810 + }, + { + "epoch": 0.63, + "grad_norm": 0.7185102701187134, + "learning_rate": 6.332930289717711e-06, + "loss": 2.0753, + "step": 18811 + }, + { + "epoch": 0.63, + "grad_norm": 0.7296304106712341, + "learning_rate": 6.3319414623423234e-06, + "loss": 2.0587, + "step": 18812 + }, + { + "epoch": 0.63, + "grad_norm": 0.7443856596946716, + "learning_rate": 6.330952676406503e-06, + "loss": 2.0348, + "step": 18813 + }, + { + "epoch": 0.63, + "grad_norm": 0.7643793821334839, + "learning_rate": 6.329963931921433e-06, + "loss": 2.0405, + "step": 18814 + }, + { + "epoch": 0.63, + "grad_norm": 0.8094752430915833, + "learning_rate": 6.3289752288982795e-06, + "loss": 2.1057, + "step": 18815 + }, + { + "epoch": 0.63, + "grad_norm": 0.7543708086013794, + "learning_rate": 6.327986567348212e-06, + "loss": 2.0209, + "step": 18816 + }, + { + "epoch": 0.63, + "grad_norm": 0.7493652105331421, + "learning_rate": 6.3269979472824025e-06, + "loss": 1.9915, + "step": 18817 + }, + { + "epoch": 0.63, + "grad_norm": 0.7479726672172546, + "learning_rate": 6.326009368712016e-06, + "loss": 2.0059, + "step": 18818 + }, + { + "epoch": 0.63, + "grad_norm": 0.7446410059928894, + "learning_rate": 6.3250208316482185e-06, + "loss": 2.0518, + "step": 18819 + }, + { + "epoch": 0.63, + "grad_norm": 0.7375264763832092, + "learning_rate": 6.324032336102186e-06, + "loss": 2.0219, + "step": 18820 + }, + { + "epoch": 0.63, + "grad_norm": 0.7324852347373962, + "learning_rate": 6.323043882085084e-06, + "loss": 2.0326, + "step": 18821 + }, + { + "epoch": 0.63, + "grad_norm": 0.752420961856842, + "learning_rate": 6.322055469608074e-06, + "loss": 2.0192, + "step": 18822 + }, + { + "epoch": 0.63, + "grad_norm": 0.7476141452789307, + "learning_rate": 6.321067098682327e-06, + "loss": 2.0403, + "step": 18823 + }, + { + "epoch": 0.63, + "grad_norm": 0.773668110370636, + "learning_rate": 6.320078769319007e-06, + "loss": 2.0494, + "step": 18824 + }, + { + "epoch": 0.63, + "grad_norm": 0.7394488453865051, + "learning_rate": 6.319090481529282e-06, + "loss": 2.0447, + "step": 18825 + }, + { + "epoch": 0.63, + "grad_norm": 0.7454477548599243, + "learning_rate": 6.318102235324315e-06, + "loss": 2.038, + "step": 18826 + }, + { + "epoch": 0.63, + "grad_norm": 0.7297065258026123, + "learning_rate": 6.317114030715268e-06, + "loss": 2.0683, + "step": 18827 + }, + { + "epoch": 0.63, + "grad_norm": 0.7349576354026794, + "learning_rate": 6.31612586771331e-06, + "loss": 1.9774, + "step": 18828 + }, + { + "epoch": 0.63, + "grad_norm": 0.7506108283996582, + "learning_rate": 6.315137746329603e-06, + "loss": 2.1085, + "step": 18829 + }, + { + "epoch": 0.63, + "grad_norm": 0.7347702980041504, + "learning_rate": 6.314149666575313e-06, + "loss": 2.0122, + "step": 18830 + }, + { + "epoch": 0.63, + "grad_norm": 0.743095874786377, + "learning_rate": 6.3131616284615975e-06, + "loss": 2.056, + "step": 18831 + }, + { + "epoch": 0.63, + "grad_norm": 0.7406293749809265, + "learning_rate": 6.312173631999621e-06, + "loss": 2.0326, + "step": 18832 + }, + { + "epoch": 0.63, + "grad_norm": 0.7895215749740601, + "learning_rate": 6.311185677200544e-06, + "loss": 1.9859, + "step": 18833 + }, + { + "epoch": 0.63, + "grad_norm": 0.7579501867294312, + "learning_rate": 6.310197764075536e-06, + "loss": 2.0292, + "step": 18834 + }, + { + "epoch": 0.63, + "grad_norm": 0.758316159248352, + "learning_rate": 6.3092098926357435e-06, + "loss": 2.0597, + "step": 18835 + }, + { + "epoch": 0.63, + "grad_norm": 0.7564467191696167, + "learning_rate": 6.308222062892337e-06, + "loss": 2.0556, + "step": 18836 + }, + { + "epoch": 0.63, + "grad_norm": 0.7399389743804932, + "learning_rate": 6.307234274856476e-06, + "loss": 2.0491, + "step": 18837 + }, + { + "epoch": 0.63, + "grad_norm": 0.7551249861717224, + "learning_rate": 6.3062465285393144e-06, + "loss": 2.0523, + "step": 18838 + }, + { + "epoch": 0.63, + "grad_norm": 0.7298482656478882, + "learning_rate": 6.305258823952021e-06, + "loss": 1.9853, + "step": 18839 + }, + { + "epoch": 0.63, + "grad_norm": 0.7758609056472778, + "learning_rate": 6.304271161105744e-06, + "loss": 2.0415, + "step": 18840 + }, + { + "epoch": 0.63, + "grad_norm": 0.7690340876579285, + "learning_rate": 6.3032835400116444e-06, + "loss": 2.0893, + "step": 18841 + }, + { + "epoch": 0.63, + "grad_norm": 0.7569836974143982, + "learning_rate": 6.302295960680882e-06, + "loss": 2.1109, + "step": 18842 + }, + { + "epoch": 0.63, + "grad_norm": 0.7443965673446655, + "learning_rate": 6.3013084231246145e-06, + "loss": 2.0124, + "step": 18843 + }, + { + "epoch": 0.63, + "grad_norm": 0.7283071875572205, + "learning_rate": 6.300320927353995e-06, + "loss": 2.032, + "step": 18844 + }, + { + "epoch": 0.63, + "grad_norm": 0.7403337955474854, + "learning_rate": 6.299333473380181e-06, + "loss": 2.0412, + "step": 18845 + }, + { + "epoch": 0.63, + "grad_norm": 0.7507011294364929, + "learning_rate": 6.298346061214328e-06, + "loss": 1.9751, + "step": 18846 + }, + { + "epoch": 0.63, + "grad_norm": 0.7596787214279175, + "learning_rate": 6.297358690867592e-06, + "loss": 2.0802, + "step": 18847 + }, + { + "epoch": 0.63, + "grad_norm": 0.7611180543899536, + "learning_rate": 6.296371362351131e-06, + "loss": 2.0374, + "step": 18848 + }, + { + "epoch": 0.63, + "grad_norm": 0.797439694404602, + "learning_rate": 6.295384075676094e-06, + "loss": 2.0166, + "step": 18849 + }, + { + "epoch": 0.63, + "grad_norm": 0.7259639501571655, + "learning_rate": 6.2943968308536355e-06, + "loss": 2.075, + "step": 18850 + }, + { + "epoch": 0.63, + "grad_norm": 0.7475815415382385, + "learning_rate": 6.293409627894911e-06, + "loss": 2.0614, + "step": 18851 + }, + { + "epoch": 0.63, + "grad_norm": 0.782560408115387, + "learning_rate": 6.292422466811072e-06, + "loss": 2.0379, + "step": 18852 + }, + { + "epoch": 0.63, + "grad_norm": 0.7449297904968262, + "learning_rate": 6.291435347613271e-06, + "loss": 2.086, + "step": 18853 + }, + { + "epoch": 0.63, + "grad_norm": 0.7293553948402405, + "learning_rate": 6.290448270312659e-06, + "loss": 2.0851, + "step": 18854 + }, + { + "epoch": 0.63, + "grad_norm": 0.7401352524757385, + "learning_rate": 6.289461234920389e-06, + "loss": 1.9794, + "step": 18855 + }, + { + "epoch": 0.63, + "grad_norm": 0.7567707896232605, + "learning_rate": 6.288474241447613e-06, + "loss": 2.0699, + "step": 18856 + }, + { + "epoch": 0.63, + "grad_norm": 0.7312875986099243, + "learning_rate": 6.2874872899054785e-06, + "loss": 2.0306, + "step": 18857 + }, + { + "epoch": 0.63, + "grad_norm": 0.7319051027297974, + "learning_rate": 6.286500380305136e-06, + "loss": 2.0655, + "step": 18858 + }, + { + "epoch": 0.63, + "grad_norm": 0.7353688478469849, + "learning_rate": 6.285513512657737e-06, + "loss": 2.0912, + "step": 18859 + }, + { + "epoch": 0.63, + "grad_norm": 0.7498114705085754, + "learning_rate": 6.284526686974427e-06, + "loss": 2.0623, + "step": 18860 + }, + { + "epoch": 0.63, + "grad_norm": 0.7116929888725281, + "learning_rate": 6.2835399032663636e-06, + "loss": 2.0806, + "step": 18861 + }, + { + "epoch": 0.63, + "grad_norm": 0.7725409865379333, + "learning_rate": 6.2825531615446824e-06, + "loss": 2.0085, + "step": 18862 + }, + { + "epoch": 0.63, + "grad_norm": 0.7499915361404419, + "learning_rate": 6.281566461820537e-06, + "loss": 2.0156, + "step": 18863 + }, + { + "epoch": 0.63, + "grad_norm": 0.7374317049980164, + "learning_rate": 6.280579804105076e-06, + "loss": 1.9742, + "step": 18864 + }, + { + "epoch": 0.63, + "grad_norm": 0.7191151976585388, + "learning_rate": 6.2795931884094475e-06, + "loss": 2.1051, + "step": 18865 + }, + { + "epoch": 0.63, + "grad_norm": 0.746450662612915, + "learning_rate": 6.278606614744791e-06, + "loss": 2.0737, + "step": 18866 + }, + { + "epoch": 0.63, + "grad_norm": 0.7371501326560974, + "learning_rate": 6.2776200831222564e-06, + "loss": 2.013, + "step": 18867 + }, + { + "epoch": 0.63, + "grad_norm": 0.7405148148536682, + "learning_rate": 6.2766335935529875e-06, + "loss": 2.0381, + "step": 18868 + }, + { + "epoch": 0.63, + "grad_norm": 0.73369300365448, + "learning_rate": 6.275647146048132e-06, + "loss": 2.0712, + "step": 18869 + }, + { + "epoch": 0.63, + "grad_norm": 0.7255458235740662, + "learning_rate": 6.2746607406188335e-06, + "loss": 2.0127, + "step": 18870 + }, + { + "epoch": 0.63, + "grad_norm": 0.7474572062492371, + "learning_rate": 6.273674377276233e-06, + "loss": 2.0622, + "step": 18871 + }, + { + "epoch": 0.63, + "grad_norm": 0.7415841817855835, + "learning_rate": 6.272688056031475e-06, + "loss": 2.0033, + "step": 18872 + }, + { + "epoch": 0.63, + "grad_norm": 0.7845476269721985, + "learning_rate": 6.271701776895704e-06, + "loss": 2.1478, + "step": 18873 + }, + { + "epoch": 0.63, + "grad_norm": 0.7431630492210388, + "learning_rate": 6.270715539880063e-06, + "loss": 2.0553, + "step": 18874 + }, + { + "epoch": 0.63, + "grad_norm": 0.7332531809806824, + "learning_rate": 6.26972934499569e-06, + "loss": 2.0915, + "step": 18875 + }, + { + "epoch": 0.63, + "grad_norm": 0.7215174436569214, + "learning_rate": 6.268743192253726e-06, + "loss": 2.0074, + "step": 18876 + }, + { + "epoch": 0.63, + "grad_norm": 0.7457708716392517, + "learning_rate": 6.267757081665318e-06, + "loss": 2.0066, + "step": 18877 + }, + { + "epoch": 0.63, + "grad_norm": 0.7775422930717468, + "learning_rate": 6.266771013241605e-06, + "loss": 2.111, + "step": 18878 + }, + { + "epoch": 0.63, + "grad_norm": 0.7220528721809387, + "learning_rate": 6.265784986993723e-06, + "loss": 2.0063, + "step": 18879 + }, + { + "epoch": 0.63, + "grad_norm": 0.7515190243721008, + "learning_rate": 6.264799002932813e-06, + "loss": 2.059, + "step": 18880 + }, + { + "epoch": 0.63, + "grad_norm": 0.7343034148216248, + "learning_rate": 6.2638130610700165e-06, + "loss": 2.0313, + "step": 18881 + }, + { + "epoch": 0.63, + "grad_norm": 0.7566269636154175, + "learning_rate": 6.262827161416467e-06, + "loss": 2.0743, + "step": 18882 + }, + { + "epoch": 0.63, + "grad_norm": 0.7605767250061035, + "learning_rate": 6.261841303983312e-06, + "loss": 2.0509, + "step": 18883 + }, + { + "epoch": 0.63, + "grad_norm": 0.75034499168396, + "learning_rate": 6.2608554887816775e-06, + "loss": 2.0545, + "step": 18884 + }, + { + "epoch": 0.63, + "grad_norm": 0.7680898308753967, + "learning_rate": 6.259869715822708e-06, + "loss": 2.0814, + "step": 18885 + }, + { + "epoch": 0.63, + "grad_norm": 0.7437170147895813, + "learning_rate": 6.258883985117539e-06, + "loss": 2.1077, + "step": 18886 + }, + { + "epoch": 0.63, + "grad_norm": 0.7450584769248962, + "learning_rate": 6.257898296677309e-06, + "loss": 2.0673, + "step": 18887 + }, + { + "epoch": 0.63, + "grad_norm": 0.7381688356399536, + "learning_rate": 6.256912650513147e-06, + "loss": 2.0077, + "step": 18888 + }, + { + "epoch": 0.63, + "grad_norm": 0.7569523453712463, + "learning_rate": 6.255927046636194e-06, + "loss": 2.0891, + "step": 18889 + }, + { + "epoch": 0.63, + "grad_norm": 0.7439249157905579, + "learning_rate": 6.254941485057579e-06, + "loss": 2.1107, + "step": 18890 + }, + { + "epoch": 0.63, + "grad_norm": 0.7431926131248474, + "learning_rate": 6.253955965788445e-06, + "loss": 2.0644, + "step": 18891 + }, + { + "epoch": 0.63, + "grad_norm": 0.7719762921333313, + "learning_rate": 6.252970488839923e-06, + "loss": 2.0672, + "step": 18892 + }, + { + "epoch": 0.63, + "grad_norm": 0.7319810390472412, + "learning_rate": 6.25198505422314e-06, + "loss": 2.052, + "step": 18893 + }, + { + "epoch": 0.63, + "grad_norm": 0.7023764252662659, + "learning_rate": 6.250999661949234e-06, + "loss": 1.9886, + "step": 18894 + }, + { + "epoch": 0.63, + "grad_norm": 0.7248998284339905, + "learning_rate": 6.2500143120293375e-06, + "loss": 2.0277, + "step": 18895 + }, + { + "epoch": 0.63, + "grad_norm": 0.7359451651573181, + "learning_rate": 6.249029004474583e-06, + "loss": 2.0112, + "step": 18896 + }, + { + "epoch": 0.63, + "grad_norm": 0.7736878991127014, + "learning_rate": 6.248043739296099e-06, + "loss": 2.0923, + "step": 18897 + }, + { + "epoch": 0.63, + "grad_norm": 0.7660742998123169, + "learning_rate": 6.247058516505016e-06, + "loss": 2.1117, + "step": 18898 + }, + { + "epoch": 0.63, + "grad_norm": 0.7357897162437439, + "learning_rate": 6.246073336112468e-06, + "loss": 2.0711, + "step": 18899 + }, + { + "epoch": 0.63, + "grad_norm": 0.7587547302246094, + "learning_rate": 6.245088198129583e-06, + "loss": 2.0658, + "step": 18900 + }, + { + "epoch": 0.63, + "grad_norm": 0.7507548332214355, + "learning_rate": 6.244103102567494e-06, + "loss": 2.0349, + "step": 18901 + }, + { + "epoch": 0.63, + "grad_norm": 0.7386896014213562, + "learning_rate": 6.243118049437324e-06, + "loss": 2.0747, + "step": 18902 + }, + { + "epoch": 0.63, + "grad_norm": 0.7314772009849548, + "learning_rate": 6.242133038750206e-06, + "loss": 2.0844, + "step": 18903 + }, + { + "epoch": 0.63, + "grad_norm": 0.7198120355606079, + "learning_rate": 6.241148070517263e-06, + "loss": 2.1009, + "step": 18904 + }, + { + "epoch": 0.63, + "grad_norm": 0.746636152267456, + "learning_rate": 6.240163144749634e-06, + "loss": 2.0591, + "step": 18905 + }, + { + "epoch": 0.63, + "grad_norm": 0.7432420253753662, + "learning_rate": 6.23917826145843e-06, + "loss": 2.1291, + "step": 18906 + }, + { + "epoch": 0.63, + "grad_norm": 0.7301312685012817, + "learning_rate": 6.2381934206547905e-06, + "loss": 2.051, + "step": 18907 + }, + { + "epoch": 0.63, + "grad_norm": 0.7646254301071167, + "learning_rate": 6.237208622349835e-06, + "loss": 2.0685, + "step": 18908 + }, + { + "epoch": 0.63, + "grad_norm": 0.7602600455284119, + "learning_rate": 6.2362238665546936e-06, + "loss": 2.0391, + "step": 18909 + }, + { + "epoch": 0.63, + "grad_norm": 0.7583686709403992, + "learning_rate": 6.235239153280488e-06, + "loss": 2.0748, + "step": 18910 + }, + { + "epoch": 0.63, + "grad_norm": 0.7293695211410522, + "learning_rate": 6.234254482538343e-06, + "loss": 2.0225, + "step": 18911 + }, + { + "epoch": 0.63, + "grad_norm": 0.7662408351898193, + "learning_rate": 6.2332698543393835e-06, + "loss": 2.0571, + "step": 18912 + }, + { + "epoch": 0.63, + "grad_norm": 0.8053572177886963, + "learning_rate": 6.232285268694733e-06, + "loss": 2.0432, + "step": 18913 + }, + { + "epoch": 0.63, + "grad_norm": 0.7788376808166504, + "learning_rate": 6.23130072561552e-06, + "loss": 2.0647, + "step": 18914 + }, + { + "epoch": 0.63, + "grad_norm": 0.7436146140098572, + "learning_rate": 6.2303162251128605e-06, + "loss": 2.0823, + "step": 18915 + }, + { + "epoch": 0.63, + "grad_norm": 0.7464315295219421, + "learning_rate": 6.2293317671978774e-06, + "loss": 2.1677, + "step": 18916 + }, + { + "epoch": 0.63, + "grad_norm": 0.7420830726623535, + "learning_rate": 6.228347351881696e-06, + "loss": 2.1319, + "step": 18917 + }, + { + "epoch": 0.63, + "grad_norm": 0.7582462430000305, + "learning_rate": 6.227362979175436e-06, + "loss": 2.0607, + "step": 18918 + }, + { + "epoch": 0.63, + "grad_norm": 0.7229472398757935, + "learning_rate": 6.226378649090217e-06, + "loss": 2.0593, + "step": 18919 + }, + { + "epoch": 0.63, + "grad_norm": 0.7313787937164307, + "learning_rate": 6.2253943616371584e-06, + "loss": 2.0725, + "step": 18920 + }, + { + "epoch": 0.63, + "grad_norm": 0.7334299087524414, + "learning_rate": 6.224410116827384e-06, + "loss": 2.0663, + "step": 18921 + }, + { + "epoch": 0.63, + "grad_norm": 0.7316075563430786, + "learning_rate": 6.223425914672012e-06, + "loss": 2.0255, + "step": 18922 + }, + { + "epoch": 0.63, + "grad_norm": 0.7424073219299316, + "learning_rate": 6.222441755182163e-06, + "loss": 2.0498, + "step": 18923 + }, + { + "epoch": 0.63, + "grad_norm": 0.7573325037956238, + "learning_rate": 6.221457638368951e-06, + "loss": 2.0554, + "step": 18924 + }, + { + "epoch": 0.63, + "grad_norm": 0.7465238571166992, + "learning_rate": 6.220473564243495e-06, + "loss": 1.9506, + "step": 18925 + }, + { + "epoch": 0.63, + "grad_norm": 0.7241690158843994, + "learning_rate": 6.2194895328169125e-06, + "loss": 1.9883, + "step": 18926 + }, + { + "epoch": 0.63, + "grad_norm": 0.778339684009552, + "learning_rate": 6.218505544100328e-06, + "loss": 2.072, + "step": 18927 + }, + { + "epoch": 0.63, + "grad_norm": 0.7653154730796814, + "learning_rate": 6.217521598104846e-06, + "loss": 2.0309, + "step": 18928 + }, + { + "epoch": 0.63, + "grad_norm": 0.7538443803787231, + "learning_rate": 6.216537694841589e-06, + "loss": 2.0668, + "step": 18929 + }, + { + "epoch": 0.63, + "grad_norm": 0.7608966827392578, + "learning_rate": 6.215553834321671e-06, + "loss": 2.0407, + "step": 18930 + }, + { + "epoch": 0.63, + "grad_norm": 0.7398580312728882, + "learning_rate": 6.214570016556207e-06, + "loss": 2.0948, + "step": 18931 + }, + { + "epoch": 0.63, + "grad_norm": 0.7513548135757446, + "learning_rate": 6.21358624155632e-06, + "loss": 2.0335, + "step": 18932 + }, + { + "epoch": 0.63, + "grad_norm": 0.7951690554618835, + "learning_rate": 6.21260250933311e-06, + "loss": 2.018, + "step": 18933 + }, + { + "epoch": 0.63, + "grad_norm": 0.8567488789558411, + "learning_rate": 6.211618819897697e-06, + "loss": 2.0214, + "step": 18934 + }, + { + "epoch": 0.63, + "grad_norm": 0.7366189360618591, + "learning_rate": 6.210635173261196e-06, + "loss": 2.0782, + "step": 18935 + }, + { + "epoch": 0.63, + "grad_norm": 0.7258440256118774, + "learning_rate": 6.20965156943472e-06, + "loss": 2.08, + "step": 18936 + }, + { + "epoch": 0.63, + "grad_norm": 0.7189493179321289, + "learning_rate": 6.2086680084293775e-06, + "loss": 2.0772, + "step": 18937 + }, + { + "epoch": 0.63, + "grad_norm": 0.8011818528175354, + "learning_rate": 6.20768449025628e-06, + "loss": 2.0059, + "step": 18938 + }, + { + "epoch": 0.63, + "grad_norm": 0.7659844160079956, + "learning_rate": 6.20670101492654e-06, + "loss": 2.0732, + "step": 18939 + }, + { + "epoch": 0.63, + "grad_norm": 0.7397416830062866, + "learning_rate": 6.2057175824512765e-06, + "loss": 2.1713, + "step": 18940 + }, + { + "epoch": 0.63, + "grad_norm": 0.75091153383255, + "learning_rate": 6.204734192841586e-06, + "loss": 2.0287, + "step": 18941 + }, + { + "epoch": 0.63, + "grad_norm": 0.770153284072876, + "learning_rate": 6.2037508461085824e-06, + "loss": 2.0134, + "step": 18942 + }, + { + "epoch": 0.63, + "grad_norm": 0.743701696395874, + "learning_rate": 6.202767542263379e-06, + "loss": 2.0143, + "step": 18943 + }, + { + "epoch": 0.63, + "grad_norm": 0.771472156047821, + "learning_rate": 6.201784281317082e-06, + "loss": 2.0456, + "step": 18944 + }, + { + "epoch": 0.63, + "grad_norm": 0.7537869811058044, + "learning_rate": 6.200801063280803e-06, + "loss": 2.1383, + "step": 18945 + }, + { + "epoch": 0.63, + "grad_norm": 0.732533872127533, + "learning_rate": 6.199817888165647e-06, + "loss": 2.0567, + "step": 18946 + }, + { + "epoch": 0.63, + "grad_norm": 0.7311195135116577, + "learning_rate": 6.1988347559827164e-06, + "loss": 2.0848, + "step": 18947 + }, + { + "epoch": 0.63, + "grad_norm": 0.7823708057403564, + "learning_rate": 6.197851666743127e-06, + "loss": 2.0828, + "step": 18948 + }, + { + "epoch": 0.63, + "grad_norm": 0.7525556087493896, + "learning_rate": 6.196868620457983e-06, + "loss": 2.0616, + "step": 18949 + }, + { + "epoch": 0.63, + "grad_norm": 0.7519344687461853, + "learning_rate": 6.195885617138384e-06, + "loss": 2.0798, + "step": 18950 + }, + { + "epoch": 0.63, + "grad_norm": 0.7866621613502502, + "learning_rate": 6.194902656795442e-06, + "loss": 2.0851, + "step": 18951 + }, + { + "epoch": 0.63, + "grad_norm": 0.7650327086448669, + "learning_rate": 6.19391973944026e-06, + "loss": 2.0297, + "step": 18952 + }, + { + "epoch": 0.63, + "grad_norm": 0.728334367275238, + "learning_rate": 6.192936865083938e-06, + "loss": 1.9922, + "step": 18953 + }, + { + "epoch": 0.63, + "grad_norm": 0.7529034614562988, + "learning_rate": 6.191954033737593e-06, + "loss": 2.0684, + "step": 18954 + }, + { + "epoch": 0.63, + "grad_norm": 0.7680163383483887, + "learning_rate": 6.190971245412311e-06, + "loss": 2.0423, + "step": 18955 + }, + { + "epoch": 0.63, + "grad_norm": 0.7563573718070984, + "learning_rate": 6.189988500119208e-06, + "loss": 1.9834, + "step": 18956 + }, + { + "epoch": 0.63, + "grad_norm": 0.7373895049095154, + "learning_rate": 6.18900579786938e-06, + "loss": 2.0874, + "step": 18957 + }, + { + "epoch": 0.63, + "grad_norm": 0.742694616317749, + "learning_rate": 6.188023138673936e-06, + "loss": 2.1133, + "step": 18958 + }, + { + "epoch": 0.63, + "grad_norm": 0.7468710541725159, + "learning_rate": 6.1870405225439676e-06, + "loss": 1.9955, + "step": 18959 + }, + { + "epoch": 0.63, + "grad_norm": 0.7460325360298157, + "learning_rate": 6.186057949490582e-06, + "loss": 2.0938, + "step": 18960 + }, + { + "epoch": 0.63, + "grad_norm": 0.7731943726539612, + "learning_rate": 6.185075419524875e-06, + "loss": 2.0157, + "step": 18961 + }, + { + "epoch": 0.63, + "grad_norm": 0.7375780940055847, + "learning_rate": 6.184092932657955e-06, + "loss": 2.1012, + "step": 18962 + }, + { + "epoch": 0.63, + "grad_norm": 0.757139265537262, + "learning_rate": 6.183110488900916e-06, + "loss": 2.0576, + "step": 18963 + }, + { + "epoch": 0.63, + "grad_norm": 0.76951003074646, + "learning_rate": 6.182128088264856e-06, + "loss": 2.0523, + "step": 18964 + }, + { + "epoch": 0.63, + "grad_norm": 0.758908748626709, + "learning_rate": 6.181145730760877e-06, + "loss": 2.1013, + "step": 18965 + }, + { + "epoch": 0.63, + "grad_norm": 0.7553361654281616, + "learning_rate": 6.180163416400075e-06, + "loss": 1.9999, + "step": 18966 + }, + { + "epoch": 0.63, + "grad_norm": 0.7436027526855469, + "learning_rate": 6.179181145193549e-06, + "loss": 2.045, + "step": 18967 + }, + { + "epoch": 0.63, + "grad_norm": 0.7504845261573792, + "learning_rate": 6.178198917152394e-06, + "loss": 2.0902, + "step": 18968 + }, + { + "epoch": 0.63, + "grad_norm": 0.7410982847213745, + "learning_rate": 6.177216732287705e-06, + "loss": 2.143, + "step": 18969 + }, + { + "epoch": 0.63, + "grad_norm": 0.7179784774780273, + "learning_rate": 6.176234590610585e-06, + "loss": 2.0389, + "step": 18970 + }, + { + "epoch": 0.63, + "grad_norm": 0.7723079919815063, + "learning_rate": 6.175252492132127e-06, + "loss": 2.1084, + "step": 18971 + }, + { + "epoch": 0.63, + "grad_norm": 0.7151641845703125, + "learning_rate": 6.174270436863421e-06, + "loss": 2.0772, + "step": 18972 + }, + { + "epoch": 0.63, + "grad_norm": 0.7439836859703064, + "learning_rate": 6.173288424815567e-06, + "loss": 2.0842, + "step": 18973 + }, + { + "epoch": 0.63, + "grad_norm": 0.7648043632507324, + "learning_rate": 6.172306455999657e-06, + "loss": 2.0742, + "step": 18974 + }, + { + "epoch": 0.63, + "grad_norm": 0.792762279510498, + "learning_rate": 6.171324530426785e-06, + "loss": 2.0964, + "step": 18975 + }, + { + "epoch": 0.63, + "grad_norm": 0.762012243270874, + "learning_rate": 6.170342648108049e-06, + "loss": 2.1139, + "step": 18976 + }, + { + "epoch": 0.63, + "grad_norm": 0.749306857585907, + "learning_rate": 6.169360809054532e-06, + "loss": 2.0385, + "step": 18977 + }, + { + "epoch": 0.63, + "grad_norm": 0.7499094009399414, + "learning_rate": 6.168379013277332e-06, + "loss": 2.0402, + "step": 18978 + }, + { + "epoch": 0.63, + "grad_norm": 0.7303341627120972, + "learning_rate": 6.167397260787542e-06, + "loss": 1.9865, + "step": 18979 + }, + { + "epoch": 0.63, + "grad_norm": 0.7549657821655273, + "learning_rate": 6.166415551596255e-06, + "loss": 2.0686, + "step": 18980 + }, + { + "epoch": 0.63, + "grad_norm": 0.7460115551948547, + "learning_rate": 6.165433885714554e-06, + "loss": 2.0521, + "step": 18981 + }, + { + "epoch": 0.63, + "grad_norm": 0.7544904351234436, + "learning_rate": 6.164452263153534e-06, + "loss": 2.0351, + "step": 18982 + }, + { + "epoch": 0.63, + "grad_norm": 0.7404347062110901, + "learning_rate": 6.163470683924283e-06, + "loss": 2.0467, + "step": 18983 + }, + { + "epoch": 0.63, + "grad_norm": 0.7850483655929565, + "learning_rate": 6.162489148037894e-06, + "loss": 2.056, + "step": 18984 + }, + { + "epoch": 0.63, + "grad_norm": 0.7432218790054321, + "learning_rate": 6.161507655505456e-06, + "loss": 2.0172, + "step": 18985 + }, + { + "epoch": 0.63, + "grad_norm": 0.7372378706932068, + "learning_rate": 6.160526206338051e-06, + "loss": 2.0648, + "step": 18986 + }, + { + "epoch": 0.63, + "grad_norm": 0.7359632849693298, + "learning_rate": 6.159544800546772e-06, + "loss": 2.0578, + "step": 18987 + }, + { + "epoch": 0.63, + "grad_norm": 1.9628517627716064, + "learning_rate": 6.158563438142706e-06, + "loss": 2.1338, + "step": 18988 + }, + { + "epoch": 0.63, + "grad_norm": 0.7353386282920837, + "learning_rate": 6.15758211913694e-06, + "loss": 2.1106, + "step": 18989 + }, + { + "epoch": 0.63, + "grad_norm": 0.7633206844329834, + "learning_rate": 6.156600843540558e-06, + "loss": 2.0105, + "step": 18990 + }, + { + "epoch": 0.63, + "grad_norm": 0.7370821237564087, + "learning_rate": 6.155619611364645e-06, + "loss": 2.0696, + "step": 18991 + }, + { + "epoch": 0.63, + "grad_norm": 0.7421459555625916, + "learning_rate": 6.154638422620289e-06, + "loss": 2.0277, + "step": 18992 + }, + { + "epoch": 0.63, + "grad_norm": 0.7444862127304077, + "learning_rate": 6.153657277318577e-06, + "loss": 2.1075, + "step": 18993 + }, + { + "epoch": 0.63, + "grad_norm": 0.7598435878753662, + "learning_rate": 6.152676175470591e-06, + "loss": 2.0886, + "step": 18994 + }, + { + "epoch": 0.63, + "grad_norm": 0.7477863430976868, + "learning_rate": 6.151695117087413e-06, + "loss": 2.0416, + "step": 18995 + }, + { + "epoch": 0.63, + "grad_norm": 0.7621042728424072, + "learning_rate": 6.150714102180128e-06, + "loss": 2.0405, + "step": 18996 + }, + { + "epoch": 0.63, + "grad_norm": 0.7530084848403931, + "learning_rate": 6.1497331307598185e-06, + "loss": 2.0606, + "step": 18997 + }, + { + "epoch": 0.63, + "grad_norm": 0.7559533715248108, + "learning_rate": 6.148752202837574e-06, + "loss": 2.0701, + "step": 18998 + }, + { + "epoch": 0.63, + "grad_norm": 0.7394441962242126, + "learning_rate": 6.147771318424463e-06, + "loss": 2.0065, + "step": 18999 + }, + { + "epoch": 0.63, + "grad_norm": 0.7441007494926453, + "learning_rate": 6.146790477531577e-06, + "loss": 2.163, + "step": 19000 + }, + { + "epoch": 0.63, + "grad_norm": 0.733950674533844, + "learning_rate": 6.145809680169992e-06, + "loss": 1.9913, + "step": 19001 + }, + { + "epoch": 0.63, + "grad_norm": 0.7372605204582214, + "learning_rate": 6.144828926350795e-06, + "loss": 2.0893, + "step": 19002 + }, + { + "epoch": 0.63, + "grad_norm": 0.7606984376907349, + "learning_rate": 6.1438482160850564e-06, + "loss": 2.001, + "step": 19003 + }, + { + "epoch": 0.63, + "grad_norm": 0.7368562817573547, + "learning_rate": 6.142867549383862e-06, + "loss": 2.0692, + "step": 19004 + }, + { + "epoch": 0.63, + "grad_norm": 0.7467755079269409, + "learning_rate": 6.141886926258288e-06, + "loss": 2.0911, + "step": 19005 + }, + { + "epoch": 0.63, + "grad_norm": 0.7457506060600281, + "learning_rate": 6.140906346719417e-06, + "loss": 1.9745, + "step": 19006 + }, + { + "epoch": 0.63, + "grad_norm": 0.7633259892463684, + "learning_rate": 6.1399258107783255e-06, + "loss": 2.0657, + "step": 19007 + }, + { + "epoch": 0.63, + "grad_norm": 0.7399932146072388, + "learning_rate": 6.138945318446088e-06, + "loss": 2.0671, + "step": 19008 + }, + { + "epoch": 0.63, + "grad_norm": 0.7649640440940857, + "learning_rate": 6.137964869733784e-06, + "loss": 2.0461, + "step": 19009 + }, + { + "epoch": 0.63, + "grad_norm": 0.7500134110450745, + "learning_rate": 6.136984464652489e-06, + "loss": 2.0212, + "step": 19010 + }, + { + "epoch": 0.63, + "grad_norm": 0.7268110513687134, + "learning_rate": 6.136004103213282e-06, + "loss": 2.0502, + "step": 19011 + }, + { + "epoch": 0.63, + "grad_norm": 0.7325233221054077, + "learning_rate": 6.135023785427234e-06, + "loss": 2.0784, + "step": 19012 + }, + { + "epoch": 0.63, + "grad_norm": 0.7395865321159363, + "learning_rate": 6.13404351130542e-06, + "loss": 2.0843, + "step": 19013 + }, + { + "epoch": 0.63, + "grad_norm": 0.7059459686279297, + "learning_rate": 6.133063280858919e-06, + "loss": 1.9953, + "step": 19014 + }, + { + "epoch": 0.63, + "grad_norm": 0.7485175728797913, + "learning_rate": 6.132083094098802e-06, + "loss": 2.0188, + "step": 19015 + }, + { + "epoch": 0.63, + "grad_norm": 0.7527862787246704, + "learning_rate": 6.131102951036145e-06, + "loss": 2.0947, + "step": 19016 + }, + { + "epoch": 0.63, + "grad_norm": 0.7434383034706116, + "learning_rate": 6.130122851682019e-06, + "loss": 1.9987, + "step": 19017 + }, + { + "epoch": 0.63, + "grad_norm": 0.7599979043006897, + "learning_rate": 6.129142796047497e-06, + "loss": 2.0251, + "step": 19018 + }, + { + "epoch": 0.63, + "grad_norm": 0.7668964266777039, + "learning_rate": 6.128162784143649e-06, + "loss": 2.0119, + "step": 19019 + }, + { + "epoch": 0.63, + "grad_norm": 0.7716457843780518, + "learning_rate": 6.127182815981554e-06, + "loss": 2.0977, + "step": 19020 + }, + { + "epoch": 0.63, + "grad_norm": 0.7432906627655029, + "learning_rate": 6.126202891572273e-06, + "loss": 2.0335, + "step": 19021 + }, + { + "epoch": 0.63, + "grad_norm": 0.7379641532897949, + "learning_rate": 6.1252230109268815e-06, + "loss": 2.0739, + "step": 19022 + }, + { + "epoch": 0.63, + "grad_norm": 0.7216739058494568, + "learning_rate": 6.124243174056451e-06, + "loss": 2.0757, + "step": 19023 + }, + { + "epoch": 0.63, + "grad_norm": 0.7414604425430298, + "learning_rate": 6.123263380972047e-06, + "loss": 2.037, + "step": 19024 + }, + { + "epoch": 0.63, + "grad_norm": 0.7387256622314453, + "learning_rate": 6.122283631684749e-06, + "loss": 2.0829, + "step": 19025 + }, + { + "epoch": 0.63, + "grad_norm": 0.7434527277946472, + "learning_rate": 6.121303926205613e-06, + "loss": 2.0555, + "step": 19026 + }, + { + "epoch": 0.63, + "grad_norm": 0.7379152178764343, + "learning_rate": 6.12032426454571e-06, + "loss": 2.0671, + "step": 19027 + }, + { + "epoch": 0.63, + "grad_norm": 0.7796815037727356, + "learning_rate": 6.11934464671611e-06, + "loss": 2.0313, + "step": 19028 + }, + { + "epoch": 0.63, + "grad_norm": 0.7453703880310059, + "learning_rate": 6.118365072727884e-06, + "loss": 2.0718, + "step": 19029 + }, + { + "epoch": 0.63, + "grad_norm": 0.7363460659980774, + "learning_rate": 6.1173855425920925e-06, + "loss": 2.0882, + "step": 19030 + }, + { + "epoch": 0.63, + "grad_norm": 0.7074148654937744, + "learning_rate": 6.116406056319804e-06, + "loss": 1.9717, + "step": 19031 + }, + { + "epoch": 0.63, + "grad_norm": 0.7342532277107239, + "learning_rate": 6.11542661392208e-06, + "loss": 2.1006, + "step": 19032 + }, + { + "epoch": 0.63, + "grad_norm": 0.7515509128570557, + "learning_rate": 6.114447215409998e-06, + "loss": 2.0826, + "step": 19033 + }, + { + "epoch": 0.63, + "grad_norm": 0.7187480330467224, + "learning_rate": 6.113467860794608e-06, + "loss": 2.0339, + "step": 19034 + }, + { + "epoch": 0.63, + "grad_norm": 0.7254448533058167, + "learning_rate": 6.112488550086979e-06, + "loss": 2.1013, + "step": 19035 + }, + { + "epoch": 0.63, + "grad_norm": 0.7331321239471436, + "learning_rate": 6.111509283298178e-06, + "loss": 2.0714, + "step": 19036 + }, + { + "epoch": 0.63, + "grad_norm": 0.7561440467834473, + "learning_rate": 6.110530060439267e-06, + "loss": 2.1132, + "step": 19037 + }, + { + "epoch": 0.63, + "grad_norm": 0.7482367157936096, + "learning_rate": 6.109550881521309e-06, + "loss": 2.046, + "step": 19038 + }, + { + "epoch": 0.63, + "grad_norm": 0.7554906010627747, + "learning_rate": 6.108571746555364e-06, + "loss": 2.0119, + "step": 19039 + }, + { + "epoch": 0.63, + "grad_norm": 0.718529224395752, + "learning_rate": 6.107592655552492e-06, + "loss": 2.0967, + "step": 19040 + }, + { + "epoch": 0.63, + "grad_norm": 0.7599681615829468, + "learning_rate": 6.106613608523759e-06, + "loss": 2.0571, + "step": 19041 + }, + { + "epoch": 0.63, + "grad_norm": 0.7526300549507141, + "learning_rate": 6.105634605480228e-06, + "loss": 2.0204, + "step": 19042 + }, + { + "epoch": 0.63, + "grad_norm": 0.7577962875366211, + "learning_rate": 6.1046556464329495e-06, + "loss": 2.1025, + "step": 19043 + }, + { + "epoch": 0.63, + "grad_norm": 0.7504680156707764, + "learning_rate": 6.103676731392991e-06, + "loss": 2.0949, + "step": 19044 + }, + { + "epoch": 0.63, + "grad_norm": 0.7198368906974792, + "learning_rate": 6.102697860371407e-06, + "loss": 2.0802, + "step": 19045 + }, + { + "epoch": 0.63, + "grad_norm": 0.7264554500579834, + "learning_rate": 6.1017190333792584e-06, + "loss": 2.0167, + "step": 19046 + }, + { + "epoch": 0.63, + "grad_norm": 0.7941092848777771, + "learning_rate": 6.10074025042761e-06, + "loss": 2.0976, + "step": 19047 + }, + { + "epoch": 0.63, + "grad_norm": 0.7501634359359741, + "learning_rate": 6.099761511527505e-06, + "loss": 2.1237, + "step": 19048 + }, + { + "epoch": 0.63, + "grad_norm": 0.7681805491447449, + "learning_rate": 6.098782816690012e-06, + "loss": 2.0611, + "step": 19049 + }, + { + "epoch": 0.63, + "grad_norm": 0.767367422580719, + "learning_rate": 6.097804165926184e-06, + "loss": 1.9902, + "step": 19050 + }, + { + "epoch": 0.63, + "grad_norm": 0.7810367941856384, + "learning_rate": 6.09682555924708e-06, + "loss": 2.1044, + "step": 19051 + }, + { + "epoch": 0.63, + "grad_norm": 0.7504892945289612, + "learning_rate": 6.095846996663753e-06, + "loss": 2.0476, + "step": 19052 + }, + { + "epoch": 0.63, + "grad_norm": 0.7267982959747314, + "learning_rate": 6.094868478187256e-06, + "loss": 2.0273, + "step": 19053 + }, + { + "epoch": 0.63, + "grad_norm": 0.7418473362922668, + "learning_rate": 6.093890003828644e-06, + "loss": 2.0439, + "step": 19054 + }, + { + "epoch": 0.63, + "grad_norm": 0.7511504292488098, + "learning_rate": 6.092911573598981e-06, + "loss": 2.0498, + "step": 19055 + }, + { + "epoch": 0.63, + "grad_norm": 0.7115115523338318, + "learning_rate": 6.091933187509307e-06, + "loss": 1.9938, + "step": 19056 + }, + { + "epoch": 0.63, + "grad_norm": 0.7520797848701477, + "learning_rate": 6.0909548455706836e-06, + "loss": 2.1027, + "step": 19057 + }, + { + "epoch": 0.63, + "grad_norm": 0.7222529649734497, + "learning_rate": 6.089976547794161e-06, + "loss": 2.0253, + "step": 19058 + }, + { + "epoch": 0.63, + "grad_norm": 0.7306830286979675, + "learning_rate": 6.088998294190792e-06, + "loss": 1.9993, + "step": 19059 + }, + { + "epoch": 0.63, + "grad_norm": 0.7403666377067566, + "learning_rate": 6.0880200847716305e-06, + "loss": 2.0295, + "step": 19060 + }, + { + "epoch": 0.63, + "grad_norm": 0.7364788055419922, + "learning_rate": 6.0870419195477225e-06, + "loss": 2.1113, + "step": 19061 + }, + { + "epoch": 0.63, + "grad_norm": 0.7246177792549133, + "learning_rate": 6.086063798530122e-06, + "loss": 2.0668, + "step": 19062 + }, + { + "epoch": 0.63, + "grad_norm": 0.7360320091247559, + "learning_rate": 6.085085721729879e-06, + "loss": 2.0429, + "step": 19063 + }, + { + "epoch": 0.63, + "grad_norm": 0.730337381362915, + "learning_rate": 6.0841076891580455e-06, + "loss": 2.0821, + "step": 19064 + }, + { + "epoch": 0.63, + "grad_norm": 0.7329220175743103, + "learning_rate": 6.083129700825665e-06, + "loss": 2.0015, + "step": 19065 + }, + { + "epoch": 0.63, + "grad_norm": 0.7216154932975769, + "learning_rate": 6.082151756743791e-06, + "loss": 2.0675, + "step": 19066 + }, + { + "epoch": 0.63, + "grad_norm": 0.8051868081092834, + "learning_rate": 6.08117385692347e-06, + "loss": 2.0045, + "step": 19067 + }, + { + "epoch": 0.63, + "grad_norm": 0.7464811205863953, + "learning_rate": 6.080196001375749e-06, + "loss": 2.0358, + "step": 19068 + }, + { + "epoch": 0.63, + "grad_norm": 0.7313085198402405, + "learning_rate": 6.079218190111682e-06, + "loss": 1.9768, + "step": 19069 + }, + { + "epoch": 0.63, + "grad_norm": 0.7746431827545166, + "learning_rate": 6.078240423142304e-06, + "loss": 2.0817, + "step": 19070 + }, + { + "epoch": 0.63, + "grad_norm": 0.753528892993927, + "learning_rate": 6.07726270047867e-06, + "loss": 2.1245, + "step": 19071 + }, + { + "epoch": 0.63, + "grad_norm": 0.7479079961776733, + "learning_rate": 6.076285022131822e-06, + "loss": 2.0999, + "step": 19072 + }, + { + "epoch": 0.63, + "grad_norm": 0.7363954186439514, + "learning_rate": 6.075307388112808e-06, + "loss": 1.991, + "step": 19073 + }, + { + "epoch": 0.63, + "grad_norm": 0.7307406067848206, + "learning_rate": 6.07432979843267e-06, + "loss": 2.081, + "step": 19074 + }, + { + "epoch": 0.63, + "grad_norm": 0.727405846118927, + "learning_rate": 6.0733522531024525e-06, + "loss": 2.0917, + "step": 19075 + }, + { + "epoch": 0.63, + "grad_norm": 0.7711823582649231, + "learning_rate": 6.072374752133199e-06, + "loss": 2.1087, + "step": 19076 + }, + { + "epoch": 0.63, + "grad_norm": 0.7533994317054749, + "learning_rate": 6.071397295535954e-06, + "loss": 2.0788, + "step": 19077 + }, + { + "epoch": 0.63, + "grad_norm": 0.7498617768287659, + "learning_rate": 6.070419883321763e-06, + "loss": 2.0468, + "step": 19078 + }, + { + "epoch": 0.63, + "grad_norm": 0.7496045231819153, + "learning_rate": 6.069442515501665e-06, + "loss": 2.051, + "step": 19079 + }, + { + "epoch": 0.63, + "grad_norm": 0.7359864711761475, + "learning_rate": 6.0684651920866984e-06, + "loss": 2.0802, + "step": 19080 + }, + { + "epoch": 0.63, + "grad_norm": 0.735066294670105, + "learning_rate": 6.067487913087911e-06, + "loss": 2.064, + "step": 19081 + }, + { + "epoch": 0.63, + "grad_norm": 0.7362089157104492, + "learning_rate": 6.06651067851634e-06, + "loss": 2.0755, + "step": 19082 + }, + { + "epoch": 0.63, + "grad_norm": 0.7744904160499573, + "learning_rate": 6.065533488383027e-06, + "loss": 2.0828, + "step": 19083 + }, + { + "epoch": 0.63, + "grad_norm": 0.7433419823646545, + "learning_rate": 6.064556342699006e-06, + "loss": 2.0412, + "step": 19084 + }, + { + "epoch": 0.63, + "grad_norm": 0.7551929950714111, + "learning_rate": 6.063579241475324e-06, + "loss": 2.031, + "step": 19085 + }, + { + "epoch": 0.63, + "grad_norm": 0.7276280522346497, + "learning_rate": 6.0626021847230185e-06, + "loss": 2.0256, + "step": 19086 + }, + { + "epoch": 0.64, + "grad_norm": 0.7605628371238708, + "learning_rate": 6.061625172453125e-06, + "loss": 2.0164, + "step": 19087 + }, + { + "epoch": 0.64, + "grad_norm": 0.7388648390769958, + "learning_rate": 6.060648204676683e-06, + "loss": 2.0555, + "step": 19088 + }, + { + "epoch": 0.64, + "grad_norm": 0.7514597177505493, + "learning_rate": 6.059671281404727e-06, + "loss": 2.0265, + "step": 19089 + }, + { + "epoch": 0.64, + "grad_norm": 0.7534149885177612, + "learning_rate": 6.058694402648293e-06, + "loss": 1.9767, + "step": 19090 + }, + { + "epoch": 0.64, + "grad_norm": 0.7699351906776428, + "learning_rate": 6.0577175684184265e-06, + "loss": 2.0116, + "step": 19091 + }, + { + "epoch": 0.64, + "grad_norm": 0.7233486771583557, + "learning_rate": 6.0567407787261515e-06, + "loss": 2.0347, + "step": 19092 + }, + { + "epoch": 0.64, + "grad_norm": 0.7396078705787659, + "learning_rate": 6.05576403358251e-06, + "loss": 2.1008, + "step": 19093 + }, + { + "epoch": 0.64, + "grad_norm": 0.7538793683052063, + "learning_rate": 6.054787332998534e-06, + "loss": 2.0109, + "step": 19094 + }, + { + "epoch": 0.64, + "grad_norm": 0.7183260321617126, + "learning_rate": 6.053810676985261e-06, + "loss": 2.076, + "step": 19095 + }, + { + "epoch": 0.64, + "grad_norm": 0.7488780617713928, + "learning_rate": 6.052834065553721e-06, + "loss": 2.0722, + "step": 19096 + }, + { + "epoch": 0.64, + "grad_norm": 0.7384499907493591, + "learning_rate": 6.051857498714948e-06, + "loss": 2.0224, + "step": 19097 + }, + { + "epoch": 0.64, + "grad_norm": 0.7839840650558472, + "learning_rate": 6.0508809764799725e-06, + "loss": 2.083, + "step": 19098 + }, + { + "epoch": 0.64, + "grad_norm": 0.708393394947052, + "learning_rate": 6.049904498859831e-06, + "loss": 2.0376, + "step": 19099 + }, + { + "epoch": 0.64, + "grad_norm": 0.7500677108764648, + "learning_rate": 6.048928065865557e-06, + "loss": 2.0504, + "step": 19100 + }, + { + "epoch": 0.64, + "grad_norm": 0.7380995750427246, + "learning_rate": 6.047951677508175e-06, + "loss": 2.0271, + "step": 19101 + }, + { + "epoch": 0.64, + "grad_norm": 0.7628551125526428, + "learning_rate": 6.046975333798719e-06, + "loss": 2.0762, + "step": 19102 + }, + { + "epoch": 0.64, + "grad_norm": 0.7465739846229553, + "learning_rate": 6.04599903474822e-06, + "loss": 2.0173, + "step": 19103 + }, + { + "epoch": 0.64, + "grad_norm": 0.7347677946090698, + "learning_rate": 6.045022780367709e-06, + "loss": 1.9893, + "step": 19104 + }, + { + "epoch": 0.64, + "grad_norm": 0.7281553149223328, + "learning_rate": 6.044046570668209e-06, + "loss": 2.0831, + "step": 19105 + }, + { + "epoch": 0.64, + "grad_norm": 0.71649169921875, + "learning_rate": 6.043070405660752e-06, + "loss": 2.0719, + "step": 19106 + }, + { + "epoch": 0.64, + "grad_norm": 0.7366738319396973, + "learning_rate": 6.042094285356366e-06, + "loss": 2.0994, + "step": 19107 + }, + { + "epoch": 0.64, + "grad_norm": 0.745780348777771, + "learning_rate": 6.041118209766081e-06, + "loss": 2.0669, + "step": 19108 + }, + { + "epoch": 0.64, + "grad_norm": 0.7423575520515442, + "learning_rate": 6.040142178900925e-06, + "loss": 2.1502, + "step": 19109 + }, + { + "epoch": 0.64, + "grad_norm": 0.7108673453330994, + "learning_rate": 6.039166192771919e-06, + "loss": 2.0192, + "step": 19110 + }, + { + "epoch": 0.64, + "grad_norm": 0.7495326995849609, + "learning_rate": 6.038190251390093e-06, + "loss": 2.1305, + "step": 19111 + }, + { + "epoch": 0.64, + "grad_norm": 0.7389315366744995, + "learning_rate": 6.037214354766467e-06, + "loss": 1.9906, + "step": 19112 + }, + { + "epoch": 0.64, + "grad_norm": 0.7322598695755005, + "learning_rate": 6.036238502912079e-06, + "loss": 2.0824, + "step": 19113 + }, + { + "epoch": 0.64, + "grad_norm": 0.7282187342643738, + "learning_rate": 6.03526269583794e-06, + "loss": 2.0722, + "step": 19114 + }, + { + "epoch": 0.64, + "grad_norm": 0.7238509058952332, + "learning_rate": 6.03428693355508e-06, + "loss": 2.0217, + "step": 19115 + }, + { + "epoch": 0.64, + "grad_norm": 0.7318863868713379, + "learning_rate": 6.033311216074522e-06, + "loss": 2.1182, + "step": 19116 + }, + { + "epoch": 0.64, + "grad_norm": 0.7685198783874512, + "learning_rate": 6.032335543407293e-06, + "loss": 2.1149, + "step": 19117 + }, + { + "epoch": 0.64, + "grad_norm": 0.7499120235443115, + "learning_rate": 6.0313599155644085e-06, + "loss": 2.0433, + "step": 19118 + }, + { + "epoch": 0.64, + "grad_norm": 0.7354946136474609, + "learning_rate": 6.030384332556893e-06, + "loss": 2.019, + "step": 19119 + }, + { + "epoch": 0.64, + "grad_norm": 0.7552575469017029, + "learning_rate": 6.029408794395769e-06, + "loss": 2.0688, + "step": 19120 + }, + { + "epoch": 0.64, + "grad_norm": 0.716498851776123, + "learning_rate": 6.028433301092058e-06, + "loss": 2.0253, + "step": 19121 + }, + { + "epoch": 0.64, + "grad_norm": 0.7641031742095947, + "learning_rate": 6.027457852656782e-06, + "loss": 2.0042, + "step": 19122 + }, + { + "epoch": 0.64, + "grad_norm": 0.7754260897636414, + "learning_rate": 6.026482449100958e-06, + "loss": 2.0328, + "step": 19123 + }, + { + "epoch": 0.64, + "grad_norm": 0.7616274952888489, + "learning_rate": 6.025507090435605e-06, + "loss": 2.0367, + "step": 19124 + }, + { + "epoch": 0.64, + "grad_norm": 0.7371190786361694, + "learning_rate": 6.024531776671743e-06, + "loss": 2.0127, + "step": 19125 + }, + { + "epoch": 0.64, + "grad_norm": 0.732056736946106, + "learning_rate": 6.023556507820396e-06, + "loss": 2.0277, + "step": 19126 + }, + { + "epoch": 0.64, + "grad_norm": 0.7304233908653259, + "learning_rate": 6.022581283892574e-06, + "loss": 2.0299, + "step": 19127 + }, + { + "epoch": 0.64, + "grad_norm": 0.7453927397727966, + "learning_rate": 6.021606104899296e-06, + "loss": 1.9987, + "step": 19128 + }, + { + "epoch": 0.64, + "grad_norm": 0.7373469471931458, + "learning_rate": 6.020630970851582e-06, + "loss": 2.0557, + "step": 19129 + }, + { + "epoch": 0.64, + "grad_norm": 0.7404829263687134, + "learning_rate": 6.019655881760448e-06, + "loss": 2.0833, + "step": 19130 + }, + { + "epoch": 0.64, + "grad_norm": 0.7484673261642456, + "learning_rate": 6.01868083763691e-06, + "loss": 2.0619, + "step": 19131 + }, + { + "epoch": 0.64, + "grad_norm": 0.726915717124939, + "learning_rate": 6.017705838491981e-06, + "loss": 2.0662, + "step": 19132 + }, + { + "epoch": 0.64, + "grad_norm": 0.7894306778907776, + "learning_rate": 6.016730884336675e-06, + "loss": 2.084, + "step": 19133 + }, + { + "epoch": 0.64, + "grad_norm": 0.7407098412513733, + "learning_rate": 6.0157559751820114e-06, + "loss": 1.9999, + "step": 19134 + }, + { + "epoch": 0.64, + "grad_norm": 0.724006712436676, + "learning_rate": 6.014781111039008e-06, + "loss": 2.0273, + "step": 19135 + }, + { + "epoch": 0.64, + "grad_norm": 0.7340962886810303, + "learning_rate": 6.013806291918662e-06, + "loss": 2.0534, + "step": 19136 + }, + { + "epoch": 0.64, + "grad_norm": 0.7371348738670349, + "learning_rate": 6.012831517832002e-06, + "loss": 2.0994, + "step": 19137 + }, + { + "epoch": 0.64, + "grad_norm": 0.708812952041626, + "learning_rate": 6.011856788790034e-06, + "loss": 2.0436, + "step": 19138 + }, + { + "epoch": 0.64, + "grad_norm": 0.7353003025054932, + "learning_rate": 6.010882104803767e-06, + "loss": 2.0646, + "step": 19139 + }, + { + "epoch": 0.64, + "grad_norm": 0.7479092478752136, + "learning_rate": 6.009907465884223e-06, + "loss": 2.0727, + "step": 19140 + }, + { + "epoch": 0.64, + "grad_norm": 0.7548202872276306, + "learning_rate": 6.0089328720424e-06, + "loss": 2.0404, + "step": 19141 + }, + { + "epoch": 0.64, + "grad_norm": 0.7340999245643616, + "learning_rate": 6.007958323289316e-06, + "loss": 2.1217, + "step": 19142 + }, + { + "epoch": 0.64, + "grad_norm": 0.7164263129234314, + "learning_rate": 6.006983819635981e-06, + "loss": 1.9961, + "step": 19143 + }, + { + "epoch": 0.64, + "grad_norm": 0.7564786672592163, + "learning_rate": 6.006009361093403e-06, + "loss": 2.132, + "step": 19144 + }, + { + "epoch": 0.64, + "grad_norm": 0.736319363117218, + "learning_rate": 6.005034947672589e-06, + "loss": 1.9355, + "step": 19145 + }, + { + "epoch": 0.64, + "grad_norm": 0.732265830039978, + "learning_rate": 6.00406057938455e-06, + "loss": 2.0716, + "step": 19146 + }, + { + "epoch": 0.64, + "grad_norm": 0.7521505355834961, + "learning_rate": 6.003086256240289e-06, + "loss": 2.1018, + "step": 19147 + }, + { + "epoch": 0.64, + "grad_norm": 0.7507603168487549, + "learning_rate": 6.002111978250824e-06, + "loss": 2.0444, + "step": 19148 + }, + { + "epoch": 0.64, + "grad_norm": 0.7531901597976685, + "learning_rate": 6.001137745427148e-06, + "loss": 2.0279, + "step": 19149 + }, + { + "epoch": 0.64, + "grad_norm": 0.7980210185050964, + "learning_rate": 6.000163557780276e-06, + "loss": 2.0461, + "step": 19150 + }, + { + "epoch": 0.64, + "grad_norm": 0.7654465436935425, + "learning_rate": 5.999189415321214e-06, + "loss": 2.0668, + "step": 19151 + }, + { + "epoch": 0.64, + "grad_norm": 0.760086715221405, + "learning_rate": 5.998215318060964e-06, + "loss": 2.0511, + "step": 19152 + }, + { + "epoch": 0.64, + "grad_norm": 0.734534502029419, + "learning_rate": 5.997241266010534e-06, + "loss": 2.1294, + "step": 19153 + }, + { + "epoch": 0.64, + "grad_norm": 0.7488111853599548, + "learning_rate": 5.996267259180923e-06, + "loss": 2.039, + "step": 19154 + }, + { + "epoch": 0.64, + "grad_norm": 0.7419861555099487, + "learning_rate": 5.995293297583138e-06, + "loss": 2.0779, + "step": 19155 + }, + { + "epoch": 0.64, + "grad_norm": 0.7214480042457581, + "learning_rate": 5.994319381228184e-06, + "loss": 1.9984, + "step": 19156 + }, + { + "epoch": 0.64, + "grad_norm": 0.7586864233016968, + "learning_rate": 5.993345510127064e-06, + "loss": 2.1168, + "step": 19157 + }, + { + "epoch": 0.64, + "grad_norm": 0.7587993741035461, + "learning_rate": 5.992371684290776e-06, + "loss": 2.05, + "step": 19158 + }, + { + "epoch": 0.64, + "grad_norm": 0.7237886190414429, + "learning_rate": 5.991397903730325e-06, + "loss": 1.9867, + "step": 19159 + }, + { + "epoch": 0.64, + "grad_norm": 0.7438483238220215, + "learning_rate": 5.99042416845671e-06, + "loss": 2.0314, + "step": 19160 + }, + { + "epoch": 0.64, + "grad_norm": 0.7289181351661682, + "learning_rate": 5.989450478480932e-06, + "loss": 2.0579, + "step": 19161 + }, + { + "epoch": 0.64, + "grad_norm": 0.7463656663894653, + "learning_rate": 5.988476833813997e-06, + "loss": 2.0208, + "step": 19162 + }, + { + "epoch": 0.64, + "grad_norm": 0.7410986423492432, + "learning_rate": 5.987503234466893e-06, + "loss": 2.0256, + "step": 19163 + }, + { + "epoch": 0.64, + "grad_norm": 0.7611365914344788, + "learning_rate": 5.98652968045063e-06, + "loss": 2.0598, + "step": 19164 + }, + { + "epoch": 0.64, + "grad_norm": 0.7491375207901001, + "learning_rate": 5.985556171776203e-06, + "loss": 2.037, + "step": 19165 + }, + { + "epoch": 0.64, + "grad_norm": 0.7455026507377625, + "learning_rate": 5.984582708454609e-06, + "loss": 2.0055, + "step": 19166 + }, + { + "epoch": 0.64, + "grad_norm": 0.7361711263656616, + "learning_rate": 5.983609290496846e-06, + "loss": 2.06, + "step": 19167 + }, + { + "epoch": 0.64, + "grad_norm": 0.7529285550117493, + "learning_rate": 5.98263591791391e-06, + "loss": 2.0599, + "step": 19168 + }, + { + "epoch": 0.64, + "grad_norm": 0.7700035572052002, + "learning_rate": 5.981662590716799e-06, + "loss": 2.0836, + "step": 19169 + }, + { + "epoch": 0.64, + "grad_norm": 0.7223185896873474, + "learning_rate": 5.98068930891651e-06, + "loss": 2.0884, + "step": 19170 + }, + { + "epoch": 0.64, + "grad_norm": 0.7457024455070496, + "learning_rate": 5.97971607252404e-06, + "loss": 2.036, + "step": 19171 + }, + { + "epoch": 0.64, + "grad_norm": 0.7347481846809387, + "learning_rate": 5.978742881550379e-06, + "loss": 2.0752, + "step": 19172 + }, + { + "epoch": 0.64, + "grad_norm": 0.7358304858207703, + "learning_rate": 5.977769736006525e-06, + "loss": 2.2, + "step": 19173 + }, + { + "epoch": 0.64, + "grad_norm": 0.7456408739089966, + "learning_rate": 5.97679663590347e-06, + "loss": 2.032, + "step": 19174 + }, + { + "epoch": 0.64, + "grad_norm": 0.7679852843284607, + "learning_rate": 5.975823581252213e-06, + "loss": 1.995, + "step": 19175 + }, + { + "epoch": 0.64, + "grad_norm": 0.7573293447494507, + "learning_rate": 5.974850572063739e-06, + "loss": 2.0841, + "step": 19176 + }, + { + "epoch": 0.64, + "grad_norm": 0.7442545294761658, + "learning_rate": 5.973877608349043e-06, + "loss": 2.0245, + "step": 19177 + }, + { + "epoch": 0.64, + "grad_norm": 0.7365491390228271, + "learning_rate": 5.972904690119119e-06, + "loss": 2.0924, + "step": 19178 + }, + { + "epoch": 0.64, + "grad_norm": 0.750670850276947, + "learning_rate": 5.971931817384961e-06, + "loss": 2.0752, + "step": 19179 + }, + { + "epoch": 0.64, + "grad_norm": 0.7695629596710205, + "learning_rate": 5.970958990157555e-06, + "loss": 2.1139, + "step": 19180 + }, + { + "epoch": 0.64, + "grad_norm": 0.7435917854309082, + "learning_rate": 5.969986208447892e-06, + "loss": 2.1091, + "step": 19181 + }, + { + "epoch": 0.64, + "grad_norm": 0.7457911968231201, + "learning_rate": 5.969013472266963e-06, + "loss": 2.1064, + "step": 19182 + }, + { + "epoch": 0.64, + "grad_norm": 0.7086613178253174, + "learning_rate": 5.9680407816257565e-06, + "loss": 2.0946, + "step": 19183 + }, + { + "epoch": 0.64, + "grad_norm": 0.734380841255188, + "learning_rate": 5.967068136535268e-06, + "loss": 2.0304, + "step": 19184 + }, + { + "epoch": 0.64, + "grad_norm": 0.7453035116195679, + "learning_rate": 5.966095537006474e-06, + "loss": 2.0942, + "step": 19185 + }, + { + "epoch": 0.64, + "grad_norm": 0.6994192600250244, + "learning_rate": 5.965122983050369e-06, + "loss": 2.0625, + "step": 19186 + }, + { + "epoch": 0.64, + "grad_norm": 0.7254350781440735, + "learning_rate": 5.964150474677941e-06, + "loss": 2.048, + "step": 19187 + }, + { + "epoch": 0.64, + "grad_norm": 0.7239168286323547, + "learning_rate": 5.963178011900179e-06, + "loss": 2.0109, + "step": 19188 + }, + { + "epoch": 0.64, + "grad_norm": 0.7233615517616272, + "learning_rate": 5.962205594728062e-06, + "loss": 1.9992, + "step": 19189 + }, + { + "epoch": 0.64, + "grad_norm": 0.7357884645462036, + "learning_rate": 5.961233223172581e-06, + "loss": 2.0629, + "step": 19190 + }, + { + "epoch": 0.64, + "grad_norm": 0.7586014270782471, + "learning_rate": 5.960260897244718e-06, + "loss": 2.0305, + "step": 19191 + }, + { + "epoch": 0.64, + "grad_norm": 0.7470717430114746, + "learning_rate": 5.959288616955461e-06, + "loss": 2.0738, + "step": 19192 + }, + { + "epoch": 0.64, + "grad_norm": 0.7382586598396301, + "learning_rate": 5.9583163823157964e-06, + "loss": 2.0621, + "step": 19193 + }, + { + "epoch": 0.64, + "grad_norm": 0.7709409594535828, + "learning_rate": 5.957344193336702e-06, + "loss": 2.1101, + "step": 19194 + }, + { + "epoch": 0.64, + "grad_norm": 0.7249276638031006, + "learning_rate": 5.956372050029164e-06, + "loss": 2.0866, + "step": 19195 + }, + { + "epoch": 0.64, + "grad_norm": 0.7140607237815857, + "learning_rate": 5.955399952404164e-06, + "loss": 2.0681, + "step": 19196 + }, + { + "epoch": 0.64, + "grad_norm": 0.7380946278572083, + "learning_rate": 5.954427900472688e-06, + "loss": 2.0458, + "step": 19197 + }, + { + "epoch": 0.64, + "grad_norm": 0.7358243465423584, + "learning_rate": 5.9534558942457125e-06, + "loss": 2.0682, + "step": 19198 + }, + { + "epoch": 0.64, + "grad_norm": 0.7433512806892395, + "learning_rate": 5.952483933734217e-06, + "loss": 2.1639, + "step": 19199 + }, + { + "epoch": 0.64, + "grad_norm": 0.782249927520752, + "learning_rate": 5.951512018949189e-06, + "loss": 2.039, + "step": 19200 + }, + { + "epoch": 0.64, + "grad_norm": 0.7400708794593811, + "learning_rate": 5.950540149901609e-06, + "loss": 2.124, + "step": 19201 + }, + { + "epoch": 0.64, + "grad_norm": 0.7516607046127319, + "learning_rate": 5.94956832660245e-06, + "loss": 2.039, + "step": 19202 + }, + { + "epoch": 0.64, + "grad_norm": 0.7418254017829895, + "learning_rate": 5.948596549062693e-06, + "loss": 2.1014, + "step": 19203 + }, + { + "epoch": 0.64, + "grad_norm": 0.7593517899513245, + "learning_rate": 5.94762481729332e-06, + "loss": 2.087, + "step": 19204 + }, + { + "epoch": 0.64, + "grad_norm": 0.7435586452484131, + "learning_rate": 5.946653131305304e-06, + "loss": 2.0988, + "step": 19205 + }, + { + "epoch": 0.64, + "grad_norm": 0.7288510203361511, + "learning_rate": 5.945681491109632e-06, + "loss": 2.0964, + "step": 19206 + }, + { + "epoch": 0.64, + "grad_norm": 0.7469578385353088, + "learning_rate": 5.944709896717267e-06, + "loss": 2.0151, + "step": 19207 + }, + { + "epoch": 0.64, + "grad_norm": 0.7223272323608398, + "learning_rate": 5.9437383481391965e-06, + "loss": 2.0502, + "step": 19208 + }, + { + "epoch": 0.64, + "grad_norm": 0.7579313516616821, + "learning_rate": 5.942766845386392e-06, + "loss": 2.0254, + "step": 19209 + }, + { + "epoch": 0.64, + "grad_norm": 0.7483022212982178, + "learning_rate": 5.9417953884698325e-06, + "loss": 2.1108, + "step": 19210 + }, + { + "epoch": 0.64, + "grad_norm": 0.7677231431007385, + "learning_rate": 5.940823977400487e-06, + "loss": 2.1057, + "step": 19211 + }, + { + "epoch": 0.64, + "grad_norm": 0.8006629943847656, + "learning_rate": 5.939852612189334e-06, + "loss": 2.0114, + "step": 19212 + }, + { + "epoch": 0.64, + "grad_norm": 0.7508304119110107, + "learning_rate": 5.938881292847346e-06, + "loss": 2.1146, + "step": 19213 + }, + { + "epoch": 0.64, + "grad_norm": 0.7391867637634277, + "learning_rate": 5.937910019385498e-06, + "loss": 2.0628, + "step": 19214 + }, + { + "epoch": 0.64, + "grad_norm": 0.7518301010131836, + "learning_rate": 5.936938791814764e-06, + "loss": 2.055, + "step": 19215 + }, + { + "epoch": 0.64, + "grad_norm": 0.7381339073181152, + "learning_rate": 5.935967610146113e-06, + "loss": 2.0662, + "step": 19216 + }, + { + "epoch": 0.64, + "grad_norm": 0.7265461087226868, + "learning_rate": 5.934996474390517e-06, + "loss": 2.0036, + "step": 19217 + }, + { + "epoch": 0.64, + "grad_norm": 0.7341548800468445, + "learning_rate": 5.93402538455895e-06, + "loss": 2.033, + "step": 19218 + }, + { + "epoch": 0.64, + "grad_norm": 0.7477364540100098, + "learning_rate": 5.933054340662382e-06, + "loss": 2.061, + "step": 19219 + }, + { + "epoch": 0.64, + "grad_norm": 0.7255041599273682, + "learning_rate": 5.932083342711781e-06, + "loss": 2.0635, + "step": 19220 + }, + { + "epoch": 0.64, + "grad_norm": 0.7433214783668518, + "learning_rate": 5.931112390718116e-06, + "loss": 2.0445, + "step": 19221 + }, + { + "epoch": 0.64, + "grad_norm": 0.7255092263221741, + "learning_rate": 5.9301414846923625e-06, + "loss": 2.013, + "step": 19222 + }, + { + "epoch": 0.64, + "grad_norm": 0.7595827579498291, + "learning_rate": 5.929170624645482e-06, + "loss": 2.0775, + "step": 19223 + }, + { + "epoch": 0.64, + "grad_norm": 0.725871205329895, + "learning_rate": 5.928199810588451e-06, + "loss": 2.042, + "step": 19224 + }, + { + "epoch": 0.64, + "grad_norm": 0.7592419385910034, + "learning_rate": 5.927229042532229e-06, + "loss": 2.0548, + "step": 19225 + }, + { + "epoch": 0.64, + "grad_norm": 0.7516739964485168, + "learning_rate": 5.926258320487784e-06, + "loss": 2.0611, + "step": 19226 + }, + { + "epoch": 0.64, + "grad_norm": 0.7327117919921875, + "learning_rate": 5.925287644466088e-06, + "loss": 2.0475, + "step": 19227 + }, + { + "epoch": 0.64, + "grad_norm": 0.7661980986595154, + "learning_rate": 5.924317014478108e-06, + "loss": 2.0747, + "step": 19228 + }, + { + "epoch": 0.64, + "grad_norm": 0.7286189794540405, + "learning_rate": 5.9233464305348e-06, + "loss": 2.0921, + "step": 19229 + }, + { + "epoch": 0.64, + "grad_norm": 0.7389179468154907, + "learning_rate": 5.922375892647136e-06, + "loss": 2.0769, + "step": 19230 + }, + { + "epoch": 0.64, + "grad_norm": 0.732392430305481, + "learning_rate": 5.921405400826079e-06, + "loss": 2.0414, + "step": 19231 + }, + { + "epoch": 0.64, + "grad_norm": 0.7302137017250061, + "learning_rate": 5.920434955082597e-06, + "loss": 1.9575, + "step": 19232 + }, + { + "epoch": 0.64, + "grad_norm": 0.7562469244003296, + "learning_rate": 5.9194645554276476e-06, + "loss": 2.0328, + "step": 19233 + }, + { + "epoch": 0.64, + "grad_norm": 0.7462032437324524, + "learning_rate": 5.918494201872194e-06, + "loss": 2.0978, + "step": 19234 + }, + { + "epoch": 0.64, + "grad_norm": 0.7223637104034424, + "learning_rate": 5.917523894427203e-06, + "loss": 2.0334, + "step": 19235 + }, + { + "epoch": 0.64, + "grad_norm": 0.7506973147392273, + "learning_rate": 5.9165536331036346e-06, + "loss": 2.1301, + "step": 19236 + }, + { + "epoch": 0.64, + "grad_norm": 0.7551196813583374, + "learning_rate": 5.915583417912452e-06, + "loss": 2.0424, + "step": 19237 + }, + { + "epoch": 0.64, + "grad_norm": 0.720759928226471, + "learning_rate": 5.914613248864611e-06, + "loss": 2.0834, + "step": 19238 + }, + { + "epoch": 0.64, + "grad_norm": 0.7510061860084534, + "learning_rate": 5.913643125971077e-06, + "loss": 2.0592, + "step": 19239 + }, + { + "epoch": 0.64, + "grad_norm": 0.7208470106124878, + "learning_rate": 5.9126730492428055e-06, + "loss": 2.0674, + "step": 19240 + }, + { + "epoch": 0.64, + "grad_norm": 0.7481751441955566, + "learning_rate": 5.911703018690764e-06, + "loss": 2.1479, + "step": 19241 + }, + { + "epoch": 0.64, + "grad_norm": 0.7326985597610474, + "learning_rate": 5.910733034325902e-06, + "loss": 2.0618, + "step": 19242 + }, + { + "epoch": 0.64, + "grad_norm": 0.7425806522369385, + "learning_rate": 5.90976309615918e-06, + "loss": 2.0645, + "step": 19243 + }, + { + "epoch": 0.64, + "grad_norm": 0.7452540993690491, + "learning_rate": 5.9087932042015595e-06, + "loss": 2.0633, + "step": 19244 + }, + { + "epoch": 0.64, + "grad_norm": 0.7579016089439392, + "learning_rate": 5.907823358463997e-06, + "loss": 2.1258, + "step": 19245 + }, + { + "epoch": 0.64, + "grad_norm": 0.7459138631820679, + "learning_rate": 5.906853558957448e-06, + "loss": 2.0105, + "step": 19246 + }, + { + "epoch": 0.64, + "grad_norm": 0.7671080827713013, + "learning_rate": 5.905883805692867e-06, + "loss": 2.0304, + "step": 19247 + }, + { + "epoch": 0.64, + "grad_norm": 0.7103632688522339, + "learning_rate": 5.904914098681209e-06, + "loss": 2.062, + "step": 19248 + }, + { + "epoch": 0.64, + "grad_norm": 0.7271375060081482, + "learning_rate": 5.903944437933435e-06, + "loss": 2.0686, + "step": 19249 + }, + { + "epoch": 0.64, + "grad_norm": 0.7257586121559143, + "learning_rate": 5.902974823460497e-06, + "loss": 2.069, + "step": 19250 + }, + { + "epoch": 0.64, + "grad_norm": 0.7446653842926025, + "learning_rate": 5.9020052552733465e-06, + "loss": 2.0674, + "step": 19251 + }, + { + "epoch": 0.64, + "grad_norm": 0.7253462076187134, + "learning_rate": 5.901035733382939e-06, + "loss": 2.0401, + "step": 19252 + }, + { + "epoch": 0.64, + "grad_norm": 0.7309347987174988, + "learning_rate": 5.900066257800228e-06, + "loss": 2.0605, + "step": 19253 + }, + { + "epoch": 0.64, + "grad_norm": 0.7597154378890991, + "learning_rate": 5.899096828536164e-06, + "loss": 2.0873, + "step": 19254 + }, + { + "epoch": 0.64, + "grad_norm": 0.7332039475440979, + "learning_rate": 5.898127445601706e-06, + "loss": 2.0919, + "step": 19255 + }, + { + "epoch": 0.64, + "grad_norm": 0.7476253509521484, + "learning_rate": 5.897158109007793e-06, + "loss": 2.0664, + "step": 19256 + }, + { + "epoch": 0.64, + "grad_norm": 0.7333676815032959, + "learning_rate": 5.896188818765387e-06, + "loss": 2.0098, + "step": 19257 + }, + { + "epoch": 0.64, + "grad_norm": 0.7383410930633545, + "learning_rate": 5.895219574885433e-06, + "loss": 2.0208, + "step": 19258 + }, + { + "epoch": 0.64, + "grad_norm": 0.7190619707107544, + "learning_rate": 5.8942503773788875e-06, + "loss": 2.0182, + "step": 19259 + }, + { + "epoch": 0.64, + "grad_norm": 0.7608978748321533, + "learning_rate": 5.893281226256691e-06, + "loss": 2.0697, + "step": 19260 + }, + { + "epoch": 0.64, + "grad_norm": 0.7587461471557617, + "learning_rate": 5.892312121529795e-06, + "loss": 1.9883, + "step": 19261 + }, + { + "epoch": 0.64, + "grad_norm": 0.7532840967178345, + "learning_rate": 5.891343063209149e-06, + "loss": 2.0745, + "step": 19262 + }, + { + "epoch": 0.64, + "grad_norm": 0.7378754615783691, + "learning_rate": 5.890374051305707e-06, + "loss": 2.1629, + "step": 19263 + }, + { + "epoch": 0.64, + "grad_norm": 0.7699658274650574, + "learning_rate": 5.889405085830405e-06, + "loss": 2.0759, + "step": 19264 + }, + { + "epoch": 0.64, + "grad_norm": 0.7339633703231812, + "learning_rate": 5.888436166794198e-06, + "loss": 2.0987, + "step": 19265 + }, + { + "epoch": 0.64, + "grad_norm": 0.7366234660148621, + "learning_rate": 5.887467294208027e-06, + "loss": 2.0435, + "step": 19266 + }, + { + "epoch": 0.64, + "grad_norm": 0.736527144908905, + "learning_rate": 5.886498468082842e-06, + "loss": 2.0938, + "step": 19267 + }, + { + "epoch": 0.64, + "grad_norm": 0.7632753849029541, + "learning_rate": 5.885529688429589e-06, + "loss": 2.0661, + "step": 19268 + }, + { + "epoch": 0.64, + "grad_norm": 0.7658427357673645, + "learning_rate": 5.884560955259208e-06, + "loss": 2.1159, + "step": 19269 + }, + { + "epoch": 0.64, + "grad_norm": 0.7677491903305054, + "learning_rate": 5.883592268582643e-06, + "loss": 2.0382, + "step": 19270 + }, + { + "epoch": 0.64, + "grad_norm": 0.74180668592453, + "learning_rate": 5.8826236284108414e-06, + "loss": 2.0791, + "step": 19271 + }, + { + "epoch": 0.64, + "grad_norm": 0.7231850624084473, + "learning_rate": 5.881655034754747e-06, + "loss": 2.0748, + "step": 19272 + }, + { + "epoch": 0.64, + "grad_norm": 0.7487387657165527, + "learning_rate": 5.8806864876253e-06, + "loss": 2.0966, + "step": 19273 + }, + { + "epoch": 0.64, + "grad_norm": 0.7177024483680725, + "learning_rate": 5.879717987033442e-06, + "loss": 2.0282, + "step": 19274 + }, + { + "epoch": 0.64, + "grad_norm": 0.7158411145210266, + "learning_rate": 5.878749532990115e-06, + "loss": 2.0027, + "step": 19275 + }, + { + "epoch": 0.64, + "grad_norm": 0.7476642727851868, + "learning_rate": 5.877781125506259e-06, + "loss": 2.0662, + "step": 19276 + }, + { + "epoch": 0.64, + "grad_norm": 0.7672646045684814, + "learning_rate": 5.876812764592823e-06, + "loss": 2.0712, + "step": 19277 + }, + { + "epoch": 0.64, + "grad_norm": 0.722247302532196, + "learning_rate": 5.875844450260733e-06, + "loss": 1.9937, + "step": 19278 + }, + { + "epoch": 0.64, + "grad_norm": 0.7458515167236328, + "learning_rate": 5.874876182520937e-06, + "loss": 2.092, + "step": 19279 + }, + { + "epoch": 0.64, + "grad_norm": 0.7151179313659668, + "learning_rate": 5.873907961384373e-06, + "loss": 2.008, + "step": 19280 + }, + { + "epoch": 0.64, + "grad_norm": 0.7594295144081116, + "learning_rate": 5.87293978686198e-06, + "loss": 2.0703, + "step": 19281 + }, + { + "epoch": 0.64, + "grad_norm": 0.7413676381111145, + "learning_rate": 5.871971658964693e-06, + "loss": 2.0658, + "step": 19282 + }, + { + "epoch": 0.64, + "grad_norm": 0.7421931028366089, + "learning_rate": 5.8710035777034515e-06, + "loss": 2.0958, + "step": 19283 + }, + { + "epoch": 0.64, + "grad_norm": 0.7141311168670654, + "learning_rate": 5.870035543089189e-06, + "loss": 2.0586, + "step": 19284 + }, + { + "epoch": 0.64, + "grad_norm": 0.737067699432373, + "learning_rate": 5.869067555132846e-06, + "loss": 2.0211, + "step": 19285 + }, + { + "epoch": 0.64, + "grad_norm": 0.7343761920928955, + "learning_rate": 5.868099613845359e-06, + "loss": 2.0725, + "step": 19286 + }, + { + "epoch": 0.64, + "grad_norm": 0.7383939623832703, + "learning_rate": 5.867131719237661e-06, + "loss": 2.0454, + "step": 19287 + }, + { + "epoch": 0.64, + "grad_norm": 0.7365996241569519, + "learning_rate": 5.866163871320685e-06, + "loss": 2.0282, + "step": 19288 + }, + { + "epoch": 0.64, + "grad_norm": 0.7391613125801086, + "learning_rate": 5.865196070105368e-06, + "loss": 2.0481, + "step": 19289 + }, + { + "epoch": 0.64, + "grad_norm": 0.7368939518928528, + "learning_rate": 5.864228315602643e-06, + "loss": 2.1101, + "step": 19290 + }, + { + "epoch": 0.64, + "grad_norm": 0.735118567943573, + "learning_rate": 5.8632606078234424e-06, + "loss": 2.062, + "step": 19291 + }, + { + "epoch": 0.64, + "grad_norm": 0.7388679385185242, + "learning_rate": 5.862292946778696e-06, + "loss": 1.9737, + "step": 19292 + }, + { + "epoch": 0.64, + "grad_norm": 0.7371713519096375, + "learning_rate": 5.86132533247934e-06, + "loss": 2.0368, + "step": 19293 + }, + { + "epoch": 0.64, + "grad_norm": 0.7312802672386169, + "learning_rate": 5.860357764936309e-06, + "loss": 2.0142, + "step": 19294 + }, + { + "epoch": 0.64, + "grad_norm": 0.7293230295181274, + "learning_rate": 5.859390244160526e-06, + "loss": 1.988, + "step": 19295 + }, + { + "epoch": 0.64, + "grad_norm": 0.7205647230148315, + "learning_rate": 5.858422770162926e-06, + "loss": 2.043, + "step": 19296 + }, + { + "epoch": 0.64, + "grad_norm": 0.7324780821800232, + "learning_rate": 5.857455342954439e-06, + "loss": 2.0381, + "step": 19297 + }, + { + "epoch": 0.64, + "grad_norm": 0.7390618920326233, + "learning_rate": 5.856487962545991e-06, + "loss": 2.0216, + "step": 19298 + }, + { + "epoch": 0.64, + "grad_norm": 0.7698684334754944, + "learning_rate": 5.855520628948521e-06, + "loss": 2.0134, + "step": 19299 + }, + { + "epoch": 0.64, + "grad_norm": 0.7475852370262146, + "learning_rate": 5.854553342172943e-06, + "loss": 2.0586, + "step": 19300 + }, + { + "epoch": 0.64, + "grad_norm": 0.7435522079467773, + "learning_rate": 5.853586102230193e-06, + "loss": 2.0284, + "step": 19301 + }, + { + "epoch": 0.64, + "grad_norm": 0.7425633668899536, + "learning_rate": 5.852618909131199e-06, + "loss": 2.1087, + "step": 19302 + }, + { + "epoch": 0.64, + "grad_norm": 0.7309922575950623, + "learning_rate": 5.851651762886887e-06, + "loss": 1.9555, + "step": 19303 + }, + { + "epoch": 0.64, + "grad_norm": 0.7259184718132019, + "learning_rate": 5.8506846635081805e-06, + "loss": 2.0185, + "step": 19304 + }, + { + "epoch": 0.64, + "grad_norm": 0.7532318234443665, + "learning_rate": 5.849717611006007e-06, + "loss": 2.0905, + "step": 19305 + }, + { + "epoch": 0.64, + "grad_norm": 0.7281666398048401, + "learning_rate": 5.848750605391289e-06, + "loss": 2.0243, + "step": 19306 + }, + { + "epoch": 0.64, + "grad_norm": 0.7618240118026733, + "learning_rate": 5.847783646674956e-06, + "loss": 2.1155, + "step": 19307 + }, + { + "epoch": 0.64, + "grad_norm": 0.7278028726577759, + "learning_rate": 5.846816734867933e-06, + "loss": 2.0609, + "step": 19308 + }, + { + "epoch": 0.64, + "grad_norm": 0.7037928700447083, + "learning_rate": 5.845849869981137e-06, + "loss": 2.07, + "step": 19309 + }, + { + "epoch": 0.64, + "grad_norm": 0.7242680191993713, + "learning_rate": 5.844883052025495e-06, + "loss": 2.0062, + "step": 19310 + }, + { + "epoch": 0.64, + "grad_norm": 0.7730261087417603, + "learning_rate": 5.84391628101193e-06, + "loss": 2.1198, + "step": 19311 + }, + { + "epoch": 0.64, + "grad_norm": 0.7794146537780762, + "learning_rate": 5.842949556951365e-06, + "loss": 2.064, + "step": 19312 + }, + { + "epoch": 0.64, + "grad_norm": 0.7712916135787964, + "learning_rate": 5.841982879854716e-06, + "loss": 2.0233, + "step": 19313 + }, + { + "epoch": 0.64, + "grad_norm": 0.7195977568626404, + "learning_rate": 5.841016249732907e-06, + "loss": 1.989, + "step": 19314 + }, + { + "epoch": 0.64, + "grad_norm": 0.7682862877845764, + "learning_rate": 5.840049666596861e-06, + "loss": 2.0746, + "step": 19315 + }, + { + "epoch": 0.64, + "grad_norm": 0.724327027797699, + "learning_rate": 5.839083130457495e-06, + "loss": 2.0371, + "step": 19316 + }, + { + "epoch": 0.64, + "grad_norm": 0.7283473610877991, + "learning_rate": 5.838116641325733e-06, + "loss": 2.1012, + "step": 19317 + }, + { + "epoch": 0.64, + "grad_norm": 0.7155072689056396, + "learning_rate": 5.837150199212484e-06, + "loss": 2.0933, + "step": 19318 + }, + { + "epoch": 0.64, + "grad_norm": 0.7399768829345703, + "learning_rate": 5.836183804128678e-06, + "loss": 2.0443, + "step": 19319 + }, + { + "epoch": 0.64, + "grad_norm": 0.7203693985939026, + "learning_rate": 5.835217456085223e-06, + "loss": 2.1381, + "step": 19320 + }, + { + "epoch": 0.64, + "grad_norm": 0.7302543520927429, + "learning_rate": 5.8342511550930425e-06, + "loss": 2.0388, + "step": 19321 + }, + { + "epoch": 0.64, + "grad_norm": 0.7463512420654297, + "learning_rate": 5.833284901163053e-06, + "loss": 2.0212, + "step": 19322 + }, + { + "epoch": 0.64, + "grad_norm": 0.7585991621017456, + "learning_rate": 5.832318694306165e-06, + "loss": 2.0599, + "step": 19323 + }, + { + "epoch": 0.64, + "grad_norm": 0.7586641311645508, + "learning_rate": 5.831352534533297e-06, + "loss": 2.0419, + "step": 19324 + }, + { + "epoch": 0.64, + "grad_norm": 0.7267483472824097, + "learning_rate": 5.830386421855372e-06, + "loss": 1.9803, + "step": 19325 + }, + { + "epoch": 0.64, + "grad_norm": 0.7542256116867065, + "learning_rate": 5.82942035628329e-06, + "loss": 2.1037, + "step": 19326 + }, + { + "epoch": 0.64, + "grad_norm": 0.7502795457839966, + "learning_rate": 5.828454337827975e-06, + "loss": 2.0784, + "step": 19327 + }, + { + "epoch": 0.64, + "grad_norm": 0.7340919375419617, + "learning_rate": 5.82748836650034e-06, + "loss": 2.0388, + "step": 19328 + }, + { + "epoch": 0.64, + "grad_norm": 0.7320643663406372, + "learning_rate": 5.826522442311293e-06, + "loss": 2.0036, + "step": 19329 + }, + { + "epoch": 0.64, + "grad_norm": 0.7654105424880981, + "learning_rate": 5.825556565271752e-06, + "loss": 2.0668, + "step": 19330 + }, + { + "epoch": 0.64, + "grad_norm": 0.7645840644836426, + "learning_rate": 5.8245907353926235e-06, + "loss": 2.0877, + "step": 19331 + }, + { + "epoch": 0.64, + "grad_norm": 0.7496606707572937, + "learning_rate": 5.8236249526848265e-06, + "loss": 2.1354, + "step": 19332 + }, + { + "epoch": 0.64, + "grad_norm": 0.7427768707275391, + "learning_rate": 5.822659217159263e-06, + "loss": 2.0187, + "step": 19333 + }, + { + "epoch": 0.64, + "grad_norm": 0.7317584156990051, + "learning_rate": 5.821693528826851e-06, + "loss": 2.068, + "step": 19334 + }, + { + "epoch": 0.64, + "grad_norm": 0.7440376281738281, + "learning_rate": 5.8207278876984965e-06, + "loss": 2.0807, + "step": 19335 + }, + { + "epoch": 0.64, + "grad_norm": 0.7936128973960876, + "learning_rate": 5.819762293785105e-06, + "loss": 2.0555, + "step": 19336 + }, + { + "epoch": 0.64, + "grad_norm": 0.752167820930481, + "learning_rate": 5.81879674709759e-06, + "loss": 2.0284, + "step": 19337 + }, + { + "epoch": 0.64, + "grad_norm": 0.7355301976203918, + "learning_rate": 5.817831247646862e-06, + "loss": 2.0846, + "step": 19338 + }, + { + "epoch": 0.64, + "grad_norm": 0.7559276819229126, + "learning_rate": 5.8168657954438265e-06, + "loss": 2.0324, + "step": 19339 + }, + { + "epoch": 0.64, + "grad_norm": 0.7320514917373657, + "learning_rate": 5.815900390499385e-06, + "loss": 2.0623, + "step": 19340 + }, + { + "epoch": 0.64, + "grad_norm": 0.7647855281829834, + "learning_rate": 5.814935032824454e-06, + "loss": 2.0782, + "step": 19341 + }, + { + "epoch": 0.64, + "grad_norm": 0.7557506561279297, + "learning_rate": 5.813969722429928e-06, + "loss": 2.021, + "step": 19342 + }, + { + "epoch": 0.64, + "grad_norm": 0.7308631539344788, + "learning_rate": 5.8130044593267254e-06, + "loss": 2.0332, + "step": 19343 + }, + { + "epoch": 0.64, + "grad_norm": 0.7623540163040161, + "learning_rate": 5.812039243525743e-06, + "loss": 2.0967, + "step": 19344 + }, + { + "epoch": 0.64, + "grad_norm": 0.7644964456558228, + "learning_rate": 5.811074075037884e-06, + "loss": 2.0963, + "step": 19345 + }, + { + "epoch": 0.64, + "grad_norm": 0.7548443675041199, + "learning_rate": 5.810108953874054e-06, + "loss": 2.0031, + "step": 19346 + }, + { + "epoch": 0.64, + "grad_norm": 0.7097053527832031, + "learning_rate": 5.809143880045162e-06, + "loss": 2.0426, + "step": 19347 + }, + { + "epoch": 0.64, + "grad_norm": 0.7633403539657593, + "learning_rate": 5.8081788535621054e-06, + "loss": 2.0003, + "step": 19348 + }, + { + "epoch": 0.64, + "grad_norm": 0.7294674515724182, + "learning_rate": 5.807213874435784e-06, + "loss": 2.0367, + "step": 19349 + }, + { + "epoch": 0.64, + "grad_norm": 0.7612988352775574, + "learning_rate": 5.806248942677108e-06, + "loss": 2.0416, + "step": 19350 + }, + { + "epoch": 0.64, + "grad_norm": 0.7672849893569946, + "learning_rate": 5.8052840582969694e-06, + "loss": 1.9726, + "step": 19351 + }, + { + "epoch": 0.64, + "grad_norm": 0.7392340302467346, + "learning_rate": 5.804319221306276e-06, + "loss": 2.0066, + "step": 19352 + }, + { + "epoch": 0.64, + "grad_norm": 0.7485770583152771, + "learning_rate": 5.803354431715922e-06, + "loss": 2.0193, + "step": 19353 + }, + { + "epoch": 0.64, + "grad_norm": 0.7442919015884399, + "learning_rate": 5.802389689536815e-06, + "loss": 2.0727, + "step": 19354 + }, + { + "epoch": 0.64, + "grad_norm": 0.7927093505859375, + "learning_rate": 5.801424994779844e-06, + "loss": 2.1193, + "step": 19355 + }, + { + "epoch": 0.64, + "grad_norm": 0.7455630302429199, + "learning_rate": 5.800460347455917e-06, + "loss": 2.0338, + "step": 19356 + }, + { + "epoch": 0.64, + "grad_norm": 0.767478346824646, + "learning_rate": 5.799495747575927e-06, + "loss": 2.1311, + "step": 19357 + }, + { + "epoch": 0.64, + "grad_norm": 0.7590939402580261, + "learning_rate": 5.79853119515077e-06, + "loss": 2.1129, + "step": 19358 + }, + { + "epoch": 0.64, + "grad_norm": 0.7514663338661194, + "learning_rate": 5.797566690191345e-06, + "loss": 2.1004, + "step": 19359 + }, + { + "epoch": 0.64, + "grad_norm": 0.772892951965332, + "learning_rate": 5.7966022327085525e-06, + "loss": 2.0497, + "step": 19360 + }, + { + "epoch": 0.64, + "grad_norm": 0.7244913578033447, + "learning_rate": 5.795637822713283e-06, + "loss": 2.0228, + "step": 19361 + }, + { + "epoch": 0.64, + "grad_norm": 0.7378636002540588, + "learning_rate": 5.794673460216431e-06, + "loss": 2.0622, + "step": 19362 + }, + { + "epoch": 0.64, + "grad_norm": 0.7456867694854736, + "learning_rate": 5.793709145228898e-06, + "loss": 2.0013, + "step": 19363 + }, + { + "epoch": 0.64, + "grad_norm": 0.7523425221443176, + "learning_rate": 5.7927448777615695e-06, + "loss": 2.0321, + "step": 19364 + }, + { + "epoch": 0.64, + "grad_norm": 0.719103217124939, + "learning_rate": 5.791780657825347e-06, + "loss": 2.0718, + "step": 19365 + }, + { + "epoch": 0.64, + "grad_norm": 0.74888676404953, + "learning_rate": 5.790816485431121e-06, + "loss": 2.107, + "step": 19366 + }, + { + "epoch": 0.64, + "grad_norm": 0.7395013570785522, + "learning_rate": 5.789852360589778e-06, + "loss": 2.0485, + "step": 19367 + }, + { + "epoch": 0.64, + "grad_norm": 0.7528979778289795, + "learning_rate": 5.788888283312217e-06, + "loss": 2.0972, + "step": 19368 + }, + { + "epoch": 0.64, + "grad_norm": 0.7309052348136902, + "learning_rate": 5.7879242536093325e-06, + "loss": 2.0043, + "step": 19369 + }, + { + "epoch": 0.64, + "grad_norm": 0.7477414608001709, + "learning_rate": 5.786960271492011e-06, + "loss": 2.1012, + "step": 19370 + }, + { + "epoch": 0.64, + "grad_norm": 0.7451391816139221, + "learning_rate": 5.785996336971141e-06, + "loss": 2.0361, + "step": 19371 + }, + { + "epoch": 0.64, + "grad_norm": 0.7219563722610474, + "learning_rate": 5.785032450057615e-06, + "loss": 2.0396, + "step": 19372 + }, + { + "epoch": 0.64, + "grad_norm": 0.7223684787750244, + "learning_rate": 5.784068610762321e-06, + "loss": 2.0509, + "step": 19373 + }, + { + "epoch": 0.64, + "grad_norm": 0.7322028279304504, + "learning_rate": 5.783104819096152e-06, + "loss": 2.085, + "step": 19374 + }, + { + "epoch": 0.64, + "grad_norm": 0.7592397928237915, + "learning_rate": 5.782141075069989e-06, + "loss": 2.0217, + "step": 19375 + }, + { + "epoch": 0.64, + "grad_norm": 0.7386236190795898, + "learning_rate": 5.781177378694729e-06, + "loss": 2.0661, + "step": 19376 + }, + { + "epoch": 0.64, + "grad_norm": 0.7334775924682617, + "learning_rate": 5.78021372998125e-06, + "loss": 2.0481, + "step": 19377 + }, + { + "epoch": 0.64, + "grad_norm": 0.7374983429908752, + "learning_rate": 5.779250128940448e-06, + "loss": 2.057, + "step": 19378 + }, + { + "epoch": 0.64, + "grad_norm": 0.7519813179969788, + "learning_rate": 5.778286575583204e-06, + "loss": 2.0135, + "step": 19379 + }, + { + "epoch": 0.64, + "grad_norm": 0.7982410788536072, + "learning_rate": 5.7773230699204e-06, + "loss": 2.026, + "step": 19380 + }, + { + "epoch": 0.64, + "grad_norm": 0.7886046767234802, + "learning_rate": 5.776359611962925e-06, + "loss": 2.0746, + "step": 19381 + }, + { + "epoch": 0.64, + "grad_norm": 0.7747352719306946, + "learning_rate": 5.775396201721668e-06, + "loss": 2.107, + "step": 19382 + }, + { + "epoch": 0.64, + "grad_norm": 0.7699923515319824, + "learning_rate": 5.774432839207509e-06, + "loss": 2.0289, + "step": 19383 + }, + { + "epoch": 0.64, + "grad_norm": 0.7316173911094666, + "learning_rate": 5.773469524431328e-06, + "loss": 2.0382, + "step": 19384 + }, + { + "epoch": 0.64, + "grad_norm": 0.7248473763465881, + "learning_rate": 5.772506257404015e-06, + "loss": 2.0545, + "step": 19385 + }, + { + "epoch": 0.64, + "grad_norm": 0.7396063208580017, + "learning_rate": 5.7715430381364445e-06, + "loss": 2.0428, + "step": 19386 + }, + { + "epoch": 0.65, + "grad_norm": 0.7512771487236023, + "learning_rate": 5.7705798666395055e-06, + "loss": 2.0178, + "step": 19387 + }, + { + "epoch": 0.65, + "grad_norm": 0.7658633589744568, + "learning_rate": 5.769616742924078e-06, + "loss": 2.0727, + "step": 19388 + }, + { + "epoch": 0.65, + "grad_norm": 0.7438501715660095, + "learning_rate": 5.768653667001036e-06, + "loss": 2.0275, + "step": 19389 + }, + { + "epoch": 0.65, + "grad_norm": 0.7386695742607117, + "learning_rate": 5.767690638881267e-06, + "loss": 2.0652, + "step": 19390 + }, + { + "epoch": 0.65, + "grad_norm": 0.7638639807701111, + "learning_rate": 5.766727658575651e-06, + "loss": 2.1377, + "step": 19391 + }, + { + "epoch": 0.65, + "grad_norm": 0.7473623752593994, + "learning_rate": 5.765764726095067e-06, + "loss": 2.1341, + "step": 19392 + }, + { + "epoch": 0.65, + "grad_norm": 0.7257064580917358, + "learning_rate": 5.764801841450385e-06, + "loss": 2.067, + "step": 19393 + }, + { + "epoch": 0.65, + "grad_norm": 0.7486692070960999, + "learning_rate": 5.763839004652495e-06, + "loss": 2.1181, + "step": 19394 + }, + { + "epoch": 0.65, + "grad_norm": 0.7448033690452576, + "learning_rate": 5.762876215712265e-06, + "loss": 2.0695, + "step": 19395 + }, + { + "epoch": 0.65, + "grad_norm": 0.7675816416740417, + "learning_rate": 5.761913474640582e-06, + "loss": 1.9844, + "step": 19396 + }, + { + "epoch": 0.65, + "grad_norm": 0.7241543531417847, + "learning_rate": 5.7609507814483105e-06, + "loss": 1.9962, + "step": 19397 + }, + { + "epoch": 0.65, + "grad_norm": 0.7510273456573486, + "learning_rate": 5.759988136146337e-06, + "loss": 2.0829, + "step": 19398 + }, + { + "epoch": 0.65, + "grad_norm": 0.7283874750137329, + "learning_rate": 5.75902553874553e-06, + "loss": 2.0442, + "step": 19399 + }, + { + "epoch": 0.65, + "grad_norm": 0.7749091982841492, + "learning_rate": 5.75806298925677e-06, + "loss": 2.0809, + "step": 19400 + }, + { + "epoch": 0.65, + "grad_norm": 0.7831537127494812, + "learning_rate": 5.757100487690928e-06, + "loss": 2.0922, + "step": 19401 + }, + { + "epoch": 0.65, + "grad_norm": 0.7498375177383423, + "learning_rate": 5.756138034058876e-06, + "loss": 2.0291, + "step": 19402 + }, + { + "epoch": 0.65, + "grad_norm": 0.7431955933570862, + "learning_rate": 5.755175628371488e-06, + "loss": 2.0428, + "step": 19403 + }, + { + "epoch": 0.65, + "grad_norm": 0.7263168692588806, + "learning_rate": 5.754213270639641e-06, + "loss": 2.0291, + "step": 19404 + }, + { + "epoch": 0.65, + "grad_norm": 0.7727211117744446, + "learning_rate": 5.753250960874206e-06, + "loss": 2.0624, + "step": 19405 + }, + { + "epoch": 0.65, + "grad_norm": 0.7376839518547058, + "learning_rate": 5.752288699086048e-06, + "loss": 2.0465, + "step": 19406 + }, + { + "epoch": 0.65, + "grad_norm": 0.7739841938018799, + "learning_rate": 5.751326485286046e-06, + "loss": 2.0302, + "step": 19407 + }, + { + "epoch": 0.65, + "grad_norm": 0.7334620952606201, + "learning_rate": 5.750364319485064e-06, + "loss": 1.9641, + "step": 19408 + }, + { + "epoch": 0.65, + "grad_norm": 0.7411165237426758, + "learning_rate": 5.749402201693981e-06, + "loss": 1.9964, + "step": 19409 + }, + { + "epoch": 0.65, + "grad_norm": 0.7419471144676208, + "learning_rate": 5.74844013192366e-06, + "loss": 2.0848, + "step": 19410 + }, + { + "epoch": 0.65, + "grad_norm": 0.7416826486587524, + "learning_rate": 5.747478110184965e-06, + "loss": 2.1211, + "step": 19411 + }, + { + "epoch": 0.65, + "grad_norm": 0.796728253364563, + "learning_rate": 5.746516136488772e-06, + "loss": 2.0687, + "step": 19412 + }, + { + "epoch": 0.65, + "grad_norm": 0.7421063184738159, + "learning_rate": 5.745554210845951e-06, + "loss": 2.0373, + "step": 19413 + }, + { + "epoch": 0.65, + "grad_norm": 0.7619338035583496, + "learning_rate": 5.744592333267365e-06, + "loss": 2.0652, + "step": 19414 + }, + { + "epoch": 0.65, + "grad_norm": 0.7377417683601379, + "learning_rate": 5.743630503763875e-06, + "loss": 2.0885, + "step": 19415 + }, + { + "epoch": 0.65, + "grad_norm": 0.7505934238433838, + "learning_rate": 5.7426687223463585e-06, + "loss": 2.12, + "step": 19416 + }, + { + "epoch": 0.65, + "grad_norm": 0.7287315130233765, + "learning_rate": 5.741706989025673e-06, + "loss": 2.0536, + "step": 19417 + }, + { + "epoch": 0.65, + "grad_norm": 0.7966857552528381, + "learning_rate": 5.740745303812688e-06, + "loss": 2.1518, + "step": 19418 + }, + { + "epoch": 0.65, + "grad_norm": 0.7386447787284851, + "learning_rate": 5.739783666718264e-06, + "loss": 2.046, + "step": 19419 + }, + { + "epoch": 0.65, + "grad_norm": 0.7366262078285217, + "learning_rate": 5.73882207775327e-06, + "loss": 2.0123, + "step": 19420 + }, + { + "epoch": 0.65, + "grad_norm": 0.7422581911087036, + "learning_rate": 5.737860536928563e-06, + "loss": 1.9843, + "step": 19421 + }, + { + "epoch": 0.65, + "grad_norm": 0.7076981067657471, + "learning_rate": 5.736899044255011e-06, + "loss": 2.1299, + "step": 19422 + }, + { + "epoch": 0.65, + "grad_norm": 0.7595935463905334, + "learning_rate": 5.735937599743483e-06, + "loss": 2.0903, + "step": 19423 + }, + { + "epoch": 0.65, + "grad_norm": 0.7507017254829407, + "learning_rate": 5.734976203404825e-06, + "loss": 2.037, + "step": 19424 + }, + { + "epoch": 0.65, + "grad_norm": 0.7223144769668579, + "learning_rate": 5.734014855249905e-06, + "loss": 2.0901, + "step": 19425 + }, + { + "epoch": 0.65, + "grad_norm": 0.7433385848999023, + "learning_rate": 5.733053555289592e-06, + "loss": 2.0044, + "step": 19426 + }, + { + "epoch": 0.65, + "grad_norm": 0.7472304105758667, + "learning_rate": 5.732092303534736e-06, + "loss": 2.0265, + "step": 19427 + }, + { + "epoch": 0.65, + "grad_norm": 0.7494765520095825, + "learning_rate": 5.731131099996197e-06, + "loss": 2.042, + "step": 19428 + }, + { + "epoch": 0.65, + "grad_norm": 0.7683257460594177, + "learning_rate": 5.730169944684842e-06, + "loss": 2.0605, + "step": 19429 + }, + { + "epoch": 0.65, + "grad_norm": 0.7253664135932922, + "learning_rate": 5.7292088376115196e-06, + "loss": 2.0087, + "step": 19430 + }, + { + "epoch": 0.65, + "grad_norm": 0.7241952419281006, + "learning_rate": 5.728247778787093e-06, + "loss": 2.0274, + "step": 19431 + }, + { + "epoch": 0.65, + "grad_norm": 0.7326438426971436, + "learning_rate": 5.727286768222428e-06, + "loss": 2.0373, + "step": 19432 + }, + { + "epoch": 0.65, + "grad_norm": 0.7287969589233398, + "learning_rate": 5.726325805928364e-06, + "loss": 2.027, + "step": 19433 + }, + { + "epoch": 0.65, + "grad_norm": 0.7718603610992432, + "learning_rate": 5.725364891915768e-06, + "loss": 2.0725, + "step": 19434 + }, + { + "epoch": 0.65, + "grad_norm": 0.7437602877616882, + "learning_rate": 5.724404026195496e-06, + "loss": 2.0406, + "step": 19435 + }, + { + "epoch": 0.65, + "grad_norm": 0.7366235256195068, + "learning_rate": 5.723443208778403e-06, + "loss": 2.0025, + "step": 19436 + }, + { + "epoch": 0.65, + "grad_norm": 0.744770348072052, + "learning_rate": 5.722482439675339e-06, + "loss": 2.0652, + "step": 19437 + }, + { + "epoch": 0.65, + "grad_norm": 0.7605843544006348, + "learning_rate": 5.721521718897161e-06, + "loss": 2.0353, + "step": 19438 + }, + { + "epoch": 0.65, + "grad_norm": 0.7382888197898865, + "learning_rate": 5.7205610464547265e-06, + "loss": 2.0605, + "step": 19439 + }, + { + "epoch": 0.65, + "grad_norm": 0.7444260120391846, + "learning_rate": 5.719600422358886e-06, + "loss": 2.0764, + "step": 19440 + }, + { + "epoch": 0.65, + "grad_norm": 0.8098291158676147, + "learning_rate": 5.718639846620486e-06, + "loss": 1.9586, + "step": 19441 + }, + { + "epoch": 0.65, + "grad_norm": 0.7371774911880493, + "learning_rate": 5.717679319250388e-06, + "loss": 1.9305, + "step": 19442 + }, + { + "epoch": 0.65, + "grad_norm": 0.7559946179389954, + "learning_rate": 5.716718840259437e-06, + "loss": 2.0201, + "step": 19443 + }, + { + "epoch": 0.65, + "grad_norm": 0.739691436290741, + "learning_rate": 5.7157584096584866e-06, + "loss": 2.0632, + "step": 19444 + }, + { + "epoch": 0.65, + "grad_norm": 0.7354042530059814, + "learning_rate": 5.7147980274583935e-06, + "loss": 2.0514, + "step": 19445 + }, + { + "epoch": 0.65, + "grad_norm": 0.7199611663818359, + "learning_rate": 5.713837693669993e-06, + "loss": 2.0428, + "step": 19446 + }, + { + "epoch": 0.65, + "grad_norm": 0.7737756371498108, + "learning_rate": 5.712877408304143e-06, + "loss": 2.015, + "step": 19447 + }, + { + "epoch": 0.65, + "grad_norm": 0.7603662610054016, + "learning_rate": 5.711917171371695e-06, + "loss": 2.112, + "step": 19448 + }, + { + "epoch": 0.65, + "grad_norm": 0.7443706393241882, + "learning_rate": 5.7109569828834935e-06, + "loss": 2.1144, + "step": 19449 + }, + { + "epoch": 0.65, + "grad_norm": 0.7563647031784058, + "learning_rate": 5.709996842850383e-06, + "loss": 2.0966, + "step": 19450 + }, + { + "epoch": 0.65, + "grad_norm": 0.7375903725624084, + "learning_rate": 5.709036751283218e-06, + "loss": 2.0463, + "step": 19451 + }, + { + "epoch": 0.65, + "grad_norm": 0.7632454633712769, + "learning_rate": 5.708076708192837e-06, + "loss": 2.0715, + "step": 19452 + }, + { + "epoch": 0.65, + "grad_norm": 0.7655351161956787, + "learning_rate": 5.70711671359009e-06, + "loss": 2.0306, + "step": 19453 + }, + { + "epoch": 0.65, + "grad_norm": 0.7505519986152649, + "learning_rate": 5.706156767485827e-06, + "loss": 2.0705, + "step": 19454 + }, + { + "epoch": 0.65, + "grad_norm": 0.7584763765335083, + "learning_rate": 5.705196869890887e-06, + "loss": 2.0479, + "step": 19455 + }, + { + "epoch": 0.65, + "grad_norm": 0.761532187461853, + "learning_rate": 5.704237020816112e-06, + "loss": 2.0541, + "step": 19456 + }, + { + "epoch": 0.65, + "grad_norm": 0.7704643607139587, + "learning_rate": 5.703277220272354e-06, + "loss": 2.1491, + "step": 19457 + }, + { + "epoch": 0.65, + "grad_norm": 0.7389281392097473, + "learning_rate": 5.7023174682704515e-06, + "loss": 1.9965, + "step": 19458 + }, + { + "epoch": 0.65, + "grad_norm": 0.7665868401527405, + "learning_rate": 5.701357764821245e-06, + "loss": 2.1359, + "step": 19459 + }, + { + "epoch": 0.65, + "grad_norm": 0.7701390981674194, + "learning_rate": 5.700398109935578e-06, + "loss": 2.0531, + "step": 19460 + }, + { + "epoch": 0.65, + "grad_norm": 0.7802620530128479, + "learning_rate": 5.699438503624297e-06, + "loss": 2.1126, + "step": 19461 + }, + { + "epoch": 0.65, + "grad_norm": 0.7476899027824402, + "learning_rate": 5.698478945898236e-06, + "loss": 2.0601, + "step": 19462 + }, + { + "epoch": 0.65, + "grad_norm": 0.7184543609619141, + "learning_rate": 5.697519436768243e-06, + "loss": 2.0641, + "step": 19463 + }, + { + "epoch": 0.65, + "grad_norm": 0.7296576499938965, + "learning_rate": 5.696559976245153e-06, + "loss": 2.0568, + "step": 19464 + }, + { + "epoch": 0.65, + "grad_norm": 0.7817097902297974, + "learning_rate": 5.695600564339803e-06, + "loss": 2.096, + "step": 19465 + }, + { + "epoch": 0.65, + "grad_norm": 0.7401478886604309, + "learning_rate": 5.694641201063036e-06, + "loss": 2.0237, + "step": 19466 + }, + { + "epoch": 0.65, + "grad_norm": 0.7490755319595337, + "learning_rate": 5.693681886425697e-06, + "loss": 2.0661, + "step": 19467 + }, + { + "epoch": 0.65, + "grad_norm": 0.754508912563324, + "learning_rate": 5.692722620438608e-06, + "loss": 2.1601, + "step": 19468 + }, + { + "epoch": 0.65, + "grad_norm": 0.7290053367614746, + "learning_rate": 5.691763403112614e-06, + "loss": 2.0222, + "step": 19469 + }, + { + "epoch": 0.65, + "grad_norm": 0.7764450311660767, + "learning_rate": 5.690804234458557e-06, + "loss": 2.0059, + "step": 19470 + }, + { + "epoch": 0.65, + "grad_norm": 0.7240897417068481, + "learning_rate": 5.689845114487268e-06, + "loss": 2.0092, + "step": 19471 + }, + { + "epoch": 0.65, + "grad_norm": 0.7328385710716248, + "learning_rate": 5.688886043209579e-06, + "loss": 1.9539, + "step": 19472 + }, + { + "epoch": 0.65, + "grad_norm": 0.782795786857605, + "learning_rate": 5.687927020636332e-06, + "loss": 2.1574, + "step": 19473 + }, + { + "epoch": 0.65, + "grad_norm": 0.7345569729804993, + "learning_rate": 5.686968046778356e-06, + "loss": 1.9965, + "step": 19474 + }, + { + "epoch": 0.65, + "grad_norm": 0.7304505705833435, + "learning_rate": 5.6860091216464875e-06, + "loss": 2.0839, + "step": 19475 + }, + { + "epoch": 0.65, + "grad_norm": 0.7205492258071899, + "learning_rate": 5.685050245251562e-06, + "loss": 2.0329, + "step": 19476 + }, + { + "epoch": 0.65, + "grad_norm": 0.7615114450454712, + "learning_rate": 5.684091417604411e-06, + "loss": 2.0503, + "step": 19477 + }, + { + "epoch": 0.65, + "grad_norm": 0.7482945322990417, + "learning_rate": 5.683132638715862e-06, + "loss": 2.0483, + "step": 19478 + }, + { + "epoch": 0.65, + "grad_norm": 0.7542290687561035, + "learning_rate": 5.682173908596754e-06, + "loss": 2.0911, + "step": 19479 + }, + { + "epoch": 0.65, + "grad_norm": 0.7569753527641296, + "learning_rate": 5.681215227257915e-06, + "loss": 2.0426, + "step": 19480 + }, + { + "epoch": 0.65, + "grad_norm": 0.7392866611480713, + "learning_rate": 5.6802565947101714e-06, + "loss": 2.0831, + "step": 19481 + }, + { + "epoch": 0.65, + "grad_norm": 0.7456530332565308, + "learning_rate": 5.679298010964357e-06, + "loss": 1.9991, + "step": 19482 + }, + { + "epoch": 0.65, + "grad_norm": 0.7383148074150085, + "learning_rate": 5.678339476031305e-06, + "loss": 2.041, + "step": 19483 + }, + { + "epoch": 0.65, + "grad_norm": 0.782660722732544, + "learning_rate": 5.6773809899218366e-06, + "loss": 2.0085, + "step": 19484 + }, + { + "epoch": 0.65, + "grad_norm": 0.7700353264808655, + "learning_rate": 5.67642255264679e-06, + "loss": 2.1, + "step": 19485 + }, + { + "epoch": 0.65, + "grad_norm": 0.7472296953201294, + "learning_rate": 5.675464164216986e-06, + "loss": 1.9953, + "step": 19486 + }, + { + "epoch": 0.65, + "grad_norm": 0.7187811732292175, + "learning_rate": 5.674505824643251e-06, + "loss": 2.0449, + "step": 19487 + }, + { + "epoch": 0.65, + "grad_norm": 0.7457808256149292, + "learning_rate": 5.673547533936413e-06, + "loss": 2.0077, + "step": 19488 + }, + { + "epoch": 0.65, + "grad_norm": 0.7338558435440063, + "learning_rate": 5.6725892921073075e-06, + "loss": 1.9933, + "step": 19489 + }, + { + "epoch": 0.65, + "grad_norm": 0.7376455664634705, + "learning_rate": 5.6716310991667455e-06, + "loss": 2.0459, + "step": 19490 + }, + { + "epoch": 0.65, + "grad_norm": 0.7485432028770447, + "learning_rate": 5.6706729551255566e-06, + "loss": 2.0059, + "step": 19491 + }, + { + "epoch": 0.65, + "grad_norm": 0.7623322606086731, + "learning_rate": 5.6697148599945724e-06, + "loss": 2.0685, + "step": 19492 + }, + { + "epoch": 0.65, + "grad_norm": 0.7522966265678406, + "learning_rate": 5.668756813784608e-06, + "loss": 2.0508, + "step": 19493 + }, + { + "epoch": 0.65, + "grad_norm": 0.73207688331604, + "learning_rate": 5.6677988165064945e-06, + "loss": 2.0408, + "step": 19494 + }, + { + "epoch": 0.65, + "grad_norm": 0.7407101988792419, + "learning_rate": 5.666840868171051e-06, + "loss": 1.9876, + "step": 19495 + }, + { + "epoch": 0.65, + "grad_norm": 0.7429600358009338, + "learning_rate": 5.665882968789096e-06, + "loss": 2.0357, + "step": 19496 + }, + { + "epoch": 0.65, + "grad_norm": 0.7351678013801575, + "learning_rate": 5.664925118371456e-06, + "loss": 2.067, + "step": 19497 + }, + { + "epoch": 0.65, + "grad_norm": 0.7288463115692139, + "learning_rate": 5.663967316928954e-06, + "loss": 2.0954, + "step": 19498 + }, + { + "epoch": 0.65, + "grad_norm": 0.7242512702941895, + "learning_rate": 5.663009564472408e-06, + "loss": 2.0665, + "step": 19499 + }, + { + "epoch": 0.65, + "grad_norm": 0.7324820160865784, + "learning_rate": 5.662051861012636e-06, + "loss": 2.0483, + "step": 19500 + }, + { + "epoch": 0.65, + "grad_norm": 0.7637337446212769, + "learning_rate": 5.6610942065604625e-06, + "loss": 2.062, + "step": 19501 + }, + { + "epoch": 0.65, + "grad_norm": 0.7275210618972778, + "learning_rate": 5.660136601126705e-06, + "loss": 2.0106, + "step": 19502 + }, + { + "epoch": 0.65, + "grad_norm": 0.7587413787841797, + "learning_rate": 5.659179044722174e-06, + "loss": 2.1523, + "step": 19503 + }, + { + "epoch": 0.65, + "grad_norm": 0.7411825656890869, + "learning_rate": 5.658221537357697e-06, + "loss": 2.0448, + "step": 19504 + }, + { + "epoch": 0.65, + "grad_norm": 0.7222442030906677, + "learning_rate": 5.657264079044091e-06, + "loss": 2.0365, + "step": 19505 + }, + { + "epoch": 0.65, + "grad_norm": 0.7505329251289368, + "learning_rate": 5.656306669792166e-06, + "loss": 2.0258, + "step": 19506 + }, + { + "epoch": 0.65, + "grad_norm": 0.7594444751739502, + "learning_rate": 5.655349309612749e-06, + "loss": 2.0931, + "step": 19507 + }, + { + "epoch": 0.65, + "grad_norm": 0.7588348388671875, + "learning_rate": 5.654391998516647e-06, + "loss": 2.0902, + "step": 19508 + }, + { + "epoch": 0.65, + "grad_norm": 0.7546616792678833, + "learning_rate": 5.653434736514675e-06, + "loss": 2.1068, + "step": 19509 + }, + { + "epoch": 0.65, + "grad_norm": 0.7324091792106628, + "learning_rate": 5.652477523617649e-06, + "loss": 1.9526, + "step": 19510 + }, + { + "epoch": 0.65, + "grad_norm": 0.7973360419273376, + "learning_rate": 5.651520359836391e-06, + "loss": 2.0423, + "step": 19511 + }, + { + "epoch": 0.65, + "grad_norm": 0.7721773982048035, + "learning_rate": 5.650563245181701e-06, + "loss": 2.0758, + "step": 19512 + }, + { + "epoch": 0.65, + "grad_norm": 0.7424566745758057, + "learning_rate": 5.649606179664399e-06, + "loss": 1.9872, + "step": 19513 + }, + { + "epoch": 0.65, + "grad_norm": 0.7309728264808655, + "learning_rate": 5.648649163295299e-06, + "loss": 2.0307, + "step": 19514 + }, + { + "epoch": 0.65, + "grad_norm": 0.7741451859474182, + "learning_rate": 5.647692196085208e-06, + "loss": 2.1028, + "step": 19515 + }, + { + "epoch": 0.65, + "grad_norm": 0.7368655204772949, + "learning_rate": 5.6467352780449435e-06, + "loss": 2.038, + "step": 19516 + }, + { + "epoch": 0.65, + "grad_norm": 0.7623676657676697, + "learning_rate": 5.6457784091853115e-06, + "loss": 2.0769, + "step": 19517 + }, + { + "epoch": 0.65, + "grad_norm": 0.7327426671981812, + "learning_rate": 5.644821589517121e-06, + "loss": 2.1007, + "step": 19518 + }, + { + "epoch": 0.65, + "grad_norm": 0.7521306872367859, + "learning_rate": 5.6438648190511835e-06, + "loss": 2.0718, + "step": 19519 + }, + { + "epoch": 0.65, + "grad_norm": 0.7192142605781555, + "learning_rate": 5.642908097798312e-06, + "loss": 2.0908, + "step": 19520 + }, + { + "epoch": 0.65, + "grad_norm": 0.7225168347358704, + "learning_rate": 5.641951425769311e-06, + "loss": 2.0381, + "step": 19521 + }, + { + "epoch": 0.65, + "grad_norm": 0.7924708127975464, + "learning_rate": 5.640994802974984e-06, + "loss": 2.0796, + "step": 19522 + }, + { + "epoch": 0.65, + "grad_norm": 0.7717782855033875, + "learning_rate": 5.640038229426145e-06, + "loss": 2.1126, + "step": 19523 + }, + { + "epoch": 0.65, + "grad_norm": 0.7277173399925232, + "learning_rate": 5.639081705133601e-06, + "loss": 2.0176, + "step": 19524 + }, + { + "epoch": 0.65, + "grad_norm": 0.7578544020652771, + "learning_rate": 5.638125230108158e-06, + "loss": 2.168, + "step": 19525 + }, + { + "epoch": 0.65, + "grad_norm": 0.7500308156013489, + "learning_rate": 5.637168804360614e-06, + "loss": 2.0703, + "step": 19526 + }, + { + "epoch": 0.65, + "grad_norm": 0.7537912130355835, + "learning_rate": 5.636212427901785e-06, + "loss": 2.0617, + "step": 19527 + }, + { + "epoch": 0.65, + "grad_norm": 0.759221076965332, + "learning_rate": 5.6352561007424655e-06, + "loss": 2.0329, + "step": 19528 + }, + { + "epoch": 0.65, + "grad_norm": 0.7061829566955566, + "learning_rate": 5.63429982289347e-06, + "loss": 2.1202, + "step": 19529 + }, + { + "epoch": 0.65, + "grad_norm": 0.7606263160705566, + "learning_rate": 5.633343594365597e-06, + "loss": 2.0445, + "step": 19530 + }, + { + "epoch": 0.65, + "grad_norm": 0.7495203018188477, + "learning_rate": 5.632387415169643e-06, + "loss": 2.0584, + "step": 19531 + }, + { + "epoch": 0.65, + "grad_norm": 0.7259423732757568, + "learning_rate": 5.6314312853164175e-06, + "loss": 2.0214, + "step": 19532 + }, + { + "epoch": 0.65, + "grad_norm": 0.7746559381484985, + "learning_rate": 5.630475204816729e-06, + "loss": 2.0205, + "step": 19533 + }, + { + "epoch": 0.65, + "grad_norm": 0.72916179895401, + "learning_rate": 5.629519173681363e-06, + "loss": 2.0204, + "step": 19534 + }, + { + "epoch": 0.65, + "grad_norm": 0.7190880179405212, + "learning_rate": 5.628563191921128e-06, + "loss": 2.043, + "step": 19535 + }, + { + "epoch": 0.65, + "grad_norm": 0.7427259087562561, + "learning_rate": 5.627607259546828e-06, + "loss": 2.1085, + "step": 19536 + }, + { + "epoch": 0.65, + "grad_norm": 0.7584714293479919, + "learning_rate": 5.626651376569254e-06, + "loss": 1.9946, + "step": 19537 + }, + { + "epoch": 0.65, + "grad_norm": 0.7303141355514526, + "learning_rate": 5.625695542999215e-06, + "loss": 2.046, + "step": 19538 + }, + { + "epoch": 0.65, + "grad_norm": 0.7365593314170837, + "learning_rate": 5.624739758847498e-06, + "loss": 2.0988, + "step": 19539 + }, + { + "epoch": 0.65, + "grad_norm": 0.7867761254310608, + "learning_rate": 5.623784024124913e-06, + "loss": 2.0822, + "step": 19540 + }, + { + "epoch": 0.65, + "grad_norm": 0.7571848034858704, + "learning_rate": 5.622828338842248e-06, + "loss": 2.0545, + "step": 19541 + }, + { + "epoch": 0.65, + "grad_norm": 0.7367216348648071, + "learning_rate": 5.621872703010305e-06, + "loss": 2.0449, + "step": 19542 + }, + { + "epoch": 0.65, + "grad_norm": 0.7408362030982971, + "learning_rate": 5.620917116639879e-06, + "loss": 2.0571, + "step": 19543 + }, + { + "epoch": 0.65, + "grad_norm": 0.7886891961097717, + "learning_rate": 5.619961579741762e-06, + "loss": 2.036, + "step": 19544 + }, + { + "epoch": 0.65, + "grad_norm": 0.7480760216712952, + "learning_rate": 5.619006092326751e-06, + "loss": 2.0132, + "step": 19545 + }, + { + "epoch": 0.65, + "grad_norm": 0.7563057541847229, + "learning_rate": 5.618050654405647e-06, + "loss": 1.9871, + "step": 19546 + }, + { + "epoch": 0.65, + "grad_norm": 0.7795711159706116, + "learning_rate": 5.617095265989237e-06, + "loss": 2.0776, + "step": 19547 + }, + { + "epoch": 0.65, + "grad_norm": 0.7605361938476562, + "learning_rate": 5.616139927088313e-06, + "loss": 2.0296, + "step": 19548 + }, + { + "epoch": 0.65, + "grad_norm": 0.7357608675956726, + "learning_rate": 5.615184637713675e-06, + "loss": 2.0368, + "step": 19549 + }, + { + "epoch": 0.65, + "grad_norm": 0.793256938457489, + "learning_rate": 5.6142293978761075e-06, + "loss": 2.1577, + "step": 19550 + }, + { + "epoch": 0.65, + "grad_norm": 0.7241933345794678, + "learning_rate": 5.613274207586409e-06, + "loss": 2.0908, + "step": 19551 + }, + { + "epoch": 0.65, + "grad_norm": 0.7310154438018799, + "learning_rate": 5.6123190668553686e-06, + "loss": 2.0341, + "step": 19552 + }, + { + "epoch": 0.65, + "grad_norm": 0.7121602296829224, + "learning_rate": 5.611363975693771e-06, + "loss": 2.0761, + "step": 19553 + }, + { + "epoch": 0.65, + "grad_norm": 0.750005304813385, + "learning_rate": 5.610408934112412e-06, + "loss": 2.0377, + "step": 19554 + }, + { + "epoch": 0.65, + "grad_norm": 0.7561762928962708, + "learning_rate": 5.609453942122083e-06, + "loss": 2.0452, + "step": 19555 + }, + { + "epoch": 0.65, + "grad_norm": 0.7533517479896545, + "learning_rate": 5.608498999733571e-06, + "loss": 2.0212, + "step": 19556 + }, + { + "epoch": 0.65, + "grad_norm": 0.7263368964195251, + "learning_rate": 5.607544106957661e-06, + "loss": 2.0552, + "step": 19557 + }, + { + "epoch": 0.65, + "grad_norm": 0.7331065535545349, + "learning_rate": 5.606589263805147e-06, + "loss": 2.0108, + "step": 19558 + }, + { + "epoch": 0.65, + "grad_norm": 0.7458671927452087, + "learning_rate": 5.605634470286807e-06, + "loss": 2.0802, + "step": 19559 + }, + { + "epoch": 0.65, + "grad_norm": 0.742459237575531, + "learning_rate": 5.604679726413438e-06, + "loss": 2.0593, + "step": 19560 + }, + { + "epoch": 0.65, + "grad_norm": 0.7391287088394165, + "learning_rate": 5.603725032195818e-06, + "loss": 2.0863, + "step": 19561 + }, + { + "epoch": 0.65, + "grad_norm": 0.7931849956512451, + "learning_rate": 5.6027703876447405e-06, + "loss": 2.0486, + "step": 19562 + }, + { + "epoch": 0.65, + "grad_norm": 0.7547613382339478, + "learning_rate": 5.601815792770981e-06, + "loss": 2.0284, + "step": 19563 + }, + { + "epoch": 0.65, + "grad_norm": 0.7238773107528687, + "learning_rate": 5.600861247585334e-06, + "loss": 2.0713, + "step": 19564 + }, + { + "epoch": 0.65, + "grad_norm": 0.7398040890693665, + "learning_rate": 5.599906752098578e-06, + "loss": 2.0184, + "step": 19565 + }, + { + "epoch": 0.65, + "grad_norm": 0.7282812595367432, + "learning_rate": 5.5989523063214936e-06, + "loss": 2.0511, + "step": 19566 + }, + { + "epoch": 0.65, + "grad_norm": 0.7346721887588501, + "learning_rate": 5.597997910264866e-06, + "loss": 2.1091, + "step": 19567 + }, + { + "epoch": 0.65, + "grad_norm": 0.7421655058860779, + "learning_rate": 5.597043563939483e-06, + "loss": 2.1149, + "step": 19568 + }, + { + "epoch": 0.65, + "grad_norm": 0.7590541243553162, + "learning_rate": 5.5960892673561196e-06, + "loss": 2.079, + "step": 19569 + }, + { + "epoch": 0.65, + "grad_norm": 0.7407588362693787, + "learning_rate": 5.595135020525557e-06, + "loss": 2.0436, + "step": 19570 + }, + { + "epoch": 0.65, + "grad_norm": 0.7701472640037537, + "learning_rate": 5.5941808234585796e-06, + "loss": 2.0795, + "step": 19571 + }, + { + "epoch": 0.65, + "grad_norm": 0.729473888874054, + "learning_rate": 5.593226676165962e-06, + "loss": 2.009, + "step": 19572 + }, + { + "epoch": 0.65, + "grad_norm": 0.7512383460998535, + "learning_rate": 5.592272578658491e-06, + "loss": 2.0464, + "step": 19573 + }, + { + "epoch": 0.65, + "grad_norm": 0.7690520286560059, + "learning_rate": 5.591318530946941e-06, + "loss": 2.0164, + "step": 19574 + }, + { + "epoch": 0.65, + "grad_norm": 0.7310040593147278, + "learning_rate": 5.590364533042087e-06, + "loss": 2.0385, + "step": 19575 + }, + { + "epoch": 0.65, + "grad_norm": 0.7455087900161743, + "learning_rate": 5.589410584954708e-06, + "loss": 2.0599, + "step": 19576 + }, + { + "epoch": 0.65, + "grad_norm": 0.7293804883956909, + "learning_rate": 5.58845668669559e-06, + "loss": 2.0718, + "step": 19577 + }, + { + "epoch": 0.65, + "grad_norm": 0.7608941197395325, + "learning_rate": 5.587502838275502e-06, + "loss": 2.1001, + "step": 19578 + }, + { + "epoch": 0.65, + "grad_norm": 0.7469384670257568, + "learning_rate": 5.586549039705218e-06, + "loss": 2.0431, + "step": 19579 + }, + { + "epoch": 0.65, + "grad_norm": 0.7249419689178467, + "learning_rate": 5.585595290995518e-06, + "loss": 2.0125, + "step": 19580 + }, + { + "epoch": 0.65, + "grad_norm": 0.78377366065979, + "learning_rate": 5.584641592157174e-06, + "loss": 2.0263, + "step": 19581 + }, + { + "epoch": 0.65, + "grad_norm": 0.7254436016082764, + "learning_rate": 5.583687943200964e-06, + "loss": 2.0254, + "step": 19582 + }, + { + "epoch": 0.65, + "grad_norm": 0.7522599697113037, + "learning_rate": 5.582734344137655e-06, + "loss": 2.0567, + "step": 19583 + }, + { + "epoch": 0.65, + "grad_norm": 0.7589452862739563, + "learning_rate": 5.581780794978029e-06, + "loss": 2.0551, + "step": 19584 + }, + { + "epoch": 0.65, + "grad_norm": 0.7531152367591858, + "learning_rate": 5.580827295732852e-06, + "loss": 2.0197, + "step": 19585 + }, + { + "epoch": 0.65, + "grad_norm": 0.7827947735786438, + "learning_rate": 5.5798738464129e-06, + "loss": 2.0804, + "step": 19586 + }, + { + "epoch": 0.65, + "grad_norm": 0.7234615683555603, + "learning_rate": 5.578920447028943e-06, + "loss": 2.0441, + "step": 19587 + }, + { + "epoch": 0.65, + "grad_norm": 0.7164815068244934, + "learning_rate": 5.57796709759175e-06, + "loss": 2.0516, + "step": 19588 + }, + { + "epoch": 0.65, + "grad_norm": 0.7341342568397522, + "learning_rate": 5.577013798112091e-06, + "loss": 2.0537, + "step": 19589 + }, + { + "epoch": 0.65, + "grad_norm": 0.7366927266120911, + "learning_rate": 5.576060548600742e-06, + "loss": 2.0223, + "step": 19590 + }, + { + "epoch": 0.65, + "grad_norm": 0.7171513438224792, + "learning_rate": 5.5751073490684696e-06, + "loss": 2.0096, + "step": 19591 + }, + { + "epoch": 0.65, + "grad_norm": 0.7562494874000549, + "learning_rate": 5.574154199526037e-06, + "loss": 2.0849, + "step": 19592 + }, + { + "epoch": 0.65, + "grad_norm": 0.759568989276886, + "learning_rate": 5.573201099984219e-06, + "loss": 2.1198, + "step": 19593 + }, + { + "epoch": 0.65, + "grad_norm": 0.7625716924667358, + "learning_rate": 5.572248050453777e-06, + "loss": 2.1028, + "step": 19594 + }, + { + "epoch": 0.65, + "grad_norm": 0.7648226618766785, + "learning_rate": 5.571295050945487e-06, + "loss": 2.0429, + "step": 19595 + }, + { + "epoch": 0.65, + "grad_norm": 0.7351311445236206, + "learning_rate": 5.570342101470108e-06, + "loss": 2.0274, + "step": 19596 + }, + { + "epoch": 0.65, + "grad_norm": 0.7135232090950012, + "learning_rate": 5.569389202038405e-06, + "loss": 2.0429, + "step": 19597 + }, + { + "epoch": 0.65, + "grad_norm": 0.7387197613716125, + "learning_rate": 5.568436352661146e-06, + "loss": 2.0981, + "step": 19598 + }, + { + "epoch": 0.65, + "grad_norm": 0.764707624912262, + "learning_rate": 5.567483553349101e-06, + "loss": 2.0425, + "step": 19599 + }, + { + "epoch": 0.65, + "grad_norm": 0.7871748805046082, + "learning_rate": 5.566530804113028e-06, + "loss": 2.0275, + "step": 19600 + }, + { + "epoch": 0.65, + "grad_norm": 0.7661146521568298, + "learning_rate": 5.565578104963688e-06, + "loss": 2.1114, + "step": 19601 + }, + { + "epoch": 0.65, + "grad_norm": 0.750670313835144, + "learning_rate": 5.564625455911852e-06, + "loss": 2.1222, + "step": 19602 + }, + { + "epoch": 0.65, + "grad_norm": 0.741730809211731, + "learning_rate": 5.563672856968274e-06, + "loss": 2.0747, + "step": 19603 + }, + { + "epoch": 0.65, + "grad_norm": 0.7372225522994995, + "learning_rate": 5.562720308143724e-06, + "loss": 2.0286, + "step": 19604 + }, + { + "epoch": 0.65, + "grad_norm": 0.727664589881897, + "learning_rate": 5.561767809448956e-06, + "loss": 2.0256, + "step": 19605 + }, + { + "epoch": 0.65, + "grad_norm": 0.7347592115402222, + "learning_rate": 5.560815360894738e-06, + "loss": 2.0535, + "step": 19606 + }, + { + "epoch": 0.65, + "grad_norm": 0.7356708645820618, + "learning_rate": 5.559862962491822e-06, + "loss": 2.0339, + "step": 19607 + }, + { + "epoch": 0.65, + "grad_norm": 0.7301467061042786, + "learning_rate": 5.558910614250972e-06, + "loss": 2.0551, + "step": 19608 + }, + { + "epoch": 0.65, + "grad_norm": 0.7556509375572205, + "learning_rate": 5.557958316182956e-06, + "loss": 2.0962, + "step": 19609 + }, + { + "epoch": 0.65, + "grad_norm": 0.7481658458709717, + "learning_rate": 5.557006068298514e-06, + "loss": 2.038, + "step": 19610 + }, + { + "epoch": 0.65, + "grad_norm": 0.7360104322433472, + "learning_rate": 5.556053870608415e-06, + "loss": 2.0775, + "step": 19611 + }, + { + "epoch": 0.65, + "grad_norm": 0.7646734714508057, + "learning_rate": 5.555101723123419e-06, + "loss": 2.1493, + "step": 19612 + }, + { + "epoch": 0.65, + "grad_norm": 0.7490301728248596, + "learning_rate": 5.5541496258542774e-06, + "loss": 2.0259, + "step": 19613 + }, + { + "epoch": 0.65, + "grad_norm": 0.7321333885192871, + "learning_rate": 5.553197578811745e-06, + "loss": 2.0977, + "step": 19614 + }, + { + "epoch": 0.65, + "grad_norm": 0.7561035752296448, + "learning_rate": 5.5522455820065835e-06, + "loss": 2.1632, + "step": 19615 + }, + { + "epoch": 0.65, + "grad_norm": 0.786316454410553, + "learning_rate": 5.551293635449542e-06, + "loss": 2.031, + "step": 19616 + }, + { + "epoch": 0.65, + "grad_norm": 0.7408419251441956, + "learning_rate": 5.550341739151382e-06, + "loss": 2.1028, + "step": 19617 + }, + { + "epoch": 0.65, + "grad_norm": 0.7293141484260559, + "learning_rate": 5.549389893122852e-06, + "loss": 2.1178, + "step": 19618 + }, + { + "epoch": 0.65, + "grad_norm": 0.7868127822875977, + "learning_rate": 5.548438097374702e-06, + "loss": 2.0776, + "step": 19619 + }, + { + "epoch": 0.65, + "grad_norm": 0.7648585438728333, + "learning_rate": 5.5474863519176916e-06, + "loss": 2.0766, + "step": 19620 + }, + { + "epoch": 0.65, + "grad_norm": 0.7292461395263672, + "learning_rate": 5.546534656762573e-06, + "loss": 2.0316, + "step": 19621 + }, + { + "epoch": 0.65, + "grad_norm": 0.6971485018730164, + "learning_rate": 5.545583011920097e-06, + "loss": 1.9956, + "step": 19622 + }, + { + "epoch": 0.65, + "grad_norm": 0.7470062971115112, + "learning_rate": 5.544631417401009e-06, + "loss": 2.0261, + "step": 19623 + }, + { + "epoch": 0.65, + "grad_norm": 0.7032642960548401, + "learning_rate": 5.5436798732160655e-06, + "loss": 1.9848, + "step": 19624 + }, + { + "epoch": 0.65, + "grad_norm": 0.7521611452102661, + "learning_rate": 5.5427283793760174e-06, + "loss": 2.0703, + "step": 19625 + }, + { + "epoch": 0.65, + "grad_norm": 0.739387035369873, + "learning_rate": 5.541776935891613e-06, + "loss": 2.0466, + "step": 19626 + }, + { + "epoch": 0.65, + "grad_norm": 0.7621315121650696, + "learning_rate": 5.540825542773596e-06, + "loss": 2.0746, + "step": 19627 + }, + { + "epoch": 0.65, + "grad_norm": 0.7290324568748474, + "learning_rate": 5.539874200032722e-06, + "loss": 2.0583, + "step": 19628 + }, + { + "epoch": 0.65, + "grad_norm": 0.7492371201515198, + "learning_rate": 5.538922907679731e-06, + "loss": 2.1333, + "step": 19629 + }, + { + "epoch": 0.65, + "grad_norm": 0.7589752674102783, + "learning_rate": 5.5379716657253755e-06, + "loss": 2.048, + "step": 19630 + }, + { + "epoch": 0.65, + "grad_norm": 0.7361384630203247, + "learning_rate": 5.537020474180409e-06, + "loss": 2.0762, + "step": 19631 + }, + { + "epoch": 0.65, + "grad_norm": 0.7307048439979553, + "learning_rate": 5.536069333055562e-06, + "loss": 2.0141, + "step": 19632 + }, + { + "epoch": 0.65, + "grad_norm": 0.7685796022415161, + "learning_rate": 5.535118242361587e-06, + "loss": 1.9984, + "step": 19633 + }, + { + "epoch": 0.65, + "grad_norm": 0.7387921810150146, + "learning_rate": 5.534167202109233e-06, + "loss": 2.0646, + "step": 19634 + }, + { + "epoch": 0.65, + "grad_norm": 0.7297471761703491, + "learning_rate": 5.533216212309241e-06, + "loss": 2.0476, + "step": 19635 + }, + { + "epoch": 0.65, + "grad_norm": 0.7760244011878967, + "learning_rate": 5.53226527297235e-06, + "loss": 2.0629, + "step": 19636 + }, + { + "epoch": 0.65, + "grad_norm": 0.7446165680885315, + "learning_rate": 5.531314384109313e-06, + "loss": 2.0742, + "step": 19637 + }, + { + "epoch": 0.65, + "grad_norm": 0.7211996912956238, + "learning_rate": 5.530363545730862e-06, + "loss": 2.0412, + "step": 19638 + }, + { + "epoch": 0.65, + "grad_norm": 0.7500234842300415, + "learning_rate": 5.529412757847745e-06, + "loss": 2.044, + "step": 19639 + }, + { + "epoch": 0.65, + "grad_norm": 0.7551546096801758, + "learning_rate": 5.528462020470706e-06, + "loss": 2.1282, + "step": 19640 + }, + { + "epoch": 0.65, + "grad_norm": 0.7420210242271423, + "learning_rate": 5.527511333610482e-06, + "loss": 2.0632, + "step": 19641 + }, + { + "epoch": 0.65, + "grad_norm": 0.7384548187255859, + "learning_rate": 5.526560697277812e-06, + "loss": 2.1293, + "step": 19642 + }, + { + "epoch": 0.65, + "grad_norm": 0.7413015961647034, + "learning_rate": 5.525610111483439e-06, + "loss": 2.0784, + "step": 19643 + }, + { + "epoch": 0.65, + "grad_norm": 0.7680754065513611, + "learning_rate": 5.524659576238102e-06, + "loss": 2.0734, + "step": 19644 + }, + { + "epoch": 0.65, + "grad_norm": 0.7408783435821533, + "learning_rate": 5.523709091552535e-06, + "loss": 2.0845, + "step": 19645 + }, + { + "epoch": 0.65, + "grad_norm": 0.7617834806442261, + "learning_rate": 5.522758657437478e-06, + "loss": 2.0895, + "step": 19646 + }, + { + "epoch": 0.65, + "grad_norm": 0.7709333896636963, + "learning_rate": 5.521808273903675e-06, + "loss": 2.1035, + "step": 19647 + }, + { + "epoch": 0.65, + "grad_norm": 0.763192892074585, + "learning_rate": 5.520857940961857e-06, + "loss": 2.0713, + "step": 19648 + }, + { + "epoch": 0.65, + "grad_norm": 0.7635023593902588, + "learning_rate": 5.519907658622756e-06, + "loss": 1.9942, + "step": 19649 + }, + { + "epoch": 0.65, + "grad_norm": 0.7393171787261963, + "learning_rate": 5.518957426897118e-06, + "loss": 2.0775, + "step": 19650 + }, + { + "epoch": 0.65, + "grad_norm": 0.7395033240318298, + "learning_rate": 5.518007245795668e-06, + "loss": 1.9617, + "step": 19651 + }, + { + "epoch": 0.65, + "grad_norm": 0.7473169565200806, + "learning_rate": 5.517057115329146e-06, + "loss": 2.0368, + "step": 19652 + }, + { + "epoch": 0.65, + "grad_norm": 0.7427636384963989, + "learning_rate": 5.516107035508292e-06, + "loss": 2.0577, + "step": 19653 + }, + { + "epoch": 0.65, + "grad_norm": 0.7609863877296448, + "learning_rate": 5.515157006343828e-06, + "loss": 2.0449, + "step": 19654 + }, + { + "epoch": 0.65, + "grad_norm": 0.7596525549888611, + "learning_rate": 5.514207027846489e-06, + "loss": 2.0976, + "step": 19655 + }, + { + "epoch": 0.65, + "grad_norm": 0.7819472551345825, + "learning_rate": 5.5132571000270145e-06, + "loss": 2.0401, + "step": 19656 + }, + { + "epoch": 0.65, + "grad_norm": 0.7262775897979736, + "learning_rate": 5.512307222896132e-06, + "loss": 2.0723, + "step": 19657 + }, + { + "epoch": 0.65, + "grad_norm": 0.7380446791648865, + "learning_rate": 5.511357396464569e-06, + "loss": 2.0543, + "step": 19658 + }, + { + "epoch": 0.65, + "grad_norm": 0.764438807964325, + "learning_rate": 5.510407620743064e-06, + "loss": 2.0464, + "step": 19659 + }, + { + "epoch": 0.65, + "grad_norm": 0.7211927175521851, + "learning_rate": 5.509457895742336e-06, + "loss": 2.0, + "step": 19660 + }, + { + "epoch": 0.65, + "grad_norm": 0.7760045528411865, + "learning_rate": 5.508508221473124e-06, + "loss": 2.0366, + "step": 19661 + }, + { + "epoch": 0.65, + "grad_norm": 0.7364428043365479, + "learning_rate": 5.507558597946156e-06, + "loss": 2.0496, + "step": 19662 + }, + { + "epoch": 0.65, + "grad_norm": 0.7347718477249146, + "learning_rate": 5.5066090251721586e-06, + "loss": 2.1065, + "step": 19663 + }, + { + "epoch": 0.65, + "grad_norm": 0.7134438157081604, + "learning_rate": 5.505659503161855e-06, + "loss": 2.0351, + "step": 19664 + }, + { + "epoch": 0.65, + "grad_norm": 0.7505150437355042, + "learning_rate": 5.504710031925982e-06, + "loss": 1.9995, + "step": 19665 + }, + { + "epoch": 0.65, + "grad_norm": 0.8008188009262085, + "learning_rate": 5.5037606114752576e-06, + "loss": 2.0418, + "step": 19666 + }, + { + "epoch": 0.65, + "grad_norm": 0.747008740901947, + "learning_rate": 5.5028112418204095e-06, + "loss": 2.1185, + "step": 19667 + }, + { + "epoch": 0.65, + "grad_norm": 0.7310032844543457, + "learning_rate": 5.501861922972163e-06, + "loss": 2.0703, + "step": 19668 + }, + { + "epoch": 0.65, + "grad_norm": 0.7666939496994019, + "learning_rate": 5.500912654941248e-06, + "loss": 2.0696, + "step": 19669 + }, + { + "epoch": 0.65, + "grad_norm": 0.7351548671722412, + "learning_rate": 5.499963437738382e-06, + "loss": 2.1072, + "step": 19670 + }, + { + "epoch": 0.65, + "grad_norm": 0.7615830302238464, + "learning_rate": 5.4990142713742945e-06, + "loss": 1.988, + "step": 19671 + }, + { + "epoch": 0.65, + "grad_norm": 0.7271462678909302, + "learning_rate": 5.498065155859706e-06, + "loss": 2.055, + "step": 19672 + }, + { + "epoch": 0.65, + "grad_norm": 0.7374181747436523, + "learning_rate": 5.497116091205336e-06, + "loss": 2.0506, + "step": 19673 + }, + { + "epoch": 0.65, + "grad_norm": 0.7781620025634766, + "learning_rate": 5.49616707742191e-06, + "loss": 2.0548, + "step": 19674 + }, + { + "epoch": 0.65, + "grad_norm": 0.764536440372467, + "learning_rate": 5.495218114520156e-06, + "loss": 1.9853, + "step": 19675 + }, + { + "epoch": 0.65, + "grad_norm": 0.7336354851722717, + "learning_rate": 5.49426920251078e-06, + "loss": 2.1152, + "step": 19676 + }, + { + "epoch": 0.65, + "grad_norm": 0.7385858297348022, + "learning_rate": 5.493320341404509e-06, + "loss": 1.9814, + "step": 19677 + }, + { + "epoch": 0.65, + "grad_norm": 0.7404362559318542, + "learning_rate": 5.4923715312120686e-06, + "loss": 2.0353, + "step": 19678 + }, + { + "epoch": 0.65, + "grad_norm": 0.7485159635543823, + "learning_rate": 5.4914227719441726e-06, + "loss": 2.0705, + "step": 19679 + }, + { + "epoch": 0.65, + "grad_norm": 0.7675884366035461, + "learning_rate": 5.490474063611535e-06, + "loss": 2.0367, + "step": 19680 + }, + { + "epoch": 0.65, + "grad_norm": 0.7122249007225037, + "learning_rate": 5.4895254062248845e-06, + "loss": 2.0245, + "step": 19681 + }, + { + "epoch": 0.65, + "grad_norm": 0.7568567395210266, + "learning_rate": 5.4885767997949265e-06, + "loss": 2.0471, + "step": 19682 + }, + { + "epoch": 0.65, + "grad_norm": 0.7437814474105835, + "learning_rate": 5.487628244332386e-06, + "loss": 2.0393, + "step": 19683 + }, + { + "epoch": 0.65, + "grad_norm": 0.7349488735198975, + "learning_rate": 5.48667973984798e-06, + "loss": 2.0481, + "step": 19684 + }, + { + "epoch": 0.65, + "grad_norm": 0.7411438822746277, + "learning_rate": 5.48573128635242e-06, + "loss": 2.1024, + "step": 19685 + }, + { + "epoch": 0.65, + "grad_norm": 0.7478607296943665, + "learning_rate": 5.48478288385642e-06, + "loss": 1.9975, + "step": 19686 + }, + { + "epoch": 0.65, + "grad_norm": 0.7716949582099915, + "learning_rate": 5.4838345323707e-06, + "loss": 2.1017, + "step": 19687 + }, + { + "epoch": 0.66, + "grad_norm": 0.7322512865066528, + "learning_rate": 5.4828862319059705e-06, + "loss": 2.0395, + "step": 19688 + }, + { + "epoch": 0.66, + "grad_norm": 0.7583751678466797, + "learning_rate": 5.4819379824729424e-06, + "loss": 2.054, + "step": 19689 + }, + { + "epoch": 0.66, + "grad_norm": 0.7522130012512207, + "learning_rate": 5.48098978408233e-06, + "loss": 2.078, + "step": 19690 + }, + { + "epoch": 0.66, + "grad_norm": 0.75520920753479, + "learning_rate": 5.48004163674485e-06, + "loss": 2.0066, + "step": 19691 + }, + { + "epoch": 0.66, + "grad_norm": 0.7588621377944946, + "learning_rate": 5.479093540471208e-06, + "loss": 2.1121, + "step": 19692 + }, + { + "epoch": 0.66, + "grad_norm": 0.7495948672294617, + "learning_rate": 5.4781454952721225e-06, + "loss": 2.0807, + "step": 19693 + }, + { + "epoch": 0.66, + "grad_norm": 0.744258463382721, + "learning_rate": 5.477197501158298e-06, + "loss": 2.0421, + "step": 19694 + }, + { + "epoch": 0.66, + "grad_norm": 0.7458747625350952, + "learning_rate": 5.47624955814044e-06, + "loss": 2.0585, + "step": 19695 + }, + { + "epoch": 0.66, + "grad_norm": 0.755458652973175, + "learning_rate": 5.4753016662292645e-06, + "loss": 2.0567, + "step": 19696 + }, + { + "epoch": 0.66, + "grad_norm": 0.7341300249099731, + "learning_rate": 5.474353825435488e-06, + "loss": 2.0649, + "step": 19697 + }, + { + "epoch": 0.66, + "grad_norm": 0.7299632430076599, + "learning_rate": 5.4734060357698004e-06, + "loss": 2.0686, + "step": 19698 + }, + { + "epoch": 0.66, + "grad_norm": 0.7370162606239319, + "learning_rate": 5.472458297242919e-06, + "loss": 2.0584, + "step": 19699 + }, + { + "epoch": 0.66, + "grad_norm": 0.736213743686676, + "learning_rate": 5.471510609865555e-06, + "loss": 2.0918, + "step": 19700 + }, + { + "epoch": 0.66, + "grad_norm": 0.7233362197875977, + "learning_rate": 5.47056297364841e-06, + "loss": 2.0629, + "step": 19701 + }, + { + "epoch": 0.66, + "grad_norm": 0.7354491949081421, + "learning_rate": 5.469615388602185e-06, + "loss": 2.0457, + "step": 19702 + }, + { + "epoch": 0.66, + "grad_norm": 0.7724765539169312, + "learning_rate": 5.468667854737595e-06, + "loss": 2.0476, + "step": 19703 + }, + { + "epoch": 0.66, + "grad_norm": 0.7542290091514587, + "learning_rate": 5.467720372065335e-06, + "loss": 2.1405, + "step": 19704 + }, + { + "epoch": 0.66, + "grad_norm": 0.7747575044631958, + "learning_rate": 5.466772940596116e-06, + "loss": 2.0122, + "step": 19705 + }, + { + "epoch": 0.66, + "grad_norm": 0.7280795574188232, + "learning_rate": 5.465825560340642e-06, + "loss": 2.0257, + "step": 19706 + }, + { + "epoch": 0.66, + "grad_norm": 0.783419668674469, + "learning_rate": 5.464878231309614e-06, + "loss": 2.1004, + "step": 19707 + }, + { + "epoch": 0.66, + "grad_norm": 0.7385865449905396, + "learning_rate": 5.46393095351373e-06, + "loss": 2.0513, + "step": 19708 + }, + { + "epoch": 0.66, + "grad_norm": 0.7379505038261414, + "learning_rate": 5.462983726963695e-06, + "loss": 2.0579, + "step": 19709 + }, + { + "epoch": 0.66, + "grad_norm": 0.8014028668403625, + "learning_rate": 5.4620365516702204e-06, + "loss": 2.0854, + "step": 19710 + }, + { + "epoch": 0.66, + "grad_norm": 0.7600050568580627, + "learning_rate": 5.461089427643988e-06, + "loss": 2.0756, + "step": 19711 + }, + { + "epoch": 0.66, + "grad_norm": 0.7606709003448486, + "learning_rate": 5.460142354895707e-06, + "loss": 2.0589, + "step": 19712 + }, + { + "epoch": 0.66, + "grad_norm": 0.7199676632881165, + "learning_rate": 5.459195333436082e-06, + "loss": 2.0555, + "step": 19713 + }, + { + "epoch": 0.66, + "grad_norm": 0.7474765181541443, + "learning_rate": 5.458248363275802e-06, + "loss": 2.0495, + "step": 19714 + }, + { + "epoch": 0.66, + "grad_norm": 0.7738089561462402, + "learning_rate": 5.457301444425576e-06, + "loss": 2.1036, + "step": 19715 + }, + { + "epoch": 0.66, + "grad_norm": 0.7274369597434998, + "learning_rate": 5.456354576896094e-06, + "loss": 2.0065, + "step": 19716 + }, + { + "epoch": 0.66, + "grad_norm": 0.7285518050193787, + "learning_rate": 5.455407760698053e-06, + "loss": 2.0169, + "step": 19717 + }, + { + "epoch": 0.66, + "grad_norm": 0.7219896912574768, + "learning_rate": 5.45446099584215e-06, + "loss": 2.0489, + "step": 19718 + }, + { + "epoch": 0.66, + "grad_norm": 0.7287682890892029, + "learning_rate": 5.453514282339092e-06, + "loss": 2.0968, + "step": 19719 + }, + { + "epoch": 0.66, + "grad_norm": 0.7363569140434265, + "learning_rate": 5.452567620199556e-06, + "loss": 2.0978, + "step": 19720 + }, + { + "epoch": 0.66, + "grad_norm": 0.7803433537483215, + "learning_rate": 5.451621009434247e-06, + "loss": 2.0022, + "step": 19721 + }, + { + "epoch": 0.66, + "grad_norm": 0.7706819772720337, + "learning_rate": 5.450674450053861e-06, + "loss": 1.9931, + "step": 19722 + }, + { + "epoch": 0.66, + "grad_norm": 0.7571954131126404, + "learning_rate": 5.449727942069086e-06, + "loss": 2.0296, + "step": 19723 + }, + { + "epoch": 0.66, + "grad_norm": 0.750938892364502, + "learning_rate": 5.448781485490622e-06, + "loss": 2.04, + "step": 19724 + }, + { + "epoch": 0.66, + "grad_norm": 0.7727022171020508, + "learning_rate": 5.4478350803291536e-06, + "loss": 2.0418, + "step": 19725 + }, + { + "epoch": 0.66, + "grad_norm": 0.7717944979667664, + "learning_rate": 5.446888726595381e-06, + "loss": 2.0223, + "step": 19726 + }, + { + "epoch": 0.66, + "grad_norm": 0.7570573091506958, + "learning_rate": 5.445942424299986e-06, + "loss": 2.0455, + "step": 19727 + }, + { + "epoch": 0.66, + "grad_norm": 0.7799829244613647, + "learning_rate": 5.444996173453668e-06, + "loss": 2.1299, + "step": 19728 + }, + { + "epoch": 0.66, + "grad_norm": 0.745093584060669, + "learning_rate": 5.444049974067115e-06, + "loss": 2.1388, + "step": 19729 + }, + { + "epoch": 0.66, + "grad_norm": 0.7703282237052917, + "learning_rate": 5.4431038261510104e-06, + "loss": 1.9845, + "step": 19730 + }, + { + "epoch": 0.66, + "grad_norm": 0.7471466660499573, + "learning_rate": 5.442157729716049e-06, + "loss": 2.0651, + "step": 19731 + }, + { + "epoch": 0.66, + "grad_norm": 0.7399017214775085, + "learning_rate": 5.441211684772927e-06, + "loss": 2.1515, + "step": 19732 + }, + { + "epoch": 0.66, + "grad_norm": 0.7519108653068542, + "learning_rate": 5.4402656913323135e-06, + "loss": 2.0743, + "step": 19733 + }, + { + "epoch": 0.66, + "grad_norm": 0.7227649092674255, + "learning_rate": 5.439319749404907e-06, + "loss": 2.0384, + "step": 19734 + }, + { + "epoch": 0.66, + "grad_norm": 0.7172938585281372, + "learning_rate": 5.438373859001399e-06, + "loss": 2.0351, + "step": 19735 + }, + { + "epoch": 0.66, + "grad_norm": 0.7852622866630554, + "learning_rate": 5.437428020132464e-06, + "loss": 2.0453, + "step": 19736 + }, + { + "epoch": 0.66, + "grad_norm": 0.7595424056053162, + "learning_rate": 5.436482232808797e-06, + "loss": 2.098, + "step": 19737 + }, + { + "epoch": 0.66, + "grad_norm": 0.75477534532547, + "learning_rate": 5.435536497041081e-06, + "loss": 2.104, + "step": 19738 + }, + { + "epoch": 0.66, + "grad_norm": 0.7570781707763672, + "learning_rate": 5.434590812839993e-06, + "loss": 2.0665, + "step": 19739 + }, + { + "epoch": 0.66, + "grad_norm": 0.7319640517234802, + "learning_rate": 5.433645180216223e-06, + "loss": 2.1142, + "step": 19740 + }, + { + "epoch": 0.66, + "grad_norm": 0.7563708424568176, + "learning_rate": 5.432699599180457e-06, + "loss": 2.0574, + "step": 19741 + }, + { + "epoch": 0.66, + "grad_norm": 0.7478208541870117, + "learning_rate": 5.431754069743374e-06, + "loss": 1.994, + "step": 19742 + }, + { + "epoch": 0.66, + "grad_norm": 0.7453145384788513, + "learning_rate": 5.430808591915654e-06, + "loss": 2.0486, + "step": 19743 + }, + { + "epoch": 0.66, + "grad_norm": 0.7371820211410522, + "learning_rate": 5.429863165707983e-06, + "loss": 2.146, + "step": 19744 + }, + { + "epoch": 0.66, + "grad_norm": 0.7339878678321838, + "learning_rate": 5.4289177911310365e-06, + "loss": 2.0643, + "step": 19745 + }, + { + "epoch": 0.66, + "grad_norm": 0.7759557366371155, + "learning_rate": 5.427972468195501e-06, + "loss": 2.0611, + "step": 19746 + }, + { + "epoch": 0.66, + "grad_norm": 0.7640635371208191, + "learning_rate": 5.42702719691205e-06, + "loss": 2.0867, + "step": 19747 + }, + { + "epoch": 0.66, + "grad_norm": 0.7491858005523682, + "learning_rate": 5.42608197729137e-06, + "loss": 2.0078, + "step": 19748 + }, + { + "epoch": 0.66, + "grad_norm": 0.7212887406349182, + "learning_rate": 5.42513680934413e-06, + "loss": 2.0102, + "step": 19749 + }, + { + "epoch": 0.66, + "grad_norm": 0.7689663171768188, + "learning_rate": 5.424191693081018e-06, + "loss": 1.97, + "step": 19750 + }, + { + "epoch": 0.66, + "grad_norm": 0.7429208159446716, + "learning_rate": 5.423246628512706e-06, + "loss": 2.111, + "step": 19751 + }, + { + "epoch": 0.66, + "grad_norm": 0.7256428599357605, + "learning_rate": 5.422301615649868e-06, + "loss": 2.0829, + "step": 19752 + }, + { + "epoch": 0.66, + "grad_norm": 0.7550253868103027, + "learning_rate": 5.421356654503183e-06, + "loss": 2.0416, + "step": 19753 + }, + { + "epoch": 0.66, + "grad_norm": 0.7384501695632935, + "learning_rate": 5.4204117450833315e-06, + "loss": 2.1119, + "step": 19754 + }, + { + "epoch": 0.66, + "grad_norm": 0.7323759198188782, + "learning_rate": 5.419466887400985e-06, + "loss": 2.0756, + "step": 19755 + }, + { + "epoch": 0.66, + "grad_norm": 0.7427287697792053, + "learning_rate": 5.418522081466812e-06, + "loss": 1.9783, + "step": 19756 + }, + { + "epoch": 0.66, + "grad_norm": 0.7797662615776062, + "learning_rate": 5.417577327291496e-06, + "loss": 1.9917, + "step": 19757 + }, + { + "epoch": 0.66, + "grad_norm": 0.7585448622703552, + "learning_rate": 5.416632624885701e-06, + "loss": 2.0274, + "step": 19758 + }, + { + "epoch": 0.66, + "grad_norm": 0.7598999738693237, + "learning_rate": 5.41568797426011e-06, + "loss": 2.1064, + "step": 19759 + }, + { + "epoch": 0.66, + "grad_norm": 0.7197571396827698, + "learning_rate": 5.414743375425389e-06, + "loss": 2.125, + "step": 19760 + }, + { + "epoch": 0.66, + "grad_norm": 0.7665246725082397, + "learning_rate": 5.413798828392205e-06, + "loss": 2.1403, + "step": 19761 + }, + { + "epoch": 0.66, + "grad_norm": 0.7306970357894897, + "learning_rate": 5.412854333171236e-06, + "loss": 2.0329, + "step": 19762 + }, + { + "epoch": 0.66, + "grad_norm": 0.732742190361023, + "learning_rate": 5.411909889773153e-06, + "loss": 2.0902, + "step": 19763 + }, + { + "epoch": 0.66, + "grad_norm": 0.761755108833313, + "learning_rate": 5.410965498208622e-06, + "loss": 2.058, + "step": 19764 + }, + { + "epoch": 0.66, + "grad_norm": 0.7586823105812073, + "learning_rate": 5.4100211584883126e-06, + "loss": 2.1033, + "step": 19765 + }, + { + "epoch": 0.66, + "grad_norm": 0.7191476225852966, + "learning_rate": 5.409076870622896e-06, + "loss": 2.0321, + "step": 19766 + }, + { + "epoch": 0.66, + "grad_norm": 0.7183763980865479, + "learning_rate": 5.408132634623035e-06, + "loss": 2.0834, + "step": 19767 + }, + { + "epoch": 0.66, + "grad_norm": 0.7388396263122559, + "learning_rate": 5.407188450499403e-06, + "loss": 1.9888, + "step": 19768 + }, + { + "epoch": 0.66, + "grad_norm": 0.73529452085495, + "learning_rate": 5.406244318262662e-06, + "loss": 2.0209, + "step": 19769 + }, + { + "epoch": 0.66, + "grad_norm": 0.7525423765182495, + "learning_rate": 5.405300237923483e-06, + "loss": 2.0674, + "step": 19770 + }, + { + "epoch": 0.66, + "grad_norm": 0.7598569989204407, + "learning_rate": 5.404356209492527e-06, + "loss": 1.9819, + "step": 19771 + }, + { + "epoch": 0.66, + "grad_norm": 0.7715239524841309, + "learning_rate": 5.403412232980465e-06, + "loss": 2.0709, + "step": 19772 + }, + { + "epoch": 0.66, + "grad_norm": 0.7499877214431763, + "learning_rate": 5.402468308397957e-06, + "loss": 2.0422, + "step": 19773 + }, + { + "epoch": 0.66, + "grad_norm": 0.7537586688995361, + "learning_rate": 5.401524435755663e-06, + "loss": 2.0573, + "step": 19774 + }, + { + "epoch": 0.66, + "grad_norm": 0.7416905164718628, + "learning_rate": 5.400580615064252e-06, + "loss": 1.9734, + "step": 19775 + }, + { + "epoch": 0.66, + "grad_norm": 0.7273804545402527, + "learning_rate": 5.399636846334388e-06, + "loss": 2.0147, + "step": 19776 + }, + { + "epoch": 0.66, + "grad_norm": 0.7350367903709412, + "learning_rate": 5.398693129576733e-06, + "loss": 2.0857, + "step": 19777 + }, + { + "epoch": 0.66, + "grad_norm": 0.7223485112190247, + "learning_rate": 5.397749464801941e-06, + "loss": 2.0228, + "step": 19778 + }, + { + "epoch": 0.66, + "grad_norm": 0.7379430532455444, + "learning_rate": 5.396805852020683e-06, + "loss": 1.9777, + "step": 19779 + }, + { + "epoch": 0.66, + "grad_norm": 0.7442083954811096, + "learning_rate": 5.395862291243611e-06, + "loss": 1.9731, + "step": 19780 + }, + { + "epoch": 0.66, + "grad_norm": 0.757374107837677, + "learning_rate": 5.394918782481392e-06, + "loss": 2.0251, + "step": 19781 + }, + { + "epoch": 0.66, + "grad_norm": 0.751857578754425, + "learning_rate": 5.393975325744682e-06, + "loss": 2.0454, + "step": 19782 + }, + { + "epoch": 0.66, + "grad_norm": 0.7656716108322144, + "learning_rate": 5.3930319210441354e-06, + "loss": 2.1023, + "step": 19783 + }, + { + "epoch": 0.66, + "grad_norm": 0.762579619884491, + "learning_rate": 5.392088568390415e-06, + "loss": 1.9963, + "step": 19784 + }, + { + "epoch": 0.66, + "grad_norm": 0.7674401998519897, + "learning_rate": 5.39114526779418e-06, + "loss": 2.1245, + "step": 19785 + }, + { + "epoch": 0.66, + "grad_norm": 0.7667040824890137, + "learning_rate": 5.390202019266084e-06, + "loss": 2.0757, + "step": 19786 + }, + { + "epoch": 0.66, + "grad_norm": 0.7771633863449097, + "learning_rate": 5.389258822816782e-06, + "loss": 2.0628, + "step": 19787 + }, + { + "epoch": 0.66, + "grad_norm": 0.7289263606071472, + "learning_rate": 5.3883156784569345e-06, + "loss": 1.9789, + "step": 19788 + }, + { + "epoch": 0.66, + "grad_norm": 0.766508936882019, + "learning_rate": 5.387372586197191e-06, + "loss": 2.1838, + "step": 19789 + }, + { + "epoch": 0.66, + "grad_norm": 0.7497766017913818, + "learning_rate": 5.386429546048211e-06, + "loss": 2.0673, + "step": 19790 + }, + { + "epoch": 0.66, + "grad_norm": 0.7579146027565002, + "learning_rate": 5.385486558020643e-06, + "loss": 2.0931, + "step": 19791 + }, + { + "epoch": 0.66, + "grad_norm": 0.7560113072395325, + "learning_rate": 5.384543622125148e-06, + "loss": 2.1174, + "step": 19792 + }, + { + "epoch": 0.66, + "grad_norm": 0.7545515894889832, + "learning_rate": 5.38360073837237e-06, + "loss": 2.0724, + "step": 19793 + }, + { + "epoch": 0.66, + "grad_norm": 0.748836874961853, + "learning_rate": 5.382657906772969e-06, + "loss": 2.0201, + "step": 19794 + }, + { + "epoch": 0.66, + "grad_norm": 0.7140710353851318, + "learning_rate": 5.3817151273375934e-06, + "loss": 2.0541, + "step": 19795 + }, + { + "epoch": 0.66, + "grad_norm": 0.7162518501281738, + "learning_rate": 5.3807724000768895e-06, + "loss": 2.0239, + "step": 19796 + }, + { + "epoch": 0.66, + "grad_norm": 0.7653456330299377, + "learning_rate": 5.379829725001511e-06, + "loss": 2.0596, + "step": 19797 + }, + { + "epoch": 0.66, + "grad_norm": 0.7563791275024414, + "learning_rate": 5.3788871021221145e-06, + "loss": 2.0381, + "step": 19798 + }, + { + "epoch": 0.66, + "grad_norm": 0.7218927145004272, + "learning_rate": 5.377944531449341e-06, + "loss": 2.0801, + "step": 19799 + }, + { + "epoch": 0.66, + "grad_norm": 0.7141469717025757, + "learning_rate": 5.3770020129938395e-06, + "loss": 2.0641, + "step": 19800 + }, + { + "epoch": 0.66, + "grad_norm": 0.7583063840866089, + "learning_rate": 5.376059546766264e-06, + "loss": 2.0237, + "step": 19801 + }, + { + "epoch": 0.66, + "grad_norm": 0.7499353289604187, + "learning_rate": 5.375117132777252e-06, + "loss": 2.0781, + "step": 19802 + }, + { + "epoch": 0.66, + "grad_norm": 0.730440080165863, + "learning_rate": 5.37417477103746e-06, + "loss": 2.0126, + "step": 19803 + }, + { + "epoch": 0.66, + "grad_norm": 0.7603054642677307, + "learning_rate": 5.373232461557532e-06, + "loss": 2.0461, + "step": 19804 + }, + { + "epoch": 0.66, + "grad_norm": 0.7560338377952576, + "learning_rate": 5.3722902043481075e-06, + "loss": 2.0306, + "step": 19805 + }, + { + "epoch": 0.66, + "grad_norm": 0.7698346376419067, + "learning_rate": 5.371347999419836e-06, + "loss": 2.046, + "step": 19806 + }, + { + "epoch": 0.66, + "grad_norm": 0.7237495183944702, + "learning_rate": 5.370405846783366e-06, + "loss": 2.0453, + "step": 19807 + }, + { + "epoch": 0.66, + "grad_norm": 0.7479041814804077, + "learning_rate": 5.3694637464493395e-06, + "loss": 2.0339, + "step": 19808 + }, + { + "epoch": 0.66, + "grad_norm": 0.7250011563301086, + "learning_rate": 5.368521698428392e-06, + "loss": 2.0603, + "step": 19809 + }, + { + "epoch": 0.66, + "grad_norm": 0.750363290309906, + "learning_rate": 5.367579702731176e-06, + "loss": 2.0232, + "step": 19810 + }, + { + "epoch": 0.66, + "grad_norm": 0.747576117515564, + "learning_rate": 5.366637759368325e-06, + "loss": 2.1004, + "step": 19811 + }, + { + "epoch": 0.66, + "grad_norm": 0.7434778213500977, + "learning_rate": 5.365695868350491e-06, + "loss": 2.0606, + "step": 19812 + }, + { + "epoch": 0.66, + "grad_norm": 0.7559706568717957, + "learning_rate": 5.364754029688304e-06, + "loss": 2.0826, + "step": 19813 + }, + { + "epoch": 0.66, + "grad_norm": 0.7356227040290833, + "learning_rate": 5.363812243392414e-06, + "loss": 2.0474, + "step": 19814 + }, + { + "epoch": 0.66, + "grad_norm": 0.7391427755355835, + "learning_rate": 5.362870509473452e-06, + "loss": 1.9577, + "step": 19815 + }, + { + "epoch": 0.66, + "grad_norm": 0.7255693674087524, + "learning_rate": 5.36192882794206e-06, + "loss": 2.0871, + "step": 19816 + }, + { + "epoch": 0.66, + "grad_norm": 0.7460416555404663, + "learning_rate": 5.360987198808888e-06, + "loss": 2.0773, + "step": 19817 + }, + { + "epoch": 0.66, + "grad_norm": 0.7636624574661255, + "learning_rate": 5.360045622084555e-06, + "loss": 2.0483, + "step": 19818 + }, + { + "epoch": 0.66, + "grad_norm": 0.7631672620773315, + "learning_rate": 5.359104097779708e-06, + "loss": 2.115, + "step": 19819 + }, + { + "epoch": 0.66, + "grad_norm": 0.7428252696990967, + "learning_rate": 5.358162625904985e-06, + "loss": 2.0578, + "step": 19820 + }, + { + "epoch": 0.66, + "grad_norm": 0.769159197807312, + "learning_rate": 5.357221206471022e-06, + "loss": 2.067, + "step": 19821 + }, + { + "epoch": 0.66, + "grad_norm": 0.7813783288002014, + "learning_rate": 5.356279839488449e-06, + "loss": 2.0208, + "step": 19822 + }, + { + "epoch": 0.66, + "grad_norm": 0.7492214441299438, + "learning_rate": 5.355338524967908e-06, + "loss": 2.0232, + "step": 19823 + }, + { + "epoch": 0.66, + "grad_norm": 0.781178891658783, + "learning_rate": 5.354397262920028e-06, + "loss": 2.0861, + "step": 19824 + }, + { + "epoch": 0.66, + "grad_norm": 0.7603986859321594, + "learning_rate": 5.353456053355447e-06, + "loss": 2.0151, + "step": 19825 + }, + { + "epoch": 0.66, + "grad_norm": 0.7563328146934509, + "learning_rate": 5.352514896284793e-06, + "loss": 2.1212, + "step": 19826 + }, + { + "epoch": 0.66, + "grad_norm": 0.7241660952568054, + "learning_rate": 5.3515737917187075e-06, + "loss": 2.0515, + "step": 19827 + }, + { + "epoch": 0.66, + "grad_norm": 0.7466849088668823, + "learning_rate": 5.350632739667811e-06, + "loss": 2.0861, + "step": 19828 + }, + { + "epoch": 0.66, + "grad_norm": 0.7603322863578796, + "learning_rate": 5.349691740142746e-06, + "loss": 2.0577, + "step": 19829 + }, + { + "epoch": 0.66, + "grad_norm": 0.7365947365760803, + "learning_rate": 5.348750793154138e-06, + "loss": 2.0361, + "step": 19830 + }, + { + "epoch": 0.66, + "grad_norm": 0.7431128621101379, + "learning_rate": 5.347809898712614e-06, + "loss": 2.0992, + "step": 19831 + }, + { + "epoch": 0.66, + "grad_norm": 0.7111643552780151, + "learning_rate": 5.346869056828807e-06, + "loss": 1.981, + "step": 19832 + }, + { + "epoch": 0.66, + "grad_norm": 0.7219464182853699, + "learning_rate": 5.34592826751335e-06, + "loss": 2.0646, + "step": 19833 + }, + { + "epoch": 0.66, + "grad_norm": 0.7504080533981323, + "learning_rate": 5.344987530776868e-06, + "loss": 2.0321, + "step": 19834 + }, + { + "epoch": 0.66, + "grad_norm": 0.7847098112106323, + "learning_rate": 5.3440468466299866e-06, + "loss": 2.0029, + "step": 19835 + }, + { + "epoch": 0.66, + "grad_norm": 0.7563808560371399, + "learning_rate": 5.343106215083338e-06, + "loss": 2.0545, + "step": 19836 + }, + { + "epoch": 0.66, + "grad_norm": 0.7318623065948486, + "learning_rate": 5.342165636147542e-06, + "loss": 2.1518, + "step": 19837 + }, + { + "epoch": 0.66, + "grad_norm": 0.7477677464485168, + "learning_rate": 5.341225109833228e-06, + "loss": 2.0879, + "step": 19838 + }, + { + "epoch": 0.66, + "grad_norm": 0.7810040712356567, + "learning_rate": 5.340284636151032e-06, + "loss": 2.0714, + "step": 19839 + }, + { + "epoch": 0.66, + "grad_norm": 0.7411178350448608, + "learning_rate": 5.33934421511156e-06, + "loss": 2.1211, + "step": 19840 + }, + { + "epoch": 0.66, + "grad_norm": 0.7598457336425781, + "learning_rate": 5.338403846725446e-06, + "loss": 2.0268, + "step": 19841 + }, + { + "epoch": 0.66, + "grad_norm": 0.7214486598968506, + "learning_rate": 5.337463531003319e-06, + "loss": 2.065, + "step": 19842 + }, + { + "epoch": 0.66, + "grad_norm": 0.7749118208885193, + "learning_rate": 5.336523267955794e-06, + "loss": 2.0255, + "step": 19843 + }, + { + "epoch": 0.66, + "grad_norm": 0.7391039133071899, + "learning_rate": 5.335583057593494e-06, + "loss": 1.9614, + "step": 19844 + }, + { + "epoch": 0.66, + "grad_norm": 0.7386989593505859, + "learning_rate": 5.334642899927046e-06, + "loss": 2.0631, + "step": 19845 + }, + { + "epoch": 0.66, + "grad_norm": 0.7345327138900757, + "learning_rate": 5.3337027949670635e-06, + "loss": 2.0313, + "step": 19846 + }, + { + "epoch": 0.66, + "grad_norm": 0.7425429224967957, + "learning_rate": 5.332762742724173e-06, + "loss": 2.0791, + "step": 19847 + }, + { + "epoch": 0.66, + "grad_norm": 0.7565324306488037, + "learning_rate": 5.331822743208999e-06, + "loss": 2.0676, + "step": 19848 + }, + { + "epoch": 0.66, + "grad_norm": 0.7313607931137085, + "learning_rate": 5.330882796432155e-06, + "loss": 1.996, + "step": 19849 + }, + { + "epoch": 0.66, + "grad_norm": 0.7483778595924377, + "learning_rate": 5.329942902404257e-06, + "loss": 2.0279, + "step": 19850 + }, + { + "epoch": 0.66, + "grad_norm": 0.7266696095466614, + "learning_rate": 5.32900306113593e-06, + "loss": 1.9857, + "step": 19851 + }, + { + "epoch": 0.66, + "grad_norm": 0.7487393021583557, + "learning_rate": 5.328063272637789e-06, + "loss": 2.153, + "step": 19852 + }, + { + "epoch": 0.66, + "grad_norm": 0.7430779337882996, + "learning_rate": 5.327123536920449e-06, + "loss": 2.0856, + "step": 19853 + }, + { + "epoch": 0.66, + "grad_norm": 0.7344644665718079, + "learning_rate": 5.3261838539945265e-06, + "loss": 2.0582, + "step": 19854 + }, + { + "epoch": 0.66, + "grad_norm": 0.7301772832870483, + "learning_rate": 5.325244223870645e-06, + "loss": 2.0258, + "step": 19855 + }, + { + "epoch": 0.66, + "grad_norm": 0.7243576049804688, + "learning_rate": 5.324304646559415e-06, + "loss": 2.0844, + "step": 19856 + }, + { + "epoch": 0.66, + "grad_norm": 0.7731500864028931, + "learning_rate": 5.323365122071446e-06, + "loss": 2.1195, + "step": 19857 + }, + { + "epoch": 0.66, + "grad_norm": 0.7676663398742676, + "learning_rate": 5.322425650417361e-06, + "loss": 2.0695, + "step": 19858 + }, + { + "epoch": 0.66, + "grad_norm": 0.7373532056808472, + "learning_rate": 5.321486231607767e-06, + "loss": 2.0614, + "step": 19859 + }, + { + "epoch": 0.66, + "grad_norm": 0.7399048805236816, + "learning_rate": 5.320546865653278e-06, + "loss": 2.0725, + "step": 19860 + }, + { + "epoch": 0.66, + "grad_norm": 0.7232118844985962, + "learning_rate": 5.319607552564516e-06, + "loss": 2.0605, + "step": 19861 + }, + { + "epoch": 0.66, + "grad_norm": 0.7304097414016724, + "learning_rate": 5.318668292352078e-06, + "loss": 2.0582, + "step": 19862 + }, + { + "epoch": 0.66, + "grad_norm": 0.7253690361976624, + "learning_rate": 5.317729085026582e-06, + "loss": 2.0318, + "step": 19863 + }, + { + "epoch": 0.66, + "grad_norm": 0.7731072306632996, + "learning_rate": 5.3167899305986416e-06, + "loss": 2.0767, + "step": 19864 + }, + { + "epoch": 0.66, + "grad_norm": 0.7335454225540161, + "learning_rate": 5.315850829078864e-06, + "loss": 2.0654, + "step": 19865 + }, + { + "epoch": 0.66, + "grad_norm": 0.7608612179756165, + "learning_rate": 5.314911780477856e-06, + "loss": 2.061, + "step": 19866 + }, + { + "epoch": 0.66, + "grad_norm": 0.7415502071380615, + "learning_rate": 5.313972784806232e-06, + "loss": 2.063, + "step": 19867 + }, + { + "epoch": 0.66, + "grad_norm": 0.7321881055831909, + "learning_rate": 5.3130338420745935e-06, + "loss": 2.0547, + "step": 19868 + }, + { + "epoch": 0.66, + "grad_norm": 0.7696564793586731, + "learning_rate": 5.312094952293552e-06, + "loss": 2.0859, + "step": 19869 + }, + { + "epoch": 0.66, + "grad_norm": 0.7354307770729065, + "learning_rate": 5.311156115473718e-06, + "loss": 2.0228, + "step": 19870 + }, + { + "epoch": 0.66, + "grad_norm": 0.7332853674888611, + "learning_rate": 5.310217331625695e-06, + "loss": 2.0092, + "step": 19871 + }, + { + "epoch": 0.66, + "grad_norm": 0.8143352270126343, + "learning_rate": 5.309278600760083e-06, + "loss": 2.0506, + "step": 19872 + }, + { + "epoch": 0.66, + "grad_norm": 0.7098326683044434, + "learning_rate": 5.308339922887497e-06, + "loss": 2.0738, + "step": 19873 + }, + { + "epoch": 0.66, + "grad_norm": 0.7538421154022217, + "learning_rate": 5.307401298018536e-06, + "loss": 2.0569, + "step": 19874 + }, + { + "epoch": 0.66, + "grad_norm": 0.7396963238716125, + "learning_rate": 5.306462726163802e-06, + "loss": 2.0579, + "step": 19875 + }, + { + "epoch": 0.66, + "grad_norm": 0.7569546103477478, + "learning_rate": 5.305524207333901e-06, + "loss": 2.0504, + "step": 19876 + }, + { + "epoch": 0.66, + "grad_norm": 0.7797518968582153, + "learning_rate": 5.304585741539441e-06, + "loss": 2.0642, + "step": 19877 + }, + { + "epoch": 0.66, + "grad_norm": 0.726717472076416, + "learning_rate": 5.30364732879102e-06, + "loss": 2.0929, + "step": 19878 + }, + { + "epoch": 0.66, + "grad_norm": 0.7441016435623169, + "learning_rate": 5.302708969099233e-06, + "loss": 2.0631, + "step": 19879 + }, + { + "epoch": 0.66, + "grad_norm": 0.7279456853866577, + "learning_rate": 5.301770662474692e-06, + "loss": 1.9721, + "step": 19880 + }, + { + "epoch": 0.66, + "grad_norm": 0.7581925988197327, + "learning_rate": 5.30083240892799e-06, + "loss": 2.0241, + "step": 19881 + }, + { + "epoch": 0.66, + "grad_norm": 0.7430950999259949, + "learning_rate": 5.299894208469727e-06, + "loss": 2.0567, + "step": 19882 + }, + { + "epoch": 0.66, + "grad_norm": 0.7804224491119385, + "learning_rate": 5.298956061110514e-06, + "loss": 2.0719, + "step": 19883 + }, + { + "epoch": 0.66, + "grad_norm": 0.7645136713981628, + "learning_rate": 5.298017966860934e-06, + "loss": 2.0291, + "step": 19884 + }, + { + "epoch": 0.66, + "grad_norm": 0.7815450429916382, + "learning_rate": 5.2970799257315895e-06, + "loss": 2.0743, + "step": 19885 + }, + { + "epoch": 0.66, + "grad_norm": 0.7373940944671631, + "learning_rate": 5.296141937733083e-06, + "loss": 2.0605, + "step": 19886 + }, + { + "epoch": 0.66, + "grad_norm": 0.7616168856620789, + "learning_rate": 5.2952040028760096e-06, + "loss": 2.0796, + "step": 19887 + }, + { + "epoch": 0.66, + "grad_norm": 0.7629055976867676, + "learning_rate": 5.29426612117096e-06, + "loss": 2.0607, + "step": 19888 + }, + { + "epoch": 0.66, + "grad_norm": 0.7281550765037537, + "learning_rate": 5.293328292628538e-06, + "loss": 2.0465, + "step": 19889 + }, + { + "epoch": 0.66, + "grad_norm": 0.7405682802200317, + "learning_rate": 5.292390517259331e-06, + "loss": 2.0659, + "step": 19890 + }, + { + "epoch": 0.66, + "grad_norm": 0.7313674688339233, + "learning_rate": 5.291452795073936e-06, + "loss": 1.9816, + "step": 19891 + }, + { + "epoch": 0.66, + "grad_norm": 0.740441620349884, + "learning_rate": 5.290515126082951e-06, + "loss": 2.0449, + "step": 19892 + }, + { + "epoch": 0.66, + "grad_norm": 0.7219458222389221, + "learning_rate": 5.289577510296968e-06, + "loss": 2.0435, + "step": 19893 + }, + { + "epoch": 0.66, + "grad_norm": 0.7322210669517517, + "learning_rate": 5.288639947726573e-06, + "loss": 2.0973, + "step": 19894 + }, + { + "epoch": 0.66, + "grad_norm": 0.7987276315689087, + "learning_rate": 5.2877024383823676e-06, + "loss": 2.0378, + "step": 19895 + }, + { + "epoch": 0.66, + "grad_norm": 0.7828547358512878, + "learning_rate": 5.286764982274937e-06, + "loss": 2.027, + "step": 19896 + }, + { + "epoch": 0.66, + "grad_norm": 0.7652302980422974, + "learning_rate": 5.285827579414869e-06, + "loss": 2.0783, + "step": 19897 + }, + { + "epoch": 0.66, + "grad_norm": 0.746668815612793, + "learning_rate": 5.2848902298127595e-06, + "loss": 2.047, + "step": 19898 + }, + { + "epoch": 0.66, + "grad_norm": 0.7368899583816528, + "learning_rate": 5.2839529334791996e-06, + "loss": 2.0039, + "step": 19899 + }, + { + "epoch": 0.66, + "grad_norm": 0.7455770969390869, + "learning_rate": 5.2830156904247735e-06, + "loss": 2.0547, + "step": 19900 + }, + { + "epoch": 0.66, + "grad_norm": 0.7459996342658997, + "learning_rate": 5.282078500660074e-06, + "loss": 1.976, + "step": 19901 + }, + { + "epoch": 0.66, + "grad_norm": 0.7488899230957031, + "learning_rate": 5.281141364195687e-06, + "loss": 2.1192, + "step": 19902 + }, + { + "epoch": 0.66, + "grad_norm": 0.7316449880599976, + "learning_rate": 5.280204281042196e-06, + "loss": 2.0865, + "step": 19903 + }, + { + "epoch": 0.66, + "grad_norm": 0.764094889163971, + "learning_rate": 5.279267251210191e-06, + "loss": 2.0741, + "step": 19904 + }, + { + "epoch": 0.66, + "grad_norm": 0.7434185743331909, + "learning_rate": 5.278330274710265e-06, + "loss": 2.0473, + "step": 19905 + }, + { + "epoch": 0.66, + "grad_norm": 0.7129002213478088, + "learning_rate": 5.277393351552989e-06, + "loss": 2.0476, + "step": 19906 + }, + { + "epoch": 0.66, + "grad_norm": 0.7266384363174438, + "learning_rate": 5.276456481748955e-06, + "loss": 2.0627, + "step": 19907 + }, + { + "epoch": 0.66, + "grad_norm": 0.7599093914031982, + "learning_rate": 5.275519665308751e-06, + "loss": 2.1002, + "step": 19908 + }, + { + "epoch": 0.66, + "grad_norm": 0.7283793091773987, + "learning_rate": 5.274582902242957e-06, + "loss": 2.0156, + "step": 19909 + }, + { + "epoch": 0.66, + "grad_norm": 0.7429416179656982, + "learning_rate": 5.273646192562154e-06, + "loss": 2.0569, + "step": 19910 + }, + { + "epoch": 0.66, + "grad_norm": 0.7249921560287476, + "learning_rate": 5.272709536276928e-06, + "loss": 2.0216, + "step": 19911 + }, + { + "epoch": 0.66, + "grad_norm": 0.7380189895629883, + "learning_rate": 5.2717729333978565e-06, + "loss": 1.9882, + "step": 19912 + }, + { + "epoch": 0.66, + "grad_norm": 0.7266087532043457, + "learning_rate": 5.2708363839355224e-06, + "loss": 2.1028, + "step": 19913 + }, + { + "epoch": 0.66, + "grad_norm": 0.772238552570343, + "learning_rate": 5.269899887900512e-06, + "loss": 2.1044, + "step": 19914 + }, + { + "epoch": 0.66, + "grad_norm": 0.7243350148200989, + "learning_rate": 5.268963445303401e-06, + "loss": 2.0439, + "step": 19915 + }, + { + "epoch": 0.66, + "grad_norm": 0.7428587675094604, + "learning_rate": 5.268027056154764e-06, + "loss": 2.064, + "step": 19916 + }, + { + "epoch": 0.66, + "grad_norm": 0.7535495162010193, + "learning_rate": 5.267090720465185e-06, + "loss": 2.0988, + "step": 19917 + }, + { + "epoch": 0.66, + "grad_norm": 0.7133070230484009, + "learning_rate": 5.266154438245247e-06, + "loss": 2.0498, + "step": 19918 + }, + { + "epoch": 0.66, + "grad_norm": 0.7141594290733337, + "learning_rate": 5.265218209505515e-06, + "loss": 2.0488, + "step": 19919 + }, + { + "epoch": 0.66, + "grad_norm": 0.7474831342697144, + "learning_rate": 5.264282034256573e-06, + "loss": 2.0632, + "step": 19920 + }, + { + "epoch": 0.66, + "grad_norm": 0.7682548761367798, + "learning_rate": 5.263345912509001e-06, + "loss": 2.0167, + "step": 19921 + }, + { + "epoch": 0.66, + "grad_norm": 0.7651169300079346, + "learning_rate": 5.262409844273366e-06, + "loss": 1.9975, + "step": 19922 + }, + { + "epoch": 0.66, + "grad_norm": 0.7359027862548828, + "learning_rate": 5.261473829560253e-06, + "loss": 2.0822, + "step": 19923 + }, + { + "epoch": 0.66, + "grad_norm": 0.7360365390777588, + "learning_rate": 5.260537868380232e-06, + "loss": 2.0256, + "step": 19924 + }, + { + "epoch": 0.66, + "grad_norm": 0.7417351007461548, + "learning_rate": 5.259601960743872e-06, + "loss": 2.1517, + "step": 19925 + }, + { + "epoch": 0.66, + "grad_norm": 0.7542404532432556, + "learning_rate": 5.258666106661752e-06, + "loss": 2.1161, + "step": 19926 + }, + { + "epoch": 0.66, + "grad_norm": 0.7494674921035767, + "learning_rate": 5.257730306144446e-06, + "loss": 2.0389, + "step": 19927 + }, + { + "epoch": 0.66, + "grad_norm": 0.720329761505127, + "learning_rate": 5.256794559202525e-06, + "loss": 2.0281, + "step": 19928 + }, + { + "epoch": 0.66, + "grad_norm": 0.7066154479980469, + "learning_rate": 5.2558588658465545e-06, + "loss": 2.0894, + "step": 19929 + }, + { + "epoch": 0.66, + "grad_norm": 0.7589161992073059, + "learning_rate": 5.2549232260871144e-06, + "loss": 2.0275, + "step": 19930 + }, + { + "epoch": 0.66, + "grad_norm": 0.7606261968612671, + "learning_rate": 5.2539876399347675e-06, + "loss": 2.0511, + "step": 19931 + }, + { + "epoch": 0.66, + "grad_norm": 0.7273508310317993, + "learning_rate": 5.25305210740009e-06, + "loss": 2.0369, + "step": 19932 + }, + { + "epoch": 0.66, + "grad_norm": 0.7534418106079102, + "learning_rate": 5.252116628493644e-06, + "loss": 2.0617, + "step": 19933 + }, + { + "epoch": 0.66, + "grad_norm": 0.7711711525917053, + "learning_rate": 5.251181203226006e-06, + "loss": 2.0514, + "step": 19934 + }, + { + "epoch": 0.66, + "grad_norm": 0.807056725025177, + "learning_rate": 5.250245831607734e-06, + "loss": 1.9888, + "step": 19935 + }, + { + "epoch": 0.66, + "grad_norm": 0.7039667963981628, + "learning_rate": 5.249310513649407e-06, + "loss": 1.9892, + "step": 19936 + }, + { + "epoch": 0.66, + "grad_norm": 0.7574709057807922, + "learning_rate": 5.2483752493615856e-06, + "loss": 2.0768, + "step": 19937 + }, + { + "epoch": 0.66, + "grad_norm": 0.7482070326805115, + "learning_rate": 5.247440038754832e-06, + "loss": 2.1215, + "step": 19938 + }, + { + "epoch": 0.66, + "grad_norm": 0.742131769657135, + "learning_rate": 5.246504881839714e-06, + "loss": 2.0348, + "step": 19939 + }, + { + "epoch": 0.66, + "grad_norm": 0.7584533095359802, + "learning_rate": 5.2455697786268066e-06, + "loss": 2.1008, + "step": 19940 + }, + { + "epoch": 0.66, + "grad_norm": 0.7259510159492493, + "learning_rate": 5.244634729126658e-06, + "loss": 2.0063, + "step": 19941 + }, + { + "epoch": 0.66, + "grad_norm": 0.7475548982620239, + "learning_rate": 5.24369973334984e-06, + "loss": 2.045, + "step": 19942 + }, + { + "epoch": 0.66, + "grad_norm": 0.7696491479873657, + "learning_rate": 5.242764791306918e-06, + "loss": 2.0993, + "step": 19943 + }, + { + "epoch": 0.66, + "grad_norm": 0.7594428658485413, + "learning_rate": 5.241829903008447e-06, + "loss": 1.9864, + "step": 19944 + }, + { + "epoch": 0.66, + "grad_norm": 0.7473368048667908, + "learning_rate": 5.240895068464997e-06, + "loss": 2.0561, + "step": 19945 + }, + { + "epoch": 0.66, + "grad_norm": 0.7332181334495544, + "learning_rate": 5.239960287687127e-06, + "loss": 2.086, + "step": 19946 + }, + { + "epoch": 0.66, + "grad_norm": 0.7444409132003784, + "learning_rate": 5.239025560685392e-06, + "loss": 2.0171, + "step": 19947 + }, + { + "epoch": 0.66, + "grad_norm": 0.7400988340377808, + "learning_rate": 5.238090887470354e-06, + "loss": 2.0553, + "step": 19948 + }, + { + "epoch": 0.66, + "grad_norm": 0.7323520183563232, + "learning_rate": 5.237156268052579e-06, + "loss": 2.0285, + "step": 19949 + }, + { + "epoch": 0.66, + "grad_norm": 0.7472159266471863, + "learning_rate": 5.236221702442622e-06, + "loss": 2.0478, + "step": 19950 + }, + { + "epoch": 0.66, + "grad_norm": 0.7648470401763916, + "learning_rate": 5.235287190651036e-06, + "loss": 1.9858, + "step": 19951 + }, + { + "epoch": 0.66, + "grad_norm": 0.7110038995742798, + "learning_rate": 5.234352732688387e-06, + "loss": 2.08, + "step": 19952 + }, + { + "epoch": 0.66, + "grad_norm": 0.7678295969963074, + "learning_rate": 5.233418328565224e-06, + "loss": 2.0557, + "step": 19953 + }, + { + "epoch": 0.66, + "grad_norm": 0.7629528641700745, + "learning_rate": 5.232483978292111e-06, + "loss": 2.0565, + "step": 19954 + }, + { + "epoch": 0.66, + "grad_norm": 0.7358205914497375, + "learning_rate": 5.231549681879596e-06, + "loss": 1.9522, + "step": 19955 + }, + { + "epoch": 0.66, + "grad_norm": 0.7580413222312927, + "learning_rate": 5.2306154393382424e-06, + "loss": 2.0677, + "step": 19956 + }, + { + "epoch": 0.66, + "grad_norm": 0.7195526957511902, + "learning_rate": 5.229681250678596e-06, + "loss": 2.0417, + "step": 19957 + }, + { + "epoch": 0.66, + "grad_norm": 0.7724376916885376, + "learning_rate": 5.228747115911219e-06, + "loss": 2.0566, + "step": 19958 + }, + { + "epoch": 0.66, + "grad_norm": 0.7452408671379089, + "learning_rate": 5.2278130350466615e-06, + "loss": 2.1133, + "step": 19959 + }, + { + "epoch": 0.66, + "grad_norm": 0.729656457901001, + "learning_rate": 5.226879008095472e-06, + "loss": 2.0317, + "step": 19960 + }, + { + "epoch": 0.66, + "grad_norm": 0.7790346145629883, + "learning_rate": 5.225945035068205e-06, + "loss": 2.0465, + "step": 19961 + }, + { + "epoch": 0.66, + "grad_norm": 0.729098916053772, + "learning_rate": 5.225011115975418e-06, + "loss": 2.0353, + "step": 19962 + }, + { + "epoch": 0.66, + "grad_norm": 0.7400538325309753, + "learning_rate": 5.224077250827655e-06, + "loss": 2.1183, + "step": 19963 + }, + { + "epoch": 0.66, + "grad_norm": 0.727157473564148, + "learning_rate": 5.223143439635467e-06, + "loss": 2.0979, + "step": 19964 + }, + { + "epoch": 0.66, + "grad_norm": 0.7277399301528931, + "learning_rate": 5.222209682409407e-06, + "loss": 2.0525, + "step": 19965 + }, + { + "epoch": 0.66, + "grad_norm": 0.7268252968788147, + "learning_rate": 5.221275979160019e-06, + "loss": 2.0624, + "step": 19966 + }, + { + "epoch": 0.66, + "grad_norm": 0.7479997873306274, + "learning_rate": 5.220342329897859e-06, + "loss": 2.0253, + "step": 19967 + }, + { + "epoch": 0.66, + "grad_norm": 0.7392446994781494, + "learning_rate": 5.219408734633467e-06, + "loss": 2.0931, + "step": 19968 + }, + { + "epoch": 0.66, + "grad_norm": 0.7270455360412598, + "learning_rate": 5.218475193377392e-06, + "loss": 2.0447, + "step": 19969 + }, + { + "epoch": 0.66, + "grad_norm": 0.7498703598976135, + "learning_rate": 5.217541706140182e-06, + "loss": 2.1104, + "step": 19970 + }, + { + "epoch": 0.66, + "grad_norm": 0.754414975643158, + "learning_rate": 5.2166082729323864e-06, + "loss": 2.1149, + "step": 19971 + }, + { + "epoch": 0.66, + "grad_norm": 0.7405804395675659, + "learning_rate": 5.215674893764548e-06, + "loss": 2.0885, + "step": 19972 + }, + { + "epoch": 0.66, + "grad_norm": 0.7434942722320557, + "learning_rate": 5.214741568647205e-06, + "loss": 2.0513, + "step": 19973 + }, + { + "epoch": 0.66, + "grad_norm": 0.7541471719741821, + "learning_rate": 5.213808297590915e-06, + "loss": 2.0816, + "step": 19974 + }, + { + "epoch": 0.66, + "grad_norm": 0.7442706823348999, + "learning_rate": 5.212875080606205e-06, + "loss": 2.049, + "step": 19975 + }, + { + "epoch": 0.66, + "grad_norm": 0.765104353427887, + "learning_rate": 5.211941917703633e-06, + "loss": 2.0681, + "step": 19976 + }, + { + "epoch": 0.66, + "grad_norm": 0.7398830056190491, + "learning_rate": 5.211008808893732e-06, + "loss": 2.0597, + "step": 19977 + }, + { + "epoch": 0.66, + "grad_norm": 0.7380279302597046, + "learning_rate": 5.2100757541870505e-06, + "loss": 1.9759, + "step": 19978 + }, + { + "epoch": 0.66, + "grad_norm": 0.7522118091583252, + "learning_rate": 5.209142753594122e-06, + "loss": 2.0663, + "step": 19979 + }, + { + "epoch": 0.66, + "grad_norm": 0.7491288185119629, + "learning_rate": 5.208209807125495e-06, + "loss": 2.1054, + "step": 19980 + }, + { + "epoch": 0.66, + "grad_norm": 0.7285361886024475, + "learning_rate": 5.207276914791704e-06, + "loss": 2.0537, + "step": 19981 + }, + { + "epoch": 0.66, + "grad_norm": 0.7258206009864807, + "learning_rate": 5.206344076603287e-06, + "loss": 2.1286, + "step": 19982 + }, + { + "epoch": 0.66, + "grad_norm": 0.7306070327758789, + "learning_rate": 5.205411292570784e-06, + "loss": 1.9826, + "step": 19983 + }, + { + "epoch": 0.66, + "grad_norm": 0.7251618504524231, + "learning_rate": 5.20447856270474e-06, + "loss": 2.0984, + "step": 19984 + }, + { + "epoch": 0.66, + "grad_norm": 0.7967289686203003, + "learning_rate": 5.203545887015685e-06, + "loss": 2.0448, + "step": 19985 + }, + { + "epoch": 0.66, + "grad_norm": 0.7521054744720459, + "learning_rate": 5.202613265514155e-06, + "loss": 2.0123, + "step": 19986 + }, + { + "epoch": 0.66, + "grad_norm": 0.7265276908874512, + "learning_rate": 5.201680698210692e-06, + "loss": 1.9526, + "step": 19987 + }, + { + "epoch": 0.67, + "grad_norm": 0.7440691590309143, + "learning_rate": 5.200748185115825e-06, + "loss": 2.0316, + "step": 19988 + }, + { + "epoch": 0.67, + "grad_norm": 0.7906495332717896, + "learning_rate": 5.199815726240096e-06, + "loss": 2.1201, + "step": 19989 + }, + { + "epoch": 0.67, + "grad_norm": 0.7587226629257202, + "learning_rate": 5.198883321594035e-06, + "loss": 2.109, + "step": 19990 + }, + { + "epoch": 0.67, + "grad_norm": 0.7623456716537476, + "learning_rate": 5.197950971188174e-06, + "loss": 2.0895, + "step": 19991 + }, + { + "epoch": 0.67, + "grad_norm": 0.7347291707992554, + "learning_rate": 5.1970186750330475e-06, + "loss": 2.0545, + "step": 19992 + }, + { + "epoch": 0.67, + "grad_norm": 0.7406683564186096, + "learning_rate": 5.196086433139193e-06, + "loss": 2.0509, + "step": 19993 + }, + { + "epoch": 0.67, + "grad_norm": 0.7540918588638306, + "learning_rate": 5.19515424551714e-06, + "loss": 2.0061, + "step": 19994 + }, + { + "epoch": 0.67, + "grad_norm": 0.7136572599411011, + "learning_rate": 5.194222112177413e-06, + "loss": 2.0557, + "step": 19995 + }, + { + "epoch": 0.67, + "grad_norm": 0.7437838315963745, + "learning_rate": 5.193290033130553e-06, + "loss": 2.1386, + "step": 19996 + }, + { + "epoch": 0.67, + "grad_norm": 0.7490946650505066, + "learning_rate": 5.19235800838708e-06, + "loss": 2.0568, + "step": 19997 + }, + { + "epoch": 0.67, + "grad_norm": 0.7455244064331055, + "learning_rate": 5.191426037957535e-06, + "loss": 2.0906, + "step": 19998 + }, + { + "epoch": 0.67, + "grad_norm": 0.7630649209022522, + "learning_rate": 5.190494121852434e-06, + "loss": 1.9964, + "step": 19999 + }, + { + "epoch": 0.67, + "grad_norm": 0.7571759223937988, + "learning_rate": 5.189562260082317e-06, + "loss": 2.0671, + "step": 20000 + }, + { + "epoch": 0.67, + "grad_norm": 0.7247136831283569, + "learning_rate": 5.188630452657701e-06, + "loss": 2.0538, + "step": 20001 + }, + { + "epoch": 0.67, + "grad_norm": 0.7452728152275085, + "learning_rate": 5.187698699589126e-06, + "loss": 2.0477, + "step": 20002 + }, + { + "epoch": 0.67, + "grad_norm": 0.7373649477958679, + "learning_rate": 5.1867670008871075e-06, + "loss": 2.1015, + "step": 20003 + }, + { + "epoch": 0.67, + "grad_norm": 0.7305713891983032, + "learning_rate": 5.185835356562171e-06, + "loss": 2.0371, + "step": 20004 + }, + { + "epoch": 0.67, + "grad_norm": 0.7433360815048218, + "learning_rate": 5.184903766624846e-06, + "loss": 2.1105, + "step": 20005 + }, + { + "epoch": 0.67, + "grad_norm": 0.749610960483551, + "learning_rate": 5.18397223108566e-06, + "loss": 2.0265, + "step": 20006 + }, + { + "epoch": 0.67, + "grad_norm": 0.7169085144996643, + "learning_rate": 5.183040749955133e-06, + "loss": 2.0323, + "step": 20007 + }, + { + "epoch": 0.67, + "grad_norm": 0.7832186222076416, + "learning_rate": 5.1821093232437845e-06, + "loss": 2.0054, + "step": 20008 + }, + { + "epoch": 0.67, + "grad_norm": 0.7493696808815002, + "learning_rate": 5.181177950962146e-06, + "loss": 2.0821, + "step": 20009 + }, + { + "epoch": 0.67, + "grad_norm": 0.7384002208709717, + "learning_rate": 5.180246633120731e-06, + "loss": 2.0385, + "step": 20010 + }, + { + "epoch": 0.67, + "grad_norm": 0.7419658303260803, + "learning_rate": 5.179315369730069e-06, + "loss": 2.0029, + "step": 20011 + }, + { + "epoch": 0.67, + "grad_norm": 0.7409466505050659, + "learning_rate": 5.178384160800676e-06, + "loss": 2.0558, + "step": 20012 + }, + { + "epoch": 0.67, + "grad_norm": 0.7367585897445679, + "learning_rate": 5.1774530063430695e-06, + "loss": 2.0284, + "step": 20013 + }, + { + "epoch": 0.67, + "grad_norm": 0.7381303310394287, + "learning_rate": 5.176521906367773e-06, + "loss": 2.0617, + "step": 20014 + }, + { + "epoch": 0.67, + "grad_norm": 0.7545026540756226, + "learning_rate": 5.175590860885308e-06, + "loss": 2.0453, + "step": 20015 + }, + { + "epoch": 0.67, + "grad_norm": 0.7902075052261353, + "learning_rate": 5.174659869906191e-06, + "loss": 2.0302, + "step": 20016 + }, + { + "epoch": 0.67, + "grad_norm": 0.7370849847793579, + "learning_rate": 5.173728933440933e-06, + "loss": 2.0626, + "step": 20017 + }, + { + "epoch": 0.67, + "grad_norm": 0.7438147068023682, + "learning_rate": 5.17279805150006e-06, + "loss": 2.0966, + "step": 20018 + }, + { + "epoch": 0.67, + "grad_norm": 0.7443079352378845, + "learning_rate": 5.1718672240940885e-06, + "loss": 2.0697, + "step": 20019 + }, + { + "epoch": 0.67, + "grad_norm": 0.7590956091880798, + "learning_rate": 5.1709364512335305e-06, + "loss": 1.9949, + "step": 20020 + }, + { + "epoch": 0.67, + "grad_norm": 0.7073783874511719, + "learning_rate": 5.1700057329289e-06, + "loss": 2.0951, + "step": 20021 + }, + { + "epoch": 0.67, + "grad_norm": 0.7462553381919861, + "learning_rate": 5.1690750691907165e-06, + "loss": 2.0961, + "step": 20022 + }, + { + "epoch": 0.67, + "grad_norm": 0.7517601847648621, + "learning_rate": 5.168144460029488e-06, + "loss": 1.9983, + "step": 20023 + }, + { + "epoch": 0.67, + "grad_norm": 0.7401293516159058, + "learning_rate": 5.167213905455737e-06, + "loss": 2.0703, + "step": 20024 + }, + { + "epoch": 0.67, + "grad_norm": 0.7259426116943359, + "learning_rate": 5.166283405479969e-06, + "loss": 2.0753, + "step": 20025 + }, + { + "epoch": 0.67, + "grad_norm": 0.7252581715583801, + "learning_rate": 5.165352960112695e-06, + "loss": 2.0019, + "step": 20026 + }, + { + "epoch": 0.67, + "grad_norm": 0.7144137620925903, + "learning_rate": 5.164422569364431e-06, + "loss": 2.0136, + "step": 20027 + }, + { + "epoch": 0.67, + "grad_norm": 0.7215256094932556, + "learning_rate": 5.163492233245689e-06, + "loss": 2.0144, + "step": 20028 + }, + { + "epoch": 0.67, + "grad_norm": 0.724189281463623, + "learning_rate": 5.162561951766979e-06, + "loss": 2.0375, + "step": 20029 + }, + { + "epoch": 0.67, + "grad_norm": 0.7326545715332031, + "learning_rate": 5.161631724938805e-06, + "loss": 2.0505, + "step": 20030 + }, + { + "epoch": 0.67, + "grad_norm": 0.7610042691230774, + "learning_rate": 5.160701552771683e-06, + "loss": 2.154, + "step": 20031 + }, + { + "epoch": 0.67, + "grad_norm": 0.7465367913246155, + "learning_rate": 5.159771435276115e-06, + "loss": 2.1116, + "step": 20032 + }, + { + "epoch": 0.67, + "grad_norm": 0.7436209321022034, + "learning_rate": 5.158841372462617e-06, + "loss": 2.0161, + "step": 20033 + }, + { + "epoch": 0.67, + "grad_norm": 0.7725054025650024, + "learning_rate": 5.1579113643416875e-06, + "loss": 2.1128, + "step": 20034 + }, + { + "epoch": 0.67, + "grad_norm": 0.7418740391731262, + "learning_rate": 5.156981410923843e-06, + "loss": 2.09, + "step": 20035 + }, + { + "epoch": 0.67, + "grad_norm": 0.7483685612678528, + "learning_rate": 5.156051512219579e-06, + "loss": 2.0591, + "step": 20036 + }, + { + "epoch": 0.67, + "grad_norm": 0.7394378185272217, + "learning_rate": 5.15512166823941e-06, + "loss": 2.0313, + "step": 20037 + }, + { + "epoch": 0.67, + "grad_norm": 0.7457744479179382, + "learning_rate": 5.154191878993837e-06, + "loss": 2.0701, + "step": 20038 + }, + { + "epoch": 0.67, + "grad_norm": 0.7545459866523743, + "learning_rate": 5.153262144493361e-06, + "loss": 2.0649, + "step": 20039 + }, + { + "epoch": 0.67, + "grad_norm": 0.71275794506073, + "learning_rate": 5.152332464748488e-06, + "loss": 2.0785, + "step": 20040 + }, + { + "epoch": 0.67, + "grad_norm": 0.7768306732177734, + "learning_rate": 5.151402839769728e-06, + "loss": 2.112, + "step": 20041 + }, + { + "epoch": 0.67, + "grad_norm": 0.7237671613693237, + "learning_rate": 5.150473269567575e-06, + "loss": 2.0299, + "step": 20042 + }, + { + "epoch": 0.67, + "grad_norm": 0.7397258877754211, + "learning_rate": 5.149543754152529e-06, + "loss": 1.9948, + "step": 20043 + }, + { + "epoch": 0.67, + "grad_norm": 0.7323939800262451, + "learning_rate": 5.148614293535099e-06, + "loss": 2.0347, + "step": 20044 + }, + { + "epoch": 0.67, + "grad_norm": 0.7212158441543579, + "learning_rate": 5.147684887725779e-06, + "loss": 2.0848, + "step": 20045 + }, + { + "epoch": 0.67, + "grad_norm": 0.7401658892631531, + "learning_rate": 5.1467555367350705e-06, + "loss": 2.0377, + "step": 20046 + }, + { + "epoch": 0.67, + "grad_norm": 0.729089081287384, + "learning_rate": 5.145826240573481e-06, + "loss": 1.9853, + "step": 20047 + }, + { + "epoch": 0.67, + "grad_norm": 0.7421712279319763, + "learning_rate": 5.144896999251494e-06, + "loss": 2.0624, + "step": 20048 + }, + { + "epoch": 0.67, + "grad_norm": 0.7291492819786072, + "learning_rate": 5.143967812779616e-06, + "loss": 2.0371, + "step": 20049 + }, + { + "epoch": 0.67, + "grad_norm": 0.7577301263809204, + "learning_rate": 5.1430386811683475e-06, + "loss": 2.0273, + "step": 20050 + }, + { + "epoch": 0.67, + "grad_norm": 0.7708485126495361, + "learning_rate": 5.142109604428182e-06, + "loss": 2.0588, + "step": 20051 + }, + { + "epoch": 0.67, + "grad_norm": 0.7486909031867981, + "learning_rate": 5.14118058256961e-06, + "loss": 2.0067, + "step": 20052 + }, + { + "epoch": 0.67, + "grad_norm": 0.7512775659561157, + "learning_rate": 5.1402516156031375e-06, + "loss": 2.0467, + "step": 20053 + }, + { + "epoch": 0.67, + "grad_norm": 0.7575007081031799, + "learning_rate": 5.139322703539249e-06, + "loss": 2.0675, + "step": 20054 + }, + { + "epoch": 0.67, + "grad_norm": 0.738827645778656, + "learning_rate": 5.138393846388449e-06, + "loss": 2.1307, + "step": 20055 + }, + { + "epoch": 0.67, + "grad_norm": 0.7490928769111633, + "learning_rate": 5.13746504416122e-06, + "loss": 2.0303, + "step": 20056 + }, + { + "epoch": 0.67, + "grad_norm": 0.7330386638641357, + "learning_rate": 5.136536296868068e-06, + "loss": 2.056, + "step": 20057 + }, + { + "epoch": 0.67, + "grad_norm": 0.7562073469161987, + "learning_rate": 5.135607604519474e-06, + "loss": 2.1006, + "step": 20058 + }, + { + "epoch": 0.67, + "grad_norm": 0.7223935723304749, + "learning_rate": 5.134678967125937e-06, + "loss": 2.1147, + "step": 20059 + }, + { + "epoch": 0.67, + "grad_norm": 0.7269719243049622, + "learning_rate": 5.133750384697946e-06, + "loss": 2.0126, + "step": 20060 + }, + { + "epoch": 0.67, + "grad_norm": 0.7357509136199951, + "learning_rate": 5.132821857245989e-06, + "loss": 2.0654, + "step": 20061 + }, + { + "epoch": 0.67, + "grad_norm": 0.7558983564376831, + "learning_rate": 5.1318933847805575e-06, + "loss": 2.0992, + "step": 20062 + }, + { + "epoch": 0.67, + "grad_norm": 0.7379820346832275, + "learning_rate": 5.1309649673121445e-06, + "loss": 2.0907, + "step": 20063 + }, + { + "epoch": 0.67, + "grad_norm": 0.7279856204986572, + "learning_rate": 5.130036604851236e-06, + "loss": 2.0556, + "step": 20064 + }, + { + "epoch": 0.67, + "grad_norm": 0.7204431295394897, + "learning_rate": 5.129108297408316e-06, + "loss": 2.0407, + "step": 20065 + }, + { + "epoch": 0.67, + "grad_norm": 0.7701833248138428, + "learning_rate": 5.12818004499388e-06, + "loss": 2.0316, + "step": 20066 + }, + { + "epoch": 0.67, + "grad_norm": 0.732609748840332, + "learning_rate": 5.127251847618407e-06, + "loss": 2.0728, + "step": 20067 + }, + { + "epoch": 0.67, + "grad_norm": 0.7291454672813416, + "learning_rate": 5.126323705292388e-06, + "loss": 2.0765, + "step": 20068 + }, + { + "epoch": 0.67, + "grad_norm": 0.7350364923477173, + "learning_rate": 5.125395618026313e-06, + "loss": 1.9922, + "step": 20069 + }, + { + "epoch": 0.67, + "grad_norm": 0.7442386150360107, + "learning_rate": 5.124467585830655e-06, + "loss": 2.053, + "step": 20070 + }, + { + "epoch": 0.67, + "grad_norm": 0.7704840898513794, + "learning_rate": 5.123539608715904e-06, + "loss": 2.1284, + "step": 20071 + }, + { + "epoch": 0.67, + "grad_norm": 0.7391213774681091, + "learning_rate": 5.122611686692549e-06, + "loss": 2.038, + "step": 20072 + }, + { + "epoch": 0.67, + "grad_norm": 0.7477456331253052, + "learning_rate": 5.12168381977107e-06, + "loss": 2.1045, + "step": 20073 + }, + { + "epoch": 0.67, + "grad_norm": 0.7386415004730225, + "learning_rate": 5.120756007961943e-06, + "loss": 1.9846, + "step": 20074 + }, + { + "epoch": 0.67, + "grad_norm": 0.749864399433136, + "learning_rate": 5.119828251275659e-06, + "loss": 2.0561, + "step": 20075 + }, + { + "epoch": 0.67, + "grad_norm": 0.7605765461921692, + "learning_rate": 5.1189005497226915e-06, + "loss": 2.0556, + "step": 20076 + }, + { + "epoch": 0.67, + "grad_norm": 0.7270441055297852, + "learning_rate": 5.117972903313526e-06, + "loss": 2.0782, + "step": 20077 + }, + { + "epoch": 0.67, + "grad_norm": 0.7454894781112671, + "learning_rate": 5.117045312058644e-06, + "loss": 2.0336, + "step": 20078 + }, + { + "epoch": 0.67, + "grad_norm": 0.7591621279716492, + "learning_rate": 5.1161177759685235e-06, + "loss": 2.0597, + "step": 20079 + }, + { + "epoch": 0.67, + "grad_norm": 0.7574657797813416, + "learning_rate": 5.115190295053637e-06, + "loss": 2.0287, + "step": 20080 + }, + { + "epoch": 0.67, + "grad_norm": 0.7541659474372864, + "learning_rate": 5.114262869324472e-06, + "loss": 2.1193, + "step": 20081 + }, + { + "epoch": 0.67, + "grad_norm": 0.7173806428909302, + "learning_rate": 5.113335498791503e-06, + "loss": 2.0313, + "step": 20082 + }, + { + "epoch": 0.67, + "grad_norm": 0.7680360674858093, + "learning_rate": 5.112408183465201e-06, + "loss": 2.0505, + "step": 20083 + }, + { + "epoch": 0.67, + "grad_norm": 0.7402453422546387, + "learning_rate": 5.111480923356046e-06, + "loss": 2.0421, + "step": 20084 + }, + { + "epoch": 0.67, + "grad_norm": 0.7516781687736511, + "learning_rate": 5.110553718474519e-06, + "loss": 2.0124, + "step": 20085 + }, + { + "epoch": 0.67, + "grad_norm": 0.7488648891448975, + "learning_rate": 5.109626568831092e-06, + "loss": 2.0546, + "step": 20086 + }, + { + "epoch": 0.67, + "grad_norm": 0.7771928906440735, + "learning_rate": 5.108699474436232e-06, + "loss": 2.018, + "step": 20087 + }, + { + "epoch": 0.67, + "grad_norm": 0.757161021232605, + "learning_rate": 5.1077724353004245e-06, + "loss": 2.0654, + "step": 20088 + }, + { + "epoch": 0.67, + "grad_norm": 0.7520354986190796, + "learning_rate": 5.106845451434131e-06, + "loss": 2.0345, + "step": 20089 + }, + { + "epoch": 0.67, + "grad_norm": 0.727942168712616, + "learning_rate": 5.1059185228478314e-06, + "loss": 1.9976, + "step": 20090 + }, + { + "epoch": 0.67, + "grad_norm": 0.7235636115074158, + "learning_rate": 5.104991649552004e-06, + "loss": 2.0792, + "step": 20091 + }, + { + "epoch": 0.67, + "grad_norm": 0.7473171353340149, + "learning_rate": 5.104064831557103e-06, + "loss": 2.0928, + "step": 20092 + }, + { + "epoch": 0.67, + "grad_norm": 0.7357332110404968, + "learning_rate": 5.10313806887361e-06, + "loss": 2.1112, + "step": 20093 + }, + { + "epoch": 0.67, + "grad_norm": 0.7877339124679565, + "learning_rate": 5.102211361511995e-06, + "loss": 2.0575, + "step": 20094 + }, + { + "epoch": 0.67, + "grad_norm": 0.7484314441680908, + "learning_rate": 5.1012847094827276e-06, + "loss": 2.0577, + "step": 20095 + }, + { + "epoch": 0.67, + "grad_norm": 0.7079607844352722, + "learning_rate": 5.100358112796271e-06, + "loss": 2.0286, + "step": 20096 + }, + { + "epoch": 0.67, + "grad_norm": 0.7349743247032166, + "learning_rate": 5.099431571463099e-06, + "loss": 1.995, + "step": 20097 + }, + { + "epoch": 0.67, + "grad_norm": 0.7466428875923157, + "learning_rate": 5.098505085493675e-06, + "loss": 2.0056, + "step": 20098 + }, + { + "epoch": 0.67, + "grad_norm": 0.742247462272644, + "learning_rate": 5.0975786548984665e-06, + "loss": 2.0835, + "step": 20099 + }, + { + "epoch": 0.67, + "grad_norm": 0.7676612138748169, + "learning_rate": 5.096652279687946e-06, + "loss": 2.1192, + "step": 20100 + }, + { + "epoch": 0.67, + "grad_norm": 0.7876275181770325, + "learning_rate": 5.0957259598725735e-06, + "loss": 2.0656, + "step": 20101 + }, + { + "epoch": 0.67, + "grad_norm": 0.7228032350540161, + "learning_rate": 5.094799695462812e-06, + "loss": 2.0314, + "step": 20102 + }, + { + "epoch": 0.67, + "grad_norm": 0.7323240637779236, + "learning_rate": 5.0938734864691286e-06, + "loss": 1.9901, + "step": 20103 + }, + { + "epoch": 0.67, + "grad_norm": 0.7284857630729675, + "learning_rate": 5.092947332901995e-06, + "loss": 2.0941, + "step": 20104 + }, + { + "epoch": 0.67, + "grad_norm": 0.7204777002334595, + "learning_rate": 5.092021234771859e-06, + "loss": 2.0256, + "step": 20105 + }, + { + "epoch": 0.67, + "grad_norm": 0.7562199234962463, + "learning_rate": 5.091095192089191e-06, + "loss": 2.012, + "step": 20106 + }, + { + "epoch": 0.67, + "grad_norm": 0.7451204061508179, + "learning_rate": 5.090169204864454e-06, + "loss": 2.0756, + "step": 20107 + }, + { + "epoch": 0.67, + "grad_norm": 0.7404773831367493, + "learning_rate": 5.089243273108108e-06, + "loss": 2.0261, + "step": 20108 + }, + { + "epoch": 0.67, + "grad_norm": 0.7379840016365051, + "learning_rate": 5.088317396830616e-06, + "loss": 2.0318, + "step": 20109 + }, + { + "epoch": 0.67, + "grad_norm": 0.7416504621505737, + "learning_rate": 5.087391576042434e-06, + "loss": 2.1036, + "step": 20110 + }, + { + "epoch": 0.67, + "grad_norm": 0.7413413524627686, + "learning_rate": 5.08646581075402e-06, + "loss": 2.1051, + "step": 20111 + }, + { + "epoch": 0.67, + "grad_norm": 0.7426283359527588, + "learning_rate": 5.085540100975835e-06, + "loss": 2.061, + "step": 20112 + }, + { + "epoch": 0.67, + "grad_norm": 0.734928548336029, + "learning_rate": 5.084614446718346e-06, + "loss": 2.0862, + "step": 20113 + }, + { + "epoch": 0.67, + "grad_norm": 0.7508774995803833, + "learning_rate": 5.083688847991996e-06, + "loss": 2.0271, + "step": 20114 + }, + { + "epoch": 0.67, + "grad_norm": 0.7515003085136414, + "learning_rate": 5.082763304807246e-06, + "loss": 2.0122, + "step": 20115 + }, + { + "epoch": 0.67, + "grad_norm": 0.7840917110443115, + "learning_rate": 5.0818378171745596e-06, + "loss": 2.0817, + "step": 20116 + }, + { + "epoch": 0.67, + "grad_norm": 0.7541688084602356, + "learning_rate": 5.080912385104386e-06, + "loss": 2.1253, + "step": 20117 + }, + { + "epoch": 0.67, + "grad_norm": 0.7512958645820618, + "learning_rate": 5.0799870086071786e-06, + "loss": 1.9948, + "step": 20118 + }, + { + "epoch": 0.67, + "grad_norm": 0.7273443341255188, + "learning_rate": 5.079061687693394e-06, + "loss": 2.0042, + "step": 20119 + }, + { + "epoch": 0.67, + "grad_norm": 0.7307820916175842, + "learning_rate": 5.078136422373492e-06, + "loss": 1.9866, + "step": 20120 + }, + { + "epoch": 0.67, + "grad_norm": 0.7286121845245361, + "learning_rate": 5.077211212657914e-06, + "loss": 2.058, + "step": 20121 + }, + { + "epoch": 0.67, + "grad_norm": 0.7404541969299316, + "learning_rate": 5.076286058557122e-06, + "loss": 2.091, + "step": 20122 + }, + { + "epoch": 0.67, + "grad_norm": 0.7277871966362, + "learning_rate": 5.075360960081568e-06, + "loss": 2.0351, + "step": 20123 + }, + { + "epoch": 0.67, + "grad_norm": 0.7821372747421265, + "learning_rate": 5.074435917241694e-06, + "loss": 2.0563, + "step": 20124 + }, + { + "epoch": 0.67, + "grad_norm": 0.7270851135253906, + "learning_rate": 5.073510930047956e-06, + "loss": 1.9878, + "step": 20125 + }, + { + "epoch": 0.67, + "grad_norm": 0.7430328130722046, + "learning_rate": 5.072585998510813e-06, + "loss": 2.0072, + "step": 20126 + }, + { + "epoch": 0.67, + "grad_norm": 0.7706193327903748, + "learning_rate": 5.071661122640696e-06, + "loss": 2.1189, + "step": 20127 + }, + { + "epoch": 0.67, + "grad_norm": 0.7251985669136047, + "learning_rate": 5.0707363024480645e-06, + "loss": 2.0503, + "step": 20128 + }, + { + "epoch": 0.67, + "grad_norm": 0.7563086748123169, + "learning_rate": 5.06981153794337e-06, + "loss": 2.018, + "step": 20129 + }, + { + "epoch": 0.67, + "grad_norm": 0.7297871112823486, + "learning_rate": 5.068886829137051e-06, + "loss": 2.0584, + "step": 20130 + }, + { + "epoch": 0.67, + "grad_norm": 0.7636356353759766, + "learning_rate": 5.067962176039563e-06, + "loss": 2.0574, + "step": 20131 + }, + { + "epoch": 0.67, + "grad_norm": 0.7723695635795593, + "learning_rate": 5.067037578661347e-06, + "loss": 2.0425, + "step": 20132 + }, + { + "epoch": 0.67, + "grad_norm": 0.7517685890197754, + "learning_rate": 5.0661130370128455e-06, + "loss": 2.0296, + "step": 20133 + }, + { + "epoch": 0.67, + "grad_norm": 0.7514216303825378, + "learning_rate": 5.065188551104508e-06, + "loss": 1.978, + "step": 20134 + }, + { + "epoch": 0.67, + "grad_norm": 0.7456119656562805, + "learning_rate": 5.0642641209467815e-06, + "loss": 1.9984, + "step": 20135 + }, + { + "epoch": 0.67, + "grad_norm": 0.7448129653930664, + "learning_rate": 5.063339746550107e-06, + "loss": 2.053, + "step": 20136 + }, + { + "epoch": 0.67, + "grad_norm": 0.7284356951713562, + "learning_rate": 5.062415427924921e-06, + "loss": 2.0523, + "step": 20137 + }, + { + "epoch": 0.67, + "grad_norm": 0.7735322713851929, + "learning_rate": 5.0614911650816775e-06, + "loss": 2.0856, + "step": 20138 + }, + { + "epoch": 0.67, + "grad_norm": 0.7671406269073486, + "learning_rate": 5.060566958030809e-06, + "loss": 2.1047, + "step": 20139 + }, + { + "epoch": 0.67, + "grad_norm": 0.7591806054115295, + "learning_rate": 5.059642806782763e-06, + "loss": 2.1821, + "step": 20140 + }, + { + "epoch": 0.67, + "grad_norm": 0.7437096834182739, + "learning_rate": 5.058718711347974e-06, + "loss": 1.9782, + "step": 20141 + }, + { + "epoch": 0.67, + "grad_norm": 0.7560997605323792, + "learning_rate": 5.057794671736889e-06, + "loss": 2.0899, + "step": 20142 + }, + { + "epoch": 0.67, + "grad_norm": 0.7178904414176941, + "learning_rate": 5.05687068795994e-06, + "loss": 2.0771, + "step": 20143 + }, + { + "epoch": 0.67, + "grad_norm": 0.7227692008018494, + "learning_rate": 5.055946760027572e-06, + "loss": 2.077, + "step": 20144 + }, + { + "epoch": 0.67, + "grad_norm": 0.7364628911018372, + "learning_rate": 5.055022887950221e-06, + "loss": 1.9643, + "step": 20145 + }, + { + "epoch": 0.67, + "grad_norm": 0.7421463131904602, + "learning_rate": 5.054099071738319e-06, + "loss": 1.9642, + "step": 20146 + }, + { + "epoch": 0.67, + "grad_norm": 0.7379451990127563, + "learning_rate": 5.053175311402305e-06, + "loss": 2.0321, + "step": 20147 + }, + { + "epoch": 0.67, + "grad_norm": 0.7867157459259033, + "learning_rate": 5.052251606952627e-06, + "loss": 2.0037, + "step": 20148 + }, + { + "epoch": 0.67, + "grad_norm": 0.7711898684501648, + "learning_rate": 5.051327958399703e-06, + "loss": 2.1314, + "step": 20149 + }, + { + "epoch": 0.67, + "grad_norm": 0.7432572841644287, + "learning_rate": 5.050404365753976e-06, + "loss": 2.0794, + "step": 20150 + }, + { + "epoch": 0.67, + "grad_norm": 0.7395318746566772, + "learning_rate": 5.049480829025883e-06, + "loss": 2.0745, + "step": 20151 + }, + { + "epoch": 0.67, + "grad_norm": 0.7119020223617554, + "learning_rate": 5.04855734822585e-06, + "loss": 2.0361, + "step": 20152 + }, + { + "epoch": 0.67, + "grad_norm": 0.7582393288612366, + "learning_rate": 5.047633923364319e-06, + "loss": 2.0267, + "step": 20153 + }, + { + "epoch": 0.67, + "grad_norm": 0.7178239822387695, + "learning_rate": 5.046710554451717e-06, + "loss": 2.0546, + "step": 20154 + }, + { + "epoch": 0.67, + "grad_norm": 0.7576134204864502, + "learning_rate": 5.045787241498472e-06, + "loss": 2.0723, + "step": 20155 + }, + { + "epoch": 0.67, + "grad_norm": 0.7419856786727905, + "learning_rate": 5.044863984515019e-06, + "loss": 2.0765, + "step": 20156 + }, + { + "epoch": 0.67, + "grad_norm": 0.7691748738288879, + "learning_rate": 5.043940783511794e-06, + "loss": 2.1191, + "step": 20157 + }, + { + "epoch": 0.67, + "grad_norm": 0.7436739802360535, + "learning_rate": 5.043017638499221e-06, + "loss": 2.0157, + "step": 20158 + }, + { + "epoch": 0.67, + "grad_norm": 0.7361814379692078, + "learning_rate": 5.042094549487725e-06, + "loss": 2.0315, + "step": 20159 + }, + { + "epoch": 0.67, + "grad_norm": 0.7527881860733032, + "learning_rate": 5.041171516487744e-06, + "loss": 2.0589, + "step": 20160 + }, + { + "epoch": 0.67, + "grad_norm": 0.7332875728607178, + "learning_rate": 5.040248539509696e-06, + "loss": 2.0984, + "step": 20161 + }, + { + "epoch": 0.67, + "grad_norm": 0.7163532972335815, + "learning_rate": 5.039325618564019e-06, + "loss": 2.012, + "step": 20162 + }, + { + "epoch": 0.67, + "grad_norm": 0.7222986221313477, + "learning_rate": 5.038402753661129e-06, + "loss": 2.0764, + "step": 20163 + }, + { + "epoch": 0.67, + "grad_norm": 0.736585795879364, + "learning_rate": 5.03747994481146e-06, + "loss": 1.9682, + "step": 20164 + }, + { + "epoch": 0.67, + "grad_norm": 0.7504633665084839, + "learning_rate": 5.03655719202543e-06, + "loss": 2.0639, + "step": 20165 + }, + { + "epoch": 0.67, + "grad_norm": 0.7210029363632202, + "learning_rate": 5.035634495313474e-06, + "loss": 2.0371, + "step": 20166 + }, + { + "epoch": 0.67, + "grad_norm": 0.7701663970947266, + "learning_rate": 5.03471185468601e-06, + "loss": 2.0545, + "step": 20167 + }, + { + "epoch": 0.67, + "grad_norm": 0.7473902702331543, + "learning_rate": 5.0337892701534555e-06, + "loss": 2.0895, + "step": 20168 + }, + { + "epoch": 0.67, + "grad_norm": 0.7309924960136414, + "learning_rate": 5.032866741726241e-06, + "loss": 2.0314, + "step": 20169 + }, + { + "epoch": 0.67, + "grad_norm": 0.7625515460968018, + "learning_rate": 5.03194426941479e-06, + "loss": 2.0218, + "step": 20170 + }, + { + "epoch": 0.67, + "grad_norm": 0.7600356340408325, + "learning_rate": 5.0310218532295215e-06, + "loss": 2.1146, + "step": 20171 + }, + { + "epoch": 0.67, + "grad_norm": 0.7412950992584229, + "learning_rate": 5.030099493180853e-06, + "loss": 2.0257, + "step": 20172 + }, + { + "epoch": 0.67, + "grad_norm": 0.7458981275558472, + "learning_rate": 5.029177189279211e-06, + "loss": 2.0065, + "step": 20173 + }, + { + "epoch": 0.67, + "grad_norm": 0.7469793558120728, + "learning_rate": 5.028254941535007e-06, + "loss": 1.9491, + "step": 20174 + }, + { + "epoch": 0.67, + "grad_norm": 0.733208417892456, + "learning_rate": 5.02733274995867e-06, + "loss": 2.0558, + "step": 20175 + }, + { + "epoch": 0.67, + "grad_norm": 0.7655627727508545, + "learning_rate": 5.026410614560613e-06, + "loss": 2.0462, + "step": 20176 + }, + { + "epoch": 0.67, + "grad_norm": 0.7639448642730713, + "learning_rate": 5.02548853535125e-06, + "loss": 2.0447, + "step": 20177 + }, + { + "epoch": 0.67, + "grad_norm": 0.7658535242080688, + "learning_rate": 5.0245665123410025e-06, + "loss": 2.1014, + "step": 20178 + }, + { + "epoch": 0.67, + "grad_norm": 0.7848217487335205, + "learning_rate": 5.023644545540289e-06, + "loss": 2.0871, + "step": 20179 + }, + { + "epoch": 0.67, + "grad_norm": 0.8134219646453857, + "learning_rate": 5.022722634959525e-06, + "loss": 2.0102, + "step": 20180 + }, + { + "epoch": 0.67, + "grad_norm": 0.7372454404830933, + "learning_rate": 5.02180078060912e-06, + "loss": 2.0412, + "step": 20181 + }, + { + "epoch": 0.67, + "grad_norm": 0.7544186115264893, + "learning_rate": 5.020878982499495e-06, + "loss": 2.0488, + "step": 20182 + }, + { + "epoch": 0.67, + "grad_norm": 0.7728284001350403, + "learning_rate": 5.0199572406410575e-06, + "loss": 2.1243, + "step": 20183 + }, + { + "epoch": 0.67, + "grad_norm": 0.7464938759803772, + "learning_rate": 5.0190355550442296e-06, + "loss": 2.0784, + "step": 20184 + }, + { + "epoch": 0.67, + "grad_norm": 0.7765725255012512, + "learning_rate": 5.018113925719412e-06, + "loss": 2.058, + "step": 20185 + }, + { + "epoch": 0.67, + "grad_norm": 0.7639203667640686, + "learning_rate": 5.01719235267703e-06, + "loss": 2.1222, + "step": 20186 + }, + { + "epoch": 0.67, + "grad_norm": 0.7611469626426697, + "learning_rate": 5.016270835927485e-06, + "loss": 2.0738, + "step": 20187 + }, + { + "epoch": 0.67, + "grad_norm": 0.7171225547790527, + "learning_rate": 5.015349375481194e-06, + "loss": 2.0887, + "step": 20188 + }, + { + "epoch": 0.67, + "grad_norm": 0.7405815720558167, + "learning_rate": 5.014427971348565e-06, + "loss": 2.0376, + "step": 20189 + }, + { + "epoch": 0.67, + "grad_norm": 0.7422084808349609, + "learning_rate": 5.013506623540003e-06, + "loss": 2.081, + "step": 20190 + }, + { + "epoch": 0.67, + "grad_norm": 0.7416921257972717, + "learning_rate": 5.01258533206592e-06, + "loss": 2.0061, + "step": 20191 + }, + { + "epoch": 0.67, + "grad_norm": 0.7340272665023804, + "learning_rate": 5.01166409693673e-06, + "loss": 2.0381, + "step": 20192 + }, + { + "epoch": 0.67, + "grad_norm": 0.7215927839279175, + "learning_rate": 5.010742918162834e-06, + "loss": 1.9879, + "step": 20193 + }, + { + "epoch": 0.67, + "grad_norm": 0.7443199157714844, + "learning_rate": 5.009821795754639e-06, + "loss": 2.0834, + "step": 20194 + }, + { + "epoch": 0.67, + "grad_norm": 0.7572246193885803, + "learning_rate": 5.008900729722555e-06, + "loss": 2.0319, + "step": 20195 + }, + { + "epoch": 0.67, + "grad_norm": 0.7422019839286804, + "learning_rate": 5.007979720076982e-06, + "loss": 2.1361, + "step": 20196 + }, + { + "epoch": 0.67, + "grad_norm": 0.7569596767425537, + "learning_rate": 5.007058766828332e-06, + "loss": 2.0468, + "step": 20197 + }, + { + "epoch": 0.67, + "grad_norm": 0.7486196160316467, + "learning_rate": 5.006137869987006e-06, + "loss": 2.0119, + "step": 20198 + }, + { + "epoch": 0.67, + "grad_norm": 0.7292360067367554, + "learning_rate": 5.0052170295634054e-06, + "loss": 2.0542, + "step": 20199 + }, + { + "epoch": 0.67, + "grad_norm": 0.7497430443763733, + "learning_rate": 5.004296245567934e-06, + "loss": 2.0722, + "step": 20200 + }, + { + "epoch": 0.67, + "grad_norm": 0.7626875042915344, + "learning_rate": 5.003375518011e-06, + "loss": 2.1399, + "step": 20201 + }, + { + "epoch": 0.67, + "grad_norm": 0.7521160244941711, + "learning_rate": 5.002454846903001e-06, + "loss": 2.008, + "step": 20202 + }, + { + "epoch": 0.67, + "grad_norm": 0.724474310874939, + "learning_rate": 5.001534232254335e-06, + "loss": 2.029, + "step": 20203 + }, + { + "epoch": 0.67, + "grad_norm": 0.7609326243400574, + "learning_rate": 5.000613674075405e-06, + "loss": 2.119, + "step": 20204 + }, + { + "epoch": 0.67, + "grad_norm": 0.7410104870796204, + "learning_rate": 4.999693172376616e-06, + "loss": 2.0567, + "step": 20205 + }, + { + "epoch": 0.67, + "grad_norm": 0.7534316182136536, + "learning_rate": 4.998772727168363e-06, + "loss": 2.0729, + "step": 20206 + }, + { + "epoch": 0.67, + "grad_norm": 0.7654723525047302, + "learning_rate": 4.9978523384610415e-06, + "loss": 2.0195, + "step": 20207 + }, + { + "epoch": 0.67, + "grad_norm": 0.7872684001922607, + "learning_rate": 4.996932006265056e-06, + "loss": 1.9924, + "step": 20208 + }, + { + "epoch": 0.67, + "grad_norm": 0.7648528218269348, + "learning_rate": 4.996011730590796e-06, + "loss": 2.1342, + "step": 20209 + }, + { + "epoch": 0.67, + "grad_norm": 0.7380174994468689, + "learning_rate": 4.995091511448668e-06, + "loss": 2.1132, + "step": 20210 + }, + { + "epoch": 0.67, + "grad_norm": 0.7399367094039917, + "learning_rate": 4.994171348849063e-06, + "loss": 2.0181, + "step": 20211 + }, + { + "epoch": 0.67, + "grad_norm": 0.7644140720367432, + "learning_rate": 4.9932512428023715e-06, + "loss": 2.0172, + "step": 20212 + }, + { + "epoch": 0.67, + "grad_norm": 0.728918731212616, + "learning_rate": 4.992331193318995e-06, + "loss": 2.022, + "step": 20213 + }, + { + "epoch": 0.67, + "grad_norm": 0.774364173412323, + "learning_rate": 4.991411200409327e-06, + "loss": 2.0694, + "step": 20214 + }, + { + "epoch": 0.67, + "grad_norm": 0.7457428574562073, + "learning_rate": 4.990491264083762e-06, + "loss": 2.0405, + "step": 20215 + }, + { + "epoch": 0.67, + "grad_norm": 0.7411237955093384, + "learning_rate": 4.989571384352686e-06, + "loss": 2.0213, + "step": 20216 + }, + { + "epoch": 0.67, + "grad_norm": 0.7275320291519165, + "learning_rate": 4.988651561226501e-06, + "loss": 2.038, + "step": 20217 + }, + { + "epoch": 0.67, + "grad_norm": 0.743889331817627, + "learning_rate": 4.987731794715589e-06, + "loss": 2.0915, + "step": 20218 + }, + { + "epoch": 0.67, + "grad_norm": 0.7613849639892578, + "learning_rate": 4.986812084830349e-06, + "loss": 2.0229, + "step": 20219 + }, + { + "epoch": 0.67, + "grad_norm": 0.7522518038749695, + "learning_rate": 4.9858924315811656e-06, + "loss": 2.05, + "step": 20220 + }, + { + "epoch": 0.67, + "grad_norm": 0.7419420480728149, + "learning_rate": 4.984972834978434e-06, + "loss": 2.0583, + "step": 20221 + }, + { + "epoch": 0.67, + "grad_norm": 0.7336491346359253, + "learning_rate": 4.984053295032536e-06, + "loss": 2.0061, + "step": 20222 + }, + { + "epoch": 0.67, + "grad_norm": 0.7542902827262878, + "learning_rate": 4.98313381175387e-06, + "loss": 2.0837, + "step": 20223 + }, + { + "epoch": 0.67, + "grad_norm": 0.7150443196296692, + "learning_rate": 4.982214385152816e-06, + "loss": 2.0285, + "step": 20224 + }, + { + "epoch": 0.67, + "grad_norm": 0.7371305227279663, + "learning_rate": 4.98129501523976e-06, + "loss": 2.0779, + "step": 20225 + }, + { + "epoch": 0.67, + "grad_norm": 0.7909289002418518, + "learning_rate": 4.980375702025091e-06, + "loss": 1.9799, + "step": 20226 + }, + { + "epoch": 0.67, + "grad_norm": 0.7459080815315247, + "learning_rate": 4.9794564455192005e-06, + "loss": 2.084, + "step": 20227 + }, + { + "epoch": 0.67, + "grad_norm": 0.7883503437042236, + "learning_rate": 4.978537245732468e-06, + "loss": 2.0325, + "step": 20228 + }, + { + "epoch": 0.67, + "grad_norm": 0.7346156239509583, + "learning_rate": 4.977618102675276e-06, + "loss": 2.0613, + "step": 20229 + }, + { + "epoch": 0.67, + "grad_norm": 0.7766497135162354, + "learning_rate": 4.9766990163580145e-06, + "loss": 2.0469, + "step": 20230 + }, + { + "epoch": 0.67, + "grad_norm": 0.7682079672813416, + "learning_rate": 4.975779986791058e-06, + "loss": 2.0531, + "step": 20231 + }, + { + "epoch": 0.67, + "grad_norm": 0.7674205899238586, + "learning_rate": 4.974861013984801e-06, + "loss": 2.0955, + "step": 20232 + }, + { + "epoch": 0.67, + "grad_norm": 0.7623017430305481, + "learning_rate": 4.973942097949619e-06, + "loss": 2.0122, + "step": 20233 + }, + { + "epoch": 0.67, + "grad_norm": 0.7504947185516357, + "learning_rate": 4.973023238695889e-06, + "loss": 2.0464, + "step": 20234 + }, + { + "epoch": 0.67, + "grad_norm": 0.7465676069259644, + "learning_rate": 4.972104436233997e-06, + "loss": 1.9995, + "step": 20235 + }, + { + "epoch": 0.67, + "grad_norm": 0.7662003636360168, + "learning_rate": 4.971185690574325e-06, + "loss": 2.0476, + "step": 20236 + }, + { + "epoch": 0.67, + "grad_norm": 0.7539246678352356, + "learning_rate": 4.97026700172725e-06, + "loss": 2.0907, + "step": 20237 + }, + { + "epoch": 0.67, + "grad_norm": 0.7450074553489685, + "learning_rate": 4.969348369703149e-06, + "loss": 2.1369, + "step": 20238 + }, + { + "epoch": 0.67, + "grad_norm": 0.7725008130073547, + "learning_rate": 4.968429794512404e-06, + "loss": 1.9899, + "step": 20239 + }, + { + "epoch": 0.67, + "grad_norm": 0.7478427290916443, + "learning_rate": 4.967511276165387e-06, + "loss": 1.9892, + "step": 20240 + }, + { + "epoch": 0.67, + "grad_norm": 0.7403272390365601, + "learning_rate": 4.966592814672481e-06, + "loss": 2.0286, + "step": 20241 + }, + { + "epoch": 0.67, + "grad_norm": 0.7236624360084534, + "learning_rate": 4.965674410044057e-06, + "loss": 2.0061, + "step": 20242 + }, + { + "epoch": 0.67, + "grad_norm": 0.758434534072876, + "learning_rate": 4.964756062290496e-06, + "loss": 2.0962, + "step": 20243 + }, + { + "epoch": 0.67, + "grad_norm": 0.7605840563774109, + "learning_rate": 4.963837771422168e-06, + "loss": 2.0276, + "step": 20244 + }, + { + "epoch": 0.67, + "grad_norm": 0.7595072984695435, + "learning_rate": 4.962919537449451e-06, + "loss": 2.0905, + "step": 20245 + }, + { + "epoch": 0.67, + "grad_norm": 0.7529708743095398, + "learning_rate": 4.962001360382717e-06, + "loss": 2.0964, + "step": 20246 + }, + { + "epoch": 0.67, + "grad_norm": 0.7270084619522095, + "learning_rate": 4.961083240232337e-06, + "loss": 2.0108, + "step": 20247 + }, + { + "epoch": 0.67, + "grad_norm": 0.7639528512954712, + "learning_rate": 4.960165177008685e-06, + "loss": 2.0072, + "step": 20248 + }, + { + "epoch": 0.67, + "grad_norm": 0.7482243180274963, + "learning_rate": 4.959247170722137e-06, + "loss": 2.0831, + "step": 20249 + }, + { + "epoch": 0.67, + "grad_norm": 0.7259333729743958, + "learning_rate": 4.95832922138306e-06, + "loss": 2.0056, + "step": 20250 + }, + { + "epoch": 0.67, + "grad_norm": 0.744625449180603, + "learning_rate": 4.957411329001821e-06, + "loss": 2.0256, + "step": 20251 + }, + { + "epoch": 0.67, + "grad_norm": 0.7366506457328796, + "learning_rate": 4.956493493588798e-06, + "loss": 2.1093, + "step": 20252 + }, + { + "epoch": 0.67, + "grad_norm": 0.7946878671646118, + "learning_rate": 4.95557571515435e-06, + "loss": 2.0458, + "step": 20253 + }, + { + "epoch": 0.67, + "grad_norm": 0.7475458383560181, + "learning_rate": 4.954657993708854e-06, + "loss": 2.117, + "step": 20254 + }, + { + "epoch": 0.67, + "grad_norm": 0.7527669668197632, + "learning_rate": 4.953740329262681e-06, + "loss": 2.0329, + "step": 20255 + }, + { + "epoch": 0.67, + "grad_norm": 0.7667949795722961, + "learning_rate": 4.952822721826185e-06, + "loss": 2.0219, + "step": 20256 + }, + { + "epoch": 0.67, + "grad_norm": 0.7709560990333557, + "learning_rate": 4.95190517140974e-06, + "loss": 2.0301, + "step": 20257 + }, + { + "epoch": 0.67, + "grad_norm": 0.7398699522018433, + "learning_rate": 4.950987678023715e-06, + "loss": 2.0061, + "step": 20258 + }, + { + "epoch": 0.67, + "grad_norm": 0.74430912733078, + "learning_rate": 4.950070241678473e-06, + "loss": 2.0727, + "step": 20259 + }, + { + "epoch": 0.67, + "grad_norm": 0.7148361802101135, + "learning_rate": 4.9491528623843745e-06, + "loss": 2.0758, + "step": 20260 + }, + { + "epoch": 0.67, + "grad_norm": 0.7388547658920288, + "learning_rate": 4.94823554015179e-06, + "loss": 2.0403, + "step": 20261 + }, + { + "epoch": 0.67, + "grad_norm": 0.7750593423843384, + "learning_rate": 4.947318274991075e-06, + "loss": 2.0008, + "step": 20262 + }, + { + "epoch": 0.67, + "grad_norm": 0.7450194954872131, + "learning_rate": 4.946401066912603e-06, + "loss": 2.0381, + "step": 20263 + }, + { + "epoch": 0.67, + "grad_norm": 0.7713590860366821, + "learning_rate": 4.945483915926724e-06, + "loss": 2.1119, + "step": 20264 + }, + { + "epoch": 0.67, + "grad_norm": 0.7457413077354431, + "learning_rate": 4.944566822043811e-06, + "loss": 2.0469, + "step": 20265 + }, + { + "epoch": 0.67, + "grad_norm": 0.7562053203582764, + "learning_rate": 4.943649785274215e-06, + "loss": 2.0163, + "step": 20266 + }, + { + "epoch": 0.67, + "grad_norm": 0.7206932902336121, + "learning_rate": 4.942732805628304e-06, + "loss": 2.0275, + "step": 20267 + }, + { + "epoch": 0.67, + "grad_norm": 0.7527848482131958, + "learning_rate": 4.941815883116434e-06, + "loss": 2.0329, + "step": 20268 + }, + { + "epoch": 0.67, + "grad_norm": 0.7741339802742004, + "learning_rate": 4.940899017748959e-06, + "loss": 2.1259, + "step": 20269 + }, + { + "epoch": 0.67, + "grad_norm": 0.7469882965087891, + "learning_rate": 4.939982209536244e-06, + "loss": 2.0885, + "step": 20270 + }, + { + "epoch": 0.67, + "grad_norm": 0.7610406279563904, + "learning_rate": 4.939065458488646e-06, + "loss": 2.0591, + "step": 20271 + }, + { + "epoch": 0.67, + "grad_norm": 0.7616428732872009, + "learning_rate": 4.938148764616523e-06, + "loss": 2.009, + "step": 20272 + }, + { + "epoch": 0.67, + "grad_norm": 0.755438506603241, + "learning_rate": 4.937232127930223e-06, + "loss": 2.1145, + "step": 20273 + }, + { + "epoch": 0.67, + "grad_norm": 0.7465696930885315, + "learning_rate": 4.936315548440111e-06, + "loss": 2.0181, + "step": 20274 + }, + { + "epoch": 0.67, + "grad_norm": 0.7882199287414551, + "learning_rate": 4.935399026156536e-06, + "loss": 2.0389, + "step": 20275 + }, + { + "epoch": 0.67, + "grad_norm": 0.7349933981895447, + "learning_rate": 4.934482561089854e-06, + "loss": 2.0716, + "step": 20276 + }, + { + "epoch": 0.67, + "grad_norm": 0.766400158405304, + "learning_rate": 4.933566153250426e-06, + "loss": 2.0069, + "step": 20277 + }, + { + "epoch": 0.67, + "grad_norm": 0.7231996655464172, + "learning_rate": 4.932649802648593e-06, + "loss": 2.1048, + "step": 20278 + }, + { + "epoch": 0.67, + "grad_norm": 0.7518306374549866, + "learning_rate": 4.931733509294711e-06, + "loss": 2.0339, + "step": 20279 + }, + { + "epoch": 0.67, + "grad_norm": 0.7173805236816406, + "learning_rate": 4.930817273199138e-06, + "loss": 1.9898, + "step": 20280 + }, + { + "epoch": 0.67, + "grad_norm": 0.7579454183578491, + "learning_rate": 4.929901094372219e-06, + "loss": 2.106, + "step": 20281 + }, + { + "epoch": 0.67, + "grad_norm": 0.7537594437599182, + "learning_rate": 4.928984972824304e-06, + "loss": 2.009, + "step": 20282 + }, + { + "epoch": 0.67, + "grad_norm": 0.7648007869720459, + "learning_rate": 4.928068908565748e-06, + "loss": 1.9961, + "step": 20283 + }, + { + "epoch": 0.67, + "grad_norm": 0.736355185508728, + "learning_rate": 4.927152901606894e-06, + "loss": 2.0935, + "step": 20284 + }, + { + "epoch": 0.67, + "grad_norm": 0.7260897159576416, + "learning_rate": 4.926236951958094e-06, + "loss": 1.9959, + "step": 20285 + }, + { + "epoch": 0.67, + "grad_norm": 0.7549333572387695, + "learning_rate": 4.925321059629697e-06, + "loss": 2.0424, + "step": 20286 + }, + { + "epoch": 0.67, + "grad_norm": 0.7273990511894226, + "learning_rate": 4.924405224632051e-06, + "loss": 2.0846, + "step": 20287 + }, + { + "epoch": 0.67, + "grad_norm": 0.7282859683036804, + "learning_rate": 4.923489446975494e-06, + "loss": 2.0618, + "step": 20288 + }, + { + "epoch": 0.68, + "grad_norm": 0.7400189638137817, + "learning_rate": 4.922573726670383e-06, + "loss": 2.063, + "step": 20289 + }, + { + "epoch": 0.68, + "grad_norm": 0.7513425350189209, + "learning_rate": 4.921658063727059e-06, + "loss": 2.1098, + "step": 20290 + }, + { + "epoch": 0.68, + "grad_norm": 0.7649042010307312, + "learning_rate": 4.9207424581558615e-06, + "loss": 2.0703, + "step": 20291 + }, + { + "epoch": 0.68, + "grad_norm": 0.7169502973556519, + "learning_rate": 4.919826909967139e-06, + "loss": 2.0656, + "step": 20292 + }, + { + "epoch": 0.68, + "grad_norm": 0.7470426559448242, + "learning_rate": 4.918911419171239e-06, + "loss": 2.0896, + "step": 20293 + }, + { + "epoch": 0.68, + "grad_norm": 0.7448184490203857, + "learning_rate": 4.9179959857785e-06, + "loss": 2.08, + "step": 20294 + }, + { + "epoch": 0.68, + "grad_norm": 0.7486196160316467, + "learning_rate": 4.91708060979926e-06, + "loss": 2.0852, + "step": 20295 + }, + { + "epoch": 0.68, + "grad_norm": 0.7555215358734131, + "learning_rate": 4.9161652912438684e-06, + "loss": 1.9551, + "step": 20296 + }, + { + "epoch": 0.68, + "grad_norm": 0.7569241523742676, + "learning_rate": 4.915250030122657e-06, + "loss": 2.0467, + "step": 20297 + }, + { + "epoch": 0.68, + "grad_norm": 0.7789692878723145, + "learning_rate": 4.914334826445973e-06, + "loss": 2.0798, + "step": 20298 + }, + { + "epoch": 0.68, + "grad_norm": 0.7335914373397827, + "learning_rate": 4.9134196802241584e-06, + "loss": 2.1291, + "step": 20299 + }, + { + "epoch": 0.68, + "grad_norm": 0.7551798224449158, + "learning_rate": 4.912504591467542e-06, + "loss": 2.0688, + "step": 20300 + }, + { + "epoch": 0.68, + "grad_norm": 0.7534513473510742, + "learning_rate": 4.911589560186466e-06, + "loss": 2.0535, + "step": 20301 + }, + { + "epoch": 0.68, + "grad_norm": 0.753381073474884, + "learning_rate": 4.910674586391273e-06, + "loss": 2.0995, + "step": 20302 + }, + { + "epoch": 0.68, + "grad_norm": 0.7559484243392944, + "learning_rate": 4.909759670092296e-06, + "loss": 2.0351, + "step": 20303 + }, + { + "epoch": 0.68, + "grad_norm": 0.7543895244598389, + "learning_rate": 4.908844811299868e-06, + "loss": 1.9691, + "step": 20304 + }, + { + "epoch": 0.68, + "grad_norm": 0.7272427678108215, + "learning_rate": 4.907930010024326e-06, + "loss": 2.0241, + "step": 20305 + }, + { + "epoch": 0.68, + "grad_norm": 0.7709632515907288, + "learning_rate": 4.9070152662760115e-06, + "loss": 2.0672, + "step": 20306 + }, + { + "epoch": 0.68, + "grad_norm": 0.7403322458267212, + "learning_rate": 4.90610058006525e-06, + "loss": 2.0416, + "step": 20307 + }, + { + "epoch": 0.68, + "grad_norm": 0.7713525295257568, + "learning_rate": 4.905185951402382e-06, + "loss": 2.0875, + "step": 20308 + }, + { + "epoch": 0.68, + "grad_norm": 0.735372006893158, + "learning_rate": 4.904271380297737e-06, + "loss": 2.0153, + "step": 20309 + }, + { + "epoch": 0.68, + "grad_norm": 0.7406299114227295, + "learning_rate": 4.903356866761645e-06, + "loss": 2.0424, + "step": 20310 + }, + { + "epoch": 0.68, + "grad_norm": 0.771120548248291, + "learning_rate": 4.902442410804439e-06, + "loss": 2.0309, + "step": 20311 + }, + { + "epoch": 0.68, + "grad_norm": 0.7251144051551819, + "learning_rate": 4.901528012436459e-06, + "loss": 2.0446, + "step": 20312 + }, + { + "epoch": 0.68, + "grad_norm": 0.7609586715698242, + "learning_rate": 4.9006136716680204e-06, + "loss": 2.101, + "step": 20313 + }, + { + "epoch": 0.68, + "grad_norm": 0.7532699108123779, + "learning_rate": 4.8996993885094604e-06, + "loss": 2.083, + "step": 20314 + }, + { + "epoch": 0.68, + "grad_norm": 0.7276277542114258, + "learning_rate": 4.89878516297111e-06, + "loss": 2.07, + "step": 20315 + }, + { + "epoch": 0.68, + "grad_norm": 0.7588284015655518, + "learning_rate": 4.897870995063293e-06, + "loss": 2.0662, + "step": 20316 + }, + { + "epoch": 0.68, + "grad_norm": 0.7322894334793091, + "learning_rate": 4.896956884796342e-06, + "loss": 2.0547, + "step": 20317 + }, + { + "epoch": 0.68, + "grad_norm": 0.7784355878829956, + "learning_rate": 4.896042832180582e-06, + "loss": 2.0884, + "step": 20318 + }, + { + "epoch": 0.68, + "grad_norm": 0.7522271871566772, + "learning_rate": 4.895128837226335e-06, + "loss": 2.0993, + "step": 20319 + }, + { + "epoch": 0.68, + "grad_norm": 0.7424781322479248, + "learning_rate": 4.89421489994393e-06, + "loss": 2.1079, + "step": 20320 + }, + { + "epoch": 0.68, + "grad_norm": 0.724582314491272, + "learning_rate": 4.893301020343697e-06, + "loss": 2.0196, + "step": 20321 + }, + { + "epoch": 0.68, + "grad_norm": 0.7345448732376099, + "learning_rate": 4.892387198435957e-06, + "loss": 2.095, + "step": 20322 + }, + { + "epoch": 0.68, + "grad_norm": 0.7577356696128845, + "learning_rate": 4.891473434231029e-06, + "loss": 2.0118, + "step": 20323 + }, + { + "epoch": 0.68, + "grad_norm": 0.7410258650779724, + "learning_rate": 4.890559727739243e-06, + "loss": 1.9982, + "step": 20324 + }, + { + "epoch": 0.68, + "grad_norm": 0.7438172698020935, + "learning_rate": 4.88964607897092e-06, + "loss": 1.9892, + "step": 20325 + }, + { + "epoch": 0.68, + "grad_norm": 0.7261031270027161, + "learning_rate": 4.888732487936376e-06, + "loss": 2.0531, + "step": 20326 + }, + { + "epoch": 0.68, + "grad_norm": 0.722767174243927, + "learning_rate": 4.887818954645938e-06, + "loss": 2.1851, + "step": 20327 + }, + { + "epoch": 0.68, + "grad_norm": 0.7393938899040222, + "learning_rate": 4.886905479109928e-06, + "loss": 2.0448, + "step": 20328 + }, + { + "epoch": 0.68, + "grad_norm": 0.7540817260742188, + "learning_rate": 4.885992061338659e-06, + "loss": 2.0334, + "step": 20329 + }, + { + "epoch": 0.68, + "grad_norm": 0.761029839515686, + "learning_rate": 4.885078701342459e-06, + "loss": 2.0829, + "step": 20330 + }, + { + "epoch": 0.68, + "grad_norm": 0.7174654006958008, + "learning_rate": 4.884165399131643e-06, + "loss": 2.053, + "step": 20331 + }, + { + "epoch": 0.68, + "grad_norm": 0.7766607403755188, + "learning_rate": 4.883252154716525e-06, + "loss": 2.145, + "step": 20332 + }, + { + "epoch": 0.68, + "grad_norm": 0.7217127084732056, + "learning_rate": 4.882338968107423e-06, + "loss": 2.0547, + "step": 20333 + }, + { + "epoch": 0.68, + "grad_norm": 0.7582725286483765, + "learning_rate": 4.881425839314665e-06, + "loss": 2.033, + "step": 20334 + }, + { + "epoch": 0.68, + "grad_norm": 0.7554873824119568, + "learning_rate": 4.8805127683485505e-06, + "loss": 2.0552, + "step": 20335 + }, + { + "epoch": 0.68, + "grad_norm": 0.7513056993484497, + "learning_rate": 4.879599755219403e-06, + "loss": 2.0338, + "step": 20336 + }, + { + "epoch": 0.68, + "grad_norm": 0.7349498867988586, + "learning_rate": 4.87868679993754e-06, + "loss": 2.0714, + "step": 20337 + }, + { + "epoch": 0.68, + "grad_norm": 0.7764919996261597, + "learning_rate": 4.877773902513268e-06, + "loss": 2.0858, + "step": 20338 + }, + { + "epoch": 0.68, + "grad_norm": 0.7439562678337097, + "learning_rate": 4.876861062956908e-06, + "loss": 2.0247, + "step": 20339 + }, + { + "epoch": 0.68, + "grad_norm": 0.7294931411743164, + "learning_rate": 4.87594828127877e-06, + "loss": 1.9829, + "step": 20340 + }, + { + "epoch": 0.68, + "grad_norm": 0.7425527572631836, + "learning_rate": 4.8750355574891616e-06, + "loss": 2.1112, + "step": 20341 + }, + { + "epoch": 0.68, + "grad_norm": 0.7498957514762878, + "learning_rate": 4.874122891598397e-06, + "loss": 2.1181, + "step": 20342 + }, + { + "epoch": 0.68, + "grad_norm": 0.7695623636245728, + "learning_rate": 4.873210283616793e-06, + "loss": 2.0127, + "step": 20343 + }, + { + "epoch": 0.68, + "grad_norm": 0.7829837203025818, + "learning_rate": 4.872297733554654e-06, + "loss": 1.9818, + "step": 20344 + }, + { + "epoch": 0.68, + "grad_norm": 0.7342893481254578, + "learning_rate": 4.871385241422286e-06, + "loss": 1.9928, + "step": 20345 + }, + { + "epoch": 0.68, + "grad_norm": 0.7282729744911194, + "learning_rate": 4.870472807230005e-06, + "loss": 2.0053, + "step": 20346 + }, + { + "epoch": 0.68, + "grad_norm": 0.7584148049354553, + "learning_rate": 4.869560430988119e-06, + "loss": 2.1086, + "step": 20347 + }, + { + "epoch": 0.68, + "grad_norm": 0.7623714208602905, + "learning_rate": 4.8686481127069255e-06, + "loss": 2.0209, + "step": 20348 + }, + { + "epoch": 0.68, + "grad_norm": 0.7448371648788452, + "learning_rate": 4.86773585239674e-06, + "loss": 2.0369, + "step": 20349 + }, + { + "epoch": 0.68, + "grad_norm": 0.7644042372703552, + "learning_rate": 4.86682365006787e-06, + "loss": 2.054, + "step": 20350 + }, + { + "epoch": 0.68, + "grad_norm": 0.7371789813041687, + "learning_rate": 4.865911505730615e-06, + "loss": 1.9901, + "step": 20351 + }, + { + "epoch": 0.68, + "grad_norm": 0.7283911108970642, + "learning_rate": 4.864999419395285e-06, + "loss": 2.0822, + "step": 20352 + }, + { + "epoch": 0.68, + "grad_norm": 0.748014509677887, + "learning_rate": 4.864087391072184e-06, + "loss": 2.0598, + "step": 20353 + }, + { + "epoch": 0.68, + "grad_norm": 0.759214460849762, + "learning_rate": 4.863175420771609e-06, + "loss": 2.0407, + "step": 20354 + }, + { + "epoch": 0.68, + "grad_norm": 0.7503107190132141, + "learning_rate": 4.86226350850387e-06, + "loss": 2.0753, + "step": 20355 + }, + { + "epoch": 0.68, + "grad_norm": 0.7295525074005127, + "learning_rate": 4.861351654279272e-06, + "loss": 2.0302, + "step": 20356 + }, + { + "epoch": 0.68, + "grad_norm": 0.7519281506538391, + "learning_rate": 4.860439858108104e-06, + "loss": 2.0355, + "step": 20357 + }, + { + "epoch": 0.68, + "grad_norm": 0.7402242422103882, + "learning_rate": 4.859528120000675e-06, + "loss": 2.1186, + "step": 20358 + }, + { + "epoch": 0.68, + "grad_norm": 0.7495577931404114, + "learning_rate": 4.8586164399672875e-06, + "loss": 2.0934, + "step": 20359 + }, + { + "epoch": 0.68, + "grad_norm": 0.7431715726852417, + "learning_rate": 4.857704818018235e-06, + "loss": 2.0549, + "step": 20360 + }, + { + "epoch": 0.68, + "grad_norm": 0.7669147849082947, + "learning_rate": 4.856793254163824e-06, + "loss": 2.0875, + "step": 20361 + }, + { + "epoch": 0.68, + "grad_norm": 0.7521370649337769, + "learning_rate": 4.85588174841435e-06, + "loss": 2.0338, + "step": 20362 + }, + { + "epoch": 0.68, + "grad_norm": 0.7257447838783264, + "learning_rate": 4.854970300780103e-06, + "loss": 2.1013, + "step": 20363 + }, + { + "epoch": 0.68, + "grad_norm": 0.7225533723831177, + "learning_rate": 4.854058911271387e-06, + "loss": 1.9696, + "step": 20364 + }, + { + "epoch": 0.68, + "grad_norm": 0.7443277835845947, + "learning_rate": 4.853147579898502e-06, + "loss": 2.0066, + "step": 20365 + }, + { + "epoch": 0.68, + "grad_norm": 0.7521112561225891, + "learning_rate": 4.852236306671738e-06, + "loss": 2.1167, + "step": 20366 + }, + { + "epoch": 0.68, + "grad_norm": 0.7702720761299133, + "learning_rate": 4.851325091601388e-06, + "loss": 2.0378, + "step": 20367 + }, + { + "epoch": 0.68, + "grad_norm": 0.7443459630012512, + "learning_rate": 4.850413934697755e-06, + "loss": 1.99, + "step": 20368 + }, + { + "epoch": 0.68, + "grad_norm": 0.7265153527259827, + "learning_rate": 4.8495028359711226e-06, + "loss": 2.0596, + "step": 20369 + }, + { + "epoch": 0.68, + "grad_norm": 0.7205891609191895, + "learning_rate": 4.848591795431792e-06, + "loss": 1.9984, + "step": 20370 + }, + { + "epoch": 0.68, + "grad_norm": 0.7610408663749695, + "learning_rate": 4.847680813090049e-06, + "loss": 2.0627, + "step": 20371 + }, + { + "epoch": 0.68, + "grad_norm": 0.7298734784126282, + "learning_rate": 4.846769888956192e-06, + "loss": 2.0032, + "step": 20372 + }, + { + "epoch": 0.68, + "grad_norm": 0.7168726325035095, + "learning_rate": 4.845859023040506e-06, + "loss": 2.0882, + "step": 20373 + }, + { + "epoch": 0.68, + "grad_norm": 0.7291662096977234, + "learning_rate": 4.8449482153532865e-06, + "loss": 2.0484, + "step": 20374 + }, + { + "epoch": 0.68, + "grad_norm": 0.7627370953559875, + "learning_rate": 4.844037465904821e-06, + "loss": 2.0183, + "step": 20375 + }, + { + "epoch": 0.68, + "grad_norm": 0.7389827966690063, + "learning_rate": 4.843126774705396e-06, + "loss": 2.0617, + "step": 20376 + }, + { + "epoch": 0.68, + "grad_norm": 0.742956817150116, + "learning_rate": 4.842216141765301e-06, + "loss": 2.0683, + "step": 20377 + }, + { + "epoch": 0.68, + "grad_norm": 0.750640332698822, + "learning_rate": 4.841305567094834e-06, + "loss": 2.0704, + "step": 20378 + }, + { + "epoch": 0.68, + "grad_norm": 0.7446610331535339, + "learning_rate": 4.840395050704266e-06, + "loss": 2.0673, + "step": 20379 + }, + { + "epoch": 0.68, + "grad_norm": 0.7661430835723877, + "learning_rate": 4.8394845926038905e-06, + "loss": 2.0234, + "step": 20380 + }, + { + "epoch": 0.68, + "grad_norm": 0.7378416657447815, + "learning_rate": 4.838574192803996e-06, + "loss": 2.0595, + "step": 20381 + }, + { + "epoch": 0.68, + "grad_norm": 0.7276251912117004, + "learning_rate": 4.837663851314863e-06, + "loss": 2.047, + "step": 20382 + }, + { + "epoch": 0.68, + "grad_norm": 0.7150817513465881, + "learning_rate": 4.8367535681467825e-06, + "loss": 2.0456, + "step": 20383 + }, + { + "epoch": 0.68, + "grad_norm": 0.7495589256286621, + "learning_rate": 4.835843343310034e-06, + "loss": 2.0572, + "step": 20384 + }, + { + "epoch": 0.68, + "grad_norm": 0.7423121333122253, + "learning_rate": 4.834933176814897e-06, + "loss": 1.9679, + "step": 20385 + }, + { + "epoch": 0.68, + "grad_norm": 0.7680132985115051, + "learning_rate": 4.834023068671658e-06, + "loss": 2.1354, + "step": 20386 + }, + { + "epoch": 0.68, + "grad_norm": 0.7638832330703735, + "learning_rate": 4.8331130188906026e-06, + "loss": 2.0261, + "step": 20387 + }, + { + "epoch": 0.68, + "grad_norm": 0.742345929145813, + "learning_rate": 4.832203027482008e-06, + "loss": 2.052, + "step": 20388 + }, + { + "epoch": 0.68, + "grad_norm": 0.7553034424781799, + "learning_rate": 4.8312930944561505e-06, + "loss": 2.0596, + "step": 20389 + }, + { + "epoch": 0.68, + "grad_norm": 0.7282969951629639, + "learning_rate": 4.830383219823319e-06, + "loss": 2.0171, + "step": 20390 + }, + { + "epoch": 0.68, + "grad_norm": 0.7771686911582947, + "learning_rate": 4.829473403593785e-06, + "loss": 1.9674, + "step": 20391 + }, + { + "epoch": 0.68, + "grad_norm": 0.7629665732383728, + "learning_rate": 4.828563645777834e-06, + "loss": 2.0723, + "step": 20392 + }, + { + "epoch": 0.68, + "grad_norm": 0.7780708074569702, + "learning_rate": 4.827653946385735e-06, + "loss": 2.1162, + "step": 20393 + }, + { + "epoch": 0.68, + "grad_norm": 0.8334059119224548, + "learning_rate": 4.826744305427775e-06, + "loss": 2.0355, + "step": 20394 + }, + { + "epoch": 0.68, + "grad_norm": 0.7552018165588379, + "learning_rate": 4.825834722914222e-06, + "loss": 2.065, + "step": 20395 + }, + { + "epoch": 0.68, + "grad_norm": 0.7361000776290894, + "learning_rate": 4.8249251988553604e-06, + "loss": 2.0541, + "step": 20396 + }, + { + "epoch": 0.68, + "grad_norm": 0.7520321607589722, + "learning_rate": 4.824015733261461e-06, + "loss": 2.1027, + "step": 20397 + }, + { + "epoch": 0.68, + "grad_norm": 0.739631175994873, + "learning_rate": 4.823106326142794e-06, + "loss": 2.0566, + "step": 20398 + }, + { + "epoch": 0.68, + "grad_norm": 0.7471319437026978, + "learning_rate": 4.822196977509637e-06, + "loss": 2.0837, + "step": 20399 + }, + { + "epoch": 0.68, + "grad_norm": 0.7342908382415771, + "learning_rate": 4.82128768737227e-06, + "loss": 1.9992, + "step": 20400 + }, + { + "epoch": 0.68, + "grad_norm": 0.7498869895935059, + "learning_rate": 4.820378455740958e-06, + "loss": 2.042, + "step": 20401 + }, + { + "epoch": 0.68, + "grad_norm": 0.7480858564376831, + "learning_rate": 4.819469282625971e-06, + "loss": 1.9531, + "step": 20402 + }, + { + "epoch": 0.68, + "grad_norm": 0.7311519384384155, + "learning_rate": 4.818560168037589e-06, + "loss": 2.07, + "step": 20403 + }, + { + "epoch": 0.68, + "grad_norm": 0.7690201997756958, + "learning_rate": 4.817651111986073e-06, + "loss": 2.1156, + "step": 20404 + }, + { + "epoch": 0.68, + "grad_norm": 0.7537568211555481, + "learning_rate": 4.816742114481702e-06, + "loss": 2.0254, + "step": 20405 + }, + { + "epoch": 0.68, + "grad_norm": 0.7524348497390747, + "learning_rate": 4.815833175534736e-06, + "loss": 2.0702, + "step": 20406 + }, + { + "epoch": 0.68, + "grad_norm": 0.7387667298316956, + "learning_rate": 4.814924295155453e-06, + "loss": 2.0797, + "step": 20407 + }, + { + "epoch": 0.68, + "grad_norm": 0.7292746305465698, + "learning_rate": 4.814015473354112e-06, + "loss": 2.0995, + "step": 20408 + }, + { + "epoch": 0.68, + "grad_norm": 0.7267752289772034, + "learning_rate": 4.81310671014099e-06, + "loss": 2.0697, + "step": 20409 + }, + { + "epoch": 0.68, + "grad_norm": 0.723987877368927, + "learning_rate": 4.812198005526348e-06, + "loss": 1.9733, + "step": 20410 + }, + { + "epoch": 0.68, + "grad_norm": 0.7221235036849976, + "learning_rate": 4.811289359520448e-06, + "loss": 2.0677, + "step": 20411 + }, + { + "epoch": 0.68, + "grad_norm": 0.7846252918243408, + "learning_rate": 4.810380772133561e-06, + "loss": 2.0519, + "step": 20412 + }, + { + "epoch": 0.68, + "grad_norm": 0.7510960698127747, + "learning_rate": 4.8094722433759535e-06, + "loss": 2.057, + "step": 20413 + }, + { + "epoch": 0.68, + "grad_norm": 0.7808570861816406, + "learning_rate": 4.8085637732578874e-06, + "loss": 1.9589, + "step": 20414 + }, + { + "epoch": 0.68, + "grad_norm": 0.7436812520027161, + "learning_rate": 4.80765536178962e-06, + "loss": 2.0733, + "step": 20415 + }, + { + "epoch": 0.68, + "grad_norm": 0.7322107553482056, + "learning_rate": 4.806747008981425e-06, + "loss": 2.0411, + "step": 20416 + }, + { + "epoch": 0.68, + "grad_norm": 0.7608382105827332, + "learning_rate": 4.8058387148435544e-06, + "loss": 2.0774, + "step": 20417 + }, + { + "epoch": 0.68, + "grad_norm": 0.7409543991088867, + "learning_rate": 4.804930479386278e-06, + "loss": 2.0456, + "step": 20418 + }, + { + "epoch": 0.68, + "grad_norm": 0.7738876342773438, + "learning_rate": 4.804022302619852e-06, + "loss": 2.0681, + "step": 20419 + }, + { + "epoch": 0.68, + "grad_norm": 0.7454972863197327, + "learning_rate": 4.8031141845545326e-06, + "loss": 2.0065, + "step": 20420 + }, + { + "epoch": 0.68, + "grad_norm": 0.7192872762680054, + "learning_rate": 4.802206125200585e-06, + "loss": 2.0718, + "step": 20421 + }, + { + "epoch": 0.68, + "grad_norm": 0.7515000700950623, + "learning_rate": 4.801298124568269e-06, + "loss": 2.0854, + "step": 20422 + }, + { + "epoch": 0.68, + "grad_norm": 0.7728748917579651, + "learning_rate": 4.80039018266784e-06, + "loss": 2.0832, + "step": 20423 + }, + { + "epoch": 0.68, + "grad_norm": 0.7670896053314209, + "learning_rate": 4.799482299509551e-06, + "loss": 2.0419, + "step": 20424 + }, + { + "epoch": 0.68, + "grad_norm": 0.7562118172645569, + "learning_rate": 4.798574475103669e-06, + "loss": 2.0281, + "step": 20425 + }, + { + "epoch": 0.68, + "grad_norm": 0.7317204475402832, + "learning_rate": 4.797666709460439e-06, + "loss": 2.0709, + "step": 20426 + }, + { + "epoch": 0.68, + "grad_norm": 0.758247971534729, + "learning_rate": 4.796759002590126e-06, + "loss": 2.0628, + "step": 20427 + }, + { + "epoch": 0.68, + "grad_norm": 0.723442554473877, + "learning_rate": 4.795851354502976e-06, + "loss": 2.0266, + "step": 20428 + }, + { + "epoch": 0.68, + "grad_norm": 0.7600600123405457, + "learning_rate": 4.794943765209251e-06, + "loss": 2.0312, + "step": 20429 + }, + { + "epoch": 0.68, + "grad_norm": 0.7232041358947754, + "learning_rate": 4.794036234719198e-06, + "loss": 2.0026, + "step": 20430 + }, + { + "epoch": 0.68, + "grad_norm": 0.7174778580665588, + "learning_rate": 4.793128763043077e-06, + "loss": 2.0214, + "step": 20431 + }, + { + "epoch": 0.68, + "grad_norm": 0.724126398563385, + "learning_rate": 4.792221350191134e-06, + "loss": 2.0104, + "step": 20432 + }, + { + "epoch": 0.68, + "grad_norm": 0.7235143184661865, + "learning_rate": 4.791313996173619e-06, + "loss": 2.0621, + "step": 20433 + }, + { + "epoch": 0.68, + "grad_norm": 0.7409482002258301, + "learning_rate": 4.790406701000786e-06, + "loss": 2.0634, + "step": 20434 + }, + { + "epoch": 0.68, + "grad_norm": 0.759099543094635, + "learning_rate": 4.789499464682889e-06, + "loss": 2.0579, + "step": 20435 + }, + { + "epoch": 0.68, + "grad_norm": 0.7290852665901184, + "learning_rate": 4.7885922872301734e-06, + "loss": 2.0414, + "step": 20436 + }, + { + "epoch": 0.68, + "grad_norm": 0.7448917031288147, + "learning_rate": 4.787685168652883e-06, + "loss": 2.0277, + "step": 20437 + }, + { + "epoch": 0.68, + "grad_norm": 0.7272475361824036, + "learning_rate": 4.786778108961277e-06, + "loss": 2.0372, + "step": 20438 + }, + { + "epoch": 0.68, + "grad_norm": 0.7505134344100952, + "learning_rate": 4.785871108165591e-06, + "loss": 2.0067, + "step": 20439 + }, + { + "epoch": 0.68, + "grad_norm": 0.7514858841896057, + "learning_rate": 4.784964166276082e-06, + "loss": 2.1668, + "step": 20440 + }, + { + "epoch": 0.68, + "grad_norm": 0.7385721206665039, + "learning_rate": 4.784057283302991e-06, + "loss": 2.0606, + "step": 20441 + }, + { + "epoch": 0.68, + "grad_norm": 0.7280479073524475, + "learning_rate": 4.7831504592565605e-06, + "loss": 2.0884, + "step": 20442 + }, + { + "epoch": 0.68, + "grad_norm": 0.7752751111984253, + "learning_rate": 4.782243694147038e-06, + "loss": 2.1108, + "step": 20443 + }, + { + "epoch": 0.68, + "grad_norm": 0.7416374087333679, + "learning_rate": 4.781336987984672e-06, + "loss": 2.0289, + "step": 20444 + }, + { + "epoch": 0.68, + "grad_norm": 0.7601982951164246, + "learning_rate": 4.780430340779705e-06, + "loss": 1.9475, + "step": 20445 + }, + { + "epoch": 0.68, + "grad_norm": 0.7930278182029724, + "learning_rate": 4.779523752542371e-06, + "loss": 2.1037, + "step": 20446 + }, + { + "epoch": 0.68, + "grad_norm": 0.7413278818130493, + "learning_rate": 4.778617223282922e-06, + "loss": 2.0806, + "step": 20447 + }, + { + "epoch": 0.68, + "grad_norm": 0.7162112593650818, + "learning_rate": 4.777710753011592e-06, + "loss": 2.1349, + "step": 20448 + }, + { + "epoch": 0.68, + "grad_norm": 0.750184178352356, + "learning_rate": 4.776804341738629e-06, + "loss": 2.0645, + "step": 20449 + }, + { + "epoch": 0.68, + "grad_norm": 0.7359859943389893, + "learning_rate": 4.775897989474266e-06, + "loss": 2.0391, + "step": 20450 + }, + { + "epoch": 0.68, + "grad_norm": 0.7356467247009277, + "learning_rate": 4.774991696228749e-06, + "loss": 1.9737, + "step": 20451 + }, + { + "epoch": 0.68, + "grad_norm": 0.7214052081108093, + "learning_rate": 4.774085462012311e-06, + "loss": 2.0512, + "step": 20452 + }, + { + "epoch": 0.68, + "grad_norm": 0.7331635355949402, + "learning_rate": 4.773179286835196e-06, + "loss": 2.0202, + "step": 20453 + }, + { + "epoch": 0.68, + "grad_norm": 0.7434374094009399, + "learning_rate": 4.7722731707076375e-06, + "loss": 2.0918, + "step": 20454 + }, + { + "epoch": 0.68, + "grad_norm": 0.7501091361045837, + "learning_rate": 4.77136711363987e-06, + "loss": 2.0839, + "step": 20455 + }, + { + "epoch": 0.68, + "grad_norm": 0.764359176158905, + "learning_rate": 4.770461115642133e-06, + "loss": 2.1057, + "step": 20456 + }, + { + "epoch": 0.68, + "grad_norm": 0.7278562784194946, + "learning_rate": 4.769555176724664e-06, + "loss": 2.017, + "step": 20457 + }, + { + "epoch": 0.68, + "grad_norm": 0.7361772656440735, + "learning_rate": 4.768649296897696e-06, + "loss": 2.0697, + "step": 20458 + }, + { + "epoch": 0.68, + "grad_norm": 0.7353608012199402, + "learning_rate": 4.767743476171459e-06, + "loss": 2.0007, + "step": 20459 + }, + { + "epoch": 0.68, + "grad_norm": 0.737897515296936, + "learning_rate": 4.766837714556193e-06, + "loss": 2.0216, + "step": 20460 + }, + { + "epoch": 0.68, + "grad_norm": 0.7516582608222961, + "learning_rate": 4.765932012062124e-06, + "loss": 2.0516, + "step": 20461 + }, + { + "epoch": 0.68, + "grad_norm": 0.7193740606307983, + "learning_rate": 4.765026368699488e-06, + "loss": 2.0644, + "step": 20462 + }, + { + "epoch": 0.68, + "grad_norm": 0.7444588541984558, + "learning_rate": 4.7641207844785235e-06, + "loss": 2.0242, + "step": 20463 + }, + { + "epoch": 0.68, + "grad_norm": 0.7530665993690491, + "learning_rate": 4.763215259409445e-06, + "loss": 2.0757, + "step": 20464 + }, + { + "epoch": 0.68, + "grad_norm": 0.751469075679779, + "learning_rate": 4.762309793502493e-06, + "loss": 1.9776, + "step": 20465 + }, + { + "epoch": 0.68, + "grad_norm": 0.7186638116836548, + "learning_rate": 4.761404386767898e-06, + "loss": 2.0801, + "step": 20466 + }, + { + "epoch": 0.68, + "grad_norm": 0.7297989130020142, + "learning_rate": 4.760499039215887e-06, + "loss": 2.0574, + "step": 20467 + }, + { + "epoch": 0.68, + "grad_norm": 0.7413662075996399, + "learning_rate": 4.759593750856684e-06, + "loss": 2.0585, + "step": 20468 + }, + { + "epoch": 0.68, + "grad_norm": 0.7596720457077026, + "learning_rate": 4.758688521700522e-06, + "loss": 2.0655, + "step": 20469 + }, + { + "epoch": 0.68, + "grad_norm": 0.7487229108810425, + "learning_rate": 4.7577833517576225e-06, + "loss": 2.0336, + "step": 20470 + }, + { + "epoch": 0.68, + "grad_norm": 0.7519277930259705, + "learning_rate": 4.756878241038218e-06, + "loss": 2.0469, + "step": 20471 + }, + { + "epoch": 0.68, + "grad_norm": 0.7618056535720825, + "learning_rate": 4.755973189552526e-06, + "loss": 2.0758, + "step": 20472 + }, + { + "epoch": 0.68, + "grad_norm": 0.7563413381576538, + "learning_rate": 4.755068197310779e-06, + "loss": 1.9951, + "step": 20473 + }, + { + "epoch": 0.68, + "grad_norm": 0.7489362955093384, + "learning_rate": 4.754163264323195e-06, + "loss": 2.0276, + "step": 20474 + }, + { + "epoch": 0.68, + "grad_norm": 0.762759804725647, + "learning_rate": 4.753258390600004e-06, + "loss": 2.0855, + "step": 20475 + }, + { + "epoch": 0.68, + "grad_norm": 0.7637871503829956, + "learning_rate": 4.752353576151425e-06, + "loss": 1.9752, + "step": 20476 + }, + { + "epoch": 0.68, + "grad_norm": 0.7573428153991699, + "learning_rate": 4.7514488209876756e-06, + "loss": 2.0404, + "step": 20477 + }, + { + "epoch": 0.68, + "grad_norm": 0.7467048764228821, + "learning_rate": 4.750544125118981e-06, + "loss": 2.0644, + "step": 20478 + }, + { + "epoch": 0.68, + "grad_norm": 0.7243214845657349, + "learning_rate": 4.749639488555567e-06, + "loss": 2.0781, + "step": 20479 + }, + { + "epoch": 0.68, + "grad_norm": 0.714649498462677, + "learning_rate": 4.7487349113076475e-06, + "loss": 2.0241, + "step": 20480 + }, + { + "epoch": 0.68, + "grad_norm": 0.7398096919059753, + "learning_rate": 4.747830393385441e-06, + "loss": 2.0858, + "step": 20481 + }, + { + "epoch": 0.68, + "grad_norm": 0.7399649620056152, + "learning_rate": 4.746925934799173e-06, + "loss": 2.0609, + "step": 20482 + }, + { + "epoch": 0.68, + "grad_norm": 0.7404807209968567, + "learning_rate": 4.746021535559053e-06, + "loss": 2.0778, + "step": 20483 + }, + { + "epoch": 0.68, + "grad_norm": 0.7340667843818665, + "learning_rate": 4.745117195675301e-06, + "loss": 2.0773, + "step": 20484 + }, + { + "epoch": 0.68, + "grad_norm": 0.7648415565490723, + "learning_rate": 4.744212915158144e-06, + "loss": 2.0674, + "step": 20485 + }, + { + "epoch": 0.68, + "grad_norm": 0.7472794651985168, + "learning_rate": 4.743308694017782e-06, + "loss": 2.0145, + "step": 20486 + }, + { + "epoch": 0.68, + "grad_norm": 0.7433467507362366, + "learning_rate": 4.742404532264437e-06, + "loss": 2.0904, + "step": 20487 + }, + { + "epoch": 0.68, + "grad_norm": 0.7263256907463074, + "learning_rate": 4.741500429908328e-06, + "loss": 2.1474, + "step": 20488 + }, + { + "epoch": 0.68, + "grad_norm": 0.7645857334136963, + "learning_rate": 4.740596386959666e-06, + "loss": 2.115, + "step": 20489 + }, + { + "epoch": 0.68, + "grad_norm": 0.730527937412262, + "learning_rate": 4.73969240342866e-06, + "loss": 1.9591, + "step": 20490 + }, + { + "epoch": 0.68, + "grad_norm": 0.7711911797523499, + "learning_rate": 4.7387884793255305e-06, + "loss": 2.0992, + "step": 20491 + }, + { + "epoch": 0.68, + "grad_norm": 0.7540721893310547, + "learning_rate": 4.737884614660481e-06, + "loss": 2.033, + "step": 20492 + }, + { + "epoch": 0.68, + "grad_norm": 0.750450611114502, + "learning_rate": 4.7369808094437265e-06, + "loss": 2.0707, + "step": 20493 + }, + { + "epoch": 0.68, + "grad_norm": 0.7450700402259827, + "learning_rate": 4.736077063685482e-06, + "loss": 2.1053, + "step": 20494 + }, + { + "epoch": 0.68, + "grad_norm": 0.7187801003456116, + "learning_rate": 4.735173377395955e-06, + "loss": 2.0051, + "step": 20495 + }, + { + "epoch": 0.68, + "grad_norm": 0.7522356510162354, + "learning_rate": 4.734269750585351e-06, + "loss": 2.0737, + "step": 20496 + }, + { + "epoch": 0.68, + "grad_norm": 0.7331739664077759, + "learning_rate": 4.733366183263879e-06, + "loss": 2.129, + "step": 20497 + }, + { + "epoch": 0.68, + "grad_norm": 0.7554284334182739, + "learning_rate": 4.7324626754417576e-06, + "loss": 2.0646, + "step": 20498 + }, + { + "epoch": 0.68, + "grad_norm": 0.7666828632354736, + "learning_rate": 4.731559227129179e-06, + "loss": 2.0682, + "step": 20499 + }, + { + "epoch": 0.68, + "grad_norm": 0.7296462059020996, + "learning_rate": 4.730655838336356e-06, + "loss": 2.0792, + "step": 20500 + }, + { + "epoch": 0.68, + "grad_norm": 0.7414780855178833, + "learning_rate": 4.7297525090735e-06, + "loss": 1.9611, + "step": 20501 + }, + { + "epoch": 0.68, + "grad_norm": 0.7634021639823914, + "learning_rate": 4.7288492393508105e-06, + "loss": 2.0707, + "step": 20502 + }, + { + "epoch": 0.68, + "grad_norm": 0.712399423122406, + "learning_rate": 4.727946029178489e-06, + "loss": 2.0153, + "step": 20503 + }, + { + "epoch": 0.68, + "grad_norm": 0.7428499460220337, + "learning_rate": 4.727042878566748e-06, + "loss": 2.0316, + "step": 20504 + }, + { + "epoch": 0.68, + "grad_norm": 0.772386908531189, + "learning_rate": 4.726139787525782e-06, + "loss": 2.0018, + "step": 20505 + }, + { + "epoch": 0.68, + "grad_norm": 0.7363370060920715, + "learning_rate": 4.725236756065798e-06, + "loss": 2.0829, + "step": 20506 + }, + { + "epoch": 0.68, + "grad_norm": 0.7690001130104065, + "learning_rate": 4.724333784197002e-06, + "loss": 2.0778, + "step": 20507 + }, + { + "epoch": 0.68, + "grad_norm": 0.7524162530899048, + "learning_rate": 4.723430871929591e-06, + "loss": 2.074, + "step": 20508 + }, + { + "epoch": 0.68, + "grad_norm": 0.7658815383911133, + "learning_rate": 4.722528019273762e-06, + "loss": 2.1016, + "step": 20509 + }, + { + "epoch": 0.68, + "grad_norm": 0.7552663087844849, + "learning_rate": 4.721625226239721e-06, + "loss": 2.0229, + "step": 20510 + }, + { + "epoch": 0.68, + "grad_norm": 0.7261641025543213, + "learning_rate": 4.720722492837666e-06, + "loss": 2.0887, + "step": 20511 + }, + { + "epoch": 0.68, + "grad_norm": 0.7457427382469177, + "learning_rate": 4.719819819077791e-06, + "loss": 2.0321, + "step": 20512 + }, + { + "epoch": 0.68, + "grad_norm": 0.7392370700836182, + "learning_rate": 4.718917204970296e-06, + "loss": 1.9621, + "step": 20513 + }, + { + "epoch": 0.68, + "grad_norm": 0.7675729393959045, + "learning_rate": 4.718014650525384e-06, + "loss": 2.0704, + "step": 20514 + }, + { + "epoch": 0.68, + "grad_norm": 0.7457990050315857, + "learning_rate": 4.717112155753243e-06, + "loss": 2.0906, + "step": 20515 + }, + { + "epoch": 0.68, + "grad_norm": 0.7620640397071838, + "learning_rate": 4.716209720664076e-06, + "loss": 2.0682, + "step": 20516 + }, + { + "epoch": 0.68, + "grad_norm": 0.751504123210907, + "learning_rate": 4.715307345268075e-06, + "loss": 2.031, + "step": 20517 + }, + { + "epoch": 0.68, + "grad_norm": 0.7397680282592773, + "learning_rate": 4.7144050295754315e-06, + "loss": 2.0697, + "step": 20518 + }, + { + "epoch": 0.68, + "grad_norm": 0.7322844862937927, + "learning_rate": 4.713502773596342e-06, + "loss": 2.054, + "step": 20519 + }, + { + "epoch": 0.68, + "grad_norm": 0.744929850101471, + "learning_rate": 4.712600577341008e-06, + "loss": 2.0255, + "step": 20520 + }, + { + "epoch": 0.68, + "grad_norm": 0.7625748515129089, + "learning_rate": 4.711698440819606e-06, + "loss": 2.0862, + "step": 20521 + }, + { + "epoch": 0.68, + "grad_norm": 0.7595180869102478, + "learning_rate": 4.7107963640423345e-06, + "loss": 2.0425, + "step": 20522 + }, + { + "epoch": 0.68, + "grad_norm": 0.7510827779769897, + "learning_rate": 4.7098943470193915e-06, + "loss": 2.0521, + "step": 20523 + }, + { + "epoch": 0.68, + "grad_norm": 0.7614597082138062, + "learning_rate": 4.70899238976096e-06, + "loss": 1.9971, + "step": 20524 + }, + { + "epoch": 0.68, + "grad_norm": 0.725976288318634, + "learning_rate": 4.708090492277229e-06, + "loss": 2.0315, + "step": 20525 + }, + { + "epoch": 0.68, + "grad_norm": 0.7496153116226196, + "learning_rate": 4.707188654578395e-06, + "loss": 2.0728, + "step": 20526 + }, + { + "epoch": 0.68, + "grad_norm": 0.7535601854324341, + "learning_rate": 4.706286876674636e-06, + "loss": 2.0798, + "step": 20527 + }, + { + "epoch": 0.68, + "grad_norm": 0.7332228422164917, + "learning_rate": 4.705385158576146e-06, + "loss": 2.0031, + "step": 20528 + }, + { + "epoch": 0.68, + "grad_norm": 0.754009485244751, + "learning_rate": 4.704483500293117e-06, + "loss": 2.0683, + "step": 20529 + }, + { + "epoch": 0.68, + "grad_norm": 0.7289254665374756, + "learning_rate": 4.703581901835729e-06, + "loss": 2.0641, + "step": 20530 + }, + { + "epoch": 0.68, + "grad_norm": 0.7583450078964233, + "learning_rate": 4.702680363214164e-06, + "loss": 2.0636, + "step": 20531 + }, + { + "epoch": 0.68, + "grad_norm": 0.723491907119751, + "learning_rate": 4.701778884438616e-06, + "loss": 2.1046, + "step": 20532 + }, + { + "epoch": 0.68, + "grad_norm": 0.7596973180770874, + "learning_rate": 4.700877465519264e-06, + "loss": 2.0808, + "step": 20533 + }, + { + "epoch": 0.68, + "grad_norm": 0.71733158826828, + "learning_rate": 4.699976106466291e-06, + "loss": 2.0931, + "step": 20534 + }, + { + "epoch": 0.68, + "grad_norm": 0.7321738600730896, + "learning_rate": 4.69907480728988e-06, + "loss": 2.0512, + "step": 20535 + }, + { + "epoch": 0.68, + "grad_norm": 0.733470618724823, + "learning_rate": 4.69817356800022e-06, + "loss": 2.0778, + "step": 20536 + }, + { + "epoch": 0.68, + "grad_norm": 0.7388619780540466, + "learning_rate": 4.6972723886074845e-06, + "loss": 2.0839, + "step": 20537 + }, + { + "epoch": 0.68, + "grad_norm": 0.7670189142227173, + "learning_rate": 4.696371269121862e-06, + "loss": 2.1255, + "step": 20538 + }, + { + "epoch": 0.68, + "grad_norm": 0.7171437740325928, + "learning_rate": 4.6954702095535276e-06, + "loss": 2.1051, + "step": 20539 + }, + { + "epoch": 0.68, + "grad_norm": 0.7075817584991455, + "learning_rate": 4.694569209912658e-06, + "loss": 2.1092, + "step": 20540 + }, + { + "epoch": 0.68, + "grad_norm": 0.7409306764602661, + "learning_rate": 4.693668270209437e-06, + "loss": 2.0543, + "step": 20541 + }, + { + "epoch": 0.68, + "grad_norm": 0.782078206539154, + "learning_rate": 4.692767390454049e-06, + "loss": 2.0682, + "step": 20542 + }, + { + "epoch": 0.68, + "grad_norm": 0.7323274612426758, + "learning_rate": 4.691866570656658e-06, + "loss": 2.0801, + "step": 20543 + }, + { + "epoch": 0.68, + "grad_norm": 0.7530894875526428, + "learning_rate": 4.690965810827447e-06, + "loss": 2.0167, + "step": 20544 + }, + { + "epoch": 0.68, + "grad_norm": 0.7447795271873474, + "learning_rate": 4.690065110976596e-06, + "loss": 1.9928, + "step": 20545 + }, + { + "epoch": 0.68, + "grad_norm": 0.7484596967697144, + "learning_rate": 4.689164471114274e-06, + "loss": 2.0922, + "step": 20546 + }, + { + "epoch": 0.68, + "grad_norm": 0.7381933331489563, + "learning_rate": 4.688263891250664e-06, + "loss": 1.9893, + "step": 20547 + }, + { + "epoch": 0.68, + "grad_norm": 0.7429854273796082, + "learning_rate": 4.687363371395934e-06, + "loss": 2.0649, + "step": 20548 + }, + { + "epoch": 0.68, + "grad_norm": 0.7724708318710327, + "learning_rate": 4.686462911560257e-06, + "loss": 2.0202, + "step": 20549 + }, + { + "epoch": 0.68, + "grad_norm": 0.7481154203414917, + "learning_rate": 4.685562511753807e-06, + "loss": 2.0692, + "step": 20550 + }, + { + "epoch": 0.68, + "grad_norm": 0.7638412117958069, + "learning_rate": 4.6846621719867615e-06, + "loss": 2.0186, + "step": 20551 + }, + { + "epoch": 0.68, + "grad_norm": 0.7455574870109558, + "learning_rate": 4.683761892269287e-06, + "loss": 2.0613, + "step": 20552 + }, + { + "epoch": 0.68, + "grad_norm": 0.733113706111908, + "learning_rate": 4.682861672611553e-06, + "loss": 2.0565, + "step": 20553 + }, + { + "epoch": 0.68, + "grad_norm": 0.7891930341720581, + "learning_rate": 4.681961513023734e-06, + "loss": 2.0146, + "step": 20554 + }, + { + "epoch": 0.68, + "grad_norm": 0.7603203058242798, + "learning_rate": 4.681061413515997e-06, + "loss": 2.06, + "step": 20555 + }, + { + "epoch": 0.68, + "grad_norm": 0.7696518301963806, + "learning_rate": 4.680161374098508e-06, + "loss": 2.0463, + "step": 20556 + }, + { + "epoch": 0.68, + "grad_norm": 0.7396694421768188, + "learning_rate": 4.679261394781437e-06, + "loss": 2.098, + "step": 20557 + }, + { + "epoch": 0.68, + "grad_norm": 0.7369046807289124, + "learning_rate": 4.678361475574956e-06, + "loss": 1.9948, + "step": 20558 + }, + { + "epoch": 0.68, + "grad_norm": 0.752503514289856, + "learning_rate": 4.677461616489226e-06, + "loss": 2.0432, + "step": 20559 + }, + { + "epoch": 0.68, + "grad_norm": 0.7310574054718018, + "learning_rate": 4.676561817534419e-06, + "loss": 2.0153, + "step": 20560 + }, + { + "epoch": 0.68, + "grad_norm": 0.7470492720603943, + "learning_rate": 4.675662078720695e-06, + "loss": 2.0997, + "step": 20561 + }, + { + "epoch": 0.68, + "grad_norm": 0.7730416059494019, + "learning_rate": 4.674762400058218e-06, + "loss": 2.0878, + "step": 20562 + }, + { + "epoch": 0.68, + "grad_norm": 0.7962812781333923, + "learning_rate": 4.673862781557154e-06, + "loss": 2.1179, + "step": 20563 + }, + { + "epoch": 0.68, + "grad_norm": 0.7277517318725586, + "learning_rate": 4.672963223227676e-06, + "loss": 2.0518, + "step": 20564 + }, + { + "epoch": 0.68, + "grad_norm": 0.7481294870376587, + "learning_rate": 4.672063725079929e-06, + "loss": 2.0493, + "step": 20565 + }, + { + "epoch": 0.68, + "grad_norm": 0.7629480361938477, + "learning_rate": 4.671164287124083e-06, + "loss": 2.0815, + "step": 20566 + }, + { + "epoch": 0.68, + "grad_norm": 0.7178028225898743, + "learning_rate": 4.670264909370304e-06, + "loss": 2.0346, + "step": 20567 + }, + { + "epoch": 0.68, + "grad_norm": 0.7206147909164429, + "learning_rate": 4.669365591828744e-06, + "loss": 1.9999, + "step": 20568 + }, + { + "epoch": 0.68, + "grad_norm": 0.7332802414894104, + "learning_rate": 4.668466334509573e-06, + "loss": 1.981, + "step": 20569 + }, + { + "epoch": 0.68, + "grad_norm": 0.7353415489196777, + "learning_rate": 4.6675671374229436e-06, + "loss": 2.1159, + "step": 20570 + }, + { + "epoch": 0.68, + "grad_norm": 0.7349573373794556, + "learning_rate": 4.666668000579011e-06, + "loss": 2.0362, + "step": 20571 + }, + { + "epoch": 0.68, + "grad_norm": 0.7618395090103149, + "learning_rate": 4.665768923987939e-06, + "loss": 2.0612, + "step": 20572 + }, + { + "epoch": 0.68, + "grad_norm": 0.8040956854820251, + "learning_rate": 4.664869907659887e-06, + "loss": 2.0582, + "step": 20573 + }, + { + "epoch": 0.68, + "grad_norm": 0.7944275140762329, + "learning_rate": 4.663970951605008e-06, + "loss": 2.0161, + "step": 20574 + }, + { + "epoch": 0.68, + "grad_norm": 0.7462409138679504, + "learning_rate": 4.663072055833454e-06, + "loss": 2.0926, + "step": 20575 + }, + { + "epoch": 0.68, + "grad_norm": 0.7482702732086182, + "learning_rate": 4.662173220355389e-06, + "loss": 2.0514, + "step": 20576 + }, + { + "epoch": 0.68, + "grad_norm": 0.7561256289482117, + "learning_rate": 4.661274445180958e-06, + "loss": 2.0495, + "step": 20577 + }, + { + "epoch": 0.68, + "grad_norm": 0.7476239204406738, + "learning_rate": 4.6603757303203234e-06, + "loss": 2.0683, + "step": 20578 + }, + { + "epoch": 0.68, + "grad_norm": 0.771104633808136, + "learning_rate": 4.659477075783631e-06, + "loss": 2.0545, + "step": 20579 + }, + { + "epoch": 0.68, + "grad_norm": 0.7423458695411682, + "learning_rate": 4.65857848158104e-06, + "loss": 2.0359, + "step": 20580 + }, + { + "epoch": 0.68, + "grad_norm": 0.7229087352752686, + "learning_rate": 4.657679947722695e-06, + "loss": 2.076, + "step": 20581 + }, + { + "epoch": 0.68, + "grad_norm": 0.7289699912071228, + "learning_rate": 4.656781474218756e-06, + "loss": 2.0132, + "step": 20582 + }, + { + "epoch": 0.68, + "grad_norm": 0.7427715063095093, + "learning_rate": 4.655883061079367e-06, + "loss": 2.0275, + "step": 20583 + }, + { + "epoch": 0.68, + "grad_norm": 0.7307316660881042, + "learning_rate": 4.654984708314676e-06, + "loss": 2.022, + "step": 20584 + }, + { + "epoch": 0.68, + "grad_norm": 0.7169957160949707, + "learning_rate": 4.654086415934835e-06, + "loss": 2.001, + "step": 20585 + }, + { + "epoch": 0.68, + "grad_norm": 0.7443612813949585, + "learning_rate": 4.653188183950001e-06, + "loss": 2.0472, + "step": 20586 + }, + { + "epoch": 0.68, + "grad_norm": 0.7593010663986206, + "learning_rate": 4.652290012370305e-06, + "loss": 2.0624, + "step": 20587 + }, + { + "epoch": 0.68, + "grad_norm": 0.7444010972976685, + "learning_rate": 4.651391901205903e-06, + "loss": 2.1033, + "step": 20588 + }, + { + "epoch": 0.68, + "grad_norm": 0.764243483543396, + "learning_rate": 4.650493850466944e-06, + "loss": 2.0856, + "step": 20589 + }, + { + "epoch": 0.69, + "grad_norm": 0.754604697227478, + "learning_rate": 4.649595860163567e-06, + "loss": 2.0115, + "step": 20590 + }, + { + "epoch": 0.69, + "grad_norm": 0.740834653377533, + "learning_rate": 4.6486979303059245e-06, + "loss": 2.1185, + "step": 20591 + }, + { + "epoch": 0.69, + "grad_norm": 0.7251995801925659, + "learning_rate": 4.647800060904155e-06, + "loss": 2.0435, + "step": 20592 + }, + { + "epoch": 0.69, + "grad_norm": 0.7572815418243408, + "learning_rate": 4.646902251968402e-06, + "loss": 2.0285, + "step": 20593 + }, + { + "epoch": 0.69, + "grad_norm": 0.7676687836647034, + "learning_rate": 4.6460045035088085e-06, + "loss": 2.0819, + "step": 20594 + }, + { + "epoch": 0.69, + "grad_norm": 0.7294437289237976, + "learning_rate": 4.645106815535523e-06, + "loss": 2.0622, + "step": 20595 + }, + { + "epoch": 0.69, + "grad_norm": 0.7461565732955933, + "learning_rate": 4.644209188058683e-06, + "loss": 2.0477, + "step": 20596 + }, + { + "epoch": 0.69, + "grad_norm": 0.7593443393707275, + "learning_rate": 4.643311621088423e-06, + "loss": 2.0988, + "step": 20597 + }, + { + "epoch": 0.69, + "grad_norm": 0.7722288966178894, + "learning_rate": 4.642414114634891e-06, + "loss": 2.0459, + "step": 20598 + }, + { + "epoch": 0.69, + "grad_norm": 0.7255918383598328, + "learning_rate": 4.6415166687082265e-06, + "loss": 2.0974, + "step": 20599 + }, + { + "epoch": 0.69, + "grad_norm": 0.7778660655021667, + "learning_rate": 4.640619283318568e-06, + "loss": 2.0836, + "step": 20600 + }, + { + "epoch": 0.69, + "grad_norm": 0.7646399140357971, + "learning_rate": 4.639721958476047e-06, + "loss": 2.0945, + "step": 20601 + }, + { + "epoch": 0.69, + "grad_norm": 0.7390388250350952, + "learning_rate": 4.63882469419081e-06, + "loss": 2.1229, + "step": 20602 + }, + { + "epoch": 0.69, + "grad_norm": 0.741004467010498, + "learning_rate": 4.637927490472986e-06, + "loss": 2.0102, + "step": 20603 + }, + { + "epoch": 0.69, + "grad_norm": 0.7691118121147156, + "learning_rate": 4.637030347332719e-06, + "loss": 2.069, + "step": 20604 + }, + { + "epoch": 0.69, + "grad_norm": 0.7195420265197754, + "learning_rate": 4.636133264780139e-06, + "loss": 2.0681, + "step": 20605 + }, + { + "epoch": 0.69, + "grad_norm": 0.7475249171257019, + "learning_rate": 4.635236242825379e-06, + "loss": 1.9803, + "step": 20606 + }, + { + "epoch": 0.69, + "grad_norm": 0.7600256204605103, + "learning_rate": 4.634339281478575e-06, + "loss": 2.1122, + "step": 20607 + }, + { + "epoch": 0.69, + "grad_norm": 0.7564387321472168, + "learning_rate": 4.633442380749865e-06, + "loss": 2.0599, + "step": 20608 + }, + { + "epoch": 0.69, + "grad_norm": 0.7315038442611694, + "learning_rate": 4.632545540649379e-06, + "loss": 1.9785, + "step": 20609 + }, + { + "epoch": 0.69, + "grad_norm": 0.7411050796508789, + "learning_rate": 4.6316487611872426e-06, + "loss": 2.0009, + "step": 20610 + }, + { + "epoch": 0.69, + "grad_norm": 0.7475032210350037, + "learning_rate": 4.6307520423735975e-06, + "loss": 1.9819, + "step": 20611 + }, + { + "epoch": 0.69, + "grad_norm": 0.7271531224250793, + "learning_rate": 4.6298553842185644e-06, + "loss": 2.0071, + "step": 20612 + }, + { + "epoch": 0.69, + "grad_norm": 0.7877135872840881, + "learning_rate": 4.628958786732283e-06, + "loss": 2.0297, + "step": 20613 + }, + { + "epoch": 0.69, + "grad_norm": 0.7407258152961731, + "learning_rate": 4.628062249924873e-06, + "loss": 2.0624, + "step": 20614 + }, + { + "epoch": 0.69, + "grad_norm": 0.7576996684074402, + "learning_rate": 4.62716577380647e-06, + "loss": 2.0162, + "step": 20615 + }, + { + "epoch": 0.69, + "grad_norm": 0.764274001121521, + "learning_rate": 4.6262693583871975e-06, + "loss": 2.0156, + "step": 20616 + }, + { + "epoch": 0.69, + "grad_norm": 0.7336465120315552, + "learning_rate": 4.625373003677187e-06, + "loss": 2.0306, + "step": 20617 + }, + { + "epoch": 0.69, + "grad_norm": 0.7349523901939392, + "learning_rate": 4.624476709686563e-06, + "loss": 2.0718, + "step": 20618 + }, + { + "epoch": 0.69, + "grad_norm": 0.7525498270988464, + "learning_rate": 4.623580476425447e-06, + "loss": 2.0632, + "step": 20619 + }, + { + "epoch": 0.69, + "grad_norm": 0.7286232709884644, + "learning_rate": 4.622684303903967e-06, + "loss": 2.0319, + "step": 20620 + }, + { + "epoch": 0.69, + "grad_norm": 0.7352551221847534, + "learning_rate": 4.621788192132253e-06, + "loss": 1.9962, + "step": 20621 + }, + { + "epoch": 0.69, + "grad_norm": 0.723606288433075, + "learning_rate": 4.620892141120423e-06, + "loss": 2.1099, + "step": 20622 + }, + { + "epoch": 0.69, + "grad_norm": 0.7471742033958435, + "learning_rate": 4.619996150878598e-06, + "loss": 2.0824, + "step": 20623 + }, + { + "epoch": 0.69, + "grad_norm": 0.7192665934562683, + "learning_rate": 4.619100221416908e-06, + "loss": 2.0594, + "step": 20624 + }, + { + "epoch": 0.69, + "grad_norm": 0.7166378498077393, + "learning_rate": 4.618204352745466e-06, + "loss": 1.9926, + "step": 20625 + }, + { + "epoch": 0.69, + "grad_norm": 0.7362362742424011, + "learning_rate": 4.6173085448744e-06, + "loss": 1.9973, + "step": 20626 + }, + { + "epoch": 0.69, + "grad_norm": 0.7270293235778809, + "learning_rate": 4.616412797813829e-06, + "loss": 2.0718, + "step": 20627 + }, + { + "epoch": 0.69, + "grad_norm": 0.746406614780426, + "learning_rate": 4.615517111573867e-06, + "loss": 2.0615, + "step": 20628 + }, + { + "epoch": 0.69, + "grad_norm": 0.7357895374298096, + "learning_rate": 4.614621486164636e-06, + "loss": 1.9852, + "step": 20629 + }, + { + "epoch": 0.69, + "grad_norm": 0.7593170404434204, + "learning_rate": 4.61372592159626e-06, + "loss": 2.0919, + "step": 20630 + }, + { + "epoch": 0.69, + "grad_norm": 0.7290507555007935, + "learning_rate": 4.6128304178788516e-06, + "loss": 2.057, + "step": 20631 + }, + { + "epoch": 0.69, + "grad_norm": 0.7337861061096191, + "learning_rate": 4.611934975022524e-06, + "loss": 2.0018, + "step": 20632 + }, + { + "epoch": 0.69, + "grad_norm": 0.7265309691429138, + "learning_rate": 4.611039593037402e-06, + "loss": 2.1178, + "step": 20633 + }, + { + "epoch": 0.69, + "grad_norm": 0.7529125809669495, + "learning_rate": 4.610144271933592e-06, + "loss": 2.0105, + "step": 20634 + }, + { + "epoch": 0.69, + "grad_norm": 0.7269579172134399, + "learning_rate": 4.609249011721216e-06, + "loss": 2.047, + "step": 20635 + }, + { + "epoch": 0.69, + "grad_norm": 0.7493817806243896, + "learning_rate": 4.608353812410384e-06, + "loss": 2.0359, + "step": 20636 + }, + { + "epoch": 0.69, + "grad_norm": 0.783760666847229, + "learning_rate": 4.607458674011212e-06, + "loss": 2.0546, + "step": 20637 + }, + { + "epoch": 0.69, + "grad_norm": 0.7337034940719604, + "learning_rate": 4.606563596533809e-06, + "loss": 1.9935, + "step": 20638 + }, + { + "epoch": 0.69, + "grad_norm": 0.7485724091529846, + "learning_rate": 4.605668579988294e-06, + "loss": 2.1013, + "step": 20639 + }, + { + "epoch": 0.69, + "grad_norm": 0.7395462393760681, + "learning_rate": 4.604773624384773e-06, + "loss": 2.008, + "step": 20640 + }, + { + "epoch": 0.69, + "grad_norm": 0.7468209266662598, + "learning_rate": 4.603878729733355e-06, + "loss": 2.0063, + "step": 20641 + }, + { + "epoch": 0.69, + "grad_norm": 0.743109405040741, + "learning_rate": 4.602983896044152e-06, + "loss": 2.0565, + "step": 20642 + }, + { + "epoch": 0.69, + "grad_norm": 0.7304777503013611, + "learning_rate": 4.6020891233272766e-06, + "loss": 2.089, + "step": 20643 + }, + { + "epoch": 0.69, + "grad_norm": 0.7182857990264893, + "learning_rate": 4.601194411592836e-06, + "loss": 1.9974, + "step": 20644 + }, + { + "epoch": 0.69, + "grad_norm": 0.7245355248451233, + "learning_rate": 4.600299760850933e-06, + "loss": 2.1012, + "step": 20645 + }, + { + "epoch": 0.69, + "grad_norm": 0.7487111687660217, + "learning_rate": 4.599405171111683e-06, + "loss": 2.031, + "step": 20646 + }, + { + "epoch": 0.69, + "grad_norm": 0.7455403804779053, + "learning_rate": 4.598510642385184e-06, + "loss": 2.0316, + "step": 20647 + }, + { + "epoch": 0.69, + "grad_norm": 0.7576206922531128, + "learning_rate": 4.597616174681551e-06, + "loss": 2.1144, + "step": 20648 + }, + { + "epoch": 0.69, + "grad_norm": 0.7099148035049438, + "learning_rate": 4.596721768010883e-06, + "loss": 2.0355, + "step": 20649 + }, + { + "epoch": 0.69, + "grad_norm": 0.7514818906784058, + "learning_rate": 4.595827422383282e-06, + "loss": 2.0929, + "step": 20650 + }, + { + "epoch": 0.69, + "grad_norm": 0.7305217981338501, + "learning_rate": 4.594933137808857e-06, + "loss": 2.0168, + "step": 20651 + }, + { + "epoch": 0.69, + "grad_norm": 0.7320857644081116, + "learning_rate": 4.5940389142977125e-06, + "loss": 2.0652, + "step": 20652 + }, + { + "epoch": 0.69, + "grad_norm": 0.7518050670623779, + "learning_rate": 4.593144751859948e-06, + "loss": 2.1426, + "step": 20653 + }, + { + "epoch": 0.69, + "grad_norm": 0.7550265192985535, + "learning_rate": 4.592250650505662e-06, + "loss": 2.0469, + "step": 20654 + }, + { + "epoch": 0.69, + "grad_norm": 0.749140739440918, + "learning_rate": 4.5913566102449625e-06, + "loss": 2.0679, + "step": 20655 + }, + { + "epoch": 0.69, + "grad_norm": 0.740583598613739, + "learning_rate": 4.5904626310879415e-06, + "loss": 2.1177, + "step": 20656 + }, + { + "epoch": 0.69, + "grad_norm": 0.7535557150840759, + "learning_rate": 4.5895687130447085e-06, + "loss": 2.1063, + "step": 20657 + }, + { + "epoch": 0.69, + "grad_norm": 0.7353165745735168, + "learning_rate": 4.588674856125353e-06, + "loss": 2.049, + "step": 20658 + }, + { + "epoch": 0.69, + "grad_norm": 0.7539626359939575, + "learning_rate": 4.587781060339982e-06, + "loss": 2.0784, + "step": 20659 + }, + { + "epoch": 0.69, + "grad_norm": 0.7740181088447571, + "learning_rate": 4.586887325698684e-06, + "loss": 2.1294, + "step": 20660 + }, + { + "epoch": 0.69, + "grad_norm": 0.7257598042488098, + "learning_rate": 4.585993652211565e-06, + "loss": 2.0579, + "step": 20661 + }, + { + "epoch": 0.69, + "grad_norm": 0.757351815700531, + "learning_rate": 4.585100039888718e-06, + "loss": 2.0183, + "step": 20662 + }, + { + "epoch": 0.69, + "grad_norm": 0.7439040541648865, + "learning_rate": 4.584206488740231e-06, + "loss": 1.983, + "step": 20663 + }, + { + "epoch": 0.69, + "grad_norm": 0.7441537380218506, + "learning_rate": 4.583312998776207e-06, + "loss": 2.0595, + "step": 20664 + }, + { + "epoch": 0.69, + "grad_norm": 0.7405146956443787, + "learning_rate": 4.582419570006742e-06, + "loss": 2.0092, + "step": 20665 + }, + { + "epoch": 0.69, + "grad_norm": 0.7386098504066467, + "learning_rate": 4.581526202441925e-06, + "loss": 2.0385, + "step": 20666 + }, + { + "epoch": 0.69, + "grad_norm": 0.7512227892875671, + "learning_rate": 4.580632896091845e-06, + "loss": 2.0219, + "step": 20667 + }, + { + "epoch": 0.69, + "grad_norm": 0.738365888595581, + "learning_rate": 4.5797396509666035e-06, + "loss": 2.0976, + "step": 20668 + }, + { + "epoch": 0.69, + "grad_norm": 0.728626012802124, + "learning_rate": 4.578846467076283e-06, + "loss": 2.0452, + "step": 20669 + }, + { + "epoch": 0.69, + "grad_norm": 0.7077351808547974, + "learning_rate": 4.577953344430981e-06, + "loss": 1.9964, + "step": 20670 + }, + { + "epoch": 0.69, + "grad_norm": 0.7818958759307861, + "learning_rate": 4.577060283040785e-06, + "loss": 2.0141, + "step": 20671 + }, + { + "epoch": 0.69, + "grad_norm": 0.7469938397407532, + "learning_rate": 4.57616728291578e-06, + "loss": 2.0579, + "step": 20672 + }, + { + "epoch": 0.69, + "grad_norm": 0.7606632113456726, + "learning_rate": 4.5752743440660586e-06, + "loss": 2.0733, + "step": 20673 + }, + { + "epoch": 0.69, + "grad_norm": 0.7833924293518066, + "learning_rate": 4.574381466501711e-06, + "loss": 2.0533, + "step": 20674 + }, + { + "epoch": 0.69, + "grad_norm": 0.7717723250389099, + "learning_rate": 4.5734886502328236e-06, + "loss": 2.0557, + "step": 20675 + }, + { + "epoch": 0.69, + "grad_norm": 0.7466371059417725, + "learning_rate": 4.572595895269476e-06, + "loss": 2.0967, + "step": 20676 + }, + { + "epoch": 0.69, + "grad_norm": 0.746803343296051, + "learning_rate": 4.5717032016217635e-06, + "loss": 1.9864, + "step": 20677 + }, + { + "epoch": 0.69, + "grad_norm": 0.7636306881904602, + "learning_rate": 4.5708105692997625e-06, + "loss": 2.0232, + "step": 20678 + }, + { + "epoch": 0.69, + "grad_norm": 0.7452732920646667, + "learning_rate": 4.569917998313567e-06, + "loss": 2.0539, + "step": 20679 + }, + { + "epoch": 0.69, + "grad_norm": 0.736293375492096, + "learning_rate": 4.569025488673251e-06, + "loss": 2.051, + "step": 20680 + }, + { + "epoch": 0.69, + "grad_norm": 0.7503386735916138, + "learning_rate": 4.568133040388906e-06, + "loss": 1.9707, + "step": 20681 + }, + { + "epoch": 0.69, + "grad_norm": 0.7489938735961914, + "learning_rate": 4.567240653470607e-06, + "loss": 1.9514, + "step": 20682 + }, + { + "epoch": 0.69, + "grad_norm": 0.7505583763122559, + "learning_rate": 4.566348327928439e-06, + "loss": 2.0382, + "step": 20683 + }, + { + "epoch": 0.69, + "grad_norm": 0.7644677758216858, + "learning_rate": 4.565456063772491e-06, + "loss": 2.0597, + "step": 20684 + }, + { + "epoch": 0.69, + "grad_norm": 0.7377408742904663, + "learning_rate": 4.564563861012827e-06, + "loss": 2.0417, + "step": 20685 + }, + { + "epoch": 0.69, + "grad_norm": 0.778724730014801, + "learning_rate": 4.563671719659536e-06, + "loss": 2.0493, + "step": 20686 + }, + { + "epoch": 0.69, + "grad_norm": 0.7527204751968384, + "learning_rate": 4.5627796397227e-06, + "loss": 2.0696, + "step": 20687 + }, + { + "epoch": 0.69, + "grad_norm": 0.7249613404273987, + "learning_rate": 4.561887621212392e-06, + "loss": 2.0366, + "step": 20688 + }, + { + "epoch": 0.69, + "grad_norm": 0.7465932965278625, + "learning_rate": 4.560995664138687e-06, + "loss": 2.0244, + "step": 20689 + }, + { + "epoch": 0.69, + "grad_norm": 0.7409705519676208, + "learning_rate": 4.560103768511669e-06, + "loss": 2.0766, + "step": 20690 + }, + { + "epoch": 0.69, + "grad_norm": 0.7407171726226807, + "learning_rate": 4.5592119343414074e-06, + "loss": 2.0528, + "step": 20691 + }, + { + "epoch": 0.69, + "grad_norm": 0.7700774073600769, + "learning_rate": 4.55832016163798e-06, + "loss": 2.0649, + "step": 20692 + }, + { + "epoch": 0.69, + "grad_norm": 0.816064178943634, + "learning_rate": 4.557428450411471e-06, + "loss": 2.0646, + "step": 20693 + }, + { + "epoch": 0.69, + "grad_norm": 0.746732771396637, + "learning_rate": 4.556536800671938e-06, + "loss": 2.0337, + "step": 20694 + }, + { + "epoch": 0.69, + "grad_norm": 0.7390273213386536, + "learning_rate": 4.555645212429461e-06, + "loss": 2.0344, + "step": 20695 + }, + { + "epoch": 0.69, + "grad_norm": 0.7555868625640869, + "learning_rate": 4.554753685694118e-06, + "loss": 2.0636, + "step": 20696 + }, + { + "epoch": 0.69, + "grad_norm": 0.7718966603279114, + "learning_rate": 4.553862220475976e-06, + "loss": 2.0908, + "step": 20697 + }, + { + "epoch": 0.69, + "grad_norm": 0.8078123927116394, + "learning_rate": 4.552970816785105e-06, + "loss": 2.0931, + "step": 20698 + }, + { + "epoch": 0.69, + "grad_norm": 0.7131874561309814, + "learning_rate": 4.552079474631576e-06, + "loss": 2.1001, + "step": 20699 + }, + { + "epoch": 0.69, + "grad_norm": 0.7474454045295715, + "learning_rate": 4.551188194025464e-06, + "loss": 2.0712, + "step": 20700 + }, + { + "epoch": 0.69, + "grad_norm": 0.7461369037628174, + "learning_rate": 4.5502969749768345e-06, + "loss": 2.0215, + "step": 20701 + }, + { + "epoch": 0.69, + "grad_norm": 0.7163628339767456, + "learning_rate": 4.549405817495753e-06, + "loss": 2.0817, + "step": 20702 + }, + { + "epoch": 0.69, + "grad_norm": 0.7464097142219543, + "learning_rate": 4.548514721592293e-06, + "loss": 2.1172, + "step": 20703 + }, + { + "epoch": 0.69, + "grad_norm": 0.7577479481697083, + "learning_rate": 4.547623687276516e-06, + "loss": 2.0232, + "step": 20704 + }, + { + "epoch": 0.69, + "grad_norm": 0.7378754019737244, + "learning_rate": 4.54673271455849e-06, + "loss": 2.0562, + "step": 20705 + }, + { + "epoch": 0.69, + "grad_norm": 0.743084192276001, + "learning_rate": 4.5458418034482895e-06, + "loss": 2.0392, + "step": 20706 + }, + { + "epoch": 0.69, + "grad_norm": 0.744170606136322, + "learning_rate": 4.544950953955966e-06, + "loss": 2.0033, + "step": 20707 + }, + { + "epoch": 0.69, + "grad_norm": 0.7399832010269165, + "learning_rate": 4.544060166091589e-06, + "loss": 1.9628, + "step": 20708 + }, + { + "epoch": 0.69, + "grad_norm": 0.7041359543800354, + "learning_rate": 4.5431694398652244e-06, + "loss": 2.0099, + "step": 20709 + }, + { + "epoch": 0.69, + "grad_norm": 0.7466070652008057, + "learning_rate": 4.542278775286936e-06, + "loss": 2.0641, + "step": 20710 + }, + { + "epoch": 0.69, + "grad_norm": 0.7509164810180664, + "learning_rate": 4.541388172366779e-06, + "loss": 2.046, + "step": 20711 + }, + { + "epoch": 0.69, + "grad_norm": 0.787859320640564, + "learning_rate": 4.540497631114822e-06, + "loss": 1.9834, + "step": 20712 + }, + { + "epoch": 0.69, + "grad_norm": 0.7788780927658081, + "learning_rate": 4.539607151541121e-06, + "loss": 2.0329, + "step": 20713 + }, + { + "epoch": 0.69, + "grad_norm": 0.7453721761703491, + "learning_rate": 4.538716733655737e-06, + "loss": 2.061, + "step": 20714 + }, + { + "epoch": 0.69, + "grad_norm": 0.73618084192276, + "learning_rate": 4.537826377468735e-06, + "loss": 2.0339, + "step": 20715 + }, + { + "epoch": 0.69, + "grad_norm": 0.7471417188644409, + "learning_rate": 4.53693608299017e-06, + "loss": 2.0238, + "step": 20716 + }, + { + "epoch": 0.69, + "grad_norm": 0.7380427122116089, + "learning_rate": 4.536045850230095e-06, + "loss": 2.0307, + "step": 20717 + }, + { + "epoch": 0.69, + "grad_norm": 0.7479676604270935, + "learning_rate": 4.535155679198575e-06, + "loss": 2.0112, + "step": 20718 + }, + { + "epoch": 0.69, + "grad_norm": 0.7329914569854736, + "learning_rate": 4.5342655699056644e-06, + "loss": 2.0499, + "step": 20719 + }, + { + "epoch": 0.69, + "grad_norm": 0.733182966709137, + "learning_rate": 4.533375522361415e-06, + "loss": 2.0208, + "step": 20720 + }, + { + "epoch": 0.69, + "grad_norm": 0.7644082307815552, + "learning_rate": 4.5324855365758836e-06, + "loss": 1.9936, + "step": 20721 + }, + { + "epoch": 0.69, + "grad_norm": 0.7468414306640625, + "learning_rate": 4.531595612559131e-06, + "loss": 1.9706, + "step": 20722 + }, + { + "epoch": 0.69, + "grad_norm": 0.7537177205085754, + "learning_rate": 4.530705750321203e-06, + "loss": 2.0651, + "step": 20723 + }, + { + "epoch": 0.69, + "grad_norm": 0.7326295971870422, + "learning_rate": 4.52981594987216e-06, + "loss": 2.0244, + "step": 20724 + }, + { + "epoch": 0.69, + "grad_norm": 0.7241556644439697, + "learning_rate": 4.528926211222049e-06, + "loss": 2.0724, + "step": 20725 + }, + { + "epoch": 0.69, + "grad_norm": 0.7399282455444336, + "learning_rate": 4.52803653438092e-06, + "loss": 2.0541, + "step": 20726 + }, + { + "epoch": 0.69, + "grad_norm": 0.7799809575080872, + "learning_rate": 4.527146919358828e-06, + "loss": 2.0187, + "step": 20727 + }, + { + "epoch": 0.69, + "grad_norm": 0.7700902223587036, + "learning_rate": 4.52625736616583e-06, + "loss": 2.157, + "step": 20728 + }, + { + "epoch": 0.69, + "grad_norm": 0.7256603837013245, + "learning_rate": 4.525367874811961e-06, + "loss": 2.0672, + "step": 20729 + }, + { + "epoch": 0.69, + "grad_norm": 0.7290663719177246, + "learning_rate": 4.5244784453072766e-06, + "loss": 2.0412, + "step": 20730 + }, + { + "epoch": 0.69, + "grad_norm": 0.7704488039016724, + "learning_rate": 4.523589077661831e-06, + "loss": 2.0808, + "step": 20731 + }, + { + "epoch": 0.69, + "grad_norm": 0.7384892106056213, + "learning_rate": 4.5226997718856645e-06, + "loss": 1.999, + "step": 20732 + }, + { + "epoch": 0.69, + "grad_norm": 0.7603267431259155, + "learning_rate": 4.521810527988824e-06, + "loss": 2.0581, + "step": 20733 + }, + { + "epoch": 0.69, + "grad_norm": 0.7297340631484985, + "learning_rate": 4.5209213459813605e-06, + "loss": 2.0398, + "step": 20734 + }, + { + "epoch": 0.69, + "grad_norm": 0.7596359252929688, + "learning_rate": 4.520032225873313e-06, + "loss": 2.0853, + "step": 20735 + }, + { + "epoch": 0.69, + "grad_norm": 0.7111049294471741, + "learning_rate": 4.5191431676747296e-06, + "loss": 1.9701, + "step": 20736 + }, + { + "epoch": 0.69, + "grad_norm": 0.7719289660453796, + "learning_rate": 4.518254171395659e-06, + "loss": 1.9509, + "step": 20737 + }, + { + "epoch": 0.69, + "grad_norm": 0.7342584729194641, + "learning_rate": 4.51736523704614e-06, + "loss": 2.0419, + "step": 20738 + }, + { + "epoch": 0.69, + "grad_norm": 0.7334843277931213, + "learning_rate": 4.516476364636211e-06, + "loss": 1.9935, + "step": 20739 + }, + { + "epoch": 0.69, + "grad_norm": 0.7519233226776123, + "learning_rate": 4.515587554175922e-06, + "loss": 2.0898, + "step": 20740 + }, + { + "epoch": 0.69, + "grad_norm": 0.7534491419792175, + "learning_rate": 4.514698805675311e-06, + "loss": 2.0472, + "step": 20741 + }, + { + "epoch": 0.69, + "grad_norm": 0.7541595101356506, + "learning_rate": 4.513810119144415e-06, + "loss": 2.0152, + "step": 20742 + }, + { + "epoch": 0.69, + "grad_norm": 0.7310873866081238, + "learning_rate": 4.512921494593276e-06, + "loss": 2.062, + "step": 20743 + }, + { + "epoch": 0.69, + "grad_norm": 0.7659986615180969, + "learning_rate": 4.512032932031939e-06, + "loss": 1.9964, + "step": 20744 + }, + { + "epoch": 0.69, + "grad_norm": 0.743319571018219, + "learning_rate": 4.5111444314704334e-06, + "loss": 2.053, + "step": 20745 + }, + { + "epoch": 0.69, + "grad_norm": 0.7624233365058899, + "learning_rate": 4.510255992918805e-06, + "loss": 2.0479, + "step": 20746 + }, + { + "epoch": 0.69, + "grad_norm": 0.7290558815002441, + "learning_rate": 4.509367616387087e-06, + "loss": 2.0114, + "step": 20747 + }, + { + "epoch": 0.69, + "grad_norm": 0.7348864674568176, + "learning_rate": 4.508479301885312e-06, + "loss": 2.0528, + "step": 20748 + }, + { + "epoch": 0.69, + "grad_norm": 0.7290246486663818, + "learning_rate": 4.50759104942352e-06, + "loss": 2.0978, + "step": 20749 + }, + { + "epoch": 0.69, + "grad_norm": 0.7566142082214355, + "learning_rate": 4.5067028590117525e-06, + "loss": 1.9224, + "step": 20750 + }, + { + "epoch": 0.69, + "grad_norm": 0.7833240628242493, + "learning_rate": 4.50581473066003e-06, + "loss": 2.0714, + "step": 20751 + }, + { + "epoch": 0.69, + "grad_norm": 0.7549216151237488, + "learning_rate": 4.504926664378392e-06, + "loss": 2.029, + "step": 20752 + }, + { + "epoch": 0.69, + "grad_norm": 0.7524871230125427, + "learning_rate": 4.504038660176876e-06, + "loss": 2.0943, + "step": 20753 + }, + { + "epoch": 0.69, + "grad_norm": 0.7237919569015503, + "learning_rate": 4.503150718065507e-06, + "loss": 2.0462, + "step": 20754 + }, + { + "epoch": 0.69, + "grad_norm": 0.7428598403930664, + "learning_rate": 4.502262838054322e-06, + "loss": 2.1008, + "step": 20755 + }, + { + "epoch": 0.69, + "grad_norm": 0.7651428580284119, + "learning_rate": 4.501375020153351e-06, + "loss": 2.0361, + "step": 20756 + }, + { + "epoch": 0.69, + "grad_norm": 0.7211928963661194, + "learning_rate": 4.500487264372618e-06, + "loss": 2.0436, + "step": 20757 + }, + { + "epoch": 0.69, + "grad_norm": 0.7370629906654358, + "learning_rate": 4.4995995707221574e-06, + "loss": 1.9601, + "step": 20758 + }, + { + "epoch": 0.69, + "grad_norm": 0.7417383790016174, + "learning_rate": 4.4987119392120005e-06, + "loss": 2.0799, + "step": 20759 + }, + { + "epoch": 0.69, + "grad_norm": 0.7371569871902466, + "learning_rate": 4.497824369852173e-06, + "loss": 2.0137, + "step": 20760 + }, + { + "epoch": 0.69, + "grad_norm": 0.7550243139266968, + "learning_rate": 4.496936862652697e-06, + "loss": 2.1249, + "step": 20761 + }, + { + "epoch": 0.69, + "grad_norm": 0.7530125379562378, + "learning_rate": 4.496049417623606e-06, + "loss": 1.9857, + "step": 20762 + }, + { + "epoch": 0.69, + "grad_norm": 0.7377340793609619, + "learning_rate": 4.495162034774923e-06, + "loss": 2.0132, + "step": 20763 + }, + { + "epoch": 0.69, + "grad_norm": 0.735969603061676, + "learning_rate": 4.494274714116671e-06, + "loss": 1.982, + "step": 20764 + }, + { + "epoch": 0.69, + "grad_norm": 0.7294069528579712, + "learning_rate": 4.4933874556588755e-06, + "loss": 2.0431, + "step": 20765 + }, + { + "epoch": 0.69, + "grad_norm": 0.7481253743171692, + "learning_rate": 4.492500259411565e-06, + "loss": 2.0991, + "step": 20766 + }, + { + "epoch": 0.69, + "grad_norm": 0.7536741495132446, + "learning_rate": 4.491613125384756e-06, + "loss": 1.971, + "step": 20767 + }, + { + "epoch": 0.69, + "grad_norm": 0.7384999394416809, + "learning_rate": 4.4907260535884766e-06, + "loss": 2.0145, + "step": 20768 + }, + { + "epoch": 0.69, + "grad_norm": 0.7133184671401978, + "learning_rate": 4.489839044032746e-06, + "loss": 2.0348, + "step": 20769 + }, + { + "epoch": 0.69, + "grad_norm": 0.7517434358596802, + "learning_rate": 4.4889520967275806e-06, + "loss": 1.9865, + "step": 20770 + }, + { + "epoch": 0.69, + "grad_norm": 0.7638693451881409, + "learning_rate": 4.4880652116830046e-06, + "loss": 2.0911, + "step": 20771 + }, + { + "epoch": 0.69, + "grad_norm": 0.7233844995498657, + "learning_rate": 4.487178388909045e-06, + "loss": 2.031, + "step": 20772 + }, + { + "epoch": 0.69, + "grad_norm": 0.7490777969360352, + "learning_rate": 4.486291628415705e-06, + "loss": 2.0302, + "step": 20773 + }, + { + "epoch": 0.69, + "grad_norm": 0.777400553226471, + "learning_rate": 4.485404930213012e-06, + "loss": 2.0361, + "step": 20774 + }, + { + "epoch": 0.69, + "grad_norm": 0.759142279624939, + "learning_rate": 4.484518294310985e-06, + "loss": 2.0815, + "step": 20775 + }, + { + "epoch": 0.69, + "grad_norm": 0.7801027894020081, + "learning_rate": 4.483631720719635e-06, + "loss": 2.1633, + "step": 20776 + }, + { + "epoch": 0.69, + "grad_norm": 0.7248302698135376, + "learning_rate": 4.482745209448985e-06, + "loss": 2.0487, + "step": 20777 + }, + { + "epoch": 0.69, + "grad_norm": 0.7488502860069275, + "learning_rate": 4.481858760509046e-06, + "loss": 2.0261, + "step": 20778 + }, + { + "epoch": 0.69, + "grad_norm": 0.7612342834472656, + "learning_rate": 4.480972373909827e-06, + "loss": 2.1106, + "step": 20779 + }, + { + "epoch": 0.69, + "grad_norm": 0.7591012716293335, + "learning_rate": 4.480086049661351e-06, + "loss": 2.0186, + "step": 20780 + }, + { + "epoch": 0.69, + "grad_norm": 0.759454607963562, + "learning_rate": 4.4791997877736295e-06, + "loss": 2.0683, + "step": 20781 + }, + { + "epoch": 0.69, + "grad_norm": 0.7688586115837097, + "learning_rate": 4.478313588256673e-06, + "loss": 2.1423, + "step": 20782 + }, + { + "epoch": 0.69, + "grad_norm": 0.7735649347305298, + "learning_rate": 4.477427451120491e-06, + "loss": 2.0813, + "step": 20783 + }, + { + "epoch": 0.69, + "grad_norm": 0.7577570676803589, + "learning_rate": 4.476541376375096e-06, + "loss": 2.0337, + "step": 20784 + }, + { + "epoch": 0.69, + "grad_norm": 0.7704837322235107, + "learning_rate": 4.475655364030503e-06, + "loss": 2.0298, + "step": 20785 + }, + { + "epoch": 0.69, + "grad_norm": 0.7391753792762756, + "learning_rate": 4.47476941409672e-06, + "loss": 2.0523, + "step": 20786 + }, + { + "epoch": 0.69, + "grad_norm": 0.7368494272232056, + "learning_rate": 4.473883526583749e-06, + "loss": 1.9872, + "step": 20787 + }, + { + "epoch": 0.69, + "grad_norm": 0.7604776620864868, + "learning_rate": 4.472997701501607e-06, + "loss": 2.0781, + "step": 20788 + }, + { + "epoch": 0.69, + "grad_norm": 0.7496863603591919, + "learning_rate": 4.472111938860294e-06, + "loss": 2.0919, + "step": 20789 + }, + { + "epoch": 0.69, + "grad_norm": 0.7324661612510681, + "learning_rate": 4.4712262386698245e-06, + "loss": 2.0437, + "step": 20790 + }, + { + "epoch": 0.69, + "grad_norm": 0.7379403710365295, + "learning_rate": 4.470340600940202e-06, + "loss": 1.989, + "step": 20791 + }, + { + "epoch": 0.69, + "grad_norm": 0.7646297812461853, + "learning_rate": 4.469455025681425e-06, + "loss": 1.9787, + "step": 20792 + }, + { + "epoch": 0.69, + "grad_norm": 0.7677327394485474, + "learning_rate": 4.468569512903506e-06, + "loss": 2.0304, + "step": 20793 + }, + { + "epoch": 0.69, + "grad_norm": 0.742173433303833, + "learning_rate": 4.4676840626164515e-06, + "loss": 2.0636, + "step": 20794 + }, + { + "epoch": 0.69, + "grad_norm": 0.756023108959198, + "learning_rate": 4.466798674830255e-06, + "loss": 2.0003, + "step": 20795 + }, + { + "epoch": 0.69, + "grad_norm": 0.755954921245575, + "learning_rate": 4.465913349554923e-06, + "loss": 2.086, + "step": 20796 + }, + { + "epoch": 0.69, + "grad_norm": 0.7673295736312866, + "learning_rate": 4.465028086800464e-06, + "loss": 2.0671, + "step": 20797 + }, + { + "epoch": 0.69, + "grad_norm": 0.7505176663398743, + "learning_rate": 4.4641428865768685e-06, + "loss": 2.0125, + "step": 20798 + }, + { + "epoch": 0.69, + "grad_norm": 0.7369430661201477, + "learning_rate": 4.463257748894147e-06, + "loss": 2.0274, + "step": 20799 + }, + { + "epoch": 0.69, + "grad_norm": 0.7497841119766235, + "learning_rate": 4.462372673762291e-06, + "loss": 2.033, + "step": 20800 + }, + { + "epoch": 0.69, + "grad_norm": 0.7442204356193542, + "learning_rate": 4.461487661191307e-06, + "loss": 1.992, + "step": 20801 + }, + { + "epoch": 0.69, + "grad_norm": 0.7401643991470337, + "learning_rate": 4.460602711191185e-06, + "loss": 2.0354, + "step": 20802 + }, + { + "epoch": 0.69, + "grad_norm": 0.7482219338417053, + "learning_rate": 4.459717823771932e-06, + "loss": 2.0555, + "step": 20803 + }, + { + "epoch": 0.69, + "grad_norm": 0.7713789343833923, + "learning_rate": 4.458832998943539e-06, + "loss": 2.0459, + "step": 20804 + }, + { + "epoch": 0.69, + "grad_norm": 0.7458503842353821, + "learning_rate": 4.457948236716e-06, + "loss": 2.0243, + "step": 20805 + }, + { + "epoch": 0.69, + "grad_norm": 0.8652258515357971, + "learning_rate": 4.457063537099314e-06, + "loss": 2.1076, + "step": 20806 + }, + { + "epoch": 0.69, + "grad_norm": 0.7668341994285583, + "learning_rate": 4.456178900103479e-06, + "loss": 2.1124, + "step": 20807 + }, + { + "epoch": 0.69, + "grad_norm": 0.7421086430549622, + "learning_rate": 4.4552943257384865e-06, + "loss": 2.0411, + "step": 20808 + }, + { + "epoch": 0.69, + "grad_norm": 0.7413544058799744, + "learning_rate": 4.4544098140143245e-06, + "loss": 2.0997, + "step": 20809 + }, + { + "epoch": 0.69, + "grad_norm": 0.7332929372787476, + "learning_rate": 4.453525364940995e-06, + "loss": 2.0415, + "step": 20810 + }, + { + "epoch": 0.69, + "grad_norm": 0.7543869614601135, + "learning_rate": 4.4526409785284805e-06, + "loss": 2.0877, + "step": 20811 + }, + { + "epoch": 0.69, + "grad_norm": 0.7388126254081726, + "learning_rate": 4.451756654786782e-06, + "loss": 2.0428, + "step": 20812 + }, + { + "epoch": 0.69, + "grad_norm": 0.737076461315155, + "learning_rate": 4.4508723937258845e-06, + "loss": 2.0285, + "step": 20813 + }, + { + "epoch": 0.69, + "grad_norm": 0.7427147626876831, + "learning_rate": 4.449988195355775e-06, + "loss": 2.06, + "step": 20814 + }, + { + "epoch": 0.69, + "grad_norm": 0.8073004484176636, + "learning_rate": 4.449104059686446e-06, + "loss": 2.0009, + "step": 20815 + }, + { + "epoch": 0.69, + "grad_norm": 0.7563554644584656, + "learning_rate": 4.448219986727892e-06, + "loss": 2.083, + "step": 20816 + }, + { + "epoch": 0.69, + "grad_norm": 0.7553269267082214, + "learning_rate": 4.447335976490092e-06, + "loss": 2.0941, + "step": 20817 + }, + { + "epoch": 0.69, + "grad_norm": 0.7905998229980469, + "learning_rate": 4.446452028983034e-06, + "loss": 2.1161, + "step": 20818 + }, + { + "epoch": 0.69, + "grad_norm": 0.7428863644599915, + "learning_rate": 4.44556814421671e-06, + "loss": 1.9842, + "step": 20819 + }, + { + "epoch": 0.69, + "grad_norm": 0.764682412147522, + "learning_rate": 4.444684322201097e-06, + "loss": 2.0279, + "step": 20820 + }, + { + "epoch": 0.69, + "grad_norm": 0.7824804186820984, + "learning_rate": 4.44380056294619e-06, + "loss": 2.0965, + "step": 20821 + }, + { + "epoch": 0.69, + "grad_norm": 0.7619942426681519, + "learning_rate": 4.4429168664619636e-06, + "loss": 2.0963, + "step": 20822 + }, + { + "epoch": 0.69, + "grad_norm": 0.8125794529914856, + "learning_rate": 4.442033232758409e-06, + "loss": 2.0527, + "step": 20823 + }, + { + "epoch": 0.69, + "grad_norm": 0.7460378408432007, + "learning_rate": 4.441149661845502e-06, + "loss": 2.0014, + "step": 20824 + }, + { + "epoch": 0.69, + "grad_norm": 0.7442352175712585, + "learning_rate": 4.440266153733232e-06, + "loss": 2.03, + "step": 20825 + }, + { + "epoch": 0.69, + "grad_norm": 0.786788284778595, + "learning_rate": 4.4393827084315774e-06, + "loss": 2.0477, + "step": 20826 + }, + { + "epoch": 0.69, + "grad_norm": 0.7161422967910767, + "learning_rate": 4.438499325950514e-06, + "loss": 2.0111, + "step": 20827 + }, + { + "epoch": 0.69, + "grad_norm": 0.7410697340965271, + "learning_rate": 4.437616006300025e-06, + "loss": 2.0749, + "step": 20828 + }, + { + "epoch": 0.69, + "grad_norm": 0.719542384147644, + "learning_rate": 4.436732749490096e-06, + "loss": 2.0226, + "step": 20829 + }, + { + "epoch": 0.69, + "grad_norm": 0.7266137599945068, + "learning_rate": 4.435849555530698e-06, + "loss": 1.9702, + "step": 20830 + }, + { + "epoch": 0.69, + "grad_norm": 0.7762044668197632, + "learning_rate": 4.434966424431809e-06, + "loss": 2.1038, + "step": 20831 + }, + { + "epoch": 0.69, + "grad_norm": 0.7996153831481934, + "learning_rate": 4.4340833562034105e-06, + "loss": 2.0618, + "step": 20832 + }, + { + "epoch": 0.69, + "grad_norm": 0.7810563445091248, + "learning_rate": 4.433200350855472e-06, + "loss": 2.0361, + "step": 20833 + }, + { + "epoch": 0.69, + "grad_norm": 0.7641668915748596, + "learning_rate": 4.432317408397978e-06, + "loss": 2.0255, + "step": 20834 + }, + { + "epoch": 0.69, + "grad_norm": 0.783947765827179, + "learning_rate": 4.431434528840899e-06, + "loss": 2.0733, + "step": 20835 + }, + { + "epoch": 0.69, + "grad_norm": 0.743037223815918, + "learning_rate": 4.430551712194206e-06, + "loss": 2.0015, + "step": 20836 + }, + { + "epoch": 0.69, + "grad_norm": 0.7233148217201233, + "learning_rate": 4.429668958467875e-06, + "loss": 1.9739, + "step": 20837 + }, + { + "epoch": 0.69, + "grad_norm": 0.7716609835624695, + "learning_rate": 4.428786267671883e-06, + "loss": 2.058, + "step": 20838 + }, + { + "epoch": 0.69, + "grad_norm": 0.7279636859893799, + "learning_rate": 4.427903639816199e-06, + "loss": 1.9954, + "step": 20839 + }, + { + "epoch": 0.69, + "grad_norm": 0.7316717505455017, + "learning_rate": 4.427021074910791e-06, + "loss": 2.1015, + "step": 20840 + }, + { + "epoch": 0.69, + "grad_norm": 0.7835211753845215, + "learning_rate": 4.426138572965636e-06, + "loss": 2.1206, + "step": 20841 + }, + { + "epoch": 0.69, + "grad_norm": 0.7458429336547852, + "learning_rate": 4.425256133990697e-06, + "loss": 1.9581, + "step": 20842 + }, + { + "epoch": 0.69, + "grad_norm": 0.7534080147743225, + "learning_rate": 4.4243737579959514e-06, + "loss": 2.0013, + "step": 20843 + }, + { + "epoch": 0.69, + "grad_norm": 0.7578807473182678, + "learning_rate": 4.423491444991359e-06, + "loss": 2.0306, + "step": 20844 + }, + { + "epoch": 0.69, + "grad_norm": 0.7501322031021118, + "learning_rate": 4.422609194986896e-06, + "loss": 2.0157, + "step": 20845 + }, + { + "epoch": 0.69, + "grad_norm": 0.7568397521972656, + "learning_rate": 4.421727007992521e-06, + "loss": 2.0421, + "step": 20846 + }, + { + "epoch": 0.69, + "grad_norm": 0.7529815435409546, + "learning_rate": 4.42084488401821e-06, + "loss": 2.0531, + "step": 20847 + }, + { + "epoch": 0.69, + "grad_norm": 0.7425066232681274, + "learning_rate": 4.419962823073924e-06, + "loss": 2.0758, + "step": 20848 + }, + { + "epoch": 0.69, + "grad_norm": 0.730161726474762, + "learning_rate": 4.419080825169623e-06, + "loss": 2.0353, + "step": 20849 + }, + { + "epoch": 0.69, + "grad_norm": 0.7719929814338684, + "learning_rate": 4.418198890315277e-06, + "loss": 2.0996, + "step": 20850 + }, + { + "epoch": 0.69, + "grad_norm": 0.7241494059562683, + "learning_rate": 4.417317018520852e-06, + "loss": 2.0753, + "step": 20851 + }, + { + "epoch": 0.69, + "grad_norm": 0.7702763080596924, + "learning_rate": 4.416435209796308e-06, + "loss": 2.0619, + "step": 20852 + }, + { + "epoch": 0.69, + "grad_norm": 0.7326291799545288, + "learning_rate": 4.415553464151603e-06, + "loss": 2.0125, + "step": 20853 + }, + { + "epoch": 0.69, + "grad_norm": 0.7589186429977417, + "learning_rate": 4.414671781596705e-06, + "loss": 2.025, + "step": 20854 + }, + { + "epoch": 0.69, + "grad_norm": 0.8006117939949036, + "learning_rate": 4.413790162141569e-06, + "loss": 2.0227, + "step": 20855 + }, + { + "epoch": 0.69, + "grad_norm": 0.7472789883613586, + "learning_rate": 4.412908605796161e-06, + "loss": 2.0299, + "step": 20856 + }, + { + "epoch": 0.69, + "grad_norm": 0.7466250061988831, + "learning_rate": 4.412027112570438e-06, + "loss": 2.0088, + "step": 20857 + }, + { + "epoch": 0.69, + "grad_norm": 0.7326253652572632, + "learning_rate": 4.411145682474354e-06, + "loss": 2.0088, + "step": 20858 + }, + { + "epoch": 0.69, + "grad_norm": 0.7280779480934143, + "learning_rate": 4.410264315517869e-06, + "loss": 2.0634, + "step": 20859 + }, + { + "epoch": 0.69, + "grad_norm": 0.743938684463501, + "learning_rate": 4.409383011710948e-06, + "loss": 2.0153, + "step": 20860 + }, + { + "epoch": 0.69, + "grad_norm": 0.739898145198822, + "learning_rate": 4.40850177106354e-06, + "loss": 2.0658, + "step": 20861 + }, + { + "epoch": 0.69, + "grad_norm": 0.7107728123664856, + "learning_rate": 4.407620593585598e-06, + "loss": 2.0208, + "step": 20862 + }, + { + "epoch": 0.69, + "grad_norm": 0.7273580431938171, + "learning_rate": 4.406739479287085e-06, + "loss": 2.0246, + "step": 20863 + }, + { + "epoch": 0.69, + "grad_norm": 0.7113926410675049, + "learning_rate": 4.405858428177949e-06, + "loss": 2.0506, + "step": 20864 + }, + { + "epoch": 0.69, + "grad_norm": 0.789524495601654, + "learning_rate": 4.404977440268149e-06, + "loss": 2.0891, + "step": 20865 + }, + { + "epoch": 0.69, + "grad_norm": 0.7280378937721252, + "learning_rate": 4.404096515567631e-06, + "loss": 1.9893, + "step": 20866 + }, + { + "epoch": 0.69, + "grad_norm": 0.7493146061897278, + "learning_rate": 4.403215654086353e-06, + "loss": 2.079, + "step": 20867 + }, + { + "epoch": 0.69, + "grad_norm": 0.7611211538314819, + "learning_rate": 4.402334855834264e-06, + "loss": 2.1065, + "step": 20868 + }, + { + "epoch": 0.69, + "grad_norm": 0.7188528776168823, + "learning_rate": 4.401454120821317e-06, + "loss": 2.0156, + "step": 20869 + }, + { + "epoch": 0.69, + "grad_norm": 0.7213507890701294, + "learning_rate": 4.400573449057461e-06, + "loss": 1.993, + "step": 20870 + }, + { + "epoch": 0.69, + "grad_norm": 0.7683514356613159, + "learning_rate": 4.39969284055264e-06, + "loss": 2.0045, + "step": 20871 + }, + { + "epoch": 0.69, + "grad_norm": 0.7380040287971497, + "learning_rate": 4.398812295316808e-06, + "loss": 2.0441, + "step": 20872 + }, + { + "epoch": 0.69, + "grad_norm": 0.7355031967163086, + "learning_rate": 4.397931813359916e-06, + "loss": 2.0502, + "step": 20873 + }, + { + "epoch": 0.69, + "grad_norm": 0.7478317022323608, + "learning_rate": 4.397051394691906e-06, + "loss": 2.0626, + "step": 20874 + }, + { + "epoch": 0.69, + "grad_norm": 0.7597023844718933, + "learning_rate": 4.396171039322723e-06, + "loss": 2.0386, + "step": 20875 + }, + { + "epoch": 0.69, + "grad_norm": 0.7457597851753235, + "learning_rate": 4.395290747262318e-06, + "loss": 2.0561, + "step": 20876 + }, + { + "epoch": 0.69, + "grad_norm": 0.7545474171638489, + "learning_rate": 4.3944105185206305e-06, + "loss": 2.1344, + "step": 20877 + }, + { + "epoch": 0.69, + "grad_norm": 0.7520078420639038, + "learning_rate": 4.393530353107612e-06, + "loss": 2.0438, + "step": 20878 + }, + { + "epoch": 0.69, + "grad_norm": 0.763316810131073, + "learning_rate": 4.3926502510332015e-06, + "loss": 2.1304, + "step": 20879 + }, + { + "epoch": 0.69, + "grad_norm": 0.7145616412162781, + "learning_rate": 4.391770212307339e-06, + "loss": 2.0254, + "step": 20880 + }, + { + "epoch": 0.69, + "grad_norm": 0.7690443396568298, + "learning_rate": 4.390890236939969e-06, + "loss": 2.1471, + "step": 20881 + }, + { + "epoch": 0.69, + "grad_norm": 0.7353788614273071, + "learning_rate": 4.390010324941038e-06, + "loss": 1.9803, + "step": 20882 + }, + { + "epoch": 0.69, + "grad_norm": 0.7601040005683899, + "learning_rate": 4.389130476320483e-06, + "loss": 2.1161, + "step": 20883 + }, + { + "epoch": 0.69, + "grad_norm": 0.7856127619743347, + "learning_rate": 4.388250691088238e-06, + "loss": 2.0307, + "step": 20884 + }, + { + "epoch": 0.69, + "grad_norm": 0.7432969212532043, + "learning_rate": 4.38737096925425e-06, + "loss": 2.0661, + "step": 20885 + }, + { + "epoch": 0.69, + "grad_norm": 0.7459391355514526, + "learning_rate": 4.386491310828458e-06, + "loss": 2.0061, + "step": 20886 + }, + { + "epoch": 0.69, + "grad_norm": 0.7476521730422974, + "learning_rate": 4.385611715820798e-06, + "loss": 2.1387, + "step": 20887 + }, + { + "epoch": 0.69, + "grad_norm": 0.7181851863861084, + "learning_rate": 4.384732184241202e-06, + "loss": 2.0542, + "step": 20888 + }, + { + "epoch": 0.69, + "grad_norm": 0.7343102097511292, + "learning_rate": 4.383852716099616e-06, + "loss": 2.0714, + "step": 20889 + }, + { + "epoch": 0.7, + "grad_norm": 0.7368954420089722, + "learning_rate": 4.382973311405966e-06, + "loss": 2.0334, + "step": 20890 + }, + { + "epoch": 0.7, + "grad_norm": 0.7403631210327148, + "learning_rate": 4.382093970170192e-06, + "loss": 2.0019, + "step": 20891 + }, + { + "epoch": 0.7, + "grad_norm": 0.7525413036346436, + "learning_rate": 4.381214692402235e-06, + "loss": 2.0172, + "step": 20892 + }, + { + "epoch": 0.7, + "grad_norm": 0.7506888508796692, + "learning_rate": 4.380335478112014e-06, + "loss": 2.0014, + "step": 20893 + }, + { + "epoch": 0.7, + "grad_norm": 0.7323147654533386, + "learning_rate": 4.379456327309469e-06, + "loss": 2.1111, + "step": 20894 + }, + { + "epoch": 0.7, + "grad_norm": 0.7436090707778931, + "learning_rate": 4.378577240004537e-06, + "loss": 2.0652, + "step": 20895 + }, + { + "epoch": 0.7, + "grad_norm": 0.7276225686073303, + "learning_rate": 4.3776982162071435e-06, + "loss": 1.9943, + "step": 20896 + }, + { + "epoch": 0.7, + "grad_norm": 0.7257574200630188, + "learning_rate": 4.376819255927218e-06, + "loss": 2.0492, + "step": 20897 + }, + { + "epoch": 0.7, + "grad_norm": 0.7536762356758118, + "learning_rate": 4.375940359174697e-06, + "loss": 2.0704, + "step": 20898 + }, + { + "epoch": 0.7, + "grad_norm": 0.737755537033081, + "learning_rate": 4.375061525959501e-06, + "loss": 2.0504, + "step": 20899 + }, + { + "epoch": 0.7, + "grad_norm": 0.7262691855430603, + "learning_rate": 4.374182756291564e-06, + "loss": 2.0501, + "step": 20900 + }, + { + "epoch": 0.7, + "grad_norm": 0.7723524570465088, + "learning_rate": 4.373304050180816e-06, + "loss": 1.97, + "step": 20901 + }, + { + "epoch": 0.7, + "grad_norm": 0.7597225308418274, + "learning_rate": 4.372425407637183e-06, + "loss": 2.0688, + "step": 20902 + }, + { + "epoch": 0.7, + "grad_norm": 0.7424806356430054, + "learning_rate": 4.3715468286705854e-06, + "loss": 1.9986, + "step": 20903 + }, + { + "epoch": 0.7, + "grad_norm": 0.7479363679885864, + "learning_rate": 4.370668313290957e-06, + "loss": 2.0455, + "step": 20904 + }, + { + "epoch": 0.7, + "grad_norm": 0.7332838177680969, + "learning_rate": 4.369789861508218e-06, + "loss": 2.1273, + "step": 20905 + }, + { + "epoch": 0.7, + "grad_norm": 0.7419009208679199, + "learning_rate": 4.36891147333229e-06, + "loss": 2.045, + "step": 20906 + }, + { + "epoch": 0.7, + "grad_norm": 0.7403627038002014, + "learning_rate": 4.3680331487731e-06, + "loss": 2.0272, + "step": 20907 + }, + { + "epoch": 0.7, + "grad_norm": 0.7130307555198669, + "learning_rate": 4.367154887840574e-06, + "loss": 2.0137, + "step": 20908 + }, + { + "epoch": 0.7, + "grad_norm": 0.7361346483230591, + "learning_rate": 4.366276690544633e-06, + "loss": 2.0525, + "step": 20909 + }, + { + "epoch": 0.7, + "grad_norm": 0.7482061386108398, + "learning_rate": 4.365398556895191e-06, + "loss": 2.0731, + "step": 20910 + }, + { + "epoch": 0.7, + "grad_norm": 0.7448564171791077, + "learning_rate": 4.364520486902178e-06, + "loss": 2.0639, + "step": 20911 + }, + { + "epoch": 0.7, + "grad_norm": 0.7333919405937195, + "learning_rate": 4.3636424805755055e-06, + "loss": 1.8909, + "step": 20912 + }, + { + "epoch": 0.7, + "grad_norm": 0.7612888216972351, + "learning_rate": 4.362764537925097e-06, + "loss": 2.0235, + "step": 20913 + }, + { + "epoch": 0.7, + "grad_norm": 0.762174129486084, + "learning_rate": 4.361886658960879e-06, + "loss": 1.967, + "step": 20914 + }, + { + "epoch": 0.7, + "grad_norm": 0.7602876424789429, + "learning_rate": 4.361008843692755e-06, + "loss": 2.0764, + "step": 20915 + }, + { + "epoch": 0.7, + "grad_norm": 0.7404674887657166, + "learning_rate": 4.360131092130646e-06, + "loss": 2.0103, + "step": 20916 + }, + { + "epoch": 0.7, + "grad_norm": 0.7679657340049744, + "learning_rate": 4.359253404284476e-06, + "loss": 2.1127, + "step": 20917 + }, + { + "epoch": 0.7, + "grad_norm": 0.7320108413696289, + "learning_rate": 4.3583757801641544e-06, + "loss": 2.0087, + "step": 20918 + }, + { + "epoch": 0.7, + "grad_norm": 0.7394277453422546, + "learning_rate": 4.357498219779594e-06, + "loss": 2.048, + "step": 20919 + }, + { + "epoch": 0.7, + "grad_norm": 0.7317453622817993, + "learning_rate": 4.356620723140714e-06, + "loss": 2.0135, + "step": 20920 + }, + { + "epoch": 0.7, + "grad_norm": 0.7696622014045715, + "learning_rate": 4.355743290257424e-06, + "loss": 2.0668, + "step": 20921 + }, + { + "epoch": 0.7, + "grad_norm": 0.7574049830436707, + "learning_rate": 4.354865921139637e-06, + "loss": 2.0867, + "step": 20922 + }, + { + "epoch": 0.7, + "grad_norm": 0.7377979159355164, + "learning_rate": 4.35398861579727e-06, + "loss": 2.001, + "step": 20923 + }, + { + "epoch": 0.7, + "grad_norm": 0.735192596912384, + "learning_rate": 4.353111374240232e-06, + "loss": 2.0848, + "step": 20924 + }, + { + "epoch": 0.7, + "grad_norm": 0.7342488765716553, + "learning_rate": 4.3522341964784275e-06, + "loss": 1.9757, + "step": 20925 + }, + { + "epoch": 0.7, + "grad_norm": 0.7903199195861816, + "learning_rate": 4.351357082521775e-06, + "loss": 2.0419, + "step": 20926 + }, + { + "epoch": 0.7, + "grad_norm": 0.7413288354873657, + "learning_rate": 4.350480032380181e-06, + "loss": 1.9733, + "step": 20927 + }, + { + "epoch": 0.7, + "grad_norm": 0.7485882043838501, + "learning_rate": 4.349603046063547e-06, + "loss": 2.0269, + "step": 20928 + }, + { + "epoch": 0.7, + "grad_norm": 0.7932947278022766, + "learning_rate": 4.3487261235817875e-06, + "loss": 2.1098, + "step": 20929 + }, + { + "epoch": 0.7, + "grad_norm": 0.7317259311676025, + "learning_rate": 4.347849264944812e-06, + "loss": 2.0328, + "step": 20930 + }, + { + "epoch": 0.7, + "grad_norm": 0.7468205690383911, + "learning_rate": 4.34697247016252e-06, + "loss": 2.0634, + "step": 20931 + }, + { + "epoch": 0.7, + "grad_norm": 0.7577621340751648, + "learning_rate": 4.346095739244822e-06, + "loss": 2.0523, + "step": 20932 + }, + { + "epoch": 0.7, + "grad_norm": 0.7528536319732666, + "learning_rate": 4.345219072201622e-06, + "loss": 2.1111, + "step": 20933 + }, + { + "epoch": 0.7, + "grad_norm": 0.7423205375671387, + "learning_rate": 4.344342469042819e-06, + "loss": 2.0947, + "step": 20934 + }, + { + "epoch": 0.7, + "grad_norm": 0.7344153523445129, + "learning_rate": 4.34346592977832e-06, + "loss": 2.0887, + "step": 20935 + }, + { + "epoch": 0.7, + "grad_norm": 0.7375187873840332, + "learning_rate": 4.342589454418036e-06, + "loss": 2.0906, + "step": 20936 + }, + { + "epoch": 0.7, + "grad_norm": 0.7269762754440308, + "learning_rate": 4.3417130429718525e-06, + "loss": 2.013, + "step": 20937 + }, + { + "epoch": 0.7, + "grad_norm": 0.725002110004425, + "learning_rate": 4.340836695449679e-06, + "loss": 1.9872, + "step": 20938 + }, + { + "epoch": 0.7, + "grad_norm": 0.7511231303215027, + "learning_rate": 4.339960411861419e-06, + "loss": 2.0393, + "step": 20939 + }, + { + "epoch": 0.7, + "grad_norm": 0.7751635313034058, + "learning_rate": 4.33908419221697e-06, + "loss": 2.0242, + "step": 20940 + }, + { + "epoch": 0.7, + "grad_norm": 0.7222945690155029, + "learning_rate": 4.338208036526227e-06, + "loss": 2.013, + "step": 20941 + }, + { + "epoch": 0.7, + "grad_norm": 0.7517219185829163, + "learning_rate": 4.337331944799095e-06, + "loss": 2.0914, + "step": 20942 + }, + { + "epoch": 0.7, + "grad_norm": 0.7752137184143066, + "learning_rate": 4.336455917045464e-06, + "loss": 2.0267, + "step": 20943 + }, + { + "epoch": 0.7, + "grad_norm": 0.7595149278640747, + "learning_rate": 4.335579953275235e-06, + "loss": 2.0407, + "step": 20944 + }, + { + "epoch": 0.7, + "grad_norm": 0.7288411259651184, + "learning_rate": 4.334704053498307e-06, + "loss": 2.0686, + "step": 20945 + }, + { + "epoch": 0.7, + "grad_norm": 0.7827559113502502, + "learning_rate": 4.333828217724572e-06, + "loss": 2.0261, + "step": 20946 + }, + { + "epoch": 0.7, + "grad_norm": 0.7418181300163269, + "learning_rate": 4.3329524459639235e-06, + "loss": 2.0447, + "step": 20947 + }, + { + "epoch": 0.7, + "grad_norm": 0.7492141127586365, + "learning_rate": 4.33207673822626e-06, + "loss": 2.0065, + "step": 20948 + }, + { + "epoch": 0.7, + "grad_norm": 0.7948200702667236, + "learning_rate": 4.331201094521471e-06, + "loss": 2.016, + "step": 20949 + }, + { + "epoch": 0.7, + "grad_norm": 0.7476620674133301, + "learning_rate": 4.330325514859447e-06, + "loss": 2.0004, + "step": 20950 + }, + { + "epoch": 0.7, + "grad_norm": 0.7451117634773254, + "learning_rate": 4.329449999250082e-06, + "loss": 2.0602, + "step": 20951 + }, + { + "epoch": 0.7, + "grad_norm": 0.7352684140205383, + "learning_rate": 4.328574547703272e-06, + "loss": 2.0919, + "step": 20952 + }, + { + "epoch": 0.7, + "grad_norm": 0.738615870475769, + "learning_rate": 4.3276991602288975e-06, + "loss": 1.9731, + "step": 20953 + }, + { + "epoch": 0.7, + "grad_norm": 0.7358562350273132, + "learning_rate": 4.326823836836859e-06, + "loss": 2.0772, + "step": 20954 + }, + { + "epoch": 0.7, + "grad_norm": 0.7248356342315674, + "learning_rate": 4.325948577537039e-06, + "loss": 2.0315, + "step": 20955 + }, + { + "epoch": 0.7, + "grad_norm": 0.7414695620536804, + "learning_rate": 4.3250733823393245e-06, + "loss": 1.9947, + "step": 20956 + }, + { + "epoch": 0.7, + "grad_norm": 0.7431443929672241, + "learning_rate": 4.324198251253604e-06, + "loss": 2.1041, + "step": 20957 + }, + { + "epoch": 0.7, + "grad_norm": 0.7553936839103699, + "learning_rate": 4.3233231842897725e-06, + "loss": 2.0654, + "step": 20958 + }, + { + "epoch": 0.7, + "grad_norm": 0.7770218849182129, + "learning_rate": 4.3224481814577015e-06, + "loss": 2.0002, + "step": 20959 + }, + { + "epoch": 0.7, + "grad_norm": 0.755456805229187, + "learning_rate": 4.321573242767284e-06, + "loss": 2.0834, + "step": 20960 + }, + { + "epoch": 0.7, + "grad_norm": 0.755619466304779, + "learning_rate": 4.3206983682284075e-06, + "loss": 2.0519, + "step": 20961 + }, + { + "epoch": 0.7, + "grad_norm": 0.7442072033882141, + "learning_rate": 4.319823557850948e-06, + "loss": 2.082, + "step": 20962 + }, + { + "epoch": 0.7, + "grad_norm": 0.7534950375556946, + "learning_rate": 4.3189488116447975e-06, + "loss": 2.083, + "step": 20963 + }, + { + "epoch": 0.7, + "grad_norm": 0.7583304047584534, + "learning_rate": 4.318074129619835e-06, + "loss": 2.0234, + "step": 20964 + }, + { + "epoch": 0.7, + "grad_norm": 0.7556930184364319, + "learning_rate": 4.317199511785935e-06, + "loss": 2.0207, + "step": 20965 + }, + { + "epoch": 0.7, + "grad_norm": 0.7183598875999451, + "learning_rate": 4.316324958152987e-06, + "loss": 2.0972, + "step": 20966 + }, + { + "epoch": 0.7, + "grad_norm": 0.734172523021698, + "learning_rate": 4.315450468730871e-06, + "loss": 2.1143, + "step": 20967 + }, + { + "epoch": 0.7, + "grad_norm": 0.71507328748703, + "learning_rate": 4.314576043529464e-06, + "loss": 2.035, + "step": 20968 + }, + { + "epoch": 0.7, + "grad_norm": 0.7492097020149231, + "learning_rate": 4.313701682558641e-06, + "loss": 2.0405, + "step": 20969 + }, + { + "epoch": 0.7, + "grad_norm": 0.7467382550239563, + "learning_rate": 4.312827385828287e-06, + "loss": 2.1079, + "step": 20970 + }, + { + "epoch": 0.7, + "grad_norm": 0.7231432199478149, + "learning_rate": 4.311953153348278e-06, + "loss": 2.0342, + "step": 20971 + }, + { + "epoch": 0.7, + "grad_norm": 0.7084182500839233, + "learning_rate": 4.311078985128484e-06, + "loss": 2.0111, + "step": 20972 + }, + { + "epoch": 0.7, + "grad_norm": 0.7634757161140442, + "learning_rate": 4.310204881178787e-06, + "loss": 2.0548, + "step": 20973 + }, + { + "epoch": 0.7, + "grad_norm": 0.7556639313697815, + "learning_rate": 4.3093308415090625e-06, + "loss": 2.0337, + "step": 20974 + }, + { + "epoch": 0.7, + "grad_norm": 0.7499269843101501, + "learning_rate": 4.3084568661291805e-06, + "loss": 1.9524, + "step": 20975 + }, + { + "epoch": 0.7, + "grad_norm": 0.7183787226676941, + "learning_rate": 4.30758295504902e-06, + "loss": 2.1186, + "step": 20976 + }, + { + "epoch": 0.7, + "grad_norm": 0.7539067268371582, + "learning_rate": 4.306709108278452e-06, + "loss": 1.9414, + "step": 20977 + }, + { + "epoch": 0.7, + "grad_norm": 0.7588797807693481, + "learning_rate": 4.305835325827344e-06, + "loss": 2.1252, + "step": 20978 + }, + { + "epoch": 0.7, + "grad_norm": 0.7294199466705322, + "learning_rate": 4.304961607705571e-06, + "loss": 2.0471, + "step": 20979 + }, + { + "epoch": 0.7, + "grad_norm": 0.7335314750671387, + "learning_rate": 4.304087953923012e-06, + "loss": 2.0491, + "step": 20980 + }, + { + "epoch": 0.7, + "grad_norm": 0.7508732080459595, + "learning_rate": 4.303214364489522e-06, + "loss": 2.0798, + "step": 20981 + }, + { + "epoch": 0.7, + "grad_norm": 0.7473549842834473, + "learning_rate": 4.302340839414977e-06, + "loss": 2.0493, + "step": 20982 + }, + { + "epoch": 0.7, + "grad_norm": 0.7533383965492249, + "learning_rate": 4.30146737870925e-06, + "loss": 2.0773, + "step": 20983 + }, + { + "epoch": 0.7, + "grad_norm": 0.7534587979316711, + "learning_rate": 4.300593982382201e-06, + "loss": 2.0773, + "step": 20984 + }, + { + "epoch": 0.7, + "grad_norm": 0.7360287308692932, + "learning_rate": 4.299720650443705e-06, + "loss": 2.0046, + "step": 20985 + }, + { + "epoch": 0.7, + "grad_norm": 0.7410458922386169, + "learning_rate": 4.298847382903624e-06, + "loss": 2.0817, + "step": 20986 + }, + { + "epoch": 0.7, + "grad_norm": 0.758012056350708, + "learning_rate": 4.29797417977182e-06, + "loss": 2.0247, + "step": 20987 + }, + { + "epoch": 0.7, + "grad_norm": 0.7606417536735535, + "learning_rate": 4.297101041058163e-06, + "loss": 2.0312, + "step": 20988 + }, + { + "epoch": 0.7, + "grad_norm": 0.774811863899231, + "learning_rate": 4.296227966772519e-06, + "loss": 2.0086, + "step": 20989 + }, + { + "epoch": 0.7, + "grad_norm": 0.7648612856864929, + "learning_rate": 4.295354956924749e-06, + "loss": 2.1601, + "step": 20990 + }, + { + "epoch": 0.7, + "grad_norm": 0.7314696311950684, + "learning_rate": 4.294482011524712e-06, + "loss": 2.0637, + "step": 20991 + }, + { + "epoch": 0.7, + "grad_norm": 0.7665406465530396, + "learning_rate": 4.293609130582274e-06, + "loss": 2.0349, + "step": 20992 + }, + { + "epoch": 0.7, + "grad_norm": 0.7729514837265015, + "learning_rate": 4.2927363141072985e-06, + "loss": 2.1032, + "step": 20993 + }, + { + "epoch": 0.7, + "grad_norm": 0.792939305305481, + "learning_rate": 4.2918635621096434e-06, + "loss": 1.9774, + "step": 20994 + }, + { + "epoch": 0.7, + "grad_norm": 0.7295057773590088, + "learning_rate": 4.290990874599165e-06, + "loss": 2.1058, + "step": 20995 + }, + { + "epoch": 0.7, + "grad_norm": 0.7357131242752075, + "learning_rate": 4.29011825158573e-06, + "loss": 2.0721, + "step": 20996 + }, + { + "epoch": 0.7, + "grad_norm": 0.7390365600585938, + "learning_rate": 4.289245693079188e-06, + "loss": 2.0238, + "step": 20997 + }, + { + "epoch": 0.7, + "grad_norm": 0.730293869972229, + "learning_rate": 4.288373199089406e-06, + "loss": 2.0366, + "step": 20998 + }, + { + "epoch": 0.7, + "grad_norm": 0.7476906180381775, + "learning_rate": 4.287500769626236e-06, + "loss": 2.0088, + "step": 20999 + }, + { + "epoch": 0.7, + "grad_norm": 0.7538316249847412, + "learning_rate": 4.28662840469953e-06, + "loss": 2.0543, + "step": 21000 + }, + { + "epoch": 0.7, + "grad_norm": 0.7453371286392212, + "learning_rate": 4.285756104319149e-06, + "loss": 1.9689, + "step": 21001 + }, + { + "epoch": 0.7, + "grad_norm": 0.7979702949523926, + "learning_rate": 4.28488386849495e-06, + "loss": 2.0149, + "step": 21002 + }, + { + "epoch": 0.7, + "grad_norm": 0.7417769432067871, + "learning_rate": 4.2840116972367825e-06, + "loss": 2.0298, + "step": 21003 + }, + { + "epoch": 0.7, + "grad_norm": 0.7227742075920105, + "learning_rate": 4.2831395905544995e-06, + "loss": 2.0126, + "step": 21004 + }, + { + "epoch": 0.7, + "grad_norm": 0.7313569188117981, + "learning_rate": 4.282267548457957e-06, + "loss": 2.1004, + "step": 21005 + }, + { + "epoch": 0.7, + "grad_norm": 0.7342365384101868, + "learning_rate": 4.281395570957002e-06, + "loss": 2.0503, + "step": 21006 + }, + { + "epoch": 0.7, + "grad_norm": 0.7255306243896484, + "learning_rate": 4.280523658061492e-06, + "loss": 2.0229, + "step": 21007 + }, + { + "epoch": 0.7, + "grad_norm": 0.7710427641868591, + "learning_rate": 4.279651809781269e-06, + "loss": 2.041, + "step": 21008 + }, + { + "epoch": 0.7, + "grad_norm": 0.7418169975280762, + "learning_rate": 4.2787800261261924e-06, + "loss": 2.0617, + "step": 21009 + }, + { + "epoch": 0.7, + "grad_norm": 0.747134804725647, + "learning_rate": 4.277908307106101e-06, + "loss": 2.0209, + "step": 21010 + }, + { + "epoch": 0.7, + "grad_norm": 0.7242542505264282, + "learning_rate": 4.277036652730854e-06, + "loss": 1.9805, + "step": 21011 + }, + { + "epoch": 0.7, + "grad_norm": 0.7391896843910217, + "learning_rate": 4.276165063010291e-06, + "loss": 2.0791, + "step": 21012 + }, + { + "epoch": 0.7, + "grad_norm": 0.7502656579017639, + "learning_rate": 4.275293537954257e-06, + "loss": 1.9633, + "step": 21013 + }, + { + "epoch": 0.7, + "grad_norm": 0.7289907336235046, + "learning_rate": 4.274422077572602e-06, + "loss": 2.0852, + "step": 21014 + }, + { + "epoch": 0.7, + "grad_norm": 0.7822039127349854, + "learning_rate": 4.273550681875175e-06, + "loss": 2.0263, + "step": 21015 + }, + { + "epoch": 0.7, + "grad_norm": 0.7572738528251648, + "learning_rate": 4.272679350871816e-06, + "loss": 2.0733, + "step": 21016 + }, + { + "epoch": 0.7, + "grad_norm": 0.7437106966972351, + "learning_rate": 4.271808084572365e-06, + "loss": 1.9805, + "step": 21017 + }, + { + "epoch": 0.7, + "grad_norm": 0.7589899897575378, + "learning_rate": 4.270936882986674e-06, + "loss": 2.083, + "step": 21018 + }, + { + "epoch": 0.7, + "grad_norm": 0.7456165552139282, + "learning_rate": 4.2700657461245766e-06, + "loss": 2.1259, + "step": 21019 + }, + { + "epoch": 0.7, + "grad_norm": 0.7609027624130249, + "learning_rate": 4.269194673995921e-06, + "loss": 2.091, + "step": 21020 + }, + { + "epoch": 0.7, + "grad_norm": 0.7182235717773438, + "learning_rate": 4.268323666610547e-06, + "loss": 2.0344, + "step": 21021 + }, + { + "epoch": 0.7, + "grad_norm": 0.7459161877632141, + "learning_rate": 4.267452723978288e-06, + "loss": 2.0636, + "step": 21022 + }, + { + "epoch": 0.7, + "grad_norm": 0.7360116839408875, + "learning_rate": 4.266581846108989e-06, + "loss": 2.1076, + "step": 21023 + }, + { + "epoch": 0.7, + "grad_norm": 0.7476702332496643, + "learning_rate": 4.265711033012491e-06, + "loss": 2.0299, + "step": 21024 + }, + { + "epoch": 0.7, + "grad_norm": 0.7542983889579773, + "learning_rate": 4.2648402846986305e-06, + "loss": 1.9617, + "step": 21025 + }, + { + "epoch": 0.7, + "grad_norm": 0.7456952929496765, + "learning_rate": 4.2639696011772394e-06, + "loss": 2.0613, + "step": 21026 + }, + { + "epoch": 0.7, + "grad_norm": 0.7220632433891296, + "learning_rate": 4.263098982458162e-06, + "loss": 2.0514, + "step": 21027 + }, + { + "epoch": 0.7, + "grad_norm": 0.8018488883972168, + "learning_rate": 4.262228428551225e-06, + "loss": 2.1148, + "step": 21028 + }, + { + "epoch": 0.7, + "grad_norm": 0.736148476600647, + "learning_rate": 4.2613579394662726e-06, + "loss": 2.0039, + "step": 21029 + }, + { + "epoch": 0.7, + "grad_norm": 0.7318221926689148, + "learning_rate": 4.260487515213133e-06, + "loss": 2.0497, + "step": 21030 + }, + { + "epoch": 0.7, + "grad_norm": 0.7555590271949768, + "learning_rate": 4.259617155801644e-06, + "loss": 1.9626, + "step": 21031 + }, + { + "epoch": 0.7, + "grad_norm": 0.715420126914978, + "learning_rate": 4.258746861241633e-06, + "loss": 2.0491, + "step": 21032 + }, + { + "epoch": 0.7, + "grad_norm": 0.7699520587921143, + "learning_rate": 4.257876631542939e-06, + "loss": 2.0804, + "step": 21033 + }, + { + "epoch": 0.7, + "grad_norm": 0.7378990650177002, + "learning_rate": 4.25700646671539e-06, + "loss": 2.055, + "step": 21034 + }, + { + "epoch": 0.7, + "grad_norm": 0.746109664440155, + "learning_rate": 4.256136366768812e-06, + "loss": 2.0452, + "step": 21035 + }, + { + "epoch": 0.7, + "grad_norm": 0.7395492792129517, + "learning_rate": 4.255266331713038e-06, + "loss": 2.0725, + "step": 21036 + }, + { + "epoch": 0.7, + "grad_norm": 0.7483282685279846, + "learning_rate": 4.2543963615579035e-06, + "loss": 2.066, + "step": 21037 + }, + { + "epoch": 0.7, + "grad_norm": 0.7508336305618286, + "learning_rate": 4.2535264563132305e-06, + "loss": 1.9821, + "step": 21038 + }, + { + "epoch": 0.7, + "grad_norm": 0.7591149210929871, + "learning_rate": 4.252656615988845e-06, + "loss": 2.0181, + "step": 21039 + }, + { + "epoch": 0.7, + "grad_norm": 0.7492714524269104, + "learning_rate": 4.251786840594581e-06, + "loss": 2.1037, + "step": 21040 + }, + { + "epoch": 0.7, + "grad_norm": 0.7390636205673218, + "learning_rate": 4.250917130140256e-06, + "loss": 2.0334, + "step": 21041 + }, + { + "epoch": 0.7, + "grad_norm": 0.752388596534729, + "learning_rate": 4.250047484635703e-06, + "loss": 2.0624, + "step": 21042 + }, + { + "epoch": 0.7, + "grad_norm": 0.7288717031478882, + "learning_rate": 4.2491779040907446e-06, + "loss": 2.0611, + "step": 21043 + }, + { + "epoch": 0.7, + "grad_norm": 0.7594519853591919, + "learning_rate": 4.2483083885152e-06, + "loss": 2.0125, + "step": 21044 + }, + { + "epoch": 0.7, + "grad_norm": 0.7400230765342712, + "learning_rate": 4.247438937918897e-06, + "loss": 2.0037, + "step": 21045 + }, + { + "epoch": 0.7, + "grad_norm": 0.742692768573761, + "learning_rate": 4.2465695523116605e-06, + "loss": 2.0332, + "step": 21046 + }, + { + "epoch": 0.7, + "grad_norm": 0.7476955056190491, + "learning_rate": 4.245700231703309e-06, + "loss": 1.9941, + "step": 21047 + }, + { + "epoch": 0.7, + "grad_norm": 0.7290368676185608, + "learning_rate": 4.244830976103661e-06, + "loss": 2.0993, + "step": 21048 + }, + { + "epoch": 0.7, + "grad_norm": 0.7544967532157898, + "learning_rate": 4.243961785522543e-06, + "loss": 2.0124, + "step": 21049 + }, + { + "epoch": 0.7, + "grad_norm": 0.7738626599311829, + "learning_rate": 4.243092659969769e-06, + "loss": 2.0769, + "step": 21050 + }, + { + "epoch": 0.7, + "grad_norm": 0.7474047541618347, + "learning_rate": 4.242223599455163e-06, + "loss": 2.0246, + "step": 21051 + }, + { + "epoch": 0.7, + "grad_norm": 0.7523102760314941, + "learning_rate": 4.241354603988537e-06, + "loss": 2.0479, + "step": 21052 + }, + { + "epoch": 0.7, + "grad_norm": 0.7259365916252136, + "learning_rate": 4.240485673579714e-06, + "loss": 2.0748, + "step": 21053 + }, + { + "epoch": 0.7, + "grad_norm": 0.7227917313575745, + "learning_rate": 4.239616808238506e-06, + "loss": 2.1259, + "step": 21054 + }, + { + "epoch": 0.7, + "grad_norm": 0.7475734353065491, + "learning_rate": 4.238748007974734e-06, + "loss": 2.0844, + "step": 21055 + }, + { + "epoch": 0.7, + "grad_norm": 0.7138159871101379, + "learning_rate": 4.237879272798212e-06, + "loss": 1.9265, + "step": 21056 + }, + { + "epoch": 0.7, + "grad_norm": 0.774865448474884, + "learning_rate": 4.237010602718749e-06, + "loss": 2.0619, + "step": 21057 + }, + { + "epoch": 0.7, + "grad_norm": 0.7187455892562866, + "learning_rate": 4.236141997746163e-06, + "loss": 2.0425, + "step": 21058 + }, + { + "epoch": 0.7, + "grad_norm": 0.7787527441978455, + "learning_rate": 4.23527345789027e-06, + "loss": 2.0832, + "step": 21059 + }, + { + "epoch": 0.7, + "grad_norm": 0.7430287599563599, + "learning_rate": 4.234404983160879e-06, + "loss": 2.0817, + "step": 21060 + }, + { + "epoch": 0.7, + "grad_norm": 0.747592031955719, + "learning_rate": 4.233536573567798e-06, + "loss": 2.037, + "step": 21061 + }, + { + "epoch": 0.7, + "grad_norm": 0.7205986380577087, + "learning_rate": 4.232668229120845e-06, + "loss": 1.9729, + "step": 21062 + }, + { + "epoch": 0.7, + "grad_norm": 0.7626847624778748, + "learning_rate": 4.2317999498298225e-06, + "loss": 2.0343, + "step": 21063 + }, + { + "epoch": 0.7, + "grad_norm": 0.7446271181106567, + "learning_rate": 4.230931735704548e-06, + "loss": 2.0758, + "step": 21064 + }, + { + "epoch": 0.7, + "grad_norm": 0.738436222076416, + "learning_rate": 4.230063586754824e-06, + "loss": 1.9513, + "step": 21065 + }, + { + "epoch": 0.7, + "grad_norm": 0.7439196705818176, + "learning_rate": 4.229195502990459e-06, + "loss": 2.0742, + "step": 21066 + }, + { + "epoch": 0.7, + "grad_norm": 0.7966446876525879, + "learning_rate": 4.228327484421258e-06, + "loss": 2.033, + "step": 21067 + }, + { + "epoch": 0.7, + "grad_norm": 0.7459100484848022, + "learning_rate": 4.227459531057036e-06, + "loss": 2.0206, + "step": 21068 + }, + { + "epoch": 0.7, + "grad_norm": 0.7852141857147217, + "learning_rate": 4.2265916429075936e-06, + "loss": 2.1247, + "step": 21069 + }, + { + "epoch": 0.7, + "grad_norm": 0.7393693327903748, + "learning_rate": 4.225723819982732e-06, + "loss": 2.1023, + "step": 21070 + }, + { + "epoch": 0.7, + "grad_norm": 0.7960148453712463, + "learning_rate": 4.224856062292261e-06, + "loss": 2.0011, + "step": 21071 + }, + { + "epoch": 0.7, + "grad_norm": 0.7540220022201538, + "learning_rate": 4.2239883698459786e-06, + "loss": 2.0546, + "step": 21072 + }, + { + "epoch": 0.7, + "grad_norm": 0.7263895869255066, + "learning_rate": 4.223120742653694e-06, + "loss": 2.0958, + "step": 21073 + }, + { + "epoch": 0.7, + "grad_norm": 0.7181264758110046, + "learning_rate": 4.222253180725202e-06, + "loss": 2.0466, + "step": 21074 + }, + { + "epoch": 0.7, + "grad_norm": 0.7425655722618103, + "learning_rate": 4.2213856840703115e-06, + "loss": 2.0145, + "step": 21075 + }, + { + "epoch": 0.7, + "grad_norm": 0.7140820622444153, + "learning_rate": 4.220518252698814e-06, + "loss": 2.0511, + "step": 21076 + }, + { + "epoch": 0.7, + "grad_norm": 0.7371968626976013, + "learning_rate": 4.2196508866205155e-06, + "loss": 2.0699, + "step": 21077 + }, + { + "epoch": 0.7, + "grad_norm": 0.7523742914199829, + "learning_rate": 4.2187835858452205e-06, + "loss": 2.0723, + "step": 21078 + }, + { + "epoch": 0.7, + "grad_norm": 0.7941635847091675, + "learning_rate": 4.217916350382713e-06, + "loss": 2.0678, + "step": 21079 + }, + { + "epoch": 0.7, + "grad_norm": 0.7553759813308716, + "learning_rate": 4.217049180242798e-06, + "loss": 2.083, + "step": 21080 + }, + { + "epoch": 0.7, + "grad_norm": 0.7196733951568604, + "learning_rate": 4.2161820754352765e-06, + "loss": 2.1521, + "step": 21081 + }, + { + "epoch": 0.7, + "grad_norm": 0.7599328756332397, + "learning_rate": 4.2153150359699405e-06, + "loss": 2.0044, + "step": 21082 + }, + { + "epoch": 0.7, + "grad_norm": 0.7524970769882202, + "learning_rate": 4.2144480618565794e-06, + "loss": 2.0398, + "step": 21083 + }, + { + "epoch": 0.7, + "grad_norm": 0.766802966594696, + "learning_rate": 4.2135811531049985e-06, + "loss": 2.0346, + "step": 21084 + }, + { + "epoch": 0.7, + "grad_norm": 0.7712546586990356, + "learning_rate": 4.212714309724984e-06, + "loss": 2.0482, + "step": 21085 + }, + { + "epoch": 0.7, + "grad_norm": 0.7100131511688232, + "learning_rate": 4.211847531726333e-06, + "loss": 2.0459, + "step": 21086 + }, + { + "epoch": 0.7, + "grad_norm": 0.7697110176086426, + "learning_rate": 4.210980819118837e-06, + "loss": 2.0677, + "step": 21087 + }, + { + "epoch": 0.7, + "grad_norm": 0.7413848638534546, + "learning_rate": 4.210114171912284e-06, + "loss": 2.047, + "step": 21088 + }, + { + "epoch": 0.7, + "grad_norm": 0.757116973400116, + "learning_rate": 4.209247590116467e-06, + "loss": 2.0313, + "step": 21089 + }, + { + "epoch": 0.7, + "grad_norm": 0.741335928440094, + "learning_rate": 4.208381073741182e-06, + "loss": 2.0835, + "step": 21090 + }, + { + "epoch": 0.7, + "grad_norm": 0.7616726756095886, + "learning_rate": 4.2075146227962125e-06, + "loss": 2.0021, + "step": 21091 + }, + { + "epoch": 0.7, + "grad_norm": 0.7213118076324463, + "learning_rate": 4.2066482372913455e-06, + "loss": 1.9977, + "step": 21092 + }, + { + "epoch": 0.7, + "grad_norm": 0.723595917224884, + "learning_rate": 4.2057819172363705e-06, + "loss": 2.0951, + "step": 21093 + }, + { + "epoch": 0.7, + "grad_norm": 0.7553505897521973, + "learning_rate": 4.204915662641079e-06, + "loss": 2.0968, + "step": 21094 + }, + { + "epoch": 0.7, + "grad_norm": 0.7894681692123413, + "learning_rate": 4.2040494735152545e-06, + "loss": 2.0131, + "step": 21095 + }, + { + "epoch": 0.7, + "grad_norm": 0.7436649799346924, + "learning_rate": 4.203183349868678e-06, + "loss": 2.0492, + "step": 21096 + }, + { + "epoch": 0.7, + "grad_norm": 0.7550320625305176, + "learning_rate": 4.202317291711143e-06, + "loss": 1.998, + "step": 21097 + }, + { + "epoch": 0.7, + "grad_norm": 0.7144345641136169, + "learning_rate": 4.201451299052426e-06, + "loss": 1.9696, + "step": 21098 + }, + { + "epoch": 0.7, + "grad_norm": 0.7527051568031311, + "learning_rate": 4.200585371902313e-06, + "loss": 2.009, + "step": 21099 + }, + { + "epoch": 0.7, + "grad_norm": 0.7350705862045288, + "learning_rate": 4.199719510270597e-06, + "loss": 2.0169, + "step": 21100 + }, + { + "epoch": 0.7, + "grad_norm": 0.7215577960014343, + "learning_rate": 4.198853714167042e-06, + "loss": 2.0594, + "step": 21101 + }, + { + "epoch": 0.7, + "grad_norm": 0.7307222485542297, + "learning_rate": 4.197987983601438e-06, + "loss": 2.0066, + "step": 21102 + }, + { + "epoch": 0.7, + "grad_norm": 0.7510185241699219, + "learning_rate": 4.197122318583568e-06, + "loss": 2.0858, + "step": 21103 + }, + { + "epoch": 0.7, + "grad_norm": 0.7271878123283386, + "learning_rate": 4.196256719123212e-06, + "loss": 2.0209, + "step": 21104 + }, + { + "epoch": 0.7, + "grad_norm": 0.7476165294647217, + "learning_rate": 4.19539118523014e-06, + "loss": 2.0544, + "step": 21105 + }, + { + "epoch": 0.7, + "grad_norm": 0.7673901319503784, + "learning_rate": 4.194525716914142e-06, + "loss": 2.0345, + "step": 21106 + }, + { + "epoch": 0.7, + "grad_norm": 0.7536939978599548, + "learning_rate": 4.193660314184985e-06, + "loss": 1.9984, + "step": 21107 + }, + { + "epoch": 0.7, + "grad_norm": 0.7503591179847717, + "learning_rate": 4.1927949770524515e-06, + "loss": 2.0912, + "step": 21108 + }, + { + "epoch": 0.7, + "grad_norm": 0.7571287155151367, + "learning_rate": 4.191929705526321e-06, + "loss": 2.0141, + "step": 21109 + }, + { + "epoch": 0.7, + "grad_norm": 0.7579281330108643, + "learning_rate": 4.191064499616364e-06, + "loss": 2.0657, + "step": 21110 + }, + { + "epoch": 0.7, + "grad_norm": 0.7343004941940308, + "learning_rate": 4.190199359332353e-06, + "loss": 2.0356, + "step": 21111 + }, + { + "epoch": 0.7, + "grad_norm": 0.7436162233352661, + "learning_rate": 4.189334284684068e-06, + "loss": 2.0623, + "step": 21112 + }, + { + "epoch": 0.7, + "grad_norm": 0.7513119578361511, + "learning_rate": 4.18846927568128e-06, + "loss": 2.0481, + "step": 21113 + }, + { + "epoch": 0.7, + "grad_norm": 0.7383400797843933, + "learning_rate": 4.187604332333754e-06, + "loss": 2.0409, + "step": 21114 + }, + { + "epoch": 0.7, + "grad_norm": 0.7594037055969238, + "learning_rate": 4.18673945465127e-06, + "loss": 1.9551, + "step": 21115 + }, + { + "epoch": 0.7, + "grad_norm": 0.7496464848518372, + "learning_rate": 4.1858746426436e-06, + "loss": 2.0357, + "step": 21116 + }, + { + "epoch": 0.7, + "grad_norm": 0.7450153827667236, + "learning_rate": 4.185009896320511e-06, + "loss": 2.0899, + "step": 21117 + }, + { + "epoch": 0.7, + "grad_norm": 0.7222242951393127, + "learning_rate": 4.184145215691768e-06, + "loss": 2.0336, + "step": 21118 + }, + { + "epoch": 0.7, + "grad_norm": 0.7727676630020142, + "learning_rate": 4.183280600767148e-06, + "loss": 1.9743, + "step": 21119 + }, + { + "epoch": 0.7, + "grad_norm": 0.8211701512336731, + "learning_rate": 4.1824160515564116e-06, + "loss": 2.0295, + "step": 21120 + }, + { + "epoch": 0.7, + "grad_norm": 0.7615621089935303, + "learning_rate": 4.181551568069328e-06, + "loss": 2.1144, + "step": 21121 + }, + { + "epoch": 0.7, + "grad_norm": 0.7511119842529297, + "learning_rate": 4.180687150315673e-06, + "loss": 2.0675, + "step": 21122 + }, + { + "epoch": 0.7, + "grad_norm": 0.7202280759811401, + "learning_rate": 4.179822798305198e-06, + "loss": 2.0432, + "step": 21123 + }, + { + "epoch": 0.7, + "grad_norm": 0.7367507815361023, + "learning_rate": 4.1789585120476714e-06, + "loss": 2.1228, + "step": 21124 + }, + { + "epoch": 0.7, + "grad_norm": 0.7337611317634583, + "learning_rate": 4.178094291552866e-06, + "loss": 2.0564, + "step": 21125 + }, + { + "epoch": 0.7, + "grad_norm": 0.760400116443634, + "learning_rate": 4.177230136830538e-06, + "loss": 2.0925, + "step": 21126 + }, + { + "epoch": 0.7, + "grad_norm": 0.7509064674377441, + "learning_rate": 4.176366047890448e-06, + "loss": 2.1394, + "step": 21127 + }, + { + "epoch": 0.7, + "grad_norm": 0.7551696300506592, + "learning_rate": 4.175502024742365e-06, + "loss": 2.0501, + "step": 21128 + }, + { + "epoch": 0.7, + "grad_norm": 0.7799085974693298, + "learning_rate": 4.174638067396044e-06, + "loss": 2.065, + "step": 21129 + }, + { + "epoch": 0.7, + "grad_norm": 0.7300541400909424, + "learning_rate": 4.173774175861247e-06, + "loss": 2.0639, + "step": 21130 + }, + { + "epoch": 0.7, + "grad_norm": 0.7414801716804504, + "learning_rate": 4.172910350147739e-06, + "loss": 2.0452, + "step": 21131 + }, + { + "epoch": 0.7, + "grad_norm": 0.7696745991706848, + "learning_rate": 4.172046590265275e-06, + "loss": 2.094, + "step": 21132 + }, + { + "epoch": 0.7, + "grad_norm": 0.7716477513313293, + "learning_rate": 4.171182896223609e-06, + "loss": 2.1346, + "step": 21133 + }, + { + "epoch": 0.7, + "grad_norm": 0.7372884154319763, + "learning_rate": 4.170319268032506e-06, + "loss": 2.0353, + "step": 21134 + }, + { + "epoch": 0.7, + "grad_norm": 0.7364732027053833, + "learning_rate": 4.16945570570172e-06, + "loss": 2.0391, + "step": 21135 + }, + { + "epoch": 0.7, + "grad_norm": 0.7534252405166626, + "learning_rate": 4.168592209241002e-06, + "loss": 2.082, + "step": 21136 + }, + { + "epoch": 0.7, + "grad_norm": 0.7084001898765564, + "learning_rate": 4.167728778660113e-06, + "loss": 1.9833, + "step": 21137 + }, + { + "epoch": 0.7, + "grad_norm": 0.7450693249702454, + "learning_rate": 4.166865413968809e-06, + "loss": 2.0622, + "step": 21138 + }, + { + "epoch": 0.7, + "grad_norm": 0.7291602492332458, + "learning_rate": 4.166002115176837e-06, + "loss": 2.0135, + "step": 21139 + }, + { + "epoch": 0.7, + "grad_norm": 0.7351317405700684, + "learning_rate": 4.165138882293959e-06, + "loss": 2.0216, + "step": 21140 + }, + { + "epoch": 0.7, + "grad_norm": 0.7490224242210388, + "learning_rate": 4.16427571532992e-06, + "loss": 2.0443, + "step": 21141 + }, + { + "epoch": 0.7, + "grad_norm": 0.7626574635505676, + "learning_rate": 4.163412614294473e-06, + "loss": 2.0468, + "step": 21142 + }, + { + "epoch": 0.7, + "grad_norm": 0.7457489371299744, + "learning_rate": 4.162549579197368e-06, + "loss": 1.9997, + "step": 21143 + }, + { + "epoch": 0.7, + "grad_norm": 0.7390961647033691, + "learning_rate": 4.1616866100483646e-06, + "loss": 2.088, + "step": 21144 + }, + { + "epoch": 0.7, + "grad_norm": 0.7708448171615601, + "learning_rate": 4.160823706857197e-06, + "loss": 2.1157, + "step": 21145 + }, + { + "epoch": 0.7, + "grad_norm": 0.7340668439865112, + "learning_rate": 4.1599608696336215e-06, + "loss": 2.0811, + "step": 21146 + }, + { + "epoch": 0.7, + "grad_norm": 0.7487319111824036, + "learning_rate": 4.159098098387388e-06, + "loss": 2.0759, + "step": 21147 + }, + { + "epoch": 0.7, + "grad_norm": 0.7525543570518494, + "learning_rate": 4.158235393128242e-06, + "loss": 2.1056, + "step": 21148 + }, + { + "epoch": 0.7, + "grad_norm": 0.7721343040466309, + "learning_rate": 4.157372753865925e-06, + "loss": 2.0569, + "step": 21149 + }, + { + "epoch": 0.7, + "grad_norm": 0.711355984210968, + "learning_rate": 4.156510180610191e-06, + "loss": 2.0776, + "step": 21150 + }, + { + "epoch": 0.7, + "grad_norm": 0.7315240502357483, + "learning_rate": 4.155647673370775e-06, + "loss": 2.0584, + "step": 21151 + }, + { + "epoch": 0.7, + "grad_norm": 0.748462975025177, + "learning_rate": 4.154785232157428e-06, + "loss": 2.0153, + "step": 21152 + }, + { + "epoch": 0.7, + "grad_norm": 0.7599499821662903, + "learning_rate": 4.153922856979894e-06, + "loss": 2.0382, + "step": 21153 + }, + { + "epoch": 0.7, + "grad_norm": 0.7571646571159363, + "learning_rate": 4.153060547847915e-06, + "loss": 1.9976, + "step": 21154 + }, + { + "epoch": 0.7, + "grad_norm": 0.7218501567840576, + "learning_rate": 4.152198304771226e-06, + "loss": 2.0599, + "step": 21155 + }, + { + "epoch": 0.7, + "grad_norm": 0.7356863021850586, + "learning_rate": 4.1513361277595775e-06, + "loss": 2.0344, + "step": 21156 + }, + { + "epoch": 0.7, + "grad_norm": 0.78043133020401, + "learning_rate": 4.150474016822706e-06, + "loss": 2.1459, + "step": 21157 + }, + { + "epoch": 0.7, + "grad_norm": 0.7683544754981995, + "learning_rate": 4.149611971970348e-06, + "loss": 2.0762, + "step": 21158 + }, + { + "epoch": 0.7, + "grad_norm": 0.8066489100456238, + "learning_rate": 4.148749993212245e-06, + "loss": 2.0565, + "step": 21159 + }, + { + "epoch": 0.7, + "grad_norm": 0.7338630557060242, + "learning_rate": 4.147888080558139e-06, + "loss": 2.012, + "step": 21160 + }, + { + "epoch": 0.7, + "grad_norm": 0.7711341381072998, + "learning_rate": 4.147026234017759e-06, + "loss": 2.0805, + "step": 21161 + }, + { + "epoch": 0.7, + "grad_norm": 0.7382785081863403, + "learning_rate": 4.146164453600851e-06, + "loss": 2.1114, + "step": 21162 + }, + { + "epoch": 0.7, + "grad_norm": 0.7462503910064697, + "learning_rate": 4.145302739317147e-06, + "loss": 2.0668, + "step": 21163 + }, + { + "epoch": 0.7, + "grad_norm": 0.7692015171051025, + "learning_rate": 4.1444410911763766e-06, + "loss": 2.0866, + "step": 21164 + }, + { + "epoch": 0.7, + "grad_norm": 0.7501469254493713, + "learning_rate": 4.14357950918828e-06, + "loss": 2.1324, + "step": 21165 + }, + { + "epoch": 0.7, + "grad_norm": 0.7722364664077759, + "learning_rate": 4.142717993362596e-06, + "loss": 2.0301, + "step": 21166 + }, + { + "epoch": 0.7, + "grad_norm": 0.7971662282943726, + "learning_rate": 4.141856543709045e-06, + "loss": 2.0892, + "step": 21167 + }, + { + "epoch": 0.7, + "grad_norm": 0.749788224697113, + "learning_rate": 4.140995160237366e-06, + "loss": 2.0432, + "step": 21168 + }, + { + "epoch": 0.7, + "grad_norm": 0.7396644949913025, + "learning_rate": 4.140133842957292e-06, + "loss": 2.0979, + "step": 21169 + }, + { + "epoch": 0.7, + "grad_norm": 0.7401080131530762, + "learning_rate": 4.139272591878553e-06, + "loss": 2.1058, + "step": 21170 + }, + { + "epoch": 0.7, + "grad_norm": 0.7734871506690979, + "learning_rate": 4.138411407010874e-06, + "loss": 2.0313, + "step": 21171 + }, + { + "epoch": 0.7, + "grad_norm": 0.7337951064109802, + "learning_rate": 4.1375502883639905e-06, + "loss": 2.0812, + "step": 21172 + }, + { + "epoch": 0.7, + "grad_norm": 0.7274264693260193, + "learning_rate": 4.1366892359476255e-06, + "loss": 2.0601, + "step": 21173 + }, + { + "epoch": 0.7, + "grad_norm": 0.7199066877365112, + "learning_rate": 4.135828249771509e-06, + "loss": 2.0544, + "step": 21174 + }, + { + "epoch": 0.7, + "grad_norm": 0.7285482883453369, + "learning_rate": 4.134967329845371e-06, + "loss": 1.9976, + "step": 21175 + }, + { + "epoch": 0.7, + "grad_norm": 0.7522498965263367, + "learning_rate": 4.134106476178935e-06, + "loss": 2.1344, + "step": 21176 + }, + { + "epoch": 0.7, + "grad_norm": 0.7349876761436462, + "learning_rate": 4.133245688781923e-06, + "loss": 2.0672, + "step": 21177 + }, + { + "epoch": 0.7, + "grad_norm": 0.7467001676559448, + "learning_rate": 4.132384967664063e-06, + "loss": 2.0416, + "step": 21178 + }, + { + "epoch": 0.7, + "grad_norm": 0.7529169321060181, + "learning_rate": 4.131524312835086e-06, + "loss": 2.0785, + "step": 21179 + }, + { + "epoch": 0.7, + "grad_norm": 0.7465900182723999, + "learning_rate": 4.130663724304701e-06, + "loss": 2.0765, + "step": 21180 + }, + { + "epoch": 0.7, + "grad_norm": 0.7788456678390503, + "learning_rate": 4.129803202082638e-06, + "loss": 1.976, + "step": 21181 + }, + { + "epoch": 0.7, + "grad_norm": 0.7323268055915833, + "learning_rate": 4.12894274617862e-06, + "loss": 2.0643, + "step": 21182 + }, + { + "epoch": 0.7, + "grad_norm": 0.759446382522583, + "learning_rate": 4.128082356602364e-06, + "loss": 2.0903, + "step": 21183 + }, + { + "epoch": 0.7, + "grad_norm": 0.740777313709259, + "learning_rate": 4.127222033363596e-06, + "loss": 2.1029, + "step": 21184 + }, + { + "epoch": 0.7, + "grad_norm": 0.7321988344192505, + "learning_rate": 4.1263617764720305e-06, + "loss": 2.109, + "step": 21185 + }, + { + "epoch": 0.7, + "grad_norm": 0.7759171724319458, + "learning_rate": 4.125501585937385e-06, + "loss": 2.0956, + "step": 21186 + }, + { + "epoch": 0.7, + "grad_norm": 0.7270573973655701, + "learning_rate": 4.1246414617693785e-06, + "loss": 2.0221, + "step": 21187 + }, + { + "epoch": 0.7, + "grad_norm": 0.7446803450584412, + "learning_rate": 4.123781403977737e-06, + "loss": 2.0967, + "step": 21188 + }, + { + "epoch": 0.7, + "grad_norm": 0.7315859198570251, + "learning_rate": 4.122921412572163e-06, + "loss": 1.9801, + "step": 21189 + }, + { + "epoch": 0.7, + "grad_norm": 0.7851159572601318, + "learning_rate": 4.122061487562378e-06, + "loss": 2.1115, + "step": 21190 + }, + { + "epoch": 0.71, + "grad_norm": 0.7572748064994812, + "learning_rate": 4.121201628958101e-06, + "loss": 2.042, + "step": 21191 + }, + { + "epoch": 0.71, + "grad_norm": 0.8017840385437012, + "learning_rate": 4.12034183676904e-06, + "loss": 2.0625, + "step": 21192 + }, + { + "epoch": 0.71, + "grad_norm": 0.7449313402175903, + "learning_rate": 4.119482111004913e-06, + "loss": 2.056, + "step": 21193 + }, + { + "epoch": 0.71, + "grad_norm": 0.7714320421218872, + "learning_rate": 4.118622451675428e-06, + "loss": 2.0491, + "step": 21194 + }, + { + "epoch": 0.71, + "grad_norm": 0.7843227386474609, + "learning_rate": 4.117762858790304e-06, + "loss": 2.0139, + "step": 21195 + }, + { + "epoch": 0.71, + "grad_norm": 0.7438114285469055, + "learning_rate": 4.116903332359243e-06, + "loss": 2.078, + "step": 21196 + }, + { + "epoch": 0.71, + "grad_norm": 0.7246606945991516, + "learning_rate": 4.116043872391966e-06, + "loss": 2.0367, + "step": 21197 + }, + { + "epoch": 0.71, + "grad_norm": 0.7526752352714539, + "learning_rate": 4.115184478898176e-06, + "loss": 2.0745, + "step": 21198 + }, + { + "epoch": 0.71, + "grad_norm": 0.7319991588592529, + "learning_rate": 4.114325151887578e-06, + "loss": 2.0645, + "step": 21199 + }, + { + "epoch": 0.71, + "grad_norm": 0.7308666110038757, + "learning_rate": 4.113465891369886e-06, + "loss": 2.0629, + "step": 21200 + }, + { + "epoch": 0.71, + "grad_norm": 0.742752194404602, + "learning_rate": 4.112606697354814e-06, + "loss": 2.045, + "step": 21201 + }, + { + "epoch": 0.71, + "grad_norm": 0.724912703037262, + "learning_rate": 4.111747569852053e-06, + "loss": 2.0269, + "step": 21202 + }, + { + "epoch": 0.71, + "grad_norm": 0.7503074407577515, + "learning_rate": 4.110888508871319e-06, + "loss": 2.1369, + "step": 21203 + }, + { + "epoch": 0.71, + "grad_norm": 0.7225751876831055, + "learning_rate": 4.110029514422318e-06, + "loss": 2.0305, + "step": 21204 + }, + { + "epoch": 0.71, + "grad_norm": 0.7492079734802246, + "learning_rate": 4.109170586514747e-06, + "loss": 2.0346, + "step": 21205 + }, + { + "epoch": 0.71, + "grad_norm": 0.7500308752059937, + "learning_rate": 4.108311725158319e-06, + "loss": 1.9794, + "step": 21206 + }, + { + "epoch": 0.71, + "grad_norm": 0.8019648790359497, + "learning_rate": 4.107452930362732e-06, + "loss": 1.995, + "step": 21207 + }, + { + "epoch": 0.71, + "grad_norm": 0.7460503578186035, + "learning_rate": 4.106594202137685e-06, + "loss": 2.0129, + "step": 21208 + }, + { + "epoch": 0.71, + "grad_norm": 0.7221418619155884, + "learning_rate": 4.105735540492883e-06, + "loss": 2.0176, + "step": 21209 + }, + { + "epoch": 0.71, + "grad_norm": 0.7298194169998169, + "learning_rate": 4.10487694543803e-06, + "loss": 2.1055, + "step": 21210 + }, + { + "epoch": 0.71, + "grad_norm": 0.7203308343887329, + "learning_rate": 4.1040184169828215e-06, + "loss": 2.0188, + "step": 21211 + }, + { + "epoch": 0.71, + "grad_norm": 0.7507845163345337, + "learning_rate": 4.103159955136955e-06, + "loss": 2.0439, + "step": 21212 + }, + { + "epoch": 0.71, + "grad_norm": 0.7286096811294556, + "learning_rate": 4.102301559910134e-06, + "loss": 2.066, + "step": 21213 + }, + { + "epoch": 0.71, + "grad_norm": 0.7619630098342896, + "learning_rate": 4.101443231312051e-06, + "loss": 2.061, + "step": 21214 + }, + { + "epoch": 0.71, + "grad_norm": 0.7443332076072693, + "learning_rate": 4.100584969352409e-06, + "loss": 2.0133, + "step": 21215 + }, + { + "epoch": 0.71, + "grad_norm": 0.7614389061927795, + "learning_rate": 4.099726774040896e-06, + "loss": 2.0917, + "step": 21216 + }, + { + "epoch": 0.71, + "grad_norm": 0.7440372705459595, + "learning_rate": 4.098868645387217e-06, + "loss": 2.0098, + "step": 21217 + }, + { + "epoch": 0.71, + "grad_norm": 0.7518798112869263, + "learning_rate": 4.098010583401058e-06, + "loss": 2.0827, + "step": 21218 + }, + { + "epoch": 0.71, + "grad_norm": 0.7577544450759888, + "learning_rate": 4.097152588092119e-06, + "loss": 2.0792, + "step": 21219 + }, + { + "epoch": 0.71, + "grad_norm": 0.7271602749824524, + "learning_rate": 4.096294659470092e-06, + "loss": 2.0068, + "step": 21220 + }, + { + "epoch": 0.71, + "grad_norm": 0.7386311292648315, + "learning_rate": 4.095436797544663e-06, + "loss": 2.046, + "step": 21221 + }, + { + "epoch": 0.71, + "grad_norm": 0.7639943361282349, + "learning_rate": 4.094579002325528e-06, + "loss": 2.0461, + "step": 21222 + }, + { + "epoch": 0.71, + "grad_norm": 0.7812091708183289, + "learning_rate": 4.093721273822384e-06, + "loss": 2.0419, + "step": 21223 + }, + { + "epoch": 0.71, + "grad_norm": 0.7445766925811768, + "learning_rate": 4.092863612044915e-06, + "loss": 2.0324, + "step": 21224 + }, + { + "epoch": 0.71, + "grad_norm": 0.7536240816116333, + "learning_rate": 4.092006017002807e-06, + "loss": 2.1078, + "step": 21225 + }, + { + "epoch": 0.71, + "grad_norm": 0.7600485682487488, + "learning_rate": 4.091148488705757e-06, + "loss": 2.0239, + "step": 21226 + }, + { + "epoch": 0.71, + "grad_norm": 0.736775279045105, + "learning_rate": 4.0902910271634445e-06, + "loss": 1.9806, + "step": 21227 + }, + { + "epoch": 0.71, + "grad_norm": 0.7672955989837646, + "learning_rate": 4.0894336323855645e-06, + "loss": 2.0263, + "step": 21228 + }, + { + "epoch": 0.71, + "grad_norm": 0.7606447339057922, + "learning_rate": 4.088576304381798e-06, + "loss": 2.0031, + "step": 21229 + }, + { + "epoch": 0.71, + "grad_norm": 0.7457943558692932, + "learning_rate": 4.0877190431618295e-06, + "loss": 2.0025, + "step": 21230 + }, + { + "epoch": 0.71, + "grad_norm": 0.7377336621284485, + "learning_rate": 4.086861848735346e-06, + "loss": 2.1082, + "step": 21231 + }, + { + "epoch": 0.71, + "grad_norm": 0.7360185384750366, + "learning_rate": 4.086004721112035e-06, + "loss": 2.0282, + "step": 21232 + }, + { + "epoch": 0.71, + "grad_norm": 0.756420373916626, + "learning_rate": 4.085147660301578e-06, + "loss": 2.0075, + "step": 21233 + }, + { + "epoch": 0.71, + "grad_norm": 0.7417958378791809, + "learning_rate": 4.084290666313653e-06, + "loss": 2.0223, + "step": 21234 + }, + { + "epoch": 0.71, + "grad_norm": 0.7650126814842224, + "learning_rate": 4.083433739157947e-06, + "loss": 2.1059, + "step": 21235 + }, + { + "epoch": 0.71, + "grad_norm": 0.7502461075782776, + "learning_rate": 4.082576878844137e-06, + "loss": 2.0638, + "step": 21236 + }, + { + "epoch": 0.71, + "grad_norm": 0.7931260466575623, + "learning_rate": 4.081720085381909e-06, + "loss": 2.1172, + "step": 21237 + }, + { + "epoch": 0.71, + "grad_norm": 0.781365156173706, + "learning_rate": 4.0808633587809335e-06, + "loss": 2.1195, + "step": 21238 + }, + { + "epoch": 0.71, + "grad_norm": 0.7687897682189941, + "learning_rate": 4.0800066990509005e-06, + "loss": 2.0745, + "step": 21239 + }, + { + "epoch": 0.71, + "grad_norm": 0.7438607215881348, + "learning_rate": 4.079150106201477e-06, + "loss": 1.9598, + "step": 21240 + }, + { + "epoch": 0.71, + "grad_norm": 0.7564340829849243, + "learning_rate": 4.078293580242351e-06, + "loss": 2.0705, + "step": 21241 + }, + { + "epoch": 0.71, + "grad_norm": 0.7360088229179382, + "learning_rate": 4.077437121183192e-06, + "loss": 2.0594, + "step": 21242 + }, + { + "epoch": 0.71, + "grad_norm": 0.7288684248924255, + "learning_rate": 4.0765807290336754e-06, + "loss": 1.9965, + "step": 21243 + }, + { + "epoch": 0.71, + "grad_norm": 0.7138208746910095, + "learning_rate": 4.075724403803477e-06, + "loss": 2.0128, + "step": 21244 + }, + { + "epoch": 0.71, + "grad_norm": 0.7765095829963684, + "learning_rate": 4.074868145502277e-06, + "loss": 2.0956, + "step": 21245 + }, + { + "epoch": 0.71, + "grad_norm": 0.7369911074638367, + "learning_rate": 4.074011954139744e-06, + "loss": 2.086, + "step": 21246 + }, + { + "epoch": 0.71, + "grad_norm": 0.7661831974983215, + "learning_rate": 4.073155829725547e-06, + "loss": 2.0805, + "step": 21247 + }, + { + "epoch": 0.71, + "grad_norm": 0.7349428534507751, + "learning_rate": 4.072299772269366e-06, + "loss": 2.0712, + "step": 21248 + }, + { + "epoch": 0.71, + "grad_norm": 0.7489886283874512, + "learning_rate": 4.0714437817808636e-06, + "loss": 1.9968, + "step": 21249 + }, + { + "epoch": 0.71, + "grad_norm": 0.744309663772583, + "learning_rate": 4.070587858269719e-06, + "loss": 2.0418, + "step": 21250 + }, + { + "epoch": 0.71, + "grad_norm": 0.7691968083381653, + "learning_rate": 4.069732001745599e-06, + "loss": 1.9849, + "step": 21251 + }, + { + "epoch": 0.71, + "grad_norm": 0.7161206007003784, + "learning_rate": 4.068876212218166e-06, + "loss": 2.0442, + "step": 21252 + }, + { + "epoch": 0.71, + "grad_norm": 0.7284473180770874, + "learning_rate": 4.0680204896970945e-06, + "loss": 2.057, + "step": 21253 + }, + { + "epoch": 0.71, + "grad_norm": 0.7282404899597168, + "learning_rate": 4.0671648341920545e-06, + "loss": 2.0709, + "step": 21254 + }, + { + "epoch": 0.71, + "grad_norm": 0.7428138256072998, + "learning_rate": 4.066309245712709e-06, + "loss": 2.0123, + "step": 21255 + }, + { + "epoch": 0.71, + "grad_norm": 0.7475786209106445, + "learning_rate": 4.065453724268721e-06, + "loss": 2.1857, + "step": 21256 + }, + { + "epoch": 0.71, + "grad_norm": 0.7161439061164856, + "learning_rate": 4.064598269869762e-06, + "loss": 2.0459, + "step": 21257 + }, + { + "epoch": 0.71, + "grad_norm": 0.7482534050941467, + "learning_rate": 4.06374288252549e-06, + "loss": 2.0383, + "step": 21258 + }, + { + "epoch": 0.71, + "grad_norm": 0.7543566823005676, + "learning_rate": 4.062887562245574e-06, + "loss": 2.0576, + "step": 21259 + }, + { + "epoch": 0.71, + "grad_norm": 0.7574505805969238, + "learning_rate": 4.062032309039673e-06, + "loss": 2.0545, + "step": 21260 + }, + { + "epoch": 0.71, + "grad_norm": 0.7527257204055786, + "learning_rate": 4.061177122917454e-06, + "loss": 2.0634, + "step": 21261 + }, + { + "epoch": 0.71, + "grad_norm": 0.7766549587249756, + "learning_rate": 4.0603220038885725e-06, + "loss": 2.0742, + "step": 21262 + }, + { + "epoch": 0.71, + "grad_norm": 0.7360628247261047, + "learning_rate": 4.059466951962695e-06, + "loss": 2.0318, + "step": 21263 + }, + { + "epoch": 0.71, + "grad_norm": 0.7445189356803894, + "learning_rate": 4.058611967149479e-06, + "loss": 1.974, + "step": 21264 + }, + { + "epoch": 0.71, + "grad_norm": 0.7440265417098999, + "learning_rate": 4.0577570494585784e-06, + "loss": 2.0367, + "step": 21265 + }, + { + "epoch": 0.71, + "grad_norm": 0.7752862572669983, + "learning_rate": 4.056902198899656e-06, + "loss": 2.03, + "step": 21266 + }, + { + "epoch": 0.71, + "grad_norm": 0.7330976724624634, + "learning_rate": 4.056047415482374e-06, + "loss": 2.0709, + "step": 21267 + }, + { + "epoch": 0.71, + "grad_norm": 0.7066649198532104, + "learning_rate": 4.055192699216385e-06, + "loss": 2.0486, + "step": 21268 + }, + { + "epoch": 0.71, + "grad_norm": 0.7284162640571594, + "learning_rate": 4.054338050111341e-06, + "loss": 2.0204, + "step": 21269 + }, + { + "epoch": 0.71, + "grad_norm": 0.7552085518836975, + "learning_rate": 4.0534834681769045e-06, + "loss": 2.097, + "step": 21270 + }, + { + "epoch": 0.71, + "grad_norm": 0.7341477274894714, + "learning_rate": 4.052628953422722e-06, + "loss": 2.0481, + "step": 21271 + }, + { + "epoch": 0.71, + "grad_norm": 0.757088840007782, + "learning_rate": 4.051774505858458e-06, + "loss": 1.9901, + "step": 21272 + }, + { + "epoch": 0.71, + "grad_norm": 0.7234058380126953, + "learning_rate": 4.050920125493758e-06, + "loss": 1.9827, + "step": 21273 + }, + { + "epoch": 0.71, + "grad_norm": 0.7285860776901245, + "learning_rate": 4.050065812338273e-06, + "loss": 2.0084, + "step": 21274 + }, + { + "epoch": 0.71, + "grad_norm": 0.7341120839118958, + "learning_rate": 4.049211566401657e-06, + "loss": 2.0302, + "step": 21275 + }, + { + "epoch": 0.71, + "grad_norm": 0.7501924633979797, + "learning_rate": 4.048357387693566e-06, + "loss": 2.0675, + "step": 21276 + }, + { + "epoch": 0.71, + "grad_norm": 0.7225576639175415, + "learning_rate": 4.047503276223644e-06, + "loss": 1.9988, + "step": 21277 + }, + { + "epoch": 0.71, + "grad_norm": 0.71672523021698, + "learning_rate": 4.0466492320015384e-06, + "loss": 2.0426, + "step": 21278 + }, + { + "epoch": 0.71, + "grad_norm": 0.7027603983879089, + "learning_rate": 4.045795255036901e-06, + "loss": 2.0373, + "step": 21279 + }, + { + "epoch": 0.71, + "grad_norm": 0.7571180462837219, + "learning_rate": 4.044941345339383e-06, + "loss": 2.0997, + "step": 21280 + }, + { + "epoch": 0.71, + "grad_norm": 0.7571749091148376, + "learning_rate": 4.0440875029186264e-06, + "loss": 2.0474, + "step": 21281 + }, + { + "epoch": 0.71, + "grad_norm": 0.7222186326980591, + "learning_rate": 4.043233727784276e-06, + "loss": 1.9903, + "step": 21282 + }, + { + "epoch": 0.71, + "grad_norm": 0.757353663444519, + "learning_rate": 4.042380019945984e-06, + "loss": 2.0782, + "step": 21283 + }, + { + "epoch": 0.71, + "grad_norm": 0.7647561430931091, + "learning_rate": 4.041526379413386e-06, + "loss": 2.0216, + "step": 21284 + }, + { + "epoch": 0.71, + "grad_norm": 0.7630417346954346, + "learning_rate": 4.040672806196132e-06, + "loss": 2.0544, + "step": 21285 + }, + { + "epoch": 0.71, + "grad_norm": 0.7682597637176514, + "learning_rate": 4.039819300303871e-06, + "loss": 2.0262, + "step": 21286 + }, + { + "epoch": 0.71, + "grad_norm": 0.7500863671302795, + "learning_rate": 4.038965861746231e-06, + "loss": 2.1353, + "step": 21287 + }, + { + "epoch": 0.71, + "grad_norm": 0.7579953670501709, + "learning_rate": 4.038112490532863e-06, + "loss": 2.1042, + "step": 21288 + }, + { + "epoch": 0.71, + "grad_norm": 0.7646166086196899, + "learning_rate": 4.0372591866734075e-06, + "loss": 2.0496, + "step": 21289 + }, + { + "epoch": 0.71, + "grad_norm": 0.7469083666801453, + "learning_rate": 4.036405950177504e-06, + "loss": 2.0748, + "step": 21290 + }, + { + "epoch": 0.71, + "grad_norm": 0.7607119083404541, + "learning_rate": 4.035552781054788e-06, + "loss": 2.0531, + "step": 21291 + }, + { + "epoch": 0.71, + "grad_norm": 0.7517334222793579, + "learning_rate": 4.034699679314904e-06, + "loss": 2.0797, + "step": 21292 + }, + { + "epoch": 0.71, + "grad_norm": 0.7112447023391724, + "learning_rate": 4.033846644967484e-06, + "loss": 1.998, + "step": 21293 + }, + { + "epoch": 0.71, + "grad_norm": 0.7686113119125366, + "learning_rate": 4.032993678022171e-06, + "loss": 2.1176, + "step": 21294 + }, + { + "epoch": 0.71, + "grad_norm": 0.7528063058853149, + "learning_rate": 4.032140778488596e-06, + "loss": 1.9898, + "step": 21295 + }, + { + "epoch": 0.71, + "grad_norm": 0.7219647169113159, + "learning_rate": 4.0312879463764e-06, + "loss": 2.0548, + "step": 21296 + }, + { + "epoch": 0.71, + "grad_norm": 0.7587942481040955, + "learning_rate": 4.03043518169521e-06, + "loss": 2.0857, + "step": 21297 + }, + { + "epoch": 0.71, + "grad_norm": 0.7651197910308838, + "learning_rate": 4.029582484454669e-06, + "loss": 2.0726, + "step": 21298 + }, + { + "epoch": 0.71, + "grad_norm": 0.7514709234237671, + "learning_rate": 4.028729854664407e-06, + "loss": 2.1182, + "step": 21299 + }, + { + "epoch": 0.71, + "grad_norm": 0.758703351020813, + "learning_rate": 4.027877292334051e-06, + "loss": 2.0518, + "step": 21300 + }, + { + "epoch": 0.71, + "grad_norm": 0.7077468037605286, + "learning_rate": 4.027024797473239e-06, + "loss": 2.0566, + "step": 21301 + }, + { + "epoch": 0.71, + "grad_norm": 0.7395803928375244, + "learning_rate": 4.026172370091602e-06, + "loss": 2.0151, + "step": 21302 + }, + { + "epoch": 0.71, + "grad_norm": 0.7239998579025269, + "learning_rate": 4.02532001019877e-06, + "loss": 2.0472, + "step": 21303 + }, + { + "epoch": 0.71, + "grad_norm": 0.7565023899078369, + "learning_rate": 4.024467717804367e-06, + "loss": 2.0653, + "step": 21304 + }, + { + "epoch": 0.71, + "grad_norm": 0.7333878874778748, + "learning_rate": 4.0236154929180285e-06, + "loss": 2.1081, + "step": 21305 + }, + { + "epoch": 0.71, + "grad_norm": 0.7882394790649414, + "learning_rate": 4.022763335549377e-06, + "loss": 1.9773, + "step": 21306 + }, + { + "epoch": 0.71, + "grad_norm": 0.7473047375679016, + "learning_rate": 4.021911245708041e-06, + "loss": 1.991, + "step": 21307 + }, + { + "epoch": 0.71, + "grad_norm": 0.7568400502204895, + "learning_rate": 4.0210592234036564e-06, + "loss": 2.0985, + "step": 21308 + }, + { + "epoch": 0.71, + "grad_norm": 0.7377325892448425, + "learning_rate": 4.0202072686458336e-06, + "loss": 2.035, + "step": 21309 + }, + { + "epoch": 0.71, + "grad_norm": 0.7409537434577942, + "learning_rate": 4.019355381444204e-06, + "loss": 2.0519, + "step": 21310 + }, + { + "epoch": 0.71, + "grad_norm": 0.7115760445594788, + "learning_rate": 4.018503561808397e-06, + "loss": 2.0268, + "step": 21311 + }, + { + "epoch": 0.71, + "grad_norm": 0.7857978343963623, + "learning_rate": 4.01765180974803e-06, + "loss": 2.0624, + "step": 21312 + }, + { + "epoch": 0.71, + "grad_norm": 0.7179853320121765, + "learning_rate": 4.016800125272724e-06, + "loss": 2.0864, + "step": 21313 + }, + { + "epoch": 0.71, + "grad_norm": 0.7399454116821289, + "learning_rate": 4.015948508392107e-06, + "loss": 2.0455, + "step": 21314 + }, + { + "epoch": 0.71, + "grad_norm": 0.7557646632194519, + "learning_rate": 4.015096959115794e-06, + "loss": 2.0243, + "step": 21315 + }, + { + "epoch": 0.71, + "grad_norm": 0.7594959735870361, + "learning_rate": 4.014245477453407e-06, + "loss": 2.0423, + "step": 21316 + }, + { + "epoch": 0.71, + "grad_norm": 0.7429527044296265, + "learning_rate": 4.013394063414571e-06, + "loss": 2.0482, + "step": 21317 + }, + { + "epoch": 0.71, + "grad_norm": 0.7559183239936829, + "learning_rate": 4.012542717008899e-06, + "loss": 2.0604, + "step": 21318 + }, + { + "epoch": 0.71, + "grad_norm": 0.7324590086936951, + "learning_rate": 4.0116914382460086e-06, + "loss": 2.0687, + "step": 21319 + }, + { + "epoch": 0.71, + "grad_norm": 0.7298872470855713, + "learning_rate": 4.01084022713552e-06, + "loss": 2.0176, + "step": 21320 + }, + { + "epoch": 0.71, + "grad_norm": 0.7775012254714966, + "learning_rate": 4.009989083687051e-06, + "loss": 2.0576, + "step": 21321 + }, + { + "epoch": 0.71, + "grad_norm": 0.7438179850578308, + "learning_rate": 4.00913800791021e-06, + "loss": 2.0232, + "step": 21322 + }, + { + "epoch": 0.71, + "grad_norm": 0.7394139766693115, + "learning_rate": 4.008286999814617e-06, + "loss": 2.0316, + "step": 21323 + }, + { + "epoch": 0.71, + "grad_norm": 0.7422469854354858, + "learning_rate": 4.007436059409891e-06, + "loss": 2.0237, + "step": 21324 + }, + { + "epoch": 0.71, + "grad_norm": 0.7643668055534363, + "learning_rate": 4.006585186705638e-06, + "loss": 2.0695, + "step": 21325 + }, + { + "epoch": 0.71, + "grad_norm": 0.7612189650535583, + "learning_rate": 4.00573438171147e-06, + "loss": 2.0943, + "step": 21326 + }, + { + "epoch": 0.71, + "grad_norm": 0.7669534087181091, + "learning_rate": 4.004883644437006e-06, + "loss": 2.0904, + "step": 21327 + }, + { + "epoch": 0.71, + "grad_norm": 0.7432018518447876, + "learning_rate": 4.004032974891851e-06, + "loss": 2.1258, + "step": 21328 + }, + { + "epoch": 0.71, + "grad_norm": 0.7501332759857178, + "learning_rate": 4.003182373085616e-06, + "loss": 2.0563, + "step": 21329 + }, + { + "epoch": 0.71, + "grad_norm": 0.7527912855148315, + "learning_rate": 4.002331839027919e-06, + "loss": 2.0166, + "step": 21330 + }, + { + "epoch": 0.71, + "grad_norm": 0.7284863591194153, + "learning_rate": 4.0014813727283555e-06, + "loss": 2.0797, + "step": 21331 + }, + { + "epoch": 0.71, + "grad_norm": 0.7435654997825623, + "learning_rate": 4.000630974196539e-06, + "loss": 2.0377, + "step": 21332 + }, + { + "epoch": 0.71, + "grad_norm": 0.7310600280761719, + "learning_rate": 3.999780643442081e-06, + "loss": 2.0069, + "step": 21333 + }, + { + "epoch": 0.71, + "grad_norm": 0.7430515289306641, + "learning_rate": 3.998930380474587e-06, + "loss": 2.1448, + "step": 21334 + }, + { + "epoch": 0.71, + "grad_norm": 0.7232074737548828, + "learning_rate": 3.998080185303656e-06, + "loss": 2.063, + "step": 21335 + }, + { + "epoch": 0.71, + "grad_norm": 0.7502022385597229, + "learning_rate": 3.9972300579389e-06, + "loss": 1.9953, + "step": 21336 + }, + { + "epoch": 0.71, + "grad_norm": 0.7370855212211609, + "learning_rate": 3.996379998389919e-06, + "loss": 2.0406, + "step": 21337 + }, + { + "epoch": 0.71, + "grad_norm": 0.7352990508079529, + "learning_rate": 3.9955300066663175e-06, + "loss": 2.0829, + "step": 21338 + }, + { + "epoch": 0.71, + "grad_norm": 0.7266630530357361, + "learning_rate": 3.994680082777702e-06, + "loss": 2.0996, + "step": 21339 + }, + { + "epoch": 0.71, + "grad_norm": 0.7441461682319641, + "learning_rate": 3.993830226733673e-06, + "loss": 2.0362, + "step": 21340 + }, + { + "epoch": 0.71, + "grad_norm": 0.794948399066925, + "learning_rate": 3.992980438543824e-06, + "loss": 2.108, + "step": 21341 + }, + { + "epoch": 0.71, + "grad_norm": 0.7389959096908569, + "learning_rate": 3.992130718217767e-06, + "loss": 2.1068, + "step": 21342 + }, + { + "epoch": 0.71, + "grad_norm": 0.7634884119033813, + "learning_rate": 3.991281065765096e-06, + "loss": 2.0365, + "step": 21343 + }, + { + "epoch": 0.71, + "grad_norm": 0.7259693145751953, + "learning_rate": 3.990431481195407e-06, + "loss": 2.0282, + "step": 21344 + }, + { + "epoch": 0.71, + "grad_norm": 0.7387796640396118, + "learning_rate": 3.9895819645182996e-06, + "loss": 2.0115, + "step": 21345 + }, + { + "epoch": 0.71, + "grad_norm": 0.7690161466598511, + "learning_rate": 3.988732515743377e-06, + "loss": 2.1236, + "step": 21346 + }, + { + "epoch": 0.71, + "grad_norm": 0.7430232763290405, + "learning_rate": 3.987883134880233e-06, + "loss": 2.0377, + "step": 21347 + }, + { + "epoch": 0.71, + "grad_norm": 0.764133632183075, + "learning_rate": 3.9870338219384565e-06, + "loss": 2.063, + "step": 21348 + }, + { + "epoch": 0.71, + "grad_norm": 0.738273024559021, + "learning_rate": 3.986184576927652e-06, + "loss": 1.9718, + "step": 21349 + }, + { + "epoch": 0.71, + "grad_norm": 0.7478065490722656, + "learning_rate": 3.9853353998574065e-06, + "loss": 2.073, + "step": 21350 + }, + { + "epoch": 0.71, + "grad_norm": 0.7851174473762512, + "learning_rate": 3.984486290737316e-06, + "loss": 2.0409, + "step": 21351 + }, + { + "epoch": 0.71, + "grad_norm": 0.7587388753890991, + "learning_rate": 3.983637249576983e-06, + "loss": 2.0833, + "step": 21352 + }, + { + "epoch": 0.71, + "grad_norm": 0.77997887134552, + "learning_rate": 3.982788276385981e-06, + "loss": 2.1537, + "step": 21353 + }, + { + "epoch": 0.71, + "grad_norm": 0.7260881662368774, + "learning_rate": 3.981939371173912e-06, + "loss": 1.9942, + "step": 21354 + }, + { + "epoch": 0.71, + "grad_norm": 0.7690961360931396, + "learning_rate": 3.981090533950367e-06, + "loss": 2.0405, + "step": 21355 + }, + { + "epoch": 0.71, + "grad_norm": 0.7225907444953918, + "learning_rate": 3.980241764724935e-06, + "loss": 2.0736, + "step": 21356 + }, + { + "epoch": 0.71, + "grad_norm": 0.7503808736801147, + "learning_rate": 3.979393063507199e-06, + "loss": 1.9669, + "step": 21357 + }, + { + "epoch": 0.71, + "grad_norm": 0.764228343963623, + "learning_rate": 3.978544430306757e-06, + "loss": 2.1097, + "step": 21358 + }, + { + "epoch": 0.71, + "grad_norm": 0.722518265247345, + "learning_rate": 3.977695865133186e-06, + "loss": 1.985, + "step": 21359 + }, + { + "epoch": 0.71, + "grad_norm": 0.7803450226783752, + "learning_rate": 3.97684736799608e-06, + "loss": 2.1004, + "step": 21360 + }, + { + "epoch": 0.71, + "grad_norm": 0.7315242886543274, + "learning_rate": 3.975998938905023e-06, + "loss": 2.0165, + "step": 21361 + }, + { + "epoch": 0.71, + "grad_norm": 0.7466260194778442, + "learning_rate": 3.975150577869602e-06, + "loss": 2.1572, + "step": 21362 + }, + { + "epoch": 0.71, + "grad_norm": 0.7696135640144348, + "learning_rate": 3.974302284899394e-06, + "loss": 2.0188, + "step": 21363 + }, + { + "epoch": 0.71, + "grad_norm": 0.7399890422821045, + "learning_rate": 3.973454060003992e-06, + "loss": 2.0231, + "step": 21364 + }, + { + "epoch": 0.71, + "grad_norm": 0.7707297801971436, + "learning_rate": 3.972605903192973e-06, + "loss": 2.039, + "step": 21365 + }, + { + "epoch": 0.71, + "grad_norm": 0.7262179255485535, + "learning_rate": 3.971757814475916e-06, + "loss": 2.0411, + "step": 21366 + }, + { + "epoch": 0.71, + "grad_norm": 0.7617310881614685, + "learning_rate": 3.970909793862407e-06, + "loss": 2.0262, + "step": 21367 + }, + { + "epoch": 0.71, + "grad_norm": 0.7320592999458313, + "learning_rate": 3.970061841362031e-06, + "loss": 1.9711, + "step": 21368 + }, + { + "epoch": 0.71, + "grad_norm": 0.7193421721458435, + "learning_rate": 3.969213956984357e-06, + "loss": 2.0601, + "step": 21369 + }, + { + "epoch": 0.71, + "grad_norm": 0.7337999939918518, + "learning_rate": 3.968366140738973e-06, + "loss": 2.0577, + "step": 21370 + }, + { + "epoch": 0.71, + "grad_norm": 0.7412766218185425, + "learning_rate": 3.967518392635455e-06, + "loss": 2.1195, + "step": 21371 + }, + { + "epoch": 0.71, + "grad_norm": 0.7772192358970642, + "learning_rate": 3.966670712683373e-06, + "loss": 2.0744, + "step": 21372 + }, + { + "epoch": 0.71, + "grad_norm": 0.7592119574546814, + "learning_rate": 3.965823100892311e-06, + "loss": 2.0563, + "step": 21373 + }, + { + "epoch": 0.71, + "grad_norm": 0.7117629647254944, + "learning_rate": 3.96497555727185e-06, + "loss": 2.0334, + "step": 21374 + }, + { + "epoch": 0.71, + "grad_norm": 0.7413805723190308, + "learning_rate": 3.96412808183155e-06, + "loss": 2.0347, + "step": 21375 + }, + { + "epoch": 0.71, + "grad_norm": 0.7522855997085571, + "learning_rate": 3.963280674580995e-06, + "loss": 2.0302, + "step": 21376 + }, + { + "epoch": 0.71, + "grad_norm": 0.7498225569725037, + "learning_rate": 3.96243333552976e-06, + "loss": 2.0803, + "step": 21377 + }, + { + "epoch": 0.71, + "grad_norm": 0.7160243988037109, + "learning_rate": 3.961586064687415e-06, + "loss": 2.0033, + "step": 21378 + }, + { + "epoch": 0.71, + "grad_norm": 0.7567553520202637, + "learning_rate": 3.960738862063528e-06, + "loss": 2.0542, + "step": 21379 + }, + { + "epoch": 0.71, + "grad_norm": 0.7421330809593201, + "learning_rate": 3.959891727667674e-06, + "loss": 1.995, + "step": 21380 + }, + { + "epoch": 0.71, + "grad_norm": 0.7460967898368835, + "learning_rate": 3.959044661509428e-06, + "loss": 2.0632, + "step": 21381 + }, + { + "epoch": 0.71, + "grad_norm": 0.7928849458694458, + "learning_rate": 3.95819766359835e-06, + "loss": 2.0483, + "step": 21382 + }, + { + "epoch": 0.71, + "grad_norm": 0.7430436611175537, + "learning_rate": 3.9573507339440186e-06, + "loss": 2.052, + "step": 21383 + }, + { + "epoch": 0.71, + "grad_norm": 0.7506824135780334, + "learning_rate": 3.9565038725559965e-06, + "loss": 1.9686, + "step": 21384 + }, + { + "epoch": 0.71, + "grad_norm": 0.7379739880561829, + "learning_rate": 3.955657079443849e-06, + "loss": 2.0193, + "step": 21385 + }, + { + "epoch": 0.71, + "grad_norm": 0.73024982213974, + "learning_rate": 3.954810354617145e-06, + "loss": 2.0286, + "step": 21386 + }, + { + "epoch": 0.71, + "grad_norm": 0.7755810022354126, + "learning_rate": 3.953963698085458e-06, + "loss": 1.9977, + "step": 21387 + }, + { + "epoch": 0.71, + "grad_norm": 0.7560776472091675, + "learning_rate": 3.953117109858339e-06, + "loss": 2.079, + "step": 21388 + }, + { + "epoch": 0.71, + "grad_norm": 0.7478843927383423, + "learning_rate": 3.952270589945358e-06, + "loss": 2.0169, + "step": 21389 + }, + { + "epoch": 0.71, + "grad_norm": 0.7652735114097595, + "learning_rate": 3.951424138356083e-06, + "loss": 2.0577, + "step": 21390 + }, + { + "epoch": 0.71, + "grad_norm": 0.7692378759384155, + "learning_rate": 3.950577755100072e-06, + "loss": 2.0864, + "step": 21391 + }, + { + "epoch": 0.71, + "grad_norm": 0.7544057965278625, + "learning_rate": 3.94973144018689e-06, + "loss": 2.046, + "step": 21392 + }, + { + "epoch": 0.71, + "grad_norm": 0.7314255237579346, + "learning_rate": 3.948885193626097e-06, + "loss": 2.0764, + "step": 21393 + }, + { + "epoch": 0.71, + "grad_norm": 0.7477734684944153, + "learning_rate": 3.948039015427248e-06, + "loss": 2.0307, + "step": 21394 + }, + { + "epoch": 0.71, + "grad_norm": 0.7233955264091492, + "learning_rate": 3.9471929055999095e-06, + "loss": 2.004, + "step": 21395 + }, + { + "epoch": 0.71, + "grad_norm": 0.7487828135490417, + "learning_rate": 3.94634686415364e-06, + "loss": 1.9984, + "step": 21396 + }, + { + "epoch": 0.71, + "grad_norm": 0.7148840427398682, + "learning_rate": 3.945500891097996e-06, + "loss": 1.9625, + "step": 21397 + }, + { + "epoch": 0.71, + "grad_norm": 0.7414119243621826, + "learning_rate": 3.944654986442532e-06, + "loss": 2.0716, + "step": 21398 + }, + { + "epoch": 0.71, + "grad_norm": 0.7241600155830383, + "learning_rate": 3.9438091501968104e-06, + "loss": 2.0401, + "step": 21399 + }, + { + "epoch": 0.71, + "grad_norm": 0.7317625284194946, + "learning_rate": 3.942963382370381e-06, + "loss": 2.1135, + "step": 21400 + }, + { + "epoch": 0.71, + "grad_norm": 0.7348754405975342, + "learning_rate": 3.942117682972803e-06, + "loss": 1.9299, + "step": 21401 + }, + { + "epoch": 0.71, + "grad_norm": 0.7352507710456848, + "learning_rate": 3.941272052013627e-06, + "loss": 2.077, + "step": 21402 + }, + { + "epoch": 0.71, + "grad_norm": 0.7569603323936462, + "learning_rate": 3.940426489502413e-06, + "loss": 2.0066, + "step": 21403 + }, + { + "epoch": 0.71, + "grad_norm": 0.749212384223938, + "learning_rate": 3.939580995448704e-06, + "loss": 2.1216, + "step": 21404 + }, + { + "epoch": 0.71, + "grad_norm": 0.7462937831878662, + "learning_rate": 3.938735569862061e-06, + "loss": 2.0365, + "step": 21405 + }, + { + "epoch": 0.71, + "grad_norm": 0.7680366635322571, + "learning_rate": 3.937890212752033e-06, + "loss": 1.9848, + "step": 21406 + }, + { + "epoch": 0.71, + "grad_norm": 0.7387123107910156, + "learning_rate": 3.9370449241281625e-06, + "loss": 2.049, + "step": 21407 + }, + { + "epoch": 0.71, + "grad_norm": 0.7487072944641113, + "learning_rate": 3.936199704000006e-06, + "loss": 2.0654, + "step": 21408 + }, + { + "epoch": 0.71, + "grad_norm": 0.7406418919563293, + "learning_rate": 3.935354552377119e-06, + "loss": 2.038, + "step": 21409 + }, + { + "epoch": 0.71, + "grad_norm": 0.7581602334976196, + "learning_rate": 3.934509469269035e-06, + "loss": 2.1079, + "step": 21410 + }, + { + "epoch": 0.71, + "grad_norm": 0.7259083390235901, + "learning_rate": 3.933664454685308e-06, + "loss": 2.0271, + "step": 21411 + }, + { + "epoch": 0.71, + "grad_norm": 0.736352264881134, + "learning_rate": 3.932819508635489e-06, + "loss": 2.0548, + "step": 21412 + }, + { + "epoch": 0.71, + "grad_norm": 0.7491690516471863, + "learning_rate": 3.931974631129116e-06, + "loss": 2.1485, + "step": 21413 + }, + { + "epoch": 0.71, + "grad_norm": 0.72933429479599, + "learning_rate": 3.931129822175741e-06, + "loss": 2.0654, + "step": 21414 + }, + { + "epoch": 0.71, + "grad_norm": 0.7801526784896851, + "learning_rate": 3.930285081784904e-06, + "loss": 2.0771, + "step": 21415 + }, + { + "epoch": 0.71, + "grad_norm": 0.7803142070770264, + "learning_rate": 3.929440409966146e-06, + "loss": 1.963, + "step": 21416 + }, + { + "epoch": 0.71, + "grad_norm": 0.7689708471298218, + "learning_rate": 3.928595806729011e-06, + "loss": 2.0732, + "step": 21417 + }, + { + "epoch": 0.71, + "grad_norm": 0.7630940675735474, + "learning_rate": 3.927751272083047e-06, + "loss": 2.0239, + "step": 21418 + }, + { + "epoch": 0.71, + "grad_norm": 0.7382839918136597, + "learning_rate": 3.92690680603779e-06, + "loss": 2.0361, + "step": 21419 + }, + { + "epoch": 0.71, + "grad_norm": 0.7478662133216858, + "learning_rate": 3.926062408602778e-06, + "loss": 1.9768, + "step": 21420 + }, + { + "epoch": 0.71, + "grad_norm": 0.7571648955345154, + "learning_rate": 3.925218079787556e-06, + "loss": 2.0739, + "step": 21421 + }, + { + "epoch": 0.71, + "grad_norm": 0.7343557476997375, + "learning_rate": 3.924373819601657e-06, + "loss": 2.0649, + "step": 21422 + }, + { + "epoch": 0.71, + "grad_norm": 0.6995106339454651, + "learning_rate": 3.923529628054625e-06, + "loss": 2.0624, + "step": 21423 + }, + { + "epoch": 0.71, + "grad_norm": 0.7152701020240784, + "learning_rate": 3.922685505155991e-06, + "loss": 2.0368, + "step": 21424 + }, + { + "epoch": 0.71, + "grad_norm": 0.7540472745895386, + "learning_rate": 3.921841450915298e-06, + "loss": 2.0329, + "step": 21425 + }, + { + "epoch": 0.71, + "grad_norm": 0.7402940988540649, + "learning_rate": 3.920997465342075e-06, + "loss": 2.0738, + "step": 21426 + }, + { + "epoch": 0.71, + "grad_norm": 0.7235376834869385, + "learning_rate": 3.920153548445862e-06, + "loss": 2.0378, + "step": 21427 + }, + { + "epoch": 0.71, + "grad_norm": 0.7677651047706604, + "learning_rate": 3.9193097002361925e-06, + "loss": 2.0632, + "step": 21428 + }, + { + "epoch": 0.71, + "grad_norm": 0.7298663258552551, + "learning_rate": 3.9184659207225935e-06, + "loss": 1.9868, + "step": 21429 + }, + { + "epoch": 0.71, + "grad_norm": 0.7538634538650513, + "learning_rate": 3.917622209914604e-06, + "loss": 2.1251, + "step": 21430 + }, + { + "epoch": 0.71, + "grad_norm": 0.7540154457092285, + "learning_rate": 3.916778567821756e-06, + "loss": 2.0574, + "step": 21431 + }, + { + "epoch": 0.71, + "grad_norm": 0.7502318024635315, + "learning_rate": 3.915934994453581e-06, + "loss": 2.0824, + "step": 21432 + }, + { + "epoch": 0.71, + "grad_norm": 0.7425729036331177, + "learning_rate": 3.915091489819601e-06, + "loss": 2.035, + "step": 21433 + }, + { + "epoch": 0.71, + "grad_norm": 0.7414492964744568, + "learning_rate": 3.9142480539293555e-06, + "loss": 2.1069, + "step": 21434 + }, + { + "epoch": 0.71, + "grad_norm": 0.7266777753829956, + "learning_rate": 3.913404686792366e-06, + "loss": 2.0128, + "step": 21435 + }, + { + "epoch": 0.71, + "grad_norm": 0.7208375334739685, + "learning_rate": 3.9125613884181655e-06, + "loss": 1.9742, + "step": 21436 + }, + { + "epoch": 0.71, + "grad_norm": 0.7459979057312012, + "learning_rate": 3.91171815881628e-06, + "loss": 2.0638, + "step": 21437 + }, + { + "epoch": 0.71, + "grad_norm": 0.7630561590194702, + "learning_rate": 3.910874997996231e-06, + "loss": 2.0663, + "step": 21438 + }, + { + "epoch": 0.71, + "grad_norm": 0.7476745843887329, + "learning_rate": 3.910031905967547e-06, + "loss": 2.1321, + "step": 21439 + }, + { + "epoch": 0.71, + "grad_norm": 0.7570052146911621, + "learning_rate": 3.909188882739757e-06, + "loss": 2.0722, + "step": 21440 + }, + { + "epoch": 0.71, + "grad_norm": 0.7259035706520081, + "learning_rate": 3.9083459283223825e-06, + "loss": 1.9792, + "step": 21441 + }, + { + "epoch": 0.71, + "grad_norm": 0.7302448749542236, + "learning_rate": 3.907503042724942e-06, + "loss": 2.0439, + "step": 21442 + }, + { + "epoch": 0.71, + "grad_norm": 0.7748907208442688, + "learning_rate": 3.9066602259569645e-06, + "loss": 2.0541, + "step": 21443 + }, + { + "epoch": 0.71, + "grad_norm": 0.7658467888832092, + "learning_rate": 3.905817478027965e-06, + "loss": 2.0543, + "step": 21444 + }, + { + "epoch": 0.71, + "grad_norm": 0.761789858341217, + "learning_rate": 3.904974798947472e-06, + "loss": 1.9304, + "step": 21445 + }, + { + "epoch": 0.71, + "grad_norm": 0.7721625566482544, + "learning_rate": 3.904132188724997e-06, + "loss": 2.0236, + "step": 21446 + }, + { + "epoch": 0.71, + "grad_norm": 0.7606375217437744, + "learning_rate": 3.9032896473700685e-06, + "loss": 2.0615, + "step": 21447 + }, + { + "epoch": 0.71, + "grad_norm": 0.750980794429779, + "learning_rate": 3.902447174892198e-06, + "loss": 2.0628, + "step": 21448 + }, + { + "epoch": 0.71, + "grad_norm": 0.7425456643104553, + "learning_rate": 3.901604771300907e-06, + "loss": 2.0527, + "step": 21449 + }, + { + "epoch": 0.71, + "grad_norm": 0.7835389971733093, + "learning_rate": 3.900762436605714e-06, + "loss": 2.0646, + "step": 21450 + }, + { + "epoch": 0.71, + "grad_norm": 0.7637593150138855, + "learning_rate": 3.899920170816127e-06, + "loss": 2.1123, + "step": 21451 + }, + { + "epoch": 0.71, + "grad_norm": 0.7873336672782898, + "learning_rate": 3.899077973941667e-06, + "loss": 2.0898, + "step": 21452 + }, + { + "epoch": 0.71, + "grad_norm": 0.7414779663085938, + "learning_rate": 3.898235845991853e-06, + "loss": 2.0251, + "step": 21453 + }, + { + "epoch": 0.71, + "grad_norm": 0.7563363909721375, + "learning_rate": 3.897393786976195e-06, + "loss": 2.0321, + "step": 21454 + }, + { + "epoch": 0.71, + "grad_norm": 0.7331929802894592, + "learning_rate": 3.896551796904201e-06, + "loss": 2.0463, + "step": 21455 + }, + { + "epoch": 0.71, + "grad_norm": 0.7873733639717102, + "learning_rate": 3.89570987578539e-06, + "loss": 2.0163, + "step": 21456 + }, + { + "epoch": 0.71, + "grad_norm": 0.7550294399261475, + "learning_rate": 3.8948680236292704e-06, + "loss": 1.9914, + "step": 21457 + }, + { + "epoch": 0.71, + "grad_norm": 0.7415096759796143, + "learning_rate": 3.894026240445357e-06, + "loss": 2.0486, + "step": 21458 + }, + { + "epoch": 0.71, + "grad_norm": 0.733745813369751, + "learning_rate": 3.893184526243155e-06, + "loss": 2.0219, + "step": 21459 + }, + { + "epoch": 0.71, + "grad_norm": 0.7427349090576172, + "learning_rate": 3.892342881032173e-06, + "loss": 2.0665, + "step": 21460 + }, + { + "epoch": 0.71, + "grad_norm": 0.7349501848220825, + "learning_rate": 3.8915013048219205e-06, + "loss": 2.0718, + "step": 21461 + }, + { + "epoch": 0.71, + "grad_norm": 0.7173113226890564, + "learning_rate": 3.8906597976219115e-06, + "loss": 2.0557, + "step": 21462 + }, + { + "epoch": 0.71, + "grad_norm": 0.7213647961616516, + "learning_rate": 3.889818359441647e-06, + "loss": 2.0597, + "step": 21463 + }, + { + "epoch": 0.71, + "grad_norm": 0.7322161197662354, + "learning_rate": 3.888976990290629e-06, + "loss": 2.0755, + "step": 21464 + }, + { + "epoch": 0.71, + "grad_norm": 0.7613168358802795, + "learning_rate": 3.888135690178373e-06, + "loss": 2.0138, + "step": 21465 + }, + { + "epoch": 0.71, + "grad_norm": 0.7436955571174622, + "learning_rate": 3.8872944591143735e-06, + "loss": 2.1143, + "step": 21466 + }, + { + "epoch": 0.71, + "grad_norm": 0.7717354893684387, + "learning_rate": 3.886453297108143e-06, + "loss": 2.076, + "step": 21467 + }, + { + "epoch": 0.71, + "grad_norm": 0.7147963643074036, + "learning_rate": 3.8856122041691765e-06, + "loss": 2.0178, + "step": 21468 + }, + { + "epoch": 0.71, + "grad_norm": 0.7277140617370605, + "learning_rate": 3.884771180306983e-06, + "loss": 1.9674, + "step": 21469 + }, + { + "epoch": 0.71, + "grad_norm": 0.7208665609359741, + "learning_rate": 3.8839302255310575e-06, + "loss": 1.9855, + "step": 21470 + }, + { + "epoch": 0.71, + "grad_norm": 0.740422785282135, + "learning_rate": 3.883089339850907e-06, + "loss": 2.0048, + "step": 21471 + }, + { + "epoch": 0.71, + "grad_norm": 0.7569411993026733, + "learning_rate": 3.88224852327603e-06, + "loss": 2.043, + "step": 21472 + }, + { + "epoch": 0.71, + "grad_norm": 0.7522729635238647, + "learning_rate": 3.881407775815919e-06, + "loss": 2.0141, + "step": 21473 + }, + { + "epoch": 0.71, + "grad_norm": 0.7667873501777649, + "learning_rate": 3.880567097480077e-06, + "loss": 2.0684, + "step": 21474 + }, + { + "epoch": 0.71, + "grad_norm": 0.7424026727676392, + "learning_rate": 3.879726488278005e-06, + "loss": 2.0381, + "step": 21475 + }, + { + "epoch": 0.71, + "grad_norm": 0.7515433430671692, + "learning_rate": 3.878885948219197e-06, + "loss": 2.0956, + "step": 21476 + }, + { + "epoch": 0.71, + "grad_norm": 0.7306869626045227, + "learning_rate": 3.878045477313145e-06, + "loss": 1.9961, + "step": 21477 + }, + { + "epoch": 0.71, + "grad_norm": 0.7354339957237244, + "learning_rate": 3.87720507556935e-06, + "loss": 2.0522, + "step": 21478 + }, + { + "epoch": 0.71, + "grad_norm": 0.7475075125694275, + "learning_rate": 3.876364742997301e-06, + "loss": 2.0572, + "step": 21479 + }, + { + "epoch": 0.71, + "grad_norm": 0.7628931403160095, + "learning_rate": 3.875524479606496e-06, + "loss": 2.1192, + "step": 21480 + }, + { + "epoch": 0.71, + "grad_norm": 0.772562563419342, + "learning_rate": 3.874684285406425e-06, + "loss": 2.0421, + "step": 21481 + }, + { + "epoch": 0.71, + "grad_norm": 0.8290002942085266, + "learning_rate": 3.873844160406584e-06, + "loss": 2.1171, + "step": 21482 + }, + { + "epoch": 0.71, + "grad_norm": 0.732464611530304, + "learning_rate": 3.873004104616457e-06, + "loss": 2.0024, + "step": 21483 + }, + { + "epoch": 0.71, + "grad_norm": 0.7848442792892456, + "learning_rate": 3.872164118045543e-06, + "loss": 2.0191, + "step": 21484 + }, + { + "epoch": 0.71, + "grad_norm": 0.7439508438110352, + "learning_rate": 3.871324200703327e-06, + "loss": 2.0364, + "step": 21485 + }, + { + "epoch": 0.71, + "grad_norm": 0.7405891418457031, + "learning_rate": 3.870484352599295e-06, + "loss": 2.0341, + "step": 21486 + }, + { + "epoch": 0.71, + "grad_norm": 0.7574360370635986, + "learning_rate": 3.8696445737429385e-06, + "loss": 2.0537, + "step": 21487 + }, + { + "epoch": 0.71, + "grad_norm": 0.7521209120750427, + "learning_rate": 3.868804864143749e-06, + "loss": 2.0518, + "step": 21488 + }, + { + "epoch": 0.71, + "grad_norm": 0.743155300617218, + "learning_rate": 3.86796522381121e-06, + "loss": 2.1213, + "step": 21489 + }, + { + "epoch": 0.71, + "grad_norm": 0.737542986869812, + "learning_rate": 3.8671256527548005e-06, + "loss": 2.0603, + "step": 21490 + }, + { + "epoch": 0.72, + "grad_norm": 0.7969669103622437, + "learning_rate": 3.866286150984016e-06, + "loss": 2.1625, + "step": 21491 + }, + { + "epoch": 0.72, + "grad_norm": 0.7197364568710327, + "learning_rate": 3.865446718508331e-06, + "loss": 2.049, + "step": 21492 + }, + { + "epoch": 0.72, + "grad_norm": 0.777123749256134, + "learning_rate": 3.8646073553372385e-06, + "loss": 2.0096, + "step": 21493 + }, + { + "epoch": 0.72, + "grad_norm": 0.7500991821289062, + "learning_rate": 3.863768061480216e-06, + "loss": 2.032, + "step": 21494 + }, + { + "epoch": 0.72, + "grad_norm": 0.751045823097229, + "learning_rate": 3.862928836946742e-06, + "loss": 2.0831, + "step": 21495 + }, + { + "epoch": 0.72, + "grad_norm": 0.7382387518882751, + "learning_rate": 3.862089681746301e-06, + "loss": 2.0403, + "step": 21496 + }, + { + "epoch": 0.72, + "grad_norm": 0.7508519291877747, + "learning_rate": 3.8612505958883786e-06, + "loss": 2.0819, + "step": 21497 + }, + { + "epoch": 0.72, + "grad_norm": 0.7182164192199707, + "learning_rate": 3.860411579382448e-06, + "loss": 2.0569, + "step": 21498 + }, + { + "epoch": 0.72, + "grad_norm": 0.7209972143173218, + "learning_rate": 3.859572632237987e-06, + "loss": 1.9511, + "step": 21499 + }, + { + "epoch": 0.72, + "grad_norm": 0.7400722503662109, + "learning_rate": 3.8587337544644776e-06, + "loss": 2.0848, + "step": 21500 + }, + { + "epoch": 0.72, + "grad_norm": 0.722638726234436, + "learning_rate": 3.857894946071393e-06, + "loss": 2.026, + "step": 21501 + }, + { + "epoch": 0.72, + "grad_norm": 0.7230244874954224, + "learning_rate": 3.857056207068215e-06, + "loss": 2.0225, + "step": 21502 + }, + { + "epoch": 0.72, + "grad_norm": 0.748950719833374, + "learning_rate": 3.856217537464412e-06, + "loss": 2.1133, + "step": 21503 + }, + { + "epoch": 0.72, + "grad_norm": 0.7379436492919922, + "learning_rate": 3.855378937269465e-06, + "loss": 2.1023, + "step": 21504 + }, + { + "epoch": 0.72, + "grad_norm": 0.7290952801704407, + "learning_rate": 3.854540406492844e-06, + "loss": 2.0199, + "step": 21505 + }, + { + "epoch": 0.72, + "grad_norm": 0.7335649728775024, + "learning_rate": 3.853701945144026e-06, + "loss": 2.0268, + "step": 21506 + }, + { + "epoch": 0.72, + "grad_norm": 0.7217668890953064, + "learning_rate": 3.852863553232482e-06, + "loss": 2.0092, + "step": 21507 + }, + { + "epoch": 0.72, + "grad_norm": 0.7387538552284241, + "learning_rate": 3.8520252307676795e-06, + "loss": 2.0459, + "step": 21508 + }, + { + "epoch": 0.72, + "grad_norm": 0.7419674396514893, + "learning_rate": 3.8511869777590925e-06, + "loss": 2.1174, + "step": 21509 + }, + { + "epoch": 0.72, + "grad_norm": 0.7644091844558716, + "learning_rate": 3.850348794216196e-06, + "loss": 2.0466, + "step": 21510 + }, + { + "epoch": 0.72, + "grad_norm": 0.7405504584312439, + "learning_rate": 3.8495106801484535e-06, + "loss": 1.9902, + "step": 21511 + }, + { + "epoch": 0.72, + "grad_norm": 0.7505638003349304, + "learning_rate": 3.848672635565333e-06, + "loss": 2.08, + "step": 21512 + }, + { + "epoch": 0.72, + "grad_norm": 0.7229627370834351, + "learning_rate": 3.847834660476306e-06, + "loss": 2.0612, + "step": 21513 + }, + { + "epoch": 0.72, + "grad_norm": 0.727674663066864, + "learning_rate": 3.8469967548908335e-06, + "loss": 2.0653, + "step": 21514 + }, + { + "epoch": 0.72, + "grad_norm": 0.7076156139373779, + "learning_rate": 3.846158918818387e-06, + "loss": 2.1248, + "step": 21515 + }, + { + "epoch": 0.72, + "grad_norm": 0.7592533230781555, + "learning_rate": 3.845321152268437e-06, + "loss": 1.9897, + "step": 21516 + }, + { + "epoch": 0.72, + "grad_norm": 0.7777933478355408, + "learning_rate": 3.8444834552504355e-06, + "loss": 2.0548, + "step": 21517 + }, + { + "epoch": 0.72, + "grad_norm": 0.7544406056404114, + "learning_rate": 3.843645827773851e-06, + "loss": 2.027, + "step": 21518 + }, + { + "epoch": 0.72, + "grad_norm": 0.7289154529571533, + "learning_rate": 3.842808269848153e-06, + "loss": 2.0405, + "step": 21519 + }, + { + "epoch": 0.72, + "grad_norm": 0.7296667695045471, + "learning_rate": 3.8419707814827965e-06, + "loss": 2.011, + "step": 21520 + }, + { + "epoch": 0.72, + "grad_norm": 0.7448946237564087, + "learning_rate": 3.841133362687244e-06, + "loss": 2.0964, + "step": 21521 + }, + { + "epoch": 0.72, + "grad_norm": 0.8013725280761719, + "learning_rate": 3.84029601347096e-06, + "loss": 2.0698, + "step": 21522 + }, + { + "epoch": 0.72, + "grad_norm": 0.7270270586013794, + "learning_rate": 3.839458733843398e-06, + "loss": 1.9686, + "step": 21523 + }, + { + "epoch": 0.72, + "grad_norm": 0.7436785697937012, + "learning_rate": 3.8386215238140235e-06, + "loss": 1.9855, + "step": 21524 + }, + { + "epoch": 0.72, + "grad_norm": 0.7553300261497498, + "learning_rate": 3.837784383392289e-06, + "loss": 2.0065, + "step": 21525 + }, + { + "epoch": 0.72, + "grad_norm": 0.7277274131774902, + "learning_rate": 3.83694731258766e-06, + "loss": 2.0231, + "step": 21526 + }, + { + "epoch": 0.72, + "grad_norm": 0.7313604950904846, + "learning_rate": 3.836110311409583e-06, + "loss": 2.0776, + "step": 21527 + }, + { + "epoch": 0.72, + "grad_norm": 0.7905172109603882, + "learning_rate": 3.835273379867525e-06, + "loss": 2.089, + "step": 21528 + }, + { + "epoch": 0.72, + "grad_norm": 0.7373872399330139, + "learning_rate": 3.834436517970933e-06, + "loss": 2.0455, + "step": 21529 + }, + { + "epoch": 0.72, + "grad_norm": 0.7588306069374084, + "learning_rate": 3.833599725729261e-06, + "loss": 2.0692, + "step": 21530 + }, + { + "epoch": 0.72, + "grad_norm": 0.7282304167747498, + "learning_rate": 3.832763003151967e-06, + "loss": 2.0105, + "step": 21531 + }, + { + "epoch": 0.72, + "grad_norm": 0.7318737506866455, + "learning_rate": 3.831926350248504e-06, + "loss": 2.0757, + "step": 21532 + }, + { + "epoch": 0.72, + "grad_norm": 0.7589004635810852, + "learning_rate": 3.831089767028323e-06, + "loss": 2.108, + "step": 21533 + }, + { + "epoch": 0.72, + "grad_norm": 0.7397930026054382, + "learning_rate": 3.830253253500871e-06, + "loss": 2.0537, + "step": 21534 + }, + { + "epoch": 0.72, + "grad_norm": 0.7481244802474976, + "learning_rate": 3.829416809675606e-06, + "loss": 2.0636, + "step": 21535 + }, + { + "epoch": 0.72, + "grad_norm": 0.7941077947616577, + "learning_rate": 3.828580435561969e-06, + "loss": 2.1034, + "step": 21536 + }, + { + "epoch": 0.72, + "grad_norm": 0.7500936388969421, + "learning_rate": 3.827744131169413e-06, + "loss": 2.0739, + "step": 21537 + }, + { + "epoch": 0.72, + "grad_norm": 0.7513919472694397, + "learning_rate": 3.826907896507394e-06, + "loss": 2.0437, + "step": 21538 + }, + { + "epoch": 0.72, + "grad_norm": 0.7727073431015015, + "learning_rate": 3.826071731585346e-06, + "loss": 2.0676, + "step": 21539 + }, + { + "epoch": 0.72, + "grad_norm": 0.7821904420852661, + "learning_rate": 3.82523563641272e-06, + "loss": 2.0193, + "step": 21540 + }, + { + "epoch": 0.72, + "grad_norm": 0.7399191856384277, + "learning_rate": 3.824399610998966e-06, + "loss": 2.0892, + "step": 21541 + }, + { + "epoch": 0.72, + "grad_norm": 0.7797570824623108, + "learning_rate": 3.823563655353528e-06, + "loss": 2.0599, + "step": 21542 + }, + { + "epoch": 0.72, + "grad_norm": 0.7480449080467224, + "learning_rate": 3.822727769485843e-06, + "loss": 1.9836, + "step": 21543 + }, + { + "epoch": 0.72, + "grad_norm": 0.7507665753364563, + "learning_rate": 3.821891953405363e-06, + "loss": 2.0347, + "step": 21544 + }, + { + "epoch": 0.72, + "grad_norm": 0.7362467646598816, + "learning_rate": 3.8210562071215244e-06, + "loss": 2.0395, + "step": 21545 + }, + { + "epoch": 0.72, + "grad_norm": 0.740428626537323, + "learning_rate": 3.820220530643771e-06, + "loss": 2.0462, + "step": 21546 + }, + { + "epoch": 0.72, + "grad_norm": 0.7387788891792297, + "learning_rate": 3.8193849239815476e-06, + "loss": 2.0533, + "step": 21547 + }, + { + "epoch": 0.72, + "grad_norm": 0.7580827474594116, + "learning_rate": 3.818549387144292e-06, + "loss": 1.9864, + "step": 21548 + }, + { + "epoch": 0.72, + "grad_norm": 0.7236201167106628, + "learning_rate": 3.817713920141438e-06, + "loss": 2.0719, + "step": 21549 + }, + { + "epoch": 0.72, + "grad_norm": 0.7252009510993958, + "learning_rate": 3.816878522982433e-06, + "loss": 2.0532, + "step": 21550 + }, + { + "epoch": 0.72, + "grad_norm": 0.7367619276046753, + "learning_rate": 3.81604319567671e-06, + "loss": 2.1164, + "step": 21551 + }, + { + "epoch": 0.72, + "grad_norm": 0.7204755544662476, + "learning_rate": 3.815207938233705e-06, + "loss": 2.0013, + "step": 21552 + }, + { + "epoch": 0.72, + "grad_norm": 0.7876721620559692, + "learning_rate": 3.8143727506628557e-06, + "loss": 2.0542, + "step": 21553 + }, + { + "epoch": 0.72, + "grad_norm": 0.731182873249054, + "learning_rate": 3.8135376329736006e-06, + "loss": 2.0512, + "step": 21554 + }, + { + "epoch": 0.72, + "grad_norm": 0.734283983707428, + "learning_rate": 3.8127025851753717e-06, + "loss": 2.043, + "step": 21555 + }, + { + "epoch": 0.72, + "grad_norm": 0.7458754181861877, + "learning_rate": 3.8118676072775996e-06, + "loss": 2.0813, + "step": 21556 + }, + { + "epoch": 0.72, + "grad_norm": 0.7408250570297241, + "learning_rate": 3.8110326992897252e-06, + "loss": 2.1003, + "step": 21557 + }, + { + "epoch": 0.72, + "grad_norm": 0.7290232181549072, + "learning_rate": 3.810197861221171e-06, + "loss": 2.0626, + "step": 21558 + }, + { + "epoch": 0.72, + "grad_norm": 0.7285833954811096, + "learning_rate": 3.809363093081375e-06, + "loss": 2.0462, + "step": 21559 + }, + { + "epoch": 0.72, + "grad_norm": 0.7419911026954651, + "learning_rate": 3.8085283948797737e-06, + "loss": 2.0444, + "step": 21560 + }, + { + "epoch": 0.72, + "grad_norm": 0.7306240200996399, + "learning_rate": 3.807693766625782e-06, + "loss": 1.984, + "step": 21561 + }, + { + "epoch": 0.72, + "grad_norm": 0.7614285349845886, + "learning_rate": 3.806859208328838e-06, + "loss": 2.0794, + "step": 21562 + }, + { + "epoch": 0.72, + "grad_norm": 0.756132960319519, + "learning_rate": 3.8060247199983724e-06, + "loss": 2.0412, + "step": 21563 + }, + { + "epoch": 0.72, + "grad_norm": 0.736219048500061, + "learning_rate": 3.8051903016438097e-06, + "loss": 2.0665, + "step": 21564 + }, + { + "epoch": 0.72, + "grad_norm": 0.7423990964889526, + "learning_rate": 3.8043559532745722e-06, + "loss": 2.0392, + "step": 21565 + }, + { + "epoch": 0.72, + "grad_norm": 0.7708452939987183, + "learning_rate": 3.8035216749000946e-06, + "loss": 2.0705, + "step": 21566 + }, + { + "epoch": 0.72, + "grad_norm": 0.7682214975357056, + "learning_rate": 3.8026874665297942e-06, + "loss": 2.0665, + "step": 21567 + }, + { + "epoch": 0.72, + "grad_norm": 0.717788815498352, + "learning_rate": 3.801853328173097e-06, + "loss": 2.0148, + "step": 21568 + }, + { + "epoch": 0.72, + "grad_norm": 0.7442294359207153, + "learning_rate": 3.8010192598394336e-06, + "loss": 2.0644, + "step": 21569 + }, + { + "epoch": 0.72, + "grad_norm": 0.7120874524116516, + "learning_rate": 3.800185261538222e-06, + "loss": 2.0158, + "step": 21570 + }, + { + "epoch": 0.72, + "grad_norm": 0.7640377879142761, + "learning_rate": 3.7993513332788788e-06, + "loss": 2.0566, + "step": 21571 + }, + { + "epoch": 0.72, + "grad_norm": 0.7456615567207336, + "learning_rate": 3.798517475070832e-06, + "loss": 1.9765, + "step": 21572 + }, + { + "epoch": 0.72, + "grad_norm": 0.751395583152771, + "learning_rate": 3.797683686923507e-06, + "loss": 2.0656, + "step": 21573 + }, + { + "epoch": 0.72, + "grad_norm": 0.7519612908363342, + "learning_rate": 3.796849968846309e-06, + "loss": 2.078, + "step": 21574 + }, + { + "epoch": 0.72, + "grad_norm": 0.7474293112754822, + "learning_rate": 3.7960163208486644e-06, + "loss": 2.078, + "step": 21575 + }, + { + "epoch": 0.72, + "grad_norm": 0.7299622893333435, + "learning_rate": 3.7951827429399956e-06, + "loss": 2.1233, + "step": 21576 + }, + { + "epoch": 0.72, + "grad_norm": 0.7540601491928101, + "learning_rate": 3.794349235129712e-06, + "loss": 2.0729, + "step": 21577 + }, + { + "epoch": 0.72, + "grad_norm": 0.7533949017524719, + "learning_rate": 3.7935157974272373e-06, + "loss": 2.0649, + "step": 21578 + }, + { + "epoch": 0.72, + "grad_norm": 0.7274425625801086, + "learning_rate": 3.7926824298419853e-06, + "loss": 2.0529, + "step": 21579 + }, + { + "epoch": 0.72, + "grad_norm": 0.7268219590187073, + "learning_rate": 3.791849132383364e-06, + "loss": 2.0991, + "step": 21580 + }, + { + "epoch": 0.72, + "grad_norm": 0.7536106705665588, + "learning_rate": 3.791015905060793e-06, + "loss": 2.0744, + "step": 21581 + }, + { + "epoch": 0.72, + "grad_norm": 0.7600683569908142, + "learning_rate": 3.7901827478836895e-06, + "loss": 2.0488, + "step": 21582 + }, + { + "epoch": 0.72, + "grad_norm": 0.7468461990356445, + "learning_rate": 3.789349660861462e-06, + "loss": 2.0704, + "step": 21583 + }, + { + "epoch": 0.72, + "grad_norm": 0.7356901168823242, + "learning_rate": 3.7885166440035195e-06, + "loss": 1.9909, + "step": 21584 + }, + { + "epoch": 0.72, + "grad_norm": 0.728850781917572, + "learning_rate": 3.787683697319278e-06, + "loss": 2.0298, + "step": 21585 + }, + { + "epoch": 0.72, + "grad_norm": 0.7194045186042786, + "learning_rate": 3.7868508208181453e-06, + "loss": 2.102, + "step": 21586 + }, + { + "epoch": 0.72, + "grad_norm": 0.7471626996994019, + "learning_rate": 3.786018014509528e-06, + "loss": 2.0547, + "step": 21587 + }, + { + "epoch": 0.72, + "grad_norm": 0.7512971758842468, + "learning_rate": 3.7851852784028374e-06, + "loss": 2.0013, + "step": 21588 + }, + { + "epoch": 0.72, + "grad_norm": 0.7494862079620361, + "learning_rate": 3.7843526125074847e-06, + "loss": 2.0233, + "step": 21589 + }, + { + "epoch": 0.72, + "grad_norm": 0.7349495887756348, + "learning_rate": 3.783520016832869e-06, + "loss": 2.0624, + "step": 21590 + }, + { + "epoch": 0.72, + "grad_norm": 0.736751139163971, + "learning_rate": 3.782687491388406e-06, + "loss": 2.0592, + "step": 21591 + }, + { + "epoch": 0.72, + "grad_norm": 0.7692188620567322, + "learning_rate": 3.781855036183495e-06, + "loss": 2.0273, + "step": 21592 + }, + { + "epoch": 0.72, + "grad_norm": 0.7243779301643372, + "learning_rate": 3.7810226512275385e-06, + "loss": 1.9866, + "step": 21593 + }, + { + "epoch": 0.72, + "grad_norm": 0.7490507364273071, + "learning_rate": 3.780190336529943e-06, + "loss": 2.0309, + "step": 21594 + }, + { + "epoch": 0.72, + "grad_norm": 0.7662305235862732, + "learning_rate": 3.7793580921001195e-06, + "loss": 2.0796, + "step": 21595 + }, + { + "epoch": 0.72, + "grad_norm": 0.7392173409461975, + "learning_rate": 3.7785259179474544e-06, + "loss": 2.0492, + "step": 21596 + }, + { + "epoch": 0.72, + "grad_norm": 0.7431060671806335, + "learning_rate": 3.777693814081358e-06, + "loss": 2.0363, + "step": 21597 + }, + { + "epoch": 0.72, + "grad_norm": 0.7477800250053406, + "learning_rate": 3.776861780511234e-06, + "loss": 2.0598, + "step": 21598 + }, + { + "epoch": 0.72, + "grad_norm": 0.7250113487243652, + "learning_rate": 3.7760298172464747e-06, + "loss": 2.0449, + "step": 21599 + }, + { + "epoch": 0.72, + "grad_norm": 0.7409761548042297, + "learning_rate": 3.7751979242964878e-06, + "loss": 2.0817, + "step": 21600 + }, + { + "epoch": 0.72, + "grad_norm": 0.7400169968605042, + "learning_rate": 3.7743661016706646e-06, + "loss": 2.0246, + "step": 21601 + }, + { + "epoch": 0.72, + "grad_norm": 0.7603542804718018, + "learning_rate": 3.7735343493784026e-06, + "loss": 2.1047, + "step": 21602 + }, + { + "epoch": 0.72, + "grad_norm": 0.7335270643234253, + "learning_rate": 3.7727026674291e-06, + "loss": 2.0021, + "step": 21603 + }, + { + "epoch": 0.72, + "grad_norm": 0.7503976225852966, + "learning_rate": 3.771871055832156e-06, + "loss": 2.0831, + "step": 21604 + }, + { + "epoch": 0.72, + "grad_norm": 0.7597103118896484, + "learning_rate": 3.771039514596964e-06, + "loss": 2.0277, + "step": 21605 + }, + { + "epoch": 0.72, + "grad_norm": 0.7354955077171326, + "learning_rate": 3.7702080437329126e-06, + "loss": 2.0297, + "step": 21606 + }, + { + "epoch": 0.72, + "grad_norm": 0.7315601110458374, + "learning_rate": 3.769376643249403e-06, + "loss": 2.0882, + "step": 21607 + }, + { + "epoch": 0.72, + "grad_norm": 0.7215171456336975, + "learning_rate": 3.7685453131558214e-06, + "loss": 2.1018, + "step": 21608 + }, + { + "epoch": 0.72, + "grad_norm": 0.7600667476654053, + "learning_rate": 3.7677140534615665e-06, + "loss": 2.0475, + "step": 21609 + }, + { + "epoch": 0.72, + "grad_norm": 0.7460535764694214, + "learning_rate": 3.7668828641760223e-06, + "loss": 2.0307, + "step": 21610 + }, + { + "epoch": 0.72, + "grad_norm": 0.7252292633056641, + "learning_rate": 3.7660517453085855e-06, + "loss": 1.9611, + "step": 21611 + }, + { + "epoch": 0.72, + "grad_norm": 0.7567553520202637, + "learning_rate": 3.76522069686864e-06, + "loss": 2.0753, + "step": 21612 + }, + { + "epoch": 0.72, + "grad_norm": 0.7616589665412903, + "learning_rate": 3.7643897188655797e-06, + "loss": 2.0521, + "step": 21613 + }, + { + "epoch": 0.72, + "grad_norm": 0.7324743866920471, + "learning_rate": 3.7635588113087906e-06, + "loss": 2.0433, + "step": 21614 + }, + { + "epoch": 0.72, + "grad_norm": 0.732434093952179, + "learning_rate": 3.762727974207655e-06, + "loss": 2.0907, + "step": 21615 + }, + { + "epoch": 0.72, + "grad_norm": 1.3238574266433716, + "learning_rate": 3.7618972075715643e-06, + "loss": 2.0421, + "step": 21616 + }, + { + "epoch": 0.72, + "grad_norm": 0.7584875226020813, + "learning_rate": 3.761066511409909e-06, + "loss": 2.0903, + "step": 21617 + }, + { + "epoch": 0.72, + "grad_norm": 0.7575371861457825, + "learning_rate": 3.760235885732062e-06, + "loss": 2.063, + "step": 21618 + }, + { + "epoch": 0.72, + "grad_norm": 0.7505260705947876, + "learning_rate": 3.759405330547412e-06, + "loss": 2.0031, + "step": 21619 + }, + { + "epoch": 0.72, + "grad_norm": 0.7564098834991455, + "learning_rate": 3.758574845865347e-06, + "loss": 2.0545, + "step": 21620 + }, + { + "epoch": 0.72, + "grad_norm": 0.7262295484542847, + "learning_rate": 3.757744431695243e-06, + "loss": 2.0283, + "step": 21621 + }, + { + "epoch": 0.72, + "grad_norm": 0.774411678314209, + "learning_rate": 3.756914088046487e-06, + "loss": 2.0562, + "step": 21622 + }, + { + "epoch": 0.72, + "grad_norm": 0.7485014200210571, + "learning_rate": 3.7560838149284564e-06, + "loss": 2.0398, + "step": 21623 + }, + { + "epoch": 0.72, + "grad_norm": 0.7545369863510132, + "learning_rate": 3.755253612350528e-06, + "loss": 1.9667, + "step": 21624 + }, + { + "epoch": 0.72, + "grad_norm": 0.7644257545471191, + "learning_rate": 3.7544234803220848e-06, + "loss": 1.9995, + "step": 21625 + }, + { + "epoch": 0.72, + "grad_norm": 0.7792132496833801, + "learning_rate": 3.753593418852509e-06, + "loss": 1.9856, + "step": 21626 + }, + { + "epoch": 0.72, + "grad_norm": 0.7371617555618286, + "learning_rate": 3.752763427951174e-06, + "loss": 2.0818, + "step": 21627 + }, + { + "epoch": 0.72, + "grad_norm": 0.7424009442329407, + "learning_rate": 3.751933507627452e-06, + "loss": 2.0714, + "step": 21628 + }, + { + "epoch": 0.72, + "grad_norm": 0.7716426849365234, + "learning_rate": 3.7511036578907267e-06, + "loss": 2.0761, + "step": 21629 + }, + { + "epoch": 0.72, + "grad_norm": 0.7692919969558716, + "learning_rate": 3.7502738787503677e-06, + "loss": 2.1072, + "step": 21630 + }, + { + "epoch": 0.72, + "grad_norm": 0.741477906703949, + "learning_rate": 3.7494441702157545e-06, + "loss": 2.0018, + "step": 21631 + }, + { + "epoch": 0.72, + "grad_norm": 0.7506579756736755, + "learning_rate": 3.7486145322962555e-06, + "loss": 2.0598, + "step": 21632 + }, + { + "epoch": 0.72, + "grad_norm": 0.7790831923484802, + "learning_rate": 3.747784965001249e-06, + "loss": 2.0586, + "step": 21633 + }, + { + "epoch": 0.72, + "grad_norm": 0.8017479181289673, + "learning_rate": 3.7469554683400987e-06, + "loss": 2.0095, + "step": 21634 + }, + { + "epoch": 0.72, + "grad_norm": 0.7882770299911499, + "learning_rate": 3.7461260423221857e-06, + "loss": 2.1719, + "step": 21635 + }, + { + "epoch": 0.72, + "grad_norm": 0.7295850515365601, + "learning_rate": 3.745296686956876e-06, + "loss": 2.0146, + "step": 21636 + }, + { + "epoch": 0.72, + "grad_norm": 0.7515215873718262, + "learning_rate": 3.7444674022535343e-06, + "loss": 2.0802, + "step": 21637 + }, + { + "epoch": 0.72, + "grad_norm": 0.720660388469696, + "learning_rate": 3.7436381882215343e-06, + "loss": 2.0381, + "step": 21638 + }, + { + "epoch": 0.72, + "grad_norm": 0.7594308853149414, + "learning_rate": 3.742809044870247e-06, + "loss": 2.0652, + "step": 21639 + }, + { + "epoch": 0.72, + "grad_norm": 0.7497535347938538, + "learning_rate": 3.7419799722090356e-06, + "loss": 2.0745, + "step": 21640 + }, + { + "epoch": 0.72, + "grad_norm": 0.7305979132652283, + "learning_rate": 3.741150970247264e-06, + "loss": 2.0846, + "step": 21641 + }, + { + "epoch": 0.72, + "grad_norm": 0.7293744087219238, + "learning_rate": 3.740322038994304e-06, + "loss": 2.011, + "step": 21642 + }, + { + "epoch": 0.72, + "grad_norm": 0.7398267984390259, + "learning_rate": 3.7394931784595132e-06, + "loss": 2.0731, + "step": 21643 + }, + { + "epoch": 0.72, + "grad_norm": 0.7478500008583069, + "learning_rate": 3.7386643886522635e-06, + "loss": 1.961, + "step": 21644 + }, + { + "epoch": 0.72, + "grad_norm": 0.7407857179641724, + "learning_rate": 3.737835669581913e-06, + "loss": 2.0444, + "step": 21645 + }, + { + "epoch": 0.72, + "grad_norm": 0.7605364322662354, + "learning_rate": 3.7370070212578223e-06, + "loss": 2.0855, + "step": 21646 + }, + { + "epoch": 0.72, + "grad_norm": 0.7235673666000366, + "learning_rate": 3.7361784436893554e-06, + "loss": 1.9357, + "step": 21647 + }, + { + "epoch": 0.72, + "grad_norm": 0.7532749176025391, + "learning_rate": 3.7353499368858782e-06, + "loss": 1.9676, + "step": 21648 + }, + { + "epoch": 0.72, + "grad_norm": 0.7507176399230957, + "learning_rate": 3.7345215008567447e-06, + "loss": 2.0896, + "step": 21649 + }, + { + "epoch": 0.72, + "grad_norm": 0.7307401895523071, + "learning_rate": 3.7336931356113127e-06, + "loss": 2.0523, + "step": 21650 + }, + { + "epoch": 0.72, + "grad_norm": 0.773980438709259, + "learning_rate": 3.7328648411589463e-06, + "loss": 2.1132, + "step": 21651 + }, + { + "epoch": 0.72, + "grad_norm": 0.7169507741928101, + "learning_rate": 3.7320366175089962e-06, + "loss": 1.9981, + "step": 21652 + }, + { + "epoch": 0.72, + "grad_norm": 0.7570340633392334, + "learning_rate": 3.731208464670827e-06, + "loss": 2.1106, + "step": 21653 + }, + { + "epoch": 0.72, + "grad_norm": 0.7264828681945801, + "learning_rate": 3.7303803826537867e-06, + "loss": 2.086, + "step": 21654 + }, + { + "epoch": 0.72, + "grad_norm": 0.7211494445800781, + "learning_rate": 3.729552371467239e-06, + "loss": 2.0341, + "step": 21655 + }, + { + "epoch": 0.72, + "grad_norm": 0.7361804842948914, + "learning_rate": 3.7287244311205296e-06, + "loss": 2.0821, + "step": 21656 + }, + { + "epoch": 0.72, + "grad_norm": 0.7795487642288208, + "learning_rate": 3.727896561623019e-06, + "loss": 2.123, + "step": 21657 + }, + { + "epoch": 0.72, + "grad_norm": 0.7395868897438049, + "learning_rate": 3.7270687629840586e-06, + "loss": 1.9913, + "step": 21658 + }, + { + "epoch": 0.72, + "grad_norm": 0.7265045642852783, + "learning_rate": 3.726241035212995e-06, + "loss": 2.0234, + "step": 21659 + }, + { + "epoch": 0.72, + "grad_norm": 0.7518748044967651, + "learning_rate": 3.7254133783191827e-06, + "loss": 2.0581, + "step": 21660 + }, + { + "epoch": 0.72, + "grad_norm": 0.7395328879356384, + "learning_rate": 3.7245857923119775e-06, + "loss": 2.0314, + "step": 21661 + }, + { + "epoch": 0.72, + "grad_norm": 0.7616763114929199, + "learning_rate": 3.723758277200723e-06, + "loss": 2.0566, + "step": 21662 + }, + { + "epoch": 0.72, + "grad_norm": 0.7270215749740601, + "learning_rate": 3.7229308329947665e-06, + "loss": 2.0851, + "step": 21663 + }, + { + "epoch": 0.72, + "grad_norm": 0.7578681707382202, + "learning_rate": 3.7221034597034624e-06, + "loss": 2.0584, + "step": 21664 + }, + { + "epoch": 0.72, + "grad_norm": 0.7401824593544006, + "learning_rate": 3.72127615733615e-06, + "loss": 2.015, + "step": 21665 + }, + { + "epoch": 0.72, + "grad_norm": 0.7593291401863098, + "learning_rate": 3.720448925902185e-06, + "loss": 2.0208, + "step": 21666 + }, + { + "epoch": 0.72, + "grad_norm": 0.7248796224594116, + "learning_rate": 3.719621765410906e-06, + "loss": 2.053, + "step": 21667 + }, + { + "epoch": 0.72, + "grad_norm": 0.7829385995864868, + "learning_rate": 3.7187946758716563e-06, + "loss": 2.0669, + "step": 21668 + }, + { + "epoch": 0.72, + "grad_norm": 0.7575364708900452, + "learning_rate": 3.7179676572937838e-06, + "loss": 2.1045, + "step": 21669 + }, + { + "epoch": 0.72, + "grad_norm": 0.7770982384681702, + "learning_rate": 3.717140709686635e-06, + "loss": 2.094, + "step": 21670 + }, + { + "epoch": 0.72, + "grad_norm": 0.7504127025604248, + "learning_rate": 3.7163138330595473e-06, + "loss": 2.0694, + "step": 21671 + }, + { + "epoch": 0.72, + "grad_norm": 0.7380024194717407, + "learning_rate": 3.71548702742186e-06, + "loss": 1.9964, + "step": 21672 + }, + { + "epoch": 0.72, + "grad_norm": 0.7207601070404053, + "learning_rate": 3.7146602927829178e-06, + "loss": 2.0068, + "step": 21673 + }, + { + "epoch": 0.72, + "grad_norm": 0.7460630536079407, + "learning_rate": 3.713833629152064e-06, + "loss": 2.0684, + "step": 21674 + }, + { + "epoch": 0.72, + "grad_norm": 0.7658509612083435, + "learning_rate": 3.713007036538633e-06, + "loss": 2.0315, + "step": 21675 + }, + { + "epoch": 0.72, + "grad_norm": 0.7567182183265686, + "learning_rate": 3.7121805149519596e-06, + "loss": 2.025, + "step": 21676 + }, + { + "epoch": 0.72, + "grad_norm": 0.7388661503791809, + "learning_rate": 3.711354064401391e-06, + "loss": 2.0786, + "step": 21677 + }, + { + "epoch": 0.72, + "grad_norm": 0.7182818055152893, + "learning_rate": 3.7105276848962535e-06, + "loss": 2.0318, + "step": 21678 + }, + { + "epoch": 0.72, + "grad_norm": 0.7276864051818848, + "learning_rate": 3.7097013764458935e-06, + "loss": 2.0892, + "step": 21679 + }, + { + "epoch": 0.72, + "grad_norm": 0.7617847919464111, + "learning_rate": 3.708875139059639e-06, + "loss": 2.051, + "step": 21680 + }, + { + "epoch": 0.72, + "grad_norm": 0.7668141722679138, + "learning_rate": 3.708048972746824e-06, + "loss": 2.0327, + "step": 21681 + }, + { + "epoch": 0.72, + "grad_norm": 0.7541234493255615, + "learning_rate": 3.707222877516784e-06, + "loss": 1.9961, + "step": 21682 + }, + { + "epoch": 0.72, + "grad_norm": 0.7529972195625305, + "learning_rate": 3.706396853378855e-06, + "loss": 2.0738, + "step": 21683 + }, + { + "epoch": 0.72, + "grad_norm": 0.7632958889007568, + "learning_rate": 3.705570900342367e-06, + "loss": 1.9683, + "step": 21684 + }, + { + "epoch": 0.72, + "grad_norm": 0.7876235246658325, + "learning_rate": 3.7047450184166457e-06, + "loss": 2.0481, + "step": 21685 + }, + { + "epoch": 0.72, + "grad_norm": 0.7209914922714233, + "learning_rate": 3.70391920761103e-06, + "loss": 2.0077, + "step": 21686 + }, + { + "epoch": 0.72, + "grad_norm": 0.7664456367492676, + "learning_rate": 3.703093467934841e-06, + "loss": 2.0344, + "step": 21687 + }, + { + "epoch": 0.72, + "grad_norm": 0.7390437722206116, + "learning_rate": 3.702267799397414e-06, + "loss": 2.0167, + "step": 21688 + }, + { + "epoch": 0.72, + "grad_norm": 0.7355640530586243, + "learning_rate": 3.7014422020080733e-06, + "loss": 1.9747, + "step": 21689 + }, + { + "epoch": 0.72, + "grad_norm": 0.7546474933624268, + "learning_rate": 3.7006166757761496e-06, + "loss": 2.1081, + "step": 21690 + }, + { + "epoch": 0.72, + "grad_norm": 0.7680472135543823, + "learning_rate": 3.6997912207109644e-06, + "loss": 2.1282, + "step": 21691 + }, + { + "epoch": 0.72, + "grad_norm": 0.735420286655426, + "learning_rate": 3.6989658368218484e-06, + "loss": 2.0238, + "step": 21692 + }, + { + "epoch": 0.72, + "grad_norm": 0.7936490774154663, + "learning_rate": 3.6981405241181232e-06, + "loss": 2.0866, + "step": 21693 + }, + { + "epoch": 0.72, + "grad_norm": 0.7604075074195862, + "learning_rate": 3.6973152826091106e-06, + "loss": 2.0981, + "step": 21694 + }, + { + "epoch": 0.72, + "grad_norm": 0.7325469255447388, + "learning_rate": 3.696490112304135e-06, + "loss": 2.0413, + "step": 21695 + }, + { + "epoch": 0.72, + "grad_norm": 0.7955654859542847, + "learning_rate": 3.695665013212525e-06, + "loss": 2.1248, + "step": 21696 + }, + { + "epoch": 0.72, + "grad_norm": 0.7450180053710938, + "learning_rate": 3.694839985343596e-06, + "loss": 2.0153, + "step": 21697 + }, + { + "epoch": 0.72, + "grad_norm": 0.742081344127655, + "learning_rate": 3.6940150287066656e-06, + "loss": 2.0598, + "step": 21698 + }, + { + "epoch": 0.72, + "grad_norm": 0.7770227789878845, + "learning_rate": 3.6931901433110627e-06, + "loss": 2.1071, + "step": 21699 + }, + { + "epoch": 0.72, + "grad_norm": 0.7610399723052979, + "learning_rate": 3.6923653291660967e-06, + "loss": 2.103, + "step": 21700 + }, + { + "epoch": 0.72, + "grad_norm": 0.734785795211792, + "learning_rate": 3.691540586281095e-06, + "loss": 2.0988, + "step": 21701 + }, + { + "epoch": 0.72, + "grad_norm": 0.7467179298400879, + "learning_rate": 3.6907159146653694e-06, + "loss": 2.0876, + "step": 21702 + }, + { + "epoch": 0.72, + "grad_norm": 0.7484380602836609, + "learning_rate": 3.6898913143282355e-06, + "loss": 2.061, + "step": 21703 + }, + { + "epoch": 0.72, + "grad_norm": 0.7441505789756775, + "learning_rate": 3.6890667852790106e-06, + "loss": 2.0405, + "step": 21704 + }, + { + "epoch": 0.72, + "grad_norm": 0.7350597977638245, + "learning_rate": 3.688242327527014e-06, + "loss": 2.0346, + "step": 21705 + }, + { + "epoch": 0.72, + "grad_norm": 0.7352396845817566, + "learning_rate": 3.687417941081557e-06, + "loss": 2.0013, + "step": 21706 + }, + { + "epoch": 0.72, + "grad_norm": 0.7347570657730103, + "learning_rate": 3.68659362595195e-06, + "loss": 2.0555, + "step": 21707 + }, + { + "epoch": 0.72, + "grad_norm": 0.7444417476654053, + "learning_rate": 3.6857693821475104e-06, + "loss": 2.11, + "step": 21708 + }, + { + "epoch": 0.72, + "grad_norm": 0.7247772216796875, + "learning_rate": 3.684945209677544e-06, + "loss": 2.0183, + "step": 21709 + }, + { + "epoch": 0.72, + "grad_norm": 0.7479152083396912, + "learning_rate": 3.6841211085513705e-06, + "loss": 2.077, + "step": 21710 + }, + { + "epoch": 0.72, + "grad_norm": 0.7393128275871277, + "learning_rate": 3.683297078778291e-06, + "loss": 2.0611, + "step": 21711 + }, + { + "epoch": 0.72, + "grad_norm": 0.7576247453689575, + "learning_rate": 3.6824731203676223e-06, + "loss": 2.0206, + "step": 21712 + }, + { + "epoch": 0.72, + "grad_norm": 0.7845340967178345, + "learning_rate": 3.681649233328667e-06, + "loss": 2.0926, + "step": 21713 + }, + { + "epoch": 0.72, + "grad_norm": 0.7492756247520447, + "learning_rate": 3.6808254176707393e-06, + "loss": 2.0818, + "step": 21714 + }, + { + "epoch": 0.72, + "grad_norm": 0.743295431137085, + "learning_rate": 3.680001673403142e-06, + "loss": 2.0029, + "step": 21715 + }, + { + "epoch": 0.72, + "grad_norm": 0.7412112951278687, + "learning_rate": 3.6791780005351784e-06, + "loss": 2.0743, + "step": 21716 + }, + { + "epoch": 0.72, + "grad_norm": 0.7301713228225708, + "learning_rate": 3.6783543990761585e-06, + "loss": 2.021, + "step": 21717 + }, + { + "epoch": 0.72, + "grad_norm": 0.7687027454376221, + "learning_rate": 3.677530869035388e-06, + "loss": 2.0413, + "step": 21718 + }, + { + "epoch": 0.72, + "grad_norm": 0.7344357371330261, + "learning_rate": 3.676707410422169e-06, + "loss": 2.0372, + "step": 21719 + }, + { + "epoch": 0.72, + "grad_norm": 0.7597854137420654, + "learning_rate": 3.6758840232458005e-06, + "loss": 2.1309, + "step": 21720 + }, + { + "epoch": 0.72, + "grad_norm": 0.7847921848297119, + "learning_rate": 3.6750607075155907e-06, + "loss": 2.0378, + "step": 21721 + }, + { + "epoch": 0.72, + "grad_norm": 0.7579687833786011, + "learning_rate": 3.674237463240835e-06, + "loss": 2.0243, + "step": 21722 + }, + { + "epoch": 0.72, + "grad_norm": 0.7426260113716125, + "learning_rate": 3.673414290430838e-06, + "loss": 2.0559, + "step": 21723 + }, + { + "epoch": 0.72, + "grad_norm": 0.721921980381012, + "learning_rate": 3.6725911890949053e-06, + "loss": 2.0581, + "step": 21724 + }, + { + "epoch": 0.72, + "grad_norm": 0.7688610553741455, + "learning_rate": 3.6717681592423217e-06, + "loss": 2.115, + "step": 21725 + }, + { + "epoch": 0.72, + "grad_norm": 0.7677981853485107, + "learning_rate": 3.670945200882393e-06, + "loss": 2.0566, + "step": 21726 + }, + { + "epoch": 0.72, + "grad_norm": 0.6997689604759216, + "learning_rate": 3.670122314024419e-06, + "loss": 1.9801, + "step": 21727 + }, + { + "epoch": 0.72, + "grad_norm": 0.727685272693634, + "learning_rate": 3.6692994986776944e-06, + "loss": 1.9876, + "step": 21728 + }, + { + "epoch": 0.72, + "grad_norm": 0.7522323727607727, + "learning_rate": 3.66847675485151e-06, + "loss": 1.9586, + "step": 21729 + }, + { + "epoch": 0.72, + "grad_norm": 0.7319626808166504, + "learning_rate": 3.6676540825551676e-06, + "loss": 1.9768, + "step": 21730 + }, + { + "epoch": 0.72, + "grad_norm": 0.728995680809021, + "learning_rate": 3.6668314817979554e-06, + "loss": 2.0806, + "step": 21731 + }, + { + "epoch": 0.72, + "grad_norm": 0.7291548848152161, + "learning_rate": 3.666008952589173e-06, + "loss": 1.9802, + "step": 21732 + }, + { + "epoch": 0.72, + "grad_norm": 0.7720770239830017, + "learning_rate": 3.6651864949381068e-06, + "loss": 1.9775, + "step": 21733 + }, + { + "epoch": 0.72, + "grad_norm": 0.774888813495636, + "learning_rate": 3.6643641088540526e-06, + "loss": 2.1124, + "step": 21734 + }, + { + "epoch": 0.72, + "grad_norm": 0.7967333197593689, + "learning_rate": 3.663541794346297e-06, + "loss": 2.1005, + "step": 21735 + }, + { + "epoch": 0.72, + "grad_norm": 0.7408243417739868, + "learning_rate": 3.6627195514241365e-06, + "loss": 2.0343, + "step": 21736 + }, + { + "epoch": 0.72, + "grad_norm": 0.7290113568305969, + "learning_rate": 3.661897380096856e-06, + "loss": 2.0116, + "step": 21737 + }, + { + "epoch": 0.72, + "grad_norm": 0.819386899471283, + "learning_rate": 3.6610752803737415e-06, + "loss": 2.0024, + "step": 21738 + }, + { + "epoch": 0.72, + "grad_norm": 0.7865351438522339, + "learning_rate": 3.660253252264083e-06, + "loss": 2.0126, + "step": 21739 + }, + { + "epoch": 0.72, + "grad_norm": 0.7646365165710449, + "learning_rate": 3.6594312957771716e-06, + "loss": 2.0702, + "step": 21740 + }, + { + "epoch": 0.72, + "grad_norm": 0.7418750524520874, + "learning_rate": 3.6586094109222893e-06, + "loss": 2.1008, + "step": 21741 + }, + { + "epoch": 0.72, + "grad_norm": 0.7796114087104797, + "learning_rate": 3.6577875977087186e-06, + "loss": 2.0482, + "step": 21742 + }, + { + "epoch": 0.72, + "grad_norm": 0.7574689984321594, + "learning_rate": 3.6569658561457487e-06, + "loss": 2.0993, + "step": 21743 + }, + { + "epoch": 0.72, + "grad_norm": 0.7521612048149109, + "learning_rate": 3.6561441862426593e-06, + "loss": 2.0342, + "step": 21744 + }, + { + "epoch": 0.72, + "grad_norm": 0.7477236390113831, + "learning_rate": 3.655322588008734e-06, + "loss": 2.0391, + "step": 21745 + }, + { + "epoch": 0.72, + "grad_norm": 0.738766074180603, + "learning_rate": 3.654501061453263e-06, + "loss": 2.0567, + "step": 21746 + }, + { + "epoch": 0.72, + "grad_norm": 0.762089192867279, + "learning_rate": 3.653679606585513e-06, + "loss": 2.0237, + "step": 21747 + }, + { + "epoch": 0.72, + "grad_norm": 0.7420791983604431, + "learning_rate": 3.6528582234147725e-06, + "loss": 2.006, + "step": 21748 + }, + { + "epoch": 0.72, + "grad_norm": 0.7567542791366577, + "learning_rate": 3.6520369119503228e-06, + "loss": 2.048, + "step": 21749 + }, + { + "epoch": 0.72, + "grad_norm": 0.7451452612876892, + "learning_rate": 3.6512156722014392e-06, + "loss": 2.0402, + "step": 21750 + }, + { + "epoch": 0.72, + "grad_norm": 0.7255205512046814, + "learning_rate": 3.650394504177397e-06, + "loss": 2.0051, + "step": 21751 + }, + { + "epoch": 0.72, + "grad_norm": 0.7352153062820435, + "learning_rate": 3.64957340788748e-06, + "loss": 2.0116, + "step": 21752 + }, + { + "epoch": 0.72, + "grad_norm": 0.7253125905990601, + "learning_rate": 3.6487523833409577e-06, + "loss": 2.0115, + "step": 21753 + }, + { + "epoch": 0.72, + "grad_norm": 0.7490274310112, + "learning_rate": 3.6479314305471093e-06, + "loss": 2.0185, + "step": 21754 + }, + { + "epoch": 0.72, + "grad_norm": 0.7254589796066284, + "learning_rate": 3.6471105495152114e-06, + "loss": 1.9686, + "step": 21755 + }, + { + "epoch": 0.72, + "grad_norm": 0.7806925177574158, + "learning_rate": 3.646289740254536e-06, + "loss": 2.011, + "step": 21756 + }, + { + "epoch": 0.72, + "grad_norm": 0.7477225661277771, + "learning_rate": 3.645469002774352e-06, + "loss": 2.0856, + "step": 21757 + }, + { + "epoch": 0.72, + "grad_norm": 0.7251269817352295, + "learning_rate": 3.6446483370839347e-06, + "loss": 2.0422, + "step": 21758 + }, + { + "epoch": 0.72, + "grad_norm": 0.7578094601631165, + "learning_rate": 3.643827743192563e-06, + "loss": 2.0922, + "step": 21759 + }, + { + "epoch": 0.72, + "grad_norm": 0.7645106315612793, + "learning_rate": 3.6430072211094938e-06, + "loss": 2.0263, + "step": 21760 + }, + { + "epoch": 0.72, + "grad_norm": 0.745076060295105, + "learning_rate": 3.642186770844003e-06, + "loss": 2.0871, + "step": 21761 + }, + { + "epoch": 0.72, + "grad_norm": 0.7257620096206665, + "learning_rate": 3.6413663924053633e-06, + "loss": 2.0185, + "step": 21762 + }, + { + "epoch": 0.72, + "grad_norm": 0.7563227415084839, + "learning_rate": 3.6405460858028398e-06, + "loss": 2.0097, + "step": 21763 + }, + { + "epoch": 0.72, + "grad_norm": 0.739338755607605, + "learning_rate": 3.6397258510456957e-06, + "loss": 2.0765, + "step": 21764 + }, + { + "epoch": 0.72, + "grad_norm": 0.7878612279891968, + "learning_rate": 3.6389056881432048e-06, + "loss": 2.1452, + "step": 21765 + }, + { + "epoch": 0.72, + "grad_norm": 0.7562803626060486, + "learning_rate": 3.638085597104627e-06, + "loss": 2.0287, + "step": 21766 + }, + { + "epoch": 0.72, + "grad_norm": 0.7337028980255127, + "learning_rate": 3.63726557793923e-06, + "loss": 2.0919, + "step": 21767 + }, + { + "epoch": 0.72, + "grad_norm": 0.7751567363739014, + "learning_rate": 3.6364456306562834e-06, + "loss": 2.1517, + "step": 21768 + }, + { + "epoch": 0.72, + "grad_norm": 0.766032338142395, + "learning_rate": 3.6356257552650378e-06, + "loss": 2.1445, + "step": 21769 + }, + { + "epoch": 0.72, + "grad_norm": 0.7341062426567078, + "learning_rate": 3.6348059517747624e-06, + "loss": 2.0642, + "step": 21770 + }, + { + "epoch": 0.72, + "grad_norm": 0.7230467796325684, + "learning_rate": 3.633986220194723e-06, + "loss": 2.0457, + "step": 21771 + }, + { + "epoch": 0.72, + "grad_norm": 0.7464435696601868, + "learning_rate": 3.633166560534177e-06, + "loss": 2.0358, + "step": 21772 + }, + { + "epoch": 0.72, + "grad_norm": 0.8250384330749512, + "learning_rate": 3.6323469728023796e-06, + "loss": 1.977, + "step": 21773 + }, + { + "epoch": 0.72, + "grad_norm": 0.733219563961029, + "learning_rate": 3.6315274570085947e-06, + "loss": 2.0779, + "step": 21774 + }, + { + "epoch": 0.72, + "grad_norm": 0.7792534828186035, + "learning_rate": 3.630708013162083e-06, + "loss": 2.0423, + "step": 21775 + }, + { + "epoch": 0.72, + "grad_norm": 0.7413418292999268, + "learning_rate": 3.629888641272097e-06, + "loss": 2.0345, + "step": 21776 + }, + { + "epoch": 0.72, + "grad_norm": 0.7510575652122498, + "learning_rate": 3.6290693413478982e-06, + "loss": 2.0098, + "step": 21777 + }, + { + "epoch": 0.72, + "grad_norm": 0.738841712474823, + "learning_rate": 3.62825011339874e-06, + "loss": 2.0803, + "step": 21778 + }, + { + "epoch": 0.72, + "grad_norm": 0.7418696880340576, + "learning_rate": 3.6274309574338763e-06, + "loss": 1.9769, + "step": 21779 + }, + { + "epoch": 0.72, + "grad_norm": 0.7196971774101257, + "learning_rate": 3.626611873462561e-06, + "loss": 2.0892, + "step": 21780 + }, + { + "epoch": 0.72, + "grad_norm": 0.7523905634880066, + "learning_rate": 3.6257928614940573e-06, + "loss": 2.0395, + "step": 21781 + }, + { + "epoch": 0.72, + "grad_norm": 0.7083031535148621, + "learning_rate": 3.6249739215376035e-06, + "loss": 2.0097, + "step": 21782 + }, + { + "epoch": 0.72, + "grad_norm": 0.7924720644950867, + "learning_rate": 3.6241550536024584e-06, + "loss": 2.0619, + "step": 21783 + }, + { + "epoch": 0.72, + "grad_norm": 0.7433786392211914, + "learning_rate": 3.6233362576978758e-06, + "loss": 2.1054, + "step": 21784 + }, + { + "epoch": 0.72, + "grad_norm": 0.7283850312232971, + "learning_rate": 3.6225175338330997e-06, + "loss": 2.046, + "step": 21785 + }, + { + "epoch": 0.72, + "grad_norm": 0.7239839434623718, + "learning_rate": 3.621698882017386e-06, + "loss": 2.077, + "step": 21786 + }, + { + "epoch": 0.72, + "grad_norm": 0.7538217306137085, + "learning_rate": 3.6208803022599805e-06, + "loss": 1.9973, + "step": 21787 + }, + { + "epoch": 0.72, + "grad_norm": 0.7495763301849365, + "learning_rate": 3.6200617945701277e-06, + "loss": 2.1001, + "step": 21788 + }, + { + "epoch": 0.72, + "grad_norm": 0.7470661997795105, + "learning_rate": 3.6192433589570773e-06, + "loss": 2.0095, + "step": 21789 + }, + { + "epoch": 0.72, + "grad_norm": 0.7537137866020203, + "learning_rate": 3.61842499543008e-06, + "loss": 2.0895, + "step": 21790 + }, + { + "epoch": 0.72, + "grad_norm": 0.7776414155960083, + "learning_rate": 3.6176067039983763e-06, + "loss": 2.0601, + "step": 21791 + }, + { + "epoch": 0.73, + "grad_norm": 0.702070415019989, + "learning_rate": 3.616788484671209e-06, + "loss": 2.043, + "step": 21792 + }, + { + "epoch": 0.73, + "grad_norm": 0.7186402678489685, + "learning_rate": 3.615970337457828e-06, + "loss": 2.0689, + "step": 21793 + }, + { + "epoch": 0.73, + "grad_norm": 0.7302215695381165, + "learning_rate": 3.6151522623674717e-06, + "loss": 2.0362, + "step": 21794 + }, + { + "epoch": 0.73, + "grad_norm": 0.7680872082710266, + "learning_rate": 3.614334259409381e-06, + "loss": 2.0204, + "step": 21795 + }, + { + "epoch": 0.73, + "grad_norm": 0.7586742639541626, + "learning_rate": 3.6135163285927987e-06, + "loss": 2.0576, + "step": 21796 + }, + { + "epoch": 0.73, + "grad_norm": 0.762871265411377, + "learning_rate": 3.6126984699269696e-06, + "loss": 2.077, + "step": 21797 + }, + { + "epoch": 0.73, + "grad_norm": 0.7418943047523499, + "learning_rate": 3.611880683421126e-06, + "loss": 2.1079, + "step": 21798 + }, + { + "epoch": 0.73, + "grad_norm": 0.7461593151092529, + "learning_rate": 3.6110629690845147e-06, + "loss": 2.0769, + "step": 21799 + }, + { + "epoch": 0.73, + "grad_norm": 0.7487608790397644, + "learning_rate": 3.6102453269263695e-06, + "loss": 2.0353, + "step": 21800 + }, + { + "epoch": 0.73, + "grad_norm": 0.7583315968513489, + "learning_rate": 3.6094277569559245e-06, + "loss": 2.0208, + "step": 21801 + }, + { + "epoch": 0.73, + "grad_norm": 0.7332434058189392, + "learning_rate": 3.60861025918242e-06, + "loss": 2.0188, + "step": 21802 + }, + { + "epoch": 0.73, + "grad_norm": 0.7511343359947205, + "learning_rate": 3.607792833615097e-06, + "loss": 2.0375, + "step": 21803 + }, + { + "epoch": 0.73, + "grad_norm": 0.7405607104301453, + "learning_rate": 3.6069754802631773e-06, + "loss": 1.9913, + "step": 21804 + }, + { + "epoch": 0.73, + "grad_norm": 0.778723955154419, + "learning_rate": 3.606158199135903e-06, + "loss": 2.1151, + "step": 21805 + }, + { + "epoch": 0.73, + "grad_norm": 0.7166258096694946, + "learning_rate": 3.6053409902425096e-06, + "loss": 2.0422, + "step": 21806 + }, + { + "epoch": 0.73, + "grad_norm": 0.7846384644508362, + "learning_rate": 3.604523853592222e-06, + "loss": 2.0132, + "step": 21807 + }, + { + "epoch": 0.73, + "grad_norm": 0.7842736840248108, + "learning_rate": 3.603706789194279e-06, + "loss": 2.0872, + "step": 21808 + }, + { + "epoch": 0.73, + "grad_norm": 0.7671645283699036, + "learning_rate": 3.602889797057909e-06, + "loss": 2.04, + "step": 21809 + }, + { + "epoch": 0.73, + "grad_norm": 0.7349531650543213, + "learning_rate": 3.602072877192336e-06, + "loss": 2.0487, + "step": 21810 + }, + { + "epoch": 0.73, + "grad_norm": 0.7678916454315186, + "learning_rate": 3.6012560296067955e-06, + "loss": 2.0043, + "step": 21811 + }, + { + "epoch": 0.73, + "grad_norm": 0.7576454281806946, + "learning_rate": 3.6004392543105182e-06, + "loss": 2.0563, + "step": 21812 + }, + { + "epoch": 0.73, + "grad_norm": 0.7245506644248962, + "learning_rate": 3.599622551312726e-06, + "loss": 2.0552, + "step": 21813 + }, + { + "epoch": 0.73, + "grad_norm": 0.7298718690872192, + "learning_rate": 3.5988059206226455e-06, + "loss": 2.0687, + "step": 21814 + }, + { + "epoch": 0.73, + "grad_norm": 0.7548322081565857, + "learning_rate": 3.5979893622495065e-06, + "loss": 1.9764, + "step": 21815 + }, + { + "epoch": 0.73, + "grad_norm": 0.7611265778541565, + "learning_rate": 3.5971728762025314e-06, + "loss": 2.0329, + "step": 21816 + }, + { + "epoch": 0.73, + "grad_norm": 0.757684588432312, + "learning_rate": 3.596356462490943e-06, + "loss": 2.0344, + "step": 21817 + }, + { + "epoch": 0.73, + "grad_norm": 0.7353269457817078, + "learning_rate": 3.595540121123965e-06, + "loss": 2.0764, + "step": 21818 + }, + { + "epoch": 0.73, + "grad_norm": 0.7585800290107727, + "learning_rate": 3.594723852110825e-06, + "loss": 2.0098, + "step": 21819 + }, + { + "epoch": 0.73, + "grad_norm": 0.7482415437698364, + "learning_rate": 3.5939076554607376e-06, + "loss": 2.0728, + "step": 21820 + }, + { + "epoch": 0.73, + "grad_norm": 0.7166197896003723, + "learning_rate": 3.5930915311829306e-06, + "loss": 2.0238, + "step": 21821 + }, + { + "epoch": 0.73, + "grad_norm": 0.7556038498878479, + "learning_rate": 3.592275479286621e-06, + "loss": 2.0252, + "step": 21822 + }, + { + "epoch": 0.73, + "grad_norm": 0.7464967370033264, + "learning_rate": 3.591459499781025e-06, + "loss": 1.9925, + "step": 21823 + }, + { + "epoch": 0.73, + "grad_norm": 0.8110756278038025, + "learning_rate": 3.5906435926753624e-06, + "loss": 2.0584, + "step": 21824 + }, + { + "epoch": 0.73, + "grad_norm": 0.7550192475318909, + "learning_rate": 3.5898277579788598e-06, + "loss": 2.1613, + "step": 21825 + }, + { + "epoch": 0.73, + "grad_norm": 0.7350652813911438, + "learning_rate": 3.5890119957007184e-06, + "loss": 2.0342, + "step": 21826 + }, + { + "epoch": 0.73, + "grad_norm": 0.7668139934539795, + "learning_rate": 3.5881963058501647e-06, + "loss": 2.0434, + "step": 21827 + }, + { + "epoch": 0.73, + "grad_norm": 0.7406257390975952, + "learning_rate": 3.5873806884364125e-06, + "loss": 2.1098, + "step": 21828 + }, + { + "epoch": 0.73, + "grad_norm": 0.7229348421096802, + "learning_rate": 3.586565143468673e-06, + "loss": 2.0341, + "step": 21829 + }, + { + "epoch": 0.73, + "grad_norm": 0.7453036904335022, + "learning_rate": 3.5857496709561645e-06, + "loss": 1.9912, + "step": 21830 + }, + { + "epoch": 0.73, + "grad_norm": 0.7564787864685059, + "learning_rate": 3.5849342709080983e-06, + "loss": 2.0629, + "step": 21831 + }, + { + "epoch": 0.73, + "grad_norm": 0.742656409740448, + "learning_rate": 3.584118943333681e-06, + "loss": 2.0754, + "step": 21832 + }, + { + "epoch": 0.73, + "grad_norm": 0.7487382888793945, + "learning_rate": 3.583303688242127e-06, + "loss": 2.1315, + "step": 21833 + }, + { + "epoch": 0.73, + "grad_norm": 0.7510069608688354, + "learning_rate": 3.5824885056426516e-06, + "loss": 1.9479, + "step": 21834 + }, + { + "epoch": 0.73, + "grad_norm": 0.7455934882164001, + "learning_rate": 3.5816733955444606e-06, + "loss": 2.0564, + "step": 21835 + }, + { + "epoch": 0.73, + "grad_norm": 0.7172040343284607, + "learning_rate": 3.580858357956758e-06, + "loss": 2.005, + "step": 21836 + }, + { + "epoch": 0.73, + "grad_norm": 0.746868371963501, + "learning_rate": 3.580043392888759e-06, + "loss": 2.038, + "step": 21837 + }, + { + "epoch": 0.73, + "grad_norm": 0.7526938319206238, + "learning_rate": 3.579228500349664e-06, + "loss": 2.1281, + "step": 21838 + }, + { + "epoch": 0.73, + "grad_norm": 0.7642523646354675, + "learning_rate": 3.5784136803486858e-06, + "loss": 2.0933, + "step": 21839 + }, + { + "epoch": 0.73, + "grad_norm": 0.7245362401008606, + "learning_rate": 3.5775989328950235e-06, + "loss": 2.0462, + "step": 21840 + }, + { + "epoch": 0.73, + "grad_norm": 0.7671468257904053, + "learning_rate": 3.576784257997887e-06, + "loss": 2.0192, + "step": 21841 + }, + { + "epoch": 0.73, + "grad_norm": 0.762110710144043, + "learning_rate": 3.5759696556664746e-06, + "loss": 2.0306, + "step": 21842 + }, + { + "epoch": 0.73, + "grad_norm": 0.7455496788024902, + "learning_rate": 3.575155125909996e-06, + "loss": 2.0563, + "step": 21843 + }, + { + "epoch": 0.73, + "grad_norm": 0.7331894040107727, + "learning_rate": 3.574340668737649e-06, + "loss": 2.1, + "step": 21844 + }, + { + "epoch": 0.73, + "grad_norm": 0.7222306132316589, + "learning_rate": 3.5735262841586317e-06, + "loss": 1.9978, + "step": 21845 + }, + { + "epoch": 0.73, + "grad_norm": 0.7299706935882568, + "learning_rate": 3.572711972182149e-06, + "loss": 2.0393, + "step": 21846 + }, + { + "epoch": 0.73, + "grad_norm": 0.7208203673362732, + "learning_rate": 3.5718977328174053e-06, + "loss": 2.0771, + "step": 21847 + }, + { + "epoch": 0.73, + "grad_norm": 0.7589237093925476, + "learning_rate": 3.571083566073589e-06, + "loss": 2.0445, + "step": 21848 + }, + { + "epoch": 0.73, + "grad_norm": 0.7421824336051941, + "learning_rate": 3.5702694719599008e-06, + "loss": 2.0255, + "step": 21849 + }, + { + "epoch": 0.73, + "grad_norm": 0.7436151504516602, + "learning_rate": 3.5694554504855437e-06, + "loss": 2.0529, + "step": 21850 + }, + { + "epoch": 0.73, + "grad_norm": 0.749893069267273, + "learning_rate": 3.5686415016597075e-06, + "loss": 2.1168, + "step": 21851 + }, + { + "epoch": 0.73, + "grad_norm": 0.7367182970046997, + "learning_rate": 3.5678276254915935e-06, + "loss": 2.0283, + "step": 21852 + }, + { + "epoch": 0.73, + "grad_norm": 0.7289462685585022, + "learning_rate": 3.5670138219903937e-06, + "loss": 2.0183, + "step": 21853 + }, + { + "epoch": 0.73, + "grad_norm": 0.7591384649276733, + "learning_rate": 3.5662000911652983e-06, + "loss": 2.0896, + "step": 21854 + }, + { + "epoch": 0.73, + "grad_norm": 0.725933313369751, + "learning_rate": 3.565386433025503e-06, + "loss": 2.0386, + "step": 21855 + }, + { + "epoch": 0.73, + "grad_norm": 0.7601847052574158, + "learning_rate": 3.5645728475802044e-06, + "loss": 2.0523, + "step": 21856 + }, + { + "epoch": 0.73, + "grad_norm": 0.7530854940414429, + "learning_rate": 3.5637593348385903e-06, + "loss": 2.0201, + "step": 21857 + }, + { + "epoch": 0.73, + "grad_norm": 0.7309998273849487, + "learning_rate": 3.5629458948098483e-06, + "loss": 2.0455, + "step": 21858 + }, + { + "epoch": 0.73, + "grad_norm": 0.7353950142860413, + "learning_rate": 3.5621325275031703e-06, + "loss": 1.9886, + "step": 21859 + }, + { + "epoch": 0.73, + "grad_norm": 0.7321322560310364, + "learning_rate": 3.56131923292775e-06, + "loss": 1.9738, + "step": 21860 + }, + { + "epoch": 0.73, + "grad_norm": 0.7233182787895203, + "learning_rate": 3.5605060110927712e-06, + "loss": 1.9906, + "step": 21861 + }, + { + "epoch": 0.73, + "grad_norm": 0.7458654642105103, + "learning_rate": 3.5596928620074176e-06, + "loss": 2.0013, + "step": 21862 + }, + { + "epoch": 0.73, + "grad_norm": 0.774597704410553, + "learning_rate": 3.5588797856808842e-06, + "loss": 2.0156, + "step": 21863 + }, + { + "epoch": 0.73, + "grad_norm": 0.7273744344711304, + "learning_rate": 3.558066782122348e-06, + "loss": 2.0802, + "step": 21864 + }, + { + "epoch": 0.73, + "grad_norm": 0.7335337400436401, + "learning_rate": 3.5572538513410026e-06, + "loss": 2.0302, + "step": 21865 + }, + { + "epoch": 0.73, + "grad_norm": 0.7475301027297974, + "learning_rate": 3.5564409933460264e-06, + "loss": 1.9859, + "step": 21866 + }, + { + "epoch": 0.73, + "grad_norm": 0.7385428547859192, + "learning_rate": 3.5556282081466e-06, + "loss": 2.0664, + "step": 21867 + }, + { + "epoch": 0.73, + "grad_norm": 0.7578171491622925, + "learning_rate": 3.5548154957519097e-06, + "loss": 1.9534, + "step": 21868 + }, + { + "epoch": 0.73, + "grad_norm": 0.7541801333427429, + "learning_rate": 3.554002856171139e-06, + "loss": 2.0334, + "step": 21869 + }, + { + "epoch": 0.73, + "grad_norm": 0.7522043585777283, + "learning_rate": 3.5531902894134672e-06, + "loss": 2.0193, + "step": 21870 + }, + { + "epoch": 0.73, + "grad_norm": 0.7453630566596985, + "learning_rate": 3.5523777954880702e-06, + "loss": 1.9853, + "step": 21871 + }, + { + "epoch": 0.73, + "grad_norm": 0.7678565382957458, + "learning_rate": 3.5515653744041334e-06, + "loss": 2.0773, + "step": 21872 + }, + { + "epoch": 0.73, + "grad_norm": 0.7546950578689575, + "learning_rate": 3.5507530261708288e-06, + "loss": 2.0708, + "step": 21873 + }, + { + "epoch": 0.73, + "grad_norm": 0.7173621654510498, + "learning_rate": 3.5499407507973395e-06, + "loss": 2.0942, + "step": 21874 + }, + { + "epoch": 0.73, + "grad_norm": 0.7275766730308533, + "learning_rate": 3.549128548292836e-06, + "loss": 2.0826, + "step": 21875 + }, + { + "epoch": 0.73, + "grad_norm": 0.7295228838920593, + "learning_rate": 3.548316418666502e-06, + "loss": 1.9741, + "step": 21876 + }, + { + "epoch": 0.73, + "grad_norm": 0.7192540168762207, + "learning_rate": 3.547504361927504e-06, + "loss": 2.068, + "step": 21877 + }, + { + "epoch": 0.73, + "grad_norm": 0.7474537491798401, + "learning_rate": 3.546692378085024e-06, + "loss": 2.0252, + "step": 21878 + }, + { + "epoch": 0.73, + "grad_norm": 0.7443355321884155, + "learning_rate": 3.5458804671482305e-06, + "loss": 2.0357, + "step": 21879 + }, + { + "epoch": 0.73, + "grad_norm": 0.7322970628738403, + "learning_rate": 3.545068629126295e-06, + "loss": 1.998, + "step": 21880 + }, + { + "epoch": 0.73, + "grad_norm": 0.7619025111198425, + "learning_rate": 3.5442568640283903e-06, + "loss": 2.026, + "step": 21881 + }, + { + "epoch": 0.73, + "grad_norm": 0.7609520554542542, + "learning_rate": 3.543445171863691e-06, + "loss": 2.1369, + "step": 21882 + }, + { + "epoch": 0.73, + "grad_norm": 0.7881824970245361, + "learning_rate": 3.542633552641365e-06, + "loss": 2.0666, + "step": 21883 + }, + { + "epoch": 0.73, + "grad_norm": 0.7858774662017822, + "learning_rate": 3.5418220063705766e-06, + "loss": 2.0175, + "step": 21884 + }, + { + "epoch": 0.73, + "grad_norm": 0.752856433391571, + "learning_rate": 3.5410105330605028e-06, + "loss": 2.0413, + "step": 21885 + }, + { + "epoch": 0.73, + "grad_norm": 0.7580515146255493, + "learning_rate": 3.5401991327203022e-06, + "loss": 2.0462, + "step": 21886 + }, + { + "epoch": 0.73, + "grad_norm": 0.7340289354324341, + "learning_rate": 3.53938780535915e-06, + "loss": 2.0327, + "step": 21887 + }, + { + "epoch": 0.73, + "grad_norm": 0.7451505661010742, + "learning_rate": 3.538576550986208e-06, + "loss": 2.0283, + "step": 21888 + }, + { + "epoch": 0.73, + "grad_norm": 0.7284174561500549, + "learning_rate": 3.5377653696106386e-06, + "loss": 2.0077, + "step": 21889 + }, + { + "epoch": 0.73, + "grad_norm": 0.7714676260948181, + "learning_rate": 3.5369542612416087e-06, + "loss": 2.0711, + "step": 21890 + }, + { + "epoch": 0.73, + "grad_norm": 0.7632626295089722, + "learning_rate": 3.536143225888284e-06, + "loss": 2.1135, + "step": 21891 + }, + { + "epoch": 0.73, + "grad_norm": 0.744427502155304, + "learning_rate": 3.5353322635598253e-06, + "loss": 2.0757, + "step": 21892 + }, + { + "epoch": 0.73, + "grad_norm": 0.7260180711746216, + "learning_rate": 3.5345213742653915e-06, + "loss": 2.001, + "step": 21893 + }, + { + "epoch": 0.73, + "grad_norm": 0.7563790678977966, + "learning_rate": 3.5337105580141485e-06, + "loss": 1.9958, + "step": 21894 + }, + { + "epoch": 0.73, + "grad_norm": 0.7438982725143433, + "learning_rate": 3.5328998148152515e-06, + "loss": 2.0606, + "step": 21895 + }, + { + "epoch": 0.73, + "grad_norm": 0.7843042016029358, + "learning_rate": 3.532089144677865e-06, + "loss": 1.9785, + "step": 21896 + }, + { + "epoch": 0.73, + "grad_norm": 0.7628198862075806, + "learning_rate": 3.531278547611142e-06, + "loss": 2.0307, + "step": 21897 + }, + { + "epoch": 0.73, + "grad_norm": 0.7578327655792236, + "learning_rate": 3.530468023624246e-06, + "loss": 2.0409, + "step": 21898 + }, + { + "epoch": 0.73, + "grad_norm": 0.7420735955238342, + "learning_rate": 3.529657572726327e-06, + "loss": 2.0664, + "step": 21899 + }, + { + "epoch": 0.73, + "grad_norm": 0.732926070690155, + "learning_rate": 3.528847194926549e-06, + "loss": 2.0459, + "step": 21900 + }, + { + "epoch": 0.73, + "grad_norm": 0.7543691992759705, + "learning_rate": 3.5280368902340624e-06, + "loss": 2.0719, + "step": 21901 + }, + { + "epoch": 0.73, + "grad_norm": 0.7463003396987915, + "learning_rate": 3.527226658658018e-06, + "loss": 2.0809, + "step": 21902 + }, + { + "epoch": 0.73, + "grad_norm": 0.77821284532547, + "learning_rate": 3.5264165002075747e-06, + "loss": 2.149, + "step": 21903 + }, + { + "epoch": 0.73, + "grad_norm": 0.7767094373703003, + "learning_rate": 3.5256064148918867e-06, + "loss": 2.0808, + "step": 21904 + }, + { + "epoch": 0.73, + "grad_norm": 0.7331055998802185, + "learning_rate": 3.524796402720102e-06, + "loss": 2.0232, + "step": 21905 + }, + { + "epoch": 0.73, + "grad_norm": 0.7976982593536377, + "learning_rate": 3.523986463701371e-06, + "loss": 2.0523, + "step": 21906 + }, + { + "epoch": 0.73, + "grad_norm": 0.7459834814071655, + "learning_rate": 3.5231765978448486e-06, + "loss": 2.1459, + "step": 21907 + }, + { + "epoch": 0.73, + "grad_norm": 0.7619378566741943, + "learning_rate": 3.5223668051596773e-06, + "loss": 2.087, + "step": 21908 + }, + { + "epoch": 0.73, + "grad_norm": 0.7582420110702515, + "learning_rate": 3.521557085655013e-06, + "loss": 2.0633, + "step": 21909 + }, + { + "epoch": 0.73, + "grad_norm": 0.767217755317688, + "learning_rate": 3.5207474393399997e-06, + "loss": 2.036, + "step": 21910 + }, + { + "epoch": 0.73, + "grad_norm": 0.7344055771827698, + "learning_rate": 3.5199378662237826e-06, + "loss": 2.0794, + "step": 21911 + }, + { + "epoch": 0.73, + "grad_norm": 0.7665054798126221, + "learning_rate": 3.5191283663155084e-06, + "loss": 2.0009, + "step": 21912 + }, + { + "epoch": 0.73, + "grad_norm": 0.7368614077568054, + "learning_rate": 3.5183189396243277e-06, + "loss": 1.9912, + "step": 21913 + }, + { + "epoch": 0.73, + "grad_norm": 0.775973379611969, + "learning_rate": 3.517509586159381e-06, + "loss": 2.0248, + "step": 21914 + }, + { + "epoch": 0.73, + "grad_norm": 0.7357608079910278, + "learning_rate": 3.5167003059298087e-06, + "loss": 2.0447, + "step": 21915 + }, + { + "epoch": 0.73, + "grad_norm": 0.7351614832878113, + "learning_rate": 3.515891098944759e-06, + "loss": 2.0594, + "step": 21916 + }, + { + "epoch": 0.73, + "grad_norm": 0.7268781065940857, + "learning_rate": 3.5150819652133694e-06, + "loss": 1.9979, + "step": 21917 + }, + { + "epoch": 0.73, + "grad_norm": 0.7279900908470154, + "learning_rate": 3.5142729047447867e-06, + "loss": 2.0264, + "step": 21918 + }, + { + "epoch": 0.73, + "grad_norm": 0.7398910522460938, + "learning_rate": 3.513463917548143e-06, + "loss": 2.0127, + "step": 21919 + }, + { + "epoch": 0.73, + "grad_norm": 0.7790997624397278, + "learning_rate": 3.512655003632588e-06, + "loss": 2.092, + "step": 21920 + }, + { + "epoch": 0.73, + "grad_norm": 0.7216687202453613, + "learning_rate": 3.5118461630072496e-06, + "loss": 2.0028, + "step": 21921 + }, + { + "epoch": 0.73, + "grad_norm": 0.7760214805603027, + "learning_rate": 3.5110373956812747e-06, + "loss": 2.0046, + "step": 21922 + }, + { + "epoch": 0.73, + "grad_norm": 0.7429458498954773, + "learning_rate": 3.510228701663797e-06, + "loss": 2.0889, + "step": 21923 + }, + { + "epoch": 0.73, + "grad_norm": 0.7619438171386719, + "learning_rate": 3.509420080963948e-06, + "loss": 2.0615, + "step": 21924 + }, + { + "epoch": 0.73, + "grad_norm": 0.7492358088493347, + "learning_rate": 3.5086115335908677e-06, + "loss": 2.0331, + "step": 21925 + }, + { + "epoch": 0.73, + "grad_norm": 0.7462440729141235, + "learning_rate": 3.5078030595536926e-06, + "loss": 2.07, + "step": 21926 + }, + { + "epoch": 0.73, + "grad_norm": 0.7404877543449402, + "learning_rate": 3.506994658861553e-06, + "loss": 1.9926, + "step": 21927 + }, + { + "epoch": 0.73, + "grad_norm": 0.7329072952270508, + "learning_rate": 3.506186331523581e-06, + "loss": 2.0217, + "step": 21928 + }, + { + "epoch": 0.73, + "grad_norm": 0.7485182881355286, + "learning_rate": 3.505378077548912e-06, + "loss": 2.0591, + "step": 21929 + }, + { + "epoch": 0.73, + "grad_norm": 0.7510843873023987, + "learning_rate": 3.5045698969466736e-06, + "loss": 2.0667, + "step": 21930 + }, + { + "epoch": 0.73, + "grad_norm": 0.7725932598114014, + "learning_rate": 3.5037617897259958e-06, + "loss": 2.1167, + "step": 21931 + }, + { + "epoch": 0.73, + "grad_norm": 0.7507808804512024, + "learning_rate": 3.502953755896018e-06, + "loss": 2.0499, + "step": 21932 + }, + { + "epoch": 0.73, + "grad_norm": 0.7361055612564087, + "learning_rate": 3.5021457954658542e-06, + "loss": 2.067, + "step": 21933 + }, + { + "epoch": 0.73, + "grad_norm": 0.7400034666061401, + "learning_rate": 3.5013379084446386e-06, + "loss": 2.0014, + "step": 21934 + }, + { + "epoch": 0.73, + "grad_norm": 0.7240464091300964, + "learning_rate": 3.500530094841502e-06, + "loss": 2.0101, + "step": 21935 + }, + { + "epoch": 0.73, + "grad_norm": 0.7522979378700256, + "learning_rate": 3.4997223546655677e-06, + "loss": 2.0193, + "step": 21936 + }, + { + "epoch": 0.73, + "grad_norm": 0.7448248863220215, + "learning_rate": 3.4989146879259583e-06, + "loss": 2.0252, + "step": 21937 + }, + { + "epoch": 0.73, + "grad_norm": 0.7635264992713928, + "learning_rate": 3.498107094631803e-06, + "loss": 2.063, + "step": 21938 + }, + { + "epoch": 0.73, + "grad_norm": 0.7579444646835327, + "learning_rate": 3.49729957479222e-06, + "loss": 1.991, + "step": 21939 + }, + { + "epoch": 0.73, + "grad_norm": 0.7302706241607666, + "learning_rate": 3.496492128416339e-06, + "loss": 2.0346, + "step": 21940 + }, + { + "epoch": 0.73, + "grad_norm": 0.7431196570396423, + "learning_rate": 3.4956847555132746e-06, + "loss": 2.0298, + "step": 21941 + }, + { + "epoch": 0.73, + "grad_norm": 0.73491370677948, + "learning_rate": 3.494877456092156e-06, + "loss": 2.0286, + "step": 21942 + }, + { + "epoch": 0.73, + "grad_norm": 0.7373284697532654, + "learning_rate": 3.4940702301620954e-06, + "loss": 2.0038, + "step": 21943 + }, + { + "epoch": 0.73, + "grad_norm": 0.7755711078643799, + "learning_rate": 3.493263077732221e-06, + "loss": 2.0241, + "step": 21944 + }, + { + "epoch": 0.73, + "grad_norm": 0.73579341173172, + "learning_rate": 3.492455998811646e-06, + "loss": 1.9627, + "step": 21945 + }, + { + "epoch": 0.73, + "grad_norm": 0.7552460432052612, + "learning_rate": 3.4916489934094865e-06, + "loss": 2.0172, + "step": 21946 + }, + { + "epoch": 0.73, + "grad_norm": 0.7431113719940186, + "learning_rate": 3.4908420615348616e-06, + "loss": 2.0608, + "step": 21947 + }, + { + "epoch": 0.73, + "grad_norm": 0.7221951484680176, + "learning_rate": 3.490035203196893e-06, + "loss": 2.1493, + "step": 21948 + }, + { + "epoch": 0.73, + "grad_norm": 0.7242643237113953, + "learning_rate": 3.489228418404691e-06, + "loss": 2.0515, + "step": 21949 + }, + { + "epoch": 0.73, + "grad_norm": 0.7540082931518555, + "learning_rate": 3.4884217071673665e-06, + "loss": 2.1009, + "step": 21950 + }, + { + "epoch": 0.73, + "grad_norm": 0.7547493577003479, + "learning_rate": 3.4876150694940415e-06, + "loss": 2.0664, + "step": 21951 + }, + { + "epoch": 0.73, + "grad_norm": 0.7211744785308838, + "learning_rate": 3.4868085053938217e-06, + "loss": 2.0395, + "step": 21952 + }, + { + "epoch": 0.73, + "grad_norm": 0.7982618808746338, + "learning_rate": 3.486002014875821e-06, + "loss": 2.0549, + "step": 21953 + }, + { + "epoch": 0.73, + "grad_norm": 0.71919184923172, + "learning_rate": 3.4851955979491603e-06, + "loss": 2.0511, + "step": 21954 + }, + { + "epoch": 0.73, + "grad_norm": 0.7254493832588196, + "learning_rate": 3.484389254622934e-06, + "loss": 2.048, + "step": 21955 + }, + { + "epoch": 0.73, + "grad_norm": 0.7291633486747742, + "learning_rate": 3.4835829849062597e-06, + "loss": 2.0512, + "step": 21956 + }, + { + "epoch": 0.73, + "grad_norm": 0.7729559540748596, + "learning_rate": 3.4827767888082486e-06, + "loss": 2.0667, + "step": 21957 + }, + { + "epoch": 0.73, + "grad_norm": 0.7443308234214783, + "learning_rate": 3.481970666338007e-06, + "loss": 2.0844, + "step": 21958 + }, + { + "epoch": 0.73, + "grad_norm": 0.7337605953216553, + "learning_rate": 3.4811646175046365e-06, + "loss": 2.0029, + "step": 21959 + }, + { + "epoch": 0.73, + "grad_norm": 0.7373915314674377, + "learning_rate": 3.4803586423172484e-06, + "loss": 1.9915, + "step": 21960 + }, + { + "epoch": 0.73, + "grad_norm": 0.7648467421531677, + "learning_rate": 3.4795527407849507e-06, + "loss": 2.0408, + "step": 21961 + }, + { + "epoch": 0.73, + "grad_norm": 0.7355338931083679, + "learning_rate": 3.4787469129168405e-06, + "loss": 2.0661, + "step": 21962 + }, + { + "epoch": 0.73, + "grad_norm": 0.7178565859794617, + "learning_rate": 3.47794115872203e-06, + "loss": 1.9664, + "step": 21963 + }, + { + "epoch": 0.73, + "grad_norm": 0.7369266152381897, + "learning_rate": 3.4771354782096177e-06, + "loss": 2.0466, + "step": 21964 + }, + { + "epoch": 0.73, + "grad_norm": 0.774416446685791, + "learning_rate": 3.476329871388704e-06, + "loss": 2.1482, + "step": 21965 + }, + { + "epoch": 0.73, + "grad_norm": 0.7408351898193359, + "learning_rate": 3.47552433826839e-06, + "loss": 2.0624, + "step": 21966 + }, + { + "epoch": 0.73, + "grad_norm": 0.7591580748558044, + "learning_rate": 3.474718878857787e-06, + "loss": 2.0317, + "step": 21967 + }, + { + "epoch": 0.73, + "grad_norm": 0.766352653503418, + "learning_rate": 3.473913493165978e-06, + "loss": 2.0737, + "step": 21968 + }, + { + "epoch": 0.73, + "grad_norm": 0.7312650084495544, + "learning_rate": 3.4731081812020696e-06, + "loss": 2.0552, + "step": 21969 + }, + { + "epoch": 0.73, + "grad_norm": 0.7558214664459229, + "learning_rate": 3.472302942975164e-06, + "loss": 2.0662, + "step": 21970 + }, + { + "epoch": 0.73, + "grad_norm": 0.7799909710884094, + "learning_rate": 3.4714977784943537e-06, + "loss": 2.0832, + "step": 21971 + }, + { + "epoch": 0.73, + "grad_norm": 0.7597474455833435, + "learning_rate": 3.470692687768732e-06, + "loss": 2.0439, + "step": 21972 + }, + { + "epoch": 0.73, + "grad_norm": 0.7348108291625977, + "learning_rate": 3.4698876708074015e-06, + "loss": 2.0214, + "step": 21973 + }, + { + "epoch": 0.73, + "grad_norm": 0.7593065500259399, + "learning_rate": 3.4690827276194493e-06, + "loss": 2.1209, + "step": 21974 + }, + { + "epoch": 0.73, + "grad_norm": 0.7610917091369629, + "learning_rate": 3.468277858213973e-06, + "loss": 2.0743, + "step": 21975 + }, + { + "epoch": 0.73, + "grad_norm": 0.7422091960906982, + "learning_rate": 3.4674730626000684e-06, + "loss": 2.08, + "step": 21976 + }, + { + "epoch": 0.73, + "grad_norm": 0.7613323330879211, + "learning_rate": 3.466668340786825e-06, + "loss": 2.036, + "step": 21977 + }, + { + "epoch": 0.73, + "grad_norm": 0.747014045715332, + "learning_rate": 3.465863692783331e-06, + "loss": 2.0034, + "step": 21978 + }, + { + "epoch": 0.73, + "grad_norm": 0.7534889578819275, + "learning_rate": 3.4650591185986827e-06, + "loss": 2.0461, + "step": 21979 + }, + { + "epoch": 0.73, + "grad_norm": 0.7346971035003662, + "learning_rate": 3.4642546182419668e-06, + "loss": 2.0002, + "step": 21980 + }, + { + "epoch": 0.73, + "grad_norm": 0.7238320112228394, + "learning_rate": 3.4634501917222686e-06, + "loss": 2.1091, + "step": 21981 + }, + { + "epoch": 0.73, + "grad_norm": 0.7262044548988342, + "learning_rate": 3.462645839048678e-06, + "loss": 2.1038, + "step": 21982 + }, + { + "epoch": 0.73, + "grad_norm": 0.7538089752197266, + "learning_rate": 3.461841560230288e-06, + "loss": 2.0835, + "step": 21983 + }, + { + "epoch": 0.73, + "grad_norm": 0.733538806438446, + "learning_rate": 3.4610373552761777e-06, + "loss": 2.0417, + "step": 21984 + }, + { + "epoch": 0.73, + "grad_norm": 0.7391936182975769, + "learning_rate": 3.4602332241954373e-06, + "loss": 2.0643, + "step": 21985 + }, + { + "epoch": 0.73, + "grad_norm": 0.726144015789032, + "learning_rate": 3.459429166997149e-06, + "loss": 2.0357, + "step": 21986 + }, + { + "epoch": 0.73, + "grad_norm": 0.7673753499984741, + "learning_rate": 3.458625183690394e-06, + "loss": 2.0809, + "step": 21987 + }, + { + "epoch": 0.73, + "grad_norm": 0.7668999433517456, + "learning_rate": 3.457821274284259e-06, + "loss": 2.0537, + "step": 21988 + }, + { + "epoch": 0.73, + "grad_norm": 0.7318655252456665, + "learning_rate": 3.457017438787831e-06, + "loss": 2.0259, + "step": 21989 + }, + { + "epoch": 0.73, + "grad_norm": 0.7278853058815002, + "learning_rate": 3.4562136772101785e-06, + "loss": 2.0071, + "step": 21990 + }, + { + "epoch": 0.73, + "grad_norm": 0.7428708076477051, + "learning_rate": 3.4554099895603886e-06, + "loss": 2.115, + "step": 21991 + }, + { + "epoch": 0.73, + "grad_norm": 0.7199832201004028, + "learning_rate": 3.4546063758475444e-06, + "loss": 2.0089, + "step": 21992 + }, + { + "epoch": 0.73, + "grad_norm": 0.7396317720413208, + "learning_rate": 3.453802836080722e-06, + "loss": 2.0289, + "step": 21993 + }, + { + "epoch": 0.73, + "grad_norm": 0.7196389436721802, + "learning_rate": 3.4529993702689955e-06, + "loss": 1.9811, + "step": 21994 + }, + { + "epoch": 0.73, + "grad_norm": 0.7384416460990906, + "learning_rate": 3.4521959784214486e-06, + "loss": 2.0278, + "step": 21995 + }, + { + "epoch": 0.73, + "grad_norm": 0.7443186640739441, + "learning_rate": 3.4513926605471504e-06, + "loss": 2.049, + "step": 21996 + }, + { + "epoch": 0.73, + "grad_norm": 0.756470263004303, + "learning_rate": 3.45058941665518e-06, + "loss": 2.0893, + "step": 21997 + }, + { + "epoch": 0.73, + "grad_norm": 0.7462362051010132, + "learning_rate": 3.449786246754615e-06, + "loss": 2.0415, + "step": 21998 + }, + { + "epoch": 0.73, + "grad_norm": 0.7609695792198181, + "learning_rate": 3.4489831508545267e-06, + "loss": 2.0423, + "step": 21999 + }, + { + "epoch": 0.73, + "grad_norm": 0.7346771359443665, + "learning_rate": 3.448180128963984e-06, + "loss": 2.0077, + "step": 22000 + }, + { + "epoch": 0.73, + "grad_norm": 0.7259600758552551, + "learning_rate": 3.4473771810920665e-06, + "loss": 2.0343, + "step": 22001 + }, + { + "epoch": 0.73, + "grad_norm": 0.7570362091064453, + "learning_rate": 3.446574307247841e-06, + "loss": 2.1285, + "step": 22002 + }, + { + "epoch": 0.73, + "grad_norm": 0.7363032698631287, + "learning_rate": 3.4457715074403743e-06, + "loss": 2.0667, + "step": 22003 + }, + { + "epoch": 0.73, + "grad_norm": 0.7343173623085022, + "learning_rate": 3.4449687816787404e-06, + "loss": 2.0379, + "step": 22004 + }, + { + "epoch": 0.73, + "grad_norm": 0.7564393281936646, + "learning_rate": 3.444166129972011e-06, + "loss": 2.0995, + "step": 22005 + }, + { + "epoch": 0.73, + "grad_norm": 0.7725486755371094, + "learning_rate": 3.4433635523292475e-06, + "loss": 2.1478, + "step": 22006 + }, + { + "epoch": 0.73, + "grad_norm": 0.7252218723297119, + "learning_rate": 3.442561048759523e-06, + "loss": 2.0024, + "step": 22007 + }, + { + "epoch": 0.73, + "grad_norm": 0.7216829657554626, + "learning_rate": 3.4417586192719e-06, + "loss": 2.087, + "step": 22008 + }, + { + "epoch": 0.73, + "grad_norm": 0.7366796135902405, + "learning_rate": 3.4409562638754425e-06, + "loss": 2.1119, + "step": 22009 + }, + { + "epoch": 0.73, + "grad_norm": 0.7319130897521973, + "learning_rate": 3.4401539825792162e-06, + "loss": 2.0032, + "step": 22010 + }, + { + "epoch": 0.73, + "grad_norm": 0.7575193643569946, + "learning_rate": 3.4393517753922933e-06, + "loss": 2.0323, + "step": 22011 + }, + { + "epoch": 0.73, + "grad_norm": 0.7761048674583435, + "learning_rate": 3.438549642323722e-06, + "loss": 1.9898, + "step": 22012 + }, + { + "epoch": 0.73, + "grad_norm": 0.7262493968009949, + "learning_rate": 3.4377475833825714e-06, + "loss": 2.1171, + "step": 22013 + }, + { + "epoch": 0.73, + "grad_norm": 0.7469989061355591, + "learning_rate": 3.4369455985779065e-06, + "loss": 1.9799, + "step": 22014 + }, + { + "epoch": 0.73, + "grad_norm": 0.760104775428772, + "learning_rate": 3.4361436879187802e-06, + "loss": 2.097, + "step": 22015 + }, + { + "epoch": 0.73, + "grad_norm": 0.7543305158615112, + "learning_rate": 3.435341851414259e-06, + "loss": 2.0287, + "step": 22016 + }, + { + "epoch": 0.73, + "grad_norm": 0.758040726184845, + "learning_rate": 3.4345400890733983e-06, + "loss": 2.0435, + "step": 22017 + }, + { + "epoch": 0.73, + "grad_norm": 0.7447519898414612, + "learning_rate": 3.433738400905253e-06, + "loss": 2.0694, + "step": 22018 + }, + { + "epoch": 0.73, + "grad_norm": 0.7724086046218872, + "learning_rate": 3.432936786918882e-06, + "loss": 2.0457, + "step": 22019 + }, + { + "epoch": 0.73, + "grad_norm": 0.7522633075714111, + "learning_rate": 3.4321352471233473e-06, + "loss": 2.086, + "step": 22020 + }, + { + "epoch": 0.73, + "grad_norm": 0.7203301191329956, + "learning_rate": 3.431333781527699e-06, + "loss": 2.0167, + "step": 22021 + }, + { + "epoch": 0.73, + "grad_norm": 0.7195686101913452, + "learning_rate": 3.430532390140988e-06, + "loss": 2.0129, + "step": 22022 + }, + { + "epoch": 0.73, + "grad_norm": 0.7443708181381226, + "learning_rate": 3.4297310729722757e-06, + "loss": 2.0167, + "step": 22023 + }, + { + "epoch": 0.73, + "grad_norm": 0.8004683256149292, + "learning_rate": 3.4289298300306117e-06, + "loss": 2.0389, + "step": 22024 + }, + { + "epoch": 0.73, + "grad_norm": 0.7190501093864441, + "learning_rate": 3.428128661325043e-06, + "loss": 1.9871, + "step": 22025 + }, + { + "epoch": 0.73, + "grad_norm": 0.7454371452331543, + "learning_rate": 3.4273275668646254e-06, + "loss": 2.0603, + "step": 22026 + }, + { + "epoch": 0.73, + "grad_norm": 0.7811813354492188, + "learning_rate": 3.426526546658413e-06, + "loss": 2.0188, + "step": 22027 + }, + { + "epoch": 0.73, + "grad_norm": 0.7571337819099426, + "learning_rate": 3.4257256007154483e-06, + "loss": 2.0649, + "step": 22028 + }, + { + "epoch": 0.73, + "grad_norm": 0.7402219772338867, + "learning_rate": 3.424924729044785e-06, + "loss": 2.0586, + "step": 22029 + }, + { + "epoch": 0.73, + "grad_norm": 0.7332258820533752, + "learning_rate": 3.4241239316554697e-06, + "loss": 1.9875, + "step": 22030 + }, + { + "epoch": 0.73, + "grad_norm": 0.7862280607223511, + "learning_rate": 3.423323208556545e-06, + "loss": 2.0967, + "step": 22031 + }, + { + "epoch": 0.73, + "grad_norm": 0.7423326969146729, + "learning_rate": 3.422522559757059e-06, + "loss": 2.0195, + "step": 22032 + }, + { + "epoch": 0.73, + "grad_norm": 0.7573521733283997, + "learning_rate": 3.4217219852660664e-06, + "loss": 2.0624, + "step": 22033 + }, + { + "epoch": 0.73, + "grad_norm": 0.7342960238456726, + "learning_rate": 3.4209214850925964e-06, + "loss": 2.0446, + "step": 22034 + }, + { + "epoch": 0.73, + "grad_norm": 0.7732722759246826, + "learning_rate": 3.4201210592457e-06, + "loss": 2.0778, + "step": 22035 + }, + { + "epoch": 0.73, + "grad_norm": 0.7818910479545593, + "learning_rate": 3.4193207077344227e-06, + "loss": 2.114, + "step": 22036 + }, + { + "epoch": 0.73, + "grad_norm": 0.7405540347099304, + "learning_rate": 3.4185204305678e-06, + "loss": 2.0542, + "step": 22037 + }, + { + "epoch": 0.73, + "grad_norm": 0.7460864186286926, + "learning_rate": 3.4177202277548805e-06, + "loss": 2.0424, + "step": 22038 + }, + { + "epoch": 0.73, + "grad_norm": 0.7105057835578918, + "learning_rate": 3.416920099304699e-06, + "loss": 2.0497, + "step": 22039 + }, + { + "epoch": 0.73, + "grad_norm": 0.7740036249160767, + "learning_rate": 3.4161200452262933e-06, + "loss": 2.0917, + "step": 22040 + }, + { + "epoch": 0.73, + "grad_norm": 0.730941891670227, + "learning_rate": 3.4153200655287057e-06, + "loss": 2.0325, + "step": 22041 + }, + { + "epoch": 0.73, + "grad_norm": 0.7270932197570801, + "learning_rate": 3.4145201602209756e-06, + "loss": 2.1337, + "step": 22042 + }, + { + "epoch": 0.73, + "grad_norm": 0.7469239234924316, + "learning_rate": 3.4137203293121367e-06, + "loss": 2.0478, + "step": 22043 + }, + { + "epoch": 0.73, + "grad_norm": 0.7913739681243896, + "learning_rate": 3.4129205728112234e-06, + "loss": 2.1019, + "step": 22044 + }, + { + "epoch": 0.73, + "grad_norm": 0.7695972919464111, + "learning_rate": 3.4121208907272753e-06, + "loss": 1.9497, + "step": 22045 + }, + { + "epoch": 0.73, + "grad_norm": 0.7432686686515808, + "learning_rate": 3.411321283069322e-06, + "loss": 2.0009, + "step": 22046 + }, + { + "epoch": 0.73, + "grad_norm": 0.7346075773239136, + "learning_rate": 3.4105217498464026e-06, + "loss": 2.0189, + "step": 22047 + }, + { + "epoch": 0.73, + "grad_norm": 0.7304443717002869, + "learning_rate": 3.409722291067543e-06, + "loss": 2.0142, + "step": 22048 + }, + { + "epoch": 0.73, + "grad_norm": 0.7526780366897583, + "learning_rate": 3.4089229067417827e-06, + "loss": 2.0536, + "step": 22049 + }, + { + "epoch": 0.73, + "grad_norm": 0.7358272671699524, + "learning_rate": 3.4081235968781445e-06, + "loss": 2.0846, + "step": 22050 + }, + { + "epoch": 0.73, + "grad_norm": 0.7588794827461243, + "learning_rate": 3.4073243614856664e-06, + "loss": 2.06, + "step": 22051 + }, + { + "epoch": 0.73, + "grad_norm": 0.7572571635246277, + "learning_rate": 3.406525200573374e-06, + "loss": 2.1183, + "step": 22052 + }, + { + "epoch": 0.73, + "grad_norm": 0.7737666368484497, + "learning_rate": 3.405726114150292e-06, + "loss": 2.1042, + "step": 22053 + }, + { + "epoch": 0.73, + "grad_norm": 0.7248309254646301, + "learning_rate": 3.4049271022254527e-06, + "loss": 2.0377, + "step": 22054 + }, + { + "epoch": 0.73, + "grad_norm": 0.7304350137710571, + "learning_rate": 3.404128164807887e-06, + "loss": 2.0643, + "step": 22055 + }, + { + "epoch": 0.73, + "grad_norm": 0.743652880191803, + "learning_rate": 3.4033293019066107e-06, + "loss": 2.1009, + "step": 22056 + }, + { + "epoch": 0.73, + "grad_norm": 0.7278335094451904, + "learning_rate": 3.402530513530653e-06, + "loss": 2.1079, + "step": 22057 + }, + { + "epoch": 0.73, + "grad_norm": 0.7626019716262817, + "learning_rate": 3.401731799689043e-06, + "loss": 1.9947, + "step": 22058 + }, + { + "epoch": 0.73, + "grad_norm": 0.7729936242103577, + "learning_rate": 3.400933160390796e-06, + "loss": 2.0053, + "step": 22059 + }, + { + "epoch": 0.73, + "grad_norm": 0.7294853925704956, + "learning_rate": 3.400134595644943e-06, + "loss": 1.9573, + "step": 22060 + }, + { + "epoch": 0.73, + "grad_norm": 0.7472320199012756, + "learning_rate": 3.399336105460501e-06, + "loss": 2.0906, + "step": 22061 + }, + { + "epoch": 0.73, + "grad_norm": 0.7488674521446228, + "learning_rate": 3.3985376898464874e-06, + "loss": 2.0511, + "step": 22062 + }, + { + "epoch": 0.73, + "grad_norm": 0.7861932516098022, + "learning_rate": 3.397739348811927e-06, + "loss": 2.0847, + "step": 22063 + }, + { + "epoch": 0.73, + "grad_norm": 0.7188287973403931, + "learning_rate": 3.396941082365841e-06, + "loss": 2.0418, + "step": 22064 + }, + { + "epoch": 0.73, + "grad_norm": 0.7608891725540161, + "learning_rate": 3.3961428905172457e-06, + "loss": 2.0509, + "step": 22065 + }, + { + "epoch": 0.73, + "grad_norm": 0.7647976875305176, + "learning_rate": 3.395344773275153e-06, + "loss": 2.1184, + "step": 22066 + }, + { + "epoch": 0.73, + "grad_norm": 0.7377094626426697, + "learning_rate": 3.3945467306485856e-06, + "loss": 2.13, + "step": 22067 + }, + { + "epoch": 0.73, + "grad_norm": 0.7402511835098267, + "learning_rate": 3.3937487626465604e-06, + "loss": 2.0744, + "step": 22068 + }, + { + "epoch": 0.73, + "grad_norm": 0.7236462831497192, + "learning_rate": 3.392950869278091e-06, + "loss": 1.9357, + "step": 22069 + }, + { + "epoch": 0.73, + "grad_norm": 0.7358348369598389, + "learning_rate": 3.392153050552186e-06, + "loss": 2.0796, + "step": 22070 + }, + { + "epoch": 0.73, + "grad_norm": 0.7291209697723389, + "learning_rate": 3.391355306477868e-06, + "loss": 2.0627, + "step": 22071 + }, + { + "epoch": 0.73, + "grad_norm": 0.7506392002105713, + "learning_rate": 3.39055763706414e-06, + "loss": 2.0242, + "step": 22072 + }, + { + "epoch": 0.73, + "grad_norm": 0.7334917783737183, + "learning_rate": 3.389760042320023e-06, + "loss": 2.0527, + "step": 22073 + }, + { + "epoch": 0.73, + "grad_norm": 0.7368965148925781, + "learning_rate": 3.388962522254522e-06, + "loss": 2.0821, + "step": 22074 + }, + { + "epoch": 0.73, + "grad_norm": 0.760286808013916, + "learning_rate": 3.388165076876645e-06, + "loss": 2.0518, + "step": 22075 + }, + { + "epoch": 0.73, + "grad_norm": 0.7505927681922913, + "learning_rate": 3.3873677061954045e-06, + "loss": 2.0896, + "step": 22076 + }, + { + "epoch": 0.73, + "grad_norm": 0.7552119493484497, + "learning_rate": 3.3865704102198117e-06, + "loss": 2.1024, + "step": 22077 + }, + { + "epoch": 0.73, + "grad_norm": 0.7553349733352661, + "learning_rate": 3.3857731889588697e-06, + "loss": 2.0573, + "step": 22078 + }, + { + "epoch": 0.73, + "grad_norm": 0.7699952125549316, + "learning_rate": 3.3849760424215826e-06, + "loss": 2.1266, + "step": 22079 + }, + { + "epoch": 0.73, + "grad_norm": 0.7664619088172913, + "learning_rate": 3.384178970616964e-06, + "loss": 2.0955, + "step": 22080 + }, + { + "epoch": 0.73, + "grad_norm": 0.7309025526046753, + "learning_rate": 3.3833819735540098e-06, + "loss": 2.0317, + "step": 22081 + }, + { + "epoch": 0.73, + "grad_norm": 0.730108380317688, + "learning_rate": 3.3825850512417315e-06, + "loss": 2.0233, + "step": 22082 + }, + { + "epoch": 0.73, + "grad_norm": 0.7597236633300781, + "learning_rate": 3.3817882036891257e-06, + "loss": 2.014, + "step": 22083 + }, + { + "epoch": 0.73, + "grad_norm": 0.724894642829895, + "learning_rate": 3.3809914309052016e-06, + "loss": 2.0393, + "step": 22084 + }, + { + "epoch": 0.73, + "grad_norm": 0.7193269729614258, + "learning_rate": 3.3801947328989537e-06, + "loss": 2.0555, + "step": 22085 + }, + { + "epoch": 0.73, + "grad_norm": 0.7674509286880493, + "learning_rate": 3.3793981096793903e-06, + "loss": 2.1059, + "step": 22086 + }, + { + "epoch": 0.73, + "grad_norm": 0.7193686962127686, + "learning_rate": 3.378601561255507e-06, + "loss": 2.0112, + "step": 22087 + }, + { + "epoch": 0.73, + "grad_norm": 0.7191973328590393, + "learning_rate": 3.3778050876362988e-06, + "loss": 2.0506, + "step": 22088 + }, + { + "epoch": 0.73, + "grad_norm": 0.7486215233802795, + "learning_rate": 3.3770086888307676e-06, + "loss": 2.0939, + "step": 22089 + }, + { + "epoch": 0.73, + "grad_norm": 0.7348816394805908, + "learning_rate": 3.3762123648479138e-06, + "loss": 2.0476, + "step": 22090 + }, + { + "epoch": 0.73, + "grad_norm": 0.7693881988525391, + "learning_rate": 3.375416115696731e-06, + "loss": 2.047, + "step": 22091 + }, + { + "epoch": 0.74, + "grad_norm": 0.7356235980987549, + "learning_rate": 3.3746199413862124e-06, + "loss": 2.0496, + "step": 22092 + }, + { + "epoch": 0.74, + "grad_norm": 0.7413351535797119, + "learning_rate": 3.3738238419253566e-06, + "loss": 2.0542, + "step": 22093 + }, + { + "epoch": 0.74, + "grad_norm": 0.7346134781837463, + "learning_rate": 3.3730278173231534e-06, + "loss": 2.079, + "step": 22094 + }, + { + "epoch": 0.74, + "grad_norm": 0.7542963624000549, + "learning_rate": 3.3722318675886012e-06, + "loss": 1.993, + "step": 22095 + }, + { + "epoch": 0.74, + "grad_norm": 0.759371280670166, + "learning_rate": 3.3714359927306893e-06, + "loss": 2.0388, + "step": 22096 + }, + { + "epoch": 0.74, + "grad_norm": 0.7669425010681152, + "learning_rate": 3.3706401927584054e-06, + "loss": 2.0618, + "step": 22097 + }, + { + "epoch": 0.74, + "grad_norm": 0.7534034252166748, + "learning_rate": 3.369844467680743e-06, + "loss": 2.0072, + "step": 22098 + }, + { + "epoch": 0.74, + "grad_norm": 0.761694073677063, + "learning_rate": 3.369048817506696e-06, + "loss": 2.0218, + "step": 22099 + }, + { + "epoch": 0.74, + "grad_norm": 0.7431424260139465, + "learning_rate": 3.3682532422452487e-06, + "loss": 2.0345, + "step": 22100 + }, + { + "epoch": 0.74, + "grad_norm": 0.7485072016716003, + "learning_rate": 3.3674577419053866e-06, + "loss": 2.0312, + "step": 22101 + }, + { + "epoch": 0.74, + "grad_norm": 0.7714682817459106, + "learning_rate": 3.3666623164961034e-06, + "loss": 2.0837, + "step": 22102 + }, + { + "epoch": 0.74, + "grad_norm": 0.7677016854286194, + "learning_rate": 3.3658669660263788e-06, + "loss": 2.0361, + "step": 22103 + }, + { + "epoch": 0.74, + "grad_norm": 0.7685104608535767, + "learning_rate": 3.3650716905052037e-06, + "loss": 2.0704, + "step": 22104 + }, + { + "epoch": 0.74, + "grad_norm": 0.7618870139122009, + "learning_rate": 3.3642764899415583e-06, + "loss": 2.0156, + "step": 22105 + }, + { + "epoch": 0.74, + "grad_norm": 0.7502275109291077, + "learning_rate": 3.3634813643444297e-06, + "loss": 2.0695, + "step": 22106 + }, + { + "epoch": 0.74, + "grad_norm": 0.7285387516021729, + "learning_rate": 3.362686313722797e-06, + "loss": 2.0676, + "step": 22107 + }, + { + "epoch": 0.74, + "grad_norm": 0.7358669638633728, + "learning_rate": 3.361891338085648e-06, + "loss": 2.0639, + "step": 22108 + }, + { + "epoch": 0.74, + "grad_norm": 0.771622359752655, + "learning_rate": 3.3610964374419598e-06, + "loss": 2.0419, + "step": 22109 + }, + { + "epoch": 0.74, + "grad_norm": 0.7350928783416748, + "learning_rate": 3.3603016118007103e-06, + "loss": 2.0216, + "step": 22110 + }, + { + "epoch": 0.74, + "grad_norm": 0.7385023236274719, + "learning_rate": 3.3595068611708813e-06, + "loss": 1.9719, + "step": 22111 + }, + { + "epoch": 0.74, + "grad_norm": 0.7869828939437866, + "learning_rate": 3.358712185561457e-06, + "loss": 1.9662, + "step": 22112 + }, + { + "epoch": 0.74, + "grad_norm": 0.7233776450157166, + "learning_rate": 3.3579175849814083e-06, + "loss": 1.9626, + "step": 22113 + }, + { + "epoch": 0.74, + "grad_norm": 0.7473151683807373, + "learning_rate": 3.357123059439712e-06, + "loss": 2.0663, + "step": 22114 + }, + { + "epoch": 0.74, + "grad_norm": 0.8240140676498413, + "learning_rate": 3.3563286089453497e-06, + "loss": 2.1127, + "step": 22115 + }, + { + "epoch": 0.74, + "grad_norm": 0.7196578979492188, + "learning_rate": 3.355534233507289e-06, + "loss": 2.0309, + "step": 22116 + }, + { + "epoch": 0.74, + "grad_norm": 0.7662948369979858, + "learning_rate": 3.354739933134512e-06, + "loss": 2.0613, + "step": 22117 + }, + { + "epoch": 0.74, + "grad_norm": 0.7353666424751282, + "learning_rate": 3.3539457078359894e-06, + "loss": 2.0454, + "step": 22118 + }, + { + "epoch": 0.74, + "grad_norm": 0.7424390912055969, + "learning_rate": 3.3531515576206887e-06, + "loss": 2.1119, + "step": 22119 + }, + { + "epoch": 0.74, + "grad_norm": 0.7820848226547241, + "learning_rate": 3.352357482497587e-06, + "loss": 1.9961, + "step": 22120 + }, + { + "epoch": 0.74, + "grad_norm": 0.7346158027648926, + "learning_rate": 3.3515634824756582e-06, + "loss": 2.0595, + "step": 22121 + }, + { + "epoch": 0.74, + "grad_norm": 0.7797508239746094, + "learning_rate": 3.3507695575638687e-06, + "loss": 2.0752, + "step": 22122 + }, + { + "epoch": 0.74, + "grad_norm": 0.7496840357780457, + "learning_rate": 3.3499757077711835e-06, + "loss": 2.0759, + "step": 22123 + }, + { + "epoch": 0.74, + "grad_norm": 0.748189389705658, + "learning_rate": 3.34918193310658e-06, + "loss": 2.0848, + "step": 22124 + }, + { + "epoch": 0.74, + "grad_norm": 0.708651065826416, + "learning_rate": 3.3483882335790173e-06, + "loss": 1.9672, + "step": 22125 + }, + { + "epoch": 0.74, + "grad_norm": 0.7485222816467285, + "learning_rate": 3.34759460919747e-06, + "loss": 2.0343, + "step": 22126 + }, + { + "epoch": 0.74, + "grad_norm": 0.7451032400131226, + "learning_rate": 3.3468010599708967e-06, + "loss": 2.0122, + "step": 22127 + }, + { + "epoch": 0.74, + "grad_norm": 0.7255945801734924, + "learning_rate": 3.3460075859082685e-06, + "loss": 2.0376, + "step": 22128 + }, + { + "epoch": 0.74, + "grad_norm": 0.7336903810501099, + "learning_rate": 3.345214187018545e-06, + "loss": 2.0792, + "step": 22129 + }, + { + "epoch": 0.74, + "grad_norm": 0.7269100546836853, + "learning_rate": 3.3444208633106935e-06, + "loss": 2.0729, + "step": 22130 + }, + { + "epoch": 0.74, + "grad_norm": 0.7421281933784485, + "learning_rate": 3.3436276147936754e-06, + "loss": 2.0751, + "step": 22131 + }, + { + "epoch": 0.74, + "grad_norm": 0.7496660351753235, + "learning_rate": 3.342834441476448e-06, + "loss": 2.0739, + "step": 22132 + }, + { + "epoch": 0.74, + "grad_norm": 0.7094911932945251, + "learning_rate": 3.3420413433679745e-06, + "loss": 2.0072, + "step": 22133 + }, + { + "epoch": 0.74, + "grad_norm": 0.7650269865989685, + "learning_rate": 3.3412483204772207e-06, + "loss": 2.0376, + "step": 22134 + }, + { + "epoch": 0.74, + "grad_norm": 0.7688501477241516, + "learning_rate": 3.3404553728131405e-06, + "loss": 2.0839, + "step": 22135 + }, + { + "epoch": 0.74, + "grad_norm": 0.7776604294776917, + "learning_rate": 3.3396625003846892e-06, + "loss": 2.0521, + "step": 22136 + }, + { + "epoch": 0.74, + "grad_norm": 0.7357088923454285, + "learning_rate": 3.338869703200831e-06, + "loss": 1.9996, + "step": 22137 + }, + { + "epoch": 0.74, + "grad_norm": 0.7088037133216858, + "learning_rate": 3.338076981270516e-06, + "loss": 2.0215, + "step": 22138 + }, + { + "epoch": 0.74, + "grad_norm": 0.777632474899292, + "learning_rate": 3.337284334602705e-06, + "loss": 2.0564, + "step": 22139 + }, + { + "epoch": 0.74, + "grad_norm": 0.7721948623657227, + "learning_rate": 3.336491763206352e-06, + "loss": 2.0004, + "step": 22140 + }, + { + "epoch": 0.74, + "grad_norm": 0.7479133605957031, + "learning_rate": 3.3356992670904065e-06, + "loss": 2.0084, + "step": 22141 + }, + { + "epoch": 0.74, + "grad_norm": 0.7704938650131226, + "learning_rate": 3.3349068462638254e-06, + "loss": 2.0771, + "step": 22142 + }, + { + "epoch": 0.74, + "grad_norm": 0.7240856885910034, + "learning_rate": 3.3341145007355635e-06, + "loss": 2.1308, + "step": 22143 + }, + { + "epoch": 0.74, + "grad_norm": 0.7621692419052124, + "learning_rate": 3.3333222305145694e-06, + "loss": 2.0509, + "step": 22144 + }, + { + "epoch": 0.74, + "grad_norm": 0.7420933842658997, + "learning_rate": 3.33253003560979e-06, + "loss": 2.1305, + "step": 22145 + }, + { + "epoch": 0.74, + "grad_norm": 0.7493348121643066, + "learning_rate": 3.3317379160301833e-06, + "loss": 2.0796, + "step": 22146 + }, + { + "epoch": 0.74, + "grad_norm": 0.7451976537704468, + "learning_rate": 3.3309458717846886e-06, + "loss": 2.0579, + "step": 22147 + }, + { + "epoch": 0.74, + "grad_norm": 0.7184981107711792, + "learning_rate": 3.3301539028822638e-06, + "loss": 2.0696, + "step": 22148 + }, + { + "epoch": 0.74, + "grad_norm": 0.7519020438194275, + "learning_rate": 3.3293620093318467e-06, + "loss": 1.9881, + "step": 22149 + }, + { + "epoch": 0.74, + "grad_norm": 0.7504491209983826, + "learning_rate": 3.3285701911423928e-06, + "loss": 2.0609, + "step": 22150 + }, + { + "epoch": 0.74, + "grad_norm": 0.7549226880073547, + "learning_rate": 3.3277784483228393e-06, + "loss": 2.0688, + "step": 22151 + }, + { + "epoch": 0.74, + "grad_norm": 0.7448413968086243, + "learning_rate": 3.3269867808821344e-06, + "loss": 2.0528, + "step": 22152 + }, + { + "epoch": 0.74, + "grad_norm": 0.759600043296814, + "learning_rate": 3.326195188829228e-06, + "loss": 2.0374, + "step": 22153 + }, + { + "epoch": 0.74, + "grad_norm": 0.7269254326820374, + "learning_rate": 3.325403672173051e-06, + "loss": 2.1103, + "step": 22154 + }, + { + "epoch": 0.74, + "grad_norm": 0.7295447587966919, + "learning_rate": 3.3246122309225527e-06, + "loss": 2.0136, + "step": 22155 + }, + { + "epoch": 0.74, + "grad_norm": 0.7458457946777344, + "learning_rate": 3.3238208650866756e-06, + "loss": 2.0511, + "step": 22156 + }, + { + "epoch": 0.74, + "grad_norm": 0.7688998579978943, + "learning_rate": 3.323029574674357e-06, + "loss": 2.0965, + "step": 22157 + }, + { + "epoch": 0.74, + "grad_norm": 0.7221521139144897, + "learning_rate": 3.3222383596945353e-06, + "loss": 2.028, + "step": 22158 + }, + { + "epoch": 0.74, + "grad_norm": 0.7359201908111572, + "learning_rate": 3.3214472201561533e-06, + "loss": 2.0831, + "step": 22159 + }, + { + "epoch": 0.74, + "grad_norm": 0.7515683174133301, + "learning_rate": 3.320656156068144e-06, + "loss": 2.0486, + "step": 22160 + }, + { + "epoch": 0.74, + "grad_norm": 0.7465779781341553, + "learning_rate": 3.3198651674394468e-06, + "loss": 2.089, + "step": 22161 + }, + { + "epoch": 0.74, + "grad_norm": 0.7769495844841003, + "learning_rate": 3.3190742542790043e-06, + "loss": 2.0593, + "step": 22162 + }, + { + "epoch": 0.74, + "grad_norm": 0.7404967546463013, + "learning_rate": 3.318283416595739e-06, + "loss": 2.0186, + "step": 22163 + }, + { + "epoch": 0.74, + "grad_norm": 0.7394290566444397, + "learning_rate": 3.317492654398592e-06, + "loss": 2.0208, + "step": 22164 + }, + { + "epoch": 0.74, + "grad_norm": 0.772930920124054, + "learning_rate": 3.3167019676964995e-06, + "loss": 2.063, + "step": 22165 + }, + { + "epoch": 0.74, + "grad_norm": 0.7465413212776184, + "learning_rate": 3.3159113564983912e-06, + "loss": 2.0419, + "step": 22166 + }, + { + "epoch": 0.74, + "grad_norm": 0.7373952865600586, + "learning_rate": 3.315120820813197e-06, + "loss": 2.0535, + "step": 22167 + }, + { + "epoch": 0.74, + "grad_norm": 0.7644451856613159, + "learning_rate": 3.314330360649849e-06, + "loss": 2.0716, + "step": 22168 + }, + { + "epoch": 0.74, + "grad_norm": 0.7352064251899719, + "learning_rate": 3.3135399760172827e-06, + "loss": 1.9979, + "step": 22169 + }, + { + "epoch": 0.74, + "grad_norm": 0.7662498354911804, + "learning_rate": 3.3127496669244217e-06, + "loss": 2.0671, + "step": 22170 + }, + { + "epoch": 0.74, + "grad_norm": 0.7551062107086182, + "learning_rate": 3.311959433380194e-06, + "loss": 2.0838, + "step": 22171 + }, + { + "epoch": 0.74, + "grad_norm": 0.7728142142295837, + "learning_rate": 3.311169275393531e-06, + "loss": 2.031, + "step": 22172 + }, + { + "epoch": 0.74, + "grad_norm": 0.7507092952728271, + "learning_rate": 3.3103791929733552e-06, + "loss": 2.067, + "step": 22173 + }, + { + "epoch": 0.74, + "grad_norm": 0.7657943964004517, + "learning_rate": 3.3095891861285944e-06, + "loss": 2.0793, + "step": 22174 + }, + { + "epoch": 0.74, + "grad_norm": 0.7218884825706482, + "learning_rate": 3.30879925486818e-06, + "loss": 2.0274, + "step": 22175 + }, + { + "epoch": 0.74, + "grad_norm": 0.7566397786140442, + "learning_rate": 3.3080093992010245e-06, + "loss": 1.9932, + "step": 22176 + }, + { + "epoch": 0.74, + "grad_norm": 0.7601485252380371, + "learning_rate": 3.307219619136057e-06, + "loss": 2.0604, + "step": 22177 + }, + { + "epoch": 0.74, + "grad_norm": 0.745028018951416, + "learning_rate": 3.306429914682202e-06, + "loss": 2.0406, + "step": 22178 + }, + { + "epoch": 0.74, + "grad_norm": 0.7598259449005127, + "learning_rate": 3.3056402858483807e-06, + "loss": 2.0332, + "step": 22179 + }, + { + "epoch": 0.74, + "grad_norm": 0.7550256848335266, + "learning_rate": 3.3048507326435074e-06, + "loss": 2.1142, + "step": 22180 + }, + { + "epoch": 0.74, + "grad_norm": 0.7382687330245972, + "learning_rate": 3.3040612550765104e-06, + "loss": 2.1274, + "step": 22181 + }, + { + "epoch": 0.74, + "grad_norm": 0.7453802227973938, + "learning_rate": 3.303271853156302e-06, + "loss": 2.0716, + "step": 22182 + }, + { + "epoch": 0.74, + "grad_norm": 0.7379159331321716, + "learning_rate": 3.3024825268918046e-06, + "loss": 2.1063, + "step": 22183 + }, + { + "epoch": 0.74, + "grad_norm": 0.7373242378234863, + "learning_rate": 3.301693276291936e-06, + "loss": 2.079, + "step": 22184 + }, + { + "epoch": 0.74, + "grad_norm": 0.7235468029975891, + "learning_rate": 3.300904101365613e-06, + "loss": 2.1325, + "step": 22185 + }, + { + "epoch": 0.74, + "grad_norm": 0.7384099960327148, + "learning_rate": 3.3001150021217444e-06, + "loss": 2.0643, + "step": 22186 + }, + { + "epoch": 0.74, + "grad_norm": 0.7517136335372925, + "learning_rate": 3.2993259785692543e-06, + "loss": 2.1035, + "step": 22187 + }, + { + "epoch": 0.74, + "grad_norm": 0.7891362905502319, + "learning_rate": 3.2985370307170516e-06, + "loss": 2.0248, + "step": 22188 + }, + { + "epoch": 0.74, + "grad_norm": 0.7310405373573303, + "learning_rate": 3.297748158574048e-06, + "loss": 2.0595, + "step": 22189 + }, + { + "epoch": 0.74, + "grad_norm": 0.7453989386558533, + "learning_rate": 3.2969593621491567e-06, + "loss": 2.0469, + "step": 22190 + }, + { + "epoch": 0.74, + "grad_norm": 0.7469889521598816, + "learning_rate": 3.296170641451294e-06, + "loss": 2.0592, + "step": 22191 + }, + { + "epoch": 0.74, + "grad_norm": 0.7396084666252136, + "learning_rate": 3.295381996489363e-06, + "loss": 2.0884, + "step": 22192 + }, + { + "epoch": 0.74, + "grad_norm": 0.7534698247909546, + "learning_rate": 3.29459342727228e-06, + "loss": 1.9702, + "step": 22193 + }, + { + "epoch": 0.74, + "grad_norm": 0.7330057621002197, + "learning_rate": 3.2938049338089505e-06, + "loss": 1.9607, + "step": 22194 + }, + { + "epoch": 0.74, + "grad_norm": 0.7441755533218384, + "learning_rate": 3.2930165161082794e-06, + "loss": 1.9973, + "step": 22195 + }, + { + "epoch": 0.74, + "grad_norm": 0.7269352078437805, + "learning_rate": 3.2922281741791783e-06, + "loss": 2.0468, + "step": 22196 + }, + { + "epoch": 0.74, + "grad_norm": 0.731460690498352, + "learning_rate": 3.291439908030557e-06, + "loss": 2.0519, + "step": 22197 + }, + { + "epoch": 0.74, + "grad_norm": 0.7212933301925659, + "learning_rate": 3.2906517176713102e-06, + "loss": 2.0337, + "step": 22198 + }, + { + "epoch": 0.74, + "grad_norm": 0.7965194582939148, + "learning_rate": 3.289863603110349e-06, + "loss": 2.0741, + "step": 22199 + }, + { + "epoch": 0.74, + "grad_norm": 0.7331159114837646, + "learning_rate": 3.2890755643565787e-06, + "loss": 1.9767, + "step": 22200 + }, + { + "epoch": 0.74, + "grad_norm": 0.759445071220398, + "learning_rate": 3.2882876014189003e-06, + "loss": 2.1019, + "step": 22201 + }, + { + "epoch": 0.74, + "grad_norm": 0.7961204051971436, + "learning_rate": 3.2874997143062103e-06, + "loss": 2.0002, + "step": 22202 + }, + { + "epoch": 0.74, + "grad_norm": 0.7729451060295105, + "learning_rate": 3.2867119030274196e-06, + "loss": 2.0946, + "step": 22203 + }, + { + "epoch": 0.74, + "grad_norm": 0.7333393692970276, + "learning_rate": 3.28592416759142e-06, + "loss": 2.0655, + "step": 22204 + }, + { + "epoch": 0.74, + "grad_norm": 0.723981499671936, + "learning_rate": 3.285136508007113e-06, + "loss": 1.9794, + "step": 22205 + }, + { + "epoch": 0.74, + "grad_norm": 0.7416284680366516, + "learning_rate": 3.2843489242834025e-06, + "loss": 2.098, + "step": 22206 + }, + { + "epoch": 0.74, + "grad_norm": 0.7751772403717041, + "learning_rate": 3.2835614164291827e-06, + "loss": 2.0049, + "step": 22207 + }, + { + "epoch": 0.74, + "grad_norm": 0.7494246959686279, + "learning_rate": 3.282773984453346e-06, + "loss": 2.093, + "step": 22208 + }, + { + "epoch": 0.74, + "grad_norm": 0.7638769745826721, + "learning_rate": 3.281986628364795e-06, + "loss": 2.0801, + "step": 22209 + }, + { + "epoch": 0.74, + "grad_norm": 0.7460142374038696, + "learning_rate": 3.2811993481724227e-06, + "loss": 2.0276, + "step": 22210 + }, + { + "epoch": 0.74, + "grad_norm": 0.7632189393043518, + "learning_rate": 3.280412143885119e-06, + "loss": 2.0371, + "step": 22211 + }, + { + "epoch": 0.74, + "grad_norm": 0.7309544086456299, + "learning_rate": 3.2796250155117804e-06, + "loss": 1.9982, + "step": 22212 + }, + { + "epoch": 0.74, + "grad_norm": 0.7215017080307007, + "learning_rate": 3.278837963061303e-06, + "loss": 1.9906, + "step": 22213 + }, + { + "epoch": 0.74, + "grad_norm": 0.7621486783027649, + "learning_rate": 3.2780509865425736e-06, + "loss": 2.0824, + "step": 22214 + }, + { + "epoch": 0.74, + "grad_norm": 0.7434404492378235, + "learning_rate": 3.2772640859644868e-06, + "loss": 1.9568, + "step": 22215 + }, + { + "epoch": 0.74, + "grad_norm": 0.7554197907447815, + "learning_rate": 3.2764772613359306e-06, + "loss": 2.1024, + "step": 22216 + }, + { + "epoch": 0.74, + "grad_norm": 0.7211920022964478, + "learning_rate": 3.27569051266579e-06, + "loss": 2.0127, + "step": 22217 + }, + { + "epoch": 0.74, + "grad_norm": 0.7470629215240479, + "learning_rate": 3.2749038399629585e-06, + "loss": 2.0952, + "step": 22218 + }, + { + "epoch": 0.74, + "grad_norm": 0.7381715178489685, + "learning_rate": 3.274117243236328e-06, + "loss": 2.0628, + "step": 22219 + }, + { + "epoch": 0.74, + "grad_norm": 0.7558199167251587, + "learning_rate": 3.2733307224947718e-06, + "loss": 2.0834, + "step": 22220 + }, + { + "epoch": 0.74, + "grad_norm": 0.7380803823471069, + "learning_rate": 3.272544277747184e-06, + "loss": 2.0128, + "step": 22221 + }, + { + "epoch": 0.74, + "grad_norm": 0.7458100318908691, + "learning_rate": 3.2717579090024507e-06, + "loss": 2.0446, + "step": 22222 + }, + { + "epoch": 0.74, + "grad_norm": 0.7276365756988525, + "learning_rate": 3.2709716162694506e-06, + "loss": 2.0027, + "step": 22223 + }, + { + "epoch": 0.74, + "grad_norm": 0.7607114911079407, + "learning_rate": 3.270185399557073e-06, + "loss": 2.059, + "step": 22224 + }, + { + "epoch": 0.74, + "grad_norm": 0.7345208525657654, + "learning_rate": 3.2693992588741965e-06, + "loss": 2.0235, + "step": 22225 + }, + { + "epoch": 0.74, + "grad_norm": 0.7323665618896484, + "learning_rate": 3.2686131942296994e-06, + "loss": 2.0501, + "step": 22226 + }, + { + "epoch": 0.74, + "grad_norm": 0.7412180304527283, + "learning_rate": 3.2678272056324657e-06, + "loss": 2.022, + "step": 22227 + }, + { + "epoch": 0.74, + "grad_norm": 0.7286436557769775, + "learning_rate": 3.2670412930913775e-06, + "loss": 2.0727, + "step": 22228 + }, + { + "epoch": 0.74, + "grad_norm": 0.7286287546157837, + "learning_rate": 3.2662554566153116e-06, + "loss": 2.0524, + "step": 22229 + }, + { + "epoch": 0.74, + "grad_norm": 0.7574065327644348, + "learning_rate": 3.2654696962131415e-06, + "loss": 2.0954, + "step": 22230 + }, + { + "epoch": 0.74, + "grad_norm": 0.7412490248680115, + "learning_rate": 3.2646840118937506e-06, + "loss": 1.9514, + "step": 22231 + }, + { + "epoch": 0.74, + "grad_norm": 0.7628432512283325, + "learning_rate": 3.2638984036660136e-06, + "loss": 2.0335, + "step": 22232 + }, + { + "epoch": 0.74, + "grad_norm": 0.7409769296646118, + "learning_rate": 3.2631128715388006e-06, + "loss": 2.0117, + "step": 22233 + }, + { + "epoch": 0.74, + "grad_norm": 0.74897301197052, + "learning_rate": 3.2623274155209906e-06, + "loss": 2.0008, + "step": 22234 + }, + { + "epoch": 0.74, + "grad_norm": 0.7371807098388672, + "learning_rate": 3.26154203562146e-06, + "loss": 2.0689, + "step": 22235 + }, + { + "epoch": 0.74, + "grad_norm": 0.7458344101905823, + "learning_rate": 3.260756731849075e-06, + "loss": 2.0605, + "step": 22236 + }, + { + "epoch": 0.74, + "grad_norm": 0.7580293416976929, + "learning_rate": 3.2599715042127146e-06, + "loss": 2.1262, + "step": 22237 + }, + { + "epoch": 0.74, + "grad_norm": 0.737846851348877, + "learning_rate": 3.2591863527212453e-06, + "loss": 2.0482, + "step": 22238 + }, + { + "epoch": 0.74, + "grad_norm": 0.7206730842590332, + "learning_rate": 3.258401277383535e-06, + "loss": 2.0171, + "step": 22239 + }, + { + "epoch": 0.74, + "grad_norm": 0.7570645809173584, + "learning_rate": 3.2576162782084564e-06, + "loss": 2.078, + "step": 22240 + }, + { + "epoch": 0.74, + "grad_norm": 0.7354682683944702, + "learning_rate": 3.2568313552048835e-06, + "loss": 2.0138, + "step": 22241 + }, + { + "epoch": 0.74, + "grad_norm": 0.7740846276283264, + "learning_rate": 3.2560465083816726e-06, + "loss": 1.9665, + "step": 22242 + }, + { + "epoch": 0.74, + "grad_norm": 0.7227646112442017, + "learning_rate": 3.255261737747696e-06, + "loss": 2.0277, + "step": 22243 + }, + { + "epoch": 0.74, + "grad_norm": 0.7426685690879822, + "learning_rate": 3.254477043311821e-06, + "loss": 2.0321, + "step": 22244 + }, + { + "epoch": 0.74, + "grad_norm": 0.7486542463302612, + "learning_rate": 3.2536924250829095e-06, + "loss": 2.0574, + "step": 22245 + }, + { + "epoch": 0.74, + "grad_norm": 0.7805135846138, + "learning_rate": 3.2529078830698304e-06, + "loss": 2.0107, + "step": 22246 + }, + { + "epoch": 0.74, + "grad_norm": 0.7241868376731873, + "learning_rate": 3.252123417281443e-06, + "loss": 2.0181, + "step": 22247 + }, + { + "epoch": 0.74, + "grad_norm": 0.746249794960022, + "learning_rate": 3.2513390277266076e-06, + "loss": 2.052, + "step": 22248 + }, + { + "epoch": 0.74, + "grad_norm": 0.7432492971420288, + "learning_rate": 3.250554714414189e-06, + "loss": 2.0512, + "step": 22249 + }, + { + "epoch": 0.74, + "grad_norm": 0.7555325031280518, + "learning_rate": 3.2497704773530515e-06, + "loss": 2.0181, + "step": 22250 + }, + { + "epoch": 0.74, + "grad_norm": 0.7270098924636841, + "learning_rate": 3.248986316552051e-06, + "loss": 2.0537, + "step": 22251 + }, + { + "epoch": 0.74, + "grad_norm": 0.7685802578926086, + "learning_rate": 3.248202232020042e-06, + "loss": 1.9618, + "step": 22252 + }, + { + "epoch": 0.74, + "grad_norm": 0.7535423636436462, + "learning_rate": 3.247418223765888e-06, + "loss": 2.0282, + "step": 22253 + }, + { + "epoch": 0.74, + "grad_norm": 0.7459569573402405, + "learning_rate": 3.2466342917984496e-06, + "loss": 2.0116, + "step": 22254 + }, + { + "epoch": 0.74, + "grad_norm": 0.7370901703834534, + "learning_rate": 3.2458504361265775e-06, + "loss": 2.0651, + "step": 22255 + }, + { + "epoch": 0.74, + "grad_norm": 0.7668207287788391, + "learning_rate": 3.2450666567591273e-06, + "loss": 2.0489, + "step": 22256 + }, + { + "epoch": 0.74, + "grad_norm": 0.7648358941078186, + "learning_rate": 3.244282953704957e-06, + "loss": 2.051, + "step": 22257 + }, + { + "epoch": 0.74, + "grad_norm": 0.7852567434310913, + "learning_rate": 3.2434993269729163e-06, + "loss": 2.0722, + "step": 22258 + }, + { + "epoch": 0.74, + "grad_norm": 0.7253546714782715, + "learning_rate": 3.2427157765718632e-06, + "loss": 2.026, + "step": 22259 + }, + { + "epoch": 0.74, + "grad_norm": 0.7392908930778503, + "learning_rate": 3.2419323025106477e-06, + "loss": 2.0437, + "step": 22260 + }, + { + "epoch": 0.74, + "grad_norm": 0.727792501449585, + "learning_rate": 3.241148904798117e-06, + "loss": 2.0109, + "step": 22261 + }, + { + "epoch": 0.74, + "grad_norm": 0.7448568344116211, + "learning_rate": 3.2403655834431246e-06, + "loss": 2.0892, + "step": 22262 + }, + { + "epoch": 0.74, + "grad_norm": 0.7522231936454773, + "learning_rate": 3.2395823384545267e-06, + "loss": 2.0611, + "step": 22263 + }, + { + "epoch": 0.74, + "grad_norm": 0.7411322593688965, + "learning_rate": 3.2387991698411593e-06, + "loss": 2.0874, + "step": 22264 + }, + { + "epoch": 0.74, + "grad_norm": 0.7100245952606201, + "learning_rate": 3.238016077611876e-06, + "loss": 2.0557, + "step": 22265 + }, + { + "epoch": 0.74, + "grad_norm": 0.7298486828804016, + "learning_rate": 3.2372330617755286e-06, + "loss": 2.0877, + "step": 22266 + }, + { + "epoch": 0.74, + "grad_norm": 0.7426810264587402, + "learning_rate": 3.236450122340955e-06, + "loss": 2.0131, + "step": 22267 + }, + { + "epoch": 0.74, + "grad_norm": 0.7703245282173157, + "learning_rate": 3.235667259317007e-06, + "loss": 2.0676, + "step": 22268 + }, + { + "epoch": 0.74, + "grad_norm": 0.7220068573951721, + "learning_rate": 3.234884472712523e-06, + "loss": 2.0786, + "step": 22269 + }, + { + "epoch": 0.74, + "grad_norm": 0.759346604347229, + "learning_rate": 3.2341017625363526e-06, + "loss": 2.0496, + "step": 22270 + }, + { + "epoch": 0.74, + "grad_norm": 0.7599336504936218, + "learning_rate": 3.233319128797332e-06, + "loss": 2.1154, + "step": 22271 + }, + { + "epoch": 0.74, + "grad_norm": 0.7436297535896301, + "learning_rate": 3.2325365715043088e-06, + "loss": 2.0137, + "step": 22272 + }, + { + "epoch": 0.74, + "grad_norm": 0.7367570996284485, + "learning_rate": 3.2317540906661226e-06, + "loss": 1.9497, + "step": 22273 + }, + { + "epoch": 0.74, + "grad_norm": 0.7532768845558167, + "learning_rate": 3.2309716862916072e-06, + "loss": 2.0986, + "step": 22274 + }, + { + "epoch": 0.74, + "grad_norm": 0.7432680726051331, + "learning_rate": 3.230189358389608e-06, + "loss": 2.0491, + "step": 22275 + }, + { + "epoch": 0.74, + "grad_norm": 0.734361469745636, + "learning_rate": 3.2294071069689647e-06, + "loss": 1.979, + "step": 22276 + }, + { + "epoch": 0.74, + "grad_norm": 0.7767292261123657, + "learning_rate": 3.228624932038512e-06, + "loss": 2.0702, + "step": 22277 + }, + { + "epoch": 0.74, + "grad_norm": 0.7359764575958252, + "learning_rate": 3.2278428336070834e-06, + "loss": 1.9546, + "step": 22278 + }, + { + "epoch": 0.74, + "grad_norm": 0.7637745141983032, + "learning_rate": 3.227060811683521e-06, + "loss": 2.012, + "step": 22279 + }, + { + "epoch": 0.74, + "grad_norm": 0.7211446166038513, + "learning_rate": 3.226278866276652e-06, + "loss": 2.0404, + "step": 22280 + }, + { + "epoch": 0.74, + "grad_norm": 0.7533299326896667, + "learning_rate": 3.2254969973953186e-06, + "loss": 2.014, + "step": 22281 + }, + { + "epoch": 0.74, + "grad_norm": 0.7401911616325378, + "learning_rate": 3.2247152050483497e-06, + "loss": 2.0569, + "step": 22282 + }, + { + "epoch": 0.74, + "grad_norm": 0.7467284202575684, + "learning_rate": 3.2239334892445753e-06, + "loss": 2.0399, + "step": 22283 + }, + { + "epoch": 0.74, + "grad_norm": 0.7396549582481384, + "learning_rate": 3.223151849992828e-06, + "loss": 2.0169, + "step": 22284 + }, + { + "epoch": 0.74, + "grad_norm": 0.7866047024726868, + "learning_rate": 3.2223702873019424e-06, + "loss": 2.1309, + "step": 22285 + }, + { + "epoch": 0.74, + "grad_norm": 0.7141895294189453, + "learning_rate": 3.221588801180746e-06, + "loss": 2.0492, + "step": 22286 + }, + { + "epoch": 0.74, + "grad_norm": 0.7516275644302368, + "learning_rate": 3.2208073916380635e-06, + "loss": 2.0349, + "step": 22287 + }, + { + "epoch": 0.74, + "grad_norm": 0.761690080165863, + "learning_rate": 3.2200260586827293e-06, + "loss": 2.054, + "step": 22288 + }, + { + "epoch": 0.74, + "grad_norm": 0.7326943874359131, + "learning_rate": 3.2192448023235646e-06, + "loss": 2.0477, + "step": 22289 + }, + { + "epoch": 0.74, + "grad_norm": 0.7479966878890991, + "learning_rate": 3.218463622569401e-06, + "loss": 2.053, + "step": 22290 + }, + { + "epoch": 0.74, + "grad_norm": 0.7634629011154175, + "learning_rate": 3.2176825194290573e-06, + "loss": 2.0996, + "step": 22291 + }, + { + "epoch": 0.74, + "grad_norm": 0.740103006362915, + "learning_rate": 3.216901492911365e-06, + "loss": 2.0115, + "step": 22292 + }, + { + "epoch": 0.74, + "grad_norm": 0.7479941844940186, + "learning_rate": 3.216120543025141e-06, + "loss": 2.055, + "step": 22293 + }, + { + "epoch": 0.74, + "grad_norm": 0.7670915126800537, + "learning_rate": 3.215339669779215e-06, + "loss": 2.0369, + "step": 22294 + }, + { + "epoch": 0.74, + "grad_norm": 0.7458199858665466, + "learning_rate": 3.214558873182405e-06, + "loss": 2.0515, + "step": 22295 + }, + { + "epoch": 0.74, + "grad_norm": 0.7210725545883179, + "learning_rate": 3.21377815324353e-06, + "loss": 2.0645, + "step": 22296 + }, + { + "epoch": 0.74, + "grad_norm": 0.7615597248077393, + "learning_rate": 3.2129975099714106e-06, + "loss": 2.0551, + "step": 22297 + }, + { + "epoch": 0.74, + "grad_norm": 0.7421480417251587, + "learning_rate": 3.212216943374872e-06, + "loss": 2.0794, + "step": 22298 + }, + { + "epoch": 0.74, + "grad_norm": 0.7504313588142395, + "learning_rate": 3.2114364534627284e-06, + "loss": 2.0574, + "step": 22299 + }, + { + "epoch": 0.74, + "grad_norm": 0.759382426738739, + "learning_rate": 3.2106560402437937e-06, + "loss": 2.0659, + "step": 22300 + }, + { + "epoch": 0.74, + "grad_norm": 0.7769963145256042, + "learning_rate": 3.209875703726891e-06, + "loss": 2.0633, + "step": 22301 + }, + { + "epoch": 0.74, + "grad_norm": 0.7394306063652039, + "learning_rate": 3.20909544392083e-06, + "loss": 2.0498, + "step": 22302 + }, + { + "epoch": 0.74, + "grad_norm": 0.7504224181175232, + "learning_rate": 3.2083152608344326e-06, + "loss": 2.0534, + "step": 22303 + }, + { + "epoch": 0.74, + "grad_norm": 0.7295312285423279, + "learning_rate": 3.2075351544765086e-06, + "loss": 2.0296, + "step": 22304 + }, + { + "epoch": 0.74, + "grad_norm": 0.7356663942337036, + "learning_rate": 3.2067551248558694e-06, + "loss": 2.0413, + "step": 22305 + }, + { + "epoch": 0.74, + "grad_norm": 0.7510823607444763, + "learning_rate": 3.205975171981328e-06, + "loss": 2.0576, + "step": 22306 + }, + { + "epoch": 0.74, + "grad_norm": 0.7515228986740112, + "learning_rate": 3.2051952958617017e-06, + "loss": 2.0773, + "step": 22307 + }, + { + "epoch": 0.74, + "grad_norm": 0.7723273038864136, + "learning_rate": 3.2044154965057973e-06, + "loss": 2.1316, + "step": 22308 + }, + { + "epoch": 0.74, + "grad_norm": 0.7313694357872009, + "learning_rate": 3.20363577392242e-06, + "loss": 2.0284, + "step": 22309 + }, + { + "epoch": 0.74, + "grad_norm": 0.7397423982620239, + "learning_rate": 3.202856128120386e-06, + "loss": 1.9872, + "step": 22310 + }, + { + "epoch": 0.74, + "grad_norm": 0.763303279876709, + "learning_rate": 3.202076559108497e-06, + "loss": 2.0278, + "step": 22311 + }, + { + "epoch": 0.74, + "grad_norm": 0.767754316329956, + "learning_rate": 3.2012970668955657e-06, + "loss": 2.0908, + "step": 22312 + }, + { + "epoch": 0.74, + "grad_norm": 0.7294447422027588, + "learning_rate": 3.2005176514903926e-06, + "loss": 2.0416, + "step": 22313 + }, + { + "epoch": 0.74, + "grad_norm": 0.7457343935966492, + "learning_rate": 3.199738312901789e-06, + "loss": 2.0025, + "step": 22314 + }, + { + "epoch": 0.74, + "grad_norm": 0.7523823380470276, + "learning_rate": 3.1989590511385547e-06, + "loss": 2.0742, + "step": 22315 + }, + { + "epoch": 0.74, + "grad_norm": 0.7587494254112244, + "learning_rate": 3.1981798662094977e-06, + "loss": 2.0767, + "step": 22316 + }, + { + "epoch": 0.74, + "grad_norm": 0.7422943711280823, + "learning_rate": 3.197400758123418e-06, + "loss": 2.0907, + "step": 22317 + }, + { + "epoch": 0.74, + "grad_norm": 0.7486933469772339, + "learning_rate": 3.1966217268891155e-06, + "loss": 1.9683, + "step": 22318 + }, + { + "epoch": 0.74, + "grad_norm": 0.7673466801643372, + "learning_rate": 3.195842772515393e-06, + "loss": 2.0903, + "step": 22319 + }, + { + "epoch": 0.74, + "grad_norm": 0.7478808164596558, + "learning_rate": 3.1950638950110535e-06, + "loss": 2.009, + "step": 22320 + }, + { + "epoch": 0.74, + "grad_norm": 0.7606772184371948, + "learning_rate": 3.1942850943848956e-06, + "loss": 2.0965, + "step": 22321 + }, + { + "epoch": 0.74, + "grad_norm": 0.7608497142791748, + "learning_rate": 3.1935063706457127e-06, + "loss": 2.0435, + "step": 22322 + }, + { + "epoch": 0.74, + "grad_norm": 0.7370107173919678, + "learning_rate": 3.192727723802308e-06, + "loss": 2.109, + "step": 22323 + }, + { + "epoch": 0.74, + "grad_norm": 0.7349649667739868, + "learning_rate": 3.191949153863474e-06, + "loss": 2.061, + "step": 22324 + }, + { + "epoch": 0.74, + "grad_norm": 0.7287658452987671, + "learning_rate": 3.191170660838011e-06, + "loss": 2.0706, + "step": 22325 + }, + { + "epoch": 0.74, + "grad_norm": 0.8100554943084717, + "learning_rate": 3.1903922447347115e-06, + "loss": 2.066, + "step": 22326 + }, + { + "epoch": 0.74, + "grad_norm": 0.7326253056526184, + "learning_rate": 3.1896139055623666e-06, + "loss": 2.0652, + "step": 22327 + }, + { + "epoch": 0.74, + "grad_norm": 0.7203582525253296, + "learning_rate": 3.188835643329773e-06, + "loss": 2.1105, + "step": 22328 + }, + { + "epoch": 0.74, + "grad_norm": 0.7398223876953125, + "learning_rate": 3.1880574580457246e-06, + "loss": 2.025, + "step": 22329 + }, + { + "epoch": 0.74, + "grad_norm": 0.7392032146453857, + "learning_rate": 3.1872793497190114e-06, + "loss": 2.012, + "step": 22330 + }, + { + "epoch": 0.74, + "grad_norm": 0.7362843155860901, + "learning_rate": 3.1865013183584205e-06, + "loss": 2.0364, + "step": 22331 + }, + { + "epoch": 0.74, + "grad_norm": 0.735448956489563, + "learning_rate": 3.185723363972748e-06, + "loss": 2.0283, + "step": 22332 + }, + { + "epoch": 0.74, + "grad_norm": 0.7439790368080139, + "learning_rate": 3.1849454865707764e-06, + "loss": 2.0001, + "step": 22333 + }, + { + "epoch": 0.74, + "grad_norm": 0.7423273921012878, + "learning_rate": 3.184167686161299e-06, + "loss": 2.0564, + "step": 22334 + }, + { + "epoch": 0.74, + "grad_norm": 0.736746609210968, + "learning_rate": 3.1833899627530975e-06, + "loss": 2.0823, + "step": 22335 + }, + { + "epoch": 0.74, + "grad_norm": 0.7634266018867493, + "learning_rate": 3.182612316354965e-06, + "loss": 2.1429, + "step": 22336 + }, + { + "epoch": 0.74, + "grad_norm": 0.7439766526222229, + "learning_rate": 3.1818347469756793e-06, + "loss": 2.0068, + "step": 22337 + }, + { + "epoch": 0.74, + "grad_norm": 0.762452244758606, + "learning_rate": 3.181057254624029e-06, + "loss": 2.0071, + "step": 22338 + }, + { + "epoch": 0.74, + "grad_norm": 0.7409733533859253, + "learning_rate": 3.180279839308804e-06, + "loss": 2.0562, + "step": 22339 + }, + { + "epoch": 0.74, + "grad_norm": 0.764900267124176, + "learning_rate": 3.179502501038775e-06, + "loss": 2.017, + "step": 22340 + }, + { + "epoch": 0.74, + "grad_norm": 0.7477806806564331, + "learning_rate": 3.1787252398227285e-06, + "loss": 2.0808, + "step": 22341 + }, + { + "epoch": 0.74, + "grad_norm": 0.7834635376930237, + "learning_rate": 3.177948055669451e-06, + "loss": 2.0203, + "step": 22342 + }, + { + "epoch": 0.74, + "grad_norm": 0.7584335803985596, + "learning_rate": 3.1771709485877167e-06, + "loss": 2.0792, + "step": 22343 + }, + { + "epoch": 0.74, + "grad_norm": 0.7173949480056763, + "learning_rate": 3.1763939185863047e-06, + "loss": 2.0907, + "step": 22344 + }, + { + "epoch": 0.74, + "grad_norm": 0.7423549890518188, + "learning_rate": 3.175616965673998e-06, + "loss": 2.0694, + "step": 22345 + }, + { + "epoch": 0.74, + "grad_norm": 0.723930835723877, + "learning_rate": 3.1748400898595666e-06, + "loss": 2.0132, + "step": 22346 + }, + { + "epoch": 0.74, + "grad_norm": 0.7401352524757385, + "learning_rate": 3.1740632911517965e-06, + "loss": 2.0004, + "step": 22347 + }, + { + "epoch": 0.74, + "grad_norm": 0.7421391606330872, + "learning_rate": 3.1732865695594594e-06, + "loss": 2.1311, + "step": 22348 + }, + { + "epoch": 0.74, + "grad_norm": 0.7346087098121643, + "learning_rate": 3.172509925091326e-06, + "loss": 2.0794, + "step": 22349 + }, + { + "epoch": 0.74, + "grad_norm": 0.7473320364952087, + "learning_rate": 3.1717333577561737e-06, + "loss": 2.0308, + "step": 22350 + }, + { + "epoch": 0.74, + "grad_norm": 0.766850471496582, + "learning_rate": 3.17095686756278e-06, + "loss": 2.0868, + "step": 22351 + }, + { + "epoch": 0.74, + "grad_norm": 0.7663282155990601, + "learning_rate": 3.1701804545199133e-06, + "loss": 2.0651, + "step": 22352 + }, + { + "epoch": 0.74, + "grad_norm": 0.7200099229812622, + "learning_rate": 3.1694041186363424e-06, + "loss": 2.0572, + "step": 22353 + }, + { + "epoch": 0.74, + "grad_norm": 0.7191290259361267, + "learning_rate": 3.1686278599208396e-06, + "loss": 2.0109, + "step": 22354 + }, + { + "epoch": 0.74, + "grad_norm": 0.7261087894439697, + "learning_rate": 3.1678516783821788e-06, + "loss": 1.9942, + "step": 22355 + }, + { + "epoch": 0.74, + "grad_norm": 0.7159486413002014, + "learning_rate": 3.167075574029127e-06, + "loss": 2.0028, + "step": 22356 + }, + { + "epoch": 0.74, + "grad_norm": 0.718169629573822, + "learning_rate": 3.166299546870447e-06, + "loss": 2.0402, + "step": 22357 + }, + { + "epoch": 0.74, + "grad_norm": 0.7482108473777771, + "learning_rate": 3.165523596914912e-06, + "loss": 1.9922, + "step": 22358 + }, + { + "epoch": 0.74, + "grad_norm": 0.7600855827331543, + "learning_rate": 3.1647477241712843e-06, + "loss": 2.0443, + "step": 22359 + }, + { + "epoch": 0.74, + "grad_norm": 0.7388033270835876, + "learning_rate": 3.1639719286483304e-06, + "loss": 2.056, + "step": 22360 + }, + { + "epoch": 0.74, + "grad_norm": 0.7413361072540283, + "learning_rate": 3.1631962103548217e-06, + "loss": 2.0414, + "step": 22361 + }, + { + "epoch": 0.74, + "grad_norm": 0.7584394812583923, + "learning_rate": 3.162420569299509e-06, + "loss": 2.1137, + "step": 22362 + }, + { + "epoch": 0.74, + "grad_norm": 0.7635613679885864, + "learning_rate": 3.161645005491162e-06, + "loss": 1.9723, + "step": 22363 + }, + { + "epoch": 0.74, + "grad_norm": 0.7402101159095764, + "learning_rate": 3.1608695189385454e-06, + "loss": 2.0245, + "step": 22364 + }, + { + "epoch": 0.74, + "grad_norm": 0.7490797638893127, + "learning_rate": 3.1600941096504156e-06, + "loss": 2.0702, + "step": 22365 + }, + { + "epoch": 0.74, + "grad_norm": 0.7378631234169006, + "learning_rate": 3.1593187776355316e-06, + "loss": 1.9592, + "step": 22366 + }, + { + "epoch": 0.74, + "grad_norm": 0.7659499049186707, + "learning_rate": 3.1585435229026585e-06, + "loss": 2.1671, + "step": 22367 + }, + { + "epoch": 0.74, + "grad_norm": 0.7646713256835938, + "learning_rate": 3.157768345460547e-06, + "loss": 2.0457, + "step": 22368 + }, + { + "epoch": 0.74, + "grad_norm": 0.7605593800544739, + "learning_rate": 3.1569932453179596e-06, + "loss": 2.0877, + "step": 22369 + }, + { + "epoch": 0.74, + "grad_norm": 0.7417692542076111, + "learning_rate": 3.1562182224836556e-06, + "loss": 2.0764, + "step": 22370 + }, + { + "epoch": 0.74, + "grad_norm": 0.7433812022209167, + "learning_rate": 3.155443276966387e-06, + "loss": 2.0779, + "step": 22371 + }, + { + "epoch": 0.74, + "grad_norm": 0.7464502453804016, + "learning_rate": 3.1546684087749045e-06, + "loss": 2.0876, + "step": 22372 + }, + { + "epoch": 0.74, + "grad_norm": 0.7534196376800537, + "learning_rate": 3.153893617917971e-06, + "loss": 2.0519, + "step": 22373 + }, + { + "epoch": 0.74, + "grad_norm": 0.7375979423522949, + "learning_rate": 3.1531189044043353e-06, + "loss": 2.0947, + "step": 22374 + }, + { + "epoch": 0.74, + "grad_norm": 0.7461097836494446, + "learning_rate": 3.1523442682427465e-06, + "loss": 2.0766, + "step": 22375 + }, + { + "epoch": 0.74, + "grad_norm": 0.7347460389137268, + "learning_rate": 3.1515697094419582e-06, + "loss": 1.9848, + "step": 22376 + }, + { + "epoch": 0.74, + "grad_norm": 0.7717547416687012, + "learning_rate": 3.1507952280107247e-06, + "loss": 2.0369, + "step": 22377 + }, + { + "epoch": 0.74, + "grad_norm": 0.7372307777404785, + "learning_rate": 3.1500208239577933e-06, + "loss": 1.958, + "step": 22378 + }, + { + "epoch": 0.74, + "grad_norm": 0.7491931915283203, + "learning_rate": 3.149246497291909e-06, + "loss": 2.1068, + "step": 22379 + }, + { + "epoch": 0.74, + "grad_norm": 0.7384610176086426, + "learning_rate": 3.1484722480218265e-06, + "loss": 2.0887, + "step": 22380 + }, + { + "epoch": 0.74, + "grad_norm": 0.75984787940979, + "learning_rate": 3.147698076156285e-06, + "loss": 1.9736, + "step": 22381 + }, + { + "epoch": 0.74, + "grad_norm": 0.7757591009140015, + "learning_rate": 3.1469239817040355e-06, + "loss": 2.0025, + "step": 22382 + }, + { + "epoch": 0.74, + "grad_norm": 0.7683458924293518, + "learning_rate": 3.1461499646738293e-06, + "loss": 2.0649, + "step": 22383 + }, + { + "epoch": 0.74, + "grad_norm": 0.7802779674530029, + "learning_rate": 3.145376025074397e-06, + "loss": 2.0285, + "step": 22384 + }, + { + "epoch": 0.74, + "grad_norm": 0.7464694380760193, + "learning_rate": 3.1446021629144885e-06, + "loss": 2.0042, + "step": 22385 + }, + { + "epoch": 0.74, + "grad_norm": 0.7300896048545837, + "learning_rate": 3.143828378202851e-06, + "loss": 2.0035, + "step": 22386 + }, + { + "epoch": 0.74, + "grad_norm": 0.7477436661720276, + "learning_rate": 3.143054670948222e-06, + "loss": 2.0836, + "step": 22387 + }, + { + "epoch": 0.74, + "grad_norm": 0.7348775267601013, + "learning_rate": 3.1422810411593406e-06, + "loss": 2.039, + "step": 22388 + }, + { + "epoch": 0.74, + "grad_norm": 0.7333837747573853, + "learning_rate": 3.1415074888449513e-06, + "loss": 2.0413, + "step": 22389 + }, + { + "epoch": 0.74, + "grad_norm": 0.7494324445724487, + "learning_rate": 3.1407340140137878e-06, + "loss": 2.0033, + "step": 22390 + }, + { + "epoch": 0.74, + "grad_norm": 0.7390850186347961, + "learning_rate": 3.139960616674592e-06, + "loss": 2.1067, + "step": 22391 + }, + { + "epoch": 0.74, + "grad_norm": 0.7529094219207764, + "learning_rate": 3.1391872968361037e-06, + "loss": 2.0947, + "step": 22392 + }, + { + "epoch": 0.75, + "grad_norm": 0.7675386667251587, + "learning_rate": 3.138414054507056e-06, + "loss": 2.0478, + "step": 22393 + }, + { + "epoch": 0.75, + "grad_norm": 0.7869687676429749, + "learning_rate": 3.1376408896961817e-06, + "loss": 2.0321, + "step": 22394 + }, + { + "epoch": 0.75, + "grad_norm": 0.7473276257514954, + "learning_rate": 3.1368678024122233e-06, + "loss": 2.0585, + "step": 22395 + }, + { + "epoch": 0.75, + "grad_norm": 0.7380658388137817, + "learning_rate": 3.1360947926639096e-06, + "loss": 2.0939, + "step": 22396 + }, + { + "epoch": 0.75, + "grad_norm": 0.7371789813041687, + "learning_rate": 3.135321860459971e-06, + "loss": 2.0922, + "step": 22397 + }, + { + "epoch": 0.75, + "grad_norm": 0.7323634624481201, + "learning_rate": 3.134549005809143e-06, + "loss": 2.0656, + "step": 22398 + }, + { + "epoch": 0.75, + "grad_norm": 0.7332828640937805, + "learning_rate": 3.1337762287201602e-06, + "loss": 2.0457, + "step": 22399 + }, + { + "epoch": 0.75, + "grad_norm": 0.7701794505119324, + "learning_rate": 3.1330035292017458e-06, + "loss": 2.0316, + "step": 22400 + }, + { + "epoch": 0.75, + "grad_norm": 0.7202067971229553, + "learning_rate": 3.132230907262637e-06, + "loss": 1.9735, + "step": 22401 + }, + { + "epoch": 0.75, + "grad_norm": 0.7672884464263916, + "learning_rate": 3.131458362911558e-06, + "loss": 2.1032, + "step": 22402 + }, + { + "epoch": 0.75, + "grad_norm": 0.7564374208450317, + "learning_rate": 3.130685896157234e-06, + "loss": 2.1135, + "step": 22403 + }, + { + "epoch": 0.75, + "grad_norm": 0.7424620985984802, + "learning_rate": 3.1299135070083952e-06, + "loss": 2.0543, + "step": 22404 + }, + { + "epoch": 0.75, + "grad_norm": 0.7950997948646545, + "learning_rate": 3.129141195473773e-06, + "loss": 2.1282, + "step": 22405 + }, + { + "epoch": 0.75, + "grad_norm": 0.759041428565979, + "learning_rate": 3.1283689615620804e-06, + "loss": 2.105, + "step": 22406 + }, + { + "epoch": 0.75, + "grad_norm": 0.7729628086090088, + "learning_rate": 3.1275968052820494e-06, + "loss": 2.0031, + "step": 22407 + }, + { + "epoch": 0.75, + "grad_norm": 0.7445035576820374, + "learning_rate": 3.1268247266424046e-06, + "loss": 2.0507, + "step": 22408 + }, + { + "epoch": 0.75, + "grad_norm": 0.7606754899024963, + "learning_rate": 3.126052725651866e-06, + "loss": 2.0229, + "step": 22409 + }, + { + "epoch": 0.75, + "grad_norm": 0.7545680999755859, + "learning_rate": 3.125280802319152e-06, + "loss": 1.932, + "step": 22410 + }, + { + "epoch": 0.75, + "grad_norm": 0.739841878414154, + "learning_rate": 3.1245089566529885e-06, + "loss": 2.0291, + "step": 22411 + }, + { + "epoch": 0.75, + "grad_norm": 0.762795090675354, + "learning_rate": 3.1237371886620914e-06, + "loss": 1.9947, + "step": 22412 + }, + { + "epoch": 0.75, + "grad_norm": 0.7483312487602234, + "learning_rate": 3.1229654983551817e-06, + "loss": 2.0598, + "step": 22413 + }, + { + "epoch": 0.75, + "grad_norm": 0.7436697483062744, + "learning_rate": 3.1221938857409807e-06, + "loss": 2.1297, + "step": 22414 + }, + { + "epoch": 0.75, + "grad_norm": 0.7580074667930603, + "learning_rate": 3.1214223508282016e-06, + "loss": 1.9988, + "step": 22415 + }, + { + "epoch": 0.75, + "grad_norm": 0.7382920980453491, + "learning_rate": 3.1206508936255585e-06, + "loss": 2.0091, + "step": 22416 + }, + { + "epoch": 0.75, + "grad_norm": 0.7696748375892639, + "learning_rate": 3.119879514141774e-06, + "loss": 2.135, + "step": 22417 + }, + { + "epoch": 0.75, + "grad_norm": 0.7328803539276123, + "learning_rate": 3.1191082123855576e-06, + "loss": 2.0553, + "step": 22418 + }, + { + "epoch": 0.75, + "grad_norm": 0.76280677318573, + "learning_rate": 3.118336988365621e-06, + "loss": 2.0323, + "step": 22419 + }, + { + "epoch": 0.75, + "grad_norm": 0.7603625059127808, + "learning_rate": 3.117565842090681e-06, + "loss": 1.9989, + "step": 22420 + }, + { + "epoch": 0.75, + "grad_norm": 0.7449672818183899, + "learning_rate": 3.1167947735694513e-06, + "loss": 2.0427, + "step": 22421 + }, + { + "epoch": 0.75, + "grad_norm": 0.7192001342773438, + "learning_rate": 3.1160237828106363e-06, + "loss": 2.0108, + "step": 22422 + }, + { + "epoch": 0.75, + "grad_norm": 0.7489215135574341, + "learning_rate": 3.1152528698229544e-06, + "loss": 2.1198, + "step": 22423 + }, + { + "epoch": 0.75, + "grad_norm": 0.7555010318756104, + "learning_rate": 3.1144820346151105e-06, + "loss": 2.073, + "step": 22424 + }, + { + "epoch": 0.75, + "grad_norm": 0.7394611835479736, + "learning_rate": 3.11371127719581e-06, + "loss": 2.0132, + "step": 22425 + }, + { + "epoch": 0.75, + "grad_norm": 0.7460958957672119, + "learning_rate": 3.1129405975737637e-06, + "loss": 2.0845, + "step": 22426 + }, + { + "epoch": 0.75, + "grad_norm": 0.7331417202949524, + "learning_rate": 3.1121699957576847e-06, + "loss": 1.9843, + "step": 22427 + }, + { + "epoch": 0.75, + "grad_norm": 0.7267646789550781, + "learning_rate": 3.1113994717562656e-06, + "loss": 2.0407, + "step": 22428 + }, + { + "epoch": 0.75, + "grad_norm": 0.7256742119789124, + "learning_rate": 3.110629025578219e-06, + "loss": 2.0785, + "step": 22429 + }, + { + "epoch": 0.75, + "grad_norm": 0.7490482330322266, + "learning_rate": 3.109858657232251e-06, + "loss": 2.0295, + "step": 22430 + }, + { + "epoch": 0.75, + "grad_norm": 0.7470577359199524, + "learning_rate": 3.109088366727058e-06, + "loss": 2.1204, + "step": 22431 + }, + { + "epoch": 0.75, + "grad_norm": 0.7536673545837402, + "learning_rate": 3.10831815407135e-06, + "loss": 2.0713, + "step": 22432 + }, + { + "epoch": 0.75, + "grad_norm": 0.7368770241737366, + "learning_rate": 3.107548019273824e-06, + "loss": 2.0351, + "step": 22433 + }, + { + "epoch": 0.75, + "grad_norm": 0.7687072157859802, + "learning_rate": 3.1067779623431783e-06, + "loss": 2.0223, + "step": 22434 + }, + { + "epoch": 0.75, + "grad_norm": 0.7330232858657837, + "learning_rate": 3.1060079832881164e-06, + "loss": 2.015, + "step": 22435 + }, + { + "epoch": 0.75, + "grad_norm": 0.7784209251403809, + "learning_rate": 3.105238082117338e-06, + "loss": 2.0829, + "step": 22436 + }, + { + "epoch": 0.75, + "grad_norm": 0.7776232361793518, + "learning_rate": 3.104468258839539e-06, + "loss": 2.0883, + "step": 22437 + }, + { + "epoch": 0.75, + "grad_norm": 0.7391170859336853, + "learning_rate": 3.1036985134634135e-06, + "loss": 1.9991, + "step": 22438 + }, + { + "epoch": 0.75, + "grad_norm": 0.740599513053894, + "learning_rate": 3.1029288459976637e-06, + "loss": 2.0355, + "step": 22439 + }, + { + "epoch": 0.75, + "grad_norm": 0.7746738791465759, + "learning_rate": 3.1021592564509817e-06, + "loss": 2.0293, + "step": 22440 + }, + { + "epoch": 0.75, + "grad_norm": 0.7476466298103333, + "learning_rate": 3.1013897448320584e-06, + "loss": 2.0047, + "step": 22441 + }, + { + "epoch": 0.75, + "grad_norm": 0.7456259727478027, + "learning_rate": 3.100620311149591e-06, + "loss": 2.0776, + "step": 22442 + }, + { + "epoch": 0.75, + "grad_norm": 0.7681435346603394, + "learning_rate": 3.0998509554122757e-06, + "loss": 2.0771, + "step": 22443 + }, + { + "epoch": 0.75, + "grad_norm": 0.7408738136291504, + "learning_rate": 3.099081677628797e-06, + "loss": 2.0476, + "step": 22444 + }, + { + "epoch": 0.75, + "grad_norm": 0.7477917075157166, + "learning_rate": 3.098312477807852e-06, + "loss": 2.0272, + "step": 22445 + }, + { + "epoch": 0.75, + "grad_norm": 0.7432190179824829, + "learning_rate": 3.097543355958128e-06, + "loss": 1.9824, + "step": 22446 + }, + { + "epoch": 0.75, + "grad_norm": 0.7547247409820557, + "learning_rate": 3.096774312088311e-06, + "loss": 2.0171, + "step": 22447 + }, + { + "epoch": 0.75, + "grad_norm": 0.7076192498207092, + "learning_rate": 3.0960053462070917e-06, + "loss": 1.9521, + "step": 22448 + }, + { + "epoch": 0.75, + "grad_norm": 0.7628404498100281, + "learning_rate": 3.095236458323164e-06, + "loss": 1.9991, + "step": 22449 + }, + { + "epoch": 0.75, + "grad_norm": 0.7696869373321533, + "learning_rate": 3.094467648445202e-06, + "loss": 1.9448, + "step": 22450 + }, + { + "epoch": 0.75, + "grad_norm": 0.7575535178184509, + "learning_rate": 3.0936989165818977e-06, + "loss": 2.0473, + "step": 22451 + }, + { + "epoch": 0.75, + "grad_norm": 0.7805883884429932, + "learning_rate": 3.092930262741939e-06, + "loss": 2.0498, + "step": 22452 + }, + { + "epoch": 0.75, + "grad_norm": 0.7141225934028625, + "learning_rate": 3.092161686934002e-06, + "loss": 1.9841, + "step": 22453 + }, + { + "epoch": 0.75, + "grad_norm": 0.7509320974349976, + "learning_rate": 3.091393189166778e-06, + "loss": 2.0246, + "step": 22454 + }, + { + "epoch": 0.75, + "grad_norm": 0.735600471496582, + "learning_rate": 3.0906247694489423e-06, + "loss": 2.0283, + "step": 22455 + }, + { + "epoch": 0.75, + "grad_norm": 0.7517402768135071, + "learning_rate": 3.089856427789181e-06, + "loss": 2.0227, + "step": 22456 + }, + { + "epoch": 0.75, + "grad_norm": 0.7559683322906494, + "learning_rate": 3.089088164196169e-06, + "loss": 1.9579, + "step": 22457 + }, + { + "epoch": 0.75, + "grad_norm": 0.733515202999115, + "learning_rate": 3.088319978678591e-06, + "loss": 2.0517, + "step": 22458 + }, + { + "epoch": 0.75, + "grad_norm": 0.7629181146621704, + "learning_rate": 3.087551871245125e-06, + "loss": 2.0626, + "step": 22459 + }, + { + "epoch": 0.75, + "grad_norm": 0.7500926852226257, + "learning_rate": 3.0867838419044427e-06, + "loss": 2.0773, + "step": 22460 + }, + { + "epoch": 0.75, + "grad_norm": 0.7385639548301697, + "learning_rate": 3.086015890665225e-06, + "loss": 2.0367, + "step": 22461 + }, + { + "epoch": 0.75, + "grad_norm": 0.756473958492279, + "learning_rate": 3.085248017536151e-06, + "loss": 2.1166, + "step": 22462 + }, + { + "epoch": 0.75, + "grad_norm": 0.7077974677085876, + "learning_rate": 3.0844802225258917e-06, + "loss": 1.9807, + "step": 22463 + }, + { + "epoch": 0.75, + "grad_norm": 0.7534484267234802, + "learning_rate": 3.0837125056431205e-06, + "loss": 2.155, + "step": 22464 + }, + { + "epoch": 0.75, + "grad_norm": 0.7526543140411377, + "learning_rate": 3.0829448668965133e-06, + "loss": 2.0189, + "step": 22465 + }, + { + "epoch": 0.75, + "grad_norm": 0.7618220448493958, + "learning_rate": 3.082177306294739e-06, + "loss": 2.0208, + "step": 22466 + }, + { + "epoch": 0.75, + "grad_norm": 0.7505958676338196, + "learning_rate": 3.081409823846475e-06, + "loss": 2.0581, + "step": 22467 + }, + { + "epoch": 0.75, + "grad_norm": 0.742641031742096, + "learning_rate": 3.0806424195603877e-06, + "loss": 1.9615, + "step": 22468 + }, + { + "epoch": 0.75, + "grad_norm": 0.7457727789878845, + "learning_rate": 3.079875093445144e-06, + "loss": 2.0515, + "step": 22469 + }, + { + "epoch": 0.75, + "grad_norm": 0.7481655478477478, + "learning_rate": 3.079107845509416e-06, + "loss": 1.996, + "step": 22470 + }, + { + "epoch": 0.75, + "grad_norm": 0.7417362332344055, + "learning_rate": 3.078340675761874e-06, + "loss": 2.0609, + "step": 22471 + }, + { + "epoch": 0.75, + "grad_norm": 0.7424882650375366, + "learning_rate": 3.077573584211183e-06, + "loss": 2.0626, + "step": 22472 + }, + { + "epoch": 0.75, + "grad_norm": 0.723423182964325, + "learning_rate": 3.0768065708660055e-06, + "loss": 2.0448, + "step": 22473 + }, + { + "epoch": 0.75, + "grad_norm": 0.7270727753639221, + "learning_rate": 3.0760396357350143e-06, + "loss": 2.0418, + "step": 22474 + }, + { + "epoch": 0.75, + "grad_norm": 0.7297767400741577, + "learning_rate": 3.0752727788268644e-06, + "loss": 2.0208, + "step": 22475 + }, + { + "epoch": 0.75, + "grad_norm": 0.7541229128837585, + "learning_rate": 3.074506000150228e-06, + "loss": 2.0091, + "step": 22476 + }, + { + "epoch": 0.75, + "grad_norm": 0.7391154766082764, + "learning_rate": 3.0737392997137615e-06, + "loss": 1.9514, + "step": 22477 + }, + { + "epoch": 0.75, + "grad_norm": 0.7623884081840515, + "learning_rate": 3.0729726775261328e-06, + "loss": 2.0428, + "step": 22478 + }, + { + "epoch": 0.75, + "grad_norm": 0.7564516067504883, + "learning_rate": 3.0722061335959954e-06, + "loss": 2.0603, + "step": 22479 + }, + { + "epoch": 0.75, + "grad_norm": 0.7422676086425781, + "learning_rate": 3.0714396679320157e-06, + "loss": 2.0681, + "step": 22480 + }, + { + "epoch": 0.75, + "grad_norm": 0.7417407035827637, + "learning_rate": 3.070673280542851e-06, + "loss": 2.0441, + "step": 22481 + }, + { + "epoch": 0.75, + "grad_norm": 0.7274176478385925, + "learning_rate": 3.069906971437154e-06, + "loss": 2.0568, + "step": 22482 + }, + { + "epoch": 0.75, + "grad_norm": 0.7387732267379761, + "learning_rate": 3.0691407406235873e-06, + "loss": 2.0744, + "step": 22483 + }, + { + "epoch": 0.75, + "grad_norm": 0.8083133101463318, + "learning_rate": 3.068374588110811e-06, + "loss": 2.0969, + "step": 22484 + }, + { + "epoch": 0.75, + "grad_norm": 0.7489886283874512, + "learning_rate": 3.067608513907475e-06, + "loss": 1.9952, + "step": 22485 + }, + { + "epoch": 0.75, + "grad_norm": 0.7621607184410095, + "learning_rate": 3.066842518022233e-06, + "loss": 2.0949, + "step": 22486 + }, + { + "epoch": 0.75, + "grad_norm": 0.7479332089424133, + "learning_rate": 3.0660766004637433e-06, + "loss": 2.0923, + "step": 22487 + }, + { + "epoch": 0.75, + "grad_norm": 0.78489089012146, + "learning_rate": 3.065310761240653e-06, + "loss": 2.0751, + "step": 22488 + }, + { + "epoch": 0.75, + "grad_norm": 0.7534067034721375, + "learning_rate": 3.064545000361622e-06, + "loss": 2.0328, + "step": 22489 + }, + { + "epoch": 0.75, + "grad_norm": 0.7381646037101746, + "learning_rate": 3.063779317835296e-06, + "loss": 2.0255, + "step": 22490 + }, + { + "epoch": 0.75, + "grad_norm": 0.7548058032989502, + "learning_rate": 3.063013713670323e-06, + "loss": 2.0465, + "step": 22491 + }, + { + "epoch": 0.75, + "grad_norm": 0.7456128001213074, + "learning_rate": 3.062248187875356e-06, + "loss": 2.0274, + "step": 22492 + }, + { + "epoch": 0.75, + "grad_norm": 0.731165885925293, + "learning_rate": 3.0614827404590464e-06, + "loss": 2.0358, + "step": 22493 + }, + { + "epoch": 0.75, + "grad_norm": 0.7430233955383301, + "learning_rate": 3.0607173714300376e-06, + "loss": 1.9972, + "step": 22494 + }, + { + "epoch": 0.75, + "grad_norm": 0.7444012761116028, + "learning_rate": 3.059952080796975e-06, + "loss": 2.0932, + "step": 22495 + }, + { + "epoch": 0.75, + "grad_norm": 0.7301807403564453, + "learning_rate": 3.0591868685685087e-06, + "loss": 2.067, + "step": 22496 + }, + { + "epoch": 0.75, + "grad_norm": 0.7216402292251587, + "learning_rate": 3.0584217347532796e-06, + "loss": 2.0301, + "step": 22497 + }, + { + "epoch": 0.75, + "grad_norm": 0.8821868300437927, + "learning_rate": 3.057656679359936e-06, + "loss": 2.0783, + "step": 22498 + }, + { + "epoch": 0.75, + "grad_norm": 0.7370589971542358, + "learning_rate": 3.056891702397116e-06, + "loss": 2.0777, + "step": 22499 + }, + { + "epoch": 0.75, + "grad_norm": 0.7623509168624878, + "learning_rate": 3.056126803873466e-06, + "loss": 2.0059, + "step": 22500 + }, + { + "epoch": 0.75, + "grad_norm": 0.7227151393890381, + "learning_rate": 3.0553619837976245e-06, + "loss": 2.0553, + "step": 22501 + }, + { + "epoch": 0.75, + "grad_norm": 0.7908801436424255, + "learning_rate": 3.0545972421782355e-06, + "loss": 2.0905, + "step": 22502 + }, + { + "epoch": 0.75, + "grad_norm": 0.7511583566665649, + "learning_rate": 3.0538325790239363e-06, + "loss": 2.0511, + "step": 22503 + }, + { + "epoch": 0.75, + "grad_norm": 0.7420516014099121, + "learning_rate": 3.053067994343364e-06, + "loss": 2.0553, + "step": 22504 + }, + { + "epoch": 0.75, + "grad_norm": 0.7602129578590393, + "learning_rate": 3.0523034881451564e-06, + "loss": 2.0643, + "step": 22505 + }, + { + "epoch": 0.75, + "grad_norm": 0.7503677606582642, + "learning_rate": 3.051539060437957e-06, + "loss": 2.0896, + "step": 22506 + }, + { + "epoch": 0.75, + "grad_norm": 0.7405288815498352, + "learning_rate": 3.0507747112303963e-06, + "loss": 2.1004, + "step": 22507 + }, + { + "epoch": 0.75, + "grad_norm": 0.7383706569671631, + "learning_rate": 3.0500104405311072e-06, + "loss": 2.069, + "step": 22508 + }, + { + "epoch": 0.75, + "grad_norm": 0.7434817552566528, + "learning_rate": 3.0492462483487294e-06, + "loss": 2.132, + "step": 22509 + }, + { + "epoch": 0.75, + "grad_norm": 0.7413058876991272, + "learning_rate": 3.0484821346918924e-06, + "loss": 2.065, + "step": 22510 + }, + { + "epoch": 0.75, + "grad_norm": 0.7673773765563965, + "learning_rate": 3.0477180995692326e-06, + "loss": 2.0245, + "step": 22511 + }, + { + "epoch": 0.75, + "grad_norm": 0.7183076739311218, + "learning_rate": 3.046954142989379e-06, + "loss": 2.016, + "step": 22512 + }, + { + "epoch": 0.75, + "grad_norm": 0.798474907875061, + "learning_rate": 3.0461902649609597e-06, + "loss": 2.1371, + "step": 22513 + }, + { + "epoch": 0.75, + "grad_norm": 0.7593542337417603, + "learning_rate": 3.0454264654926067e-06, + "loss": 2.0847, + "step": 22514 + }, + { + "epoch": 0.75, + "grad_norm": 0.753358006477356, + "learning_rate": 3.0446627445929546e-06, + "loss": 2.1216, + "step": 22515 + }, + { + "epoch": 0.75, + "grad_norm": 0.7411670088768005, + "learning_rate": 3.0438991022706254e-06, + "loss": 2.0664, + "step": 22516 + }, + { + "epoch": 0.75, + "grad_norm": 0.7905920147895813, + "learning_rate": 3.043135538534244e-06, + "loss": 2.0418, + "step": 22517 + }, + { + "epoch": 0.75, + "grad_norm": 0.7665175199508667, + "learning_rate": 3.042372053392444e-06, + "loss": 2.1109, + "step": 22518 + }, + { + "epoch": 0.75, + "grad_norm": 0.7305836081504822, + "learning_rate": 3.041608646853844e-06, + "loss": 2.0781, + "step": 22519 + }, + { + "epoch": 0.75, + "grad_norm": 0.7481439709663391, + "learning_rate": 3.0408453189270738e-06, + "loss": 2.0594, + "step": 22520 + }, + { + "epoch": 0.75, + "grad_norm": 0.765350878238678, + "learning_rate": 3.0400820696207523e-06, + "loss": 2.0634, + "step": 22521 + }, + { + "epoch": 0.75, + "grad_norm": 0.7211467027664185, + "learning_rate": 3.0393188989435075e-06, + "loss": 2.0848, + "step": 22522 + }, + { + "epoch": 0.75, + "grad_norm": 0.7525205016136169, + "learning_rate": 3.0385558069039557e-06, + "loss": 2.0501, + "step": 22523 + }, + { + "epoch": 0.75, + "grad_norm": 0.7510121464729309, + "learning_rate": 3.037792793510723e-06, + "loss": 2.1441, + "step": 22524 + }, + { + "epoch": 0.75, + "grad_norm": 0.7528380751609802, + "learning_rate": 3.037029858772428e-06, + "loss": 2.0059, + "step": 22525 + }, + { + "epoch": 0.75, + "grad_norm": 0.7278952598571777, + "learning_rate": 3.036267002697685e-06, + "loss": 2.0418, + "step": 22526 + }, + { + "epoch": 0.75, + "grad_norm": 0.7589005827903748, + "learning_rate": 3.035504225295116e-06, + "loss": 2.052, + "step": 22527 + }, + { + "epoch": 0.75, + "grad_norm": 0.7450535297393799, + "learning_rate": 3.0347415265733426e-06, + "loss": 2.0835, + "step": 22528 + }, + { + "epoch": 0.75, + "grad_norm": 0.7593364119529724, + "learning_rate": 3.0339789065409775e-06, + "loss": 2.0944, + "step": 22529 + }, + { + "epoch": 0.75, + "grad_norm": 0.7622039914131165, + "learning_rate": 3.033216365206633e-06, + "loss": 2.1078, + "step": 22530 + }, + { + "epoch": 0.75, + "grad_norm": 0.7340630888938904, + "learning_rate": 3.032453902578929e-06, + "loss": 2.1063, + "step": 22531 + }, + { + "epoch": 0.75, + "grad_norm": 0.8260413408279419, + "learning_rate": 3.0316915186664752e-06, + "loss": 2.1573, + "step": 22532 + }, + { + "epoch": 0.75, + "grad_norm": 0.7661426663398743, + "learning_rate": 3.03092921347789e-06, + "loss": 2.1063, + "step": 22533 + }, + { + "epoch": 0.75, + "grad_norm": 0.7391083836555481, + "learning_rate": 3.030166987021782e-06, + "loss": 2.1514, + "step": 22534 + }, + { + "epoch": 0.75, + "grad_norm": 0.7492218613624573, + "learning_rate": 3.02940483930676e-06, + "loss": 2.0696, + "step": 22535 + }, + { + "epoch": 0.75, + "grad_norm": 0.7586084604263306, + "learning_rate": 3.028642770341437e-06, + "loss": 2.0374, + "step": 22536 + }, + { + "epoch": 0.75, + "grad_norm": 0.7670184373855591, + "learning_rate": 3.0278807801344246e-06, + "loss": 2.0551, + "step": 22537 + }, + { + "epoch": 0.75, + "grad_norm": 0.7137128114700317, + "learning_rate": 3.02711886869433e-06, + "loss": 2.013, + "step": 22538 + }, + { + "epoch": 0.75, + "grad_norm": 0.77633136510849, + "learning_rate": 3.0263570360297566e-06, + "loss": 2.1386, + "step": 22539 + }, + { + "epoch": 0.75, + "grad_norm": 0.7546154856681824, + "learning_rate": 3.0255952821493174e-06, + "loss": 2.0846, + "step": 22540 + }, + { + "epoch": 0.75, + "grad_norm": 0.7523732781410217, + "learning_rate": 3.0248336070616126e-06, + "loss": 2.013, + "step": 22541 + }, + { + "epoch": 0.75, + "grad_norm": 0.7736267447471619, + "learning_rate": 3.024072010775252e-06, + "loss": 2.0702, + "step": 22542 + }, + { + "epoch": 0.75, + "grad_norm": 0.7314648628234863, + "learning_rate": 3.0233104932988355e-06, + "loss": 2.0997, + "step": 22543 + }, + { + "epoch": 0.75, + "grad_norm": 0.7273348569869995, + "learning_rate": 3.0225490546409707e-06, + "loss": 2.0447, + "step": 22544 + }, + { + "epoch": 0.75, + "grad_norm": 0.7652947306632996, + "learning_rate": 3.0217876948102544e-06, + "loss": 2.0159, + "step": 22545 + }, + { + "epoch": 0.75, + "grad_norm": 0.7797354459762573, + "learning_rate": 3.021026413815291e-06, + "loss": 1.9941, + "step": 22546 + }, + { + "epoch": 0.75, + "grad_norm": 0.7620541453361511, + "learning_rate": 3.020265211664688e-06, + "loss": 2.0552, + "step": 22547 + }, + { + "epoch": 0.75, + "grad_norm": 0.7470033168792725, + "learning_rate": 3.0195040883670313e-06, + "loss": 2.0362, + "step": 22548 + }, + { + "epoch": 0.75, + "grad_norm": 0.7467162609100342, + "learning_rate": 3.018743043930926e-06, + "loss": 2.0204, + "step": 22549 + }, + { + "epoch": 0.75, + "grad_norm": 0.723630964756012, + "learning_rate": 3.017982078364975e-06, + "loss": 2.0278, + "step": 22550 + }, + { + "epoch": 0.75, + "grad_norm": 0.7552885413169861, + "learning_rate": 3.0172211916777695e-06, + "loss": 2.068, + "step": 22551 + }, + { + "epoch": 0.75, + "grad_norm": 0.7650761604309082, + "learning_rate": 3.0164603838779037e-06, + "loss": 2.0298, + "step": 22552 + }, + { + "epoch": 0.75, + "grad_norm": 0.745740532875061, + "learning_rate": 3.015699654973979e-06, + "loss": 1.9943, + "step": 22553 + }, + { + "epoch": 0.75, + "grad_norm": 0.757552981376648, + "learning_rate": 3.014939004974583e-06, + "loss": 1.9918, + "step": 22554 + }, + { + "epoch": 0.75, + "grad_norm": 0.7809370756149292, + "learning_rate": 3.0141784338883164e-06, + "loss": 2.0787, + "step": 22555 + }, + { + "epoch": 0.75, + "grad_norm": 0.7489234209060669, + "learning_rate": 3.013417941723763e-06, + "loss": 2.0334, + "step": 22556 + }, + { + "epoch": 0.75, + "grad_norm": 0.7322822213172913, + "learning_rate": 3.0126575284895233e-06, + "loss": 2.1285, + "step": 22557 + }, + { + "epoch": 0.75, + "grad_norm": 0.7187131643295288, + "learning_rate": 3.011897194194181e-06, + "loss": 2.0741, + "step": 22558 + }, + { + "epoch": 0.75, + "grad_norm": 0.7302840352058411, + "learning_rate": 3.011136938846332e-06, + "loss": 1.9698, + "step": 22559 + }, + { + "epoch": 0.75, + "grad_norm": 0.743945837020874, + "learning_rate": 3.010376762454561e-06, + "loss": 2.059, + "step": 22560 + }, + { + "epoch": 0.75, + "grad_norm": 0.7236581444740295, + "learning_rate": 3.009616665027455e-06, + "loss": 2.0178, + "step": 22561 + }, + { + "epoch": 0.75, + "grad_norm": 0.7602822184562683, + "learning_rate": 3.0088566465736024e-06, + "loss": 2.0093, + "step": 22562 + }, + { + "epoch": 0.75, + "grad_norm": 0.7992000579833984, + "learning_rate": 3.008096707101593e-06, + "loss": 2.015, + "step": 22563 + }, + { + "epoch": 0.75, + "grad_norm": 0.7656684517860413, + "learning_rate": 3.0073368466200104e-06, + "loss": 2.0772, + "step": 22564 + }, + { + "epoch": 0.75, + "grad_norm": 0.7782395482063293, + "learning_rate": 3.0065770651374348e-06, + "loss": 2.0081, + "step": 22565 + }, + { + "epoch": 0.75, + "grad_norm": 0.7718179225921631, + "learning_rate": 3.0058173626624553e-06, + "loss": 2.0887, + "step": 22566 + }, + { + "epoch": 0.75, + "grad_norm": 0.7243524193763733, + "learning_rate": 3.0050577392036495e-06, + "loss": 2.0428, + "step": 22567 + }, + { + "epoch": 0.75, + "grad_norm": 0.7722897529602051, + "learning_rate": 3.0042981947696016e-06, + "loss": 2.0346, + "step": 22568 + }, + { + "epoch": 0.75, + "grad_norm": 0.7703007459640503, + "learning_rate": 3.0035387293689e-06, + "loss": 2.0887, + "step": 22569 + }, + { + "epoch": 0.75, + "grad_norm": 0.7337794899940491, + "learning_rate": 3.0027793430101106e-06, + "loss": 2.0703, + "step": 22570 + }, + { + "epoch": 0.75, + "grad_norm": 0.7501999139785767, + "learning_rate": 3.002020035701819e-06, + "loss": 2.083, + "step": 22571 + }, + { + "epoch": 0.75, + "grad_norm": 0.7481458187103271, + "learning_rate": 3.001260807452607e-06, + "loss": 2.0369, + "step": 22572 + }, + { + "epoch": 0.75, + "grad_norm": 0.7787818908691406, + "learning_rate": 3.00050165827105e-06, + "loss": 2.1443, + "step": 22573 + }, + { + "epoch": 0.75, + "grad_norm": 0.7434309124946594, + "learning_rate": 2.999742588165719e-06, + "loss": 2.0878, + "step": 22574 + }, + { + "epoch": 0.75, + "grad_norm": 0.7259165048599243, + "learning_rate": 2.9989835971451976e-06, + "loss": 2.0844, + "step": 22575 + }, + { + "epoch": 0.75, + "grad_norm": 0.7410356402397156, + "learning_rate": 2.9982246852180517e-06, + "loss": 1.986, + "step": 22576 + }, + { + "epoch": 0.75, + "grad_norm": 0.767663836479187, + "learning_rate": 2.9974658523928614e-06, + "loss": 2.0108, + "step": 22577 + }, + { + "epoch": 0.75, + "grad_norm": 0.7757781744003296, + "learning_rate": 2.996707098678201e-06, + "loss": 2.0834, + "step": 22578 + }, + { + "epoch": 0.75, + "grad_norm": 0.7286555767059326, + "learning_rate": 2.9959484240826385e-06, + "loss": 2.0313, + "step": 22579 + }, + { + "epoch": 0.75, + "grad_norm": 0.7353644371032715, + "learning_rate": 2.995189828614744e-06, + "loss": 2.0918, + "step": 22580 + }, + { + "epoch": 0.75, + "grad_norm": 0.7684770822525024, + "learning_rate": 2.9944313122830913e-06, + "loss": 2.0391, + "step": 22581 + }, + { + "epoch": 0.75, + "grad_norm": 0.7084128856658936, + "learning_rate": 2.9936728750962494e-06, + "loss": 1.9643, + "step": 22582 + }, + { + "epoch": 0.75, + "grad_norm": 0.7928056716918945, + "learning_rate": 2.9929145170627815e-06, + "loss": 1.991, + "step": 22583 + }, + { + "epoch": 0.75, + "grad_norm": 0.7325501441955566, + "learning_rate": 2.9921562381912594e-06, + "loss": 2.0113, + "step": 22584 + }, + { + "epoch": 0.75, + "grad_norm": 0.7555614113807678, + "learning_rate": 2.991398038490252e-06, + "loss": 2.0772, + "step": 22585 + }, + { + "epoch": 0.75, + "grad_norm": 0.7730307579040527, + "learning_rate": 2.990639917968321e-06, + "loss": 2.0958, + "step": 22586 + }, + { + "epoch": 0.75, + "grad_norm": 0.7246148586273193, + "learning_rate": 2.98988187663403e-06, + "loss": 2.0374, + "step": 22587 + }, + { + "epoch": 0.75, + "grad_norm": 0.749282956123352, + "learning_rate": 2.9891239144959484e-06, + "loss": 2.006, + "step": 22588 + }, + { + "epoch": 0.75, + "grad_norm": 0.7372558116912842, + "learning_rate": 2.988366031562633e-06, + "loss": 2.081, + "step": 22589 + }, + { + "epoch": 0.75, + "grad_norm": 0.7339449524879456, + "learning_rate": 2.987608227842649e-06, + "loss": 2.0246, + "step": 22590 + }, + { + "epoch": 0.75, + "grad_norm": 0.75889652967453, + "learning_rate": 2.986850503344564e-06, + "loss": 2.0553, + "step": 22591 + }, + { + "epoch": 0.75, + "grad_norm": 0.8152104616165161, + "learning_rate": 2.9860928580769256e-06, + "loss": 2.0459, + "step": 22592 + }, + { + "epoch": 0.75, + "grad_norm": 0.7334176301956177, + "learning_rate": 2.985335292048298e-06, + "loss": 2.0337, + "step": 22593 + }, + { + "epoch": 0.75, + "grad_norm": 0.738373339176178, + "learning_rate": 2.9845778052672457e-06, + "loss": 2.06, + "step": 22594 + }, + { + "epoch": 0.75, + "grad_norm": 0.712801992893219, + "learning_rate": 2.983820397742323e-06, + "loss": 1.9967, + "step": 22595 + }, + { + "epoch": 0.75, + "grad_norm": 0.7481465339660645, + "learning_rate": 2.9830630694820804e-06, + "loss": 2.0303, + "step": 22596 + }, + { + "epoch": 0.75, + "grad_norm": 0.732541024684906, + "learning_rate": 2.9823058204950837e-06, + "loss": 2.0454, + "step": 22597 + }, + { + "epoch": 0.75, + "grad_norm": 0.7262259721755981, + "learning_rate": 2.9815486507898784e-06, + "loss": 2.0279, + "step": 22598 + }, + { + "epoch": 0.75, + "grad_norm": 0.7480810284614563, + "learning_rate": 2.980791560375025e-06, + "loss": 2.0611, + "step": 22599 + }, + { + "epoch": 0.75, + "grad_norm": 0.7543347477912903, + "learning_rate": 2.9800345492590766e-06, + "loss": 2.0451, + "step": 22600 + }, + { + "epoch": 0.75, + "grad_norm": 0.7453309297561646, + "learning_rate": 2.9792776174505843e-06, + "loss": 2.0599, + "step": 22601 + }, + { + "epoch": 0.75, + "grad_norm": 0.725368320941925, + "learning_rate": 2.978520764958096e-06, + "loss": 2.0309, + "step": 22602 + }, + { + "epoch": 0.75, + "grad_norm": 0.7317690253257751, + "learning_rate": 2.977763991790168e-06, + "loss": 2.0219, + "step": 22603 + }, + { + "epoch": 0.75, + "grad_norm": 0.7536773085594177, + "learning_rate": 2.9770072979553466e-06, + "loss": 2.0471, + "step": 22604 + }, + { + "epoch": 0.75, + "grad_norm": 0.7761757969856262, + "learning_rate": 2.9762506834621773e-06, + "loss": 2.0583, + "step": 22605 + }, + { + "epoch": 0.75, + "grad_norm": 0.7515122890472412, + "learning_rate": 2.9754941483192125e-06, + "loss": 2.0571, + "step": 22606 + }, + { + "epoch": 0.75, + "grad_norm": 0.7875292301177979, + "learning_rate": 2.9747376925350013e-06, + "loss": 2.0007, + "step": 22607 + }, + { + "epoch": 0.75, + "grad_norm": 0.7623459696769714, + "learning_rate": 2.973981316118083e-06, + "loss": 2.1231, + "step": 22608 + }, + { + "epoch": 0.75, + "grad_norm": 0.7492282390594482, + "learning_rate": 2.9732250190770084e-06, + "loss": 2.0191, + "step": 22609 + }, + { + "epoch": 0.75, + "grad_norm": 0.7528647184371948, + "learning_rate": 2.9724688014203208e-06, + "loss": 2.0674, + "step": 22610 + }, + { + "epoch": 0.75, + "grad_norm": 0.7651874423027039, + "learning_rate": 2.9717126631565585e-06, + "loss": 2.0821, + "step": 22611 + }, + { + "epoch": 0.75, + "grad_norm": 0.7327250242233276, + "learning_rate": 2.9709566042942674e-06, + "loss": 1.9541, + "step": 22612 + }, + { + "epoch": 0.75, + "grad_norm": 0.7590217590332031, + "learning_rate": 2.9702006248419957e-06, + "loss": 2.0074, + "step": 22613 + }, + { + "epoch": 0.75, + "grad_norm": 0.7499263882637024, + "learning_rate": 2.96944472480827e-06, + "loss": 2.035, + "step": 22614 + }, + { + "epoch": 0.75, + "grad_norm": 0.7688724994659424, + "learning_rate": 2.9686889042016396e-06, + "loss": 2.1279, + "step": 22615 + }, + { + "epoch": 0.75, + "grad_norm": 0.745100200176239, + "learning_rate": 2.967933163030643e-06, + "loss": 2.0947, + "step": 22616 + }, + { + "epoch": 0.75, + "grad_norm": 0.7396244406700134, + "learning_rate": 2.967177501303816e-06, + "loss": 2.0593, + "step": 22617 + }, + { + "epoch": 0.75, + "grad_norm": 0.724320650100708, + "learning_rate": 2.966421919029694e-06, + "loss": 2.0458, + "step": 22618 + }, + { + "epoch": 0.75, + "grad_norm": 0.7484925389289856, + "learning_rate": 2.965666416216818e-06, + "loss": 2.1333, + "step": 22619 + }, + { + "epoch": 0.75, + "grad_norm": 0.7382520437240601, + "learning_rate": 2.9649109928737164e-06, + "loss": 2.021, + "step": 22620 + }, + { + "epoch": 0.75, + "grad_norm": 0.8054434061050415, + "learning_rate": 2.964155649008927e-06, + "loss": 2.0493, + "step": 22621 + }, + { + "epoch": 0.75, + "grad_norm": 0.7202202081680298, + "learning_rate": 2.9634003846309887e-06, + "loss": 2.0696, + "step": 22622 + }, + { + "epoch": 0.75, + "grad_norm": 0.7623435854911804, + "learning_rate": 2.9626451997484273e-06, + "loss": 2.0569, + "step": 22623 + }, + { + "epoch": 0.75, + "grad_norm": 0.7965266704559326, + "learning_rate": 2.9618900943697737e-06, + "loss": 2.0239, + "step": 22624 + }, + { + "epoch": 0.75, + "grad_norm": 0.7476407885551453, + "learning_rate": 2.9611350685035645e-06, + "loss": 2.0716, + "step": 22625 + }, + { + "epoch": 0.75, + "grad_norm": 0.7439313530921936, + "learning_rate": 2.960380122158325e-06, + "loss": 2.0574, + "step": 22626 + }, + { + "epoch": 0.75, + "grad_norm": 0.7394826412200928, + "learning_rate": 2.959625255342583e-06, + "loss": 2.0414, + "step": 22627 + }, + { + "epoch": 0.75, + "grad_norm": 0.7328760623931885, + "learning_rate": 2.9588704680648694e-06, + "loss": 2.0539, + "step": 22628 + }, + { + "epoch": 0.75, + "grad_norm": 0.7159720659255981, + "learning_rate": 2.958115760333713e-06, + "loss": 2.039, + "step": 22629 + }, + { + "epoch": 0.75, + "grad_norm": 0.7536383867263794, + "learning_rate": 2.9573611321576344e-06, + "loss": 2.1139, + "step": 22630 + }, + { + "epoch": 0.75, + "grad_norm": 0.7587533593177795, + "learning_rate": 2.956606583545166e-06, + "loss": 2.0357, + "step": 22631 + }, + { + "epoch": 0.75, + "grad_norm": 0.7350855469703674, + "learning_rate": 2.955852114504829e-06, + "loss": 1.9967, + "step": 22632 + }, + { + "epoch": 0.75, + "grad_norm": 0.7340081930160522, + "learning_rate": 2.955097725045143e-06, + "loss": 1.9534, + "step": 22633 + }, + { + "epoch": 0.75, + "grad_norm": 0.7744357585906982, + "learning_rate": 2.954343415174633e-06, + "loss": 2.0428, + "step": 22634 + }, + { + "epoch": 0.75, + "grad_norm": 0.7952952980995178, + "learning_rate": 2.9535891849018293e-06, + "loss": 2.0606, + "step": 22635 + }, + { + "epoch": 0.75, + "grad_norm": 0.7766268253326416, + "learning_rate": 2.95283503423524e-06, + "loss": 2.0336, + "step": 22636 + }, + { + "epoch": 0.75, + "grad_norm": 0.7420289516448975, + "learning_rate": 2.952080963183389e-06, + "loss": 2.0197, + "step": 22637 + }, + { + "epoch": 0.75, + "grad_norm": 0.738042414188385, + "learning_rate": 2.951326971754801e-06, + "loss": 1.9974, + "step": 22638 + }, + { + "epoch": 0.75, + "grad_norm": 0.7221134305000305, + "learning_rate": 2.9505730599579883e-06, + "loss": 2.0452, + "step": 22639 + }, + { + "epoch": 0.75, + "grad_norm": 0.7329491972923279, + "learning_rate": 2.949819227801468e-06, + "loss": 2.0511, + "step": 22640 + }, + { + "epoch": 0.75, + "grad_norm": 0.7287574410438538, + "learning_rate": 2.949065475293761e-06, + "loss": 2.0395, + "step": 22641 + }, + { + "epoch": 0.75, + "grad_norm": 0.7589728236198425, + "learning_rate": 2.9483118024433777e-06, + "loss": 2.0886, + "step": 22642 + }, + { + "epoch": 0.75, + "grad_norm": 0.7625011801719666, + "learning_rate": 2.947558209258834e-06, + "loss": 2.1048, + "step": 22643 + }, + { + "epoch": 0.75, + "grad_norm": 0.7657559514045715, + "learning_rate": 2.946804695748647e-06, + "loss": 2.0633, + "step": 22644 + }, + { + "epoch": 0.75, + "grad_norm": 0.7427349090576172, + "learning_rate": 2.946051261921329e-06, + "loss": 2.0211, + "step": 22645 + }, + { + "epoch": 0.75, + "grad_norm": 0.7636844515800476, + "learning_rate": 2.9452979077853847e-06, + "loss": 2.0159, + "step": 22646 + }, + { + "epoch": 0.75, + "grad_norm": 0.7512263059616089, + "learning_rate": 2.944544633349332e-06, + "loss": 2.1151, + "step": 22647 + }, + { + "epoch": 0.75, + "grad_norm": 0.7367919683456421, + "learning_rate": 2.943791438621684e-06, + "loss": 2.0392, + "step": 22648 + }, + { + "epoch": 0.75, + "grad_norm": 0.7454794049263, + "learning_rate": 2.9430383236109393e-06, + "loss": 2.124, + "step": 22649 + }, + { + "epoch": 0.75, + "grad_norm": 0.7674774527549744, + "learning_rate": 2.9422852883256115e-06, + "loss": 2.0191, + "step": 22650 + }, + { + "epoch": 0.75, + "grad_norm": 0.7572967410087585, + "learning_rate": 2.941532332774212e-06, + "loss": 1.9516, + "step": 22651 + }, + { + "epoch": 0.75, + "grad_norm": 0.7606080174446106, + "learning_rate": 2.9407794569652392e-06, + "loss": 2.0299, + "step": 22652 + }, + { + "epoch": 0.75, + "grad_norm": 0.7374034523963928, + "learning_rate": 2.940026660907207e-06, + "loss": 2.013, + "step": 22653 + }, + { + "epoch": 0.75, + "grad_norm": 0.7524685859680176, + "learning_rate": 2.939273944608616e-06, + "loss": 2.1021, + "step": 22654 + }, + { + "epoch": 0.75, + "grad_norm": 0.734710693359375, + "learning_rate": 2.9385213080779674e-06, + "loss": 2.0589, + "step": 22655 + }, + { + "epoch": 0.75, + "grad_norm": 0.7480674386024475, + "learning_rate": 2.9377687513237664e-06, + "loss": 2.0454, + "step": 22656 + }, + { + "epoch": 0.75, + "grad_norm": 0.7109075784683228, + "learning_rate": 2.937016274354517e-06, + "loss": 1.9885, + "step": 22657 + }, + { + "epoch": 0.75, + "grad_norm": 0.7300166487693787, + "learning_rate": 2.93626387717872e-06, + "loss": 2.049, + "step": 22658 + }, + { + "epoch": 0.75, + "grad_norm": 0.7387818098068237, + "learning_rate": 2.93551155980487e-06, + "loss": 2.0298, + "step": 22659 + }, + { + "epoch": 0.75, + "grad_norm": 0.744696319103241, + "learning_rate": 2.9347593222414737e-06, + "loss": 2.0132, + "step": 22660 + }, + { + "epoch": 0.75, + "grad_norm": 0.8759850263595581, + "learning_rate": 2.9340071644970223e-06, + "loss": 2.154, + "step": 22661 + }, + { + "epoch": 0.75, + "grad_norm": 0.7840388417243958, + "learning_rate": 2.93325508658002e-06, + "loss": 2.1163, + "step": 22662 + }, + { + "epoch": 0.75, + "grad_norm": 0.7456493377685547, + "learning_rate": 2.932503088498958e-06, + "loss": 2.0361, + "step": 22663 + }, + { + "epoch": 0.75, + "grad_norm": 0.7483821511268616, + "learning_rate": 2.931751170262337e-06, + "loss": 2.0904, + "step": 22664 + }, + { + "epoch": 0.75, + "grad_norm": 0.7588597536087036, + "learning_rate": 2.9309993318786457e-06, + "loss": 2.0446, + "step": 22665 + }, + { + "epoch": 0.75, + "grad_norm": 0.7661793231964111, + "learning_rate": 2.9302475733563828e-06, + "loss": 2.0761, + "step": 22666 + }, + { + "epoch": 0.75, + "grad_norm": 0.7173787355422974, + "learning_rate": 2.929495894704041e-06, + "loss": 2.0049, + "step": 22667 + }, + { + "epoch": 0.75, + "grad_norm": 0.7571566104888916, + "learning_rate": 2.928744295930108e-06, + "loss": 2.0139, + "step": 22668 + }, + { + "epoch": 0.75, + "grad_norm": 0.7443260550498962, + "learning_rate": 2.9279927770430773e-06, + "loss": 2.0707, + "step": 22669 + }, + { + "epoch": 0.75, + "grad_norm": 0.7448168992996216, + "learning_rate": 2.9272413380514453e-06, + "loss": 2.0291, + "step": 22670 + }, + { + "epoch": 0.75, + "grad_norm": 0.7479420304298401, + "learning_rate": 2.9264899789636903e-06, + "loss": 2.0637, + "step": 22671 + }, + { + "epoch": 0.75, + "grad_norm": 0.7363066673278809, + "learning_rate": 2.9257386997883053e-06, + "loss": 2.035, + "step": 22672 + }, + { + "epoch": 0.75, + "grad_norm": 0.7268658876419067, + "learning_rate": 2.9249875005337823e-06, + "loss": 2.0371, + "step": 22673 + }, + { + "epoch": 0.75, + "grad_norm": 0.7813587784767151, + "learning_rate": 2.9242363812086005e-06, + "loss": 2.0315, + "step": 22674 + }, + { + "epoch": 0.75, + "grad_norm": 0.7182080149650574, + "learning_rate": 2.9234853418212528e-06, + "loss": 2.0558, + "step": 22675 + }, + { + "epoch": 0.75, + "grad_norm": 0.745822548866272, + "learning_rate": 2.92273438238022e-06, + "loss": 2.0523, + "step": 22676 + }, + { + "epoch": 0.75, + "grad_norm": 0.7519271373748779, + "learning_rate": 2.9219835028939838e-06, + "loss": 2.0402, + "step": 22677 + }, + { + "epoch": 0.75, + "grad_norm": 0.7843594551086426, + "learning_rate": 2.92123270337103e-06, + "loss": 2.0928, + "step": 22678 + }, + { + "epoch": 0.75, + "grad_norm": 0.7534686923027039, + "learning_rate": 2.920481983819843e-06, + "loss": 2.0085, + "step": 22679 + }, + { + "epoch": 0.75, + "grad_norm": 0.7463078498840332, + "learning_rate": 2.919731344248902e-06, + "loss": 1.9969, + "step": 22680 + }, + { + "epoch": 0.75, + "grad_norm": 0.7494160532951355, + "learning_rate": 2.9189807846666828e-06, + "loss": 2.0336, + "step": 22681 + }, + { + "epoch": 0.75, + "grad_norm": 0.7696128487586975, + "learning_rate": 2.918230305081673e-06, + "loss": 2.042, + "step": 22682 + }, + { + "epoch": 0.75, + "grad_norm": 0.7276766896247864, + "learning_rate": 2.917479905502343e-06, + "loss": 2.0475, + "step": 22683 + }, + { + "epoch": 0.75, + "grad_norm": 0.7440370321273804, + "learning_rate": 2.916729585937178e-06, + "loss": 2.0803, + "step": 22684 + }, + { + "epoch": 0.75, + "grad_norm": 0.7477647662162781, + "learning_rate": 2.9159793463946474e-06, + "loss": 2.0497, + "step": 22685 + }, + { + "epoch": 0.75, + "grad_norm": 0.7365114688873291, + "learning_rate": 2.9152291868832337e-06, + "loss": 2.0072, + "step": 22686 + }, + { + "epoch": 0.75, + "grad_norm": 0.7570539712905884, + "learning_rate": 2.9144791074114064e-06, + "loss": 2.0359, + "step": 22687 + }, + { + "epoch": 0.75, + "grad_norm": 0.7246546149253845, + "learning_rate": 2.9137291079876438e-06, + "loss": 1.9985, + "step": 22688 + }, + { + "epoch": 0.75, + "grad_norm": 0.7559397220611572, + "learning_rate": 2.912979188620417e-06, + "loss": 2.0726, + "step": 22689 + }, + { + "epoch": 0.75, + "grad_norm": 0.7361375093460083, + "learning_rate": 2.912229349318194e-06, + "loss": 2.0288, + "step": 22690 + }, + { + "epoch": 0.75, + "grad_norm": 0.7233638167381287, + "learning_rate": 2.911479590089451e-06, + "loss": 1.9971, + "step": 22691 + }, + { + "epoch": 0.75, + "grad_norm": 0.7768986821174622, + "learning_rate": 2.9107299109426602e-06, + "loss": 2.0048, + "step": 22692 + }, + { + "epoch": 0.75, + "grad_norm": 0.7424880862236023, + "learning_rate": 2.9099803118862878e-06, + "loss": 2.1086, + "step": 22693 + }, + { + "epoch": 0.76, + "grad_norm": 0.7472460865974426, + "learning_rate": 2.9092307929288e-06, + "loss": 2.0763, + "step": 22694 + }, + { + "epoch": 0.76, + "grad_norm": 0.7406028509140015, + "learning_rate": 2.9084813540786704e-06, + "loss": 2.0163, + "step": 22695 + }, + { + "epoch": 0.76, + "grad_norm": 0.7557093501091003, + "learning_rate": 2.9077319953443594e-06, + "loss": 2.042, + "step": 22696 + }, + { + "epoch": 0.76, + "grad_norm": 0.7623248100280762, + "learning_rate": 2.906982716734338e-06, + "loss": 2.0075, + "step": 22697 + }, + { + "epoch": 0.76, + "grad_norm": 0.7446292042732239, + "learning_rate": 2.90623351825707e-06, + "loss": 2.0699, + "step": 22698 + }, + { + "epoch": 0.76, + "grad_norm": 0.7672387957572937, + "learning_rate": 2.9054843999210147e-06, + "loss": 2.0372, + "step": 22699 + }, + { + "epoch": 0.76, + "grad_norm": 0.7352509498596191, + "learning_rate": 2.9047353617346386e-06, + "loss": 2.0838, + "step": 22700 + }, + { + "epoch": 0.76, + "grad_norm": 0.7627996206283569, + "learning_rate": 2.903986403706407e-06, + "loss": 2.1219, + "step": 22701 + }, + { + "epoch": 0.76, + "grad_norm": 0.7455876469612122, + "learning_rate": 2.90323752584478e-06, + "loss": 2.1121, + "step": 22702 + }, + { + "epoch": 0.76, + "grad_norm": 0.7555532455444336, + "learning_rate": 2.9024887281582113e-06, + "loss": 2.0483, + "step": 22703 + }, + { + "epoch": 0.76, + "grad_norm": 0.7447641491889954, + "learning_rate": 2.9017400106551696e-06, + "loss": 2.0089, + "step": 22704 + }, + { + "epoch": 0.76, + "grad_norm": 0.7668010592460632, + "learning_rate": 2.9009913733441052e-06, + "loss": 2.0461, + "step": 22705 + }, + { + "epoch": 0.76, + "grad_norm": 0.7513505220413208, + "learning_rate": 2.900242816233484e-06, + "loss": 2.0561, + "step": 22706 + }, + { + "epoch": 0.76, + "grad_norm": 0.763373076915741, + "learning_rate": 2.8994943393317555e-06, + "loss": 2.078, + "step": 22707 + }, + { + "epoch": 0.76, + "grad_norm": 0.7274625897407532, + "learning_rate": 2.898745942647381e-06, + "loss": 1.9682, + "step": 22708 + }, + { + "epoch": 0.76, + "grad_norm": 0.7455772757530212, + "learning_rate": 2.8979976261888097e-06, + "loss": 2.1197, + "step": 22709 + }, + { + "epoch": 0.76, + "grad_norm": 0.7653241753578186, + "learning_rate": 2.8972493899645036e-06, + "loss": 2.047, + "step": 22710 + }, + { + "epoch": 0.76, + "grad_norm": 0.7589138746261597, + "learning_rate": 2.89650123398291e-06, + "loss": 2.0545, + "step": 22711 + }, + { + "epoch": 0.76, + "grad_norm": 0.7377748489379883, + "learning_rate": 2.89575315825248e-06, + "loss": 2.0305, + "step": 22712 + }, + { + "epoch": 0.76, + "grad_norm": 0.7405987977981567, + "learning_rate": 2.8950051627816667e-06, + "loss": 2.1129, + "step": 22713 + }, + { + "epoch": 0.76, + "grad_norm": 0.7699908018112183, + "learning_rate": 2.8942572475789254e-06, + "loss": 1.9922, + "step": 22714 + }, + { + "epoch": 0.76, + "grad_norm": 0.7605773210525513, + "learning_rate": 2.8935094126526996e-06, + "loss": 2.032, + "step": 22715 + }, + { + "epoch": 0.76, + "grad_norm": 0.7453346848487854, + "learning_rate": 2.892761658011438e-06, + "loss": 2.048, + "step": 22716 + }, + { + "epoch": 0.76, + "grad_norm": 0.7545034885406494, + "learning_rate": 2.892013983663593e-06, + "loss": 2.1008, + "step": 22717 + }, + { + "epoch": 0.76, + "grad_norm": 0.7768744826316833, + "learning_rate": 2.891266389617604e-06, + "loss": 2.1043, + "step": 22718 + }, + { + "epoch": 0.76, + "grad_norm": 0.73736572265625, + "learning_rate": 2.8905188758819257e-06, + "loss": 2.0451, + "step": 22719 + }, + { + "epoch": 0.76, + "grad_norm": 0.7751873731613159, + "learning_rate": 2.889771442464997e-06, + "loss": 2.0798, + "step": 22720 + }, + { + "epoch": 0.76, + "grad_norm": 0.7433980703353882, + "learning_rate": 2.889024089375262e-06, + "loss": 2.0152, + "step": 22721 + }, + { + "epoch": 0.76, + "grad_norm": 0.7612611055374146, + "learning_rate": 2.888276816621165e-06, + "loss": 2.0679, + "step": 22722 + }, + { + "epoch": 0.76, + "grad_norm": 0.7978125214576721, + "learning_rate": 2.8875296242111507e-06, + "loss": 2.0162, + "step": 22723 + }, + { + "epoch": 0.76, + "grad_norm": 0.7777748107910156, + "learning_rate": 2.8867825121536595e-06, + "loss": 2.0158, + "step": 22724 + }, + { + "epoch": 0.76, + "grad_norm": 0.7413191199302673, + "learning_rate": 2.886035480457128e-06, + "loss": 2.094, + "step": 22725 + }, + { + "epoch": 0.76, + "grad_norm": 0.7417134642601013, + "learning_rate": 2.8852885291300004e-06, + "loss": 2.0235, + "step": 22726 + }, + { + "epoch": 0.76, + "grad_norm": 0.7270734906196594, + "learning_rate": 2.8845416581807105e-06, + "loss": 2.0058, + "step": 22727 + }, + { + "epoch": 0.76, + "grad_norm": 0.7813875675201416, + "learning_rate": 2.8837948676177017e-06, + "loss": 2.0277, + "step": 22728 + }, + { + "epoch": 0.76, + "grad_norm": 0.7309724688529968, + "learning_rate": 2.8830481574494063e-06, + "loss": 2.049, + "step": 22729 + }, + { + "epoch": 0.76, + "grad_norm": 0.7163568735122681, + "learning_rate": 2.8823015276842638e-06, + "loss": 2.0145, + "step": 22730 + }, + { + "epoch": 0.76, + "grad_norm": 0.7306113243103027, + "learning_rate": 2.881554978330704e-06, + "loss": 2.0344, + "step": 22731 + }, + { + "epoch": 0.76, + "grad_norm": 0.7483114004135132, + "learning_rate": 2.880808509397168e-06, + "loss": 2.0482, + "step": 22732 + }, + { + "epoch": 0.76, + "grad_norm": 0.7488372325897217, + "learning_rate": 2.8800621208920853e-06, + "loss": 1.9394, + "step": 22733 + }, + { + "epoch": 0.76, + "grad_norm": 0.759667694568634, + "learning_rate": 2.8793158128238843e-06, + "loss": 2.0223, + "step": 22734 + }, + { + "epoch": 0.76, + "grad_norm": 0.7745639681816101, + "learning_rate": 2.878569585201001e-06, + "loss": 2.0839, + "step": 22735 + }, + { + "epoch": 0.76, + "grad_norm": 0.7300155758857727, + "learning_rate": 2.877823438031867e-06, + "loss": 2.044, + "step": 22736 + }, + { + "epoch": 0.76, + "grad_norm": 0.7173658609390259, + "learning_rate": 2.87707737132491e-06, + "loss": 2.0258, + "step": 22737 + }, + { + "epoch": 0.76, + "grad_norm": 0.7419295310974121, + "learning_rate": 2.8763313850885566e-06, + "loss": 2.0495, + "step": 22738 + }, + { + "epoch": 0.76, + "grad_norm": 0.7540974617004395, + "learning_rate": 2.8755854793312377e-06, + "loss": 2.0764, + "step": 22739 + }, + { + "epoch": 0.76, + "grad_norm": 0.7596832513809204, + "learning_rate": 2.8748396540613765e-06, + "loss": 2.0454, + "step": 22740 + }, + { + "epoch": 0.76, + "grad_norm": 0.7529978156089783, + "learning_rate": 2.874093909287404e-06, + "loss": 2.0882, + "step": 22741 + }, + { + "epoch": 0.76, + "grad_norm": 0.7518634796142578, + "learning_rate": 2.873348245017743e-06, + "loss": 2.1326, + "step": 22742 + }, + { + "epoch": 0.76, + "grad_norm": 0.7591113448143005, + "learning_rate": 2.8726026612608125e-06, + "loss": 2.0348, + "step": 22743 + }, + { + "epoch": 0.76, + "grad_norm": 0.7428948283195496, + "learning_rate": 2.871857158025041e-06, + "loss": 2.0727, + "step": 22744 + }, + { + "epoch": 0.76, + "grad_norm": 0.739607036113739, + "learning_rate": 2.8711117353188535e-06, + "loss": 2.0395, + "step": 22745 + }, + { + "epoch": 0.76, + "grad_norm": 0.7560466527938843, + "learning_rate": 2.8703663931506664e-06, + "loss": 2.07, + "step": 22746 + }, + { + "epoch": 0.76, + "grad_norm": 0.7224982380867004, + "learning_rate": 2.869621131528899e-06, + "loss": 1.9763, + "step": 22747 + }, + { + "epoch": 0.76, + "grad_norm": 0.7437267899513245, + "learning_rate": 2.868875950461972e-06, + "loss": 2.0349, + "step": 22748 + }, + { + "epoch": 0.76, + "grad_norm": 0.7242689728736877, + "learning_rate": 2.8681308499583103e-06, + "loss": 1.9429, + "step": 22749 + }, + { + "epoch": 0.76, + "grad_norm": 0.7306990027427673, + "learning_rate": 2.8673858300263257e-06, + "loss": 2.1277, + "step": 22750 + }, + { + "epoch": 0.76, + "grad_norm": 0.7411149740219116, + "learning_rate": 2.8666408906744327e-06, + "loss": 2.0081, + "step": 22751 + }, + { + "epoch": 0.76, + "grad_norm": 0.7521913051605225, + "learning_rate": 2.865896031911054e-06, + "loss": 2.0848, + "step": 22752 + }, + { + "epoch": 0.76, + "grad_norm": 0.7480065822601318, + "learning_rate": 2.865151253744597e-06, + "loss": 2.068, + "step": 22753 + }, + { + "epoch": 0.76, + "grad_norm": 0.7338721752166748, + "learning_rate": 2.86440655618348e-06, + "loss": 2.0267, + "step": 22754 + }, + { + "epoch": 0.76, + "grad_norm": 0.7653493285179138, + "learning_rate": 2.863661939236122e-06, + "loss": 2.0541, + "step": 22755 + }, + { + "epoch": 0.76, + "grad_norm": 0.7390453219413757, + "learning_rate": 2.862917402910923e-06, + "loss": 2.0226, + "step": 22756 + }, + { + "epoch": 0.76, + "grad_norm": 0.7454491853713989, + "learning_rate": 2.8621729472163006e-06, + "loss": 2.0429, + "step": 22757 + }, + { + "epoch": 0.76, + "grad_norm": 0.7457079291343689, + "learning_rate": 2.8614285721606683e-06, + "loss": 2.0612, + "step": 22758 + }, + { + "epoch": 0.76, + "grad_norm": 0.7485204935073853, + "learning_rate": 2.8606842777524325e-06, + "loss": 2.0216, + "step": 22759 + }, + { + "epoch": 0.76, + "grad_norm": 0.7779741287231445, + "learning_rate": 2.8599400639999975e-06, + "loss": 2.0378, + "step": 22760 + }, + { + "epoch": 0.76, + "grad_norm": 0.7353267073631287, + "learning_rate": 2.859195930911779e-06, + "loss": 2.0459, + "step": 22761 + }, + { + "epoch": 0.76, + "grad_norm": 0.7637642621994019, + "learning_rate": 2.8584518784961766e-06, + "loss": 2.0017, + "step": 22762 + }, + { + "epoch": 0.76, + "grad_norm": 0.7146471738815308, + "learning_rate": 2.857707906761603e-06, + "loss": 1.9969, + "step": 22763 + }, + { + "epoch": 0.76, + "grad_norm": 0.7348971366882324, + "learning_rate": 2.856964015716457e-06, + "loss": 2.0433, + "step": 22764 + }, + { + "epoch": 0.76, + "grad_norm": 0.7220140099525452, + "learning_rate": 2.8562202053691477e-06, + "loss": 2.089, + "step": 22765 + }, + { + "epoch": 0.76, + "grad_norm": 0.7416256666183472, + "learning_rate": 2.855476475728073e-06, + "loss": 2.1392, + "step": 22766 + }, + { + "epoch": 0.76, + "grad_norm": 0.7576702237129211, + "learning_rate": 2.8547328268016407e-06, + "loss": 2.0613, + "step": 22767 + }, + { + "epoch": 0.76, + "grad_norm": 0.7445651292800903, + "learning_rate": 2.85398925859825e-06, + "loss": 1.9908, + "step": 22768 + }, + { + "epoch": 0.76, + "grad_norm": 0.7502511143684387, + "learning_rate": 2.853245771126296e-06, + "loss": 2.0786, + "step": 22769 + }, + { + "epoch": 0.76, + "grad_norm": 0.7497684359550476, + "learning_rate": 2.8525023643941853e-06, + "loss": 2.0972, + "step": 22770 + }, + { + "epoch": 0.76, + "grad_norm": 0.7444024085998535, + "learning_rate": 2.8517590384103157e-06, + "loss": 2.0563, + "step": 22771 + }, + { + "epoch": 0.76, + "grad_norm": 0.7406321167945862, + "learning_rate": 2.8510157931830827e-06, + "loss": 2.0867, + "step": 22772 + }, + { + "epoch": 0.76, + "grad_norm": 0.7493699789047241, + "learning_rate": 2.8502726287208817e-06, + "loss": 2.092, + "step": 22773 + }, + { + "epoch": 0.76, + "grad_norm": 0.7597299218177795, + "learning_rate": 2.8495295450321126e-06, + "loss": 1.9773, + "step": 22774 + }, + { + "epoch": 0.76, + "grad_norm": 0.7407028675079346, + "learning_rate": 2.848786542125166e-06, + "loss": 2.0848, + "step": 22775 + }, + { + "epoch": 0.76, + "grad_norm": 0.7526385188102722, + "learning_rate": 2.8480436200084372e-06, + "loss": 2.0283, + "step": 22776 + }, + { + "epoch": 0.76, + "grad_norm": 0.7731091976165771, + "learning_rate": 2.8473007786903262e-06, + "loss": 1.9896, + "step": 22777 + }, + { + "epoch": 0.76, + "grad_norm": 0.7436456680297852, + "learning_rate": 2.846558018179214e-06, + "loss": 1.994, + "step": 22778 + }, + { + "epoch": 0.76, + "grad_norm": 0.7733966112136841, + "learning_rate": 2.8458153384834964e-06, + "loss": 1.9903, + "step": 22779 + }, + { + "epoch": 0.76, + "grad_norm": 0.7830696105957031, + "learning_rate": 2.8450727396115662e-06, + "loss": 2.0559, + "step": 22780 + }, + { + "epoch": 0.76, + "grad_norm": 0.759630024433136, + "learning_rate": 2.8443302215718127e-06, + "loss": 2.0441, + "step": 22781 + }, + { + "epoch": 0.76, + "grad_norm": 0.7294036149978638, + "learning_rate": 2.843587784372619e-06, + "loss": 2.0204, + "step": 22782 + }, + { + "epoch": 0.76, + "grad_norm": 0.7080414891242981, + "learning_rate": 2.842845428022379e-06, + "loss": 2.0727, + "step": 22783 + }, + { + "epoch": 0.76, + "grad_norm": 0.7561578154563904, + "learning_rate": 2.842103152529475e-06, + "loss": 2.0254, + "step": 22784 + }, + { + "epoch": 0.76, + "grad_norm": 0.7158710956573486, + "learning_rate": 2.841360957902294e-06, + "loss": 2.0194, + "step": 22785 + }, + { + "epoch": 0.76, + "grad_norm": 0.7433933019638062, + "learning_rate": 2.8406188441492245e-06, + "loss": 2.1361, + "step": 22786 + }, + { + "epoch": 0.76, + "grad_norm": 0.7620903849601746, + "learning_rate": 2.8398768112786467e-06, + "loss": 2.0968, + "step": 22787 + }, + { + "epoch": 0.76, + "grad_norm": 0.7642061114311218, + "learning_rate": 2.839134859298942e-06, + "loss": 2.0538, + "step": 22788 + }, + { + "epoch": 0.76, + "grad_norm": 0.7542993426322937, + "learning_rate": 2.838392988218499e-06, + "loss": 2.0882, + "step": 22789 + }, + { + "epoch": 0.76, + "grad_norm": 0.7718907594680786, + "learning_rate": 2.8376511980456946e-06, + "loss": 2.0869, + "step": 22790 + }, + { + "epoch": 0.76, + "grad_norm": 0.7224864363670349, + "learning_rate": 2.836909488788905e-06, + "loss": 2.0489, + "step": 22791 + }, + { + "epoch": 0.76, + "grad_norm": 0.7465981841087341, + "learning_rate": 2.8361678604565155e-06, + "loss": 2.0313, + "step": 22792 + }, + { + "epoch": 0.76, + "grad_norm": 0.7449187636375427, + "learning_rate": 2.835426313056905e-06, + "loss": 2.0311, + "step": 22793 + }, + { + "epoch": 0.76, + "grad_norm": 0.769643247127533, + "learning_rate": 2.83468484659845e-06, + "loss": 2.03, + "step": 22794 + }, + { + "epoch": 0.76, + "grad_norm": 0.7353590130805969, + "learning_rate": 2.8339434610895234e-06, + "loss": 2.0634, + "step": 22795 + }, + { + "epoch": 0.76, + "grad_norm": 0.753582775592804, + "learning_rate": 2.833202156538506e-06, + "loss": 1.9786, + "step": 22796 + }, + { + "epoch": 0.76, + "grad_norm": 0.7406953573226929, + "learning_rate": 2.8324609329537677e-06, + "loss": 2.031, + "step": 22797 + }, + { + "epoch": 0.76, + "grad_norm": 0.7696153521537781, + "learning_rate": 2.8317197903436857e-06, + "loss": 2.0016, + "step": 22798 + }, + { + "epoch": 0.76, + "grad_norm": 0.7360467910766602, + "learning_rate": 2.8309787287166377e-06, + "loss": 1.9836, + "step": 22799 + }, + { + "epoch": 0.76, + "grad_norm": 0.7296015024185181, + "learning_rate": 2.8302377480809863e-06, + "loss": 1.9921, + "step": 22800 + }, + { + "epoch": 0.76, + "grad_norm": 0.7530319094657898, + "learning_rate": 2.8294968484451046e-06, + "loss": 1.9961, + "step": 22801 + }, + { + "epoch": 0.76, + "grad_norm": 0.7379713654518127, + "learning_rate": 2.82875602981737e-06, + "loss": 2.0353, + "step": 22802 + }, + { + "epoch": 0.76, + "grad_norm": 0.7686151266098022, + "learning_rate": 2.8280152922061465e-06, + "loss": 2.0444, + "step": 22803 + }, + { + "epoch": 0.76, + "grad_norm": 0.7530451416969299, + "learning_rate": 2.8272746356198e-06, + "loss": 2.0136, + "step": 22804 + }, + { + "epoch": 0.76, + "grad_norm": 0.7173929214477539, + "learning_rate": 2.8265340600667037e-06, + "loss": 2.0888, + "step": 22805 + }, + { + "epoch": 0.76, + "grad_norm": 0.7532140016555786, + "learning_rate": 2.825793565555218e-06, + "loss": 2.0485, + "step": 22806 + }, + { + "epoch": 0.76, + "grad_norm": 0.737980842590332, + "learning_rate": 2.825053152093713e-06, + "loss": 2.0323, + "step": 22807 + }, + { + "epoch": 0.76, + "grad_norm": 0.7561329007148743, + "learning_rate": 2.8243128196905557e-06, + "loss": 2.0326, + "step": 22808 + }, + { + "epoch": 0.76, + "grad_norm": 0.7359281182289124, + "learning_rate": 2.823572568354106e-06, + "loss": 2.0457, + "step": 22809 + }, + { + "epoch": 0.76, + "grad_norm": 0.7557693123817444, + "learning_rate": 2.8228323980927254e-06, + "loss": 2.009, + "step": 22810 + }, + { + "epoch": 0.76, + "grad_norm": 0.7427211999893188, + "learning_rate": 2.8220923089147813e-06, + "loss": 2.0814, + "step": 22811 + }, + { + "epoch": 0.76, + "grad_norm": 0.7207542061805725, + "learning_rate": 2.8213523008286303e-06, + "loss": 2.0245, + "step": 22812 + }, + { + "epoch": 0.76, + "grad_norm": 0.7453858256340027, + "learning_rate": 2.820612373842632e-06, + "loss": 2.0509, + "step": 22813 + }, + { + "epoch": 0.76, + "grad_norm": 0.7511812448501587, + "learning_rate": 2.8198725279651473e-06, + "loss": 2.0711, + "step": 22814 + }, + { + "epoch": 0.76, + "grad_norm": 0.7107770442962646, + "learning_rate": 2.8191327632045383e-06, + "loss": 1.9964, + "step": 22815 + }, + { + "epoch": 0.76, + "grad_norm": 0.7525960803031921, + "learning_rate": 2.8183930795691583e-06, + "loss": 2.0405, + "step": 22816 + }, + { + "epoch": 0.76, + "grad_norm": 0.7364457249641418, + "learning_rate": 2.8176534770673614e-06, + "loss": 2.0832, + "step": 22817 + }, + { + "epoch": 0.76, + "grad_norm": 0.753216028213501, + "learning_rate": 2.8169139557075097e-06, + "loss": 2.1008, + "step": 22818 + }, + { + "epoch": 0.76, + "grad_norm": 0.7421953082084656, + "learning_rate": 2.8161745154979514e-06, + "loss": 1.99, + "step": 22819 + }, + { + "epoch": 0.76, + "grad_norm": 0.7326236963272095, + "learning_rate": 2.8154351564470426e-06, + "loss": 2.0345, + "step": 22820 + }, + { + "epoch": 0.76, + "grad_norm": 0.7462367415428162, + "learning_rate": 2.8146958785631427e-06, + "loss": 2.0173, + "step": 22821 + }, + { + "epoch": 0.76, + "grad_norm": 0.7705219984054565, + "learning_rate": 2.8139566818545927e-06, + "loss": 2.0778, + "step": 22822 + }, + { + "epoch": 0.76, + "grad_norm": 0.7700719237327576, + "learning_rate": 2.813217566329749e-06, + "loss": 2.0618, + "step": 22823 + }, + { + "epoch": 0.76, + "grad_norm": 0.7637203931808472, + "learning_rate": 2.812478531996964e-06, + "loss": 2.0402, + "step": 22824 + }, + { + "epoch": 0.76, + "grad_norm": 0.7371863126754761, + "learning_rate": 2.811739578864583e-06, + "loss": 2.0052, + "step": 22825 + }, + { + "epoch": 0.76, + "grad_norm": 0.736935555934906, + "learning_rate": 2.8110007069409537e-06, + "loss": 2.0662, + "step": 22826 + }, + { + "epoch": 0.76, + "grad_norm": 0.7244799137115479, + "learning_rate": 2.810261916234428e-06, + "loss": 2.0455, + "step": 22827 + }, + { + "epoch": 0.76, + "grad_norm": 0.7546132206916809, + "learning_rate": 2.809523206753346e-06, + "loss": 2.0784, + "step": 22828 + }, + { + "epoch": 0.76, + "grad_norm": 0.7331262826919556, + "learning_rate": 2.8087845785060576e-06, + "loss": 2.0079, + "step": 22829 + }, + { + "epoch": 0.76, + "grad_norm": 0.745187520980835, + "learning_rate": 2.8080460315009093e-06, + "loss": 2.0422, + "step": 22830 + }, + { + "epoch": 0.76, + "grad_norm": 0.7327925562858582, + "learning_rate": 2.8073075657462424e-06, + "loss": 2.1107, + "step": 22831 + }, + { + "epoch": 0.76, + "grad_norm": 0.7126834392547607, + "learning_rate": 2.806569181250396e-06, + "loss": 1.9922, + "step": 22832 + }, + { + "epoch": 0.76, + "grad_norm": 0.7263754606246948, + "learning_rate": 2.805830878021715e-06, + "loss": 2.0121, + "step": 22833 + }, + { + "epoch": 0.76, + "grad_norm": 0.7632060647010803, + "learning_rate": 2.8050926560685467e-06, + "loss": 2.073, + "step": 22834 + }, + { + "epoch": 0.76, + "grad_norm": 0.7432557344436646, + "learning_rate": 2.804354515399219e-06, + "loss": 2.0291, + "step": 22835 + }, + { + "epoch": 0.76, + "grad_norm": 0.7295231223106384, + "learning_rate": 2.8036164560220768e-06, + "loss": 2.0007, + "step": 22836 + }, + { + "epoch": 0.76, + "grad_norm": 0.763951301574707, + "learning_rate": 2.802878477945462e-06, + "loss": 2.0797, + "step": 22837 + }, + { + "epoch": 0.76, + "grad_norm": 0.7526400685310364, + "learning_rate": 2.8021405811777045e-06, + "loss": 2.071, + "step": 22838 + }, + { + "epoch": 0.76, + "grad_norm": 0.7495793700218201, + "learning_rate": 2.801402765727147e-06, + "loss": 2.0452, + "step": 22839 + }, + { + "epoch": 0.76, + "grad_norm": 0.7792326211929321, + "learning_rate": 2.800665031602123e-06, + "loss": 2.0447, + "step": 22840 + }, + { + "epoch": 0.76, + "grad_norm": 0.7496023774147034, + "learning_rate": 2.799927378810964e-06, + "loss": 2.0561, + "step": 22841 + }, + { + "epoch": 0.76, + "grad_norm": 0.7682244777679443, + "learning_rate": 2.799189807362004e-06, + "loss": 2.0746, + "step": 22842 + }, + { + "epoch": 0.76, + "grad_norm": 0.7639046907424927, + "learning_rate": 2.7984523172635845e-06, + "loss": 2.0271, + "step": 22843 + }, + { + "epoch": 0.76, + "grad_norm": 0.7418408989906311, + "learning_rate": 2.797714908524024e-06, + "loss": 2.0707, + "step": 22844 + }, + { + "epoch": 0.76, + "grad_norm": 0.7404930591583252, + "learning_rate": 2.79697758115166e-06, + "loss": 2.0948, + "step": 22845 + }, + { + "epoch": 0.76, + "grad_norm": 0.7557381391525269, + "learning_rate": 2.796240335154824e-06, + "loss": 2.1182, + "step": 22846 + }, + { + "epoch": 0.76, + "grad_norm": 0.7494269013404846, + "learning_rate": 2.795503170541843e-06, + "loss": 2.0497, + "step": 22847 + }, + { + "epoch": 0.76, + "grad_norm": 0.7013692855834961, + "learning_rate": 2.7947660873210427e-06, + "loss": 2.1248, + "step": 22848 + }, + { + "epoch": 0.76, + "grad_norm": 0.7491157650947571, + "learning_rate": 2.7940290855007525e-06, + "loss": 1.993, + "step": 22849 + }, + { + "epoch": 0.76, + "grad_norm": 0.7449735403060913, + "learning_rate": 2.793292165089301e-06, + "loss": 2.0824, + "step": 22850 + }, + { + "epoch": 0.76, + "grad_norm": 0.7609108090400696, + "learning_rate": 2.792555326095008e-06, + "loss": 2.1298, + "step": 22851 + }, + { + "epoch": 0.76, + "grad_norm": 0.7150327563285828, + "learning_rate": 2.7918185685262045e-06, + "loss": 2.0627, + "step": 22852 + }, + { + "epoch": 0.76, + "grad_norm": 0.7578579783439636, + "learning_rate": 2.7910818923912096e-06, + "loss": 2.1449, + "step": 22853 + }, + { + "epoch": 0.76, + "grad_norm": 0.7456454634666443, + "learning_rate": 2.7903452976983436e-06, + "loss": 2.0342, + "step": 22854 + }, + { + "epoch": 0.76, + "grad_norm": 0.7480971813201904, + "learning_rate": 2.7896087844559316e-06, + "loss": 2.0237, + "step": 22855 + }, + { + "epoch": 0.76, + "grad_norm": 0.7543898224830627, + "learning_rate": 2.7888723526723004e-06, + "loss": 2.0252, + "step": 22856 + }, + { + "epoch": 0.76, + "grad_norm": 0.7621117830276489, + "learning_rate": 2.7881360023557568e-06, + "loss": 2.1324, + "step": 22857 + }, + { + "epoch": 0.76, + "grad_norm": 0.755854070186615, + "learning_rate": 2.7873997335146254e-06, + "loss": 2.0158, + "step": 22858 + }, + { + "epoch": 0.76, + "grad_norm": 0.7405219674110413, + "learning_rate": 2.786663546157229e-06, + "loss": 2.0967, + "step": 22859 + }, + { + "epoch": 0.76, + "grad_norm": 0.7010526061058044, + "learning_rate": 2.785927440291877e-06, + "loss": 2.0497, + "step": 22860 + }, + { + "epoch": 0.76, + "grad_norm": 0.7419354319572449, + "learning_rate": 2.785191415926891e-06, + "loss": 2.0273, + "step": 22861 + }, + { + "epoch": 0.76, + "grad_norm": 0.7444115877151489, + "learning_rate": 2.7844554730705853e-06, + "loss": 2.0248, + "step": 22862 + }, + { + "epoch": 0.76, + "grad_norm": 0.7346345782279968, + "learning_rate": 2.783719611731269e-06, + "loss": 2.1008, + "step": 22863 + }, + { + "epoch": 0.76, + "grad_norm": 0.7400256395339966, + "learning_rate": 2.78298383191726e-06, + "loss": 1.9928, + "step": 22864 + }, + { + "epoch": 0.76, + "grad_norm": 0.7487415075302124, + "learning_rate": 2.7822481336368733e-06, + "loss": 2.0493, + "step": 22865 + }, + { + "epoch": 0.76, + "grad_norm": 0.7278691530227661, + "learning_rate": 2.781512516898417e-06, + "loss": 2.0095, + "step": 22866 + }, + { + "epoch": 0.76, + "grad_norm": 0.7287070751190186, + "learning_rate": 2.780776981710198e-06, + "loss": 2.0571, + "step": 22867 + }, + { + "epoch": 0.76, + "grad_norm": 0.7447271347045898, + "learning_rate": 2.7800415280805337e-06, + "loss": 2.0326, + "step": 22868 + }, + { + "epoch": 0.76, + "grad_norm": 0.7377855181694031, + "learning_rate": 2.7793061560177247e-06, + "loss": 2.0406, + "step": 22869 + }, + { + "epoch": 0.76, + "grad_norm": 0.759272038936615, + "learning_rate": 2.778570865530088e-06, + "loss": 2.0656, + "step": 22870 + }, + { + "epoch": 0.76, + "grad_norm": 0.791037917137146, + "learning_rate": 2.7778356566259214e-06, + "loss": 2.0494, + "step": 22871 + }, + { + "epoch": 0.76, + "grad_norm": 0.7595379948616028, + "learning_rate": 2.777100529313538e-06, + "loss": 2.0402, + "step": 22872 + }, + { + "epoch": 0.76, + "grad_norm": 0.7502461075782776, + "learning_rate": 2.7763654836012367e-06, + "loss": 2.0139, + "step": 22873 + }, + { + "epoch": 0.76, + "grad_norm": 0.7427219152450562, + "learning_rate": 2.7756305194973278e-06, + "loss": 2.0631, + "step": 22874 + }, + { + "epoch": 0.76, + "grad_norm": 0.7593291997909546, + "learning_rate": 2.774895637010111e-06, + "loss": 1.9916, + "step": 22875 + }, + { + "epoch": 0.76, + "grad_norm": 0.7484283447265625, + "learning_rate": 2.7741608361478855e-06, + "loss": 1.9805, + "step": 22876 + }, + { + "epoch": 0.76, + "grad_norm": 0.7353966236114502, + "learning_rate": 2.773426116918957e-06, + "loss": 2.0106, + "step": 22877 + }, + { + "epoch": 0.76, + "grad_norm": 0.7234967947006226, + "learning_rate": 2.772691479331632e-06, + "loss": 2.0825, + "step": 22878 + }, + { + "epoch": 0.76, + "grad_norm": 0.7462483644485474, + "learning_rate": 2.7719569233941956e-06, + "loss": 2.102, + "step": 22879 + }, + { + "epoch": 0.76, + "grad_norm": 0.7308499813079834, + "learning_rate": 2.771222449114954e-06, + "loss": 2.094, + "step": 22880 + }, + { + "epoch": 0.76, + "grad_norm": 0.7385032773017883, + "learning_rate": 2.7704880565022074e-06, + "loss": 1.9976, + "step": 22881 + }, + { + "epoch": 0.76, + "grad_norm": 0.7400795817375183, + "learning_rate": 2.7697537455642476e-06, + "loss": 1.9792, + "step": 22882 + }, + { + "epoch": 0.76, + "grad_norm": 0.7241218090057373, + "learning_rate": 2.769019516309376e-06, + "loss": 2.0732, + "step": 22883 + }, + { + "epoch": 0.76, + "grad_norm": 0.7485787868499756, + "learning_rate": 2.7682853687458833e-06, + "loss": 2.0907, + "step": 22884 + }, + { + "epoch": 0.76, + "grad_norm": 0.7482932806015015, + "learning_rate": 2.7675513028820613e-06, + "loss": 2.0659, + "step": 22885 + }, + { + "epoch": 0.76, + "grad_norm": 0.7356153130531311, + "learning_rate": 2.766817318726206e-06, + "loss": 2.0177, + "step": 22886 + }, + { + "epoch": 0.76, + "grad_norm": 0.7160064578056335, + "learning_rate": 2.7660834162866136e-06, + "loss": 2.0052, + "step": 22887 + }, + { + "epoch": 0.76, + "grad_norm": 0.7750246524810791, + "learning_rate": 2.7653495955715702e-06, + "loss": 2.1174, + "step": 22888 + }, + { + "epoch": 0.76, + "grad_norm": 0.7555484771728516, + "learning_rate": 2.7646158565893644e-06, + "loss": 2.0059, + "step": 22889 + }, + { + "epoch": 0.76, + "grad_norm": 0.7672293186187744, + "learning_rate": 2.7638821993482913e-06, + "loss": 2.0548, + "step": 22890 + }, + { + "epoch": 0.76, + "grad_norm": 0.7527955770492554, + "learning_rate": 2.763148623856633e-06, + "loss": 2.0988, + "step": 22891 + }, + { + "epoch": 0.76, + "grad_norm": 0.750593364238739, + "learning_rate": 2.7624151301226843e-06, + "loss": 2.0521, + "step": 22892 + }, + { + "epoch": 0.76, + "grad_norm": 0.7746703028678894, + "learning_rate": 2.761681718154724e-06, + "loss": 2.0838, + "step": 22893 + }, + { + "epoch": 0.76, + "grad_norm": 0.7591778635978699, + "learning_rate": 2.7609483879610444e-06, + "loss": 2.1118, + "step": 22894 + }, + { + "epoch": 0.76, + "grad_norm": 0.7384560108184814, + "learning_rate": 2.7602151395499254e-06, + "loss": 2.0234, + "step": 22895 + }, + { + "epoch": 0.76, + "grad_norm": 0.7612176537513733, + "learning_rate": 2.7594819729296553e-06, + "loss": 2.0296, + "step": 22896 + }, + { + "epoch": 0.76, + "grad_norm": 0.7423012852668762, + "learning_rate": 2.7587488881085145e-06, + "loss": 2.0502, + "step": 22897 + }, + { + "epoch": 0.76, + "grad_norm": 0.7309958338737488, + "learning_rate": 2.7580158850947813e-06, + "loss": 2.0811, + "step": 22898 + }, + { + "epoch": 0.76, + "grad_norm": 0.7536032199859619, + "learning_rate": 2.7572829638967415e-06, + "loss": 2.0724, + "step": 22899 + }, + { + "epoch": 0.76, + "grad_norm": 0.7247188687324524, + "learning_rate": 2.756550124522677e-06, + "loss": 2.0118, + "step": 22900 + }, + { + "epoch": 0.76, + "grad_norm": 0.7585209608078003, + "learning_rate": 2.755817366980863e-06, + "loss": 2.0362, + "step": 22901 + }, + { + "epoch": 0.76, + "grad_norm": 0.7693536281585693, + "learning_rate": 2.755084691279577e-06, + "loss": 2.0474, + "step": 22902 + }, + { + "epoch": 0.76, + "grad_norm": 0.7552255988121033, + "learning_rate": 2.754352097427101e-06, + "loss": 2.1067, + "step": 22903 + }, + { + "epoch": 0.76, + "grad_norm": 0.7555841207504272, + "learning_rate": 2.7536195854317047e-06, + "loss": 1.9756, + "step": 22904 + }, + { + "epoch": 0.76, + "grad_norm": 0.7392663359642029, + "learning_rate": 2.752887155301672e-06, + "loss": 2.0547, + "step": 22905 + }, + { + "epoch": 0.76, + "grad_norm": 0.7157472968101501, + "learning_rate": 2.752154807045272e-06, + "loss": 2.0232, + "step": 22906 + }, + { + "epoch": 0.76, + "grad_norm": 0.7753708958625793, + "learning_rate": 2.7514225406707773e-06, + "loss": 2.0479, + "step": 22907 + }, + { + "epoch": 0.76, + "grad_norm": 0.7481390833854675, + "learning_rate": 2.7506903561864615e-06, + "loss": 2.0746, + "step": 22908 + }, + { + "epoch": 0.76, + "grad_norm": 0.7574496865272522, + "learning_rate": 2.749958253600601e-06, + "loss": 2.0248, + "step": 22909 + }, + { + "epoch": 0.76, + "grad_norm": 0.7433311343193054, + "learning_rate": 2.7492262329214636e-06, + "loss": 2.0389, + "step": 22910 + }, + { + "epoch": 0.76, + "grad_norm": 0.7587152719497681, + "learning_rate": 2.7484942941573155e-06, + "loss": 2.0246, + "step": 22911 + }, + { + "epoch": 0.76, + "grad_norm": 0.7461514472961426, + "learning_rate": 2.7477624373164326e-06, + "loss": 2.0783, + "step": 22912 + }, + { + "epoch": 0.76, + "grad_norm": 0.7583099007606506, + "learning_rate": 2.7470306624070753e-06, + "loss": 2.0177, + "step": 22913 + }, + { + "epoch": 0.76, + "grad_norm": 0.7543014287948608, + "learning_rate": 2.7462989694375186e-06, + "loss": 2.1068, + "step": 22914 + }, + { + "epoch": 0.76, + "grad_norm": 0.7607203722000122, + "learning_rate": 2.7455673584160223e-06, + "loss": 2.0475, + "step": 22915 + }, + { + "epoch": 0.76, + "grad_norm": 0.7467170357704163, + "learning_rate": 2.744835829350857e-06, + "loss": 2.0961, + "step": 22916 + }, + { + "epoch": 0.76, + "grad_norm": 0.7372157573699951, + "learning_rate": 2.7441043822502823e-06, + "loss": 2.0357, + "step": 22917 + }, + { + "epoch": 0.76, + "grad_norm": 0.7519665956497192, + "learning_rate": 2.743373017122566e-06, + "loss": 2.0472, + "step": 22918 + }, + { + "epoch": 0.76, + "grad_norm": 0.7432259917259216, + "learning_rate": 2.742641733975969e-06, + "loss": 2.0728, + "step": 22919 + }, + { + "epoch": 0.76, + "grad_norm": 0.7239487171173096, + "learning_rate": 2.741910532818749e-06, + "loss": 2.0473, + "step": 22920 + }, + { + "epoch": 0.76, + "grad_norm": 0.7432849407196045, + "learning_rate": 2.7411794136591706e-06, + "loss": 2.0659, + "step": 22921 + }, + { + "epoch": 0.76, + "grad_norm": 0.7440228462219238, + "learning_rate": 2.7404483765054955e-06, + "loss": 2.068, + "step": 22922 + }, + { + "epoch": 0.76, + "grad_norm": 0.7427909970283508, + "learning_rate": 2.7397174213659815e-06, + "loss": 2.0012, + "step": 22923 + }, + { + "epoch": 0.76, + "grad_norm": 0.7215114235877991, + "learning_rate": 2.738986548248881e-06, + "loss": 2.0395, + "step": 22924 + }, + { + "epoch": 0.76, + "grad_norm": 0.7525121569633484, + "learning_rate": 2.7382557571624592e-06, + "loss": 2.0202, + "step": 22925 + }, + { + "epoch": 0.76, + "grad_norm": 0.7128960490226746, + "learning_rate": 2.737525048114964e-06, + "loss": 2.0649, + "step": 22926 + }, + { + "epoch": 0.76, + "grad_norm": 0.7379663586616516, + "learning_rate": 2.7367944211146567e-06, + "loss": 2.0634, + "step": 22927 + }, + { + "epoch": 0.76, + "grad_norm": 0.7514234185218811, + "learning_rate": 2.736063876169791e-06, + "loss": 2.0317, + "step": 22928 + }, + { + "epoch": 0.76, + "grad_norm": 0.7321406602859497, + "learning_rate": 2.7353334132886157e-06, + "loss": 1.9656, + "step": 22929 + }, + { + "epoch": 0.76, + "grad_norm": 0.756450891494751, + "learning_rate": 2.7346030324793847e-06, + "loss": 2.0149, + "step": 22930 + }, + { + "epoch": 0.76, + "grad_norm": 0.7578282952308655, + "learning_rate": 2.7338727337503546e-06, + "loss": 2.0036, + "step": 22931 + }, + { + "epoch": 0.76, + "grad_norm": 0.7585911154747009, + "learning_rate": 2.7331425171097713e-06, + "loss": 2.075, + "step": 22932 + }, + { + "epoch": 0.76, + "grad_norm": 0.7233803868293762, + "learning_rate": 2.732412382565882e-06, + "loss": 2.0164, + "step": 22933 + }, + { + "epoch": 0.76, + "grad_norm": 0.7444892525672913, + "learning_rate": 2.731682330126939e-06, + "loss": 2.0724, + "step": 22934 + }, + { + "epoch": 0.76, + "grad_norm": 0.7486906051635742, + "learning_rate": 2.7309523598011922e-06, + "loss": 2.0469, + "step": 22935 + }, + { + "epoch": 0.76, + "grad_norm": 0.7834153771400452, + "learning_rate": 2.7302224715968863e-06, + "loss": 2.0159, + "step": 22936 + }, + { + "epoch": 0.76, + "grad_norm": 0.7678384184837341, + "learning_rate": 2.729492665522262e-06, + "loss": 2.0496, + "step": 22937 + }, + { + "epoch": 0.76, + "grad_norm": 0.7662795782089233, + "learning_rate": 2.728762941585573e-06, + "loss": 2.0434, + "step": 22938 + }, + { + "epoch": 0.76, + "grad_norm": 0.717410147190094, + "learning_rate": 2.7280332997950554e-06, + "loss": 2.1004, + "step": 22939 + }, + { + "epoch": 0.76, + "grad_norm": 0.7340196371078491, + "learning_rate": 2.7273037401589586e-06, + "loss": 2.0519, + "step": 22940 + }, + { + "epoch": 0.76, + "grad_norm": 0.7601124048233032, + "learning_rate": 2.726574262685522e-06, + "loss": 2.064, + "step": 22941 + }, + { + "epoch": 0.76, + "grad_norm": 0.7916771769523621, + "learning_rate": 2.725844867382983e-06, + "loss": 2.1286, + "step": 22942 + }, + { + "epoch": 0.76, + "grad_norm": 0.7689327001571655, + "learning_rate": 2.7251155542595862e-06, + "loss": 2.0128, + "step": 22943 + }, + { + "epoch": 0.76, + "grad_norm": 0.7314073443412781, + "learning_rate": 2.7243863233235735e-06, + "loss": 2.0524, + "step": 22944 + }, + { + "epoch": 0.76, + "grad_norm": 0.7353020310401917, + "learning_rate": 2.7236571745831806e-06, + "loss": 2.0648, + "step": 22945 + }, + { + "epoch": 0.76, + "grad_norm": 0.742962658405304, + "learning_rate": 2.7229281080466407e-06, + "loss": 2.1079, + "step": 22946 + }, + { + "epoch": 0.76, + "grad_norm": 0.7524154186248779, + "learning_rate": 2.722199123722198e-06, + "loss": 2.1294, + "step": 22947 + }, + { + "epoch": 0.76, + "grad_norm": 0.7065379023551941, + "learning_rate": 2.721470221618081e-06, + "loss": 2.0317, + "step": 22948 + }, + { + "epoch": 0.76, + "grad_norm": 0.7527520060539246, + "learning_rate": 2.7207414017425305e-06, + "loss": 2.0733, + "step": 22949 + }, + { + "epoch": 0.76, + "grad_norm": 0.7462311387062073, + "learning_rate": 2.720012664103775e-06, + "loss": 2.0514, + "step": 22950 + }, + { + "epoch": 0.76, + "grad_norm": 0.7339749336242676, + "learning_rate": 2.7192840087100537e-06, + "loss": 2.0711, + "step": 22951 + }, + { + "epoch": 0.76, + "grad_norm": 0.7383087873458862, + "learning_rate": 2.718555435569591e-06, + "loss": 2.0168, + "step": 22952 + }, + { + "epoch": 0.76, + "grad_norm": 0.7327321171760559, + "learning_rate": 2.7178269446906236e-06, + "loss": 2.0595, + "step": 22953 + }, + { + "epoch": 0.76, + "grad_norm": 0.7754136323928833, + "learning_rate": 2.717098536081381e-06, + "loss": 2.0204, + "step": 22954 + }, + { + "epoch": 0.76, + "grad_norm": 0.7481711506843567, + "learning_rate": 2.7163702097500877e-06, + "loss": 2.0973, + "step": 22955 + }, + { + "epoch": 0.76, + "grad_norm": 0.7621837854385376, + "learning_rate": 2.715641965704975e-06, + "loss": 1.9994, + "step": 22956 + }, + { + "epoch": 0.76, + "grad_norm": 0.748285174369812, + "learning_rate": 2.7149138039542735e-06, + "loss": 2.0393, + "step": 22957 + }, + { + "epoch": 0.76, + "grad_norm": 0.744909405708313, + "learning_rate": 2.714185724506205e-06, + "loss": 2.0609, + "step": 22958 + }, + { + "epoch": 0.76, + "grad_norm": 0.7442793250083923, + "learning_rate": 2.7134577273689955e-06, + "loss": 2.0951, + "step": 22959 + }, + { + "epoch": 0.76, + "grad_norm": 0.7711385488510132, + "learning_rate": 2.7127298125508717e-06, + "loss": 2.0293, + "step": 22960 + }, + { + "epoch": 0.76, + "grad_norm": 0.7569025754928589, + "learning_rate": 2.712001980060053e-06, + "loss": 2.0161, + "step": 22961 + }, + { + "epoch": 0.76, + "grad_norm": 0.7248103618621826, + "learning_rate": 2.7112742299047678e-06, + "loss": 2.0235, + "step": 22962 + }, + { + "epoch": 0.76, + "grad_norm": 0.7388492822647095, + "learning_rate": 2.7105465620932357e-06, + "loss": 2.0284, + "step": 22963 + }, + { + "epoch": 0.76, + "grad_norm": 0.7385640740394592, + "learning_rate": 2.7098189766336726e-06, + "loss": 2.077, + "step": 22964 + }, + { + "epoch": 0.76, + "grad_norm": 0.7434827089309692, + "learning_rate": 2.709091473534302e-06, + "loss": 1.9644, + "step": 22965 + }, + { + "epoch": 0.76, + "grad_norm": 0.7498022317886353, + "learning_rate": 2.708364052803346e-06, + "loss": 2.1104, + "step": 22966 + }, + { + "epoch": 0.76, + "grad_norm": 0.7398502230644226, + "learning_rate": 2.70763671444902e-06, + "loss": 2.0626, + "step": 22967 + }, + { + "epoch": 0.76, + "grad_norm": 0.750668466091156, + "learning_rate": 2.7069094584795376e-06, + "loss": 2.055, + "step": 22968 + }, + { + "epoch": 0.76, + "grad_norm": 0.7551640272140503, + "learning_rate": 2.7061822849031215e-06, + "loss": 2.0665, + "step": 22969 + }, + { + "epoch": 0.76, + "grad_norm": 0.7414166331291199, + "learning_rate": 2.7054551937279793e-06, + "loss": 2.121, + "step": 22970 + }, + { + "epoch": 0.76, + "grad_norm": 0.7332898378372192, + "learning_rate": 2.704728184962333e-06, + "loss": 2.0766, + "step": 22971 + }, + { + "epoch": 0.76, + "grad_norm": 0.7825677990913391, + "learning_rate": 2.7040012586143894e-06, + "loss": 2.0362, + "step": 22972 + }, + { + "epoch": 0.76, + "grad_norm": 0.7340701818466187, + "learning_rate": 2.703274414692366e-06, + "loss": 1.9866, + "step": 22973 + }, + { + "epoch": 0.76, + "grad_norm": 0.7423158884048462, + "learning_rate": 2.702547653204469e-06, + "loss": 2.0693, + "step": 22974 + }, + { + "epoch": 0.76, + "grad_norm": 0.7552281618118286, + "learning_rate": 2.7018209741589163e-06, + "loss": 2.0675, + "step": 22975 + }, + { + "epoch": 0.76, + "grad_norm": 0.7242433428764343, + "learning_rate": 2.701094377563912e-06, + "loss": 2.0629, + "step": 22976 + }, + { + "epoch": 0.76, + "grad_norm": 0.742133378982544, + "learning_rate": 2.700367863427662e-06, + "loss": 2.0333, + "step": 22977 + }, + { + "epoch": 0.76, + "grad_norm": 0.7377882599830627, + "learning_rate": 2.6996414317583787e-06, + "loss": 1.9656, + "step": 22978 + }, + { + "epoch": 0.76, + "grad_norm": 0.7440063953399658, + "learning_rate": 2.6989150825642717e-06, + "loss": 2.0339, + "step": 22979 + }, + { + "epoch": 0.76, + "grad_norm": 0.7440783381462097, + "learning_rate": 2.698188815853542e-06, + "loss": 2.1122, + "step": 22980 + }, + { + "epoch": 0.76, + "grad_norm": 0.7194262742996216, + "learning_rate": 2.6974626316343935e-06, + "loss": 2.0558, + "step": 22981 + }, + { + "epoch": 0.76, + "grad_norm": 0.75040203332901, + "learning_rate": 2.696736529915036e-06, + "loss": 2.1216, + "step": 22982 + }, + { + "epoch": 0.76, + "grad_norm": 0.7275949120521545, + "learning_rate": 2.696010510703665e-06, + "loss": 2.0357, + "step": 22983 + }, + { + "epoch": 0.76, + "grad_norm": 0.746261715888977, + "learning_rate": 2.6952845740084877e-06, + "loss": 2.0502, + "step": 22984 + }, + { + "epoch": 0.76, + "grad_norm": 0.7711354494094849, + "learning_rate": 2.6945587198377087e-06, + "loss": 2.0533, + "step": 22985 + }, + { + "epoch": 0.76, + "grad_norm": 0.7380674481391907, + "learning_rate": 2.6938329481995195e-06, + "loss": 1.9954, + "step": 22986 + }, + { + "epoch": 0.76, + "grad_norm": 0.723931074142456, + "learning_rate": 2.6931072591021237e-06, + "loss": 2.0349, + "step": 22987 + }, + { + "epoch": 0.76, + "grad_norm": 0.7307219505310059, + "learning_rate": 2.6923816525537217e-06, + "loss": 2.0, + "step": 22988 + }, + { + "epoch": 0.76, + "grad_norm": 0.7910880446434021, + "learning_rate": 2.6916561285625096e-06, + "loss": 2.0405, + "step": 22989 + }, + { + "epoch": 0.76, + "grad_norm": 0.7574506402015686, + "learning_rate": 2.6909306871366814e-06, + "loss": 2.018, + "step": 22990 + }, + { + "epoch": 0.76, + "grad_norm": 0.7464982867240906, + "learning_rate": 2.6902053282844366e-06, + "loss": 2.045, + "step": 22991 + }, + { + "epoch": 0.76, + "grad_norm": 0.7294722199440002, + "learning_rate": 2.6894800520139653e-06, + "loss": 1.964, + "step": 22992 + }, + { + "epoch": 0.76, + "grad_norm": 0.7535343766212463, + "learning_rate": 2.6887548583334666e-06, + "loss": 2.0889, + "step": 22993 + }, + { + "epoch": 0.77, + "grad_norm": 0.7569416165351868, + "learning_rate": 2.6880297472511287e-06, + "loss": 1.9707, + "step": 22994 + }, + { + "epoch": 0.77, + "grad_norm": 0.7502040863037109, + "learning_rate": 2.687304718775148e-06, + "loss": 2.0631, + "step": 22995 + }, + { + "epoch": 0.77, + "grad_norm": 0.7440193295478821, + "learning_rate": 2.68657977291371e-06, + "loss": 2.0486, + "step": 22996 + }, + { + "epoch": 0.77, + "grad_norm": 0.7516134977340698, + "learning_rate": 2.685854909675011e-06, + "loss": 1.9759, + "step": 22997 + }, + { + "epoch": 0.77, + "grad_norm": 0.7502976655960083, + "learning_rate": 2.685130129067236e-06, + "loss": 1.9725, + "step": 22998 + }, + { + "epoch": 0.77, + "grad_norm": 0.7361271977424622, + "learning_rate": 2.6844054310985713e-06, + "loss": 2.0414, + "step": 22999 + }, + { + "epoch": 0.77, + "grad_norm": 0.7320762276649475, + "learning_rate": 2.6836808157772055e-06, + "loss": 2.0866, + "step": 23000 + }, + { + "epoch": 0.77, + "grad_norm": 0.7951169013977051, + "learning_rate": 2.682956283111331e-06, + "loss": 2.0989, + "step": 23001 + }, + { + "epoch": 0.77, + "grad_norm": 0.7393758296966553, + "learning_rate": 2.6822318331091267e-06, + "loss": 1.9684, + "step": 23002 + }, + { + "epoch": 0.77, + "grad_norm": 0.7511451244354248, + "learning_rate": 2.6815074657787764e-06, + "loss": 2.0832, + "step": 23003 + }, + { + "epoch": 0.77, + "grad_norm": 0.7790981531143188, + "learning_rate": 2.680783181128468e-06, + "loss": 2.0408, + "step": 23004 + }, + { + "epoch": 0.77, + "grad_norm": 0.7416275143623352, + "learning_rate": 2.680058979166379e-06, + "loss": 2.0911, + "step": 23005 + }, + { + "epoch": 0.77, + "grad_norm": 0.7401715517044067, + "learning_rate": 2.679334859900694e-06, + "loss": 1.9541, + "step": 23006 + }, + { + "epoch": 0.77, + "grad_norm": 0.7430713176727295, + "learning_rate": 2.6786108233395993e-06, + "loss": 2.0818, + "step": 23007 + }, + { + "epoch": 0.77, + "grad_norm": 0.7540525794029236, + "learning_rate": 2.677886869491263e-06, + "loss": 1.9796, + "step": 23008 + }, + { + "epoch": 0.77, + "grad_norm": 0.741267204284668, + "learning_rate": 2.677162998363869e-06, + "loss": 2.0354, + "step": 23009 + }, + { + "epoch": 0.77, + "grad_norm": 0.7692165374755859, + "learning_rate": 2.6764392099656e-06, + "loss": 2.0401, + "step": 23010 + }, + { + "epoch": 0.77, + "grad_norm": 0.7353464961051941, + "learning_rate": 2.6757155043046278e-06, + "loss": 2.0227, + "step": 23011 + }, + { + "epoch": 0.77, + "grad_norm": 0.7519355416297913, + "learning_rate": 2.6749918813891264e-06, + "loss": 2.073, + "step": 23012 + }, + { + "epoch": 0.77, + "grad_norm": 0.7517755627632141, + "learning_rate": 2.6742683412272774e-06, + "loss": 2.0869, + "step": 23013 + }, + { + "epoch": 0.77, + "grad_norm": 0.7612603306770325, + "learning_rate": 2.673544883827248e-06, + "loss": 2.0571, + "step": 23014 + }, + { + "epoch": 0.77, + "grad_norm": 0.7214151620864868, + "learning_rate": 2.6728215091972143e-06, + "loss": 2.0909, + "step": 23015 + }, + { + "epoch": 0.77, + "grad_norm": 0.7723817825317383, + "learning_rate": 2.6720982173453523e-06, + "loss": 2.1102, + "step": 23016 + }, + { + "epoch": 0.77, + "grad_norm": 0.7285270094871521, + "learning_rate": 2.671375008279831e-06, + "loss": 2.0992, + "step": 23017 + }, + { + "epoch": 0.77, + "grad_norm": 0.7769633531570435, + "learning_rate": 2.6706518820088158e-06, + "loss": 2.0876, + "step": 23018 + }, + { + "epoch": 0.77, + "grad_norm": 0.7566593289375305, + "learning_rate": 2.6699288385404844e-06, + "loss": 2.0711, + "step": 23019 + }, + { + "epoch": 0.77, + "grad_norm": 0.7480419874191284, + "learning_rate": 2.669205877883e-06, + "loss": 2.0122, + "step": 23020 + }, + { + "epoch": 0.77, + "grad_norm": 0.7765623927116394, + "learning_rate": 2.668483000044528e-06, + "loss": 2.1112, + "step": 23021 + }, + { + "epoch": 0.77, + "grad_norm": 0.7176385521888733, + "learning_rate": 2.6677602050332398e-06, + "loss": 1.9653, + "step": 23022 + }, + { + "epoch": 0.77, + "grad_norm": 0.7475897073745728, + "learning_rate": 2.6670374928573016e-06, + "loss": 2.0737, + "step": 23023 + }, + { + "epoch": 0.77, + "grad_norm": 0.7404859066009521, + "learning_rate": 2.666314863524877e-06, + "loss": 2.087, + "step": 23024 + }, + { + "epoch": 0.77, + "grad_norm": 0.7478281259536743, + "learning_rate": 2.6655923170441257e-06, + "loss": 2.0862, + "step": 23025 + }, + { + "epoch": 0.77, + "grad_norm": 0.7813414335250854, + "learning_rate": 2.6648698534232165e-06, + "loss": 2.0224, + "step": 23026 + }, + { + "epoch": 0.77, + "grad_norm": 0.7516582608222961, + "learning_rate": 2.6641474726703066e-06, + "loss": 2.0702, + "step": 23027 + }, + { + "epoch": 0.77, + "grad_norm": 0.7650073170661926, + "learning_rate": 2.663425174793559e-06, + "loss": 2.0845, + "step": 23028 + }, + { + "epoch": 0.77, + "grad_norm": 0.7453831434249878, + "learning_rate": 2.66270295980114e-06, + "loss": 1.9919, + "step": 23029 + }, + { + "epoch": 0.77, + "grad_norm": 0.740872859954834, + "learning_rate": 2.6619808277011973e-06, + "loss": 1.9525, + "step": 23030 + }, + { + "epoch": 0.77, + "grad_norm": 0.7351100444793701, + "learning_rate": 2.661258778501895e-06, + "loss": 2.0855, + "step": 23031 + }, + { + "epoch": 0.77, + "grad_norm": 0.7596026659011841, + "learning_rate": 2.6605368122113926e-06, + "loss": 2.0735, + "step": 23032 + }, + { + "epoch": 0.77, + "grad_norm": 0.7400634288787842, + "learning_rate": 2.6598149288378438e-06, + "loss": 2.0669, + "step": 23033 + }, + { + "epoch": 0.77, + "grad_norm": 0.7812380790710449, + "learning_rate": 2.6590931283894015e-06, + "loss": 1.9935, + "step": 23034 + }, + { + "epoch": 0.77, + "grad_norm": 0.7489427328109741, + "learning_rate": 2.658371410874222e-06, + "loss": 2.0787, + "step": 23035 + }, + { + "epoch": 0.77, + "grad_norm": 0.7496021389961243, + "learning_rate": 2.6576497763004637e-06, + "loss": 2.0195, + "step": 23036 + }, + { + "epoch": 0.77, + "grad_norm": 0.7212013006210327, + "learning_rate": 2.6569282246762718e-06, + "loss": 2.0358, + "step": 23037 + }, + { + "epoch": 0.77, + "grad_norm": 0.7791311144828796, + "learning_rate": 2.656206756009805e-06, + "loss": 2.0089, + "step": 23038 + }, + { + "epoch": 0.77, + "grad_norm": 0.7476695775985718, + "learning_rate": 2.6554853703092097e-06, + "loss": 2.0907, + "step": 23039 + }, + { + "epoch": 0.77, + "grad_norm": 0.7322556972503662, + "learning_rate": 2.6547640675826335e-06, + "loss": 2.0, + "step": 23040 + }, + { + "epoch": 0.77, + "grad_norm": 0.7558596134185791, + "learning_rate": 2.654042847838227e-06, + "loss": 2.0803, + "step": 23041 + }, + { + "epoch": 0.77, + "grad_norm": 0.7448135018348694, + "learning_rate": 2.653321711084147e-06, + "loss": 2.0489, + "step": 23042 + }, + { + "epoch": 0.77, + "grad_norm": 0.7571619153022766, + "learning_rate": 2.6526006573285268e-06, + "loss": 2.1088, + "step": 23043 + }, + { + "epoch": 0.77, + "grad_norm": 0.7577088475227356, + "learning_rate": 2.6518796865795173e-06, + "loss": 2.1233, + "step": 23044 + }, + { + "epoch": 0.77, + "grad_norm": 0.7685409188270569, + "learning_rate": 2.651158798845268e-06, + "loss": 2.091, + "step": 23045 + }, + { + "epoch": 0.77, + "grad_norm": 0.7400873899459839, + "learning_rate": 2.6504379941339164e-06, + "loss": 2.1133, + "step": 23046 + }, + { + "epoch": 0.77, + "grad_norm": 0.7754441499710083, + "learning_rate": 2.6497172724536126e-06, + "loss": 2.0508, + "step": 23047 + }, + { + "epoch": 0.77, + "grad_norm": 0.7478275299072266, + "learning_rate": 2.648996633812495e-06, + "loss": 2.0911, + "step": 23048 + }, + { + "epoch": 0.77, + "grad_norm": 0.7425262928009033, + "learning_rate": 2.6482760782187034e-06, + "loss": 2.1021, + "step": 23049 + }, + { + "epoch": 0.77, + "grad_norm": 0.8017736077308655, + "learning_rate": 2.6475556056803784e-06, + "loss": 2.1514, + "step": 23050 + }, + { + "epoch": 0.77, + "grad_norm": 0.7769775986671448, + "learning_rate": 2.6468352162056656e-06, + "loss": 2.0786, + "step": 23051 + }, + { + "epoch": 0.77, + "grad_norm": 0.7384634613990784, + "learning_rate": 2.6461149098026985e-06, + "loss": 2.0232, + "step": 23052 + }, + { + "epoch": 0.77, + "grad_norm": 0.7649946212768555, + "learning_rate": 2.645394686479613e-06, + "loss": 2.0206, + "step": 23053 + }, + { + "epoch": 0.77, + "grad_norm": 0.7619380950927734, + "learning_rate": 2.64467454624455e-06, + "loss": 1.9989, + "step": 23054 + }, + { + "epoch": 0.77, + "grad_norm": 0.748152494430542, + "learning_rate": 2.6439544891056445e-06, + "loss": 1.9973, + "step": 23055 + }, + { + "epoch": 0.77, + "grad_norm": 0.7547283172607422, + "learning_rate": 2.6432345150710257e-06, + "loss": 2.0618, + "step": 23056 + }, + { + "epoch": 0.77, + "grad_norm": 0.7643991708755493, + "learning_rate": 2.6425146241488332e-06, + "loss": 2.0876, + "step": 23057 + }, + { + "epoch": 0.77, + "grad_norm": 0.7142226099967957, + "learning_rate": 2.641794816347202e-06, + "loss": 1.9976, + "step": 23058 + }, + { + "epoch": 0.77, + "grad_norm": 0.7705869674682617, + "learning_rate": 2.6410750916742556e-06, + "loss": 2.0575, + "step": 23059 + }, + { + "epoch": 0.77, + "grad_norm": 0.7607940435409546, + "learning_rate": 2.6403554501381347e-06, + "loss": 2.0959, + "step": 23060 + }, + { + "epoch": 0.77, + "grad_norm": 0.7554407119750977, + "learning_rate": 2.6396358917469644e-06, + "loss": 1.969, + "step": 23061 + }, + { + "epoch": 0.77, + "grad_norm": 0.7196580767631531, + "learning_rate": 2.638916416508871e-06, + "loss": 2.0257, + "step": 23062 + }, + { + "epoch": 0.77, + "grad_norm": 0.7893468141555786, + "learning_rate": 2.6381970244319853e-06, + "loss": 2.0831, + "step": 23063 + }, + { + "epoch": 0.77, + "grad_norm": 0.7612413763999939, + "learning_rate": 2.6374777155244425e-06, + "loss": 2.056, + "step": 23064 + }, + { + "epoch": 0.77, + "grad_norm": 0.7462210655212402, + "learning_rate": 2.6367584897943543e-06, + "loss": 2.0002, + "step": 23065 + }, + { + "epoch": 0.77, + "grad_norm": 0.7644414305686951, + "learning_rate": 2.6360393472498548e-06, + "loss": 2.0238, + "step": 23066 + }, + { + "epoch": 0.77, + "grad_norm": 0.728872537612915, + "learning_rate": 2.635320287899069e-06, + "loss": 1.9148, + "step": 23067 + }, + { + "epoch": 0.77, + "grad_norm": 0.7525331377983093, + "learning_rate": 2.634601311750116e-06, + "loss": 2.0431, + "step": 23068 + }, + { + "epoch": 0.77, + "grad_norm": 0.7628155946731567, + "learning_rate": 2.6338824188111233e-06, + "loss": 2.0404, + "step": 23069 + }, + { + "epoch": 0.77, + "grad_norm": 0.7331541776657104, + "learning_rate": 2.6331636090902103e-06, + "loss": 2.0301, + "step": 23070 + }, + { + "epoch": 0.77, + "grad_norm": 0.7288308143615723, + "learning_rate": 2.632444882595494e-06, + "loss": 2.0739, + "step": 23071 + }, + { + "epoch": 0.77, + "grad_norm": 0.7466153502464294, + "learning_rate": 2.6317262393350982e-06, + "loss": 2.0188, + "step": 23072 + }, + { + "epoch": 0.77, + "grad_norm": 0.7413694262504578, + "learning_rate": 2.6310076793171447e-06, + "loss": 2.0118, + "step": 23073 + }, + { + "epoch": 0.77, + "grad_norm": 0.7112367153167725, + "learning_rate": 2.6302892025497473e-06, + "loss": 2.0452, + "step": 23074 + }, + { + "epoch": 0.77, + "grad_norm": 0.7467717528343201, + "learning_rate": 2.6295708090410198e-06, + "loss": 1.9391, + "step": 23075 + }, + { + "epoch": 0.77, + "grad_norm": 0.7682757377624512, + "learning_rate": 2.6288524987990847e-06, + "loss": 2.0164, + "step": 23076 + }, + { + "epoch": 0.77, + "grad_norm": 0.7590654492378235, + "learning_rate": 2.6281342718320525e-06, + "loss": 2.1162, + "step": 23077 + }, + { + "epoch": 0.77, + "grad_norm": 0.7253022789955139, + "learning_rate": 2.6274161281480403e-06, + "loss": 2.0863, + "step": 23078 + }, + { + "epoch": 0.77, + "grad_norm": 0.7303053736686707, + "learning_rate": 2.626698067755158e-06, + "loss": 2.0302, + "step": 23079 + }, + { + "epoch": 0.77, + "grad_norm": 0.7731384634971619, + "learning_rate": 2.625980090661523e-06, + "loss": 2.0783, + "step": 23080 + }, + { + "epoch": 0.77, + "grad_norm": 0.7907983660697937, + "learning_rate": 2.625262196875239e-06, + "loss": 2.0571, + "step": 23081 + }, + { + "epoch": 0.77, + "grad_norm": 0.7206768989562988, + "learning_rate": 2.624544386404425e-06, + "loss": 2.0121, + "step": 23082 + }, + { + "epoch": 0.77, + "grad_norm": 0.7531947493553162, + "learning_rate": 2.623826659257186e-06, + "loss": 2.0543, + "step": 23083 + }, + { + "epoch": 0.77, + "grad_norm": 0.7287341952323914, + "learning_rate": 2.623109015441627e-06, + "loss": 2.0724, + "step": 23084 + }, + { + "epoch": 0.77, + "grad_norm": 0.7168127298355103, + "learning_rate": 2.622391454965859e-06, + "loss": 1.9957, + "step": 23085 + }, + { + "epoch": 0.77, + "grad_norm": 0.7427310347557068, + "learning_rate": 2.6216739778379953e-06, + "loss": 2.1157, + "step": 23086 + }, + { + "epoch": 0.77, + "grad_norm": 0.7418673038482666, + "learning_rate": 2.620956584066128e-06, + "loss": 2.0584, + "step": 23087 + }, + { + "epoch": 0.77, + "grad_norm": 0.74381023645401, + "learning_rate": 2.6202392736583695e-06, + "loss": 2.0962, + "step": 23088 + }, + { + "epoch": 0.77, + "grad_norm": 0.7382112741470337, + "learning_rate": 2.6195220466228244e-06, + "loss": 2.0634, + "step": 23089 + }, + { + "epoch": 0.77, + "grad_norm": 0.7506797909736633, + "learning_rate": 2.618804902967592e-06, + "loss": 2.0018, + "step": 23090 + }, + { + "epoch": 0.77, + "grad_norm": 0.7490646839141846, + "learning_rate": 2.6180878427007793e-06, + "loss": 2.0783, + "step": 23091 + }, + { + "epoch": 0.77, + "grad_norm": 0.7715083360671997, + "learning_rate": 2.617370865830483e-06, + "loss": 2.0635, + "step": 23092 + }, + { + "epoch": 0.77, + "grad_norm": 0.852545440196991, + "learning_rate": 2.616653972364801e-06, + "loss": 2.0281, + "step": 23093 + }, + { + "epoch": 0.77, + "grad_norm": 0.7549353837966919, + "learning_rate": 2.6159371623118357e-06, + "loss": 2.0399, + "step": 23094 + }, + { + "epoch": 0.77, + "grad_norm": 0.7757030129432678, + "learning_rate": 2.6152204356796885e-06, + "loss": 2.0463, + "step": 23095 + }, + { + "epoch": 0.77, + "grad_norm": 0.7799726128578186, + "learning_rate": 2.6145037924764517e-06, + "loss": 2.0482, + "step": 23096 + }, + { + "epoch": 0.77, + "grad_norm": 0.7488247156143188, + "learning_rate": 2.6137872327102207e-06, + "loss": 2.0135, + "step": 23097 + }, + { + "epoch": 0.77, + "grad_norm": 0.7174603939056396, + "learning_rate": 2.6130707563890954e-06, + "loss": 2.0425, + "step": 23098 + }, + { + "epoch": 0.77, + "grad_norm": 0.7451650500297546, + "learning_rate": 2.6123543635211645e-06, + "loss": 2.0429, + "step": 23099 + }, + { + "epoch": 0.77, + "grad_norm": 0.7573601007461548, + "learning_rate": 2.611638054114528e-06, + "loss": 2.0069, + "step": 23100 + }, + { + "epoch": 0.77, + "grad_norm": 0.776407778263092, + "learning_rate": 2.6109218281772707e-06, + "loss": 2.0618, + "step": 23101 + }, + { + "epoch": 0.77, + "grad_norm": 0.7536882758140564, + "learning_rate": 2.6102056857174917e-06, + "loss": 2.0988, + "step": 23102 + }, + { + "epoch": 0.77, + "grad_norm": 0.7317177057266235, + "learning_rate": 2.6094896267432744e-06, + "loss": 1.9915, + "step": 23103 + }, + { + "epoch": 0.77, + "grad_norm": 0.7501627206802368, + "learning_rate": 2.608773651262716e-06, + "loss": 2.0656, + "step": 23104 + }, + { + "epoch": 0.77, + "grad_norm": 0.7642002701759338, + "learning_rate": 2.6080577592839007e-06, + "loss": 2.0248, + "step": 23105 + }, + { + "epoch": 0.77, + "grad_norm": 0.7600533366203308, + "learning_rate": 2.6073419508149147e-06, + "loss": 2.0362, + "step": 23106 + }, + { + "epoch": 0.77, + "grad_norm": 0.7495854496955872, + "learning_rate": 2.606626225863845e-06, + "loss": 2.0507, + "step": 23107 + }, + { + "epoch": 0.77, + "grad_norm": 0.7516706585884094, + "learning_rate": 2.605910584438783e-06, + "loss": 2.0418, + "step": 23108 + }, + { + "epoch": 0.77, + "grad_norm": 0.7292174696922302, + "learning_rate": 2.605195026547811e-06, + "loss": 2.0389, + "step": 23109 + }, + { + "epoch": 0.77, + "grad_norm": 0.7485889196395874, + "learning_rate": 2.6044795521990076e-06, + "loss": 2.1145, + "step": 23110 + }, + { + "epoch": 0.77, + "grad_norm": 0.7385496497154236, + "learning_rate": 2.603764161400464e-06, + "loss": 2.0826, + "step": 23111 + }, + { + "epoch": 0.77, + "grad_norm": 0.74369877576828, + "learning_rate": 2.603048854160254e-06, + "loss": 2.077, + "step": 23112 + }, + { + "epoch": 0.77, + "grad_norm": 0.7472503781318665, + "learning_rate": 2.6023336304864666e-06, + "loss": 2.0773, + "step": 23113 + }, + { + "epoch": 0.77, + "grad_norm": 0.7360715866088867, + "learning_rate": 2.601618490387179e-06, + "loss": 1.9985, + "step": 23114 + }, + { + "epoch": 0.77, + "grad_norm": 0.7419888377189636, + "learning_rate": 2.6009034338704666e-06, + "loss": 2.0924, + "step": 23115 + }, + { + "epoch": 0.77, + "grad_norm": 0.7567590475082397, + "learning_rate": 2.6001884609444093e-06, + "loss": 2.1087, + "step": 23116 + }, + { + "epoch": 0.77, + "grad_norm": 0.7183083295822144, + "learning_rate": 2.5994735716170904e-06, + "loss": 2.0426, + "step": 23117 + }, + { + "epoch": 0.77, + "grad_norm": 0.7556402087211609, + "learning_rate": 2.5987587658965817e-06, + "loss": 1.9819, + "step": 23118 + }, + { + "epoch": 0.77, + "grad_norm": 0.754566490650177, + "learning_rate": 2.598044043790957e-06, + "loss": 2.0659, + "step": 23119 + }, + { + "epoch": 0.77, + "grad_norm": 0.7535070180892944, + "learning_rate": 2.597329405308294e-06, + "loss": 2.0231, + "step": 23120 + }, + { + "epoch": 0.77, + "grad_norm": 0.7175498008728027, + "learning_rate": 2.5966148504566635e-06, + "loss": 2.0242, + "step": 23121 + }, + { + "epoch": 0.77, + "grad_norm": 0.7413702011108398, + "learning_rate": 2.5959003792441418e-06, + "loss": 2.0853, + "step": 23122 + }, + { + "epoch": 0.77, + "grad_norm": 0.7277328968048096, + "learning_rate": 2.5951859916787947e-06, + "loss": 2.0229, + "step": 23123 + }, + { + "epoch": 0.77, + "grad_norm": 0.7225123643875122, + "learning_rate": 2.5944716877687004e-06, + "loss": 2.0333, + "step": 23124 + }, + { + "epoch": 0.77, + "grad_norm": 0.7711116671562195, + "learning_rate": 2.5937574675219222e-06, + "loss": 1.944, + "step": 23125 + }, + { + "epoch": 0.77, + "grad_norm": 0.7642003297805786, + "learning_rate": 2.593043330946534e-06, + "loss": 2.0519, + "step": 23126 + }, + { + "epoch": 0.77, + "grad_norm": 0.7218152284622192, + "learning_rate": 2.5923292780506016e-06, + "loss": 2.1185, + "step": 23127 + }, + { + "epoch": 0.77, + "grad_norm": 0.7598666548728943, + "learning_rate": 2.591615308842189e-06, + "loss": 2.0577, + "step": 23128 + }, + { + "epoch": 0.77, + "grad_norm": 0.7734375, + "learning_rate": 2.590901423329365e-06, + "loss": 2.0318, + "step": 23129 + }, + { + "epoch": 0.77, + "grad_norm": 0.7248088717460632, + "learning_rate": 2.590187621520197e-06, + "loss": 1.9938, + "step": 23130 + }, + { + "epoch": 0.77, + "grad_norm": 0.737025797367096, + "learning_rate": 2.5894739034227468e-06, + "loss": 2.0121, + "step": 23131 + }, + { + "epoch": 0.77, + "grad_norm": 0.7295243740081787, + "learning_rate": 2.588760269045075e-06, + "loss": 2.0869, + "step": 23132 + }, + { + "epoch": 0.77, + "grad_norm": 0.7744941115379333, + "learning_rate": 2.5880467183952483e-06, + "loss": 2.0768, + "step": 23133 + }, + { + "epoch": 0.77, + "grad_norm": 0.767135739326477, + "learning_rate": 2.587333251481324e-06, + "loss": 2.0954, + "step": 23134 + }, + { + "epoch": 0.77, + "grad_norm": 0.7459158301353455, + "learning_rate": 2.5866198683113664e-06, + "loss": 2.0325, + "step": 23135 + }, + { + "epoch": 0.77, + "grad_norm": 0.718892514705658, + "learning_rate": 2.5859065688934302e-06, + "loss": 2.0098, + "step": 23136 + }, + { + "epoch": 0.77, + "grad_norm": 0.7337863445281982, + "learning_rate": 2.58519335323558e-06, + "loss": 2.0431, + "step": 23137 + }, + { + "epoch": 0.77, + "grad_norm": 0.741270899772644, + "learning_rate": 2.5844802213458666e-06, + "loss": 2.1027, + "step": 23138 + }, + { + "epoch": 0.77, + "grad_norm": 0.750564694404602, + "learning_rate": 2.583767173232352e-06, + "loss": 2.0825, + "step": 23139 + }, + { + "epoch": 0.77, + "grad_norm": 0.7308881282806396, + "learning_rate": 2.5830542089030906e-06, + "loss": 2.0268, + "step": 23140 + }, + { + "epoch": 0.77, + "grad_norm": 0.7434291839599609, + "learning_rate": 2.5823413283661323e-06, + "loss": 2.0363, + "step": 23141 + }, + { + "epoch": 0.77, + "grad_norm": 0.727092444896698, + "learning_rate": 2.581628531629534e-06, + "loss": 2.0588, + "step": 23142 + }, + { + "epoch": 0.77, + "grad_norm": 0.7616970539093018, + "learning_rate": 2.5809158187013527e-06, + "loss": 1.9752, + "step": 23143 + }, + { + "epoch": 0.77, + "grad_norm": 0.7606179118156433, + "learning_rate": 2.580203189589636e-06, + "loss": 2.1009, + "step": 23144 + }, + { + "epoch": 0.77, + "grad_norm": 0.7640833258628845, + "learning_rate": 2.5794906443024335e-06, + "loss": 2.0505, + "step": 23145 + }, + { + "epoch": 0.77, + "grad_norm": 0.7441621422767639, + "learning_rate": 2.5787781828477987e-06, + "loss": 2.0578, + "step": 23146 + }, + { + "epoch": 0.77, + "grad_norm": 0.7598081231117249, + "learning_rate": 2.578065805233776e-06, + "loss": 2.057, + "step": 23147 + }, + { + "epoch": 0.77, + "grad_norm": 0.7681798934936523, + "learning_rate": 2.577353511468419e-06, + "loss": 2.0535, + "step": 23148 + }, + { + "epoch": 0.77, + "grad_norm": 0.7360518574714661, + "learning_rate": 2.5766413015597726e-06, + "loss": 2.1291, + "step": 23149 + }, + { + "epoch": 0.77, + "grad_norm": 0.7576141953468323, + "learning_rate": 2.575929175515879e-06, + "loss": 2.0669, + "step": 23150 + }, + { + "epoch": 0.77, + "grad_norm": 0.7417619824409485, + "learning_rate": 2.575217133344786e-06, + "loss": 2.0052, + "step": 23151 + }, + { + "epoch": 0.77, + "grad_norm": 0.7638119459152222, + "learning_rate": 2.574505175054541e-06, + "loss": 2.014, + "step": 23152 + }, + { + "epoch": 0.77, + "grad_norm": 0.762856125831604, + "learning_rate": 2.5737933006531866e-06, + "loss": 2.0625, + "step": 23153 + }, + { + "epoch": 0.77, + "grad_norm": 0.7657785415649414, + "learning_rate": 2.5730815101487593e-06, + "loss": 2.0657, + "step": 23154 + }, + { + "epoch": 0.77, + "grad_norm": 0.7676966190338135, + "learning_rate": 2.572369803549307e-06, + "loss": 2.0354, + "step": 23155 + }, + { + "epoch": 0.77, + "grad_norm": 0.7719388604164124, + "learning_rate": 2.571658180862865e-06, + "loss": 2.0395, + "step": 23156 + }, + { + "epoch": 0.77, + "grad_norm": 0.7784674763679504, + "learning_rate": 2.5709466420974793e-06, + "loss": 2.0898, + "step": 23157 + }, + { + "epoch": 0.77, + "grad_norm": 0.7600753903388977, + "learning_rate": 2.5702351872611807e-06, + "loss": 2.0087, + "step": 23158 + }, + { + "epoch": 0.77, + "grad_norm": 0.7516697645187378, + "learning_rate": 2.569523816362014e-06, + "loss": 2.0531, + "step": 23159 + }, + { + "epoch": 0.77, + "grad_norm": 0.7239949107170105, + "learning_rate": 2.568812529408009e-06, + "loss": 2.0799, + "step": 23160 + }, + { + "epoch": 0.77, + "grad_norm": 0.7942061424255371, + "learning_rate": 2.5681013264072085e-06, + "loss": 2.0776, + "step": 23161 + }, + { + "epoch": 0.77, + "grad_norm": 0.7459508776664734, + "learning_rate": 2.567390207367644e-06, + "loss": 2.0292, + "step": 23162 + }, + { + "epoch": 0.77, + "grad_norm": 0.7878516912460327, + "learning_rate": 2.566679172297345e-06, + "loss": 2.0882, + "step": 23163 + }, + { + "epoch": 0.77, + "grad_norm": 0.7609124779701233, + "learning_rate": 2.565968221204349e-06, + "loss": 2.1078, + "step": 23164 + }, + { + "epoch": 0.77, + "grad_norm": 0.7543255686759949, + "learning_rate": 2.5652573540966896e-06, + "loss": 1.9891, + "step": 23165 + }, + { + "epoch": 0.77, + "grad_norm": 0.7049428820610046, + "learning_rate": 2.5645465709823968e-06, + "loss": 2.0339, + "step": 23166 + }, + { + "epoch": 0.77, + "grad_norm": 0.7621498703956604, + "learning_rate": 2.5638358718694955e-06, + "loss": 2.1192, + "step": 23167 + }, + { + "epoch": 0.77, + "grad_norm": 0.7518184185028076, + "learning_rate": 2.5631252567660212e-06, + "loss": 2.07, + "step": 23168 + }, + { + "epoch": 0.77, + "grad_norm": 0.7573378682136536, + "learning_rate": 2.562414725679997e-06, + "loss": 2.0919, + "step": 23169 + }, + { + "epoch": 0.77, + "grad_norm": 0.7513402104377747, + "learning_rate": 2.5617042786194547e-06, + "loss": 1.9786, + "step": 23170 + }, + { + "epoch": 0.77, + "grad_norm": 0.7442273497581482, + "learning_rate": 2.560993915592418e-06, + "loss": 1.9995, + "step": 23171 + }, + { + "epoch": 0.77, + "grad_norm": 0.7423152327537537, + "learning_rate": 2.5602836366069095e-06, + "loss": 2.0797, + "step": 23172 + }, + { + "epoch": 0.77, + "grad_norm": 0.7709124684333801, + "learning_rate": 2.5595734416709574e-06, + "loss": 2.0684, + "step": 23173 + }, + { + "epoch": 0.77, + "grad_norm": 0.7575253844261169, + "learning_rate": 2.558863330792586e-06, + "loss": 2.0416, + "step": 23174 + }, + { + "epoch": 0.77, + "grad_norm": 0.720293402671814, + "learning_rate": 2.5581533039798156e-06, + "loss": 1.9962, + "step": 23175 + }, + { + "epoch": 0.77, + "grad_norm": 0.7539215683937073, + "learning_rate": 2.5574433612406657e-06, + "loss": 2.0716, + "step": 23176 + }, + { + "epoch": 0.77, + "grad_norm": 0.7254588603973389, + "learning_rate": 2.556733502583161e-06, + "loss": 2.0819, + "step": 23177 + }, + { + "epoch": 0.77, + "grad_norm": 0.748740553855896, + "learning_rate": 2.5560237280153167e-06, + "loss": 2.044, + "step": 23178 + }, + { + "epoch": 0.77, + "grad_norm": 0.7665413618087769, + "learning_rate": 2.5553140375451567e-06, + "loss": 2.0638, + "step": 23179 + }, + { + "epoch": 0.77, + "grad_norm": 0.7500876784324646, + "learning_rate": 2.5546044311806926e-06, + "loss": 2.0746, + "step": 23180 + }, + { + "epoch": 0.77, + "grad_norm": 0.7474533319473267, + "learning_rate": 2.553894908929947e-06, + "loss": 1.9895, + "step": 23181 + }, + { + "epoch": 0.77, + "grad_norm": 0.742586076259613, + "learning_rate": 2.5531854708009298e-06, + "loss": 2.0376, + "step": 23182 + }, + { + "epoch": 0.77, + "grad_norm": 0.7254591584205627, + "learning_rate": 2.552476116801662e-06, + "loss": 2.0167, + "step": 23183 + }, + { + "epoch": 0.77, + "grad_norm": 0.7116720080375671, + "learning_rate": 2.5517668469401546e-06, + "loss": 2.0238, + "step": 23184 + }, + { + "epoch": 0.77, + "grad_norm": 0.776931881904602, + "learning_rate": 2.5510576612244164e-06, + "loss": 2.0222, + "step": 23185 + }, + { + "epoch": 0.77, + "grad_norm": 0.7377454042434692, + "learning_rate": 2.5503485596624645e-06, + "loss": 2.0903, + "step": 23186 + }, + { + "epoch": 0.77, + "grad_norm": 0.7658206820487976, + "learning_rate": 2.549639542262311e-06, + "loss": 2.0556, + "step": 23187 + }, + { + "epoch": 0.77, + "grad_norm": 0.7470245361328125, + "learning_rate": 2.548930609031963e-06, + "loss": 2.0382, + "step": 23188 + }, + { + "epoch": 0.77, + "grad_norm": 0.7469624876976013, + "learning_rate": 2.548221759979429e-06, + "loss": 2.0723, + "step": 23189 + }, + { + "epoch": 0.77, + "grad_norm": 0.7347672581672668, + "learning_rate": 2.5475129951127197e-06, + "loss": 1.9887, + "step": 23190 + }, + { + "epoch": 0.77, + "grad_norm": 0.734441876411438, + "learning_rate": 2.546804314439839e-06, + "loss": 2.0273, + "step": 23191 + }, + { + "epoch": 0.77, + "grad_norm": 0.7524778246879578, + "learning_rate": 2.546095717968795e-06, + "loss": 2.0643, + "step": 23192 + }, + { + "epoch": 0.77, + "grad_norm": 0.754393994808197, + "learning_rate": 2.545387205707599e-06, + "loss": 2.0395, + "step": 23193 + }, + { + "epoch": 0.77, + "grad_norm": 0.7166599631309509, + "learning_rate": 2.5446787776642436e-06, + "loss": 2.0141, + "step": 23194 + }, + { + "epoch": 0.77, + "grad_norm": 0.7594064474105835, + "learning_rate": 2.5439704338467377e-06, + "loss": 2.0152, + "step": 23195 + }, + { + "epoch": 0.77, + "grad_norm": 0.7374521493911743, + "learning_rate": 2.543262174263087e-06, + "loss": 2.0053, + "step": 23196 + }, + { + "epoch": 0.77, + "grad_norm": 0.7470736503601074, + "learning_rate": 2.5425539989212913e-06, + "loss": 2.0334, + "step": 23197 + }, + { + "epoch": 0.77, + "grad_norm": 0.7143168449401855, + "learning_rate": 2.5418459078293458e-06, + "loss": 2.0485, + "step": 23198 + }, + { + "epoch": 0.77, + "grad_norm": 0.760317862033844, + "learning_rate": 2.5411379009952573e-06, + "loss": 2.0388, + "step": 23199 + }, + { + "epoch": 0.77, + "grad_norm": 0.7659141421318054, + "learning_rate": 2.5404299784270193e-06, + "loss": 2.0568, + "step": 23200 + }, + { + "epoch": 0.77, + "grad_norm": 0.757943868637085, + "learning_rate": 2.539722140132634e-06, + "loss": 2.0628, + "step": 23201 + }, + { + "epoch": 0.77, + "grad_norm": 0.7608553767204285, + "learning_rate": 2.5390143861200932e-06, + "loss": 2.0473, + "step": 23202 + }, + { + "epoch": 0.77, + "grad_norm": 0.7445995211601257, + "learning_rate": 2.5383067163973983e-06, + "loss": 2.0883, + "step": 23203 + }, + { + "epoch": 0.77, + "grad_norm": 0.7729456424713135, + "learning_rate": 2.5375991309725388e-06, + "loss": 2.0178, + "step": 23204 + }, + { + "epoch": 0.77, + "grad_norm": 0.7609541416168213, + "learning_rate": 2.536891629853513e-06, + "loss": 1.94, + "step": 23205 + }, + { + "epoch": 0.77, + "grad_norm": 0.7459778189659119, + "learning_rate": 2.5361842130483116e-06, + "loss": 1.9653, + "step": 23206 + }, + { + "epoch": 0.77, + "grad_norm": 0.744012713432312, + "learning_rate": 2.5354768805649245e-06, + "loss": 1.9696, + "step": 23207 + }, + { + "epoch": 0.77, + "grad_norm": 0.7421319484710693, + "learning_rate": 2.534769632411345e-06, + "loss": 2.0124, + "step": 23208 + }, + { + "epoch": 0.77, + "grad_norm": 0.7398131489753723, + "learning_rate": 2.534062468595565e-06, + "loss": 2.0859, + "step": 23209 + }, + { + "epoch": 0.77, + "grad_norm": 0.7619217038154602, + "learning_rate": 2.5333553891255722e-06, + "loss": 2.1159, + "step": 23210 + }, + { + "epoch": 0.77, + "grad_norm": 0.7545625567436218, + "learning_rate": 2.5326483940093526e-06, + "loss": 2.0154, + "step": 23211 + }, + { + "epoch": 0.77, + "grad_norm": 0.7888229489326477, + "learning_rate": 2.5319414832548973e-06, + "loss": 2.0269, + "step": 23212 + }, + { + "epoch": 0.77, + "grad_norm": 0.7586806416511536, + "learning_rate": 2.5312346568701874e-06, + "loss": 2.031, + "step": 23213 + }, + { + "epoch": 0.77, + "grad_norm": 0.7270857691764832, + "learning_rate": 2.5305279148632113e-06, + "loss": 2.0826, + "step": 23214 + }, + { + "epoch": 0.77, + "grad_norm": 0.7326899170875549, + "learning_rate": 2.529821257241959e-06, + "loss": 2.0701, + "step": 23215 + }, + { + "epoch": 0.77, + "grad_norm": 0.7331536412239075, + "learning_rate": 2.529114684014402e-06, + "loss": 2.0617, + "step": 23216 + }, + { + "epoch": 0.77, + "grad_norm": 0.748709499835968, + "learning_rate": 2.5284081951885288e-06, + "loss": 2.0316, + "step": 23217 + }, + { + "epoch": 0.77, + "grad_norm": 0.7637634873390198, + "learning_rate": 2.5277017907723245e-06, + "loss": 2.0473, + "step": 23218 + }, + { + "epoch": 0.77, + "grad_norm": 0.7509251832962036, + "learning_rate": 2.5269954707737667e-06, + "loss": 2.0162, + "step": 23219 + }, + { + "epoch": 0.77, + "grad_norm": 0.7376219034194946, + "learning_rate": 2.5262892352008305e-06, + "loss": 1.996, + "step": 23220 + }, + { + "epoch": 0.77, + "grad_norm": 0.7695110440254211, + "learning_rate": 2.5255830840615014e-06, + "loss": 2.051, + "step": 23221 + }, + { + "epoch": 0.77, + "grad_norm": 0.7476853728294373, + "learning_rate": 2.5248770173637516e-06, + "loss": 2.0835, + "step": 23222 + }, + { + "epoch": 0.77, + "grad_norm": 0.7460974454879761, + "learning_rate": 2.524171035115561e-06, + "loss": 2.0642, + "step": 23223 + }, + { + "epoch": 0.77, + "grad_norm": 0.7163519263267517, + "learning_rate": 2.5234651373249076e-06, + "loss": 2.0019, + "step": 23224 + }, + { + "epoch": 0.77, + "grad_norm": 0.7410582304000854, + "learning_rate": 2.522759323999763e-06, + "loss": 2.0135, + "step": 23225 + }, + { + "epoch": 0.77, + "grad_norm": 0.7593340277671814, + "learning_rate": 2.5220535951480985e-06, + "loss": 2.0287, + "step": 23226 + }, + { + "epoch": 0.77, + "grad_norm": 0.746727705001831, + "learning_rate": 2.521347950777889e-06, + "loss": 2.114, + "step": 23227 + }, + { + "epoch": 0.77, + "grad_norm": 0.7773048281669617, + "learning_rate": 2.5206423908971145e-06, + "loss": 2.0817, + "step": 23228 + }, + { + "epoch": 0.77, + "grad_norm": 0.7414720058441162, + "learning_rate": 2.519936915513733e-06, + "loss": 2.0001, + "step": 23229 + }, + { + "epoch": 0.77, + "grad_norm": 0.7342730760574341, + "learning_rate": 2.51923152463572e-06, + "loss": 2.0192, + "step": 23230 + }, + { + "epoch": 0.77, + "grad_norm": 0.7706368565559387, + "learning_rate": 2.518526218271049e-06, + "loss": 2.016, + "step": 23231 + }, + { + "epoch": 0.77, + "grad_norm": 0.7652897834777832, + "learning_rate": 2.5178209964276832e-06, + "loss": 2.0796, + "step": 23232 + }, + { + "epoch": 0.77, + "grad_norm": 0.7546705603599548, + "learning_rate": 2.517115859113588e-06, + "loss": 2.0078, + "step": 23233 + }, + { + "epoch": 0.77, + "grad_norm": 0.7734571695327759, + "learning_rate": 2.5164108063367356e-06, + "loss": 2.0088, + "step": 23234 + }, + { + "epoch": 0.77, + "grad_norm": 0.735771894454956, + "learning_rate": 2.515705838105086e-06, + "loss": 2.0191, + "step": 23235 + }, + { + "epoch": 0.77, + "grad_norm": 0.7152687311172485, + "learning_rate": 2.5150009544266043e-06, + "loss": 2.031, + "step": 23236 + }, + { + "epoch": 0.77, + "grad_norm": 0.7341321706771851, + "learning_rate": 2.5142961553092614e-06, + "loss": 2.0122, + "step": 23237 + }, + { + "epoch": 0.77, + "grad_norm": 0.7482951283454895, + "learning_rate": 2.5135914407610073e-06, + "loss": 2.0583, + "step": 23238 + }, + { + "epoch": 0.77, + "grad_norm": 0.76332688331604, + "learning_rate": 2.5128868107898107e-06, + "loss": 2.0034, + "step": 23239 + }, + { + "epoch": 0.77, + "grad_norm": 0.7647858262062073, + "learning_rate": 2.512182265403633e-06, + "loss": 2.1061, + "step": 23240 + }, + { + "epoch": 0.77, + "grad_norm": 0.7258380055427551, + "learning_rate": 2.5114778046104325e-06, + "loss": 2.0308, + "step": 23241 + }, + { + "epoch": 0.77, + "grad_norm": 0.747856080532074, + "learning_rate": 2.510773428418164e-06, + "loss": 2.0873, + "step": 23242 + }, + { + "epoch": 0.77, + "grad_norm": 0.7488059401512146, + "learning_rate": 2.5100691368347876e-06, + "loss": 2.1389, + "step": 23243 + }, + { + "epoch": 0.77, + "grad_norm": 0.7324599623680115, + "learning_rate": 2.509364929868263e-06, + "loss": 2.0919, + "step": 23244 + }, + { + "epoch": 0.77, + "grad_norm": 0.7501180171966553, + "learning_rate": 2.5086608075265415e-06, + "loss": 2.059, + "step": 23245 + }, + { + "epoch": 0.77, + "grad_norm": 0.7471052408218384, + "learning_rate": 2.5079567698175835e-06, + "loss": 1.9456, + "step": 23246 + }, + { + "epoch": 0.77, + "grad_norm": 0.7212860584259033, + "learning_rate": 2.5072528167493383e-06, + "loss": 2.0323, + "step": 23247 + }, + { + "epoch": 0.77, + "grad_norm": 0.7633925080299377, + "learning_rate": 2.5065489483297556e-06, + "loss": 2.0374, + "step": 23248 + }, + { + "epoch": 0.77, + "grad_norm": 0.7668169736862183, + "learning_rate": 2.5058451645667927e-06, + "loss": 2.0166, + "step": 23249 + }, + { + "epoch": 0.77, + "grad_norm": 0.7470428943634033, + "learning_rate": 2.505141465468405e-06, + "loss": 2.0623, + "step": 23250 + }, + { + "epoch": 0.77, + "grad_norm": 0.7245231866836548, + "learning_rate": 2.5044378510425303e-06, + "loss": 2.0282, + "step": 23251 + }, + { + "epoch": 0.77, + "grad_norm": 0.7383379340171814, + "learning_rate": 2.5037343212971232e-06, + "loss": 2.012, + "step": 23252 + }, + { + "epoch": 0.77, + "grad_norm": 0.7377979159355164, + "learning_rate": 2.5030308762401366e-06, + "loss": 2.0409, + "step": 23253 + }, + { + "epoch": 0.77, + "grad_norm": 0.7442428469657898, + "learning_rate": 2.50232751587951e-06, + "loss": 2.0598, + "step": 23254 + }, + { + "epoch": 0.77, + "grad_norm": 0.7664313316345215, + "learning_rate": 2.501624240223196e-06, + "loss": 1.959, + "step": 23255 + }, + { + "epoch": 0.77, + "grad_norm": 0.7458236813545227, + "learning_rate": 2.500921049279137e-06, + "loss": 2.0385, + "step": 23256 + }, + { + "epoch": 0.77, + "grad_norm": 0.7332466244697571, + "learning_rate": 2.500217943055274e-06, + "loss": 2.0324, + "step": 23257 + }, + { + "epoch": 0.77, + "grad_norm": 0.7495574355125427, + "learning_rate": 2.499514921559554e-06, + "loss": 2.0457, + "step": 23258 + }, + { + "epoch": 0.77, + "grad_norm": 0.7925146818161011, + "learning_rate": 2.4988119847999214e-06, + "loss": 2.1001, + "step": 23259 + }, + { + "epoch": 0.77, + "grad_norm": 0.7526647448539734, + "learning_rate": 2.4981091327843143e-06, + "loss": 1.9662, + "step": 23260 + }, + { + "epoch": 0.77, + "grad_norm": 0.7690455913543701, + "learning_rate": 2.4974063655206717e-06, + "loss": 2.0489, + "step": 23261 + }, + { + "epoch": 0.77, + "grad_norm": 0.7405413389205933, + "learning_rate": 2.4967036830169365e-06, + "loss": 2.0072, + "step": 23262 + }, + { + "epoch": 0.77, + "grad_norm": 0.7440643906593323, + "learning_rate": 2.4960010852810467e-06, + "loss": 2.0227, + "step": 23263 + }, + { + "epoch": 0.77, + "grad_norm": 0.726311206817627, + "learning_rate": 2.4952985723209365e-06, + "loss": 2.0435, + "step": 23264 + }, + { + "epoch": 0.77, + "grad_norm": 0.7640082240104675, + "learning_rate": 2.4945961441445443e-06, + "loss": 2.0244, + "step": 23265 + }, + { + "epoch": 0.77, + "grad_norm": 0.7541269063949585, + "learning_rate": 2.4938938007598092e-06, + "loss": 2.0845, + "step": 23266 + }, + { + "epoch": 0.77, + "grad_norm": 0.7884525060653687, + "learning_rate": 2.4931915421746588e-06, + "loss": 2.1212, + "step": 23267 + }, + { + "epoch": 0.77, + "grad_norm": 0.7179670333862305, + "learning_rate": 2.492489368397035e-06, + "loss": 2.0578, + "step": 23268 + }, + { + "epoch": 0.77, + "grad_norm": 0.7450252175331116, + "learning_rate": 2.4917872794348673e-06, + "loss": 2.0444, + "step": 23269 + }, + { + "epoch": 0.77, + "grad_norm": 0.766559898853302, + "learning_rate": 2.4910852752960823e-06, + "loss": 2.0504, + "step": 23270 + }, + { + "epoch": 0.77, + "grad_norm": 0.7627468109130859, + "learning_rate": 2.490383355988616e-06, + "loss": 2.1085, + "step": 23271 + }, + { + "epoch": 0.77, + "grad_norm": 0.738466203212738, + "learning_rate": 2.489681521520403e-06, + "loss": 2.0616, + "step": 23272 + }, + { + "epoch": 0.77, + "grad_norm": 0.7826035618782043, + "learning_rate": 2.488979771899361e-06, + "loss": 2.0134, + "step": 23273 + }, + { + "epoch": 0.77, + "grad_norm": 0.7297622561454773, + "learning_rate": 2.488278107133424e-06, + "loss": 2.0166, + "step": 23274 + }, + { + "epoch": 0.77, + "grad_norm": 0.7571211457252502, + "learning_rate": 2.4875765272305206e-06, + "loss": 2.0195, + "step": 23275 + }, + { + "epoch": 0.77, + "grad_norm": 0.7165980339050293, + "learning_rate": 2.4868750321985724e-06, + "loss": 1.9945, + "step": 23276 + }, + { + "epoch": 0.77, + "grad_norm": 0.7498269081115723, + "learning_rate": 2.48617362204551e-06, + "loss": 2.0576, + "step": 23277 + }, + { + "epoch": 0.77, + "grad_norm": 0.7377191781997681, + "learning_rate": 2.4854722967792543e-06, + "loss": 2.076, + "step": 23278 + }, + { + "epoch": 0.77, + "grad_norm": 0.7650166153907776, + "learning_rate": 2.4847710564077265e-06, + "loss": 2.0403, + "step": 23279 + }, + { + "epoch": 0.77, + "grad_norm": 0.7682573795318604, + "learning_rate": 2.48406990093885e-06, + "loss": 2.0373, + "step": 23280 + }, + { + "epoch": 0.77, + "grad_norm": 0.7513816356658936, + "learning_rate": 2.4833688303805503e-06, + "loss": 2.033, + "step": 23281 + }, + { + "epoch": 0.77, + "grad_norm": 0.7702515721321106, + "learning_rate": 2.4826678447407436e-06, + "loss": 2.1125, + "step": 23282 + }, + { + "epoch": 0.77, + "grad_norm": 0.7328887581825256, + "learning_rate": 2.4819669440273486e-06, + "loss": 2.009, + "step": 23283 + }, + { + "epoch": 0.77, + "grad_norm": 0.7822847366333008, + "learning_rate": 2.4812661282482876e-06, + "loss": 2.0695, + "step": 23284 + }, + { + "epoch": 0.77, + "grad_norm": 0.7292688488960266, + "learning_rate": 2.480565397411474e-06, + "loss": 2.0433, + "step": 23285 + }, + { + "epoch": 0.77, + "grad_norm": 0.7741066813468933, + "learning_rate": 2.479864751524824e-06, + "loss": 2.0319, + "step": 23286 + }, + { + "epoch": 0.77, + "grad_norm": 0.7526381611824036, + "learning_rate": 2.479164190596255e-06, + "loss": 2.0949, + "step": 23287 + }, + { + "epoch": 0.77, + "grad_norm": 0.7720553278923035, + "learning_rate": 2.4784637146336844e-06, + "loss": 2.0102, + "step": 23288 + }, + { + "epoch": 0.77, + "grad_norm": 0.7315800189971924, + "learning_rate": 2.4777633236450193e-06, + "loss": 2.0366, + "step": 23289 + }, + { + "epoch": 0.77, + "grad_norm": 0.768068253993988, + "learning_rate": 2.4770630176381783e-06, + "loss": 2.1025, + "step": 23290 + }, + { + "epoch": 0.77, + "grad_norm": 0.7632604241371155, + "learning_rate": 2.4763627966210702e-06, + "loss": 2.0998, + "step": 23291 + }, + { + "epoch": 0.77, + "grad_norm": 0.8026905655860901, + "learning_rate": 2.4756626606016042e-06, + "loss": 2.0132, + "step": 23292 + }, + { + "epoch": 0.77, + "grad_norm": 0.7429221868515015, + "learning_rate": 2.474962609587691e-06, + "loss": 2.0537, + "step": 23293 + }, + { + "epoch": 0.77, + "grad_norm": 0.7574822306632996, + "learning_rate": 2.474262643587246e-06, + "loss": 2.023, + "step": 23294 + }, + { + "epoch": 0.78, + "grad_norm": 0.7176968455314636, + "learning_rate": 2.473562762608166e-06, + "loss": 2.0575, + "step": 23295 + }, + { + "epoch": 0.78, + "grad_norm": 0.7388931512832642, + "learning_rate": 2.4728629666583616e-06, + "loss": 1.9961, + "step": 23296 + }, + { + "epoch": 0.78, + "grad_norm": 0.744503915309906, + "learning_rate": 2.4721632557457444e-06, + "loss": 2.0611, + "step": 23297 + }, + { + "epoch": 0.78, + "grad_norm": 0.7026798725128174, + "learning_rate": 2.4714636298782114e-06, + "loss": 2.0184, + "step": 23298 + }, + { + "epoch": 0.78, + "grad_norm": 0.7590958476066589, + "learning_rate": 2.470764089063673e-06, + "loss": 2.0043, + "step": 23299 + }, + { + "epoch": 0.78, + "grad_norm": 0.7769315838813782, + "learning_rate": 2.47006463331003e-06, + "loss": 2.0498, + "step": 23300 + }, + { + "epoch": 0.78, + "grad_norm": 0.7500719428062439, + "learning_rate": 2.4693652626251798e-06, + "loss": 2.036, + "step": 23301 + }, + { + "epoch": 0.78, + "grad_norm": 0.7416940927505493, + "learning_rate": 2.4686659770170287e-06, + "loss": 2.0659, + "step": 23302 + }, + { + "epoch": 0.78, + "grad_norm": 0.7317529320716858, + "learning_rate": 2.4679667764934777e-06, + "loss": 1.9662, + "step": 23303 + }, + { + "epoch": 0.78, + "grad_norm": 0.7453404068946838, + "learning_rate": 2.4672676610624233e-06, + "loss": 2.0871, + "step": 23304 + }, + { + "epoch": 0.78, + "grad_norm": 0.736285924911499, + "learning_rate": 2.4665686307317625e-06, + "loss": 2.0153, + "step": 23305 + }, + { + "epoch": 0.78, + "grad_norm": 0.7598045468330383, + "learning_rate": 2.4658696855093967e-06, + "loss": 2.0575, + "step": 23306 + }, + { + "epoch": 0.78, + "grad_norm": 0.7303100228309631, + "learning_rate": 2.465170825403217e-06, + "loss": 2.0246, + "step": 23307 + }, + { + "epoch": 0.78, + "grad_norm": 0.7114728093147278, + "learning_rate": 2.464472050421124e-06, + "loss": 1.9969, + "step": 23308 + }, + { + "epoch": 0.78, + "grad_norm": 0.7648562788963318, + "learning_rate": 2.463773360571007e-06, + "loss": 2.0485, + "step": 23309 + }, + { + "epoch": 0.78, + "grad_norm": 0.7444080710411072, + "learning_rate": 2.463074755860765e-06, + "loss": 1.9549, + "step": 23310 + }, + { + "epoch": 0.78, + "grad_norm": 0.7734590768814087, + "learning_rate": 2.462376236298284e-06, + "loss": 2.1079, + "step": 23311 + }, + { + "epoch": 0.78, + "grad_norm": 0.7090965509414673, + "learning_rate": 2.4616778018914623e-06, + "loss": 2.0665, + "step": 23312 + }, + { + "epoch": 0.78, + "grad_norm": 0.70216304063797, + "learning_rate": 2.4609794526481854e-06, + "loss": 2.009, + "step": 23313 + }, + { + "epoch": 0.78, + "grad_norm": 0.764106810092926, + "learning_rate": 2.460281188576343e-06, + "loss": 2.0504, + "step": 23314 + }, + { + "epoch": 0.78, + "grad_norm": 0.7769772410392761, + "learning_rate": 2.4595830096838247e-06, + "loss": 2.0354, + "step": 23315 + }, + { + "epoch": 0.78, + "grad_norm": 0.7483271956443787, + "learning_rate": 2.4588849159785245e-06, + "loss": 2.003, + "step": 23316 + }, + { + "epoch": 0.78, + "grad_norm": 0.7399762868881226, + "learning_rate": 2.458186907468316e-06, + "loss": 2.0285, + "step": 23317 + }, + { + "epoch": 0.78, + "grad_norm": 0.7650138139724731, + "learning_rate": 2.4574889841610926e-06, + "loss": 2.0827, + "step": 23318 + }, + { + "epoch": 0.78, + "grad_norm": 0.7572110295295715, + "learning_rate": 2.456791146064741e-06, + "loss": 2.0287, + "step": 23319 + }, + { + "epoch": 0.78, + "grad_norm": 0.7604380249977112, + "learning_rate": 2.4560933931871402e-06, + "loss": 2.0631, + "step": 23320 + }, + { + "epoch": 0.78, + "grad_norm": 0.7237051129341125, + "learning_rate": 2.4553957255361772e-06, + "loss": 2.0057, + "step": 23321 + }, + { + "epoch": 0.78, + "grad_norm": 0.7260814309120178, + "learning_rate": 2.4546981431197316e-06, + "loss": 2.0788, + "step": 23322 + }, + { + "epoch": 0.78, + "grad_norm": 0.7824169397354126, + "learning_rate": 2.454000645945682e-06, + "loss": 2.106, + "step": 23323 + }, + { + "epoch": 0.78, + "grad_norm": 0.7276002764701843, + "learning_rate": 2.453303234021911e-06, + "loss": 2.0964, + "step": 23324 + }, + { + "epoch": 0.78, + "grad_norm": 0.7538354992866516, + "learning_rate": 2.4526059073562993e-06, + "loss": 2.0705, + "step": 23325 + }, + { + "epoch": 0.78, + "grad_norm": 0.7723227143287659, + "learning_rate": 2.451908665956724e-06, + "loss": 2.0597, + "step": 23326 + }, + { + "epoch": 0.78, + "grad_norm": 0.7338346838951111, + "learning_rate": 2.4512115098310563e-06, + "loss": 2.0574, + "step": 23327 + }, + { + "epoch": 0.78, + "grad_norm": 0.7396935224533081, + "learning_rate": 2.450514438987178e-06, + "loss": 2.1167, + "step": 23328 + }, + { + "epoch": 0.78, + "grad_norm": 0.7737733721733093, + "learning_rate": 2.4498174534329667e-06, + "loss": 1.9939, + "step": 23329 + }, + { + "epoch": 0.78, + "grad_norm": 0.7365962266921997, + "learning_rate": 2.449120553176292e-06, + "loss": 2.024, + "step": 23330 + }, + { + "epoch": 0.78, + "grad_norm": 0.7541148662567139, + "learning_rate": 2.4484237382250254e-06, + "loss": 1.9948, + "step": 23331 + }, + { + "epoch": 0.78, + "grad_norm": 0.7303698658943176, + "learning_rate": 2.4477270085870442e-06, + "loss": 1.9956, + "step": 23332 + }, + { + "epoch": 0.78, + "grad_norm": 0.7327603101730347, + "learning_rate": 2.4470303642702154e-06, + "loss": 1.9941, + "step": 23333 + }, + { + "epoch": 0.78, + "grad_norm": 0.7330635786056519, + "learning_rate": 2.4463338052824125e-06, + "loss": 2.0497, + "step": 23334 + }, + { + "epoch": 0.78, + "grad_norm": 0.7537268400192261, + "learning_rate": 2.4456373316315053e-06, + "loss": 2.0435, + "step": 23335 + }, + { + "epoch": 0.78, + "grad_norm": 0.7413952946662903, + "learning_rate": 2.4449409433253556e-06, + "loss": 2.0638, + "step": 23336 + }, + { + "epoch": 0.78, + "grad_norm": 0.7319790124893188, + "learning_rate": 2.4442446403718356e-06, + "loss": 2.0423, + "step": 23337 + }, + { + "epoch": 0.78, + "grad_norm": 0.7612069249153137, + "learning_rate": 2.443548422778815e-06, + "loss": 1.9948, + "step": 23338 + }, + { + "epoch": 0.78, + "grad_norm": 0.7573962807655334, + "learning_rate": 2.4428522905541564e-06, + "loss": 1.9124, + "step": 23339 + }, + { + "epoch": 0.78, + "grad_norm": 0.7579341530799866, + "learning_rate": 2.44215624370572e-06, + "loss": 2.0787, + "step": 23340 + }, + { + "epoch": 0.78, + "grad_norm": 0.7847955822944641, + "learning_rate": 2.441460282241376e-06, + "loss": 2.0189, + "step": 23341 + }, + { + "epoch": 0.78, + "grad_norm": 0.7587438821792603, + "learning_rate": 2.440764406168981e-06, + "loss": 2.058, + "step": 23342 + }, + { + "epoch": 0.78, + "grad_norm": 0.7209582924842834, + "learning_rate": 2.4400686154964027e-06, + "loss": 2.0353, + "step": 23343 + }, + { + "epoch": 0.78, + "grad_norm": 0.7249478697776794, + "learning_rate": 2.4393729102314955e-06, + "loss": 2.0482, + "step": 23344 + }, + { + "epoch": 0.78, + "grad_norm": 0.7305390238761902, + "learning_rate": 2.4386772903821254e-06, + "loss": 2.08, + "step": 23345 + }, + { + "epoch": 0.78, + "grad_norm": 0.7638789415359497, + "learning_rate": 2.4379817559561445e-06, + "loss": 2.0371, + "step": 23346 + }, + { + "epoch": 0.78, + "grad_norm": 0.7391451597213745, + "learning_rate": 2.437286306961417e-06, + "loss": 2.0086, + "step": 23347 + }, + { + "epoch": 0.78, + "grad_norm": 0.7408673763275146, + "learning_rate": 2.4365909434057978e-06, + "loss": 2.0424, + "step": 23348 + }, + { + "epoch": 0.78, + "grad_norm": 0.750360906124115, + "learning_rate": 2.4358956652971367e-06, + "loss": 2.051, + "step": 23349 + }, + { + "epoch": 0.78, + "grad_norm": 0.7405845522880554, + "learning_rate": 2.4352004726432944e-06, + "loss": 2.1148, + "step": 23350 + }, + { + "epoch": 0.78, + "grad_norm": 0.7701925039291382, + "learning_rate": 2.4345053654521267e-06, + "loss": 2.092, + "step": 23351 + }, + { + "epoch": 0.78, + "grad_norm": 0.7555758357048035, + "learning_rate": 2.433810343731483e-06, + "loss": 2.0093, + "step": 23352 + }, + { + "epoch": 0.78, + "grad_norm": 0.7479721903800964, + "learning_rate": 2.4331154074892138e-06, + "loss": 2.0814, + "step": 23353 + }, + { + "epoch": 0.78, + "grad_norm": 0.7460130453109741, + "learning_rate": 2.432420556733175e-06, + "loss": 1.9991, + "step": 23354 + }, + { + "epoch": 0.78, + "grad_norm": 0.732802152633667, + "learning_rate": 2.43172579147121e-06, + "loss": 2.0471, + "step": 23355 + }, + { + "epoch": 0.78, + "grad_norm": 0.7451066970825195, + "learning_rate": 2.431031111711175e-06, + "loss": 2.0138, + "step": 23356 + }, + { + "epoch": 0.78, + "grad_norm": 0.7391600012779236, + "learning_rate": 2.430336517460914e-06, + "loss": 2.0588, + "step": 23357 + }, + { + "epoch": 0.78, + "grad_norm": 0.7460057735443115, + "learning_rate": 2.4296420087282725e-06, + "loss": 2.1024, + "step": 23358 + }, + { + "epoch": 0.78, + "grad_norm": 0.7350541353225708, + "learning_rate": 2.4289475855210988e-06, + "loss": 2.0431, + "step": 23359 + }, + { + "epoch": 0.78, + "grad_norm": 0.7730778455734253, + "learning_rate": 2.428253247847241e-06, + "loss": 2.0329, + "step": 23360 + }, + { + "epoch": 0.78, + "grad_norm": 0.7575381994247437, + "learning_rate": 2.4275589957145408e-06, + "loss": 2.0201, + "step": 23361 + }, + { + "epoch": 0.78, + "grad_norm": 0.7797570824623108, + "learning_rate": 2.4268648291308384e-06, + "loss": 2.0699, + "step": 23362 + }, + { + "epoch": 0.78, + "grad_norm": 0.7381778955459595, + "learning_rate": 2.426170748103981e-06, + "loss": 1.9464, + "step": 23363 + }, + { + "epoch": 0.78, + "grad_norm": 0.7415148615837097, + "learning_rate": 2.4254767526418056e-06, + "loss": 1.9797, + "step": 23364 + }, + { + "epoch": 0.78, + "grad_norm": 0.7837454676628113, + "learning_rate": 2.424782842752157e-06, + "loss": 2.0331, + "step": 23365 + }, + { + "epoch": 0.78, + "grad_norm": 0.7485068440437317, + "learning_rate": 2.42408901844287e-06, + "loss": 2.0165, + "step": 23366 + }, + { + "epoch": 0.78, + "grad_norm": 0.7541255950927734, + "learning_rate": 2.4233952797217876e-06, + "loss": 2.0068, + "step": 23367 + }, + { + "epoch": 0.78, + "grad_norm": 0.7533348202705383, + "learning_rate": 2.422701626596743e-06, + "loss": 1.9922, + "step": 23368 + }, + { + "epoch": 0.78, + "grad_norm": 0.7371532320976257, + "learning_rate": 2.422008059075577e-06, + "loss": 2.0418, + "step": 23369 + }, + { + "epoch": 0.78, + "grad_norm": 0.7413358092308044, + "learning_rate": 2.421314577166123e-06, + "loss": 2.067, + "step": 23370 + }, + { + "epoch": 0.78, + "grad_norm": 0.7418953776359558, + "learning_rate": 2.4206211808762127e-06, + "loss": 2.0421, + "step": 23371 + }, + { + "epoch": 0.78, + "grad_norm": 0.7781485319137573, + "learning_rate": 2.419927870213682e-06, + "loss": 2.1183, + "step": 23372 + }, + { + "epoch": 0.78, + "grad_norm": 0.7504087686538696, + "learning_rate": 2.419234645186367e-06, + "loss": 2.0605, + "step": 23373 + }, + { + "epoch": 0.78, + "grad_norm": 0.7601073384284973, + "learning_rate": 2.4185415058020956e-06, + "loss": 2.0531, + "step": 23374 + }, + { + "epoch": 0.78, + "grad_norm": 0.7696033120155334, + "learning_rate": 2.417848452068696e-06, + "loss": 2.0622, + "step": 23375 + }, + { + "epoch": 0.78, + "grad_norm": 0.770108163356781, + "learning_rate": 2.417155483994005e-06, + "loss": 2.0496, + "step": 23376 + }, + { + "epoch": 0.78, + "grad_norm": 0.7269140481948853, + "learning_rate": 2.416462601585844e-06, + "loss": 2.0671, + "step": 23377 + }, + { + "epoch": 0.78, + "grad_norm": 0.738740861415863, + "learning_rate": 2.4157698048520473e-06, + "loss": 2.0339, + "step": 23378 + }, + { + "epoch": 0.78, + "grad_norm": 0.7243169546127319, + "learning_rate": 2.4150770938004374e-06, + "loss": 2.0766, + "step": 23379 + }, + { + "epoch": 0.78, + "grad_norm": 0.7414936423301697, + "learning_rate": 2.4143844684388394e-06, + "loss": 1.996, + "step": 23380 + }, + { + "epoch": 0.78, + "grad_norm": 0.7001222968101501, + "learning_rate": 2.4136919287750803e-06, + "loss": 2.0642, + "step": 23381 + }, + { + "epoch": 0.78, + "grad_norm": 0.7368388772010803, + "learning_rate": 2.412999474816986e-06, + "loss": 2.0173, + "step": 23382 + }, + { + "epoch": 0.78, + "grad_norm": 0.752288281917572, + "learning_rate": 2.412307106572378e-06, + "loss": 2.0088, + "step": 23383 + }, + { + "epoch": 0.78, + "grad_norm": 0.7396050095558167, + "learning_rate": 2.4116148240490745e-06, + "loss": 2.0576, + "step": 23384 + }, + { + "epoch": 0.78, + "grad_norm": 0.7667602896690369, + "learning_rate": 2.4109226272549015e-06, + "loss": 2.0893, + "step": 23385 + }, + { + "epoch": 0.78, + "grad_norm": 0.7315330505371094, + "learning_rate": 2.4102305161976746e-06, + "loss": 1.9714, + "step": 23386 + }, + { + "epoch": 0.78, + "grad_norm": 0.7568930983543396, + "learning_rate": 2.409538490885218e-06, + "loss": 2.0445, + "step": 23387 + }, + { + "epoch": 0.78, + "grad_norm": 0.748307466506958, + "learning_rate": 2.4088465513253446e-06, + "loss": 2.0716, + "step": 23388 + }, + { + "epoch": 0.78, + "grad_norm": 0.710921585559845, + "learning_rate": 2.408154697525876e-06, + "loss": 2.0552, + "step": 23389 + }, + { + "epoch": 0.78, + "grad_norm": 0.7548015713691711, + "learning_rate": 2.407462929494625e-06, + "loss": 2.0957, + "step": 23390 + }, + { + "epoch": 0.78, + "grad_norm": 0.733204185962677, + "learning_rate": 2.40677124723941e-06, + "loss": 1.9471, + "step": 23391 + }, + { + "epoch": 0.78, + "grad_norm": 0.7375377416610718, + "learning_rate": 2.406079650768044e-06, + "loss": 2.1176, + "step": 23392 + }, + { + "epoch": 0.78, + "grad_norm": 0.7408237457275391, + "learning_rate": 2.4053881400883363e-06, + "loss": 1.9548, + "step": 23393 + }, + { + "epoch": 0.78, + "grad_norm": 0.7223044633865356, + "learning_rate": 2.4046967152081025e-06, + "loss": 2.0716, + "step": 23394 + }, + { + "epoch": 0.78, + "grad_norm": 0.7372767329216003, + "learning_rate": 2.4040053761351566e-06, + "loss": 2.0356, + "step": 23395 + }, + { + "epoch": 0.78, + "grad_norm": 0.7458848357200623, + "learning_rate": 2.4033141228773073e-06, + "loss": 2.0837, + "step": 23396 + }, + { + "epoch": 0.78, + "grad_norm": 0.7781601548194885, + "learning_rate": 2.4026229554423588e-06, + "loss": 1.9209, + "step": 23397 + }, + { + "epoch": 0.78, + "grad_norm": 0.756543755531311, + "learning_rate": 2.401931873838127e-06, + "loss": 1.9509, + "step": 23398 + }, + { + "epoch": 0.78, + "grad_norm": 0.7493532299995422, + "learning_rate": 2.4012408780724127e-06, + "loss": 2.0678, + "step": 23399 + }, + { + "epoch": 0.78, + "grad_norm": 0.76082843542099, + "learning_rate": 2.4005499681530264e-06, + "loss": 2.1175, + "step": 23400 + }, + { + "epoch": 0.78, + "grad_norm": 0.7614288926124573, + "learning_rate": 2.399859144087778e-06, + "loss": 2.0424, + "step": 23401 + }, + { + "epoch": 0.78, + "grad_norm": 0.748488187789917, + "learning_rate": 2.3991684058844624e-06, + "loss": 2.0088, + "step": 23402 + }, + { + "epoch": 0.78, + "grad_norm": 0.729911744594574, + "learning_rate": 2.398477753550886e-06, + "loss": 2.03, + "step": 23403 + }, + { + "epoch": 0.78, + "grad_norm": 0.7355022430419922, + "learning_rate": 2.3977871870948566e-06, + "loss": 2.0538, + "step": 23404 + }, + { + "epoch": 0.78, + "grad_norm": 0.7184388041496277, + "learning_rate": 2.3970967065241724e-06, + "loss": 2.0912, + "step": 23405 + }, + { + "epoch": 0.78, + "grad_norm": 0.7494268417358398, + "learning_rate": 2.3964063118466308e-06, + "loss": 2.0468, + "step": 23406 + }, + { + "epoch": 0.78, + "grad_norm": 0.7325255274772644, + "learning_rate": 2.3957160030700364e-06, + "loss": 2.1015, + "step": 23407 + }, + { + "epoch": 0.78, + "grad_norm": 0.7660855650901794, + "learning_rate": 2.395025780202185e-06, + "loss": 2.0885, + "step": 23408 + }, + { + "epoch": 0.78, + "grad_norm": 0.7530097961425781, + "learning_rate": 2.3943356432508767e-06, + "loss": 2.0354, + "step": 23409 + }, + { + "epoch": 0.78, + "grad_norm": 0.7253831028938293, + "learning_rate": 2.3936455922239056e-06, + "loss": 1.9908, + "step": 23410 + }, + { + "epoch": 0.78, + "grad_norm": 0.7819858193397522, + "learning_rate": 2.39295562712907e-06, + "loss": 2.0413, + "step": 23411 + }, + { + "epoch": 0.78, + "grad_norm": 0.7732041478157043, + "learning_rate": 2.392265747974162e-06, + "loss": 2.0594, + "step": 23412 + }, + { + "epoch": 0.78, + "grad_norm": 0.7459157109260559, + "learning_rate": 2.391575954766977e-06, + "loss": 2.0722, + "step": 23413 + }, + { + "epoch": 0.78, + "grad_norm": 0.7450140118598938, + "learning_rate": 2.390886247515313e-06, + "loss": 2.0477, + "step": 23414 + }, + { + "epoch": 0.78, + "grad_norm": 0.7548434138298035, + "learning_rate": 2.3901966262269505e-06, + "loss": 2.0879, + "step": 23415 + }, + { + "epoch": 0.78, + "grad_norm": 0.7473630905151367, + "learning_rate": 2.3895070909096887e-06, + "loss": 2.0838, + "step": 23416 + }, + { + "epoch": 0.78, + "grad_norm": 0.9641106128692627, + "learning_rate": 2.388817641571316e-06, + "loss": 2.0609, + "step": 23417 + }, + { + "epoch": 0.78, + "grad_norm": 0.7401993274688721, + "learning_rate": 2.3881282782196224e-06, + "loss": 1.972, + "step": 23418 + }, + { + "epoch": 0.78, + "grad_norm": 0.7498778700828552, + "learning_rate": 2.3874390008623916e-06, + "loss": 2.0879, + "step": 23419 + }, + { + "epoch": 0.78, + "grad_norm": 0.7570474147796631, + "learning_rate": 2.386749809507417e-06, + "loss": 2.0973, + "step": 23420 + }, + { + "epoch": 0.78, + "grad_norm": 0.7704286575317383, + "learning_rate": 2.386060704162477e-06, + "loss": 2.0625, + "step": 23421 + }, + { + "epoch": 0.78, + "grad_norm": 0.7458311915397644, + "learning_rate": 2.3853716848353624e-06, + "loss": 2.1214, + "step": 23422 + }, + { + "epoch": 0.78, + "grad_norm": 0.7479351758956909, + "learning_rate": 2.384682751533861e-06, + "loss": 1.9162, + "step": 23423 + }, + { + "epoch": 0.78, + "grad_norm": 0.7799244523048401, + "learning_rate": 2.3839939042657446e-06, + "loss": 2.0325, + "step": 23424 + }, + { + "epoch": 0.78, + "grad_norm": 0.737411618232727, + "learning_rate": 2.383305143038802e-06, + "loss": 2.0586, + "step": 23425 + }, + { + "epoch": 0.78, + "grad_norm": 0.7311658263206482, + "learning_rate": 2.3826164678608167e-06, + "loss": 2.0939, + "step": 23426 + }, + { + "epoch": 0.78, + "grad_norm": 0.7360967993736267, + "learning_rate": 2.381927878739567e-06, + "loss": 2.0271, + "step": 23427 + }, + { + "epoch": 0.78, + "grad_norm": 0.7492108345031738, + "learning_rate": 2.381239375682828e-06, + "loss": 2.0716, + "step": 23428 + }, + { + "epoch": 0.78, + "grad_norm": 0.7420879006385803, + "learning_rate": 2.380550958698382e-06, + "loss": 2.0557, + "step": 23429 + }, + { + "epoch": 0.78, + "grad_norm": 0.7333759665489197, + "learning_rate": 2.3798626277940086e-06, + "loss": 2.1041, + "step": 23430 + }, + { + "epoch": 0.78, + "grad_norm": 0.7620464563369751, + "learning_rate": 2.379174382977478e-06, + "loss": 2.0313, + "step": 23431 + }, + { + "epoch": 0.78, + "grad_norm": 0.7438222169876099, + "learning_rate": 2.378486224256572e-06, + "loss": 2.0513, + "step": 23432 + }, + { + "epoch": 0.78, + "grad_norm": 0.7130557894706726, + "learning_rate": 2.3777981516390623e-06, + "loss": 1.9545, + "step": 23433 + }, + { + "epoch": 0.78, + "grad_norm": 0.7504770159721375, + "learning_rate": 2.37711016513272e-06, + "loss": 2.0279, + "step": 23434 + }, + { + "epoch": 0.78, + "grad_norm": 0.7847603559494019, + "learning_rate": 2.3764222647453184e-06, + "loss": 1.9978, + "step": 23435 + }, + { + "epoch": 0.78, + "grad_norm": 0.7502672076225281, + "learning_rate": 2.3757344504846356e-06, + "loss": 2.0158, + "step": 23436 + }, + { + "epoch": 0.78, + "grad_norm": 0.7489727735519409, + "learning_rate": 2.3750467223584317e-06, + "loss": 2.0997, + "step": 23437 + }, + { + "epoch": 0.78, + "grad_norm": 0.7641331553459167, + "learning_rate": 2.374359080374482e-06, + "loss": 2.0409, + "step": 23438 + }, + { + "epoch": 0.78, + "grad_norm": 0.756416916847229, + "learning_rate": 2.373671524540556e-06, + "loss": 2.0563, + "step": 23439 + }, + { + "epoch": 0.78, + "grad_norm": 0.7475770115852356, + "learning_rate": 2.37298405486442e-06, + "loss": 2.0735, + "step": 23440 + }, + { + "epoch": 0.78, + "grad_norm": 0.7600544095039368, + "learning_rate": 2.3722966713538375e-06, + "loss": 2.0694, + "step": 23441 + }, + { + "epoch": 0.78, + "grad_norm": 0.7446238398551941, + "learning_rate": 2.37160937401658e-06, + "loss": 2.0503, + "step": 23442 + }, + { + "epoch": 0.78, + "grad_norm": 0.8005606532096863, + "learning_rate": 2.370922162860406e-06, + "loss": 2.0794, + "step": 23443 + }, + { + "epoch": 0.78, + "grad_norm": 0.7287567257881165, + "learning_rate": 2.370235037893083e-06, + "loss": 2.0188, + "step": 23444 + }, + { + "epoch": 0.78, + "grad_norm": 0.7734804153442383, + "learning_rate": 2.3695479991223747e-06, + "loss": 2.0343, + "step": 23445 + }, + { + "epoch": 0.78, + "grad_norm": 0.7170252799987793, + "learning_rate": 2.3688610465560414e-06, + "loss": 2.0584, + "step": 23446 + }, + { + "epoch": 0.78, + "grad_norm": 0.7365675568580627, + "learning_rate": 2.368174180201841e-06, + "loss": 2.0675, + "step": 23447 + }, + { + "epoch": 0.78, + "grad_norm": 0.7450243830680847, + "learning_rate": 2.3674874000675397e-06, + "loss": 2.0613, + "step": 23448 + }, + { + "epoch": 0.78, + "grad_norm": 0.7802800536155701, + "learning_rate": 2.3668007061608924e-06, + "loss": 2.1809, + "step": 23449 + }, + { + "epoch": 0.78, + "grad_norm": 0.7641650438308716, + "learning_rate": 2.3661140984896534e-06, + "loss": 2.0813, + "step": 23450 + }, + { + "epoch": 0.78, + "grad_norm": 0.739465057849884, + "learning_rate": 2.365427577061584e-06, + "loss": 2.0677, + "step": 23451 + }, + { + "epoch": 0.78, + "grad_norm": 0.7329521775245667, + "learning_rate": 2.364741141884442e-06, + "loss": 2.0213, + "step": 23452 + }, + { + "epoch": 0.78, + "grad_norm": 0.7570412158966064, + "learning_rate": 2.3640547929659787e-06, + "loss": 2.0032, + "step": 23453 + }, + { + "epoch": 0.78, + "grad_norm": 0.7617425918579102, + "learning_rate": 2.3633685303139507e-06, + "loss": 2.0221, + "step": 23454 + }, + { + "epoch": 0.78, + "grad_norm": 0.7156916260719299, + "learning_rate": 2.36268235393611e-06, + "loss": 1.989, + "step": 23455 + }, + { + "epoch": 0.78, + "grad_norm": 0.7371447086334229, + "learning_rate": 2.361996263840205e-06, + "loss": 2.0197, + "step": 23456 + }, + { + "epoch": 0.78, + "grad_norm": 0.7214310169219971, + "learning_rate": 2.36131026003399e-06, + "loss": 2.0602, + "step": 23457 + }, + { + "epoch": 0.78, + "grad_norm": 0.7453267574310303, + "learning_rate": 2.3606243425252196e-06, + "loss": 2.0373, + "step": 23458 + }, + { + "epoch": 0.78, + "grad_norm": 0.7647746801376343, + "learning_rate": 2.3599385113216346e-06, + "loss": 2.0568, + "step": 23459 + }, + { + "epoch": 0.78, + "grad_norm": 0.7448199391365051, + "learning_rate": 2.359252766430985e-06, + "loss": 2.088, + "step": 23460 + }, + { + "epoch": 0.78, + "grad_norm": 0.7889588475227356, + "learning_rate": 2.3585671078610238e-06, + "loss": 2.0462, + "step": 23461 + }, + { + "epoch": 0.78, + "grad_norm": 0.754124641418457, + "learning_rate": 2.3578815356194927e-06, + "loss": 2.0746, + "step": 23462 + }, + { + "epoch": 0.78, + "grad_norm": 0.7604997158050537, + "learning_rate": 2.3571960497141344e-06, + "loss": 2.0842, + "step": 23463 + }, + { + "epoch": 0.78, + "grad_norm": 0.7162451148033142, + "learning_rate": 2.356510650152698e-06, + "loss": 2.0913, + "step": 23464 + }, + { + "epoch": 0.78, + "grad_norm": 0.7356861233711243, + "learning_rate": 2.355825336942923e-06, + "loss": 2.0857, + "step": 23465 + }, + { + "epoch": 0.78, + "grad_norm": 0.7419259548187256, + "learning_rate": 2.355140110092553e-06, + "loss": 2.0265, + "step": 23466 + }, + { + "epoch": 0.78, + "grad_norm": 0.7603479027748108, + "learning_rate": 2.354454969609333e-06, + "loss": 2.0763, + "step": 23467 + }, + { + "epoch": 0.78, + "grad_norm": 0.7977761626243591, + "learning_rate": 2.3537699155009997e-06, + "loss": 2.0737, + "step": 23468 + }, + { + "epoch": 0.78, + "grad_norm": 0.7733137011528015, + "learning_rate": 2.35308494777529e-06, + "loss": 2.0103, + "step": 23469 + }, + { + "epoch": 0.78, + "grad_norm": 0.7497432827949524, + "learning_rate": 2.3524000664399482e-06, + "loss": 2.0103, + "step": 23470 + }, + { + "epoch": 0.78, + "grad_norm": 0.7742173671722412, + "learning_rate": 2.351715271502708e-06, + "loss": 2.0208, + "step": 23471 + }, + { + "epoch": 0.78, + "grad_norm": 0.7715641260147095, + "learning_rate": 2.351030562971304e-06, + "loss": 2.156, + "step": 23472 + }, + { + "epoch": 0.78, + "grad_norm": 0.7167426943778992, + "learning_rate": 2.3503459408534725e-06, + "loss": 2.0661, + "step": 23473 + }, + { + "epoch": 0.78, + "grad_norm": 0.7894922494888306, + "learning_rate": 2.3496614051569533e-06, + "loss": 2.0157, + "step": 23474 + }, + { + "epoch": 0.78, + "grad_norm": 0.7574504613876343, + "learning_rate": 2.3489769558894738e-06, + "loss": 2.0289, + "step": 23475 + }, + { + "epoch": 0.78, + "grad_norm": 0.733540415763855, + "learning_rate": 2.3482925930587707e-06, + "loss": 2.0207, + "step": 23476 + }, + { + "epoch": 0.78, + "grad_norm": 0.7561203241348267, + "learning_rate": 2.3476083166725737e-06, + "loss": 2.0243, + "step": 23477 + }, + { + "epoch": 0.78, + "grad_norm": 0.7321612238883972, + "learning_rate": 2.34692412673861e-06, + "loss": 2.1282, + "step": 23478 + }, + { + "epoch": 0.78, + "grad_norm": 0.7351642847061157, + "learning_rate": 2.346240023264613e-06, + "loss": 2.0434, + "step": 23479 + }, + { + "epoch": 0.78, + "grad_norm": 0.7331467270851135, + "learning_rate": 2.345556006258316e-06, + "loss": 1.9734, + "step": 23480 + }, + { + "epoch": 0.78, + "grad_norm": 0.7624969482421875, + "learning_rate": 2.3448720757274368e-06, + "loss": 2.1269, + "step": 23481 + }, + { + "epoch": 0.78, + "grad_norm": 0.7439268827438354, + "learning_rate": 2.3441882316797047e-06, + "loss": 2.0016, + "step": 23482 + }, + { + "epoch": 0.78, + "grad_norm": 0.7550613284111023, + "learning_rate": 2.3435044741228507e-06, + "loss": 2.0949, + "step": 23483 + }, + { + "epoch": 0.78, + "grad_norm": 0.7514625191688538, + "learning_rate": 2.342820803064594e-06, + "loss": 2.0599, + "step": 23484 + }, + { + "epoch": 0.78, + "grad_norm": 0.7443356513977051, + "learning_rate": 2.342137218512662e-06, + "loss": 1.9982, + "step": 23485 + }, + { + "epoch": 0.78, + "grad_norm": 0.7426179647445679, + "learning_rate": 2.3414537204747766e-06, + "loss": 2.0104, + "step": 23486 + }, + { + "epoch": 0.78, + "grad_norm": 0.7371115684509277, + "learning_rate": 2.3407703089586553e-06, + "loss": 2.0275, + "step": 23487 + }, + { + "epoch": 0.78, + "grad_norm": 0.8575018644332886, + "learning_rate": 2.340086983972023e-06, + "loss": 2.0714, + "step": 23488 + }, + { + "epoch": 0.78, + "grad_norm": 0.735481858253479, + "learning_rate": 2.3394037455226015e-06, + "loss": 2.0951, + "step": 23489 + }, + { + "epoch": 0.78, + "grad_norm": 0.7563784122467041, + "learning_rate": 2.338720593618107e-06, + "loss": 2.0867, + "step": 23490 + }, + { + "epoch": 0.78, + "grad_norm": 0.7404896020889282, + "learning_rate": 2.338037528266254e-06, + "loss": 2.0283, + "step": 23491 + }, + { + "epoch": 0.78, + "grad_norm": 0.7541285157203674, + "learning_rate": 2.337354549474765e-06, + "loss": 2.0797, + "step": 23492 + }, + { + "epoch": 0.78, + "grad_norm": 0.7350534200668335, + "learning_rate": 2.3366716572513536e-06, + "loss": 2.0432, + "step": 23493 + }, + { + "epoch": 0.78, + "grad_norm": 0.7396516799926758, + "learning_rate": 2.3359888516037334e-06, + "loss": 2.0811, + "step": 23494 + }, + { + "epoch": 0.78, + "grad_norm": 0.7381443977355957, + "learning_rate": 2.3353061325396177e-06, + "loss": 1.9687, + "step": 23495 + }, + { + "epoch": 0.78, + "grad_norm": 0.728945255279541, + "learning_rate": 2.334623500066725e-06, + "loss": 2.0456, + "step": 23496 + }, + { + "epoch": 0.78, + "grad_norm": 0.7696000337600708, + "learning_rate": 2.3339409541927617e-06, + "loss": 2.0247, + "step": 23497 + }, + { + "epoch": 0.78, + "grad_norm": 0.7517894506454468, + "learning_rate": 2.333258494925442e-06, + "loss": 1.9846, + "step": 23498 + }, + { + "epoch": 0.78, + "grad_norm": 0.7358246445655823, + "learning_rate": 2.332576122272475e-06, + "loss": 2.0646, + "step": 23499 + }, + { + "epoch": 0.78, + "grad_norm": 0.8261101841926575, + "learning_rate": 2.3318938362415676e-06, + "loss": 1.9982, + "step": 23500 + }, + { + "epoch": 0.78, + "grad_norm": 0.766869306564331, + "learning_rate": 2.331211636840429e-06, + "loss": 2.0694, + "step": 23501 + }, + { + "epoch": 0.78, + "grad_norm": 0.7412298917770386, + "learning_rate": 2.3305295240767724e-06, + "loss": 2.0765, + "step": 23502 + }, + { + "epoch": 0.78, + "grad_norm": 0.7593595385551453, + "learning_rate": 2.329847497958293e-06, + "loss": 2.0911, + "step": 23503 + }, + { + "epoch": 0.78, + "grad_norm": 0.7239635586738586, + "learning_rate": 2.329165558492702e-06, + "loss": 2.0081, + "step": 23504 + }, + { + "epoch": 0.78, + "grad_norm": 0.7685195803642273, + "learning_rate": 2.328483705687705e-06, + "loss": 2.082, + "step": 23505 + }, + { + "epoch": 0.78, + "grad_norm": 0.75153648853302, + "learning_rate": 2.3278019395510008e-06, + "loss": 2.0658, + "step": 23506 + }, + { + "epoch": 0.78, + "grad_norm": 0.7516559958457947, + "learning_rate": 2.3271202600902966e-06, + "loss": 2.0606, + "step": 23507 + }, + { + "epoch": 0.78, + "grad_norm": 0.7454524040222168, + "learning_rate": 2.326438667313291e-06, + "loss": 2.1215, + "step": 23508 + }, + { + "epoch": 0.78, + "grad_norm": 0.7594374418258667, + "learning_rate": 2.3257571612276818e-06, + "loss": 1.9674, + "step": 23509 + }, + { + "epoch": 0.78, + "grad_norm": 0.7282988429069519, + "learning_rate": 2.3250757418411698e-06, + "loss": 2.0581, + "step": 23510 + }, + { + "epoch": 0.78, + "grad_norm": 0.7189530730247498, + "learning_rate": 2.3243944091614577e-06, + "loss": 2.0032, + "step": 23511 + }, + { + "epoch": 0.78, + "grad_norm": 0.7365098595619202, + "learning_rate": 2.3237131631962383e-06, + "loss": 2.0631, + "step": 23512 + }, + { + "epoch": 0.78, + "grad_norm": 0.7153369188308716, + "learning_rate": 2.3230320039532074e-06, + "loss": 2.0036, + "step": 23513 + }, + { + "epoch": 0.78, + "grad_norm": 0.7661045789718628, + "learning_rate": 2.3223509314400637e-06, + "loss": 2.0484, + "step": 23514 + }, + { + "epoch": 0.78, + "grad_norm": 0.7179904580116272, + "learning_rate": 2.3216699456644964e-06, + "loss": 2.0522, + "step": 23515 + }, + { + "epoch": 0.78, + "grad_norm": 0.7751089334487915, + "learning_rate": 2.3209890466342055e-06, + "loss": 2.0347, + "step": 23516 + }, + { + "epoch": 0.78, + "grad_norm": 0.7579507827758789, + "learning_rate": 2.320308234356877e-06, + "loss": 2.0363, + "step": 23517 + }, + { + "epoch": 0.78, + "grad_norm": 0.7656630873680115, + "learning_rate": 2.319627508840209e-06, + "loss": 2.0334, + "step": 23518 + }, + { + "epoch": 0.78, + "grad_norm": 0.738566517829895, + "learning_rate": 2.318946870091884e-06, + "loss": 2.0511, + "step": 23519 + }, + { + "epoch": 0.78, + "grad_norm": 0.7315531373023987, + "learning_rate": 2.3182663181195997e-06, + "loss": 2.039, + "step": 23520 + }, + { + "epoch": 0.78, + "grad_norm": 0.7282350659370422, + "learning_rate": 2.3175858529310404e-06, + "loss": 1.9789, + "step": 23521 + }, + { + "epoch": 0.78, + "grad_norm": 0.74435955286026, + "learning_rate": 2.3169054745338903e-06, + "loss": 1.9935, + "step": 23522 + }, + { + "epoch": 0.78, + "grad_norm": 0.748498797416687, + "learning_rate": 2.3162251829358397e-06, + "loss": 2.0311, + "step": 23523 + }, + { + "epoch": 0.78, + "grad_norm": 0.7749775052070618, + "learning_rate": 2.315544978144579e-06, + "loss": 2.0406, + "step": 23524 + }, + { + "epoch": 0.78, + "grad_norm": 0.7482819557189941, + "learning_rate": 2.3148648601677825e-06, + "loss": 1.9789, + "step": 23525 + }, + { + "epoch": 0.78, + "grad_norm": 0.729158878326416, + "learning_rate": 2.3141848290131397e-06, + "loss": 1.9937, + "step": 23526 + }, + { + "epoch": 0.78, + "grad_norm": 0.7662271857261658, + "learning_rate": 2.3135048846883344e-06, + "loss": 1.9859, + "step": 23527 + }, + { + "epoch": 0.78, + "grad_norm": 0.7428956627845764, + "learning_rate": 2.3128250272010432e-06, + "loss": 2.0283, + "step": 23528 + }, + { + "epoch": 0.78, + "grad_norm": 0.7326934337615967, + "learning_rate": 2.312145256558953e-06, + "loss": 2.1012, + "step": 23529 + }, + { + "epoch": 0.78, + "grad_norm": 0.7199211716651917, + "learning_rate": 2.3114655727697364e-06, + "loss": 2.0119, + "step": 23530 + }, + { + "epoch": 0.78, + "grad_norm": 0.7496190071105957, + "learning_rate": 2.31078597584108e-06, + "loss": 2.0334, + "step": 23531 + }, + { + "epoch": 0.78, + "grad_norm": 0.7249230146408081, + "learning_rate": 2.3101064657806537e-06, + "loss": 2.0073, + "step": 23532 + }, + { + "epoch": 0.78, + "grad_norm": 0.7357186675071716, + "learning_rate": 2.309427042596141e-06, + "loss": 2.0029, + "step": 23533 + }, + { + "epoch": 0.78, + "grad_norm": 0.7654830813407898, + "learning_rate": 2.3087477062952135e-06, + "loss": 2.0161, + "step": 23534 + }, + { + "epoch": 0.78, + "grad_norm": 0.7393261790275574, + "learning_rate": 2.308068456885545e-06, + "loss": 2.0584, + "step": 23535 + }, + { + "epoch": 0.78, + "grad_norm": 0.7441073656082153, + "learning_rate": 2.3073892943748113e-06, + "loss": 2.0306, + "step": 23536 + }, + { + "epoch": 0.78, + "grad_norm": 0.7413317561149597, + "learning_rate": 2.306710218770688e-06, + "loss": 2.0593, + "step": 23537 + }, + { + "epoch": 0.78, + "grad_norm": 0.7435477375984192, + "learning_rate": 2.306031230080843e-06, + "loss": 2.0113, + "step": 23538 + }, + { + "epoch": 0.78, + "grad_norm": 0.7806531190872192, + "learning_rate": 2.3053523283129455e-06, + "loss": 2.0082, + "step": 23539 + }, + { + "epoch": 0.78, + "grad_norm": 0.7768741846084595, + "learning_rate": 2.304673513474671e-06, + "loss": 2.0074, + "step": 23540 + }, + { + "epoch": 0.78, + "grad_norm": 0.7511498928070068, + "learning_rate": 2.303994785573682e-06, + "loss": 2.0672, + "step": 23541 + }, + { + "epoch": 0.78, + "grad_norm": 0.7799888849258423, + "learning_rate": 2.303316144617653e-06, + "loss": 2.1485, + "step": 23542 + }, + { + "epoch": 0.78, + "grad_norm": 0.7495607733726501, + "learning_rate": 2.302637590614247e-06, + "loss": 2.1416, + "step": 23543 + }, + { + "epoch": 0.78, + "grad_norm": 0.7572643160820007, + "learning_rate": 2.301959123571128e-06, + "loss": 1.9855, + "step": 23544 + }, + { + "epoch": 0.78, + "grad_norm": 0.7390634417533875, + "learning_rate": 2.301280743495964e-06, + "loss": 2.1307, + "step": 23545 + }, + { + "epoch": 0.78, + "grad_norm": 0.7172415256500244, + "learning_rate": 2.3006024503964197e-06, + "loss": 2.0289, + "step": 23546 + }, + { + "epoch": 0.78, + "grad_norm": 0.7477096915245056, + "learning_rate": 2.299924244280157e-06, + "loss": 2.0893, + "step": 23547 + }, + { + "epoch": 0.78, + "grad_norm": 0.7351077198982239, + "learning_rate": 2.299246125154835e-06, + "loss": 2.0521, + "step": 23548 + }, + { + "epoch": 0.78, + "grad_norm": 0.7521158456802368, + "learning_rate": 2.2985680930281207e-06, + "loss": 2.031, + "step": 23549 + }, + { + "epoch": 0.78, + "grad_norm": 0.7830166220664978, + "learning_rate": 2.2978901479076665e-06, + "loss": 2.1102, + "step": 23550 + }, + { + "epoch": 0.78, + "grad_norm": 0.7598782181739807, + "learning_rate": 2.2972122898011384e-06, + "loss": 2.0407, + "step": 23551 + }, + { + "epoch": 0.78, + "grad_norm": 0.7617616057395935, + "learning_rate": 2.2965345187161892e-06, + "loss": 1.9989, + "step": 23552 + }, + { + "epoch": 0.78, + "grad_norm": 0.7692623734474182, + "learning_rate": 2.2958568346604814e-06, + "loss": 2.0405, + "step": 23553 + }, + { + "epoch": 0.78, + "grad_norm": 0.7600765228271484, + "learning_rate": 2.2951792376416648e-06, + "loss": 2.0424, + "step": 23554 + }, + { + "epoch": 0.78, + "grad_norm": 0.7529767751693726, + "learning_rate": 2.294501727667401e-06, + "loss": 2.0905, + "step": 23555 + }, + { + "epoch": 0.78, + "grad_norm": 0.7281036376953125, + "learning_rate": 2.2938243047453403e-06, + "loss": 2.0321, + "step": 23556 + }, + { + "epoch": 0.78, + "grad_norm": 0.7528734803199768, + "learning_rate": 2.293146968883134e-06, + "loss": 2.0315, + "step": 23557 + }, + { + "epoch": 0.78, + "grad_norm": 0.735353946685791, + "learning_rate": 2.292469720088436e-06, + "loss": 2.0322, + "step": 23558 + }, + { + "epoch": 0.78, + "grad_norm": 0.731687605381012, + "learning_rate": 2.2917925583689016e-06, + "loss": 2.0563, + "step": 23559 + }, + { + "epoch": 0.78, + "grad_norm": 0.7748310565948486, + "learning_rate": 2.291115483732177e-06, + "loss": 2.0684, + "step": 23560 + }, + { + "epoch": 0.78, + "grad_norm": 0.7436119318008423, + "learning_rate": 2.2904384961859085e-06, + "loss": 2.0256, + "step": 23561 + }, + { + "epoch": 0.78, + "grad_norm": 0.7371118664741516, + "learning_rate": 2.2897615957377507e-06, + "loss": 2.0151, + "step": 23562 + }, + { + "epoch": 0.78, + "grad_norm": 0.7507503628730774, + "learning_rate": 2.2890847823953453e-06, + "loss": 2.0297, + "step": 23563 + }, + { + "epoch": 0.78, + "grad_norm": 0.7812598347663879, + "learning_rate": 2.2884080561663437e-06, + "loss": 2.0846, + "step": 23564 + }, + { + "epoch": 0.78, + "grad_norm": 0.7584078311920166, + "learning_rate": 2.2877314170583886e-06, + "loss": 2.0885, + "step": 23565 + }, + { + "epoch": 0.78, + "grad_norm": 0.7463155388832092, + "learning_rate": 2.2870548650791215e-06, + "loss": 2.0675, + "step": 23566 + }, + { + "epoch": 0.78, + "grad_norm": 0.7308967709541321, + "learning_rate": 2.2863784002361878e-06, + "loss": 2.038, + "step": 23567 + }, + { + "epoch": 0.78, + "grad_norm": 0.7545138001441956, + "learning_rate": 2.2857020225372327e-06, + "loss": 2.0659, + "step": 23568 + }, + { + "epoch": 0.78, + "grad_norm": 0.7375462651252747, + "learning_rate": 2.285025731989896e-06, + "loss": 2.0525, + "step": 23569 + }, + { + "epoch": 0.78, + "grad_norm": 0.7747974991798401, + "learning_rate": 2.2843495286018135e-06, + "loss": 2.0786, + "step": 23570 + }, + { + "epoch": 0.78, + "grad_norm": 0.7513628602027893, + "learning_rate": 2.2836734123806316e-06, + "loss": 2.0756, + "step": 23571 + }, + { + "epoch": 0.78, + "grad_norm": 0.755552351474762, + "learning_rate": 2.2829973833339825e-06, + "loss": 2.025, + "step": 23572 + }, + { + "epoch": 0.78, + "grad_norm": 0.7485615611076355, + "learning_rate": 2.2823214414695094e-06, + "loss": 2.0224, + "step": 23573 + }, + { + "epoch": 0.78, + "grad_norm": 0.7787623405456543, + "learning_rate": 2.2816455867948416e-06, + "loss": 1.9945, + "step": 23574 + }, + { + "epoch": 0.78, + "grad_norm": 0.7569385766983032, + "learning_rate": 2.2809698193176223e-06, + "loss": 2.0853, + "step": 23575 + }, + { + "epoch": 0.78, + "grad_norm": 0.7888135313987732, + "learning_rate": 2.2802941390454793e-06, + "loss": 2.1015, + "step": 23576 + }, + { + "epoch": 0.78, + "grad_norm": 0.7930558919906616, + "learning_rate": 2.2796185459860522e-06, + "loss": 2.0597, + "step": 23577 + }, + { + "epoch": 0.78, + "grad_norm": 0.7524420022964478, + "learning_rate": 2.2789430401469693e-06, + "loss": 2.0357, + "step": 23578 + }, + { + "epoch": 0.78, + "grad_norm": 0.739093005657196, + "learning_rate": 2.278267621535861e-06, + "loss": 2.0106, + "step": 23579 + }, + { + "epoch": 0.78, + "grad_norm": 0.7531952261924744, + "learning_rate": 2.277592290160359e-06, + "loss": 2.0562, + "step": 23580 + }, + { + "epoch": 0.78, + "grad_norm": 0.7313003540039062, + "learning_rate": 2.2769170460280965e-06, + "loss": 2.0459, + "step": 23581 + }, + { + "epoch": 0.78, + "grad_norm": 0.766413688659668, + "learning_rate": 2.2762418891467e-06, + "loss": 2.0402, + "step": 23582 + }, + { + "epoch": 0.78, + "grad_norm": 0.7260028719902039, + "learning_rate": 2.2755668195237924e-06, + "loss": 2.0818, + "step": 23583 + }, + { + "epoch": 0.78, + "grad_norm": 0.7636836767196655, + "learning_rate": 2.274891837167006e-06, + "loss": 2.1054, + "step": 23584 + }, + { + "epoch": 0.78, + "grad_norm": 0.7263052463531494, + "learning_rate": 2.274216942083962e-06, + "loss": 2.071, + "step": 23585 + }, + { + "epoch": 0.78, + "grad_norm": 0.7351335883140564, + "learning_rate": 2.2735421342822903e-06, + "loss": 2.0161, + "step": 23586 + }, + { + "epoch": 0.78, + "grad_norm": 0.7323692440986633, + "learning_rate": 2.2728674137696117e-06, + "loss": 1.9184, + "step": 23587 + }, + { + "epoch": 0.78, + "grad_norm": 0.75522780418396, + "learning_rate": 2.272192780553546e-06, + "loss": 2.0274, + "step": 23588 + }, + { + "epoch": 0.78, + "grad_norm": 0.7576169967651367, + "learning_rate": 2.2715182346417164e-06, + "loss": 2.0378, + "step": 23589 + }, + { + "epoch": 0.78, + "grad_norm": 0.738373875617981, + "learning_rate": 2.270843776041748e-06, + "loss": 2.0251, + "step": 23590 + }, + { + "epoch": 0.78, + "grad_norm": 0.7415732145309448, + "learning_rate": 2.2701694047612555e-06, + "loss": 2.0661, + "step": 23591 + }, + { + "epoch": 0.78, + "grad_norm": 0.7485771775245667, + "learning_rate": 2.2694951208078574e-06, + "loss": 2.0615, + "step": 23592 + }, + { + "epoch": 0.78, + "grad_norm": 0.753497838973999, + "learning_rate": 2.2688209241891758e-06, + "loss": 1.9826, + "step": 23593 + }, + { + "epoch": 0.78, + "grad_norm": 0.76187664270401, + "learning_rate": 2.2681468149128217e-06, + "loss": 2.1067, + "step": 23594 + }, + { + "epoch": 0.79, + "grad_norm": 0.7242427468299866, + "learning_rate": 2.267472792986415e-06, + "loss": 2.0387, + "step": 23595 + }, + { + "epoch": 0.79, + "grad_norm": 0.7302752733230591, + "learning_rate": 2.2667988584175673e-06, + "loss": 2.0757, + "step": 23596 + }, + { + "epoch": 0.79, + "grad_norm": 0.7430997490882874, + "learning_rate": 2.2661250112138966e-06, + "loss": 2.065, + "step": 23597 + }, + { + "epoch": 0.79, + "grad_norm": 0.7281623482704163, + "learning_rate": 2.2654512513830105e-06, + "loss": 2.0584, + "step": 23598 + }, + { + "epoch": 0.79, + "grad_norm": 0.7423932552337646, + "learning_rate": 2.2647775789325253e-06, + "loss": 1.9977, + "step": 23599 + }, + { + "epoch": 0.79, + "grad_norm": 0.7709879279136658, + "learning_rate": 2.2641039938700503e-06, + "loss": 2.0482, + "step": 23600 + }, + { + "epoch": 0.79, + "grad_norm": 0.7609750032424927, + "learning_rate": 2.263430496203193e-06, + "loss": 2.091, + "step": 23601 + }, + { + "epoch": 0.79, + "grad_norm": 0.7479986548423767, + "learning_rate": 2.262757085939562e-06, + "loss": 2.0246, + "step": 23602 + }, + { + "epoch": 0.79, + "grad_norm": 0.7440741658210754, + "learning_rate": 2.262083763086771e-06, + "loss": 2.1033, + "step": 23603 + }, + { + "epoch": 0.79, + "grad_norm": 0.7406244277954102, + "learning_rate": 2.2614105276524223e-06, + "loss": 2.0291, + "step": 23604 + }, + { + "epoch": 0.79, + "grad_norm": 0.7464913725852966, + "learning_rate": 2.260737379644119e-06, + "loss": 2.0048, + "step": 23605 + }, + { + "epoch": 0.79, + "grad_norm": 0.7784124612808228, + "learning_rate": 2.260064319069473e-06, + "loss": 1.992, + "step": 23606 + }, + { + "epoch": 0.79, + "grad_norm": 0.7339638471603394, + "learning_rate": 2.2593913459360804e-06, + "loss": 2.071, + "step": 23607 + }, + { + "epoch": 0.79, + "grad_norm": 0.7853075861930847, + "learning_rate": 2.258718460251551e-06, + "loss": 2.0552, + "step": 23608 + }, + { + "epoch": 0.79, + "grad_norm": 0.7653780579566956, + "learning_rate": 2.2580456620234836e-06, + "loss": 2.0327, + "step": 23609 + }, + { + "epoch": 0.79, + "grad_norm": 0.7785540223121643, + "learning_rate": 2.2573729512594767e-06, + "loss": 2.0599, + "step": 23610 + }, + { + "epoch": 0.79, + "grad_norm": 0.7656276226043701, + "learning_rate": 2.2567003279671316e-06, + "loss": 2.1037, + "step": 23611 + }, + { + "epoch": 0.79, + "grad_norm": 0.7550110816955566, + "learning_rate": 2.2560277921540517e-06, + "loss": 2.0342, + "step": 23612 + }, + { + "epoch": 0.79, + "grad_norm": 0.7178660035133362, + "learning_rate": 2.255355343827832e-06, + "loss": 2.0894, + "step": 23613 + }, + { + "epoch": 0.79, + "grad_norm": 0.774670422077179, + "learning_rate": 2.2546829829960647e-06, + "loss": 2.0018, + "step": 23614 + }, + { + "epoch": 0.79, + "grad_norm": 0.7617074251174927, + "learning_rate": 2.2540107096663533e-06, + "loss": 2.0502, + "step": 23615 + }, + { + "epoch": 0.79, + "grad_norm": 0.7612568736076355, + "learning_rate": 2.253338523846287e-06, + "loss": 2.0414, + "step": 23616 + }, + { + "epoch": 0.79, + "grad_norm": 0.7606356739997864, + "learning_rate": 2.2526664255434637e-06, + "loss": 2.0436, + "step": 23617 + }, + { + "epoch": 0.79, + "grad_norm": 0.7578970789909363, + "learning_rate": 2.251994414765474e-06, + "loss": 2.0382, + "step": 23618 + }, + { + "epoch": 0.79, + "grad_norm": 0.7440611124038696, + "learning_rate": 2.2513224915199117e-06, + "loss": 2.1006, + "step": 23619 + }, + { + "epoch": 0.79, + "grad_norm": 0.711390495300293, + "learning_rate": 2.250650655814364e-06, + "loss": 2.0406, + "step": 23620 + }, + { + "epoch": 0.79, + "grad_norm": 0.7530195116996765, + "learning_rate": 2.2499789076564237e-06, + "loss": 2.042, + "step": 23621 + }, + { + "epoch": 0.79, + "grad_norm": 0.7395023107528687, + "learning_rate": 2.2493072470536857e-06, + "loss": 2.0039, + "step": 23622 + }, + { + "epoch": 0.79, + "grad_norm": 0.7774317860603333, + "learning_rate": 2.248635674013726e-06, + "loss": 2.0914, + "step": 23623 + }, + { + "epoch": 0.79, + "grad_norm": 0.7292062640190125, + "learning_rate": 2.2479641885441382e-06, + "loss": 2.0086, + "step": 23624 + }, + { + "epoch": 0.79, + "grad_norm": 0.7318323850631714, + "learning_rate": 2.247292790652511e-06, + "loss": 2.0655, + "step": 23625 + }, + { + "epoch": 0.79, + "grad_norm": 0.7490631341934204, + "learning_rate": 2.246621480346426e-06, + "loss": 2.0168, + "step": 23626 + }, + { + "epoch": 0.79, + "grad_norm": 0.7465776205062866, + "learning_rate": 2.2459502576334634e-06, + "loss": 2.0273, + "step": 23627 + }, + { + "epoch": 0.79, + "grad_norm": 0.7453117370605469, + "learning_rate": 2.2452791225212156e-06, + "loss": 2.0067, + "step": 23628 + }, + { + "epoch": 0.79, + "grad_norm": 0.7839908003807068, + "learning_rate": 2.244608075017255e-06, + "loss": 2.0539, + "step": 23629 + }, + { + "epoch": 0.79, + "grad_norm": 0.7341722249984741, + "learning_rate": 2.2439371151291677e-06, + "loss": 2.0237, + "step": 23630 + }, + { + "epoch": 0.79, + "grad_norm": 0.7251865863800049, + "learning_rate": 2.243266242864537e-06, + "loss": 2.0363, + "step": 23631 + }, + { + "epoch": 0.79, + "grad_norm": 0.755972146987915, + "learning_rate": 2.2425954582309374e-06, + "loss": 2.0175, + "step": 23632 + }, + { + "epoch": 0.79, + "grad_norm": 0.7262917160987854, + "learning_rate": 2.2419247612359453e-06, + "loss": 2.0502, + "step": 23633 + }, + { + "epoch": 0.79, + "grad_norm": 0.7265647649765015, + "learning_rate": 2.2412541518871445e-06, + "loss": 2.0678, + "step": 23634 + }, + { + "epoch": 0.79, + "grad_norm": 0.7309658527374268, + "learning_rate": 2.2405836301921057e-06, + "loss": 2.0521, + "step": 23635 + }, + { + "epoch": 0.79, + "grad_norm": 0.7781156897544861, + "learning_rate": 2.239913196158403e-06, + "loss": 2.0395, + "step": 23636 + }, + { + "epoch": 0.79, + "grad_norm": 0.7624852657318115, + "learning_rate": 2.239242849793615e-06, + "loss": 2.029, + "step": 23637 + }, + { + "epoch": 0.79, + "grad_norm": 0.7409361004829407, + "learning_rate": 2.2385725911053136e-06, + "loss": 2.0583, + "step": 23638 + }, + { + "epoch": 0.79, + "grad_norm": 0.7424526810646057, + "learning_rate": 2.2379024201010715e-06, + "loss": 2.0906, + "step": 23639 + }, + { + "epoch": 0.79, + "grad_norm": 0.7443002462387085, + "learning_rate": 2.237232336788455e-06, + "loss": 2.0245, + "step": 23640 + }, + { + "epoch": 0.79, + "grad_norm": 0.7554900050163269, + "learning_rate": 2.2365623411750427e-06, + "loss": 2.0864, + "step": 23641 + }, + { + "epoch": 0.79, + "grad_norm": 0.7053746581077576, + "learning_rate": 2.235892433268395e-06, + "loss": 2.0403, + "step": 23642 + }, + { + "epoch": 0.79, + "grad_norm": 0.7602192759513855, + "learning_rate": 2.2352226130760847e-06, + "loss": 2.0881, + "step": 23643 + }, + { + "epoch": 0.79, + "grad_norm": 0.7537850141525269, + "learning_rate": 2.234552880605685e-06, + "loss": 2.0079, + "step": 23644 + }, + { + "epoch": 0.79, + "grad_norm": 0.7397011518478394, + "learning_rate": 2.23388323586475e-06, + "loss": 1.9865, + "step": 23645 + }, + { + "epoch": 0.79, + "grad_norm": 0.7676513195037842, + "learning_rate": 2.2332136788608505e-06, + "loss": 2.0262, + "step": 23646 + }, + { + "epoch": 0.79, + "grad_norm": 0.7051846981048584, + "learning_rate": 2.232544209601554e-06, + "loss": 2.0205, + "step": 23647 + }, + { + "epoch": 0.79, + "grad_norm": 0.761152982711792, + "learning_rate": 2.2318748280944204e-06, + "loss": 2.0273, + "step": 23648 + }, + { + "epoch": 0.79, + "grad_norm": 0.7555580735206604, + "learning_rate": 2.23120553434701e-06, + "loss": 2.0543, + "step": 23649 + }, + { + "epoch": 0.79, + "grad_norm": 0.7850946187973022, + "learning_rate": 2.230536328366889e-06, + "loss": 2.0376, + "step": 23650 + }, + { + "epoch": 0.79, + "grad_norm": 0.753035843372345, + "learning_rate": 2.2298672101616125e-06, + "loss": 2.1189, + "step": 23651 + }, + { + "epoch": 0.79, + "grad_norm": 0.7970600128173828, + "learning_rate": 2.229198179738743e-06, + "loss": 2.0299, + "step": 23652 + }, + { + "epoch": 0.79, + "grad_norm": 0.7501860857009888, + "learning_rate": 2.228529237105841e-06, + "loss": 2.0395, + "step": 23653 + }, + { + "epoch": 0.79, + "grad_norm": 0.7318435907363892, + "learning_rate": 2.22786038227046e-06, + "loss": 1.9822, + "step": 23654 + }, + { + "epoch": 0.79, + "grad_norm": 0.7863469123840332, + "learning_rate": 2.227191615240156e-06, + "loss": 2.0715, + "step": 23655 + }, + { + "epoch": 0.79, + "grad_norm": 0.7523480653762817, + "learning_rate": 2.2265229360224883e-06, + "loss": 2.0674, + "step": 23656 + }, + { + "epoch": 0.79, + "grad_norm": 0.76875239610672, + "learning_rate": 2.2258543446250092e-06, + "loss": 2.0494, + "step": 23657 + }, + { + "epoch": 0.79, + "grad_norm": 0.7588374614715576, + "learning_rate": 2.2251858410552686e-06, + "loss": 2.0795, + "step": 23658 + }, + { + "epoch": 0.79, + "grad_norm": 0.7442683577537537, + "learning_rate": 2.2245174253208214e-06, + "loss": 1.9968, + "step": 23659 + }, + { + "epoch": 0.79, + "grad_norm": 0.7446786165237427, + "learning_rate": 2.2238490974292224e-06, + "loss": 2.0726, + "step": 23660 + }, + { + "epoch": 0.79, + "grad_norm": 0.7287999391555786, + "learning_rate": 2.2231808573880165e-06, + "loss": 2.0646, + "step": 23661 + }, + { + "epoch": 0.79, + "grad_norm": 0.7380803823471069, + "learning_rate": 2.222512705204758e-06, + "loss": 2.1259, + "step": 23662 + }, + { + "epoch": 0.79, + "grad_norm": 0.7377799153327942, + "learning_rate": 2.221844640886993e-06, + "loss": 2.0361, + "step": 23663 + }, + { + "epoch": 0.79, + "grad_norm": 0.7310378551483154, + "learning_rate": 2.221176664442266e-06, + "loss": 2.0203, + "step": 23664 + }, + { + "epoch": 0.79, + "grad_norm": 0.7672024369239807, + "learning_rate": 2.220508775878126e-06, + "loss": 2.0275, + "step": 23665 + }, + { + "epoch": 0.79, + "grad_norm": 0.7604436874389648, + "learning_rate": 2.2198409752021245e-06, + "loss": 2.0541, + "step": 23666 + }, + { + "epoch": 0.79, + "grad_norm": 0.7680114507675171, + "learning_rate": 2.2191732624217954e-06, + "loss": 2.0955, + "step": 23667 + }, + { + "epoch": 0.79, + "grad_norm": 0.7599309682846069, + "learning_rate": 2.2185056375446854e-06, + "loss": 2.0192, + "step": 23668 + }, + { + "epoch": 0.79, + "grad_norm": 0.7287400364875793, + "learning_rate": 2.2178381005783413e-06, + "loss": 2.0135, + "step": 23669 + }, + { + "epoch": 0.79, + "grad_norm": 0.749250590801239, + "learning_rate": 2.2171706515303016e-06, + "loss": 2.0442, + "step": 23670 + }, + { + "epoch": 0.79, + "grad_norm": 0.759213924407959, + "learning_rate": 2.216503290408104e-06, + "loss": 1.9958, + "step": 23671 + }, + { + "epoch": 0.79, + "grad_norm": 0.7460319995880127, + "learning_rate": 2.215836017219294e-06, + "loss": 2.1775, + "step": 23672 + }, + { + "epoch": 0.79, + "grad_norm": 0.7417365312576294, + "learning_rate": 2.2151688319714037e-06, + "loss": 2.0132, + "step": 23673 + }, + { + "epoch": 0.79, + "grad_norm": 0.7209345102310181, + "learning_rate": 2.214501734671973e-06, + "loss": 2.1216, + "step": 23674 + }, + { + "epoch": 0.79, + "grad_norm": 0.7342620491981506, + "learning_rate": 2.213834725328542e-06, + "loss": 2.0249, + "step": 23675 + }, + { + "epoch": 0.79, + "grad_norm": 0.7343745827674866, + "learning_rate": 2.213167803948644e-06, + "loss": 1.9812, + "step": 23676 + }, + { + "epoch": 0.79, + "grad_norm": 0.7436458468437195, + "learning_rate": 2.212500970539808e-06, + "loss": 1.9753, + "step": 23677 + }, + { + "epoch": 0.79, + "grad_norm": 0.7423403263092041, + "learning_rate": 2.211834225109576e-06, + "loss": 2.0253, + "step": 23678 + }, + { + "epoch": 0.79, + "grad_norm": 0.7360560894012451, + "learning_rate": 2.2111675676654764e-06, + "loss": 2.0582, + "step": 23679 + }, + { + "epoch": 0.79, + "grad_norm": 0.7348859310150146, + "learning_rate": 2.2105009982150395e-06, + "loss": 2.0268, + "step": 23680 + }, + { + "epoch": 0.79, + "grad_norm": 0.7292592525482178, + "learning_rate": 2.209834516765795e-06, + "loss": 1.9193, + "step": 23681 + }, + { + "epoch": 0.79, + "grad_norm": 0.7509409189224243, + "learning_rate": 2.2091681233252793e-06, + "loss": 2.0115, + "step": 23682 + }, + { + "epoch": 0.79, + "grad_norm": 0.7474276423454285, + "learning_rate": 2.208501817901012e-06, + "loss": 1.9757, + "step": 23683 + }, + { + "epoch": 0.79, + "grad_norm": 0.7274956107139587, + "learning_rate": 2.2078356005005285e-06, + "loss": 2.0437, + "step": 23684 + }, + { + "epoch": 0.79, + "grad_norm": 0.7594448328018188, + "learning_rate": 2.2071694711313516e-06, + "loss": 2.0406, + "step": 23685 + }, + { + "epoch": 0.79, + "grad_norm": 0.7546924352645874, + "learning_rate": 2.2065034298010035e-06, + "loss": 2.0556, + "step": 23686 + }, + { + "epoch": 0.79, + "grad_norm": 0.7417657375335693, + "learning_rate": 2.2058374765170134e-06, + "loss": 2.0704, + "step": 23687 + }, + { + "epoch": 0.79, + "grad_norm": 0.7451030611991882, + "learning_rate": 2.205171611286908e-06, + "loss": 2.0421, + "step": 23688 + }, + { + "epoch": 0.79, + "grad_norm": 0.750341534614563, + "learning_rate": 2.2045058341182013e-06, + "loss": 2.044, + "step": 23689 + }, + { + "epoch": 0.79, + "grad_norm": 0.7353470921516418, + "learning_rate": 2.2038401450184177e-06, + "loss": 1.971, + "step": 23690 + }, + { + "epoch": 0.79, + "grad_norm": 0.7611587047576904, + "learning_rate": 2.2031745439950837e-06, + "loss": 2.1121, + "step": 23691 + }, + { + "epoch": 0.79, + "grad_norm": 0.7636519074440002, + "learning_rate": 2.2025090310557097e-06, + "loss": 2.0848, + "step": 23692 + }, + { + "epoch": 0.79, + "grad_norm": 0.8071422576904297, + "learning_rate": 2.201843606207823e-06, + "loss": 2.0832, + "step": 23693 + }, + { + "epoch": 0.79, + "grad_norm": 0.7979187369346619, + "learning_rate": 2.2011782694589356e-06, + "loss": 2.0846, + "step": 23694 + }, + { + "epoch": 0.79, + "grad_norm": 0.7547232508659363, + "learning_rate": 2.2005130208165636e-06, + "loss": 2.0401, + "step": 23695 + }, + { + "epoch": 0.79, + "grad_norm": 0.759938657283783, + "learning_rate": 2.1998478602882255e-06, + "loss": 2.1031, + "step": 23696 + }, + { + "epoch": 0.79, + "grad_norm": 0.7636541724205017, + "learning_rate": 2.1991827878814364e-06, + "loss": 2.0809, + "step": 23697 + }, + { + "epoch": 0.79, + "grad_norm": 0.722571611404419, + "learning_rate": 2.1985178036037093e-06, + "loss": 2.0154, + "step": 23698 + }, + { + "epoch": 0.79, + "grad_norm": 0.7514976859092712, + "learning_rate": 2.197852907462552e-06, + "loss": 2.0722, + "step": 23699 + }, + { + "epoch": 0.79, + "grad_norm": 0.748512327671051, + "learning_rate": 2.1971880994654836e-06, + "loss": 2.0605, + "step": 23700 + }, + { + "epoch": 0.79, + "grad_norm": 0.7539935111999512, + "learning_rate": 2.1965233796200114e-06, + "loss": 2.0568, + "step": 23701 + }, + { + "epoch": 0.79, + "grad_norm": 0.7448989152908325, + "learning_rate": 2.195858747933641e-06, + "loss": 2.1012, + "step": 23702 + }, + { + "epoch": 0.79, + "grad_norm": 0.7734174132347107, + "learning_rate": 2.1951942044138865e-06, + "loss": 2.0843, + "step": 23703 + }, + { + "epoch": 0.79, + "grad_norm": 0.7483270168304443, + "learning_rate": 2.194529749068255e-06, + "loss": 2.0506, + "step": 23704 + }, + { + "epoch": 0.79, + "grad_norm": 0.7270799279212952, + "learning_rate": 2.19386538190425e-06, + "loss": 2.0559, + "step": 23705 + }, + { + "epoch": 0.79, + "grad_norm": 0.7467303276062012, + "learning_rate": 2.193201102929381e-06, + "loss": 2.1433, + "step": 23706 + }, + { + "epoch": 0.79, + "grad_norm": 0.7677479982376099, + "learning_rate": 2.192536912151152e-06, + "loss": 2.0396, + "step": 23707 + }, + { + "epoch": 0.79, + "grad_norm": 0.7468218803405762, + "learning_rate": 2.191872809577061e-06, + "loss": 2.0162, + "step": 23708 + }, + { + "epoch": 0.79, + "grad_norm": 0.754470705986023, + "learning_rate": 2.1912087952146167e-06, + "loss": 2.0498, + "step": 23709 + }, + { + "epoch": 0.79, + "grad_norm": 0.7728621363639832, + "learning_rate": 2.1905448690713236e-06, + "loss": 2.0292, + "step": 23710 + }, + { + "epoch": 0.79, + "grad_norm": 0.7209801077842712, + "learning_rate": 2.1898810311546723e-06, + "loss": 2.0394, + "step": 23711 + }, + { + "epoch": 0.79, + "grad_norm": 0.7537707090377808, + "learning_rate": 2.189217281472168e-06, + "loss": 2.0933, + "step": 23712 + }, + { + "epoch": 0.79, + "grad_norm": 0.7559280395507812, + "learning_rate": 2.188553620031312e-06, + "loss": 2.0732, + "step": 23713 + }, + { + "epoch": 0.79, + "grad_norm": 0.7252349853515625, + "learning_rate": 2.187890046839596e-06, + "loss": 2.0368, + "step": 23714 + }, + { + "epoch": 0.79, + "grad_norm": 0.7561346292495728, + "learning_rate": 2.187226561904523e-06, + "loss": 2.0321, + "step": 23715 + }, + { + "epoch": 0.79, + "grad_norm": 0.7298838496208191, + "learning_rate": 2.1865631652335863e-06, + "loss": 1.9998, + "step": 23716 + }, + { + "epoch": 0.79, + "grad_norm": 0.7268658876419067, + "learning_rate": 2.185899856834276e-06, + "loss": 2.0838, + "step": 23717 + }, + { + "epoch": 0.79, + "grad_norm": 0.7621577978134155, + "learning_rate": 2.185236636714091e-06, + "loss": 2.042, + "step": 23718 + }, + { + "epoch": 0.79, + "grad_norm": 0.7524171471595764, + "learning_rate": 2.184573504880524e-06, + "loss": 1.9774, + "step": 23719 + }, + { + "epoch": 0.79, + "grad_norm": 0.7304760217666626, + "learning_rate": 2.1839104613410655e-06, + "loss": 2.0832, + "step": 23720 + }, + { + "epoch": 0.79, + "grad_norm": 0.7413729429244995, + "learning_rate": 2.183247506103203e-06, + "loss": 2.0261, + "step": 23721 + }, + { + "epoch": 0.79, + "grad_norm": 0.7955942153930664, + "learning_rate": 2.18258463917443e-06, + "loss": 2.0674, + "step": 23722 + }, + { + "epoch": 0.79, + "grad_norm": 0.7688320875167847, + "learning_rate": 2.1819218605622362e-06, + "loss": 1.9601, + "step": 23723 + }, + { + "epoch": 0.79, + "grad_norm": 0.7308465838432312, + "learning_rate": 2.181259170274108e-06, + "loss": 2.1032, + "step": 23724 + }, + { + "epoch": 0.79, + "grad_norm": 0.7447122931480408, + "learning_rate": 2.180596568317528e-06, + "loss": 2.088, + "step": 23725 + }, + { + "epoch": 0.79, + "grad_norm": 0.7433052062988281, + "learning_rate": 2.179934054699989e-06, + "loss": 2.0439, + "step": 23726 + }, + { + "epoch": 0.79, + "grad_norm": 0.7689745426177979, + "learning_rate": 2.1792716294289683e-06, + "loss": 2.0674, + "step": 23727 + }, + { + "epoch": 0.79, + "grad_norm": 0.7288118600845337, + "learning_rate": 2.1786092925119573e-06, + "loss": 2.0643, + "step": 23728 + }, + { + "epoch": 0.79, + "grad_norm": 0.750744104385376, + "learning_rate": 2.1779470439564345e-06, + "loss": 2.0287, + "step": 23729 + }, + { + "epoch": 0.79, + "grad_norm": 0.7266230583190918, + "learning_rate": 2.1772848837698778e-06, + "loss": 2.005, + "step": 23730 + }, + { + "epoch": 0.79, + "grad_norm": 0.7595853805541992, + "learning_rate": 2.1766228119597733e-06, + "loss": 2.0618, + "step": 23731 + }, + { + "epoch": 0.79, + "grad_norm": 0.7518106698989868, + "learning_rate": 2.175960828533601e-06, + "loss": 1.998, + "step": 23732 + }, + { + "epoch": 0.79, + "grad_norm": 0.7265350818634033, + "learning_rate": 2.1752989334988385e-06, + "loss": 2.0376, + "step": 23733 + }, + { + "epoch": 0.79, + "grad_norm": 0.7308914661407471, + "learning_rate": 2.1746371268629594e-06, + "loss": 2.0497, + "step": 23734 + }, + { + "epoch": 0.79, + "grad_norm": 0.769344687461853, + "learning_rate": 2.1739754086334477e-06, + "loss": 2.0446, + "step": 23735 + }, + { + "epoch": 0.79, + "grad_norm": 0.7268562316894531, + "learning_rate": 2.173313778817773e-06, + "loss": 2.018, + "step": 23736 + }, + { + "epoch": 0.79, + "grad_norm": 0.7512324452400208, + "learning_rate": 2.172652237423414e-06, + "loss": 2.0167, + "step": 23737 + }, + { + "epoch": 0.79, + "grad_norm": 0.7773889303207397, + "learning_rate": 2.17199078445784e-06, + "loss": 2.0793, + "step": 23738 + }, + { + "epoch": 0.79, + "grad_norm": 0.7406313419342041, + "learning_rate": 2.1713294199285293e-06, + "loss": 2.0244, + "step": 23739 + }, + { + "epoch": 0.79, + "grad_norm": 0.7373641133308411, + "learning_rate": 2.170668143842949e-06, + "loss": 1.973, + "step": 23740 + }, + { + "epoch": 0.79, + "grad_norm": 0.7507232427597046, + "learning_rate": 2.1700069562085736e-06, + "loss": 2.0154, + "step": 23741 + }, + { + "epoch": 0.79, + "grad_norm": 0.7756134867668152, + "learning_rate": 2.1693458570328707e-06, + "loss": 2.0568, + "step": 23742 + }, + { + "epoch": 0.79, + "grad_norm": 0.7778127193450928, + "learning_rate": 2.1686848463233057e-06, + "loss": 2.099, + "step": 23743 + }, + { + "epoch": 0.79, + "grad_norm": 0.76261967420578, + "learning_rate": 2.16802392408735e-06, + "loss": 2.0976, + "step": 23744 + }, + { + "epoch": 0.79, + "grad_norm": 0.7695951461791992, + "learning_rate": 2.167363090332474e-06, + "loss": 2.0611, + "step": 23745 + }, + { + "epoch": 0.79, + "grad_norm": 0.7367326617240906, + "learning_rate": 2.1667023450661383e-06, + "loss": 1.9828, + "step": 23746 + }, + { + "epoch": 0.79, + "grad_norm": 0.7277584075927734, + "learning_rate": 2.166041688295807e-06, + "loss": 2.0213, + "step": 23747 + }, + { + "epoch": 0.79, + "grad_norm": 0.7555390000343323, + "learning_rate": 2.1653811200289467e-06, + "loss": 2.0159, + "step": 23748 + }, + { + "epoch": 0.79, + "grad_norm": 0.7677854895591736, + "learning_rate": 2.164720640273017e-06, + "loss": 2.0691, + "step": 23749 + }, + { + "epoch": 0.79, + "grad_norm": 0.7738224267959595, + "learning_rate": 2.1640602490354846e-06, + "loss": 2.0039, + "step": 23750 + }, + { + "epoch": 0.79, + "grad_norm": 0.7399836182594299, + "learning_rate": 2.1633999463238075e-06, + "loss": 1.9797, + "step": 23751 + }, + { + "epoch": 0.79, + "grad_norm": 0.7431377172470093, + "learning_rate": 2.1627397321454413e-06, + "loss": 2.0607, + "step": 23752 + }, + { + "epoch": 0.79, + "grad_norm": 0.7499891519546509, + "learning_rate": 2.1620796065078496e-06, + "loss": 2.0752, + "step": 23753 + }, + { + "epoch": 0.79, + "grad_norm": 0.7329750657081604, + "learning_rate": 2.1614195694184914e-06, + "loss": 2.0212, + "step": 23754 + }, + { + "epoch": 0.79, + "grad_norm": 0.7458094358444214, + "learning_rate": 2.16075962088482e-06, + "loss": 1.9728, + "step": 23755 + }, + { + "epoch": 0.79, + "grad_norm": 0.7337527275085449, + "learning_rate": 2.1600997609142914e-06, + "loss": 2.1137, + "step": 23756 + }, + { + "epoch": 0.79, + "grad_norm": 0.8071608543395996, + "learning_rate": 2.1594399895143626e-06, + "loss": 2.1056, + "step": 23757 + }, + { + "epoch": 0.79, + "grad_norm": 0.7372292876243591, + "learning_rate": 2.158780306692483e-06, + "loss": 2.0795, + "step": 23758 + }, + { + "epoch": 0.79, + "grad_norm": 0.7833998799324036, + "learning_rate": 2.158120712456111e-06, + "loss": 2.0649, + "step": 23759 + }, + { + "epoch": 0.79, + "grad_norm": 0.7692787647247314, + "learning_rate": 2.157461206812693e-06, + "loss": 2.0633, + "step": 23760 + }, + { + "epoch": 0.79, + "grad_norm": 0.7255768179893494, + "learning_rate": 2.1568017897696847e-06, + "loss": 2.0256, + "step": 23761 + }, + { + "epoch": 0.79, + "grad_norm": 0.7694878578186035, + "learning_rate": 2.1561424613345295e-06, + "loss": 2.0442, + "step": 23762 + }, + { + "epoch": 0.79, + "grad_norm": 0.7284373044967651, + "learning_rate": 2.155483221514684e-06, + "loss": 2.0622, + "step": 23763 + }, + { + "epoch": 0.79, + "grad_norm": 0.7694969177246094, + "learning_rate": 2.1548240703175903e-06, + "loss": 2.0582, + "step": 23764 + }, + { + "epoch": 0.79, + "grad_norm": 0.7884646654129028, + "learning_rate": 2.1541650077506947e-06, + "loss": 2.0272, + "step": 23765 + }, + { + "epoch": 0.79, + "grad_norm": 0.7542934417724609, + "learning_rate": 2.1535060338214453e-06, + "loss": 2.1055, + "step": 23766 + }, + { + "epoch": 0.79, + "grad_norm": 0.7202092409133911, + "learning_rate": 2.152847148537288e-06, + "loss": 2.0707, + "step": 23767 + }, + { + "epoch": 0.79, + "grad_norm": 0.7285504341125488, + "learning_rate": 2.152188351905665e-06, + "loss": 1.9957, + "step": 23768 + }, + { + "epoch": 0.79, + "grad_norm": 0.7719646096229553, + "learning_rate": 2.151529643934016e-06, + "loss": 2.1484, + "step": 23769 + }, + { + "epoch": 0.79, + "grad_norm": 0.7532100081443787, + "learning_rate": 2.150871024629788e-06, + "loss": 2.0807, + "step": 23770 + }, + { + "epoch": 0.79, + "grad_norm": 0.7245051264762878, + "learning_rate": 2.1502124940004167e-06, + "loss": 2.049, + "step": 23771 + }, + { + "epoch": 0.79, + "grad_norm": 0.7317708134651184, + "learning_rate": 2.1495540520533465e-06, + "loss": 2.0418, + "step": 23772 + }, + { + "epoch": 0.79, + "grad_norm": 0.7627023458480835, + "learning_rate": 2.148895698796014e-06, + "loss": 2.0688, + "step": 23773 + }, + { + "epoch": 0.79, + "grad_norm": 0.7387031316757202, + "learning_rate": 2.1482374342358547e-06, + "loss": 2.0787, + "step": 23774 + }, + { + "epoch": 0.79, + "grad_norm": 0.742496907711029, + "learning_rate": 2.1475792583803067e-06, + "loss": 2.116, + "step": 23775 + }, + { + "epoch": 0.79, + "grad_norm": 0.7471238970756531, + "learning_rate": 2.1469211712368088e-06, + "loss": 2.0464, + "step": 23776 + }, + { + "epoch": 0.79, + "grad_norm": 0.7389901280403137, + "learning_rate": 2.1462631728127937e-06, + "loss": 2.0708, + "step": 23777 + }, + { + "epoch": 0.79, + "grad_norm": 0.7288243174552917, + "learning_rate": 2.145605263115691e-06, + "loss": 2.0862, + "step": 23778 + }, + { + "epoch": 0.79, + "grad_norm": 0.732835590839386, + "learning_rate": 2.14494744215294e-06, + "loss": 2.0359, + "step": 23779 + }, + { + "epoch": 0.79, + "grad_norm": 0.720294713973999, + "learning_rate": 2.1442897099319673e-06, + "loss": 2.0091, + "step": 23780 + }, + { + "epoch": 0.79, + "grad_norm": 0.7516170144081116, + "learning_rate": 2.143632066460207e-06, + "loss": 2.0379, + "step": 23781 + }, + { + "epoch": 0.79, + "grad_norm": 0.7479332089424133, + "learning_rate": 2.142974511745085e-06, + "loss": 1.9876, + "step": 23782 + }, + { + "epoch": 0.79, + "grad_norm": 0.7255694270133972, + "learning_rate": 2.1423170457940355e-06, + "loss": 1.9969, + "step": 23783 + }, + { + "epoch": 0.79, + "grad_norm": 0.7325586080551147, + "learning_rate": 2.1416596686144796e-06, + "loss": 2.0446, + "step": 23784 + }, + { + "epoch": 0.79, + "grad_norm": 0.7238690257072449, + "learning_rate": 2.1410023802138513e-06, + "loss": 2.0215, + "step": 23785 + }, + { + "epoch": 0.79, + "grad_norm": 0.7633205652236938, + "learning_rate": 2.140345180599571e-06, + "loss": 2.0511, + "step": 23786 + }, + { + "epoch": 0.79, + "grad_norm": 0.7324872612953186, + "learning_rate": 2.139688069779062e-06, + "loss": 2.0091, + "step": 23787 + }, + { + "epoch": 0.79, + "grad_norm": 0.7247252464294434, + "learning_rate": 2.1390310477597507e-06, + "loss": 2.0108, + "step": 23788 + }, + { + "epoch": 0.79, + "grad_norm": 0.7717952132225037, + "learning_rate": 2.1383741145490633e-06, + "loss": 2.068, + "step": 23789 + }, + { + "epoch": 0.79, + "grad_norm": 0.7341840863227844, + "learning_rate": 2.1377172701544167e-06, + "loss": 2.0133, + "step": 23790 + }, + { + "epoch": 0.79, + "grad_norm": 0.7492052316665649, + "learning_rate": 2.13706051458323e-06, + "loss": 2.0768, + "step": 23791 + }, + { + "epoch": 0.79, + "grad_norm": 0.71937096118927, + "learning_rate": 2.1364038478429283e-06, + "loss": 2.0228, + "step": 23792 + }, + { + "epoch": 0.79, + "grad_norm": 0.777675986289978, + "learning_rate": 2.1357472699409253e-06, + "loss": 2.064, + "step": 23793 + }, + { + "epoch": 0.79, + "grad_norm": 0.7439678907394409, + "learning_rate": 2.1350907808846434e-06, + "loss": 1.9987, + "step": 23794 + }, + { + "epoch": 0.79, + "grad_norm": 0.7402308583259583, + "learning_rate": 2.134434380681496e-06, + "loss": 2.0568, + "step": 23795 + }, + { + "epoch": 0.79, + "grad_norm": 0.7478187084197998, + "learning_rate": 2.1337780693388964e-06, + "loss": 2.058, + "step": 23796 + }, + { + "epoch": 0.79, + "grad_norm": 0.7419453859329224, + "learning_rate": 2.1331218468642622e-06, + "loss": 2.0201, + "step": 23797 + }, + { + "epoch": 0.79, + "grad_norm": 0.747994065284729, + "learning_rate": 2.1324657132650107e-06, + "loss": 2.0919, + "step": 23798 + }, + { + "epoch": 0.79, + "grad_norm": 0.7557610273361206, + "learning_rate": 2.13180966854855e-06, + "loss": 2.0827, + "step": 23799 + }, + { + "epoch": 0.79, + "grad_norm": 0.7382601499557495, + "learning_rate": 2.1311537127222894e-06, + "loss": 2.0355, + "step": 23800 + }, + { + "epoch": 0.79, + "grad_norm": 0.7647411823272705, + "learning_rate": 2.130497845793645e-06, + "loss": 2.0151, + "step": 23801 + }, + { + "epoch": 0.79, + "grad_norm": 0.7414058446884155, + "learning_rate": 2.1298420677700226e-06, + "loss": 2.0359, + "step": 23802 + }, + { + "epoch": 0.79, + "grad_norm": 0.7301093339920044, + "learning_rate": 2.129186378658834e-06, + "loss": 2.0681, + "step": 23803 + }, + { + "epoch": 0.79, + "grad_norm": 0.7572154402732849, + "learning_rate": 2.1285307784674827e-06, + "loss": 2.0703, + "step": 23804 + }, + { + "epoch": 0.79, + "grad_norm": 0.7398231029510498, + "learning_rate": 2.1278752672033787e-06, + "loss": 2.042, + "step": 23805 + }, + { + "epoch": 0.79, + "grad_norm": 0.7268497943878174, + "learning_rate": 2.1272198448739255e-06, + "loss": 2.0548, + "step": 23806 + }, + { + "epoch": 0.79, + "grad_norm": 0.7275376915931702, + "learning_rate": 2.1265645114865275e-06, + "loss": 2.0329, + "step": 23807 + }, + { + "epoch": 0.79, + "grad_norm": 0.7279407382011414, + "learning_rate": 2.125909267048596e-06, + "loss": 2.0143, + "step": 23808 + }, + { + "epoch": 0.79, + "grad_norm": 0.7743759751319885, + "learning_rate": 2.125254111567521e-06, + "loss": 2.0332, + "step": 23809 + }, + { + "epoch": 0.79, + "grad_norm": 0.7421111464500427, + "learning_rate": 2.124599045050709e-06, + "loss": 2.0219, + "step": 23810 + }, + { + "epoch": 0.79, + "grad_norm": 0.7650473117828369, + "learning_rate": 2.1239440675055643e-06, + "loss": 2.0855, + "step": 23811 + }, + { + "epoch": 0.79, + "grad_norm": 0.7282496094703674, + "learning_rate": 2.123289178939485e-06, + "loss": 1.9756, + "step": 23812 + }, + { + "epoch": 0.79, + "grad_norm": 0.7483336925506592, + "learning_rate": 2.1226343793598646e-06, + "loss": 1.9975, + "step": 23813 + }, + { + "epoch": 0.79, + "grad_norm": 0.7448161244392395, + "learning_rate": 2.1219796687741078e-06, + "loss": 2.0612, + "step": 23814 + }, + { + "epoch": 0.79, + "grad_norm": 0.7480015754699707, + "learning_rate": 2.121325047189605e-06, + "loss": 2.034, + "step": 23815 + }, + { + "epoch": 0.79, + "grad_norm": 0.7583790421485901, + "learning_rate": 2.1206705146137574e-06, + "loss": 2.0845, + "step": 23816 + }, + { + "epoch": 0.79, + "grad_norm": 0.7382501363754272, + "learning_rate": 2.120016071053955e-06, + "loss": 2.0328, + "step": 23817 + }, + { + "epoch": 0.79, + "grad_norm": 0.7349620461463928, + "learning_rate": 2.119361716517592e-06, + "loss": 2.047, + "step": 23818 + }, + { + "epoch": 0.79, + "grad_norm": 0.7276656031608582, + "learning_rate": 2.118707451012061e-06, + "loss": 1.9819, + "step": 23819 + }, + { + "epoch": 0.79, + "grad_norm": 0.7503131628036499, + "learning_rate": 2.1180532745447568e-06, + "loss": 2.0497, + "step": 23820 + }, + { + "epoch": 0.79, + "grad_norm": 0.7267643809318542, + "learning_rate": 2.1173991871230683e-06, + "loss": 2.0598, + "step": 23821 + }, + { + "epoch": 0.79, + "grad_norm": 0.7460064888000488, + "learning_rate": 2.116745188754381e-06, + "loss": 2.0343, + "step": 23822 + }, + { + "epoch": 0.79, + "grad_norm": 0.7642011642456055, + "learning_rate": 2.1160912794460863e-06, + "loss": 1.9927, + "step": 23823 + }, + { + "epoch": 0.79, + "grad_norm": 0.8003596067428589, + "learning_rate": 2.115437459205575e-06, + "loss": 2.0492, + "step": 23824 + }, + { + "epoch": 0.79, + "grad_norm": 0.7773311734199524, + "learning_rate": 2.1147837280402293e-06, + "loss": 1.9894, + "step": 23825 + }, + { + "epoch": 0.79, + "grad_norm": 0.7727688550949097, + "learning_rate": 2.1141300859574344e-06, + "loss": 2.0933, + "step": 23826 + }, + { + "epoch": 0.79, + "grad_norm": 0.747826099395752, + "learning_rate": 2.113476532964579e-06, + "loss": 2.0333, + "step": 23827 + }, + { + "epoch": 0.79, + "grad_norm": 0.742027759552002, + "learning_rate": 2.11282306906904e-06, + "loss": 2.0364, + "step": 23828 + }, + { + "epoch": 0.79, + "grad_norm": 0.7422628402709961, + "learning_rate": 2.1121696942782044e-06, + "loss": 2.0187, + "step": 23829 + }, + { + "epoch": 0.79, + "grad_norm": 0.7441149353981018, + "learning_rate": 2.111516408599459e-06, + "loss": 2.0868, + "step": 23830 + }, + { + "epoch": 0.79, + "grad_norm": 0.7450019717216492, + "learning_rate": 2.1108632120401718e-06, + "loss": 2.0348, + "step": 23831 + }, + { + "epoch": 0.79, + "grad_norm": 0.78092360496521, + "learning_rate": 2.1102101046077283e-06, + "loss": 2.0353, + "step": 23832 + }, + { + "epoch": 0.79, + "grad_norm": 0.7648126482963562, + "learning_rate": 2.1095570863095093e-06, + "loss": 2.0767, + "step": 23833 + }, + { + "epoch": 0.79, + "grad_norm": 0.7563747763633728, + "learning_rate": 2.108904157152891e-06, + "loss": 2.0619, + "step": 23834 + }, + { + "epoch": 0.79, + "grad_norm": 0.7490023970603943, + "learning_rate": 2.1082513171452468e-06, + "loss": 2.0452, + "step": 23835 + }, + { + "epoch": 0.79, + "grad_norm": 0.7652599811553955, + "learning_rate": 2.1075985662939556e-06, + "loss": 2.1034, + "step": 23836 + }, + { + "epoch": 0.79, + "grad_norm": 0.7646187543869019, + "learning_rate": 2.106945904606389e-06, + "loss": 2.0556, + "step": 23837 + }, + { + "epoch": 0.79, + "grad_norm": 0.7383307218551636, + "learning_rate": 2.10629333208992e-06, + "loss": 1.9899, + "step": 23838 + }, + { + "epoch": 0.79, + "grad_norm": 0.7453235387802124, + "learning_rate": 2.1056408487519274e-06, + "loss": 2.0454, + "step": 23839 + }, + { + "epoch": 0.79, + "grad_norm": 0.7448431849479675, + "learning_rate": 2.104988454599777e-06, + "loss": 2.0557, + "step": 23840 + }, + { + "epoch": 0.79, + "grad_norm": 0.7670080065727234, + "learning_rate": 2.1043361496408377e-06, + "loss": 2.0013, + "step": 23841 + }, + { + "epoch": 0.79, + "grad_norm": 0.7708730101585388, + "learning_rate": 2.1036839338824846e-06, + "loss": 2.0311, + "step": 23842 + }, + { + "epoch": 0.79, + "grad_norm": 0.7660013437271118, + "learning_rate": 2.103031807332081e-06, + "loss": 2.0166, + "step": 23843 + }, + { + "epoch": 0.79, + "grad_norm": 0.7123951315879822, + "learning_rate": 2.102379769996994e-06, + "loss": 2.0468, + "step": 23844 + }, + { + "epoch": 0.79, + "grad_norm": 0.7426466345787048, + "learning_rate": 2.1017278218845927e-06, + "loss": 1.975, + "step": 23845 + }, + { + "epoch": 0.79, + "grad_norm": 0.7411551475524902, + "learning_rate": 2.1010759630022436e-06, + "loss": 2.0212, + "step": 23846 + }, + { + "epoch": 0.79, + "grad_norm": 0.7469449043273926, + "learning_rate": 2.100424193357309e-06, + "loss": 2.0405, + "step": 23847 + }, + { + "epoch": 0.79, + "grad_norm": 0.7269944548606873, + "learning_rate": 2.0997725129571502e-06, + "loss": 2.0441, + "step": 23848 + }, + { + "epoch": 0.79, + "grad_norm": 0.7457621097564697, + "learning_rate": 2.0991209218091336e-06, + "loss": 1.982, + "step": 23849 + }, + { + "epoch": 0.79, + "grad_norm": 0.7549929022789001, + "learning_rate": 2.0984694199206156e-06, + "loss": 2.0134, + "step": 23850 + }, + { + "epoch": 0.79, + "grad_norm": 0.7349221110343933, + "learning_rate": 2.0978180072989597e-06, + "loss": 2.021, + "step": 23851 + }, + { + "epoch": 0.79, + "grad_norm": 0.7361078262329102, + "learning_rate": 2.0971666839515305e-06, + "loss": 2.0203, + "step": 23852 + }, + { + "epoch": 0.79, + "grad_norm": 0.770860493183136, + "learning_rate": 2.0965154498856744e-06, + "loss": 2.0032, + "step": 23853 + }, + { + "epoch": 0.79, + "grad_norm": 0.7455860376358032, + "learning_rate": 2.0958643051087558e-06, + "loss": 2.0389, + "step": 23854 + }, + { + "epoch": 0.79, + "grad_norm": 0.7228385210037231, + "learning_rate": 2.095213249628132e-06, + "loss": 2.0126, + "step": 23855 + }, + { + "epoch": 0.79, + "grad_norm": 0.7507182359695435, + "learning_rate": 2.094562283451157e-06, + "loss": 2.1085, + "step": 23856 + }, + { + "epoch": 0.79, + "grad_norm": 0.7216410636901855, + "learning_rate": 2.093911406585181e-06, + "loss": 2.0554, + "step": 23857 + }, + { + "epoch": 0.79, + "grad_norm": 0.7724502682685852, + "learning_rate": 2.0932606190375624e-06, + "loss": 2.0705, + "step": 23858 + }, + { + "epoch": 0.79, + "grad_norm": 0.7391021251678467, + "learning_rate": 2.0926099208156505e-06, + "loss": 2.0404, + "step": 23859 + }, + { + "epoch": 0.79, + "grad_norm": 0.7532942891120911, + "learning_rate": 2.0919593119267967e-06, + "loss": 1.9934, + "step": 23860 + }, + { + "epoch": 0.79, + "grad_norm": 0.7816571593284607, + "learning_rate": 2.0913087923783547e-06, + "loss": 2.0617, + "step": 23861 + }, + { + "epoch": 0.79, + "grad_norm": 0.7489032745361328, + "learning_rate": 2.090658362177671e-06, + "loss": 2.0111, + "step": 23862 + }, + { + "epoch": 0.79, + "grad_norm": 0.7545264959335327, + "learning_rate": 2.0900080213320904e-06, + "loss": 2.004, + "step": 23863 + }, + { + "epoch": 0.79, + "grad_norm": 0.7162925601005554, + "learning_rate": 2.0893577698489674e-06, + "loss": 2.013, + "step": 23864 + }, + { + "epoch": 0.79, + "grad_norm": 0.7426259517669678, + "learning_rate": 2.088707607735644e-06, + "loss": 2.0845, + "step": 23865 + }, + { + "epoch": 0.79, + "grad_norm": 0.7458286881446838, + "learning_rate": 2.0880575349994623e-06, + "loss": 2.1092, + "step": 23866 + }, + { + "epoch": 0.79, + "grad_norm": 0.7328277230262756, + "learning_rate": 2.08740755164777e-06, + "loss": 2.0587, + "step": 23867 + }, + { + "epoch": 0.79, + "grad_norm": 0.7315108776092529, + "learning_rate": 2.0867576576879133e-06, + "loss": 1.9971, + "step": 23868 + }, + { + "epoch": 0.79, + "grad_norm": 0.7838161587715149, + "learning_rate": 2.086107853127227e-06, + "loss": 2.062, + "step": 23869 + }, + { + "epoch": 0.79, + "grad_norm": 0.7372422218322754, + "learning_rate": 2.08545813797306e-06, + "loss": 2.0665, + "step": 23870 + }, + { + "epoch": 0.79, + "grad_norm": 0.7289823889732361, + "learning_rate": 2.0848085122327476e-06, + "loss": 2.0465, + "step": 23871 + }, + { + "epoch": 0.79, + "grad_norm": 0.7478293776512146, + "learning_rate": 2.084158975913628e-06, + "loss": 2.0552, + "step": 23872 + }, + { + "epoch": 0.79, + "grad_norm": 0.7334038615226746, + "learning_rate": 2.083509529023041e-06, + "loss": 1.957, + "step": 23873 + }, + { + "epoch": 0.79, + "grad_norm": 0.7341417074203491, + "learning_rate": 2.0828601715683295e-06, + "loss": 2.0813, + "step": 23874 + }, + { + "epoch": 0.79, + "grad_norm": 0.7505955696105957, + "learning_rate": 2.082210903556817e-06, + "loss": 2.0594, + "step": 23875 + }, + { + "epoch": 0.79, + "grad_norm": 0.7447357773780823, + "learning_rate": 2.0815617249958462e-06, + "loss": 2.0586, + "step": 23876 + }, + { + "epoch": 0.79, + "grad_norm": 0.7629621624946594, + "learning_rate": 2.0809126358927546e-06, + "loss": 2.0422, + "step": 23877 + }, + { + "epoch": 0.79, + "grad_norm": 0.7442324161529541, + "learning_rate": 2.080263636254869e-06, + "loss": 2.0934, + "step": 23878 + }, + { + "epoch": 0.79, + "grad_norm": 0.7598207592964172, + "learning_rate": 2.0796147260895214e-06, + "loss": 2.1284, + "step": 23879 + }, + { + "epoch": 0.79, + "grad_norm": 0.7288111448287964, + "learning_rate": 2.078965905404048e-06, + "loss": 2.0641, + "step": 23880 + }, + { + "epoch": 0.79, + "grad_norm": 0.7293943166732788, + "learning_rate": 2.078317174205773e-06, + "loss": 2.0925, + "step": 23881 + }, + { + "epoch": 0.79, + "grad_norm": 0.7395011186599731, + "learning_rate": 2.0776685325020273e-06, + "loss": 2.0267, + "step": 23882 + }, + { + "epoch": 0.79, + "grad_norm": 0.7300148606300354, + "learning_rate": 2.077019980300142e-06, + "loss": 2.0615, + "step": 23883 + }, + { + "epoch": 0.79, + "grad_norm": 0.730800211429596, + "learning_rate": 2.0763715176074417e-06, + "loss": 1.9932, + "step": 23884 + }, + { + "epoch": 0.79, + "grad_norm": 0.758759081363678, + "learning_rate": 2.0757231444312507e-06, + "loss": 2.0387, + "step": 23885 + }, + { + "epoch": 0.79, + "grad_norm": 1.052169919013977, + "learning_rate": 2.0750748607788973e-06, + "loss": 2.0176, + "step": 23886 + }, + { + "epoch": 0.79, + "grad_norm": 0.7489136457443237, + "learning_rate": 2.074426666657704e-06, + "loss": 1.983, + "step": 23887 + }, + { + "epoch": 0.79, + "grad_norm": 0.7223396897315979, + "learning_rate": 2.0737785620749907e-06, + "loss": 2.0205, + "step": 23888 + }, + { + "epoch": 0.79, + "grad_norm": 0.7384513020515442, + "learning_rate": 2.0731305470380814e-06, + "loss": 2.0235, + "step": 23889 + }, + { + "epoch": 0.79, + "grad_norm": 0.7136140465736389, + "learning_rate": 2.0724826215543013e-06, + "loss": 2.0506, + "step": 23890 + }, + { + "epoch": 0.79, + "grad_norm": 0.7408373951911926, + "learning_rate": 2.071834785630963e-06, + "loss": 2.0729, + "step": 23891 + }, + { + "epoch": 0.79, + "grad_norm": 0.7481176853179932, + "learning_rate": 2.071187039275392e-06, + "loss": 2.0885, + "step": 23892 + }, + { + "epoch": 0.79, + "grad_norm": 0.7725847363471985, + "learning_rate": 2.0705393824949025e-06, + "loss": 2.0118, + "step": 23893 + }, + { + "epoch": 0.79, + "grad_norm": 0.7322697639465332, + "learning_rate": 2.0698918152968104e-06, + "loss": 2.0367, + "step": 23894 + }, + { + "epoch": 0.79, + "grad_norm": 0.7533455491065979, + "learning_rate": 2.0692443376884318e-06, + "loss": 1.982, + "step": 23895 + }, + { + "epoch": 0.8, + "grad_norm": 0.755894124507904, + "learning_rate": 2.0685969496770896e-06, + "loss": 2.0116, + "step": 23896 + }, + { + "epoch": 0.8, + "grad_norm": 0.7433171272277832, + "learning_rate": 2.067949651270085e-06, + "loss": 1.9765, + "step": 23897 + }, + { + "epoch": 0.8, + "grad_norm": 0.7190411686897278, + "learning_rate": 2.0673024424747356e-06, + "loss": 1.9739, + "step": 23898 + }, + { + "epoch": 0.8, + "grad_norm": 0.7932247519493103, + "learning_rate": 2.066655323298358e-06, + "loss": 2.1058, + "step": 23899 + }, + { + "epoch": 0.8, + "grad_norm": 0.7437026500701904, + "learning_rate": 2.066008293748255e-06, + "loss": 2.1022, + "step": 23900 + }, + { + "epoch": 0.8, + "grad_norm": 0.7600690126419067, + "learning_rate": 2.065361353831744e-06, + "loss": 2.0009, + "step": 23901 + }, + { + "epoch": 0.8, + "grad_norm": 0.8063015937805176, + "learning_rate": 2.064714503556131e-06, + "loss": 2.0684, + "step": 23902 + }, + { + "epoch": 0.8, + "grad_norm": 0.7964978814125061, + "learning_rate": 2.0640677429287203e-06, + "loss": 2.098, + "step": 23903 + }, + { + "epoch": 0.8, + "grad_norm": 0.7567841410636902, + "learning_rate": 2.0634210719568206e-06, + "loss": 2.0607, + "step": 23904 + }, + { + "epoch": 0.8, + "grad_norm": 0.7546612024307251, + "learning_rate": 2.062774490647741e-06, + "loss": 2.084, + "step": 23905 + }, + { + "epoch": 0.8, + "grad_norm": 0.7798894643783569, + "learning_rate": 2.062127999008784e-06, + "loss": 2.0694, + "step": 23906 + }, + { + "epoch": 0.8, + "grad_norm": 0.7356494665145874, + "learning_rate": 2.06148159704725e-06, + "loss": 2.0194, + "step": 23907 + }, + { + "epoch": 0.8, + "grad_norm": 0.7583564519882202, + "learning_rate": 2.0608352847704437e-06, + "loss": 2.0258, + "step": 23908 + }, + { + "epoch": 0.8, + "grad_norm": 0.7442474961280823, + "learning_rate": 2.0601890621856736e-06, + "loss": 2.0348, + "step": 23909 + }, + { + "epoch": 0.8, + "grad_norm": 0.7236694693565369, + "learning_rate": 2.059542929300229e-06, + "loss": 1.9994, + "step": 23910 + }, + { + "epoch": 0.8, + "grad_norm": 0.778202474117279, + "learning_rate": 2.058896886121415e-06, + "loss": 2.0915, + "step": 23911 + }, + { + "epoch": 0.8, + "grad_norm": 0.7426068782806396, + "learning_rate": 2.0582509326565324e-06, + "loss": 2.0977, + "step": 23912 + }, + { + "epoch": 0.8, + "grad_norm": 0.7896616458892822, + "learning_rate": 2.0576050689128734e-06, + "loss": 2.0654, + "step": 23913 + }, + { + "epoch": 0.8, + "grad_norm": 0.760249137878418, + "learning_rate": 2.0569592948977413e-06, + "loss": 2.0675, + "step": 23914 + }, + { + "epoch": 0.8, + "grad_norm": 0.7379205226898193, + "learning_rate": 2.056313610618428e-06, + "loss": 2.0539, + "step": 23915 + }, + { + "epoch": 0.8, + "grad_norm": 0.7622074484825134, + "learning_rate": 2.055668016082224e-06, + "loss": 2.0356, + "step": 23916 + }, + { + "epoch": 0.8, + "grad_norm": 0.7344079613685608, + "learning_rate": 2.0550225112964283e-06, + "loss": 2.0482, + "step": 23917 + }, + { + "epoch": 0.8, + "grad_norm": 0.726259708404541, + "learning_rate": 2.0543770962683363e-06, + "loss": 1.9909, + "step": 23918 + }, + { + "epoch": 0.8, + "grad_norm": 0.7238035798072815, + "learning_rate": 2.0537317710052305e-06, + "loss": 2.0489, + "step": 23919 + }, + { + "epoch": 0.8, + "grad_norm": 0.7438998818397522, + "learning_rate": 2.053086535514406e-06, + "loss": 2.0523, + "step": 23920 + }, + { + "epoch": 0.8, + "grad_norm": 0.7411240339279175, + "learning_rate": 2.052441389803156e-06, + "loss": 1.9882, + "step": 23921 + }, + { + "epoch": 0.8, + "grad_norm": 0.7896168231964111, + "learning_rate": 2.0517963338787617e-06, + "loss": 2.1195, + "step": 23922 + }, + { + "epoch": 0.8, + "grad_norm": 0.7211143970489502, + "learning_rate": 2.0511513677485173e-06, + "loss": 1.9445, + "step": 23923 + }, + { + "epoch": 0.8, + "grad_norm": 0.7509300112724304, + "learning_rate": 2.0505064914197036e-06, + "loss": 2.0168, + "step": 23924 + }, + { + "epoch": 0.8, + "grad_norm": 0.7656954526901245, + "learning_rate": 2.0498617048996117e-06, + "loss": 2.059, + "step": 23925 + }, + { + "epoch": 0.8, + "grad_norm": 0.7557909488677979, + "learning_rate": 2.04921700819552e-06, + "loss": 2.0314, + "step": 23926 + }, + { + "epoch": 0.8, + "grad_norm": 0.7507382035255432, + "learning_rate": 2.048572401314718e-06, + "loss": 2.0292, + "step": 23927 + }, + { + "epoch": 0.8, + "grad_norm": 0.7456069588661194, + "learning_rate": 2.0479278842644846e-06, + "loss": 2.0447, + "step": 23928 + }, + { + "epoch": 0.8, + "grad_norm": 0.741738498210907, + "learning_rate": 2.0472834570520983e-06, + "loss": 2.0075, + "step": 23929 + }, + { + "epoch": 0.8, + "grad_norm": 0.7369498610496521, + "learning_rate": 2.0466391196848432e-06, + "loss": 2.0073, + "step": 23930 + }, + { + "epoch": 0.8, + "grad_norm": 0.7572205066680908, + "learning_rate": 2.0459948721700016e-06, + "loss": 1.9837, + "step": 23931 + }, + { + "epoch": 0.8, + "grad_norm": 0.7596014738082886, + "learning_rate": 2.0453507145148487e-06, + "loss": 2.1312, + "step": 23932 + }, + { + "epoch": 0.8, + "grad_norm": 0.7297394871711731, + "learning_rate": 2.0447066467266576e-06, + "loss": 2.0573, + "step": 23933 + }, + { + "epoch": 0.8, + "grad_norm": 0.7534695863723755, + "learning_rate": 2.0440626688127117e-06, + "loss": 2.021, + "step": 23934 + }, + { + "epoch": 0.8, + "grad_norm": 0.7488219141960144, + "learning_rate": 2.043418780780281e-06, + "loss": 2.0653, + "step": 23935 + }, + { + "epoch": 0.8, + "grad_norm": 0.7560086250305176, + "learning_rate": 2.0427749826366438e-06, + "loss": 2.0519, + "step": 23936 + }, + { + "epoch": 0.8, + "grad_norm": 0.7344833612442017, + "learning_rate": 2.042131274389072e-06, + "loss": 2.0144, + "step": 23937 + }, + { + "epoch": 0.8, + "grad_norm": 0.7489877343177795, + "learning_rate": 2.041487656044834e-06, + "loss": 2.0662, + "step": 23938 + }, + { + "epoch": 0.8, + "grad_norm": 0.7554758787155151, + "learning_rate": 2.0408441276112047e-06, + "loss": 2.001, + "step": 23939 + }, + { + "epoch": 0.8, + "grad_norm": 0.7404783964157104, + "learning_rate": 2.040200689095456e-06, + "loss": 2.0867, + "step": 23940 + }, + { + "epoch": 0.8, + "grad_norm": 0.762158989906311, + "learning_rate": 2.0395573405048564e-06, + "loss": 2.0866, + "step": 23941 + }, + { + "epoch": 0.8, + "grad_norm": 0.8025885224342346, + "learning_rate": 2.038914081846669e-06, + "loss": 2.014, + "step": 23942 + }, + { + "epoch": 0.8, + "grad_norm": 0.7783696055412292, + "learning_rate": 2.0382709131281674e-06, + "loss": 2.0228, + "step": 23943 + }, + { + "epoch": 0.8, + "grad_norm": 0.7527107000350952, + "learning_rate": 2.0376278343566125e-06, + "loss": 2.0065, + "step": 23944 + }, + { + "epoch": 0.8, + "grad_norm": 0.7369482517242432, + "learning_rate": 2.036984845539275e-06, + "loss": 2.0645, + "step": 23945 + }, + { + "epoch": 0.8, + "grad_norm": 0.7583514451980591, + "learning_rate": 2.0363419466834122e-06, + "loss": 1.9463, + "step": 23946 + }, + { + "epoch": 0.8, + "grad_norm": 0.7693171501159668, + "learning_rate": 2.0356991377962944e-06, + "loss": 1.9586, + "step": 23947 + }, + { + "epoch": 0.8, + "grad_norm": 0.7182508111000061, + "learning_rate": 2.0350564188851773e-06, + "loss": 2.0296, + "step": 23948 + }, + { + "epoch": 0.8, + "grad_norm": 0.737296998500824, + "learning_rate": 2.034413789957328e-06, + "loss": 1.9869, + "step": 23949 + }, + { + "epoch": 0.8, + "grad_norm": 0.7439113259315491, + "learning_rate": 2.0337712510200026e-06, + "loss": 2.0601, + "step": 23950 + }, + { + "epoch": 0.8, + "grad_norm": 0.7364987134933472, + "learning_rate": 2.0331288020804585e-06, + "loss": 2.031, + "step": 23951 + }, + { + "epoch": 0.8, + "grad_norm": 0.739911675453186, + "learning_rate": 2.032486443145957e-06, + "loss": 2.0718, + "step": 23952 + }, + { + "epoch": 0.8, + "grad_norm": 0.7292237877845764, + "learning_rate": 2.031844174223756e-06, + "loss": 2.0703, + "step": 23953 + }, + { + "epoch": 0.8, + "grad_norm": 0.7598031163215637, + "learning_rate": 2.031201995321109e-06, + "loss": 2.0274, + "step": 23954 + }, + { + "epoch": 0.8, + "grad_norm": 0.7216752171516418, + "learning_rate": 2.030559906445271e-06, + "loss": 1.9849, + "step": 23955 + }, + { + "epoch": 0.8, + "grad_norm": 0.7204724550247192, + "learning_rate": 2.029917907603498e-06, + "loss": 2.0308, + "step": 23956 + }, + { + "epoch": 0.8, + "grad_norm": 0.7365097403526306, + "learning_rate": 2.0292759988030386e-06, + "loss": 2.0945, + "step": 23957 + }, + { + "epoch": 0.8, + "grad_norm": 0.7813359498977661, + "learning_rate": 2.028634180051151e-06, + "loss": 2.0448, + "step": 23958 + }, + { + "epoch": 0.8, + "grad_norm": 0.7346799969673157, + "learning_rate": 2.027992451355083e-06, + "loss": 2.0198, + "step": 23959 + }, + { + "epoch": 0.8, + "grad_norm": 0.7495178580284119, + "learning_rate": 2.027350812722081e-06, + "loss": 1.9696, + "step": 23960 + }, + { + "epoch": 0.8, + "grad_norm": 0.764894425868988, + "learning_rate": 2.0267092641593965e-06, + "loss": 2.0763, + "step": 23961 + }, + { + "epoch": 0.8, + "grad_norm": 0.7430404424667358, + "learning_rate": 2.0260678056742822e-06, + "loss": 1.9986, + "step": 23962 + }, + { + "epoch": 0.8, + "grad_norm": 0.7402697801589966, + "learning_rate": 2.0254264372739798e-06, + "loss": 2.0868, + "step": 23963 + }, + { + "epoch": 0.8, + "grad_norm": 0.7406750917434692, + "learning_rate": 2.024785158965733e-06, + "loss": 2.0541, + "step": 23964 + }, + { + "epoch": 0.8, + "grad_norm": 0.7259573340415955, + "learning_rate": 2.0241439707567925e-06, + "loss": 2.0298, + "step": 23965 + }, + { + "epoch": 0.8, + "grad_norm": 0.7383511066436768, + "learning_rate": 2.023502872654396e-06, + "loss": 2.0554, + "step": 23966 + }, + { + "epoch": 0.8, + "grad_norm": 0.7651811838150024, + "learning_rate": 2.0228618646657928e-06, + "loss": 2.0339, + "step": 23967 + }, + { + "epoch": 0.8, + "grad_norm": 0.7312690019607544, + "learning_rate": 2.022220946798218e-06, + "loss": 2.0476, + "step": 23968 + }, + { + "epoch": 0.8, + "grad_norm": 0.7294860482215881, + "learning_rate": 2.0215801190589177e-06, + "loss": 2.0607, + "step": 23969 + }, + { + "epoch": 0.8, + "grad_norm": 0.7321736812591553, + "learning_rate": 2.020939381455128e-06, + "loss": 2.0551, + "step": 23970 + }, + { + "epoch": 0.8, + "grad_norm": 0.7419860363006592, + "learning_rate": 2.0202987339940918e-06, + "loss": 2.0776, + "step": 23971 + }, + { + "epoch": 0.8, + "grad_norm": 0.7457311153411865, + "learning_rate": 2.0196581766830425e-06, + "loss": 2.0614, + "step": 23972 + }, + { + "epoch": 0.8, + "grad_norm": 0.7435533404350281, + "learning_rate": 2.0190177095292163e-06, + "loss": 2.0195, + "step": 23973 + }, + { + "epoch": 0.8, + "grad_norm": 0.7520368695259094, + "learning_rate": 2.0183773325398505e-06, + "loss": 2.1257, + "step": 23974 + }, + { + "epoch": 0.8, + "grad_norm": 0.778453528881073, + "learning_rate": 2.017737045722182e-06, + "loss": 2.0282, + "step": 23975 + }, + { + "epoch": 0.8, + "grad_norm": 0.7452737092971802, + "learning_rate": 2.017096849083443e-06, + "loss": 2.1122, + "step": 23976 + }, + { + "epoch": 0.8, + "grad_norm": 0.7365851998329163, + "learning_rate": 2.0164567426308634e-06, + "loss": 1.9858, + "step": 23977 + }, + { + "epoch": 0.8, + "grad_norm": 0.7520131468772888, + "learning_rate": 2.0158167263716786e-06, + "loss": 2.0711, + "step": 23978 + }, + { + "epoch": 0.8, + "grad_norm": 0.7548579573631287, + "learning_rate": 2.0151768003131145e-06, + "loss": 2.0226, + "step": 23979 + }, + { + "epoch": 0.8, + "grad_norm": 0.7525720000267029, + "learning_rate": 2.0145369644624056e-06, + "loss": 2.0466, + "step": 23980 + }, + { + "epoch": 0.8, + "grad_norm": 0.7186271548271179, + "learning_rate": 2.0138972188267793e-06, + "loss": 2.0355, + "step": 23981 + }, + { + "epoch": 0.8, + "grad_norm": 0.7449716925621033, + "learning_rate": 2.0132575634134577e-06, + "loss": 2.0929, + "step": 23982 + }, + { + "epoch": 0.8, + "grad_norm": 0.7404311299324036, + "learning_rate": 2.0126179982296724e-06, + "loss": 1.9712, + "step": 23983 + }, + { + "epoch": 0.8, + "grad_norm": 0.7164410948753357, + "learning_rate": 2.0119785232826503e-06, + "loss": 2.0172, + "step": 23984 + }, + { + "epoch": 0.8, + "grad_norm": 0.7498258352279663, + "learning_rate": 2.0113391385796145e-06, + "loss": 2.0528, + "step": 23985 + }, + { + "epoch": 0.8, + "grad_norm": 0.7182027101516724, + "learning_rate": 2.0106998441277837e-06, + "loss": 2.0678, + "step": 23986 + }, + { + "epoch": 0.8, + "grad_norm": 0.7292402982711792, + "learning_rate": 2.010060639934386e-06, + "loss": 2.0712, + "step": 23987 + }, + { + "epoch": 0.8, + "grad_norm": 0.7549788355827332, + "learning_rate": 2.0094215260066383e-06, + "loss": 1.9644, + "step": 23988 + }, + { + "epoch": 0.8, + "grad_norm": 0.7361125349998474, + "learning_rate": 2.0087825023517673e-06, + "loss": 2.0284, + "step": 23989 + }, + { + "epoch": 0.8, + "grad_norm": 0.7673788070678711, + "learning_rate": 2.0081435689769834e-06, + "loss": 2.0623, + "step": 23990 + }, + { + "epoch": 0.8, + "grad_norm": 0.7537578344345093, + "learning_rate": 2.007504725889514e-06, + "loss": 2.0111, + "step": 23991 + }, + { + "epoch": 0.8, + "grad_norm": 0.7365519404411316, + "learning_rate": 2.006865973096569e-06, + "loss": 2.0056, + "step": 23992 + }, + { + "epoch": 0.8, + "grad_norm": 0.740678608417511, + "learning_rate": 2.00622731060537e-06, + "loss": 2.0053, + "step": 23993 + }, + { + "epoch": 0.8, + "grad_norm": 0.7439206838607788, + "learning_rate": 2.005588738423131e-06, + "loss": 2.1465, + "step": 23994 + }, + { + "epoch": 0.8, + "grad_norm": 0.7328673601150513, + "learning_rate": 2.0049502565570612e-06, + "loss": 2.0286, + "step": 23995 + }, + { + "epoch": 0.8, + "grad_norm": 0.7167171835899353, + "learning_rate": 2.004311865014379e-06, + "loss": 2.0352, + "step": 23996 + }, + { + "epoch": 0.8, + "grad_norm": 0.7395989894866943, + "learning_rate": 2.0036735638022976e-06, + "loss": 2.098, + "step": 23997 + }, + { + "epoch": 0.8, + "grad_norm": 0.7301743030548096, + "learning_rate": 2.0030353529280267e-06, + "loss": 1.9987, + "step": 23998 + }, + { + "epoch": 0.8, + "grad_norm": 0.7665370106697083, + "learning_rate": 2.002397232398772e-06, + "loss": 2.0615, + "step": 23999 + }, + { + "epoch": 0.8, + "grad_norm": 0.7669080495834351, + "learning_rate": 2.001759202221749e-06, + "loss": 1.9261, + "step": 24000 + }, + { + "epoch": 0.8, + "grad_norm": 0.7636668682098389, + "learning_rate": 2.0011212624041622e-06, + "loss": 2.0242, + "step": 24001 + }, + { + "epoch": 0.8, + "grad_norm": 0.7591378688812256, + "learning_rate": 2.00048341295322e-06, + "loss": 2.0487, + "step": 24002 + }, + { + "epoch": 0.8, + "grad_norm": 0.752450704574585, + "learning_rate": 1.999845653876129e-06, + "loss": 1.9827, + "step": 24003 + }, + { + "epoch": 0.8, + "grad_norm": 0.7688359022140503, + "learning_rate": 1.9992079851800905e-06, + "loss": 2.0877, + "step": 24004 + }, + { + "epoch": 0.8, + "grad_norm": 0.7523253560066223, + "learning_rate": 1.998570406872311e-06, + "loss": 2.0538, + "step": 24005 + }, + { + "epoch": 0.8, + "grad_norm": 0.751998782157898, + "learning_rate": 1.9979329189599972e-06, + "loss": 2.0463, + "step": 24006 + }, + { + "epoch": 0.8, + "grad_norm": 0.7635302543640137, + "learning_rate": 1.9972955214503476e-06, + "loss": 2.0943, + "step": 24007 + }, + { + "epoch": 0.8, + "grad_norm": 0.7524195313453674, + "learning_rate": 1.9966582143505595e-06, + "loss": 2.1036, + "step": 24008 + }, + { + "epoch": 0.8, + "grad_norm": 0.7478085160255432, + "learning_rate": 1.996020997667837e-06, + "loss": 2.0446, + "step": 24009 + }, + { + "epoch": 0.8, + "grad_norm": 0.7267443537712097, + "learning_rate": 1.995383871409381e-06, + "loss": 2.0668, + "step": 24010 + }, + { + "epoch": 0.8, + "grad_norm": 0.7589496970176697, + "learning_rate": 1.9947468355823876e-06, + "loss": 2.0455, + "step": 24011 + }, + { + "epoch": 0.8, + "grad_norm": 0.7430739998817444, + "learning_rate": 1.994109890194049e-06, + "loss": 2.0173, + "step": 24012 + }, + { + "epoch": 0.8, + "grad_norm": 0.7363247871398926, + "learning_rate": 1.9934730352515685e-06, + "loss": 2.0629, + "step": 24013 + }, + { + "epoch": 0.8, + "grad_norm": 0.7687974572181702, + "learning_rate": 1.992836270762134e-06, + "loss": 2.0244, + "step": 24014 + }, + { + "epoch": 0.8, + "grad_norm": 0.7530408501625061, + "learning_rate": 1.992199596732943e-06, + "loss": 2.0465, + "step": 24015 + }, + { + "epoch": 0.8, + "grad_norm": 0.7589410543441772, + "learning_rate": 1.991563013171194e-06, + "loss": 2.034, + "step": 24016 + }, + { + "epoch": 0.8, + "grad_norm": 0.7424665093421936, + "learning_rate": 1.9909265200840667e-06, + "loss": 2.1034, + "step": 24017 + }, + { + "epoch": 0.8, + "grad_norm": 0.7523882389068604, + "learning_rate": 1.990290117478757e-06, + "loss": 2.1157, + "step": 24018 + }, + { + "epoch": 0.8, + "grad_norm": 0.7241427898406982, + "learning_rate": 1.989653805362459e-06, + "loss": 2.034, + "step": 24019 + }, + { + "epoch": 0.8, + "grad_norm": 0.7740873098373413, + "learning_rate": 1.9890175837423573e-06, + "loss": 2.0913, + "step": 24020 + }, + { + "epoch": 0.8, + "grad_norm": 0.7444481253623962, + "learning_rate": 1.9883814526256384e-06, + "loss": 1.991, + "step": 24021 + }, + { + "epoch": 0.8, + "grad_norm": 0.7467379570007324, + "learning_rate": 1.987745412019493e-06, + "loss": 1.9274, + "step": 24022 + }, + { + "epoch": 0.8, + "grad_norm": 0.7424873113632202, + "learning_rate": 1.9871094619311005e-06, + "loss": 2.0276, + "step": 24023 + }, + { + "epoch": 0.8, + "grad_norm": 0.7617087960243225, + "learning_rate": 1.9864736023676522e-06, + "loss": 2.1217, + "step": 24024 + }, + { + "epoch": 0.8, + "grad_norm": 0.7388319969177246, + "learning_rate": 1.985837833336327e-06, + "loss": 2.0002, + "step": 24025 + }, + { + "epoch": 0.8, + "grad_norm": 0.7753557562828064, + "learning_rate": 1.985202154844311e-06, + "loss": 2.0652, + "step": 24026 + }, + { + "epoch": 0.8, + "grad_norm": 0.7638347148895264, + "learning_rate": 1.9845665668987825e-06, + "loss": 2.0427, + "step": 24027 + }, + { + "epoch": 0.8, + "grad_norm": 0.759086549282074, + "learning_rate": 1.9839310695069248e-06, + "loss": 2.0997, + "step": 24028 + }, + { + "epoch": 0.8, + "grad_norm": 0.752033531665802, + "learning_rate": 1.983295662675916e-06, + "loss": 1.9378, + "step": 24029 + }, + { + "epoch": 0.8, + "grad_norm": 0.735769510269165, + "learning_rate": 1.9826603464129324e-06, + "loss": 2.0728, + "step": 24030 + }, + { + "epoch": 0.8, + "grad_norm": 0.7381555438041687, + "learning_rate": 1.982025120725154e-06, + "loss": 1.9881, + "step": 24031 + }, + { + "epoch": 0.8, + "grad_norm": 0.7404716610908508, + "learning_rate": 1.981389985619758e-06, + "loss": 2.058, + "step": 24032 + }, + { + "epoch": 0.8, + "grad_norm": 0.7416315674781799, + "learning_rate": 1.9807549411039204e-06, + "loss": 1.9967, + "step": 24033 + }, + { + "epoch": 0.8, + "grad_norm": 0.7627708315849304, + "learning_rate": 1.98011998718481e-06, + "loss": 1.9754, + "step": 24034 + }, + { + "epoch": 0.8, + "grad_norm": 0.749401867389679, + "learning_rate": 1.9794851238696066e-06, + "loss": 2.0708, + "step": 24035 + }, + { + "epoch": 0.8, + "grad_norm": 0.7820358872413635, + "learning_rate": 1.978850351165478e-06, + "loss": 1.96, + "step": 24036 + }, + { + "epoch": 0.8, + "grad_norm": 0.7129834890365601, + "learning_rate": 1.978215669079596e-06, + "loss": 1.9931, + "step": 24037 + }, + { + "epoch": 0.8, + "grad_norm": 0.764801561832428, + "learning_rate": 1.9775810776191372e-06, + "loss": 2.0179, + "step": 24038 + }, + { + "epoch": 0.8, + "grad_norm": 0.7585146427154541, + "learning_rate": 1.9769465767912622e-06, + "loss": 2.0483, + "step": 24039 + }, + { + "epoch": 0.8, + "grad_norm": 0.7263974547386169, + "learning_rate": 1.9763121666031416e-06, + "loss": 2.0331, + "step": 24040 + }, + { + "epoch": 0.8, + "grad_norm": 0.7646559476852417, + "learning_rate": 1.9756778470619463e-06, + "loss": 2.0819, + "step": 24041 + }, + { + "epoch": 0.8, + "grad_norm": 0.7539601922035217, + "learning_rate": 1.9750436181748413e-06, + "loss": 2.0248, + "step": 24042 + }, + { + "epoch": 0.8, + "grad_norm": 0.762263834476471, + "learning_rate": 1.9744094799489868e-06, + "loss": 1.9964, + "step": 24043 + }, + { + "epoch": 0.8, + "grad_norm": 0.7268050312995911, + "learning_rate": 1.9737754323915527e-06, + "loss": 2.0301, + "step": 24044 + }, + { + "epoch": 0.8, + "grad_norm": 0.7425455451011658, + "learning_rate": 1.973141475509698e-06, + "loss": 1.9889, + "step": 24045 + }, + { + "epoch": 0.8, + "grad_norm": 0.74796062707901, + "learning_rate": 1.972507609310587e-06, + "loss": 2.0551, + "step": 24046 + }, + { + "epoch": 0.8, + "grad_norm": 0.7582500576972961, + "learning_rate": 1.971873833801382e-06, + "loss": 2.0166, + "step": 24047 + }, + { + "epoch": 0.8, + "grad_norm": 0.7322655320167542, + "learning_rate": 1.971240148989242e-06, + "loss": 2.0465, + "step": 24048 + }, + { + "epoch": 0.8, + "grad_norm": 0.73333340883255, + "learning_rate": 1.9706065548813235e-06, + "loss": 2.03, + "step": 24049 + }, + { + "epoch": 0.8, + "grad_norm": 0.7201894521713257, + "learning_rate": 1.9699730514847882e-06, + "loss": 2.0407, + "step": 24050 + }, + { + "epoch": 0.8, + "grad_norm": 0.7542328834533691, + "learning_rate": 1.969339638806792e-06, + "loss": 2.0515, + "step": 24051 + }, + { + "epoch": 0.8, + "grad_norm": 0.726930558681488, + "learning_rate": 1.968706316854487e-06, + "loss": 2.0366, + "step": 24052 + }, + { + "epoch": 0.8, + "grad_norm": 0.7783737778663635, + "learning_rate": 1.9680730856350315e-06, + "loss": 2.044, + "step": 24053 + }, + { + "epoch": 0.8, + "grad_norm": 0.7424106001853943, + "learning_rate": 1.9674399451555813e-06, + "loss": 2.0433, + "step": 24054 + }, + { + "epoch": 0.8, + "grad_norm": 0.7547112703323364, + "learning_rate": 1.966806895423288e-06, + "loss": 2.0384, + "step": 24055 + }, + { + "epoch": 0.8, + "grad_norm": 0.7293472290039062, + "learning_rate": 1.966173936445299e-06, + "loss": 2.1143, + "step": 24056 + }, + { + "epoch": 0.8, + "grad_norm": 0.7412411570549011, + "learning_rate": 1.9655410682287713e-06, + "loss": 2.0596, + "step": 24057 + }, + { + "epoch": 0.8, + "grad_norm": 0.7491490244865417, + "learning_rate": 1.9649082907808494e-06, + "loss": 2.0692, + "step": 24058 + }, + { + "epoch": 0.8, + "grad_norm": 0.7542949914932251, + "learning_rate": 1.964275604108684e-06, + "loss": 2.0711, + "step": 24059 + }, + { + "epoch": 0.8, + "grad_norm": 0.7532376646995544, + "learning_rate": 1.96364300821943e-06, + "loss": 2.0434, + "step": 24060 + }, + { + "epoch": 0.8, + "grad_norm": 0.7357166409492493, + "learning_rate": 1.9630105031202217e-06, + "loss": 2.065, + "step": 24061 + }, + { + "epoch": 0.8, + "grad_norm": 0.737655520439148, + "learning_rate": 1.9623780888182107e-06, + "loss": 2.0546, + "step": 24062 + }, + { + "epoch": 0.8, + "grad_norm": 0.7477555274963379, + "learning_rate": 1.961745765320544e-06, + "loss": 2.0553, + "step": 24063 + }, + { + "epoch": 0.8, + "grad_norm": 0.742662787437439, + "learning_rate": 1.961113532634362e-06, + "loss": 2.0169, + "step": 24064 + }, + { + "epoch": 0.8, + "grad_norm": 0.7546602487564087, + "learning_rate": 1.9604813907668064e-06, + "loss": 2.0364, + "step": 24065 + }, + { + "epoch": 0.8, + "grad_norm": 0.7241734266281128, + "learning_rate": 1.9598493397250227e-06, + "loss": 2.0045, + "step": 24066 + }, + { + "epoch": 0.8, + "grad_norm": 0.7701930999755859, + "learning_rate": 1.9592173795161474e-06, + "loss": 2.0467, + "step": 24067 + }, + { + "epoch": 0.8, + "grad_norm": 0.7520945072174072, + "learning_rate": 1.9585855101473206e-06, + "loss": 2.0838, + "step": 24068 + }, + { + "epoch": 0.8, + "grad_norm": 0.7686367630958557, + "learning_rate": 1.957953731625686e-06, + "loss": 2.003, + "step": 24069 + }, + { + "epoch": 0.8, + "grad_norm": 0.7123660445213318, + "learning_rate": 1.957322043958375e-06, + "loss": 1.9866, + "step": 24070 + }, + { + "epoch": 0.8, + "grad_norm": 0.7771791815757751, + "learning_rate": 1.956690447152525e-06, + "loss": 2.0625, + "step": 24071 + }, + { + "epoch": 0.8, + "grad_norm": 0.7729154229164124, + "learning_rate": 1.956058941215274e-06, + "loss": 2.0498, + "step": 24072 + }, + { + "epoch": 0.8, + "grad_norm": 0.7548573613166809, + "learning_rate": 1.955427526153756e-06, + "loss": 2.1009, + "step": 24073 + }, + { + "epoch": 0.8, + "grad_norm": 0.7320715188980103, + "learning_rate": 1.9547962019751e-06, + "loss": 1.9936, + "step": 24074 + }, + { + "epoch": 0.8, + "grad_norm": 0.7527363896369934, + "learning_rate": 1.9541649686864417e-06, + "loss": 1.9876, + "step": 24075 + }, + { + "epoch": 0.8, + "grad_norm": 0.7464690804481506, + "learning_rate": 1.9535338262949154e-06, + "loss": 2.0665, + "step": 24076 + }, + { + "epoch": 0.8, + "grad_norm": 0.764543354511261, + "learning_rate": 1.9529027748076447e-06, + "loss": 2.0181, + "step": 24077 + }, + { + "epoch": 0.8, + "grad_norm": 0.7448415756225586, + "learning_rate": 1.9522718142317655e-06, + "loss": 1.9835, + "step": 24078 + }, + { + "epoch": 0.8, + "grad_norm": 0.7496123909950256, + "learning_rate": 1.9516409445744035e-06, + "loss": 2.0568, + "step": 24079 + }, + { + "epoch": 0.8, + "grad_norm": 0.7689026594161987, + "learning_rate": 1.9510101658426817e-06, + "loss": 2.1025, + "step": 24080 + }, + { + "epoch": 0.8, + "grad_norm": 0.7579641342163086, + "learning_rate": 1.95037947804373e-06, + "loss": 2.0651, + "step": 24081 + }, + { + "epoch": 0.8, + "grad_norm": 0.7261137962341309, + "learning_rate": 1.949748881184679e-06, + "loss": 2.0402, + "step": 24082 + }, + { + "epoch": 0.8, + "grad_norm": 0.7577558159828186, + "learning_rate": 1.9491183752726416e-06, + "loss": 2.0883, + "step": 24083 + }, + { + "epoch": 0.8, + "grad_norm": 0.7400363683700562, + "learning_rate": 1.9484879603147464e-06, + "loss": 2.0284, + "step": 24084 + }, + { + "epoch": 0.8, + "grad_norm": 0.754889726638794, + "learning_rate": 1.947857636318119e-06, + "loss": 2.0648, + "step": 24085 + }, + { + "epoch": 0.8, + "grad_norm": 0.7724670171737671, + "learning_rate": 1.9472274032898764e-06, + "loss": 2.1193, + "step": 24086 + }, + { + "epoch": 0.8, + "grad_norm": 0.7386720776557922, + "learning_rate": 1.9465972612371364e-06, + "loss": 2.036, + "step": 24087 + }, + { + "epoch": 0.8, + "grad_norm": 0.7616339921951294, + "learning_rate": 1.9459672101670247e-06, + "loss": 2.0859, + "step": 24088 + }, + { + "epoch": 0.8, + "grad_norm": 0.7527587413787842, + "learning_rate": 1.9453372500866507e-06, + "loss": 2.0723, + "step": 24089 + }, + { + "epoch": 0.8, + "grad_norm": 0.7683152556419373, + "learning_rate": 1.944707381003138e-06, + "loss": 2.0509, + "step": 24090 + }, + { + "epoch": 0.8, + "grad_norm": 0.7492933869361877, + "learning_rate": 1.9440776029236018e-06, + "loss": 2.0478, + "step": 24091 + }, + { + "epoch": 0.8, + "grad_norm": 0.7381847500801086, + "learning_rate": 1.9434479158551557e-06, + "loss": 2.0987, + "step": 24092 + }, + { + "epoch": 0.8, + "grad_norm": 0.7532115578651428, + "learning_rate": 1.942818319804911e-06, + "loss": 2.0831, + "step": 24093 + }, + { + "epoch": 0.8, + "grad_norm": 0.7682026624679565, + "learning_rate": 1.942188814779986e-06, + "loss": 2.0932, + "step": 24094 + }, + { + "epoch": 0.8, + "grad_norm": 0.7594884634017944, + "learning_rate": 1.941559400787488e-06, + "loss": 2.0073, + "step": 24095 + }, + { + "epoch": 0.8, + "grad_norm": 0.7494890689849854, + "learning_rate": 1.9409300778345287e-06, + "loss": 2.0255, + "step": 24096 + }, + { + "epoch": 0.8, + "grad_norm": 0.7350327372550964, + "learning_rate": 1.9403008459282167e-06, + "loss": 1.9862, + "step": 24097 + }, + { + "epoch": 0.8, + "grad_norm": 0.746798574924469, + "learning_rate": 1.9396717050756654e-06, + "loss": 2.1184, + "step": 24098 + }, + { + "epoch": 0.8, + "grad_norm": 0.7848989963531494, + "learning_rate": 1.939042655283977e-06, + "loss": 2.0574, + "step": 24099 + }, + { + "epoch": 0.8, + "grad_norm": 0.7219083309173584, + "learning_rate": 1.938413696560263e-06, + "loss": 2.0618, + "step": 24100 + }, + { + "epoch": 0.8, + "grad_norm": 0.7361511588096619, + "learning_rate": 1.9377848289116263e-06, + "loss": 2.0771, + "step": 24101 + }, + { + "epoch": 0.8, + "grad_norm": 0.7353638410568237, + "learning_rate": 1.937156052345168e-06, + "loss": 2.0397, + "step": 24102 + }, + { + "epoch": 0.8, + "grad_norm": 0.737454891204834, + "learning_rate": 1.9365273668679974e-06, + "loss": 2.0409, + "step": 24103 + }, + { + "epoch": 0.8, + "grad_norm": 0.7594074606895447, + "learning_rate": 1.935898772487219e-06, + "loss": 2.0741, + "step": 24104 + }, + { + "epoch": 0.8, + "grad_norm": 0.7397553324699402, + "learning_rate": 1.9352702692099256e-06, + "loss": 2.0583, + "step": 24105 + }, + { + "epoch": 0.8, + "grad_norm": 0.7256757020950317, + "learning_rate": 1.9346418570432213e-06, + "loss": 2.0659, + "step": 24106 + }, + { + "epoch": 0.8, + "grad_norm": 0.7591580748558044, + "learning_rate": 1.93401353599421e-06, + "loss": 1.9941, + "step": 24107 + }, + { + "epoch": 0.8, + "grad_norm": 0.7584403157234192, + "learning_rate": 1.933385306069986e-06, + "loss": 1.9703, + "step": 24108 + }, + { + "epoch": 0.8, + "grad_norm": 0.7407048940658569, + "learning_rate": 1.9327571672776456e-06, + "loss": 2.0749, + "step": 24109 + }, + { + "epoch": 0.8, + "grad_norm": 0.7671871781349182, + "learning_rate": 1.9321291196242865e-06, + "loss": 2.0419, + "step": 24110 + }, + { + "epoch": 0.8, + "grad_norm": 0.7658242583274841, + "learning_rate": 1.9315011631170067e-06, + "loss": 2.0648, + "step": 24111 + }, + { + "epoch": 0.8, + "grad_norm": 0.7539470791816711, + "learning_rate": 1.930873297762895e-06, + "loss": 2.0399, + "step": 24112 + }, + { + "epoch": 0.8, + "grad_norm": 0.7473941445350647, + "learning_rate": 1.9302455235690522e-06, + "loss": 2.0335, + "step": 24113 + }, + { + "epoch": 0.8, + "grad_norm": 0.7680037617683411, + "learning_rate": 1.929617840542565e-06, + "loss": 2.0427, + "step": 24114 + }, + { + "epoch": 0.8, + "grad_norm": 0.7454906105995178, + "learning_rate": 1.928990248690523e-06, + "loss": 2.065, + "step": 24115 + }, + { + "epoch": 0.8, + "grad_norm": 0.7474365830421448, + "learning_rate": 1.9283627480200196e-06, + "loss": 2.0427, + "step": 24116 + }, + { + "epoch": 0.8, + "grad_norm": 0.7344571948051453, + "learning_rate": 1.9277353385381483e-06, + "loss": 1.9783, + "step": 24117 + }, + { + "epoch": 0.8, + "grad_norm": 0.7475095987319946, + "learning_rate": 1.9271080202519864e-06, + "loss": 2.0415, + "step": 24118 + }, + { + "epoch": 0.8, + "grad_norm": 0.7664642930030823, + "learning_rate": 1.926480793168628e-06, + "loss": 2.0616, + "step": 24119 + }, + { + "epoch": 0.8, + "grad_norm": 0.7399535179138184, + "learning_rate": 1.9258536572951605e-06, + "loss": 2.1128, + "step": 24120 + }, + { + "epoch": 0.8, + "grad_norm": 0.7486439943313599, + "learning_rate": 1.925226612638663e-06, + "loss": 2.0144, + "step": 24121 + }, + { + "epoch": 0.8, + "grad_norm": 0.7437174916267395, + "learning_rate": 1.9245996592062266e-06, + "loss": 2.0846, + "step": 24122 + }, + { + "epoch": 0.8, + "grad_norm": 0.723362922668457, + "learning_rate": 1.9239727970049306e-06, + "loss": 2.0121, + "step": 24123 + }, + { + "epoch": 0.8, + "grad_norm": 0.7494576573371887, + "learning_rate": 1.9233460260418533e-06, + "loss": 2.1364, + "step": 24124 + }, + { + "epoch": 0.8, + "grad_norm": 0.7557364106178284, + "learning_rate": 1.9227193463240802e-06, + "loss": 2.0217, + "step": 24125 + }, + { + "epoch": 0.8, + "grad_norm": 0.7501938343048096, + "learning_rate": 1.9220927578586924e-06, + "loss": 2.0727, + "step": 24126 + }, + { + "epoch": 0.8, + "grad_norm": 0.7384154796600342, + "learning_rate": 1.921466260652767e-06, + "loss": 1.9668, + "step": 24127 + }, + { + "epoch": 0.8, + "grad_norm": 0.7558481097221375, + "learning_rate": 1.9208398547133778e-06, + "loss": 2.0331, + "step": 24128 + }, + { + "epoch": 0.8, + "grad_norm": 0.7469013333320618, + "learning_rate": 1.9202135400476073e-06, + "loss": 2.112, + "step": 24129 + }, + { + "epoch": 0.8, + "grad_norm": 0.7902927994728088, + "learning_rate": 1.919587316662528e-06, + "loss": 2.0786, + "step": 24130 + }, + { + "epoch": 0.8, + "grad_norm": 0.7178947329521179, + "learning_rate": 1.9189611845652166e-06, + "loss": 1.9932, + "step": 24131 + }, + { + "epoch": 0.8, + "grad_norm": 0.7487528920173645, + "learning_rate": 1.918335143762744e-06, + "loss": 2.1002, + "step": 24132 + }, + { + "epoch": 0.8, + "grad_norm": 0.7504995465278625, + "learning_rate": 1.917709194262187e-06, + "loss": 2.0312, + "step": 24133 + }, + { + "epoch": 0.8, + "grad_norm": 0.7460945844650269, + "learning_rate": 1.9170833360706133e-06, + "loss": 2.0274, + "step": 24134 + }, + { + "epoch": 0.8, + "grad_norm": 0.7296907305717468, + "learning_rate": 1.916457569195097e-06, + "loss": 1.9545, + "step": 24135 + }, + { + "epoch": 0.8, + "grad_norm": 0.7328383922576904, + "learning_rate": 1.9158318936427044e-06, + "loss": 2.0897, + "step": 24136 + }, + { + "epoch": 0.8, + "grad_norm": 0.746177077293396, + "learning_rate": 1.9152063094205042e-06, + "loss": 2.033, + "step": 24137 + }, + { + "epoch": 0.8, + "grad_norm": 0.7522748112678528, + "learning_rate": 1.914580816535565e-06, + "loss": 2.0787, + "step": 24138 + }, + { + "epoch": 0.8, + "grad_norm": 0.7220520377159119, + "learning_rate": 1.913955414994958e-06, + "loss": 2.006, + "step": 24139 + }, + { + "epoch": 0.8, + "grad_norm": 0.730384349822998, + "learning_rate": 1.9133301048057383e-06, + "loss": 2.0606, + "step": 24140 + }, + { + "epoch": 0.8, + "grad_norm": 0.7597929835319519, + "learning_rate": 1.9127048859749753e-06, + "loss": 2.0293, + "step": 24141 + }, + { + "epoch": 0.8, + "grad_norm": 0.7728990912437439, + "learning_rate": 1.9120797585097363e-06, + "loss": 2.0415, + "step": 24142 + }, + { + "epoch": 0.8, + "grad_norm": 0.755099892616272, + "learning_rate": 1.9114547224170774e-06, + "loss": 2.0398, + "step": 24143 + }, + { + "epoch": 0.8, + "grad_norm": 0.7230612635612488, + "learning_rate": 1.9108297777040664e-06, + "loss": 2.1003, + "step": 24144 + }, + { + "epoch": 0.8, + "grad_norm": 0.7630913853645325, + "learning_rate": 1.910204924377759e-06, + "loss": 2.1595, + "step": 24145 + }, + { + "epoch": 0.8, + "grad_norm": 0.7458087205886841, + "learning_rate": 1.9095801624452117e-06, + "loss": 2.0185, + "step": 24146 + }, + { + "epoch": 0.8, + "grad_norm": 0.7574073672294617, + "learning_rate": 1.9089554919134868e-06, + "loss": 2.0602, + "step": 24147 + }, + { + "epoch": 0.8, + "grad_norm": 0.7308272123336792, + "learning_rate": 1.9083309127896443e-06, + "loss": 2.0527, + "step": 24148 + }, + { + "epoch": 0.8, + "grad_norm": 0.7744826078414917, + "learning_rate": 1.9077064250807365e-06, + "loss": 2.1446, + "step": 24149 + }, + { + "epoch": 0.8, + "grad_norm": 0.7543187737464905, + "learning_rate": 1.9070820287938164e-06, + "loss": 2.0392, + "step": 24150 + }, + { + "epoch": 0.8, + "grad_norm": 0.733694851398468, + "learning_rate": 1.9064577239359428e-06, + "loss": 2.072, + "step": 24151 + }, + { + "epoch": 0.8, + "grad_norm": 0.7475532293319702, + "learning_rate": 1.9058335105141645e-06, + "loss": 2.057, + "step": 24152 + }, + { + "epoch": 0.8, + "grad_norm": 0.7242964506149292, + "learning_rate": 1.9052093885355382e-06, + "loss": 2.0223, + "step": 24153 + }, + { + "epoch": 0.8, + "grad_norm": 0.7716489434242249, + "learning_rate": 1.9045853580071093e-06, + "loss": 2.0581, + "step": 24154 + }, + { + "epoch": 0.8, + "grad_norm": 0.7742037177085876, + "learning_rate": 1.9039614189359334e-06, + "loss": 2.0087, + "step": 24155 + }, + { + "epoch": 0.8, + "grad_norm": 0.7815915942192078, + "learning_rate": 1.9033375713290535e-06, + "loss": 2.0686, + "step": 24156 + }, + { + "epoch": 0.8, + "grad_norm": 0.7441413402557373, + "learning_rate": 1.9027138151935242e-06, + "loss": 2.0397, + "step": 24157 + }, + { + "epoch": 0.8, + "grad_norm": 0.7537321448326111, + "learning_rate": 1.9020901505363887e-06, + "loss": 1.9799, + "step": 24158 + }, + { + "epoch": 0.8, + "grad_norm": 0.7504726648330688, + "learning_rate": 1.9014665773646889e-06, + "loss": 2.0355, + "step": 24159 + }, + { + "epoch": 0.8, + "grad_norm": 0.7748067378997803, + "learning_rate": 1.900843095685475e-06, + "loss": 2.0549, + "step": 24160 + }, + { + "epoch": 0.8, + "grad_norm": 0.7617127895355225, + "learning_rate": 1.9002197055057914e-06, + "loss": 2.0635, + "step": 24161 + }, + { + "epoch": 0.8, + "grad_norm": 0.756274938583374, + "learning_rate": 1.8995964068326777e-06, + "loss": 2.0603, + "step": 24162 + }, + { + "epoch": 0.8, + "grad_norm": 0.7469486594200134, + "learning_rate": 1.8989731996731752e-06, + "loss": 2.0262, + "step": 24163 + }, + { + "epoch": 0.8, + "grad_norm": 0.7266932725906372, + "learning_rate": 1.8983500840343282e-06, + "loss": 2.0538, + "step": 24164 + }, + { + "epoch": 0.8, + "grad_norm": 0.7373751997947693, + "learning_rate": 1.89772705992317e-06, + "loss": 2.1132, + "step": 24165 + }, + { + "epoch": 0.8, + "grad_norm": 0.748246967792511, + "learning_rate": 1.8971041273467472e-06, + "loss": 2.0472, + "step": 24166 + }, + { + "epoch": 0.8, + "grad_norm": 0.7331543564796448, + "learning_rate": 1.896481286312093e-06, + "loss": 2.0709, + "step": 24167 + }, + { + "epoch": 0.8, + "grad_norm": 0.7441941499710083, + "learning_rate": 1.8958585368262405e-06, + "loss": 2.0342, + "step": 24168 + }, + { + "epoch": 0.8, + "grad_norm": 0.7428725957870483, + "learning_rate": 1.8952358788962299e-06, + "loss": 2.1066, + "step": 24169 + }, + { + "epoch": 0.8, + "grad_norm": 0.7549260854721069, + "learning_rate": 1.8946133125290966e-06, + "loss": 2.0823, + "step": 24170 + }, + { + "epoch": 0.8, + "grad_norm": 0.7421898245811462, + "learning_rate": 1.8939908377318717e-06, + "loss": 2.0553, + "step": 24171 + }, + { + "epoch": 0.8, + "grad_norm": 0.7397654056549072, + "learning_rate": 1.893368454511585e-06, + "loss": 1.9952, + "step": 24172 + }, + { + "epoch": 0.8, + "grad_norm": 0.7762250900268555, + "learning_rate": 1.8927461628752741e-06, + "loss": 2.167, + "step": 24173 + }, + { + "epoch": 0.8, + "grad_norm": 0.7685840129852295, + "learning_rate": 1.8921239628299626e-06, + "loss": 2.0001, + "step": 24174 + }, + { + "epoch": 0.8, + "grad_norm": 0.7320700883865356, + "learning_rate": 1.8915018543826846e-06, + "loss": 2.0365, + "step": 24175 + }, + { + "epoch": 0.8, + "grad_norm": 0.7516846656799316, + "learning_rate": 1.8908798375404646e-06, + "loss": 2.094, + "step": 24176 + }, + { + "epoch": 0.8, + "grad_norm": 0.7538803815841675, + "learning_rate": 1.8902579123103348e-06, + "loss": 2.0586, + "step": 24177 + }, + { + "epoch": 0.8, + "grad_norm": 0.7362875938415527, + "learning_rate": 1.8896360786993162e-06, + "loss": 2.0555, + "step": 24178 + }, + { + "epoch": 0.8, + "grad_norm": 0.723981499671936, + "learning_rate": 1.8890143367144375e-06, + "loss": 2.0026, + "step": 24179 + }, + { + "epoch": 0.8, + "grad_norm": 0.7648736834526062, + "learning_rate": 1.8883926863627223e-06, + "loss": 2.0623, + "step": 24180 + }, + { + "epoch": 0.8, + "grad_norm": 0.7295845746994019, + "learning_rate": 1.887771127651189e-06, + "loss": 2.0973, + "step": 24181 + }, + { + "epoch": 0.8, + "grad_norm": 0.7280145287513733, + "learning_rate": 1.8871496605868634e-06, + "loss": 2.1062, + "step": 24182 + }, + { + "epoch": 0.8, + "grad_norm": 0.7307288646697998, + "learning_rate": 1.8865282851767697e-06, + "loss": 2.0041, + "step": 24183 + }, + { + "epoch": 0.8, + "grad_norm": 0.7433872222900391, + "learning_rate": 1.8859070014279245e-06, + "loss": 2.0079, + "step": 24184 + }, + { + "epoch": 0.8, + "grad_norm": 0.7533906102180481, + "learning_rate": 1.8852858093473437e-06, + "loss": 2.0088, + "step": 24185 + }, + { + "epoch": 0.8, + "grad_norm": 0.7069349884986877, + "learning_rate": 1.88466470894205e-06, + "loss": 2.003, + "step": 24186 + }, + { + "epoch": 0.8, + "grad_norm": 0.7571207284927368, + "learning_rate": 1.8840437002190571e-06, + "loss": 2.0912, + "step": 24187 + }, + { + "epoch": 0.8, + "grad_norm": 0.751305103302002, + "learning_rate": 1.8834227831853835e-06, + "loss": 2.0636, + "step": 24188 + }, + { + "epoch": 0.8, + "grad_norm": 0.7948052883148193, + "learning_rate": 1.8828019578480428e-06, + "loss": 2.0791, + "step": 24189 + }, + { + "epoch": 0.8, + "grad_norm": 0.7479168772697449, + "learning_rate": 1.882181224214047e-06, + "loss": 2.0507, + "step": 24190 + }, + { + "epoch": 0.8, + "grad_norm": 0.7845588326454163, + "learning_rate": 1.8815605822904093e-06, + "loss": 2.1017, + "step": 24191 + }, + { + "epoch": 0.8, + "grad_norm": 0.774436354637146, + "learning_rate": 1.8809400320841443e-06, + "loss": 1.9819, + "step": 24192 + }, + { + "epoch": 0.8, + "grad_norm": 0.7503799796104431, + "learning_rate": 1.8803195736022618e-06, + "loss": 2.0706, + "step": 24193 + }, + { + "epoch": 0.8, + "grad_norm": 0.7276161313056946, + "learning_rate": 1.879699206851766e-06, + "loss": 1.9625, + "step": 24194 + }, + { + "epoch": 0.8, + "grad_norm": 0.7501667737960815, + "learning_rate": 1.879078931839673e-06, + "loss": 2.0434, + "step": 24195 + }, + { + "epoch": 0.81, + "grad_norm": 0.7379202246665955, + "learning_rate": 1.8784587485729843e-06, + "loss": 2.0551, + "step": 24196 + }, + { + "epoch": 0.81, + "grad_norm": 0.7356285452842712, + "learning_rate": 1.8778386570587125e-06, + "loss": 2.0099, + "step": 24197 + }, + { + "epoch": 0.81, + "grad_norm": 0.7162164449691772, + "learning_rate": 1.8772186573038553e-06, + "loss": 2.0268, + "step": 24198 + }, + { + "epoch": 0.81, + "grad_norm": 0.7319517135620117, + "learning_rate": 1.8765987493154247e-06, + "loss": 2.0886, + "step": 24199 + }, + { + "epoch": 0.81, + "grad_norm": 0.7349777817726135, + "learning_rate": 1.8759789331004185e-06, + "loss": 2.0355, + "step": 24200 + }, + { + "epoch": 0.81, + "grad_norm": 0.7635425329208374, + "learning_rate": 1.8753592086658434e-06, + "loss": 2.0042, + "step": 24201 + }, + { + "epoch": 0.81, + "grad_norm": 0.7250452637672424, + "learning_rate": 1.874739576018698e-06, + "loss": 2.0276, + "step": 24202 + }, + { + "epoch": 0.81, + "grad_norm": 0.7262831330299377, + "learning_rate": 1.8741200351659805e-06, + "loss": 2.0616, + "step": 24203 + }, + { + "epoch": 0.81, + "grad_norm": 0.7490938305854797, + "learning_rate": 1.8735005861146927e-06, + "loss": 2.0727, + "step": 24204 + }, + { + "epoch": 0.81, + "grad_norm": 0.765129804611206, + "learning_rate": 1.8728812288718357e-06, + "loss": 2.0386, + "step": 24205 + }, + { + "epoch": 0.81, + "grad_norm": 0.7243542671203613, + "learning_rate": 1.872261963444404e-06, + "loss": 2.0339, + "step": 24206 + }, + { + "epoch": 0.81, + "grad_norm": 0.7413199543952942, + "learning_rate": 1.8716427898393896e-06, + "loss": 1.9846, + "step": 24207 + }, + { + "epoch": 0.81, + "grad_norm": 0.7519736886024475, + "learning_rate": 1.8710237080637938e-06, + "loss": 1.9928, + "step": 24208 + }, + { + "epoch": 0.81, + "grad_norm": 0.7478240728378296, + "learning_rate": 1.8704047181246065e-06, + "loss": 2.0685, + "step": 24209 + }, + { + "epoch": 0.81, + "grad_norm": 0.7507136464118958, + "learning_rate": 1.8697858200288244e-06, + "loss": 2.051, + "step": 24210 + }, + { + "epoch": 0.81, + "grad_norm": 0.7429404258728027, + "learning_rate": 1.869167013783435e-06, + "loss": 2.0438, + "step": 24211 + }, + { + "epoch": 0.81, + "grad_norm": 0.7333064079284668, + "learning_rate": 1.8685482993954341e-06, + "loss": 2.004, + "step": 24212 + }, + { + "epoch": 0.81, + "grad_norm": 0.7925693988800049, + "learning_rate": 1.867929676871806e-06, + "loss": 2.0626, + "step": 24213 + }, + { + "epoch": 0.81, + "grad_norm": 0.731850266456604, + "learning_rate": 1.8673111462195449e-06, + "loss": 1.987, + "step": 24214 + }, + { + "epoch": 0.81, + "grad_norm": 0.7237173318862915, + "learning_rate": 1.8666927074456365e-06, + "loss": 1.9953, + "step": 24215 + }, + { + "epoch": 0.81, + "grad_norm": 0.7579448819160461, + "learning_rate": 1.8660743605570652e-06, + "loss": 2.0952, + "step": 24216 + }, + { + "epoch": 0.81, + "grad_norm": 0.7799676656723022, + "learning_rate": 1.865456105560819e-06, + "loss": 2.0487, + "step": 24217 + }, + { + "epoch": 0.81, + "grad_norm": 0.7362228631973267, + "learning_rate": 1.864837942463884e-06, + "loss": 1.9845, + "step": 24218 + }, + { + "epoch": 0.81, + "grad_norm": 0.7605487704277039, + "learning_rate": 1.864219871273243e-06, + "loss": 2.0962, + "step": 24219 + }, + { + "epoch": 0.81, + "grad_norm": 0.7434645295143127, + "learning_rate": 1.8636018919958753e-06, + "loss": 2.02, + "step": 24220 + }, + { + "epoch": 0.81, + "grad_norm": 0.8152747750282288, + "learning_rate": 1.862984004638767e-06, + "loss": 2.0741, + "step": 24221 + }, + { + "epoch": 0.81, + "grad_norm": 0.7325339317321777, + "learning_rate": 1.8623662092088945e-06, + "loss": 2.0053, + "step": 24222 + }, + { + "epoch": 0.81, + "grad_norm": 0.7410983443260193, + "learning_rate": 1.861748505713239e-06, + "loss": 2.0838, + "step": 24223 + }, + { + "epoch": 0.81, + "grad_norm": 0.7392004728317261, + "learning_rate": 1.8611308941587858e-06, + "loss": 1.964, + "step": 24224 + }, + { + "epoch": 0.81, + "grad_norm": 0.7942520976066589, + "learning_rate": 1.8605133745524995e-06, + "loss": 2.005, + "step": 24225 + }, + { + "epoch": 0.81, + "grad_norm": 0.7314419150352478, + "learning_rate": 1.8598959469013634e-06, + "loss": 1.9997, + "step": 24226 + }, + { + "epoch": 0.81, + "grad_norm": 0.7434040904045105, + "learning_rate": 1.859278611212354e-06, + "loss": 2.0482, + "step": 24227 + }, + { + "epoch": 0.81, + "grad_norm": 0.7133690714836121, + "learning_rate": 1.8586613674924447e-06, + "loss": 1.9944, + "step": 24228 + }, + { + "epoch": 0.81, + "grad_norm": 0.748551607131958, + "learning_rate": 1.8580442157486056e-06, + "loss": 2.014, + "step": 24229 + }, + { + "epoch": 0.81, + "grad_norm": 0.7272062301635742, + "learning_rate": 1.8574271559878144e-06, + "loss": 2.0472, + "step": 24230 + }, + { + "epoch": 0.81, + "grad_norm": 0.7297928333282471, + "learning_rate": 1.8568101882170353e-06, + "loss": 2.0206, + "step": 24231 + }, + { + "epoch": 0.81, + "grad_norm": 0.7379338145256042, + "learning_rate": 1.8561933124432451e-06, + "loss": 2.0572, + "step": 24232 + }, + { + "epoch": 0.81, + "grad_norm": 0.7397128939628601, + "learning_rate": 1.8555765286734084e-06, + "loss": 2.012, + "step": 24233 + }, + { + "epoch": 0.81, + "grad_norm": 0.7521272897720337, + "learning_rate": 1.8549598369144972e-06, + "loss": 2.002, + "step": 24234 + }, + { + "epoch": 0.81, + "grad_norm": 0.7513400912284851, + "learning_rate": 1.8543432371734738e-06, + "loss": 2.0356, + "step": 24235 + }, + { + "epoch": 0.81, + "grad_norm": 0.7428647875785828, + "learning_rate": 1.853726729457309e-06, + "loss": 2.1105, + "step": 24236 + }, + { + "epoch": 0.81, + "grad_norm": 0.7444289922714233, + "learning_rate": 1.8531103137729656e-06, + "loss": 2.0222, + "step": 24237 + }, + { + "epoch": 0.81, + "grad_norm": 0.7444307804107666, + "learning_rate": 1.8524939901274042e-06, + "loss": 1.9939, + "step": 24238 + }, + { + "epoch": 0.81, + "grad_norm": 0.7816635370254517, + "learning_rate": 1.8518777585275916e-06, + "loss": 2.0135, + "step": 24239 + }, + { + "epoch": 0.81, + "grad_norm": 0.7352049946784973, + "learning_rate": 1.851261618980491e-06, + "loss": 2.0776, + "step": 24240 + }, + { + "epoch": 0.81, + "grad_norm": 0.7782276272773743, + "learning_rate": 1.8506455714930604e-06, + "loss": 2.1488, + "step": 24241 + }, + { + "epoch": 0.81, + "grad_norm": 0.7558673620223999, + "learning_rate": 1.8500296160722586e-06, + "loss": 2.0549, + "step": 24242 + }, + { + "epoch": 0.81, + "grad_norm": 0.7305112481117249, + "learning_rate": 1.8494137527250476e-06, + "loss": 2.0475, + "step": 24243 + }, + { + "epoch": 0.81, + "grad_norm": 0.7354997992515564, + "learning_rate": 1.8487979814583812e-06, + "loss": 2.0819, + "step": 24244 + }, + { + "epoch": 0.81, + "grad_norm": 0.7304483652114868, + "learning_rate": 1.8481823022792177e-06, + "loss": 2.0333, + "step": 24245 + }, + { + "epoch": 0.81, + "grad_norm": 0.7496348023414612, + "learning_rate": 1.8475667151945187e-06, + "loss": 1.999, + "step": 24246 + }, + { + "epoch": 0.81, + "grad_norm": 0.7594172358512878, + "learning_rate": 1.8469512202112283e-06, + "loss": 1.9934, + "step": 24247 + }, + { + "epoch": 0.81, + "grad_norm": 0.7247515320777893, + "learning_rate": 1.8463358173363045e-06, + "loss": 2.138, + "step": 24248 + }, + { + "epoch": 0.81, + "grad_norm": 0.7295111417770386, + "learning_rate": 1.8457205065767026e-06, + "loss": 2.0598, + "step": 24249 + }, + { + "epoch": 0.81, + "grad_norm": 0.7577369809150696, + "learning_rate": 1.8451052879393715e-06, + "loss": 2.0408, + "step": 24250 + }, + { + "epoch": 0.81, + "grad_norm": 0.735059916973114, + "learning_rate": 1.8444901614312593e-06, + "loss": 2.086, + "step": 24251 + }, + { + "epoch": 0.81, + "grad_norm": 0.7383008003234863, + "learning_rate": 1.8438751270593202e-06, + "loss": 2.0402, + "step": 24252 + }, + { + "epoch": 0.81, + "grad_norm": 0.7569987177848816, + "learning_rate": 1.8432601848304976e-06, + "loss": 2.0279, + "step": 24253 + }, + { + "epoch": 0.81, + "grad_norm": 0.7318865656852722, + "learning_rate": 1.8426453347517403e-06, + "loss": 2.0462, + "step": 24254 + }, + { + "epoch": 0.81, + "grad_norm": 0.7530444860458374, + "learning_rate": 1.8420305768299983e-06, + "loss": 1.9845, + "step": 24255 + }, + { + "epoch": 0.81, + "grad_norm": 0.7730318903923035, + "learning_rate": 1.841415911072214e-06, + "loss": 2.1055, + "step": 24256 + }, + { + "epoch": 0.81, + "grad_norm": 0.7818357348442078, + "learning_rate": 1.8408013374853284e-06, + "loss": 2.0696, + "step": 24257 + }, + { + "epoch": 0.81, + "grad_norm": 0.7372931838035583, + "learning_rate": 1.8401868560762903e-06, + "loss": 2.0524, + "step": 24258 + }, + { + "epoch": 0.81, + "grad_norm": 0.79176926612854, + "learning_rate": 1.8395724668520398e-06, + "loss": 2.1298, + "step": 24259 + }, + { + "epoch": 0.81, + "grad_norm": 0.7549452185630798, + "learning_rate": 1.8389581698195136e-06, + "loss": 2.0029, + "step": 24260 + }, + { + "epoch": 0.81, + "grad_norm": 0.7337024211883545, + "learning_rate": 1.8383439649856548e-06, + "loss": 2.0061, + "step": 24261 + }, + { + "epoch": 0.81, + "grad_norm": 0.8021436333656311, + "learning_rate": 1.837729852357406e-06, + "loss": 2.0347, + "step": 24262 + }, + { + "epoch": 0.81, + "grad_norm": 0.7428921461105347, + "learning_rate": 1.8371158319417015e-06, + "loss": 2.0107, + "step": 24263 + }, + { + "epoch": 0.81, + "grad_norm": 0.7261497974395752, + "learning_rate": 1.8365019037454757e-06, + "loss": 2.0697, + "step": 24264 + }, + { + "epoch": 0.81, + "grad_norm": 0.77569180727005, + "learning_rate": 1.8358880677756707e-06, + "loss": 2.1293, + "step": 24265 + }, + { + "epoch": 0.81, + "grad_norm": 0.7533355951309204, + "learning_rate": 1.8352743240392135e-06, + "loss": 2.1046, + "step": 24266 + }, + { + "epoch": 0.81, + "grad_norm": 0.7461055517196655, + "learning_rate": 1.8346606725430426e-06, + "loss": 2.1007, + "step": 24267 + }, + { + "epoch": 0.81, + "grad_norm": 0.7565174698829651, + "learning_rate": 1.8340471132940962e-06, + "loss": 2.0734, + "step": 24268 + }, + { + "epoch": 0.81, + "grad_norm": 0.7349610924720764, + "learning_rate": 1.833433646299293e-06, + "loss": 2.0042, + "step": 24269 + }, + { + "epoch": 0.81, + "grad_norm": 0.7646200060844421, + "learning_rate": 1.832820271565572e-06, + "loss": 2.1137, + "step": 24270 + }, + { + "epoch": 0.81, + "grad_norm": 0.7635456919670105, + "learning_rate": 1.832206989099863e-06, + "loss": 2.0842, + "step": 24271 + }, + { + "epoch": 0.81, + "grad_norm": 0.7432968020439148, + "learning_rate": 1.8315937989090926e-06, + "loss": 2.0698, + "step": 24272 + }, + { + "epoch": 0.81, + "grad_norm": 0.7361358404159546, + "learning_rate": 1.8309807010001856e-06, + "loss": 2.0567, + "step": 24273 + }, + { + "epoch": 0.81, + "grad_norm": 0.7535871863365173, + "learning_rate": 1.8303676953800731e-06, + "loss": 1.9669, + "step": 24274 + }, + { + "epoch": 0.81, + "grad_norm": 0.7485437393188477, + "learning_rate": 1.829754782055677e-06, + "loss": 2.0713, + "step": 24275 + }, + { + "epoch": 0.81, + "grad_norm": 0.7223719954490662, + "learning_rate": 1.8291419610339222e-06, + "loss": 2.0482, + "step": 24276 + }, + { + "epoch": 0.81, + "grad_norm": 0.7448598146438599, + "learning_rate": 1.8285292323217362e-06, + "loss": 2.0152, + "step": 24277 + }, + { + "epoch": 0.81, + "grad_norm": 0.7425165176391602, + "learning_rate": 1.827916595926038e-06, + "loss": 2.0454, + "step": 24278 + }, + { + "epoch": 0.81, + "grad_norm": 0.7546934485435486, + "learning_rate": 1.8273040518537466e-06, + "loss": 2.0462, + "step": 24279 + }, + { + "epoch": 0.81, + "grad_norm": 0.7458992004394531, + "learning_rate": 1.8266916001117862e-06, + "loss": 2.051, + "step": 24280 + }, + { + "epoch": 0.81, + "grad_norm": 0.7696303725242615, + "learning_rate": 1.8260792407070737e-06, + "loss": 2.0479, + "step": 24281 + }, + { + "epoch": 0.81, + "grad_norm": 0.7640809416770935, + "learning_rate": 1.8254669736465257e-06, + "loss": 2.0034, + "step": 24282 + }, + { + "epoch": 0.81, + "grad_norm": 0.7493027448654175, + "learning_rate": 1.8248547989370614e-06, + "loss": 2.0333, + "step": 24283 + }, + { + "epoch": 0.81, + "grad_norm": 0.729155421257019, + "learning_rate": 1.8242427165855981e-06, + "loss": 2.0756, + "step": 24284 + }, + { + "epoch": 0.81, + "grad_norm": 0.7434857487678528, + "learning_rate": 1.8236307265990493e-06, + "loss": 2.0386, + "step": 24285 + }, + { + "epoch": 0.81, + "grad_norm": 0.7426034212112427, + "learning_rate": 1.8230188289843265e-06, + "loss": 2.0366, + "step": 24286 + }, + { + "epoch": 0.81, + "grad_norm": 0.7583605051040649, + "learning_rate": 1.8224070237483471e-06, + "loss": 2.0197, + "step": 24287 + }, + { + "epoch": 0.81, + "grad_norm": 0.7473025918006897, + "learning_rate": 1.821795310898019e-06, + "loss": 2.0556, + "step": 24288 + }, + { + "epoch": 0.81, + "grad_norm": 0.7454568147659302, + "learning_rate": 1.8211836904402536e-06, + "loss": 2.157, + "step": 24289 + }, + { + "epoch": 0.81, + "grad_norm": 0.7470406293869019, + "learning_rate": 1.8205721623819672e-06, + "loss": 2.0367, + "step": 24290 + }, + { + "epoch": 0.81, + "grad_norm": 0.7272387146949768, + "learning_rate": 1.8199607267300568e-06, + "loss": 2.0466, + "step": 24291 + }, + { + "epoch": 0.81, + "grad_norm": 0.7336909174919128, + "learning_rate": 1.8193493834914366e-06, + "loss": 2.0512, + "step": 24292 + }, + { + "epoch": 0.81, + "grad_norm": 0.7517204880714417, + "learning_rate": 1.8187381326730158e-06, + "loss": 2.0297, + "step": 24293 + }, + { + "epoch": 0.81, + "grad_norm": 0.7709594964981079, + "learning_rate": 1.8181269742816965e-06, + "loss": 2.075, + "step": 24294 + }, + { + "epoch": 0.81, + "grad_norm": 0.7747277021408081, + "learning_rate": 1.8175159083243809e-06, + "loss": 2.0304, + "step": 24295 + }, + { + "epoch": 0.81, + "grad_norm": 0.7449919581413269, + "learning_rate": 1.8169049348079782e-06, + "loss": 2.0595, + "step": 24296 + }, + { + "epoch": 0.81, + "grad_norm": 0.7367678880691528, + "learning_rate": 1.8162940537393859e-06, + "loss": 1.9926, + "step": 24297 + }, + { + "epoch": 0.81, + "grad_norm": 0.7377228736877441, + "learning_rate": 1.8156832651255064e-06, + "loss": 2.0452, + "step": 24298 + }, + { + "epoch": 0.81, + "grad_norm": 0.721007227897644, + "learning_rate": 1.815072568973243e-06, + "loss": 2.0229, + "step": 24299 + }, + { + "epoch": 0.81, + "grad_norm": 0.7533445358276367, + "learning_rate": 1.8144619652894936e-06, + "loss": 2.0375, + "step": 24300 + }, + { + "epoch": 0.81, + "grad_norm": 0.7165380716323853, + "learning_rate": 1.8138514540811525e-06, + "loss": 2.0677, + "step": 24301 + }, + { + "epoch": 0.81, + "grad_norm": 0.7336419820785522, + "learning_rate": 1.81324103535512e-06, + "loss": 1.9819, + "step": 24302 + }, + { + "epoch": 0.81, + "grad_norm": 0.7546417713165283, + "learning_rate": 1.8126307091182982e-06, + "loss": 2.054, + "step": 24303 + }, + { + "epoch": 0.81, + "grad_norm": 0.7387623190879822, + "learning_rate": 1.8120204753775717e-06, + "loss": 2.0184, + "step": 24304 + }, + { + "epoch": 0.81, + "grad_norm": 0.7303354740142822, + "learning_rate": 1.811410334139838e-06, + "loss": 2.0145, + "step": 24305 + }, + { + "epoch": 0.81, + "grad_norm": 0.7097549438476562, + "learning_rate": 1.8108002854119945e-06, + "loss": 2.0453, + "step": 24306 + }, + { + "epoch": 0.81, + "grad_norm": 0.7465159893035889, + "learning_rate": 1.810190329200927e-06, + "loss": 2.0736, + "step": 24307 + }, + { + "epoch": 0.81, + "grad_norm": 0.7227551937103271, + "learning_rate": 1.809580465513533e-06, + "loss": 2.09, + "step": 24308 + }, + { + "epoch": 0.81, + "grad_norm": 0.7380690574645996, + "learning_rate": 1.8089706943566987e-06, + "loss": 2.0706, + "step": 24309 + }, + { + "epoch": 0.81, + "grad_norm": 0.7471098303794861, + "learning_rate": 1.8083610157373098e-06, + "loss": 2.0924, + "step": 24310 + }, + { + "epoch": 0.81, + "grad_norm": 0.7550942897796631, + "learning_rate": 1.8077514296622578e-06, + "loss": 2.0029, + "step": 24311 + }, + { + "epoch": 0.81, + "grad_norm": 0.7423034310340881, + "learning_rate": 1.8071419361384335e-06, + "loss": 2.0569, + "step": 24312 + }, + { + "epoch": 0.81, + "grad_norm": 0.7246367931365967, + "learning_rate": 1.8065325351727136e-06, + "loss": 2.0088, + "step": 24313 + }, + { + "epoch": 0.81, + "grad_norm": 0.7443142533302307, + "learning_rate": 1.8059232267719872e-06, + "loss": 2.0058, + "step": 24314 + }, + { + "epoch": 0.81, + "grad_norm": 0.7584161758422852, + "learning_rate": 1.805314010943141e-06, + "loss": 2.0581, + "step": 24315 + }, + { + "epoch": 0.81, + "grad_norm": 0.7913779020309448, + "learning_rate": 1.804704887693054e-06, + "loss": 2.1017, + "step": 24316 + }, + { + "epoch": 0.81, + "grad_norm": 0.7487267255783081, + "learning_rate": 1.804095857028606e-06, + "loss": 2.0235, + "step": 24317 + }, + { + "epoch": 0.81, + "grad_norm": 0.7411563992500305, + "learning_rate": 1.8034869189566794e-06, + "loss": 2.083, + "step": 24318 + }, + { + "epoch": 0.81, + "grad_norm": 0.7459330558776855, + "learning_rate": 1.8028780734841567e-06, + "loss": 1.9975, + "step": 24319 + }, + { + "epoch": 0.81, + "grad_norm": 0.7521233558654785, + "learning_rate": 1.802269320617911e-06, + "loss": 2.0539, + "step": 24320 + }, + { + "epoch": 0.81, + "grad_norm": 0.7269390821456909, + "learning_rate": 1.8016606603648246e-06, + "loss": 2.0699, + "step": 24321 + }, + { + "epoch": 0.81, + "grad_norm": 0.7520357966423035, + "learning_rate": 1.8010520927317709e-06, + "loss": 2.0059, + "step": 24322 + }, + { + "epoch": 0.81, + "grad_norm": 0.7402631044387817, + "learning_rate": 1.8004436177256236e-06, + "loss": 2.0631, + "step": 24323 + }, + { + "epoch": 0.81, + "grad_norm": 0.7807687520980835, + "learning_rate": 1.7998352353532588e-06, + "loss": 2.0677, + "step": 24324 + }, + { + "epoch": 0.81, + "grad_norm": 0.748363196849823, + "learning_rate": 1.799226945621555e-06, + "loss": 2.0269, + "step": 24325 + }, + { + "epoch": 0.81, + "grad_norm": 0.76650470495224, + "learning_rate": 1.798618748537374e-06, + "loss": 2.1512, + "step": 24326 + }, + { + "epoch": 0.81, + "grad_norm": 0.7281331419944763, + "learning_rate": 1.7980106441075917e-06, + "loss": 1.9759, + "step": 24327 + }, + { + "epoch": 0.81, + "grad_norm": 0.7175100445747375, + "learning_rate": 1.7974026323390814e-06, + "loss": 2.0111, + "step": 24328 + }, + { + "epoch": 0.81, + "grad_norm": 0.7649305462837219, + "learning_rate": 1.7967947132387054e-06, + "loss": 2.1579, + "step": 24329 + }, + { + "epoch": 0.81, + "grad_norm": 0.7481555342674255, + "learning_rate": 1.7961868868133392e-06, + "loss": 2.0544, + "step": 24330 + }, + { + "epoch": 0.81, + "grad_norm": 0.7388166189193726, + "learning_rate": 1.795579153069844e-06, + "loss": 2.0633, + "step": 24331 + }, + { + "epoch": 0.81, + "grad_norm": 0.7547290921211243, + "learning_rate": 1.7949715120150856e-06, + "loss": 2.1221, + "step": 24332 + }, + { + "epoch": 0.81, + "grad_norm": 0.768934428691864, + "learning_rate": 1.7943639636559306e-06, + "loss": 2.0204, + "step": 24333 + }, + { + "epoch": 0.81, + "grad_norm": 0.7694311141967773, + "learning_rate": 1.7937565079992447e-06, + "loss": 2.0482, + "step": 24334 + }, + { + "epoch": 0.81, + "grad_norm": 0.7396800518035889, + "learning_rate": 1.7931491450518879e-06, + "loss": 2.0407, + "step": 24335 + }, + { + "epoch": 0.81, + "grad_norm": 0.724143922328949, + "learning_rate": 1.7925418748207212e-06, + "loss": 2.0506, + "step": 24336 + }, + { + "epoch": 0.81, + "grad_norm": 0.7323275208473206, + "learning_rate": 1.7919346973126074e-06, + "loss": 2.0657, + "step": 24337 + }, + { + "epoch": 0.81, + "grad_norm": 0.7558744549751282, + "learning_rate": 1.7913276125344038e-06, + "loss": 2.0874, + "step": 24338 + }, + { + "epoch": 0.81, + "grad_norm": 0.713801920413971, + "learning_rate": 1.7907206204929716e-06, + "loss": 2.0723, + "step": 24339 + }, + { + "epoch": 0.81, + "grad_norm": 0.7538487911224365, + "learning_rate": 1.7901137211951648e-06, + "loss": 2.0333, + "step": 24340 + }, + { + "epoch": 0.81, + "grad_norm": 0.7670883536338806, + "learning_rate": 1.789506914647844e-06, + "loss": 2.0802, + "step": 24341 + }, + { + "epoch": 0.81, + "grad_norm": 0.7542937994003296, + "learning_rate": 1.7889002008578593e-06, + "loss": 2.0642, + "step": 24342 + }, + { + "epoch": 0.81, + "grad_norm": 0.7365074157714844, + "learning_rate": 1.7882935798320712e-06, + "loss": 1.9792, + "step": 24343 + }, + { + "epoch": 0.81, + "grad_norm": 0.7487741708755493, + "learning_rate": 1.7876870515773292e-06, + "loss": 2.075, + "step": 24344 + }, + { + "epoch": 0.81, + "grad_norm": 0.7365841269493103, + "learning_rate": 1.787080616100484e-06, + "loss": 2.0661, + "step": 24345 + }, + { + "epoch": 0.81, + "grad_norm": 0.7152438163757324, + "learning_rate": 1.7864742734083884e-06, + "loss": 1.9892, + "step": 24346 + }, + { + "epoch": 0.81, + "grad_norm": 0.7796722054481506, + "learning_rate": 1.7858680235078984e-06, + "loss": 2.0126, + "step": 24347 + }, + { + "epoch": 0.81, + "grad_norm": 0.7420482039451599, + "learning_rate": 1.7852618664058518e-06, + "loss": 2.0385, + "step": 24348 + }, + { + "epoch": 0.81, + "grad_norm": 0.7448991537094116, + "learning_rate": 1.7846558021091032e-06, + "loss": 2.0307, + "step": 24349 + }, + { + "epoch": 0.81, + "grad_norm": 0.7769578099250793, + "learning_rate": 1.7840498306245001e-06, + "loss": 2.0363, + "step": 24350 + }, + { + "epoch": 0.81, + "grad_norm": 0.7710939645767212, + "learning_rate": 1.7834439519588854e-06, + "loss": 2.0827, + "step": 24351 + }, + { + "epoch": 0.81, + "grad_norm": 0.7319108843803406, + "learning_rate": 1.7828381661191075e-06, + "loss": 2.0423, + "step": 24352 + }, + { + "epoch": 0.81, + "grad_norm": 0.7591949701309204, + "learning_rate": 1.7822324731120078e-06, + "loss": 2.09, + "step": 24353 + }, + { + "epoch": 0.81, + "grad_norm": 0.777458667755127, + "learning_rate": 1.7816268729444287e-06, + "loss": 2.1064, + "step": 24354 + }, + { + "epoch": 0.81, + "grad_norm": 0.7271568775177002, + "learning_rate": 1.7810213656232111e-06, + "loss": 2.0306, + "step": 24355 + }, + { + "epoch": 0.81, + "grad_norm": 0.746362030506134, + "learning_rate": 1.7804159511552e-06, + "loss": 1.9994, + "step": 24356 + }, + { + "epoch": 0.81, + "grad_norm": 0.7325948476791382, + "learning_rate": 1.7798106295472328e-06, + "loss": 2.0616, + "step": 24357 + }, + { + "epoch": 0.81, + "grad_norm": 0.753436267375946, + "learning_rate": 1.7792054008061456e-06, + "loss": 2.0934, + "step": 24358 + }, + { + "epoch": 0.81, + "grad_norm": 0.7316683530807495, + "learning_rate": 1.77860026493878e-06, + "loss": 2.0174, + "step": 24359 + }, + { + "epoch": 0.81, + "grad_norm": 0.7557212114334106, + "learning_rate": 1.7779952219519669e-06, + "loss": 2.0863, + "step": 24360 + }, + { + "epoch": 0.81, + "grad_norm": 0.7890271544456482, + "learning_rate": 1.7773902718525493e-06, + "loss": 2.0682, + "step": 24361 + }, + { + "epoch": 0.81, + "grad_norm": 0.7312085628509521, + "learning_rate": 1.776785414647354e-06, + "loss": 1.9849, + "step": 24362 + }, + { + "epoch": 0.81, + "grad_norm": 0.7421086430549622, + "learning_rate": 1.776180650343221e-06, + "loss": 2.0384, + "step": 24363 + }, + { + "epoch": 0.81, + "grad_norm": 0.7418829202651978, + "learning_rate": 1.7755759789469762e-06, + "loss": 2.0585, + "step": 24364 + }, + { + "epoch": 0.81, + "grad_norm": 0.7719664573669434, + "learning_rate": 1.7749714004654562e-06, + "loss": 1.9915, + "step": 24365 + }, + { + "epoch": 0.81, + "grad_norm": 0.7319421768188477, + "learning_rate": 1.7743669149054898e-06, + "loss": 2.0369, + "step": 24366 + }, + { + "epoch": 0.81, + "grad_norm": 0.7165801525115967, + "learning_rate": 1.773762522273903e-06, + "loss": 2.0054, + "step": 24367 + }, + { + "epoch": 0.81, + "grad_norm": 0.7573260068893433, + "learning_rate": 1.7731582225775256e-06, + "loss": 2.0323, + "step": 24368 + }, + { + "epoch": 0.81, + "grad_norm": 0.7286785244941711, + "learning_rate": 1.772554015823188e-06, + "loss": 2.0224, + "step": 24369 + }, + { + "epoch": 0.81, + "grad_norm": 0.7448521256446838, + "learning_rate": 1.7719499020177122e-06, + "loss": 2.0357, + "step": 24370 + }, + { + "epoch": 0.81, + "grad_norm": 0.7336699366569519, + "learning_rate": 1.771345881167923e-06, + "loss": 2.079, + "step": 24371 + }, + { + "epoch": 0.81, + "grad_norm": 0.7484866380691528, + "learning_rate": 1.770741953280648e-06, + "loss": 2.0529, + "step": 24372 + }, + { + "epoch": 0.81, + "grad_norm": 0.7529163360595703, + "learning_rate": 1.7701381183627052e-06, + "loss": 2.0525, + "step": 24373 + }, + { + "epoch": 0.81, + "grad_norm": 0.738771378993988, + "learning_rate": 1.7695343764209205e-06, + "loss": 2.0645, + "step": 24374 + }, + { + "epoch": 0.81, + "grad_norm": 0.7359669804573059, + "learning_rate": 1.7689307274621137e-06, + "loss": 2.0542, + "step": 24375 + }, + { + "epoch": 0.81, + "grad_norm": 0.7188895344734192, + "learning_rate": 1.7683271714931005e-06, + "loss": 2.0076, + "step": 24376 + }, + { + "epoch": 0.81, + "grad_norm": 0.7403409481048584, + "learning_rate": 1.7677237085207034e-06, + "loss": 2.0245, + "step": 24377 + }, + { + "epoch": 0.81, + "grad_norm": 0.7194327712059021, + "learning_rate": 1.767120338551741e-06, + "loss": 2.0119, + "step": 24378 + }, + { + "epoch": 0.81, + "grad_norm": 0.7731668949127197, + "learning_rate": 1.7665170615930295e-06, + "loss": 2.045, + "step": 24379 + }, + { + "epoch": 0.81, + "grad_norm": 0.7777301073074341, + "learning_rate": 1.7659138776513784e-06, + "loss": 2.0728, + "step": 24380 + }, + { + "epoch": 0.81, + "grad_norm": 0.7535193562507629, + "learning_rate": 1.7653107867336106e-06, + "loss": 2.0004, + "step": 24381 + }, + { + "epoch": 0.81, + "grad_norm": 0.729555070400238, + "learning_rate": 1.7647077888465325e-06, + "loss": 2.045, + "step": 24382 + }, + { + "epoch": 0.81, + "grad_norm": 0.7446118593215942, + "learning_rate": 1.764104883996962e-06, + "loss": 2.0728, + "step": 24383 + }, + { + "epoch": 0.81, + "grad_norm": 0.7420564293861389, + "learning_rate": 1.7635020721917052e-06, + "loss": 2.0523, + "step": 24384 + }, + { + "epoch": 0.81, + "grad_norm": 0.7345011830329895, + "learning_rate": 1.7628993534375783e-06, + "loss": 2.0804, + "step": 24385 + }, + { + "epoch": 0.81, + "grad_norm": 0.7475037574768066, + "learning_rate": 1.7622967277413837e-06, + "loss": 2.0768, + "step": 24386 + }, + { + "epoch": 0.81, + "grad_norm": 0.7558488845825195, + "learning_rate": 1.7616941951099354e-06, + "loss": 2.0538, + "step": 24387 + }, + { + "epoch": 0.81, + "grad_norm": 0.7316545248031616, + "learning_rate": 1.7610917555500384e-06, + "loss": 2.0174, + "step": 24388 + }, + { + "epoch": 0.81, + "grad_norm": 0.7688521146774292, + "learning_rate": 1.7604894090684955e-06, + "loss": 2.0717, + "step": 24389 + }, + { + "epoch": 0.81, + "grad_norm": 0.7228105068206787, + "learning_rate": 1.7598871556721143e-06, + "loss": 1.9627, + "step": 24390 + }, + { + "epoch": 0.81, + "grad_norm": 0.7335205674171448, + "learning_rate": 1.7592849953677016e-06, + "loss": 2.0189, + "step": 24391 + }, + { + "epoch": 0.81, + "grad_norm": 0.7589786052703857, + "learning_rate": 1.7586829281620566e-06, + "loss": 2.0804, + "step": 24392 + }, + { + "epoch": 0.81, + "grad_norm": 0.7369424104690552, + "learning_rate": 1.7580809540619803e-06, + "loss": 2.0434, + "step": 24393 + }, + { + "epoch": 0.81, + "grad_norm": 0.7552473545074463, + "learning_rate": 1.7574790730742775e-06, + "loss": 2.099, + "step": 24394 + }, + { + "epoch": 0.81, + "grad_norm": 0.732036292552948, + "learning_rate": 1.7568772852057436e-06, + "loss": 2.0671, + "step": 24395 + }, + { + "epoch": 0.81, + "grad_norm": 0.736139178276062, + "learning_rate": 1.7562755904631811e-06, + "loss": 2.0631, + "step": 24396 + }, + { + "epoch": 0.81, + "grad_norm": 0.7451645135879517, + "learning_rate": 1.7556739888533858e-06, + "loss": 2.0529, + "step": 24397 + }, + { + "epoch": 0.81, + "grad_norm": 0.7382019758224487, + "learning_rate": 1.755072480383152e-06, + "loss": 1.9722, + "step": 24398 + }, + { + "epoch": 0.81, + "grad_norm": 0.7661020755767822, + "learning_rate": 1.7544710650592767e-06, + "loss": 1.9932, + "step": 24399 + }, + { + "epoch": 0.81, + "grad_norm": 0.7296252250671387, + "learning_rate": 1.7538697428885577e-06, + "loss": 2.0048, + "step": 24400 + }, + { + "epoch": 0.81, + "grad_norm": 0.7404804229736328, + "learning_rate": 1.753268513877786e-06, + "loss": 2.0838, + "step": 24401 + }, + { + "epoch": 0.81, + "grad_norm": 0.7494199275970459, + "learning_rate": 1.75266737803375e-06, + "loss": 2.0682, + "step": 24402 + }, + { + "epoch": 0.81, + "grad_norm": 0.7747084498405457, + "learning_rate": 1.7520663353632461e-06, + "loss": 2.0458, + "step": 24403 + }, + { + "epoch": 0.81, + "grad_norm": 0.7472316026687622, + "learning_rate": 1.7514653858730646e-06, + "loss": 2.0449, + "step": 24404 + }, + { + "epoch": 0.81, + "grad_norm": 0.7199827432632446, + "learning_rate": 1.7508645295699922e-06, + "loss": 2.0145, + "step": 24405 + }, + { + "epoch": 0.81, + "grad_norm": 0.7447462677955627, + "learning_rate": 1.750263766460817e-06, + "loss": 2.0239, + "step": 24406 + }, + { + "epoch": 0.81, + "grad_norm": 0.7584530115127563, + "learning_rate": 1.7496630965523287e-06, + "loss": 2.0929, + "step": 24407 + }, + { + "epoch": 0.81, + "grad_norm": 0.7105960845947266, + "learning_rate": 1.749062519851309e-06, + "loss": 2.0289, + "step": 24408 + }, + { + "epoch": 0.81, + "grad_norm": 0.7256761193275452, + "learning_rate": 1.7484620363645477e-06, + "loss": 1.9687, + "step": 24409 + }, + { + "epoch": 0.81, + "grad_norm": 0.7138117551803589, + "learning_rate": 1.7478616460988274e-06, + "loss": 2.0241, + "step": 24410 + }, + { + "epoch": 0.81, + "grad_norm": 0.7528107762336731, + "learning_rate": 1.7472613490609259e-06, + "loss": 2.0375, + "step": 24411 + }, + { + "epoch": 0.81, + "grad_norm": 0.747986912727356, + "learning_rate": 1.7466611452576299e-06, + "loss": 2.1121, + "step": 24412 + }, + { + "epoch": 0.81, + "grad_norm": 0.7571225762367249, + "learning_rate": 1.746061034695723e-06, + "loss": 2.0614, + "step": 24413 + }, + { + "epoch": 0.81, + "grad_norm": 0.7674474120140076, + "learning_rate": 1.7454610173819797e-06, + "loss": 2.0404, + "step": 24414 + }, + { + "epoch": 0.81, + "grad_norm": 0.7537508010864258, + "learning_rate": 1.744861093323178e-06, + "loss": 2.0357, + "step": 24415 + }, + { + "epoch": 0.81, + "grad_norm": 0.7553339004516602, + "learning_rate": 1.7442612625261003e-06, + "loss": 2.0646, + "step": 24416 + }, + { + "epoch": 0.81, + "grad_norm": 0.7386004328727722, + "learning_rate": 1.7436615249975186e-06, + "loss": 1.9859, + "step": 24417 + }, + { + "epoch": 0.81, + "grad_norm": 0.7623127698898315, + "learning_rate": 1.7430618807442123e-06, + "loss": 2.031, + "step": 24418 + }, + { + "epoch": 0.81, + "grad_norm": 0.7410336136817932, + "learning_rate": 1.7424623297729515e-06, + "loss": 1.985, + "step": 24419 + }, + { + "epoch": 0.81, + "grad_norm": 0.7407962679862976, + "learning_rate": 1.741862872090514e-06, + "loss": 1.948, + "step": 24420 + }, + { + "epoch": 0.81, + "grad_norm": 0.7624854445457458, + "learning_rate": 1.7412635077036678e-06, + "loss": 2.0483, + "step": 24421 + }, + { + "epoch": 0.81, + "grad_norm": 0.7255503535270691, + "learning_rate": 1.7406642366191883e-06, + "loss": 1.9762, + "step": 24422 + }, + { + "epoch": 0.81, + "grad_norm": 0.7390708327293396, + "learning_rate": 1.740065058843845e-06, + "loss": 2.1457, + "step": 24423 + }, + { + "epoch": 0.81, + "grad_norm": 0.7563819289207458, + "learning_rate": 1.7394659743844022e-06, + "loss": 1.9916, + "step": 24424 + }, + { + "epoch": 0.81, + "grad_norm": 0.7777734398841858, + "learning_rate": 1.7388669832476324e-06, + "loss": 2.0669, + "step": 24425 + }, + { + "epoch": 0.81, + "grad_norm": 0.7708575129508972, + "learning_rate": 1.7382680854403044e-06, + "loss": 2.0379, + "step": 24426 + }, + { + "epoch": 0.81, + "grad_norm": 0.7511506676673889, + "learning_rate": 1.737669280969182e-06, + "loss": 1.9981, + "step": 24427 + }, + { + "epoch": 0.81, + "grad_norm": 0.7509694695472717, + "learning_rate": 1.7370705698410261e-06, + "loss": 2.1004, + "step": 24428 + }, + { + "epoch": 0.81, + "grad_norm": 0.7440566420555115, + "learning_rate": 1.7364719520626084e-06, + "loss": 1.9691, + "step": 24429 + }, + { + "epoch": 0.81, + "grad_norm": 0.7336593270301819, + "learning_rate": 1.7358734276406841e-06, + "loss": 2.0368, + "step": 24430 + }, + { + "epoch": 0.81, + "grad_norm": 0.7451946139335632, + "learning_rate": 1.7352749965820214e-06, + "loss": 2.0238, + "step": 24431 + }, + { + "epoch": 0.81, + "grad_norm": 0.7545853853225708, + "learning_rate": 1.7346766588933783e-06, + "loss": 2.0358, + "step": 24432 + }, + { + "epoch": 0.81, + "grad_norm": 0.7318029403686523, + "learning_rate": 1.7340784145815115e-06, + "loss": 2.0155, + "step": 24433 + }, + { + "epoch": 0.81, + "grad_norm": 0.723529577255249, + "learning_rate": 1.7334802636531834e-06, + "loss": 2.1013, + "step": 24434 + }, + { + "epoch": 0.81, + "grad_norm": 0.747162401676178, + "learning_rate": 1.7328822061151518e-06, + "loss": 2.0689, + "step": 24435 + }, + { + "epoch": 0.81, + "grad_norm": 0.7324395179748535, + "learning_rate": 1.7322842419741726e-06, + "loss": 2.0203, + "step": 24436 + }, + { + "epoch": 0.81, + "grad_norm": 0.7566588521003723, + "learning_rate": 1.731686371236999e-06, + "loss": 2.1046, + "step": 24437 + }, + { + "epoch": 0.81, + "grad_norm": 0.7557225227355957, + "learning_rate": 1.7310885939103883e-06, + "loss": 2.0895, + "step": 24438 + }, + { + "epoch": 0.81, + "grad_norm": 0.7387061715126038, + "learning_rate": 1.7304909100010902e-06, + "loss": 2.0108, + "step": 24439 + }, + { + "epoch": 0.81, + "grad_norm": 0.7409157752990723, + "learning_rate": 1.729893319515863e-06, + "loss": 2.0526, + "step": 24440 + }, + { + "epoch": 0.81, + "grad_norm": 0.7638505697250366, + "learning_rate": 1.7292958224614508e-06, + "loss": 2.0655, + "step": 24441 + }, + { + "epoch": 0.81, + "grad_norm": 0.7757713198661804, + "learning_rate": 1.7286984188446098e-06, + "loss": 2.0892, + "step": 24442 + }, + { + "epoch": 0.81, + "grad_norm": 0.7393655776977539, + "learning_rate": 1.7281011086720855e-06, + "loss": 2.0184, + "step": 24443 + }, + { + "epoch": 0.81, + "grad_norm": 0.7504480481147766, + "learning_rate": 1.7275038919506283e-06, + "loss": 2.0951, + "step": 24444 + }, + { + "epoch": 0.81, + "grad_norm": 0.757816731929779, + "learning_rate": 1.7269067686869835e-06, + "loss": 2.0633, + "step": 24445 + }, + { + "epoch": 0.81, + "grad_norm": 0.7457289099693298, + "learning_rate": 1.7263097388878958e-06, + "loss": 2.1361, + "step": 24446 + }, + { + "epoch": 0.81, + "grad_norm": 0.7603092789649963, + "learning_rate": 1.7257128025601123e-06, + "loss": 2.0448, + "step": 24447 + }, + { + "epoch": 0.81, + "grad_norm": 0.747394859790802, + "learning_rate": 1.7251159597103784e-06, + "loss": 2.0193, + "step": 24448 + }, + { + "epoch": 0.81, + "grad_norm": 0.7410550713539124, + "learning_rate": 1.7245192103454344e-06, + "loss": 1.9671, + "step": 24449 + }, + { + "epoch": 0.81, + "grad_norm": 0.7555094361305237, + "learning_rate": 1.7239225544720205e-06, + "loss": 1.9878, + "step": 24450 + }, + { + "epoch": 0.81, + "grad_norm": 0.7479246258735657, + "learning_rate": 1.7233259920968814e-06, + "loss": 2.1862, + "step": 24451 + }, + { + "epoch": 0.81, + "grad_norm": 0.7452386021614075, + "learning_rate": 1.7227295232267517e-06, + "loss": 2.1099, + "step": 24452 + }, + { + "epoch": 0.81, + "grad_norm": 0.7817735075950623, + "learning_rate": 1.7221331478683734e-06, + "loss": 2.0426, + "step": 24453 + }, + { + "epoch": 0.81, + "grad_norm": 0.7606265544891357, + "learning_rate": 1.7215368660284892e-06, + "loss": 2.1183, + "step": 24454 + }, + { + "epoch": 0.81, + "grad_norm": 0.7569113969802856, + "learning_rate": 1.7209406777138239e-06, + "loss": 2.1123, + "step": 24455 + }, + { + "epoch": 0.81, + "grad_norm": 0.7665259838104248, + "learning_rate": 1.7203445829311194e-06, + "loss": 2.0558, + "step": 24456 + }, + { + "epoch": 0.81, + "grad_norm": 0.7542934417724609, + "learning_rate": 1.7197485816871118e-06, + "loss": 2.0138, + "step": 24457 + }, + { + "epoch": 0.81, + "grad_norm": 0.7507808804512024, + "learning_rate": 1.7191526739885312e-06, + "loss": 2.1118, + "step": 24458 + }, + { + "epoch": 0.81, + "grad_norm": 0.7366945147514343, + "learning_rate": 1.7185568598421088e-06, + "loss": 1.9905, + "step": 24459 + }, + { + "epoch": 0.81, + "grad_norm": 0.7167713642120361, + "learning_rate": 1.7179611392545803e-06, + "loss": 2.0673, + "step": 24460 + }, + { + "epoch": 0.81, + "grad_norm": 0.7365698218345642, + "learning_rate": 1.7173655122326705e-06, + "loss": 2.0804, + "step": 24461 + }, + { + "epoch": 0.81, + "grad_norm": 0.7615119218826294, + "learning_rate": 1.716769978783114e-06, + "loss": 2.0751, + "step": 24462 + }, + { + "epoch": 0.81, + "grad_norm": 0.7753332853317261, + "learning_rate": 1.7161745389126328e-06, + "loss": 2.0026, + "step": 24463 + }, + { + "epoch": 0.81, + "grad_norm": 0.7452757954597473, + "learning_rate": 1.7155791926279585e-06, + "loss": 2.0641, + "step": 24464 + }, + { + "epoch": 0.81, + "grad_norm": 0.7272971272468567, + "learning_rate": 1.7149839399358136e-06, + "loss": 2.0771, + "step": 24465 + }, + { + "epoch": 0.81, + "grad_norm": 0.7489147782325745, + "learning_rate": 1.7143887808429272e-06, + "loss": 2.1361, + "step": 24466 + }, + { + "epoch": 0.81, + "grad_norm": 0.7432457208633423, + "learning_rate": 1.7137937153560213e-06, + "loss": 2.0825, + "step": 24467 + }, + { + "epoch": 0.81, + "grad_norm": 0.7778719663619995, + "learning_rate": 1.7131987434818143e-06, + "loss": 2.0472, + "step": 24468 + }, + { + "epoch": 0.81, + "grad_norm": 0.7567726969718933, + "learning_rate": 1.7126038652270316e-06, + "loss": 2.075, + "step": 24469 + }, + { + "epoch": 0.81, + "grad_norm": 0.7548190951347351, + "learning_rate": 1.7120090805983957e-06, + "loss": 2.057, + "step": 24470 + }, + { + "epoch": 0.81, + "grad_norm": 0.728968620300293, + "learning_rate": 1.7114143896026248e-06, + "loss": 2.0193, + "step": 24471 + }, + { + "epoch": 0.81, + "grad_norm": 0.7624780535697937, + "learning_rate": 1.7108197922464332e-06, + "loss": 2.0192, + "step": 24472 + }, + { + "epoch": 0.81, + "grad_norm": 0.7407897710800171, + "learning_rate": 1.7102252885365445e-06, + "loss": 2.0273, + "step": 24473 + }, + { + "epoch": 0.81, + "grad_norm": 0.8049396276473999, + "learning_rate": 1.70963087847967e-06, + "loss": 2.0163, + "step": 24474 + }, + { + "epoch": 0.81, + "grad_norm": 0.7493434548377991, + "learning_rate": 1.7090365620825266e-06, + "loss": 2.0633, + "step": 24475 + }, + { + "epoch": 0.81, + "grad_norm": 0.7303407788276672, + "learning_rate": 1.7084423393518346e-06, + "loss": 2.0232, + "step": 24476 + }, + { + "epoch": 0.81, + "grad_norm": 0.7913075685501099, + "learning_rate": 1.7078482102942963e-06, + "loss": 2.0741, + "step": 24477 + }, + { + "epoch": 0.81, + "grad_norm": 0.7184578776359558, + "learning_rate": 1.7072541749166293e-06, + "loss": 2.0333, + "step": 24478 + }, + { + "epoch": 0.81, + "grad_norm": 0.7623928189277649, + "learning_rate": 1.7066602332255477e-06, + "loss": 2.0524, + "step": 24479 + }, + { + "epoch": 0.81, + "grad_norm": 0.7485759854316711, + "learning_rate": 1.706066385227758e-06, + "loss": 2.0497, + "step": 24480 + }, + { + "epoch": 0.81, + "grad_norm": 0.7492665648460388, + "learning_rate": 1.7054726309299675e-06, + "loss": 2.0434, + "step": 24481 + }, + { + "epoch": 0.81, + "grad_norm": 0.7363805770874023, + "learning_rate": 1.7048789703388878e-06, + "loss": 2.0188, + "step": 24482 + }, + { + "epoch": 0.81, + "grad_norm": 0.7778322100639343, + "learning_rate": 1.7042854034612222e-06, + "loss": 2.0066, + "step": 24483 + }, + { + "epoch": 0.81, + "grad_norm": 0.7408059239387512, + "learning_rate": 1.703691930303678e-06, + "loss": 2.0571, + "step": 24484 + }, + { + "epoch": 0.81, + "grad_norm": 0.7532210946083069, + "learning_rate": 1.7030985508729624e-06, + "loss": 1.9983, + "step": 24485 + }, + { + "epoch": 0.81, + "grad_norm": 0.7453421950340271, + "learning_rate": 1.702505265175778e-06, + "loss": 1.9613, + "step": 24486 + }, + { + "epoch": 0.81, + "grad_norm": 0.7172645330429077, + "learning_rate": 1.7019120732188232e-06, + "loss": 2.0386, + "step": 24487 + }, + { + "epoch": 0.81, + "grad_norm": 0.7340813875198364, + "learning_rate": 1.7013189750088032e-06, + "loss": 1.9774, + "step": 24488 + }, + { + "epoch": 0.81, + "grad_norm": 0.7267454862594604, + "learning_rate": 1.7007259705524216e-06, + "loss": 2.0986, + "step": 24489 + }, + { + "epoch": 0.81, + "grad_norm": 0.7326659560203552, + "learning_rate": 1.7001330598563704e-06, + "loss": 2.0468, + "step": 24490 + }, + { + "epoch": 0.81, + "grad_norm": 0.7686564922332764, + "learning_rate": 1.699540242927351e-06, + "loss": 1.9749, + "step": 24491 + }, + { + "epoch": 0.81, + "grad_norm": 0.770440936088562, + "learning_rate": 1.698947519772064e-06, + "loss": 1.9953, + "step": 24492 + }, + { + "epoch": 0.81, + "grad_norm": 0.7358247637748718, + "learning_rate": 1.6983548903972036e-06, + "loss": 2.0582, + "step": 24493 + }, + { + "epoch": 0.81, + "grad_norm": 0.7365022897720337, + "learning_rate": 1.6977623548094612e-06, + "loss": 2.016, + "step": 24494 + }, + { + "epoch": 0.81, + "grad_norm": 0.726660430431366, + "learning_rate": 1.6971699130155361e-06, + "loss": 1.9653, + "step": 24495 + }, + { + "epoch": 0.81, + "grad_norm": 0.7718369960784912, + "learning_rate": 1.6965775650221184e-06, + "loss": 2.0485, + "step": 24496 + }, + { + "epoch": 0.82, + "grad_norm": 0.7348105311393738, + "learning_rate": 1.6959853108359004e-06, + "loss": 2.0279, + "step": 24497 + }, + { + "epoch": 0.82, + "grad_norm": 0.734720766544342, + "learning_rate": 1.695393150463578e-06, + "loss": 2.0755, + "step": 24498 + }, + { + "epoch": 0.82, + "grad_norm": 0.7499367594718933, + "learning_rate": 1.6948010839118323e-06, + "loss": 2.0526, + "step": 24499 + }, + { + "epoch": 0.82, + "grad_norm": 0.7268011569976807, + "learning_rate": 1.694209111187357e-06, + "loss": 2.0762, + "step": 24500 + }, + { + "epoch": 0.82, + "grad_norm": 0.7263720035552979, + "learning_rate": 1.6936172322968425e-06, + "loss": 1.9717, + "step": 24501 + }, + { + "epoch": 0.82, + "grad_norm": 0.7753645777702332, + "learning_rate": 1.6930254472469709e-06, + "loss": 2.0984, + "step": 24502 + }, + { + "epoch": 0.82, + "grad_norm": 0.7486563324928284, + "learning_rate": 1.692433756044428e-06, + "loss": 2.0686, + "step": 24503 + }, + { + "epoch": 0.82, + "grad_norm": 0.7719558477401733, + "learning_rate": 1.6918421586959e-06, + "loss": 2.0588, + "step": 24504 + }, + { + "epoch": 0.82, + "grad_norm": 0.7465249300003052, + "learning_rate": 1.691250655208072e-06, + "loss": 2.0795, + "step": 24505 + }, + { + "epoch": 0.82, + "grad_norm": 0.7705626487731934, + "learning_rate": 1.6906592455876226e-06, + "loss": 2.1261, + "step": 24506 + }, + { + "epoch": 0.82, + "grad_norm": 0.7503687143325806, + "learning_rate": 1.6900679298412381e-06, + "loss": 1.9558, + "step": 24507 + }, + { + "epoch": 0.82, + "grad_norm": 0.7387207746505737, + "learning_rate": 1.6894767079755958e-06, + "loss": 2.0652, + "step": 24508 + }, + { + "epoch": 0.82, + "grad_norm": 0.740697979927063, + "learning_rate": 1.6888855799973725e-06, + "loss": 2.0591, + "step": 24509 + }, + { + "epoch": 0.82, + "grad_norm": 0.770946741104126, + "learning_rate": 1.6882945459132493e-06, + "loss": 2.0607, + "step": 24510 + }, + { + "epoch": 0.82, + "grad_norm": 0.7290916442871094, + "learning_rate": 1.6877036057299078e-06, + "loss": 2.0489, + "step": 24511 + }, + { + "epoch": 0.82, + "grad_norm": 0.7611687183380127, + "learning_rate": 1.6871127594540149e-06, + "loss": 2.0628, + "step": 24512 + }, + { + "epoch": 0.82, + "grad_norm": 0.7313723564147949, + "learning_rate": 1.6865220070922495e-06, + "loss": 1.9598, + "step": 24513 + }, + { + "epoch": 0.82, + "grad_norm": 0.7581969499588013, + "learning_rate": 1.6859313486512896e-06, + "loss": 2.0088, + "step": 24514 + }, + { + "epoch": 0.82, + "grad_norm": 0.7544588446617126, + "learning_rate": 1.6853407841378022e-06, + "loss": 2.03, + "step": 24515 + }, + { + "epoch": 0.82, + "grad_norm": 0.7417830228805542, + "learning_rate": 1.684750313558464e-06, + "loss": 2.0674, + "step": 24516 + }, + { + "epoch": 0.82, + "grad_norm": 0.7411788702011108, + "learning_rate": 1.6841599369199435e-06, + "loss": 2.0408, + "step": 24517 + }, + { + "epoch": 0.82, + "grad_norm": 0.7454735636711121, + "learning_rate": 1.6835696542289082e-06, + "loss": 2.141, + "step": 24518 + }, + { + "epoch": 0.82, + "grad_norm": 0.7467008829116821, + "learning_rate": 1.6829794654920295e-06, + "loss": 1.9263, + "step": 24519 + }, + { + "epoch": 0.82, + "grad_norm": 0.7584688663482666, + "learning_rate": 1.6823893707159755e-06, + "loss": 2.1133, + "step": 24520 + }, + { + "epoch": 0.82, + "grad_norm": 0.7470139861106873, + "learning_rate": 1.6817993699074131e-06, + "loss": 2.0816, + "step": 24521 + }, + { + "epoch": 0.82, + "grad_norm": 0.7449947595596313, + "learning_rate": 1.6812094630730037e-06, + "loss": 2.0221, + "step": 24522 + }, + { + "epoch": 0.82, + "grad_norm": 0.7774048447608948, + "learning_rate": 1.6806196502194173e-06, + "loss": 1.971, + "step": 24523 + }, + { + "epoch": 0.82, + "grad_norm": 0.783819317817688, + "learning_rate": 1.6800299313533142e-06, + "loss": 2.069, + "step": 24524 + }, + { + "epoch": 0.82, + "grad_norm": 0.7619116306304932, + "learning_rate": 1.6794403064813536e-06, + "loss": 2.0478, + "step": 24525 + }, + { + "epoch": 0.82, + "grad_norm": 0.7302319407463074, + "learning_rate": 1.6788507756102012e-06, + "loss": 2.0364, + "step": 24526 + }, + { + "epoch": 0.82, + "grad_norm": 0.7524310350418091, + "learning_rate": 1.6782613387465185e-06, + "loss": 2.0727, + "step": 24527 + }, + { + "epoch": 0.82, + "grad_norm": 0.747408390045166, + "learning_rate": 1.677671995896959e-06, + "loss": 2.0217, + "step": 24528 + }, + { + "epoch": 0.82, + "grad_norm": 0.7467747926712036, + "learning_rate": 1.6770827470681872e-06, + "loss": 2.0312, + "step": 24529 + }, + { + "epoch": 0.82, + "grad_norm": 0.759817898273468, + "learning_rate": 1.6764935922668569e-06, + "loss": 2.1025, + "step": 24530 + }, + { + "epoch": 0.82, + "grad_norm": 0.7594185471534729, + "learning_rate": 1.6759045314996202e-06, + "loss": 2.0648, + "step": 24531 + }, + { + "epoch": 0.82, + "grad_norm": 0.7296598553657532, + "learning_rate": 1.6753155647731367e-06, + "loss": 2.0653, + "step": 24532 + }, + { + "epoch": 0.82, + "grad_norm": 0.7643938660621643, + "learning_rate": 1.6747266920940642e-06, + "loss": 2.075, + "step": 24533 + }, + { + "epoch": 0.82, + "grad_norm": 0.7425946593284607, + "learning_rate": 1.674137913469045e-06, + "loss": 2.0569, + "step": 24534 + }, + { + "epoch": 0.82, + "grad_norm": 0.7510147094726562, + "learning_rate": 1.6735492289047362e-06, + "loss": 2.1427, + "step": 24535 + }, + { + "epoch": 0.82, + "grad_norm": 0.7498977184295654, + "learning_rate": 1.6729606384077912e-06, + "loss": 2.1012, + "step": 24536 + }, + { + "epoch": 0.82, + "grad_norm": 0.7713295817375183, + "learning_rate": 1.6723721419848549e-06, + "loss": 2.001, + "step": 24537 + }, + { + "epoch": 0.82, + "grad_norm": 0.735720694065094, + "learning_rate": 1.6717837396425795e-06, + "loss": 2.0349, + "step": 24538 + }, + { + "epoch": 0.82, + "grad_norm": 0.7762858271598816, + "learning_rate": 1.67119543138761e-06, + "loss": 2.0703, + "step": 24539 + }, + { + "epoch": 0.82, + "grad_norm": 0.7564401626586914, + "learning_rate": 1.6706072172265919e-06, + "loss": 2.0349, + "step": 24540 + }, + { + "epoch": 0.82, + "grad_norm": 0.732455313205719, + "learning_rate": 1.6700190971661712e-06, + "loss": 2.0174, + "step": 24541 + }, + { + "epoch": 0.82, + "grad_norm": 0.7477617859840393, + "learning_rate": 1.6694310712129958e-06, + "loss": 2.0383, + "step": 24542 + }, + { + "epoch": 0.82, + "grad_norm": 0.7415032982826233, + "learning_rate": 1.668843139373706e-06, + "loss": 2.0026, + "step": 24543 + }, + { + "epoch": 0.82, + "grad_norm": 0.7218418121337891, + "learning_rate": 1.668255301654942e-06, + "loss": 2.0263, + "step": 24544 + }, + { + "epoch": 0.82, + "grad_norm": 0.7402378916740417, + "learning_rate": 1.6676675580633483e-06, + "loss": 2.0089, + "step": 24545 + }, + { + "epoch": 0.82, + "grad_norm": 0.7382590770721436, + "learning_rate": 1.667079908605561e-06, + "loss": 2.0129, + "step": 24546 + }, + { + "epoch": 0.82, + "grad_norm": 0.7498797178268433, + "learning_rate": 1.6664923532882239e-06, + "loss": 2.1123, + "step": 24547 + }, + { + "epoch": 0.82, + "grad_norm": 0.7568278908729553, + "learning_rate": 1.6659048921179698e-06, + "loss": 2.0583, + "step": 24548 + }, + { + "epoch": 0.82, + "grad_norm": 0.7213453054428101, + "learning_rate": 1.6653175251014397e-06, + "loss": 1.9896, + "step": 24549 + }, + { + "epoch": 0.82, + "grad_norm": 0.7355955839157104, + "learning_rate": 1.6647302522452658e-06, + "loss": 2.0218, + "step": 24550 + }, + { + "epoch": 0.82, + "grad_norm": 0.7454813718795776, + "learning_rate": 1.664143073556087e-06, + "loss": 2.0689, + "step": 24551 + }, + { + "epoch": 0.82, + "grad_norm": 0.732250452041626, + "learning_rate": 1.6635559890405351e-06, + "loss": 2.0107, + "step": 24552 + }, + { + "epoch": 0.82, + "grad_norm": 0.7605438828468323, + "learning_rate": 1.6629689987052388e-06, + "loss": 2.0175, + "step": 24553 + }, + { + "epoch": 0.82, + "grad_norm": 0.7370153665542603, + "learning_rate": 1.6623821025568331e-06, + "loss": 2.0675, + "step": 24554 + }, + { + "epoch": 0.82, + "grad_norm": 0.764571487903595, + "learning_rate": 1.6617953006019527e-06, + "loss": 2.0031, + "step": 24555 + }, + { + "epoch": 0.82, + "grad_norm": 0.7388578057289124, + "learning_rate": 1.6612085928472177e-06, + "loss": 2.1111, + "step": 24556 + }, + { + "epoch": 0.82, + "grad_norm": 0.7645792365074158, + "learning_rate": 1.6606219792992606e-06, + "loss": 2.0759, + "step": 24557 + }, + { + "epoch": 0.82, + "grad_norm": 0.734192430973053, + "learning_rate": 1.6600354599647116e-06, + "loss": 2.0682, + "step": 24558 + }, + { + "epoch": 0.82, + "grad_norm": 0.729652464389801, + "learning_rate": 1.6594490348501913e-06, + "loss": 2.0657, + "step": 24559 + }, + { + "epoch": 0.82, + "grad_norm": 0.7525078058242798, + "learning_rate": 1.6588627039623317e-06, + "loss": 2.0815, + "step": 24560 + }, + { + "epoch": 0.82, + "grad_norm": 0.7328199744224548, + "learning_rate": 1.6582764673077511e-06, + "loss": 2.0226, + "step": 24561 + }, + { + "epoch": 0.82, + "grad_norm": 0.7466614246368408, + "learning_rate": 1.657690324893073e-06, + "loss": 2.0374, + "step": 24562 + }, + { + "epoch": 0.82, + "grad_norm": 0.749165415763855, + "learning_rate": 1.65710427672492e-06, + "loss": 2.0256, + "step": 24563 + }, + { + "epoch": 0.82, + "grad_norm": 0.7522661685943604, + "learning_rate": 1.656518322809917e-06, + "loss": 2.0906, + "step": 24564 + }, + { + "epoch": 0.82, + "grad_norm": 0.761629581451416, + "learning_rate": 1.6559324631546792e-06, + "loss": 2.0879, + "step": 24565 + }, + { + "epoch": 0.82, + "grad_norm": 0.7588980197906494, + "learning_rate": 1.6553466977658238e-06, + "loss": 2.0592, + "step": 24566 + }, + { + "epoch": 0.82, + "grad_norm": 0.7484273910522461, + "learning_rate": 1.6547610266499736e-06, + "loss": 2.0421, + "step": 24567 + }, + { + "epoch": 0.82, + "grad_norm": 0.7427703738212585, + "learning_rate": 1.6541754498137396e-06, + "loss": 2.0455, + "step": 24568 + }, + { + "epoch": 0.82, + "grad_norm": 0.7355174422264099, + "learning_rate": 1.6535899672637435e-06, + "loss": 2.0351, + "step": 24569 + }, + { + "epoch": 0.82, + "grad_norm": 0.7615810036659241, + "learning_rate": 1.653004579006594e-06, + "loss": 2.052, + "step": 24570 + }, + { + "epoch": 0.82, + "grad_norm": 0.7332801222801208, + "learning_rate": 1.6524192850489096e-06, + "loss": 1.9977, + "step": 24571 + }, + { + "epoch": 0.82, + "grad_norm": 0.7462335228919983, + "learning_rate": 1.6518340853972969e-06, + "loss": 1.9745, + "step": 24572 + }, + { + "epoch": 0.82, + "grad_norm": 0.7507336735725403, + "learning_rate": 1.651248980058373e-06, + "loss": 2.0417, + "step": 24573 + }, + { + "epoch": 0.82, + "grad_norm": 0.7614277601242065, + "learning_rate": 1.650663969038745e-06, + "loss": 2.0428, + "step": 24574 + }, + { + "epoch": 0.82, + "grad_norm": 0.7457935214042664, + "learning_rate": 1.6500790523450195e-06, + "loss": 2.0367, + "step": 24575 + }, + { + "epoch": 0.82, + "grad_norm": 0.7891905307769775, + "learning_rate": 1.649494229983808e-06, + "loss": 2.111, + "step": 24576 + }, + { + "epoch": 0.82, + "grad_norm": 0.7668673396110535, + "learning_rate": 1.6489095019617185e-06, + "loss": 1.9939, + "step": 24577 + }, + { + "epoch": 0.82, + "grad_norm": 0.7392739653587341, + "learning_rate": 1.6483248682853558e-06, + "loss": 2.1285, + "step": 24578 + }, + { + "epoch": 0.82, + "grad_norm": 0.7716066837310791, + "learning_rate": 1.6477403289613215e-06, + "loss": 2.0639, + "step": 24579 + }, + { + "epoch": 0.82, + "grad_norm": 0.7748960256576538, + "learning_rate": 1.6471558839962253e-06, + "loss": 2.0407, + "step": 24580 + }, + { + "epoch": 0.82, + "grad_norm": 0.7532209753990173, + "learning_rate": 1.6465715333966636e-06, + "loss": 2.1106, + "step": 24581 + }, + { + "epoch": 0.82, + "grad_norm": 0.7628597617149353, + "learning_rate": 1.645987277169243e-06, + "loss": 2.0539, + "step": 24582 + }, + { + "epoch": 0.82, + "grad_norm": 0.7131446003913879, + "learning_rate": 1.645403115320563e-06, + "loss": 2.0718, + "step": 24583 + }, + { + "epoch": 0.82, + "grad_norm": 0.7546490430831909, + "learning_rate": 1.64481904785722e-06, + "loss": 2.1012, + "step": 24584 + }, + { + "epoch": 0.82, + "grad_norm": 0.7365564703941345, + "learning_rate": 1.6442350747858139e-06, + "loss": 2.0327, + "step": 24585 + }, + { + "epoch": 0.82, + "grad_norm": 0.7771188020706177, + "learning_rate": 1.6436511961129464e-06, + "loss": 2.0507, + "step": 24586 + }, + { + "epoch": 0.82, + "grad_norm": 0.7132853865623474, + "learning_rate": 1.6430674118452095e-06, + "loss": 2.0557, + "step": 24587 + }, + { + "epoch": 0.82, + "grad_norm": 0.7698830366134644, + "learning_rate": 1.6424837219891976e-06, + "loss": 2.0177, + "step": 24588 + }, + { + "epoch": 0.82, + "grad_norm": 0.7774745225906372, + "learning_rate": 1.6419001265515067e-06, + "loss": 2.0127, + "step": 24589 + }, + { + "epoch": 0.82, + "grad_norm": 0.733350932598114, + "learning_rate": 1.6413166255387313e-06, + "loss": 2.0329, + "step": 24590 + }, + { + "epoch": 0.82, + "grad_norm": 0.7675386667251587, + "learning_rate": 1.6407332189574632e-06, + "loss": 2.1012, + "step": 24591 + }, + { + "epoch": 0.82, + "grad_norm": 0.712372362613678, + "learning_rate": 1.640149906814289e-06, + "loss": 2.0402, + "step": 24592 + }, + { + "epoch": 0.82, + "grad_norm": 0.7344722151756287, + "learning_rate": 1.6395666891158046e-06, + "loss": 2.0297, + "step": 24593 + }, + { + "epoch": 0.82, + "grad_norm": 0.7221224308013916, + "learning_rate": 1.6389835658685938e-06, + "loss": 2.0168, + "step": 24594 + }, + { + "epoch": 0.82, + "grad_norm": 0.7723449468612671, + "learning_rate": 1.6384005370792478e-06, + "loss": 2.0901, + "step": 24595 + }, + { + "epoch": 0.82, + "grad_norm": 0.7529548406600952, + "learning_rate": 1.6378176027543535e-06, + "loss": 2.06, + "step": 24596 + }, + { + "epoch": 0.82, + "grad_norm": 0.7539964914321899, + "learning_rate": 1.6372347629004924e-06, + "loss": 2.0227, + "step": 24597 + }, + { + "epoch": 0.82, + "grad_norm": 0.7455288171768188, + "learning_rate": 1.6366520175242518e-06, + "loss": 2.0104, + "step": 24598 + }, + { + "epoch": 0.82, + "grad_norm": 0.763960063457489, + "learning_rate": 1.6360693666322181e-06, + "loss": 2.0274, + "step": 24599 + }, + { + "epoch": 0.82, + "grad_norm": 0.723447322845459, + "learning_rate": 1.6354868102309696e-06, + "loss": 2.0127, + "step": 24600 + }, + { + "epoch": 0.82, + "grad_norm": 0.7618283629417419, + "learning_rate": 1.6349043483270876e-06, + "loss": 2.097, + "step": 24601 + }, + { + "epoch": 0.82, + "grad_norm": 0.7468940615653992, + "learning_rate": 1.634321980927157e-06, + "loss": 2.0887, + "step": 24602 + }, + { + "epoch": 0.82, + "grad_norm": 0.7379149794578552, + "learning_rate": 1.6337397080377503e-06, + "loss": 2.1098, + "step": 24603 + }, + { + "epoch": 0.82, + "grad_norm": 0.7264783382415771, + "learning_rate": 1.6331575296654522e-06, + "loss": 2.0211, + "step": 24604 + }, + { + "epoch": 0.82, + "grad_norm": 0.7703182101249695, + "learning_rate": 1.6325754458168341e-06, + "loss": 2.04, + "step": 24605 + }, + { + "epoch": 0.82, + "grad_norm": 0.774222195148468, + "learning_rate": 1.6319934564984774e-06, + "loss": 2.059, + "step": 24606 + }, + { + "epoch": 0.82, + "grad_norm": 0.7240437269210815, + "learning_rate": 1.6314115617169523e-06, + "loss": 2.0653, + "step": 24607 + }, + { + "epoch": 0.82, + "grad_norm": 0.7502554655075073, + "learning_rate": 1.630829761478837e-06, + "loss": 1.9935, + "step": 24608 + }, + { + "epoch": 0.82, + "grad_norm": 0.7653800249099731, + "learning_rate": 1.630248055790703e-06, + "loss": 2.0466, + "step": 24609 + }, + { + "epoch": 0.82, + "grad_norm": 0.7408431768417358, + "learning_rate": 1.6296664446591181e-06, + "loss": 2.1171, + "step": 24610 + }, + { + "epoch": 0.82, + "grad_norm": 0.7434965372085571, + "learning_rate": 1.6290849280906573e-06, + "loss": 2.0596, + "step": 24611 + }, + { + "epoch": 0.82, + "grad_norm": 0.7596830129623413, + "learning_rate": 1.6285035060918908e-06, + "loss": 2.024, + "step": 24612 + }, + { + "epoch": 0.82, + "grad_norm": 0.768607497215271, + "learning_rate": 1.6279221786693844e-06, + "loss": 2.0239, + "step": 24613 + }, + { + "epoch": 0.82, + "grad_norm": 0.7548766732215881, + "learning_rate": 1.6273409458297063e-06, + "loss": 2.0567, + "step": 24614 + }, + { + "epoch": 0.82, + "grad_norm": 0.7422938942909241, + "learning_rate": 1.6267598075794244e-06, + "loss": 1.979, + "step": 24615 + }, + { + "epoch": 0.82, + "grad_norm": 0.7561812996864319, + "learning_rate": 1.6261787639251003e-06, + "loss": 2.0135, + "step": 24616 + }, + { + "epoch": 0.82, + "grad_norm": 0.7785735726356506, + "learning_rate": 1.6255978148733042e-06, + "loss": 2.0132, + "step": 24617 + }, + { + "epoch": 0.82, + "grad_norm": 0.7781122326850891, + "learning_rate": 1.6250169604305966e-06, + "loss": 1.9659, + "step": 24618 + }, + { + "epoch": 0.82, + "grad_norm": 0.7320306301116943, + "learning_rate": 1.6244362006035363e-06, + "loss": 2.0405, + "step": 24619 + }, + { + "epoch": 0.82, + "grad_norm": 0.7523961067199707, + "learning_rate": 1.6238555353986863e-06, + "loss": 2.0589, + "step": 24620 + }, + { + "epoch": 0.82, + "grad_norm": 0.7618944644927979, + "learning_rate": 1.6232749648226109e-06, + "loss": 2.0587, + "step": 24621 + }, + { + "epoch": 0.82, + "grad_norm": 0.7323446869850159, + "learning_rate": 1.6226944888818651e-06, + "loss": 2.0014, + "step": 24622 + }, + { + "epoch": 0.82, + "grad_norm": 0.7585964798927307, + "learning_rate": 1.622114107583006e-06, + "loss": 1.9995, + "step": 24623 + }, + { + "epoch": 0.82, + "grad_norm": 0.7511516213417053, + "learning_rate": 1.6215338209325938e-06, + "loss": 2.0951, + "step": 24624 + }, + { + "epoch": 0.82, + "grad_norm": 0.7365175485610962, + "learning_rate": 1.6209536289371796e-06, + "loss": 1.9801, + "step": 24625 + }, + { + "epoch": 0.82, + "grad_norm": 0.7180129885673523, + "learning_rate": 1.6203735316033231e-06, + "loss": 2.0315, + "step": 24626 + }, + { + "epoch": 0.82, + "grad_norm": 0.7375575304031372, + "learning_rate": 1.6197935289375733e-06, + "loss": 2.0412, + "step": 24627 + }, + { + "epoch": 0.82, + "grad_norm": 0.7332069873809814, + "learning_rate": 1.619213620946487e-06, + "loss": 2.0702, + "step": 24628 + }, + { + "epoch": 0.82, + "grad_norm": 0.7303208112716675, + "learning_rate": 1.6186338076366115e-06, + "loss": 2.0235, + "step": 24629 + }, + { + "epoch": 0.82, + "grad_norm": 0.7562184929847717, + "learning_rate": 1.6180540890145014e-06, + "loss": 2.0742, + "step": 24630 + }, + { + "epoch": 0.82, + "grad_norm": 0.707876443862915, + "learning_rate": 1.6174744650867036e-06, + "loss": 2.0176, + "step": 24631 + }, + { + "epoch": 0.82, + "grad_norm": 0.796810507774353, + "learning_rate": 1.6168949358597652e-06, + "loss": 2.0847, + "step": 24632 + }, + { + "epoch": 0.82, + "grad_norm": 0.7438734769821167, + "learning_rate": 1.6163155013402331e-06, + "loss": 2.0357, + "step": 24633 + }, + { + "epoch": 0.82, + "grad_norm": 0.7598115801811218, + "learning_rate": 1.6157361615346589e-06, + "loss": 2.014, + "step": 24634 + }, + { + "epoch": 0.82, + "grad_norm": 0.7524226903915405, + "learning_rate": 1.615156916449584e-06, + "loss": 1.9684, + "step": 24635 + }, + { + "epoch": 0.82, + "grad_norm": 0.7687348127365112, + "learning_rate": 1.6145777660915496e-06, + "loss": 2.0852, + "step": 24636 + }, + { + "epoch": 0.82, + "grad_norm": 0.7468103766441345, + "learning_rate": 1.613998710467104e-06, + "loss": 2.0784, + "step": 24637 + }, + { + "epoch": 0.82, + "grad_norm": 0.7184838056564331, + "learning_rate": 1.613419749582783e-06, + "loss": 2.0654, + "step": 24638 + }, + { + "epoch": 0.82, + "grad_norm": 0.7395575046539307, + "learning_rate": 1.6128408834451336e-06, + "loss": 2.0154, + "step": 24639 + }, + { + "epoch": 0.82, + "grad_norm": 0.7380672097206116, + "learning_rate": 1.6122621120606929e-06, + "loss": 2.0648, + "step": 24640 + }, + { + "epoch": 0.82, + "grad_norm": 0.7515471577644348, + "learning_rate": 1.6116834354359968e-06, + "loss": 2.0914, + "step": 24641 + }, + { + "epoch": 0.82, + "grad_norm": 0.7638505101203918, + "learning_rate": 1.6111048535775842e-06, + "loss": 2.0562, + "step": 24642 + }, + { + "epoch": 0.82, + "grad_norm": 0.7853606343269348, + "learning_rate": 1.6105263664919957e-06, + "loss": 2.0915, + "step": 24643 + }, + { + "epoch": 0.82, + "grad_norm": 0.7652499079704285, + "learning_rate": 1.6099479741857639e-06, + "loss": 2.0473, + "step": 24644 + }, + { + "epoch": 0.82, + "grad_norm": 0.7278482913970947, + "learning_rate": 1.6093696766654199e-06, + "loss": 2.0304, + "step": 24645 + }, + { + "epoch": 0.82, + "grad_norm": 0.7803865075111389, + "learning_rate": 1.608791473937502e-06, + "loss": 2.0633, + "step": 24646 + }, + { + "epoch": 0.82, + "grad_norm": 0.753241777420044, + "learning_rate": 1.6082133660085386e-06, + "loss": 2.0355, + "step": 24647 + }, + { + "epoch": 0.82, + "grad_norm": 0.7282392382621765, + "learning_rate": 1.607635352885064e-06, + "loss": 1.996, + "step": 24648 + }, + { + "epoch": 0.82, + "grad_norm": 0.7931697368621826, + "learning_rate": 1.6070574345736056e-06, + "loss": 2.0253, + "step": 24649 + }, + { + "epoch": 0.82, + "grad_norm": 0.7466763257980347, + "learning_rate": 1.6064796110806945e-06, + "loss": 2.1404, + "step": 24650 + }, + { + "epoch": 0.82, + "grad_norm": 0.7644801139831543, + "learning_rate": 1.605901882412857e-06, + "loss": 2.0728, + "step": 24651 + }, + { + "epoch": 0.82, + "grad_norm": 0.7290539741516113, + "learning_rate": 1.605324248576622e-06, + "loss": 2.049, + "step": 24652 + }, + { + "epoch": 0.82, + "grad_norm": 0.7890642881393433, + "learning_rate": 1.6047467095785142e-06, + "loss": 2.0635, + "step": 24653 + }, + { + "epoch": 0.82, + "grad_norm": 0.7434118390083313, + "learning_rate": 1.6041692654250551e-06, + "loss": 2.0504, + "step": 24654 + }, + { + "epoch": 0.82, + "grad_norm": 0.7930094003677368, + "learning_rate": 1.603591916122771e-06, + "loss": 2.0756, + "step": 24655 + }, + { + "epoch": 0.82, + "grad_norm": 0.7471768260002136, + "learning_rate": 1.6030146616781882e-06, + "loss": 2.0862, + "step": 24656 + }, + { + "epoch": 0.82, + "grad_norm": 0.7720146179199219, + "learning_rate": 1.6024375020978234e-06, + "loss": 2.0835, + "step": 24657 + }, + { + "epoch": 0.82, + "grad_norm": 0.7535285949707031, + "learning_rate": 1.6018604373881963e-06, + "loss": 2.001, + "step": 24658 + }, + { + "epoch": 0.82, + "grad_norm": 0.7511516809463501, + "learning_rate": 1.601283467555831e-06, + "loss": 2.0435, + "step": 24659 + }, + { + "epoch": 0.82, + "grad_norm": 0.7551955580711365, + "learning_rate": 1.6007065926072406e-06, + "loss": 2.0902, + "step": 24660 + }, + { + "epoch": 0.82, + "grad_norm": 0.759741485118866, + "learning_rate": 1.600129812548944e-06, + "loss": 2.0654, + "step": 24661 + }, + { + "epoch": 0.82, + "grad_norm": 0.7355949878692627, + "learning_rate": 1.5995531273874632e-06, + "loss": 1.9998, + "step": 24662 + }, + { + "epoch": 0.82, + "grad_norm": 0.7355269193649292, + "learning_rate": 1.5989765371293032e-06, + "loss": 2.0449, + "step": 24663 + }, + { + "epoch": 0.82, + "grad_norm": 0.7465232610702515, + "learning_rate": 1.598400041780982e-06, + "loss": 2.0437, + "step": 24664 + }, + { + "epoch": 0.82, + "grad_norm": 0.7346485257148743, + "learning_rate": 1.5978236413490166e-06, + "loss": 2.045, + "step": 24665 + }, + { + "epoch": 0.82, + "grad_norm": 0.7511296272277832, + "learning_rate": 1.5972473358399153e-06, + "loss": 2.0944, + "step": 24666 + }, + { + "epoch": 0.82, + "grad_norm": 0.7130091190338135, + "learning_rate": 1.5966711252601874e-06, + "loss": 2.0097, + "step": 24667 + }, + { + "epoch": 0.82, + "grad_norm": 0.7297332882881165, + "learning_rate": 1.5960950096163453e-06, + "loss": 1.9637, + "step": 24668 + }, + { + "epoch": 0.82, + "grad_norm": 0.7632662057876587, + "learning_rate": 1.5955189889148948e-06, + "loss": 2.0724, + "step": 24669 + }, + { + "epoch": 0.82, + "grad_norm": 0.760145366191864, + "learning_rate": 1.5949430631623487e-06, + "loss": 2.0578, + "step": 24670 + }, + { + "epoch": 0.82, + "grad_norm": 0.7408374547958374, + "learning_rate": 1.594367232365206e-06, + "loss": 1.9997, + "step": 24671 + }, + { + "epoch": 0.82, + "grad_norm": 0.7327712178230286, + "learning_rate": 1.5937914965299794e-06, + "loss": 1.9802, + "step": 24672 + }, + { + "epoch": 0.82, + "grad_norm": 0.7711045742034912, + "learning_rate": 1.5932158556631672e-06, + "loss": 2.0774, + "step": 24673 + }, + { + "epoch": 0.82, + "grad_norm": 0.7370678186416626, + "learning_rate": 1.5926403097712784e-06, + "loss": 2.0583, + "step": 24674 + }, + { + "epoch": 0.82, + "grad_norm": 0.7402954697608948, + "learning_rate": 1.5920648588608112e-06, + "loss": 2.0183, + "step": 24675 + }, + { + "epoch": 0.82, + "grad_norm": 0.7355253100395203, + "learning_rate": 1.591489502938266e-06, + "loss": 2.0246, + "step": 24676 + }, + { + "epoch": 0.82, + "grad_norm": 0.7816030979156494, + "learning_rate": 1.5909142420101442e-06, + "loss": 2.0399, + "step": 24677 + }, + { + "epoch": 0.82, + "grad_norm": 0.7639210224151611, + "learning_rate": 1.5903390760829484e-06, + "loss": 2.0457, + "step": 24678 + }, + { + "epoch": 0.82, + "grad_norm": 0.7285211682319641, + "learning_rate": 1.5897640051631724e-06, + "loss": 2.0555, + "step": 24679 + }, + { + "epoch": 0.82, + "grad_norm": 0.7460406422615051, + "learning_rate": 1.589189029257311e-06, + "loss": 2.0524, + "step": 24680 + }, + { + "epoch": 0.82, + "grad_norm": 0.750711977481842, + "learning_rate": 1.5886141483718665e-06, + "loss": 2.0406, + "step": 24681 + }, + { + "epoch": 0.82, + "grad_norm": 0.7633406519889832, + "learning_rate": 1.588039362513326e-06, + "loss": 2.0773, + "step": 24682 + }, + { + "epoch": 0.82, + "grad_norm": 0.749678373336792, + "learning_rate": 1.587464671688187e-06, + "loss": 2.0937, + "step": 24683 + }, + { + "epoch": 0.82, + "grad_norm": 0.7542497515678406, + "learning_rate": 1.5868900759029472e-06, + "loss": 2.0733, + "step": 24684 + }, + { + "epoch": 0.82, + "grad_norm": 0.7439835071563721, + "learning_rate": 1.5863155751640879e-06, + "loss": 2.0887, + "step": 24685 + }, + { + "epoch": 0.82, + "grad_norm": 0.7492047548294067, + "learning_rate": 1.5857411694781044e-06, + "loss": 2.0871, + "step": 24686 + }, + { + "epoch": 0.82, + "grad_norm": 0.7941245436668396, + "learning_rate": 1.5851668588514878e-06, + "loss": 2.1572, + "step": 24687 + }, + { + "epoch": 0.82, + "grad_norm": 0.7431154847145081, + "learning_rate": 1.5845926432907256e-06, + "loss": 2.108, + "step": 24688 + }, + { + "epoch": 0.82, + "grad_norm": 0.774812638759613, + "learning_rate": 1.5840185228022997e-06, + "loss": 2.0755, + "step": 24689 + }, + { + "epoch": 0.82, + "grad_norm": 0.7613157629966736, + "learning_rate": 1.5834444973927043e-06, + "loss": 2.1219, + "step": 24690 + }, + { + "epoch": 0.82, + "grad_norm": 0.7448336482048035, + "learning_rate": 1.5828705670684174e-06, + "loss": 2.0647, + "step": 24691 + }, + { + "epoch": 0.82, + "grad_norm": 0.7452444434165955, + "learning_rate": 1.582296731835925e-06, + "loss": 2.0769, + "step": 24692 + }, + { + "epoch": 0.82, + "grad_norm": 0.7306809425354004, + "learning_rate": 1.581722991701714e-06, + "loss": 2.0093, + "step": 24693 + }, + { + "epoch": 0.82, + "grad_norm": 0.7399933338165283, + "learning_rate": 1.5811493466722638e-06, + "loss": 2.018, + "step": 24694 + }, + { + "epoch": 0.82, + "grad_norm": 0.7592442035675049, + "learning_rate": 1.5805757967540514e-06, + "loss": 1.9878, + "step": 24695 + }, + { + "epoch": 0.82, + "grad_norm": 0.7524027228355408, + "learning_rate": 1.5800023419535592e-06, + "loss": 2.0334, + "step": 24696 + }, + { + "epoch": 0.82, + "grad_norm": 0.7307648658752441, + "learning_rate": 1.57942898227727e-06, + "loss": 2.0556, + "step": 24697 + }, + { + "epoch": 0.82, + "grad_norm": 0.7876189351081848, + "learning_rate": 1.5788557177316533e-06, + "loss": 2.0471, + "step": 24698 + }, + { + "epoch": 0.82, + "grad_norm": 0.7385796904563904, + "learning_rate": 1.57828254832319e-06, + "loss": 2.0906, + "step": 24699 + }, + { + "epoch": 0.82, + "grad_norm": 0.729205310344696, + "learning_rate": 1.5777094740583566e-06, + "loss": 2.1308, + "step": 24700 + }, + { + "epoch": 0.82, + "grad_norm": 0.7614633440971375, + "learning_rate": 1.5771364949436251e-06, + "loss": 1.9896, + "step": 24701 + }, + { + "epoch": 0.82, + "grad_norm": 0.753969132900238, + "learning_rate": 1.5765636109854676e-06, + "loss": 2.0598, + "step": 24702 + }, + { + "epoch": 0.82, + "grad_norm": 0.7359817624092102, + "learning_rate": 1.5759908221903596e-06, + "loss": 2.0906, + "step": 24703 + }, + { + "epoch": 0.82, + "grad_norm": 0.7458236217498779, + "learning_rate": 1.5754181285647684e-06, + "loss": 1.9773, + "step": 24704 + }, + { + "epoch": 0.82, + "grad_norm": 0.7319135665893555, + "learning_rate": 1.5748455301151655e-06, + "loss": 2.0767, + "step": 24705 + }, + { + "epoch": 0.82, + "grad_norm": 0.7432857155799866, + "learning_rate": 1.5742730268480232e-06, + "loss": 2.0654, + "step": 24706 + }, + { + "epoch": 0.82, + "grad_norm": 0.7468056082725525, + "learning_rate": 1.5737006187698055e-06, + "loss": 2.0182, + "step": 24707 + }, + { + "epoch": 0.82, + "grad_norm": 0.7329849600791931, + "learning_rate": 1.5731283058869785e-06, + "loss": 2.059, + "step": 24708 + }, + { + "epoch": 0.82, + "grad_norm": 0.8048843741416931, + "learning_rate": 1.5725560882060108e-06, + "loss": 2.0675, + "step": 24709 + }, + { + "epoch": 0.82, + "grad_norm": 0.7300288677215576, + "learning_rate": 1.5719839657333657e-06, + "loss": 1.9894, + "step": 24710 + }, + { + "epoch": 0.82, + "grad_norm": 0.7429967522621155, + "learning_rate": 1.5714119384755044e-06, + "loss": 2.0676, + "step": 24711 + }, + { + "epoch": 0.82, + "grad_norm": 0.7354058623313904, + "learning_rate": 1.5708400064388907e-06, + "loss": 2.0972, + "step": 24712 + }, + { + "epoch": 0.82, + "grad_norm": 0.7282070517539978, + "learning_rate": 1.5702681696299893e-06, + "loss": 2.0124, + "step": 24713 + }, + { + "epoch": 0.82, + "grad_norm": 0.7163010239601135, + "learning_rate": 1.569696428055255e-06, + "loss": 2.0033, + "step": 24714 + }, + { + "epoch": 0.82, + "grad_norm": 0.7472782135009766, + "learning_rate": 1.569124781721153e-06, + "loss": 2.0888, + "step": 24715 + }, + { + "epoch": 0.82, + "grad_norm": 0.7678632736206055, + "learning_rate": 1.5685532306341379e-06, + "loss": 2.0586, + "step": 24716 + }, + { + "epoch": 0.82, + "grad_norm": 0.7533635497093201, + "learning_rate": 1.5679817748006653e-06, + "loss": 2.071, + "step": 24717 + }, + { + "epoch": 0.82, + "grad_norm": 0.7370835542678833, + "learning_rate": 1.5674104142271917e-06, + "loss": 2.0918, + "step": 24718 + }, + { + "epoch": 0.82, + "grad_norm": 0.7656005620956421, + "learning_rate": 1.5668391489201794e-06, + "loss": 2.0586, + "step": 24719 + }, + { + "epoch": 0.82, + "grad_norm": 0.7605054378509521, + "learning_rate": 1.5662679788860702e-06, + "loss": 2.0463, + "step": 24720 + }, + { + "epoch": 0.82, + "grad_norm": 0.7772300243377686, + "learning_rate": 1.565696904131323e-06, + "loss": 2.0422, + "step": 24721 + }, + { + "epoch": 0.82, + "grad_norm": 0.7316023111343384, + "learning_rate": 1.5651259246623917e-06, + "loss": 1.9821, + "step": 24722 + }, + { + "epoch": 0.82, + "grad_norm": 0.7254810333251953, + "learning_rate": 1.5645550404857223e-06, + "loss": 2.0376, + "step": 24723 + }, + { + "epoch": 0.82, + "grad_norm": 0.7456212043762207, + "learning_rate": 1.5639842516077685e-06, + "loss": 2.0267, + "step": 24724 + }, + { + "epoch": 0.82, + "grad_norm": 0.7336885333061218, + "learning_rate": 1.5634135580349763e-06, + "loss": 2.0925, + "step": 24725 + }, + { + "epoch": 0.82, + "grad_norm": 0.7532624006271362, + "learning_rate": 1.5628429597737915e-06, + "loss": 2.1057, + "step": 24726 + }, + { + "epoch": 0.82, + "grad_norm": 0.7245036959648132, + "learning_rate": 1.5622724568306624e-06, + "loss": 2.0111, + "step": 24727 + }, + { + "epoch": 0.82, + "grad_norm": 0.7442290782928467, + "learning_rate": 1.561702049212036e-06, + "loss": 2.0203, + "step": 24728 + }, + { + "epoch": 0.82, + "grad_norm": 0.7539690732955933, + "learning_rate": 1.561131736924355e-06, + "loss": 2.0244, + "step": 24729 + }, + { + "epoch": 0.82, + "grad_norm": 0.7333362102508545, + "learning_rate": 1.5605615199740597e-06, + "loss": 2.0148, + "step": 24730 + }, + { + "epoch": 0.82, + "grad_norm": 0.7597872018814087, + "learning_rate": 1.5599913983675962e-06, + "loss": 2.0934, + "step": 24731 + }, + { + "epoch": 0.82, + "grad_norm": 0.7553220391273499, + "learning_rate": 1.5594213721114038e-06, + "loss": 2.046, + "step": 24732 + }, + { + "epoch": 0.82, + "grad_norm": 0.7654428482055664, + "learning_rate": 1.5588514412119193e-06, + "loss": 2.0349, + "step": 24733 + }, + { + "epoch": 0.82, + "grad_norm": 0.7535482048988342, + "learning_rate": 1.5582816056755844e-06, + "loss": 2.0332, + "step": 24734 + }, + { + "epoch": 0.82, + "grad_norm": 0.7631350755691528, + "learning_rate": 1.5577118655088397e-06, + "loss": 1.9968, + "step": 24735 + }, + { + "epoch": 0.82, + "grad_norm": 0.7596483826637268, + "learning_rate": 1.5571422207181153e-06, + "loss": 2.1403, + "step": 24736 + }, + { + "epoch": 0.82, + "grad_norm": 0.7418236136436462, + "learning_rate": 1.5565726713098528e-06, + "loss": 2.0334, + "step": 24737 + }, + { + "epoch": 0.82, + "grad_norm": 0.7330083250999451, + "learning_rate": 1.5560032172904837e-06, + "loss": 2.1, + "step": 24738 + }, + { + "epoch": 0.82, + "grad_norm": 0.7567529082298279, + "learning_rate": 1.5554338586664398e-06, + "loss": 2.0523, + "step": 24739 + }, + { + "epoch": 0.82, + "grad_norm": 0.7426371574401855, + "learning_rate": 1.5548645954441544e-06, + "loss": 2.0813, + "step": 24740 + }, + { + "epoch": 0.82, + "grad_norm": 0.783898651599884, + "learning_rate": 1.5542954276300647e-06, + "loss": 2.0269, + "step": 24741 + }, + { + "epoch": 0.82, + "grad_norm": 0.7807183265686035, + "learning_rate": 1.5537263552305914e-06, + "loss": 2.0393, + "step": 24742 + }, + { + "epoch": 0.82, + "grad_norm": 0.7230624556541443, + "learning_rate": 1.553157378252167e-06, + "loss": 2.0387, + "step": 24743 + }, + { + "epoch": 0.82, + "grad_norm": 0.7492788434028625, + "learning_rate": 1.5525884967012227e-06, + "loss": 2.0346, + "step": 24744 + }, + { + "epoch": 0.82, + "grad_norm": 0.7256644368171692, + "learning_rate": 1.5520197105841805e-06, + "loss": 2.031, + "step": 24745 + }, + { + "epoch": 0.82, + "grad_norm": 0.7339571714401245, + "learning_rate": 1.5514510199074706e-06, + "loss": 2.0019, + "step": 24746 + }, + { + "epoch": 0.82, + "grad_norm": 0.7513602375984192, + "learning_rate": 1.5508824246775167e-06, + "loss": 2.0119, + "step": 24747 + }, + { + "epoch": 0.82, + "grad_norm": 0.8020707368850708, + "learning_rate": 1.5503139249007381e-06, + "loss": 2.0437, + "step": 24748 + }, + { + "epoch": 0.82, + "grad_norm": 0.7423878312110901, + "learning_rate": 1.549745520583562e-06, + "loss": 2.0606, + "step": 24749 + }, + { + "epoch": 0.82, + "grad_norm": 0.7596179842948914, + "learning_rate": 1.54917721173241e-06, + "loss": 2.0923, + "step": 24750 + }, + { + "epoch": 0.82, + "grad_norm": 0.7379164695739746, + "learning_rate": 1.5486089983537012e-06, + "loss": 2.0101, + "step": 24751 + }, + { + "epoch": 0.82, + "grad_norm": 0.7871154546737671, + "learning_rate": 1.5480408804538526e-06, + "loss": 2.0415, + "step": 24752 + }, + { + "epoch": 0.82, + "grad_norm": 0.7450940012931824, + "learning_rate": 1.5474728580392884e-06, + "loss": 2.0465, + "step": 24753 + }, + { + "epoch": 0.82, + "grad_norm": 0.7255831360816956, + "learning_rate": 1.5469049311164208e-06, + "loss": 2.0271, + "step": 24754 + }, + { + "epoch": 0.82, + "grad_norm": 0.7373485565185547, + "learning_rate": 1.546337099691665e-06, + "loss": 2.0237, + "step": 24755 + }, + { + "epoch": 0.82, + "grad_norm": 0.75111323595047, + "learning_rate": 1.5457693637714389e-06, + "loss": 2.076, + "step": 24756 + }, + { + "epoch": 0.82, + "grad_norm": 0.7407294511795044, + "learning_rate": 1.5452017233621575e-06, + "loss": 2.0809, + "step": 24757 + }, + { + "epoch": 0.82, + "grad_norm": 0.7730043530464172, + "learning_rate": 1.54463417847023e-06, + "loss": 2.0748, + "step": 24758 + }, + { + "epoch": 0.82, + "grad_norm": 0.7276864051818848, + "learning_rate": 1.5440667291020728e-06, + "loss": 2.0172, + "step": 24759 + }, + { + "epoch": 0.82, + "grad_norm": 0.7292165160179138, + "learning_rate": 1.5434993752640948e-06, + "loss": 2.0197, + "step": 24760 + }, + { + "epoch": 0.82, + "grad_norm": 0.7439363598823547, + "learning_rate": 1.542932116962701e-06, + "loss": 2.0465, + "step": 24761 + }, + { + "epoch": 0.82, + "grad_norm": 0.7563647627830505, + "learning_rate": 1.5423649542043052e-06, + "loss": 2.0331, + "step": 24762 + }, + { + "epoch": 0.82, + "grad_norm": 0.7628288865089417, + "learning_rate": 1.5417978869953166e-06, + "loss": 2.0561, + "step": 24763 + }, + { + "epoch": 0.82, + "grad_norm": 0.7728883624076843, + "learning_rate": 1.5412309153421346e-06, + "loss": 2.0358, + "step": 24764 + }, + { + "epoch": 0.82, + "grad_norm": 0.7875117063522339, + "learning_rate": 1.5406640392511684e-06, + "loss": 2.1614, + "step": 24765 + }, + { + "epoch": 0.82, + "grad_norm": 0.7547198534011841, + "learning_rate": 1.5400972587288254e-06, + "loss": 1.969, + "step": 24766 + }, + { + "epoch": 0.82, + "grad_norm": 0.7479784488677979, + "learning_rate": 1.5395305737815025e-06, + "loss": 2.1037, + "step": 24767 + }, + { + "epoch": 0.82, + "grad_norm": 0.7540486454963684, + "learning_rate": 1.5389639844156069e-06, + "loss": 2.084, + "step": 24768 + }, + { + "epoch": 0.82, + "grad_norm": 0.7532678246498108, + "learning_rate": 1.5383974906375377e-06, + "loss": 2.0476, + "step": 24769 + }, + { + "epoch": 0.82, + "grad_norm": 0.7456028461456299, + "learning_rate": 1.537831092453692e-06, + "loss": 2.0746, + "step": 24770 + }, + { + "epoch": 0.82, + "grad_norm": 0.7707963585853577, + "learning_rate": 1.5372647898704718e-06, + "loss": 2.0424, + "step": 24771 + }, + { + "epoch": 0.82, + "grad_norm": 0.7400874495506287, + "learning_rate": 1.536698582894277e-06, + "loss": 2.0405, + "step": 24772 + }, + { + "epoch": 0.82, + "grad_norm": 0.7456061244010925, + "learning_rate": 1.5361324715315006e-06, + "loss": 2.0877, + "step": 24773 + }, + { + "epoch": 0.82, + "grad_norm": 0.7650687098503113, + "learning_rate": 1.5355664557885385e-06, + "loss": 2.0744, + "step": 24774 + }, + { + "epoch": 0.82, + "grad_norm": 0.7734844088554382, + "learning_rate": 1.5350005356717868e-06, + "loss": 2.0255, + "step": 24775 + }, + { + "epoch": 0.82, + "grad_norm": 0.7319495677947998, + "learning_rate": 1.5344347111876367e-06, + "loss": 2.0651, + "step": 24776 + }, + { + "epoch": 0.82, + "grad_norm": 0.7321783304214478, + "learning_rate": 1.5338689823424836e-06, + "loss": 2.0174, + "step": 24777 + }, + { + "epoch": 0.82, + "grad_norm": 0.7307380437850952, + "learning_rate": 1.533303349142715e-06, + "loss": 2.0201, + "step": 24778 + }, + { + "epoch": 0.82, + "grad_norm": 0.760513186454773, + "learning_rate": 1.5327378115947255e-06, + "loss": 2.0534, + "step": 24779 + }, + { + "epoch": 0.82, + "grad_norm": 0.7649939656257629, + "learning_rate": 1.5321723697048995e-06, + "loss": 1.9236, + "step": 24780 + }, + { + "epoch": 0.82, + "grad_norm": 0.751089870929718, + "learning_rate": 1.53160702347963e-06, + "loss": 2.1068, + "step": 24781 + }, + { + "epoch": 0.82, + "grad_norm": 0.7439225912094116, + "learning_rate": 1.5310417729253013e-06, + "loss": 2.0866, + "step": 24782 + }, + { + "epoch": 0.82, + "grad_norm": 0.7315003275871277, + "learning_rate": 1.5304766180482966e-06, + "loss": 2.0164, + "step": 24783 + }, + { + "epoch": 0.82, + "grad_norm": 0.7567814588546753, + "learning_rate": 1.529911558855004e-06, + "loss": 1.9752, + "step": 24784 + }, + { + "epoch": 0.82, + "grad_norm": 0.7418934106826782, + "learning_rate": 1.5293465953518105e-06, + "loss": 1.9813, + "step": 24785 + }, + { + "epoch": 0.82, + "grad_norm": 0.7305530309677124, + "learning_rate": 1.528781727545091e-06, + "loss": 1.9998, + "step": 24786 + }, + { + "epoch": 0.82, + "grad_norm": 0.759048342704773, + "learning_rate": 1.5282169554412307e-06, + "loss": 2.1039, + "step": 24787 + }, + { + "epoch": 0.82, + "grad_norm": 0.7438389658927917, + "learning_rate": 1.527652279046613e-06, + "loss": 2.0109, + "step": 24788 + }, + { + "epoch": 0.82, + "grad_norm": 0.7528771758079529, + "learning_rate": 1.5270876983676108e-06, + "loss": 2.1201, + "step": 24789 + }, + { + "epoch": 0.82, + "grad_norm": 0.7434794902801514, + "learning_rate": 1.52652321341061e-06, + "loss": 2.052, + "step": 24790 + }, + { + "epoch": 0.82, + "grad_norm": 0.7505081295967102, + "learning_rate": 1.5259588241819833e-06, + "loss": 1.9835, + "step": 24791 + }, + { + "epoch": 0.82, + "grad_norm": 0.7506682276725769, + "learning_rate": 1.5253945306881057e-06, + "loss": 2.0943, + "step": 24792 + }, + { + "epoch": 0.82, + "grad_norm": 0.7411180734634399, + "learning_rate": 1.5248303329353543e-06, + "loss": 2.0692, + "step": 24793 + }, + { + "epoch": 0.82, + "grad_norm": 0.7329487800598145, + "learning_rate": 1.524266230930105e-06, + "loss": 2.0651, + "step": 24794 + }, + { + "epoch": 0.82, + "grad_norm": 0.7391970753669739, + "learning_rate": 1.523702224678728e-06, + "loss": 1.9865, + "step": 24795 + }, + { + "epoch": 0.82, + "grad_norm": 0.7162978649139404, + "learning_rate": 1.5231383141875934e-06, + "loss": 2.058, + "step": 24796 + }, + { + "epoch": 0.82, + "grad_norm": 0.7709743976593018, + "learning_rate": 1.5225744994630742e-06, + "loss": 2.0065, + "step": 24797 + }, + { + "epoch": 0.83, + "grad_norm": 0.7408787608146667, + "learning_rate": 1.5220107805115424e-06, + "loss": 1.9629, + "step": 24798 + }, + { + "epoch": 0.83, + "grad_norm": 0.7529571652412415, + "learning_rate": 1.5214471573393653e-06, + "loss": 2.0759, + "step": 24799 + }, + { + "epoch": 0.83, + "grad_norm": 0.7181904315948486, + "learning_rate": 1.520883629952905e-06, + "loss": 2.0087, + "step": 24800 + }, + { + "epoch": 0.83, + "grad_norm": 0.7279215455055237, + "learning_rate": 1.5203201983585358e-06, + "loss": 2.0452, + "step": 24801 + }, + { + "epoch": 0.83, + "grad_norm": 0.7429754734039307, + "learning_rate": 1.519756862562617e-06, + "loss": 2.0596, + "step": 24802 + }, + { + "epoch": 0.83, + "grad_norm": 0.7268272638320923, + "learning_rate": 1.5191936225715176e-06, + "loss": 1.9947, + "step": 24803 + }, + { + "epoch": 0.83, + "grad_norm": 0.7643913626670837, + "learning_rate": 1.5186304783915983e-06, + "loss": 2.0526, + "step": 24804 + }, + { + "epoch": 0.83, + "grad_norm": 0.7359586358070374, + "learning_rate": 1.5180674300292185e-06, + "loss": 2.0297, + "step": 24805 + }, + { + "epoch": 0.83, + "grad_norm": 0.7537996172904968, + "learning_rate": 1.5175044774907433e-06, + "loss": 2.0977, + "step": 24806 + }, + { + "epoch": 0.83, + "grad_norm": 0.7732250690460205, + "learning_rate": 1.5169416207825327e-06, + "loss": 2.0812, + "step": 24807 + }, + { + "epoch": 0.83, + "grad_norm": 0.7159727215766907, + "learning_rate": 1.5163788599109442e-06, + "loss": 2.0327, + "step": 24808 + }, + { + "epoch": 0.83, + "grad_norm": 0.7774404883384705, + "learning_rate": 1.5158161948823325e-06, + "loss": 2.0758, + "step": 24809 + }, + { + "epoch": 0.83, + "grad_norm": 0.7422087788581848, + "learning_rate": 1.5152536257030604e-06, + "loss": 1.9764, + "step": 24810 + }, + { + "epoch": 0.83, + "grad_norm": 0.7168338894844055, + "learning_rate": 1.514691152379477e-06, + "loss": 2.0369, + "step": 24811 + }, + { + "epoch": 0.83, + "grad_norm": 0.7576701641082764, + "learning_rate": 1.5141287749179434e-06, + "loss": 2.0207, + "step": 24812 + }, + { + "epoch": 0.83, + "grad_norm": 0.7526422142982483, + "learning_rate": 1.5135664933248074e-06, + "loss": 2.086, + "step": 24813 + }, + { + "epoch": 0.83, + "grad_norm": 0.7316453456878662, + "learning_rate": 1.513004307606425e-06, + "loss": 2.032, + "step": 24814 + }, + { + "epoch": 0.83, + "grad_norm": 0.7603732347488403, + "learning_rate": 1.5124422177691445e-06, + "loss": 2.0756, + "step": 24815 + }, + { + "epoch": 0.83, + "grad_norm": 0.7363866567611694, + "learning_rate": 1.5118802238193197e-06, + "loss": 2.0393, + "step": 24816 + }, + { + "epoch": 0.83, + "grad_norm": 0.7656158804893494, + "learning_rate": 1.511318325763298e-06, + "loss": 1.975, + "step": 24817 + }, + { + "epoch": 0.83, + "grad_norm": 0.7169625759124756, + "learning_rate": 1.510756523607424e-06, + "loss": 2.0373, + "step": 24818 + }, + { + "epoch": 0.83, + "grad_norm": 0.743604302406311, + "learning_rate": 1.5101948173580483e-06, + "loss": 2.0897, + "step": 24819 + }, + { + "epoch": 0.83, + "grad_norm": 0.7340902090072632, + "learning_rate": 1.509633207021518e-06, + "loss": 2.0081, + "step": 24820 + }, + { + "epoch": 0.83, + "grad_norm": 0.7773296236991882, + "learning_rate": 1.509071692604176e-06, + "loss": 2.1129, + "step": 24821 + }, + { + "epoch": 0.83, + "grad_norm": 0.7375178337097168, + "learning_rate": 1.5085102741123626e-06, + "loss": 2.0029, + "step": 24822 + }, + { + "epoch": 0.83, + "grad_norm": 0.7441858053207397, + "learning_rate": 1.507948951552427e-06, + "loss": 2.0036, + "step": 24823 + }, + { + "epoch": 0.83, + "grad_norm": 0.7257186770439148, + "learning_rate": 1.5073877249307045e-06, + "loss": 2.0964, + "step": 24824 + }, + { + "epoch": 0.83, + "grad_norm": 0.7642390727996826, + "learning_rate": 1.506826594253541e-06, + "loss": 2.0762, + "step": 24825 + }, + { + "epoch": 0.83, + "grad_norm": 0.748098611831665, + "learning_rate": 1.5062655595272735e-06, + "loss": 2.126, + "step": 24826 + }, + { + "epoch": 0.83, + "grad_norm": 0.7200208902359009, + "learning_rate": 1.505704620758237e-06, + "loss": 1.9989, + "step": 24827 + }, + { + "epoch": 0.83, + "grad_norm": 0.7468613982200623, + "learning_rate": 1.5051437779527722e-06, + "loss": 2.007, + "step": 24828 + }, + { + "epoch": 0.83, + "grad_norm": 0.7690402865409851, + "learning_rate": 1.504583031117216e-06, + "loss": 2.0562, + "step": 24829 + }, + { + "epoch": 0.83, + "grad_norm": 0.7629485130310059, + "learning_rate": 1.5040223802579025e-06, + "loss": 1.962, + "step": 24830 + }, + { + "epoch": 0.83, + "grad_norm": 0.7495771050453186, + "learning_rate": 1.5034618253811616e-06, + "loss": 2.0722, + "step": 24831 + }, + { + "epoch": 0.83, + "grad_norm": 0.7416913509368896, + "learning_rate": 1.5029013664933335e-06, + "loss": 2.0417, + "step": 24832 + }, + { + "epoch": 0.83, + "grad_norm": 0.7573280930519104, + "learning_rate": 1.5023410036007424e-06, + "loss": 2.0891, + "step": 24833 + }, + { + "epoch": 0.83, + "grad_norm": 0.7307788133621216, + "learning_rate": 1.5017807367097248e-06, + "loss": 1.9758, + "step": 24834 + }, + { + "epoch": 0.83, + "grad_norm": 0.7629086971282959, + "learning_rate": 1.5012205658266066e-06, + "loss": 1.9765, + "step": 24835 + }, + { + "epoch": 0.83, + "grad_norm": 0.7630726099014282, + "learning_rate": 1.5006604909577193e-06, + "loss": 2.1183, + "step": 24836 + }, + { + "epoch": 0.83, + "grad_norm": 0.7442490458488464, + "learning_rate": 1.500100512109387e-06, + "loss": 2.0619, + "step": 24837 + }, + { + "epoch": 0.83, + "grad_norm": 0.7420192956924438, + "learning_rate": 1.4995406292879388e-06, + "loss": 1.9632, + "step": 24838 + }, + { + "epoch": 0.83, + "grad_norm": 0.7361243963241577, + "learning_rate": 1.4989808424996998e-06, + "loss": 2.0383, + "step": 24839 + }, + { + "epoch": 0.83, + "grad_norm": 0.7191910147666931, + "learning_rate": 1.4984211517509905e-06, + "loss": 2.0243, + "step": 24840 + }, + { + "epoch": 0.83, + "grad_norm": 0.7341801524162292, + "learning_rate": 1.497861557048137e-06, + "loss": 2.0634, + "step": 24841 + }, + { + "epoch": 0.83, + "grad_norm": 0.732101321220398, + "learning_rate": 1.497302058397463e-06, + "loss": 2.0156, + "step": 24842 + }, + { + "epoch": 0.83, + "grad_norm": 0.7340344786643982, + "learning_rate": 1.4967426558052878e-06, + "loss": 2.0131, + "step": 24843 + }, + { + "epoch": 0.83, + "grad_norm": 0.7823872566223145, + "learning_rate": 1.4961833492779276e-06, + "loss": 2.0495, + "step": 24844 + }, + { + "epoch": 0.83, + "grad_norm": 0.7334010004997253, + "learning_rate": 1.4956241388217063e-06, + "loss": 1.9814, + "step": 24845 + }, + { + "epoch": 0.83, + "grad_norm": 0.7264289259910583, + "learning_rate": 1.4950650244429377e-06, + "loss": 2.0014, + "step": 24846 + }, + { + "epoch": 0.83, + "grad_norm": 0.7532520294189453, + "learning_rate": 1.4945060061479422e-06, + "loss": 2.0829, + "step": 24847 + }, + { + "epoch": 0.83, + "grad_norm": 0.8003633618354797, + "learning_rate": 1.4939470839430338e-06, + "loss": 2.0344, + "step": 24848 + }, + { + "epoch": 0.83, + "grad_norm": 0.7563724517822266, + "learning_rate": 1.4933882578345227e-06, + "loss": 2.0137, + "step": 24849 + }, + { + "epoch": 0.83, + "grad_norm": 0.7585241198539734, + "learning_rate": 1.4928295278287264e-06, + "loss": 2.0346, + "step": 24850 + }, + { + "epoch": 0.83, + "grad_norm": 0.7512881755828857, + "learning_rate": 1.4922708939319587e-06, + "loss": 2.0594, + "step": 24851 + }, + { + "epoch": 0.83, + "grad_norm": 0.7724229097366333, + "learning_rate": 1.4917123561505275e-06, + "loss": 2.1233, + "step": 24852 + }, + { + "epoch": 0.83, + "grad_norm": 0.7541494965553284, + "learning_rate": 1.4911539144907429e-06, + "loss": 2.015, + "step": 24853 + }, + { + "epoch": 0.83, + "grad_norm": 0.7142285704612732, + "learning_rate": 1.4905955689589158e-06, + "loss": 1.9862, + "step": 24854 + }, + { + "epoch": 0.83, + "grad_norm": 0.756924569606781, + "learning_rate": 1.4900373195613515e-06, + "loss": 2.0334, + "step": 24855 + }, + { + "epoch": 0.83, + "grad_norm": 0.7574272155761719, + "learning_rate": 1.4894791663043596e-06, + "loss": 2.0707, + "step": 24856 + }, + { + "epoch": 0.83, + "grad_norm": 0.7171395421028137, + "learning_rate": 1.4889211091942436e-06, + "loss": 1.9911, + "step": 24857 + }, + { + "epoch": 0.83, + "grad_norm": 0.7445770502090454, + "learning_rate": 1.4883631482373096e-06, + "loss": 1.9962, + "step": 24858 + }, + { + "epoch": 0.83, + "grad_norm": 0.7773033976554871, + "learning_rate": 1.4878052834398593e-06, + "loss": 2.0264, + "step": 24859 + }, + { + "epoch": 0.83, + "grad_norm": 0.7424682378768921, + "learning_rate": 1.4872475148081977e-06, + "loss": 1.9937, + "step": 24860 + }, + { + "epoch": 0.83, + "grad_norm": 0.7593292593955994, + "learning_rate": 1.4866898423486253e-06, + "loss": 2.0969, + "step": 24861 + }, + { + "epoch": 0.83, + "grad_norm": 0.7616005539894104, + "learning_rate": 1.4861322660674393e-06, + "loss": 1.9281, + "step": 24862 + }, + { + "epoch": 0.83, + "grad_norm": 0.7516399621963501, + "learning_rate": 1.4855747859709413e-06, + "loss": 2.0553, + "step": 24863 + }, + { + "epoch": 0.83, + "grad_norm": 0.735551118850708, + "learning_rate": 1.4850174020654318e-06, + "loss": 2.0519, + "step": 24864 + }, + { + "epoch": 0.83, + "grad_norm": 0.7431236505508423, + "learning_rate": 1.4844601143572057e-06, + "loss": 2.0001, + "step": 24865 + }, + { + "epoch": 0.83, + "grad_norm": 0.7233752608299255, + "learning_rate": 1.483902922852556e-06, + "loss": 1.9788, + "step": 24866 + }, + { + "epoch": 0.83, + "grad_norm": 0.7340803742408752, + "learning_rate": 1.4833458275577828e-06, + "loss": 1.9875, + "step": 24867 + }, + { + "epoch": 0.83, + "grad_norm": 0.7438344359397888, + "learning_rate": 1.4827888284791747e-06, + "loss": 2.0066, + "step": 24868 + }, + { + "epoch": 0.83, + "grad_norm": 0.7409992814064026, + "learning_rate": 1.4822319256230267e-06, + "loss": 2.0211, + "step": 24869 + }, + { + "epoch": 0.83, + "grad_norm": 0.7075291275978088, + "learning_rate": 1.4816751189956346e-06, + "loss": 2.0319, + "step": 24870 + }, + { + "epoch": 0.83, + "grad_norm": 0.7767680883407593, + "learning_rate": 1.4811184086032814e-06, + "loss": 2.0157, + "step": 24871 + }, + { + "epoch": 0.83, + "grad_norm": 0.7490136623382568, + "learning_rate": 1.4805617944522588e-06, + "loss": 2.0005, + "step": 24872 + }, + { + "epoch": 0.83, + "grad_norm": 0.7313303351402283, + "learning_rate": 1.4800052765488592e-06, + "loss": 2.0331, + "step": 24873 + }, + { + "epoch": 0.83, + "grad_norm": 0.7321137189865112, + "learning_rate": 1.4794488548993668e-06, + "loss": 2.0209, + "step": 24874 + }, + { + "epoch": 0.83, + "grad_norm": 0.7748061418533325, + "learning_rate": 1.4788925295100642e-06, + "loss": 2.0565, + "step": 24875 + }, + { + "epoch": 0.83, + "grad_norm": 0.7291797399520874, + "learning_rate": 1.478336300387243e-06, + "loss": 2.0961, + "step": 24876 + }, + { + "epoch": 0.83, + "grad_norm": 0.7370447516441345, + "learning_rate": 1.4777801675371828e-06, + "loss": 2.1036, + "step": 24877 + }, + { + "epoch": 0.83, + "grad_norm": 0.7600078582763672, + "learning_rate": 1.4772241309661684e-06, + "loss": 1.9991, + "step": 24878 + }, + { + "epoch": 0.83, + "grad_norm": 0.7252472043037415, + "learning_rate": 1.4766681906804792e-06, + "loss": 2.0259, + "step": 24879 + }, + { + "epoch": 0.83, + "grad_norm": 0.7419667840003967, + "learning_rate": 1.4761123466864002e-06, + "loss": 2.0226, + "step": 24880 + }, + { + "epoch": 0.83, + "grad_norm": 0.7757155895233154, + "learning_rate": 1.4755565989902065e-06, + "loss": 2.1047, + "step": 24881 + }, + { + "epoch": 0.83, + "grad_norm": 0.7458028793334961, + "learning_rate": 1.4750009475981774e-06, + "loss": 2.0538, + "step": 24882 + }, + { + "epoch": 0.83, + "grad_norm": 0.7708866000175476, + "learning_rate": 1.4744453925165969e-06, + "loss": 1.9739, + "step": 24883 + }, + { + "epoch": 0.83, + "grad_norm": 0.7627978920936584, + "learning_rate": 1.473889933751731e-06, + "loss": 2.0803, + "step": 24884 + }, + { + "epoch": 0.83, + "grad_norm": 0.7578418850898743, + "learning_rate": 1.4733345713098602e-06, + "loss": 2.0904, + "step": 24885 + }, + { + "epoch": 0.83, + "grad_norm": 0.7354166507720947, + "learning_rate": 1.4727793051972605e-06, + "loss": 2.0154, + "step": 24886 + }, + { + "epoch": 0.83, + "grad_norm": 0.7664469480514526, + "learning_rate": 1.4722241354202027e-06, + "loss": 2.0588, + "step": 24887 + }, + { + "epoch": 0.83, + "grad_norm": 0.7416092157363892, + "learning_rate": 1.471669061984956e-06, + "loss": 2.0182, + "step": 24888 + }, + { + "epoch": 0.83, + "grad_norm": 0.7276415228843689, + "learning_rate": 1.4711140848977967e-06, + "loss": 2.0257, + "step": 24889 + }, + { + "epoch": 0.83, + "grad_norm": 0.7509539127349854, + "learning_rate": 1.4705592041649908e-06, + "loss": 2.0165, + "step": 24890 + }, + { + "epoch": 0.83, + "grad_norm": 0.7533541321754456, + "learning_rate": 1.4700044197928065e-06, + "loss": 2.0176, + "step": 24891 + }, + { + "epoch": 0.83, + "grad_norm": 0.7541894912719727, + "learning_rate": 1.4694497317875189e-06, + "loss": 2.1044, + "step": 24892 + }, + { + "epoch": 0.83, + "grad_norm": 0.7428129315376282, + "learning_rate": 1.4688951401553841e-06, + "loss": 2.1016, + "step": 24893 + }, + { + "epoch": 0.83, + "grad_norm": 0.7337577939033508, + "learning_rate": 1.4683406449026727e-06, + "loss": 2.0864, + "step": 24894 + }, + { + "epoch": 0.83, + "grad_norm": 0.732695996761322, + "learning_rate": 1.4677862460356506e-06, + "loss": 2.0473, + "step": 24895 + }, + { + "epoch": 0.83, + "grad_norm": 0.7389583587646484, + "learning_rate": 1.4672319435605787e-06, + "loss": 2.0654, + "step": 24896 + }, + { + "epoch": 0.83, + "grad_norm": 0.7543737292289734, + "learning_rate": 1.4666777374837171e-06, + "loss": 2.0691, + "step": 24897 + }, + { + "epoch": 0.83, + "grad_norm": 0.738282322883606, + "learning_rate": 1.46612362781133e-06, + "loss": 1.9842, + "step": 24898 + }, + { + "epoch": 0.83, + "grad_norm": 0.7562292814254761, + "learning_rate": 1.465569614549679e-06, + "loss": 2.0029, + "step": 24899 + }, + { + "epoch": 0.83, + "grad_norm": 0.7625837922096252, + "learning_rate": 1.465015697705019e-06, + "loss": 2.0722, + "step": 24900 + }, + { + "epoch": 0.83, + "grad_norm": 0.7556712627410889, + "learning_rate": 1.4644618772836116e-06, + "loss": 2.0218, + "step": 24901 + }, + { + "epoch": 0.83, + "grad_norm": 0.7502215504646301, + "learning_rate": 1.463908153291711e-06, + "loss": 2.0919, + "step": 24902 + }, + { + "epoch": 0.83, + "grad_norm": 0.7650898694992065, + "learning_rate": 1.4633545257355718e-06, + "loss": 2.019, + "step": 24903 + }, + { + "epoch": 0.83, + "grad_norm": 0.75739985704422, + "learning_rate": 1.4628009946214505e-06, + "loss": 2.08, + "step": 24904 + }, + { + "epoch": 0.83, + "grad_norm": 0.7352570295333862, + "learning_rate": 1.4622475599556041e-06, + "loss": 2.0552, + "step": 24905 + }, + { + "epoch": 0.83, + "grad_norm": 0.7424171566963196, + "learning_rate": 1.4616942217442764e-06, + "loss": 2.0248, + "step": 24906 + }, + { + "epoch": 0.83, + "grad_norm": 0.8046366572380066, + "learning_rate": 1.4611409799937248e-06, + "loss": 2.0401, + "step": 24907 + }, + { + "epoch": 0.83, + "grad_norm": 0.754127025604248, + "learning_rate": 1.4605878347101988e-06, + "loss": 2.0694, + "step": 24908 + }, + { + "epoch": 0.83, + "grad_norm": 0.7638735771179199, + "learning_rate": 1.4600347858999476e-06, + "loss": 2.0488, + "step": 24909 + }, + { + "epoch": 0.83, + "grad_norm": 0.7526469230651855, + "learning_rate": 1.4594818335692163e-06, + "loss": 2.0556, + "step": 24910 + }, + { + "epoch": 0.83, + "grad_norm": 0.7513940930366516, + "learning_rate": 1.4589289777242565e-06, + "loss": 2.0731, + "step": 24911 + }, + { + "epoch": 0.83, + "grad_norm": 0.7152296900749207, + "learning_rate": 1.458376218371309e-06, + "loss": 2.0295, + "step": 24912 + }, + { + "epoch": 0.83, + "grad_norm": 0.7388644218444824, + "learning_rate": 1.457823555516621e-06, + "loss": 2.1876, + "step": 24913 + }, + { + "epoch": 0.83, + "grad_norm": 0.7313733696937561, + "learning_rate": 1.4572709891664383e-06, + "loss": 2.0041, + "step": 24914 + }, + { + "epoch": 0.83, + "grad_norm": 0.7303443551063538, + "learning_rate": 1.4567185193270016e-06, + "loss": 2.0401, + "step": 24915 + }, + { + "epoch": 0.83, + "grad_norm": 0.754470944404602, + "learning_rate": 1.4561661460045506e-06, + "loss": 1.9954, + "step": 24916 + }, + { + "epoch": 0.83, + "grad_norm": 0.7471160888671875, + "learning_rate": 1.455613869205329e-06, + "loss": 1.9838, + "step": 24917 + }, + { + "epoch": 0.83, + "grad_norm": 0.7645959258079529, + "learning_rate": 1.455061688935574e-06, + "loss": 1.9776, + "step": 24918 + }, + { + "epoch": 0.83, + "grad_norm": 0.7683794498443604, + "learning_rate": 1.454509605201523e-06, + "loss": 2.0778, + "step": 24919 + }, + { + "epoch": 0.83, + "grad_norm": 0.7820815443992615, + "learning_rate": 1.4539576180094139e-06, + "loss": 2.0469, + "step": 24920 + }, + { + "epoch": 0.83, + "grad_norm": 0.72707200050354, + "learning_rate": 1.4534057273654844e-06, + "loss": 2.0536, + "step": 24921 + }, + { + "epoch": 0.83, + "grad_norm": 0.7351446151733398, + "learning_rate": 1.4528539332759673e-06, + "loss": 2.037, + "step": 24922 + }, + { + "epoch": 0.83, + "grad_norm": 0.7233196496963501, + "learning_rate": 1.4523022357470996e-06, + "loss": 2.0081, + "step": 24923 + }, + { + "epoch": 0.83, + "grad_norm": 0.7394496202468872, + "learning_rate": 1.4517506347851107e-06, + "loss": 2.0643, + "step": 24924 + }, + { + "epoch": 0.83, + "grad_norm": 0.7514690160751343, + "learning_rate": 1.4511991303962314e-06, + "loss": 2.1315, + "step": 24925 + }, + { + "epoch": 0.83, + "grad_norm": 0.7188594341278076, + "learning_rate": 1.4506477225866944e-06, + "loss": 2.0507, + "step": 24926 + }, + { + "epoch": 0.83, + "grad_norm": 0.7346506118774414, + "learning_rate": 1.4500964113627337e-06, + "loss": 2.0562, + "step": 24927 + }, + { + "epoch": 0.83, + "grad_norm": 0.744236409664154, + "learning_rate": 1.4495451967305686e-06, + "loss": 2.0267, + "step": 24928 + }, + { + "epoch": 0.83, + "grad_norm": 0.7599499821662903, + "learning_rate": 1.4489940786964306e-06, + "loss": 1.9929, + "step": 24929 + }, + { + "epoch": 0.83, + "grad_norm": 0.7448164820671082, + "learning_rate": 1.4484430572665486e-06, + "loss": 2.072, + "step": 24930 + }, + { + "epoch": 0.83, + "grad_norm": 0.7348343133926392, + "learning_rate": 1.447892132447145e-06, + "loss": 2.0068, + "step": 24931 + }, + { + "epoch": 0.83, + "grad_norm": 0.7368728518486023, + "learning_rate": 1.4473413042444416e-06, + "loss": 2.0235, + "step": 24932 + }, + { + "epoch": 0.83, + "grad_norm": 0.7424443364143372, + "learning_rate": 1.446790572664667e-06, + "loss": 2.0791, + "step": 24933 + }, + { + "epoch": 0.83, + "grad_norm": 0.7498221397399902, + "learning_rate": 1.4462399377140369e-06, + "loss": 2.0938, + "step": 24934 + }, + { + "epoch": 0.83, + "grad_norm": 0.7571089863777161, + "learning_rate": 1.4456893993987752e-06, + "loss": 2.0636, + "step": 24935 + }, + { + "epoch": 0.83, + "grad_norm": 0.7512771487236023, + "learning_rate": 1.4451389577251029e-06, + "loss": 2.0469, + "step": 24936 + }, + { + "epoch": 0.83, + "grad_norm": 0.7596918940544128, + "learning_rate": 1.444588612699238e-06, + "loss": 2.0534, + "step": 24937 + }, + { + "epoch": 0.83, + "grad_norm": 0.7547183036804199, + "learning_rate": 1.4440383643273936e-06, + "loss": 2.0612, + "step": 24938 + }, + { + "epoch": 0.83, + "grad_norm": 0.8248137831687927, + "learning_rate": 1.4434882126157924e-06, + "loss": 2.0551, + "step": 24939 + }, + { + "epoch": 0.83, + "grad_norm": 0.7174193263053894, + "learning_rate": 1.442938157570647e-06, + "loss": 1.9518, + "step": 24940 + }, + { + "epoch": 0.83, + "grad_norm": 0.766645073890686, + "learning_rate": 1.442388199198169e-06, + "loss": 2.0177, + "step": 24941 + }, + { + "epoch": 0.83, + "grad_norm": 0.7730458974838257, + "learning_rate": 1.441838337504573e-06, + "loss": 2.059, + "step": 24942 + }, + { + "epoch": 0.83, + "grad_norm": 0.7574564218521118, + "learning_rate": 1.4412885724960758e-06, + "loss": 2.0624, + "step": 24943 + }, + { + "epoch": 0.83, + "grad_norm": 0.7196966409683228, + "learning_rate": 1.440738904178881e-06, + "loss": 2.0211, + "step": 24944 + }, + { + "epoch": 0.83, + "grad_norm": 0.746587336063385, + "learning_rate": 1.4401893325592042e-06, + "loss": 2.0056, + "step": 24945 + }, + { + "epoch": 0.83, + "grad_norm": 0.7573887705802917, + "learning_rate": 1.4396398576432525e-06, + "loss": 2.0744, + "step": 24946 + }, + { + "epoch": 0.83, + "grad_norm": 0.753665566444397, + "learning_rate": 1.4390904794372295e-06, + "loss": 2.0426, + "step": 24947 + }, + { + "epoch": 0.83, + "grad_norm": 0.7425118088722229, + "learning_rate": 1.438541197947345e-06, + "loss": 2.0638, + "step": 24948 + }, + { + "epoch": 0.83, + "grad_norm": 0.7178659439086914, + "learning_rate": 1.4379920131798098e-06, + "loss": 2.0334, + "step": 24949 + }, + { + "epoch": 0.83, + "grad_norm": 0.7429017424583435, + "learning_rate": 1.4374429251408183e-06, + "loss": 2.117, + "step": 24950 + }, + { + "epoch": 0.83, + "grad_norm": 0.7457799911499023, + "learning_rate": 1.4368939338365783e-06, + "loss": 2.0503, + "step": 24951 + }, + { + "epoch": 0.83, + "grad_norm": 0.7323011159896851, + "learning_rate": 1.4363450392732947e-06, + "loss": 2.0531, + "step": 24952 + }, + { + "epoch": 0.83, + "grad_norm": 0.746109664440155, + "learning_rate": 1.4357962414571635e-06, + "loss": 1.9997, + "step": 24953 + }, + { + "epoch": 0.83, + "grad_norm": 0.7290377020835876, + "learning_rate": 1.4352475403943899e-06, + "loss": 1.9965, + "step": 24954 + }, + { + "epoch": 0.83, + "grad_norm": 0.7234928607940674, + "learning_rate": 1.4346989360911701e-06, + "loss": 2.0306, + "step": 24955 + }, + { + "epoch": 0.83, + "grad_norm": 0.742479145526886, + "learning_rate": 1.4341504285537e-06, + "loss": 2.0342, + "step": 24956 + }, + { + "epoch": 0.83, + "grad_norm": 0.7563445568084717, + "learning_rate": 1.433602017788177e-06, + "loss": 2.0051, + "step": 24957 + }, + { + "epoch": 0.83, + "grad_norm": 0.7501334547996521, + "learning_rate": 1.4330537038008019e-06, + "loss": 2.0572, + "step": 24958 + }, + { + "epoch": 0.83, + "grad_norm": 0.7636805772781372, + "learning_rate": 1.432505486597764e-06, + "loss": 2.0643, + "step": 24959 + }, + { + "epoch": 0.83, + "grad_norm": 0.795948326587677, + "learning_rate": 1.431957366185256e-06, + "loss": 2.0661, + "step": 24960 + }, + { + "epoch": 0.83, + "grad_norm": 0.7754184007644653, + "learning_rate": 1.4314093425694753e-06, + "loss": 1.9942, + "step": 24961 + }, + { + "epoch": 0.83, + "grad_norm": 0.7141072154045105, + "learning_rate": 1.4308614157566103e-06, + "loss": 2.0012, + "step": 24962 + }, + { + "epoch": 0.83, + "grad_norm": 0.7436487674713135, + "learning_rate": 1.4303135857528473e-06, + "loss": 2.0306, + "step": 24963 + }, + { + "epoch": 0.83, + "grad_norm": 0.7525427937507629, + "learning_rate": 1.4297658525643798e-06, + "loss": 2.0413, + "step": 24964 + }, + { + "epoch": 0.83, + "grad_norm": 0.7549144625663757, + "learning_rate": 1.4292182161973977e-06, + "loss": 2.0298, + "step": 24965 + }, + { + "epoch": 0.83, + "grad_norm": 0.7496790885925293, + "learning_rate": 1.4286706766580827e-06, + "loss": 2.0711, + "step": 24966 + }, + { + "epoch": 0.83, + "grad_norm": 0.7328413128852844, + "learning_rate": 1.4281232339526262e-06, + "loss": 2.0377, + "step": 24967 + }, + { + "epoch": 0.83, + "grad_norm": 0.7330312132835388, + "learning_rate": 1.427575888087208e-06, + "loss": 2.0458, + "step": 24968 + }, + { + "epoch": 0.83, + "grad_norm": 0.7308694124221802, + "learning_rate": 1.4270286390680132e-06, + "loss": 1.9903, + "step": 24969 + }, + { + "epoch": 0.83, + "grad_norm": 0.7718901634216309, + "learning_rate": 1.4264814869012234e-06, + "loss": 2.0611, + "step": 24970 + }, + { + "epoch": 0.83, + "grad_norm": 0.7286138534545898, + "learning_rate": 1.4259344315930256e-06, + "loss": 2.0341, + "step": 24971 + }, + { + "epoch": 0.83, + "grad_norm": 0.7309878468513489, + "learning_rate": 1.425387473149592e-06, + "loss": 2.0449, + "step": 24972 + }, + { + "epoch": 0.83, + "grad_norm": 0.7500165104866028, + "learning_rate": 1.424840611577105e-06, + "loss": 2.0348, + "step": 24973 + }, + { + "epoch": 0.83, + "grad_norm": 0.7308431267738342, + "learning_rate": 1.4242938468817448e-06, + "loss": 2.0223, + "step": 24974 + }, + { + "epoch": 0.83, + "grad_norm": 0.7213445901870728, + "learning_rate": 1.4237471790696856e-06, + "loss": 2.1098, + "step": 24975 + }, + { + "epoch": 0.83, + "grad_norm": 0.7381265759468079, + "learning_rate": 1.4232006081471062e-06, + "loss": 2.0409, + "step": 24976 + }, + { + "epoch": 0.83, + "grad_norm": 0.7249129414558411, + "learning_rate": 1.4226541341201804e-06, + "loss": 2.0475, + "step": 24977 + }, + { + "epoch": 0.83, + "grad_norm": 0.7500261664390564, + "learning_rate": 1.4221077569950791e-06, + "loss": 2.0179, + "step": 24978 + }, + { + "epoch": 0.83, + "grad_norm": 0.741402268409729, + "learning_rate": 1.4215614767779772e-06, + "loss": 1.9908, + "step": 24979 + }, + { + "epoch": 0.83, + "grad_norm": 0.7662835717201233, + "learning_rate": 1.4210152934750475e-06, + "loss": 2.058, + "step": 24980 + }, + { + "epoch": 0.83, + "grad_norm": 0.7358508110046387, + "learning_rate": 1.4204692070924608e-06, + "loss": 2.0655, + "step": 24981 + }, + { + "epoch": 0.83, + "grad_norm": 0.73835289478302, + "learning_rate": 1.419923217636382e-06, + "loss": 2.0055, + "step": 24982 + }, + { + "epoch": 0.83, + "grad_norm": 0.8847495317459106, + "learning_rate": 1.4193773251129816e-06, + "loss": 2.0292, + "step": 24983 + }, + { + "epoch": 0.83, + "grad_norm": 0.7387908697128296, + "learning_rate": 1.4188315295284306e-06, + "loss": 2.0312, + "step": 24984 + }, + { + "epoch": 0.83, + "grad_norm": 0.7960773706436157, + "learning_rate": 1.4182858308888913e-06, + "loss": 2.0179, + "step": 24985 + }, + { + "epoch": 0.83, + "grad_norm": 0.74830162525177, + "learning_rate": 1.4177402292005282e-06, + "loss": 2.0621, + "step": 24986 + }, + { + "epoch": 0.83, + "grad_norm": 0.7734114527702332, + "learning_rate": 1.4171947244695073e-06, + "loss": 2.0515, + "step": 24987 + }, + { + "epoch": 0.83, + "grad_norm": 0.7556977272033691, + "learning_rate": 1.416649316701989e-06, + "loss": 2.0092, + "step": 24988 + }, + { + "epoch": 0.83, + "grad_norm": 0.7361196279525757, + "learning_rate": 1.4161040059041375e-06, + "loss": 2.0846, + "step": 24989 + }, + { + "epoch": 0.83, + "grad_norm": 0.7426493167877197, + "learning_rate": 1.4155587920821133e-06, + "loss": 2.0394, + "step": 24990 + }, + { + "epoch": 0.83, + "grad_norm": 0.7707929611206055, + "learning_rate": 1.4150136752420718e-06, + "loss": 2.0515, + "step": 24991 + }, + { + "epoch": 0.83, + "grad_norm": 0.7358521819114685, + "learning_rate": 1.4144686553901754e-06, + "loss": 2.0682, + "step": 24992 + }, + { + "epoch": 0.83, + "grad_norm": 0.754878044128418, + "learning_rate": 1.413923732532585e-06, + "loss": 2.0505, + "step": 24993 + }, + { + "epoch": 0.83, + "grad_norm": 0.75506991147995, + "learning_rate": 1.4133789066754465e-06, + "loss": 2.075, + "step": 24994 + }, + { + "epoch": 0.83, + "grad_norm": 0.750690758228302, + "learning_rate": 1.4128341778249223e-06, + "loss": 2.0804, + "step": 24995 + }, + { + "epoch": 0.83, + "grad_norm": 0.742994487285614, + "learning_rate": 1.4122895459871666e-06, + "loss": 2.1061, + "step": 24996 + }, + { + "epoch": 0.83, + "grad_norm": 0.7485726475715637, + "learning_rate": 1.4117450111683284e-06, + "loss": 2.0613, + "step": 24997 + }, + { + "epoch": 0.83, + "grad_norm": 0.7496106028556824, + "learning_rate": 1.4112005733745647e-06, + "loss": 1.9947, + "step": 24998 + }, + { + "epoch": 0.83, + "grad_norm": 0.7527778148651123, + "learning_rate": 1.410656232612021e-06, + "loss": 2.1145, + "step": 24999 + }, + { + "epoch": 0.83, + "grad_norm": 0.7567235231399536, + "learning_rate": 1.4101119888868508e-06, + "loss": 2.0735, + "step": 25000 + }, + { + "epoch": 0.83, + "grad_norm": 0.7352137565612793, + "learning_rate": 1.4095678422051995e-06, + "loss": 2.0364, + "step": 25001 + }, + { + "epoch": 0.83, + "grad_norm": 0.7519750595092773, + "learning_rate": 1.4090237925732186e-06, + "loss": 2.0808, + "step": 25002 + }, + { + "epoch": 0.83, + "grad_norm": 0.7570549845695496, + "learning_rate": 1.4084798399970522e-06, + "loss": 2.0972, + "step": 25003 + }, + { + "epoch": 0.83, + "grad_norm": 0.7711437940597534, + "learning_rate": 1.4079359844828433e-06, + "loss": 2.0498, + "step": 25004 + }, + { + "epoch": 0.83, + "grad_norm": 0.7512153387069702, + "learning_rate": 1.4073922260367378e-06, + "loss": 2.0929, + "step": 25005 + }, + { + "epoch": 0.83, + "grad_norm": 0.7496294379234314, + "learning_rate": 1.406848564664881e-06, + "loss": 2.0579, + "step": 25006 + }, + { + "epoch": 0.83, + "grad_norm": 0.758820116519928, + "learning_rate": 1.4063050003734135e-06, + "loss": 2.0524, + "step": 25007 + }, + { + "epoch": 0.83, + "grad_norm": 0.7568273544311523, + "learning_rate": 1.4057615331684736e-06, + "loss": 2.0548, + "step": 25008 + }, + { + "epoch": 0.83, + "grad_norm": 0.7226709127426147, + "learning_rate": 1.4052181630562055e-06, + "loss": 1.9856, + "step": 25009 + }, + { + "epoch": 0.83, + "grad_norm": 0.7358258962631226, + "learning_rate": 1.4046748900427432e-06, + "loss": 2.0966, + "step": 25010 + }, + { + "epoch": 0.83, + "grad_norm": 0.7271834015846252, + "learning_rate": 1.4041317141342281e-06, + "loss": 2.0687, + "step": 25011 + }, + { + "epoch": 0.83, + "grad_norm": 0.747855007648468, + "learning_rate": 1.4035886353367968e-06, + "loss": 2.0317, + "step": 25012 + }, + { + "epoch": 0.83, + "grad_norm": 0.7373946309089661, + "learning_rate": 1.4030456536565796e-06, + "loss": 2.0498, + "step": 25013 + }, + { + "epoch": 0.83, + "grad_norm": 0.7466525435447693, + "learning_rate": 1.4025027690997139e-06, + "loss": 2.0656, + "step": 25014 + }, + { + "epoch": 0.83, + "grad_norm": 0.7209491729736328, + "learning_rate": 1.401959981672336e-06, + "loss": 2.0519, + "step": 25015 + }, + { + "epoch": 0.83, + "grad_norm": 0.7503870725631714, + "learning_rate": 1.4014172913805768e-06, + "loss": 1.9966, + "step": 25016 + }, + { + "epoch": 0.83, + "grad_norm": 0.7859004735946655, + "learning_rate": 1.400874698230562e-06, + "loss": 2.0211, + "step": 25017 + }, + { + "epoch": 0.83, + "grad_norm": 0.7427493929862976, + "learning_rate": 1.400332202228427e-06, + "loss": 2.0806, + "step": 25018 + }, + { + "epoch": 0.83, + "grad_norm": 0.7610830664634705, + "learning_rate": 1.3997898033802982e-06, + "loss": 2.0425, + "step": 25019 + }, + { + "epoch": 0.83, + "grad_norm": 0.7572808265686035, + "learning_rate": 1.3992475016923058e-06, + "loss": 2.0957, + "step": 25020 + }, + { + "epoch": 0.83, + "grad_norm": 0.7164633274078369, + "learning_rate": 1.3987052971705718e-06, + "loss": 2.0712, + "step": 25021 + }, + { + "epoch": 0.83, + "grad_norm": 0.7434188723564148, + "learning_rate": 1.3981631898212266e-06, + "loss": 2.1078, + "step": 25022 + }, + { + "epoch": 0.83, + "grad_norm": 0.7677701711654663, + "learning_rate": 1.3976211796503903e-06, + "loss": 2.022, + "step": 25023 + }, + { + "epoch": 0.83, + "grad_norm": 0.7595756649971008, + "learning_rate": 1.3970792666641919e-06, + "loss": 2.0448, + "step": 25024 + }, + { + "epoch": 0.83, + "grad_norm": 0.7680371999740601, + "learning_rate": 1.396537450868749e-06, + "loss": 2.0045, + "step": 25025 + }, + { + "epoch": 0.83, + "grad_norm": 0.7705698013305664, + "learning_rate": 1.395995732270181e-06, + "loss": 1.9945, + "step": 25026 + }, + { + "epoch": 0.83, + "grad_norm": 0.7396299839019775, + "learning_rate": 1.3954541108746123e-06, + "loss": 2.0314, + "step": 25027 + }, + { + "epoch": 0.83, + "grad_norm": 0.7372692227363586, + "learning_rate": 1.3949125866881619e-06, + "loss": 2.0022, + "step": 25028 + }, + { + "epoch": 0.83, + "grad_norm": 0.7675361633300781, + "learning_rate": 1.3943711597169463e-06, + "loss": 2.1005, + "step": 25029 + }, + { + "epoch": 0.83, + "grad_norm": 0.7285292744636536, + "learning_rate": 1.3938298299670793e-06, + "loss": 2.0845, + "step": 25030 + }, + { + "epoch": 0.83, + "grad_norm": 0.7436361312866211, + "learning_rate": 1.3932885974446808e-06, + "loss": 2.0589, + "step": 25031 + }, + { + "epoch": 0.83, + "grad_norm": 0.7167371511459351, + "learning_rate": 1.3927474621558624e-06, + "loss": 2.031, + "step": 25032 + }, + { + "epoch": 0.83, + "grad_norm": 0.748346745967865, + "learning_rate": 1.3922064241067412e-06, + "loss": 2.0842, + "step": 25033 + }, + { + "epoch": 0.83, + "grad_norm": 0.7404475808143616, + "learning_rate": 1.391665483303426e-06, + "loss": 2.0288, + "step": 25034 + }, + { + "epoch": 0.83, + "grad_norm": 0.7668644189834595, + "learning_rate": 1.3911246397520285e-06, + "loss": 2.0329, + "step": 25035 + }, + { + "epoch": 0.83, + "grad_norm": 0.718517541885376, + "learning_rate": 1.390583893458658e-06, + "loss": 2.0551, + "step": 25036 + }, + { + "epoch": 0.83, + "grad_norm": 0.7408729791641235, + "learning_rate": 1.3900432444294288e-06, + "loss": 2.0518, + "step": 25037 + }, + { + "epoch": 0.83, + "grad_norm": 0.7193726897239685, + "learning_rate": 1.3895026926704435e-06, + "loss": 1.977, + "step": 25038 + }, + { + "epoch": 0.83, + "grad_norm": 0.7746989727020264, + "learning_rate": 1.3889622381878098e-06, + "loss": 2.085, + "step": 25039 + }, + { + "epoch": 0.83, + "grad_norm": 0.7408944964408875, + "learning_rate": 1.388421880987636e-06, + "loss": 2.0045, + "step": 25040 + }, + { + "epoch": 0.83, + "grad_norm": 0.7758175730705261, + "learning_rate": 1.3878816210760214e-06, + "loss": 2.0335, + "step": 25041 + }, + { + "epoch": 0.83, + "grad_norm": 0.7779616117477417, + "learning_rate": 1.3873414584590771e-06, + "loss": 2.0596, + "step": 25042 + }, + { + "epoch": 0.83, + "grad_norm": 0.7642082571983337, + "learning_rate": 1.3868013931428981e-06, + "loss": 1.9832, + "step": 25043 + }, + { + "epoch": 0.83, + "grad_norm": 0.7376130223274231, + "learning_rate": 1.3862614251335916e-06, + "loss": 2.0607, + "step": 25044 + }, + { + "epoch": 0.83, + "grad_norm": 0.7548394799232483, + "learning_rate": 1.3857215544372538e-06, + "loss": 2.0191, + "step": 25045 + }, + { + "epoch": 0.83, + "grad_norm": 0.7503734230995178, + "learning_rate": 1.3851817810599866e-06, + "loss": 2.0048, + "step": 25046 + }, + { + "epoch": 0.83, + "grad_norm": 0.7692141532897949, + "learning_rate": 1.3846421050078874e-06, + "loss": 2.0006, + "step": 25047 + }, + { + "epoch": 0.83, + "grad_norm": 0.7268210053443909, + "learning_rate": 1.38410252628705e-06, + "loss": 2.0142, + "step": 25048 + }, + { + "epoch": 0.83, + "grad_norm": 0.7160589694976807, + "learning_rate": 1.383563044903573e-06, + "loss": 1.9728, + "step": 25049 + }, + { + "epoch": 0.83, + "grad_norm": 0.746710479259491, + "learning_rate": 1.383023660863554e-06, + "loss": 2.0054, + "step": 25050 + }, + { + "epoch": 0.83, + "grad_norm": 0.7268765568733215, + "learning_rate": 1.382484374173083e-06, + "loss": 2.0274, + "step": 25051 + }, + { + "epoch": 0.83, + "grad_norm": 0.786273717880249, + "learning_rate": 1.3819451848382514e-06, + "loss": 2.018, + "step": 25052 + }, + { + "epoch": 0.83, + "grad_norm": 0.741789698600769, + "learning_rate": 1.381406092865154e-06, + "loss": 2.1063, + "step": 25053 + }, + { + "epoch": 0.83, + "grad_norm": 0.7721714973449707, + "learning_rate": 1.3808670982598772e-06, + "loss": 2.1069, + "step": 25054 + }, + { + "epoch": 0.83, + "grad_norm": 0.7567890882492065, + "learning_rate": 1.3803282010285156e-06, + "loss": 2.0109, + "step": 25055 + }, + { + "epoch": 0.83, + "grad_norm": 0.7555682063102722, + "learning_rate": 1.379789401177154e-06, + "loss": 2.0941, + "step": 25056 + }, + { + "epoch": 0.83, + "grad_norm": 0.7459248900413513, + "learning_rate": 1.379250698711877e-06, + "loss": 2.0621, + "step": 25057 + }, + { + "epoch": 0.83, + "grad_norm": 0.7545117735862732, + "learning_rate": 1.3787120936387744e-06, + "loss": 2.1046, + "step": 25058 + }, + { + "epoch": 0.83, + "grad_norm": 0.7548273205757141, + "learning_rate": 1.3781735859639311e-06, + "loss": 2.036, + "step": 25059 + }, + { + "epoch": 0.83, + "grad_norm": 0.737248420715332, + "learning_rate": 1.3776351756934313e-06, + "loss": 2.0652, + "step": 25060 + }, + { + "epoch": 0.83, + "grad_norm": 0.7423721551895142, + "learning_rate": 1.3770968628333525e-06, + "loss": 2.044, + "step": 25061 + }, + { + "epoch": 0.83, + "grad_norm": 0.7539017200469971, + "learning_rate": 1.3765586473897829e-06, + "loss": 1.9974, + "step": 25062 + }, + { + "epoch": 0.83, + "grad_norm": 0.7847672700881958, + "learning_rate": 1.3760205293687967e-06, + "loss": 2.0586, + "step": 25063 + }, + { + "epoch": 0.83, + "grad_norm": 0.765710711479187, + "learning_rate": 1.375482508776479e-06, + "loss": 2.0291, + "step": 25064 + }, + { + "epoch": 0.83, + "grad_norm": 0.7743377089500427, + "learning_rate": 1.3749445856189037e-06, + "loss": 2.0433, + "step": 25065 + }, + { + "epoch": 0.83, + "grad_norm": 0.7551912665367126, + "learning_rate": 1.3744067599021515e-06, + "loss": 2.0475, + "step": 25066 + }, + { + "epoch": 0.83, + "grad_norm": 0.8225637078285217, + "learning_rate": 1.3738690316322945e-06, + "loss": 2.053, + "step": 25067 + }, + { + "epoch": 0.83, + "grad_norm": 0.767142117023468, + "learning_rate": 1.373331400815412e-06, + "loss": 2.011, + "step": 25068 + }, + { + "epoch": 0.83, + "grad_norm": 0.7446503043174744, + "learning_rate": 1.3727938674575758e-06, + "loss": 2.0475, + "step": 25069 + }, + { + "epoch": 0.83, + "grad_norm": 0.7426686882972717, + "learning_rate": 1.3722564315648556e-06, + "loss": 2.0591, + "step": 25070 + }, + { + "epoch": 0.83, + "grad_norm": 0.7285228967666626, + "learning_rate": 1.3717190931433267e-06, + "loss": 2.0245, + "step": 25071 + }, + { + "epoch": 0.83, + "grad_norm": 0.7637831568717957, + "learning_rate": 1.3711818521990605e-06, + "loss": 1.9677, + "step": 25072 + }, + { + "epoch": 0.83, + "grad_norm": 0.7469715476036072, + "learning_rate": 1.3706447087381247e-06, + "loss": 2.0318, + "step": 25073 + }, + { + "epoch": 0.83, + "grad_norm": 0.7488355040550232, + "learning_rate": 1.3701076627665854e-06, + "loss": 2.1217, + "step": 25074 + }, + { + "epoch": 0.83, + "grad_norm": 0.7682512402534485, + "learning_rate": 1.3695707142905156e-06, + "loss": 2.0657, + "step": 25075 + }, + { + "epoch": 0.83, + "grad_norm": 0.7365228533744812, + "learning_rate": 1.3690338633159738e-06, + "loss": 2.0337, + "step": 25076 + }, + { + "epoch": 0.83, + "grad_norm": 0.7702351212501526, + "learning_rate": 1.368497109849033e-06, + "loss": 2.0487, + "step": 25077 + }, + { + "epoch": 0.83, + "grad_norm": 0.7207365036010742, + "learning_rate": 1.3679604538957525e-06, + "loss": 2.0108, + "step": 25078 + }, + { + "epoch": 0.83, + "grad_norm": 0.7382466197013855, + "learning_rate": 1.3674238954621933e-06, + "loss": 2.0503, + "step": 25079 + }, + { + "epoch": 0.83, + "grad_norm": 0.8027022480964661, + "learning_rate": 1.3668874345544203e-06, + "loss": 2.1247, + "step": 25080 + }, + { + "epoch": 0.83, + "grad_norm": 0.7402763962745667, + "learning_rate": 1.3663510711784965e-06, + "loss": 2.1203, + "step": 25081 + }, + { + "epoch": 0.83, + "grad_norm": 0.7324920296669006, + "learning_rate": 1.3658148053404773e-06, + "loss": 2.0241, + "step": 25082 + }, + { + "epoch": 0.83, + "grad_norm": 0.7351951599121094, + "learning_rate": 1.365278637046421e-06, + "loss": 2.0138, + "step": 25083 + }, + { + "epoch": 0.83, + "grad_norm": 0.7668065428733826, + "learning_rate": 1.364742566302385e-06, + "loss": 2.1655, + "step": 25084 + }, + { + "epoch": 0.83, + "grad_norm": 0.7341428995132446, + "learning_rate": 1.36420659311443e-06, + "loss": 2.0219, + "step": 25085 + }, + { + "epoch": 0.83, + "grad_norm": 0.7541335821151733, + "learning_rate": 1.3636707174886077e-06, + "loss": 2.0122, + "step": 25086 + }, + { + "epoch": 0.83, + "grad_norm": 0.7372667193412781, + "learning_rate": 1.36313493943097e-06, + "loss": 2.0151, + "step": 25087 + }, + { + "epoch": 0.83, + "grad_norm": 0.746778666973114, + "learning_rate": 1.3625992589475734e-06, + "loss": 2.0215, + "step": 25088 + }, + { + "epoch": 0.83, + "grad_norm": 0.7581526041030884, + "learning_rate": 1.3620636760444671e-06, + "loss": 2.0215, + "step": 25089 + }, + { + "epoch": 0.83, + "grad_norm": 0.739772617816925, + "learning_rate": 1.3615281907277034e-06, + "loss": 2.0281, + "step": 25090 + }, + { + "epoch": 0.83, + "grad_norm": 0.7470480799674988, + "learning_rate": 1.3609928030033348e-06, + "loss": 1.9735, + "step": 25091 + }, + { + "epoch": 0.83, + "grad_norm": 0.7609680891036987, + "learning_rate": 1.3604575128774022e-06, + "loss": 2.0576, + "step": 25092 + }, + { + "epoch": 0.83, + "grad_norm": 0.7715288996696472, + "learning_rate": 1.3599223203559586e-06, + "loss": 2.0492, + "step": 25093 + }, + { + "epoch": 0.83, + "grad_norm": 0.7324267625808716, + "learning_rate": 1.3593872254450502e-06, + "loss": 2.0502, + "step": 25094 + }, + { + "epoch": 0.83, + "grad_norm": 0.7684316635131836, + "learning_rate": 1.358852228150721e-06, + "loss": 2.0692, + "step": 25095 + }, + { + "epoch": 0.83, + "grad_norm": 0.763430118560791, + "learning_rate": 1.358317328479014e-06, + "loss": 2.0342, + "step": 25096 + }, + { + "epoch": 0.83, + "grad_norm": 0.734992504119873, + "learning_rate": 1.3577825264359745e-06, + "loss": 2.0323, + "step": 25097 + }, + { + "epoch": 0.84, + "grad_norm": 0.7526440620422363, + "learning_rate": 1.3572478220276407e-06, + "loss": 2.063, + "step": 25098 + }, + { + "epoch": 0.84, + "grad_norm": 0.7658821940422058, + "learning_rate": 1.3567132152600572e-06, + "loss": 2.0379, + "step": 25099 + }, + { + "epoch": 0.84, + "grad_norm": 0.7326551675796509, + "learning_rate": 1.356178706139264e-06, + "loss": 2.0663, + "step": 25100 + }, + { + "epoch": 0.84, + "grad_norm": 0.7488975524902344, + "learning_rate": 1.3556442946712977e-06, + "loss": 2.0524, + "step": 25101 + }, + { + "epoch": 0.84, + "grad_norm": 0.728011965751648, + "learning_rate": 1.3551099808621937e-06, + "loss": 1.9557, + "step": 25102 + }, + { + "epoch": 0.84, + "grad_norm": 0.7403274774551392, + "learning_rate": 1.3545757647179924e-06, + "loss": 2.0569, + "step": 25103 + }, + { + "epoch": 0.84, + "grad_norm": 0.7323578000068665, + "learning_rate": 1.354041646244728e-06, + "loss": 2.0384, + "step": 25104 + }, + { + "epoch": 0.84, + "grad_norm": 0.7479914426803589, + "learning_rate": 1.3535076254484324e-06, + "loss": 2.0648, + "step": 25105 + }, + { + "epoch": 0.84, + "grad_norm": 0.7221078276634216, + "learning_rate": 1.3529737023351397e-06, + "loss": 2.041, + "step": 25106 + }, + { + "epoch": 0.84, + "grad_norm": 0.7198437452316284, + "learning_rate": 1.3524398769108848e-06, + "loss": 2.0005, + "step": 25107 + }, + { + "epoch": 0.84, + "grad_norm": 0.7207092046737671, + "learning_rate": 1.3519061491816965e-06, + "loss": 1.9997, + "step": 25108 + }, + { + "epoch": 0.84, + "grad_norm": 0.7637871503829956, + "learning_rate": 1.351372519153602e-06, + "loss": 1.985, + "step": 25109 + }, + { + "epoch": 0.84, + "grad_norm": 0.7236823439598083, + "learning_rate": 1.3508389868326345e-06, + "loss": 2.0269, + "step": 25110 + }, + { + "epoch": 0.84, + "grad_norm": 0.7510977387428284, + "learning_rate": 1.3503055522248166e-06, + "loss": 2.011, + "step": 25111 + }, + { + "epoch": 0.84, + "grad_norm": 0.7671706676483154, + "learning_rate": 1.3497722153361769e-06, + "loss": 2.0469, + "step": 25112 + }, + { + "epoch": 0.84, + "grad_norm": 0.7745264768600464, + "learning_rate": 1.3492389761727465e-06, + "loss": 2.1386, + "step": 25113 + }, + { + "epoch": 0.84, + "grad_norm": 0.7537067532539368, + "learning_rate": 1.348705834740539e-06, + "loss": 2.078, + "step": 25114 + }, + { + "epoch": 0.84, + "grad_norm": 0.7596760988235474, + "learning_rate": 1.3481727910455832e-06, + "loss": 1.9844, + "step": 25115 + }, + { + "epoch": 0.84, + "grad_norm": 0.7658923268318176, + "learning_rate": 1.3476398450939032e-06, + "loss": 2.0315, + "step": 25116 + }, + { + "epoch": 0.84, + "grad_norm": 0.7806052565574646, + "learning_rate": 1.3471069968915174e-06, + "loss": 2.0479, + "step": 25117 + }, + { + "epoch": 0.84, + "grad_norm": 0.7459091544151306, + "learning_rate": 1.3465742464444442e-06, + "loss": 2.015, + "step": 25118 + }, + { + "epoch": 0.84, + "grad_norm": 0.7538060545921326, + "learning_rate": 1.3460415937587047e-06, + "loss": 1.9798, + "step": 25119 + }, + { + "epoch": 0.84, + "grad_norm": 0.7477102875709534, + "learning_rate": 1.3455090388403137e-06, + "loss": 2.0532, + "step": 25120 + }, + { + "epoch": 0.84, + "grad_norm": 0.7364475727081299, + "learning_rate": 1.3449765816952899e-06, + "loss": 2.0606, + "step": 25121 + }, + { + "epoch": 0.84, + "grad_norm": 0.7414153814315796, + "learning_rate": 1.3444442223296505e-06, + "loss": 1.9697, + "step": 25122 + }, + { + "epoch": 0.84, + "grad_norm": 0.7764838933944702, + "learning_rate": 1.3439119607494077e-06, + "loss": 2.0112, + "step": 25123 + }, + { + "epoch": 0.84, + "grad_norm": 0.7431680560112, + "learning_rate": 1.3433797969605721e-06, + "loss": 2.0358, + "step": 25124 + }, + { + "epoch": 0.84, + "grad_norm": 0.7583439350128174, + "learning_rate": 1.34284773096916e-06, + "loss": 2.0889, + "step": 25125 + }, + { + "epoch": 0.84, + "grad_norm": 0.7658635973930359, + "learning_rate": 1.342315762781181e-06, + "loss": 2.1617, + "step": 25126 + }, + { + "epoch": 0.84, + "grad_norm": 0.741082489490509, + "learning_rate": 1.3417838924026426e-06, + "loss": 2.0625, + "step": 25127 + }, + { + "epoch": 0.84, + "grad_norm": 0.7264873385429382, + "learning_rate": 1.3412521198395556e-06, + "loss": 2.0956, + "step": 25128 + }, + { + "epoch": 0.84, + "grad_norm": 0.7705053091049194, + "learning_rate": 1.3407204450979294e-06, + "loss": 2.0349, + "step": 25129 + }, + { + "epoch": 0.84, + "grad_norm": 0.7603328227996826, + "learning_rate": 1.3401888681837671e-06, + "loss": 2.0912, + "step": 25130 + }, + { + "epoch": 0.84, + "grad_norm": 0.7482497692108154, + "learning_rate": 1.3396573891030784e-06, + "loss": 2.0427, + "step": 25131 + }, + { + "epoch": 0.84, + "grad_norm": 0.7430740594863892, + "learning_rate": 1.3391260078618639e-06, + "loss": 2.0554, + "step": 25132 + }, + { + "epoch": 0.84, + "grad_norm": 0.7279108166694641, + "learning_rate": 1.3385947244661268e-06, + "loss": 2.0681, + "step": 25133 + }, + { + "epoch": 0.84, + "grad_norm": 0.74683678150177, + "learning_rate": 1.3380635389218698e-06, + "loss": 2.0495, + "step": 25134 + }, + { + "epoch": 0.84, + "grad_norm": 0.7670799493789673, + "learning_rate": 1.3375324512350995e-06, + "loss": 2.0581, + "step": 25135 + }, + { + "epoch": 0.84, + "grad_norm": 0.7483343482017517, + "learning_rate": 1.3370014614118054e-06, + "loss": 2.0282, + "step": 25136 + }, + { + "epoch": 0.84, + "grad_norm": 0.7364310622215271, + "learning_rate": 1.3364705694579927e-06, + "loss": 2.0663, + "step": 25137 + }, + { + "epoch": 0.84, + "grad_norm": 0.7254992127418518, + "learning_rate": 1.335939775379661e-06, + "loss": 2.0496, + "step": 25138 + }, + { + "epoch": 0.84, + "grad_norm": 0.7599915862083435, + "learning_rate": 1.3354090791828024e-06, + "loss": 2.0782, + "step": 25139 + }, + { + "epoch": 0.84, + "grad_norm": 0.7428900599479675, + "learning_rate": 1.3348784808734127e-06, + "loss": 2.0817, + "step": 25140 + }, + { + "epoch": 0.84, + "grad_norm": 0.7594741582870483, + "learning_rate": 1.33434798045749e-06, + "loss": 2.0127, + "step": 25141 + }, + { + "epoch": 0.84, + "grad_norm": 0.7383279204368591, + "learning_rate": 1.3338175779410235e-06, + "loss": 2.056, + "step": 25142 + }, + { + "epoch": 0.84, + "grad_norm": 0.7319151163101196, + "learning_rate": 1.3332872733300063e-06, + "loss": 1.9589, + "step": 25143 + }, + { + "epoch": 0.84, + "grad_norm": 0.7256348133087158, + "learning_rate": 1.3327570666304323e-06, + "loss": 2.0905, + "step": 25144 + }, + { + "epoch": 0.84, + "grad_norm": 0.7703343033790588, + "learning_rate": 1.3322269578482906e-06, + "loss": 1.9656, + "step": 25145 + }, + { + "epoch": 0.84, + "grad_norm": 0.7417930960655212, + "learning_rate": 1.3316969469895658e-06, + "loss": 2.0519, + "step": 25146 + }, + { + "epoch": 0.84, + "grad_norm": 0.7719858884811401, + "learning_rate": 1.331167034060251e-06, + "loss": 2.0331, + "step": 25147 + }, + { + "epoch": 0.84, + "grad_norm": 0.727957010269165, + "learning_rate": 1.3306372190663308e-06, + "loss": 2.023, + "step": 25148 + }, + { + "epoch": 0.84, + "grad_norm": 0.761118471622467, + "learning_rate": 1.3301075020137887e-06, + "loss": 1.9945, + "step": 25149 + }, + { + "epoch": 0.84, + "grad_norm": 0.770431637763977, + "learning_rate": 1.3295778829086103e-06, + "loss": 2.0878, + "step": 25150 + }, + { + "epoch": 0.84, + "grad_norm": 0.7343979477882385, + "learning_rate": 1.329048361756783e-06, + "loss": 2.08, + "step": 25151 + }, + { + "epoch": 0.84, + "grad_norm": 0.7670046091079712, + "learning_rate": 1.3285189385642816e-06, + "loss": 2.0506, + "step": 25152 + }, + { + "epoch": 0.84, + "grad_norm": 0.7407084107398987, + "learning_rate": 1.3279896133370951e-06, + "loss": 2.0495, + "step": 25153 + }, + { + "epoch": 0.84, + "grad_norm": 0.7505419850349426, + "learning_rate": 1.3274603860811986e-06, + "loss": 2.0012, + "step": 25154 + }, + { + "epoch": 0.84, + "grad_norm": 0.7217773199081421, + "learning_rate": 1.3269312568025705e-06, + "loss": 1.9906, + "step": 25155 + }, + { + "epoch": 0.84, + "grad_norm": 0.7171909213066101, + "learning_rate": 1.3264022255071896e-06, + "loss": 2.0937, + "step": 25156 + }, + { + "epoch": 0.84, + "grad_norm": 0.7457893490791321, + "learning_rate": 1.3258732922010375e-06, + "loss": 2.0706, + "step": 25157 + }, + { + "epoch": 0.84, + "grad_norm": 0.7622553706169128, + "learning_rate": 1.3253444568900819e-06, + "loss": 2.1563, + "step": 25158 + }, + { + "epoch": 0.84, + "grad_norm": 0.7559348344802856, + "learning_rate": 1.3248157195803001e-06, + "loss": 2.0674, + "step": 25159 + }, + { + "epoch": 0.84, + "grad_norm": 0.7268890142440796, + "learning_rate": 1.3242870802776685e-06, + "loss": 2.0461, + "step": 25160 + }, + { + "epoch": 0.84, + "grad_norm": 0.7106286287307739, + "learning_rate": 1.3237585389881547e-06, + "loss": 2.0459, + "step": 25161 + }, + { + "epoch": 0.84, + "grad_norm": 0.7390077710151672, + "learning_rate": 1.3232300957177347e-06, + "loss": 2.0975, + "step": 25162 + }, + { + "epoch": 0.84, + "grad_norm": 0.7644327282905579, + "learning_rate": 1.3227017504723749e-06, + "loss": 2.0579, + "step": 25163 + }, + { + "epoch": 0.84, + "grad_norm": 0.7560936808586121, + "learning_rate": 1.322173503258044e-06, + "loss": 1.9858, + "step": 25164 + }, + { + "epoch": 0.84, + "grad_norm": 0.7526465654373169, + "learning_rate": 1.3216453540807116e-06, + "loss": 2.0127, + "step": 25165 + }, + { + "epoch": 0.84, + "grad_norm": 0.7458640336990356, + "learning_rate": 1.3211173029463453e-06, + "loss": 1.9934, + "step": 25166 + }, + { + "epoch": 0.84, + "grad_norm": 0.7520620226860046, + "learning_rate": 1.3205893498609102e-06, + "loss": 2.0462, + "step": 25167 + }, + { + "epoch": 0.84, + "grad_norm": 0.745143473148346, + "learning_rate": 1.3200614948303669e-06, + "loss": 1.9849, + "step": 25168 + }, + { + "epoch": 0.84, + "grad_norm": 0.7449918389320374, + "learning_rate": 1.3195337378606843e-06, + "loss": 2.0482, + "step": 25169 + }, + { + "epoch": 0.84, + "grad_norm": 0.7413531541824341, + "learning_rate": 1.319006078957823e-06, + "loss": 2.0464, + "step": 25170 + }, + { + "epoch": 0.84, + "grad_norm": 0.7508883476257324, + "learning_rate": 1.3184785181277404e-06, + "loss": 2.0807, + "step": 25171 + }, + { + "epoch": 0.84, + "grad_norm": 0.7754811644554138, + "learning_rate": 1.3179510553763998e-06, + "loss": 2.2027, + "step": 25172 + }, + { + "epoch": 0.84, + "grad_norm": 0.7478004693984985, + "learning_rate": 1.3174236907097626e-06, + "loss": 2.1024, + "step": 25173 + }, + { + "epoch": 0.84, + "grad_norm": 0.7371152639389038, + "learning_rate": 1.3168964241337823e-06, + "loss": 1.9881, + "step": 25174 + }, + { + "epoch": 0.84, + "grad_norm": 0.7109218239784241, + "learning_rate": 1.3163692556544183e-06, + "loss": 1.9963, + "step": 25175 + }, + { + "epoch": 0.84, + "grad_norm": 0.7128262519836426, + "learning_rate": 1.315842185277626e-06, + "loss": 2.0836, + "step": 25176 + }, + { + "epoch": 0.84, + "grad_norm": 0.7441182136535645, + "learning_rate": 1.3153152130093571e-06, + "loss": 2.1338, + "step": 25177 + }, + { + "epoch": 0.84, + "grad_norm": 0.7657666206359863, + "learning_rate": 1.3147883388555672e-06, + "loss": 2.0506, + "step": 25178 + }, + { + "epoch": 0.84, + "grad_norm": 0.7295756340026855, + "learning_rate": 1.3142615628222134e-06, + "loss": 2.0486, + "step": 25179 + }, + { + "epoch": 0.84, + "grad_norm": 0.7198320627212524, + "learning_rate": 1.3137348849152364e-06, + "loss": 2.0144, + "step": 25180 + }, + { + "epoch": 0.84, + "grad_norm": 0.723551332950592, + "learning_rate": 1.313208305140593e-06, + "loss": 2.0104, + "step": 25181 + }, + { + "epoch": 0.84, + "grad_norm": 0.7326354384422302, + "learning_rate": 1.3126818235042338e-06, + "loss": 1.9969, + "step": 25182 + }, + { + "epoch": 0.84, + "grad_norm": 0.7440686821937561, + "learning_rate": 1.3121554400121016e-06, + "loss": 2.0032, + "step": 25183 + }, + { + "epoch": 0.84, + "grad_norm": 0.7371004819869995, + "learning_rate": 1.3116291546701476e-06, + "loss": 2.1082, + "step": 25184 + }, + { + "epoch": 0.84, + "grad_norm": 0.7588104009628296, + "learning_rate": 1.3111029674843134e-06, + "loss": 2.0658, + "step": 25185 + }, + { + "epoch": 0.84, + "grad_norm": 0.7330766916275024, + "learning_rate": 1.3105768784605477e-06, + "loss": 2.0499, + "step": 25186 + }, + { + "epoch": 0.84, + "grad_norm": 0.73399418592453, + "learning_rate": 1.3100508876047902e-06, + "loss": 1.9807, + "step": 25187 + }, + { + "epoch": 0.84, + "grad_norm": 0.7637670040130615, + "learning_rate": 1.309524994922986e-06, + "loss": 2.0705, + "step": 25188 + }, + { + "epoch": 0.84, + "grad_norm": 0.7333109378814697, + "learning_rate": 1.3089992004210761e-06, + "loss": 2.0158, + "step": 25189 + }, + { + "epoch": 0.84, + "grad_norm": 0.7543295621871948, + "learning_rate": 1.3084735041049979e-06, + "loss": 2.0092, + "step": 25190 + }, + { + "epoch": 0.84, + "grad_norm": 0.738252580165863, + "learning_rate": 1.307947905980692e-06, + "loss": 2.0517, + "step": 25191 + }, + { + "epoch": 0.84, + "grad_norm": 0.7534932494163513, + "learning_rate": 1.3074224060540984e-06, + "loss": 2.0021, + "step": 25192 + }, + { + "epoch": 0.84, + "grad_norm": 0.7674052715301514, + "learning_rate": 1.3068970043311535e-06, + "loss": 2.0153, + "step": 25193 + }, + { + "epoch": 0.84, + "grad_norm": 0.7250233292579651, + "learning_rate": 1.3063717008177878e-06, + "loss": 2.0141, + "step": 25194 + }, + { + "epoch": 0.84, + "grad_norm": 0.7527196407318115, + "learning_rate": 1.3058464955199423e-06, + "loss": 2.0941, + "step": 25195 + }, + { + "epoch": 0.84, + "grad_norm": 0.7555750012397766, + "learning_rate": 1.3053213884435468e-06, + "loss": 2.0499, + "step": 25196 + }, + { + "epoch": 0.84, + "grad_norm": 0.7417261004447937, + "learning_rate": 1.304796379594535e-06, + "loss": 1.9856, + "step": 25197 + }, + { + "epoch": 0.84, + "grad_norm": 0.7267795205116272, + "learning_rate": 1.3042714689788393e-06, + "loss": 2.0699, + "step": 25198 + }, + { + "epoch": 0.84, + "grad_norm": 0.7626472115516663, + "learning_rate": 1.303746656602386e-06, + "loss": 2.0021, + "step": 25199 + }, + { + "epoch": 0.84, + "grad_norm": 0.7490193843841553, + "learning_rate": 1.3032219424711056e-06, + "loss": 2.0302, + "step": 25200 + }, + { + "epoch": 0.84, + "grad_norm": 0.7557934522628784, + "learning_rate": 1.3026973265909292e-06, + "loss": 2.0784, + "step": 25201 + }, + { + "epoch": 0.84, + "grad_norm": 0.7751348614692688, + "learning_rate": 1.302172808967782e-06, + "loss": 2.0299, + "step": 25202 + }, + { + "epoch": 0.84, + "grad_norm": 0.7343829274177551, + "learning_rate": 1.301648389607586e-06, + "loss": 1.9857, + "step": 25203 + }, + { + "epoch": 0.84, + "grad_norm": 0.7598791122436523, + "learning_rate": 1.3011240685162719e-06, + "loss": 2.0832, + "step": 25204 + }, + { + "epoch": 0.84, + "grad_norm": 0.7633927464485168, + "learning_rate": 1.300599845699757e-06, + "loss": 2.045, + "step": 25205 + }, + { + "epoch": 0.84, + "grad_norm": 0.7455004453659058, + "learning_rate": 1.3000757211639692e-06, + "loss": 2.0556, + "step": 25206 + }, + { + "epoch": 0.84, + "grad_norm": 0.7329928278923035, + "learning_rate": 1.2995516949148245e-06, + "loss": 2.0547, + "step": 25207 + }, + { + "epoch": 0.84, + "grad_norm": 0.7779415845870972, + "learning_rate": 1.2990277669582485e-06, + "loss": 2.0382, + "step": 25208 + }, + { + "epoch": 0.84, + "grad_norm": 0.7289942502975464, + "learning_rate": 1.2985039373001562e-06, + "loss": 2.0465, + "step": 25209 + }, + { + "epoch": 0.84, + "grad_norm": 0.7401782274246216, + "learning_rate": 1.2979802059464674e-06, + "loss": 2.0995, + "step": 25210 + }, + { + "epoch": 0.84, + "grad_norm": 0.7250211834907532, + "learning_rate": 1.2974565729030996e-06, + "loss": 2.1211, + "step": 25211 + }, + { + "epoch": 0.84, + "grad_norm": 0.748907208442688, + "learning_rate": 1.2969330381759648e-06, + "loss": 1.9701, + "step": 25212 + }, + { + "epoch": 0.84, + "grad_norm": 0.7727431654930115, + "learning_rate": 1.2964096017709793e-06, + "loss": 1.9948, + "step": 25213 + }, + { + "epoch": 0.84, + "grad_norm": 0.7299375534057617, + "learning_rate": 1.2958862636940605e-06, + "loss": 2.0656, + "step": 25214 + }, + { + "epoch": 0.84, + "grad_norm": 0.7606677412986755, + "learning_rate": 1.2953630239511173e-06, + "loss": 2.0532, + "step": 25215 + }, + { + "epoch": 0.84, + "grad_norm": 0.7589752078056335, + "learning_rate": 1.294839882548058e-06, + "loss": 2.0201, + "step": 25216 + }, + { + "epoch": 0.84, + "grad_norm": 0.7460601329803467, + "learning_rate": 1.294316839490799e-06, + "loss": 1.9689, + "step": 25217 + }, + { + "epoch": 0.84, + "grad_norm": 0.7449535131454468, + "learning_rate": 1.2937938947852447e-06, + "loss": 2.0113, + "step": 25218 + }, + { + "epoch": 0.84, + "grad_norm": 0.7384112477302551, + "learning_rate": 1.2932710484373057e-06, + "loss": 2.1658, + "step": 25219 + }, + { + "epoch": 0.84, + "grad_norm": 0.7341070175170898, + "learning_rate": 1.2927483004528884e-06, + "loss": 2.0777, + "step": 25220 + }, + { + "epoch": 0.84, + "grad_norm": 0.768615186214447, + "learning_rate": 1.2922256508378962e-06, + "loss": 2.0054, + "step": 25221 + }, + { + "epoch": 0.84, + "grad_norm": 0.7648570537567139, + "learning_rate": 1.2917030995982337e-06, + "loss": 2.0126, + "step": 25222 + }, + { + "epoch": 0.84, + "grad_norm": 0.7308836579322815, + "learning_rate": 1.2911806467398103e-06, + "loss": 2.0569, + "step": 25223 + }, + { + "epoch": 0.84, + "grad_norm": 0.7563140988349915, + "learning_rate": 1.2906582922685229e-06, + "loss": 2.0939, + "step": 25224 + }, + { + "epoch": 0.84, + "grad_norm": 0.7246943712234497, + "learning_rate": 1.2901360361902716e-06, + "loss": 1.9814, + "step": 25225 + }, + { + "epoch": 0.84, + "grad_norm": 0.7550070881843567, + "learning_rate": 1.2896138785109612e-06, + "loss": 2.0744, + "step": 25226 + }, + { + "epoch": 0.84, + "grad_norm": 0.7848421335220337, + "learning_rate": 1.2890918192364865e-06, + "loss": 2.0405, + "step": 25227 + }, + { + "epoch": 0.84, + "grad_norm": 0.7906895279884338, + "learning_rate": 1.288569858372749e-06, + "loss": 2.1167, + "step": 25228 + }, + { + "epoch": 0.84, + "grad_norm": 0.7479054927825928, + "learning_rate": 1.288047995925642e-06, + "loss": 1.9669, + "step": 25229 + }, + { + "epoch": 0.84, + "grad_norm": 0.7528484463691711, + "learning_rate": 1.287526231901065e-06, + "loss": 2.0632, + "step": 25230 + }, + { + "epoch": 0.84, + "grad_norm": 0.7277924418449402, + "learning_rate": 1.2870045663049092e-06, + "loss": 2.0269, + "step": 25231 + }, + { + "epoch": 0.84, + "grad_norm": 0.7739904522895813, + "learning_rate": 1.2864829991430706e-06, + "loss": 2.0439, + "step": 25232 + }, + { + "epoch": 0.84, + "grad_norm": 0.7670672535896301, + "learning_rate": 1.2859615304214413e-06, + "loss": 2.0756, + "step": 25233 + }, + { + "epoch": 0.84, + "grad_norm": 0.785698413848877, + "learning_rate": 1.2854401601459088e-06, + "loss": 2.0274, + "step": 25234 + }, + { + "epoch": 0.84, + "grad_norm": 0.7571924328804016, + "learning_rate": 1.2849188883223673e-06, + "loss": 1.987, + "step": 25235 + }, + { + "epoch": 0.84, + "grad_norm": 0.7543885707855225, + "learning_rate": 1.2843977149567056e-06, + "loss": 2.0392, + "step": 25236 + }, + { + "epoch": 0.84, + "grad_norm": 0.7375682592391968, + "learning_rate": 1.283876640054812e-06, + "loss": 2.0427, + "step": 25237 + }, + { + "epoch": 0.84, + "grad_norm": 0.7282437682151794, + "learning_rate": 1.2833556636225686e-06, + "loss": 2.1309, + "step": 25238 + }, + { + "epoch": 0.84, + "grad_norm": 0.7729713320732117, + "learning_rate": 1.2828347856658663e-06, + "loss": 2.1235, + "step": 25239 + }, + { + "epoch": 0.84, + "grad_norm": 0.750762939453125, + "learning_rate": 1.282314006190587e-06, + "loss": 1.9929, + "step": 25240 + }, + { + "epoch": 0.84, + "grad_norm": 0.7610924243927002, + "learning_rate": 1.281793325202616e-06, + "loss": 2.0974, + "step": 25241 + }, + { + "epoch": 0.84, + "grad_norm": 0.741794228553772, + "learning_rate": 1.2812727427078353e-06, + "loss": 2.0974, + "step": 25242 + }, + { + "epoch": 0.84, + "grad_norm": 0.7532973289489746, + "learning_rate": 1.2807522587121236e-06, + "loss": 2.0535, + "step": 25243 + }, + { + "epoch": 0.84, + "grad_norm": 0.7604883313179016, + "learning_rate": 1.2802318732213625e-06, + "loss": 2.0072, + "step": 25244 + }, + { + "epoch": 0.84, + "grad_norm": 0.766514003276825, + "learning_rate": 1.2797115862414333e-06, + "loss": 2.075, + "step": 25245 + }, + { + "epoch": 0.84, + "grad_norm": 0.771040678024292, + "learning_rate": 1.2791913977782121e-06, + "loss": 2.0886, + "step": 25246 + }, + { + "epoch": 0.84, + "grad_norm": 0.7392578721046448, + "learning_rate": 1.2786713078375734e-06, + "loss": 2.087, + "step": 25247 + }, + { + "epoch": 0.84, + "grad_norm": 0.7273556590080261, + "learning_rate": 1.2781513164253978e-06, + "loss": 2.0445, + "step": 25248 + }, + { + "epoch": 0.84, + "grad_norm": 0.7258641123771667, + "learning_rate": 1.2776314235475551e-06, + "loss": 2.0761, + "step": 25249 + }, + { + "epoch": 0.84, + "grad_norm": 0.7360250949859619, + "learning_rate": 1.2771116292099216e-06, + "loss": 2.0986, + "step": 25250 + }, + { + "epoch": 0.84, + "grad_norm": 0.728259265422821, + "learning_rate": 1.2765919334183674e-06, + "loss": 2.0551, + "step": 25251 + }, + { + "epoch": 0.84, + "grad_norm": 0.7631586194038391, + "learning_rate": 1.2760723361787675e-06, + "loss": 1.9561, + "step": 25252 + }, + { + "epoch": 0.84, + "grad_norm": 0.7701394557952881, + "learning_rate": 1.2755528374969873e-06, + "loss": 2.0956, + "step": 25253 + }, + { + "epoch": 0.84, + "grad_norm": 0.7619103789329529, + "learning_rate": 1.2750334373789008e-06, + "loss": 2.0163, + "step": 25254 + }, + { + "epoch": 0.84, + "grad_norm": 0.7387005686759949, + "learning_rate": 1.2745141358303726e-06, + "loss": 2.0108, + "step": 25255 + }, + { + "epoch": 0.84, + "grad_norm": 0.7507039308547974, + "learning_rate": 1.2739949328572677e-06, + "loss": 2.0603, + "step": 25256 + }, + { + "epoch": 0.84, + "grad_norm": 0.7563625574111938, + "learning_rate": 1.2734758284654547e-06, + "loss": 2.0701, + "step": 25257 + }, + { + "epoch": 0.84, + "grad_norm": 0.7437933683395386, + "learning_rate": 1.2729568226607992e-06, + "loss": 2.1404, + "step": 25258 + }, + { + "epoch": 0.84, + "grad_norm": 0.7523148059844971, + "learning_rate": 1.2724379154491628e-06, + "loss": 2.0542, + "step": 25259 + }, + { + "epoch": 0.84, + "grad_norm": 0.7422696352005005, + "learning_rate": 1.2719191068364056e-06, + "loss": 2.1243, + "step": 25260 + }, + { + "epoch": 0.84, + "grad_norm": 0.7384955883026123, + "learning_rate": 1.271400396828394e-06, + "loss": 2.0639, + "step": 25261 + }, + { + "epoch": 0.84, + "grad_norm": 0.7350273728370667, + "learning_rate": 1.270881785430983e-06, + "loss": 2.0462, + "step": 25262 + }, + { + "epoch": 0.84, + "grad_norm": 0.747558057308197, + "learning_rate": 1.2703632726500359e-06, + "loss": 2.0259, + "step": 25263 + }, + { + "epoch": 0.84, + "grad_norm": 0.7409343719482422, + "learning_rate": 1.2698448584914091e-06, + "loss": 2.07, + "step": 25264 + }, + { + "epoch": 0.84, + "grad_norm": 0.7559645771980286, + "learning_rate": 1.269326542960956e-06, + "loss": 2.0607, + "step": 25265 + }, + { + "epoch": 0.84, + "grad_norm": 0.7659122943878174, + "learning_rate": 1.2688083260645345e-06, + "loss": 2.0673, + "step": 25266 + }, + { + "epoch": 0.84, + "grad_norm": 0.7683814764022827, + "learning_rate": 1.2682902078080029e-06, + "loss": 2.0438, + "step": 25267 + }, + { + "epoch": 0.84, + "grad_norm": 0.7599559426307678, + "learning_rate": 1.2677721881972095e-06, + "loss": 1.9757, + "step": 25268 + }, + { + "epoch": 0.84, + "grad_norm": 0.741051197052002, + "learning_rate": 1.2672542672380073e-06, + "loss": 2.0733, + "step": 25269 + }, + { + "epoch": 0.84, + "grad_norm": 0.7399287223815918, + "learning_rate": 1.2667364449362507e-06, + "loss": 1.9815, + "step": 25270 + }, + { + "epoch": 0.84, + "grad_norm": 0.7443737983703613, + "learning_rate": 1.266218721297785e-06, + "loss": 2.0422, + "step": 25271 + }, + { + "epoch": 0.84, + "grad_norm": 0.7541748285293579, + "learning_rate": 1.2657010963284643e-06, + "loss": 2.0424, + "step": 25272 + }, + { + "epoch": 0.84, + "grad_norm": 0.754728376865387, + "learning_rate": 1.2651835700341309e-06, + "loss": 2.006, + "step": 25273 + }, + { + "epoch": 0.84, + "grad_norm": 0.7470657229423523, + "learning_rate": 1.2646661424206376e-06, + "loss": 2.0421, + "step": 25274 + }, + { + "epoch": 0.84, + "grad_norm": 0.7627711892127991, + "learning_rate": 1.264148813493824e-06, + "loss": 1.994, + "step": 25275 + }, + { + "epoch": 0.84, + "grad_norm": 0.7268960475921631, + "learning_rate": 1.263631583259538e-06, + "loss": 2.0474, + "step": 25276 + }, + { + "epoch": 0.84, + "grad_norm": 0.738702654838562, + "learning_rate": 1.263114451723626e-06, + "loss": 1.9915, + "step": 25277 + }, + { + "epoch": 0.84, + "grad_norm": 0.7314087748527527, + "learning_rate": 1.262597418891922e-06, + "loss": 2.0319, + "step": 25278 + }, + { + "epoch": 0.84, + "grad_norm": 0.7452148795127869, + "learning_rate": 1.2620804847702728e-06, + "loss": 1.9888, + "step": 25279 + }, + { + "epoch": 0.84, + "grad_norm": 0.7460310459136963, + "learning_rate": 1.261563649364519e-06, + "loss": 1.9824, + "step": 25280 + }, + { + "epoch": 0.84, + "grad_norm": 0.7457327246665955, + "learning_rate": 1.261046912680497e-06, + "loss": 2.0799, + "step": 25281 + }, + { + "epoch": 0.84, + "grad_norm": 0.7303289771080017, + "learning_rate": 1.2605302747240444e-06, + "loss": 1.9896, + "step": 25282 + }, + { + "epoch": 0.84, + "grad_norm": 0.7576649785041809, + "learning_rate": 1.260013735501001e-06, + "loss": 2.0018, + "step": 25283 + }, + { + "epoch": 0.84, + "grad_norm": 0.7163962125778198, + "learning_rate": 1.259497295017198e-06, + "loss": 1.9952, + "step": 25284 + }, + { + "epoch": 0.84, + "grad_norm": 0.737267255783081, + "learning_rate": 1.2589809532784735e-06, + "loss": 2.0394, + "step": 25285 + }, + { + "epoch": 0.84, + "grad_norm": 0.737406313419342, + "learning_rate": 1.258464710290659e-06, + "loss": 1.9981, + "step": 25286 + }, + { + "epoch": 0.84, + "grad_norm": 0.7562631964683533, + "learning_rate": 1.257948566059588e-06, + "loss": 2.0538, + "step": 25287 + }, + { + "epoch": 0.84, + "grad_norm": 0.7464264035224915, + "learning_rate": 1.2574325205910886e-06, + "loss": 2.0702, + "step": 25288 + }, + { + "epoch": 0.84, + "grad_norm": 0.7192738056182861, + "learning_rate": 1.2569165738909949e-06, + "loss": 2.0709, + "step": 25289 + }, + { + "epoch": 0.84, + "grad_norm": 0.7267374396324158, + "learning_rate": 1.2564007259651345e-06, + "loss": 2.035, + "step": 25290 + }, + { + "epoch": 0.84, + "grad_norm": 0.7254093885421753, + "learning_rate": 1.2558849768193327e-06, + "loss": 2.0336, + "step": 25291 + }, + { + "epoch": 0.84, + "grad_norm": 0.7453846335411072, + "learning_rate": 1.2553693264594169e-06, + "loss": 2.0156, + "step": 25292 + }, + { + "epoch": 0.84, + "grad_norm": 0.7652398347854614, + "learning_rate": 1.254853774891216e-06, + "loss": 2.0094, + "step": 25293 + }, + { + "epoch": 0.84, + "grad_norm": 0.7694460153579712, + "learning_rate": 1.254338322120552e-06, + "loss": 2.056, + "step": 25294 + }, + { + "epoch": 0.84, + "grad_norm": 0.735845148563385, + "learning_rate": 1.2538229681532465e-06, + "loss": 2.0873, + "step": 25295 + }, + { + "epoch": 0.84, + "grad_norm": 0.755448043346405, + "learning_rate": 1.2533077129951254e-06, + "loss": 2.0066, + "step": 25296 + }, + { + "epoch": 0.84, + "grad_norm": 0.7351285219192505, + "learning_rate": 1.2527925566520049e-06, + "loss": 1.9966, + "step": 25297 + }, + { + "epoch": 0.84, + "grad_norm": 0.7506215572357178, + "learning_rate": 1.2522774991297081e-06, + "loss": 2.0479, + "step": 25298 + }, + { + "epoch": 0.84, + "grad_norm": 0.7476725578308105, + "learning_rate": 1.2517625404340573e-06, + "loss": 2.0759, + "step": 25299 + }, + { + "epoch": 0.84, + "grad_norm": 0.7444807291030884, + "learning_rate": 1.2512476805708629e-06, + "loss": 2.0032, + "step": 25300 + }, + { + "epoch": 0.84, + "grad_norm": 0.7494280338287354, + "learning_rate": 1.2507329195459439e-06, + "loss": 1.9735, + "step": 25301 + }, + { + "epoch": 0.84, + "grad_norm": 0.7901392579078674, + "learning_rate": 1.25021825736512e-06, + "loss": 1.9824, + "step": 25302 + }, + { + "epoch": 0.84, + "grad_norm": 0.7447489500045776, + "learning_rate": 1.2497036940342023e-06, + "loss": 2.0627, + "step": 25303 + }, + { + "epoch": 0.84, + "grad_norm": 0.7485110759735107, + "learning_rate": 1.2491892295590013e-06, + "loss": 2.0416, + "step": 25304 + }, + { + "epoch": 0.84, + "grad_norm": 0.7399124503135681, + "learning_rate": 1.2486748639453339e-06, + "loss": 2.0654, + "step": 25305 + }, + { + "epoch": 0.84, + "grad_norm": 0.7590696215629578, + "learning_rate": 1.2481605971990073e-06, + "loss": 1.9205, + "step": 25306 + }, + { + "epoch": 0.84, + "grad_norm": 0.7470648884773254, + "learning_rate": 1.247646429325834e-06, + "loss": 2.0402, + "step": 25307 + }, + { + "epoch": 0.84, + "grad_norm": 0.7450483441352844, + "learning_rate": 1.2471323603316233e-06, + "loss": 2.0092, + "step": 25308 + }, + { + "epoch": 0.84, + "grad_norm": 0.7480080127716064, + "learning_rate": 1.2466183902221819e-06, + "loss": 2.0238, + "step": 25309 + }, + { + "epoch": 0.84, + "grad_norm": 0.7603583335876465, + "learning_rate": 1.2461045190033127e-06, + "loss": 2.1106, + "step": 25310 + }, + { + "epoch": 0.84, + "grad_norm": 0.7747641205787659, + "learning_rate": 1.245590746680827e-06, + "loss": 2.0094, + "step": 25311 + }, + { + "epoch": 0.84, + "grad_norm": 0.7354148030281067, + "learning_rate": 1.2450770732605267e-06, + "loss": 2.0197, + "step": 25312 + }, + { + "epoch": 0.84, + "grad_norm": 0.7383816242218018, + "learning_rate": 1.2445634987482124e-06, + "loss": 2.0221, + "step": 25313 + }, + { + "epoch": 0.84, + "grad_norm": 0.7253024578094482, + "learning_rate": 1.2440500231496889e-06, + "loss": 2.0521, + "step": 25314 + }, + { + "epoch": 0.84, + "grad_norm": 0.7561866641044617, + "learning_rate": 1.2435366464707589e-06, + "loss": 2.0048, + "step": 25315 + }, + { + "epoch": 0.84, + "grad_norm": 0.751228928565979, + "learning_rate": 1.24302336871722e-06, + "loss": 2.0114, + "step": 25316 + }, + { + "epoch": 0.84, + "grad_norm": 0.7449452877044678, + "learning_rate": 1.2425101898948689e-06, + "loss": 2.0114, + "step": 25317 + }, + { + "epoch": 0.84, + "grad_norm": 0.7391721606254578, + "learning_rate": 1.2419971100095073e-06, + "loss": 2.1011, + "step": 25318 + }, + { + "epoch": 0.84, + "grad_norm": 0.7433247566223145, + "learning_rate": 1.2414841290669277e-06, + "loss": 2.0663, + "step": 25319 + }, + { + "epoch": 0.84, + "grad_norm": 0.7417053580284119, + "learning_rate": 1.2409712470729275e-06, + "loss": 2.0872, + "step": 25320 + }, + { + "epoch": 0.84, + "grad_norm": 0.7357958555221558, + "learning_rate": 1.240458464033304e-06, + "loss": 1.9746, + "step": 25321 + }, + { + "epoch": 0.84, + "grad_norm": 0.765435516834259, + "learning_rate": 1.239945779953844e-06, + "loss": 2.0587, + "step": 25322 + }, + { + "epoch": 0.84, + "grad_norm": 0.7597655653953552, + "learning_rate": 1.2394331948403427e-06, + "loss": 2.1195, + "step": 25323 + }, + { + "epoch": 0.84, + "grad_norm": 0.7379902005195618, + "learning_rate": 1.2389207086985922e-06, + "loss": 1.9721, + "step": 25324 + }, + { + "epoch": 0.84, + "grad_norm": 0.7225934863090515, + "learning_rate": 1.2384083215343824e-06, + "loss": 2.0483, + "step": 25325 + }, + { + "epoch": 0.84, + "grad_norm": 0.7396114468574524, + "learning_rate": 1.2378960333534973e-06, + "loss": 2.0112, + "step": 25326 + }, + { + "epoch": 0.84, + "grad_norm": 0.7500832676887512, + "learning_rate": 1.2373838441617302e-06, + "loss": 2.0159, + "step": 25327 + }, + { + "epoch": 0.84, + "grad_norm": 0.7272298336029053, + "learning_rate": 1.2368717539648634e-06, + "loss": 2.0042, + "step": 25328 + }, + { + "epoch": 0.84, + "grad_norm": 0.7270601391792297, + "learning_rate": 1.236359762768683e-06, + "loss": 1.9794, + "step": 25329 + }, + { + "epoch": 0.84, + "grad_norm": 0.7376267910003662, + "learning_rate": 1.2358478705789768e-06, + "loss": 2.0539, + "step": 25330 + }, + { + "epoch": 0.84, + "grad_norm": 0.7854896187782288, + "learning_rate": 1.2353360774015245e-06, + "loss": 2.0913, + "step": 25331 + }, + { + "epoch": 0.84, + "grad_norm": 0.7512280941009521, + "learning_rate": 1.234824383242107e-06, + "loss": 2.1103, + "step": 25332 + }, + { + "epoch": 0.84, + "grad_norm": 0.7399800419807434, + "learning_rate": 1.2343127881065076e-06, + "loss": 2.0801, + "step": 25333 + }, + { + "epoch": 0.84, + "grad_norm": 0.7520300149917603, + "learning_rate": 1.2338012920005071e-06, + "loss": 2.118, + "step": 25334 + }, + { + "epoch": 0.84, + "grad_norm": 0.7485512495040894, + "learning_rate": 1.2332898949298788e-06, + "loss": 2.0594, + "step": 25335 + }, + { + "epoch": 0.84, + "grad_norm": 0.7656073570251465, + "learning_rate": 1.2327785969004036e-06, + "loss": 2.0, + "step": 25336 + }, + { + "epoch": 0.84, + "grad_norm": 0.7756001353263855, + "learning_rate": 1.2322673979178602e-06, + "loss": 2.1244, + "step": 25337 + }, + { + "epoch": 0.84, + "grad_norm": 0.7258834838867188, + "learning_rate": 1.2317562979880182e-06, + "loss": 2.0007, + "step": 25338 + }, + { + "epoch": 0.84, + "grad_norm": 0.7529205679893494, + "learning_rate": 1.2312452971166577e-06, + "loss": 2.0551, + "step": 25339 + }, + { + "epoch": 0.84, + "grad_norm": 0.764609158039093, + "learning_rate": 1.2307343953095485e-06, + "loss": 2.0322, + "step": 25340 + }, + { + "epoch": 0.84, + "grad_norm": 0.7463908195495605, + "learning_rate": 1.2302235925724614e-06, + "loss": 2.0443, + "step": 25341 + }, + { + "epoch": 0.84, + "grad_norm": 0.7505912184715271, + "learning_rate": 1.2297128889111686e-06, + "loss": 1.9558, + "step": 25342 + }, + { + "epoch": 0.84, + "grad_norm": 0.7852020263671875, + "learning_rate": 1.2292022843314432e-06, + "loss": 2.0434, + "step": 25343 + }, + { + "epoch": 0.84, + "grad_norm": 0.7296972274780273, + "learning_rate": 1.2286917788390463e-06, + "loss": 1.954, + "step": 25344 + }, + { + "epoch": 0.84, + "grad_norm": 0.7459531426429749, + "learning_rate": 1.2281813724397496e-06, + "loss": 2.0337, + "step": 25345 + }, + { + "epoch": 0.84, + "grad_norm": 0.741602897644043, + "learning_rate": 1.2276710651393199e-06, + "loss": 2.0515, + "step": 25346 + }, + { + "epoch": 0.84, + "grad_norm": 0.7596555352210999, + "learning_rate": 1.2271608569435222e-06, + "loss": 2.0496, + "step": 25347 + }, + { + "epoch": 0.84, + "grad_norm": 0.7494763731956482, + "learning_rate": 1.226650747858118e-06, + "loss": 2.046, + "step": 25348 + }, + { + "epoch": 0.84, + "grad_norm": 0.7505432367324829, + "learning_rate": 1.2261407378888735e-06, + "loss": 2.0402, + "step": 25349 + }, + { + "epoch": 0.84, + "grad_norm": 0.7442536354064941, + "learning_rate": 1.2256308270415473e-06, + "loss": 2.0134, + "step": 25350 + }, + { + "epoch": 0.84, + "grad_norm": 0.7627303600311279, + "learning_rate": 1.2251210153219007e-06, + "loss": 1.9723, + "step": 25351 + }, + { + "epoch": 0.84, + "grad_norm": 0.7572183609008789, + "learning_rate": 1.2246113027356977e-06, + "loss": 2.0238, + "step": 25352 + }, + { + "epoch": 0.84, + "grad_norm": 0.729246199131012, + "learning_rate": 1.2241016892886925e-06, + "loss": 2.0024, + "step": 25353 + }, + { + "epoch": 0.84, + "grad_norm": 0.7345984578132629, + "learning_rate": 1.223592174986641e-06, + "loss": 1.9236, + "step": 25354 + }, + { + "epoch": 0.84, + "grad_norm": 0.7105081081390381, + "learning_rate": 1.2230827598353045e-06, + "loss": 2.0069, + "step": 25355 + }, + { + "epoch": 0.84, + "grad_norm": 0.7124063372612, + "learning_rate": 1.2225734438404346e-06, + "loss": 2.0313, + "step": 25356 + }, + { + "epoch": 0.84, + "grad_norm": 0.770212709903717, + "learning_rate": 1.2220642270077843e-06, + "loss": 2.0581, + "step": 25357 + }, + { + "epoch": 0.84, + "grad_norm": 0.736091136932373, + "learning_rate": 1.2215551093431078e-06, + "loss": 2.0062, + "step": 25358 + }, + { + "epoch": 0.84, + "grad_norm": 0.7458770871162415, + "learning_rate": 1.2210460908521583e-06, + "loss": 2.0713, + "step": 25359 + }, + { + "epoch": 0.84, + "grad_norm": 0.7418363094329834, + "learning_rate": 1.2205371715406845e-06, + "loss": 2.0245, + "step": 25360 + }, + { + "epoch": 0.84, + "grad_norm": 0.7704051733016968, + "learning_rate": 1.2200283514144373e-06, + "loss": 1.9436, + "step": 25361 + }, + { + "epoch": 0.84, + "grad_norm": 0.745011568069458, + "learning_rate": 1.2195196304791646e-06, + "loss": 1.9866, + "step": 25362 + }, + { + "epoch": 0.84, + "grad_norm": 0.7637007236480713, + "learning_rate": 1.2190110087406115e-06, + "loss": 2.0355, + "step": 25363 + }, + { + "epoch": 0.84, + "grad_norm": 0.746093213558197, + "learning_rate": 1.2185024862045248e-06, + "loss": 2.0312, + "step": 25364 + }, + { + "epoch": 0.84, + "grad_norm": 0.740727961063385, + "learning_rate": 1.2179940628766563e-06, + "loss": 2.0646, + "step": 25365 + }, + { + "epoch": 0.84, + "grad_norm": 0.7488850355148315, + "learning_rate": 1.217485738762738e-06, + "loss": 1.9912, + "step": 25366 + }, + { + "epoch": 0.84, + "grad_norm": 0.7682657241821289, + "learning_rate": 1.2169775138685203e-06, + "loss": 2.063, + "step": 25367 + }, + { + "epoch": 0.84, + "grad_norm": 0.7529699802398682, + "learning_rate": 1.2164693881997446e-06, + "loss": 2.1372, + "step": 25368 + }, + { + "epoch": 0.84, + "grad_norm": 0.7560436129570007, + "learning_rate": 1.215961361762149e-06, + "loss": 2.1288, + "step": 25369 + }, + { + "epoch": 0.84, + "grad_norm": 0.7511946558952332, + "learning_rate": 1.2154534345614754e-06, + "loss": 1.9689, + "step": 25370 + }, + { + "epoch": 0.84, + "grad_norm": 0.7513819336891174, + "learning_rate": 1.2149456066034604e-06, + "loss": 2.0852, + "step": 25371 + }, + { + "epoch": 0.84, + "grad_norm": 0.7603201270103455, + "learning_rate": 1.2144378778938392e-06, + "loss": 1.9777, + "step": 25372 + }, + { + "epoch": 0.84, + "grad_norm": 0.7423480749130249, + "learning_rate": 1.2139302484383507e-06, + "loss": 2.1297, + "step": 25373 + }, + { + "epoch": 0.84, + "grad_norm": 0.7612413763999939, + "learning_rate": 1.2134227182427306e-06, + "loss": 2.0268, + "step": 25374 + }, + { + "epoch": 0.84, + "grad_norm": 0.7494702935218811, + "learning_rate": 1.2129152873127114e-06, + "loss": 2.0919, + "step": 25375 + }, + { + "epoch": 0.84, + "grad_norm": 0.7470599412918091, + "learning_rate": 1.2124079556540236e-06, + "loss": 2.0686, + "step": 25376 + }, + { + "epoch": 0.84, + "grad_norm": 0.7497073411941528, + "learning_rate": 1.211900723272401e-06, + "loss": 2.0722, + "step": 25377 + }, + { + "epoch": 0.84, + "grad_norm": 0.7279725670814514, + "learning_rate": 1.2113935901735774e-06, + "loss": 2.0114, + "step": 25378 + }, + { + "epoch": 0.84, + "grad_norm": 0.7560218572616577, + "learning_rate": 1.2108865563632743e-06, + "loss": 2.0566, + "step": 25379 + }, + { + "epoch": 0.84, + "grad_norm": 0.7198745012283325, + "learning_rate": 1.2103796218472241e-06, + "loss": 1.9964, + "step": 25380 + }, + { + "epoch": 0.84, + "grad_norm": 0.7433822154998779, + "learning_rate": 1.2098727866311554e-06, + "loss": 2.0214, + "step": 25381 + }, + { + "epoch": 0.84, + "grad_norm": 0.7389475703239441, + "learning_rate": 1.2093660507207904e-06, + "loss": 2.0621, + "step": 25382 + }, + { + "epoch": 0.84, + "grad_norm": 0.7363719344139099, + "learning_rate": 1.208859414121859e-06, + "loss": 2.0229, + "step": 25383 + }, + { + "epoch": 0.84, + "grad_norm": 0.7337396144866943, + "learning_rate": 1.208352876840081e-06, + "loss": 1.9386, + "step": 25384 + }, + { + "epoch": 0.84, + "grad_norm": 0.7351333498954773, + "learning_rate": 1.2078464388811773e-06, + "loss": 2.0608, + "step": 25385 + }, + { + "epoch": 0.84, + "grad_norm": 0.7564588189125061, + "learning_rate": 1.2073401002508722e-06, + "loss": 2.041, + "step": 25386 + }, + { + "epoch": 0.84, + "grad_norm": 0.7403412461280823, + "learning_rate": 1.206833860954888e-06, + "loss": 2.0892, + "step": 25387 + }, + { + "epoch": 0.84, + "grad_norm": 0.7586153745651245, + "learning_rate": 1.206327720998941e-06, + "loss": 2.0587, + "step": 25388 + }, + { + "epoch": 0.84, + "grad_norm": 0.7551866769790649, + "learning_rate": 1.205821680388749e-06, + "loss": 2.1187, + "step": 25389 + }, + { + "epoch": 0.84, + "grad_norm": 0.7998202443122864, + "learning_rate": 1.2053157391300307e-06, + "loss": 2.0948, + "step": 25390 + }, + { + "epoch": 0.84, + "grad_norm": 0.7638534903526306, + "learning_rate": 1.2048098972284993e-06, + "loss": 2.0874, + "step": 25391 + }, + { + "epoch": 0.84, + "grad_norm": 0.7591213583946228, + "learning_rate": 1.2043041546898726e-06, + "loss": 2.0091, + "step": 25392 + }, + { + "epoch": 0.84, + "grad_norm": 0.7547223567962646, + "learning_rate": 1.2037985115198614e-06, + "loss": 2.0146, + "step": 25393 + }, + { + "epoch": 0.84, + "grad_norm": 0.7368488907814026, + "learning_rate": 1.2032929677241812e-06, + "loss": 2.0462, + "step": 25394 + }, + { + "epoch": 0.84, + "grad_norm": 0.7529860734939575, + "learning_rate": 1.2027875233085395e-06, + "loss": 2.0089, + "step": 25395 + }, + { + "epoch": 0.84, + "grad_norm": 0.7599092125892639, + "learning_rate": 1.2022821782786508e-06, + "loss": 2.0013, + "step": 25396 + }, + { + "epoch": 0.84, + "grad_norm": 0.745557427406311, + "learning_rate": 1.201776932640223e-06, + "loss": 2.1181, + "step": 25397 + }, + { + "epoch": 0.84, + "grad_norm": 0.7367281317710876, + "learning_rate": 1.201271786398961e-06, + "loss": 2.0721, + "step": 25398 + }, + { + "epoch": 0.85, + "grad_norm": 0.7321810126304626, + "learning_rate": 1.2007667395605727e-06, + "loss": 2.1054, + "step": 25399 + }, + { + "epoch": 0.85, + "grad_norm": 0.750800609588623, + "learning_rate": 1.200261792130767e-06, + "loss": 2.0265, + "step": 25400 + }, + { + "epoch": 0.85, + "grad_norm": 0.7458320260047913, + "learning_rate": 1.199756944115248e-06, + "loss": 2.0403, + "step": 25401 + }, + { + "epoch": 0.85, + "grad_norm": 0.7594524025917053, + "learning_rate": 1.1992521955197134e-06, + "loss": 2.1205, + "step": 25402 + }, + { + "epoch": 0.85, + "grad_norm": 0.7131485342979431, + "learning_rate": 1.1987475463498733e-06, + "loss": 2.0077, + "step": 25403 + }, + { + "epoch": 0.85, + "grad_norm": 0.7324795126914978, + "learning_rate": 1.1982429966114228e-06, + "loss": 2.0722, + "step": 25404 + }, + { + "epoch": 0.85, + "grad_norm": 0.7647042274475098, + "learning_rate": 1.1977385463100666e-06, + "loss": 2.0393, + "step": 25405 + }, + { + "epoch": 0.85, + "grad_norm": 0.7673027515411377, + "learning_rate": 1.197234195451502e-06, + "loss": 2.0297, + "step": 25406 + }, + { + "epoch": 0.85, + "grad_norm": 0.753304660320282, + "learning_rate": 1.1967299440414249e-06, + "loss": 2.0804, + "step": 25407 + }, + { + "epoch": 0.85, + "grad_norm": 0.7348125576972961, + "learning_rate": 1.1962257920855324e-06, + "loss": 2.0497, + "step": 25408 + }, + { + "epoch": 0.85, + "grad_norm": 0.7185177803039551, + "learning_rate": 1.1957217395895237e-06, + "loss": 2.0238, + "step": 25409 + }, + { + "epoch": 0.85, + "grad_norm": 0.7549484372138977, + "learning_rate": 1.1952177865590919e-06, + "loss": 2.0855, + "step": 25410 + }, + { + "epoch": 0.85, + "grad_norm": 0.7673759460449219, + "learning_rate": 1.194713932999927e-06, + "loss": 2.0337, + "step": 25411 + }, + { + "epoch": 0.85, + "grad_norm": 0.7153233885765076, + "learning_rate": 1.1942101789177253e-06, + "loss": 2.0342, + "step": 25412 + }, + { + "epoch": 0.85, + "grad_norm": 0.7404223084449768, + "learning_rate": 1.1937065243181744e-06, + "loss": 2.0713, + "step": 25413 + }, + { + "epoch": 0.85, + "grad_norm": 0.7522891759872437, + "learning_rate": 1.193202969206969e-06, + "loss": 2.0454, + "step": 25414 + }, + { + "epoch": 0.85, + "grad_norm": 0.7712404131889343, + "learning_rate": 1.1926995135897923e-06, + "loss": 2.0972, + "step": 25415 + }, + { + "epoch": 0.85, + "grad_norm": 0.7347492575645447, + "learning_rate": 1.1921961574723373e-06, + "loss": 2.1077, + "step": 25416 + }, + { + "epoch": 0.85, + "grad_norm": 0.7498459815979004, + "learning_rate": 1.1916929008602863e-06, + "loss": 1.9918, + "step": 25417 + }, + { + "epoch": 0.85, + "grad_norm": 0.7276119589805603, + "learning_rate": 1.1911897437593279e-06, + "loss": 2.0384, + "step": 25418 + }, + { + "epoch": 0.85, + "grad_norm": 0.7372073531150818, + "learning_rate": 1.1906866861751466e-06, + "loss": 2.0247, + "step": 25419 + }, + { + "epoch": 0.85, + "grad_norm": 0.7364742755889893, + "learning_rate": 1.190183728113421e-06, + "loss": 1.9952, + "step": 25420 + }, + { + "epoch": 0.85, + "grad_norm": 0.7318270206451416, + "learning_rate": 1.1896808695798368e-06, + "loss": 2.0588, + "step": 25421 + }, + { + "epoch": 0.85, + "grad_norm": 0.7443020343780518, + "learning_rate": 1.1891781105800782e-06, + "loss": 1.9573, + "step": 25422 + }, + { + "epoch": 0.85, + "grad_norm": 0.7799516916275024, + "learning_rate": 1.1886754511198206e-06, + "loss": 2.0403, + "step": 25423 + }, + { + "epoch": 0.85, + "grad_norm": 0.7437310218811035, + "learning_rate": 1.188172891204742e-06, + "loss": 2.1055, + "step": 25424 + }, + { + "epoch": 0.85, + "grad_norm": 0.7208044528961182, + "learning_rate": 1.1876704308405228e-06, + "loss": 2.0234, + "step": 25425 + }, + { + "epoch": 0.85, + "grad_norm": 0.7307316660881042, + "learning_rate": 1.187168070032838e-06, + "loss": 2.0262, + "step": 25426 + }, + { + "epoch": 0.85, + "grad_norm": 0.7535862922668457, + "learning_rate": 1.186665808787365e-06, + "loss": 2.0262, + "step": 25427 + }, + { + "epoch": 0.85, + "grad_norm": 0.7238076329231262, + "learning_rate": 1.186163647109776e-06, + "loss": 1.9986, + "step": 25428 + }, + { + "epoch": 0.85, + "grad_norm": 0.7503595352172852, + "learning_rate": 1.185661585005744e-06, + "loss": 2.0501, + "step": 25429 + }, + { + "epoch": 0.85, + "grad_norm": 0.7441965937614441, + "learning_rate": 1.1851596224809404e-06, + "loss": 1.9945, + "step": 25430 + }, + { + "epoch": 0.85, + "grad_norm": 0.7329748272895813, + "learning_rate": 1.1846577595410402e-06, + "loss": 2.0297, + "step": 25431 + }, + { + "epoch": 0.85, + "grad_norm": 0.7497860789299011, + "learning_rate": 1.1841559961917103e-06, + "loss": 2.0665, + "step": 25432 + }, + { + "epoch": 0.85, + "grad_norm": 0.7643478512763977, + "learning_rate": 1.1836543324386162e-06, + "loss": 2.09, + "step": 25433 + }, + { + "epoch": 0.85, + "grad_norm": 0.7383556365966797, + "learning_rate": 1.183152768287432e-06, + "loss": 2.0804, + "step": 25434 + }, + { + "epoch": 0.85, + "grad_norm": 0.760520339012146, + "learning_rate": 1.1826513037438182e-06, + "loss": 2.0188, + "step": 25435 + }, + { + "epoch": 0.85, + "grad_norm": 0.7337445020675659, + "learning_rate": 1.1821499388134449e-06, + "loss": 2.0435, + "step": 25436 + }, + { + "epoch": 0.85, + "grad_norm": 0.7420154809951782, + "learning_rate": 1.1816486735019705e-06, + "loss": 2.0938, + "step": 25437 + }, + { + "epoch": 0.85, + "grad_norm": 0.7261763215065002, + "learning_rate": 1.1811475078150647e-06, + "loss": 2.0524, + "step": 25438 + }, + { + "epoch": 0.85, + "grad_norm": 0.7407971024513245, + "learning_rate": 1.1806464417583829e-06, + "loss": 2.0171, + "step": 25439 + }, + { + "epoch": 0.85, + "grad_norm": 0.7220339775085449, + "learning_rate": 1.1801454753375918e-06, + "loss": 2.0709, + "step": 25440 + }, + { + "epoch": 0.85, + "grad_norm": 0.7758349776268005, + "learning_rate": 1.179644608558348e-06, + "loss": 2.0864, + "step": 25441 + }, + { + "epoch": 0.85, + "grad_norm": 0.747570812702179, + "learning_rate": 1.1791438414263078e-06, + "loss": 1.9923, + "step": 25442 + }, + { + "epoch": 0.85, + "grad_norm": 0.7489310503005981, + "learning_rate": 1.1786431739471315e-06, + "loss": 1.9762, + "step": 25443 + }, + { + "epoch": 0.85, + "grad_norm": 0.7479124665260315, + "learning_rate": 1.1781426061264766e-06, + "loss": 2.0005, + "step": 25444 + }, + { + "epoch": 0.85, + "grad_norm": 0.78728848695755, + "learning_rate": 1.1776421379699965e-06, + "loss": 2.0808, + "step": 25445 + }, + { + "epoch": 0.85, + "grad_norm": 0.7510107159614563, + "learning_rate": 1.1771417694833432e-06, + "loss": 2.1014, + "step": 25446 + }, + { + "epoch": 0.85, + "grad_norm": 0.7366072535514832, + "learning_rate": 1.1766415006721732e-06, + "loss": 2.0393, + "step": 25447 + }, + { + "epoch": 0.85, + "grad_norm": 0.7826735973358154, + "learning_rate": 1.1761413315421343e-06, + "loss": 2.1219, + "step": 25448 + }, + { + "epoch": 0.85, + "grad_norm": 0.7491790056228638, + "learning_rate": 1.1756412620988822e-06, + "loss": 2.0614, + "step": 25449 + }, + { + "epoch": 0.85, + "grad_norm": 0.7468246817588806, + "learning_rate": 1.175141292348062e-06, + "loss": 2.0325, + "step": 25450 + }, + { + "epoch": 0.85, + "grad_norm": 0.7504499554634094, + "learning_rate": 1.1746414222953228e-06, + "loss": 2.073, + "step": 25451 + }, + { + "epoch": 0.85, + "grad_norm": 0.7411946654319763, + "learning_rate": 1.1741416519463123e-06, + "loss": 2.0335, + "step": 25452 + }, + { + "epoch": 0.85, + "grad_norm": 0.7617331743240356, + "learning_rate": 1.173641981306679e-06, + "loss": 2.0529, + "step": 25453 + }, + { + "epoch": 0.85, + "grad_norm": 0.7682652473449707, + "learning_rate": 1.1731424103820666e-06, + "loss": 2.1454, + "step": 25454 + }, + { + "epoch": 0.85, + "grad_norm": 0.7573553323745728, + "learning_rate": 1.1726429391781158e-06, + "loss": 2.0809, + "step": 25455 + }, + { + "epoch": 0.85, + "grad_norm": 0.7370169162750244, + "learning_rate": 1.1721435677004733e-06, + "loss": 2.0488, + "step": 25456 + }, + { + "epoch": 0.85, + "grad_norm": 0.758801281452179, + "learning_rate": 1.171644295954777e-06, + "loss": 2.0894, + "step": 25457 + }, + { + "epoch": 0.85, + "grad_norm": 0.7704338431358337, + "learning_rate": 1.171145123946672e-06, + "loss": 2.0871, + "step": 25458 + }, + { + "epoch": 0.85, + "grad_norm": 0.7466415762901306, + "learning_rate": 1.170646051681793e-06, + "loss": 2.0652, + "step": 25459 + }, + { + "epoch": 0.85, + "grad_norm": 0.7362510561943054, + "learning_rate": 1.1701470791657822e-06, + "loss": 2.0318, + "step": 25460 + }, + { + "epoch": 0.85, + "grad_norm": 0.7413967847824097, + "learning_rate": 1.1696482064042735e-06, + "loss": 1.9645, + "step": 25461 + }, + { + "epoch": 0.85, + "grad_norm": 0.7412198185920715, + "learning_rate": 1.1691494334029052e-06, + "loss": 2.0693, + "step": 25462 + }, + { + "epoch": 0.85, + "grad_norm": 0.7685073018074036, + "learning_rate": 1.1686507601673125e-06, + "loss": 2.0323, + "step": 25463 + }, + { + "epoch": 0.85, + "grad_norm": 0.7570863366127014, + "learning_rate": 1.1681521867031253e-06, + "loss": 2.0706, + "step": 25464 + }, + { + "epoch": 0.85, + "grad_norm": 0.7335966229438782, + "learning_rate": 1.1676537130159782e-06, + "loss": 2.1258, + "step": 25465 + }, + { + "epoch": 0.85, + "grad_norm": 0.7508025765419006, + "learning_rate": 1.1671553391115054e-06, + "loss": 2.0174, + "step": 25466 + }, + { + "epoch": 0.85, + "grad_norm": 0.7532062530517578, + "learning_rate": 1.1666570649953358e-06, + "loss": 2.09, + "step": 25467 + }, + { + "epoch": 0.85, + "grad_norm": 0.7381278276443481, + "learning_rate": 1.1661588906730946e-06, + "loss": 2.012, + "step": 25468 + }, + { + "epoch": 0.85, + "grad_norm": 0.8015879392623901, + "learning_rate": 1.1656608161504158e-06, + "loss": 2.0267, + "step": 25469 + }, + { + "epoch": 0.85, + "grad_norm": 0.7692397832870483, + "learning_rate": 1.165162841432922e-06, + "loss": 2.0398, + "step": 25470 + }, + { + "epoch": 0.85, + "grad_norm": 0.7887807488441467, + "learning_rate": 1.164664966526242e-06, + "loss": 2.0039, + "step": 25471 + }, + { + "epoch": 0.85, + "grad_norm": 0.7189185619354248, + "learning_rate": 1.1641671914359997e-06, + "loss": 1.9984, + "step": 25472 + }, + { + "epoch": 0.85, + "grad_norm": 0.7554293274879456, + "learning_rate": 1.1636695161678158e-06, + "loss": 2.0581, + "step": 25473 + }, + { + "epoch": 0.85, + "grad_norm": 0.7260330319404602, + "learning_rate": 1.1631719407273156e-06, + "loss": 2.037, + "step": 25474 + }, + { + "epoch": 0.85, + "grad_norm": 0.7498303651809692, + "learning_rate": 1.1626744651201217e-06, + "loss": 2.0873, + "step": 25475 + }, + { + "epoch": 0.85, + "grad_norm": 0.7247176766395569, + "learning_rate": 1.1621770893518525e-06, + "loss": 2.0212, + "step": 25476 + }, + { + "epoch": 0.85, + "grad_norm": 0.7451639771461487, + "learning_rate": 1.161679813428125e-06, + "loss": 2.0387, + "step": 25477 + }, + { + "epoch": 0.85, + "grad_norm": 0.7324416637420654, + "learning_rate": 1.1611826373545587e-06, + "loss": 2.0459, + "step": 25478 + }, + { + "epoch": 0.85, + "grad_norm": 0.7474309206008911, + "learning_rate": 1.160685561136774e-06, + "loss": 2.0287, + "step": 25479 + }, + { + "epoch": 0.85, + "grad_norm": 0.7723796367645264, + "learning_rate": 1.160188584780383e-06, + "loss": 2.0064, + "step": 25480 + }, + { + "epoch": 0.85, + "grad_norm": 0.7569363713264465, + "learning_rate": 1.1596917082909987e-06, + "loss": 2.0708, + "step": 25481 + }, + { + "epoch": 0.85, + "grad_norm": 0.759850800037384, + "learning_rate": 1.159194931674238e-06, + "loss": 2.0356, + "step": 25482 + }, + { + "epoch": 0.85, + "grad_norm": 0.7253577709197998, + "learning_rate": 1.1586982549357106e-06, + "loss": 2.0503, + "step": 25483 + }, + { + "epoch": 0.85, + "grad_norm": 0.7198708057403564, + "learning_rate": 1.158201678081028e-06, + "loss": 1.9566, + "step": 25484 + }, + { + "epoch": 0.85, + "grad_norm": 0.7418367266654968, + "learning_rate": 1.1577052011158064e-06, + "loss": 2.0321, + "step": 25485 + }, + { + "epoch": 0.85, + "grad_norm": 0.7481715083122253, + "learning_rate": 1.1572088240456436e-06, + "loss": 2.1149, + "step": 25486 + }, + { + "epoch": 0.85, + "grad_norm": 0.76832115650177, + "learning_rate": 1.1567125468761542e-06, + "loss": 2.0518, + "step": 25487 + }, + { + "epoch": 0.85, + "grad_norm": 0.732866644859314, + "learning_rate": 1.1562163696129459e-06, + "loss": 1.9125, + "step": 25488 + }, + { + "epoch": 0.85, + "grad_norm": 0.7413383722305298, + "learning_rate": 1.1557202922616217e-06, + "loss": 2.0417, + "step": 25489 + }, + { + "epoch": 0.85, + "grad_norm": 0.7307530045509338, + "learning_rate": 1.1552243148277842e-06, + "loss": 2.0541, + "step": 25490 + }, + { + "epoch": 0.85, + "grad_norm": 0.7439472675323486, + "learning_rate": 1.154728437317041e-06, + "loss": 2.0177, + "step": 25491 + }, + { + "epoch": 0.85, + "grad_norm": 0.7621258497238159, + "learning_rate": 1.1542326597349896e-06, + "loss": 2.0715, + "step": 25492 + }, + { + "epoch": 0.85, + "grad_norm": 0.7660601735115051, + "learning_rate": 1.1537369820872367e-06, + "loss": 1.9851, + "step": 25493 + }, + { + "epoch": 0.85, + "grad_norm": 0.7690000534057617, + "learning_rate": 1.153241404379376e-06, + "loss": 2.058, + "step": 25494 + }, + { + "epoch": 0.85, + "grad_norm": 0.7577580809593201, + "learning_rate": 1.1527459266170116e-06, + "loss": 2.0319, + "step": 25495 + }, + { + "epoch": 0.85, + "grad_norm": 0.7869577407836914, + "learning_rate": 1.1522505488057366e-06, + "loss": 2.0772, + "step": 25496 + }, + { + "epoch": 0.85, + "grad_norm": 0.7485555410385132, + "learning_rate": 1.1517552709511514e-06, + "loss": 2.1006, + "step": 25497 + }, + { + "epoch": 0.85, + "grad_norm": 0.7327066659927368, + "learning_rate": 1.1512600930588492e-06, + "loss": 2.1332, + "step": 25498 + }, + { + "epoch": 0.85, + "grad_norm": 0.7441847324371338, + "learning_rate": 1.150765015134424e-06, + "loss": 2.0125, + "step": 25499 + }, + { + "epoch": 0.85, + "grad_norm": 0.7413938045501709, + "learning_rate": 1.1502700371834685e-06, + "loss": 2.0255, + "step": 25500 + }, + { + "epoch": 0.85, + "grad_norm": 0.7364518642425537, + "learning_rate": 1.149775159211578e-06, + "loss": 2.0028, + "step": 25501 + }, + { + "epoch": 0.85, + "grad_norm": 0.7678237557411194, + "learning_rate": 1.1492803812243403e-06, + "loss": 2.0314, + "step": 25502 + }, + { + "epoch": 0.85, + "grad_norm": 0.8155337572097778, + "learning_rate": 1.148785703227344e-06, + "loss": 2.0438, + "step": 25503 + }, + { + "epoch": 0.85, + "grad_norm": 0.7294251322746277, + "learning_rate": 1.148291125226182e-06, + "loss": 2.0447, + "step": 25504 + }, + { + "epoch": 0.85, + "grad_norm": 0.7276601791381836, + "learning_rate": 1.1477966472264367e-06, + "loss": 2.0183, + "step": 25505 + }, + { + "epoch": 0.85, + "grad_norm": 0.7848840951919556, + "learning_rate": 1.1473022692336977e-06, + "loss": 2.0941, + "step": 25506 + }, + { + "epoch": 0.85, + "grad_norm": 0.7748706936836243, + "learning_rate": 1.1468079912535534e-06, + "loss": 2.0363, + "step": 25507 + }, + { + "epoch": 0.85, + "grad_norm": 0.7341156601905823, + "learning_rate": 1.1463138132915796e-06, + "loss": 2.0216, + "step": 25508 + }, + { + "epoch": 0.85, + "grad_norm": 0.73978590965271, + "learning_rate": 1.145819735353364e-06, + "loss": 1.9652, + "step": 25509 + }, + { + "epoch": 0.85, + "grad_norm": 0.728164553642273, + "learning_rate": 1.1453257574444899e-06, + "loss": 1.9596, + "step": 25510 + }, + { + "epoch": 0.85, + "grad_norm": 0.7132040858268738, + "learning_rate": 1.1448318795705349e-06, + "loss": 1.9824, + "step": 25511 + }, + { + "epoch": 0.85, + "grad_norm": 0.7461531758308411, + "learning_rate": 1.1443381017370792e-06, + "loss": 2.0458, + "step": 25512 + }, + { + "epoch": 0.85, + "grad_norm": 0.7205559611320496, + "learning_rate": 1.1438444239497027e-06, + "loss": 2.0345, + "step": 25513 + }, + { + "epoch": 0.85, + "grad_norm": 0.7151537537574768, + "learning_rate": 1.1433508462139797e-06, + "loss": 2.0313, + "step": 25514 + }, + { + "epoch": 0.85, + "grad_norm": 0.731884241104126, + "learning_rate": 1.1428573685354894e-06, + "loss": 2.0558, + "step": 25515 + }, + { + "epoch": 0.85, + "grad_norm": 0.7399733066558838, + "learning_rate": 1.142363990919807e-06, + "loss": 2.0367, + "step": 25516 + }, + { + "epoch": 0.85, + "grad_norm": 0.7168453931808472, + "learning_rate": 1.141870713372505e-06, + "loss": 2.0329, + "step": 25517 + }, + { + "epoch": 0.85, + "grad_norm": 0.7173789143562317, + "learning_rate": 1.1413775358991542e-06, + "loss": 2.0684, + "step": 25518 + }, + { + "epoch": 0.85, + "grad_norm": 0.7331874966621399, + "learning_rate": 1.1408844585053302e-06, + "loss": 2.0618, + "step": 25519 + }, + { + "epoch": 0.85, + "grad_norm": 0.7190109491348267, + "learning_rate": 1.140391481196602e-06, + "loss": 2.0154, + "step": 25520 + }, + { + "epoch": 0.85, + "grad_norm": 0.7759547829627991, + "learning_rate": 1.1398986039785376e-06, + "loss": 2.0494, + "step": 25521 + }, + { + "epoch": 0.85, + "grad_norm": 0.757175862789154, + "learning_rate": 1.1394058268567054e-06, + "loss": 2.0845, + "step": 25522 + }, + { + "epoch": 0.85, + "grad_norm": 0.7645297050476074, + "learning_rate": 1.1389131498366745e-06, + "loss": 2.0331, + "step": 25523 + }, + { + "epoch": 0.85, + "grad_norm": 0.7445845007896423, + "learning_rate": 1.1384205729240105e-06, + "loss": 2.0453, + "step": 25524 + }, + { + "epoch": 0.85, + "grad_norm": 0.7423800826072693, + "learning_rate": 1.1379280961242756e-06, + "loss": 2.0441, + "step": 25525 + }, + { + "epoch": 0.85, + "grad_norm": 0.7272167205810547, + "learning_rate": 1.1374357194430374e-06, + "loss": 2.0796, + "step": 25526 + }, + { + "epoch": 0.85, + "grad_norm": 0.7231281995773315, + "learning_rate": 1.1369434428858539e-06, + "loss": 1.9872, + "step": 25527 + }, + { + "epoch": 0.85, + "grad_norm": 0.7214996814727783, + "learning_rate": 1.1364512664582894e-06, + "loss": 2.112, + "step": 25528 + }, + { + "epoch": 0.85, + "grad_norm": 0.7728033661842346, + "learning_rate": 1.1359591901659083e-06, + "loss": 2.1227, + "step": 25529 + }, + { + "epoch": 0.85, + "grad_norm": 0.7460951805114746, + "learning_rate": 1.135467214014262e-06, + "loss": 2.0895, + "step": 25530 + }, + { + "epoch": 0.85, + "grad_norm": 0.7359938025474548, + "learning_rate": 1.1349753380089102e-06, + "loss": 2.0748, + "step": 25531 + }, + { + "epoch": 0.85, + "grad_norm": 0.7507954835891724, + "learning_rate": 1.134483562155415e-06, + "loss": 2.0368, + "step": 25532 + }, + { + "epoch": 0.85, + "grad_norm": 0.7548166513442993, + "learning_rate": 1.133991886459328e-06, + "loss": 2.0295, + "step": 25533 + }, + { + "epoch": 0.85, + "grad_norm": 0.7229151725769043, + "learning_rate": 1.1335003109262033e-06, + "loss": 2.0778, + "step": 25534 + }, + { + "epoch": 0.85, + "grad_norm": 0.774015486240387, + "learning_rate": 1.1330088355615976e-06, + "loss": 2.0328, + "step": 25535 + }, + { + "epoch": 0.85, + "grad_norm": 0.7786744236946106, + "learning_rate": 1.13251746037106e-06, + "loss": 1.996, + "step": 25536 + }, + { + "epoch": 0.85, + "grad_norm": 0.7786620855331421, + "learning_rate": 1.1320261853601422e-06, + "loss": 2.0005, + "step": 25537 + }, + { + "epoch": 0.85, + "grad_norm": 0.7331827878952026, + "learning_rate": 1.131535010534398e-06, + "loss": 2.0167, + "step": 25538 + }, + { + "epoch": 0.85, + "grad_norm": 0.7627809047698975, + "learning_rate": 1.1310439358993742e-06, + "loss": 2.017, + "step": 25539 + }, + { + "epoch": 0.85, + "grad_norm": 0.7448964715003967, + "learning_rate": 1.1305529614606158e-06, + "loss": 2.0677, + "step": 25540 + }, + { + "epoch": 0.85, + "grad_norm": 0.756600558757782, + "learning_rate": 1.1300620872236745e-06, + "loss": 2.064, + "step": 25541 + }, + { + "epoch": 0.85, + "grad_norm": 0.7472687363624573, + "learning_rate": 1.1295713131940933e-06, + "loss": 2.0349, + "step": 25542 + }, + { + "epoch": 0.85, + "grad_norm": 0.7513116002082825, + "learning_rate": 1.1290806393774145e-06, + "loss": 2.0647, + "step": 25543 + }, + { + "epoch": 0.85, + "grad_norm": 0.7636187672615051, + "learning_rate": 1.1285900657791836e-06, + "loss": 2.1422, + "step": 25544 + }, + { + "epoch": 0.85, + "grad_norm": 0.7481735348701477, + "learning_rate": 1.1280995924049453e-06, + "loss": 2.0318, + "step": 25545 + }, + { + "epoch": 0.85, + "grad_norm": 0.7470899224281311, + "learning_rate": 1.1276092192602362e-06, + "loss": 2.1109, + "step": 25546 + }, + { + "epoch": 0.85, + "grad_norm": 0.7598119974136353, + "learning_rate": 1.1271189463506006e-06, + "loss": 1.9597, + "step": 25547 + }, + { + "epoch": 0.85, + "grad_norm": 0.7258020043373108, + "learning_rate": 1.126628773681575e-06, + "loss": 2.0004, + "step": 25548 + }, + { + "epoch": 0.85, + "grad_norm": 0.7323870658874512, + "learning_rate": 1.1261387012586955e-06, + "loss": 2.0275, + "step": 25549 + }, + { + "epoch": 0.85, + "grad_norm": 0.7854240536689758, + "learning_rate": 1.1256487290874995e-06, + "loss": 2.0649, + "step": 25550 + }, + { + "epoch": 0.85, + "grad_norm": 0.7275902032852173, + "learning_rate": 1.1251588571735284e-06, + "loss": 1.993, + "step": 25551 + }, + { + "epoch": 0.85, + "grad_norm": 0.773851215839386, + "learning_rate": 1.1246690855223063e-06, + "loss": 1.9964, + "step": 25552 + }, + { + "epoch": 0.85, + "grad_norm": 0.7523597478866577, + "learning_rate": 1.1241794141393725e-06, + "loss": 2.0783, + "step": 25553 + }, + { + "epoch": 0.85, + "grad_norm": 0.7775759100914001, + "learning_rate": 1.123689843030259e-06, + "loss": 2.0098, + "step": 25554 + }, + { + "epoch": 0.85, + "grad_norm": 0.7536785006523132, + "learning_rate": 1.123200372200497e-06, + "loss": 2.1355, + "step": 25555 + }, + { + "epoch": 0.85, + "grad_norm": 0.7498986721038818, + "learning_rate": 1.122711001655611e-06, + "loss": 2.0739, + "step": 25556 + }, + { + "epoch": 0.85, + "grad_norm": 0.7649057507514954, + "learning_rate": 1.1222217314011364e-06, + "loss": 1.9741, + "step": 25557 + }, + { + "epoch": 0.85, + "grad_norm": 0.7573047280311584, + "learning_rate": 1.1217325614425966e-06, + "loss": 2.166, + "step": 25558 + }, + { + "epoch": 0.85, + "grad_norm": 0.7349489331245422, + "learning_rate": 1.1212434917855175e-06, + "loss": 2.079, + "step": 25559 + }, + { + "epoch": 0.85, + "grad_norm": 0.7193424701690674, + "learning_rate": 1.1207545224354288e-06, + "loss": 1.9917, + "step": 25560 + }, + { + "epoch": 0.85, + "grad_norm": 0.7615953683853149, + "learning_rate": 1.1202656533978517e-06, + "loss": 2.0525, + "step": 25561 + }, + { + "epoch": 0.85, + "grad_norm": 0.7715849280357361, + "learning_rate": 1.1197768846783074e-06, + "loss": 2.1047, + "step": 25562 + }, + { + "epoch": 0.85, + "grad_norm": 0.7319328784942627, + "learning_rate": 1.1192882162823193e-06, + "loss": 2.0586, + "step": 25563 + }, + { + "epoch": 0.85, + "grad_norm": 0.7264840602874756, + "learning_rate": 1.118799648215413e-06, + "loss": 2.0003, + "step": 25564 + }, + { + "epoch": 0.85, + "grad_norm": 0.7499474883079529, + "learning_rate": 1.1183111804830994e-06, + "loss": 2.0188, + "step": 25565 + }, + { + "epoch": 0.85, + "grad_norm": 0.7261382341384888, + "learning_rate": 1.1178228130908997e-06, + "loss": 1.9712, + "step": 25566 + }, + { + "epoch": 0.85, + "grad_norm": 0.73532634973526, + "learning_rate": 1.1173345460443352e-06, + "loss": 2.0289, + "step": 25567 + }, + { + "epoch": 0.85, + "grad_norm": 0.7755469679832458, + "learning_rate": 1.116846379348917e-06, + "loss": 2.0226, + "step": 25568 + }, + { + "epoch": 0.85, + "grad_norm": 0.7395761609077454, + "learning_rate": 1.116358313010164e-06, + "loss": 2.0151, + "step": 25569 + }, + { + "epoch": 0.85, + "grad_norm": 0.7293194532394409, + "learning_rate": 1.1158703470335896e-06, + "loss": 1.9611, + "step": 25570 + }, + { + "epoch": 0.85, + "grad_norm": 0.7172329425811768, + "learning_rate": 1.1153824814247028e-06, + "loss": 1.9672, + "step": 25571 + }, + { + "epoch": 0.85, + "grad_norm": 0.7232022285461426, + "learning_rate": 1.1148947161890177e-06, + "loss": 2.0181, + "step": 25572 + }, + { + "epoch": 0.85, + "grad_norm": 0.7606463432312012, + "learning_rate": 1.1144070513320483e-06, + "loss": 1.986, + "step": 25573 + }, + { + "epoch": 0.85, + "grad_norm": 0.7412828803062439, + "learning_rate": 1.1139194868592973e-06, + "loss": 2.0179, + "step": 25574 + }, + { + "epoch": 0.85, + "grad_norm": 0.7248949408531189, + "learning_rate": 1.1134320227762762e-06, + "loss": 2.0345, + "step": 25575 + }, + { + "epoch": 0.85, + "grad_norm": 0.7832808494567871, + "learning_rate": 1.112944659088494e-06, + "loss": 2.0477, + "step": 25576 + }, + { + "epoch": 0.85, + "grad_norm": 0.7331267595291138, + "learning_rate": 1.112457395801455e-06, + "loss": 2.0571, + "step": 25577 + }, + { + "epoch": 0.85, + "grad_norm": 0.7391887307167053, + "learning_rate": 1.1119702329206616e-06, + "loss": 2.0632, + "step": 25578 + }, + { + "epoch": 0.85, + "grad_norm": 0.7343783378601074, + "learning_rate": 1.1114831704516193e-06, + "loss": 2.0278, + "step": 25579 + }, + { + "epoch": 0.85, + "grad_norm": 0.7450145483016968, + "learning_rate": 1.1109962083998326e-06, + "loss": 2.0649, + "step": 25580 + }, + { + "epoch": 0.85, + "grad_norm": 0.7479609847068787, + "learning_rate": 1.1105093467707994e-06, + "loss": 2.0122, + "step": 25581 + }, + { + "epoch": 0.85, + "grad_norm": 0.7185435891151428, + "learning_rate": 1.1100225855700242e-06, + "loss": 2.0279, + "step": 25582 + }, + { + "epoch": 0.85, + "grad_norm": 0.7416254281997681, + "learning_rate": 1.1095359248030046e-06, + "loss": 2.0537, + "step": 25583 + }, + { + "epoch": 0.85, + "grad_norm": 0.7201782464981079, + "learning_rate": 1.1090493644752342e-06, + "loss": 2.0156, + "step": 25584 + }, + { + "epoch": 0.85, + "grad_norm": 0.7546603083610535, + "learning_rate": 1.108562904592214e-06, + "loss": 2.0141, + "step": 25585 + }, + { + "epoch": 0.85, + "grad_norm": 0.7132443189620972, + "learning_rate": 1.1080765451594444e-06, + "loss": 1.9846, + "step": 25586 + }, + { + "epoch": 0.85, + "grad_norm": 0.7304291129112244, + "learning_rate": 1.1075902861824095e-06, + "loss": 1.9411, + "step": 25587 + }, + { + "epoch": 0.85, + "grad_norm": 0.7557387948036194, + "learning_rate": 1.1071041276666084e-06, + "loss": 1.9618, + "step": 25588 + }, + { + "epoch": 0.85, + "grad_norm": 0.7824674248695374, + "learning_rate": 1.1066180696175354e-06, + "loss": 2.0808, + "step": 25589 + }, + { + "epoch": 0.85, + "grad_norm": 0.7659381628036499, + "learning_rate": 1.1061321120406776e-06, + "loss": 2.0571, + "step": 25590 + }, + { + "epoch": 0.85, + "grad_norm": 0.739253044128418, + "learning_rate": 1.105646254941528e-06, + "loss": 2.056, + "step": 25591 + }, + { + "epoch": 0.85, + "grad_norm": 0.7618462443351746, + "learning_rate": 1.1051604983255748e-06, + "loss": 2.0337, + "step": 25592 + }, + { + "epoch": 0.85, + "grad_norm": 0.7802045941352844, + "learning_rate": 1.1046748421983033e-06, + "loss": 2.0515, + "step": 25593 + }, + { + "epoch": 0.85, + "grad_norm": 0.7340210676193237, + "learning_rate": 1.1041892865652027e-06, + "loss": 2.0862, + "step": 25594 + }, + { + "epoch": 0.85, + "grad_norm": 0.7337337732315063, + "learning_rate": 1.1037038314317593e-06, + "loss": 2.0938, + "step": 25595 + }, + { + "epoch": 0.85, + "grad_norm": 0.7595854997634888, + "learning_rate": 1.103218476803457e-06, + "loss": 2.065, + "step": 25596 + }, + { + "epoch": 0.85, + "grad_norm": 0.802827775478363, + "learning_rate": 1.1027332226857768e-06, + "loss": 1.9826, + "step": 25597 + }, + { + "epoch": 0.85, + "grad_norm": 0.7528466582298279, + "learning_rate": 1.102248069084203e-06, + "loss": 2.0853, + "step": 25598 + }, + { + "epoch": 0.85, + "grad_norm": 0.7504701018333435, + "learning_rate": 1.1017630160042147e-06, + "loss": 2.0801, + "step": 25599 + }, + { + "epoch": 0.85, + "grad_norm": 0.730383038520813, + "learning_rate": 1.1012780634512954e-06, + "loss": 2.0854, + "step": 25600 + }, + { + "epoch": 0.85, + "grad_norm": 0.7294831275939941, + "learning_rate": 1.1007932114309184e-06, + "loss": 2.0935, + "step": 25601 + }, + { + "epoch": 0.85, + "grad_norm": 0.7153273224830627, + "learning_rate": 1.100308459948567e-06, + "loss": 2.0768, + "step": 25602 + }, + { + "epoch": 0.85, + "grad_norm": 0.7667094469070435, + "learning_rate": 1.0998238090097124e-06, + "loss": 2.0447, + "step": 25603 + }, + { + "epoch": 0.85, + "grad_norm": 0.7227137088775635, + "learning_rate": 1.0993392586198349e-06, + "loss": 1.9862, + "step": 25604 + }, + { + "epoch": 0.85, + "grad_norm": 0.7257834076881409, + "learning_rate": 1.0988548087844054e-06, + "loss": 1.9713, + "step": 25605 + }, + { + "epoch": 0.85, + "grad_norm": 0.7546156048774719, + "learning_rate": 1.0983704595088962e-06, + "loss": 2.0447, + "step": 25606 + }, + { + "epoch": 0.85, + "grad_norm": 0.7466554641723633, + "learning_rate": 1.0978862107987799e-06, + "loss": 2.0435, + "step": 25607 + }, + { + "epoch": 0.85, + "grad_norm": 0.7298524975776672, + "learning_rate": 1.0974020626595328e-06, + "loss": 1.9683, + "step": 25608 + }, + { + "epoch": 0.85, + "grad_norm": 0.7515607476234436, + "learning_rate": 1.0969180150966162e-06, + "loss": 2.0234, + "step": 25609 + }, + { + "epoch": 0.85, + "grad_norm": 0.7550762891769409, + "learning_rate": 1.0964340681155017e-06, + "loss": 1.9821, + "step": 25610 + }, + { + "epoch": 0.85, + "grad_norm": 0.7641791701316833, + "learning_rate": 1.0959502217216589e-06, + "loss": 2.0348, + "step": 25611 + }, + { + "epoch": 0.85, + "grad_norm": 0.7577338218688965, + "learning_rate": 1.0954664759205503e-06, + "loss": 2.0436, + "step": 25612 + }, + { + "epoch": 0.85, + "grad_norm": 0.7384804487228394, + "learning_rate": 1.0949828307176447e-06, + "loss": 2.0559, + "step": 25613 + }, + { + "epoch": 0.85, + "grad_norm": 0.7456777095794678, + "learning_rate": 1.0944992861184044e-06, + "loss": 2.0315, + "step": 25614 + }, + { + "epoch": 0.85, + "grad_norm": 0.7368715405464172, + "learning_rate": 1.0940158421282898e-06, + "loss": 2.0549, + "step": 25615 + }, + { + "epoch": 0.85, + "grad_norm": 0.7639155983924866, + "learning_rate": 1.093532498752765e-06, + "loss": 2.0194, + "step": 25616 + }, + { + "epoch": 0.85, + "grad_norm": 0.7549625039100647, + "learning_rate": 1.0930492559972928e-06, + "loss": 2.0362, + "step": 25617 + }, + { + "epoch": 0.85, + "grad_norm": 0.7858089208602905, + "learning_rate": 1.0925661138673293e-06, + "loss": 2.0368, + "step": 25618 + }, + { + "epoch": 0.85, + "grad_norm": 0.7371587753295898, + "learning_rate": 1.0920830723683328e-06, + "loss": 2.039, + "step": 25619 + }, + { + "epoch": 0.85, + "grad_norm": 0.7286743521690369, + "learning_rate": 1.0916001315057623e-06, + "loss": 2.0835, + "step": 25620 + }, + { + "epoch": 0.85, + "grad_norm": 0.7272665500640869, + "learning_rate": 1.091117291285071e-06, + "loss": 2.086, + "step": 25621 + }, + { + "epoch": 0.85, + "grad_norm": 0.7482417821884155, + "learning_rate": 1.0906345517117167e-06, + "loss": 1.9871, + "step": 25622 + }, + { + "epoch": 0.85, + "grad_norm": 0.7229847311973572, + "learning_rate": 1.0901519127911497e-06, + "loss": 2.023, + "step": 25623 + }, + { + "epoch": 0.85, + "grad_norm": 0.7484353184700012, + "learning_rate": 1.0896693745288279e-06, + "loss": 2.0203, + "step": 25624 + }, + { + "epoch": 0.85, + "grad_norm": 0.7271804213523865, + "learning_rate": 1.0891869369301967e-06, + "loss": 2.0478, + "step": 25625 + }, + { + "epoch": 0.85, + "grad_norm": 0.7588217854499817, + "learning_rate": 1.0887046000007117e-06, + "loss": 2.0441, + "step": 25626 + }, + { + "epoch": 0.85, + "grad_norm": 0.7179100513458252, + "learning_rate": 1.088222363745819e-06, + "loss": 1.9992, + "step": 25627 + }, + { + "epoch": 0.85, + "grad_norm": 0.7399671673774719, + "learning_rate": 1.087740228170966e-06, + "loss": 2.0602, + "step": 25628 + }, + { + "epoch": 0.85, + "grad_norm": 0.7445325255393982, + "learning_rate": 1.0872581932816006e-06, + "loss": 2.0829, + "step": 25629 + }, + { + "epoch": 0.85, + "grad_norm": 0.7651894688606262, + "learning_rate": 1.0867762590831709e-06, + "loss": 2.0974, + "step": 25630 + }, + { + "epoch": 0.85, + "grad_norm": 0.7661094069480896, + "learning_rate": 1.086294425581118e-06, + "loss": 2.0087, + "step": 25631 + }, + { + "epoch": 0.85, + "grad_norm": 0.7849562168121338, + "learning_rate": 1.0858126927808866e-06, + "loss": 2.0331, + "step": 25632 + }, + { + "epoch": 0.85, + "grad_norm": 0.7501575350761414, + "learning_rate": 1.0853310606879197e-06, + "loss": 2.0808, + "step": 25633 + }, + { + "epoch": 0.85, + "grad_norm": 0.7664833664894104, + "learning_rate": 1.0848495293076567e-06, + "loss": 2.0106, + "step": 25634 + }, + { + "epoch": 0.85, + "grad_norm": 0.7493854761123657, + "learning_rate": 1.0843680986455408e-06, + "loss": 2.0815, + "step": 25635 + }, + { + "epoch": 0.85, + "grad_norm": 0.7543217539787292, + "learning_rate": 1.08388676870701e-06, + "loss": 2.0215, + "step": 25636 + }, + { + "epoch": 0.85, + "grad_norm": 0.7357943058013916, + "learning_rate": 1.0834055394974973e-06, + "loss": 2.0267, + "step": 25637 + }, + { + "epoch": 0.85, + "grad_norm": 0.7303991317749023, + "learning_rate": 1.0829244110224447e-06, + "loss": 2.0159, + "step": 25638 + }, + { + "epoch": 0.85, + "grad_norm": 0.7448989152908325, + "learning_rate": 1.0824433832872873e-06, + "loss": 1.9924, + "step": 25639 + }, + { + "epoch": 0.85, + "grad_norm": 0.7493236064910889, + "learning_rate": 1.0819624562974584e-06, + "loss": 2.0316, + "step": 25640 + }, + { + "epoch": 0.85, + "grad_norm": 0.7773019075393677, + "learning_rate": 1.0814816300583896e-06, + "loss": 2.1019, + "step": 25641 + }, + { + "epoch": 0.85, + "grad_norm": 0.7728946208953857, + "learning_rate": 1.0810009045755165e-06, + "loss": 2.1127, + "step": 25642 + }, + { + "epoch": 0.85, + "grad_norm": 0.7427574396133423, + "learning_rate": 1.0805202798542658e-06, + "loss": 2.0737, + "step": 25643 + }, + { + "epoch": 0.85, + "grad_norm": 0.7579944729804993, + "learning_rate": 1.080039755900072e-06, + "loss": 1.9549, + "step": 25644 + }, + { + "epoch": 0.85, + "grad_norm": 0.7827484011650085, + "learning_rate": 1.0795593327183596e-06, + "loss": 2.1541, + "step": 25645 + }, + { + "epoch": 0.85, + "grad_norm": 0.747428834438324, + "learning_rate": 1.07907901031456e-06, + "loss": 2.0369, + "step": 25646 + }, + { + "epoch": 0.85, + "grad_norm": 0.7605673670768738, + "learning_rate": 1.0785987886940952e-06, + "loss": 2.0971, + "step": 25647 + }, + { + "epoch": 0.85, + "grad_norm": 0.7300199270248413, + "learning_rate": 1.0781186678623945e-06, + "loss": 2.059, + "step": 25648 + }, + { + "epoch": 0.85, + "grad_norm": 0.7193981409072876, + "learning_rate": 1.0776386478248803e-06, + "loss": 2.0527, + "step": 25649 + }, + { + "epoch": 0.85, + "grad_norm": 0.7398879528045654, + "learning_rate": 1.0771587285869744e-06, + "loss": 2.0604, + "step": 25650 + }, + { + "epoch": 0.85, + "grad_norm": 0.7175271511077881, + "learning_rate": 1.0766789101540998e-06, + "loss": 1.9924, + "step": 25651 + }, + { + "epoch": 0.85, + "grad_norm": 0.7435488700866699, + "learning_rate": 1.0761991925316783e-06, + "loss": 1.9703, + "step": 25652 + }, + { + "epoch": 0.85, + "grad_norm": 0.7405022382736206, + "learning_rate": 1.0757195757251293e-06, + "loss": 2.0373, + "step": 25653 + }, + { + "epoch": 0.85, + "grad_norm": 0.7471997737884521, + "learning_rate": 1.0752400597398683e-06, + "loss": 2.1105, + "step": 25654 + }, + { + "epoch": 0.85, + "grad_norm": 0.7272506952285767, + "learning_rate": 1.0747606445813175e-06, + "loss": 1.9889, + "step": 25655 + }, + { + "epoch": 0.85, + "grad_norm": 0.7743155360221863, + "learning_rate": 1.074281330254887e-06, + "loss": 2.0184, + "step": 25656 + }, + { + "epoch": 0.85, + "grad_norm": 0.7350564002990723, + "learning_rate": 1.0738021167659974e-06, + "loss": 2.0917, + "step": 25657 + }, + { + "epoch": 0.85, + "grad_norm": 0.7490598559379578, + "learning_rate": 1.0733230041200603e-06, + "loss": 2.0851, + "step": 25658 + }, + { + "epoch": 0.85, + "grad_norm": 0.7429494857788086, + "learning_rate": 1.0728439923224865e-06, + "loss": 2.0853, + "step": 25659 + }, + { + "epoch": 0.85, + "grad_norm": 0.7397154569625854, + "learning_rate": 1.072365081378689e-06, + "loss": 2.0059, + "step": 25660 + }, + { + "epoch": 0.85, + "grad_norm": 0.7442455291748047, + "learning_rate": 1.0718862712940815e-06, + "loss": 2.0145, + "step": 25661 + }, + { + "epoch": 0.85, + "grad_norm": 0.7686842083930969, + "learning_rate": 1.0714075620740694e-06, + "loss": 2.0001, + "step": 25662 + }, + { + "epoch": 0.85, + "grad_norm": 0.7437859177589417, + "learning_rate": 1.0709289537240608e-06, + "loss": 2.0724, + "step": 25663 + }, + { + "epoch": 0.85, + "grad_norm": 0.7169320583343506, + "learning_rate": 1.0704504462494637e-06, + "loss": 2.0237, + "step": 25664 + }, + { + "epoch": 0.85, + "grad_norm": 0.761655330657959, + "learning_rate": 1.0699720396556868e-06, + "loss": 2.0384, + "step": 25665 + }, + { + "epoch": 0.85, + "grad_norm": 0.730991542339325, + "learning_rate": 1.0694937339481315e-06, + "loss": 2.0318, + "step": 25666 + }, + { + "epoch": 0.85, + "grad_norm": 0.743136465549469, + "learning_rate": 1.0690155291322002e-06, + "loss": 2.0051, + "step": 25667 + }, + { + "epoch": 0.85, + "grad_norm": 0.7378832697868347, + "learning_rate": 1.0685374252132996e-06, + "loss": 2.1331, + "step": 25668 + }, + { + "epoch": 0.85, + "grad_norm": 0.7237527966499329, + "learning_rate": 1.0680594221968265e-06, + "loss": 2.0007, + "step": 25669 + }, + { + "epoch": 0.85, + "grad_norm": 0.7638330459594727, + "learning_rate": 1.0675815200881868e-06, + "loss": 2.0611, + "step": 25670 + }, + { + "epoch": 0.85, + "grad_norm": 0.7427726984024048, + "learning_rate": 1.0671037188927747e-06, + "loss": 2.0765, + "step": 25671 + }, + { + "epoch": 0.85, + "grad_norm": 0.7686672806739807, + "learning_rate": 1.0666260186159882e-06, + "loss": 2.0555, + "step": 25672 + }, + { + "epoch": 0.85, + "grad_norm": 0.7554507851600647, + "learning_rate": 1.0661484192632255e-06, + "loss": 2.1012, + "step": 25673 + }, + { + "epoch": 0.85, + "grad_norm": 0.7501378059387207, + "learning_rate": 1.065670920839883e-06, + "loss": 2.0263, + "step": 25674 + }, + { + "epoch": 0.85, + "grad_norm": 0.7537559866905212, + "learning_rate": 1.0651935233513555e-06, + "loss": 2.0542, + "step": 25675 + }, + { + "epoch": 0.85, + "grad_norm": 0.7471948266029358, + "learning_rate": 1.0647162268030331e-06, + "loss": 1.9957, + "step": 25676 + }, + { + "epoch": 0.85, + "grad_norm": 0.7303323149681091, + "learning_rate": 1.0642390312003126e-06, + "loss": 2.0349, + "step": 25677 + }, + { + "epoch": 0.85, + "grad_norm": 0.7317492961883545, + "learning_rate": 1.0637619365485808e-06, + "loss": 2.0118, + "step": 25678 + }, + { + "epoch": 0.85, + "grad_norm": 0.7261767387390137, + "learning_rate": 1.063284942853231e-06, + "loss": 2.0939, + "step": 25679 + }, + { + "epoch": 0.85, + "grad_norm": 0.7600923180580139, + "learning_rate": 1.062808050119648e-06, + "loss": 2.01, + "step": 25680 + }, + { + "epoch": 0.85, + "grad_norm": 0.7691116333007812, + "learning_rate": 1.0623312583532242e-06, + "loss": 2.0853, + "step": 25681 + }, + { + "epoch": 0.85, + "grad_norm": 0.7224286198616028, + "learning_rate": 1.0618545675593429e-06, + "loss": 2.076, + "step": 25682 + }, + { + "epoch": 0.85, + "grad_norm": 0.7406932711601257, + "learning_rate": 1.0613779777433908e-06, + "loss": 2.0914, + "step": 25683 + }, + { + "epoch": 0.85, + "grad_norm": 0.7692009806632996, + "learning_rate": 1.0609014889107527e-06, + "loss": 1.9773, + "step": 25684 + }, + { + "epoch": 0.85, + "grad_norm": 0.7442301511764526, + "learning_rate": 1.0604251010668088e-06, + "loss": 2.0408, + "step": 25685 + }, + { + "epoch": 0.85, + "grad_norm": 0.7615671157836914, + "learning_rate": 1.0599488142169434e-06, + "loss": 1.9826, + "step": 25686 + }, + { + "epoch": 0.85, + "grad_norm": 0.7385686039924622, + "learning_rate": 1.059472628366538e-06, + "loss": 2.0635, + "step": 25687 + }, + { + "epoch": 0.85, + "grad_norm": 0.7261627316474915, + "learning_rate": 1.0589965435209714e-06, + "loss": 2.0735, + "step": 25688 + }, + { + "epoch": 0.85, + "grad_norm": 0.741863489151001, + "learning_rate": 1.0585205596856196e-06, + "loss": 2.0633, + "step": 25689 + }, + { + "epoch": 0.85, + "grad_norm": 0.7368066310882568, + "learning_rate": 1.0580446768658648e-06, + "loss": 2.0331, + "step": 25690 + }, + { + "epoch": 0.85, + "grad_norm": 0.7324717044830322, + "learning_rate": 1.0575688950670793e-06, + "loss": 2.0119, + "step": 25691 + }, + { + "epoch": 0.85, + "grad_norm": 0.7567073702812195, + "learning_rate": 1.0570932142946389e-06, + "loss": 2.0125, + "step": 25692 + }, + { + "epoch": 0.85, + "grad_norm": 0.7309162020683289, + "learning_rate": 1.0566176345539225e-06, + "loss": 2.0627, + "step": 25693 + }, + { + "epoch": 0.85, + "grad_norm": 0.7520669102668762, + "learning_rate": 1.056142155850295e-06, + "loss": 2.0009, + "step": 25694 + }, + { + "epoch": 0.85, + "grad_norm": 0.7557404637336731, + "learning_rate": 1.0556667781891305e-06, + "loss": 2.0363, + "step": 25695 + }, + { + "epoch": 0.85, + "grad_norm": 0.7563972473144531, + "learning_rate": 1.0551915015758042e-06, + "loss": 2.0721, + "step": 25696 + }, + { + "epoch": 0.85, + "grad_norm": 0.7686523795127869, + "learning_rate": 1.054716326015681e-06, + "loss": 2.0981, + "step": 25697 + }, + { + "epoch": 0.85, + "grad_norm": 0.7877217531204224, + "learning_rate": 1.0542412515141298e-06, + "loss": 2.0941, + "step": 25698 + }, + { + "epoch": 0.86, + "grad_norm": 0.7257724404335022, + "learning_rate": 1.053766278076519e-06, + "loss": 2.091, + "step": 25699 + }, + { + "epoch": 0.86, + "grad_norm": 0.7235658764839172, + "learning_rate": 1.0532914057082123e-06, + "loss": 2.0676, + "step": 25700 + }, + { + "epoch": 0.86, + "grad_norm": 0.7406724095344543, + "learning_rate": 1.0528166344145785e-06, + "loss": 1.9995, + "step": 25701 + }, + { + "epoch": 0.86, + "grad_norm": 0.7324613332748413, + "learning_rate": 1.0523419642009758e-06, + "loss": 1.9248, + "step": 25702 + }, + { + "epoch": 0.86, + "grad_norm": 0.7732944488525391, + "learning_rate": 1.051867395072772e-06, + "loss": 2.0682, + "step": 25703 + }, + { + "epoch": 0.86, + "grad_norm": 0.7542123794555664, + "learning_rate": 1.0513929270353252e-06, + "loss": 2.0648, + "step": 25704 + }, + { + "epoch": 0.86, + "grad_norm": 0.7391310334205627, + "learning_rate": 1.0509185600939975e-06, + "loss": 2.0359, + "step": 25705 + }, + { + "epoch": 0.86, + "grad_norm": 0.720277726650238, + "learning_rate": 1.050444294254148e-06, + "loss": 1.9842, + "step": 25706 + }, + { + "epoch": 0.86, + "grad_norm": 0.731803834438324, + "learning_rate": 1.0499701295211318e-06, + "loss": 2.0203, + "step": 25707 + }, + { + "epoch": 0.86, + "grad_norm": 0.7510703802108765, + "learning_rate": 1.0494960659003072e-06, + "loss": 2.0453, + "step": 25708 + }, + { + "epoch": 0.86, + "grad_norm": 0.7525373101234436, + "learning_rate": 1.0490221033970328e-06, + "loss": 1.9853, + "step": 25709 + }, + { + "epoch": 0.86, + "grad_norm": 0.7505938410758972, + "learning_rate": 1.0485482420166614e-06, + "loss": 2.0651, + "step": 25710 + }, + { + "epoch": 0.86, + "grad_norm": 0.7590008974075317, + "learning_rate": 1.0480744817645438e-06, + "loss": 2.0369, + "step": 25711 + }, + { + "epoch": 0.86, + "grad_norm": 0.7414840459823608, + "learning_rate": 1.0476008226460354e-06, + "loss": 2.0474, + "step": 25712 + }, + { + "epoch": 0.86, + "grad_norm": 0.7427039742469788, + "learning_rate": 1.0471272646664854e-06, + "loss": 2.0688, + "step": 25713 + }, + { + "epoch": 0.86, + "grad_norm": 0.7563658356666565, + "learning_rate": 1.0466538078312437e-06, + "loss": 2.0729, + "step": 25714 + }, + { + "epoch": 0.86, + "grad_norm": 0.7695289254188538, + "learning_rate": 1.0461804521456654e-06, + "loss": 2.0176, + "step": 25715 + }, + { + "epoch": 0.86, + "grad_norm": 0.759555459022522, + "learning_rate": 1.045707197615088e-06, + "loss": 2.034, + "step": 25716 + }, + { + "epoch": 0.86, + "grad_norm": 0.729200005531311, + "learning_rate": 1.0452340442448628e-06, + "loss": 1.9496, + "step": 25717 + }, + { + "epoch": 0.86, + "grad_norm": 0.7555876970291138, + "learning_rate": 1.044760992040338e-06, + "loss": 2.1093, + "step": 25718 + }, + { + "epoch": 0.86, + "grad_norm": 0.7286893725395203, + "learning_rate": 1.0442880410068546e-06, + "loss": 2.0027, + "step": 25719 + }, + { + "epoch": 0.86, + "grad_norm": 0.7448348999023438, + "learning_rate": 1.0438151911497552e-06, + "loss": 1.9858, + "step": 25720 + }, + { + "epoch": 0.86, + "grad_norm": 0.7271620035171509, + "learning_rate": 1.0433424424743842e-06, + "loss": 2.102, + "step": 25721 + }, + { + "epoch": 0.86, + "grad_norm": 0.7297613620758057, + "learning_rate": 1.0428697949860788e-06, + "loss": 2.0057, + "step": 25722 + }, + { + "epoch": 0.86, + "grad_norm": 0.7493481636047363, + "learning_rate": 1.0423972486901823e-06, + "loss": 2.0531, + "step": 25723 + }, + { + "epoch": 0.86, + "grad_norm": 0.7563741207122803, + "learning_rate": 1.0419248035920326e-06, + "loss": 1.9966, + "step": 25724 + }, + { + "epoch": 0.86, + "grad_norm": 0.7450403571128845, + "learning_rate": 1.0414524596969676e-06, + "loss": 2.0886, + "step": 25725 + }, + { + "epoch": 0.86, + "grad_norm": 0.7410711646080017, + "learning_rate": 1.04098021701032e-06, + "loss": 1.9734, + "step": 25726 + }, + { + "epoch": 0.86, + "grad_norm": 0.7379253506660461, + "learning_rate": 1.0405080755374297e-06, + "loss": 2.0136, + "step": 25727 + }, + { + "epoch": 0.86, + "grad_norm": 0.7556507587432861, + "learning_rate": 1.040036035283628e-06, + "loss": 2.0431, + "step": 25728 + }, + { + "epoch": 0.86, + "grad_norm": 0.7782156467437744, + "learning_rate": 1.0395640962542464e-06, + "loss": 2.0762, + "step": 25729 + }, + { + "epoch": 0.86, + "grad_norm": 0.7497164011001587, + "learning_rate": 1.039092258454618e-06, + "loss": 2.0076, + "step": 25730 + }, + { + "epoch": 0.86, + "grad_norm": 0.7253671288490295, + "learning_rate": 1.0386205218900759e-06, + "loss": 2.0952, + "step": 25731 + }, + { + "epoch": 0.86, + "grad_norm": 0.7224748730659485, + "learning_rate": 1.0381488865659473e-06, + "loss": 1.9896, + "step": 25732 + }, + { + "epoch": 0.86, + "grad_norm": 0.7455363273620605, + "learning_rate": 1.0376773524875583e-06, + "loss": 2.0748, + "step": 25733 + }, + { + "epoch": 0.86, + "grad_norm": 0.760480523109436, + "learning_rate": 1.0372059196602401e-06, + "loss": 1.9962, + "step": 25734 + }, + { + "epoch": 0.86, + "grad_norm": 0.7405458688735962, + "learning_rate": 1.0367345880893155e-06, + "loss": 2.1212, + "step": 25735 + }, + { + "epoch": 0.86, + "grad_norm": 0.7357072830200195, + "learning_rate": 1.0362633577801096e-06, + "loss": 2.0512, + "step": 25736 + }, + { + "epoch": 0.86, + "grad_norm": 0.7341716289520264, + "learning_rate": 1.035792228737952e-06, + "loss": 2.0868, + "step": 25737 + }, + { + "epoch": 0.86, + "grad_norm": 0.7235838770866394, + "learning_rate": 1.0353212009681545e-06, + "loss": 2.0778, + "step": 25738 + }, + { + "epoch": 0.86, + "grad_norm": 0.7489319443702698, + "learning_rate": 1.0348502744760457e-06, + "loss": 2.0375, + "step": 25739 + }, + { + "epoch": 0.86, + "grad_norm": 0.7543151378631592, + "learning_rate": 1.0343794492669457e-06, + "loss": 2.0705, + "step": 25740 + }, + { + "epoch": 0.86, + "grad_norm": 0.7414613366127014, + "learning_rate": 1.0339087253461732e-06, + "loss": 2.0184, + "step": 25741 + }, + { + "epoch": 0.86, + "grad_norm": 0.7648339867591858, + "learning_rate": 1.033438102719042e-06, + "loss": 2.065, + "step": 25742 + }, + { + "epoch": 0.86, + "grad_norm": 0.7445924878120422, + "learning_rate": 1.0329675813908758e-06, + "loss": 2.0883, + "step": 25743 + }, + { + "epoch": 0.86, + "grad_norm": 0.7281510829925537, + "learning_rate": 1.0324971613669832e-06, + "loss": 2.0415, + "step": 25744 + }, + { + "epoch": 0.86, + "grad_norm": 0.762367308139801, + "learning_rate": 1.0320268426526825e-06, + "loss": 2.065, + "step": 25745 + }, + { + "epoch": 0.86, + "grad_norm": 0.7247112989425659, + "learning_rate": 1.0315566252532894e-06, + "loss": 1.9791, + "step": 25746 + }, + { + "epoch": 0.86, + "grad_norm": 0.7449166774749756, + "learning_rate": 1.031086509174113e-06, + "loss": 2.0674, + "step": 25747 + }, + { + "epoch": 0.86, + "grad_norm": 0.727767825126648, + "learning_rate": 1.0306164944204634e-06, + "loss": 2.0274, + "step": 25748 + }, + { + "epoch": 0.86, + "grad_norm": 0.764346182346344, + "learning_rate": 1.0301465809976541e-06, + "loss": 2.0318, + "step": 25749 + }, + { + "epoch": 0.86, + "grad_norm": 0.7388762831687927, + "learning_rate": 1.029676768910991e-06, + "loss": 1.9891, + "step": 25750 + }, + { + "epoch": 0.86, + "grad_norm": 0.7313951849937439, + "learning_rate": 1.0292070581657809e-06, + "loss": 2.0054, + "step": 25751 + }, + { + "epoch": 0.86, + "grad_norm": 0.7080071568489075, + "learning_rate": 1.028737448767333e-06, + "loss": 2.0571, + "step": 25752 + }, + { + "epoch": 0.86, + "grad_norm": 0.7233836650848389, + "learning_rate": 1.0282679407209529e-06, + "loss": 2.0532, + "step": 25753 + }, + { + "epoch": 0.86, + "grad_norm": 0.7567445039749146, + "learning_rate": 1.027798534031944e-06, + "loss": 2.0188, + "step": 25754 + }, + { + "epoch": 0.86, + "grad_norm": 0.74405437707901, + "learning_rate": 1.0273292287056069e-06, + "loss": 2.0173, + "step": 25755 + }, + { + "epoch": 0.86, + "grad_norm": 0.7492372989654541, + "learning_rate": 1.026860024747247e-06, + "loss": 2.0079, + "step": 25756 + }, + { + "epoch": 0.86, + "grad_norm": 0.7637950778007507, + "learning_rate": 1.0263909221621627e-06, + "loss": 2.0465, + "step": 25757 + }, + { + "epoch": 0.86, + "grad_norm": 0.7583557367324829, + "learning_rate": 1.0259219209556536e-06, + "loss": 2.0225, + "step": 25758 + }, + { + "epoch": 0.86, + "grad_norm": 0.7359915971755981, + "learning_rate": 1.025453021133025e-06, + "loss": 2.0514, + "step": 25759 + }, + { + "epoch": 0.86, + "grad_norm": 0.7662783861160278, + "learning_rate": 1.0249842226995633e-06, + "loss": 2.0096, + "step": 25760 + }, + { + "epoch": 0.86, + "grad_norm": 0.7452043890953064, + "learning_rate": 1.0245155256605699e-06, + "loss": 2.0323, + "step": 25761 + }, + { + "epoch": 0.86, + "grad_norm": 0.7338219285011292, + "learning_rate": 1.024046930021343e-06, + "loss": 2.0483, + "step": 25762 + }, + { + "epoch": 0.86, + "grad_norm": 0.7277237176895142, + "learning_rate": 1.0235784357871725e-06, + "loss": 2.071, + "step": 25763 + }, + { + "epoch": 0.86, + "grad_norm": 0.7613467574119568, + "learning_rate": 1.0231100429633511e-06, + "loss": 2.0356, + "step": 25764 + }, + { + "epoch": 0.86, + "grad_norm": 0.7483755350112915, + "learning_rate": 1.0226417515551724e-06, + "loss": 2.0242, + "step": 25765 + }, + { + "epoch": 0.86, + "grad_norm": 0.7352116107940674, + "learning_rate": 1.022173561567924e-06, + "loss": 2.0541, + "step": 25766 + }, + { + "epoch": 0.86, + "grad_norm": 0.7330235838890076, + "learning_rate": 1.0217054730068975e-06, + "loss": 2.0134, + "step": 25767 + }, + { + "epoch": 0.86, + "grad_norm": 0.7904192209243774, + "learning_rate": 1.0212374858773832e-06, + "loss": 2.0129, + "step": 25768 + }, + { + "epoch": 0.86, + "grad_norm": 0.7245510220527649, + "learning_rate": 1.0207696001846655e-06, + "loss": 1.9769, + "step": 25769 + }, + { + "epoch": 0.86, + "grad_norm": 0.7236512899398804, + "learning_rate": 1.0203018159340282e-06, + "loss": 2.0261, + "step": 25770 + }, + { + "epoch": 0.86, + "grad_norm": 0.7691099643707275, + "learning_rate": 1.0198341331307582e-06, + "loss": 2.0609, + "step": 25771 + }, + { + "epoch": 0.86, + "grad_norm": 0.7410522103309631, + "learning_rate": 1.0193665517801443e-06, + "loss": 1.9685, + "step": 25772 + }, + { + "epoch": 0.86, + "grad_norm": 0.7280692458152771, + "learning_rate": 1.018899071887459e-06, + "loss": 2.0092, + "step": 25773 + }, + { + "epoch": 0.86, + "grad_norm": 0.7256282567977905, + "learning_rate": 1.0184316934579885e-06, + "loss": 1.982, + "step": 25774 + }, + { + "epoch": 0.86, + "grad_norm": 0.7744770050048828, + "learning_rate": 1.017964416497015e-06, + "loss": 2.011, + "step": 25775 + }, + { + "epoch": 0.86, + "grad_norm": 0.724440336227417, + "learning_rate": 1.0174972410098138e-06, + "loss": 2.0261, + "step": 25776 + }, + { + "epoch": 0.86, + "grad_norm": 0.7367421388626099, + "learning_rate": 1.017030167001667e-06, + "loss": 2.0665, + "step": 25777 + }, + { + "epoch": 0.86, + "grad_norm": 0.7193717956542969, + "learning_rate": 1.0165631944778475e-06, + "loss": 2.0794, + "step": 25778 + }, + { + "epoch": 0.86, + "grad_norm": 0.7565749883651733, + "learning_rate": 1.0160963234436316e-06, + "loss": 2.0793, + "step": 25779 + }, + { + "epoch": 0.86, + "grad_norm": 0.7250953316688538, + "learning_rate": 1.0156295539042937e-06, + "loss": 2.0222, + "step": 25780 + }, + { + "epoch": 0.86, + "grad_norm": 0.7378321290016174, + "learning_rate": 1.0151628858651097e-06, + "loss": 2.0266, + "step": 25781 + }, + { + "epoch": 0.86, + "grad_norm": 0.746042013168335, + "learning_rate": 1.014696319331351e-06, + "loss": 2.0462, + "step": 25782 + }, + { + "epoch": 0.86, + "grad_norm": 0.7468606233596802, + "learning_rate": 1.014229854308284e-06, + "loss": 2.092, + "step": 25783 + }, + { + "epoch": 0.86, + "grad_norm": 0.7180263996124268, + "learning_rate": 1.013763490801184e-06, + "loss": 2.045, + "step": 25784 + }, + { + "epoch": 0.86, + "grad_norm": 0.7249382734298706, + "learning_rate": 1.0132972288153175e-06, + "loss": 2.0714, + "step": 25785 + }, + { + "epoch": 0.86, + "grad_norm": 0.7478383779525757, + "learning_rate": 1.0128310683559507e-06, + "loss": 2.0022, + "step": 25786 + }, + { + "epoch": 0.86, + "grad_norm": 0.7205400466918945, + "learning_rate": 1.012365009428351e-06, + "loss": 1.992, + "step": 25787 + }, + { + "epoch": 0.86, + "grad_norm": 0.7346038222312927, + "learning_rate": 1.011899052037787e-06, + "loss": 2.0128, + "step": 25788 + }, + { + "epoch": 0.86, + "grad_norm": 0.73969966173172, + "learning_rate": 1.0114331961895174e-06, + "loss": 2.0471, + "step": 25789 + }, + { + "epoch": 0.86, + "grad_norm": 0.7428418397903442, + "learning_rate": 1.0109674418888093e-06, + "loss": 2.0247, + "step": 25790 + }, + { + "epoch": 0.86, + "grad_norm": 0.7601402401924133, + "learning_rate": 1.0105017891409242e-06, + "loss": 2.0143, + "step": 25791 + }, + { + "epoch": 0.86, + "grad_norm": 0.7239035964012146, + "learning_rate": 1.0100362379511186e-06, + "loss": 2.0355, + "step": 25792 + }, + { + "epoch": 0.86, + "grad_norm": 0.7687256932258606, + "learning_rate": 1.0095707883246552e-06, + "loss": 2.0433, + "step": 25793 + }, + { + "epoch": 0.86, + "grad_norm": 0.754247784614563, + "learning_rate": 1.0091054402667955e-06, + "loss": 2.0838, + "step": 25794 + }, + { + "epoch": 0.86, + "grad_norm": 0.7615288496017456, + "learning_rate": 1.0086401937827906e-06, + "loss": 2.0499, + "step": 25795 + }, + { + "epoch": 0.86, + "grad_norm": 0.7577502131462097, + "learning_rate": 1.0081750488778985e-06, + "loss": 2.0797, + "step": 25796 + }, + { + "epoch": 0.86, + "grad_norm": 0.755070149898529, + "learning_rate": 1.0077100055573774e-06, + "loss": 2.0639, + "step": 25797 + }, + { + "epoch": 0.86, + "grad_norm": 0.7582204937934875, + "learning_rate": 1.0072450638264763e-06, + "loss": 2.0444, + "step": 25798 + }, + { + "epoch": 0.86, + "grad_norm": 0.7567570209503174, + "learning_rate": 1.0067802236904523e-06, + "loss": 2.0173, + "step": 25799 + }, + { + "epoch": 0.86, + "grad_norm": 0.7594166994094849, + "learning_rate": 1.0063154851545542e-06, + "loss": 2.0239, + "step": 25800 + }, + { + "epoch": 0.86, + "grad_norm": 0.7277630567550659, + "learning_rate": 1.0058508482240315e-06, + "loss": 1.9733, + "step": 25801 + }, + { + "epoch": 0.86, + "grad_norm": 0.7470248937606812, + "learning_rate": 1.0053863129041353e-06, + "loss": 2.0243, + "step": 25802 + }, + { + "epoch": 0.86, + "grad_norm": 0.7536552548408508, + "learning_rate": 1.0049218792001147e-06, + "loss": 2.0387, + "step": 25803 + }, + { + "epoch": 0.86, + "grad_norm": 0.7575001120567322, + "learning_rate": 1.0044575471172147e-06, + "loss": 2.0467, + "step": 25804 + }, + { + "epoch": 0.86, + "grad_norm": 0.7614201307296753, + "learning_rate": 1.0039933166606797e-06, + "loss": 2.0721, + "step": 25805 + }, + { + "epoch": 0.86, + "grad_norm": 0.7290003299713135, + "learning_rate": 1.0035291878357578e-06, + "loss": 1.9898, + "step": 25806 + }, + { + "epoch": 0.86, + "grad_norm": 0.7385231256484985, + "learning_rate": 1.0030651606476893e-06, + "loss": 2.0349, + "step": 25807 + }, + { + "epoch": 0.86, + "grad_norm": 0.7457908391952515, + "learning_rate": 1.00260123510172e-06, + "loss": 2.0494, + "step": 25808 + }, + { + "epoch": 0.86, + "grad_norm": 0.7281091809272766, + "learning_rate": 1.0021374112030857e-06, + "loss": 2.0246, + "step": 25809 + }, + { + "epoch": 0.86, + "grad_norm": 0.7556902170181274, + "learning_rate": 1.0016736889570332e-06, + "loss": 2.0554, + "step": 25810 + }, + { + "epoch": 0.86, + "grad_norm": 0.722694456577301, + "learning_rate": 1.0012100683687953e-06, + "loss": 2.0215, + "step": 25811 + }, + { + "epoch": 0.86, + "grad_norm": 0.7761778831481934, + "learning_rate": 1.0007465494436142e-06, + "loss": 2.0971, + "step": 25812 + }, + { + "epoch": 0.86, + "grad_norm": 0.7278135418891907, + "learning_rate": 1.0002831321867235e-06, + "loss": 2.0636, + "step": 25813 + }, + { + "epoch": 0.86, + "grad_norm": 0.7265075445175171, + "learning_rate": 9.99819816603359e-07, + "loss": 2.0126, + "step": 25814 + }, + { + "epoch": 0.86, + "grad_norm": 0.7465983629226685, + "learning_rate": 9.993566026987544e-07, + "loss": 1.9358, + "step": 25815 + }, + { + "epoch": 0.86, + "grad_norm": 0.7712118029594421, + "learning_rate": 9.988934904781488e-07, + "loss": 2.0723, + "step": 25816 + }, + { + "epoch": 0.86, + "grad_norm": 0.7292450070381165, + "learning_rate": 9.98430479946766e-07, + "loss": 1.9269, + "step": 25817 + }, + { + "epoch": 0.86, + "grad_norm": 0.7559846043586731, + "learning_rate": 9.97967571109839e-07, + "loss": 2.0205, + "step": 25818 + }, + { + "epoch": 0.86, + "grad_norm": 0.7426708340644836, + "learning_rate": 9.97504763972601e-07, + "loss": 2.1079, + "step": 25819 + }, + { + "epoch": 0.86, + "grad_norm": 0.766106903553009, + "learning_rate": 9.970420585402762e-07, + "loss": 2.0437, + "step": 25820 + }, + { + "epoch": 0.86, + "grad_norm": 0.7611443996429443, + "learning_rate": 9.965794548180963e-07, + "loss": 2.0476, + "step": 25821 + }, + { + "epoch": 0.86, + "grad_norm": 0.7937203049659729, + "learning_rate": 9.961169528112847e-07, + "loss": 2.0654, + "step": 25822 + }, + { + "epoch": 0.86, + "grad_norm": 0.7490450143814087, + "learning_rate": 9.956545525250661e-07, + "loss": 2.0061, + "step": 25823 + }, + { + "epoch": 0.86, + "grad_norm": 0.7592965364456177, + "learning_rate": 9.951922539646642e-07, + "loss": 2.0987, + "step": 25824 + }, + { + "epoch": 0.86, + "grad_norm": 0.7501606941223145, + "learning_rate": 9.947300571353047e-07, + "loss": 2.0633, + "step": 25825 + }, + { + "epoch": 0.86, + "grad_norm": 0.7309877872467041, + "learning_rate": 9.94267962042208e-07, + "loss": 2.0924, + "step": 25826 + }, + { + "epoch": 0.86, + "grad_norm": 0.7499247193336487, + "learning_rate": 9.93805968690592e-07, + "loss": 2.0134, + "step": 25827 + }, + { + "epoch": 0.86, + "grad_norm": 0.7084293365478516, + "learning_rate": 9.933440770856795e-07, + "loss": 2.0226, + "step": 25828 + }, + { + "epoch": 0.86, + "grad_norm": 0.7412239909172058, + "learning_rate": 9.92882287232686e-07, + "loss": 2.0339, + "step": 25829 + }, + { + "epoch": 0.86, + "grad_norm": 0.7101403474807739, + "learning_rate": 9.92420599136832e-07, + "loss": 2.0756, + "step": 25830 + }, + { + "epoch": 0.86, + "grad_norm": 0.7837666273117065, + "learning_rate": 9.919590128033275e-07, + "loss": 2.1011, + "step": 25831 + }, + { + "epoch": 0.86, + "grad_norm": 0.7392111420631409, + "learning_rate": 9.91497528237394e-07, + "loss": 1.9371, + "step": 25832 + }, + { + "epoch": 0.86, + "grad_norm": 0.7430040240287781, + "learning_rate": 9.910361454442396e-07, + "loss": 2.0728, + "step": 25833 + }, + { + "epoch": 0.86, + "grad_norm": 0.752509593963623, + "learning_rate": 9.905748644290815e-07, + "loss": 2.0866, + "step": 25834 + }, + { + "epoch": 0.86, + "grad_norm": 0.7462926506996155, + "learning_rate": 9.901136851971283e-07, + "loss": 1.9459, + "step": 25835 + }, + { + "epoch": 0.86, + "grad_norm": 0.7393240332603455, + "learning_rate": 9.896526077535884e-07, + "loss": 2.0053, + "step": 25836 + }, + { + "epoch": 0.86, + "grad_norm": 0.7462487816810608, + "learning_rate": 9.89191632103672e-07, + "loss": 2.0342, + "step": 25837 + }, + { + "epoch": 0.86, + "grad_norm": 0.7826508283615112, + "learning_rate": 9.887307582525907e-07, + "loss": 2.1251, + "step": 25838 + }, + { + "epoch": 0.86, + "grad_norm": 0.7426804304122925, + "learning_rate": 9.882699862055479e-07, + "loss": 2.0986, + "step": 25839 + }, + { + "epoch": 0.86, + "grad_norm": 0.7832672595977783, + "learning_rate": 9.878093159677471e-07, + "loss": 2.0264, + "step": 25840 + }, + { + "epoch": 0.86, + "grad_norm": 0.7444686889648438, + "learning_rate": 9.873487475443965e-07, + "loss": 2.0664, + "step": 25841 + }, + { + "epoch": 0.86, + "grad_norm": 0.7687342166900635, + "learning_rate": 9.868882809406964e-07, + "loss": 2.0925, + "step": 25842 + }, + { + "epoch": 0.86, + "grad_norm": 0.7473678588867188, + "learning_rate": 9.864279161618528e-07, + "loss": 2.0797, + "step": 25843 + }, + { + "epoch": 0.86, + "grad_norm": 0.7303598523139954, + "learning_rate": 9.859676532130636e-07, + "loss": 2.0988, + "step": 25844 + }, + { + "epoch": 0.86, + "grad_norm": 0.7247107625007629, + "learning_rate": 9.855074920995278e-07, + "loss": 2.0321, + "step": 25845 + }, + { + "epoch": 0.86, + "grad_norm": 0.7181931138038635, + "learning_rate": 9.850474328264448e-07, + "loss": 2.0315, + "step": 25846 + }, + { + "epoch": 0.86, + "grad_norm": 0.7480742335319519, + "learning_rate": 9.845874753990138e-07, + "loss": 2.0487, + "step": 25847 + }, + { + "epoch": 0.86, + "grad_norm": 0.7156351804733276, + "learning_rate": 9.841276198224315e-07, + "loss": 2.0008, + "step": 25848 + }, + { + "epoch": 0.86, + "grad_norm": 0.7483587861061096, + "learning_rate": 9.836678661018884e-07, + "loss": 2.0755, + "step": 25849 + }, + { + "epoch": 0.86, + "grad_norm": 0.7326157093048096, + "learning_rate": 9.832082142425836e-07, + "loss": 1.9071, + "step": 25850 + }, + { + "epoch": 0.86, + "grad_norm": 0.7745359539985657, + "learning_rate": 9.827486642497064e-07, + "loss": 2.1483, + "step": 25851 + }, + { + "epoch": 0.86, + "grad_norm": 0.7388675808906555, + "learning_rate": 9.822892161284525e-07, + "loss": 2.1346, + "step": 25852 + }, + { + "epoch": 0.86, + "grad_norm": 0.7546560168266296, + "learning_rate": 9.81829869884008e-07, + "loss": 2.0854, + "step": 25853 + }, + { + "epoch": 0.86, + "grad_norm": 0.727989912033081, + "learning_rate": 9.81370625521566e-07, + "loss": 2.0088, + "step": 25854 + }, + { + "epoch": 0.86, + "grad_norm": 0.7250304818153381, + "learning_rate": 9.809114830463118e-07, + "loss": 2.0199, + "step": 25855 + }, + { + "epoch": 0.86, + "grad_norm": 0.7670961618423462, + "learning_rate": 9.804524424634355e-07, + "loss": 2.139, + "step": 25856 + }, + { + "epoch": 0.86, + "grad_norm": 0.7555557489395142, + "learning_rate": 9.799935037781217e-07, + "loss": 2.0057, + "step": 25857 + }, + { + "epoch": 0.86, + "grad_norm": 0.765159010887146, + "learning_rate": 9.79534666995553e-07, + "loss": 1.9568, + "step": 25858 + }, + { + "epoch": 0.86, + "grad_norm": 0.7768719792366028, + "learning_rate": 9.790759321209152e-07, + "loss": 2.0334, + "step": 25859 + }, + { + "epoch": 0.86, + "grad_norm": 0.7363049983978271, + "learning_rate": 9.786172991593934e-07, + "loss": 2.0006, + "step": 25860 + }, + { + "epoch": 0.86, + "grad_norm": 0.7451250553131104, + "learning_rate": 9.781587681161652e-07, + "loss": 2.0176, + "step": 25861 + }, + { + "epoch": 0.86, + "grad_norm": 0.7400102615356445, + "learning_rate": 9.7770033899641e-07, + "loss": 2.0444, + "step": 25862 + }, + { + "epoch": 0.86, + "grad_norm": 0.7256300449371338, + "learning_rate": 9.772420118053117e-07, + "loss": 1.964, + "step": 25863 + }, + { + "epoch": 0.86, + "grad_norm": 0.7720984816551208, + "learning_rate": 9.767837865480434e-07, + "loss": 2.0688, + "step": 25864 + }, + { + "epoch": 0.86, + "grad_norm": 0.7473896741867065, + "learning_rate": 9.763256632297847e-07, + "loss": 2.0841, + "step": 25865 + }, + { + "epoch": 0.86, + "grad_norm": 0.7628147006034851, + "learning_rate": 9.7586764185571e-07, + "loss": 2.0016, + "step": 25866 + }, + { + "epoch": 0.86, + "grad_norm": 0.7686033844947815, + "learning_rate": 9.754097224309934e-07, + "loss": 2.0025, + "step": 25867 + }, + { + "epoch": 0.86, + "grad_norm": 0.7593132853507996, + "learning_rate": 9.749519049608081e-07, + "loss": 2.1048, + "step": 25868 + }, + { + "epoch": 0.86, + "grad_norm": 0.7308956384658813, + "learning_rate": 9.74494189450329e-07, + "loss": 1.9692, + "step": 25869 + }, + { + "epoch": 0.86, + "grad_norm": 0.736372709274292, + "learning_rate": 9.740365759047255e-07, + "loss": 1.9992, + "step": 25870 + }, + { + "epoch": 0.86, + "grad_norm": 0.7395648956298828, + "learning_rate": 9.735790643291644e-07, + "loss": 2.0302, + "step": 25871 + }, + { + "epoch": 0.86, + "grad_norm": 0.7363121509552002, + "learning_rate": 9.731216547288168e-07, + "loss": 2.0476, + "step": 25872 + }, + { + "epoch": 0.86, + "grad_norm": 0.7756577134132385, + "learning_rate": 9.726643471088526e-07, + "loss": 2.0699, + "step": 25873 + }, + { + "epoch": 0.86, + "grad_norm": 0.7218660712242126, + "learning_rate": 9.72207141474437e-07, + "loss": 2.0283, + "step": 25874 + }, + { + "epoch": 0.86, + "grad_norm": 0.7502326369285583, + "learning_rate": 9.71750037830732e-07, + "loss": 2.063, + "step": 25875 + }, + { + "epoch": 0.86, + "grad_norm": 0.7390763163566589, + "learning_rate": 9.712930361829054e-07, + "loss": 2.023, + "step": 25876 + }, + { + "epoch": 0.86, + "grad_norm": 0.7505145072937012, + "learning_rate": 9.708361365361173e-07, + "loss": 2.0673, + "step": 25877 + }, + { + "epoch": 0.86, + "grad_norm": 0.7782769203186035, + "learning_rate": 9.703793388955318e-07, + "loss": 2.1059, + "step": 25878 + }, + { + "epoch": 0.86, + "grad_norm": 0.739403486251831, + "learning_rate": 9.6992264326631e-07, + "loss": 1.936, + "step": 25879 + }, + { + "epoch": 0.86, + "grad_norm": 0.7706981897354126, + "learning_rate": 9.694660496536079e-07, + "loss": 2.0477, + "step": 25880 + }, + { + "epoch": 0.86, + "grad_norm": 0.7154578566551208, + "learning_rate": 9.690095580625859e-07, + "loss": 2.0233, + "step": 25881 + }, + { + "epoch": 0.86, + "grad_norm": 0.7544198036193848, + "learning_rate": 9.68553168498403e-07, + "loss": 2.1212, + "step": 25882 + }, + { + "epoch": 0.86, + "grad_norm": 0.7208190560340881, + "learning_rate": 9.680968809662128e-07, + "loss": 2.0165, + "step": 25883 + }, + { + "epoch": 0.86, + "grad_norm": 0.7738233208656311, + "learning_rate": 9.676406954711693e-07, + "loss": 2.0242, + "step": 25884 + }, + { + "epoch": 0.86, + "grad_norm": 0.7672053575515747, + "learning_rate": 9.671846120184303e-07, + "loss": 2.0341, + "step": 25885 + }, + { + "epoch": 0.86, + "grad_norm": 0.7381328344345093, + "learning_rate": 9.667286306131429e-07, + "loss": 2.0451, + "step": 25886 + }, + { + "epoch": 0.86, + "grad_norm": 0.7485707998275757, + "learning_rate": 9.662727512604642e-07, + "loss": 2.0537, + "step": 25887 + }, + { + "epoch": 0.86, + "grad_norm": 0.7471725940704346, + "learning_rate": 9.658169739655388e-07, + "loss": 2.0671, + "step": 25888 + }, + { + "epoch": 0.86, + "grad_norm": 0.7442446947097778, + "learning_rate": 9.653612987335215e-07, + "loss": 1.9365, + "step": 25889 + }, + { + "epoch": 0.86, + "grad_norm": 0.7283656001091003, + "learning_rate": 9.649057255695549e-07, + "loss": 2.0293, + "step": 25890 + }, + { + "epoch": 0.86, + "grad_norm": 0.7624244093894958, + "learning_rate": 9.644502544787905e-07, + "loss": 2.1114, + "step": 25891 + }, + { + "epoch": 0.86, + "grad_norm": 0.7553475499153137, + "learning_rate": 9.639948854663717e-07, + "loss": 2.0159, + "step": 25892 + }, + { + "epoch": 0.86, + "grad_norm": 0.7623302936553955, + "learning_rate": 9.635396185374412e-07, + "loss": 2.0533, + "step": 25893 + }, + { + "epoch": 0.86, + "grad_norm": 0.7714210748672485, + "learning_rate": 9.630844536971451e-07, + "loss": 2.0709, + "step": 25894 + }, + { + "epoch": 0.86, + "grad_norm": 0.7370160818099976, + "learning_rate": 9.626293909506257e-07, + "loss": 2.0286, + "step": 25895 + }, + { + "epoch": 0.86, + "grad_norm": 0.7419747710227966, + "learning_rate": 9.621744303030223e-07, + "loss": 2.0401, + "step": 25896 + }, + { + "epoch": 0.86, + "grad_norm": 0.750767707824707, + "learning_rate": 9.617195717594751e-07, + "loss": 2.0733, + "step": 25897 + }, + { + "epoch": 0.86, + "grad_norm": 0.7311986088752747, + "learning_rate": 9.612648153251236e-07, + "loss": 2.0728, + "step": 25898 + }, + { + "epoch": 0.86, + "grad_norm": 0.76406329870224, + "learning_rate": 9.608101610051045e-07, + "loss": 2.0218, + "step": 25899 + }, + { + "epoch": 0.86, + "grad_norm": 0.7233788967132568, + "learning_rate": 9.603556088045562e-07, + "loss": 2.0704, + "step": 25900 + }, + { + "epoch": 0.86, + "grad_norm": 0.7302941679954529, + "learning_rate": 9.599011587286123e-07, + "loss": 2.0494, + "step": 25901 + }, + { + "epoch": 0.86, + "grad_norm": 0.7442104816436768, + "learning_rate": 9.59446810782405e-07, + "loss": 2.0337, + "step": 25902 + }, + { + "epoch": 0.86, + "grad_norm": 0.7410232424736023, + "learning_rate": 9.589925649710697e-07, + "loss": 2.0261, + "step": 25903 + }, + { + "epoch": 0.86, + "grad_norm": 0.7334192991256714, + "learning_rate": 9.585384212997395e-07, + "loss": 2.0647, + "step": 25904 + }, + { + "epoch": 0.86, + "grad_norm": 0.7352466583251953, + "learning_rate": 9.580843797735428e-07, + "loss": 2.0479, + "step": 25905 + }, + { + "epoch": 0.86, + "grad_norm": 0.7660627961158752, + "learning_rate": 9.57630440397609e-07, + "loss": 2.1313, + "step": 25906 + }, + { + "epoch": 0.86, + "grad_norm": 0.7452356219291687, + "learning_rate": 9.571766031770691e-07, + "loss": 2.0611, + "step": 25907 + }, + { + "epoch": 0.86, + "grad_norm": 0.7804729342460632, + "learning_rate": 9.567228681170447e-07, + "loss": 2.006, + "step": 25908 + }, + { + "epoch": 0.86, + "grad_norm": 0.7775230407714844, + "learning_rate": 9.562692352226688e-07, + "loss": 2.1375, + "step": 25909 + }, + { + "epoch": 0.86, + "grad_norm": 0.7514261603355408, + "learning_rate": 9.558157044990612e-07, + "loss": 1.9799, + "step": 25910 + }, + { + "epoch": 0.86, + "grad_norm": 0.7650182843208313, + "learning_rate": 9.55362275951348e-07, + "loss": 2.0262, + "step": 25911 + }, + { + "epoch": 0.86, + "grad_norm": 0.7443432211875916, + "learning_rate": 9.549089495846509e-07, + "loss": 2.0577, + "step": 25912 + }, + { + "epoch": 0.86, + "grad_norm": 0.7523942589759827, + "learning_rate": 9.54455725404092e-07, + "loss": 2.0983, + "step": 25913 + }, + { + "epoch": 0.86, + "grad_norm": 0.7565601468086243, + "learning_rate": 9.54002603414792e-07, + "loss": 2.0746, + "step": 25914 + }, + { + "epoch": 0.86, + "grad_norm": 0.7297800779342651, + "learning_rate": 9.535495836218666e-07, + "loss": 2.0166, + "step": 25915 + }, + { + "epoch": 0.86, + "grad_norm": 0.7498037219047546, + "learning_rate": 9.530966660304363e-07, + "loss": 2.0344, + "step": 25916 + }, + { + "epoch": 0.86, + "grad_norm": 0.7590672969818115, + "learning_rate": 9.526438506456204e-07, + "loss": 2.0668, + "step": 25917 + }, + { + "epoch": 0.86, + "grad_norm": 0.7453739047050476, + "learning_rate": 9.521911374725312e-07, + "loss": 2.1032, + "step": 25918 + }, + { + "epoch": 0.86, + "grad_norm": 0.7258577942848206, + "learning_rate": 9.517385265162826e-07, + "loss": 2.0448, + "step": 25919 + }, + { + "epoch": 0.86, + "grad_norm": 0.7842622995376587, + "learning_rate": 9.512860177819916e-07, + "loss": 2.0296, + "step": 25920 + }, + { + "epoch": 0.86, + "grad_norm": 0.7398545742034912, + "learning_rate": 9.508336112747651e-07, + "loss": 2.0571, + "step": 25921 + }, + { + "epoch": 0.86, + "grad_norm": 0.7325178980827332, + "learning_rate": 9.50381306999718e-07, + "loss": 2.0263, + "step": 25922 + }, + { + "epoch": 0.86, + "grad_norm": 0.7357069849967957, + "learning_rate": 9.49929104961963e-07, + "loss": 2.0517, + "step": 25923 + }, + { + "epoch": 0.86, + "grad_norm": 0.7440596222877502, + "learning_rate": 9.494770051666013e-07, + "loss": 2.0202, + "step": 25924 + }, + { + "epoch": 0.86, + "grad_norm": 0.7203040719032288, + "learning_rate": 9.490250076187446e-07, + "loss": 1.9925, + "step": 25925 + }, + { + "epoch": 0.86, + "grad_norm": 0.7143058776855469, + "learning_rate": 9.485731123234998e-07, + "loss": 2.0501, + "step": 25926 + }, + { + "epoch": 0.86, + "grad_norm": 0.7274224162101746, + "learning_rate": 9.481213192859717e-07, + "loss": 1.9855, + "step": 25927 + }, + { + "epoch": 0.86, + "grad_norm": 0.7742795348167419, + "learning_rate": 9.476696285112629e-07, + "loss": 2.0799, + "step": 25928 + }, + { + "epoch": 0.86, + "grad_norm": 0.7491094470024109, + "learning_rate": 9.472180400044784e-07, + "loss": 2.0947, + "step": 25929 + }, + { + "epoch": 0.86, + "grad_norm": 0.7620345950126648, + "learning_rate": 9.467665537707182e-07, + "loss": 2.0564, + "step": 25930 + }, + { + "epoch": 0.86, + "grad_norm": 0.7194277048110962, + "learning_rate": 9.46315169815084e-07, + "loss": 2.0973, + "step": 25931 + }, + { + "epoch": 0.86, + "grad_norm": 0.7297775149345398, + "learning_rate": 9.45863888142674e-07, + "loss": 1.9733, + "step": 25932 + }, + { + "epoch": 0.86, + "grad_norm": 0.8071901202201843, + "learning_rate": 9.454127087585896e-07, + "loss": 2.0795, + "step": 25933 + }, + { + "epoch": 0.86, + "grad_norm": 0.7592566013336182, + "learning_rate": 9.449616316679244e-07, + "loss": 2.0599, + "step": 25934 + }, + { + "epoch": 0.86, + "grad_norm": 0.7243884205818176, + "learning_rate": 9.445106568757778e-07, + "loss": 1.9771, + "step": 25935 + }, + { + "epoch": 0.86, + "grad_norm": 0.7671956419944763, + "learning_rate": 9.440597843872423e-07, + "loss": 2.0919, + "step": 25936 + }, + { + "epoch": 0.86, + "grad_norm": 0.7702879905700684, + "learning_rate": 9.436090142074095e-07, + "loss": 2.1229, + "step": 25937 + }, + { + "epoch": 0.86, + "grad_norm": 0.7729118466377258, + "learning_rate": 9.431583463413752e-07, + "loss": 2.0515, + "step": 25938 + }, + { + "epoch": 0.86, + "grad_norm": 0.7131680250167847, + "learning_rate": 9.427077807942319e-07, + "loss": 1.9665, + "step": 25939 + }, + { + "epoch": 0.86, + "grad_norm": 0.7983318567276001, + "learning_rate": 9.422573175710681e-07, + "loss": 2.0788, + "step": 25940 + }, + { + "epoch": 0.86, + "grad_norm": 0.7479981184005737, + "learning_rate": 9.418069566769717e-07, + "loss": 2.0392, + "step": 25941 + }, + { + "epoch": 0.86, + "grad_norm": 0.7848258018493652, + "learning_rate": 9.413566981170319e-07, + "loss": 2.0931, + "step": 25942 + }, + { + "epoch": 0.86, + "grad_norm": 0.7295976877212524, + "learning_rate": 9.409065418963348e-07, + "loss": 2.0633, + "step": 25943 + }, + { + "epoch": 0.86, + "grad_norm": 0.7395895719528198, + "learning_rate": 9.404564880199652e-07, + "loss": 2.1118, + "step": 25944 + }, + { + "epoch": 0.86, + "grad_norm": 0.7482845783233643, + "learning_rate": 9.400065364930133e-07, + "loss": 2.0222, + "step": 25945 + }, + { + "epoch": 0.86, + "grad_norm": 0.7485872507095337, + "learning_rate": 9.395566873205542e-07, + "loss": 2.0474, + "step": 25946 + }, + { + "epoch": 0.86, + "grad_norm": 0.7452778220176697, + "learning_rate": 9.391069405076736e-07, + "loss": 2.042, + "step": 25947 + }, + { + "epoch": 0.86, + "grad_norm": 0.7633265256881714, + "learning_rate": 9.386572960594542e-07, + "loss": 2.0135, + "step": 25948 + }, + { + "epoch": 0.86, + "grad_norm": 0.7407006621360779, + "learning_rate": 9.382077539809742e-07, + "loss": 2.0604, + "step": 25949 + }, + { + "epoch": 0.86, + "grad_norm": 0.7550188899040222, + "learning_rate": 9.377583142773106e-07, + "loss": 2.094, + "step": 25950 + }, + { + "epoch": 0.86, + "grad_norm": 0.7337163686752319, + "learning_rate": 9.373089769535437e-07, + "loss": 2.0062, + "step": 25951 + }, + { + "epoch": 0.86, + "grad_norm": 0.7670320272445679, + "learning_rate": 9.368597420147474e-07, + "loss": 1.9772, + "step": 25952 + }, + { + "epoch": 0.86, + "grad_norm": 0.7380965948104858, + "learning_rate": 9.364106094659986e-07, + "loss": 1.997, + "step": 25953 + }, + { + "epoch": 0.86, + "grad_norm": 0.740817666053772, + "learning_rate": 9.359615793123722e-07, + "loss": 2.0461, + "step": 25954 + }, + { + "epoch": 0.86, + "grad_norm": 0.7396414875984192, + "learning_rate": 9.355126515589408e-07, + "loss": 2.0321, + "step": 25955 + }, + { + "epoch": 0.86, + "grad_norm": 0.7441156506538391, + "learning_rate": 9.350638262107725e-07, + "loss": 1.985, + "step": 25956 + }, + { + "epoch": 0.86, + "grad_norm": 0.8011876344680786, + "learning_rate": 9.3461510327294e-07, + "loss": 2.0597, + "step": 25957 + }, + { + "epoch": 0.86, + "grad_norm": 0.7383939623832703, + "learning_rate": 9.341664827505182e-07, + "loss": 2.0736, + "step": 25958 + }, + { + "epoch": 0.86, + "grad_norm": 0.7388496994972229, + "learning_rate": 9.337179646485661e-07, + "loss": 1.9517, + "step": 25959 + }, + { + "epoch": 0.86, + "grad_norm": 0.7420719265937805, + "learning_rate": 9.332695489721555e-07, + "loss": 2.0257, + "step": 25960 + }, + { + "epoch": 0.86, + "grad_norm": 0.7358705997467041, + "learning_rate": 9.328212357263533e-07, + "loss": 2.0545, + "step": 25961 + }, + { + "epoch": 0.86, + "grad_norm": 0.7533799409866333, + "learning_rate": 9.323730249162221e-07, + "loss": 2.0047, + "step": 25962 + }, + { + "epoch": 0.86, + "grad_norm": 0.7479290962219238, + "learning_rate": 9.319249165468258e-07, + "loss": 2.0478, + "step": 25963 + }, + { + "epoch": 0.86, + "grad_norm": 0.7249380946159363, + "learning_rate": 9.31476910623228e-07, + "loss": 2.0365, + "step": 25964 + }, + { + "epoch": 0.86, + "grad_norm": 0.7362590432167053, + "learning_rate": 9.31029007150489e-07, + "loss": 2.013, + "step": 25965 + }, + { + "epoch": 0.86, + "grad_norm": 0.7964341044425964, + "learning_rate": 9.305812061336683e-07, + "loss": 2.0904, + "step": 25966 + }, + { + "epoch": 0.86, + "grad_norm": 0.7415114641189575, + "learning_rate": 9.301335075778295e-07, + "loss": 2.0732, + "step": 25967 + }, + { + "epoch": 0.86, + "grad_norm": 0.7472359538078308, + "learning_rate": 9.296859114880241e-07, + "loss": 2.0096, + "step": 25968 + }, + { + "epoch": 0.86, + "grad_norm": 0.778104305267334, + "learning_rate": 9.292384178693104e-07, + "loss": 2.0436, + "step": 25969 + }, + { + "epoch": 0.86, + "grad_norm": 0.7336345314979553, + "learning_rate": 9.287910267267474e-07, + "loss": 2.0037, + "step": 25970 + }, + { + "epoch": 0.86, + "grad_norm": 0.7477326989173889, + "learning_rate": 9.283437380653881e-07, + "loss": 2.0689, + "step": 25971 + }, + { + "epoch": 0.86, + "grad_norm": 0.7556303143501282, + "learning_rate": 9.278965518902816e-07, + "loss": 2.0544, + "step": 25972 + }, + { + "epoch": 0.86, + "grad_norm": 0.7354278564453125, + "learning_rate": 9.274494682064839e-07, + "loss": 2.0762, + "step": 25973 + }, + { + "epoch": 0.86, + "grad_norm": 0.742210328578949, + "learning_rate": 9.270024870190464e-07, + "loss": 1.9872, + "step": 25974 + }, + { + "epoch": 0.86, + "grad_norm": 0.7174685001373291, + "learning_rate": 9.265556083330152e-07, + "loss": 2.0545, + "step": 25975 + }, + { + "epoch": 0.86, + "grad_norm": 0.7708245515823364, + "learning_rate": 9.261088321534439e-07, + "loss": 2.083, + "step": 25976 + }, + { + "epoch": 0.86, + "grad_norm": 0.7628448009490967, + "learning_rate": 9.256621584853764e-07, + "loss": 2.0397, + "step": 25977 + }, + { + "epoch": 0.86, + "grad_norm": 0.724772036075592, + "learning_rate": 9.252155873338586e-07, + "loss": 1.9871, + "step": 25978 + }, + { + "epoch": 0.86, + "grad_norm": 0.7278578281402588, + "learning_rate": 9.247691187039365e-07, + "loss": 2.0186, + "step": 25979 + }, + { + "epoch": 0.86, + "grad_norm": 0.7602989673614502, + "learning_rate": 9.243227526006582e-07, + "loss": 2.0489, + "step": 25980 + }, + { + "epoch": 0.86, + "grad_norm": 0.747604250907898, + "learning_rate": 9.238764890290585e-07, + "loss": 2.0594, + "step": 25981 + }, + { + "epoch": 0.86, + "grad_norm": 0.7269222140312195, + "learning_rate": 9.234303279941837e-07, + "loss": 2.0494, + "step": 25982 + }, + { + "epoch": 0.86, + "grad_norm": 0.7612643837928772, + "learning_rate": 9.229842695010749e-07, + "loss": 2.0512, + "step": 25983 + }, + { + "epoch": 0.86, + "grad_norm": 0.7487745881080627, + "learning_rate": 9.225383135547683e-07, + "loss": 2.0949, + "step": 25984 + }, + { + "epoch": 0.86, + "grad_norm": 0.7577506303787231, + "learning_rate": 9.220924601603065e-07, + "loss": 2.0275, + "step": 25985 + }, + { + "epoch": 0.86, + "grad_norm": 0.7573544383049011, + "learning_rate": 9.216467093227233e-07, + "loss": 2.0131, + "step": 25986 + }, + { + "epoch": 0.86, + "grad_norm": 0.7388461232185364, + "learning_rate": 9.212010610470534e-07, + "loss": 2.1298, + "step": 25987 + }, + { + "epoch": 0.86, + "grad_norm": 0.7494735717773438, + "learning_rate": 9.207555153383329e-07, + "loss": 2.0481, + "step": 25988 + }, + { + "epoch": 0.86, + "grad_norm": 0.7471747994422913, + "learning_rate": 9.203100722015979e-07, + "loss": 2.0191, + "step": 25989 + }, + { + "epoch": 0.86, + "grad_norm": 0.7673196792602539, + "learning_rate": 9.198647316418785e-07, + "loss": 2.1281, + "step": 25990 + }, + { + "epoch": 0.86, + "grad_norm": 0.7598997950553894, + "learning_rate": 9.194194936642042e-07, + "loss": 1.991, + "step": 25991 + }, + { + "epoch": 0.86, + "grad_norm": 0.7313405871391296, + "learning_rate": 9.189743582736077e-07, + "loss": 2.0613, + "step": 25992 + }, + { + "epoch": 0.86, + "grad_norm": 0.7763650417327881, + "learning_rate": 9.185293254751182e-07, + "loss": 2.035, + "step": 25993 + }, + { + "epoch": 0.86, + "grad_norm": 0.7444823384284973, + "learning_rate": 9.180843952737594e-07, + "loss": 2.0085, + "step": 25994 + }, + { + "epoch": 0.86, + "grad_norm": 0.7560100555419922, + "learning_rate": 9.176395676745608e-07, + "loss": 1.9957, + "step": 25995 + }, + { + "epoch": 0.86, + "grad_norm": 0.7714390158653259, + "learning_rate": 9.171948426825494e-07, + "loss": 2.0559, + "step": 25996 + }, + { + "epoch": 0.86, + "grad_norm": 0.7365151643753052, + "learning_rate": 9.167502203027457e-07, + "loss": 2.0816, + "step": 25997 + }, + { + "epoch": 0.86, + "grad_norm": 0.783757209777832, + "learning_rate": 9.163057005401766e-07, + "loss": 2.0629, + "step": 25998 + }, + { + "epoch": 0.86, + "grad_norm": 0.7707266211509705, + "learning_rate": 9.158612833998614e-07, + "loss": 1.9864, + "step": 25999 + }, + { + "epoch": 0.87, + "grad_norm": 0.7445627450942993, + "learning_rate": 9.154169688868208e-07, + "loss": 2.0197, + "step": 26000 + }, + { + "epoch": 0.87, + "grad_norm": 0.7357868552207947, + "learning_rate": 9.149727570060751e-07, + "loss": 1.9896, + "step": 26001 + }, + { + "epoch": 0.87, + "grad_norm": 0.7368590235710144, + "learning_rate": 9.145286477626458e-07, + "loss": 2.0328, + "step": 26002 + }, + { + "epoch": 0.87, + "grad_norm": 0.7526722550392151, + "learning_rate": 9.140846411615445e-07, + "loss": 2.011, + "step": 26003 + }, + { + "epoch": 0.87, + "grad_norm": 0.7341996431350708, + "learning_rate": 9.136407372077894e-07, + "loss": 2.0538, + "step": 26004 + }, + { + "epoch": 0.87, + "grad_norm": 0.7241879105567932, + "learning_rate": 9.131969359063986e-07, + "loss": 2.0562, + "step": 26005 + }, + { + "epoch": 0.87, + "grad_norm": 0.7429711222648621, + "learning_rate": 9.127532372623804e-07, + "loss": 2.0869, + "step": 26006 + }, + { + "epoch": 0.87, + "grad_norm": 0.7392231225967407, + "learning_rate": 9.123096412807531e-07, + "loss": 2.0512, + "step": 26007 + }, + { + "epoch": 0.87, + "grad_norm": 0.8000741600990295, + "learning_rate": 9.118661479665258e-07, + "loss": 2.0234, + "step": 26008 + }, + { + "epoch": 0.87, + "grad_norm": 0.7803536653518677, + "learning_rate": 9.114227573247059e-07, + "loss": 2.0425, + "step": 26009 + }, + { + "epoch": 0.87, + "grad_norm": 0.7460272908210754, + "learning_rate": 9.109794693603058e-07, + "loss": 2.0104, + "step": 26010 + }, + { + "epoch": 0.87, + "grad_norm": 0.7896559834480286, + "learning_rate": 9.105362840783349e-07, + "loss": 2.0597, + "step": 26011 + }, + { + "epoch": 0.87, + "grad_norm": 0.7576748728752136, + "learning_rate": 9.100932014837982e-07, + "loss": 2.07, + "step": 26012 + }, + { + "epoch": 0.87, + "grad_norm": 0.765677809715271, + "learning_rate": 9.096502215816994e-07, + "loss": 1.9985, + "step": 26013 + }, + { + "epoch": 0.87, + "grad_norm": 0.7342962026596069, + "learning_rate": 9.092073443770466e-07, + "loss": 2.01, + "step": 26014 + }, + { + "epoch": 0.87, + "grad_norm": 0.7366447448730469, + "learning_rate": 9.087645698748393e-07, + "loss": 2.0426, + "step": 26015 + }, + { + "epoch": 0.87, + "grad_norm": 0.7369386553764343, + "learning_rate": 9.083218980800845e-07, + "loss": 2.0283, + "step": 26016 + }, + { + "epoch": 0.87, + "grad_norm": 0.7423820495605469, + "learning_rate": 9.078793289977783e-07, + "loss": 2.0624, + "step": 26017 + }, + { + "epoch": 0.87, + "grad_norm": 0.7445455193519592, + "learning_rate": 9.074368626329255e-07, + "loss": 2.0795, + "step": 26018 + }, + { + "epoch": 0.87, + "grad_norm": 0.7274477481842041, + "learning_rate": 9.069944989905199e-07, + "loss": 2.1314, + "step": 26019 + }, + { + "epoch": 0.87, + "grad_norm": 0.7674857378005981, + "learning_rate": 9.065522380755632e-07, + "loss": 2.0815, + "step": 26020 + }, + { + "epoch": 0.87, + "grad_norm": 0.7458963990211487, + "learning_rate": 9.0611007989305e-07, + "loss": 2.0525, + "step": 26021 + }, + { + "epoch": 0.87, + "grad_norm": 0.7949268817901611, + "learning_rate": 9.056680244479732e-07, + "loss": 2.0891, + "step": 26022 + }, + { + "epoch": 0.87, + "grad_norm": 0.7394000887870789, + "learning_rate": 9.052260717453299e-07, + "loss": 2.0723, + "step": 26023 + }, + { + "epoch": 0.87, + "grad_norm": 0.746723473072052, + "learning_rate": 9.04784221790116e-07, + "loss": 2.0797, + "step": 26024 + }, + { + "epoch": 0.87, + "grad_norm": 0.7523587942123413, + "learning_rate": 9.043424745873164e-07, + "loss": 1.9896, + "step": 26025 + }, + { + "epoch": 0.87, + "grad_norm": 0.7480191588401794, + "learning_rate": 9.03900830141925e-07, + "loss": 2.0271, + "step": 26026 + }, + { + "epoch": 0.87, + "grad_norm": 0.7278357148170471, + "learning_rate": 9.034592884589321e-07, + "loss": 2.0142, + "step": 26027 + }, + { + "epoch": 0.87, + "grad_norm": 0.7568182945251465, + "learning_rate": 9.030178495433239e-07, + "loss": 2.0695, + "step": 26028 + }, + { + "epoch": 0.87, + "grad_norm": 0.7750005125999451, + "learning_rate": 9.025765134000896e-07, + "loss": 2.043, + "step": 26029 + }, + { + "epoch": 0.87, + "grad_norm": 0.7172654867172241, + "learning_rate": 9.021352800342153e-07, + "loss": 2.0281, + "step": 26030 + }, + { + "epoch": 0.87, + "grad_norm": 0.7657871246337891, + "learning_rate": 9.016941494506826e-07, + "loss": 2.0212, + "step": 26031 + }, + { + "epoch": 0.87, + "grad_norm": 0.7468778491020203, + "learning_rate": 9.012531216544773e-07, + "loss": 2.0757, + "step": 26032 + }, + { + "epoch": 0.87, + "grad_norm": 0.7230513095855713, + "learning_rate": 9.008121966505834e-07, + "loss": 2.0772, + "step": 26033 + }, + { + "epoch": 0.87, + "grad_norm": 0.7486144304275513, + "learning_rate": 9.003713744439802e-07, + "loss": 1.9658, + "step": 26034 + }, + { + "epoch": 0.87, + "grad_norm": 0.751187801361084, + "learning_rate": 8.99930655039648e-07, + "loss": 2.0811, + "step": 26035 + }, + { + "epoch": 0.87, + "grad_norm": 0.7990497350692749, + "learning_rate": 8.994900384425665e-07, + "loss": 2.0311, + "step": 26036 + }, + { + "epoch": 0.87, + "grad_norm": 0.748857319355011, + "learning_rate": 8.990495246577125e-07, + "loss": 2.0092, + "step": 26037 + }, + { + "epoch": 0.87, + "grad_norm": 0.7557612657546997, + "learning_rate": 8.986091136900643e-07, + "loss": 2.0454, + "step": 26038 + }, + { + "epoch": 0.87, + "grad_norm": 0.7426319718360901, + "learning_rate": 8.981688055445947e-07, + "loss": 2.0857, + "step": 26039 + }, + { + "epoch": 0.87, + "grad_norm": 0.744658887386322, + "learning_rate": 8.977286002262808e-07, + "loss": 2.0048, + "step": 26040 + }, + { + "epoch": 0.87, + "grad_norm": 0.7669386863708496, + "learning_rate": 8.972884977400941e-07, + "loss": 2.0881, + "step": 26041 + }, + { + "epoch": 0.87, + "grad_norm": 0.7294650673866272, + "learning_rate": 8.968484980910086e-07, + "loss": 1.9825, + "step": 26042 + }, + { + "epoch": 0.87, + "grad_norm": 0.7116085886955261, + "learning_rate": 8.964086012839934e-07, + "loss": 2.0087, + "step": 26043 + }, + { + "epoch": 0.87, + "grad_norm": 0.7089520692825317, + "learning_rate": 8.959688073240169e-07, + "loss": 1.9918, + "step": 26044 + }, + { + "epoch": 0.87, + "grad_norm": 0.7657942175865173, + "learning_rate": 8.955291162160507e-07, + "loss": 2.0619, + "step": 26045 + }, + { + "epoch": 0.87, + "grad_norm": 0.7596400380134583, + "learning_rate": 8.950895279650607e-07, + "loss": 2.126, + "step": 26046 + }, + { + "epoch": 0.87, + "grad_norm": 0.7539043426513672, + "learning_rate": 8.946500425760141e-07, + "loss": 2.1561, + "step": 26047 + }, + { + "epoch": 0.87, + "grad_norm": 0.7769302129745483, + "learning_rate": 8.942106600538736e-07, + "loss": 2.1091, + "step": 26048 + }, + { + "epoch": 0.87, + "grad_norm": 0.7355259656906128, + "learning_rate": 8.937713804036052e-07, + "loss": 1.9492, + "step": 26049 + }, + { + "epoch": 0.87, + "grad_norm": 0.7223173379898071, + "learning_rate": 8.933322036301706e-07, + "loss": 2.0049, + "step": 26050 + }, + { + "epoch": 0.87, + "grad_norm": 0.7520372271537781, + "learning_rate": 8.928931297385324e-07, + "loss": 2.0237, + "step": 26051 + }, + { + "epoch": 0.87, + "grad_norm": 0.7419622540473938, + "learning_rate": 8.92454158733651e-07, + "loss": 2.0397, + "step": 26052 + }, + { + "epoch": 0.87, + "grad_norm": 0.750339150428772, + "learning_rate": 8.920152906204826e-07, + "loss": 1.9908, + "step": 26053 + }, + { + "epoch": 0.87, + "grad_norm": 0.7568366527557373, + "learning_rate": 8.915765254039888e-07, + "loss": 2.1255, + "step": 26054 + }, + { + "epoch": 0.87, + "grad_norm": 0.7394478917121887, + "learning_rate": 8.911378630891266e-07, + "loss": 1.9806, + "step": 26055 + }, + { + "epoch": 0.87, + "grad_norm": 0.7275696396827698, + "learning_rate": 8.906993036808498e-07, + "loss": 2.0212, + "step": 26056 + }, + { + "epoch": 0.87, + "grad_norm": 0.7336542010307312, + "learning_rate": 8.902608471841123e-07, + "loss": 1.999, + "step": 26057 + }, + { + "epoch": 0.87, + "grad_norm": 0.7188431024551392, + "learning_rate": 8.898224936038691e-07, + "loss": 2.0794, + "step": 26058 + }, + { + "epoch": 0.87, + "grad_norm": 0.7493748664855957, + "learning_rate": 8.893842429450739e-07, + "loss": 2.0353, + "step": 26059 + }, + { + "epoch": 0.87, + "grad_norm": 0.7361557483673096, + "learning_rate": 8.889460952126761e-07, + "loss": 2.0286, + "step": 26060 + }, + { + "epoch": 0.87, + "grad_norm": 0.7839362025260925, + "learning_rate": 8.885080504116239e-07, + "loss": 2.0065, + "step": 26061 + }, + { + "epoch": 0.87, + "grad_norm": 0.7731884121894836, + "learning_rate": 8.880701085468701e-07, + "loss": 2.0355, + "step": 26062 + }, + { + "epoch": 0.87, + "grad_norm": 0.7372866868972778, + "learning_rate": 8.876322696233574e-07, + "loss": 2.0836, + "step": 26063 + }, + { + "epoch": 0.87, + "grad_norm": 0.7773445248603821, + "learning_rate": 8.871945336460375e-07, + "loss": 2.0786, + "step": 26064 + }, + { + "epoch": 0.87, + "grad_norm": 0.7269904613494873, + "learning_rate": 8.867569006198528e-07, + "loss": 1.9948, + "step": 26065 + }, + { + "epoch": 0.87, + "grad_norm": 0.764481782913208, + "learning_rate": 8.863193705497464e-07, + "loss": 2.0884, + "step": 26066 + }, + { + "epoch": 0.87, + "grad_norm": 0.73249351978302, + "learning_rate": 8.85881943440663e-07, + "loss": 2.0115, + "step": 26067 + }, + { + "epoch": 0.87, + "grad_norm": 0.7192801833152771, + "learning_rate": 8.854446192975441e-07, + "loss": 2.0556, + "step": 26068 + }, + { + "epoch": 0.87, + "grad_norm": 0.7057031989097595, + "learning_rate": 8.850073981253315e-07, + "loss": 1.9782, + "step": 26069 + }, + { + "epoch": 0.87, + "grad_norm": 0.7374588251113892, + "learning_rate": 8.845702799289613e-07, + "loss": 1.9507, + "step": 26070 + }, + { + "epoch": 0.87, + "grad_norm": 0.733747661113739, + "learning_rate": 8.84133264713376e-07, + "loss": 2.0647, + "step": 26071 + }, + { + "epoch": 0.87, + "grad_norm": 0.7654053568840027, + "learning_rate": 8.836963524835085e-07, + "loss": 2.0836, + "step": 26072 + }, + { + "epoch": 0.87, + "grad_norm": 0.7582880258560181, + "learning_rate": 8.832595432442992e-07, + "loss": 2.0838, + "step": 26073 + }, + { + "epoch": 0.87, + "grad_norm": 0.754362165927887, + "learning_rate": 8.828228370006786e-07, + "loss": 1.9989, + "step": 26074 + }, + { + "epoch": 0.87, + "grad_norm": 0.7118877172470093, + "learning_rate": 8.823862337575839e-07, + "loss": 2.012, + "step": 26075 + }, + { + "epoch": 0.87, + "grad_norm": 0.7350810170173645, + "learning_rate": 8.819497335199445e-07, + "loss": 2.0774, + "step": 26076 + }, + { + "epoch": 0.87, + "grad_norm": 0.7620537281036377, + "learning_rate": 8.815133362926953e-07, + "loss": 1.9649, + "step": 26077 + }, + { + "epoch": 0.87, + "grad_norm": 0.7511849403381348, + "learning_rate": 8.810770420807647e-07, + "loss": 2.0153, + "step": 26078 + }, + { + "epoch": 0.87, + "grad_norm": 0.7317536473274231, + "learning_rate": 8.806408508890796e-07, + "loss": 1.9758, + "step": 26079 + }, + { + "epoch": 0.87, + "grad_norm": 0.7334953546524048, + "learning_rate": 8.802047627225685e-07, + "loss": 2.0397, + "step": 26080 + }, + { + "epoch": 0.87, + "grad_norm": 0.7556837201118469, + "learning_rate": 8.797687775861619e-07, + "loss": 2.0181, + "step": 26081 + }, + { + "epoch": 0.87, + "grad_norm": 0.7242208123207092, + "learning_rate": 8.793328954847835e-07, + "loss": 1.9877, + "step": 26082 + }, + { + "epoch": 0.87, + "grad_norm": 0.7341518402099609, + "learning_rate": 8.788971164233539e-07, + "loss": 2.0684, + "step": 26083 + }, + { + "epoch": 0.87, + "grad_norm": 0.7540560364723206, + "learning_rate": 8.784614404068015e-07, + "loss": 2.0349, + "step": 26084 + }, + { + "epoch": 0.87, + "grad_norm": 0.7437508702278137, + "learning_rate": 8.780258674400433e-07, + "loss": 1.9549, + "step": 26085 + }, + { + "epoch": 0.87, + "grad_norm": 0.7480018138885498, + "learning_rate": 8.775903975280054e-07, + "loss": 2.0204, + "step": 26086 + }, + { + "epoch": 0.87, + "grad_norm": 0.7488433122634888, + "learning_rate": 8.771550306756049e-07, + "loss": 2.1124, + "step": 26087 + }, + { + "epoch": 0.87, + "grad_norm": 0.7354269623756409, + "learning_rate": 8.767197668877592e-07, + "loss": 2.0443, + "step": 26088 + }, + { + "epoch": 0.87, + "grad_norm": 0.7319847941398621, + "learning_rate": 8.762846061693852e-07, + "loss": 2.0784, + "step": 26089 + }, + { + "epoch": 0.87, + "grad_norm": 0.7533643245697021, + "learning_rate": 8.758495485254037e-07, + "loss": 2.0749, + "step": 26090 + }, + { + "epoch": 0.87, + "grad_norm": 0.7987291812896729, + "learning_rate": 8.754145939607262e-07, + "loss": 2.1008, + "step": 26091 + }, + { + "epoch": 0.87, + "grad_norm": 0.7291937470436096, + "learning_rate": 8.749797424802664e-07, + "loss": 2.0296, + "step": 26092 + }, + { + "epoch": 0.87, + "grad_norm": 0.7204707860946655, + "learning_rate": 8.745449940889384e-07, + "loss": 2.0139, + "step": 26093 + }, + { + "epoch": 0.87, + "grad_norm": 0.7761337161064148, + "learning_rate": 8.741103487916514e-07, + "loss": 2.0459, + "step": 26094 + }, + { + "epoch": 0.87, + "grad_norm": 0.7239903211593628, + "learning_rate": 8.736758065933193e-07, + "loss": 1.9895, + "step": 26095 + }, + { + "epoch": 0.87, + "grad_norm": 0.7312216758728027, + "learning_rate": 8.732413674988471e-07, + "loss": 1.9608, + "step": 26096 + }, + { + "epoch": 0.87, + "grad_norm": 0.7601781487464905, + "learning_rate": 8.728070315131476e-07, + "loss": 2.0202, + "step": 26097 + }, + { + "epoch": 0.87, + "grad_norm": 0.7548706531524658, + "learning_rate": 8.723727986411235e-07, + "loss": 2.0258, + "step": 26098 + }, + { + "epoch": 0.87, + "grad_norm": 0.7665787935256958, + "learning_rate": 8.71938668887683e-07, + "loss": 2.1145, + "step": 26099 + }, + { + "epoch": 0.87, + "grad_norm": 0.7559893131256104, + "learning_rate": 8.715046422577311e-07, + "loss": 2.0669, + "step": 26100 + }, + { + "epoch": 0.87, + "grad_norm": 0.7597693204879761, + "learning_rate": 8.710707187561674e-07, + "loss": 2.0784, + "step": 26101 + }, + { + "epoch": 0.87, + "grad_norm": 0.761865496635437, + "learning_rate": 8.706368983878965e-07, + "loss": 2.0152, + "step": 26102 + }, + { + "epoch": 0.87, + "grad_norm": 0.7484170794487, + "learning_rate": 8.702031811578216e-07, + "loss": 2.0396, + "step": 26103 + }, + { + "epoch": 0.87, + "grad_norm": 0.7326580882072449, + "learning_rate": 8.697695670708406e-07, + "loss": 2.0098, + "step": 26104 + }, + { + "epoch": 0.87, + "grad_norm": 0.7145528793334961, + "learning_rate": 8.693360561318509e-07, + "loss": 2.0391, + "step": 26105 + }, + { + "epoch": 0.87, + "grad_norm": 0.7399336099624634, + "learning_rate": 8.689026483457519e-07, + "loss": 2.0888, + "step": 26106 + }, + { + "epoch": 0.87, + "grad_norm": 0.750868558883667, + "learning_rate": 8.684693437174385e-07, + "loss": 2.0159, + "step": 26107 + }, + { + "epoch": 0.87, + "grad_norm": 0.7159035205841064, + "learning_rate": 8.680361422518091e-07, + "loss": 2.01, + "step": 26108 + }, + { + "epoch": 0.87, + "grad_norm": 0.7498615980148315, + "learning_rate": 8.676030439537542e-07, + "loss": 1.9582, + "step": 26109 + }, + { + "epoch": 0.87, + "grad_norm": 0.7738543748855591, + "learning_rate": 8.671700488281675e-07, + "loss": 2.0222, + "step": 26110 + }, + { + "epoch": 0.87, + "grad_norm": 0.7434841394424438, + "learning_rate": 8.667371568799399e-07, + "loss": 2.04, + "step": 26111 + }, + { + "epoch": 0.87, + "grad_norm": 0.7654726505279541, + "learning_rate": 8.66304368113966e-07, + "loss": 2.0962, + "step": 26112 + }, + { + "epoch": 0.87, + "grad_norm": 0.7408851981163025, + "learning_rate": 8.658716825351332e-07, + "loss": 1.9954, + "step": 26113 + }, + { + "epoch": 0.87, + "grad_norm": 0.7858261466026306, + "learning_rate": 8.654391001483253e-07, + "loss": 2.0439, + "step": 26114 + }, + { + "epoch": 0.87, + "grad_norm": 0.7424229979515076, + "learning_rate": 8.650066209584363e-07, + "loss": 2.0303, + "step": 26115 + }, + { + "epoch": 0.87, + "grad_norm": 0.7670367956161499, + "learning_rate": 8.645742449703464e-07, + "loss": 2.0298, + "step": 26116 + }, + { + "epoch": 0.87, + "grad_norm": 0.7460410594940186, + "learning_rate": 8.641419721889454e-07, + "loss": 2.0373, + "step": 26117 + }, + { + "epoch": 0.87, + "grad_norm": 0.7535685300827026, + "learning_rate": 8.637098026191115e-07, + "loss": 2.0677, + "step": 26118 + }, + { + "epoch": 0.87, + "grad_norm": 0.7521857619285583, + "learning_rate": 8.632777362657319e-07, + "loss": 1.9949, + "step": 26119 + }, + { + "epoch": 0.87, + "grad_norm": 0.7349094152450562, + "learning_rate": 8.628457731336848e-07, + "loss": 2.0223, + "step": 26120 + }, + { + "epoch": 0.87, + "grad_norm": 0.7516452074050903, + "learning_rate": 8.624139132278519e-07, + "loss": 2.0757, + "step": 26121 + }, + { + "epoch": 0.87, + "grad_norm": 0.733060359954834, + "learning_rate": 8.619821565531128e-07, + "loss": 1.9759, + "step": 26122 + }, + { + "epoch": 0.87, + "grad_norm": 0.7503489255905151, + "learning_rate": 8.615505031143411e-07, + "loss": 2.0838, + "step": 26123 + }, + { + "epoch": 0.87, + "grad_norm": 0.7479836344718933, + "learning_rate": 8.611189529164165e-07, + "loss": 2.0401, + "step": 26124 + }, + { + "epoch": 0.87, + "grad_norm": 0.7196901440620422, + "learning_rate": 8.60687505964215e-07, + "loss": 2.0121, + "step": 26125 + }, + { + "epoch": 0.87, + "grad_norm": 0.7345967292785645, + "learning_rate": 8.602561622626104e-07, + "loss": 2.007, + "step": 26126 + }, + { + "epoch": 0.87, + "grad_norm": 0.738929033279419, + "learning_rate": 8.598249218164745e-07, + "loss": 1.9625, + "step": 26127 + }, + { + "epoch": 0.87, + "grad_norm": 0.7429989576339722, + "learning_rate": 8.593937846306799e-07, + "loss": 2.0564, + "step": 26128 + }, + { + "epoch": 0.87, + "grad_norm": 0.7296868562698364, + "learning_rate": 8.589627507100973e-07, + "loss": 1.9579, + "step": 26129 + }, + { + "epoch": 0.87, + "grad_norm": 0.7939447164535522, + "learning_rate": 8.58531820059596e-07, + "loss": 2.0316, + "step": 26130 + }, + { + "epoch": 0.87, + "grad_norm": 0.7418543696403503, + "learning_rate": 8.581009926840478e-07, + "loss": 2.004, + "step": 26131 + }, + { + "epoch": 0.87, + "grad_norm": 0.7300820350646973, + "learning_rate": 8.576702685883132e-07, + "loss": 2.0828, + "step": 26132 + }, + { + "epoch": 0.87, + "grad_norm": 0.7351304292678833, + "learning_rate": 8.572396477772627e-07, + "loss": 2.0364, + "step": 26133 + }, + { + "epoch": 0.87, + "grad_norm": 0.7615849375724792, + "learning_rate": 8.568091302557613e-07, + "loss": 2.0511, + "step": 26134 + }, + { + "epoch": 0.87, + "grad_norm": 0.7370851039886475, + "learning_rate": 8.56378716028673e-07, + "loss": 2.0435, + "step": 26135 + }, + { + "epoch": 0.87, + "grad_norm": 0.7111318707466125, + "learning_rate": 8.559484051008571e-07, + "loss": 2.0558, + "step": 26136 + }, + { + "epoch": 0.87, + "grad_norm": 0.7329342365264893, + "learning_rate": 8.555181974771787e-07, + "loss": 2.0508, + "step": 26137 + }, + { + "epoch": 0.87, + "grad_norm": 0.7335216999053955, + "learning_rate": 8.55088093162495e-07, + "loss": 2.0649, + "step": 26138 + }, + { + "epoch": 0.87, + "grad_norm": 0.7488418817520142, + "learning_rate": 8.546580921616676e-07, + "loss": 2.0519, + "step": 26139 + }, + { + "epoch": 0.87, + "grad_norm": 0.7692394852638245, + "learning_rate": 8.542281944795528e-07, + "loss": 2.0899, + "step": 26140 + }, + { + "epoch": 0.87, + "grad_norm": 0.7547497153282166, + "learning_rate": 8.537984001210087e-07, + "loss": 2.0866, + "step": 26141 + }, + { + "epoch": 0.87, + "grad_norm": 0.7116230726242065, + "learning_rate": 8.533687090908893e-07, + "loss": 2.0333, + "step": 26142 + }, + { + "epoch": 0.87, + "grad_norm": 0.7162382006645203, + "learning_rate": 8.529391213940508e-07, + "loss": 1.9313, + "step": 26143 + }, + { + "epoch": 0.87, + "grad_norm": 0.7477768659591675, + "learning_rate": 8.525096370353458e-07, + "loss": 2.0117, + "step": 26144 + }, + { + "epoch": 0.87, + "grad_norm": 0.7796523571014404, + "learning_rate": 8.52080256019624e-07, + "loss": 2.0074, + "step": 26145 + }, + { + "epoch": 0.87, + "grad_norm": 0.7533775568008423, + "learning_rate": 8.516509783517379e-07, + "loss": 2.0713, + "step": 26146 + }, + { + "epoch": 0.87, + "grad_norm": 0.7249441742897034, + "learning_rate": 8.512218040365394e-07, + "loss": 2.0241, + "step": 26147 + }, + { + "epoch": 0.87, + "grad_norm": 0.7516166567802429, + "learning_rate": 8.507927330788757e-07, + "loss": 2.0644, + "step": 26148 + }, + { + "epoch": 0.87, + "grad_norm": 0.7337638139724731, + "learning_rate": 8.503637654835916e-07, + "loss": 2.0677, + "step": 26149 + }, + { + "epoch": 0.87, + "grad_norm": 0.7596551775932312, + "learning_rate": 8.499349012555381e-07, + "loss": 2.0866, + "step": 26150 + }, + { + "epoch": 0.87, + "grad_norm": 0.7552168965339661, + "learning_rate": 8.495061403995553e-07, + "loss": 2.0333, + "step": 26151 + }, + { + "epoch": 0.87, + "grad_norm": 0.7323265671730042, + "learning_rate": 8.490774829204896e-07, + "loss": 2.0358, + "step": 26152 + }, + { + "epoch": 0.87, + "grad_norm": 0.7373947501182556, + "learning_rate": 8.486489288231858e-07, + "loss": 2.009, + "step": 26153 + }, + { + "epoch": 0.87, + "grad_norm": 0.729453980922699, + "learning_rate": 8.482204781124815e-07, + "loss": 2.0263, + "step": 26154 + }, + { + "epoch": 0.87, + "grad_norm": 0.7041894197463989, + "learning_rate": 8.47792130793218e-07, + "loss": 1.9992, + "step": 26155 + }, + { + "epoch": 0.87, + "grad_norm": 0.7723562717437744, + "learning_rate": 8.47363886870236e-07, + "loss": 2.0725, + "step": 26156 + }, + { + "epoch": 0.87, + "grad_norm": 0.7094305753707886, + "learning_rate": 8.46935746348374e-07, + "loss": 2.0232, + "step": 26157 + }, + { + "epoch": 0.87, + "grad_norm": 0.7519320249557495, + "learning_rate": 8.465077092324658e-07, + "loss": 2.0903, + "step": 26158 + }, + { + "epoch": 0.87, + "grad_norm": 0.7269750833511353, + "learning_rate": 8.460797755273487e-07, + "loss": 2.0108, + "step": 26159 + }, + { + "epoch": 0.87, + "grad_norm": 0.7357923984527588, + "learning_rate": 8.456519452378597e-07, + "loss": 1.9933, + "step": 26160 + }, + { + "epoch": 0.87, + "grad_norm": 0.7666115164756775, + "learning_rate": 8.452242183688286e-07, + "loss": 2.0505, + "step": 26161 + }, + { + "epoch": 0.87, + "grad_norm": 0.7499266862869263, + "learning_rate": 8.447965949250903e-07, + "loss": 2.0694, + "step": 26162 + }, + { + "epoch": 0.87, + "grad_norm": 0.739158570766449, + "learning_rate": 8.443690749114741e-07, + "loss": 2.0768, + "step": 26163 + }, + { + "epoch": 0.87, + "grad_norm": 0.7603508234024048, + "learning_rate": 8.439416583328097e-07, + "loss": 2.081, + "step": 26164 + }, + { + "epoch": 0.87, + "grad_norm": 0.7535427808761597, + "learning_rate": 8.435143451939265e-07, + "loss": 2.0125, + "step": 26165 + }, + { + "epoch": 0.87, + "grad_norm": 0.7088193297386169, + "learning_rate": 8.43087135499655e-07, + "loss": 2.0141, + "step": 26166 + }, + { + "epoch": 0.87, + "grad_norm": 0.76165372133255, + "learning_rate": 8.426600292548148e-07, + "loss": 2.0372, + "step": 26167 + }, + { + "epoch": 0.87, + "grad_norm": 0.7223299741744995, + "learning_rate": 8.422330264642354e-07, + "loss": 2.0241, + "step": 26168 + }, + { + "epoch": 0.87, + "grad_norm": 0.7322620153427124, + "learning_rate": 8.418061271327415e-07, + "loss": 1.9941, + "step": 26169 + }, + { + "epoch": 0.87, + "grad_norm": 0.7736354470252991, + "learning_rate": 8.413793312651541e-07, + "loss": 2.0472, + "step": 26170 + }, + { + "epoch": 0.87, + "grad_norm": 0.7302213311195374, + "learning_rate": 8.409526388662947e-07, + "loss": 2.0829, + "step": 26171 + }, + { + "epoch": 0.87, + "grad_norm": 0.7282377481460571, + "learning_rate": 8.405260499409873e-07, + "loss": 2.0153, + "step": 26172 + }, + { + "epoch": 0.87, + "grad_norm": 0.7290664911270142, + "learning_rate": 8.400995644940457e-07, + "loss": 2.0074, + "step": 26173 + }, + { + "epoch": 0.87, + "grad_norm": 0.7530043721199036, + "learning_rate": 8.396731825302906e-07, + "loss": 2.0752, + "step": 26174 + }, + { + "epoch": 0.87, + "grad_norm": 0.7531362175941467, + "learning_rate": 8.392469040545426e-07, + "loss": 2.0102, + "step": 26175 + }, + { + "epoch": 0.87, + "grad_norm": 0.7369971871376038, + "learning_rate": 8.388207290716133e-07, + "loss": 2.0001, + "step": 26176 + }, + { + "epoch": 0.87, + "grad_norm": 0.7616905570030212, + "learning_rate": 8.383946575863166e-07, + "loss": 2.0335, + "step": 26177 + }, + { + "epoch": 0.87, + "grad_norm": 0.7397873401641846, + "learning_rate": 8.3796868960347e-07, + "loss": 2.0452, + "step": 26178 + }, + { + "epoch": 0.87, + "grad_norm": 0.735589325428009, + "learning_rate": 8.375428251278839e-07, + "loss": 2.0234, + "step": 26179 + }, + { + "epoch": 0.87, + "grad_norm": 0.7499776482582092, + "learning_rate": 8.371170641643667e-07, + "loss": 1.9932, + "step": 26180 + }, + { + "epoch": 0.87, + "grad_norm": 0.7413488626480103, + "learning_rate": 8.366914067177312e-07, + "loss": 1.9895, + "step": 26181 + }, + { + "epoch": 0.87, + "grad_norm": 0.7432525157928467, + "learning_rate": 8.362658527927881e-07, + "loss": 2.0616, + "step": 26182 + }, + { + "epoch": 0.87, + "grad_norm": 0.7550211548805237, + "learning_rate": 8.358404023943412e-07, + "loss": 1.9723, + "step": 26183 + }, + { + "epoch": 0.87, + "grad_norm": 0.7545579075813293, + "learning_rate": 8.354150555272e-07, + "loss": 2.1081, + "step": 26184 + }, + { + "epoch": 0.87, + "grad_norm": 0.7402257323265076, + "learning_rate": 8.349898121961686e-07, + "loss": 2.0711, + "step": 26185 + }, + { + "epoch": 0.87, + "grad_norm": 0.7684229612350464, + "learning_rate": 8.345646724060497e-07, + "loss": 2.0657, + "step": 26186 + }, + { + "epoch": 0.87, + "grad_norm": 0.7262281179428101, + "learning_rate": 8.341396361616472e-07, + "loss": 2.0329, + "step": 26187 + }, + { + "epoch": 0.87, + "grad_norm": 0.7382645606994629, + "learning_rate": 8.337147034677673e-07, + "loss": 2.0115, + "step": 26188 + }, + { + "epoch": 0.87, + "grad_norm": 0.7389950752258301, + "learning_rate": 8.332898743292028e-07, + "loss": 2.0129, + "step": 26189 + }, + { + "epoch": 0.87, + "grad_norm": 0.7545232772827148, + "learning_rate": 8.328651487507577e-07, + "loss": 2.0219, + "step": 26190 + }, + { + "epoch": 0.87, + "grad_norm": 0.7239479422569275, + "learning_rate": 8.324405267372304e-07, + "loss": 2.0535, + "step": 26191 + }, + { + "epoch": 0.87, + "grad_norm": 0.7266597151756287, + "learning_rate": 8.32016008293417e-07, + "loss": 1.9762, + "step": 26192 + }, + { + "epoch": 0.87, + "grad_norm": 0.7659448981285095, + "learning_rate": 8.315915934241148e-07, + "loss": 2.0832, + "step": 26193 + }, + { + "epoch": 0.87, + "grad_norm": 0.730658769607544, + "learning_rate": 8.311672821341165e-07, + "loss": 2.0987, + "step": 26194 + }, + { + "epoch": 0.87, + "grad_norm": 0.716567873954773, + "learning_rate": 8.307430744282164e-07, + "loss": 2.0384, + "step": 26195 + }, + { + "epoch": 0.87, + "grad_norm": 0.754006564617157, + "learning_rate": 8.303189703112069e-07, + "loss": 2.0655, + "step": 26196 + }, + { + "epoch": 0.87, + "grad_norm": 0.7325912117958069, + "learning_rate": 8.298949697878811e-07, + "loss": 2.0416, + "step": 26197 + }, + { + "epoch": 0.87, + "grad_norm": 0.7155423760414124, + "learning_rate": 8.294710728630284e-07, + "loss": 2.034, + "step": 26198 + }, + { + "epoch": 0.87, + "grad_norm": 0.7548443078994751, + "learning_rate": 8.29047279541435e-07, + "loss": 2.0181, + "step": 26199 + }, + { + "epoch": 0.87, + "grad_norm": 0.7647164463996887, + "learning_rate": 8.286235898278927e-07, + "loss": 2.0371, + "step": 26200 + }, + { + "epoch": 0.87, + "grad_norm": 0.7409639358520508, + "learning_rate": 8.282000037271864e-07, + "loss": 2.0341, + "step": 26201 + }, + { + "epoch": 0.87, + "grad_norm": 0.7623330950737, + "learning_rate": 8.277765212440981e-07, + "loss": 2.0262, + "step": 26202 + }, + { + "epoch": 0.87, + "grad_norm": 0.7453184127807617, + "learning_rate": 8.27353142383417e-07, + "loss": 2.0066, + "step": 26203 + }, + { + "epoch": 0.87, + "grad_norm": 0.7607576251029968, + "learning_rate": 8.269298671499248e-07, + "loss": 2.0288, + "step": 26204 + }, + { + "epoch": 0.87, + "grad_norm": 0.7142048478126526, + "learning_rate": 8.265066955484013e-07, + "loss": 2.0262, + "step": 26205 + }, + { + "epoch": 0.87, + "grad_norm": 0.7428783178329468, + "learning_rate": 8.260836275836315e-07, + "loss": 2.0931, + "step": 26206 + }, + { + "epoch": 0.87, + "grad_norm": 0.7461102604866028, + "learning_rate": 8.256606632603926e-07, + "loss": 1.973, + "step": 26207 + }, + { + "epoch": 0.87, + "grad_norm": 0.7251810431480408, + "learning_rate": 8.252378025834606e-07, + "loss": 2.0485, + "step": 26208 + }, + { + "epoch": 0.87, + "grad_norm": 0.7395917773246765, + "learning_rate": 8.248150455576143e-07, + "loss": 2.0417, + "step": 26209 + }, + { + "epoch": 0.87, + "grad_norm": 0.7333256006240845, + "learning_rate": 8.243923921876351e-07, + "loss": 2.0379, + "step": 26210 + }, + { + "epoch": 0.87, + "grad_norm": 0.7454107999801636, + "learning_rate": 8.239698424782894e-07, + "loss": 2.0641, + "step": 26211 + }, + { + "epoch": 0.87, + "grad_norm": 0.7295187711715698, + "learning_rate": 8.235473964343543e-07, + "loss": 2.0835, + "step": 26212 + }, + { + "epoch": 0.87, + "grad_norm": 0.7689849734306335, + "learning_rate": 8.23125054060605e-07, + "loss": 2.0303, + "step": 26213 + }, + { + "epoch": 0.87, + "grad_norm": 0.7851645946502686, + "learning_rate": 8.227028153618077e-07, + "loss": 1.9667, + "step": 26214 + }, + { + "epoch": 0.87, + "grad_norm": 0.7135246396064758, + "learning_rate": 8.222806803427386e-07, + "loss": 1.9613, + "step": 26215 + }, + { + "epoch": 0.87, + "grad_norm": 0.7843337655067444, + "learning_rate": 8.218586490081636e-07, + "loss": 2.0539, + "step": 26216 + }, + { + "epoch": 0.87, + "grad_norm": 0.7629027366638184, + "learning_rate": 8.214367213628493e-07, + "loss": 2.1019, + "step": 26217 + }, + { + "epoch": 0.87, + "grad_norm": 0.793875515460968, + "learning_rate": 8.210148974115628e-07, + "loss": 2.0441, + "step": 26218 + }, + { + "epoch": 0.87, + "grad_norm": 0.7373671531677246, + "learning_rate": 8.205931771590725e-07, + "loss": 2.0249, + "step": 26219 + }, + { + "epoch": 0.87, + "grad_norm": 0.7603422403335571, + "learning_rate": 8.201715606101413e-07, + "loss": 2.0431, + "step": 26220 + }, + { + "epoch": 0.87, + "grad_norm": 0.7383587956428528, + "learning_rate": 8.197500477695297e-07, + "loss": 2.0717, + "step": 26221 + }, + { + "epoch": 0.87, + "grad_norm": 0.722493052482605, + "learning_rate": 8.19328638642004e-07, + "loss": 2.0437, + "step": 26222 + }, + { + "epoch": 0.87, + "grad_norm": 0.7490050792694092, + "learning_rate": 8.189073332323227e-07, + "loss": 2.0308, + "step": 26223 + }, + { + "epoch": 0.87, + "grad_norm": 0.7656798958778381, + "learning_rate": 8.184861315452442e-07, + "loss": 2.0752, + "step": 26224 + }, + { + "epoch": 0.87, + "grad_norm": 0.7712162137031555, + "learning_rate": 8.180650335855278e-07, + "loss": 2.1078, + "step": 26225 + }, + { + "epoch": 0.87, + "grad_norm": 0.739031195640564, + "learning_rate": 8.176440393579343e-07, + "loss": 2.0361, + "step": 26226 + }, + { + "epoch": 0.87, + "grad_norm": 0.7344566583633423, + "learning_rate": 8.172231488672145e-07, + "loss": 2.1097, + "step": 26227 + }, + { + "epoch": 0.87, + "grad_norm": 0.7639145851135254, + "learning_rate": 8.168023621181276e-07, + "loss": 2.0441, + "step": 26228 + }, + { + "epoch": 0.87, + "grad_norm": 0.7794142961502075, + "learning_rate": 8.163816791154266e-07, + "loss": 1.9493, + "step": 26229 + }, + { + "epoch": 0.87, + "grad_norm": 0.7477810978889465, + "learning_rate": 8.159610998638612e-07, + "loss": 2.0378, + "step": 26230 + }, + { + "epoch": 0.87, + "grad_norm": 0.7755833864212036, + "learning_rate": 8.15540624368184e-07, + "loss": 2.0844, + "step": 26231 + }, + { + "epoch": 0.87, + "grad_norm": 0.7709144353866577, + "learning_rate": 8.151202526331503e-07, + "loss": 2.0444, + "step": 26232 + }, + { + "epoch": 0.87, + "grad_norm": 0.7284353375434875, + "learning_rate": 8.146999846635017e-07, + "loss": 2.0114, + "step": 26233 + }, + { + "epoch": 0.87, + "grad_norm": 0.7367143630981445, + "learning_rate": 8.142798204639901e-07, + "loss": 2.0379, + "step": 26234 + }, + { + "epoch": 0.87, + "grad_norm": 0.7553098797798157, + "learning_rate": 8.138597600393628e-07, + "loss": 2.0363, + "step": 26235 + }, + { + "epoch": 0.87, + "grad_norm": 0.7275480628013611, + "learning_rate": 8.134398033943624e-07, + "loss": 1.9812, + "step": 26236 + }, + { + "epoch": 0.87, + "grad_norm": 0.7098502516746521, + "learning_rate": 8.130199505337377e-07, + "loss": 1.994, + "step": 26237 + }, + { + "epoch": 0.87, + "grad_norm": 0.7587746381759644, + "learning_rate": 8.126002014622292e-07, + "loss": 2.0298, + "step": 26238 + }, + { + "epoch": 0.87, + "grad_norm": 0.7535634636878967, + "learning_rate": 8.121805561845775e-07, + "loss": 2.01, + "step": 26239 + }, + { + "epoch": 0.87, + "grad_norm": 0.7612124085426331, + "learning_rate": 8.117610147055254e-07, + "loss": 2.0403, + "step": 26240 + }, + { + "epoch": 0.87, + "grad_norm": 0.7397382259368896, + "learning_rate": 8.113415770298139e-07, + "loss": 2.0126, + "step": 26241 + }, + { + "epoch": 0.87, + "grad_norm": 0.743455708026886, + "learning_rate": 8.1092224316218e-07, + "loss": 2.0553, + "step": 26242 + }, + { + "epoch": 0.87, + "grad_norm": 0.7954873442649841, + "learning_rate": 8.105030131073599e-07, + "loss": 2.1584, + "step": 26243 + }, + { + "epoch": 0.87, + "grad_norm": 0.8024573922157288, + "learning_rate": 8.100838868700933e-07, + "loss": 2.0194, + "step": 26244 + }, + { + "epoch": 0.87, + "grad_norm": 0.7332099676132202, + "learning_rate": 8.096648644551109e-07, + "loss": 2.0095, + "step": 26245 + }, + { + "epoch": 0.87, + "grad_norm": 0.7480826377868652, + "learning_rate": 8.092459458671509e-07, + "loss": 2.0844, + "step": 26246 + }, + { + "epoch": 0.87, + "grad_norm": 0.7248606085777283, + "learning_rate": 8.088271311109419e-07, + "loss": 2.0156, + "step": 26247 + }, + { + "epoch": 0.87, + "grad_norm": 0.7645732760429382, + "learning_rate": 8.08408420191219e-07, + "loss": 2.073, + "step": 26248 + }, + { + "epoch": 0.87, + "grad_norm": 0.737801194190979, + "learning_rate": 8.079898131127095e-07, + "loss": 2.1071, + "step": 26249 + }, + { + "epoch": 0.87, + "grad_norm": 0.741977870464325, + "learning_rate": 8.075713098801463e-07, + "loss": 2.0763, + "step": 26250 + }, + { + "epoch": 0.87, + "grad_norm": 0.7460431456565857, + "learning_rate": 8.071529104982545e-07, + "loss": 2.0682, + "step": 26251 + }, + { + "epoch": 0.87, + "grad_norm": 0.7416695356369019, + "learning_rate": 8.067346149717592e-07, + "loss": 2.1023, + "step": 26252 + }, + { + "epoch": 0.87, + "grad_norm": 0.7273126840591431, + "learning_rate": 8.063164233053888e-07, + "loss": 2.0643, + "step": 26253 + }, + { + "epoch": 0.87, + "grad_norm": 0.7504833340644836, + "learning_rate": 8.058983355038718e-07, + "loss": 2.0328, + "step": 26254 + }, + { + "epoch": 0.87, + "grad_norm": 0.7454068064689636, + "learning_rate": 8.054803515719234e-07, + "loss": 2.1017, + "step": 26255 + }, + { + "epoch": 0.87, + "grad_norm": 0.7185887098312378, + "learning_rate": 8.050624715142685e-07, + "loss": 2.0735, + "step": 26256 + }, + { + "epoch": 0.87, + "grad_norm": 0.7821540236473083, + "learning_rate": 8.046446953356313e-07, + "loss": 1.9751, + "step": 26257 + }, + { + "epoch": 0.87, + "grad_norm": 0.7268336415290833, + "learning_rate": 8.042270230407278e-07, + "loss": 2.0231, + "step": 26258 + }, + { + "epoch": 0.87, + "grad_norm": 0.7370143532752991, + "learning_rate": 8.038094546342801e-07, + "loss": 2.0075, + "step": 26259 + }, + { + "epoch": 0.87, + "grad_norm": 0.7629903554916382, + "learning_rate": 8.033919901210019e-07, + "loss": 1.9786, + "step": 26260 + }, + { + "epoch": 0.87, + "grad_norm": 0.7801638841629028, + "learning_rate": 8.029746295056129e-07, + "loss": 2.033, + "step": 26261 + }, + { + "epoch": 0.87, + "grad_norm": 0.7574886083602905, + "learning_rate": 8.025573727928238e-07, + "loss": 2.0515, + "step": 26262 + }, + { + "epoch": 0.87, + "grad_norm": 0.7380183935165405, + "learning_rate": 8.02140219987354e-07, + "loss": 2.0275, + "step": 26263 + }, + { + "epoch": 0.87, + "grad_norm": 0.7454653978347778, + "learning_rate": 8.017231710939133e-07, + "loss": 2.0361, + "step": 26264 + }, + { + "epoch": 0.87, + "grad_norm": 0.7465541958808899, + "learning_rate": 8.013062261172122e-07, + "loss": 2.0171, + "step": 26265 + }, + { + "epoch": 0.87, + "grad_norm": 0.7393627762794495, + "learning_rate": 8.008893850619615e-07, + "loss": 2.0589, + "step": 26266 + }, + { + "epoch": 0.87, + "grad_norm": 0.7551132440567017, + "learning_rate": 8.004726479328739e-07, + "loss": 2.0531, + "step": 26267 + }, + { + "epoch": 0.87, + "grad_norm": 0.7648429870605469, + "learning_rate": 8.000560147346547e-07, + "loss": 2.0298, + "step": 26268 + }, + { + "epoch": 0.87, + "grad_norm": 0.7581826448440552, + "learning_rate": 7.996394854720091e-07, + "loss": 2.0969, + "step": 26269 + }, + { + "epoch": 0.87, + "grad_norm": 0.7409756779670715, + "learning_rate": 7.992230601496465e-07, + "loss": 2.0619, + "step": 26270 + }, + { + "epoch": 0.87, + "grad_norm": 0.7358968257904053, + "learning_rate": 7.988067387722675e-07, + "loss": 2.0637, + "step": 26271 + }, + { + "epoch": 0.87, + "grad_norm": 0.7573010921478271, + "learning_rate": 7.983905213445798e-07, + "loss": 2.0174, + "step": 26272 + }, + { + "epoch": 0.87, + "grad_norm": 0.7365456223487854, + "learning_rate": 7.979744078712826e-07, + "loss": 2.0299, + "step": 26273 + }, + { + "epoch": 0.87, + "grad_norm": 1.1528874635696411, + "learning_rate": 7.975583983570768e-07, + "loss": 2.0402, + "step": 26274 + }, + { + "epoch": 0.87, + "grad_norm": 0.7599830031394958, + "learning_rate": 7.971424928066618e-07, + "loss": 2.0056, + "step": 26275 + }, + { + "epoch": 0.87, + "grad_norm": 0.7673853039741516, + "learning_rate": 7.967266912247395e-07, + "loss": 2.0138, + "step": 26276 + }, + { + "epoch": 0.87, + "grad_norm": 0.7377079725265503, + "learning_rate": 7.963109936160063e-07, + "loss": 2.0608, + "step": 26277 + }, + { + "epoch": 0.87, + "grad_norm": 0.7319782376289368, + "learning_rate": 7.95895399985156e-07, + "loss": 1.9687, + "step": 26278 + }, + { + "epoch": 0.87, + "grad_norm": 0.7344086170196533, + "learning_rate": 7.95479910336886e-07, + "loss": 2.0712, + "step": 26279 + }, + { + "epoch": 0.87, + "grad_norm": 0.7579789161682129, + "learning_rate": 7.950645246758881e-07, + "loss": 2.0154, + "step": 26280 + }, + { + "epoch": 0.87, + "grad_norm": 0.7293540239334106, + "learning_rate": 7.946492430068586e-07, + "loss": 2.1091, + "step": 26281 + }, + { + "epoch": 0.87, + "grad_norm": 0.7341769337654114, + "learning_rate": 7.94234065334486e-07, + "loss": 2.086, + "step": 26282 + }, + { + "epoch": 0.87, + "grad_norm": 0.7867773175239563, + "learning_rate": 7.938189916634619e-07, + "loss": 2.0153, + "step": 26283 + }, + { + "epoch": 0.87, + "grad_norm": 0.7890391945838928, + "learning_rate": 7.934040219984751e-07, + "loss": 2.0293, + "step": 26284 + }, + { + "epoch": 0.87, + "grad_norm": 0.7641716003417969, + "learning_rate": 7.92989156344216e-07, + "loss": 2.0952, + "step": 26285 + }, + { + "epoch": 0.87, + "grad_norm": 0.7397662401199341, + "learning_rate": 7.925743947053688e-07, + "loss": 2.0082, + "step": 26286 + }, + { + "epoch": 0.87, + "grad_norm": 0.7331759333610535, + "learning_rate": 7.921597370866185e-07, + "loss": 2.0325, + "step": 26287 + }, + { + "epoch": 0.87, + "grad_norm": 0.7580739259719849, + "learning_rate": 7.917451834926515e-07, + "loss": 1.9758, + "step": 26288 + }, + { + "epoch": 0.87, + "grad_norm": 0.7553606629371643, + "learning_rate": 7.913307339281517e-07, + "loss": 2.0906, + "step": 26289 + }, + { + "epoch": 0.87, + "grad_norm": 0.7575421929359436, + "learning_rate": 7.90916388397801e-07, + "loss": 2.0479, + "step": 26290 + }, + { + "epoch": 0.87, + "grad_norm": 0.7442637085914612, + "learning_rate": 7.905021469062779e-07, + "loss": 2.0806, + "step": 26291 + }, + { + "epoch": 0.87, + "grad_norm": 0.7745006084442139, + "learning_rate": 7.900880094582664e-07, + "loss": 2.0739, + "step": 26292 + }, + { + "epoch": 0.87, + "grad_norm": 0.746653139591217, + "learning_rate": 7.896739760584415e-07, + "loss": 2.1147, + "step": 26293 + }, + { + "epoch": 0.87, + "grad_norm": 0.7388312220573425, + "learning_rate": 7.89260046711483e-07, + "loss": 2.032, + "step": 26294 + }, + { + "epoch": 0.87, + "grad_norm": 0.7484868168830872, + "learning_rate": 7.888462214220671e-07, + "loss": 1.9632, + "step": 26295 + }, + { + "epoch": 0.87, + "grad_norm": 0.758126437664032, + "learning_rate": 7.884325001948667e-07, + "loss": 2.0639, + "step": 26296 + }, + { + "epoch": 0.87, + "grad_norm": 0.7262428402900696, + "learning_rate": 7.880188830345569e-07, + "loss": 2.0655, + "step": 26297 + }, + { + "epoch": 0.87, + "grad_norm": 0.7202563285827637, + "learning_rate": 7.876053699458131e-07, + "loss": 2.0383, + "step": 26298 + }, + { + "epoch": 0.87, + "grad_norm": 0.7157536149024963, + "learning_rate": 7.871919609333056e-07, + "loss": 1.9861, + "step": 26299 + }, + { + "epoch": 0.88, + "grad_norm": 0.7857599854469299, + "learning_rate": 7.86778656001701e-07, + "loss": 2.0625, + "step": 26300 + }, + { + "epoch": 0.88, + "grad_norm": 0.7309787273406982, + "learning_rate": 7.863654551556743e-07, + "loss": 2.0554, + "step": 26301 + }, + { + "epoch": 0.88, + "grad_norm": 0.7530249357223511, + "learning_rate": 7.859523583998884e-07, + "loss": 2.0996, + "step": 26302 + }, + { + "epoch": 0.88, + "grad_norm": 0.7495779395103455, + "learning_rate": 7.855393657390154e-07, + "loss": 1.9862, + "step": 26303 + }, + { + "epoch": 0.88, + "grad_norm": 0.7540934681892395, + "learning_rate": 7.851264771777167e-07, + "loss": 2.0219, + "step": 26304 + }, + { + "epoch": 0.88, + "grad_norm": 0.7269781231880188, + "learning_rate": 7.8471369272066e-07, + "loss": 2.0525, + "step": 26305 + }, + { + "epoch": 0.88, + "grad_norm": 0.7454487681388855, + "learning_rate": 7.843010123725048e-07, + "loss": 2.1233, + "step": 26306 + }, + { + "epoch": 0.88, + "grad_norm": 0.7507216930389404, + "learning_rate": 7.838884361379185e-07, + "loss": 2.0871, + "step": 26307 + }, + { + "epoch": 0.88, + "grad_norm": 0.7801713943481445, + "learning_rate": 7.834759640215595e-07, + "loss": 2.0646, + "step": 26308 + }, + { + "epoch": 0.88, + "grad_norm": 0.7474452257156372, + "learning_rate": 7.830635960280852e-07, + "loss": 2.0944, + "step": 26309 + }, + { + "epoch": 0.88, + "grad_norm": 0.7268065214157104, + "learning_rate": 7.826513321621576e-07, + "loss": 2.0131, + "step": 26310 + }, + { + "epoch": 0.88, + "grad_norm": 0.7813860774040222, + "learning_rate": 7.822391724284351e-07, + "loss": 1.9443, + "step": 26311 + }, + { + "epoch": 0.88, + "grad_norm": 0.7407920956611633, + "learning_rate": 7.818271168315716e-07, + "loss": 2.0948, + "step": 26312 + }, + { + "epoch": 0.88, + "grad_norm": 0.7049204111099243, + "learning_rate": 7.814151653762214e-07, + "loss": 2.0573, + "step": 26313 + }, + { + "epoch": 0.88, + "grad_norm": 0.7173821926116943, + "learning_rate": 7.810033180670429e-07, + "loss": 1.9916, + "step": 26314 + }, + { + "epoch": 0.88, + "grad_norm": 0.7453610301017761, + "learning_rate": 7.805915749086824e-07, + "loss": 1.9909, + "step": 26315 + }, + { + "epoch": 0.88, + "grad_norm": 0.7367833852767944, + "learning_rate": 7.801799359057982e-07, + "loss": 2.0535, + "step": 26316 + }, + { + "epoch": 0.88, + "grad_norm": 0.7298067808151245, + "learning_rate": 7.79768401063038e-07, + "loss": 2.0149, + "step": 26317 + }, + { + "epoch": 0.88, + "grad_norm": 0.7338241338729858, + "learning_rate": 7.793569703850479e-07, + "loss": 2.0353, + "step": 26318 + }, + { + "epoch": 0.88, + "grad_norm": 0.7431285381317139, + "learning_rate": 7.789456438764798e-07, + "loss": 2.1085, + "step": 26319 + }, + { + "epoch": 0.88, + "grad_norm": 0.7254428267478943, + "learning_rate": 7.78534421541981e-07, + "loss": 2.0804, + "step": 26320 + }, + { + "epoch": 0.88, + "grad_norm": 0.7380424737930298, + "learning_rate": 7.781233033861957e-07, + "loss": 2.0775, + "step": 26321 + }, + { + "epoch": 0.88, + "grad_norm": 0.7428423166275024, + "learning_rate": 7.777122894137679e-07, + "loss": 2.1024, + "step": 26322 + }, + { + "epoch": 0.88, + "grad_norm": 0.7416144609451294, + "learning_rate": 7.773013796293439e-07, + "loss": 1.9696, + "step": 26323 + }, + { + "epoch": 0.88, + "grad_norm": 0.7438064217567444, + "learning_rate": 7.76890574037561e-07, + "loss": 1.9977, + "step": 26324 + }, + { + "epoch": 0.88, + "grad_norm": 0.733548104763031, + "learning_rate": 7.764798726430655e-07, + "loss": 2.0378, + "step": 26325 + }, + { + "epoch": 0.88, + "grad_norm": 0.7357192039489746, + "learning_rate": 7.760692754504928e-07, + "loss": 1.994, + "step": 26326 + }, + { + "epoch": 0.88, + "grad_norm": 0.7355829477310181, + "learning_rate": 7.756587824644857e-07, + "loss": 2.0088, + "step": 26327 + }, + { + "epoch": 0.88, + "grad_norm": 0.742100179195404, + "learning_rate": 7.752483936896771e-07, + "loss": 2.0375, + "step": 26328 + }, + { + "epoch": 0.88, + "grad_norm": 0.7232990860939026, + "learning_rate": 7.748381091307089e-07, + "loss": 2.0771, + "step": 26329 + }, + { + "epoch": 0.88, + "grad_norm": 0.7615727782249451, + "learning_rate": 7.74427928792213e-07, + "loss": 2.1025, + "step": 26330 + }, + { + "epoch": 0.88, + "grad_norm": 0.7557610273361206, + "learning_rate": 7.740178526788211e-07, + "loss": 2.0423, + "step": 26331 + }, + { + "epoch": 0.88, + "grad_norm": 0.7456275224685669, + "learning_rate": 7.736078807951696e-07, + "loss": 2.0228, + "step": 26332 + }, + { + "epoch": 0.88, + "grad_norm": 0.7457318902015686, + "learning_rate": 7.731980131458905e-07, + "loss": 2.0496, + "step": 26333 + }, + { + "epoch": 0.88, + "grad_norm": 0.7533623576164246, + "learning_rate": 7.727882497356121e-07, + "loss": 2.0745, + "step": 26334 + }, + { + "epoch": 0.88, + "grad_norm": 0.7600063681602478, + "learning_rate": 7.723785905689629e-07, + "loss": 2.0028, + "step": 26335 + }, + { + "epoch": 0.88, + "grad_norm": 0.8000017404556274, + "learning_rate": 7.719690356505749e-07, + "loss": 2.1033, + "step": 26336 + }, + { + "epoch": 0.88, + "grad_norm": 0.745420515537262, + "learning_rate": 7.715595849850699e-07, + "loss": 1.9973, + "step": 26337 + }, + { + "epoch": 0.88, + "grad_norm": 0.7538821697235107, + "learning_rate": 7.711502385770774e-07, + "loss": 2.0988, + "step": 26338 + }, + { + "epoch": 0.88, + "grad_norm": 0.7479807734489441, + "learning_rate": 7.70740996431224e-07, + "loss": 2.1192, + "step": 26339 + }, + { + "epoch": 0.88, + "grad_norm": 0.740813672542572, + "learning_rate": 7.703318585521257e-07, + "loss": 2.1006, + "step": 26340 + }, + { + "epoch": 0.88, + "grad_norm": 0.7505103349685669, + "learning_rate": 7.69922824944409e-07, + "loss": 2.0687, + "step": 26341 + }, + { + "epoch": 0.88, + "grad_norm": 0.7700856924057007, + "learning_rate": 7.69513895612698e-07, + "loss": 2.0877, + "step": 26342 + }, + { + "epoch": 0.88, + "grad_norm": 0.7559993863105774, + "learning_rate": 7.691050705616077e-07, + "loss": 2.0941, + "step": 26343 + }, + { + "epoch": 0.88, + "grad_norm": 0.7382938265800476, + "learning_rate": 7.686963497957578e-07, + "loss": 2.0838, + "step": 26344 + }, + { + "epoch": 0.88, + "grad_norm": 0.7610565423965454, + "learning_rate": 7.682877333197681e-07, + "loss": 1.9983, + "step": 26345 + }, + { + "epoch": 0.88, + "grad_norm": 0.7649804353713989, + "learning_rate": 7.678792211382513e-07, + "loss": 2.0044, + "step": 26346 + }, + { + "epoch": 0.88, + "grad_norm": 0.7679364085197449, + "learning_rate": 7.674708132558261e-07, + "loss": 2.0719, + "step": 26347 + }, + { + "epoch": 0.88, + "grad_norm": 0.7587807178497314, + "learning_rate": 7.670625096771034e-07, + "loss": 2.0953, + "step": 26348 + }, + { + "epoch": 0.88, + "grad_norm": 0.7314034104347229, + "learning_rate": 7.666543104066992e-07, + "loss": 2.027, + "step": 26349 + }, + { + "epoch": 0.88, + "grad_norm": 0.7628390789031982, + "learning_rate": 7.662462154492212e-07, + "loss": 2.0085, + "step": 26350 + }, + { + "epoch": 0.88, + "grad_norm": 0.7543811202049255, + "learning_rate": 7.658382248092822e-07, + "loss": 2.0245, + "step": 26351 + }, + { + "epoch": 0.88, + "grad_norm": 0.7537243962287903, + "learning_rate": 7.654303384914952e-07, + "loss": 2.007, + "step": 26352 + }, + { + "epoch": 0.88, + "grad_norm": 0.7362833619117737, + "learning_rate": 7.650225565004598e-07, + "loss": 1.9959, + "step": 26353 + }, + { + "epoch": 0.88, + "grad_norm": 0.728485643863678, + "learning_rate": 7.646148788407881e-07, + "loss": 2.0026, + "step": 26354 + }, + { + "epoch": 0.88, + "grad_norm": 0.7340567111968994, + "learning_rate": 7.642073055170862e-07, + "loss": 2.0399, + "step": 26355 + }, + { + "epoch": 0.88, + "grad_norm": 0.7563779950141907, + "learning_rate": 7.637998365339583e-07, + "loss": 2.038, + "step": 26356 + }, + { + "epoch": 0.88, + "grad_norm": 0.7270867824554443, + "learning_rate": 7.633924718960039e-07, + "loss": 2.0107, + "step": 26357 + }, + { + "epoch": 0.88, + "grad_norm": 0.7485671639442444, + "learning_rate": 7.629852116078307e-07, + "loss": 2.0621, + "step": 26358 + }, + { + "epoch": 0.88, + "grad_norm": 0.738422691822052, + "learning_rate": 7.625780556740358e-07, + "loss": 2.0656, + "step": 26359 + }, + { + "epoch": 0.88, + "grad_norm": 0.7685933113098145, + "learning_rate": 7.621710040992192e-07, + "loss": 2.0174, + "step": 26360 + }, + { + "epoch": 0.88, + "grad_norm": 0.7431742548942566, + "learning_rate": 7.617640568879836e-07, + "loss": 2.04, + "step": 26361 + }, + { + "epoch": 0.88, + "grad_norm": 0.7814656496047974, + "learning_rate": 7.613572140449233e-07, + "loss": 2.0558, + "step": 26362 + }, + { + "epoch": 0.88, + "grad_norm": 0.8179240822792053, + "learning_rate": 7.609504755746322e-07, + "loss": 2.0996, + "step": 26363 + }, + { + "epoch": 0.88, + "grad_norm": 0.7479534149169922, + "learning_rate": 7.605438414817101e-07, + "loss": 2.0883, + "step": 26364 + }, + { + "epoch": 0.88, + "grad_norm": 0.7574878931045532, + "learning_rate": 7.6013731177075e-07, + "loss": 2.0474, + "step": 26365 + }, + { + "epoch": 0.88, + "grad_norm": 0.7467697262763977, + "learning_rate": 7.597308864463404e-07, + "loss": 2.0546, + "step": 26366 + }, + { + "epoch": 0.88, + "grad_norm": 0.7816771864891052, + "learning_rate": 7.593245655130766e-07, + "loss": 2.0325, + "step": 26367 + }, + { + "epoch": 0.88, + "grad_norm": 0.7302454710006714, + "learning_rate": 7.589183489755491e-07, + "loss": 1.9852, + "step": 26368 + }, + { + "epoch": 0.88, + "grad_norm": 0.7293674945831299, + "learning_rate": 7.585122368383457e-07, + "loss": 2.0286, + "step": 26369 + }, + { + "epoch": 0.88, + "grad_norm": 0.7350574135780334, + "learning_rate": 7.581062291060559e-07, + "loss": 2.0225, + "step": 26370 + }, + { + "epoch": 0.88, + "grad_norm": 0.7589457035064697, + "learning_rate": 7.577003257832661e-07, + "loss": 2.1276, + "step": 26371 + }, + { + "epoch": 0.88, + "grad_norm": 0.7650185823440552, + "learning_rate": 7.572945268745602e-07, + "loss": 2.0265, + "step": 26372 + }, + { + "epoch": 0.88, + "grad_norm": 0.7286154627799988, + "learning_rate": 7.568888323845236e-07, + "loss": 1.9871, + "step": 26373 + }, + { + "epoch": 0.88, + "grad_norm": 0.7368381023406982, + "learning_rate": 7.564832423177427e-07, + "loss": 2.0295, + "step": 26374 + }, + { + "epoch": 0.88, + "grad_norm": 0.7433767318725586, + "learning_rate": 7.560777566787947e-07, + "loss": 2.0099, + "step": 26375 + }, + { + "epoch": 0.88, + "grad_norm": 0.7593421936035156, + "learning_rate": 7.556723754722617e-07, + "loss": 1.9895, + "step": 26376 + }, + { + "epoch": 0.88, + "grad_norm": 0.7208841443061829, + "learning_rate": 7.552670987027267e-07, + "loss": 1.9887, + "step": 26377 + }, + { + "epoch": 0.88, + "grad_norm": 0.7333230376243591, + "learning_rate": 7.548619263747658e-07, + "loss": 2.0753, + "step": 26378 + }, + { + "epoch": 0.88, + "grad_norm": 0.7555572986602783, + "learning_rate": 7.544568584929546e-07, + "loss": 2.0366, + "step": 26379 + }, + { + "epoch": 0.88, + "grad_norm": 0.7341757416725159, + "learning_rate": 7.540518950618736e-07, + "loss": 2.0295, + "step": 26380 + }, + { + "epoch": 0.88, + "grad_norm": 0.7260982990264893, + "learning_rate": 7.536470360860948e-07, + "loss": 1.9792, + "step": 26381 + }, + { + "epoch": 0.88, + "grad_norm": 0.7694514393806458, + "learning_rate": 7.532422815701912e-07, + "loss": 2.0932, + "step": 26382 + }, + { + "epoch": 0.88, + "grad_norm": 0.7871598601341248, + "learning_rate": 7.528376315187403e-07, + "loss": 2.0693, + "step": 26383 + }, + { + "epoch": 0.88, + "grad_norm": 0.76329505443573, + "learning_rate": 7.524330859363094e-07, + "loss": 2.051, + "step": 26384 + }, + { + "epoch": 0.88, + "grad_norm": 0.778018593788147, + "learning_rate": 7.520286448274694e-07, + "loss": 2.0293, + "step": 26385 + }, + { + "epoch": 0.88, + "grad_norm": 0.7390135526657104, + "learning_rate": 7.5162430819679e-07, + "loss": 2.0497, + "step": 26386 + }, + { + "epoch": 0.88, + "grad_norm": 0.739269495010376, + "learning_rate": 7.512200760488409e-07, + "loss": 2.0223, + "step": 26387 + }, + { + "epoch": 0.88, + "grad_norm": 0.7386905550956726, + "learning_rate": 7.508159483881839e-07, + "loss": 2.0541, + "step": 26388 + }, + { + "epoch": 0.88, + "grad_norm": 0.7574033737182617, + "learning_rate": 7.504119252193886e-07, + "loss": 2.0616, + "step": 26389 + }, + { + "epoch": 0.88, + "grad_norm": 0.7468547821044922, + "learning_rate": 7.500080065470194e-07, + "loss": 2.0074, + "step": 26390 + }, + { + "epoch": 0.88, + "grad_norm": 0.7471441626548767, + "learning_rate": 7.496041923756381e-07, + "loss": 1.9615, + "step": 26391 + }, + { + "epoch": 0.88, + "grad_norm": 0.7442898154258728, + "learning_rate": 7.492004827098088e-07, + "loss": 2.0362, + "step": 26392 + }, + { + "epoch": 0.88, + "grad_norm": 0.7246881127357483, + "learning_rate": 7.487968775540899e-07, + "loss": 2.0067, + "step": 26393 + }, + { + "epoch": 0.88, + "grad_norm": 0.7344276905059814, + "learning_rate": 7.483933769130414e-07, + "loss": 2.0322, + "step": 26394 + }, + { + "epoch": 0.88, + "grad_norm": 0.7571027278900146, + "learning_rate": 7.479899807912227e-07, + "loss": 2.0514, + "step": 26395 + }, + { + "epoch": 0.88, + "grad_norm": 0.7393615245819092, + "learning_rate": 7.475866891931938e-07, + "loss": 1.9932, + "step": 26396 + }, + { + "epoch": 0.88, + "grad_norm": 0.7445781826972961, + "learning_rate": 7.471835021235052e-07, + "loss": 2.0727, + "step": 26397 + }, + { + "epoch": 0.88, + "grad_norm": 0.7247915267944336, + "learning_rate": 7.467804195867145e-07, + "loss": 2.0197, + "step": 26398 + }, + { + "epoch": 0.88, + "grad_norm": 0.7440077066421509, + "learning_rate": 7.46377441587377e-07, + "loss": 2.1172, + "step": 26399 + }, + { + "epoch": 0.88, + "grad_norm": 0.7396876811981201, + "learning_rate": 7.459745681300445e-07, + "loss": 2.0435, + "step": 26400 + }, + { + "epoch": 0.88, + "grad_norm": 0.7395563721656799, + "learning_rate": 7.455717992192657e-07, + "loss": 2.0289, + "step": 26401 + }, + { + "epoch": 0.88, + "grad_norm": 0.741050660610199, + "learning_rate": 7.451691348595957e-07, + "loss": 2.123, + "step": 26402 + }, + { + "epoch": 0.88, + "grad_norm": 0.7924953103065491, + "learning_rate": 7.447665750555788e-07, + "loss": 2.0279, + "step": 26403 + }, + { + "epoch": 0.88, + "grad_norm": 0.808619499206543, + "learning_rate": 7.443641198117646e-07, + "loss": 1.9745, + "step": 26404 + }, + { + "epoch": 0.88, + "grad_norm": 0.7439736127853394, + "learning_rate": 7.439617691327028e-07, + "loss": 2.0293, + "step": 26405 + }, + { + "epoch": 0.88, + "grad_norm": 0.7520738244056702, + "learning_rate": 7.435595230229353e-07, + "loss": 2.0804, + "step": 26406 + }, + { + "epoch": 0.88, + "grad_norm": 0.732196569442749, + "learning_rate": 7.431573814870064e-07, + "loss": 2.0431, + "step": 26407 + }, + { + "epoch": 0.88, + "grad_norm": 0.7328974604606628, + "learning_rate": 7.427553445294622e-07, + "loss": 2.0551, + "step": 26408 + }, + { + "epoch": 0.88, + "grad_norm": 0.7316774129867554, + "learning_rate": 7.423534121548426e-07, + "loss": 2.0681, + "step": 26409 + }, + { + "epoch": 0.88, + "grad_norm": 0.7727085947990417, + "learning_rate": 7.419515843676872e-07, + "loss": 1.9518, + "step": 26410 + }, + { + "epoch": 0.88, + "grad_norm": 0.7414073944091797, + "learning_rate": 7.41549861172537e-07, + "loss": 2.054, + "step": 26411 + }, + { + "epoch": 0.88, + "grad_norm": 0.7564743161201477, + "learning_rate": 7.411482425739314e-07, + "loss": 2.0816, + "step": 26412 + }, + { + "epoch": 0.88, + "grad_norm": 0.7419377565383911, + "learning_rate": 7.407467285764059e-07, + "loss": 2.0969, + "step": 26413 + }, + { + "epoch": 0.88, + "grad_norm": 0.7510760426521301, + "learning_rate": 7.403453191844989e-07, + "loss": 2.0543, + "step": 26414 + }, + { + "epoch": 0.88, + "grad_norm": 0.7635425329208374, + "learning_rate": 7.399440144027436e-07, + "loss": 2.0898, + "step": 26415 + }, + { + "epoch": 0.88, + "grad_norm": 0.7701706290245056, + "learning_rate": 7.395428142356731e-07, + "loss": 2.074, + "step": 26416 + }, + { + "epoch": 0.88, + "grad_norm": 0.7268241047859192, + "learning_rate": 7.391417186878191e-07, + "loss": 2.0605, + "step": 26417 + }, + { + "epoch": 0.88, + "grad_norm": 0.7377090454101562, + "learning_rate": 7.387407277637193e-07, + "loss": 2.0572, + "step": 26418 + }, + { + "epoch": 0.88, + "grad_norm": 0.7595619559288025, + "learning_rate": 7.383398414678955e-07, + "loss": 2.0374, + "step": 26419 + }, + { + "epoch": 0.88, + "grad_norm": 0.7361778020858765, + "learning_rate": 7.379390598048797e-07, + "loss": 2.0078, + "step": 26420 + }, + { + "epoch": 0.88, + "grad_norm": 0.7585855722427368, + "learning_rate": 7.375383827792027e-07, + "loss": 2.0092, + "step": 26421 + }, + { + "epoch": 0.88, + "grad_norm": 0.7654544115066528, + "learning_rate": 7.371378103953863e-07, + "loss": 2.0294, + "step": 26422 + }, + { + "epoch": 0.88, + "grad_norm": 0.7516477704048157, + "learning_rate": 7.367373426579605e-07, + "loss": 2.0207, + "step": 26423 + }, + { + "epoch": 0.88, + "grad_norm": 0.7430716753005981, + "learning_rate": 7.36336979571447e-07, + "loss": 1.9939, + "step": 26424 + }, + { + "epoch": 0.88, + "grad_norm": 0.7393102049827576, + "learning_rate": 7.359367211403679e-07, + "loss": 2.0631, + "step": 26425 + }, + { + "epoch": 0.88, + "grad_norm": 0.7286345362663269, + "learning_rate": 7.355365673692471e-07, + "loss": 2.0357, + "step": 26426 + }, + { + "epoch": 0.88, + "grad_norm": 0.7644442319869995, + "learning_rate": 7.351365182626058e-07, + "loss": 2.0613, + "step": 26427 + }, + { + "epoch": 0.88, + "grad_norm": 0.7338255643844604, + "learning_rate": 7.347365738249624e-07, + "loss": 1.972, + "step": 26428 + }, + { + "epoch": 0.88, + "grad_norm": 0.77260422706604, + "learning_rate": 7.343367340608331e-07, + "loss": 1.9653, + "step": 26429 + }, + { + "epoch": 0.88, + "grad_norm": 0.74418044090271, + "learning_rate": 7.339369989747392e-07, + "loss": 2.0189, + "step": 26430 + }, + { + "epoch": 0.88, + "grad_norm": 0.7452181577682495, + "learning_rate": 7.335373685711944e-07, + "loss": 2.0163, + "step": 26431 + }, + { + "epoch": 0.88, + "grad_norm": 0.7488622069358826, + "learning_rate": 7.33137842854712e-07, + "loss": 2.0367, + "step": 26432 + }, + { + "epoch": 0.88, + "grad_norm": 0.758019745349884, + "learning_rate": 7.327384218298083e-07, + "loss": 2.0248, + "step": 26433 + }, + { + "epoch": 0.88, + "grad_norm": 0.7595903277397156, + "learning_rate": 7.323391055009954e-07, + "loss": 2.0775, + "step": 26434 + }, + { + "epoch": 0.88, + "grad_norm": 0.7825447916984558, + "learning_rate": 7.319398938727829e-07, + "loss": 2.0385, + "step": 26435 + }, + { + "epoch": 0.88, + "grad_norm": 0.7442269921302795, + "learning_rate": 7.315407869496827e-07, + "loss": 1.9977, + "step": 26436 + }, + { + "epoch": 0.88, + "grad_norm": 0.7422558665275574, + "learning_rate": 7.311417847362035e-07, + "loss": 2.118, + "step": 26437 + }, + { + "epoch": 0.88, + "grad_norm": 0.7676902413368225, + "learning_rate": 7.307428872368505e-07, + "loss": 2.0272, + "step": 26438 + }, + { + "epoch": 0.88, + "grad_norm": 0.7200085520744324, + "learning_rate": 7.303440944561324e-07, + "loss": 2.0021, + "step": 26439 + }, + { + "epoch": 0.88, + "grad_norm": 0.7337111830711365, + "learning_rate": 7.299454063985567e-07, + "loss": 2.0585, + "step": 26440 + }, + { + "epoch": 0.88, + "grad_norm": 0.7280609607696533, + "learning_rate": 7.29546823068622e-07, + "loss": 2.0754, + "step": 26441 + }, + { + "epoch": 0.88, + "grad_norm": 0.7327431440353394, + "learning_rate": 7.291483444708347e-07, + "loss": 1.9796, + "step": 26442 + }, + { + "epoch": 0.88, + "grad_norm": 0.7320011854171753, + "learning_rate": 7.287499706096968e-07, + "loss": 2.0279, + "step": 26443 + }, + { + "epoch": 0.88, + "grad_norm": 0.7411849498748779, + "learning_rate": 7.283517014897079e-07, + "loss": 2.0144, + "step": 26444 + }, + { + "epoch": 0.88, + "grad_norm": 0.7268558740615845, + "learning_rate": 7.279535371153689e-07, + "loss": 2.0447, + "step": 26445 + }, + { + "epoch": 0.88, + "grad_norm": 0.7433812022209167, + "learning_rate": 7.275554774911764e-07, + "loss": 1.979, + "step": 26446 + }, + { + "epoch": 0.88, + "grad_norm": 0.7657671570777893, + "learning_rate": 7.271575226216265e-07, + "loss": 2.0262, + "step": 26447 + }, + { + "epoch": 0.88, + "grad_norm": 0.7456580400466919, + "learning_rate": 7.267596725112169e-07, + "loss": 2.0297, + "step": 26448 + }, + { + "epoch": 0.88, + "grad_norm": 0.7219790816307068, + "learning_rate": 7.263619271644429e-07, + "loss": 2.1402, + "step": 26449 + }, + { + "epoch": 0.88, + "grad_norm": 0.7132549285888672, + "learning_rate": 7.259642865857975e-07, + "loss": 2.0373, + "step": 26450 + }, + { + "epoch": 0.88, + "grad_norm": 0.7332443594932556, + "learning_rate": 7.255667507797703e-07, + "loss": 1.946, + "step": 26451 + }, + { + "epoch": 0.88, + "grad_norm": 0.7466189861297607, + "learning_rate": 7.251693197508558e-07, + "loss": 2.0259, + "step": 26452 + }, + { + "epoch": 0.88, + "grad_norm": 0.7678950428962708, + "learning_rate": 7.247719935035436e-07, + "loss": 2.0984, + "step": 26453 + }, + { + "epoch": 0.88, + "grad_norm": 0.7300118803977966, + "learning_rate": 7.243747720423211e-07, + "loss": 2.0864, + "step": 26454 + }, + { + "epoch": 0.88, + "grad_norm": 0.7387140393257141, + "learning_rate": 7.239776553716749e-07, + "loss": 2.0322, + "step": 26455 + }, + { + "epoch": 0.88, + "grad_norm": 0.7114147543907166, + "learning_rate": 7.235806434960946e-07, + "loss": 2.0451, + "step": 26456 + }, + { + "epoch": 0.88, + "grad_norm": 0.765238344669342, + "learning_rate": 7.231837364200623e-07, + "loss": 2.0628, + "step": 26457 + }, + { + "epoch": 0.88, + "grad_norm": 0.7328755259513855, + "learning_rate": 7.227869341480653e-07, + "loss": 1.9951, + "step": 26458 + }, + { + "epoch": 0.88, + "grad_norm": 0.7428206205368042, + "learning_rate": 7.223902366845847e-07, + "loss": 2.1181, + "step": 26459 + }, + { + "epoch": 0.88, + "grad_norm": 0.7576242089271545, + "learning_rate": 7.219936440340991e-07, + "loss": 2.0453, + "step": 26460 + }, + { + "epoch": 0.88, + "grad_norm": 0.7584467530250549, + "learning_rate": 7.215971562010926e-07, + "loss": 2.0591, + "step": 26461 + }, + { + "epoch": 0.88, + "grad_norm": 0.7323764562606812, + "learning_rate": 7.212007731900462e-07, + "loss": 2.0014, + "step": 26462 + }, + { + "epoch": 0.88, + "grad_norm": 0.7369996905326843, + "learning_rate": 7.208044950054338e-07, + "loss": 2.0177, + "step": 26463 + }, + { + "epoch": 0.88, + "grad_norm": 0.7372731566429138, + "learning_rate": 7.204083216517344e-07, + "loss": 2.0571, + "step": 26464 + }, + { + "epoch": 0.88, + "grad_norm": 0.7596994042396545, + "learning_rate": 7.20012253133423e-07, + "loss": 2.0655, + "step": 26465 + }, + { + "epoch": 0.88, + "grad_norm": 0.7575705051422119, + "learning_rate": 7.196162894549741e-07, + "loss": 2.0325, + "step": 26466 + }, + { + "epoch": 0.88, + "grad_norm": 0.7497710585594177, + "learning_rate": 7.192204306208628e-07, + "loss": 2.0041, + "step": 26467 + }, + { + "epoch": 0.88, + "grad_norm": 0.803541362285614, + "learning_rate": 7.188246766355588e-07, + "loss": 2.0959, + "step": 26468 + }, + { + "epoch": 0.88, + "grad_norm": 0.725372314453125, + "learning_rate": 7.184290275035355e-07, + "loss": 1.9858, + "step": 26469 + }, + { + "epoch": 0.88, + "grad_norm": 0.7647196054458618, + "learning_rate": 7.180334832292601e-07, + "loss": 2.1081, + "step": 26470 + }, + { + "epoch": 0.88, + "grad_norm": 0.7598264813423157, + "learning_rate": 7.176380438172048e-07, + "loss": 2.0326, + "step": 26471 + }, + { + "epoch": 0.88, + "grad_norm": 0.7464932203292847, + "learning_rate": 7.172427092718348e-07, + "loss": 2.0343, + "step": 26472 + }, + { + "epoch": 0.88, + "grad_norm": 0.7246004939079285, + "learning_rate": 7.168474795976144e-07, + "loss": 1.9777, + "step": 26473 + }, + { + "epoch": 0.88, + "grad_norm": 0.7622198462486267, + "learning_rate": 7.164523547990099e-07, + "loss": 2.1171, + "step": 26474 + }, + { + "epoch": 0.88, + "grad_norm": 0.7321404218673706, + "learning_rate": 7.160573348804889e-07, + "loss": 2.0479, + "step": 26475 + }, + { + "epoch": 0.88, + "grad_norm": 0.7492873668670654, + "learning_rate": 7.1566241984651e-07, + "loss": 2.0976, + "step": 26476 + }, + { + "epoch": 0.88, + "grad_norm": 0.7410077452659607, + "learning_rate": 7.152676097015354e-07, + "loss": 1.9861, + "step": 26477 + }, + { + "epoch": 0.88, + "grad_norm": 0.7286153435707092, + "learning_rate": 7.148729044500269e-07, + "loss": 2.0148, + "step": 26478 + }, + { + "epoch": 0.88, + "grad_norm": 0.75808185338974, + "learning_rate": 7.144783040964421e-07, + "loss": 2.0016, + "step": 26479 + }, + { + "epoch": 0.88, + "grad_norm": 0.7379709482192993, + "learning_rate": 7.140838086452395e-07, + "loss": 2.0684, + "step": 26480 + }, + { + "epoch": 0.88, + "grad_norm": 0.7698858380317688, + "learning_rate": 7.136894181008768e-07, + "loss": 1.9937, + "step": 26481 + }, + { + "epoch": 0.88, + "grad_norm": 0.7318554520606995, + "learning_rate": 7.132951324678084e-07, + "loss": 2.0797, + "step": 26482 + }, + { + "epoch": 0.88, + "grad_norm": 0.7562581300735474, + "learning_rate": 7.12900951750487e-07, + "loss": 2.0592, + "step": 26483 + }, + { + "epoch": 0.88, + "grad_norm": 0.7352113127708435, + "learning_rate": 7.125068759533704e-07, + "loss": 2.0741, + "step": 26484 + }, + { + "epoch": 0.88, + "grad_norm": 0.7350488901138306, + "learning_rate": 7.121129050809083e-07, + "loss": 2.0109, + "step": 26485 + }, + { + "epoch": 0.88, + "grad_norm": 0.7458146214485168, + "learning_rate": 7.117190391375495e-07, + "loss": 2.0796, + "step": 26486 + }, + { + "epoch": 0.88, + "grad_norm": 0.7478554844856262, + "learning_rate": 7.113252781277468e-07, + "loss": 2.0186, + "step": 26487 + }, + { + "epoch": 0.88, + "grad_norm": 0.7593786120414734, + "learning_rate": 7.10931622055947e-07, + "loss": 2.0402, + "step": 26488 + }, + { + "epoch": 0.88, + "grad_norm": 0.7420054078102112, + "learning_rate": 7.105380709265985e-07, + "loss": 2.0895, + "step": 26489 + }, + { + "epoch": 0.88, + "grad_norm": 0.7457179427146912, + "learning_rate": 7.101446247441446e-07, + "loss": 2.0319, + "step": 26490 + }, + { + "epoch": 0.88, + "grad_norm": 0.7534802556037903, + "learning_rate": 7.097512835130349e-07, + "loss": 2.1183, + "step": 26491 + }, + { + "epoch": 0.88, + "grad_norm": 0.7339127659797668, + "learning_rate": 7.093580472377082e-07, + "loss": 2.066, + "step": 26492 + }, + { + "epoch": 0.88, + "grad_norm": 0.7291773557662964, + "learning_rate": 7.08964915922612e-07, + "loss": 2.0812, + "step": 26493 + }, + { + "epoch": 0.88, + "grad_norm": 0.7058745622634888, + "learning_rate": 7.085718895721849e-07, + "loss": 2.0509, + "step": 26494 + }, + { + "epoch": 0.88, + "grad_norm": 0.7566458582878113, + "learning_rate": 7.081789681908646e-07, + "loss": 2.1196, + "step": 26495 + }, + { + "epoch": 0.88, + "grad_norm": 0.7218544483184814, + "learning_rate": 7.077861517830942e-07, + "loss": 1.984, + "step": 26496 + }, + { + "epoch": 0.88, + "grad_norm": 0.7245951294898987, + "learning_rate": 7.073934403533101e-07, + "loss": 1.9591, + "step": 26497 + }, + { + "epoch": 0.88, + "grad_norm": 0.756462574005127, + "learning_rate": 7.070008339059497e-07, + "loss": 2.035, + "step": 26498 + }, + { + "epoch": 0.88, + "grad_norm": 0.7721906900405884, + "learning_rate": 7.066083324454465e-07, + "loss": 1.9546, + "step": 26499 + }, + { + "epoch": 0.88, + "grad_norm": 0.7380475997924805, + "learning_rate": 7.062159359762378e-07, + "loss": 2.0238, + "step": 26500 + }, + { + "epoch": 0.88, + "grad_norm": 0.7506248950958252, + "learning_rate": 7.058236445027522e-07, + "loss": 2.0171, + "step": 26501 + }, + { + "epoch": 0.88, + "grad_norm": 0.7781503796577454, + "learning_rate": 7.054314580294252e-07, + "loss": 2.1069, + "step": 26502 + }, + { + "epoch": 0.88, + "grad_norm": 0.7556978464126587, + "learning_rate": 7.050393765606878e-07, + "loss": 1.9994, + "step": 26503 + }, + { + "epoch": 0.88, + "grad_norm": 0.7589229345321655, + "learning_rate": 7.046474001009662e-07, + "loss": 2.1239, + "step": 26504 + }, + { + "epoch": 0.88, + "grad_norm": 0.7527299523353577, + "learning_rate": 7.042555286546904e-07, + "loss": 2.0423, + "step": 26505 + }, + { + "epoch": 0.88, + "grad_norm": 0.7582868337631226, + "learning_rate": 7.03863762226289e-07, + "loss": 1.9939, + "step": 26506 + }, + { + "epoch": 0.88, + "grad_norm": 0.7491586804389954, + "learning_rate": 7.034721008201872e-07, + "loss": 2.0746, + "step": 26507 + }, + { + "epoch": 0.88, + "grad_norm": 0.7429332137107849, + "learning_rate": 7.030805444408085e-07, + "loss": 2.0528, + "step": 26508 + }, + { + "epoch": 0.88, + "grad_norm": 0.7391231656074524, + "learning_rate": 7.026890930925778e-07, + "loss": 1.9935, + "step": 26509 + }, + { + "epoch": 0.88, + "grad_norm": 0.7251815795898438, + "learning_rate": 7.022977467799152e-07, + "loss": 2.0139, + "step": 26510 + }, + { + "epoch": 0.88, + "grad_norm": 0.7659398317337036, + "learning_rate": 7.01906505507246e-07, + "loss": 1.9828, + "step": 26511 + }, + { + "epoch": 0.88, + "grad_norm": 0.7408603429794312, + "learning_rate": 7.015153692789866e-07, + "loss": 2.0568, + "step": 26512 + }, + { + "epoch": 0.88, + "grad_norm": 0.7994125485420227, + "learning_rate": 7.011243380995581e-07, + "loss": 2.0755, + "step": 26513 + }, + { + "epoch": 0.88, + "grad_norm": 0.7625914812088013, + "learning_rate": 7.007334119733755e-07, + "loss": 2.0435, + "step": 26514 + }, + { + "epoch": 0.88, + "grad_norm": 0.7196329832077026, + "learning_rate": 7.003425909048578e-07, + "loss": 2.0066, + "step": 26515 + }, + { + "epoch": 0.88, + "grad_norm": 0.7342898845672607, + "learning_rate": 6.999518748984202e-07, + "loss": 1.9848, + "step": 26516 + }, + { + "epoch": 0.88, + "grad_norm": 0.7342272400856018, + "learning_rate": 6.995612639584748e-07, + "loss": 2.0463, + "step": 26517 + }, + { + "epoch": 0.88, + "grad_norm": 0.7354905605316162, + "learning_rate": 6.991707580894346e-07, + "loss": 2.1275, + "step": 26518 + }, + { + "epoch": 0.88, + "grad_norm": 0.7462214231491089, + "learning_rate": 6.987803572957153e-07, + "loss": 2.1382, + "step": 26519 + }, + { + "epoch": 0.88, + "grad_norm": 0.7806128859519958, + "learning_rate": 6.98390061581723e-07, + "loss": 2.0094, + "step": 26520 + }, + { + "epoch": 0.88, + "grad_norm": 0.7756829261779785, + "learning_rate": 6.979998709518677e-07, + "loss": 2.0605, + "step": 26521 + }, + { + "epoch": 0.88, + "grad_norm": 0.7664603590965271, + "learning_rate": 6.976097854105602e-07, + "loss": 1.9872, + "step": 26522 + }, + { + "epoch": 0.88, + "grad_norm": 0.746811032295227, + "learning_rate": 6.972198049622036e-07, + "loss": 2.0335, + "step": 26523 + }, + { + "epoch": 0.88, + "grad_norm": 0.7430893778800964, + "learning_rate": 6.968299296112069e-07, + "loss": 2.0475, + "step": 26524 + }, + { + "epoch": 0.88, + "grad_norm": 0.7427177429199219, + "learning_rate": 6.96440159361974e-07, + "loss": 2.0005, + "step": 26525 + }, + { + "epoch": 0.88, + "grad_norm": 0.7456814050674438, + "learning_rate": 6.960504942189061e-07, + "loss": 2.1197, + "step": 26526 + }, + { + "epoch": 0.88, + "grad_norm": 0.7501267194747925, + "learning_rate": 6.956609341864073e-07, + "loss": 2.0186, + "step": 26527 + }, + { + "epoch": 0.88, + "grad_norm": 0.7502648234367371, + "learning_rate": 6.952714792688797e-07, + "loss": 2.0351, + "step": 26528 + }, + { + "epoch": 0.88, + "grad_norm": 0.7430863380432129, + "learning_rate": 6.94882129470722e-07, + "loss": 1.9725, + "step": 26529 + }, + { + "epoch": 0.88, + "grad_norm": 0.7320888042449951, + "learning_rate": 6.944928847963306e-07, + "loss": 2.0059, + "step": 26530 + }, + { + "epoch": 0.88, + "grad_norm": 0.7501510977745056, + "learning_rate": 6.941037452501076e-07, + "loss": 1.9586, + "step": 26531 + }, + { + "epoch": 0.88, + "grad_norm": 0.7599371671676636, + "learning_rate": 6.937147108364439e-07, + "loss": 2.0579, + "step": 26532 + }, + { + "epoch": 0.88, + "grad_norm": 0.7800934314727783, + "learning_rate": 6.933257815597394e-07, + "loss": 1.979, + "step": 26533 + }, + { + "epoch": 0.88, + "grad_norm": 0.7491557002067566, + "learning_rate": 6.92936957424385e-07, + "loss": 2.0172, + "step": 26534 + }, + { + "epoch": 0.88, + "grad_norm": 0.7377597689628601, + "learning_rate": 6.925482384347748e-07, + "loss": 2.0603, + "step": 26535 + }, + { + "epoch": 0.88, + "grad_norm": 0.7333504557609558, + "learning_rate": 6.921596245952989e-07, + "loss": 2.0038, + "step": 26536 + }, + { + "epoch": 0.88, + "grad_norm": 0.7402035593986511, + "learning_rate": 6.917711159103491e-07, + "loss": 2.041, + "step": 26537 + }, + { + "epoch": 0.88, + "grad_norm": 0.7260160446166992, + "learning_rate": 6.913827123843165e-07, + "loss": 2.0761, + "step": 26538 + }, + { + "epoch": 0.88, + "grad_norm": 0.7535466551780701, + "learning_rate": 6.909944140215841e-07, + "loss": 2.1587, + "step": 26539 + }, + { + "epoch": 0.88, + "grad_norm": 0.7271521687507629, + "learning_rate": 6.906062208265407e-07, + "loss": 2.0885, + "step": 26540 + }, + { + "epoch": 0.88, + "grad_norm": 0.748368501663208, + "learning_rate": 6.90218132803574e-07, + "loss": 2.0732, + "step": 26541 + }, + { + "epoch": 0.88, + "grad_norm": 0.7486216425895691, + "learning_rate": 6.89830149957067e-07, + "loss": 2.0661, + "step": 26542 + }, + { + "epoch": 0.88, + "grad_norm": 0.7737284302711487, + "learning_rate": 6.894422722914007e-07, + "loss": 2.0627, + "step": 26543 + }, + { + "epoch": 0.88, + "grad_norm": 0.9019026756286621, + "learning_rate": 6.890544998109616e-07, + "loss": 2.1682, + "step": 26544 + }, + { + "epoch": 0.88, + "grad_norm": 0.7330599427223206, + "learning_rate": 6.886668325201252e-07, + "loss": 1.9643, + "step": 26545 + }, + { + "epoch": 0.88, + "grad_norm": 0.7535129189491272, + "learning_rate": 6.882792704232743e-07, + "loss": 2.0018, + "step": 26546 + }, + { + "epoch": 0.88, + "grad_norm": 0.7671772837638855, + "learning_rate": 6.878918135247914e-07, + "loss": 2.0099, + "step": 26547 + }, + { + "epoch": 0.88, + "grad_norm": 0.7297726273536682, + "learning_rate": 6.87504461829045e-07, + "loss": 2.0821, + "step": 26548 + }, + { + "epoch": 0.88, + "grad_norm": 0.7395918369293213, + "learning_rate": 6.87117215340416e-07, + "loss": 1.9655, + "step": 26549 + }, + { + "epoch": 0.88, + "grad_norm": 0.7843712568283081, + "learning_rate": 6.867300740632799e-07, + "loss": 2.1788, + "step": 26550 + }, + { + "epoch": 0.88, + "grad_norm": 0.7352198958396912, + "learning_rate": 6.863430380020097e-07, + "loss": 2.0272, + "step": 26551 + }, + { + "epoch": 0.88, + "grad_norm": 0.7744807004928589, + "learning_rate": 6.859561071609755e-07, + "loss": 2.0674, + "step": 26552 + }, + { + "epoch": 0.88, + "grad_norm": 0.7316256165504456, + "learning_rate": 6.855692815445503e-07, + "loss": 2.0129, + "step": 26553 + }, + { + "epoch": 0.88, + "grad_norm": 0.7733190655708313, + "learning_rate": 6.851825611571072e-07, + "loss": 1.9968, + "step": 26554 + }, + { + "epoch": 0.88, + "grad_norm": 0.7294735312461853, + "learning_rate": 6.847959460030118e-07, + "loss": 2.0529, + "step": 26555 + }, + { + "epoch": 0.88, + "grad_norm": 0.7523104548454285, + "learning_rate": 6.844094360866316e-07, + "loss": 1.9091, + "step": 26556 + }, + { + "epoch": 0.88, + "grad_norm": 0.7517918348312378, + "learning_rate": 6.840230314123353e-07, + "loss": 2.0483, + "step": 26557 + }, + { + "epoch": 0.88, + "grad_norm": 0.7677464485168457, + "learning_rate": 6.836367319844861e-07, + "loss": 2.0387, + "step": 26558 + }, + { + "epoch": 0.88, + "grad_norm": 0.7304145693778992, + "learning_rate": 6.832505378074484e-07, + "loss": 1.9952, + "step": 26559 + }, + { + "epoch": 0.88, + "grad_norm": 0.7390241622924805, + "learning_rate": 6.828644488855895e-07, + "loss": 2.0343, + "step": 26560 + }, + { + "epoch": 0.88, + "grad_norm": 0.7728503942489624, + "learning_rate": 6.824784652232641e-07, + "loss": 2.0574, + "step": 26561 + }, + { + "epoch": 0.88, + "grad_norm": 0.7333219647407532, + "learning_rate": 6.820925868248362e-07, + "loss": 2.0294, + "step": 26562 + }, + { + "epoch": 0.88, + "grad_norm": 0.7299553155899048, + "learning_rate": 6.81706813694668e-07, + "loss": 2.1594, + "step": 26563 + }, + { + "epoch": 0.88, + "grad_norm": 0.740734338760376, + "learning_rate": 6.813211458371149e-07, + "loss": 2.0646, + "step": 26564 + }, + { + "epoch": 0.88, + "grad_norm": 0.7588878273963928, + "learning_rate": 6.809355832565323e-07, + "loss": 1.9621, + "step": 26565 + }, + { + "epoch": 0.88, + "grad_norm": 0.7782971262931824, + "learning_rate": 6.805501259572789e-07, + "loss": 2.0286, + "step": 26566 + }, + { + "epoch": 0.88, + "grad_norm": 0.7436327934265137, + "learning_rate": 6.801647739437079e-07, + "loss": 2.0094, + "step": 26567 + }, + { + "epoch": 0.88, + "grad_norm": 0.7539224028587341, + "learning_rate": 6.797795272201735e-07, + "loss": 2.125, + "step": 26568 + }, + { + "epoch": 0.88, + "grad_norm": 0.7687482833862305, + "learning_rate": 6.79394385791029e-07, + "loss": 2.0988, + "step": 26569 + }, + { + "epoch": 0.88, + "grad_norm": 0.758817732334137, + "learning_rate": 6.790093496606243e-07, + "loss": 2.1148, + "step": 26570 + }, + { + "epoch": 0.88, + "grad_norm": 0.7613086700439453, + "learning_rate": 6.786244188333069e-07, + "loss": 2.0495, + "step": 26571 + }, + { + "epoch": 0.88, + "grad_norm": 0.7290220856666565, + "learning_rate": 6.7823959331343e-07, + "loss": 2.0902, + "step": 26572 + }, + { + "epoch": 0.88, + "grad_norm": 0.7710554003715515, + "learning_rate": 6.778548731053403e-07, + "loss": 1.9939, + "step": 26573 + }, + { + "epoch": 0.88, + "grad_norm": 0.7453715801239014, + "learning_rate": 6.774702582133796e-07, + "loss": 2.0528, + "step": 26574 + }, + { + "epoch": 0.88, + "grad_norm": 0.7385093569755554, + "learning_rate": 6.77085748641898e-07, + "loss": 2.0809, + "step": 26575 + }, + { + "epoch": 0.88, + "grad_norm": 0.7329203486442566, + "learning_rate": 6.767013443952386e-07, + "loss": 2.0828, + "step": 26576 + }, + { + "epoch": 0.88, + "grad_norm": 0.7378323078155518, + "learning_rate": 6.763170454777435e-07, + "loss": 2.0382, + "step": 26577 + }, + { + "epoch": 0.88, + "grad_norm": 0.7477156519889832, + "learning_rate": 6.759328518937524e-07, + "loss": 1.9992, + "step": 26578 + }, + { + "epoch": 0.88, + "grad_norm": 0.7646410465240479, + "learning_rate": 6.755487636476088e-07, + "loss": 2.0574, + "step": 26579 + }, + { + "epoch": 0.88, + "grad_norm": 0.7296204566955566, + "learning_rate": 6.751647807436501e-07, + "loss": 1.9569, + "step": 26580 + }, + { + "epoch": 0.88, + "grad_norm": 0.7768306136131287, + "learning_rate": 6.74780903186214e-07, + "loss": 2.079, + "step": 26581 + }, + { + "epoch": 0.88, + "grad_norm": 0.7539736032485962, + "learning_rate": 6.743971309796416e-07, + "loss": 2.1453, + "step": 26582 + }, + { + "epoch": 0.88, + "grad_norm": 0.7256012558937073, + "learning_rate": 6.740134641282614e-07, + "loss": 2.0746, + "step": 26583 + }, + { + "epoch": 0.88, + "grad_norm": 0.7351658940315247, + "learning_rate": 6.736299026364123e-07, + "loss": 2.0537, + "step": 26584 + }, + { + "epoch": 0.88, + "grad_norm": 0.7628863453865051, + "learning_rate": 6.732464465084288e-07, + "loss": 2.0341, + "step": 26585 + }, + { + "epoch": 0.88, + "grad_norm": 0.7522174119949341, + "learning_rate": 6.728630957486393e-07, + "loss": 2.0637, + "step": 26586 + }, + { + "epoch": 0.88, + "grad_norm": 0.7587566375732422, + "learning_rate": 6.724798503613761e-07, + "loss": 2.0603, + "step": 26587 + }, + { + "epoch": 0.88, + "grad_norm": 0.767328679561615, + "learning_rate": 6.720967103509701e-07, + "loss": 2.057, + "step": 26588 + }, + { + "epoch": 0.88, + "grad_norm": 0.7446606159210205, + "learning_rate": 6.717136757217468e-07, + "loss": 2.0484, + "step": 26589 + }, + { + "epoch": 0.88, + "grad_norm": 0.7279035449028015, + "learning_rate": 6.71330746478036e-07, + "loss": 2.0178, + "step": 26590 + }, + { + "epoch": 0.88, + "grad_norm": 0.7354421019554138, + "learning_rate": 6.709479226241644e-07, + "loss": 2.093, + "step": 26591 + }, + { + "epoch": 0.88, + "grad_norm": 0.7535824775695801, + "learning_rate": 6.705652041644562e-07, + "loss": 2.0314, + "step": 26592 + }, + { + "epoch": 0.88, + "grad_norm": 0.7514384984970093, + "learning_rate": 6.701825911032333e-07, + "loss": 2.1024, + "step": 26593 + }, + { + "epoch": 0.88, + "grad_norm": 0.7247806787490845, + "learning_rate": 6.698000834448215e-07, + "loss": 1.9974, + "step": 26594 + }, + { + "epoch": 0.88, + "grad_norm": 0.7281789183616638, + "learning_rate": 6.694176811935394e-07, + "loss": 2.0013, + "step": 26595 + }, + { + "epoch": 0.88, + "grad_norm": 0.7524659037590027, + "learning_rate": 6.690353843537078e-07, + "loss": 1.9814, + "step": 26596 + }, + { + "epoch": 0.88, + "grad_norm": 0.7157271504402161, + "learning_rate": 6.686531929296447e-07, + "loss": 2.0347, + "step": 26597 + }, + { + "epoch": 0.88, + "grad_norm": 0.749940812587738, + "learning_rate": 6.682711069256709e-07, + "loss": 2.0708, + "step": 26598 + }, + { + "epoch": 0.88, + "grad_norm": 0.7544182538986206, + "learning_rate": 6.678891263461007e-07, + "loss": 2.0366, + "step": 26599 + }, + { + "epoch": 0.88, + "grad_norm": 0.7334120869636536, + "learning_rate": 6.675072511952507e-07, + "loss": 2.0014, + "step": 26600 + }, + { + "epoch": 0.89, + "grad_norm": 0.7459714412689209, + "learning_rate": 6.671254814774342e-07, + "loss": 1.9703, + "step": 26601 + }, + { + "epoch": 0.89, + "grad_norm": 0.749575138092041, + "learning_rate": 6.667438171969631e-07, + "loss": 2.0678, + "step": 26602 + }, + { + "epoch": 0.89, + "grad_norm": 0.7445259094238281, + "learning_rate": 6.663622583581508e-07, + "loss": 2.0617, + "step": 26603 + }, + { + "epoch": 0.89, + "grad_norm": 0.7420641183853149, + "learning_rate": 6.659808049653105e-07, + "loss": 2.1126, + "step": 26604 + }, + { + "epoch": 0.89, + "grad_norm": 0.7307288646697998, + "learning_rate": 6.655994570227453e-07, + "loss": 2.0278, + "step": 26605 + }, + { + "epoch": 0.89, + "grad_norm": 0.7178513407707214, + "learning_rate": 6.652182145347675e-07, + "loss": 2.0018, + "step": 26606 + }, + { + "epoch": 0.89, + "grad_norm": 0.7375272512435913, + "learning_rate": 6.648370775056845e-07, + "loss": 2.0152, + "step": 26607 + }, + { + "epoch": 0.89, + "grad_norm": 0.7567374110221863, + "learning_rate": 6.64456045939802e-07, + "loss": 2.0047, + "step": 26608 + }, + { + "epoch": 0.89, + "grad_norm": 0.7590065598487854, + "learning_rate": 6.64075119841422e-07, + "loss": 2.0321, + "step": 26609 + }, + { + "epoch": 0.89, + "grad_norm": 0.7293776869773865, + "learning_rate": 6.636942992148521e-07, + "loss": 2.0504, + "step": 26610 + }, + { + "epoch": 0.89, + "grad_norm": 0.7744541764259338, + "learning_rate": 6.633135840643901e-07, + "loss": 2.0966, + "step": 26611 + }, + { + "epoch": 0.89, + "grad_norm": 0.7227846384048462, + "learning_rate": 6.629329743943392e-07, + "loss": 2.0521, + "step": 26612 + }, + { + "epoch": 0.89, + "grad_norm": 0.7318065762519836, + "learning_rate": 6.625524702090013e-07, + "loss": 2.0865, + "step": 26613 + }, + { + "epoch": 0.89, + "grad_norm": 0.7435641288757324, + "learning_rate": 6.621720715126745e-07, + "loss": 2.065, + "step": 26614 + }, + { + "epoch": 0.89, + "grad_norm": 0.7608646154403687, + "learning_rate": 6.617917783096517e-07, + "loss": 2.083, + "step": 26615 + }, + { + "epoch": 0.89, + "grad_norm": 0.7389757037162781, + "learning_rate": 6.614115906042351e-07, + "loss": 2.0568, + "step": 26616 + }, + { + "epoch": 0.89, + "grad_norm": 0.7342638373374939, + "learning_rate": 6.610315084007179e-07, + "loss": 2.0746, + "step": 26617 + }, + { + "epoch": 0.89, + "grad_norm": 0.7387537360191345, + "learning_rate": 6.606515317033912e-07, + "loss": 2.0029, + "step": 26618 + }, + { + "epoch": 0.89, + "grad_norm": 0.7389355897903442, + "learning_rate": 6.602716605165504e-07, + "loss": 2.1011, + "step": 26619 + }, + { + "epoch": 0.89, + "grad_norm": 0.7701226472854614, + "learning_rate": 6.598918948444877e-07, + "loss": 2.1454, + "step": 26620 + }, + { + "epoch": 0.89, + "grad_norm": 0.7223533987998962, + "learning_rate": 6.595122346914918e-07, + "loss": 2.034, + "step": 26621 + }, + { + "epoch": 0.89, + "grad_norm": 0.7490042448043823, + "learning_rate": 6.591326800618536e-07, + "loss": 2.0712, + "step": 26622 + }, + { + "epoch": 0.89, + "grad_norm": 0.764431893825531, + "learning_rate": 6.58753230959861e-07, + "loss": 2.0194, + "step": 26623 + }, + { + "epoch": 0.89, + "grad_norm": 0.7420855760574341, + "learning_rate": 6.583738873897971e-07, + "loss": 1.9941, + "step": 26624 + }, + { + "epoch": 0.89, + "grad_norm": 0.7364943623542786, + "learning_rate": 6.579946493559519e-07, + "loss": 2.0328, + "step": 26625 + }, + { + "epoch": 0.89, + "grad_norm": 0.7223743796348572, + "learning_rate": 6.576155168626097e-07, + "loss": 2.0399, + "step": 26626 + }, + { + "epoch": 0.89, + "grad_norm": 0.7158247232437134, + "learning_rate": 6.572364899140505e-07, + "loss": 1.9856, + "step": 26627 + }, + { + "epoch": 0.89, + "grad_norm": 0.7540010213851929, + "learning_rate": 6.568575685145561e-07, + "loss": 2.061, + "step": 26628 + }, + { + "epoch": 0.89, + "grad_norm": 0.7794114947319031, + "learning_rate": 6.564787526684124e-07, + "loss": 2.047, + "step": 26629 + }, + { + "epoch": 0.89, + "grad_norm": 0.7541570067405701, + "learning_rate": 6.561000423798935e-07, + "loss": 1.9976, + "step": 26630 + }, + { + "epoch": 0.89, + "grad_norm": 0.7274417877197266, + "learning_rate": 6.557214376532828e-07, + "loss": 2.0425, + "step": 26631 + }, + { + "epoch": 0.89, + "grad_norm": 0.7578456401824951, + "learning_rate": 6.553429384928545e-07, + "loss": 1.9801, + "step": 26632 + }, + { + "epoch": 0.89, + "grad_norm": 0.8011856079101562, + "learning_rate": 6.549645449028841e-07, + "loss": 1.972, + "step": 26633 + }, + { + "epoch": 0.89, + "grad_norm": 0.7068088054656982, + "learning_rate": 6.545862568876471e-07, + "loss": 2.0893, + "step": 26634 + }, + { + "epoch": 0.89, + "grad_norm": 0.7521716952323914, + "learning_rate": 6.54208074451419e-07, + "loss": 2.0226, + "step": 26635 + }, + { + "epoch": 0.89, + "grad_norm": 0.7356855869293213, + "learning_rate": 6.538299975984708e-07, + "loss": 2.0948, + "step": 26636 + }, + { + "epoch": 0.89, + "grad_norm": 0.7715505957603455, + "learning_rate": 6.534520263330723e-07, + "loss": 1.9535, + "step": 26637 + }, + { + "epoch": 0.89, + "grad_norm": 0.7470303177833557, + "learning_rate": 6.53074160659496e-07, + "loss": 2.123, + "step": 26638 + }, + { + "epoch": 0.89, + "grad_norm": 0.7485473155975342, + "learning_rate": 6.526964005820124e-07, + "loss": 2.0757, + "step": 26639 + }, + { + "epoch": 0.89, + "grad_norm": 0.702972412109375, + "learning_rate": 6.52318746104883e-07, + "loss": 2.0808, + "step": 26640 + }, + { + "epoch": 0.89, + "grad_norm": 0.7370464205741882, + "learning_rate": 6.519411972323797e-07, + "loss": 2.0023, + "step": 26641 + }, + { + "epoch": 0.89, + "grad_norm": 0.7066406011581421, + "learning_rate": 6.515637539687669e-07, + "loss": 2.0181, + "step": 26642 + }, + { + "epoch": 0.89, + "grad_norm": 0.7694652080535889, + "learning_rate": 6.511864163183068e-07, + "loss": 2.0334, + "step": 26643 + }, + { + "epoch": 0.89, + "grad_norm": 0.7401752471923828, + "learning_rate": 6.508091842852649e-07, + "loss": 2.0354, + "step": 26644 + }, + { + "epoch": 0.89, + "grad_norm": 0.7737911343574524, + "learning_rate": 6.504320578739021e-07, + "loss": 2.0406, + "step": 26645 + }, + { + "epoch": 0.89, + "grad_norm": 0.7554739117622375, + "learning_rate": 6.500550370884762e-07, + "loss": 2.0095, + "step": 26646 + }, + { + "epoch": 0.89, + "grad_norm": 0.7552582621574402, + "learning_rate": 6.496781219332493e-07, + "loss": 2.0524, + "step": 26647 + }, + { + "epoch": 0.89, + "grad_norm": 0.7447125315666199, + "learning_rate": 6.493013124124825e-07, + "loss": 1.9991, + "step": 26648 + }, + { + "epoch": 0.89, + "grad_norm": 0.7421413064002991, + "learning_rate": 6.489246085304268e-07, + "loss": 2.0082, + "step": 26649 + }, + { + "epoch": 0.89, + "grad_norm": 0.7449020743370056, + "learning_rate": 6.4854801029134e-07, + "loss": 1.9932, + "step": 26650 + }, + { + "epoch": 0.89, + "grad_norm": 0.7505519986152649, + "learning_rate": 6.481715176994785e-07, + "loss": 2.0261, + "step": 26651 + }, + { + "epoch": 0.89, + "grad_norm": 0.7160792350769043, + "learning_rate": 6.477951307590935e-07, + "loss": 2.0164, + "step": 26652 + }, + { + "epoch": 0.89, + "grad_norm": 0.7548861503601074, + "learning_rate": 6.474188494744394e-07, + "loss": 2.0215, + "step": 26653 + }, + { + "epoch": 0.89, + "grad_norm": 0.7338448762893677, + "learning_rate": 6.470426738497649e-07, + "loss": 2.0608, + "step": 26654 + }, + { + "epoch": 0.89, + "grad_norm": 0.7523289322853088, + "learning_rate": 6.466666038893221e-07, + "loss": 2.0878, + "step": 26655 + }, + { + "epoch": 0.89, + "grad_norm": 0.7652626633644104, + "learning_rate": 6.462906395973567e-07, + "loss": 2.03, + "step": 26656 + }, + { + "epoch": 0.89, + "grad_norm": 0.7147501111030579, + "learning_rate": 6.459147809781196e-07, + "loss": 2.0526, + "step": 26657 + }, + { + "epoch": 0.89, + "grad_norm": 0.7607391476631165, + "learning_rate": 6.455390280358553e-07, + "loss": 2.0331, + "step": 26658 + }, + { + "epoch": 0.89, + "grad_norm": 0.739440381526947, + "learning_rate": 6.45163380774807e-07, + "loss": 2.0174, + "step": 26659 + }, + { + "epoch": 0.89, + "grad_norm": 0.7463353872299194, + "learning_rate": 6.447878391992201e-07, + "loss": 2.0591, + "step": 26660 + }, + { + "epoch": 0.89, + "grad_norm": 0.7504194378852844, + "learning_rate": 6.444124033133403e-07, + "loss": 2.0646, + "step": 26661 + }, + { + "epoch": 0.89, + "grad_norm": 0.7576066851615906, + "learning_rate": 6.440370731214051e-07, + "loss": 1.9744, + "step": 26662 + }, + { + "epoch": 0.89, + "grad_norm": 0.7382180094718933, + "learning_rate": 6.436618486276547e-07, + "loss": 2.0781, + "step": 26663 + }, + { + "epoch": 0.89, + "grad_norm": 0.7660902738571167, + "learning_rate": 6.43286729836331e-07, + "loss": 2.0142, + "step": 26664 + }, + { + "epoch": 0.89, + "grad_norm": 0.7437325716018677, + "learning_rate": 6.429117167516685e-07, + "loss": 2.0495, + "step": 26665 + }, + { + "epoch": 0.89, + "grad_norm": 0.7604483962059021, + "learning_rate": 6.425368093779071e-07, + "loss": 2.0756, + "step": 26666 + }, + { + "epoch": 0.89, + "grad_norm": 0.7335903644561768, + "learning_rate": 6.421620077192814e-07, + "loss": 1.9702, + "step": 26667 + }, + { + "epoch": 0.89, + "grad_norm": 0.7337174415588379, + "learning_rate": 6.417873117800233e-07, + "loss": 2.0081, + "step": 26668 + }, + { + "epoch": 0.89, + "grad_norm": 0.7416128516197205, + "learning_rate": 6.414127215643672e-07, + "loss": 2.0037, + "step": 26669 + }, + { + "epoch": 0.89, + "grad_norm": 0.7363864779472351, + "learning_rate": 6.410382370765477e-07, + "loss": 2.0788, + "step": 26670 + }, + { + "epoch": 0.89, + "grad_norm": 0.789065420627594, + "learning_rate": 6.406638583207935e-07, + "loss": 2.0492, + "step": 26671 + }, + { + "epoch": 0.89, + "grad_norm": 0.7818248271942139, + "learning_rate": 6.402895853013314e-07, + "loss": 2.0406, + "step": 26672 + }, + { + "epoch": 0.89, + "grad_norm": 0.7138779163360596, + "learning_rate": 6.399154180223943e-07, + "loss": 2.0407, + "step": 26673 + }, + { + "epoch": 0.89, + "grad_norm": 0.7794494032859802, + "learning_rate": 6.395413564882058e-07, + "loss": 2.028, + "step": 26674 + }, + { + "epoch": 0.89, + "grad_norm": 0.7516987323760986, + "learning_rate": 6.391674007029946e-07, + "loss": 2.0956, + "step": 26675 + }, + { + "epoch": 0.89, + "grad_norm": 0.772293210029602, + "learning_rate": 6.38793550670983e-07, + "loss": 2.0807, + "step": 26676 + }, + { + "epoch": 0.89, + "grad_norm": 0.7267881035804749, + "learning_rate": 6.384198063963964e-07, + "loss": 2.0138, + "step": 26677 + }, + { + "epoch": 0.89, + "grad_norm": 0.7654637694358826, + "learning_rate": 6.380461678834559e-07, + "loss": 2.0191, + "step": 26678 + }, + { + "epoch": 0.89, + "grad_norm": 0.7666645646095276, + "learning_rate": 6.376726351363837e-07, + "loss": 2.0433, + "step": 26679 + }, + { + "epoch": 0.89, + "grad_norm": 0.759799599647522, + "learning_rate": 6.372992081593999e-07, + "loss": 2.0525, + "step": 26680 + }, + { + "epoch": 0.89, + "grad_norm": 0.7738442420959473, + "learning_rate": 6.369258869567207e-07, + "loss": 2.1088, + "step": 26681 + }, + { + "epoch": 0.89, + "grad_norm": 0.7601478695869446, + "learning_rate": 6.365526715325665e-07, + "loss": 2.0233, + "step": 26682 + }, + { + "epoch": 0.89, + "grad_norm": 0.7458545565605164, + "learning_rate": 6.361795618911526e-07, + "loss": 2.0189, + "step": 26683 + }, + { + "epoch": 0.89, + "grad_norm": 0.7551502585411072, + "learning_rate": 6.358065580366957e-07, + "loss": 2.1416, + "step": 26684 + }, + { + "epoch": 0.89, + "grad_norm": 0.7482866644859314, + "learning_rate": 6.354336599734057e-07, + "loss": 2.0372, + "step": 26685 + }, + { + "epoch": 0.89, + "grad_norm": 0.7730600237846375, + "learning_rate": 6.350608677055003e-07, + "loss": 2.0285, + "step": 26686 + }, + { + "epoch": 0.89, + "grad_norm": 0.738500714302063, + "learning_rate": 6.346881812371875e-07, + "loss": 2.0684, + "step": 26687 + }, + { + "epoch": 0.89, + "grad_norm": 0.7263556718826294, + "learning_rate": 6.343156005726791e-07, + "loss": 2.0719, + "step": 26688 + }, + { + "epoch": 0.89, + "grad_norm": 0.7477583885192871, + "learning_rate": 6.339431257161854e-07, + "loss": 2.0632, + "step": 26689 + }, + { + "epoch": 0.89, + "grad_norm": 0.7344197630882263, + "learning_rate": 6.335707566719118e-07, + "loss": 2.0045, + "step": 26690 + }, + { + "epoch": 0.89, + "grad_norm": 0.7668885588645935, + "learning_rate": 6.33198493444066e-07, + "loss": 2.0132, + "step": 26691 + }, + { + "epoch": 0.89, + "grad_norm": 0.7566169500350952, + "learning_rate": 6.328263360368558e-07, + "loss": 2.0693, + "step": 26692 + }, + { + "epoch": 0.89, + "grad_norm": 0.7373117804527283, + "learning_rate": 6.324542844544846e-07, + "loss": 2.0407, + "step": 26693 + }, + { + "epoch": 0.89, + "grad_norm": 0.7705291509628296, + "learning_rate": 6.320823387011521e-07, + "loss": 2.0836, + "step": 26694 + }, + { + "epoch": 0.89, + "grad_norm": 0.7576350569725037, + "learning_rate": 6.317104987810662e-07, + "loss": 2.0866, + "step": 26695 + }, + { + "epoch": 0.89, + "grad_norm": 0.7461427450180054, + "learning_rate": 6.313387646984226e-07, + "loss": 1.9305, + "step": 26696 + }, + { + "epoch": 0.89, + "grad_norm": 0.7482441663742065, + "learning_rate": 6.309671364574243e-07, + "loss": 1.9479, + "step": 26697 + }, + { + "epoch": 0.89, + "grad_norm": 0.7405073046684265, + "learning_rate": 6.305956140622671e-07, + "loss": 2.0782, + "step": 26698 + }, + { + "epoch": 0.89, + "grad_norm": 0.7503206729888916, + "learning_rate": 6.302241975171508e-07, + "loss": 2.1534, + "step": 26699 + }, + { + "epoch": 0.89, + "grad_norm": 0.7325937151908875, + "learning_rate": 6.298528868262699e-07, + "loss": 2.0566, + "step": 26700 + }, + { + "epoch": 0.89, + "grad_norm": 0.7592355608940125, + "learning_rate": 6.294816819938198e-07, + "loss": 2.053, + "step": 26701 + }, + { + "epoch": 0.89, + "grad_norm": 0.7623224258422852, + "learning_rate": 6.291105830239952e-07, + "loss": 2.0841, + "step": 26702 + }, + { + "epoch": 0.89, + "grad_norm": 0.7596186995506287, + "learning_rate": 6.287395899209847e-07, + "loss": 2.1019, + "step": 26703 + }, + { + "epoch": 0.89, + "grad_norm": 0.7117161750793457, + "learning_rate": 6.283687026889829e-07, + "loss": 1.9914, + "step": 26704 + }, + { + "epoch": 0.89, + "grad_norm": 0.7472636699676514, + "learning_rate": 6.279979213321807e-07, + "loss": 2.0721, + "step": 26705 + }, + { + "epoch": 0.89, + "grad_norm": 0.7442077994346619, + "learning_rate": 6.27627245854765e-07, + "loss": 2.0556, + "step": 26706 + }, + { + "epoch": 0.89, + "grad_norm": 0.741631805896759, + "learning_rate": 6.272566762609234e-07, + "loss": 1.9991, + "step": 26707 + }, + { + "epoch": 0.89, + "grad_norm": 0.7421914935112, + "learning_rate": 6.268862125548436e-07, + "loss": 2.045, + "step": 26708 + }, + { + "epoch": 0.89, + "grad_norm": 0.7307175993919373, + "learning_rate": 6.265158547407091e-07, + "loss": 2.0202, + "step": 26709 + }, + { + "epoch": 0.89, + "grad_norm": 0.7356524467468262, + "learning_rate": 6.261456028227064e-07, + "loss": 2.1394, + "step": 26710 + }, + { + "epoch": 0.89, + "grad_norm": 0.7400479316711426, + "learning_rate": 6.257754568050167e-07, + "loss": 2.0922, + "step": 26711 + }, + { + "epoch": 0.89, + "grad_norm": 0.8011283874511719, + "learning_rate": 6.254054166918211e-07, + "loss": 1.9984, + "step": 26712 + }, + { + "epoch": 0.89, + "grad_norm": 0.7177273035049438, + "learning_rate": 6.250354824873006e-07, + "loss": 1.9862, + "step": 26713 + }, + { + "epoch": 0.89, + "grad_norm": 0.7417380809783936, + "learning_rate": 6.246656541956364e-07, + "loss": 2.0417, + "step": 26714 + }, + { + "epoch": 0.89, + "grad_norm": 0.7765234708786011, + "learning_rate": 6.242959318210052e-07, + "loss": 2.0012, + "step": 26715 + }, + { + "epoch": 0.89, + "grad_norm": 0.7295082807540894, + "learning_rate": 6.239263153675823e-07, + "loss": 2.05, + "step": 26716 + }, + { + "epoch": 0.89, + "grad_norm": 0.7536152601242065, + "learning_rate": 6.235568048395468e-07, + "loss": 1.9934, + "step": 26717 + }, + { + "epoch": 0.89, + "grad_norm": 0.7230278253555298, + "learning_rate": 6.231874002410699e-07, + "loss": 2.0462, + "step": 26718 + }, + { + "epoch": 0.89, + "grad_norm": 0.767373263835907, + "learning_rate": 6.228181015763279e-07, + "loss": 2.0425, + "step": 26719 + }, + { + "epoch": 0.89, + "grad_norm": 0.7331621646881104, + "learning_rate": 6.2244890884949e-07, + "loss": 2.0679, + "step": 26720 + }, + { + "epoch": 0.89, + "grad_norm": 0.7705336213111877, + "learning_rate": 6.220798220647295e-07, + "loss": 2.0436, + "step": 26721 + }, + { + "epoch": 0.89, + "grad_norm": 0.7567675113677979, + "learning_rate": 6.217108412262141e-07, + "loss": 2.1064, + "step": 26722 + }, + { + "epoch": 0.89, + "grad_norm": 0.7405493259429932, + "learning_rate": 6.213419663381149e-07, + "loss": 2.05, + "step": 26723 + }, + { + "epoch": 0.89, + "grad_norm": 0.7321170568466187, + "learning_rate": 6.209731974045985e-07, + "loss": 2.1023, + "step": 26724 + }, + { + "epoch": 0.89, + "grad_norm": 0.7282808423042297, + "learning_rate": 6.206045344298273e-07, + "loss": 1.9804, + "step": 26725 + }, + { + "epoch": 0.89, + "grad_norm": 0.7823128700256348, + "learning_rate": 6.202359774179701e-07, + "loss": 2.0998, + "step": 26726 + }, + { + "epoch": 0.89, + "grad_norm": 0.7819924354553223, + "learning_rate": 6.198675263731912e-07, + "loss": 2.1141, + "step": 26727 + }, + { + "epoch": 0.89, + "grad_norm": 0.7460556626319885, + "learning_rate": 6.194991812996509e-07, + "loss": 2.0672, + "step": 26728 + }, + { + "epoch": 0.89, + "grad_norm": 0.7619464993476868, + "learning_rate": 6.191309422015101e-07, + "loss": 2.1034, + "step": 26729 + }, + { + "epoch": 0.89, + "grad_norm": 0.7551543116569519, + "learning_rate": 6.187628090829322e-07, + "loss": 2.1001, + "step": 26730 + }, + { + "epoch": 0.89, + "grad_norm": 0.7451285123825073, + "learning_rate": 6.183947819480729e-07, + "loss": 2.0541, + "step": 26731 + }, + { + "epoch": 0.89, + "grad_norm": 0.7397496104240417, + "learning_rate": 6.18026860801092e-07, + "loss": 2.0694, + "step": 26732 + }, + { + "epoch": 0.89, + "grad_norm": 0.7273218631744385, + "learning_rate": 6.176590456461451e-07, + "loss": 2.0041, + "step": 26733 + }, + { + "epoch": 0.89, + "grad_norm": 0.746175229549408, + "learning_rate": 6.172913364873867e-07, + "loss": 2.0579, + "step": 26734 + }, + { + "epoch": 0.89, + "grad_norm": 0.7669066786766052, + "learning_rate": 6.169237333289723e-07, + "loss": 2.0756, + "step": 26735 + }, + { + "epoch": 0.89, + "grad_norm": 0.7393285632133484, + "learning_rate": 6.165562361750555e-07, + "loss": 2.0759, + "step": 26736 + }, + { + "epoch": 0.89, + "grad_norm": 0.739879846572876, + "learning_rate": 6.161888450297871e-07, + "loss": 2.0505, + "step": 26737 + }, + { + "epoch": 0.89, + "grad_norm": 0.7532406449317932, + "learning_rate": 6.158215598973161e-07, + "loss": 2.0687, + "step": 26738 + }, + { + "epoch": 0.89, + "grad_norm": 0.7550314664840698, + "learning_rate": 6.154543807817936e-07, + "loss": 2.0667, + "step": 26739 + }, + { + "epoch": 0.89, + "grad_norm": 0.7731047868728638, + "learning_rate": 6.150873076873698e-07, + "loss": 2.075, + "step": 26740 + }, + { + "epoch": 0.89, + "grad_norm": 0.7553466558456421, + "learning_rate": 6.14720340618189e-07, + "loss": 2.0338, + "step": 26741 + }, + { + "epoch": 0.89, + "grad_norm": 0.7297695279121399, + "learning_rate": 6.143534795783956e-07, + "loss": 2.0763, + "step": 26742 + }, + { + "epoch": 0.89, + "grad_norm": 0.7507240176200867, + "learning_rate": 6.139867245721376e-07, + "loss": 2.0242, + "step": 26743 + }, + { + "epoch": 0.89, + "grad_norm": 0.7413674592971802, + "learning_rate": 6.13620075603556e-07, + "loss": 2.0146, + "step": 26744 + }, + { + "epoch": 0.89, + "grad_norm": 0.7299187183380127, + "learning_rate": 6.13253532676793e-07, + "loss": 2.0222, + "step": 26745 + }, + { + "epoch": 0.89, + "grad_norm": 0.7330113053321838, + "learning_rate": 6.128870957959932e-07, + "loss": 2.0123, + "step": 26746 + }, + { + "epoch": 0.89, + "grad_norm": 0.7312183380126953, + "learning_rate": 6.12520764965292e-07, + "loss": 2.0255, + "step": 26747 + }, + { + "epoch": 0.89, + "grad_norm": 0.7326365113258362, + "learning_rate": 6.121545401888285e-07, + "loss": 2.0036, + "step": 26748 + }, + { + "epoch": 0.89, + "grad_norm": 0.7596232891082764, + "learning_rate": 6.117884214707426e-07, + "loss": 2.0618, + "step": 26749 + }, + { + "epoch": 0.89, + "grad_norm": 0.7574792504310608, + "learning_rate": 6.114224088151698e-07, + "loss": 2.0285, + "step": 26750 + }, + { + "epoch": 0.89, + "grad_norm": 0.7369068264961243, + "learning_rate": 6.110565022262426e-07, + "loss": 1.9883, + "step": 26751 + }, + { + "epoch": 0.89, + "grad_norm": 0.7699119448661804, + "learning_rate": 6.106907017080976e-07, + "loss": 2.0829, + "step": 26752 + }, + { + "epoch": 0.89, + "grad_norm": 0.7466102242469788, + "learning_rate": 6.103250072648659e-07, + "loss": 2.084, + "step": 26753 + }, + { + "epoch": 0.89, + "grad_norm": 0.7960851788520813, + "learning_rate": 6.099594189006796e-07, + "loss": 2.1026, + "step": 26754 + }, + { + "epoch": 0.89, + "grad_norm": 0.7456473112106323, + "learning_rate": 6.095939366196679e-07, + "loss": 2.0467, + "step": 26755 + }, + { + "epoch": 0.89, + "grad_norm": 0.7280260920524597, + "learning_rate": 6.092285604259618e-07, + "loss": 2.0334, + "step": 26756 + }, + { + "epoch": 0.89, + "grad_norm": 0.7388403415679932, + "learning_rate": 6.088632903236869e-07, + "loss": 1.949, + "step": 26757 + }, + { + "epoch": 0.89, + "grad_norm": 0.7621145248413086, + "learning_rate": 6.084981263169721e-07, + "loss": 2.0272, + "step": 26758 + }, + { + "epoch": 0.89, + "grad_norm": 0.7531647086143494, + "learning_rate": 6.081330684099418e-07, + "loss": 2.036, + "step": 26759 + }, + { + "epoch": 0.89, + "grad_norm": 0.7246076464653015, + "learning_rate": 6.077681166067173e-07, + "loss": 1.9869, + "step": 26760 + }, + { + "epoch": 0.89, + "grad_norm": 0.7324761748313904, + "learning_rate": 6.074032709114252e-07, + "loss": 2.0079, + "step": 26761 + }, + { + "epoch": 0.89, + "grad_norm": 0.7732455730438232, + "learning_rate": 6.070385313281879e-07, + "loss": 2.0995, + "step": 26762 + }, + { + "epoch": 0.89, + "grad_norm": 0.7558773756027222, + "learning_rate": 6.066738978611242e-07, + "loss": 2.0689, + "step": 26763 + }, + { + "epoch": 0.89, + "grad_norm": 0.7268763184547424, + "learning_rate": 6.063093705143519e-07, + "loss": 2.0867, + "step": 26764 + }, + { + "epoch": 0.89, + "grad_norm": 0.716711699962616, + "learning_rate": 6.059449492919933e-07, + "loss": 2.0266, + "step": 26765 + }, + { + "epoch": 0.89, + "grad_norm": 0.7511341571807861, + "learning_rate": 6.055806341981607e-07, + "loss": 2.1055, + "step": 26766 + }, + { + "epoch": 0.89, + "grad_norm": 0.7269211411476135, + "learning_rate": 6.052164252369729e-07, + "loss": 2.0423, + "step": 26767 + }, + { + "epoch": 0.89, + "grad_norm": 0.7347042560577393, + "learning_rate": 6.04852322412548e-07, + "loss": 2.0295, + "step": 26768 + }, + { + "epoch": 0.89, + "grad_norm": 0.7339388728141785, + "learning_rate": 6.044883257289913e-07, + "loss": 2.0629, + "step": 26769 + }, + { + "epoch": 0.89, + "grad_norm": 0.7583845853805542, + "learning_rate": 6.041244351904197e-07, + "loss": 2.0855, + "step": 26770 + }, + { + "epoch": 0.89, + "grad_norm": 0.7364854216575623, + "learning_rate": 6.037606508009453e-07, + "loss": 2.0583, + "step": 26771 + }, + { + "epoch": 0.89, + "grad_norm": 0.7215999960899353, + "learning_rate": 6.03396972564676e-07, + "loss": 1.9929, + "step": 26772 + }, + { + "epoch": 0.89, + "grad_norm": 0.7856223583221436, + "learning_rate": 6.030334004857186e-07, + "loss": 2.001, + "step": 26773 + }, + { + "epoch": 0.89, + "grad_norm": 0.7823106050491333, + "learning_rate": 6.026699345681852e-07, + "loss": 2.0884, + "step": 26774 + }, + { + "epoch": 0.89, + "grad_norm": 0.7538570761680603, + "learning_rate": 6.023065748161782e-07, + "loss": 2.0205, + "step": 26775 + }, + { + "epoch": 0.89, + "grad_norm": 0.7534996271133423, + "learning_rate": 6.019433212338033e-07, + "loss": 2.0608, + "step": 26776 + }, + { + "epoch": 0.89, + "grad_norm": 0.7311971187591553, + "learning_rate": 6.015801738251659e-07, + "loss": 2.0954, + "step": 26777 + }, + { + "epoch": 0.89, + "grad_norm": 0.7502958178520203, + "learning_rate": 6.012171325943683e-07, + "loss": 2.0221, + "step": 26778 + }, + { + "epoch": 0.89, + "grad_norm": 0.7447825074195862, + "learning_rate": 6.008541975455106e-07, + "loss": 2.0173, + "step": 26779 + }, + { + "epoch": 0.89, + "grad_norm": 0.7507491111755371, + "learning_rate": 6.004913686826941e-07, + "loss": 2.0259, + "step": 26780 + }, + { + "epoch": 0.89, + "grad_norm": 0.789646327495575, + "learning_rate": 6.001286460100186e-07, + "loss": 2.0069, + "step": 26781 + }, + { + "epoch": 0.89, + "grad_norm": 0.7772741913795471, + "learning_rate": 5.997660295315777e-07, + "loss": 2.094, + "step": 26782 + }, + { + "epoch": 0.89, + "grad_norm": 0.7480953931808472, + "learning_rate": 5.994035192514724e-07, + "loss": 2.0513, + "step": 26783 + }, + { + "epoch": 0.89, + "grad_norm": 0.7661371827125549, + "learning_rate": 5.990411151737985e-07, + "loss": 1.9886, + "step": 26784 + }, + { + "epoch": 0.89, + "grad_norm": 0.7284296154975891, + "learning_rate": 5.98678817302647e-07, + "loss": 2.0631, + "step": 26785 + }, + { + "epoch": 0.89, + "grad_norm": 0.7280776500701904, + "learning_rate": 5.983166256421125e-07, + "loss": 2.0381, + "step": 26786 + }, + { + "epoch": 0.89, + "grad_norm": 0.7455608248710632, + "learning_rate": 5.979545401962883e-07, + "loss": 2.0862, + "step": 26787 + }, + { + "epoch": 0.89, + "grad_norm": 0.7274338006973267, + "learning_rate": 5.975925609692612e-07, + "loss": 1.9799, + "step": 26788 + }, + { + "epoch": 0.89, + "grad_norm": 0.75301194190979, + "learning_rate": 5.972306879651235e-07, + "loss": 2.0726, + "step": 26789 + }, + { + "epoch": 0.89, + "grad_norm": 0.7474877238273621, + "learning_rate": 5.968689211879653e-07, + "loss": 2.1057, + "step": 26790 + }, + { + "epoch": 0.89, + "grad_norm": 0.7693120241165161, + "learning_rate": 5.965072606418698e-07, + "loss": 2.1169, + "step": 26791 + }, + { + "epoch": 0.89, + "grad_norm": 0.7455722093582153, + "learning_rate": 5.961457063309228e-07, + "loss": 2.0601, + "step": 26792 + }, + { + "epoch": 0.89, + "grad_norm": 0.7424631714820862, + "learning_rate": 5.957842582592121e-07, + "loss": 2.0142, + "step": 26793 + }, + { + "epoch": 0.89, + "grad_norm": 0.7237871885299683, + "learning_rate": 5.954229164308201e-07, + "loss": 1.998, + "step": 26794 + }, + { + "epoch": 0.89, + "grad_norm": 0.7454896569252014, + "learning_rate": 5.950616808498266e-07, + "loss": 2.1098, + "step": 26795 + }, + { + "epoch": 0.89, + "grad_norm": 0.7543712854385376, + "learning_rate": 5.947005515203153e-07, + "loss": 2.0629, + "step": 26796 + }, + { + "epoch": 0.89, + "grad_norm": 0.7333405017852783, + "learning_rate": 5.943395284463649e-07, + "loss": 2.0216, + "step": 26797 + }, + { + "epoch": 0.89, + "grad_norm": 0.7327441573143005, + "learning_rate": 5.939786116320534e-07, + "loss": 2.0697, + "step": 26798 + }, + { + "epoch": 0.89, + "grad_norm": 0.7487781643867493, + "learning_rate": 5.936178010814597e-07, + "loss": 2.0238, + "step": 26799 + }, + { + "epoch": 0.89, + "grad_norm": 0.7438755631446838, + "learning_rate": 5.932570967986606e-07, + "loss": 2.0097, + "step": 26800 + }, + { + "epoch": 0.89, + "grad_norm": 0.7622124552726746, + "learning_rate": 5.928964987877283e-07, + "loss": 2.0436, + "step": 26801 + }, + { + "epoch": 0.89, + "grad_norm": 0.7725982666015625, + "learning_rate": 5.925360070527398e-07, + "loss": 2.0693, + "step": 26802 + }, + { + "epoch": 0.89, + "grad_norm": 0.7298283576965332, + "learning_rate": 5.921756215977659e-07, + "loss": 2.0449, + "step": 26803 + }, + { + "epoch": 0.89, + "grad_norm": 0.7347543239593506, + "learning_rate": 5.918153424268769e-07, + "loss": 2.0505, + "step": 26804 + }, + { + "epoch": 0.89, + "grad_norm": 0.765644907951355, + "learning_rate": 5.914551695441451e-07, + "loss": 2.0396, + "step": 26805 + }, + { + "epoch": 0.89, + "grad_norm": 0.7311999201774597, + "learning_rate": 5.910951029536394e-07, + "loss": 2.0973, + "step": 26806 + }, + { + "epoch": 0.89, + "grad_norm": 0.7443760633468628, + "learning_rate": 5.907351426594254e-07, + "loss": 2.0495, + "step": 26807 + }, + { + "epoch": 0.89, + "grad_norm": 0.7606602311134338, + "learning_rate": 5.903752886655733e-07, + "loss": 2.0661, + "step": 26808 + }, + { + "epoch": 0.89, + "grad_norm": 0.7304977178573608, + "learning_rate": 5.900155409761465e-07, + "loss": 2.06, + "step": 26809 + }, + { + "epoch": 0.89, + "grad_norm": 0.7611575722694397, + "learning_rate": 5.896558995952084e-07, + "loss": 1.9904, + "step": 26810 + }, + { + "epoch": 0.89, + "grad_norm": 0.7486984133720398, + "learning_rate": 5.892963645268224e-07, + "loss": 2.0651, + "step": 26811 + }, + { + "epoch": 0.89, + "grad_norm": 0.7296981811523438, + "learning_rate": 5.88936935775054e-07, + "loss": 2.0673, + "step": 26812 + }, + { + "epoch": 0.89, + "grad_norm": 0.7515977621078491, + "learning_rate": 5.88577613343958e-07, + "loss": 2.052, + "step": 26813 + }, + { + "epoch": 0.89, + "grad_norm": 0.7565595507621765, + "learning_rate": 5.882183972375955e-07, + "loss": 2.0497, + "step": 26814 + }, + { + "epoch": 0.89, + "grad_norm": 0.7510352730751038, + "learning_rate": 5.878592874600275e-07, + "loss": 2.056, + "step": 26815 + }, + { + "epoch": 0.89, + "grad_norm": 0.7326372861862183, + "learning_rate": 5.875002840153099e-07, + "loss": 2.0916, + "step": 26816 + }, + { + "epoch": 0.89, + "grad_norm": 0.7442916035652161, + "learning_rate": 5.87141386907496e-07, + "loss": 2.0523, + "step": 26817 + }, + { + "epoch": 0.89, + "grad_norm": 0.7336155772209167, + "learning_rate": 5.867825961406437e-07, + "loss": 2.0282, + "step": 26818 + }, + { + "epoch": 0.89, + "grad_norm": 0.759099006652832, + "learning_rate": 5.864239117188031e-07, + "loss": 2.0063, + "step": 26819 + }, + { + "epoch": 0.89, + "grad_norm": 0.7481719851493835, + "learning_rate": 5.860653336460287e-07, + "loss": 2.1326, + "step": 26820 + }, + { + "epoch": 0.89, + "grad_norm": 0.7478468418121338, + "learning_rate": 5.857068619263728e-07, + "loss": 2.033, + "step": 26821 + }, + { + "epoch": 0.89, + "grad_norm": 0.7536263465881348, + "learning_rate": 5.853484965638834e-07, + "loss": 2.1152, + "step": 26822 + }, + { + "epoch": 0.89, + "grad_norm": 0.7659785747528076, + "learning_rate": 5.849902375626071e-07, + "loss": 2.0592, + "step": 26823 + }, + { + "epoch": 0.89, + "grad_norm": 0.7538750767707825, + "learning_rate": 5.846320849265952e-07, + "loss": 2.053, + "step": 26824 + }, + { + "epoch": 0.89, + "grad_norm": 0.7700193524360657, + "learning_rate": 5.842740386598921e-07, + "loss": 2.0955, + "step": 26825 + }, + { + "epoch": 0.89, + "grad_norm": 0.7565541863441467, + "learning_rate": 5.839160987665404e-07, + "loss": 1.9857, + "step": 26826 + }, + { + "epoch": 0.89, + "grad_norm": 0.7457347512245178, + "learning_rate": 5.835582652505877e-07, + "loss": 1.9862, + "step": 26827 + }, + { + "epoch": 0.89, + "grad_norm": 0.7410709261894226, + "learning_rate": 5.832005381160755e-07, + "loss": 2.0062, + "step": 26828 + }, + { + "epoch": 0.89, + "grad_norm": 0.7709906697273254, + "learning_rate": 5.828429173670436e-07, + "loss": 2.0139, + "step": 26829 + }, + { + "epoch": 0.89, + "grad_norm": 0.7411143779754639, + "learning_rate": 5.824854030075355e-07, + "loss": 1.9856, + "step": 26830 + }, + { + "epoch": 0.89, + "grad_norm": 0.7281932830810547, + "learning_rate": 5.821279950415882e-07, + "loss": 2.0093, + "step": 26831 + }, + { + "epoch": 0.89, + "grad_norm": 0.7799766659736633, + "learning_rate": 5.81770693473237e-07, + "loss": 2.0806, + "step": 26832 + }, + { + "epoch": 0.89, + "grad_norm": 0.7608921527862549, + "learning_rate": 5.814134983065212e-07, + "loss": 2.0332, + "step": 26833 + }, + { + "epoch": 0.89, + "grad_norm": 0.7611744403839111, + "learning_rate": 5.810564095454785e-07, + "loss": 2.0163, + "step": 26834 + }, + { + "epoch": 0.89, + "grad_norm": 0.7796937823295593, + "learning_rate": 5.80699427194138e-07, + "loss": 1.976, + "step": 26835 + }, + { + "epoch": 0.89, + "grad_norm": 0.744566798210144, + "learning_rate": 5.803425512565353e-07, + "loss": 2.0554, + "step": 26836 + }, + { + "epoch": 0.89, + "grad_norm": 0.7287493348121643, + "learning_rate": 5.799857817367027e-07, + "loss": 2.0621, + "step": 26837 + }, + { + "epoch": 0.89, + "grad_norm": 0.7415565848350525, + "learning_rate": 5.796291186386693e-07, + "loss": 2.0882, + "step": 26838 + }, + { + "epoch": 0.89, + "grad_norm": 0.7295355200767517, + "learning_rate": 5.792725619664663e-07, + "loss": 2.0272, + "step": 26839 + }, + { + "epoch": 0.89, + "grad_norm": 0.7915575504302979, + "learning_rate": 5.789161117241193e-07, + "loss": 2.1246, + "step": 26840 + }, + { + "epoch": 0.89, + "grad_norm": 0.745983898639679, + "learning_rate": 5.785597679156585e-07, + "loss": 2.0926, + "step": 26841 + }, + { + "epoch": 0.89, + "grad_norm": 0.768291711807251, + "learning_rate": 5.782035305451072e-07, + "loss": 2.0751, + "step": 26842 + }, + { + "epoch": 0.89, + "grad_norm": 0.7417081594467163, + "learning_rate": 5.778473996164913e-07, + "loss": 2.0644, + "step": 26843 + }, + { + "epoch": 0.89, + "grad_norm": 0.738188624382019, + "learning_rate": 5.774913751338329e-07, + "loss": 2.0945, + "step": 26844 + }, + { + "epoch": 0.89, + "grad_norm": 0.7360116839408875, + "learning_rate": 5.771354571011545e-07, + "loss": 1.9655, + "step": 26845 + }, + { + "epoch": 0.89, + "grad_norm": 0.7933596968650818, + "learning_rate": 5.767796455224772e-07, + "loss": 2.1241, + "step": 26846 + }, + { + "epoch": 0.89, + "grad_norm": 0.7706090807914734, + "learning_rate": 5.764239404018235e-07, + "loss": 2.0484, + "step": 26847 + }, + { + "epoch": 0.89, + "grad_norm": 0.7358858585357666, + "learning_rate": 5.760683417432067e-07, + "loss": 1.9973, + "step": 26848 + }, + { + "epoch": 0.89, + "grad_norm": 0.7996397018432617, + "learning_rate": 5.757128495506459e-07, + "loss": 2.0207, + "step": 26849 + }, + { + "epoch": 0.89, + "grad_norm": 0.7407367825508118, + "learning_rate": 5.753574638281612e-07, + "loss": 2.1165, + "step": 26850 + }, + { + "epoch": 0.89, + "grad_norm": 0.7487921714782715, + "learning_rate": 5.750021845797615e-07, + "loss": 2.0401, + "step": 26851 + }, + { + "epoch": 0.89, + "grad_norm": 0.706542432308197, + "learning_rate": 5.74647011809466e-07, + "loss": 1.9813, + "step": 26852 + }, + { + "epoch": 0.89, + "grad_norm": 0.7501137852668762, + "learning_rate": 5.742919455212848e-07, + "loss": 2.0561, + "step": 26853 + }, + { + "epoch": 0.89, + "grad_norm": 0.7457038164138794, + "learning_rate": 5.739369857192267e-07, + "loss": 2.0392, + "step": 26854 + }, + { + "epoch": 0.89, + "grad_norm": 0.7404676079750061, + "learning_rate": 5.735821324073054e-07, + "loss": 1.9017, + "step": 26855 + }, + { + "epoch": 0.89, + "grad_norm": 0.7670263648033142, + "learning_rate": 5.732273855895298e-07, + "loss": 2.0759, + "step": 26856 + }, + { + "epoch": 0.89, + "grad_norm": 0.7539442181587219, + "learning_rate": 5.728727452699068e-07, + "loss": 1.9943, + "step": 26857 + }, + { + "epoch": 0.89, + "grad_norm": 0.7237810492515564, + "learning_rate": 5.725182114524408e-07, + "loss": 1.9949, + "step": 26858 + }, + { + "epoch": 0.89, + "grad_norm": 0.737122118473053, + "learning_rate": 5.721637841411409e-07, + "loss": 1.958, + "step": 26859 + }, + { + "epoch": 0.89, + "grad_norm": 0.7268405556678772, + "learning_rate": 5.718094633400073e-07, + "loss": 2.0696, + "step": 26860 + }, + { + "epoch": 0.89, + "grad_norm": 0.7415480017662048, + "learning_rate": 5.714552490530467e-07, + "loss": 2.0524, + "step": 26861 + }, + { + "epoch": 0.89, + "grad_norm": 0.7366801500320435, + "learning_rate": 5.711011412842571e-07, + "loss": 2.0466, + "step": 26862 + }, + { + "epoch": 0.89, + "grad_norm": 0.7471643090248108, + "learning_rate": 5.70747140037643e-07, + "loss": 2.0691, + "step": 26863 + }, + { + "epoch": 0.89, + "grad_norm": 0.7320636510848999, + "learning_rate": 5.703932453172001e-07, + "loss": 2.084, + "step": 26864 + }, + { + "epoch": 0.89, + "grad_norm": 0.728541374206543, + "learning_rate": 5.700394571269296e-07, + "loss": 2.0271, + "step": 26865 + }, + { + "epoch": 0.89, + "grad_norm": 0.734802782535553, + "learning_rate": 5.696857754708262e-07, + "loss": 2.0262, + "step": 26866 + }, + { + "epoch": 0.89, + "grad_norm": 0.769321620464325, + "learning_rate": 5.693322003528856e-07, + "loss": 2.0648, + "step": 26867 + }, + { + "epoch": 0.89, + "grad_norm": 0.7468048930168152, + "learning_rate": 5.689787317771022e-07, + "loss": 2.0482, + "step": 26868 + }, + { + "epoch": 0.89, + "grad_norm": 0.747482180595398, + "learning_rate": 5.686253697474719e-07, + "loss": 2.0558, + "step": 26869 + }, + { + "epoch": 0.89, + "grad_norm": 0.7281048893928528, + "learning_rate": 5.682721142679836e-07, + "loss": 2.0481, + "step": 26870 + }, + { + "epoch": 0.89, + "grad_norm": 0.7226170301437378, + "learning_rate": 5.679189653426298e-07, + "loss": 2.0714, + "step": 26871 + }, + { + "epoch": 0.89, + "grad_norm": 0.7490057945251465, + "learning_rate": 5.675659229754005e-07, + "loss": 2.0726, + "step": 26872 + }, + { + "epoch": 0.89, + "grad_norm": 0.7540087699890137, + "learning_rate": 5.672129871702814e-07, + "loss": 2.0771, + "step": 26873 + }, + { + "epoch": 0.89, + "grad_norm": 0.7498872876167297, + "learning_rate": 5.668601579312638e-07, + "loss": 2.0655, + "step": 26874 + }, + { + "epoch": 0.89, + "grad_norm": 0.7233832478523254, + "learning_rate": 5.665074352623323e-07, + "loss": 2.0508, + "step": 26875 + }, + { + "epoch": 0.89, + "grad_norm": 0.7480850219726562, + "learning_rate": 5.661548191674681e-07, + "loss": 2.0366, + "step": 26876 + }, + { + "epoch": 0.89, + "grad_norm": 0.7863582968711853, + "learning_rate": 5.658023096506593e-07, + "loss": 1.9977, + "step": 26877 + }, + { + "epoch": 0.89, + "grad_norm": 0.7653627395629883, + "learning_rate": 5.654499067158881e-07, + "loss": 2.0342, + "step": 26878 + }, + { + "epoch": 0.89, + "grad_norm": 0.7334886193275452, + "learning_rate": 5.650976103671357e-07, + "loss": 1.9953, + "step": 26879 + }, + { + "epoch": 0.89, + "grad_norm": 0.7379988431930542, + "learning_rate": 5.64745420608378e-07, + "loss": 2.0902, + "step": 26880 + }, + { + "epoch": 0.89, + "grad_norm": 0.7469719052314758, + "learning_rate": 5.643933374435994e-07, + "loss": 2.0244, + "step": 26881 + }, + { + "epoch": 0.89, + "grad_norm": 0.7325426340103149, + "learning_rate": 5.640413608767737e-07, + "loss": 2.051, + "step": 26882 + }, + { + "epoch": 0.89, + "grad_norm": 0.7712374329566956, + "learning_rate": 5.636894909118796e-07, + "loss": 2.0795, + "step": 26883 + }, + { + "epoch": 0.89, + "grad_norm": 0.7337093353271484, + "learning_rate": 5.633377275528906e-07, + "loss": 2.0513, + "step": 26884 + }, + { + "epoch": 0.89, + "grad_norm": 0.7074521780014038, + "learning_rate": 5.629860708037826e-07, + "loss": 1.9491, + "step": 26885 + }, + { + "epoch": 0.89, + "grad_norm": 0.7709642052650452, + "learning_rate": 5.626345206685257e-07, + "loss": 2.0757, + "step": 26886 + }, + { + "epoch": 0.89, + "grad_norm": 0.7714084386825562, + "learning_rate": 5.622830771510945e-07, + "loss": 2.0662, + "step": 26887 + }, + { + "epoch": 0.89, + "grad_norm": 0.7319288849830627, + "learning_rate": 5.619317402554581e-07, + "loss": 2.0736, + "step": 26888 + }, + { + "epoch": 0.89, + "grad_norm": 0.7358079552650452, + "learning_rate": 5.615805099855842e-07, + "loss": 2.1122, + "step": 26889 + }, + { + "epoch": 0.89, + "grad_norm": 0.7885095477104187, + "learning_rate": 5.612293863454432e-07, + "loss": 2.0366, + "step": 26890 + }, + { + "epoch": 0.89, + "grad_norm": 0.7655701041221619, + "learning_rate": 5.608783693390008e-07, + "loss": 2.0259, + "step": 26891 + }, + { + "epoch": 0.89, + "grad_norm": 0.7609039545059204, + "learning_rate": 5.605274589702237e-07, + "loss": 2.1184, + "step": 26892 + }, + { + "epoch": 0.89, + "grad_norm": 0.7649220824241638, + "learning_rate": 5.601766552430743e-07, + "loss": 2.1437, + "step": 26893 + }, + { + "epoch": 0.89, + "grad_norm": 0.7477615475654602, + "learning_rate": 5.598259581615173e-07, + "loss": 2.0349, + "step": 26894 + }, + { + "epoch": 0.89, + "grad_norm": 0.7641354203224182, + "learning_rate": 5.594753677295128e-07, + "loss": 2.0766, + "step": 26895 + }, + { + "epoch": 0.89, + "grad_norm": 0.7420513033866882, + "learning_rate": 5.591248839510244e-07, + "loss": 2.075, + "step": 26896 + }, + { + "epoch": 0.89, + "grad_norm": 0.7668627500534058, + "learning_rate": 5.587745068300099e-07, + "loss": 2.1119, + "step": 26897 + }, + { + "epoch": 0.89, + "grad_norm": 0.7368153929710388, + "learning_rate": 5.584242363704273e-07, + "loss": 2.0023, + "step": 26898 + }, + { + "epoch": 0.89, + "grad_norm": 0.762692391872406, + "learning_rate": 5.580740725762335e-07, + "loss": 2.0503, + "step": 26899 + }, + { + "epoch": 0.89, + "grad_norm": 0.7256408333778381, + "learning_rate": 5.577240154513874e-07, + "loss": 1.969, + "step": 26900 + }, + { + "epoch": 0.89, + "grad_norm": 0.7607681751251221, + "learning_rate": 5.573740649998416e-07, + "loss": 2.0373, + "step": 26901 + }, + { + "epoch": 0.9, + "grad_norm": 0.7171739339828491, + "learning_rate": 5.570242212255484e-07, + "loss": 2.054, + "step": 26902 + }, + { + "epoch": 0.9, + "grad_norm": 0.7665387392044067, + "learning_rate": 5.566744841324623e-07, + "loss": 2.0434, + "step": 26903 + }, + { + "epoch": 0.9, + "grad_norm": 0.745210587978363, + "learning_rate": 5.563248537245325e-07, + "loss": 2.0277, + "step": 26904 + }, + { + "epoch": 0.9, + "grad_norm": 0.7858266830444336, + "learning_rate": 5.559753300057114e-07, + "loss": 2.0415, + "step": 26905 + }, + { + "epoch": 0.9, + "grad_norm": 0.7739076614379883, + "learning_rate": 5.556259129799446e-07, + "loss": 2.0099, + "step": 26906 + }, + { + "epoch": 0.9, + "grad_norm": 0.7702251672744751, + "learning_rate": 5.552766026511825e-07, + "loss": 2.1132, + "step": 26907 + }, + { + "epoch": 0.9, + "grad_norm": 0.7716066837310791, + "learning_rate": 5.549273990233695e-07, + "loss": 2.041, + "step": 26908 + }, + { + "epoch": 0.9, + "grad_norm": 0.7275430560112, + "learning_rate": 5.545783021004525e-07, + "loss": 2.0059, + "step": 26909 + }, + { + "epoch": 0.9, + "grad_norm": 0.7659602761268616, + "learning_rate": 5.54229311886374e-07, + "loss": 2.125, + "step": 26910 + }, + { + "epoch": 0.9, + "grad_norm": 0.7582475543022156, + "learning_rate": 5.538804283850763e-07, + "loss": 2.0475, + "step": 26911 + }, + { + "epoch": 0.9, + "grad_norm": 0.7267307639122009, + "learning_rate": 5.535316516005007e-07, + "loss": 2.0096, + "step": 26912 + }, + { + "epoch": 0.9, + "grad_norm": 0.7591155171394348, + "learning_rate": 5.531829815365897e-07, + "loss": 1.9729, + "step": 26913 + }, + { + "epoch": 0.9, + "grad_norm": 0.7996005415916443, + "learning_rate": 5.528344181972811e-07, + "loss": 2.0383, + "step": 26914 + }, + { + "epoch": 0.9, + "grad_norm": 0.7296708822250366, + "learning_rate": 5.524859615865119e-07, + "loss": 2.0565, + "step": 26915 + }, + { + "epoch": 0.9, + "grad_norm": 0.7162570357322693, + "learning_rate": 5.5213761170822e-07, + "loss": 2.0358, + "step": 26916 + }, + { + "epoch": 0.9, + "grad_norm": 0.75110924243927, + "learning_rate": 5.5178936856634e-07, + "loss": 2.0785, + "step": 26917 + }, + { + "epoch": 0.9, + "grad_norm": 0.7339128851890564, + "learning_rate": 5.514412321648077e-07, + "loss": 2.039, + "step": 26918 + }, + { + "epoch": 0.9, + "grad_norm": 0.7672488689422607, + "learning_rate": 5.510932025075543e-07, + "loss": 2.0993, + "step": 26919 + }, + { + "epoch": 0.9, + "grad_norm": 0.7108776569366455, + "learning_rate": 5.507452795985114e-07, + "loss": 1.9927, + "step": 26920 + }, + { + "epoch": 0.9, + "grad_norm": 0.7517478466033936, + "learning_rate": 5.503974634416098e-07, + "loss": 1.9737, + "step": 26921 + }, + { + "epoch": 0.9, + "grad_norm": 0.7337257862091064, + "learning_rate": 5.500497540407823e-07, + "loss": 2.108, + "step": 26922 + }, + { + "epoch": 0.9, + "grad_norm": 0.7433484792709351, + "learning_rate": 5.497021513999535e-07, + "loss": 1.9872, + "step": 26923 + }, + { + "epoch": 0.9, + "grad_norm": 0.7508088946342468, + "learning_rate": 5.4935465552305e-07, + "loss": 2.0254, + "step": 26924 + }, + { + "epoch": 0.9, + "grad_norm": 0.7388478517532349, + "learning_rate": 5.490072664140012e-07, + "loss": 2.0837, + "step": 26925 + }, + { + "epoch": 0.9, + "grad_norm": 0.7516533732414246, + "learning_rate": 5.48659984076727e-07, + "loss": 2.1046, + "step": 26926 + }, + { + "epoch": 0.9, + "grad_norm": 0.7340182662010193, + "learning_rate": 5.483128085151557e-07, + "loss": 1.9676, + "step": 26927 + }, + { + "epoch": 0.9, + "grad_norm": 0.7733637690544128, + "learning_rate": 5.47965739733205e-07, + "loss": 2.0736, + "step": 26928 + }, + { + "epoch": 0.9, + "grad_norm": 0.7810878753662109, + "learning_rate": 5.476187777347997e-07, + "loss": 2.0118, + "step": 26929 + }, + { + "epoch": 0.9, + "grad_norm": 0.7485711574554443, + "learning_rate": 5.472719225238554e-07, + "loss": 1.946, + "step": 26930 + }, + { + "epoch": 0.9, + "grad_norm": 0.7349092960357666, + "learning_rate": 5.469251741042958e-07, + "loss": 2.0287, + "step": 26931 + }, + { + "epoch": 0.9, + "grad_norm": 0.7228965759277344, + "learning_rate": 5.465785324800354e-07, + "loss": 2.0236, + "step": 26932 + }, + { + "epoch": 0.9, + "grad_norm": 0.7300693988800049, + "learning_rate": 5.462319976549879e-07, + "loss": 2.0116, + "step": 26933 + }, + { + "epoch": 0.9, + "grad_norm": 0.7541865706443787, + "learning_rate": 5.458855696330723e-07, + "loss": 2.0014, + "step": 26934 + }, + { + "epoch": 0.9, + "grad_norm": 0.7615839242935181, + "learning_rate": 5.455392484182009e-07, + "loss": 2.063, + "step": 26935 + }, + { + "epoch": 0.9, + "grad_norm": 0.7451556921005249, + "learning_rate": 5.451930340142875e-07, + "loss": 2.0744, + "step": 26936 + }, + { + "epoch": 0.9, + "grad_norm": 0.7672232389450073, + "learning_rate": 5.448469264252398e-07, + "loss": 2.0018, + "step": 26937 + }, + { + "epoch": 0.9, + "grad_norm": 0.7256489992141724, + "learning_rate": 5.445009256549727e-07, + "loss": 1.9996, + "step": 26938 + }, + { + "epoch": 0.9, + "grad_norm": 0.732761800289154, + "learning_rate": 5.441550317073896e-07, + "loss": 2.0611, + "step": 26939 + }, + { + "epoch": 0.9, + "grad_norm": 0.742633581161499, + "learning_rate": 5.43809244586404e-07, + "loss": 2.0334, + "step": 26940 + }, + { + "epoch": 0.9, + "grad_norm": 0.7921823859214783, + "learning_rate": 5.434635642959196e-07, + "loss": 2.0549, + "step": 26941 + }, + { + "epoch": 0.9, + "grad_norm": 0.7654536962509155, + "learning_rate": 5.431179908398398e-07, + "loss": 2.0826, + "step": 26942 + }, + { + "epoch": 0.9, + "grad_norm": 0.7512344717979431, + "learning_rate": 5.427725242220716e-07, + "loss": 1.9456, + "step": 26943 + }, + { + "epoch": 0.9, + "grad_norm": 0.756268322467804, + "learning_rate": 5.424271644465173e-07, + "loss": 2.0259, + "step": 26944 + }, + { + "epoch": 0.9, + "grad_norm": 0.7653051614761353, + "learning_rate": 5.420819115170783e-07, + "loss": 2.0696, + "step": 26945 + }, + { + "epoch": 0.9, + "grad_norm": 0.7791337966918945, + "learning_rate": 5.417367654376548e-07, + "loss": 2.0326, + "step": 26946 + }, + { + "epoch": 0.9, + "grad_norm": 0.7451674938201904, + "learning_rate": 5.413917262121449e-07, + "loss": 1.996, + "step": 26947 + }, + { + "epoch": 0.9, + "grad_norm": 0.7252696752548218, + "learning_rate": 5.410467938444497e-07, + "loss": 1.9873, + "step": 26948 + }, + { + "epoch": 0.9, + "grad_norm": 0.7509819269180298, + "learning_rate": 5.407019683384651e-07, + "loss": 2.0311, + "step": 26949 + }, + { + "epoch": 0.9, + "grad_norm": 0.7448418140411377, + "learning_rate": 5.403572496980836e-07, + "loss": 2.0671, + "step": 26950 + }, + { + "epoch": 0.9, + "grad_norm": 0.79044508934021, + "learning_rate": 5.400126379272042e-07, + "loss": 2.025, + "step": 26951 + }, + { + "epoch": 0.9, + "grad_norm": 0.7812781929969788, + "learning_rate": 5.396681330297159e-07, + "loss": 2.0099, + "step": 26952 + }, + { + "epoch": 0.9, + "grad_norm": 0.7292900085449219, + "learning_rate": 5.393237350095126e-07, + "loss": 2.0463, + "step": 26953 + }, + { + "epoch": 0.9, + "grad_norm": 0.7490742802619934, + "learning_rate": 5.389794438704887e-07, + "loss": 2.0502, + "step": 26954 + }, + { + "epoch": 0.9, + "grad_norm": 0.7482472062110901, + "learning_rate": 5.386352596165267e-07, + "loss": 2.0796, + "step": 26955 + }, + { + "epoch": 0.9, + "grad_norm": 0.7403907179832458, + "learning_rate": 5.382911822515191e-07, + "loss": 2.0875, + "step": 26956 + }, + { + "epoch": 0.9, + "grad_norm": 0.7163010835647583, + "learning_rate": 5.379472117793549e-07, + "loss": 2.0715, + "step": 26957 + }, + { + "epoch": 0.9, + "grad_norm": 0.7788358926773071, + "learning_rate": 5.376033482039166e-07, + "loss": 2.0491, + "step": 26958 + }, + { + "epoch": 0.9, + "grad_norm": 0.7473604679107666, + "learning_rate": 5.37259591529089e-07, + "loss": 1.9643, + "step": 26959 + }, + { + "epoch": 0.9, + "grad_norm": 0.7432107925415039, + "learning_rate": 5.369159417587588e-07, + "loss": 2.0535, + "step": 26960 + }, + { + "epoch": 0.9, + "grad_norm": 0.7301928997039795, + "learning_rate": 5.365723988968041e-07, + "loss": 2.0612, + "step": 26961 + }, + { + "epoch": 0.9, + "grad_norm": 0.7319395542144775, + "learning_rate": 5.362289629471095e-07, + "loss": 2.0067, + "step": 26962 + }, + { + "epoch": 0.9, + "grad_norm": 0.7301526665687561, + "learning_rate": 5.358856339135532e-07, + "loss": 2.003, + "step": 26963 + }, + { + "epoch": 0.9, + "grad_norm": 0.7434555888175964, + "learning_rate": 5.355424118000163e-07, + "loss": 2.031, + "step": 26964 + }, + { + "epoch": 0.9, + "grad_norm": 0.757209300994873, + "learning_rate": 5.351992966103725e-07, + "loss": 2.1425, + "step": 26965 + }, + { + "epoch": 0.9, + "grad_norm": 0.7407654523849487, + "learning_rate": 5.348562883485009e-07, + "loss": 2.0562, + "step": 26966 + }, + { + "epoch": 0.9, + "grad_norm": 0.7699578404426575, + "learning_rate": 5.345133870182773e-07, + "loss": 2.0984, + "step": 26967 + }, + { + "epoch": 0.9, + "grad_norm": 0.734982430934906, + "learning_rate": 5.341705926235718e-07, + "loss": 2.0833, + "step": 26968 + }, + { + "epoch": 0.9, + "grad_norm": 0.7863766551017761, + "learning_rate": 5.338279051682594e-07, + "loss": 2.0752, + "step": 26969 + }, + { + "epoch": 0.9, + "grad_norm": 0.7463781237602234, + "learning_rate": 5.334853246562132e-07, + "loss": 2.0632, + "step": 26970 + }, + { + "epoch": 0.9, + "grad_norm": 0.7242060899734497, + "learning_rate": 5.331428510913017e-07, + "loss": 1.9176, + "step": 26971 + }, + { + "epoch": 0.9, + "grad_norm": 0.7766664028167725, + "learning_rate": 5.328004844773926e-07, + "loss": 2.0673, + "step": 26972 + }, + { + "epoch": 0.9, + "grad_norm": 0.7228651642799377, + "learning_rate": 5.324582248183574e-07, + "loss": 2.0354, + "step": 26973 + }, + { + "epoch": 0.9, + "grad_norm": 0.7047490477561951, + "learning_rate": 5.321160721180585e-07, + "loss": 1.9829, + "step": 26974 + }, + { + "epoch": 0.9, + "grad_norm": 0.7621657252311707, + "learning_rate": 5.317740263803639e-07, + "loss": 2.0329, + "step": 26975 + }, + { + "epoch": 0.9, + "grad_norm": 0.7462449073791504, + "learning_rate": 5.314320876091406e-07, + "loss": 2.0401, + "step": 26976 + }, + { + "epoch": 0.9, + "grad_norm": 0.7307231426239014, + "learning_rate": 5.310902558082442e-07, + "loss": 1.973, + "step": 26977 + }, + { + "epoch": 0.9, + "grad_norm": 0.7253983020782471, + "learning_rate": 5.307485309815419e-07, + "loss": 2.0925, + "step": 26978 + }, + { + "epoch": 0.9, + "grad_norm": 0.7262557744979858, + "learning_rate": 5.304069131328948e-07, + "loss": 2.0064, + "step": 26979 + }, + { + "epoch": 0.9, + "grad_norm": 0.7326792478561401, + "learning_rate": 5.3006540226616e-07, + "loss": 1.997, + "step": 26980 + }, + { + "epoch": 0.9, + "grad_norm": 0.7362827658653259, + "learning_rate": 5.297239983851954e-07, + "loss": 2.0084, + "step": 26981 + }, + { + "epoch": 0.9, + "grad_norm": 0.7429168224334717, + "learning_rate": 5.293827014938601e-07, + "loss": 2.0587, + "step": 26982 + }, + { + "epoch": 0.9, + "grad_norm": 0.7198291420936584, + "learning_rate": 5.290415115960079e-07, + "loss": 1.9758, + "step": 26983 + }, + { + "epoch": 0.9, + "grad_norm": 0.7450733780860901, + "learning_rate": 5.287004286954933e-07, + "loss": 2.037, + "step": 26984 + }, + { + "epoch": 0.9, + "grad_norm": 0.7643210291862488, + "learning_rate": 5.283594527961722e-07, + "loss": 1.9919, + "step": 26985 + }, + { + "epoch": 0.9, + "grad_norm": 0.7423257827758789, + "learning_rate": 5.280185839018948e-07, + "loss": 2.0027, + "step": 26986 + }, + { + "epoch": 0.9, + "grad_norm": 0.7475537061691284, + "learning_rate": 5.276778220165113e-07, + "loss": 1.9544, + "step": 26987 + }, + { + "epoch": 0.9, + "grad_norm": 0.742992103099823, + "learning_rate": 5.273371671438743e-07, + "loss": 2.0181, + "step": 26988 + }, + { + "epoch": 0.9, + "grad_norm": 0.7447389364242554, + "learning_rate": 5.269966192878295e-07, + "loss": 2.0092, + "step": 26989 + }, + { + "epoch": 0.9, + "grad_norm": 0.7271109819412231, + "learning_rate": 5.266561784522251e-07, + "loss": 2.0369, + "step": 26990 + }, + { + "epoch": 0.9, + "grad_norm": 0.7332441210746765, + "learning_rate": 5.263158446409056e-07, + "loss": 2.0461, + "step": 26991 + }, + { + "epoch": 0.9, + "grad_norm": 0.7433024644851685, + "learning_rate": 5.259756178577203e-07, + "loss": 1.9958, + "step": 26992 + }, + { + "epoch": 0.9, + "grad_norm": 0.730384349822998, + "learning_rate": 5.256354981065092e-07, + "loss": 2.0336, + "step": 26993 + }, + { + "epoch": 0.9, + "grad_norm": 0.7674983143806458, + "learning_rate": 5.252954853911151e-07, + "loss": 2.0502, + "step": 26994 + }, + { + "epoch": 0.9, + "grad_norm": 0.743308424949646, + "learning_rate": 5.249555797153805e-07, + "loss": 1.9863, + "step": 26995 + }, + { + "epoch": 0.9, + "grad_norm": 0.7620336413383484, + "learning_rate": 5.246157810831442e-07, + "loss": 2.0189, + "step": 26996 + }, + { + "epoch": 0.9, + "grad_norm": 0.7430989146232605, + "learning_rate": 5.242760894982457e-07, + "loss": 2.0512, + "step": 26997 + }, + { + "epoch": 0.9, + "grad_norm": 0.7522795796394348, + "learning_rate": 5.23936504964525e-07, + "loss": 2.0942, + "step": 26998 + }, + { + "epoch": 0.9, + "grad_norm": 0.7349939346313477, + "learning_rate": 5.235970274858138e-07, + "loss": 2.0659, + "step": 26999 + }, + { + "epoch": 0.9, + "grad_norm": 0.7293635606765747, + "learning_rate": 5.232576570659487e-07, + "loss": 2.0173, + "step": 27000 + }, + { + "epoch": 0.9, + "grad_norm": 0.7363609671592712, + "learning_rate": 5.229183937087667e-07, + "loss": 2.0796, + "step": 27001 + }, + { + "epoch": 0.9, + "grad_norm": 0.7646094560623169, + "learning_rate": 5.225792374180983e-07, + "loss": 2.0562, + "step": 27002 + }, + { + "epoch": 0.9, + "grad_norm": 0.7520452737808228, + "learning_rate": 5.222401881977745e-07, + "loss": 2.053, + "step": 27003 + }, + { + "epoch": 0.9, + "grad_norm": 0.7562854886054993, + "learning_rate": 5.219012460516271e-07, + "loss": 2.0971, + "step": 27004 + }, + { + "epoch": 0.9, + "grad_norm": 0.723134458065033, + "learning_rate": 5.215624109834838e-07, + "loss": 2.0249, + "step": 27005 + }, + { + "epoch": 0.9, + "grad_norm": 0.7467735409736633, + "learning_rate": 5.21223682997174e-07, + "loss": 2.0645, + "step": 27006 + }, + { + "epoch": 0.9, + "grad_norm": 0.7454159259796143, + "learning_rate": 5.208850620965245e-07, + "loss": 2.0222, + "step": 27007 + }, + { + "epoch": 0.9, + "grad_norm": 0.7370786070823669, + "learning_rate": 5.205465482853611e-07, + "loss": 2.0671, + "step": 27008 + }, + { + "epoch": 0.9, + "grad_norm": 0.7202531099319458, + "learning_rate": 5.202081415675053e-07, + "loss": 2.0037, + "step": 27009 + }, + { + "epoch": 0.9, + "grad_norm": 0.7588405013084412, + "learning_rate": 5.198698419467841e-07, + "loss": 2.0351, + "step": 27010 + }, + { + "epoch": 0.9, + "grad_norm": 0.7160064578056335, + "learning_rate": 5.195316494270164e-07, + "loss": 2.02, + "step": 27011 + }, + { + "epoch": 0.9, + "grad_norm": 0.7371523380279541, + "learning_rate": 5.191935640120238e-07, + "loss": 2.0399, + "step": 27012 + }, + { + "epoch": 0.9, + "grad_norm": 0.7569615244865417, + "learning_rate": 5.188555857056254e-07, + "loss": 2.0699, + "step": 27013 + }, + { + "epoch": 0.9, + "grad_norm": 0.7328880429267883, + "learning_rate": 5.185177145116415e-07, + "loss": 2.0842, + "step": 27014 + }, + { + "epoch": 0.9, + "grad_norm": 0.7620487213134766, + "learning_rate": 5.181799504338869e-07, + "loss": 2.0388, + "step": 27015 + }, + { + "epoch": 0.9, + "grad_norm": 0.7487577795982361, + "learning_rate": 5.178422934761795e-07, + "loss": 1.9506, + "step": 27016 + }, + { + "epoch": 0.9, + "grad_norm": 0.7214451432228088, + "learning_rate": 5.175047436423319e-07, + "loss": 2.1044, + "step": 27017 + }, + { + "epoch": 0.9, + "grad_norm": 0.7533203959465027, + "learning_rate": 5.171673009361589e-07, + "loss": 2.0185, + "step": 27018 + }, + { + "epoch": 0.9, + "grad_norm": 0.7581403851509094, + "learning_rate": 5.168299653614706e-07, + "loss": 2.0909, + "step": 27019 + }, + { + "epoch": 0.9, + "grad_norm": 0.7517644762992859, + "learning_rate": 5.164927369220829e-07, + "loss": 2.0755, + "step": 27020 + }, + { + "epoch": 0.9, + "grad_norm": 0.7404831647872925, + "learning_rate": 5.161556156217995e-07, + "loss": 2.0907, + "step": 27021 + }, + { + "epoch": 0.9, + "grad_norm": 0.7361766695976257, + "learning_rate": 5.158186014644318e-07, + "loss": 2.0411, + "step": 27022 + }, + { + "epoch": 0.9, + "grad_norm": 0.7281767129898071, + "learning_rate": 5.154816944537889e-07, + "loss": 2.0443, + "step": 27023 + }, + { + "epoch": 0.9, + "grad_norm": 0.7476963400840759, + "learning_rate": 5.151448945936754e-07, + "loss": 2.0166, + "step": 27024 + }, + { + "epoch": 0.9, + "grad_norm": 0.7690434455871582, + "learning_rate": 5.148082018878952e-07, + "loss": 2.0087, + "step": 27025 + }, + { + "epoch": 0.9, + "grad_norm": 0.7568269371986389, + "learning_rate": 5.144716163402542e-07, + "loss": 2.0448, + "step": 27026 + }, + { + "epoch": 0.9, + "grad_norm": 0.7555501461029053, + "learning_rate": 5.141351379545523e-07, + "loss": 1.9516, + "step": 27027 + }, + { + "epoch": 0.9, + "grad_norm": 0.7793776392936707, + "learning_rate": 5.137987667345923e-07, + "loss": 2.0541, + "step": 27028 + }, + { + "epoch": 0.9, + "grad_norm": 0.7565628290176392, + "learning_rate": 5.134625026841755e-07, + "loss": 2.0442, + "step": 27029 + }, + { + "epoch": 0.9, + "grad_norm": 0.7423378229141235, + "learning_rate": 5.131263458071001e-07, + "loss": 2.0215, + "step": 27030 + }, + { + "epoch": 0.9, + "grad_norm": 0.7259252667427063, + "learning_rate": 5.127902961071618e-07, + "loss": 1.9915, + "step": 27031 + }, + { + "epoch": 0.9, + "grad_norm": 0.7894111275672913, + "learning_rate": 5.124543535881588e-07, + "loss": 2.1366, + "step": 27032 + }, + { + "epoch": 0.9, + "grad_norm": 0.7414683699607849, + "learning_rate": 5.12118518253889e-07, + "loss": 2.0442, + "step": 27033 + }, + { + "epoch": 0.9, + "grad_norm": 0.7552931904792786, + "learning_rate": 5.117827901081406e-07, + "loss": 2.0191, + "step": 27034 + }, + { + "epoch": 0.9, + "grad_norm": 0.7368302345275879, + "learning_rate": 5.114471691547096e-07, + "loss": 1.9923, + "step": 27035 + }, + { + "epoch": 0.9, + "grad_norm": 0.7382238507270813, + "learning_rate": 5.111116553973894e-07, + "loss": 2.0408, + "step": 27036 + }, + { + "epoch": 0.9, + "grad_norm": 0.7565481662750244, + "learning_rate": 5.10776248839967e-07, + "loss": 2.0303, + "step": 27037 + }, + { + "epoch": 0.9, + "grad_norm": 0.7438725829124451, + "learning_rate": 5.10440949486235e-07, + "loss": 2.0596, + "step": 27038 + }, + { + "epoch": 0.9, + "grad_norm": 0.7720710039138794, + "learning_rate": 5.101057573399803e-07, + "loss": 1.9984, + "step": 27039 + }, + { + "epoch": 0.9, + "grad_norm": 0.7454878091812134, + "learning_rate": 5.097706724049867e-07, + "loss": 2.0438, + "step": 27040 + }, + { + "epoch": 0.9, + "grad_norm": 0.7662563323974609, + "learning_rate": 5.094356946850431e-07, + "loss": 2.0705, + "step": 27041 + }, + { + "epoch": 0.9, + "grad_norm": 0.7354649305343628, + "learning_rate": 5.091008241839346e-07, + "loss": 2.0718, + "step": 27042 + }, + { + "epoch": 0.9, + "grad_norm": 0.7660358548164368, + "learning_rate": 5.087660609054412e-07, + "loss": 2.1405, + "step": 27043 + }, + { + "epoch": 0.9, + "grad_norm": 0.7287164330482483, + "learning_rate": 5.084314048533456e-07, + "loss": 2.0131, + "step": 27044 + }, + { + "epoch": 0.9, + "grad_norm": 0.7514034509658813, + "learning_rate": 5.080968560314315e-07, + "loss": 2.0712, + "step": 27045 + }, + { + "epoch": 0.9, + "grad_norm": 0.743866503238678, + "learning_rate": 5.077624144434756e-07, + "loss": 2.0889, + "step": 27046 + }, + { + "epoch": 0.9, + "grad_norm": 0.7519726157188416, + "learning_rate": 5.074280800932563e-07, + "loss": 2.1144, + "step": 27047 + }, + { + "epoch": 0.9, + "grad_norm": 0.7461344003677368, + "learning_rate": 5.070938529845504e-07, + "loss": 2.1008, + "step": 27048 + }, + { + "epoch": 0.9, + "grad_norm": 0.7630887627601624, + "learning_rate": 5.067597331211372e-07, + "loss": 2.1109, + "step": 27049 + }, + { + "epoch": 0.9, + "grad_norm": 0.702914834022522, + "learning_rate": 5.064257205067869e-07, + "loss": 2.0449, + "step": 27050 + }, + { + "epoch": 0.9, + "grad_norm": 0.7731585502624512, + "learning_rate": 5.060918151452765e-07, + "loss": 2.0291, + "step": 27051 + }, + { + "epoch": 0.9, + "grad_norm": 0.7206317186355591, + "learning_rate": 5.057580170403775e-07, + "loss": 1.9682, + "step": 27052 + }, + { + "epoch": 0.9, + "grad_norm": 0.7426854968070984, + "learning_rate": 5.054243261958581e-07, + "loss": 2.0535, + "step": 27053 + }, + { + "epoch": 0.9, + "grad_norm": 0.7682952880859375, + "learning_rate": 5.050907426154905e-07, + "loss": 2.0614, + "step": 27054 + }, + { + "epoch": 0.9, + "grad_norm": 0.73529452085495, + "learning_rate": 5.047572663030453e-07, + "loss": 2.0544, + "step": 27055 + }, + { + "epoch": 0.9, + "grad_norm": 0.7498229742050171, + "learning_rate": 5.044238972622861e-07, + "loss": 2.0773, + "step": 27056 + }, + { + "epoch": 0.9, + "grad_norm": 0.7547264695167542, + "learning_rate": 5.040906354969799e-07, + "loss": 1.9999, + "step": 27057 + }, + { + "epoch": 0.9, + "grad_norm": 0.7484133839607239, + "learning_rate": 5.037574810108937e-07, + "loss": 2.0022, + "step": 27058 + }, + { + "epoch": 0.9, + "grad_norm": 0.751929759979248, + "learning_rate": 5.034244338077898e-07, + "loss": 2.1634, + "step": 27059 + }, + { + "epoch": 0.9, + "grad_norm": 0.7217931151390076, + "learning_rate": 5.030914938914321e-07, + "loss": 2.0403, + "step": 27060 + }, + { + "epoch": 0.9, + "grad_norm": 0.7354684472084045, + "learning_rate": 5.027586612655799e-07, + "loss": 2.006, + "step": 27061 + }, + { + "epoch": 0.9, + "grad_norm": 0.7464145421981812, + "learning_rate": 5.024259359339944e-07, + "loss": 2.1009, + "step": 27062 + }, + { + "epoch": 0.9, + "grad_norm": 0.7327633500099182, + "learning_rate": 5.02093317900434e-07, + "loss": 2.0259, + "step": 27063 + }, + { + "epoch": 0.9, + "grad_norm": 0.7375994920730591, + "learning_rate": 5.017608071686586e-07, + "loss": 2.0765, + "step": 27064 + }, + { + "epoch": 0.9, + "grad_norm": 0.7414315342903137, + "learning_rate": 5.014284037424233e-07, + "loss": 2.071, + "step": 27065 + }, + { + "epoch": 0.9, + "grad_norm": 0.7649632692337036, + "learning_rate": 5.010961076254806e-07, + "loss": 2.0666, + "step": 27066 + }, + { + "epoch": 0.9, + "grad_norm": 0.740566074848175, + "learning_rate": 5.007639188215896e-07, + "loss": 2.0483, + "step": 27067 + }, + { + "epoch": 0.9, + "grad_norm": 0.7272589206695557, + "learning_rate": 5.004318373344996e-07, + "loss": 2.046, + "step": 27068 + }, + { + "epoch": 0.9, + "grad_norm": 0.7377866506576538, + "learning_rate": 5.000998631679643e-07, + "loss": 1.9703, + "step": 27069 + }, + { + "epoch": 0.9, + "grad_norm": 0.7418698668479919, + "learning_rate": 4.997679963257318e-07, + "loss": 2.0502, + "step": 27070 + }, + { + "epoch": 0.9, + "grad_norm": 0.7199897766113281, + "learning_rate": 4.994362368115546e-07, + "loss": 1.9693, + "step": 27071 + }, + { + "epoch": 0.9, + "grad_norm": 0.7556372284889221, + "learning_rate": 4.991045846291764e-07, + "loss": 2.0622, + "step": 27072 + }, + { + "epoch": 0.9, + "grad_norm": 0.7479092478752136, + "learning_rate": 4.987730397823487e-07, + "loss": 2.0602, + "step": 27073 + }, + { + "epoch": 0.9, + "grad_norm": 0.750408947467804, + "learning_rate": 4.984416022748151e-07, + "loss": 2.0325, + "step": 27074 + }, + { + "epoch": 0.9, + "grad_norm": 0.7369655966758728, + "learning_rate": 4.981102721103193e-07, + "loss": 2.0629, + "step": 27075 + }, + { + "epoch": 0.9, + "grad_norm": 0.7437099814414978, + "learning_rate": 4.977790492926038e-07, + "loss": 2.0585, + "step": 27076 + }, + { + "epoch": 0.9, + "grad_norm": 0.7695946097373962, + "learning_rate": 4.974479338254146e-07, + "loss": 2.0772, + "step": 27077 + }, + { + "epoch": 0.9, + "grad_norm": 0.7202904224395752, + "learning_rate": 4.971169257124864e-07, + "loss": 2.0062, + "step": 27078 + }, + { + "epoch": 0.9, + "grad_norm": 0.7570523619651794, + "learning_rate": 4.967860249575629e-07, + "loss": 2.0616, + "step": 27079 + }, + { + "epoch": 0.9, + "grad_norm": 0.7607750296592712, + "learning_rate": 4.964552315643822e-07, + "loss": 2.0256, + "step": 27080 + }, + { + "epoch": 0.9, + "grad_norm": 0.7446511387825012, + "learning_rate": 4.961245455366792e-07, + "loss": 1.9807, + "step": 27081 + }, + { + "epoch": 0.9, + "grad_norm": 0.7301918268203735, + "learning_rate": 4.957939668781931e-07, + "loss": 2.0482, + "step": 27082 + }, + { + "epoch": 0.9, + "grad_norm": 0.7503844499588013, + "learning_rate": 4.954634955926574e-07, + "loss": 2.0197, + "step": 27083 + }, + { + "epoch": 0.9, + "grad_norm": 0.7488322854042053, + "learning_rate": 4.951331316838026e-07, + "loss": 2.0211, + "step": 27084 + }, + { + "epoch": 0.9, + "grad_norm": 0.7072362303733826, + "learning_rate": 4.948028751553635e-07, + "loss": 2.0552, + "step": 27085 + }, + { + "epoch": 0.9, + "grad_norm": 0.7244053483009338, + "learning_rate": 4.944727260110715e-07, + "loss": 2.0492, + "step": 27086 + }, + { + "epoch": 0.9, + "grad_norm": 0.7351243495941162, + "learning_rate": 4.941426842546571e-07, + "loss": 2.0142, + "step": 27087 + }, + { + "epoch": 0.9, + "grad_norm": 0.7761110067367554, + "learning_rate": 4.938127498898459e-07, + "loss": 1.9812, + "step": 27088 + }, + { + "epoch": 0.9, + "grad_norm": 0.75872802734375, + "learning_rate": 4.934829229203685e-07, + "loss": 2.0452, + "step": 27089 + }, + { + "epoch": 0.9, + "grad_norm": 0.7456680536270142, + "learning_rate": 4.931532033499475e-07, + "loss": 2.0871, + "step": 27090 + }, + { + "epoch": 0.9, + "grad_norm": 0.7709147930145264, + "learning_rate": 4.928235911823121e-07, + "loss": 2.0273, + "step": 27091 + }, + { + "epoch": 0.9, + "grad_norm": 0.7303224205970764, + "learning_rate": 4.924940864211825e-07, + "loss": 2.079, + "step": 27092 + }, + { + "epoch": 0.9, + "grad_norm": 0.7164412140846252, + "learning_rate": 4.921646890702847e-07, + "loss": 1.9756, + "step": 27093 + }, + { + "epoch": 0.9, + "grad_norm": 0.7366651296615601, + "learning_rate": 4.918353991333358e-07, + "loss": 2.0164, + "step": 27094 + }, + { + "epoch": 0.9, + "grad_norm": 0.7471129894256592, + "learning_rate": 4.915062166140605e-07, + "loss": 2.013, + "step": 27095 + }, + { + "epoch": 0.9, + "grad_norm": 0.7312847375869751, + "learning_rate": 4.911771415161748e-07, + "loss": 1.9979, + "step": 27096 + }, + { + "epoch": 0.9, + "grad_norm": 0.7334935665130615, + "learning_rate": 4.908481738433957e-07, + "loss": 1.9989, + "step": 27097 + }, + { + "epoch": 0.9, + "grad_norm": 0.744944155216217, + "learning_rate": 4.905193135994413e-07, + "loss": 2.0499, + "step": 27098 + }, + { + "epoch": 0.9, + "grad_norm": 0.8649877309799194, + "learning_rate": 4.901905607880287e-07, + "loss": 2.0129, + "step": 27099 + }, + { + "epoch": 0.9, + "grad_norm": 0.7543284296989441, + "learning_rate": 4.898619154128681e-07, + "loss": 2.026, + "step": 27100 + }, + { + "epoch": 0.9, + "grad_norm": 0.7839173078536987, + "learning_rate": 4.895333774776745e-07, + "loss": 1.9944, + "step": 27101 + }, + { + "epoch": 0.9, + "grad_norm": 0.7556594014167786, + "learning_rate": 4.892049469861592e-07, + "loss": 2.0571, + "step": 27102 + }, + { + "epoch": 0.9, + "grad_norm": 0.7706905603408813, + "learning_rate": 4.888766239420317e-07, + "loss": 2.0997, + "step": 27103 + }, + { + "epoch": 0.9, + "grad_norm": 0.7642306685447693, + "learning_rate": 4.885484083490033e-07, + "loss": 2.1167, + "step": 27104 + }, + { + "epoch": 0.9, + "grad_norm": 0.7485413551330566, + "learning_rate": 4.882203002107811e-07, + "loss": 2.0459, + "step": 27105 + }, + { + "epoch": 0.9, + "grad_norm": 0.7663609385490417, + "learning_rate": 4.878922995310698e-07, + "loss": 2.138, + "step": 27106 + }, + { + "epoch": 0.9, + "grad_norm": 0.7456453442573547, + "learning_rate": 4.875644063135765e-07, + "loss": 2.0618, + "step": 27107 + }, + { + "epoch": 0.9, + "grad_norm": 0.7579767107963562, + "learning_rate": 4.872366205620071e-07, + "loss": 2.0206, + "step": 27108 + }, + { + "epoch": 0.9, + "grad_norm": 0.7478576898574829, + "learning_rate": 4.869089422800633e-07, + "loss": 2.0518, + "step": 27109 + }, + { + "epoch": 0.9, + "grad_norm": 0.7535732388496399, + "learning_rate": 4.865813714714451e-07, + "loss": 1.9959, + "step": 27110 + }, + { + "epoch": 0.9, + "grad_norm": 0.7318512797355652, + "learning_rate": 4.862539081398576e-07, + "loss": 2.03, + "step": 27111 + }, + { + "epoch": 0.9, + "grad_norm": 0.7211381793022156, + "learning_rate": 4.859265522889967e-07, + "loss": 2.0776, + "step": 27112 + }, + { + "epoch": 0.9, + "grad_norm": 0.7655084133148193, + "learning_rate": 4.855993039225626e-07, + "loss": 2.0183, + "step": 27113 + }, + { + "epoch": 0.9, + "grad_norm": 0.7552460432052612, + "learning_rate": 4.852721630442492e-07, + "loss": 2.0524, + "step": 27114 + }, + { + "epoch": 0.9, + "grad_norm": 0.7510892748832703, + "learning_rate": 4.849451296577578e-07, + "loss": 2.0859, + "step": 27115 + }, + { + "epoch": 0.9, + "grad_norm": 0.765746533870697, + "learning_rate": 4.846182037667779e-07, + "loss": 2.0827, + "step": 27116 + }, + { + "epoch": 0.9, + "grad_norm": 0.7234635949134827, + "learning_rate": 4.842913853750064e-07, + "loss": 2.0496, + "step": 27117 + }, + { + "epoch": 0.9, + "grad_norm": 0.7556241750717163, + "learning_rate": 4.839646744861337e-07, + "loss": 2.0458, + "step": 27118 + }, + { + "epoch": 0.9, + "grad_norm": 0.7452849745750427, + "learning_rate": 4.836380711038502e-07, + "loss": 2.0201, + "step": 27119 + }, + { + "epoch": 0.9, + "grad_norm": 0.7753193378448486, + "learning_rate": 4.833115752318462e-07, + "loss": 2.0913, + "step": 27120 + }, + { + "epoch": 0.9, + "grad_norm": 0.726535439491272, + "learning_rate": 4.829851868738122e-07, + "loss": 2.0921, + "step": 27121 + }, + { + "epoch": 0.9, + "grad_norm": 0.7456585764884949, + "learning_rate": 4.826589060334341e-07, + "loss": 2.0761, + "step": 27122 + }, + { + "epoch": 0.9, + "grad_norm": 0.7307118773460388, + "learning_rate": 4.823327327143968e-07, + "loss": 2.1309, + "step": 27123 + }, + { + "epoch": 0.9, + "grad_norm": 0.7597889304161072, + "learning_rate": 4.820066669203883e-07, + "loss": 2.0618, + "step": 27124 + }, + { + "epoch": 0.9, + "grad_norm": 0.7671752572059631, + "learning_rate": 4.816807086550879e-07, + "loss": 2.0837, + "step": 27125 + }, + { + "epoch": 0.9, + "grad_norm": 0.7538430094718933, + "learning_rate": 4.813548579221828e-07, + "loss": 1.9987, + "step": 27126 + }, + { + "epoch": 0.9, + "grad_norm": 0.7858676314353943, + "learning_rate": 4.810291147253521e-07, + "loss": 2.0929, + "step": 27127 + }, + { + "epoch": 0.9, + "grad_norm": 0.743984043598175, + "learning_rate": 4.807034790682741e-07, + "loss": 2.0459, + "step": 27128 + }, + { + "epoch": 0.9, + "grad_norm": 0.7324937582015991, + "learning_rate": 4.803779509546292e-07, + "loss": 2.024, + "step": 27129 + }, + { + "epoch": 0.9, + "grad_norm": 0.7180732488632202, + "learning_rate": 4.800525303880966e-07, + "loss": 1.9935, + "step": 27130 + }, + { + "epoch": 0.9, + "grad_norm": 0.7210368514060974, + "learning_rate": 4.79727217372351e-07, + "loss": 2.0739, + "step": 27131 + }, + { + "epoch": 0.9, + "grad_norm": 0.7431942224502563, + "learning_rate": 4.794020119110665e-07, + "loss": 2.0913, + "step": 27132 + }, + { + "epoch": 0.9, + "grad_norm": 0.7325198650360107, + "learning_rate": 4.790769140079188e-07, + "loss": 1.9829, + "step": 27133 + }, + { + "epoch": 0.9, + "grad_norm": 0.7760083675384521, + "learning_rate": 4.787519236665816e-07, + "loss": 2.0408, + "step": 27134 + }, + { + "epoch": 0.9, + "grad_norm": 0.7466432452201843, + "learning_rate": 4.784270408907254e-07, + "loss": 2.0562, + "step": 27135 + }, + { + "epoch": 0.9, + "grad_norm": 0.7381594777107239, + "learning_rate": 4.781022656840184e-07, + "loss": 2.0641, + "step": 27136 + }, + { + "epoch": 0.9, + "grad_norm": 0.763980507850647, + "learning_rate": 4.777775980501331e-07, + "loss": 2.0445, + "step": 27137 + }, + { + "epoch": 0.9, + "grad_norm": 0.7529333233833313, + "learning_rate": 4.774530379927345e-07, + "loss": 2.0408, + "step": 27138 + }, + { + "epoch": 0.9, + "grad_norm": 0.746748149394989, + "learning_rate": 4.771285855154928e-07, + "loss": 2.0177, + "step": 27139 + }, + { + "epoch": 0.9, + "grad_norm": 0.7413294315338135, + "learning_rate": 4.7680424062207075e-07, + "loss": 2.0067, + "step": 27140 + }, + { + "epoch": 0.9, + "grad_norm": 0.7652919292449951, + "learning_rate": 4.7648000331613207e-07, + "loss": 2.1138, + "step": 27141 + }, + { + "epoch": 0.9, + "grad_norm": 0.7411420345306396, + "learning_rate": 4.761558736013405e-07, + "loss": 2.0844, + "step": 27142 + }, + { + "epoch": 0.9, + "grad_norm": 0.7302148938179016, + "learning_rate": 4.758318514813609e-07, + "loss": 2.0831, + "step": 27143 + }, + { + "epoch": 0.9, + "grad_norm": 0.741340696811676, + "learning_rate": 4.755079369598503e-07, + "loss": 2.0703, + "step": 27144 + }, + { + "epoch": 0.9, + "grad_norm": 0.7437747120857239, + "learning_rate": 4.7518413004046805e-07, + "loss": 2.0798, + "step": 27145 + }, + { + "epoch": 0.9, + "grad_norm": 0.7413864731788635, + "learning_rate": 4.7486043072687447e-07, + "loss": 1.9819, + "step": 27146 + }, + { + "epoch": 0.9, + "grad_norm": 0.7291920781135559, + "learning_rate": 4.745368390227245e-07, + "loss": 2.0232, + "step": 27147 + }, + { + "epoch": 0.9, + "grad_norm": 0.7635176181793213, + "learning_rate": 4.742133549316763e-07, + "loss": 2.0904, + "step": 27148 + }, + { + "epoch": 0.9, + "grad_norm": 0.7395678162574768, + "learning_rate": 4.7388997845738137e-07, + "loss": 2.0449, + "step": 27149 + }, + { + "epoch": 0.9, + "grad_norm": 0.7517318725585938, + "learning_rate": 4.735667096034957e-07, + "loss": 1.995, + "step": 27150 + }, + { + "epoch": 0.9, + "grad_norm": 0.7475565671920776, + "learning_rate": 4.732435483736697e-07, + "loss": 1.9816, + "step": 27151 + }, + { + "epoch": 0.9, + "grad_norm": 0.7626528143882751, + "learning_rate": 4.72920494771556e-07, + "loss": 2.0995, + "step": 27152 + }, + { + "epoch": 0.9, + "grad_norm": 0.7431235909461975, + "learning_rate": 4.7259754880080277e-07, + "loss": 2.0399, + "step": 27153 + }, + { + "epoch": 0.9, + "grad_norm": 0.746757447719574, + "learning_rate": 4.722747104650582e-07, + "loss": 2.0557, + "step": 27154 + }, + { + "epoch": 0.9, + "grad_norm": 0.7459179162979126, + "learning_rate": 4.719519797679695e-07, + "loss": 2.0516, + "step": 27155 + }, + { + "epoch": 0.9, + "grad_norm": 0.7435001134872437, + "learning_rate": 4.716293567131847e-07, + "loss": 2.0622, + "step": 27156 + }, + { + "epoch": 0.9, + "grad_norm": 0.7230681777000427, + "learning_rate": 4.7130684130434755e-07, + "loss": 2.0126, + "step": 27157 + }, + { + "epoch": 0.9, + "grad_norm": 0.7326712012290955, + "learning_rate": 4.7098443354509973e-07, + "loss": 1.9997, + "step": 27158 + }, + { + "epoch": 0.9, + "grad_norm": 0.7275022864341736, + "learning_rate": 4.7066213343908596e-07, + "loss": 2.0076, + "step": 27159 + }, + { + "epoch": 0.9, + "grad_norm": 0.7304858565330505, + "learning_rate": 4.7033994098994674e-07, + "loss": 2.0196, + "step": 27160 + }, + { + "epoch": 0.9, + "grad_norm": 0.7503100633621216, + "learning_rate": 4.700178562013202e-07, + "loss": 2.0079, + "step": 27161 + }, + { + "epoch": 0.9, + "grad_norm": 0.7476886510848999, + "learning_rate": 4.696958790768513e-07, + "loss": 2.0234, + "step": 27162 + }, + { + "epoch": 0.9, + "grad_norm": 0.765396773815155, + "learning_rate": 4.693740096201693e-07, + "loss": 1.9728, + "step": 27163 + }, + { + "epoch": 0.9, + "grad_norm": 0.7505879998207092, + "learning_rate": 4.6905224783491464e-07, + "loss": 2.0694, + "step": 27164 + }, + { + "epoch": 0.9, + "grad_norm": 0.7419947981834412, + "learning_rate": 4.687305937247233e-07, + "loss": 2.0781, + "step": 27165 + }, + { + "epoch": 0.9, + "grad_norm": 0.7218696475028992, + "learning_rate": 4.684090472932279e-07, + "loss": 2.0785, + "step": 27166 + }, + { + "epoch": 0.9, + "grad_norm": 0.7634477615356445, + "learning_rate": 4.6808760854405885e-07, + "loss": 2.0639, + "step": 27167 + }, + { + "epoch": 0.9, + "grad_norm": 0.7482348084449768, + "learning_rate": 4.6776627748085225e-07, + "loss": 2.0637, + "step": 27168 + }, + { + "epoch": 0.9, + "grad_norm": 0.7162985801696777, + "learning_rate": 4.6744505410723397e-07, + "loss": 2.1038, + "step": 27169 + }, + { + "epoch": 0.9, + "grad_norm": 0.7516705393791199, + "learning_rate": 4.671239384268356e-07, + "loss": 2.0446, + "step": 27170 + }, + { + "epoch": 0.9, + "grad_norm": 0.7414560317993164, + "learning_rate": 4.6680293044328417e-07, + "loss": 2.0335, + "step": 27171 + }, + { + "epoch": 0.9, + "grad_norm": 0.7323722839355469, + "learning_rate": 4.664820301602069e-07, + "loss": 2.0858, + "step": 27172 + }, + { + "epoch": 0.9, + "grad_norm": 0.7679982781410217, + "learning_rate": 4.6616123758122636e-07, + "loss": 2.0469, + "step": 27173 + }, + { + "epoch": 0.9, + "grad_norm": 0.7764168381690979, + "learning_rate": 4.6584055270997077e-07, + "loss": 2.0976, + "step": 27174 + }, + { + "epoch": 0.9, + "grad_norm": 0.745108962059021, + "learning_rate": 4.655199755500617e-07, + "loss": 2.0261, + "step": 27175 + }, + { + "epoch": 0.9, + "grad_norm": 0.7494735717773438, + "learning_rate": 4.6519950610511734e-07, + "loss": 2.0027, + "step": 27176 + }, + { + "epoch": 0.9, + "grad_norm": 0.7368234992027283, + "learning_rate": 4.6487914437876257e-07, + "loss": 2.0468, + "step": 27177 + }, + { + "epoch": 0.9, + "grad_norm": 0.7543310523033142, + "learning_rate": 4.6455889037461564e-07, + "loss": 2.0878, + "step": 27178 + }, + { + "epoch": 0.9, + "grad_norm": 0.7143838405609131, + "learning_rate": 4.6423874409629365e-07, + "loss": 1.9742, + "step": 27179 + }, + { + "epoch": 0.9, + "grad_norm": 0.7212214469909668, + "learning_rate": 4.639187055474137e-07, + "loss": 1.9663, + "step": 27180 + }, + { + "epoch": 0.9, + "grad_norm": 0.7832921743392944, + "learning_rate": 4.635987747315918e-07, + "loss": 2.119, + "step": 27181 + }, + { + "epoch": 0.9, + "grad_norm": 0.7653928995132446, + "learning_rate": 4.6327895165244164e-07, + "loss": 2.1316, + "step": 27182 + }, + { + "epoch": 0.9, + "grad_norm": 0.7269788384437561, + "learning_rate": 4.62959236313576e-07, + "loss": 2.08, + "step": 27183 + }, + { + "epoch": 0.9, + "grad_norm": 0.7331037521362305, + "learning_rate": 4.626396287186108e-07, + "loss": 2.0744, + "step": 27184 + }, + { + "epoch": 0.9, + "grad_norm": 0.7766017913818359, + "learning_rate": 4.6232012887115095e-07, + "loss": 2.0697, + "step": 27185 + }, + { + "epoch": 0.9, + "grad_norm": 0.7590108513832092, + "learning_rate": 4.6200073677480804e-07, + "loss": 2.0469, + "step": 27186 + }, + { + "epoch": 0.9, + "grad_norm": 0.7213303446769714, + "learning_rate": 4.616814524331925e-07, + "loss": 2.0911, + "step": 27187 + }, + { + "epoch": 0.9, + "grad_norm": 0.7811734080314636, + "learning_rate": 4.613622758499103e-07, + "loss": 2.0417, + "step": 27188 + }, + { + "epoch": 0.9, + "grad_norm": 0.7582060694694519, + "learning_rate": 4.6104320702856533e-07, + "loss": 2.1037, + "step": 27189 + }, + { + "epoch": 0.9, + "grad_norm": 0.812232255935669, + "learning_rate": 4.607242459727657e-07, + "loss": 2.0454, + "step": 27190 + }, + { + "epoch": 0.9, + "grad_norm": 0.7333725094795227, + "learning_rate": 4.604053926861107e-07, + "loss": 2.0382, + "step": 27191 + }, + { + "epoch": 0.9, + "grad_norm": 0.7528991103172302, + "learning_rate": 4.600866471722054e-07, + "loss": 2.0403, + "step": 27192 + }, + { + "epoch": 0.9, + "grad_norm": 0.7418551445007324, + "learning_rate": 4.597680094346513e-07, + "loss": 2.015, + "step": 27193 + }, + { + "epoch": 0.9, + "grad_norm": 0.7414429187774658, + "learning_rate": 4.5944947947704765e-07, + "loss": 1.9767, + "step": 27194 + }, + { + "epoch": 0.9, + "grad_norm": 0.7212668061256409, + "learning_rate": 4.5913105730299055e-07, + "loss": 2.0033, + "step": 27195 + }, + { + "epoch": 0.9, + "grad_norm": 0.7240128517150879, + "learning_rate": 4.588127429160816e-07, + "loss": 2.115, + "step": 27196 + }, + { + "epoch": 0.9, + "grad_norm": 0.7466779351234436, + "learning_rate": 4.5849453631991335e-07, + "loss": 2.0644, + "step": 27197 + }, + { + "epoch": 0.9, + "grad_norm": 0.7503958940505981, + "learning_rate": 4.5817643751808086e-07, + "loss": 2.0272, + "step": 27198 + }, + { + "epoch": 0.9, + "grad_norm": 0.7405818104743958, + "learning_rate": 4.5785844651418e-07, + "loss": 2.0446, + "step": 27199 + }, + { + "epoch": 0.9, + "grad_norm": 0.7428597807884216, + "learning_rate": 4.575405633118024e-07, + "loss": 2.0814, + "step": 27200 + }, + { + "epoch": 0.9, + "grad_norm": 0.7610568404197693, + "learning_rate": 4.572227879145386e-07, + "loss": 2.0398, + "step": 27201 + }, + { + "epoch": 0.91, + "grad_norm": 0.7541273236274719, + "learning_rate": 4.569051203259789e-07, + "loss": 2.077, + "step": 27202 + }, + { + "epoch": 0.91, + "grad_norm": 0.7375293374061584, + "learning_rate": 4.565875605497139e-07, + "loss": 2.0089, + "step": 27203 + }, + { + "epoch": 0.91, + "grad_norm": 0.7622026801109314, + "learning_rate": 4.562701085893273e-07, + "loss": 2.0236, + "step": 27204 + }, + { + "epoch": 0.91, + "grad_norm": 0.7403553128242493, + "learning_rate": 4.559527644484085e-07, + "loss": 2.052, + "step": 27205 + }, + { + "epoch": 0.91, + "grad_norm": 0.7683464884757996, + "learning_rate": 4.5563552813054466e-07, + "loss": 2.0031, + "step": 27206 + }, + { + "epoch": 0.91, + "grad_norm": 0.7548992037773132, + "learning_rate": 4.55318399639314e-07, + "loss": 2.0678, + "step": 27207 + }, + { + "epoch": 0.91, + "grad_norm": 0.7375810146331787, + "learning_rate": 4.550013789783025e-07, + "loss": 2.0553, + "step": 27208 + }, + { + "epoch": 0.91, + "grad_norm": 0.7473897933959961, + "learning_rate": 4.5468446615109294e-07, + "loss": 2.0127, + "step": 27209 + }, + { + "epoch": 0.91, + "grad_norm": 0.7527197003364563, + "learning_rate": 4.5436766116126353e-07, + "loss": 2.0616, + "step": 27210 + }, + { + "epoch": 0.91, + "grad_norm": 0.7621583342552185, + "learning_rate": 4.5405096401239354e-07, + "loss": 2.0477, + "step": 27211 + }, + { + "epoch": 0.91, + "grad_norm": 0.7665872573852539, + "learning_rate": 4.537343747080625e-07, + "loss": 1.9802, + "step": 27212 + }, + { + "epoch": 0.91, + "grad_norm": 0.7572036385536194, + "learning_rate": 4.5341789325184404e-07, + "loss": 2.0665, + "step": 27213 + }, + { + "epoch": 0.91, + "grad_norm": 0.745315432548523, + "learning_rate": 4.531015196473154e-07, + "loss": 2.0378, + "step": 27214 + }, + { + "epoch": 0.91, + "grad_norm": 0.7504767775535583, + "learning_rate": 4.527852538980526e-07, + "loss": 2.1323, + "step": 27215 + }, + { + "epoch": 0.91, + "grad_norm": 0.7375007271766663, + "learning_rate": 4.5246909600762613e-07, + "loss": 2.0619, + "step": 27216 + }, + { + "epoch": 0.91, + "grad_norm": 0.7386402487754822, + "learning_rate": 4.5215304597960754e-07, + "loss": 2.0014, + "step": 27217 + }, + { + "epoch": 0.91, + "grad_norm": 0.7384512424468994, + "learning_rate": 4.518371038175684e-07, + "loss": 2.0429, + "step": 27218 + }, + { + "epoch": 0.91, + "grad_norm": 0.7507937550544739, + "learning_rate": 4.5152126952508034e-07, + "loss": 2.0809, + "step": 27219 + }, + { + "epoch": 0.91, + "grad_norm": 0.7525652647018433, + "learning_rate": 4.5120554310570607e-07, + "loss": 1.9981, + "step": 27220 + }, + { + "epoch": 0.91, + "grad_norm": 0.7631418108940125, + "learning_rate": 4.5088992456301605e-07, + "loss": 2.0813, + "step": 27221 + }, + { + "epoch": 0.91, + "grad_norm": 0.7673131823539734, + "learning_rate": 4.5057441390057635e-07, + "loss": 2.109, + "step": 27222 + }, + { + "epoch": 0.91, + "grad_norm": 0.7537752389907837, + "learning_rate": 4.502590111219507e-07, + "loss": 2.1128, + "step": 27223 + }, + { + "epoch": 0.91, + "grad_norm": 0.7530593276023865, + "learning_rate": 4.4994371623069964e-07, + "loss": 2.0165, + "step": 27224 + }, + { + "epoch": 0.91, + "grad_norm": 0.7353842854499817, + "learning_rate": 4.4962852923039036e-07, + "loss": 2.0916, + "step": 27225 + }, + { + "epoch": 0.91, + "grad_norm": 0.7565382719039917, + "learning_rate": 4.493134501245788e-07, + "loss": 2.0762, + "step": 27226 + }, + { + "epoch": 0.91, + "grad_norm": 0.7596974968910217, + "learning_rate": 4.489984789168267e-07, + "loss": 2.0196, + "step": 27227 + }, + { + "epoch": 0.91, + "grad_norm": 0.7390725612640381, + "learning_rate": 4.486836156106944e-07, + "loss": 2.0399, + "step": 27228 + }, + { + "epoch": 0.91, + "grad_norm": 0.7634503841400146, + "learning_rate": 4.483688602097358e-07, + "loss": 2.1223, + "step": 27229 + }, + { + "epoch": 0.91, + "grad_norm": 0.735130786895752, + "learning_rate": 4.480542127175069e-07, + "loss": 2.0037, + "step": 27230 + }, + { + "epoch": 0.91, + "grad_norm": 0.7377784848213196, + "learning_rate": 4.4773967313756497e-07, + "loss": 2.074, + "step": 27231 + }, + { + "epoch": 0.91, + "grad_norm": 0.7616094350814819, + "learning_rate": 4.4742524147346254e-07, + "loss": 2.0392, + "step": 27232 + }, + { + "epoch": 0.91, + "grad_norm": 0.7815489172935486, + "learning_rate": 4.4711091772874917e-07, + "loss": 2.0044, + "step": 27233 + }, + { + "epoch": 0.91, + "grad_norm": 0.7499237656593323, + "learning_rate": 4.4679670190697853e-07, + "loss": 2.0618, + "step": 27234 + }, + { + "epoch": 0.91, + "grad_norm": 0.7361255884170532, + "learning_rate": 4.4648259401170123e-07, + "loss": 2.1037, + "step": 27235 + }, + { + "epoch": 0.91, + "grad_norm": 0.74874347448349, + "learning_rate": 4.461685940464644e-07, + "loss": 2.0326, + "step": 27236 + }, + { + "epoch": 0.91, + "grad_norm": 0.7651322484016418, + "learning_rate": 4.458547020148163e-07, + "loss": 2.0292, + "step": 27237 + }, + { + "epoch": 0.91, + "grad_norm": 0.7285462617874146, + "learning_rate": 4.455409179203019e-07, + "loss": 2.0134, + "step": 27238 + }, + { + "epoch": 0.91, + "grad_norm": 0.7600539326667786, + "learning_rate": 4.4522724176646605e-07, + "loss": 2.1237, + "step": 27239 + }, + { + "epoch": 0.91, + "grad_norm": 0.7835752964019775, + "learning_rate": 4.4491367355685266e-07, + "loss": 1.9904, + "step": 27240 + }, + { + "epoch": 0.91, + "grad_norm": 0.7645593285560608, + "learning_rate": 4.446002132950078e-07, + "loss": 2.0253, + "step": 27241 + }, + { + "epoch": 0.91, + "grad_norm": 0.7423568367958069, + "learning_rate": 4.442868609844675e-07, + "loss": 2.0818, + "step": 27242 + }, + { + "epoch": 0.91, + "grad_norm": 0.7289443016052246, + "learning_rate": 4.439736166287734e-07, + "loss": 2.0427, + "step": 27243 + }, + { + "epoch": 0.91, + "grad_norm": 0.78445965051651, + "learning_rate": 4.436604802314659e-07, + "loss": 1.9468, + "step": 27244 + }, + { + "epoch": 0.91, + "grad_norm": 0.7338494658470154, + "learning_rate": 4.433474517960812e-07, + "loss": 2.0992, + "step": 27245 + }, + { + "epoch": 0.91, + "grad_norm": 0.757244348526001, + "learning_rate": 4.430345313261575e-07, + "loss": 2.0941, + "step": 27246 + }, + { + "epoch": 0.91, + "grad_norm": 0.7690564393997192, + "learning_rate": 4.4272171882522865e-07, + "loss": 1.9839, + "step": 27247 + }, + { + "epoch": 0.91, + "grad_norm": 0.760442852973938, + "learning_rate": 4.4240901429682737e-07, + "loss": 2.0739, + "step": 27248 + }, + { + "epoch": 0.91, + "grad_norm": 0.7288614511489868, + "learning_rate": 4.4209641774448753e-07, + "loss": 2.0584, + "step": 27249 + }, + { + "epoch": 0.91, + "grad_norm": 0.7467079162597656, + "learning_rate": 4.4178392917174296e-07, + "loss": 2.0638, + "step": 27250 + }, + { + "epoch": 0.91, + "grad_norm": 0.7373058795928955, + "learning_rate": 4.414715485821208e-07, + "loss": 1.9754, + "step": 27251 + }, + { + "epoch": 0.91, + "grad_norm": 0.7578360438346863, + "learning_rate": 4.4115927597915053e-07, + "loss": 2.0406, + "step": 27252 + }, + { + "epoch": 0.91, + "grad_norm": 0.7696007490158081, + "learning_rate": 4.408471113663615e-07, + "loss": 2.0562, + "step": 27253 + }, + { + "epoch": 0.91, + "grad_norm": 0.7486810088157654, + "learning_rate": 4.4053505474728085e-07, + "loss": 2.0643, + "step": 27254 + }, + { + "epoch": 0.91, + "grad_norm": 0.7665430307388306, + "learning_rate": 4.402231061254303e-07, + "loss": 1.9978, + "step": 27255 + }, + { + "epoch": 0.91, + "grad_norm": 0.746160089969635, + "learning_rate": 4.399112655043369e-07, + "loss": 2.0626, + "step": 27256 + }, + { + "epoch": 0.91, + "grad_norm": 0.745145320892334, + "learning_rate": 4.3959953288752466e-07, + "loss": 1.9698, + "step": 27257 + }, + { + "epoch": 0.91, + "grad_norm": 0.7654933929443359, + "learning_rate": 4.3928790827851284e-07, + "loss": 2.0736, + "step": 27258 + }, + { + "epoch": 0.91, + "grad_norm": 0.7345343828201294, + "learning_rate": 4.389763916808232e-07, + "loss": 2.0881, + "step": 27259 + }, + { + "epoch": 0.91, + "grad_norm": 0.7512836456298828, + "learning_rate": 4.386649830979761e-07, + "loss": 2.007, + "step": 27260 + }, + { + "epoch": 0.91, + "grad_norm": 0.7301349639892578, + "learning_rate": 4.383536825334866e-07, + "loss": 2.0531, + "step": 27261 + }, + { + "epoch": 0.91, + "grad_norm": 0.749341607093811, + "learning_rate": 4.380424899908742e-07, + "loss": 2.0299, + "step": 27262 + }, + { + "epoch": 0.91, + "grad_norm": 0.7210928201675415, + "learning_rate": 4.377314054736559e-07, + "loss": 2.038, + "step": 27263 + }, + { + "epoch": 0.91, + "grad_norm": 0.7523465752601624, + "learning_rate": 4.3742042898534234e-07, + "loss": 2.017, + "step": 27264 + }, + { + "epoch": 0.91, + "grad_norm": 0.7519701719284058, + "learning_rate": 4.371095605294484e-07, + "loss": 2.0102, + "step": 27265 + }, + { + "epoch": 0.91, + "grad_norm": 0.7561206817626953, + "learning_rate": 4.3679880010948693e-07, + "loss": 2.1069, + "step": 27266 + }, + { + "epoch": 0.91, + "grad_norm": 0.7261016368865967, + "learning_rate": 4.364881477289673e-07, + "loss": 1.9687, + "step": 27267 + }, + { + "epoch": 0.91, + "grad_norm": 0.7484026551246643, + "learning_rate": 4.361776033914e-07, + "loss": 2.0595, + "step": 27268 + }, + { + "epoch": 0.91, + "grad_norm": 0.751403272151947, + "learning_rate": 4.358671671002945e-07, + "loss": 2.0661, + "step": 27269 + }, + { + "epoch": 0.91, + "grad_norm": 0.7374343276023865, + "learning_rate": 4.3555683885915466e-07, + "loss": 1.9217, + "step": 27270 + }, + { + "epoch": 0.91, + "grad_norm": 0.7459091544151306, + "learning_rate": 4.3524661867148875e-07, + "loss": 2.0409, + "step": 27271 + }, + { + "epoch": 0.91, + "grad_norm": 0.7699228525161743, + "learning_rate": 4.3493650654080177e-07, + "loss": 2.108, + "step": 27272 + }, + { + "epoch": 0.91, + "grad_norm": 0.7439596056938171, + "learning_rate": 4.3462650247059646e-07, + "loss": 2.0086, + "step": 27273 + }, + { + "epoch": 0.91, + "grad_norm": 0.7643613219261169, + "learning_rate": 4.3431660646437444e-07, + "loss": 2.0718, + "step": 27274 + }, + { + "epoch": 0.91, + "grad_norm": 0.7296448945999146, + "learning_rate": 4.3400681852563853e-07, + "loss": 1.9716, + "step": 27275 + }, + { + "epoch": 0.91, + "grad_norm": 0.7166428565979004, + "learning_rate": 4.3369713865788585e-07, + "loss": 2.1054, + "step": 27276 + }, + { + "epoch": 0.91, + "grad_norm": 0.7583434581756592, + "learning_rate": 4.3338756686461704e-07, + "loss": 2.0953, + "step": 27277 + }, + { + "epoch": 0.91, + "grad_norm": 0.7275704741477966, + "learning_rate": 4.3307810314932917e-07, + "loss": 1.9988, + "step": 27278 + }, + { + "epoch": 0.91, + "grad_norm": 0.7607447504997253, + "learning_rate": 4.327687475155184e-07, + "loss": 2.0739, + "step": 27279 + }, + { + "epoch": 0.91, + "grad_norm": 0.7460162043571472, + "learning_rate": 4.324594999666776e-07, + "loss": 2.1326, + "step": 27280 + }, + { + "epoch": 0.91, + "grad_norm": 0.737190306186676, + "learning_rate": 4.321503605063049e-07, + "loss": 2.0477, + "step": 27281 + }, + { + "epoch": 0.91, + "grad_norm": 0.7477969527244568, + "learning_rate": 4.318413291378887e-07, + "loss": 2.0461, + "step": 27282 + }, + { + "epoch": 0.91, + "grad_norm": 0.761574387550354, + "learning_rate": 4.3153240586492174e-07, + "loss": 2.0817, + "step": 27283 + }, + { + "epoch": 0.91, + "grad_norm": 0.7492398619651794, + "learning_rate": 4.3122359069089345e-07, + "loss": 2.0609, + "step": 27284 + }, + { + "epoch": 0.91, + "grad_norm": 0.7186716794967651, + "learning_rate": 4.3091488361929556e-07, + "loss": 2.0624, + "step": 27285 + }, + { + "epoch": 0.91, + "grad_norm": 0.7704103589057922, + "learning_rate": 4.306062846536108e-07, + "loss": 2.0544, + "step": 27286 + }, + { + "epoch": 0.91, + "grad_norm": 0.7424799203872681, + "learning_rate": 4.302977937973274e-07, + "loss": 2.0257, + "step": 27287 + }, + { + "epoch": 0.91, + "grad_norm": 0.7350695729255676, + "learning_rate": 4.2998941105393375e-07, + "loss": 1.9568, + "step": 27288 + }, + { + "epoch": 0.91, + "grad_norm": 0.7453460097312927, + "learning_rate": 4.2968113642690933e-07, + "loss": 2.0084, + "step": 27289 + }, + { + "epoch": 0.91, + "grad_norm": 0.7324519157409668, + "learning_rate": 4.293729699197391e-07, + "loss": 2.0134, + "step": 27290 + }, + { + "epoch": 0.91, + "grad_norm": 0.7294197678565979, + "learning_rate": 4.290649115359047e-07, + "loss": 2.0988, + "step": 27291 + }, + { + "epoch": 0.91, + "grad_norm": 0.7409774661064148, + "learning_rate": 4.2875696127888446e-07, + "loss": 2.0085, + "step": 27292 + }, + { + "epoch": 0.91, + "grad_norm": 0.7409294247627258, + "learning_rate": 4.284491191521589e-07, + "loss": 2.0331, + "step": 27293 + }, + { + "epoch": 0.91, + "grad_norm": 0.7285907864570618, + "learning_rate": 4.2814138515920753e-07, + "loss": 2.0122, + "step": 27294 + }, + { + "epoch": 0.91, + "grad_norm": 0.7287317514419556, + "learning_rate": 4.2783375930350426e-07, + "loss": 2.028, + "step": 27295 + }, + { + "epoch": 0.91, + "grad_norm": 0.75761479139328, + "learning_rate": 4.275262415885251e-07, + "loss": 2.0531, + "step": 27296 + }, + { + "epoch": 0.91, + "grad_norm": 0.7740064263343811, + "learning_rate": 4.2721883201774506e-07, + "loss": 2.0425, + "step": 27297 + }, + { + "epoch": 0.91, + "grad_norm": 0.7537700533866882, + "learning_rate": 4.2691153059463475e-07, + "loss": 2.0344, + "step": 27298 + }, + { + "epoch": 0.91, + "grad_norm": 0.7413243055343628, + "learning_rate": 4.2660433732266913e-07, + "loss": 2.0943, + "step": 27299 + }, + { + "epoch": 0.91, + "grad_norm": 0.7568277716636658, + "learning_rate": 4.262972522053166e-07, + "loss": 2.0627, + "step": 27300 + }, + { + "epoch": 0.91, + "grad_norm": 0.7404831647872925, + "learning_rate": 4.259902752460476e-07, + "loss": 2.0784, + "step": 27301 + }, + { + "epoch": 0.91, + "grad_norm": 0.7655001878738403, + "learning_rate": 4.256834064483284e-07, + "loss": 1.9952, + "step": 27302 + }, + { + "epoch": 0.91, + "grad_norm": 0.7347220778465271, + "learning_rate": 4.253766458156283e-07, + "loss": 2.0191, + "step": 27303 + }, + { + "epoch": 0.91, + "grad_norm": 0.7642946243286133, + "learning_rate": 4.250699933514113e-07, + "loss": 2.0073, + "step": 27304 + }, + { + "epoch": 0.91, + "grad_norm": 0.7280412912368774, + "learning_rate": 4.247634490591401e-07, + "loss": 2.0404, + "step": 27305 + }, + { + "epoch": 0.91, + "grad_norm": 0.7524710893630981, + "learning_rate": 4.2445701294228095e-07, + "loss": 2.1265, + "step": 27306 + }, + { + "epoch": 0.91, + "grad_norm": 0.7413941621780396, + "learning_rate": 4.2415068500429646e-07, + "loss": 2.0014, + "step": 27307 + }, + { + "epoch": 0.91, + "grad_norm": 0.7534601092338562, + "learning_rate": 4.23844465248644e-07, + "loss": 2.0985, + "step": 27308 + }, + { + "epoch": 0.91, + "grad_norm": 0.7424414753913879, + "learning_rate": 4.2353835367878404e-07, + "loss": 2.0539, + "step": 27309 + }, + { + "epoch": 0.91, + "grad_norm": 0.7325050234794617, + "learning_rate": 4.232323502981772e-07, + "loss": 1.9935, + "step": 27310 + }, + { + "epoch": 0.91, + "grad_norm": 0.7339297533035278, + "learning_rate": 4.229264551102763e-07, + "loss": 2.036, + "step": 27311 + }, + { + "epoch": 0.91, + "grad_norm": 0.7558871507644653, + "learning_rate": 4.2262066811854183e-07, + "loss": 2.0183, + "step": 27312 + }, + { + "epoch": 0.91, + "grad_norm": 0.7621297836303711, + "learning_rate": 4.2231498932642555e-07, + "loss": 2.014, + "step": 27313 + }, + { + "epoch": 0.91, + "grad_norm": 0.7755623459815979, + "learning_rate": 4.220094187373802e-07, + "loss": 2.0973, + "step": 27314 + }, + { + "epoch": 0.91, + "grad_norm": 0.7383571863174438, + "learning_rate": 4.217039563548597e-07, + "loss": 2.0071, + "step": 27315 + }, + { + "epoch": 0.91, + "grad_norm": 0.7490650415420532, + "learning_rate": 4.2139860218231575e-07, + "loss": 2.1004, + "step": 27316 + }, + { + "epoch": 0.91, + "grad_norm": 0.7200995683670044, + "learning_rate": 4.210933562231967e-07, + "loss": 2.0263, + "step": 27317 + }, + { + "epoch": 0.91, + "grad_norm": 0.7488175630569458, + "learning_rate": 4.2078821848094975e-07, + "loss": 2.0978, + "step": 27318 + }, + { + "epoch": 0.91, + "grad_norm": 0.768964946269989, + "learning_rate": 4.204831889590244e-07, + "loss": 2.0848, + "step": 27319 + }, + { + "epoch": 0.91, + "grad_norm": 0.7332388162612915, + "learning_rate": 4.2017826766086454e-07, + "loss": 2.0546, + "step": 27320 + }, + { + "epoch": 0.91, + "grad_norm": 0.7552720308303833, + "learning_rate": 4.1987345458991747e-07, + "loss": 2.0823, + "step": 27321 + }, + { + "epoch": 0.91, + "grad_norm": 0.7609044909477234, + "learning_rate": 4.195687497496248e-07, + "loss": 2.0337, + "step": 27322 + }, + { + "epoch": 0.91, + "grad_norm": 0.7537041902542114, + "learning_rate": 4.192641531434316e-07, + "loss": 1.9834, + "step": 27323 + }, + { + "epoch": 0.91, + "grad_norm": 0.7487196922302246, + "learning_rate": 4.1895966477477515e-07, + "loss": 2.0635, + "step": 27324 + }, + { + "epoch": 0.91, + "grad_norm": 0.7514746189117432, + "learning_rate": 4.1865528464709814e-07, + "loss": 2.0596, + "step": 27325 + }, + { + "epoch": 0.91, + "grad_norm": 0.74385005235672, + "learning_rate": 4.1835101276383907e-07, + "loss": 2.0887, + "step": 27326 + }, + { + "epoch": 0.91, + "grad_norm": 0.7372734546661377, + "learning_rate": 4.18046849128434e-07, + "loss": 2.0712, + "step": 27327 + }, + { + "epoch": 0.91, + "grad_norm": 0.7318451404571533, + "learning_rate": 4.1774279374431903e-07, + "loss": 2.03, + "step": 27328 + }, + { + "epoch": 0.91, + "grad_norm": 0.7508850693702698, + "learning_rate": 4.1743884661493264e-07, + "loss": 2.0176, + "step": 27329 + }, + { + "epoch": 0.91, + "grad_norm": 0.7809758186340332, + "learning_rate": 4.171350077437053e-07, + "loss": 2.1629, + "step": 27330 + }, + { + "epoch": 0.91, + "grad_norm": 0.7604416012763977, + "learning_rate": 4.168312771340699e-07, + "loss": 2.1007, + "step": 27331 + }, + { + "epoch": 0.91, + "grad_norm": 0.7466316223144531, + "learning_rate": 4.165276547894592e-07, + "loss": 2.0649, + "step": 27332 + }, + { + "epoch": 0.91, + "grad_norm": 0.7419121861457825, + "learning_rate": 4.162241407133016e-07, + "loss": 2.0836, + "step": 27333 + }, + { + "epoch": 0.91, + "grad_norm": 0.7313079833984375, + "learning_rate": 4.159207349090277e-07, + "loss": 1.9826, + "step": 27334 + }, + { + "epoch": 0.91, + "grad_norm": 0.7823788523674011, + "learning_rate": 4.156174373800648e-07, + "loss": 2.0522, + "step": 27335 + }, + { + "epoch": 0.91, + "grad_norm": 0.7896131277084351, + "learning_rate": 4.153142481298389e-07, + "loss": 2.1371, + "step": 27336 + }, + { + "epoch": 0.91, + "grad_norm": 0.7621791362762451, + "learning_rate": 4.1501116716177515e-07, + "loss": 1.9975, + "step": 27337 + }, + { + "epoch": 0.91, + "grad_norm": 0.7754830121994019, + "learning_rate": 4.1470819447929857e-07, + "loss": 2.0859, + "step": 27338 + }, + { + "epoch": 0.91, + "grad_norm": 0.7694870829582214, + "learning_rate": 4.144053300858308e-07, + "loss": 2.0778, + "step": 27339 + }, + { + "epoch": 0.91, + "grad_norm": 0.7369939684867859, + "learning_rate": 4.141025739847937e-07, + "loss": 1.9806, + "step": 27340 + }, + { + "epoch": 0.91, + "grad_norm": 0.7189335227012634, + "learning_rate": 4.137999261796066e-07, + "loss": 2.0064, + "step": 27341 + }, + { + "epoch": 0.91, + "grad_norm": 0.755294144153595, + "learning_rate": 4.1349738667369246e-07, + "loss": 2.0422, + "step": 27342 + }, + { + "epoch": 0.91, + "grad_norm": 0.7233870625495911, + "learning_rate": 4.1319495547046506e-07, + "loss": 2.0535, + "step": 27343 + }, + { + "epoch": 0.91, + "grad_norm": 0.7535879015922546, + "learning_rate": 4.128926325733429e-07, + "loss": 2.019, + "step": 27344 + }, + { + "epoch": 0.91, + "grad_norm": 0.7583195567131042, + "learning_rate": 4.1259041798574205e-07, + "loss": 2.089, + "step": 27345 + }, + { + "epoch": 0.91, + "grad_norm": 0.7333357930183411, + "learning_rate": 4.122883117110743e-07, + "loss": 2.0201, + "step": 27346 + }, + { + "epoch": 0.91, + "grad_norm": 0.7266172170639038, + "learning_rate": 4.119863137527558e-07, + "loss": 2.0556, + "step": 27347 + }, + { + "epoch": 0.91, + "grad_norm": 0.7337165474891663, + "learning_rate": 4.11684424114196e-07, + "loss": 1.9964, + "step": 27348 + }, + { + "epoch": 0.91, + "grad_norm": 0.7413678169250488, + "learning_rate": 4.1138264279880546e-07, + "loss": 1.9812, + "step": 27349 + }, + { + "epoch": 0.91, + "grad_norm": 0.7808709740638733, + "learning_rate": 4.110809698099949e-07, + "loss": 2.0321, + "step": 27350 + }, + { + "epoch": 0.91, + "grad_norm": 0.7807044386863708, + "learning_rate": 4.1077940515117266e-07, + "loss": 2.0957, + "step": 27351 + }, + { + "epoch": 0.91, + "grad_norm": 0.7442800998687744, + "learning_rate": 4.1047794882574487e-07, + "loss": 2.0303, + "step": 27352 + }, + { + "epoch": 0.91, + "grad_norm": 0.7498773336410522, + "learning_rate": 4.101766008371155e-07, + "loss": 2.1079, + "step": 27353 + }, + { + "epoch": 0.91, + "grad_norm": 0.7489928007125854, + "learning_rate": 4.0987536118869297e-07, + "loss": 2.0257, + "step": 27354 + }, + { + "epoch": 0.91, + "grad_norm": 0.766908586025238, + "learning_rate": 4.0957422988387673e-07, + "loss": 1.9821, + "step": 27355 + }, + { + "epoch": 0.91, + "grad_norm": 0.741818368434906, + "learning_rate": 4.0927320692607075e-07, + "loss": 2.0424, + "step": 27356 + }, + { + "epoch": 0.91, + "grad_norm": 0.7340573668479919, + "learning_rate": 4.0897229231867454e-07, + "loss": 2.0816, + "step": 27357 + }, + { + "epoch": 0.91, + "grad_norm": 0.7337018251419067, + "learning_rate": 4.086714860650909e-07, + "loss": 2.0692, + "step": 27358 + }, + { + "epoch": 0.91, + "grad_norm": 0.7399473190307617, + "learning_rate": 4.0837078816871265e-07, + "loss": 2.088, + "step": 27359 + }, + { + "epoch": 0.91, + "grad_norm": 0.7470431923866272, + "learning_rate": 4.0807019863294273e-07, + "loss": 2.0544, + "step": 27360 + }, + { + "epoch": 0.91, + "grad_norm": 0.7440586686134338, + "learning_rate": 4.077697174611728e-07, + "loss": 2.0089, + "step": 27361 + }, + { + "epoch": 0.91, + "grad_norm": 0.7290531396865845, + "learning_rate": 4.0746934465679897e-07, + "loss": 2.0375, + "step": 27362 + }, + { + "epoch": 0.91, + "grad_norm": 0.7470327615737915, + "learning_rate": 4.071690802232142e-07, + "loss": 2.0602, + "step": 27363 + }, + { + "epoch": 0.91, + "grad_norm": 0.7562233805656433, + "learning_rate": 4.068689241638124e-07, + "loss": 2.0611, + "step": 27364 + }, + { + "epoch": 0.91, + "grad_norm": 0.7457106709480286, + "learning_rate": 4.0656887648198416e-07, + "loss": 2.0812, + "step": 27365 + }, + { + "epoch": 0.91, + "grad_norm": 0.732226550579071, + "learning_rate": 4.062689371811157e-07, + "loss": 2.1268, + "step": 27366 + }, + { + "epoch": 0.91, + "grad_norm": 0.7533044219017029, + "learning_rate": 4.0596910626460093e-07, + "loss": 2.0027, + "step": 27367 + }, + { + "epoch": 0.91, + "grad_norm": 0.7608362436294556, + "learning_rate": 4.0566938373582275e-07, + "loss": 2.1373, + "step": 27368 + }, + { + "epoch": 0.91, + "grad_norm": 0.7218877673149109, + "learning_rate": 4.0536976959817064e-07, + "loss": 2.0762, + "step": 27369 + }, + { + "epoch": 0.91, + "grad_norm": 0.741619348526001, + "learning_rate": 4.0507026385502747e-07, + "loss": 2.0588, + "step": 27370 + }, + { + "epoch": 0.91, + "grad_norm": 0.7445287108421326, + "learning_rate": 4.0477086650977604e-07, + "loss": 2.0242, + "step": 27371 + }, + { + "epoch": 0.91, + "grad_norm": 0.7644695043563843, + "learning_rate": 4.0447157756580035e-07, + "loss": 2.0634, + "step": 27372 + }, + { + "epoch": 0.91, + "grad_norm": 0.7628416419029236, + "learning_rate": 4.041723970264821e-07, + "loss": 2.0781, + "step": 27373 + }, + { + "epoch": 0.91, + "grad_norm": 0.743776261806488, + "learning_rate": 4.038733248951998e-07, + "loss": 2.0621, + "step": 27374 + }, + { + "epoch": 0.91, + "grad_norm": 0.7691857814788818, + "learning_rate": 4.035743611753329e-07, + "loss": 2.0385, + "step": 27375 + }, + { + "epoch": 0.91, + "grad_norm": 0.7516829371452332, + "learning_rate": 4.0327550587025864e-07, + "loss": 2.01, + "step": 27376 + }, + { + "epoch": 0.91, + "grad_norm": 0.7452287077903748, + "learning_rate": 4.0297675898335334e-07, + "loss": 2.0465, + "step": 27377 + }, + { + "epoch": 0.91, + "grad_norm": 0.7835694551467896, + "learning_rate": 4.0267812051799306e-07, + "loss": 2.0722, + "step": 27378 + }, + { + "epoch": 0.91, + "grad_norm": 0.7340983748435974, + "learning_rate": 4.023795904775496e-07, + "loss": 2.0084, + "step": 27379 + }, + { + "epoch": 0.91, + "grad_norm": 0.7549338936805725, + "learning_rate": 4.0208116886539806e-07, + "loss": 2.0562, + "step": 27380 + }, + { + "epoch": 0.91, + "grad_norm": 0.7653358578681946, + "learning_rate": 4.0178285568490795e-07, + "loss": 2.0128, + "step": 27381 + }, + { + "epoch": 0.91, + "grad_norm": 0.7582991719245911, + "learning_rate": 4.014846509394499e-07, + "loss": 2.0633, + "step": 27382 + }, + { + "epoch": 0.91, + "grad_norm": 0.7238643765449524, + "learning_rate": 4.0118655463239453e-07, + "loss": 2.0409, + "step": 27383 + }, + { + "epoch": 0.91, + "grad_norm": 0.7570232152938843, + "learning_rate": 4.008885667671059e-07, + "loss": 2.0149, + "step": 27384 + }, + { + "epoch": 0.91, + "grad_norm": 0.7459805011749268, + "learning_rate": 4.005906873469523e-07, + "loss": 2.0786, + "step": 27385 + }, + { + "epoch": 0.91, + "grad_norm": 0.7322351932525635, + "learning_rate": 4.002929163753011e-07, + "loss": 1.966, + "step": 27386 + }, + { + "epoch": 0.91, + "grad_norm": 0.7392884492874146, + "learning_rate": 3.999952538555141e-07, + "loss": 2.0252, + "step": 27387 + }, + { + "epoch": 0.91, + "grad_norm": 0.7593870162963867, + "learning_rate": 3.99697699790953e-07, + "loss": 2.1258, + "step": 27388 + }, + { + "epoch": 0.91, + "grad_norm": 0.7252902388572693, + "learning_rate": 3.9940025418498285e-07, + "loss": 2.0032, + "step": 27389 + }, + { + "epoch": 0.91, + "grad_norm": 0.7523022294044495, + "learning_rate": 3.9910291704096104e-07, + "loss": 2.0152, + "step": 27390 + }, + { + "epoch": 0.91, + "grad_norm": 0.713740348815918, + "learning_rate": 3.9880568836224707e-07, + "loss": 2.0865, + "step": 27391 + }, + { + "epoch": 0.91, + "grad_norm": 0.7317163944244385, + "learning_rate": 3.9850856815220275e-07, + "loss": 2.0257, + "step": 27392 + }, + { + "epoch": 0.91, + "grad_norm": 0.7534809112548828, + "learning_rate": 3.982115564141786e-07, + "loss": 2.0585, + "step": 27393 + }, + { + "epoch": 0.91, + "grad_norm": 0.7478227615356445, + "learning_rate": 3.9791465315153434e-07, + "loss": 2.0349, + "step": 27394 + }, + { + "epoch": 0.91, + "grad_norm": 0.7133371233940125, + "learning_rate": 3.9761785836762267e-07, + "loss": 2.0511, + "step": 27395 + }, + { + "epoch": 0.91, + "grad_norm": 0.7364675998687744, + "learning_rate": 3.9732117206579765e-07, + "loss": 2.063, + "step": 27396 + }, + { + "epoch": 0.91, + "grad_norm": 0.7627322673797607, + "learning_rate": 3.970245942494089e-07, + "loss": 2.1047, + "step": 27397 + }, + { + "epoch": 0.91, + "grad_norm": 0.7295727729797363, + "learning_rate": 3.967281249218091e-07, + "loss": 2.0218, + "step": 27398 + }, + { + "epoch": 0.91, + "grad_norm": 0.7716072201728821, + "learning_rate": 3.9643176408634686e-07, + "loss": 2.0934, + "step": 27399 + }, + { + "epoch": 0.91, + "grad_norm": 0.7506695985794067, + "learning_rate": 3.9613551174637057e-07, + "loss": 2.0594, + "step": 27400 + }, + { + "epoch": 0.91, + "grad_norm": 0.7332156896591187, + "learning_rate": 3.9583936790522523e-07, + "loss": 2.0604, + "step": 27401 + }, + { + "epoch": 0.91, + "grad_norm": 0.7541760206222534, + "learning_rate": 3.9554333256626055e-07, + "loss": 2.03, + "step": 27402 + }, + { + "epoch": 0.91, + "grad_norm": 0.7483181953430176, + "learning_rate": 3.9524740573281707e-07, + "loss": 2.1044, + "step": 27403 + }, + { + "epoch": 0.91, + "grad_norm": 0.7564271092414856, + "learning_rate": 3.9495158740823993e-07, + "loss": 1.9896, + "step": 27404 + }, + { + "epoch": 0.91, + "grad_norm": 0.7870551347732544, + "learning_rate": 3.946558775958709e-07, + "loss": 2.1267, + "step": 27405 + }, + { + "epoch": 0.91, + "grad_norm": 0.7321218848228455, + "learning_rate": 3.9436027629904947e-07, + "loss": 1.9923, + "step": 27406 + }, + { + "epoch": 0.91, + "grad_norm": 0.7440309524536133, + "learning_rate": 3.9406478352111644e-07, + "loss": 2.0238, + "step": 27407 + }, + { + "epoch": 0.91, + "grad_norm": 0.72625333070755, + "learning_rate": 3.9376939926541015e-07, + "loss": 2.0341, + "step": 27408 + }, + { + "epoch": 0.91, + "grad_norm": 0.7353745698928833, + "learning_rate": 3.9347412353526793e-07, + "loss": 2.1006, + "step": 27409 + }, + { + "epoch": 0.91, + "grad_norm": 0.7492586970329285, + "learning_rate": 3.931789563340249e-07, + "loss": 2.0836, + "step": 27410 + }, + { + "epoch": 0.91, + "grad_norm": 0.7198688983917236, + "learning_rate": 3.928838976650162e-07, + "loss": 2.0569, + "step": 27411 + }, + { + "epoch": 0.91, + "grad_norm": 0.7719637155532837, + "learning_rate": 3.925889475315736e-07, + "loss": 2.0878, + "step": 27412 + }, + { + "epoch": 0.91, + "grad_norm": 0.8289550542831421, + "learning_rate": 3.9229410593703e-07, + "loss": 2.1032, + "step": 27413 + }, + { + "epoch": 0.91, + "grad_norm": 0.7269850373268127, + "learning_rate": 3.919993728847205e-07, + "loss": 2.0151, + "step": 27414 + }, + { + "epoch": 0.91, + "grad_norm": 0.7554654479026794, + "learning_rate": 3.917047483779679e-07, + "loss": 2.0866, + "step": 27415 + }, + { + "epoch": 0.91, + "grad_norm": 0.7510951161384583, + "learning_rate": 3.914102324201041e-07, + "loss": 2.0472, + "step": 27416 + }, + { + "epoch": 0.91, + "grad_norm": 0.7514237761497498, + "learning_rate": 3.9111582501445755e-07, + "loss": 2.023, + "step": 27417 + }, + { + "epoch": 0.91, + "grad_norm": 0.7676913738250732, + "learning_rate": 3.9082152616435333e-07, + "loss": 2.0727, + "step": 27418 + }, + { + "epoch": 0.91, + "grad_norm": 0.7341684699058533, + "learning_rate": 3.9052733587311543e-07, + "loss": 2.0779, + "step": 27419 + }, + { + "epoch": 0.91, + "grad_norm": 0.7579126358032227, + "learning_rate": 3.902332541440679e-07, + "loss": 2.0354, + "step": 27420 + }, + { + "epoch": 0.91, + "grad_norm": 0.7665988206863403, + "learning_rate": 3.899392809805325e-07, + "loss": 2.0964, + "step": 27421 + }, + { + "epoch": 0.91, + "grad_norm": 0.7308052778244019, + "learning_rate": 3.896454163858321e-07, + "loss": 2.0097, + "step": 27422 + }, + { + "epoch": 0.91, + "grad_norm": 0.7163789868354797, + "learning_rate": 3.893516603632852e-07, + "loss": 1.9979, + "step": 27423 + }, + { + "epoch": 0.91, + "grad_norm": 0.7743442058563232, + "learning_rate": 3.8905801291621135e-07, + "loss": 2.0654, + "step": 27424 + }, + { + "epoch": 0.91, + "grad_norm": 0.7607342600822449, + "learning_rate": 3.8876447404792684e-07, + "loss": 2.0592, + "step": 27425 + }, + { + "epoch": 0.91, + "grad_norm": 0.7724065780639648, + "learning_rate": 3.884710437617478e-07, + "loss": 2.0487, + "step": 27426 + }, + { + "epoch": 0.91, + "grad_norm": 0.703120231628418, + "learning_rate": 3.881777220609928e-07, + "loss": 1.9502, + "step": 27427 + }, + { + "epoch": 0.91, + "grad_norm": 0.7442044019699097, + "learning_rate": 3.8788450894897024e-07, + "loss": 1.9645, + "step": 27428 + }, + { + "epoch": 0.91, + "grad_norm": 0.7330692410469055, + "learning_rate": 3.8759140442899524e-07, + "loss": 2.046, + "step": 27429 + }, + { + "epoch": 0.91, + "grad_norm": 0.732552170753479, + "learning_rate": 3.872984085043807e-07, + "loss": 2.0579, + "step": 27430 + }, + { + "epoch": 0.91, + "grad_norm": 0.7150179743766785, + "learning_rate": 3.8700552117843514e-07, + "loss": 2.059, + "step": 27431 + }, + { + "epoch": 0.91, + "grad_norm": 0.7221666574478149, + "learning_rate": 3.867127424544659e-07, + "loss": 2.0559, + "step": 27432 + }, + { + "epoch": 0.91, + "grad_norm": 0.7662800550460815, + "learning_rate": 3.864200723357836e-07, + "loss": 2.0051, + "step": 27433 + }, + { + "epoch": 0.91, + "grad_norm": 0.7387768030166626, + "learning_rate": 3.861275108256912e-07, + "loss": 1.9618, + "step": 27434 + }, + { + "epoch": 0.91, + "grad_norm": 0.7429881691932678, + "learning_rate": 3.858350579274972e-07, + "loss": 2.0201, + "step": 27435 + }, + { + "epoch": 0.91, + "grad_norm": 0.7195424437522888, + "learning_rate": 3.8554271364450445e-07, + "loss": 2.0139, + "step": 27436 + }, + { + "epoch": 0.91, + "grad_norm": 0.7355684638023376, + "learning_rate": 3.852504779800159e-07, + "loss": 2.0776, + "step": 27437 + }, + { + "epoch": 0.91, + "grad_norm": 0.7297601103782654, + "learning_rate": 3.8495835093733113e-07, + "loss": 2.0294, + "step": 27438 + }, + { + "epoch": 0.91, + "grad_norm": 0.7359074950218201, + "learning_rate": 3.846663325197542e-07, + "loss": 2.0259, + "step": 27439 + }, + { + "epoch": 0.91, + "grad_norm": 0.7576746940612793, + "learning_rate": 3.843744227305812e-07, + "loss": 2.0033, + "step": 27440 + }, + { + "epoch": 0.91, + "grad_norm": 0.7370162606239319, + "learning_rate": 3.840826215731086e-07, + "loss": 2.0903, + "step": 27441 + }, + { + "epoch": 0.91, + "grad_norm": 0.7371616363525391, + "learning_rate": 3.8379092905063585e-07, + "loss": 2.0553, + "step": 27442 + }, + { + "epoch": 0.91, + "grad_norm": 0.7390612363815308, + "learning_rate": 3.8349934516645925e-07, + "loss": 2.003, + "step": 27443 + }, + { + "epoch": 0.91, + "grad_norm": 0.7552176117897034, + "learning_rate": 3.832078699238695e-07, + "loss": 2.0444, + "step": 27444 + }, + { + "epoch": 0.91, + "grad_norm": 0.7280820608139038, + "learning_rate": 3.829165033261628e-07, + "loss": 2.0326, + "step": 27445 + }, + { + "epoch": 0.91, + "grad_norm": 0.7386280298233032, + "learning_rate": 3.826252453766288e-07, + "loss": 2.0001, + "step": 27446 + }, + { + "epoch": 0.91, + "grad_norm": 0.7406368851661682, + "learning_rate": 3.823340960785571e-07, + "loss": 2.0674, + "step": 27447 + }, + { + "epoch": 0.91, + "grad_norm": 0.7545284628868103, + "learning_rate": 3.8204305543523837e-07, + "loss": 2.0631, + "step": 27448 + }, + { + "epoch": 0.91, + "grad_norm": 0.7496291399002075, + "learning_rate": 3.817521234499633e-07, + "loss": 2.0538, + "step": 27449 + }, + { + "epoch": 0.91, + "grad_norm": 0.757919430732727, + "learning_rate": 3.8146130012601365e-07, + "loss": 2.045, + "step": 27450 + }, + { + "epoch": 0.91, + "grad_norm": 0.7482235431671143, + "learning_rate": 3.8117058546667695e-07, + "loss": 2.0972, + "step": 27451 + }, + { + "epoch": 0.91, + "grad_norm": 0.7504217028617859, + "learning_rate": 3.8087997947523825e-07, + "loss": 2.0438, + "step": 27452 + }, + { + "epoch": 0.91, + "grad_norm": 0.7346357703208923, + "learning_rate": 3.805894821549805e-07, + "loss": 2.0221, + "step": 27453 + }, + { + "epoch": 0.91, + "grad_norm": 0.7408124804496765, + "learning_rate": 3.8029909350918547e-07, + "loss": 2.0262, + "step": 27454 + }, + { + "epoch": 0.91, + "grad_norm": 0.732194721698761, + "learning_rate": 3.8000881354113283e-07, + "loss": 2.0441, + "step": 27455 + }, + { + "epoch": 0.91, + "grad_norm": 0.732491135597229, + "learning_rate": 3.797186422541033e-07, + "loss": 2.0155, + "step": 27456 + }, + { + "epoch": 0.91, + "grad_norm": 0.7348154187202454, + "learning_rate": 3.7942857965137303e-07, + "loss": 2.0705, + "step": 27457 + }, + { + "epoch": 0.91, + "grad_norm": 0.7364615797996521, + "learning_rate": 3.7913862573622286e-07, + "loss": 2.0768, + "step": 27458 + }, + { + "epoch": 0.91, + "grad_norm": 0.7435455918312073, + "learning_rate": 3.7884878051192565e-07, + "loss": 2.0903, + "step": 27459 + }, + { + "epoch": 0.91, + "grad_norm": 0.7162229418754578, + "learning_rate": 3.785590439817544e-07, + "loss": 2.0346, + "step": 27460 + }, + { + "epoch": 0.91, + "grad_norm": 0.7298769354820251, + "learning_rate": 3.782694161489864e-07, + "loss": 2.0202, + "step": 27461 + }, + { + "epoch": 0.91, + "grad_norm": 0.730826199054718, + "learning_rate": 3.779798970168913e-07, + "loss": 2.0027, + "step": 27462 + }, + { + "epoch": 0.91, + "grad_norm": 0.7588793635368347, + "learning_rate": 3.7769048658873984e-07, + "loss": 2.0646, + "step": 27463 + }, + { + "epoch": 0.91, + "grad_norm": 0.7409497499465942, + "learning_rate": 3.7740118486780054e-07, + "loss": 2.0698, + "step": 27464 + }, + { + "epoch": 0.91, + "grad_norm": 0.7720558047294617, + "learning_rate": 3.7711199185734514e-07, + "loss": 2.0398, + "step": 27465 + }, + { + "epoch": 0.91, + "grad_norm": 0.7268298864364624, + "learning_rate": 3.768229075606389e-07, + "loss": 2.0294, + "step": 27466 + }, + { + "epoch": 0.91, + "grad_norm": 0.736134946346283, + "learning_rate": 3.765339319809469e-07, + "loss": 2.0648, + "step": 27467 + }, + { + "epoch": 0.91, + "grad_norm": 0.7300928831100464, + "learning_rate": 3.7624506512153656e-07, + "loss": 1.9775, + "step": 27468 + }, + { + "epoch": 0.91, + "grad_norm": 0.7649882435798645, + "learning_rate": 3.7595630698566644e-07, + "loss": 1.9447, + "step": 27469 + }, + { + "epoch": 0.91, + "grad_norm": 0.7762765288352966, + "learning_rate": 3.7566765757660275e-07, + "loss": 2.0443, + "step": 27470 + }, + { + "epoch": 0.91, + "grad_norm": 0.7147378921508789, + "learning_rate": 3.753791168976073e-07, + "loss": 2.0476, + "step": 27471 + }, + { + "epoch": 0.91, + "grad_norm": 0.7329960465431213, + "learning_rate": 3.7509068495193644e-07, + "loss": 2.0488, + "step": 27472 + }, + { + "epoch": 0.91, + "grad_norm": 0.760723888874054, + "learning_rate": 3.748023617428498e-07, + "loss": 2.0506, + "step": 27473 + }, + { + "epoch": 0.91, + "grad_norm": 0.7451719641685486, + "learning_rate": 3.745141472736058e-07, + "loss": 2.0656, + "step": 27474 + }, + { + "epoch": 0.91, + "grad_norm": 0.7724258899688721, + "learning_rate": 3.7422604154745965e-07, + "loss": 2.0151, + "step": 27475 + }, + { + "epoch": 0.91, + "grad_norm": 0.7436041831970215, + "learning_rate": 3.739380445676677e-07, + "loss": 1.9797, + "step": 27476 + }, + { + "epoch": 0.91, + "grad_norm": 0.7287425994873047, + "learning_rate": 3.736501563374817e-07, + "loss": 1.998, + "step": 27477 + }, + { + "epoch": 0.91, + "grad_norm": 0.7354134321212769, + "learning_rate": 3.7336237686015353e-07, + "loss": 2.0845, + "step": 27478 + }, + { + "epoch": 0.91, + "grad_norm": 0.7672419548034668, + "learning_rate": 3.73074706138935e-07, + "loss": 2.0152, + "step": 27479 + }, + { + "epoch": 0.91, + "grad_norm": 0.7639938592910767, + "learning_rate": 3.7278714417707807e-07, + "loss": 2.0754, + "step": 27480 + }, + { + "epoch": 0.91, + "grad_norm": 0.7598233819007874, + "learning_rate": 3.7249969097783e-07, + "loss": 2.0479, + "step": 27481 + }, + { + "epoch": 0.91, + "grad_norm": 0.7292041182518005, + "learning_rate": 3.7221234654443716e-07, + "loss": 2.0923, + "step": 27482 + }, + { + "epoch": 0.91, + "grad_norm": 0.744170606136322, + "learning_rate": 3.7192511088014804e-07, + "loss": 2.0461, + "step": 27483 + }, + { + "epoch": 0.91, + "grad_norm": 0.7313699126243591, + "learning_rate": 3.716379839882045e-07, + "loss": 2.0563, + "step": 27484 + }, + { + "epoch": 0.91, + "grad_norm": 0.7566964626312256, + "learning_rate": 3.7135096587185393e-07, + "loss": 2.0097, + "step": 27485 + }, + { + "epoch": 0.91, + "grad_norm": 0.733971893787384, + "learning_rate": 3.7106405653433595e-07, + "loss": 2.0431, + "step": 27486 + }, + { + "epoch": 0.91, + "grad_norm": 0.7231170535087585, + "learning_rate": 3.707772559788947e-07, + "loss": 2.0472, + "step": 27487 + }, + { + "epoch": 0.91, + "grad_norm": 0.7284144759178162, + "learning_rate": 3.7049056420876637e-07, + "loss": 2.0215, + "step": 27488 + }, + { + "epoch": 0.91, + "grad_norm": 0.7388703227043152, + "learning_rate": 3.7020398122719516e-07, + "loss": 2.0203, + "step": 27489 + }, + { + "epoch": 0.91, + "grad_norm": 0.7505077123641968, + "learning_rate": 3.699175070374139e-07, + "loss": 2.0528, + "step": 27490 + }, + { + "epoch": 0.91, + "grad_norm": 0.7713919281959534, + "learning_rate": 3.696311416426612e-07, + "loss": 2.0284, + "step": 27491 + }, + { + "epoch": 0.91, + "grad_norm": 0.7308657169342041, + "learning_rate": 3.6934488504617117e-07, + "loss": 2.0246, + "step": 27492 + }, + { + "epoch": 0.91, + "grad_norm": 0.7289283275604248, + "learning_rate": 3.6905873725118113e-07, + "loss": 2.0185, + "step": 27493 + }, + { + "epoch": 0.91, + "grad_norm": 0.7859716415405273, + "learning_rate": 3.687726982609185e-07, + "loss": 2.0085, + "step": 27494 + }, + { + "epoch": 0.91, + "grad_norm": 0.7605398297309875, + "learning_rate": 3.684867680786175e-07, + "loss": 2.0456, + "step": 27495 + }, + { + "epoch": 0.91, + "grad_norm": 0.7533289790153503, + "learning_rate": 3.6820094670750986e-07, + "loss": 2.0315, + "step": 27496 + }, + { + "epoch": 0.91, + "grad_norm": 0.7487311959266663, + "learning_rate": 3.679152341508219e-07, + "loss": 2.0796, + "step": 27497 + }, + { + "epoch": 0.91, + "grad_norm": 0.7333808541297913, + "learning_rate": 3.676296304117832e-07, + "loss": 2.0661, + "step": 27498 + }, + { + "epoch": 0.91, + "grad_norm": 0.7354040741920471, + "learning_rate": 3.673441354936202e-07, + "loss": 2.0388, + "step": 27499 + }, + { + "epoch": 0.91, + "grad_norm": 0.7418214678764343, + "learning_rate": 3.67058749399557e-07, + "loss": 2.0787, + "step": 27500 + }, + { + "epoch": 0.91, + "grad_norm": 0.7730532884597778, + "learning_rate": 3.6677347213281867e-07, + "loss": 2.0382, + "step": 27501 + }, + { + "epoch": 0.91, + "grad_norm": 0.7209863066673279, + "learning_rate": 3.6648830369662936e-07, + "loss": 2.0533, + "step": 27502 + }, + { + "epoch": 0.92, + "grad_norm": 0.7592591047286987, + "learning_rate": 3.662032440942087e-07, + "loss": 2.0223, + "step": 27503 + }, + { + "epoch": 0.92, + "grad_norm": 0.7403962016105652, + "learning_rate": 3.6591829332877747e-07, + "loss": 2.0301, + "step": 27504 + }, + { + "epoch": 0.92, + "grad_norm": 0.7473664283752441, + "learning_rate": 3.6563345140355535e-07, + "loss": 2.06, + "step": 27505 + }, + { + "epoch": 0.92, + "grad_norm": 0.7360561490058899, + "learning_rate": 3.653487183217597e-07, + "loss": 2.0916, + "step": 27506 + }, + { + "epoch": 0.92, + "grad_norm": 0.765838623046875, + "learning_rate": 3.6506409408660906e-07, + "loss": 1.9556, + "step": 27507 + }, + { + "epoch": 0.92, + "grad_norm": 0.7543379068374634, + "learning_rate": 3.647795787013164e-07, + "loss": 2.0554, + "step": 27508 + }, + { + "epoch": 0.92, + "grad_norm": 0.7381190657615662, + "learning_rate": 3.6449517216909813e-07, + "loss": 2.1212, + "step": 27509 + }, + { + "epoch": 0.92, + "grad_norm": 0.7779924273490906, + "learning_rate": 3.642108744931649e-07, + "loss": 2.0452, + "step": 27510 + }, + { + "epoch": 0.92, + "grad_norm": 0.7593322396278381, + "learning_rate": 3.6392668567673205e-07, + "loss": 2.0923, + "step": 27511 + }, + { + "epoch": 0.92, + "grad_norm": 0.759253740310669, + "learning_rate": 3.63642605723008e-07, + "loss": 1.9928, + "step": 27512 + }, + { + "epoch": 0.92, + "grad_norm": 0.7421619296073914, + "learning_rate": 3.633586346352014e-07, + "loss": 2.0057, + "step": 27513 + }, + { + "epoch": 0.92, + "grad_norm": 0.7459618449211121, + "learning_rate": 3.6307477241652065e-07, + "loss": 2.0409, + "step": 27514 + }, + { + "epoch": 0.92, + "grad_norm": 0.7399706840515137, + "learning_rate": 3.6279101907017554e-07, + "loss": 2.0659, + "step": 27515 + }, + { + "epoch": 0.92, + "grad_norm": 0.7464442849159241, + "learning_rate": 3.62507374599369e-07, + "loss": 2.0636, + "step": 27516 + }, + { + "epoch": 0.92, + "grad_norm": 0.7298858165740967, + "learning_rate": 3.6222383900730404e-07, + "loss": 1.9757, + "step": 27517 + }, + { + "epoch": 0.92, + "grad_norm": 0.7626159191131592, + "learning_rate": 3.619404122971881e-07, + "loss": 1.9184, + "step": 27518 + }, + { + "epoch": 0.92, + "grad_norm": 0.756585955619812, + "learning_rate": 3.6165709447221866e-07, + "loss": 1.9781, + "step": 27519 + }, + { + "epoch": 0.92, + "grad_norm": 0.7326863408088684, + "learning_rate": 3.613738855356008e-07, + "loss": 2.0216, + "step": 27520 + }, + { + "epoch": 0.92, + "grad_norm": 0.761835515499115, + "learning_rate": 3.6109078549053213e-07, + "loss": 2.0159, + "step": 27521 + }, + { + "epoch": 0.92, + "grad_norm": 0.7639132738113403, + "learning_rate": 3.6080779434020883e-07, + "loss": 1.9892, + "step": 27522 + }, + { + "epoch": 0.92, + "grad_norm": 0.7316587567329407, + "learning_rate": 3.605249120878307e-07, + "loss": 2.1365, + "step": 27523 + }, + { + "epoch": 0.92, + "grad_norm": 0.7516165375709534, + "learning_rate": 3.6024213873659396e-07, + "loss": 2.0385, + "step": 27524 + }, + { + "epoch": 0.92, + "grad_norm": 0.7540576457977295, + "learning_rate": 3.5995947428969167e-07, + "loss": 1.9717, + "step": 27525 + }, + { + "epoch": 0.92, + "grad_norm": 0.7510865926742554, + "learning_rate": 3.5967691875031686e-07, + "loss": 2.0934, + "step": 27526 + }, + { + "epoch": 0.92, + "grad_norm": 0.7325657606124878, + "learning_rate": 3.593944721216624e-07, + "loss": 2.0162, + "step": 27527 + }, + { + "epoch": 0.92, + "grad_norm": 0.7787989974021912, + "learning_rate": 3.591121344069204e-07, + "loss": 2.0237, + "step": 27528 + }, + { + "epoch": 0.92, + "grad_norm": 0.7360804080963135, + "learning_rate": 3.5882990560927923e-07, + "loss": 1.9765, + "step": 27529 + }, + { + "epoch": 0.92, + "grad_norm": 0.7749325037002563, + "learning_rate": 3.585477857319264e-07, + "loss": 2.079, + "step": 27530 + }, + { + "epoch": 0.92, + "grad_norm": 0.7415676116943359, + "learning_rate": 3.582657747780505e-07, + "loss": 2.1029, + "step": 27531 + }, + { + "epoch": 0.92, + "grad_norm": 0.7974497079849243, + "learning_rate": 3.579838727508378e-07, + "loss": 2.0621, + "step": 27532 + }, + { + "epoch": 0.92, + "grad_norm": 0.7780328392982483, + "learning_rate": 3.577020796534725e-07, + "loss": 2.0426, + "step": 27533 + }, + { + "epoch": 0.92, + "grad_norm": 0.7363455891609192, + "learning_rate": 3.5742039548913864e-07, + "loss": 2.0367, + "step": 27534 + }, + { + "epoch": 0.92, + "grad_norm": 0.7355459928512573, + "learning_rate": 3.57138820261016e-07, + "loss": 2.053, + "step": 27535 + }, + { + "epoch": 0.92, + "grad_norm": 0.7832638621330261, + "learning_rate": 3.568573539722886e-07, + "loss": 2.0665, + "step": 27536 + }, + { + "epoch": 0.92, + "grad_norm": 0.7744941115379333, + "learning_rate": 3.565759966261362e-07, + "loss": 2.1091, + "step": 27537 + }, + { + "epoch": 0.92, + "grad_norm": 0.7403947114944458, + "learning_rate": 3.562947482257373e-07, + "loss": 2.0821, + "step": 27538 + }, + { + "epoch": 0.92, + "grad_norm": 0.729976236820221, + "learning_rate": 3.560136087742672e-07, + "loss": 2.0122, + "step": 27539 + }, + { + "epoch": 0.92, + "grad_norm": 0.7683988809585571, + "learning_rate": 3.557325782749044e-07, + "loss": 2.0616, + "step": 27540 + }, + { + "epoch": 0.92, + "grad_norm": 0.7541755437850952, + "learning_rate": 3.554516567308208e-07, + "loss": 2.1573, + "step": 27541 + }, + { + "epoch": 0.92, + "grad_norm": 0.7659843564033508, + "learning_rate": 3.55170844145194e-07, + "loss": 2.1084, + "step": 27542 + }, + { + "epoch": 0.92, + "grad_norm": 0.7346780300140381, + "learning_rate": 3.548901405211935e-07, + "loss": 2.0981, + "step": 27543 + }, + { + "epoch": 0.92, + "grad_norm": 0.7583374977111816, + "learning_rate": 3.5460954586199246e-07, + "loss": 2.0454, + "step": 27544 + }, + { + "epoch": 0.92, + "grad_norm": 0.7600131034851074, + "learning_rate": 3.543290601707594e-07, + "loss": 2.0148, + "step": 27545 + }, + { + "epoch": 0.92, + "grad_norm": 0.7440279722213745, + "learning_rate": 3.540486834506651e-07, + "loss": 2.0497, + "step": 27546 + }, + { + "epoch": 0.92, + "grad_norm": 0.7115409970283508, + "learning_rate": 3.5376841570487487e-07, + "loss": 2.0523, + "step": 27547 + }, + { + "epoch": 0.92, + "grad_norm": 0.7426609992980957, + "learning_rate": 3.5348825693655496e-07, + "loss": 2.0563, + "step": 27548 + }, + { + "epoch": 0.92, + "grad_norm": 0.7468178868293762, + "learning_rate": 3.5320820714887073e-07, + "loss": 2.0064, + "step": 27549 + }, + { + "epoch": 0.92, + "grad_norm": 0.7592133283615112, + "learning_rate": 3.5292826634498845e-07, + "loss": 1.9758, + "step": 27550 + }, + { + "epoch": 0.92, + "grad_norm": 0.7255106568336487, + "learning_rate": 3.5264843452806893e-07, + "loss": 2.0593, + "step": 27551 + }, + { + "epoch": 0.92, + "grad_norm": 0.7375859618186951, + "learning_rate": 3.52368711701272e-07, + "loss": 2.0762, + "step": 27552 + }, + { + "epoch": 0.92, + "grad_norm": 0.7460224032402039, + "learning_rate": 3.5208909786776046e-07, + "loss": 2.0242, + "step": 27553 + }, + { + "epoch": 0.92, + "grad_norm": 0.7159547209739685, + "learning_rate": 3.5180959303069084e-07, + "loss": 2.0084, + "step": 27554 + }, + { + "epoch": 0.92, + "grad_norm": 0.738386332988739, + "learning_rate": 3.515301971932239e-07, + "loss": 1.9902, + "step": 27555 + }, + { + "epoch": 0.92, + "grad_norm": 0.7599590420722961, + "learning_rate": 3.512509103585138e-07, + "loss": 2.0486, + "step": 27556 + }, + { + "epoch": 0.92, + "grad_norm": 0.7833879590034485, + "learning_rate": 3.509717325297146e-07, + "loss": 2.0634, + "step": 27557 + }, + { + "epoch": 0.92, + "grad_norm": 0.7348275780677795, + "learning_rate": 3.506926637099828e-07, + "loss": 2.0568, + "step": 27558 + }, + { + "epoch": 0.92, + "grad_norm": 0.7912258505821228, + "learning_rate": 3.504137039024702e-07, + "loss": 2.0866, + "step": 27559 + }, + { + "epoch": 0.92, + "grad_norm": 0.7673795819282532, + "learning_rate": 3.501348531103299e-07, + "loss": 1.9504, + "step": 27560 + }, + { + "epoch": 0.92, + "grad_norm": 0.7692797780036926, + "learning_rate": 3.4985611133670826e-07, + "loss": 2.086, + "step": 27561 + }, + { + "epoch": 0.92, + "grad_norm": 0.7537751793861389, + "learning_rate": 3.4957747858475835e-07, + "loss": 1.97, + "step": 27562 + }, + { + "epoch": 0.92, + "grad_norm": 0.7188949584960938, + "learning_rate": 3.492989548576253e-07, + "loss": 2.0186, + "step": 27563 + }, + { + "epoch": 0.92, + "grad_norm": 0.7401921153068542, + "learning_rate": 3.490205401584579e-07, + "loss": 2.0624, + "step": 27564 + }, + { + "epoch": 0.92, + "grad_norm": 0.7180664539337158, + "learning_rate": 3.4874223449039903e-07, + "loss": 2.0415, + "step": 27565 + }, + { + "epoch": 0.92, + "grad_norm": 0.7615936398506165, + "learning_rate": 3.484640378565951e-07, + "loss": 1.987, + "step": 27566 + }, + { + "epoch": 0.92, + "grad_norm": 0.7585820555686951, + "learning_rate": 3.48185950260187e-07, + "loss": 2.0911, + "step": 27567 + }, + { + "epoch": 0.92, + "grad_norm": 0.7129085063934326, + "learning_rate": 3.479079717043188e-07, + "loss": 1.9633, + "step": 27568 + }, + { + "epoch": 0.92, + "grad_norm": 0.7468371391296387, + "learning_rate": 3.476301021921302e-07, + "loss": 2.0459, + "step": 27569 + }, + { + "epoch": 0.92, + "grad_norm": 0.733013927936554, + "learning_rate": 3.473523417267588e-07, + "loss": 2.0765, + "step": 27570 + }, + { + "epoch": 0.92, + "grad_norm": 0.7291478514671326, + "learning_rate": 3.470746903113431e-07, + "loss": 2.1096, + "step": 27571 + }, + { + "epoch": 0.92, + "grad_norm": 0.7657207250595093, + "learning_rate": 3.4679714794902173e-07, + "loss": 2.037, + "step": 27572 + }, + { + "epoch": 0.92, + "grad_norm": 0.7497923970222473, + "learning_rate": 3.465197146429278e-07, + "loss": 2.128, + "step": 27573 + }, + { + "epoch": 0.92, + "grad_norm": 0.7357337474822998, + "learning_rate": 3.4624239039619643e-07, + "loss": 2.022, + "step": 27574 + }, + { + "epoch": 0.92, + "grad_norm": 0.7296460270881653, + "learning_rate": 3.4596517521196305e-07, + "loss": 2.0955, + "step": 27575 + }, + { + "epoch": 0.92, + "grad_norm": 0.7511003017425537, + "learning_rate": 3.4568806909335505e-07, + "loss": 2.0452, + "step": 27576 + }, + { + "epoch": 0.92, + "grad_norm": 0.7694618701934814, + "learning_rate": 3.4541107204350556e-07, + "loss": 2.0505, + "step": 27577 + }, + { + "epoch": 0.92, + "grad_norm": 0.7169292569160461, + "learning_rate": 3.451341840655453e-07, + "loss": 2.0159, + "step": 27578 + }, + { + "epoch": 0.92, + "grad_norm": 0.7414395213127136, + "learning_rate": 3.448574051625986e-07, + "loss": 2.0076, + "step": 27579 + }, + { + "epoch": 0.92, + "grad_norm": 0.742670476436615, + "learning_rate": 3.44580735337795e-07, + "loss": 2.0466, + "step": 27580 + }, + { + "epoch": 0.92, + "grad_norm": 0.7397040128707886, + "learning_rate": 3.443041745942599e-07, + "loss": 2.0372, + "step": 27581 + }, + { + "epoch": 0.92, + "grad_norm": 0.7517799139022827, + "learning_rate": 3.4402772293511854e-07, + "loss": 2.0758, + "step": 27582 + }, + { + "epoch": 0.92, + "grad_norm": 0.7356666922569275, + "learning_rate": 3.4375138036349066e-07, + "loss": 2.0165, + "step": 27583 + }, + { + "epoch": 0.92, + "grad_norm": 0.7546393275260925, + "learning_rate": 3.4347514688250263e-07, + "loss": 2.1016, + "step": 27584 + }, + { + "epoch": 0.92, + "grad_norm": 0.751071035861969, + "learning_rate": 3.431990224952719e-07, + "loss": 2.0643, + "step": 27585 + }, + { + "epoch": 0.92, + "grad_norm": 0.76637864112854, + "learning_rate": 3.4292300720491946e-07, + "loss": 2.0665, + "step": 27586 + }, + { + "epoch": 0.92, + "grad_norm": 0.7580568194389343, + "learning_rate": 3.4264710101456376e-07, + "loss": 2.0098, + "step": 27587 + }, + { + "epoch": 0.92, + "grad_norm": 0.7629618048667908, + "learning_rate": 3.423713039273213e-07, + "loss": 2.0591, + "step": 27588 + }, + { + "epoch": 0.92, + "grad_norm": 0.7623186707496643, + "learning_rate": 3.4209561594630627e-07, + "loss": 2.0234, + "step": 27589 + }, + { + "epoch": 0.92, + "grad_norm": 0.7312891483306885, + "learning_rate": 3.4182003707463716e-07, + "loss": 1.9804, + "step": 27590 + }, + { + "epoch": 0.92, + "grad_norm": 0.7653946280479431, + "learning_rate": 3.41544567315425e-07, + "loss": 2.0957, + "step": 27591 + }, + { + "epoch": 0.92, + "grad_norm": 0.7689878344535828, + "learning_rate": 3.412692066717804e-07, + "loss": 2.0525, + "step": 27592 + }, + { + "epoch": 0.92, + "grad_norm": 0.7284693717956543, + "learning_rate": 3.4099395514681557e-07, + "loss": 2.0338, + "step": 27593 + }, + { + "epoch": 0.92, + "grad_norm": 0.7579547762870789, + "learning_rate": 3.4071881274364117e-07, + "loss": 2.0128, + "step": 27594 + }, + { + "epoch": 0.92, + "grad_norm": 0.7335493564605713, + "learning_rate": 3.404437794653659e-07, + "loss": 2.0216, + "step": 27595 + }, + { + "epoch": 0.92, + "grad_norm": 0.7740117311477661, + "learning_rate": 3.401688553150939e-07, + "loss": 2.0914, + "step": 27596 + }, + { + "epoch": 0.92, + "grad_norm": 0.7390198707580566, + "learning_rate": 3.398940402959339e-07, + "loss": 2.0646, + "step": 27597 + }, + { + "epoch": 0.92, + "grad_norm": 0.7441772818565369, + "learning_rate": 3.3961933441098884e-07, + "loss": 2.0021, + "step": 27598 + }, + { + "epoch": 0.92, + "grad_norm": 0.7458149790763855, + "learning_rate": 3.3934473766336296e-07, + "loss": 1.9979, + "step": 27599 + }, + { + "epoch": 0.92, + "grad_norm": 0.7587944865226746, + "learning_rate": 3.3907025005616044e-07, + "loss": 2.0968, + "step": 27600 + }, + { + "epoch": 0.92, + "grad_norm": 0.7294687032699585, + "learning_rate": 3.3879587159247773e-07, + "loss": 2.0624, + "step": 27601 + }, + { + "epoch": 0.92, + "grad_norm": 0.775977373123169, + "learning_rate": 3.385216022754179e-07, + "loss": 2.0463, + "step": 27602 + }, + { + "epoch": 0.92, + "grad_norm": 0.7988474369049072, + "learning_rate": 3.382474421080806e-07, + "loss": 2.0699, + "step": 27603 + }, + { + "epoch": 0.92, + "grad_norm": 0.7609072923660278, + "learning_rate": 3.379733910935601e-07, + "loss": 2.047, + "step": 27604 + }, + { + "epoch": 0.92, + "grad_norm": 0.7817708849906921, + "learning_rate": 3.376994492349539e-07, + "loss": 2.0579, + "step": 27605 + }, + { + "epoch": 0.92, + "grad_norm": 0.7727806568145752, + "learning_rate": 3.374256165353562e-07, + "loss": 2.0627, + "step": 27606 + }, + { + "epoch": 0.92, + "grad_norm": 0.7486349940299988, + "learning_rate": 3.371518929978612e-07, + "loss": 2.0649, + "step": 27607 + }, + { + "epoch": 0.92, + "grad_norm": 0.7648556232452393, + "learning_rate": 3.36878278625562e-07, + "loss": 2.0431, + "step": 27608 + }, + { + "epoch": 0.92, + "grad_norm": 0.7246996760368347, + "learning_rate": 3.3660477342154716e-07, + "loss": 2.0198, + "step": 27609 + }, + { + "epoch": 0.92, + "grad_norm": 0.7365943789482117, + "learning_rate": 3.3633137738890984e-07, + "loss": 2.0918, + "step": 27610 + }, + { + "epoch": 0.92, + "grad_norm": 0.7324069142341614, + "learning_rate": 3.3605809053073646e-07, + "loss": 1.977, + "step": 27611 + }, + { + "epoch": 0.92, + "grad_norm": 0.7139624357223511, + "learning_rate": 3.357849128501145e-07, + "loss": 2.0517, + "step": 27612 + }, + { + "epoch": 0.92, + "grad_norm": 0.7324069142341614, + "learning_rate": 3.355118443501337e-07, + "loss": 2.0159, + "step": 27613 + }, + { + "epoch": 0.92, + "grad_norm": 0.7657989859580994, + "learning_rate": 3.352388850338728e-07, + "loss": 1.9641, + "step": 27614 + }, + { + "epoch": 0.92, + "grad_norm": 0.7399185299873352, + "learning_rate": 3.3496603490441927e-07, + "loss": 2.0597, + "step": 27615 + }, + { + "epoch": 0.92, + "grad_norm": 0.7372857928276062, + "learning_rate": 3.3469329396485726e-07, + "loss": 2.1119, + "step": 27616 + }, + { + "epoch": 0.92, + "grad_norm": 0.7680679559707642, + "learning_rate": 3.3442066221826553e-07, + "loss": 2.0549, + "step": 27617 + }, + { + "epoch": 0.92, + "grad_norm": 0.7426912188529968, + "learning_rate": 3.341481396677226e-07, + "loss": 2.1273, + "step": 27618 + }, + { + "epoch": 0.92, + "grad_norm": 0.763296902179718, + "learning_rate": 3.338757263163117e-07, + "loss": 2.0342, + "step": 27619 + }, + { + "epoch": 0.92, + "grad_norm": 0.7364580631256104, + "learning_rate": 3.3360342216710583e-07, + "loss": 2.0461, + "step": 27620 + }, + { + "epoch": 0.92, + "grad_norm": 0.7506850361824036, + "learning_rate": 3.333312272231837e-07, + "loss": 2.0308, + "step": 27621 + }, + { + "epoch": 0.92, + "grad_norm": 0.7369828820228577, + "learning_rate": 3.3305914148762273e-07, + "loss": 2.0333, + "step": 27622 + }, + { + "epoch": 0.92, + "grad_norm": 0.702512800693512, + "learning_rate": 3.327871649634906e-07, + "loss": 2.0519, + "step": 27623 + }, + { + "epoch": 0.92, + "grad_norm": 0.7778658866882324, + "learning_rate": 3.325152976538648e-07, + "loss": 2.0709, + "step": 27624 + }, + { + "epoch": 0.92, + "grad_norm": 0.7795168161392212, + "learning_rate": 3.322435395618162e-07, + "loss": 2.0974, + "step": 27625 + }, + { + "epoch": 0.92, + "grad_norm": 0.728728175163269, + "learning_rate": 3.319718906904146e-07, + "loss": 2.0842, + "step": 27626 + }, + { + "epoch": 0.92, + "grad_norm": 0.7503560185432434, + "learning_rate": 3.3170035104272745e-07, + "loss": 2.0981, + "step": 27627 + }, + { + "epoch": 0.92, + "grad_norm": 0.745243489742279, + "learning_rate": 3.314289206218235e-07, + "loss": 2.0928, + "step": 27628 + }, + { + "epoch": 0.92, + "grad_norm": 0.7384575605392456, + "learning_rate": 3.311575994307703e-07, + "loss": 2.0207, + "step": 27629 + }, + { + "epoch": 0.92, + "grad_norm": 0.7331790328025818, + "learning_rate": 3.3088638747263093e-07, + "loss": 2.0384, + "step": 27630 + }, + { + "epoch": 0.92, + "grad_norm": 0.7511398792266846, + "learning_rate": 3.306152847504707e-07, + "loss": 2.1399, + "step": 27631 + }, + { + "epoch": 0.92, + "grad_norm": 0.7215908169746399, + "learning_rate": 3.303442912673538e-07, + "loss": 2.0004, + "step": 27632 + }, + { + "epoch": 0.92, + "grad_norm": 0.7458534240722656, + "learning_rate": 3.300734070263378e-07, + "loss": 2.0497, + "step": 27633 + }, + { + "epoch": 0.92, + "grad_norm": 0.7726430892944336, + "learning_rate": 3.298026320304859e-07, + "loss": 2.1379, + "step": 27634 + }, + { + "epoch": 0.92, + "grad_norm": 0.7475405931472778, + "learning_rate": 3.2953196628285887e-07, + "loss": 2.0623, + "step": 27635 + }, + { + "epoch": 0.92, + "grad_norm": 0.7420817017555237, + "learning_rate": 3.292614097865088e-07, + "loss": 2.0471, + "step": 27636 + }, + { + "epoch": 0.92, + "grad_norm": 0.7497652173042297, + "learning_rate": 3.289909625444976e-07, + "loss": 2.0386, + "step": 27637 + }, + { + "epoch": 0.92, + "grad_norm": 0.718167245388031, + "learning_rate": 3.287206245598784e-07, + "loss": 1.9858, + "step": 27638 + }, + { + "epoch": 0.92, + "grad_norm": 0.7252147197723389, + "learning_rate": 3.284503958357066e-07, + "loss": 2.0459, + "step": 27639 + }, + { + "epoch": 0.92, + "grad_norm": 0.8097555041313171, + "learning_rate": 3.281802763750319e-07, + "loss": 2.0348, + "step": 27640 + }, + { + "epoch": 0.92, + "grad_norm": 0.7462708950042725, + "learning_rate": 3.279102661809108e-07, + "loss": 2.0223, + "step": 27641 + }, + { + "epoch": 0.92, + "grad_norm": 0.7451258897781372, + "learning_rate": 3.2764036525638866e-07, + "loss": 2.0061, + "step": 27642 + }, + { + "epoch": 0.92, + "grad_norm": 0.7512140274047852, + "learning_rate": 3.273705736045174e-07, + "loss": 2.0513, + "step": 27643 + }, + { + "epoch": 0.92, + "grad_norm": 0.7722046971321106, + "learning_rate": 3.2710089122834686e-07, + "loss": 2.0475, + "step": 27644 + }, + { + "epoch": 0.92, + "grad_norm": 0.7464533448219299, + "learning_rate": 3.2683131813092015e-07, + "loss": 2.1396, + "step": 27645 + }, + { + "epoch": 0.92, + "grad_norm": 0.7333582043647766, + "learning_rate": 3.2656185431528375e-07, + "loss": 2.0368, + "step": 27646 + }, + { + "epoch": 0.92, + "grad_norm": 0.7492766976356506, + "learning_rate": 3.2629249978448296e-07, + "loss": 1.9906, + "step": 27647 + }, + { + "epoch": 0.92, + "grad_norm": 0.785752534866333, + "learning_rate": 3.260232545415609e-07, + "loss": 2.0217, + "step": 27648 + }, + { + "epoch": 0.92, + "grad_norm": 0.7555016875267029, + "learning_rate": 3.257541185895574e-07, + "loss": 2.0208, + "step": 27649 + }, + { + "epoch": 0.92, + "grad_norm": 0.7398478388786316, + "learning_rate": 3.2548509193151333e-07, + "loss": 2.0333, + "step": 27650 + }, + { + "epoch": 0.92, + "grad_norm": 0.7614901661872864, + "learning_rate": 3.252161745704707e-07, + "loss": 2.0467, + "step": 27651 + }, + { + "epoch": 0.92, + "grad_norm": 0.7250102758407593, + "learning_rate": 3.249473665094649e-07, + "loss": 2.0108, + "step": 27652 + }, + { + "epoch": 0.92, + "grad_norm": 0.7306356430053711, + "learning_rate": 3.2467866775153454e-07, + "loss": 1.951, + "step": 27653 + }, + { + "epoch": 0.92, + "grad_norm": 0.7301570773124695, + "learning_rate": 3.244100782997139e-07, + "loss": 2.0853, + "step": 27654 + }, + { + "epoch": 0.92, + "grad_norm": 0.7345647215843201, + "learning_rate": 3.241415981570373e-07, + "loss": 2.0381, + "step": 27655 + }, + { + "epoch": 0.92, + "grad_norm": 0.7391906976699829, + "learning_rate": 3.238732273265377e-07, + "loss": 2.0072, + "step": 27656 + }, + { + "epoch": 0.92, + "grad_norm": 0.752103865146637, + "learning_rate": 3.2360496581124943e-07, + "loss": 1.9838, + "step": 27657 + }, + { + "epoch": 0.92, + "grad_norm": 0.7293895483016968, + "learning_rate": 3.233368136142001e-07, + "loss": 2.0094, + "step": 27658 + }, + { + "epoch": 0.92, + "grad_norm": 0.7485709190368652, + "learning_rate": 3.230687707384195e-07, + "loss": 2.047, + "step": 27659 + }, + { + "epoch": 0.92, + "grad_norm": 0.7544573545455933, + "learning_rate": 3.2280083718693845e-07, + "loss": 2.0979, + "step": 27660 + }, + { + "epoch": 0.92, + "grad_norm": 0.7439588308334351, + "learning_rate": 3.2253301296278125e-07, + "loss": 2.0136, + "step": 27661 + }, + { + "epoch": 0.92, + "grad_norm": 0.7464872598648071, + "learning_rate": 3.2226529806897445e-07, + "loss": 2.0009, + "step": 27662 + }, + { + "epoch": 0.92, + "grad_norm": 0.7387205362319946, + "learning_rate": 3.219976925085444e-07, + "loss": 2.0664, + "step": 27663 + }, + { + "epoch": 0.92, + "grad_norm": 0.7663566470146179, + "learning_rate": 3.217301962845099e-07, + "loss": 2.0833, + "step": 27664 + }, + { + "epoch": 0.92, + "grad_norm": 0.743693470954895, + "learning_rate": 3.2146280939989616e-07, + "loss": 2.0177, + "step": 27665 + }, + { + "epoch": 0.92, + "grad_norm": 0.7562993764877319, + "learning_rate": 3.2119553185772423e-07, + "loss": 2.0056, + "step": 27666 + }, + { + "epoch": 0.92, + "grad_norm": 0.7402998208999634, + "learning_rate": 3.2092836366101386e-07, + "loss": 2.0537, + "step": 27667 + }, + { + "epoch": 0.92, + "grad_norm": 0.7410950660705566, + "learning_rate": 3.2066130481278045e-07, + "loss": 2.0603, + "step": 27668 + }, + { + "epoch": 0.92, + "grad_norm": 0.7640246748924255, + "learning_rate": 3.203943553160438e-07, + "loss": 2.0852, + "step": 27669 + }, + { + "epoch": 0.92, + "grad_norm": 0.7431691288948059, + "learning_rate": 3.2012751517381924e-07, + "loss": 1.9777, + "step": 27670 + }, + { + "epoch": 0.92, + "grad_norm": 0.7969907522201538, + "learning_rate": 3.1986078438911994e-07, + "loss": 2.0866, + "step": 27671 + }, + { + "epoch": 0.92, + "grad_norm": 0.7685071229934692, + "learning_rate": 3.1959416296496013e-07, + "loss": 2.107, + "step": 27672 + }, + { + "epoch": 0.92, + "grad_norm": 0.7259434461593628, + "learning_rate": 3.1932765090435414e-07, + "loss": 2.0303, + "step": 27673 + }, + { + "epoch": 0.92, + "grad_norm": 0.7721937298774719, + "learning_rate": 3.1906124821030836e-07, + "loss": 2.0079, + "step": 27674 + }, + { + "epoch": 0.92, + "grad_norm": 0.7577913403511047, + "learning_rate": 3.187949548858371e-07, + "loss": 2.047, + "step": 27675 + }, + { + "epoch": 0.92, + "grad_norm": 0.7574318647384644, + "learning_rate": 3.1852877093394686e-07, + "loss": 2.0498, + "step": 27676 + }, + { + "epoch": 0.92, + "grad_norm": 0.7250029444694519, + "learning_rate": 3.1826269635764296e-07, + "loss": 2.0528, + "step": 27677 + }, + { + "epoch": 0.92, + "grad_norm": 0.7249597311019897, + "learning_rate": 3.1799673115993413e-07, + "loss": 1.9698, + "step": 27678 + }, + { + "epoch": 0.92, + "grad_norm": 0.74311363697052, + "learning_rate": 3.177308753438246e-07, + "loss": 2.0123, + "step": 27679 + }, + { + "epoch": 0.92, + "grad_norm": 0.7568889260292053, + "learning_rate": 3.1746512891231653e-07, + "loss": 2.065, + "step": 27680 + }, + { + "epoch": 0.92, + "grad_norm": 0.7424436211585999, + "learning_rate": 3.171994918684118e-07, + "loss": 2.0081, + "step": 27681 + }, + { + "epoch": 0.92, + "grad_norm": 0.7420914173126221, + "learning_rate": 3.169339642151148e-07, + "loss": 2.0871, + "step": 27682 + }, + { + "epoch": 0.92, + "grad_norm": 0.7239668369293213, + "learning_rate": 3.166685459554208e-07, + "loss": 2.0417, + "step": 27683 + }, + { + "epoch": 0.92, + "grad_norm": 0.7450498938560486, + "learning_rate": 3.16403237092332e-07, + "loss": 2.1023, + "step": 27684 + }, + { + "epoch": 0.92, + "grad_norm": 0.7478615045547485, + "learning_rate": 3.161380376288448e-07, + "loss": 1.9884, + "step": 27685 + }, + { + "epoch": 0.92, + "grad_norm": 0.7439897656440735, + "learning_rate": 3.158729475679534e-07, + "loss": 2.0532, + "step": 27686 + }, + { + "epoch": 0.92, + "grad_norm": 0.7438661456108093, + "learning_rate": 3.1560796691265326e-07, + "loss": 2.0268, + "step": 27687 + }, + { + "epoch": 0.92, + "grad_norm": 0.7466949224472046, + "learning_rate": 3.153430956659409e-07, + "loss": 2.0626, + "step": 27688 + }, + { + "epoch": 0.92, + "grad_norm": 0.7543800473213196, + "learning_rate": 3.150783338308061e-07, + "loss": 2.1109, + "step": 27689 + }, + { + "epoch": 0.92, + "grad_norm": 0.7582016587257385, + "learning_rate": 3.148136814102387e-07, + "loss": 2.0837, + "step": 27690 + }, + { + "epoch": 0.92, + "grad_norm": 0.7708203196525574, + "learning_rate": 3.145491384072319e-07, + "loss": 2.08, + "step": 27691 + }, + { + "epoch": 0.92, + "grad_norm": 0.7646574974060059, + "learning_rate": 3.1428470482477324e-07, + "loss": 2.0877, + "step": 27692 + }, + { + "epoch": 0.92, + "grad_norm": 0.7319545149803162, + "learning_rate": 3.1402038066584705e-07, + "loss": 1.9771, + "step": 27693 + }, + { + "epoch": 0.92, + "grad_norm": 0.7412433624267578, + "learning_rate": 3.1375616593344316e-07, + "loss": 2.05, + "step": 27694 + }, + { + "epoch": 0.92, + "grad_norm": 0.7650107741355896, + "learning_rate": 3.1349206063054693e-07, + "loss": 2.0824, + "step": 27695 + }, + { + "epoch": 0.92, + "grad_norm": 0.7498570084571838, + "learning_rate": 3.1322806476013824e-07, + "loss": 2.0909, + "step": 27696 + }, + { + "epoch": 0.92, + "grad_norm": 0.7431207299232483, + "learning_rate": 3.1296417832520356e-07, + "loss": 1.9951, + "step": 27697 + }, + { + "epoch": 0.92, + "grad_norm": 0.7238743305206299, + "learning_rate": 3.127004013287227e-07, + "loss": 2.0449, + "step": 27698 + }, + { + "epoch": 0.92, + "grad_norm": 0.7596167922019958, + "learning_rate": 3.1243673377367335e-07, + "loss": 2.0155, + "step": 27699 + }, + { + "epoch": 0.92, + "grad_norm": 0.7470327019691467, + "learning_rate": 3.121731756630375e-07, + "loss": 2.0852, + "step": 27700 + }, + { + "epoch": 0.92, + "grad_norm": 0.7704834938049316, + "learning_rate": 3.119097269997928e-07, + "loss": 2.0289, + "step": 27701 + }, + { + "epoch": 0.92, + "grad_norm": 0.7676897048950195, + "learning_rate": 3.116463877869125e-07, + "loss": 2.0345, + "step": 27702 + }, + { + "epoch": 0.92, + "grad_norm": 0.7366531491279602, + "learning_rate": 3.11383158027373e-07, + "loss": 2.0723, + "step": 27703 + }, + { + "epoch": 0.92, + "grad_norm": 0.7341530323028564, + "learning_rate": 3.1112003772414967e-07, + "loss": 2.0194, + "step": 27704 + }, + { + "epoch": 0.92, + "grad_norm": 0.7782750725746155, + "learning_rate": 3.108570268802125e-07, + "loss": 1.958, + "step": 27705 + }, + { + "epoch": 0.92, + "grad_norm": 0.7287595272064209, + "learning_rate": 3.105941254985367e-07, + "loss": 2.0083, + "step": 27706 + }, + { + "epoch": 0.92, + "grad_norm": 0.7534686923027039, + "learning_rate": 3.10331333582089e-07, + "loss": 2.0975, + "step": 27707 + }, + { + "epoch": 0.92, + "grad_norm": 0.7921737432479858, + "learning_rate": 3.10068651133838e-07, + "loss": 2.118, + "step": 27708 + }, + { + "epoch": 0.92, + "grad_norm": 0.7504991292953491, + "learning_rate": 3.0980607815675245e-07, + "loss": 1.9919, + "step": 27709 + }, + { + "epoch": 0.92, + "grad_norm": 0.7587846517562866, + "learning_rate": 3.095436146538e-07, + "loss": 2.0619, + "step": 27710 + }, + { + "epoch": 0.92, + "grad_norm": 0.7559679746627808, + "learning_rate": 3.0928126062794496e-07, + "loss": 2.0301, + "step": 27711 + }, + { + "epoch": 0.92, + "grad_norm": 0.7743144035339355, + "learning_rate": 3.0901901608214936e-07, + "loss": 2.1081, + "step": 27712 + }, + { + "epoch": 0.92, + "grad_norm": 0.730049192905426, + "learning_rate": 3.087568810193775e-07, + "loss": 2.0741, + "step": 27713 + }, + { + "epoch": 0.92, + "grad_norm": 0.7464324831962585, + "learning_rate": 3.084948554425915e-07, + "loss": 2.0473, + "step": 27714 + }, + { + "epoch": 0.92, + "grad_norm": 0.7562131881713867, + "learning_rate": 3.082329393547523e-07, + "loss": 2.0204, + "step": 27715 + }, + { + "epoch": 0.92, + "grad_norm": 0.717248260974884, + "learning_rate": 3.079711327588153e-07, + "loss": 1.9518, + "step": 27716 + }, + { + "epoch": 0.92, + "grad_norm": 0.7551710605621338, + "learning_rate": 3.077094356577415e-07, + "loss": 2.0335, + "step": 27717 + }, + { + "epoch": 0.92, + "grad_norm": 0.7462586164474487, + "learning_rate": 3.074478480544851e-07, + "loss": 2.037, + "step": 27718 + }, + { + "epoch": 0.92, + "grad_norm": 0.7253049612045288, + "learning_rate": 3.071863699520039e-07, + "loss": 2.0298, + "step": 27719 + }, + { + "epoch": 0.92, + "grad_norm": 0.7469426989555359, + "learning_rate": 3.069250013532499e-07, + "loss": 2.0993, + "step": 27720 + }, + { + "epoch": 0.92, + "grad_norm": 0.748832106590271, + "learning_rate": 3.0666374226117625e-07, + "loss": 1.9962, + "step": 27721 + }, + { + "epoch": 0.92, + "grad_norm": 0.7599135041236877, + "learning_rate": 3.06402592678734e-07, + "loss": 2.0126, + "step": 27722 + }, + { + "epoch": 0.92, + "grad_norm": 0.7544862031936646, + "learning_rate": 3.0614155260887733e-07, + "loss": 2.0214, + "step": 27723 + }, + { + "epoch": 0.92, + "grad_norm": 0.7334742546081543, + "learning_rate": 3.058806220545496e-07, + "loss": 1.9852, + "step": 27724 + }, + { + "epoch": 0.92, + "grad_norm": 0.7521848082542419, + "learning_rate": 3.056198010187006e-07, + "loss": 2.0237, + "step": 27725 + }, + { + "epoch": 0.92, + "grad_norm": 0.7528825998306274, + "learning_rate": 3.0535908950428017e-07, + "loss": 2.0781, + "step": 27726 + }, + { + "epoch": 0.92, + "grad_norm": 0.7192471027374268, + "learning_rate": 3.050984875142293e-07, + "loss": 2.0439, + "step": 27727 + }, + { + "epoch": 0.92, + "grad_norm": 0.7353876233100891, + "learning_rate": 3.048379950514946e-07, + "loss": 2.0315, + "step": 27728 + }, + { + "epoch": 0.92, + "grad_norm": 0.7753949165344238, + "learning_rate": 3.0457761211901804e-07, + "loss": 2.0264, + "step": 27729 + }, + { + "epoch": 0.92, + "grad_norm": 0.7380372285842896, + "learning_rate": 3.043173387197418e-07, + "loss": 2.0752, + "step": 27730 + }, + { + "epoch": 0.92, + "grad_norm": 0.7567800283432007, + "learning_rate": 3.040571748566057e-07, + "loss": 2.0796, + "step": 27731 + }, + { + "epoch": 0.92, + "grad_norm": 0.7507311701774597, + "learning_rate": 3.037971205325496e-07, + "loss": 2.0991, + "step": 27732 + }, + { + "epoch": 0.92, + "grad_norm": 0.7370536923408508, + "learning_rate": 3.035371757505112e-07, + "loss": 2.0206, + "step": 27733 + }, + { + "epoch": 0.92, + "grad_norm": 0.7381748557090759, + "learning_rate": 3.0327734051342705e-07, + "loss": 2.0055, + "step": 27734 + }, + { + "epoch": 0.92, + "grad_norm": 0.7792350649833679, + "learning_rate": 3.030176148242314e-07, + "loss": 2.0978, + "step": 27735 + }, + { + "epoch": 0.92, + "grad_norm": 0.7473456859588623, + "learning_rate": 3.02757998685862e-07, + "loss": 2.0001, + "step": 27736 + }, + { + "epoch": 0.92, + "grad_norm": 0.7511739134788513, + "learning_rate": 3.024984921012486e-07, + "loss": 2.028, + "step": 27737 + }, + { + "epoch": 0.92, + "grad_norm": 0.7346223592758179, + "learning_rate": 3.022390950733234e-07, + "loss": 2.0577, + "step": 27738 + }, + { + "epoch": 0.92, + "grad_norm": 0.7358810305595398, + "learning_rate": 3.019798076050184e-07, + "loss": 2.0626, + "step": 27739 + }, + { + "epoch": 0.92, + "grad_norm": 0.7527163028717041, + "learning_rate": 3.017206296992603e-07, + "loss": 1.976, + "step": 27740 + }, + { + "epoch": 0.92, + "grad_norm": 0.7601765394210815, + "learning_rate": 3.014615613589811e-07, + "loss": 2.0888, + "step": 27741 + }, + { + "epoch": 0.92, + "grad_norm": 0.7566134333610535, + "learning_rate": 3.01202602587104e-07, + "loss": 2.0367, + "step": 27742 + }, + { + "epoch": 0.92, + "grad_norm": 0.7677571177482605, + "learning_rate": 3.0094375338655556e-07, + "loss": 2.0658, + "step": 27743 + }, + { + "epoch": 0.92, + "grad_norm": 0.7175692915916443, + "learning_rate": 3.006850137602602e-07, + "loss": 2.0886, + "step": 27744 + }, + { + "epoch": 0.92, + "grad_norm": 0.7648827433586121, + "learning_rate": 3.004263837111421e-07, + "loss": 2.0492, + "step": 27745 + }, + { + "epoch": 0.92, + "grad_norm": 0.742368757724762, + "learning_rate": 3.001678632421223e-07, + "loss": 2.1145, + "step": 27746 + }, + { + "epoch": 0.92, + "grad_norm": 0.7374505400657654, + "learning_rate": 2.999094523561208e-07, + "loss": 2.0367, + "step": 27747 + }, + { + "epoch": 0.92, + "grad_norm": 0.734950065612793, + "learning_rate": 2.9965115105605733e-07, + "loss": 1.9964, + "step": 27748 + }, + { + "epoch": 0.92, + "grad_norm": 0.734057605266571, + "learning_rate": 2.993929593448497e-07, + "loss": 2.0903, + "step": 27749 + }, + { + "epoch": 0.92, + "grad_norm": 0.7659988403320312, + "learning_rate": 2.9913487722541655e-07, + "loss": 2.0108, + "step": 27750 + }, + { + "epoch": 0.92, + "grad_norm": 0.7558735013008118, + "learning_rate": 2.988769047006712e-07, + "loss": 2.0666, + "step": 27751 + }, + { + "epoch": 0.92, + "grad_norm": 0.7403257489204407, + "learning_rate": 2.986190417735302e-07, + "loss": 2.0736, + "step": 27752 + }, + { + "epoch": 0.92, + "grad_norm": 0.7615982294082642, + "learning_rate": 2.983612884469045e-07, + "loss": 2.0408, + "step": 27753 + }, + { + "epoch": 0.92, + "grad_norm": 0.7448079586029053, + "learning_rate": 2.9810364472370843e-07, + "loss": 2.0411, + "step": 27754 + }, + { + "epoch": 0.92, + "grad_norm": 0.7467453479766846, + "learning_rate": 2.97846110606852e-07, + "loss": 2.0334, + "step": 27755 + }, + { + "epoch": 0.92, + "grad_norm": 0.7232568860054016, + "learning_rate": 2.975886860992427e-07, + "loss": 2.0549, + "step": 27756 + }, + { + "epoch": 0.92, + "grad_norm": 0.7580637335777283, + "learning_rate": 2.9733137120379065e-07, + "loss": 2.0521, + "step": 27757 + }, + { + "epoch": 0.92, + "grad_norm": 0.7344949245452881, + "learning_rate": 2.970741659234022e-07, + "loss": 2.0583, + "step": 27758 + }, + { + "epoch": 0.92, + "grad_norm": 0.7170581817626953, + "learning_rate": 2.9681707026098514e-07, + "loss": 1.9781, + "step": 27759 + }, + { + "epoch": 0.92, + "grad_norm": 0.7625787854194641, + "learning_rate": 2.965600842194394e-07, + "loss": 1.9749, + "step": 27760 + }, + { + "epoch": 0.92, + "grad_norm": 0.7346869111061096, + "learning_rate": 2.9630320780167366e-07, + "loss": 2.0312, + "step": 27761 + }, + { + "epoch": 0.92, + "grad_norm": 0.7336499094963074, + "learning_rate": 2.9604644101058565e-07, + "loss": 1.9909, + "step": 27762 + }, + { + "epoch": 0.92, + "grad_norm": 0.7484045028686523, + "learning_rate": 2.957897838490786e-07, + "loss": 2.1084, + "step": 27763 + }, + { + "epoch": 0.92, + "grad_norm": 0.7607395052909851, + "learning_rate": 2.955332363200514e-07, + "loss": 2.0512, + "step": 27764 + }, + { + "epoch": 0.92, + "grad_norm": 0.7536801099777222, + "learning_rate": 2.9527679842640153e-07, + "loss": 2.0568, + "step": 27765 + }, + { + "epoch": 0.92, + "grad_norm": 0.774182140827179, + "learning_rate": 2.9502047017102687e-07, + "loss": 1.9764, + "step": 27766 + }, + { + "epoch": 0.92, + "grad_norm": 0.7581747174263, + "learning_rate": 2.947642515568239e-07, + "loss": 2.0213, + "step": 27767 + }, + { + "epoch": 0.92, + "grad_norm": 0.7543849945068359, + "learning_rate": 2.9450814258668693e-07, + "loss": 2.0757, + "step": 27768 + }, + { + "epoch": 0.92, + "grad_norm": 0.7381640672683716, + "learning_rate": 2.9425214326350816e-07, + "loss": 1.9918, + "step": 27769 + }, + { + "epoch": 0.92, + "grad_norm": 0.7476697564125061, + "learning_rate": 2.9399625359018193e-07, + "loss": 2.0163, + "step": 27770 + }, + { + "epoch": 0.92, + "grad_norm": 0.7519593834877014, + "learning_rate": 2.937404735695959e-07, + "loss": 2.057, + "step": 27771 + }, + { + "epoch": 0.92, + "grad_norm": 0.7299101948738098, + "learning_rate": 2.934848032046422e-07, + "loss": 2.0564, + "step": 27772 + }, + { + "epoch": 0.92, + "grad_norm": 0.7523294687271118, + "learning_rate": 2.932292424982086e-07, + "loss": 2.0115, + "step": 27773 + }, + { + "epoch": 0.92, + "grad_norm": 0.736680805683136, + "learning_rate": 2.9297379145318274e-07, + "loss": 2.0916, + "step": 27774 + }, + { + "epoch": 0.92, + "grad_norm": 0.7316254377365112, + "learning_rate": 2.9271845007245004e-07, + "loss": 2.0764, + "step": 27775 + }, + { + "epoch": 0.92, + "grad_norm": 0.7437477707862854, + "learning_rate": 2.924632183588949e-07, + "loss": 2.0698, + "step": 27776 + }, + { + "epoch": 0.92, + "grad_norm": 0.7351751923561096, + "learning_rate": 2.922080963154017e-07, + "loss": 1.9997, + "step": 27777 + }, + { + "epoch": 0.92, + "grad_norm": 0.7282069325447083, + "learning_rate": 2.9195308394485147e-07, + "loss": 2.037, + "step": 27778 + }, + { + "epoch": 0.92, + "grad_norm": 0.7197161912918091, + "learning_rate": 2.916981812501252e-07, + "loss": 2.0145, + "step": 27779 + }, + { + "epoch": 0.92, + "grad_norm": 0.7297230362892151, + "learning_rate": 2.91443388234105e-07, + "loss": 2.0015, + "step": 27780 + }, + { + "epoch": 0.92, + "grad_norm": 0.7370126247406006, + "learning_rate": 2.9118870489966753e-07, + "loss": 1.9739, + "step": 27781 + }, + { + "epoch": 0.92, + "grad_norm": 0.7700838446617126, + "learning_rate": 2.909341312496883e-07, + "loss": 2.0291, + "step": 27782 + }, + { + "epoch": 0.92, + "grad_norm": 0.7294010519981384, + "learning_rate": 2.906796672870471e-07, + "loss": 1.9787, + "step": 27783 + }, + { + "epoch": 0.92, + "grad_norm": 0.7314468026161194, + "learning_rate": 2.9042531301461506e-07, + "loss": 2.0457, + "step": 27784 + }, + { + "epoch": 0.92, + "grad_norm": 0.754212498664856, + "learning_rate": 2.9017106843526876e-07, + "loss": 2.0528, + "step": 27785 + }, + { + "epoch": 0.92, + "grad_norm": 0.755840539932251, + "learning_rate": 2.899169335518792e-07, + "loss": 2.0765, + "step": 27786 + }, + { + "epoch": 0.92, + "grad_norm": 0.7168874144554138, + "learning_rate": 2.8966290836731745e-07, + "loss": 2.0318, + "step": 27787 + }, + { + "epoch": 0.92, + "grad_norm": 0.7285027503967285, + "learning_rate": 2.8940899288445237e-07, + "loss": 1.992, + "step": 27788 + }, + { + "epoch": 0.92, + "grad_norm": 0.7419102787971497, + "learning_rate": 2.8915518710615376e-07, + "loss": 2.0239, + "step": 27789 + }, + { + "epoch": 0.92, + "grad_norm": 0.732861340045929, + "learning_rate": 2.889014910352905e-07, + "loss": 2.0129, + "step": 27790 + }, + { + "epoch": 0.92, + "grad_norm": 0.7722498774528503, + "learning_rate": 2.886479046747248e-07, + "loss": 2.0812, + "step": 27791 + }, + { + "epoch": 0.92, + "grad_norm": 0.7624475359916687, + "learning_rate": 2.883944280273243e-07, + "loss": 2.0483, + "step": 27792 + }, + { + "epoch": 0.92, + "grad_norm": 0.7525282502174377, + "learning_rate": 2.881410610959523e-07, + "loss": 2.0804, + "step": 27793 + }, + { + "epoch": 0.92, + "grad_norm": 0.7676408886909485, + "learning_rate": 2.878878038834709e-07, + "loss": 2.0513, + "step": 27794 + }, + { + "epoch": 0.92, + "grad_norm": 0.7185543179512024, + "learning_rate": 2.876346563927401e-07, + "loss": 2.0377, + "step": 27795 + }, + { + "epoch": 0.92, + "grad_norm": 0.7555459141731262, + "learning_rate": 2.8738161862662207e-07, + "loss": 1.9662, + "step": 27796 + }, + { + "epoch": 0.92, + "grad_norm": 0.7270405292510986, + "learning_rate": 2.8712869058797445e-07, + "loss": 2.1201, + "step": 27797 + }, + { + "epoch": 0.92, + "grad_norm": 0.7435339093208313, + "learning_rate": 2.8687587227965385e-07, + "loss": 2.0281, + "step": 27798 + }, + { + "epoch": 0.92, + "grad_norm": 0.7333428859710693, + "learning_rate": 2.8662316370451806e-07, + "loss": 2.0368, + "step": 27799 + }, + { + "epoch": 0.92, + "grad_norm": 0.742886483669281, + "learning_rate": 2.8637056486542026e-07, + "loss": 1.9998, + "step": 27800 + }, + { + "epoch": 0.92, + "grad_norm": 0.7383636236190796, + "learning_rate": 2.861180757652149e-07, + "loss": 1.9869, + "step": 27801 + }, + { + "epoch": 0.92, + "grad_norm": 0.7411794662475586, + "learning_rate": 2.858656964067563e-07, + "loss": 2.0622, + "step": 27802 + }, + { + "epoch": 0.93, + "grad_norm": 0.7370967864990234, + "learning_rate": 2.8561342679289337e-07, + "loss": 2.1043, + "step": 27803 + }, + { + "epoch": 0.93, + "grad_norm": 0.7297102212905884, + "learning_rate": 2.8536126692647606e-07, + "loss": 2.0786, + "step": 27804 + }, + { + "epoch": 0.93, + "grad_norm": 0.745228111743927, + "learning_rate": 2.851092168103542e-07, + "loss": 1.9948, + "step": 27805 + }, + { + "epoch": 0.93, + "grad_norm": 0.7371108531951904, + "learning_rate": 2.8485727644737447e-07, + "loss": 2.0174, + "step": 27806 + }, + { + "epoch": 0.93, + "grad_norm": 0.7554144263267517, + "learning_rate": 2.846054458403835e-07, + "loss": 2.1258, + "step": 27807 + }, + { + "epoch": 0.93, + "grad_norm": 0.7192895412445068, + "learning_rate": 2.8435372499222793e-07, + "loss": 2.1065, + "step": 27808 + }, + { + "epoch": 0.93, + "grad_norm": 0.7098931074142456, + "learning_rate": 2.841021139057487e-07, + "loss": 1.9536, + "step": 27809 + }, + { + "epoch": 0.93, + "grad_norm": 0.757360577583313, + "learning_rate": 2.8385061258378923e-07, + "loss": 2.1026, + "step": 27810 + }, + { + "epoch": 0.93, + "grad_norm": 0.7316729426383972, + "learning_rate": 2.835992210291927e-07, + "loss": 2.0026, + "step": 27811 + }, + { + "epoch": 0.93, + "grad_norm": 0.751430332660675, + "learning_rate": 2.8334793924479797e-07, + "loss": 2.0582, + "step": 27812 + }, + { + "epoch": 0.93, + "grad_norm": 0.7631667256355286, + "learning_rate": 2.830967672334428e-07, + "loss": 2.0803, + "step": 27813 + }, + { + "epoch": 0.93, + "grad_norm": 0.7661742568016052, + "learning_rate": 2.8284570499796496e-07, + "loss": 2.097, + "step": 27814 + }, + { + "epoch": 0.93, + "grad_norm": 0.7558079361915588, + "learning_rate": 2.8259475254120315e-07, + "loss": 1.9494, + "step": 27815 + }, + { + "epoch": 0.93, + "grad_norm": 0.7462192177772522, + "learning_rate": 2.823439098659908e-07, + "loss": 2.0685, + "step": 27816 + }, + { + "epoch": 0.93, + "grad_norm": 0.7554885149002075, + "learning_rate": 2.8209317697516e-07, + "loss": 2.0609, + "step": 27817 + }, + { + "epoch": 0.93, + "grad_norm": 0.7765205502510071, + "learning_rate": 2.8184255387154744e-07, + "loss": 2.1027, + "step": 27818 + }, + { + "epoch": 0.93, + "grad_norm": 0.7259227633476257, + "learning_rate": 2.8159204055798085e-07, + "loss": 1.9972, + "step": 27819 + }, + { + "epoch": 0.93, + "grad_norm": 0.7740525603294373, + "learning_rate": 2.813416370372912e-07, + "loss": 2.0156, + "step": 27820 + }, + { + "epoch": 0.93, + "grad_norm": 0.7714994549751282, + "learning_rate": 2.8109134331231083e-07, + "loss": 2.0266, + "step": 27821 + }, + { + "epoch": 0.93, + "grad_norm": 0.7852103114128113, + "learning_rate": 2.8084115938586177e-07, + "loss": 1.9885, + "step": 27822 + }, + { + "epoch": 0.93, + "grad_norm": 0.7389509081840515, + "learning_rate": 2.8059108526077404e-07, + "loss": 2.0001, + "step": 27823 + }, + { + "epoch": 0.93, + "grad_norm": 0.7501384615898132, + "learning_rate": 2.803411209398732e-07, + "loss": 2.0133, + "step": 27824 + }, + { + "epoch": 0.93, + "grad_norm": 0.7325080633163452, + "learning_rate": 2.800912664259825e-07, + "loss": 2.0607, + "step": 27825 + }, + { + "epoch": 0.93, + "grad_norm": 0.7479891180992126, + "learning_rate": 2.798415217219219e-07, + "loss": 1.9714, + "step": 27826 + }, + { + "epoch": 0.93, + "grad_norm": 0.7832179665565491, + "learning_rate": 2.7959188683051696e-07, + "loss": 2.0281, + "step": 27827 + }, + { + "epoch": 0.93, + "grad_norm": 0.7498183250427246, + "learning_rate": 2.793423617545854e-07, + "loss": 2.0773, + "step": 27828 + }, + { + "epoch": 0.93, + "grad_norm": 0.7317245006561279, + "learning_rate": 2.7909294649694606e-07, + "loss": 2.01, + "step": 27829 + }, + { + "epoch": 0.93, + "grad_norm": 0.7390229105949402, + "learning_rate": 2.788436410604201e-07, + "loss": 2.0559, + "step": 27830 + }, + { + "epoch": 0.93, + "grad_norm": 0.7410488724708557, + "learning_rate": 2.7859444544782064e-07, + "loss": 2.0427, + "step": 27831 + }, + { + "epoch": 0.93, + "grad_norm": 0.7306001782417297, + "learning_rate": 2.783453596619623e-07, + "loss": 2.0805, + "step": 27832 + }, + { + "epoch": 0.93, + "grad_norm": 0.7568457126617432, + "learning_rate": 2.780963837056627e-07, + "loss": 2.0906, + "step": 27833 + }, + { + "epoch": 0.93, + "grad_norm": 0.7483450770378113, + "learning_rate": 2.778475175817319e-07, + "loss": 2.0536, + "step": 27834 + }, + { + "epoch": 0.93, + "grad_norm": 0.7529596090316772, + "learning_rate": 2.775987612929809e-07, + "loss": 2.011, + "step": 27835 + }, + { + "epoch": 0.93, + "grad_norm": 0.7707741856575012, + "learning_rate": 2.77350114842222e-07, + "loss": 2.1078, + "step": 27836 + }, + { + "epoch": 0.93, + "grad_norm": 0.7418769001960754, + "learning_rate": 2.771015782322639e-07, + "loss": 1.9772, + "step": 27837 + }, + { + "epoch": 0.93, + "grad_norm": 0.7614316940307617, + "learning_rate": 2.7685315146591343e-07, + "loss": 2.0641, + "step": 27838 + }, + { + "epoch": 0.93, + "grad_norm": 0.7126253247261047, + "learning_rate": 2.766048345459782e-07, + "loss": 2.0214, + "step": 27839 + }, + { + "epoch": 0.93, + "grad_norm": 0.7546053528785706, + "learning_rate": 2.763566274752638e-07, + "loss": 2.0079, + "step": 27840 + }, + { + "epoch": 0.93, + "grad_norm": 0.7334279417991638, + "learning_rate": 2.761085302565714e-07, + "loss": 2.0217, + "step": 27841 + }, + { + "epoch": 0.93, + "grad_norm": 0.7316117882728577, + "learning_rate": 2.758605428927075e-07, + "loss": 2.0624, + "step": 27842 + }, + { + "epoch": 0.93, + "grad_norm": 0.7327911257743835, + "learning_rate": 2.7561266538647323e-07, + "loss": 2.0557, + "step": 27843 + }, + { + "epoch": 0.93, + "grad_norm": 0.7581915855407715, + "learning_rate": 2.7536489774066644e-07, + "loss": 1.9988, + "step": 27844 + }, + { + "epoch": 0.93, + "grad_norm": 0.7346964478492737, + "learning_rate": 2.7511723995808705e-07, + "loss": 2.0695, + "step": 27845 + }, + { + "epoch": 0.93, + "grad_norm": 0.7348883152008057, + "learning_rate": 2.7486969204153613e-07, + "loss": 2.0784, + "step": 27846 + }, + { + "epoch": 0.93, + "grad_norm": 0.7330226302146912, + "learning_rate": 2.7462225399380705e-07, + "loss": 2.0985, + "step": 27847 + }, + { + "epoch": 0.93, + "grad_norm": 0.7600095868110657, + "learning_rate": 2.7437492581769534e-07, + "loss": 2.0555, + "step": 27848 + }, + { + "epoch": 0.93, + "grad_norm": 0.7869946956634521, + "learning_rate": 2.741277075159965e-07, + "loss": 2.032, + "step": 27849 + }, + { + "epoch": 0.93, + "grad_norm": 0.7264682650566101, + "learning_rate": 2.7388059909150276e-07, + "loss": 2.0737, + "step": 27850 + }, + { + "epoch": 0.93, + "grad_norm": 0.7407927513122559, + "learning_rate": 2.736336005470053e-07, + "loss": 2.0223, + "step": 27851 + }, + { + "epoch": 0.93, + "grad_norm": 0.7393671274185181, + "learning_rate": 2.733867118852962e-07, + "loss": 2.0038, + "step": 27852 + }, + { + "epoch": 0.93, + "grad_norm": 0.7185119390487671, + "learning_rate": 2.7313993310916443e-07, + "loss": 2.0448, + "step": 27853 + }, + { + "epoch": 0.93, + "grad_norm": 0.75821453332901, + "learning_rate": 2.728932642213955e-07, + "loss": 2.0136, + "step": 27854 + }, + { + "epoch": 0.93, + "grad_norm": 0.7126148343086243, + "learning_rate": 2.726467052247794e-07, + "loss": 2.0198, + "step": 27855 + }, + { + "epoch": 0.93, + "grad_norm": 0.7342941164970398, + "learning_rate": 2.7240025612209954e-07, + "loss": 2.1063, + "step": 27856 + }, + { + "epoch": 0.93, + "grad_norm": 0.7413205504417419, + "learning_rate": 2.721539169161391e-07, + "loss": 2.0389, + "step": 27857 + }, + { + "epoch": 0.93, + "grad_norm": 0.7501566410064697, + "learning_rate": 2.7190768760968376e-07, + "loss": 2.0966, + "step": 27858 + }, + { + "epoch": 0.93, + "grad_norm": 0.7557350397109985, + "learning_rate": 2.716615682055146e-07, + "loss": 2.1455, + "step": 27859 + }, + { + "epoch": 0.93, + "grad_norm": 0.7484878301620483, + "learning_rate": 2.7141555870641045e-07, + "loss": 2.0656, + "step": 27860 + }, + { + "epoch": 0.93, + "grad_norm": 0.7559391856193542, + "learning_rate": 2.711696591151536e-07, + "loss": 2.0703, + "step": 27861 + }, + { + "epoch": 0.93, + "grad_norm": 0.7430881857872009, + "learning_rate": 2.7092386943451954e-07, + "loss": 1.9874, + "step": 27862 + }, + { + "epoch": 0.93, + "grad_norm": 0.7316837310791016, + "learning_rate": 2.7067818966728497e-07, + "loss": 2.0028, + "step": 27863 + }, + { + "epoch": 0.93, + "grad_norm": 0.7509291172027588, + "learning_rate": 2.704326198162266e-07, + "loss": 2.051, + "step": 27864 + }, + { + "epoch": 0.93, + "grad_norm": 0.7525030374526978, + "learning_rate": 2.7018715988411994e-07, + "loss": 2.0591, + "step": 27865 + }, + { + "epoch": 0.93, + "grad_norm": 0.728617250919342, + "learning_rate": 2.6994180987373496e-07, + "loss": 2.0216, + "step": 27866 + }, + { + "epoch": 0.93, + "grad_norm": 0.7736281156539917, + "learning_rate": 2.6969656978784396e-07, + "loss": 1.9672, + "step": 27867 + }, + { + "epoch": 0.93, + "grad_norm": 0.760162353515625, + "learning_rate": 2.694514396292203e-07, + "loss": 2.0191, + "step": 27868 + }, + { + "epoch": 0.93, + "grad_norm": 0.7245417237281799, + "learning_rate": 2.6920641940063276e-07, + "loss": 2.1005, + "step": 27869 + }, + { + "epoch": 0.93, + "grad_norm": 0.7642731666564941, + "learning_rate": 2.6896150910484586e-07, + "loss": 2.0365, + "step": 27870 + }, + { + "epoch": 0.93, + "grad_norm": 0.7467430830001831, + "learning_rate": 2.6871670874462964e-07, + "loss": 2.0797, + "step": 27871 + }, + { + "epoch": 0.93, + "grad_norm": 0.7355383038520813, + "learning_rate": 2.684720183227496e-07, + "loss": 2.0858, + "step": 27872 + }, + { + "epoch": 0.93, + "grad_norm": 0.766723096370697, + "learning_rate": 2.6822743784196804e-07, + "loss": 1.9927, + "step": 27873 + }, + { + "epoch": 0.93, + "grad_norm": 0.7569175362586975, + "learning_rate": 2.6798296730505046e-07, + "loss": 2.0404, + "step": 27874 + }, + { + "epoch": 0.93, + "grad_norm": 0.748996376991272, + "learning_rate": 2.6773860671475913e-07, + "loss": 1.9798, + "step": 27875 + }, + { + "epoch": 0.93, + "grad_norm": 0.7404541373252869, + "learning_rate": 2.674943560738508e-07, + "loss": 2.0395, + "step": 27876 + }, + { + "epoch": 0.93, + "grad_norm": 0.7506717443466187, + "learning_rate": 2.6725021538508977e-07, + "loss": 1.9652, + "step": 27877 + }, + { + "epoch": 0.93, + "grad_norm": 0.745093047618866, + "learning_rate": 2.670061846512306e-07, + "loss": 2.0599, + "step": 27878 + }, + { + "epoch": 0.93, + "grad_norm": 0.7612241506576538, + "learning_rate": 2.6676226387503114e-07, + "loss": 2.0642, + "step": 27879 + }, + { + "epoch": 0.93, + "grad_norm": 0.7584373950958252, + "learning_rate": 2.66518453059248e-07, + "loss": 2.0412, + "step": 27880 + }, + { + "epoch": 0.93, + "grad_norm": 0.7781530022621155, + "learning_rate": 2.662747522066345e-07, + "loss": 2.0075, + "step": 27881 + }, + { + "epoch": 0.93, + "grad_norm": 0.7199565768241882, + "learning_rate": 2.66031161319944e-07, + "loss": 1.9915, + "step": 27882 + }, + { + "epoch": 0.93, + "grad_norm": 0.7701200842857361, + "learning_rate": 2.6578768040192995e-07, + "loss": 2.0701, + "step": 27883 + }, + { + "epoch": 0.93, + "grad_norm": 0.714232325553894, + "learning_rate": 2.6554430945534225e-07, + "loss": 2.0728, + "step": 27884 + }, + { + "epoch": 0.93, + "grad_norm": 0.7406719326972961, + "learning_rate": 2.653010484829288e-07, + "loss": 2.0573, + "step": 27885 + }, + { + "epoch": 0.93, + "grad_norm": 0.7422420978546143, + "learning_rate": 2.6505789748743846e-07, + "loss": 1.9974, + "step": 27886 + }, + { + "epoch": 0.93, + "grad_norm": 0.764012336730957, + "learning_rate": 2.648148564716213e-07, + "loss": 2.0003, + "step": 27887 + }, + { + "epoch": 0.93, + "grad_norm": 0.7540760040283203, + "learning_rate": 2.645719254382184e-07, + "loss": 2.0248, + "step": 27888 + }, + { + "epoch": 0.93, + "grad_norm": 0.7420525550842285, + "learning_rate": 2.643291043899765e-07, + "loss": 1.9695, + "step": 27889 + }, + { + "epoch": 0.93, + "grad_norm": 0.7442198991775513, + "learning_rate": 2.6408639332963893e-07, + "loss": 1.9752, + "step": 27890 + }, + { + "epoch": 0.93, + "grad_norm": 0.7698887586593628, + "learning_rate": 2.6384379225994684e-07, + "loss": 2.075, + "step": 27891 + }, + { + "epoch": 0.93, + "grad_norm": 0.765974760055542, + "learning_rate": 2.636013011836436e-07, + "loss": 2.0281, + "step": 27892 + }, + { + "epoch": 0.93, + "grad_norm": 0.7182168960571289, + "learning_rate": 2.6335892010346587e-07, + "loss": 1.9914, + "step": 27893 + }, + { + "epoch": 0.93, + "grad_norm": 0.7500603795051575, + "learning_rate": 2.631166490221515e-07, + "loss": 2.0028, + "step": 27894 + }, + { + "epoch": 0.93, + "grad_norm": 0.7582493424415588, + "learning_rate": 2.628744879424394e-07, + "loss": 2.0824, + "step": 27895 + }, + { + "epoch": 0.93, + "grad_norm": 0.7277804613113403, + "learning_rate": 2.626324368670652e-07, + "loss": 2.046, + "step": 27896 + }, + { + "epoch": 0.93, + "grad_norm": 0.7764238715171814, + "learning_rate": 2.623904957987644e-07, + "loss": 2.07, + "step": 27897 + }, + { + "epoch": 0.93, + "grad_norm": 0.7843112945556641, + "learning_rate": 2.621486647402671e-07, + "loss": 2.089, + "step": 27898 + }, + { + "epoch": 0.93, + "grad_norm": 0.7297757267951965, + "learning_rate": 2.619069436943078e-07, + "loss": 1.9168, + "step": 27899 + }, + { + "epoch": 0.93, + "grad_norm": 0.7402142286300659, + "learning_rate": 2.616653326636176e-07, + "loss": 2.0293, + "step": 27900 + }, + { + "epoch": 0.93, + "grad_norm": 0.7449386715888977, + "learning_rate": 2.6142383165092433e-07, + "loss": 2.1063, + "step": 27901 + }, + { + "epoch": 0.93, + "grad_norm": 0.7214683294296265, + "learning_rate": 2.6118244065895693e-07, + "loss": 2.055, + "step": 27902 + }, + { + "epoch": 0.93, + "grad_norm": 0.748918890953064, + "learning_rate": 2.609411596904432e-07, + "loss": 2.0357, + "step": 27903 + }, + { + "epoch": 0.93, + "grad_norm": 0.7619758248329163, + "learning_rate": 2.6069998874810766e-07, + "loss": 2.0926, + "step": 27904 + }, + { + "epoch": 0.93, + "grad_norm": 0.7452168464660645, + "learning_rate": 2.604589278346781e-07, + "loss": 2.0076, + "step": 27905 + }, + { + "epoch": 0.93, + "grad_norm": 0.7235488295555115, + "learning_rate": 2.6021797695287453e-07, + "loss": 2.025, + "step": 27906 + }, + { + "epoch": 0.93, + "grad_norm": 0.7461463809013367, + "learning_rate": 2.599771361054193e-07, + "loss": 2.0504, + "step": 27907 + }, + { + "epoch": 0.93, + "grad_norm": 0.7565953731536865, + "learning_rate": 2.5973640529503466e-07, + "loss": 2.1018, + "step": 27908 + }, + { + "epoch": 0.93, + "grad_norm": 0.7196838855743408, + "learning_rate": 2.594957845244417e-07, + "loss": 1.9591, + "step": 27909 + }, + { + "epoch": 0.93, + "grad_norm": 0.7476866245269775, + "learning_rate": 2.59255273796355e-07, + "loss": 2.108, + "step": 27910 + }, + { + "epoch": 0.93, + "grad_norm": 0.7530679106712341, + "learning_rate": 2.590148731134923e-07, + "loss": 1.9993, + "step": 27911 + }, + { + "epoch": 0.93, + "grad_norm": 0.7391665577888489, + "learning_rate": 2.587745824785726e-07, + "loss": 2.0114, + "step": 27912 + }, + { + "epoch": 0.93, + "grad_norm": 0.7437525987625122, + "learning_rate": 2.5853440189430814e-07, + "loss": 2.0954, + "step": 27913 + }, + { + "epoch": 0.93, + "grad_norm": 0.7517217397689819, + "learning_rate": 2.582943313634134e-07, + "loss": 2.0752, + "step": 27914 + }, + { + "epoch": 0.93, + "grad_norm": 0.7529842257499695, + "learning_rate": 2.5805437088859964e-07, + "loss": 2.0175, + "step": 27915 + }, + { + "epoch": 0.93, + "grad_norm": 0.7393817901611328, + "learning_rate": 2.5781452047257905e-07, + "loss": 1.97, + "step": 27916 + }, + { + "epoch": 0.93, + "grad_norm": 0.7171664237976074, + "learning_rate": 2.575747801180595e-07, + "loss": 2.0042, + "step": 27917 + }, + { + "epoch": 0.93, + "grad_norm": 0.7397173047065735, + "learning_rate": 2.573351498277521e-07, + "loss": 2.0198, + "step": 27918 + }, + { + "epoch": 0.93, + "grad_norm": 0.7386016845703125, + "learning_rate": 2.570956296043614e-07, + "loss": 2.0639, + "step": 27919 + }, + { + "epoch": 0.93, + "grad_norm": 0.7404997944831848, + "learning_rate": 2.5685621945059414e-07, + "loss": 2.0004, + "step": 27920 + }, + { + "epoch": 0.93, + "grad_norm": 0.7680494785308838, + "learning_rate": 2.5661691936915477e-07, + "loss": 2.0211, + "step": 27921 + }, + { + "epoch": 0.93, + "grad_norm": 0.7619282603263855, + "learning_rate": 2.5637772936274783e-07, + "loss": 2.0734, + "step": 27922 + }, + { + "epoch": 0.93, + "grad_norm": 0.7816585898399353, + "learning_rate": 2.561386494340756e-07, + "loss": 2.0264, + "step": 27923 + }, + { + "epoch": 0.93, + "grad_norm": 0.7177309989929199, + "learning_rate": 2.5589967958583706e-07, + "loss": 2.0349, + "step": 27924 + }, + { + "epoch": 0.93, + "grad_norm": 0.7667866945266724, + "learning_rate": 2.5566081982073443e-07, + "loss": 1.9945, + "step": 27925 + }, + { + "epoch": 0.93, + "grad_norm": 0.7431163787841797, + "learning_rate": 2.554220701414645e-07, + "loss": 2.1019, + "step": 27926 + }, + { + "epoch": 0.93, + "grad_norm": 0.7544727325439453, + "learning_rate": 2.5518343055072615e-07, + "loss": 1.9863, + "step": 27927 + }, + { + "epoch": 0.93, + "grad_norm": 0.7579176425933838, + "learning_rate": 2.5494490105121396e-07, + "loss": 2.0565, + "step": 27928 + }, + { + "epoch": 0.93, + "grad_norm": 0.7550086379051208, + "learning_rate": 2.547064816456224e-07, + "loss": 2.0313, + "step": 27929 + }, + { + "epoch": 0.93, + "grad_norm": 0.7622935175895691, + "learning_rate": 2.54468172336646e-07, + "loss": 2.0841, + "step": 27930 + }, + { + "epoch": 0.93, + "grad_norm": 0.7593290209770203, + "learning_rate": 2.5422997312697704e-07, + "loss": 2.1454, + "step": 27931 + }, + { + "epoch": 0.93, + "grad_norm": 0.7562953233718872, + "learning_rate": 2.5399188401930676e-07, + "loss": 2.0461, + "step": 27932 + }, + { + "epoch": 0.93, + "grad_norm": 0.7268831729888916, + "learning_rate": 2.537539050163229e-07, + "loss": 2.0605, + "step": 27933 + }, + { + "epoch": 0.93, + "grad_norm": 0.7707730531692505, + "learning_rate": 2.5351603612071784e-07, + "loss": 2.0689, + "step": 27934 + }, + { + "epoch": 0.93, + "grad_norm": 0.7599323391914368, + "learning_rate": 2.5327827733517385e-07, + "loss": 2.058, + "step": 27935 + }, + { + "epoch": 0.93, + "grad_norm": 0.7391889691352844, + "learning_rate": 2.530406286623821e-07, + "loss": 2.0067, + "step": 27936 + }, + { + "epoch": 0.93, + "grad_norm": 0.7436203956604004, + "learning_rate": 2.528030901050238e-07, + "loss": 2.038, + "step": 27937 + }, + { + "epoch": 0.93, + "grad_norm": 0.7612622380256653, + "learning_rate": 2.5256566166578455e-07, + "loss": 2.0749, + "step": 27938 + }, + { + "epoch": 0.93, + "grad_norm": 0.7785054445266724, + "learning_rate": 2.5232834334734445e-07, + "loss": 2.0955, + "step": 27939 + }, + { + "epoch": 0.93, + "grad_norm": 0.7840487957000732, + "learning_rate": 2.52091135152388e-07, + "loss": 2.0645, + "step": 27940 + }, + { + "epoch": 0.93, + "grad_norm": 0.7244338393211365, + "learning_rate": 2.5185403708359204e-07, + "loss": 2.0585, + "step": 27941 + }, + { + "epoch": 0.93, + "grad_norm": 0.7714110612869263, + "learning_rate": 2.5161704914363536e-07, + "loss": 1.9534, + "step": 27942 + }, + { + "epoch": 0.93, + "grad_norm": 0.718863844871521, + "learning_rate": 2.513801713351971e-07, + "loss": 2.0082, + "step": 27943 + }, + { + "epoch": 0.93, + "grad_norm": 0.7214365601539612, + "learning_rate": 2.511434036609528e-07, + "loss": 1.9924, + "step": 27944 + }, + { + "epoch": 0.93, + "grad_norm": 0.7528209090232849, + "learning_rate": 2.5090674612357704e-07, + "loss": 2.0595, + "step": 27945 + }, + { + "epoch": 0.93, + "grad_norm": 0.7849499583244324, + "learning_rate": 2.5067019872574205e-07, + "loss": 2.1153, + "step": 27946 + }, + { + "epoch": 0.93, + "grad_norm": 0.7654549479484558, + "learning_rate": 2.5043376147012243e-07, + "loss": 2.058, + "step": 27947 + }, + { + "epoch": 0.93, + "grad_norm": 0.7268943190574646, + "learning_rate": 2.5019743435938715e-07, + "loss": 2.0381, + "step": 27948 + }, + { + "epoch": 0.93, + "grad_norm": 0.7318461537361145, + "learning_rate": 2.499612173962096e-07, + "loss": 2.0569, + "step": 27949 + }, + { + "epoch": 0.93, + "grad_norm": 0.7483956813812256, + "learning_rate": 2.4972511058325434e-07, + "loss": 2.0772, + "step": 27950 + }, + { + "epoch": 0.93, + "grad_norm": 0.7712288498878479, + "learning_rate": 2.4948911392319143e-07, + "loss": 2.028, + "step": 27951 + }, + { + "epoch": 0.93, + "grad_norm": 0.7424534559249878, + "learning_rate": 2.492532274186843e-07, + "loss": 2.0384, + "step": 27952 + }, + { + "epoch": 0.93, + "grad_norm": 0.7072364091873169, + "learning_rate": 2.4901745107240195e-07, + "loss": 1.9763, + "step": 27953 + }, + { + "epoch": 0.93, + "grad_norm": 0.7210165858268738, + "learning_rate": 2.4878178488700554e-07, + "loss": 1.9684, + "step": 27954 + }, + { + "epoch": 0.93, + "grad_norm": 0.7977557182312012, + "learning_rate": 2.4854622886515634e-07, + "loss": 2.0878, + "step": 27955 + }, + { + "epoch": 0.93, + "grad_norm": 0.748574435710907, + "learning_rate": 2.483107830095188e-07, + "loss": 2.0812, + "step": 27956 + }, + { + "epoch": 0.93, + "grad_norm": 0.7427569627761841, + "learning_rate": 2.480754473227498e-07, + "loss": 2.0108, + "step": 27957 + }, + { + "epoch": 0.93, + "grad_norm": 0.7094568610191345, + "learning_rate": 2.478402218075093e-07, + "loss": 2.016, + "step": 27958 + }, + { + "epoch": 0.93, + "grad_norm": 0.7647199630737305, + "learning_rate": 2.476051064664542e-07, + "loss": 2.0081, + "step": 27959 + }, + { + "epoch": 0.93, + "grad_norm": 0.7378902435302734, + "learning_rate": 2.473701013022423e-07, + "loss": 2.1271, + "step": 27960 + }, + { + "epoch": 0.93, + "grad_norm": 0.7373604774475098, + "learning_rate": 2.4713520631752587e-07, + "loss": 2.0092, + "step": 27961 + }, + { + "epoch": 0.93, + "grad_norm": 0.7290225028991699, + "learning_rate": 2.4690042151496174e-07, + "loss": 2.0065, + "step": 27962 + }, + { + "epoch": 0.93, + "grad_norm": 0.734314501285553, + "learning_rate": 2.466657468972e-07, + "loss": 1.9777, + "step": 27963 + }, + { + "epoch": 0.93, + "grad_norm": 0.7562270164489746, + "learning_rate": 2.46431182466893e-07, + "loss": 2.0512, + "step": 27964 + }, + { + "epoch": 0.93, + "grad_norm": 0.7201805710792542, + "learning_rate": 2.4619672822668974e-07, + "loss": 2.0894, + "step": 27965 + }, + { + "epoch": 0.93, + "grad_norm": 0.7354416251182556, + "learning_rate": 2.4596238417924025e-07, + "loss": 2.0006, + "step": 27966 + }, + { + "epoch": 0.93, + "grad_norm": 0.7386675477027893, + "learning_rate": 2.4572815032719133e-07, + "loss": 2.0823, + "step": 27967 + }, + { + "epoch": 0.93, + "grad_norm": 0.742964506149292, + "learning_rate": 2.4549402667318754e-07, + "loss": 2.0778, + "step": 27968 + }, + { + "epoch": 0.93, + "grad_norm": 0.7420713901519775, + "learning_rate": 2.452600132198779e-07, + "loss": 1.9728, + "step": 27969 + }, + { + "epoch": 0.93, + "grad_norm": 0.7666468620300293, + "learning_rate": 2.450261099699014e-07, + "loss": 2.0495, + "step": 27970 + }, + { + "epoch": 0.93, + "grad_norm": 0.7206969857215881, + "learning_rate": 2.447923169259048e-07, + "loss": 2.0392, + "step": 27971 + }, + { + "epoch": 0.93, + "grad_norm": 0.7331469058990479, + "learning_rate": 2.4455863409052816e-07, + "loss": 1.9956, + "step": 27972 + }, + { + "epoch": 0.93, + "grad_norm": 0.7426043748855591, + "learning_rate": 2.443250614664083e-07, + "loss": 2.1411, + "step": 27973 + }, + { + "epoch": 0.93, + "grad_norm": 0.7391018867492676, + "learning_rate": 2.440915990561876e-07, + "loss": 2.0752, + "step": 27974 + }, + { + "epoch": 0.93, + "grad_norm": 0.7588388919830322, + "learning_rate": 2.438582468625028e-07, + "loss": 2.0083, + "step": 27975 + }, + { + "epoch": 0.93, + "grad_norm": 0.7827141284942627, + "learning_rate": 2.436250048879907e-07, + "loss": 2.0645, + "step": 27976 + }, + { + "epoch": 0.93, + "grad_norm": 0.7294793725013733, + "learning_rate": 2.433918731352836e-07, + "loss": 1.9831, + "step": 27977 + }, + { + "epoch": 0.93, + "grad_norm": 0.7249341607093811, + "learning_rate": 2.4315885160701936e-07, + "loss": 2.0523, + "step": 27978 + }, + { + "epoch": 0.93, + "grad_norm": 0.7558111548423767, + "learning_rate": 2.4292594030582597e-07, + "loss": 2.0793, + "step": 27979 + }, + { + "epoch": 0.93, + "grad_norm": 0.7405421733856201, + "learning_rate": 2.4269313923433904e-07, + "loss": 2.044, + "step": 27980 + }, + { + "epoch": 0.93, + "grad_norm": 0.7508049011230469, + "learning_rate": 2.4246044839518534e-07, + "loss": 2.016, + "step": 27981 + }, + { + "epoch": 0.93, + "grad_norm": 0.7585733532905579, + "learning_rate": 2.4222786779099617e-07, + "loss": 2.0529, + "step": 27982 + }, + { + "epoch": 0.93, + "grad_norm": 0.7552363276481628, + "learning_rate": 2.41995397424396e-07, + "loss": 1.9681, + "step": 27983 + }, + { + "epoch": 0.93, + "grad_norm": 0.7727384567260742, + "learning_rate": 2.417630372980151e-07, + "loss": 2.0242, + "step": 27984 + }, + { + "epoch": 0.93, + "grad_norm": 0.7331233024597168, + "learning_rate": 2.415307874144768e-07, + "loss": 2.0114, + "step": 27985 + }, + { + "epoch": 0.93, + "grad_norm": 0.728410542011261, + "learning_rate": 2.4129864777640235e-07, + "loss": 2.0077, + "step": 27986 + }, + { + "epoch": 0.93, + "grad_norm": 0.7551946043968201, + "learning_rate": 2.410666183864174e-07, + "loss": 1.9697, + "step": 27987 + }, + { + "epoch": 0.93, + "grad_norm": 0.7394986748695374, + "learning_rate": 2.4083469924714443e-07, + "loss": 1.9924, + "step": 27988 + }, + { + "epoch": 0.93, + "grad_norm": 0.7358717918395996, + "learning_rate": 2.406028903612001e-07, + "loss": 2.0273, + "step": 27989 + }, + { + "epoch": 0.93, + "grad_norm": 0.7398594617843628, + "learning_rate": 2.403711917312046e-07, + "loss": 2.0204, + "step": 27990 + }, + { + "epoch": 0.93, + "grad_norm": 0.7531290054321289, + "learning_rate": 2.4013960335977584e-07, + "loss": 2.0382, + "step": 27991 + }, + { + "epoch": 0.93, + "grad_norm": 0.7616372108459473, + "learning_rate": 2.3990812524952946e-07, + "loss": 2.0877, + "step": 27992 + }, + { + "epoch": 0.93, + "grad_norm": 0.7439168691635132, + "learning_rate": 2.3967675740308226e-07, + "loss": 2.0348, + "step": 27993 + }, + { + "epoch": 0.93, + "grad_norm": 0.739776611328125, + "learning_rate": 2.3944549982304664e-07, + "loss": 2.0625, + "step": 27994 + }, + { + "epoch": 0.93, + "grad_norm": 0.7496808767318726, + "learning_rate": 2.3921435251203496e-07, + "loss": 1.993, + "step": 27995 + }, + { + "epoch": 0.93, + "grad_norm": 0.7755863666534424, + "learning_rate": 2.389833154726595e-07, + "loss": 2.0431, + "step": 27996 + }, + { + "epoch": 0.93, + "grad_norm": 0.7898247838020325, + "learning_rate": 2.3875238870753046e-07, + "loss": 2.1109, + "step": 27997 + }, + { + "epoch": 0.93, + "grad_norm": 0.7207484245300293, + "learning_rate": 2.385215722192558e-07, + "loss": 2.0052, + "step": 27998 + }, + { + "epoch": 0.93, + "grad_norm": 0.7522338628768921, + "learning_rate": 2.3829086601044327e-07, + "loss": 1.943, + "step": 27999 + }, + { + "epoch": 0.93, + "grad_norm": 0.744543731212616, + "learning_rate": 2.380602700837009e-07, + "loss": 2.1063, + "step": 28000 + }, + { + "epoch": 0.93, + "grad_norm": 0.7459607720375061, + "learning_rate": 2.3782978444163108e-07, + "loss": 2.0936, + "step": 28001 + }, + { + "epoch": 0.93, + "grad_norm": 0.7656667232513428, + "learning_rate": 2.3759940908683942e-07, + "loss": 2.0531, + "step": 28002 + }, + { + "epoch": 0.93, + "grad_norm": 0.760022759437561, + "learning_rate": 2.3736914402192834e-07, + "loss": 1.9323, + "step": 28003 + }, + { + "epoch": 0.93, + "grad_norm": 0.7677549719810486, + "learning_rate": 2.3713898924950018e-07, + "loss": 2.0752, + "step": 28004 + }, + { + "epoch": 0.93, + "grad_norm": 0.7869532704353333, + "learning_rate": 2.3690894477215288e-07, + "loss": 2.0282, + "step": 28005 + }, + { + "epoch": 0.93, + "grad_norm": 0.725181519985199, + "learning_rate": 2.3667901059248656e-07, + "loss": 2.0404, + "step": 28006 + }, + { + "epoch": 0.93, + "grad_norm": 0.7499891519546509, + "learning_rate": 2.3644918671310024e-07, + "loss": 1.9886, + "step": 28007 + }, + { + "epoch": 0.93, + "grad_norm": 0.7205195426940918, + "learning_rate": 2.3621947313658856e-07, + "loss": 1.9988, + "step": 28008 + }, + { + "epoch": 0.93, + "grad_norm": 0.7316867113113403, + "learning_rate": 2.3598986986554606e-07, + "loss": 2.0647, + "step": 28009 + }, + { + "epoch": 0.93, + "grad_norm": 0.7315390706062317, + "learning_rate": 2.3576037690256848e-07, + "loss": 1.9771, + "step": 28010 + }, + { + "epoch": 0.93, + "grad_norm": 0.7673647999763489, + "learning_rate": 2.3553099425024818e-07, + "loss": 2.0435, + "step": 28011 + }, + { + "epoch": 0.93, + "grad_norm": 0.7407047748565674, + "learning_rate": 2.3530172191117528e-07, + "loss": 1.9932, + "step": 28012 + }, + { + "epoch": 0.93, + "grad_norm": 0.732793390750885, + "learning_rate": 2.350725598879422e-07, + "loss": 2.0413, + "step": 28013 + }, + { + "epoch": 0.93, + "grad_norm": 0.771995484828949, + "learning_rate": 2.348435081831346e-07, + "loss": 2.0519, + "step": 28014 + }, + { + "epoch": 0.93, + "grad_norm": 0.7245350480079651, + "learning_rate": 2.346145667993427e-07, + "loss": 2.0432, + "step": 28015 + }, + { + "epoch": 0.93, + "grad_norm": 0.7007312774658203, + "learning_rate": 2.3438573573915436e-07, + "loss": 1.9817, + "step": 28016 + }, + { + "epoch": 0.93, + "grad_norm": 0.7540026903152466, + "learning_rate": 2.3415701500515086e-07, + "loss": 2.0353, + "step": 28017 + }, + { + "epoch": 0.93, + "grad_norm": 0.7522410154342651, + "learning_rate": 2.3392840459991795e-07, + "loss": 2.077, + "step": 28018 + }, + { + "epoch": 0.93, + "grad_norm": 0.7347499132156372, + "learning_rate": 2.3369990452603907e-07, + "loss": 2.0153, + "step": 28019 + }, + { + "epoch": 0.93, + "grad_norm": 0.735512375831604, + "learning_rate": 2.3347151478609554e-07, + "loss": 2.0494, + "step": 28020 + }, + { + "epoch": 0.93, + "grad_norm": 0.802232027053833, + "learning_rate": 2.3324323538266524e-07, + "loss": 2.09, + "step": 28021 + }, + { + "epoch": 0.93, + "grad_norm": 0.7380008101463318, + "learning_rate": 2.330150663183295e-07, + "loss": 2.0469, + "step": 28022 + }, + { + "epoch": 0.93, + "grad_norm": 0.7257122993469238, + "learning_rate": 2.327870075956673e-07, + "loss": 2.0794, + "step": 28023 + }, + { + "epoch": 0.93, + "grad_norm": 0.7509109377861023, + "learning_rate": 2.3255905921725219e-07, + "loss": 2.1458, + "step": 28024 + }, + { + "epoch": 0.93, + "grad_norm": 0.7736875414848328, + "learning_rate": 2.3233122118565988e-07, + "loss": 1.9821, + "step": 28025 + }, + { + "epoch": 0.93, + "grad_norm": 0.7294339537620544, + "learning_rate": 2.3210349350346607e-07, + "loss": 2.044, + "step": 28026 + }, + { + "epoch": 0.93, + "grad_norm": 0.7399587631225586, + "learning_rate": 2.3187587617324204e-07, + "loss": 2.0529, + "step": 28027 + }, + { + "epoch": 0.93, + "grad_norm": 0.7400123476982117, + "learning_rate": 2.3164836919755905e-07, + "loss": 2.0704, + "step": 28028 + }, + { + "epoch": 0.93, + "grad_norm": 0.7414452433586121, + "learning_rate": 2.3142097257898955e-07, + "loss": 2.0573, + "step": 28029 + }, + { + "epoch": 0.93, + "grad_norm": 0.734362781047821, + "learning_rate": 2.3119368632010031e-07, + "loss": 2.0112, + "step": 28030 + }, + { + "epoch": 0.93, + "grad_norm": 0.7472421526908875, + "learning_rate": 2.309665104234582e-07, + "loss": 2.0708, + "step": 28031 + }, + { + "epoch": 0.93, + "grad_norm": 0.7583260536193848, + "learning_rate": 2.3073944489163337e-07, + "loss": 2.0596, + "step": 28032 + }, + { + "epoch": 0.93, + "grad_norm": 0.7616268396377563, + "learning_rate": 2.305124897271882e-07, + "loss": 2.0421, + "step": 28033 + }, + { + "epoch": 0.93, + "grad_norm": 0.7394059896469116, + "learning_rate": 2.3028564493268733e-07, + "loss": 2.007, + "step": 28034 + }, + { + "epoch": 0.93, + "grad_norm": 0.7329918146133423, + "learning_rate": 2.300589105106943e-07, + "loss": 2.0464, + "step": 28035 + }, + { + "epoch": 0.93, + "grad_norm": 0.734288215637207, + "learning_rate": 2.2983228646376808e-07, + "loss": 1.9856, + "step": 28036 + }, + { + "epoch": 0.93, + "grad_norm": 0.7182545065879822, + "learning_rate": 2.2960577279447116e-07, + "loss": 2.0219, + "step": 28037 + }, + { + "epoch": 0.93, + "grad_norm": 0.7408545613288879, + "learning_rate": 2.2937936950536365e-07, + "loss": 2.0557, + "step": 28038 + }, + { + "epoch": 0.93, + "grad_norm": 0.7295119762420654, + "learning_rate": 2.2915307659900243e-07, + "loss": 2.0209, + "step": 28039 + }, + { + "epoch": 0.93, + "grad_norm": 0.7122246026992798, + "learning_rate": 2.28926894077941e-07, + "loss": 1.9999, + "step": 28040 + }, + { + "epoch": 0.93, + "grad_norm": 0.737155556678772, + "learning_rate": 2.2870082194473954e-07, + "loss": 2.0253, + "step": 28041 + }, + { + "epoch": 0.93, + "grad_norm": 0.7549123167991638, + "learning_rate": 2.284748602019482e-07, + "loss": 2.0351, + "step": 28042 + }, + { + "epoch": 0.93, + "grad_norm": 0.7305973172187805, + "learning_rate": 2.2824900885212165e-07, + "loss": 1.9916, + "step": 28043 + }, + { + "epoch": 0.93, + "grad_norm": 0.7417933940887451, + "learning_rate": 2.2802326789781005e-07, + "loss": 2.0495, + "step": 28044 + }, + { + "epoch": 0.93, + "grad_norm": 0.7741519212722778, + "learning_rate": 2.277976373415658e-07, + "loss": 2.0991, + "step": 28045 + }, + { + "epoch": 0.93, + "grad_norm": 0.7422214150428772, + "learning_rate": 2.2757211718593686e-07, + "loss": 2.0584, + "step": 28046 + }, + { + "epoch": 0.93, + "grad_norm": 0.7591766119003296, + "learning_rate": 2.273467074334701e-07, + "loss": 2.0612, + "step": 28047 + }, + { + "epoch": 0.93, + "grad_norm": 0.7199655771255493, + "learning_rate": 2.2712140808671345e-07, + "loss": 2.1208, + "step": 28048 + }, + { + "epoch": 0.93, + "grad_norm": 0.7292852997779846, + "learning_rate": 2.2689621914821157e-07, + "loss": 2.0082, + "step": 28049 + }, + { + "epoch": 0.93, + "grad_norm": 0.7759328484535217, + "learning_rate": 2.26671140620508e-07, + "loss": 1.97, + "step": 28050 + }, + { + "epoch": 0.93, + "grad_norm": 0.7437945008277893, + "learning_rate": 2.2644617250614732e-07, + "loss": 2.0394, + "step": 28051 + }, + { + "epoch": 0.93, + "grad_norm": 0.7486053705215454, + "learning_rate": 2.2622131480766974e-07, + "loss": 2.0467, + "step": 28052 + }, + { + "epoch": 0.93, + "grad_norm": 0.734088659286499, + "learning_rate": 2.2599656752761433e-07, + "loss": 2.0934, + "step": 28053 + }, + { + "epoch": 0.93, + "grad_norm": 0.7579296827316284, + "learning_rate": 2.2577193066852242e-07, + "loss": 2.0909, + "step": 28054 + }, + { + "epoch": 0.93, + "grad_norm": 0.7074944972991943, + "learning_rate": 2.2554740423293198e-07, + "loss": 2.0442, + "step": 28055 + }, + { + "epoch": 0.93, + "grad_norm": 0.7578952312469482, + "learning_rate": 2.2532298822337762e-07, + "loss": 2.0137, + "step": 28056 + }, + { + "epoch": 0.93, + "grad_norm": 0.7697674632072449, + "learning_rate": 2.250986826423962e-07, + "loss": 2.0474, + "step": 28057 + }, + { + "epoch": 0.93, + "grad_norm": 0.7605390548706055, + "learning_rate": 2.2487448749252017e-07, + "loss": 2.0712, + "step": 28058 + }, + { + "epoch": 0.93, + "grad_norm": 0.7327914237976074, + "learning_rate": 2.2465040277628303e-07, + "loss": 2.0572, + "step": 28059 + }, + { + "epoch": 0.93, + "grad_norm": 0.7244095206260681, + "learning_rate": 2.244264284962183e-07, + "loss": 2.0356, + "step": 28060 + }, + { + "epoch": 0.93, + "grad_norm": 0.7222782969474792, + "learning_rate": 2.2420256465485403e-07, + "loss": 2.0454, + "step": 28061 + }, + { + "epoch": 0.93, + "grad_norm": 0.7518909573554993, + "learning_rate": 2.2397881125471922e-07, + "loss": 2.0339, + "step": 28062 + }, + { + "epoch": 0.93, + "grad_norm": 0.7530210614204407, + "learning_rate": 2.237551682983441e-07, + "loss": 1.9957, + "step": 28063 + }, + { + "epoch": 0.93, + "grad_norm": 0.7931407690048218, + "learning_rate": 2.2353163578825333e-07, + "loss": 2.0776, + "step": 28064 + }, + { + "epoch": 0.93, + "grad_norm": 0.7547805309295654, + "learning_rate": 2.2330821372697154e-07, + "loss": 2.0355, + "step": 28065 + }, + { + "epoch": 0.93, + "grad_norm": 0.761118471622467, + "learning_rate": 2.2308490211702338e-07, + "loss": 2.0813, + "step": 28066 + }, + { + "epoch": 0.93, + "grad_norm": 0.750560462474823, + "learning_rate": 2.2286170096093352e-07, + "loss": 2.0455, + "step": 28067 + }, + { + "epoch": 0.93, + "grad_norm": 0.7362675070762634, + "learning_rate": 2.2263861026122213e-07, + "loss": 2.0452, + "step": 28068 + }, + { + "epoch": 0.93, + "grad_norm": 0.738545835018158, + "learning_rate": 2.2241563002040945e-07, + "loss": 2.0323, + "step": 28069 + }, + { + "epoch": 0.93, + "grad_norm": 0.7388875484466553, + "learning_rate": 2.2219276024101456e-07, + "loss": 1.9414, + "step": 28070 + }, + { + "epoch": 0.93, + "grad_norm": 0.7838796973228455, + "learning_rate": 2.2197000092555544e-07, + "loss": 2.1119, + "step": 28071 + }, + { + "epoch": 0.93, + "grad_norm": 0.7481625080108643, + "learning_rate": 2.2174735207654895e-07, + "loss": 2.0112, + "step": 28072 + }, + { + "epoch": 0.93, + "grad_norm": 0.7383284568786621, + "learning_rate": 2.215248136965109e-07, + "loss": 2.0938, + "step": 28073 + }, + { + "epoch": 0.93, + "grad_norm": 0.7471939921379089, + "learning_rate": 2.2130238578795372e-07, + "loss": 2.0967, + "step": 28074 + }, + { + "epoch": 0.93, + "grad_norm": 0.7329567670822144, + "learning_rate": 2.210800683533909e-07, + "loss": 1.9997, + "step": 28075 + }, + { + "epoch": 0.93, + "grad_norm": 0.7704600691795349, + "learning_rate": 2.2085786139533606e-07, + "loss": 2.0782, + "step": 28076 + }, + { + "epoch": 0.93, + "grad_norm": 0.7359515428543091, + "learning_rate": 2.2063576491629712e-07, + "loss": 2.0162, + "step": 28077 + }, + { + "epoch": 0.93, + "grad_norm": 0.7156592011451721, + "learning_rate": 2.2041377891878436e-07, + "loss": 1.9805, + "step": 28078 + }, + { + "epoch": 0.93, + "grad_norm": 0.7603126168251038, + "learning_rate": 2.201919034053046e-07, + "loss": 2.0579, + "step": 28079 + }, + { + "epoch": 0.93, + "grad_norm": 0.7617754340171814, + "learning_rate": 2.1997013837836589e-07, + "loss": 2.0822, + "step": 28080 + }, + { + "epoch": 0.93, + "grad_norm": 0.7407201528549194, + "learning_rate": 2.1974848384047177e-07, + "loss": 1.9292, + "step": 28081 + }, + { + "epoch": 0.93, + "grad_norm": 0.7522522807121277, + "learning_rate": 2.1952693979412798e-07, + "loss": 2.049, + "step": 28082 + }, + { + "epoch": 0.93, + "grad_norm": 0.7404746413230896, + "learning_rate": 2.193055062418381e-07, + "loss": 2.0118, + "step": 28083 + }, + { + "epoch": 0.93, + "grad_norm": 0.7674007415771484, + "learning_rate": 2.1908418318610125e-07, + "loss": 2.0128, + "step": 28084 + }, + { + "epoch": 0.93, + "grad_norm": 0.76450115442276, + "learning_rate": 2.1886297062941985e-07, + "loss": 2.1129, + "step": 28085 + }, + { + "epoch": 0.93, + "grad_norm": 0.7293853759765625, + "learning_rate": 2.1864186857429303e-07, + "loss": 2.0713, + "step": 28086 + }, + { + "epoch": 0.93, + "grad_norm": 0.7815470099449158, + "learning_rate": 2.1842087702321545e-07, + "loss": 2.0846, + "step": 28087 + }, + { + "epoch": 0.93, + "grad_norm": 0.7278974652290344, + "learning_rate": 2.1819999597868735e-07, + "loss": 2.1167, + "step": 28088 + }, + { + "epoch": 0.93, + "grad_norm": 0.7420892119407654, + "learning_rate": 2.179792254432045e-07, + "loss": 2.006, + "step": 28089 + }, + { + "epoch": 0.93, + "grad_norm": 0.7969798445701599, + "learning_rate": 2.177585654192571e-07, + "loss": 2.0815, + "step": 28090 + }, + { + "epoch": 0.93, + "grad_norm": 0.755756676197052, + "learning_rate": 2.175380159093421e-07, + "loss": 1.9916, + "step": 28091 + }, + { + "epoch": 0.93, + "grad_norm": 0.7682430148124695, + "learning_rate": 2.1731757691594968e-07, + "loss": 2.0873, + "step": 28092 + }, + { + "epoch": 0.93, + "grad_norm": 0.7515072226524353, + "learning_rate": 2.17097248441569e-07, + "loss": 2.0835, + "step": 28093 + }, + { + "epoch": 0.93, + "grad_norm": 0.7316335439682007, + "learning_rate": 2.1687703048869025e-07, + "loss": 2.0479, + "step": 28094 + }, + { + "epoch": 0.93, + "grad_norm": 0.7294155359268188, + "learning_rate": 2.166569230598037e-07, + "loss": 2.0195, + "step": 28095 + }, + { + "epoch": 0.93, + "grad_norm": 0.7303521633148193, + "learning_rate": 2.1643692615739176e-07, + "loss": 2.0483, + "step": 28096 + }, + { + "epoch": 0.93, + "grad_norm": 0.7538743615150452, + "learning_rate": 2.1621703978394137e-07, + "loss": 2.1011, + "step": 28097 + }, + { + "epoch": 0.93, + "grad_norm": 0.7781128883361816, + "learning_rate": 2.1599726394193722e-07, + "loss": 2.0983, + "step": 28098 + }, + { + "epoch": 0.93, + "grad_norm": 0.7532880306243896, + "learning_rate": 2.157775986338617e-07, + "loss": 2.0182, + "step": 28099 + }, + { + "epoch": 0.93, + "grad_norm": 0.7647575736045837, + "learning_rate": 2.1555804386219735e-07, + "loss": 2.0347, + "step": 28100 + }, + { + "epoch": 0.93, + "grad_norm": 0.7569502592086792, + "learning_rate": 2.1533859962942438e-07, + "loss": 2.0564, + "step": 28101 + }, + { + "epoch": 0.93, + "grad_norm": 0.7714378833770752, + "learning_rate": 2.151192659380208e-07, + "loss": 2.1549, + "step": 28102 + }, + { + "epoch": 0.93, + "grad_norm": 0.7836398482322693, + "learning_rate": 2.149000427904646e-07, + "loss": 2.0741, + "step": 28103 + }, + { + "epoch": 0.94, + "grad_norm": 0.7313203811645508, + "learning_rate": 2.1468093018923497e-07, + "loss": 1.9401, + "step": 28104 + }, + { + "epoch": 0.94, + "grad_norm": 0.7311463356018066, + "learning_rate": 2.1446192813680433e-07, + "loss": 2.0176, + "step": 28105 + }, + { + "epoch": 0.94, + "grad_norm": 0.7424915432929993, + "learning_rate": 2.1424303663564737e-07, + "loss": 2.0127, + "step": 28106 + }, + { + "epoch": 0.94, + "grad_norm": 0.7267068028450012, + "learning_rate": 2.140242556882377e-07, + "loss": 2.0301, + "step": 28107 + }, + { + "epoch": 0.94, + "grad_norm": 0.7418103218078613, + "learning_rate": 2.1380558529704888e-07, + "loss": 1.996, + "step": 28108 + }, + { + "epoch": 0.94, + "grad_norm": 0.8219940662384033, + "learning_rate": 2.1358702546454779e-07, + "loss": 2.0073, + "step": 28109 + }, + { + "epoch": 0.94, + "grad_norm": 0.732789933681488, + "learning_rate": 2.1336857619320362e-07, + "loss": 2.1028, + "step": 28110 + }, + { + "epoch": 0.94, + "grad_norm": 0.748769998550415, + "learning_rate": 2.131502374854877e-07, + "loss": 2.0909, + "step": 28111 + }, + { + "epoch": 0.94, + "grad_norm": 0.7236852645874023, + "learning_rate": 2.129320093438636e-07, + "loss": 1.9705, + "step": 28112 + }, + { + "epoch": 0.94, + "grad_norm": 0.7357243895530701, + "learning_rate": 2.1271389177079938e-07, + "loss": 2.0998, + "step": 28113 + }, + { + "epoch": 0.94, + "grad_norm": 0.743503212928772, + "learning_rate": 2.124958847687575e-07, + "loss": 2.0692, + "step": 28114 + }, + { + "epoch": 0.94, + "grad_norm": 0.7469790577888489, + "learning_rate": 2.122779883401993e-07, + "loss": 2.1115, + "step": 28115 + }, + { + "epoch": 0.94, + "grad_norm": 0.7310565710067749, + "learning_rate": 2.120602024875895e-07, + "loss": 2.0823, + "step": 28116 + }, + { + "epoch": 0.94, + "grad_norm": 0.7582325339317322, + "learning_rate": 2.1184252721338838e-07, + "loss": 2.0617, + "step": 28117 + }, + { + "epoch": 0.94, + "grad_norm": 0.7520031929016113, + "learning_rate": 2.1162496252005172e-07, + "loss": 2.0341, + "step": 28118 + }, + { + "epoch": 0.94, + "grad_norm": 0.7410632967948914, + "learning_rate": 2.1140750841003975e-07, + "loss": 2.0661, + "step": 28119 + }, + { + "epoch": 0.94, + "grad_norm": 0.7370291352272034, + "learning_rate": 2.1119016488581058e-07, + "loss": 1.9691, + "step": 28120 + }, + { + "epoch": 0.94, + "grad_norm": 0.713188111782074, + "learning_rate": 2.1097293194981662e-07, + "loss": 2.0151, + "step": 28121 + }, + { + "epoch": 0.94, + "grad_norm": 0.7436332106590271, + "learning_rate": 2.107558096045148e-07, + "loss": 2.0221, + "step": 28122 + }, + { + "epoch": 0.94, + "grad_norm": 0.7481253147125244, + "learning_rate": 2.1053879785235653e-07, + "loss": 2.0088, + "step": 28123 + }, + { + "epoch": 0.94, + "grad_norm": 0.7673843502998352, + "learning_rate": 2.1032189669579317e-07, + "loss": 2.0916, + "step": 28124 + }, + { + "epoch": 0.94, + "grad_norm": 0.7568921446800232, + "learning_rate": 2.101051061372761e-07, + "loss": 2.0285, + "step": 28125 + }, + { + "epoch": 0.94, + "grad_norm": 0.7405672073364258, + "learning_rate": 2.0988842617925442e-07, + "loss": 1.9999, + "step": 28126 + }, + { + "epoch": 0.94, + "grad_norm": 0.7252838611602783, + "learning_rate": 2.0967185682417625e-07, + "loss": 1.9852, + "step": 28127 + }, + { + "epoch": 0.94, + "grad_norm": 0.7522289156913757, + "learning_rate": 2.0945539807448623e-07, + "loss": 2.1143, + "step": 28128 + }, + { + "epoch": 0.94, + "grad_norm": 0.7249211668968201, + "learning_rate": 2.0923904993263132e-07, + "loss": 2.0499, + "step": 28129 + }, + { + "epoch": 0.94, + "grad_norm": 0.755709707736969, + "learning_rate": 2.0902281240105627e-07, + "loss": 2.08, + "step": 28130 + }, + { + "epoch": 0.94, + "grad_norm": 0.737132728099823, + "learning_rate": 2.0880668548220463e-07, + "loss": 2.056, + "step": 28131 + }, + { + "epoch": 0.94, + "grad_norm": 0.7643870711326599, + "learning_rate": 2.0859066917851445e-07, + "loss": 2.0048, + "step": 28132 + }, + { + "epoch": 0.94, + "grad_norm": 0.739196240901947, + "learning_rate": 2.0837476349243046e-07, + "loss": 2.0394, + "step": 28133 + }, + { + "epoch": 0.94, + "grad_norm": 0.7299288511276245, + "learning_rate": 2.081589684263885e-07, + "loss": 1.9996, + "step": 28134 + }, + { + "epoch": 0.94, + "grad_norm": 0.7216627597808838, + "learning_rate": 2.0794328398282992e-07, + "loss": 2.0902, + "step": 28135 + }, + { + "epoch": 0.94, + "grad_norm": 0.7541171312332153, + "learning_rate": 2.0772771016418836e-07, + "loss": 2.0682, + "step": 28136 + }, + { + "epoch": 0.94, + "grad_norm": 0.741381049156189, + "learning_rate": 2.075122469728996e-07, + "loss": 1.9696, + "step": 28137 + }, + { + "epoch": 0.94, + "grad_norm": 0.7600972652435303, + "learning_rate": 2.0729689441139844e-07, + "loss": 2.0442, + "step": 28138 + }, + { + "epoch": 0.94, + "grad_norm": 0.766302227973938, + "learning_rate": 2.0708165248211843e-07, + "loss": 2.0074, + "step": 28139 + }, + { + "epoch": 0.94, + "grad_norm": 0.8163720965385437, + "learning_rate": 2.0686652118749207e-07, + "loss": 2.056, + "step": 28140 + }, + { + "epoch": 0.94, + "grad_norm": 0.744296669960022, + "learning_rate": 2.0665150052994632e-07, + "loss": 2.0521, + "step": 28141 + }, + { + "epoch": 0.94, + "grad_norm": 0.7360948920249939, + "learning_rate": 2.0643659051191366e-07, + "loss": 2.0863, + "step": 28142 + }, + { + "epoch": 0.94, + "grad_norm": 0.7409694194793701, + "learning_rate": 2.0622179113581997e-07, + "loss": 1.9769, + "step": 28143 + }, + { + "epoch": 0.94, + "grad_norm": 0.7717030048370361, + "learning_rate": 2.0600710240409327e-07, + "loss": 2.0539, + "step": 28144 + }, + { + "epoch": 0.94, + "grad_norm": 0.7367424368858337, + "learning_rate": 2.057925243191583e-07, + "loss": 2.0567, + "step": 28145 + }, + { + "epoch": 0.94, + "grad_norm": 0.7140920162200928, + "learning_rate": 2.0557805688343978e-07, + "loss": 1.9633, + "step": 28146 + }, + { + "epoch": 0.94, + "grad_norm": 0.7554545998573303, + "learning_rate": 2.0536370009935914e-07, + "loss": 2.0843, + "step": 28147 + }, + { + "epoch": 0.94, + "grad_norm": 0.7350105047225952, + "learning_rate": 2.0514945396933993e-07, + "loss": 2.007, + "step": 28148 + }, + { + "epoch": 0.94, + "grad_norm": 0.7714079022407532, + "learning_rate": 2.049353184958025e-07, + "loss": 2.0787, + "step": 28149 + }, + { + "epoch": 0.94, + "grad_norm": 0.7355878353118896, + "learning_rate": 2.047212936811649e-07, + "loss": 2.0647, + "step": 28150 + }, + { + "epoch": 0.94, + "grad_norm": 0.7433454394340515, + "learning_rate": 2.0450737952784517e-07, + "loss": 2.0064, + "step": 28151 + }, + { + "epoch": 0.94, + "grad_norm": 0.7268030643463135, + "learning_rate": 2.0429357603826028e-07, + "loss": 2.0534, + "step": 28152 + }, + { + "epoch": 0.94, + "grad_norm": 0.7110949158668518, + "learning_rate": 2.0407988321482718e-07, + "loss": 2.0149, + "step": 28153 + }, + { + "epoch": 0.94, + "grad_norm": 0.7422033548355103, + "learning_rate": 2.0386630105995618e-07, + "loss": 2.0284, + "step": 28154 + }, + { + "epoch": 0.94, + "grad_norm": 0.7450889945030212, + "learning_rate": 2.0365282957606424e-07, + "loss": 2.0824, + "step": 28155 + }, + { + "epoch": 0.94, + "grad_norm": 0.7447482347488403, + "learning_rate": 2.0343946876556164e-07, + "loss": 2.0809, + "step": 28156 + }, + { + "epoch": 0.94, + "grad_norm": 0.753074586391449, + "learning_rate": 2.0322621863085756e-07, + "loss": 2.0503, + "step": 28157 + }, + { + "epoch": 0.94, + "grad_norm": 0.7411597371101379, + "learning_rate": 2.0301307917436341e-07, + "loss": 2.0666, + "step": 28158 + }, + { + "epoch": 0.94, + "grad_norm": 0.7490658760070801, + "learning_rate": 2.0280005039848505e-07, + "loss": 2.0828, + "step": 28159 + }, + { + "epoch": 0.94, + "grad_norm": 0.7272852659225464, + "learning_rate": 2.0258713230562943e-07, + "loss": 2.039, + "step": 28160 + }, + { + "epoch": 0.94, + "grad_norm": 0.7387398481369019, + "learning_rate": 2.0237432489820352e-07, + "loss": 2.0435, + "step": 28161 + }, + { + "epoch": 0.94, + "grad_norm": 0.7438418865203857, + "learning_rate": 2.0216162817860985e-07, + "loss": 2.0999, + "step": 28162 + }, + { + "epoch": 0.94, + "grad_norm": 0.7278345823287964, + "learning_rate": 2.0194904214925205e-07, + "loss": 2.0894, + "step": 28163 + }, + { + "epoch": 0.94, + "grad_norm": 0.7639041543006897, + "learning_rate": 2.0173656681253262e-07, + "loss": 2.0911, + "step": 28164 + }, + { + "epoch": 0.94, + "grad_norm": 0.7376741766929626, + "learning_rate": 2.0152420217084966e-07, + "loss": 2.0474, + "step": 28165 + }, + { + "epoch": 0.94, + "grad_norm": 0.7775146961212158, + "learning_rate": 2.013119482266057e-07, + "loss": 2.0563, + "step": 28166 + }, + { + "epoch": 0.94, + "grad_norm": 0.7368758320808411, + "learning_rate": 2.010998049821955e-07, + "loss": 2.0242, + "step": 28167 + }, + { + "epoch": 0.94, + "grad_norm": 0.7319049835205078, + "learning_rate": 2.008877724400171e-07, + "loss": 2.0981, + "step": 28168 + }, + { + "epoch": 0.94, + "grad_norm": 0.7465494275093079, + "learning_rate": 2.0067585060246531e-07, + "loss": 2.0516, + "step": 28169 + }, + { + "epoch": 0.94, + "grad_norm": 0.7248865365982056, + "learning_rate": 2.0046403947193594e-07, + "loss": 2.0978, + "step": 28170 + }, + { + "epoch": 0.94, + "grad_norm": 0.7468136548995972, + "learning_rate": 2.002523390508204e-07, + "loss": 2.0433, + "step": 28171 + }, + { + "epoch": 0.94, + "grad_norm": 0.7477654814720154, + "learning_rate": 2.0004074934151019e-07, + "loss": 2.0436, + "step": 28172 + }, + { + "epoch": 0.94, + "grad_norm": 0.7288998365402222, + "learning_rate": 1.9982927034639665e-07, + "loss": 2.0386, + "step": 28173 + }, + { + "epoch": 0.94, + "grad_norm": 0.7658536434173584, + "learning_rate": 1.9961790206786901e-07, + "loss": 2.1652, + "step": 28174 + }, + { + "epoch": 0.94, + "grad_norm": 0.7352390885353088, + "learning_rate": 1.9940664450831425e-07, + "loss": 2.043, + "step": 28175 + }, + { + "epoch": 0.94, + "grad_norm": 0.7358065247535706, + "learning_rate": 1.9919549767011938e-07, + "loss": 2.0835, + "step": 28176 + }, + { + "epoch": 0.94, + "grad_norm": 0.7275101542472839, + "learning_rate": 1.989844615556702e-07, + "loss": 2.0853, + "step": 28177 + }, + { + "epoch": 0.94, + "grad_norm": 0.802442193031311, + "learning_rate": 1.987735361673493e-07, + "loss": 2.0906, + "step": 28178 + }, + { + "epoch": 0.94, + "grad_norm": 0.7385225296020508, + "learning_rate": 1.985627215075425e-07, + "loss": 2.0197, + "step": 28179 + }, + { + "epoch": 0.94, + "grad_norm": 0.789284348487854, + "learning_rate": 1.983520175786302e-07, + "loss": 1.9989, + "step": 28180 + }, + { + "epoch": 0.94, + "grad_norm": 0.7460114359855652, + "learning_rate": 1.9814142438299156e-07, + "loss": 2.0775, + "step": 28181 + }, + { + "epoch": 0.94, + "grad_norm": 0.7618851661682129, + "learning_rate": 1.9793094192300577e-07, + "loss": 2.0995, + "step": 28182 + }, + { + "epoch": 0.94, + "grad_norm": 0.7256976366043091, + "learning_rate": 1.9772057020105317e-07, + "loss": 2.0404, + "step": 28183 + }, + { + "epoch": 0.94, + "grad_norm": 0.7586345076560974, + "learning_rate": 1.9751030921950854e-07, + "loss": 2.0076, + "step": 28184 + }, + { + "epoch": 0.94, + "grad_norm": 0.7642638683319092, + "learning_rate": 1.9730015898074662e-07, + "loss": 2.0582, + "step": 28185 + }, + { + "epoch": 0.94, + "grad_norm": 0.7528810501098633, + "learning_rate": 1.970901194871444e-07, + "loss": 2.0401, + "step": 28186 + }, + { + "epoch": 0.94, + "grad_norm": 0.7620170712471008, + "learning_rate": 1.968801907410711e-07, + "loss": 2.0628, + "step": 28187 + }, + { + "epoch": 0.94, + "grad_norm": 0.7333848476409912, + "learning_rate": 1.9667037274490153e-07, + "loss": 2.0514, + "step": 28188 + }, + { + "epoch": 0.94, + "grad_norm": 0.7701336145401001, + "learning_rate": 1.9646066550100374e-07, + "loss": 2.0952, + "step": 28189 + }, + { + "epoch": 0.94, + "grad_norm": 0.7643424868583679, + "learning_rate": 1.9625106901174918e-07, + "loss": 1.9422, + "step": 28190 + }, + { + "epoch": 0.94, + "grad_norm": 0.7378793954849243, + "learning_rate": 1.9604158327950263e-07, + "loss": 2.0828, + "step": 28191 + }, + { + "epoch": 0.94, + "grad_norm": 0.7595254182815552, + "learning_rate": 1.9583220830663441e-07, + "loss": 2.0135, + "step": 28192 + }, + { + "epoch": 0.94, + "grad_norm": 0.7632750868797302, + "learning_rate": 1.9562294409550708e-07, + "loss": 2.0722, + "step": 28193 + }, + { + "epoch": 0.94, + "grad_norm": 0.7199559807777405, + "learning_rate": 1.9541379064848542e-07, + "loss": 1.9962, + "step": 28194 + }, + { + "epoch": 0.94, + "grad_norm": 0.7338096499443054, + "learning_rate": 1.952047479679331e-07, + "loss": 2.0311, + "step": 28195 + }, + { + "epoch": 0.94, + "grad_norm": 0.756389319896698, + "learning_rate": 1.9499581605621266e-07, + "loss": 2.0938, + "step": 28196 + }, + { + "epoch": 0.94, + "grad_norm": 0.757422149181366, + "learning_rate": 1.947869949156822e-07, + "loss": 2.021, + "step": 28197 + }, + { + "epoch": 0.94, + "grad_norm": 0.752947986125946, + "learning_rate": 1.94578284548701e-07, + "loss": 2.0125, + "step": 28198 + }, + { + "epoch": 0.94, + "grad_norm": 0.7873149514198303, + "learning_rate": 1.943696849576293e-07, + "loss": 2.0101, + "step": 28199 + }, + { + "epoch": 0.94, + "grad_norm": 0.726495623588562, + "learning_rate": 1.9416119614482089e-07, + "loss": 2.0275, + "step": 28200 + }, + { + "epoch": 0.94, + "grad_norm": 0.7671146392822266, + "learning_rate": 1.9395281811263377e-07, + "loss": 2.0783, + "step": 28201 + }, + { + "epoch": 0.94, + "grad_norm": 0.7347133755683899, + "learning_rate": 1.937445508634206e-07, + "loss": 1.9786, + "step": 28202 + }, + { + "epoch": 0.94, + "grad_norm": 0.7001929879188538, + "learning_rate": 1.9353639439953387e-07, + "loss": 2.0373, + "step": 28203 + }, + { + "epoch": 0.94, + "grad_norm": 0.7235764265060425, + "learning_rate": 1.9332834872332507e-07, + "loss": 2.0821, + "step": 28204 + }, + { + "epoch": 0.94, + "grad_norm": 0.7280557751655579, + "learning_rate": 1.9312041383714674e-07, + "loss": 2.0605, + "step": 28205 + }, + { + "epoch": 0.94, + "grad_norm": 0.7409875392913818, + "learning_rate": 1.9291258974334592e-07, + "loss": 1.9524, + "step": 28206 + }, + { + "epoch": 0.94, + "grad_norm": 0.7390321493148804, + "learning_rate": 1.9270487644427072e-07, + "loss": 1.9937, + "step": 28207 + }, + { + "epoch": 0.94, + "grad_norm": 0.7340453863143921, + "learning_rate": 1.9249727394226925e-07, + "loss": 2.0786, + "step": 28208 + }, + { + "epoch": 0.94, + "grad_norm": 0.7230122089385986, + "learning_rate": 1.922897822396852e-07, + "loss": 1.9847, + "step": 28209 + }, + { + "epoch": 0.94, + "grad_norm": 0.7584959268569946, + "learning_rate": 1.9208240133886335e-07, + "loss": 1.9893, + "step": 28210 + }, + { + "epoch": 0.94, + "grad_norm": 0.7738015651702881, + "learning_rate": 1.918751312421463e-07, + "loss": 2.0216, + "step": 28211 + }, + { + "epoch": 0.94, + "grad_norm": 0.7722808718681335, + "learning_rate": 1.916679719518766e-07, + "loss": 2.0461, + "step": 28212 + }, + { + "epoch": 0.94, + "grad_norm": 0.7201647162437439, + "learning_rate": 1.9146092347039346e-07, + "loss": 2.0471, + "step": 28213 + }, + { + "epoch": 0.94, + "grad_norm": 0.7485525012016296, + "learning_rate": 1.9125398580003617e-07, + "loss": 2.0726, + "step": 28214 + }, + { + "epoch": 0.94, + "grad_norm": 0.740333080291748, + "learning_rate": 1.9104715894314397e-07, + "loss": 2.0727, + "step": 28215 + }, + { + "epoch": 0.94, + "grad_norm": 0.7636852264404297, + "learning_rate": 1.9084044290205162e-07, + "loss": 1.9932, + "step": 28216 + }, + { + "epoch": 0.94, + "grad_norm": 0.7347451448440552, + "learning_rate": 1.9063383767909392e-07, + "loss": 1.9765, + "step": 28217 + }, + { + "epoch": 0.94, + "grad_norm": 0.7465278506278992, + "learning_rate": 1.9042734327660794e-07, + "loss": 2.0626, + "step": 28218 + }, + { + "epoch": 0.94, + "grad_norm": 0.7537314891815186, + "learning_rate": 1.902209596969251e-07, + "loss": 2.0191, + "step": 28219 + }, + { + "epoch": 0.94, + "grad_norm": 0.7364344000816345, + "learning_rate": 1.900146869423758e-07, + "loss": 2.0537, + "step": 28220 + }, + { + "epoch": 0.94, + "grad_norm": 0.7271739840507507, + "learning_rate": 1.8980852501529146e-07, + "loss": 2.0777, + "step": 28221 + }, + { + "epoch": 0.94, + "grad_norm": 0.7679404020309448, + "learning_rate": 1.8960247391800134e-07, + "loss": 2.0637, + "step": 28222 + }, + { + "epoch": 0.94, + "grad_norm": 0.7484942078590393, + "learning_rate": 1.893965336528336e-07, + "loss": 2.0338, + "step": 28223 + }, + { + "epoch": 0.94, + "grad_norm": 0.7485313415527344, + "learning_rate": 1.8919070422211306e-07, + "loss": 2.042, + "step": 28224 + }, + { + "epoch": 0.94, + "grad_norm": 0.7216722965240479, + "learning_rate": 1.889849856281667e-07, + "loss": 2.069, + "step": 28225 + }, + { + "epoch": 0.94, + "grad_norm": 0.7566137909889221, + "learning_rate": 1.8877937787331714e-07, + "loss": 2.0587, + "step": 28226 + }, + { + "epoch": 0.94, + "grad_norm": 0.7391642332077026, + "learning_rate": 1.8857388095989026e-07, + "loss": 2.1168, + "step": 28227 + }, + { + "epoch": 0.94, + "grad_norm": 0.7059034705162048, + "learning_rate": 1.8836849489020537e-07, + "loss": 2.0032, + "step": 28228 + }, + { + "epoch": 0.94, + "grad_norm": 0.7885773777961731, + "learning_rate": 1.881632196665817e-07, + "loss": 2.0099, + "step": 28229 + }, + { + "epoch": 0.94, + "grad_norm": 0.7154566049575806, + "learning_rate": 1.8795805529133959e-07, + "loss": 1.9764, + "step": 28230 + }, + { + "epoch": 0.94, + "grad_norm": 0.7505607604980469, + "learning_rate": 1.8775300176679834e-07, + "loss": 2.1414, + "step": 28231 + }, + { + "epoch": 0.94, + "grad_norm": 0.7214109301567078, + "learning_rate": 1.8754805909527274e-07, + "loss": 2.0525, + "step": 28232 + }, + { + "epoch": 0.94, + "grad_norm": 0.7589523792266846, + "learning_rate": 1.873432272790776e-07, + "loss": 2.0263, + "step": 28233 + }, + { + "epoch": 0.94, + "grad_norm": 0.7556894421577454, + "learning_rate": 1.871385063205289e-07, + "loss": 2.004, + "step": 28234 + }, + { + "epoch": 0.94, + "grad_norm": 0.7544842958450317, + "learning_rate": 1.8693389622193692e-07, + "loss": 2.0317, + "step": 28235 + }, + { + "epoch": 0.94, + "grad_norm": 0.736167848110199, + "learning_rate": 1.8672939698561544e-07, + "loss": 1.9811, + "step": 28236 + }, + { + "epoch": 0.94, + "grad_norm": 0.743106484413147, + "learning_rate": 1.865250086138759e-07, + "loss": 2.0331, + "step": 28237 + }, + { + "epoch": 0.94, + "grad_norm": 0.77440345287323, + "learning_rate": 1.8632073110902428e-07, + "loss": 2.0395, + "step": 28238 + }, + { + "epoch": 0.94, + "grad_norm": 0.7486289739608765, + "learning_rate": 1.8611656447336868e-07, + "loss": 2.1089, + "step": 28239 + }, + { + "epoch": 0.94, + "grad_norm": 0.7164742350578308, + "learning_rate": 1.859125087092184e-07, + "loss": 1.9477, + "step": 28240 + }, + { + "epoch": 0.94, + "grad_norm": 0.7724472284317017, + "learning_rate": 1.8570856381887603e-07, + "loss": 2.0479, + "step": 28241 + }, + { + "epoch": 0.94, + "grad_norm": 0.7318061590194702, + "learning_rate": 1.855047298046464e-07, + "loss": 1.9245, + "step": 28242 + }, + { + "epoch": 0.94, + "grad_norm": 0.7490097284317017, + "learning_rate": 1.8530100666883322e-07, + "loss": 2.0387, + "step": 28243 + }, + { + "epoch": 0.94, + "grad_norm": 0.7235598564147949, + "learning_rate": 1.8509739441373576e-07, + "loss": 2.0979, + "step": 28244 + }, + { + "epoch": 0.94, + "grad_norm": 0.8222164511680603, + "learning_rate": 1.848938930416566e-07, + "loss": 1.9763, + "step": 28245 + }, + { + "epoch": 0.94, + "grad_norm": 0.7476462721824646, + "learning_rate": 1.8469050255489506e-07, + "loss": 2.0123, + "step": 28246 + }, + { + "epoch": 0.94, + "grad_norm": 0.7721613049507141, + "learning_rate": 1.8448722295574705e-07, + "loss": 2.0084, + "step": 28247 + }, + { + "epoch": 0.94, + "grad_norm": 0.7403656840324402, + "learning_rate": 1.8428405424650964e-07, + "loss": 1.9892, + "step": 28248 + }, + { + "epoch": 0.94, + "grad_norm": 0.7229350805282593, + "learning_rate": 1.8408099642947874e-07, + "loss": 1.9968, + "step": 28249 + }, + { + "epoch": 0.94, + "grad_norm": 0.7326645255088806, + "learning_rate": 1.838780495069492e-07, + "loss": 2.0676, + "step": 28250 + }, + { + "epoch": 0.94, + "grad_norm": 0.7832779288291931, + "learning_rate": 1.836752134812103e-07, + "loss": 2.1203, + "step": 28251 + }, + { + "epoch": 0.94, + "grad_norm": 0.7860056757926941, + "learning_rate": 1.834724883545569e-07, + "loss": 1.9958, + "step": 28252 + }, + { + "epoch": 0.94, + "grad_norm": 0.7495105862617493, + "learning_rate": 1.8326987412927932e-07, + "loss": 2.0715, + "step": 28253 + }, + { + "epoch": 0.94, + "grad_norm": 0.731623113155365, + "learning_rate": 1.8306737080766468e-07, + "loss": 2.077, + "step": 28254 + }, + { + "epoch": 0.94, + "grad_norm": 0.7362052202224731, + "learning_rate": 1.8286497839200112e-07, + "loss": 2.0763, + "step": 28255 + }, + { + "epoch": 0.94, + "grad_norm": 0.7488023638725281, + "learning_rate": 1.8266269688457682e-07, + "loss": 2.0069, + "step": 28256 + }, + { + "epoch": 0.94, + "grad_norm": 0.7696954607963562, + "learning_rate": 1.8246052628767442e-07, + "loss": 2.0637, + "step": 28257 + }, + { + "epoch": 0.94, + "grad_norm": 0.7478713393211365, + "learning_rate": 1.822584666035798e-07, + "loss": 2.0495, + "step": 28258 + }, + { + "epoch": 0.94, + "grad_norm": 0.7723537087440491, + "learning_rate": 1.820565178345768e-07, + "loss": 2.0586, + "step": 28259 + }, + { + "epoch": 0.94, + "grad_norm": 0.7291403412818909, + "learning_rate": 1.8185467998294347e-07, + "loss": 2.093, + "step": 28260 + }, + { + "epoch": 0.94, + "grad_norm": 0.7738631367683411, + "learning_rate": 1.8165295305096254e-07, + "loss": 2.0762, + "step": 28261 + }, + { + "epoch": 0.94, + "grad_norm": 0.7681684494018555, + "learning_rate": 1.8145133704091323e-07, + "loss": 2.0175, + "step": 28262 + }, + { + "epoch": 0.94, + "grad_norm": 0.7546780705451965, + "learning_rate": 1.812498319550715e-07, + "loss": 2.0127, + "step": 28263 + }, + { + "epoch": 0.94, + "grad_norm": 0.7464657425880432, + "learning_rate": 1.8104843779571447e-07, + "loss": 2.0764, + "step": 28264 + }, + { + "epoch": 0.94, + "grad_norm": 0.7276400923728943, + "learning_rate": 1.8084715456511913e-07, + "loss": 2.0589, + "step": 28265 + }, + { + "epoch": 0.94, + "grad_norm": 0.7347224354743958, + "learning_rate": 1.8064598226555706e-07, + "loss": 2.0418, + "step": 28266 + }, + { + "epoch": 0.94, + "grad_norm": 0.7595275640487671, + "learning_rate": 1.804449208993009e-07, + "loss": 2.0885, + "step": 28267 + }, + { + "epoch": 0.94, + "grad_norm": 0.7277171611785889, + "learning_rate": 1.802439704686254e-07, + "loss": 2.0294, + "step": 28268 + }, + { + "epoch": 0.94, + "grad_norm": 0.7411624193191528, + "learning_rate": 1.8004313097579772e-07, + "loss": 2.0507, + "step": 28269 + }, + { + "epoch": 0.94, + "grad_norm": 0.7438887357711792, + "learning_rate": 1.7984240242308714e-07, + "loss": 2.0352, + "step": 28270 + }, + { + "epoch": 0.94, + "grad_norm": 0.7290762066841125, + "learning_rate": 1.7964178481276295e-07, + "loss": 2.0387, + "step": 28271 + }, + { + "epoch": 0.94, + "grad_norm": 0.7570380568504333, + "learning_rate": 1.7944127814709e-07, + "loss": 1.9369, + "step": 28272 + }, + { + "epoch": 0.94, + "grad_norm": 0.7962685823440552, + "learning_rate": 1.7924088242833427e-07, + "loss": 2.0095, + "step": 28273 + }, + { + "epoch": 0.94, + "grad_norm": 0.7719094753265381, + "learning_rate": 1.7904059765875837e-07, + "loss": 2.046, + "step": 28274 + }, + { + "epoch": 0.94, + "grad_norm": 0.7585697174072266, + "learning_rate": 1.7884042384062827e-07, + "loss": 2.0811, + "step": 28275 + }, + { + "epoch": 0.94, + "grad_norm": 0.7371112108230591, + "learning_rate": 1.7864036097620108e-07, + "loss": 2.0271, + "step": 28276 + }, + { + "epoch": 0.94, + "grad_norm": 0.7562046647071838, + "learning_rate": 1.784404090677405e-07, + "loss": 2.0337, + "step": 28277 + }, + { + "epoch": 0.94, + "grad_norm": 0.7499122023582458, + "learning_rate": 1.782405681175048e-07, + "loss": 1.93, + "step": 28278 + }, + { + "epoch": 0.94, + "grad_norm": 0.7518165111541748, + "learning_rate": 1.7804083812774985e-07, + "loss": 2.0007, + "step": 28279 + }, + { + "epoch": 0.94, + "grad_norm": 0.7126772403717041, + "learning_rate": 1.7784121910073282e-07, + "loss": 1.9807, + "step": 28280 + }, + { + "epoch": 0.94, + "grad_norm": 0.7511412501335144, + "learning_rate": 1.7764171103871187e-07, + "loss": 2.0675, + "step": 28281 + }, + { + "epoch": 0.94, + "grad_norm": 0.7182841300964355, + "learning_rate": 1.774423139439363e-07, + "loss": 2.0088, + "step": 28282 + }, + { + "epoch": 0.94, + "grad_norm": 0.7198595404624939, + "learning_rate": 1.7724302781866098e-07, + "loss": 2.0408, + "step": 28283 + }, + { + "epoch": 0.94, + "grad_norm": 0.7396387457847595, + "learning_rate": 1.7704385266513747e-07, + "loss": 1.9792, + "step": 28284 + }, + { + "epoch": 0.94, + "grad_norm": 0.7296948432922363, + "learning_rate": 1.7684478848561616e-07, + "loss": 2.032, + "step": 28285 + }, + { + "epoch": 0.94, + "grad_norm": 0.7366349101066589, + "learning_rate": 1.7664583528234413e-07, + "loss": 1.966, + "step": 28286 + }, + { + "epoch": 0.94, + "grad_norm": 0.7235068678855896, + "learning_rate": 1.7644699305757184e-07, + "loss": 2.0961, + "step": 28287 + }, + { + "epoch": 0.94, + "grad_norm": 0.7307411432266235, + "learning_rate": 1.7624826181354415e-07, + "loss": 2.11, + "step": 28288 + }, + { + "epoch": 0.94, + "grad_norm": 0.7531940937042236, + "learning_rate": 1.760496415525048e-07, + "loss": 1.9982, + "step": 28289 + }, + { + "epoch": 0.94, + "grad_norm": 0.7397749423980713, + "learning_rate": 1.7585113227670092e-07, + "loss": 2.0457, + "step": 28290 + }, + { + "epoch": 0.94, + "grad_norm": 0.792334794998169, + "learning_rate": 1.756527339883729e-07, + "loss": 2.1144, + "step": 28291 + }, + { + "epoch": 0.94, + "grad_norm": 0.7362340688705444, + "learning_rate": 1.754544466897623e-07, + "loss": 2.0472, + "step": 28292 + }, + { + "epoch": 0.94, + "grad_norm": 0.746554434299469, + "learning_rate": 1.7525627038310955e-07, + "loss": 2.0498, + "step": 28293 + }, + { + "epoch": 0.94, + "grad_norm": 0.7348185181617737, + "learning_rate": 1.7505820507065508e-07, + "loss": 2.0312, + "step": 28294 + }, + { + "epoch": 0.94, + "grad_norm": 0.7473952174186707, + "learning_rate": 1.7486025075463375e-07, + "loss": 2.1029, + "step": 28295 + }, + { + "epoch": 0.94, + "grad_norm": 0.7585883140563965, + "learning_rate": 1.746624074372827e-07, + "loss": 2.0093, + "step": 28296 + }, + { + "epoch": 0.94, + "grad_norm": 0.736268162727356, + "learning_rate": 1.7446467512083897e-07, + "loss": 2.0541, + "step": 28297 + }, + { + "epoch": 0.94, + "grad_norm": 0.7622162103652954, + "learning_rate": 1.7426705380753306e-07, + "loss": 2.0535, + "step": 28298 + }, + { + "epoch": 0.94, + "grad_norm": 0.7357087731361389, + "learning_rate": 1.7406954349960203e-07, + "loss": 1.9852, + "step": 28299 + }, + { + "epoch": 0.94, + "grad_norm": 0.7691460251808167, + "learning_rate": 1.73872144199273e-07, + "loss": 2.0408, + "step": 28300 + }, + { + "epoch": 0.94, + "grad_norm": 0.7713519930839539, + "learning_rate": 1.7367485590877863e-07, + "loss": 1.9845, + "step": 28301 + }, + { + "epoch": 0.94, + "grad_norm": 0.7451587915420532, + "learning_rate": 1.7347767863034605e-07, + "loss": 2.0524, + "step": 28302 + }, + { + "epoch": 0.94, + "grad_norm": 0.7565116882324219, + "learning_rate": 1.7328061236620564e-07, + "loss": 2.0838, + "step": 28303 + }, + { + "epoch": 0.94, + "grad_norm": 0.7465039491653442, + "learning_rate": 1.73083657118579e-07, + "loss": 2.0999, + "step": 28304 + }, + { + "epoch": 0.94, + "grad_norm": 0.7290777564048767, + "learning_rate": 1.7288681288969544e-07, + "loss": 1.9937, + "step": 28305 + }, + { + "epoch": 0.94, + "grad_norm": 0.7499036192893982, + "learning_rate": 1.7269007968177765e-07, + "loss": 1.9387, + "step": 28306 + }, + { + "epoch": 0.94, + "grad_norm": 0.7434399724006653, + "learning_rate": 1.7249345749704828e-07, + "loss": 2.0215, + "step": 28307 + }, + { + "epoch": 0.94, + "grad_norm": 0.7299054265022278, + "learning_rate": 1.7229694633772775e-07, + "loss": 2.0832, + "step": 28308 + }, + { + "epoch": 0.94, + "grad_norm": 0.7239153385162354, + "learning_rate": 1.7210054620603656e-07, + "loss": 2.0073, + "step": 28309 + }, + { + "epoch": 0.94, + "grad_norm": 0.7587687373161316, + "learning_rate": 1.71904257104194e-07, + "loss": 2.0789, + "step": 28310 + }, + { + "epoch": 0.94, + "grad_norm": 0.7295830249786377, + "learning_rate": 1.717080790344172e-07, + "loss": 2.0189, + "step": 28311 + }, + { + "epoch": 0.94, + "grad_norm": 0.7542331218719482, + "learning_rate": 1.7151201199892331e-07, + "loss": 2.0574, + "step": 28312 + }, + { + "epoch": 0.94, + "grad_norm": 0.7858145833015442, + "learning_rate": 1.7131605599992606e-07, + "loss": 2.0653, + "step": 28313 + }, + { + "epoch": 0.94, + "grad_norm": 0.7913341522216797, + "learning_rate": 1.7112021103963927e-07, + "loss": 2.0558, + "step": 28314 + }, + { + "epoch": 0.94, + "grad_norm": 0.7490506172180176, + "learning_rate": 1.709244771202767e-07, + "loss": 2.0219, + "step": 28315 + }, + { + "epoch": 0.94, + "grad_norm": 0.7350800633430481, + "learning_rate": 1.7072885424404996e-07, + "loss": 2.0739, + "step": 28316 + }, + { + "epoch": 0.94, + "grad_norm": 0.7498719096183777, + "learning_rate": 1.705333424131661e-07, + "loss": 2.0198, + "step": 28317 + }, + { + "epoch": 0.94, + "grad_norm": 0.7415547370910645, + "learning_rate": 1.7033794162983676e-07, + "loss": 1.9879, + "step": 28318 + }, + { + "epoch": 0.94, + "grad_norm": 0.7545487284660339, + "learning_rate": 1.70142651896269e-07, + "loss": 1.9818, + "step": 28319 + }, + { + "epoch": 0.94, + "grad_norm": 0.7751821279525757, + "learning_rate": 1.6994747321466777e-07, + "loss": 2.0927, + "step": 28320 + }, + { + "epoch": 0.94, + "grad_norm": 0.7195343375205994, + "learning_rate": 1.6975240558724014e-07, + "loss": 2.0057, + "step": 28321 + }, + { + "epoch": 0.94, + "grad_norm": 0.756786584854126, + "learning_rate": 1.6955744901618887e-07, + "loss": 2.0354, + "step": 28322 + }, + { + "epoch": 0.94, + "grad_norm": 0.7642935514450073, + "learning_rate": 1.6936260350371437e-07, + "loss": 2.0977, + "step": 28323 + }, + { + "epoch": 0.94, + "grad_norm": 0.7660335898399353, + "learning_rate": 1.6916786905202153e-07, + "loss": 2.0779, + "step": 28324 + }, + { + "epoch": 0.94, + "grad_norm": 0.745856523513794, + "learning_rate": 1.6897324566330864e-07, + "loss": 2.0278, + "step": 28325 + }, + { + "epoch": 0.94, + "grad_norm": 0.742396354675293, + "learning_rate": 1.6877873333977392e-07, + "loss": 2.0131, + "step": 28326 + }, + { + "epoch": 0.94, + "grad_norm": 0.735607922077179, + "learning_rate": 1.6858433208361558e-07, + "loss": 2.0453, + "step": 28327 + }, + { + "epoch": 0.94, + "grad_norm": 0.7740545272827148, + "learning_rate": 1.6839004189702968e-07, + "loss": 2.065, + "step": 28328 + }, + { + "epoch": 0.94, + "grad_norm": 0.7439558506011963, + "learning_rate": 1.681958627822111e-07, + "loss": 2.0043, + "step": 28329 + }, + { + "epoch": 0.94, + "grad_norm": 0.7579848170280457, + "learning_rate": 1.680017947413537e-07, + "loss": 2.0214, + "step": 28330 + }, + { + "epoch": 0.94, + "grad_norm": 0.7669976353645325, + "learning_rate": 1.678078377766501e-07, + "loss": 2.026, + "step": 28331 + }, + { + "epoch": 0.94, + "grad_norm": 0.7325143814086914, + "learning_rate": 1.6761399189029082e-07, + "loss": 2.0469, + "step": 28332 + }, + { + "epoch": 0.94, + "grad_norm": 0.7471593022346497, + "learning_rate": 1.674202570844663e-07, + "loss": 1.9697, + "step": 28333 + }, + { + "epoch": 0.94, + "grad_norm": 0.7568303942680359, + "learning_rate": 1.6722663336136703e-07, + "loss": 2.0784, + "step": 28334 + }, + { + "epoch": 0.94, + "grad_norm": 0.7640582323074341, + "learning_rate": 1.670331207231768e-07, + "loss": 2.1056, + "step": 28335 + }, + { + "epoch": 0.94, + "grad_norm": 0.7403440475463867, + "learning_rate": 1.6683971917208387e-07, + "loss": 2.0621, + "step": 28336 + }, + { + "epoch": 0.94, + "grad_norm": 0.7237331867218018, + "learning_rate": 1.6664642871027314e-07, + "loss": 2.0473, + "step": 28337 + }, + { + "epoch": 0.94, + "grad_norm": 0.7541647553443909, + "learning_rate": 1.6645324933992845e-07, + "loss": 1.9948, + "step": 28338 + }, + { + "epoch": 0.94, + "grad_norm": 0.725741446018219, + "learning_rate": 1.662601810632325e-07, + "loss": 1.9948, + "step": 28339 + }, + { + "epoch": 0.94, + "grad_norm": 0.7446455955505371, + "learning_rate": 1.660672238823646e-07, + "loss": 2.0939, + "step": 28340 + }, + { + "epoch": 0.94, + "grad_norm": 0.7481258511543274, + "learning_rate": 1.6587437779950755e-07, + "loss": 2.0493, + "step": 28341 + }, + { + "epoch": 0.94, + "grad_norm": 0.7438316345214844, + "learning_rate": 1.6568164281683618e-07, + "loss": 2.0447, + "step": 28342 + }, + { + "epoch": 0.94, + "grad_norm": 0.7250549793243408, + "learning_rate": 1.6548901893653214e-07, + "loss": 2.0318, + "step": 28343 + }, + { + "epoch": 0.94, + "grad_norm": 0.778458297252655, + "learning_rate": 1.652965061607692e-07, + "loss": 2.0801, + "step": 28344 + }, + { + "epoch": 0.94, + "grad_norm": 0.71019446849823, + "learning_rate": 1.651041044917212e-07, + "loss": 1.9876, + "step": 28345 + }, + { + "epoch": 0.94, + "grad_norm": 0.7694583535194397, + "learning_rate": 1.6491181393156419e-07, + "loss": 2.0679, + "step": 28346 + }, + { + "epoch": 0.94, + "grad_norm": 0.7302212119102478, + "learning_rate": 1.6471963448246976e-07, + "loss": 2.0167, + "step": 28347 + }, + { + "epoch": 0.94, + "grad_norm": 0.7205811142921448, + "learning_rate": 1.6452756614660948e-07, + "loss": 2.0953, + "step": 28348 + }, + { + "epoch": 0.94, + "grad_norm": 0.7533875703811646, + "learning_rate": 1.6433560892615163e-07, + "loss": 2.0678, + "step": 28349 + }, + { + "epoch": 0.94, + "grad_norm": 0.7474888563156128, + "learning_rate": 1.641437628232667e-07, + "loss": 2.0946, + "step": 28350 + }, + { + "epoch": 0.94, + "grad_norm": 0.7399880290031433, + "learning_rate": 1.6395202784011965e-07, + "loss": 2.0474, + "step": 28351 + }, + { + "epoch": 0.94, + "grad_norm": 0.7406282424926758, + "learning_rate": 1.6376040397887872e-07, + "loss": 2.0079, + "step": 28352 + }, + { + "epoch": 0.94, + "grad_norm": 0.7734681963920593, + "learning_rate": 1.6356889124170882e-07, + "loss": 2.0926, + "step": 28353 + }, + { + "epoch": 0.94, + "grad_norm": 0.756147027015686, + "learning_rate": 1.6337748963077273e-07, + "loss": 2.0926, + "step": 28354 + }, + { + "epoch": 0.94, + "grad_norm": 0.7375460267066956, + "learning_rate": 1.6318619914823198e-07, + "loss": 2.1086, + "step": 28355 + }, + { + "epoch": 0.94, + "grad_norm": 0.7504284977912903, + "learning_rate": 1.6299501979624933e-07, + "loss": 1.9235, + "step": 28356 + }, + { + "epoch": 0.94, + "grad_norm": 0.7604022026062012, + "learning_rate": 1.6280395157698414e-07, + "loss": 2.0145, + "step": 28357 + }, + { + "epoch": 0.94, + "grad_norm": 0.7696889042854309, + "learning_rate": 1.6261299449259472e-07, + "loss": 2.0328, + "step": 28358 + }, + { + "epoch": 0.94, + "grad_norm": 0.7469714879989624, + "learning_rate": 1.6242214854523707e-07, + "loss": 2.1047, + "step": 28359 + }, + { + "epoch": 0.94, + "grad_norm": 0.7709844708442688, + "learning_rate": 1.6223141373707063e-07, + "loss": 2.09, + "step": 28360 + }, + { + "epoch": 0.94, + "grad_norm": 0.746536374092102, + "learning_rate": 1.6204079007024698e-07, + "loss": 2.0776, + "step": 28361 + }, + { + "epoch": 0.94, + "grad_norm": 0.7545673251152039, + "learning_rate": 1.6185027754692106e-07, + "loss": 2.0927, + "step": 28362 + }, + { + "epoch": 0.94, + "grad_norm": 0.7516542077064514, + "learning_rate": 1.616598761692456e-07, + "loss": 2.0697, + "step": 28363 + }, + { + "epoch": 0.94, + "grad_norm": 0.7715277075767517, + "learning_rate": 1.6146958593937e-07, + "loss": 2.0379, + "step": 28364 + }, + { + "epoch": 0.94, + "grad_norm": 0.7570193409919739, + "learning_rate": 1.6127940685944589e-07, + "loss": 2.0523, + "step": 28365 + }, + { + "epoch": 0.94, + "grad_norm": 0.7394590377807617, + "learning_rate": 1.6108933893162039e-07, + "loss": 2.0138, + "step": 28366 + }, + { + "epoch": 0.94, + "grad_norm": 0.7429561614990234, + "learning_rate": 1.608993821580418e-07, + "loss": 2.0452, + "step": 28367 + }, + { + "epoch": 0.94, + "grad_norm": 0.7527019381523132, + "learning_rate": 1.6070953654085507e-07, + "loss": 2.1095, + "step": 28368 + }, + { + "epoch": 0.94, + "grad_norm": 0.7455555200576782, + "learning_rate": 1.605198020822063e-07, + "loss": 2.024, + "step": 28369 + }, + { + "epoch": 0.94, + "grad_norm": 0.7376919984817505, + "learning_rate": 1.6033017878423818e-07, + "loss": 2.0334, + "step": 28370 + }, + { + "epoch": 0.94, + "grad_norm": 0.7549187541007996, + "learning_rate": 1.6014066664909234e-07, + "loss": 2.0674, + "step": 28371 + }, + { + "epoch": 0.94, + "grad_norm": 0.7485802173614502, + "learning_rate": 1.599512656789115e-07, + "loss": 1.9673, + "step": 28372 + }, + { + "epoch": 0.94, + "grad_norm": 0.7388875484466553, + "learning_rate": 1.5976197587583399e-07, + "loss": 1.9912, + "step": 28373 + }, + { + "epoch": 0.94, + "grad_norm": 0.7337551116943359, + "learning_rate": 1.5957279724199914e-07, + "loss": 2.063, + "step": 28374 + }, + { + "epoch": 0.94, + "grad_norm": 0.7287161946296692, + "learning_rate": 1.5938372977954307e-07, + "loss": 2.0948, + "step": 28375 + }, + { + "epoch": 0.94, + "grad_norm": 0.7483761310577393, + "learning_rate": 1.5919477349060298e-07, + "loss": 2.0845, + "step": 28376 + }, + { + "epoch": 0.94, + "grad_norm": 0.7411022186279297, + "learning_rate": 1.5900592837731264e-07, + "loss": 2.0523, + "step": 28377 + }, + { + "epoch": 0.94, + "grad_norm": 0.7285013198852539, + "learning_rate": 1.5881719444180598e-07, + "loss": 1.9989, + "step": 28378 + }, + { + "epoch": 0.94, + "grad_norm": 0.7327479124069214, + "learning_rate": 1.586285716862157e-07, + "loss": 2.0528, + "step": 28379 + }, + { + "epoch": 0.94, + "grad_norm": 0.7655050754547119, + "learning_rate": 1.5844006011267122e-07, + "loss": 2.0148, + "step": 28380 + }, + { + "epoch": 0.94, + "grad_norm": 0.7391484379768372, + "learning_rate": 1.5825165972330413e-07, + "loss": 2.0778, + "step": 28381 + }, + { + "epoch": 0.94, + "grad_norm": 0.7341198921203613, + "learning_rate": 1.5806337052024168e-07, + "loss": 2.0031, + "step": 28382 + }, + { + "epoch": 0.94, + "grad_norm": 0.7228718400001526, + "learning_rate": 1.578751925056121e-07, + "loss": 2.0209, + "step": 28383 + }, + { + "epoch": 0.94, + "grad_norm": 0.7728800177574158, + "learning_rate": 1.576871256815393e-07, + "loss": 2.0324, + "step": 28384 + }, + { + "epoch": 0.94, + "grad_norm": 0.7297536730766296, + "learning_rate": 1.5749917005014936e-07, + "loss": 2.0626, + "step": 28385 + }, + { + "epoch": 0.94, + "grad_norm": 0.7636086344718933, + "learning_rate": 1.573113256135661e-07, + "loss": 2.0235, + "step": 28386 + }, + { + "epoch": 0.94, + "grad_norm": 0.7305300235748291, + "learning_rate": 1.5712359237391118e-07, + "loss": 2.0663, + "step": 28387 + }, + { + "epoch": 0.94, + "grad_norm": 0.7554391026496887, + "learning_rate": 1.569359703333051e-07, + "loss": 2.0619, + "step": 28388 + }, + { + "epoch": 0.94, + "grad_norm": 0.7666745185852051, + "learning_rate": 1.567484594938673e-07, + "loss": 2.0191, + "step": 28389 + }, + { + "epoch": 0.94, + "grad_norm": 0.7664971351623535, + "learning_rate": 1.5656105985771718e-07, + "loss": 2.0224, + "step": 28390 + }, + { + "epoch": 0.94, + "grad_norm": 0.7475983500480652, + "learning_rate": 1.5637377142697196e-07, + "loss": 2.0678, + "step": 28391 + }, + { + "epoch": 0.94, + "grad_norm": 0.7408192753791809, + "learning_rate": 1.561865942037466e-07, + "loss": 2.025, + "step": 28392 + }, + { + "epoch": 0.94, + "grad_norm": 0.7450444102287292, + "learning_rate": 1.5599952819015496e-07, + "loss": 1.9791, + "step": 28393 + }, + { + "epoch": 0.94, + "grad_norm": 0.7386817932128906, + "learning_rate": 1.5581257338831313e-07, + "loss": 2.0332, + "step": 28394 + }, + { + "epoch": 0.94, + "grad_norm": 0.732686460018158, + "learning_rate": 1.5562572980033053e-07, + "loss": 2.0748, + "step": 28395 + }, + { + "epoch": 0.94, + "grad_norm": 0.7775156497955322, + "learning_rate": 1.5543899742831993e-07, + "loss": 1.987, + "step": 28396 + }, + { + "epoch": 0.94, + "grad_norm": 0.7728347182273865, + "learning_rate": 1.552523762743885e-07, + "loss": 2.0586, + "step": 28397 + }, + { + "epoch": 0.94, + "grad_norm": 0.7374674081802368, + "learning_rate": 1.5506586634064792e-07, + "loss": 2.0743, + "step": 28398 + }, + { + "epoch": 0.94, + "grad_norm": 0.7305232882499695, + "learning_rate": 1.5487946762920203e-07, + "loss": 1.9663, + "step": 28399 + }, + { + "epoch": 0.94, + "grad_norm": 0.7428101301193237, + "learning_rate": 1.5469318014215917e-07, + "loss": 2.045, + "step": 28400 + }, + { + "epoch": 0.94, + "grad_norm": 0.7395274043083191, + "learning_rate": 1.545070038816232e-07, + "loss": 2.045, + "step": 28401 + }, + { + "epoch": 0.94, + "grad_norm": 0.7485905885696411, + "learning_rate": 1.543209388496958e-07, + "loss": 2.0359, + "step": 28402 + }, + { + "epoch": 0.94, + "grad_norm": 0.7275430560112, + "learning_rate": 1.541349850484797e-07, + "loss": 2.0249, + "step": 28403 + }, + { + "epoch": 0.95, + "grad_norm": 0.7517231702804565, + "learning_rate": 1.5394914248007875e-07, + "loss": 2.1011, + "step": 28404 + }, + { + "epoch": 0.95, + "grad_norm": 0.7438262701034546, + "learning_rate": 1.537634111465891e-07, + "loss": 2.0846, + "step": 28405 + }, + { + "epoch": 0.95, + "grad_norm": 0.7387683987617493, + "learning_rate": 1.5357779105010905e-07, + "loss": 2.0926, + "step": 28406 + }, + { + "epoch": 0.95, + "grad_norm": 0.7335829138755798, + "learning_rate": 1.5339228219273694e-07, + "loss": 1.9862, + "step": 28407 + }, + { + "epoch": 0.95, + "grad_norm": 0.7312666773796082, + "learning_rate": 1.5320688457656774e-07, + "loss": 2.0478, + "step": 28408 + }, + { + "epoch": 0.95, + "grad_norm": 0.7414546012878418, + "learning_rate": 1.5302159820369756e-07, + "loss": 2.0636, + "step": 28409 + }, + { + "epoch": 0.95, + "grad_norm": 0.7388017773628235, + "learning_rate": 1.5283642307621693e-07, + "loss": 2.0177, + "step": 28410 + }, + { + "epoch": 0.95, + "grad_norm": 0.7487220764160156, + "learning_rate": 1.5265135919622086e-07, + "loss": 1.9636, + "step": 28411 + }, + { + "epoch": 0.95, + "grad_norm": 0.7452407479286194, + "learning_rate": 1.524664065657977e-07, + "loss": 2.0826, + "step": 28412 + }, + { + "epoch": 0.95, + "grad_norm": 0.7256044745445251, + "learning_rate": 1.5228156518703906e-07, + "loss": 2.0104, + "step": 28413 + }, + { + "epoch": 0.95, + "grad_norm": 0.7575449347496033, + "learning_rate": 1.520968350620311e-07, + "loss": 2.1031, + "step": 28414 + }, + { + "epoch": 0.95, + "grad_norm": 0.7580878734588623, + "learning_rate": 1.5191221619286102e-07, + "loss": 2.0408, + "step": 28415 + }, + { + "epoch": 0.95, + "grad_norm": 0.7436800599098206, + "learning_rate": 1.5172770858161601e-07, + "loss": 2.0373, + "step": 28416 + }, + { + "epoch": 0.95, + "grad_norm": 0.726304292678833, + "learning_rate": 1.5154331223037887e-07, + "loss": 2.0146, + "step": 28417 + }, + { + "epoch": 0.95, + "grad_norm": 0.7285186648368835, + "learning_rate": 1.5135902714123464e-07, + "loss": 2.0306, + "step": 28418 + }, + { + "epoch": 0.95, + "grad_norm": 0.7661466002464294, + "learning_rate": 1.511748533162638e-07, + "loss": 2.0944, + "step": 28419 + }, + { + "epoch": 0.95, + "grad_norm": 0.7457267045974731, + "learning_rate": 1.5099079075754808e-07, + "loss": 1.967, + "step": 28420 + }, + { + "epoch": 0.95, + "grad_norm": 0.7371566295623779, + "learning_rate": 1.5080683946716578e-07, + "loss": 2.0362, + "step": 28421 + }, + { + "epoch": 0.95, + "grad_norm": 0.7245676517486572, + "learning_rate": 1.5062299944719526e-07, + "loss": 2.0361, + "step": 28422 + }, + { + "epoch": 0.95, + "grad_norm": 0.7544835805892944, + "learning_rate": 1.5043927069971487e-07, + "loss": 2.0405, + "step": 28423 + }, + { + "epoch": 0.95, + "grad_norm": 0.7272149920463562, + "learning_rate": 1.5025565322679848e-07, + "loss": 2.0123, + "step": 28424 + }, + { + "epoch": 0.95, + "grad_norm": 0.7294792532920837, + "learning_rate": 1.5007214703051997e-07, + "loss": 2.0913, + "step": 28425 + }, + { + "epoch": 0.95, + "grad_norm": 0.7548956274986267, + "learning_rate": 1.498887521129555e-07, + "loss": 2.0717, + "step": 28426 + }, + { + "epoch": 0.95, + "grad_norm": 0.7533094882965088, + "learning_rate": 1.4970546847617562e-07, + "loss": 2.0662, + "step": 28427 + }, + { + "epoch": 0.95, + "grad_norm": 0.7679675817489624, + "learning_rate": 1.4952229612224866e-07, + "loss": 2.0905, + "step": 28428 + }, + { + "epoch": 0.95, + "grad_norm": 0.7357742190361023, + "learning_rate": 1.4933923505324745e-07, + "loss": 2.0294, + "step": 28429 + }, + { + "epoch": 0.95, + "grad_norm": 0.7486264705657959, + "learning_rate": 1.4915628527123693e-07, + "loss": 2.0882, + "step": 28430 + }, + { + "epoch": 0.95, + "grad_norm": 0.7551736235618591, + "learning_rate": 1.489734467782866e-07, + "loss": 2.126, + "step": 28431 + }, + { + "epoch": 0.95, + "grad_norm": 0.7354605197906494, + "learning_rate": 1.487907195764604e-07, + "loss": 2.0008, + "step": 28432 + }, + { + "epoch": 0.95, + "grad_norm": 0.7396884560585022, + "learning_rate": 1.4860810366782442e-07, + "loss": 2.1194, + "step": 28433 + }, + { + "epoch": 0.95, + "grad_norm": 0.7500935792922974, + "learning_rate": 1.4842559905443921e-07, + "loss": 2.0154, + "step": 28434 + }, + { + "epoch": 0.95, + "grad_norm": 0.7391665577888489, + "learning_rate": 1.482432057383687e-07, + "loss": 1.9955, + "step": 28435 + }, + { + "epoch": 0.95, + "grad_norm": 0.7395474314689636, + "learning_rate": 1.4806092372167348e-07, + "loss": 2.0286, + "step": 28436 + }, + { + "epoch": 0.95, + "grad_norm": 0.7288504242897034, + "learning_rate": 1.4787875300641074e-07, + "loss": 2.092, + "step": 28437 + }, + { + "epoch": 0.95, + "grad_norm": 0.7440198063850403, + "learning_rate": 1.4769669359464e-07, + "loss": 2.0051, + "step": 28438 + }, + { + "epoch": 0.95, + "grad_norm": 0.7375121116638184, + "learning_rate": 1.4751474548841848e-07, + "loss": 2.0279, + "step": 28439 + }, + { + "epoch": 0.95, + "grad_norm": 0.7525367736816406, + "learning_rate": 1.4733290868980232e-07, + "loss": 2.0412, + "step": 28440 + }, + { + "epoch": 0.95, + "grad_norm": 0.7290826439857483, + "learning_rate": 1.471511832008432e-07, + "loss": 2.0019, + "step": 28441 + }, + { + "epoch": 0.95, + "grad_norm": 0.7247813940048218, + "learning_rate": 1.4696956902359616e-07, + "loss": 2.0253, + "step": 28442 + }, + { + "epoch": 0.95, + "grad_norm": 0.7464855909347534, + "learning_rate": 1.4678806616011287e-07, + "loss": 2.025, + "step": 28443 + }, + { + "epoch": 0.95, + "grad_norm": 0.7288612723350525, + "learning_rate": 1.466066746124417e-07, + "loss": 2.0106, + "step": 28444 + }, + { + "epoch": 0.95, + "grad_norm": 0.7284706234931946, + "learning_rate": 1.464253943826366e-07, + "loss": 1.9789, + "step": 28445 + }, + { + "epoch": 0.95, + "grad_norm": 0.7453920841217041, + "learning_rate": 1.462442254727403e-07, + "loss": 2.0966, + "step": 28446 + }, + { + "epoch": 0.95, + "grad_norm": 0.7240303754806519, + "learning_rate": 1.4606316788480236e-07, + "loss": 2.058, + "step": 28447 + }, + { + "epoch": 0.95, + "grad_norm": 0.735941469669342, + "learning_rate": 1.4588222162086884e-07, + "loss": 2.0197, + "step": 28448 + }, + { + "epoch": 0.95, + "grad_norm": 0.7546452879905701, + "learning_rate": 1.457013866829826e-07, + "loss": 2.1018, + "step": 28449 + }, + { + "epoch": 0.95, + "grad_norm": 0.8033734560012817, + "learning_rate": 1.4552066307318536e-07, + "loss": 2.0865, + "step": 28450 + }, + { + "epoch": 0.95, + "grad_norm": 0.7451728582382202, + "learning_rate": 1.453400507935221e-07, + "loss": 2.0198, + "step": 28451 + }, + { + "epoch": 0.95, + "grad_norm": 0.7348548173904419, + "learning_rate": 1.451595498460312e-07, + "loss": 2.0615, + "step": 28452 + }, + { + "epoch": 0.95, + "grad_norm": 0.7506760358810425, + "learning_rate": 1.4497916023275104e-07, + "loss": 2.0471, + "step": 28453 + }, + { + "epoch": 0.95, + "grad_norm": 0.7752646803855896, + "learning_rate": 1.4479888195572222e-07, + "loss": 2.0916, + "step": 28454 + }, + { + "epoch": 0.95, + "grad_norm": 0.729526937007904, + "learning_rate": 1.4461871501697976e-07, + "loss": 1.9575, + "step": 28455 + }, + { + "epoch": 0.95, + "grad_norm": 0.7493242621421814, + "learning_rate": 1.4443865941855762e-07, + "loss": 2.0057, + "step": 28456 + }, + { + "epoch": 0.95, + "grad_norm": 0.7409965991973877, + "learning_rate": 1.4425871516249302e-07, + "loss": 2.0127, + "step": 28457 + }, + { + "epoch": 0.95, + "grad_norm": 0.7261967062950134, + "learning_rate": 1.4407888225081768e-07, + "loss": 2.0974, + "step": 28458 + }, + { + "epoch": 0.95, + "grad_norm": 0.7618956565856934, + "learning_rate": 1.438991606855622e-07, + "loss": 2.0177, + "step": 28459 + }, + { + "epoch": 0.95, + "grad_norm": 0.7207528948783875, + "learning_rate": 1.437195504687572e-07, + "loss": 2.0309, + "step": 28460 + }, + { + "epoch": 0.95, + "grad_norm": 0.7475081086158752, + "learning_rate": 1.4354005160243434e-07, + "loss": 1.9605, + "step": 28461 + }, + { + "epoch": 0.95, + "grad_norm": 0.7617717385292053, + "learning_rate": 1.433606640886187e-07, + "loss": 2.0666, + "step": 28462 + }, + { + "epoch": 0.95, + "grad_norm": 0.7372684478759766, + "learning_rate": 1.4318138792933646e-07, + "loss": 2.0078, + "step": 28463 + }, + { + "epoch": 0.95, + "grad_norm": 0.7516733407974243, + "learning_rate": 1.4300222312661593e-07, + "loss": 2.0079, + "step": 28464 + }, + { + "epoch": 0.95, + "grad_norm": 0.7308643460273743, + "learning_rate": 1.4282316968247778e-07, + "loss": 2.0602, + "step": 28465 + }, + { + "epoch": 0.95, + "grad_norm": 0.731998085975647, + "learning_rate": 1.4264422759894702e-07, + "loss": 2.1157, + "step": 28466 + }, + { + "epoch": 0.95, + "grad_norm": 0.7614666819572449, + "learning_rate": 1.424653968780465e-07, + "loss": 2.09, + "step": 28467 + }, + { + "epoch": 0.95, + "grad_norm": 0.759365975856781, + "learning_rate": 1.4228667752179347e-07, + "loss": 2.0183, + "step": 28468 + }, + { + "epoch": 0.95, + "grad_norm": 0.7782214283943176, + "learning_rate": 1.4210806953220857e-07, + "loss": 2.0965, + "step": 28469 + }, + { + "epoch": 0.95, + "grad_norm": 0.7407270669937134, + "learning_rate": 1.4192957291130905e-07, + "loss": 2.0706, + "step": 28470 + }, + { + "epoch": 0.95, + "grad_norm": 0.7834709882736206, + "learning_rate": 1.417511876611133e-07, + "loss": 2.0149, + "step": 28471 + }, + { + "epoch": 0.95, + "grad_norm": 0.738029956817627, + "learning_rate": 1.4157291378363303e-07, + "loss": 2.0759, + "step": 28472 + }, + { + "epoch": 0.95, + "grad_norm": 0.7465745210647583, + "learning_rate": 1.4139475128088552e-07, + "loss": 2.1144, + "step": 28473 + }, + { + "epoch": 0.95, + "grad_norm": 0.7632477879524231, + "learning_rate": 1.4121670015488253e-07, + "loss": 2.0558, + "step": 28474 + }, + { + "epoch": 0.95, + "grad_norm": 0.7353018522262573, + "learning_rate": 1.4103876040763466e-07, + "loss": 2.0965, + "step": 28475 + }, + { + "epoch": 0.95, + "grad_norm": 0.7343853116035461, + "learning_rate": 1.408609320411536e-07, + "loss": 2.0103, + "step": 28476 + }, + { + "epoch": 0.95, + "grad_norm": 0.7195243835449219, + "learning_rate": 1.4068321505744776e-07, + "loss": 1.9915, + "step": 28477 + }, + { + "epoch": 0.95, + "grad_norm": 0.727735698223114, + "learning_rate": 1.4050560945852444e-07, + "loss": 1.9919, + "step": 28478 + }, + { + "epoch": 0.95, + "grad_norm": 0.7325900793075562, + "learning_rate": 1.403281152463909e-07, + "loss": 2.0129, + "step": 28479 + }, + { + "epoch": 0.95, + "grad_norm": 0.7481728196144104, + "learning_rate": 1.4015073242305222e-07, + "loss": 2.1057, + "step": 28480 + }, + { + "epoch": 0.95, + "grad_norm": 0.7408105134963989, + "learning_rate": 1.3997346099051123e-07, + "loss": 2.0275, + "step": 28481 + }, + { + "epoch": 0.95, + "grad_norm": 0.7865252494812012, + "learning_rate": 1.397963009507719e-07, + "loss": 2.0462, + "step": 28482 + }, + { + "epoch": 0.95, + "grad_norm": 0.7346115708351135, + "learning_rate": 1.3961925230583484e-07, + "loss": 2.0036, + "step": 28483 + }, + { + "epoch": 0.95, + "grad_norm": 0.798490047454834, + "learning_rate": 1.3944231505770178e-07, + "loss": 1.9704, + "step": 28484 + }, + { + "epoch": 0.95, + "grad_norm": 0.715848445892334, + "learning_rate": 1.3926548920837002e-07, + "loss": 2.0676, + "step": 28485 + }, + { + "epoch": 0.95, + "grad_norm": 0.7405949234962463, + "learning_rate": 1.3908877475983796e-07, + "loss": 2.0536, + "step": 28486 + }, + { + "epoch": 0.95, + "grad_norm": 0.7531590461730957, + "learning_rate": 1.3891217171410177e-07, + "loss": 2.0468, + "step": 28487 + }, + { + "epoch": 0.95, + "grad_norm": 0.7611472010612488, + "learning_rate": 1.3873568007315652e-07, + "loss": 2.0526, + "step": 28488 + }, + { + "epoch": 0.95, + "grad_norm": 0.7280851602554321, + "learning_rate": 1.385592998389973e-07, + "loss": 2.0062, + "step": 28489 + }, + { + "epoch": 0.95, + "grad_norm": 0.7876123189926147, + "learning_rate": 1.3838303101361471e-07, + "loss": 2.0608, + "step": 28490 + }, + { + "epoch": 0.95, + "grad_norm": 0.7556420564651489, + "learning_rate": 1.3820687359900053e-07, + "loss": 2.0484, + "step": 28491 + }, + { + "epoch": 0.95, + "grad_norm": 0.7811589241027832, + "learning_rate": 1.3803082759714648e-07, + "loss": 2.0772, + "step": 28492 + }, + { + "epoch": 0.95, + "grad_norm": 0.7394065260887146, + "learning_rate": 1.3785489301004096e-07, + "loss": 2.0052, + "step": 28493 + }, + { + "epoch": 0.95, + "grad_norm": 0.7497679591178894, + "learning_rate": 1.3767906983967018e-07, + "loss": 2.0939, + "step": 28494 + }, + { + "epoch": 0.95, + "grad_norm": 0.7506534457206726, + "learning_rate": 1.375033580880214e-07, + "loss": 2.0704, + "step": 28495 + }, + { + "epoch": 0.95, + "grad_norm": 0.7549369931221008, + "learning_rate": 1.3732775775707973e-07, + "loss": 2.0543, + "step": 28496 + }, + { + "epoch": 0.95, + "grad_norm": 0.7826048135757446, + "learning_rate": 1.3715226884882916e-07, + "loss": 2.0624, + "step": 28497 + }, + { + "epoch": 0.95, + "grad_norm": 0.7772423624992371, + "learning_rate": 1.369768913652525e-07, + "loss": 2.0787, + "step": 28498 + }, + { + "epoch": 0.95, + "grad_norm": 0.7651323080062866, + "learning_rate": 1.3680162530833042e-07, + "loss": 2.0885, + "step": 28499 + }, + { + "epoch": 0.95, + "grad_norm": 0.7521963715553284, + "learning_rate": 1.3662647068004242e-07, + "loss": 2.0431, + "step": 28500 + }, + { + "epoch": 0.95, + "grad_norm": 0.7563682794570923, + "learning_rate": 1.3645142748236805e-07, + "loss": 1.9652, + "step": 28501 + }, + { + "epoch": 0.95, + "grad_norm": 0.7326636910438538, + "learning_rate": 1.3627649571728573e-07, + "loss": 2.016, + "step": 28502 + }, + { + "epoch": 0.95, + "grad_norm": 0.727057695388794, + "learning_rate": 1.3610167538677054e-07, + "loss": 2.0277, + "step": 28503 + }, + { + "epoch": 0.95, + "grad_norm": 0.7421227097511292, + "learning_rate": 1.3592696649279758e-07, + "loss": 1.9988, + "step": 28504 + }, + { + "epoch": 0.95, + "grad_norm": 0.7919015884399414, + "learning_rate": 1.3575236903734078e-07, + "loss": 2.0871, + "step": 28505 + }, + { + "epoch": 0.95, + "grad_norm": 0.758823573589325, + "learning_rate": 1.3557788302237308e-07, + "loss": 2.0222, + "step": 28506 + }, + { + "epoch": 0.95, + "grad_norm": 0.7408049702644348, + "learning_rate": 1.3540350844986505e-07, + "loss": 2.0455, + "step": 28507 + }, + { + "epoch": 0.95, + "grad_norm": 0.7485356330871582, + "learning_rate": 1.3522924532178738e-07, + "loss": 2.0465, + "step": 28508 + }, + { + "epoch": 0.95, + "grad_norm": 0.7242406010627747, + "learning_rate": 1.350550936401085e-07, + "loss": 2.0553, + "step": 28509 + }, + { + "epoch": 0.95, + "grad_norm": 0.7367553114891052, + "learning_rate": 1.3488105340679457e-07, + "loss": 2.0412, + "step": 28510 + }, + { + "epoch": 0.95, + "grad_norm": 0.7277079224586487, + "learning_rate": 1.3470712462381408e-07, + "loss": 1.9896, + "step": 28511 + }, + { + "epoch": 0.95, + "grad_norm": 0.7252501249313354, + "learning_rate": 1.3453330729313098e-07, + "loss": 2.0043, + "step": 28512 + }, + { + "epoch": 0.95, + "grad_norm": 0.7573903799057007, + "learning_rate": 1.3435960141670813e-07, + "loss": 2.1117, + "step": 28513 + }, + { + "epoch": 0.95, + "grad_norm": 0.7289398312568665, + "learning_rate": 1.341860069965084e-07, + "loss": 2.0753, + "step": 28514 + }, + { + "epoch": 0.95, + "grad_norm": 0.7309209108352661, + "learning_rate": 1.3401252403449472e-07, + "loss": 2.0573, + "step": 28515 + }, + { + "epoch": 0.95, + "grad_norm": 0.7334218621253967, + "learning_rate": 1.3383915253262436e-07, + "loss": 2.0946, + "step": 28516 + }, + { + "epoch": 0.95, + "grad_norm": 0.7478011846542358, + "learning_rate": 1.3366589249285687e-07, + "loss": 1.9797, + "step": 28517 + }, + { + "epoch": 0.95, + "grad_norm": 0.7408818006515503, + "learning_rate": 1.3349274391715072e-07, + "loss": 2.0055, + "step": 28518 + }, + { + "epoch": 0.95, + "grad_norm": 0.7568060159683228, + "learning_rate": 1.3331970680745986e-07, + "loss": 2.0176, + "step": 28519 + }, + { + "epoch": 0.95, + "grad_norm": 0.719158947467804, + "learning_rate": 1.3314678116574166e-07, + "loss": 2.0505, + "step": 28520 + }, + { + "epoch": 0.95, + "grad_norm": 0.736534059047699, + "learning_rate": 1.3297396699394893e-07, + "loss": 2.0351, + "step": 28521 + }, + { + "epoch": 0.95, + "grad_norm": 0.7723605632781982, + "learning_rate": 1.328012642940324e-07, + "loss": 2.0639, + "step": 28522 + }, + { + "epoch": 0.95, + "grad_norm": 0.7540204524993896, + "learning_rate": 1.326286730679449e-07, + "loss": 2.0851, + "step": 28523 + }, + { + "epoch": 0.95, + "grad_norm": 0.7801617980003357, + "learning_rate": 1.3245619331763716e-07, + "loss": 2.1001, + "step": 28524 + }, + { + "epoch": 0.95, + "grad_norm": 0.7578858733177185, + "learning_rate": 1.3228382504505532e-07, + "loss": 2.0432, + "step": 28525 + }, + { + "epoch": 0.95, + "grad_norm": 0.7669897675514221, + "learning_rate": 1.3211156825214676e-07, + "loss": 2.0629, + "step": 28526 + }, + { + "epoch": 0.95, + "grad_norm": 0.7698136568069458, + "learning_rate": 1.3193942294085992e-07, + "loss": 2.007, + "step": 28527 + }, + { + "epoch": 0.95, + "grad_norm": 0.729936957359314, + "learning_rate": 1.317673891131377e-07, + "loss": 2.0371, + "step": 28528 + }, + { + "epoch": 0.95, + "grad_norm": 0.7488539814949036, + "learning_rate": 1.3159546677092516e-07, + "loss": 2.0207, + "step": 28529 + }, + { + "epoch": 0.95, + "grad_norm": 0.7614069581031799, + "learning_rate": 1.3142365591616303e-07, + "loss": 2.0641, + "step": 28530 + }, + { + "epoch": 0.95, + "grad_norm": 0.7549223899841309, + "learning_rate": 1.3125195655079192e-07, + "loss": 2.0375, + "step": 28531 + }, + { + "epoch": 0.95, + "grad_norm": 0.7765169739723206, + "learning_rate": 1.3108036867675256e-07, + "loss": 2.0224, + "step": 28532 + }, + { + "epoch": 0.95, + "grad_norm": 0.735680103302002, + "learning_rate": 1.3090889229598447e-07, + "loss": 2.0758, + "step": 28533 + }, + { + "epoch": 0.95, + "grad_norm": 0.7738703489303589, + "learning_rate": 1.3073752741042388e-07, + "loss": 2.0431, + "step": 28534 + }, + { + "epoch": 0.95, + "grad_norm": 0.7168803811073303, + "learning_rate": 1.3056627402200594e-07, + "loss": 1.9887, + "step": 28535 + }, + { + "epoch": 0.95, + "grad_norm": 0.7260355353355408, + "learning_rate": 1.3039513213266685e-07, + "loss": 2.0265, + "step": 28536 + }, + { + "epoch": 0.95, + "grad_norm": 0.7297505140304565, + "learning_rate": 1.3022410174433842e-07, + "loss": 2.0558, + "step": 28537 + }, + { + "epoch": 0.95, + "grad_norm": 0.7644875645637512, + "learning_rate": 1.3005318285895574e-07, + "loss": 2.088, + "step": 28538 + }, + { + "epoch": 0.95, + "grad_norm": 0.7596598863601685, + "learning_rate": 1.2988237547844617e-07, + "loss": 1.9719, + "step": 28539 + }, + { + "epoch": 0.95, + "grad_norm": 0.7348309755325317, + "learning_rate": 1.2971167960474152e-07, + "loss": 2.0688, + "step": 28540 + }, + { + "epoch": 0.95, + "grad_norm": 0.7433788776397705, + "learning_rate": 1.295410952397691e-07, + "loss": 2.062, + "step": 28541 + }, + { + "epoch": 0.95, + "grad_norm": 0.7096604108810425, + "learning_rate": 1.293706223854585e-07, + "loss": 2.0596, + "step": 28542 + }, + { + "epoch": 0.95, + "grad_norm": 0.7426185011863708, + "learning_rate": 1.2920026104373261e-07, + "loss": 2.0389, + "step": 28543 + }, + { + "epoch": 0.95, + "grad_norm": 0.7203823924064636, + "learning_rate": 1.290300112165177e-07, + "loss": 2.0641, + "step": 28544 + }, + { + "epoch": 0.95, + "grad_norm": 0.7482665777206421, + "learning_rate": 1.2885987290573555e-07, + "loss": 2.035, + "step": 28545 + }, + { + "epoch": 0.95, + "grad_norm": 0.7347431778907776, + "learning_rate": 1.2868984611331124e-07, + "loss": 2.0475, + "step": 28546 + }, + { + "epoch": 0.95, + "grad_norm": 0.727058470249176, + "learning_rate": 1.2851993084116333e-07, + "loss": 2.0721, + "step": 28547 + }, + { + "epoch": 0.95, + "grad_norm": 0.7440776824951172, + "learning_rate": 1.283501270912113e-07, + "loss": 2.0266, + "step": 28548 + }, + { + "epoch": 0.95, + "grad_norm": 0.7505230903625488, + "learning_rate": 1.281804348653748e-07, + "loss": 2.0586, + "step": 28549 + }, + { + "epoch": 0.95, + "grad_norm": 0.741380512714386, + "learning_rate": 1.2801085416557003e-07, + "loss": 2.0498, + "step": 28550 + }, + { + "epoch": 0.95, + "grad_norm": 0.752052903175354, + "learning_rate": 1.2784138499371325e-07, + "loss": 2.0557, + "step": 28551 + }, + { + "epoch": 0.95, + "grad_norm": 0.7592599987983704, + "learning_rate": 1.2767202735171958e-07, + "loss": 2.0873, + "step": 28552 + }, + { + "epoch": 0.95, + "grad_norm": 0.7741233706474304, + "learning_rate": 1.2750278124149973e-07, + "loss": 2.0656, + "step": 28553 + }, + { + "epoch": 0.95, + "grad_norm": 0.7629435658454895, + "learning_rate": 1.2733364666496883e-07, + "loss": 2.058, + "step": 28554 + }, + { + "epoch": 0.95, + "grad_norm": 0.7637726068496704, + "learning_rate": 1.2716462362403649e-07, + "loss": 2.0836, + "step": 28555 + }, + { + "epoch": 0.95, + "grad_norm": 0.7383171916007996, + "learning_rate": 1.2699571212061225e-07, + "loss": 2.0243, + "step": 28556 + }, + { + "epoch": 0.95, + "grad_norm": 0.7305157780647278, + "learning_rate": 1.2682691215660347e-07, + "loss": 1.9554, + "step": 28557 + }, + { + "epoch": 0.95, + "grad_norm": 0.7961694002151489, + "learning_rate": 1.2665822373391978e-07, + "loss": 2.0745, + "step": 28558 + }, + { + "epoch": 0.95, + "grad_norm": 0.7379926443099976, + "learning_rate": 1.2648964685446296e-07, + "loss": 1.9991, + "step": 28559 + }, + { + "epoch": 0.95, + "grad_norm": 0.7493047118186951, + "learning_rate": 1.263211815201415e-07, + "loss": 2.0541, + "step": 28560 + }, + { + "epoch": 0.95, + "grad_norm": 0.7484104633331299, + "learning_rate": 1.2615282773285498e-07, + "loss": 2.0519, + "step": 28561 + }, + { + "epoch": 0.95, + "grad_norm": 0.7362362146377563, + "learning_rate": 1.2598458549450853e-07, + "loss": 2.0116, + "step": 28562 + }, + { + "epoch": 0.95, + "grad_norm": 0.7650010585784912, + "learning_rate": 1.2581645480700066e-07, + "loss": 2.0445, + "step": 28563 + }, + { + "epoch": 0.95, + "grad_norm": 0.7376635670661926, + "learning_rate": 1.2564843567223317e-07, + "loss": 2.0892, + "step": 28564 + }, + { + "epoch": 0.95, + "grad_norm": 0.764620840549469, + "learning_rate": 1.254805280921012e-07, + "loss": 2.083, + "step": 28565 + }, + { + "epoch": 0.95, + "grad_norm": 0.7527552843093872, + "learning_rate": 1.2531273206850325e-07, + "loss": 2.0129, + "step": 28566 + }, + { + "epoch": 0.95, + "grad_norm": 0.7629365921020508, + "learning_rate": 1.2514504760333557e-07, + "loss": 2.0759, + "step": 28567 + }, + { + "epoch": 0.95, + "grad_norm": 0.7253546118736267, + "learning_rate": 1.2497747469849108e-07, + "loss": 2.0146, + "step": 28568 + }, + { + "epoch": 0.95, + "grad_norm": 0.8023061752319336, + "learning_rate": 1.2481001335586497e-07, + "loss": 2.0228, + "step": 28569 + }, + { + "epoch": 0.95, + "grad_norm": 0.7748093605041504, + "learning_rate": 1.2464266357734677e-07, + "loss": 2.0914, + "step": 28570 + }, + { + "epoch": 0.95, + "grad_norm": 0.7617517709732056, + "learning_rate": 1.244754253648295e-07, + "loss": 2.0394, + "step": 28571 + }, + { + "epoch": 0.95, + "grad_norm": 0.7129969596862793, + "learning_rate": 1.2430829872019934e-07, + "loss": 2.0703, + "step": 28572 + }, + { + "epoch": 0.95, + "grad_norm": 0.7633289694786072, + "learning_rate": 1.2414128364534818e-07, + "loss": 2.0278, + "step": 28573 + }, + { + "epoch": 0.95, + "grad_norm": 0.7370293736457825, + "learning_rate": 1.2397438014216e-07, + "loss": 1.968, + "step": 28574 + }, + { + "epoch": 0.95, + "grad_norm": 0.7366570830345154, + "learning_rate": 1.2380758821252116e-07, + "loss": 2.0212, + "step": 28575 + }, + { + "epoch": 0.95, + "grad_norm": 0.7286252379417419, + "learning_rate": 1.2364090785831672e-07, + "loss": 1.9655, + "step": 28576 + }, + { + "epoch": 0.95, + "grad_norm": 0.7130202054977417, + "learning_rate": 1.2347433908142858e-07, + "loss": 1.9784, + "step": 28577 + }, + { + "epoch": 0.95, + "grad_norm": 0.7651662230491638, + "learning_rate": 1.2330788188373965e-07, + "loss": 2.0205, + "step": 28578 + }, + { + "epoch": 0.95, + "grad_norm": 0.7573035359382629, + "learning_rate": 1.2314153626712954e-07, + "loss": 2.0897, + "step": 28579 + }, + { + "epoch": 0.95, + "grad_norm": 0.7515241503715515, + "learning_rate": 1.2297530223347898e-07, + "loss": 2.016, + "step": 28580 + }, + { + "epoch": 0.95, + "grad_norm": 0.7582022547721863, + "learning_rate": 1.2280917978466423e-07, + "loss": 2.0049, + "step": 28581 + }, + { + "epoch": 0.95, + "grad_norm": 0.7236115336418152, + "learning_rate": 1.2264316892256266e-07, + "loss": 2.0393, + "step": 28582 + }, + { + "epoch": 0.95, + "grad_norm": 0.7457336187362671, + "learning_rate": 1.224772696490495e-07, + "loss": 2.0485, + "step": 28583 + }, + { + "epoch": 0.95, + "grad_norm": 0.7515358328819275, + "learning_rate": 1.2231148196600096e-07, + "loss": 2.094, + "step": 28584 + }, + { + "epoch": 0.95, + "grad_norm": 0.752422571182251, + "learning_rate": 1.2214580587528668e-07, + "loss": 2.0487, + "step": 28585 + }, + { + "epoch": 0.95, + "grad_norm": 0.7412647008895874, + "learning_rate": 1.2198024137878073e-07, + "loss": 2.0612, + "step": 28586 + }, + { + "epoch": 0.95, + "grad_norm": 0.7286475300788879, + "learning_rate": 1.218147884783527e-07, + "loss": 2.0701, + "step": 28587 + }, + { + "epoch": 0.95, + "grad_norm": 0.7538265585899353, + "learning_rate": 1.2164944717587224e-07, + "loss": 2.0521, + "step": 28588 + }, + { + "epoch": 0.95, + "grad_norm": 0.7535181045532227, + "learning_rate": 1.214842174732067e-07, + "loss": 2.0615, + "step": 28589 + }, + { + "epoch": 0.95, + "grad_norm": 0.7363185882568359, + "learning_rate": 1.2131909937222354e-07, + "loss": 2.0222, + "step": 28590 + }, + { + "epoch": 0.95, + "grad_norm": 0.7342584133148193, + "learning_rate": 1.2115409287478785e-07, + "loss": 2.0144, + "step": 28591 + }, + { + "epoch": 0.95, + "grad_norm": 0.7449294328689575, + "learning_rate": 1.2098919798276376e-07, + "loss": 2.041, + "step": 28592 + }, + { + "epoch": 0.95, + "grad_norm": 0.7489109039306641, + "learning_rate": 1.2082441469801308e-07, + "loss": 2.0568, + "step": 28593 + }, + { + "epoch": 0.95, + "grad_norm": 0.7555636167526245, + "learning_rate": 1.206597430223988e-07, + "loss": 2.0544, + "step": 28594 + }, + { + "epoch": 0.95, + "grad_norm": 0.7566273808479309, + "learning_rate": 1.2049518295778162e-07, + "loss": 2.0331, + "step": 28595 + }, + { + "epoch": 0.95, + "grad_norm": 0.7314335107803345, + "learning_rate": 1.2033073450602006e-07, + "loss": 2.0515, + "step": 28596 + }, + { + "epoch": 0.95, + "grad_norm": 0.7581839561462402, + "learning_rate": 1.2016639766897043e-07, + "loss": 2.0605, + "step": 28597 + }, + { + "epoch": 0.95, + "grad_norm": 0.7689090371131897, + "learning_rate": 1.2000217244849122e-07, + "loss": 2.0592, + "step": 28598 + }, + { + "epoch": 0.95, + "grad_norm": 0.7555123567581177, + "learning_rate": 1.1983805884643763e-07, + "loss": 2.1348, + "step": 28599 + }, + { + "epoch": 0.95, + "grad_norm": 0.7491744756698608, + "learning_rate": 1.196740568646626e-07, + "loss": 2.0613, + "step": 28600 + }, + { + "epoch": 0.95, + "grad_norm": 0.7228789329528809, + "learning_rate": 1.1951016650502024e-07, + "loss": 1.9965, + "step": 28601 + }, + { + "epoch": 0.95, + "grad_norm": 0.7371302247047424, + "learning_rate": 1.1934638776936015e-07, + "loss": 2.0976, + "step": 28602 + }, + { + "epoch": 0.95, + "grad_norm": 0.7275402545928955, + "learning_rate": 1.191827206595364e-07, + "loss": 2.0328, + "step": 28603 + }, + { + "epoch": 0.95, + "grad_norm": 0.726569414138794, + "learning_rate": 1.1901916517739421e-07, + "loss": 2.0449, + "step": 28604 + }, + { + "epoch": 0.95, + "grad_norm": 0.7519346475601196, + "learning_rate": 1.1885572132478207e-07, + "loss": 1.9365, + "step": 28605 + }, + { + "epoch": 0.95, + "grad_norm": 0.740821361541748, + "learning_rate": 1.1869238910354852e-07, + "loss": 2.031, + "step": 28606 + }, + { + "epoch": 0.95, + "grad_norm": 0.7059916853904724, + "learning_rate": 1.1852916851553542e-07, + "loss": 2.0358, + "step": 28607 + }, + { + "epoch": 0.95, + "grad_norm": 0.7687329053878784, + "learning_rate": 1.1836605956259017e-07, + "loss": 1.9872, + "step": 28608 + }, + { + "epoch": 0.95, + "grad_norm": 0.7150636911392212, + "learning_rate": 1.1820306224655353e-07, + "loss": 2.0439, + "step": 28609 + }, + { + "epoch": 0.95, + "grad_norm": 0.7502188682556152, + "learning_rate": 1.1804017656926736e-07, + "loss": 2.1067, + "step": 28610 + }, + { + "epoch": 0.95, + "grad_norm": 0.7261785268783569, + "learning_rate": 1.178774025325713e-07, + "loss": 2.0485, + "step": 28611 + }, + { + "epoch": 0.95, + "grad_norm": 0.7605353593826294, + "learning_rate": 1.1771474013830608e-07, + "loss": 2.0691, + "step": 28612 + }, + { + "epoch": 0.95, + "grad_norm": 0.7837786674499512, + "learning_rate": 1.1755218938830803e-07, + "loss": 1.962, + "step": 28613 + }, + { + "epoch": 0.95, + "grad_norm": 0.747161328792572, + "learning_rate": 1.1738975028441236e-07, + "loss": 2.0765, + "step": 28614 + }, + { + "epoch": 0.95, + "grad_norm": 0.7233255505561829, + "learning_rate": 1.1722742282845756e-07, + "loss": 2.0796, + "step": 28615 + }, + { + "epoch": 0.95, + "grad_norm": 0.7441611886024475, + "learning_rate": 1.170652070222733e-07, + "loss": 2.0386, + "step": 28616 + }, + { + "epoch": 0.95, + "grad_norm": 0.7729007005691528, + "learning_rate": 1.169031028676959e-07, + "loss": 1.9767, + "step": 28617 + }, + { + "epoch": 0.95, + "grad_norm": 0.7489969730377197, + "learning_rate": 1.1674111036655389e-07, + "loss": 2.0437, + "step": 28618 + }, + { + "epoch": 0.95, + "grad_norm": 0.752183735370636, + "learning_rate": 1.1657922952068024e-07, + "loss": 2.0438, + "step": 28619 + }, + { + "epoch": 0.95, + "grad_norm": 0.75187087059021, + "learning_rate": 1.1641746033190015e-07, + "loss": 2.0541, + "step": 28620 + }, + { + "epoch": 0.95, + "grad_norm": 0.7474924921989441, + "learning_rate": 1.1625580280204552e-07, + "loss": 2.0781, + "step": 28621 + }, + { + "epoch": 0.95, + "grad_norm": 0.7701267600059509, + "learning_rate": 1.160942569329393e-07, + "loss": 2.078, + "step": 28622 + }, + { + "epoch": 0.95, + "grad_norm": 0.7274881601333618, + "learning_rate": 1.1593282272640671e-07, + "loss": 2.0423, + "step": 28623 + }, + { + "epoch": 0.95, + "grad_norm": 0.8035042881965637, + "learning_rate": 1.1577150018427297e-07, + "loss": 2.05, + "step": 28624 + }, + { + "epoch": 0.95, + "grad_norm": 0.7427250742912292, + "learning_rate": 1.1561028930836105e-07, + "loss": 2.0443, + "step": 28625 + }, + { + "epoch": 0.95, + "grad_norm": 0.7830579280853271, + "learning_rate": 1.154491901004906e-07, + "loss": 2.1077, + "step": 28626 + }, + { + "epoch": 0.95, + "grad_norm": 0.73859703540802, + "learning_rate": 1.152882025624824e-07, + "loss": 2.0516, + "step": 28627 + }, + { + "epoch": 0.95, + "grad_norm": 0.780911386013031, + "learning_rate": 1.15127326696155e-07, + "loss": 2.1168, + "step": 28628 + }, + { + "epoch": 0.95, + "grad_norm": 0.7387827038764954, + "learning_rate": 1.1496656250332582e-07, + "loss": 1.9209, + "step": 28629 + }, + { + "epoch": 0.95, + "grad_norm": 0.737053394317627, + "learning_rate": 1.148059099858112e-07, + "loss": 2.0994, + "step": 28630 + }, + { + "epoch": 0.95, + "grad_norm": 0.7374988794326782, + "learning_rate": 1.1464536914542745e-07, + "loss": 2.0541, + "step": 28631 + }, + { + "epoch": 0.95, + "grad_norm": 0.741296648979187, + "learning_rate": 1.1448493998398535e-07, + "loss": 2.0486, + "step": 28632 + }, + { + "epoch": 0.95, + "grad_norm": 0.7400513887405396, + "learning_rate": 1.1432462250329901e-07, + "loss": 1.9812, + "step": 28633 + }, + { + "epoch": 0.95, + "grad_norm": 0.7461794018745422, + "learning_rate": 1.141644167051803e-07, + "loss": 2.0548, + "step": 28634 + }, + { + "epoch": 0.95, + "grad_norm": 0.7359539866447449, + "learning_rate": 1.1400432259143779e-07, + "loss": 1.9906, + "step": 28635 + }, + { + "epoch": 0.95, + "grad_norm": 0.7408196330070496, + "learning_rate": 1.1384434016388113e-07, + "loss": 2.0845, + "step": 28636 + }, + { + "epoch": 0.95, + "grad_norm": 0.7656357884407043, + "learning_rate": 1.1368446942431666e-07, + "loss": 2.0003, + "step": 28637 + }, + { + "epoch": 0.95, + "grad_norm": 0.7472095489501953, + "learning_rate": 1.135247103745507e-07, + "loss": 2.0622, + "step": 28638 + }, + { + "epoch": 0.95, + "grad_norm": 0.7568441033363342, + "learning_rate": 1.1336506301638961e-07, + "loss": 2.0046, + "step": 28639 + }, + { + "epoch": 0.95, + "grad_norm": 0.7535372376441956, + "learning_rate": 1.1320552735163525e-07, + "loss": 2.0237, + "step": 28640 + }, + { + "epoch": 0.95, + "grad_norm": 0.7311646342277527, + "learning_rate": 1.1304610338209177e-07, + "loss": 2.0495, + "step": 28641 + }, + { + "epoch": 0.95, + "grad_norm": 0.7639433145523071, + "learning_rate": 1.128867911095577e-07, + "loss": 2.0569, + "step": 28642 + }, + { + "epoch": 0.95, + "grad_norm": 0.7473360896110535, + "learning_rate": 1.1272759053583493e-07, + "loss": 2.0265, + "step": 28643 + }, + { + "epoch": 0.95, + "grad_norm": 0.7467397451400757, + "learning_rate": 1.1256850166272093e-07, + "loss": 2.0596, + "step": 28644 + }, + { + "epoch": 0.95, + "grad_norm": 0.760664701461792, + "learning_rate": 1.1240952449201315e-07, + "loss": 2.0811, + "step": 28645 + }, + { + "epoch": 0.95, + "grad_norm": 0.7418599128723145, + "learning_rate": 1.1225065902550792e-07, + "loss": 2.0668, + "step": 28646 + }, + { + "epoch": 0.95, + "grad_norm": 0.7384566068649292, + "learning_rate": 1.1209190526500047e-07, + "loss": 2.0226, + "step": 28647 + }, + { + "epoch": 0.95, + "grad_norm": 0.7502782344818115, + "learning_rate": 1.1193326321228492e-07, + "loss": 2.0867, + "step": 28648 + }, + { + "epoch": 0.95, + "grad_norm": 0.7258604764938354, + "learning_rate": 1.1177473286914986e-07, + "loss": 2.0061, + "step": 28649 + }, + { + "epoch": 0.95, + "grad_norm": 0.7358530163764954, + "learning_rate": 1.116163142373905e-07, + "loss": 1.9964, + "step": 28650 + }, + { + "epoch": 0.95, + "grad_norm": 0.7093873023986816, + "learning_rate": 1.114580073187943e-07, + "loss": 2.0404, + "step": 28651 + }, + { + "epoch": 0.95, + "grad_norm": 0.7596768140792847, + "learning_rate": 1.1129981211514984e-07, + "loss": 2.0179, + "step": 28652 + }, + { + "epoch": 0.95, + "grad_norm": 0.7384152412414551, + "learning_rate": 1.111417286282468e-07, + "loss": 2.0554, + "step": 28653 + }, + { + "epoch": 0.95, + "grad_norm": 0.7256574034690857, + "learning_rate": 1.1098375685986707e-07, + "loss": 1.978, + "step": 28654 + }, + { + "epoch": 0.95, + "grad_norm": 0.7446480393409729, + "learning_rate": 1.1082589681179701e-07, + "loss": 2.0874, + "step": 28655 + }, + { + "epoch": 0.95, + "grad_norm": 0.7562741637229919, + "learning_rate": 1.1066814848582186e-07, + "loss": 2.0084, + "step": 28656 + }, + { + "epoch": 0.95, + "grad_norm": 0.7268078923225403, + "learning_rate": 1.1051051188372131e-07, + "loss": 2.0528, + "step": 28657 + }, + { + "epoch": 0.95, + "grad_norm": 0.7587331533432007, + "learning_rate": 1.1035298700727726e-07, + "loss": 2.0468, + "step": 28658 + }, + { + "epoch": 0.95, + "grad_norm": 0.7448142766952515, + "learning_rate": 1.101955738582694e-07, + "loss": 2.0249, + "step": 28659 + }, + { + "epoch": 0.95, + "grad_norm": 0.7738903760910034, + "learning_rate": 1.1003827243847631e-07, + "loss": 2.0469, + "step": 28660 + }, + { + "epoch": 0.95, + "grad_norm": 0.7613240480422974, + "learning_rate": 1.0988108274967435e-07, + "loss": 1.9857, + "step": 28661 + }, + { + "epoch": 0.95, + "grad_norm": 0.7377136945724487, + "learning_rate": 1.0972400479363987e-07, + "loss": 2.0533, + "step": 28662 + }, + { + "epoch": 0.95, + "grad_norm": 0.7845824956893921, + "learning_rate": 1.0956703857214701e-07, + "loss": 2.1349, + "step": 28663 + }, + { + "epoch": 0.95, + "grad_norm": 0.7358260154724121, + "learning_rate": 1.094101840869699e-07, + "loss": 2.0449, + "step": 28664 + }, + { + "epoch": 0.95, + "grad_norm": 0.767694354057312, + "learning_rate": 1.0925344133987936e-07, + "loss": 2.0288, + "step": 28665 + }, + { + "epoch": 0.95, + "grad_norm": 0.7372912764549255, + "learning_rate": 1.0909681033264841e-07, + "loss": 2.0174, + "step": 28666 + }, + { + "epoch": 0.95, + "grad_norm": 0.7376200556755066, + "learning_rate": 1.089402910670434e-07, + "loss": 2.0725, + "step": 28667 + }, + { + "epoch": 0.95, + "grad_norm": 0.7449764013290405, + "learning_rate": 1.0878388354483404e-07, + "loss": 2.0373, + "step": 28668 + }, + { + "epoch": 0.95, + "grad_norm": 0.7363483309745789, + "learning_rate": 1.0862758776778892e-07, + "loss": 2.0344, + "step": 28669 + }, + { + "epoch": 0.95, + "grad_norm": 0.760418176651001, + "learning_rate": 1.0847140373767218e-07, + "loss": 2.079, + "step": 28670 + }, + { + "epoch": 0.95, + "grad_norm": 0.757100522518158, + "learning_rate": 1.0831533145624795e-07, + "loss": 2.0561, + "step": 28671 + }, + { + "epoch": 0.95, + "grad_norm": 0.7479868531227112, + "learning_rate": 1.081593709252804e-07, + "loss": 2.0161, + "step": 28672 + }, + { + "epoch": 0.95, + "grad_norm": 0.7149615287780762, + "learning_rate": 1.0800352214653031e-07, + "loss": 2.0088, + "step": 28673 + }, + { + "epoch": 0.95, + "grad_norm": 0.7360186576843262, + "learning_rate": 1.0784778512175964e-07, + "loss": 2.0431, + "step": 28674 + }, + { + "epoch": 0.95, + "grad_norm": 0.755361795425415, + "learning_rate": 1.0769215985272807e-07, + "loss": 2.0418, + "step": 28675 + }, + { + "epoch": 0.95, + "grad_norm": 0.734107255935669, + "learning_rate": 1.0753664634119198e-07, + "loss": 2.0343, + "step": 28676 + }, + { + "epoch": 0.95, + "grad_norm": 0.7883127927780151, + "learning_rate": 1.0738124458890887e-07, + "loss": 2.0019, + "step": 28677 + }, + { + "epoch": 0.95, + "grad_norm": 0.7369222640991211, + "learning_rate": 1.0722595459763618e-07, + "loss": 1.9578, + "step": 28678 + }, + { + "epoch": 0.95, + "grad_norm": 0.7316192388534546, + "learning_rate": 1.07070776369127e-07, + "loss": 2.0608, + "step": 28679 + }, + { + "epoch": 0.95, + "grad_norm": 0.7118890285491943, + "learning_rate": 1.0691570990513322e-07, + "loss": 2.0766, + "step": 28680 + }, + { + "epoch": 0.95, + "grad_norm": 0.7194395661354065, + "learning_rate": 1.0676075520740791e-07, + "loss": 1.9792, + "step": 28681 + }, + { + "epoch": 0.95, + "grad_norm": 0.7654778361320496, + "learning_rate": 1.0660591227770189e-07, + "loss": 2.1259, + "step": 28682 + }, + { + "epoch": 0.95, + "grad_norm": 0.741857647895813, + "learning_rate": 1.0645118111776376e-07, + "loss": 1.9914, + "step": 28683 + }, + { + "epoch": 0.95, + "grad_norm": 0.7469571828842163, + "learning_rate": 1.0629656172934322e-07, + "loss": 2.063, + "step": 28684 + }, + { + "epoch": 0.95, + "grad_norm": 0.7291365265846252, + "learning_rate": 1.0614205411418554e-07, + "loss": 1.9764, + "step": 28685 + }, + { + "epoch": 0.95, + "grad_norm": 0.7237410545349121, + "learning_rate": 1.0598765827403602e-07, + "loss": 2.0447, + "step": 28686 + }, + { + "epoch": 0.95, + "grad_norm": 0.7562398314476013, + "learning_rate": 1.0583337421063878e-07, + "loss": 2.0727, + "step": 28687 + }, + { + "epoch": 0.95, + "grad_norm": 0.7528773546218872, + "learning_rate": 1.0567920192573911e-07, + "loss": 2.0335, + "step": 28688 + }, + { + "epoch": 0.95, + "grad_norm": 0.7367376089096069, + "learning_rate": 1.0552514142107672e-07, + "loss": 2.0688, + "step": 28689 + }, + { + "epoch": 0.95, + "grad_norm": 0.7565695643424988, + "learning_rate": 1.0537119269839135e-07, + "loss": 2.1415, + "step": 28690 + }, + { + "epoch": 0.95, + "grad_norm": 0.7651036381721497, + "learning_rate": 1.0521735575942494e-07, + "loss": 1.9834, + "step": 28691 + }, + { + "epoch": 0.95, + "grad_norm": 0.7305153608322144, + "learning_rate": 1.0506363060591385e-07, + "loss": 1.955, + "step": 28692 + }, + { + "epoch": 0.95, + "grad_norm": 0.7481963634490967, + "learning_rate": 1.0491001723959338e-07, + "loss": 2.0373, + "step": 28693 + }, + { + "epoch": 0.95, + "grad_norm": 0.7346555590629578, + "learning_rate": 1.0475651566220213e-07, + "loss": 2.0794, + "step": 28694 + }, + { + "epoch": 0.95, + "grad_norm": 0.7491049766540527, + "learning_rate": 1.0460312587547094e-07, + "loss": 2.0807, + "step": 28695 + }, + { + "epoch": 0.95, + "grad_norm": 0.7417234182357788, + "learning_rate": 1.044498478811351e-07, + "loss": 2.0217, + "step": 28696 + }, + { + "epoch": 0.95, + "grad_norm": 0.743399977684021, + "learning_rate": 1.0429668168092655e-07, + "loss": 1.9982, + "step": 28697 + }, + { + "epoch": 0.95, + "grad_norm": 0.7455649971961975, + "learning_rate": 1.041436272765739e-07, + "loss": 2.1149, + "step": 28698 + }, + { + "epoch": 0.95, + "grad_norm": 0.7458542585372925, + "learning_rate": 1.0399068466980688e-07, + "loss": 2.0888, + "step": 28699 + }, + { + "epoch": 0.95, + "grad_norm": 0.7343627214431763, + "learning_rate": 1.0383785386235301e-07, + "loss": 2.0342, + "step": 28700 + }, + { + "epoch": 0.95, + "grad_norm": 0.7347861528396606, + "learning_rate": 1.0368513485594089e-07, + "loss": 1.9451, + "step": 28701 + }, + { + "epoch": 0.95, + "grad_norm": 0.7316446900367737, + "learning_rate": 1.0353252765229249e-07, + "loss": 2.0682, + "step": 28702 + }, + { + "epoch": 0.95, + "grad_norm": 0.7549335956573486, + "learning_rate": 1.033800322531342e-07, + "loss": 2.0115, + "step": 28703 + }, + { + "epoch": 0.95, + "grad_norm": 0.7718681693077087, + "learning_rate": 1.0322764866018908e-07, + "loss": 2.0675, + "step": 28704 + }, + { + "epoch": 0.96, + "grad_norm": 0.7223976254463196, + "learning_rate": 1.0307537687517688e-07, + "loss": 2.0379, + "step": 28705 + }, + { + "epoch": 0.96, + "grad_norm": 0.7304015755653381, + "learning_rate": 1.0292321689981954e-07, + "loss": 2.0108, + "step": 28706 + }, + { + "epoch": 0.96, + "grad_norm": 0.7386050820350647, + "learning_rate": 1.0277116873583571e-07, + "loss": 2.1091, + "step": 28707 + }, + { + "epoch": 0.96, + "grad_norm": 0.7167690396308899, + "learning_rate": 1.0261923238494175e-07, + "loss": 2.0387, + "step": 28708 + }, + { + "epoch": 0.96, + "grad_norm": 0.7573454976081848, + "learning_rate": 1.0246740784885634e-07, + "loss": 2.041, + "step": 28709 + }, + { + "epoch": 0.96, + "grad_norm": 0.7409628033638, + "learning_rate": 1.0231569512929363e-07, + "loss": 2.0235, + "step": 28710 + }, + { + "epoch": 0.96, + "grad_norm": 0.7428113222122192, + "learning_rate": 1.0216409422796669e-07, + "loss": 2.0777, + "step": 28711 + }, + { + "epoch": 0.96, + "grad_norm": 0.7730421423912048, + "learning_rate": 1.0201260514658973e-07, + "loss": 2.0588, + "step": 28712 + }, + { + "epoch": 0.96, + "grad_norm": 0.7434766888618469, + "learning_rate": 1.0186122788687358e-07, + "loss": 2.0169, + "step": 28713 + }, + { + "epoch": 0.96, + "grad_norm": 0.7657095193862915, + "learning_rate": 1.01709962450528e-07, + "loss": 2.0709, + "step": 28714 + }, + { + "epoch": 0.96, + "grad_norm": 0.7505764961242676, + "learning_rate": 1.0155880883926272e-07, + "loss": 2.0719, + "step": 28715 + }, + { + "epoch": 0.96, + "grad_norm": 0.748176097869873, + "learning_rate": 1.0140776705478528e-07, + "loss": 2.0719, + "step": 28716 + }, + { + "epoch": 0.96, + "grad_norm": 0.7549925446510315, + "learning_rate": 1.0125683709880096e-07, + "loss": 2.074, + "step": 28717 + }, + { + "epoch": 0.96, + "grad_norm": 0.7779072523117065, + "learning_rate": 1.011060189730162e-07, + "loss": 2.0837, + "step": 28718 + }, + { + "epoch": 0.96, + "grad_norm": 0.7540832161903381, + "learning_rate": 1.0095531267913405e-07, + "loss": 2.121, + "step": 28719 + }, + { + "epoch": 0.96, + "grad_norm": 0.7389533519744873, + "learning_rate": 1.0080471821885762e-07, + "loss": 2.0618, + "step": 28720 + }, + { + "epoch": 0.96, + "grad_norm": 0.7536616921424866, + "learning_rate": 1.0065423559388776e-07, + "loss": 2.0346, + "step": 28721 + }, + { + "epoch": 0.96, + "grad_norm": 0.7630150318145752, + "learning_rate": 1.0050386480592533e-07, + "loss": 2.1062, + "step": 28722 + }, + { + "epoch": 0.96, + "grad_norm": 0.74809330701828, + "learning_rate": 1.0035360585666786e-07, + "loss": 2.0845, + "step": 28723 + }, + { + "epoch": 0.96, + "grad_norm": 0.8102424740791321, + "learning_rate": 1.00203458747814e-07, + "loss": 2.1354, + "step": 28724 + }, + { + "epoch": 0.96, + "grad_norm": 0.7356367707252502, + "learning_rate": 1.0005342348105906e-07, + "loss": 2.0408, + "step": 28725 + }, + { + "epoch": 0.96, + "grad_norm": 0.7479951977729797, + "learning_rate": 9.990350005809835e-08, + "loss": 2.0782, + "step": 28726 + }, + { + "epoch": 0.96, + "grad_norm": 0.7814403772354126, + "learning_rate": 9.975368848062606e-08, + "loss": 2.0527, + "step": 28727 + }, + { + "epoch": 0.96, + "grad_norm": 0.7389352321624756, + "learning_rate": 9.960398875033527e-08, + "loss": 2.0229, + "step": 28728 + }, + { + "epoch": 0.96, + "grad_norm": 0.7411439418792725, + "learning_rate": 9.945440086891688e-08, + "loss": 2.041, + "step": 28729 + }, + { + "epoch": 0.96, + "grad_norm": 0.7428656816482544, + "learning_rate": 9.930492483805843e-08, + "loss": 2.0514, + "step": 28730 + }, + { + "epoch": 0.96, + "grad_norm": 0.7575491070747375, + "learning_rate": 9.915556065945186e-08, + "loss": 2.0328, + "step": 28731 + }, + { + "epoch": 0.96, + "grad_norm": 0.7489508390426636, + "learning_rate": 9.900630833478364e-08, + "loss": 2.0193, + "step": 28732 + }, + { + "epoch": 0.96, + "grad_norm": 0.7498624324798584, + "learning_rate": 9.885716786573907e-08, + "loss": 2.0789, + "step": 28733 + }, + { + "epoch": 0.96, + "grad_norm": 0.7056251764297485, + "learning_rate": 9.870813925400346e-08, + "loss": 2.028, + "step": 28734 + }, + { + "epoch": 0.96, + "grad_norm": 0.7531627416610718, + "learning_rate": 9.855922250126105e-08, + "loss": 2.0822, + "step": 28735 + }, + { + "epoch": 0.96, + "grad_norm": 0.7574704885482788, + "learning_rate": 9.841041760919268e-08, + "loss": 2.0352, + "step": 28736 + }, + { + "epoch": 0.96, + "grad_norm": 0.7346788644790649, + "learning_rate": 9.826172457948147e-08, + "loss": 2.0344, + "step": 28737 + }, + { + "epoch": 0.96, + "grad_norm": 0.7184901237487793, + "learning_rate": 9.811314341380606e-08, + "loss": 2.0839, + "step": 28738 + }, + { + "epoch": 0.96, + "grad_norm": 0.7373314499855042, + "learning_rate": 9.796467411384513e-08, + "loss": 2.0052, + "step": 28739 + }, + { + "epoch": 0.96, + "grad_norm": 0.7102420330047607, + "learning_rate": 9.781631668127622e-08, + "loss": 2.0373, + "step": 28740 + }, + { + "epoch": 0.96, + "grad_norm": 0.7478048205375671, + "learning_rate": 9.766807111777465e-08, + "loss": 2.0628, + "step": 28741 + }, + { + "epoch": 0.96, + "grad_norm": 0.7541193962097168, + "learning_rate": 9.751993742501686e-08, + "loss": 2.0405, + "step": 28742 + }, + { + "epoch": 0.96, + "grad_norm": 0.7814239263534546, + "learning_rate": 9.737191560467485e-08, + "loss": 2.1096, + "step": 28743 + }, + { + "epoch": 0.96, + "grad_norm": 0.7488712072372437, + "learning_rate": 9.722400565842283e-08, + "loss": 2.0539, + "step": 28744 + }, + { + "epoch": 0.96, + "grad_norm": 0.76631760597229, + "learning_rate": 9.707620758792835e-08, + "loss": 2.0806, + "step": 28745 + }, + { + "epoch": 0.96, + "grad_norm": 0.7401441335678101, + "learning_rate": 9.692852139486453e-08, + "loss": 2.0578, + "step": 28746 + }, + { + "epoch": 0.96, + "grad_norm": 0.7271931171417236, + "learning_rate": 9.67809470808978e-08, + "loss": 1.997, + "step": 28747 + }, + { + "epoch": 0.96, + "grad_norm": 0.731926441192627, + "learning_rate": 9.663348464769684e-08, + "loss": 2.0187, + "step": 28748 + }, + { + "epoch": 0.96, + "grad_norm": 0.7533583641052246, + "learning_rate": 9.648613409692587e-08, + "loss": 2.0563, + "step": 28749 + }, + { + "epoch": 0.96, + "grad_norm": 0.7400550842285156, + "learning_rate": 9.633889543025133e-08, + "loss": 2.0761, + "step": 28750 + }, + { + "epoch": 0.96, + "grad_norm": 0.739736020565033, + "learning_rate": 9.619176864933522e-08, + "loss": 1.9838, + "step": 28751 + }, + { + "epoch": 0.96, + "grad_norm": 0.737621545791626, + "learning_rate": 9.604475375584065e-08, + "loss": 2.0275, + "step": 28752 + }, + { + "epoch": 0.96, + "grad_norm": 0.7857711911201477, + "learning_rate": 9.589785075142744e-08, + "loss": 1.9635, + "step": 28753 + }, + { + "epoch": 0.96, + "grad_norm": 0.7478498816490173, + "learning_rate": 9.575105963775755e-08, + "loss": 2.0277, + "step": 28754 + }, + { + "epoch": 0.96, + "grad_norm": 0.7466282248497009, + "learning_rate": 9.560438041648634e-08, + "loss": 2.0083, + "step": 28755 + }, + { + "epoch": 0.96, + "grad_norm": 0.7439160943031311, + "learning_rate": 9.545781308927249e-08, + "loss": 2.0729, + "step": 28756 + }, + { + "epoch": 0.96, + "grad_norm": 0.7442426085472107, + "learning_rate": 9.531135765777245e-08, + "loss": 2.0081, + "step": 28757 + }, + { + "epoch": 0.96, + "grad_norm": 0.7315365672111511, + "learning_rate": 9.516501412363821e-08, + "loss": 2.0607, + "step": 28758 + }, + { + "epoch": 0.96, + "grad_norm": 0.7446399331092834, + "learning_rate": 9.501878248852625e-08, + "loss": 1.9919, + "step": 28759 + }, + { + "epoch": 0.96, + "grad_norm": 0.7262716889381409, + "learning_rate": 9.487266275408746e-08, + "loss": 2.0614, + "step": 28760 + }, + { + "epoch": 0.96, + "grad_norm": 0.7658587694168091, + "learning_rate": 9.472665492197163e-08, + "loss": 1.9969, + "step": 28761 + }, + { + "epoch": 0.96, + "grad_norm": 0.7941670417785645, + "learning_rate": 9.458075899382858e-08, + "loss": 2.0145, + "step": 28762 + }, + { + "epoch": 0.96, + "grad_norm": 0.7706116437911987, + "learning_rate": 9.443497497130804e-08, + "loss": 2.0386, + "step": 28763 + }, + { + "epoch": 0.96, + "grad_norm": 0.7372916340827942, + "learning_rate": 9.428930285605654e-08, + "loss": 2.0413, + "step": 28764 + }, + { + "epoch": 0.96, + "grad_norm": 0.7452129125595093, + "learning_rate": 9.414374264971715e-08, + "loss": 1.9996, + "step": 28765 + }, + { + "epoch": 0.96, + "grad_norm": 0.7453997731208801, + "learning_rate": 9.399829435393748e-08, + "loss": 2.0657, + "step": 28766 + }, + { + "epoch": 0.96, + "grad_norm": 0.7542822957038879, + "learning_rate": 9.385295797035953e-08, + "loss": 2.0721, + "step": 28767 + }, + { + "epoch": 0.96, + "grad_norm": 0.7773736119270325, + "learning_rate": 9.370773350062534e-08, + "loss": 2.0066, + "step": 28768 + }, + { + "epoch": 0.96, + "grad_norm": 0.7260607481002808, + "learning_rate": 9.356262094637469e-08, + "loss": 2.084, + "step": 28769 + }, + { + "epoch": 0.96, + "grad_norm": 0.7489703297615051, + "learning_rate": 9.341762030924962e-08, + "loss": 2.0275, + "step": 28770 + }, + { + "epoch": 0.96, + "grad_norm": 0.780580997467041, + "learning_rate": 9.327273159088434e-08, + "loss": 2.1097, + "step": 28771 + }, + { + "epoch": 0.96, + "grad_norm": 0.7455532550811768, + "learning_rate": 9.31279547929198e-08, + "loss": 2.0526, + "step": 28772 + }, + { + "epoch": 0.96, + "grad_norm": 0.7439217567443848, + "learning_rate": 9.298328991698802e-08, + "loss": 2.1277, + "step": 28773 + }, + { + "epoch": 0.96, + "grad_norm": 0.7304056882858276, + "learning_rate": 9.283873696472545e-08, + "loss": 2.1337, + "step": 28774 + }, + { + "epoch": 0.96, + "grad_norm": 0.7803228497505188, + "learning_rate": 9.269429593776413e-08, + "loss": 2.0867, + "step": 28775 + }, + { + "epoch": 0.96, + "grad_norm": 0.7476922869682312, + "learning_rate": 9.254996683773831e-08, + "loss": 2.0926, + "step": 28776 + }, + { + "epoch": 0.96, + "grad_norm": 0.745601236820221, + "learning_rate": 9.240574966627558e-08, + "loss": 2.0397, + "step": 28777 + }, + { + "epoch": 0.96, + "grad_norm": 0.7883411645889282, + "learning_rate": 9.226164442500573e-08, + "loss": 2.0313, + "step": 28778 + }, + { + "epoch": 0.96, + "grad_norm": 0.7247946858406067, + "learning_rate": 9.211765111555748e-08, + "loss": 2.0898, + "step": 28779 + }, + { + "epoch": 0.96, + "grad_norm": 0.7257028818130493, + "learning_rate": 9.19737697395573e-08, + "loss": 2.0647, + "step": 28780 + }, + { + "epoch": 0.96, + "grad_norm": 0.7526800036430359, + "learning_rate": 9.183000029863165e-08, + "loss": 2.1105, + "step": 28781 + }, + { + "epoch": 0.96, + "grad_norm": 0.7623597979545593, + "learning_rate": 9.16863427944037e-08, + "loss": 2.0071, + "step": 28782 + }, + { + "epoch": 0.96, + "grad_norm": 0.7737778425216675, + "learning_rate": 9.154279722849546e-08, + "loss": 2.0657, + "step": 28783 + }, + { + "epoch": 0.96, + "grad_norm": 0.7557483315467834, + "learning_rate": 9.13993636025301e-08, + "loss": 2.0217, + "step": 28784 + }, + { + "epoch": 0.96, + "grad_norm": 0.764198362827301, + "learning_rate": 9.125604191812854e-08, + "loss": 2.0632, + "step": 28785 + }, + { + "epoch": 0.96, + "grad_norm": 0.8041329383850098, + "learning_rate": 9.111283217690948e-08, + "loss": 2.0307, + "step": 28786 + }, + { + "epoch": 0.96, + "grad_norm": 0.7491889595985413, + "learning_rate": 9.09697343804894e-08, + "loss": 1.9997, + "step": 28787 + }, + { + "epoch": 0.96, + "grad_norm": 0.7481538653373718, + "learning_rate": 9.082674853048589e-08, + "loss": 2.0222, + "step": 28788 + }, + { + "epoch": 0.96, + "grad_norm": 0.7227540612220764, + "learning_rate": 9.068387462851435e-08, + "loss": 2.1069, + "step": 28789 + }, + { + "epoch": 0.96, + "grad_norm": 0.7346522212028503, + "learning_rate": 9.054111267619015e-08, + "loss": 2.0064, + "step": 28790 + }, + { + "epoch": 0.96, + "grad_norm": 0.7587025165557861, + "learning_rate": 9.03984626751242e-08, + "loss": 2.0379, + "step": 28791 + }, + { + "epoch": 0.96, + "grad_norm": 0.7325794100761414, + "learning_rate": 9.025592462692856e-08, + "loss": 2.0525, + "step": 28792 + }, + { + "epoch": 0.96, + "grad_norm": 0.7329788208007812, + "learning_rate": 9.011349853321416e-08, + "loss": 2.0313, + "step": 28793 + }, + { + "epoch": 0.96, + "grad_norm": 0.7759432196617126, + "learning_rate": 8.997118439558972e-08, + "loss": 2.0632, + "step": 28794 + }, + { + "epoch": 0.96, + "grad_norm": 0.7502753734588623, + "learning_rate": 8.982898221566394e-08, + "loss": 2.1322, + "step": 28795 + }, + { + "epoch": 0.96, + "grad_norm": 0.7386521697044373, + "learning_rate": 8.96868919950411e-08, + "loss": 2.0316, + "step": 28796 + }, + { + "epoch": 0.96, + "grad_norm": 0.7299783229827881, + "learning_rate": 8.95449137353277e-08, + "loss": 2.0106, + "step": 28797 + }, + { + "epoch": 0.96, + "grad_norm": 0.7445585131645203, + "learning_rate": 8.9403047438128e-08, + "loss": 2.0608, + "step": 28798 + }, + { + "epoch": 0.96, + "grad_norm": 0.7425210475921631, + "learning_rate": 8.926129310504516e-08, + "loss": 2.0346, + "step": 28799 + }, + { + "epoch": 0.96, + "grad_norm": 0.7514474987983704, + "learning_rate": 8.911965073767903e-08, + "loss": 2.0221, + "step": 28800 + }, + { + "epoch": 0.96, + "grad_norm": 0.7600212097167969, + "learning_rate": 8.897812033763165e-08, + "loss": 2.044, + "step": 28801 + }, + { + "epoch": 0.96, + "grad_norm": 0.7252838611602783, + "learning_rate": 8.883670190649951e-08, + "loss": 2.034, + "step": 28802 + }, + { + "epoch": 0.96, + "grad_norm": 0.7614043354988098, + "learning_rate": 8.869539544588357e-08, + "loss": 1.9935, + "step": 28803 + }, + { + "epoch": 0.96, + "grad_norm": 0.7334284782409668, + "learning_rate": 8.855420095737699e-08, + "loss": 2.0823, + "step": 28804 + }, + { + "epoch": 0.96, + "grad_norm": 0.7646724581718445, + "learning_rate": 8.841311844257738e-08, + "loss": 2.0408, + "step": 28805 + }, + { + "epoch": 0.96, + "grad_norm": 0.7628721594810486, + "learning_rate": 8.82721479030768e-08, + "loss": 2.0496, + "step": 28806 + }, + { + "epoch": 0.96, + "grad_norm": 0.7408182621002197, + "learning_rate": 8.813128934046955e-08, + "loss": 2.0724, + "step": 28807 + }, + { + "epoch": 0.96, + "grad_norm": 0.7715884447097778, + "learning_rate": 8.799054275634545e-08, + "loss": 2.0907, + "step": 28808 + }, + { + "epoch": 0.96, + "grad_norm": 0.7652683854103088, + "learning_rate": 8.784990815229543e-08, + "loss": 2.0904, + "step": 28809 + }, + { + "epoch": 0.96, + "grad_norm": 0.7156738042831421, + "learning_rate": 8.770938552990826e-08, + "loss": 2.0998, + "step": 28810 + }, + { + "epoch": 0.96, + "grad_norm": 0.7185529470443726, + "learning_rate": 8.756897489077043e-08, + "loss": 2.0151, + "step": 28811 + }, + { + "epoch": 0.96, + "grad_norm": 0.765351414680481, + "learning_rate": 8.742867623647067e-08, + "loss": 2.0301, + "step": 28812 + }, + { + "epoch": 0.96, + "grad_norm": 0.7472516298294067, + "learning_rate": 8.728848956859104e-08, + "loss": 2.0616, + "step": 28813 + }, + { + "epoch": 0.96, + "grad_norm": 0.7201507687568665, + "learning_rate": 8.714841488871695e-08, + "loss": 2.0104, + "step": 28814 + }, + { + "epoch": 0.96, + "grad_norm": 0.7690840363502502, + "learning_rate": 8.700845219843046e-08, + "loss": 2.077, + "step": 28815 + }, + { + "epoch": 0.96, + "grad_norm": 0.7646406292915344, + "learning_rate": 8.686860149931253e-08, + "loss": 2.0638, + "step": 28816 + }, + { + "epoch": 0.96, + "grad_norm": 0.7455672025680542, + "learning_rate": 8.672886279294523e-08, + "loss": 2.0354, + "step": 28817 + }, + { + "epoch": 0.96, + "grad_norm": 0.7456795573234558, + "learning_rate": 8.658923608090397e-08, + "loss": 2.0927, + "step": 28818 + }, + { + "epoch": 0.96, + "grad_norm": 0.7295423746109009, + "learning_rate": 8.644972136476748e-08, + "loss": 2.0254, + "step": 28819 + }, + { + "epoch": 0.96, + "grad_norm": 0.7496533989906311, + "learning_rate": 8.631031864611228e-08, + "loss": 2.1402, + "step": 28820 + }, + { + "epoch": 0.96, + "grad_norm": 0.7263340950012207, + "learning_rate": 8.617102792651377e-08, + "loss": 2.0235, + "step": 28821 + }, + { + "epoch": 0.96, + "grad_norm": 0.7462524175643921, + "learning_rate": 8.603184920754404e-08, + "loss": 2.058, + "step": 28822 + }, + { + "epoch": 0.96, + "grad_norm": 0.7780177593231201, + "learning_rate": 8.589278249077737e-08, + "loss": 2.1657, + "step": 28823 + }, + { + "epoch": 0.96, + "grad_norm": 0.7397934198379517, + "learning_rate": 8.575382777778252e-08, + "loss": 2.0823, + "step": 28824 + }, + { + "epoch": 0.96, + "grad_norm": 0.7458871603012085, + "learning_rate": 8.561498507013266e-08, + "loss": 2.085, + "step": 28825 + }, + { + "epoch": 0.96, + "grad_norm": 0.7458474636077881, + "learning_rate": 8.547625436939211e-08, + "loss": 2.1151, + "step": 28826 + }, + { + "epoch": 0.96, + "grad_norm": 0.7505542039871216, + "learning_rate": 8.533763567713183e-08, + "loss": 2.0463, + "step": 28827 + }, + { + "epoch": 0.96, + "grad_norm": 0.7505803108215332, + "learning_rate": 8.519912899491501e-08, + "loss": 2.0369, + "step": 28828 + }, + { + "epoch": 0.96, + "grad_norm": 0.7204444408416748, + "learning_rate": 8.506073432430929e-08, + "loss": 2.085, + "step": 28829 + }, + { + "epoch": 0.96, + "grad_norm": 0.7758601903915405, + "learning_rate": 8.492245166687562e-08, + "loss": 2.0631, + "step": 28830 + }, + { + "epoch": 0.96, + "grad_norm": 0.7837344408035278, + "learning_rate": 8.478428102417725e-08, + "loss": 2.0946, + "step": 28831 + }, + { + "epoch": 0.96, + "grad_norm": 0.7752772569656372, + "learning_rate": 8.46462223977762e-08, + "loss": 2.0706, + "step": 28832 + }, + { + "epoch": 0.96, + "grad_norm": 0.7393060326576233, + "learning_rate": 8.450827578923016e-08, + "loss": 2.0737, + "step": 28833 + }, + { + "epoch": 0.96, + "grad_norm": 0.7521793246269226, + "learning_rate": 8.437044120009897e-08, + "loss": 2.0223, + "step": 28834 + }, + { + "epoch": 0.96, + "grad_norm": 0.7673332691192627, + "learning_rate": 8.423271863193915e-08, + "loss": 2.1169, + "step": 28835 + }, + { + "epoch": 0.96, + "grad_norm": 0.7373728156089783, + "learning_rate": 8.409510808630728e-08, + "loss": 2.0116, + "step": 28836 + }, + { + "epoch": 0.96, + "grad_norm": 0.7135067582130432, + "learning_rate": 8.395760956475763e-08, + "loss": 2.038, + "step": 28837 + }, + { + "epoch": 0.96, + "grad_norm": 0.7294144034385681, + "learning_rate": 8.38202230688434e-08, + "loss": 2.0488, + "step": 28838 + }, + { + "epoch": 0.96, + "grad_norm": 0.7496734261512756, + "learning_rate": 8.368294860011672e-08, + "loss": 2.0726, + "step": 28839 + }, + { + "epoch": 0.96, + "grad_norm": 0.7476152777671814, + "learning_rate": 8.354578616012854e-08, + "loss": 2.0577, + "step": 28840 + }, + { + "epoch": 0.96, + "grad_norm": 0.7320188283920288, + "learning_rate": 8.340873575042874e-08, + "loss": 2.0108, + "step": 28841 + }, + { + "epoch": 0.96, + "grad_norm": 0.7118979692459106, + "learning_rate": 8.327179737256496e-08, + "loss": 2.0068, + "step": 28842 + }, + { + "epoch": 0.96, + "grad_norm": 0.7497416138648987, + "learning_rate": 8.313497102808487e-08, + "loss": 2.0222, + "step": 28843 + }, + { + "epoch": 0.96, + "grad_norm": 0.7564935684204102, + "learning_rate": 8.29982567185339e-08, + "loss": 2.0486, + "step": 28844 + }, + { + "epoch": 0.96, + "grad_norm": 0.7485154867172241, + "learning_rate": 8.286165444545635e-08, + "loss": 2.0586, + "step": 28845 + }, + { + "epoch": 0.96, + "grad_norm": 0.7184957265853882, + "learning_rate": 8.272516421039656e-08, + "loss": 1.9907, + "step": 28846 + }, + { + "epoch": 0.96, + "grad_norm": 0.7209492325782776, + "learning_rate": 8.25887860148955e-08, + "loss": 2.0628, + "step": 28847 + }, + { + "epoch": 0.96, + "grad_norm": 0.7558860182762146, + "learning_rate": 8.245251986049307e-08, + "loss": 2.0364, + "step": 28848 + }, + { + "epoch": 0.96, + "grad_norm": 0.7647889852523804, + "learning_rate": 8.231636574873137e-08, + "loss": 2.013, + "step": 28849 + }, + { + "epoch": 0.96, + "grad_norm": 0.7553786635398865, + "learning_rate": 8.218032368114692e-08, + "loss": 2.083, + "step": 28850 + }, + { + "epoch": 0.96, + "grad_norm": 0.7671339511871338, + "learning_rate": 8.20443936592763e-08, + "loss": 2.1382, + "step": 28851 + }, + { + "epoch": 0.96, + "grad_norm": 0.7357126474380493, + "learning_rate": 8.190857568465605e-08, + "loss": 2.0789, + "step": 28852 + }, + { + "epoch": 0.96, + "grad_norm": 0.7759916186332703, + "learning_rate": 8.177286975881938e-08, + "loss": 2.0316, + "step": 28853 + }, + { + "epoch": 0.96, + "grad_norm": 0.7308028340339661, + "learning_rate": 8.163727588329951e-08, + "loss": 2.0366, + "step": 28854 + }, + { + "epoch": 0.96, + "grad_norm": 0.7526029348373413, + "learning_rate": 8.150179405963077e-08, + "loss": 2.0154, + "step": 28855 + }, + { + "epoch": 0.96, + "grad_norm": 0.7430981397628784, + "learning_rate": 8.136642428934083e-08, + "loss": 1.9436, + "step": 28856 + }, + { + "epoch": 0.96, + "grad_norm": 0.7524028420448303, + "learning_rate": 8.123116657396068e-08, + "loss": 2.043, + "step": 28857 + }, + { + "epoch": 0.96, + "grad_norm": 0.7466210722923279, + "learning_rate": 8.1096020915018e-08, + "loss": 2.0193, + "step": 28858 + }, + { + "epoch": 0.96, + "grad_norm": 0.7538317441940308, + "learning_rate": 8.096098731403823e-08, + "loss": 1.9666, + "step": 28859 + }, + { + "epoch": 0.96, + "grad_norm": 0.7528130412101746, + "learning_rate": 8.082606577254904e-08, + "loss": 2.0744, + "step": 28860 + }, + { + "epoch": 0.96, + "grad_norm": 0.7222666144371033, + "learning_rate": 8.069125629207475e-08, + "loss": 1.976, + "step": 28861 + }, + { + "epoch": 0.96, + "grad_norm": 0.7432643175125122, + "learning_rate": 8.055655887413527e-08, + "loss": 2.0001, + "step": 28862 + }, + { + "epoch": 0.96, + "grad_norm": 0.7185947299003601, + "learning_rate": 8.042197352025494e-08, + "loss": 2.0092, + "step": 28863 + }, + { + "epoch": 0.96, + "grad_norm": 0.7216563820838928, + "learning_rate": 8.028750023195475e-08, + "loss": 2.017, + "step": 28864 + }, + { + "epoch": 0.96, + "grad_norm": 0.7504189610481262, + "learning_rate": 8.015313901075239e-08, + "loss": 2.042, + "step": 28865 + }, + { + "epoch": 0.96, + "grad_norm": 0.756300151348114, + "learning_rate": 8.001888985816553e-08, + "loss": 2.0928, + "step": 28866 + }, + { + "epoch": 0.96, + "grad_norm": 0.7267733216285706, + "learning_rate": 7.988475277571295e-08, + "loss": 2.067, + "step": 28867 + }, + { + "epoch": 0.96, + "grad_norm": 0.7821259498596191, + "learning_rate": 7.975072776490789e-08, + "loss": 2.0662, + "step": 28868 + }, + { + "epoch": 0.96, + "grad_norm": 0.7699817419052124, + "learning_rate": 7.96168148272658e-08, + "loss": 2.1177, + "step": 28869 + }, + { + "epoch": 0.96, + "grad_norm": 0.7503629326820374, + "learning_rate": 7.948301396429769e-08, + "loss": 2.0629, + "step": 28870 + }, + { + "epoch": 0.96, + "grad_norm": 0.7410330176353455, + "learning_rate": 7.934932517751793e-08, + "loss": 2.0228, + "step": 28871 + }, + { + "epoch": 0.96, + "grad_norm": 0.7521970868110657, + "learning_rate": 7.921574846843417e-08, + "loss": 2.0508, + "step": 28872 + }, + { + "epoch": 0.96, + "grad_norm": 0.7348074913024902, + "learning_rate": 7.908228383855743e-08, + "loss": 2.0049, + "step": 28873 + }, + { + "epoch": 0.96, + "grad_norm": 0.7255772352218628, + "learning_rate": 7.89489312893954e-08, + "loss": 1.954, + "step": 28874 + }, + { + "epoch": 0.96, + "grad_norm": 0.7951341867446899, + "learning_rate": 7.881569082245244e-08, + "loss": 2.0055, + "step": 28875 + }, + { + "epoch": 0.96, + "grad_norm": 0.7480822801589966, + "learning_rate": 7.868256243923622e-08, + "loss": 2.0816, + "step": 28876 + }, + { + "epoch": 0.96, + "grad_norm": 0.749695897102356, + "learning_rate": 7.85495461412511e-08, + "loss": 2.0095, + "step": 28877 + }, + { + "epoch": 0.96, + "grad_norm": 0.7443879842758179, + "learning_rate": 7.841664192999809e-08, + "loss": 2.0369, + "step": 28878 + }, + { + "epoch": 0.96, + "grad_norm": 0.774549126625061, + "learning_rate": 7.828384980697822e-08, + "loss": 2.0776, + "step": 28879 + }, + { + "epoch": 0.96, + "grad_norm": 0.7527526617050171, + "learning_rate": 7.815116977369363e-08, + "loss": 2.0417, + "step": 28880 + }, + { + "epoch": 0.96, + "grad_norm": 0.7556657195091248, + "learning_rate": 7.8018601831642e-08, + "loss": 2.083, + "step": 28881 + }, + { + "epoch": 0.96, + "grad_norm": 0.7575960159301758, + "learning_rate": 7.788614598232103e-08, + "loss": 2.064, + "step": 28882 + }, + { + "epoch": 0.96, + "grad_norm": 0.7440369129180908, + "learning_rate": 7.775380222722839e-08, + "loss": 2.0583, + "step": 28883 + }, + { + "epoch": 0.96, + "grad_norm": 0.7180289030075073, + "learning_rate": 7.762157056785735e-08, + "loss": 2.0367, + "step": 28884 + }, + { + "epoch": 0.96, + "grad_norm": 0.7400633096694946, + "learning_rate": 7.748945100570226e-08, + "loss": 2.039, + "step": 28885 + }, + { + "epoch": 0.96, + "grad_norm": 0.7503394484519958, + "learning_rate": 7.73574435422575e-08, + "loss": 2.0293, + "step": 28886 + }, + { + "epoch": 0.96, + "grad_norm": 0.7502999305725098, + "learning_rate": 7.722554817901296e-08, + "loss": 2.0681, + "step": 28887 + }, + { + "epoch": 0.96, + "grad_norm": 0.7551541328430176, + "learning_rate": 7.709376491745857e-08, + "loss": 2.0625, + "step": 28888 + }, + { + "epoch": 0.96, + "grad_norm": 0.7333679795265198, + "learning_rate": 7.696209375908315e-08, + "loss": 2.0788, + "step": 28889 + }, + { + "epoch": 0.96, + "grad_norm": 0.780125081539154, + "learning_rate": 7.683053470537439e-08, + "loss": 1.9942, + "step": 28890 + }, + { + "epoch": 0.96, + "grad_norm": 0.741783857345581, + "learning_rate": 7.669908775781887e-08, + "loss": 2.0695, + "step": 28891 + }, + { + "epoch": 0.96, + "grad_norm": 0.7597998976707458, + "learning_rate": 7.656775291790208e-08, + "loss": 1.9876, + "step": 28892 + }, + { + "epoch": 0.96, + "grad_norm": 0.7643751502037048, + "learning_rate": 7.643653018710729e-08, + "loss": 2.0536, + "step": 28893 + }, + { + "epoch": 0.96, + "grad_norm": 0.7884009480476379, + "learning_rate": 7.63054195669155e-08, + "loss": 2.0477, + "step": 28894 + }, + { + "epoch": 0.96, + "grad_norm": 0.7447836995124817, + "learning_rate": 7.617442105881001e-08, + "loss": 2.1086, + "step": 28895 + }, + { + "epoch": 0.96, + "grad_norm": 0.7266417741775513, + "learning_rate": 7.604353466427072e-08, + "loss": 2.0646, + "step": 28896 + }, + { + "epoch": 0.96, + "grad_norm": 0.7657048106193542, + "learning_rate": 7.591276038477425e-08, + "loss": 2.0485, + "step": 28897 + }, + { + "epoch": 0.96, + "grad_norm": 0.7509279251098633, + "learning_rate": 7.578209822179938e-08, + "loss": 2.0076, + "step": 28898 + }, + { + "epoch": 0.96, + "grad_norm": 0.7228952050209045, + "learning_rate": 7.565154817682274e-08, + "loss": 2.0287, + "step": 28899 + }, + { + "epoch": 0.96, + "grad_norm": 0.7269327640533447, + "learning_rate": 7.552111025131869e-08, + "loss": 2.0283, + "step": 28900 + }, + { + "epoch": 0.96, + "grad_norm": 0.7704408764839172, + "learning_rate": 7.539078444676051e-08, + "loss": 1.9756, + "step": 28901 + }, + { + "epoch": 0.96, + "grad_norm": 0.7412946224212646, + "learning_rate": 7.526057076462145e-08, + "loss": 1.9986, + "step": 28902 + }, + { + "epoch": 0.96, + "grad_norm": 0.7515783905982971, + "learning_rate": 7.513046920637146e-08, + "loss": 2.0649, + "step": 28903 + }, + { + "epoch": 0.96, + "grad_norm": 0.7565885186195374, + "learning_rate": 7.500047977348046e-08, + "loss": 1.9703, + "step": 28904 + }, + { + "epoch": 0.96, + "grad_norm": 0.7550889253616333, + "learning_rate": 7.48706024674184e-08, + "loss": 2.0268, + "step": 28905 + }, + { + "epoch": 0.96, + "grad_norm": 0.7521265149116516, + "learning_rate": 7.474083728965076e-08, + "loss": 2.04, + "step": 28906 + }, + { + "epoch": 0.96, + "grad_norm": 0.7509969472885132, + "learning_rate": 7.461118424164415e-08, + "loss": 2.0367, + "step": 28907 + }, + { + "epoch": 0.96, + "grad_norm": 0.7306939363479614, + "learning_rate": 7.448164332486519e-08, + "loss": 2.0887, + "step": 28908 + }, + { + "epoch": 0.96, + "grad_norm": 0.7440593838691711, + "learning_rate": 7.435221454077491e-08, + "loss": 2.0922, + "step": 28909 + }, + { + "epoch": 0.96, + "grad_norm": 0.704559862613678, + "learning_rate": 7.422289789083548e-08, + "loss": 2.0396, + "step": 28910 + }, + { + "epoch": 0.96, + "grad_norm": 0.7528597712516785, + "learning_rate": 7.409369337650907e-08, + "loss": 2.0287, + "step": 28911 + }, + { + "epoch": 0.96, + "grad_norm": 0.7378642559051514, + "learning_rate": 7.396460099925451e-08, + "loss": 2.0287, + "step": 28912 + }, + { + "epoch": 0.96, + "grad_norm": 0.8209337592124939, + "learning_rate": 7.383562076053174e-08, + "loss": 2.0111, + "step": 28913 + }, + { + "epoch": 0.96, + "grad_norm": 0.7397079467773438, + "learning_rate": 7.370675266179628e-08, + "loss": 2.0299, + "step": 28914 + }, + { + "epoch": 0.96, + "grad_norm": 0.7886514067649841, + "learning_rate": 7.357799670450472e-08, + "loss": 2.0332, + "step": 28915 + }, + { + "epoch": 0.96, + "grad_norm": 0.7646514773368835, + "learning_rate": 7.344935289011146e-08, + "loss": 2.0312, + "step": 28916 + }, + { + "epoch": 0.96, + "grad_norm": 0.7521873116493225, + "learning_rate": 7.332082122006979e-08, + "loss": 2.0332, + "step": 28917 + }, + { + "epoch": 0.96, + "grad_norm": 0.7282657027244568, + "learning_rate": 7.319240169583186e-08, + "loss": 2.1085, + "step": 28918 + }, + { + "epoch": 0.96, + "grad_norm": 0.7328762412071228, + "learning_rate": 7.306409431884875e-08, + "loss": 2.0806, + "step": 28919 + }, + { + "epoch": 0.96, + "grad_norm": 0.7582875490188599, + "learning_rate": 7.29358990905693e-08, + "loss": 2.1055, + "step": 28920 + }, + { + "epoch": 0.96, + "grad_norm": 0.7659229636192322, + "learning_rate": 7.280781601244235e-08, + "loss": 2.0365, + "step": 28921 + }, + { + "epoch": 0.96, + "grad_norm": 0.74201899766922, + "learning_rate": 7.267984508591452e-08, + "loss": 2.0379, + "step": 28922 + }, + { + "epoch": 0.96, + "grad_norm": 0.7303563952445984, + "learning_rate": 7.255198631243243e-08, + "loss": 2.0675, + "step": 28923 + }, + { + "epoch": 0.96, + "grad_norm": 0.7446170449256897, + "learning_rate": 7.242423969344048e-08, + "loss": 2.0954, + "step": 28924 + }, + { + "epoch": 0.96, + "grad_norm": 0.7393648624420166, + "learning_rate": 7.229660523037974e-08, + "loss": 2.0608, + "step": 28925 + }, + { + "epoch": 0.96, + "grad_norm": 0.7304803133010864, + "learning_rate": 7.21690829246946e-08, + "loss": 1.953, + "step": 28926 + }, + { + "epoch": 0.96, + "grad_norm": 0.7664514183998108, + "learning_rate": 7.204167277782503e-08, + "loss": 2.0564, + "step": 28927 + }, + { + "epoch": 0.96, + "grad_norm": 0.7689447402954102, + "learning_rate": 7.191437479120989e-08, + "loss": 2.0857, + "step": 28928 + }, + { + "epoch": 0.96, + "grad_norm": 0.7483336925506592, + "learning_rate": 7.1787188966288e-08, + "loss": 2.0231, + "step": 28929 + }, + { + "epoch": 0.96, + "grad_norm": 0.7294921875, + "learning_rate": 7.166011530449601e-08, + "loss": 2.0525, + "step": 28930 + }, + { + "epoch": 0.96, + "grad_norm": 0.753108561038971, + "learning_rate": 7.153315380726944e-08, + "loss": 2.024, + "step": 28931 + }, + { + "epoch": 0.96, + "grad_norm": 0.7425176501274109, + "learning_rate": 7.140630447604268e-08, + "loss": 2.0762, + "step": 28932 + }, + { + "epoch": 0.96, + "grad_norm": 0.7400618195533752, + "learning_rate": 7.127956731224794e-08, + "loss": 1.9791, + "step": 28933 + }, + { + "epoch": 0.96, + "grad_norm": 0.7528064250946045, + "learning_rate": 7.11529423173185e-08, + "loss": 2.0539, + "step": 28934 + }, + { + "epoch": 0.96, + "grad_norm": 0.7512118816375732, + "learning_rate": 7.102642949268435e-08, + "loss": 2.0406, + "step": 28935 + }, + { + "epoch": 0.96, + "grad_norm": 0.7711220383644104, + "learning_rate": 7.090002883977431e-08, + "loss": 2.0427, + "step": 28936 + }, + { + "epoch": 0.96, + "grad_norm": 0.7332038879394531, + "learning_rate": 7.077374036001728e-08, + "loss": 1.9904, + "step": 28937 + }, + { + "epoch": 0.96, + "grad_norm": 0.7384692430496216, + "learning_rate": 7.064756405483986e-08, + "loss": 2.028, + "step": 28938 + }, + { + "epoch": 0.96, + "grad_norm": 0.722040593624115, + "learning_rate": 7.052149992566537e-08, + "loss": 2.0499, + "step": 28939 + }, + { + "epoch": 0.96, + "grad_norm": 0.7493895292282104, + "learning_rate": 7.039554797392157e-08, + "loss": 2.0283, + "step": 28940 + }, + { + "epoch": 0.96, + "grad_norm": 0.7240380048751831, + "learning_rate": 7.026970820102841e-08, + "loss": 1.9974, + "step": 28941 + }, + { + "epoch": 0.96, + "grad_norm": 0.736992597579956, + "learning_rate": 7.014398060840921e-08, + "loss": 1.9494, + "step": 28942 + }, + { + "epoch": 0.96, + "grad_norm": 0.7390832901000977, + "learning_rate": 7.001836519748285e-08, + "loss": 2.0885, + "step": 28943 + }, + { + "epoch": 0.96, + "grad_norm": 0.7628918290138245, + "learning_rate": 6.989286196967038e-08, + "loss": 2.0416, + "step": 28944 + }, + { + "epoch": 0.96, + "grad_norm": 0.737324595451355, + "learning_rate": 6.976747092638848e-08, + "loss": 2.0399, + "step": 28945 + }, + { + "epoch": 0.96, + "grad_norm": 0.7331181764602661, + "learning_rate": 6.964219206905487e-08, + "loss": 2.0881, + "step": 28946 + }, + { + "epoch": 0.96, + "grad_norm": 0.7519974112510681, + "learning_rate": 6.951702539908289e-08, + "loss": 1.9593, + "step": 28947 + }, + { + "epoch": 0.96, + "grad_norm": 0.7352447509765625, + "learning_rate": 6.939197091788807e-08, + "loss": 2.1293, + "step": 28948 + }, + { + "epoch": 0.96, + "grad_norm": 0.7299389243125916, + "learning_rate": 6.926702862688262e-08, + "loss": 2.0609, + "step": 28949 + }, + { + "epoch": 0.96, + "grad_norm": 0.7379013895988464, + "learning_rate": 6.914219852747872e-08, + "loss": 2.0223, + "step": 28950 + }, + { + "epoch": 0.96, + "grad_norm": 0.7552069425582886, + "learning_rate": 6.901748062108638e-08, + "loss": 2.0489, + "step": 28951 + }, + { + "epoch": 0.96, + "grad_norm": 0.7454066872596741, + "learning_rate": 6.889287490911445e-08, + "loss": 2.0947, + "step": 28952 + }, + { + "epoch": 0.96, + "grad_norm": 0.8056999444961548, + "learning_rate": 6.87683813929696e-08, + "loss": 2.0257, + "step": 28953 + }, + { + "epoch": 0.96, + "grad_norm": 0.7525133490562439, + "learning_rate": 6.864400007405957e-08, + "loss": 2.0127, + "step": 28954 + }, + { + "epoch": 0.96, + "grad_norm": 0.729185938835144, + "learning_rate": 6.851973095378994e-08, + "loss": 2.0215, + "step": 28955 + }, + { + "epoch": 0.96, + "grad_norm": 0.732745885848999, + "learning_rate": 6.8395574033564e-08, + "loss": 2.0966, + "step": 28956 + }, + { + "epoch": 0.96, + "grad_norm": 0.7138034105300903, + "learning_rate": 6.827152931478398e-08, + "loss": 2.0603, + "step": 28957 + }, + { + "epoch": 0.96, + "grad_norm": 0.7614477872848511, + "learning_rate": 6.814759679885207e-08, + "loss": 2.0164, + "step": 28958 + }, + { + "epoch": 0.96, + "grad_norm": 0.7697935104370117, + "learning_rate": 6.802377648716718e-08, + "loss": 1.9745, + "step": 28959 + }, + { + "epoch": 0.96, + "grad_norm": 0.7562699913978577, + "learning_rate": 6.790006838112928e-08, + "loss": 1.9992, + "step": 28960 + }, + { + "epoch": 0.96, + "grad_norm": 0.7690843939781189, + "learning_rate": 6.777647248213503e-08, + "loss": 2.0162, + "step": 28961 + }, + { + "epoch": 0.96, + "grad_norm": 0.7489324808120728, + "learning_rate": 6.765298879158332e-08, + "loss": 2.0266, + "step": 28962 + }, + { + "epoch": 0.96, + "grad_norm": 0.761817991733551, + "learning_rate": 6.752961731086527e-08, + "loss": 2.0728, + "step": 28963 + }, + { + "epoch": 0.96, + "grad_norm": 0.7664352655410767, + "learning_rate": 6.740635804137752e-08, + "loss": 2.0059, + "step": 28964 + }, + { + "epoch": 0.96, + "grad_norm": 0.7590832114219666, + "learning_rate": 6.728321098451119e-08, + "loss": 2.0217, + "step": 28965 + }, + { + "epoch": 0.96, + "grad_norm": 0.7573423385620117, + "learning_rate": 6.716017614165848e-08, + "loss": 2.0474, + "step": 28966 + }, + { + "epoch": 0.96, + "grad_norm": 0.7522915601730347, + "learning_rate": 6.703725351420832e-08, + "loss": 2.1411, + "step": 28967 + }, + { + "epoch": 0.96, + "grad_norm": 0.7393189072608948, + "learning_rate": 6.691444310355067e-08, + "loss": 2.0408, + "step": 28968 + }, + { + "epoch": 0.96, + "grad_norm": 0.754711389541626, + "learning_rate": 6.67917449110722e-08, + "loss": 2.0062, + "step": 28969 + }, + { + "epoch": 0.96, + "grad_norm": 0.7508113384246826, + "learning_rate": 6.66691589381585e-08, + "loss": 2.0786, + "step": 28970 + }, + { + "epoch": 0.96, + "grad_norm": 0.741977870464325, + "learning_rate": 6.654668518619623e-08, + "loss": 2.0619, + "step": 28971 + }, + { + "epoch": 0.96, + "grad_norm": 0.7559650540351868, + "learning_rate": 6.642432365656648e-08, + "loss": 2.0401, + "step": 28972 + }, + { + "epoch": 0.96, + "grad_norm": 0.7487648725509644, + "learning_rate": 6.630207435065372e-08, + "loss": 2.1132, + "step": 28973 + }, + { + "epoch": 0.96, + "grad_norm": 0.765039324760437, + "learning_rate": 6.617993726983907e-08, + "loss": 2.054, + "step": 28974 + }, + { + "epoch": 0.96, + "grad_norm": 0.7337497472763062, + "learning_rate": 6.605791241550142e-08, + "loss": 2.0492, + "step": 28975 + }, + { + "epoch": 0.96, + "grad_norm": 0.7413203120231628, + "learning_rate": 6.593599978901855e-08, + "loss": 2.0437, + "step": 28976 + }, + { + "epoch": 0.96, + "grad_norm": 0.7204717397689819, + "learning_rate": 6.581419939176936e-08, + "loss": 2.0033, + "step": 28977 + }, + { + "epoch": 0.96, + "grad_norm": 0.7481163740158081, + "learning_rate": 6.569251122513054e-08, + "loss": 2.0207, + "step": 28978 + }, + { + "epoch": 0.96, + "grad_norm": 0.739377498626709, + "learning_rate": 6.557093529047432e-08, + "loss": 2.0259, + "step": 28979 + }, + { + "epoch": 0.96, + "grad_norm": 0.7433655858039856, + "learning_rate": 6.544947158917625e-08, + "loss": 2.0141, + "step": 28980 + }, + { + "epoch": 0.96, + "grad_norm": 0.737528920173645, + "learning_rate": 6.532812012260748e-08, + "loss": 2.0004, + "step": 28981 + }, + { + "epoch": 0.96, + "grad_norm": 0.7599485516548157, + "learning_rate": 6.520688089213912e-08, + "loss": 2.0238, + "step": 28982 + }, + { + "epoch": 0.96, + "grad_norm": 0.7532410025596619, + "learning_rate": 6.508575389914118e-08, + "loss": 2.0821, + "step": 28983 + }, + { + "epoch": 0.96, + "grad_norm": 0.7481398582458496, + "learning_rate": 6.496473914498147e-08, + "loss": 1.9912, + "step": 28984 + }, + { + "epoch": 0.96, + "grad_norm": 0.7430540323257446, + "learning_rate": 6.484383663102889e-08, + "loss": 2.0657, + "step": 28985 + }, + { + "epoch": 0.96, + "grad_norm": 0.7415584325790405, + "learning_rate": 6.472304635864678e-08, + "loss": 1.9872, + "step": 28986 + }, + { + "epoch": 0.96, + "grad_norm": 0.7189276218414307, + "learning_rate": 6.460236832920185e-08, + "loss": 2.009, + "step": 28987 + }, + { + "epoch": 0.96, + "grad_norm": 0.743125319480896, + "learning_rate": 6.448180254405633e-08, + "loss": 2.0796, + "step": 28988 + }, + { + "epoch": 0.96, + "grad_norm": 0.7273780703544617, + "learning_rate": 6.436134900457358e-08, + "loss": 2.0124, + "step": 28989 + }, + { + "epoch": 0.96, + "grad_norm": 0.7614495754241943, + "learning_rate": 6.424100771211251e-08, + "loss": 2.0022, + "step": 28990 + }, + { + "epoch": 0.96, + "grad_norm": 0.7750645279884338, + "learning_rate": 6.412077866803312e-08, + "loss": 2.0623, + "step": 28991 + }, + { + "epoch": 0.96, + "grad_norm": 0.7312312722206116, + "learning_rate": 6.400066187369547e-08, + "loss": 1.9976, + "step": 28992 + }, + { + "epoch": 0.96, + "grad_norm": 0.7096292972564697, + "learning_rate": 6.388065733045401e-08, + "loss": 2.0562, + "step": 28993 + }, + { + "epoch": 0.96, + "grad_norm": 0.8079985976219177, + "learning_rate": 6.376076503966655e-08, + "loss": 2.1109, + "step": 28994 + }, + { + "epoch": 0.96, + "grad_norm": 0.7363803386688232, + "learning_rate": 6.364098500268645e-08, + "loss": 2.0643, + "step": 28995 + }, + { + "epoch": 0.96, + "grad_norm": 0.7387759685516357, + "learning_rate": 6.352131722086707e-08, + "loss": 1.9433, + "step": 28996 + }, + { + "epoch": 0.96, + "grad_norm": 0.735599160194397, + "learning_rate": 6.340176169556067e-08, + "loss": 1.9956, + "step": 28997 + }, + { + "epoch": 0.96, + "grad_norm": 0.7228538990020752, + "learning_rate": 6.328231842811839e-08, + "loss": 2.0413, + "step": 28998 + }, + { + "epoch": 0.96, + "grad_norm": 0.7223252654075623, + "learning_rate": 6.316298741988802e-08, + "loss": 1.9661, + "step": 28999 + }, + { + "epoch": 0.96, + "grad_norm": 0.7391944527626038, + "learning_rate": 6.304376867221962e-08, + "loss": 2.0823, + "step": 29000 + }, + { + "epoch": 0.96, + "grad_norm": 0.7537028193473816, + "learning_rate": 6.292466218645765e-08, + "loss": 2.0685, + "step": 29001 + }, + { + "epoch": 0.96, + "grad_norm": 0.7065121531486511, + "learning_rate": 6.280566796395105e-08, + "loss": 1.9963, + "step": 29002 + }, + { + "epoch": 0.96, + "grad_norm": 0.7442588210105896, + "learning_rate": 6.268678600604095e-08, + "loss": 2.0353, + "step": 29003 + }, + { + "epoch": 0.96, + "grad_norm": 0.7650353908538818, + "learning_rate": 6.256801631407184e-08, + "loss": 2.1055, + "step": 29004 + }, + { + "epoch": 0.96, + "grad_norm": 0.7484534382820129, + "learning_rate": 6.244935888938485e-08, + "loss": 2.0808, + "step": 29005 + }, + { + "epoch": 0.97, + "grad_norm": 0.7225353121757507, + "learning_rate": 6.233081373332228e-08, + "loss": 2.0327, + "step": 29006 + }, + { + "epoch": 0.97, + "grad_norm": 0.7634073495864868, + "learning_rate": 6.221238084722192e-08, + "loss": 2.1246, + "step": 29007 + }, + { + "epoch": 0.97, + "grad_norm": 0.7813124060630798, + "learning_rate": 6.209406023242049e-08, + "loss": 2.0828, + "step": 29008 + }, + { + "epoch": 0.97, + "grad_norm": 0.748350977897644, + "learning_rate": 6.1975851890258e-08, + "loss": 2.0005, + "step": 29009 + }, + { + "epoch": 0.97, + "grad_norm": 0.7416631579399109, + "learning_rate": 6.185775582206677e-08, + "loss": 2.0538, + "step": 29010 + }, + { + "epoch": 0.97, + "grad_norm": 0.7401524782180786, + "learning_rate": 6.173977202918346e-08, + "loss": 2.0076, + "step": 29011 + }, + { + "epoch": 0.97, + "grad_norm": 0.7560703754425049, + "learning_rate": 6.162190051293925e-08, + "loss": 2.066, + "step": 29012 + }, + { + "epoch": 0.97, + "grad_norm": 0.7423843741416931, + "learning_rate": 6.150414127466642e-08, + "loss": 2.1156, + "step": 29013 + }, + { + "epoch": 0.97, + "grad_norm": 0.7280965447425842, + "learning_rate": 6.138649431569499e-08, + "loss": 2.0208, + "step": 29014 + }, + { + "epoch": 0.97, + "grad_norm": 0.7192726135253906, + "learning_rate": 6.126895963735391e-08, + "loss": 2.0353, + "step": 29015 + }, + { + "epoch": 0.97, + "grad_norm": 0.7671297192573547, + "learning_rate": 6.115153724097323e-08, + "loss": 2.0466, + "step": 29016 + }, + { + "epoch": 0.97, + "grad_norm": 0.7226524353027344, + "learning_rate": 6.103422712787632e-08, + "loss": 1.9613, + "step": 29017 + }, + { + "epoch": 0.97, + "grad_norm": 0.730130672454834, + "learning_rate": 6.091702929938992e-08, + "loss": 2.0591, + "step": 29018 + }, + { + "epoch": 0.97, + "grad_norm": 0.7481353878974915, + "learning_rate": 6.07999437568385e-08, + "loss": 2.0096, + "step": 29019 + }, + { + "epoch": 0.97, + "grad_norm": 0.7259511351585388, + "learning_rate": 6.068297050154437e-08, + "loss": 2.0838, + "step": 29020 + }, + { + "epoch": 0.97, + "grad_norm": 0.7114219069480896, + "learning_rate": 6.056610953482866e-08, + "loss": 1.9516, + "step": 29021 + }, + { + "epoch": 0.97, + "grad_norm": 0.7733140587806702, + "learning_rate": 6.044936085801146e-08, + "loss": 2.0729, + "step": 29022 + }, + { + "epoch": 0.97, + "grad_norm": 0.7676507830619812, + "learning_rate": 6.033272447241278e-08, + "loss": 2.1271, + "step": 29023 + }, + { + "epoch": 0.97, + "grad_norm": 0.7615839838981628, + "learning_rate": 6.02162003793505e-08, + "loss": 2.0445, + "step": 29024 + }, + { + "epoch": 0.97, + "grad_norm": 0.7395052313804626, + "learning_rate": 6.009978858014021e-08, + "loss": 2.0405, + "step": 29025 + }, + { + "epoch": 0.97, + "grad_norm": 0.7613799571990967, + "learning_rate": 5.998348907609641e-08, + "loss": 2.0583, + "step": 29026 + }, + { + "epoch": 0.97, + "grad_norm": 0.737933337688446, + "learning_rate": 5.986730186853362e-08, + "loss": 1.9918, + "step": 29027 + }, + { + "epoch": 0.97, + "grad_norm": 0.7355883717536926, + "learning_rate": 5.975122695876522e-08, + "loss": 2.0326, + "step": 29028 + }, + { + "epoch": 0.97, + "grad_norm": 0.7704764604568481, + "learning_rate": 5.96352643481013e-08, + "loss": 2.0672, + "step": 29029 + }, + { + "epoch": 0.97, + "grad_norm": 0.725749135017395, + "learning_rate": 5.9519414037852994e-08, + "loss": 2.0473, + "step": 29030 + }, + { + "epoch": 0.97, + "grad_norm": 0.7410781383514404, + "learning_rate": 5.9403676029328175e-08, + "loss": 2.0831, + "step": 29031 + }, + { + "epoch": 0.97, + "grad_norm": 0.7277485728263855, + "learning_rate": 5.928805032383467e-08, + "loss": 2.0151, + "step": 29032 + }, + { + "epoch": 0.97, + "grad_norm": 0.7296704649925232, + "learning_rate": 5.917253692268032e-08, + "loss": 2.0932, + "step": 29033 + }, + { + "epoch": 0.97, + "grad_norm": 0.7552341222763062, + "learning_rate": 5.9057135827167435e-08, + "loss": 2.0987, + "step": 29034 + }, + { + "epoch": 0.97, + "grad_norm": 0.7646024227142334, + "learning_rate": 5.894184703860162e-08, + "loss": 2.0154, + "step": 29035 + }, + { + "epoch": 0.97, + "grad_norm": 0.737494707107544, + "learning_rate": 5.882667055828406e-08, + "loss": 2.0324, + "step": 29036 + }, + { + "epoch": 0.97, + "grad_norm": 0.7273925542831421, + "learning_rate": 5.871160638751816e-08, + "loss": 2.0419, + "step": 29037 + }, + { + "epoch": 0.97, + "grad_norm": 0.7386282682418823, + "learning_rate": 5.859665452760177e-08, + "loss": 2.0346, + "step": 29038 + }, + { + "epoch": 0.97, + "grad_norm": 0.7568864226341248, + "learning_rate": 5.848181497983274e-08, + "loss": 2.0759, + "step": 29039 + }, + { + "epoch": 0.97, + "grad_norm": 0.7352434396743774, + "learning_rate": 5.836708774551114e-08, + "loss": 2.0934, + "step": 29040 + }, + { + "epoch": 0.97, + "grad_norm": 0.7033225297927856, + "learning_rate": 5.825247282593149e-08, + "loss": 2.084, + "step": 29041 + }, + { + "epoch": 0.97, + "grad_norm": 0.7662063241004944, + "learning_rate": 5.813797022238943e-08, + "loss": 2.0607, + "step": 29042 + }, + { + "epoch": 0.97, + "grad_norm": 0.7338163256645203, + "learning_rate": 5.802357993617835e-08, + "loss": 2.0115, + "step": 29043 + }, + { + "epoch": 0.97, + "grad_norm": 0.7215204834938049, + "learning_rate": 5.790930196858946e-08, + "loss": 2.053, + "step": 29044 + }, + { + "epoch": 0.97, + "grad_norm": 0.7399293184280396, + "learning_rate": 5.779513632091505e-08, + "loss": 2.04, + "step": 29045 + }, + { + "epoch": 0.97, + "grad_norm": 0.7710161209106445, + "learning_rate": 5.768108299444408e-08, + "loss": 2.0538, + "step": 29046 + }, + { + "epoch": 0.97, + "grad_norm": 0.7531158328056335, + "learning_rate": 5.756714199046553e-08, + "loss": 2.0446, + "step": 29047 + }, + { + "epoch": 0.97, + "grad_norm": 0.7178133726119995, + "learning_rate": 5.7453313310267264e-08, + "loss": 2.0535, + "step": 29048 + }, + { + "epoch": 0.97, + "grad_norm": 0.7536664009094238, + "learning_rate": 5.73395969551338e-08, + "loss": 2.0522, + "step": 29049 + }, + { + "epoch": 0.97, + "grad_norm": 0.7335782051086426, + "learning_rate": 5.722599292635078e-08, + "loss": 2.0491, + "step": 29050 + }, + { + "epoch": 0.97, + "grad_norm": 0.7559759616851807, + "learning_rate": 5.711250122520162e-08, + "loss": 2.0539, + "step": 29051 + }, + { + "epoch": 0.97, + "grad_norm": 0.7440823316574097, + "learning_rate": 5.699912185296752e-08, + "loss": 2.031, + "step": 29052 + }, + { + "epoch": 0.97, + "grad_norm": 0.7325991988182068, + "learning_rate": 5.68858548109319e-08, + "loss": 1.9825, + "step": 29053 + }, + { + "epoch": 0.97, + "grad_norm": 0.7515924572944641, + "learning_rate": 5.6772700100371504e-08, + "loss": 2.0492, + "step": 29054 + }, + { + "epoch": 0.97, + "grad_norm": 0.7323790788650513, + "learning_rate": 5.665965772256532e-08, + "loss": 1.9529, + "step": 29055 + }, + { + "epoch": 0.97, + "grad_norm": 0.7622612118721008, + "learning_rate": 5.6546727678792324e-08, + "loss": 2.0658, + "step": 29056 + }, + { + "epoch": 0.97, + "grad_norm": 0.7377007007598877, + "learning_rate": 5.643390997032594e-08, + "loss": 2.0349, + "step": 29057 + }, + { + "epoch": 0.97, + "grad_norm": 0.7276656031608582, + "learning_rate": 5.632120459844181e-08, + "loss": 2.037, + "step": 29058 + }, + { + "epoch": 0.97, + "grad_norm": 0.7768324017524719, + "learning_rate": 5.620861156441337e-08, + "loss": 2.0006, + "step": 29059 + }, + { + "epoch": 0.97, + "grad_norm": 0.7395504117012024, + "learning_rate": 5.6096130869512934e-08, + "loss": 2.0122, + "step": 29060 + }, + { + "epoch": 0.97, + "grad_norm": 0.7336729168891907, + "learning_rate": 5.5983762515009476e-08, + "loss": 2.0195, + "step": 29061 + }, + { + "epoch": 0.97, + "grad_norm": 0.7562929391860962, + "learning_rate": 5.587150650217421e-08, + "loss": 2.0333, + "step": 29062 + }, + { + "epoch": 0.97, + "grad_norm": 0.7388936877250671, + "learning_rate": 5.575936283227501e-08, + "loss": 2.048, + "step": 29063 + }, + { + "epoch": 0.97, + "grad_norm": 0.717592179775238, + "learning_rate": 5.564733150657975e-08, + "loss": 2.0604, + "step": 29064 + }, + { + "epoch": 0.97, + "grad_norm": 0.7575987577438354, + "learning_rate": 5.553541252635075e-08, + "loss": 2.0288, + "step": 29065 + }, + { + "epoch": 0.97, + "grad_norm": 0.7681446075439453, + "learning_rate": 5.542360589285589e-08, + "loss": 2.1356, + "step": 29066 + }, + { + "epoch": 0.97, + "grad_norm": 0.7275213003158569, + "learning_rate": 5.531191160735749e-08, + "loss": 2.0062, + "step": 29067 + }, + { + "epoch": 0.97, + "grad_norm": 0.7394939064979553, + "learning_rate": 5.520032967111566e-08, + "loss": 2.0982, + "step": 29068 + }, + { + "epoch": 0.97, + "grad_norm": 0.766218364238739, + "learning_rate": 5.5088860085393825e-08, + "loss": 2.0696, + "step": 29069 + }, + { + "epoch": 0.97, + "grad_norm": 0.7595894932746887, + "learning_rate": 5.497750285144876e-08, + "loss": 2.041, + "step": 29070 + }, + { + "epoch": 0.97, + "grad_norm": 0.7612637281417847, + "learning_rate": 5.486625797053946e-08, + "loss": 2.1198, + "step": 29071 + }, + { + "epoch": 0.97, + "grad_norm": 0.7550984025001526, + "learning_rate": 5.4755125443922695e-08, + "loss": 2.0885, + "step": 29072 + }, + { + "epoch": 0.97, + "grad_norm": 0.7625553607940674, + "learning_rate": 5.464410527285524e-08, + "loss": 1.9847, + "step": 29073 + }, + { + "epoch": 0.97, + "grad_norm": 0.7424097061157227, + "learning_rate": 5.453319745858832e-08, + "loss": 2.073, + "step": 29074 + }, + { + "epoch": 0.97, + "grad_norm": 0.7774603962898254, + "learning_rate": 5.442240200237759e-08, + "loss": 1.9919, + "step": 29075 + }, + { + "epoch": 0.97, + "grad_norm": 0.7656825184822083, + "learning_rate": 5.431171890547426e-08, + "loss": 2.0802, + "step": 29076 + }, + { + "epoch": 0.97, + "grad_norm": 0.7224841117858887, + "learning_rate": 5.420114816912847e-08, + "loss": 2.0013, + "step": 29077 + }, + { + "epoch": 0.97, + "grad_norm": 0.7326154708862305, + "learning_rate": 5.409068979458809e-08, + "loss": 2.0082, + "step": 29078 + }, + { + "epoch": 0.97, + "grad_norm": 0.7507390379905701, + "learning_rate": 5.398034378310324e-08, + "loss": 2.1636, + "step": 29079 + }, + { + "epoch": 0.97, + "grad_norm": 0.7503821849822998, + "learning_rate": 5.3870110135919586e-08, + "loss": 1.9807, + "step": 29080 + }, + { + "epoch": 0.97, + "grad_norm": 0.7579681873321533, + "learning_rate": 5.3759988854281686e-08, + "loss": 2.0914, + "step": 29081 + }, + { + "epoch": 0.97, + "grad_norm": 0.7389354705810547, + "learning_rate": 5.364997993943521e-08, + "loss": 2.0364, + "step": 29082 + }, + { + "epoch": 0.97, + "grad_norm": 0.7581992149353027, + "learning_rate": 5.35400833926214e-08, + "loss": 2.1103, + "step": 29083 + }, + { + "epoch": 0.97, + "grad_norm": 0.7444687485694885, + "learning_rate": 5.343029921508258e-08, + "loss": 2.0253, + "step": 29084 + }, + { + "epoch": 0.97, + "grad_norm": 0.7197502255439758, + "learning_rate": 5.332062740805888e-08, + "loss": 2.0658, + "step": 29085 + }, + { + "epoch": 0.97, + "grad_norm": 0.7424801588058472, + "learning_rate": 5.321106797279041e-08, + "loss": 2.0488, + "step": 29086 + }, + { + "epoch": 0.97, + "grad_norm": 0.7322023510932922, + "learning_rate": 5.310162091051285e-08, + "loss": 2.0491, + "step": 29087 + }, + { + "epoch": 0.97, + "grad_norm": 0.7882124781608582, + "learning_rate": 5.2992286222464105e-08, + "loss": 2.0903, + "step": 29088 + }, + { + "epoch": 0.97, + "grad_norm": 0.7491951584815979, + "learning_rate": 5.2883063909878743e-08, + "loss": 2.0401, + "step": 29089 + }, + { + "epoch": 0.97, + "grad_norm": 0.7461510300636292, + "learning_rate": 5.277395397399132e-08, + "loss": 2.0899, + "step": 29090 + }, + { + "epoch": 0.97, + "grad_norm": 0.7436741590499878, + "learning_rate": 5.2664956416034196e-08, + "loss": 2.0568, + "step": 29091 + }, + { + "epoch": 0.97, + "grad_norm": 0.7614251971244812, + "learning_rate": 5.2556071237238606e-08, + "loss": 2.0445, + "step": 29092 + }, + { + "epoch": 0.97, + "grad_norm": 0.73028963804245, + "learning_rate": 5.244729843883467e-08, + "loss": 2.0654, + "step": 29093 + }, + { + "epoch": 0.97, + "grad_norm": 0.7380455732345581, + "learning_rate": 5.233863802205141e-08, + "loss": 2.0821, + "step": 29094 + }, + { + "epoch": 0.97, + "grad_norm": 0.761981725692749, + "learning_rate": 5.223008998811674e-08, + "loss": 2.0558, + "step": 29095 + }, + { + "epoch": 0.97, + "grad_norm": 0.7487305998802185, + "learning_rate": 5.2121654338255226e-08, + "loss": 2.0613, + "step": 29096 + }, + { + "epoch": 0.97, + "grad_norm": 0.7516537308692932, + "learning_rate": 5.201333107369366e-08, + "loss": 2.0357, + "step": 29097 + }, + { + "epoch": 0.97, + "grad_norm": 0.7404289841651917, + "learning_rate": 5.1905120195655524e-08, + "loss": 2.0063, + "step": 29098 + }, + { + "epoch": 0.97, + "grad_norm": 0.7483970522880554, + "learning_rate": 5.179702170536316e-08, + "loss": 2.0285, + "step": 29099 + }, + { + "epoch": 0.97, + "grad_norm": 0.7592679858207703, + "learning_rate": 5.168903560403893e-08, + "loss": 2.045, + "step": 29100 + }, + { + "epoch": 0.97, + "grad_norm": 0.7569833397865295, + "learning_rate": 5.1581161892900746e-08, + "loss": 2.045, + "step": 29101 + }, + { + "epoch": 0.97, + "grad_norm": 0.7518364191055298, + "learning_rate": 5.147340057316763e-08, + "loss": 2.0314, + "step": 29102 + }, + { + "epoch": 0.97, + "grad_norm": 0.7628983855247498, + "learning_rate": 5.136575164605861e-08, + "loss": 2.1692, + "step": 29103 + }, + { + "epoch": 0.97, + "grad_norm": 0.7567183971405029, + "learning_rate": 5.1258215112789386e-08, + "loss": 2.0166, + "step": 29104 + }, + { + "epoch": 0.97, + "grad_norm": 0.7611680030822754, + "learning_rate": 5.115079097457343e-08, + "loss": 2.0312, + "step": 29105 + }, + { + "epoch": 0.97, + "grad_norm": 0.7365122437477112, + "learning_rate": 5.1043479232624205e-08, + "loss": 2.0798, + "step": 29106 + }, + { + "epoch": 0.97, + "grad_norm": 0.7223207354545593, + "learning_rate": 5.0936279888156304e-08, + "loss": 2.0103, + "step": 29107 + }, + { + "epoch": 0.97, + "grad_norm": 0.7277834415435791, + "learning_rate": 5.0829192942379866e-08, + "loss": 2.0596, + "step": 29108 + }, + { + "epoch": 0.97, + "grad_norm": 0.749782383441925, + "learning_rate": 5.072221839650393e-08, + "loss": 2.0028, + "step": 29109 + }, + { + "epoch": 0.97, + "grad_norm": 0.7451756596565247, + "learning_rate": 5.0615356251737525e-08, + "loss": 2.0019, + "step": 29110 + }, + { + "epoch": 0.97, + "grad_norm": 0.7164727449417114, + "learning_rate": 5.050860650928857e-08, + "loss": 1.9867, + "step": 29111 + }, + { + "epoch": 0.97, + "grad_norm": 0.7343751192092896, + "learning_rate": 5.040196917036166e-08, + "loss": 2.0904, + "step": 29112 + }, + { + "epoch": 0.97, + "grad_norm": 0.7372276186943054, + "learning_rate": 5.0295444236162504e-08, + "loss": 1.9819, + "step": 29113 + }, + { + "epoch": 0.97, + "grad_norm": 0.7556163668632507, + "learning_rate": 5.018903170789458e-08, + "loss": 2.0453, + "step": 29114 + }, + { + "epoch": 0.97, + "grad_norm": 0.7306556701660156, + "learning_rate": 5.0082731586759134e-08, + "loss": 2.1084, + "step": 29115 + }, + { + "epoch": 0.97, + "grad_norm": 0.7674879431724548, + "learning_rate": 4.997654387395745e-08, + "loss": 2.0874, + "step": 29116 + }, + { + "epoch": 0.97, + "grad_norm": 0.7262237071990967, + "learning_rate": 4.987046857069078e-08, + "loss": 2.0369, + "step": 29117 + }, + { + "epoch": 0.97, + "grad_norm": 0.7901185750961304, + "learning_rate": 4.976450567815483e-08, + "loss": 2.0059, + "step": 29118 + }, + { + "epoch": 0.97, + "grad_norm": 0.7210655212402344, + "learning_rate": 4.9658655197548644e-08, + "loss": 2.0305, + "step": 29119 + }, + { + "epoch": 0.97, + "grad_norm": 0.7304787039756775, + "learning_rate": 4.9552917130067935e-08, + "loss": 2.0612, + "step": 29120 + }, + { + "epoch": 0.97, + "grad_norm": 0.7489160299301147, + "learning_rate": 4.944729147690619e-08, + "loss": 2.0492, + "step": 29121 + }, + { + "epoch": 0.97, + "grad_norm": 0.7471380233764648, + "learning_rate": 4.934177823925801e-08, + "loss": 1.9651, + "step": 29122 + }, + { + "epoch": 0.97, + "grad_norm": 0.7341728210449219, + "learning_rate": 4.923637741831466e-08, + "loss": 2.0456, + "step": 29123 + }, + { + "epoch": 0.97, + "grad_norm": 0.7330076694488525, + "learning_rate": 4.913108901526742e-08, + "loss": 2.0299, + "step": 29124 + }, + { + "epoch": 0.97, + "grad_norm": 0.7185376286506653, + "learning_rate": 4.902591303130422e-08, + "loss": 2.019, + "step": 29125 + }, + { + "epoch": 0.97, + "grad_norm": 0.7315093874931335, + "learning_rate": 4.892084946761522e-08, + "loss": 2.0662, + "step": 29126 + }, + { + "epoch": 0.97, + "grad_norm": 0.7521397471427917, + "learning_rate": 4.881589832538614e-08, + "loss": 2.0278, + "step": 29127 + }, + { + "epoch": 0.97, + "grad_norm": 0.7190423607826233, + "learning_rate": 4.871105960580269e-08, + "loss": 1.9851, + "step": 29128 + }, + { + "epoch": 0.97, + "grad_norm": 0.7722873687744141, + "learning_rate": 4.86063333100506e-08, + "loss": 2.0967, + "step": 29129 + }, + { + "epoch": 0.97, + "grad_norm": 0.7487995028495789, + "learning_rate": 4.850171943931114e-08, + "loss": 2.0232, + "step": 29130 + }, + { + "epoch": 0.97, + "grad_norm": 0.7543607354164124, + "learning_rate": 4.839721799476893e-08, + "loss": 2.0848, + "step": 29131 + }, + { + "epoch": 0.97, + "grad_norm": 0.7456114888191223, + "learning_rate": 4.829282897760079e-08, + "loss": 2.034, + "step": 29132 + }, + { + "epoch": 0.97, + "grad_norm": 0.7335970997810364, + "learning_rate": 4.818855238898912e-08, + "loss": 2.0948, + "step": 29133 + }, + { + "epoch": 0.97, + "grad_norm": 0.7695701122283936, + "learning_rate": 4.8084388230109636e-08, + "loss": 2.0132, + "step": 29134 + }, + { + "epoch": 0.97, + "grad_norm": 0.7518985867500305, + "learning_rate": 4.79803365021414e-08, + "loss": 2.0489, + "step": 29135 + }, + { + "epoch": 0.97, + "grad_norm": 0.7235926389694214, + "learning_rate": 4.787639720625903e-08, + "loss": 2.0223, + "step": 29136 + }, + { + "epoch": 0.97, + "grad_norm": 0.7602631449699402, + "learning_rate": 4.777257034363603e-08, + "loss": 2.0498, + "step": 29137 + }, + { + "epoch": 0.97, + "grad_norm": 0.7562738656997681, + "learning_rate": 4.76688559154459e-08, + "loss": 2.0638, + "step": 29138 + }, + { + "epoch": 0.97, + "grad_norm": 0.7670345306396484, + "learning_rate": 4.7565253922861045e-08, + "loss": 2.1211, + "step": 29139 + }, + { + "epoch": 0.97, + "grad_norm": 0.7302688360214233, + "learning_rate": 4.746176436705052e-08, + "loss": 1.9879, + "step": 29140 + }, + { + "epoch": 0.97, + "grad_norm": 0.7528406381607056, + "learning_rate": 4.73583872491834e-08, + "loss": 2.0743, + "step": 29141 + }, + { + "epoch": 0.97, + "grad_norm": 0.7400205731391907, + "learning_rate": 4.725512257042986e-08, + "loss": 2.0831, + "step": 29142 + }, + { + "epoch": 0.97, + "grad_norm": 0.7529747486114502, + "learning_rate": 4.715197033195451e-08, + "loss": 2.0314, + "step": 29143 + }, + { + "epoch": 0.97, + "grad_norm": 0.7333756685256958, + "learning_rate": 4.70489305349231e-08, + "loss": 2.0468, + "step": 29144 + }, + { + "epoch": 0.97, + "grad_norm": 0.728471040725708, + "learning_rate": 4.6946003180500246e-08, + "loss": 2.0505, + "step": 29145 + }, + { + "epoch": 0.97, + "grad_norm": 0.7578626871109009, + "learning_rate": 4.684318826984835e-08, + "loss": 2.0641, + "step": 29146 + }, + { + "epoch": 0.97, + "grad_norm": 0.7417796850204468, + "learning_rate": 4.674048580412871e-08, + "loss": 2.0896, + "step": 29147 + }, + { + "epoch": 0.97, + "grad_norm": 0.7563989758491516, + "learning_rate": 4.663789578450262e-08, + "loss": 2.1458, + "step": 29148 + }, + { + "epoch": 0.97, + "grad_norm": 0.7407302260398865, + "learning_rate": 4.653541821212803e-08, + "loss": 2.0158, + "step": 29149 + }, + { + "epoch": 0.97, + "grad_norm": 0.7220346331596375, + "learning_rate": 4.6433053088162925e-08, + "loss": 2.0577, + "step": 29150 + }, + { + "epoch": 0.97, + "grad_norm": 0.7730715870857239, + "learning_rate": 4.633080041376303e-08, + "loss": 2.0311, + "step": 29151 + }, + { + "epoch": 0.97, + "grad_norm": 0.7569090723991394, + "learning_rate": 4.6228660190085206e-08, + "loss": 2.1349, + "step": 29152 + }, + { + "epoch": 0.97, + "grad_norm": 0.7082535028457642, + "learning_rate": 4.612663241828186e-08, + "loss": 2.0348, + "step": 29153 + }, + { + "epoch": 0.97, + "grad_norm": 0.7277864217758179, + "learning_rate": 4.602471709950762e-08, + "loss": 1.9863, + "step": 29154 + }, + { + "epoch": 0.97, + "grad_norm": 0.7526946067810059, + "learning_rate": 4.592291423491158e-08, + "loss": 2.0505, + "step": 29155 + }, + { + "epoch": 0.97, + "grad_norm": 0.7481178641319275, + "learning_rate": 4.5821223825643915e-08, + "loss": 2.0493, + "step": 29156 + }, + { + "epoch": 0.97, + "grad_norm": 0.7619435787200928, + "learning_rate": 4.5719645872855936e-08, + "loss": 2.0455, + "step": 29157 + }, + { + "epoch": 0.97, + "grad_norm": 0.7525490522384644, + "learning_rate": 4.561818037769228e-08, + "loss": 2.0562, + "step": 29158 + }, + { + "epoch": 0.97, + "grad_norm": 0.7606086134910583, + "learning_rate": 4.551682734130203e-08, + "loss": 2.0165, + "step": 29159 + }, + { + "epoch": 0.97, + "grad_norm": 0.7671846151351929, + "learning_rate": 4.54155867648276e-08, + "loss": 2.0304, + "step": 29160 + }, + { + "epoch": 0.97, + "grad_norm": 0.719883382320404, + "learning_rate": 4.531445864941475e-08, + "loss": 2.0112, + "step": 29161 + }, + { + "epoch": 0.97, + "grad_norm": 0.7404399514198303, + "learning_rate": 4.521344299620367e-08, + "loss": 1.9952, + "step": 29162 + }, + { + "epoch": 0.97, + "grad_norm": 0.7131633162498474, + "learning_rate": 4.5112539806337895e-08, + "loss": 2.0892, + "step": 29163 + }, + { + "epoch": 0.97, + "grad_norm": 0.7469248175621033, + "learning_rate": 4.5011749080957625e-08, + "loss": 2.0159, + "step": 29164 + }, + { + "epoch": 0.97, + "grad_norm": 0.7516930103302002, + "learning_rate": 4.491107082119861e-08, + "loss": 2.1159, + "step": 29165 + }, + { + "epoch": 0.97, + "grad_norm": 0.721413254737854, + "learning_rate": 4.4810505028201056e-08, + "loss": 2.141, + "step": 29166 + }, + { + "epoch": 0.97, + "grad_norm": 0.75139981508255, + "learning_rate": 4.471005170310072e-08, + "loss": 2.0632, + "step": 29167 + }, + { + "epoch": 0.97, + "grad_norm": 0.7599073648452759, + "learning_rate": 4.460971084703003e-08, + "loss": 2.0917, + "step": 29168 + }, + { + "epoch": 0.97, + "grad_norm": 0.7397643327713013, + "learning_rate": 4.4509482461125855e-08, + "loss": 2.0029, + "step": 29169 + }, + { + "epoch": 0.97, + "grad_norm": 0.7287266254425049, + "learning_rate": 4.44093665465184e-08, + "loss": 2.0486, + "step": 29170 + }, + { + "epoch": 0.97, + "grad_norm": 0.7567946910858154, + "learning_rate": 4.430936310434009e-08, + "loss": 2.0379, + "step": 29171 + }, + { + "epoch": 0.97, + "grad_norm": 0.7328543663024902, + "learning_rate": 4.420947213571891e-08, + "loss": 1.9658, + "step": 29172 + }, + { + "epoch": 0.97, + "grad_norm": 0.7548821568489075, + "learning_rate": 4.410969364178508e-08, + "loss": 2.0554, + "step": 29173 + }, + { + "epoch": 0.97, + "grad_norm": 0.7098091840744019, + "learning_rate": 4.401002762366546e-08, + "loss": 2.02, + "step": 29174 + }, + { + "epoch": 0.97, + "grad_norm": 0.7328534126281738, + "learning_rate": 4.391047408248472e-08, + "loss": 2.0009, + "step": 29175 + }, + { + "epoch": 0.97, + "grad_norm": 0.7250224947929382, + "learning_rate": 4.381103301936973e-08, + "loss": 2.0653, + "step": 29176 + }, + { + "epoch": 0.97, + "grad_norm": 0.7702951431274414, + "learning_rate": 4.371170443544182e-08, + "loss": 2.0688, + "step": 29177 + }, + { + "epoch": 0.97, + "grad_norm": 0.7229671478271484, + "learning_rate": 4.361248833182452e-08, + "loss": 2.0792, + "step": 29178 + }, + { + "epoch": 0.97, + "grad_norm": 0.7605637907981873, + "learning_rate": 4.351338470963917e-08, + "loss": 2.0085, + "step": 29179 + }, + { + "epoch": 0.97, + "grad_norm": 0.7301909923553467, + "learning_rate": 4.3414393570003765e-08, + "loss": 1.9944, + "step": 29180 + }, + { + "epoch": 0.97, + "grad_norm": 0.7481768131256104, + "learning_rate": 4.33155149140374e-08, + "loss": 2.0395, + "step": 29181 + }, + { + "epoch": 0.97, + "grad_norm": 0.772861659526825, + "learning_rate": 4.3216748742856975e-08, + "loss": 2.1005, + "step": 29182 + }, + { + "epoch": 0.97, + "grad_norm": 0.7764679789543152, + "learning_rate": 4.311809505757936e-08, + "loss": 2.0957, + "step": 29183 + }, + { + "epoch": 0.97, + "grad_norm": 0.7575426697731018, + "learning_rate": 4.3019553859317e-08, + "loss": 2.0345, + "step": 29184 + }, + { + "epoch": 0.97, + "grad_norm": 0.7302182912826538, + "learning_rate": 4.292112514918456e-08, + "loss": 1.9896, + "step": 29185 + }, + { + "epoch": 0.97, + "grad_norm": 0.7696461081504822, + "learning_rate": 4.282280892829449e-08, + "loss": 2.1197, + "step": 29186 + }, + { + "epoch": 0.97, + "grad_norm": 0.7428693175315857, + "learning_rate": 4.272460519775701e-08, + "loss": 2.0323, + "step": 29187 + }, + { + "epoch": 0.97, + "grad_norm": 0.7418534159660339, + "learning_rate": 4.262651395868123e-08, + "loss": 2.0548, + "step": 29188 + }, + { + "epoch": 0.97, + "grad_norm": 0.7256563305854797, + "learning_rate": 4.2528535212175146e-08, + "loss": 2.0756, + "step": 29189 + }, + { + "epoch": 0.97, + "grad_norm": 0.7481099963188171, + "learning_rate": 4.243066895934567e-08, + "loss": 2.0571, + "step": 29190 + }, + { + "epoch": 0.97, + "grad_norm": 0.7209005355834961, + "learning_rate": 4.2332915201298566e-08, + "loss": 2.0789, + "step": 29191 + }, + { + "epoch": 0.97, + "grad_norm": 0.7448647618293762, + "learning_rate": 4.223527393913962e-08, + "loss": 2.0672, + "step": 29192 + }, + { + "epoch": 0.97, + "grad_norm": 0.7893803119659424, + "learning_rate": 4.213774517397018e-08, + "loss": 2.0441, + "step": 29193 + }, + { + "epoch": 0.97, + "grad_norm": 0.712611734867096, + "learning_rate": 4.204032890689269e-08, + "loss": 2.0759, + "step": 29194 + }, + { + "epoch": 0.97, + "grad_norm": 0.7360569834709167, + "learning_rate": 4.194302513900739e-08, + "loss": 2.0224, + "step": 29195 + }, + { + "epoch": 0.97, + "grad_norm": 0.7265943884849548, + "learning_rate": 4.184583387141339e-08, + "loss": 2.0331, + "step": 29196 + }, + { + "epoch": 0.97, + "grad_norm": 0.720267117023468, + "learning_rate": 4.1748755105209814e-08, + "loss": 2.0143, + "step": 29197 + }, + { + "epoch": 0.97, + "grad_norm": 0.7371760010719299, + "learning_rate": 4.165178884149246e-08, + "loss": 2.0593, + "step": 29198 + }, + { + "epoch": 0.97, + "grad_norm": 0.7384008765220642, + "learning_rate": 4.1554935081357104e-08, + "loss": 1.9241, + "step": 29199 + }, + { + "epoch": 0.97, + "grad_norm": 0.7674313187599182, + "learning_rate": 4.145819382589844e-08, + "loss": 2.06, + "step": 29200 + }, + { + "epoch": 0.97, + "grad_norm": 0.757977306842804, + "learning_rate": 4.136156507620781e-08, + "loss": 2.0493, + "step": 29201 + }, + { + "epoch": 0.97, + "grad_norm": 0.7460862398147583, + "learning_rate": 4.1265048833378784e-08, + "loss": 2.0446, + "step": 29202 + }, + { + "epoch": 0.97, + "grad_norm": 0.7289160490036011, + "learning_rate": 4.1168645098500494e-08, + "loss": 2.0314, + "step": 29203 + }, + { + "epoch": 0.97, + "grad_norm": 0.7429171800613403, + "learning_rate": 4.107235387266206e-08, + "loss": 2.0248, + "step": 29204 + }, + { + "epoch": 0.97, + "grad_norm": 0.7336742281913757, + "learning_rate": 4.0976175156951515e-08, + "loss": 2.0912, + "step": 29205 + }, + { + "epoch": 0.97, + "grad_norm": 0.7690092325210571, + "learning_rate": 4.0880108952456866e-08, + "loss": 1.9986, + "step": 29206 + }, + { + "epoch": 0.97, + "grad_norm": 0.7633804082870483, + "learning_rate": 4.0784155260260584e-08, + "loss": 2.0707, + "step": 29207 + }, + { + "epoch": 0.97, + "grad_norm": 0.7094035744667053, + "learning_rate": 4.068831408144958e-08, + "loss": 2.0894, + "step": 29208 + }, + { + "epoch": 0.97, + "grad_norm": 0.7410601377487183, + "learning_rate": 4.059258541710409e-08, + "loss": 2.065, + "step": 29209 + }, + { + "epoch": 0.97, + "grad_norm": 0.7327796816825867, + "learning_rate": 4.049696926830771e-08, + "loss": 2.0453, + "step": 29210 + }, + { + "epoch": 0.97, + "grad_norm": 0.735737144947052, + "learning_rate": 4.0401465636139556e-08, + "loss": 2.0193, + "step": 29211 + }, + { + "epoch": 0.97, + "grad_norm": 0.7469883561134338, + "learning_rate": 4.0306074521677673e-08, + "loss": 1.9847, + "step": 29212 + }, + { + "epoch": 0.97, + "grad_norm": 0.7568383812904358, + "learning_rate": 4.0210795926001186e-08, + "loss": 2.0591, + "step": 29213 + }, + { + "epoch": 0.97, + "grad_norm": 0.7379813194274902, + "learning_rate": 4.0115629850187024e-08, + "loss": 2.0099, + "step": 29214 + }, + { + "epoch": 0.97, + "grad_norm": 0.7758004665374756, + "learning_rate": 4.0020576295308755e-08, + "loss": 2.0011, + "step": 29215 + }, + { + "epoch": 0.97, + "grad_norm": 0.7378414273262024, + "learning_rate": 3.9925635262441084e-08, + "loss": 2.048, + "step": 29216 + }, + { + "epoch": 0.97, + "grad_norm": 0.7421635985374451, + "learning_rate": 3.9830806752656495e-08, + "loss": 2.0593, + "step": 29217 + }, + { + "epoch": 0.97, + "grad_norm": 0.7714683413505554, + "learning_rate": 3.973609076702634e-08, + "loss": 1.9935, + "step": 29218 + }, + { + "epoch": 0.97, + "grad_norm": 0.735077977180481, + "learning_rate": 3.964148730661976e-08, + "loss": 2.0893, + "step": 29219 + }, + { + "epoch": 0.97, + "grad_norm": 0.7345150113105774, + "learning_rate": 3.9546996372507027e-08, + "loss": 2.0077, + "step": 29220 + }, + { + "epoch": 0.97, + "grad_norm": 0.7440850138664246, + "learning_rate": 3.945261796575506e-08, + "loss": 2.0053, + "step": 29221 + }, + { + "epoch": 0.97, + "grad_norm": 0.7897260785102844, + "learning_rate": 3.935835208742966e-08, + "loss": 2.0654, + "step": 29222 + }, + { + "epoch": 0.97, + "grad_norm": 0.7450955510139465, + "learning_rate": 3.926419873859666e-08, + "loss": 1.9985, + "step": 29223 + }, + { + "epoch": 0.97, + "grad_norm": 0.7278220057487488, + "learning_rate": 3.917015792031853e-08, + "loss": 2.0343, + "step": 29224 + }, + { + "epoch": 0.97, + "grad_norm": 0.7684190273284912, + "learning_rate": 3.907622963365776e-08, + "loss": 2.0466, + "step": 29225 + }, + { + "epoch": 0.97, + "grad_norm": 0.7469903826713562, + "learning_rate": 3.898241387967683e-08, + "loss": 2.0339, + "step": 29226 + }, + { + "epoch": 0.97, + "grad_norm": 0.7290650606155396, + "learning_rate": 3.8888710659434894e-08, + "loss": 1.9912, + "step": 29227 + }, + { + "epoch": 0.97, + "grad_norm": 0.7257793545722961, + "learning_rate": 3.87951199739911e-08, + "loss": 2.0193, + "step": 29228 + }, + { + "epoch": 0.97, + "grad_norm": 0.7449692487716675, + "learning_rate": 3.870164182440128e-08, + "loss": 2.029, + "step": 29229 + }, + { + "epoch": 0.97, + "grad_norm": 0.7385895252227783, + "learning_rate": 3.860827621172236e-08, + "loss": 2.0783, + "step": 29230 + }, + { + "epoch": 0.97, + "grad_norm": 0.7360743880271912, + "learning_rate": 3.851502313700906e-08, + "loss": 2.0436, + "step": 29231 + }, + { + "epoch": 0.97, + "grad_norm": 0.7701732516288757, + "learning_rate": 3.8421882601316075e-08, + "loss": 2.0097, + "step": 29232 + }, + { + "epoch": 0.97, + "grad_norm": 0.7386481761932373, + "learning_rate": 3.832885460569369e-08, + "loss": 2.0959, + "step": 29233 + }, + { + "epoch": 0.97, + "grad_norm": 0.730781078338623, + "learning_rate": 3.823593915119439e-08, + "loss": 2.0204, + "step": 29234 + }, + { + "epoch": 0.97, + "grad_norm": 0.7177600860595703, + "learning_rate": 3.814313623886623e-08, + "loss": 2.1408, + "step": 29235 + }, + { + "epoch": 0.97, + "grad_norm": 0.7370340824127197, + "learning_rate": 3.8050445869759476e-08, + "loss": 1.9934, + "step": 29236 + }, + { + "epoch": 0.97, + "grad_norm": 0.7403169274330139, + "learning_rate": 3.7957868044921075e-08, + "loss": 2.0657, + "step": 29237 + }, + { + "epoch": 0.97, + "grad_norm": 0.7843307256698608, + "learning_rate": 3.7865402765395744e-08, + "loss": 2.0287, + "step": 29238 + }, + { + "epoch": 0.97, + "grad_norm": 0.7840523719787598, + "learning_rate": 3.77730500322282e-08, + "loss": 2.0693, + "step": 29239 + }, + { + "epoch": 0.97, + "grad_norm": 0.7744840383529663, + "learning_rate": 3.768080984646316e-08, + "loss": 2.0889, + "step": 29240 + }, + { + "epoch": 0.97, + "grad_norm": 0.7646792531013489, + "learning_rate": 3.7588682209140916e-08, + "loss": 1.99, + "step": 29241 + }, + { + "epoch": 0.97, + "grad_norm": 0.7263439297676086, + "learning_rate": 3.749666712130395e-08, + "loss": 2.0678, + "step": 29242 + }, + { + "epoch": 0.97, + "grad_norm": 0.7870316505432129, + "learning_rate": 3.7404764583991445e-08, + "loss": 2.0835, + "step": 29243 + }, + { + "epoch": 0.97, + "grad_norm": 0.7363491058349609, + "learning_rate": 3.731297459824035e-08, + "loss": 2.0081, + "step": 29244 + }, + { + "epoch": 0.97, + "grad_norm": 0.7411637306213379, + "learning_rate": 3.722129716508871e-08, + "loss": 2.0667, + "step": 29245 + }, + { + "epoch": 0.97, + "grad_norm": 0.7221792340278625, + "learning_rate": 3.712973228557237e-08, + "loss": 1.9776, + "step": 29246 + }, + { + "epoch": 0.97, + "grad_norm": 0.7152597904205322, + "learning_rate": 3.7038279960724955e-08, + "loss": 1.9721, + "step": 29247 + }, + { + "epoch": 0.97, + "grad_norm": 0.7625518441200256, + "learning_rate": 3.694694019158007e-08, + "loss": 2.0733, + "step": 29248 + }, + { + "epoch": 0.97, + "grad_norm": 0.7547231316566467, + "learning_rate": 3.685571297917134e-08, + "loss": 2.0748, + "step": 29249 + }, + { + "epoch": 0.97, + "grad_norm": 0.7913203835487366, + "learning_rate": 3.676459832452683e-08, + "loss": 2.0479, + "step": 29250 + }, + { + "epoch": 0.97, + "grad_norm": 0.7813690900802612, + "learning_rate": 3.667359622867572e-08, + "loss": 2.1247, + "step": 29251 + }, + { + "epoch": 0.97, + "grad_norm": 0.7526161670684814, + "learning_rate": 3.6582706692649403e-08, + "loss": 2.1116, + "step": 29252 + }, + { + "epoch": 0.97, + "grad_norm": 0.7368454337120056, + "learning_rate": 3.64919297174704e-08, + "loss": 2.0796, + "step": 29253 + }, + { + "epoch": 0.97, + "grad_norm": 0.720645546913147, + "learning_rate": 3.6401265304167875e-08, + "loss": 1.9912, + "step": 29254 + }, + { + "epoch": 0.97, + "grad_norm": 0.7790018916130066, + "learning_rate": 3.6310713453764356e-08, + "loss": 2.056, + "step": 29255 + }, + { + "epoch": 0.97, + "grad_norm": 0.763236403465271, + "learning_rate": 3.6220274167282354e-08, + "loss": 2.0924, + "step": 29256 + }, + { + "epoch": 0.97, + "grad_norm": 0.7481763958930969, + "learning_rate": 3.6129947445744385e-08, + "loss": 2.0863, + "step": 29257 + }, + { + "epoch": 0.97, + "grad_norm": 0.7485073804855347, + "learning_rate": 3.603973329017185e-08, + "loss": 1.9945, + "step": 29258 + }, + { + "epoch": 0.97, + "grad_norm": 0.7226691842079163, + "learning_rate": 3.594963170158283e-08, + "loss": 1.9495, + "step": 29259 + }, + { + "epoch": 0.97, + "grad_norm": 0.7686389684677124, + "learning_rate": 3.585964268099429e-08, + "loss": 2.0025, + "step": 29260 + }, + { + "epoch": 0.97, + "grad_norm": 0.7592135071754456, + "learning_rate": 3.5769766229425405e-08, + "loss": 2.0164, + "step": 29261 + }, + { + "epoch": 0.97, + "grad_norm": 0.7605711817741394, + "learning_rate": 3.5680002347888707e-08, + "loss": 2.0188, + "step": 29262 + }, + { + "epoch": 0.97, + "grad_norm": 0.7534777522087097, + "learning_rate": 3.559035103740005e-08, + "loss": 2.0598, + "step": 29263 + }, + { + "epoch": 0.97, + "grad_norm": 0.7408876419067383, + "learning_rate": 3.550081229897195e-08, + "loss": 2.095, + "step": 29264 + }, + { + "epoch": 0.97, + "grad_norm": 0.7327544093132019, + "learning_rate": 3.541138613361694e-08, + "loss": 2.0563, + "step": 29265 + }, + { + "epoch": 0.97, + "grad_norm": 0.7415438890457153, + "learning_rate": 3.53220725423431e-08, + "loss": 2.0332, + "step": 29266 + }, + { + "epoch": 0.97, + "grad_norm": 0.7405075430870056, + "learning_rate": 3.523287152616073e-08, + "loss": 2.0358, + "step": 29267 + }, + { + "epoch": 0.97, + "grad_norm": 0.725394070148468, + "learning_rate": 3.514378308607791e-08, + "loss": 1.988, + "step": 29268 + }, + { + "epoch": 0.97, + "grad_norm": 0.7278767824172974, + "learning_rate": 3.5054807223100504e-08, + "loss": 2.0266, + "step": 29269 + }, + { + "epoch": 0.97, + "grad_norm": 0.7820540070533752, + "learning_rate": 3.496594393823327e-08, + "loss": 1.9844, + "step": 29270 + }, + { + "epoch": 0.97, + "grad_norm": 0.7289568185806274, + "learning_rate": 3.487719323248096e-08, + "loss": 2.0419, + "step": 29271 + }, + { + "epoch": 0.97, + "grad_norm": 0.7435545325279236, + "learning_rate": 3.47885551068472e-08, + "loss": 1.9952, + "step": 29272 + }, + { + "epoch": 0.97, + "grad_norm": 0.7308414578437805, + "learning_rate": 3.47000295623301e-08, + "loss": 2.0122, + "step": 29273 + }, + { + "epoch": 0.97, + "grad_norm": 0.7619144320487976, + "learning_rate": 3.461161659993328e-08, + "loss": 2.0502, + "step": 29274 + }, + { + "epoch": 0.97, + "grad_norm": 0.7386350631713867, + "learning_rate": 3.452331622065375e-08, + "loss": 2.0249, + "step": 29275 + }, + { + "epoch": 0.97, + "grad_norm": 0.7373751997947693, + "learning_rate": 3.4435128425489575e-08, + "loss": 2.0295, + "step": 29276 + }, + { + "epoch": 0.97, + "grad_norm": 0.7175050973892212, + "learning_rate": 3.434705321543663e-08, + "loss": 2.0148, + "step": 29277 + }, + { + "epoch": 0.97, + "grad_norm": 0.7567881345748901, + "learning_rate": 3.4259090591490795e-08, + "loss": 2.0143, + "step": 29278 + }, + { + "epoch": 0.97, + "grad_norm": 0.7517544031143188, + "learning_rate": 3.4171240554644604e-08, + "loss": 2.0228, + "step": 29279 + }, + { + "epoch": 0.97, + "grad_norm": 0.735106885433197, + "learning_rate": 3.4083503105891705e-08, + "loss": 2.0337, + "step": 29280 + }, + { + "epoch": 0.97, + "grad_norm": 0.7352213859558105, + "learning_rate": 3.3995878246222416e-08, + "loss": 2.0319, + "step": 29281 + }, + { + "epoch": 0.97, + "grad_norm": 0.7510189414024353, + "learning_rate": 3.390836597662706e-08, + "loss": 2.1108, + "step": 29282 + }, + { + "epoch": 0.97, + "grad_norm": 0.745866060256958, + "learning_rate": 3.382096629809373e-08, + "loss": 1.9574, + "step": 29283 + }, + { + "epoch": 0.97, + "grad_norm": 0.7446914315223694, + "learning_rate": 3.373367921161164e-08, + "loss": 2.0823, + "step": 29284 + }, + { + "epoch": 0.97, + "grad_norm": 0.7464628219604492, + "learning_rate": 3.364650471816444e-08, + "loss": 2.0415, + "step": 29285 + }, + { + "epoch": 0.97, + "grad_norm": 0.7358343601226807, + "learning_rate": 3.355944281873913e-08, + "loss": 2.0232, + "step": 29286 + }, + { + "epoch": 0.97, + "grad_norm": 0.7484214901924133, + "learning_rate": 3.347249351431714e-08, + "loss": 2.1048, + "step": 29287 + }, + { + "epoch": 0.97, + "grad_norm": 0.7285729050636292, + "learning_rate": 3.3385656805883235e-08, + "loss": 2.0389, + "step": 29288 + }, + { + "epoch": 0.97, + "grad_norm": 0.7495244741439819, + "learning_rate": 3.329893269441664e-08, + "loss": 2.0387, + "step": 29289 + }, + { + "epoch": 0.97, + "grad_norm": 0.7547352910041809, + "learning_rate": 3.3212321180897676e-08, + "loss": 1.9868, + "step": 29290 + }, + { + "epoch": 0.97, + "grad_norm": 0.7827876210212708, + "learning_rate": 3.312582226630445e-08, + "loss": 2.0996, + "step": 29291 + }, + { + "epoch": 0.97, + "grad_norm": 0.7101939916610718, + "learning_rate": 3.303943595161507e-08, + "loss": 2.0477, + "step": 29292 + }, + { + "epoch": 0.97, + "grad_norm": 0.7370198369026184, + "learning_rate": 3.295316223780432e-08, + "loss": 2.019, + "step": 29293 + }, + { + "epoch": 0.97, + "grad_norm": 0.7574487924575806, + "learning_rate": 3.286700112584806e-08, + "loss": 2.0207, + "step": 29294 + }, + { + "epoch": 0.97, + "grad_norm": 0.7522162199020386, + "learning_rate": 3.278095261671888e-08, + "loss": 2.0353, + "step": 29295 + }, + { + "epoch": 0.97, + "grad_norm": 0.7233257293701172, + "learning_rate": 3.2695016711389304e-08, + "loss": 2.0286, + "step": 29296 + }, + { + "epoch": 0.97, + "grad_norm": 0.7328599691390991, + "learning_rate": 3.260919341082969e-08, + "loss": 2.0278, + "step": 29297 + }, + { + "epoch": 0.97, + "grad_norm": 0.744659423828125, + "learning_rate": 3.252348271601036e-08, + "loss": 2.0223, + "step": 29298 + }, + { + "epoch": 0.97, + "grad_norm": 0.7363348603248596, + "learning_rate": 3.243788462789943e-08, + "loss": 2.0274, + "step": 29299 + }, + { + "epoch": 0.97, + "grad_norm": 0.7408971190452576, + "learning_rate": 3.23523991474628e-08, + "loss": 2.1354, + "step": 29300 + }, + { + "epoch": 0.97, + "grad_norm": 0.747258722782135, + "learning_rate": 3.226702627566747e-08, + "loss": 2.0502, + "step": 29301 + }, + { + "epoch": 0.97, + "grad_norm": 0.745252788066864, + "learning_rate": 3.218176601347822e-08, + "loss": 2.0378, + "step": 29302 + }, + { + "epoch": 0.97, + "grad_norm": 0.7400323748588562, + "learning_rate": 3.209661836185762e-08, + "loss": 2.1078, + "step": 29303 + }, + { + "epoch": 0.97, + "grad_norm": 0.7377060651779175, + "learning_rate": 3.201158332176712e-08, + "loss": 2.0405, + "step": 29304 + }, + { + "epoch": 0.97, + "grad_norm": 0.7518259882926941, + "learning_rate": 3.192666089416707e-08, + "loss": 2.0257, + "step": 29305 + }, + { + "epoch": 0.98, + "grad_norm": 0.7264400720596313, + "learning_rate": 3.1841851080018915e-08, + "loss": 1.9768, + "step": 29306 + }, + { + "epoch": 0.98, + "grad_norm": 0.7252213358879089, + "learning_rate": 3.175715388027967e-08, + "loss": 2.0355, + "step": 29307 + }, + { + "epoch": 0.98, + "grad_norm": 0.7488684058189392, + "learning_rate": 3.167256929590634e-08, + "loss": 1.9702, + "step": 29308 + }, + { + "epoch": 0.98, + "grad_norm": 0.7420268654823303, + "learning_rate": 3.158809732785373e-08, + "loss": 2.032, + "step": 29309 + }, + { + "epoch": 0.98, + "grad_norm": 0.7812830805778503, + "learning_rate": 3.150373797707662e-08, + "loss": 2.0282, + "step": 29310 + }, + { + "epoch": 0.98, + "grad_norm": 0.7494191527366638, + "learning_rate": 3.14194912445287e-08, + "loss": 2.0459, + "step": 29311 + }, + { + "epoch": 0.98, + "grad_norm": 0.7648168802261353, + "learning_rate": 3.1335357131161424e-08, + "loss": 2.1919, + "step": 29312 + }, + { + "epoch": 0.98, + "grad_norm": 0.779164731502533, + "learning_rate": 3.125133563792404e-08, + "loss": 2.0307, + "step": 29313 + }, + { + "epoch": 0.98, + "grad_norm": 0.7374753355979919, + "learning_rate": 3.116742676576689e-08, + "loss": 2.0529, + "step": 29314 + }, + { + "epoch": 0.98, + "grad_norm": 0.7372316718101501, + "learning_rate": 3.108363051563812e-08, + "loss": 2.0014, + "step": 29315 + }, + { + "epoch": 0.98, + "grad_norm": 0.7344589233398438, + "learning_rate": 3.099994688848473e-08, + "loss": 1.9612, + "step": 29316 + }, + { + "epoch": 0.98, + "grad_norm": 0.7689493894577026, + "learning_rate": 3.091637588525154e-08, + "loss": 2.1096, + "step": 29317 + }, + { + "epoch": 0.98, + "grad_norm": 0.776805579662323, + "learning_rate": 3.083291750688222e-08, + "loss": 2.0552, + "step": 29318 + }, + { + "epoch": 0.98, + "grad_norm": 0.726927638053894, + "learning_rate": 3.074957175431936e-08, + "loss": 2.0349, + "step": 29319 + }, + { + "epoch": 0.98, + "grad_norm": 0.8585315942764282, + "learning_rate": 3.066633862850665e-08, + "loss": 2.142, + "step": 29320 + }, + { + "epoch": 0.98, + "grad_norm": 0.7576346397399902, + "learning_rate": 3.058321813038223e-08, + "loss": 2.0176, + "step": 29321 + }, + { + "epoch": 0.98, + "grad_norm": 0.7717317938804626, + "learning_rate": 3.0500210260886455e-08, + "loss": 2.0725, + "step": 29322 + }, + { + "epoch": 0.98, + "grad_norm": 0.7225966453552246, + "learning_rate": 3.041731502095635e-08, + "loss": 2.0448, + "step": 29323 + }, + { + "epoch": 0.98, + "grad_norm": 0.7220733761787415, + "learning_rate": 3.033453241152784e-08, + "loss": 2.0348, + "step": 29324 + }, + { + "epoch": 0.98, + "grad_norm": 0.7422890067100525, + "learning_rate": 3.0251862433537946e-08, + "loss": 2.0319, + "step": 29325 + }, + { + "epoch": 0.98, + "grad_norm": 0.745943009853363, + "learning_rate": 3.016930508792038e-08, + "loss": 2.0937, + "step": 29326 + }, + { + "epoch": 0.98, + "grad_norm": 0.7437798380851746, + "learning_rate": 3.008686037560549e-08, + "loss": 2.045, + "step": 29327 + }, + { + "epoch": 0.98, + "grad_norm": 0.7862579226493835, + "learning_rate": 3.000452829752698e-08, + "loss": 2.0465, + "step": 29328 + }, + { + "epoch": 0.98, + "grad_norm": 0.7073240876197815, + "learning_rate": 2.992230885461411e-08, + "loss": 2.0288, + "step": 29329 + }, + { + "epoch": 0.98, + "grad_norm": 0.7497251033782959, + "learning_rate": 2.984020204779725e-08, + "loss": 2.0893, + "step": 29330 + }, + { + "epoch": 0.98, + "grad_norm": 0.736870288848877, + "learning_rate": 2.975820787800121e-08, + "loss": 2.0346, + "step": 29331 + }, + { + "epoch": 0.98, + "grad_norm": 0.7323058843612671, + "learning_rate": 2.9676326346154137e-08, + "loss": 2.0351, + "step": 29332 + }, + { + "epoch": 0.98, + "grad_norm": 0.7193198204040527, + "learning_rate": 2.959455745318085e-08, + "loss": 2.0809, + "step": 29333 + }, + { + "epoch": 0.98, + "grad_norm": 0.7655064463615417, + "learning_rate": 2.9512901200005052e-08, + "loss": 2.0029, + "step": 29334 + }, + { + "epoch": 0.98, + "grad_norm": 0.7439627051353455, + "learning_rate": 2.9431357587549336e-08, + "loss": 2.0991, + "step": 29335 + }, + { + "epoch": 0.98, + "grad_norm": 0.7420775294303894, + "learning_rate": 2.934992661673408e-08, + "loss": 2.0818, + "step": 29336 + }, + { + "epoch": 0.98, + "grad_norm": 0.7294792532920837, + "learning_rate": 2.926860828848077e-08, + "loss": 2.0354, + "step": 29337 + }, + { + "epoch": 0.98, + "grad_norm": 0.7574129700660706, + "learning_rate": 2.918740260370756e-08, + "loss": 2.0776, + "step": 29338 + }, + { + "epoch": 0.98, + "grad_norm": 0.7367841005325317, + "learning_rate": 2.9106309563331493e-08, + "loss": 2.0132, + "step": 29339 + }, + { + "epoch": 0.98, + "grad_norm": 0.7288132309913635, + "learning_rate": 2.90253291682685e-08, + "loss": 1.9739, + "step": 29340 + }, + { + "epoch": 0.98, + "grad_norm": 0.7488398551940918, + "learning_rate": 2.8944461419433413e-08, + "loss": 2.0527, + "step": 29341 + }, + { + "epoch": 0.98, + "grad_norm": 0.7898589372634888, + "learning_rate": 2.8863706317739937e-08, + "loss": 2.1155, + "step": 29342 + }, + { + "epoch": 0.98, + "grad_norm": 0.7256209850311279, + "learning_rate": 2.878306386410179e-08, + "loss": 2.1007, + "step": 29343 + }, + { + "epoch": 0.98, + "grad_norm": 0.7454590201377869, + "learning_rate": 2.8702534059428245e-08, + "loss": 1.9412, + "step": 29344 + }, + { + "epoch": 0.98, + "grad_norm": 0.7770012617111206, + "learning_rate": 2.8622116904629683e-08, + "loss": 2.112, + "step": 29345 + }, + { + "epoch": 0.98, + "grad_norm": 0.7592570185661316, + "learning_rate": 2.8541812400615378e-08, + "loss": 2.0265, + "step": 29346 + }, + { + "epoch": 0.98, + "grad_norm": 0.7534446716308594, + "learning_rate": 2.8461620548291268e-08, + "loss": 2.0923, + "step": 29347 + }, + { + "epoch": 0.98, + "grad_norm": 0.7456063628196716, + "learning_rate": 2.83815413485633e-08, + "loss": 2.0968, + "step": 29348 + }, + { + "epoch": 0.98, + "grad_norm": 0.7405781149864197, + "learning_rate": 2.8301574802337418e-08, + "loss": 2.0849, + "step": 29349 + }, + { + "epoch": 0.98, + "grad_norm": 0.7542093992233276, + "learning_rate": 2.8221720910516227e-08, + "loss": 2.0954, + "step": 29350 + }, + { + "epoch": 0.98, + "grad_norm": 0.7183520793914795, + "learning_rate": 2.814197967400234e-08, + "loss": 2.0267, + "step": 29351 + }, + { + "epoch": 0.98, + "grad_norm": 0.7541064620018005, + "learning_rate": 2.806235109369504e-08, + "loss": 2.0654, + "step": 29352 + }, + { + "epoch": 0.98, + "grad_norm": 0.773010790348053, + "learning_rate": 2.7982835170496936e-08, + "loss": 2.0448, + "step": 29353 + }, + { + "epoch": 0.98, + "grad_norm": 0.7693642377853394, + "learning_rate": 2.790343190530287e-08, + "loss": 2.0506, + "step": 29354 + }, + { + "epoch": 0.98, + "grad_norm": 0.8021641373634338, + "learning_rate": 2.782414129901323e-08, + "loss": 2.0334, + "step": 29355 + }, + { + "epoch": 0.98, + "grad_norm": 0.7557674050331116, + "learning_rate": 2.7744963352521748e-08, + "loss": 2.0602, + "step": 29356 + }, + { + "epoch": 0.98, + "grad_norm": 0.7517495155334473, + "learning_rate": 2.7665898066722153e-08, + "loss": 1.9862, + "step": 29357 + }, + { + "epoch": 0.98, + "grad_norm": 0.7144278883934021, + "learning_rate": 2.758694544250928e-08, + "loss": 2.0314, + "step": 29358 + }, + { + "epoch": 0.98, + "grad_norm": 0.7326173186302185, + "learning_rate": 2.750810548077576e-08, + "loss": 1.9813, + "step": 29359 + }, + { + "epoch": 0.98, + "grad_norm": 0.7637680768966675, + "learning_rate": 2.742937818241087e-08, + "loss": 2.0561, + "step": 29360 + }, + { + "epoch": 0.98, + "grad_norm": 0.74089515209198, + "learning_rate": 2.7350763548303904e-08, + "loss": 2.0966, + "step": 29361 + }, + { + "epoch": 0.98, + "grad_norm": 0.7455511689186096, + "learning_rate": 2.727226157934304e-08, + "loss": 2.0368, + "step": 29362 + }, + { + "epoch": 0.98, + "grad_norm": 0.7451121211051941, + "learning_rate": 2.7193872276416455e-08, + "loss": 2.1327, + "step": 29363 + }, + { + "epoch": 0.98, + "grad_norm": 0.7322178483009338, + "learning_rate": 2.7115595640408997e-08, + "loss": 2.0899, + "step": 29364 + }, + { + "epoch": 0.98, + "grad_norm": 0.7536479830741882, + "learning_rate": 2.7037431672204405e-08, + "loss": 2.0056, + "step": 29365 + }, + { + "epoch": 0.98, + "grad_norm": 0.7790051102638245, + "learning_rate": 2.6959380372686416e-08, + "loss": 2.0319, + "step": 29366 + }, + { + "epoch": 0.98, + "grad_norm": 0.7282761335372925, + "learning_rate": 2.6881441742735437e-08, + "loss": 2.0594, + "step": 29367 + }, + { + "epoch": 0.98, + "grad_norm": 0.7305344343185425, + "learning_rate": 2.6803615783234094e-08, + "loss": 1.9841, + "step": 29368 + }, + { + "epoch": 0.98, + "grad_norm": 0.7390607595443726, + "learning_rate": 2.6725902495060575e-08, + "loss": 2.0526, + "step": 29369 + }, + { + "epoch": 0.98, + "grad_norm": 0.7290567755699158, + "learning_rate": 2.6648301879093063e-08, + "loss": 2.0031, + "step": 29370 + }, + { + "epoch": 0.98, + "grad_norm": 0.7620627284049988, + "learning_rate": 2.657081393620753e-08, + "loss": 2.0376, + "step": 29371 + }, + { + "epoch": 0.98, + "grad_norm": 0.7454214096069336, + "learning_rate": 2.6493438667281047e-08, + "loss": 2.0847, + "step": 29372 + }, + { + "epoch": 0.98, + "grad_norm": 0.7452623844146729, + "learning_rate": 2.6416176073185142e-08, + "loss": 2.0482, + "step": 29373 + }, + { + "epoch": 0.98, + "grad_norm": 0.7562936544418335, + "learning_rate": 2.6339026154795777e-08, + "loss": 2.045, + "step": 29374 + }, + { + "epoch": 0.98, + "grad_norm": 0.7706596255302429, + "learning_rate": 2.626198891298226e-08, + "loss": 2.0529, + "step": 29375 + }, + { + "epoch": 0.98, + "grad_norm": 0.7458763122558594, + "learning_rate": 2.6185064348615008e-08, + "loss": 1.9987, + "step": 29376 + }, + { + "epoch": 0.98, + "grad_norm": 0.7423750758171082, + "learning_rate": 2.6108252462564433e-08, + "loss": 2.0116, + "step": 29377 + }, + { + "epoch": 0.98, + "grad_norm": 0.747506856918335, + "learning_rate": 2.603155325569762e-08, + "loss": 2.0382, + "step": 29378 + }, + { + "epoch": 0.98, + "grad_norm": 0.7569146752357483, + "learning_rate": 2.595496672888054e-08, + "loss": 2.1183, + "step": 29379 + }, + { + "epoch": 0.98, + "grad_norm": 0.7375648021697998, + "learning_rate": 2.5878492882978057e-08, + "loss": 1.9968, + "step": 29380 + }, + { + "epoch": 0.98, + "grad_norm": 0.7698425054550171, + "learning_rate": 2.580213171885615e-08, + "loss": 2.0333, + "step": 29381 + }, + { + "epoch": 0.98, + "grad_norm": 0.748769223690033, + "learning_rate": 2.572588323737635e-08, + "loss": 2.0137, + "step": 29382 + }, + { + "epoch": 0.98, + "grad_norm": 0.7573413848876953, + "learning_rate": 2.5649747439399076e-08, + "loss": 1.9936, + "step": 29383 + }, + { + "epoch": 0.98, + "grad_norm": 0.7400423288345337, + "learning_rate": 2.5573724325785864e-08, + "loss": 2.0659, + "step": 29384 + }, + { + "epoch": 0.98, + "grad_norm": 0.7357686161994934, + "learning_rate": 2.549781389739603e-08, + "loss": 2.0694, + "step": 29385 + }, + { + "epoch": 0.98, + "grad_norm": 0.7538254261016846, + "learning_rate": 2.5422016155085548e-08, + "loss": 2.0528, + "step": 29386 + }, + { + "epoch": 0.98, + "grad_norm": 0.7565253376960754, + "learning_rate": 2.5346331099711518e-08, + "loss": 2.0526, + "step": 29387 + }, + { + "epoch": 0.98, + "grad_norm": 0.7397695183753967, + "learning_rate": 2.527075873212881e-08, + "loss": 2.1293, + "step": 29388 + }, + { + "epoch": 0.98, + "grad_norm": 0.7330043911933899, + "learning_rate": 2.519529905319118e-08, + "loss": 2.1158, + "step": 29389 + }, + { + "epoch": 0.98, + "grad_norm": 0.7262603640556335, + "learning_rate": 2.5119952063751283e-08, + "loss": 1.9945, + "step": 29390 + }, + { + "epoch": 0.98, + "grad_norm": 0.7386509776115417, + "learning_rate": 2.5044717764660663e-08, + "loss": 2.0188, + "step": 29391 + }, + { + "epoch": 0.98, + "grad_norm": 0.7373019456863403, + "learning_rate": 2.4969596156768638e-08, + "loss": 2.0441, + "step": 29392 + }, + { + "epoch": 0.98, + "grad_norm": 0.7476633191108704, + "learning_rate": 2.4894587240923417e-08, + "loss": 2.003, + "step": 29393 + }, + { + "epoch": 0.98, + "grad_norm": 0.7162723541259766, + "learning_rate": 2.4819691017974324e-08, + "loss": 2.0466, + "step": 29394 + }, + { + "epoch": 0.98, + "grad_norm": 0.7529038786888123, + "learning_rate": 2.4744907488766235e-08, + "loss": 2.009, + "step": 29395 + }, + { + "epoch": 0.98, + "grad_norm": 0.7432703375816345, + "learning_rate": 2.467023665414403e-08, + "loss": 1.9579, + "step": 29396 + }, + { + "epoch": 0.98, + "grad_norm": 0.7757458090782166, + "learning_rate": 2.459567851495148e-08, + "loss": 2.0704, + "step": 29397 + }, + { + "epoch": 0.98, + "grad_norm": 0.760806679725647, + "learning_rate": 2.4521233072030136e-08, + "loss": 2.0925, + "step": 29398 + }, + { + "epoch": 0.98, + "grad_norm": 0.7359606623649597, + "learning_rate": 2.4446900326221546e-08, + "loss": 2.0926, + "step": 29399 + }, + { + "epoch": 0.98, + "grad_norm": 0.7385281920433044, + "learning_rate": 2.4372680278366145e-08, + "loss": 2.0881, + "step": 29400 + }, + { + "epoch": 0.98, + "grad_norm": 0.7395015358924866, + "learning_rate": 2.4298572929302154e-08, + "loss": 2.0404, + "step": 29401 + }, + { + "epoch": 0.98, + "grad_norm": 0.7336463332176208, + "learning_rate": 2.4224578279866685e-08, + "loss": 2.0498, + "step": 29402 + }, + { + "epoch": 0.98, + "grad_norm": 0.7778065800666809, + "learning_rate": 2.415069633089462e-08, + "loss": 2.0837, + "step": 29403 + }, + { + "epoch": 0.98, + "grad_norm": 0.7437312006950378, + "learning_rate": 2.4076927083221958e-08, + "loss": 2.0579, + "step": 29404 + }, + { + "epoch": 0.98, + "grad_norm": 0.7457354664802551, + "learning_rate": 2.4003270537682476e-08, + "loss": 2.0958, + "step": 29405 + }, + { + "epoch": 0.98, + "grad_norm": 0.7468243837356567, + "learning_rate": 2.3929726695106625e-08, + "loss": 2.0544, + "step": 29406 + }, + { + "epoch": 0.98, + "grad_norm": 0.7298638224601746, + "learning_rate": 2.385629555632707e-08, + "loss": 1.9889, + "step": 29407 + }, + { + "epoch": 0.98, + "grad_norm": 0.7524846196174622, + "learning_rate": 2.3782977122170925e-08, + "loss": 1.9358, + "step": 29408 + }, + { + "epoch": 0.98, + "grad_norm": 0.7457708120346069, + "learning_rate": 2.370977139346975e-08, + "loss": 2.049, + "step": 29409 + }, + { + "epoch": 0.98, + "grad_norm": 0.7436990141868591, + "learning_rate": 2.363667837104844e-08, + "loss": 2.0708, + "step": 29410 + }, + { + "epoch": 0.98, + "grad_norm": 0.8649788498878479, + "learning_rate": 2.3563698055732996e-08, + "loss": 2.0458, + "step": 29411 + }, + { + "epoch": 0.98, + "grad_norm": 0.7574883103370667, + "learning_rate": 2.3490830448347212e-08, + "loss": 2.035, + "step": 29412 + }, + { + "epoch": 0.98, + "grad_norm": 0.7400567531585693, + "learning_rate": 2.341807554971598e-08, + "loss": 2.0675, + "step": 29413 + }, + { + "epoch": 0.98, + "grad_norm": 0.7336089611053467, + "learning_rate": 2.3345433360660862e-08, + "loss": 2.0558, + "step": 29414 + }, + { + "epoch": 0.98, + "grad_norm": 0.7311110496520996, + "learning_rate": 2.3272903882002317e-08, + "loss": 1.9729, + "step": 29415 + }, + { + "epoch": 0.98, + "grad_norm": 0.7742444276809692, + "learning_rate": 2.3200487114558578e-08, + "loss": 1.9891, + "step": 29416 + }, + { + "epoch": 0.98, + "grad_norm": 0.7852016687393188, + "learning_rate": 2.31281830591501e-08, + "loss": 1.9626, + "step": 29417 + }, + { + "epoch": 0.98, + "grad_norm": 0.749605655670166, + "learning_rate": 2.3055991716590676e-08, + "loss": 2.0609, + "step": 29418 + }, + { + "epoch": 0.98, + "grad_norm": 0.7289119362831116, + "learning_rate": 2.298391308769965e-08, + "loss": 2.0068, + "step": 29419 + }, + { + "epoch": 0.98, + "grad_norm": 0.7433663010597229, + "learning_rate": 2.2911947173287486e-08, + "loss": 2.0569, + "step": 29420 + }, + { + "epoch": 0.98, + "grad_norm": 0.750966489315033, + "learning_rate": 2.2840093974169087e-08, + "loss": 2.0608, + "step": 29421 + }, + { + "epoch": 0.98, + "grad_norm": 0.7541066408157349, + "learning_rate": 2.2768353491157136e-08, + "loss": 2.0828, + "step": 29422 + }, + { + "epoch": 0.98, + "grad_norm": 0.7495532035827637, + "learning_rate": 2.2696725725060987e-08, + "loss": 2.0623, + "step": 29423 + }, + { + "epoch": 0.98, + "grad_norm": 0.7380034923553467, + "learning_rate": 2.262521067668888e-08, + "loss": 2.0021, + "step": 29424 + }, + { + "epoch": 0.98, + "grad_norm": 0.8025142550468445, + "learning_rate": 2.255380834685128e-08, + "loss": 1.9405, + "step": 29425 + }, + { + "epoch": 0.98, + "grad_norm": 0.7490605711936951, + "learning_rate": 2.2482518736351988e-08, + "loss": 2.0364, + "step": 29426 + }, + { + "epoch": 0.98, + "grad_norm": 0.794539213180542, + "learning_rate": 2.2411341845999247e-08, + "loss": 2.084, + "step": 29427 + }, + { + "epoch": 0.98, + "grad_norm": 0.7571232318878174, + "learning_rate": 2.2340277676594634e-08, + "loss": 2.0391, + "step": 29428 + }, + { + "epoch": 0.98, + "grad_norm": 0.7575921416282654, + "learning_rate": 2.2269326228941955e-08, + "loss": 2.0379, + "step": 29429 + }, + { + "epoch": 0.98, + "grad_norm": 0.729982852935791, + "learning_rate": 2.21984875038439e-08, + "loss": 2.0477, + "step": 29430 + }, + { + "epoch": 0.98, + "grad_norm": 0.7419064044952393, + "learning_rate": 2.2127761502098722e-08, + "loss": 2.057, + "step": 29431 + }, + { + "epoch": 0.98, + "grad_norm": 0.7254217267036438, + "learning_rate": 2.2057148224507996e-08, + "loss": 2.0505, + "step": 29432 + }, + { + "epoch": 0.98, + "grad_norm": 0.7280661463737488, + "learning_rate": 2.1986647671866647e-08, + "loss": 2.0061, + "step": 29433 + }, + { + "epoch": 0.98, + "grad_norm": 0.7255533337593079, + "learning_rate": 2.191625984497181e-08, + "loss": 2.0489, + "step": 29434 + }, + { + "epoch": 0.98, + "grad_norm": 0.7380592823028564, + "learning_rate": 2.184598474462063e-08, + "loss": 2.0897, + "step": 29435 + }, + { + "epoch": 0.98, + "grad_norm": 0.7484970688819885, + "learning_rate": 2.1775822371605803e-08, + "loss": 2.0594, + "step": 29436 + }, + { + "epoch": 0.98, + "grad_norm": 0.7230631113052368, + "learning_rate": 2.170577272671892e-08, + "loss": 2.0514, + "step": 29437 + }, + { + "epoch": 0.98, + "grad_norm": 0.7763804197311401, + "learning_rate": 2.163583581075379e-08, + "loss": 2.0119, + "step": 29438 + }, + { + "epoch": 0.98, + "grad_norm": 0.7494169473648071, + "learning_rate": 2.1566011624497562e-08, + "loss": 2.0406, + "step": 29439 + }, + { + "epoch": 0.98, + "grad_norm": 0.7479962706565857, + "learning_rate": 2.1496300168741823e-08, + "loss": 2.085, + "step": 29440 + }, + { + "epoch": 0.98, + "grad_norm": 0.7252236008644104, + "learning_rate": 2.1426701444272614e-08, + "loss": 1.983, + "step": 29441 + }, + { + "epoch": 0.98, + "grad_norm": 0.7312276363372803, + "learning_rate": 2.1357215451875967e-08, + "loss": 2.083, + "step": 29442 + }, + { + "epoch": 0.98, + "grad_norm": 0.7349491119384766, + "learning_rate": 2.1287842192337926e-08, + "loss": 2.0301, + "step": 29443 + }, + { + "epoch": 0.98, + "grad_norm": 0.752582848072052, + "learning_rate": 2.1218581666441197e-08, + "loss": 1.9869, + "step": 29444 + }, + { + "epoch": 0.98, + "grad_norm": 0.7455941438674927, + "learning_rate": 2.1149433874969595e-08, + "loss": 2.0186, + "step": 29445 + }, + { + "epoch": 0.98, + "grad_norm": 0.7596490383148193, + "learning_rate": 2.1080398818702496e-08, + "loss": 2.0746, + "step": 29446 + }, + { + "epoch": 0.98, + "grad_norm": 0.7599145770072937, + "learning_rate": 2.10114764984215e-08, + "loss": 2.0093, + "step": 29447 + }, + { + "epoch": 0.98, + "grad_norm": 0.7501655220985413, + "learning_rate": 2.0942666914904874e-08, + "loss": 2.0748, + "step": 29448 + }, + { + "epoch": 0.98, + "grad_norm": 0.7493268847465515, + "learning_rate": 2.0873970068928662e-08, + "loss": 2.0496, + "step": 29449 + }, + { + "epoch": 0.98, + "grad_norm": 0.7420281767845154, + "learning_rate": 2.080538596127113e-08, + "loss": 2.0445, + "step": 29450 + }, + { + "epoch": 0.98, + "grad_norm": 0.7468722462654114, + "learning_rate": 2.0736914592704993e-08, + "loss": 2.0641, + "step": 29451 + }, + { + "epoch": 0.98, + "grad_norm": 0.7560680508613586, + "learning_rate": 2.0668555964005188e-08, + "loss": 2.0356, + "step": 29452 + }, + { + "epoch": 0.98, + "grad_norm": 0.7382835745811462, + "learning_rate": 2.060031007594443e-08, + "loss": 1.9545, + "step": 29453 + }, + { + "epoch": 0.98, + "grad_norm": 0.7260236144065857, + "learning_rate": 2.0532176929292103e-08, + "loss": 1.9785, + "step": 29454 + }, + { + "epoch": 0.98, + "grad_norm": 0.7342872619628906, + "learning_rate": 2.04641565248187e-08, + "loss": 2.0301, + "step": 29455 + }, + { + "epoch": 0.98, + "grad_norm": 0.7518648505210876, + "learning_rate": 2.03962488632925e-08, + "loss": 2.0148, + "step": 29456 + }, + { + "epoch": 0.98, + "grad_norm": 0.7374304533004761, + "learning_rate": 2.0328453945480662e-08, + "loss": 2.0239, + "step": 29457 + }, + { + "epoch": 0.98, + "grad_norm": 0.7572942972183228, + "learning_rate": 2.0260771772150356e-08, + "loss": 2.0707, + "step": 29458 + }, + { + "epoch": 0.98, + "grad_norm": 0.7302369475364685, + "learning_rate": 2.01932023440643e-08, + "loss": 2.0184, + "step": 29459 + }, + { + "epoch": 0.98, + "grad_norm": 0.746562659740448, + "learning_rate": 2.012574566198633e-08, + "loss": 1.9963, + "step": 29460 + }, + { + "epoch": 0.98, + "grad_norm": 0.7334021329879761, + "learning_rate": 2.0058401726679166e-08, + "loss": 2.0358, + "step": 29461 + }, + { + "epoch": 0.98, + "grad_norm": 0.7479128241539001, + "learning_rate": 1.9991170538904425e-08, + "loss": 2.0132, + "step": 29462 + }, + { + "epoch": 0.98, + "grad_norm": 0.7861015200614929, + "learning_rate": 1.9924052099419278e-08, + "loss": 2.0161, + "step": 29463 + }, + { + "epoch": 0.98, + "grad_norm": 0.7479234933853149, + "learning_rate": 1.9857046408984226e-08, + "loss": 2.0371, + "step": 29464 + }, + { + "epoch": 0.98, + "grad_norm": 0.7484697699546814, + "learning_rate": 1.9790153468354222e-08, + "loss": 2.0438, + "step": 29465 + }, + { + "epoch": 0.98, + "grad_norm": 0.7572760581970215, + "learning_rate": 1.972337327828755e-08, + "loss": 2.0907, + "step": 29466 + }, + { + "epoch": 0.98, + "grad_norm": 0.7560367584228516, + "learning_rate": 1.965670583953583e-08, + "loss": 2.0687, + "step": 29467 + }, + { + "epoch": 0.98, + "grad_norm": 0.74394291639328, + "learning_rate": 1.9590151152854008e-08, + "loss": 2.0257, + "step": 29468 + }, + { + "epoch": 0.98, + "grad_norm": 0.7362436652183533, + "learning_rate": 1.9523709218993715e-08, + "loss": 2.1362, + "step": 29469 + }, + { + "epoch": 0.98, + "grad_norm": 0.7707787752151489, + "learning_rate": 1.9457380038705454e-08, + "loss": 2.1186, + "step": 29470 + }, + { + "epoch": 0.98, + "grad_norm": 0.7447287440299988, + "learning_rate": 1.939116361273863e-08, + "loss": 2.0038, + "step": 29471 + }, + { + "epoch": 0.98, + "grad_norm": 0.7444043159484863, + "learning_rate": 1.9325059941840417e-08, + "loss": 1.9613, + "step": 29472 + }, + { + "epoch": 0.98, + "grad_norm": 0.7259814739227295, + "learning_rate": 1.9259069026760225e-08, + "loss": 2.0658, + "step": 29473 + }, + { + "epoch": 0.98, + "grad_norm": 0.7391754388809204, + "learning_rate": 1.9193190868239674e-08, + "loss": 2.0075, + "step": 29474 + }, + { + "epoch": 0.98, + "grad_norm": 0.7758517861366272, + "learning_rate": 1.9127425467025952e-08, + "loss": 2.0041, + "step": 29475 + }, + { + "epoch": 0.98, + "grad_norm": 0.7531094551086426, + "learning_rate": 1.906177282386179e-08, + "loss": 2.0881, + "step": 29476 + }, + { + "epoch": 0.98, + "grad_norm": 0.7357012629508972, + "learning_rate": 1.8996232939487713e-08, + "loss": 2.0042, + "step": 29477 + }, + { + "epoch": 0.98, + "grad_norm": 0.746687114238739, + "learning_rate": 1.893080581464535e-08, + "loss": 2.0724, + "step": 29478 + }, + { + "epoch": 0.98, + "grad_norm": 0.7484965324401855, + "learning_rate": 1.8865491450072993e-08, + "loss": 2.0544, + "step": 29479 + }, + { + "epoch": 0.98, + "grad_norm": 0.7309772968292236, + "learning_rate": 1.8800289846508947e-08, + "loss": 2.0621, + "step": 29480 + }, + { + "epoch": 0.98, + "grad_norm": 0.7399543523788452, + "learning_rate": 1.8735201004689287e-08, + "loss": 2.0533, + "step": 29481 + }, + { + "epoch": 0.98, + "grad_norm": 0.7407483458518982, + "learning_rate": 1.8670224925348978e-08, + "loss": 2.0458, + "step": 29482 + }, + { + "epoch": 0.98, + "grad_norm": 0.7825150489807129, + "learning_rate": 1.8605361609224106e-08, + "loss": 2.0457, + "step": 29483 + }, + { + "epoch": 0.98, + "grad_norm": 0.7484698295593262, + "learning_rate": 1.854061105704519e-08, + "loss": 2.0085, + "step": 29484 + }, + { + "epoch": 0.98, + "grad_norm": 0.7498574256896973, + "learning_rate": 1.8475973269546088e-08, + "loss": 2.0966, + "step": 29485 + }, + { + "epoch": 0.98, + "grad_norm": 0.7407478094100952, + "learning_rate": 1.841144824745511e-08, + "loss": 2.0304, + "step": 29486 + }, + { + "epoch": 0.98, + "grad_norm": 0.7292353510856628, + "learning_rate": 1.834703599150056e-08, + "loss": 2.0005, + "step": 29487 + }, + { + "epoch": 0.98, + "grad_norm": 0.7498602271080017, + "learning_rate": 1.8282736502412968e-08, + "loss": 2.0751, + "step": 29488 + }, + { + "epoch": 0.98, + "grad_norm": 0.7229365706443787, + "learning_rate": 1.8218549780917305e-08, + "loss": 2.0404, + "step": 29489 + }, + { + "epoch": 0.98, + "grad_norm": 0.7302898168563843, + "learning_rate": 1.8154475827737438e-08, + "loss": 2.0191, + "step": 29490 + }, + { + "epoch": 0.98, + "grad_norm": 0.7422252297401428, + "learning_rate": 1.8090514643598345e-08, + "loss": 2.0666, + "step": 29491 + }, + { + "epoch": 0.98, + "grad_norm": 0.7347898483276367, + "learning_rate": 1.8026666229222778e-08, + "loss": 2.0146, + "step": 29492 + }, + { + "epoch": 0.98, + "grad_norm": 0.7506165504455566, + "learning_rate": 1.796293058533238e-08, + "loss": 2.0602, + "step": 29493 + }, + { + "epoch": 0.98, + "grad_norm": 0.7565147876739502, + "learning_rate": 1.7899307712646587e-08, + "loss": 2.0605, + "step": 29494 + }, + { + "epoch": 0.98, + "grad_norm": 0.740929126739502, + "learning_rate": 1.783579761188481e-08, + "loss": 2.0986, + "step": 29495 + }, + { + "epoch": 0.98, + "grad_norm": 0.8062602281570435, + "learning_rate": 1.7772400283763148e-08, + "loss": 2.0244, + "step": 29496 + }, + { + "epoch": 0.98, + "grad_norm": 0.7222066521644592, + "learning_rate": 1.7709115728999915e-08, + "loss": 2.0052, + "step": 29497 + }, + { + "epoch": 0.98, + "grad_norm": 0.7307189702987671, + "learning_rate": 1.7645943948307875e-08, + "loss": 2.0226, + "step": 29498 + }, + { + "epoch": 0.98, + "grad_norm": 0.7577928304672241, + "learning_rate": 1.7582884942402013e-08, + "loss": 2.0333, + "step": 29499 + }, + { + "epoch": 0.98, + "grad_norm": 0.7576102018356323, + "learning_rate": 1.751993871199398e-08, + "loss": 2.0079, + "step": 29500 + }, + { + "epoch": 0.98, + "grad_norm": 0.7542493343353271, + "learning_rate": 1.7457105257796535e-08, + "loss": 2.0701, + "step": 29501 + }, + { + "epoch": 0.98, + "grad_norm": 0.7300220131874084, + "learning_rate": 1.7394384580518008e-08, + "loss": 2.0531, + "step": 29502 + }, + { + "epoch": 0.98, + "grad_norm": 0.7652482986450195, + "learning_rate": 1.733177668086672e-08, + "loss": 2.0727, + "step": 29503 + }, + { + "epoch": 0.98, + "grad_norm": 0.7316184639930725, + "learning_rate": 1.7269281559550988e-08, + "loss": 2.0036, + "step": 29504 + }, + { + "epoch": 0.98, + "grad_norm": 0.7651429772377014, + "learning_rate": 1.7206899217275807e-08, + "loss": 2.0291, + "step": 29505 + }, + { + "epoch": 0.98, + "grad_norm": 0.7546770572662354, + "learning_rate": 1.714462965474728e-08, + "loss": 2.0266, + "step": 29506 + }, + { + "epoch": 0.98, + "grad_norm": 0.7330334782600403, + "learning_rate": 1.7082472872668176e-08, + "loss": 2.0851, + "step": 29507 + }, + { + "epoch": 0.98, + "grad_norm": 0.7222148180007935, + "learning_rate": 1.702042887174127e-08, + "loss": 2.0723, + "step": 29508 + }, + { + "epoch": 0.98, + "grad_norm": 0.7068103551864624, + "learning_rate": 1.6958497652665996e-08, + "loss": 2.0503, + "step": 29509 + }, + { + "epoch": 0.98, + "grad_norm": 0.7782090306282043, + "learning_rate": 1.689667921614291e-08, + "loss": 2.0464, + "step": 29510 + }, + { + "epoch": 0.98, + "grad_norm": 0.7475563883781433, + "learning_rate": 1.683497356287145e-08, + "loss": 2.0938, + "step": 29511 + }, + { + "epoch": 0.98, + "grad_norm": 0.7553049325942993, + "learning_rate": 1.6773380693546616e-08, + "loss": 2.118, + "step": 29512 + }, + { + "epoch": 0.98, + "grad_norm": 0.7494302988052368, + "learning_rate": 1.6711900608865628e-08, + "loss": 2.0732, + "step": 29513 + }, + { + "epoch": 0.98, + "grad_norm": 0.7336648106575012, + "learning_rate": 1.6650533309523486e-08, + "loss": 2.025, + "step": 29514 + }, + { + "epoch": 0.98, + "grad_norm": 0.749219536781311, + "learning_rate": 1.6589278796212972e-08, + "loss": 1.981, + "step": 29515 + }, + { + "epoch": 0.98, + "grad_norm": 0.7438738346099854, + "learning_rate": 1.652813706962464e-08, + "loss": 2.1071, + "step": 29516 + }, + { + "epoch": 0.98, + "grad_norm": 0.7481508255004883, + "learning_rate": 1.646710813045127e-08, + "loss": 1.9306, + "step": 29517 + }, + { + "epoch": 0.98, + "grad_norm": 0.7123644351959229, + "learning_rate": 1.6406191979382315e-08, + "loss": 2.0349, + "step": 29518 + }, + { + "epoch": 0.98, + "grad_norm": 0.7726489901542664, + "learning_rate": 1.6345388617103885e-08, + "loss": 2.0378, + "step": 29519 + }, + { + "epoch": 0.98, + "grad_norm": 0.7686692476272583, + "learning_rate": 1.6284698044305434e-08, + "loss": 1.9719, + "step": 29520 + }, + { + "epoch": 0.98, + "grad_norm": 0.7811552286148071, + "learning_rate": 1.6224120261670863e-08, + "loss": 2.0849, + "step": 29521 + }, + { + "epoch": 0.98, + "grad_norm": 0.7561351656913757, + "learning_rate": 1.616365526988517e-08, + "loss": 2.1096, + "step": 29522 + }, + { + "epoch": 0.98, + "grad_norm": 0.7295064330101013, + "learning_rate": 1.610330306963115e-08, + "loss": 1.9981, + "step": 29523 + }, + { + "epoch": 0.98, + "grad_norm": 0.7390469312667847, + "learning_rate": 1.6043063661591584e-08, + "loss": 2.0026, + "step": 29524 + }, + { + "epoch": 0.98, + "grad_norm": 0.7628945112228394, + "learning_rate": 1.5982937046444823e-08, + "loss": 2.0456, + "step": 29525 + }, + { + "epoch": 0.98, + "grad_norm": 0.7747546434402466, + "learning_rate": 1.5922923224872545e-08, + "loss": 2.0321, + "step": 29526 + }, + { + "epoch": 0.98, + "grad_norm": 0.739540696144104, + "learning_rate": 1.586302219755198e-08, + "loss": 2.0125, + "step": 29527 + }, + { + "epoch": 0.98, + "grad_norm": 0.7367304563522339, + "learning_rate": 1.580323396515926e-08, + "loss": 2.0717, + "step": 29528 + }, + { + "epoch": 0.98, + "grad_norm": 0.7799481749534607, + "learning_rate": 1.5743558528371615e-08, + "loss": 2.1441, + "step": 29529 + }, + { + "epoch": 0.98, + "grad_norm": 0.7562848329544067, + "learning_rate": 1.5683995887860738e-08, + "loss": 2.005, + "step": 29530 + }, + { + "epoch": 0.98, + "grad_norm": 0.7527329921722412, + "learning_rate": 1.5624546044300525e-08, + "loss": 2.0239, + "step": 29531 + }, + { + "epoch": 0.98, + "grad_norm": 0.7479802370071411, + "learning_rate": 1.556520899836378e-08, + "loss": 2.032, + "step": 29532 + }, + { + "epoch": 0.98, + "grad_norm": 0.7515600323677063, + "learning_rate": 1.550598475071885e-08, + "loss": 2.0218, + "step": 29533 + }, + { + "epoch": 0.98, + "grad_norm": 0.7401430010795593, + "learning_rate": 1.5446873302036314e-08, + "loss": 2.0931, + "step": 29534 + }, + { + "epoch": 0.98, + "grad_norm": 0.7397017478942871, + "learning_rate": 1.5387874652983415e-08, + "loss": 2.0383, + "step": 29535 + }, + { + "epoch": 0.98, + "grad_norm": 0.7486613988876343, + "learning_rate": 1.5328988804226285e-08, + "loss": 2.0738, + "step": 29536 + }, + { + "epoch": 0.98, + "grad_norm": 0.7537353038787842, + "learning_rate": 1.527021575643106e-08, + "loss": 2.0808, + "step": 29537 + }, + { + "epoch": 0.98, + "grad_norm": 0.7955949902534485, + "learning_rate": 1.5211555510261655e-08, + "loss": 2.0531, + "step": 29538 + }, + { + "epoch": 0.98, + "grad_norm": 0.7563859820365906, + "learning_rate": 1.515300806637976e-08, + "loss": 2.0007, + "step": 29539 + }, + { + "epoch": 0.98, + "grad_norm": 0.7405341863632202, + "learning_rate": 1.509457342544818e-08, + "loss": 2.0667, + "step": 29540 + }, + { + "epoch": 0.98, + "grad_norm": 0.7750539183616638, + "learning_rate": 1.5036251588125274e-08, + "loss": 2.1104, + "step": 29541 + }, + { + "epoch": 0.98, + "grad_norm": 0.7347989678382874, + "learning_rate": 1.4978042555072734e-08, + "loss": 2.0539, + "step": 29542 + }, + { + "epoch": 0.98, + "grad_norm": 0.7428281903266907, + "learning_rate": 1.49199463269456e-08, + "loss": 2.0465, + "step": 29543 + }, + { + "epoch": 0.98, + "grad_norm": 0.7458774447441101, + "learning_rate": 1.4861962904401118e-08, + "loss": 2.1313, + "step": 29544 + }, + { + "epoch": 0.98, + "grad_norm": 0.7424444556236267, + "learning_rate": 1.4804092288093208e-08, + "loss": 2.0041, + "step": 29545 + }, + { + "epoch": 0.98, + "grad_norm": 0.7710654139518738, + "learning_rate": 1.4746334478678014e-08, + "loss": 2.0339, + "step": 29546 + }, + { + "epoch": 0.98, + "grad_norm": 0.7892676591873169, + "learning_rate": 1.4688689476807239e-08, + "loss": 2.006, + "step": 29547 + }, + { + "epoch": 0.98, + "grad_norm": 0.7307862043380737, + "learning_rate": 1.4631157283130359e-08, + "loss": 2.0476, + "step": 29548 + }, + { + "epoch": 0.98, + "grad_norm": 0.742354154586792, + "learning_rate": 1.4573737898299078e-08, + "loss": 2.0345, + "step": 29549 + }, + { + "epoch": 0.98, + "grad_norm": 0.7802332639694214, + "learning_rate": 1.4516431322961765e-08, + "loss": 2.0298, + "step": 29550 + }, + { + "epoch": 0.98, + "grad_norm": 0.7733453512191772, + "learning_rate": 1.4459237557766792e-08, + "loss": 2.0846, + "step": 29551 + }, + { + "epoch": 0.98, + "grad_norm": 0.7710857391357422, + "learning_rate": 1.4402156603358086e-08, + "loss": 2.0037, + "step": 29552 + }, + { + "epoch": 0.98, + "grad_norm": 0.7386457324028015, + "learning_rate": 1.4345188460381798e-08, + "loss": 2.0342, + "step": 29553 + }, + { + "epoch": 0.98, + "grad_norm": 0.7242453098297119, + "learning_rate": 1.4288333129481857e-08, + "loss": 2.0362, + "step": 29554 + }, + { + "epoch": 0.98, + "grad_norm": 0.7186710834503174, + "learning_rate": 1.4231590611299972e-08, + "loss": 1.9885, + "step": 29555 + }, + { + "epoch": 0.98, + "grad_norm": 0.7374231219291687, + "learning_rate": 1.4174960906476742e-08, + "loss": 2.0945, + "step": 29556 + }, + { + "epoch": 0.98, + "grad_norm": 0.7453835010528564, + "learning_rate": 1.4118444015652766e-08, + "loss": 2.0179, + "step": 29557 + }, + { + "epoch": 0.98, + "grad_norm": 0.8274710178375244, + "learning_rate": 1.4062039939466421e-08, + "loss": 2.0576, + "step": 29558 + }, + { + "epoch": 0.98, + "grad_norm": 0.7202733755111694, + "learning_rate": 1.4005748678554976e-08, + "loss": 1.9881, + "step": 29559 + }, + { + "epoch": 0.98, + "grad_norm": 0.7747653126716614, + "learning_rate": 1.3949570233553478e-08, + "loss": 2.0566, + "step": 29560 + }, + { + "epoch": 0.98, + "grad_norm": 0.7432642579078674, + "learning_rate": 1.3893504605098086e-08, + "loss": 2.0375, + "step": 29561 + }, + { + "epoch": 0.98, + "grad_norm": 0.7311941385269165, + "learning_rate": 1.3837551793821625e-08, + "loss": 2.0504, + "step": 29562 + }, + { + "epoch": 0.98, + "grad_norm": 0.7494640350341797, + "learning_rate": 1.3781711800355813e-08, + "loss": 2.0431, + "step": 29563 + }, + { + "epoch": 0.98, + "grad_norm": 0.7465858459472656, + "learning_rate": 1.3725984625332367e-08, + "loss": 2.0663, + "step": 29564 + }, + { + "epoch": 0.98, + "grad_norm": 0.7584214210510254, + "learning_rate": 1.3670370269378563e-08, + "loss": 1.9609, + "step": 29565 + }, + { + "epoch": 0.98, + "grad_norm": 0.7568184733390808, + "learning_rate": 1.3614868733125008e-08, + "loss": 2.0191, + "step": 29566 + }, + { + "epoch": 0.98, + "grad_norm": 0.7417550086975098, + "learning_rate": 1.3559480017198978e-08, + "loss": 2.0459, + "step": 29567 + }, + { + "epoch": 0.98, + "grad_norm": 0.7618486285209656, + "learning_rate": 1.3504204122224418e-08, + "loss": 2.1306, + "step": 29568 + }, + { + "epoch": 0.98, + "grad_norm": 0.7364776730537415, + "learning_rate": 1.3449041048827494e-08, + "loss": 2.0277, + "step": 29569 + }, + { + "epoch": 0.98, + "grad_norm": 0.7382826209068298, + "learning_rate": 1.339399079762993e-08, + "loss": 2.0747, + "step": 29570 + }, + { + "epoch": 0.98, + "grad_norm": 0.7807106971740723, + "learning_rate": 1.3339053369254562e-08, + "loss": 2.0417, + "step": 29571 + }, + { + "epoch": 0.98, + "grad_norm": 0.725170910358429, + "learning_rate": 1.3284228764320895e-08, + "loss": 2.1039, + "step": 29572 + }, + { + "epoch": 0.98, + "grad_norm": 0.7597686052322388, + "learning_rate": 1.3229516983450652e-08, + "loss": 2.0164, + "step": 29573 + }, + { + "epoch": 0.98, + "grad_norm": 0.739107608795166, + "learning_rate": 1.317491802726001e-08, + "loss": 2.0248, + "step": 29574 + }, + { + "epoch": 0.98, + "grad_norm": 0.7512032985687256, + "learning_rate": 1.3120431896366248e-08, + "loss": 1.9587, + "step": 29575 + }, + { + "epoch": 0.98, + "grad_norm": 0.7588019371032715, + "learning_rate": 1.3066058591384434e-08, + "loss": 2.1234, + "step": 29576 + }, + { + "epoch": 0.98, + "grad_norm": 0.7493727803230286, + "learning_rate": 1.301179811293074e-08, + "loss": 2.0889, + "step": 29577 + }, + { + "epoch": 0.98, + "grad_norm": 0.7338868975639343, + "learning_rate": 1.295765046161579e-08, + "loss": 1.9625, + "step": 29578 + }, + { + "epoch": 0.98, + "grad_norm": 0.7567895650863647, + "learning_rate": 1.2903615638051315e-08, + "loss": 2.0153, + "step": 29579 + }, + { + "epoch": 0.98, + "grad_norm": 0.7556126117706299, + "learning_rate": 1.2849693642850159e-08, + "loss": 2.0444, + "step": 29580 + }, + { + "epoch": 0.98, + "grad_norm": 0.7612341046333313, + "learning_rate": 1.2795884476619613e-08, + "loss": 2.0805, + "step": 29581 + }, + { + "epoch": 0.98, + "grad_norm": 0.7357383966445923, + "learning_rate": 1.2742188139969191e-08, + "loss": 2.0411, + "step": 29582 + }, + { + "epoch": 0.98, + "grad_norm": 0.7807154059410095, + "learning_rate": 1.2688604633503965e-08, + "loss": 2.0683, + "step": 29583 + }, + { + "epoch": 0.98, + "grad_norm": 0.7407559156417847, + "learning_rate": 1.2635133957829004e-08, + "loss": 2.0471, + "step": 29584 + }, + { + "epoch": 0.98, + "grad_norm": 0.7420535683631897, + "learning_rate": 1.2581776113549382e-08, + "loss": 2.0779, + "step": 29585 + }, + { + "epoch": 0.98, + "grad_norm": 0.7476972937583923, + "learning_rate": 1.2528531101267949e-08, + "loss": 2.0873, + "step": 29586 + }, + { + "epoch": 0.98, + "grad_norm": 0.7247859835624695, + "learning_rate": 1.2475398921585335e-08, + "loss": 2.0219, + "step": 29587 + }, + { + "epoch": 0.98, + "grad_norm": 0.7406803965568542, + "learning_rate": 1.2422379575102172e-08, + "loss": 1.9931, + "step": 29588 + }, + { + "epoch": 0.98, + "grad_norm": 0.7296877503395081, + "learning_rate": 1.236947306241909e-08, + "loss": 2.0133, + "step": 29589 + }, + { + "epoch": 0.98, + "grad_norm": 0.7596883177757263, + "learning_rate": 1.231667938413117e-08, + "loss": 1.9814, + "step": 29590 + }, + { + "epoch": 0.98, + "grad_norm": 0.7269588708877563, + "learning_rate": 1.226399854083682e-08, + "loss": 2.0812, + "step": 29591 + }, + { + "epoch": 0.98, + "grad_norm": 0.7719945311546326, + "learning_rate": 1.2211430533130009e-08, + "loss": 2.0562, + "step": 29592 + }, + { + "epoch": 0.98, + "grad_norm": 0.758465051651001, + "learning_rate": 1.2158975361604708e-08, + "loss": 2.0015, + "step": 29593 + }, + { + "epoch": 0.98, + "grad_norm": 0.7640414237976074, + "learning_rate": 1.2106633026854886e-08, + "loss": 2.0533, + "step": 29594 + }, + { + "epoch": 0.98, + "grad_norm": 0.7794142961502075, + "learning_rate": 1.205440352947007e-08, + "loss": 2.0934, + "step": 29595 + }, + { + "epoch": 0.98, + "grad_norm": 0.7244154810905457, + "learning_rate": 1.2002286870040902e-08, + "loss": 2.0317, + "step": 29596 + }, + { + "epoch": 0.98, + "grad_norm": 0.7503039240837097, + "learning_rate": 1.1950283049156907e-08, + "loss": 2.0845, + "step": 29597 + }, + { + "epoch": 0.98, + "grad_norm": 0.7408627271652222, + "learning_rate": 1.1898392067404286e-08, + "loss": 2.1072, + "step": 29598 + }, + { + "epoch": 0.98, + "grad_norm": 0.7340129613876343, + "learning_rate": 1.1846613925370342e-08, + "loss": 1.9957, + "step": 29599 + }, + { + "epoch": 0.98, + "grad_norm": 0.7448450922966003, + "learning_rate": 1.1794948623640168e-08, + "loss": 2.0431, + "step": 29600 + }, + { + "epoch": 0.98, + "grad_norm": 0.7366681694984436, + "learning_rate": 1.1743396162796627e-08, + "loss": 1.9557, + "step": 29601 + }, + { + "epoch": 0.98, + "grad_norm": 0.7510009407997131, + "learning_rate": 1.1691956543422589e-08, + "loss": 2.0743, + "step": 29602 + }, + { + "epoch": 0.98, + "grad_norm": 0.7619598507881165, + "learning_rate": 1.1640629766099808e-08, + "loss": 2.0006, + "step": 29603 + }, + { + "epoch": 0.98, + "grad_norm": 0.7773876190185547, + "learning_rate": 1.1589415831406713e-08, + "loss": 2.014, + "step": 29604 + }, + { + "epoch": 0.98, + "grad_norm": 0.784982442855835, + "learning_rate": 1.153831473992284e-08, + "loss": 2.0201, + "step": 29605 + }, + { + "epoch": 0.98, + "grad_norm": 0.7617422342300415, + "learning_rate": 1.1487326492224393e-08, + "loss": 2.0168, + "step": 29606 + }, + { + "epoch": 0.99, + "grad_norm": 0.7350152730941772, + "learning_rate": 1.14364510888898e-08, + "loss": 2.0355, + "step": 29607 + }, + { + "epoch": 0.99, + "grad_norm": 0.7190080285072327, + "learning_rate": 1.1385688530490824e-08, + "loss": 1.937, + "step": 29608 + }, + { + "epoch": 0.99, + "grad_norm": 0.7455457448959351, + "learning_rate": 1.1335038817603672e-08, + "loss": 2.0857, + "step": 29609 + }, + { + "epoch": 0.99, + "grad_norm": 0.7399818897247314, + "learning_rate": 1.1284501950798998e-08, + "loss": 2.0474, + "step": 29610 + }, + { + "epoch": 0.99, + "grad_norm": 0.7759206891059875, + "learning_rate": 1.1234077930647458e-08, + "loss": 2.0103, + "step": 29611 + }, + { + "epoch": 0.99, + "grad_norm": 0.7352486252784729, + "learning_rate": 1.1183766757719705e-08, + "loss": 2.0795, + "step": 29612 + }, + { + "epoch": 0.99, + "grad_norm": 0.7422617673873901, + "learning_rate": 1.1133568432584175e-08, + "loss": 2.0318, + "step": 29613 + }, + { + "epoch": 0.99, + "grad_norm": 0.7562263011932373, + "learning_rate": 1.1083482955805969e-08, + "loss": 2.0602, + "step": 29614 + }, + { + "epoch": 0.99, + "grad_norm": 0.7274857759475708, + "learning_rate": 1.1033510327954632e-08, + "loss": 2.0917, + "step": 29615 + }, + { + "epoch": 0.99, + "grad_norm": 0.7442124485969543, + "learning_rate": 1.098365054959083e-08, + "loss": 2.0546, + "step": 29616 + }, + { + "epoch": 0.99, + "grad_norm": 0.7492870092391968, + "learning_rate": 1.0933903621280773e-08, + "loss": 2.0097, + "step": 29617 + }, + { + "epoch": 0.99, + "grad_norm": 0.7204904556274414, + "learning_rate": 1.0884269543585124e-08, + "loss": 1.9993, + "step": 29618 + }, + { + "epoch": 0.99, + "grad_norm": 0.7400587201118469, + "learning_rate": 1.0834748317064547e-08, + "loss": 2.1099, + "step": 29619 + }, + { + "epoch": 0.99, + "grad_norm": 0.7545045614242554, + "learning_rate": 1.0785339942278594e-08, + "loss": 2.0918, + "step": 29620 + }, + { + "epoch": 0.99, + "grad_norm": 0.7184118032455444, + "learning_rate": 1.0736044419785707e-08, + "loss": 2.0128, + "step": 29621 + }, + { + "epoch": 0.99, + "grad_norm": 0.7397406697273254, + "learning_rate": 1.0686861750142108e-08, + "loss": 2.0585, + "step": 29622 + }, + { + "epoch": 0.99, + "grad_norm": 0.7237058281898499, + "learning_rate": 1.0637791933905128e-08, + "loss": 2.0502, + "step": 29623 + }, + { + "epoch": 0.99, + "grad_norm": 0.7550670504570007, + "learning_rate": 1.0588834971627659e-08, + "loss": 2.0742, + "step": 29624 + }, + { + "epoch": 0.99, + "grad_norm": 0.7255280017852783, + "learning_rate": 1.0539990863862592e-08, + "loss": 2.0403, + "step": 29625 + }, + { + "epoch": 0.99, + "grad_norm": 0.7502086162567139, + "learning_rate": 1.0491259611162818e-08, + "loss": 2.0854, + "step": 29626 + }, + { + "epoch": 0.99, + "grad_norm": 0.7208864688873291, + "learning_rate": 1.0442641214079007e-08, + "loss": 1.9534, + "step": 29627 + }, + { + "epoch": 0.99, + "grad_norm": 0.7577435374259949, + "learning_rate": 1.0394135673158501e-08, + "loss": 2.0205, + "step": 29628 + }, + { + "epoch": 0.99, + "grad_norm": 0.7699874043464661, + "learning_rate": 1.0345742988951969e-08, + "loss": 2.0415, + "step": 29629 + }, + { + "epoch": 0.99, + "grad_norm": 0.7290918231010437, + "learning_rate": 1.0297463162003419e-08, + "loss": 2.1093, + "step": 29630 + }, + { + "epoch": 0.99, + "grad_norm": 0.7549092769622803, + "learning_rate": 1.0249296192860192e-08, + "loss": 2.0528, + "step": 29631 + }, + { + "epoch": 0.99, + "grad_norm": 0.784666895866394, + "learning_rate": 1.0201242082066298e-08, + "loss": 2.0729, + "step": 29632 + }, + { + "epoch": 0.99, + "grad_norm": 0.7704221606254578, + "learning_rate": 1.0153300830163526e-08, + "loss": 2.0253, + "step": 29633 + }, + { + "epoch": 0.99, + "grad_norm": 0.7227026224136353, + "learning_rate": 1.0105472437693664e-08, + "loss": 2.04, + "step": 29634 + }, + { + "epoch": 0.99, + "grad_norm": 0.7599170804023743, + "learning_rate": 1.0057756905198501e-08, + "loss": 1.9933, + "step": 29635 + }, + { + "epoch": 0.99, + "grad_norm": 0.7635394334793091, + "learning_rate": 1.0010154233215386e-08, + "loss": 2.0861, + "step": 29636 + }, + { + "epoch": 0.99, + "grad_norm": 0.7088033556938171, + "learning_rate": 9.962664422283885e-09, + "loss": 2.0271, + "step": 29637 + }, + { + "epoch": 0.99, + "grad_norm": 0.7389460802078247, + "learning_rate": 9.915287472938017e-09, + "loss": 1.9971, + "step": 29638 + }, + { + "epoch": 0.99, + "grad_norm": 0.7813568115234375, + "learning_rate": 9.86802338571624e-09, + "loss": 2.0412, + "step": 29639 + }, + { + "epoch": 0.99, + "grad_norm": 0.7510318160057068, + "learning_rate": 9.820872161149242e-09, + "loss": 2.0311, + "step": 29640 + }, + { + "epoch": 0.99, + "grad_norm": 0.7509331703186035, + "learning_rate": 9.773833799772148e-09, + "loss": 2.0148, + "step": 29641 + }, + { + "epoch": 0.99, + "grad_norm": 0.7266464233398438, + "learning_rate": 9.726908302114536e-09, + "loss": 2.0278, + "step": 29642 + }, + { + "epoch": 0.99, + "grad_norm": 0.738103985786438, + "learning_rate": 9.680095668708201e-09, + "loss": 2.0379, + "step": 29643 + }, + { + "epoch": 0.99, + "grad_norm": 0.718728244304657, + "learning_rate": 9.6333959000805e-09, + "loss": 2.0861, + "step": 29644 + }, + { + "epoch": 0.99, + "grad_norm": 0.7732469439506531, + "learning_rate": 9.5868089967599e-09, + "loss": 2.0609, + "step": 29645 + }, + { + "epoch": 0.99, + "grad_norm": 0.7301507592201233, + "learning_rate": 9.540334959273757e-09, + "loss": 2.0737, + "step": 29646 + }, + { + "epoch": 0.99, + "grad_norm": 0.7596578598022461, + "learning_rate": 9.493973788144984e-09, + "loss": 2.051, + "step": 29647 + }, + { + "epoch": 0.99, + "grad_norm": 0.7596074342727661, + "learning_rate": 9.447725483897607e-09, + "loss": 2.1161, + "step": 29648 + }, + { + "epoch": 0.99, + "grad_norm": 0.7364417910575867, + "learning_rate": 9.401590047055654e-09, + "loss": 2.0431, + "step": 29649 + }, + { + "epoch": 0.99, + "grad_norm": 0.7456708550453186, + "learning_rate": 9.355567478139816e-09, + "loss": 2.0127, + "step": 29650 + }, + { + "epoch": 0.99, + "grad_norm": 0.7360016703605652, + "learning_rate": 9.309657777668567e-09, + "loss": 2.0113, + "step": 29651 + }, + { + "epoch": 0.99, + "grad_norm": 0.7269750833511353, + "learning_rate": 9.263860946162606e-09, + "loss": 2.0153, + "step": 29652 + }, + { + "epoch": 0.99, + "grad_norm": 0.7294180393218994, + "learning_rate": 9.218176984138183e-09, + "loss": 2.0944, + "step": 29653 + }, + { + "epoch": 0.99, + "grad_norm": 0.7614117860794067, + "learning_rate": 9.172605892111553e-09, + "loss": 1.993, + "step": 29654 + }, + { + "epoch": 0.99, + "grad_norm": 0.7454225420951843, + "learning_rate": 9.127147670597857e-09, + "loss": 2.0419, + "step": 29655 + }, + { + "epoch": 0.99, + "grad_norm": 0.7067463994026184, + "learning_rate": 9.081802320110023e-09, + "loss": 2.0469, + "step": 29656 + }, + { + "epoch": 0.99, + "grad_norm": 0.7330135107040405, + "learning_rate": 9.036569841160969e-09, + "loss": 1.9889, + "step": 29657 + }, + { + "epoch": 0.99, + "grad_norm": 0.7079134583473206, + "learning_rate": 8.991450234261401e-09, + "loss": 2.0369, + "step": 29658 + }, + { + "epoch": 0.99, + "grad_norm": 0.7251440286636353, + "learning_rate": 8.946443499920909e-09, + "loss": 1.9958, + "step": 29659 + }, + { + "epoch": 0.99, + "grad_norm": 0.7601599097251892, + "learning_rate": 8.901549638649087e-09, + "loss": 2.0143, + "step": 29660 + }, + { + "epoch": 0.99, + "grad_norm": 0.7717041373252869, + "learning_rate": 8.856768650951087e-09, + "loss": 2.0537, + "step": 29661 + }, + { + "epoch": 0.99, + "grad_norm": 0.7553925514221191, + "learning_rate": 8.812100537334278e-09, + "loss": 2.0493, + "step": 29662 + }, + { + "epoch": 0.99, + "grad_norm": 0.7448487877845764, + "learning_rate": 8.767545298302705e-09, + "loss": 2.091, + "step": 29663 + }, + { + "epoch": 0.99, + "grad_norm": 0.771101713180542, + "learning_rate": 8.723102934360405e-09, + "loss": 2.0883, + "step": 29664 + }, + { + "epoch": 0.99, + "grad_norm": 0.7331533432006836, + "learning_rate": 8.678773446009204e-09, + "loss": 2.0631, + "step": 29665 + }, + { + "epoch": 0.99, + "grad_norm": 0.7572809457778931, + "learning_rate": 8.634556833749808e-09, + "loss": 2.028, + "step": 29666 + }, + { + "epoch": 0.99, + "grad_norm": 0.7649959325790405, + "learning_rate": 8.59045309808071e-09, + "loss": 2.0588, + "step": 29667 + }, + { + "epoch": 0.99, + "grad_norm": 0.7431898713111877, + "learning_rate": 8.546462239501507e-09, + "loss": 2.0215, + "step": 29668 + }, + { + "epoch": 0.99, + "grad_norm": 0.7499906420707703, + "learning_rate": 8.502584258509583e-09, + "loss": 2.0637, + "step": 29669 + }, + { + "epoch": 0.99, + "grad_norm": 0.7345190644264221, + "learning_rate": 8.458819155598985e-09, + "loss": 2.0044, + "step": 29670 + }, + { + "epoch": 0.99, + "grad_norm": 0.7668816447257996, + "learning_rate": 8.415166931264873e-09, + "loss": 2.0766, + "step": 29671 + }, + { + "epoch": 0.99, + "grad_norm": 0.724146842956543, + "learning_rate": 8.371627586001296e-09, + "loss": 2.0674, + "step": 29672 + }, + { + "epoch": 0.99, + "grad_norm": 0.7534029483795166, + "learning_rate": 8.328201120298974e-09, + "loss": 2.053, + "step": 29673 + }, + { + "epoch": 0.99, + "grad_norm": 0.7470707297325134, + "learning_rate": 8.284887534649732e-09, + "loss": 2.0973, + "step": 29674 + }, + { + "epoch": 0.99, + "grad_norm": 0.7218387126922607, + "learning_rate": 8.241686829540962e-09, + "loss": 2.0382, + "step": 29675 + }, + { + "epoch": 0.99, + "grad_norm": 0.743836522102356, + "learning_rate": 8.198599005462271e-09, + "loss": 2.0343, + "step": 29676 + }, + { + "epoch": 0.99, + "grad_norm": 0.7407042980194092, + "learning_rate": 8.155624062899937e-09, + "loss": 1.9875, + "step": 29677 + }, + { + "epoch": 0.99, + "grad_norm": 0.7585245966911316, + "learning_rate": 8.112762002340236e-09, + "loss": 2.0656, + "step": 29678 + }, + { + "epoch": 0.99, + "grad_norm": 0.7230626940727234, + "learning_rate": 8.070012824266115e-09, + "loss": 2.037, + "step": 29679 + }, + { + "epoch": 0.99, + "grad_norm": 0.7681514024734497, + "learning_rate": 8.027376529161634e-09, + "loss": 2.0645, + "step": 29680 + }, + { + "epoch": 0.99, + "grad_norm": 0.7748247981071472, + "learning_rate": 7.984853117507519e-09, + "loss": 1.9916, + "step": 29681 + }, + { + "epoch": 0.99, + "grad_norm": 0.7601355910301208, + "learning_rate": 7.942442589784493e-09, + "loss": 2.0704, + "step": 29682 + }, + { + "epoch": 0.99, + "grad_norm": 0.7219997644424438, + "learning_rate": 7.900144946472178e-09, + "loss": 2.0289, + "step": 29683 + }, + { + "epoch": 0.99, + "grad_norm": 0.7585458159446716, + "learning_rate": 7.857960188047964e-09, + "loss": 2.0793, + "step": 29684 + }, + { + "epoch": 0.99, + "grad_norm": 0.7438634037971497, + "learning_rate": 7.81588831498814e-09, + "loss": 2.1122, + "step": 29685 + }, + { + "epoch": 0.99, + "grad_norm": 0.7371315956115723, + "learning_rate": 7.773929327768992e-09, + "loss": 2.0037, + "step": 29686 + }, + { + "epoch": 0.99, + "grad_norm": 0.7867748737335205, + "learning_rate": 7.732083226864584e-09, + "loss": 2.0982, + "step": 29687 + }, + { + "epoch": 0.99, + "grad_norm": 0.7369576096534729, + "learning_rate": 7.690350012745651e-09, + "loss": 2.0421, + "step": 29688 + }, + { + "epoch": 0.99, + "grad_norm": 0.7357144951820374, + "learning_rate": 7.648729685886258e-09, + "loss": 2.0333, + "step": 29689 + }, + { + "epoch": 0.99, + "grad_norm": 0.7184031009674072, + "learning_rate": 7.60722224675492e-09, + "loss": 2.0271, + "step": 29690 + }, + { + "epoch": 0.99, + "grad_norm": 0.7642199993133545, + "learning_rate": 7.56582769582015e-09, + "loss": 2.0271, + "step": 29691 + }, + { + "epoch": 0.99, + "grad_norm": 0.7612342238426208, + "learning_rate": 7.524546033551573e-09, + "loss": 2.1027, + "step": 29692 + }, + { + "epoch": 0.99, + "grad_norm": 0.7427971959114075, + "learning_rate": 7.483377260414371e-09, + "loss": 2.0451, + "step": 29693 + }, + { + "epoch": 0.99, + "grad_norm": 0.7498494386672974, + "learning_rate": 7.442321376873729e-09, + "loss": 2.0547, + "step": 29694 + }, + { + "epoch": 0.99, + "grad_norm": 0.7374542951583862, + "learning_rate": 7.40137838339372e-09, + "loss": 2.0807, + "step": 29695 + }, + { + "epoch": 0.99, + "grad_norm": 0.7531236410140991, + "learning_rate": 7.360548280437307e-09, + "loss": 2.0744, + "step": 29696 + }, + { + "epoch": 0.99, + "grad_norm": 0.7319228649139404, + "learning_rate": 7.3198310684641224e-09, + "loss": 2.0814, + "step": 29697 + }, + { + "epoch": 0.99, + "grad_norm": 0.7377665042877197, + "learning_rate": 7.2792267479349085e-09, + "loss": 2.0492, + "step": 29698 + }, + { + "epoch": 0.99, + "grad_norm": 0.7268849611282349, + "learning_rate": 7.2387353193092976e-09, + "loss": 2.0281, + "step": 29699 + }, + { + "epoch": 0.99, + "grad_norm": 0.7611731290817261, + "learning_rate": 7.198356783044702e-09, + "loss": 2.056, + "step": 29700 + }, + { + "epoch": 0.99, + "grad_norm": 0.7511168122291565, + "learning_rate": 7.158091139595202e-09, + "loss": 2.0307, + "step": 29701 + }, + { + "epoch": 0.99, + "grad_norm": 0.7155812382698059, + "learning_rate": 7.117938389418211e-09, + "loss": 2.0492, + "step": 29702 + }, + { + "epoch": 0.99, + "grad_norm": 0.7363794445991516, + "learning_rate": 7.077898532966698e-09, + "loss": 2.0766, + "step": 29703 + }, + { + "epoch": 0.99, + "grad_norm": 0.745597243309021, + "learning_rate": 7.037971570692526e-09, + "loss": 2.0093, + "step": 29704 + }, + { + "epoch": 0.99, + "grad_norm": 0.7444412708282471, + "learning_rate": 6.998157503046443e-09, + "loss": 2.0103, + "step": 29705 + }, + { + "epoch": 0.99, + "grad_norm": 0.7229227423667908, + "learning_rate": 6.958456330478092e-09, + "loss": 2.0341, + "step": 29706 + }, + { + "epoch": 0.99, + "grad_norm": 0.7718050479888916, + "learning_rate": 6.918868053437111e-09, + "loss": 2.0436, + "step": 29707 + }, + { + "epoch": 0.99, + "grad_norm": 0.72885662317276, + "learning_rate": 6.879392672370921e-09, + "loss": 1.9681, + "step": 29708 + }, + { + "epoch": 0.99, + "grad_norm": 0.7438578605651855, + "learning_rate": 6.8400301877247225e-09, + "loss": 2.0896, + "step": 29709 + }, + { + "epoch": 0.99, + "grad_norm": 0.7355804443359375, + "learning_rate": 6.800780599942602e-09, + "loss": 2.0462, + "step": 29710 + }, + { + "epoch": 0.99, + "grad_norm": 0.733769416809082, + "learning_rate": 6.7616439094686515e-09, + "loss": 2.0807, + "step": 29711 + }, + { + "epoch": 0.99, + "grad_norm": 0.7588351964950562, + "learning_rate": 6.7226201167458485e-09, + "loss": 2.0371, + "step": 29712 + }, + { + "epoch": 0.99, + "grad_norm": 0.7769057750701904, + "learning_rate": 6.6837092222138415e-09, + "loss": 1.9914, + "step": 29713 + }, + { + "epoch": 0.99, + "grad_norm": 0.7687263488769531, + "learning_rate": 6.6449112263122786e-09, + "loss": 2.0593, + "step": 29714 + }, + { + "epoch": 0.99, + "grad_norm": 0.7562466263771057, + "learning_rate": 6.606226129479698e-09, + "loss": 2.0619, + "step": 29715 + }, + { + "epoch": 0.99, + "grad_norm": 0.7476504445075989, + "learning_rate": 6.567653932154638e-09, + "loss": 2.0715, + "step": 29716 + }, + { + "epoch": 0.99, + "grad_norm": 0.7379528880119324, + "learning_rate": 6.529194634770086e-09, + "loss": 2.0215, + "step": 29717 + }, + { + "epoch": 0.99, + "grad_norm": 0.7403675317764282, + "learning_rate": 6.49084823776236e-09, + "loss": 2.0343, + "step": 29718 + }, + { + "epoch": 0.99, + "grad_norm": 0.7936694622039795, + "learning_rate": 6.452614741564445e-09, + "loss": 2.0803, + "step": 29719 + }, + { + "epoch": 0.99, + "grad_norm": 0.7684759497642517, + "learning_rate": 6.414494146608219e-09, + "loss": 2.1022, + "step": 29720 + }, + { + "epoch": 0.99, + "grad_norm": 0.7687221765518188, + "learning_rate": 6.376486453323339e-09, + "loss": 2.0353, + "step": 29721 + }, + { + "epoch": 0.99, + "grad_norm": 0.7182919383049011, + "learning_rate": 6.33859166214057e-09, + "loss": 2.0384, + "step": 29722 + }, + { + "epoch": 0.99, + "grad_norm": 0.7458496689796448, + "learning_rate": 6.300809773488459e-09, + "loss": 2.0143, + "step": 29723 + }, + { + "epoch": 0.99, + "grad_norm": 0.7239437103271484, + "learning_rate": 6.263140787791111e-09, + "loss": 2.0606, + "step": 29724 + }, + { + "epoch": 0.99, + "grad_norm": 0.7361354231834412, + "learning_rate": 6.225584705477073e-09, + "loss": 2.0772, + "step": 29725 + }, + { + "epoch": 0.99, + "grad_norm": 0.75112384557724, + "learning_rate": 6.188141526969338e-09, + "loss": 2.1305, + "step": 29726 + }, + { + "epoch": 0.99, + "grad_norm": 0.7456130385398865, + "learning_rate": 6.150811252690902e-09, + "loss": 2.0345, + "step": 29727 + }, + { + "epoch": 0.99, + "grad_norm": 0.7336434721946716, + "learning_rate": 6.113593883063651e-09, + "loss": 2.0786, + "step": 29728 + }, + { + "epoch": 0.99, + "grad_norm": 0.7628545165061951, + "learning_rate": 6.076489418508358e-09, + "loss": 2.065, + "step": 29729 + }, + { + "epoch": 0.99, + "grad_norm": 0.7467114925384521, + "learning_rate": 6.039497859443577e-09, + "loss": 2.1038, + "step": 29730 + }, + { + "epoch": 0.99, + "grad_norm": 0.7414436340332031, + "learning_rate": 6.002619206287863e-09, + "loss": 2.0947, + "step": 29731 + }, + { + "epoch": 0.99, + "grad_norm": 0.7386296391487122, + "learning_rate": 5.965853459456439e-09, + "loss": 1.9619, + "step": 29732 + }, + { + "epoch": 0.99, + "grad_norm": 0.7349427938461304, + "learning_rate": 5.929200619366748e-09, + "loss": 2.0399, + "step": 29733 + }, + { + "epoch": 0.99, + "grad_norm": 0.7421636581420898, + "learning_rate": 5.892660686431795e-09, + "loss": 2.0488, + "step": 29734 + }, + { + "epoch": 0.99, + "grad_norm": 0.741570770740509, + "learning_rate": 5.856233661063471e-09, + "loss": 2.0301, + "step": 29735 + }, + { + "epoch": 0.99, + "grad_norm": 0.7685263156890869, + "learning_rate": 5.819919543674779e-09, + "loss": 2.059, + "step": 29736 + }, + { + "epoch": 0.99, + "grad_norm": 0.7527056932449341, + "learning_rate": 5.7837183346765026e-09, + "loss": 2.0603, + "step": 29737 + }, + { + "epoch": 0.99, + "grad_norm": 0.730739176273346, + "learning_rate": 5.747630034474983e-09, + "loss": 2.0735, + "step": 29738 + }, + { + "epoch": 0.99, + "grad_norm": 0.7473884224891663, + "learning_rate": 5.711654643481001e-09, + "loss": 2.0478, + "step": 29739 + }, + { + "epoch": 0.99, + "grad_norm": 0.7674278616905212, + "learning_rate": 5.675792162098681e-09, + "loss": 2.0496, + "step": 29740 + }, + { + "epoch": 0.99, + "grad_norm": 0.7264618873596191, + "learning_rate": 5.640042590734363e-09, + "loss": 2.0238, + "step": 29741 + }, + { + "epoch": 0.99, + "grad_norm": 0.7265802025794983, + "learning_rate": 5.604405929791057e-09, + "loss": 1.9846, + "step": 29742 + }, + { + "epoch": 0.99, + "grad_norm": 0.7242604494094849, + "learning_rate": 5.568882179671775e-09, + "loss": 2.0438, + "step": 29743 + }, + { + "epoch": 0.99, + "grad_norm": 0.7134151458740234, + "learning_rate": 5.533471340778418e-09, + "loss": 1.9752, + "step": 29744 + }, + { + "epoch": 0.99, + "grad_norm": 0.7598056197166443, + "learning_rate": 5.498173413510666e-09, + "loss": 2.0222, + "step": 29745 + }, + { + "epoch": 0.99, + "grad_norm": 0.7902846932411194, + "learning_rate": 5.462988398267088e-09, + "loss": 2.0956, + "step": 29746 + }, + { + "epoch": 0.99, + "grad_norm": 0.7384594082832336, + "learning_rate": 5.427916295445146e-09, + "loss": 2.0823, + "step": 29747 + }, + { + "epoch": 0.99, + "grad_norm": 0.7374424934387207, + "learning_rate": 5.392957105441188e-09, + "loss": 2.0575, + "step": 29748 + }, + { + "epoch": 0.99, + "grad_norm": 0.7300637364387512, + "learning_rate": 5.358110828650453e-09, + "loss": 2.0639, + "step": 29749 + }, + { + "epoch": 0.99, + "grad_norm": 0.7577400803565979, + "learning_rate": 5.3233774654670726e-09, + "loss": 1.9891, + "step": 29750 + }, + { + "epoch": 0.99, + "grad_norm": 0.7438801527023315, + "learning_rate": 5.288757016280732e-09, + "loss": 2.0265, + "step": 29751 + }, + { + "epoch": 0.99, + "grad_norm": 0.7334945797920227, + "learning_rate": 5.2542494814855625e-09, + "loss": 2.0082, + "step": 29752 + }, + { + "epoch": 0.99, + "grad_norm": 0.7470763921737671, + "learning_rate": 5.219854861471252e-09, + "loss": 2.0531, + "step": 29753 + }, + { + "epoch": 0.99, + "grad_norm": 0.7655604481697083, + "learning_rate": 5.185573156624157e-09, + "loss": 2.0685, + "step": 29754 + }, + { + "epoch": 0.99, + "grad_norm": 0.7120406627655029, + "learning_rate": 5.1514043673339676e-09, + "loss": 2.0826, + "step": 29755 + }, + { + "epoch": 0.99, + "grad_norm": 0.7336919903755188, + "learning_rate": 5.117348493984819e-09, + "loss": 2.0048, + "step": 29756 + }, + { + "epoch": 0.99, + "grad_norm": 0.7167341709136963, + "learning_rate": 5.083405536963071e-09, + "loss": 2.0906, + "step": 29757 + }, + { + "epoch": 0.99, + "grad_norm": 0.719492495059967, + "learning_rate": 5.049575496650638e-09, + "loss": 2.0015, + "step": 29758 + }, + { + "epoch": 0.99, + "grad_norm": 0.7418146729469299, + "learning_rate": 5.015858373430549e-09, + "loss": 2.0247, + "step": 29759 + }, + { + "epoch": 0.99, + "grad_norm": 0.7380086779594421, + "learning_rate": 4.982254167684719e-09, + "loss": 1.989, + "step": 29760 + }, + { + "epoch": 0.99, + "grad_norm": 0.7662522196769714, + "learning_rate": 4.948762879790625e-09, + "loss": 2.0487, + "step": 29761 + }, + { + "epoch": 0.99, + "grad_norm": 0.7406746745109558, + "learning_rate": 4.915384510127963e-09, + "loss": 2.0882, + "step": 29762 + }, + { + "epoch": 0.99, + "grad_norm": 0.7317564487457275, + "learning_rate": 4.882119059074209e-09, + "loss": 2.0608, + "step": 29763 + }, + { + "epoch": 0.99, + "grad_norm": 0.750950038433075, + "learning_rate": 4.8489665270035065e-09, + "loss": 2.0467, + "step": 29764 + }, + { + "epoch": 0.99, + "grad_norm": 0.7487965226173401, + "learning_rate": 4.815926914292224e-09, + "loss": 1.9789, + "step": 29765 + }, + { + "epoch": 0.99, + "grad_norm": 0.7353529930114746, + "learning_rate": 4.783000221312284e-09, + "loss": 2.0593, + "step": 29766 + }, + { + "epoch": 0.99, + "grad_norm": 0.764518678188324, + "learning_rate": 4.750186448436722e-09, + "loss": 2.0306, + "step": 29767 + }, + { + "epoch": 0.99, + "grad_norm": 0.7514039874076843, + "learning_rate": 4.7174855960352425e-09, + "loss": 2.1046, + "step": 29768 + }, + { + "epoch": 0.99, + "grad_norm": 0.7701081037521362, + "learning_rate": 4.68489766447866e-09, + "loss": 2.0319, + "step": 29769 + }, + { + "epoch": 0.99, + "grad_norm": 0.7539198994636536, + "learning_rate": 4.652422654133349e-09, + "loss": 2.0326, + "step": 29770 + }, + { + "epoch": 0.99, + "grad_norm": 0.7633764743804932, + "learning_rate": 4.620060565367901e-09, + "loss": 2.0207, + "step": 29771 + }, + { + "epoch": 0.99, + "grad_norm": 0.7434589862823486, + "learning_rate": 4.587811398546471e-09, + "loss": 2.0601, + "step": 29772 + }, + { + "epoch": 0.99, + "grad_norm": 0.7705041170120239, + "learning_rate": 4.555675154034322e-09, + "loss": 2.0045, + "step": 29773 + }, + { + "epoch": 0.99, + "grad_norm": 0.7179151177406311, + "learning_rate": 4.523651832194498e-09, + "loss": 1.9183, + "step": 29774 + }, + { + "epoch": 0.99, + "grad_norm": 0.7549268007278442, + "learning_rate": 4.49174143338893e-09, + "loss": 2.1068, + "step": 29775 + }, + { + "epoch": 0.99, + "grad_norm": 0.7398304343223572, + "learning_rate": 4.459943957976221e-09, + "loss": 2.033, + "step": 29776 + }, + { + "epoch": 0.99, + "grad_norm": 0.7293802499771118, + "learning_rate": 4.4282594063183025e-09, + "loss": 2.0089, + "step": 29777 + }, + { + "epoch": 0.99, + "grad_norm": 0.7181735038757324, + "learning_rate": 4.396687778771558e-09, + "loss": 2.0224, + "step": 29778 + }, + { + "epoch": 0.99, + "grad_norm": 0.7577188014984131, + "learning_rate": 4.365229075693478e-09, + "loss": 2.1095, + "step": 29779 + }, + { + "epoch": 0.99, + "grad_norm": 0.7307273745536804, + "learning_rate": 4.333883297438224e-09, + "loss": 2.0738, + "step": 29780 + }, + { + "epoch": 0.99, + "grad_norm": 0.7482345104217529, + "learning_rate": 4.3026504443610675e-09, + "loss": 2.0877, + "step": 29781 + }, + { + "epoch": 0.99, + "grad_norm": 0.7417686581611633, + "learning_rate": 4.271530516815059e-09, + "loss": 2.0988, + "step": 29782 + }, + { + "epoch": 0.99, + "grad_norm": 0.7734464406967163, + "learning_rate": 4.24052351515103e-09, + "loss": 2.0461, + "step": 29783 + }, + { + "epoch": 0.99, + "grad_norm": 0.7529640197753906, + "learning_rate": 4.209629439718699e-09, + "loss": 2.0654, + "step": 29784 + }, + { + "epoch": 0.99, + "grad_norm": 0.7443147897720337, + "learning_rate": 4.178848290868897e-09, + "loss": 2.0289, + "step": 29785 + }, + { + "epoch": 0.99, + "grad_norm": 0.7400572896003723, + "learning_rate": 4.148180068946906e-09, + "loss": 1.9984, + "step": 29786 + }, + { + "epoch": 0.99, + "grad_norm": 0.7442275285720825, + "learning_rate": 4.117624774302442e-09, + "loss": 2.0802, + "step": 29787 + }, + { + "epoch": 0.99, + "grad_norm": 0.7369182705879211, + "learning_rate": 4.087182407277457e-09, + "loss": 1.9597, + "step": 29788 + }, + { + "epoch": 0.99, + "grad_norm": 0.7372701168060303, + "learning_rate": 4.0568529682183385e-09, + "loss": 2.0218, + "step": 29789 + }, + { + "epoch": 0.99, + "grad_norm": 0.7393997311592102, + "learning_rate": 4.0266364574659264e-09, + "loss": 2.0666, + "step": 29790 + }, + { + "epoch": 0.99, + "grad_norm": 0.7620957493782043, + "learning_rate": 3.996532875362169e-09, + "loss": 2.0768, + "step": 29791 + }, + { + "epoch": 0.99, + "grad_norm": 0.7546300888061523, + "learning_rate": 3.966542222247904e-09, + "loss": 2.1012, + "step": 29792 + }, + { + "epoch": 0.99, + "grad_norm": 0.7544488906860352, + "learning_rate": 3.936664498461751e-09, + "loss": 1.9908, + "step": 29793 + }, + { + "epoch": 0.99, + "grad_norm": 0.7754558324813843, + "learning_rate": 3.906899704340106e-09, + "loss": 2.0459, + "step": 29794 + }, + { + "epoch": 0.99, + "grad_norm": 0.7280136346817017, + "learning_rate": 3.877247840220477e-09, + "loss": 1.9975, + "step": 29795 + }, + { + "epoch": 0.99, + "grad_norm": 0.7703222036361694, + "learning_rate": 3.847708906437042e-09, + "loss": 1.9957, + "step": 29796 + }, + { + "epoch": 0.99, + "grad_norm": 0.8217587471008301, + "learning_rate": 3.818282903323978e-09, + "loss": 2.0223, + "step": 29797 + }, + { + "epoch": 0.99, + "grad_norm": 0.7617921829223633, + "learning_rate": 3.788969831214351e-09, + "loss": 2.0872, + "step": 29798 + }, + { + "epoch": 0.99, + "grad_norm": 0.7546753287315369, + "learning_rate": 3.759769690437898e-09, + "loss": 2.0639, + "step": 29799 + }, + { + "epoch": 0.99, + "grad_norm": 0.734090268611908, + "learning_rate": 3.730682481325465e-09, + "loss": 2.1197, + "step": 29800 + }, + { + "epoch": 0.99, + "grad_norm": 0.7444719076156616, + "learning_rate": 3.701708204204568e-09, + "loss": 2.0151, + "step": 29801 + }, + { + "epoch": 0.99, + "grad_norm": 0.759628415107727, + "learning_rate": 3.6728468594038334e-09, + "loss": 2.0119, + "step": 29802 + }, + { + "epoch": 0.99, + "grad_norm": 0.746938169002533, + "learning_rate": 3.6440984472496664e-09, + "loss": 2.0614, + "step": 29803 + }, + { + "epoch": 0.99, + "grad_norm": 0.7536671161651611, + "learning_rate": 3.615462968065142e-09, + "loss": 1.9821, + "step": 29804 + }, + { + "epoch": 0.99, + "grad_norm": 0.7402051687240601, + "learning_rate": 3.5869404221755556e-09, + "loss": 1.9341, + "step": 29805 + }, + { + "epoch": 0.99, + "grad_norm": 0.7187182307243347, + "learning_rate": 3.558530809900651e-09, + "loss": 2.0193, + "step": 29806 + }, + { + "epoch": 0.99, + "grad_norm": 0.7408974766731262, + "learning_rate": 3.530234131564614e-09, + "loss": 2.048, + "step": 29807 + }, + { + "epoch": 0.99, + "grad_norm": 0.7606223821640015, + "learning_rate": 3.5020503874849675e-09, + "loss": 2.0572, + "step": 29808 + }, + { + "epoch": 0.99, + "grad_norm": 0.76725834608078, + "learning_rate": 3.4739795779803465e-09, + "loss": 2.0642, + "step": 29809 + }, + { + "epoch": 0.99, + "grad_norm": 0.7686064839363098, + "learning_rate": 3.4460217033682743e-09, + "loss": 2.0617, + "step": 29810 + }, + { + "epoch": 0.99, + "grad_norm": 0.7634626626968384, + "learning_rate": 3.418176763964054e-09, + "loss": 2.0822, + "step": 29811 + }, + { + "epoch": 0.99, + "grad_norm": 0.7247858643531799, + "learning_rate": 3.3904447600829892e-09, + "loss": 2.0412, + "step": 29812 + }, + { + "epoch": 0.99, + "grad_norm": 0.7405315041542053, + "learning_rate": 3.362825692038163e-09, + "loss": 1.9816, + "step": 29813 + }, + { + "epoch": 0.99, + "grad_norm": 0.7717000246047974, + "learning_rate": 3.3353195601415477e-09, + "loss": 2.078, + "step": 29814 + }, + { + "epoch": 0.99, + "grad_norm": 0.7307572364807129, + "learning_rate": 3.3079263647040063e-09, + "loss": 2.0309, + "step": 29815 + }, + { + "epoch": 0.99, + "grad_norm": 0.7386662364006042, + "learning_rate": 3.28064610603529e-09, + "loss": 1.998, + "step": 29816 + }, + { + "epoch": 0.99, + "grad_norm": 0.763184666633606, + "learning_rate": 3.2534787844429316e-09, + "loss": 2.0431, + "step": 29817 + }, + { + "epoch": 0.99, + "grad_norm": 0.7335972785949707, + "learning_rate": 3.226424400233352e-09, + "loss": 2.0647, + "step": 29818 + }, + { + "epoch": 0.99, + "grad_norm": 0.7244024872779846, + "learning_rate": 3.199482953714084e-09, + "loss": 2.0443, + "step": 29819 + }, + { + "epoch": 0.99, + "grad_norm": 0.7186897397041321, + "learning_rate": 3.1726544451871067e-09, + "loss": 2.0021, + "step": 29820 + }, + { + "epoch": 0.99, + "grad_norm": 0.712941586971283, + "learning_rate": 3.1459388749577324e-09, + "loss": 2.0007, + "step": 29821 + }, + { + "epoch": 0.99, + "grad_norm": 0.7409473061561584, + "learning_rate": 3.1193362433257213e-09, + "loss": 2.04, + "step": 29822 + }, + { + "epoch": 0.99, + "grad_norm": 0.7436716556549072, + "learning_rate": 3.092846550594164e-09, + "loss": 1.996, + "step": 29823 + }, + { + "epoch": 0.99, + "grad_norm": 0.7529311776161194, + "learning_rate": 3.0664697970594904e-09, + "loss": 2.1097, + "step": 29824 + }, + { + "epoch": 0.99, + "grad_norm": 0.7243223786354065, + "learning_rate": 3.0402059830225706e-09, + "loss": 2.0224, + "step": 29825 + }, + { + "epoch": 0.99, + "grad_norm": 0.7718964219093323, + "learning_rate": 3.0140551087776138e-09, + "loss": 2.0808, + "step": 29826 + }, + { + "epoch": 0.99, + "grad_norm": 0.7570213079452515, + "learning_rate": 2.9880171746210497e-09, + "loss": 2.0302, + "step": 29827 + }, + { + "epoch": 0.99, + "grad_norm": 0.7510157823562622, + "learning_rate": 2.962092180847087e-09, + "loss": 2.0225, + "step": 29828 + }, + { + "epoch": 0.99, + "grad_norm": 0.7550667524337769, + "learning_rate": 2.936280127748825e-09, + "loss": 2.0902, + "step": 29829 + }, + { + "epoch": 0.99, + "grad_norm": 0.7680709958076477, + "learning_rate": 2.9105810156171423e-09, + "loss": 2.0541, + "step": 29830 + }, + { + "epoch": 0.99, + "grad_norm": 0.7224922776222229, + "learning_rate": 2.8849948447429168e-09, + "loss": 2.0499, + "step": 29831 + }, + { + "epoch": 0.99, + "grad_norm": 0.7553815245628357, + "learning_rate": 2.8595216154159167e-09, + "loss": 1.9698, + "step": 29832 + }, + { + "epoch": 0.99, + "grad_norm": 0.7676466703414917, + "learning_rate": 2.8341613279225797e-09, + "loss": 2.0111, + "step": 29833 + }, + { + "epoch": 0.99, + "grad_norm": 0.7479047179222107, + "learning_rate": 2.808913982550454e-09, + "loss": 2.0212, + "step": 29834 + }, + { + "epoch": 0.99, + "grad_norm": 0.8118809461593628, + "learning_rate": 2.7837795795837564e-09, + "loss": 2.0195, + "step": 29835 + }, + { + "epoch": 0.99, + "grad_norm": 0.7683497667312622, + "learning_rate": 2.7587581193067035e-09, + "loss": 2.0374, + "step": 29836 + }, + { + "epoch": 0.99, + "grad_norm": 0.7969748377799988, + "learning_rate": 2.733849602002403e-09, + "loss": 2.0504, + "step": 29837 + }, + { + "epoch": 0.99, + "grad_norm": 0.7508156895637512, + "learning_rate": 2.709054027952851e-09, + "loss": 1.9562, + "step": 29838 + }, + { + "epoch": 0.99, + "grad_norm": 0.7445412278175354, + "learning_rate": 2.6843713974367136e-09, + "loss": 2.0852, + "step": 29839 + }, + { + "epoch": 0.99, + "grad_norm": 0.7348653674125671, + "learning_rate": 2.659801710733767e-09, + "loss": 2.1057, + "step": 29840 + }, + { + "epoch": 0.99, + "grad_norm": 0.7156270742416382, + "learning_rate": 2.6353449681204568e-09, + "loss": 1.9938, + "step": 29841 + }, + { + "epoch": 0.99, + "grad_norm": 0.7586727738380432, + "learning_rate": 2.6110011698754487e-09, + "loss": 2.0305, + "step": 29842 + }, + { + "epoch": 0.99, + "grad_norm": 0.7782207727432251, + "learning_rate": 2.5867703162718584e-09, + "loss": 2.0649, + "step": 29843 + }, + { + "epoch": 0.99, + "grad_norm": 0.7509174346923828, + "learning_rate": 2.5626524075828e-09, + "loss": 2.0734, + "step": 29844 + }, + { + "epoch": 0.99, + "grad_norm": 0.7275986075401306, + "learning_rate": 2.5386474440836086e-09, + "loss": 1.9885, + "step": 29845 + }, + { + "epoch": 0.99, + "grad_norm": 0.7723815441131592, + "learning_rate": 2.5147554260418484e-09, + "loss": 2.066, + "step": 29846 + }, + { + "epoch": 0.99, + "grad_norm": 0.7597500085830688, + "learning_rate": 2.4909763537306343e-09, + "loss": 2.0793, + "step": 29847 + }, + { + "epoch": 0.99, + "grad_norm": 0.7478899359703064, + "learning_rate": 2.4673102274164195e-09, + "loss": 2.0993, + "step": 29848 + }, + { + "epoch": 0.99, + "grad_norm": 0.7401597499847412, + "learning_rate": 2.4437570473678783e-09, + "loss": 2.0208, + "step": 29849 + }, + { + "epoch": 0.99, + "grad_norm": 0.7216199040412903, + "learning_rate": 2.4203168138514644e-09, + "loss": 2.0327, + "step": 29850 + }, + { + "epoch": 0.99, + "grad_norm": 0.7516917586326599, + "learning_rate": 2.3969895271303e-09, + "loss": 2.0059, + "step": 29851 + }, + { + "epoch": 0.99, + "grad_norm": 0.7287757396697998, + "learning_rate": 2.373775187468619e-09, + "loss": 2.0258, + "step": 29852 + }, + { + "epoch": 0.99, + "grad_norm": 0.7130069136619568, + "learning_rate": 2.350673795128433e-09, + "loss": 1.9762, + "step": 29853 + }, + { + "epoch": 0.99, + "grad_norm": 0.7495232820510864, + "learning_rate": 2.3276853503717557e-09, + "loss": 2.0254, + "step": 29854 + }, + { + "epoch": 0.99, + "grad_norm": 0.7562741041183472, + "learning_rate": 2.3048098534572684e-09, + "loss": 1.9963, + "step": 29855 + }, + { + "epoch": 0.99, + "grad_norm": 0.757908046245575, + "learning_rate": 2.2820473046447634e-09, + "loss": 1.9716, + "step": 29856 + }, + { + "epoch": 0.99, + "grad_norm": 0.7227439880371094, + "learning_rate": 2.259397704189592e-09, + "loss": 2.0302, + "step": 29857 + }, + { + "epoch": 0.99, + "grad_norm": 0.7256495952606201, + "learning_rate": 2.236861052348216e-09, + "loss": 2.0523, + "step": 29858 + }, + { + "epoch": 0.99, + "grad_norm": 0.7868555188179016, + "learning_rate": 2.214437349375986e-09, + "loss": 2.1287, + "step": 29859 + }, + { + "epoch": 0.99, + "grad_norm": 0.7411066889762878, + "learning_rate": 2.192126595526034e-09, + "loss": 1.9836, + "step": 29860 + }, + { + "epoch": 0.99, + "grad_norm": 0.7502613663673401, + "learning_rate": 2.1699287910503795e-09, + "loss": 2.0437, + "step": 29861 + }, + { + "epoch": 0.99, + "grad_norm": 0.7456634044647217, + "learning_rate": 2.1478439361988235e-09, + "loss": 2.0257, + "step": 29862 + }, + { + "epoch": 0.99, + "grad_norm": 0.7811187505722046, + "learning_rate": 2.125872031222276e-09, + "loss": 1.9934, + "step": 29863 + }, + { + "epoch": 0.99, + "grad_norm": 0.7530847787857056, + "learning_rate": 2.104013076367206e-09, + "loss": 2.0055, + "step": 29864 + }, + { + "epoch": 0.99, + "grad_norm": 0.7474566698074341, + "learning_rate": 2.0822670718823046e-09, + "loss": 2.065, + "step": 29865 + }, + { + "epoch": 0.99, + "grad_norm": 0.7490169405937195, + "learning_rate": 2.0606340180129305e-09, + "loss": 2.0545, + "step": 29866 + }, + { + "epoch": 0.99, + "grad_norm": 0.792894184589386, + "learning_rate": 2.0391139150033325e-09, + "loss": 2.0598, + "step": 29867 + }, + { + "epoch": 0.99, + "grad_norm": 0.7788861393928528, + "learning_rate": 2.0177067630955395e-09, + "loss": 2.0175, + "step": 29868 + }, + { + "epoch": 0.99, + "grad_norm": 0.7672091722488403, + "learning_rate": 1.99641256253269e-09, + "loss": 2.029, + "step": 29869 + }, + { + "epoch": 0.99, + "grad_norm": 0.7585312724113464, + "learning_rate": 1.975231313555703e-09, + "loss": 2.0056, + "step": 29870 + }, + { + "epoch": 0.99, + "grad_norm": 0.7147022485733032, + "learning_rate": 1.9541630164021664e-09, + "loss": 1.9611, + "step": 29871 + }, + { + "epoch": 0.99, + "grad_norm": 0.761692225933075, + "learning_rate": 1.9332076713107772e-09, + "loss": 2.0929, + "step": 29872 + }, + { + "epoch": 0.99, + "grad_norm": 0.7389810085296631, + "learning_rate": 1.912365278519124e-09, + "loss": 2.0018, + "step": 29873 + }, + { + "epoch": 0.99, + "grad_norm": 0.7708852291107178, + "learning_rate": 1.8916358382625733e-09, + "loss": 2.0456, + "step": 29874 + }, + { + "epoch": 0.99, + "grad_norm": 0.7427096366882324, + "learning_rate": 1.8710193507742723e-09, + "loss": 1.9986, + "step": 29875 + }, + { + "epoch": 0.99, + "grad_norm": 0.7353299260139465, + "learning_rate": 1.8505158162873683e-09, + "loss": 2.0679, + "step": 29876 + }, + { + "epoch": 0.99, + "grad_norm": 0.7231204509735107, + "learning_rate": 1.8301252350350075e-09, + "loss": 2.0221, + "step": 29877 + }, + { + "epoch": 0.99, + "grad_norm": 0.7684375047683716, + "learning_rate": 1.809847607245896e-09, + "loss": 2.0617, + "step": 29878 + }, + { + "epoch": 0.99, + "grad_norm": 0.7398586273193359, + "learning_rate": 1.7896829331487397e-09, + "loss": 1.9826, + "step": 29879 + }, + { + "epoch": 0.99, + "grad_norm": 0.74696946144104, + "learning_rate": 1.7696312129733551e-09, + "loss": 2.0215, + "step": 29880 + }, + { + "epoch": 0.99, + "grad_norm": 0.7459486722946167, + "learning_rate": 1.7496924469440068e-09, + "loss": 2.0193, + "step": 29881 + }, + { + "epoch": 0.99, + "grad_norm": 0.7631803750991821, + "learning_rate": 1.729866635288291e-09, + "loss": 2.0135, + "step": 29882 + }, + { + "epoch": 0.99, + "grad_norm": 0.7474335432052612, + "learning_rate": 1.7101537782282517e-09, + "loss": 1.9952, + "step": 29883 + }, + { + "epoch": 0.99, + "grad_norm": 0.7493554949760437, + "learning_rate": 1.6905538759881546e-09, + "loss": 1.9999, + "step": 29884 + }, + { + "epoch": 0.99, + "grad_norm": 0.7568851709365845, + "learning_rate": 1.6710669287878233e-09, + "loss": 2.0524, + "step": 29885 + }, + { + "epoch": 0.99, + "grad_norm": 0.7399751543998718, + "learning_rate": 1.6516929368481927e-09, + "loss": 1.9676, + "step": 29886 + }, + { + "epoch": 0.99, + "grad_norm": 0.7268072962760925, + "learning_rate": 1.6324319003879764e-09, + "loss": 1.9913, + "step": 29887 + }, + { + "epoch": 0.99, + "grad_norm": 0.7300494909286499, + "learning_rate": 1.613283819624778e-09, + "loss": 2.0023, + "step": 29888 + }, + { + "epoch": 0.99, + "grad_norm": 0.7397010326385498, + "learning_rate": 1.5942486947762016e-09, + "loss": 2.0924, + "step": 29889 + }, + { + "epoch": 0.99, + "grad_norm": 0.7560091018676758, + "learning_rate": 1.5753265260554096e-09, + "loss": 2.078, + "step": 29890 + }, + { + "epoch": 0.99, + "grad_norm": 0.7547938823699951, + "learning_rate": 1.5565173136766753e-09, + "loss": 2.0011, + "step": 29891 + }, + { + "epoch": 0.99, + "grad_norm": 0.7331838607788086, + "learning_rate": 1.5378210578531616e-09, + "loss": 2.0382, + "step": 29892 + }, + { + "epoch": 0.99, + "grad_norm": 0.7528529167175293, + "learning_rate": 1.5192377587958107e-09, + "loss": 2.0209, + "step": 29893 + }, + { + "epoch": 0.99, + "grad_norm": 0.7621193528175354, + "learning_rate": 1.5007674167133445e-09, + "loss": 2.0497, + "step": 29894 + }, + { + "epoch": 0.99, + "grad_norm": 0.703238308429718, + "learning_rate": 1.4824100318167055e-09, + "loss": 2.0252, + "step": 29895 + }, + { + "epoch": 0.99, + "grad_norm": 0.7415446639060974, + "learning_rate": 1.4641656043112852e-09, + "loss": 2.0524, + "step": 29896 + }, + { + "epoch": 0.99, + "grad_norm": 0.7276495695114136, + "learning_rate": 1.4460341344046946e-09, + "loss": 2.0805, + "step": 29897 + }, + { + "epoch": 0.99, + "grad_norm": 0.7539460062980652, + "learning_rate": 1.4280156223012154e-09, + "loss": 2.0759, + "step": 29898 + }, + { + "epoch": 0.99, + "grad_norm": 0.7755263447761536, + "learning_rate": 1.4101100682040181e-09, + "loss": 2.0013, + "step": 29899 + }, + { + "epoch": 0.99, + "grad_norm": 0.7303889393806458, + "learning_rate": 1.3923174723151633e-09, + "loss": 2.0302, + "step": 29900 + }, + { + "epoch": 0.99, + "grad_norm": 0.73729008436203, + "learning_rate": 1.3746378348367117e-09, + "loss": 2.0715, + "step": 29901 + }, + { + "epoch": 0.99, + "grad_norm": 0.7594935297966003, + "learning_rate": 1.3570711559673933e-09, + "loss": 1.9935, + "step": 29902 + }, + { + "epoch": 0.99, + "grad_norm": 0.7298699617385864, + "learning_rate": 1.3396174359059377e-09, + "loss": 1.9887, + "step": 29903 + }, + { + "epoch": 0.99, + "grad_norm": 0.7334200143814087, + "learning_rate": 1.3222766748510751e-09, + "loss": 2.0957, + "step": 29904 + }, + { + "epoch": 0.99, + "grad_norm": 0.7237154245376587, + "learning_rate": 1.3050488729959842e-09, + "loss": 2.112, + "step": 29905 + }, + { + "epoch": 0.99, + "grad_norm": 0.7643848657608032, + "learning_rate": 1.2879340305371745e-09, + "loss": 2.078, + "step": 29906 + }, + { + "epoch": 1.0, + "grad_norm": 0.7261351346969604, + "learning_rate": 1.2709321476667147e-09, + "loss": 2.0147, + "step": 29907 + }, + { + "epoch": 1.0, + "grad_norm": 0.7658722400665283, + "learning_rate": 1.2540432245777834e-09, + "loss": 1.9805, + "step": 29908 + }, + { + "epoch": 1.0, + "grad_norm": 0.7499039173126221, + "learning_rate": 1.237267261461339e-09, + "loss": 2.0319, + "step": 29909 + }, + { + "epoch": 1.0, + "grad_norm": 0.7775822281837463, + "learning_rate": 1.2206042585061195e-09, + "loss": 2.0564, + "step": 29910 + }, + { + "epoch": 1.0, + "grad_norm": 0.7540507316589355, + "learning_rate": 1.2040542159008628e-09, + "loss": 2.0354, + "step": 29911 + }, + { + "epoch": 1.0, + "grad_norm": 0.7205124497413635, + "learning_rate": 1.1876171338320864e-09, + "loss": 2.0899, + "step": 29912 + }, + { + "epoch": 1.0, + "grad_norm": 0.7431591749191284, + "learning_rate": 1.1712930124851973e-09, + "loss": 2.007, + "step": 29913 + }, + { + "epoch": 1.0, + "grad_norm": 0.7286253571510315, + "learning_rate": 1.1550818520467133e-09, + "loss": 2.0256, + "step": 29914 + }, + { + "epoch": 1.0, + "grad_norm": 0.7481906414031982, + "learning_rate": 1.1389836526964904e-09, + "loss": 1.9846, + "step": 29915 + }, + { + "epoch": 1.0, + "grad_norm": 0.7880908250808716, + "learning_rate": 1.1229984146188255e-09, + "loss": 2.0561, + "step": 29916 + }, + { + "epoch": 1.0, + "grad_norm": 0.7736382484436035, + "learning_rate": 1.1071261379935748e-09, + "loss": 2.0835, + "step": 29917 + }, + { + "epoch": 1.0, + "grad_norm": 0.7612571120262146, + "learning_rate": 1.0913668230005948e-09, + "loss": 2.0654, + "step": 29918 + }, + { + "epoch": 1.0, + "grad_norm": 0.7665936946868896, + "learning_rate": 1.0757204698164103e-09, + "loss": 2.1786, + "step": 29919 + }, + { + "epoch": 1.0, + "grad_norm": 0.7419058084487915, + "learning_rate": 1.0601870786197677e-09, + "loss": 2.0098, + "step": 29920 + }, + { + "epoch": 1.0, + "grad_norm": 0.7315691709518433, + "learning_rate": 1.0447666495849717e-09, + "loss": 2.0495, + "step": 29921 + }, + { + "epoch": 1.0, + "grad_norm": 0.7426202297210693, + "learning_rate": 1.0294591828863276e-09, + "loss": 2.0219, + "step": 29922 + }, + { + "epoch": 1.0, + "grad_norm": 0.7323428392410278, + "learning_rate": 1.01426467869592e-09, + "loss": 2.0538, + "step": 29923 + }, + { + "epoch": 1.0, + "grad_norm": 0.7710581421852112, + "learning_rate": 9.991831371880535e-10, + "loss": 1.9981, + "step": 29924 + }, + { + "epoch": 1.0, + "grad_norm": 0.7526863217353821, + "learning_rate": 9.842145585303719e-10, + "loss": 2.0371, + "step": 29925 + }, + { + "epoch": 1.0, + "grad_norm": 0.7504081726074219, + "learning_rate": 9.693589428927396e-10, + "loss": 2.0718, + "step": 29926 + }, + { + "epoch": 1.0, + "grad_norm": 0.7425186038017273, + "learning_rate": 9.546162904439104e-10, + "loss": 2.0842, + "step": 29927 + }, + { + "epoch": 1.0, + "grad_norm": 0.7677820324897766, + "learning_rate": 9.399866013504178e-10, + "loss": 2.0427, + "step": 29928 + }, + { + "epoch": 1.0, + "grad_norm": 0.7290091514587402, + "learning_rate": 9.254698757754643e-10, + "loss": 2.0533, + "step": 29929 + }, + { + "epoch": 1.0, + "grad_norm": 0.7729735970497131, + "learning_rate": 9.110661138855836e-10, + "loss": 2.0557, + "step": 29930 + }, + { + "epoch": 1.0, + "grad_norm": 0.7551309466362, + "learning_rate": 8.96775315841758e-10, + "loss": 2.0087, + "step": 29931 + }, + { + "epoch": 1.0, + "grad_norm": 0.7739366292953491, + "learning_rate": 8.825974818071903e-10, + "loss": 2.0437, + "step": 29932 + }, + { + "epoch": 1.0, + "grad_norm": 0.7256814241409302, + "learning_rate": 8.685326119395321e-10, + "loss": 2.0321, + "step": 29933 + }, + { + "epoch": 1.0, + "grad_norm": 0.7723352909088135, + "learning_rate": 8.545807063997658e-10, + "loss": 2.1138, + "step": 29934 + }, + { + "epoch": 1.0, + "grad_norm": 0.7476518750190735, + "learning_rate": 8.40741765345543e-10, + "loss": 2.0165, + "step": 29935 + }, + { + "epoch": 1.0, + "grad_norm": 0.7521471977233887, + "learning_rate": 8.27015788932295e-10, + "loss": 2.0426, + "step": 29936 + }, + { + "epoch": 1.0, + "grad_norm": 0.7272869944572449, + "learning_rate": 8.134027773143427e-10, + "loss": 2.0323, + "step": 29937 + }, + { + "epoch": 1.0, + "grad_norm": 0.7486531734466553, + "learning_rate": 7.999027306471174e-10, + "loss": 1.9711, + "step": 29938 + }, + { + "epoch": 1.0, + "grad_norm": 0.7193907499313354, + "learning_rate": 7.865156490827197e-10, + "loss": 2.0457, + "step": 29939 + }, + { + "epoch": 1.0, + "grad_norm": 0.7560902833938599, + "learning_rate": 7.732415327721399e-10, + "loss": 2.0842, + "step": 29940 + }, + { + "epoch": 1.0, + "grad_norm": 0.7276819348335266, + "learning_rate": 7.60080381865258e-10, + "loss": 2.0696, + "step": 29941 + }, + { + "epoch": 1.0, + "grad_norm": 0.7517814040184021, + "learning_rate": 7.470321965108441e-10, + "loss": 2.0371, + "step": 29942 + }, + { + "epoch": 1.0, + "grad_norm": 0.771757960319519, + "learning_rate": 7.340969768554473e-10, + "loss": 2.0861, + "step": 29943 + }, + { + "epoch": 1.0, + "grad_norm": 0.7464506030082703, + "learning_rate": 7.212747230467276e-10, + "loss": 2.0, + "step": 29944 + }, + { + "epoch": 1.0, + "grad_norm": 0.7161481380462646, + "learning_rate": 7.085654352290138e-10, + "loss": 2.0726, + "step": 29945 + }, + { + "epoch": 1.0, + "grad_norm": 0.7373945116996765, + "learning_rate": 6.959691135466351e-10, + "loss": 2.0002, + "step": 29946 + }, + { + "epoch": 1.0, + "grad_norm": 0.7244176268577576, + "learning_rate": 6.834857581394793e-10, + "loss": 2.0628, + "step": 29947 + }, + { + "epoch": 1.0, + "grad_norm": 0.7278868556022644, + "learning_rate": 6.711153691507655e-10, + "loss": 2.0937, + "step": 29948 + }, + { + "epoch": 1.0, + "grad_norm": 0.7371912598609924, + "learning_rate": 6.588579467203815e-10, + "loss": 2.0254, + "step": 29949 + }, + { + "epoch": 1.0, + "grad_norm": 0.7127786874771118, + "learning_rate": 6.467134909848849e-10, + "loss": 1.9971, + "step": 29950 + }, + { + "epoch": 1.0, + "grad_norm": 0.7275497317314148, + "learning_rate": 6.346820020830535e-10, + "loss": 1.9863, + "step": 29951 + }, + { + "epoch": 1.0, + "grad_norm": 0.7169925570487976, + "learning_rate": 6.227634801503346e-10, + "loss": 2.0047, + "step": 29952 + }, + { + "epoch": 1.0, + "grad_norm": 0.7752792239189148, + "learning_rate": 6.109579253210651e-10, + "loss": 2.0266, + "step": 29953 + }, + { + "epoch": 1.0, + "grad_norm": 0.7461509704589844, + "learning_rate": 5.99265337729582e-10, + "loss": 2.1087, + "step": 29954 + }, + { + "epoch": 1.0, + "grad_norm": 0.7324398756027222, + "learning_rate": 5.876857175068917e-10, + "loss": 2.0306, + "step": 29955 + }, + { + "epoch": 1.0, + "grad_norm": 0.7414193153381348, + "learning_rate": 5.762190647851107e-10, + "loss": 2.0772, + "step": 29956 + }, + { + "epoch": 1.0, + "grad_norm": 0.7633237242698669, + "learning_rate": 5.648653796919145e-10, + "loss": 2.0775, + "step": 29957 + }, + { + "epoch": 1.0, + "grad_norm": 0.735564112663269, + "learning_rate": 5.536246623571995e-10, + "loss": 2.0399, + "step": 29958 + }, + { + "epoch": 1.0, + "grad_norm": 0.764510452747345, + "learning_rate": 5.424969129075308e-10, + "loss": 1.9942, + "step": 29959 + }, + { + "epoch": 1.0, + "grad_norm": 0.7274280786514282, + "learning_rate": 5.314821314683638e-10, + "loss": 2.0554, + "step": 29960 + }, + { + "epoch": 1.0, + "grad_norm": 0.7093517184257507, + "learning_rate": 5.205803181640434e-10, + "loss": 1.9897, + "step": 29961 + }, + { + "epoch": 1.0, + "grad_norm": 0.7587512731552124, + "learning_rate": 5.097914731178044e-10, + "loss": 2.0273, + "step": 29962 + }, + { + "epoch": 1.0, + "grad_norm": 0.755617618560791, + "learning_rate": 4.991155964517713e-10, + "loss": 2.067, + "step": 29963 + }, + { + "epoch": 1.0, + "grad_norm": 0.7443338632583618, + "learning_rate": 4.885526882869585e-10, + "loss": 2.0564, + "step": 29964 + }, + { + "epoch": 1.0, + "grad_norm": 0.7335346937179565, + "learning_rate": 4.781027487421597e-10, + "loss": 2.0159, + "step": 29965 + }, + { + "epoch": 1.0, + "grad_norm": 0.725265383720398, + "learning_rate": 4.677657779350586e-10, + "loss": 1.9666, + "step": 29966 + }, + { + "epoch": 1.0, + "grad_norm": 0.7502939701080322, + "learning_rate": 4.575417759822287e-10, + "loss": 2.0238, + "step": 29967 + }, + { + "epoch": 1.0, + "grad_norm": 0.7693683505058289, + "learning_rate": 4.4743074300024334e-10, + "loss": 2.0421, + "step": 29968 + }, + { + "epoch": 1.0, + "grad_norm": 0.7351319789886475, + "learning_rate": 4.3743267910345554e-10, + "loss": 2.0349, + "step": 29969 + }, + { + "epoch": 1.0, + "grad_norm": 0.7460674047470093, + "learning_rate": 4.275475844039978e-10, + "loss": 1.9725, + "step": 29970 + }, + { + "epoch": 1.0, + "grad_norm": 0.7762727737426758, + "learning_rate": 4.1777545901400264e-10, + "loss": 2.0384, + "step": 29971 + }, + { + "epoch": 1.0, + "grad_norm": 0.7705658078193665, + "learning_rate": 4.0811630304338213e-10, + "loss": 2.0174, + "step": 29972 + }, + { + "epoch": 1.0, + "grad_norm": 0.7528339624404907, + "learning_rate": 3.985701166009381e-10, + "loss": 2.0718, + "step": 29973 + }, + { + "epoch": 1.0, + "grad_norm": 0.7343324422836304, + "learning_rate": 3.891368997954725e-10, + "loss": 2.0095, + "step": 29974 + }, + { + "epoch": 1.0, + "grad_norm": 0.7579321265220642, + "learning_rate": 3.798166527335667e-10, + "loss": 2.0645, + "step": 29975 + }, + { + "epoch": 1.0, + "grad_norm": 0.7555983662605286, + "learning_rate": 3.7060937551958164e-10, + "loss": 2.0777, + "step": 29976 + }, + { + "epoch": 1.0, + "grad_norm": 0.7653681039810181, + "learning_rate": 3.615150682589885e-10, + "loss": 1.9922, + "step": 29977 + }, + { + "epoch": 1.0, + "grad_norm": 0.7409499287605286, + "learning_rate": 3.5253373105281764e-10, + "loss": 2.1126, + "step": 29978 + }, + { + "epoch": 1.0, + "grad_norm": 0.7467514872550964, + "learning_rate": 3.436653640032095e-10, + "loss": 2.0713, + "step": 29979 + }, + { + "epoch": 1.0, + "grad_norm": 0.7554823756217957, + "learning_rate": 3.349099672111944e-10, + "loss": 2.0426, + "step": 29980 + }, + { + "epoch": 1.0, + "grad_norm": 0.747926652431488, + "learning_rate": 3.26267540774472e-10, + "loss": 2.1105, + "step": 29981 + }, + { + "epoch": 1.0, + "grad_norm": 0.7688230276107788, + "learning_rate": 3.1773808479074184e-10, + "loss": 2.0657, + "step": 29982 + }, + { + "epoch": 1.0, + "grad_norm": 0.7157496809959412, + "learning_rate": 3.093215993577037e-10, + "loss": 2.0882, + "step": 29983 + }, + { + "epoch": 1.0, + "grad_norm": 0.7708505988121033, + "learning_rate": 3.0101808456972635e-10, + "loss": 2.0793, + "step": 29984 + }, + { + "epoch": 1.0, + "grad_norm": 0.7430346012115479, + "learning_rate": 2.928275405200687e-10, + "loss": 2.0664, + "step": 29985 + }, + { + "epoch": 1.0, + "grad_norm": 0.7533227801322937, + "learning_rate": 2.847499673008791e-10, + "loss": 2.0388, + "step": 29986 + }, + { + "epoch": 1.0, + "grad_norm": 0.7363753318786621, + "learning_rate": 2.767853650054164e-10, + "loss": 2.1146, + "step": 29987 + }, + { + "epoch": 1.0, + "grad_norm": 0.7249892950057983, + "learning_rate": 2.689337337213882e-10, + "loss": 2.1128, + "step": 29988 + }, + { + "epoch": 1.0, + "grad_norm": 0.726254403591156, + "learning_rate": 2.6119507353983273e-10, + "loss": 2.0108, + "step": 29989 + }, + { + "epoch": 1.0, + "grad_norm": 0.7713172435760498, + "learning_rate": 2.5356938454623724e-10, + "loss": 2.0801, + "step": 29990 + }, + { + "epoch": 1.0, + "grad_norm": 0.7783042788505554, + "learning_rate": 2.4605666682719907e-10, + "loss": 2.1466, + "step": 29991 + }, + { + "epoch": 1.0, + "grad_norm": 0.7423444986343384, + "learning_rate": 2.386569204670952e-10, + "loss": 2.067, + "step": 29992 + }, + { + "epoch": 1.0, + "grad_norm": 0.7339439392089844, + "learning_rate": 2.3137014555141279e-10, + "loss": 2.0256, + "step": 29993 + }, + { + "epoch": 1.0, + "grad_norm": 0.7885274887084961, + "learning_rate": 2.241963421600879e-10, + "loss": 2.0554, + "step": 29994 + }, + { + "epoch": 1.0, + "grad_norm": 0.7579604983329773, + "learning_rate": 2.1713551037638726e-10, + "loss": 2.0346, + "step": 29995 + }, + { + "epoch": 1.0, + "grad_norm": 0.76185142993927, + "learning_rate": 2.1018765027913667e-10, + "loss": 2.0471, + "step": 29996 + }, + { + "epoch": 1.0, + "grad_norm": 0.7575711011886597, + "learning_rate": 2.033527619460518e-10, + "loss": 2.093, + "step": 29997 + }, + { + "epoch": 1.0, + "grad_norm": 0.7392294406890869, + "learning_rate": 1.9663084545484823e-10, + "loss": 2.015, + "step": 29998 + }, + { + "epoch": 1.0, + "grad_norm": 0.7899391055107117, + "learning_rate": 1.9002190088213135e-10, + "loss": 2.0268, + "step": 29999 + }, + { + "epoch": 1.0, + "grad_norm": 0.7452924847602844, + "learning_rate": 1.835259283022861e-10, + "loss": 2.0789, + "step": 30000 + }, + { + "epoch": 1.0, + "grad_norm": 0.7229639887809753, + "learning_rate": 1.7714292778858722e-10, + "loss": 2.0081, + "step": 30001 + }, + { + "epoch": 1.0, + "grad_norm": 0.7499478459358215, + "learning_rate": 1.7087289941208895e-10, + "loss": 2.0555, + "step": 30002 + }, + { + "epoch": 1.0, + "grad_norm": 0.7569377422332764, + "learning_rate": 1.6471584324495582e-10, + "loss": 2.1134, + "step": 30003 + }, + { + "epoch": 1.0, + "grad_norm": 0.7243926525115967, + "learning_rate": 1.5867175935713186e-10, + "loss": 2.054, + "step": 30004 + }, + { + "epoch": 1.0, + "grad_norm": 0.7314569354057312, + "learning_rate": 1.5274064781523046e-10, + "loss": 2.0387, + "step": 30005 + }, + { + "epoch": 1.0, + "grad_norm": 0.7608683705329895, + "learning_rate": 1.4692250868808543e-10, + "loss": 2.0155, + "step": 30006 + }, + { + "epoch": 1.0, + "grad_norm": 0.7373477816581726, + "learning_rate": 1.4121734204008976e-10, + "loss": 2.0881, + "step": 30007 + }, + { + "epoch": 1.0, + "grad_norm": 0.7580901980400085, + "learning_rate": 1.356251479356363e-10, + "loss": 2.0622, + "step": 30008 + }, + { + "epoch": 1.0, + "grad_norm": 0.7529125213623047, + "learning_rate": 1.3014592643911805e-10, + "loss": 2.073, + "step": 30009 + }, + { + "epoch": 1.0, + "grad_norm": 0.7689856290817261, + "learning_rate": 1.2477967761159726e-10, + "loss": 2.0341, + "step": 30010 + }, + { + "epoch": 1.0, + "grad_norm": 0.7512357234954834, + "learning_rate": 1.1952640151302598e-10, + "loss": 2.0299, + "step": 30011 + }, + { + "epoch": 1.0, + "grad_norm": 0.7306007146835327, + "learning_rate": 1.1438609820446645e-10, + "loss": 1.9798, + "step": 30012 + }, + { + "epoch": 1.0, + "grad_norm": 0.7438822388648987, + "learning_rate": 1.0935876774254006e-10, + "loss": 2.0404, + "step": 30013 + }, + { + "epoch": 1.0, + "grad_norm": 0.7729618549346924, + "learning_rate": 1.0444441018497842e-10, + "loss": 2.0789, + "step": 30014 + }, + { + "epoch": 1.0, + "grad_norm": 0.7457106113433838, + "learning_rate": 9.964302558729266e-11, + "loss": 2.0646, + "step": 30015 + }, + { + "epoch": 1.0, + "grad_norm": 0.7459310293197632, + "learning_rate": 9.49546140027735e-11, + "loss": 2.0073, + "step": 30016 + }, + { + "epoch": 1.0, + "grad_norm": 0.7553513050079346, + "learning_rate": 9.037917548471164e-11, + "loss": 1.9777, + "step": 30017 + }, + { + "epoch": 1.0, + "grad_norm": 0.7469952702522278, + "learning_rate": 8.591671008528757e-11, + "loss": 2.0279, + "step": 30018 + }, + { + "epoch": 1.0, + "grad_norm": 0.7217015027999878, + "learning_rate": 8.156721785446131e-11, + "loss": 2.0209, + "step": 30019 + }, + { + "epoch": 1.0, + "grad_norm": 0.7337492108345032, + "learning_rate": 7.733069884108268e-11, + "loss": 2.055, + "step": 30020 + }, + { + "epoch": 1.0, + "grad_norm": 0.7287655472755432, + "learning_rate": 7.32071530940015e-11, + "loss": 2.0554, + "step": 30021 + }, + { + "epoch": 1.0, + "grad_norm": 0.7633906006813049, + "learning_rate": 6.919658065984714e-11, + "loss": 2.072, + "step": 30022 + }, + { + "epoch": 1.0, + "grad_norm": 0.7760948538780212, + "learning_rate": 6.529898158302849e-11, + "loss": 2.0192, + "step": 30023 + }, + { + "epoch": 1.0, + "grad_norm": 0.7492859363555908, + "learning_rate": 6.151435590795452e-11, + "loss": 2.0125, + "step": 30024 + }, + { + "epoch": 1.0, + "grad_norm": 0.7688169479370117, + "learning_rate": 5.784270367681366e-11, + "loss": 2.038, + "step": 30025 + }, + { + "epoch": 1.0, + "grad_norm": 0.7396836280822754, + "learning_rate": 5.428402493179441e-11, + "loss": 2.0208, + "step": 30026 + }, + { + "epoch": 1.0, + "grad_norm": 0.7266297936439514, + "learning_rate": 5.083831971286479e-11, + "loss": 1.9321, + "step": 30027 + }, + { + "epoch": 1.0, + "grad_norm": 0.7384500503540039, + "learning_rate": 4.7505588059992836e-11, + "loss": 1.9722, + "step": 30028 + }, + { + "epoch": 1.0, + "grad_norm": 0.7619094848632812, + "learning_rate": 4.4285830008705675e-11, + "loss": 2.0746, + "step": 30029 + }, + { + "epoch": 1.0, + "grad_norm": 0.7471997141838074, + "learning_rate": 4.11790455967509e-11, + "loss": 1.9753, + "step": 30030 + }, + { + "epoch": 1.0, + "grad_norm": 0.737399160861969, + "learning_rate": 3.818523485965564e-11, + "loss": 2.05, + "step": 30031 + }, + { + "epoch": 1.0, + "grad_norm": 0.7499684691429138, + "learning_rate": 3.5304397829616364e-11, + "loss": 1.9798, + "step": 30032 + }, + { + "epoch": 1.0, + "grad_norm": 0.7324346899986267, + "learning_rate": 3.253653454104999e-11, + "loss": 1.9936, + "step": 30033 + }, + { + "epoch": 1.0, + "grad_norm": 0.7423503398895264, + "learning_rate": 2.9881645023932536e-11, + "loss": 1.9536, + "step": 30034 + }, + { + "epoch": 1.0, + "grad_norm": 0.7474167943000793, + "learning_rate": 2.7339729308240026e-11, + "loss": 2.1112, + "step": 30035 + }, + { + "epoch": 1.0, + "grad_norm": 0.7588308453559875, + "learning_rate": 2.4910787423948478e-11, + "loss": 2.0964, + "step": 30036 + }, + { + "epoch": 1.0, + "grad_norm": 0.7837567925453186, + "learning_rate": 2.259481939770325e-11, + "loss": 2.0522, + "step": 30037 + }, + { + "epoch": 1.0, + "grad_norm": 0.7425159811973572, + "learning_rate": 2.0391825255039464e-11, + "loss": 2.0676, + "step": 30038 + }, + { + "epoch": 1.0, + "grad_norm": 0.741051197052002, + "learning_rate": 1.8301805021492258e-11, + "loss": 2.0925, + "step": 30039 + }, + { + "epoch": 1.0, + "grad_norm": 0.7427074909210205, + "learning_rate": 1.632475872037631e-11, + "loss": 2.0548, + "step": 30040 + }, + { + "epoch": 1.0, + "grad_norm": 0.738802433013916, + "learning_rate": 1.4460686375006305e-11, + "loss": 1.9549, + "step": 30041 + }, + { + "epoch": 1.0, + "grad_norm": 0.7539389133453369, + "learning_rate": 1.2709588005366259e-11, + "loss": 2.0847, + "step": 30042 + }, + { + "epoch": 1.0, + "grad_norm": 0.7748835682868958, + "learning_rate": 1.1071463631440182e-11, + "loss": 2.0514, + "step": 30043 + }, + { + "epoch": 1.0, + "grad_norm": 0.7527546882629395, + "learning_rate": 9.546313270991647e-12, + "loss": 2.0212, + "step": 30044 + }, + { + "epoch": 1.0, + "grad_norm": 0.7306848168373108, + "learning_rate": 8.134136944004667e-12, + "loss": 2.0182, + "step": 30045 + }, + { + "epoch": 1.0, + "grad_norm": 0.7830349802970886, + "learning_rate": 6.834934662691695e-12, + "loss": 2.0485, + "step": 30046 + }, + { + "epoch": 1.0, + "grad_norm": 0.7780806422233582, + "learning_rate": 5.648706444816299e-12, + "loss": 2.0785, + "step": 30047 + }, + { + "epoch": 1.0, + "grad_norm": 0.7345147728919983, + "learning_rate": 4.575452302590933e-12, + "loss": 2.069, + "step": 30048 + }, + { + "epoch": 1.0, + "grad_norm": 0.7313547134399414, + "learning_rate": 3.6151722471178264e-12, + "loss": 2.0154, + "step": 30049 + }, + { + "epoch": 1.0, + "grad_norm": 0.7374529242515564, + "learning_rate": 2.767866290609433e-12, + "loss": 2.039, + "step": 30050 + }, + { + "epoch": 1.0, + "grad_norm": 0.7542058229446411, + "learning_rate": 2.033534443057761e-12, + "loss": 2.0975, + "step": 30051 + }, + { + "epoch": 1.0, + "grad_norm": 0.7186015248298645, + "learning_rate": 1.4121767111241469e-12, + "loss": 2.0925, + "step": 30052 + }, + { + "epoch": 1.0, + "grad_norm": 0.7711829543113708, + "learning_rate": 9.037931025801527e-13, + "loss": 2.14, + "step": 30053 + }, + { + "epoch": 1.0, + "grad_norm": 0.7336012125015259, + "learning_rate": 5.083836240871165e-13, + "loss": 2.0082, + "step": 30054 + }, + { + "epoch": 1.0, + "grad_norm": 0.7710098028182983, + "learning_rate": 2.2594827786548423e-13, + "loss": 2.0744, + "step": 30055 + }, + { + "epoch": 1.0, + "grad_norm": 0.747883141040802, + "learning_rate": 5.6487069466371057e-14, + "loss": 2.0049, + "step": 30056 + }, + { + "epoch": 1.0, + "grad_norm": 1.5368523597717285, + "learning_rate": 0.0, + "loss": 2.0168, + "step": 30057 + }, + { + "epoch": 1.0, + "step": 30057, + "total_flos": 5.53843527867433e+16, + "train_loss": 2.1406584009897505, + "train_runtime": 4517.6027, + "train_samples_per_second": 425.801, + "train_steps_per_second": 6.653 + } + ], + "logging_steps": 1.0, + "max_steps": 30057, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5000, + "total_flos": 5.53843527867433e+16, + "train_batch_size": 64, + "trial_name": null, + "trial_params": null +}