{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.74360499702558, "eval_steps": 500, "global_step": 250, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 9.285714285714285e-05, "loss": 2.4427, "step": 1 }, { "epoch": 0.01, "learning_rate": 0.0001857142857142857, "loss": 2.3973, "step": 2 }, { "epoch": 0.01, "learning_rate": 0.00027857142857142854, "loss": 2.341, "step": 3 }, { "epoch": 0.01, "learning_rate": 0.0003714285714285714, "loss": 2.1281, "step": 4 }, { "epoch": 0.01, "learning_rate": 0.0004642857142857143, "loss": 1.4346, "step": 5 }, { "epoch": 0.02, "learning_rate": 0.0005571428571428571, "loss": 1.1715, "step": 6 }, { "epoch": 0.02, "learning_rate": 0.00065, "loss": 1.086, "step": 7 }, { "epoch": 0.02, "learning_rate": 0.0006499851830773117, "loss": 0.9921, "step": 8 }, { "epoch": 0.03, "learning_rate": 0.00064994073366027, "loss": 0.9231, "step": 9 }, { "epoch": 0.03, "learning_rate": 0.0006498666558018197, "loss": 0.9343, "step": 10 }, { "epoch": 0.03, "learning_rate": 0.0006497629562564588, "loss": 0.9191, "step": 11 }, { "epoch": 0.04, "learning_rate": 0.0006496296444796219, "loss": 0.8791, "step": 12 }, { "epoch": 0.04, "learning_rate": 0.0006494667326268186, "loss": 0.8632, "step": 13 }, { "epoch": 0.04, "learning_rate": 0.0006492742355525248, "loss": 0.9267, "step": 14 }, { "epoch": 0.04, "learning_rate": 0.0006490521708088281, "loss": 0.8644, "step": 15 }, { "epoch": 0.05, "learning_rate": 0.000648800558643828, "loss": 0.8353, "step": 16 }, { "epoch": 0.05, "learning_rate": 0.0006485194219997891, "loss": 0.8482, "step": 17 }, { "epoch": 0.05, "learning_rate": 0.0006482087865110493, "loss": 0.8587, "step": 18 }, { "epoch": 0.06, "learning_rate": 0.0006478686805016826, "loss": 0.9134, "step": 19 }, { "epoch": 0.06, "learning_rate": 0.0006474991349829163, "loss": 0.8238, "step": 20 }, { "epoch": 0.06, "learning_rate": 0.0006471001836503035, "loss": 0.8329, "step": 21 }, { "epoch": 0.07, "learning_rate": 0.0006466718628806508, "loss": 0.7995, "step": 22 }, { "epoch": 0.07, "learning_rate": 0.0006462142117287011, "loss": 0.8363, "step": 23 }, { "epoch": 0.07, "learning_rate": 0.0006457272719235728, "loss": 0.7942, "step": 24 }, { "epoch": 0.07, "learning_rate": 0.0006452110878649547, "loss": 0.858, "step": 25 }, { "epoch": 0.08, "learning_rate": 0.0006446657066190579, "loss": 0.8474, "step": 26 }, { "epoch": 0.08, "learning_rate": 0.000644091177914324, "loss": 0.8175, "step": 27 }, { "epoch": 0.08, "learning_rate": 0.0006434875541368907, "loss": 0.7821, "step": 28 }, { "epoch": 0.09, "learning_rate": 0.0006428548903258156, "loss": 0.8583, "step": 29 }, { "epoch": 0.09, "learning_rate": 0.0006421932441680574, "loss": 0.8071, "step": 30 }, { "epoch": 0.09, "learning_rate": 0.0006415026759932158, "loss": 0.805, "step": 31 }, { "epoch": 0.1, "learning_rate": 0.0006407832487680309, "loss": 0.881, "step": 32 }, { "epoch": 0.1, "learning_rate": 0.0006400350280906415, "loss": 0.8302, "step": 33 }, { "epoch": 0.1, "learning_rate": 0.0006392580821846041, "loss": 0.8456, "step": 34 }, { "epoch": 0.1, "learning_rate": 0.0006384524818926723, "loss": 0.8067, "step": 35 }, { "epoch": 0.11, "learning_rate": 0.0006376183006703367, "loss": 0.8307, "step": 36 }, { "epoch": 0.11, "learning_rate": 0.0006367556145791275, "loss": 0.8347, "step": 37 }, { "epoch": 0.11, "learning_rate": 0.0006358645022796795, "loss": 0.8086, "step": 38 }, { "epoch": 0.12, "learning_rate": 0.0006349450450245589, "loss": 0.8726, "step": 39 }, { "epoch": 0.12, "learning_rate": 0.0006339973266508556, "loss": 0.78, "step": 40 }, { "epoch": 0.12, "learning_rate": 0.0006330214335725379, "loss": 0.7902, "step": 41 }, { "epoch": 0.12, "learning_rate": 0.0006320174547725736, "loss": 0.7823, "step": 42 }, { "epoch": 0.13, "learning_rate": 0.0006309854817948169, "loss": 0.8211, "step": 43 }, { "epoch": 0.13, "learning_rate": 0.0006299256087356603, "loss": 0.8656, "step": 44 }, { "epoch": 0.13, "learning_rate": 0.000628837932235456, "loss": 0.7767, "step": 45 }, { "epoch": 0.14, "learning_rate": 0.0006277225514697028, "loss": 0.7542, "step": 46 }, { "epoch": 0.14, "learning_rate": 0.0006265795681400046, "loss": 0.8254, "step": 47 }, { "epoch": 0.14, "learning_rate": 0.0006254090864647957, "loss": 0.8099, "step": 48 }, { "epoch": 0.15, "learning_rate": 0.0006242112131698394, "loss": 0.7786, "step": 49 }, { "epoch": 0.15, "learning_rate": 0.0006229860574784954, "loss": 0.7895, "step": 50 }, { "epoch": 0.15, "learning_rate": 0.0006217337311017619, "loss": 0.8092, "step": 51 }, { "epoch": 0.15, "learning_rate": 0.0006204543482280886, "loss": 0.7998, "step": 52 }, { "epoch": 0.16, "learning_rate": 0.0006191480255129656, "loss": 0.802, "step": 53 }, { "epoch": 0.16, "learning_rate": 0.0006178148820682862, "loss": 0.7691, "step": 54 }, { "epoch": 0.16, "learning_rate": 0.0006164550394514865, "loss": 0.8572, "step": 55 }, { "epoch": 0.17, "learning_rate": 0.0006150686216544614, "loss": 0.7915, "step": 56 }, { "epoch": 0.17, "learning_rate": 0.0006136557550922589, "loss": 0.759, "step": 57 }, { "epoch": 0.17, "learning_rate": 0.0006122165685915537, "loss": 0.7282, "step": 58 }, { "epoch": 0.18, "learning_rate": 0.0006107511933789002, "loss": 0.768, "step": 59 }, { "epoch": 0.18, "learning_rate": 0.0006092597630687677, "loss": 0.7868, "step": 60 }, { "epoch": 0.18, "learning_rate": 0.0006077424136513567, "loss": 0.7827, "step": 61 }, { "epoch": 0.18, "learning_rate": 0.0006061992834801996, "loss": 0.7755, "step": 62 }, { "epoch": 0.19, "learning_rate": 0.0006046305132595453, "loss": 0.8332, "step": 63 }, { "epoch": 0.19, "learning_rate": 0.0006030362460315296, "loss": 0.7241, "step": 64 }, { "epoch": 0.19, "learning_rate": 0.0006014166271631326, "loss": 0.8074, "step": 65 }, { "epoch": 0.2, "learning_rate": 0.000599771804332924, "loss": 0.7491, "step": 66 }, { "epoch": 0.2, "learning_rate": 0.0005981019275175972, "loss": 0.8004, "step": 67 }, { "epoch": 0.2, "learning_rate": 0.000596407148978295, "loss": 0.7442, "step": 68 }, { "epoch": 0.21, "learning_rate": 0.0005946876232467254, "loss": 0.6956, "step": 69 }, { "epoch": 0.21, "learning_rate": 0.0005929435071110721, "loss": 0.7472, "step": 70 }, { "epoch": 0.21, "learning_rate": 0.0005911749596016978, "loss": 0.8097, "step": 71 }, { "epoch": 0.21, "learning_rate": 0.0005893821419766438, "loss": 0.8002, "step": 72 }, { "epoch": 0.22, "learning_rate": 0.0005875652177069265, "loss": 0.8203, "step": 73 }, { "epoch": 0.22, "learning_rate": 0.0005857243524616315, "loss": 0.7339, "step": 74 }, { "epoch": 0.22, "learning_rate": 0.0005838597140928082, "loss": 0.6844, "step": 75 }, { "epoch": 0.23, "learning_rate": 0.0005819714726201646, "loss": 0.7211, "step": 76 }, { "epoch": 0.23, "learning_rate": 0.0005800598002155648, "loss": 0.8289, "step": 77 }, { "epoch": 0.23, "learning_rate": 0.0005781248711873302, "loss": 0.7686, "step": 78 }, { "epoch": 0.23, "learning_rate": 0.0005761668619643458, "loss": 0.7618, "step": 79 }, { "epoch": 0.24, "learning_rate": 0.0005741859510799734, "loss": 0.779, "step": 80 }, { "epoch": 0.24, "learning_rate": 0.0005721823191557725, "loss": 0.7599, "step": 81 }, { "epoch": 0.24, "learning_rate": 0.0005701561488850312, "loss": 0.7495, "step": 82 }, { "epoch": 0.25, "learning_rate": 0.000568107625016108, "loss": 0.6977, "step": 83 }, { "epoch": 0.25, "learning_rate": 0.0005660369343355862, "loss": 0.757, "step": 84 }, { "epoch": 0.25, "learning_rate": 0.0005639442656512426, "loss": 0.8295, "step": 85 }, { "epoch": 0.26, "learning_rate": 0.0005618298097748316, "loss": 0.7394, "step": 86 }, { "epoch": 0.26, "learning_rate": 0.0005596937595046872, "loss": 0.7728, "step": 87 }, { "epoch": 0.26, "learning_rate": 0.0005575363096081429, "loss": 0.7721, "step": 88 }, { "epoch": 0.26, "learning_rate": 0.0005553576568037731, "loss": 0.7191, "step": 89 }, { "epoch": 0.27, "learning_rate": 0.0005531579997434555, "loss": 0.7594, "step": 90 }, { "epoch": 0.27, "learning_rate": 0.0005509375389942588, "loss": 0.7511, "step": 91 }, { "epoch": 0.27, "learning_rate": 0.0005486964770201533, "loss": 0.7644, "step": 92 }, { "epoch": 0.28, "learning_rate": 0.0005464350181635519, "loss": 0.7403, "step": 93 }, { "epoch": 0.28, "learning_rate": 0.000544153368626676, "loss": 0.7204, "step": 94 }, { "epoch": 0.28, "learning_rate": 0.0005418517364527552, "loss": 0.7358, "step": 95 }, { "epoch": 0.29, "learning_rate": 0.0005395303315070571, "loss": 0.7607, "step": 96 }, { "epoch": 0.29, "learning_rate": 0.0005371893654577517, "loss": 0.7505, "step": 97 }, { "epoch": 0.29, "learning_rate": 0.0005348290517566107, "loss": 0.7274, "step": 98 }, { "epoch": 0.29, "learning_rate": 0.0005324496056195461, "loss": 0.8161, "step": 99 }, { "epoch": 0.3, "learning_rate": 0.0005300512440069852, "loss": 0.7043, "step": 100 }, { "epoch": 0.3, "learning_rate": 0.0005276341856040884, "loss": 0.7921, "step": 101 }, { "epoch": 0.3, "learning_rate": 0.0005251986508008097, "loss": 0.6852, "step": 102 }, { "epoch": 0.31, "learning_rate": 0.0005227448616718004, "loss": 0.6996, "step": 103 }, { "epoch": 0.31, "learning_rate": 0.0005202730419561611, "loss": 0.6323, "step": 104 }, { "epoch": 0.31, "learning_rate": 0.0005177834170370404, "loss": 0.7601, "step": 105 }, { "epoch": 0.32, "learning_rate": 0.0005152762139210839, "loss": 0.7499, "step": 106 }, { "epoch": 0.32, "learning_rate": 0.0005127516612177365, "loss": 0.73, "step": 107 }, { "epoch": 0.32, "learning_rate": 0.0005102099891183958, "loss": 0.7438, "step": 108 }, { "epoch": 0.32, "learning_rate": 0.0005076514293754255, "loss": 0.6614, "step": 109 }, { "epoch": 0.33, "learning_rate": 0.0005050762152810218, "loss": 0.7599, "step": 110 }, { "epoch": 0.33, "learning_rate": 0.0005024845816459423, "loss": 0.7471, "step": 111 }, { "epoch": 0.33, "learning_rate": 0.0004998767647780961, "loss": 0.7569, "step": 112 }, { "epoch": 0.34, "learning_rate": 0.0004972530024609966, "loss": 0.6561, "step": 113 }, { "epoch": 0.34, "learning_rate": 0.0004946135339320798, "loss": 0.7369, "step": 114 }, { "epoch": 0.34, "learning_rate": 0.0004919585998608917, "loss": 0.755, "step": 115 }, { "epoch": 0.35, "learning_rate": 0.0004892884423271417, "loss": 0.7307, "step": 116 }, { "epoch": 0.35, "learning_rate": 0.0004866033047986317, "loss": 0.7321, "step": 117 }, { "epoch": 0.35, "learning_rate": 0.00048390343210905486, "loss": 0.772, "step": 118 }, { "epoch": 0.35, "learning_rate": 0.0004811890704356722, "loss": 0.6707, "step": 119 }, { "epoch": 0.36, "learning_rate": 0.0004784604672768657, "loss": 0.695, "step": 120 }, { "epoch": 0.36, "learning_rate": 0.0004757178714295709, "loss": 0.6372, "step": 121 }, { "epoch": 0.36, "learning_rate": 0.0004729615329665918, "loss": 0.7303, "step": 122 }, { "epoch": 0.37, "learning_rate": 0.0004701917032137987, "loss": 0.7313, "step": 123 }, { "epoch": 0.37, "learning_rate": 0.00046740863472721176, "loss": 0.6939, "step": 124 }, { "epoch": 0.37, "learning_rate": 0.0004646125812699734, "loss": 0.711, "step": 125 }, { "epoch": 0.37, "learning_rate": 0.0004618037977892089, "loss": 0.7238, "step": 126 }, { "epoch": 0.38, "learning_rate": 0.00045898254039278106, "loss": 0.7508, "step": 127 }, { "epoch": 0.38, "learning_rate": 0.0004561490663259375, "loss": 0.7817, "step": 128 }, { "epoch": 0.38, "learning_rate": 0.00045330363394785467, "loss": 0.7149, "step": 129 }, { "epoch": 0.39, "learning_rate": 0.0004504465027080806, "loss": 0.7987, "step": 130 }, { "epoch": 0.39, "learning_rate": 0.00044757793312287807, "loss": 0.7047, "step": 131 }, { "epoch": 0.39, "learning_rate": 0.00044469818675147024, "loss": 0.7322, "step": 132 }, { "epoch": 0.4, "learning_rate": 0.0004418075261721916, "loss": 0.71, "step": 133 }, { "epoch": 0.4, "learning_rate": 0.0004389062149585456, "loss": 0.7306, "step": 134 }, { "epoch": 0.4, "learning_rate": 0.0004359945176551721, "loss": 0.6989, "step": 135 }, { "epoch": 0.4, "learning_rate": 0.00043307269975372513, "loss": 0.6898, "step": 136 }, { "epoch": 0.41, "learning_rate": 0.0004301410276686663, "loss": 0.7431, "step": 137 }, { "epoch": 0.41, "learning_rate": 0.00042719976871297155, "loss": 0.7236, "step": 138 }, { "epoch": 0.41, "learning_rate": 0.0004242491910737582, "loss": 0.7704, "step": 139 }, { "epoch": 0.42, "learning_rate": 0.0004212895637878311, "loss": 0.7125, "step": 140 }, { "epoch": 0.42, "learning_rate": 0.00041832115671715107, "loss": 0.7869, "step": 141 }, { "epoch": 0.42, "learning_rate": 0.00041534424052422966, "loss": 0.714, "step": 142 }, { "epoch": 0.43, "learning_rate": 0.00041235908664744866, "loss": 0.6927, "step": 143 }, { "epoch": 0.43, "learning_rate": 0.00040936596727631104, "loss": 0.7168, "step": 144 }, { "epoch": 0.43, "learning_rate": 0.0004063651553266216, "loss": 0.7199, "step": 145 }, { "epoch": 0.43, "learning_rate": 0.00040335692441560304, "loss": 0.7084, "step": 146 }, { "epoch": 0.44, "learning_rate": 0.00040034154883694667, "loss": 0.728, "step": 147 }, { "epoch": 0.44, "learning_rate": 0.00039731930353580216, "loss": 0.7368, "step": 148 }, { "epoch": 0.44, "learning_rate": 0.0003942904640837078, "loss": 0.7298, "step": 149 }, { "epoch": 0.45, "learning_rate": 0.00039125530665346355, "loss": 0.73, "step": 150 }, { "epoch": 0.45, "learning_rate": 0.00038821410799394935, "loss": 0.6635, "step": 151 }, { "epoch": 0.45, "learning_rate": 0.0003851671454048909, "loss": 0.7228, "step": 152 }, { "epoch": 0.46, "learning_rate": 0.00038211469671157496, "loss": 0.7276, "step": 153 }, { "epoch": 0.46, "learning_rate": 0.00037905704023951726, "loss": 0.7386, "step": 154 }, { "epoch": 0.46, "learning_rate": 0.0003759944547890843, "loss": 0.7577, "step": 155 }, { "epoch": 0.46, "learning_rate": 0.0003729272196100721, "loss": 0.6462, "step": 156 }, { "epoch": 0.47, "learning_rate": 0.0003698556143762437, "loss": 0.7328, "step": 157 }, { "epoch": 0.47, "learning_rate": 0.0003667799191598287, "loss": 0.7096, "step": 158 }, { "epoch": 0.47, "learning_rate": 0.00036370041440598517, "loss": 0.6856, "step": 159 }, { "epoch": 0.48, "learning_rate": 0.0003606173809072294, "loss": 0.7114, "step": 160 }, { "epoch": 0.48, "learning_rate": 0.000357531099777832, "loss": 0.694, "step": 161 }, { "epoch": 0.48, "learning_rate": 0.00035444185242818624, "loss": 0.7436, "step": 162 }, { "epoch": 0.48, "learning_rate": 0.0003513499205391482, "loss": 0.727, "step": 163 }, { "epoch": 0.49, "learning_rate": 0.00034825558603635346, "loss": 0.6991, "step": 164 }, { "epoch": 0.49, "learning_rate": 0.0003451591310645103, "loss": 0.6891, "step": 165 }, { "epoch": 0.49, "learning_rate": 0.0003420608379616738, "loss": 0.6636, "step": 166 }, { "epoch": 0.5, "learning_rate": 0.0003389609892335013, "loss": 0.6759, "step": 167 }, { "epoch": 0.5, "learning_rate": 0.0003358598675274942, "loss": 0.7353, "step": 168 }, { "epoch": 0.5, "learning_rate": 0.00033275775560722527, "loss": 0.6926, "step": 169 }, { "epoch": 0.51, "learning_rate": 0.0003296549363265559, "loss": 0.7334, "step": 170 }, { "epoch": 0.51, "learning_rate": 0.0003265516926038455, "loss": 0.6609, "step": 171 }, { "epoch": 0.51, "learning_rate": 0.0003234483073961544, "loss": 0.6996, "step": 172 }, { "epoch": 0.51, "learning_rate": 0.0003203450636734441, "loss": 0.6515, "step": 173 }, { "epoch": 0.52, "learning_rate": 0.00031724224439277476, "loss": 0.7299, "step": 174 }, { "epoch": 0.52, "learning_rate": 0.00031414013247250586, "loss": 0.7283, "step": 175 }, { "epoch": 0.52, "learning_rate": 0.0003110390107664987, "loss": 0.6697, "step": 176 }, { "epoch": 0.53, "learning_rate": 0.00030793916203832625, "loss": 0.6834, "step": 177 }, { "epoch": 0.53, "learning_rate": 0.00030484086893548966, "loss": 0.7315, "step": 178 }, { "epoch": 0.53, "learning_rate": 0.0003017444139636465, "loss": 0.721, "step": 179 }, { "epoch": 0.54, "learning_rate": 0.0002986500794608518, "loss": 0.6572, "step": 180 }, { "epoch": 0.54, "learning_rate": 0.0002955581475718138, "loss": 0.7086, "step": 181 }, { "epoch": 0.54, "learning_rate": 0.000292468900222168, "loss": 0.7413, "step": 182 }, { "epoch": 0.54, "learning_rate": 0.0002893826190927707, "loss": 0.7398, "step": 183 }, { "epoch": 0.55, "learning_rate": 0.0002862995855940148, "loss": 0.6878, "step": 184 }, { "epoch": 0.55, "learning_rate": 0.00028322008084017135, "loss": 0.7316, "step": 185 }, { "epoch": 0.55, "learning_rate": 0.0002801443856237563, "loss": 0.7612, "step": 186 }, { "epoch": 0.56, "learning_rate": 0.000277072780389928, "loss": 0.7012, "step": 187 }, { "epoch": 0.56, "learning_rate": 0.0002740055452109156, "loss": 0.7154, "step": 188 }, { "epoch": 0.56, "learning_rate": 0.0002709429597604827, "loss": 0.707, "step": 189 }, { "epoch": 0.57, "learning_rate": 0.0002678853032884251, "loss": 0.6693, "step": 190 }, { "epoch": 0.57, "learning_rate": 0.0002648328545951092, "loss": 0.7198, "step": 191 }, { "epoch": 0.57, "learning_rate": 0.0002617858920060506, "loss": 0.6859, "step": 192 }, { "epoch": 0.57, "learning_rate": 0.0002587446933465364, "loss": 0.7213, "step": 193 }, { "epoch": 0.58, "learning_rate": 0.00025570953591629226, "loss": 0.7649, "step": 194 }, { "epoch": 0.58, "learning_rate": 0.0002526806964641978, "loss": 0.6622, "step": 195 }, { "epoch": 0.58, "learning_rate": 0.0002496584511630533, "loss": 0.6788, "step": 196 }, { "epoch": 0.59, "learning_rate": 0.000246643075584397, "loss": 0.7441, "step": 197 }, { "epoch": 0.59, "learning_rate": 0.00024363484467337842, "loss": 0.6831, "step": 198 }, { "epoch": 0.59, "learning_rate": 0.0002406340327236891, "loss": 0.721, "step": 199 }, { "epoch": 0.59, "learning_rate": 0.00023764091335255131, "loss": 0.6427, "step": 200 }, { "epoch": 0.6, "learning_rate": 0.00023465575947577034, "loss": 0.6415, "step": 201 }, { "epoch": 0.6, "learning_rate": 0.0002316788432828489, "loss": 0.6217, "step": 202 }, { "epoch": 0.6, "learning_rate": 0.00022871043621216898, "loss": 0.7135, "step": 203 }, { "epoch": 0.61, "learning_rate": 0.0002257508089262417, "loss": 0.6396, "step": 204 }, { "epoch": 0.61, "learning_rate": 0.0002228002312870284, "loss": 0.6426, "step": 205 }, { "epoch": 0.61, "learning_rate": 0.0002198589723313337, "loss": 0.7122, "step": 206 }, { "epoch": 0.62, "learning_rate": 0.00021692730024627484, "loss": 0.6733, "step": 207 }, { "epoch": 0.62, "learning_rate": 0.000214005482344828, "loss": 0.6623, "step": 208 }, { "epoch": 0.62, "learning_rate": 0.00021109378504145427, "loss": 0.6955, "step": 209 }, { "epoch": 0.62, "learning_rate": 0.00020819247382780837, "loss": 0.7119, "step": 210 }, { "epoch": 0.63, "learning_rate": 0.0002053018132485298, "loss": 0.7348, "step": 211 }, { "epoch": 0.63, "learning_rate": 0.00020242206687712196, "loss": 0.6481, "step": 212 }, { "epoch": 0.63, "learning_rate": 0.00019955349729191941, "loss": 0.7042, "step": 213 }, { "epoch": 0.64, "learning_rate": 0.00019669636605214536, "loss": 0.6279, "step": 214 }, { "epoch": 0.64, "learning_rate": 0.00019385093367406254, "loss": 0.7328, "step": 215 }, { "epoch": 0.64, "learning_rate": 0.00019101745960721896, "loss": 0.6726, "step": 216 }, { "epoch": 0.65, "learning_rate": 0.00018819620221079117, "loss": 0.6294, "step": 217 }, { "epoch": 0.65, "learning_rate": 0.00018538741873002672, "loss": 0.7579, "step": 218 }, { "epoch": 0.65, "learning_rate": 0.0001825913652727883, "loss": 0.6927, "step": 219 }, { "epoch": 0.65, "learning_rate": 0.0001798082967862013, "loss": 0.7399, "step": 220 }, { "epoch": 0.66, "learning_rate": 0.00017703846703340817, "loss": 0.7435, "step": 221 }, { "epoch": 0.66, "learning_rate": 0.000174282128570429, "loss": 0.6239, "step": 222 }, { "epoch": 0.66, "learning_rate": 0.0001715395327231343, "loss": 0.6879, "step": 223 }, { "epoch": 0.67, "learning_rate": 0.00016881092956432775, "loss": 0.6886, "step": 224 }, { "epoch": 0.67, "learning_rate": 0.00016609656789094506, "loss": 0.7171, "step": 225 }, { "epoch": 0.67, "learning_rate": 0.00016339669520136827, "loss": 0.673, "step": 226 }, { "epoch": 0.68, "learning_rate": 0.00016071155767285826, "loss": 0.6988, "step": 227 }, { "epoch": 0.68, "learning_rate": 0.0001580414001391084, "loss": 0.7076, "step": 228 }, { "epoch": 0.68, "learning_rate": 0.00015538646606792005, "loss": 0.7, "step": 229 }, { "epoch": 0.68, "learning_rate": 0.00015274699753900343, "loss": 0.7011, "step": 230 }, { "epoch": 0.69, "learning_rate": 0.00015012323522190388, "loss": 0.68, "step": 231 }, { "epoch": 0.69, "learning_rate": 0.00014751541835405772, "loss": 0.6937, "step": 232 }, { "epoch": 0.69, "learning_rate": 0.00014492378471897817, "loss": 0.6418, "step": 233 }, { "epoch": 0.7, "learning_rate": 0.0001423485706245744, "loss": 0.7231, "step": 234 }, { "epoch": 0.7, "learning_rate": 0.00013979001088160417, "loss": 0.6351, "step": 235 }, { "epoch": 0.7, "learning_rate": 0.0001372483387822637, "loss": 0.7035, "step": 236 }, { "epoch": 0.7, "learning_rate": 0.00013472378607891597, "loss": 0.6478, "step": 237 }, { "epoch": 0.71, "learning_rate": 0.00013221658296295956, "loss": 0.7228, "step": 238 }, { "epoch": 0.71, "learning_rate": 0.00012972695804383885, "loss": 0.7148, "step": 239 }, { "epoch": 0.71, "learning_rate": 0.00012725513832819964, "loss": 0.7167, "step": 240 }, { "epoch": 0.72, "learning_rate": 0.00012480134919919038, "loss": 0.7208, "step": 241 }, { "epoch": 0.72, "learning_rate": 0.00012236581439591161, "loss": 0.6406, "step": 242 }, { "epoch": 0.72, "learning_rate": 0.00011994875599301488, "loss": 0.6974, "step": 243 }, { "epoch": 0.73, "learning_rate": 0.00011755039438045392, "loss": 0.6357, "step": 244 }, { "epoch": 0.73, "learning_rate": 0.0001151709482433892, "loss": 0.7179, "step": 245 }, { "epoch": 0.73, "learning_rate": 0.00011281063454224838, "loss": 0.7553, "step": 246 }, { "epoch": 0.73, "learning_rate": 0.00011046966849294289, "loss": 0.7019, "step": 247 }, { "epoch": 0.74, "learning_rate": 0.00010814826354724483, "loss": 0.6608, "step": 248 }, { "epoch": 0.74, "learning_rate": 0.00010584663137332396, "loss": 0.6674, "step": 249 }, { "epoch": 0.74, "learning_rate": 0.00010356498183644816, "loss": 0.7079, "step": 250 } ], "logging_steps": 1, "max_steps": 336, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "total_flos": 3.167057329693655e+17, "train_batch_size": 6, "trial_name": null, "trial_params": null }