{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "global_step": 367, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1e-05, "loss": 3.2173, "step": 1 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 3.2031, "step": 2 }, { "epoch": 0.01, "learning_rate": 3.0000000000000004e-05, "loss": 3.1896, "step": 3 }, { "epoch": 0.01, "learning_rate": 4e-05, "loss": 3.5399, "step": 4 }, { "epoch": 0.01, "learning_rate": 3.999925099660787e-05, "loss": 3.249, "step": 5 }, { "epoch": 0.02, "learning_rate": 3.999700404253208e-05, "loss": 4.346, "step": 6 }, { "epoch": 0.02, "learning_rate": 3.9993259306070256e-05, "loss": 4.1291, "step": 7 }, { "epoch": 0.02, "learning_rate": 3.998801706770442e-05, "loss": 3.7064, "step": 8 }, { "epoch": 0.02, "learning_rate": 3.9981277720080015e-05, "loss": 3.5643, "step": 9 }, { "epoch": 0.03, "learning_rate": 3.9973041767976466e-05, "loss": 3.5031, "step": 10 }, { "epoch": 0.03, "learning_rate": 3.996330982826937e-05, "loss": 3.4195, "step": 11 }, { "epoch": 0.03, "learning_rate": 3.995208262988431e-05, "loss": 3.3598, "step": 12 }, { "epoch": 0.04, "learning_rate": 3.9939361013742275e-05, "loss": 3.3177, "step": 13 }, { "epoch": 0.04, "learning_rate": 3.99251459326966e-05, "loss": 3.2443, "step": 14 }, { "epoch": 0.04, "learning_rate": 3.9909438451461695e-05, "loss": 3.2215, "step": 15 }, { "epoch": 0.04, "learning_rate": 3.989223974653323e-05, "loss": 3.149, "step": 16 }, { "epoch": 0.05, "learning_rate": 3.9873551106100035e-05, "loss": 3.1167, "step": 17 }, { "epoch": 0.05, "learning_rate": 3.985337392994763e-05, "loss": 3.0538, "step": 18 }, { "epoch": 0.05, "learning_rate": 3.983170972935333e-05, "loss": 3.029, "step": 19 }, { "epoch": 0.05, "learning_rate": 3.9808560126973126e-05, "loss": 3.0055, "step": 20 }, { "epoch": 0.06, "learning_rate": 3.9783926856720085e-05, "loss": 2.9598, "step": 21 }, { "epoch": 0.06, "learning_rate": 3.975781176363451e-05, "loss": 2.9572, "step": 22 }, { "epoch": 0.06, "learning_rate": 3.973021680374571e-05, "loss": 2.9004, "step": 23 }, { "epoch": 0.07, "learning_rate": 3.9701144043925576e-05, "loss": 2.8984, "step": 24 }, { "epoch": 0.07, "learning_rate": 3.9670595661733654e-05, "loss": 2.874, "step": 25 }, { "epoch": 0.07, "learning_rate": 3.963857394525413e-05, "loss": 2.8689, "step": 26 }, { "epoch": 0.07, "learning_rate": 3.960508129292446e-05, "loss": 2.8885, "step": 27 }, { "epoch": 0.08, "learning_rate": 3.9570120213355636e-05, "loss": 2.8331, "step": 28 }, { "epoch": 0.08, "learning_rate": 3.953369332514438e-05, "loss": 2.8093, "step": 29 }, { "epoch": 0.08, "learning_rate": 3.949580335667699e-05, "loss": 2.7919, "step": 30 }, { "epoch": 0.08, "learning_rate": 3.945645314592495e-05, "loss": 2.7876, "step": 31 }, { "epoch": 0.09, "learning_rate": 3.9415645640232386e-05, "loss": 2.791, "step": 32 }, { "epoch": 0.09, "learning_rate": 3.937338389609533e-05, "loss": 2.7751, "step": 33 }, { "epoch": 0.09, "learning_rate": 3.932967107893274e-05, "loss": 2.7673, "step": 34 }, { "epoch": 0.1, "learning_rate": 3.928451046284946e-05, "loss": 2.74, "step": 35 }, { "epoch": 0.1, "learning_rate": 3.923790543039095e-05, "loss": 2.7354, "step": 36 }, { "epoch": 0.1, "learning_rate": 3.9189859472289956e-05, "loss": 2.699, "step": 37 }, { "epoch": 0.1, "learning_rate": 3.9140376187205025e-05, "loss": 2.7079, "step": 38 }, { "epoch": 0.11, "learning_rate": 3.9089459281451e-05, "loss": 2.6673, "step": 39 }, { "epoch": 0.11, "learning_rate": 3.903711256872139e-05, "loss": 2.6733, "step": 40 }, { "epoch": 0.11, "learning_rate": 3.898333996980275e-05, "loss": 2.659, "step": 41 }, { "epoch": 0.11, "learning_rate": 3.8928145512280973e-05, "loss": 2.6657, "step": 42 }, { "epoch": 0.12, "learning_rate": 3.8871533330239646e-05, "loss": 2.6348, "step": 43 }, { "epoch": 0.12, "learning_rate": 3.8813507663950404e-05, "loss": 2.6497, "step": 44 }, { "epoch": 0.12, "learning_rate": 3.8754072859555346e-05, "loss": 2.5951, "step": 45 }, { "epoch": 0.13, "learning_rate": 3.869323336874146e-05, "loss": 2.6479, "step": 46 }, { "epoch": 0.13, "learning_rate": 3.8630993748407274e-05, "loss": 2.6018, "step": 47 }, { "epoch": 0.13, "learning_rate": 3.856735866032145e-05, "loss": 2.5817, "step": 48 }, { "epoch": 0.13, "learning_rate": 3.8502332870773675e-05, "loss": 2.5903, "step": 49 }, { "epoch": 0.14, "learning_rate": 3.843592125021764e-05, "loss": 2.5757, "step": 50 }, { "epoch": 0.14, "learning_rate": 3.8368128772906254e-05, "loss": 2.5705, "step": 51 }, { "epoch": 0.14, "learning_rate": 3.829896051651907e-05, "loss": 2.5914, "step": 52 }, { "epoch": 0.14, "learning_rate": 3.822842166178194e-05, "loss": 2.5694, "step": 53 }, { "epoch": 0.15, "learning_rate": 3.815651749207902e-05, "loss": 2.5826, "step": 54 }, { "epoch": 0.15, "learning_rate": 3.8083253393057006e-05, "loss": 2.5435, "step": 55 }, { "epoch": 0.15, "learning_rate": 3.8008634852221777e-05, "loss": 2.5286, "step": 56 }, { "epoch": 0.16, "learning_rate": 3.793266745852735e-05, "loss": 2.5152, "step": 57 }, { "epoch": 0.16, "learning_rate": 3.785535690195728e-05, "loss": 2.4879, "step": 58 }, { "epoch": 0.16, "learning_rate": 3.7776708973098476e-05, "loss": 2.5058, "step": 59 }, { "epoch": 0.16, "learning_rate": 3.769672956270749e-05, "loss": 2.5437, "step": 60 }, { "epoch": 0.17, "learning_rate": 3.761542466126929e-05, "loss": 2.4668, "step": 61 }, { "epoch": 0.17, "learning_rate": 3.753280035854857e-05, "loss": 2.501, "step": 62 }, { "epoch": 0.17, "learning_rate": 3.7448862843133644e-05, "loss": 2.4697, "step": 63 }, { "epoch": 0.17, "learning_rate": 3.736361840197288e-05, "loss": 2.4651, "step": 64 }, { "epoch": 0.18, "learning_rate": 3.727707341990383e-05, "loss": 2.4394, "step": 65 }, { "epoch": 0.18, "learning_rate": 3.718923437917503e-05, "loss": 2.4239, "step": 66 }, { "epoch": 0.18, "learning_rate": 3.7100107858960404e-05, "loss": 2.4541, "step": 67 }, { "epoch": 0.19, "learning_rate": 3.7009700534866557e-05, "loss": 2.445, "step": 68 }, { "epoch": 0.19, "learning_rate": 3.691801917843273e-05, "loss": 2.422, "step": 69 }, { "epoch": 0.19, "learning_rate": 3.6825070656623626e-05, "loss": 2.4274, "step": 70 }, { "epoch": 0.19, "learning_rate": 3.6730861931315054e-05, "loss": 2.4062, "step": 71 }, { "epoch": 0.2, "learning_rate": 3.663540005877249e-05, "loss": 2.4, "step": 72 }, { "epoch": 0.2, "learning_rate": 3.653869218912258e-05, "loss": 2.4229, "step": 73 }, { "epoch": 0.2, "learning_rate": 3.6440745565817556e-05, "loss": 2.3909, "step": 74 }, { "epoch": 0.2, "learning_rate": 3.6341567525092727e-05, "loss": 2.3607, "step": 75 }, { "epoch": 0.21, "learning_rate": 3.6241165495417006e-05, "loss": 2.399, "step": 76 }, { "epoch": 0.21, "learning_rate": 3.613954699693645e-05, "loss": 2.3902, "step": 77 }, { "epoch": 0.21, "learning_rate": 3.603671964091107e-05, "loss": 2.3692, "step": 78 }, { "epoch": 0.22, "learning_rate": 3.593269112914472e-05, "loss": 2.3436, "step": 79 }, { "epoch": 0.22, "learning_rate": 3.582746925340822e-05, "loss": 2.3629, "step": 80 }, { "epoch": 0.22, "learning_rate": 3.5721061894855756e-05, "loss": 2.3287, "step": 81 }, { "epoch": 0.22, "learning_rate": 3.561347702343456e-05, "loss": 2.3376, "step": 82 }, { "epoch": 0.23, "learning_rate": 3.5504722697288025e-05, "loss": 2.3418, "step": 83 }, { "epoch": 0.23, "learning_rate": 3.539480706215204e-05, "loss": 2.3182, "step": 84 }, { "epoch": 0.23, "learning_rate": 3.5283738350744986e-05, "loss": 2.3287, "step": 85 }, { "epoch": 0.23, "learning_rate": 3.517152488215101e-05, "loss": 2.3222, "step": 86 }, { "epoch": 0.24, "learning_rate": 3.505817506119698e-05, "loss": 2.3393, "step": 87 }, { "epoch": 0.24, "learning_rate": 3.494369737782293e-05, "loss": 2.322, "step": 88 }, { "epoch": 0.24, "learning_rate": 3.4828100406446184e-05, "loss": 2.2907, "step": 89 }, { "epoch": 0.25, "learning_rate": 3.47113928053191e-05, "loss": 2.3132, "step": 90 }, { "epoch": 0.25, "learning_rate": 3.45935833158806e-05, "loss": 2.2972, "step": 91 }, { "epoch": 0.25, "learning_rate": 3.44746807621014e-05, "loss": 2.2643, "step": 92 }, { "epoch": 0.25, "learning_rate": 3.4354694049823124e-05, "loss": 2.2527, "step": 93 }, { "epoch": 0.26, "learning_rate": 3.4233632166091205e-05, "loss": 2.2746, "step": 94 }, { "epoch": 0.26, "learning_rate": 3.4111504178481813e-05, "loss": 2.2479, "step": 95 }, { "epoch": 0.26, "learning_rate": 3.3988319234422636e-05, "loss": 2.2954, "step": 96 }, { "epoch": 0.26, "learning_rate": 3.3864086560507785e-05, "loss": 2.2455, "step": 97 }, { "epoch": 0.27, "learning_rate": 3.373881546180666e-05, "loss": 2.2442, "step": 98 }, { "epoch": 0.27, "learning_rate": 3.361251532116707e-05, "loss": 2.2035, "step": 99 }, { "epoch": 0.27, "learning_rate": 3.3485195598512365e-05, "loss": 2.2234, "step": 100 }, { "epoch": 0.28, "learning_rate": 3.3356865830132976e-05, "loss": 2.2226, "step": 101 }, { "epoch": 0.28, "learning_rate": 3.322753562797209e-05, "loss": 2.2167, "step": 102 }, { "epoch": 0.28, "learning_rate": 3.309721467890571e-05, "loss": 2.2481, "step": 103 }, { "epoch": 0.28, "learning_rate": 3.296591274401712e-05, "loss": 2.1965, "step": 104 }, { "epoch": 0.29, "learning_rate": 3.28336396578658e-05, "loss": 2.1935, "step": 105 }, { "epoch": 0.29, "learning_rate": 3.270040532775077e-05, "loss": 2.196, "step": 106 }, { "epoch": 0.29, "learning_rate": 3.256621973296854e-05, "loss": 2.1733, "step": 107 }, { "epoch": 0.29, "learning_rate": 3.243109292406568e-05, "loss": 2.1735, "step": 108 }, { "epoch": 0.3, "learning_rate": 3.229503502208602e-05, "loss": 2.1915, "step": 109 }, { "epoch": 0.3, "learning_rate": 3.215805621781256e-05, "loss": 2.1795, "step": 110 }, { "epoch": 0.3, "learning_rate": 3.202016677100422e-05, "loss": 2.2021, "step": 111 }, { "epoch": 0.31, "learning_rate": 3.188137700962733e-05, "loss": 2.201, "step": 112 }, { "epoch": 0.31, "learning_rate": 3.174169732908209e-05, "loss": 2.1857, "step": 113 }, { "epoch": 0.31, "learning_rate": 3.1601138191423966e-05, "loss": 2.1818, "step": 114 }, { "epoch": 0.31, "learning_rate": 3.145971012458005e-05, "loss": 2.1438, "step": 115 }, { "epoch": 0.32, "learning_rate": 3.13174237215605e-05, "loss": 2.1359, "step": 116 }, { "epoch": 0.32, "learning_rate": 3.11742896396652e-05, "loss": 2.1668, "step": 117 }, { "epoch": 0.32, "learning_rate": 3.103031859968542e-05, "loss": 2.1769, "step": 118 }, { "epoch": 0.32, "learning_rate": 3.0885521385100885e-05, "loss": 2.1445, "step": 119 }, { "epoch": 0.33, "learning_rate": 3.0739908841272095e-05, "loss": 2.1193, "step": 120 }, { "epoch": 0.33, "learning_rate": 3.059349187462798e-05, "loss": 2.1285, "step": 121 }, { "epoch": 0.33, "learning_rate": 3.044628145184899e-05, "loss": 2.1411, "step": 122 }, { "epoch": 0.34, "learning_rate": 3.0298288599045747e-05, "loss": 2.1321, "step": 123 }, { "epoch": 0.34, "learning_rate": 3.0149524400933114e-05, "loss": 2.1008, "step": 124 }, { "epoch": 0.34, "learning_rate": 3.0000000000000004e-05, "loss": 2.1437, "step": 125 }, { "epoch": 0.34, "learning_rate": 2.9849726595674756e-05, "loss": 2.1224, "step": 126 }, { "epoch": 0.35, "learning_rate": 2.9698715443486338e-05, "loss": 2.083, "step": 127 }, { "epoch": 0.35, "learning_rate": 2.9546977854221266e-05, "loss": 2.1156, "step": 128 }, { "epoch": 0.35, "learning_rate": 2.9394525193076454e-05, "loss": 2.1127, "step": 129 }, { "epoch": 0.35, "learning_rate": 2.9241368878807925e-05, "loss": 2.0949, "step": 130 }, { "epoch": 0.36, "learning_rate": 2.908752038287558e-05, "loss": 2.0821, "step": 131 }, { "epoch": 0.36, "learning_rate": 2.8932991228583954e-05, "loss": 2.0735, "step": 132 }, { "epoch": 0.36, "learning_rate": 2.877779299021912e-05, "loss": 2.0996, "step": 133 }, { "epoch": 0.37, "learning_rate": 2.8621937292181768e-05, "loss": 2.0967, "step": 134 }, { "epoch": 0.37, "learning_rate": 2.846543580811656e-05, "loss": 2.0313, "step": 135 }, { "epoch": 0.37, "learning_rate": 2.8308300260037734e-05, "loss": 2.0891, "step": 136 }, { "epoch": 0.37, "learning_rate": 2.8150542417451144e-05, "loss": 2.0816, "step": 137 }, { "epoch": 0.38, "learning_rate": 2.7992174096472714e-05, "loss": 2.0765, "step": 138 }, { "epoch": 0.38, "learning_rate": 2.783320715894341e-05, "loss": 2.026, "step": 139 }, { "epoch": 0.38, "learning_rate": 2.767365351154077e-05, "loss": 2.0424, "step": 140 }, { "epoch": 0.38, "learning_rate": 2.751352510488711e-05, "loss": 2.074, "step": 141 }, { "epoch": 0.39, "learning_rate": 2.7352833932654402e-05, "loss": 2.0189, "step": 142 }, { "epoch": 0.39, "learning_rate": 2.719159203066597e-05, "loss": 2.0283, "step": 143 }, { "epoch": 0.39, "learning_rate": 2.702981147599495e-05, "loss": 2.0373, "step": 144 }, { "epoch": 0.4, "learning_rate": 2.6867504386059776e-05, "loss": 2.0141, "step": 145 }, { "epoch": 0.4, "learning_rate": 2.6704682917716528e-05, "loss": 2.0197, "step": 146 }, { "epoch": 0.4, "learning_rate": 2.6541359266348437e-05, "loss": 2.0168, "step": 147 }, { "epoch": 0.4, "learning_rate": 2.637754566495238e-05, "loss": 2.0032, "step": 148 }, { "epoch": 0.41, "learning_rate": 2.6213254383222665e-05, "loss": 2.0038, "step": 149 }, { "epoch": 0.41, "learning_rate": 2.6048497726632023e-05, "loss": 1.9901, "step": 150 }, { "epoch": 0.41, "learning_rate": 2.588328803550993e-05, "loss": 1.9917, "step": 151 }, { "epoch": 0.41, "learning_rate": 2.571763768411829e-05, "loss": 1.9718, "step": 152 }, { "epoch": 0.42, "learning_rate": 2.555155907972461e-05, "loss": 2.0155, "step": 153 }, { "epoch": 0.42, "learning_rate": 2.5385064661672692e-05, "loss": 2.0072, "step": 154 }, { "epoch": 0.42, "learning_rate": 2.5218166900450937e-05, "loss": 1.9935, "step": 155 }, { "epoch": 0.43, "learning_rate": 2.5050878296758255e-05, "loss": 1.9879, "step": 156 }, { "epoch": 0.43, "learning_rate": 2.488321138056783e-05, "loss": 2.0028, "step": 157 }, { "epoch": 0.43, "learning_rate": 2.471517871018855e-05, "loss": 1.9669, "step": 158 }, { "epoch": 0.43, "learning_rate": 2.4546792871324424e-05, "loss": 1.9854, "step": 159 }, { "epoch": 0.44, "learning_rate": 2.43780664761319e-05, "loss": 1.9613, "step": 160 }, { "epoch": 0.44, "learning_rate": 2.4209012162275217e-05, "loss": 1.9474, "step": 161 }, { "epoch": 0.44, "learning_rate": 2.4039642591979825e-05, "loss": 1.9424, "step": 162 }, { "epoch": 0.44, "learning_rate": 2.3869970451083996e-05, "loss": 1.9656, "step": 163 }, { "epoch": 0.45, "learning_rate": 2.370000844808863e-05, "loss": 1.9686, "step": 164 }, { "epoch": 0.45, "learning_rate": 2.3529769313205423e-05, "loss": 1.9655, "step": 165 }, { "epoch": 0.45, "learning_rate": 2.3359265797403297e-05, "loss": 1.97, "step": 166 }, { "epoch": 0.46, "learning_rate": 2.318851067145345e-05, "loss": 1.9435, "step": 167 }, { "epoch": 0.46, "learning_rate": 2.3017516724972716e-05, "loss": 1.972, "step": 168 }, { "epoch": 0.46, "learning_rate": 2.2846296765465708e-05, "loss": 1.9789, "step": 169 }, { "epoch": 0.46, "learning_rate": 2.267486361736546e-05, "loss": 1.9466, "step": 170 }, { "epoch": 0.47, "learning_rate": 2.250323012107292e-05, "loss": 1.9231, "step": 171 }, { "epoch": 0.47, "learning_rate": 2.2331409131995186e-05, "loss": 1.9672, "step": 172 }, { "epoch": 0.47, "learning_rate": 2.2159413519582623e-05, "loss": 1.9265, "step": 173 }, { "epoch": 0.47, "learning_rate": 2.1987256166364937e-05, "loss": 1.911, "step": 174 }, { "epoch": 0.48, "learning_rate": 2.1814949966986288e-05, "loss": 1.9236, "step": 175 }, { "epoch": 0.48, "learning_rate": 2.1642507827239455e-05, "loss": 1.9543, "step": 176 }, { "epoch": 0.48, "learning_rate": 2.1469942663099208e-05, "loss": 1.9296, "step": 177 }, { "epoch": 0.49, "learning_rate": 2.129726739975486e-05, "loss": 1.9292, "step": 178 }, { "epoch": 0.49, "learning_rate": 2.112449497064223e-05, "loss": 1.9132, "step": 179 }, { "epoch": 0.49, "learning_rate": 2.095163831647485e-05, "loss": 1.9087, "step": 180 }, { "epoch": 0.49, "learning_rate": 2.0778710384274757e-05, "loss": 1.9137, "step": 181 }, { "epoch": 0.5, "learning_rate": 2.0605724126402734e-05, "loss": 1.8797, "step": 182 }, { "epoch": 0.5, "learning_rate": 2.0432692499588174e-05, "loss": 1.8937, "step": 183 }, { "epoch": 0.5, "learning_rate": 2.025962846395862e-05, "loss": 1.9124, "step": 184 }, { "epoch": 0.5, "learning_rate": 2.0086544982069046e-05, "loss": 1.8763, "step": 185 }, { "epoch": 0.51, "learning_rate": 1.9913455017930964e-05, "loss": 1.8699, "step": 186 }, { "epoch": 0.51, "learning_rate": 1.9740371536041388e-05, "loss": 1.8841, "step": 187 }, { "epoch": 0.51, "learning_rate": 1.9567307500411833e-05, "loss": 1.9116, "step": 188 }, { "epoch": 0.51, "learning_rate": 1.939427587359727e-05, "loss": 1.8997, "step": 189 }, { "epoch": 0.52, "learning_rate": 1.922128961572525e-05, "loss": 1.8716, "step": 190 }, { "epoch": 0.52, "learning_rate": 1.9048361683525155e-05, "loss": 1.8651, "step": 191 }, { "epoch": 0.52, "learning_rate": 1.8875505029357775e-05, "loss": 1.8827, "step": 192 }, { "epoch": 0.53, "learning_rate": 1.8702732600245138e-05, "loss": 1.871, "step": 193 }, { "epoch": 0.53, "learning_rate": 1.8530057336900805e-05, "loss": 1.8788, "step": 194 }, { "epoch": 0.53, "learning_rate": 1.835749217276055e-05, "loss": 1.8641, "step": 195 }, { "epoch": 0.53, "learning_rate": 1.8185050033013715e-05, "loss": 1.8856, "step": 196 }, { "epoch": 0.54, "learning_rate": 1.8012743833635067e-05, "loss": 1.8524, "step": 197 }, { "epoch": 0.54, "learning_rate": 1.7840586480417387e-05, "loss": 1.8142, "step": 198 }, { "epoch": 0.54, "learning_rate": 1.766859086800482e-05, "loss": 1.8361, "step": 199 }, { "epoch": 0.54, "learning_rate": 1.7496769878927085e-05, "loss": 1.8197, "step": 200 }, { "epoch": 0.55, "learning_rate": 1.7325136382634547e-05, "loss": 1.8723, "step": 201 }, { "epoch": 0.55, "learning_rate": 1.7153703234534302e-05, "loss": 1.8088, "step": 202 }, { "epoch": 0.55, "learning_rate": 1.6982483275027287e-05, "loss": 1.8264, "step": 203 }, { "epoch": 0.56, "learning_rate": 1.6811489328546557e-05, "loss": 1.7955, "step": 204 }, { "epoch": 0.56, "learning_rate": 1.6640734202596702e-05, "loss": 1.8337, "step": 205 }, { "epoch": 0.56, "learning_rate": 1.647023068679459e-05, "loss": 1.8571, "step": 206 }, { "epoch": 0.56, "learning_rate": 1.6299991551911373e-05, "loss": 1.8662, "step": 207 }, { "epoch": 0.57, "learning_rate": 1.6130029548916007e-05, "loss": 1.8593, "step": 208 }, { "epoch": 0.57, "learning_rate": 1.5960357408020178e-05, "loss": 1.8387, "step": 209 }, { "epoch": 0.57, "learning_rate": 1.579098783772479e-05, "loss": 1.8395, "step": 210 }, { "epoch": 0.57, "learning_rate": 1.5621933523868106e-05, "loss": 1.8271, "step": 211 }, { "epoch": 0.58, "learning_rate": 1.5453207128675583e-05, "loss": 1.8738, "step": 212 }, { "epoch": 0.58, "learning_rate": 1.5284821289811453e-05, "loss": 1.8598, "step": 213 }, { "epoch": 0.58, "learning_rate": 1.5116788619432177e-05, "loss": 1.832, "step": 214 }, { "epoch": 0.59, "learning_rate": 1.4949121703241747e-05, "loss": 1.8176, "step": 215 }, { "epoch": 0.59, "learning_rate": 1.4781833099549072e-05, "loss": 1.8302, "step": 216 }, { "epoch": 0.59, "learning_rate": 1.461493533832731e-05, "loss": 1.848, "step": 217 }, { "epoch": 0.59, "learning_rate": 1.4448440920275402e-05, "loss": 1.7919, "step": 218 }, { "epoch": 0.6, "learning_rate": 1.4282362315881719e-05, "loss": 1.8084, "step": 219 }, { "epoch": 0.6, "learning_rate": 1.4116711964490076e-05, "loss": 1.7979, "step": 220 }, { "epoch": 0.6, "learning_rate": 1.395150227336798e-05, "loss": 1.8167, "step": 221 }, { "epoch": 0.6, "learning_rate": 1.3786745616777348e-05, "loss": 1.8015, "step": 222 }, { "epoch": 0.61, "learning_rate": 1.3622454335047631e-05, "loss": 1.8359, "step": 223 }, { "epoch": 0.61, "learning_rate": 1.345864073365157e-05, "loss": 1.8111, "step": 224 }, { "epoch": 0.61, "learning_rate": 1.329531708228347e-05, "loss": 1.814, "step": 225 }, { "epoch": 0.62, "learning_rate": 1.3132495613940237e-05, "loss": 1.7876, "step": 226 }, { "epoch": 0.62, "learning_rate": 1.2970188524005058e-05, "loss": 1.7877, "step": 227 }, { "epoch": 0.62, "learning_rate": 1.2808407969334037e-05, "loss": 1.7683, "step": 228 }, { "epoch": 0.62, "learning_rate": 1.2647166067345598e-05, "loss": 1.8017, "step": 229 }, { "epoch": 0.63, "learning_rate": 1.24864748951129e-05, "loss": 1.7989, "step": 230 }, { "epoch": 0.63, "learning_rate": 1.2326346488459237e-05, "loss": 1.752, "step": 231 }, { "epoch": 0.63, "learning_rate": 1.2166792841056596e-05, "loss": 1.7771, "step": 232 }, { "epoch": 0.63, "learning_rate": 1.2007825903527287e-05, "loss": 1.7739, "step": 233 }, { "epoch": 0.64, "learning_rate": 1.1849457582548864e-05, "loss": 1.7821, "step": 234 }, { "epoch": 0.64, "learning_rate": 1.1691699739962275e-05, "loss": 1.7745, "step": 235 }, { "epoch": 0.64, "learning_rate": 1.153456419188345e-05, "loss": 1.7759, "step": 236 }, { "epoch": 0.65, "learning_rate": 1.137806270781824e-05, "loss": 1.795, "step": 237 }, { "epoch": 0.65, "learning_rate": 1.1222207009780888e-05, "loss": 1.7348, "step": 238 }, { "epoch": 0.65, "learning_rate": 1.1067008771416047e-05, "loss": 1.7385, "step": 239 }, { "epoch": 0.65, "learning_rate": 1.091247961712442e-05, "loss": 1.798, "step": 240 }, { "epoch": 0.66, "learning_rate": 1.0758631121192075e-05, "loss": 1.7615, "step": 241 }, { "epoch": 0.66, "learning_rate": 1.0605474806923556e-05, "loss": 1.7768, "step": 242 }, { "epoch": 0.66, "learning_rate": 1.0453022145778742e-05, "loss": 1.735, "step": 243 }, { "epoch": 0.66, "learning_rate": 1.0301284556513669e-05, "loss": 1.7635, "step": 244 }, { "epoch": 0.67, "learning_rate": 1.0150273404325244e-05, "loss": 1.7395, "step": 245 }, { "epoch": 0.67, "learning_rate": 1.0000000000000006e-05, "loss": 1.7238, "step": 246 }, { "epoch": 0.67, "learning_rate": 9.85047559906689e-06, "loss": 1.7633, "step": 247 }, { "epoch": 0.68, "learning_rate": 9.70171140095426e-06, "loss": 1.7585, "step": 248 }, { "epoch": 0.68, "learning_rate": 9.553718548151011e-06, "loss": 1.7379, "step": 249 }, { "epoch": 0.68, "learning_rate": 9.406508125372034e-06, "loss": 1.7427, "step": 250 }, { "epoch": 0.68, "learning_rate": 9.260091158727913e-06, "loss": 1.7205, "step": 251 }, { "epoch": 0.69, "learning_rate": 9.114478614899123e-06, "loss": 1.7576, "step": 252 }, { "epoch": 0.69, "learning_rate": 8.969681400314589e-06, "loss": 1.7559, "step": 253 }, { "epoch": 0.69, "learning_rate": 8.825710360334812e-06, "loss": 1.7465, "step": 254 }, { "epoch": 0.69, "learning_rate": 8.682576278439504e-06, "loss": 1.7571, "step": 255 }, { "epoch": 0.7, "learning_rate": 8.540289875419962e-06, "loss": 1.7239, "step": 256 }, { "epoch": 0.7, "learning_rate": 8.39886180857604e-06, "loss": 1.737, "step": 257 }, { "epoch": 0.7, "learning_rate": 8.258302670917915e-06, "loss": 1.7449, "step": 258 }, { "epoch": 0.71, "learning_rate": 8.118622990372676e-06, "loss": 1.7399, "step": 259 }, { "epoch": 0.71, "learning_rate": 7.979833228995782e-06, "loss": 1.7411, "step": 260 }, { "epoch": 0.71, "learning_rate": 7.841943782187435e-06, "loss": 1.7333, "step": 261 }, { "epoch": 0.71, "learning_rate": 7.704964977913984e-06, "loss": 1.7641, "step": 262 }, { "epoch": 0.72, "learning_rate": 7.568907075934322e-06, "loss": 1.7311, "step": 263 }, { "epoch": 0.72, "learning_rate": 7.433780267031463e-06, "loss": 1.7022, "step": 264 }, { "epoch": 0.72, "learning_rate": 7.299594672249231e-06, "loss": 1.7178, "step": 265 }, { "epoch": 0.72, "learning_rate": 7.166360342134202e-06, "loss": 1.7497, "step": 266 }, { "epoch": 0.73, "learning_rate": 7.034087255982882e-06, "loss": 1.7196, "step": 267 }, { "epoch": 0.73, "learning_rate": 6.902785321094301e-06, "loss": 1.707, "step": 268 }, { "epoch": 0.73, "learning_rate": 6.7724643720279156e-06, "loss": 1.7525, "step": 269 }, { "epoch": 0.74, "learning_rate": 6.643134169867031e-06, "loss": 1.6995, "step": 270 }, { "epoch": 0.74, "learning_rate": 6.514804401487642e-06, "loss": 1.7229, "step": 271 }, { "epoch": 0.74, "learning_rate": 6.38748467883294e-06, "loss": 1.7021, "step": 272 }, { "epoch": 0.74, "learning_rate": 6.261184538193341e-06, "loss": 1.6753, "step": 273 }, { "epoch": 0.75, "learning_rate": 6.135913439492227e-06, "loss": 1.7111, "step": 274 }, { "epoch": 0.75, "learning_rate": 6.01168076557737e-06, "loss": 1.7046, "step": 275 }, { "epoch": 0.75, "learning_rate": 5.888495821518194e-06, "loss": 1.7362, "step": 276 }, { "epoch": 0.75, "learning_rate": 5.7663678339087995e-06, "loss": 1.7811, "step": 277 }, { "epoch": 0.76, "learning_rate": 5.6453059501768806e-06, "loss": 1.6819, "step": 278 }, { "epoch": 0.76, "learning_rate": 5.5253192378985966e-06, "loss": 1.7154, "step": 279 }, { "epoch": 0.76, "learning_rate": 5.4064166841194e-06, "loss": 1.7069, "step": 280 }, { "epoch": 0.77, "learning_rate": 5.288607194680899e-06, "loss": 1.715, "step": 281 }, { "epoch": 0.77, "learning_rate": 5.171899593553824e-06, "loss": 1.7173, "step": 282 }, { "epoch": 0.77, "learning_rate": 5.056302622177074e-06, "loss": 1.6873, "step": 283 }, { "epoch": 0.77, "learning_rate": 4.941824938803024e-06, "loss": 1.7291, "step": 284 }, { "epoch": 0.78, "learning_rate": 4.828475117848992e-06, "loss": 1.6928, "step": 285 }, { "epoch": 0.78, "learning_rate": 4.716261649255021e-06, "loss": 1.6815, "step": 286 }, { "epoch": 0.78, "learning_rate": 4.605192937847962e-06, "loss": 1.6701, "step": 287 }, { "epoch": 0.78, "learning_rate": 4.495277302711982e-06, "loss": 1.7119, "step": 288 }, { "epoch": 0.79, "learning_rate": 4.386522976565439e-06, "loss": 1.6813, "step": 289 }, { "epoch": 0.79, "learning_rate": 4.278938105144255e-06, "loss": 1.6945, "step": 290 }, { "epoch": 0.79, "learning_rate": 4.172530746591783e-06, "loss": 1.7221, "step": 291 }, { "epoch": 0.8, "learning_rate": 4.06730887085528e-06, "loss": 1.7254, "step": 292 }, { "epoch": 0.8, "learning_rate": 3.963280359088933e-06, "loss": 1.6873, "step": 293 }, { "epoch": 0.8, "learning_rate": 3.86045300306356e-06, "loss": 1.6773, "step": 294 }, { "epoch": 0.8, "learning_rate": 3.7588345045830044e-06, "loss": 1.7121, "step": 295 }, { "epoch": 0.81, "learning_rate": 3.658432474907274e-06, "loss": 1.6798, "step": 296 }, { "epoch": 0.81, "learning_rate": 3.559254434182451e-06, "loss": 1.6762, "step": 297 }, { "epoch": 0.81, "learning_rate": 3.461307810877428e-06, "loss": 1.6934, "step": 298 }, { "epoch": 0.81, "learning_rate": 3.364599941227513e-06, "loss": 1.6862, "step": 299 }, { "epoch": 0.82, "learning_rate": 3.2691380686849517e-06, "loss": 1.6708, "step": 300 }, { "epoch": 0.82, "learning_rate": 3.174929343376374e-06, "loss": 1.6764, "step": 301 }, { "epoch": 0.82, "learning_rate": 3.081980821567272e-06, "loss": 1.6886, "step": 302 }, { "epoch": 0.83, "learning_rate": 2.990299465133446e-06, "loss": 1.6679, "step": 303 }, { "epoch": 0.83, "learning_rate": 2.8998921410396e-06, "loss": 1.6854, "step": 304 }, { "epoch": 0.83, "learning_rate": 2.8107656208249733e-06, "loss": 1.681, "step": 305 }, { "epoch": 0.83, "learning_rate": 2.72292658009617e-06, "loss": 1.6987, "step": 306 }, { "epoch": 0.84, "learning_rate": 2.6363815980271248e-06, "loss": 1.6574, "step": 307 }, { "epoch": 0.84, "learning_rate": 2.551137156866357e-06, "loss": 1.7037, "step": 308 }, { "epoch": 0.84, "learning_rate": 2.4671996414514276e-06, "loss": 1.702, "step": 309 }, { "epoch": 0.84, "learning_rate": 2.384575338730717e-06, "loss": 1.6788, "step": 310 }, { "epoch": 0.85, "learning_rate": 2.3032704372925176e-06, "loss": 1.6624, "step": 311 }, { "epoch": 0.85, "learning_rate": 2.223291026901533e-06, "loss": 1.693, "step": 312 }, { "epoch": 0.85, "learning_rate": 2.144643098042727e-06, "loss": 1.6687, "step": 313 }, { "epoch": 0.86, "learning_rate": 2.0673325414726574e-06, "loss": 1.6914, "step": 314 }, { "epoch": 0.86, "learning_rate": 1.991365147778228e-06, "loss": 1.6708, "step": 315 }, { "epoch": 0.86, "learning_rate": 1.9167466069429964e-06, "loss": 1.6752, "step": 316 }, { "epoch": 0.86, "learning_rate": 1.8434825079209884e-06, "loss": 1.6893, "step": 317 }, { "epoch": 0.87, "learning_rate": 1.7715783382180672e-06, "loss": 1.6681, "step": 318 }, { "epoch": 0.87, "learning_rate": 1.7010394834809373e-06, "loss": 1.6982, "step": 319 }, { "epoch": 0.87, "learning_rate": 1.6318712270937442e-06, "loss": 1.6421, "step": 320 }, { "epoch": 0.87, "learning_rate": 1.5640787497823585e-06, "loss": 1.7007, "step": 321 }, { "epoch": 0.88, "learning_rate": 1.4976671292263257e-06, "loss": 1.6832, "step": 322 }, { "epoch": 0.88, "learning_rate": 1.4326413396785488e-06, "loss": 1.6584, "step": 323 }, { "epoch": 0.88, "learning_rate": 1.3690062515927239e-06, "loss": 1.7009, "step": 324 }, { "epoch": 0.89, "learning_rate": 1.306766631258536e-06, "loss": 1.6755, "step": 325 }, { "epoch": 0.89, "learning_rate": 1.245927140444665e-06, "loss": 1.686, "step": 326 }, { "epoch": 0.89, "learning_rate": 1.1864923360496028e-06, "loss": 1.6916, "step": 327 }, { "epoch": 0.89, "learning_rate": 1.128466669760362e-06, "loss": 1.6923, "step": 328 }, { "epoch": 0.9, "learning_rate": 1.0718544877190306e-06, "loss": 1.7079, "step": 329 }, { "epoch": 0.9, "learning_rate": 1.0166600301972517e-06, "loss": 1.6865, "step": 330 }, { "epoch": 0.9, "learning_rate": 9.628874312786096e-07, "loss": 1.6624, "step": 331 }, { "epoch": 0.9, "learning_rate": 9.105407185490067e-07, "loss": 1.6489, "step": 332 }, { "epoch": 0.91, "learning_rate": 8.59623812794983e-07, "loss": 1.6923, "step": 333 }, { "epoch": 0.91, "learning_rate": 8.101405277100549e-07, "loss": 1.678, "step": 334 }, { "epoch": 0.91, "learning_rate": 7.620945696090532e-07, "loss": 1.6302, "step": 335 }, { "epoch": 0.92, "learning_rate": 7.154895371505421e-07, "loss": 1.6555, "step": 336 }, { "epoch": 0.92, "learning_rate": 6.703289210672603e-07, "loss": 1.6805, "step": 337 }, { "epoch": 0.92, "learning_rate": 6.266161039046737e-07, "loss": 1.6744, "step": 338 }, { "epoch": 0.92, "learning_rate": 5.843543597676138e-07, "loss": 1.6585, "step": 339 }, { "epoch": 0.93, "learning_rate": 5.435468540750544e-07, "loss": 1.6951, "step": 340 }, { "epoch": 0.93, "learning_rate": 5.041966433230094e-07, "loss": 1.6666, "step": 341 }, { "epoch": 0.93, "learning_rate": 4.6630667485561885e-07, "loss": 1.6757, "step": 342 }, { "epoch": 0.93, "learning_rate": 4.2987978664436936e-07, "loss": 1.6719, "step": 343 }, { "epoch": 0.94, "learning_rate": 3.9491870707554445e-07, "loss": 1.6575, "step": 344 }, { "epoch": 0.94, "learning_rate": 3.614260547458659e-07, "loss": 1.6398, "step": 345 }, { "epoch": 0.94, "learning_rate": 3.2940433826635257e-07, "loss": 1.6523, "step": 346 }, { "epoch": 0.95, "learning_rate": 2.9885595607443086e-07, "loss": 1.6521, "step": 347 }, { "epoch": 0.95, "learning_rate": 2.697831962542874e-07, "loss": 1.6598, "step": 348 }, { "epoch": 0.95, "learning_rate": 2.4218823636549703e-07, "loss": 1.6721, "step": 349 }, { "epoch": 0.95, "learning_rate": 2.1607314327991791e-07, "loss": 1.6975, "step": 350 }, { "epoch": 0.96, "learning_rate": 1.9143987302687738e-07, "loss": 1.6652, "step": 351 }, { "epoch": 0.96, "learning_rate": 1.682902706466738e-07, "loss": 1.6554, "step": 352 }, { "epoch": 0.96, "learning_rate": 1.4662607005237805e-07, "loss": 1.6603, "step": 353 }, { "epoch": 0.96, "learning_rate": 1.264488938999664e-07, "loss": 1.6843, "step": 354 }, { "epoch": 0.97, "learning_rate": 1.0776025346677321e-07, "loss": 1.69, "step": 355 }, { "epoch": 0.97, "learning_rate": 9.056154853830823e-08, "loss": 1.6755, "step": 356 }, { "epoch": 0.97, "learning_rate": 7.485406730340483e-08, "loss": 1.6577, "step": 357 }, { "epoch": 0.98, "learning_rate": 6.06389862577328e-08, "loss": 1.7059, "step": 358 }, { "epoch": 0.98, "learning_rate": 4.7917370115688756e-08, "loss": 1.7009, "step": 359 }, { "epoch": 0.98, "learning_rate": 3.66901717306356e-08, "loss": 1.6863, "step": 360 }, { "epoch": 0.98, "learning_rate": 2.6958232023539532e-08, "loss": 1.6557, "step": 361 }, { "epoch": 0.99, "learning_rate": 1.8722279919987098e-08, "loss": 1.6717, "step": 362 }, { "epoch": 0.99, "learning_rate": 1.1982932295582227e-08, "loss": 1.6401, "step": 363 }, { "epoch": 0.99, "learning_rate": 6.7406939297520734e-09, "loss": 1.6846, "step": 364 }, { "epoch": 0.99, "learning_rate": 2.995957467923916e-09, "loss": 1.6775, "step": 365 }, { "epoch": 1.0, "learning_rate": 7.490033921331296e-10, "loss": 1.675, "step": 366 }, { "epoch": 1.0, "learning_rate": 0.0, "loss": 1.6419, "step": 367 }, { "epoch": 1.0, "step": 367, "total_flos": 3.839475069608788e+18, "train_loss": 2.069092570598509, "train_runtime": 4018.2223, "train_samples_per_second": 174.946, "train_steps_per_second": 0.091 } ], "max_steps": 367, "num_train_epochs": 1, "total_flos": 3.839475069608788e+18, "trial_name": null, "trial_params": null }