{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9955156950672646, "global_step": 55500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.993004484304933e-05, "loss": 0.5359, "step": 200 }, { "epoch": 0.01, "learning_rate": 1.985829596412556e-05, "loss": 0.0269, "step": 400 }, { "epoch": 0.01, "learning_rate": 1.9786547085201794e-05, "loss": 0.0198, "step": 600 }, { "epoch": 0.01, "learning_rate": 1.9714798206278026e-05, "loss": 0.0073, "step": 800 }, { "epoch": 0.02, "learning_rate": 1.9643049327354262e-05, "loss": 0.0052, "step": 1000 }, { "epoch": 0.02, "learning_rate": 1.9571300448430494e-05, "loss": 0.006, "step": 1200 }, { "epoch": 0.03, "learning_rate": 1.9499551569506727e-05, "loss": 0.0038, "step": 1400 }, { "epoch": 0.03, "learning_rate": 1.9427802690582963e-05, "loss": 0.0026, "step": 1600 }, { "epoch": 0.03, "learning_rate": 1.9356053811659195e-05, "loss": 0.0023, "step": 1800 }, { "epoch": 0.04, "learning_rate": 1.9284304932735428e-05, "loss": 0.0021, "step": 2000 }, { "epoch": 0.04, "learning_rate": 1.921255605381166e-05, "loss": 0.0034, "step": 2200 }, { "epoch": 0.04, "learning_rate": 1.9140807174887893e-05, "loss": 0.003, "step": 2400 }, { "epoch": 0.05, "learning_rate": 1.9069058295964125e-05, "loss": 0.0018, "step": 2600 }, { "epoch": 0.05, "learning_rate": 1.8997309417040358e-05, "loss": 0.0013, "step": 2800 }, { "epoch": 0.05, "learning_rate": 1.8925560538116594e-05, "loss": 0.0032, "step": 3000 }, { "epoch": 0.06, "learning_rate": 1.8854170403587446e-05, "loss": 0.0092, "step": 3200 }, { "epoch": 0.06, "learning_rate": 1.878242152466368e-05, "loss": 0.0012, "step": 3400 }, { "epoch": 0.06, "learning_rate": 1.871067264573991e-05, "loss": 0.0016, "step": 3600 }, { "epoch": 0.07, "learning_rate": 1.8638923766816147e-05, "loss": 0.0027, "step": 3800 }, { "epoch": 0.07, "learning_rate": 1.856717488789238e-05, "loss": 0.002, "step": 4000 }, { "epoch": 0.08, "learning_rate": 1.8495426008968612e-05, "loss": 0.0031, "step": 4200 }, { "epoch": 0.08, "learning_rate": 1.8423677130044845e-05, "loss": 0.0006, "step": 4400 }, { "epoch": 0.08, "learning_rate": 1.8351928251121077e-05, "loss": 0.0009, "step": 4600 }, { "epoch": 0.09, "learning_rate": 1.828017937219731e-05, "loss": 0.0027, "step": 4800 }, { "epoch": 0.09, "learning_rate": 1.8208430493273542e-05, "loss": 0.0009, "step": 5000 }, { "epoch": 0.09, "learning_rate": 1.8136681614349778e-05, "loss": 0.0007, "step": 5200 }, { "epoch": 0.1, "learning_rate": 1.806493273542601e-05, "loss": 0.0011, "step": 5400 }, { "epoch": 0.1, "learning_rate": 1.7993183856502243e-05, "loss": 0.0023, "step": 5600 }, { "epoch": 0.1, "learning_rate": 1.7921434977578475e-05, "loss": 0.0015, "step": 5800 }, { "epoch": 0.11, "learning_rate": 1.784968609865471e-05, "loss": 0.0019, "step": 6000 }, { "epoch": 0.11, "learning_rate": 1.7777937219730944e-05, "loss": 0.0011, "step": 6200 }, { "epoch": 0.11, "learning_rate": 1.7706188340807176e-05, "loss": 0.0015, "step": 6400 }, { "epoch": 0.12, "learning_rate": 1.763443946188341e-05, "loss": 0.0007, "step": 6600 }, { "epoch": 0.12, "learning_rate": 1.756269058295964e-05, "loss": 0.0014, "step": 6800 }, { "epoch": 0.13, "learning_rate": 1.7490941704035874e-05, "loss": 0.0007, "step": 7000 }, { "epoch": 0.13, "learning_rate": 1.741919282511211e-05, "loss": 0.0004, "step": 7200 }, { "epoch": 0.13, "learning_rate": 1.7347443946188342e-05, "loss": 0.0007, "step": 7400 }, { "epoch": 0.14, "learning_rate": 1.7275695067264575e-05, "loss": 0.0009, "step": 7600 }, { "epoch": 0.14, "learning_rate": 1.7203946188340807e-05, "loss": 0.0009, "step": 7800 }, { "epoch": 0.14, "learning_rate": 1.7132197309417043e-05, "loss": 0.0007, "step": 8000 }, { "epoch": 0.15, "learning_rate": 1.7060448430493275e-05, "loss": 0.0004, "step": 8200 }, { "epoch": 0.15, "learning_rate": 1.6988699551569508e-05, "loss": 0.0009, "step": 8400 }, { "epoch": 0.15, "learning_rate": 1.6916950672645744e-05, "loss": 0.0007, "step": 8600 }, { "epoch": 0.16, "learning_rate": 1.6845201793721976e-05, "loss": 0.0007, "step": 8800 }, { "epoch": 0.16, "learning_rate": 1.677345291479821e-05, "loss": 0.0019, "step": 9000 }, { "epoch": 0.17, "learning_rate": 1.670170403587444e-05, "loss": 0.0004, "step": 9200 }, { "epoch": 0.17, "learning_rate": 1.6629955156950674e-05, "loss": 0.0004, "step": 9400 }, { "epoch": 0.17, "learning_rate": 1.6558206278026906e-05, "loss": 0.0002, "step": 9600 }, { "epoch": 0.18, "learning_rate": 1.648645739910314e-05, "loss": 0.0007, "step": 9800 }, { "epoch": 0.18, "learning_rate": 1.6414708520179375e-05, "loss": 0.0005, "step": 10000 }, { "epoch": 0.18, "learning_rate": 1.6342959641255607e-05, "loss": 0.0005, "step": 10200 }, { "epoch": 0.19, "learning_rate": 1.627121076233184e-05, "loss": 0.0007, "step": 10400 }, { "epoch": 0.19, "learning_rate": 1.6199461883408075e-05, "loss": 0.0007, "step": 10600 }, { "epoch": 0.19, "learning_rate": 1.6127713004484308e-05, "loss": 0.0011, "step": 10800 }, { "epoch": 0.2, "learning_rate": 1.605596412556054e-05, "loss": 0.0005, "step": 11000 }, { "epoch": 0.2, "learning_rate": 1.5984215246636773e-05, "loss": 0.0007, "step": 11200 }, { "epoch": 0.2, "learning_rate": 1.5912466367713005e-05, "loss": 0.0004, "step": 11400 }, { "epoch": 0.21, "learning_rate": 1.5840717488789238e-05, "loss": 0.0001, "step": 11600 }, { "epoch": 0.21, "learning_rate": 1.576896860986547e-05, "loss": 0.0002, "step": 11800 }, { "epoch": 0.22, "learning_rate": 1.5697219730941706e-05, "loss": 0.0004, "step": 12000 }, { "epoch": 0.22, "learning_rate": 1.562547085201794e-05, "loss": 0.0003, "step": 12200 }, { "epoch": 0.22, "learning_rate": 1.555372197309417e-05, "loss": 0.0005, "step": 12400 }, { "epoch": 0.23, "learning_rate": 1.5481973094170404e-05, "loss": 0.0001, "step": 12600 }, { "epoch": 0.23, "learning_rate": 1.541022421524664e-05, "loss": 0.0001, "step": 12800 }, { "epoch": 0.23, "learning_rate": 1.5338834080717492e-05, "loss": 0.0006, "step": 13000 }, { "epoch": 0.24, "learning_rate": 1.5267085201793725e-05, "loss": 0.0006, "step": 13200 }, { "epoch": 0.24, "learning_rate": 1.5195336322869956e-05, "loss": 0.0004, "step": 13400 }, { "epoch": 0.24, "learning_rate": 1.512358744394619e-05, "loss": 0.0003, "step": 13600 }, { "epoch": 0.25, "learning_rate": 1.5052197309417043e-05, "loss": 0.0009, "step": 13800 }, { "epoch": 0.25, "learning_rate": 1.4980448430493275e-05, "loss": 0.0002, "step": 14000 }, { "epoch": 0.25, "learning_rate": 1.4908699551569508e-05, "loss": 0.001, "step": 14200 }, { "epoch": 0.26, "learning_rate": 1.483695067264574e-05, "loss": 0.0007, "step": 14400 }, { "epoch": 0.26, "learning_rate": 1.4765201793721974e-05, "loss": 0.0006, "step": 14600 }, { "epoch": 0.27, "learning_rate": 1.4693452914798207e-05, "loss": 0.0004, "step": 14800 }, { "epoch": 0.27, "learning_rate": 1.462170403587444e-05, "loss": 0.0004, "step": 15000 }, { "epoch": 0.27, "learning_rate": 1.4549955156950675e-05, "loss": 0.0004, "step": 15200 }, { "epoch": 0.28, "learning_rate": 1.4478206278026908e-05, "loss": 0.0003, "step": 15400 }, { "epoch": 0.28, "learning_rate": 1.440645739910314e-05, "loss": 0.0002, "step": 15600 }, { "epoch": 0.28, "learning_rate": 1.4334708520179373e-05, "loss": 0.0003, "step": 15800 }, { "epoch": 0.29, "learning_rate": 1.4262959641255607e-05, "loss": 0.0015, "step": 16000 }, { "epoch": 0.29, "learning_rate": 1.419121076233184e-05, "loss": 0.0002, "step": 16200 }, { "epoch": 0.29, "learning_rate": 1.4119461883408072e-05, "loss": 0.0005, "step": 16400 }, { "epoch": 0.3, "learning_rate": 1.4047713004484308e-05, "loss": 0.0001, "step": 16600 }, { "epoch": 0.3, "learning_rate": 1.397596412556054e-05, "loss": 0.0009, "step": 16800 }, { "epoch": 0.3, "learning_rate": 1.3904215246636773e-05, "loss": 0.0001, "step": 17000 }, { "epoch": 0.31, "learning_rate": 1.3832466367713007e-05, "loss": 0.0001, "step": 17200 }, { "epoch": 0.31, "learning_rate": 1.376071748878924e-05, "loss": 0.0001, "step": 17400 }, { "epoch": 0.32, "learning_rate": 1.3688968609865472e-05, "loss": 0.0002, "step": 17600 }, { "epoch": 0.32, "learning_rate": 1.3617219730941704e-05, "loss": 0.0, "step": 17800 }, { "epoch": 0.32, "learning_rate": 1.3545470852017938e-05, "loss": 0.0, "step": 18000 }, { "epoch": 0.33, "learning_rate": 1.347372197309417e-05, "loss": 0.0002, "step": 18200 }, { "epoch": 0.33, "learning_rate": 1.3402331838565024e-05, "loss": 0.0003, "step": 18400 }, { "epoch": 0.33, "learning_rate": 1.3330582959641256e-05, "loss": 0.0004, "step": 18600 }, { "epoch": 0.34, "learning_rate": 1.325883408071749e-05, "loss": 0.0003, "step": 18800 }, { "epoch": 0.34, "learning_rate": 1.3187085201793723e-05, "loss": 0.0003, "step": 19000 }, { "epoch": 0.34, "learning_rate": 1.3115336322869955e-05, "loss": 0.0004, "step": 19200 }, { "epoch": 0.35, "learning_rate": 1.3043587443946188e-05, "loss": 0.0003, "step": 19400 }, { "epoch": 0.35, "learning_rate": 1.2971838565022424e-05, "loss": 0.0002, "step": 19600 }, { "epoch": 0.36, "learning_rate": 1.2900089686098656e-05, "loss": 0.0005, "step": 19800 }, { "epoch": 0.36, "learning_rate": 1.2828340807174889e-05, "loss": 0.0002, "step": 20000 }, { "epoch": 0.36, "learning_rate": 1.2756591928251123e-05, "loss": 0.0025, "step": 20200 }, { "epoch": 0.37, "learning_rate": 1.2684843049327355e-05, "loss": 0.0001, "step": 20400 }, { "epoch": 0.37, "learning_rate": 1.2613094170403588e-05, "loss": 0.0003, "step": 20600 }, { "epoch": 0.37, "learning_rate": 1.254134529147982e-05, "loss": 0.0001, "step": 20800 }, { "epoch": 0.38, "learning_rate": 1.2469596412556056e-05, "loss": 0.0004, "step": 21000 }, { "epoch": 0.38, "learning_rate": 1.2397847533632289e-05, "loss": 0.0001, "step": 21200 }, { "epoch": 0.38, "learning_rate": 1.2326098654708521e-05, "loss": 0.0002, "step": 21400 }, { "epoch": 0.39, "learning_rate": 1.2254349775784755e-05, "loss": 0.0, "step": 21600 }, { "epoch": 0.39, "learning_rate": 1.2182600896860988e-05, "loss": 0.0001, "step": 21800 }, { "epoch": 0.39, "learning_rate": 1.211085201793722e-05, "loss": 0.0002, "step": 22000 }, { "epoch": 0.4, "learning_rate": 1.2039103139013454e-05, "loss": 0.0002, "step": 22200 }, { "epoch": 0.4, "learning_rate": 1.1967354260089687e-05, "loss": 0.0003, "step": 22400 }, { "epoch": 0.41, "learning_rate": 1.189560538116592e-05, "loss": 0.0009, "step": 22600 }, { "epoch": 0.41, "learning_rate": 1.1823856502242152e-05, "loss": 0.0001, "step": 22800 }, { "epoch": 0.41, "learning_rate": 1.1752107623318388e-05, "loss": 0.0001, "step": 23000 }, { "epoch": 0.42, "learning_rate": 1.168035874439462e-05, "loss": 0.0, "step": 23200 }, { "epoch": 0.42, "learning_rate": 1.1608609865470853e-05, "loss": 0.0003, "step": 23400 }, { "epoch": 0.42, "learning_rate": 1.1536860986547087e-05, "loss": 0.0001, "step": 23600 }, { "epoch": 0.43, "learning_rate": 1.146511210762332e-05, "loss": 0.0, "step": 23800 }, { "epoch": 0.43, "learning_rate": 1.1393363228699552e-05, "loss": 0.0001, "step": 24000 }, { "epoch": 0.43, "learning_rate": 1.1321614349775784e-05, "loss": 0.0001, "step": 24200 }, { "epoch": 0.44, "learning_rate": 1.124986547085202e-05, "loss": 0.0001, "step": 24400 }, { "epoch": 0.44, "learning_rate": 1.1178116591928253e-05, "loss": 0.0001, "step": 24600 }, { "epoch": 0.44, "learning_rate": 1.1106367713004485e-05, "loss": 0.0001, "step": 24800 }, { "epoch": 0.45, "learning_rate": 1.103461883408072e-05, "loss": 0.0001, "step": 25000 }, { "epoch": 0.45, "learning_rate": 1.0962869955156952e-05, "loss": 0.0001, "step": 25200 }, { "epoch": 0.46, "learning_rate": 1.0891121076233184e-05, "loss": 0.0003, "step": 25400 }, { "epoch": 0.46, "learning_rate": 1.0819372197309419e-05, "loss": 0.0001, "step": 25600 }, { "epoch": 0.46, "learning_rate": 1.0747623318385651e-05, "loss": 0.0001, "step": 25800 }, { "epoch": 0.47, "learning_rate": 1.0676233183856504e-05, "loss": 0.0005, "step": 26000 }, { "epoch": 0.47, "learning_rate": 1.0604484304932736e-05, "loss": 0.0001, "step": 26200 }, { "epoch": 0.47, "learning_rate": 1.0532735426008969e-05, "loss": 0.0001, "step": 26400 }, { "epoch": 0.48, "learning_rate": 1.0460986547085203e-05, "loss": 0.0001, "step": 26600 }, { "epoch": 0.48, "learning_rate": 1.0389237668161435e-05, "loss": 0.0002, "step": 26800 }, { "epoch": 0.48, "learning_rate": 1.0317488789237668e-05, "loss": 0.0, "step": 27000 }, { "epoch": 0.49, "learning_rate": 1.0245739910313904e-05, "loss": 0.0, "step": 27200 }, { "epoch": 0.49, "learning_rate": 1.0173991031390136e-05, "loss": 0.0001, "step": 27400 }, { "epoch": 0.5, "learning_rate": 1.0102242152466369e-05, "loss": 0.0002, "step": 27600 }, { "epoch": 0.5, "learning_rate": 1.0030493273542601e-05, "loss": 0.0001, "step": 27800 }, { "epoch": 0.5, "learning_rate": 9.958744394618834e-06, "loss": 0.0001, "step": 28000 }, { "epoch": 0.51, "learning_rate": 9.887354260089686e-06, "loss": 0.0003, "step": 28200 }, { "epoch": 0.51, "learning_rate": 9.81560538116592e-06, "loss": 0.0001, "step": 28400 }, { "epoch": 0.51, "learning_rate": 9.743856502242153e-06, "loss": 0.0, "step": 28600 }, { "epoch": 0.52, "learning_rate": 9.672107623318386e-06, "loss": 0.0, "step": 28800 }, { "epoch": 0.52, "learning_rate": 9.60035874439462e-06, "loss": 0.0, "step": 29000 }, { "epoch": 0.52, "learning_rate": 9.528609865470852e-06, "loss": 0.0001, "step": 29200 }, { "epoch": 0.53, "learning_rate": 9.456860986547086e-06, "loss": 0.0, "step": 29400 }, { "epoch": 0.53, "learning_rate": 9.385112107623319e-06, "loss": 0.0001, "step": 29600 }, { "epoch": 0.53, "learning_rate": 9.313363228699553e-06, "loss": 0.0001, "step": 29800 }, { "epoch": 0.54, "learning_rate": 9.241614349775786e-06, "loss": 0.0, "step": 30000 }, { "epoch": 0.54, "learning_rate": 9.169865470852018e-06, "loss": 0.0002, "step": 30200 }, { "epoch": 0.55, "learning_rate": 9.098475336322871e-06, "loss": 0.0003, "step": 30400 }, { "epoch": 0.55, "learning_rate": 9.026726457399105e-06, "loss": 0.0001, "step": 30600 }, { "epoch": 0.55, "learning_rate": 8.954977578475338e-06, "loss": 0.0001, "step": 30800 }, { "epoch": 0.56, "learning_rate": 8.88322869955157e-06, "loss": 0.0001, "step": 31000 }, { "epoch": 0.56, "learning_rate": 8.811479820627803e-06, "loss": 0.0, "step": 31200 }, { "epoch": 0.56, "learning_rate": 8.739730941704037e-06, "loss": 0.0003, "step": 31400 }, { "epoch": 0.57, "learning_rate": 8.667982062780271e-06, "loss": 0.0001, "step": 31600 }, { "epoch": 0.57, "learning_rate": 8.596233183856503e-06, "loss": 0.0003, "step": 31800 }, { "epoch": 0.57, "learning_rate": 8.524484304932736e-06, "loss": 0.0, "step": 32000 }, { "epoch": 0.58, "learning_rate": 8.452735426008968e-06, "loss": 0.0, "step": 32200 }, { "epoch": 0.58, "learning_rate": 8.380986547085203e-06, "loss": 0.0, "step": 32400 }, { "epoch": 0.58, "learning_rate": 8.309237668161437e-06, "loss": 0.0, "step": 32600 }, { "epoch": 0.59, "learning_rate": 8.23748878923767e-06, "loss": 0.0, "step": 32800 }, { "epoch": 0.59, "learning_rate": 8.165739910313902e-06, "loss": 0.0001, "step": 33000 }, { "epoch": 0.6, "learning_rate": 8.093991031390134e-06, "loss": 0.0003, "step": 33200 }, { "epoch": 0.6, "learning_rate": 8.022242152466368e-06, "loss": 0.0, "step": 33400 }, { "epoch": 0.6, "learning_rate": 7.9504932735426e-06, "loss": 0.0, "step": 33600 }, { "epoch": 0.61, "learning_rate": 7.878744394618835e-06, "loss": 0.0001, "step": 33800 }, { "epoch": 0.61, "learning_rate": 7.806995515695068e-06, "loss": 0.0, "step": 34000 }, { "epoch": 0.61, "learning_rate": 7.7352466367713e-06, "loss": 0.0002, "step": 34200 }, { "epoch": 0.62, "learning_rate": 7.663497757847534e-06, "loss": 0.0, "step": 34400 }, { "epoch": 0.62, "learning_rate": 7.591748878923767e-06, "loss": 0.0, "step": 34600 }, { "epoch": 0.62, "learning_rate": 7.520000000000001e-06, "loss": 0.0, "step": 34800 }, { "epoch": 0.63, "learning_rate": 7.448251121076234e-06, "loss": 0.0001, "step": 35000 }, { "epoch": 0.63, "learning_rate": 7.376502242152467e-06, "loss": 0.0, "step": 35200 }, { "epoch": 0.63, "learning_rate": 7.304753363228701e-06, "loss": 0.0, "step": 35400 }, { "epoch": 0.64, "learning_rate": 7.233004484304933e-06, "loss": 0.0, "step": 35600 }, { "epoch": 0.64, "learning_rate": 7.161255605381167e-06, "loss": 0.0, "step": 35800 }, { "epoch": 0.65, "learning_rate": 7.0895067264574e-06, "loss": 0.0, "step": 36000 }, { "epoch": 0.65, "learning_rate": 7.0177578475336325e-06, "loss": 0.0, "step": 36200 }, { "epoch": 0.65, "learning_rate": 6.946008968609867e-06, "loss": 0.0, "step": 36400 }, { "epoch": 0.66, "learning_rate": 6.874260089686099e-06, "loss": 0.0, "step": 36600 }, { "epoch": 0.66, "learning_rate": 6.8025112107623325e-06, "loss": 0.0, "step": 36800 }, { "epoch": 0.66, "learning_rate": 6.730762331838565e-06, "loss": 0.0, "step": 37000 }, { "epoch": 0.67, "learning_rate": 6.659013452914798e-06, "loss": 0.0, "step": 37200 }, { "epoch": 0.67, "learning_rate": 6.5872645739910325e-06, "loss": 0.0, "step": 37400 }, { "epoch": 0.67, "learning_rate": 6.515515695067265e-06, "loss": 0.0, "step": 37600 }, { "epoch": 0.68, "learning_rate": 6.443766816143498e-06, "loss": 0.0, "step": 37800 }, { "epoch": 0.68, "learning_rate": 6.372017937219731e-06, "loss": 0.0, "step": 38000 }, { "epoch": 0.69, "learning_rate": 6.300269058295965e-06, "loss": 0.0, "step": 38200 }, { "epoch": 0.69, "learning_rate": 6.2292376681614354e-06, "loss": 0.0, "step": 38400 }, { "epoch": 0.69, "learning_rate": 6.157488789237669e-06, "loss": 0.0, "step": 38600 }, { "epoch": 0.7, "learning_rate": 6.085739910313901e-06, "loss": 0.0, "step": 38800 }, { "epoch": 0.7, "learning_rate": 6.0139910313901354e-06, "loss": 0.0, "step": 39000 }, { "epoch": 0.7, "learning_rate": 5.942242152466369e-06, "loss": 0.0, "step": 39200 }, { "epoch": 0.71, "learning_rate": 5.870493273542601e-06, "loss": 0.0, "step": 39400 }, { "epoch": 0.71, "learning_rate": 5.798744394618835e-06, "loss": 0.0, "step": 39600 }, { "epoch": 0.71, "learning_rate": 5.726995515695067e-06, "loss": 0.0, "step": 39800 }, { "epoch": 0.72, "learning_rate": 5.655246636771301e-06, "loss": 0.0, "step": 40000 }, { "epoch": 0.72, "learning_rate": 5.583497757847534e-06, "loss": 0.0, "step": 40200 }, { "epoch": 0.72, "learning_rate": 5.511748878923767e-06, "loss": 0.0, "step": 40400 }, { "epoch": 0.73, "learning_rate": 5.440358744394619e-06, "loss": 0.0, "step": 40600 }, { "epoch": 0.73, "learning_rate": 5.368609865470853e-06, "loss": 0.0, "step": 40800 }, { "epoch": 0.74, "learning_rate": 5.296860986547086e-06, "loss": 0.0, "step": 41000 }, { "epoch": 0.74, "learning_rate": 5.225112107623319e-06, "loss": 0.0, "step": 41200 }, { "epoch": 0.74, "learning_rate": 5.1533632286995515e-06, "loss": 0.0, "step": 41400 }, { "epoch": 0.75, "learning_rate": 5.081973094170403e-06, "loss": 0.0, "step": 41600 }, { "epoch": 0.75, "learning_rate": 5.0102242152466375e-06, "loss": 0.0002, "step": 41800 }, { "epoch": 0.75, "learning_rate": 4.938834080717489e-06, "loss": 0.0001, "step": 42000 }, { "epoch": 0.76, "learning_rate": 4.867085201793723e-06, "loss": 0.0, "step": 42200 }, { "epoch": 0.76, "learning_rate": 4.795336322869955e-06, "loss": 0.0, "step": 42400 }, { "epoch": 0.76, "learning_rate": 4.723587443946189e-06, "loss": 0.0, "step": 42600 }, { "epoch": 0.77, "learning_rate": 4.651838565022422e-06, "loss": 0.0, "step": 42800 }, { "epoch": 0.77, "learning_rate": 4.580089686098655e-06, "loss": 0.0, "step": 43000 }, { "epoch": 0.77, "learning_rate": 4.508340807174889e-06, "loss": 0.0001, "step": 43200 }, { "epoch": 0.78, "learning_rate": 4.436591928251122e-06, "loss": 0.0001, "step": 43400 }, { "epoch": 0.78, "learning_rate": 4.364843049327354e-06, "loss": 0.0, "step": 43600 }, { "epoch": 0.79, "learning_rate": 4.293094170403588e-06, "loss": 0.0, "step": 43800 }, { "epoch": 0.79, "learning_rate": 4.221345291479821e-06, "loss": 0.0, "step": 44000 }, { "epoch": 0.79, "learning_rate": 4.1495964125560536e-06, "loss": 0.0, "step": 44200 }, { "epoch": 0.8, "learning_rate": 4.077847533632288e-06, "loss": 0.0, "step": 44400 }, { "epoch": 0.8, "learning_rate": 4.00609865470852e-06, "loss": 0.0, "step": 44600 }, { "epoch": 0.8, "learning_rate": 3.934349775784754e-06, "loss": 0.0, "step": 44800 }, { "epoch": 0.81, "learning_rate": 3.862600896860987e-06, "loss": 0.0, "step": 45000 }, { "epoch": 0.81, "learning_rate": 3.79085201793722e-06, "loss": 0.0, "step": 45200 }, { "epoch": 0.81, "learning_rate": 3.719103139013453e-06, "loss": 0.0, "step": 45400 }, { "epoch": 0.82, "learning_rate": 3.6473542600896865e-06, "loss": 0.0, "step": 45600 }, { "epoch": 0.82, "learning_rate": 3.57560538116592e-06, "loss": 0.0002, "step": 45800 }, { "epoch": 0.83, "learning_rate": 3.5038565022421527e-06, "loss": 0.0, "step": 46000 }, { "epoch": 0.83, "learning_rate": 3.432107623318386e-06, "loss": 0.0001, "step": 46200 }, { "epoch": 0.83, "learning_rate": 3.360717488789238e-06, "loss": 0.0001, "step": 46400 }, { "epoch": 0.84, "learning_rate": 3.288968609865471e-06, "loss": 0.0, "step": 46600 }, { "epoch": 0.84, "learning_rate": 3.217219730941704e-06, "loss": 0.0, "step": 46800 }, { "epoch": 0.84, "learning_rate": 3.145470852017937e-06, "loss": 0.0001, "step": 47000 }, { "epoch": 0.85, "learning_rate": 3.073721973094171e-06, "loss": 0.0, "step": 47200 }, { "epoch": 0.85, "learning_rate": 3.001973094170404e-06, "loss": 0.0, "step": 47400 }, { "epoch": 0.85, "learning_rate": 2.930224215246637e-06, "loss": 0.0, "step": 47600 }, { "epoch": 0.86, "learning_rate": 2.85847533632287e-06, "loss": 0.0, "step": 47800 }, { "epoch": 0.86, "learning_rate": 2.7867264573991034e-06, "loss": 0.0, "step": 48000 }, { "epoch": 0.86, "learning_rate": 2.7149775784753363e-06, "loss": 0.0, "step": 48200 }, { "epoch": 0.87, "learning_rate": 2.64322869955157e-06, "loss": 0.0, "step": 48400 }, { "epoch": 0.87, "learning_rate": 2.571479820627803e-06, "loss": 0.0, "step": 48600 }, { "epoch": 0.88, "learning_rate": 2.499730941704036e-06, "loss": 0.0, "step": 48800 }, { "epoch": 0.88, "learning_rate": 2.427982062780269e-06, "loss": 0.0001, "step": 49000 }, { "epoch": 0.88, "learning_rate": 2.3562331838565025e-06, "loss": 0.0, "step": 49200 }, { "epoch": 0.89, "learning_rate": 2.2844843049327355e-06, "loss": 0.0001, "step": 49400 }, { "epoch": 0.89, "learning_rate": 2.2127354260089688e-06, "loss": 0.0, "step": 49600 }, { "epoch": 0.89, "learning_rate": 2.140986547085202e-06, "loss": 0.0, "step": 49800 }, { "epoch": 0.9, "learning_rate": 2.069237668161435e-06, "loss": 0.0, "step": 50000 }, { "epoch": 0.9, "learning_rate": 1.9974887892376684e-06, "loss": 0.0, "step": 50200 }, { "epoch": 0.9, "learning_rate": 1.9257399103139017e-06, "loss": 0.0, "step": 50400 }, { "epoch": 0.91, "learning_rate": 1.8543497757847534e-06, "loss": 0.0001, "step": 50600 }, { "epoch": 0.91, "learning_rate": 1.7826008968609867e-06, "loss": 0.0, "step": 50800 }, { "epoch": 0.91, "learning_rate": 1.7108520179372198e-06, "loss": 0.0, "step": 51000 }, { "epoch": 0.92, "learning_rate": 1.639103139013453e-06, "loss": 0.0, "step": 51200 }, { "epoch": 0.92, "learning_rate": 1.5673542600896863e-06, "loss": 0.0, "step": 51400 }, { "epoch": 0.93, "learning_rate": 1.4956053811659194e-06, "loss": 0.0, "step": 51600 }, { "epoch": 0.93, "learning_rate": 1.4238565022421528e-06, "loss": 0.0, "step": 51800 }, { "epoch": 0.93, "learning_rate": 1.3521076233183859e-06, "loss": 0.0, "step": 52000 }, { "epoch": 0.94, "learning_rate": 1.2803587443946188e-06, "loss": 0.0, "step": 52200 }, { "epoch": 0.94, "learning_rate": 1.2089686098654709e-06, "loss": 0.0, "step": 52400 }, { "epoch": 0.94, "learning_rate": 1.137219730941704e-06, "loss": 0.0, "step": 52600 }, { "epoch": 0.95, "learning_rate": 1.0654708520179373e-06, "loss": 0.0, "step": 52800 }, { "epoch": 0.95, "learning_rate": 9.937219730941705e-07, "loss": 0.0, "step": 53000 }, { "epoch": 0.95, "learning_rate": 9.219730941704037e-07, "loss": 0.0, "step": 53200 }, { "epoch": 0.96, "learning_rate": 8.502242152466368e-07, "loss": 0.0, "step": 53400 }, { "epoch": 0.96, "learning_rate": 7.7847533632287e-07, "loss": 0.0, "step": 53600 }, { "epoch": 0.97, "learning_rate": 7.067264573991033e-07, "loss": 0.0, "step": 53800 }, { "epoch": 0.97, "learning_rate": 6.349775784753363e-07, "loss": 0.0, "step": 54000 }, { "epoch": 0.97, "learning_rate": 5.632286995515695e-07, "loss": 0.0, "step": 54200 }, { "epoch": 0.98, "learning_rate": 4.914798206278028e-07, "loss": 0.0, "step": 54400 }, { "epoch": 0.98, "learning_rate": 4.1973094170403593e-07, "loss": 0.0, "step": 54600 }, { "epoch": 0.98, "learning_rate": 3.4798206278026905e-07, "loss": 0.0, "step": 54800 }, { "epoch": 0.99, "learning_rate": 2.762331838565023e-07, "loss": 0.0, "step": 55000 }, { "epoch": 0.99, "learning_rate": 2.0448430493273546e-07, "loss": 0.0, "step": 55200 }, { "epoch": 0.99, "learning_rate": 1.327354260089686e-07, "loss": 0.0, "step": 55400 } ], "max_steps": 55750, "num_train_epochs": 1, "total_flos": 6.788108648448e+16, "trial_name": null, "trial_params": null }