salma-elshafey's picture
First epoch - no testing yet
c073c39
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9955156950672646,
"global_step": 55500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.993004484304933e-05,
"loss": 0.5359,
"step": 200
},
{
"epoch": 0.01,
"learning_rate": 1.985829596412556e-05,
"loss": 0.0269,
"step": 400
},
{
"epoch": 0.01,
"learning_rate": 1.9786547085201794e-05,
"loss": 0.0198,
"step": 600
},
{
"epoch": 0.01,
"learning_rate": 1.9714798206278026e-05,
"loss": 0.0073,
"step": 800
},
{
"epoch": 0.02,
"learning_rate": 1.9643049327354262e-05,
"loss": 0.0052,
"step": 1000
},
{
"epoch": 0.02,
"learning_rate": 1.9571300448430494e-05,
"loss": 0.006,
"step": 1200
},
{
"epoch": 0.03,
"learning_rate": 1.9499551569506727e-05,
"loss": 0.0038,
"step": 1400
},
{
"epoch": 0.03,
"learning_rate": 1.9427802690582963e-05,
"loss": 0.0026,
"step": 1600
},
{
"epoch": 0.03,
"learning_rate": 1.9356053811659195e-05,
"loss": 0.0023,
"step": 1800
},
{
"epoch": 0.04,
"learning_rate": 1.9284304932735428e-05,
"loss": 0.0021,
"step": 2000
},
{
"epoch": 0.04,
"learning_rate": 1.921255605381166e-05,
"loss": 0.0034,
"step": 2200
},
{
"epoch": 0.04,
"learning_rate": 1.9140807174887893e-05,
"loss": 0.003,
"step": 2400
},
{
"epoch": 0.05,
"learning_rate": 1.9069058295964125e-05,
"loss": 0.0018,
"step": 2600
},
{
"epoch": 0.05,
"learning_rate": 1.8997309417040358e-05,
"loss": 0.0013,
"step": 2800
},
{
"epoch": 0.05,
"learning_rate": 1.8925560538116594e-05,
"loss": 0.0032,
"step": 3000
},
{
"epoch": 0.06,
"learning_rate": 1.8854170403587446e-05,
"loss": 0.0092,
"step": 3200
},
{
"epoch": 0.06,
"learning_rate": 1.878242152466368e-05,
"loss": 0.0012,
"step": 3400
},
{
"epoch": 0.06,
"learning_rate": 1.871067264573991e-05,
"loss": 0.0016,
"step": 3600
},
{
"epoch": 0.07,
"learning_rate": 1.8638923766816147e-05,
"loss": 0.0027,
"step": 3800
},
{
"epoch": 0.07,
"learning_rate": 1.856717488789238e-05,
"loss": 0.002,
"step": 4000
},
{
"epoch": 0.08,
"learning_rate": 1.8495426008968612e-05,
"loss": 0.0031,
"step": 4200
},
{
"epoch": 0.08,
"learning_rate": 1.8423677130044845e-05,
"loss": 0.0006,
"step": 4400
},
{
"epoch": 0.08,
"learning_rate": 1.8351928251121077e-05,
"loss": 0.0009,
"step": 4600
},
{
"epoch": 0.09,
"learning_rate": 1.828017937219731e-05,
"loss": 0.0027,
"step": 4800
},
{
"epoch": 0.09,
"learning_rate": 1.8208430493273542e-05,
"loss": 0.0009,
"step": 5000
},
{
"epoch": 0.09,
"learning_rate": 1.8136681614349778e-05,
"loss": 0.0007,
"step": 5200
},
{
"epoch": 0.1,
"learning_rate": 1.806493273542601e-05,
"loss": 0.0011,
"step": 5400
},
{
"epoch": 0.1,
"learning_rate": 1.7993183856502243e-05,
"loss": 0.0023,
"step": 5600
},
{
"epoch": 0.1,
"learning_rate": 1.7921434977578475e-05,
"loss": 0.0015,
"step": 5800
},
{
"epoch": 0.11,
"learning_rate": 1.784968609865471e-05,
"loss": 0.0019,
"step": 6000
},
{
"epoch": 0.11,
"learning_rate": 1.7777937219730944e-05,
"loss": 0.0011,
"step": 6200
},
{
"epoch": 0.11,
"learning_rate": 1.7706188340807176e-05,
"loss": 0.0015,
"step": 6400
},
{
"epoch": 0.12,
"learning_rate": 1.763443946188341e-05,
"loss": 0.0007,
"step": 6600
},
{
"epoch": 0.12,
"learning_rate": 1.756269058295964e-05,
"loss": 0.0014,
"step": 6800
},
{
"epoch": 0.13,
"learning_rate": 1.7490941704035874e-05,
"loss": 0.0007,
"step": 7000
},
{
"epoch": 0.13,
"learning_rate": 1.741919282511211e-05,
"loss": 0.0004,
"step": 7200
},
{
"epoch": 0.13,
"learning_rate": 1.7347443946188342e-05,
"loss": 0.0007,
"step": 7400
},
{
"epoch": 0.14,
"learning_rate": 1.7275695067264575e-05,
"loss": 0.0009,
"step": 7600
},
{
"epoch": 0.14,
"learning_rate": 1.7203946188340807e-05,
"loss": 0.0009,
"step": 7800
},
{
"epoch": 0.14,
"learning_rate": 1.7132197309417043e-05,
"loss": 0.0007,
"step": 8000
},
{
"epoch": 0.15,
"learning_rate": 1.7060448430493275e-05,
"loss": 0.0004,
"step": 8200
},
{
"epoch": 0.15,
"learning_rate": 1.6988699551569508e-05,
"loss": 0.0009,
"step": 8400
},
{
"epoch": 0.15,
"learning_rate": 1.6916950672645744e-05,
"loss": 0.0007,
"step": 8600
},
{
"epoch": 0.16,
"learning_rate": 1.6845201793721976e-05,
"loss": 0.0007,
"step": 8800
},
{
"epoch": 0.16,
"learning_rate": 1.677345291479821e-05,
"loss": 0.0019,
"step": 9000
},
{
"epoch": 0.17,
"learning_rate": 1.670170403587444e-05,
"loss": 0.0004,
"step": 9200
},
{
"epoch": 0.17,
"learning_rate": 1.6629955156950674e-05,
"loss": 0.0004,
"step": 9400
},
{
"epoch": 0.17,
"learning_rate": 1.6558206278026906e-05,
"loss": 0.0002,
"step": 9600
},
{
"epoch": 0.18,
"learning_rate": 1.648645739910314e-05,
"loss": 0.0007,
"step": 9800
},
{
"epoch": 0.18,
"learning_rate": 1.6414708520179375e-05,
"loss": 0.0005,
"step": 10000
},
{
"epoch": 0.18,
"learning_rate": 1.6342959641255607e-05,
"loss": 0.0005,
"step": 10200
},
{
"epoch": 0.19,
"learning_rate": 1.627121076233184e-05,
"loss": 0.0007,
"step": 10400
},
{
"epoch": 0.19,
"learning_rate": 1.6199461883408075e-05,
"loss": 0.0007,
"step": 10600
},
{
"epoch": 0.19,
"learning_rate": 1.6127713004484308e-05,
"loss": 0.0011,
"step": 10800
},
{
"epoch": 0.2,
"learning_rate": 1.605596412556054e-05,
"loss": 0.0005,
"step": 11000
},
{
"epoch": 0.2,
"learning_rate": 1.5984215246636773e-05,
"loss": 0.0007,
"step": 11200
},
{
"epoch": 0.2,
"learning_rate": 1.5912466367713005e-05,
"loss": 0.0004,
"step": 11400
},
{
"epoch": 0.21,
"learning_rate": 1.5840717488789238e-05,
"loss": 0.0001,
"step": 11600
},
{
"epoch": 0.21,
"learning_rate": 1.576896860986547e-05,
"loss": 0.0002,
"step": 11800
},
{
"epoch": 0.22,
"learning_rate": 1.5697219730941706e-05,
"loss": 0.0004,
"step": 12000
},
{
"epoch": 0.22,
"learning_rate": 1.562547085201794e-05,
"loss": 0.0003,
"step": 12200
},
{
"epoch": 0.22,
"learning_rate": 1.555372197309417e-05,
"loss": 0.0005,
"step": 12400
},
{
"epoch": 0.23,
"learning_rate": 1.5481973094170404e-05,
"loss": 0.0001,
"step": 12600
},
{
"epoch": 0.23,
"learning_rate": 1.541022421524664e-05,
"loss": 0.0001,
"step": 12800
},
{
"epoch": 0.23,
"learning_rate": 1.5338834080717492e-05,
"loss": 0.0006,
"step": 13000
},
{
"epoch": 0.24,
"learning_rate": 1.5267085201793725e-05,
"loss": 0.0006,
"step": 13200
},
{
"epoch": 0.24,
"learning_rate": 1.5195336322869956e-05,
"loss": 0.0004,
"step": 13400
},
{
"epoch": 0.24,
"learning_rate": 1.512358744394619e-05,
"loss": 0.0003,
"step": 13600
},
{
"epoch": 0.25,
"learning_rate": 1.5052197309417043e-05,
"loss": 0.0009,
"step": 13800
},
{
"epoch": 0.25,
"learning_rate": 1.4980448430493275e-05,
"loss": 0.0002,
"step": 14000
},
{
"epoch": 0.25,
"learning_rate": 1.4908699551569508e-05,
"loss": 0.001,
"step": 14200
},
{
"epoch": 0.26,
"learning_rate": 1.483695067264574e-05,
"loss": 0.0007,
"step": 14400
},
{
"epoch": 0.26,
"learning_rate": 1.4765201793721974e-05,
"loss": 0.0006,
"step": 14600
},
{
"epoch": 0.27,
"learning_rate": 1.4693452914798207e-05,
"loss": 0.0004,
"step": 14800
},
{
"epoch": 0.27,
"learning_rate": 1.462170403587444e-05,
"loss": 0.0004,
"step": 15000
},
{
"epoch": 0.27,
"learning_rate": 1.4549955156950675e-05,
"loss": 0.0004,
"step": 15200
},
{
"epoch": 0.28,
"learning_rate": 1.4478206278026908e-05,
"loss": 0.0003,
"step": 15400
},
{
"epoch": 0.28,
"learning_rate": 1.440645739910314e-05,
"loss": 0.0002,
"step": 15600
},
{
"epoch": 0.28,
"learning_rate": 1.4334708520179373e-05,
"loss": 0.0003,
"step": 15800
},
{
"epoch": 0.29,
"learning_rate": 1.4262959641255607e-05,
"loss": 0.0015,
"step": 16000
},
{
"epoch": 0.29,
"learning_rate": 1.419121076233184e-05,
"loss": 0.0002,
"step": 16200
},
{
"epoch": 0.29,
"learning_rate": 1.4119461883408072e-05,
"loss": 0.0005,
"step": 16400
},
{
"epoch": 0.3,
"learning_rate": 1.4047713004484308e-05,
"loss": 0.0001,
"step": 16600
},
{
"epoch": 0.3,
"learning_rate": 1.397596412556054e-05,
"loss": 0.0009,
"step": 16800
},
{
"epoch": 0.3,
"learning_rate": 1.3904215246636773e-05,
"loss": 0.0001,
"step": 17000
},
{
"epoch": 0.31,
"learning_rate": 1.3832466367713007e-05,
"loss": 0.0001,
"step": 17200
},
{
"epoch": 0.31,
"learning_rate": 1.376071748878924e-05,
"loss": 0.0001,
"step": 17400
},
{
"epoch": 0.32,
"learning_rate": 1.3688968609865472e-05,
"loss": 0.0002,
"step": 17600
},
{
"epoch": 0.32,
"learning_rate": 1.3617219730941704e-05,
"loss": 0.0,
"step": 17800
},
{
"epoch": 0.32,
"learning_rate": 1.3545470852017938e-05,
"loss": 0.0,
"step": 18000
},
{
"epoch": 0.33,
"learning_rate": 1.347372197309417e-05,
"loss": 0.0002,
"step": 18200
},
{
"epoch": 0.33,
"learning_rate": 1.3402331838565024e-05,
"loss": 0.0003,
"step": 18400
},
{
"epoch": 0.33,
"learning_rate": 1.3330582959641256e-05,
"loss": 0.0004,
"step": 18600
},
{
"epoch": 0.34,
"learning_rate": 1.325883408071749e-05,
"loss": 0.0003,
"step": 18800
},
{
"epoch": 0.34,
"learning_rate": 1.3187085201793723e-05,
"loss": 0.0003,
"step": 19000
},
{
"epoch": 0.34,
"learning_rate": 1.3115336322869955e-05,
"loss": 0.0004,
"step": 19200
},
{
"epoch": 0.35,
"learning_rate": 1.3043587443946188e-05,
"loss": 0.0003,
"step": 19400
},
{
"epoch": 0.35,
"learning_rate": 1.2971838565022424e-05,
"loss": 0.0002,
"step": 19600
},
{
"epoch": 0.36,
"learning_rate": 1.2900089686098656e-05,
"loss": 0.0005,
"step": 19800
},
{
"epoch": 0.36,
"learning_rate": 1.2828340807174889e-05,
"loss": 0.0002,
"step": 20000
},
{
"epoch": 0.36,
"learning_rate": 1.2756591928251123e-05,
"loss": 0.0025,
"step": 20200
},
{
"epoch": 0.37,
"learning_rate": 1.2684843049327355e-05,
"loss": 0.0001,
"step": 20400
},
{
"epoch": 0.37,
"learning_rate": 1.2613094170403588e-05,
"loss": 0.0003,
"step": 20600
},
{
"epoch": 0.37,
"learning_rate": 1.254134529147982e-05,
"loss": 0.0001,
"step": 20800
},
{
"epoch": 0.38,
"learning_rate": 1.2469596412556056e-05,
"loss": 0.0004,
"step": 21000
},
{
"epoch": 0.38,
"learning_rate": 1.2397847533632289e-05,
"loss": 0.0001,
"step": 21200
},
{
"epoch": 0.38,
"learning_rate": 1.2326098654708521e-05,
"loss": 0.0002,
"step": 21400
},
{
"epoch": 0.39,
"learning_rate": 1.2254349775784755e-05,
"loss": 0.0,
"step": 21600
},
{
"epoch": 0.39,
"learning_rate": 1.2182600896860988e-05,
"loss": 0.0001,
"step": 21800
},
{
"epoch": 0.39,
"learning_rate": 1.211085201793722e-05,
"loss": 0.0002,
"step": 22000
},
{
"epoch": 0.4,
"learning_rate": 1.2039103139013454e-05,
"loss": 0.0002,
"step": 22200
},
{
"epoch": 0.4,
"learning_rate": 1.1967354260089687e-05,
"loss": 0.0003,
"step": 22400
},
{
"epoch": 0.41,
"learning_rate": 1.189560538116592e-05,
"loss": 0.0009,
"step": 22600
},
{
"epoch": 0.41,
"learning_rate": 1.1823856502242152e-05,
"loss": 0.0001,
"step": 22800
},
{
"epoch": 0.41,
"learning_rate": 1.1752107623318388e-05,
"loss": 0.0001,
"step": 23000
},
{
"epoch": 0.42,
"learning_rate": 1.168035874439462e-05,
"loss": 0.0,
"step": 23200
},
{
"epoch": 0.42,
"learning_rate": 1.1608609865470853e-05,
"loss": 0.0003,
"step": 23400
},
{
"epoch": 0.42,
"learning_rate": 1.1536860986547087e-05,
"loss": 0.0001,
"step": 23600
},
{
"epoch": 0.43,
"learning_rate": 1.146511210762332e-05,
"loss": 0.0,
"step": 23800
},
{
"epoch": 0.43,
"learning_rate": 1.1393363228699552e-05,
"loss": 0.0001,
"step": 24000
},
{
"epoch": 0.43,
"learning_rate": 1.1321614349775784e-05,
"loss": 0.0001,
"step": 24200
},
{
"epoch": 0.44,
"learning_rate": 1.124986547085202e-05,
"loss": 0.0001,
"step": 24400
},
{
"epoch": 0.44,
"learning_rate": 1.1178116591928253e-05,
"loss": 0.0001,
"step": 24600
},
{
"epoch": 0.44,
"learning_rate": 1.1106367713004485e-05,
"loss": 0.0001,
"step": 24800
},
{
"epoch": 0.45,
"learning_rate": 1.103461883408072e-05,
"loss": 0.0001,
"step": 25000
},
{
"epoch": 0.45,
"learning_rate": 1.0962869955156952e-05,
"loss": 0.0001,
"step": 25200
},
{
"epoch": 0.46,
"learning_rate": 1.0891121076233184e-05,
"loss": 0.0003,
"step": 25400
},
{
"epoch": 0.46,
"learning_rate": 1.0819372197309419e-05,
"loss": 0.0001,
"step": 25600
},
{
"epoch": 0.46,
"learning_rate": 1.0747623318385651e-05,
"loss": 0.0001,
"step": 25800
},
{
"epoch": 0.47,
"learning_rate": 1.0676233183856504e-05,
"loss": 0.0005,
"step": 26000
},
{
"epoch": 0.47,
"learning_rate": 1.0604484304932736e-05,
"loss": 0.0001,
"step": 26200
},
{
"epoch": 0.47,
"learning_rate": 1.0532735426008969e-05,
"loss": 0.0001,
"step": 26400
},
{
"epoch": 0.48,
"learning_rate": 1.0460986547085203e-05,
"loss": 0.0001,
"step": 26600
},
{
"epoch": 0.48,
"learning_rate": 1.0389237668161435e-05,
"loss": 0.0002,
"step": 26800
},
{
"epoch": 0.48,
"learning_rate": 1.0317488789237668e-05,
"loss": 0.0,
"step": 27000
},
{
"epoch": 0.49,
"learning_rate": 1.0245739910313904e-05,
"loss": 0.0,
"step": 27200
},
{
"epoch": 0.49,
"learning_rate": 1.0173991031390136e-05,
"loss": 0.0001,
"step": 27400
},
{
"epoch": 0.5,
"learning_rate": 1.0102242152466369e-05,
"loss": 0.0002,
"step": 27600
},
{
"epoch": 0.5,
"learning_rate": 1.0030493273542601e-05,
"loss": 0.0001,
"step": 27800
},
{
"epoch": 0.5,
"learning_rate": 9.958744394618834e-06,
"loss": 0.0001,
"step": 28000
},
{
"epoch": 0.51,
"learning_rate": 9.887354260089686e-06,
"loss": 0.0003,
"step": 28200
},
{
"epoch": 0.51,
"learning_rate": 9.81560538116592e-06,
"loss": 0.0001,
"step": 28400
},
{
"epoch": 0.51,
"learning_rate": 9.743856502242153e-06,
"loss": 0.0,
"step": 28600
},
{
"epoch": 0.52,
"learning_rate": 9.672107623318386e-06,
"loss": 0.0,
"step": 28800
},
{
"epoch": 0.52,
"learning_rate": 9.60035874439462e-06,
"loss": 0.0,
"step": 29000
},
{
"epoch": 0.52,
"learning_rate": 9.528609865470852e-06,
"loss": 0.0001,
"step": 29200
},
{
"epoch": 0.53,
"learning_rate": 9.456860986547086e-06,
"loss": 0.0,
"step": 29400
},
{
"epoch": 0.53,
"learning_rate": 9.385112107623319e-06,
"loss": 0.0001,
"step": 29600
},
{
"epoch": 0.53,
"learning_rate": 9.313363228699553e-06,
"loss": 0.0001,
"step": 29800
},
{
"epoch": 0.54,
"learning_rate": 9.241614349775786e-06,
"loss": 0.0,
"step": 30000
},
{
"epoch": 0.54,
"learning_rate": 9.169865470852018e-06,
"loss": 0.0002,
"step": 30200
},
{
"epoch": 0.55,
"learning_rate": 9.098475336322871e-06,
"loss": 0.0003,
"step": 30400
},
{
"epoch": 0.55,
"learning_rate": 9.026726457399105e-06,
"loss": 0.0001,
"step": 30600
},
{
"epoch": 0.55,
"learning_rate": 8.954977578475338e-06,
"loss": 0.0001,
"step": 30800
},
{
"epoch": 0.56,
"learning_rate": 8.88322869955157e-06,
"loss": 0.0001,
"step": 31000
},
{
"epoch": 0.56,
"learning_rate": 8.811479820627803e-06,
"loss": 0.0,
"step": 31200
},
{
"epoch": 0.56,
"learning_rate": 8.739730941704037e-06,
"loss": 0.0003,
"step": 31400
},
{
"epoch": 0.57,
"learning_rate": 8.667982062780271e-06,
"loss": 0.0001,
"step": 31600
},
{
"epoch": 0.57,
"learning_rate": 8.596233183856503e-06,
"loss": 0.0003,
"step": 31800
},
{
"epoch": 0.57,
"learning_rate": 8.524484304932736e-06,
"loss": 0.0,
"step": 32000
},
{
"epoch": 0.58,
"learning_rate": 8.452735426008968e-06,
"loss": 0.0,
"step": 32200
},
{
"epoch": 0.58,
"learning_rate": 8.380986547085203e-06,
"loss": 0.0,
"step": 32400
},
{
"epoch": 0.58,
"learning_rate": 8.309237668161437e-06,
"loss": 0.0,
"step": 32600
},
{
"epoch": 0.59,
"learning_rate": 8.23748878923767e-06,
"loss": 0.0,
"step": 32800
},
{
"epoch": 0.59,
"learning_rate": 8.165739910313902e-06,
"loss": 0.0001,
"step": 33000
},
{
"epoch": 0.6,
"learning_rate": 8.093991031390134e-06,
"loss": 0.0003,
"step": 33200
},
{
"epoch": 0.6,
"learning_rate": 8.022242152466368e-06,
"loss": 0.0,
"step": 33400
},
{
"epoch": 0.6,
"learning_rate": 7.9504932735426e-06,
"loss": 0.0,
"step": 33600
},
{
"epoch": 0.61,
"learning_rate": 7.878744394618835e-06,
"loss": 0.0001,
"step": 33800
},
{
"epoch": 0.61,
"learning_rate": 7.806995515695068e-06,
"loss": 0.0,
"step": 34000
},
{
"epoch": 0.61,
"learning_rate": 7.7352466367713e-06,
"loss": 0.0002,
"step": 34200
},
{
"epoch": 0.62,
"learning_rate": 7.663497757847534e-06,
"loss": 0.0,
"step": 34400
},
{
"epoch": 0.62,
"learning_rate": 7.591748878923767e-06,
"loss": 0.0,
"step": 34600
},
{
"epoch": 0.62,
"learning_rate": 7.520000000000001e-06,
"loss": 0.0,
"step": 34800
},
{
"epoch": 0.63,
"learning_rate": 7.448251121076234e-06,
"loss": 0.0001,
"step": 35000
},
{
"epoch": 0.63,
"learning_rate": 7.376502242152467e-06,
"loss": 0.0,
"step": 35200
},
{
"epoch": 0.63,
"learning_rate": 7.304753363228701e-06,
"loss": 0.0,
"step": 35400
},
{
"epoch": 0.64,
"learning_rate": 7.233004484304933e-06,
"loss": 0.0,
"step": 35600
},
{
"epoch": 0.64,
"learning_rate": 7.161255605381167e-06,
"loss": 0.0,
"step": 35800
},
{
"epoch": 0.65,
"learning_rate": 7.0895067264574e-06,
"loss": 0.0,
"step": 36000
},
{
"epoch": 0.65,
"learning_rate": 7.0177578475336325e-06,
"loss": 0.0,
"step": 36200
},
{
"epoch": 0.65,
"learning_rate": 6.946008968609867e-06,
"loss": 0.0,
"step": 36400
},
{
"epoch": 0.66,
"learning_rate": 6.874260089686099e-06,
"loss": 0.0,
"step": 36600
},
{
"epoch": 0.66,
"learning_rate": 6.8025112107623325e-06,
"loss": 0.0,
"step": 36800
},
{
"epoch": 0.66,
"learning_rate": 6.730762331838565e-06,
"loss": 0.0,
"step": 37000
},
{
"epoch": 0.67,
"learning_rate": 6.659013452914798e-06,
"loss": 0.0,
"step": 37200
},
{
"epoch": 0.67,
"learning_rate": 6.5872645739910325e-06,
"loss": 0.0,
"step": 37400
},
{
"epoch": 0.67,
"learning_rate": 6.515515695067265e-06,
"loss": 0.0,
"step": 37600
},
{
"epoch": 0.68,
"learning_rate": 6.443766816143498e-06,
"loss": 0.0,
"step": 37800
},
{
"epoch": 0.68,
"learning_rate": 6.372017937219731e-06,
"loss": 0.0,
"step": 38000
},
{
"epoch": 0.69,
"learning_rate": 6.300269058295965e-06,
"loss": 0.0,
"step": 38200
},
{
"epoch": 0.69,
"learning_rate": 6.2292376681614354e-06,
"loss": 0.0,
"step": 38400
},
{
"epoch": 0.69,
"learning_rate": 6.157488789237669e-06,
"loss": 0.0,
"step": 38600
},
{
"epoch": 0.7,
"learning_rate": 6.085739910313901e-06,
"loss": 0.0,
"step": 38800
},
{
"epoch": 0.7,
"learning_rate": 6.0139910313901354e-06,
"loss": 0.0,
"step": 39000
},
{
"epoch": 0.7,
"learning_rate": 5.942242152466369e-06,
"loss": 0.0,
"step": 39200
},
{
"epoch": 0.71,
"learning_rate": 5.870493273542601e-06,
"loss": 0.0,
"step": 39400
},
{
"epoch": 0.71,
"learning_rate": 5.798744394618835e-06,
"loss": 0.0,
"step": 39600
},
{
"epoch": 0.71,
"learning_rate": 5.726995515695067e-06,
"loss": 0.0,
"step": 39800
},
{
"epoch": 0.72,
"learning_rate": 5.655246636771301e-06,
"loss": 0.0,
"step": 40000
},
{
"epoch": 0.72,
"learning_rate": 5.583497757847534e-06,
"loss": 0.0,
"step": 40200
},
{
"epoch": 0.72,
"learning_rate": 5.511748878923767e-06,
"loss": 0.0,
"step": 40400
},
{
"epoch": 0.73,
"learning_rate": 5.440358744394619e-06,
"loss": 0.0,
"step": 40600
},
{
"epoch": 0.73,
"learning_rate": 5.368609865470853e-06,
"loss": 0.0,
"step": 40800
},
{
"epoch": 0.74,
"learning_rate": 5.296860986547086e-06,
"loss": 0.0,
"step": 41000
},
{
"epoch": 0.74,
"learning_rate": 5.225112107623319e-06,
"loss": 0.0,
"step": 41200
},
{
"epoch": 0.74,
"learning_rate": 5.1533632286995515e-06,
"loss": 0.0,
"step": 41400
},
{
"epoch": 0.75,
"learning_rate": 5.081973094170403e-06,
"loss": 0.0,
"step": 41600
},
{
"epoch": 0.75,
"learning_rate": 5.0102242152466375e-06,
"loss": 0.0002,
"step": 41800
},
{
"epoch": 0.75,
"learning_rate": 4.938834080717489e-06,
"loss": 0.0001,
"step": 42000
},
{
"epoch": 0.76,
"learning_rate": 4.867085201793723e-06,
"loss": 0.0,
"step": 42200
},
{
"epoch": 0.76,
"learning_rate": 4.795336322869955e-06,
"loss": 0.0,
"step": 42400
},
{
"epoch": 0.76,
"learning_rate": 4.723587443946189e-06,
"loss": 0.0,
"step": 42600
},
{
"epoch": 0.77,
"learning_rate": 4.651838565022422e-06,
"loss": 0.0,
"step": 42800
},
{
"epoch": 0.77,
"learning_rate": 4.580089686098655e-06,
"loss": 0.0,
"step": 43000
},
{
"epoch": 0.77,
"learning_rate": 4.508340807174889e-06,
"loss": 0.0001,
"step": 43200
},
{
"epoch": 0.78,
"learning_rate": 4.436591928251122e-06,
"loss": 0.0001,
"step": 43400
},
{
"epoch": 0.78,
"learning_rate": 4.364843049327354e-06,
"loss": 0.0,
"step": 43600
},
{
"epoch": 0.79,
"learning_rate": 4.293094170403588e-06,
"loss": 0.0,
"step": 43800
},
{
"epoch": 0.79,
"learning_rate": 4.221345291479821e-06,
"loss": 0.0,
"step": 44000
},
{
"epoch": 0.79,
"learning_rate": 4.1495964125560536e-06,
"loss": 0.0,
"step": 44200
},
{
"epoch": 0.8,
"learning_rate": 4.077847533632288e-06,
"loss": 0.0,
"step": 44400
},
{
"epoch": 0.8,
"learning_rate": 4.00609865470852e-06,
"loss": 0.0,
"step": 44600
},
{
"epoch": 0.8,
"learning_rate": 3.934349775784754e-06,
"loss": 0.0,
"step": 44800
},
{
"epoch": 0.81,
"learning_rate": 3.862600896860987e-06,
"loss": 0.0,
"step": 45000
},
{
"epoch": 0.81,
"learning_rate": 3.79085201793722e-06,
"loss": 0.0,
"step": 45200
},
{
"epoch": 0.81,
"learning_rate": 3.719103139013453e-06,
"loss": 0.0,
"step": 45400
},
{
"epoch": 0.82,
"learning_rate": 3.6473542600896865e-06,
"loss": 0.0,
"step": 45600
},
{
"epoch": 0.82,
"learning_rate": 3.57560538116592e-06,
"loss": 0.0002,
"step": 45800
},
{
"epoch": 0.83,
"learning_rate": 3.5038565022421527e-06,
"loss": 0.0,
"step": 46000
},
{
"epoch": 0.83,
"learning_rate": 3.432107623318386e-06,
"loss": 0.0001,
"step": 46200
},
{
"epoch": 0.83,
"learning_rate": 3.360717488789238e-06,
"loss": 0.0001,
"step": 46400
},
{
"epoch": 0.84,
"learning_rate": 3.288968609865471e-06,
"loss": 0.0,
"step": 46600
},
{
"epoch": 0.84,
"learning_rate": 3.217219730941704e-06,
"loss": 0.0,
"step": 46800
},
{
"epoch": 0.84,
"learning_rate": 3.145470852017937e-06,
"loss": 0.0001,
"step": 47000
},
{
"epoch": 0.85,
"learning_rate": 3.073721973094171e-06,
"loss": 0.0,
"step": 47200
},
{
"epoch": 0.85,
"learning_rate": 3.001973094170404e-06,
"loss": 0.0,
"step": 47400
},
{
"epoch": 0.85,
"learning_rate": 2.930224215246637e-06,
"loss": 0.0,
"step": 47600
},
{
"epoch": 0.86,
"learning_rate": 2.85847533632287e-06,
"loss": 0.0,
"step": 47800
},
{
"epoch": 0.86,
"learning_rate": 2.7867264573991034e-06,
"loss": 0.0,
"step": 48000
},
{
"epoch": 0.86,
"learning_rate": 2.7149775784753363e-06,
"loss": 0.0,
"step": 48200
},
{
"epoch": 0.87,
"learning_rate": 2.64322869955157e-06,
"loss": 0.0,
"step": 48400
},
{
"epoch": 0.87,
"learning_rate": 2.571479820627803e-06,
"loss": 0.0,
"step": 48600
},
{
"epoch": 0.88,
"learning_rate": 2.499730941704036e-06,
"loss": 0.0,
"step": 48800
},
{
"epoch": 0.88,
"learning_rate": 2.427982062780269e-06,
"loss": 0.0001,
"step": 49000
},
{
"epoch": 0.88,
"learning_rate": 2.3562331838565025e-06,
"loss": 0.0,
"step": 49200
},
{
"epoch": 0.89,
"learning_rate": 2.2844843049327355e-06,
"loss": 0.0001,
"step": 49400
},
{
"epoch": 0.89,
"learning_rate": 2.2127354260089688e-06,
"loss": 0.0,
"step": 49600
},
{
"epoch": 0.89,
"learning_rate": 2.140986547085202e-06,
"loss": 0.0,
"step": 49800
},
{
"epoch": 0.9,
"learning_rate": 2.069237668161435e-06,
"loss": 0.0,
"step": 50000
},
{
"epoch": 0.9,
"learning_rate": 1.9974887892376684e-06,
"loss": 0.0,
"step": 50200
},
{
"epoch": 0.9,
"learning_rate": 1.9257399103139017e-06,
"loss": 0.0,
"step": 50400
},
{
"epoch": 0.91,
"learning_rate": 1.8543497757847534e-06,
"loss": 0.0001,
"step": 50600
},
{
"epoch": 0.91,
"learning_rate": 1.7826008968609867e-06,
"loss": 0.0,
"step": 50800
},
{
"epoch": 0.91,
"learning_rate": 1.7108520179372198e-06,
"loss": 0.0,
"step": 51000
},
{
"epoch": 0.92,
"learning_rate": 1.639103139013453e-06,
"loss": 0.0,
"step": 51200
},
{
"epoch": 0.92,
"learning_rate": 1.5673542600896863e-06,
"loss": 0.0,
"step": 51400
},
{
"epoch": 0.93,
"learning_rate": 1.4956053811659194e-06,
"loss": 0.0,
"step": 51600
},
{
"epoch": 0.93,
"learning_rate": 1.4238565022421528e-06,
"loss": 0.0,
"step": 51800
},
{
"epoch": 0.93,
"learning_rate": 1.3521076233183859e-06,
"loss": 0.0,
"step": 52000
},
{
"epoch": 0.94,
"learning_rate": 1.2803587443946188e-06,
"loss": 0.0,
"step": 52200
},
{
"epoch": 0.94,
"learning_rate": 1.2089686098654709e-06,
"loss": 0.0,
"step": 52400
},
{
"epoch": 0.94,
"learning_rate": 1.137219730941704e-06,
"loss": 0.0,
"step": 52600
},
{
"epoch": 0.95,
"learning_rate": 1.0654708520179373e-06,
"loss": 0.0,
"step": 52800
},
{
"epoch": 0.95,
"learning_rate": 9.937219730941705e-07,
"loss": 0.0,
"step": 53000
},
{
"epoch": 0.95,
"learning_rate": 9.219730941704037e-07,
"loss": 0.0,
"step": 53200
},
{
"epoch": 0.96,
"learning_rate": 8.502242152466368e-07,
"loss": 0.0,
"step": 53400
},
{
"epoch": 0.96,
"learning_rate": 7.7847533632287e-07,
"loss": 0.0,
"step": 53600
},
{
"epoch": 0.97,
"learning_rate": 7.067264573991033e-07,
"loss": 0.0,
"step": 53800
},
{
"epoch": 0.97,
"learning_rate": 6.349775784753363e-07,
"loss": 0.0,
"step": 54000
},
{
"epoch": 0.97,
"learning_rate": 5.632286995515695e-07,
"loss": 0.0,
"step": 54200
},
{
"epoch": 0.98,
"learning_rate": 4.914798206278028e-07,
"loss": 0.0,
"step": 54400
},
{
"epoch": 0.98,
"learning_rate": 4.1973094170403593e-07,
"loss": 0.0,
"step": 54600
},
{
"epoch": 0.98,
"learning_rate": 3.4798206278026905e-07,
"loss": 0.0,
"step": 54800
},
{
"epoch": 0.99,
"learning_rate": 2.762331838565023e-07,
"loss": 0.0,
"step": 55000
},
{
"epoch": 0.99,
"learning_rate": 2.0448430493273546e-07,
"loss": 0.0,
"step": 55200
},
{
"epoch": 0.99,
"learning_rate": 1.327354260089686e-07,
"loss": 0.0,
"step": 55400
}
],
"max_steps": 55750,
"num_train_epochs": 1,
"total_flos": 6.788108648448e+16,
"trial_name": null,
"trial_params": null
}