{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.4362863380443525, "global_step": 12500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 4.968083748244606e-05, "loss": 2.9096, "step": 100 }, { "epoch": 0.02, "learning_rate": 4.936167496489212e-05, "loss": 2.8631, "step": 200 }, { "epoch": 0.03, "learning_rate": 4.904251244733819e-05, "loss": 2.7364, "step": 300 }, { "epoch": 0.05, "learning_rate": 4.8723349929784253e-05, "loss": 2.8226, "step": 400 }, { "epoch": 0.06, "learning_rate": 4.840418741223031e-05, "loss": 2.7184, "step": 500 }, { "epoch": 0.07, "learning_rate": 4.808502489467637e-05, "loss": 2.6843, "step": 600 }, { "epoch": 0.08, "learning_rate": 4.776586237712243e-05, "loss": 2.7507, "step": 700 }, { "epoch": 0.09, "learning_rate": 4.744669985956849e-05, "loss": 2.7261, "step": 800 }, { "epoch": 0.1, "learning_rate": 4.712753734201456e-05, "loss": 2.7343, "step": 900 }, { "epoch": 0.11, "learning_rate": 4.6808374824460616e-05, "loss": 2.7005, "step": 1000 }, { "epoch": 0.13, "learning_rate": 4.6489212306906676e-05, "loss": 2.6462, "step": 1100 }, { "epoch": 0.14, "learning_rate": 4.617004978935274e-05, "loss": 2.6276, "step": 1200 }, { "epoch": 0.15, "learning_rate": 4.58508872717988e-05, "loss": 2.6493, "step": 1300 }, { "epoch": 0.16, "learning_rate": 4.553172475424487e-05, "loss": 2.7024, "step": 1400 }, { "epoch": 0.17, "learning_rate": 4.521256223669093e-05, "loss": 2.6616, "step": 1500 }, { "epoch": 0.18, "learning_rate": 4.4893399719136986e-05, "loss": 2.7136, "step": 1600 }, { "epoch": 0.2, "learning_rate": 4.4574237201583046e-05, "loss": 2.6842, "step": 1700 }, { "epoch": 0.21, "learning_rate": 4.4255074684029105e-05, "loss": 2.5911, "step": 1800 }, { "epoch": 0.22, "learning_rate": 4.393591216647517e-05, "loss": 2.7281, "step": 1900 }, { "epoch": 0.23, "learning_rate": 4.361674964892124e-05, "loss": 2.6697, "step": 2000 }, { "epoch": 0.24, "learning_rate": 4.32975871313673e-05, "loss": 2.6282, "step": 2100 }, { "epoch": 0.25, "learning_rate": 4.2978424613813356e-05, "loss": 2.564, "step": 2200 }, { "epoch": 0.26, "learning_rate": 4.2659262096259415e-05, "loss": 2.4882, "step": 2300 }, { "epoch": 0.28, "learning_rate": 4.2340099578705475e-05, "loss": 2.6347, "step": 2400 }, { "epoch": 0.29, "learning_rate": 4.202093706115154e-05, "loss": 2.5994, "step": 2500 }, { "epoch": 0.3, "learning_rate": 4.17017745435976e-05, "loss": 2.6479, "step": 2600 }, { "epoch": 0.31, "learning_rate": 4.1382612026043666e-05, "loss": 2.7357, "step": 2700 }, { "epoch": 0.32, "learning_rate": 4.1063449508489726e-05, "loss": 2.6819, "step": 2800 }, { "epoch": 0.33, "learning_rate": 4.0744286990935785e-05, "loss": 2.5451, "step": 2900 }, { "epoch": 0.34, "learning_rate": 4.0425124473381845e-05, "loss": 2.6343, "step": 3000 }, { "epoch": 0.36, "learning_rate": 4.010596195582791e-05, "loss": 2.5168, "step": 3100 }, { "epoch": 0.37, "learning_rate": 3.978679943827397e-05, "loss": 2.5881, "step": 3200 }, { "epoch": 0.38, "learning_rate": 3.946763692072003e-05, "loss": 2.6411, "step": 3300 }, { "epoch": 0.39, "learning_rate": 3.9148474403166096e-05, "loss": 2.5901, "step": 3400 }, { "epoch": 0.4, "learning_rate": 3.8829311885612155e-05, "loss": 2.5842, "step": 3500 }, { "epoch": 0.41, "learning_rate": 3.851014936805822e-05, "loss": 2.5436, "step": 3600 }, { "epoch": 0.43, "learning_rate": 3.819098685050428e-05, "loss": 2.6021, "step": 3700 }, { "epoch": 0.44, "learning_rate": 3.787182433295034e-05, "loss": 2.6307, "step": 3800 }, { "epoch": 0.45, "learning_rate": 3.75526618153964e-05, "loss": 2.5632, "step": 3900 }, { "epoch": 0.46, "learning_rate": 3.723349929784246e-05, "loss": 2.5933, "step": 4000 }, { "epoch": 0.47, "learning_rate": 3.6914336780288525e-05, "loss": 2.5458, "step": 4100 }, { "epoch": 0.48, "learning_rate": 3.659517426273459e-05, "loss": 2.5212, "step": 4200 }, { "epoch": 0.49, "learning_rate": 3.627601174518065e-05, "loss": 2.4532, "step": 4300 }, { "epoch": 0.51, "learning_rate": 3.595684922762671e-05, "loss": 2.5134, "step": 4400 }, { "epoch": 0.52, "learning_rate": 3.563768671007277e-05, "loss": 2.5368, "step": 4500 }, { "epoch": 0.53, "learning_rate": 3.531852419251883e-05, "loss": 2.5547, "step": 4600 }, { "epoch": 0.54, "learning_rate": 3.4999361674964895e-05, "loss": 2.4786, "step": 4700 }, { "epoch": 0.55, "learning_rate": 3.4680199157410954e-05, "loss": 2.5377, "step": 4800 }, { "epoch": 0.56, "learning_rate": 3.436103663985702e-05, "loss": 2.5462, "step": 4900 }, { "epoch": 0.57, "learning_rate": 3.404187412230308e-05, "loss": 2.5644, "step": 5000 }, { "epoch": 0.59, "learning_rate": 3.372271160474914e-05, "loss": 2.5206, "step": 5100 }, { "epoch": 0.6, "learning_rate": 3.3403549087195205e-05, "loss": 2.5305, "step": 5200 }, { "epoch": 0.61, "learning_rate": 3.3084386569641264e-05, "loss": 2.462, "step": 5300 }, { "epoch": 0.62, "learning_rate": 3.2765224052087324e-05, "loss": 2.4704, "step": 5400 }, { "epoch": 0.63, "learning_rate": 3.244606153453338e-05, "loss": 2.5133, "step": 5500 }, { "epoch": 0.64, "learning_rate": 3.212689901697944e-05, "loss": 2.4805, "step": 5600 }, { "epoch": 0.65, "learning_rate": 3.180773649942551e-05, "loss": 2.4572, "step": 5700 }, { "epoch": 0.67, "learning_rate": 3.1488573981871575e-05, "loss": 2.4128, "step": 5800 }, { "epoch": 0.68, "learning_rate": 3.1169411464317634e-05, "loss": 2.4577, "step": 5900 }, { "epoch": 0.69, "learning_rate": 3.0850248946763694e-05, "loss": 2.5539, "step": 6000 }, { "epoch": 0.7, "learning_rate": 3.053108642920975e-05, "loss": 2.472, "step": 6100 }, { "epoch": 0.71, "learning_rate": 3.0211923911655816e-05, "loss": 2.4468, "step": 6200 }, { "epoch": 0.72, "learning_rate": 2.9892761394101875e-05, "loss": 2.4831, "step": 6300 }, { "epoch": 0.74, "learning_rate": 2.957359887654794e-05, "loss": 2.4093, "step": 6400 }, { "epoch": 0.75, "learning_rate": 2.9254436358994004e-05, "loss": 2.4683, "step": 6500 }, { "epoch": 0.76, "learning_rate": 2.8935273841440063e-05, "loss": 2.4388, "step": 6600 }, { "epoch": 0.77, "learning_rate": 2.8616111323886126e-05, "loss": 2.3558, "step": 6700 }, { "epoch": 0.78, "learning_rate": 2.8296948806332185e-05, "loss": 2.4788, "step": 6800 }, { "epoch": 0.79, "learning_rate": 2.7977786288778245e-05, "loss": 2.4523, "step": 6900 }, { "epoch": 0.8, "learning_rate": 2.7658623771224308e-05, "loss": 2.4759, "step": 7000 }, { "epoch": 0.82, "learning_rate": 2.7339461253670367e-05, "loss": 2.3574, "step": 7100 }, { "epoch": 0.83, "learning_rate": 2.7020298736116433e-05, "loss": 2.4569, "step": 7200 }, { "epoch": 0.84, "learning_rate": 2.6701136218562496e-05, "loss": 2.4078, "step": 7300 }, { "epoch": 0.85, "learning_rate": 2.6381973701008555e-05, "loss": 2.3948, "step": 7400 }, { "epoch": 0.86, "learning_rate": 2.6062811183454615e-05, "loss": 2.5496, "step": 7500 }, { "epoch": 0.87, "learning_rate": 2.5743648665900677e-05, "loss": 2.4191, "step": 7600 }, { "epoch": 0.88, "learning_rate": 2.5424486148346737e-05, "loss": 2.4895, "step": 7700 }, { "epoch": 0.9, "learning_rate": 2.51053236307928e-05, "loss": 2.4726, "step": 7800 }, { "epoch": 0.91, "learning_rate": 2.4786161113238862e-05, "loss": 2.3287, "step": 7900 }, { "epoch": 0.92, "learning_rate": 2.446699859568492e-05, "loss": 2.4278, "step": 8000 }, { "epoch": 0.93, "learning_rate": 2.4147836078130988e-05, "loss": 2.4548, "step": 8100 }, { "epoch": 0.94, "learning_rate": 2.3828673560577047e-05, "loss": 2.5173, "step": 8200 }, { "epoch": 0.95, "learning_rate": 2.3509511043023107e-05, "loss": 2.4883, "step": 8300 }, { "epoch": 0.97, "learning_rate": 2.319034852546917e-05, "loss": 2.4435, "step": 8400 }, { "epoch": 0.98, "learning_rate": 2.2871186007915232e-05, "loss": 2.4825, "step": 8500 }, { "epoch": 0.99, "learning_rate": 2.2552023490361295e-05, "loss": 2.4539, "step": 8600 }, { "epoch": 1.0, "learning_rate": 2.2232860972807354e-05, "loss": 2.4115, "step": 8700 }, { "epoch": 1.01, "learning_rate": 2.1913698455253417e-05, "loss": 2.3452, "step": 8800 }, { "epoch": 1.02, "learning_rate": 2.159453593769948e-05, "loss": 2.3799, "step": 8900 }, { "epoch": 1.03, "learning_rate": 2.127537342014554e-05, "loss": 2.4019, "step": 9000 }, { "epoch": 1.05, "learning_rate": 2.09562109025916e-05, "loss": 2.3678, "step": 9100 }, { "epoch": 1.06, "learning_rate": 2.0637048385037665e-05, "loss": 2.3711, "step": 9200 }, { "epoch": 1.07, "learning_rate": 2.0317885867483724e-05, "loss": 2.2767, "step": 9300 }, { "epoch": 1.08, "learning_rate": 1.9998723349929783e-05, "loss": 2.3761, "step": 9400 }, { "epoch": 1.09, "learning_rate": 1.9679560832375846e-05, "loss": 2.3392, "step": 9500 }, { "epoch": 1.1, "learning_rate": 1.936039831482191e-05, "loss": 2.3232, "step": 9600 }, { "epoch": 1.11, "learning_rate": 1.904123579726797e-05, "loss": 2.355, "step": 9700 }, { "epoch": 1.13, "learning_rate": 1.872207327971403e-05, "loss": 2.3613, "step": 9800 }, { "epoch": 1.14, "learning_rate": 1.840291076216009e-05, "loss": 2.3475, "step": 9900 }, { "epoch": 1.15, "learning_rate": 1.8083748244606157e-05, "loss": 2.3654, "step": 10000 }, { "epoch": 1.16, "learning_rate": 1.7764585727052216e-05, "loss": 2.3825, "step": 10100 }, { "epoch": 1.17, "learning_rate": 1.7445423209498275e-05, "loss": 2.3245, "step": 10200 }, { "epoch": 1.18, "learning_rate": 1.712626069194434e-05, "loss": 2.2869, "step": 10300 }, { "epoch": 1.19, "learning_rate": 1.68070981743904e-05, "loss": 2.3207, "step": 10400 }, { "epoch": 1.21, "learning_rate": 1.648793565683646e-05, "loss": 2.3041, "step": 10500 }, { "epoch": 1.22, "learning_rate": 1.6168773139282523e-05, "loss": 2.3543, "step": 10600 }, { "epoch": 1.23, "learning_rate": 1.5849610621728586e-05, "loss": 2.3026, "step": 10700 }, { "epoch": 1.24, "learning_rate": 1.553044810417465e-05, "loss": 2.2919, "step": 10800 }, { "epoch": 1.25, "learning_rate": 1.5211285586620708e-05, "loss": 2.3805, "step": 10900 }, { "epoch": 1.26, "learning_rate": 1.4892123069066769e-05, "loss": 2.3523, "step": 11000 }, { "epoch": 1.28, "learning_rate": 1.4572960551512832e-05, "loss": 2.2931, "step": 11100 }, { "epoch": 1.29, "learning_rate": 1.4253798033958893e-05, "loss": 2.2623, "step": 11200 }, { "epoch": 1.3, "learning_rate": 1.3934635516404954e-05, "loss": 2.28, "step": 11300 }, { "epoch": 1.31, "learning_rate": 1.3615472998851015e-05, "loss": 2.3121, "step": 11400 }, { "epoch": 1.32, "learning_rate": 1.3296310481297078e-05, "loss": 2.3111, "step": 11500 }, { "epoch": 1.33, "learning_rate": 1.2977147963743139e-05, "loss": 2.332, "step": 11600 }, { "epoch": 1.34, "learning_rate": 1.26579854461892e-05, "loss": 2.3495, "step": 11700 }, { "epoch": 1.36, "learning_rate": 1.233882292863526e-05, "loss": 2.2864, "step": 11800 }, { "epoch": 1.37, "learning_rate": 1.2019660411081324e-05, "loss": 2.3482, "step": 11900 }, { "epoch": 1.38, "learning_rate": 1.1700497893527385e-05, "loss": 2.2843, "step": 12000 }, { "epoch": 1.39, "learning_rate": 1.1381335375973447e-05, "loss": 2.3075, "step": 12100 }, { "epoch": 1.4, "learning_rate": 1.1062172858419507e-05, "loss": 2.3899, "step": 12200 }, { "epoch": 1.41, "learning_rate": 1.074301034086557e-05, "loss": 2.3496, "step": 12300 }, { "epoch": 1.42, "learning_rate": 1.042384782331163e-05, "loss": 2.3482, "step": 12400 }, { "epoch": 1.44, "learning_rate": 1.0104685305757693e-05, "loss": 2.2579, "step": 12500 } ], "max_steps": 15666, "num_train_epochs": 2, "total_flos": 4.940417519751168e+16, "trial_name": null, "trial_params": null }