|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.723543605653223, |
|
"global_step": 15000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.968083748244606e-05, |
|
"loss": 2.9096, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.936167496489212e-05, |
|
"loss": 2.8631, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.904251244733819e-05, |
|
"loss": 2.7364, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.8723349929784253e-05, |
|
"loss": 2.8226, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.840418741223031e-05, |
|
"loss": 2.7184, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.808502489467637e-05, |
|
"loss": 2.6843, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.776586237712243e-05, |
|
"loss": 2.7507, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.744669985956849e-05, |
|
"loss": 2.7261, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.712753734201456e-05, |
|
"loss": 2.7343, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.6808374824460616e-05, |
|
"loss": 2.7005, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.6489212306906676e-05, |
|
"loss": 2.6462, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.617004978935274e-05, |
|
"loss": 2.6276, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.58508872717988e-05, |
|
"loss": 2.6493, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.553172475424487e-05, |
|
"loss": 2.7024, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.521256223669093e-05, |
|
"loss": 2.6616, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.4893399719136986e-05, |
|
"loss": 2.7136, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.4574237201583046e-05, |
|
"loss": 2.6842, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.4255074684029105e-05, |
|
"loss": 2.5911, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.393591216647517e-05, |
|
"loss": 2.7281, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.361674964892124e-05, |
|
"loss": 2.6697, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.32975871313673e-05, |
|
"loss": 2.6282, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.2978424613813356e-05, |
|
"loss": 2.564, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.2659262096259415e-05, |
|
"loss": 2.4882, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.2340099578705475e-05, |
|
"loss": 2.6347, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.202093706115154e-05, |
|
"loss": 2.5994, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.17017745435976e-05, |
|
"loss": 2.6479, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.1382612026043666e-05, |
|
"loss": 2.7357, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.1063449508489726e-05, |
|
"loss": 2.6819, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.0744286990935785e-05, |
|
"loss": 2.5451, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.0425124473381845e-05, |
|
"loss": 2.6343, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.010596195582791e-05, |
|
"loss": 2.5168, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.978679943827397e-05, |
|
"loss": 2.5881, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.946763692072003e-05, |
|
"loss": 2.6411, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.9148474403166096e-05, |
|
"loss": 2.5901, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.8829311885612155e-05, |
|
"loss": 2.5842, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.851014936805822e-05, |
|
"loss": 2.5436, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.819098685050428e-05, |
|
"loss": 2.6021, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.787182433295034e-05, |
|
"loss": 2.6307, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.75526618153964e-05, |
|
"loss": 2.5632, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.723349929784246e-05, |
|
"loss": 2.5933, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.6914336780288525e-05, |
|
"loss": 2.5458, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.659517426273459e-05, |
|
"loss": 2.5212, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.627601174518065e-05, |
|
"loss": 2.4532, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 3.595684922762671e-05, |
|
"loss": 2.5134, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 3.563768671007277e-05, |
|
"loss": 2.5368, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 3.531852419251883e-05, |
|
"loss": 2.5547, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 3.4999361674964895e-05, |
|
"loss": 2.4786, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 3.4680199157410954e-05, |
|
"loss": 2.5377, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 3.436103663985702e-05, |
|
"loss": 2.5462, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 3.404187412230308e-05, |
|
"loss": 2.5644, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 3.372271160474914e-05, |
|
"loss": 2.5206, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.3403549087195205e-05, |
|
"loss": 2.5305, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.3084386569641264e-05, |
|
"loss": 2.462, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.2765224052087324e-05, |
|
"loss": 2.4704, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.244606153453338e-05, |
|
"loss": 2.5133, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.212689901697944e-05, |
|
"loss": 2.4805, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.180773649942551e-05, |
|
"loss": 2.4572, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.1488573981871575e-05, |
|
"loss": 2.4128, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.1169411464317634e-05, |
|
"loss": 2.4577, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.0850248946763694e-05, |
|
"loss": 2.5539, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.053108642920975e-05, |
|
"loss": 2.472, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.0211923911655816e-05, |
|
"loss": 2.4468, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 2.9892761394101875e-05, |
|
"loss": 2.4831, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 2.957359887654794e-05, |
|
"loss": 2.4093, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 2.9254436358994004e-05, |
|
"loss": 2.4683, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 2.8935273841440063e-05, |
|
"loss": 2.4388, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 2.8616111323886126e-05, |
|
"loss": 2.3558, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 2.8296948806332185e-05, |
|
"loss": 2.4788, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 2.7977786288778245e-05, |
|
"loss": 2.4523, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 2.7658623771224308e-05, |
|
"loss": 2.4759, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 2.7339461253670367e-05, |
|
"loss": 2.3574, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 2.7020298736116433e-05, |
|
"loss": 2.4569, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 2.6701136218562496e-05, |
|
"loss": 2.4078, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 2.6381973701008555e-05, |
|
"loss": 2.3948, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.6062811183454615e-05, |
|
"loss": 2.5496, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.5743648665900677e-05, |
|
"loss": 2.4191, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.5424486148346737e-05, |
|
"loss": 2.4895, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.51053236307928e-05, |
|
"loss": 2.4726, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.4786161113238862e-05, |
|
"loss": 2.3287, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 2.446699859568492e-05, |
|
"loss": 2.4278, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.4147836078130988e-05, |
|
"loss": 2.4548, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.3828673560577047e-05, |
|
"loss": 2.5173, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.3509511043023107e-05, |
|
"loss": 2.4883, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 2.319034852546917e-05, |
|
"loss": 2.4435, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 2.2871186007915232e-05, |
|
"loss": 2.4825, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.2552023490361295e-05, |
|
"loss": 2.4539, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.2232860972807354e-05, |
|
"loss": 2.4115, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 2.1913698455253417e-05, |
|
"loss": 2.3452, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 2.159453593769948e-05, |
|
"loss": 2.3799, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 2.127537342014554e-05, |
|
"loss": 2.4019, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 2.09562109025916e-05, |
|
"loss": 2.3678, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 2.0637048385037665e-05, |
|
"loss": 2.3711, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 2.0317885867483724e-05, |
|
"loss": 2.2767, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 1.9998723349929783e-05, |
|
"loss": 2.3761, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 1.9679560832375846e-05, |
|
"loss": 2.3392, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 1.936039831482191e-05, |
|
"loss": 2.3232, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 1.904123579726797e-05, |
|
"loss": 2.355, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 1.872207327971403e-05, |
|
"loss": 2.3613, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 1.840291076216009e-05, |
|
"loss": 2.3475, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 1.8083748244606157e-05, |
|
"loss": 2.3654, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 1.7764585727052216e-05, |
|
"loss": 2.3825, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 1.7445423209498275e-05, |
|
"loss": 2.3245, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 1.712626069194434e-05, |
|
"loss": 2.2869, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 1.68070981743904e-05, |
|
"loss": 2.3207, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 1.648793565683646e-05, |
|
"loss": 2.3041, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 1.6168773139282523e-05, |
|
"loss": 2.3543, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 1.5849610621728586e-05, |
|
"loss": 2.3026, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 1.553044810417465e-05, |
|
"loss": 2.2919, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 1.5211285586620708e-05, |
|
"loss": 2.3805, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 1.4892123069066769e-05, |
|
"loss": 2.3523, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 1.4572960551512832e-05, |
|
"loss": 2.2931, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 1.4253798033958893e-05, |
|
"loss": 2.2623, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 1.3934635516404954e-05, |
|
"loss": 2.28, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 1.3615472998851015e-05, |
|
"loss": 2.3121, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 1.3296310481297078e-05, |
|
"loss": 2.3111, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 1.2977147963743139e-05, |
|
"loss": 2.332, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 1.26579854461892e-05, |
|
"loss": 2.3495, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 1.233882292863526e-05, |
|
"loss": 2.2864, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 1.2019660411081324e-05, |
|
"loss": 2.3482, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 1.1700497893527385e-05, |
|
"loss": 2.2843, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 1.1381335375973447e-05, |
|
"loss": 2.3075, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 1.1062172858419507e-05, |
|
"loss": 2.3899, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 1.074301034086557e-05, |
|
"loss": 2.3496, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 1.042384782331163e-05, |
|
"loss": 2.3482, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 1.0104685305757693e-05, |
|
"loss": 2.2579, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 9.785522788203753e-06, |
|
"loss": 2.2689, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 9.466360270649816e-06, |
|
"loss": 2.3111, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 9.147197753095877e-06, |
|
"loss": 2.2794, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 8.82803523554194e-06, |
|
"loss": 2.2754, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 8.508872717987999e-06, |
|
"loss": 2.2837, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 8.189710200434061e-06, |
|
"loss": 2.2297, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 7.870547682880124e-06, |
|
"loss": 2.2969, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 7.5513851653261844e-06, |
|
"loss": 2.3363, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 7.232222647772246e-06, |
|
"loss": 2.2079, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 6.9130601302183074e-06, |
|
"loss": 2.3057, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 6.593897612664369e-06, |
|
"loss": 2.2665, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 6.27473509511043e-06, |
|
"loss": 2.2934, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 5.9555725775564915e-06, |
|
"loss": 2.2407, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 5.636410060002553e-06, |
|
"loss": 2.3309, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 5.317247542448615e-06, |
|
"loss": 2.3257, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 4.998085024894677e-06, |
|
"loss": 2.2518, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 4.678922507340738e-06, |
|
"loss": 2.2359, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 4.3597599897868e-06, |
|
"loss": 2.287, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 4.040597472232861e-06, |
|
"loss": 2.2462, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 3.7214349546789228e-06, |
|
"loss": 2.2815, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 3.4022724371249842e-06, |
|
"loss": 2.3361, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 3.0831099195710457e-06, |
|
"loss": 2.2774, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.7639474020171072e-06, |
|
"loss": 2.2895, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.4447848844631687e-06, |
|
"loss": 2.3155, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.12562236690923e-06, |
|
"loss": 2.2698, |
|
"step": 15000 |
|
} |
|
], |
|
"max_steps": 15666, |
|
"num_train_epochs": 2, |
|
"total_flos": 5.928520220872704e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|