nbtpj's picture
Training in progress, step 15000
3780e03
raw
history blame
18.6 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.723543605653223,
"global_step": 15000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 4.968083748244606e-05,
"loss": 2.9096,
"step": 100
},
{
"epoch": 0.02,
"learning_rate": 4.936167496489212e-05,
"loss": 2.8631,
"step": 200
},
{
"epoch": 0.03,
"learning_rate": 4.904251244733819e-05,
"loss": 2.7364,
"step": 300
},
{
"epoch": 0.05,
"learning_rate": 4.8723349929784253e-05,
"loss": 2.8226,
"step": 400
},
{
"epoch": 0.06,
"learning_rate": 4.840418741223031e-05,
"loss": 2.7184,
"step": 500
},
{
"epoch": 0.07,
"learning_rate": 4.808502489467637e-05,
"loss": 2.6843,
"step": 600
},
{
"epoch": 0.08,
"learning_rate": 4.776586237712243e-05,
"loss": 2.7507,
"step": 700
},
{
"epoch": 0.09,
"learning_rate": 4.744669985956849e-05,
"loss": 2.7261,
"step": 800
},
{
"epoch": 0.1,
"learning_rate": 4.712753734201456e-05,
"loss": 2.7343,
"step": 900
},
{
"epoch": 0.11,
"learning_rate": 4.6808374824460616e-05,
"loss": 2.7005,
"step": 1000
},
{
"epoch": 0.13,
"learning_rate": 4.6489212306906676e-05,
"loss": 2.6462,
"step": 1100
},
{
"epoch": 0.14,
"learning_rate": 4.617004978935274e-05,
"loss": 2.6276,
"step": 1200
},
{
"epoch": 0.15,
"learning_rate": 4.58508872717988e-05,
"loss": 2.6493,
"step": 1300
},
{
"epoch": 0.16,
"learning_rate": 4.553172475424487e-05,
"loss": 2.7024,
"step": 1400
},
{
"epoch": 0.17,
"learning_rate": 4.521256223669093e-05,
"loss": 2.6616,
"step": 1500
},
{
"epoch": 0.18,
"learning_rate": 4.4893399719136986e-05,
"loss": 2.7136,
"step": 1600
},
{
"epoch": 0.2,
"learning_rate": 4.4574237201583046e-05,
"loss": 2.6842,
"step": 1700
},
{
"epoch": 0.21,
"learning_rate": 4.4255074684029105e-05,
"loss": 2.5911,
"step": 1800
},
{
"epoch": 0.22,
"learning_rate": 4.393591216647517e-05,
"loss": 2.7281,
"step": 1900
},
{
"epoch": 0.23,
"learning_rate": 4.361674964892124e-05,
"loss": 2.6697,
"step": 2000
},
{
"epoch": 0.24,
"learning_rate": 4.32975871313673e-05,
"loss": 2.6282,
"step": 2100
},
{
"epoch": 0.25,
"learning_rate": 4.2978424613813356e-05,
"loss": 2.564,
"step": 2200
},
{
"epoch": 0.26,
"learning_rate": 4.2659262096259415e-05,
"loss": 2.4882,
"step": 2300
},
{
"epoch": 0.28,
"learning_rate": 4.2340099578705475e-05,
"loss": 2.6347,
"step": 2400
},
{
"epoch": 0.29,
"learning_rate": 4.202093706115154e-05,
"loss": 2.5994,
"step": 2500
},
{
"epoch": 0.3,
"learning_rate": 4.17017745435976e-05,
"loss": 2.6479,
"step": 2600
},
{
"epoch": 0.31,
"learning_rate": 4.1382612026043666e-05,
"loss": 2.7357,
"step": 2700
},
{
"epoch": 0.32,
"learning_rate": 4.1063449508489726e-05,
"loss": 2.6819,
"step": 2800
},
{
"epoch": 0.33,
"learning_rate": 4.0744286990935785e-05,
"loss": 2.5451,
"step": 2900
},
{
"epoch": 0.34,
"learning_rate": 4.0425124473381845e-05,
"loss": 2.6343,
"step": 3000
},
{
"epoch": 0.36,
"learning_rate": 4.010596195582791e-05,
"loss": 2.5168,
"step": 3100
},
{
"epoch": 0.37,
"learning_rate": 3.978679943827397e-05,
"loss": 2.5881,
"step": 3200
},
{
"epoch": 0.38,
"learning_rate": 3.946763692072003e-05,
"loss": 2.6411,
"step": 3300
},
{
"epoch": 0.39,
"learning_rate": 3.9148474403166096e-05,
"loss": 2.5901,
"step": 3400
},
{
"epoch": 0.4,
"learning_rate": 3.8829311885612155e-05,
"loss": 2.5842,
"step": 3500
},
{
"epoch": 0.41,
"learning_rate": 3.851014936805822e-05,
"loss": 2.5436,
"step": 3600
},
{
"epoch": 0.43,
"learning_rate": 3.819098685050428e-05,
"loss": 2.6021,
"step": 3700
},
{
"epoch": 0.44,
"learning_rate": 3.787182433295034e-05,
"loss": 2.6307,
"step": 3800
},
{
"epoch": 0.45,
"learning_rate": 3.75526618153964e-05,
"loss": 2.5632,
"step": 3900
},
{
"epoch": 0.46,
"learning_rate": 3.723349929784246e-05,
"loss": 2.5933,
"step": 4000
},
{
"epoch": 0.47,
"learning_rate": 3.6914336780288525e-05,
"loss": 2.5458,
"step": 4100
},
{
"epoch": 0.48,
"learning_rate": 3.659517426273459e-05,
"loss": 2.5212,
"step": 4200
},
{
"epoch": 0.49,
"learning_rate": 3.627601174518065e-05,
"loss": 2.4532,
"step": 4300
},
{
"epoch": 0.51,
"learning_rate": 3.595684922762671e-05,
"loss": 2.5134,
"step": 4400
},
{
"epoch": 0.52,
"learning_rate": 3.563768671007277e-05,
"loss": 2.5368,
"step": 4500
},
{
"epoch": 0.53,
"learning_rate": 3.531852419251883e-05,
"loss": 2.5547,
"step": 4600
},
{
"epoch": 0.54,
"learning_rate": 3.4999361674964895e-05,
"loss": 2.4786,
"step": 4700
},
{
"epoch": 0.55,
"learning_rate": 3.4680199157410954e-05,
"loss": 2.5377,
"step": 4800
},
{
"epoch": 0.56,
"learning_rate": 3.436103663985702e-05,
"loss": 2.5462,
"step": 4900
},
{
"epoch": 0.57,
"learning_rate": 3.404187412230308e-05,
"loss": 2.5644,
"step": 5000
},
{
"epoch": 0.59,
"learning_rate": 3.372271160474914e-05,
"loss": 2.5206,
"step": 5100
},
{
"epoch": 0.6,
"learning_rate": 3.3403549087195205e-05,
"loss": 2.5305,
"step": 5200
},
{
"epoch": 0.61,
"learning_rate": 3.3084386569641264e-05,
"loss": 2.462,
"step": 5300
},
{
"epoch": 0.62,
"learning_rate": 3.2765224052087324e-05,
"loss": 2.4704,
"step": 5400
},
{
"epoch": 0.63,
"learning_rate": 3.244606153453338e-05,
"loss": 2.5133,
"step": 5500
},
{
"epoch": 0.64,
"learning_rate": 3.212689901697944e-05,
"loss": 2.4805,
"step": 5600
},
{
"epoch": 0.65,
"learning_rate": 3.180773649942551e-05,
"loss": 2.4572,
"step": 5700
},
{
"epoch": 0.67,
"learning_rate": 3.1488573981871575e-05,
"loss": 2.4128,
"step": 5800
},
{
"epoch": 0.68,
"learning_rate": 3.1169411464317634e-05,
"loss": 2.4577,
"step": 5900
},
{
"epoch": 0.69,
"learning_rate": 3.0850248946763694e-05,
"loss": 2.5539,
"step": 6000
},
{
"epoch": 0.7,
"learning_rate": 3.053108642920975e-05,
"loss": 2.472,
"step": 6100
},
{
"epoch": 0.71,
"learning_rate": 3.0211923911655816e-05,
"loss": 2.4468,
"step": 6200
},
{
"epoch": 0.72,
"learning_rate": 2.9892761394101875e-05,
"loss": 2.4831,
"step": 6300
},
{
"epoch": 0.74,
"learning_rate": 2.957359887654794e-05,
"loss": 2.4093,
"step": 6400
},
{
"epoch": 0.75,
"learning_rate": 2.9254436358994004e-05,
"loss": 2.4683,
"step": 6500
},
{
"epoch": 0.76,
"learning_rate": 2.8935273841440063e-05,
"loss": 2.4388,
"step": 6600
},
{
"epoch": 0.77,
"learning_rate": 2.8616111323886126e-05,
"loss": 2.3558,
"step": 6700
},
{
"epoch": 0.78,
"learning_rate": 2.8296948806332185e-05,
"loss": 2.4788,
"step": 6800
},
{
"epoch": 0.79,
"learning_rate": 2.7977786288778245e-05,
"loss": 2.4523,
"step": 6900
},
{
"epoch": 0.8,
"learning_rate": 2.7658623771224308e-05,
"loss": 2.4759,
"step": 7000
},
{
"epoch": 0.82,
"learning_rate": 2.7339461253670367e-05,
"loss": 2.3574,
"step": 7100
},
{
"epoch": 0.83,
"learning_rate": 2.7020298736116433e-05,
"loss": 2.4569,
"step": 7200
},
{
"epoch": 0.84,
"learning_rate": 2.6701136218562496e-05,
"loss": 2.4078,
"step": 7300
},
{
"epoch": 0.85,
"learning_rate": 2.6381973701008555e-05,
"loss": 2.3948,
"step": 7400
},
{
"epoch": 0.86,
"learning_rate": 2.6062811183454615e-05,
"loss": 2.5496,
"step": 7500
},
{
"epoch": 0.87,
"learning_rate": 2.5743648665900677e-05,
"loss": 2.4191,
"step": 7600
},
{
"epoch": 0.88,
"learning_rate": 2.5424486148346737e-05,
"loss": 2.4895,
"step": 7700
},
{
"epoch": 0.9,
"learning_rate": 2.51053236307928e-05,
"loss": 2.4726,
"step": 7800
},
{
"epoch": 0.91,
"learning_rate": 2.4786161113238862e-05,
"loss": 2.3287,
"step": 7900
},
{
"epoch": 0.92,
"learning_rate": 2.446699859568492e-05,
"loss": 2.4278,
"step": 8000
},
{
"epoch": 0.93,
"learning_rate": 2.4147836078130988e-05,
"loss": 2.4548,
"step": 8100
},
{
"epoch": 0.94,
"learning_rate": 2.3828673560577047e-05,
"loss": 2.5173,
"step": 8200
},
{
"epoch": 0.95,
"learning_rate": 2.3509511043023107e-05,
"loss": 2.4883,
"step": 8300
},
{
"epoch": 0.97,
"learning_rate": 2.319034852546917e-05,
"loss": 2.4435,
"step": 8400
},
{
"epoch": 0.98,
"learning_rate": 2.2871186007915232e-05,
"loss": 2.4825,
"step": 8500
},
{
"epoch": 0.99,
"learning_rate": 2.2552023490361295e-05,
"loss": 2.4539,
"step": 8600
},
{
"epoch": 1.0,
"learning_rate": 2.2232860972807354e-05,
"loss": 2.4115,
"step": 8700
},
{
"epoch": 1.01,
"learning_rate": 2.1913698455253417e-05,
"loss": 2.3452,
"step": 8800
},
{
"epoch": 1.02,
"learning_rate": 2.159453593769948e-05,
"loss": 2.3799,
"step": 8900
},
{
"epoch": 1.03,
"learning_rate": 2.127537342014554e-05,
"loss": 2.4019,
"step": 9000
},
{
"epoch": 1.05,
"learning_rate": 2.09562109025916e-05,
"loss": 2.3678,
"step": 9100
},
{
"epoch": 1.06,
"learning_rate": 2.0637048385037665e-05,
"loss": 2.3711,
"step": 9200
},
{
"epoch": 1.07,
"learning_rate": 2.0317885867483724e-05,
"loss": 2.2767,
"step": 9300
},
{
"epoch": 1.08,
"learning_rate": 1.9998723349929783e-05,
"loss": 2.3761,
"step": 9400
},
{
"epoch": 1.09,
"learning_rate": 1.9679560832375846e-05,
"loss": 2.3392,
"step": 9500
},
{
"epoch": 1.1,
"learning_rate": 1.936039831482191e-05,
"loss": 2.3232,
"step": 9600
},
{
"epoch": 1.11,
"learning_rate": 1.904123579726797e-05,
"loss": 2.355,
"step": 9700
},
{
"epoch": 1.13,
"learning_rate": 1.872207327971403e-05,
"loss": 2.3613,
"step": 9800
},
{
"epoch": 1.14,
"learning_rate": 1.840291076216009e-05,
"loss": 2.3475,
"step": 9900
},
{
"epoch": 1.15,
"learning_rate": 1.8083748244606157e-05,
"loss": 2.3654,
"step": 10000
},
{
"epoch": 1.16,
"learning_rate": 1.7764585727052216e-05,
"loss": 2.3825,
"step": 10100
},
{
"epoch": 1.17,
"learning_rate": 1.7445423209498275e-05,
"loss": 2.3245,
"step": 10200
},
{
"epoch": 1.18,
"learning_rate": 1.712626069194434e-05,
"loss": 2.2869,
"step": 10300
},
{
"epoch": 1.19,
"learning_rate": 1.68070981743904e-05,
"loss": 2.3207,
"step": 10400
},
{
"epoch": 1.21,
"learning_rate": 1.648793565683646e-05,
"loss": 2.3041,
"step": 10500
},
{
"epoch": 1.22,
"learning_rate": 1.6168773139282523e-05,
"loss": 2.3543,
"step": 10600
},
{
"epoch": 1.23,
"learning_rate": 1.5849610621728586e-05,
"loss": 2.3026,
"step": 10700
},
{
"epoch": 1.24,
"learning_rate": 1.553044810417465e-05,
"loss": 2.2919,
"step": 10800
},
{
"epoch": 1.25,
"learning_rate": 1.5211285586620708e-05,
"loss": 2.3805,
"step": 10900
},
{
"epoch": 1.26,
"learning_rate": 1.4892123069066769e-05,
"loss": 2.3523,
"step": 11000
},
{
"epoch": 1.28,
"learning_rate": 1.4572960551512832e-05,
"loss": 2.2931,
"step": 11100
},
{
"epoch": 1.29,
"learning_rate": 1.4253798033958893e-05,
"loss": 2.2623,
"step": 11200
},
{
"epoch": 1.3,
"learning_rate": 1.3934635516404954e-05,
"loss": 2.28,
"step": 11300
},
{
"epoch": 1.31,
"learning_rate": 1.3615472998851015e-05,
"loss": 2.3121,
"step": 11400
},
{
"epoch": 1.32,
"learning_rate": 1.3296310481297078e-05,
"loss": 2.3111,
"step": 11500
},
{
"epoch": 1.33,
"learning_rate": 1.2977147963743139e-05,
"loss": 2.332,
"step": 11600
},
{
"epoch": 1.34,
"learning_rate": 1.26579854461892e-05,
"loss": 2.3495,
"step": 11700
},
{
"epoch": 1.36,
"learning_rate": 1.233882292863526e-05,
"loss": 2.2864,
"step": 11800
},
{
"epoch": 1.37,
"learning_rate": 1.2019660411081324e-05,
"loss": 2.3482,
"step": 11900
},
{
"epoch": 1.38,
"learning_rate": 1.1700497893527385e-05,
"loss": 2.2843,
"step": 12000
},
{
"epoch": 1.39,
"learning_rate": 1.1381335375973447e-05,
"loss": 2.3075,
"step": 12100
},
{
"epoch": 1.4,
"learning_rate": 1.1062172858419507e-05,
"loss": 2.3899,
"step": 12200
},
{
"epoch": 1.41,
"learning_rate": 1.074301034086557e-05,
"loss": 2.3496,
"step": 12300
},
{
"epoch": 1.42,
"learning_rate": 1.042384782331163e-05,
"loss": 2.3482,
"step": 12400
},
{
"epoch": 1.44,
"learning_rate": 1.0104685305757693e-05,
"loss": 2.2579,
"step": 12500
},
{
"epoch": 1.45,
"learning_rate": 9.785522788203753e-06,
"loss": 2.2689,
"step": 12600
},
{
"epoch": 1.46,
"learning_rate": 9.466360270649816e-06,
"loss": 2.3111,
"step": 12700
},
{
"epoch": 1.47,
"learning_rate": 9.147197753095877e-06,
"loss": 2.2794,
"step": 12800
},
{
"epoch": 1.48,
"learning_rate": 8.82803523554194e-06,
"loss": 2.2754,
"step": 12900
},
{
"epoch": 1.49,
"learning_rate": 8.508872717987999e-06,
"loss": 2.2837,
"step": 13000
},
{
"epoch": 1.51,
"learning_rate": 8.189710200434061e-06,
"loss": 2.2297,
"step": 13100
},
{
"epoch": 1.52,
"learning_rate": 7.870547682880124e-06,
"loss": 2.2969,
"step": 13200
},
{
"epoch": 1.53,
"learning_rate": 7.5513851653261844e-06,
"loss": 2.3363,
"step": 13300
},
{
"epoch": 1.54,
"learning_rate": 7.232222647772246e-06,
"loss": 2.2079,
"step": 13400
},
{
"epoch": 1.55,
"learning_rate": 6.9130601302183074e-06,
"loss": 2.3057,
"step": 13500
},
{
"epoch": 1.56,
"learning_rate": 6.593897612664369e-06,
"loss": 2.2665,
"step": 13600
},
{
"epoch": 1.57,
"learning_rate": 6.27473509511043e-06,
"loss": 2.2934,
"step": 13700
},
{
"epoch": 1.59,
"learning_rate": 5.9555725775564915e-06,
"loss": 2.2407,
"step": 13800
},
{
"epoch": 1.6,
"learning_rate": 5.636410060002553e-06,
"loss": 2.3309,
"step": 13900
},
{
"epoch": 1.61,
"learning_rate": 5.317247542448615e-06,
"loss": 2.3257,
"step": 14000
},
{
"epoch": 1.62,
"learning_rate": 4.998085024894677e-06,
"loss": 2.2518,
"step": 14100
},
{
"epoch": 1.63,
"learning_rate": 4.678922507340738e-06,
"loss": 2.2359,
"step": 14200
},
{
"epoch": 1.64,
"learning_rate": 4.3597599897868e-06,
"loss": 2.287,
"step": 14300
},
{
"epoch": 1.65,
"learning_rate": 4.040597472232861e-06,
"loss": 2.2462,
"step": 14400
},
{
"epoch": 1.67,
"learning_rate": 3.7214349546789228e-06,
"loss": 2.2815,
"step": 14500
},
{
"epoch": 1.68,
"learning_rate": 3.4022724371249842e-06,
"loss": 2.3361,
"step": 14600
},
{
"epoch": 1.69,
"learning_rate": 3.0831099195710457e-06,
"loss": 2.2774,
"step": 14700
},
{
"epoch": 1.7,
"learning_rate": 2.7639474020171072e-06,
"loss": 2.2895,
"step": 14800
},
{
"epoch": 1.71,
"learning_rate": 2.4447848844631687e-06,
"loss": 2.3155,
"step": 14900
},
{
"epoch": 1.72,
"learning_rate": 2.12562236690923e-06,
"loss": 2.2698,
"step": 15000
}
],
"max_steps": 15666,
"num_train_epochs": 2,
"total_flos": 5.928520220872704e+16,
"trial_name": null,
"trial_params": null
}