prompts / trainer_state.json
kevincstowe's picture
updating to large prompt model
d8730db
raw
history blame
16 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.171730940548895,
"global_step": 64000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 4.9977639200449365e-05,
"loss": 3.6579,
"step": 500
},
{
"epoch": 0.0,
"learning_rate": 4.995527840089873e-05,
"loss": 3.4615,
"step": 1000
},
{
"epoch": 0.0,
"learning_rate": 4.993291760134809e-05,
"loss": 3.3921,
"step": 1500
},
{
"epoch": 0.01,
"learning_rate": 4.991055680179745e-05,
"loss": 3.3552,
"step": 2000
},
{
"epoch": 0.01,
"learning_rate": 4.9888196002246816e-05,
"loss": 3.3133,
"step": 2500
},
{
"epoch": 0.01,
"learning_rate": 4.986583520269618e-05,
"loss": 3.2847,
"step": 3000
},
{
"epoch": 0.01,
"learning_rate": 4.984347440314554e-05,
"loss": 3.261,
"step": 3500
},
{
"epoch": 0.01,
"learning_rate": 4.9821113603594904e-05,
"loss": 3.2291,
"step": 4000
},
{
"epoch": 0.01,
"learning_rate": 4.979875280404427e-05,
"loss": 3.2023,
"step": 4500
},
{
"epoch": 0.01,
"learning_rate": 4.977639200449363e-05,
"loss": 3.1846,
"step": 5000
},
{
"epoch": 0.01,
"learning_rate": 4.975403120494299e-05,
"loss": 3.1743,
"step": 5500
},
{
"epoch": 0.02,
"learning_rate": 4.9731670405392355e-05,
"loss": 3.1475,
"step": 6000
},
{
"epoch": 0.02,
"learning_rate": 4.970930960584171e-05,
"loss": 3.1315,
"step": 6500
},
{
"epoch": 0.02,
"learning_rate": 4.968694880629108e-05,
"loss": 3.1389,
"step": 7000
},
{
"epoch": 0.02,
"learning_rate": 4.9664588006740444e-05,
"loss": 3.0986,
"step": 7500
},
{
"epoch": 0.02,
"learning_rate": 4.9642227207189806e-05,
"loss": 3.0938,
"step": 8000
},
{
"epoch": 0.02,
"learning_rate": 4.961986640763916e-05,
"loss": 3.0844,
"step": 8500
},
{
"epoch": 0.02,
"learning_rate": 4.959750560808853e-05,
"loss": 3.0931,
"step": 9000
},
{
"epoch": 0.03,
"learning_rate": 4.9575144808537895e-05,
"loss": 3.0665,
"step": 9500
},
{
"epoch": 0.03,
"learning_rate": 4.955278400898726e-05,
"loss": 3.0465,
"step": 10000
},
{
"epoch": 0.03,
"learning_rate": 4.953042320943661e-05,
"loss": 3.0343,
"step": 10500
},
{
"epoch": 0.03,
"learning_rate": 4.950806240988598e-05,
"loss": 3.036,
"step": 11000
},
{
"epoch": 0.03,
"learning_rate": 4.9485701610335346e-05,
"loss": 3.0273,
"step": 11500
},
{
"epoch": 0.03,
"learning_rate": 4.94633408107847e-05,
"loss": 3.0231,
"step": 12000
},
{
"epoch": 0.03,
"learning_rate": 4.9440980011234064e-05,
"loss": 3.0177,
"step": 12500
},
{
"epoch": 0.03,
"learning_rate": 4.9418619211683434e-05,
"loss": 2.9969,
"step": 13000
},
{
"epoch": 0.04,
"learning_rate": 4.9396258412132796e-05,
"loss": 3.0019,
"step": 13500
},
{
"epoch": 0.04,
"learning_rate": 4.937389761258215e-05,
"loss": 2.9867,
"step": 14000
},
{
"epoch": 0.04,
"learning_rate": 4.9351536813031515e-05,
"loss": 2.9825,
"step": 14500
},
{
"epoch": 0.04,
"learning_rate": 4.9329176013480885e-05,
"loss": 2.9799,
"step": 15000
},
{
"epoch": 0.04,
"learning_rate": 4.930681521393025e-05,
"loss": 2.9578,
"step": 15500
},
{
"epoch": 0.04,
"learning_rate": 4.9284454414379603e-05,
"loss": 2.949,
"step": 16000
},
{
"epoch": 0.04,
"learning_rate": 4.9262093614828966e-05,
"loss": 2.9598,
"step": 16500
},
{
"epoch": 0.05,
"learning_rate": 4.9239732815278336e-05,
"loss": 2.9568,
"step": 17000
},
{
"epoch": 0.05,
"learning_rate": 4.921737201572769e-05,
"loss": 2.9395,
"step": 17500
},
{
"epoch": 0.05,
"learning_rate": 4.9195011216177054e-05,
"loss": 2.9499,
"step": 18000
},
{
"epoch": 0.05,
"learning_rate": 4.917265041662642e-05,
"loss": 2.9316,
"step": 18500
},
{
"epoch": 0.05,
"learning_rate": 4.915028961707579e-05,
"loss": 2.9356,
"step": 19000
},
{
"epoch": 0.05,
"learning_rate": 4.912792881752514e-05,
"loss": 2.9105,
"step": 19500
},
{
"epoch": 0.05,
"learning_rate": 4.9105568017974505e-05,
"loss": 2.9277,
"step": 20000
},
{
"epoch": 0.06,
"learning_rate": 4.908320721842387e-05,
"loss": 2.9224,
"step": 20500
},
{
"epoch": 0.06,
"learning_rate": 4.906084641887324e-05,
"loss": 2.9136,
"step": 21000
},
{
"epoch": 0.06,
"learning_rate": 4.9038485619322594e-05,
"loss": 2.9109,
"step": 21500
},
{
"epoch": 0.06,
"learning_rate": 4.9016124819771956e-05,
"loss": 2.896,
"step": 22000
},
{
"epoch": 0.06,
"learning_rate": 4.899376402022132e-05,
"loss": 2.8996,
"step": 22500
},
{
"epoch": 0.06,
"learning_rate": 4.897140322067068e-05,
"loss": 2.9059,
"step": 23000
},
{
"epoch": 0.06,
"learning_rate": 4.8949042421120045e-05,
"loss": 2.8849,
"step": 23500
},
{
"epoch": 0.06,
"learning_rate": 4.892668162156941e-05,
"loss": 2.8876,
"step": 24000
},
{
"epoch": 0.07,
"learning_rate": 4.890432082201877e-05,
"loss": 2.887,
"step": 24500
},
{
"epoch": 0.07,
"learning_rate": 4.888196002246813e-05,
"loss": 2.8687,
"step": 25000
},
{
"epoch": 0.07,
"learning_rate": 4.8859599222917496e-05,
"loss": 2.8734,
"step": 25500
},
{
"epoch": 0.07,
"learning_rate": 4.883723842336686e-05,
"loss": 2.8745,
"step": 26000
},
{
"epoch": 0.07,
"learning_rate": 4.881487762381622e-05,
"loss": 2.8904,
"step": 26500
},
{
"epoch": 0.07,
"learning_rate": 4.8792516824265584e-05,
"loss": 2.8652,
"step": 27000
},
{
"epoch": 0.07,
"learning_rate": 4.8770156024714947e-05,
"loss": 2.8715,
"step": 27500
},
{
"epoch": 0.08,
"learning_rate": 4.874779522516431e-05,
"loss": 2.8643,
"step": 28000
},
{
"epoch": 0.08,
"learning_rate": 4.872543442561367e-05,
"loss": 2.8483,
"step": 28500
},
{
"epoch": 0.08,
"learning_rate": 4.8703073626063035e-05,
"loss": 2.8537,
"step": 29000
},
{
"epoch": 0.08,
"learning_rate": 4.86807128265124e-05,
"loss": 2.8456,
"step": 29500
},
{
"epoch": 0.08,
"learning_rate": 4.865835202696176e-05,
"loss": 2.8494,
"step": 30000
},
{
"epoch": 0.08,
"learning_rate": 4.863599122741112e-05,
"loss": 2.8352,
"step": 30500
},
{
"epoch": 0.08,
"learning_rate": 4.8613630427860486e-05,
"loss": 2.8388,
"step": 31000
},
{
"epoch": 0.08,
"learning_rate": 4.859126962830985e-05,
"loss": 2.8381,
"step": 31500
},
{
"epoch": 0.09,
"learning_rate": 4.856890882875921e-05,
"loss": 2.82,
"step": 32000
},
{
"epoch": 0.09,
"learning_rate": 4.8546548029208574e-05,
"loss": 2.8428,
"step": 32500
},
{
"epoch": 0.09,
"learning_rate": 4.852418722965794e-05,
"loss": 2.8436,
"step": 33000
},
{
"epoch": 0.09,
"learning_rate": 4.85018264301073e-05,
"loss": 2.826,
"step": 33500
},
{
"epoch": 0.09,
"learning_rate": 4.8479465630556655e-05,
"loss": 2.8274,
"step": 34000
},
{
"epoch": 0.09,
"learning_rate": 4.8457104831006025e-05,
"loss": 2.8338,
"step": 34500
},
{
"epoch": 0.09,
"learning_rate": 4.843474403145539e-05,
"loss": 2.8228,
"step": 35000
},
{
"epoch": 0.1,
"learning_rate": 4.841238323190475e-05,
"loss": 2.8078,
"step": 35500
},
{
"epoch": 0.1,
"learning_rate": 4.8390022432354106e-05,
"loss": 2.8198,
"step": 36000
},
{
"epoch": 0.1,
"learning_rate": 4.8367661632803476e-05,
"loss": 2.8088,
"step": 36500
},
{
"epoch": 0.1,
"learning_rate": 4.834530083325284e-05,
"loss": 2.8287,
"step": 37000
},
{
"epoch": 0.1,
"learning_rate": 4.83229400337022e-05,
"loss": 2.7993,
"step": 37500
},
{
"epoch": 0.1,
"learning_rate": 4.830057923415156e-05,
"loss": 2.8223,
"step": 38000
},
{
"epoch": 0.1,
"learning_rate": 4.827821843460093e-05,
"loss": 2.8084,
"step": 38500
},
{
"epoch": 0.1,
"learning_rate": 4.825585763505029e-05,
"loss": 2.8119,
"step": 39000
},
{
"epoch": 0.11,
"learning_rate": 4.8233496835499646e-05,
"loss": 2.8131,
"step": 39500
},
{
"epoch": 0.11,
"learning_rate": 4.821113603594901e-05,
"loss": 2.7907,
"step": 40000
},
{
"epoch": 0.11,
"learning_rate": 4.818877523639838e-05,
"loss": 2.8071,
"step": 40500
},
{
"epoch": 0.11,
"learning_rate": 4.816641443684774e-05,
"loss": 2.7969,
"step": 41000
},
{
"epoch": 0.11,
"learning_rate": 4.81440536372971e-05,
"loss": 2.8045,
"step": 41500
},
{
"epoch": 0.11,
"learning_rate": 4.812169283774646e-05,
"loss": 2.7991,
"step": 42000
},
{
"epoch": 0.11,
"learning_rate": 4.809933203819583e-05,
"loss": 2.7845,
"step": 42500
},
{
"epoch": 0.12,
"learning_rate": 4.807697123864519e-05,
"loss": 2.8144,
"step": 43000
},
{
"epoch": 0.12,
"learning_rate": 4.805461043909455e-05,
"loss": 2.764,
"step": 43500
},
{
"epoch": 0.12,
"learning_rate": 4.803224963954391e-05,
"loss": 2.7744,
"step": 44000
},
{
"epoch": 0.12,
"learning_rate": 4.800988883999328e-05,
"loss": 2.7857,
"step": 44500
},
{
"epoch": 0.12,
"learning_rate": 4.7987528040442636e-05,
"loss": 2.8017,
"step": 45000
},
{
"epoch": 0.12,
"learning_rate": 4.7965167240892e-05,
"loss": 2.7958,
"step": 45500
},
{
"epoch": 0.12,
"learning_rate": 4.794280644134136e-05,
"loss": 2.7766,
"step": 46000
},
{
"epoch": 0.12,
"learning_rate": 4.792044564179073e-05,
"loss": 2.7614,
"step": 46500
},
{
"epoch": 0.13,
"learning_rate": 4.789808484224009e-05,
"loss": 2.7739,
"step": 47000
},
{
"epoch": 0.13,
"learning_rate": 4.787572404268945e-05,
"loss": 2.7736,
"step": 47500
},
{
"epoch": 0.13,
"learning_rate": 4.785336324313882e-05,
"loss": 2.7683,
"step": 48000
},
{
"epoch": 0.13,
"learning_rate": 4.783100244358818e-05,
"loss": 2.7765,
"step": 48500
},
{
"epoch": 0.13,
"learning_rate": 4.780864164403754e-05,
"loss": 2.752,
"step": 49000
},
{
"epoch": 0.13,
"learning_rate": 4.77862808444869e-05,
"loss": 2.7666,
"step": 49500
},
{
"epoch": 0.13,
"learning_rate": 4.776392004493627e-05,
"loss": 2.7481,
"step": 50000
},
{
"epoch": 0.14,
"learning_rate": 4.7741559245385626e-05,
"loss": 2.7724,
"step": 50500
},
{
"epoch": 0.14,
"learning_rate": 4.771919844583499e-05,
"loss": 2.7773,
"step": 51000
},
{
"epoch": 0.14,
"learning_rate": 4.769683764628435e-05,
"loss": 2.7659,
"step": 51500
},
{
"epoch": 0.14,
"learning_rate": 4.767447684673372e-05,
"loss": 2.7473,
"step": 52000
},
{
"epoch": 0.14,
"learning_rate": 4.765211604718308e-05,
"loss": 2.761,
"step": 52500
},
{
"epoch": 0.14,
"learning_rate": 4.762975524763244e-05,
"loss": 2.7289,
"step": 53000
},
{
"epoch": 0.14,
"learning_rate": 4.76073944480818e-05,
"loss": 2.7657,
"step": 53500
},
{
"epoch": 0.14,
"learning_rate": 4.758503364853117e-05,
"loss": 2.7719,
"step": 54000
},
{
"epoch": 0.15,
"learning_rate": 4.756267284898053e-05,
"loss": 2.7581,
"step": 54500
},
{
"epoch": 0.15,
"learning_rate": 4.754031204942989e-05,
"loss": 2.7548,
"step": 55000
},
{
"epoch": 0.15,
"learning_rate": 4.7517951249879253e-05,
"loss": 2.7509,
"step": 55500
},
{
"epoch": 0.15,
"learning_rate": 4.7495590450328616e-05,
"loss": 2.738,
"step": 56000
},
{
"epoch": 0.15,
"learning_rate": 4.747322965077798e-05,
"loss": 2.752,
"step": 56500
},
{
"epoch": 0.15,
"learning_rate": 4.745086885122734e-05,
"loss": 2.7373,
"step": 57000
},
{
"epoch": 0.15,
"learning_rate": 4.7428508051676704e-05,
"loss": 2.7541,
"step": 57500
},
{
"epoch": 0.16,
"learning_rate": 4.740614725212607e-05,
"loss": 2.7332,
"step": 58000
},
{
"epoch": 0.16,
"learning_rate": 4.738378645257543e-05,
"loss": 2.713,
"step": 58500
},
{
"epoch": 0.16,
"learning_rate": 4.736142565302479e-05,
"loss": 2.7534,
"step": 59000
},
{
"epoch": 0.16,
"learning_rate": 4.7339064853474155e-05,
"loss": 2.753,
"step": 59500
},
{
"epoch": 0.16,
"learning_rate": 4.731670405392352e-05,
"loss": 2.7357,
"step": 60000
},
{
"epoch": 0.16,
"learning_rate": 4.729434325437288e-05,
"loss": 2.7286,
"step": 60500
},
{
"epoch": 0.16,
"learning_rate": 4.7271982454822244e-05,
"loss": 2.7137,
"step": 61000
},
{
"epoch": 0.17,
"learning_rate": 4.7249621655271606e-05,
"loss": 2.73,
"step": 61500
},
{
"epoch": 0.17,
"learning_rate": 4.722726085572097e-05,
"loss": 2.7375,
"step": 62000
},
{
"epoch": 0.17,
"learning_rate": 4.720490005617033e-05,
"loss": 2.7257,
"step": 62500
},
{
"epoch": 0.17,
"learning_rate": 4.7182539256619695e-05,
"loss": 2.7162,
"step": 63000
},
{
"epoch": 0.17,
"learning_rate": 4.716017845706906e-05,
"loss": 2.7421,
"step": 63500
},
{
"epoch": 0.17,
"learning_rate": 4.713781765751842e-05,
"loss": 2.7255,
"step": 64000
}
],
"max_steps": 1118028,
"num_train_epochs": 3,
"total_flos": 1.4482346926915584e+17,
"trial_name": null,
"trial_params": null
}