{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.171730940548895,
  "global_step": 64000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.0, "learning_rate": 4.9977639200449365e-05, "loss": 3.6579, "step": 500 },
    { "epoch": 0.0, "learning_rate": 4.995527840089873e-05, "loss": 3.4615, "step": 1000 },
    { "epoch": 0.0, "learning_rate": 4.993291760134809e-05, "loss": 3.3921, "step": 1500 },
    { "epoch": 0.01, "learning_rate": 4.991055680179745e-05, "loss": 3.3552, "step": 2000 },
    { "epoch": 0.01, "learning_rate": 4.9888196002246816e-05, "loss": 3.3133, "step": 2500 },
    { "epoch": 0.01, "learning_rate": 4.986583520269618e-05, "loss": 3.2847, "step": 3000 },
    { "epoch": 0.01, "learning_rate": 4.984347440314554e-05, "loss": 3.261, "step": 3500 },
    { "epoch": 0.01, "learning_rate": 4.9821113603594904e-05, "loss": 3.2291, "step": 4000 },
    { "epoch": 0.01, "learning_rate": 4.979875280404427e-05, "loss": 3.2023, "step": 4500 },
    { "epoch": 0.01, "learning_rate": 4.977639200449363e-05, "loss": 3.1846, "step": 5000 },
    { "epoch": 0.01, "learning_rate": 4.975403120494299e-05, "loss": 3.1743, "step": 5500 },
    { "epoch": 0.02, "learning_rate": 4.9731670405392355e-05, "loss": 3.1475, "step": 6000 },
    { "epoch": 0.02, "learning_rate": 4.970930960584171e-05, "loss": 3.1315, "step": 6500 },
    { "epoch": 0.02, "learning_rate": 4.968694880629108e-05, "loss": 3.1389, "step": 7000 },
    { "epoch": 0.02, "learning_rate": 4.9664588006740444e-05, "loss": 3.0986, "step": 7500 },
    { "epoch": 0.02, "learning_rate": 4.9642227207189806e-05, "loss": 3.0938, "step": 8000 },
    { "epoch": 0.02, "learning_rate": 4.961986640763916e-05, "loss": 3.0844, "step": 8500 },
    { "epoch": 0.02, "learning_rate": 4.959750560808853e-05, "loss": 3.0931, "step": 9000 },
    { "epoch": 0.03, "learning_rate": 4.9575144808537895e-05, "loss": 3.0665, "step": 9500 },
    { "epoch": 0.03, "learning_rate": 4.955278400898726e-05, "loss": 3.0465, "step": 10000 },
    { "epoch": 0.03, "learning_rate": 4.953042320943661e-05, "loss": 3.0343, "step": 10500 },
    { "epoch": 0.03, "learning_rate": 4.950806240988598e-05, "loss": 3.036, "step": 11000 },
    { "epoch": 0.03, "learning_rate": 4.9485701610335346e-05, "loss": 3.0273, "step": 11500 },
    { "epoch": 0.03, "learning_rate": 4.94633408107847e-05, "loss": 3.0231, "step": 12000 },
    { "epoch": 0.03, "learning_rate": 4.9440980011234064e-05, "loss": 3.0177, "step": 12500 },
    { "epoch": 0.03, "learning_rate": 4.9418619211683434e-05, "loss": 2.9969, "step": 13000 },
    { "epoch": 0.04, "learning_rate": 4.9396258412132796e-05, "loss": 3.0019, "step": 13500 },
    { "epoch": 0.04, "learning_rate": 4.937389761258215e-05, "loss": 2.9867, "step": 14000 },
    { "epoch": 0.04, "learning_rate": 4.9351536813031515e-05, "loss": 2.9825, "step": 14500 },
    { "epoch": 0.04, "learning_rate": 4.9329176013480885e-05, "loss": 2.9799, "step": 15000 },
    { "epoch": 0.04, "learning_rate": 4.930681521393025e-05, "loss": 2.9578, "step": 15500 },
    { "epoch": 0.04, "learning_rate": 4.9284454414379603e-05, "loss": 2.949, "step": 16000 },
    { "epoch": 0.04, "learning_rate": 4.9262093614828966e-05, "loss": 2.9598, "step": 16500 },
    { "epoch": 0.05, "learning_rate": 4.9239732815278336e-05, "loss": 2.9568, "step": 17000 },
    { "epoch": 0.05, "learning_rate": 4.921737201572769e-05, "loss": 2.9395, "step": 17500 },
    { "epoch": 0.05, "learning_rate": 4.9195011216177054e-05, "loss": 2.9499, "step": 18000 },
    { "epoch": 0.05, "learning_rate": 4.917265041662642e-05, "loss": 2.9316, "step": 18500 },
    { "epoch": 0.05, "learning_rate": 4.915028961707579e-05, "loss": 2.9356, "step": 19000 },
    { "epoch": 0.05, "learning_rate": 4.912792881752514e-05, "loss": 2.9105, "step": 19500 },
    { "epoch": 0.05, "learning_rate": 4.9105568017974505e-05, "loss": 2.9277, "step": 20000 },
    { "epoch": 0.06, "learning_rate": 4.908320721842387e-05, "loss": 2.9224, "step": 20500 },
    { "epoch": 0.06, "learning_rate": 4.906084641887324e-05, "loss": 2.9136, "step": 21000 },
    { "epoch": 0.06, "learning_rate": 4.9038485619322594e-05, "loss": 2.9109, "step": 21500 },
    { "epoch": 0.06, "learning_rate": 4.9016124819771956e-05, "loss": 2.896, "step": 22000 },
    { "epoch": 0.06, "learning_rate": 4.899376402022132e-05, "loss": 2.8996, "step": 22500 },
    { "epoch": 0.06, "learning_rate": 4.897140322067068e-05, "loss": 2.9059, "step": 23000 },
    { "epoch": 0.06, "learning_rate": 4.8949042421120045e-05, "loss": 2.8849, "step": 23500 },
    { "epoch": 0.06, "learning_rate": 4.892668162156941e-05, "loss": 2.8876, "step": 24000 },
    { "epoch": 0.07, "learning_rate": 4.890432082201877e-05, "loss": 2.887, "step": 24500 },
    { "epoch": 0.07, "learning_rate": 4.888196002246813e-05, "loss": 2.8687, "step": 25000 },
    { "epoch": 0.07, "learning_rate": 4.8859599222917496e-05, "loss": 2.8734, "step": 25500 },
    { "epoch": 0.07, "learning_rate": 4.883723842336686e-05, "loss": 2.8745, "step": 26000 },
    { "epoch": 0.07, "learning_rate": 4.881487762381622e-05, "loss": 2.8904, "step": 26500 },
    { "epoch": 0.07, "learning_rate": 4.8792516824265584e-05, "loss": 2.8652, "step": 27000 },
    { "epoch": 0.07, "learning_rate": 4.8770156024714947e-05, "loss": 2.8715, "step": 27500 },
    { "epoch": 0.08, "learning_rate": 4.874779522516431e-05, "loss": 2.8643, "step": 28000 },
    { "epoch": 0.08, "learning_rate": 4.872543442561367e-05, "loss": 2.8483, "step": 28500 },
    { "epoch": 0.08, "learning_rate": 4.8703073626063035e-05, "loss": 2.8537, "step": 29000 },
    { "epoch": 0.08, "learning_rate": 4.86807128265124e-05, "loss": 2.8456, "step": 29500 },
    { "epoch": 0.08, "learning_rate": 4.865835202696176e-05, "loss": 2.8494, "step": 30000 },
    { "epoch": 0.08, "learning_rate": 4.863599122741112e-05, "loss": 2.8352, "step": 30500 },
    { "epoch": 0.08, "learning_rate": 4.8613630427860486e-05, "loss": 2.8388, "step": 31000 },
    { "epoch": 0.08, "learning_rate": 4.859126962830985e-05, "loss": 2.8381, "step": 31500 },
    { "epoch": 0.09, "learning_rate": 4.856890882875921e-05, "loss": 2.82, "step": 32000 },
    { "epoch": 0.09, "learning_rate": 4.8546548029208574e-05, "loss": 2.8428, "step": 32500 },
    { "epoch": 0.09, "learning_rate": 4.852418722965794e-05, "loss": 2.8436, "step": 33000 },
    { "epoch": 0.09, "learning_rate": 4.85018264301073e-05, "loss": 2.826, "step": 33500 },
    { "epoch": 0.09, "learning_rate": 4.8479465630556655e-05, "loss": 2.8274, "step": 34000 },
    { "epoch": 0.09, "learning_rate": 4.8457104831006025e-05, "loss": 2.8338, "step": 34500 },
    { "epoch": 0.09, "learning_rate": 4.843474403145539e-05, "loss": 2.8228, "step": 35000 },
    { "epoch": 0.1, "learning_rate": 4.841238323190475e-05, "loss": 2.8078, "step": 35500 },
    { "epoch": 0.1, "learning_rate": 4.8390022432354106e-05, "loss": 2.8198, "step": 36000 },
    { "epoch": 0.1, "learning_rate": 4.8367661632803476e-05, "loss": 2.8088, "step": 36500 },
    { "epoch": 0.1, "learning_rate": 4.834530083325284e-05, "loss": 2.8287, "step": 37000 },
    { "epoch": 0.1, "learning_rate": 4.83229400337022e-05, "loss": 2.7993, "step": 37500 },
    { "epoch": 0.1, "learning_rate": 4.830057923415156e-05, "loss": 2.8223, "step": 38000 },
    { "epoch": 0.1, "learning_rate": 4.827821843460093e-05, "loss": 2.8084, "step": 38500 },
    { "epoch": 0.1, "learning_rate": 4.825585763505029e-05, "loss": 2.8119, "step": 39000 },
    { "epoch": 0.11, "learning_rate": 4.8233496835499646e-05, "loss": 2.8131, "step": 39500 },
    { "epoch": 0.11, "learning_rate": 4.821113603594901e-05, "loss": 2.7907, "step": 40000 },
    { "epoch": 0.11, "learning_rate": 4.818877523639838e-05, "loss": 2.8071, "step": 40500 },
    { "epoch": 0.11, "learning_rate": 4.816641443684774e-05, "loss": 2.7969, "step": 41000 },
    { "epoch": 0.11, "learning_rate": 4.81440536372971e-05, "loss": 2.8045, "step": 41500 },
    { "epoch": 0.11, "learning_rate": 4.812169283774646e-05, "loss": 2.7991, "step": 42000 },
    { "epoch": 0.11, "learning_rate": 4.809933203819583e-05, "loss": 2.7845, "step": 42500 },
    { "epoch": 0.12, "learning_rate": 4.807697123864519e-05, "loss": 2.8144, "step": 43000 },
    { "epoch": 0.12, "learning_rate": 4.805461043909455e-05, "loss": 2.764, "step": 43500 },
    { "epoch": 0.12, "learning_rate": 4.803224963954391e-05, "loss": 2.7744, "step": 44000 },
    { "epoch": 0.12, "learning_rate": 4.800988883999328e-05, "loss": 2.7857, "step": 44500 },
    { "epoch": 0.12, "learning_rate": 4.7987528040442636e-05, "loss": 2.8017, "step": 45000 },
    { "epoch": 0.12, "learning_rate": 4.7965167240892e-05, "loss": 2.7958, "step": 45500 },
    { "epoch": 0.12, "learning_rate": 4.794280644134136e-05, "loss": 2.7766, "step": 46000 },
    { "epoch": 0.12, "learning_rate": 4.792044564179073e-05, "loss": 2.7614, "step": 46500 },
    { "epoch": 0.13, "learning_rate": 4.789808484224009e-05, "loss": 2.7739, "step": 47000 },
    { "epoch": 0.13, "learning_rate": 4.787572404268945e-05, "loss": 2.7736, "step": 47500 },
    { "epoch": 0.13, "learning_rate": 4.785336324313882e-05, "loss": 2.7683, "step": 48000 },
    { "epoch": 0.13, "learning_rate": 4.783100244358818e-05, "loss": 2.7765, "step": 48500 },
    { "epoch": 0.13, "learning_rate": 4.780864164403754e-05, "loss": 2.752, "step": 49000 },
    { "epoch": 0.13, "learning_rate": 4.77862808444869e-05, "loss": 2.7666, "step": 49500 },
    { "epoch": 0.13, "learning_rate": 4.776392004493627e-05, "loss": 2.7481, "step": 50000 },
    { "epoch": 0.14, "learning_rate": 4.7741559245385626e-05, "loss": 2.7724, "step": 50500 },
    { "epoch": 0.14, "learning_rate": 4.771919844583499e-05, "loss": 2.7773, "step": 51000 },
    { "epoch": 0.14, "learning_rate": 4.769683764628435e-05, "loss": 2.7659, "step": 51500 },
    { "epoch": 0.14, "learning_rate": 4.767447684673372e-05, "loss": 2.7473, "step": 52000 },
    { "epoch": 0.14, "learning_rate": 4.765211604718308e-05, "loss": 2.761, "step": 52500 },
    { "epoch": 0.14, "learning_rate": 4.762975524763244e-05, "loss": 2.7289, "step": 53000 },
    { "epoch": 0.14, "learning_rate": 4.76073944480818e-05, "loss": 2.7657, "step": 53500 },
    { "epoch": 0.14, "learning_rate": 4.758503364853117e-05, "loss": 2.7719, "step": 54000 },
    { "epoch": 0.15, "learning_rate": 4.756267284898053e-05, "loss": 2.7581, "step": 54500 },
    { "epoch": 0.15, "learning_rate": 4.754031204942989e-05, "loss": 2.7548, "step": 55000 },
    { "epoch": 0.15, "learning_rate": 4.7517951249879253e-05, "loss": 2.7509, "step": 55500 },
    { "epoch": 0.15, "learning_rate": 4.7495590450328616e-05, "loss": 2.738, "step": 56000 },
    { "epoch": 0.15, "learning_rate": 4.747322965077798e-05, "loss": 2.752, "step": 56500 },
    { "epoch": 0.15, "learning_rate": 4.745086885122734e-05, "loss": 2.7373, "step": 57000 },
    { "epoch": 0.15, "learning_rate": 4.7428508051676704e-05, "loss": 2.7541, "step": 57500 },
    { "epoch": 0.16, "learning_rate": 4.740614725212607e-05, "loss": 2.7332, "step": 58000 },
    { "epoch": 0.16, "learning_rate": 4.738378645257543e-05, "loss": 2.713, "step": 58500 },
    { "epoch": 0.16, "learning_rate": 4.736142565302479e-05, "loss": 2.7534, "step": 59000 },
    { "epoch": 0.16, "learning_rate": 4.7339064853474155e-05, "loss": 2.753, "step": 59500 },
    { "epoch": 0.16, "learning_rate": 4.731670405392352e-05, "loss": 2.7357, "step": 60000 },
    { "epoch": 0.16, "learning_rate": 4.729434325437288e-05, "loss": 2.7286, "step": 60500 },
    { "epoch": 0.16, "learning_rate": 4.7271982454822244e-05, "loss": 2.7137, "step": 61000 },
    { "epoch": 0.17, "learning_rate": 4.7249621655271606e-05, "loss": 2.73, "step": 61500 },
    { "epoch": 0.17, "learning_rate": 4.722726085572097e-05, "loss": 2.7375, "step": 62000 },
    { "epoch": 0.17, "learning_rate": 4.720490005617033e-05, "loss": 2.7257, "step": 62500 },
    { "epoch": 0.17, "learning_rate": 4.7182539256619695e-05, "loss": 2.7162, "step": 63000 },
    { "epoch": 0.17, "learning_rate": 4.716017845706906e-05, "loss": 2.7421, "step": 63500 },
    { "epoch": 0.17, "learning_rate": 4.713781765751842e-05, "loss": 2.7255, "step": 64000 }
  ],
  "max_steps": 1118028,
  "num_train_epochs": 3,
  "total_flos": 1.4482346926915584e+17,
  "trial_name": null,
  "trial_params": null
}