|
{ |
|
"best_metric": 0.38143062591552734, |
|
"best_model_checkpoint": "./models/checkpoint-405", |
|
"epoch": 1.8, |
|
"eval_steps": 45, |
|
"global_step": 405, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0044444444444444444, |
|
"grad_norm": 3.2011494636535645, |
|
"learning_rate": 2.173913043478261e-06, |
|
"logits/chosen": 1.6946959495544434, |
|
"logits/rejected": 1.7046217918395996, |
|
"logps/chosen": -123.91139221191406, |
|
"logps/rejected": -152.06222534179688, |
|
"loss": 0.5724, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.14388123154640198, |
|
"rewards/margins": 0.2681159973144531, |
|
"rewards/rejected": -0.12423478066921234, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.008888888888888889, |
|
"grad_norm": 2.7101495265960693, |
|
"learning_rate": 4.347826086956522e-06, |
|
"logits/chosen": 2.161226749420166, |
|
"logits/rejected": 2.1654703617095947, |
|
"logps/chosen": -257.9621276855469, |
|
"logps/rejected": -336.0558776855469, |
|
"loss": 0.6413, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.051631927490234375, |
|
"rewards/margins": 0.10870284587144852, |
|
"rewards/rejected": -0.05707092583179474, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.013333333333333334, |
|
"grad_norm": 2.1376404762268066, |
|
"learning_rate": 6.521739130434783e-06, |
|
"logits/chosen": 1.9573543071746826, |
|
"logits/rejected": 1.8775691986083984, |
|
"logps/chosen": -253.7610626220703, |
|
"logps/rejected": -210.71412658691406, |
|
"loss": 0.527, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1437244415283203, |
|
"rewards/margins": 0.37279435992240906, |
|
"rewards/rejected": -0.22906990349292755, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.017777777777777778, |
|
"grad_norm": 2.6771838665008545, |
|
"learning_rate": 8.695652173913044e-06, |
|
"logits/chosen": 2.2943520545959473, |
|
"logits/rejected": 2.242229461669922, |
|
"logps/chosen": -384.8254089355469, |
|
"logps/rejected": -270.86602783203125, |
|
"loss": 0.5007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3624267578125, |
|
"rewards/margins": 0.435385137796402, |
|
"rewards/rejected": -0.07295837253332138, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.022222222222222223, |
|
"grad_norm": 3.228928565979004, |
|
"learning_rate": 1.0869565217391305e-05, |
|
"logits/chosen": 2.2134103775024414, |
|
"logits/rejected": 2.145387887954712, |
|
"logps/chosen": -316.5057373046875, |
|
"logps/rejected": -360.7799377441406, |
|
"loss": 0.6288, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.0028488151729106903, |
|
"rewards/margins": 0.1330413818359375, |
|
"rewards/rejected": -0.1301925629377365, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.02666666666666667, |
|
"grad_norm": 3.082205057144165, |
|
"learning_rate": 1.3043478260869566e-05, |
|
"logits/chosen": 2.1260976791381836, |
|
"logits/rejected": 2.1222031116485596, |
|
"logps/chosen": -358.46337890625, |
|
"logps/rejected": -424.693359375, |
|
"loss": 0.5927, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.11278533935546875, |
|
"rewards/margins": 0.21288147568702698, |
|
"rewards/rejected": -0.10009613633155823, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.03111111111111111, |
|
"grad_norm": 3.3314132690429688, |
|
"learning_rate": 1.5217391304347828e-05, |
|
"logits/chosen": 2.2984189987182617, |
|
"logits/rejected": 2.247058391571045, |
|
"logps/chosen": -534.6464233398438, |
|
"logps/rejected": -502.7433776855469, |
|
"loss": 0.5904, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.02135010063648224, |
|
"rewards/margins": 0.21751099824905396, |
|
"rewards/rejected": -0.19616088271141052, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.035555555555555556, |
|
"grad_norm": 3.316230058670044, |
|
"learning_rate": 1.739130434782609e-05, |
|
"logits/chosen": 2.123837947845459, |
|
"logits/rejected": 2.181354284286499, |
|
"logps/chosen": -245.4013671875, |
|
"logps/rejected": -403.6361083984375, |
|
"loss": 0.6805, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.11298942565917969, |
|
"rewards/margins": 0.027771372348070145, |
|
"rewards/rejected": 0.08521804958581924, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 2.0176970958709717, |
|
"learning_rate": 1.956521739130435e-05, |
|
"logits/chosen": 1.7723705768585205, |
|
"logits/rejected": 1.846294641494751, |
|
"logps/chosen": -183.6702423095703, |
|
"logps/rejected": -227.79495239257812, |
|
"loss": 0.7198, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.1414192169904709, |
|
"rewards/margins": -0.05180053412914276, |
|
"rewards/rejected": -0.08961868286132812, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.044444444444444446, |
|
"grad_norm": 2.7427902221679688, |
|
"learning_rate": 2.173913043478261e-05, |
|
"logits/chosen": 2.143216848373413, |
|
"logits/rejected": 2.135941982269287, |
|
"logps/chosen": -362.2386169433594, |
|
"logps/rejected": -326.8141174316406, |
|
"loss": 0.6138, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.03693237528204918, |
|
"rewards/margins": 0.16980285942554474, |
|
"rewards/rejected": -0.13287048041820526, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04888888888888889, |
|
"grad_norm": 2.896284580230713, |
|
"learning_rate": 2.391304347826087e-05, |
|
"logits/chosen": 2.1498055458068848, |
|
"logits/rejected": 2.200744152069092, |
|
"logps/chosen": -248.22348022460938, |
|
"logps/rejected": -351.1915283203125, |
|
"loss": 0.6633, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.033481597900390625, |
|
"rewards/margins": 0.09698867797851562, |
|
"rewards/rejected": -0.13047027587890625, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.05333333333333334, |
|
"grad_norm": 3.3868470191955566, |
|
"learning_rate": 2.608695652173913e-05, |
|
"logits/chosen": 2.2540273666381836, |
|
"logits/rejected": 2.0217299461364746, |
|
"logps/chosen": -290.06707763671875, |
|
"logps/rejected": -291.1870422363281, |
|
"loss": 0.6204, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.057250212877988815, |
|
"rewards/margins": 0.15169525146484375, |
|
"rewards/rejected": -0.20894546806812286, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.057777777777777775, |
|
"grad_norm": 2.541471004486084, |
|
"learning_rate": 2.826086956521739e-05, |
|
"logits/chosen": 2.0605201721191406, |
|
"logits/rejected": 1.9781224727630615, |
|
"logps/chosen": -280.5556335449219, |
|
"logps/rejected": -206.835693359375, |
|
"loss": 0.6807, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.02671203762292862, |
|
"rewards/margins": 0.0282897986471653, |
|
"rewards/rejected": -0.001577761024236679, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.06222222222222222, |
|
"grad_norm": 3.5865259170532227, |
|
"learning_rate": 3.0434782608695656e-05, |
|
"logits/chosen": 2.4662587642669678, |
|
"logits/rejected": 2.4787802696228027, |
|
"logps/chosen": -318.6181945800781, |
|
"logps/rejected": -371.00048828125, |
|
"loss": 0.7615, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.21838226914405823, |
|
"rewards/margins": -0.1308029294013977, |
|
"rewards/rejected": -0.08757934719324112, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.06666666666666667, |
|
"grad_norm": 1.8042571544647217, |
|
"learning_rate": 3.260869565217392e-05, |
|
"logits/chosen": 1.8834528923034668, |
|
"logits/rejected": 1.8412845134735107, |
|
"logps/chosen": -160.182861328125, |
|
"logps/rejected": -134.62167358398438, |
|
"loss": 0.6405, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.048540499061346054, |
|
"rewards/margins": 0.10904045403003693, |
|
"rewards/rejected": -0.15758095681667328, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.07111111111111111, |
|
"grad_norm": 3.6051087379455566, |
|
"learning_rate": 3.478260869565218e-05, |
|
"logits/chosen": 2.4191336631774902, |
|
"logits/rejected": 2.4424967765808105, |
|
"logps/chosen": -330.76373291015625, |
|
"logps/rejected": -360.3594970703125, |
|
"loss": 0.7277, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.3086807429790497, |
|
"rewards/margins": -0.06518251448869705, |
|
"rewards/rejected": -0.24349823594093323, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.07555555555555556, |
|
"grad_norm": 2.667231321334839, |
|
"learning_rate": 3.695652173913043e-05, |
|
"logits/chosen": 2.087791919708252, |
|
"logits/rejected": 2.067237615585327, |
|
"logps/chosen": -260.45025634765625, |
|
"logps/rejected": -310.5743713378906, |
|
"loss": 0.6497, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.17707443237304688, |
|
"rewards/margins": 0.09263762831687927, |
|
"rewards/rejected": -0.26971206068992615, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 2.471524477005005, |
|
"learning_rate": 3.91304347826087e-05, |
|
"logits/chosen": 1.9968055486679077, |
|
"logits/rejected": 1.9818394184112549, |
|
"logps/chosen": -148.7676239013672, |
|
"logps/rejected": -160.41592407226562, |
|
"loss": 0.6841, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.12331848591566086, |
|
"rewards/margins": 0.021195977926254272, |
|
"rewards/rejected": -0.14451447129249573, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.08444444444444445, |
|
"grad_norm": 3.896228790283203, |
|
"learning_rate": 4.130434782608696e-05, |
|
"logits/chosen": 2.1896002292633057, |
|
"logits/rejected": 2.2027523517608643, |
|
"logps/chosen": -280.221923828125, |
|
"logps/rejected": -345.39849853515625, |
|
"loss": 0.6961, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.23584365844726562, |
|
"rewards/margins": -0.005173489451408386, |
|
"rewards/rejected": -0.23067016899585724, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.08888888888888889, |
|
"grad_norm": 2.9211912155151367, |
|
"learning_rate": 4.347826086956522e-05, |
|
"logits/chosen": 1.9726223945617676, |
|
"logits/rejected": 1.9995529651641846, |
|
"logps/chosen": -223.56761169433594, |
|
"logps/rejected": -288.2007141113281, |
|
"loss": 0.6075, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.14491653442382812, |
|
"rewards/margins": 0.18477173149585724, |
|
"rewards/rejected": -0.32968828082084656, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.09333333333333334, |
|
"grad_norm": 3.5073137283325195, |
|
"learning_rate": 4.565217391304348e-05, |
|
"logits/chosen": 2.084686517715454, |
|
"logits/rejected": 2.1801323890686035, |
|
"logps/chosen": -302.4353332519531, |
|
"logps/rejected": -421.34222412109375, |
|
"loss": 0.6006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2602279782295227, |
|
"rewards/margins": 0.19529570639133453, |
|
"rewards/rejected": -0.45552366971969604, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.09777777777777778, |
|
"grad_norm": 4.747559070587158, |
|
"learning_rate": 4.782608695652174e-05, |
|
"logits/chosen": 2.384913682937622, |
|
"logits/rejected": 2.304309368133545, |
|
"logps/chosen": -486.0771484375, |
|
"logps/rejected": -395.3549499511719, |
|
"loss": 0.7814, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.5777496099472046, |
|
"rewards/margins": -0.1678207516670227, |
|
"rewards/rejected": -0.4099288880825043, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.10222222222222223, |
|
"grad_norm": 3.5170698165893555, |
|
"learning_rate": 5e-05, |
|
"logits/chosen": 2.422769546508789, |
|
"logits/rejected": 2.377617597579956, |
|
"logps/chosen": -356.8462219238281, |
|
"logps/rejected": -335.63311767578125, |
|
"loss": 0.7371, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5601089596748352, |
|
"rewards/margins": -0.08314056694507599, |
|
"rewards/rejected": -0.4769684076309204, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.10666666666666667, |
|
"grad_norm": 1.9441181421279907, |
|
"learning_rate": 4.999932336875371e-05, |
|
"logits/chosen": 1.8213062286376953, |
|
"logits/rejected": 1.8396917581558228, |
|
"logps/chosen": -134.66082763671875, |
|
"logps/rejected": -149.367431640625, |
|
"loss": 0.65, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.14266128838062286, |
|
"rewards/margins": 0.0897216796875, |
|
"rewards/rejected": -0.23238298296928406, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.1111111111111111, |
|
"grad_norm": 3.068272352218628, |
|
"learning_rate": 4.9997293511641216e-05, |
|
"logits/chosen": 2.278895139694214, |
|
"logits/rejected": 2.275456190109253, |
|
"logps/chosen": -333.77490234375, |
|
"logps/rejected": -472.25897216796875, |
|
"loss": 0.4571, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.23233337700366974, |
|
"rewards/margins": 0.5613830089569092, |
|
"rewards/rejected": -0.7937164306640625, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.11555555555555555, |
|
"grad_norm": 1.8619799613952637, |
|
"learning_rate": 4.999391053853971e-05, |
|
"logits/chosen": 1.8031303882598877, |
|
"logits/rejected": 1.8489115238189697, |
|
"logps/chosen": -96.40797424316406, |
|
"logps/rejected": -161.97528076171875, |
|
"loss": 0.5789, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.05866394191980362, |
|
"rewards/margins": 0.2442375123500824, |
|
"rewards/rejected": -0.3029014468193054, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 5.386238098144531, |
|
"learning_rate": 4.998917463257121e-05, |
|
"logits/chosen": 2.5145506858825684, |
|
"logits/rejected": 2.4547314643859863, |
|
"logps/chosen": -413.3758239746094, |
|
"logps/rejected": -371.0469055175781, |
|
"loss": 0.7603, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5716583728790283, |
|
"rewards/margins": -0.12001956254243851, |
|
"rewards/rejected": -0.4516388177871704, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.12444444444444444, |
|
"grad_norm": 4.0328145027160645, |
|
"learning_rate": 4.998308605009268e-05, |
|
"logits/chosen": 1.9604880809783936, |
|
"logits/rejected": 1.9736435413360596, |
|
"logps/chosen": -294.1231689453125, |
|
"logps/rejected": -225.6836700439453, |
|
"loss": 0.7918, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.390707403421402, |
|
"rewards/margins": -0.17286226153373718, |
|
"rewards/rejected": -0.217845156788826, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.1288888888888889, |
|
"grad_norm": 2.2388572692871094, |
|
"learning_rate": 4.997564512068212e-05, |
|
"logits/chosen": 1.7558441162109375, |
|
"logits/rejected": 1.728846549987793, |
|
"logps/chosen": -262.6331787109375, |
|
"logps/rejected": -276.0820617675781, |
|
"loss": 0.5986, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.36693495512008667, |
|
"rewards/margins": 0.3257931172847748, |
|
"rewards/rejected": -0.6927281022071838, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.13333333333333333, |
|
"grad_norm": 2.462900400161743, |
|
"learning_rate": 4.9966852247120764e-05, |
|
"logits/chosen": 2.112412214279175, |
|
"logits/rejected": 2.067960739135742, |
|
"logps/chosen": -276.7638244628906, |
|
"logps/rejected": -449.4617919921875, |
|
"loss": 0.3334, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.44448322057724, |
|
"rewards/margins": 0.9420753121376038, |
|
"rewards/rejected": -1.3865585327148438, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.13777777777777778, |
|
"grad_norm": 2.836688280105591, |
|
"learning_rate": 4.995670790537125e-05, |
|
"logits/chosen": 1.9061617851257324, |
|
"logits/rejected": 1.8595614433288574, |
|
"logps/chosen": -160.72055053710938, |
|
"logps/rejected": -129.44073486328125, |
|
"loss": 0.7916, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.3917423486709595, |
|
"rewards/margins": -0.17064018547534943, |
|
"rewards/rejected": -0.22110214829444885, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.14222222222222222, |
|
"grad_norm": 2.559312343597412, |
|
"learning_rate": 4.994521264455187e-05, |
|
"logits/chosen": 2.1637351512908936, |
|
"logits/rejected": 2.2050771713256836, |
|
"logps/chosen": -304.7850341796875, |
|
"logps/rejected": -348.8180847167969, |
|
"loss": 0.4367, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3886520266532898, |
|
"rewards/margins": 0.71138995885849, |
|
"rewards/rejected": -1.1000419855117798, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.14666666666666667, |
|
"grad_norm": 4.237148761749268, |
|
"learning_rate": 4.993236708690683e-05, |
|
"logits/chosen": 2.109586238861084, |
|
"logits/rejected": 2.0614817142486572, |
|
"logps/chosen": -375.04638671875, |
|
"logps/rejected": -299.8951110839844, |
|
"loss": 0.7802, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5753310918807983, |
|
"rewards/margins": -0.14784467220306396, |
|
"rewards/rejected": -0.4274864196777344, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.1511111111111111, |
|
"grad_norm": 2.7111451625823975, |
|
"learning_rate": 4.991817192777259e-05, |
|
"logits/chosen": 2.2655739784240723, |
|
"logits/rejected": 2.2684638500213623, |
|
"logps/chosen": -313.1495056152344, |
|
"logps/rejected": -275.92181396484375, |
|
"loss": 0.5089, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.40856704115867615, |
|
"rewards/margins": 0.41301044821739197, |
|
"rewards/rejected": -0.8215774893760681, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.15555555555555556, |
|
"grad_norm": 3.8092589378356934, |
|
"learning_rate": 4.9902627935540205e-05, |
|
"logits/chosen": 2.179189682006836, |
|
"logits/rejected": 2.120750904083252, |
|
"logps/chosen": -387.86395263671875, |
|
"logps/rejected": -440.36138916015625, |
|
"loss": 0.799, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.1031348705291748, |
|
"rewards/margins": 0.2254989743232727, |
|
"rewards/rejected": -1.3286339044570923, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 3.2711353302001953, |
|
"learning_rate": 4.9885735951613745e-05, |
|
"logits/chosen": 2.114718198776245, |
|
"logits/rejected": 2.0954365730285645, |
|
"logps/chosen": -358.84710693359375, |
|
"logps/rejected": -392.369384765625, |
|
"loss": 0.5344, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9632622003555298, |
|
"rewards/margins": 0.3675689697265625, |
|
"rewards/rejected": -1.3308311700820923, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.16444444444444445, |
|
"grad_norm": 5.44711446762085, |
|
"learning_rate": 4.9867496890364726e-05, |
|
"logits/chosen": 2.1442081928253174, |
|
"logits/rejected": 2.1060421466827393, |
|
"logps/chosen": -323.090087890625, |
|
"logps/rejected": -316.7653503417969, |
|
"loss": 0.6798, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.6822533011436462, |
|
"rewards/margins": 0.13051298260688782, |
|
"rewards/rejected": -0.8127662539482117, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.1688888888888889, |
|
"grad_norm": 3.415454626083374, |
|
"learning_rate": 4.984791173908267e-05, |
|
"logits/chosen": 2.2119979858398438, |
|
"logits/rejected": 2.155428409576416, |
|
"logps/chosen": -411.5396728515625, |
|
"logps/rejected": -440.5595703125, |
|
"loss": 0.3268, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.0235031843185425, |
|
"rewards/margins": 1.1276824474334717, |
|
"rewards/rejected": -2.1511855125427246, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.17333333333333334, |
|
"grad_norm": 2.106764554977417, |
|
"learning_rate": 4.982698155792159e-05, |
|
"logits/chosen": 1.676947832107544, |
|
"logits/rejected": 1.8609204292297363, |
|
"logps/chosen": -207.95248413085938, |
|
"logps/rejected": -255.15560913085938, |
|
"loss": 0.4656, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.20581628382205963, |
|
"rewards/margins": 0.5979617834091187, |
|
"rewards/rejected": -0.8037780523300171, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.17777777777777778, |
|
"grad_norm": 2.561739444732666, |
|
"learning_rate": 4.980470747984265e-05, |
|
"logits/chosen": 1.903275489807129, |
|
"logits/rejected": 1.878113031387329, |
|
"logps/chosen": -231.56222534179688, |
|
"logps/rejected": -203.46058654785156, |
|
"loss": 0.4862, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5600662231445312, |
|
"rewards/margins": 0.49753645062446594, |
|
"rewards/rejected": -1.0576026439666748, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.18222222222222223, |
|
"grad_norm": 3.4072999954223633, |
|
"learning_rate": 4.9781090710552835e-05, |
|
"logits/chosen": 2.3119935989379883, |
|
"logits/rejected": 2.174755334854126, |
|
"logps/chosen": -306.8345947265625, |
|
"logps/rejected": -353.8470764160156, |
|
"loss": 0.5132, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6789085865020752, |
|
"rewards/margins": 0.40302202105522156, |
|
"rewards/rejected": -1.0819306373596191, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.18666666666666668, |
|
"grad_norm": 2.7821035385131836, |
|
"learning_rate": 4.975613252843966e-05, |
|
"logits/chosen": 1.9496957063674927, |
|
"logits/rejected": 1.9538969993591309, |
|
"logps/chosen": -228.1984100341797, |
|
"logps/rejected": -221.22930908203125, |
|
"loss": 0.5304, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.608843982219696, |
|
"rewards/margins": 0.46152499318122864, |
|
"rewards/rejected": -1.070369005203247, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.19111111111111112, |
|
"grad_norm": 2.4308483600616455, |
|
"learning_rate": 4.9729834284501995e-05, |
|
"logits/chosen": 1.9656260013580322, |
|
"logits/rejected": 1.9733545780181885, |
|
"logps/chosen": -214.34481811523438, |
|
"logps/rejected": -260.02227783203125, |
|
"loss": 0.4267, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5842536687850952, |
|
"rewards/margins": 0.8209755420684814, |
|
"rewards/rejected": -1.4052292108535767, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.19555555555555557, |
|
"grad_norm": 3.7162206172943115, |
|
"learning_rate": 4.970219740227693e-05, |
|
"logits/chosen": 2.2411859035491943, |
|
"logits/rejected": 2.2703304290771484, |
|
"logps/chosen": -327.88653564453125, |
|
"logps/rejected": -440.0887145996094, |
|
"loss": 0.4777, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.2694365978240967, |
|
"rewards/margins": 0.9213591814041138, |
|
"rewards/rejected": -2.1907958984375, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 2.9216010570526123, |
|
"learning_rate": 4.9673223377762715e-05, |
|
"logits/chosen": 2.17927885055542, |
|
"logits/rejected": 2.1836795806884766, |
|
"logps/chosen": -384.8567199707031, |
|
"logps/rejected": -417.8768310546875, |
|
"loss": 0.3178, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5690780878067017, |
|
"rewards/margins": 1.0208160877227783, |
|
"rewards/rejected": -1.5898940563201904, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_logits/chosen": 2.1410844326019287, |
|
"eval_logits/rejected": 2.0819036960601807, |
|
"eval_logps/chosen": -302.23443603515625, |
|
"eval_logps/rejected": -335.14215087890625, |
|
"eval_loss": 0.590552568435669, |
|
"eval_rewards/accuracies": 0.6964285969734192, |
|
"eval_rewards/chosen": -1.194185495376587, |
|
"eval_rewards/margins": 0.738350510597229, |
|
"eval_rewards/rejected": -1.932536244392395, |
|
"eval_runtime": 17.8289, |
|
"eval_samples_per_second": 2.804, |
|
"eval_steps_per_second": 0.393, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.20444444444444446, |
|
"grad_norm": 5.059287071228027, |
|
"learning_rate": 4.9642913779337757e-05, |
|
"logits/chosen": 1.6329092979431152, |
|
"logits/rejected": 1.6129851341247559, |
|
"logps/chosen": -348.64971923828125, |
|
"logps/rejected": -319.07080078125, |
|
"loss": 0.6636, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.3844482898712158, |
|
"rewards/margins": 0.060698702931404114, |
|
"rewards/rejected": -1.445146918296814, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.2088888888888889, |
|
"grad_norm": 2.8684635162353516, |
|
"learning_rate": 4.9611270247675776e-05, |
|
"logits/chosen": 1.4863775968551636, |
|
"logits/rejected": 1.5527057647705078, |
|
"logps/chosen": -104.7228775024414, |
|
"logps/rejected": -144.18276977539062, |
|
"loss": 0.7198, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.2717903256416321, |
|
"rewards/margins": -0.05223694443702698, |
|
"rewards/rejected": -0.2195533812046051, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.21333333333333335, |
|
"grad_norm": 4.421427249908447, |
|
"learning_rate": 4.9578294495656965e-05, |
|
"logits/chosen": 2.095689296722412, |
|
"logits/rejected": 2.1000843048095703, |
|
"logps/chosen": -379.61566162109375, |
|
"logps/rejected": -351.688232421875, |
|
"loss": 0.5805, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.1613686084747314, |
|
"rewards/margins": 0.24983596801757812, |
|
"rewards/rejected": -1.4112045764923096, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.21777777777777776, |
|
"grad_norm": 2.3262526988983154, |
|
"learning_rate": 4.954398830827524e-05, |
|
"logits/chosen": 1.5170578956604004, |
|
"logits/rejected": 1.41743004322052, |
|
"logps/chosen": -141.56716918945312, |
|
"logps/rejected": -152.45985412597656, |
|
"loss": 0.6136, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.250314325094223, |
|
"rewards/margins": 0.1659536361694336, |
|
"rewards/rejected": -0.4162679612636566, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.2222222222222222, |
|
"grad_norm": 1.6063750982284546, |
|
"learning_rate": 4.950835354254167e-05, |
|
"logits/chosen": 2.113161087036133, |
|
"logits/rejected": 2.027552843093872, |
|
"logps/chosen": -283.05487060546875, |
|
"logps/rejected": -338.911865234375, |
|
"loss": 0.202, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8912537097930908, |
|
"rewards/margins": 1.6376852989196777, |
|
"rewards/rejected": -2.5289390087127686, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.22666666666666666, |
|
"grad_norm": 1.9353735446929932, |
|
"learning_rate": 4.947139212738395e-05, |
|
"logits/chosen": 2.0660266876220703, |
|
"logits/rejected": 1.9209285974502563, |
|
"logps/chosen": -392.48199462890625, |
|
"logps/rejected": -411.1744079589844, |
|
"loss": 0.1246, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.723092794418335, |
|
"rewards/margins": 2.101870536804199, |
|
"rewards/rejected": -3.8249635696411133, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.2311111111111111, |
|
"grad_norm": 3.488070011138916, |
|
"learning_rate": 4.943310606354192e-05, |
|
"logits/chosen": 2.190558433532715, |
|
"logits/rejected": 2.1188831329345703, |
|
"logps/chosen": -325.7753601074219, |
|
"logps/rejected": -416.0865783691406, |
|
"loss": 0.4128, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.3961410522460938, |
|
"rewards/margins": 1.5372390747070312, |
|
"rewards/rejected": -2.933380126953125, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.23555555555555555, |
|
"grad_norm": 9.58780574798584, |
|
"learning_rate": 4.9393497423459376e-05, |
|
"logits/chosen": 2.139993667602539, |
|
"logits/rejected": 2.1976380348205566, |
|
"logps/chosen": -336.98150634765625, |
|
"logps/rejected": -287.92987060546875, |
|
"loss": 1.3144, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.950069546699524, |
|
"rewards/margins": -0.7764908075332642, |
|
"rewards/rejected": -1.1735787391662598, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 2.2417423725128174, |
|
"learning_rate": 4.935256835117179e-05, |
|
"logits/chosen": 2.2602908611297607, |
|
"logits/rejected": 2.23490047454834, |
|
"logps/chosen": -410.2571105957031, |
|
"logps/rejected": -527.3067016601562, |
|
"loss": 0.212, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.7630027532577515, |
|
"rewards/margins": 3.0140035152435303, |
|
"rewards/rejected": -4.77700662612915, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.24444444444444444, |
|
"grad_norm": 2.4229862689971924, |
|
"learning_rate": 4.931032106219029e-05, |
|
"logits/chosen": 1.751630425453186, |
|
"logits/rejected": 1.79133939743042, |
|
"logps/chosen": -251.48580932617188, |
|
"logps/rejected": -306.2901611328125, |
|
"loss": 0.248, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.854656994342804, |
|
"rewards/margins": 1.2701172828674316, |
|
"rewards/rejected": -2.124774217605591, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.24888888888888888, |
|
"grad_norm": 7.835116386413574, |
|
"learning_rate": 4.926675784338174e-05, |
|
"logits/chosen": 1.9964067935943604, |
|
"logits/rejected": 2.117382287979126, |
|
"logps/chosen": -287.7210388183594, |
|
"logps/rejected": -260.3846740722656, |
|
"loss": 0.9769, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.8229798078536987, |
|
"rewards/margins": -0.4610947072505951, |
|
"rewards/rejected": -1.3618850708007812, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.25333333333333335, |
|
"grad_norm": 3.7002341747283936, |
|
"learning_rate": 4.922188105284495e-05, |
|
"logits/chosen": 2.1905529499053955, |
|
"logits/rejected": 2.094609260559082, |
|
"logps/chosen": -429.6041259765625, |
|
"logps/rejected": -498.65740966796875, |
|
"loss": 0.2605, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.7014801502227783, |
|
"rewards/margins": 1.3251266479492188, |
|
"rewards/rejected": -4.026606559753418, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.2577777777777778, |
|
"grad_norm": 1.7439998388290405, |
|
"learning_rate": 4.9175693119783013e-05, |
|
"logits/chosen": 1.88455069065094, |
|
"logits/rejected": 1.802990198135376, |
|
"logps/chosen": -438.767578125, |
|
"logps/rejected": -430.30194091796875, |
|
"loss": 0.1209, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.552638292312622, |
|
"rewards/margins": 2.1732101440429688, |
|
"rewards/rejected": -4.725848197937012, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.26222222222222225, |
|
"grad_norm": 2.541234254837036, |
|
"learning_rate": 4.912819654437182e-05, |
|
"logits/chosen": 2.0551681518554688, |
|
"logits/rejected": 1.987473726272583, |
|
"logps/chosen": -397.0820617675781, |
|
"logps/rejected": -455.72442626953125, |
|
"loss": 0.1633, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.4276413917541504, |
|
"rewards/margins": 1.9974074363708496, |
|
"rewards/rejected": -3.425048828125, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.26666666666666666, |
|
"grad_norm": 2.0597288608551025, |
|
"learning_rate": 4.9079393897624745e-05, |
|
"logits/chosen": 1.7409842014312744, |
|
"logits/rejected": 1.764671802520752, |
|
"logps/chosen": -259.36578369140625, |
|
"logps/rejected": -400.5860595703125, |
|
"loss": 0.226, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.98876953125, |
|
"rewards/margins": 3.264120578765869, |
|
"rewards/rejected": -4.252890110015869, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.27111111111111114, |
|
"grad_norm": 3.1185505390167236, |
|
"learning_rate": 4.9029287821253445e-05, |
|
"logits/chosen": 2.0401980876922607, |
|
"logits/rejected": 2.0045888423919678, |
|
"logps/chosen": -233.93736267089844, |
|
"logps/rejected": -272.4693603515625, |
|
"loss": 0.4837, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8766113519668579, |
|
"rewards/margins": 0.49696657061576843, |
|
"rewards/rejected": -1.3735778331756592, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.27555555555555555, |
|
"grad_norm": 0.503414511680603, |
|
"learning_rate": 4.897788102752485e-05, |
|
"logits/chosen": 2.0203349590301514, |
|
"logits/rejected": 1.9820505380630493, |
|
"logps/chosen": -306.28814697265625, |
|
"logps/rejected": -438.020263671875, |
|
"loss": 0.0311, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.1499321460723877, |
|
"rewards/margins": 4.276589393615723, |
|
"rewards/rejected": -5.426521301269531, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 5.02221155166626, |
|
"learning_rate": 4.8925176299114416e-05, |
|
"logits/chosen": 1.7009226083755493, |
|
"logits/rejected": 1.5652942657470703, |
|
"logps/chosen": -425.44415283203125, |
|
"logps/rejected": -476.310546875, |
|
"loss": 0.2624, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.0838661193847656, |
|
"rewards/margins": 2.905472993850708, |
|
"rewards/rejected": -5.989339351654053, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.28444444444444444, |
|
"grad_norm": 10.18476390838623, |
|
"learning_rate": 4.8871176488955415e-05, |
|
"logits/chosen": 1.9149291515350342, |
|
"logits/rejected": 1.852341651916504, |
|
"logps/chosen": -378.48138427734375, |
|
"logps/rejected": -236.2688751220703, |
|
"loss": 2.0384, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -3.0230088233947754, |
|
"rewards/margins": -1.4521996974945068, |
|
"rewards/rejected": -1.5708091259002686, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.28888888888888886, |
|
"grad_norm": 2.800845146179199, |
|
"learning_rate": 4.881588452008456e-05, |
|
"logits/chosen": 1.3878483772277832, |
|
"logits/rejected": 1.4146528244018555, |
|
"logps/chosen": -115.78246307373047, |
|
"logps/rejected": -137.14707946777344, |
|
"loss": 0.6799, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.47142869234085083, |
|
"rewards/margins": 0.036671459674835205, |
|
"rewards/rejected": -0.508100152015686, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.29333333333333333, |
|
"grad_norm": 4.6413726806640625, |
|
"learning_rate": 4.875930338548376e-05, |
|
"logits/chosen": 1.9736688137054443, |
|
"logits/rejected": 1.8488330841064453, |
|
"logps/chosen": -233.1080322265625, |
|
"logps/rejected": -270.9902038574219, |
|
"loss": 0.4431, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.331383466720581, |
|
"rewards/margins": 0.5846099853515625, |
|
"rewards/rejected": -1.9159934520721436, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.29777777777777775, |
|
"grad_norm": 8.721001625061035, |
|
"learning_rate": 4.87014361479181e-05, |
|
"logits/chosen": 1.8805785179138184, |
|
"logits/rejected": 1.8359558582305908, |
|
"logps/chosen": -321.511962890625, |
|
"logps/rejected": -275.58209228515625, |
|
"loss": 1.3317, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -3.167226552963257, |
|
"rewards/margins": -0.7222648859024048, |
|
"rewards/rejected": -2.4449615478515625, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.3022222222222222, |
|
"grad_norm": 3.038731098175049, |
|
"learning_rate": 4.864228593977006e-05, |
|
"logits/chosen": 2.2448015213012695, |
|
"logits/rejected": 2.306088447570801, |
|
"logps/chosen": -347.45849609375, |
|
"logps/rejected": -423.48297119140625, |
|
"loss": 0.229, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.2222633361816406, |
|
"rewards/margins": 2.896373748779297, |
|
"rewards/rejected": -5.1186370849609375, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.30666666666666664, |
|
"grad_norm": 4.552579879760742, |
|
"learning_rate": 4.858185596286997e-05, |
|
"logits/chosen": 2.1001484394073486, |
|
"logits/rejected": 1.979229211807251, |
|
"logps/chosen": -295.586181640625, |
|
"logps/rejected": -303.2032470703125, |
|
"loss": 0.3926, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.715203881263733, |
|
"rewards/margins": 2.0468697547912598, |
|
"rewards/rejected": -3.762073516845703, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.3111111111111111, |
|
"grad_norm": 3.6529836654663086, |
|
"learning_rate": 4.852014948832268e-05, |
|
"logits/chosen": 1.9788322448730469, |
|
"logits/rejected": 1.9580605030059814, |
|
"logps/chosen": -213.86343383789062, |
|
"logps/rejected": -261.7784118652344, |
|
"loss": 0.524, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.2392014265060425, |
|
"rewards/margins": 0.37295836210250854, |
|
"rewards/rejected": -1.6121597290039062, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.31555555555555553, |
|
"grad_norm": 1.9879947900772095, |
|
"learning_rate": 4.8457169856330485e-05, |
|
"logits/chosen": 1.942040205001831, |
|
"logits/rejected": 1.8756356239318848, |
|
"logps/chosen": -313.9153747558594, |
|
"logps/rejected": -427.9259948730469, |
|
"loss": 0.1422, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.7393081188201904, |
|
"rewards/margins": 4.251686096191406, |
|
"rewards/rejected": -5.990994453430176, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 5.4608001708984375, |
|
"learning_rate": 4.839292047601234e-05, |
|
"logits/chosen": 1.7308683395385742, |
|
"logits/rejected": 1.7762730121612549, |
|
"logps/chosen": -293.4880065917969, |
|
"logps/rejected": -250.67381286621094, |
|
"loss": 0.6266, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.6677329540252686, |
|
"rewards/margins": 1.1602835655212402, |
|
"rewards/rejected": -2.828016757965088, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.3244444444444444, |
|
"grad_norm": 11.236379623413086, |
|
"learning_rate": 4.832740482521931e-05, |
|
"logits/chosen": 1.5850169658660889, |
|
"logits/rejected": 1.6753277778625488, |
|
"logps/chosen": -275.2799072265625, |
|
"logps/rejected": -174.08926391601562, |
|
"loss": 1.7558, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -3.16182279586792, |
|
"rewards/margins": -1.245647668838501, |
|
"rewards/rejected": -1.916175127029419, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.3288888888888889, |
|
"grad_norm": 1.0129520893096924, |
|
"learning_rate": 4.826062645034631e-05, |
|
"logits/chosen": 2.147963047027588, |
|
"logits/rejected": 2.13932728767395, |
|
"logps/chosen": -533.4431762695312, |
|
"logps/rejected": -664.0446166992188, |
|
"loss": 0.038, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.084846496582031, |
|
"rewards/margins": 4.350230693817139, |
|
"rewards/rejected": -8.435077667236328, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.3333333333333333, |
|
"grad_norm": 8.0421724319458, |
|
"learning_rate": 4.819258896614014e-05, |
|
"logits/chosen": 2.038822650909424, |
|
"logits/rejected": 2.02886962890625, |
|
"logps/chosen": -347.89373779296875, |
|
"logps/rejected": -322.3008728027344, |
|
"loss": 0.4447, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.955258369445801, |
|
"rewards/margins": 0.5941513180732727, |
|
"rewards/rejected": -3.5494096279144287, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.3377777777777778, |
|
"grad_norm": 10.355673789978027, |
|
"learning_rate": 4.812329605550381e-05, |
|
"logits/chosen": 2.0920519828796387, |
|
"logits/rejected": 2.0875403881073, |
|
"logps/chosen": -335.3114318847656, |
|
"logps/rejected": -350.8553466796875, |
|
"loss": 0.9773, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -3.6420540809631348, |
|
"rewards/margins": 0.05045384168624878, |
|
"rewards/rejected": -3.6925079822540283, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.3422222222222222, |
|
"grad_norm": 0.42018255591392517, |
|
"learning_rate": 4.805275146929721e-05, |
|
"logits/chosen": 2.0620903968811035, |
|
"logits/rejected": 2.108494281768799, |
|
"logps/chosen": -342.378662109375, |
|
"logps/rejected": -450.7765808105469, |
|
"loss": 0.0191, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.201615810394287, |
|
"rewards/margins": 4.498736381530762, |
|
"rewards/rejected": -6.700352668762207, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.3466666666666667, |
|
"grad_norm": 0.4926978349685669, |
|
"learning_rate": 4.7980959026134044e-05, |
|
"logits/chosen": 1.9305293560028076, |
|
"logits/rejected": 2.000296115875244, |
|
"logps/chosen": -292.07354736328125, |
|
"logps/rejected": -431.2196044921875, |
|
"loss": 0.0316, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.1881355047225952, |
|
"rewards/margins": 3.963468074798584, |
|
"rewards/rejected": -5.151603698730469, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.3511111111111111, |
|
"grad_norm": 4.756207466125488, |
|
"learning_rate": 4.790792261217512e-05, |
|
"logits/chosen": 2.1176319122314453, |
|
"logits/rejected": 2.1045117378234863, |
|
"logps/chosen": -309.7745666503906, |
|
"logps/rejected": -279.74969482421875, |
|
"loss": 0.5587, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.1116164922714233, |
|
"rewards/margins": 0.3822830319404602, |
|
"rewards/rejected": -1.4938995838165283, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.35555555555555557, |
|
"grad_norm": 0.1574506163597107, |
|
"learning_rate": 4.783364618091803e-05, |
|
"logits/chosen": 2.228512763977051, |
|
"logits/rejected": 2.239095687866211, |
|
"logps/chosen": -452.6519470214844, |
|
"logps/rejected": -519.6800537109375, |
|
"loss": 0.0082, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.023036241531372, |
|
"rewards/margins": 5.381681442260742, |
|
"rewards/rejected": -7.404717922210693, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 17.396940231323242, |
|
"learning_rate": 4.7758133752983135e-05, |
|
"logits/chosen": 2.2299280166625977, |
|
"logits/rejected": 2.275631904602051, |
|
"logps/chosen": -481.376708984375, |
|
"logps/rejected": -424.80029296875, |
|
"loss": 1.0849, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -4.431848049163818, |
|
"rewards/margins": -0.2735259532928467, |
|
"rewards/rejected": -4.158322334289551, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.36444444444444446, |
|
"grad_norm": 2.288203477859497, |
|
"learning_rate": 4.7681389415895864e-05, |
|
"logits/chosen": 1.9121689796447754, |
|
"logits/rejected": 1.9124395847320557, |
|
"logps/chosen": -365.42669677734375, |
|
"logps/rejected": -389.65118408203125, |
|
"loss": 0.3228, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.7858033180236816, |
|
"rewards/margins": 1.5727282762527466, |
|
"rewards/rejected": -4.358531475067139, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.3688888888888889, |
|
"grad_norm": 17.783615112304688, |
|
"learning_rate": 4.7603417323865547e-05, |
|
"logits/chosen": 2.15109920501709, |
|
"logits/rejected": 2.273561954498291, |
|
"logps/chosen": -492.3847351074219, |
|
"logps/rejected": -423.0718994140625, |
|
"loss": 2.6581, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -4.891546726226807, |
|
"rewards/margins": -1.6526780128479004, |
|
"rewards/rejected": -3.2388687133789062, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.37333333333333335, |
|
"grad_norm": 3.1931591033935547, |
|
"learning_rate": 4.752422169756048e-05, |
|
"logits/chosen": 2.216590404510498, |
|
"logits/rejected": 2.2690138816833496, |
|
"logps/chosen": -395.58953857421875, |
|
"logps/rejected": -360.23663330078125, |
|
"loss": 0.2526, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.509881615638733, |
|
"rewards/margins": 2.4663939476013184, |
|
"rewards/rejected": -3.976275682449341, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.37777777777777777, |
|
"grad_norm": 0.4893577992916107, |
|
"learning_rate": 4.74438068238795e-05, |
|
"logits/chosen": 2.0717084407806396, |
|
"logits/rejected": 2.0398921966552734, |
|
"logps/chosen": -313.4667053222656, |
|
"logps/rejected": -517.9127197265625, |
|
"loss": 0.023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.5163437128067017, |
|
"rewards/margins": 4.198652744293213, |
|
"rewards/rejected": -5.714996337890625, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.38222222222222224, |
|
"grad_norm": 12.34611988067627, |
|
"learning_rate": 4.736217705571989e-05, |
|
"logits/chosen": 1.8056581020355225, |
|
"logits/rejected": 1.906313180923462, |
|
"logps/chosen": -328.50921630859375, |
|
"logps/rejected": -252.91552734375, |
|
"loss": 1.6628, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -3.34942626953125, |
|
"rewards/margins": -1.445077657699585, |
|
"rewards/rejected": -1.9043487310409546, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.38666666666666666, |
|
"grad_norm": 11.105749130249023, |
|
"learning_rate": 4.7279336811741806e-05, |
|
"logits/chosen": 2.4057044982910156, |
|
"logits/rejected": 2.336398124694824, |
|
"logps/chosen": -602.2092895507812, |
|
"logps/rejected": -557.8479614257812, |
|
"loss": 0.6199, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -3.7743942737579346, |
|
"rewards/margins": 0.2018601894378662, |
|
"rewards/rejected": -3.976254463195801, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.39111111111111113, |
|
"grad_norm": 17.08523941040039, |
|
"learning_rate": 4.7195290576129034e-05, |
|
"logits/chosen": 2.213070869445801, |
|
"logits/rejected": 2.195730686187744, |
|
"logps/chosen": -453.27703857421875, |
|
"logps/rejected": -573.7431640625, |
|
"loss": 2.1029, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -4.91035795211792, |
|
"rewards/margins": 0.73905348777771, |
|
"rewards/rejected": -5.649411201477051, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.39555555555555555, |
|
"grad_norm": 1.3783494234085083, |
|
"learning_rate": 4.711004289834632e-05, |
|
"logits/chosen": 2.123533248901367, |
|
"logits/rejected": 2.0941734313964844, |
|
"logps/chosen": -282.4661865234375, |
|
"logps/rejected": -387.58892822265625, |
|
"loss": 0.1011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.8440461158752441, |
|
"rewards/margins": 2.256805419921875, |
|
"rewards/rejected": -4.100851535797119, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 2.8574769496917725, |
|
"learning_rate": 4.702359839289306e-05, |
|
"logits/chosen": 2.0068724155426025, |
|
"logits/rejected": 2.0709903240203857, |
|
"logps/chosen": -362.0110168457031, |
|
"logps/rejected": -384.2983093261719, |
|
"loss": 0.2747, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.201084852218628, |
|
"rewards/margins": 1.2602746486663818, |
|
"rewards/rejected": -3.4613595008850098, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_logits/chosen": 2.161973237991333, |
|
"eval_logits/rejected": 2.1175014972686768, |
|
"eval_logps/chosen": -310.35650634765625, |
|
"eval_logps/rejected": -351.593994140625, |
|
"eval_loss": 0.5963193774223328, |
|
"eval_rewards/accuracies": 0.75, |
|
"eval_rewards/chosen": -2.006390333175659, |
|
"eval_rewards/margins": 1.5713260173797607, |
|
"eval_rewards/rejected": -3.57771635055542, |
|
"eval_runtime": 17.4029, |
|
"eval_samples_per_second": 2.873, |
|
"eval_steps_per_second": 0.402, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.40444444444444444, |
|
"grad_norm": 12.335380554199219, |
|
"learning_rate": 4.693596173905352e-05, |
|
"logits/chosen": 2.364140272140503, |
|
"logits/rejected": 2.410036563873291, |
|
"logps/chosen": -285.2010192871094, |
|
"logps/rejected": -307.5989074707031, |
|
"loss": 1.0673, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -2.113780975341797, |
|
"rewards/margins": -0.5538902282714844, |
|
"rewards/rejected": -1.5598907470703125, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.4088888888888889, |
|
"grad_norm": 15.816446304321289, |
|
"learning_rate": 4.684713768064357e-05, |
|
"logits/chosen": 1.9842954874038696, |
|
"logits/rejected": 2.057584762573242, |
|
"logps/chosen": -406.9105224609375, |
|
"logps/rejected": -411.98211669921875, |
|
"loss": 1.7238, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -5.087683200836182, |
|
"rewards/margins": -1.399601697921753, |
|
"rewards/rejected": -3.6880815029144287, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.41333333333333333, |
|
"grad_norm": 9.741644859313965, |
|
"learning_rate": 4.6757131025753886e-05, |
|
"logits/chosen": 1.788228988647461, |
|
"logits/rejected": 1.8304262161254883, |
|
"logps/chosen": -257.06671142578125, |
|
"logps/rejected": -334.83636474609375, |
|
"loss": 0.9151, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -2.2510857582092285, |
|
"rewards/margins": 0.16967010498046875, |
|
"rewards/rejected": -2.4207558631896973, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.4177777777777778, |
|
"grad_norm": 2.8402011394500732, |
|
"learning_rate": 4.666594664648965e-05, |
|
"logits/chosen": 2.0854671001434326, |
|
"logits/rejected": 2.104097366333008, |
|
"logps/chosen": -244.27151489257812, |
|
"logps/rejected": -314.5588073730469, |
|
"loss": 0.3846, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.4888412654399872, |
|
"rewards/margins": 1.8905991315841675, |
|
"rewards/rejected": -2.3794403076171875, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.4222222222222222, |
|
"grad_norm": 8.729580879211426, |
|
"learning_rate": 4.657358947870691e-05, |
|
"logits/chosen": 2.1040725708007812, |
|
"logits/rejected": 2.0335569381713867, |
|
"logps/chosen": -278.30023193359375, |
|
"logps/rejected": -244.13815307617188, |
|
"loss": 1.8136, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -2.41619873046875, |
|
"rewards/margins": -0.721272349357605, |
|
"rewards/rejected": -1.6949265003204346, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.4266666666666667, |
|
"grad_norm": 10.822171211242676, |
|
"learning_rate": 4.648006452174529e-05, |
|
"logits/chosen": 2.428173542022705, |
|
"logits/rejected": 2.208796977996826, |
|
"logps/chosen": -400.2300720214844, |
|
"logps/rejected": -370.8153381347656, |
|
"loss": 1.0031, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -3.485565185546875, |
|
"rewards/margins": -0.5405601859092712, |
|
"rewards/rejected": -2.945004940032959, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.4311111111111111, |
|
"grad_norm": 0.6067838668823242, |
|
"learning_rate": 4.638537683815744e-05, |
|
"logits/chosen": 2.0516138076782227, |
|
"logits/rejected": 2.128382682800293, |
|
"logps/chosen": -282.2442321777344, |
|
"logps/rejected": -405.89276123046875, |
|
"loss": 0.0315, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.78936767578125, |
|
"rewards/margins": 3.77711820602417, |
|
"rewards/rejected": -4.56648588180542, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.43555555555555553, |
|
"grad_norm": 13.154459953308105, |
|
"learning_rate": 4.628953155343499e-05, |
|
"logits/chosen": 2.1049790382385254, |
|
"logits/rejected": 1.929673433303833, |
|
"logps/chosen": -305.3583984375, |
|
"logps/rejected": -173.13394165039062, |
|
"loss": 0.9443, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -2.3848648071289062, |
|
"rewards/margins": -0.4350753426551819, |
|
"rewards/rejected": -1.9497895240783691, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 14.330009460449219, |
|
"learning_rate": 4.6192533855731114e-05, |
|
"logits/chosen": 2.194329261779785, |
|
"logits/rejected": 2.2239904403686523, |
|
"logps/chosen": -411.20849609375, |
|
"logps/rejected": -397.444580078125, |
|
"loss": 1.1319, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -3.436021566390991, |
|
"rewards/margins": 0.39365994930267334, |
|
"rewards/rejected": -3.829681396484375, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.4444444444444444, |
|
"grad_norm": 7.170289993286133, |
|
"learning_rate": 4.609438899557964e-05, |
|
"logits/chosen": 2.3336665630340576, |
|
"logits/rejected": 2.3739962577819824, |
|
"logps/chosen": -443.4410095214844, |
|
"logps/rejected": -623.44384765625, |
|
"loss": 0.3145, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.313551425933838, |
|
"rewards/margins": 2.143171787261963, |
|
"rewards/rejected": -5.456723213195801, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.4488888888888889, |
|
"grad_norm": 1.4472028017044067, |
|
"learning_rate": 4.5995102285610906e-05, |
|
"logits/chosen": 2.0881364345550537, |
|
"logits/rejected": 2.0009138584136963, |
|
"logps/chosen": -430.8775634765625, |
|
"logps/rejected": -477.1756591796875, |
|
"loss": 0.0688, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.0700042247772217, |
|
"rewards/margins": 3.3247146606445312, |
|
"rewards/rejected": -5.394719123840332, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.4533333333333333, |
|
"grad_norm": 4.1629252433776855, |
|
"learning_rate": 4.589467910026411e-05, |
|
"logits/chosen": 1.530840516090393, |
|
"logits/rejected": 1.5147547721862793, |
|
"logps/chosen": -123.47633361816406, |
|
"logps/rejected": -148.20376586914062, |
|
"loss": 0.837, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.8232139348983765, |
|
"rewards/margins": -0.18887022137641907, |
|
"rewards/rejected": -0.6343437433242798, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.4577777777777778, |
|
"grad_norm": 2.2841246128082275, |
|
"learning_rate": 4.579312487549649e-05, |
|
"logits/chosen": 1.9281361103057861, |
|
"logits/rejected": 2.022286891937256, |
|
"logps/chosen": -349.215576171875, |
|
"logps/rejected": -505.2444763183594, |
|
"loss": 0.1805, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.0184952020645142, |
|
"rewards/margins": 3.793022394180298, |
|
"rewards/rejected": -4.811517715454102, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.4622222222222222, |
|
"grad_norm": 5.752498149871826, |
|
"learning_rate": 4.5690445108488964e-05, |
|
"logits/chosen": 2.1871137619018555, |
|
"logits/rejected": 2.213275909423828, |
|
"logps/chosen": -212.89849853515625, |
|
"logps/rejected": -298.1326599121094, |
|
"loss": 0.4131, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.0786117315292358, |
|
"rewards/margins": 0.7388886213302612, |
|
"rewards/rejected": -1.817500352859497, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.4666666666666667, |
|
"grad_norm": 2.0671677589416504, |
|
"learning_rate": 4.5586645357348636e-05, |
|
"logits/chosen": 1.9253795146942139, |
|
"logits/rejected": 1.9396390914916992, |
|
"logps/chosen": -283.0425720214844, |
|
"logps/rejected": -399.41876220703125, |
|
"loss": 0.2595, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7986934781074524, |
|
"rewards/margins": 3.9111409187316895, |
|
"rewards/rejected": -4.709834098815918, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.4711111111111111, |
|
"grad_norm": 2.6245131492614746, |
|
"learning_rate": 4.548173124080789e-05, |
|
"logits/chosen": 2.175868511199951, |
|
"logits/rejected": 2.2268357276916504, |
|
"logps/chosen": -401.7663269042969, |
|
"logps/rejected": -339.01263427734375, |
|
"loss": 0.2061, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.636662244796753, |
|
"rewards/margins": 2.116255283355713, |
|
"rewards/rejected": -3.752917528152466, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.47555555555555556, |
|
"grad_norm": 6.8939642906188965, |
|
"learning_rate": 4.5375708437920284e-05, |
|
"logits/chosen": 2.150783061981201, |
|
"logits/rejected": 2.192080020904541, |
|
"logps/chosen": -327.8550720214844, |
|
"logps/rejected": -408.950439453125, |
|
"loss": 0.4908, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -2.347644090652466, |
|
"rewards/margins": 1.237553358078003, |
|
"rewards/rejected": -3.5851974487304688, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 3.782555103302002, |
|
"learning_rate": 4.526858268775313e-05, |
|
"logits/chosen": 1.875314712524414, |
|
"logits/rejected": 1.9068584442138672, |
|
"logps/chosen": -265.00701904296875, |
|
"logps/rejected": -323.212890625, |
|
"loss": 0.2026, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.4442001581192017, |
|
"rewards/margins": 2.3625869750976562, |
|
"rewards/rejected": -3.8067870140075684, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.48444444444444446, |
|
"grad_norm": 0.7733494639396667, |
|
"learning_rate": 4.516035978907681e-05, |
|
"logits/chosen": 1.999725103378296, |
|
"logits/rejected": 1.9631330966949463, |
|
"logps/chosen": -347.4098815917969, |
|
"logps/rejected": -423.82171630859375, |
|
"loss": 0.0474, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.3635421991348267, |
|
"rewards/margins": 3.0967469215393066, |
|
"rewards/rejected": -4.460289001464844, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.4888888888888889, |
|
"grad_norm": 2.2969586849212646, |
|
"learning_rate": 4.50510456000509e-05, |
|
"logits/chosen": 1.6264564990997314, |
|
"logits/rejected": 1.6307601928710938, |
|
"logps/chosen": -256.4902648925781, |
|
"logps/rejected": -350.5125427246094, |
|
"loss": 0.2928, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.814366102218628, |
|
"rewards/margins": 3.2768468856811523, |
|
"rewards/rejected": -5.091212749481201, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.49333333333333335, |
|
"grad_norm": 7.435751914978027, |
|
"learning_rate": 4.494064603790708e-05, |
|
"logits/chosen": 1.9676257371902466, |
|
"logits/rejected": 1.9142203330993652, |
|
"logps/chosen": -340.17498779296875, |
|
"logps/rejected": -344.9024658203125, |
|
"loss": 0.648, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.670006513595581, |
|
"rewards/margins": 0.3124961853027344, |
|
"rewards/rejected": -1.9825026988983154, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.49777777777777776, |
|
"grad_norm": 2.219264268875122, |
|
"learning_rate": 4.482916707862884e-05, |
|
"logits/chosen": 2.203705310821533, |
|
"logits/rejected": 2.050464153289795, |
|
"logps/chosen": -281.28857421875, |
|
"logps/rejected": -352.7940673828125, |
|
"loss": 0.1345, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9629112482070923, |
|
"rewards/margins": 2.2336831092834473, |
|
"rewards/rejected": -3.19659423828125, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.5022222222222222, |
|
"grad_norm": 18.2978515625, |
|
"learning_rate": 4.471661475662792e-05, |
|
"logits/chosen": 1.856745719909668, |
|
"logits/rejected": 1.9189677238464355, |
|
"logps/chosen": -463.925048828125, |
|
"logps/rejected": -392.9632873535156, |
|
"loss": 2.6401, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -5.859647750854492, |
|
"rewards/margins": -2.113811731338501, |
|
"rewards/rejected": -3.745835781097412, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.5066666666666667, |
|
"grad_norm": 7.751908779144287, |
|
"learning_rate": 4.460299516441777e-05, |
|
"logits/chosen": 2.136542797088623, |
|
"logits/rejected": 2.011000156402588, |
|
"logps/chosen": -279.3929138183594, |
|
"logps/rejected": -225.48109436035156, |
|
"loss": 0.8985, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.4863853454589844, |
|
"rewards/margins": 0.2738412022590637, |
|
"rewards/rejected": -1.7602264881134033, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.5111111111111111, |
|
"grad_norm": 4.417598724365234, |
|
"learning_rate": 4.4488314452283675e-05, |
|
"logits/chosen": 1.400985836982727, |
|
"logits/rejected": 1.5235412120819092, |
|
"logps/chosen": -130.8572235107422, |
|
"logps/rejected": -137.48133850097656, |
|
"loss": 0.6302, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.7593280673027039, |
|
"rewards/margins": 0.35355114936828613, |
|
"rewards/rejected": -1.1128792762756348, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.5155555555555555, |
|
"grad_norm": 3.6468398571014404, |
|
"learning_rate": 4.437257882794991e-05, |
|
"logits/chosen": 2.243985414505005, |
|
"logits/rejected": 2.1406588554382324, |
|
"logps/chosen": -485.3035583496094, |
|
"logps/rejected": -437.01959228515625, |
|
"loss": 0.308, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.8883087635040283, |
|
"rewards/margins": 1.3152587413787842, |
|
"rewards/rejected": -4.2035675048828125, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 3.9299352169036865, |
|
"learning_rate": 4.425579455624364e-05, |
|
"logits/chosen": 1.9169459342956543, |
|
"logits/rejected": 1.82602858543396, |
|
"logps/chosen": -202.2696075439453, |
|
"logps/rejected": -183.041015625, |
|
"loss": 0.5871, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.4907638430595398, |
|
"rewards/margins": 0.28335878252983093, |
|
"rewards/rejected": -0.7741226553916931, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.5244444444444445, |
|
"grad_norm": 5.041739463806152, |
|
"learning_rate": 4.413796795875586e-05, |
|
"logits/chosen": 1.7944426536560059, |
|
"logits/rejected": 1.8221161365509033, |
|
"logps/chosen": -212.9854278564453, |
|
"logps/rejected": -245.502197265625, |
|
"loss": 0.6075, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.522188663482666, |
|
"rewards/margins": 0.5173491835594177, |
|
"rewards/rejected": -2.0395379066467285, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.5288888888888889, |
|
"grad_norm": 2.375946044921875, |
|
"learning_rate": 4.4019105413499164e-05, |
|
"logits/chosen": 2.1719205379486084, |
|
"logits/rejected": 2.069880962371826, |
|
"logps/chosen": -417.11004638671875, |
|
"logps/rejected": -380.632080078125, |
|
"loss": 0.1328, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.728323459625244, |
|
"rewards/margins": 2.0067780017852783, |
|
"rewards/rejected": -4.735101699829102, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.5333333333333333, |
|
"grad_norm": 4.835366725921631, |
|
"learning_rate": 4.389921335456253e-05, |
|
"logits/chosen": 2.228755474090576, |
|
"logits/rejected": 2.121194839477539, |
|
"logps/chosen": -456.9483337402344, |
|
"logps/rejected": -443.54644775390625, |
|
"loss": 0.4242, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.9684982299804688, |
|
"rewards/margins": 3.314952850341797, |
|
"rewards/rejected": -5.283451080322266, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.5377777777777778, |
|
"grad_norm": 9.487476348876953, |
|
"learning_rate": 4.3778298271762995e-05, |
|
"logits/chosen": 1.9492430686950684, |
|
"logits/rejected": 1.8923718929290771, |
|
"logps/chosen": -367.70208740234375, |
|
"logps/rejected": -292.9391174316406, |
|
"loss": 1.1745, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -2.854750156402588, |
|
"rewards/margins": -0.488888680934906, |
|
"rewards/rejected": -2.365861415863037, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.5422222222222223, |
|
"grad_norm": 8.035406112670898, |
|
"learning_rate": 4.365636671029445e-05, |
|
"logits/chosen": 1.636220097541809, |
|
"logits/rejected": 1.6882154941558838, |
|
"logps/chosen": -239.27474975585938, |
|
"logps/rejected": -248.72625732421875, |
|
"loss": 0.7769, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.7846871614456177, |
|
"rewards/margins": 0.6797889471054077, |
|
"rewards/rejected": -2.4644761085510254, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.5466666666666666, |
|
"grad_norm": 5.362706184387207, |
|
"learning_rate": 4.3533425270373216e-05, |
|
"logits/chosen": 2.0839271545410156, |
|
"logits/rejected": 2.0972325801849365, |
|
"logps/chosen": -387.6666564941406, |
|
"logps/rejected": -393.9215393066406, |
|
"loss": 0.2218, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.9992005825042725, |
|
"rewards/margins": 3.244002342224121, |
|
"rewards/rejected": -6.243203163146973, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.5511111111111111, |
|
"grad_norm": 2.3104119300842285, |
|
"learning_rate": 4.340948060688088e-05, |
|
"logits/chosen": 1.8295419216156006, |
|
"logits/rejected": 1.8262109756469727, |
|
"logps/chosen": -224.88113403320312, |
|
"logps/rejected": -227.4783477783203, |
|
"loss": 0.3689, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5778324604034424, |
|
"rewards/margins": 0.8512080907821655, |
|
"rewards/rejected": -1.429040551185608, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.5555555555555556, |
|
"grad_norm": 4.099146366119385, |
|
"learning_rate": 4.328453942900402e-05, |
|
"logits/chosen": 1.97019362449646, |
|
"logits/rejected": 1.9800690412521362, |
|
"logps/chosen": -287.9169921875, |
|
"logps/rejected": -343.14788818359375, |
|
"loss": 0.3836, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.4385543763637543, |
|
"rewards/margins": 1.3104095458984375, |
|
"rewards/rejected": -1.7489639520645142, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 4.081305503845215, |
|
"learning_rate": 4.3158608499871024e-05, |
|
"logits/chosen": 2.1010217666625977, |
|
"logits/rejected": 2.029930830001831, |
|
"logps/chosen": -325.297119140625, |
|
"logps/rejected": -344.98284912109375, |
|
"loss": 0.342, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.5290114879608154, |
|
"rewards/margins": 0.9010803699493408, |
|
"rewards/rejected": -2.4300918579101562, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.5644444444444444, |
|
"grad_norm": 0.4260007441043854, |
|
"learning_rate": 4.3031694636186e-05, |
|
"logits/chosen": 2.314997434616089, |
|
"logits/rejected": 2.244847059249878, |
|
"logps/chosen": -407.2716369628906, |
|
"logps/rejected": -440.58770751953125, |
|
"loss": 0.0165, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8063247799873352, |
|
"rewards/margins": 4.322688579559326, |
|
"rewards/rejected": -5.1290130615234375, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.5688888888888889, |
|
"grad_norm": 22.247989654541016, |
|
"learning_rate": 4.2903804707859835e-05, |
|
"logits/chosen": 1.9879422187805176, |
|
"logits/rejected": 2.056591749191284, |
|
"logps/chosen": -241.29771423339844, |
|
"logps/rejected": -261.1890563964844, |
|
"loss": 1.5621, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -2.3791258335113525, |
|
"rewards/margins": -1.075968861579895, |
|
"rewards/rejected": -1.3031569719314575, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.5733333333333334, |
|
"grad_norm": 3.094059705734253, |
|
"learning_rate": 4.2774945637638236e-05, |
|
"logits/chosen": 2.2621870040893555, |
|
"logits/rejected": 2.235694408416748, |
|
"logps/chosen": -403.2272644042969, |
|
"logps/rejected": -458.10858154296875, |
|
"loss": 0.1943, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.8431869745254517, |
|
"rewards/margins": 2.3183135986328125, |
|
"rewards/rejected": -4.161500453948975, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.5777777777777777, |
|
"grad_norm": 6.682619094848633, |
|
"learning_rate": 4.2645124400727074e-05, |
|
"logits/chosen": 1.864232063293457, |
|
"logits/rejected": 1.8359177112579346, |
|
"logps/chosen": -260.30364990234375, |
|
"logps/rejected": -291.6693420410156, |
|
"loss": 1.0362, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.559638261795044, |
|
"rewards/margins": 0.7007129788398743, |
|
"rewards/rejected": -2.2603511810302734, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.5822222222222222, |
|
"grad_norm": 4.2206597328186035, |
|
"learning_rate": 4.251434802441476e-05, |
|
"logits/chosen": 2.0464115142822266, |
|
"logits/rejected": 2.0911216735839844, |
|
"logps/chosen": -243.08462524414062, |
|
"logps/rejected": -254.49130249023438, |
|
"loss": 0.5624, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2900955379009247, |
|
"rewards/margins": 0.2913353145122528, |
|
"rewards/rejected": -0.001239776611328125, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.5866666666666667, |
|
"grad_norm": 3.9296364784240723, |
|
"learning_rate": 4.238262358769192e-05, |
|
"logits/chosen": 2.281747341156006, |
|
"logits/rejected": 2.3363983631134033, |
|
"logps/chosen": -285.22998046875, |
|
"logps/rejected": -372.3949279785156, |
|
"loss": 0.2539, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6220627427101135, |
|
"rewards/margins": 1.3760398626327515, |
|
"rewards/rejected": -1.9981026649475098, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.5911111111111111, |
|
"grad_norm": 2.2730822563171387, |
|
"learning_rate": 4.224995822086812e-05, |
|
"logits/chosen": 2.2451581954956055, |
|
"logits/rejected": 2.2252092361450195, |
|
"logps/chosen": -394.37017822265625, |
|
"logps/rejected": -501.9224853515625, |
|
"loss": 0.1271, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.13392946124076843, |
|
"rewards/margins": 4.8956298828125, |
|
"rewards/rejected": -4.76170015335083, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.5955555555555555, |
|
"grad_norm": 2.509901523590088, |
|
"learning_rate": 4.211635910518595e-05, |
|
"logits/chosen": 1.6302995681762695, |
|
"logits/rejected": 1.689335823059082, |
|
"logps/chosen": -152.21542358398438, |
|
"logps/rejected": -137.0872802734375, |
|
"loss": 0.4905, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.1873771846294403, |
|
"rewards/margins": 0.48279035091400146, |
|
"rewards/rejected": -0.6701675653457642, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 5.595450401306152, |
|
"learning_rate": 4.198183347243233e-05, |
|
"logits/chosen": 2.134934663772583, |
|
"logits/rejected": 2.091696262359619, |
|
"logps/chosen": -333.56829833984375, |
|
"logps/rejected": -360.50970458984375, |
|
"loss": 0.3094, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9313689470291138, |
|
"rewards/margins": 2.534543037414551, |
|
"rewards/rejected": -3.465911865234375, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_logits/chosen": 2.2205488681793213, |
|
"eval_logits/rejected": 2.174258232116699, |
|
"eval_logps/chosen": -303.35302734375, |
|
"eval_logps/rejected": -344.1379089355469, |
|
"eval_loss": 0.43841180205345154, |
|
"eval_rewards/accuracies": 0.8035714030265808, |
|
"eval_rewards/chosen": -1.3060392141342163, |
|
"eval_rewards/margins": 1.5260727405548096, |
|
"eval_rewards/rejected": -2.8321120738983154, |
|
"eval_runtime": 17.3865, |
|
"eval_samples_per_second": 2.876, |
|
"eval_steps_per_second": 0.403, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.6044444444444445, |
|
"grad_norm": 1.6538218259811401, |
|
"learning_rate": 4.184638860454696e-05, |
|
"logits/chosen": 1.9310147762298584, |
|
"logits/rejected": 1.8604496717453003, |
|
"logps/chosen": -251.001708984375, |
|
"logps/rejected": -290.62005615234375, |
|
"loss": 0.1732, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.33168870210647583, |
|
"rewards/margins": 1.7937004566192627, |
|
"rewards/rejected": -2.1253890991210938, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.6088888888888889, |
|
"grad_norm": 3.1406407356262207, |
|
"learning_rate": 4.1710031833228225e-05, |
|
"logits/chosen": 1.7651350498199463, |
|
"logits/rejected": 1.8405566215515137, |
|
"logps/chosen": -175.06227111816406, |
|
"logps/rejected": -272.6947326660156, |
|
"loss": 0.1498, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.38391417264938354, |
|
"rewards/margins": 1.8765029907226562, |
|
"rewards/rejected": -2.2604172229766846, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.6133333333333333, |
|
"grad_norm": 4.962210178375244, |
|
"learning_rate": 4.157277053953631e-05, |
|
"logits/chosen": 2.104128837585449, |
|
"logits/rejected": 2.078892230987549, |
|
"logps/chosen": -259.95330810546875, |
|
"logps/rejected": -243.87139892578125, |
|
"loss": 0.3842, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8645896911621094, |
|
"rewards/margins": 1.2094483375549316, |
|
"rewards/rejected": -2.074038028717041, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.6177777777777778, |
|
"grad_norm": 5.0981316566467285, |
|
"learning_rate": 4.143461215349361e-05, |
|
"logits/chosen": 2.3514866828918457, |
|
"logits/rejected": 2.3009910583496094, |
|
"logps/chosen": -429.4484558105469, |
|
"logps/rejected": -535.6287231445312, |
|
"loss": 0.2313, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.3444290161132812, |
|
"rewards/margins": 2.3630645275115967, |
|
"rewards/rejected": -4.707493782043457, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.6222222222222222, |
|
"grad_norm": 4.2106523513793945, |
|
"learning_rate": 4.129556415368261e-05, |
|
"logits/chosen": 2.048675060272217, |
|
"logits/rejected": 2.0247228145599365, |
|
"logps/chosen": -283.77264404296875, |
|
"logps/rejected": -262.07989501953125, |
|
"loss": 0.4932, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.2498573064804077, |
|
"rewards/margins": 0.9712372422218323, |
|
"rewards/rejected": -2.2210946083068848, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.6266666666666667, |
|
"grad_norm": 10.738434791564941, |
|
"learning_rate": 4.115563406684103e-05, |
|
"logits/chosen": 2.148074150085449, |
|
"logits/rejected": 2.117837905883789, |
|
"logps/chosen": -340.57489013671875, |
|
"logps/rejected": -366.0396423339844, |
|
"loss": 1.6829, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.95953369140625, |
|
"rewards/margins": 0.4566100835800171, |
|
"rewards/rejected": -1.416143774986267, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.6311111111111111, |
|
"grad_norm": 8.303956985473633, |
|
"learning_rate": 4.101482946745439e-05, |
|
"logits/chosen": 2.478304386138916, |
|
"logits/rejected": 2.3817453384399414, |
|
"logps/chosen": -477.30059814453125, |
|
"logps/rejected": -419.9468994140625, |
|
"loss": 0.4244, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.896254062652588, |
|
"rewards/margins": 0.6844373941421509, |
|
"rewards/rejected": -3.5806915760040283, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.6355555555555555, |
|
"grad_norm": 5.196935653686523, |
|
"learning_rate": 4.0873157977346e-05, |
|
"logits/chosen": 2.295231342315674, |
|
"logits/rejected": 2.320071220397949, |
|
"logps/chosen": -327.35858154296875, |
|
"logps/rejected": -336.46234130859375, |
|
"loss": 0.3213, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.11004638671875, |
|
"rewards/margins": 1.7110825777053833, |
|
"rewards/rejected": -1.6010361909866333, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.837842583656311, |
|
"learning_rate": 4.073062726526443e-05, |
|
"logits/chosen": 2.3723278045654297, |
|
"logits/rejected": 2.1718640327453613, |
|
"logps/chosen": -361.47161865234375, |
|
"logps/rejected": -342.56048583984375, |
|
"loss": 0.0281, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.03143996000289917, |
|
"rewards/margins": 3.5597052574157715, |
|
"rewards/rejected": -3.5911452770233154, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.6444444444444445, |
|
"grad_norm": 1.8959003686904907, |
|
"learning_rate": 4.058724504646834e-05, |
|
"logits/chosen": 2.1700668334960938, |
|
"logits/rejected": 2.0942935943603516, |
|
"logps/chosen": -257.6955261230469, |
|
"logps/rejected": -254.59393310546875, |
|
"loss": 0.2177, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.6744873523712158, |
|
"rewards/margins": 1.8073105812072754, |
|
"rewards/rejected": -1.13282310962677, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.6488888888888888, |
|
"grad_norm": 3.8877060413360596, |
|
"learning_rate": 4.044301908230889e-05, |
|
"logits/chosen": 2.336484909057617, |
|
"logits/rejected": 2.250220775604248, |
|
"logps/chosen": -329.42779541015625, |
|
"logps/rejected": -461.0723876953125, |
|
"loss": 0.2504, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0788795948028564, |
|
"rewards/margins": 5.8840203285217285, |
|
"rewards/rejected": -4.805140972137451, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.6533333333333333, |
|
"grad_norm": 4.329055309295654, |
|
"learning_rate": 4.0297957179809586e-05, |
|
"logits/chosen": 1.7940289974212646, |
|
"logits/rejected": 1.8224772214889526, |
|
"logps/chosen": -218.82870483398438, |
|
"logps/rejected": -236.94589233398438, |
|
"loss": 0.6222, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.5779021978378296, |
|
"rewards/margins": 0.4461887776851654, |
|
"rewards/rejected": -2.0240910053253174, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.6577777777777778, |
|
"grad_norm": 1.4359842538833618, |
|
"learning_rate": 4.0152067191243696e-05, |
|
"logits/chosen": 1.9685239791870117, |
|
"logits/rejected": 2.0288798809051514, |
|
"logps/chosen": -357.14801025390625, |
|
"logps/rejected": -412.6529541015625, |
|
"loss": 0.0852, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.2776429653167725, |
|
"rewards/margins": 2.4739623069763184, |
|
"rewards/rejected": -4.75160551071167, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.6622222222222223, |
|
"grad_norm": 12.442399978637695, |
|
"learning_rate": 4.000535701370921e-05, |
|
"logits/chosen": 1.7401182651519775, |
|
"logits/rejected": 1.621551752090454, |
|
"logps/chosen": -320.3838195800781, |
|
"logps/rejected": -198.91424560546875, |
|
"loss": 1.6628, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -2.767268419265747, |
|
"rewards/margins": -0.9375503659248352, |
|
"rewards/rejected": -1.8297181129455566, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.6666666666666666, |
|
"grad_norm": 17.23033332824707, |
|
"learning_rate": 3.985783458870134e-05, |
|
"logits/chosen": 2.1593716144561768, |
|
"logits/rejected": 2.1524195671081543, |
|
"logps/chosen": -375.254150390625, |
|
"logps/rejected": -289.0406494140625, |
|
"loss": 1.0413, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -2.9647598266601562, |
|
"rewards/margins": -0.5915945768356323, |
|
"rewards/rejected": -2.3731651306152344, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.6711111111111111, |
|
"grad_norm": 0.4952673614025116, |
|
"learning_rate": 3.9709507901682675e-05, |
|
"logits/chosen": 2.376957893371582, |
|
"logits/rejected": 2.328672409057617, |
|
"logps/chosen": -484.62518310546875, |
|
"logps/rejected": -505.002197265625, |
|
"loss": 0.0204, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.708740234375, |
|
"rewards/margins": 4.536779403686523, |
|
"rewards/rejected": -5.245519638061523, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.6755555555555556, |
|
"grad_norm": 4.280153751373291, |
|
"learning_rate": 3.95603849816509e-05, |
|
"logits/chosen": 2.2534637451171875, |
|
"logits/rejected": 2.347764015197754, |
|
"logps/chosen": -311.7674560546875, |
|
"logps/rejected": -343.17584228515625, |
|
"loss": 0.2535, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9193252325057983, |
|
"rewards/margins": 1.2428758144378662, |
|
"rewards/rejected": -2.162200927734375, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 9.200690269470215, |
|
"learning_rate": 3.941047390070419e-05, |
|
"logits/chosen": 2.2525882720947266, |
|
"logits/rejected": 2.196587324142456, |
|
"logps/chosen": -419.79833984375, |
|
"logps/rejected": -368.85174560546875, |
|
"loss": 0.8223, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.9374847412109375, |
|
"rewards/margins": 0.6514175534248352, |
|
"rewards/rejected": -2.588902235031128, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.6844444444444444, |
|
"grad_norm": 1.1576966047286987, |
|
"learning_rate": 3.925978277360428e-05, |
|
"logits/chosen": 2.2419962882995605, |
|
"logits/rejected": 2.240370750427246, |
|
"logps/chosen": -354.83514404296875, |
|
"logps/rejected": -398.2762451171875, |
|
"loss": 0.0467, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.2429847717285156, |
|
"rewards/margins": 3.4657950401306152, |
|
"rewards/rejected": -4.708779811859131, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.6888888888888889, |
|
"grad_norm": 4.058935642242432, |
|
"learning_rate": 3.910831975733717e-05, |
|
"logits/chosen": 2.4752016067504883, |
|
"logits/rejected": 2.4061837196350098, |
|
"logps/chosen": -374.433349609375, |
|
"logps/rejected": -460.99163818359375, |
|
"loss": 0.1247, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.6075196266174316, |
|
"rewards/margins": 2.1309003829956055, |
|
"rewards/rejected": -3.738420009613037, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.6933333333333334, |
|
"grad_norm": 10.329422950744629, |
|
"learning_rate": 3.895609305067162e-05, |
|
"logits/chosen": 2.1816141605377197, |
|
"logits/rejected": 2.1901464462280273, |
|
"logps/chosen": -354.7869873046875, |
|
"logps/rejected": -334.8106384277344, |
|
"loss": 1.1709, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -1.6891334056854248, |
|
"rewards/margins": -0.787255048751831, |
|
"rewards/rejected": -0.9018783569335938, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.6977777777777778, |
|
"grad_norm": 2.0047101974487305, |
|
"learning_rate": 3.8803110893715334e-05, |
|
"logits/chosen": 2.076343536376953, |
|
"logits/rejected": 2.1251060962677, |
|
"logps/chosen": -226.72457885742188, |
|
"logps/rejected": -348.7533874511719, |
|
"loss": 0.1027, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.06249618902802467, |
|
"rewards/margins": 5.404012680053711, |
|
"rewards/rejected": -5.4665093421936035, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.7022222222222222, |
|
"grad_norm": 8.143394470214844, |
|
"learning_rate": 3.864938156746891e-05, |
|
"logits/chosen": 2.237619400024414, |
|
"logits/rejected": 2.3040781021118164, |
|
"logps/chosen": -429.0187683105469, |
|
"logps/rejected": -341.12957763671875, |
|
"loss": 0.6455, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.7430390119552612, |
|
"rewards/margins": 1.6202683448791504, |
|
"rewards/rejected": -3.363307237625122, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.7066666666666667, |
|
"grad_norm": 2.048825979232788, |
|
"learning_rate": 3.849491339337758e-05, |
|
"logits/chosen": 2.246427297592163, |
|
"logits/rejected": 2.1864523887634277, |
|
"logps/chosen": -253.68792724609375, |
|
"logps/rejected": -255.00852966308594, |
|
"loss": 0.1813, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.2091705799102783, |
|
"rewards/margins": 1.7127196788787842, |
|
"rewards/rejected": -2.9218902587890625, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.7111111111111111, |
|
"grad_norm": 9.93562126159668, |
|
"learning_rate": 3.833971473288084e-05, |
|
"logits/chosen": 2.260481357574463, |
|
"logits/rejected": 2.2601776123046875, |
|
"logps/chosen": -375.98193359375, |
|
"logps/rejected": -420.81494140625, |
|
"loss": 0.6181, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.0803894996643066, |
|
"rewards/margins": 0.15620207786560059, |
|
"rewards/rejected": -3.2365915775299072, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.7155555555555555, |
|
"grad_norm": 3.1551766395568848, |
|
"learning_rate": 3.818379398695969e-05, |
|
"logits/chosen": 1.9815887212753296, |
|
"logits/rejected": 1.9442949295043945, |
|
"logps/chosen": -316.49468994140625, |
|
"logps/rejected": -369.946044921875, |
|
"loss": 0.132, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.701190173625946, |
|
"rewards/margins": 2.1365509033203125, |
|
"rewards/rejected": -2.8377411365509033, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 13.20207405090332, |
|
"learning_rate": 3.802715959568205e-05, |
|
"logits/chosen": 2.195608377456665, |
|
"logits/rejected": 2.118527889251709, |
|
"logps/chosen": -396.55755615234375, |
|
"logps/rejected": -423.4131164550781, |
|
"loss": 0.8681, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -2.12845778465271, |
|
"rewards/margins": 2.663003444671631, |
|
"rewards/rejected": -4.791460990905762, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.7244444444444444, |
|
"grad_norm": 1.2311620712280273, |
|
"learning_rate": 3.7869820037745776e-05, |
|
"logits/chosen": 2.1139190196990967, |
|
"logits/rejected": 2.142258644104004, |
|
"logps/chosen": -275.6441955566406, |
|
"logps/rejected": -338.9689636230469, |
|
"loss": 0.1183, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.2249420881271362, |
|
"rewards/margins": 3.1651391983032227, |
|
"rewards/rejected": -4.39008092880249, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.7288888888888889, |
|
"grad_norm": 18.346521377563477, |
|
"learning_rate": 3.771178383001976e-05, |
|
"logits/chosen": 2.330061435699463, |
|
"logits/rejected": 2.222029685974121, |
|
"logps/chosen": -468.13232421875, |
|
"logps/rejected": -425.3597412109375, |
|
"loss": 0.6944, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -3.2604997158050537, |
|
"rewards/margins": 1.2873930931091309, |
|
"rewards/rejected": -4.5478925704956055, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.7333333333333333, |
|
"grad_norm": 4.46051025390625, |
|
"learning_rate": 3.7553059527082913e-05, |
|
"logits/chosen": 2.2368054389953613, |
|
"logits/rejected": 2.2174384593963623, |
|
"logps/chosen": -287.6416931152344, |
|
"logps/rejected": -241.71742248535156, |
|
"loss": 0.4803, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.1810874938964844, |
|
"rewards/margins": 0.794731855392456, |
|
"rewards/rejected": -1.9758193492889404, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.7377777777777778, |
|
"grad_norm": 5.725604057312012, |
|
"learning_rate": 3.739365572076105e-05, |
|
"logits/chosen": 2.309138536453247, |
|
"logits/rejected": 2.297959327697754, |
|
"logps/chosen": -313.10382080078125, |
|
"logps/rejected": -440.99212646484375, |
|
"loss": 0.3831, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.8803879022598267, |
|
"rewards/margins": 1.21652090549469, |
|
"rewards/rejected": -3.0969088077545166, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.7422222222222222, |
|
"grad_norm": 13.70535945892334, |
|
"learning_rate": 3.7233581039661874e-05, |
|
"logits/chosen": 2.021416187286377, |
|
"logits/rejected": 2.0485005378723145, |
|
"logps/chosen": -308.900146484375, |
|
"logps/rejected": -373.67474365234375, |
|
"loss": 0.6832, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.8083343505859375, |
|
"rewards/margins": 1.9991533756256104, |
|
"rewards/rejected": -2.807487726211548, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.7466666666666667, |
|
"grad_norm": 5.6754560470581055, |
|
"learning_rate": 3.707284414870786e-05, |
|
"logits/chosen": 2.3587806224823, |
|
"logits/rejected": 2.424814224243164, |
|
"logps/chosen": -379.08697509765625, |
|
"logps/rejected": -437.82147216796875, |
|
"loss": 0.2897, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.100062608718872, |
|
"rewards/margins": 1.1164734363555908, |
|
"rewards/rejected": -2.216536045074463, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.7511111111111111, |
|
"grad_norm": 13.687298774719238, |
|
"learning_rate": 3.691145374866723e-05, |
|
"logits/chosen": 2.0991220474243164, |
|
"logits/rejected": 2.0921735763549805, |
|
"logps/chosen": -251.90283203125, |
|
"logps/rejected": -294.06097412109375, |
|
"loss": 1.2515, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -3.1877260208129883, |
|
"rewards/margins": -0.7983794212341309, |
|
"rewards/rejected": -2.3893463611602783, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.7555555555555555, |
|
"grad_norm": 2.953883647918701, |
|
"learning_rate": 3.6749418575683e-05, |
|
"logits/chosen": 1.9750038385391235, |
|
"logits/rejected": 1.9700895547866821, |
|
"logps/chosen": -258.9554748535156, |
|
"logps/rejected": -268.0426940917969, |
|
"loss": 0.107, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.537645697593689, |
|
"rewards/margins": 2.264012336730957, |
|
"rewards/rejected": -3.8016581535339355, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 2.0586588382720947, |
|
"learning_rate": 3.658674740080004e-05, |
|
"logits/chosen": 2.249845504760742, |
|
"logits/rejected": 2.1530356407165527, |
|
"logps/chosen": -373.0096740722656, |
|
"logps/rejected": -384.4673767089844, |
|
"loss": 0.147, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.49631041288375854, |
|
"rewards/margins": 3.1010565757751465, |
|
"rewards/rejected": -2.6047463417053223, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.7644444444444445, |
|
"grad_norm": 9.671707153320312, |
|
"learning_rate": 3.642344902949034e-05, |
|
"logits/chosen": 2.1441969871520996, |
|
"logits/rejected": 2.0829548835754395, |
|
"logps/chosen": -375.480224609375, |
|
"logps/rejected": -306.5086975097656, |
|
"loss": 0.558, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -2.187718152999878, |
|
"rewards/margins": 0.452168345451355, |
|
"rewards/rejected": -2.6398866176605225, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.7688888888888888, |
|
"grad_norm": 0.8413982391357422, |
|
"learning_rate": 3.6259532301176335e-05, |
|
"logits/chosen": 1.6508468389511108, |
|
"logits/rejected": 1.683258056640625, |
|
"logps/chosen": -302.1081848144531, |
|
"logps/rejected": -320.01141357421875, |
|
"loss": 0.1217, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.08628615736961365, |
|
"rewards/margins": 3.430840492248535, |
|
"rewards/rejected": -3.5171265602111816, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.7733333333333333, |
|
"grad_norm": 11.386160850524902, |
|
"learning_rate": 3.6095006088752447e-05, |
|
"logits/chosen": 2.2143354415893555, |
|
"logits/rejected": 2.283841848373413, |
|
"logps/chosen": -428.9851379394531, |
|
"logps/rejected": -506.7236328125, |
|
"loss": 0.4424, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.3689346313476562, |
|
"rewards/margins": 0.6975066661834717, |
|
"rewards/rejected": -4.066441535949707, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.7777777777777778, |
|
"grad_norm": 2.112983226776123, |
|
"learning_rate": 3.592987929810476e-05, |
|
"logits/chosen": 1.9571995735168457, |
|
"logits/rejected": 1.922455072402954, |
|
"logps/chosen": -289.2620849609375, |
|
"logps/rejected": -418.0646057128906, |
|
"loss": 0.1977, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3730583190917969, |
|
"rewards/margins": 4.838759422302246, |
|
"rewards/rejected": -5.211817741394043, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.7822222222222223, |
|
"grad_norm": 2.702089786529541, |
|
"learning_rate": 3.576416086762896e-05, |
|
"logits/chosen": 1.9095101356506348, |
|
"logits/rejected": 1.9017338752746582, |
|
"logps/chosen": -258.8391418457031, |
|
"logps/rejected": -258.28948974609375, |
|
"loss": 0.1737, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.08884277939796448, |
|
"rewards/margins": 2.2755606174468994, |
|
"rewards/rejected": -2.364403486251831, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.7866666666666666, |
|
"grad_norm": 3.3134331703186035, |
|
"learning_rate": 3.5597859767746524e-05, |
|
"logits/chosen": 2.0989699363708496, |
|
"logits/rejected": 2.059138774871826, |
|
"logps/chosen": -253.71551513671875, |
|
"logps/rejected": -245.14794921875, |
|
"loss": 0.2049, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.4205713272094727, |
|
"rewards/margins": 1.4892207384109497, |
|
"rewards/rejected": -3.909791946411133, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.7911111111111111, |
|
"grad_norm": 0.9361621141433716, |
|
"learning_rate": 3.543098500041906e-05, |
|
"logits/chosen": 2.1984782218933105, |
|
"logits/rejected": 2.0851023197174072, |
|
"logps/chosen": -280.2938537597656, |
|
"logps/rejected": -311.8728332519531, |
|
"loss": 0.0635, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.518353283405304, |
|
"rewards/margins": 2.7252793312072754, |
|
"rewards/rejected": -2.206925868988037, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.7955555555555556, |
|
"grad_norm": 35.653995513916016, |
|
"learning_rate": 3.526354559866113e-05, |
|
"logits/chosen": 2.0546839237213135, |
|
"logits/rejected": 2.004641532897949, |
|
"logps/chosen": -270.5155334472656, |
|
"logps/rejected": -285.1980285644531, |
|
"loss": 3.7422, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -3.489393711090088, |
|
"rewards/margins": -3.35947322845459, |
|
"rewards/rejected": -0.1299205720424652, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 21.546016693115234, |
|
"learning_rate": 3.509555062605121e-05, |
|
"logits/chosen": 2.0889358520507812, |
|
"logits/rejected": 2.1452298164367676, |
|
"logps/chosen": -404.2633056640625, |
|
"logps/rejected": -504.6707458496094, |
|
"loss": 0.8305, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -3.7308120727539062, |
|
"rewards/margins": 0.4282197952270508, |
|
"rewards/rejected": -4.159031867980957, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_logits/chosen": 2.1556396484375, |
|
"eval_logits/rejected": 2.1094348430633545, |
|
"eval_logps/chosen": -310.0359191894531, |
|
"eval_logps/rejected": -357.9092712402344, |
|
"eval_loss": 0.4349474012851715, |
|
"eval_rewards/accuracies": 0.7857142686843872, |
|
"eval_rewards/chosen": -1.9743303060531616, |
|
"eval_rewards/margins": 2.2349183559417725, |
|
"eval_rewards/rejected": -4.2092485427856445, |
|
"eval_runtime": 17.3856, |
|
"eval_samples_per_second": 2.876, |
|
"eval_steps_per_second": 0.403, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.8044444444444444, |
|
"grad_norm": 1.8578166961669922, |
|
"learning_rate": 3.492700917624113e-05, |
|
"logits/chosen": 1.87994384765625, |
|
"logits/rejected": 1.8280099630355835, |
|
"logps/chosen": -235.28530883789062, |
|
"logps/rejected": -234.61431884765625, |
|
"loss": 0.1254, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8087150454521179, |
|
"rewards/margins": 2.258847236633301, |
|
"rewards/rejected": -3.0675621032714844, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.8088888888888889, |
|
"grad_norm": 1.5471972227096558, |
|
"learning_rate": 3.4757930372463775e-05, |
|
"logits/chosen": 1.9977684020996094, |
|
"logits/rejected": 1.8153365850448608, |
|
"logps/chosen": -311.0814208984375, |
|
"logps/rejected": -285.3901672363281, |
|
"loss": 0.0704, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.35354921221733093, |
|
"rewards/margins": 2.717729091644287, |
|
"rewards/rejected": -2.364180088043213, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.8133333333333334, |
|
"grad_norm": 32.15199279785156, |
|
"learning_rate": 3.458832336703929e-05, |
|
"logits/chosen": 1.8522934913635254, |
|
"logits/rejected": 1.7468159198760986, |
|
"logps/chosen": -580.458740234375, |
|
"logps/rejected": -361.63299560546875, |
|
"loss": 4.9968, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -10.116351127624512, |
|
"rewards/margins": -4.255195617675781, |
|
"rewards/rejected": -5.8611555099487305, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.8177777777777778, |
|
"grad_norm": 21.2011661529541, |
|
"learning_rate": 3.4418197340879635e-05, |
|
"logits/chosen": 2.1630630493164062, |
|
"logits/rejected": 2.1617729663848877, |
|
"logps/chosen": -496.76910400390625, |
|
"logps/rejected": -401.410888671875, |
|
"loss": 2.7916, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -6.106187343597412, |
|
"rewards/margins": -2.5850629806518555, |
|
"rewards/rejected": -3.5211243629455566, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.8222222222222222, |
|
"grad_norm": 3.49141263961792, |
|
"learning_rate": 3.4247561502991604e-05, |
|
"logits/chosen": 2.1068267822265625, |
|
"logits/rejected": 2.1182143688201904, |
|
"logps/chosen": -385.7279052734375, |
|
"logps/rejected": -520.2858276367188, |
|
"loss": 0.2494, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.5602035522460938, |
|
"rewards/margins": 3.892549991607666, |
|
"rewards/rejected": -7.45275354385376, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.8266666666666667, |
|
"grad_norm": 10.206978797912598, |
|
"learning_rate": 3.407642508997838e-05, |
|
"logits/chosen": 1.9911150932312012, |
|
"logits/rejected": 1.9793498516082764, |
|
"logps/chosen": -355.60675048828125, |
|
"logps/rejected": -286.39141845703125, |
|
"loss": 0.5621, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.6045563220977783, |
|
"rewards/margins": 0.29973304271698, |
|
"rewards/rejected": -3.9042892456054688, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.8311111111111111, |
|
"grad_norm": 3.9811224937438965, |
|
"learning_rate": 3.3904797365539514e-05, |
|
"logits/chosen": 1.9419519901275635, |
|
"logits/rejected": 1.943634033203125, |
|
"logps/chosen": -345.35076904296875, |
|
"logps/rejected": -388.5447998046875, |
|
"loss": 0.3847, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.5699470639228821, |
|
"rewards/margins": 2.5326507091522217, |
|
"rewards/rejected": -1.9627037048339844, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.8355555555555556, |
|
"grad_norm": 7.058280944824219, |
|
"learning_rate": 3.37326876199695e-05, |
|
"logits/chosen": 2.377598524093628, |
|
"logits/rejected": 2.3797459602355957, |
|
"logps/chosen": -341.9767150878906, |
|
"logps/rejected": -513.3931884765625, |
|
"loss": 0.197, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.686824083328247, |
|
"rewards/margins": 2.547370672225952, |
|
"rewards/rejected": -4.234194755554199, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 1.5856622457504272, |
|
"learning_rate": 3.356010516965486e-05, |
|
"logits/chosen": 1.8028912544250488, |
|
"logits/rejected": 1.8407230377197266, |
|
"logps/chosen": -206.43429565429688, |
|
"logps/rejected": -305.61212158203125, |
|
"loss": 0.1631, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5213749408721924, |
|
"rewards/margins": 4.116602420806885, |
|
"rewards/rejected": -3.5952274799346924, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.8444444444444444, |
|
"grad_norm": 9.480084419250488, |
|
"learning_rate": 3.3387059356569875e-05, |
|
"logits/chosen": 2.0444135665893555, |
|
"logits/rejected": 2.0769548416137695, |
|
"logps/chosen": -260.6351318359375, |
|
"logps/rejected": -255.31646728515625, |
|
"loss": 1.054, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.5492042303085327, |
|
"rewards/margins": -0.4221389889717102, |
|
"rewards/rejected": 0.9713432192802429, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.8488888888888889, |
|
"grad_norm": 5.375828742980957, |
|
"learning_rate": 3.321355954777087e-05, |
|
"logits/chosen": 2.0929622650146484, |
|
"logits/rejected": 2.0118932723999023, |
|
"logps/chosen": -271.47088623046875, |
|
"logps/rejected": -324.151123046875, |
|
"loss": 0.3697, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7393569946289062, |
|
"rewards/margins": 1.4251999855041504, |
|
"rewards/rejected": -2.1645569801330566, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.8533333333333334, |
|
"grad_norm": 11.164929389953613, |
|
"learning_rate": 3.3039615134889206e-05, |
|
"logits/chosen": 2.168374538421631, |
|
"logits/rejected": 2.050518035888672, |
|
"logps/chosen": -425.24169921875, |
|
"logps/rejected": -436.30914306640625, |
|
"loss": 0.9201, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -2.633542060852051, |
|
"rewards/margins": -0.2216278314590454, |
|
"rewards/rejected": -2.411914110183716, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.8577777777777778, |
|
"grad_norm": 12.873815536499023, |
|
"learning_rate": 3.286523553362287e-05, |
|
"logits/chosen": 2.097388505935669, |
|
"logits/rejected": 2.0157864093780518, |
|
"logps/chosen": -274.7909851074219, |
|
"logps/rejected": -251.3322296142578, |
|
"loss": 1.2622, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.8827362060546875, |
|
"rewards/margins": 0.5675584077835083, |
|
"rewards/rejected": -2.4502944946289062, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.8622222222222222, |
|
"grad_norm": 8.37449836730957, |
|
"learning_rate": 3.269043018322681e-05, |
|
"logits/chosen": 2.1862282752990723, |
|
"logits/rejected": 2.087134838104248, |
|
"logps/chosen": -295.71875, |
|
"logps/rejected": -301.2806396484375, |
|
"loss": 0.4823, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.7563506960868835, |
|
"rewards/margins": 2.514920234680176, |
|
"rewards/rejected": -3.271270751953125, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.8666666666666667, |
|
"grad_norm": 6.452545642852783, |
|
"learning_rate": 3.2515208546002e-05, |
|
"logits/chosen": 1.9793057441711426, |
|
"logits/rejected": 2.004866600036621, |
|
"logps/chosen": -242.0079803466797, |
|
"logps/rejected": -277.5906677246094, |
|
"loss": 0.4756, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.457282304763794, |
|
"rewards/margins": 1.3829689025878906, |
|
"rewards/rejected": -2.8402512073516846, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.8711111111111111, |
|
"grad_norm": 25.117042541503906, |
|
"learning_rate": 3.233958010678322e-05, |
|
"logits/chosen": 2.0711257457733154, |
|
"logits/rejected": 2.1235086917877197, |
|
"logps/chosen": -509.37188720703125, |
|
"logps/rejected": -579.4296875, |
|
"loss": 0.7721, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -4.672909736633301, |
|
"rewards/margins": 1.30540931224823, |
|
"rewards/rejected": -5.97831916809082, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.8755555555555555, |
|
"grad_norm": 2.4943253993988037, |
|
"learning_rate": 3.216355437242564e-05, |
|
"logits/chosen": 2.0617835521698, |
|
"logits/rejected": 2.025505781173706, |
|
"logps/chosen": -224.05841064453125, |
|
"logps/rejected": -297.93768310546875, |
|
"loss": 0.1517, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.27078327536582947, |
|
"rewards/margins": 1.8932006359100342, |
|
"rewards/rejected": -2.1639838218688965, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 1.4619039297103882, |
|
"learning_rate": 3.1987140871290236e-05, |
|
"logits/chosen": 1.9786646366119385, |
|
"logits/rejected": 1.9430062770843506, |
|
"logps/chosen": -194.2051544189453, |
|
"logps/rejected": -184.22048950195312, |
|
"loss": 0.1253, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5335159301757812, |
|
"rewards/margins": 2.0488877296447754, |
|
"rewards/rejected": -1.5153717994689941, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.8844444444444445, |
|
"grad_norm": 7.211589336395264, |
|
"learning_rate": 3.181034915272797e-05, |
|
"logits/chosen": 2.10782527923584, |
|
"logits/rejected": 2.206753730773926, |
|
"logps/chosen": -336.33038330078125, |
|
"logps/rejected": -462.14654541015625, |
|
"loss": 0.8607, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.9421745538711548, |
|
"rewards/margins": 3.117570161819458, |
|
"rewards/rejected": -4.059744358062744, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.8888888888888888, |
|
"grad_norm": 2.614577054977417, |
|
"learning_rate": 3.1633188786562914e-05, |
|
"logits/chosen": 1.9760353565216064, |
|
"logits/rejected": 1.902787685394287, |
|
"logps/chosen": -248.97714233398438, |
|
"logps/rejected": -256.932861328125, |
|
"loss": 0.282, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1982636451721191, |
|
"rewards/margins": 2.316314697265625, |
|
"rewards/rejected": -1.1180511713027954, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.8933333333333333, |
|
"grad_norm": 10.176268577575684, |
|
"learning_rate": 3.1455669362574214e-05, |
|
"logits/chosen": 1.9834389686584473, |
|
"logits/rejected": 1.794731855392456, |
|
"logps/chosen": -350.74102783203125, |
|
"logps/rejected": -293.0185241699219, |
|
"loss": 0.6404, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.7628203630447388, |
|
"rewards/margins": 2.335862159729004, |
|
"rewards/rejected": -4.098682403564453, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.8977777777777778, |
|
"grad_norm": 1.9432324171066284, |
|
"learning_rate": 3.1277800489977e-05, |
|
"logits/chosen": 1.688536286354065, |
|
"logits/rejected": 1.688530445098877, |
|
"logps/chosen": -238.6612548828125, |
|
"logps/rejected": -331.52606201171875, |
|
"loss": 0.1152, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.12506788969039917, |
|
"rewards/margins": 3.6676583290100098, |
|
"rewards/rejected": -3.542590618133545, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.9022222222222223, |
|
"grad_norm": 1.8283239603042603, |
|
"learning_rate": 3.1099591796902215e-05, |
|
"logits/chosen": 2.159648895263672, |
|
"logits/rejected": 2.12233567237854, |
|
"logps/chosen": -421.9817199707031, |
|
"logps/rejected": -420.61663818359375, |
|
"loss": 0.0986, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.661656141281128, |
|
"rewards/margins": 2.6688404083251953, |
|
"rewards/rejected": -6.330496311187744, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.9066666666666666, |
|
"grad_norm": 17.021015167236328, |
|
"learning_rate": 3.092105292987548e-05, |
|
"logits/chosen": 1.7743968963623047, |
|
"logits/rejected": 1.7986412048339844, |
|
"logps/chosen": -164.3032989501953, |
|
"logps/rejected": -203.1534881591797, |
|
"loss": 1.8414, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -3.1501376628875732, |
|
"rewards/margins": -1.529021143913269, |
|
"rewards/rejected": -1.6211166381835938, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.9111111111111111, |
|
"grad_norm": 12.92226791381836, |
|
"learning_rate": 3.07421935532949e-05, |
|
"logits/chosen": 1.701080083847046, |
|
"logits/rejected": 1.747081995010376, |
|
"logps/chosen": -134.5420379638672, |
|
"logps/rejected": -180.4077911376953, |
|
"loss": 0.7422, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.9285032749176025, |
|
"rewards/margins": 0.21163922548294067, |
|
"rewards/rejected": -2.1401424407958984, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.9155555555555556, |
|
"grad_norm": 12.34039306640625, |
|
"learning_rate": 3.056302334890786e-05, |
|
"logits/chosen": 2.1032023429870605, |
|
"logits/rejected": 2.1539621353149414, |
|
"logps/chosen": -291.208984375, |
|
"logps/rejected": -335.7510986328125, |
|
"loss": 0.5075, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.06739044934511185, |
|
"rewards/margins": 1.4255584478378296, |
|
"rewards/rejected": -1.3581680059432983, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.44443604350090027, |
|
"learning_rate": 3.03835520152871e-05, |
|
"logits/chosen": 2.3766026496887207, |
|
"logits/rejected": 2.2064499855041504, |
|
"logps/chosen": -357.09075927734375, |
|
"logps/rejected": -521.0325927734375, |
|
"loss": 0.0117, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4761963188648224, |
|
"rewards/margins": 5.920969009399414, |
|
"rewards/rejected": -6.397165298461914, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.9244444444444444, |
|
"grad_norm": 1.3140870332717896, |
|
"learning_rate": 3.0203789267305567e-05, |
|
"logits/chosen": 2.188861131668091, |
|
"logits/rejected": 2.1991310119628906, |
|
"logps/chosen": -318.3990783691406, |
|
"logps/rejected": -407.42578125, |
|
"loss": 0.179, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.08726167678833, |
|
"rewards/margins": 3.1875786781311035, |
|
"rewards/rejected": -4.274840354919434, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.9288888888888889, |
|
"grad_norm": 12.631479263305664, |
|
"learning_rate": 3.002374483561064e-05, |
|
"logits/chosen": 2.1044745445251465, |
|
"logits/rejected": 2.079122543334961, |
|
"logps/chosen": -422.732421875, |
|
"logps/rejected": -583.081298828125, |
|
"loss": 0.3081, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.848828077316284, |
|
"rewards/margins": 4.261569499969482, |
|
"rewards/rejected": -7.1103973388671875, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.9333333333333333, |
|
"grad_norm": 17.813030242919922, |
|
"learning_rate": 2.9843428466097385e-05, |
|
"logits/chosen": 2.1924643516540527, |
|
"logits/rejected": 2.1515285968780518, |
|
"logps/chosen": -404.7247314453125, |
|
"logps/rejected": -385.24407958984375, |
|
"loss": 1.7449, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -3.990626335144043, |
|
"rewards/margins": -0.6515921354293823, |
|
"rewards/rejected": -3.33903431892395, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.9377777777777778, |
|
"grad_norm": 3.6667134761810303, |
|
"learning_rate": 2.9662849919380976e-05, |
|
"logits/chosen": 1.874267339706421, |
|
"logits/rejected": 1.896430492401123, |
|
"logps/chosen": -279.3658447265625, |
|
"logps/rejected": -271.2750244140625, |
|
"loss": 0.4278, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.9690204858779907, |
|
"rewards/margins": 2.050572633743286, |
|
"rewards/rejected": -3.0195930004119873, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.9422222222222222, |
|
"grad_norm": 21.304107666015625, |
|
"learning_rate": 2.9482018970268393e-05, |
|
"logits/chosen": 2.03654408454895, |
|
"logits/rejected": 2.1858012676239014, |
|
"logps/chosen": -307.8687744140625, |
|
"logps/rejected": -392.89447021484375, |
|
"loss": 1.358, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -3.445587158203125, |
|
"rewards/margins": -1.0587692260742188, |
|
"rewards/rejected": -2.3868179321289062, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.9466666666666667, |
|
"grad_norm": 2.9452733993530273, |
|
"learning_rate": 2.930094540722927e-05, |
|
"logits/chosen": 2.1484179496765137, |
|
"logits/rejected": 2.2047171592712402, |
|
"logps/chosen": -240.11737060546875, |
|
"logps/rejected": -360.74237060546875, |
|
"loss": 0.1765, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.6234557628631592, |
|
"rewards/margins": 1.6575775146484375, |
|
"rewards/rejected": -3.2810332775115967, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.9511111111111111, |
|
"grad_norm": 22.092178344726562, |
|
"learning_rate": 2.911963903186606e-05, |
|
"logits/chosen": 2.0055017471313477, |
|
"logits/rejected": 1.9135019779205322, |
|
"logps/chosen": -241.86856079101562, |
|
"logps/rejected": -239.4849395751953, |
|
"loss": 1.1702, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.504570722579956, |
|
"rewards/margins": -0.4858473837375641, |
|
"rewards/rejected": -1.0187233686447144, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.9555555555555556, |
|
"grad_norm": 0.4408101439476013, |
|
"learning_rate": 2.8938109658383454e-05, |
|
"logits/chosen": 2.263948440551758, |
|
"logits/rejected": 2.16243314743042, |
|
"logps/chosen": -397.47772216796875, |
|
"logps/rejected": -574.7552490234375, |
|
"loss": 0.0163, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.030035376548767, |
|
"rewards/margins": 8.122578620910645, |
|
"rewards/rejected": -9.152613639831543, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.1885669231414795, |
|
"learning_rate": 2.8756367113057148e-05, |
|
"logits/chosen": 2.174750566482544, |
|
"logits/rejected": 2.1712284088134766, |
|
"logps/chosen": -379.27142333984375, |
|
"logps/rejected": -556.3659057617188, |
|
"loss": 0.0058, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.10250397026538849, |
|
"rewards/margins": 6.272984504699707, |
|
"rewards/rejected": -6.170480728149414, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.9644444444444444, |
|
"grad_norm": 2.534093141555786, |
|
"learning_rate": 2.857442123370195e-05, |
|
"logits/chosen": 1.986964225769043, |
|
"logits/rejected": 1.985724925994873, |
|
"logps/chosen": -314.1033020019531, |
|
"logps/rejected": -273.2991943359375, |
|
"loss": 0.3304, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.2605462074279785, |
|
"rewards/margins": 1.8220291137695312, |
|
"rewards/rejected": -3.0825753211975098, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.9688888888888889, |
|
"grad_norm": 9.718185424804688, |
|
"learning_rate": 2.8392281869139213e-05, |
|
"logits/chosen": 2.056429386138916, |
|
"logits/rejected": 2.060886859893799, |
|
"logps/chosen": -310.82916259765625, |
|
"logps/rejected": -361.8159484863281, |
|
"loss": 0.8212, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -2.2504210472106934, |
|
"rewards/margins": 2.31430721282959, |
|
"rewards/rejected": -4.564728736877441, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.9733333333333334, |
|
"grad_norm": 13.960983276367188, |
|
"learning_rate": 2.8209958878663778e-05, |
|
"logits/chosen": 2.2462310791015625, |
|
"logits/rejected": 2.28263521194458, |
|
"logps/chosen": -465.261474609375, |
|
"logps/rejected": -421.1733093261719, |
|
"loss": 0.728, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -5.611042022705078, |
|
"rewards/margins": -0.04042929410934448, |
|
"rewards/rejected": -5.570612907409668, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.9777777777777777, |
|
"grad_norm": 9.203478813171387, |
|
"learning_rate": 2.8027462131510208e-05, |
|
"logits/chosen": 1.9416842460632324, |
|
"logits/rejected": 1.7857491970062256, |
|
"logps/chosen": -345.6712646484375, |
|
"logps/rejected": -251.47076416015625, |
|
"loss": 0.8191, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -3.3168740272521973, |
|
"rewards/margins": -0.017238736152648926, |
|
"rewards/rejected": -3.299635410308838, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.9822222222222222, |
|
"grad_norm": 1.7355222702026367, |
|
"learning_rate": 2.7844801506318617e-05, |
|
"logits/chosen": 2.240471363067627, |
|
"logits/rejected": 2.2063498497009277, |
|
"logps/chosen": -328.5857238769531, |
|
"logps/rejected": -420.4921875, |
|
"loss": 0.0914, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.8121414184570312, |
|
"rewards/margins": 2.7087008953094482, |
|
"rewards/rejected": -4.5208420753479, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.9866666666666667, |
|
"grad_norm": 1.7263237237930298, |
|
"learning_rate": 2.7661986890599943e-05, |
|
"logits/chosen": 1.7395219802856445, |
|
"logits/rejected": 1.784384846687317, |
|
"logps/chosen": -214.15451049804688, |
|
"logps/rejected": -284.48638916015625, |
|
"loss": 0.3769, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.4917289614677429, |
|
"rewards/margins": 2.53422212600708, |
|
"rewards/rejected": -3.0259511470794678, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.9911111111111112, |
|
"grad_norm": 2.090808629989624, |
|
"learning_rate": 2.747902818020067e-05, |
|
"logits/chosen": 2.032662868499756, |
|
"logits/rejected": 1.8991634845733643, |
|
"logps/chosen": -398.57904052734375, |
|
"logps/rejected": -394.4957275390625, |
|
"loss": 0.0918, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.1145386695861816, |
|
"rewards/margins": 2.4051780700683594, |
|
"rewards/rejected": -5.519716739654541, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.9955555555555555, |
|
"grad_norm": 0.48346611857414246, |
|
"learning_rate": 2.7295935278767233e-05, |
|
"logits/chosen": 2.2755024433135986, |
|
"logits/rejected": 2.3426759243011475, |
|
"logps/chosen": -392.48272705078125, |
|
"logps/rejected": -447.3723449707031, |
|
"loss": 0.0161, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.353735327720642, |
|
"rewards/margins": 4.4421586990356445, |
|
"rewards/rejected": -5.795893669128418, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 19.339488983154297, |
|
"learning_rate": 2.711271809720986e-05, |
|
"logits/chosen": 2.270242214202881, |
|
"logits/rejected": 2.0706443786621094, |
|
"logps/chosen": -485.1645202636719, |
|
"logps/rejected": -381.00018310546875, |
|
"loss": 1.2152, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -3.213038682937622, |
|
"rewards/margins": -0.8432999849319458, |
|
"rewards/rejected": -2.369738817214966, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_logits/chosen": 2.1288914680480957, |
|
"eval_logits/rejected": 2.083587408065796, |
|
"eval_logps/chosen": -313.3813781738281, |
|
"eval_logps/rejected": -365.7991027832031, |
|
"eval_loss": 0.4423667788505554, |
|
"eval_rewards/accuracies": 0.8214285969734192, |
|
"eval_rewards/chosen": -2.3088743686676025, |
|
"eval_rewards/margins": 2.689358949661255, |
|
"eval_rewards/rejected": -4.998233318328857, |
|
"eval_runtime": 17.388, |
|
"eval_samples_per_second": 2.876, |
|
"eval_steps_per_second": 0.403, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 1.0044444444444445, |
|
"grad_norm": 1.6202038526535034, |
|
"learning_rate": 2.6929386553166164e-05, |
|
"logits/chosen": 2.034777879714966, |
|
"logits/rejected": 1.9399088621139526, |
|
"logps/chosen": -270.7170104980469, |
|
"logps/rejected": -321.8533020019531, |
|
"loss": 0.0773, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.10045319050550461, |
|
"rewards/margins": 4.125720977783203, |
|
"rewards/rejected": -4.226174354553223, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 1.008888888888889, |
|
"grad_norm": 0.899365246295929, |
|
"learning_rate": 2.6745950570464212e-05, |
|
"logits/chosen": 1.856791377067566, |
|
"logits/rejected": 1.838975429534912, |
|
"logps/chosen": -187.313720703125, |
|
"logps/rejected": -206.15573120117188, |
|
"loss": 0.1061, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.18259316682815552, |
|
"rewards/margins": 2.7951650619506836, |
|
"rewards/rejected": -2.9777581691741943, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 1.0133333333333334, |
|
"grad_norm": 24.894893646240234, |
|
"learning_rate": 2.6562420078585433e-05, |
|
"logits/chosen": 2.2783782482147217, |
|
"logits/rejected": 2.3907415866851807, |
|
"logps/chosen": -562.4542846679688, |
|
"logps/rejected": -450.45098876953125, |
|
"loss": 2.5241, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -8.394607543945312, |
|
"rewards/margins": -0.29980039596557617, |
|
"rewards/rejected": -8.094807624816895, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 1.0177777777777777, |
|
"grad_norm": 10.114426612854004, |
|
"learning_rate": 2.637880501212705e-05, |
|
"logits/chosen": 2.3603391647338867, |
|
"logits/rejected": 2.353978157043457, |
|
"logps/chosen": -342.9703674316406, |
|
"logps/rejected": -352.3290710449219, |
|
"loss": 0.623, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.0490471124649048, |
|
"rewards/margins": 2.3047902584075928, |
|
"rewards/rejected": -3.353837490081787, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 1.0222222222222221, |
|
"grad_norm": 0.2897517681121826, |
|
"learning_rate": 2.619511531026436e-05, |
|
"logits/chosen": 2.2957711219787598, |
|
"logits/rejected": 2.2823054790496826, |
|
"logps/chosen": -380.21844482421875, |
|
"logps/rejected": -504.15045166015625, |
|
"loss": 0.0094, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6248611211776733, |
|
"rewards/margins": 6.897383213043213, |
|
"rewards/rejected": -7.522244453430176, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.0266666666666666, |
|
"grad_norm": 1.0437899827957153, |
|
"learning_rate": 2.6011360916212734e-05, |
|
"logits/chosen": 2.2502756118774414, |
|
"logits/rejected": 2.169267177581787, |
|
"logps/chosen": -248.31495666503906, |
|
"logps/rejected": -254.9483642578125, |
|
"loss": 0.0581, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.568634033203125, |
|
"rewards/margins": 2.8449950218200684, |
|
"rewards/rejected": -2.2763609886169434, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 1.031111111111111, |
|
"grad_norm": 2.103546142578125, |
|
"learning_rate": 2.5827551776689323e-05, |
|
"logits/chosen": 1.5411741733551025, |
|
"logits/rejected": 1.4888989925384521, |
|
"logps/chosen": -151.44955444335938, |
|
"logps/rejected": -160.1112518310547, |
|
"loss": 0.14, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6025131344795227, |
|
"rewards/margins": 2.4829578399658203, |
|
"rewards/rejected": -3.0854709148406982, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 1.0355555555555556, |
|
"grad_norm": 2.739473581314087, |
|
"learning_rate": 2.564369784137472e-05, |
|
"logits/chosen": 1.7263026237487793, |
|
"logits/rejected": 1.783945083618164, |
|
"logps/chosen": -234.08197021484375, |
|
"logps/rejected": -273.28668212890625, |
|
"loss": 0.2093, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8093013763427734, |
|
"rewards/margins": 2.4584481716156006, |
|
"rewards/rejected": -3.267749547958374, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 3.7832846641540527, |
|
"learning_rate": 2.54598090623743e-05, |
|
"logits/chosen": 1.8052504062652588, |
|
"logits/rejected": 1.766016960144043, |
|
"logps/chosen": -296.1597900390625, |
|
"logps/rejected": -256.9058532714844, |
|
"loss": 0.3296, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.5597388744354248, |
|
"rewards/margins": 0.9551147222518921, |
|
"rewards/rejected": -2.5148537158966064, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 1.0444444444444445, |
|
"grad_norm": 2.446428060531616, |
|
"learning_rate": 2.527589539367956e-05, |
|
"logits/chosen": 2.325028896331787, |
|
"logits/rejected": 2.232288360595703, |
|
"logps/chosen": -376.904052734375, |
|
"logps/rejected": -400.5955505371094, |
|
"loss": 0.0538, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.687849521636963, |
|
"rewards/margins": 3.5299012660980225, |
|
"rewards/rejected": -7.217750549316406, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 1.048888888888889, |
|
"grad_norm": 0.11488201469182968, |
|
"learning_rate": 2.50919667906293e-05, |
|
"logits/chosen": 1.7712184190750122, |
|
"logits/rejected": 1.815232515335083, |
|
"logps/chosen": -246.863037109375, |
|
"logps/rejected": -351.25238037109375, |
|
"loss": 0.0054, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2323249876499176, |
|
"rewards/margins": 5.702582359313965, |
|
"rewards/rejected": -5.93490743637085, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 1.0533333333333332, |
|
"grad_norm": 0.46934670209884644, |
|
"learning_rate": 2.4908033209370705e-05, |
|
"logits/chosen": 2.1205997467041016, |
|
"logits/rejected": 2.0082976818084717, |
|
"logps/chosen": -443.21771240234375, |
|
"logps/rejected": -437.1765441894531, |
|
"loss": 0.0109, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.834454357624054, |
|
"rewards/margins": 6.396831512451172, |
|
"rewards/rejected": -7.23128604888916, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 1.0577777777777777, |
|
"grad_norm": 0.6756075620651245, |
|
"learning_rate": 2.4724104606320445e-05, |
|
"logits/chosen": 2.1717934608459473, |
|
"logits/rejected": 2.1605606079101562, |
|
"logps/chosen": -349.8464050292969, |
|
"logps/rejected": -456.8037109375, |
|
"loss": 0.0355, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.42097780108451843, |
|
"rewards/margins": 6.1533660888671875, |
|
"rewards/rejected": -6.574343681335449, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 1.0622222222222222, |
|
"grad_norm": 0.5172697901725769, |
|
"learning_rate": 2.4540190937625708e-05, |
|
"logits/chosen": 2.2337419986724854, |
|
"logits/rejected": 2.2302417755126953, |
|
"logps/chosen": -269.57037353515625, |
|
"logps/rejected": -465.9024658203125, |
|
"loss": 0.0192, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.4147706031799316, |
|
"rewards/margins": 4.075361728668213, |
|
"rewards/rejected": -5.4901323318481445, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 1.0666666666666667, |
|
"grad_norm": 0.2905597984790802, |
|
"learning_rate": 2.4356302158625288e-05, |
|
"logits/chosen": 2.1833200454711914, |
|
"logits/rejected": 2.207943916320801, |
|
"logps/chosen": -326.02178955078125, |
|
"logps/rejected": -411.3348388671875, |
|
"loss": 0.012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.16143493354320526, |
|
"rewards/margins": 5.066305637359619, |
|
"rewards/rejected": -4.904870510101318, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.0711111111111111, |
|
"grad_norm": 0.2205185443162918, |
|
"learning_rate": 2.4172448223310682e-05, |
|
"logits/chosen": 1.7023603916168213, |
|
"logits/rejected": 1.5953627824783325, |
|
"logps/chosen": -170.35665893554688, |
|
"logps/rejected": -241.34259033203125, |
|
"loss": 0.0123, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.1372658014297485, |
|
"rewards/margins": 4.4668803215026855, |
|
"rewards/rejected": -5.6041460037231445, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 1.0755555555555556, |
|
"grad_norm": 0.5335175395011902, |
|
"learning_rate": 2.3988639083787272e-05, |
|
"logits/chosen": 1.9782161712646484, |
|
"logits/rejected": 1.9465917348861694, |
|
"logps/chosen": -299.92816162109375, |
|
"logps/rejected": -304.3218994140625, |
|
"loss": 0.0304, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.17397230863571167, |
|
"rewards/margins": 3.7744734287261963, |
|
"rewards/rejected": -3.600501298904419, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 9.141175270080566, |
|
"learning_rate": 2.3804884689735642e-05, |
|
"logits/chosen": 2.1661500930786133, |
|
"logits/rejected": 2.1720974445343018, |
|
"logps/chosen": -267.519775390625, |
|
"logps/rejected": -319.2179260253906, |
|
"loss": 0.4535, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.1500840187072754, |
|
"rewards/margins": 3.123685598373413, |
|
"rewards/rejected": -4.273769378662109, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 1.0844444444444445, |
|
"grad_norm": 0.07122190296649933, |
|
"learning_rate": 2.3621194987872955e-05, |
|
"logits/chosen": 2.0959739685058594, |
|
"logits/rejected": 1.9889600276947021, |
|
"logps/chosen": -352.41363525390625, |
|
"logps/rejected": -451.14453125, |
|
"loss": 0.0028, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.7250168323516846, |
|
"rewards/margins": 7.163854598999023, |
|
"rewards/rejected": -6.438838005065918, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 1.0888888888888888, |
|
"grad_norm": 6.129761695861816, |
|
"learning_rate": 2.3437579921414573e-05, |
|
"logits/chosen": 2.127330780029297, |
|
"logits/rejected": 1.9886295795440674, |
|
"logps/chosen": -403.7293701171875, |
|
"logps/rejected": -486.6906433105469, |
|
"loss": 0.2336, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.07745209336280823, |
|
"rewards/margins": 5.993661403656006, |
|
"rewards/rejected": -6.071113586425781, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 1.0933333333333333, |
|
"grad_norm": 32.1196174621582, |
|
"learning_rate": 2.325404942953579e-05, |
|
"logits/chosen": 1.87540864944458, |
|
"logits/rejected": 1.9732717275619507, |
|
"logps/chosen": -407.6719665527344, |
|
"logps/rejected": -368.49078369140625, |
|
"loss": 5.0416, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -7.963120937347412, |
|
"rewards/margins": -4.95430850982666, |
|
"rewards/rejected": -3.00881290435791, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 1.0977777777777777, |
|
"grad_norm": 0.005643940530717373, |
|
"learning_rate": 2.3070613446833842e-05, |
|
"logits/chosen": 2.0901176929473877, |
|
"logits/rejected": 2.1729612350463867, |
|
"logps/chosen": -399.19500732421875, |
|
"logps/rejected": -582.9959716796875, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.139540195465088, |
|
"rewards/margins": 9.500133514404297, |
|
"rewards/rejected": -11.639673233032227, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 1.1022222222222222, |
|
"grad_norm": 13.798872947692871, |
|
"learning_rate": 2.288728190279014e-05, |
|
"logits/chosen": 2.2860894203186035, |
|
"logits/rejected": 2.2638816833496094, |
|
"logps/chosen": -482.7290954589844, |
|
"logps/rejected": -440.7649230957031, |
|
"loss": 1.9496, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -2.7536072731018066, |
|
"rewards/margins": 3.122256278991699, |
|
"rewards/rejected": -5.875863552093506, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 1.1066666666666667, |
|
"grad_norm": 0.43348434567451477, |
|
"learning_rate": 2.270406472123277e-05, |
|
"logits/chosen": 2.0960116386413574, |
|
"logits/rejected": 2.1119589805603027, |
|
"logps/chosen": -236.1478271484375, |
|
"logps/rejected": -304.01947021484375, |
|
"loss": 0.0264, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2858787775039673, |
|
"rewards/margins": 3.6734910011291504, |
|
"rewards/rejected": -3.959369659423828, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 1.1111111111111112, |
|
"grad_norm": 3.9212396144866943, |
|
"learning_rate": 2.2520971819799328e-05, |
|
"logits/chosen": 2.0135841369628906, |
|
"logits/rejected": 1.9526537656784058, |
|
"logps/chosen": -181.21868896484375, |
|
"logps/rejected": -253.9673614501953, |
|
"loss": 0.2664, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.40475767850875854, |
|
"rewards/margins": 1.227912187576294, |
|
"rewards/rejected": -1.6326699256896973, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.1155555555555556, |
|
"grad_norm": 1.6736501455307007, |
|
"learning_rate": 2.2338013109400056e-05, |
|
"logits/chosen": 2.3246517181396484, |
|
"logits/rejected": 2.323885679244995, |
|
"logps/chosen": -365.00140380859375, |
|
"logps/rejected": -421.6229553222656, |
|
"loss": 0.0485, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9462188482284546, |
|
"rewards/margins": 4.564491271972656, |
|
"rewards/rejected": -3.618272542953491, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 20.39493179321289, |
|
"learning_rate": 2.215519849368138e-05, |
|
"logits/chosen": 2.3385281562805176, |
|
"logits/rejected": 2.2685837745666504, |
|
"logps/chosen": -561.4131469726562, |
|
"logps/rejected": -440.8316955566406, |
|
"loss": 0.8448, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -7.6562395095825195, |
|
"rewards/margins": -0.15807795524597168, |
|
"rewards/rejected": -7.498161315917969, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 1.1244444444444444, |
|
"grad_norm": 0.006911400239914656, |
|
"learning_rate": 2.1972537868489797e-05, |
|
"logits/chosen": 2.0427744388580322, |
|
"logits/rejected": 2.068326473236084, |
|
"logps/chosen": -424.22393798828125, |
|
"logps/rejected": -637.6040649414062, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.107600450515747, |
|
"rewards/margins": 11.686300277709961, |
|
"rewards/rejected": -13.793901443481445, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 1.1288888888888888, |
|
"grad_norm": 9.414591789245605, |
|
"learning_rate": 2.1790041121336225e-05, |
|
"logits/chosen": 1.8931891918182373, |
|
"logits/rejected": 1.7515565156936646, |
|
"logps/chosen": -474.2149353027344, |
|
"logps/rejected": -366.8948059082031, |
|
"loss": 0.4807, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -4.580873012542725, |
|
"rewards/margins": 0.6344245672225952, |
|
"rewards/rejected": -5.215297698974609, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 1.1333333333333333, |
|
"grad_norm": 0.31954923272132874, |
|
"learning_rate": 2.1607718130860782e-05, |
|
"logits/chosen": 2.2394070625305176, |
|
"logits/rejected": 2.203941822052002, |
|
"logps/chosen": -317.05987548828125, |
|
"logps/rejected": -365.3947448730469, |
|
"loss": 0.0186, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9734389781951904, |
|
"rewards/margins": 4.665194988250732, |
|
"rewards/rejected": -5.638634204864502, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 1.1377777777777778, |
|
"grad_norm": 0.7436378598213196, |
|
"learning_rate": 2.142557876629805e-05, |
|
"logits/chosen": 2.0395288467407227, |
|
"logits/rejected": 2.0879290103912354, |
|
"logps/chosen": -368.682373046875, |
|
"logps/rejected": -483.10504150390625, |
|
"loss": 0.0241, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.940394639968872, |
|
"rewards/margins": 4.465216636657715, |
|
"rewards/rejected": -7.405611038208008, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 1.1422222222222222, |
|
"grad_norm": 0.5857129096984863, |
|
"learning_rate": 2.124363288694285e-05, |
|
"logits/chosen": 2.189476251602173, |
|
"logits/rejected": 2.2042782306671143, |
|
"logps/chosen": -297.9048767089844, |
|
"logps/rejected": -462.4341125488281, |
|
"loss": 0.0125, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.4767074584960938, |
|
"rewards/margins": 5.004980564117432, |
|
"rewards/rejected": -7.481688022613525, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 1.1466666666666667, |
|
"grad_norm": 1.8212025165557861, |
|
"learning_rate": 2.1061890341616558e-05, |
|
"logits/chosen": 2.0773448944091797, |
|
"logits/rejected": 1.953477144241333, |
|
"logps/chosen": -466.6849670410156, |
|
"logps/rejected": -428.0595703125, |
|
"loss": 0.0469, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.071170330047607, |
|
"rewards/margins": 3.6165356636047363, |
|
"rewards/rejected": -8.687705993652344, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 1.1511111111111112, |
|
"grad_norm": 0.49154847860336304, |
|
"learning_rate": 2.0880360968133954e-05, |
|
"logits/chosen": 1.9941173791885376, |
|
"logits/rejected": 1.903878927230835, |
|
"logps/chosen": -443.1854553222656, |
|
"logps/rejected": -469.02886962890625, |
|
"loss": 0.0185, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.136099338531494, |
|
"rewards/margins": 5.983541965484619, |
|
"rewards/rejected": -8.119641304016113, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 1.1555555555555554, |
|
"grad_norm": 0.7263330817222595, |
|
"learning_rate": 2.0699054592770737e-05, |
|
"logits/chosen": 2.341273307800293, |
|
"logits/rejected": 2.2972702980041504, |
|
"logps/chosen": -383.4603271484375, |
|
"logps/rejected": -436.1636962890625, |
|
"loss": 0.0187, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9301009774208069, |
|
"rewards/margins": 4.749177932739258, |
|
"rewards/rejected": -5.67927885055542, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"grad_norm": 1.4579602479934692, |
|
"learning_rate": 2.0517981029731616e-05, |
|
"logits/chosen": 2.224546432495117, |
|
"logits/rejected": 2.069706916809082, |
|
"logps/chosen": -441.466064453125, |
|
"logps/rejected": -530.3856201171875, |
|
"loss": 0.0534, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.12378081679344177, |
|
"rewards/margins": 5.265193462371826, |
|
"rewards/rejected": -5.388974189758301, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 1.1644444444444444, |
|
"grad_norm": 0.380569189786911, |
|
"learning_rate": 2.0337150080619033e-05, |
|
"logits/chosen": 1.961578130722046, |
|
"logits/rejected": 1.9327723979949951, |
|
"logps/chosen": -419.30328369140625, |
|
"logps/rejected": -416.2418212890625, |
|
"loss": 0.0136, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.4250718355178833, |
|
"rewards/margins": 6.134527683258057, |
|
"rewards/rejected": -7.55959939956665, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 1.1688888888888889, |
|
"grad_norm": 13.600119590759277, |
|
"learning_rate": 2.0156571533902627e-05, |
|
"logits/chosen": 1.979763388633728, |
|
"logits/rejected": 1.8556675910949707, |
|
"logps/chosen": -295.2734069824219, |
|
"logps/rejected": -243.45123291015625, |
|
"loss": 0.5916, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -3.295403242111206, |
|
"rewards/margins": 0.9111607074737549, |
|
"rewards/rejected": -4.206563949584961, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 1.1733333333333333, |
|
"grad_norm": 16.93592071533203, |
|
"learning_rate": 1.997625516438937e-05, |
|
"logits/chosen": 2.4186158180236816, |
|
"logits/rejected": 2.3498375415802, |
|
"logps/chosen": -611.62841796875, |
|
"logps/rejected": -590.39306640625, |
|
"loss": 0.8758, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -9.509602546691895, |
|
"rewards/margins": 1.1703383922576904, |
|
"rewards/rejected": -10.679941177368164, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 1.1777777777777778, |
|
"grad_norm": 2.826754331588745, |
|
"learning_rate": 1.9796210732694442e-05, |
|
"logits/chosen": 1.9747998714447021, |
|
"logits/rejected": 1.9943749904632568, |
|
"logps/chosen": -265.30975341796875, |
|
"logps/rejected": -372.72296142578125, |
|
"loss": 0.0875, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.4182487726211548, |
|
"rewards/margins": 4.4431986808776855, |
|
"rewards/rejected": -5.861447334289551, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 1.1822222222222223, |
|
"grad_norm": 5.334752082824707, |
|
"learning_rate": 1.9616447984712914e-05, |
|
"logits/chosen": 1.6797964572906494, |
|
"logits/rejected": 1.7042231559753418, |
|
"logps/chosen": -150.200439453125, |
|
"logps/rejected": -205.3989715576172, |
|
"loss": 0.1681, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.8328521251678467, |
|
"rewards/margins": 1.797693133354187, |
|
"rewards/rejected": -3.630545139312744, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 1.1866666666666668, |
|
"grad_norm": 4.4336323738098145, |
|
"learning_rate": 1.9436976651092144e-05, |
|
"logits/chosen": 1.883796215057373, |
|
"logits/rejected": 1.7938141822814941, |
|
"logps/chosen": -158.01724243164062, |
|
"logps/rejected": -183.39508056640625, |
|
"loss": 0.3805, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2773910462856293, |
|
"rewards/margins": 0.9651764035224915, |
|
"rewards/rejected": -0.6877853870391846, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 1.1911111111111112, |
|
"grad_norm": 9.777835845947266, |
|
"learning_rate": 1.9257806446705116e-05, |
|
"logits/chosen": 2.2978739738464355, |
|
"logits/rejected": 2.2143282890319824, |
|
"logps/chosen": -346.0021667480469, |
|
"logps/rejected": -279.994384765625, |
|
"loss": 0.4109, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.30205535888671875, |
|
"rewards/margins": 2.2577598094940186, |
|
"rewards/rejected": -2.5598151683807373, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 1.1955555555555555, |
|
"grad_norm": 0.7445770502090454, |
|
"learning_rate": 1.9078947070124523e-05, |
|
"logits/chosen": 1.995645523071289, |
|
"logits/rejected": 2.0005688667297363, |
|
"logps/chosen": -301.2586669921875, |
|
"logps/rejected": -353.81927490234375, |
|
"loss": 0.0387, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5910667181015015, |
|
"rewards/margins": 3.9083282947540283, |
|
"rewards/rejected": -4.499395370483398, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 7.762486457824707, |
|
"learning_rate": 1.8900408203097787e-05, |
|
"logits/chosen": 1.905322551727295, |
|
"logits/rejected": 1.7590628862380981, |
|
"logps/chosen": -250.0103759765625, |
|
"logps/rejected": -283.5964050292969, |
|
"loss": 0.2447, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.9317550659179688, |
|
"rewards/margins": 3.9607110023498535, |
|
"rewards/rejected": -5.892466068267822, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_logits/chosen": 2.1097989082336426, |
|
"eval_logits/rejected": 2.0639405250549316, |
|
"eval_logps/chosen": -318.4396057128906, |
|
"eval_logps/rejected": -374.51708984375, |
|
"eval_loss": 0.4267149865627289, |
|
"eval_rewards/accuracies": 0.8214285969734192, |
|
"eval_rewards/chosen": -2.8146941661834717, |
|
"eval_rewards/margins": 3.0553336143493652, |
|
"eval_rewards/rejected": -5.8700270652771, |
|
"eval_runtime": 17.407, |
|
"eval_samples_per_second": 2.872, |
|
"eval_steps_per_second": 0.402, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.2044444444444444, |
|
"grad_norm": 1.0685392618179321, |
|
"learning_rate": 1.8722199510023012e-05, |
|
"logits/chosen": 2.049793004989624, |
|
"logits/rejected": 2.014737606048584, |
|
"logps/chosen": -357.0670166015625, |
|
"logps/rejected": -461.2732238769531, |
|
"loss": 0.0457, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.979574680328369, |
|
"rewards/margins": 3.4011411666870117, |
|
"rewards/rejected": -8.380716323852539, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 1.208888888888889, |
|
"grad_norm": 0.4621816575527191, |
|
"learning_rate": 1.854433063742579e-05, |
|
"logits/chosen": 2.1473259925842285, |
|
"logits/rejected": 2.1311964988708496, |
|
"logps/chosen": -233.38954162597656, |
|
"logps/rejected": -311.93170166015625, |
|
"loss": 0.0227, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1365326642990112, |
|
"rewards/margins": 3.9022364616394043, |
|
"rewards/rejected": -2.7657036781311035, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 1.2133333333333334, |
|
"grad_norm": 3.8542513847351074, |
|
"learning_rate": 1.8366811213437092e-05, |
|
"logits/chosen": 2.036423921585083, |
|
"logits/rejected": 2.026139974594116, |
|
"logps/chosen": -301.99395751953125, |
|
"logps/rejected": -330.7635498046875, |
|
"loss": 0.1548, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.5693366527557373, |
|
"rewards/margins": 1.7907044887542725, |
|
"rewards/rejected": -3.3600411415100098, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 1.2177777777777778, |
|
"grad_norm": 0.18561817705631256, |
|
"learning_rate": 1.8189650847272037e-05, |
|
"logits/chosen": 2.12514066696167, |
|
"logits/rejected": 2.1645846366882324, |
|
"logps/chosen": -372.3150939941406, |
|
"logps/rejected": -337.253173828125, |
|
"loss": 0.005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.3940017223358154, |
|
"rewards/margins": 5.315286159515381, |
|
"rewards/rejected": -8.709287643432617, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 1.2222222222222223, |
|
"grad_norm": 9.204042434692383, |
|
"learning_rate": 1.8012859128709766e-05, |
|
"logits/chosen": 1.8135225772857666, |
|
"logits/rejected": 1.8564603328704834, |
|
"logps/chosen": -192.2001953125, |
|
"logps/rejected": -243.14703369140625, |
|
"loss": 0.5688, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.7113640308380127, |
|
"rewards/margins": 0.32638704776763916, |
|
"rewards/rejected": -1.0377510786056519, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 1.2266666666666666, |
|
"grad_norm": 0.10035301744937897, |
|
"learning_rate": 1.783644562757436e-05, |
|
"logits/chosen": 2.467790126800537, |
|
"logits/rejected": 2.2855076789855957, |
|
"logps/chosen": -284.48785400390625, |
|
"logps/rejected": -418.7071533203125, |
|
"loss": 0.0025, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9228286743164062, |
|
"rewards/margins": 6.6048784255981445, |
|
"rewards/rejected": -5.682049751281738, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 1.231111111111111, |
|
"grad_norm": 0.995993435382843, |
|
"learning_rate": 1.7660419893216785e-05, |
|
"logits/chosen": 2.3109967708587646, |
|
"logits/rejected": 2.2218995094299316, |
|
"logps/chosen": -331.196044921875, |
|
"logps/rejected": -288.77044677734375, |
|
"loss": 0.0384, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.583350419998169, |
|
"rewards/margins": 3.377068519592285, |
|
"rewards/rejected": -2.793717861175537, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 1.2355555555555555, |
|
"grad_norm": 0.09387421607971191, |
|
"learning_rate": 1.7484791453998006e-05, |
|
"logits/chosen": 2.2291605472564697, |
|
"logits/rejected": 2.205029249191284, |
|
"logps/chosen": -329.593994140625, |
|
"logps/rejected": -481.59332275390625, |
|
"loss": 0.0032, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.18573302030563354, |
|
"rewards/margins": 7.1214494705200195, |
|
"rewards/rejected": -7.307182312011719, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"grad_norm": 1.2010304927825928, |
|
"learning_rate": 1.7309569816773193e-05, |
|
"logits/chosen": 1.4943195581436157, |
|
"logits/rejected": 1.5665395259857178, |
|
"logps/chosen": -118.16926574707031, |
|
"logps/rejected": -229.98049926757812, |
|
"loss": 0.0459, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5613498687744141, |
|
"rewards/margins": 3.4735331535339355, |
|
"rewards/rejected": -2.9121835231781006, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 1.2444444444444445, |
|
"grad_norm": 9.927587509155273, |
|
"learning_rate": 1.7134764466377136e-05, |
|
"logits/chosen": 2.1885547637939453, |
|
"logits/rejected": 2.239022731781006, |
|
"logps/chosen": -398.05059814453125, |
|
"logps/rejected": -413.83624267578125, |
|
"loss": 0.4304, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5941482782363892, |
|
"rewards/margins": 0.6228576898574829, |
|
"rewards/rejected": -1.217005968093872, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.248888888888889, |
|
"grad_norm": 5.716561794281006, |
|
"learning_rate": 1.69603848651108e-05, |
|
"logits/chosen": 1.9646629095077515, |
|
"logits/rejected": 1.9000844955444336, |
|
"logps/chosen": -269.88067626953125, |
|
"logps/rejected": -253.2309112548828, |
|
"loss": 0.3016, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.193873643875122, |
|
"rewards/margins": 1.1279609203338623, |
|
"rewards/rejected": -3.3218345642089844, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 1.2533333333333334, |
|
"grad_norm": 0.3801816999912262, |
|
"learning_rate": 1.6786440452229134e-05, |
|
"logits/chosen": 2.2147722244262695, |
|
"logits/rejected": 2.1242868900299072, |
|
"logps/chosen": -451.5334167480469, |
|
"logps/rejected": -462.0749206542969, |
|
"loss": 0.0097, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.0167465209960938, |
|
"rewards/margins": 5.644117832183838, |
|
"rewards/rejected": -8.660863876342773, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 1.2577777777777777, |
|
"grad_norm": 0.1913336217403412, |
|
"learning_rate": 1.6612940643430138e-05, |
|
"logits/chosen": 2.109816551208496, |
|
"logits/rejected": 2.2030255794525146, |
|
"logps/chosen": -255.25289916992188, |
|
"logps/rejected": -452.1776123046875, |
|
"loss": 0.0055, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.056928277015686, |
|
"rewards/margins": 7.02421760559082, |
|
"rewards/rejected": -8.081146240234375, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 1.2622222222222224, |
|
"grad_norm": 12.739026069641113, |
|
"learning_rate": 1.6439894830345143e-05, |
|
"logits/chosen": 1.5907762050628662, |
|
"logits/rejected": 1.58561372756958, |
|
"logps/chosen": -197.10760498046875, |
|
"logps/rejected": -260.8538818359375, |
|
"loss": 0.6363, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -2.7191286087036133, |
|
"rewards/margins": 1.830248236656189, |
|
"rewards/rejected": -4.549376487731934, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 1.2666666666666666, |
|
"grad_norm": 1.540250539779663, |
|
"learning_rate": 1.6267312380030506e-05, |
|
"logits/chosen": 2.0246505737304688, |
|
"logits/rejected": 1.983656883239746, |
|
"logps/chosen": -283.9218444824219, |
|
"logps/rejected": -365.05535888671875, |
|
"loss": 0.0741, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7960922122001648, |
|
"rewards/margins": 5.497363567352295, |
|
"rewards/rejected": -6.293455600738525, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 1.271111111111111, |
|
"grad_norm": 3.0418801307678223, |
|
"learning_rate": 1.609520263446049e-05, |
|
"logits/chosen": 2.055178165435791, |
|
"logits/rejected": 2.165806293487549, |
|
"logps/chosen": -276.8818054199219, |
|
"logps/rejected": -422.906005859375, |
|
"loss": 0.2023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.977742910385132, |
|
"rewards/margins": 3.4164116382598877, |
|
"rewards/rejected": -6.3941545486450195, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 1.2755555555555556, |
|
"grad_norm": 1.8675150871276855, |
|
"learning_rate": 1.5923574910021624e-05, |
|
"logits/chosen": 1.7561115026474, |
|
"logits/rejected": 1.7464289665222168, |
|
"logps/chosen": -180.07662963867188, |
|
"logps/rejected": -193.69613647460938, |
|
"loss": 0.1926, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2910804748535156, |
|
"rewards/margins": 3.408679246902466, |
|
"rewards/rejected": -3.6997597217559814, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 11.50502872467041, |
|
"learning_rate": 1.5752438497008405e-05, |
|
"logits/chosen": 1.795478343963623, |
|
"logits/rejected": 1.8433971405029297, |
|
"logps/chosen": -314.85888671875, |
|
"logps/rejected": -392.9442138671875, |
|
"loss": 0.4887, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.7009613513946533, |
|
"rewards/margins": 4.940203666687012, |
|
"rewards/rejected": -6.641165256500244, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 1.2844444444444445, |
|
"grad_norm": 0.6544823050498962, |
|
"learning_rate": 1.558180265912037e-05, |
|
"logits/chosen": 2.1740856170654297, |
|
"logits/rejected": 2.0556159019470215, |
|
"logps/chosen": -291.9125671386719, |
|
"logps/rejected": -370.18865966796875, |
|
"loss": 0.021, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.0329957008361816, |
|
"rewards/margins": 4.283731460571289, |
|
"rewards/rejected": -5.3167266845703125, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 1.2888888888888888, |
|
"grad_norm": 1.4096264839172363, |
|
"learning_rate": 1.5411676632960713e-05, |
|
"logits/chosen": 1.9917266368865967, |
|
"logits/rejected": 2.0029103755950928, |
|
"logps/chosen": -215.18414306640625, |
|
"logps/rejected": -224.6077117919922, |
|
"loss": 0.0592, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.029798150062561, |
|
"rewards/margins": 3.0806381702423096, |
|
"rewards/rejected": -2.050839900970459, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.2933333333333334, |
|
"grad_norm": 3.500157117843628, |
|
"learning_rate": 1.5242069627536225e-05, |
|
"logits/chosen": 2.0910866260528564, |
|
"logits/rejected": 2.094388484954834, |
|
"logps/chosen": -268.5745544433594, |
|
"logps/rejected": -347.7729797363281, |
|
"loss": 0.1554, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5284897089004517, |
|
"rewards/margins": 1.9156463146209717, |
|
"rewards/rejected": -2.444136142730713, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 1.2977777777777777, |
|
"grad_norm": 7.339416027069092, |
|
"learning_rate": 1.5072990823758871e-05, |
|
"logits/chosen": 2.043335437774658, |
|
"logits/rejected": 1.9694390296936035, |
|
"logps/chosen": -267.2714538574219, |
|
"logps/rejected": -318.1759338378906, |
|
"loss": 0.5384, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.948584794998169, |
|
"rewards/margins": 0.3823028802871704, |
|
"rewards/rejected": -2.33088755607605, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 1.3022222222222222, |
|
"grad_norm": 2.8766441345214844, |
|
"learning_rate": 1.490444937394879e-05, |
|
"logits/chosen": 1.6397064924240112, |
|
"logits/rejected": 1.58877432346344, |
|
"logps/chosen": -206.1599578857422, |
|
"logps/rejected": -258.51251220703125, |
|
"loss": 0.2138, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5400612354278564, |
|
"rewards/margins": 1.5494178533554077, |
|
"rewards/rejected": -2.0894789695739746, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 1.3066666666666666, |
|
"grad_norm": 0.5003223419189453, |
|
"learning_rate": 1.4736454401338872e-05, |
|
"logits/chosen": 2.242143154144287, |
|
"logits/rejected": 2.218358039855957, |
|
"logps/chosen": -492.30926513671875, |
|
"logps/rejected": -563.084716796875, |
|
"loss": 0.0071, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.345100402832031, |
|
"rewards/margins": 8.439008712768555, |
|
"rewards/rejected": -13.784109115600586, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 1.3111111111111111, |
|
"grad_norm": 9.198110580444336, |
|
"learning_rate": 1.4569014999580937e-05, |
|
"logits/chosen": 2.083486795425415, |
|
"logits/rejected": 2.0191855430603027, |
|
"logps/chosen": -514.994384765625, |
|
"logps/rejected": -621.652587890625, |
|
"loss": 0.1112, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.739448547363281, |
|
"rewards/margins": 2.7005691528320312, |
|
"rewards/rejected": -11.440017700195312, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 1.3155555555555556, |
|
"grad_norm": 18.878862380981445, |
|
"learning_rate": 1.4402140232253486e-05, |
|
"logits/chosen": 2.4532670974731445, |
|
"logits/rejected": 2.3905553817749023, |
|
"logps/chosen": -382.56829833984375, |
|
"logps/rejected": -470.17926025390625, |
|
"loss": 0.5653, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -3.797785758972168, |
|
"rewards/margins": 4.899234771728516, |
|
"rewards/rejected": -8.697020530700684, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"grad_norm": 0.9160619974136353, |
|
"learning_rate": 1.4235839132371038e-05, |
|
"logits/chosen": 2.1893036365509033, |
|
"logits/rejected": 2.259230136871338, |
|
"logps/chosen": -349.79779052734375, |
|
"logps/rejected": -420.09796142578125, |
|
"loss": 0.0345, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.3005684614181519, |
|
"rewards/margins": 5.341613292694092, |
|
"rewards/rejected": -6.642181396484375, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 1.3244444444444445, |
|
"grad_norm": 1.0921589136123657, |
|
"learning_rate": 1.407012070189524e-05, |
|
"logits/chosen": 1.6390105485916138, |
|
"logits/rejected": 1.6804091930389404, |
|
"logps/chosen": -323.83782958984375, |
|
"logps/rejected": -502.33782958984375, |
|
"loss": 0.0708, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.742315769195557, |
|
"rewards/margins": 5.594854831695557, |
|
"rewards/rejected": -10.337170600891113, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 1.3288888888888888, |
|
"grad_norm": 0.014698788523674011, |
|
"learning_rate": 1.3904993911247561e-05, |
|
"logits/chosen": 2.3741211891174316, |
|
"logits/rejected": 2.227184295654297, |
|
"logps/chosen": -381.3302307128906, |
|
"logps/rejected": -436.189697265625, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.0916748046875, |
|
"rewards/margins": 8.013049125671387, |
|
"rewards/rejected": -7.921374797821045, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 1.3333333333333333, |
|
"grad_norm": 12.252896308898926, |
|
"learning_rate": 1.3740467698823662e-05, |
|
"logits/chosen": 2.037738800048828, |
|
"logits/rejected": 1.979736566543579, |
|
"logps/chosen": -299.84228515625, |
|
"logps/rejected": -390.47296142578125, |
|
"loss": 0.8992, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -3.8193671703338623, |
|
"rewards/margins": 2.5206613540649414, |
|
"rewards/rejected": -6.340028762817383, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.3377777777777777, |
|
"grad_norm": 12.054088592529297, |
|
"learning_rate": 1.3576550970509666e-05, |
|
"logits/chosen": 1.7213351726531982, |
|
"logits/rejected": 1.7385635375976562, |
|
"logps/chosen": -322.126708984375, |
|
"logps/rejected": -534.2659912109375, |
|
"loss": 0.7611, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -3.1543197631835938, |
|
"rewards/margins": 7.169881343841553, |
|
"rewards/rejected": -10.324201583862305, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 1.3422222222222222, |
|
"grad_norm": 1.8875739574432373, |
|
"learning_rate": 1.341325259919996e-05, |
|
"logits/chosen": 2.0664215087890625, |
|
"logits/rejected": 2.0522894859313965, |
|
"logps/chosen": -221.20150756835938, |
|
"logps/rejected": -283.83770751953125, |
|
"loss": 0.0715, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.671368420124054, |
|
"rewards/margins": 2.8214011192321777, |
|
"rewards/rejected": -3.492769718170166, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 1.3466666666666667, |
|
"grad_norm": 11.853067398071289, |
|
"learning_rate": 1.325058142431701e-05, |
|
"logits/chosen": 1.6087274551391602, |
|
"logits/rejected": 1.6155368089675903, |
|
"logps/chosen": -160.04257202148438, |
|
"logps/rejected": -224.98550415039062, |
|
"loss": 0.6955, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.0727493166923523, |
|
"rewards/margins": 2.172374725341797, |
|
"rewards/rejected": -2.245124101638794, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 1.3511111111111112, |
|
"grad_norm": 2.025550603866577, |
|
"learning_rate": 1.3088546251332772e-05, |
|
"logits/chosen": 1.9365671873092651, |
|
"logits/rejected": 1.9474778175354004, |
|
"logps/chosen": -421.8943786621094, |
|
"logps/rejected": -382.8135070800781, |
|
"loss": 0.1339, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.756591796875, |
|
"rewards/margins": 5.394895076751709, |
|
"rewards/rejected": -7.151486873626709, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 1.3555555555555556, |
|
"grad_norm": 0.24477213621139526, |
|
"learning_rate": 1.2927155851292145e-05, |
|
"logits/chosen": 1.8736495971679688, |
|
"logits/rejected": 1.8653249740600586, |
|
"logps/chosen": -244.5578155517578, |
|
"logps/rejected": -318.7236328125, |
|
"loss": 0.0104, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.7046654224395752, |
|
"rewards/margins": 4.947877883911133, |
|
"rewards/rejected": -6.652543067932129, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 1.3599999999999999, |
|
"grad_norm": 53.01292037963867, |
|
"learning_rate": 1.2766418960338128e-05, |
|
"logits/chosen": 2.017364025115967, |
|
"logits/rejected": 1.9819977283477783, |
|
"logps/chosen": -376.64581298828125, |
|
"logps/rejected": -330.441162109375, |
|
"loss": 2.673, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -9.418815612792969, |
|
"rewards/margins": -2.5403449535369873, |
|
"rewards/rejected": -6.878470420837402, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 1.3644444444444446, |
|
"grad_norm": 6.39654541015625, |
|
"learning_rate": 1.260634427923896e-05, |
|
"logits/chosen": 1.2513247728347778, |
|
"logits/rejected": 1.2842328548431396, |
|
"logps/chosen": -137.0913543701172, |
|
"logps/rejected": -154.89610290527344, |
|
"loss": 0.3182, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5993289947509766, |
|
"rewards/margins": 2.3029263019561768, |
|
"rewards/rejected": -2.9022552967071533, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 1.3688888888888888, |
|
"grad_norm": 0.30323663353919983, |
|
"learning_rate": 1.2446940472917099e-05, |
|
"logits/chosen": 2.068608283996582, |
|
"logits/rejected": 2.0424392223358154, |
|
"logps/chosen": -323.708740234375, |
|
"logps/rejected": -347.22998046875, |
|
"loss": 0.0097, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.0544846057891846, |
|
"rewards/margins": 4.629805088043213, |
|
"rewards/rejected": -6.684289932250977, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 1.3733333333333333, |
|
"grad_norm": 1.1456687450408936, |
|
"learning_rate": 1.2288216169980243e-05, |
|
"logits/chosen": 1.7509403228759766, |
|
"logits/rejected": 1.8240234851837158, |
|
"logps/chosen": -181.7119903564453, |
|
"logps/rejected": -259.93548583984375, |
|
"loss": 0.067, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.8926734924316406, |
|
"rewards/margins": 3.159923553466797, |
|
"rewards/rejected": -5.0525970458984375, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 1.3777777777777778, |
|
"grad_norm": 22.11775779724121, |
|
"learning_rate": 1.213017996225424e-05, |
|
"logits/chosen": 1.9033875465393066, |
|
"logits/rejected": 1.7332472801208496, |
|
"logps/chosen": -356.8746337890625, |
|
"logps/rejected": -316.99420166015625, |
|
"loss": 1.4625, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -4.758178234100342, |
|
"rewards/margins": 3.5830559730529785, |
|
"rewards/rejected": -8.34123420715332, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.3822222222222222, |
|
"grad_norm": 7.521746635437012, |
|
"learning_rate": 1.1972840404317961e-05, |
|
"logits/chosen": 2.111452102661133, |
|
"logits/rejected": 1.976496934890747, |
|
"logps/chosen": -428.1273193359375, |
|
"logps/rejected": -545.6732788085938, |
|
"loss": 0.1086, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -10.942473411560059, |
|
"rewards/margins": 3.5422022342681885, |
|
"rewards/rejected": -14.484675407409668, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 1.3866666666666667, |
|
"grad_norm": 0.11550098657608032, |
|
"learning_rate": 1.1816206013040313e-05, |
|
"logits/chosen": 1.9739415645599365, |
|
"logits/rejected": 1.9305256605148315, |
|
"logps/chosen": -288.68017578125, |
|
"logps/rejected": -353.60418701171875, |
|
"loss": 0.0046, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.820286512374878, |
|
"rewards/margins": 5.378190040588379, |
|
"rewards/rejected": -7.198476791381836, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 1.3911111111111112, |
|
"grad_norm": 19.025943756103516, |
|
"learning_rate": 1.1660285267119167e-05, |
|
"logits/chosen": 2.469484806060791, |
|
"logits/rejected": 2.470264196395874, |
|
"logps/chosen": -707.8921508789062, |
|
"logps/rejected": -753.548828125, |
|
"loss": 0.4269, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -6.678134441375732, |
|
"rewards/margins": 6.853320121765137, |
|
"rewards/rejected": -13.531454086303711, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 1.3955555555555557, |
|
"grad_norm": 5.186333179473877, |
|
"learning_rate": 1.150508660662242e-05, |
|
"logits/chosen": 1.8456952571868896, |
|
"logits/rejected": 1.8528308868408203, |
|
"logps/chosen": -483.7690734863281, |
|
"logps/rejected": -454.6048583984375, |
|
"loss": 0.1185, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -7.390696048736572, |
|
"rewards/margins": 2.1679461002349854, |
|
"rewards/rejected": -9.558642387390137, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 2.9729480743408203, |
|
"learning_rate": 1.1350618432531098e-05, |
|
"logits/chosen": 1.75775945186615, |
|
"logits/rejected": 1.7443618774414062, |
|
"logps/chosen": -313.0162048339844, |
|
"logps/rejected": -333.1780090332031, |
|
"loss": 0.0539, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.031980991363525, |
|
"rewards/margins": 3.3457300662994385, |
|
"rewards/rejected": -8.377711296081543, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_logits/chosen": 2.042860269546509, |
|
"eval_logits/rejected": 1.9980798959732056, |
|
"eval_logps/chosen": -329.1581726074219, |
|
"eval_logps/rejected": -387.9140930175781, |
|
"eval_loss": 0.446563184261322, |
|
"eval_rewards/accuracies": 0.8035714030265808, |
|
"eval_rewards/chosen": -3.886552572250366, |
|
"eval_rewards/margins": 3.3231773376464844, |
|
"eval_rewards/rejected": -7.2097296714782715, |
|
"eval_runtime": 17.4013, |
|
"eval_samples_per_second": 2.873, |
|
"eval_steps_per_second": 0.402, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 1.4044444444444444, |
|
"grad_norm": 11.154594421386719, |
|
"learning_rate": 1.1196889106284669e-05, |
|
"logits/chosen": 1.755511999130249, |
|
"logits/rejected": 1.762006163597107, |
|
"logps/chosen": -291.4748229980469, |
|
"logps/rejected": -280.0130615234375, |
|
"loss": 0.4381, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -5.212803840637207, |
|
"rewards/margins": 3.228024959564209, |
|
"rewards/rejected": -8.440828323364258, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 1.4088888888888889, |
|
"grad_norm": 1.097583532333374, |
|
"learning_rate": 1.1043906949328387e-05, |
|
"logits/chosen": 1.9886606931686401, |
|
"logits/rejected": 1.96701979637146, |
|
"logps/chosen": -252.5841827392578, |
|
"logps/rejected": -349.20745849609375, |
|
"loss": 0.0325, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.4393486976623535, |
|
"rewards/margins": 3.46529221534729, |
|
"rewards/rejected": -5.904641151428223, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 1.4133333333333333, |
|
"grad_norm": 0.9752767086029053, |
|
"learning_rate": 1.0891680242662835e-05, |
|
"logits/chosen": 2.0138909816741943, |
|
"logits/rejected": 1.918421983718872, |
|
"logps/chosen": -300.9414978027344, |
|
"logps/rejected": -353.33404541015625, |
|
"loss": 0.0272, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.531005859375, |
|
"rewards/margins": 3.710988759994507, |
|
"rewards/rejected": -9.241994857788086, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 1.4177777777777778, |
|
"grad_norm": 16.44363784790039, |
|
"learning_rate": 1.0740217226395724e-05, |
|
"logits/chosen": 2.0399329662323, |
|
"logits/rejected": 1.9319369792938232, |
|
"logps/chosen": -391.0609130859375, |
|
"logps/rejected": -363.857177734375, |
|
"loss": 0.4585, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -4.746470928192139, |
|
"rewards/margins": 1.3634958267211914, |
|
"rewards/rejected": -6.10996675491333, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 1.4222222222222223, |
|
"grad_norm": 33.33434295654297, |
|
"learning_rate": 1.0589526099295816e-05, |
|
"logits/chosen": 2.1048226356506348, |
|
"logits/rejected": 1.9768327474594116, |
|
"logps/chosen": -581.8522338867188, |
|
"logps/rejected": -463.3633728027344, |
|
"loss": 2.6066, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -9.7820405960083, |
|
"rewards/margins": -0.41506481170654297, |
|
"rewards/rejected": -9.366975784301758, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.4266666666666667, |
|
"grad_norm": 0.01582302711904049, |
|
"learning_rate": 1.0439615018349109e-05, |
|
"logits/chosen": 1.9011285305023193, |
|
"logits/rejected": 1.9159646034240723, |
|
"logps/chosen": -358.68157958984375, |
|
"logps/rejected": -534.3988037109375, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.9317246675491333, |
|
"rewards/margins": 9.084466934204102, |
|
"rewards/rejected": -11.016191482543945, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 1.431111111111111, |
|
"grad_norm": 6.929481506347656, |
|
"learning_rate": 1.029049209831733e-05, |
|
"logits/chosen": 1.9835437536239624, |
|
"logits/rejected": 1.9601792097091675, |
|
"logps/chosen": -285.4648132324219, |
|
"logps/rejected": -420.031005859375, |
|
"loss": 0.3011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.0451195240020752, |
|
"rewards/margins": 8.181748390197754, |
|
"rewards/rejected": -9.22686767578125, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 1.4355555555555555, |
|
"grad_norm": 12.37388801574707, |
|
"learning_rate": 1.0142165411298662e-05, |
|
"logits/chosen": 2.131269693374634, |
|
"logits/rejected": 2.1731009483337402, |
|
"logps/chosen": -299.0472717285156, |
|
"logps/rejected": -404.6293029785156, |
|
"loss": 0.3707, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.4244904518127441, |
|
"rewards/margins": 3.208432197570801, |
|
"rewards/rejected": -4.632922649383545, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"grad_norm": 0.8624144196510315, |
|
"learning_rate": 9.994642986290797e-06, |
|
"logits/chosen": 2.057706832885742, |
|
"logits/rejected": 2.074605941772461, |
|
"logps/chosen": -324.3763427734375, |
|
"logps/rejected": -441.5903015136719, |
|
"loss": 0.0174, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.426907539367676, |
|
"rewards/margins": 5.264835834503174, |
|
"rewards/rejected": -8.691743850708008, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 1.4444444444444444, |
|
"grad_norm": 5.4227118492126465, |
|
"learning_rate": 9.847932808756308e-06, |
|
"logits/chosen": 2.1998391151428223, |
|
"logits/rejected": 2.201568126678467, |
|
"logps/chosen": -312.435791015625, |
|
"logps/rejected": -431.78179931640625, |
|
"loss": 0.1895, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.3775376081466675, |
|
"rewards/margins": 3.8356902599334717, |
|
"rewards/rejected": -5.213228225708008, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 1.448888888888889, |
|
"grad_norm": 2.2142040729522705, |
|
"learning_rate": 9.702042820190415e-06, |
|
"logits/chosen": 1.5558602809906006, |
|
"logits/rejected": 1.7116918563842773, |
|
"logps/chosen": -197.79367065429688, |
|
"logps/rejected": -246.2130126953125, |
|
"loss": 0.0913, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.3813637495040894, |
|
"rewards/margins": 2.347956418991089, |
|
"rewards/rejected": -3.7293200492858887, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 1.4533333333333334, |
|
"grad_norm": 8.604325294494629, |
|
"learning_rate": 9.556980917691116e-06, |
|
"logits/chosen": 1.6613447666168213, |
|
"logits/rejected": 1.7617850303649902, |
|
"logps/chosen": -347.57830810546875, |
|
"logps/rejected": -387.5290222167969, |
|
"loss": 0.4048, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.3771653175354004, |
|
"rewards/margins": 0.8042678833007812, |
|
"rewards/rejected": -4.181433200836182, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 1.4577777777777778, |
|
"grad_norm": 0.06135905534029007, |
|
"learning_rate": 9.412754953531663e-06, |
|
"logits/chosen": 2.1208975315093994, |
|
"logits/rejected": 1.9472355842590332, |
|
"logps/chosen": -416.2113342285156, |
|
"logps/rejected": -469.04803466796875, |
|
"loss": 0.0024, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.4611542224884033, |
|
"rewards/margins": 6.7728729248046875, |
|
"rewards/rejected": -10.234027862548828, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 1.462222222222222, |
|
"grad_norm": 4.409871578216553, |
|
"learning_rate": 9.269372734735577e-06, |
|
"logits/chosen": 1.9324915409088135, |
|
"logits/rejected": 1.8667106628417969, |
|
"logps/chosen": -224.12960815429688, |
|
"logps/rejected": -257.76385498046875, |
|
"loss": 0.1307, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.500388145446777, |
|
"rewards/margins": 2.151371955871582, |
|
"rewards/rejected": -6.651760101318359, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 1.4666666666666668, |
|
"grad_norm": 40.46012496948242, |
|
"learning_rate": 9.126842022654003e-06, |
|
"logits/chosen": 2.013392686843872, |
|
"logits/rejected": 2.085439682006836, |
|
"logps/chosen": -343.09381103515625, |
|
"logps/rejected": -382.8746337890625, |
|
"loss": 1.476, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -7.1528754234313965, |
|
"rewards/margins": 1.4587280750274658, |
|
"rewards/rejected": -8.611603736877441, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.471111111111111, |
|
"grad_norm": 20.39794158935547, |
|
"learning_rate": 8.985170532545622e-06, |
|
"logits/chosen": 2.2019968032836914, |
|
"logits/rejected": 2.255478858947754, |
|
"logps/chosen": -461.25250244140625, |
|
"logps/rejected": -522.5560302734375, |
|
"loss": 0.6261, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -7.226644515991211, |
|
"rewards/margins": 1.7861032485961914, |
|
"rewards/rejected": -9.012747764587402, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 1.4755555555555555, |
|
"grad_norm": 0.8205786347389221, |
|
"learning_rate": 8.844365933158973e-06, |
|
"logits/chosen": 2.0666050910949707, |
|
"logits/rejected": 2.1347484588623047, |
|
"logps/chosen": -518.1484375, |
|
"logps/rejected": -621.5133666992188, |
|
"loss": 0.0122, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -6.620497703552246, |
|
"rewards/margins": 7.697022914886475, |
|
"rewards/rejected": -14.317520141601562, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"grad_norm": 2.1303653717041016, |
|
"learning_rate": 8.704435846317386e-06, |
|
"logits/chosen": 1.9880008697509766, |
|
"logits/rejected": 2.010342836380005, |
|
"logps/chosen": -331.64874267578125, |
|
"logps/rejected": -382.2865905761719, |
|
"loss": 0.051, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.6854767799377441, |
|
"rewards/margins": 3.8334319591522217, |
|
"rewards/rejected": -5.518908500671387, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 1.4844444444444445, |
|
"grad_norm": 5.076652526855469, |
|
"learning_rate": 8.565387846506395e-06, |
|
"logits/chosen": 1.8777854442596436, |
|
"logits/rejected": 1.8597569465637207, |
|
"logps/chosen": -285.7859191894531, |
|
"logps/rejected": -302.11083984375, |
|
"loss": 0.2184, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.237748146057129, |
|
"rewards/margins": 1.4745651483535767, |
|
"rewards/rejected": -5.712313175201416, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 1.488888888888889, |
|
"grad_norm": 1.7670209407806396, |
|
"learning_rate": 8.427229460463696e-06, |
|
"logits/chosen": 2.0296010971069336, |
|
"logits/rejected": 2.046407461166382, |
|
"logps/chosen": -446.6829528808594, |
|
"logps/rejected": -450.63531494140625, |
|
"loss": 0.0681, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.3315367698669434, |
|
"rewards/margins": 3.1756088733673096, |
|
"rewards/rejected": -5.507145881652832, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 1.4933333333333334, |
|
"grad_norm": 7.628573894500732, |
|
"learning_rate": 8.28996816677177e-06, |
|
"logits/chosen": 1.8448824882507324, |
|
"logits/rejected": 1.8389427661895752, |
|
"logps/chosen": -414.1676025390625, |
|
"logps/rejected": -435.0848083496094, |
|
"loss": 0.1649, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.346246242523193, |
|
"rewards/margins": 4.283938407897949, |
|
"rewards/rejected": -9.6301851272583, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 1.4977777777777779, |
|
"grad_norm": 5.303137302398682, |
|
"learning_rate": 8.153611395453045e-06, |
|
"logits/chosen": 1.9505963325500488, |
|
"logits/rejected": 1.9452285766601562, |
|
"logps/chosen": -294.60015869140625, |
|
"logps/rejected": -381.62030029296875, |
|
"loss": 0.2634, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.9013266563415527, |
|
"rewards/margins": 1.7531030178070068, |
|
"rewards/rejected": -4.654429912567139, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 1.5022222222222221, |
|
"grad_norm": 11.043461799621582, |
|
"learning_rate": 8.018166527567672e-06, |
|
"logits/chosen": 2.0403127670288086, |
|
"logits/rejected": 1.9792909622192383, |
|
"logps/chosen": -424.71881103515625, |
|
"logps/rejected": -502.0084228515625, |
|
"loss": 0.4432, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -7.235692024230957, |
|
"rewards/margins": 4.200654983520508, |
|
"rewards/rejected": -11.436347007751465, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 1.5066666666666668, |
|
"grad_norm": 0.34782156348228455, |
|
"learning_rate": 7.883640894814043e-06, |
|
"logits/chosen": 2.0710644721984863, |
|
"logits/rejected": 2.027409553527832, |
|
"logps/chosen": -235.57655334472656, |
|
"logps/rejected": -296.014892578125, |
|
"loss": 0.02, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5666137337684631, |
|
"rewards/margins": 5.824798107147217, |
|
"rewards/rejected": -6.391411781311035, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 1.511111111111111, |
|
"grad_norm": 3.823324203491211, |
|
"learning_rate": 7.75004177913188e-06, |
|
"logits/chosen": 2.1038994789123535, |
|
"logits/rejected": 2.085219383239746, |
|
"logps/chosen": -384.879638671875, |
|
"logps/rejected": -339.5116882324219, |
|
"loss": 0.074, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.644735813140869, |
|
"rewards/margins": 4.097072601318359, |
|
"rewards/rejected": -6.7418084144592285, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.5155555555555555, |
|
"grad_norm": 0.00015221821377053857, |
|
"learning_rate": 7.617376412308083e-06, |
|
"logits/chosen": 2.0240237712860107, |
|
"logits/rejected": 1.9871121644973755, |
|
"logps/chosen": -351.7294616699219, |
|
"logps/rejected": -592.8782958984375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.147392749786377, |
|
"rewards/margins": 13.006977081298828, |
|
"rewards/rejected": -15.154369354248047, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"grad_norm": 1.3263826370239258, |
|
"learning_rate": 7.485651975585236e-06, |
|
"logits/chosen": 1.8890652656555176, |
|
"logits/rejected": 1.8711776733398438, |
|
"logps/chosen": -324.95245361328125, |
|
"logps/rejected": -423.5935974121094, |
|
"loss": 0.0299, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -6.109809875488281, |
|
"rewards/margins": 5.546566009521484, |
|
"rewards/rejected": -11.656375885009766, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 1.5244444444444445, |
|
"grad_norm": 4.467953205108643, |
|
"learning_rate": 7.354875599272928e-06, |
|
"logits/chosen": 1.321131944656372, |
|
"logits/rejected": 1.298929214477539, |
|
"logps/chosen": -137.98204040527344, |
|
"logps/rejected": -102.74874877929688, |
|
"loss": 0.3132, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.748739242553711, |
|
"rewards/margins": 1.1912882328033447, |
|
"rewards/rejected": -2.9400274753570557, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 1.528888888888889, |
|
"grad_norm": 2.1642391681671143, |
|
"learning_rate": 7.2250543623617685e-06, |
|
"logits/chosen": 2.2403130531311035, |
|
"logits/rejected": 2.209939956665039, |
|
"logps/chosen": -361.478759765625, |
|
"logps/rejected": -535.7804565429688, |
|
"loss": 0.0569, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.326800584793091, |
|
"rewards/margins": 6.651968479156494, |
|
"rewards/rejected": -8.978769302368164, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 1.5333333333333332, |
|
"grad_norm": 0.018650932237505913, |
|
"learning_rate": 7.096195292140173e-06, |
|
"logits/chosen": 1.889040470123291, |
|
"logits/rejected": 1.954929232597351, |
|
"logps/chosen": -331.3338623046875, |
|
"logps/rejected": -661.5016479492188, |
|
"loss": 0.0005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.6049957275390625, |
|
"rewards/margins": 11.392202377319336, |
|
"rewards/rejected": -13.997198104858398, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 1.537777777777778, |
|
"grad_norm": 0.07977497577667236, |
|
"learning_rate": 6.968305363814001e-06, |
|
"logits/chosen": 2.1549904346466064, |
|
"logits/rejected": 2.1628024578094482, |
|
"logps/chosen": -401.5853271484375, |
|
"logps/rejected": -551.8875732421875, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.6251935958862305, |
|
"rewards/margins": 7.631624221801758, |
|
"rewards/rejected": -12.256817817687988, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 1.5422222222222222, |
|
"grad_norm": 9.609312057495117, |
|
"learning_rate": 6.841391500128982e-06, |
|
"logits/chosen": 1.9556026458740234, |
|
"logits/rejected": 2.000077247619629, |
|
"logps/chosen": -265.30108642578125, |
|
"logps/rejected": -381.78399658203125, |
|
"loss": 0.1607, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.087409973144531, |
|
"rewards/margins": 1.7995681762695312, |
|
"rewards/rejected": -5.8869781494140625, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 1.5466666666666666, |
|
"grad_norm": 0.00032588234171271324, |
|
"learning_rate": 6.715460570995988e-06, |
|
"logits/chosen": 2.1855061054229736, |
|
"logits/rejected": 2.1387851238250732, |
|
"logps/chosen": -434.9403076171875, |
|
"logps/rejected": -738.8302612304688, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.6018309593200684, |
|
"rewards/margins": 11.849609375, |
|
"rewards/rejected": -14.451440811157227, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 1.551111111111111, |
|
"grad_norm": 1.5105094909667969, |
|
"learning_rate": 6.5905193931191235e-06, |
|
"logits/chosen": 2.143610715866089, |
|
"logits/rejected": 2.2100303173065186, |
|
"logps/chosen": -380.15618896484375, |
|
"logps/rejected": -462.9521789550781, |
|
"loss": 0.0708, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.9106452465057373, |
|
"rewards/margins": 4.415860176086426, |
|
"rewards/rejected": -6.326505661010742, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 1.5555555555555556, |
|
"grad_norm": 0.15542344748973846, |
|
"learning_rate": 6.46657472962679e-06, |
|
"logits/chosen": 2.022047758102417, |
|
"logits/rejected": 1.9033942222595215, |
|
"logps/chosen": -345.0733642578125, |
|
"logps/rejected": -438.015380859375, |
|
"loss": 0.0029, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.09320831298828125, |
|
"rewards/margins": 8.476028442382812, |
|
"rewards/rejected": -8.569236755371094, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"grad_norm": 3.5860283374786377, |
|
"learning_rate": 6.343633289705555e-06, |
|
"logits/chosen": 1.9373621940612793, |
|
"logits/rejected": 1.8331228494644165, |
|
"logps/chosen": -333.8984069824219, |
|
"logps/rejected": -243.1856231689453, |
|
"loss": 0.0951, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.346085548400879, |
|
"rewards/margins": 2.6990599632263184, |
|
"rewards/rejected": -6.045145511627197, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 1.5644444444444443, |
|
"grad_norm": 1.0526074171066284, |
|
"learning_rate": 6.221701728237009e-06, |
|
"logits/chosen": 1.999690055847168, |
|
"logits/rejected": 2.0342698097229004, |
|
"logps/chosen": -347.6802978515625, |
|
"logps/rejected": -321.7969970703125, |
|
"loss": 0.036, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.819159507751465, |
|
"rewards/margins": 3.3115005493164062, |
|
"rewards/rejected": -8.130660057067871, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 1.568888888888889, |
|
"grad_norm": 0.5887879133224487, |
|
"learning_rate": 6.100786645437481e-06, |
|
"logits/chosen": 1.1116806268692017, |
|
"logits/rejected": 1.1577059030532837, |
|
"logps/chosen": -76.88727569580078, |
|
"logps/rejected": -119.89225006103516, |
|
"loss": 0.0565, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.09421806037425995, |
|
"rewards/margins": 2.9204256534576416, |
|
"rewards/rejected": -2.8262076377868652, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 1.5733333333333333, |
|
"grad_norm": 0.1103566437959671, |
|
"learning_rate": 5.980894586500841e-06, |
|
"logits/chosen": 2.108466625213623, |
|
"logits/rejected": 2.086857795715332, |
|
"logps/chosen": -387.1605224609375, |
|
"logps/rejected": -562.6422119140625, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.905645847320557, |
|
"rewards/margins": 7.762504577636719, |
|
"rewards/rejected": -13.668149948120117, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 1.5777777777777777, |
|
"grad_norm": 8.797289848327637, |
|
"learning_rate": 5.8620320412441475e-06, |
|
"logits/chosen": 1.8724584579467773, |
|
"logits/rejected": 1.9823896884918213, |
|
"logps/chosen": -301.8065185546875, |
|
"logps/rejected": -380.43853759765625, |
|
"loss": 0.4206, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -2.267813205718994, |
|
"rewards/margins": 1.2750152349472046, |
|
"rewards/rejected": -3.542828321456909, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 1.5822222222222222, |
|
"grad_norm": 6.784573078155518, |
|
"learning_rate": 5.744205443756364e-06, |
|
"logits/chosen": 1.9750076532363892, |
|
"logits/rejected": 2.0933682918548584, |
|
"logps/chosen": -423.0281677246094, |
|
"logps/rejected": -526.6314697265625, |
|
"loss": 0.2207, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -7.2557196617126465, |
|
"rewards/margins": 1.4171913862228394, |
|
"rewards/rejected": -8.672910690307617, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 1.5866666666666667, |
|
"grad_norm": 2.033442258834839, |
|
"learning_rate": 5.627421172050096e-06, |
|
"logits/chosen": 1.7873187065124512, |
|
"logits/rejected": 1.7788472175598145, |
|
"logps/chosen": -231.81149291992188, |
|
"logps/rejected": -276.6201171875, |
|
"loss": 0.179, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.4262077808380127, |
|
"rewards/margins": 4.1126708984375, |
|
"rewards/rejected": -5.538878440856934, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 1.5911111111111111, |
|
"grad_norm": 0.03710145130753517, |
|
"learning_rate": 5.511685547716328e-06, |
|
"logits/chosen": 2.091726064682007, |
|
"logits/rejected": 2.0658438205718994, |
|
"logps/chosen": -469.0487060546875, |
|
"logps/rejected": -553.4110107421875, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.09385085105896, |
|
"rewards/margins": 8.56124496459961, |
|
"rewards/rejected": -11.655096054077148, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 1.5955555555555554, |
|
"grad_norm": 0.9721232056617737, |
|
"learning_rate": 5.397004835582242e-06, |
|
"logits/chosen": 2.050297498703003, |
|
"logits/rejected": 2.0740513801574707, |
|
"logps/chosen": -394.7935485839844, |
|
"logps/rejected": -656.4100341796875, |
|
"loss": 0.0184, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.355221748352051, |
|
"rewards/margins": 11.149545669555664, |
|
"rewards/rejected": -14.504767417907715, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 0.1482057273387909, |
|
"learning_rate": 5.2833852433720855e-06, |
|
"logits/chosen": 2.2320728302001953, |
|
"logits/rejected": 2.227717876434326, |
|
"logps/chosen": -449.0965881347656, |
|
"logps/rejected": -448.2038879394531, |
|
"loss": 0.0035, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -6.938412666320801, |
|
"rewards/margins": 5.938919544219971, |
|
"rewards/rejected": -12.87733268737793, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_logits/chosen": 2.020325183868408, |
|
"eval_logits/rejected": 1.97914719581604, |
|
"eval_logps/chosen": -332.6748352050781, |
|
"eval_logps/rejected": -392.21588134765625, |
|
"eval_loss": 0.41243118047714233, |
|
"eval_rewards/accuracies": 0.8035714030265808, |
|
"eval_rewards/chosen": -4.238221645355225, |
|
"eval_rewards/margins": 3.401686429977417, |
|
"eval_rewards/rejected": -7.639908313751221, |
|
"eval_runtime": 17.4022, |
|
"eval_samples_per_second": 2.873, |
|
"eval_steps_per_second": 0.402, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.6044444444444443, |
|
"grad_norm": 26.702436447143555, |
|
"learning_rate": 5.170832921371163e-06, |
|
"logits/chosen": 2.1601366996765137, |
|
"logits/rejected": 2.0925962924957275, |
|
"logps/chosen": -737.0338134765625, |
|
"logps/rejected": -734.032958984375, |
|
"loss": 0.6978, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -11.603326797485352, |
|
"rewards/margins": 4.569097995758057, |
|
"rewards/rejected": -16.17242431640625, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 1.608888888888889, |
|
"grad_norm": 4.281423091888428, |
|
"learning_rate": 5.059353962092917e-06, |
|
"logits/chosen": 1.8992071151733398, |
|
"logits/rejected": 1.9108917713165283, |
|
"logps/chosen": -202.74957275390625, |
|
"logps/rejected": -191.141357421875, |
|
"loss": 0.1472, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.23720017075538635, |
|
"rewards/margins": 1.9029862880706787, |
|
"rewards/rejected": -2.140186309814453, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 1.6133333333333333, |
|
"grad_norm": 0.12904126942157745, |
|
"learning_rate": 4.9489543999491045e-06, |
|
"logits/chosen": 2.1836905479431152, |
|
"logits/rejected": 2.0876262187957764, |
|
"logps/chosen": -383.00787353515625, |
|
"logps/rejected": -463.5958251953125, |
|
"loss": 0.004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.0018112175166606903, |
|
"rewards/margins": 6.877252578735352, |
|
"rewards/rejected": -6.879063606262207, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 1.6177777777777778, |
|
"grad_norm": 0.3242693245410919, |
|
"learning_rate": 4.839640210923197e-06, |
|
"logits/chosen": 1.981348991394043, |
|
"logits/rejected": 1.8337197303771973, |
|
"logps/chosen": -231.2086944580078, |
|
"logps/rejected": -257.2821044921875, |
|
"loss": 0.0119, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.9566452503204346, |
|
"rewards/margins": 4.5026397705078125, |
|
"rewards/rejected": -6.459284782409668, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 1.6222222222222222, |
|
"grad_norm": 0.7935138940811157, |
|
"learning_rate": 4.731417312246877e-06, |
|
"logits/chosen": 1.637596607208252, |
|
"logits/rejected": 1.6671159267425537, |
|
"logps/chosen": -181.33856201171875, |
|
"logps/rejected": -310.20806884765625, |
|
"loss": 0.0237, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.1557204723358154, |
|
"rewards/margins": 6.343451023101807, |
|
"rewards/rejected": -7.499171257019043, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 1.6266666666666667, |
|
"grad_norm": 0.17146991193294525, |
|
"learning_rate": 4.624291562079719e-06, |
|
"logits/chosen": 1.5095144510269165, |
|
"logits/rejected": 1.552412748336792, |
|
"logps/chosen": -283.5166015625, |
|
"logps/rejected": -318.7493591308594, |
|
"loss": 0.0065, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.3418242931365967, |
|
"rewards/margins": 5.029508590698242, |
|
"rewards/rejected": -8.371332168579102, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 1.6311111111111112, |
|
"grad_norm": 10.096573829650879, |
|
"learning_rate": 4.518268759192115e-06, |
|
"logits/chosen": 2.28369402885437, |
|
"logits/rejected": 2.316972255706787, |
|
"logps/chosen": -435.752685546875, |
|
"logps/rejected": -479.5185241699219, |
|
"loss": 0.1262, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.010312080383301, |
|
"rewards/margins": 4.6889142990112305, |
|
"rewards/rejected": -9.699226379394531, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 1.6355555555555554, |
|
"grad_norm": 0.13205423951148987, |
|
"learning_rate": 4.413354642651369e-06, |
|
"logits/chosen": 2.1447973251342773, |
|
"logits/rejected": 2.208026885986328, |
|
"logps/chosen": -333.09185791015625, |
|
"logps/rejected": -571.9179077148438, |
|
"loss": 0.0028, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.114415168762207, |
|
"rewards/margins": 6.399701118469238, |
|
"rewards/rejected": -8.514116287231445, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 1.6400000000000001, |
|
"grad_norm": 1.1356106996536255, |
|
"learning_rate": 4.309554891511036e-06, |
|
"logits/chosen": 2.151458740234375, |
|
"logits/rejected": 2.0680348873138428, |
|
"logps/chosen": -406.594970703125, |
|
"logps/rejected": -577.03662109375, |
|
"loss": 0.0331, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.874485731124878, |
|
"rewards/margins": 9.36522102355957, |
|
"rewards/rejected": -11.239706039428711, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 1.6444444444444444, |
|
"grad_norm": 0.09853781759738922, |
|
"learning_rate": 4.206875124503506e-06, |
|
"logits/chosen": 2.1071839332580566, |
|
"logits/rejected": 2.133695602416992, |
|
"logps/chosen": -299.2562255859375, |
|
"logps/rejected": -481.5537109375, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.580816745758057, |
|
"rewards/margins": 6.295032024383545, |
|
"rewards/rejected": -10.875848770141602, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.6488888888888888, |
|
"grad_norm": 1.1135728359222412, |
|
"learning_rate": 4.105320899735882e-06, |
|
"logits/chosen": 1.5641443729400635, |
|
"logits/rejected": 1.5768111944198608, |
|
"logps/chosen": -173.53851318359375, |
|
"logps/rejected": -211.27960205078125, |
|
"loss": 0.0758, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.20349428057670593, |
|
"rewards/margins": 2.9661078453063965, |
|
"rewards/rejected": -3.169602155685425, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 1.6533333333333333, |
|
"grad_norm": 0.5490663647651672, |
|
"learning_rate": 4.004897714389103e-06, |
|
"logits/chosen": 2.0074357986450195, |
|
"logits/rejected": 2.0224769115448, |
|
"logps/chosen": -344.1773681640625, |
|
"logps/rejected": -435.5508728027344, |
|
"loss": 0.0108, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.9361190795898438, |
|
"rewards/margins": 5.090202331542969, |
|
"rewards/rejected": -9.026321411132812, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 1.6577777777777778, |
|
"grad_norm": 1.7369130849838257, |
|
"learning_rate": 3.90561100442036e-06, |
|
"logits/chosen": 1.8831748962402344, |
|
"logits/rejected": 1.8279378414154053, |
|
"logps/chosen": -236.67889404296875, |
|
"logps/rejected": -348.7945251464844, |
|
"loss": 0.074, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.25758588314056396, |
|
"rewards/margins": 6.053953170776367, |
|
"rewards/rejected": -6.311539173126221, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 1.6622222222222223, |
|
"grad_norm": 0.015169711783528328, |
|
"learning_rate": 3.8074661442688868e-06, |
|
"logits/chosen": 2.0551671981811523, |
|
"logits/rejected": 1.9679946899414062, |
|
"logps/chosen": -285.3905944824219, |
|
"logps/rejected": -503.6309814453125, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.3399689197540283, |
|
"rewards/margins": 8.514545440673828, |
|
"rewards/rejected": -9.854513168334961, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 1.6666666666666665, |
|
"grad_norm": 0.20316235721111298, |
|
"learning_rate": 3.710468446565005e-06, |
|
"logits/chosen": 1.994492769241333, |
|
"logits/rejected": 1.9339189529418945, |
|
"logps/chosen": -295.31878662109375, |
|
"logps/rejected": -363.3540954589844, |
|
"loss": 0.0074, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.135162353515625, |
|
"rewards/margins": 4.933624267578125, |
|
"rewards/rejected": -7.06878662109375, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 1.6711111111111112, |
|
"grad_norm": 1.6665427684783936, |
|
"learning_rate": 3.6146231618425646e-06, |
|
"logits/chosen": 1.9451243877410889, |
|
"logits/rejected": 2.0122804641723633, |
|
"logps/chosen": -426.1136169433594, |
|
"logps/rejected": -602.61279296875, |
|
"loss": 0.0643, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.325921654701233, |
|
"rewards/margins": 8.843436241149902, |
|
"rewards/rejected": -10.169357299804688, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 1.6755555555555555, |
|
"grad_norm": 76.74038696289062, |
|
"learning_rate": 3.5199354782547156e-06, |
|
"logits/chosen": 2.1591286659240723, |
|
"logits/rejected": 2.029425859451294, |
|
"logps/chosen": -350.96746826171875, |
|
"logps/rejected": -409.662841796875, |
|
"loss": 2.5142, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -5.8959503173828125, |
|
"rewards/margins": -0.014461994171142578, |
|
"rewards/rejected": -5.88148832321167, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 1.6800000000000002, |
|
"grad_norm": 1.8813796043395996, |
|
"learning_rate": 3.4264105212930915e-06, |
|
"logits/chosen": 1.5345783233642578, |
|
"logits/rejected": 1.5260515213012695, |
|
"logps/chosen": -137.23216247558594, |
|
"logps/rejected": -172.87872314453125, |
|
"loss": 0.3194, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7019901275634766, |
|
"rewards/margins": 2.383763313293457, |
|
"rewards/rejected": -3.0857534408569336, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 1.6844444444444444, |
|
"grad_norm": 2.3558857440948486, |
|
"learning_rate": 3.3340533535103467e-06, |
|
"logits/chosen": 1.783468246459961, |
|
"logits/rejected": 1.840031385421753, |
|
"logps/chosen": -209.6715087890625, |
|
"logps/rejected": -231.9936065673828, |
|
"loss": 0.1628, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.41487425565719604, |
|
"rewards/margins": 1.7598159313201904, |
|
"rewards/rejected": -2.1746902465820312, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 1.6888888888888889, |
|
"grad_norm": 1.7316559553146362, |
|
"learning_rate": 3.2428689742461188e-06, |
|
"logits/chosen": 2.061565399169922, |
|
"logits/rejected": 2.0696067810058594, |
|
"logps/chosen": -303.8612060546875, |
|
"logps/rejected": -330.27777099609375, |
|
"loss": 0.0797, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.3209824562072754, |
|
"rewards/margins": 5.411049842834473, |
|
"rewards/rejected": -6.732032299041748, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.6933333333333334, |
|
"grad_norm": 12.0430269241333, |
|
"learning_rate": 3.152862319356428e-06, |
|
"logits/chosen": 1.936488389968872, |
|
"logits/rejected": 1.8861385583877563, |
|
"logps/chosen": -475.26220703125, |
|
"logps/rejected": -372.1424560546875, |
|
"loss": 0.1641, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -6.164106845855713, |
|
"rewards/margins": 3.2129530906677246, |
|
"rewards/rejected": -9.377059936523438, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 1.6977777777777778, |
|
"grad_norm": 0.1999710500240326, |
|
"learning_rate": 3.064038260946478e-06, |
|
"logits/chosen": 2.0421996116638184, |
|
"logits/rejected": 1.8888078927993774, |
|
"logps/chosen": -289.0198059082031, |
|
"logps/rejected": -385.6482849121094, |
|
"loss": 0.0068, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.0051002502441406, |
|
"rewards/margins": 6.449030876159668, |
|
"rewards/rejected": -7.454131126403809, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 1.7022222222222223, |
|
"grad_norm": 0.18681201338768005, |
|
"learning_rate": 2.9764016071069434e-06, |
|
"logits/chosen": 2.0140395164489746, |
|
"logits/rejected": 2.0304765701293945, |
|
"logps/chosen": -271.6080627441406, |
|
"logps/rejected": -379.2652893066406, |
|
"loss": 0.0112, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.2312802076339722, |
|
"rewards/margins": 5.010470867156982, |
|
"rewards/rejected": -6.241751194000244, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 1.7066666666666666, |
|
"grad_norm": 0.5151910185813904, |
|
"learning_rate": 2.8899571016536786e-06, |
|
"logits/chosen": 1.9135384559631348, |
|
"logits/rejected": 1.8421276807785034, |
|
"logps/chosen": -327.75213623046875, |
|
"logps/rejected": -383.0606689453125, |
|
"loss": 0.0062, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.634103298187256, |
|
"rewards/margins": 5.9485626220703125, |
|
"rewards/rejected": -9.582666397094727, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 1.7111111111111112, |
|
"grad_norm": 0.13173969089984894, |
|
"learning_rate": 2.8047094238709633e-06, |
|
"logits/chosen": 2.236691474914551, |
|
"logits/rejected": 2.233146905899048, |
|
"logps/chosen": -469.94879150390625, |
|
"logps/rejected": -569.8831176757812, |
|
"loss": 0.0035, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -7.104136943817139, |
|
"rewards/margins": 5.658421516418457, |
|
"rewards/rejected": -12.762557983398438, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 1.7155555555555555, |
|
"grad_norm": 0.7114121317863464, |
|
"learning_rate": 2.720663188258199e-06, |
|
"logits/chosen": 1.9220545291900635, |
|
"logits/rejected": 1.9399724006652832, |
|
"logps/chosen": -412.7835693359375, |
|
"logps/rejected": -462.8909606933594, |
|
"loss": 0.0238, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.260728359222412, |
|
"rewards/margins": 4.300273895263672, |
|
"rewards/rejected": -8.561002731323242, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"grad_norm": 25.617097854614258, |
|
"learning_rate": 2.637822944280116e-06, |
|
"logits/chosen": 1.6366169452667236, |
|
"logits/rejected": 1.6212671995162964, |
|
"logps/chosen": -231.51577758789062, |
|
"logps/rejected": -197.76779174804688, |
|
"loss": 1.1373, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -3.7230637073516846, |
|
"rewards/margins": -0.7492774724960327, |
|
"rewards/rejected": -2.9737863540649414, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 1.7244444444444444, |
|
"grad_norm": 2.152416706085205, |
|
"learning_rate": 2.5561931761205082e-06, |
|
"logits/chosen": 1.781626582145691, |
|
"logits/rejected": 1.814887523651123, |
|
"logps/chosen": -261.75830078125, |
|
"logps/rejected": -285.1453857421875, |
|
"loss": 0.0976, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.2200870513916016, |
|
"rewards/margins": 5.611637115478516, |
|
"rewards/rejected": -6.831724166870117, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 1.728888888888889, |
|
"grad_norm": 28.067974090576172, |
|
"learning_rate": 2.475778302439524e-06, |
|
"logits/chosen": 1.615212321281433, |
|
"logits/rejected": 1.6488571166992188, |
|
"logps/chosen": -317.0101318359375, |
|
"logps/rejected": -197.44625854492188, |
|
"loss": 2.2031, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -4.956669330596924, |
|
"rewards/margins": 0.2624635696411133, |
|
"rewards/rejected": -5.219132423400879, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 1.7333333333333334, |
|
"grad_norm": 19.71435546875, |
|
"learning_rate": 2.396582676134462e-06, |
|
"logits/chosen": 2.0480542182922363, |
|
"logits/rejected": 2.0675265789031982, |
|
"logps/chosen": -269.1255187988281, |
|
"logps/rejected": -295.6541442871094, |
|
"loss": 1.3784, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -4.372478485107422, |
|
"rewards/margins": -0.9686035513877869, |
|
"rewards/rejected": -3.4038748741149902, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.7377777777777776, |
|
"grad_norm": 3.7129645347595215, |
|
"learning_rate": 2.318610584104142e-06, |
|
"logits/chosen": 1.7886816263198853, |
|
"logits/rejected": 1.657137393951416, |
|
"logps/chosen": -400.5063781738281, |
|
"logps/rejected": -427.5238037109375, |
|
"loss": 0.0983, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.910498142242432, |
|
"rewards/margins": 5.50076961517334, |
|
"rewards/rejected": -10.411267280578613, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 1.7422222222222223, |
|
"grad_norm": 16.56305503845215, |
|
"learning_rate": 2.241866247016869e-06, |
|
"logits/chosen": 2.101799488067627, |
|
"logits/rejected": 2.064134120941162, |
|
"logps/chosen": -435.5615234375, |
|
"logps/rejected": -487.712158203125, |
|
"loss": 0.6052, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -6.707480430603027, |
|
"rewards/margins": 3.5099639892578125, |
|
"rewards/rejected": -10.21744441986084, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 1.7466666666666666, |
|
"grad_norm": 0.02198374643921852, |
|
"learning_rate": 2.166353819081968e-06, |
|
"logits/chosen": 2.1594762802124023, |
|
"logits/rejected": 2.2261545658111572, |
|
"logps/chosen": -441.47528076171875, |
|
"logps/rejected": -560.4810791015625, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.149522304534912, |
|
"rewards/margins": 8.681150436401367, |
|
"rewards/rejected": -11.830673217773438, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 1.751111111111111, |
|
"grad_norm": 14.180363655090332, |
|
"learning_rate": 2.092077387824884e-06, |
|
"logits/chosen": 2.0479955673217773, |
|
"logits/rejected": 1.947251796722412, |
|
"logps/chosen": -368.0101623535156, |
|
"logps/rejected": -422.68780517578125, |
|
"loss": 0.3844, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -5.151028633117676, |
|
"rewards/margins": 3.9921188354492188, |
|
"rewards/rejected": -9.143147468566895, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 1.7555555555555555, |
|
"grad_norm": 0.006763577461242676, |
|
"learning_rate": 2.0190409738659653e-06, |
|
"logits/chosen": 2.2438273429870605, |
|
"logits/rejected": 2.2101495265960693, |
|
"logps/chosen": -518.1273803710938, |
|
"logps/rejected": -702.2735595703125, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.715429782867432, |
|
"rewards/margins": 12.057371139526367, |
|
"rewards/rejected": -16.77280044555664, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"grad_norm": 0.18149635195732117, |
|
"learning_rate": 1.9472485307027945e-06, |
|
"logits/chosen": 2.1681084632873535, |
|
"logits/rejected": 2.203289270401001, |
|
"logps/chosen": -347.28582763671875, |
|
"logps/rejected": -500.73455810546875, |
|
"loss": 0.0034, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.5240983963012695, |
|
"rewards/margins": 6.283698081970215, |
|
"rewards/rejected": -8.807796478271484, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 1.7644444444444445, |
|
"grad_norm": 4.91969108581543, |
|
"learning_rate": 1.876703944496197e-06, |
|
"logits/chosen": 1.9020869731903076, |
|
"logits/rejected": 1.7357096672058105, |
|
"logps/chosen": -352.68695068359375, |
|
"logps/rejected": -305.7750244140625, |
|
"loss": 0.1342, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.5846099853515625, |
|
"rewards/margins": 4.8691534996032715, |
|
"rewards/rejected": -7.453763961791992, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 1.7688888888888887, |
|
"grad_norm": 57.27471923828125, |
|
"learning_rate": 1.8074110338598682e-06, |
|
"logits/chosen": 2.0281596183776855, |
|
"logits/rejected": 1.8878042697906494, |
|
"logps/chosen": -631.1502685546875, |
|
"logps/rejected": -437.7010498046875, |
|
"loss": 3.8594, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -13.262107849121094, |
|
"rewards/margins": -1.1633968353271484, |
|
"rewards/rejected": -12.098711013793945, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 1.7733333333333334, |
|
"grad_norm": 1.8617291450500488, |
|
"learning_rate": 1.7393735496536944e-06, |
|
"logits/chosen": 2.0407192707061768, |
|
"logits/rejected": 1.8599579334259033, |
|
"logps/chosen": -491.43707275390625, |
|
"logps/rejected": -467.81060791015625, |
|
"loss": 0.0171, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.183530807495117, |
|
"rewards/margins": 7.340937614440918, |
|
"rewards/rejected": -15.524469375610352, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 1.7777777777777777, |
|
"grad_norm": 2.8930482864379883, |
|
"learning_rate": 1.6725951747806918e-06, |
|
"logits/chosen": 1.4244745969772339, |
|
"logits/rejected": 1.4205752611160278, |
|
"logps/chosen": -128.19241333007812, |
|
"logps/rejected": -157.7073974609375, |
|
"loss": 0.3631, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.8688427209854126, |
|
"rewards/margins": 1.723132848739624, |
|
"rewards/rejected": -2.591975450515747, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.7822222222222224, |
|
"grad_norm": 3.2958927154541016, |
|
"learning_rate": 1.6070795239876618e-06, |
|
"logits/chosen": 2.2915682792663574, |
|
"logits/rejected": 2.2581777572631836, |
|
"logps/chosen": -368.1529541015625, |
|
"logps/rejected": -491.930419921875, |
|
"loss": 0.0333, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.686471700668335, |
|
"rewards/margins": 7.626599311828613, |
|
"rewards/rejected": -11.313071250915527, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 1.7866666666666666, |
|
"grad_norm": 3.986274480819702, |
|
"learning_rate": 1.5428301436695159e-06, |
|
"logits/chosen": 1.5831184387207031, |
|
"logits/rejected": 1.6049795150756836, |
|
"logps/chosen": -165.51271057128906, |
|
"logps/rejected": -185.55548095703125, |
|
"loss": 0.2804, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.1501312404870987, |
|
"rewards/margins": 1.7175559997558594, |
|
"rewards/rejected": -1.8676872253417969, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 1.791111111111111, |
|
"grad_norm": 29.756507873535156, |
|
"learning_rate": 1.479850511677322e-06, |
|
"logits/chosen": 2.165071487426758, |
|
"logits/rejected": 2.0931169986724854, |
|
"logps/chosen": -528.3546142578125, |
|
"logps/rejected": -583.2621459960938, |
|
"loss": 1.384, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -6.661299228668213, |
|
"rewards/margins": 4.992590427398682, |
|
"rewards/rejected": -11.653889656066895, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 1.7955555555555556, |
|
"grad_norm": 6.813007354736328, |
|
"learning_rate": 1.4181440371300342e-06, |
|
"logits/chosen": 1.9760260581970215, |
|
"logits/rejected": 1.9418466091156006, |
|
"logps/chosen": -360.95574951171875, |
|
"logps/rejected": -394.4306335449219, |
|
"loss": 0.1623, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.3200485706329346, |
|
"rewards/margins": 4.02227783203125, |
|
"rewards/rejected": -5.3423261642456055, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 0.347569078207016, |
|
"learning_rate": 1.3577140602299448e-06, |
|
"logits/chosen": 2.0109634399414062, |
|
"logits/rejected": 1.9843730926513672, |
|
"logps/chosen": -423.1788330078125, |
|
"logps/rejected": -527.6969604492188, |
|
"loss": 0.0099, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.6651382446289062, |
|
"rewards/margins": 6.632790565490723, |
|
"rewards/rejected": -9.297929763793945, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_logits/chosen": 2.008713722229004, |
|
"eval_logits/rejected": 1.9683387279510498, |
|
"eval_logps/chosen": -333.8675231933594, |
|
"eval_logps/rejected": -395.43695068359375, |
|
"eval_loss": 0.38143062591552734, |
|
"eval_rewards/accuracies": 0.8214285969734192, |
|
"eval_rewards/chosen": -4.357491493225098, |
|
"eval_rewards/margins": 3.6045258045196533, |
|
"eval_rewards/rejected": -7.96201753616333, |
|
"eval_runtime": 17.3982, |
|
"eval_samples_per_second": 2.874, |
|
"eval_steps_per_second": 0.402, |
|
"step": 405 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 450, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 45, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |