{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.999237456153729, "eval_steps": 1639, "global_step": 8195, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0006100350770169285, "grad_norm": 5.8661699295043945, "learning_rate": 2.6666666666666667e-07, "log_odds_chosen": 0.40475258231163025, "log_odds_ratio": -0.6082888841629028, "logits/chosen": -1.8183057308197021, "logits/rejected": -1.8526496887207031, "logps/chosen": -3.988799571990967, "logps/rejected": -4.395384311676025, "loss": 4.3312, "nll_loss": 4.309820175170898, "rewards/accuracies": 0.75, "rewards/chosen": -0.3988799452781677, "rewards/margins": 0.04065848886966705, "rewards/rejected": -0.43953844904899597, "step": 1 }, { "epoch": 0.001220070154033857, "grad_norm": 5.590966701507568, "learning_rate": 5.333333333333333e-07, "log_odds_chosen": -0.11188226193189621, "log_odds_ratio": -0.7825625538825989, "logits/chosen": -1.5730516910552979, "logits/rejected": -1.6906580924987793, "logps/chosen": -3.548737049102783, "logps/rejected": -3.43538498878479, "loss": 3.8871, "nll_loss": 3.590762138366699, "rewards/accuracies": 0.375, "rewards/chosen": -0.3548737168312073, "rewards/margins": -0.011335203424096107, "rewards/rejected": -0.3435385227203369, "step": 2 }, { "epoch": 0.0018301052310507853, "grad_norm": 5.407670021057129, "learning_rate": 8e-07, "log_odds_chosen": 0.1568901538848877, "log_odds_ratio": -0.7190929651260376, "logits/chosen": -1.6038877964019775, "logits/rejected": -1.6699702739715576, "logps/chosen": -3.5208914279937744, "logps/rejected": -3.66849946975708, "loss": 3.943, "nll_loss": 3.7194385528564453, "rewards/accuracies": 0.625, "rewards/chosen": -0.35208916664123535, "rewards/margins": 0.0147608183324337, "rewards/rejected": -0.36684995889663696, "step": 3 }, { "epoch": 0.002440140308067714, "grad_norm": 4.840794563293457, "learning_rate": 1.0666666666666667e-06, "log_odds_chosen": 0.22611016035079956, "log_odds_ratio": -0.8185205459594727, "logits/chosen": -1.650814414024353, "logits/rejected": -1.7468597888946533, "logps/chosen": -3.6391141414642334, "logps/rejected": -3.8412036895751953, "loss": 3.7674, "nll_loss": 3.6112990379333496, "rewards/accuracies": 0.75, "rewards/chosen": -0.3639114201068878, "rewards/margins": 0.020208945497870445, "rewards/rejected": -0.3841203451156616, "step": 4 }, { "epoch": 0.0030501753850846426, "grad_norm": 5.261303901672363, "learning_rate": 1.3333333333333332e-06, "log_odds_chosen": 0.2593729794025421, "log_odds_ratio": -0.6225250959396362, "logits/chosen": -1.884399175643921, "logits/rejected": -1.874635934829712, "logps/chosen": -3.464994430541992, "logps/rejected": -3.703449010848999, "loss": 4.1063, "nll_loss": 3.9478328227996826, "rewards/accuracies": 0.625, "rewards/chosen": -0.3464994430541992, "rewards/margins": 0.02384546585381031, "rewards/rejected": -0.370344877243042, "step": 5 }, { "epoch": 0.0036602104621015707, "grad_norm": 5.08738899230957, "learning_rate": 1.6e-06, "log_odds_chosen": 1.1324759721755981, "log_odds_ratio": -0.49248525500297546, "logits/chosen": -1.614241600036621, "logits/rejected": -1.774787425994873, "logps/chosen": -3.150095224380493, "logps/rejected": -4.257132053375244, "loss": 3.6656, "nll_loss": 3.7183709144592285, "rewards/accuracies": 0.75, "rewards/chosen": -0.31500953435897827, "rewards/margins": 0.11070369184017181, "rewards/rejected": -0.4257132112979889, "step": 6 }, { "epoch": 0.0042702455391185, "grad_norm": 6.754072666168213, "learning_rate": 1.8666666666666667e-06, "log_odds_chosen": 0.21199947595596313, "log_odds_ratio": -0.6173521876335144, "logits/chosen": -1.6340386867523193, "logits/rejected": -1.626410722732544, "logps/chosen": -3.6223983764648438, "logps/rejected": -3.827378273010254, "loss": 4.2157, "nll_loss": 4.062716484069824, "rewards/accuracies": 0.5, "rewards/chosen": -0.36223986744880676, "rewards/margins": 0.02049800008535385, "rewards/rejected": -0.38273781538009644, "step": 7 }, { "epoch": 0.004880280616135428, "grad_norm": 7.314718723297119, "learning_rate": 2.1333333333333334e-06, "log_odds_chosen": -0.09046608209609985, "log_odds_ratio": -0.8319488167762756, "logits/chosen": -1.769519329071045, "logits/rejected": -1.721100926399231, "logps/chosen": -3.68027925491333, "logps/rejected": -3.595604658126831, "loss": 4.3518, "nll_loss": 4.144431114196777, "rewards/accuracies": 0.5, "rewards/chosen": -0.3680279850959778, "rewards/margins": -0.008467473089694977, "rewards/rejected": -0.359560489654541, "step": 8 }, { "epoch": 0.005490315693152357, "grad_norm": 5.2287187576293945, "learning_rate": 2.4e-06, "log_odds_chosen": 0.6594563722610474, "log_odds_ratio": -0.5483207702636719, "logits/chosen": -1.8032073974609375, "logits/rejected": -1.6298837661743164, "logps/chosen": -3.794393301010132, "logps/rejected": -4.444128513336182, "loss": 4.1601, "nll_loss": 4.250706195831299, "rewards/accuracies": 0.625, "rewards/chosen": -0.3794393241405487, "rewards/margins": 0.06497351825237274, "rewards/rejected": -0.44441282749176025, "step": 9 }, { "epoch": 0.006100350770169285, "grad_norm": 5.612982273101807, "learning_rate": 2.6666666666666664e-06, "log_odds_chosen": -0.2927180528640747, "log_odds_ratio": -0.9468244314193726, "logits/chosen": -2.0563578605651855, "logits/rejected": -1.8885996341705322, "logps/chosen": -4.0651655197143555, "logps/rejected": -3.800645112991333, "loss": 4.2766, "nll_loss": 4.310928821563721, "rewards/accuracies": 0.375, "rewards/chosen": -0.40651655197143555, "rewards/margins": -0.026452042162418365, "rewards/rejected": -0.3800645172595978, "step": 10 }, { "epoch": 0.006710385847186213, "grad_norm": 5.8622727394104, "learning_rate": 2.933333333333333e-06, "log_odds_chosen": 1.0433703660964966, "log_odds_ratio": -0.6270608901977539, "logits/chosen": -1.9005712270736694, "logits/rejected": -1.8938449621200562, "logps/chosen": -3.296123504638672, "logps/rejected": -4.338015079498291, "loss": 4.098, "nll_loss": 3.745035171508789, "rewards/accuracies": 0.75, "rewards/chosen": -0.3296123445034027, "rewards/margins": 0.10418914258480072, "rewards/rejected": -0.4338015019893646, "step": 11 }, { "epoch": 0.007320420924203141, "grad_norm": 5.78604793548584, "learning_rate": 3.2e-06, "log_odds_chosen": 1.0740402936935425, "log_odds_ratio": -0.42866671085357666, "logits/chosen": -1.7885470390319824, "logits/rejected": -1.765439748764038, "logps/chosen": -3.252180576324463, "logps/rejected": -4.305454254150391, "loss": 4.0089, "nll_loss": 3.702385902404785, "rewards/accuracies": 0.75, "rewards/chosen": -0.3252180218696594, "rewards/margins": 0.10532739758491516, "rewards/rejected": -0.430545449256897, "step": 12 }, { "epoch": 0.00793045600122007, "grad_norm": 5.8213982582092285, "learning_rate": 3.466666666666667e-06, "log_odds_chosen": -0.27198001742362976, "log_odds_ratio": -0.9987834692001343, "logits/chosen": -1.8714268207550049, "logits/rejected": -1.7955965995788574, "logps/chosen": -3.579291820526123, "logps/rejected": -3.3150923252105713, "loss": 4.3363, "nll_loss": 3.826323986053467, "rewards/accuracies": 0.25, "rewards/chosen": -0.35792917013168335, "rewards/margins": -0.02641996741294861, "rewards/rejected": -0.33150923252105713, "step": 13 }, { "epoch": 0.008540491078237, "grad_norm": 5.071163654327393, "learning_rate": 3.7333333333333333e-06, "log_odds_chosen": 0.5618191361427307, "log_odds_ratio": -0.5255388617515564, "logits/chosen": -1.839723825454712, "logits/rejected": -1.7556318044662476, "logps/chosen": -3.5914552211761475, "logps/rejected": -4.131275177001953, "loss": 3.9604, "nll_loss": 3.8429272174835205, "rewards/accuracies": 0.875, "rewards/chosen": -0.35914552211761475, "rewards/margins": 0.05398201942443848, "rewards/rejected": -0.4131275415420532, "step": 14 }, { "epoch": 0.009150526155253927, "grad_norm": 5.683403968811035, "learning_rate": 4e-06, "log_odds_chosen": -0.1417250633239746, "log_odds_ratio": -0.983815610408783, "logits/chosen": -1.9141600131988525, "logits/rejected": -1.83555006980896, "logps/chosen": -4.912601947784424, "logps/rejected": -4.764454364776611, "loss": 4.1421, "nll_loss": 4.203510761260986, "rewards/accuracies": 0.5, "rewards/chosen": -0.49126023054122925, "rewards/margins": -0.014814797788858414, "rewards/rejected": -0.47644543647766113, "step": 15 }, { "epoch": 0.009760561232270856, "grad_norm": 5.110896587371826, "learning_rate": 4.266666666666667e-06, "log_odds_chosen": 0.347435861825943, "log_odds_ratio": -0.672203540802002, "logits/chosen": -1.638167142868042, "logits/rejected": -1.5841045379638672, "logps/chosen": -3.185490131378174, "logps/rejected": -3.5164124965667725, "loss": 3.8322, "nll_loss": 3.5989952087402344, "rewards/accuracies": 0.5, "rewards/chosen": -0.3185490369796753, "rewards/margins": 0.033092230558395386, "rewards/rejected": -0.3516412377357483, "step": 16 }, { "epoch": 0.010370596309287784, "grad_norm": 5.915971279144287, "learning_rate": 4.533333333333333e-06, "log_odds_chosen": 0.12798890471458435, "log_odds_ratio": -0.7042856216430664, "logits/chosen": -1.9241524934768677, "logits/rejected": -1.7357978820800781, "logps/chosen": -3.9219489097595215, "logps/rejected": -4.046096324920654, "loss": 4.0314, "nll_loss": 4.2817230224609375, "rewards/accuracies": 0.75, "rewards/chosen": -0.39219486713409424, "rewards/margins": 0.012414749711751938, "rewards/rejected": -0.40460965037345886, "step": 17 }, { "epoch": 0.010980631386304713, "grad_norm": 6.367697238922119, "learning_rate": 4.8e-06, "log_odds_chosen": -0.4565976560115814, "log_odds_ratio": -1.1213418245315552, "logits/chosen": -1.7006479501724243, "logits/rejected": -1.745765209197998, "logps/chosen": -4.42435359954834, "logps/rejected": -3.965118408203125, "loss": 4.3574, "nll_loss": 4.513551235198975, "rewards/accuracies": 0.5, "rewards/chosen": -0.4424353539943695, "rewards/margins": -0.04592348635196686, "rewards/rejected": -0.39651188254356384, "step": 18 }, { "epoch": 0.011590666463321641, "grad_norm": 4.913632392883301, "learning_rate": 5.066666666666666e-06, "log_odds_chosen": 0.6986532211303711, "log_odds_ratio": -0.4638938307762146, "logits/chosen": -1.4644982814788818, "logits/rejected": -1.5976338386535645, "logps/chosen": -3.242945432662964, "logps/rejected": -3.921328067779541, "loss": 3.6467, "nll_loss": 3.2077701091766357, "rewards/accuracies": 0.875, "rewards/chosen": -0.3242945373058319, "rewards/margins": 0.06783826649188995, "rewards/rejected": -0.39213281869888306, "step": 19 }, { "epoch": 0.01220070154033857, "grad_norm": 5.605895042419434, "learning_rate": 5.333333333333333e-06, "log_odds_chosen": 0.012031935155391693, "log_odds_ratio": -0.715293288230896, "logits/chosen": -1.8383089303970337, "logits/rejected": -1.7695362567901611, "logps/chosen": -3.7141830921173096, "logps/rejected": -3.7285871505737305, "loss": 4.0367, "nll_loss": 4.131415367126465, "rewards/accuracies": 0.25, "rewards/chosen": -0.3714183270931244, "rewards/margins": 0.001440383493900299, "rewards/rejected": -0.3728587329387665, "step": 20 }, { "epoch": 0.012810736617355498, "grad_norm": 6.027003288269043, "learning_rate": 5.6e-06, "log_odds_chosen": -0.11943256855010986, "log_odds_ratio": -0.8201964497566223, "logits/chosen": -1.5784088373184204, "logits/rejected": -1.8745226860046387, "logps/chosen": -3.279625654220581, "logps/rejected": -3.1532323360443115, "loss": 3.6457, "nll_loss": 3.6174070835113525, "rewards/accuracies": 0.375, "rewards/chosen": -0.32796257734298706, "rewards/margins": -0.012639304623007774, "rewards/rejected": -0.31532326340675354, "step": 21 }, { "epoch": 0.013420771694372426, "grad_norm": 5.6670098304748535, "learning_rate": 5.866666666666666e-06, "log_odds_chosen": 0.9022667407989502, "log_odds_ratio": -0.48745080828666687, "logits/chosen": -1.8465652465820312, "logits/rejected": -1.7199620008468628, "logps/chosen": -3.4883170127868652, "logps/rejected": -4.370348930358887, "loss": 3.9601, "nll_loss": 3.931508779525757, "rewards/accuracies": 0.75, "rewards/chosen": -0.3488317131996155, "rewards/margins": 0.08820320665836334, "rewards/rejected": -0.4370349049568176, "step": 22 }, { "epoch": 0.014030806771389355, "grad_norm": 5.181885242462158, "learning_rate": 6.133333333333334e-06, "log_odds_chosen": -0.35247284173965454, "log_odds_ratio": -0.92566978931427, "logits/chosen": -1.4767199754714966, "logits/rejected": -1.6311578750610352, "logps/chosen": -3.450125217437744, "logps/rejected": -3.1115708351135254, "loss": 3.7467, "nll_loss": 3.5863871574401855, "rewards/accuracies": 0.25, "rewards/chosen": -0.3450125455856323, "rewards/margins": -0.033855460584163666, "rewards/rejected": -0.31115707755088806, "step": 23 }, { "epoch": 0.014640841848406283, "grad_norm": 6.64169979095459, "learning_rate": 6.4e-06, "log_odds_chosen": -0.9984688758850098, "log_odds_ratio": -1.603224515914917, "logits/chosen": -1.6143412590026855, "logits/rejected": -1.5497024059295654, "logps/chosen": -4.021011829376221, "logps/rejected": -3.063263416290283, "loss": 3.5604, "nll_loss": 3.473811149597168, "rewards/accuracies": 0.375, "rewards/chosen": -0.40210118889808655, "rewards/margins": -0.09577485918998718, "rewards/rejected": -0.306326299905777, "step": 24 }, { "epoch": 0.015250876925423212, "grad_norm": 4.571923732757568, "learning_rate": 6.666666666666667e-06, "log_odds_chosen": 1.4435513019561768, "log_odds_ratio": -0.4201183021068573, "logits/chosen": -1.9282798767089844, "logits/rejected": -2.010723829269409, "logps/chosen": -3.078062057495117, "logps/rejected": -4.466904163360596, "loss": 3.4356, "nll_loss": 3.361717939376831, "rewards/accuracies": 0.875, "rewards/chosen": -0.30780622363090515, "rewards/margins": 0.13888418674468994, "rewards/rejected": -0.4466904103755951, "step": 25 }, { "epoch": 0.01586091200244014, "grad_norm": 4.538055419921875, "learning_rate": 6.933333333333334e-06, "log_odds_chosen": 0.7103658318519592, "log_odds_ratio": -0.4660888612270355, "logits/chosen": -1.559739112854004, "logits/rejected": -1.558873176574707, "logps/chosen": -3.533261299133301, "logps/rejected": -4.20928430557251, "loss": 3.556, "nll_loss": 3.163360595703125, "rewards/accuracies": 0.75, "rewards/chosen": -0.35332614183425903, "rewards/margins": 0.06760229170322418, "rewards/rejected": -0.420928418636322, "step": 26 }, { "epoch": 0.01647094707945707, "grad_norm": 5.3035383224487305, "learning_rate": 7.2e-06, "log_odds_chosen": 0.4611160159111023, "log_odds_ratio": -0.5592554211616516, "logits/chosen": -1.9288463592529297, "logits/rejected": -1.7751396894454956, "logps/chosen": -2.959639072418213, "logps/rejected": -3.397279739379883, "loss": 3.5495, "nll_loss": 3.42233943939209, "rewards/accuracies": 0.75, "rewards/chosen": -0.2959638833999634, "rewards/margins": 0.04376406967639923, "rewards/rejected": -0.3397279977798462, "step": 27 }, { "epoch": 0.017080982156474, "grad_norm": 4.155711650848389, "learning_rate": 7.466666666666667e-06, "log_odds_chosen": 0.8228877186775208, "log_odds_ratio": -0.505296528339386, "logits/chosen": -1.7954788208007812, "logits/rejected": -1.626259684562683, "logps/chosen": -2.767993927001953, "logps/rejected": -3.5514678955078125, "loss": 3.3309, "nll_loss": 3.079341173171997, "rewards/accuracies": 0.875, "rewards/chosen": -0.27679938077926636, "rewards/margins": 0.07834739983081818, "rewards/rejected": -0.3551468253135681, "step": 28 }, { "epoch": 0.017691017233490924, "grad_norm": 4.245190620422363, "learning_rate": 7.733333333333333e-06, "log_odds_chosen": 0.55818772315979, "log_odds_ratio": -0.5403850078582764, "logits/chosen": -1.8279547691345215, "logits/rejected": -1.658295750617981, "logps/chosen": -3.0549185276031494, "logps/rejected": -3.592280864715576, "loss": 3.3159, "nll_loss": 3.3109731674194336, "rewards/accuracies": 0.75, "rewards/chosen": -0.3054918646812439, "rewards/margins": 0.053736232221126556, "rewards/rejected": -0.35922807455062866, "step": 29 }, { "epoch": 0.018301052310507854, "grad_norm": 7.287390232086182, "learning_rate": 8e-06, "log_odds_chosen": 0.33137160539627075, "log_odds_ratio": -0.656853437423706, "logits/chosen": -1.9413690567016602, "logits/rejected": -1.5346884727478027, "logps/chosen": -3.6021170616149902, "logps/rejected": -3.9198784828186035, "loss": 3.4706, "nll_loss": 3.540396213531494, "rewards/accuracies": 0.75, "rewards/chosen": -0.36021173000335693, "rewards/margins": 0.03177614510059357, "rewards/rejected": -0.3919878602027893, "step": 30 }, { "epoch": 0.018911087387524783, "grad_norm": 4.544059753417969, "learning_rate": 7.999020208205756e-06, "log_odds_chosen": -0.09943173825740814, "log_odds_ratio": -0.7651546001434326, "logits/chosen": -1.9639930725097656, "logits/rejected": -1.7425330877304077, "logps/chosen": -3.4321775436401367, "logps/rejected": -3.335564374923706, "loss": 3.7017, "nll_loss": 3.661449432373047, "rewards/accuracies": 0.375, "rewards/chosen": -0.34321779012680054, "rewards/margins": -0.009661301970481873, "rewards/rejected": -0.33355647325515747, "step": 31 }, { "epoch": 0.019521122464541713, "grad_norm": 4.28929328918457, "learning_rate": 7.998040416411512e-06, "log_odds_chosen": 0.3180930018424988, "log_odds_ratio": -0.6148769855499268, "logits/chosen": -1.605289340019226, "logits/rejected": -1.6832953691482544, "logps/chosen": -3.0358619689941406, "logps/rejected": -3.340078830718994, "loss": 3.3184, "nll_loss": 3.3358702659606934, "rewards/accuracies": 0.625, "rewards/chosen": -0.3035861849784851, "rewards/margins": 0.030421683564782143, "rewards/rejected": -0.3340078890323639, "step": 32 }, { "epoch": 0.02013115754155864, "grad_norm": 4.643866539001465, "learning_rate": 7.997060624617268e-06, "log_odds_chosen": 0.008085452020168304, "log_odds_ratio": -0.7580121159553528, "logits/chosen": -1.7432408332824707, "logits/rejected": -1.6876721382141113, "logps/chosen": -2.9497952461242676, "logps/rejected": -2.9519479274749756, "loss": 3.4691, "nll_loss": 3.172921657562256, "rewards/accuracies": 0.375, "rewards/chosen": -0.2949795126914978, "rewards/margins": 0.00021526217460632324, "rewards/rejected": -0.2951948046684265, "step": 33 }, { "epoch": 0.020741192618575568, "grad_norm": 4.602156162261963, "learning_rate": 7.996080832823024e-06, "log_odds_chosen": -0.6970983743667603, "log_odds_ratio": -1.278496265411377, "logits/chosen": -1.7407649755477905, "logits/rejected": -1.6742023229599, "logps/chosen": -3.548128843307495, "logps/rejected": -2.8685290813446045, "loss": 3.2601, "nll_loss": 3.3987722396850586, "rewards/accuracies": 0.5, "rewards/chosen": -0.3548129200935364, "rewards/margins": -0.0679599940776825, "rewards/rejected": -0.2868528962135315, "step": 34 }, { "epoch": 0.021351227695592497, "grad_norm": 4.507389068603516, "learning_rate": 7.99510104102878e-06, "log_odds_chosen": 0.04028065502643585, "log_odds_ratio": -0.7375363707542419, "logits/chosen": -1.6980992555618286, "logits/rejected": -1.679315447807312, "logps/chosen": -2.570836067199707, "logps/rejected": -2.6049656867980957, "loss": 3.1696, "nll_loss": 2.761922597885132, "rewards/accuracies": 0.5, "rewards/chosen": -0.25708359479904175, "rewards/margins": 0.003412984311580658, "rewards/rejected": -0.2604966163635254, "step": 35 }, { "epoch": 0.021961262772609427, "grad_norm": 4.758188724517822, "learning_rate": 7.994121249234538e-06, "log_odds_chosen": 0.15047979354858398, "log_odds_ratio": -0.9813517928123474, "logits/chosen": -1.881434440612793, "logits/rejected": -1.769799828529358, "logps/chosen": -3.0544612407684326, "logps/rejected": -3.19195294380188, "loss": 3.0584, "nll_loss": 3.2361748218536377, "rewards/accuracies": 0.625, "rewards/chosen": -0.3054461181163788, "rewards/margins": 0.013749165460467339, "rewards/rejected": -0.3191952705383301, "step": 36 }, { "epoch": 0.022571297849626352, "grad_norm": 4.151647090911865, "learning_rate": 7.993141457440294e-06, "log_odds_chosen": 0.042477577924728394, "log_odds_ratio": -0.7802722454071045, "logits/chosen": -1.7138612270355225, "logits/rejected": -1.71621835231781, "logps/chosen": -2.988293170928955, "logps/rejected": -3.021850109100342, "loss": 3.1945, "nll_loss": 3.00020694732666, "rewards/accuracies": 0.375, "rewards/chosen": -0.2988293170928955, "rewards/margins": 0.0033557191491127014, "rewards/rejected": -0.3021850287914276, "step": 37 }, { "epoch": 0.023181332926643282, "grad_norm": 5.1718268394470215, "learning_rate": 7.99216166564605e-06, "log_odds_chosen": -0.18128183484077454, "log_odds_ratio": -0.9431334733963013, "logits/chosen": -1.7927954196929932, "logits/rejected": -1.7206255197525024, "logps/chosen": -3.2793843746185303, "logps/rejected": -3.10009503364563, "loss": 3.1945, "nll_loss": 3.3296444416046143, "rewards/accuracies": 0.5, "rewards/chosen": -0.3279384672641754, "rewards/margins": -0.017928924411535263, "rewards/rejected": -0.31000953912734985, "step": 38 }, { "epoch": 0.02379136800366021, "grad_norm": 4.913817882537842, "learning_rate": 7.991181873851806e-06, "log_odds_chosen": 0.6172723174095154, "log_odds_ratio": -0.5446200370788574, "logits/chosen": -1.6630730628967285, "logits/rejected": -1.6445400714874268, "logps/chosen": -2.6747195720672607, "logps/rejected": -3.256246328353882, "loss": 2.9974, "nll_loss": 2.8014724254608154, "rewards/accuracies": 0.625, "rewards/chosen": -0.26747196912765503, "rewards/margins": 0.058152660727500916, "rewards/rejected": -0.32562464475631714, "step": 39 }, { "epoch": 0.02440140308067714, "grad_norm": 4.886686325073242, "learning_rate": 7.990202082057563e-06, "log_odds_chosen": 0.09901432693004608, "log_odds_ratio": -0.718325138092041, "logits/chosen": -1.7683387994766235, "logits/rejected": -1.762086272239685, "logps/chosen": -2.428389072418213, "logps/rejected": -2.520949125289917, "loss": 2.8291, "nll_loss": 2.735095500946045, "rewards/accuracies": 0.5, "rewards/chosen": -0.24283888936042786, "rewards/margins": 0.009256022050976753, "rewards/rejected": -0.25209492444992065, "step": 40 }, { "epoch": 0.025011438157694067, "grad_norm": 5.254570484161377, "learning_rate": 7.989222290263319e-06, "log_odds_chosen": 0.303674578666687, "log_odds_ratio": -0.5982803702354431, "logits/chosen": -1.6201701164245605, "logits/rejected": -1.6326053142547607, "logps/chosen": -2.5766122341156006, "logps/rejected": -2.83427095413208, "loss": 2.9685, "nll_loss": 2.8483855724334717, "rewards/accuracies": 0.875, "rewards/chosen": -0.25766122341156006, "rewards/margins": 0.025765879079699516, "rewards/rejected": -0.28342708945274353, "step": 41 }, { "epoch": 0.025621473234710996, "grad_norm": 4.557460308074951, "learning_rate": 7.988242498469075e-06, "log_odds_chosen": 0.15394935011863708, "log_odds_ratio": -0.6651225686073303, "logits/chosen": -1.724156379699707, "logits/rejected": -1.6704078912734985, "logps/chosen": -2.769479274749756, "logps/rejected": -2.904977798461914, "loss": 2.848, "nll_loss": 2.9148335456848145, "rewards/accuracies": 0.5, "rewards/chosen": -0.27694791555404663, "rewards/margins": 0.013549825176596642, "rewards/rejected": -0.2904977798461914, "step": 42 }, { "epoch": 0.026231508311727925, "grad_norm": 4.412582874298096, "learning_rate": 7.987262706674831e-06, "log_odds_chosen": 0.7759913206100464, "log_odds_ratio": -0.45008185505867004, "logits/chosen": -1.539524793624878, "logits/rejected": -1.6712660789489746, "logps/chosen": -2.049626588821411, "logps/rejected": -2.7492570877075195, "loss": 2.7722, "nll_loss": 2.5450470447540283, "rewards/accuracies": 0.875, "rewards/chosen": -0.20496267080307007, "rewards/margins": 0.06996303051710129, "rewards/rejected": -0.27492570877075195, "step": 43 }, { "epoch": 0.02684154338874485, "grad_norm": 3.780503273010254, "learning_rate": 7.986282914880589e-06, "log_odds_chosen": 0.35424935817718506, "log_odds_ratio": -0.5727294087409973, "logits/chosen": -1.6226611137390137, "logits/rejected": -1.627966046333313, "logps/chosen": -2.518103837966919, "logps/rejected": -2.8492231369018555, "loss": 2.8175, "nll_loss": 2.760067939758301, "rewards/accuracies": 0.625, "rewards/chosen": -0.25181037187576294, "rewards/margins": 0.033111944794654846, "rewards/rejected": -0.2849223017692566, "step": 44 }, { "epoch": 0.02745157846576178, "grad_norm": 3.85740327835083, "learning_rate": 7.985303123086343e-06, "log_odds_chosen": 0.5616327524185181, "log_odds_ratio": -0.5375369191169739, "logits/chosen": -1.5850794315338135, "logits/rejected": -1.6567716598510742, "logps/chosen": -2.4727120399475098, "logps/rejected": -2.990933895111084, "loss": 2.8911, "nll_loss": 3.0850539207458496, "rewards/accuracies": 0.75, "rewards/chosen": -0.24727120995521545, "rewards/margins": 0.05182214826345444, "rewards/rejected": -0.2990933656692505, "step": 45 }, { "epoch": 0.02806161354277871, "grad_norm": 3.913194417953491, "learning_rate": 7.9843233312921e-06, "log_odds_chosen": 0.40601015090942383, "log_odds_ratio": -0.6042528748512268, "logits/chosen": -1.5219168663024902, "logits/rejected": -1.5409836769104004, "logps/chosen": -2.5381906032562256, "logps/rejected": -2.924741744995117, "loss": 2.7899, "nll_loss": 2.6851589679718018, "rewards/accuracies": 0.625, "rewards/chosen": -0.2538190484046936, "rewards/margins": 0.0386551134288311, "rewards/rejected": -0.2924741804599762, "step": 46 }, { "epoch": 0.02867164861979564, "grad_norm": 3.722256660461426, "learning_rate": 7.983343539497857e-06, "log_odds_chosen": -0.16783322393894196, "log_odds_ratio": -1.0263850688934326, "logits/chosen": -1.5855308771133423, "logits/rejected": -1.5826709270477295, "logps/chosen": -2.933098316192627, "logps/rejected": -2.76784610748291, "loss": 2.7855, "nll_loss": 3.219956398010254, "rewards/accuracies": 0.375, "rewards/chosen": -0.2933098375797272, "rewards/margins": -0.016525208950042725, "rewards/rejected": -0.27678462862968445, "step": 47 }, { "epoch": 0.029281683696812565, "grad_norm": 6.008697986602783, "learning_rate": 7.982363747703613e-06, "log_odds_chosen": -0.40763503313064575, "log_odds_ratio": -0.9871083498001099, "logits/chosen": -1.5025759935379028, "logits/rejected": -1.3963420391082764, "logps/chosen": -2.6028225421905518, "logps/rejected": -2.218156337738037, "loss": 2.7892, "nll_loss": 2.802739143371582, "rewards/accuracies": 0.375, "rewards/chosen": -0.2602822482585907, "rewards/margins": -0.038466595113277435, "rewards/rejected": -0.22181564569473267, "step": 48 }, { "epoch": 0.029891718773829495, "grad_norm": 4.259695053100586, "learning_rate": 7.98138395590937e-06, "log_odds_chosen": -0.06315270811319351, "log_odds_ratio": -0.7579566240310669, "logits/chosen": -1.5127215385437012, "logits/rejected": -1.5460669994354248, "logps/chosen": -2.459181785583496, "logps/rejected": -2.4068050384521484, "loss": 2.5684, "nll_loss": 2.6148412227630615, "rewards/accuracies": 0.5, "rewards/chosen": -0.2459181845188141, "rewards/margins": -0.005237681791186333, "rewards/rejected": -0.2406804859638214, "step": 49 }, { "epoch": 0.030501753850846424, "grad_norm": 3.922170400619507, "learning_rate": 7.980404164115125e-06, "log_odds_chosen": 0.44673264026641846, "log_odds_ratio": -0.5725299715995789, "logits/chosen": -1.4952332973480225, "logits/rejected": -1.4938132762908936, "logps/chosen": -2.398581027984619, "logps/rejected": -2.8218979835510254, "loss": 2.6613, "nll_loss": 2.5619168281555176, "rewards/accuracies": 0.75, "rewards/chosen": -0.23985812067985535, "rewards/margins": 0.04233168810606003, "rewards/rejected": -0.2821897864341736, "step": 50 }, { "epoch": 0.031111788927863353, "grad_norm": 4.366194248199463, "learning_rate": 7.979424372320882e-06, "log_odds_chosen": 0.29021596908569336, "log_odds_ratio": -0.5822319984436035, "logits/chosen": -1.2190349102020264, "logits/rejected": -1.251035451889038, "logps/chosen": -2.0677552223205566, "logps/rejected": -2.322950839996338, "loss": 2.5686, "nll_loss": 2.245218515396118, "rewards/accuracies": 0.75, "rewards/chosen": -0.20677554607391357, "rewards/margins": 0.02551952935755253, "rewards/rejected": -0.23229506611824036, "step": 51 }, { "epoch": 0.03172182400488028, "grad_norm": 4.39836311340332, "learning_rate": 7.978444580526638e-06, "log_odds_chosen": 0.07649998366832733, "log_odds_ratio": -0.727651834487915, "logits/chosen": -1.5063222646713257, "logits/rejected": -1.4645870923995972, "logps/chosen": -2.492109775543213, "logps/rejected": -2.562729597091675, "loss": 2.6313, "nll_loss": 2.6865458488464355, "rewards/accuracies": 0.5, "rewards/chosen": -0.24921095371246338, "rewards/margins": 0.007061982527375221, "rewards/rejected": -0.25627297163009644, "step": 52 }, { "epoch": 0.03233185908189721, "grad_norm": 6.568082809448242, "learning_rate": 7.977464788732394e-06, "log_odds_chosen": 0.31785693764686584, "log_odds_ratio": -0.6573225855827332, "logits/chosen": -1.3875250816345215, "logits/rejected": -1.45305335521698, "logps/chosen": -1.630338191986084, "logps/rejected": -1.9214571714401245, "loss": 2.5415, "nll_loss": 2.3744678497314453, "rewards/accuracies": 0.625, "rewards/chosen": -0.16303381323814392, "rewards/margins": 0.029111888259649277, "rewards/rejected": -0.1921457052230835, "step": 53 }, { "epoch": 0.03294189415891414, "grad_norm": 4.731566429138184, "learning_rate": 7.97648499693815e-06, "log_odds_chosen": -0.2694026529788971, "log_odds_ratio": -0.9048912525177002, "logits/chosen": -1.3883681297302246, "logits/rejected": -1.3793829679489136, "logps/chosen": -2.373121976852417, "logps/rejected": -2.125659227371216, "loss": 2.7512, "nll_loss": 2.641587257385254, "rewards/accuracies": 0.375, "rewards/chosen": -0.2373122125864029, "rewards/margins": -0.024746285751461983, "rewards/rejected": -0.21256591379642487, "step": 54 }, { "epoch": 0.03355192923593107, "grad_norm": 3.4012396335601807, "learning_rate": 7.975505205143906e-06, "log_odds_chosen": -0.039650991559028625, "log_odds_ratio": -0.7518805265426636, "logits/chosen": -1.1694560050964355, "logits/rejected": -1.2061870098114014, "logps/chosen": -2.2472307682037354, "logps/rejected": -2.2080883979797363, "loss": 2.5353, "nll_loss": 2.404977798461914, "rewards/accuracies": 0.5, "rewards/chosen": -0.22472310066223145, "rewards/margins": -0.003914257511496544, "rewards/rejected": -0.22080881893634796, "step": 55 }, { "epoch": 0.034161964312948, "grad_norm": 4.106648921966553, "learning_rate": 7.974525413349662e-06, "log_odds_chosen": 0.12242823839187622, "log_odds_ratio": -0.7872140407562256, "logits/chosen": -1.3651669025421143, "logits/rejected": -1.3511990308761597, "logps/chosen": -2.2390525341033936, "logps/rejected": -2.341012954711914, "loss": 3.0069, "nll_loss": 2.591708183288574, "rewards/accuracies": 0.5, "rewards/chosen": -0.2239052653312683, "rewards/margins": 0.010196024551987648, "rewards/rejected": -0.23410126566886902, "step": 56 }, { "epoch": 0.034771999389964926, "grad_norm": 4.097082138061523, "learning_rate": 7.973545621555418e-06, "log_odds_chosen": -0.06939883530139923, "log_odds_ratio": -0.795678973197937, "logits/chosen": -1.3446311950683594, "logits/rejected": -1.4014421701431274, "logps/chosen": -2.2653074264526367, "logps/rejected": -2.1858930587768555, "loss": 2.4205, "nll_loss": 2.257880210876465, "rewards/accuracies": 0.625, "rewards/chosen": -0.2265307605266571, "rewards/margins": -0.007941421121358871, "rewards/rejected": -0.21858933568000793, "step": 57 }, { "epoch": 0.03538203446698185, "grad_norm": 3.315774917602539, "learning_rate": 7.972565829761176e-06, "log_odds_chosen": 0.2559330463409424, "log_odds_ratio": -0.6114947199821472, "logits/chosen": -1.364756464958191, "logits/rejected": -1.3954861164093018, "logps/chosen": -2.110363245010376, "logps/rejected": -2.354548931121826, "loss": 2.4236, "nll_loss": 2.544058084487915, "rewards/accuracies": 0.625, "rewards/chosen": -0.2110363245010376, "rewards/margins": 0.02441856451332569, "rewards/rejected": -0.23545488715171814, "step": 58 }, { "epoch": 0.03599206954399878, "grad_norm": 4.751431465148926, "learning_rate": 7.971586037966932e-06, "log_odds_chosen": 0.6239976286888123, "log_odds_ratio": -0.47841188311576843, "logits/chosen": -1.1394766569137573, "logits/rejected": -1.3215150833129883, "logps/chosen": -1.9435861110687256, "logps/rejected": -2.4841809272766113, "loss": 2.3162, "nll_loss": 2.0663418769836426, "rewards/accuracies": 0.875, "rewards/chosen": -0.19435861706733704, "rewards/margins": 0.0540594756603241, "rewards/rejected": -0.24841807782649994, "step": 59 }, { "epoch": 0.03660210462101571, "grad_norm": 5.184677600860596, "learning_rate": 7.970606246172688e-06, "log_odds_chosen": -0.47061365842819214, "log_odds_ratio": -1.0490567684173584, "logits/chosen": -1.4044352769851685, "logits/rejected": -1.2312325239181519, "logps/chosen": -2.3443167209625244, "logps/rejected": -1.9689701795578003, "loss": 2.4844, "nll_loss": 2.4992871284484863, "rewards/accuracies": 0.25, "rewards/chosen": -0.2344316840171814, "rewards/margins": -0.03753465414047241, "rewards/rejected": -0.19689702987670898, "step": 60 }, { "epoch": 0.03721213969803264, "grad_norm": 3.716001510620117, "learning_rate": 7.969626454378445e-06, "log_odds_chosen": -0.026183515787124634, "log_odds_ratio": -0.7533769011497498, "logits/chosen": -1.3900272846221924, "logits/rejected": -1.4365661144256592, "logps/chosen": -2.1641881465911865, "logps/rejected": -2.132979393005371, "loss": 2.4634, "nll_loss": 2.4115209579467773, "rewards/accuracies": 0.625, "rewards/chosen": -0.2164188027381897, "rewards/margins": -0.003120873123407364, "rewards/rejected": -0.21329793334007263, "step": 61 }, { "epoch": 0.037822174775049566, "grad_norm": 4.811389923095703, "learning_rate": 7.9686466625842e-06, "log_odds_chosen": -0.1346389204263687, "log_odds_ratio": -0.8041808009147644, "logits/chosen": -1.459594488143921, "logits/rejected": -1.582560420036316, "logps/chosen": -1.9791289567947388, "logps/rejected": -1.8483132123947144, "loss": 2.3321, "nll_loss": 2.3060641288757324, "rewards/accuracies": 0.375, "rewards/chosen": -0.19791290163993835, "rewards/margins": -0.013081575743854046, "rewards/rejected": -0.18483132123947144, "step": 62 }, { "epoch": 0.038432209852066496, "grad_norm": 4.5236077308654785, "learning_rate": 7.967666870789957e-06, "log_odds_chosen": -0.03167366608977318, "log_odds_ratio": -0.7652387619018555, "logits/chosen": -1.3528778553009033, "logits/rejected": -1.365309476852417, "logps/chosen": -2.1982803344726562, "logps/rejected": -2.1567459106445312, "loss": 2.3463, "nll_loss": 2.260676622390747, "rewards/accuracies": 0.375, "rewards/chosen": -0.21982800960540771, "rewards/margins": -0.004153449088335037, "rewards/rejected": -0.21567456424236298, "step": 63 }, { "epoch": 0.039042244929083425, "grad_norm": 4.283431529998779, "learning_rate": 7.966687078995713e-06, "log_odds_chosen": 0.37575119733810425, "log_odds_ratio": -0.6103765964508057, "logits/chosen": -1.3389976024627686, "logits/rejected": -1.3609977960586548, "logps/chosen": -2.2598037719726562, "logps/rejected": -2.58219838142395, "loss": 2.2897, "nll_loss": 2.421389579772949, "rewards/accuracies": 0.75, "rewards/chosen": -0.22598038613796234, "rewards/margins": 0.032239459455013275, "rewards/rejected": -0.258219838142395, "step": 64 }, { "epoch": 0.03965228000610035, "grad_norm": 4.400821685791016, "learning_rate": 7.965707287201469e-06, "log_odds_chosen": 0.4571629762649536, "log_odds_ratio": -0.5272305607795715, "logits/chosen": -1.3002351522445679, "logits/rejected": -1.3836455345153809, "logps/chosen": -2.1592748165130615, "logps/rejected": -2.562326192855835, "loss": 2.3559, "nll_loss": 2.182344675064087, "rewards/accuracies": 0.75, "rewards/chosen": -0.21592749655246735, "rewards/margins": 0.04030515253543854, "rewards/rejected": -0.2562326490879059, "step": 65 }, { "epoch": 0.04026231508311728, "grad_norm": 5.532351493835449, "learning_rate": 7.964727495407225e-06, "log_odds_chosen": -0.039681509137153625, "log_odds_ratio": -0.7964754104614258, "logits/chosen": -1.2221866846084595, "logits/rejected": -1.3599151372909546, "logps/chosen": -2.0179126262664795, "logps/rejected": -1.9762060642242432, "loss": 2.2804, "nll_loss": 2.0664427280426025, "rewards/accuracies": 0.375, "rewards/chosen": -0.20179128646850586, "rewards/margins": -0.004170666448771954, "rewards/rejected": -0.19762061536312103, "step": 66 }, { "epoch": 0.040872350160134206, "grad_norm": 4.75457763671875, "learning_rate": 7.963747703612983e-06, "log_odds_chosen": 0.028808627277612686, "log_odds_ratio": -0.7059023380279541, "logits/chosen": -1.4231452941894531, "logits/rejected": -1.3008105754852295, "logps/chosen": -1.8770134449005127, "logps/rejected": -1.8941420316696167, "loss": 2.0598, "nll_loss": 2.1053647994995117, "rewards/accuracies": 0.5, "rewards/chosen": -0.18770134449005127, "rewards/margins": 0.001712876372039318, "rewards/rejected": -0.18941423296928406, "step": 67 }, { "epoch": 0.041482385237151136, "grad_norm": 5.058011054992676, "learning_rate": 7.962767911818737e-06, "log_odds_chosen": 0.358126163482666, "log_odds_ratio": -0.5618312358856201, "logits/chosen": -1.1888004541397095, "logits/rejected": -1.2705730199813843, "logps/chosen": -1.741358757019043, "logps/rejected": -2.0336971282958984, "loss": 2.1846, "nll_loss": 1.9060081243515015, "rewards/accuracies": 0.625, "rewards/chosen": -0.17413589358329773, "rewards/margins": 0.029233839362859726, "rewards/rejected": -0.20336972177028656, "step": 68 }, { "epoch": 0.042092420314168065, "grad_norm": 4.735241889953613, "learning_rate": 7.961788120024493e-06, "log_odds_chosen": -0.26055029034614563, "log_odds_ratio": -0.8521256446838379, "logits/chosen": -1.2492648363113403, "logits/rejected": -1.2544978857040405, "logps/chosen": -1.9552260637283325, "logps/rejected": -1.7308744192123413, "loss": 2.2126, "nll_loss": 2.160418748855591, "rewards/accuracies": 0.5, "rewards/chosen": -0.19552260637283325, "rewards/margins": -0.022435162216424942, "rewards/rejected": -0.1730874478816986, "step": 69 }, { "epoch": 0.042702455391184994, "grad_norm": 5.780309677124023, "learning_rate": 7.960808328230251e-06, "log_odds_chosen": 0.10120214521884918, "log_odds_ratio": -0.6884282231330872, "logits/chosen": -1.3410685062408447, "logits/rejected": -1.4450104236602783, "logps/chosen": -1.8714523315429688, "logps/rejected": -1.931703805923462, "loss": 2.2248, "nll_loss": 1.9808084964752197, "rewards/accuracies": 0.5, "rewards/chosen": -0.18714523315429688, "rewards/margins": 0.006025145761668682, "rewards/rejected": -0.19317038357257843, "step": 70 }, { "epoch": 0.043312490468201924, "grad_norm": 4.2230916023254395, "learning_rate": 7.959828536436007e-06, "log_odds_chosen": -0.27966341376304626, "log_odds_ratio": -1.0991920232772827, "logits/chosen": -1.2091799974441528, "logits/rejected": -1.3184674978256226, "logps/chosen": -2.189871311187744, "logps/rejected": -1.9596307277679443, "loss": 2.6729, "nll_loss": 2.575623035430908, "rewards/accuracies": 0.5, "rewards/chosen": -0.2189871221780777, "rewards/margins": -0.0230240561068058, "rewards/rejected": -0.195963054895401, "step": 71 }, { "epoch": 0.04392252554521885, "grad_norm": 5.609494209289551, "learning_rate": 7.958848744641764e-06, "log_odds_chosen": 0.0036159157752990723, "log_odds_ratio": -0.7261074781417847, "logits/chosen": -1.2694283723831177, "logits/rejected": -1.291975975036621, "logps/chosen": -1.915067195892334, "logps/rejected": -1.9473167657852173, "loss": 2.0461, "nll_loss": 2.0218207836151123, "rewards/accuracies": 0.5, "rewards/chosen": -0.19150671362876892, "rewards/margins": 0.0032249651849269867, "rewards/rejected": -0.1947316825389862, "step": 72 }, { "epoch": 0.044532560622235776, "grad_norm": 4.586228847503662, "learning_rate": 7.95786895284752e-06, "log_odds_chosen": -0.04085252434015274, "log_odds_ratio": -0.727353036403656, "logits/chosen": -1.4137481451034546, "logits/rejected": -1.4008210897445679, "logps/chosen": -1.9394521713256836, "logps/rejected": -1.9049904346466064, "loss": 2.1429, "nll_loss": 2.232776641845703, "rewards/accuracies": 0.375, "rewards/chosen": -0.19394519925117493, "rewards/margins": -0.003446178510785103, "rewards/rejected": -0.19049903750419617, "step": 73 }, { "epoch": 0.045142595699252705, "grad_norm": 5.6986846923828125, "learning_rate": 7.956889161053276e-06, "log_odds_chosen": 0.3018708825111389, "log_odds_ratio": -0.5890268683433533, "logits/chosen": -1.1828252077102661, "logits/rejected": -1.1637895107269287, "logps/chosen": -1.4609389305114746, "logps/rejected": -1.7102141380310059, "loss": 2.1216, "nll_loss": 1.6565814018249512, "rewards/accuracies": 0.875, "rewards/chosen": -0.14609387516975403, "rewards/margins": 0.024927537888288498, "rewards/rejected": -0.17102141678333282, "step": 74 }, { "epoch": 0.045752630776269634, "grad_norm": 8.894720077514648, "learning_rate": 7.955909369259032e-06, "log_odds_chosen": -0.30915597081184387, "log_odds_ratio": -0.8913680911064148, "logits/chosen": -1.12887704372406, "logits/rejected": -1.2830572128295898, "logps/chosen": -1.902513027191162, "logps/rejected": -1.659989356994629, "loss": 1.9487, "nll_loss": 2.1274526119232178, "rewards/accuracies": 0.125, "rewards/chosen": -0.19025132060050964, "rewards/margins": -0.02425236999988556, "rewards/rejected": -0.16599895060062408, "step": 75 }, { "epoch": 0.046362665853286564, "grad_norm": 9.71774673461914, "learning_rate": 7.954929577464788e-06, "log_odds_chosen": -0.049925774335861206, "log_odds_ratio": -0.7761548161506653, "logits/chosen": -1.3998181819915771, "logits/rejected": -1.4376546144485474, "logps/chosen": -2.1567344665527344, "logps/rejected": -2.11150860786438, "loss": 2.0391, "nll_loss": 2.4180171489715576, "rewards/accuracies": 0.5, "rewards/chosen": -0.21567344665527344, "rewards/margins": -0.004522579722106457, "rewards/rejected": -0.2111508697271347, "step": 76 }, { "epoch": 0.04697270093030349, "grad_norm": 5.252336025238037, "learning_rate": 7.953949785670544e-06, "log_odds_chosen": -0.07995085418224335, "log_odds_ratio": -0.7886885404586792, "logits/chosen": -1.2583385705947876, "logits/rejected": -1.296952486038208, "logps/chosen": -1.6325832605361938, "logps/rejected": -1.572667121887207, "loss": 2.1321, "nll_loss": 1.8895819187164307, "rewards/accuracies": 0.5, "rewards/chosen": -0.16325831413269043, "rewards/margins": -0.005991615355014801, "rewards/rejected": -0.15726670622825623, "step": 77 }, { "epoch": 0.04758273600732042, "grad_norm": 7.65501070022583, "learning_rate": 7.952969993876302e-06, "log_odds_chosen": 0.7816745042800903, "log_odds_ratio": -0.4380966126918793, "logits/chosen": -1.1542704105377197, "logits/rejected": -1.4140818119049072, "logps/chosen": -1.4402070045471191, "logps/rejected": -2.0449185371398926, "loss": 1.8906, "nll_loss": 1.8038228750228882, "rewards/accuracies": 0.875, "rewards/chosen": -0.1440207064151764, "rewards/margins": 0.060471151024103165, "rewards/rejected": -0.20449188351631165, "step": 78 }, { "epoch": 0.04819277108433735, "grad_norm": 11.718932151794434, "learning_rate": 7.951990202082056e-06, "log_odds_chosen": 0.07527816295623779, "log_odds_ratio": -0.8441392183303833, "logits/chosen": -1.2670788764953613, "logits/rejected": -1.361000418663025, "logps/chosen": -1.9808706045150757, "logps/rejected": -1.9847910404205322, "loss": 1.8852, "nll_loss": 1.7946054935455322, "rewards/accuracies": 0.875, "rewards/chosen": -0.19808706641197205, "rewards/margins": 0.00039203278720378876, "rewards/rejected": -0.19847910106182098, "step": 79 }, { "epoch": 0.04880280616135428, "grad_norm": 17.097070693969727, "learning_rate": 7.951010410287813e-06, "log_odds_chosen": 0.672618567943573, "log_odds_ratio": -0.4590175151824951, "logits/chosen": -1.3496650457382202, "logits/rejected": -1.4166643619537354, "logps/chosen": -1.7727174758911133, "logps/rejected": -2.3594095706939697, "loss": 2.1881, "nll_loss": 1.9306244850158691, "rewards/accuracies": 0.875, "rewards/chosen": -0.1772717386484146, "rewards/margins": 0.05866921693086624, "rewards/rejected": -0.23594096302986145, "step": 80 }, { "epoch": 0.049412841238371204, "grad_norm": 7.664464473724365, "learning_rate": 7.95003061849357e-06, "log_odds_chosen": 0.4316689670085907, "log_odds_ratio": -0.6630246043205261, "logits/chosen": -1.3219586610794067, "logits/rejected": -1.4517977237701416, "logps/chosen": -1.743274450302124, "logps/rejected": -2.14032244682312, "loss": 2.0291, "nll_loss": 2.0185623168945312, "rewards/accuracies": 0.625, "rewards/chosen": -0.17432743310928345, "rewards/margins": 0.0397048145532608, "rewards/rejected": -0.21403226256370544, "step": 81 }, { "epoch": 0.05002287631538813, "grad_norm": 10.077680587768555, "learning_rate": 7.949050826699326e-06, "log_odds_chosen": -0.2831495702266693, "log_odds_ratio": -0.9015881419181824, "logits/chosen": -1.164992332458496, "logits/rejected": -1.1200942993164062, "logps/chosen": -2.0156757831573486, "logps/rejected": -1.7391074895858765, "loss": 1.8946, "nll_loss": 1.9173997640609741, "rewards/accuracies": 0.375, "rewards/chosen": -0.20156759023666382, "rewards/margins": -0.027656830847263336, "rewards/rejected": -0.17391075193881989, "step": 82 }, { "epoch": 0.05063291139240506, "grad_norm": 6.838299751281738, "learning_rate": 7.948071034905083e-06, "log_odds_chosen": 0.402604877948761, "log_odds_ratio": -0.5459954142570496, "logits/chosen": -1.2385890483856201, "logits/rejected": -1.4277836084365845, "logps/chosen": -1.358898401260376, "logps/rejected": -1.6811294555664062, "loss": 1.9918, "nll_loss": 1.8338841199874878, "rewards/accuracies": 0.75, "rewards/chosen": -0.13588984310626984, "rewards/margins": 0.03222310170531273, "rewards/rejected": -0.16811296343803406, "step": 83 }, { "epoch": 0.05124294646942199, "grad_norm": 10.883010864257812, "learning_rate": 7.947091243110839e-06, "log_odds_chosen": -0.11794321984052658, "log_odds_ratio": -0.792720377445221, "logits/chosen": -1.407608985900879, "logits/rejected": -1.400707721710205, "logps/chosen": -1.6874555349349976, "logps/rejected": -1.5821118354797363, "loss": 1.8972, "nll_loss": 1.908363938331604, "rewards/accuracies": 0.375, "rewards/chosen": -0.16874554753303528, "rewards/margins": -0.0105343759059906, "rewards/rejected": -0.15821118652820587, "step": 84 }, { "epoch": 0.05185298154643892, "grad_norm": 10.601635932922363, "learning_rate": 7.946111451316595e-06, "log_odds_chosen": 0.06109200417995453, "log_odds_ratio": -0.7478256225585938, "logits/chosen": -1.4077900648117065, "logits/rejected": -1.5681629180908203, "logps/chosen": -1.6189345121383667, "logps/rejected": -1.6796839237213135, "loss": 1.8825, "nll_loss": 1.861257791519165, "rewards/accuracies": 0.5, "rewards/chosen": -0.16189345717430115, "rewards/margins": 0.006074944511055946, "rewards/rejected": -0.16796839237213135, "step": 85 }, { "epoch": 0.05246301662345585, "grad_norm": 8.941997528076172, "learning_rate": 7.945131659522351e-06, "log_odds_chosen": 0.5111546516418457, "log_odds_ratio": -0.520841658115387, "logits/chosen": -1.2784653902053833, "logits/rejected": -1.3553466796875, "logps/chosen": -1.2015265226364136, "logps/rejected": -1.6046687364578247, "loss": 1.7285, "nll_loss": 1.43333899974823, "rewards/accuracies": 0.625, "rewards/chosen": -0.12015265971422195, "rewards/margins": 0.0403142087161541, "rewards/rejected": -0.16046687960624695, "step": 86 }, { "epoch": 0.05307305170047278, "grad_norm": 10.887710571289062, "learning_rate": 7.944151867728107e-06, "log_odds_chosen": -0.022439345717430115, "log_odds_ratio": -0.828036904335022, "logits/chosen": -1.4123868942260742, "logits/rejected": -1.4701398611068726, "logps/chosen": -1.6596662998199463, "logps/rejected": -1.6886999607086182, "loss": 1.8219, "nll_loss": 1.8986363410949707, "rewards/accuracies": 0.25, "rewards/chosen": -0.16596664488315582, "rewards/margins": 0.0029033608734607697, "rewards/rejected": -0.1688700020313263, "step": 87 }, { "epoch": 0.0536830867774897, "grad_norm": 3.606444835662842, "learning_rate": 7.943172075933863e-06, "log_odds_chosen": 0.2735767364501953, "log_odds_ratio": -0.6397613286972046, "logits/chosen": -1.288944125175476, "logits/rejected": -1.3072487115859985, "logps/chosen": -1.292909860610962, "logps/rejected": -1.506460189819336, "loss": 1.6703, "nll_loss": 1.3779218196868896, "rewards/accuracies": 0.375, "rewards/chosen": -0.12929099798202515, "rewards/margins": 0.021355025470256805, "rewards/rejected": -0.15064601600170135, "step": 88 }, { "epoch": 0.05429312185450663, "grad_norm": 10.128061294555664, "learning_rate": 7.942192284139621e-06, "log_odds_chosen": 0.45963555574417114, "log_odds_ratio": -0.587836742401123, "logits/chosen": -1.3607900142669678, "logits/rejected": -1.812579870223999, "logps/chosen": -1.3821380138397217, "logps/rejected": -1.7606815099716187, "loss": 1.6549, "nll_loss": 1.5053067207336426, "rewards/accuracies": 0.625, "rewards/chosen": -0.13821381330490112, "rewards/margins": 0.037854354828596115, "rewards/rejected": -0.17606815695762634, "step": 89 }, { "epoch": 0.05490315693152356, "grad_norm": 8.696532249450684, "learning_rate": 7.941212492345375e-06, "log_odds_chosen": 0.4536903500556946, "log_odds_ratio": -0.5717962980270386, "logits/chosen": -0.8229621648788452, "logits/rejected": -1.4021133184432983, "logps/chosen": -0.9888061881065369, "logps/rejected": -1.3167715072631836, "loss": 1.6313, "nll_loss": 1.1250265836715698, "rewards/accuracies": 0.625, "rewards/chosen": -0.09888062626123428, "rewards/margins": 0.03279653191566467, "rewards/rejected": -0.13167715072631836, "step": 90 }, { "epoch": 0.05551319200854049, "grad_norm": 6.239406108856201, "learning_rate": 7.940232700551132e-06, "log_odds_chosen": 0.37124860286712646, "log_odds_ratio": -0.6015422344207764, "logits/chosen": -1.7716925144195557, "logits/rejected": -1.9316816329956055, "logps/chosen": -1.7486664056777954, "logps/rejected": -2.046799898147583, "loss": 1.7755, "nll_loss": 1.8143548965454102, "rewards/accuracies": 0.625, "rewards/chosen": -0.17486663162708282, "rewards/margins": 0.029813360422849655, "rewards/rejected": -0.20467999577522278, "step": 91 }, { "epoch": 0.05612322708555742, "grad_norm": 10.345046043395996, "learning_rate": 7.93925290875689e-06, "log_odds_chosen": 0.05882374942302704, "log_odds_ratio": -0.7273902893066406, "logits/chosen": -1.6594091653823853, "logits/rejected": -1.4758095741271973, "logps/chosen": -1.3565313816070557, "logps/rejected": -1.3781466484069824, "loss": 1.5223, "nll_loss": 1.5218926668167114, "rewards/accuracies": 0.375, "rewards/chosen": -0.13565313816070557, "rewards/margins": 0.0021615270525217056, "rewards/rejected": -0.13781467080116272, "step": 92 }, { "epoch": 0.05673326216257435, "grad_norm": 11.832054138183594, "learning_rate": 7.938273116962646e-06, "log_odds_chosen": -0.06510807573795319, "log_odds_ratio": -0.7621272802352905, "logits/chosen": -1.1932748556137085, "logits/rejected": -1.3116530179977417, "logps/chosen": -1.1791833639144897, "logps/rejected": -1.1293659210205078, "loss": 1.5223, "nll_loss": 1.384325385093689, "rewards/accuracies": 0.5, "rewards/chosen": -0.11791832745075226, "rewards/margins": -0.00498172827064991, "rewards/rejected": -0.1129366010427475, "step": 93 }, { "epoch": 0.05734329723959128, "grad_norm": 14.296309471130371, "learning_rate": 7.937293325168402e-06, "log_odds_chosen": 0.590800404548645, "log_odds_ratio": -0.49400222301483154, "logits/chosen": -1.5376043319702148, "logits/rejected": -1.8132736682891846, "logps/chosen": -1.3493921756744385, "logps/rejected": -1.8335559368133545, "loss": 1.5335, "nll_loss": 1.7684797048568726, "rewards/accuracies": 0.75, "rewards/chosen": -0.13493920862674713, "rewards/margins": 0.048416391015052795, "rewards/rejected": -0.18335559964179993, "step": 94 }, { "epoch": 0.05795333231660821, "grad_norm": 9.109787940979004, "learning_rate": 7.936313533374158e-06, "log_odds_chosen": 0.5508210062980652, "log_odds_ratio": -0.5192461609840393, "logits/chosen": -1.3365455865859985, "logits/rejected": -1.8342344760894775, "logps/chosen": -1.218651294708252, "logps/rejected": -1.6270637512207031, "loss": 1.5295, "nll_loss": 1.583866834640503, "rewards/accuracies": 0.75, "rewards/chosen": -0.12186513841152191, "rewards/margins": 0.040841244161129, "rewards/rejected": -0.1627063751220703, "step": 95 }, { "epoch": 0.05856336739362513, "grad_norm": 5.668200492858887, "learning_rate": 7.935333741579914e-06, "log_odds_chosen": 0.0932328999042511, "log_odds_ratio": -0.6908342242240906, "logits/chosen": -1.3649669885635376, "logits/rejected": -1.498496413230896, "logps/chosen": -1.375852108001709, "logps/rejected": -1.4324169158935547, "loss": 1.5519, "nll_loss": 1.4047319889068604, "rewards/accuracies": 0.5, "rewards/chosen": -0.13758520781993866, "rewards/margins": 0.005656483117491007, "rewards/rejected": -0.14324170351028442, "step": 96 }, { "epoch": 0.05917340247064206, "grad_norm": 6.812465667724609, "learning_rate": 7.93435394978567e-06, "log_odds_chosen": -0.1675167977809906, "log_odds_ratio": -0.8218705654144287, "logits/chosen": -1.3037850856781006, "logits/rejected": -1.4547948837280273, "logps/chosen": -1.4203855991363525, "logps/rejected": -1.289433479309082, "loss": 1.6233, "nll_loss": 1.7181905508041382, "rewards/accuracies": 0.375, "rewards/chosen": -0.14203856885433197, "rewards/margins": -0.013095206581056118, "rewards/rejected": -0.12894335389137268, "step": 97 }, { "epoch": 0.05978343754765899, "grad_norm": 4.775881290435791, "learning_rate": 7.933374157991426e-06, "log_odds_chosen": 0.7385382056236267, "log_odds_ratio": -0.4135254919528961, "logits/chosen": -1.2747175693511963, "logits/rejected": -1.7494773864746094, "logps/chosen": -1.024376392364502, "logps/rejected": -1.5262428522109985, "loss": 1.4699, "nll_loss": 1.2399548292160034, "rewards/accuracies": 1.0, "rewards/chosen": -0.10243763029575348, "rewards/margins": 0.050186652690172195, "rewards/rejected": -0.15262427926063538, "step": 98 }, { "epoch": 0.06039347262467592, "grad_norm": 7.374175548553467, "learning_rate": 7.932394366197182e-06, "log_odds_chosen": 0.7547495365142822, "log_odds_ratio": -0.4355051517486572, "logits/chosen": -1.210670828819275, "logits/rejected": -1.6727955341339111, "logps/chosen": -1.3167270421981812, "logps/rejected": -1.8686121702194214, "loss": 1.5398, "nll_loss": 1.377557635307312, "rewards/accuracies": 0.875, "rewards/chosen": -0.1316727101802826, "rewards/margins": 0.05518850311636925, "rewards/rejected": -0.18686121702194214, "step": 99 }, { "epoch": 0.06100350770169285, "grad_norm": 3.0302157402038574, "learning_rate": 7.93141457440294e-06, "log_odds_chosen": 0.2834092974662781, "log_odds_ratio": -0.6748322248458862, "logits/chosen": -1.222381591796875, "logits/rejected": -1.5123062133789062, "logps/chosen": -1.3266156911849976, "logps/rejected": -1.4738701581954956, "loss": 1.5679, "nll_loss": 1.5869672298431396, "rewards/accuracies": 0.5, "rewards/chosen": -0.13266156613826752, "rewards/margins": 0.014725442975759506, "rewards/rejected": -0.14738701283931732, "step": 100 }, { "epoch": 0.06161354277870978, "grad_norm": 3.2645621299743652, "learning_rate": 7.930434782608696e-06, "log_odds_chosen": 0.29880842566490173, "log_odds_ratio": -0.8268628120422363, "logits/chosen": -1.2419122457504272, "logits/rejected": -1.3538109064102173, "logps/chosen": -1.5399177074432373, "logps/rejected": -1.7740072011947632, "loss": 1.5922, "nll_loss": 1.4716850519180298, "rewards/accuracies": 0.5, "rewards/chosen": -0.15399178862571716, "rewards/margins": 0.023408953100442886, "rewards/rejected": -0.17740073800086975, "step": 101 }, { "epoch": 0.06222357785572671, "grad_norm": 3.975478410720825, "learning_rate": 7.92945499081445e-06, "log_odds_chosen": 0.18240776658058167, "log_odds_ratio": -0.6665881872177124, "logits/chosen": -1.4161734580993652, "logits/rejected": -1.7006182670593262, "logps/chosen": -1.6778274774551392, "logps/rejected": -1.8160609006881714, "loss": 1.6728, "nll_loss": 1.5527294874191284, "rewards/accuracies": 0.625, "rewards/chosen": -0.1677827537059784, "rewards/margins": 0.013823338784277439, "rewards/rejected": -0.18160608410835266, "step": 102 }, { "epoch": 0.06283361293274363, "grad_norm": 6.83634614944458, "learning_rate": 7.928475199020208e-06, "log_odds_chosen": 0.29363539814949036, "log_odds_ratio": -0.7778407335281372, "logits/chosen": -1.3369272947311401, "logits/rejected": -1.6812334060668945, "logps/chosen": -1.6738427877426147, "logps/rejected": -1.8826217651367188, "loss": 1.551, "nll_loss": 1.546211838722229, "rewards/accuracies": 0.625, "rewards/chosen": -0.16738426685333252, "rewards/margins": 0.020877916365861893, "rewards/rejected": -0.1882621943950653, "step": 103 }, { "epoch": 0.06344364800976056, "grad_norm": 5.60284948348999, "learning_rate": 7.927495407225965e-06, "log_odds_chosen": 0.010318636894226074, "log_odds_ratio": -0.7753735780715942, "logits/chosen": -1.483638048171997, "logits/rejected": -1.6760648488998413, "logps/chosen": -1.4451056718826294, "logps/rejected": -1.409006118774414, "loss": 1.5723, "nll_loss": 1.5122895240783691, "rewards/accuracies": 0.5, "rewards/chosen": -0.14451056718826294, "rewards/margins": -0.003609958104789257, "rewards/rejected": -0.1409006118774414, "step": 104 }, { "epoch": 0.06405368308677749, "grad_norm": 4.015722751617432, "learning_rate": 7.92651561543172e-06, "log_odds_chosen": 0.0026471540331840515, "log_odds_ratio": -0.7124950885772705, "logits/chosen": -1.4702478647232056, "logits/rejected": -1.4458560943603516, "logps/chosen": -1.1648403406143188, "logps/rejected": -1.1744709014892578, "loss": 1.5982, "nll_loss": 1.4557287693023682, "rewards/accuracies": 0.625, "rewards/chosen": -0.11648403853178024, "rewards/margins": 0.0009630480781197548, "rewards/rejected": -0.11744708567857742, "step": 105 }, { "epoch": 0.06466371816379442, "grad_norm": 4.12414026260376, "learning_rate": 7.925535823637477e-06, "log_odds_chosen": 0.24323394894599915, "log_odds_ratio": -0.6948925256729126, "logits/chosen": -1.0923621654510498, "logits/rejected": -1.245092749595642, "logps/chosen": -1.2791919708251953, "logps/rejected": -1.4295234680175781, "loss": 1.5691, "nll_loss": 1.3563125133514404, "rewards/accuracies": 0.5, "rewards/chosen": -0.12791921198368073, "rewards/margins": 0.015033135190606117, "rewards/rejected": -0.1429523378610611, "step": 106 }, { "epoch": 0.06527375324081135, "grad_norm": 3.0148086547851562, "learning_rate": 7.924556031843233e-06, "log_odds_chosen": -0.39903467893600464, "log_odds_ratio": -0.9648972749710083, "logits/chosen": -1.2538715600967407, "logits/rejected": -1.3013038635253906, "logps/chosen": -1.4473379850387573, "logps/rejected": -1.1158884763717651, "loss": 1.6656, "nll_loss": 1.635825753211975, "rewards/accuracies": 0.25, "rewards/chosen": -0.14473380148410797, "rewards/margins": -0.03314495459198952, "rewards/rejected": -0.11158885061740875, "step": 107 }, { "epoch": 0.06588378831782828, "grad_norm": 3.1661291122436523, "learning_rate": 7.923576240048989e-06, "log_odds_chosen": 0.20733773708343506, "log_odds_ratio": -0.7281293272972107, "logits/chosen": -1.3299130201339722, "logits/rejected": -1.671874761581421, "logps/chosen": -1.2263782024383545, "logps/rejected": -1.3523890972137451, "loss": 1.49, "nll_loss": 1.4900926351547241, "rewards/accuracies": 0.75, "rewards/chosen": -0.12263782322406769, "rewards/margins": 0.012601076625287533, "rewards/rejected": -0.1352389007806778, "step": 108 }, { "epoch": 0.0664938233948452, "grad_norm": 4.686000823974609, "learning_rate": 7.922596448254745e-06, "log_odds_chosen": 0.1366727501153946, "log_odds_ratio": -0.6529913544654846, "logits/chosen": -1.3482108116149902, "logits/rejected": -1.4257779121398926, "logps/chosen": -1.1930285692214966, "logps/rejected": -1.300571084022522, "loss": 1.5347, "nll_loss": 1.5217491388320923, "rewards/accuracies": 0.75, "rewards/chosen": -0.11930285394191742, "rewards/margins": 0.010754253715276718, "rewards/rejected": -0.13005711138248444, "step": 109 }, { "epoch": 0.06710385847186214, "grad_norm": 4.981257915496826, "learning_rate": 7.921616656460501e-06, "log_odds_chosen": 0.12938423454761505, "log_odds_ratio": -0.6633402109146118, "logits/chosen": -1.135327935218811, "logits/rejected": -1.1890195608139038, "logps/chosen": -1.1593241691589355, "logps/rejected": -1.235068440437317, "loss": 1.5239, "nll_loss": 1.4039170742034912, "rewards/accuracies": 0.5, "rewards/chosen": -0.1159324198961258, "rewards/margins": 0.007574416697025299, "rewards/rejected": -0.12350684404373169, "step": 110 }, { "epoch": 0.06771389354887906, "grad_norm": 6.347729206085205, "learning_rate": 7.920636864666257e-06, "log_odds_chosen": 0.14552049338817596, "log_odds_ratio": -0.6874411106109619, "logits/chosen": -1.2874135971069336, "logits/rejected": -1.0308291912078857, "logps/chosen": -1.4868874549865723, "logps/rejected": -1.6355869770050049, "loss": 1.7094, "nll_loss": 1.737934947013855, "rewards/accuracies": 0.5, "rewards/chosen": -0.14868876338005066, "rewards/margins": 0.014869949780404568, "rewards/rejected": -0.1635587066411972, "step": 111 }, { "epoch": 0.068323928625896, "grad_norm": 5.929074287414551, "learning_rate": 7.919657072872015e-06, "log_odds_chosen": 0.14258627593517303, "log_odds_ratio": -0.653099775314331, "logits/chosen": -1.2657989263534546, "logits/rejected": -1.2805571556091309, "logps/chosen": -0.9968390464782715, "logps/rejected": -1.063353180885315, "loss": 1.3023, "nll_loss": 1.194297194480896, "rewards/accuracies": 0.625, "rewards/chosen": -0.09968391060829163, "rewards/margins": 0.006651420146226883, "rewards/rejected": -0.10633532702922821, "step": 112 }, { "epoch": 0.06893396370291292, "grad_norm": 7.266416072845459, "learning_rate": 7.91867728107777e-06, "log_odds_chosen": 0.16890937089920044, "log_odds_ratio": -0.6349431276321411, "logits/chosen": -1.5178728103637695, "logits/rejected": -1.6347193717956543, "logps/chosen": -1.4486634731292725, "logps/rejected": -1.5622005462646484, "loss": 1.4975, "nll_loss": 1.4896016120910645, "rewards/accuracies": 0.625, "rewards/chosen": -0.14486634731292725, "rewards/margins": 0.011353685520589352, "rewards/rejected": -0.15622003376483917, "step": 113 }, { "epoch": 0.06954399877992985, "grad_norm": 2.949709415435791, "learning_rate": 7.917697489283527e-06, "log_odds_chosen": 0.1887502670288086, "log_odds_ratio": -0.64467853307724, "logits/chosen": -1.3587192296981812, "logits/rejected": -1.3278298377990723, "logps/chosen": -1.36537504196167, "logps/rejected": -1.5169177055358887, "loss": 1.5547, "nll_loss": 1.6179225444793701, "rewards/accuracies": 0.625, "rewards/chosen": -0.13653750717639923, "rewards/margins": 0.015154248103499413, "rewards/rejected": -0.1516917645931244, "step": 114 }, { "epoch": 0.07015403385694677, "grad_norm": 9.680606842041016, "learning_rate": 7.916717697489284e-06, "log_odds_chosen": 0.773067831993103, "log_odds_ratio": -0.46286457777023315, "logits/chosen": -1.2710492610931396, "logits/rejected": -1.607222557067871, "logps/chosen": -1.2779064178466797, "logps/rejected": -1.8871395587921143, "loss": 1.8282, "nll_loss": 1.4588948488235474, "rewards/accuracies": 0.875, "rewards/chosen": -0.1277906596660614, "rewards/margins": 0.06092331185936928, "rewards/rejected": -0.1887139528989792, "step": 115 }, { "epoch": 0.0707640689339637, "grad_norm": 3.600252389907837, "learning_rate": 7.91573790569504e-06, "log_odds_chosen": 0.4234975278377533, "log_odds_ratio": -0.5371680855751038, "logits/chosen": -0.9821169376373291, "logits/rejected": -1.3034322261810303, "logps/chosen": -0.8143824338912964, "logps/rejected": -1.0221117734909058, "loss": 1.418, "nll_loss": 1.069612979888916, "rewards/accuracies": 0.75, "rewards/chosen": -0.08143824338912964, "rewards/margins": 0.020772945135831833, "rewards/rejected": -0.10221118479967117, "step": 116 }, { "epoch": 0.07137410401098063, "grad_norm": 2.4502663612365723, "learning_rate": 7.914758113900796e-06, "log_odds_chosen": -0.052961334586143494, "log_odds_ratio": -0.778633713722229, "logits/chosen": -1.3497717380523682, "logits/rejected": -1.5142556428909302, "logps/chosen": -1.3208248615264893, "logps/rejected": -1.275109052658081, "loss": 1.6229, "nll_loss": 1.5591641664505005, "rewards/accuracies": 0.5, "rewards/chosen": -0.1320824921131134, "rewards/margins": -0.004571591503918171, "rewards/rejected": -0.1275109052658081, "step": 117 }, { "epoch": 0.07198413908799756, "grad_norm": 3.131706953048706, "learning_rate": 7.913778322106552e-06, "log_odds_chosen": 0.9038541316986084, "log_odds_ratio": -0.5173225402832031, "logits/chosen": -0.9659017324447632, "logits/rejected": -1.5071476697921753, "logps/chosen": -1.022273302078247, "logps/rejected": -1.7721545696258545, "loss": 1.5297, "nll_loss": 1.2477984428405762, "rewards/accuracies": 0.5, "rewards/chosen": -0.10222733020782471, "rewards/margins": 0.07498813420534134, "rewards/rejected": -0.17721545696258545, "step": 118 }, { "epoch": 0.07259417416501449, "grad_norm": 5.988349437713623, "learning_rate": 7.912798530312308e-06, "log_odds_chosen": 0.25959622859954834, "log_odds_ratio": -0.6107751131057739, "logits/chosen": -1.5980942249298096, "logits/rejected": -1.5905232429504395, "logps/chosen": -1.1996312141418457, "logps/rejected": -1.388986349105835, "loss": 1.5104, "nll_loss": 1.4991925954818726, "rewards/accuracies": 0.625, "rewards/chosen": -0.11996310949325562, "rewards/margins": 0.01893552392721176, "rewards/rejected": -0.13889864087104797, "step": 119 }, { "epoch": 0.07320420924203141, "grad_norm": 6.140962600708008, "learning_rate": 7.911818738518064e-06, "log_odds_chosen": 0.36872172355651855, "log_odds_ratio": -0.583720326423645, "logits/chosen": -1.2491904497146606, "logits/rejected": -1.4337480068206787, "logps/chosen": -1.1606732606887817, "logps/rejected": -1.457308292388916, "loss": 1.5566, "nll_loss": 1.4918538331985474, "rewards/accuracies": 0.625, "rewards/chosen": -0.1160673275589943, "rewards/margins": 0.02966351993381977, "rewards/rejected": -0.1457308530807495, "step": 120 }, { "epoch": 0.07381424431904834, "grad_norm": 6.035140514373779, "learning_rate": 7.91083894672382e-06, "log_odds_chosen": -0.1630793660879135, "log_odds_ratio": -0.8211429119110107, "logits/chosen": -1.1764451265335083, "logits/rejected": -1.0802536010742188, "logps/chosen": -1.1831315755844116, "logps/rejected": -1.0614982843399048, "loss": 1.3993, "nll_loss": 1.5076093673706055, "rewards/accuracies": 0.5, "rewards/chosen": -0.11831315606832504, "rewards/margins": -0.01216332707554102, "rewards/rejected": -0.1061498299241066, "step": 121 }, { "epoch": 0.07442427939606527, "grad_norm": 4.229111194610596, "learning_rate": 7.909859154929576e-06, "log_odds_chosen": 0.9734333753585815, "log_odds_ratio": -0.47383588552474976, "logits/chosen": -1.0670360326766968, "logits/rejected": -1.4969801902770996, "logps/chosen": -1.0761163234710693, "logps/rejected": -1.855771780014038, "loss": 1.3689, "nll_loss": 1.2728385925292969, "rewards/accuracies": 0.625, "rewards/chosen": -0.10761164128780365, "rewards/margins": 0.07796555012464523, "rewards/rejected": -0.1855771839618683, "step": 122 }, { "epoch": 0.0750343144730822, "grad_norm": 2.584045648574829, "learning_rate": 7.908879363135334e-06, "log_odds_chosen": 0.2911311388015747, "log_odds_ratio": -0.6096236705780029, "logits/chosen": -1.1615791320800781, "logits/rejected": -1.2067952156066895, "logps/chosen": -1.047412633895874, "logps/rejected": -1.2372617721557617, "loss": 1.4795, "nll_loss": 1.3526129722595215, "rewards/accuracies": 0.625, "rewards/chosen": -0.10474126785993576, "rewards/margins": 0.018984917551279068, "rewards/rejected": -0.12372617423534393, "step": 123 }, { "epoch": 0.07564434955009913, "grad_norm": 4.27337646484375, "learning_rate": 7.90789957134109e-06, "log_odds_chosen": 0.7384442090988159, "log_odds_ratio": -0.4355548918247223, "logits/chosen": -1.1374502182006836, "logits/rejected": -1.5365418195724487, "logps/chosen": -1.120978593826294, "logps/rejected": -1.6886301040649414, "loss": 1.3835, "nll_loss": 1.2309420108795166, "rewards/accuracies": 0.875, "rewards/chosen": -0.1120978444814682, "rewards/margins": 0.056765154004096985, "rewards/rejected": -0.16886301338672638, "step": 124 }, { "epoch": 0.07625438462711606, "grad_norm": 2.8423330783843994, "learning_rate": 7.906919779546845e-06, "log_odds_chosen": 0.14196622371673584, "log_odds_ratio": -0.6748397946357727, "logits/chosen": -1.2622095346450806, "logits/rejected": -1.3275763988494873, "logps/chosen": -1.2389153242111206, "logps/rejected": -1.3624787330627441, "loss": 1.3927, "nll_loss": 1.3775643110275269, "rewards/accuracies": 0.75, "rewards/chosen": -0.12389153242111206, "rewards/margins": 0.012356342747807503, "rewards/rejected": -0.13624787330627441, "step": 125 }, { "epoch": 0.07686441970413299, "grad_norm": 2.8779726028442383, "learning_rate": 7.905939987752603e-06, "log_odds_chosen": 0.22821316123008728, "log_odds_ratio": -0.6007992029190063, "logits/chosen": -1.0610326528549194, "logits/rejected": -1.3142515420913696, "logps/chosen": -1.1680020093917847, "logps/rejected": -1.3095020055770874, "loss": 1.436, "nll_loss": 1.253715991973877, "rewards/accuracies": 0.875, "rewards/chosen": -0.1168002039194107, "rewards/margins": 0.014150001108646393, "rewards/rejected": -0.1309501975774765, "step": 126 }, { "epoch": 0.07747445478114992, "grad_norm": 4.904614448547363, "learning_rate": 7.904960195958359e-06, "log_odds_chosen": 0.44551122188568115, "log_odds_ratio": -0.5292454957962036, "logits/chosen": -0.8443334698677063, "logits/rejected": -1.221947431564331, "logps/chosen": -0.7853314876556396, "logps/rejected": -1.064401626586914, "loss": 1.3196, "nll_loss": 1.0171289443969727, "rewards/accuracies": 0.75, "rewards/chosen": -0.07853315025568008, "rewards/margins": 0.027907006442546844, "rewards/rejected": -0.10644015669822693, "step": 127 }, { "epoch": 0.07808448985816685, "grad_norm": 3.2410480976104736, "learning_rate": 7.903980404164115e-06, "log_odds_chosen": 0.1636979877948761, "log_odds_ratio": -0.6903864741325378, "logits/chosen": -1.2255629301071167, "logits/rejected": -1.3192284107208252, "logps/chosen": -0.8054472804069519, "logps/rejected": -0.8842976093292236, "loss": 1.3844, "nll_loss": 1.4163761138916016, "rewards/accuracies": 0.5, "rewards/chosen": -0.08054472506046295, "rewards/margins": 0.007885031402111053, "rewards/rejected": -0.088429756462574, "step": 128 }, { "epoch": 0.07869452493518378, "grad_norm": 3.706758737564087, "learning_rate": 7.903000612369871e-06, "log_odds_chosen": 0.6512293219566345, "log_odds_ratio": -0.48414719104766846, "logits/chosen": -1.148362398147583, "logits/rejected": -1.3334453105926514, "logps/chosen": -1.103452205657959, "logps/rejected": -1.5347379446029663, "loss": 1.4881, "nll_loss": 1.388649344444275, "rewards/accuracies": 0.875, "rewards/chosen": -0.11034521460533142, "rewards/margins": 0.043128602206707, "rewards/rejected": -0.15347382426261902, "step": 129 }, { "epoch": 0.0793045600122007, "grad_norm": 5.968535423278809, "learning_rate": 7.902020820575627e-06, "log_odds_chosen": 0.12775471806526184, "log_odds_ratio": -0.7173998355865479, "logits/chosen": -1.4087412357330322, "logits/rejected": -1.351942539215088, "logps/chosen": -1.5334382057189941, "logps/rejected": -1.6283835172653198, "loss": 1.484, "nll_loss": 1.7714204788208008, "rewards/accuracies": 0.625, "rewards/chosen": -0.1533438116312027, "rewards/margins": 0.009494535624980927, "rewards/rejected": -0.16283835470676422, "step": 130 }, { "epoch": 0.07991459508921762, "grad_norm": 3.520920753479004, "learning_rate": 7.901041028781383e-06, "log_odds_chosen": 0.02116324007511139, "log_odds_ratio": -0.6967451572418213, "logits/chosen": -1.0003879070281982, "logits/rejected": -1.243984580039978, "logps/chosen": -0.9748080968856812, "logps/rejected": -1.022420883178711, "loss": 1.6394, "nll_loss": 1.355366826057434, "rewards/accuracies": 0.625, "rewards/chosen": -0.09748081862926483, "rewards/margins": 0.004761273507028818, "rewards/rejected": -0.10224208980798721, "step": 131 }, { "epoch": 0.08052463016623455, "grad_norm": 5.371114730834961, "learning_rate": 7.90006123698714e-06, "log_odds_chosen": 0.030416816473007202, "log_odds_ratio": -0.7471176385879517, "logits/chosen": -1.0737919807434082, "logits/rejected": -1.1260757446289062, "logps/chosen": -1.0649256706237793, "logps/rejected": -1.0417418479919434, "loss": 1.523, "nll_loss": 1.1407111883163452, "rewards/accuracies": 0.5, "rewards/chosen": -0.10649257898330688, "rewards/margins": -0.0023183878511190414, "rewards/rejected": -0.10417419672012329, "step": 132 }, { "epoch": 0.08113466524325148, "grad_norm": 3.622905969619751, "learning_rate": 7.899081445192895e-06, "log_odds_chosen": 0.21777856349945068, "log_odds_ratio": -0.6195762753486633, "logits/chosen": -1.154260277748108, "logits/rejected": -1.2924460172653198, "logps/chosen": -1.1341428756713867, "logps/rejected": -1.2631573677062988, "loss": 1.4328, "nll_loss": 1.421041488647461, "rewards/accuracies": 0.625, "rewards/chosen": -0.11341428756713867, "rewards/margins": 0.01290144957602024, "rewards/rejected": -0.12631574273109436, "step": 133 }, { "epoch": 0.08174470032026841, "grad_norm": 5.597217559814453, "learning_rate": 7.898101653398653e-06, "log_odds_chosen": 0.5181113481521606, "log_odds_ratio": -0.49572068452835083, "logits/chosen": -1.2273942232131958, "logits/rejected": -1.4605207443237305, "logps/chosen": -0.9054656028747559, "logps/rejected": -1.2591445446014404, "loss": 1.4096, "nll_loss": 1.3544666767120361, "rewards/accuracies": 0.875, "rewards/chosen": -0.09054656326770782, "rewards/margins": 0.03536789119243622, "rewards/rejected": -0.12591445446014404, "step": 134 }, { "epoch": 0.08235473539728534, "grad_norm": 1.853334665298462, "learning_rate": 7.89712186160441e-06, "log_odds_chosen": -0.373619943857193, "log_odds_ratio": -0.9486770629882812, "logits/chosen": -1.2792437076568604, "logits/rejected": -1.2949498891830444, "logps/chosen": -1.245469331741333, "logps/rejected": -0.9915220141410828, "loss": 1.4177, "nll_loss": 1.4791388511657715, "rewards/accuracies": 0.375, "rewards/chosen": -0.12454693764448166, "rewards/margins": -0.025394732132554054, "rewards/rejected": -0.09915219992399216, "step": 135 }, { "epoch": 0.08296477047430227, "grad_norm": 3.0137102603912354, "learning_rate": 7.896142069810164e-06, "log_odds_chosen": -0.1418757289648056, "log_odds_ratio": -0.8148285150527954, "logits/chosen": -1.2788519859313965, "logits/rejected": -1.1856008768081665, "logps/chosen": -1.1612852811813354, "logps/rejected": -1.1136564016342163, "loss": 1.3877, "nll_loss": 1.3305412530899048, "rewards/accuracies": 0.25, "rewards/chosen": -0.11612853407859802, "rewards/margins": -0.004762882366776466, "rewards/rejected": -0.11136564612388611, "step": 136 }, { "epoch": 0.0835748055513192, "grad_norm": 4.005176067352295, "learning_rate": 7.895162278015922e-06, "log_odds_chosen": 0.43406715989112854, "log_odds_ratio": -0.587730348110199, "logits/chosen": -1.2413970232009888, "logits/rejected": -1.3764030933380127, "logps/chosen": -1.2444043159484863, "logps/rejected": -1.5509674549102783, "loss": 1.4347, "nll_loss": 1.4477182626724243, "rewards/accuracies": 0.75, "rewards/chosen": -0.12444042414426804, "rewards/margins": 0.030656326562166214, "rewards/rejected": -0.15509675443172455, "step": 137 }, { "epoch": 0.08418484062833613, "grad_norm": 2.9474031925201416, "learning_rate": 7.894182486221678e-06, "log_odds_chosen": 0.14465387165546417, "log_odds_ratio": -0.7107018232345581, "logits/chosen": -1.2763992547988892, "logits/rejected": -1.450039029121399, "logps/chosen": -1.210968255996704, "logps/rejected": -1.342073678970337, "loss": 1.5993, "nll_loss": 1.370431661605835, "rewards/accuracies": 0.5, "rewards/chosen": -0.12109682708978653, "rewards/margins": 0.013110540807247162, "rewards/rejected": -0.1342073678970337, "step": 138 }, { "epoch": 0.08479487570535306, "grad_norm": 2.433887481689453, "learning_rate": 7.893202694427434e-06, "log_odds_chosen": 0.4795933961868286, "log_odds_ratio": -0.5422801375389099, "logits/chosen": -1.1874454021453857, "logits/rejected": -1.5333082675933838, "logps/chosen": -0.9994510412216187, "logps/rejected": -1.3797203302383423, "loss": 1.4411, "nll_loss": 1.3297679424285889, "rewards/accuracies": 0.625, "rewards/chosen": -0.09994509816169739, "rewards/margins": 0.03802693262696266, "rewards/rejected": -0.13797202706336975, "step": 139 }, { "epoch": 0.08540491078236999, "grad_norm": 2.355891466140747, "learning_rate": 7.89222290263319e-06, "log_odds_chosen": 0.25136280059814453, "log_odds_ratio": -0.6294978857040405, "logits/chosen": -1.0259785652160645, "logits/rejected": -1.187266230583191, "logps/chosen": -1.0109078884124756, "logps/rejected": -1.1631789207458496, "loss": 1.3052, "nll_loss": 1.2871769666671753, "rewards/accuracies": 0.625, "rewards/chosen": -0.10109078884124756, "rewards/margins": 0.015227104537189007, "rewards/rejected": -0.11631789803504944, "step": 140 }, { "epoch": 0.08601494585938692, "grad_norm": 7.172341346740723, "learning_rate": 7.891243110838946e-06, "log_odds_chosen": 0.6347734928131104, "log_odds_ratio": -0.46431177854537964, "logits/chosen": -1.102378010749817, "logits/rejected": -1.260683298110962, "logps/chosen": -1.082747459411621, "logps/rejected": -1.5759553909301758, "loss": 1.5691, "nll_loss": 1.6793529987335205, "rewards/accuracies": 0.875, "rewards/chosen": -0.10827475786209106, "rewards/margins": 0.049320776015520096, "rewards/rejected": -0.15759554505348206, "step": 141 }, { "epoch": 0.08662498093640385, "grad_norm": 2.86556077003479, "learning_rate": 7.890263319044702e-06, "log_odds_chosen": 0.6585478186607361, "log_odds_ratio": -0.49075204133987427, "logits/chosen": -1.2203806638717651, "logits/rejected": -1.599116325378418, "logps/chosen": -0.8886911273002625, "logps/rejected": -1.3001117706298828, "loss": 1.4717, "nll_loss": 1.3066432476043701, "rewards/accuracies": 0.875, "rewards/chosen": -0.0888691172003746, "rewards/margins": 0.04114207625389099, "rewards/rejected": -0.1300112009048462, "step": 142 }, { "epoch": 0.08723501601342078, "grad_norm": 5.8866190910339355, "learning_rate": 7.889283527250458e-06, "log_odds_chosen": 0.3619791269302368, "log_odds_ratio": -0.5894664525985718, "logits/chosen": -1.22959566116333, "logits/rejected": -1.0948355197906494, "logps/chosen": -0.8414080142974854, "logps/rejected": -1.0992717742919922, "loss": 1.3793, "nll_loss": 1.2369385957717896, "rewards/accuracies": 0.625, "rewards/chosen": -0.08414079993963242, "rewards/margins": 0.025786375626921654, "rewards/rejected": -0.10992717742919922, "step": 143 }, { "epoch": 0.0878450510904377, "grad_norm": 26.49953842163086, "learning_rate": 7.888303735456215e-06, "log_odds_chosen": 0.6532790660858154, "log_odds_ratio": -0.48965609073638916, "logits/chosen": -1.0762690305709839, "logits/rejected": -1.1244540214538574, "logps/chosen": -0.9342276453971863, "logps/rejected": -1.3381693363189697, "loss": 1.5491, "nll_loss": 1.208509922027588, "rewards/accuracies": 0.625, "rewards/chosen": -0.0934227705001831, "rewards/margins": 0.04039417952299118, "rewards/rejected": -0.1338169425725937, "step": 144 }, { "epoch": 0.08845508616745462, "grad_norm": 3.0650179386138916, "learning_rate": 7.887323943661972e-06, "log_odds_chosen": 0.4204995334148407, "log_odds_ratio": -0.6250064373016357, "logits/chosen": -1.1676100492477417, "logits/rejected": -1.400207281112671, "logps/chosen": -1.0003113746643066, "logps/rejected": -1.3584997653961182, "loss": 1.4469, "nll_loss": 1.3623360395431519, "rewards/accuracies": 0.5, "rewards/chosen": -0.10003113746643066, "rewards/margins": 0.035818830132484436, "rewards/rejected": -0.1358499675989151, "step": 145 }, { "epoch": 0.08906512124447155, "grad_norm": 6.4307708740234375, "learning_rate": 7.886344151867728e-06, "log_odds_chosen": 0.05927160009741783, "log_odds_ratio": -0.672429084777832, "logits/chosen": -1.3355669975280762, "logits/rejected": -1.2510297298431396, "logps/chosen": -1.3986968994140625, "logps/rejected": -1.4225834608078003, "loss": 1.3841, "nll_loss": 1.4041626453399658, "rewards/accuracies": 0.625, "rewards/chosen": -0.13986968994140625, "rewards/margins": 0.0023886552080512047, "rewards/rejected": -0.14225834608078003, "step": 146 }, { "epoch": 0.08967515632148848, "grad_norm": 4.570639133453369, "learning_rate": 7.885364360073483e-06, "log_odds_chosen": 0.2570459246635437, "log_odds_ratio": -0.5984739065170288, "logits/chosen": -0.9278987050056458, "logits/rejected": -1.0084857940673828, "logps/chosen": -1.0584014654159546, "logps/rejected": -1.2047770023345947, "loss": 1.3841, "nll_loss": 1.1126140356063843, "rewards/accuracies": 0.5, "rewards/chosen": -0.10584015399217606, "rewards/margins": 0.014637553133070469, "rewards/rejected": -0.12047769874334335, "step": 147 }, { "epoch": 0.09028519139850541, "grad_norm": 5.946976184844971, "learning_rate": 7.88438456827924e-06, "log_odds_chosen": 0.35491999983787537, "log_odds_ratio": -0.5449843406677246, "logits/chosen": -1.10956871509552, "logits/rejected": -1.277550220489502, "logps/chosen": -1.157602310180664, "logps/rejected": -1.39149010181427, "loss": 1.2459, "nll_loss": 1.3486790657043457, "rewards/accuracies": 0.75, "rewards/chosen": -0.11576023697853088, "rewards/margins": 0.023388780653476715, "rewards/rejected": -0.139149010181427, "step": 148 }, { "epoch": 0.09089522647552234, "grad_norm": 1.6783517599105835, "learning_rate": 7.883404776484997e-06, "log_odds_chosen": 0.05622230842709541, "log_odds_ratio": -0.7229754328727722, "logits/chosen": -1.3506461381912231, "logits/rejected": -1.4678401947021484, "logps/chosen": -1.2286078929901123, "logps/rejected": -1.2668933868408203, "loss": 1.3119, "nll_loss": 1.4218822717666626, "rewards/accuracies": 0.5, "rewards/chosen": -0.12286079674959183, "rewards/margins": 0.003828552085906267, "rewards/rejected": -0.1266893446445465, "step": 149 }, { "epoch": 0.09150526155253927, "grad_norm": 3.4379117488861084, "learning_rate": 7.882424984690753e-06, "log_odds_chosen": -0.014378666877746582, "log_odds_ratio": -0.7848690152168274, "logits/chosen": -1.4375735521316528, "logits/rejected": -1.3768131732940674, "logps/chosen": -1.3720768690109253, "logps/rejected": -1.3552272319793701, "loss": 1.5084, "nll_loss": 1.7152955532073975, "rewards/accuracies": 0.625, "rewards/chosen": -0.13720768690109253, "rewards/margins": -0.0016849683597683907, "rewards/rejected": -0.135522723197937, "step": 150 }, { "epoch": 0.0921152966295562, "grad_norm": 3.348435401916504, "learning_rate": 7.881445192896509e-06, "log_odds_chosen": 0.0893043577671051, "log_odds_ratio": -0.8196665644645691, "logits/chosen": -1.2399749755859375, "logits/rejected": -1.0910629034042358, "logps/chosen": -1.1135051250457764, "logps/rejected": -1.236304521560669, "loss": 1.2591, "nll_loss": 1.3143283128738403, "rewards/accuracies": 0.625, "rewards/chosen": -0.11135050654411316, "rewards/margins": 0.012279944494366646, "rewards/rejected": -0.12363045662641525, "step": 151 }, { "epoch": 0.09272533170657313, "grad_norm": 4.791187763214111, "learning_rate": 7.880465401102265e-06, "log_odds_chosen": 0.04991087689995766, "log_odds_ratio": -0.706268310546875, "logits/chosen": -0.9892158508300781, "logits/rejected": -1.1223692893981934, "logps/chosen": -1.0679625272750854, "logps/rejected": -1.1136910915374756, "loss": 1.5648, "nll_loss": 1.275721549987793, "rewards/accuracies": 0.5, "rewards/chosen": -0.1067962497472763, "rewards/margins": 0.004572855308651924, "rewards/rejected": -0.11136910319328308, "step": 152 }, { "epoch": 0.09333536678359006, "grad_norm": 2.4061615467071533, "learning_rate": 7.879485609308021e-06, "log_odds_chosen": 0.34717684984207153, "log_odds_ratio": -0.6345182657241821, "logits/chosen": -1.0934312343597412, "logits/rejected": -1.0458341836929321, "logps/chosen": -1.2081130743026733, "logps/rejected": -1.525361180305481, "loss": 1.405, "nll_loss": 1.3787500858306885, "rewards/accuracies": 0.625, "rewards/chosen": -0.12081131339073181, "rewards/margins": 0.031724799424409866, "rewards/rejected": -0.15253612399101257, "step": 153 }, { "epoch": 0.09394540186060699, "grad_norm": 1.9343433380126953, "learning_rate": 7.878505817513777e-06, "log_odds_chosen": 0.46090608835220337, "log_odds_ratio": -0.6262308359146118, "logits/chosen": -1.3056374788284302, "logits/rejected": -1.2237968444824219, "logps/chosen": -1.0223901271820068, "logps/rejected": -1.283342719078064, "loss": 1.3484, "nll_loss": 1.466414213180542, "rewards/accuracies": 0.625, "rewards/chosen": -0.10223900526762009, "rewards/margins": 0.026095274835824966, "rewards/rejected": -0.12833428382873535, "step": 154 }, { "epoch": 0.09455543693762392, "grad_norm": 5.402397632598877, "learning_rate": 7.877526025719534e-06, "log_odds_chosen": -0.12911826372146606, "log_odds_ratio": -0.8438730239868164, "logits/chosen": -0.8594803214073181, "logits/rejected": -0.8549875020980835, "logps/chosen": -1.180537462234497, "logps/rejected": -1.0692827701568604, "loss": 1.4419, "nll_loss": 1.4769642353057861, "rewards/accuracies": 0.375, "rewards/chosen": -0.11805374920368195, "rewards/margins": -0.011125463992357254, "rewards/rejected": -0.1069282814860344, "step": 155 }, { "epoch": 0.09516547201464085, "grad_norm": 2.9113271236419678, "learning_rate": 7.876546233925291e-06, "log_odds_chosen": 0.595944881439209, "log_odds_ratio": -0.5254595279693604, "logits/chosen": -1.2280075550079346, "logits/rejected": -1.370423674583435, "logps/chosen": -1.0528993606567383, "logps/rejected": -1.4895617961883545, "loss": 1.3266, "nll_loss": 1.4923590421676636, "rewards/accuracies": 0.875, "rewards/chosen": -0.10528993606567383, "rewards/margins": 0.043666258454322815, "rewards/rejected": -0.14895617961883545, "step": 156 }, { "epoch": 0.09577550709165777, "grad_norm": 2.2459347248077393, "learning_rate": 7.875566442131047e-06, "log_odds_chosen": -0.09127645194530487, "log_odds_ratio": -0.7930302619934082, "logits/chosen": -1.074855923652649, "logits/rejected": -1.071007251739502, "logps/chosen": -1.2522311210632324, "logps/rejected": -1.200369954109192, "loss": 1.3327, "nll_loss": 1.496026635169983, "rewards/accuracies": 0.375, "rewards/chosen": -0.12522311508655548, "rewards/margins": -0.005186118185520172, "rewards/rejected": -0.12003699690103531, "step": 157 }, { "epoch": 0.0963855421686747, "grad_norm": 2.969351053237915, "learning_rate": 7.874586650336804e-06, "log_odds_chosen": 0.3170674741268158, "log_odds_ratio": -0.5727916359901428, "logits/chosen": -1.0463624000549316, "logits/rejected": -1.3321806192398071, "logps/chosen": -1.093687653541565, "logps/rejected": -1.3176978826522827, "loss": 1.3844, "nll_loss": 1.3251476287841797, "rewards/accuracies": 0.75, "rewards/chosen": -0.10936877131462097, "rewards/margins": 0.02240101434290409, "rewards/rejected": -0.1317697912454605, "step": 158 }, { "epoch": 0.09699557724569163, "grad_norm": 2.9660561084747314, "learning_rate": 7.87360685854256e-06, "log_odds_chosen": 0.12162984162569046, "log_odds_ratio": -0.7173681855201721, "logits/chosen": -1.0569581985473633, "logits/rejected": -1.1636863946914673, "logps/chosen": -1.1827771663665771, "logps/rejected": -1.3211286067962646, "loss": 1.5194, "nll_loss": 1.3455352783203125, "rewards/accuracies": 0.5, "rewards/chosen": -0.11827771365642548, "rewards/margins": 0.013835138641297817, "rewards/rejected": -0.13211286067962646, "step": 159 }, { "epoch": 0.09760561232270856, "grad_norm": 1.7490644454956055, "learning_rate": 7.872627066748316e-06, "log_odds_chosen": 0.583695650100708, "log_odds_ratio": -0.545001208782196, "logits/chosen": -1.0111027956008911, "logits/rejected": -1.0914132595062256, "logps/chosen": -1.1313092708587646, "logps/rejected": -1.52713942527771, "loss": 1.377, "nll_loss": 1.3898284435272217, "rewards/accuracies": 0.875, "rewards/chosen": -0.11313091963529587, "rewards/margins": 0.039583008736371994, "rewards/rejected": -0.15271392464637756, "step": 160 }, { "epoch": 0.09821564739972548, "grad_norm": 3.1785848140716553, "learning_rate": 7.871647274954072e-06, "log_odds_chosen": 0.16322332620620728, "log_odds_ratio": -0.6722919344902039, "logits/chosen": -1.2017384767532349, "logits/rejected": -1.1447081565856934, "logps/chosen": -1.1550581455230713, "logps/rejected": -1.3226406574249268, "loss": 1.3063, "nll_loss": 1.270405650138855, "rewards/accuracies": 0.5, "rewards/chosen": -0.11550581455230713, "rewards/margins": 0.016758253797888756, "rewards/rejected": -0.13226406276226044, "step": 161 }, { "epoch": 0.09882568247674241, "grad_norm": 2.058138847351074, "learning_rate": 7.870667483159828e-06, "log_odds_chosen": 0.6606152057647705, "log_odds_ratio": -0.4936741292476654, "logits/chosen": -1.0855475664138794, "logits/rejected": -1.280519962310791, "logps/chosen": -1.0197656154632568, "logps/rejected": -1.4616382122039795, "loss": 1.2458, "nll_loss": 1.219879150390625, "rewards/accuracies": 0.75, "rewards/chosen": -0.10197657346725464, "rewards/margins": 0.0441872701048851, "rewards/rejected": -0.14616383612155914, "step": 162 }, { "epoch": 0.09943571755375934, "grad_norm": 5.749179840087891, "learning_rate": 7.869687691365584e-06, "log_odds_chosen": 0.10575216263532639, "log_odds_ratio": -0.700119137763977, "logits/chosen": -1.213775634765625, "logits/rejected": -1.2077088356018066, "logps/chosen": -1.1352986097335815, "logps/rejected": -1.1872684955596924, "loss": 1.5105, "nll_loss": 1.4656716585159302, "rewards/accuracies": 0.5, "rewards/chosen": -0.11352986097335815, "rewards/margins": 0.005196994170546532, "rewards/rejected": -0.11872684955596924, "step": 163 }, { "epoch": 0.10004575263077627, "grad_norm": 1.4665919542312622, "learning_rate": 7.86870789957134e-06, "log_odds_chosen": 0.5886954069137573, "log_odds_ratio": -0.46617960929870605, "logits/chosen": -1.0850622653961182, "logits/rejected": -1.1469213962554932, "logps/chosen": -1.1416900157928467, "logps/rejected": -1.5752692222595215, "loss": 1.3748, "nll_loss": 1.5358649492263794, "rewards/accuracies": 0.875, "rewards/chosen": -0.11416900157928467, "rewards/margins": 0.04335791990160942, "rewards/rejected": -0.1575269252061844, "step": 164 }, { "epoch": 0.1006557877077932, "grad_norm": 3.4263854026794434, "learning_rate": 7.867728107777096e-06, "log_odds_chosen": 0.4149549603462219, "log_odds_ratio": -0.5683473944664001, "logits/chosen": -0.7827431559562683, "logits/rejected": -1.0070935487747192, "logps/chosen": -0.8936384320259094, "logps/rejected": -1.1562161445617676, "loss": 1.3759, "nll_loss": 1.1872224807739258, "rewards/accuracies": 0.75, "rewards/chosen": -0.08936384320259094, "rewards/margins": 0.02625775709748268, "rewards/rejected": -0.11562160402536392, "step": 165 }, { "epoch": 0.10126582278481013, "grad_norm": 4.065791606903076, "learning_rate": 7.866748315982853e-06, "log_odds_chosen": 0.3570523262023926, "log_odds_ratio": -0.6202707290649414, "logits/chosen": -0.5728274583816528, "logits/rejected": -0.7317312955856323, "logps/chosen": -1.0034499168395996, "logps/rejected": -1.258690595626831, "loss": 1.2795, "nll_loss": 1.0664379596710205, "rewards/accuracies": 0.5, "rewards/chosen": -0.10034500062465668, "rewards/margins": 0.025524068623781204, "rewards/rejected": -0.12586906552314758, "step": 166 }, { "epoch": 0.10187585786182705, "grad_norm": 9.925978660583496, "learning_rate": 7.86576852418861e-06, "log_odds_chosen": 0.20335596799850464, "log_odds_ratio": -0.6658638119697571, "logits/chosen": -0.8592861890792847, "logits/rejected": -0.999780535697937, "logps/chosen": -1.0982885360717773, "logps/rejected": -1.2225817441940308, "loss": 1.4922, "nll_loss": 1.2556052207946777, "rewards/accuracies": 0.5, "rewards/chosen": -0.10982886701822281, "rewards/margins": 0.01242931466549635, "rewards/rejected": -0.12225817888975143, "step": 167 }, { "epoch": 0.10248589293884398, "grad_norm": 2.1818580627441406, "learning_rate": 7.864788732394367e-06, "log_odds_chosen": 0.28794994950294495, "log_odds_ratio": -0.6022144556045532, "logits/chosen": -0.9443429708480835, "logits/rejected": -1.1084773540496826, "logps/chosen": -1.1514256000518799, "logps/rejected": -1.3564538955688477, "loss": 1.4528, "nll_loss": 1.384474277496338, "rewards/accuracies": 0.75, "rewards/chosen": -0.11514255404472351, "rewards/margins": 0.02050282433629036, "rewards/rejected": -0.13564538955688477, "step": 168 }, { "epoch": 0.10309592801586091, "grad_norm": 1.9499655961990356, "learning_rate": 7.863808940600123e-06, "log_odds_chosen": 0.20603181421756744, "log_odds_ratio": -0.6748741269111633, "logits/chosen": -1.1146022081375122, "logits/rejected": -1.2887415885925293, "logps/chosen": -1.0124105215072632, "logps/rejected": -1.135079264640808, "loss": 1.3019, "nll_loss": 1.2108619213104248, "rewards/accuracies": 0.625, "rewards/chosen": -0.10124105215072632, "rewards/margins": 0.012266866862773895, "rewards/rejected": -0.11350792646408081, "step": 169 }, { "epoch": 0.10370596309287784, "grad_norm": 2.7438721656799316, "learning_rate": 7.862829148805879e-06, "log_odds_chosen": -0.5365756750106812, "log_odds_ratio": -1.318666696548462, "logits/chosen": -0.8161740899085999, "logits/rejected": -1.0271520614624023, "logps/chosen": -1.421082854270935, "logps/rejected": -1.036073923110962, "loss": 1.3199, "nll_loss": 1.3646124601364136, "rewards/accuracies": 0.5, "rewards/chosen": -0.14210829138755798, "rewards/margins": -0.038500890135765076, "rewards/rejected": -0.10360738635063171, "step": 170 }, { "epoch": 0.10431599816989477, "grad_norm": 8.271568298339844, "learning_rate": 7.861849357011635e-06, "log_odds_chosen": 0.08955623209476471, "log_odds_ratio": -0.7078862190246582, "logits/chosen": -0.8523294925689697, "logits/rejected": -0.8877230286598206, "logps/chosen": -1.087225079536438, "logps/rejected": -1.107897162437439, "loss": 1.4533, "nll_loss": 1.3651297092437744, "rewards/accuracies": 0.5, "rewards/chosen": -0.10872252285480499, "rewards/margins": 0.0020672082901000977, "rewards/rejected": -0.1107897162437439, "step": 171 }, { "epoch": 0.1049260332469117, "grad_norm": 3.687635660171509, "learning_rate": 7.860869565217391e-06, "log_odds_chosen": -0.04529111087322235, "log_odds_ratio": -0.775110125541687, "logits/chosen": -0.8541119694709778, "logits/rejected": -1.0217115879058838, "logps/chosen": -1.0813311338424683, "logps/rejected": -1.063571572303772, "loss": 1.3254, "nll_loss": 1.2203162908554077, "rewards/accuracies": 0.625, "rewards/chosen": -0.10813312232494354, "rewards/margins": -0.001775958575308323, "rewards/rejected": -0.1063571646809578, "step": 172 }, { "epoch": 0.10553606832392863, "grad_norm": 2.5167598724365234, "learning_rate": 7.859889773423147e-06, "log_odds_chosen": 0.11281800270080566, "log_odds_ratio": -0.6928614377975464, "logits/chosen": -0.8261829614639282, "logits/rejected": -0.9021503329277039, "logps/chosen": -1.0672982931137085, "logps/rejected": -1.106156349182129, "loss": 1.3133, "nll_loss": 1.2119895219802856, "rewards/accuracies": 0.625, "rewards/chosen": -0.10672982782125473, "rewards/margins": 0.0038858074694871902, "rewards/rejected": -0.11061564087867737, "step": 173 }, { "epoch": 0.10614610340094556, "grad_norm": 2.510709047317505, "learning_rate": 7.858909981628903e-06, "log_odds_chosen": 0.36295396089553833, "log_odds_ratio": -0.5557507872581482, "logits/chosen": -0.9477484822273254, "logits/rejected": -0.9344209432601929, "logps/chosen": -1.0376968383789062, "logps/rejected": -1.2641576528549194, "loss": 1.3228, "nll_loss": 1.2529652118682861, "rewards/accuracies": 0.625, "rewards/chosen": -0.1037696897983551, "rewards/margins": 0.02264608070254326, "rewards/rejected": -0.12641575932502747, "step": 174 }, { "epoch": 0.10675613847796249, "grad_norm": 6.990551471710205, "learning_rate": 7.85793018983466e-06, "log_odds_chosen": -0.5427697896957397, "log_odds_ratio": -1.0143355131149292, "logits/chosen": -0.9839531183242798, "logits/rejected": -1.049778938293457, "logps/chosen": -1.6612422466278076, "logps/rejected": -1.2404645681381226, "loss": 1.6602, "nll_loss": 1.6968659162521362, "rewards/accuracies": 0.125, "rewards/chosen": -0.16612422466278076, "rewards/margins": -0.04207775369286537, "rewards/rejected": -0.1240464597940445, "step": 175 }, { "epoch": 0.1073661735549794, "grad_norm": 6.081185340881348, "learning_rate": 7.856950398040416e-06, "log_odds_chosen": 0.16970294713974, "log_odds_ratio": -0.6355573534965515, "logits/chosen": -1.0590956211090088, "logits/rejected": -1.0066609382629395, "logps/chosen": -1.0663025379180908, "logps/rejected": -1.1990355253219604, "loss": 1.3806, "nll_loss": 1.246698021888733, "rewards/accuracies": 0.625, "rewards/chosen": -0.10663025826215744, "rewards/margins": 0.01327330619096756, "rewards/rejected": -0.1199035570025444, "step": 176 }, { "epoch": 0.10797620863199633, "grad_norm": 2.884753942489624, "learning_rate": 7.855970606246172e-06, "log_odds_chosen": 0.2539170980453491, "log_odds_ratio": -0.5829100608825684, "logits/chosen": -0.8447000980377197, "logits/rejected": -0.9607677459716797, "logps/chosen": -1.0334771871566772, "logps/rejected": -1.188652515411377, "loss": 1.1779, "nll_loss": 1.1102710962295532, "rewards/accuracies": 0.75, "rewards/chosen": -0.10334771871566772, "rewards/margins": 0.015517533756792545, "rewards/rejected": -0.1188652515411377, "step": 177 }, { "epoch": 0.10858624370901326, "grad_norm": 2.162673234939575, "learning_rate": 7.854990814451928e-06, "log_odds_chosen": -0.08424600958824158, "log_odds_ratio": -0.8174166679382324, "logits/chosen": -1.0618226528167725, "logits/rejected": -0.9548556804656982, "logps/chosen": -1.2410895824432373, "logps/rejected": -1.225845456123352, "loss": 1.4378, "nll_loss": 1.5244979858398438, "rewards/accuracies": 0.5, "rewards/chosen": -0.12410895526409149, "rewards/margins": -0.0015244167298078537, "rewards/rejected": -0.12258453667163849, "step": 178 }, { "epoch": 0.1091962787860302, "grad_norm": 5.083796977996826, "learning_rate": 7.854011022657686e-06, "log_odds_chosen": 0.5103251338005066, "log_odds_ratio": -0.4962230920791626, "logits/chosen": -0.9365650415420532, "logits/rejected": -0.8843255043029785, "logps/chosen": -0.8852238655090332, "logps/rejected": -1.2005057334899902, "loss": 1.3779, "nll_loss": 1.1476835012435913, "rewards/accuracies": 0.75, "rewards/chosen": -0.08852238953113556, "rewards/margins": 0.03152818977832794, "rewards/rejected": -0.1200505867600441, "step": 179 }, { "epoch": 0.10980631386304712, "grad_norm": 6.4780426025390625, "learning_rate": 7.853031230863442e-06, "log_odds_chosen": 0.29865774512290955, "log_odds_ratio": -0.6353069543838501, "logits/chosen": -0.9371895790100098, "logits/rejected": -1.0918865203857422, "logps/chosen": -1.0941035747528076, "logps/rejected": -1.296966314315796, "loss": 1.3989, "nll_loss": 1.3542423248291016, "rewards/accuracies": 0.75, "rewards/chosen": -0.1094103530049324, "rewards/margins": 0.020286284387111664, "rewards/rejected": -0.12969663739204407, "step": 180 }, { "epoch": 0.11041634894006405, "grad_norm": 3.271803379058838, "learning_rate": 7.852051439069198e-06, "log_odds_chosen": 0.004946403205394745, "log_odds_ratio": -0.7873402237892151, "logits/chosen": -0.9936560988426208, "logits/rejected": -0.907215416431427, "logps/chosen": -1.0562843084335327, "logps/rejected": -1.1214905977249146, "loss": 1.2781, "nll_loss": 1.360177755355835, "rewards/accuracies": 0.625, "rewards/chosen": -0.10562843084335327, "rewards/margins": 0.006520625203847885, "rewards/rejected": -0.11214905977249146, "step": 181 }, { "epoch": 0.11102638401708098, "grad_norm": 3.945678234100342, "learning_rate": 7.851071647274954e-06, "log_odds_chosen": -0.1942078024148941, "log_odds_ratio": -0.9065369367599487, "logits/chosen": -0.9017473459243774, "logits/rejected": -0.7692971229553223, "logps/chosen": -1.1868854761123657, "logps/rejected": -1.0838384628295898, "loss": 1.5017, "nll_loss": 1.5011121034622192, "rewards/accuracies": 0.375, "rewards/chosen": -0.11868854612112045, "rewards/margins": -0.010304704308509827, "rewards/rejected": -0.10838384926319122, "step": 182 }, { "epoch": 0.11163641909409791, "grad_norm": 5.649087905883789, "learning_rate": 7.85009185548071e-06, "log_odds_chosen": 0.24927404522895813, "log_odds_ratio": -0.6430729627609253, "logits/chosen": -0.8076207637786865, "logits/rejected": -0.8097611665725708, "logps/chosen": -1.0927278995513916, "logps/rejected": -1.2588350772857666, "loss": 1.4877, "nll_loss": 1.1501646041870117, "rewards/accuracies": 0.75, "rewards/chosen": -0.1092727929353714, "rewards/margins": 0.01661071926355362, "rewards/rejected": -0.12588350474834442, "step": 183 }, { "epoch": 0.11224645417111484, "grad_norm": 12.27353286743164, "learning_rate": 7.849112063686466e-06, "log_odds_chosen": 0.317342609167099, "log_odds_ratio": -0.6483021974563599, "logits/chosen": -0.9511519074440002, "logits/rejected": -0.9422733783721924, "logps/chosen": -1.0317199230194092, "logps/rejected": -1.2072339057922363, "loss": 1.4439, "nll_loss": 1.5252904891967773, "rewards/accuracies": 0.5, "rewards/chosen": -0.10317198932170868, "rewards/margins": 0.017551405355334282, "rewards/rejected": -0.12072339653968811, "step": 184 }, { "epoch": 0.11285648924813177, "grad_norm": 19.325729370117188, "learning_rate": 7.848132271892222e-06, "log_odds_chosen": 0.23027971386909485, "log_odds_ratio": -0.683318018913269, "logits/chosen": -1.0750980377197266, "logits/rejected": -0.8933321237564087, "logps/chosen": -1.0942838191986084, "logps/rejected": -1.24805748462677, "loss": 1.4567, "nll_loss": 1.508509874343872, "rewards/accuracies": 0.5, "rewards/chosen": -0.10942838340997696, "rewards/margins": 0.015377364121377468, "rewards/rejected": -0.124805748462677, "step": 185 }, { "epoch": 0.1134665243251487, "grad_norm": 9.627291679382324, "learning_rate": 7.847152480097978e-06, "log_odds_chosen": 0.24410946667194366, "log_odds_ratio": -0.6359568238258362, "logits/chosen": -1.0439263582229614, "logits/rejected": -1.0144639015197754, "logps/chosen": -1.0447264909744263, "logps/rejected": -1.1580917835235596, "loss": 1.4564, "nll_loss": 1.444150447845459, "rewards/accuracies": 0.75, "rewards/chosen": -0.10447265207767487, "rewards/margins": 0.011336536146700382, "rewards/rejected": -0.11580918729305267, "step": 186 }, { "epoch": 0.11407655940216563, "grad_norm": 2.9236338138580322, "learning_rate": 7.846172688303735e-06, "log_odds_chosen": 0.11882305890321732, "log_odds_ratio": -0.6735343933105469, "logits/chosen": -0.8650327324867249, "logits/rejected": -0.9523540735244751, "logps/chosen": -1.1394004821777344, "logps/rejected": -1.2168219089508057, "loss": 1.3144, "nll_loss": 1.257396936416626, "rewards/accuracies": 0.75, "rewards/chosen": -0.1139400526881218, "rewards/margins": 0.007742150686681271, "rewards/rejected": -0.12168219685554504, "step": 187 }, { "epoch": 0.11468659447918256, "grad_norm": 3.3455135822296143, "learning_rate": 7.84519289650949e-06, "log_odds_chosen": 0.1941705048084259, "log_odds_ratio": -0.6572586297988892, "logits/chosen": -0.8397231101989746, "logits/rejected": -1.017362117767334, "logps/chosen": -0.9889163970947266, "logps/rejected": -1.0689570903778076, "loss": 1.2099, "nll_loss": 1.178919792175293, "rewards/accuracies": 0.625, "rewards/chosen": -0.09889163821935654, "rewards/margins": 0.008004066534340382, "rewards/rejected": -0.10689570009708405, "step": 188 }, { "epoch": 0.11529662955619949, "grad_norm": 3.1025214195251465, "learning_rate": 7.844213104715247e-06, "log_odds_chosen": 0.11875510215759277, "log_odds_ratio": -0.7078925967216492, "logits/chosen": -1.1729754209518433, "logits/rejected": -1.134071946144104, "logps/chosen": -1.227812647819519, "logps/rejected": -1.3094182014465332, "loss": 1.5047, "nll_loss": 1.9594106674194336, "rewards/accuracies": 0.625, "rewards/chosen": -0.12278127670288086, "rewards/margins": 0.008160561323165894, "rewards/rejected": -0.13094183802604675, "step": 189 }, { "epoch": 0.11590666463321642, "grad_norm": 6.929073333740234, "learning_rate": 7.843233312921005e-06, "log_odds_chosen": 0.26751795411109924, "log_odds_ratio": -0.6137574315071106, "logits/chosen": -0.9209662675857544, "logits/rejected": -0.989007294178009, "logps/chosen": -1.0029923915863037, "logps/rejected": -1.1507459878921509, "loss": 1.3208, "nll_loss": 1.2956995964050293, "rewards/accuracies": 0.75, "rewards/chosen": -0.10029923915863037, "rewards/margins": 0.014775356277823448, "rewards/rejected": -0.11507460474967957, "step": 190 }, { "epoch": 0.11651669971023333, "grad_norm": 3.0034968852996826, "learning_rate": 7.84225352112676e-06, "log_odds_chosen": 0.030305534601211548, "log_odds_ratio": -0.7245807647705078, "logits/chosen": -1.1357676982879639, "logits/rejected": -1.1244070529937744, "logps/chosen": -1.1368027925491333, "logps/rejected": -1.1743425130844116, "loss": 1.424, "nll_loss": 1.6015983819961548, "rewards/accuracies": 0.5, "rewards/chosen": -0.11368028819561005, "rewards/margins": 0.003753962926566601, "rewards/rejected": -0.11743424832820892, "step": 191 }, { "epoch": 0.11712673478725026, "grad_norm": 3.04172945022583, "learning_rate": 7.841273729332517e-06, "log_odds_chosen": 0.04658292233943939, "log_odds_ratio": -0.795472264289856, "logits/chosen": -0.8458470106124878, "logits/rejected": -0.8255295753479004, "logps/chosen": -1.0437333583831787, "logps/rejected": -1.0356088876724243, "loss": 1.4037, "nll_loss": 1.410264015197754, "rewards/accuracies": 0.375, "rewards/chosen": -0.10437334328889847, "rewards/margins": -0.0008124485611915588, "rewards/rejected": -0.10356089472770691, "step": 192 }, { "epoch": 0.11773676986426719, "grad_norm": 3.136310338973999, "learning_rate": 7.840293937538273e-06, "log_odds_chosen": -0.043125513941049576, "log_odds_ratio": -0.7571417689323425, "logits/chosen": -0.7573177218437195, "logits/rejected": -0.8878939151763916, "logps/chosen": -1.0945210456848145, "logps/rejected": -1.0774478912353516, "loss": 1.4509, "nll_loss": 1.4057884216308594, "rewards/accuracies": 0.5, "rewards/chosen": -0.10945209860801697, "rewards/margins": -0.0017073126509785652, "rewards/rejected": -0.10774478316307068, "step": 193 }, { "epoch": 0.11834680494128412, "grad_norm": 4.075429439544678, "learning_rate": 7.839314145744029e-06, "log_odds_chosen": -0.13312304019927979, "log_odds_ratio": -0.7801726460456848, "logits/chosen": -0.8572752475738525, "logits/rejected": -0.8861173987388611, "logps/chosen": -1.7246925830841064, "logps/rejected": -1.6161044836044312, "loss": 1.3786, "nll_loss": 1.3538603782653809, "rewards/accuracies": 0.25, "rewards/chosen": -0.17246925830841064, "rewards/margins": -0.010858817026019096, "rewards/rejected": -0.1616104543209076, "step": 194 }, { "epoch": 0.11895684001830105, "grad_norm": 3.7283475399017334, "learning_rate": 7.838334353949785e-06, "log_odds_chosen": 0.4227336645126343, "log_odds_ratio": -0.5464380979537964, "logits/chosen": -0.7927939295768738, "logits/rejected": -0.8977031707763672, "logps/chosen": -0.7377104759216309, "logps/rejected": -0.9756165742874146, "loss": 1.3852, "nll_loss": 1.0400207042694092, "rewards/accuracies": 0.75, "rewards/chosen": -0.07377105206251144, "rewards/margins": 0.023790616542100906, "rewards/rejected": -0.09756167232990265, "step": 195 }, { "epoch": 0.11956687509531798, "grad_norm": 2.515411138534546, "learning_rate": 7.837354562155541e-06, "log_odds_chosen": 0.4669792056083679, "log_odds_ratio": -0.5366051197052002, "logits/chosen": -0.7617936134338379, "logits/rejected": -0.8083064556121826, "logps/chosen": -0.8531400561332703, "logps/rejected": -1.0828638076782227, "loss": 1.1146, "nll_loss": 1.0207661390304565, "rewards/accuracies": 0.75, "rewards/chosen": -0.08531400561332703, "rewards/margins": 0.02297237142920494, "rewards/rejected": -0.10828638076782227, "step": 196 }, { "epoch": 0.12017691017233491, "grad_norm": 5.3390583992004395, "learning_rate": 7.836374770361297e-06, "log_odds_chosen": 0.35203611850738525, "log_odds_ratio": -0.626430332660675, "logits/chosen": -0.9395579099655151, "logits/rejected": -0.8584036827087402, "logps/chosen": -1.099111795425415, "logps/rejected": -1.3202377557754517, "loss": 1.221, "nll_loss": 1.305481195449829, "rewards/accuracies": 0.5, "rewards/chosen": -0.10991118848323822, "rewards/margins": 0.02211260050535202, "rewards/rejected": -0.13202378153800964, "step": 197 }, { "epoch": 0.12078694524935184, "grad_norm": 3.111992597579956, "learning_rate": 7.835394978567055e-06, "log_odds_chosen": -0.03326989710330963, "log_odds_ratio": -0.7789394855499268, "logits/chosen": -0.9019477367401123, "logits/rejected": -0.7326586842536926, "logps/chosen": -1.2825260162353516, "logps/rejected": -1.2714803218841553, "loss": 1.3472, "nll_loss": 1.1968786716461182, "rewards/accuracies": 0.5, "rewards/chosen": -0.12825261056423187, "rewards/margins": -0.0011045671999454498, "rewards/rejected": -0.12714803218841553, "step": 198 }, { "epoch": 0.12139698032636877, "grad_norm": 3.7612791061401367, "learning_rate": 7.83441518677281e-06, "log_odds_chosen": 0.2125612497329712, "log_odds_ratio": -0.6685791015625, "logits/chosen": -0.8238197565078735, "logits/rejected": -0.8709234595298767, "logps/chosen": -1.1072832345962524, "logps/rejected": -1.2210218906402588, "loss": 1.3268, "nll_loss": 1.2351850271224976, "rewards/accuracies": 0.5, "rewards/chosen": -0.11072833836078644, "rewards/margins": 0.011373856104910374, "rewards/rejected": -0.12210218608379364, "step": 199 }, { "epoch": 0.1220070154033857, "grad_norm": 4.530349254608154, "learning_rate": 7.833435394978566e-06, "log_odds_chosen": 0.514009416103363, "log_odds_ratio": -0.5497227907180786, "logits/chosen": -0.7770557403564453, "logits/rejected": -0.7417598962783813, "logps/chosen": -0.9763879776000977, "logps/rejected": -1.347785472869873, "loss": 1.3148, "nll_loss": 1.1386100053787231, "rewards/accuracies": 0.625, "rewards/chosen": -0.0976388081908226, "rewards/margins": 0.03713974729180336, "rewards/rejected": -0.13477855920791626, "step": 200 }, { "epoch": 0.12261705048040263, "grad_norm": 3.7680504322052, "learning_rate": 7.832455603184324e-06, "log_odds_chosen": 0.4041670560836792, "log_odds_ratio": -0.54463791847229, "logits/chosen": -0.8970286250114441, "logits/rejected": -0.6390453577041626, "logps/chosen": -1.0731372833251953, "logps/rejected": -1.3535892963409424, "loss": 1.2998, "nll_loss": 1.2492234706878662, "rewards/accuracies": 0.875, "rewards/chosen": -0.10731372237205505, "rewards/margins": 0.028045203536748886, "rewards/rejected": -0.13535892963409424, "step": 201 }, { "epoch": 0.12322708555741956, "grad_norm": 2.5117201805114746, "learning_rate": 7.83147581139008e-06, "log_odds_chosen": 0.3032936751842499, "log_odds_ratio": -0.562832772731781, "logits/chosen": -0.7800061106681824, "logits/rejected": -0.7307410836219788, "logps/chosen": -0.988383412361145, "logps/rejected": -1.1835048198699951, "loss": 1.3012, "nll_loss": 1.150646686553955, "rewards/accuracies": 0.75, "rewards/chosen": -0.09883834421634674, "rewards/margins": 0.01951213739812374, "rewards/rejected": -0.11835047602653503, "step": 202 }, { "epoch": 0.12383712063443648, "grad_norm": 2.594458818435669, "learning_rate": 7.830496019595836e-06, "log_odds_chosen": 0.02796546369791031, "log_odds_ratio": -0.7040122747421265, "logits/chosen": -0.825912356376648, "logits/rejected": -0.782892644405365, "logps/chosen": -0.966590940952301, "logps/rejected": -0.9735561609268188, "loss": 1.3193, "nll_loss": 1.2082233428955078, "rewards/accuracies": 0.375, "rewards/chosen": -0.0966590940952301, "rewards/margins": 0.0006965235807001591, "rewards/rejected": -0.09735561907291412, "step": 203 }, { "epoch": 0.12444715571145341, "grad_norm": 3.537940502166748, "learning_rate": 7.829516227801592e-06, "log_odds_chosen": 0.37540724873542786, "log_odds_ratio": -0.5521938800811768, "logits/chosen": -0.7434271574020386, "logits/rejected": -0.7378321886062622, "logps/chosen": -0.8247733116149902, "logps/rejected": -1.0055336952209473, "loss": 1.3914, "nll_loss": 1.2236034870147705, "rewards/accuracies": 0.875, "rewards/chosen": -0.08247733116149902, "rewards/margins": 0.018076028674840927, "rewards/rejected": -0.10055336356163025, "step": 204 }, { "epoch": 0.12505719078847033, "grad_norm": 1.878038763999939, "learning_rate": 7.828536436007348e-06, "log_odds_chosen": 0.26669076085090637, "log_odds_ratio": -0.6411176919937134, "logits/chosen": -0.7845975160598755, "logits/rejected": -0.797978401184082, "logps/chosen": -0.7354211211204529, "logps/rejected": -0.843570351600647, "loss": 1.2361, "nll_loss": 1.142281413078308, "rewards/accuracies": 0.5, "rewards/chosen": -0.07354211062192917, "rewards/margins": 0.01081492193043232, "rewards/rejected": -0.08435703814029694, "step": 205 }, { "epoch": 0.12566722586548726, "grad_norm": 3.427781343460083, "learning_rate": 7.827556644213104e-06, "log_odds_chosen": 0.012422129511833191, "log_odds_ratio": -0.7249017357826233, "logits/chosen": -0.7873796820640564, "logits/rejected": -0.7016153335571289, "logps/chosen": -0.9875779151916504, "logps/rejected": -0.9857470393180847, "loss": 1.2269, "nll_loss": 1.1658076047897339, "rewards/accuracies": 0.25, "rewards/chosen": -0.098757803440094, "rewards/margins": -0.00018308963626623154, "rewards/rejected": -0.09857470542192459, "step": 206 }, { "epoch": 0.1262772609425042, "grad_norm": 3.088200330734253, "learning_rate": 7.82657685241886e-06, "log_odds_chosen": 0.5633070468902588, "log_odds_ratio": -0.5885379910469055, "logits/chosen": -0.860716700553894, "logits/rejected": -0.70885169506073, "logps/chosen": -0.8846802115440369, "logps/rejected": -1.2800440788269043, "loss": 1.1872, "nll_loss": 1.1562678813934326, "rewards/accuracies": 0.625, "rewards/chosen": -0.08846801519393921, "rewards/margins": 0.03953639045357704, "rewards/rejected": -0.12800440192222595, "step": 207 }, { "epoch": 0.12688729601952112, "grad_norm": 4.818462371826172, "learning_rate": 7.825597060624617e-06, "log_odds_chosen": -0.10246238857507706, "log_odds_ratio": -0.8276905417442322, "logits/chosen": -0.9869675636291504, "logits/rejected": -0.9221802949905396, "logps/chosen": -1.410804033279419, "logps/rejected": -1.2599503993988037, "loss": 1.4054, "nll_loss": 1.6778582334518433, "rewards/accuracies": 0.375, "rewards/chosen": -0.14108040928840637, "rewards/margins": -0.015085364691913128, "rewards/rejected": -0.12599505484104156, "step": 208 }, { "epoch": 0.12749733109653805, "grad_norm": 2.9786312580108643, "learning_rate": 7.824617268830374e-06, "log_odds_chosen": -0.1858944445848465, "log_odds_ratio": -0.8182568550109863, "logits/chosen": -0.7915933728218079, "logits/rejected": -0.7082310914993286, "logps/chosen": -1.1173920631408691, "logps/rejected": -0.964138388633728, "loss": 1.2994, "nll_loss": 1.3286936283111572, "rewards/accuracies": 0.25, "rewards/chosen": -0.11173919588327408, "rewards/margins": -0.015325360000133514, "rewards/rejected": -0.09641383588314056, "step": 209 }, { "epoch": 0.12810736617355498, "grad_norm": 5.914094924926758, "learning_rate": 7.823637477036129e-06, "log_odds_chosen": -0.3528755307197571, "log_odds_ratio": -0.9103401899337769, "logits/chosen": -0.9857897758483887, "logits/rejected": -1.0006698369979858, "logps/chosen": -1.1271506547927856, "logps/rejected": -0.9123197197914124, "loss": 1.5582, "nll_loss": 1.3714121580123901, "rewards/accuracies": 0.25, "rewards/chosen": -0.11271507292985916, "rewards/margins": -0.021483099088072777, "rewards/rejected": -0.09123197197914124, "step": 210 }, { "epoch": 0.1287174012505719, "grad_norm": 8.720269203186035, "learning_rate": 7.822657685241885e-06, "log_odds_chosen": 0.6110432147979736, "log_odds_ratio": -0.44838786125183105, "logits/chosen": -1.0277785062789917, "logits/rejected": -0.9804999232292175, "logps/chosen": -0.7535759210586548, "logps/rejected": -1.1139512062072754, "loss": 1.2327, "nll_loss": 0.9496726989746094, "rewards/accuracies": 0.875, "rewards/chosen": -0.075357586145401, "rewards/margins": 0.03603753075003624, "rewards/rejected": -0.11139512062072754, "step": 211 }, { "epoch": 0.12932743632758884, "grad_norm": 3.5761148929595947, "learning_rate": 7.821677893447643e-06, "log_odds_chosen": -0.2281191647052765, "log_odds_ratio": -0.8665253520011902, "logits/chosen": -1.1570154428482056, "logits/rejected": -1.1549826860427856, "logps/chosen": -1.0511505603790283, "logps/rejected": -0.926472544670105, "loss": 1.3174, "nll_loss": 1.3033537864685059, "rewards/accuracies": 0.5, "rewards/chosen": -0.10511505603790283, "rewards/margins": -0.012467803433537483, "rewards/rejected": -0.0926472544670105, "step": 212 }, { "epoch": 0.12993747140460576, "grad_norm": 2.5292000770568848, "learning_rate": 7.820698101653399e-06, "log_odds_chosen": 0.21703068912029266, "log_odds_ratio": -0.6844091415405273, "logits/chosen": -1.0209120512008667, "logits/rejected": -0.9823782444000244, "logps/chosen": -1.1978799104690552, "logps/rejected": -1.3907802104949951, "loss": 1.227, "nll_loss": 1.4553238153457642, "rewards/accuracies": 0.625, "rewards/chosen": -0.11978799104690552, "rewards/margins": 0.019290026277303696, "rewards/rejected": -0.1390780210494995, "step": 213 }, { "epoch": 0.1305475064816227, "grad_norm": 2.165876626968384, "learning_rate": 7.819718309859155e-06, "log_odds_chosen": -0.3650677502155304, "log_odds_ratio": -0.975648820400238, "logits/chosen": -0.866303026676178, "logits/rejected": -0.8189712762832642, "logps/chosen": -1.10890531539917, "logps/rejected": -0.8592621684074402, "loss": 1.261, "nll_loss": 1.2255784273147583, "rewards/accuracies": 0.375, "rewards/chosen": -0.11089053750038147, "rewards/margins": -0.02496432512998581, "rewards/rejected": -0.08592621982097626, "step": 214 }, { "epoch": 0.13115754155863962, "grad_norm": 3.8487796783447266, "learning_rate": 7.818738518064911e-06, "log_odds_chosen": 0.11591636389493942, "log_odds_ratio": -0.7333909273147583, "logits/chosen": -1.0533345937728882, "logits/rejected": -1.0663915872573853, "logps/chosen": -1.0632206201553345, "logps/rejected": -1.0743790864944458, "loss": 1.382, "nll_loss": 1.2638856172561646, "rewards/accuracies": 0.5, "rewards/chosen": -0.10632206499576569, "rewards/margins": 0.0011158520355820656, "rewards/rejected": -0.10743790864944458, "step": 215 }, { "epoch": 0.13176757663565655, "grad_norm": 1.909088134765625, "learning_rate": 7.817758726270667e-06, "log_odds_chosen": 0.007063969969749451, "log_odds_ratio": -0.7266018986701965, "logits/chosen": -0.9915033578872681, "logits/rejected": -1.0561681985855103, "logps/chosen": -1.2097535133361816, "logps/rejected": -1.1823275089263916, "loss": 1.379, "nll_loss": 1.382625699043274, "rewards/accuracies": 0.5, "rewards/chosen": -0.12097536027431488, "rewards/margins": -0.00274260388687253, "rewards/rejected": -0.11823275685310364, "step": 216 }, { "epoch": 0.13237761171267348, "grad_norm": 2.6507461071014404, "learning_rate": 7.816778934476423e-06, "log_odds_chosen": 0.2819988429546356, "log_odds_ratio": -0.6356192231178284, "logits/chosen": -0.894111156463623, "logits/rejected": -0.9227250814437866, "logps/chosen": -0.8836582899093628, "logps/rejected": -1.012203574180603, "loss": 1.4036, "nll_loss": 1.291224479675293, "rewards/accuracies": 0.625, "rewards/chosen": -0.0883658304810524, "rewards/margins": 0.01285453513264656, "rewards/rejected": -0.10122036933898926, "step": 217 }, { "epoch": 0.1329876467896904, "grad_norm": 2.8325328826904297, "learning_rate": 7.81579914268218e-06, "log_odds_chosen": 0.6530494093894958, "log_odds_ratio": -0.4423764944076538, "logits/chosen": -1.044180154800415, "logits/rejected": -1.0394846200942993, "logps/chosen": -1.0273312330245972, "logps/rejected": -1.4663742780685425, "loss": 1.51, "nll_loss": 1.4238406419754028, "rewards/accuracies": 0.875, "rewards/chosen": -0.10273312032222748, "rewards/margins": 0.043904323130846024, "rewards/rejected": -0.1466374397277832, "step": 218 }, { "epoch": 0.13359768186670734, "grad_norm": 1.7357605695724487, "learning_rate": 7.814819350887936e-06, "log_odds_chosen": -0.0227808840572834, "log_odds_ratio": -0.7502081394195557, "logits/chosen": -0.9548008441925049, "logits/rejected": -1.0264320373535156, "logps/chosen": -1.1037256717681885, "logps/rejected": -1.0493206977844238, "loss": 1.2666, "nll_loss": 1.343969702720642, "rewards/accuracies": 0.375, "rewards/chosen": -0.11037257313728333, "rewards/margins": -0.0054405080154538155, "rewards/rejected": -0.10493206977844238, "step": 219 }, { "epoch": 0.13420771694372427, "grad_norm": 1.802004337310791, "learning_rate": 7.813839559093693e-06, "log_odds_chosen": -0.060771942138671875, "log_odds_ratio": -0.8066741228103638, "logits/chosen": -1.0928730964660645, "logits/rejected": -1.0744097232818604, "logps/chosen": -0.9135183095932007, "logps/rejected": -0.896763026714325, "loss": 1.1813, "nll_loss": 1.2651777267456055, "rewards/accuracies": 0.375, "rewards/chosen": -0.09135182946920395, "rewards/margins": -0.0016755247488617897, "rewards/rejected": -0.08967630565166473, "step": 220 }, { "epoch": 0.1348177520207412, "grad_norm": 2.1524202823638916, "learning_rate": 7.81285976729945e-06, "log_odds_chosen": 0.177791565656662, "log_odds_ratio": -0.6579707860946655, "logits/chosen": -0.8102299571037292, "logits/rejected": -0.7626404762268066, "logps/chosen": -0.7687428593635559, "logps/rejected": -0.8363072276115417, "loss": 1.2937, "nll_loss": 1.0898728370666504, "rewards/accuracies": 0.75, "rewards/chosen": -0.07687428593635559, "rewards/margins": 0.006756434682756662, "rewards/rejected": -0.08363072574138641, "step": 221 }, { "epoch": 0.13542778709775813, "grad_norm": 2.3094868659973145, "learning_rate": 7.811879975505204e-06, "log_odds_chosen": 0.13451339304447174, "log_odds_ratio": -0.725571870803833, "logits/chosen": -0.7183337211608887, "logits/rejected": -0.6010791659355164, "logps/chosen": -0.9231369495391846, "logps/rejected": -1.0109964609146118, "loss": 1.273, "nll_loss": 1.0393099784851074, "rewards/accuracies": 0.625, "rewards/chosen": -0.09231369942426682, "rewards/margins": 0.008785942569375038, "rewards/rejected": -0.1010996401309967, "step": 222 }, { "epoch": 0.13603782217477506, "grad_norm": 2.9787185192108154, "learning_rate": 7.810900183710962e-06, "log_odds_chosen": 0.10881952941417694, "log_odds_ratio": -0.7083964347839355, "logits/chosen": -0.8419354557991028, "logits/rejected": -0.7779588103294373, "logps/chosen": -0.9841957092285156, "logps/rejected": -1.0037627220153809, "loss": 1.4363, "nll_loss": 1.2012042999267578, "rewards/accuracies": 0.375, "rewards/chosen": -0.09841956198215485, "rewards/margins": 0.001956706866621971, "rewards/rejected": -0.10037627816200256, "step": 223 }, { "epoch": 0.136647857251792, "grad_norm": 4.207352638244629, "learning_rate": 7.809920391916718e-06, "log_odds_chosen": 0.26976439356803894, "log_odds_ratio": -0.7299754619598389, "logits/chosen": -1.0486747026443481, "logits/rejected": -1.0862913131713867, "logps/chosen": -0.956432044506073, "logps/rejected": -1.1558587551116943, "loss": 1.3848, "nll_loss": 1.3149731159210205, "rewards/accuracies": 0.375, "rewards/chosen": -0.09564320743083954, "rewards/margins": 0.019942674785852432, "rewards/rejected": -0.11558587849140167, "step": 224 }, { "epoch": 0.13725789232880892, "grad_norm": 6.405318260192871, "learning_rate": 7.808940600122474e-06, "log_odds_chosen": 0.05623709410429001, "log_odds_ratio": -0.7812166213989258, "logits/chosen": -1.0191199779510498, "logits/rejected": -1.076115369796753, "logps/chosen": -1.392913818359375, "logps/rejected": -1.360562801361084, "loss": 1.4617, "nll_loss": 1.4459612369537354, "rewards/accuracies": 0.5, "rewards/chosen": -0.13929139077663422, "rewards/margins": -0.0032350989058613777, "rewards/rejected": -0.1360563039779663, "step": 225 }, { "epoch": 0.13786792740582585, "grad_norm": 2.748825788497925, "learning_rate": 7.80796080832823e-06, "log_odds_chosen": -0.01333153247833252, "log_odds_ratio": -0.7519276738166809, "logits/chosen": -1.0906342267990112, "logits/rejected": -1.0152759552001953, "logps/chosen": -1.256901741027832, "logps/rejected": -1.220004916191101, "loss": 1.6004, "nll_loss": 1.6616486310958862, "rewards/accuracies": 0.5, "rewards/chosen": -0.12569016218185425, "rewards/margins": -0.003689682111144066, "rewards/rejected": -0.12200049310922623, "step": 226 }, { "epoch": 0.13847796248284278, "grad_norm": 5.613758563995361, "learning_rate": 7.806981016533986e-06, "log_odds_chosen": 0.4776960015296936, "log_odds_ratio": -0.5692480206489563, "logits/chosen": -0.8642911911010742, "logits/rejected": -0.8779069185256958, "logps/chosen": -1.0537559986114502, "logps/rejected": -1.376704216003418, "loss": 1.2844, "nll_loss": 1.3125157356262207, "rewards/accuracies": 0.625, "rewards/chosen": -0.10537560284137726, "rewards/margins": 0.032294824719429016, "rewards/rejected": -0.13767041265964508, "step": 227 }, { "epoch": 0.1390879975598597, "grad_norm": 4.758591175079346, "learning_rate": 7.806001224739742e-06, "log_odds_chosen": 0.4998701810836792, "log_odds_ratio": -0.5789040327072144, "logits/chosen": -0.6383202075958252, "logits/rejected": -0.6744571328163147, "logps/chosen": -0.8337568044662476, "logps/rejected": -1.1265709400177002, "loss": 1.2968, "nll_loss": 0.963615357875824, "rewards/accuracies": 0.75, "rewards/chosen": -0.08337568491697311, "rewards/margins": 0.029281407594680786, "rewards/rejected": -0.1126570925116539, "step": 228 }, { "epoch": 0.1396980326368766, "grad_norm": 3.7864625453948975, "learning_rate": 7.805021432945498e-06, "log_odds_chosen": -0.13972145318984985, "log_odds_ratio": -0.8029104471206665, "logits/chosen": -0.8367788791656494, "logits/rejected": -0.8016765713691711, "logps/chosen": -0.9311959743499756, "logps/rejected": -0.823432445526123, "loss": 1.151, "nll_loss": 1.1584244966506958, "rewards/accuracies": 0.5, "rewards/chosen": -0.09311959147453308, "rewards/margins": -0.010776347480714321, "rewards/rejected": -0.08234325051307678, "step": 229 }, { "epoch": 0.14030806771389354, "grad_norm": 2.4330880641937256, "learning_rate": 7.804041641151255e-06, "log_odds_chosen": 0.14116178452968597, "log_odds_ratio": -0.7169243097305298, "logits/chosen": -1.1258292198181152, "logits/rejected": -1.232961893081665, "logps/chosen": -1.097679615020752, "logps/rejected": -1.268140196800232, "loss": 1.4409, "nll_loss": 1.4674466848373413, "rewards/accuracies": 0.625, "rewards/chosen": -0.10976796597242355, "rewards/margins": 0.017046064138412476, "rewards/rejected": -0.12681403756141663, "step": 230 }, { "epoch": 0.14091810279091047, "grad_norm": 2.470069169998169, "learning_rate": 7.80306184935701e-06, "log_odds_chosen": 0.4606154263019562, "log_odds_ratio": -0.5399612188339233, "logits/chosen": -0.808849573135376, "logits/rejected": -0.7981170415878296, "logps/chosen": -0.9522269368171692, "logps/rejected": -1.1703108549118042, "loss": 1.2583, "nll_loss": 1.1476235389709473, "rewards/accuracies": 0.75, "rewards/chosen": -0.09522269666194916, "rewards/margins": 0.02180839143693447, "rewards/rejected": -0.11703109741210938, "step": 231 }, { "epoch": 0.1415281378679274, "grad_norm": 3.543764591217041, "learning_rate": 7.802082057562769e-06, "log_odds_chosen": 0.40190649032592773, "log_odds_ratio": -0.5759028196334839, "logits/chosen": -0.8682248592376709, "logits/rejected": -0.965034008026123, "logps/chosen": -0.9449611902236938, "logps/rejected": -1.2030372619628906, "loss": 1.4003, "nll_loss": 1.277012586593628, "rewards/accuracies": 0.625, "rewards/chosen": -0.09449611604213715, "rewards/margins": 0.025807613506913185, "rewards/rejected": -0.12030373513698578, "step": 232 }, { "epoch": 0.14213817294494432, "grad_norm": 1.9690214395523071, "learning_rate": 7.801102265768523e-06, "log_odds_chosen": 0.5561241507530212, "log_odds_ratio": -0.4873473644256592, "logits/chosen": -0.7889351844787598, "logits/rejected": -0.9643588066101074, "logps/chosen": -0.8869659900665283, "logps/rejected": -1.2165979146957397, "loss": 1.3842, "nll_loss": 1.205625057220459, "rewards/accuracies": 0.875, "rewards/chosen": -0.08869659900665283, "rewards/margins": 0.03296317905187607, "rewards/rejected": -0.1216597855091095, "step": 233 }, { "epoch": 0.14274820802196125, "grad_norm": 3.6181557178497314, "learning_rate": 7.800122473974279e-06, "log_odds_chosen": 0.2895849049091339, "log_odds_ratio": -0.5933615565299988, "logits/chosen": -0.6428920030593872, "logits/rejected": -0.7066491842269897, "logps/chosen": -0.8695434331893921, "logps/rejected": -1.005267858505249, "loss": 1.232, "nll_loss": 0.9855774641036987, "rewards/accuracies": 0.75, "rewards/chosen": -0.08695434033870697, "rewards/margins": 0.0135724525898695, "rewards/rejected": -0.10052678734064102, "step": 234 }, { "epoch": 0.14335824309897818, "grad_norm": 2.36604905128479, "learning_rate": 7.799142682180037e-06, "log_odds_chosen": 0.5606030821800232, "log_odds_ratio": -0.524124026298523, "logits/chosen": -0.9399769306182861, "logits/rejected": -0.8539875745773315, "logps/chosen": -0.9664835333824158, "logps/rejected": -1.3288832902908325, "loss": 1.355, "nll_loss": 1.2193565368652344, "rewards/accuracies": 0.875, "rewards/chosen": -0.09664835035800934, "rewards/margins": 0.03623996302485466, "rewards/rejected": -0.1328883320093155, "step": 235 }, { "epoch": 0.1439682781759951, "grad_norm": 2.199805974960327, "learning_rate": 7.798162890385793e-06, "log_odds_chosen": 0.04697909951210022, "log_odds_ratio": -0.7186545729637146, "logits/chosen": -0.6428421139717102, "logits/rejected": -0.7734893560409546, "logps/chosen": -1.0481983423233032, "logps/rejected": -1.0610530376434326, "loss": 1.1257, "nll_loss": 1.2541704177856445, "rewards/accuracies": 0.375, "rewards/chosen": -0.10481983423233032, "rewards/margins": 0.0012854626402258873, "rewards/rejected": -0.10610529035329819, "step": 236 }, { "epoch": 0.14457831325301204, "grad_norm": 2.3560309410095215, "learning_rate": 7.797183098591549e-06, "log_odds_chosen": 0.3246544897556305, "log_odds_ratio": -0.5641750693321228, "logits/chosen": -1.05469810962677, "logits/rejected": -0.851701021194458, "logps/chosen": -0.8780465722084045, "logps/rejected": -1.083631157875061, "loss": 1.3132, "nll_loss": 1.4300309419631958, "rewards/accuracies": 0.75, "rewards/chosen": -0.08780466020107269, "rewards/margins": 0.020558450371026993, "rewards/rejected": -0.10836310684680939, "step": 237 }, { "epoch": 0.14518834833002897, "grad_norm": 2.9453485012054443, "learning_rate": 7.796203306797305e-06, "log_odds_chosen": 0.48952266573905945, "log_odds_ratio": -0.5739860534667969, "logits/chosen": -1.115866780281067, "logits/rejected": -1.095678448677063, "logps/chosen": -1.1891180276870728, "logps/rejected": -1.5932426452636719, "loss": 1.4609, "nll_loss": 1.6428841352462769, "rewards/accuracies": 0.625, "rewards/chosen": -0.11891180276870728, "rewards/margins": 0.04041246697306633, "rewards/rejected": -0.1593242734670639, "step": 238 }, { "epoch": 0.1457983834070459, "grad_norm": 2.7446722984313965, "learning_rate": 7.795223515003061e-06, "log_odds_chosen": 0.28477656841278076, "log_odds_ratio": -0.6036829948425293, "logits/chosen": -1.0019030570983887, "logits/rejected": -1.0057642459869385, "logps/chosen": -0.9970621466636658, "logps/rejected": -1.1433675289154053, "loss": 1.3498, "nll_loss": 1.1788746118545532, "rewards/accuracies": 0.625, "rewards/chosen": -0.09970621764659882, "rewards/margins": 0.014630535617470741, "rewards/rejected": -0.11433675140142441, "step": 239 }, { "epoch": 0.14640841848406283, "grad_norm": 2.1572086811065674, "learning_rate": 7.794243723208818e-06, "log_odds_chosen": 0.2865999639034271, "log_odds_ratio": -0.6209990978240967, "logits/chosen": -1.269669532775879, "logits/rejected": -1.1299000978469849, "logps/chosen": -1.0902001857757568, "logps/rejected": -1.3582873344421387, "loss": 1.2517, "nll_loss": 1.422100305557251, "rewards/accuracies": 0.75, "rewards/chosen": -0.10902002453804016, "rewards/margins": 0.026808712631464005, "rewards/rejected": -0.13582873344421387, "step": 240 }, { "epoch": 0.14701845356107976, "grad_norm": 3.702071189880371, "learning_rate": 7.793263931414574e-06, "log_odds_chosen": 0.4157208502292633, "log_odds_ratio": -0.5609100461006165, "logits/chosen": -0.7230011224746704, "logits/rejected": -0.6550971269607544, "logps/chosen": -0.8877879977226257, "logps/rejected": -1.1330509185791016, "loss": 1.2222, "nll_loss": 1.0279226303100586, "rewards/accuracies": 0.75, "rewards/chosen": -0.08877880871295929, "rewards/margins": 0.024526281282305717, "rewards/rejected": -0.11330509185791016, "step": 241 }, { "epoch": 0.1476284886380967, "grad_norm": 2.607598066329956, "learning_rate": 7.79228413962033e-06, "log_odds_chosen": 0.1622743308544159, "log_odds_ratio": -0.6977503299713135, "logits/chosen": -1.1188769340515137, "logits/rejected": -1.0304962396621704, "logps/chosen": -1.0841470956802368, "logps/rejected": -1.184234380722046, "loss": 1.5491, "nll_loss": 1.2121407985687256, "rewards/accuracies": 0.625, "rewards/chosen": -0.10841470956802368, "rewards/margins": 0.010008729062974453, "rewards/rejected": -0.11842343956232071, "step": 242 }, { "epoch": 0.14823852371511362, "grad_norm": 2.8150298595428467, "learning_rate": 7.791304347826088e-06, "log_odds_chosen": 0.4074481725692749, "log_odds_ratio": -0.5500730872154236, "logits/chosen": -0.883357048034668, "logits/rejected": -0.7615510821342468, "logps/chosen": -0.8770421147346497, "logps/rejected": -1.1544865369796753, "loss": 1.3954, "nll_loss": 1.216795563697815, "rewards/accuracies": 0.625, "rewards/chosen": -0.08770421147346497, "rewards/margins": 0.027744445949792862, "rewards/rejected": -0.11544866114854813, "step": 243 }, { "epoch": 0.14884855879213055, "grad_norm": 8.543292999267578, "learning_rate": 7.790324556031842e-06, "log_odds_chosen": 0.10030410438776016, "log_odds_ratio": -0.6813020706176758, "logits/chosen": -0.8731304407119751, "logits/rejected": -0.8239074945449829, "logps/chosen": -1.364395022392273, "logps/rejected": -1.448944330215454, "loss": 1.3431, "nll_loss": 1.3777525424957275, "rewards/accuracies": 0.625, "rewards/chosen": -0.1364395022392273, "rewards/margins": 0.008454934693872929, "rewards/rejected": -0.14489443600177765, "step": 244 }, { "epoch": 0.14945859386914748, "grad_norm": 3.367971658706665, "learning_rate": 7.789344764237598e-06, "log_odds_chosen": 0.28537923097610474, "log_odds_ratio": -0.6600664258003235, "logits/chosen": -0.8054450750350952, "logits/rejected": -0.7893984317779541, "logps/chosen": -0.9398113489151001, "logps/rejected": -1.10236656665802, "loss": 1.1392, "nll_loss": 1.2466214895248413, "rewards/accuracies": 0.625, "rewards/chosen": -0.09398113191127777, "rewards/margins": 0.016255518421530724, "rewards/rejected": -0.11023664474487305, "step": 245 }, { "epoch": 0.1500686289461644, "grad_norm": 2.4683070182800293, "learning_rate": 7.788364972443356e-06, "log_odds_chosen": 0.25822409987449646, "log_odds_ratio": -0.6430916786193848, "logits/chosen": -0.9187443852424622, "logits/rejected": -0.8759621381759644, "logps/chosen": -1.0706000328063965, "logps/rejected": -1.208448886871338, "loss": 1.3086, "nll_loss": 1.1688766479492188, "rewards/accuracies": 0.625, "rewards/chosen": -0.1070600152015686, "rewards/margins": 0.013784877955913544, "rewards/rejected": -0.12084489315748215, "step": 246 }, { "epoch": 0.15067866402318134, "grad_norm": 2.894167423248291, "learning_rate": 7.787385180649112e-06, "log_odds_chosen": 0.19074368476867676, "log_odds_ratio": -0.6490761637687683, "logits/chosen": -1.0765665769577026, "logits/rejected": -0.9870290756225586, "logps/chosen": -1.0201549530029297, "logps/rejected": -1.1616910696029663, "loss": 1.1666, "nll_loss": 1.2992485761642456, "rewards/accuracies": 0.5, "rewards/chosen": -0.10201548784971237, "rewards/margins": 0.014153622090816498, "rewards/rejected": -0.11616911739110947, "step": 247 }, { "epoch": 0.15128869910019827, "grad_norm": 3.060457944869995, "learning_rate": 7.786405388854868e-06, "log_odds_chosen": 0.33842363953590393, "log_odds_ratio": -0.6155612468719482, "logits/chosen": -0.9913941025733948, "logits/rejected": -0.9436041116714478, "logps/chosen": -1.1336190700531006, "logps/rejected": -1.3766217231750488, "loss": 1.1237, "nll_loss": 1.1691642999649048, "rewards/accuracies": 0.625, "rewards/chosen": -0.1133619099855423, "rewards/margins": 0.024300266057252884, "rewards/rejected": -0.13766217231750488, "step": 248 }, { "epoch": 0.1518987341772152, "grad_norm": 1.9134578704833984, "learning_rate": 7.785425597060624e-06, "log_odds_chosen": 0.24048978090286255, "log_odds_ratio": -0.5884311199188232, "logits/chosen": -1.18068528175354, "logits/rejected": -0.9474149942398071, "logps/chosen": -1.0785657167434692, "logps/rejected": -1.202911615371704, "loss": 1.2605, "nll_loss": 1.3453037738800049, "rewards/accuracies": 0.75, "rewards/chosen": -0.10785658657550812, "rewards/margins": 0.012434577569365501, "rewards/rejected": -0.12029115855693817, "step": 249 }, { "epoch": 0.15250876925423212, "grad_norm": 2.6691360473632812, "learning_rate": 7.78444580526638e-06, "log_odds_chosen": 0.6362863779067993, "log_odds_ratio": -0.44877737760543823, "logits/chosen": -1.0382379293441772, "logits/rejected": -0.9376381039619446, "logps/chosen": -0.9738128185272217, "logps/rejected": -1.3651845455169678, "loss": 1.1896, "nll_loss": 1.207344651222229, "rewards/accuracies": 0.875, "rewards/chosen": -0.09738128632307053, "rewards/margins": 0.039137158542871475, "rewards/rejected": -0.1365184485912323, "step": 250 }, { "epoch": 0.15311880433124905, "grad_norm": 5.6671648025512695, "learning_rate": 7.783466013472137e-06, "log_odds_chosen": 0.2152402698993683, "log_odds_ratio": -0.6168177127838135, "logits/chosen": -1.0105100870132446, "logits/rejected": -0.939407229423523, "logps/chosen": -1.0403013229370117, "logps/rejected": -1.151586890220642, "loss": 1.3449, "nll_loss": 1.177647352218628, "rewards/accuracies": 0.5, "rewards/chosen": -0.10403013229370117, "rewards/margins": 0.011128552258014679, "rewards/rejected": -0.11515869200229645, "step": 251 }, { "epoch": 0.15372883940826598, "grad_norm": 2.23223614692688, "learning_rate": 7.782486221677893e-06, "log_odds_chosen": 0.6701767444610596, "log_odds_ratio": -0.4944712817668915, "logits/chosen": -0.8669027090072632, "logits/rejected": -0.907177746295929, "logps/chosen": -1.0009756088256836, "logps/rejected": -1.3678256273269653, "loss": 1.0847, "nll_loss": 1.1281216144561768, "rewards/accuracies": 0.625, "rewards/chosen": -0.10009756684303284, "rewards/margins": 0.03668500483036041, "rewards/rejected": -0.13678257167339325, "step": 252 }, { "epoch": 0.1543388744852829, "grad_norm": 3.598905563354492, "learning_rate": 7.781506429883649e-06, "log_odds_chosen": 0.008346468210220337, "log_odds_ratio": -0.8593700528144836, "logits/chosen": -0.9967697262763977, "logits/rejected": -0.895081639289856, "logps/chosen": -1.2955882549285889, "logps/rejected": -1.352323293685913, "loss": 1.3892, "nll_loss": 1.45561683177948, "rewards/accuracies": 0.25, "rewards/chosen": -0.12955881655216217, "rewards/margins": 0.005673505365848541, "rewards/rejected": -0.1352323293685913, "step": 253 }, { "epoch": 0.15494890956229984, "grad_norm": 4.5110063552856445, "learning_rate": 7.780526638089407e-06, "log_odds_chosen": -0.3180376887321472, "log_odds_ratio": -0.8908965587615967, "logits/chosen": -1.1282950639724731, "logits/rejected": -0.9765962362289429, "logps/chosen": -1.2225303649902344, "logps/rejected": -1.0110152959823608, "loss": 1.1956, "nll_loss": 1.338239073753357, "rewards/accuracies": 0.25, "rewards/chosen": -0.12225304543972015, "rewards/margins": -0.02115151844918728, "rewards/rejected": -0.10110153257846832, "step": 254 }, { "epoch": 0.15555894463931677, "grad_norm": 2.517331600189209, "learning_rate": 7.779546846295163e-06, "log_odds_chosen": -0.3488119840621948, "log_odds_ratio": -0.9126331210136414, "logits/chosen": -1.2224271297454834, "logits/rejected": -1.0464444160461426, "logps/chosen": -1.2032248973846436, "logps/rejected": -0.9699029922485352, "loss": 1.4827, "nll_loss": 1.3106234073638916, "rewards/accuracies": 0.375, "rewards/chosen": -0.12032248079776764, "rewards/margins": -0.02333218604326248, "rewards/rejected": -0.09699030220508575, "step": 255 }, { "epoch": 0.1561689797163337, "grad_norm": 3.4063737392425537, "learning_rate": 7.778567054500917e-06, "log_odds_chosen": 0.38908225297927856, "log_odds_ratio": -0.5540482401847839, "logits/chosen": -1.1297249794006348, "logits/rejected": -1.109819769859314, "logps/chosen": -1.3222917318344116, "logps/rejected": -1.6293175220489502, "loss": 1.5576, "nll_loss": 1.491607427597046, "rewards/accuracies": 0.75, "rewards/chosen": -0.13222917914390564, "rewards/margins": 0.030702583491802216, "rewards/rejected": -0.16293177008628845, "step": 256 }, { "epoch": 0.15677901479335063, "grad_norm": 3.253873348236084, "learning_rate": 7.777587262706675e-06, "log_odds_chosen": 0.12302947044372559, "log_odds_ratio": -0.7043987512588501, "logits/chosen": -1.098328948020935, "logits/rejected": -0.6857577562332153, "logps/chosen": -1.184415340423584, "logps/rejected": -1.2567614316940308, "loss": 1.3483, "nll_loss": 1.3214929103851318, "rewards/accuracies": 0.5, "rewards/chosen": -0.11844153702259064, "rewards/margins": 0.007234611548483372, "rewards/rejected": -0.12567615509033203, "step": 257 }, { "epoch": 0.15738904987036756, "grad_norm": 2.712540864944458, "learning_rate": 7.776607470912431e-06, "log_odds_chosen": 0.24752990901470184, "log_odds_ratio": -0.6269053816795349, "logits/chosen": -0.8704622983932495, "logits/rejected": -0.833612322807312, "logps/chosen": -1.0848382711410522, "logps/rejected": -1.2072652578353882, "loss": 1.2332, "nll_loss": 1.1942583322525024, "rewards/accuracies": 0.75, "rewards/chosen": -0.10848383605480194, "rewards/margins": 0.012242693454027176, "rewards/rejected": -0.12072652578353882, "step": 258 }, { "epoch": 0.1579990849473845, "grad_norm": 1.8693621158599854, "learning_rate": 7.775627679118187e-06, "log_odds_chosen": 0.1663820594549179, "log_odds_ratio": -0.6528200507164001, "logits/chosen": -1.0145063400268555, "logits/rejected": -0.9368645548820496, "logps/chosen": -0.7865395545959473, "logps/rejected": -0.8785721063613892, "loss": 1.2362, "nll_loss": 1.286307454109192, "rewards/accuracies": 0.625, "rewards/chosen": -0.07865395396947861, "rewards/margins": 0.009203257970511913, "rewards/rejected": -0.0878572165966034, "step": 259 }, { "epoch": 0.1586091200244014, "grad_norm": 2.7284610271453857, "learning_rate": 7.774647887323943e-06, "log_odds_chosen": 0.11002177745103836, "log_odds_ratio": -0.6919146776199341, "logits/chosen": -0.7758104205131531, "logits/rejected": -0.590361475944519, "logps/chosen": -0.929775595664978, "logps/rejected": -1.0004569292068481, "loss": 1.4861, "nll_loss": 1.1518476009368896, "rewards/accuracies": 0.75, "rewards/chosen": -0.09297756105661392, "rewards/margins": 0.00706813158467412, "rewards/rejected": -0.10004568845033646, "step": 260 }, { "epoch": 0.15921915510141832, "grad_norm": 2.2712621688842773, "learning_rate": 7.7736680955297e-06, "log_odds_chosen": 0.6104550361633301, "log_odds_ratio": -0.5205526351928711, "logits/chosen": -0.8226828575134277, "logits/rejected": -0.7950866222381592, "logps/chosen": -0.8787000775337219, "logps/rejected": -1.2411754131317139, "loss": 1.2767, "nll_loss": 1.0877461433410645, "rewards/accuracies": 0.75, "rewards/chosen": -0.08787001669406891, "rewards/margins": 0.03624754399061203, "rewards/rejected": -0.12411756813526154, "step": 261 }, { "epoch": 0.15982919017843525, "grad_norm": 2.3654141426086426, "learning_rate": 7.772688303735456e-06, "log_odds_chosen": -0.07801627367734909, "log_odds_ratio": -0.7757260799407959, "logits/chosen": -1.1532732248306274, "logits/rejected": -0.9643528461456299, "logps/chosen": -1.3221553564071655, "logps/rejected": -1.2861003875732422, "loss": 1.5374, "nll_loss": 1.4839357137680054, "rewards/accuracies": 0.5, "rewards/chosen": -0.13221554458141327, "rewards/margins": -0.0036054905503988266, "rewards/rejected": -0.1286100447177887, "step": 262 }, { "epoch": 0.16043922525545218, "grad_norm": 2.3918955326080322, "learning_rate": 7.771708511941212e-06, "log_odds_chosen": 0.34428665041923523, "log_odds_ratio": -0.6098607182502747, "logits/chosen": -1.21371591091156, "logits/rejected": -1.1719295978546143, "logps/chosen": -1.132947325706482, "logps/rejected": -1.357439637184143, "loss": 1.3341, "nll_loss": 1.6089519262313843, "rewards/accuracies": 0.5, "rewards/chosen": -0.11329473555088043, "rewards/margins": 0.022449225187301636, "rewards/rejected": -0.13574396073818207, "step": 263 }, { "epoch": 0.1610492603324691, "grad_norm": 2.9283852577209473, "learning_rate": 7.770728720146968e-06, "log_odds_chosen": -0.18631795048713684, "log_odds_ratio": -0.8465763330459595, "logits/chosen": -0.8108327984809875, "logits/rejected": -0.8886317610740662, "logps/chosen": -1.0849888324737549, "logps/rejected": -0.9477260708808899, "loss": 1.2029, "nll_loss": 1.2897024154663086, "rewards/accuracies": 0.375, "rewards/chosen": -0.10849887877702713, "rewards/margins": -0.013726272620260715, "rewards/rejected": -0.09477260708808899, "step": 264 }, { "epoch": 0.16165929540948604, "grad_norm": 2.8944742679595947, "learning_rate": 7.769748928352726e-06, "log_odds_chosen": 0.03071035072207451, "log_odds_ratio": -0.7136768102645874, "logits/chosen": -0.9249821901321411, "logits/rejected": -0.8667629957199097, "logps/chosen": -0.8365970849990845, "logps/rejected": -0.8667410612106323, "loss": 1.2207, "nll_loss": 1.1581817865371704, "rewards/accuracies": 0.5, "rewards/chosen": -0.08365970849990845, "rewards/margins": 0.0030144001357257366, "rewards/rejected": -0.08667411655187607, "step": 265 }, { "epoch": 0.16226933048650297, "grad_norm": 3.657564163208008, "learning_rate": 7.768769136558482e-06, "log_odds_chosen": 0.643396258354187, "log_odds_ratio": -0.4806571304798126, "logits/chosen": -0.8587702512741089, "logits/rejected": -0.8005377650260925, "logps/chosen": -0.875823438167572, "logps/rejected": -1.282219409942627, "loss": 1.4153, "nll_loss": 1.0709002017974854, "rewards/accuracies": 0.75, "rewards/chosen": -0.08758234232664108, "rewards/margins": 0.040639594197273254, "rewards/rejected": -0.12822192907333374, "step": 266 }, { "epoch": 0.1628793655635199, "grad_norm": 7.040660858154297, "learning_rate": 7.767789344764236e-06, "log_odds_chosen": 0.8177803754806519, "log_odds_ratio": -0.526404082775116, "logits/chosen": -0.6793309450149536, "logits/rejected": -0.725306510925293, "logps/chosen": -0.8610958456993103, "logps/rejected": -1.5266876220703125, "loss": 1.1655, "nll_loss": 1.0069074630737305, "rewards/accuracies": 0.875, "rewards/chosen": -0.08610958606004715, "rewards/margins": 0.06655918061733246, "rewards/rejected": -0.152668759226799, "step": 267 }, { "epoch": 0.16348940064053682, "grad_norm": 1.712852954864502, "learning_rate": 7.766809552969994e-06, "log_odds_chosen": 0.40791621804237366, "log_odds_ratio": -0.5593159794807434, "logits/chosen": -1.1447124481201172, "logits/rejected": -1.133261799812317, "logps/chosen": -1.0578488111495972, "logps/rejected": -1.3238589763641357, "loss": 1.178, "nll_loss": 1.281909704208374, "rewards/accuracies": 0.75, "rewards/chosen": -0.10578488558530807, "rewards/margins": 0.026601018384099007, "rewards/rejected": -0.13238589465618134, "step": 268 }, { "epoch": 0.16409943571755375, "grad_norm": 2.0394959449768066, "learning_rate": 7.76582976117575e-06, "log_odds_chosen": 0.4472591280937195, "log_odds_ratio": -0.55621737241745, "logits/chosen": -0.8428138494491577, "logits/rejected": -0.9300673007965088, "logps/chosen": -0.8864307999610901, "logps/rejected": -1.128824234008789, "loss": 1.1413, "nll_loss": 1.108571171760559, "rewards/accuracies": 0.625, "rewards/chosen": -0.08864308148622513, "rewards/margins": 0.024239342659711838, "rewards/rejected": -0.11288243532180786, "step": 269 }, { "epoch": 0.16470947079457068, "grad_norm": 1.9256882667541504, "learning_rate": 7.764849969381506e-06, "log_odds_chosen": 0.6202691197395325, "log_odds_ratio": -0.46457231044769287, "logits/chosen": -1.041861891746521, "logits/rejected": -1.0381197929382324, "logps/chosen": -0.8558303117752075, "logps/rejected": -1.222104549407959, "loss": 1.2163, "nll_loss": 1.1976042985916138, "rewards/accuracies": 0.75, "rewards/chosen": -0.08558303117752075, "rewards/margins": 0.03662741929292679, "rewards/rejected": -0.12221045792102814, "step": 270 }, { "epoch": 0.1653195058715876, "grad_norm": 1.7721426486968994, "learning_rate": 7.763870177587262e-06, "log_odds_chosen": 0.42782121896743774, "log_odds_ratio": -0.5187650918960571, "logits/chosen": -0.8946547508239746, "logits/rejected": -0.840463399887085, "logps/chosen": -0.7732785940170288, "logps/rejected": -0.9998363256454468, "loss": 1.3818, "nll_loss": 1.2091741561889648, "rewards/accuracies": 0.875, "rewards/chosen": -0.07732786238193512, "rewards/margins": 0.02265576645731926, "rewards/rejected": -0.09998363256454468, "step": 271 }, { "epoch": 0.16592954094860454, "grad_norm": 2.9524500370025635, "learning_rate": 7.762890385793018e-06, "log_odds_chosen": 0.5287894010543823, "log_odds_ratio": -0.5377291440963745, "logits/chosen": -0.9185388088226318, "logits/rejected": -0.8871104717254639, "logps/chosen": -0.8289216160774231, "logps/rejected": -1.1023149490356445, "loss": 1.1049, "nll_loss": 1.2607725858688354, "rewards/accuracies": 0.75, "rewards/chosen": -0.08289216458797455, "rewards/margins": 0.02733933925628662, "rewards/rejected": -0.11023149639368057, "step": 272 }, { "epoch": 0.16653957602562147, "grad_norm": 3.434633731842041, "learning_rate": 7.761910593998775e-06, "log_odds_chosen": 0.21623599529266357, "log_odds_ratio": -0.6955507397651672, "logits/chosen": -0.9392475485801697, "logits/rejected": -0.9225636720657349, "logps/chosen": -1.033257007598877, "logps/rejected": -1.1258549690246582, "loss": 1.4077, "nll_loss": 1.2227250337600708, "rewards/accuracies": 0.75, "rewards/chosen": -0.10332569479942322, "rewards/margins": 0.009259795770049095, "rewards/rejected": -0.11258549243211746, "step": 273 }, { "epoch": 0.1671496111026384, "grad_norm": 2.4459402561187744, "learning_rate": 7.76093080220453e-06, "log_odds_chosen": 0.0742308646440506, "log_odds_ratio": -0.7825095653533936, "logits/chosen": -0.7190593481063843, "logits/rejected": -0.8178085088729858, "logps/chosen": -0.8383591175079346, "logps/rejected": -0.8285937905311584, "loss": 1.1814, "nll_loss": 0.9598476886749268, "rewards/accuracies": 0.625, "rewards/chosen": -0.0838359147310257, "rewards/margins": -0.0009765271097421646, "rewards/rejected": -0.08285938203334808, "step": 274 }, { "epoch": 0.16775964617965533, "grad_norm": 2.4651970863342285, "learning_rate": 7.759951010410287e-06, "log_odds_chosen": 0.39684611558914185, "log_odds_ratio": -0.6019562482833862, "logits/chosen": -0.8949675559997559, "logits/rejected": -0.8688215613365173, "logps/chosen": -0.9699896574020386, "logps/rejected": -1.1900924444198608, "loss": 1.1831, "nll_loss": 1.2368110418319702, "rewards/accuracies": 0.625, "rewards/chosen": -0.09699897468090057, "rewards/margins": 0.022010287269949913, "rewards/rejected": -0.11900924891233444, "step": 275 }, { "epoch": 0.16836968125667226, "grad_norm": 6.5397186279296875, "learning_rate": 7.758971218616045e-06, "log_odds_chosen": 0.7858189344406128, "log_odds_ratio": -0.47055482864379883, "logits/chosen": -0.9036056399345398, "logits/rejected": -0.8998976945877075, "logps/chosen": -0.7464345693588257, "logps/rejected": -1.1630421876907349, "loss": 1.3311, "nll_loss": 0.934411883354187, "rewards/accuracies": 0.875, "rewards/chosen": -0.07464345544576645, "rewards/margins": 0.041660770773887634, "rewards/rejected": -0.11630422621965408, "step": 276 }, { "epoch": 0.1689797163336892, "grad_norm": 2.2059037685394287, "learning_rate": 7.7579914268218e-06, "log_odds_chosen": 0.280785471200943, "log_odds_ratio": -0.6733993291854858, "logits/chosen": -1.029989242553711, "logits/rejected": -0.9239932298660278, "logps/chosen": -0.8910529613494873, "logps/rejected": -1.0092614889144897, "loss": 1.3027, "nll_loss": 1.3669960498809814, "rewards/accuracies": 0.625, "rewards/chosen": -0.08910529315471649, "rewards/margins": 0.011820856481790543, "rewards/rejected": -0.10092614591121674, "step": 277 }, { "epoch": 0.16958975141070612, "grad_norm": 3.697209596633911, "learning_rate": 7.757011635027557e-06, "log_odds_chosen": 0.07951292395591736, "log_odds_ratio": -0.6964132189750671, "logits/chosen": -0.9017593264579773, "logits/rejected": -0.9843534231185913, "logps/chosen": -1.0031888484954834, "logps/rejected": -0.9984350204467773, "loss": 1.4274, "nll_loss": 1.0825457572937012, "rewards/accuracies": 0.75, "rewards/chosen": -0.10031889379024506, "rewards/margins": -0.000475381501019001, "rewards/rejected": -0.09984350204467773, "step": 278 }, { "epoch": 0.17019978648772305, "grad_norm": 2.093724012374878, "learning_rate": 7.756031843233313e-06, "log_odds_chosen": 0.276312917470932, "log_odds_ratio": -0.6774806380271912, "logits/chosen": -1.143459677696228, "logits/rejected": -1.1223394870758057, "logps/chosen": -0.9105595350265503, "logps/rejected": -1.1058679819107056, "loss": 1.4212, "nll_loss": 1.6354833841323853, "rewards/accuracies": 0.625, "rewards/chosen": -0.0910559594631195, "rewards/margins": 0.01953083649277687, "rewards/rejected": -0.11058680713176727, "step": 279 }, { "epoch": 0.17080982156473998, "grad_norm": 3.933887481689453, "learning_rate": 7.75505205143907e-06, "log_odds_chosen": -0.07062369585037231, "log_odds_ratio": -0.7657700777053833, "logits/chosen": -1.2187585830688477, "logits/rejected": -1.1895525455474854, "logps/chosen": -1.2380472421646118, "logps/rejected": -1.1819652318954468, "loss": 1.2749, "nll_loss": 1.5276275873184204, "rewards/accuracies": 0.25, "rewards/chosen": -0.1238047257065773, "rewards/margins": -0.005608202889561653, "rewards/rejected": -0.1181965246796608, "step": 280 }, { "epoch": 0.1714198566417569, "grad_norm": 2.2064061164855957, "learning_rate": 7.754072259644825e-06, "log_odds_chosen": -0.13413316011428833, "log_odds_ratio": -0.8021166324615479, "logits/chosen": -1.080421805381775, "logits/rejected": -1.057112693786621, "logps/chosen": -1.2172508239746094, "logps/rejected": -1.1262553930282593, "loss": 1.3277, "nll_loss": 1.428092360496521, "rewards/accuracies": 0.375, "rewards/chosen": -0.12172508239746094, "rewards/margins": -0.009099540300667286, "rewards/rejected": -0.11262553930282593, "step": 281 }, { "epoch": 0.17202989171877384, "grad_norm": 2.175548791885376, "learning_rate": 7.753092467850581e-06, "log_odds_chosen": 0.9662906527519226, "log_odds_ratio": -0.33602088689804077, "logits/chosen": -1.057160496711731, "logits/rejected": -1.1102476119995117, "logps/chosen": -0.8031333684921265, "logps/rejected": -1.409901738166809, "loss": 1.4068, "nll_loss": 1.2043205499649048, "rewards/accuracies": 1.0, "rewards/chosen": -0.08031333982944489, "rewards/margins": 0.060676850378513336, "rewards/rejected": -0.14099018275737762, "step": 282 }, { "epoch": 0.17263992679579077, "grad_norm": 7.2703471183776855, "learning_rate": 7.752112676056338e-06, "log_odds_chosen": 0.23913106322288513, "log_odds_ratio": -0.6690711975097656, "logits/chosen": -1.0826901197433472, "logits/rejected": -0.9747985601425171, "logps/chosen": -0.9307564496994019, "logps/rejected": -1.1430473327636719, "loss": 1.3458, "nll_loss": 1.4597210884094238, "rewards/accuracies": 0.625, "rewards/chosen": -0.09307564795017242, "rewards/margins": 0.0212290920317173, "rewards/rejected": -0.11430474370718002, "step": 283 }, { "epoch": 0.1732499618728077, "grad_norm": 2.6711506843566895, "learning_rate": 7.751132884262094e-06, "log_odds_chosen": 0.294938862323761, "log_odds_ratio": -0.6216363310813904, "logits/chosen": -1.1247613430023193, "logits/rejected": -1.184733510017395, "logps/chosen": -1.180924415588379, "logps/rejected": -1.4176748991012573, "loss": 1.4113, "nll_loss": 1.420167088508606, "rewards/accuracies": 0.75, "rewards/chosen": -0.11809244751930237, "rewards/margins": 0.023675046861171722, "rewards/rejected": -0.1417675018310547, "step": 284 }, { "epoch": 0.17385999694982462, "grad_norm": 2.1170332431793213, "learning_rate": 7.75015309246785e-06, "log_odds_chosen": -0.23953291773796082, "log_odds_ratio": -0.9644861817359924, "logits/chosen": -0.9849869608879089, "logits/rejected": -0.9199349880218506, "logps/chosen": -1.2244188785552979, "logps/rejected": -0.975501298904419, "loss": 1.3406, "nll_loss": 1.3577018976211548, "rewards/accuracies": 0.375, "rewards/chosen": -0.12244189530611038, "rewards/margins": -0.024891763925552368, "rewards/rejected": -0.09755012392997742, "step": 285 }, { "epoch": 0.17447003202684155, "grad_norm": 3.601837158203125, "learning_rate": 7.749173300673606e-06, "log_odds_chosen": -0.09241116046905518, "log_odds_ratio": -0.7989177703857422, "logits/chosen": -0.9832499027252197, "logits/rejected": -0.8992222547531128, "logps/chosen": -1.2662124633789062, "logps/rejected": -1.2239630222320557, "loss": 1.2489, "nll_loss": 1.4021210670471191, "rewards/accuracies": 0.5, "rewards/chosen": -0.12662124633789062, "rewards/margins": -0.004224944859743118, "rewards/rejected": -0.1223963052034378, "step": 286 }, { "epoch": 0.17508006710385848, "grad_norm": 2.6395938396453857, "learning_rate": 7.748193508879362e-06, "log_odds_chosen": 0.24161846935749054, "log_odds_ratio": -0.6485185623168945, "logits/chosen": -0.9269812107086182, "logits/rejected": -0.9497101306915283, "logps/chosen": -1.027330994606018, "logps/rejected": -1.2092102766036987, "loss": 1.3357, "nll_loss": 1.4139937162399292, "rewards/accuracies": 0.75, "rewards/chosen": -0.10273310542106628, "rewards/margins": 0.018187925219535828, "rewards/rejected": -0.12092103064060211, "step": 287 }, { "epoch": 0.1756901021808754, "grad_norm": 2.244955539703369, "learning_rate": 7.74721371708512e-06, "log_odds_chosen": -0.016814887523651123, "log_odds_ratio": -0.841411828994751, "logits/chosen": -1.0783225297927856, "logits/rejected": -0.9062286019325256, "logps/chosen": -1.1350816488265991, "logps/rejected": -1.1201751232147217, "loss": 1.4023, "nll_loss": 1.5693308115005493, "rewards/accuracies": 0.5, "rewards/chosen": -0.1135081797838211, "rewards/margins": -0.001490660011768341, "rewards/rejected": -0.11201751232147217, "step": 288 }, { "epoch": 0.17630013725789234, "grad_norm": 5.356431484222412, "learning_rate": 7.746233925290876e-06, "log_odds_chosen": 0.227142333984375, "log_odds_ratio": -0.6414239406585693, "logits/chosen": -0.7198106050491333, "logits/rejected": -0.7849417924880981, "logps/chosen": -0.9418185353279114, "logps/rejected": -1.0317919254302979, "loss": 1.071, "nll_loss": 0.9421826601028442, "rewards/accuracies": 0.625, "rewards/chosen": -0.0941818505525589, "rewards/margins": 0.008997339755296707, "rewards/rejected": -0.10317918658256531, "step": 289 }, { "epoch": 0.17691017233490924, "grad_norm": 3.895488739013672, "learning_rate": 7.745254133496632e-06, "log_odds_chosen": 0.21601901948451996, "log_odds_ratio": -0.6538717746734619, "logits/chosen": -0.7360506057739258, "logits/rejected": -0.7088386416435242, "logps/chosen": -0.7175682187080383, "logps/rejected": -0.8465307950973511, "loss": 1.2522, "nll_loss": 0.8707075119018555, "rewards/accuracies": 0.5, "rewards/chosen": -0.07175682485103607, "rewards/margins": 0.012896263040602207, "rewards/rejected": -0.0846530869603157, "step": 290 }, { "epoch": 0.17752020741192617, "grad_norm": 7.853343963623047, "learning_rate": 7.744274341702388e-06, "log_odds_chosen": -0.01529543474316597, "log_odds_ratio": -0.7159122228622437, "logits/chosen": -0.784968376159668, "logits/rejected": -0.7730675935745239, "logps/chosen": -0.9318339228630066, "logps/rejected": -0.9100733399391174, "loss": 1.332, "nll_loss": 1.0916602611541748, "rewards/accuracies": 0.5, "rewards/chosen": -0.09318339079618454, "rewards/margins": -0.002176058478653431, "rewards/rejected": -0.09100733697414398, "step": 291 }, { "epoch": 0.1781302424889431, "grad_norm": 4.013877868652344, "learning_rate": 7.743294549908144e-06, "log_odds_chosen": 0.4446292519569397, "log_odds_ratio": -0.5640853643417358, "logits/chosen": -0.6322895884513855, "logits/rejected": -0.6284134984016418, "logps/chosen": -0.8766701221466064, "logps/rejected": -1.1268906593322754, "loss": 1.3221, "nll_loss": 0.9942420721054077, "rewards/accuracies": 0.625, "rewards/chosen": -0.08766701817512512, "rewards/margins": 0.025022050365805626, "rewards/rejected": -0.1126890629529953, "step": 292 }, { "epoch": 0.17874027756596003, "grad_norm": 4.250795841217041, "learning_rate": 7.7423147581139e-06, "log_odds_chosen": 0.4484137296676636, "log_odds_ratio": -0.511076033115387, "logits/chosen": -0.935102105140686, "logits/rejected": -0.620356559753418, "logps/chosen": -1.251561164855957, "logps/rejected": -1.5750675201416016, "loss": 1.3644, "nll_loss": 1.3784351348876953, "rewards/accuracies": 0.875, "rewards/chosen": -0.12515611946582794, "rewards/margins": 0.03235063701868057, "rewards/rejected": -0.15750674903392792, "step": 293 }, { "epoch": 0.17935031264297696, "grad_norm": 2.648932695388794, "learning_rate": 7.741334966319657e-06, "log_odds_chosen": -0.05522368848323822, "log_odds_ratio": -0.7658013105392456, "logits/chosen": -1.1430091857910156, "logits/rejected": -0.9215238690376282, "logps/chosen": -1.1001920700073242, "logps/rejected": -1.047018051147461, "loss": 1.3035, "nll_loss": 1.359739065170288, "rewards/accuracies": 0.625, "rewards/chosen": -0.11001920700073242, "rewards/margins": -0.005317406263202429, "rewards/rejected": -0.10470180213451385, "step": 294 }, { "epoch": 0.1799603477199939, "grad_norm": 4.994222164154053, "learning_rate": 7.740355174525413e-06, "log_odds_chosen": 0.1317242980003357, "log_odds_ratio": -0.7069749236106873, "logits/chosen": -0.8104142546653748, "logits/rejected": -0.782747745513916, "logps/chosen": -1.0063295364379883, "logps/rejected": -1.0359114408493042, "loss": 1.2295, "nll_loss": 1.1164414882659912, "rewards/accuracies": 0.5, "rewards/chosen": -0.10063295811414719, "rewards/margins": 0.002958192490041256, "rewards/rejected": -0.10359114408493042, "step": 295 }, { "epoch": 0.18057038279701082, "grad_norm": 2.281296491622925, "learning_rate": 7.739375382731169e-06, "log_odds_chosen": -0.032369568943977356, "log_odds_ratio": -0.8232396245002747, "logits/chosen": -1.1237794160842896, "logits/rejected": -0.8957667350769043, "logps/chosen": -1.267546534538269, "logps/rejected": -1.2855384349822998, "loss": 1.4048, "nll_loss": 1.4199100732803345, "rewards/accuracies": 0.5, "rewards/chosen": -0.12675465643405914, "rewards/margins": 0.0017991941422224045, "rewards/rejected": -0.1285538375377655, "step": 296 }, { "epoch": 0.18118041787402775, "grad_norm": 2.4803121089935303, "learning_rate": 7.738395590936925e-06, "log_odds_chosen": 0.40323638916015625, "log_odds_ratio": -0.5269834995269775, "logits/chosen": -0.905529797077179, "logits/rejected": -0.9133264422416687, "logps/chosen": -0.8918092250823975, "logps/rejected": -1.146121859550476, "loss": 1.3102, "nll_loss": 1.1464500427246094, "rewards/accuracies": 0.875, "rewards/chosen": -0.08918093144893646, "rewards/margins": 0.025431260466575623, "rewards/rejected": -0.11461218446493149, "step": 297 }, { "epoch": 0.18179045295104468, "grad_norm": 3.686594247817993, "learning_rate": 7.737415799142681e-06, "log_odds_chosen": 0.38413214683532715, "log_odds_ratio": -0.561002254486084, "logits/chosen": -0.8367445468902588, "logits/rejected": -0.86583012342453, "logps/chosen": -0.8999280333518982, "logps/rejected": -1.1106722354888916, "loss": 1.2353, "nll_loss": 1.2027435302734375, "rewards/accuracies": 0.625, "rewards/chosen": -0.08999280631542206, "rewards/margins": 0.021074431017041206, "rewards/rejected": -0.11106723546981812, "step": 298 }, { "epoch": 0.1824004880280616, "grad_norm": 2.5143933296203613, "learning_rate": 7.736436007348439e-06, "log_odds_chosen": -0.06626398861408234, "log_odds_ratio": -0.8488805294036865, "logits/chosen": -1.110994815826416, "logits/rejected": -1.0412938594818115, "logps/chosen": -1.1037312746047974, "logps/rejected": -1.0099188089370728, "loss": 1.2097, "nll_loss": 1.2414442300796509, "rewards/accuracies": 0.5, "rewards/chosen": -0.1103731319308281, "rewards/margins": -0.009381242096424103, "rewards/rejected": -0.10099188983440399, "step": 299 }, { "epoch": 0.18301052310507854, "grad_norm": 2.8050317764282227, "learning_rate": 7.735456215554195e-06, "log_odds_chosen": 0.7521706223487854, "log_odds_ratio": -0.4440773129463196, "logits/chosen": -0.9667935371398926, "logits/rejected": -1.1465904712677002, "logps/chosen": -0.9618072509765625, "logps/rejected": -1.4693963527679443, "loss": 1.4945, "nll_loss": 1.4974682331085205, "rewards/accuracies": 0.875, "rewards/chosen": -0.09618072956800461, "rewards/margins": 0.05075891688466072, "rewards/rejected": -0.14693965017795563, "step": 300 }, { "epoch": 0.18362055818209547, "grad_norm": 2.9855690002441406, "learning_rate": 7.73447642375995e-06, "log_odds_chosen": 0.2986622452735901, "log_odds_ratio": -0.594144344329834, "logits/chosen": -1.0691496133804321, "logits/rejected": -1.1475781202316284, "logps/chosen": -0.7851791381835938, "logps/rejected": -0.9492397308349609, "loss": 1.1533, "nll_loss": 0.9912052750587463, "rewards/accuracies": 0.5, "rewards/chosen": -0.07851791381835938, "rewards/margins": 0.01640605926513672, "rewards/rejected": -0.09492398053407669, "step": 301 }, { "epoch": 0.1842305932591124, "grad_norm": 2.793195962905884, "learning_rate": 7.733496631965707e-06, "log_odds_chosen": 0.5118474960327148, "log_odds_ratio": -0.49588507413864136, "logits/chosen": -0.7595100402832031, "logits/rejected": -0.7314574122428894, "logps/chosen": -0.8442903757095337, "logps/rejected": -1.1108269691467285, "loss": 1.0316, "nll_loss": 0.8907158374786377, "rewards/accuracies": 0.875, "rewards/chosen": -0.08442904055118561, "rewards/margins": 0.02665366232395172, "rewards/rejected": -0.11108270287513733, "step": 302 }, { "epoch": 0.18484062833612933, "grad_norm": 4.118710994720459, "learning_rate": 7.732516840171463e-06, "log_odds_chosen": 0.28881406784057617, "log_odds_ratio": -0.6586036682128906, "logits/chosen": -0.9514085054397583, "logits/rejected": -0.8469619154930115, "logps/chosen": -0.6825212240219116, "logps/rejected": -0.8632802963256836, "loss": 1.334, "nll_loss": 1.0135022401809692, "rewards/accuracies": 0.5, "rewards/chosen": -0.06825212389230728, "rewards/margins": 0.01807590387761593, "rewards/rejected": -0.08632802963256836, "step": 303 }, { "epoch": 0.18545066341314626, "grad_norm": 2.262524127960205, "learning_rate": 7.73153704837722e-06, "log_odds_chosen": 0.17570289969444275, "log_odds_ratio": -0.7361024618148804, "logits/chosen": -0.9449988603591919, "logits/rejected": -0.8406028747558594, "logps/chosen": -0.7909358143806458, "logps/rejected": -0.8308484554290771, "loss": 1.3848, "nll_loss": 1.1960585117340088, "rewards/accuracies": 0.5, "rewards/chosen": -0.0790935829281807, "rewards/margins": 0.003991257399320602, "rewards/rejected": -0.0830848440527916, "step": 304 }, { "epoch": 0.18606069849016318, "grad_norm": 2.0106706619262695, "learning_rate": 7.730557256582976e-06, "log_odds_chosen": -0.44401615858078003, "log_odds_ratio": -1.0684298276901245, "logits/chosen": -1.1229379177093506, "logits/rejected": -0.9743452072143555, "logps/chosen": -1.0383780002593994, "logps/rejected": -0.7759272456169128, "loss": 1.3384, "nll_loss": 1.3860430717468262, "rewards/accuracies": 0.375, "rewards/chosen": -0.10383780300617218, "rewards/margins": -0.026245087385177612, "rewards/rejected": -0.07759271562099457, "step": 305 }, { "epoch": 0.1866707335671801, "grad_norm": 1.8853657245635986, "learning_rate": 7.729577464788732e-06, "log_odds_chosen": 0.48087215423583984, "log_odds_ratio": -0.4960637092590332, "logits/chosen": -1.032563328742981, "logits/rejected": -0.9954237341880798, "logps/chosen": -1.00168776512146, "logps/rejected": -1.3393781185150146, "loss": 1.2598, "nll_loss": 1.1691303253173828, "rewards/accuracies": 0.875, "rewards/chosen": -0.10016878694295883, "rewards/margins": 0.033769041299819946, "rewards/rejected": -0.13393783569335938, "step": 306 }, { "epoch": 0.18728076864419704, "grad_norm": 8.384262084960938, "learning_rate": 7.728597672994488e-06, "log_odds_chosen": 0.01682978868484497, "log_odds_ratio": -0.7220586538314819, "logits/chosen": -1.2274163961410522, "logits/rejected": -1.0661702156066895, "logps/chosen": -1.110696792602539, "logps/rejected": -1.1165878772735596, "loss": 1.2869, "nll_loss": 1.3256372213363647, "rewards/accuracies": 0.375, "rewards/chosen": -0.11106967180967331, "rewards/margins": 0.0005891202017664909, "rewards/rejected": -0.11165879666805267, "step": 307 }, { "epoch": 0.18789080372121397, "grad_norm": 2.9048643112182617, "learning_rate": 7.727617881200244e-06, "log_odds_chosen": 0.5080022811889648, "log_odds_ratio": -0.5138745903968811, "logits/chosen": -0.8063675165176392, "logits/rejected": -0.8137689828872681, "logps/chosen": -0.8690680265426636, "logps/rejected": -1.1280150413513184, "loss": 1.3064, "nll_loss": 1.2464828491210938, "rewards/accuracies": 0.75, "rewards/chosen": -0.0869068056344986, "rewards/margins": 0.02589471824467182, "rewards/rejected": -0.11280151456594467, "step": 308 }, { "epoch": 0.1885008387982309, "grad_norm": 2.8472373485565186, "learning_rate": 7.726638089406e-06, "log_odds_chosen": 0.25125014781951904, "log_odds_ratio": -0.6681662797927856, "logits/chosen": -0.9686963558197021, "logits/rejected": -0.788242518901825, "logps/chosen": -1.0046374797821045, "logps/rejected": -1.1323280334472656, "loss": 1.3078, "nll_loss": 1.210028886795044, "rewards/accuracies": 0.625, "rewards/chosen": -0.10046375542879105, "rewards/margins": 0.012769036926329136, "rewards/rejected": -0.11323279142379761, "step": 309 }, { "epoch": 0.18911087387524783, "grad_norm": 3.13407564163208, "learning_rate": 7.725658297611758e-06, "log_odds_chosen": 0.22552713751792908, "log_odds_ratio": -0.645173966884613, "logits/chosen": -0.8100011348724365, "logits/rejected": -0.9562106728553772, "logps/chosen": -0.9851580858230591, "logps/rejected": -1.0924633741378784, "loss": 1.1596, "nll_loss": 1.2091455459594727, "rewards/accuracies": 0.5, "rewards/chosen": -0.0985158160328865, "rewards/margins": 0.010730520822107792, "rewards/rejected": -0.10924633592367172, "step": 310 }, { "epoch": 0.18972090895226476, "grad_norm": 2.987307071685791, "learning_rate": 7.724678505817514e-06, "log_odds_chosen": 0.16284286975860596, "log_odds_ratio": -0.6744011640548706, "logits/chosen": -0.9986030459403992, "logits/rejected": -0.9339301586151123, "logps/chosen": -0.9818140268325806, "logps/rejected": -1.0857512950897217, "loss": 1.1648, "nll_loss": 1.2097783088684082, "rewards/accuracies": 0.75, "rewards/chosen": -0.09818140417337418, "rewards/margins": 0.010393722914159298, "rewards/rejected": -0.10857513546943665, "step": 311 }, { "epoch": 0.1903309440292817, "grad_norm": 3.875702142715454, "learning_rate": 7.72369871402327e-06, "log_odds_chosen": 0.13620978593826294, "log_odds_ratio": -0.697262704372406, "logits/chosen": -1.064399242401123, "logits/rejected": -1.0131561756134033, "logps/chosen": -1.1067087650299072, "logps/rejected": -1.141991138458252, "loss": 1.2242, "nll_loss": 1.2206729650497437, "rewards/accuracies": 0.5, "rewards/chosen": -0.11067087948322296, "rewards/margins": 0.003528241068124771, "rewards/rejected": -0.11419913172721863, "step": 312 }, { "epoch": 0.19094097910629862, "grad_norm": 5.73640775680542, "learning_rate": 7.722718922229026e-06, "log_odds_chosen": 0.05341716855764389, "log_odds_ratio": -0.7102628350257874, "logits/chosen": -1.134360671043396, "logits/rejected": -1.0381759405136108, "logps/chosen": -1.2080128192901611, "logps/rejected": -1.2229210138320923, "loss": 1.3002, "nll_loss": 1.415078043937683, "rewards/accuracies": 0.5, "rewards/chosen": -0.12080128490924835, "rewards/margins": 0.0014908239245414734, "rewards/rejected": -0.12229210138320923, "step": 313 }, { "epoch": 0.19155101418331555, "grad_norm": 3.606830358505249, "learning_rate": 7.721739130434782e-06, "log_odds_chosen": -0.059316862374544144, "log_odds_ratio": -0.7720973491668701, "logits/chosen": -1.1503164768218994, "logits/rejected": -1.1269567012786865, "logps/chosen": -1.0330851078033447, "logps/rejected": -1.035191535949707, "loss": 1.3683, "nll_loss": 1.277159333229065, "rewards/accuracies": 0.25, "rewards/chosen": -0.10330851376056671, "rewards/margins": 0.00021062884479761124, "rewards/rejected": -0.10351914912462234, "step": 314 }, { "epoch": 0.19216104926033248, "grad_norm": 8.085604667663574, "learning_rate": 7.720759338640539e-06, "log_odds_chosen": -0.1843494027853012, "log_odds_ratio": -0.8266544342041016, "logits/chosen": -1.1893253326416016, "logits/rejected": -1.0743526220321655, "logps/chosen": -1.2550276517868042, "logps/rejected": -1.1300042867660522, "loss": 1.4112, "nll_loss": 1.667516827583313, "rewards/accuracies": 0.25, "rewards/chosen": -0.12550276517868042, "rewards/margins": -0.0125023378059268, "rewards/rejected": -0.11300042271614075, "step": 315 }, { "epoch": 0.1927710843373494, "grad_norm": 2.859881639480591, "learning_rate": 7.719779546846295e-06, "log_odds_chosen": 0.34298744797706604, "log_odds_ratio": -0.559521496295929, "logits/chosen": -0.8885729312896729, "logits/rejected": -1.0741097927093506, "logps/chosen": -1.0126453638076782, "logps/rejected": -1.196508765220642, "loss": 1.1398, "nll_loss": 1.2242844104766846, "rewards/accuracies": 0.75, "rewards/chosen": -0.10126452893018723, "rewards/margins": 0.01838633418083191, "rewards/rejected": -0.11965087056159973, "step": 316 }, { "epoch": 0.19338111941436634, "grad_norm": 3.233945608139038, "learning_rate": 7.71879975505205e-06, "log_odds_chosen": 0.01658722758293152, "log_odds_ratio": -0.7333998680114746, "logits/chosen": -1.0643342733383179, "logits/rejected": -1.135719895362854, "logps/chosen": -1.092795968055725, "logps/rejected": -1.104888916015625, "loss": 1.2449, "nll_loss": 1.261642575263977, "rewards/accuracies": 0.5, "rewards/chosen": -0.10927959531545639, "rewards/margins": 0.001209290698170662, "rewards/rejected": -0.1104888916015625, "step": 317 }, { "epoch": 0.19399115449138327, "grad_norm": 4.541747570037842, "learning_rate": 7.717819963257807e-06, "log_odds_chosen": 0.6219976544380188, "log_odds_ratio": -0.5187156796455383, "logits/chosen": -1.011154294013977, "logits/rejected": -1.0796372890472412, "logps/chosen": -0.8907214403152466, "logps/rejected": -1.358431339263916, "loss": 1.2823, "nll_loss": 1.2197833061218262, "rewards/accuracies": 0.625, "rewards/chosen": -0.08907213807106018, "rewards/margins": 0.04677099734544754, "rewards/rejected": -0.13584314286708832, "step": 318 }, { "epoch": 0.1946011895684002, "grad_norm": 6.2533793449401855, "learning_rate": 7.716840171463563e-06, "log_odds_chosen": -0.027278035879135132, "log_odds_ratio": -0.7173595428466797, "logits/chosen": -1.3498681783676147, "logits/rejected": -1.2045636177062988, "logps/chosen": -0.9826159477233887, "logps/rejected": -0.9588472843170166, "loss": 1.3823, "nll_loss": 1.2136415243148804, "rewards/accuracies": 0.625, "rewards/chosen": -0.09826159477233887, "rewards/margins": -0.0023768674582242966, "rewards/rejected": -0.09588472545146942, "step": 319 }, { "epoch": 0.19521122464541713, "grad_norm": 3.275477647781372, "learning_rate": 7.715860379669319e-06, "log_odds_chosen": 0.3103967607021332, "log_odds_ratio": -0.5908018350601196, "logits/chosen": -1.2910306453704834, "logits/rejected": -1.1682703495025635, "logps/chosen": -1.0493711233139038, "logps/rejected": -1.2309784889221191, "loss": 1.3772, "nll_loss": 1.4282708168029785, "rewards/accuracies": 0.75, "rewards/chosen": -0.1049371138215065, "rewards/margins": 0.018160730600357056, "rewards/rejected": -0.12309785187244415, "step": 320 }, { "epoch": 0.19582125972243403, "grad_norm": 3.775313377380371, "learning_rate": 7.714880587875077e-06, "log_odds_chosen": -0.06468287855386734, "log_odds_ratio": -0.8064930438995361, "logits/chosen": -1.288217306137085, "logits/rejected": -1.2281572818756104, "logps/chosen": -1.258301019668579, "logps/rejected": -1.114062786102295, "loss": 1.362, "nll_loss": 1.3547160625457764, "rewards/accuracies": 0.625, "rewards/chosen": -0.12583009898662567, "rewards/margins": -0.01442381739616394, "rewards/rejected": -0.11140629649162292, "step": 321 }, { "epoch": 0.19643129479945096, "grad_norm": 2.3206918239593506, "learning_rate": 7.713900796080833e-06, "log_odds_chosen": 0.07618288695812225, "log_odds_ratio": -0.7607645988464355, "logits/chosen": -1.1760873794555664, "logits/rejected": -1.1052918434143066, "logps/chosen": -1.250434160232544, "logps/rejected": -1.3197665214538574, "loss": 1.4573, "nll_loss": 1.4245975017547607, "rewards/accuracies": 0.5, "rewards/chosen": -0.12504342198371887, "rewards/margins": 0.006933240219950676, "rewards/rejected": -0.1319766640663147, "step": 322 }, { "epoch": 0.19704132987646789, "grad_norm": 3.776109457015991, "learning_rate": 7.71292100428659e-06, "log_odds_chosen": 0.6088186502456665, "log_odds_ratio": -0.5401639938354492, "logits/chosen": -1.0223031044006348, "logits/rejected": -1.1310203075408936, "logps/chosen": -0.9445706605911255, "logps/rejected": -1.3044439554214478, "loss": 1.1284, "nll_loss": 1.0509321689605713, "rewards/accuracies": 0.625, "rewards/chosen": -0.09445707499980927, "rewards/margins": 0.03598732501268387, "rewards/rejected": -0.13044439256191254, "step": 323 }, { "epoch": 0.19765136495348481, "grad_norm": 1.8426474332809448, "learning_rate": 7.711941212492345e-06, "log_odds_chosen": 0.4990745186805725, "log_odds_ratio": -0.5580401420593262, "logits/chosen": -0.9440643787384033, "logits/rejected": -0.9190758466720581, "logps/chosen": -0.740145206451416, "logps/rejected": -1.0315736532211304, "loss": 1.1004, "nll_loss": 0.9543514251708984, "rewards/accuracies": 0.75, "rewards/chosen": -0.07401452958583832, "rewards/margins": 0.029142841696739197, "rewards/rejected": -0.10315736383199692, "step": 324 }, { "epoch": 0.19826140003050174, "grad_norm": 3.191783905029297, "learning_rate": 7.710961420698101e-06, "log_odds_chosen": 0.8141816258430481, "log_odds_ratio": -0.3979406952857971, "logits/chosen": -0.9377915859222412, "logits/rejected": -0.8040404319763184, "logps/chosen": -0.7654857635498047, "logps/rejected": -1.2628750801086426, "loss": 1.0385, "nll_loss": 1.1191966533660889, "rewards/accuracies": 1.0, "rewards/chosen": -0.07654857635498047, "rewards/margins": 0.04973893612623215, "rewards/rejected": -0.12628750503063202, "step": 325 }, { "epoch": 0.19887143510751867, "grad_norm": 2.2426598072052, "learning_rate": 7.709981628903858e-06, "log_odds_chosen": 0.2821231782436371, "log_odds_ratio": -0.5970578193664551, "logits/chosen": -1.115535020828247, "logits/rejected": -1.1224324703216553, "logps/chosen": -1.0971059799194336, "logps/rejected": -1.2697019577026367, "loss": 1.429, "nll_loss": 1.5434858798980713, "rewards/accuracies": 0.75, "rewards/chosen": -0.10971059650182724, "rewards/margins": 0.017259597778320312, "rewards/rejected": -0.12697020173072815, "step": 326 }, { "epoch": 0.1994814701845356, "grad_norm": 2.2521331310272217, "learning_rate": 7.709001837109614e-06, "log_odds_chosen": 0.09834244847297668, "log_odds_ratio": -0.6877093315124512, "logits/chosen": -0.9923684000968933, "logits/rejected": -0.7369375228881836, "logps/chosen": -0.7792398929595947, "logps/rejected": -0.8094380497932434, "loss": 1.2986, "nll_loss": 0.9198393821716309, "rewards/accuracies": 0.5, "rewards/chosen": -0.07792399078607559, "rewards/margins": 0.003019813448190689, "rewards/rejected": -0.08094380795955658, "step": 327 }, { "epoch": 0.20009150526155253, "grad_norm": 4.914551734924316, "learning_rate": 7.70802204531537e-06, "log_odds_chosen": 0.2980944812297821, "log_odds_ratio": -0.6077023148536682, "logits/chosen": -0.881406843662262, "logits/rejected": -0.7408886551856995, "logps/chosen": -0.9256936311721802, "logps/rejected": -1.125067114830017, "loss": 1.1505, "nll_loss": 1.2533780336380005, "rewards/accuracies": 0.875, "rewards/chosen": -0.09256935864686966, "rewards/margins": 0.01993735134601593, "rewards/rejected": -0.11250671744346619, "step": 328 }, { "epoch": 0.20070154033856946, "grad_norm": 2.823599338531494, "learning_rate": 7.707042253521128e-06, "log_odds_chosen": 0.3885735869407654, "log_odds_ratio": -0.5972371697425842, "logits/chosen": -1.0811641216278076, "logits/rejected": -0.9930849075317383, "logps/chosen": -0.9715642333030701, "logps/rejected": -1.2455167770385742, "loss": 1.159, "nll_loss": 1.1063227653503418, "rewards/accuracies": 0.625, "rewards/chosen": -0.09715642035007477, "rewards/margins": 0.027395252138376236, "rewards/rejected": -0.1245516687631607, "step": 329 }, { "epoch": 0.2013115754155864, "grad_norm": 4.1762375831604, "learning_rate": 7.706062461726882e-06, "log_odds_chosen": 0.5260932445526123, "log_odds_ratio": -0.5215073823928833, "logits/chosen": -0.5864076018333435, "logits/rejected": -0.8500750064849854, "logps/chosen": -0.9013909697532654, "logps/rejected": -1.152165174484253, "loss": 1.1661, "nll_loss": 1.0163570642471313, "rewards/accuracies": 0.75, "rewards/chosen": -0.09013909846544266, "rewards/margins": 0.025077426806092262, "rewards/rejected": -0.11521652340888977, "step": 330 }, { "epoch": 0.20192161049260332, "grad_norm": 4.477012634277344, "learning_rate": 7.705082669932638e-06, "log_odds_chosen": 0.06846677511930466, "log_odds_ratio": -0.6661103963851929, "logits/chosen": -1.1905461549758911, "logits/rejected": -1.2642240524291992, "logps/chosen": -1.2210084199905396, "logps/rejected": -1.2721554040908813, "loss": 1.3917, "nll_loss": 1.477041482925415, "rewards/accuracies": 0.625, "rewards/chosen": -0.1221008449792862, "rewards/margins": 0.005114690866321325, "rewards/rejected": -0.12721553444862366, "step": 331 }, { "epoch": 0.20253164556962025, "grad_norm": 3.1887645721435547, "learning_rate": 7.704102878138396e-06, "log_odds_chosen": 0.9425274133682251, "log_odds_ratio": -0.36111247539520264, "logits/chosen": -1.042020320892334, "logits/rejected": -0.9811635613441467, "logps/chosen": -0.554301917552948, "logps/rejected": -1.0739785432815552, "loss": 1.3894, "nll_loss": 1.431702971458435, "rewards/accuracies": 1.0, "rewards/chosen": -0.05543019622564316, "rewards/margins": 0.05196766555309296, "rewards/rejected": -0.10739786177873611, "step": 332 }, { "epoch": 0.20314168064663718, "grad_norm": 2.4265494346618652, "learning_rate": 7.703123086344152e-06, "log_odds_chosen": 0.6579123735427856, "log_odds_ratio": -0.5416015386581421, "logits/chosen": -1.0001007318496704, "logits/rejected": -0.8859859704971313, "logps/chosen": -0.7899651527404785, "logps/rejected": -1.1057122945785522, "loss": 1.2648, "nll_loss": 1.0840195417404175, "rewards/accuracies": 0.625, "rewards/chosen": -0.07899650931358337, "rewards/margins": 0.031574707478284836, "rewards/rejected": -0.1105712279677391, "step": 333 }, { "epoch": 0.2037517157236541, "grad_norm": 2.081448793411255, "learning_rate": 7.702143294549908e-06, "log_odds_chosen": 0.17614394426345825, "log_odds_ratio": -0.6644254922866821, "logits/chosen": -1.2760858535766602, "logits/rejected": -1.2689322233200073, "logps/chosen": -1.0837733745574951, "logps/rejected": -1.1819467544555664, "loss": 1.1961, "nll_loss": 1.4911904335021973, "rewards/accuracies": 0.5, "rewards/chosen": -0.10837733745574951, "rewards/margins": 0.009817340411245823, "rewards/rejected": -0.11819468438625336, "step": 334 }, { "epoch": 0.20436175080067104, "grad_norm": 3.489511013031006, "learning_rate": 7.701163502755664e-06, "log_odds_chosen": 0.18378423154354095, "log_odds_ratio": -0.7021164894104004, "logits/chosen": -0.9300558567047119, "logits/rejected": -0.8368260264396667, "logps/chosen": -1.098362684249878, "logps/rejected": -1.212577223777771, "loss": 1.4894, "nll_loss": 1.3339312076568604, "rewards/accuracies": 0.5, "rewards/chosen": -0.10983627289533615, "rewards/margins": 0.0114214438945055, "rewards/rejected": -0.1212577223777771, "step": 335 }, { "epoch": 0.20497178587768797, "grad_norm": 2.370591163635254, "learning_rate": 7.70018371096142e-06, "log_odds_chosen": 0.15192356705665588, "log_odds_ratio": -0.6686760187149048, "logits/chosen": -1.0958151817321777, "logits/rejected": -0.9468060731887817, "logps/chosen": -0.8581972122192383, "logps/rejected": -0.9504690766334534, "loss": 1.3435, "nll_loss": 1.0208635330200195, "rewards/accuracies": 0.625, "rewards/chosen": -0.08581972122192383, "rewards/margins": 0.009227183647453785, "rewards/rejected": -0.09504690766334534, "step": 336 }, { "epoch": 0.2055818209547049, "grad_norm": 3.818246364593506, "learning_rate": 7.699203919167177e-06, "log_odds_chosen": 0.16186639666557312, "log_odds_ratio": -0.748820960521698, "logits/chosen": -0.7101850509643555, "logits/rejected": -0.6425874829292297, "logps/chosen": -0.8822816014289856, "logps/rejected": -0.9211194515228271, "loss": 1.3375, "nll_loss": 1.1912102699279785, "rewards/accuracies": 0.5, "rewards/chosen": -0.08822816610336304, "rewards/margins": 0.0038837771862745285, "rewards/rejected": -0.09211194515228271, "step": 337 }, { "epoch": 0.20619185603172183, "grad_norm": 2.115675687789917, "learning_rate": 7.698224127372933e-06, "log_odds_chosen": 0.684975266456604, "log_odds_ratio": -0.469081312417984, "logits/chosen": -0.7781701683998108, "logits/rejected": -0.6546223163604736, "logps/chosen": -0.7075275182723999, "logps/rejected": -1.1224664449691772, "loss": 1.1668, "nll_loss": 0.9727733135223389, "rewards/accuracies": 0.875, "rewards/chosen": -0.07075275480747223, "rewards/margins": 0.04149390012025833, "rewards/rejected": -0.11224665492773056, "step": 338 }, { "epoch": 0.20680189110873876, "grad_norm": 4.655502796173096, "learning_rate": 7.697244335578689e-06, "log_odds_chosen": 0.0047137439250946045, "log_odds_ratio": -0.7754782438278198, "logits/chosen": -0.9859530925750732, "logits/rejected": -0.8095519542694092, "logps/chosen": -1.0108585357666016, "logps/rejected": -1.042220950126648, "loss": 1.2352, "nll_loss": 1.057834267616272, "rewards/accuracies": 0.625, "rewards/chosen": -0.1010858565568924, "rewards/margins": 0.0031362399458885193, "rewards/rejected": -0.10422209650278091, "step": 339 }, { "epoch": 0.20741192618575569, "grad_norm": 4.330885887145996, "learning_rate": 7.696264543784445e-06, "log_odds_chosen": 0.3717801868915558, "log_odds_ratio": -0.5557864904403687, "logits/chosen": -0.6024645566940308, "logits/rejected": -0.7110728621482849, "logps/chosen": -0.7541581988334656, "logps/rejected": -0.9557997584342957, "loss": 1.245, "nll_loss": 0.9996853470802307, "rewards/accuracies": 0.75, "rewards/chosen": -0.07541581988334656, "rewards/margins": 0.020164161920547485, "rewards/rejected": -0.09557998180389404, "step": 340 }, { "epoch": 0.20802196126277261, "grad_norm": 2.1283316612243652, "learning_rate": 7.695284751990201e-06, "log_odds_chosen": 0.609635591506958, "log_odds_ratio": -0.48048925399780273, "logits/chosen": -0.7788310647010803, "logits/rejected": -0.6397024989128113, "logps/chosen": -0.6976474523544312, "logps/rejected": -1.080712914466858, "loss": 1.2232, "nll_loss": 0.9753514528274536, "rewards/accuracies": 0.625, "rewards/chosen": -0.06976474821567535, "rewards/margins": 0.03830654174089432, "rewards/rejected": -0.10807129740715027, "step": 341 }, { "epoch": 0.20863199633978954, "grad_norm": 1.0989567041397095, "learning_rate": 7.694304960195957e-06, "log_odds_chosen": 0.414692759513855, "log_odds_ratio": -0.5337086915969849, "logits/chosen": -1.0472216606140137, "logits/rejected": -0.9964942932128906, "logps/chosen": -0.8699463605880737, "logps/rejected": -1.0847595930099487, "loss": 1.1951, "nll_loss": 1.2302274703979492, "rewards/accuracies": 0.875, "rewards/chosen": -0.08699463307857513, "rewards/margins": 0.02148132026195526, "rewards/rejected": -0.1084759533405304, "step": 342 }, { "epoch": 0.20924203141680647, "grad_norm": 1.2947988510131836, "learning_rate": 7.693325168401715e-06, "log_odds_chosen": 1.5163288116455078, "log_odds_ratio": -0.4672834277153015, "logits/chosen": -0.683569073677063, "logits/rejected": -0.8054211139678955, "logps/chosen": -0.8055288791656494, "logps/rejected": -2.0614116191864014, "loss": 1.1827, "nll_loss": 1.0872254371643066, "rewards/accuracies": 0.75, "rewards/chosen": -0.08055289089679718, "rewards/margins": 0.12558825314044952, "rewards/rejected": -0.2061411589384079, "step": 343 }, { "epoch": 0.2098520664938234, "grad_norm": 2.751955986022949, "learning_rate": 7.692345376607471e-06, "log_odds_chosen": 0.2586328685283661, "log_odds_ratio": -0.7050191760063171, "logits/chosen": -0.9035967588424683, "logits/rejected": -0.7396329641342163, "logps/chosen": -0.9488207101821899, "logps/rejected": -1.0489577054977417, "loss": 1.1926, "nll_loss": 1.045706033706665, "rewards/accuracies": 0.625, "rewards/chosen": -0.094882071018219, "rewards/margins": 0.01001368835568428, "rewards/rejected": -0.10489576309919357, "step": 344 }, { "epoch": 0.21046210157084033, "grad_norm": 1.9554073810577393, "learning_rate": 7.691365584813227e-06, "log_odds_chosen": 0.37457215785980225, "log_odds_ratio": -0.60442054271698, "logits/chosen": -0.8852366209030151, "logits/rejected": -0.7184220552444458, "logps/chosen": -0.7898033857345581, "logps/rejected": -1.0313621759414673, "loss": 1.4116, "nll_loss": 1.4433506727218628, "rewards/accuracies": 0.625, "rewards/chosen": -0.07898034155368805, "rewards/margins": 0.0241558700799942, "rewards/rejected": -0.10313621163368225, "step": 345 }, { "epoch": 0.21107213664785726, "grad_norm": 2.2897517681121826, "learning_rate": 7.690385793018983e-06, "log_odds_chosen": 0.06459371745586395, "log_odds_ratio": -0.7161203622817993, "logits/chosen": -0.6347295641899109, "logits/rejected": -0.7681485414505005, "logps/chosen": -1.021675705909729, "logps/rejected": -1.0254775285720825, "loss": 1.2483, "nll_loss": 1.156057357788086, "rewards/accuracies": 0.5, "rewards/chosen": -0.10216757655143738, "rewards/margins": 0.00038019055500626564, "rewards/rejected": -0.10254775732755661, "step": 346 }, { "epoch": 0.2116821717248742, "grad_norm": 1.6881184577941895, "learning_rate": 7.68940600122474e-06, "log_odds_chosen": 0.25115424394607544, "log_odds_ratio": -0.6369740962982178, "logits/chosen": -1.0216476917266846, "logits/rejected": -0.8961979150772095, "logps/chosen": -0.807543158531189, "logps/rejected": -0.9086236357688904, "loss": 1.3395, "nll_loss": 1.086463212966919, "rewards/accuracies": 0.375, "rewards/chosen": -0.08075432479381561, "rewards/margins": 0.010108046233654022, "rewards/rejected": -0.09086236357688904, "step": 347 }, { "epoch": 0.21229220680189112, "grad_norm": 1.4045557975769043, "learning_rate": 7.688426209430496e-06, "log_odds_chosen": 0.9377651810646057, "log_odds_ratio": -0.37768882513046265, "logits/chosen": -1.0056570768356323, "logits/rejected": -0.6681042909622192, "logps/chosen": -0.7780200242996216, "logps/rejected": -1.3416035175323486, "loss": 1.1501, "nll_loss": 1.073061227798462, "rewards/accuracies": 1.0, "rewards/chosen": -0.07780200988054276, "rewards/margins": 0.05635833740234375, "rewards/rejected": -0.1341603398323059, "step": 348 }, { "epoch": 0.21290224187890805, "grad_norm": 2.0235087871551514, "learning_rate": 7.687446417636252e-06, "log_odds_chosen": 0.6752246618270874, "log_odds_ratio": -0.4592107832431793, "logits/chosen": -1.035213828086853, "logits/rejected": -0.7774363160133362, "logps/chosen": -0.811065673828125, "logps/rejected": -1.2300524711608887, "loss": 1.2222, "nll_loss": 1.0589064359664917, "rewards/accuracies": 0.75, "rewards/chosen": -0.08110656589269638, "rewards/margins": 0.041898682713508606, "rewards/rejected": -0.12300524115562439, "step": 349 }, { "epoch": 0.21351227695592498, "grad_norm": 3.7518813610076904, "learning_rate": 7.686466625842008e-06, "log_odds_chosen": 0.3774681091308594, "log_odds_ratio": -0.5618996024131775, "logits/chosen": -0.5175235271453857, "logits/rejected": -0.502126157283783, "logps/chosen": -1.0833206176757812, "logps/rejected": -1.2786448001861572, "loss": 1.146, "nll_loss": 1.0679161548614502, "rewards/accuracies": 0.75, "rewards/chosen": -0.10833205282688141, "rewards/margins": 0.019532423466444016, "rewards/rejected": -0.12786448001861572, "step": 350 }, { "epoch": 0.21412231203294188, "grad_norm": 3.5044407844543457, "learning_rate": 7.685486834047764e-06, "log_odds_chosen": 0.8115400671958923, "log_odds_ratio": -0.4561329483985901, "logits/chosen": -0.5580329895019531, "logits/rejected": -0.6042249798774719, "logps/chosen": -0.7991158962249756, "logps/rejected": -1.3095135688781738, "loss": 1.1139, "nll_loss": 0.8936938047409058, "rewards/accuracies": 0.625, "rewards/chosen": -0.07991158962249756, "rewards/margins": 0.051039762794971466, "rewards/rejected": -0.13095135986804962, "step": 351 }, { "epoch": 0.2147323471099588, "grad_norm": 1.360635757446289, "learning_rate": 7.684507042253522e-06, "log_odds_chosen": 0.5750905871391296, "log_odds_ratio": -0.53971928358078, "logits/chosen": -0.769715428352356, "logits/rejected": -0.674405038356781, "logps/chosen": -0.767066478729248, "logps/rejected": -1.138861894607544, "loss": 1.0457, "nll_loss": 1.0480910539627075, "rewards/accuracies": 0.75, "rewards/chosen": -0.0767066478729248, "rewards/margins": 0.03717953711748123, "rewards/rejected": -0.11388619244098663, "step": 352 }, { "epoch": 0.21534238218697574, "grad_norm": 9.256567001342773, "learning_rate": 7.683527250459276e-06, "log_odds_chosen": 0.1345200389623642, "log_odds_ratio": -0.7054451704025269, "logits/chosen": -0.9387032389640808, "logits/rejected": -0.8530603647232056, "logps/chosen": -0.9992543458938599, "logps/rejected": -1.049220085144043, "loss": 1.2722, "nll_loss": 1.213941216468811, "rewards/accuracies": 0.375, "rewards/chosen": -0.0999254360795021, "rewards/margins": 0.0049965777434408665, "rewards/rejected": -0.10492201149463654, "step": 353 }, { "epoch": 0.21595241726399267, "grad_norm": 2.5779333114624023, "learning_rate": 7.682547458665032e-06, "log_odds_chosen": 0.3162413537502289, "log_odds_ratio": -0.6051570177078247, "logits/chosen": -0.8120638728141785, "logits/rejected": -0.7010293006896973, "logps/chosen": -0.8699072599411011, "logps/rejected": -1.0749189853668213, "loss": 1.1515, "nll_loss": 1.0250540971755981, "rewards/accuracies": 0.625, "rewards/chosen": -0.08699072897434235, "rewards/margins": 0.02050117775797844, "rewards/rejected": -0.10749191045761108, "step": 354 }, { "epoch": 0.2165624523410096, "grad_norm": 1.530547022819519, "learning_rate": 7.68156766687079e-06, "log_odds_chosen": 0.4800661504268646, "log_odds_ratio": -0.57938551902771, "logits/chosen": -0.8059306144714355, "logits/rejected": -0.6785740256309509, "logps/chosen": -0.705722451210022, "logps/rejected": -0.9932616949081421, "loss": 1.2761, "nll_loss": 0.943590521812439, "rewards/accuracies": 0.75, "rewards/chosen": -0.07057224959135056, "rewards/margins": 0.028753923252224922, "rewards/rejected": -0.09932617843151093, "step": 355 }, { "epoch": 0.21717248741802653, "grad_norm": 3.8760557174682617, "learning_rate": 7.680587875076546e-06, "log_odds_chosen": 0.511708676815033, "log_odds_ratio": -0.49126070737838745, "logits/chosen": -0.9261382222175598, "logits/rejected": -0.7755888104438782, "logps/chosen": -0.6921855807304382, "logps/rejected": -0.966245174407959, "loss": 1.1104, "nll_loss": 0.9536199569702148, "rewards/accuracies": 1.0, "rewards/chosen": -0.06921856105327606, "rewards/margins": 0.027405958622694016, "rewards/rejected": -0.09662452340126038, "step": 356 }, { "epoch": 0.21778252249504346, "grad_norm": 2.6949081420898438, "learning_rate": 7.679608083282302e-06, "log_odds_chosen": 0.49905121326446533, "log_odds_ratio": -0.5624749660491943, "logits/chosen": -1.1010627746582031, "logits/rejected": -0.9654725790023804, "logps/chosen": -0.7620342969894409, "logps/rejected": -1.0900075435638428, "loss": 1.2122, "nll_loss": 1.0207295417785645, "rewards/accuracies": 0.75, "rewards/chosen": -0.07620342075824738, "rewards/margins": 0.032797329127788544, "rewards/rejected": -0.10900075733661652, "step": 357 }, { "epoch": 0.2183925575720604, "grad_norm": 3.582930564880371, "learning_rate": 7.678628291488059e-06, "log_odds_chosen": 0.1951250433921814, "log_odds_ratio": -0.6569231748580933, "logits/chosen": -0.7671236395835876, "logits/rejected": -0.5941492319107056, "logps/chosen": -0.6996884346008301, "logps/rejected": -0.758348822593689, "loss": 1.0738, "nll_loss": 0.8379878401756287, "rewards/accuracies": 0.625, "rewards/chosen": -0.06996883451938629, "rewards/margins": 0.0058660381473600864, "rewards/rejected": -0.07583488523960114, "step": 358 }, { "epoch": 0.21900259264907732, "grad_norm": 7.382508754730225, "learning_rate": 7.677648499693815e-06, "log_odds_chosen": 0.3978649079799652, "log_odds_ratio": -0.5960951447486877, "logits/chosen": -1.040662407875061, "logits/rejected": -0.969982385635376, "logps/chosen": -0.9231418371200562, "logps/rejected": -1.1576781272888184, "loss": 1.3787, "nll_loss": 1.5441601276397705, "rewards/accuracies": 0.625, "rewards/chosen": -0.09231419116258621, "rewards/margins": 0.023453619331121445, "rewards/rejected": -0.11576780676841736, "step": 359 }, { "epoch": 0.21961262772609424, "grad_norm": 2.0941457748413086, "learning_rate": 7.67666870789957e-06, "log_odds_chosen": 0.5091622471809387, "log_odds_ratio": -0.5701141357421875, "logits/chosen": -0.9336798787117004, "logits/rejected": -0.9101845026016235, "logps/chosen": -0.9116315841674805, "logps/rejected": -1.1646184921264648, "loss": 1.2924, "nll_loss": 1.2126762866973877, "rewards/accuracies": 0.5, "rewards/chosen": -0.09116315841674805, "rewards/margins": 0.02529868483543396, "rewards/rejected": -0.116461843252182, "step": 360 }, { "epoch": 0.22022266280311117, "grad_norm": 3.609924793243408, "learning_rate": 7.675688916105327e-06, "log_odds_chosen": 0.5750096440315247, "log_odds_ratio": -0.5070134401321411, "logits/chosen": -0.6947118043899536, "logits/rejected": -0.7082769274711609, "logps/chosen": -0.9500147104263306, "logps/rejected": -1.3034045696258545, "loss": 1.3841, "nll_loss": 1.0839658975601196, "rewards/accuracies": 0.875, "rewards/chosen": -0.0950014740228653, "rewards/margins": 0.03533899039030075, "rewards/rejected": -0.13034045696258545, "step": 361 }, { "epoch": 0.2208326978801281, "grad_norm": 1.9993345737457275, "learning_rate": 7.674709124311083e-06, "log_odds_chosen": 0.23192401230335236, "log_odds_ratio": -0.6312641501426697, "logits/chosen": -0.9187256097793579, "logits/rejected": -0.7359157204627991, "logps/chosen": -0.9673340320587158, "logps/rejected": -1.0675110816955566, "loss": 1.2945, "nll_loss": 1.2078336477279663, "rewards/accuracies": 0.5, "rewards/chosen": -0.09673340618610382, "rewards/margins": 0.010017700493335724, "rewards/rejected": -0.10675111413002014, "step": 362 }, { "epoch": 0.22144273295714503, "grad_norm": 1.6550602912902832, "learning_rate": 7.673729332516841e-06, "log_odds_chosen": 0.4898552894592285, "log_odds_ratio": -0.5591527223587036, "logits/chosen": -1.0384482145309448, "logits/rejected": -0.8845198750495911, "logps/chosen": -0.8528231382369995, "logps/rejected": -1.1802865266799927, "loss": 1.2157, "nll_loss": 0.9566031694412231, "rewards/accuracies": 0.5, "rewards/chosen": -0.0852823257446289, "rewards/margins": 0.0327463336288929, "rewards/rejected": -0.1180286556482315, "step": 363 }, { "epoch": 0.22205276803416196, "grad_norm": 1.885421633720398, "learning_rate": 7.672749540722595e-06, "log_odds_chosen": 0.07845655083656311, "log_odds_ratio": -0.7133932113647461, "logits/chosen": -0.853196918964386, "logits/rejected": -0.7138934135437012, "logps/chosen": -1.093692421913147, "logps/rejected": -1.1472511291503906, "loss": 1.2321, "nll_loss": 1.1302359104156494, "rewards/accuracies": 0.625, "rewards/chosen": -0.10936924815177917, "rewards/margins": 0.005355866625905037, "rewards/rejected": -0.11472511291503906, "step": 364 }, { "epoch": 0.2226628031111789, "grad_norm": 9.423985481262207, "learning_rate": 7.671769748928351e-06, "log_odds_chosen": 0.6475621461868286, "log_odds_ratio": -0.4954969882965088, "logits/chosen": -0.6030987501144409, "logits/rejected": -0.6287246346473694, "logps/chosen": -0.836823046207428, "logps/rejected": -1.190110206604004, "loss": 1.3913, "nll_loss": 0.949792742729187, "rewards/accuracies": 0.625, "rewards/chosen": -0.08368229866027832, "rewards/margins": 0.035328712314367294, "rewards/rejected": -0.11901101469993591, "step": 365 }, { "epoch": 0.22327283818819582, "grad_norm": 2.7942943572998047, "learning_rate": 7.67078995713411e-06, "log_odds_chosen": 0.7854357957839966, "log_odds_ratio": -0.4328029751777649, "logits/chosen": -0.8136799335479736, "logits/rejected": -0.6622218489646912, "logps/chosen": -0.9029017686843872, "logps/rejected": -1.379035234451294, "loss": 1.257, "nll_loss": 1.3574714660644531, "rewards/accuracies": 0.75, "rewards/chosen": -0.09029017388820648, "rewards/margins": 0.047613341361284256, "rewards/rejected": -0.13790352642536163, "step": 366 }, { "epoch": 0.22388287326521275, "grad_norm": 2.0313968658447266, "learning_rate": 7.669810165339865e-06, "log_odds_chosen": 0.4219183921813965, "log_odds_ratio": -0.5274103283882141, "logits/chosen": -0.8279187083244324, "logits/rejected": -0.8530483245849609, "logps/chosen": -0.8387526273727417, "logps/rejected": -1.0898404121398926, "loss": 1.2441, "nll_loss": 1.5157307386398315, "rewards/accuracies": 0.875, "rewards/chosen": -0.08387526124715805, "rewards/margins": 0.025108788162469864, "rewards/rejected": -0.10898405313491821, "step": 367 }, { "epoch": 0.22449290834222968, "grad_norm": 1.681007742881775, "learning_rate": 7.668830373545621e-06, "log_odds_chosen": 0.5861552953720093, "log_odds_ratio": -0.5296221971511841, "logits/chosen": -0.9360634684562683, "logits/rejected": -0.7347110509872437, "logps/chosen": -1.1770634651184082, "logps/rejected": -1.5384974479675293, "loss": 1.4726, "nll_loss": 1.5966740846633911, "rewards/accuracies": 0.75, "rewards/chosen": -0.11770635843276978, "rewards/margins": 0.03614340350031853, "rewards/rejected": -0.1538497507572174, "step": 368 }, { "epoch": 0.2251029434192466, "grad_norm": 2.146224021911621, "learning_rate": 7.667850581751378e-06, "log_odds_chosen": 0.5297906398773193, "log_odds_ratio": -0.5379897356033325, "logits/chosen": -0.6869924068450928, "logits/rejected": -0.7533481121063232, "logps/chosen": -0.7730381488800049, "logps/rejected": -1.0265650749206543, "loss": 1.1703, "nll_loss": 0.9733712077140808, "rewards/accuracies": 0.5, "rewards/chosen": -0.07730381190776825, "rewards/margins": 0.025352684780955315, "rewards/rejected": -0.10265650600194931, "step": 369 }, { "epoch": 0.22571297849626354, "grad_norm": 3.5754261016845703, "learning_rate": 7.666870789957134e-06, "log_odds_chosen": 0.4815850853919983, "log_odds_ratio": -0.5392473340034485, "logits/chosen": -0.9450314044952393, "logits/rejected": -1.0440990924835205, "logps/chosen": -1.000084638595581, "logps/rejected": -1.3615431785583496, "loss": 1.2017, "nll_loss": 1.0876083374023438, "rewards/accuracies": 0.625, "rewards/chosen": -0.10000846534967422, "rewards/margins": 0.036145858466625214, "rewards/rejected": -0.13615432381629944, "step": 370 }, { "epoch": 0.22632301357328047, "grad_norm": 6.732400417327881, "learning_rate": 7.66589099816289e-06, "log_odds_chosen": 0.4106729328632355, "log_odds_ratio": -0.5305382609367371, "logits/chosen": -0.5767232775688171, "logits/rejected": -0.4212294816970825, "logps/chosen": -0.641070544719696, "logps/rejected": -0.8588678240776062, "loss": 1.0997, "nll_loss": 0.8319954872131348, "rewards/accuracies": 0.875, "rewards/chosen": -0.06410705298185349, "rewards/margins": 0.021779727190732956, "rewards/rejected": -0.08588677644729614, "step": 371 }, { "epoch": 0.2269330486502974, "grad_norm": 4.626410007476807, "learning_rate": 7.664911206368646e-06, "log_odds_chosen": 0.7468929290771484, "log_odds_ratio": -0.4721856713294983, "logits/chosen": -0.8605151176452637, "logits/rejected": -1.024324655532837, "logps/chosen": -0.8875660300254822, "logps/rejected": -1.298283338546753, "loss": 1.3645, "nll_loss": 1.3360430002212524, "rewards/accuracies": 0.875, "rewards/chosen": -0.08875660598278046, "rewards/margins": 0.04107173532247543, "rewards/rejected": -0.1298283338546753, "step": 372 }, { "epoch": 0.22754308372731433, "grad_norm": 5.226443290710449, "learning_rate": 7.663931414574402e-06, "log_odds_chosen": 0.34074142575263977, "log_odds_ratio": -0.5977645516395569, "logits/chosen": -0.5917799472808838, "logits/rejected": -0.5896779298782349, "logps/chosen": -0.9135371446609497, "logps/rejected": -1.106755256652832, "loss": 1.2071, "nll_loss": 1.0152450799942017, "rewards/accuracies": 0.625, "rewards/chosen": -0.09135370701551437, "rewards/margins": 0.01932181976735592, "rewards/rejected": -0.11067553609609604, "step": 373 }, { "epoch": 0.22815311880433126, "grad_norm": 1.7374022006988525, "learning_rate": 7.66295162278016e-06, "log_odds_chosen": 0.590164065361023, "log_odds_ratio": -0.5169371366500854, "logits/chosen": -0.8366150856018066, "logits/rejected": -0.7852163314819336, "logps/chosen": -0.889283299446106, "logps/rejected": -1.218456506729126, "loss": 1.1941, "nll_loss": 1.1264610290527344, "rewards/accuracies": 0.875, "rewards/chosen": -0.08892832696437836, "rewards/margins": 0.032917320728302, "rewards/rejected": -0.12184565514326096, "step": 374 }, { "epoch": 0.22876315388134819, "grad_norm": 2.056137800216675, "learning_rate": 7.661971830985914e-06, "log_odds_chosen": 0.13867980241775513, "log_odds_ratio": -0.6915042400360107, "logits/chosen": -1.053931713104248, "logits/rejected": -0.91335129737854, "logps/chosen": -0.9200055003166199, "logps/rejected": -1.0348936319351196, "loss": 1.2928, "nll_loss": 1.174667239189148, "rewards/accuracies": 0.5, "rewards/chosen": -0.0920005515217781, "rewards/margins": 0.011488819494843483, "rewards/rejected": -0.10348936915397644, "step": 375 }, { "epoch": 0.22937318895836512, "grad_norm": 3.2155356407165527, "learning_rate": 7.66099203919167e-06, "log_odds_chosen": 0.18083472549915314, "log_odds_ratio": -0.7137242555618286, "logits/chosen": -0.8576586842536926, "logits/rejected": -0.7889128923416138, "logps/chosen": -1.114741563796997, "logps/rejected": -1.1863783597946167, "loss": 1.263, "nll_loss": 1.2241939306259155, "rewards/accuracies": 0.5, "rewards/chosen": -0.11147415637969971, "rewards/margins": 0.007163679227232933, "rewards/rejected": -0.11863783746957779, "step": 376 }, { "epoch": 0.22998322403538204, "grad_norm": 1.6219336986541748, "learning_rate": 7.660012247397428e-06, "log_odds_chosen": 0.5724231600761414, "log_odds_ratio": -0.47472453117370605, "logits/chosen": -0.7889035940170288, "logits/rejected": -0.78835129737854, "logps/chosen": -0.6454639434814453, "logps/rejected": -0.9303909540176392, "loss": 1.4292, "nll_loss": 1.0593019723892212, "rewards/accuracies": 0.875, "rewards/chosen": -0.06454639881849289, "rewards/margins": 0.028492694720625877, "rewards/rejected": -0.09303909540176392, "step": 377 }, { "epoch": 0.23059325911239897, "grad_norm": 1.9332958459854126, "learning_rate": 7.659032455603184e-06, "log_odds_chosen": 0.13362827897071838, "log_odds_ratio": -0.7505306005477905, "logits/chosen": -0.6583895087242126, "logits/rejected": -0.7998080849647522, "logps/chosen": -1.103441834449768, "logps/rejected": -1.1374304294586182, "loss": 1.3304, "nll_loss": 1.1857428550720215, "rewards/accuracies": 0.375, "rewards/chosen": -0.11034418642520905, "rewards/margins": 0.0033988552168011665, "rewards/rejected": -0.11374305188655853, "step": 378 }, { "epoch": 0.2312032941894159, "grad_norm": 2.102267026901245, "learning_rate": 7.65805266380894e-06, "log_odds_chosen": 0.6762510538101196, "log_odds_ratio": -0.4805952310562134, "logits/chosen": -0.9872885942459106, "logits/rejected": -0.8610513210296631, "logps/chosen": -0.9177731871604919, "logps/rejected": -1.3315205574035645, "loss": 1.3797, "nll_loss": 1.562455654144287, "rewards/accuracies": 0.75, "rewards/chosen": -0.09177732467651367, "rewards/margins": 0.04137474298477173, "rewards/rejected": -0.1331520676612854, "step": 379 }, { "epoch": 0.23181332926643283, "grad_norm": 2.2210187911987305, "learning_rate": 7.657072872014697e-06, "log_odds_chosen": 0.6289001703262329, "log_odds_ratio": -0.5178492665290833, "logits/chosen": -0.9273969531059265, "logits/rejected": -0.9013679623603821, "logps/chosen": -0.9050487279891968, "logps/rejected": -1.2481791973114014, "loss": 1.2414, "nll_loss": 1.1033849716186523, "rewards/accuracies": 0.75, "rewards/chosen": -0.09050486981868744, "rewards/margins": 0.034313052892684937, "rewards/rejected": -0.12481792271137238, "step": 380 }, { "epoch": 0.23242336434344976, "grad_norm": 1.571066975593567, "learning_rate": 7.656093080220453e-06, "log_odds_chosen": 0.5756900310516357, "log_odds_ratio": -0.4840509593486786, "logits/chosen": -0.5468813180923462, "logits/rejected": -0.48495930433273315, "logps/chosen": -0.7404745817184448, "logps/rejected": -0.9915452599525452, "loss": 1.014, "nll_loss": 0.7059803605079651, "rewards/accuracies": 0.875, "rewards/chosen": -0.07404746115207672, "rewards/margins": 0.025107067078351974, "rewards/rejected": -0.0991545245051384, "step": 381 }, { "epoch": 0.23303339942046666, "grad_norm": 2.580272912979126, "learning_rate": 7.655113288426209e-06, "log_odds_chosen": 0.5710957050323486, "log_odds_ratio": -0.49668002128601074, "logits/chosen": -0.677046000957489, "logits/rejected": -0.6306741833686829, "logps/chosen": -0.834404468536377, "logps/rejected": -1.1562187671661377, "loss": 1.2144, "nll_loss": 1.0568255186080933, "rewards/accuracies": 0.75, "rewards/chosen": -0.08344044536352158, "rewards/margins": 0.03218142315745354, "rewards/rejected": -0.11562187969684601, "step": 382 }, { "epoch": 0.2336434344974836, "grad_norm": 1.9524569511413574, "learning_rate": 7.654133496631965e-06, "log_odds_chosen": 1.1354026794433594, "log_odds_ratio": -0.4094851613044739, "logits/chosen": -0.6889318823814392, "logits/rejected": -0.7296173572540283, "logps/chosen": -0.787225604057312, "logps/rejected": -1.5580990314483643, "loss": 1.0966, "nll_loss": 0.9668623805046082, "rewards/accuracies": 0.875, "rewards/chosen": -0.07872256636619568, "rewards/margins": 0.07708734273910522, "rewards/rejected": -0.1558099091053009, "step": 383 }, { "epoch": 0.23425346957450052, "grad_norm": 2.4991111755371094, "learning_rate": 7.653153704837721e-06, "log_odds_chosen": 0.6321199536323547, "log_odds_ratio": -0.5012189745903015, "logits/chosen": -0.8392832279205322, "logits/rejected": -0.7084116339683533, "logps/chosen": -0.7972933650016785, "logps/rejected": -1.1101372241973877, "loss": 1.1426, "nll_loss": 1.108126163482666, "rewards/accuracies": 0.875, "rewards/chosen": -0.0797293409705162, "rewards/margins": 0.0312843844294548, "rewards/rejected": -0.11101372539997101, "step": 384 }, { "epoch": 0.23486350465151745, "grad_norm": 1.7044899463653564, "learning_rate": 7.652173913043479e-06, "log_odds_chosen": 0.4596897065639496, "log_odds_ratio": -0.6157276630401611, "logits/chosen": -0.8594163656234741, "logits/rejected": -0.810276448726654, "logps/chosen": -1.1219959259033203, "logps/rejected": -1.423879623413086, "loss": 1.3169, "nll_loss": 1.2343297004699707, "rewards/accuracies": 0.625, "rewards/chosen": -0.11219958961009979, "rewards/margins": 0.030188364908099174, "rewards/rejected": -0.14238795638084412, "step": 385 }, { "epoch": 0.23547353972853438, "grad_norm": 1.7330607175827026, "learning_rate": 7.651194121249235e-06, "log_odds_chosen": -0.08071627467870712, "log_odds_ratio": -0.8248435258865356, "logits/chosen": -1.1043946743011475, "logits/rejected": -0.9598851799964905, "logps/chosen": -1.0967727899551392, "logps/rejected": -0.9727380275726318, "loss": 1.1555, "nll_loss": 1.4536824226379395, "rewards/accuracies": 0.625, "rewards/chosen": -0.1096772700548172, "rewards/margins": -0.012403471395373344, "rewards/rejected": -0.0972738116979599, "step": 386 }, { "epoch": 0.2360835748055513, "grad_norm": 4.1829962730407715, "learning_rate": 7.65021432945499e-06, "log_odds_chosen": 0.5604966282844543, "log_odds_ratio": -0.5623334646224976, "logits/chosen": -0.792628288269043, "logits/rejected": -0.7510545253753662, "logps/chosen": -0.8318784236907959, "logps/rejected": -1.152573823928833, "loss": 1.3156, "nll_loss": 1.1014859676361084, "rewards/accuracies": 0.625, "rewards/chosen": -0.08318784832954407, "rewards/margins": 0.03206953406333923, "rewards/rejected": -0.1152573823928833, "step": 387 }, { "epoch": 0.23669360988256824, "grad_norm": 1.497891902923584, "learning_rate": 7.649234537660747e-06, "log_odds_chosen": 0.833254873752594, "log_odds_ratio": -0.38682612776756287, "logits/chosen": -0.9879939556121826, "logits/rejected": -0.9668848514556885, "logps/chosen": -0.9871900677680969, "logps/rejected": -1.562648057937622, "loss": 1.2236, "nll_loss": 1.091407060623169, "rewards/accuracies": 0.875, "rewards/chosen": -0.09871900826692581, "rewards/margins": 0.05754581093788147, "rewards/rejected": -0.15626481175422668, "step": 388 }, { "epoch": 0.23730364495958517, "grad_norm": 1.7173848152160645, "learning_rate": 7.648254745866503e-06, "log_odds_chosen": 0.41254952549934387, "log_odds_ratio": -0.5578256845474243, "logits/chosen": -0.9476389288902283, "logits/rejected": -0.9275076389312744, "logps/chosen": -0.9299850463867188, "logps/rejected": -1.1329258680343628, "loss": 1.2511, "nll_loss": 1.0732593536376953, "rewards/accuracies": 0.625, "rewards/chosen": -0.09299850463867188, "rewards/margins": 0.020294085144996643, "rewards/rejected": -0.11329258978366852, "step": 389 }, { "epoch": 0.2379136800366021, "grad_norm": 7.337672233581543, "learning_rate": 7.64727495407226e-06, "log_odds_chosen": -0.1021001785993576, "log_odds_ratio": -0.7659924030303955, "logits/chosen": -1.109567403793335, "logits/rejected": -1.0281527042388916, "logps/chosen": -1.1125078201293945, "logps/rejected": -1.0591537952423096, "loss": 1.4024, "nll_loss": 1.5065799951553345, "rewards/accuracies": 0.375, "rewards/chosen": -0.11125078797340393, "rewards/margins": -0.005335403606295586, "rewards/rejected": -0.1059153825044632, "step": 390 }, { "epoch": 0.23852371511361903, "grad_norm": 3.66060209274292, "learning_rate": 7.646295162278016e-06, "log_odds_chosen": 0.29176902770996094, "log_odds_ratio": -0.6295928955078125, "logits/chosen": -1.0324301719665527, "logits/rejected": -0.9264101982116699, "logps/chosen": -0.9888505935668945, "logps/rejected": -1.1798462867736816, "loss": 1.2219, "nll_loss": 1.1352874040603638, "rewards/accuracies": 0.5, "rewards/chosen": -0.09888507425785065, "rewards/margins": 0.01909957081079483, "rewards/rejected": -0.11798463761806488, "step": 391 }, { "epoch": 0.23913375019063596, "grad_norm": 1.728987216949463, "learning_rate": 7.645315370483772e-06, "log_odds_chosen": 0.14898552000522614, "log_odds_ratio": -0.7242246866226196, "logits/chosen": -0.7490870356559753, "logits/rejected": -0.6732121109962463, "logps/chosen": -1.0831358432769775, "logps/rejected": -1.093076229095459, "loss": 1.1445, "nll_loss": 1.1038013696670532, "rewards/accuracies": 0.5, "rewards/chosen": -0.10831359773874283, "rewards/margins": 0.000994035042822361, "rewards/rejected": -0.10930763185024261, "step": 392 }, { "epoch": 0.2397437852676529, "grad_norm": 1.5785115957260132, "learning_rate": 7.644335578689528e-06, "log_odds_chosen": 0.6841885447502136, "log_odds_ratio": -0.4417039752006531, "logits/chosen": -0.5068305730819702, "logits/rejected": -0.4981076121330261, "logps/chosen": -0.7045274376869202, "logps/rejected": -1.037511944770813, "loss": 1.0807, "nll_loss": 0.8144634962081909, "rewards/accuracies": 0.875, "rewards/chosen": -0.0704527422785759, "rewards/margins": 0.03329845145344734, "rewards/rejected": -0.10375119745731354, "step": 393 }, { "epoch": 0.24035382034466982, "grad_norm": 3.2357325553894043, "learning_rate": 7.643355786895284e-06, "log_odds_chosen": 0.7352588176727295, "log_odds_ratio": -0.503108024597168, "logits/chosen": -0.9213255047798157, "logits/rejected": -0.703982949256897, "logps/chosen": -0.7925673127174377, "logps/rejected": -1.1416579484939575, "loss": 1.1267, "nll_loss": 0.8610604405403137, "rewards/accuracies": 0.875, "rewards/chosen": -0.07925673574209213, "rewards/margins": 0.03490906581282616, "rewards/rejected": -0.11416579782962799, "step": 394 }, { "epoch": 0.24096385542168675, "grad_norm": 2.7596566677093506, "learning_rate": 7.64237599510104e-06, "log_odds_chosen": 0.5123801231384277, "log_odds_ratio": -0.531188428401947, "logits/chosen": -1.0152777433395386, "logits/rejected": -0.5207391977310181, "logps/chosen": -0.9834269285202026, "logps/rejected": -1.3195863962173462, "loss": 1.194, "nll_loss": 1.3179175853729248, "rewards/accuracies": 0.625, "rewards/chosen": -0.09834268689155579, "rewards/margins": 0.033615946769714355, "rewards/rejected": -0.13195863366127014, "step": 395 }, { "epoch": 0.24157389049870368, "grad_norm": 2.1882898807525635, "learning_rate": 7.641396203306796e-06, "log_odds_chosen": 0.4471215009689331, "log_odds_ratio": -0.5322432518005371, "logits/chosen": -1.1080601215362549, "logits/rejected": -0.8302285671234131, "logps/chosen": -0.7288132905960083, "logps/rejected": -0.9990706443786621, "loss": 1.1316, "nll_loss": 1.0641915798187256, "rewards/accuracies": 0.75, "rewards/chosen": -0.07288133352994919, "rewards/margins": 0.02702573873102665, "rewards/rejected": -0.09990706294775009, "step": 396 }, { "epoch": 0.2421839255757206, "grad_norm": 3.12809419631958, "learning_rate": 7.640416411512554e-06, "log_odds_chosen": 0.8112160563468933, "log_odds_ratio": -0.4696948528289795, "logits/chosen": -0.7558934688568115, "logits/rejected": -0.6904950141906738, "logps/chosen": -0.7098037004470825, "logps/rejected": -1.103186845779419, "loss": 1.1353, "nll_loss": 0.9915320873260498, "rewards/accuracies": 0.75, "rewards/chosen": -0.07098037004470825, "rewards/margins": 0.03933831676840782, "rewards/rejected": -0.11031869053840637, "step": 397 }, { "epoch": 0.24279396065273753, "grad_norm": 9.231745719909668, "learning_rate": 7.639436619718309e-06, "log_odds_chosen": 0.04920307546854019, "log_odds_ratio": -0.7321325540542603, "logits/chosen": -0.9105455279350281, "logits/rejected": -0.7582827806472778, "logps/chosen": -1.1230487823486328, "logps/rejected": -1.1289818286895752, "loss": 1.1985, "nll_loss": 1.3370527029037476, "rewards/accuracies": 0.375, "rewards/chosen": -0.11230488866567612, "rewards/margins": 0.0005932999774813652, "rewards/rejected": -0.11289818584918976, "step": 398 }, { "epoch": 0.24340399572975446, "grad_norm": 2.2850170135498047, "learning_rate": 7.638456827924066e-06, "log_odds_chosen": 0.16326524317264557, "log_odds_ratio": -0.6865420341491699, "logits/chosen": -0.8504626750946045, "logits/rejected": -0.8004454374313354, "logps/chosen": -0.9078603386878967, "logps/rejected": -1.0042070150375366, "loss": 1.2462, "nll_loss": 1.171235203742981, "rewards/accuracies": 0.5, "rewards/chosen": -0.09078603982925415, "rewards/margins": 0.009634673595428467, "rewards/rejected": -0.10042071342468262, "step": 399 }, { "epoch": 0.2440140308067714, "grad_norm": 3.4265406131744385, "learning_rate": 7.637477036129822e-06, "log_odds_chosen": 0.4286854863166809, "log_odds_ratio": -0.5746227502822876, "logits/chosen": -0.8486690521240234, "logits/rejected": -0.5682289004325867, "logps/chosen": -0.7990826368331909, "logps/rejected": -1.0267257690429688, "loss": 1.2497, "nll_loss": 1.1877877712249756, "rewards/accuracies": 0.625, "rewards/chosen": -0.07990826666355133, "rewards/margins": 0.02276431769132614, "rewards/rejected": -0.10267258435487747, "step": 400 }, { "epoch": 0.24462406588378832, "grad_norm": 4.119083404541016, "learning_rate": 7.636497244335579e-06, "log_odds_chosen": 0.6922522783279419, "log_odds_ratio": -0.5310028791427612, "logits/chosen": -0.7914220690727234, "logits/rejected": -0.5969818830490112, "logps/chosen": -0.9243532419204712, "logps/rejected": -1.3215115070343018, "loss": 1.3481, "nll_loss": 1.2200415134429932, "rewards/accuracies": 0.75, "rewards/chosen": -0.092435322701931, "rewards/margins": 0.03971582651138306, "rewards/rejected": -0.13215114176273346, "step": 401 }, { "epoch": 0.24523410096080525, "grad_norm": 9.759302139282227, "learning_rate": 7.635517452541335e-06, "log_odds_chosen": 0.24191829562187195, "log_odds_ratio": -0.7014906406402588, "logits/chosen": -0.6697540879249573, "logits/rejected": -0.6024612784385681, "logps/chosen": -0.9628791809082031, "logps/rejected": -1.0504138469696045, "loss": 1.2578, "nll_loss": 1.5731197595596313, "rewards/accuracies": 0.5, "rewards/chosen": -0.09628792107105255, "rewards/margins": 0.008753461763262749, "rewards/rejected": -0.10504138469696045, "step": 402 }, { "epoch": 0.24584413603782218, "grad_norm": 1.4631072282791138, "learning_rate": 7.63453766074709e-06, "log_odds_chosen": 0.1426079422235489, "log_odds_ratio": -0.7430968880653381, "logits/chosen": -0.6875885725021362, "logits/rejected": -0.5808282494544983, "logps/chosen": -0.8417251110076904, "logps/rejected": -0.8717247843742371, "loss": 1.2181, "nll_loss": 0.9511438608169556, "rewards/accuracies": 0.5, "rewards/chosen": -0.08417251706123352, "rewards/margins": 0.0029999613761901855, "rewards/rejected": -0.0871724784374237, "step": 403 }, { "epoch": 0.2464541711148391, "grad_norm": 2.896583318710327, "learning_rate": 7.633557868952847e-06, "log_odds_chosen": 0.8191828727722168, "log_odds_ratio": -0.3971753418445587, "logits/chosen": -0.8970168828964233, "logits/rejected": -0.45591139793395996, "logps/chosen": -0.9599469900131226, "logps/rejected": -1.5188204050064087, "loss": 1.3778, "nll_loss": 1.1948072910308838, "rewards/accuracies": 0.875, "rewards/chosen": -0.09599470347166061, "rewards/margins": 0.05588734894990921, "rewards/rejected": -0.15188205242156982, "step": 404 }, { "epoch": 0.24706420619185604, "grad_norm": 2.980093479156494, "learning_rate": 7.632578077158603e-06, "log_odds_chosen": 0.37661537528038025, "log_odds_ratio": -0.6947274804115295, "logits/chosen": -0.9259769320487976, "logits/rejected": -0.6999658942222595, "logps/chosen": -0.9368196129798889, "logps/rejected": -1.2372866868972778, "loss": 1.3344, "nll_loss": 1.2747613191604614, "rewards/accuracies": 0.5, "rewards/chosen": -0.09368196129798889, "rewards/margins": 0.030046720057725906, "rewards/rejected": -0.1237286776304245, "step": 405 }, { "epoch": 0.24767424126887297, "grad_norm": 3.5024375915527344, "learning_rate": 7.63159828536436e-06, "log_odds_chosen": 0.33458423614501953, "log_odds_ratio": -0.6393198370933533, "logits/chosen": -0.8735682964324951, "logits/rejected": -0.7318587899208069, "logps/chosen": -0.881934404373169, "logps/rejected": -1.0447006225585938, "loss": 1.1468, "nll_loss": 1.0316039323806763, "rewards/accuracies": 0.625, "rewards/chosen": -0.08819344639778137, "rewards/margins": 0.01627662219107151, "rewards/rejected": -0.10447005927562714, "step": 406 }, { "epoch": 0.2482842763458899, "grad_norm": 1.8529053926467896, "learning_rate": 7.630618493570115e-06, "log_odds_chosen": 0.40738511085510254, "log_odds_ratio": -0.6227446794509888, "logits/chosen": -0.7316756248474121, "logits/rejected": -0.6740386486053467, "logps/chosen": -1.0051466226577759, "logps/rejected": -1.3472579717636108, "loss": 1.1958, "nll_loss": 1.0925791263580322, "rewards/accuracies": 0.5, "rewards/chosen": -0.10051466524600983, "rewards/margins": 0.03421115502715111, "rewards/rejected": -0.13472580909729004, "step": 407 }, { "epoch": 0.24889431142290683, "grad_norm": 9.92557144165039, "learning_rate": 7.629638701775873e-06, "log_odds_chosen": 0.4731285274028778, "log_odds_ratio": -0.5604979991912842, "logits/chosen": -0.8839956521987915, "logits/rejected": -0.7447319030761719, "logps/chosen": -0.7882004976272583, "logps/rejected": -1.0791269540786743, "loss": 1.284, "nll_loss": 0.9381757974624634, "rewards/accuracies": 0.75, "rewards/chosen": -0.07882004976272583, "rewards/margins": 0.02909264527261257, "rewards/rejected": -0.10791270434856415, "step": 408 }, { "epoch": 0.24950434649992376, "grad_norm": 3.6099977493286133, "learning_rate": 7.628658909981629e-06, "log_odds_chosen": 0.13446763157844543, "log_odds_ratio": -0.744735836982727, "logits/chosen": -0.869315505027771, "logits/rejected": -0.6480116844177246, "logps/chosen": -0.9939135909080505, "logps/rejected": -1.039717674255371, "loss": 0.9476, "nll_loss": 1.0608065128326416, "rewards/accuracies": 0.625, "rewards/chosen": -0.09939135611057281, "rewards/margins": 0.004580405540764332, "rewards/rejected": -0.10397176444530487, "step": 409 }, { "epoch": 0.25011438157694066, "grad_norm": 2.2784571647644043, "learning_rate": 7.6276791181873845e-06, "log_odds_chosen": -0.04359839856624603, "log_odds_ratio": -0.8001618385314941, "logits/chosen": -0.9547232389450073, "logits/rejected": -0.7964589595794678, "logps/chosen": -0.9964826703071594, "logps/rejected": -0.9496673941612244, "loss": 1.318, "nll_loss": 1.0588363409042358, "rewards/accuracies": 0.5, "rewards/chosen": -0.09964826703071594, "rewards/margins": -0.004681522957980633, "rewards/rejected": -0.09496674686670303, "step": 410 }, { "epoch": 0.2507244166539576, "grad_norm": 3.037890672683716, "learning_rate": 7.626699326393141e-06, "log_odds_chosen": -0.23316079378128052, "log_odds_ratio": -0.8628973364830017, "logits/chosen": -1.0471303462982178, "logits/rejected": -0.9477802515029907, "logps/chosen": -1.182673692703247, "logps/rejected": -1.0400989055633545, "loss": 1.3292, "nll_loss": 1.447003722190857, "rewards/accuracies": 0.25, "rewards/chosen": -0.11826735734939575, "rewards/margins": -0.014257468283176422, "rewards/rejected": -0.10400988906621933, "step": 411 }, { "epoch": 0.2513344517309745, "grad_norm": 2.3208720684051514, "learning_rate": 7.625719534598898e-06, "log_odds_chosen": 0.7428151369094849, "log_odds_ratio": -0.5191869139671326, "logits/chosen": -0.9989885687828064, "logits/rejected": -0.9757405519485474, "logps/chosen": -0.9533647894859314, "logps/rejected": -1.5134212970733643, "loss": 1.1751, "nll_loss": 1.3212707042694092, "rewards/accuracies": 0.75, "rewards/chosen": -0.09533648937940598, "rewards/margins": 0.05600564926862717, "rewards/rejected": -0.15134213864803314, "step": 412 }, { "epoch": 0.2519444868079915, "grad_norm": 2.1139001846313477, "learning_rate": 7.624739742804654e-06, "log_odds_chosen": -0.024593889713287354, "log_odds_ratio": -0.7899117469787598, "logits/chosen": -1.0872231721878052, "logits/rejected": -1.0008865594863892, "logps/chosen": -1.1620383262634277, "logps/rejected": -1.0883114337921143, "loss": 1.2464, "nll_loss": 1.3272947072982788, "rewards/accuracies": 0.375, "rewards/chosen": -0.11620382964611053, "rewards/margins": -0.0073726847767829895, "rewards/rejected": -0.10883114486932755, "step": 413 }, { "epoch": 0.2525545218850084, "grad_norm": 1.8336749076843262, "learning_rate": 7.62375995101041e-06, "log_odds_chosen": 0.08950290083885193, "log_odds_ratio": -0.7207662463188171, "logits/chosen": -0.8794618248939514, "logits/rejected": -0.8442606329917908, "logps/chosen": -0.8187690377235413, "logps/rejected": -0.8862806558609009, "loss": 1.3735, "nll_loss": 1.1809344291687012, "rewards/accuracies": 0.5, "rewards/chosen": -0.08187690377235413, "rewards/margins": 0.006751172244548798, "rewards/rejected": -0.08862808346748352, "step": 414 }, { "epoch": 0.25316455696202533, "grad_norm": 2.9274301528930664, "learning_rate": 7.622780159216166e-06, "log_odds_chosen": 0.5463844537734985, "log_odds_ratio": -0.5160501003265381, "logits/chosen": -0.6753590106964111, "logits/rejected": -0.6537524461746216, "logps/chosen": -1.0853204727172852, "logps/rejected": -1.4257864952087402, "loss": 1.1917, "nll_loss": 1.0841376781463623, "rewards/accuracies": 0.875, "rewards/chosen": -0.10853205621242523, "rewards/margins": 0.03404660522937775, "rewards/rejected": -0.14257866144180298, "step": 415 }, { "epoch": 0.25377459203904223, "grad_norm": 2.0806078910827637, "learning_rate": 7.621800367421922e-06, "log_odds_chosen": 0.2637360095977783, "log_odds_ratio": -0.6394661068916321, "logits/chosen": -0.8519200086593628, "logits/rejected": -0.8859351873397827, "logps/chosen": -0.8413231372833252, "logps/rejected": -0.9689289927482605, "loss": 1.3354, "nll_loss": 1.1570881605148315, "rewards/accuracies": 0.75, "rewards/chosen": -0.08413231372833252, "rewards/margins": 0.012760590761899948, "rewards/rejected": -0.09689290076494217, "step": 416 }, { "epoch": 0.2543846271160592, "grad_norm": 1.921183705329895, "learning_rate": 7.620820575627678e-06, "log_odds_chosen": 0.3488031327724457, "log_odds_ratio": -0.6580404043197632, "logits/chosen": -0.8996626138687134, "logits/rejected": -0.4937717914581299, "logps/chosen": -0.9722930192947388, "logps/rejected": -1.117755651473999, "loss": 1.3754, "nll_loss": 1.1236708164215088, "rewards/accuracies": 0.375, "rewards/chosen": -0.09722929447889328, "rewards/margins": 0.014546268619596958, "rewards/rejected": -0.11177556961774826, "step": 417 }, { "epoch": 0.2549946621930761, "grad_norm": 2.3282296657562256, "learning_rate": 7.619840783833435e-06, "log_odds_chosen": 0.4372059404850006, "log_odds_ratio": -0.575029730796814, "logits/chosen": -0.6636273860931396, "logits/rejected": -0.6454341411590576, "logps/chosen": -0.7526371479034424, "logps/rejected": -0.9789925813674927, "loss": 1.1307, "nll_loss": 0.9365856647491455, "rewards/accuracies": 0.75, "rewards/chosen": -0.07526371628046036, "rewards/margins": 0.02263553813099861, "rewards/rejected": -0.09789925813674927, "step": 418 }, { "epoch": 0.25560469727009305, "grad_norm": 2.1482043266296387, "learning_rate": 7.618860992039191e-06, "log_odds_chosen": 0.6377550363540649, "log_odds_ratio": -0.5552626252174377, "logits/chosen": -1.056857705116272, "logits/rejected": -0.8581469655036926, "logps/chosen": -0.8868253231048584, "logps/rejected": -1.3212969303131104, "loss": 1.2159, "nll_loss": 1.157731294631958, "rewards/accuracies": 0.625, "rewards/chosen": -0.08868253231048584, "rewards/margins": 0.04344715550541878, "rewards/rejected": -0.13212968409061432, "step": 419 }, { "epoch": 0.25621473234710995, "grad_norm": 1.7333190441131592, "learning_rate": 7.617881200244948e-06, "log_odds_chosen": 0.2189331203699112, "log_odds_ratio": -0.620421826839447, "logits/chosen": -0.9786847233772278, "logits/rejected": -0.6356947422027588, "logps/chosen": -1.2235288619995117, "logps/rejected": -1.3961334228515625, "loss": 1.1613, "nll_loss": 1.3129457235336304, "rewards/accuracies": 0.75, "rewards/chosen": -0.12235288321971893, "rewards/margins": 0.017260458320379257, "rewards/rejected": -0.1396133452653885, "step": 420 }, { "epoch": 0.2568247674241269, "grad_norm": 1.7456239461898804, "learning_rate": 7.616901408450704e-06, "log_odds_chosen": 0.2509578764438629, "log_odds_ratio": -0.6662046313285828, "logits/chosen": -0.5784440040588379, "logits/rejected": -0.6283528804779053, "logps/chosen": -0.8518276214599609, "logps/rejected": -0.988066554069519, "loss": 1.2703, "nll_loss": 1.4229512214660645, "rewards/accuracies": 0.75, "rewards/chosen": -0.08518275618553162, "rewards/margins": 0.01362390536814928, "rewards/rejected": -0.09880666434764862, "step": 421 }, { "epoch": 0.2574348025011438, "grad_norm": 4.142087936401367, "learning_rate": 7.61592161665646e-06, "log_odds_chosen": 0.22854956984519958, "log_odds_ratio": -0.6544584035873413, "logits/chosen": -0.8542300462722778, "logits/rejected": -0.7783733606338501, "logps/chosen": -0.951099693775177, "logps/rejected": -1.0788460969924927, "loss": 1.2798, "nll_loss": 1.2557127475738525, "rewards/accuracies": 0.375, "rewards/chosen": -0.0951099768280983, "rewards/margins": 0.012774637900292873, "rewards/rejected": -0.10788461565971375, "step": 422 }, { "epoch": 0.25804483757816077, "grad_norm": 3.7638158798217773, "learning_rate": 7.614941824862217e-06, "log_odds_chosen": -0.2842021882534027, "log_odds_ratio": -0.8866206407546997, "logits/chosen": -1.0696792602539062, "logits/rejected": -0.8790942430496216, "logps/chosen": -1.0913904905319214, "logps/rejected": -0.9670805335044861, "loss": 1.3541, "nll_loss": 1.3662071228027344, "rewards/accuracies": 0.375, "rewards/chosen": -0.10913904756307602, "rewards/margins": -0.012430991977453232, "rewards/rejected": -0.09670805931091309, "step": 423 }, { "epoch": 0.25865487265517767, "grad_norm": 5.249823093414307, "learning_rate": 7.613962033067973e-06, "log_odds_chosen": 0.09553516656160355, "log_odds_ratio": -0.6606074571609497, "logits/chosen": -1.0405634641647339, "logits/rejected": -0.7530900239944458, "logps/chosen": -1.0407214164733887, "logps/rejected": -1.0868397951126099, "loss": 1.1837, "nll_loss": 1.1828429698944092, "rewards/accuracies": 0.5, "rewards/chosen": -0.10407214611768723, "rewards/margins": 0.004611826967447996, "rewards/rejected": -0.10868397355079651, "step": 424 }, { "epoch": 0.2592649077321946, "grad_norm": 2.1729981899261475, "learning_rate": 7.612982241273729e-06, "log_odds_chosen": 0.4741806387901306, "log_odds_ratio": -0.5524778962135315, "logits/chosen": -0.7669922709465027, "logits/rejected": -0.571642279624939, "logps/chosen": -0.8328154683113098, "logps/rejected": -1.146511197090149, "loss": 1.1761, "nll_loss": 1.0546282529830933, "rewards/accuracies": 0.625, "rewards/chosen": -0.08328153938055038, "rewards/margins": 0.031369585543870926, "rewards/rejected": -0.11465112864971161, "step": 425 }, { "epoch": 0.25987494280921153, "grad_norm": 1.8703488111495972, "learning_rate": 7.612002449479486e-06, "log_odds_chosen": 0.52094566822052, "log_odds_ratio": -0.49952125549316406, "logits/chosen": -0.6375554800033569, "logits/rejected": -0.66957688331604, "logps/chosen": -0.9479765295982361, "logps/rejected": -1.2973084449768066, "loss": 1.0938, "nll_loss": 1.0510735511779785, "rewards/accuracies": 0.75, "rewards/chosen": -0.09479765594005585, "rewards/margins": 0.0349331870675087, "rewards/rejected": -0.12973085045814514, "step": 426 }, { "epoch": 0.26048497788622843, "grad_norm": 4.147088050842285, "learning_rate": 7.611022657685241e-06, "log_odds_chosen": 0.5345059633255005, "log_odds_ratio": -0.5299103856086731, "logits/chosen": -0.7918320298194885, "logits/rejected": -0.7128300666809082, "logps/chosen": -0.866987407207489, "logps/rejected": -1.200341820716858, "loss": 1.2652, "nll_loss": 1.0609453916549683, "rewards/accuracies": 0.75, "rewards/chosen": -0.0866987407207489, "rewards/margins": 0.03333544731140137, "rewards/rejected": -0.12003418803215027, "step": 427 }, { "epoch": 0.2610950129632454, "grad_norm": 1.6943926811218262, "learning_rate": 7.610042865890997e-06, "log_odds_chosen": 0.5509203672409058, "log_odds_ratio": -0.5777876377105713, "logits/chosen": -0.9853037595748901, "logits/rejected": -0.8851436376571655, "logps/chosen": -0.9131603240966797, "logps/rejected": -1.17252779006958, "loss": 1.2531, "nll_loss": 1.3141494989395142, "rewards/accuracies": 0.75, "rewards/chosen": -0.09131603688001633, "rewards/margins": 0.025936752557754517, "rewards/rejected": -0.11725278198719025, "step": 428 }, { "epoch": 0.2617050480402623, "grad_norm": 3.118131160736084, "learning_rate": 7.609063074096754e-06, "log_odds_chosen": 0.2943817377090454, "log_odds_ratio": -0.6292442083358765, "logits/chosen": -0.8701958656311035, "logits/rejected": -0.8042765259742737, "logps/chosen": -0.892703115940094, "logps/rejected": -1.0556068420410156, "loss": 1.2801, "nll_loss": 1.4042730331420898, "rewards/accuracies": 0.625, "rewards/chosen": -0.08927030861377716, "rewards/margins": 0.01629037782549858, "rewards/rejected": -0.10556069016456604, "step": 429 }, { "epoch": 0.26231508311727925, "grad_norm": 2.30570125579834, "learning_rate": 7.60808328230251e-06, "log_odds_chosen": 0.5394443273544312, "log_odds_ratio": -0.5413869619369507, "logits/chosen": -0.7156528830528259, "logits/rejected": -0.7896319627761841, "logps/chosen": -0.7398634552955627, "logps/rejected": -1.0289641618728638, "loss": 1.1501, "nll_loss": 1.041774034500122, "rewards/accuracies": 0.625, "rewards/chosen": -0.07398635149002075, "rewards/margins": 0.02891007624566555, "rewards/rejected": -0.10289642214775085, "step": 430 }, { "epoch": 0.26292511819429615, "grad_norm": 3.3099422454833984, "learning_rate": 7.607103490508267e-06, "log_odds_chosen": 0.3867950439453125, "log_odds_ratio": -0.5635284781455994, "logits/chosen": -0.7799526453018188, "logits/rejected": -0.7244229316711426, "logps/chosen": -0.7671600580215454, "logps/rejected": -0.9644435048103333, "loss": 1.1688, "nll_loss": 1.005763292312622, "rewards/accuracies": 0.625, "rewards/chosen": -0.07671600580215454, "rewards/margins": 0.019728347659111023, "rewards/rejected": -0.09644435346126556, "step": 431 }, { "epoch": 0.2635351532713131, "grad_norm": 3.0953621864318848, "learning_rate": 7.606123698714023e-06, "log_odds_chosen": 0.3953518271446228, "log_odds_ratio": -0.579872190952301, "logits/chosen": -0.6052432656288147, "logits/rejected": -0.4928319454193115, "logps/chosen": -0.9012798070907593, "logps/rejected": -1.1369256973266602, "loss": 1.1599, "nll_loss": 1.010514736175537, "rewards/accuracies": 0.75, "rewards/chosen": -0.09012798219919205, "rewards/margins": 0.023564590141177177, "rewards/rejected": -0.11369256675243378, "step": 432 }, { "epoch": 0.26414518834833, "grad_norm": 1.2687746286392212, "learning_rate": 7.605143906919779e-06, "log_odds_chosen": 0.03667262941598892, "log_odds_ratio": -0.7686617970466614, "logits/chosen": -0.7889806032180786, "logits/rejected": -0.7725884914398193, "logps/chosen": -0.8503056764602661, "logps/rejected": -0.8023906350135803, "loss": 1.0734, "nll_loss": 1.0402429103851318, "rewards/accuracies": 0.375, "rewards/chosen": -0.08503057062625885, "rewards/margins": -0.004791506100445986, "rewards/rejected": -0.08023905754089355, "step": 433 }, { "epoch": 0.26475522342534696, "grad_norm": 6.026952743530273, "learning_rate": 7.604164115125536e-06, "log_odds_chosen": 0.6520811319351196, "log_odds_ratio": -0.4595167636871338, "logits/chosen": -0.8714652061462402, "logits/rejected": -0.795704185962677, "logps/chosen": -0.7824360132217407, "logps/rejected": -1.1994373798370361, "loss": 1.2529, "nll_loss": 0.9522619247436523, "rewards/accuracies": 0.75, "rewards/chosen": -0.07824359834194183, "rewards/margins": 0.04170013964176178, "rewards/rejected": -0.11994373798370361, "step": 434 }, { "epoch": 0.26536525850236387, "grad_norm": 4.790633678436279, "learning_rate": 7.603184323331292e-06, "log_odds_chosen": 0.44341495633125305, "log_odds_ratio": -0.5667150020599365, "logits/chosen": -0.7529531121253967, "logits/rejected": -0.6713001728057861, "logps/chosen": -0.8181261420249939, "logps/rejected": -1.0649975538253784, "loss": 1.1204, "nll_loss": 0.9400722980499268, "rewards/accuracies": 0.625, "rewards/chosen": -0.08181262016296387, "rewards/margins": 0.024687139317393303, "rewards/rejected": -0.10649976134300232, "step": 435 }, { "epoch": 0.2659752935793808, "grad_norm": 6.410855293273926, "learning_rate": 7.602204531537048e-06, "log_odds_chosen": 0.7467976808547974, "log_odds_ratio": -0.5120850205421448, "logits/chosen": -0.6359294056892395, "logits/rejected": -0.6141526699066162, "logps/chosen": -0.8858708739280701, "logps/rejected": -1.228048324584961, "loss": 1.3752, "nll_loss": 1.1325024366378784, "rewards/accuracies": 0.75, "rewards/chosen": -0.08858709037303925, "rewards/margins": 0.03421773761510849, "rewards/rejected": -0.12280482053756714, "step": 436 }, { "epoch": 0.2665853286563977, "grad_norm": 3.8049142360687256, "learning_rate": 7.601224739742805e-06, "log_odds_chosen": 0.4432772099971771, "log_odds_ratio": -0.5458660125732422, "logits/chosen": -1.2650448083877563, "logits/rejected": -1.0736496448516846, "logps/chosen": -0.828168511390686, "logps/rejected": -1.1241456270217896, "loss": 1.2486, "nll_loss": 1.0758391618728638, "rewards/accuracies": 0.625, "rewards/chosen": -0.08281685411930084, "rewards/margins": 0.029597710818052292, "rewards/rejected": -0.11241456121206284, "step": 437 }, { "epoch": 0.2671953637334147, "grad_norm": 4.375844478607178, "learning_rate": 7.60024494794856e-06, "log_odds_chosen": 0.10625556856393814, "log_odds_ratio": -0.8194423317909241, "logits/chosen": -0.5800154209136963, "logits/rejected": -0.514889121055603, "logps/chosen": -1.014041781425476, "logps/rejected": -1.028247356414795, "loss": 1.2292, "nll_loss": 1.0243113040924072, "rewards/accuracies": 0.75, "rewards/chosen": -0.10140418261289597, "rewards/margins": 0.001420559361577034, "rewards/rejected": -0.10282474011182785, "step": 438 }, { "epoch": 0.2678053988104316, "grad_norm": 2.849787712097168, "learning_rate": 7.599265156154316e-06, "log_odds_chosen": 0.013058274984359741, "log_odds_ratio": -0.7062019109725952, "logits/chosen": -0.6537253856658936, "logits/rejected": -0.7388080358505249, "logps/chosen": -1.069462537765503, "logps/rejected": -1.0667294263839722, "loss": 1.1506, "nll_loss": 0.958329439163208, "rewards/accuracies": 0.375, "rewards/chosen": -0.10694625228643417, "rewards/margins": -0.000273311510682106, "rewards/rejected": -0.10667294263839722, "step": 439 }, { "epoch": 0.26841543388744854, "grad_norm": 3.5208442211151123, "learning_rate": 7.598285364360073e-06, "log_odds_chosen": -0.08785776793956757, "log_odds_ratio": -0.7877535820007324, "logits/chosen": -0.9668976664543152, "logits/rejected": -0.7710477113723755, "logps/chosen": -1.2407910823822021, "logps/rejected": -1.2081141471862793, "loss": 1.213, "nll_loss": 1.4177005290985107, "rewards/accuracies": 0.375, "rewards/chosen": -0.12407910078763962, "rewards/margins": -0.003267694264650345, "rewards/rejected": -0.12081141769886017, "step": 440 }, { "epoch": 0.26902546896446544, "grad_norm": 2.513970136642456, "learning_rate": 7.597305572565829e-06, "log_odds_chosen": 0.2234954535961151, "log_odds_ratio": -0.6131566166877747, "logits/chosen": -0.8810629844665527, "logits/rejected": -0.7202251553535461, "logps/chosen": -0.8332971334457397, "logps/rejected": -0.9827049374580383, "loss": 1.1995, "nll_loss": 1.0719671249389648, "rewards/accuracies": 0.625, "rewards/chosen": -0.08332972228527069, "rewards/margins": 0.014940774068236351, "rewards/rejected": -0.0982704907655716, "step": 441 }, { "epoch": 0.2696355040414824, "grad_norm": 2.1505446434020996, "learning_rate": 7.5963257807715855e-06, "log_odds_chosen": -0.15499770641326904, "log_odds_ratio": -0.9108115434646606, "logits/chosen": -0.8601590394973755, "logits/rejected": -0.8857513666152954, "logps/chosen": -1.0477116107940674, "logps/rejected": -0.9862794876098633, "loss": 1.2502, "nll_loss": 1.2457003593444824, "rewards/accuracies": 0.5, "rewards/chosen": -0.10477116703987122, "rewards/margins": -0.006143217906355858, "rewards/rejected": -0.0986279547214508, "step": 442 }, { "epoch": 0.2702455391184993, "grad_norm": 5.085864543914795, "learning_rate": 7.5953459889773425e-06, "log_odds_chosen": 0.9848780632019043, "log_odds_ratio": -0.39050188660621643, "logits/chosen": -0.9644244313240051, "logits/rejected": -0.8748199939727783, "logps/chosen": -1.0128437280654907, "logps/rejected": -1.7096201181411743, "loss": 1.3553, "nll_loss": 1.1992485523223877, "rewards/accuracies": 0.875, "rewards/chosen": -0.10128436982631683, "rewards/margins": 0.06967763602733612, "rewards/rejected": -0.17096200585365295, "step": 443 }, { "epoch": 0.27085557419551626, "grad_norm": 2.096691846847534, "learning_rate": 7.594366197183098e-06, "log_odds_chosen": 0.6223797798156738, "log_odds_ratio": -0.5351522564888, "logits/chosen": -0.9833553433418274, "logits/rejected": -0.7633851766586304, "logps/chosen": -0.846785306930542, "logps/rejected": -1.1179618835449219, "loss": 1.1833, "nll_loss": 1.2015807628631592, "rewards/accuracies": 0.625, "rewards/chosen": -0.0846785381436348, "rewards/margins": 0.02711765468120575, "rewards/rejected": -0.11179619282484055, "step": 444 }, { "epoch": 0.27146560927253316, "grad_norm": 2.098510265350342, "learning_rate": 7.593386405388855e-06, "log_odds_chosen": 0.08197088539600372, "log_odds_ratio": -0.7231148481369019, "logits/chosen": -1.0797227621078491, "logits/rejected": -1.0228769779205322, "logps/chosen": -0.8826051950454712, "logps/rejected": -1.001914143562317, "loss": 1.3519, "nll_loss": 1.413404941558838, "rewards/accuracies": 0.5, "rewards/chosen": -0.08826053142547607, "rewards/margins": 0.011930886656045914, "rewards/rejected": -0.10019140690565109, "step": 445 }, { "epoch": 0.2720756443495501, "grad_norm": 1.2154325246810913, "learning_rate": 7.592406613594611e-06, "log_odds_chosen": 1.0347319841384888, "log_odds_ratio": -0.43018224835395813, "logits/chosen": -0.8279287219047546, "logits/rejected": -0.8751354217529297, "logps/chosen": -0.7228954434394836, "logps/rejected": -1.4547991752624512, "loss": 1.245, "nll_loss": 1.233469009399414, "rewards/accuracies": 0.75, "rewards/chosen": -0.07228954136371613, "rewards/margins": 0.07319037616252899, "rewards/rejected": -0.14547991752624512, "step": 446 }, { "epoch": 0.272685679426567, "grad_norm": 1.822729468345642, "learning_rate": 7.591426821800367e-06, "log_odds_chosen": 0.5487288236618042, "log_odds_ratio": -0.4943434000015259, "logits/chosen": -0.7867703437805176, "logits/rejected": -0.5841090083122253, "logps/chosen": -0.7522687315940857, "logps/rejected": -1.0624961853027344, "loss": 1.2378, "nll_loss": 1.0739398002624512, "rewards/accuracies": 0.875, "rewards/chosen": -0.07522687315940857, "rewards/margins": 0.031022755429148674, "rewards/rejected": -0.10624963045120239, "step": 447 }, { "epoch": 0.273295714503584, "grad_norm": 2.9719457626342773, "learning_rate": 7.590447030006124e-06, "log_odds_chosen": 0.3502964377403259, "log_odds_ratio": -0.5860196352005005, "logits/chosen": -0.8657785654067993, "logits/rejected": -0.756075918674469, "logps/chosen": -0.8816034197807312, "logps/rejected": -1.058461308479309, "loss": 1.0185, "nll_loss": 1.046537160873413, "rewards/accuracies": 0.625, "rewards/chosen": -0.08816034346818924, "rewards/margins": 0.017685793340206146, "rewards/rejected": -0.10584613680839539, "step": 448 }, { "epoch": 0.2739057495806009, "grad_norm": 1.5285711288452148, "learning_rate": 7.58946723821188e-06, "log_odds_chosen": -0.16683438420295715, "log_odds_ratio": -0.8731886148452759, "logits/chosen": -0.8034111261367798, "logits/rejected": -0.8439921736717224, "logps/chosen": -1.038767695426941, "logps/rejected": -0.9524967670440674, "loss": 1.0722, "nll_loss": 1.0502569675445557, "rewards/accuracies": 0.5, "rewards/chosen": -0.10387677699327469, "rewards/margins": -0.008627103641629219, "rewards/rejected": -0.09524966776371002, "step": 449 }, { "epoch": 0.27451578465761783, "grad_norm": 6.166206359863281, "learning_rate": 7.588487446417635e-06, "log_odds_chosen": 0.4534033238887787, "log_odds_ratio": -0.5430460572242737, "logits/chosen": -0.5336553454399109, "logits/rejected": -0.3192227780818939, "logps/chosen": -0.9184363484382629, "logps/rejected": -1.2036058902740479, "loss": 1.341, "nll_loss": 1.3510864973068237, "rewards/accuracies": 0.625, "rewards/chosen": -0.0918436348438263, "rewards/margins": 0.028516950085759163, "rewards/rejected": -0.12036058306694031, "step": 450 }, { "epoch": 0.27512581973463474, "grad_norm": 1.204926609992981, "learning_rate": 7.587507654623392e-06, "log_odds_chosen": 0.42766857147216797, "log_odds_ratio": -0.538910984992981, "logits/chosen": -0.7044711709022522, "logits/rejected": -0.5668948888778687, "logps/chosen": -0.8235164880752563, "logps/rejected": -1.075345754623413, "loss": 1.3544, "nll_loss": 1.0534015893936157, "rewards/accuracies": 0.75, "rewards/chosen": -0.08235164731740952, "rewards/margins": 0.025182921439409256, "rewards/rejected": -0.10753457248210907, "step": 451 }, { "epoch": 0.2757358548116517, "grad_norm": 2.039318561553955, "learning_rate": 7.5865278628291485e-06, "log_odds_chosen": 0.825799286365509, "log_odds_ratio": -0.4100185036659241, "logits/chosen": -0.6192983984947205, "logits/rejected": -0.3753618597984314, "logps/chosen": -0.6425188779830933, "logps/rejected": -1.105602502822876, "loss": 1.0727, "nll_loss": 0.8413348197937012, "rewards/accuracies": 0.875, "rewards/chosen": -0.06425188481807709, "rewards/margins": 0.04630837216973305, "rewards/rejected": -0.11056025326251984, "step": 452 }, { "epoch": 0.2763458898886686, "grad_norm": 4.353516101837158, "learning_rate": 7.585548071034905e-06, "log_odds_chosen": 0.5277951955795288, "log_odds_ratio": -0.5323882102966309, "logits/chosen": -0.6629316806793213, "logits/rejected": -0.6759694218635559, "logps/chosen": -1.0266591310501099, "logps/rejected": -1.3637292385101318, "loss": 1.2712, "nll_loss": 1.1421180963516235, "rewards/accuracies": 0.75, "rewards/chosen": -0.10266591608524323, "rewards/margins": 0.03370700404047966, "rewards/rejected": -0.13637292385101318, "step": 453 }, { "epoch": 0.27695592496568555, "grad_norm": 5.961481094360352, "learning_rate": 7.5845682792406615e-06, "log_odds_chosen": 0.6443105340003967, "log_odds_ratio": -0.4887881875038147, "logits/chosen": -0.7957343459129333, "logits/rejected": -0.6617394685745239, "logps/chosen": -0.8976386189460754, "logps/rejected": -1.241767168045044, "loss": 1.1283, "nll_loss": 0.9811873435974121, "rewards/accuracies": 0.75, "rewards/chosen": -0.08976386487483978, "rewards/margins": 0.03441284969449043, "rewards/rejected": -0.12417671084403992, "step": 454 }, { "epoch": 0.27756596004270245, "grad_norm": 2.914179563522339, "learning_rate": 7.583588487446417e-06, "log_odds_chosen": -0.06855027377605438, "log_odds_ratio": -0.7667844295501709, "logits/chosen": -0.9674162864685059, "logits/rejected": -0.830525815486908, "logps/chosen": -1.1896030902862549, "logps/rejected": -1.1289780139923096, "loss": 1.0903, "nll_loss": 1.4217731952667236, "rewards/accuracies": 0.375, "rewards/chosen": -0.11896032094955444, "rewards/margins": -0.0060625080950558186, "rewards/rejected": -0.11289781332015991, "step": 455 }, { "epoch": 0.2781759951197194, "grad_norm": 3.2644808292388916, "learning_rate": 7.582608695652173e-06, "log_odds_chosen": 0.1350788027048111, "log_odds_ratio": -0.6934027671813965, "logits/chosen": -0.8696422576904297, "logits/rejected": -0.5969662666320801, "logps/chosen": -1.0736812353134155, "logps/rejected": -1.1256816387176514, "loss": 1.2763, "nll_loss": 1.3894126415252686, "rewards/accuracies": 0.5, "rewards/chosen": -0.1073681190609932, "rewards/margins": 0.005200049839913845, "rewards/rejected": -0.11256816983222961, "step": 456 }, { "epoch": 0.2787860301967363, "grad_norm": 4.251424312591553, "learning_rate": 7.58162890385793e-06, "log_odds_chosen": 0.44326356053352356, "log_odds_ratio": -0.5826451182365417, "logits/chosen": -0.7492133378982544, "logits/rejected": -0.6620557308197021, "logps/chosen": -0.8692135810852051, "logps/rejected": -1.1410578489303589, "loss": 1.2878, "nll_loss": 1.0700979232788086, "rewards/accuracies": 0.75, "rewards/chosen": -0.08692136406898499, "rewards/margins": 0.02718442678451538, "rewards/rejected": -0.11410579085350037, "step": 457 }, { "epoch": 0.2793960652737532, "grad_norm": 1.6189627647399902, "learning_rate": 7.580649112063686e-06, "log_odds_chosen": 0.45095309615135193, "log_odds_ratio": -0.5870187282562256, "logits/chosen": -0.9676951169967651, "logits/rejected": -0.9990047216415405, "logps/chosen": -1.1881740093231201, "logps/rejected": -1.5877734422683716, "loss": 1.1905, "nll_loss": 1.3880364894866943, "rewards/accuracies": 0.75, "rewards/chosen": -0.11881741881370544, "rewards/margins": 0.039959944784641266, "rewards/rejected": -0.1587773561477661, "step": 458 }, { "epoch": 0.28000610035077017, "grad_norm": 2.0039827823638916, "learning_rate": 7.579669320269443e-06, "log_odds_chosen": 0.22586126625537872, "log_odds_ratio": -0.7349258661270142, "logits/chosen": -0.8741607666015625, "logits/rejected": -0.8291732668876648, "logps/chosen": -1.054239273071289, "logps/rejected": -1.174980878829956, "loss": 1.3615, "nll_loss": 1.3077950477600098, "rewards/accuracies": 0.625, "rewards/chosen": -0.1054239347577095, "rewards/margins": 0.012074161320924759, "rewards/rejected": -0.11749808490276337, "step": 459 }, { "epoch": 0.2806161354277871, "grad_norm": 1.695631742477417, "learning_rate": 7.578689528475199e-06, "log_odds_chosen": 0.18761251866817474, "log_odds_ratio": -0.653955340385437, "logits/chosen": -0.5890318155288696, "logits/rejected": -0.5757396817207336, "logps/chosen": -0.8944319486618042, "logps/rejected": -0.9714249968528748, "loss": 1.1307, "nll_loss": 1.045249581336975, "rewards/accuracies": 0.625, "rewards/chosen": -0.08944319188594818, "rewards/margins": 0.007699310313910246, "rewards/rejected": -0.09714250266551971, "step": 460 }, { "epoch": 0.28122617050480403, "grad_norm": 1.788357138633728, "learning_rate": 7.577709736680954e-06, "log_odds_chosen": 0.8560991287231445, "log_odds_ratio": -0.40789905190467834, "logits/chosen": -0.7521940469741821, "logits/rejected": -0.5967000126838684, "logps/chosen": -0.6768153309822083, "logps/rejected": -1.168718695640564, "loss": 1.3884, "nll_loss": 1.0480587482452393, "rewards/accuracies": 0.875, "rewards/chosen": -0.06768152862787247, "rewards/margins": 0.04919033870100975, "rewards/rejected": -0.11687187105417252, "step": 461 }, { "epoch": 0.28183620558182093, "grad_norm": 1.9112261533737183, "learning_rate": 7.576729944886711e-06, "log_odds_chosen": 0.7123710513114929, "log_odds_ratio": -0.5054897665977478, "logits/chosen": -0.907132089138031, "logits/rejected": -0.7782021164894104, "logps/chosen": -0.7864884734153748, "logps/rejected": -1.175312876701355, "loss": 1.1482, "nll_loss": 1.1442363262176514, "rewards/accuracies": 0.75, "rewards/chosen": -0.07864885032176971, "rewards/margins": 0.03888243809342384, "rewards/rejected": -0.11753129214048386, "step": 462 }, { "epoch": 0.2824462406588379, "grad_norm": 2.0230445861816406, "learning_rate": 7.5757501530924675e-06, "log_odds_chosen": 0.31804198026657104, "log_odds_ratio": -0.5875938534736633, "logits/chosen": -0.8950618505477905, "logits/rejected": -0.8122823238372803, "logps/chosen": -0.9053471088409424, "logps/rejected": -1.0644340515136719, "loss": 1.0539, "nll_loss": 1.135021448135376, "rewards/accuracies": 0.75, "rewards/chosen": -0.09053471684455872, "rewards/margins": 0.01590868830680847, "rewards/rejected": -0.10644339770078659, "step": 463 }, { "epoch": 0.2830562757358548, "grad_norm": 4.786995887756348, "learning_rate": 7.574770361298224e-06, "log_odds_chosen": 0.2725621461868286, "log_odds_ratio": -0.6660605669021606, "logits/chosen": -1.0273383855819702, "logits/rejected": -0.8804582357406616, "logps/chosen": -0.9765942692756653, "logps/rejected": -1.099307894706726, "loss": 1.2537, "nll_loss": 1.3984352350234985, "rewards/accuracies": 0.5, "rewards/chosen": -0.09765943139791489, "rewards/margins": 0.012271367013454437, "rewards/rejected": -0.10993079841136932, "step": 464 }, { "epoch": 0.28366631081287175, "grad_norm": 4.114103317260742, "learning_rate": 7.573790569503981e-06, "log_odds_chosen": 1.0505824089050293, "log_odds_ratio": -0.38119643926620483, "logits/chosen": -1.0044019222259521, "logits/rejected": -1.0562913417816162, "logps/chosen": -0.7854946851730347, "logps/rejected": -1.5344924926757812, "loss": 1.2347, "nll_loss": 1.5551810264587402, "rewards/accuracies": 0.875, "rewards/chosen": -0.07854946702718735, "rewards/margins": 0.07489977777004242, "rewards/rejected": -0.15344925224781036, "step": 465 }, { "epoch": 0.28427634588988865, "grad_norm": 1.5312376022338867, "learning_rate": 7.572810777709737e-06, "log_odds_chosen": -0.15437763929367065, "log_odds_ratio": -0.8501384258270264, "logits/chosen": -0.8880467414855957, "logits/rejected": -0.7390494346618652, "logps/chosen": -1.224494457244873, "logps/rejected": -1.1359977722167969, "loss": 1.3053, "nll_loss": 1.4850561618804932, "rewards/accuracies": 0.25, "rewards/chosen": -0.12244945019483566, "rewards/margins": -0.008849674835801125, "rewards/rejected": -0.11359976977109909, "step": 466 }, { "epoch": 0.2848863809669056, "grad_norm": 1.7075546979904175, "learning_rate": 7.571830985915492e-06, "log_odds_chosen": 0.04501219093799591, "log_odds_ratio": -0.8179405927658081, "logits/chosen": -1.177836537361145, "logits/rejected": -0.8056638240814209, "logps/chosen": -1.1667230129241943, "logps/rejected": -1.1958903074264526, "loss": 1.3101, "nll_loss": 1.2881712913513184, "rewards/accuracies": 0.5, "rewards/chosen": -0.11667229980230331, "rewards/margins": 0.0029167253524065018, "rewards/rejected": -0.11958902329206467, "step": 467 }, { "epoch": 0.2854964160439225, "grad_norm": 3.281010866165161, "learning_rate": 7.570851194121249e-06, "log_odds_chosen": 0.7627777457237244, "log_odds_ratio": -0.5105006098747253, "logits/chosen": -0.48707860708236694, "logits/rejected": -0.4328402280807495, "logps/chosen": -0.8763188123703003, "logps/rejected": -1.378356695175171, "loss": 1.17, "nll_loss": 1.3446362018585205, "rewards/accuracies": 0.625, "rewards/chosen": -0.08763188123703003, "rewards/margins": 0.05020379275083542, "rewards/rejected": -0.13783568143844604, "step": 468 }, { "epoch": 0.28610645112093946, "grad_norm": 1.8019801378250122, "learning_rate": 7.569871402327005e-06, "log_odds_chosen": 0.5394909381866455, "log_odds_ratio": -0.6433078646659851, "logits/chosen": -1.034747838973999, "logits/rejected": -0.9244465827941895, "logps/chosen": -0.9933961629867554, "logps/rejected": -1.399671196937561, "loss": 1.3155, "nll_loss": 1.3935668468475342, "rewards/accuracies": 0.5, "rewards/chosen": -0.09933961927890778, "rewards/margins": 0.04062751680612564, "rewards/rejected": -0.13996712863445282, "step": 469 }, { "epoch": 0.28671648619795637, "grad_norm": 10.888223648071289, "learning_rate": 7.568891610532761e-06, "log_odds_chosen": 0.37478774785995483, "log_odds_ratio": -0.6071910858154297, "logits/chosen": -0.971308708190918, "logits/rejected": -0.6192744374275208, "logps/chosen": -1.0462239980697632, "logps/rejected": -1.2278921604156494, "loss": 1.3133, "nll_loss": 1.2435303926467896, "rewards/accuracies": 0.75, "rewards/chosen": -0.10462240129709244, "rewards/margins": 0.018166817724704742, "rewards/rejected": -0.12278921902179718, "step": 470 }, { "epoch": 0.2873265212749733, "grad_norm": 2.0096282958984375, "learning_rate": 7.567911818738518e-06, "log_odds_chosen": 0.3626979887485504, "log_odds_ratio": -0.5863425731658936, "logits/chosen": -0.9740308523178101, "logits/rejected": -0.8596985340118408, "logps/chosen": -0.9919542074203491, "logps/rejected": -1.1802668571472168, "loss": 1.2625, "nll_loss": 1.0768179893493652, "rewards/accuracies": 0.5, "rewards/chosen": -0.09919542074203491, "rewards/margins": 0.018831275403499603, "rewards/rejected": -0.11802669614553452, "step": 471 }, { "epoch": 0.2879365563519902, "grad_norm": 1.8393276929855347, "learning_rate": 7.5669320269442734e-06, "log_odds_chosen": 0.5037747621536255, "log_odds_ratio": -0.5735625624656677, "logits/chosen": -0.8443981409072876, "logits/rejected": -0.9001789093017578, "logps/chosen": -0.9401285648345947, "logps/rejected": -1.235161542892456, "loss": 1.229, "nll_loss": 1.2821193933486938, "rewards/accuracies": 0.75, "rewards/chosen": -0.09401285648345947, "rewards/margins": 0.029503298923373222, "rewards/rejected": -0.12351615726947784, "step": 472 }, { "epoch": 0.2885465914290072, "grad_norm": 2.3580820560455322, "learning_rate": 7.56595223515003e-06, "log_odds_chosen": 0.24665974080562592, "log_odds_ratio": -0.6107810735702515, "logits/chosen": -1.113149642944336, "logits/rejected": -1.013048768043518, "logps/chosen": -1.1898036003112793, "logps/rejected": -1.3653578758239746, "loss": 1.2082, "nll_loss": 1.4125027656555176, "rewards/accuracies": 0.625, "rewards/chosen": -0.11898036301136017, "rewards/margins": 0.01755542680621147, "rewards/rejected": -0.13653579354286194, "step": 473 }, { "epoch": 0.2891566265060241, "grad_norm": 1.5585564374923706, "learning_rate": 7.5649724433557865e-06, "log_odds_chosen": 0.43143126368522644, "log_odds_ratio": -0.5838141441345215, "logits/chosen": -1.0034074783325195, "logits/rejected": -0.9286743998527527, "logps/chosen": -0.9556889533996582, "logps/rejected": -1.2256176471710205, "loss": 1.1683, "nll_loss": 1.2222446203231812, "rewards/accuracies": 0.625, "rewards/chosen": -0.09556890279054642, "rewards/margins": 0.026992864906787872, "rewards/rejected": -0.12256176769733429, "step": 474 }, { "epoch": 0.28976666158304104, "grad_norm": 9.361310958862305, "learning_rate": 7.563992651561543e-06, "log_odds_chosen": 0.8417373299598694, "log_odds_ratio": -0.4489361047744751, "logits/chosen": -0.7635075449943542, "logits/rejected": -0.6717475056648254, "logps/chosen": -0.8515878915786743, "logps/rejected": -1.4010010957717896, "loss": 1.2036, "nll_loss": 1.152334451675415, "rewards/accuracies": 1.0, "rewards/chosen": -0.08515879511833191, "rewards/margins": 0.0549413226544857, "rewards/rejected": -0.1401001214981079, "step": 475 }, { "epoch": 0.29037669666005794, "grad_norm": 4.33163595199585, "learning_rate": 7.5630128597673e-06, "log_odds_chosen": 0.7105534076690674, "log_odds_ratio": -0.4660317897796631, "logits/chosen": -1.047844409942627, "logits/rejected": -0.9849704504013062, "logps/chosen": -0.8724058866500854, "logps/rejected": -1.3473234176635742, "loss": 1.1488, "nll_loss": 1.3195064067840576, "rewards/accuracies": 0.75, "rewards/chosen": -0.08724059164524078, "rewards/margins": 0.047491736710071564, "rewards/rejected": -0.13473233580589294, "step": 476 }, { "epoch": 0.2909867317370749, "grad_norm": 2.8071534633636475, "learning_rate": 7.562033067973056e-06, "log_odds_chosen": 0.6639384031295776, "log_odds_ratio": -0.46410810947418213, "logits/chosen": -0.7577490210533142, "logits/rejected": -0.8576235771179199, "logps/chosen": -0.9425905346870422, "logps/rejected": -1.3784253597259521, "loss": 1.2647, "nll_loss": 1.0477948188781738, "rewards/accuracies": 0.875, "rewards/chosen": -0.09425906836986542, "rewards/margins": 0.043583475053310394, "rewards/rejected": -0.13784253597259521, "step": 477 }, { "epoch": 0.2915967668140918, "grad_norm": 2.1730146408081055, "learning_rate": 7.561053276178811e-06, "log_odds_chosen": 0.12879586219787598, "log_odds_ratio": -0.7269017100334167, "logits/chosen": -1.0445566177368164, "logits/rejected": -0.9911221861839294, "logps/chosen": -0.8187336921691895, "logps/rejected": -0.9173862338066101, "loss": 1.3434, "nll_loss": 1.1978635787963867, "rewards/accuracies": 0.625, "rewards/chosen": -0.08187337219715118, "rewards/margins": 0.009865252301096916, "rewards/rejected": -0.09173862636089325, "step": 478 }, { "epoch": 0.29220680189110876, "grad_norm": 2.2087297439575195, "learning_rate": 7.560073484384568e-06, "log_odds_chosen": 0.5413170456886292, "log_odds_ratio": -0.5711327195167542, "logits/chosen": -0.9329870939254761, "logits/rejected": -0.8081048130989075, "logps/chosen": -0.9022470712661743, "logps/rejected": -1.241719365119934, "loss": 1.2183, "nll_loss": 1.1906887292861938, "rewards/accuracies": 0.75, "rewards/chosen": -0.09022469818592072, "rewards/margins": 0.033947236835956573, "rewards/rejected": -0.12417193502187729, "step": 479 }, { "epoch": 0.29281683696812566, "grad_norm": 3.765202283859253, "learning_rate": 7.559093692590324e-06, "log_odds_chosen": 0.48552393913269043, "log_odds_ratio": -0.6981343030929565, "logits/chosen": -0.7657569646835327, "logits/rejected": -0.6366523504257202, "logps/chosen": -0.7708931565284729, "logps/rejected": -0.9952893853187561, "loss": 1.2105, "nll_loss": 0.9293386936187744, "rewards/accuracies": 0.875, "rewards/chosen": -0.07708930969238281, "rewards/margins": 0.0224396213889122, "rewards/rejected": -0.09952893853187561, "step": 480 }, { "epoch": 0.2934268720451426, "grad_norm": 4.495582580566406, "learning_rate": 7.55811390079608e-06, "log_odds_chosen": 0.20766402781009674, "log_odds_ratio": -0.7155128717422485, "logits/chosen": -0.9516555070877075, "logits/rejected": -0.7408494353294373, "logps/chosen": -1.1329081058502197, "logps/rejected": -1.2243242263793945, "loss": 1.358, "nll_loss": 1.6310844421386719, "rewards/accuracies": 0.625, "rewards/chosen": -0.11329081654548645, "rewards/margins": 0.009141598828136921, "rewards/rejected": -0.1224324107170105, "step": 481 }, { "epoch": 0.2940369071221595, "grad_norm": 1.8087542057037354, "learning_rate": 7.557134109001837e-06, "log_odds_chosen": 0.5921972990036011, "log_odds_ratio": -0.5151346921920776, "logits/chosen": -0.8745071291923523, "logits/rejected": -0.6758306622505188, "logps/chosen": -0.9324339628219604, "logps/rejected": -1.3220289945602417, "loss": 1.0667, "nll_loss": 0.9679193496704102, "rewards/accuracies": 0.875, "rewards/chosen": -0.09324339777231216, "rewards/margins": 0.038959503173828125, "rewards/rejected": -0.1322029083967209, "step": 482 }, { "epoch": 0.2946469421991765, "grad_norm": 1.6457548141479492, "learning_rate": 7.556154317207593e-06, "log_odds_chosen": 0.4332568347454071, "log_odds_ratio": -0.5928576588630676, "logits/chosen": -0.9637812376022339, "logits/rejected": -0.8925006985664368, "logps/chosen": -0.9072562456130981, "logps/rejected": -1.1648279428482056, "loss": 1.1232, "nll_loss": 1.102083444595337, "rewards/accuracies": 0.625, "rewards/chosen": -0.0907256230711937, "rewards/margins": 0.02575717493891716, "rewards/rejected": -0.11648280173540115, "step": 483 }, { "epoch": 0.2952569772761934, "grad_norm": 2.571363687515259, "learning_rate": 7.555174525413349e-06, "log_odds_chosen": 0.7859311103820801, "log_odds_ratio": -0.5332058668136597, "logits/chosen": -0.9838283061981201, "logits/rejected": -0.966427206993103, "logps/chosen": -0.7389602661132812, "logps/rejected": -0.9641268849372864, "loss": 1.0233, "nll_loss": 1.193579912185669, "rewards/accuracies": 0.75, "rewards/chosen": -0.07389602810144424, "rewards/margins": 0.022516664117574692, "rewards/rejected": -0.09641269594430923, "step": 484 }, { "epoch": 0.29586701235321033, "grad_norm": 2.1006035804748535, "learning_rate": 7.5541947336191056e-06, "log_odds_chosen": 0.32098639011383057, "log_odds_ratio": -0.7684476971626282, "logits/chosen": -1.0879452228546143, "logits/rejected": -0.7478785514831543, "logps/chosen": -0.9372349977493286, "logps/rejected": -1.1378185749053955, "loss": 1.2929, "nll_loss": 1.2085192203521729, "rewards/accuracies": 0.625, "rewards/chosen": -0.09372350573539734, "rewards/margins": 0.02005835436284542, "rewards/rejected": -0.11378185451030731, "step": 485 }, { "epoch": 0.29647704743022724, "grad_norm": 1.8257334232330322, "learning_rate": 7.553214941824862e-06, "log_odds_chosen": 0.2225598245859146, "log_odds_ratio": -0.6944974064826965, "logits/chosen": -0.9166796207427979, "logits/rejected": -0.9098749756813049, "logps/chosen": -0.9822099208831787, "logps/rejected": -1.1347031593322754, "loss": 1.2472, "nll_loss": 1.3956663608551025, "rewards/accuracies": 0.5, "rewards/chosen": -0.09822098910808563, "rewards/margins": 0.015249323099851608, "rewards/rejected": -0.11347032338380814, "step": 486 }, { "epoch": 0.2970870825072442, "grad_norm": 2.271601438522339, "learning_rate": 7.552235150030619e-06, "log_odds_chosen": 0.5218062996864319, "log_odds_ratio": -0.5595958232879639, "logits/chosen": -0.8599309325218201, "logits/rejected": -0.8604663610458374, "logps/chosen": -0.886623740196228, "logps/rejected": -1.1549761295318604, "loss": 1.2882, "nll_loss": 1.2368673086166382, "rewards/accuracies": 0.625, "rewards/chosen": -0.08866237103939056, "rewards/margins": 0.02683524787425995, "rewards/rejected": -0.11549762636423111, "step": 487 }, { "epoch": 0.2976971175842611, "grad_norm": 2.887913703918457, "learning_rate": 7.551255358236375e-06, "log_odds_chosen": 0.6169976592063904, "log_odds_ratio": -0.4817153513431549, "logits/chosen": -0.8100370764732361, "logits/rejected": -0.6555559635162354, "logps/chosen": -0.7284666299819946, "logps/rejected": -1.0469928979873657, "loss": 1.2024, "nll_loss": 1.0618915557861328, "rewards/accuracies": 0.875, "rewards/chosen": -0.0728466585278511, "rewards/margins": 0.03185262903571129, "rewards/rejected": -0.10469929128885269, "step": 488 }, { "epoch": 0.298307152661278, "grad_norm": 1.7010419368743896, "learning_rate": 7.55027556644213e-06, "log_odds_chosen": 0.21035513281822205, "log_odds_ratio": -0.6621127128601074, "logits/chosen": -0.7744901776313782, "logits/rejected": -0.7175272107124329, "logps/chosen": -0.7090359926223755, "logps/rejected": -0.7830263376235962, "loss": 1.2752, "nll_loss": 1.1028720140457153, "rewards/accuracies": 0.625, "rewards/chosen": -0.07090359926223755, "rewards/margins": 0.00739904073998332, "rewards/rejected": -0.07830263674259186, "step": 489 }, { "epoch": 0.29891718773829495, "grad_norm": 1.885900616645813, "learning_rate": 7.549295774647887e-06, "log_odds_chosen": 0.14647027850151062, "log_odds_ratio": -0.6380361318588257, "logits/chosen": -1.051759958267212, "logits/rejected": -0.8451831340789795, "logps/chosen": -0.8874513506889343, "logps/rejected": -0.9970605373382568, "loss": 1.1909, "nll_loss": 1.1573987007141113, "rewards/accuracies": 0.5, "rewards/chosen": -0.08874513953924179, "rewards/margins": 0.010960912331938744, "rewards/rejected": -0.09970605373382568, "step": 490 }, { "epoch": 0.29952722281531186, "grad_norm": 2.458068370819092, "learning_rate": 7.548315982853643e-06, "log_odds_chosen": 0.23300084471702576, "log_odds_ratio": -0.6797036528587341, "logits/chosen": -0.8112008571624756, "logits/rejected": -0.7233951687812805, "logps/chosen": -0.8815759420394897, "logps/rejected": -1.0047173500061035, "loss": 1.1668, "nll_loss": 1.1287299394607544, "rewards/accuracies": 0.5, "rewards/chosen": -0.08815759420394897, "rewards/margins": 0.012314151041209698, "rewards/rejected": -0.10047174990177155, "step": 491 }, { "epoch": 0.3001372578923288, "grad_norm": 3.1914961338043213, "learning_rate": 7.547336191059399e-06, "log_odds_chosen": -0.20240049064159393, "log_odds_ratio": -0.8715456128120422, "logits/chosen": -1.144628882408142, "logits/rejected": -0.933480978012085, "logps/chosen": -1.1946388483047485, "logps/rejected": -1.1446805000305176, "loss": 1.2491, "nll_loss": 1.4041285514831543, "rewards/accuracies": 0.625, "rewards/chosen": -0.11946389079093933, "rewards/margins": -0.004995847120881081, "rewards/rejected": -0.1144680455327034, "step": 492 }, { "epoch": 0.3007472929693457, "grad_norm": 1.2634997367858887, "learning_rate": 7.546356399265156e-06, "log_odds_chosen": 1.0146855115890503, "log_odds_ratio": -0.434032142162323, "logits/chosen": -0.8384944796562195, "logits/rejected": -0.7357172966003418, "logps/chosen": -0.7780824899673462, "logps/rejected": -1.2552679777145386, "loss": 1.1326, "nll_loss": 0.9667509198188782, "rewards/accuracies": 0.75, "rewards/chosen": -0.07780825346708298, "rewards/margins": 0.04771855100989342, "rewards/rejected": -0.1255268007516861, "step": 493 }, { "epoch": 0.30135732804636267, "grad_norm": 2.1774163246154785, "learning_rate": 7.545376607470912e-06, "log_odds_chosen": 0.01896650344133377, "log_odds_ratio": -0.7597813606262207, "logits/chosen": -0.8617841601371765, "logits/rejected": -0.7943299412727356, "logps/chosen": -0.9996468424797058, "logps/rejected": -1.002112865447998, "loss": 1.2473, "nll_loss": 1.3435845375061035, "rewards/accuracies": 0.5, "rewards/chosen": -0.0999646931886673, "rewards/margins": 0.0002466081641614437, "rewards/rejected": -0.10021129995584488, "step": 494 }, { "epoch": 0.3019673631233796, "grad_norm": 5.50825834274292, "learning_rate": 7.544396815676668e-06, "log_odds_chosen": 0.9991591572761536, "log_odds_ratio": -0.3698599934577942, "logits/chosen": -1.0278573036193848, "logits/rejected": -0.7481499314308167, "logps/chosen": -0.8716457486152649, "logps/rejected": -1.4961355924606323, "loss": 1.2214, "nll_loss": 1.1915470361709595, "rewards/accuracies": 0.875, "rewards/chosen": -0.08716457337141037, "rewards/margins": 0.06244898959994316, "rewards/rejected": -0.14961355924606323, "step": 495 }, { "epoch": 0.30257739820039653, "grad_norm": 2.204221725463867, "learning_rate": 7.543417023882425e-06, "log_odds_chosen": 0.4738967716693878, "log_odds_ratio": -0.5717158317565918, "logits/chosen": -0.9438561797142029, "logits/rejected": -0.7755373120307922, "logps/chosen": -1.0399383306503296, "logps/rejected": -1.3535703420639038, "loss": 1.1237, "nll_loss": 1.2459180355072021, "rewards/accuracies": 0.625, "rewards/chosen": -0.10399383306503296, "rewards/margins": 0.031363196671009064, "rewards/rejected": -0.13535703718662262, "step": 496 }, { "epoch": 0.30318743327741343, "grad_norm": 2.7459933757781982, "learning_rate": 7.542437232088181e-06, "log_odds_chosen": 0.7051591873168945, "log_odds_ratio": -0.47506803274154663, "logits/chosen": -0.8921712040901184, "logits/rejected": -1.0273491144180298, "logps/chosen": -0.9224841594696045, "logps/rejected": -1.4224755764007568, "loss": 1.2948, "nll_loss": 1.1169859170913696, "rewards/accuracies": 0.75, "rewards/chosen": -0.09224840998649597, "rewards/margins": 0.04999914392828941, "rewards/rejected": -0.14224755764007568, "step": 497 }, { "epoch": 0.3037974683544304, "grad_norm": 1.762201189994812, "learning_rate": 7.541457440293937e-06, "log_odds_chosen": -0.0006084814667701721, "log_odds_ratio": -0.7525840997695923, "logits/chosen": -1.1645548343658447, "logits/rejected": -0.8989249467849731, "logps/chosen": -0.9791751503944397, "logps/rejected": -0.9899536371231079, "loss": 1.2897, "nll_loss": 1.2039837837219238, "rewards/accuracies": 0.625, "rewards/chosen": -0.09791751950979233, "rewards/margins": 0.0010778466239571571, "rewards/rejected": -0.09899535775184631, "step": 498 }, { "epoch": 0.3044075034314473, "grad_norm": 7.923894882202148, "learning_rate": 7.540477648499694e-06, "log_odds_chosen": 0.4098427891731262, "log_odds_ratio": -0.5975570678710938, "logits/chosen": -0.8023498058319092, "logits/rejected": -0.8471499681472778, "logps/chosen": -0.8750245571136475, "logps/rejected": -1.160827398300171, "loss": 1.1279, "nll_loss": 1.1706581115722656, "rewards/accuracies": 0.75, "rewards/chosen": -0.08750244975090027, "rewards/margins": 0.02858028933405876, "rewards/rejected": -0.11608274281024933, "step": 499 }, { "epoch": 0.30501753850846425, "grad_norm": 2.879197597503662, "learning_rate": 7.53949785670545e-06, "log_odds_chosen": 0.9121659994125366, "log_odds_ratio": -0.4448561668395996, "logits/chosen": -0.6693639755249023, "logits/rejected": -0.5751695036888123, "logps/chosen": -0.8067613840103149, "logps/rejected": -1.337027668952942, "loss": 1.0868, "nll_loss": 0.9669845104217529, "rewards/accuracies": 0.75, "rewards/chosen": -0.08067614585161209, "rewards/margins": 0.05302661657333374, "rewards/rejected": -0.13370276987552643, "step": 500 }, { "epoch": 0.30562757358548115, "grad_norm": 2.1424365043640137, "learning_rate": 7.538518064911206e-06, "log_odds_chosen": 0.9616602659225464, "log_odds_ratio": -0.4352071285247803, "logits/chosen": -1.0946749448776245, "logits/rejected": -0.8178445100784302, "logps/chosen": -0.9744576811790466, "logps/rejected": -1.7070131301879883, "loss": 1.2626, "nll_loss": 1.2648401260375977, "rewards/accuracies": 0.875, "rewards/chosen": -0.0974457710981369, "rewards/margins": 0.07325556874275208, "rewards/rejected": -0.17070132493972778, "step": 501 }, { "epoch": 0.3062376086624981, "grad_norm": 2.299633741378784, "learning_rate": 7.537538273116962e-06, "log_odds_chosen": 0.6565496921539307, "log_odds_ratio": -0.46361684799194336, "logits/chosen": -0.7004007697105408, "logits/rejected": -0.6021067500114441, "logps/chosen": -0.7095623016357422, "logps/rejected": -1.0704913139343262, "loss": 1.0135, "nll_loss": 0.8334436416625977, "rewards/accuracies": 0.875, "rewards/chosen": -0.07095623761415482, "rewards/margins": 0.03609290346503258, "rewards/rejected": -0.1070491299033165, "step": 502 }, { "epoch": 0.306847643739515, "grad_norm": 2.3339526653289795, "learning_rate": 7.536558481322718e-06, "log_odds_chosen": 0.05326010286808014, "log_odds_ratio": -0.8054389357566833, "logits/chosen": -0.8908881545066833, "logits/rejected": -0.5492258071899414, "logps/chosen": -1.0409538745880127, "logps/rejected": -1.040185809135437, "loss": 1.1585, "nll_loss": 1.1154955625534058, "rewards/accuracies": 0.375, "rewards/chosen": -0.10409538447856903, "rewards/margins": -7.679872214794159e-05, "rewards/rejected": -0.10401858389377594, "step": 503 }, { "epoch": 0.30745767881653197, "grad_norm": 1.7504909038543701, "learning_rate": 7.535578689528475e-06, "log_odds_chosen": 0.1411379724740982, "log_odds_ratio": -0.6687023639678955, "logits/chosen": -1.0358693599700928, "logits/rejected": -1.022145390510559, "logps/chosen": -0.8916113376617432, "logps/rejected": -1.0065251588821411, "loss": 1.2458, "nll_loss": 1.2056032419204712, "rewards/accuracies": 0.625, "rewards/chosen": -0.08916114270687103, "rewards/margins": 0.011491376906633377, "rewards/rejected": -0.10065251588821411, "step": 504 }, { "epoch": 0.30806771389354887, "grad_norm": 2.5329957008361816, "learning_rate": 7.534598897734231e-06, "log_odds_chosen": 0.6263988018035889, "log_odds_ratio": -0.6174801588058472, "logits/chosen": -0.9136663675308228, "logits/rejected": -0.5217211842536926, "logps/chosen": -1.0569239854812622, "logps/rejected": -1.381300926208496, "loss": 1.4298, "nll_loss": 1.2286988496780396, "rewards/accuracies": 0.75, "rewards/chosen": -0.10569240152835846, "rewards/margins": 0.03243768960237503, "rewards/rejected": -0.1381300985813141, "step": 505 }, { "epoch": 0.3086777489705658, "grad_norm": 4.141361236572266, "learning_rate": 7.5336191059399875e-06, "log_odds_chosen": 0.3624977469444275, "log_odds_ratio": -0.6121488809585571, "logits/chosen": -0.8197541236877441, "logits/rejected": -0.7985221147537231, "logps/chosen": -1.0488439798355103, "logps/rejected": -1.2831631898880005, "loss": 1.4209, "nll_loss": 1.3014328479766846, "rewards/accuracies": 0.625, "rewards/chosen": -0.10488439351320267, "rewards/margins": 0.023431919515132904, "rewards/rejected": -0.12831631302833557, "step": 506 }, { "epoch": 0.3092877840475827, "grad_norm": 2.842052936553955, "learning_rate": 7.532639314145744e-06, "log_odds_chosen": 0.3153131604194641, "log_odds_ratio": -0.615854024887085, "logits/chosen": -0.7766966223716736, "logits/rejected": -0.6619754433631897, "logps/chosen": -0.8077349662780762, "logps/rejected": -0.9492632746696472, "loss": 1.1323, "nll_loss": 0.977543294429779, "rewards/accuracies": 0.75, "rewards/chosen": -0.0807735025882721, "rewards/margins": 0.01415282767266035, "rewards/rejected": -0.09492632746696472, "step": 507 }, { "epoch": 0.3098978191245997, "grad_norm": 2.0433881282806396, "learning_rate": 7.5316595223515e-06, "log_odds_chosen": 0.5038180351257324, "log_odds_ratio": -0.4879950284957886, "logits/chosen": -0.796221137046814, "logits/rejected": -0.5903112888336182, "logps/chosen": -0.7519895434379578, "logps/rejected": -1.0515427589416504, "loss": 1.2982, "nll_loss": 1.114246129989624, "rewards/accuracies": 0.875, "rewards/chosen": -0.0751989558339119, "rewards/margins": 0.029955323785543442, "rewards/rejected": -0.10515428334474564, "step": 508 }, { "epoch": 0.3105078542016166, "grad_norm": 2.0954525470733643, "learning_rate": 7.530679730557256e-06, "log_odds_chosen": 0.3254093825817108, "log_odds_ratio": -0.6475839614868164, "logits/chosen": -0.9046000242233276, "logits/rejected": -0.6371320486068726, "logps/chosen": -1.1082358360290527, "logps/rejected": -1.259332537651062, "loss": 1.2557, "nll_loss": 1.170896053314209, "rewards/accuracies": 0.375, "rewards/chosen": -0.11082357913255692, "rewards/margins": 0.015109671279788017, "rewards/rejected": -0.12593325972557068, "step": 509 }, { "epoch": 0.31111788927863354, "grad_norm": 1.0918254852294922, "learning_rate": 7.529699938763013e-06, "log_odds_chosen": 0.26799875497817993, "log_odds_ratio": -0.6744207739830017, "logits/chosen": -1.0305732488632202, "logits/rejected": -0.5291420817375183, "logps/chosen": -1.1412678956985474, "logps/rejected": -1.258296251296997, "loss": 1.1276, "nll_loss": 1.3006666898727417, "rewards/accuracies": 0.75, "rewards/chosen": -0.1141267940402031, "rewards/margins": 0.011702830903232098, "rewards/rejected": -0.12582963705062866, "step": 510 }, { "epoch": 0.31172792435565044, "grad_norm": 1.6219698190689087, "learning_rate": 7.528720146968769e-06, "log_odds_chosen": 0.9440128207206726, "log_odds_ratio": -0.4002590477466583, "logits/chosen": -0.8066253066062927, "logits/rejected": -0.6321752071380615, "logps/chosen": -0.6278554201126099, "logps/rejected": -1.153719186782837, "loss": 1.0607, "nll_loss": 0.7931687831878662, "rewards/accuracies": 0.875, "rewards/chosen": -0.0627855435013771, "rewards/margins": 0.0525863841176033, "rewards/rejected": -0.11537192761898041, "step": 511 }, { "epoch": 0.3123379594326674, "grad_norm": 2.297243356704712, "learning_rate": 7.527740355174524e-06, "log_odds_chosen": 0.3703487515449524, "log_odds_ratio": -0.8154832124710083, "logits/chosen": -0.7596533298492432, "logits/rejected": -0.6996989250183105, "logps/chosen": -1.0224673748016357, "logps/rejected": -1.4480736255645752, "loss": 1.1837, "nll_loss": 1.2059540748596191, "rewards/accuracies": 0.375, "rewards/chosen": -0.1022467389702797, "rewards/margins": 0.0425606332719326, "rewards/rejected": -0.144807368516922, "step": 512 }, { "epoch": 0.3129479945096843, "grad_norm": 2.4736015796661377, "learning_rate": 7.526760563380281e-06, "log_odds_chosen": 0.2625872790813446, "log_odds_ratio": -0.6158942580223083, "logits/chosen": -0.8953098058700562, "logits/rejected": -0.7247512340545654, "logps/chosen": -0.8689736127853394, "logps/rejected": -1.019910216331482, "loss": 1.1138, "nll_loss": 1.0042662620544434, "rewards/accuracies": 0.625, "rewards/chosen": -0.0868973657488823, "rewards/margins": 0.015093659050762653, "rewards/rejected": -0.10199102759361267, "step": 513 }, { "epoch": 0.31355802958670126, "grad_norm": 3.29732608795166, "learning_rate": 7.525780771586037e-06, "log_odds_chosen": 0.505151093006134, "log_odds_ratio": -0.5660560131072998, "logits/chosen": -0.7606754899024963, "logits/rejected": -0.7672187685966492, "logps/chosen": -0.9446103572845459, "logps/rejected": -1.1422393321990967, "loss": 1.2565, "nll_loss": 1.0941927433013916, "rewards/accuracies": 0.5, "rewards/chosen": -0.09446103870868683, "rewards/margins": 0.019762888550758362, "rewards/rejected": -0.11422392725944519, "step": 514 }, { "epoch": 0.31416806466371816, "grad_norm": 2.2970330715179443, "learning_rate": 7.524800979791794e-06, "log_odds_chosen": 0.7377569079399109, "log_odds_ratio": -0.4434049725532532, "logits/chosen": -0.8463842272758484, "logits/rejected": -0.8630260229110718, "logps/chosen": -0.6500104665756226, "logps/rejected": -1.0763753652572632, "loss": 1.0502, "nll_loss": 1.006503701210022, "rewards/accuracies": 0.875, "rewards/chosen": -0.06500104814767838, "rewards/margins": 0.04263649135828018, "rewards/rejected": -0.10763754695653915, "step": 515 }, { "epoch": 0.3147780997407351, "grad_norm": 3.2675974369049072, "learning_rate": 7.5238211879975504e-06, "log_odds_chosen": 0.4653565287590027, "log_odds_ratio": -0.5507513880729675, "logits/chosen": -0.783069372177124, "logits/rejected": -0.7267308235168457, "logps/chosen": -0.7937196493148804, "logps/rejected": -1.0647273063659668, "loss": 1.3171, "nll_loss": 0.9908324480056763, "rewards/accuracies": 0.625, "rewards/chosen": -0.07937195897102356, "rewards/margins": 0.02710077352821827, "rewards/rejected": -0.10647273063659668, "step": 516 }, { "epoch": 0.315388134817752, "grad_norm": 1.297533631324768, "learning_rate": 7.5228413962033066e-06, "log_odds_chosen": 0.3275805115699768, "log_odds_ratio": -0.6418393850326538, "logits/chosen": -0.8680958151817322, "logits/rejected": -0.7439833879470825, "logps/chosen": -0.8670420050621033, "logps/rejected": -1.0462230443954468, "loss": 1.2954, "nll_loss": 1.1385201215744019, "rewards/accuracies": 0.5, "rewards/chosen": -0.08670420199632645, "rewards/margins": 0.01791810244321823, "rewards/rejected": -0.10462230443954468, "step": 517 }, { "epoch": 0.315998169894769, "grad_norm": 2.3403983116149902, "learning_rate": 7.521861604409063e-06, "log_odds_chosen": 0.42153748869895935, "log_odds_ratio": -0.6347939968109131, "logits/chosen": -0.6956172585487366, "logits/rejected": -0.6843965649604797, "logps/chosen": -0.7843080759048462, "logps/rejected": -1.0727627277374268, "loss": 1.2302, "nll_loss": 1.2335883378982544, "rewards/accuracies": 0.5, "rewards/chosen": -0.07843080908060074, "rewards/margins": 0.02884545922279358, "rewards/rejected": -0.10727627575397491, "step": 518 }, { "epoch": 0.3166082049717859, "grad_norm": 1.7594565153121948, "learning_rate": 7.520881812614819e-06, "log_odds_chosen": 0.26240551471710205, "log_odds_ratio": -0.6899847388267517, "logits/chosen": -0.9250906109809875, "logits/rejected": -0.6737095713615417, "logps/chosen": -1.050768256187439, "logps/rejected": -1.218514323234558, "loss": 1.326, "nll_loss": 1.1933250427246094, "rewards/accuracies": 0.625, "rewards/chosen": -0.10507683455944061, "rewards/margins": 0.016774598509073257, "rewards/rejected": -0.12185142934322357, "step": 519 }, { "epoch": 0.3172182400488028, "grad_norm": 2.409942865371704, "learning_rate": 7.519902020820575e-06, "log_odds_chosen": 0.6015084981918335, "log_odds_ratio": -0.5365585088729858, "logits/chosen": -0.7937760353088379, "logits/rejected": -0.7392306327819824, "logps/chosen": -0.8458819389343262, "logps/rejected": -1.2058910131454468, "loss": 1.2995, "nll_loss": 1.2261661291122437, "rewards/accuracies": 0.75, "rewards/chosen": -0.0845881998538971, "rewards/margins": 0.036000899970531464, "rewards/rejected": -0.12058910727500916, "step": 520 }, { "epoch": 0.31782827512581974, "grad_norm": 2.7779977321624756, "learning_rate": 7.518922229026332e-06, "log_odds_chosen": 0.5537900924682617, "log_odds_ratio": -0.5292412042617798, "logits/chosen": -1.0750513076782227, "logits/rejected": -0.9886177778244019, "logps/chosen": -1.109114646911621, "logps/rejected": -1.505446195602417, "loss": 1.2476, "nll_loss": 1.3278162479400635, "rewards/accuracies": 0.625, "rewards/chosen": -0.11091146618127823, "rewards/margins": 0.039633166044950485, "rewards/rejected": -0.15054462850093842, "step": 521 }, { "epoch": 0.31843831020283664, "grad_norm": 5.779080390930176, "learning_rate": 7.517942437232088e-06, "log_odds_chosen": 0.5068656802177429, "log_odds_ratio": -0.6060202717781067, "logits/chosen": -0.8158411979675293, "logits/rejected": -0.597423791885376, "logps/chosen": -1.0136799812316895, "logps/rejected": -1.3226337432861328, "loss": 1.155, "nll_loss": 1.3729981184005737, "rewards/accuracies": 0.625, "rewards/chosen": -0.1013680025935173, "rewards/margins": 0.030895370990037918, "rewards/rejected": -0.13226337730884552, "step": 522 }, { "epoch": 0.3190483452798536, "grad_norm": 2.200105667114258, "learning_rate": 7.516962645437844e-06, "log_odds_chosen": 0.9144119024276733, "log_odds_ratio": -0.40783578157424927, "logits/chosen": -0.806106448173523, "logits/rejected": -0.8021441698074341, "logps/chosen": -0.8529785871505737, "logps/rejected": -1.3889405727386475, "loss": 1.257, "nll_loss": 1.2821271419525146, "rewards/accuracies": 0.75, "rewards/chosen": -0.08529786020517349, "rewards/margins": 0.053596191108226776, "rewards/rejected": -0.13889405131340027, "step": 523 }, { "epoch": 0.3196583803568705, "grad_norm": 2.2536423206329346, "learning_rate": 7.5159828536436e-06, "log_odds_chosen": 0.8419265747070312, "log_odds_ratio": -0.4116814136505127, "logits/chosen": -0.7298800945281982, "logits/rejected": -0.6117702722549438, "logps/chosen": -0.8278441429138184, "logps/rejected": -1.2604409456253052, "loss": 1.1967, "nll_loss": 1.0697499513626099, "rewards/accuracies": 0.75, "rewards/chosen": -0.08278442174196243, "rewards/margins": 0.04325968027114868, "rewards/rejected": -0.12604409456253052, "step": 524 }, { "epoch": 0.32026841543388745, "grad_norm": 9.775550842285156, "learning_rate": 7.515003061849356e-06, "log_odds_chosen": -0.11883984506130219, "log_odds_ratio": -0.7992229461669922, "logits/chosen": -0.9979833960533142, "logits/rejected": -0.976888120174408, "logps/chosen": -1.4431850910186768, "logps/rejected": -1.35256028175354, "loss": 1.2254, "nll_loss": 1.5392026901245117, "rewards/accuracies": 0.625, "rewards/chosen": -0.14431850612163544, "rewards/margins": -0.009062472730875015, "rewards/rejected": -0.13525603711605072, "step": 525 }, { "epoch": 0.32087845051090436, "grad_norm": 4.813683032989502, "learning_rate": 7.514023270055113e-06, "log_odds_chosen": 0.5478627681732178, "log_odds_ratio": -0.5683342218399048, "logits/chosen": -0.8002806901931763, "logits/rejected": -0.7832433581352234, "logps/chosen": -0.7630584239959717, "logps/rejected": -1.0248565673828125, "loss": 1.2149, "nll_loss": 1.2248811721801758, "rewards/accuracies": 0.5, "rewards/chosen": -0.07630585134029388, "rewards/margins": 0.026179805397987366, "rewards/rejected": -0.10248564928770065, "step": 526 }, { "epoch": 0.3214884855879213, "grad_norm": 5.443321228027344, "learning_rate": 7.5130434782608695e-06, "log_odds_chosen": 0.6913321614265442, "log_odds_ratio": -0.46534156799316406, "logits/chosen": -0.897545576095581, "logits/rejected": -0.6913023591041565, "logps/chosen": -0.7149673700332642, "logps/rejected": -1.0671731233596802, "loss": 1.2917, "nll_loss": 0.9560548067092896, "rewards/accuracies": 0.75, "rewards/chosen": -0.07149673998355865, "rewards/margins": 0.03522057086229324, "rewards/rejected": -0.10671732574701309, "step": 527 }, { "epoch": 0.3220985206649382, "grad_norm": 2.715766429901123, "learning_rate": 7.512063686466626e-06, "log_odds_chosen": 0.23884814977645874, "log_odds_ratio": -0.6681796312332153, "logits/chosen": -1.0084257125854492, "logits/rejected": -0.8049767017364502, "logps/chosen": -1.3750877380371094, "logps/rejected": -1.556403398513794, "loss": 1.2828, "nll_loss": 1.5532102584838867, "rewards/accuracies": 0.5, "rewards/chosen": -0.13750877976417542, "rewards/margins": 0.0181315578520298, "rewards/rejected": -0.15564033389091492, "step": 528 }, { "epoch": 0.32270855574195517, "grad_norm": 2.186006546020508, "learning_rate": 7.511083894672382e-06, "log_odds_chosen": 0.279528945684433, "log_odds_ratio": -0.680081844329834, "logits/chosen": -1.0691618919372559, "logits/rejected": -0.8382617831230164, "logps/chosen": -1.1626256704330444, "logps/rejected": -1.3422901630401611, "loss": 1.2323, "nll_loss": 1.5066492557525635, "rewards/accuracies": 0.625, "rewards/chosen": -0.11626257002353668, "rewards/margins": 0.017966432496905327, "rewards/rejected": -0.13422900438308716, "step": 529 }, { "epoch": 0.3233185908189721, "grad_norm": 2.31215763092041, "learning_rate": 7.510104102878138e-06, "log_odds_chosen": 0.03770393133163452, "log_odds_ratio": -0.7373878359794617, "logits/chosen": -0.8311076760292053, "logits/rejected": -0.5597398281097412, "logps/chosen": -1.0301482677459717, "logps/rejected": -1.0090030431747437, "loss": 1.3918, "nll_loss": 1.2636610269546509, "rewards/accuracies": 0.375, "rewards/chosen": -0.10301482677459717, "rewards/margins": -0.0021145143546164036, "rewards/rejected": -0.1009003072977066, "step": 530 }, { "epoch": 0.32392862589598903, "grad_norm": 2.1417276859283447, "learning_rate": 7.509124311083894e-06, "log_odds_chosen": -0.0316208079457283, "log_odds_ratio": -0.7435659766197205, "logits/chosen": -0.8979780077934265, "logits/rejected": -0.8071035742759705, "logps/chosen": -1.0633968114852905, "logps/rejected": -1.0429942607879639, "loss": 1.24, "nll_loss": 1.2910892963409424, "rewards/accuracies": 0.625, "rewards/chosen": -0.10633968561887741, "rewards/margins": -0.002040260471403599, "rewards/rejected": -0.10429942607879639, "step": 531 }, { "epoch": 0.32453866097300593, "grad_norm": 1.2151495218276978, "learning_rate": 7.508144519289651e-06, "log_odds_chosen": 0.08704422414302826, "log_odds_ratio": -0.7212074398994446, "logits/chosen": -0.8646799325942993, "logits/rejected": -0.6525309681892395, "logps/chosen": -0.868733823299408, "logps/rejected": -0.8648262619972229, "loss": 1.1757, "nll_loss": 1.089589238166809, "rewards/accuracies": 0.5, "rewards/chosen": -0.0868733823299408, "rewards/margins": -0.0003907601349055767, "rewards/rejected": -0.08648262917995453, "step": 532 }, { "epoch": 0.3251486960500229, "grad_norm": 2.8444886207580566, "learning_rate": 7.507164727495407e-06, "log_odds_chosen": 0.5146658420562744, "log_odds_ratio": -0.6006065011024475, "logits/chosen": -0.9260507822036743, "logits/rejected": -0.6738665103912354, "logps/chosen": -0.969871997833252, "logps/rejected": -1.3290348052978516, "loss": 1.201, "nll_loss": 1.131309151649475, "rewards/accuracies": 0.5, "rewards/chosen": -0.09698719531297684, "rewards/margins": 0.0359162911772728, "rewards/rejected": -0.13290348649024963, "step": 533 }, { "epoch": 0.3257587311270398, "grad_norm": 2.8852474689483643, "learning_rate": 7.506184935701163e-06, "log_odds_chosen": 0.6955247521400452, "log_odds_ratio": -0.4592200815677643, "logits/chosen": -0.6088956594467163, "logits/rejected": -0.5608602166175842, "logps/chosen": -0.7307481169700623, "logps/rejected": -1.0851690769195557, "loss": 1.0801, "nll_loss": 0.8238611221313477, "rewards/accuracies": 0.875, "rewards/chosen": -0.0730748176574707, "rewards/margins": 0.03544209897518158, "rewards/rejected": -0.10851690918207169, "step": 534 }, { "epoch": 0.32636876620405675, "grad_norm": 2.174764633178711, "learning_rate": 7.505205143906919e-06, "log_odds_chosen": 0.8953385949134827, "log_odds_ratio": -0.4219960570335388, "logits/chosen": -0.6602382659912109, "logits/rejected": -0.592197835445404, "logps/chosen": -0.6233317852020264, "logps/rejected": -1.0642497539520264, "loss": 1.2125, "nll_loss": 0.9316391944885254, "rewards/accuracies": 0.875, "rewards/chosen": -0.06233317404985428, "rewards/margins": 0.04409180209040642, "rewards/rejected": -0.106424979865551, "step": 535 }, { "epoch": 0.32697880128107365, "grad_norm": 2.306215763092041, "learning_rate": 7.5042253521126754e-06, "log_odds_chosen": 0.03633973002433777, "log_odds_ratio": -0.738911509513855, "logits/chosen": -0.933951735496521, "logits/rejected": -0.671603798866272, "logps/chosen": -1.2008450031280518, "logps/rejected": -1.2693160772323608, "loss": 1.3083, "nll_loss": 1.2778666019439697, "rewards/accuracies": 0.5, "rewards/chosen": -0.12008450925350189, "rewards/margins": 0.006847103592008352, "rewards/rejected": -0.12693160772323608, "step": 536 }, { "epoch": 0.3275888363580906, "grad_norm": 1.7701464891433716, "learning_rate": 7.5032455603184316e-06, "log_odds_chosen": 0.3555559813976288, "log_odds_ratio": -0.5923171043395996, "logits/chosen": -0.6671931743621826, "logits/rejected": -0.6815792322158813, "logps/chosen": -0.7252172231674194, "logps/rejected": -0.8895531892776489, "loss": 1.2318, "nll_loss": 0.927949070930481, "rewards/accuracies": 0.75, "rewards/chosen": -0.07252173125743866, "rewards/margins": 0.01643359288573265, "rewards/rejected": -0.08895532041788101, "step": 537 }, { "epoch": 0.3281988714351075, "grad_norm": 3.7529473304748535, "learning_rate": 7.5022657685241885e-06, "log_odds_chosen": 0.2826739251613617, "log_odds_ratio": -0.6455239057540894, "logits/chosen": -0.8759108185768127, "logits/rejected": -0.620827317237854, "logps/chosen": -0.9409904479980469, "logps/rejected": -1.0109859704971313, "loss": 1.1621, "nll_loss": 1.1364693641662598, "rewards/accuracies": 0.5, "rewards/chosen": -0.09409904479980469, "rewards/margins": 0.006999547593295574, "rewards/rejected": -0.10109860450029373, "step": 538 }, { "epoch": 0.32880890651212447, "grad_norm": 2.1481800079345703, "learning_rate": 7.501285976729945e-06, "log_odds_chosen": 0.31132692098617554, "log_odds_ratio": -0.6052838563919067, "logits/chosen": -0.7224023938179016, "logits/rejected": -0.4958651065826416, "logps/chosen": -0.7727032899856567, "logps/rejected": -0.9190347790718079, "loss": 1.1951, "nll_loss": 1.0466337203979492, "rewards/accuracies": 0.625, "rewards/chosen": -0.07727032899856567, "rewards/margins": 0.014633143320679665, "rewards/rejected": -0.09190347045660019, "step": 539 }, { "epoch": 0.32941894158914137, "grad_norm": 1.8133164644241333, "learning_rate": 7.500306184935702e-06, "log_odds_chosen": 0.7959858179092407, "log_odds_ratio": -0.5043114423751831, "logits/chosen": -0.7262102365493774, "logits/rejected": -0.597271203994751, "logps/chosen": -0.8011658191680908, "logps/rejected": -1.3217519521713257, "loss": 1.1695, "nll_loss": 1.0087774991989136, "rewards/accuracies": 0.75, "rewards/chosen": -0.08011657744646072, "rewards/margins": 0.052058618515729904, "rewards/rejected": -0.13217519223690033, "step": 540 }, { "epoch": 0.3300289766661583, "grad_norm": 2.1710190773010254, "learning_rate": 7.499326393141457e-06, "log_odds_chosen": 0.4735857844352722, "log_odds_ratio": -0.5736089944839478, "logits/chosen": -0.6018572449684143, "logits/rejected": -0.48362618684768677, "logps/chosen": -0.7162358164787292, "logps/rejected": -0.9406477808952332, "loss": 1.025, "nll_loss": 0.9105579257011414, "rewards/accuracies": 0.75, "rewards/chosen": -0.07162357866764069, "rewards/margins": 0.022441204637289047, "rewards/rejected": -0.09406478703022003, "step": 541 }, { "epoch": 0.3306390117431752, "grad_norm": 1.5891348123550415, "learning_rate": 7.498346601347213e-06, "log_odds_chosen": -0.031228363513946533, "log_odds_ratio": -0.7743247151374817, "logits/chosen": -0.891741931438446, "logits/rejected": -0.7832950353622437, "logps/chosen": -0.8952641487121582, "logps/rejected": -0.8667294979095459, "loss": 1.1755, "nll_loss": 1.1278984546661377, "rewards/accuracies": 0.375, "rewards/chosen": -0.08952642232179642, "rewards/margins": -0.0028534643352031708, "rewards/rejected": -0.08667295426130295, "step": 542 }, { "epoch": 0.3312490468201922, "grad_norm": 2.276210308074951, "learning_rate": 7.49736680955297e-06, "log_odds_chosen": 0.4057652950286865, "log_odds_ratio": -0.5434128046035767, "logits/chosen": -0.7889856100082397, "logits/rejected": -0.3489120900630951, "logps/chosen": -1.0651601552963257, "logps/rejected": -1.3457930088043213, "loss": 1.1008, "nll_loss": 1.125175952911377, "rewards/accuracies": 0.75, "rewards/chosen": -0.10651601850986481, "rewards/margins": 0.028063297271728516, "rewards/rejected": -0.13457930088043213, "step": 543 }, { "epoch": 0.3318590818972091, "grad_norm": 2.3547348976135254, "learning_rate": 7.496387017758726e-06, "log_odds_chosen": -0.03352770954370499, "log_odds_ratio": -0.7970402240753174, "logits/chosen": -1.0183485746383667, "logits/rejected": -0.8335429430007935, "logps/chosen": -1.0595388412475586, "logps/rejected": -1.0414681434631348, "loss": 1.2807, "nll_loss": 1.3969849348068237, "rewards/accuracies": 0.25, "rewards/chosen": -0.1059538796544075, "rewards/margins": -0.0018070610240101814, "rewards/rejected": -0.1041468158364296, "step": 544 }, { "epoch": 0.33246911697422604, "grad_norm": 3.8464930057525635, "learning_rate": 7.495407225964482e-06, "log_odds_chosen": 0.04858130216598511, "log_odds_ratio": -0.8289941549301147, "logits/chosen": -0.8431075215339661, "logits/rejected": -0.7677030563354492, "logps/chosen": -1.2231943607330322, "logps/rejected": -1.2115478515625, "loss": 1.3386, "nll_loss": 1.4753470420837402, "rewards/accuracies": 0.5, "rewards/chosen": -0.12231943756341934, "rewards/margins": -0.0011646589264273643, "rewards/rejected": -0.1211547777056694, "step": 545 }, { "epoch": 0.33307915205124294, "grad_norm": 3.5543935298919678, "learning_rate": 7.494427434170239e-06, "log_odds_chosen": 0.6042671203613281, "log_odds_ratio": -0.45875632762908936, "logits/chosen": -0.9212436676025391, "logits/rejected": -0.7182016372680664, "logps/chosen": -0.9721032381057739, "logps/rejected": -1.4009027481079102, "loss": 1.102, "nll_loss": 1.170722484588623, "rewards/accuracies": 0.875, "rewards/chosen": -0.09721032530069351, "rewards/margins": 0.04287995770573616, "rewards/rejected": -0.14009028673171997, "step": 546 }, { "epoch": 0.3336891871282599, "grad_norm": 1.509411096572876, "learning_rate": 7.4934476423759945e-06, "log_odds_chosen": 0.5111051201820374, "log_odds_ratio": -0.5126672983169556, "logits/chosen": -0.9407274127006531, "logits/rejected": -0.8815658092498779, "logps/chosen": -0.827083945274353, "logps/rejected": -1.1309770345687866, "loss": 1.2561, "nll_loss": 1.2728753089904785, "rewards/accuracies": 0.875, "rewards/chosen": -0.08270839601755142, "rewards/margins": 0.03038931079208851, "rewards/rejected": -0.11309770494699478, "step": 547 }, { "epoch": 0.3342992222052768, "grad_norm": 2.3675663471221924, "learning_rate": 7.492467850581751e-06, "log_odds_chosen": 0.28459662199020386, "log_odds_ratio": -0.7446752786636353, "logits/chosen": -1.103224277496338, "logits/rejected": -0.792174220085144, "logps/chosen": -0.9772040843963623, "logps/rejected": -1.099323034286499, "loss": 1.2485, "nll_loss": 1.2897217273712158, "rewards/accuracies": 0.25, "rewards/chosen": -0.09772041440010071, "rewards/margins": 0.012211905792355537, "rewards/rejected": -0.1099323034286499, "step": 548 }, { "epoch": 0.3349092572822937, "grad_norm": 3.176452398300171, "learning_rate": 7.4914880587875076e-06, "log_odds_chosen": 0.8090983629226685, "log_odds_ratio": -0.4399205148220062, "logits/chosen": -0.6802666783332825, "logits/rejected": -0.6973263621330261, "logps/chosen": -0.6568824052810669, "logps/rejected": -1.1187936067581177, "loss": 1.1782, "nll_loss": 0.8314415216445923, "rewards/accuracies": 0.875, "rewards/chosen": -0.06568823754787445, "rewards/margins": 0.046191126108169556, "rewards/rejected": -0.111879363656044, "step": 549 }, { "epoch": 0.33551929235931066, "grad_norm": 1.6156067848205566, "learning_rate": 7.490508266993264e-06, "log_odds_chosen": 0.4622482657432556, "log_odds_ratio": -0.5949786305427551, "logits/chosen": -0.9054441452026367, "logits/rejected": -0.8222690224647522, "logps/chosen": -0.9096996784210205, "logps/rejected": -1.087550163269043, "loss": 1.1558, "nll_loss": 1.2767763137817383, "rewards/accuracies": 0.625, "rewards/chosen": -0.09096997231245041, "rewards/margins": 0.017785049974918365, "rewards/rejected": -0.10875502228736877, "step": 550 }, { "epoch": 0.33612932743632756, "grad_norm": 2.173185348510742, "learning_rate": 7.48952847519902e-06, "log_odds_chosen": 0.812583327293396, "log_odds_ratio": -0.5145860314369202, "logits/chosen": -0.7069043517112732, "logits/rejected": -0.7020682692527771, "logps/chosen": -0.9104750156402588, "logps/rejected": -1.5139238834381104, "loss": 1.2901, "nll_loss": 1.084916114807129, "rewards/accuracies": 0.625, "rewards/chosen": -0.0910475105047226, "rewards/margins": 0.0603448748588562, "rewards/rejected": -0.15139240026474, "step": 551 }, { "epoch": 0.3367393625133445, "grad_norm": 1.620370864868164, "learning_rate": 7.488548683404776e-06, "log_odds_chosen": -0.09337018430233002, "log_odds_ratio": -0.8612682819366455, "logits/chosen": -1.072957158088684, "logits/rejected": -0.7247740626335144, "logps/chosen": -1.194848895072937, "logps/rejected": -1.083373785018921, "loss": 1.3582, "nll_loss": 1.3688666820526123, "rewards/accuracies": 0.25, "rewards/chosen": -0.11948488652706146, "rewards/margins": -0.011147500947117805, "rewards/rejected": -0.1083373874425888, "step": 552 }, { "epoch": 0.3373493975903614, "grad_norm": 1.4183948040008545, "learning_rate": 7.487568891610532e-06, "log_odds_chosen": 0.4734949469566345, "log_odds_ratio": -0.5601305961608887, "logits/chosen": -0.9407700300216675, "logits/rejected": -0.8636681437492371, "logps/chosen": -0.8227407932281494, "logps/rejected": -1.0825642347335815, "loss": 1.1912, "nll_loss": 1.2521767616271973, "rewards/accuracies": 0.75, "rewards/chosen": -0.08227407932281494, "rewards/margins": 0.02598235011100769, "rewards/rejected": -0.10825642943382263, "step": 553 }, { "epoch": 0.3379594326673784, "grad_norm": 8.483487129211426, "learning_rate": 7.486589099816289e-06, "log_odds_chosen": 0.6895835995674133, "log_odds_ratio": -0.4494880437850952, "logits/chosen": -0.8563649654388428, "logits/rejected": -0.5301767587661743, "logps/chosen": -0.7745382785797119, "logps/rejected": -1.2252947092056274, "loss": 1.0944, "nll_loss": 1.031543493270874, "rewards/accuracies": 0.875, "rewards/chosen": -0.07745382189750671, "rewards/margins": 0.04507565498352051, "rewards/rejected": -0.12252948433160782, "step": 554 }, { "epoch": 0.3385694677443953, "grad_norm": 1.846039056777954, "learning_rate": 7.485609308022045e-06, "log_odds_chosen": 0.4632478356361389, "log_odds_ratio": -0.5855181217193604, "logits/chosen": -0.7401593327522278, "logits/rejected": -0.5392407178878784, "logps/chosen": -1.040003776550293, "logps/rejected": -1.3902690410614014, "loss": 1.1564, "nll_loss": 1.2651183605194092, "rewards/accuracies": 0.75, "rewards/chosen": -0.10400037467479706, "rewards/margins": 0.035026539117097855, "rewards/rejected": -0.13902691006660461, "step": 555 }, { "epoch": 0.33917950282141224, "grad_norm": 2.0112316608428955, "learning_rate": 7.484629516227801e-06, "log_odds_chosen": 0.028547845780849457, "log_odds_ratio": -0.7235783934593201, "logits/chosen": -0.918293297290802, "logits/rejected": -0.3760317265987396, "logps/chosen": -1.3894898891448975, "logps/rejected": -1.4272375106811523, "loss": 1.2473, "nll_loss": 1.3628206253051758, "rewards/accuracies": 0.625, "rewards/chosen": -0.13894899189472198, "rewards/margins": 0.00377477565780282, "rewards/rejected": -0.14272376894950867, "step": 556 }, { "epoch": 0.33978953789842914, "grad_norm": 2.132272958755493, "learning_rate": 7.483649724433558e-06, "log_odds_chosen": 0.2659241855144501, "log_odds_ratio": -0.6952410936355591, "logits/chosen": -0.7822130918502808, "logits/rejected": -0.7994852662086487, "logps/chosen": -0.87055903673172, "logps/rejected": -1.021316647529602, "loss": 1.3882, "nll_loss": 1.3493753671646118, "rewards/accuracies": 0.5, "rewards/chosen": -0.08705590665340424, "rewards/margins": 0.015075767412781715, "rewards/rejected": -0.1021316722035408, "step": 557 }, { "epoch": 0.3403995729754461, "grad_norm": 2.1799590587615967, "learning_rate": 7.4826699326393135e-06, "log_odds_chosen": 0.9240046143531799, "log_odds_ratio": -0.4174935221672058, "logits/chosen": -1.010056734085083, "logits/rejected": -0.45319482684135437, "logps/chosen": -0.9094511270523071, "logps/rejected": -1.5369691848754883, "loss": 1.1929, "nll_loss": 1.143677830696106, "rewards/accuracies": 0.875, "rewards/chosen": -0.09094510972499847, "rewards/margins": 0.06275182217359543, "rewards/rejected": -0.1536969244480133, "step": 558 }, { "epoch": 0.341009608052463, "grad_norm": 3.0145106315612793, "learning_rate": 7.48169014084507e-06, "log_odds_chosen": 0.1596866250038147, "log_odds_ratio": -0.6964091658592224, "logits/chosen": -0.758838415145874, "logits/rejected": -0.6091774702072144, "logps/chosen": -0.8988413214683533, "logps/rejected": -0.9628239274024963, "loss": 1.0531, "nll_loss": 1.2224918603897095, "rewards/accuracies": 0.625, "rewards/chosen": -0.08988413214683533, "rewards/margins": 0.006398269906640053, "rewards/rejected": -0.09628240019083023, "step": 559 }, { "epoch": 0.34161964312947996, "grad_norm": 1.4551010131835938, "learning_rate": 7.480710349050827e-06, "log_odds_chosen": 0.7280569076538086, "log_odds_ratio": -0.4982643723487854, "logits/chosen": -0.41580930352211, "logits/rejected": -0.45028650760650635, "logps/chosen": -0.5927192568778992, "logps/rejected": -1.0017454624176025, "loss": 0.9706, "nll_loss": 0.7404015064239502, "rewards/accuracies": 0.625, "rewards/chosen": -0.059271927922964096, "rewards/margins": 0.04090261831879616, "rewards/rejected": -0.10017454624176025, "step": 560 }, { "epoch": 0.34222967820649686, "grad_norm": 2.227304458618164, "learning_rate": 7.479730557256583e-06, "log_odds_chosen": 0.19358931481838226, "log_odds_ratio": -0.6625943183898926, "logits/chosen": -0.7863336801528931, "logits/rejected": -0.8524559736251831, "logps/chosen": -0.8146666288375854, "logps/rejected": -0.8970917463302612, "loss": 1.2499, "nll_loss": 0.9173654317855835, "rewards/accuracies": 0.5, "rewards/chosen": -0.0814666599035263, "rewards/margins": 0.008242513984441757, "rewards/rejected": -0.08970917761325836, "step": 561 }, { "epoch": 0.3428397132835138, "grad_norm": 3.7869346141815186, "learning_rate": 7.478750765462339e-06, "log_odds_chosen": 0.2560863196849823, "log_odds_ratio": -0.6256566047668457, "logits/chosen": -0.8715987801551819, "logits/rejected": -0.8737273216247559, "logps/chosen": -0.887238621711731, "logps/rejected": -1.0317898988723755, "loss": 1.1061, "nll_loss": 1.0443229675292969, "rewards/accuracies": 0.625, "rewards/chosen": -0.08872386813163757, "rewards/margins": 0.014455122873187065, "rewards/rejected": -0.10317898541688919, "step": 562 }, { "epoch": 0.3434497483605307, "grad_norm": 3.516345500946045, "learning_rate": 7.477770973668096e-06, "log_odds_chosen": 1.4009132385253906, "log_odds_ratio": -0.3020590841770172, "logits/chosen": -0.38772743940353394, "logits/rejected": -0.1802389770746231, "logps/chosen": -0.4711030125617981, "logps/rejected": -1.2012794017791748, "loss": 1.1835, "nll_loss": 0.8514180779457092, "rewards/accuracies": 0.875, "rewards/chosen": -0.04711030423641205, "rewards/margins": 0.07301764190196991, "rewards/rejected": -0.12012794613838196, "step": 563 }, { "epoch": 0.3440597834375477, "grad_norm": 1.5675415992736816, "learning_rate": 7.476791181873851e-06, "log_odds_chosen": 0.34915104508399963, "log_odds_ratio": -0.5885416269302368, "logits/chosen": -0.9226288795471191, "logits/rejected": -0.9161977767944336, "logps/chosen": -0.8613839149475098, "logps/rejected": -1.0814064741134644, "loss": 1.1779, "nll_loss": 1.064487099647522, "rewards/accuracies": 0.625, "rewards/chosen": -0.08613838255405426, "rewards/margins": 0.022002264857292175, "rewards/rejected": -0.10814065486192703, "step": 564 }, { "epoch": 0.3446698185145646, "grad_norm": 1.6643054485321045, "learning_rate": 7.475811390079607e-06, "log_odds_chosen": 0.11997469514608383, "log_odds_ratio": -0.6522902250289917, "logits/chosen": -0.7519733309745789, "logits/rejected": -0.6780299544334412, "logps/chosen": -0.7516345977783203, "logps/rejected": -0.8231319189071655, "loss": 1.1381, "nll_loss": 0.8775073289871216, "rewards/accuracies": 0.625, "rewards/chosen": -0.07516346871852875, "rewards/margins": 0.007149728946387768, "rewards/rejected": -0.08231319487094879, "step": 565 }, { "epoch": 0.34527985359158153, "grad_norm": 2.358529567718506, "learning_rate": 7.474831598285364e-06, "log_odds_chosen": 0.3809076249599457, "log_odds_ratio": -0.5725009441375732, "logits/chosen": -0.9838718175888062, "logits/rejected": -0.8047285079956055, "logps/chosen": -0.8381791710853577, "logps/rejected": -1.0987433195114136, "loss": 1.2252, "nll_loss": 1.2179896831512451, "rewards/accuracies": 0.625, "rewards/chosen": -0.08381792902946472, "rewards/margins": 0.026056412607431412, "rewards/rejected": -0.10987433791160583, "step": 566 }, { "epoch": 0.34588988866859843, "grad_norm": 1.8600764274597168, "learning_rate": 7.47385180649112e-06, "log_odds_chosen": 0.5564144253730774, "log_odds_ratio": -0.480718731880188, "logits/chosen": -0.9337607026100159, "logits/rejected": -0.5407679080963135, "logps/chosen": -0.7530437707901001, "logps/rejected": -1.0771740674972534, "loss": 1.1024, "nll_loss": 0.8855629563331604, "rewards/accuracies": 0.75, "rewards/chosen": -0.07530438154935837, "rewards/margins": 0.032413024455308914, "rewards/rejected": -0.10771740227937698, "step": 567 }, { "epoch": 0.3464999237456154, "grad_norm": 2.3645284175872803, "learning_rate": 7.472872014696877e-06, "log_odds_chosen": 0.35300037264823914, "log_odds_ratio": -0.569011926651001, "logits/chosen": -0.7228257060050964, "logits/rejected": -0.7761535048484802, "logps/chosen": -0.9739802479743958, "logps/rejected": -1.209738850593567, "loss": 1.2383, "nll_loss": 1.2566965818405151, "rewards/accuracies": 0.875, "rewards/chosen": -0.09739802777767181, "rewards/margins": 0.023575864732265472, "rewards/rejected": -0.12097388505935669, "step": 568 }, { "epoch": 0.3471099588226323, "grad_norm": 1.6616765260696411, "learning_rate": 7.4718922229026325e-06, "log_odds_chosen": 0.4096013307571411, "log_odds_ratio": -0.6794477701187134, "logits/chosen": -0.7943382263183594, "logits/rejected": -0.7333483695983887, "logps/chosen": -0.9658517837524414, "logps/rejected": -1.1754777431488037, "loss": 1.1572, "nll_loss": 1.1605650186538696, "rewards/accuracies": 0.5, "rewards/chosen": -0.09658518433570862, "rewards/margins": 0.02096259593963623, "rewards/rejected": -0.11754778027534485, "step": 569 }, { "epoch": 0.34771999389964925, "grad_norm": 1.5955160856246948, "learning_rate": 7.470912431108389e-06, "log_odds_chosen": 0.3602481484413147, "log_odds_ratio": -0.5876190662384033, "logits/chosen": -1.0605695247650146, "logits/rejected": -0.9247593879699707, "logps/chosen": -0.8378534317016602, "logps/rejected": -1.0312376022338867, "loss": 1.1701, "nll_loss": 1.3016793727874756, "rewards/accuracies": 0.5, "rewards/chosen": -0.08378534764051437, "rewards/margins": 0.019338417798280716, "rewards/rejected": -0.10312376171350479, "step": 570 }, { "epoch": 0.34833002897666615, "grad_norm": 3.1411380767822266, "learning_rate": 7.469932639314146e-06, "log_odds_chosen": 0.40823790431022644, "log_odds_ratio": -0.5888459086418152, "logits/chosen": -0.8530519008636475, "logits/rejected": -0.8101698160171509, "logps/chosen": -0.8687830567359924, "logps/rejected": -1.0763617753982544, "loss": 1.2218, "nll_loss": 1.1984931230545044, "rewards/accuracies": 0.875, "rewards/chosen": -0.08687830716371536, "rewards/margins": 0.020757878199219704, "rewards/rejected": -0.10763618350028992, "step": 571 }, { "epoch": 0.3489400640536831, "grad_norm": 1.4369792938232422, "learning_rate": 7.468952847519902e-06, "log_odds_chosen": 0.7026334404945374, "log_odds_ratio": -0.5721765756607056, "logits/chosen": -0.7691370248794556, "logits/rejected": -0.6697442531585693, "logps/chosen": -0.9012521505355835, "logps/rejected": -1.3902987241744995, "loss": 1.2218, "nll_loss": 1.1132704019546509, "rewards/accuracies": 0.625, "rewards/chosen": -0.09012521803379059, "rewards/margins": 0.0489046573638916, "rewards/rejected": -0.139029860496521, "step": 572 }, { "epoch": 0.3495500991307, "grad_norm": 1.7780025005340576, "learning_rate": 7.467973055725658e-06, "log_odds_chosen": 0.5949538350105286, "log_odds_ratio": -0.49805858731269836, "logits/chosen": -0.7134580016136169, "logits/rejected": -0.4492272138595581, "logps/chosen": -0.8980543613433838, "logps/rejected": -1.2372982501983643, "loss": 1.1875, "nll_loss": 1.2070497274398804, "rewards/accuracies": 0.875, "rewards/chosen": -0.08980543911457062, "rewards/margins": 0.03392438590526581, "rewards/rejected": -0.12372982501983643, "step": 573 }, { "epoch": 0.35016013420771697, "grad_norm": 1.9389218091964722, "learning_rate": 7.466993263931415e-06, "log_odds_chosen": 0.6935715675354004, "log_odds_ratio": -0.46273601055145264, "logits/chosen": -0.7179636359214783, "logits/rejected": -0.5493271350860596, "logps/chosen": -0.8571490049362183, "logps/rejected": -1.2802093029022217, "loss": 1.175, "nll_loss": 1.2987141609191895, "rewards/accuracies": 0.75, "rewards/chosen": -0.0857149064540863, "rewards/margins": 0.04230603203177452, "rewards/rejected": -0.12802092730998993, "step": 574 }, { "epoch": 0.35077016928473387, "grad_norm": 4.417479038238525, "learning_rate": 7.46601347213717e-06, "log_odds_chosen": 0.35935473442077637, "log_odds_ratio": -0.5895339846611023, "logits/chosen": -0.9195914268493652, "logits/rejected": -0.7975256443023682, "logps/chosen": -0.8364778757095337, "logps/rejected": -1.0038515329360962, "loss": 1.1323, "nll_loss": 1.011515736579895, "rewards/accuracies": 0.625, "rewards/chosen": -0.08364779502153397, "rewards/margins": 0.016737360507249832, "rewards/rejected": -0.1003851592540741, "step": 575 }, { "epoch": 0.3513802043617508, "grad_norm": 2.3633549213409424, "learning_rate": 7.465033680342926e-06, "log_odds_chosen": 0.37969064712524414, "log_odds_ratio": -0.5707975625991821, "logits/chosen": -0.9496698975563049, "logits/rejected": -0.7541441917419434, "logps/chosen": -1.0899479389190674, "logps/rejected": -1.3494516611099243, "loss": 1.1289, "nll_loss": 1.3290642499923706, "rewards/accuracies": 0.625, "rewards/chosen": -0.10899478197097778, "rewards/margins": 0.02595037966966629, "rewards/rejected": -0.13494516909122467, "step": 576 }, { "epoch": 0.3519902394387677, "grad_norm": 1.6723718643188477, "learning_rate": 7.464053888548683e-06, "log_odds_chosen": 0.3930833041667938, "log_odds_ratio": -0.5972433686256409, "logits/chosen": -0.895727276802063, "logits/rejected": -0.5534330606460571, "logps/chosen": -0.88779217004776, "logps/rejected": -1.0900176763534546, "loss": 1.0907, "nll_loss": 1.0221143960952759, "rewards/accuracies": 0.75, "rewards/chosen": -0.08877921849489212, "rewards/margins": 0.02022254467010498, "rewards/rejected": -0.1090017631649971, "step": 577 }, { "epoch": 0.3526002745157847, "grad_norm": 1.5673047304153442, "learning_rate": 7.463074096754439e-06, "log_odds_chosen": 0.09873712062835693, "log_odds_ratio": -0.7257397174835205, "logits/chosen": -0.5943139791488647, "logits/rejected": -0.7216383218765259, "logps/chosen": -0.8567925095558167, "logps/rejected": -0.8383630514144897, "loss": 1.0705, "nll_loss": 0.8819749355316162, "rewards/accuracies": 0.5, "rewards/chosen": -0.08567924797534943, "rewards/margins": -0.0018429411575198174, "rewards/rejected": -0.08383631706237793, "step": 578 }, { "epoch": 0.3532103095928016, "grad_norm": 1.800527572631836, "learning_rate": 7.4620943049601955e-06, "log_odds_chosen": 0.20890484750270844, "log_odds_ratio": -0.6738218665122986, "logits/chosen": -0.5131166577339172, "logits/rejected": -0.6597679257392883, "logps/chosen": -0.9443797469139099, "logps/rejected": -1.0831506252288818, "loss": 1.133, "nll_loss": 1.0617057085037231, "rewards/accuracies": 0.5, "rewards/chosen": -0.09443798661231995, "rewards/margins": 0.01387707982212305, "rewards/rejected": -0.10831506550312042, "step": 579 }, { "epoch": 0.3538203446698185, "grad_norm": 1.8303558826446533, "learning_rate": 7.4611145131659524e-06, "log_odds_chosen": 1.288333773612976, "log_odds_ratio": -0.2904435396194458, "logits/chosen": -0.5461866855621338, "logits/rejected": -0.4265415668487549, "logps/chosen": -0.5152004361152649, "logps/rejected": -1.1341233253479004, "loss": 1.1139, "nll_loss": 0.8101640939712524, "rewards/accuracies": 1.0, "rewards/chosen": -0.05152004584670067, "rewards/margins": 0.06189229339361191, "rewards/rejected": -0.11341233551502228, "step": 580 }, { "epoch": 0.35443037974683544, "grad_norm": 2.3132808208465576, "learning_rate": 7.460134721371708e-06, "log_odds_chosen": 0.6281065344810486, "log_odds_ratio": -0.4817311763763428, "logits/chosen": -0.9505938291549683, "logits/rejected": -0.6981932520866394, "logps/chosen": -0.9767268896102905, "logps/rejected": -1.3514355421066284, "loss": 1.4306, "nll_loss": 1.2704238891601562, "rewards/accuracies": 0.875, "rewards/chosen": -0.09767268598079681, "rewards/margins": 0.037470873445272446, "rewards/rejected": -0.13514354825019836, "step": 581 }, { "epoch": 0.35504041482385235, "grad_norm": 1.3495320081710815, "learning_rate": 7.459154929577465e-06, "log_odds_chosen": 0.2824276387691498, "log_odds_ratio": -0.6033127307891846, "logits/chosen": -0.9597598314285278, "logits/rejected": -0.8359571695327759, "logps/chosen": -0.8111422061920166, "logps/rejected": -0.978684663772583, "loss": 0.9831, "nll_loss": 0.9797089099884033, "rewards/accuracies": 0.625, "rewards/chosen": -0.08111421763896942, "rewards/margins": 0.01675424538552761, "rewards/rejected": -0.09786847233772278, "step": 582 }, { "epoch": 0.3556504499008693, "grad_norm": 1.8400684595108032, "learning_rate": 7.458175137783221e-06, "log_odds_chosen": 0.34931695461273193, "log_odds_ratio": -0.638886570930481, "logits/chosen": -0.7396467328071594, "logits/rejected": -0.4212626516819, "logps/chosen": -0.8939213752746582, "logps/rejected": -1.1743412017822266, "loss": 1.1816, "nll_loss": 1.0139086246490479, "rewards/accuracies": 0.5, "rewards/chosen": -0.08939214050769806, "rewards/margins": 0.028041979297995567, "rewards/rejected": -0.11743411421775818, "step": 583 }, { "epoch": 0.3562604849778862, "grad_norm": 2.465468645095825, "learning_rate": 7.457195345988977e-06, "log_odds_chosen": 0.7844269275665283, "log_odds_ratio": -0.5155989527702332, "logits/chosen": -0.7788552045822144, "logits/rejected": -0.7007317543029785, "logps/chosen": -0.9541213512420654, "logps/rejected": -1.4387109279632568, "loss": 1.0935, "nll_loss": 1.0568184852600098, "rewards/accuracies": 0.75, "rewards/chosen": -0.09541214257478714, "rewards/margins": 0.04845895245671272, "rewards/rejected": -0.14387108385562897, "step": 584 }, { "epoch": 0.35687052005490316, "grad_norm": 1.4503076076507568, "learning_rate": 7.456215554194734e-06, "log_odds_chosen": 0.5616871118545532, "log_odds_ratio": -0.5413557887077332, "logits/chosen": -0.883612334728241, "logits/rejected": -0.800460159778595, "logps/chosen": -0.8764939308166504, "logps/rejected": -1.2204655408859253, "loss": 1.243, "nll_loss": 1.3151756525039673, "rewards/accuracies": 0.625, "rewards/chosen": -0.0876493901014328, "rewards/margins": 0.03439716249704361, "rewards/rejected": -0.12204655259847641, "step": 585 }, { "epoch": 0.35748055513192006, "grad_norm": 2.1786811351776123, "learning_rate": 7.455235762400489e-06, "log_odds_chosen": 0.33571940660476685, "log_odds_ratio": -0.6005175113677979, "logits/chosen": -0.570440948009491, "logits/rejected": -0.6535412669181824, "logps/chosen": -0.9914689064025879, "logps/rejected": -1.2191541194915771, "loss": 1.2281, "nll_loss": 1.227748155593872, "rewards/accuracies": 0.625, "rewards/chosen": -0.09914688766002655, "rewards/margins": 0.022768519818782806, "rewards/rejected": -0.12191541492938995, "step": 586 }, { "epoch": 0.358090590208937, "grad_norm": 2.694249391555786, "learning_rate": 7.454255970606245e-06, "log_odds_chosen": 0.287312388420105, "log_odds_ratio": -0.6275303363800049, "logits/chosen": -0.8291124105453491, "logits/rejected": -0.6475328803062439, "logps/chosen": -0.7980310916900635, "logps/rejected": -0.9741908311843872, "loss": 1.2383, "nll_loss": 1.133311152458191, "rewards/accuracies": 0.75, "rewards/chosen": -0.07980310916900635, "rewards/margins": 0.01761598140001297, "rewards/rejected": -0.09741908311843872, "step": 587 }, { "epoch": 0.3587006252859539, "grad_norm": 2.066239833831787, "learning_rate": 7.453276178812002e-06, "log_odds_chosen": 0.8167078495025635, "log_odds_ratio": -0.4536983370780945, "logits/chosen": -0.8882420063018799, "logits/rejected": -0.8333288431167603, "logps/chosen": -0.7364187836647034, "logps/rejected": -1.2215720415115356, "loss": 1.1047, "nll_loss": 0.9508802890777588, "rewards/accuracies": 0.75, "rewards/chosen": -0.07364188134670258, "rewards/margins": 0.04851532727479935, "rewards/rejected": -0.12215720117092133, "step": 588 }, { "epoch": 0.3593106603629709, "grad_norm": 5.874992370605469, "learning_rate": 7.452296387017758e-06, "log_odds_chosen": 0.704352855682373, "log_odds_ratio": -0.47689223289489746, "logits/chosen": -0.5703623294830322, "logits/rejected": -0.5181507468223572, "logps/chosen": -0.8117230534553528, "logps/rejected": -1.1973737478256226, "loss": 1.2211, "nll_loss": 1.1219854354858398, "rewards/accuracies": 0.625, "rewards/chosen": -0.08117230236530304, "rewards/margins": 0.03856506198644638, "rewards/rejected": -0.11973737180233002, "step": 589 }, { "epoch": 0.3599206954399878, "grad_norm": 1.8126497268676758, "learning_rate": 7.4513165952235145e-06, "log_odds_chosen": 1.1122039556503296, "log_odds_ratio": -0.37703603506088257, "logits/chosen": -0.6912447214126587, "logits/rejected": -0.608352780342102, "logps/chosen": -0.6661853790283203, "logps/rejected": -1.246713399887085, "loss": 1.1738, "nll_loss": 1.09083890914917, "rewards/accuracies": 0.75, "rewards/chosen": -0.06661853939294815, "rewards/margins": 0.05805279314517975, "rewards/rejected": -0.1246713250875473, "step": 590 }, { "epoch": 0.36053073051700474, "grad_norm": 1.9827518463134766, "learning_rate": 7.4503368034292715e-06, "log_odds_chosen": 0.5963858366012573, "log_odds_ratio": -0.49397653341293335, "logits/chosen": -0.677963376045227, "logits/rejected": -0.43392547965049744, "logps/chosen": -0.8643999695777893, "logps/rejected": -1.2760009765625, "loss": 1.188, "nll_loss": 1.073564052581787, "rewards/accuracies": 0.625, "rewards/chosen": -0.08644000440835953, "rewards/margins": 0.04116010293364525, "rewards/rejected": -0.12760010361671448, "step": 591 }, { "epoch": 0.36114076559402164, "grad_norm": 3.845334053039551, "learning_rate": 7.449357011635027e-06, "log_odds_chosen": 0.6595717072486877, "log_odds_ratio": -0.4401244819164276, "logits/chosen": -0.9588133096694946, "logits/rejected": -0.6569585204124451, "logps/chosen": -0.7822142839431763, "logps/rejected": -1.1833478212356567, "loss": 1.1072, "nll_loss": 0.9457325339317322, "rewards/accuracies": 0.875, "rewards/chosen": -0.07822142541408539, "rewards/margins": 0.04011334478855133, "rewards/rejected": -0.11833477765321732, "step": 592 }, { "epoch": 0.3617508006710386, "grad_norm": 4.260014533996582, "learning_rate": 7.448377219840783e-06, "log_odds_chosen": 0.5895287394523621, "log_odds_ratio": -0.49638646841049194, "logits/chosen": -0.7466962337493896, "logits/rejected": -0.5910942554473877, "logps/chosen": -0.9043412208557129, "logps/rejected": -1.2467612028121948, "loss": 1.1677, "nll_loss": 1.0039088726043701, "rewards/accuracies": 0.875, "rewards/chosen": -0.09043412655591965, "rewards/margins": 0.03424198925495148, "rewards/rejected": -0.12467611581087112, "step": 593 }, { "epoch": 0.3623608357480555, "grad_norm": 1.3402519226074219, "learning_rate": 7.44739742804654e-06, "log_odds_chosen": 0.4427538812160492, "log_odds_ratio": -0.5855245590209961, "logits/chosen": -0.6826469302177429, "logits/rejected": -0.5655345916748047, "logps/chosen": -0.7458164095878601, "logps/rejected": -0.956011176109314, "loss": 1.0657, "nll_loss": 0.9086587429046631, "rewards/accuracies": 0.5, "rewards/chosen": -0.07458163797855377, "rewards/margins": 0.021019477397203445, "rewards/rejected": -0.09560111910104752, "step": 594 }, { "epoch": 0.36297087082507246, "grad_norm": 2.055922031402588, "learning_rate": 7.446417636252296e-06, "log_odds_chosen": -0.3759644329547882, "log_odds_ratio": -0.9614152908325195, "logits/chosen": -1.0929185152053833, "logits/rejected": -0.9511215686798096, "logps/chosen": -1.2179145812988281, "logps/rejected": -0.9762753248214722, "loss": 1.4036, "nll_loss": 1.651031732559204, "rewards/accuracies": 0.375, "rewards/chosen": -0.12179146707057953, "rewards/margins": -0.02416393533349037, "rewards/rejected": -0.09762752801179886, "step": 595 }, { "epoch": 0.36358090590208936, "grad_norm": 5.403359889984131, "learning_rate": 7.445437844458053e-06, "log_odds_chosen": 1.01719069480896, "log_odds_ratio": -0.39734965562820435, "logits/chosen": -0.7855740189552307, "logits/rejected": -0.7149321436882019, "logps/chosen": -0.7331108450889587, "logps/rejected": -1.3454619646072388, "loss": 1.1849, "nll_loss": 1.0876920223236084, "rewards/accuracies": 0.75, "rewards/chosen": -0.07331108301877975, "rewards/margins": 0.06123511493206024, "rewards/rejected": -0.1345461905002594, "step": 596 }, { "epoch": 0.3641909409791063, "grad_norm": 1.8862358331680298, "learning_rate": 7.444458052663809e-06, "log_odds_chosen": 0.6684749126434326, "log_odds_ratio": -0.5237428545951843, "logits/chosen": -0.921517014503479, "logits/rejected": -0.816099226474762, "logps/chosen": -0.948904275894165, "logps/rejected": -1.3132961988449097, "loss": 1.1392, "nll_loss": 1.1766479015350342, "rewards/accuracies": 0.75, "rewards/chosen": -0.09489042311906815, "rewards/margins": 0.036439199000597, "rewards/rejected": -0.13132962584495544, "step": 597 }, { "epoch": 0.3648009760561232, "grad_norm": 1.392108678817749, "learning_rate": 7.443478260869564e-06, "log_odds_chosen": 0.2718592882156372, "log_odds_ratio": -0.6143250465393066, "logits/chosen": -0.9377920627593994, "logits/rejected": -0.8277531862258911, "logps/chosen": -0.9015789031982422, "logps/rejected": -1.019563913345337, "loss": 1.1718, "nll_loss": 1.1422655582427979, "rewards/accuracies": 0.5, "rewards/chosen": -0.0901578888297081, "rewards/margins": 0.011798501014709473, "rewards/rejected": -0.10195639729499817, "step": 598 }, { "epoch": 0.3654110111331402, "grad_norm": 1.3981558084487915, "learning_rate": 7.442498469075321e-06, "log_odds_chosen": 0.13924546539783478, "log_odds_ratio": -0.6532320976257324, "logits/chosen": -0.843944251537323, "logits/rejected": -0.7389590740203857, "logps/chosen": -0.9049407839775085, "logps/rejected": -0.990310549736023, "loss": 1.0899, "nll_loss": 1.124180555343628, "rewards/accuracies": 0.75, "rewards/chosen": -0.0904940813779831, "rewards/margins": 0.008536981418728828, "rewards/rejected": -0.09903106093406677, "step": 599 }, { "epoch": 0.3660210462101571, "grad_norm": 3.108792304992676, "learning_rate": 7.441518677281077e-06, "log_odds_chosen": 1.017777681350708, "log_odds_ratio": -0.3363773822784424, "logits/chosen": -0.6882215738296509, "logits/rejected": -0.4045824706554413, "logps/chosen": -0.7334225177764893, "logps/rejected": -1.3659350872039795, "loss": 1.1434, "nll_loss": 1.0284574031829834, "rewards/accuracies": 1.0, "rewards/chosen": -0.07334225624799728, "rewards/margins": 0.06325125694274902, "rewards/rejected": -0.1365935206413269, "step": 600 }, { "epoch": 0.36663108128717403, "grad_norm": 1.8885043859481812, "learning_rate": 7.4405388854868335e-06, "log_odds_chosen": 1.005721092224121, "log_odds_ratio": -0.4410873353481293, "logits/chosen": -0.5496746897697449, "logits/rejected": -0.6601515412330627, "logps/chosen": -0.4618642032146454, "logps/rejected": -0.8679822087287903, "loss": 1.2461, "nll_loss": 1.1265380382537842, "rewards/accuracies": 0.875, "rewards/chosen": -0.0461864247918129, "rewards/margins": 0.04061180353164673, "rewards/rejected": -0.08679822087287903, "step": 601 }, { "epoch": 0.36724111636419093, "grad_norm": 1.1251360177993774, "learning_rate": 7.4395590936925905e-06, "log_odds_chosen": 0.684779942035675, "log_odds_ratio": -0.44065600633621216, "logits/chosen": -0.6777834892272949, "logits/rejected": -0.5711458325386047, "logps/chosen": -0.8945109248161316, "logps/rejected": -1.2951819896697998, "loss": 1.0375, "nll_loss": 0.9774079918861389, "rewards/accuracies": 1.0, "rewards/chosen": -0.08945108950138092, "rewards/margins": 0.04006710648536682, "rewards/rejected": -0.12951819598674774, "step": 602 }, { "epoch": 0.3678511514412079, "grad_norm": 2.0844497680664062, "learning_rate": 7.438579301898347e-06, "log_odds_chosen": 1.014418363571167, "log_odds_ratio": -0.5027972459793091, "logits/chosen": -0.8827579021453857, "logits/rejected": -0.6005574464797974, "logps/chosen": -0.8506317138671875, "logps/rejected": -1.4520363807678223, "loss": 1.0326, "nll_loss": 1.1192500591278076, "rewards/accuracies": 0.625, "rewards/chosen": -0.08506317436695099, "rewards/margins": 0.060140468180179596, "rewards/rejected": -0.14520363509655, "step": 603 }, { "epoch": 0.3684611865182248, "grad_norm": 2.2148611545562744, "learning_rate": 7.437599510104102e-06, "log_odds_chosen": 0.7526483535766602, "log_odds_ratio": -0.4091569781303406, "logits/chosen": -0.6506956815719604, "logits/rejected": -0.7128547430038452, "logps/chosen": -0.7632309198379517, "logps/rejected": -1.2400398254394531, "loss": 1.0745, "nll_loss": 0.94853675365448, "rewards/accuracies": 1.0, "rewards/chosen": -0.07632309198379517, "rewards/margins": 0.04768088459968567, "rewards/rejected": -0.12400397658348083, "step": 604 }, { "epoch": 0.36907122159524175, "grad_norm": 2.389051675796509, "learning_rate": 7.436619718309859e-06, "log_odds_chosen": 0.01776285097002983, "log_odds_ratio": -0.7326312065124512, "logits/chosen": -0.7172214984893799, "logits/rejected": -0.8391001224517822, "logps/chosen": -1.1163400411605835, "logps/rejected": -1.1322344541549683, "loss": 1.2637, "nll_loss": 1.5225028991699219, "rewards/accuracies": 0.5, "rewards/chosen": -0.11163400113582611, "rewards/margins": 0.0015894449315965176, "rewards/rejected": -0.11322344839572906, "step": 605 }, { "epoch": 0.36968125667225865, "grad_norm": 1.3848142623901367, "learning_rate": 7.435639926515615e-06, "log_odds_chosen": 0.6773809194564819, "log_odds_ratio": -0.5669771432876587, "logits/chosen": -0.802541971206665, "logits/rejected": -0.6279037594795227, "logps/chosen": -1.0206109285354614, "logps/rejected": -1.528174638748169, "loss": 0.9995, "nll_loss": 1.1508314609527588, "rewards/accuracies": 0.625, "rewards/chosen": -0.10206109285354614, "rewards/margins": 0.05075637251138687, "rewards/rejected": -0.15281745791435242, "step": 606 }, { "epoch": 0.3702912917492756, "grad_norm": 2.1427762508392334, "learning_rate": 7.434660134721371e-06, "log_odds_chosen": 0.4298461675643921, "log_odds_ratio": -0.549857497215271, "logits/chosen": -0.6707813739776611, "logits/rejected": -0.6093297004699707, "logps/chosen": -0.7450275421142578, "logps/rejected": -0.9909882545471191, "loss": 1.1966, "nll_loss": 0.9200478792190552, "rewards/accuracies": 0.875, "rewards/chosen": -0.07450275123119354, "rewards/margins": 0.024596065282821655, "rewards/rejected": -0.0990988165140152, "step": 607 }, { "epoch": 0.3709013268262925, "grad_norm": 1.7072937488555908, "learning_rate": 7.433680342927128e-06, "log_odds_chosen": 0.5190215110778809, "log_odds_ratio": -0.5176078081130981, "logits/chosen": -0.7434830069541931, "logits/rejected": -0.6305074691772461, "logps/chosen": -0.8123685717582703, "logps/rejected": -1.048757791519165, "loss": 1.1183, "nll_loss": 1.0005565881729126, "rewards/accuracies": 0.75, "rewards/chosen": -0.08123685419559479, "rewards/margins": 0.023638932034373283, "rewards/rejected": -0.10487579554319382, "step": 608 }, { "epoch": 0.37151136190330947, "grad_norm": 1.7236701250076294, "learning_rate": 7.432700551132883e-06, "log_odds_chosen": 0.13376274704933167, "log_odds_ratio": -0.7007250189781189, "logits/chosen": -0.7321257591247559, "logits/rejected": -0.7007670402526855, "logps/chosen": -0.9175659418106079, "logps/rejected": -1.0189993381500244, "loss": 1.2665, "nll_loss": 1.3466668128967285, "rewards/accuracies": 0.625, "rewards/chosen": -0.09175658971071243, "rewards/margins": 0.010143335908651352, "rewards/rejected": -0.10189992934465408, "step": 609 }, { "epoch": 0.37212139698032637, "grad_norm": 2.5049617290496826, "learning_rate": 7.43172075933864e-06, "log_odds_chosen": 0.9869920015335083, "log_odds_ratio": -0.4195936322212219, "logits/chosen": -0.7663689851760864, "logits/rejected": -0.7268314361572266, "logps/chosen": -0.619701623916626, "logps/rejected": -1.173617959022522, "loss": 1.1384, "nll_loss": 0.9008141756057739, "rewards/accuracies": 0.75, "rewards/chosen": -0.06197016313672066, "rewards/margins": 0.05539162829518318, "rewards/rejected": -0.11736179143190384, "step": 610 }, { "epoch": 0.37273143205734327, "grad_norm": 3.0118868350982666, "learning_rate": 7.4307409675443965e-06, "log_odds_chosen": 0.23199957609176636, "log_odds_ratio": -0.6682478189468384, "logits/chosen": -0.738090455532074, "logits/rejected": -0.7577811479568481, "logps/chosen": -1.2196316719055176, "logps/rejected": -1.302171230316162, "loss": 1.3473, "nll_loss": 1.2717955112457275, "rewards/accuracies": 0.5, "rewards/chosen": -0.12196317315101624, "rewards/margins": 0.008253946900367737, "rewards/rejected": -0.13021712005138397, "step": 611 }, { "epoch": 0.3733414671343602, "grad_norm": 2.05745005607605, "learning_rate": 7.429761175750153e-06, "log_odds_chosen": 0.5544310808181763, "log_odds_ratio": -0.587039589881897, "logits/chosen": -0.9786869287490845, "logits/rejected": -0.7448559403419495, "logps/chosen": -0.9017620086669922, "logps/rejected": -1.2619515657424927, "loss": 1.322, "nll_loss": 1.4617222547531128, "rewards/accuracies": 0.75, "rewards/chosen": -0.09017620980739594, "rewards/margins": 0.03601895272731781, "rewards/rejected": -0.12619516253471375, "step": 612 }, { "epoch": 0.37395150221137713, "grad_norm": 1.822627305984497, "learning_rate": 7.4287813839559096e-06, "log_odds_chosen": 0.5446133613586426, "log_odds_ratio": -0.6669130325317383, "logits/chosen": -0.6935368180274963, "logits/rejected": -0.733059287071228, "logps/chosen": -0.9062134027481079, "logps/rejected": -1.074958086013794, "loss": 1.2713, "nll_loss": 1.5964984893798828, "rewards/accuracies": 0.625, "rewards/chosen": -0.09062133729457855, "rewards/margins": 0.01687447540462017, "rewards/rejected": -0.10749581456184387, "step": 613 }, { "epoch": 0.3745615372883941, "grad_norm": 1.5177462100982666, "learning_rate": 7.427801592161666e-06, "log_odds_chosen": 0.6113395690917969, "log_odds_ratio": -0.5497816801071167, "logits/chosen": -0.8460979461669922, "logits/rejected": -0.5612041354179382, "logps/chosen": -0.9501785039901733, "logps/rejected": -1.240523099899292, "loss": 1.193, "nll_loss": 1.163496732711792, "rewards/accuracies": 0.75, "rewards/chosen": -0.09501785039901733, "rewards/margins": 0.029034463688731194, "rewards/rejected": -0.12405231595039368, "step": 614 }, { "epoch": 0.375171572365411, "grad_norm": 3.6060023307800293, "learning_rate": 7.426821800367421e-06, "log_odds_chosen": 0.42463400959968567, "log_odds_ratio": -0.5410842895507812, "logits/chosen": -0.626420259475708, "logits/rejected": -0.6096925735473633, "logps/chosen": -0.8480997085571289, "logps/rejected": -1.090139389038086, "loss": 1.1553, "nll_loss": 1.0530489683151245, "rewards/accuracies": 0.75, "rewards/chosen": -0.08480997383594513, "rewards/margins": 0.024203967303037643, "rewards/rejected": -0.10901393741369247, "step": 615 }, { "epoch": 0.37578160744242795, "grad_norm": 3.7326788902282715, "learning_rate": 7.425842008573178e-06, "log_odds_chosen": 0.7425811290740967, "log_odds_ratio": -0.4794152081012726, "logits/chosen": -0.30737343430519104, "logits/rejected": -0.5848971605300903, "logps/chosen": -1.0328134298324585, "logps/rejected": -1.4883455038070679, "loss": 1.264, "nll_loss": 1.3990159034729004, "rewards/accuracies": 0.875, "rewards/chosen": -0.10328134894371033, "rewards/margins": 0.045553211122751236, "rewards/rejected": -0.14883455634117126, "step": 616 }, { "epoch": 0.37639164251944485, "grad_norm": 1.377531886100769, "learning_rate": 7.424862216778934e-06, "log_odds_chosen": 0.32950785756111145, "log_odds_ratio": -0.610196590423584, "logits/chosen": -0.7139403820037842, "logits/rejected": -0.7156597375869751, "logps/chosen": -1.0673024654388428, "logps/rejected": -1.267635464668274, "loss": 1.0336, "nll_loss": 1.1045125722885132, "rewards/accuracies": 0.625, "rewards/chosen": -0.10673025250434875, "rewards/margins": 0.02003329247236252, "rewards/rejected": -0.12676355242729187, "step": 617 }, { "epoch": 0.3770016775964618, "grad_norm": 1.83469820022583, "learning_rate": 7.42388242498469e-06, "log_odds_chosen": 0.572563648223877, "log_odds_ratio": -0.505056619644165, "logits/chosen": -0.6505942940711975, "logits/rejected": -0.6664991974830627, "logps/chosen": -0.9370550513267517, "logps/rejected": -1.256557583808899, "loss": 1.18, "nll_loss": 1.1022087335586548, "rewards/accuracies": 0.875, "rewards/chosen": -0.09370550513267517, "rewards/margins": 0.03195026516914368, "rewards/rejected": -0.12565575540065765, "step": 618 }, { "epoch": 0.3776117126734787, "grad_norm": 4.6350321769714355, "learning_rate": 7.422902633190447e-06, "log_odds_chosen": -0.10590856522321701, "log_odds_ratio": -0.7987663745880127, "logits/chosen": -0.9953726530075073, "logits/rejected": -0.8275021314620972, "logps/chosen": -0.9969909191131592, "logps/rejected": -0.92560875415802, "loss": 1.2686, "nll_loss": 1.1621108055114746, "rewards/accuracies": 0.25, "rewards/chosen": -0.09969909489154816, "rewards/margins": -0.007138215005397797, "rewards/rejected": -0.09256087243556976, "step": 619 }, { "epoch": 0.37822174775049566, "grad_norm": 1.315550446510315, "learning_rate": 7.421922841396203e-06, "log_odds_chosen": 0.2656572759151459, "log_odds_ratio": -0.6427844762802124, "logits/chosen": -0.9811117649078369, "logits/rejected": -0.5708533525466919, "logps/chosen": -1.0669373273849487, "logps/rejected": -1.2481576204299927, "loss": 1.2055, "nll_loss": 1.3038978576660156, "rewards/accuracies": 0.625, "rewards/chosen": -0.10669373720884323, "rewards/margins": 0.018122021108865738, "rewards/rejected": -0.12481575459241867, "step": 620 }, { "epoch": 0.37883178282751256, "grad_norm": 2.2852749824523926, "learning_rate": 7.4209430496019585e-06, "log_odds_chosen": 0.43117398023605347, "log_odds_ratio": -0.577053964138031, "logits/chosen": -0.6194385290145874, "logits/rejected": -0.39589232206344604, "logps/chosen": -0.8831185102462769, "logps/rejected": -1.1716370582580566, "loss": 1.0389, "nll_loss": 0.9857202768325806, "rewards/accuracies": 0.625, "rewards/chosen": -0.08831185102462769, "rewards/margins": 0.028851857408881187, "rewards/rejected": -0.11716371774673462, "step": 621 }, { "epoch": 0.3794418179045295, "grad_norm": 2.5697789192199707, "learning_rate": 7.4199632578077155e-06, "log_odds_chosen": 0.4579043984413147, "log_odds_ratio": -0.59878009557724, "logits/chosen": -0.8279207944869995, "logits/rejected": -0.7857223153114319, "logps/chosen": -0.8316940069198608, "logps/rejected": -1.051562786102295, "loss": 1.2795, "nll_loss": 1.262333869934082, "rewards/accuracies": 0.625, "rewards/chosen": -0.08316940069198608, "rewards/margins": 0.021986883133649826, "rewards/rejected": -0.10515628010034561, "step": 622 }, { "epoch": 0.3800518529815464, "grad_norm": 3.437941074371338, "learning_rate": 7.418983466013472e-06, "log_odds_chosen": 1.024071216583252, "log_odds_ratio": -0.545897364616394, "logits/chosen": -0.6816886067390442, "logits/rejected": -0.4585212469100952, "logps/chosen": -0.7271715402603149, "logps/rejected": -1.3049980401992798, "loss": 1.0436, "nll_loss": 0.8262047171592712, "rewards/accuracies": 0.75, "rewards/chosen": -0.07271715998649597, "rewards/margins": 0.057782649993896484, "rewards/rejected": -0.13049980998039246, "step": 623 }, { "epoch": 0.3806618880585634, "grad_norm": 7.130721569061279, "learning_rate": 7.418003674219229e-06, "log_odds_chosen": 1.1866778135299683, "log_odds_ratio": -0.33057260513305664, "logits/chosen": -0.6991784572601318, "logits/rejected": -0.5123142004013062, "logps/chosen": -0.6802780628204346, "logps/rejected": -1.3893197774887085, "loss": 1.2537, "nll_loss": 0.9163042902946472, "rewards/accuracies": 0.875, "rewards/chosen": -0.0680278018116951, "rewards/margins": 0.07090416550636292, "rewards/rejected": -0.1389319747686386, "step": 624 }, { "epoch": 0.3812719231355803, "grad_norm": 10.275102615356445, "learning_rate": 7.417023882424985e-06, "log_odds_chosen": 0.42747071385383606, "log_odds_ratio": -0.5682465434074402, "logits/chosen": -0.6592702865600586, "logits/rejected": -0.6777412295341492, "logps/chosen": -0.7029582262039185, "logps/rejected": -0.942737340927124, "loss": 1.0761, "nll_loss": 0.8784524202346802, "rewards/accuracies": 0.625, "rewards/chosen": -0.07029582560062408, "rewards/margins": 0.02397790551185608, "rewards/rejected": -0.09427373111248016, "step": 625 }, { "epoch": 0.38188195821259724, "grad_norm": 1.812384009361267, "learning_rate": 7.41604409063074e-06, "log_odds_chosen": 0.4485730528831482, "log_odds_ratio": -0.5435431003570557, "logits/chosen": -0.7712128162384033, "logits/rejected": -0.6750677824020386, "logps/chosen": -0.8190521001815796, "logps/rejected": -1.0776863098144531, "loss": 1.0818, "nll_loss": 1.075966238975525, "rewards/accuracies": 0.875, "rewards/chosen": -0.08190520852804184, "rewards/margins": 0.025863416492938995, "rewards/rejected": -0.10776863247156143, "step": 626 }, { "epoch": 0.38249199328961414, "grad_norm": 5.35492467880249, "learning_rate": 7.415064298836497e-06, "log_odds_chosen": 1.253870964050293, "log_odds_ratio": -0.46252351999282837, "logits/chosen": -0.8228973746299744, "logits/rejected": -0.7807573080062866, "logps/chosen": -0.7228237390518188, "logps/rejected": -1.6746872663497925, "loss": 1.1599, "nll_loss": 1.158524990081787, "rewards/accuracies": 0.75, "rewards/chosen": -0.07228238135576248, "rewards/margins": 0.09518635272979736, "rewards/rejected": -0.16746872663497925, "step": 627 }, { "epoch": 0.3831020283666311, "grad_norm": 4.082261562347412, "learning_rate": 7.414084507042253e-06, "log_odds_chosen": 0.3355128765106201, "log_odds_ratio": -0.6655941605567932, "logits/chosen": -0.8437479734420776, "logits/rejected": -0.7781189680099487, "logps/chosen": -1.0614453554153442, "logps/rejected": -1.340005874633789, "loss": 1.1401, "nll_loss": 1.2535326480865479, "rewards/accuracies": 0.375, "rewards/chosen": -0.10614454001188278, "rewards/margins": 0.027856040745973587, "rewards/rejected": -0.13400058448314667, "step": 628 }, { "epoch": 0.383712063443648, "grad_norm": 1.9658299684524536, "learning_rate": 7.413104715248009e-06, "log_odds_chosen": 0.2509809136390686, "log_odds_ratio": -0.6541566848754883, "logits/chosen": -0.6864594221115112, "logits/rejected": -0.7053577899932861, "logps/chosen": -0.9085996150970459, "logps/rejected": -1.0344158411026, "loss": 1.2842, "nll_loss": 0.9842290282249451, "rewards/accuracies": 0.625, "rewards/chosen": -0.09085996448993683, "rewards/margins": 0.01258162036538124, "rewards/rejected": -0.10344158113002777, "step": 629 }, { "epoch": 0.38432209852066496, "grad_norm": 2.8633124828338623, "learning_rate": 7.412124923453766e-06, "log_odds_chosen": 0.5707064270973206, "log_odds_ratio": -0.5346137285232544, "logits/chosen": -0.8007265329360962, "logits/rejected": -0.7701824903488159, "logps/chosen": -0.9135830402374268, "logps/rejected": -1.2938125133514404, "loss": 1.2381, "nll_loss": 1.260535478591919, "rewards/accuracies": 0.75, "rewards/chosen": -0.09135830402374268, "rewards/margins": 0.038022954016923904, "rewards/rejected": -0.12938126921653748, "step": 630 }, { "epoch": 0.38493213359768186, "grad_norm": 1.7794982194900513, "learning_rate": 7.411145131659522e-06, "log_odds_chosen": 0.5961877107620239, "log_odds_ratio": -0.639489471912384, "logits/chosen": -1.0719540119171143, "logits/rejected": -0.9939005374908447, "logps/chosen": -1.1040446758270264, "logps/rejected": -1.5910414457321167, "loss": 1.1545, "nll_loss": 1.3386462926864624, "rewards/accuracies": 0.75, "rewards/chosen": -0.11040446907281876, "rewards/margins": 0.04869966208934784, "rewards/rejected": -0.1591041386127472, "step": 631 }, { "epoch": 0.3855421686746988, "grad_norm": 4.0880961418151855, "learning_rate": 7.4101653398652776e-06, "log_odds_chosen": 0.6708856225013733, "log_odds_ratio": -0.44371551275253296, "logits/chosen": -1.115296721458435, "logits/rejected": -0.9822975397109985, "logps/chosen": -0.9751896262168884, "logps/rejected": -1.4188472032546997, "loss": 1.1539, "nll_loss": 1.142509937286377, "rewards/accuracies": 0.75, "rewards/chosen": -0.09751896560192108, "rewards/margins": 0.044365763664245605, "rewards/rejected": -0.1418847292661667, "step": 632 }, { "epoch": 0.3861522037517157, "grad_norm": 5.029071807861328, "learning_rate": 7.4091855480710345e-06, "log_odds_chosen": 0.5019745826721191, "log_odds_ratio": -0.6218636631965637, "logits/chosen": -0.8267354965209961, "logits/rejected": -0.8004283905029297, "logps/chosen": -0.9578025341033936, "logps/rejected": -1.2265329360961914, "loss": 1.3137, "nll_loss": 1.1761999130249023, "rewards/accuracies": 0.75, "rewards/chosen": -0.09578025341033936, "rewards/margins": 0.026873044669628143, "rewards/rejected": -0.1226532906293869, "step": 633 }, { "epoch": 0.3867622388287327, "grad_norm": 1.401485800743103, "learning_rate": 7.408205756276791e-06, "log_odds_chosen": 0.82308429479599, "log_odds_ratio": -0.4457699954509735, "logits/chosen": -0.6287276744842529, "logits/rejected": -0.6498594284057617, "logps/chosen": -0.6447463631629944, "logps/rejected": -1.0626295804977417, "loss": 1.1075, "nll_loss": 0.9717241525650024, "rewards/accuracies": 0.75, "rewards/chosen": -0.06447463482618332, "rewards/margins": 0.04178832471370697, "rewards/rejected": -0.10626296699047089, "step": 634 }, { "epoch": 0.3873722739057496, "grad_norm": 1.8312557935714722, "learning_rate": 7.407225964482548e-06, "log_odds_chosen": 1.0426445007324219, "log_odds_ratio": -0.4271458387374878, "logits/chosen": -0.5948812961578369, "logits/rejected": -0.4512903690338135, "logps/chosen": -0.7940087914466858, "logps/rejected": -1.3971962928771973, "loss": 1.1219, "nll_loss": 0.918402910232544, "rewards/accuracies": 0.875, "rewards/chosen": -0.07940088212490082, "rewards/margins": 0.06031874567270279, "rewards/rejected": -0.1397196352481842, "step": 635 }, { "epoch": 0.38798230898276653, "grad_norm": 1.8005261421203613, "learning_rate": 7.406246172688304e-06, "log_odds_chosen": 1.1779589653015137, "log_odds_ratio": -0.3660156726837158, "logits/chosen": -0.5033475160598755, "logits/rejected": -0.46388766169548035, "logps/chosen": -0.6102933883666992, "logps/rejected": -1.2849311828613281, "loss": 1.0847, "nll_loss": 0.7968538999557495, "rewards/accuracies": 0.875, "rewards/chosen": -0.0610293447971344, "rewards/margins": 0.06746377050876617, "rewards/rejected": -0.12849311530590057, "step": 636 }, { "epoch": 0.38859234405978343, "grad_norm": 3.1331686973571777, "learning_rate": 7.40526638089406e-06, "log_odds_chosen": 0.5031171441078186, "log_odds_ratio": -0.5371063947677612, "logits/chosen": -0.7888015508651733, "logits/rejected": -0.6313671469688416, "logps/chosen": -0.8223208785057068, "logps/rejected": -1.0864527225494385, "loss": 1.0713, "nll_loss": 1.049842357635498, "rewards/accuracies": 0.75, "rewards/chosen": -0.08223208785057068, "rewards/margins": 0.026413198560476303, "rewards/rejected": -0.10864528268575668, "step": 637 }, { "epoch": 0.3892023791368004, "grad_norm": 1.5944411754608154, "learning_rate": 7.404286589099816e-06, "log_odds_chosen": 1.4998682737350464, "log_odds_ratio": -0.2542797327041626, "logits/chosen": -0.45644572377204895, "logits/rejected": -0.4990623891353607, "logps/chosen": -0.6224766373634338, "logps/rejected": -1.534695029258728, "loss": 1.0402, "nll_loss": 0.7928665280342102, "rewards/accuracies": 1.0, "rewards/chosen": -0.06224766746163368, "rewards/margins": 0.09122183918952942, "rewards/rejected": -0.1534695029258728, "step": 638 }, { "epoch": 0.3898124142138173, "grad_norm": 1.469588041305542, "learning_rate": 7.403306797305572e-06, "log_odds_chosen": -0.19680915772914886, "log_odds_ratio": -0.8492496609687805, "logits/chosen": -0.8690720796585083, "logits/rejected": -0.6522045731544495, "logps/chosen": -1.0672112703323364, "logps/rejected": -0.9790758490562439, "loss": 1.222, "nll_loss": 1.3422967195510864, "rewards/accuracies": 0.25, "rewards/chosen": -0.10672113299369812, "rewards/margins": -0.008813542313873768, "rewards/rejected": -0.09790759533643723, "step": 639 }, { "epoch": 0.39042244929083425, "grad_norm": 1.480901837348938, "learning_rate": 7.402327005511328e-06, "log_odds_chosen": 0.534442126750946, "log_odds_ratio": -0.5583986043930054, "logits/chosen": -0.9570493102073669, "logits/rejected": -0.5373808741569519, "logps/chosen": -1.0662684440612793, "logps/rejected": -1.4195109605789185, "loss": 1.2207, "nll_loss": 1.1895557641983032, "rewards/accuracies": 0.75, "rewards/chosen": -0.10662685334682465, "rewards/margins": 0.03532424941658974, "rewards/rejected": -0.14195109903812408, "step": 640 }, { "epoch": 0.39103248436785115, "grad_norm": 1.0747212171554565, "learning_rate": 7.401347213717085e-06, "log_odds_chosen": 0.8266416788101196, "log_odds_ratio": -0.3967078626155853, "logits/chosen": -0.6350427865982056, "logits/rejected": -0.7144795060157776, "logps/chosen": -0.7441815137863159, "logps/rejected": -1.2348285913467407, "loss": 1.1043, "nll_loss": 1.0353223085403442, "rewards/accuracies": 0.875, "rewards/chosen": -0.07441814988851547, "rewards/margins": 0.049064718186855316, "rewards/rejected": -0.12348287552595139, "step": 641 }, { "epoch": 0.39164251944486805, "grad_norm": 1.4492132663726807, "learning_rate": 7.400367421922841e-06, "log_odds_chosen": 0.14118412137031555, "log_odds_ratio": -0.7096278667449951, "logits/chosen": -0.6690101027488708, "logits/rejected": -0.5653053522109985, "logps/chosen": -0.9355993270874023, "logps/rejected": -1.023844838142395, "loss": 1.2429, "nll_loss": 1.0950939655303955, "rewards/accuracies": 0.625, "rewards/chosen": -0.09355993568897247, "rewards/margins": 0.008824549615383148, "rewards/rejected": -0.10238448530435562, "step": 642 }, { "epoch": 0.392252554521885, "grad_norm": 1.798403263092041, "learning_rate": 7.399387630128597e-06, "log_odds_chosen": -0.38448846340179443, "log_odds_ratio": -1.0451629161834717, "logits/chosen": -0.8879040479660034, "logits/rejected": -0.8570289015769958, "logps/chosen": -1.22466242313385, "logps/rejected": -0.9232693314552307, "loss": 1.2491, "nll_loss": 1.5413720607757568, "rewards/accuracies": 0.25, "rewards/chosen": -0.12246625125408173, "rewards/margins": -0.030139315873384476, "rewards/rejected": -0.09232693910598755, "step": 643 }, { "epoch": 0.3928625895989019, "grad_norm": 1.2431460618972778, "learning_rate": 7.3984078383343536e-06, "log_odds_chosen": 0.6005913615226746, "log_odds_ratio": -0.49601125717163086, "logits/chosen": -0.6471225023269653, "logits/rejected": -0.6821527481079102, "logps/chosen": -0.760350227355957, "logps/rejected": -1.0531798601150513, "loss": 1.118, "nll_loss": 0.9462687373161316, "rewards/accuracies": 0.75, "rewards/chosen": -0.0760350227355957, "rewards/margins": 0.02928296849131584, "rewards/rejected": -0.10531798750162125, "step": 644 }, { "epoch": 0.39347262467591887, "grad_norm": 2.0247724056243896, "learning_rate": 7.39742804654011e-06, "log_odds_chosen": 1.1081297397613525, "log_odds_ratio": -0.40728044509887695, "logits/chosen": -0.6165136694908142, "logits/rejected": -0.6443344950675964, "logps/chosen": -0.7356441020965576, "logps/rejected": -1.3739503622055054, "loss": 1.3187, "nll_loss": 1.0396426916122437, "rewards/accuracies": 0.75, "rewards/chosen": -0.07356441020965576, "rewards/margins": 0.06383062154054642, "rewards/rejected": -0.13739503920078278, "step": 645 }, { "epoch": 0.39408265975293577, "grad_norm": 1.901558518409729, "learning_rate": 7.396448254745866e-06, "log_odds_chosen": 0.17850767076015472, "log_odds_ratio": -0.6548815965652466, "logits/chosen": -1.0588269233703613, "logits/rejected": -0.7449964284896851, "logps/chosen": -0.983338475227356, "logps/rejected": -1.1001441478729248, "loss": 1.0832, "nll_loss": 1.224548101425171, "rewards/accuracies": 0.5, "rewards/chosen": -0.09833385050296783, "rewards/margins": 0.011680570431053638, "rewards/rejected": -0.1100144162774086, "step": 646 }, { "epoch": 0.39469269482995273, "grad_norm": 3.802279233932495, "learning_rate": 7.395468462951623e-06, "log_odds_chosen": 0.42755770683288574, "log_odds_ratio": -0.5950839519500732, "logits/chosen": -0.6503424048423767, "logits/rejected": -0.7692367434501648, "logps/chosen": -0.8745030164718628, "logps/rejected": -1.1028711795806885, "loss": 1.3241, "nll_loss": 1.3029417991638184, "rewards/accuracies": 0.5, "rewards/chosen": -0.0874503031373024, "rewards/margins": 0.022836808115243912, "rewards/rejected": -0.11028710752725601, "step": 647 }, { "epoch": 0.39530272990696963, "grad_norm": 1.7483949661254883, "learning_rate": 7.394488671157379e-06, "log_odds_chosen": 0.7418433427810669, "log_odds_ratio": -0.5465047359466553, "logits/chosen": -0.804847240447998, "logits/rejected": -0.6991739869117737, "logps/chosen": -0.8415488004684448, "logps/rejected": -1.4278531074523926, "loss": 1.1916, "nll_loss": 1.0929844379425049, "rewards/accuracies": 0.75, "rewards/chosen": -0.0841548815369606, "rewards/margins": 0.05863043665885925, "rewards/rejected": -0.14278531074523926, "step": 648 }, { "epoch": 0.3959127649839866, "grad_norm": 2.324122428894043, "learning_rate": 7.393508879363134e-06, "log_odds_chosen": 0.6512834429740906, "log_odds_ratio": -0.5722900629043579, "logits/chosen": -0.6947463750839233, "logits/rejected": -0.5675257444381714, "logps/chosen": -0.8631054759025574, "logps/rejected": -1.215415596961975, "loss": 1.091, "nll_loss": 0.8841413259506226, "rewards/accuracies": 0.625, "rewards/chosen": -0.08631055057048798, "rewards/margins": 0.03523101285099983, "rewards/rejected": -0.12154155969619751, "step": 649 }, { "epoch": 0.3965228000610035, "grad_norm": 1.9009461402893066, "learning_rate": 7.392529087568891e-06, "log_odds_chosen": 0.7567033767700195, "log_odds_ratio": -0.47381314635276794, "logits/chosen": -0.7071279287338257, "logits/rejected": -0.6265350580215454, "logps/chosen": -0.8059905767440796, "logps/rejected": -1.249855875968933, "loss": 1.1002, "nll_loss": 0.9685741662979126, "rewards/accuracies": 0.75, "rewards/chosen": -0.08059905469417572, "rewards/margins": 0.04438653588294983, "rewards/rejected": -0.12498559057712555, "step": 650 }, { "epoch": 0.39713283513802045, "grad_norm": 5.698590278625488, "learning_rate": 7.391549295774647e-06, "log_odds_chosen": 0.44663307070732117, "log_odds_ratio": -0.6371662020683289, "logits/chosen": -0.6379448175430298, "logits/rejected": -0.5213391184806824, "logps/chosen": -0.7698338031768799, "logps/rejected": -0.9753641486167908, "loss": 0.9486, "nll_loss": 0.8794859051704407, "rewards/accuracies": 0.5, "rewards/chosen": -0.07698338478803635, "rewards/margins": 0.02055303007364273, "rewards/rejected": -0.09753641486167908, "step": 651 }, { "epoch": 0.39774287021503735, "grad_norm": 1.7390856742858887, "learning_rate": 7.390569503980404e-06, "log_odds_chosen": 0.2620166540145874, "log_odds_ratio": -0.7213220000267029, "logits/chosen": -0.6579338312149048, "logits/rejected": -0.7983406186103821, "logps/chosen": -1.1719553470611572, "logps/rejected": -1.3207178115844727, "loss": 1.3433, "nll_loss": 1.46748948097229, "rewards/accuracies": 0.375, "rewards/chosen": -0.11719553172588348, "rewards/margins": 0.014876243658363819, "rewards/rejected": -0.13207177817821503, "step": 652 }, { "epoch": 0.3983529052920543, "grad_norm": 2.0302367210388184, "learning_rate": 7.38958971218616e-06, "log_odds_chosen": 0.42694228887557983, "log_odds_ratio": -0.5741420984268188, "logits/chosen": -0.9652789831161499, "logits/rejected": -0.8981277942657471, "logps/chosen": -0.8511216044425964, "logps/rejected": -1.1214652061462402, "loss": 1.1562, "nll_loss": 1.179802417755127, "rewards/accuracies": 0.75, "rewards/chosen": -0.08511215448379517, "rewards/margins": 0.027034372091293335, "rewards/rejected": -0.1121465414762497, "step": 653 }, { "epoch": 0.3989629403690712, "grad_norm": 1.846466302871704, "learning_rate": 7.3886099203919165e-06, "log_odds_chosen": 0.6422168612480164, "log_odds_ratio": -0.4837157726287842, "logits/chosen": -0.6113215088844299, "logits/rejected": -0.6132575273513794, "logps/chosen": -0.7427623271942139, "logps/rejected": -1.1223745346069336, "loss": 1.1319, "nll_loss": 0.9695774912834167, "rewards/accuracies": 0.875, "rewards/chosen": -0.07427623867988586, "rewards/margins": 0.037961218506097794, "rewards/rejected": -0.11223745346069336, "step": 654 }, { "epoch": 0.39957297544608816, "grad_norm": 1.2615214586257935, "learning_rate": 7.387630128597673e-06, "log_odds_chosen": 0.07575009763240814, "log_odds_ratio": -0.7354730367660522, "logits/chosen": -0.8888875842094421, "logits/rejected": -0.8064194917678833, "logps/chosen": -0.8634751439094543, "logps/rejected": -0.8334603309631348, "loss": 1.2056, "nll_loss": 1.0583288669586182, "rewards/accuracies": 0.5, "rewards/chosen": -0.08634752035140991, "rewards/margins": -0.003001481294631958, "rewards/rejected": -0.08334603905677795, "step": 655 }, { "epoch": 0.40018301052310506, "grad_norm": 1.3043169975280762, "learning_rate": 7.386650336803429e-06, "log_odds_chosen": 0.7304433584213257, "log_odds_ratio": -0.4373338520526886, "logits/chosen": -0.9384229183197021, "logits/rejected": -0.7914626598358154, "logps/chosen": -0.6014401912689209, "logps/rejected": -0.9944830536842346, "loss": 1.1239, "nll_loss": 1.1453745365142822, "rewards/accuracies": 0.875, "rewards/chosen": -0.06014402583241463, "rewards/margins": 0.03930428996682167, "rewards/rejected": -0.0994483083486557, "step": 656 }, { "epoch": 0.400793045600122, "grad_norm": 3.626957893371582, "learning_rate": 7.385670545009185e-06, "log_odds_chosen": 0.22381183505058289, "log_odds_ratio": -0.7828417420387268, "logits/chosen": -1.1568361520767212, "logits/rejected": -0.9422358274459839, "logps/chosen": -1.2398664951324463, "logps/rejected": -1.3118139505386353, "loss": 1.2415, "nll_loss": 1.4538441896438599, "rewards/accuracies": 0.625, "rewards/chosen": -0.12398666143417358, "rewards/margins": 0.0071947406977415085, "rewards/rejected": -0.13118140399456024, "step": 657 }, { "epoch": 0.4014030806771389, "grad_norm": 2.0582504272460938, "learning_rate": 7.384690753214942e-06, "log_odds_chosen": 0.38839465379714966, "log_odds_ratio": -0.5754706859588623, "logits/chosen": -0.8133124709129333, "logits/rejected": -0.7528368830680847, "logps/chosen": -0.9773575663566589, "logps/rejected": -1.178378939628601, "loss": 1.1639, "nll_loss": 1.136723518371582, "rewards/accuracies": 0.5, "rewards/chosen": -0.09773576259613037, "rewards/margins": 0.020102132111787796, "rewards/rejected": -0.11783789843320847, "step": 658 }, { "epoch": 0.4020131157541559, "grad_norm": 3.166407823562622, "learning_rate": 7.383710961420698e-06, "log_odds_chosen": 0.18227487802505493, "log_odds_ratio": -0.6582040786743164, "logits/chosen": -0.8846551775932312, "logits/rejected": -0.7112705707550049, "logps/chosen": -0.8342403173446655, "logps/rejected": -0.9352539777755737, "loss": 1.1018, "nll_loss": 1.0417091846466064, "rewards/accuracies": 0.625, "rewards/chosen": -0.08342403173446655, "rewards/margins": 0.010101371444761753, "rewards/rejected": -0.09352540224790573, "step": 659 }, { "epoch": 0.4026231508311728, "grad_norm": 1.393667221069336, "learning_rate": 7.382731169626454e-06, "log_odds_chosen": 1.017072081565857, "log_odds_ratio": -0.4382503032684326, "logits/chosen": -0.8470975160598755, "logits/rejected": -0.7920359969139099, "logps/chosen": -0.7959400415420532, "logps/rejected": -1.4940632581710815, "loss": 1.1607, "nll_loss": 0.9227932691574097, "rewards/accuracies": 0.75, "rewards/chosen": -0.07959400117397308, "rewards/margins": 0.06981232762336731, "rewards/rejected": -0.1494063287973404, "step": 660 }, { "epoch": 0.40323318590818974, "grad_norm": 1.468850016593933, "learning_rate": 7.38175137783221e-06, "log_odds_chosen": 0.2495456337928772, "log_odds_ratio": -0.6579104661941528, "logits/chosen": -0.8272875547409058, "logits/rejected": -0.8473895192146301, "logps/chosen": -0.7991277575492859, "logps/rejected": -0.9620398879051208, "loss": 1.0058, "nll_loss": 1.1424418687820435, "rewards/accuracies": 0.5, "rewards/chosen": -0.07991278171539307, "rewards/margins": 0.016291214153170586, "rewards/rejected": -0.0962039977312088, "step": 661 }, { "epoch": 0.40384322098520664, "grad_norm": 1.438616394996643, "learning_rate": 7.380771586037966e-06, "log_odds_chosen": 0.6026986837387085, "log_odds_ratio": -0.4906414747238159, "logits/chosen": -1.068711280822754, "logits/rejected": -0.8795879483222961, "logps/chosen": -0.9122006297111511, "logps/rejected": -1.3011506795883179, "loss": 1.2043, "nll_loss": 1.2248955965042114, "rewards/accuracies": 0.625, "rewards/chosen": -0.09122006595134735, "rewards/margins": 0.038895003497600555, "rewards/rejected": -0.1301150768995285, "step": 662 }, { "epoch": 0.4044532560622236, "grad_norm": 2.196329116821289, "learning_rate": 7.379791794243723e-06, "log_odds_chosen": 0.416800320148468, "log_odds_ratio": -0.5712971091270447, "logits/chosen": -0.9625936150550842, "logits/rejected": -0.8896626234054565, "logps/chosen": -0.9626561999320984, "logps/rejected": -1.2375659942626953, "loss": 1.2845, "nll_loss": 1.2526277303695679, "rewards/accuracies": 0.75, "rewards/chosen": -0.09626562148332596, "rewards/margins": 0.027490969747304916, "rewards/rejected": -0.12375659495592117, "step": 663 }, { "epoch": 0.4050632911392405, "grad_norm": 4.958156585693359, "learning_rate": 7.378812002449479e-06, "log_odds_chosen": 1.1765774488449097, "log_odds_ratio": -0.3276370167732239, "logits/chosen": -0.7379854917526245, "logits/rejected": -0.6824630498886108, "logps/chosen": -0.7357156276702881, "logps/rejected": -1.43797767162323, "loss": 1.0391, "nll_loss": 0.8684980273246765, "rewards/accuracies": 1.0, "rewards/chosen": -0.07357156276702881, "rewards/margins": 0.07022620737552643, "rewards/rejected": -0.14379777014255524, "step": 664 }, { "epoch": 0.40567332621625746, "grad_norm": 1.173964262008667, "learning_rate": 7.3778322106552355e-06, "log_odds_chosen": 0.6468262672424316, "log_odds_ratio": -0.5661402344703674, "logits/chosen": -0.7160661220550537, "logits/rejected": -0.7823420763015747, "logps/chosen": -0.7939325571060181, "logps/rejected": -1.2220007181167603, "loss": 1.2053, "nll_loss": 1.0367283821105957, "rewards/accuracies": 0.625, "rewards/chosen": -0.07939325273036957, "rewards/margins": 0.04280681908130646, "rewards/rejected": -0.12220007181167603, "step": 665 }, { "epoch": 0.40628336129327436, "grad_norm": 1.5568450689315796, "learning_rate": 7.376852418860992e-06, "log_odds_chosen": 0.5121436715126038, "log_odds_ratio": -0.510513186454773, "logits/chosen": -0.7017512321472168, "logits/rejected": -0.6287997961044312, "logps/chosen": -0.7474088668823242, "logps/rejected": -1.0087823867797852, "loss": 1.0346, "nll_loss": 0.857027530670166, "rewards/accuracies": 0.75, "rewards/chosen": -0.07474088668823242, "rewards/margins": 0.02613735944032669, "rewards/rejected": -0.10087823867797852, "step": 666 }, { "epoch": 0.4068933963702913, "grad_norm": 2.6583566665649414, "learning_rate": 7.375872627066748e-06, "log_odds_chosen": 0.4543219804763794, "log_odds_ratio": -0.5440211296081543, "logits/chosen": -0.6905971169471741, "logits/rejected": -0.7951527833938599, "logps/chosen": -0.9600241184234619, "logps/rejected": -1.2442653179168701, "loss": 1.2199, "nll_loss": 1.1449227333068848, "rewards/accuracies": 0.75, "rewards/chosen": -0.09600241482257843, "rewards/margins": 0.028424128890037537, "rewards/rejected": -0.12442654371261597, "step": 667 }, { "epoch": 0.4075034314473082, "grad_norm": 2.075460433959961, "learning_rate": 7.374892835272504e-06, "log_odds_chosen": 0.4425821900367737, "log_odds_ratio": -0.6611407995223999, "logits/chosen": -1.0112252235412598, "logits/rejected": -0.8047703504562378, "logps/chosen": -0.9137970209121704, "logps/rejected": -1.124578833580017, "loss": 1.1805, "nll_loss": 1.0381343364715576, "rewards/accuracies": 0.625, "rewards/chosen": -0.09137970954179764, "rewards/margins": 0.021078169345855713, "rewards/rejected": -0.11245787143707275, "step": 668 }, { "epoch": 0.4081134665243252, "grad_norm": 1.6922610998153687, "learning_rate": 7.373913043478261e-06, "log_odds_chosen": 0.3707704544067383, "log_odds_ratio": -0.6533543467521667, "logits/chosen": -0.9692888855934143, "logits/rejected": -0.856543242931366, "logps/chosen": -0.8818706274032593, "logps/rejected": -1.152168869972229, "loss": 1.2092, "nll_loss": 1.2859244346618652, "rewards/accuracies": 0.5, "rewards/chosen": -0.08818706125020981, "rewards/margins": 0.027029823511838913, "rewards/rejected": -0.11521688848733902, "step": 669 }, { "epoch": 0.4087235016013421, "grad_norm": 1.8176202774047852, "learning_rate": 7.372933251684017e-06, "log_odds_chosen": 0.7519971132278442, "log_odds_ratio": -0.47098249197006226, "logits/chosen": -0.8221707344055176, "logits/rejected": -0.7499630451202393, "logps/chosen": -0.8269434571266174, "logps/rejected": -1.2936089038848877, "loss": 1.1498, "nll_loss": 1.0776500701904297, "rewards/accuracies": 0.875, "rewards/chosen": -0.08269435167312622, "rewards/margins": 0.046666551381349564, "rewards/rejected": -0.12936089932918549, "step": 670 }, { "epoch": 0.409333536678359, "grad_norm": 1.095729947090149, "learning_rate": 7.371953459889773e-06, "log_odds_chosen": 0.5866734981536865, "log_odds_ratio": -0.5707204341888428, "logits/chosen": -0.8689852952957153, "logits/rejected": -0.6249036192893982, "logps/chosen": -0.8882087469100952, "logps/rejected": -1.1564536094665527, "loss": 0.9044, "nll_loss": 0.9528048634529114, "rewards/accuracies": 0.625, "rewards/chosen": -0.08882088214159012, "rewards/margins": 0.026824479922652245, "rewards/rejected": -0.11564536392688751, "step": 671 }, { "epoch": 0.40994357175537594, "grad_norm": 1.4132847785949707, "learning_rate": 7.370973668095529e-06, "log_odds_chosen": 0.9499735236167908, "log_odds_ratio": -0.42869824171066284, "logits/chosen": -0.6266188621520996, "logits/rejected": -0.6288608312606812, "logps/chosen": -0.682559609413147, "logps/rejected": -1.2987414598464966, "loss": 1.0892, "nll_loss": 0.8882672786712646, "rewards/accuracies": 0.875, "rewards/chosen": -0.0682559683918953, "rewards/margins": 0.061618175357580185, "rewards/rejected": -0.12987414002418518, "step": 672 }, { "epoch": 0.41055360683239284, "grad_norm": 1.794259786605835, "learning_rate": 7.369993876301285e-06, "log_odds_chosen": 0.7476781606674194, "log_odds_ratio": -0.45085906982421875, "logits/chosen": -0.6686862707138062, "logits/rejected": -0.49304473400115967, "logps/chosen": -0.6720727682113647, "logps/rejected": -1.1362234354019165, "loss": 1.3725, "nll_loss": 0.970905601978302, "rewards/accuracies": 0.75, "rewards/chosen": -0.06720727682113647, "rewards/margins": 0.046415064483881, "rewards/rejected": -0.11362233757972717, "step": 673 }, { "epoch": 0.4111636419094098, "grad_norm": 1.9024649858474731, "learning_rate": 7.3690140845070415e-06, "log_odds_chosen": 0.7701281309127808, "log_odds_ratio": -0.5192703008651733, "logits/chosen": -0.7963870167732239, "logits/rejected": -0.7582314610481262, "logps/chosen": -0.9202022552490234, "logps/rejected": -1.4464430809020996, "loss": 1.1695, "nll_loss": 1.0033079385757446, "rewards/accuracies": 0.625, "rewards/chosen": -0.09202022850513458, "rewards/margins": 0.05262408405542374, "rewards/rejected": -0.14464432001113892, "step": 674 }, { "epoch": 0.4117736769864267, "grad_norm": 2.2595534324645996, "learning_rate": 7.3680342927127984e-06, "log_odds_chosen": 0.6385166049003601, "log_odds_ratio": -0.4814169108867645, "logits/chosen": -0.5250683426856995, "logits/rejected": -0.5873607397079468, "logps/chosen": -0.9313704967498779, "logps/rejected": -1.340638518333435, "loss": 1.119, "nll_loss": 1.1225295066833496, "rewards/accuracies": 0.75, "rewards/chosen": -0.09313704073429108, "rewards/margins": 0.04092681035399437, "rewards/rejected": -0.13406385481357574, "step": 675 }, { "epoch": 0.41238371206344365, "grad_norm": 2.296394109725952, "learning_rate": 7.3670545009185546e-06, "log_odds_chosen": 0.5145680904388428, "log_odds_ratio": -0.550751805305481, "logits/chosen": -0.9440223574638367, "logits/rejected": -0.9022063612937927, "logps/chosen": -1.0667637586593628, "logps/rejected": -1.3447120189666748, "loss": 1.207, "nll_loss": 1.2608720064163208, "rewards/accuracies": 0.625, "rewards/chosen": -0.106676384806633, "rewards/margins": 0.027794819325208664, "rewards/rejected": -0.13447120785713196, "step": 676 }, { "epoch": 0.41299374714046055, "grad_norm": 3.247616767883301, "learning_rate": 7.3660747091243115e-06, "log_odds_chosen": 0.9857906699180603, "log_odds_ratio": -0.4433669447898865, "logits/chosen": -0.8978084325790405, "logits/rejected": -0.7367978096008301, "logps/chosen": -0.8572238087654114, "logps/rejected": -1.5037269592285156, "loss": 1.1873, "nll_loss": 1.1852437257766724, "rewards/accuracies": 0.75, "rewards/chosen": -0.08572238683700562, "rewards/margins": 0.06465031206607819, "rewards/rejected": -0.1503726840019226, "step": 677 }, { "epoch": 0.4136037822174775, "grad_norm": 3.918894052505493, "learning_rate": 7.365094917330067e-06, "log_odds_chosen": 0.39082908630371094, "log_odds_ratio": -0.6577644944190979, "logits/chosen": -0.795733630657196, "logits/rejected": -0.6751487255096436, "logps/chosen": -0.7880346775054932, "logps/rejected": -0.9045922756195068, "loss": 1.2836, "nll_loss": 1.5318092107772827, "rewards/accuracies": 0.5, "rewards/chosen": -0.07880347222089767, "rewards/margins": 0.011655757203698158, "rewards/rejected": -0.09045922756195068, "step": 678 }, { "epoch": 0.4142138172944944, "grad_norm": 2.0818865299224854, "learning_rate": 7.364115125535823e-06, "log_odds_chosen": 0.061217233538627625, "log_odds_ratio": -0.776836633682251, "logits/chosen": -0.8536518812179565, "logits/rejected": -0.5830141305923462, "logps/chosen": -1.153114914894104, "logps/rejected": -1.2253546714782715, "loss": 1.2005, "nll_loss": 1.285375714302063, "rewards/accuracies": 0.375, "rewards/chosen": -0.11531149595975876, "rewards/margins": 0.007223986089229584, "rewards/rejected": -0.12253548204898834, "step": 679 }, { "epoch": 0.41482385237151137, "grad_norm": 1.750819444656372, "learning_rate": 7.36313533374158e-06, "log_odds_chosen": 0.4596223831176758, "log_odds_ratio": -0.5858944654464722, "logits/chosen": -0.7630271315574646, "logits/rejected": -0.784026026725769, "logps/chosen": -0.848764181137085, "logps/rejected": -1.1541800498962402, "loss": 1.0059, "nll_loss": 1.0411648750305176, "rewards/accuracies": 0.625, "rewards/chosen": -0.08487642556428909, "rewards/margins": 0.030541591346263885, "rewards/rejected": -0.11541800945997238, "step": 680 }, { "epoch": 0.41543388744852827, "grad_norm": 4.569178104400635, "learning_rate": 7.362155541947336e-06, "log_odds_chosen": 0.811127781867981, "log_odds_ratio": -0.46946030855178833, "logits/chosen": -0.5533419847488403, "logits/rejected": -0.5575429201126099, "logps/chosen": -0.8335847854614258, "logps/rejected": -1.3091216087341309, "loss": 1.178, "nll_loss": 1.0357009172439575, "rewards/accuracies": 0.75, "rewards/chosen": -0.08335848152637482, "rewards/margins": 0.04755368083715439, "rewards/rejected": -0.1309121549129486, "step": 681 }, { "epoch": 0.41604392252554523, "grad_norm": 1.4040874242782593, "learning_rate": 7.361175750153092e-06, "log_odds_chosen": 0.2830769419670105, "log_odds_ratio": -0.603451669216156, "logits/chosen": -0.809253454208374, "logits/rejected": -0.7464954257011414, "logps/chosen": -0.8194505572319031, "logps/rejected": -0.9788495302200317, "loss": 1.2483, "nll_loss": 1.0014047622680664, "rewards/accuracies": 0.625, "rewards/chosen": -0.08194506168365479, "rewards/margins": 0.015939895063638687, "rewards/rejected": -0.09788495302200317, "step": 682 }, { "epoch": 0.41665395760256213, "grad_norm": 2.792846441268921, "learning_rate": 7.360195958358848e-06, "log_odds_chosen": 0.5719810724258423, "log_odds_ratio": -0.5135765075683594, "logits/chosen": -0.9213842153549194, "logits/rejected": -0.8067644834518433, "logps/chosen": -0.807379961013794, "logps/rejected": -1.119531512260437, "loss": 1.1159, "nll_loss": 1.143380045890808, "rewards/accuracies": 0.75, "rewards/chosen": -0.08073800057172775, "rewards/margins": 0.031215157359838486, "rewards/rejected": -0.11195315420627594, "step": 683 }, { "epoch": 0.4172639926795791, "grad_norm": 5.301162242889404, "learning_rate": 7.359216166564604e-06, "log_odds_chosen": 0.6225392818450928, "log_odds_ratio": -0.4672713279724121, "logits/chosen": -0.8537344336509705, "logits/rejected": -0.5392132997512817, "logps/chosen": -0.8574984669685364, "logps/rejected": -1.2354798316955566, "loss": 1.0902, "nll_loss": 0.9753689169883728, "rewards/accuracies": 0.75, "rewards/chosen": -0.08574985712766647, "rewards/margins": 0.03779813274741173, "rewards/rejected": -0.1235479861497879, "step": 684 }, { "epoch": 0.417874027756596, "grad_norm": 2.7072248458862305, "learning_rate": 7.3582363747703605e-06, "log_odds_chosen": 0.07855193316936493, "log_odds_ratio": -0.774050772190094, "logits/chosen": -0.802712619304657, "logits/rejected": -0.7594822645187378, "logps/chosen": -0.9037636518478394, "logps/rejected": -0.9802173376083374, "loss": 1.2675, "nll_loss": 1.2106659412384033, "rewards/accuracies": 0.5, "rewards/chosen": -0.0903763696551323, "rewards/margins": 0.0076453667134046555, "rewards/rejected": -0.0980217382311821, "step": 685 }, { "epoch": 0.41848406283361295, "grad_norm": 12.352270126342773, "learning_rate": 7.3572565829761175e-06, "log_odds_chosen": 0.2671723961830139, "log_odds_ratio": -0.6124652624130249, "logits/chosen": -1.0687602758407593, "logits/rejected": -0.7538300156593323, "logps/chosen": -1.0595173835754395, "logps/rejected": -1.2134627103805542, "loss": 1.2104, "nll_loss": 1.297800064086914, "rewards/accuracies": 0.625, "rewards/chosen": -0.10595173388719559, "rewards/margins": 0.01539454236626625, "rewards/rejected": -0.12134627997875214, "step": 686 }, { "epoch": 0.41909409791062985, "grad_norm": 1.811675786972046, "learning_rate": 7.356276791181874e-06, "log_odds_chosen": -0.2447570562362671, "log_odds_ratio": -0.8425295352935791, "logits/chosen": -1.033348798751831, "logits/rejected": -0.8988943099975586, "logps/chosen": -1.056204080581665, "logps/rejected": -0.9015412330627441, "loss": 1.1091, "nll_loss": 1.09425950050354, "rewards/accuracies": 0.125, "rewards/chosen": -0.10562039911746979, "rewards/margins": -0.015466279350221157, "rewards/rejected": -0.09015412628650665, "step": 687 }, { "epoch": 0.4197041329876468, "grad_norm": 2.71809720993042, "learning_rate": 7.35529699938763e-06, "log_odds_chosen": 0.3157210350036621, "log_odds_ratio": -0.658103883266449, "logits/chosen": -1.0329241752624512, "logits/rejected": -0.9443042278289795, "logps/chosen": -0.935663104057312, "logps/rejected": -1.174113392829895, "loss": 1.1884, "nll_loss": 1.2156498432159424, "rewards/accuracies": 0.625, "rewards/chosen": -0.09356630593538284, "rewards/margins": 0.023845024406909943, "rewards/rejected": -0.11741133034229279, "step": 688 }, { "epoch": 0.4203141680646637, "grad_norm": 3.83054256439209, "learning_rate": 7.354317207593386e-06, "log_odds_chosen": 0.7769707441329956, "log_odds_ratio": -0.5029573440551758, "logits/chosen": -0.7041599750518799, "logits/rejected": -0.7429802417755127, "logps/chosen": -0.7514178156852722, "logps/rejected": -1.184466004371643, "loss": 0.9846, "nll_loss": 0.9888333082199097, "rewards/accuracies": 0.625, "rewards/chosen": -0.0751417875289917, "rewards/margins": 0.043304815888404846, "rewards/rejected": -0.11844660341739655, "step": 689 }, { "epoch": 0.42092420314168066, "grad_norm": 1.8013839721679688, "learning_rate": 7.353337415799142e-06, "log_odds_chosen": 0.21798843145370483, "log_odds_ratio": -0.645603358745575, "logits/chosen": -0.8940156698226929, "logits/rejected": -0.837428092956543, "logps/chosen": -1.0961949825286865, "logps/rejected": -1.2139554023742676, "loss": 1.3462, "nll_loss": 1.2866138219833374, "rewards/accuracies": 0.625, "rewards/chosen": -0.10961950570344925, "rewards/margins": 0.011776034720242023, "rewards/rejected": -0.121395543217659, "step": 690 }, { "epoch": 0.42153423821869757, "grad_norm": 3.458669424057007, "learning_rate": 7.352357624004899e-06, "log_odds_chosen": 0.235649973154068, "log_odds_ratio": -0.6678213477134705, "logits/chosen": -0.9799326658248901, "logits/rejected": -0.9298195838928223, "logps/chosen": -0.9021475315093994, "logps/rejected": -1.0210273265838623, "loss": 1.0695, "nll_loss": 1.0554454326629639, "rewards/accuracies": 0.375, "rewards/chosen": -0.09021475911140442, "rewards/margins": 0.011887992732226849, "rewards/rejected": -0.10210274159908295, "step": 691 }, { "epoch": 0.4221442732957145, "grad_norm": 2.9126713275909424, "learning_rate": 7.351377832210655e-06, "log_odds_chosen": 0.5760246515274048, "log_odds_ratio": -0.5018455982208252, "logits/chosen": -0.758702278137207, "logits/rejected": -0.6922618746757507, "logps/chosen": -0.9040054082870483, "logps/rejected": -1.230403184890747, "loss": 1.0177, "nll_loss": 0.9873405694961548, "rewards/accuracies": 0.75, "rewards/chosen": -0.09040053188800812, "rewards/margins": 0.03263979032635689, "rewards/rejected": -0.1230403259396553, "step": 692 }, { "epoch": 0.4227543083727314, "grad_norm": 2.125927448272705, "learning_rate": 7.350398040416411e-06, "log_odds_chosen": 0.27232301235198975, "log_odds_ratio": -0.6781683564186096, "logits/chosen": -1.1238501071929932, "logits/rejected": -0.9531947374343872, "logps/chosen": -1.1980807781219482, "logps/rejected": -1.3923598527908325, "loss": 1.2023, "nll_loss": 1.446292757987976, "rewards/accuracies": 0.75, "rewards/chosen": -0.11980806291103363, "rewards/margins": 0.019427917897701263, "rewards/rejected": -0.1392359882593155, "step": 693 }, { "epoch": 0.4233643434497484, "grad_norm": 1.758662462234497, "learning_rate": 7.349418248622168e-06, "log_odds_chosen": 0.6691479682922363, "log_odds_ratio": -0.48678091168403625, "logits/chosen": -0.6791050434112549, "logits/rejected": -0.7257444858551025, "logps/chosen": -0.5786159038543701, "logps/rejected": -0.8808383941650391, "loss": 1.3243, "nll_loss": 1.2246049642562866, "rewards/accuracies": 0.875, "rewards/chosen": -0.05786159634590149, "rewards/margins": 0.030222246423363686, "rewards/rejected": -0.08808384090662003, "step": 694 }, { "epoch": 0.4239743785267653, "grad_norm": 1.3544304370880127, "learning_rate": 7.3484384568279234e-06, "log_odds_chosen": 0.7864464521408081, "log_odds_ratio": -0.45213866233825684, "logits/chosen": -0.8353204727172852, "logits/rejected": -0.6904330849647522, "logps/chosen": -0.7096562385559082, "logps/rejected": -1.1683272123336792, "loss": 0.9975, "nll_loss": 0.943368673324585, "rewards/accuracies": 0.875, "rewards/chosen": -0.07096561789512634, "rewards/margins": 0.04586710035800934, "rewards/rejected": -0.11683271825313568, "step": 695 }, { "epoch": 0.42458441360378224, "grad_norm": 2.4630327224731445, "learning_rate": 7.3474586650336796e-06, "log_odds_chosen": 0.6606349945068359, "log_odds_ratio": -0.5181964635848999, "logits/chosen": -0.8772400617599487, "logits/rejected": -0.9255279302597046, "logps/chosen": -0.8163295388221741, "logps/rejected": -1.1602271795272827, "loss": 1.2067, "nll_loss": 1.255568504333496, "rewards/accuracies": 0.75, "rewards/chosen": -0.08163294941186905, "rewards/margins": 0.034389760345220566, "rewards/rejected": -0.11602271348237991, "step": 696 }, { "epoch": 0.42519444868079914, "grad_norm": 2.1874709129333496, "learning_rate": 7.3464788732394365e-06, "log_odds_chosen": 0.3656710982322693, "log_odds_ratio": -0.6493411064147949, "logits/chosen": -0.8507951498031616, "logits/rejected": -0.6979012489318848, "logps/chosen": -0.8880245089530945, "logps/rejected": -1.0792025327682495, "loss": 1.1373, "nll_loss": 1.0145479440689087, "rewards/accuracies": 0.625, "rewards/chosen": -0.08880244940519333, "rewards/margins": 0.019117802381515503, "rewards/rejected": -0.10792024433612823, "step": 697 }, { "epoch": 0.4258044837578161, "grad_norm": 2.518275022506714, "learning_rate": 7.345499081445193e-06, "log_odds_chosen": 1.0906754732131958, "log_odds_ratio": -0.3974533677101135, "logits/chosen": -0.8263314366340637, "logits/rejected": -0.7294557094573975, "logps/chosen": -0.7482399940490723, "logps/rejected": -1.4291059970855713, "loss": 1.2913, "nll_loss": 1.2503854036331177, "rewards/accuracies": 0.875, "rewards/chosen": -0.0748240053653717, "rewards/margins": 0.06808660179376602, "rewards/rejected": -0.14291059970855713, "step": 698 }, { "epoch": 0.426414518834833, "grad_norm": 1.7872953414916992, "learning_rate": 7.344519289650949e-06, "log_odds_chosen": -0.2878578305244446, "log_odds_ratio": -0.967719554901123, "logits/chosen": -0.9026891589164734, "logits/rejected": -0.7145922780036926, "logps/chosen": -1.2374141216278076, "logps/rejected": -1.1276037693023682, "loss": 1.1513, "nll_loss": 1.2389529943466187, "rewards/accuracies": 0.25, "rewards/chosen": -0.12374141812324524, "rewards/margins": -0.010981046594679356, "rewards/rejected": -0.11276037245988846, "step": 699 }, { "epoch": 0.42702455391184996, "grad_norm": 4.514059543609619, "learning_rate": 7.343539497856705e-06, "log_odds_chosen": 0.22055384516716003, "log_odds_ratio": -0.5935104489326477, "logits/chosen": -0.8816916942596436, "logits/rejected": -0.850502073764801, "logps/chosen": -0.9306086301803589, "logps/rejected": -1.0662425756454468, "loss": 1.1823, "nll_loss": 0.9703171253204346, "rewards/accuracies": 0.875, "rewards/chosen": -0.09306086599826813, "rewards/margins": 0.013563393615186214, "rewards/rejected": -0.10662426054477692, "step": 700 }, { "epoch": 0.42763458898886686, "grad_norm": 1.239640712738037, "learning_rate": 7.342559706062461e-06, "log_odds_chosen": 0.6212329864501953, "log_odds_ratio": -0.6196466684341431, "logits/chosen": -0.898034930229187, "logits/rejected": -0.8374640345573425, "logps/chosen": -0.8463006019592285, "logps/rejected": -1.215805172920227, "loss": 1.0918, "nll_loss": 0.9980503916740417, "rewards/accuracies": 0.625, "rewards/chosen": -0.08463006466627121, "rewards/margins": 0.03695046156644821, "rewards/rejected": -0.12158052623271942, "step": 701 }, { "epoch": 0.42824462406588376, "grad_norm": 3.1021933555603027, "learning_rate": 7.341579914268217e-06, "log_odds_chosen": 0.6380816698074341, "log_odds_ratio": -0.4713030457496643, "logits/chosen": -0.7444858551025391, "logits/rejected": -0.9004992842674255, "logps/chosen": -0.7266895771026611, "logps/rejected": -1.0771952867507935, "loss": 1.0677, "nll_loss": 0.9920639395713806, "rewards/accuracies": 0.75, "rewards/chosen": -0.07266896218061447, "rewards/margins": 0.035050567239522934, "rewards/rejected": -0.1077195331454277, "step": 702 }, { "epoch": 0.4288546591429007, "grad_norm": 6.7163262367248535, "learning_rate": 7.340600122473974e-06, "log_odds_chosen": 0.2545122504234314, "log_odds_ratio": -0.6097927689552307, "logits/chosen": -1.0628223419189453, "logits/rejected": -0.8429450392723083, "logps/chosen": -0.9260862469673157, "logps/rejected": -1.129244327545166, "loss": 1.1805, "nll_loss": 1.057479977607727, "rewards/accuracies": 0.625, "rewards/chosen": -0.09260862320661545, "rewards/margins": 0.020315811038017273, "rewards/rejected": -0.11292443424463272, "step": 703 }, { "epoch": 0.4294646942199176, "grad_norm": 1.1716123819351196, "learning_rate": 7.33962033067973e-06, "log_odds_chosen": 0.7335239052772522, "log_odds_ratio": -0.4284779727458954, "logits/chosen": -0.7767624258995056, "logits/rejected": -0.8134922385215759, "logps/chosen": -0.6800506114959717, "logps/rejected": -1.1195720434188843, "loss": 1.1174, "nll_loss": 0.8529696464538574, "rewards/accuracies": 0.75, "rewards/chosen": -0.06800505518913269, "rewards/margins": 0.04395214095711708, "rewards/rejected": -0.11195719987154007, "step": 704 }, { "epoch": 0.4300747292969346, "grad_norm": 1.975029468536377, "learning_rate": 7.338640538885487e-06, "log_odds_chosen": 0.5970534086227417, "log_odds_ratio": -0.6105606555938721, "logits/chosen": -0.9304762482643127, "logits/rejected": -0.8151805400848389, "logps/chosen": -0.8687636852264404, "logps/rejected": -1.2157654762268066, "loss": 1.3098, "nll_loss": 1.2081388235092163, "rewards/accuracies": 0.75, "rewards/chosen": -0.08687637746334076, "rewards/margins": 0.0347001850605011, "rewards/rejected": -0.12157656252384186, "step": 705 }, { "epoch": 0.4306847643739515, "grad_norm": 2.8783695697784424, "learning_rate": 7.3376607470912425e-06, "log_odds_chosen": 0.8870009779930115, "log_odds_ratio": -0.45964115858078003, "logits/chosen": -0.6351730227470398, "logits/rejected": -0.6718018054962158, "logps/chosen": -0.7541401386260986, "logps/rejected": -1.3207781314849854, "loss": 1.0435, "nll_loss": 0.8902481198310852, "rewards/accuracies": 0.75, "rewards/chosen": -0.0754140242934227, "rewards/margins": 0.05666379630565643, "rewards/rejected": -0.13207781314849854, "step": 706 }, { "epoch": 0.43129479945096844, "grad_norm": 9.763952255249023, "learning_rate": 7.336680955296999e-06, "log_odds_chosen": 0.01306505873799324, "log_odds_ratio": -0.7877708077430725, "logits/chosen": -0.9499788880348206, "logits/rejected": -0.9437590837478638, "logps/chosen": -0.8077357411384583, "logps/rejected": -0.8335963487625122, "loss": 1.1333, "nll_loss": 1.0418100357055664, "rewards/accuracies": 0.5, "rewards/chosen": -0.08077356964349747, "rewards/margins": 0.002586063463240862, "rewards/rejected": -0.08335964381694794, "step": 707 }, { "epoch": 0.43190483452798534, "grad_norm": 1.366633653640747, "learning_rate": 7.3357011635027556e-06, "log_odds_chosen": 0.5777968168258667, "log_odds_ratio": -0.5441449880599976, "logits/chosen": -0.6685980558395386, "logits/rejected": -0.6915783882141113, "logps/chosen": -0.896393895149231, "logps/rejected": -1.3115631341934204, "loss": 1.2302, "nll_loss": 1.0154436826705933, "rewards/accuracies": 0.75, "rewards/chosen": -0.08963939547538757, "rewards/margins": 0.041516922414302826, "rewards/rejected": -0.1311563104391098, "step": 708 }, { "epoch": 0.4325148696050023, "grad_norm": 1.6520482301712036, "learning_rate": 7.334721371708512e-06, "log_odds_chosen": 0.9764328598976135, "log_odds_ratio": -0.35403066873550415, "logits/chosen": -0.8896189332008362, "logits/rejected": -0.8304291367530823, "logps/chosen": -0.5876756906509399, "logps/rejected": -1.0754680633544922, "loss": 1.114, "nll_loss": 0.9135795831680298, "rewards/accuracies": 1.0, "rewards/chosen": -0.05876757204532623, "rewards/margins": 0.04877922683954239, "rewards/rejected": -0.10754679143428802, "step": 709 }, { "epoch": 0.4331249046820192, "grad_norm": 2.1802265644073486, "learning_rate": 7.333741579914268e-06, "log_odds_chosen": 0.9913164377212524, "log_odds_ratio": -0.4233580231666565, "logits/chosen": -0.7651900053024292, "logits/rejected": -0.7338737845420837, "logps/chosen": -1.0174435377120972, "logps/rejected": -1.5960078239440918, "loss": 1.1451, "nll_loss": 1.0817135572433472, "rewards/accuracies": 0.875, "rewards/chosen": -0.10174435377120972, "rewards/margins": 0.05785644054412842, "rewards/rejected": -0.15960079431533813, "step": 710 }, { "epoch": 0.43373493975903615, "grad_norm": 2.017793893814087, "learning_rate": 7.332761788120025e-06, "log_odds_chosen": 0.6102766990661621, "log_odds_ratio": -0.5349982976913452, "logits/chosen": -0.7151833772659302, "logits/rejected": -0.6923072934150696, "logps/chosen": -0.6946899890899658, "logps/rejected": -1.0354522466659546, "loss": 1.1236, "nll_loss": 0.8250791430473328, "rewards/accuracies": 0.625, "rewards/chosen": -0.06946900486946106, "rewards/margins": 0.03407622501254082, "rewards/rejected": -0.10354522615671158, "step": 711 }, { "epoch": 0.43434497483605305, "grad_norm": 1.7239164113998413, "learning_rate": 7.33178199632578e-06, "log_odds_chosen": 0.45339512825012207, "log_odds_ratio": -0.5416315793991089, "logits/chosen": -0.869772732257843, "logits/rejected": -0.6815479397773743, "logps/chosen": -0.7343224287033081, "logps/rejected": -1.0103996992111206, "loss": 1.164, "nll_loss": 1.0236685276031494, "rewards/accuracies": 0.75, "rewards/chosen": -0.07343225181102753, "rewards/margins": 0.027607731521129608, "rewards/rejected": -0.10103997588157654, "step": 712 }, { "epoch": 0.43495500991307, "grad_norm": 2.8125836849212646, "learning_rate": 7.330802204531536e-06, "log_odds_chosen": 1.2620567083358765, "log_odds_ratio": -0.35422030091285706, "logits/chosen": -0.7945424318313599, "logits/rejected": -0.7526643872261047, "logps/chosen": -0.7271201610565186, "logps/rejected": -1.5132229328155518, "loss": 1.0696, "nll_loss": 0.9871283173561096, "rewards/accuracies": 0.875, "rewards/chosen": -0.0727120116353035, "rewards/margins": 0.07861028611660004, "rewards/rejected": -0.15132230520248413, "step": 713 }, { "epoch": 0.4355650449900869, "grad_norm": 2.31986403465271, "learning_rate": 7.329822412737293e-06, "log_odds_chosen": 0.743293046951294, "log_odds_ratio": -0.5066909193992615, "logits/chosen": -0.9732462167739868, "logits/rejected": -0.9244346618652344, "logps/chosen": -0.8256282210350037, "logps/rejected": -1.2443828582763672, "loss": 1.316, "nll_loss": 1.313812494277954, "rewards/accuracies": 0.625, "rewards/chosen": -0.08256281912326813, "rewards/margins": 0.04187546670436859, "rewards/rejected": -0.12443828582763672, "step": 714 }, { "epoch": 0.43617508006710387, "grad_norm": 2.1386778354644775, "learning_rate": 7.328842620943049e-06, "log_odds_chosen": 0.44674256443977356, "log_odds_ratio": -0.6469495296478271, "logits/chosen": -1.0678439140319824, "logits/rejected": -0.9266119599342346, "logps/chosen": -0.9562376141548157, "logps/rejected": -1.1369335651397705, "loss": 1.1602, "nll_loss": 1.3177416324615479, "rewards/accuracies": 0.5, "rewards/chosen": -0.09562376886606216, "rewards/margins": 0.018069595098495483, "rewards/rejected": -0.11369336396455765, "step": 715 }, { "epoch": 0.4367851151441208, "grad_norm": 1.5696624517440796, "learning_rate": 7.327862829148806e-06, "log_odds_chosen": 0.5128247737884521, "log_odds_ratio": -0.5569857358932495, "logits/chosen": -0.8718817830085754, "logits/rejected": -0.7764681577682495, "logps/chosen": -0.92684006690979, "logps/rejected": -1.2366451025009155, "loss": 1.3324, "nll_loss": 1.4869318008422852, "rewards/accuracies": 0.75, "rewards/chosen": -0.09268401563167572, "rewards/margins": 0.03098050132393837, "rewards/rejected": -0.12366451323032379, "step": 716 }, { "epoch": 0.43739515022113773, "grad_norm": 2.2369935512542725, "learning_rate": 7.326883037354562e-06, "log_odds_chosen": 0.9650887846946716, "log_odds_ratio": -0.38189589977264404, "logits/chosen": -1.0010496377944946, "logits/rejected": -0.8889825344085693, "logps/chosen": -0.7561530470848083, "logps/rejected": -1.325263261795044, "loss": 1.3718, "nll_loss": 1.1690924167633057, "rewards/accuracies": 0.875, "rewards/chosen": -0.07561531662940979, "rewards/margins": 0.05691102519631386, "rewards/rejected": -0.13252633810043335, "step": 717 }, { "epoch": 0.43800518529815463, "grad_norm": 1.663401484489441, "learning_rate": 7.325903245560318e-06, "log_odds_chosen": 1.0176138877868652, "log_odds_ratio": -0.40186095237731934, "logits/chosen": -0.6158274412155151, "logits/rejected": -0.4783296585083008, "logps/chosen": -0.691921591758728, "logps/rejected": -1.29014253616333, "loss": 1.1337, "nll_loss": 0.9205366969108582, "rewards/accuracies": 0.875, "rewards/chosen": -0.06919216364622116, "rewards/margins": 0.05982208997011185, "rewards/rejected": -0.129014253616333, "step": 718 }, { "epoch": 0.4386152203751716, "grad_norm": 1.6704168319702148, "learning_rate": 7.324923453766075e-06, "log_odds_chosen": -0.1689733862876892, "log_odds_ratio": -0.8376729488372803, "logits/chosen": -1.185623049736023, "logits/rejected": -1.001991868019104, "logps/chosen": -1.2568327188491821, "logps/rejected": -1.1246669292449951, "loss": 1.298, "nll_loss": 1.4210083484649658, "rewards/accuracies": 0.25, "rewards/chosen": -0.1256832778453827, "rewards/margins": -0.013216579332947731, "rewards/rejected": -0.11246669292449951, "step": 719 }, { "epoch": 0.4392252554521885, "grad_norm": 3.605532646179199, "learning_rate": 7.323943661971831e-06, "log_odds_chosen": 0.38449031114578247, "log_odds_ratio": -0.612220823764801, "logits/chosen": -0.8082402348518372, "logits/rejected": -0.6524558067321777, "logps/chosen": -0.9458639621734619, "logps/rejected": -1.2089967727661133, "loss": 1.152, "nll_loss": 1.2297022342681885, "rewards/accuracies": 0.625, "rewards/chosen": -0.09458640217781067, "rewards/margins": 0.026313282549381256, "rewards/rejected": -0.12089968472719193, "step": 720 }, { "epoch": 0.43983529052920545, "grad_norm": 1.179532766342163, "learning_rate": 7.322963870177587e-06, "log_odds_chosen": 0.9300123453140259, "log_odds_ratio": -0.41504549980163574, "logits/chosen": -0.7066912055015564, "logits/rejected": -0.5852571129798889, "logps/chosen": -0.7277833819389343, "logps/rejected": -1.269653081893921, "loss": 1.0392, "nll_loss": 0.8969664573669434, "rewards/accuracies": 0.75, "rewards/chosen": -0.07277834415435791, "rewards/margins": 0.05418696999549866, "rewards/rejected": -0.12696531414985657, "step": 721 }, { "epoch": 0.44044532560622235, "grad_norm": 4.241207122802734, "learning_rate": 7.321984078383344e-06, "log_odds_chosen": 0.7564799189567566, "log_odds_ratio": -0.5122271776199341, "logits/chosen": -1.0821456909179688, "logits/rejected": -0.8396987915039062, "logps/chosen": -1.1295166015625, "logps/rejected": -1.771830439567566, "loss": 1.2383, "nll_loss": 1.335891604423523, "rewards/accuracies": 0.625, "rewards/chosen": -0.11295167356729507, "rewards/margins": 0.06423137336969376, "rewards/rejected": -0.17718303203582764, "step": 722 }, { "epoch": 0.4410553606832393, "grad_norm": 2.4947428703308105, "learning_rate": 7.321004286589099e-06, "log_odds_chosen": 1.1654548645019531, "log_odds_ratio": -0.3743015229701996, "logits/chosen": -0.6390406489372253, "logits/rejected": -0.6245759129524231, "logps/chosen": -0.7499833106994629, "logps/rejected": -1.4525477886199951, "loss": 1.0721, "nll_loss": 0.9753283262252808, "rewards/accuracies": 0.875, "rewards/chosen": -0.07499833405017853, "rewards/margins": 0.07025643438100815, "rewards/rejected": -0.14525476098060608, "step": 723 }, { "epoch": 0.4416653957602562, "grad_norm": 1.3677613735198975, "learning_rate": 7.320024494794855e-06, "log_odds_chosen": 0.4160395860671997, "log_odds_ratio": -0.5899010300636292, "logits/chosen": -0.785467803478241, "logits/rejected": -0.6521238684654236, "logps/chosen": -1.1099576950073242, "logps/rejected": -1.3248703479766846, "loss": 1.3181, "nll_loss": 1.0478143692016602, "rewards/accuracies": 0.625, "rewards/chosen": -0.11099576950073242, "rewards/margins": 0.0214912798255682, "rewards/rejected": -0.13248704373836517, "step": 724 }, { "epoch": 0.44227543083727316, "grad_norm": 1.6998943090438843, "learning_rate": 7.319044703000612e-06, "log_odds_chosen": 1.1518222093582153, "log_odds_ratio": -0.3660382926464081, "logits/chosen": -0.8146690130233765, "logits/rejected": -0.6478759050369263, "logps/chosen": -0.766089677810669, "logps/rejected": -1.449574589729309, "loss": 1.1057, "nll_loss": 0.9350372552871704, "rewards/accuracies": 0.875, "rewards/chosen": -0.07660897076129913, "rewards/margins": 0.0683484897017479, "rewards/rejected": -0.14495745301246643, "step": 725 }, { "epoch": 0.44288546591429007, "grad_norm": 1.9045628309249878, "learning_rate": 7.318064911206368e-06, "log_odds_chosen": 0.6442831754684448, "log_odds_ratio": -0.5707608461380005, "logits/chosen": -0.4740915596485138, "logits/rejected": -0.49455463886260986, "logps/chosen": -0.6084163784980774, "logps/rejected": -0.9683226346969604, "loss": 1.0407, "nll_loss": 0.8564392924308777, "rewards/accuracies": 0.625, "rewards/chosen": -0.0608416348695755, "rewards/margins": 0.0359906330704689, "rewards/rejected": -0.0968322604894638, "step": 726 }, { "epoch": 0.443495500991307, "grad_norm": 1.2401213645935059, "learning_rate": 7.3170851194121244e-06, "log_odds_chosen": 0.38337796926498413, "log_odds_ratio": -0.6113731861114502, "logits/chosen": -1.0487483739852905, "logits/rejected": -0.9417507648468018, "logps/chosen": -1.0160094499588013, "logps/rejected": -1.23536217212677, "loss": 1.1487, "nll_loss": 1.244286298751831, "rewards/accuracies": 0.625, "rewards/chosen": -0.10160095244646072, "rewards/margins": 0.021935276687145233, "rewards/rejected": -0.12353621423244476, "step": 727 }, { "epoch": 0.4441055360683239, "grad_norm": 1.462938904762268, "learning_rate": 7.316105327617881e-06, "log_odds_chosen": 0.5181021094322205, "log_odds_ratio": -0.6229691505432129, "logits/chosen": -1.146217703819275, "logits/rejected": -0.8756933212280273, "logps/chosen": -1.015798568725586, "logps/rejected": -1.4171644449234009, "loss": 1.2756, "nll_loss": 1.185545802116394, "rewards/accuracies": 0.75, "rewards/chosen": -0.10157985240221024, "rewards/margins": 0.04013659805059433, "rewards/rejected": -0.14171645045280457, "step": 728 }, { "epoch": 0.4447155711453409, "grad_norm": 3.079500913619995, "learning_rate": 7.315125535823637e-06, "log_odds_chosen": 1.0071011781692505, "log_odds_ratio": -0.40558767318725586, "logits/chosen": -0.8267897963523865, "logits/rejected": -0.7745504379272461, "logps/chosen": -0.7624354958534241, "logps/rejected": -1.4124071598052979, "loss": 1.1255, "nll_loss": 0.9232684373855591, "rewards/accuracies": 0.875, "rewards/chosen": -0.07624354958534241, "rewards/margins": 0.06499716639518738, "rewards/rejected": -0.14124071598052979, "step": 729 }, { "epoch": 0.4453256062223578, "grad_norm": 1.9139032363891602, "learning_rate": 7.314145744029393e-06, "log_odds_chosen": 0.7687194347381592, "log_odds_ratio": -0.49764835834503174, "logits/chosen": -0.7955242395401001, "logits/rejected": -0.7926729917526245, "logps/chosen": -0.6735266447067261, "logps/rejected": -1.1002858877182007, "loss": 0.9982, "nll_loss": 0.9363352060317993, "rewards/accuracies": 0.625, "rewards/chosen": -0.06735266745090485, "rewards/margins": 0.0426759198307991, "rewards/rejected": -0.11002857983112335, "step": 730 }, { "epoch": 0.44593564129937474, "grad_norm": 2.901482343673706, "learning_rate": 7.31316595223515e-06, "log_odds_chosen": -0.0641758069396019, "log_odds_ratio": -0.741477370262146, "logits/chosen": -0.9668593406677246, "logits/rejected": -0.9736400842666626, "logps/chosen": -0.9357836246490479, "logps/rejected": -0.9185276031494141, "loss": 1.092, "nll_loss": 1.216196060180664, "rewards/accuracies": 0.5, "rewards/chosen": -0.09357836097478867, "rewards/margins": -0.0017255963757634163, "rewards/rejected": -0.09185276925563812, "step": 731 }, { "epoch": 0.44654567637639164, "grad_norm": 2.074744462966919, "learning_rate": 7.312186160440906e-06, "log_odds_chosen": 0.8412339091300964, "log_odds_ratio": -0.414404958486557, "logits/chosen": -0.9831281900405884, "logits/rejected": -0.7615561485290527, "logps/chosen": -0.8288778066635132, "logps/rejected": -1.3734828233718872, "loss": 1.1694, "nll_loss": 1.2156007289886475, "rewards/accuracies": 0.875, "rewards/chosen": -0.08288778364658356, "rewards/margins": 0.054460495710372925, "rewards/rejected": -0.13734827935695648, "step": 732 }, { "epoch": 0.44715571145340854, "grad_norm": 1.6982258558273315, "learning_rate": 7.311206368646663e-06, "log_odds_chosen": -0.02350889891386032, "log_odds_ratio": -0.7974256873130798, "logits/chosen": -1.0401594638824463, "logits/rejected": -1.0315885543823242, "logps/chosen": -0.9102332592010498, "logps/rejected": -0.8868939876556396, "loss": 1.2379, "nll_loss": 1.1465280055999756, "rewards/accuracies": 0.375, "rewards/chosen": -0.09102333337068558, "rewards/margins": -0.002333934884518385, "rewards/rejected": -0.0886894017457962, "step": 733 }, { "epoch": 0.4477657465304255, "grad_norm": 2.1920783519744873, "learning_rate": 7.310226576852419e-06, "log_odds_chosen": 0.21771728992462158, "log_odds_ratio": -0.6624191403388977, "logits/chosen": -1.1439626216888428, "logits/rejected": -0.9420709609985352, "logps/chosen": -0.9329720735549927, "logps/rejected": -1.049346685409546, "loss": 1.2554, "nll_loss": 1.2355635166168213, "rewards/accuracies": 0.5, "rewards/chosen": -0.09329721331596375, "rewards/margins": 0.011637452989816666, "rewards/rejected": -0.10493467003107071, "step": 734 }, { "epoch": 0.4483757816074424, "grad_norm": 2.835639715194702, "learning_rate": 7.309246785058174e-06, "log_odds_chosen": 0.27952417731285095, "log_odds_ratio": -0.6934935450553894, "logits/chosen": -0.9980822801589966, "logits/rejected": -0.885334312915802, "logps/chosen": -1.0972814559936523, "logps/rejected": -1.3570683002471924, "loss": 1.1565, "nll_loss": 1.4043470621109009, "rewards/accuracies": 0.75, "rewards/chosen": -0.10972815006971359, "rewards/margins": 0.02597866952419281, "rewards/rejected": -0.1357068121433258, "step": 735 }, { "epoch": 0.44898581668445936, "grad_norm": 1.4383618831634521, "learning_rate": 7.308266993263931e-06, "log_odds_chosen": 0.6430991888046265, "log_odds_ratio": -0.48986321687698364, "logits/chosen": -0.9381874799728394, "logits/rejected": -0.8976821303367615, "logps/chosen": -0.7358971238136292, "logps/rejected": -1.112425684928894, "loss": 1.2011, "nll_loss": 1.1976346969604492, "rewards/accuracies": 0.75, "rewards/chosen": -0.07358971238136292, "rewards/margins": 0.03765285760164261, "rewards/rejected": -0.11124257743358612, "step": 736 }, { "epoch": 0.44959585176147626, "grad_norm": 6.95770788192749, "learning_rate": 7.307287201469687e-06, "log_odds_chosen": 0.09889043867588043, "log_odds_ratio": -0.6906177997589111, "logits/chosen": -1.1010125875473022, "logits/rejected": -0.8984132409095764, "logps/chosen": -0.8793965578079224, "logps/rejected": -0.9358416795730591, "loss": 1.1309, "nll_loss": 1.295283317565918, "rewards/accuracies": 0.625, "rewards/chosen": -0.08793966472148895, "rewards/margins": 0.005644511431455612, "rewards/rejected": -0.09358416497707367, "step": 737 }, { "epoch": 0.4502058868384932, "grad_norm": 5.495340824127197, "learning_rate": 7.3063074096754435e-06, "log_odds_chosen": 0.47854697704315186, "log_odds_ratio": -0.5783326625823975, "logits/chosen": -1.00934636592865, "logits/rejected": -1.0216920375823975, "logps/chosen": -0.9031851291656494, "logps/rejected": -1.0826313495635986, "loss": 1.1849, "nll_loss": 1.1080540418624878, "rewards/accuracies": 0.625, "rewards/chosen": -0.09031853079795837, "rewards/margins": 0.017944611608982086, "rewards/rejected": -0.10826313495635986, "step": 738 }, { "epoch": 0.4508159219155101, "grad_norm": 1.877071738243103, "learning_rate": 7.3053276178812004e-06, "log_odds_chosen": -0.33804768323898315, "log_odds_ratio": -0.9264910221099854, "logits/chosen": -0.9277536869049072, "logits/rejected": -0.9036844968795776, "logps/chosen": -0.8201135396957397, "logps/rejected": -0.6482836008071899, "loss": 1.1833, "nll_loss": 1.086984395980835, "rewards/accuracies": 0.375, "rewards/chosen": -0.08201135694980621, "rewards/margins": -0.017183000221848488, "rewards/rejected": -0.06482835859060287, "step": 739 }, { "epoch": 0.4514259569925271, "grad_norm": 3.3234033584594727, "learning_rate": 7.304347826086956e-06, "log_odds_chosen": 0.6479196548461914, "log_odds_ratio": -0.5584297776222229, "logits/chosen": -0.7620580792427063, "logits/rejected": -0.6538414359092712, "logps/chosen": -0.84891676902771, "logps/rejected": -1.2280482053756714, "loss": 1.1856, "nll_loss": 1.0088117122650146, "rewards/accuracies": 0.5, "rewards/chosen": -0.084891676902771, "rewards/margins": 0.03791314736008644, "rewards/rejected": -0.12280483543872833, "step": 740 }, { "epoch": 0.452035992069544, "grad_norm": 1.4685418605804443, "learning_rate": 7.303368034292712e-06, "log_odds_chosen": 0.08299532532691956, "log_odds_ratio": -0.7647950053215027, "logits/chosen": -0.9250365495681763, "logits/rejected": -0.6797724366188049, "logps/chosen": -1.027541160583496, "logps/rejected": -1.1024117469787598, "loss": 1.3722, "nll_loss": 1.1800495386123657, "rewards/accuracies": 0.375, "rewards/chosen": -0.1027541235089302, "rewards/margins": 0.007487053982913494, "rewards/rejected": -0.11024118214845657, "step": 741 }, { "epoch": 0.45264602714656094, "grad_norm": 1.8942500352859497, "learning_rate": 7.302388242498469e-06, "log_odds_chosen": 0.6421030759811401, "log_odds_ratio": -0.4526582956314087, "logits/chosen": -0.8629605174064636, "logits/rejected": -0.866931676864624, "logps/chosen": -0.9420598745346069, "logps/rejected": -1.3536419868469238, "loss": 1.1861, "nll_loss": 1.3025035858154297, "rewards/accuracies": 0.875, "rewards/chosen": -0.09420597553253174, "rewards/margins": 0.04115821421146393, "rewards/rejected": -0.13536420464515686, "step": 742 }, { "epoch": 0.45325606222357784, "grad_norm": 3.540055751800537, "learning_rate": 7.301408450704225e-06, "log_odds_chosen": 0.2279292345046997, "log_odds_ratio": -0.7497410774230957, "logits/chosen": -0.6933714747428894, "logits/rejected": -0.5591862201690674, "logps/chosen": -0.798801839351654, "logps/rejected": -0.9525967836380005, "loss": 1.2056, "nll_loss": 0.8812756538391113, "rewards/accuracies": 0.5, "rewards/chosen": -0.07988019287586212, "rewards/margins": 0.015379492193460464, "rewards/rejected": -0.09525968134403229, "step": 743 }, { "epoch": 0.4538660973005948, "grad_norm": 1.3888524770736694, "learning_rate": 7.300428658909982e-06, "log_odds_chosen": 0.8833773136138916, "log_odds_ratio": -0.3942326605319977, "logits/chosen": -0.5219761729240417, "logits/rejected": -0.38816002011299133, "logps/chosen": -0.7237377166748047, "logps/rejected": -1.2857296466827393, "loss": 1.0091, "nll_loss": 0.8687785267829895, "rewards/accuracies": 0.875, "rewards/chosen": -0.07237377017736435, "rewards/margins": 0.056199200451374054, "rewards/rejected": -0.1285729706287384, "step": 744 }, { "epoch": 0.4544761323776117, "grad_norm": 3.615743637084961, "learning_rate": 7.299448867115738e-06, "log_odds_chosen": 0.1511787474155426, "log_odds_ratio": -0.7217350006103516, "logits/chosen": -1.131864070892334, "logits/rejected": -0.9292676448822021, "logps/chosen": -1.1391514539718628, "logps/rejected": -1.2520883083343506, "loss": 1.3832, "nll_loss": 1.4466817378997803, "rewards/accuracies": 0.5, "rewards/chosen": -0.11391514539718628, "rewards/margins": 0.011293679475784302, "rewards/rejected": -0.12520882487297058, "step": 745 }, { "epoch": 0.45508616745462865, "grad_norm": 1.1777234077453613, "learning_rate": 7.298469075321493e-06, "log_odds_chosen": 0.9778517484664917, "log_odds_ratio": -0.40211477875709534, "logits/chosen": -0.6150887608528137, "logits/rejected": -0.6998891830444336, "logps/chosen": -0.5274043083190918, "logps/rejected": -0.9877122640609741, "loss": 1.1164, "nll_loss": 0.8261010050773621, "rewards/accuracies": 0.875, "rewards/chosen": -0.0527404323220253, "rewards/margins": 0.04603079333901405, "rewards/rejected": -0.09877122938632965, "step": 746 }, { "epoch": 0.45569620253164556, "grad_norm": 1.8150279521942139, "learning_rate": 7.29748928352725e-06, "log_odds_chosen": 0.364616334438324, "log_odds_ratio": -0.6103050708770752, "logits/chosen": -0.8231361508369446, "logits/rejected": -0.800948977470398, "logps/chosen": -0.7611737847328186, "logps/rejected": -0.9919555187225342, "loss": 1.1982, "nll_loss": 0.9276221990585327, "rewards/accuracies": 0.625, "rewards/chosen": -0.0761173740029335, "rewards/margins": 0.023078177124261856, "rewards/rejected": -0.09919555485248566, "step": 747 }, { "epoch": 0.4563062376086625, "grad_norm": 1.5274717807769775, "learning_rate": 7.296509491733006e-06, "log_odds_chosen": 0.8035154938697815, "log_odds_ratio": -0.4323265254497528, "logits/chosen": -0.6954258680343628, "logits/rejected": -0.6253541111946106, "logps/chosen": -0.7702920436859131, "logps/rejected": -1.2678722143173218, "loss": 1.045, "nll_loss": 0.9996135234832764, "rewards/accuracies": 0.875, "rewards/chosen": -0.07702921330928802, "rewards/margins": 0.04975801706314087, "rewards/rejected": -0.1267872303724289, "step": 748 }, { "epoch": 0.4569162726856794, "grad_norm": 1.7725257873535156, "learning_rate": 7.2955296999387625e-06, "log_odds_chosen": 1.00059175491333, "log_odds_ratio": -0.37173599004745483, "logits/chosen": -0.8301819562911987, "logits/rejected": -0.6270700693130493, "logps/chosen": -0.8198906183242798, "logps/rejected": -1.4898422956466675, "loss": 1.149, "nll_loss": 0.9033120274543762, "rewards/accuracies": 0.875, "rewards/chosen": -0.08198905736207962, "rewards/margins": 0.06699517369270325, "rewards/rejected": -0.14898423850536346, "step": 749 }, { "epoch": 0.45752630776269637, "grad_norm": 2.877943754196167, "learning_rate": 7.2945499081445195e-06, "log_odds_chosen": 1.0994373559951782, "log_odds_ratio": -0.37061047554016113, "logits/chosen": -0.8449851274490356, "logits/rejected": -0.8212486505508423, "logps/chosen": -0.7562361359596252, "logps/rejected": -1.425597906112671, "loss": 1.1247, "nll_loss": 1.0203535556793213, "rewards/accuracies": 0.875, "rewards/chosen": -0.07562361657619476, "rewards/margins": 0.066936194896698, "rewards/rejected": -0.14255979657173157, "step": 750 }, { "epoch": 0.4581363428397133, "grad_norm": 1.6544626951217651, "learning_rate": 7.293570116350276e-06, "log_odds_chosen": 0.07869875431060791, "log_odds_ratio": -0.8120481967926025, "logits/chosen": -1.0091259479522705, "logits/rejected": -0.7411571741104126, "logps/chosen": -0.8588269948959351, "logps/rejected": -1.0944716930389404, "loss": 1.1087, "nll_loss": 1.0547730922698975, "rewards/accuracies": 0.375, "rewards/chosen": -0.08588269352912903, "rewards/margins": 0.023564469069242477, "rewards/rejected": -0.1094471663236618, "step": 751 }, { "epoch": 0.45874637791673023, "grad_norm": 2.5513410568237305, "learning_rate": 7.292590324556031e-06, "log_odds_chosen": 1.061722993850708, "log_odds_ratio": -0.3781893253326416, "logits/chosen": -0.8878879547119141, "logits/rejected": -0.8322350978851318, "logps/chosen": -0.9890838861465454, "logps/rejected": -1.7392741441726685, "loss": 1.2722, "nll_loss": 1.1871707439422607, "rewards/accuracies": 0.75, "rewards/chosen": -0.09890838712453842, "rewards/margins": 0.07501902431249619, "rewards/rejected": -0.1739274114370346, "step": 752 }, { "epoch": 0.45935641299374713, "grad_norm": 1.5966211557388306, "learning_rate": 7.291610532761788e-06, "log_odds_chosen": 0.8947935104370117, "log_odds_ratio": -0.3781513571739197, "logits/chosen": -0.9060549736022949, "logits/rejected": -0.6972147226333618, "logps/chosen": -0.7242345809936523, "logps/rejected": -1.2350279092788696, "loss": 1.0349, "nll_loss": 0.9366735219955444, "rewards/accuracies": 0.875, "rewards/chosen": -0.07242345809936523, "rewards/margins": 0.05107933282852173, "rewards/rejected": -0.12350279092788696, "step": 753 }, { "epoch": 0.4599664480707641, "grad_norm": 2.487805128097534, "learning_rate": 7.290630740967544e-06, "log_odds_chosen": 0.15134719014167786, "log_odds_ratio": -0.6994785070419312, "logits/chosen": -0.7445639371871948, "logits/rejected": -0.7751510739326477, "logps/chosen": -1.08132004737854, "logps/rejected": -1.1604994535446167, "loss": 1.1146, "nll_loss": 1.3522217273712158, "rewards/accuracies": 0.5, "rewards/chosen": -0.108132004737854, "rewards/margins": 0.007917942479252815, "rewards/rejected": -0.11604994535446167, "step": 754 }, { "epoch": 0.460576483147781, "grad_norm": 1.654205322265625, "learning_rate": 7.2896509491733e-06, "log_odds_chosen": 0.4245782494544983, "log_odds_ratio": -0.5742748975753784, "logits/chosen": -1.0894392728805542, "logits/rejected": -0.8934519290924072, "logps/chosen": -0.8986117243766785, "logps/rejected": -1.0527461767196655, "loss": 1.3608, "nll_loss": 1.475743055343628, "rewards/accuracies": 0.625, "rewards/chosen": -0.0898611769080162, "rewards/margins": 0.015413444489240646, "rewards/rejected": -0.10527461767196655, "step": 755 }, { "epoch": 0.46118651822479795, "grad_norm": 2.027090549468994, "learning_rate": 7.288671157379057e-06, "log_odds_chosen": 0.676811158657074, "log_odds_ratio": -0.5243546962738037, "logits/chosen": -0.9320379495620728, "logits/rejected": -0.8858490586280823, "logps/chosen": -1.0477176904678345, "logps/rejected": -1.4903396368026733, "loss": 1.092, "nll_loss": 1.0954787731170654, "rewards/accuracies": 0.625, "rewards/chosen": -0.10477177053689957, "rewards/margins": 0.04426218941807747, "rewards/rejected": -0.14903396368026733, "step": 756 }, { "epoch": 0.46179655330181485, "grad_norm": 2.128293991088867, "learning_rate": 7.287691365584812e-06, "log_odds_chosen": 1.5008798837661743, "log_odds_ratio": -0.2791174054145813, "logits/chosen": -0.638131856918335, "logits/rejected": -0.42491811513900757, "logps/chosen": -0.6406135559082031, "logps/rejected": -1.6050126552581787, "loss": 1.048, "nll_loss": 0.9017986059188843, "rewards/accuracies": 1.0, "rewards/chosen": -0.06406135112047195, "rewards/margins": 0.0964399054646492, "rewards/rejected": -0.16050127148628235, "step": 757 }, { "epoch": 0.4624065883788318, "grad_norm": 1.2759085893630981, "learning_rate": 7.286711573790569e-06, "log_odds_chosen": 0.41234859824180603, "log_odds_ratio": -0.564568281173706, "logits/chosen": -0.8083447813987732, "logits/rejected": -0.8306019306182861, "logps/chosen": -0.9611987471580505, "logps/rejected": -1.206566333770752, "loss": 1.0123, "nll_loss": 1.1827951669692993, "rewards/accuracies": 0.625, "rewards/chosen": -0.09611988067626953, "rewards/margins": 0.024536747485399246, "rewards/rejected": -0.12065662443637848, "step": 758 }, { "epoch": 0.4630166234558487, "grad_norm": 2.4489364624023438, "learning_rate": 7.2857317819963254e-06, "log_odds_chosen": 0.3447574973106384, "log_odds_ratio": -0.6063953638076782, "logits/chosen": -0.8619308471679688, "logits/rejected": -0.6439954042434692, "logps/chosen": -0.9932701587677002, "logps/rejected": -1.1655175685882568, "loss": 1.258, "nll_loss": 1.1913769245147705, "rewards/accuracies": 0.625, "rewards/chosen": -0.09932701289653778, "rewards/margins": 0.017224740236997604, "rewards/rejected": -0.11655175685882568, "step": 759 }, { "epoch": 0.46362665853286567, "grad_norm": 1.5453121662139893, "learning_rate": 7.2847519902020815e-06, "log_odds_chosen": 1.3001044988632202, "log_odds_ratio": -0.3181709349155426, "logits/chosen": -0.654172956943512, "logits/rejected": -0.5089520215988159, "logps/chosen": -0.6784659624099731, "logps/rejected": -1.4389376640319824, "loss": 1.0, "nll_loss": 0.8730292320251465, "rewards/accuracies": 0.875, "rewards/chosen": -0.06784660369157791, "rewards/margins": 0.07604716718196869, "rewards/rejected": -0.1438937783241272, "step": 760 }, { "epoch": 0.46423669360988257, "grad_norm": 1.4872100353240967, "learning_rate": 7.2837721984078385e-06, "log_odds_chosen": 0.8269702196121216, "log_odds_ratio": -0.5237867832183838, "logits/chosen": -0.7333636283874512, "logits/rejected": -0.6078698039054871, "logps/chosen": -0.8467140197753906, "logps/rejected": -1.1987080574035645, "loss": 1.1038, "nll_loss": 1.140836477279663, "rewards/accuracies": 0.625, "rewards/chosen": -0.08467140793800354, "rewards/margins": 0.03519939258694649, "rewards/rejected": -0.11987080425024033, "step": 761 }, { "epoch": 0.4648467286868995, "grad_norm": 1.3433785438537598, "learning_rate": 7.282792406613595e-06, "log_odds_chosen": 0.2570277452468872, "log_odds_ratio": -0.6204051375389099, "logits/chosen": -0.5562739372253418, "logits/rejected": -0.39528512954711914, "logps/chosen": -0.8403723835945129, "logps/rejected": -0.9587957262992859, "loss": 1.0499, "nll_loss": 0.9352473616600037, "rewards/accuracies": 0.625, "rewards/chosen": -0.08403724431991577, "rewards/margins": 0.011842330917716026, "rewards/rejected": -0.09587956964969635, "step": 762 }, { "epoch": 0.4654567637639164, "grad_norm": 1.785841703414917, "learning_rate": 7.28181261481935e-06, "log_odds_chosen": 0.3706302344799042, "log_odds_ratio": -0.5441339612007141, "logits/chosen": -0.9036422371864319, "logits/rejected": -0.6964536905288696, "logps/chosen": -0.974592924118042, "logps/rejected": -1.2250027656555176, "loss": 1.3743, "nll_loss": 1.1413835287094116, "rewards/accuracies": 0.75, "rewards/chosen": -0.09745928645133972, "rewards/margins": 0.02504098042845726, "rewards/rejected": -0.12250027060508728, "step": 763 }, { "epoch": 0.4660667988409333, "grad_norm": 2.24306058883667, "learning_rate": 7.280832823025107e-06, "log_odds_chosen": 0.6843213438987732, "log_odds_ratio": -0.4698963761329651, "logits/chosen": -0.7112022638320923, "logits/rejected": -0.7684860825538635, "logps/chosen": -0.8706790208816528, "logps/rejected": -1.3374704122543335, "loss": 1.2828, "nll_loss": 0.9866891503334045, "rewards/accuracies": 0.875, "rewards/chosen": -0.08706790208816528, "rewards/margins": 0.046679142862558365, "rewards/rejected": -0.13374704122543335, "step": 764 }, { "epoch": 0.4666768339179503, "grad_norm": 7.066658973693848, "learning_rate": 7.279853031230863e-06, "log_odds_chosen": 0.12408305704593658, "log_odds_ratio": -0.7146608233451843, "logits/chosen": -1.1130520105361938, "logits/rejected": -0.8496831655502319, "logps/chosen": -1.1262410879135132, "logps/rejected": -1.2398568391799927, "loss": 1.2691, "nll_loss": 1.4558836221694946, "rewards/accuracies": 0.625, "rewards/chosen": -0.11262410879135132, "rewards/margins": 0.011361571028828621, "rewards/rejected": -0.12398567795753479, "step": 765 }, { "epoch": 0.4672868689949672, "grad_norm": 1.6919492483139038, "learning_rate": 7.278873239436619e-06, "log_odds_chosen": 0.30006369948387146, "log_odds_ratio": -0.6013190150260925, "logits/chosen": -0.8201946020126343, "logits/rejected": -0.9235202670097351, "logps/chosen": -0.9687050580978394, "logps/rejected": -1.1466302871704102, "loss": 1.2493, "nll_loss": 1.2611504793167114, "rewards/accuracies": 0.625, "rewards/chosen": -0.09687051177024841, "rewards/margins": 0.017792517319321632, "rewards/rejected": -0.1146630272269249, "step": 766 }, { "epoch": 0.46789690407198414, "grad_norm": 2.9392812252044678, "learning_rate": 7.277893447642376e-06, "log_odds_chosen": 0.126071497797966, "log_odds_ratio": -0.6983342170715332, "logits/chosen": -0.8727251291275024, "logits/rejected": -0.7837415933609009, "logps/chosen": -0.8931760191917419, "logps/rejected": -0.9563271403312683, "loss": 1.2984, "nll_loss": 1.0932888984680176, "rewards/accuracies": 0.625, "rewards/chosen": -0.08931760489940643, "rewards/margins": 0.006315112113952637, "rewards/rejected": -0.09563271701335907, "step": 767 }, { "epoch": 0.46850693914900104, "grad_norm": 2.2851881980895996, "learning_rate": 7.276913655848132e-06, "log_odds_chosen": 0.6102182865142822, "log_odds_ratio": -0.6514741778373718, "logits/chosen": -1.0818023681640625, "logits/rejected": -1.0096355676651, "logps/chosen": -0.8384925127029419, "logps/rejected": -1.1872594356536865, "loss": 1.1192, "nll_loss": 1.3130906820297241, "rewards/accuracies": 0.5, "rewards/chosen": -0.08384925127029419, "rewards/margins": 0.034876689314842224, "rewards/rejected": -0.11872594058513641, "step": 768 }, { "epoch": 0.469116974226018, "grad_norm": 1.975023865699768, "learning_rate": 7.2759338640538875e-06, "log_odds_chosen": 0.48023855686187744, "log_odds_ratio": -0.7188106179237366, "logits/chosen": -1.0923240184783936, "logits/rejected": -0.9510946273803711, "logps/chosen": -1.0521783828735352, "logps/rejected": -1.407654047012329, "loss": 1.3602, "nll_loss": 1.5354636907577515, "rewards/accuracies": 0.375, "rewards/chosen": -0.105217844247818, "rewards/margins": 0.03554756939411163, "rewards/rejected": -0.14076541364192963, "step": 769 }, { "epoch": 0.4697270093030349, "grad_norm": 1.7416237592697144, "learning_rate": 7.2749540722596445e-06, "log_odds_chosen": 1.0707894563674927, "log_odds_ratio": -0.4574257731437683, "logits/chosen": -0.7912166118621826, "logits/rejected": -0.727969229221344, "logps/chosen": -0.6831991672515869, "logps/rejected": -1.381126880645752, "loss": 1.2795, "nll_loss": 1.1362333297729492, "rewards/accuracies": 0.625, "rewards/chosen": -0.06831992417573929, "rewards/margins": 0.06979276984930038, "rewards/rejected": -0.13811269402503967, "step": 770 }, { "epoch": 0.47033704438005186, "grad_norm": 1.6885038614273071, "learning_rate": 7.273974280465401e-06, "log_odds_chosen": 0.7786606550216675, "log_odds_ratio": -0.44962042570114136, "logits/chosen": -0.8099868297576904, "logits/rejected": -0.8790403008460999, "logps/chosen": -0.7645364999771118, "logps/rejected": -1.2474061250686646, "loss": 1.0316, "nll_loss": 0.940853476524353, "rewards/accuracies": 0.875, "rewards/chosen": -0.07645364105701447, "rewards/margins": 0.04828697443008423, "rewards/rejected": -0.12474062293767929, "step": 771 }, { "epoch": 0.47094707945706876, "grad_norm": 3.9821693897247314, "learning_rate": 7.2729944886711576e-06, "log_odds_chosen": 0.8419683575630188, "log_odds_ratio": -0.47333070635795593, "logits/chosen": -0.7825213074684143, "logits/rejected": -0.6633771657943726, "logps/chosen": -0.8068127632141113, "logps/rejected": -1.364649772644043, "loss": 1.0111, "nll_loss": 1.0015660524368286, "rewards/accuracies": 0.75, "rewards/chosen": -0.08068127930164337, "rewards/margins": 0.0557837039232254, "rewards/rejected": -0.13646498322486877, "step": 772 }, { "epoch": 0.4715571145340857, "grad_norm": 2.806572675704956, "learning_rate": 7.272014696876914e-06, "log_odds_chosen": 0.5073726177215576, "log_odds_ratio": -0.504228949546814, "logits/chosen": -1.202415943145752, "logits/rejected": -1.0788019895553589, "logps/chosen": -1.048817753791809, "logps/rejected": -1.3807873725891113, "loss": 1.3773, "nll_loss": 1.1747736930847168, "rewards/accuracies": 0.875, "rewards/chosen": -0.10488178580999374, "rewards/margins": 0.03319695591926575, "rewards/rejected": -0.1380787342786789, "step": 773 }, { "epoch": 0.4721671496111026, "grad_norm": 4.804425239562988, "learning_rate": 7.27103490508267e-06, "log_odds_chosen": -0.03132426366209984, "log_odds_ratio": -0.7348059415817261, "logits/chosen": -1.0014058351516724, "logits/rejected": -0.9311801195144653, "logps/chosen": -1.1148548126220703, "logps/rejected": -1.0804846286773682, "loss": 1.26, "nll_loss": 1.4897526502609253, "rewards/accuracies": 0.375, "rewards/chosen": -0.11148548871278763, "rewards/margins": -0.0034370198845863342, "rewards/rejected": -0.1080484613776207, "step": 774 }, { "epoch": 0.4727771846881196, "grad_norm": 2.3906586170196533, "learning_rate": 7.270055113288426e-06, "log_odds_chosen": 1.0875444412231445, "log_odds_ratio": -0.4042854309082031, "logits/chosen": -0.6962466239929199, "logits/rejected": -0.846840500831604, "logps/chosen": -0.8547390699386597, "logps/rejected": -1.531618356704712, "loss": 1.1427, "nll_loss": 1.0243645906448364, "rewards/accuracies": 0.875, "rewards/chosen": -0.0854739099740982, "rewards/margins": 0.06768792122602463, "rewards/rejected": -0.15316182374954224, "step": 775 }, { "epoch": 0.4733872197651365, "grad_norm": 4.2102580070495605, "learning_rate": 7.269075321494182e-06, "log_odds_chosen": 0.7255704998970032, "log_odds_ratio": -0.5955746173858643, "logits/chosen": -0.7191329002380371, "logits/rejected": -0.6248856782913208, "logps/chosen": -0.8739631175994873, "logps/rejected": -1.3288629055023193, "loss": 1.2257, "nll_loss": 0.9680086374282837, "rewards/accuracies": 0.5, "rewards/chosen": -0.08739631623029709, "rewards/margins": 0.045489974319934845, "rewards/rejected": -0.13288629055023193, "step": 776 }, { "epoch": 0.47399725484215344, "grad_norm": 1.060547947883606, "learning_rate": 7.268095529699938e-06, "log_odds_chosen": 1.0467746257781982, "log_odds_ratio": -0.41353839635849, "logits/chosen": -0.9595692753791809, "logits/rejected": -0.7626914978027344, "logps/chosen": -0.7988385558128357, "logps/rejected": -1.4521901607513428, "loss": 0.9746, "nll_loss": 0.922490119934082, "rewards/accuracies": 0.875, "rewards/chosen": -0.07988385856151581, "rewards/margins": 0.06533516943454742, "rewards/rejected": -0.14521902799606323, "step": 777 }, { "epoch": 0.47460728991917034, "grad_norm": 4.642797946929932, "learning_rate": 7.267115737905695e-06, "log_odds_chosen": -0.49756962060928345, "log_odds_ratio": -1.0121815204620361, "logits/chosen": -1.0697473287582397, "logits/rejected": -0.9799396991729736, "logps/chosen": -1.2968413829803467, "logps/rejected": -0.9673316478729248, "loss": 1.2601, "nll_loss": 1.5449556112289429, "rewards/accuracies": 0.25, "rewards/chosen": -0.12968412041664124, "rewards/margins": -0.03295096009969711, "rewards/rejected": -0.09673316776752472, "step": 778 }, { "epoch": 0.4752173249961873, "grad_norm": 1.483063817024231, "learning_rate": 7.266135946111451e-06, "log_odds_chosen": 1.4515448808670044, "log_odds_ratio": -0.2586013078689575, "logits/chosen": -0.8074650168418884, "logits/rejected": -0.5166575312614441, "logps/chosen": -0.6355549097061157, "logps/rejected": -1.5128464698791504, "loss": 1.0608, "nll_loss": 0.7664292454719543, "rewards/accuracies": 1.0, "rewards/chosen": -0.06355549395084381, "rewards/margins": 0.08772913366556168, "rewards/rejected": -0.15128463506698608, "step": 779 }, { "epoch": 0.4758273600732042, "grad_norm": 4.5504069328308105, "learning_rate": 7.2651561543172065e-06, "log_odds_chosen": 0.8772875666618347, "log_odds_ratio": -0.5028882026672363, "logits/chosen": -0.639068603515625, "logits/rejected": -0.41910532116889954, "logps/chosen": -0.7958323955535889, "logps/rejected": -1.2610746622085571, "loss": 1.3911, "nll_loss": 0.9423705339431763, "rewards/accuracies": 0.75, "rewards/chosen": -0.07958324253559113, "rewards/margins": 0.04652423411607742, "rewards/rejected": -0.12610748410224915, "step": 780 }, { "epoch": 0.47643739515022115, "grad_norm": 1.2648873329162598, "learning_rate": 7.2641763625229635e-06, "log_odds_chosen": 0.28944092988967896, "log_odds_ratio": -0.6110931634902954, "logits/chosen": -0.7189249992370605, "logits/rejected": -0.7241221070289612, "logps/chosen": -0.9064383506774902, "logps/rejected": -1.0345737934112549, "loss": 1.2188, "nll_loss": 1.0812790393829346, "rewards/accuracies": 0.625, "rewards/chosen": -0.09064383804798126, "rewards/margins": 0.012813542038202286, "rewards/rejected": -0.10345738381147385, "step": 781 }, { "epoch": 0.47704743022723806, "grad_norm": 1.556840181350708, "learning_rate": 7.26319657072872e-06, "log_odds_chosen": 0.8976953029632568, "log_odds_ratio": -0.4359619915485382, "logits/chosen": -0.8996593952178955, "logits/rejected": -0.5898626446723938, "logps/chosen": -0.8164240121841431, "logps/rejected": -1.3510693311691284, "loss": 1.0907, "nll_loss": 0.9306791424751282, "rewards/accuracies": 0.875, "rewards/chosen": -0.08164239674806595, "rewards/margins": 0.053464531898498535, "rewards/rejected": -0.1351069211959839, "step": 782 }, { "epoch": 0.477657465304255, "grad_norm": 2.7932097911834717, "learning_rate": 7.262216778934476e-06, "log_odds_chosen": 0.2232477068901062, "log_odds_ratio": -0.6659490466117859, "logits/chosen": -0.8493054509162903, "logits/rejected": -0.6655734777450562, "logps/chosen": -0.9554531574249268, "logps/rejected": -1.059114933013916, "loss": 1.2147, "nll_loss": 1.153796911239624, "rewards/accuracies": 0.625, "rewards/chosen": -0.09554530680179596, "rewards/margins": 0.010366175323724747, "rewards/rejected": -0.1059114933013916, "step": 783 }, { "epoch": 0.4782675003812719, "grad_norm": 1.5347158908843994, "learning_rate": 7.261236987140233e-06, "log_odds_chosen": 0.6878765821456909, "log_odds_ratio": -0.5427079200744629, "logits/chosen": -0.9457268714904785, "logits/rejected": -0.8153207302093506, "logps/chosen": -0.5844366550445557, "logps/rejected": -1.0126018524169922, "loss": 1.0812, "nll_loss": 0.9889187812805176, "rewards/accuracies": 0.75, "rewards/chosen": -0.05844366177916527, "rewards/margins": 0.04281652346253395, "rewards/rejected": -0.10126018524169922, "step": 784 }, { "epoch": 0.47887753545828887, "grad_norm": 1.7734686136245728, "learning_rate": 7.260257195345989e-06, "log_odds_chosen": 0.9544578194618225, "log_odds_ratio": -0.4063669741153717, "logits/chosen": -0.9284070730209351, "logits/rejected": -0.7921819686889648, "logps/chosen": -0.9975785613059998, "logps/rejected": -1.6821717023849487, "loss": 1.2073, "nll_loss": 1.2104177474975586, "rewards/accuracies": 0.875, "rewards/chosen": -0.09975786507129669, "rewards/margins": 0.06845930963754654, "rewards/rejected": -0.16821716725826263, "step": 785 }, { "epoch": 0.4794875705353058, "grad_norm": 1.855125904083252, "learning_rate": 7.259277403551745e-06, "log_odds_chosen": 0.05417283624410629, "log_odds_ratio": -0.7323513031005859, "logits/chosen": -1.068895936012268, "logits/rejected": -0.968110203742981, "logps/chosen": -1.3199822902679443, "logps/rejected": -1.366964340209961, "loss": 1.2126, "nll_loss": 1.44459867477417, "rewards/accuracies": 0.625, "rewards/chosen": -0.1319982409477234, "rewards/margins": 0.004698202013969421, "rewards/rejected": -0.1366964429616928, "step": 786 }, { "epoch": 0.48009760561232273, "grad_norm": 3.6749703884124756, "learning_rate": 7.258297611757501e-06, "log_odds_chosen": 0.7234668731689453, "log_odds_ratio": -0.4994511902332306, "logits/chosen": -0.9078056812286377, "logits/rejected": -0.6890115737915039, "logps/chosen": -0.9242324829101562, "logps/rejected": -1.3672689199447632, "loss": 1.2324, "nll_loss": 1.3837493658065796, "rewards/accuracies": 0.625, "rewards/chosen": -0.09242324531078339, "rewards/margins": 0.04430364817380905, "rewards/rejected": -0.13672688603401184, "step": 787 }, { "epoch": 0.48070764068933963, "grad_norm": 2.365774631500244, "learning_rate": 7.257317819963257e-06, "log_odds_chosen": 0.08890673518180847, "log_odds_ratio": -0.7037118077278137, "logits/chosen": -1.0034974813461304, "logits/rejected": -0.9185813069343567, "logps/chosen": -0.9689663648605347, "logps/rejected": -1.0476808547973633, "loss": 1.2234, "nll_loss": 1.1801097393035889, "rewards/accuracies": 0.625, "rewards/chosen": -0.09689664840698242, "rewards/margins": 0.007871448993682861, "rewards/rejected": -0.10476808995008469, "step": 788 }, { "epoch": 0.4813176757663566, "grad_norm": 4.681107044219971, "learning_rate": 7.256338028169014e-06, "log_odds_chosen": 0.30974507331848145, "log_odds_ratio": -0.7264704704284668, "logits/chosen": -0.7991397380828857, "logits/rejected": -0.909953236579895, "logps/chosen": -0.9348466396331787, "logps/rejected": -1.070995569229126, "loss": 1.1484, "nll_loss": 1.4086493253707886, "rewards/accuracies": 0.5, "rewards/chosen": -0.09348466992378235, "rewards/margins": 0.013614900410175323, "rewards/rejected": -0.10709956288337708, "step": 789 }, { "epoch": 0.4819277108433735, "grad_norm": 5.839620590209961, "learning_rate": 7.25535823637477e-06, "log_odds_chosen": 0.6018407344818115, "log_odds_ratio": -0.6463212966918945, "logits/chosen": -0.9626110792160034, "logits/rejected": -0.8444914817810059, "logps/chosen": -0.9321059584617615, "logps/rejected": -1.4448845386505127, "loss": 1.2197, "nll_loss": 1.449941635131836, "rewards/accuracies": 0.625, "rewards/chosen": -0.0932106003165245, "rewards/margins": 0.051277849823236465, "rewards/rejected": -0.14448845386505127, "step": 790 }, { "epoch": 0.48253774592039045, "grad_norm": 1.2997180223464966, "learning_rate": 7.254378444580526e-06, "log_odds_chosen": 0.7130952477455139, "log_odds_ratio": -0.44526582956314087, "logits/chosen": -0.9191292524337769, "logits/rejected": -0.5550500154495239, "logps/chosen": -0.9300880432128906, "logps/rejected": -1.4200220108032227, "loss": 1.2394, "nll_loss": 1.2114245891571045, "rewards/accuracies": 0.875, "rewards/chosen": -0.09300880134105682, "rewards/margins": 0.04899340495467186, "rewards/rejected": -0.1420021951198578, "step": 791 }, { "epoch": 0.48314778099740735, "grad_norm": 2.439842462539673, "learning_rate": 7.2533986527862825e-06, "log_odds_chosen": 0.02240099012851715, "log_odds_ratio": -0.8937149047851562, "logits/chosen": -0.7347018122673035, "logits/rejected": -0.7691668272018433, "logps/chosen": -0.9493063688278198, "logps/rejected": -0.9321012496948242, "loss": 1.1161, "nll_loss": 1.0607949495315552, "rewards/accuracies": 0.375, "rewards/chosen": -0.09493064135313034, "rewards/margins": -0.0017205122858285904, "rewards/rejected": -0.0932101234793663, "step": 792 }, { "epoch": 0.48375781607442425, "grad_norm": 2.5511927604675293, "learning_rate": 7.252418860992039e-06, "log_odds_chosen": 0.12851275503635406, "log_odds_ratio": -0.7325413227081299, "logits/chosen": -1.0162529945373535, "logits/rejected": -0.9011362791061401, "logps/chosen": -1.0482151508331299, "logps/rejected": -1.0739555358886719, "loss": 1.3064, "nll_loss": 1.3138893842697144, "rewards/accuracies": 0.375, "rewards/chosen": -0.10482152551412582, "rewards/margins": 0.002574038691818714, "rewards/rejected": -0.10739555954933167, "step": 793 }, { "epoch": 0.4843678511514412, "grad_norm": 1.603407621383667, "learning_rate": 7.251439069197795e-06, "log_odds_chosen": 1.0160812139511108, "log_odds_ratio": -0.41815340518951416, "logits/chosen": -0.6988438963890076, "logits/rejected": -0.7557008266448975, "logps/chosen": -0.8827090263366699, "logps/rejected": -1.5553231239318848, "loss": 1.1706, "nll_loss": 1.1726765632629395, "rewards/accuracies": 0.875, "rewards/chosen": -0.08827090263366699, "rewards/margins": 0.06726141273975372, "rewards/rejected": -0.1555323302745819, "step": 794 }, { "epoch": 0.4849778862284581, "grad_norm": 2.5494213104248047, "learning_rate": 7.250459277403552e-06, "log_odds_chosen": -0.08603289723396301, "log_odds_ratio": -0.7802548408508301, "logits/chosen": -1.0387016534805298, "logits/rejected": -0.9503755569458008, "logps/chosen": -0.9516075849533081, "logps/rejected": -0.9388535022735596, "loss": 1.1129, "nll_loss": 1.1811480522155762, "rewards/accuracies": 0.5, "rewards/chosen": -0.09516075998544693, "rewards/margins": -0.0012754080817103386, "rewards/rejected": -0.09388535469770432, "step": 795 }, { "epoch": 0.48558792130547507, "grad_norm": 1.6270803213119507, "learning_rate": 7.249479485609308e-06, "log_odds_chosen": 0.2253769338130951, "log_odds_ratio": -0.6720061302185059, "logits/chosen": -0.881791889667511, "logits/rejected": -0.7205715775489807, "logps/chosen": -0.8142657279968262, "logps/rejected": -0.9643504619598389, "loss": 1.1672, "nll_loss": 1.0631166696548462, "rewards/accuracies": 0.75, "rewards/chosen": -0.08142657577991486, "rewards/margins": 0.015008474700152874, "rewards/rejected": -0.0964350476861, "step": 796 }, { "epoch": 0.48619795638249197, "grad_norm": 2.496614694595337, "learning_rate": 7.248499693815063e-06, "log_odds_chosen": 0.6917389631271362, "log_odds_ratio": -0.5748059749603271, "logits/chosen": -0.9213970899581909, "logits/rejected": -0.8567695021629333, "logps/chosen": -0.878300666809082, "logps/rejected": -1.3917039632797241, "loss": 1.3215, "nll_loss": 1.1778572797775269, "rewards/accuracies": 0.5, "rewards/chosen": -0.0878300741314888, "rewards/margins": 0.05134032666683197, "rewards/rejected": -0.13917040824890137, "step": 797 }, { "epoch": 0.4868079914595089, "grad_norm": 3.190375566482544, "learning_rate": 7.24751990202082e-06, "log_odds_chosen": 0.8072348833084106, "log_odds_ratio": -0.5003077983856201, "logits/chosen": -0.8628700971603394, "logits/rejected": -0.6548390984535217, "logps/chosen": -0.7150049209594727, "logps/rejected": -1.1822305917739868, "loss": 1.1004, "nll_loss": 0.9657073616981506, "rewards/accuracies": 0.75, "rewards/chosen": -0.0715004950761795, "rewards/margins": 0.04672256484627724, "rewards/rejected": -0.11822305619716644, "step": 798 }, { "epoch": 0.48741802653652583, "grad_norm": 1.634811520576477, "learning_rate": 7.246540110226576e-06, "log_odds_chosen": 0.6085306406021118, "log_odds_ratio": -0.4953051209449768, "logits/chosen": -0.8319675922393799, "logits/rejected": -0.5622016191482544, "logps/chosen": -0.9681849479675293, "logps/rejected": -1.3836734294891357, "loss": 1.0682, "nll_loss": 1.0388612747192383, "rewards/accuracies": 0.75, "rewards/chosen": -0.09681850671768188, "rewards/margins": 0.04154884070158005, "rewards/rejected": -0.13836733996868134, "step": 799 }, { "epoch": 0.4880280616135428, "grad_norm": 2.084815502166748, "learning_rate": 7.245560318432333e-06, "log_odds_chosen": 0.42036765813827515, "log_odds_ratio": -0.5882663130760193, "logits/chosen": -0.767720103263855, "logits/rejected": -0.6994869709014893, "logps/chosen": -0.8926767110824585, "logps/rejected": -1.2187727689743042, "loss": 1.0987, "nll_loss": 1.0595262050628662, "rewards/accuracies": 0.625, "rewards/chosen": -0.08926767855882645, "rewards/margins": 0.03260960429906845, "rewards/rejected": -0.1218772828578949, "step": 800 }, { "epoch": 0.4886380966905597, "grad_norm": 1.8389400243759155, "learning_rate": 7.244580526638089e-06, "log_odds_chosen": 0.4693268835544586, "log_odds_ratio": -0.6213112473487854, "logits/chosen": -1.0124317407608032, "logits/rejected": -0.9413136839866638, "logps/chosen": -0.8444666266441345, "logps/rejected": -1.0981898307800293, "loss": 1.1556, "nll_loss": 1.099231481552124, "rewards/accuracies": 0.625, "rewards/chosen": -0.08444665372371674, "rewards/margins": 0.025372322648763657, "rewards/rejected": -0.10981898009777069, "step": 801 }, { "epoch": 0.48924813176757664, "grad_norm": 3.5297653675079346, "learning_rate": 7.2436007348438455e-06, "log_odds_chosen": 0.40776968002319336, "log_odds_ratio": -0.5667057633399963, "logits/chosen": -0.8261697888374329, "logits/rejected": -0.7813771367073059, "logps/chosen": -0.9783549308776855, "logps/rejected": -1.288022756576538, "loss": 1.216, "nll_loss": 1.1851294040679932, "rewards/accuracies": 0.75, "rewards/chosen": -0.0978354960680008, "rewards/margins": 0.030966784805059433, "rewards/rejected": -0.12880228459835052, "step": 802 }, { "epoch": 0.48985816684459355, "grad_norm": 2.641652822494507, "learning_rate": 7.242620943049602e-06, "log_odds_chosen": 2.502847194671631, "log_odds_ratio": -0.17704792320728302, "logits/chosen": -0.4390939474105835, "logits/rejected": -0.6621981859207153, "logps/chosen": -0.519568681716919, "logps/rejected": -2.252044200897217, "loss": 1.17, "nll_loss": 1.115958571434021, "rewards/accuracies": 1.0, "rewards/chosen": -0.051956865936517715, "rewards/margins": 0.1732475459575653, "rewards/rejected": -0.22520440816879272, "step": 803 }, { "epoch": 0.4904682019216105, "grad_norm": 1.9665602445602417, "learning_rate": 7.241641151255358e-06, "log_odds_chosen": 0.7771542072296143, "log_odds_ratio": -0.5195245146751404, "logits/chosen": -0.5612184405326843, "logits/rejected": -0.6434416770935059, "logps/chosen": -0.8397537469863892, "logps/rejected": -1.2101365327835083, "loss": 1.1053, "nll_loss": 1.14451265335083, "rewards/accuracies": 0.75, "rewards/chosen": -0.08397538214921951, "rewards/margins": 0.037038274109363556, "rewards/rejected": -0.12101365625858307, "step": 804 }, { "epoch": 0.4910782369986274, "grad_norm": 2.627150535583496, "learning_rate": 7.240661359461114e-06, "log_odds_chosen": 1.245457410812378, "log_odds_ratio": -0.41804584860801697, "logits/chosen": -0.7211005687713623, "logits/rejected": -0.7050368785858154, "logps/chosen": -0.7512810826301575, "logps/rejected": -1.4887288808822632, "loss": 1.2586, "nll_loss": 0.9018502831459045, "rewards/accuracies": 0.75, "rewards/chosen": -0.07512811571359634, "rewards/margins": 0.0737447738647461, "rewards/rejected": -0.14887288212776184, "step": 805 }, { "epoch": 0.49168827207564436, "grad_norm": 1.9989891052246094, "learning_rate": 7.239681567666871e-06, "log_odds_chosen": 0.9235308170318604, "log_odds_ratio": -0.49754399061203003, "logits/chosen": -0.8886768817901611, "logits/rejected": -0.6138063073158264, "logps/chosen": -0.7142940759658813, "logps/rejected": -1.3636600971221924, "loss": 1.2361, "nll_loss": 1.1874531507492065, "rewards/accuracies": 0.75, "rewards/chosen": -0.07142941653728485, "rewards/margins": 0.06493659317493439, "rewards/rejected": -0.13636600971221924, "step": 806 }, { "epoch": 0.49229830715266126, "grad_norm": 3.786206007003784, "learning_rate": 7.238701775872627e-06, "log_odds_chosen": 1.7052350044250488, "log_odds_ratio": -0.315047949552536, "logits/chosen": -0.8373996615409851, "logits/rejected": -0.813723087310791, "logps/chosen": -0.6297135353088379, "logps/rejected": -1.6043181419372559, "loss": 1.1577, "nll_loss": 1.1178723573684692, "rewards/accuracies": 0.875, "rewards/chosen": -0.06297135353088379, "rewards/margins": 0.09746047109365463, "rewards/rejected": -0.16043183207511902, "step": 807 }, { "epoch": 0.4929083422296782, "grad_norm": 4.016673564910889, "learning_rate": 7.237721984078383e-06, "log_odds_chosen": 0.6711339354515076, "log_odds_ratio": -0.5836407542228699, "logits/chosen": -0.9028897285461426, "logits/rejected": -0.7593562006950378, "logps/chosen": -0.8084317445755005, "logps/rejected": -1.0923570394515991, "loss": 1.3518, "nll_loss": 1.0343564748764038, "rewards/accuracies": 0.625, "rewards/chosen": -0.08084317296743393, "rewards/margins": 0.028392530977725983, "rewards/rejected": -0.10923570394515991, "step": 808 }, { "epoch": 0.4935183773066951, "grad_norm": 1.7525932788848877, "learning_rate": 7.236742192284139e-06, "log_odds_chosen": 0.2277045100927353, "log_odds_ratio": -0.7070988416671753, "logits/chosen": -0.8176113367080688, "logits/rejected": -0.7122135162353516, "logps/chosen": -1.0940155982971191, "logps/rejected": -1.1997309923171997, "loss": 1.3198, "nll_loss": 1.3219640254974365, "rewards/accuracies": 0.625, "rewards/chosen": -0.10940156131982803, "rewards/margins": 0.010571533814072609, "rewards/rejected": -0.1199730932712555, "step": 809 }, { "epoch": 0.4941284123837121, "grad_norm": 1.7483797073364258, "learning_rate": 7.235762400489895e-06, "log_odds_chosen": 0.006495557725429535, "log_odds_ratio": -0.7110786437988281, "logits/chosen": -1.0188407897949219, "logits/rejected": -0.9475727081298828, "logps/chosen": -0.8251781463623047, "logps/rejected": -0.8197903633117676, "loss": 1.1908, "nll_loss": 1.0743625164031982, "rewards/accuracies": 0.5, "rewards/chosen": -0.08251781761646271, "rewards/margins": -0.0005387775599956512, "rewards/rejected": -0.08197903633117676, "step": 810 }, { "epoch": 0.494738447460729, "grad_norm": 2.1370608806610107, "learning_rate": 7.234782608695651e-06, "log_odds_chosen": 0.5952731370925903, "log_odds_ratio": -0.603042483329773, "logits/chosen": -0.7927750945091248, "logits/rejected": -0.5691177845001221, "logps/chosen": -0.8668727874755859, "logps/rejected": -1.2567546367645264, "loss": 1.0579, "nll_loss": 0.9995131492614746, "rewards/accuracies": 0.5, "rewards/chosen": -0.08668728172779083, "rewards/margins": 0.03898818790912628, "rewards/rejected": -0.12567546963691711, "step": 811 }, { "epoch": 0.49534848253774594, "grad_norm": 1.654668927192688, "learning_rate": 7.233802816901408e-06, "log_odds_chosen": 0.5269942283630371, "log_odds_ratio": -0.822985053062439, "logits/chosen": -0.981127142906189, "logits/rejected": -0.8784830570220947, "logps/chosen": -0.8692724108695984, "logps/rejected": -1.097283959388733, "loss": 1.0917, "nll_loss": 1.2571675777435303, "rewards/accuracies": 0.625, "rewards/chosen": -0.08692724257707596, "rewards/margins": 0.02280115708708763, "rewards/rejected": -0.10972839593887329, "step": 812 }, { "epoch": 0.49595851761476284, "grad_norm": 9.357263565063477, "learning_rate": 7.2328230251071645e-06, "log_odds_chosen": 1.0779916048049927, "log_odds_ratio": -0.3634085953235626, "logits/chosen": -0.9469009637832642, "logits/rejected": -0.7057602405548096, "logps/chosen": -0.9551061391830444, "logps/rejected": -1.66879403591156, "loss": 1.2987, "nll_loss": 1.2896924018859863, "rewards/accuracies": 1.0, "rewards/chosen": -0.09551061689853668, "rewards/margins": 0.07136879861354828, "rewards/rejected": -0.16687941551208496, "step": 813 }, { "epoch": 0.4965685526917798, "grad_norm": 1.1999704837799072, "learning_rate": 7.231843233312921e-06, "log_odds_chosen": -0.04346969723701477, "log_odds_ratio": -0.845470666885376, "logits/chosen": -0.8097286224365234, "logits/rejected": -0.7618380784988403, "logps/chosen": -0.9222749471664429, "logps/rejected": -0.8774998188018799, "loss": 1.0122, "nll_loss": 1.0233304500579834, "rewards/accuracies": 0.375, "rewards/chosen": -0.092227503657341, "rewards/margins": -0.004477512091398239, "rewards/rejected": -0.08774998784065247, "step": 814 }, { "epoch": 0.4971785877687967, "grad_norm": 2.192497491836548, "learning_rate": 7.230863441518677e-06, "log_odds_chosen": 0.8564324378967285, "log_odds_ratio": -0.4268239438533783, "logits/chosen": -0.6872550249099731, "logits/rejected": -0.6210874915122986, "logps/chosen": -0.6791593432426453, "logps/rejected": -1.2083914279937744, "loss": 0.9628, "nll_loss": 0.8126208186149597, "rewards/accuracies": 0.875, "rewards/chosen": -0.06791593134403229, "rewards/margins": 0.05292321741580963, "rewards/rejected": -0.12083915621042252, "step": 815 }, { "epoch": 0.49778862284581366, "grad_norm": 5.995279312133789, "learning_rate": 7.229883649724433e-06, "log_odds_chosen": 0.7826809883117676, "log_odds_ratio": -0.5892957448959351, "logits/chosen": -0.8665992617607117, "logits/rejected": -0.6898818016052246, "logps/chosen": -0.9785937070846558, "logps/rejected": -1.50315523147583, "loss": 1.1339, "nll_loss": 1.153578281402588, "rewards/accuracies": 0.5, "rewards/chosen": -0.09785936772823334, "rewards/margins": 0.05245615541934967, "rewards/rejected": -0.150315523147583, "step": 816 }, { "epoch": 0.49839865792283056, "grad_norm": 3.632349729537964, "learning_rate": 7.22890385793019e-06, "log_odds_chosen": 0.7466500997543335, "log_odds_ratio": -0.5129667520523071, "logits/chosen": -0.9264311790466309, "logits/rejected": -0.7957487106323242, "logps/chosen": -0.8429675102233887, "logps/rejected": -1.3485698699951172, "loss": 1.3275, "nll_loss": 1.0113296508789062, "rewards/accuracies": 0.625, "rewards/chosen": -0.08429676294326782, "rewards/margins": 0.05056023597717285, "rewards/rejected": -0.13485698401927948, "step": 817 }, { "epoch": 0.4990086929998475, "grad_norm": 3.538708209991455, "learning_rate": 7.227924066135946e-06, "log_odds_chosen": 0.9131835103034973, "log_odds_ratio": -0.4417022466659546, "logits/chosen": -0.8689677119255066, "logits/rejected": -0.791393518447876, "logps/chosen": -1.0059109926223755, "logps/rejected": -1.6496310234069824, "loss": 1.2043, "nll_loss": 1.2962067127227783, "rewards/accuracies": 0.625, "rewards/chosen": -0.10059110820293427, "rewards/margins": 0.0643720030784607, "rewards/rejected": -0.16496309638023376, "step": 818 }, { "epoch": 0.4996187280768644, "grad_norm": 2.054060935974121, "learning_rate": 7.226944274341702e-06, "log_odds_chosen": 0.7697902321815491, "log_odds_ratio": -0.60392826795578, "logits/chosen": -0.7017871737480164, "logits/rejected": -0.7794930338859558, "logps/chosen": -0.7790006399154663, "logps/rejected": -1.1720960140228271, "loss": 1.2644, "nll_loss": 1.2723757028579712, "rewards/accuracies": 0.625, "rewards/chosen": -0.07790006697177887, "rewards/margins": 0.039309535175561905, "rewards/rejected": -0.11720959842205048, "step": 819 }, { "epoch": 0.5002287631538813, "grad_norm": 3.3417694568634033, "learning_rate": 7.225964482547458e-06, "log_odds_chosen": 1.5242910385131836, "log_odds_ratio": -0.29974526166915894, "logits/chosen": -0.7484899759292603, "logits/rejected": -0.71079421043396, "logps/chosen": -0.6196498870849609, "logps/rejected": -1.595646619796753, "loss": 1.0474, "nll_loss": 0.745677649974823, "rewards/accuracies": 1.0, "rewards/chosen": -0.06196499243378639, "rewards/margins": 0.09759967029094696, "rewards/rejected": -0.15956465899944305, "step": 820 }, { "epoch": 0.5008387982308983, "grad_norm": 5.368313789367676, "learning_rate": 7.224984690753214e-06, "log_odds_chosen": 0.18166683614253998, "log_odds_ratio": -0.6335068941116333, "logits/chosen": -0.9326682090759277, "logits/rejected": -0.8595117330551147, "logps/chosen": -0.9227672219276428, "logps/rejected": -1.0189403295516968, "loss": 1.0656, "nll_loss": 0.9868752956390381, "rewards/accuracies": 0.75, "rewards/chosen": -0.09227671474218369, "rewards/margins": 0.009617323987185955, "rewards/rejected": -0.10189403593540192, "step": 821 }, { "epoch": 0.5014488333079152, "grad_norm": 13.70531940460205, "learning_rate": 7.2240048989589704e-06, "log_odds_chosen": 0.13763931393623352, "log_odds_ratio": -0.6902065873146057, "logits/chosen": -0.9361180067062378, "logits/rejected": -0.7273162603378296, "logps/chosen": -0.7814087867736816, "logps/rejected": -0.8049325942993164, "loss": 1.2336, "nll_loss": 1.140970230102539, "rewards/accuracies": 0.5, "rewards/chosen": -0.07814087718725204, "rewards/margins": 0.002352392300963402, "rewards/rejected": -0.08049326390028, "step": 822 }, { "epoch": 0.5020588683849322, "grad_norm": 2.9539918899536133, "learning_rate": 7.223025107164727e-06, "log_odds_chosen": 0.8960765600204468, "log_odds_ratio": -0.4589473605155945, "logits/chosen": -0.9039955139160156, "logits/rejected": -0.7655103206634521, "logps/chosen": -0.6523306369781494, "logps/rejected": -1.08062744140625, "loss": 1.1574, "nll_loss": 1.0688433647155762, "rewards/accuracies": 0.875, "rewards/chosen": -0.06523305922746658, "rewards/margins": 0.04282967746257782, "rewards/rejected": -0.1080627366900444, "step": 823 }, { "epoch": 0.502668903461949, "grad_norm": 1.400601863861084, "learning_rate": 7.2220453153704835e-06, "log_odds_chosen": 0.6844586730003357, "log_odds_ratio": -0.4550301134586334, "logits/chosen": -1.0830799341201782, "logits/rejected": -0.8166736364364624, "logps/chosen": -0.9125875234603882, "logps/rejected": -1.3209487199783325, "loss": 1.1381, "nll_loss": 1.4481536149978638, "rewards/accuracies": 0.75, "rewards/chosen": -0.09125875681638718, "rewards/margins": 0.04083612561225891, "rewards/rejected": -0.1320948749780655, "step": 824 }, { "epoch": 0.503278938538966, "grad_norm": 1.835517168045044, "learning_rate": 7.2210655235762405e-06, "log_odds_chosen": 0.5724440217018127, "log_odds_ratio": -0.5193227529525757, "logits/chosen": -0.933672308921814, "logits/rejected": -0.5928691625595093, "logps/chosen": -0.7113375067710876, "logps/rejected": -1.015336513519287, "loss": 1.0891, "nll_loss": 0.8557981848716736, "rewards/accuracies": 0.875, "rewards/chosen": -0.07113374769687653, "rewards/margins": 0.030399901792407036, "rewards/rejected": -0.10153365135192871, "step": 825 }, { "epoch": 0.503888973615983, "grad_norm": 1.762446403503418, "learning_rate": 7.220085731781996e-06, "log_odds_chosen": 0.09409511089324951, "log_odds_ratio": -0.7446218729019165, "logits/chosen": -1.083295226097107, "logits/rejected": -0.8602654933929443, "logps/chosen": -0.97372967004776, "logps/rejected": -1.0014153718948364, "loss": 1.1398, "nll_loss": 1.2790167331695557, "rewards/accuracies": 0.5, "rewards/chosen": -0.09737296402454376, "rewards/margins": 0.0027685808017849922, "rewards/rejected": -0.10014154016971588, "step": 826 }, { "epoch": 0.5044990086929998, "grad_norm": 1.3085089921951294, "learning_rate": 7.219105939987752e-06, "log_odds_chosen": 0.23176436126232147, "log_odds_ratio": -0.7498733997344971, "logits/chosen": -0.9083768725395203, "logits/rejected": -0.7974525690078735, "logps/chosen": -0.9597131609916687, "logps/rejected": -1.0994911193847656, "loss": 1.3439, "nll_loss": 1.057464599609375, "rewards/accuracies": 0.625, "rewards/chosen": -0.09597132354974747, "rewards/margins": 0.013977780938148499, "rewards/rejected": -0.10994911193847656, "step": 827 }, { "epoch": 0.5051090437700168, "grad_norm": 1.4038920402526855, "learning_rate": 7.218126148193509e-06, "log_odds_chosen": 0.32768890261650085, "log_odds_ratio": -0.6184827089309692, "logits/chosen": -0.9464152455329895, "logits/rejected": -0.7704837322235107, "logps/chosen": -0.9842202663421631, "logps/rejected": -1.216641902923584, "loss": 1.1984, "nll_loss": 1.2230225801467896, "rewards/accuracies": 0.75, "rewards/chosen": -0.09842202812433243, "rewards/margins": 0.0232421662658453, "rewards/rejected": -0.12166419625282288, "step": 828 }, { "epoch": 0.5057190788470337, "grad_norm": 1.5759371519088745, "learning_rate": 7.217146356399265e-06, "log_odds_chosen": 0.7724486589431763, "log_odds_ratio": -0.6538493633270264, "logits/chosen": -1.064894676208496, "logits/rejected": -0.696073055267334, "logps/chosen": -0.9610277414321899, "logps/rejected": -1.5136113166809082, "loss": 1.2824, "nll_loss": 1.2614853382110596, "rewards/accuracies": 0.375, "rewards/chosen": -0.09610278159379959, "rewards/margins": 0.05525835603475571, "rewards/rejected": -0.1513611376285553, "step": 829 }, { "epoch": 0.5063291139240507, "grad_norm": 2.1555724143981934, "learning_rate": 7.216166564605021e-06, "log_odds_chosen": 0.40209293365478516, "log_odds_ratio": -0.6181955933570862, "logits/chosen": -1.2296733856201172, "logits/rejected": -1.1211704015731812, "logps/chosen": -1.0167064666748047, "logps/rejected": -1.3232970237731934, "loss": 1.3862, "nll_loss": 1.383947730064392, "rewards/accuracies": 0.625, "rewards/chosen": -0.10167065262794495, "rewards/margins": 0.030659042298793793, "rewards/rejected": -0.13232970237731934, "step": 830 }, { "epoch": 0.5069391490010675, "grad_norm": 1.644810438156128, "learning_rate": 7.215186772810778e-06, "log_odds_chosen": 0.6274082660675049, "log_odds_ratio": -0.587634801864624, "logits/chosen": -0.691110372543335, "logits/rejected": -0.6135948896408081, "logps/chosen": -0.7817283868789673, "logps/rejected": -1.0720220804214478, "loss": 1.0777, "nll_loss": 0.9681916236877441, "rewards/accuracies": 0.625, "rewards/chosen": -0.07817283272743225, "rewards/margins": 0.029029373079538345, "rewards/rejected": -0.10720221698284149, "step": 831 }, { "epoch": 0.5075491840780845, "grad_norm": 1.4779596328735352, "learning_rate": 7.214206981016533e-06, "log_odds_chosen": 0.5701297521591187, "log_odds_ratio": -0.4839121401309967, "logits/chosen": -0.7411104440689087, "logits/rejected": -0.6360064744949341, "logps/chosen": -0.8434473276138306, "logps/rejected": -1.2456625699996948, "loss": 1.1528, "nll_loss": 1.233065128326416, "rewards/accuracies": 0.75, "rewards/chosen": -0.08434473723173141, "rewards/margins": 0.04022151231765747, "rewards/rejected": -0.12456625699996948, "step": 832 }, { "epoch": 0.5081592191551014, "grad_norm": 0.9558501839637756, "learning_rate": 7.2132271892222895e-06, "log_odds_chosen": 0.8876506686210632, "log_odds_ratio": -0.512181282043457, "logits/chosen": -0.8936072587966919, "logits/rejected": -0.7377627491950989, "logps/chosen": -0.8451572060585022, "logps/rejected": -1.4444180727005005, "loss": 1.1694, "nll_loss": 0.9812623858451843, "rewards/accuracies": 0.75, "rewards/chosen": -0.08451572060585022, "rewards/margins": 0.059926100075244904, "rewards/rejected": -0.14444181323051453, "step": 833 }, { "epoch": 0.5087692542321184, "grad_norm": 1.4151691198349, "learning_rate": 7.2122473974280465e-06, "log_odds_chosen": 0.9980777502059937, "log_odds_ratio": -0.45649272203445435, "logits/chosen": -0.8730754852294922, "logits/rejected": -0.774585485458374, "logps/chosen": -0.774637758731842, "logps/rejected": -1.4156497716903687, "loss": 1.2237, "nll_loss": 1.1429837942123413, "rewards/accuracies": 0.75, "rewards/chosen": -0.0774637758731842, "rewards/margins": 0.0641011968255043, "rewards/rejected": -0.1415649801492691, "step": 834 }, { "epoch": 0.5093792893091352, "grad_norm": 1.3581949472427368, "learning_rate": 7.2112676056338026e-06, "log_odds_chosen": 0.9599758982658386, "log_odds_ratio": -0.43304312229156494, "logits/chosen": -0.7183716297149658, "logits/rejected": -0.5949857831001282, "logps/chosen": -0.6560182571411133, "logps/rejected": -1.1593799591064453, "loss": 1.0511, "nll_loss": 0.9152950048446655, "rewards/accuracies": 0.75, "rewards/chosen": -0.06560182571411133, "rewards/margins": 0.05033617466688156, "rewards/rejected": -0.11593800038099289, "step": 835 }, { "epoch": 0.5099893243861522, "grad_norm": 1.282423496246338, "learning_rate": 7.210287813839559e-06, "log_odds_chosen": 0.5381938815116882, "log_odds_ratio": -0.6142145991325378, "logits/chosen": -0.7666431665420532, "logits/rejected": -0.7358349561691284, "logps/chosen": -0.782464861869812, "logps/rejected": -1.1001083850860596, "loss": 1.2373, "nll_loss": 1.155851125717163, "rewards/accuracies": 0.5, "rewards/chosen": -0.07824648916721344, "rewards/margins": 0.031764350831508636, "rewards/rejected": -0.11001083254814148, "step": 836 }, { "epoch": 0.5105993594631691, "grad_norm": 1.7101666927337646, "learning_rate": 7.209308022045315e-06, "log_odds_chosen": 1.4240758419036865, "log_odds_ratio": -0.2668229937553406, "logits/chosen": -0.7616926431655884, "logits/rejected": -0.6152201890945435, "logps/chosen": -0.6522952318191528, "logps/rejected": -1.5466947555541992, "loss": 1.0891, "nll_loss": 0.8732373714447021, "rewards/accuracies": 0.875, "rewards/chosen": -0.06522952020168304, "rewards/margins": 0.08943995833396912, "rewards/rejected": -0.15466947853565216, "step": 837 }, { "epoch": 0.5112093945401861, "grad_norm": 1.682380199432373, "learning_rate": 7.208328230251071e-06, "log_odds_chosen": 0.04528428614139557, "log_odds_ratio": -0.7388909459114075, "logits/chosen": -0.7651436924934387, "logits/rejected": -0.7231766581535339, "logps/chosen": -0.6899538636207581, "logps/rejected": -0.674663782119751, "loss": 1.0647, "nll_loss": 0.8313131928443909, "rewards/accuracies": 0.5, "rewards/chosen": -0.06899537891149521, "rewards/margins": -0.0015289974398911, "rewards/rejected": -0.0674663856625557, "step": 838 }, { "epoch": 0.511819429617203, "grad_norm": 5.947202682495117, "learning_rate": 7.207348438456828e-06, "log_odds_chosen": 0.7954505085945129, "log_odds_ratio": -0.5070545077323914, "logits/chosen": -0.9429283142089844, "logits/rejected": -0.7646000385284424, "logps/chosen": -0.8097546696662903, "logps/rejected": -1.3631199598312378, "loss": 1.2523, "nll_loss": 1.3880351781845093, "rewards/accuracies": 0.625, "rewards/chosen": -0.08097546547651291, "rewards/margins": 0.05533653497695923, "rewards/rejected": -0.13631199300289154, "step": 839 }, { "epoch": 0.5124294646942199, "grad_norm": 1.021550178527832, "learning_rate": 7.206368646662584e-06, "log_odds_chosen": 0.8509485125541687, "log_odds_ratio": -0.47600236535072327, "logits/chosen": -0.9725985527038574, "logits/rejected": -0.8583354949951172, "logps/chosen": -0.7218800187110901, "logps/rejected": -1.2065200805664062, "loss": 1.0231, "nll_loss": 1.001857042312622, "rewards/accuracies": 0.75, "rewards/chosen": -0.07218800485134125, "rewards/margins": 0.048464007675647736, "rewards/rejected": -0.12065200507640839, "step": 840 }, { "epoch": 0.5130394997712369, "grad_norm": 3.3746237754821777, "learning_rate": 7.20538885486834e-06, "log_odds_chosen": 0.236036479473114, "log_odds_ratio": -0.659974217414856, "logits/chosen": -0.6194189786911011, "logits/rejected": -0.6184456944465637, "logps/chosen": -0.8681596517562866, "logps/rejected": -1.0571258068084717, "loss": 1.194, "nll_loss": 0.9680041074752808, "rewards/accuracies": 0.5, "rewards/chosen": -0.0868159681558609, "rewards/margins": 0.01889660768210888, "rewards/rejected": -0.10571257770061493, "step": 841 }, { "epoch": 0.5136495348482538, "grad_norm": 1.625806450843811, "learning_rate": 7.204409063074097e-06, "log_odds_chosen": 0.566459596157074, "log_odds_ratio": -0.6569668054580688, "logits/chosen": -0.7174984216690063, "logits/rejected": -0.6468517184257507, "logps/chosen": -0.9719457030296326, "logps/rejected": -1.4220633506774902, "loss": 1.1018, "nll_loss": 1.0481575727462769, "rewards/accuracies": 0.625, "rewards/chosen": -0.09719456732273102, "rewards/margins": 0.045011766254901886, "rewards/rejected": -0.1422063410282135, "step": 842 }, { "epoch": 0.5142595699252707, "grad_norm": 5.043074607849121, "learning_rate": 7.203429271279852e-06, "log_odds_chosen": -0.09882812201976776, "log_odds_ratio": -0.8225772976875305, "logits/chosen": -0.9762797355651855, "logits/rejected": -0.8314245939254761, "logps/chosen": -1.0839049816131592, "logps/rejected": -1.0094088315963745, "loss": 1.3166, "nll_loss": 1.2254157066345215, "rewards/accuracies": 0.375, "rewards/chosen": -0.10839050263166428, "rewards/margins": -0.007449619472026825, "rewards/rejected": -0.10094089061021805, "step": 843 }, { "epoch": 0.5148696050022876, "grad_norm": 8.037117004394531, "learning_rate": 7.2024494794856085e-06, "log_odds_chosen": 0.7298761010169983, "log_odds_ratio": -0.5066822171211243, "logits/chosen": -1.1193803548812866, "logits/rejected": -0.9717879295349121, "logps/chosen": -0.9133215546607971, "logps/rejected": -1.383897066116333, "loss": 1.1123, "nll_loss": 1.3610436916351318, "rewards/accuracies": 0.625, "rewards/chosen": -0.09133215248584747, "rewards/margins": 0.047057539224624634, "rewards/rejected": -0.1383896917104721, "step": 844 }, { "epoch": 0.5154796400793046, "grad_norm": 1.7931442260742188, "learning_rate": 7.2014696876913655e-06, "log_odds_chosen": 0.4779090881347656, "log_odds_ratio": -0.6498427391052246, "logits/chosen": -0.8963353037834167, "logits/rejected": -0.6268539428710938, "logps/chosen": -0.9699177742004395, "logps/rejected": -1.2478594779968262, "loss": 1.1432, "nll_loss": 1.0452888011932373, "rewards/accuracies": 0.625, "rewards/chosen": -0.09699177742004395, "rewards/margins": 0.02779417112469673, "rewards/rejected": -0.12478594481945038, "step": 845 }, { "epoch": 0.5160896751563215, "grad_norm": 1.3546940088272095, "learning_rate": 7.200489895897122e-06, "log_odds_chosen": 0.7915182709693909, "log_odds_ratio": -0.46180152893066406, "logits/chosen": -1.1043879985809326, "logits/rejected": -0.9147236347198486, "logps/chosen": -0.8169784545898438, "logps/rejected": -1.3022656440734863, "loss": 1.2117, "nll_loss": 1.2349872589111328, "rewards/accuracies": 0.75, "rewards/chosen": -0.08169785141944885, "rewards/margins": 0.048528727144002914, "rewards/rejected": -0.13022658228874207, "step": 846 }, { "epoch": 0.5166997102333384, "grad_norm": 1.525189995765686, "learning_rate": 7.199510104102878e-06, "log_odds_chosen": 0.7372106313705444, "log_odds_ratio": -0.4255847632884979, "logits/chosen": -0.9248067140579224, "logits/rejected": -0.8624151945114136, "logps/chosen": -0.6162773370742798, "logps/rejected": -0.9587867856025696, "loss": 1.1011, "nll_loss": 1.2068684101104736, "rewards/accuracies": 0.875, "rewards/chosen": -0.06162773445248604, "rewards/margins": 0.03425094485282898, "rewards/rejected": -0.09587868303060532, "step": 847 }, { "epoch": 0.5173097453103553, "grad_norm": 2.8297767639160156, "learning_rate": 7.198530312308635e-06, "log_odds_chosen": 0.806063175201416, "log_odds_ratio": -0.4588016867637634, "logits/chosen": -0.9075120687484741, "logits/rejected": -0.8810781240463257, "logps/chosen": -0.9612340331077576, "logps/rejected": -1.516721487045288, "loss": 1.1579, "nll_loss": 1.0632814168930054, "rewards/accuracies": 0.75, "rewards/chosen": -0.09612340480089188, "rewards/margins": 0.055548761039972305, "rewards/rejected": -0.1516721546649933, "step": 848 }, { "epoch": 0.5179197803873723, "grad_norm": 1.6391624212265015, "learning_rate": 7.19755052051439e-06, "log_odds_chosen": 0.6216596961021423, "log_odds_ratio": -0.46683940291404724, "logits/chosen": -0.7885246872901917, "logits/rejected": -0.7805271148681641, "logps/chosen": -0.8520113825798035, "logps/rejected": -1.23163902759552, "loss": 1.1804, "nll_loss": 0.9482958912849426, "rewards/accuracies": 0.875, "rewards/chosen": -0.08520113676786423, "rewards/margins": 0.037962768226861954, "rewards/rejected": -0.12316390872001648, "step": 849 }, { "epoch": 0.5185298154643893, "grad_norm": 1.5619078874588013, "learning_rate": 7.196570728720146e-06, "log_odds_chosen": 0.611300528049469, "log_odds_ratio": -0.6496466398239136, "logits/chosen": -1.0387996435165405, "logits/rejected": -0.8873798251152039, "logps/chosen": -0.9772489070892334, "logps/rejected": -1.3786275386810303, "loss": 1.0974, "nll_loss": 1.2306103706359863, "rewards/accuracies": 0.5, "rewards/chosen": -0.09772489219903946, "rewards/margins": 0.04013785719871521, "rewards/rejected": -0.13786274194717407, "step": 850 }, { "epoch": 0.5191398505414061, "grad_norm": 1.3472901582717896, "learning_rate": 7.195590936925903e-06, "log_odds_chosen": 0.5604349970817566, "log_odds_ratio": -0.6967858076095581, "logits/chosen": -0.8537076115608215, "logits/rejected": -0.937180757522583, "logps/chosen": -0.5686004161834717, "logps/rejected": -1.0230482816696167, "loss": 1.1107, "nll_loss": 1.045473575592041, "rewards/accuracies": 0.625, "rewards/chosen": -0.05686003714799881, "rewards/margins": 0.0454447865486145, "rewards/rejected": -0.10230482369661331, "step": 851 }, { "epoch": 0.5197498856184231, "grad_norm": 1.8330790996551514, "learning_rate": 7.194611145131659e-06, "log_odds_chosen": -0.06335393339395523, "log_odds_ratio": -0.9010491371154785, "logits/chosen": -1.2251304388046265, "logits/rejected": -1.0396705865859985, "logps/chosen": -1.220636248588562, "logps/rejected": -1.201662302017212, "loss": 1.3344, "nll_loss": 1.4047812223434448, "rewards/accuracies": 0.375, "rewards/chosen": -0.12206362932920456, "rewards/margins": -0.0018973895348608494, "rewards/rejected": -0.12016623467206955, "step": 852 }, { "epoch": 0.52035992069544, "grad_norm": 1.6835827827453613, "learning_rate": 7.193631353337416e-06, "log_odds_chosen": 1.1224033832550049, "log_odds_ratio": -0.4625225067138672, "logits/chosen": -1.0206317901611328, "logits/rejected": -0.9890462160110474, "logps/chosen": -0.736081600189209, "logps/rejected": -1.5012882947921753, "loss": 1.2663, "nll_loss": 1.3602643013000488, "rewards/accuracies": 0.625, "rewards/chosen": -0.0736081600189209, "rewards/margins": 0.0765206590294838, "rewards/rejected": -0.1501288264989853, "step": 853 }, { "epoch": 0.5209699557724569, "grad_norm": 1.7891921997070312, "learning_rate": 7.1926515615431714e-06, "log_odds_chosen": -0.4413975477218628, "log_odds_ratio": -0.9866681694984436, "logits/chosen": -1.206330418586731, "logits/rejected": -1.1503018140792847, "logps/chosen": -1.1924169063568115, "logps/rejected": -0.8595751523971558, "loss": 1.2566, "nll_loss": 1.3117133378982544, "rewards/accuracies": 0.25, "rewards/chosen": -0.11924168467521667, "rewards/margins": -0.033284179866313934, "rewards/rejected": -0.08595751225948334, "step": 854 }, { "epoch": 0.5215799908494738, "grad_norm": 1.3994245529174805, "learning_rate": 7.1916717697489276e-06, "log_odds_chosen": -0.06759227812290192, "log_odds_ratio": -0.8312821984291077, "logits/chosen": -0.9150052070617676, "logits/rejected": -0.7846472859382629, "logps/chosen": -0.9978164434432983, "logps/rejected": -0.917375922203064, "loss": 1.1195, "nll_loss": 1.065456509590149, "rewards/accuracies": 0.5, "rewards/chosen": -0.09978163987398148, "rewards/margins": -0.008044049143791199, "rewards/rejected": -0.09173759818077087, "step": 855 }, { "epoch": 0.5221900259264908, "grad_norm": 1.2113724946975708, "learning_rate": 7.1906919779546845e-06, "log_odds_chosen": 0.7668556571006775, "log_odds_ratio": -0.44203490018844604, "logits/chosen": -0.9979684352874756, "logits/rejected": -0.582779586315155, "logps/chosen": -0.8440250158309937, "logps/rejected": -1.3028699159622192, "loss": 1.1755, "nll_loss": 1.1414169073104858, "rewards/accuracies": 0.875, "rewards/chosen": -0.08440250158309937, "rewards/margins": 0.04588449001312256, "rewards/rejected": -0.13028699159622192, "step": 856 }, { "epoch": 0.5228000610035077, "grad_norm": 6.447467803955078, "learning_rate": 7.189712186160441e-06, "log_odds_chosen": 0.8913160562515259, "log_odds_ratio": -0.4495653510093689, "logits/chosen": -0.7789832949638367, "logits/rejected": -0.7466177940368652, "logps/chosen": -0.784139096736908, "logps/rejected": -1.346867322921753, "loss": 1.2267, "nll_loss": 1.1111524105072021, "rewards/accuracies": 0.75, "rewards/chosen": -0.07841390371322632, "rewards/margins": 0.05627281591296196, "rewards/rejected": -0.13468672335147858, "step": 857 }, { "epoch": 0.5234100960805246, "grad_norm": 1.8090300559997559, "learning_rate": 7.188732394366197e-06, "log_odds_chosen": 0.11746634542942047, "log_odds_ratio": -0.7382618188858032, "logits/chosen": -0.8750433921813965, "logits/rejected": -0.8378809094429016, "logps/chosen": -0.9857749938964844, "logps/rejected": -1.0444917678833008, "loss": 1.1787, "nll_loss": 1.1760084629058838, "rewards/accuracies": 0.5, "rewards/chosen": -0.09857749938964844, "rewards/margins": 0.0058716703206300735, "rewards/rejected": -0.10444916784763336, "step": 858 }, { "epoch": 0.5240201311575415, "grad_norm": 2.1184234619140625, "learning_rate": 7.187752602571954e-06, "log_odds_chosen": 0.2494490146636963, "log_odds_ratio": -0.7758393883705139, "logits/chosen": -0.7915328741073608, "logits/rejected": -0.5154006481170654, "logps/chosen": -0.943652331829071, "logps/rejected": -1.1449896097183228, "loss": 1.0596, "nll_loss": 0.9982885122299194, "rewards/accuracies": 0.625, "rewards/chosen": -0.09436523169279099, "rewards/margins": 0.02013373002409935, "rewards/rejected": -0.11449896544218063, "step": 859 }, { "epoch": 0.5246301662345585, "grad_norm": 2.3001883029937744, "learning_rate": 7.186772810777709e-06, "log_odds_chosen": 0.7332541942596436, "log_odds_ratio": -0.43435561656951904, "logits/chosen": -0.9613431692123413, "logits/rejected": -0.813274085521698, "logps/chosen": -0.6913222670555115, "logps/rejected": -1.0461970567703247, "loss": 1.0336, "nll_loss": 1.1776142120361328, "rewards/accuracies": 1.0, "rewards/chosen": -0.06913222372531891, "rewards/margins": 0.03548748046159744, "rewards/rejected": -0.10461971163749695, "step": 860 }, { "epoch": 0.5252402013115754, "grad_norm": 1.4706180095672607, "learning_rate": 7.185793018983465e-06, "log_odds_chosen": 0.6925495862960815, "log_odds_ratio": -0.47731930017471313, "logits/chosen": -0.6620104908943176, "logits/rejected": -0.6682676076889038, "logps/chosen": -0.8355335593223572, "logps/rejected": -1.266715407371521, "loss": 1.1904, "nll_loss": 0.9941850900650024, "rewards/accuracies": 0.625, "rewards/chosen": -0.08355335891246796, "rewards/margins": 0.0431181862950325, "rewards/rejected": -0.12667155265808105, "step": 861 }, { "epoch": 0.5258502363885923, "grad_norm": 2.782231330871582, "learning_rate": 7.184813227189222e-06, "log_odds_chosen": 0.23645952343940735, "log_odds_ratio": -0.742387056350708, "logits/chosen": -0.9085226655006409, "logits/rejected": -0.9046564102172852, "logps/chosen": -0.9565190076828003, "logps/rejected": -1.0500961542129517, "loss": 0.9777, "nll_loss": 0.9348454475402832, "rewards/accuracies": 0.5, "rewards/chosen": -0.09565189480781555, "rewards/margins": 0.009357723407447338, "rewards/rejected": -0.10500961542129517, "step": 862 }, { "epoch": 0.5264602714656093, "grad_norm": 2.0403687953948975, "learning_rate": 7.183833435394978e-06, "log_odds_chosen": 0.6176193356513977, "log_odds_ratio": -0.6061481237411499, "logits/chosen": -0.9460746049880981, "logits/rejected": -0.7912473082542419, "logps/chosen": -0.9639811515808105, "logps/rejected": -1.3536272048950195, "loss": 1.0473, "nll_loss": 1.0880476236343384, "rewards/accuracies": 0.625, "rewards/chosen": -0.09639812260866165, "rewards/margins": 0.03896459564566612, "rewards/rejected": -0.13536271452903748, "step": 863 }, { "epoch": 0.5270703065426262, "grad_norm": 2.392625093460083, "learning_rate": 7.182853643600734e-06, "log_odds_chosen": 0.6579153537750244, "log_odds_ratio": -0.49175888299942017, "logits/chosen": -0.7456815242767334, "logits/rejected": -0.7802222371101379, "logps/chosen": -0.883330225944519, "logps/rejected": -1.3421787023544312, "loss": 1.1086, "nll_loss": 1.0564879179000854, "rewards/accuracies": 0.75, "rewards/chosen": -0.08833302557468414, "rewards/margins": 0.04588485509157181, "rewards/rejected": -0.13421787321567535, "step": 864 }, { "epoch": 0.5276803416196432, "grad_norm": 3.703291654586792, "learning_rate": 7.181873851806491e-06, "log_odds_chosen": 1.1531332731246948, "log_odds_ratio": -0.32336097955703735, "logits/chosen": -0.9474977254867554, "logits/rejected": -0.9567420482635498, "logps/chosen": -0.574155867099762, "logps/rejected": -1.2583632469177246, "loss": 1.1174, "nll_loss": 1.0156904458999634, "rewards/accuracies": 0.875, "rewards/chosen": -0.05741558596491814, "rewards/margins": 0.06842074543237686, "rewards/rejected": -0.1258363276720047, "step": 865 }, { "epoch": 0.52829037669666, "grad_norm": 1.099583625793457, "learning_rate": 7.180894060012247e-06, "log_odds_chosen": 0.3265363574028015, "log_odds_ratio": -0.6266335248947144, "logits/chosen": -0.8852916359901428, "logits/rejected": -0.7761736512184143, "logps/chosen": -0.9775974750518799, "logps/rejected": -1.1518293619155884, "loss": 1.173, "nll_loss": 1.048965334892273, "rewards/accuracies": 0.5, "rewards/chosen": -0.09775975346565247, "rewards/margins": 0.01742318458855152, "rewards/rejected": -0.11518293619155884, "step": 866 }, { "epoch": 0.528900411773677, "grad_norm": 3.693814516067505, "learning_rate": 7.1799142682180036e-06, "log_odds_chosen": 1.2811119556427002, "log_odds_ratio": -0.41289764642715454, "logits/chosen": -0.777267575263977, "logits/rejected": -0.7160118818283081, "logps/chosen": -0.714516282081604, "logps/rejected": -1.520158052444458, "loss": 1.0843, "nll_loss": 0.9344543814659119, "rewards/accuracies": 0.625, "rewards/chosen": -0.07145162671804428, "rewards/margins": 0.08056418597698212, "rewards/rejected": -0.1520158052444458, "step": 867 }, { "epoch": 0.5295104468506939, "grad_norm": 3.091472625732422, "learning_rate": 7.17893447642376e-06, "log_odds_chosen": 0.5336377024650574, "log_odds_ratio": -0.5331398248672485, "logits/chosen": -0.6120738387107849, "logits/rejected": -0.6724826097488403, "logps/chosen": -0.7384785413742065, "logps/rejected": -1.0109814405441284, "loss": 0.985, "nll_loss": 0.8082345724105835, "rewards/accuracies": 0.75, "rewards/chosen": -0.07384785264730453, "rewards/margins": 0.027250293642282486, "rewards/rejected": -0.10109814256429672, "step": 868 }, { "epoch": 0.5301204819277109, "grad_norm": 4.474090099334717, "learning_rate": 7.177954684629516e-06, "log_odds_chosen": 0.4202596843242645, "log_odds_ratio": -0.6423066854476929, "logits/chosen": -0.8732722997665405, "logits/rejected": -0.7094640731811523, "logps/chosen": -1.1409680843353271, "logps/rejected": -1.3508028984069824, "loss": 1.1506, "nll_loss": 1.1886825561523438, "rewards/accuracies": 0.625, "rewards/chosen": -0.11409682035446167, "rewards/margins": 0.02098347619175911, "rewards/rejected": -0.13508029282093048, "step": 869 }, { "epoch": 0.5307305170047277, "grad_norm": 2.473019599914551, "learning_rate": 7.176974892835273e-06, "log_odds_chosen": 0.6696407794952393, "log_odds_ratio": -0.5304728746414185, "logits/chosen": -1.0106981992721558, "logits/rejected": -0.847144365310669, "logps/chosen": -1.041335105895996, "logps/rejected": -1.532790184020996, "loss": 1.0782, "nll_loss": 1.1759392023086548, "rewards/accuracies": 0.75, "rewards/chosen": -0.10413351655006409, "rewards/margins": 0.049145519733428955, "rewards/rejected": -0.15327903628349304, "step": 870 }, { "epoch": 0.5313405520817447, "grad_norm": 2.6517088413238525, "learning_rate": 7.175995101041029e-06, "log_odds_chosen": -0.01000937819480896, "log_odds_ratio": -0.7718991041183472, "logits/chosen": -0.8893063068389893, "logits/rejected": -0.8522997498512268, "logps/chosen": -0.8522785902023315, "logps/rejected": -0.8145520687103271, "loss": 1.1983, "nll_loss": 1.041588544845581, "rewards/accuracies": 0.5, "rewards/chosen": -0.0852278545498848, "rewards/margins": -0.0037726517766714096, "rewards/rejected": -0.08145520836114883, "step": 871 }, { "epoch": 0.5319505871587616, "grad_norm": 1.5976439714431763, "learning_rate": 7.175015309246784e-06, "log_odds_chosen": 0.9666697382926941, "log_odds_ratio": -0.49559253454208374, "logits/chosen": -1.0175695419311523, "logits/rejected": -0.8330521583557129, "logps/chosen": -0.959284782409668, "logps/rejected": -1.6660877466201782, "loss": 1.2246, "nll_loss": 1.1345152854919434, "rewards/accuracies": 0.625, "rewards/chosen": -0.09592848271131516, "rewards/margins": 0.07068029791116714, "rewards/rejected": -0.1666087657213211, "step": 872 }, { "epoch": 0.5325606222357786, "grad_norm": 2.772810935974121, "learning_rate": 7.174035517452541e-06, "log_odds_chosen": 0.21501663327217102, "log_odds_ratio": -0.6294543743133545, "logits/chosen": -0.8255460262298584, "logits/rejected": -0.7954759001731873, "logps/chosen": -1.0803340673446655, "logps/rejected": -1.190026044845581, "loss": 1.2728, "nll_loss": 1.40541672706604, "rewards/accuracies": 0.5, "rewards/chosen": -0.10803340375423431, "rewards/margins": 0.010969198308885098, "rewards/rejected": -0.11900260299444199, "step": 873 }, { "epoch": 0.5331706573127954, "grad_norm": 5.798897743225098, "learning_rate": 7.173055725658297e-06, "log_odds_chosen": 0.916728138923645, "log_odds_ratio": -0.4490162134170532, "logits/chosen": -0.6482548713684082, "logits/rejected": -0.5942224264144897, "logps/chosen": -0.754462480545044, "logps/rejected": -1.2303106784820557, "loss": 1.1104, "nll_loss": 1.0511845350265503, "rewards/accuracies": 0.625, "rewards/chosen": -0.0754462480545044, "rewards/margins": 0.04758481681346893, "rewards/rejected": -0.12303106486797333, "step": 874 }, { "epoch": 0.5337806923898124, "grad_norm": 1.749697208404541, "learning_rate": 7.172075933864053e-06, "log_odds_chosen": 0.643991231918335, "log_odds_ratio": -0.533649206161499, "logits/chosen": -1.0385022163391113, "logits/rejected": -0.8214400410652161, "logps/chosen": -1.0485533475875854, "logps/rejected": -1.4667787551879883, "loss": 1.2442, "nll_loss": 1.2452245950698853, "rewards/accuracies": 0.75, "rewards/chosen": -0.10485534369945526, "rewards/margins": 0.04182255268096924, "rewards/rejected": -0.1466778963804245, "step": 875 }, { "epoch": 0.5343907274668294, "grad_norm": 2.5763802528381348, "learning_rate": 7.17109614206981e-06, "log_odds_chosen": 0.6700981259346008, "log_odds_ratio": -0.4748574495315552, "logits/chosen": -0.8706841468811035, "logits/rejected": -0.7540481090545654, "logps/chosen": -0.7836462259292603, "logps/rejected": -1.1865507364273071, "loss": 1.1968, "nll_loss": 1.07016122341156, "rewards/accuracies": 0.75, "rewards/chosen": -0.07836462557315826, "rewards/margins": 0.04029045253992081, "rewards/rejected": -0.11865507066249847, "step": 876 }, { "epoch": 0.5350007625438463, "grad_norm": 2.2156758308410645, "learning_rate": 7.170116350275566e-06, "log_odds_chosen": 0.955051064491272, "log_odds_ratio": -0.40714365243911743, "logits/chosen": -0.6408506631851196, "logits/rejected": -0.5820608139038086, "logps/chosen": -0.7095268368721008, "logps/rejected": -1.2900497913360596, "loss": 1.0926, "nll_loss": 0.9038619995117188, "rewards/accuracies": 0.75, "rewards/chosen": -0.07095268368721008, "rewards/margins": 0.05805228278040886, "rewards/rejected": -0.12900497019290924, "step": 877 }, { "epoch": 0.5356107976208632, "grad_norm": 1.7302496433258057, "learning_rate": 7.169136558481322e-06, "log_odds_chosen": 0.13326981663703918, "log_odds_ratio": -0.6999744772911072, "logits/chosen": -1.2156189680099487, "logits/rejected": -1.0155327320098877, "logps/chosen": -1.1122777462005615, "logps/rejected": -1.1704895496368408, "loss": 1.2759, "nll_loss": 1.3070669174194336, "rewards/accuracies": 0.625, "rewards/chosen": -0.11122778058052063, "rewards/margins": 0.005821180529892445, "rewards/rejected": -0.1170489490032196, "step": 878 }, { "epoch": 0.5362208326978801, "grad_norm": 2.2812211513519287, "learning_rate": 7.168156766687079e-06, "log_odds_chosen": 0.3713536858558655, "log_odds_ratio": -0.7014147043228149, "logits/chosen": -0.7533150911331177, "logits/rejected": -0.5862425565719604, "logps/chosen": -0.9427275657653809, "logps/rejected": -1.1965240240097046, "loss": 1.2554, "nll_loss": 1.0832817554473877, "rewards/accuracies": 0.5, "rewards/chosen": -0.09427275508642197, "rewards/margins": 0.02537965215742588, "rewards/rejected": -0.1196524128317833, "step": 879 }, { "epoch": 0.5368308677748971, "grad_norm": 7.2625932693481445, "learning_rate": 7.167176974892835e-06, "log_odds_chosen": 0.20096854865550995, "log_odds_ratio": -0.6919386386871338, "logits/chosen": -0.9022514820098877, "logits/rejected": -0.7193572521209717, "logps/chosen": -0.8600403666496277, "logps/rejected": -0.9843965172767639, "loss": 1.2278, "nll_loss": 1.089436650276184, "rewards/accuracies": 0.5, "rewards/chosen": -0.08600403368473053, "rewards/margins": 0.01243562065064907, "rewards/rejected": -0.09843964874744415, "step": 880 }, { "epoch": 0.537440902851914, "grad_norm": 3.897744655609131, "learning_rate": 7.166197183098592e-06, "log_odds_chosen": 1.2122992277145386, "log_odds_ratio": -0.32378101348876953, "logits/chosen": -0.8819787502288818, "logits/rejected": -0.7319047451019287, "logps/chosen": -0.7681353092193604, "logps/rejected": -1.5136253833770752, "loss": 1.2596, "nll_loss": 0.9426190853118896, "rewards/accuracies": 0.875, "rewards/chosen": -0.07681354135274887, "rewards/margins": 0.07454901933670044, "rewards/rejected": -0.1513625532388687, "step": 881 }, { "epoch": 0.5380509379289309, "grad_norm": 2.123628616333008, "learning_rate": 7.165217391304348e-06, "log_odds_chosen": 0.2509895861148834, "log_odds_ratio": -0.7313666343688965, "logits/chosen": -0.9929608106613159, "logits/rejected": -0.739595353603363, "logps/chosen": -1.1033079624176025, "logps/rejected": -1.2893873453140259, "loss": 1.2298, "nll_loss": 1.20002019405365, "rewards/accuracies": 0.375, "rewards/chosen": -0.11033079028129578, "rewards/margins": 0.01860795170068741, "rewards/rejected": -0.12893874943256378, "step": 882 }, { "epoch": 0.5386609730059478, "grad_norm": 1.8595762252807617, "learning_rate": 7.164237599510103e-06, "log_odds_chosen": 1.143424153327942, "log_odds_ratio": -0.4198281764984131, "logits/chosen": -0.8050434589385986, "logits/rejected": -0.7136026620864868, "logps/chosen": -0.7461243867874146, "logps/rejected": -1.4761301279067993, "loss": 1.0961, "nll_loss": 1.0319560766220093, "rewards/accuracies": 0.875, "rewards/chosen": -0.07461243867874146, "rewards/margins": 0.07300056517124176, "rewards/rejected": -0.14761300384998322, "step": 883 }, { "epoch": 0.5392710080829648, "grad_norm": 2.0171682834625244, "learning_rate": 7.16325780771586e-06, "log_odds_chosen": 1.1047464609146118, "log_odds_ratio": -0.451570987701416, "logits/chosen": -0.8944357633590698, "logits/rejected": -0.8554857969284058, "logps/chosen": -0.8452370166778564, "logps/rejected": -1.4279580116271973, "loss": 1.2991, "nll_loss": 1.4258285760879517, "rewards/accuracies": 0.75, "rewards/chosen": -0.08452370017766953, "rewards/margins": 0.0582721121609211, "rewards/rejected": -0.14279581606388092, "step": 884 }, { "epoch": 0.5398810431599816, "grad_norm": 1.9847087860107422, "learning_rate": 7.162278015921616e-06, "log_odds_chosen": 0.8948750495910645, "log_odds_ratio": -0.6013544201850891, "logits/chosen": -1.0241189002990723, "logits/rejected": -0.6607208251953125, "logps/chosen": -1.0732142925262451, "logps/rejected": -1.6727713346481323, "loss": 1.1329, "nll_loss": 1.4186292886734009, "rewards/accuracies": 0.625, "rewards/chosen": -0.10732143372297287, "rewards/margins": 0.059955697506666183, "rewards/rejected": -0.16727712750434875, "step": 885 }, { "epoch": 0.5404910782369986, "grad_norm": 4.30535364151001, "learning_rate": 7.1612982241273724e-06, "log_odds_chosen": 1.134758710861206, "log_odds_ratio": -0.4474080204963684, "logits/chosen": -0.8186051845550537, "logits/rejected": -0.7703598141670227, "logps/chosen": -0.658771812915802, "logps/rejected": -1.272019624710083, "loss": 1.2632, "nll_loss": 1.1988880634307861, "rewards/accuracies": 0.625, "rewards/chosen": -0.06587718427181244, "rewards/margins": 0.06132477521896362, "rewards/rejected": -0.12720195949077606, "step": 886 }, { "epoch": 0.5411011133140156, "grad_norm": 1.1528863906860352, "learning_rate": 7.160318432333129e-06, "log_odds_chosen": 0.6408653259277344, "log_odds_ratio": -0.6328638792037964, "logits/chosen": -0.9718106389045715, "logits/rejected": -0.8509880304336548, "logps/chosen": -0.940283477306366, "logps/rejected": -1.2828739881515503, "loss": 1.0461, "nll_loss": 1.0656596422195435, "rewards/accuracies": 0.625, "rewards/chosen": -0.09402835369110107, "rewards/margins": 0.034259047359228134, "rewards/rejected": -0.1282874047756195, "step": 887 }, { "epoch": 0.5417111483910325, "grad_norm": 3.4982590675354004, "learning_rate": 7.1593386405388855e-06, "log_odds_chosen": 0.8341356515884399, "log_odds_ratio": -0.47494539618492126, "logits/chosen": -0.9591413736343384, "logits/rejected": -0.8262180685997009, "logps/chosen": -0.715042769908905, "logps/rejected": -1.1941933631896973, "loss": 1.0321, "nll_loss": 0.888143002986908, "rewards/accuracies": 0.75, "rewards/chosen": -0.07150427997112274, "rewards/margins": 0.04791506379842758, "rewards/rejected": -0.11941934376955032, "step": 888 }, { "epoch": 0.5423211834680494, "grad_norm": 2.5691678524017334, "learning_rate": 7.158358848744641e-06, "log_odds_chosen": 0.4089292883872986, "log_odds_ratio": -0.6273064017295837, "logits/chosen": -0.797531008720398, "logits/rejected": -0.6932498812675476, "logps/chosen": -0.9121578931808472, "logps/rejected": -1.1337218284606934, "loss": 1.0226, "nll_loss": 0.9018889665603638, "rewards/accuracies": 0.375, "rewards/chosen": -0.09121579676866531, "rewards/margins": 0.02215639129281044, "rewards/rejected": -0.11337218433618546, "step": 889 }, { "epoch": 0.5429312185450663, "grad_norm": 1.0361906290054321, "learning_rate": 7.157379056950398e-06, "log_odds_chosen": -0.004739284515380859, "log_odds_ratio": -0.7400366067886353, "logits/chosen": -0.9572893977165222, "logits/rejected": -0.9234524369239807, "logps/chosen": -1.0084974765777588, "logps/rejected": -1.008046269416809, "loss": 1.2152, "nll_loss": 1.1699172258377075, "rewards/accuracies": 0.5, "rewards/chosen": -0.10084976255893707, "rewards/margins": -4.5132823288440704e-05, "rewards/rejected": -0.10080461949110031, "step": 890 }, { "epoch": 0.5435412536220833, "grad_norm": 1.4798678159713745, "learning_rate": 7.156399265156154e-06, "log_odds_chosen": 0.4875624477863312, "log_odds_ratio": -0.5959969758987427, "logits/chosen": -1.0216425657272339, "logits/rejected": -0.7033925652503967, "logps/chosen": -0.8110039234161377, "logps/rejected": -1.1273828744888306, "loss": 1.1462, "nll_loss": 0.9990218877792358, "rewards/accuracies": 0.625, "rewards/chosen": -0.08110038936138153, "rewards/margins": 0.031637899577617645, "rewards/rejected": -0.11273828893899918, "step": 891 }, { "epoch": 0.5441512886991002, "grad_norm": 2.2621328830718994, "learning_rate": 7.15541947336191e-06, "log_odds_chosen": 1.0782549381256104, "log_odds_ratio": -0.5582076907157898, "logits/chosen": -0.8984646797180176, "logits/rejected": -0.8340439200401306, "logps/chosen": -0.6967090964317322, "logps/rejected": -1.2348368167877197, "loss": 1.1465, "nll_loss": 1.0560221672058105, "rewards/accuracies": 0.625, "rewards/chosen": -0.0696709156036377, "rewards/margins": 0.053812772035598755, "rewards/rejected": -0.12348368763923645, "step": 892 }, { "epoch": 0.5447613237761171, "grad_norm": 1.560115098953247, "learning_rate": 7.154439681567667e-06, "log_odds_chosen": 0.9120495319366455, "log_odds_ratio": -0.44853758811950684, "logits/chosen": -0.9017214179039001, "logits/rejected": -0.7052168250083923, "logps/chosen": -0.9265440702438354, "logps/rejected": -1.511617660522461, "loss": 1.098, "nll_loss": 1.0961554050445557, "rewards/accuracies": 0.875, "rewards/chosen": -0.09265441447496414, "rewards/margins": 0.05850735679268837, "rewards/rejected": -0.15116176009178162, "step": 893 }, { "epoch": 0.545371358853134, "grad_norm": 8.007875442504883, "learning_rate": 7.153459889773422e-06, "log_odds_chosen": 1.1389868259429932, "log_odds_ratio": -0.42728114128112793, "logits/chosen": -0.6028436422348022, "logits/rejected": -0.7079068422317505, "logps/chosen": -0.6779868602752686, "logps/rejected": -1.3643535375595093, "loss": 1.2091, "nll_loss": 0.9047582745552063, "rewards/accuracies": 0.75, "rewards/chosen": -0.0677986815571785, "rewards/margins": 0.06863666325807571, "rewards/rejected": -0.1364353597164154, "step": 894 }, { "epoch": 0.545981393930151, "grad_norm": 1.2708643674850464, "learning_rate": 7.152480097979179e-06, "log_odds_chosen": 0.5506248474121094, "log_odds_ratio": -0.5234013795852661, "logits/chosen": -0.7678009271621704, "logits/rejected": -0.8142585754394531, "logps/chosen": -0.8167765140533447, "logps/rejected": -1.0699009895324707, "loss": 1.0863, "nll_loss": 1.307770848274231, "rewards/accuracies": 0.75, "rewards/chosen": -0.08167764544487, "rewards/margins": 0.02531244605779648, "rewards/rejected": -0.10699009895324707, "step": 895 }, { "epoch": 0.546591429007168, "grad_norm": 1.343887448310852, "learning_rate": 7.151500306184935e-06, "log_odds_chosen": 0.6940044164657593, "log_odds_ratio": -0.5765656232833862, "logits/chosen": -1.171994924545288, "logits/rejected": -0.7131718397140503, "logps/chosen": -1.1135555505752563, "logps/rejected": -1.5047810077667236, "loss": 1.3231, "nll_loss": 1.421643614768982, "rewards/accuracies": 0.625, "rewards/chosen": -0.11135556548833847, "rewards/margins": 0.03912254050374031, "rewards/rejected": -0.15047809481620789, "step": 896 }, { "epoch": 0.5472014640841848, "grad_norm": 2.1658084392547607, "learning_rate": 7.1505205143906915e-06, "log_odds_chosen": 0.2820320129394531, "log_odds_ratio": -0.6837034225463867, "logits/chosen": -0.8911289572715759, "logits/rejected": -0.7544116973876953, "logps/chosen": -1.0211982727050781, "logps/rejected": -1.227541208267212, "loss": 1.3656, "nll_loss": 1.2385640144348145, "rewards/accuracies": 0.625, "rewards/chosen": -0.10211983323097229, "rewards/margins": 0.02063428983092308, "rewards/rejected": -0.12275411933660507, "step": 897 }, { "epoch": 0.5478114991612018, "grad_norm": 1.1314854621887207, "learning_rate": 7.1495407225964484e-06, "log_odds_chosen": 0.6641468405723572, "log_odds_ratio": -0.5408658385276794, "logits/chosen": -1.142959475517273, "logits/rejected": -0.9048566222190857, "logps/chosen": -0.9529459476470947, "logps/rejected": -1.383738398551941, "loss": 1.1162, "nll_loss": 1.3681024312973022, "rewards/accuracies": 0.625, "rewards/chosen": -0.09529459476470947, "rewards/margins": 0.04307924583554268, "rewards/rejected": -0.13837383687496185, "step": 898 }, { "epoch": 0.5484215342382187, "grad_norm": 1.2488975524902344, "learning_rate": 7.1485609308022046e-06, "log_odds_chosen": 0.9626524448394775, "log_odds_ratio": -0.44835466146469116, "logits/chosen": -0.8718408346176147, "logits/rejected": -0.7356978058815002, "logps/chosen": -0.8184477686882019, "logps/rejected": -1.467821478843689, "loss": 1.2866, "nll_loss": 1.4516923427581787, "rewards/accuracies": 0.75, "rewards/chosen": -0.08184478431940079, "rewards/margins": 0.06493737548589706, "rewards/rejected": -0.14678215980529785, "step": 899 }, { "epoch": 0.5490315693152357, "grad_norm": 3.0662708282470703, "learning_rate": 7.14758113900796e-06, "log_odds_chosen": 1.0898796319961548, "log_odds_ratio": -0.3846227824687958, "logits/chosen": -0.6306861042976379, "logits/rejected": -0.7702313661575317, "logps/chosen": -0.6382372975349426, "logps/rejected": -1.2640955448150635, "loss": 1.079, "nll_loss": 0.8344671726226807, "rewards/accuracies": 0.875, "rewards/chosen": -0.06382372975349426, "rewards/margins": 0.06258582323789597, "rewards/rejected": -0.12640956044197083, "step": 900 }, { "epoch": 0.5496416043922525, "grad_norm": 3.833442211151123, "learning_rate": 7.146601347213717e-06, "log_odds_chosen": 0.5151853561401367, "log_odds_ratio": -0.5592249631881714, "logits/chosen": -0.7312294244766235, "logits/rejected": -0.8309456706047058, "logps/chosen": -0.7273037433624268, "logps/rejected": -1.0223077535629272, "loss": 1.1355, "nll_loss": 0.8700791001319885, "rewards/accuracies": 0.625, "rewards/chosen": -0.07273037731647491, "rewards/margins": 0.029500406235456467, "rewards/rejected": -0.10223078727722168, "step": 901 }, { "epoch": 0.5502516394692695, "grad_norm": 1.6872997283935547, "learning_rate": 7.145621555419473e-06, "log_odds_chosen": 0.8616856932640076, "log_odds_ratio": -0.44608595967292786, "logits/chosen": -0.8231427669525146, "logits/rejected": -0.7620296478271484, "logps/chosen": -0.8908621668815613, "logps/rejected": -1.4001719951629639, "loss": 1.0849, "nll_loss": 1.0405458211898804, "rewards/accuracies": 0.75, "rewards/chosen": -0.08908621966838837, "rewards/margins": 0.05093098804354668, "rewards/rejected": -0.14001719653606415, "step": 902 }, { "epoch": 0.5508616745462864, "grad_norm": 1.4006026983261108, "learning_rate": 7.144641763625229e-06, "log_odds_chosen": 1.1255486011505127, "log_odds_ratio": -0.36260682344436646, "logits/chosen": -0.7872917056083679, "logits/rejected": -0.7855099439620972, "logps/chosen": -0.5272296667098999, "logps/rejected": -1.07631254196167, "loss": 1.037, "nll_loss": 1.0413172245025635, "rewards/accuracies": 0.75, "rewards/chosen": -0.05272296443581581, "rewards/margins": 0.05490829050540924, "rewards/rejected": -0.10763125121593475, "step": 903 }, { "epoch": 0.5514717096233034, "grad_norm": 1.3522794246673584, "learning_rate": 7.143661971830986e-06, "log_odds_chosen": -0.14680129289627075, "log_odds_ratio": -0.7977126240730286, "logits/chosen": -1.0507011413574219, "logits/rejected": -0.8453696966171265, "logps/chosen": -1.1178804636001587, "logps/rejected": -1.0191000699996948, "loss": 1.1959, "nll_loss": 1.2396256923675537, "rewards/accuracies": 0.375, "rewards/chosen": -0.11178804934024811, "rewards/margins": -0.009878043085336685, "rewards/rejected": -0.10191000998020172, "step": 904 }, { "epoch": 0.5520817447003202, "grad_norm": 1.9197522401809692, "learning_rate": 7.142682180036742e-06, "log_odds_chosen": 0.3083922863006592, "log_odds_ratio": -0.586685061454773, "logits/chosen": -0.9993684887886047, "logits/rejected": -0.9334879517555237, "logps/chosen": -0.9818407893180847, "logps/rejected": -1.2178447246551514, "loss": 1.0883, "nll_loss": 1.0826289653778076, "rewards/accuracies": 0.625, "rewards/chosen": -0.09818408638238907, "rewards/margins": 0.023600395768880844, "rewards/rejected": -0.12178447842597961, "step": 905 }, { "epoch": 0.5526917797773372, "grad_norm": 3.2766008377075195, "learning_rate": 7.141702388242497e-06, "log_odds_chosen": 0.4682387411594391, "log_odds_ratio": -0.6396242380142212, "logits/chosen": -0.9046856760978699, "logits/rejected": -0.9349477887153625, "logps/chosen": -0.8469136953353882, "logps/rejected": -1.0995392799377441, "loss": 1.1299, "nll_loss": 1.0427576303482056, "rewards/accuracies": 0.5, "rewards/chosen": -0.0846913680434227, "rewards/margins": 0.02526257187128067, "rewards/rejected": -0.10995393991470337, "step": 906 }, { "epoch": 0.5533018148543541, "grad_norm": 2.1614573001861572, "learning_rate": 7.140722596448254e-06, "log_odds_chosen": 0.5330687761306763, "log_odds_ratio": -0.6151196956634521, "logits/chosen": -0.8024454116821289, "logits/rejected": -0.7755886316299438, "logps/chosen": -0.7141422033309937, "logps/rejected": -1.0201663970947266, "loss": 1.0817, "nll_loss": 1.0240027904510498, "rewards/accuracies": 0.5, "rewards/chosen": -0.07141421735286713, "rewards/margins": 0.030602429062128067, "rewards/rejected": -0.10201665759086609, "step": 907 }, { "epoch": 0.5539118499313711, "grad_norm": 1.4132472276687622, "learning_rate": 7.1397428046540105e-06, "log_odds_chosen": 0.3314521908760071, "log_odds_ratio": -0.6078011393547058, "logits/chosen": -1.0164726972579956, "logits/rejected": -0.8505405783653259, "logps/chosen": -1.0277037620544434, "logps/rejected": -1.2266026735305786, "loss": 1.1513, "nll_loss": 1.1125431060791016, "rewards/accuracies": 0.625, "rewards/chosen": -0.1027703732252121, "rewards/margins": 0.019889894872903824, "rewards/rejected": -0.12266027182340622, "step": 908 }, { "epoch": 0.554521885008388, "grad_norm": 1.8974770307540894, "learning_rate": 7.1387630128597675e-06, "log_odds_chosen": 0.47302988171577454, "log_odds_ratio": -0.6287066340446472, "logits/chosen": -0.9556882381439209, "logits/rejected": -0.843576192855835, "logps/chosen": -0.7688852548599243, "logps/rejected": -1.0241475105285645, "loss": 1.0312, "nll_loss": 1.1534770727157593, "rewards/accuracies": 0.625, "rewards/chosen": -0.07688852399587631, "rewards/margins": 0.02552623301744461, "rewards/rejected": -0.10241475701332092, "step": 909 }, { "epoch": 0.5551319200854049, "grad_norm": 2.750967264175415, "learning_rate": 7.137783221065524e-06, "log_odds_chosen": 0.6352087259292603, "log_odds_ratio": -0.49001890420913696, "logits/chosen": -1.0399141311645508, "logits/rejected": -0.7875748872756958, "logps/chosen": -0.9216766357421875, "logps/rejected": -1.3541016578674316, "loss": 1.3769, "nll_loss": 1.3520041704177856, "rewards/accuracies": 0.75, "rewards/chosen": -0.0921676754951477, "rewards/margins": 0.04324251413345337, "rewards/rejected": -0.13541018962860107, "step": 910 }, { "epoch": 0.5557419551624219, "grad_norm": 1.5926092863082886, "learning_rate": 7.136803429271279e-06, "log_odds_chosen": 0.5242757201194763, "log_odds_ratio": -0.49061059951782227, "logits/chosen": -0.7607582807540894, "logits/rejected": -0.8104139566421509, "logps/chosen": -0.8270785808563232, "logps/rejected": -1.1533308029174805, "loss": 1.2725, "nll_loss": 1.0659418106079102, "rewards/accuracies": 0.75, "rewards/chosen": -0.08270785212516785, "rewards/margins": 0.032625243067741394, "rewards/rejected": -0.11533309519290924, "step": 911 }, { "epoch": 0.5563519902394388, "grad_norm": 1.7996829748153687, "learning_rate": 7.135823637477036e-06, "log_odds_chosen": 1.0406954288482666, "log_odds_ratio": -0.41530367732048035, "logits/chosen": -0.7687373757362366, "logits/rejected": -0.40797290205955505, "logps/chosen": -0.9109280705451965, "logps/rejected": -1.6261742115020752, "loss": 1.1557, "nll_loss": 1.1032556295394897, "rewards/accuracies": 0.875, "rewards/chosen": -0.09109281003475189, "rewards/margins": 0.07152461260557175, "rewards/rejected": -0.16261743009090424, "step": 912 }, { "epoch": 0.5569620253164557, "grad_norm": 1.6825602054595947, "learning_rate": 7.134843845682792e-06, "log_odds_chosen": 0.4802359640598297, "log_odds_ratio": -0.5644181966781616, "logits/chosen": -1.0804356336593628, "logits/rejected": -0.915387749671936, "logps/chosen": -0.8637098073959351, "logps/rejected": -1.1240519285202026, "loss": 1.1144, "nll_loss": 1.0130120515823364, "rewards/accuracies": 0.75, "rewards/chosen": -0.08637097477912903, "rewards/margins": 0.026034215465188026, "rewards/rejected": -0.1124051883816719, "step": 913 }, { "epoch": 0.5575720603934726, "grad_norm": 6.1397929191589355, "learning_rate": 7.133864053888548e-06, "log_odds_chosen": 0.43236619234085083, "log_odds_ratio": -0.6363650560379028, "logits/chosen": -0.9609804153442383, "logits/rejected": -0.7710122466087341, "logps/chosen": -0.822583019733429, "logps/rejected": -1.1436923742294312, "loss": 1.2058, "nll_loss": 1.1722888946533203, "rewards/accuracies": 0.5, "rewards/chosen": -0.08225830644369125, "rewards/margins": 0.03211092948913574, "rewards/rejected": -0.114369235932827, "step": 914 }, { "epoch": 0.5581820954704896, "grad_norm": 1.7852667570114136, "learning_rate": 7.132884262094305e-06, "log_odds_chosen": 0.6049008369445801, "log_odds_ratio": -0.5631730556488037, "logits/chosen": -0.9220798015594482, "logits/rejected": -0.8021814227104187, "logps/chosen": -0.9791899919509888, "logps/rejected": -1.3844730854034424, "loss": 1.0688, "nll_loss": 1.2159264087677002, "rewards/accuracies": 0.75, "rewards/chosen": -0.09791900217533112, "rewards/margins": 0.0405283123254776, "rewards/rejected": -0.13844731450080872, "step": 915 }, { "epoch": 0.5587921305475064, "grad_norm": 2.4058210849761963, "learning_rate": 7.131904470300061e-06, "log_odds_chosen": 0.2910497188568115, "log_odds_ratio": -0.6781479120254517, "logits/chosen": -1.0932825803756714, "logits/rejected": -0.8737126588821411, "logps/chosen": -1.1111069917678833, "logps/rejected": -1.295717716217041, "loss": 1.248, "nll_loss": 1.2507421970367432, "rewards/accuracies": 0.625, "rewards/chosen": -0.11111070215702057, "rewards/margins": 0.0184610765427351, "rewards/rejected": -0.12957178056240082, "step": 916 }, { "epoch": 0.5594021656245234, "grad_norm": 2.6260569095611572, "learning_rate": 7.1309246785058165e-06, "log_odds_chosen": 0.7075833678245544, "log_odds_ratio": -0.47210079431533813, "logits/chosen": -0.8261971473693848, "logits/rejected": -0.5417298078536987, "logps/chosen": -0.9877404570579529, "logps/rejected": -1.4303722381591797, "loss": 1.2233, "nll_loss": 1.1957788467407227, "rewards/accuracies": 0.75, "rewards/chosen": -0.09877404570579529, "rewards/margins": 0.04426318034529686, "rewards/rejected": -0.14303722977638245, "step": 917 }, { "epoch": 0.5600122007015403, "grad_norm": 2.0245614051818848, "learning_rate": 7.1299448867115734e-06, "log_odds_chosen": 0.6329659819602966, "log_odds_ratio": -0.489626407623291, "logits/chosen": -0.9532882571220398, "logits/rejected": -0.8026612997055054, "logps/chosen": -1.0514072179794312, "logps/rejected": -1.532637119293213, "loss": 1.391, "nll_loss": 1.3765658140182495, "rewards/accuracies": 0.75, "rewards/chosen": -0.10514072328805923, "rewards/margins": 0.04812299460172653, "rewards/rejected": -0.15326371788978577, "step": 918 }, { "epoch": 0.5606222357785573, "grad_norm": 1.8979049921035767, "learning_rate": 7.1289650949173295e-06, "log_odds_chosen": 0.4688302278518677, "log_odds_ratio": -0.5653039216995239, "logits/chosen": -0.8288286924362183, "logits/rejected": -0.7495752573013306, "logps/chosen": -0.7474000453948975, "logps/rejected": -1.0042954683303833, "loss": 0.8944, "nll_loss": 0.8988038897514343, "rewards/accuracies": 0.625, "rewards/chosen": -0.07474000006914139, "rewards/margins": 0.02568954974412918, "rewards/rejected": -0.10042954981327057, "step": 919 }, { "epoch": 0.5612322708555741, "grad_norm": 3.822500467300415, "learning_rate": 7.1279853031230865e-06, "log_odds_chosen": 1.333853840827942, "log_odds_ratio": -0.3723542094230652, "logits/chosen": -0.7987625598907471, "logits/rejected": -0.6565622687339783, "logps/chosen": -0.6938480138778687, "logps/rejected": -1.5722960233688354, "loss": 1.0866, "nll_loss": 0.9733442664146423, "rewards/accuracies": 0.875, "rewards/chosen": -0.06938480585813522, "rewards/margins": 0.08784479647874832, "rewards/rejected": -0.15722960233688354, "step": 920 }, { "epoch": 0.5618423059325911, "grad_norm": 1.7724525928497314, "learning_rate": 7.127005511328843e-06, "log_odds_chosen": 0.5564287304878235, "log_odds_ratio": -0.5796642303466797, "logits/chosen": -0.9323391318321228, "logits/rejected": -0.845345139503479, "logps/chosen": -0.8475968837738037, "logps/rejected": -1.1871248483657837, "loss": 1.0076, "nll_loss": 1.103020429611206, "rewards/accuracies": 0.5, "rewards/chosen": -0.0847596824169159, "rewards/margins": 0.033952806144952774, "rewards/rejected": -0.11871248483657837, "step": 921 }, { "epoch": 0.5624523410096081, "grad_norm": 1.9870156049728394, "learning_rate": 7.126025719534599e-06, "log_odds_chosen": 1.4734394550323486, "log_odds_ratio": -0.3178025484085083, "logits/chosen": -0.8538834452629089, "logits/rejected": -0.856508731842041, "logps/chosen": -0.6536615490913391, "logps/rejected": -1.5206258296966553, "loss": 1.1159, "nll_loss": 1.036739706993103, "rewards/accuracies": 0.875, "rewards/chosen": -0.06536615639925003, "rewards/margins": 0.08669643849134445, "rewards/rejected": -0.15206259489059448, "step": 922 }, { "epoch": 0.563062376086625, "grad_norm": 2.194459915161133, "learning_rate": 7.125045927740355e-06, "log_odds_chosen": 0.6975635290145874, "log_odds_ratio": -0.5214764475822449, "logits/chosen": -0.9493279457092285, "logits/rejected": -0.7923272848129272, "logps/chosen": -0.8733206987380981, "logps/rejected": -1.3506728410720825, "loss": 1.2638, "nll_loss": 1.0905150175094604, "rewards/accuracies": 0.5, "rewards/chosen": -0.08733206987380981, "rewards/margins": 0.04773520678281784, "rewards/rejected": -0.13506728410720825, "step": 923 }, { "epoch": 0.5636724111636419, "grad_norm": 1.6905754804611206, "learning_rate": 7.124066135946111e-06, "log_odds_chosen": 0.8522739410400391, "log_odds_ratio": -0.6359738111495972, "logits/chosen": -0.8023410439491272, "logits/rejected": -0.8091217875480652, "logps/chosen": -0.8499040603637695, "logps/rejected": -1.3923814296722412, "loss": 1.0263, "nll_loss": 1.0019077062606812, "rewards/accuracies": 0.625, "rewards/chosen": -0.08499040454626083, "rewards/margins": 0.054247744381427765, "rewards/rejected": -0.1392381489276886, "step": 924 }, { "epoch": 0.5642824462406588, "grad_norm": 1.403518795967102, "learning_rate": 7.123086344151867e-06, "log_odds_chosen": 0.2800932824611664, "log_odds_ratio": -0.6208046674728394, "logits/chosen": -1.0706011056900024, "logits/rejected": -1.0121967792510986, "logps/chosen": -0.971148669719696, "logps/rejected": -1.1402575969696045, "loss": 1.1087, "nll_loss": 1.1374284029006958, "rewards/accuracies": 0.75, "rewards/chosen": -0.09711487591266632, "rewards/margins": 0.016910893842577934, "rewards/rejected": -0.1140257716178894, "step": 925 }, { "epoch": 0.5648924813176758, "grad_norm": 8.194660186767578, "learning_rate": 7.122106552357624e-06, "log_odds_chosen": 1.4323495626449585, "log_odds_ratio": -0.346974641084671, "logits/chosen": -0.8167255520820618, "logits/rejected": -0.8005271553993225, "logps/chosen": -0.7094571590423584, "logps/rejected": -1.4508635997772217, "loss": 1.0657, "nll_loss": 1.0124585628509521, "rewards/accuracies": 0.75, "rewards/chosen": -0.07094572484493256, "rewards/margins": 0.07414063811302185, "rewards/rejected": -0.1450863480567932, "step": 926 }, { "epoch": 0.5655025163946927, "grad_norm": 1.1361620426177979, "learning_rate": 7.12112676056338e-06, "log_odds_chosen": 0.06117671728134155, "log_odds_ratio": -0.7402957677841187, "logits/chosen": -0.8514070510864258, "logits/rejected": -0.7765473127365112, "logps/chosen": -1.0304282903671265, "logps/rejected": -1.015918493270874, "loss": 1.1957, "nll_loss": 1.1687558889389038, "rewards/accuracies": 0.5, "rewards/chosen": -0.103042833507061, "rewards/margins": -0.0014509842731058598, "rewards/rejected": -0.10159184783697128, "step": 927 }, { "epoch": 0.5661125514717096, "grad_norm": 1.4355047941207886, "learning_rate": 7.120146968769136e-06, "log_odds_chosen": 0.43651407957077026, "log_odds_ratio": -0.5963600873947144, "logits/chosen": -0.7980813384056091, "logits/rejected": -0.817357063293457, "logps/chosen": -0.8287127614021301, "logps/rejected": -1.0559253692626953, "loss": 1.1669, "nll_loss": 1.0544755458831787, "rewards/accuracies": 0.625, "rewards/chosen": -0.08287128061056137, "rewards/margins": 0.022721262648701668, "rewards/rejected": -0.10559254139661789, "step": 928 }, { "epoch": 0.5667225865487265, "grad_norm": 1.6244211196899414, "learning_rate": 7.1191671769748925e-06, "log_odds_chosen": 0.5193271636962891, "log_odds_ratio": -0.6067477464675903, "logits/chosen": -1.039308786392212, "logits/rejected": -0.8785806894302368, "logps/chosen": -0.9424774050712585, "logps/rejected": -1.1432878971099854, "loss": 1.21, "nll_loss": 1.2299842834472656, "rewards/accuracies": 0.5, "rewards/chosen": -0.0942477434873581, "rewards/margins": 0.020081039518117905, "rewards/rejected": -0.1143287867307663, "step": 929 }, { "epoch": 0.5673326216257435, "grad_norm": 3.620091438293457, "learning_rate": 7.118187385180649e-06, "log_odds_chosen": 0.09348546713590622, "log_odds_ratio": -0.7327518463134766, "logits/chosen": -0.9551589488983154, "logits/rejected": -0.8044094443321228, "logps/chosen": -0.8817992210388184, "logps/rejected": -0.8943016529083252, "loss": 1.2546, "nll_loss": 1.040738821029663, "rewards/accuracies": 0.5, "rewards/chosen": -0.08817993104457855, "rewards/margins": 0.0012502414174377918, "rewards/rejected": -0.08943016827106476, "step": 930 }, { "epoch": 0.5679426567027605, "grad_norm": 2.685556411743164, "learning_rate": 7.117207593386405e-06, "log_odds_chosen": 0.6862539649009705, "log_odds_ratio": -0.574262261390686, "logits/chosen": -0.9445555210113525, "logits/rejected": -0.8223779797554016, "logps/chosen": -1.0074994564056396, "logps/rejected": -1.4356346130371094, "loss": 1.2778, "nll_loss": 1.1339943408966064, "rewards/accuracies": 0.625, "rewards/chosen": -0.10074995458126068, "rewards/margins": 0.042813509702682495, "rewards/rejected": -0.14356344938278198, "step": 931 }, { "epoch": 0.5685526917797773, "grad_norm": 2.3995537757873535, "learning_rate": 7.116227801592162e-06, "log_odds_chosen": 0.8419265747070312, "log_odds_ratio": -0.4382699131965637, "logits/chosen": -0.8612481355667114, "logits/rejected": -0.8352763652801514, "logps/chosen": -0.7302722334861755, "logps/rejected": -1.27901029586792, "loss": 1.26, "nll_loss": 0.9680002927780151, "rewards/accuracies": 0.875, "rewards/chosen": -0.07302722334861755, "rewards/margins": 0.05487380176782608, "rewards/rejected": -0.12790103256702423, "step": 932 }, { "epoch": 0.5691627268567943, "grad_norm": 1.3169810771942139, "learning_rate": 7.115248009797918e-06, "log_odds_chosen": 0.4609934091567993, "log_odds_ratio": -0.5832462906837463, "logits/chosen": -1.017622709274292, "logits/rejected": -0.8695303797721863, "logps/chosen": -0.956201434135437, "logps/rejected": -1.2234376668930054, "loss": 1.0778, "nll_loss": 1.1286066770553589, "rewards/accuracies": 0.625, "rewards/chosen": -0.09562014043331146, "rewards/margins": 0.02672361582517624, "rewards/rejected": -0.1223437711596489, "step": 933 }, { "epoch": 0.5697727619338112, "grad_norm": 1.6673126220703125, "learning_rate": 7.114268218003673e-06, "log_odds_chosen": 0.45929914712905884, "log_odds_ratio": -0.5741214752197266, "logits/chosen": -1.0966379642486572, "logits/rejected": -0.8203960061073303, "logps/chosen": -0.9177057147026062, "logps/rejected": -1.213132619857788, "loss": 1.0619, "nll_loss": 1.0233570337295532, "rewards/accuracies": 0.75, "rewards/chosen": -0.09177055954933167, "rewards/margins": 0.02954268828034401, "rewards/rejected": -0.12131325900554657, "step": 934 }, { "epoch": 0.5703827970108282, "grad_norm": 3.007871389389038, "learning_rate": 7.11328842620943e-06, "log_odds_chosen": 0.9583652019500732, "log_odds_ratio": -0.39373815059661865, "logits/chosen": -0.6816177368164062, "logits/rejected": -0.5181388854980469, "logps/chosen": -0.6215054988861084, "logps/rejected": -1.1431429386138916, "loss": 1.0445, "nll_loss": 1.0400997400283813, "rewards/accuracies": 0.75, "rewards/chosen": -0.06215055286884308, "rewards/margins": 0.0521637424826622, "rewards/rejected": -0.11431428790092468, "step": 935 }, { "epoch": 0.570992832087845, "grad_norm": 1.8409346342086792, "learning_rate": 7.112308634415186e-06, "log_odds_chosen": 0.94684898853302, "log_odds_ratio": -0.48988133668899536, "logits/chosen": -0.949312150478363, "logits/rejected": -0.770507276058197, "logps/chosen": -0.8195571899414062, "logps/rejected": -1.4561479091644287, "loss": 1.1304, "nll_loss": 0.9289451837539673, "rewards/accuracies": 0.625, "rewards/chosen": -0.08195571601390839, "rewards/margins": 0.06365906447172165, "rewards/rejected": -0.14561477303504944, "step": 936 }, { "epoch": 0.571602867164862, "grad_norm": 1.59007728099823, "learning_rate": 7.111328842620943e-06, "log_odds_chosen": 0.5965824127197266, "log_odds_ratio": -0.6641780138015747, "logits/chosen": -0.9312564730644226, "logits/rejected": -0.7493706941604614, "logps/chosen": -1.1009118556976318, "logps/rejected": -1.4362854957580566, "loss": 1.1426, "nll_loss": 1.291152834892273, "rewards/accuracies": 0.625, "rewards/chosen": -0.1100911796092987, "rewards/margins": 0.033537358045578, "rewards/rejected": -0.1436285525560379, "step": 937 }, { "epoch": 0.5722129022418789, "grad_norm": 2.6861469745635986, "learning_rate": 7.110349050826699e-06, "log_odds_chosen": 0.15970954298973083, "log_odds_ratio": -0.7825560569763184, "logits/chosen": -0.9431471824645996, "logits/rejected": -0.7917607426643372, "logps/chosen": -0.8590950965881348, "logps/rejected": -0.9680588245391846, "loss": 1.1299, "nll_loss": 1.0969300270080566, "rewards/accuracies": 0.375, "rewards/chosen": -0.08590951561927795, "rewards/margins": 0.010896377265453339, "rewards/rejected": -0.0968058854341507, "step": 938 }, { "epoch": 0.5728229373188959, "grad_norm": 1.3280621767044067, "learning_rate": 7.109369259032455e-06, "log_odds_chosen": 0.5518777966499329, "log_odds_ratio": -0.5889929533004761, "logits/chosen": -0.8814988136291504, "logits/rejected": -0.84413743019104, "logps/chosen": -0.854935884475708, "logps/rejected": -1.2287907600402832, "loss": 1.1387, "nll_loss": 1.0790374279022217, "rewards/accuracies": 0.5, "rewards/chosen": -0.08549359440803528, "rewards/margins": 0.037385497242212296, "rewards/rejected": -0.12287908792495728, "step": 939 }, { "epoch": 0.5734329723959127, "grad_norm": 1.2240716218948364, "learning_rate": 7.1083894672382115e-06, "log_odds_chosen": 1.0961887836456299, "log_odds_ratio": -0.4499264359474182, "logits/chosen": -0.7974746823310852, "logits/rejected": -0.6658695936203003, "logps/chosen": -0.8209682703018188, "logps/rejected": -1.5806726217269897, "loss": 1.238, "nll_loss": 0.8968201875686646, "rewards/accuracies": 0.625, "rewards/chosen": -0.08209683001041412, "rewards/margins": 0.07597044855356216, "rewards/rejected": -0.1580672711133957, "step": 940 }, { "epoch": 0.5740430074729297, "grad_norm": 1.4501038789749146, "learning_rate": 7.107409675443968e-06, "log_odds_chosen": 0.7776820659637451, "log_odds_ratio": -0.47885042428970337, "logits/chosen": -0.9452258348464966, "logits/rejected": -0.8114297389984131, "logps/chosen": -0.9657070636749268, "logps/rejected": -1.4365330934524536, "loss": 1.0212, "nll_loss": 1.2441409826278687, "rewards/accuracies": 0.875, "rewards/chosen": -0.0965707078576088, "rewards/margins": 0.04708259925246239, "rewards/rejected": -0.14365330338478088, "step": 941 }, { "epoch": 0.5746530425499466, "grad_norm": 1.0408754348754883, "learning_rate": 7.106429883649724e-06, "log_odds_chosen": 0.882443368434906, "log_odds_ratio": -0.5709308385848999, "logits/chosen": -0.7461697459220886, "logits/rejected": -0.6578814387321472, "logps/chosen": -0.8201216459274292, "logps/rejected": -1.3315927982330322, "loss": 1.0474, "nll_loss": 1.056233525276184, "rewards/accuracies": 0.625, "rewards/chosen": -0.08201216161251068, "rewards/margins": 0.051147107034921646, "rewards/rejected": -0.13315927982330322, "step": 942 }, { "epoch": 0.5752630776269636, "grad_norm": 3.1067328453063965, "learning_rate": 7.105450091855481e-06, "log_odds_chosen": 1.1131469011306763, "log_odds_ratio": -0.4456004500389099, "logits/chosen": -0.8618618249893188, "logits/rejected": -0.8047898411750793, "logps/chosen": -0.6637651920318604, "logps/rejected": -1.3508343696594238, "loss": 1.0367, "nll_loss": 0.858749508857727, "rewards/accuracies": 0.75, "rewards/chosen": -0.06637652218341827, "rewards/margins": 0.06870691478252411, "rewards/rejected": -0.13508343696594238, "step": 943 }, { "epoch": 0.5758731127039804, "grad_norm": 1.1028083562850952, "learning_rate": 7.104470300061237e-06, "log_odds_chosen": 0.7098427414894104, "log_odds_ratio": -0.5213289856910706, "logits/chosen": -0.88685142993927, "logits/rejected": -0.7685022354125977, "logps/chosen": -0.9728660583496094, "logps/rejected": -1.4449976682662964, "loss": 1.245, "nll_loss": 1.0393702983856201, "rewards/accuracies": 0.75, "rewards/chosen": -0.09728660434484482, "rewards/margins": 0.04721317067742348, "rewards/rejected": -0.1444997787475586, "step": 944 }, { "epoch": 0.5764831477809974, "grad_norm": 4.339383125305176, "learning_rate": 7.103490508266993e-06, "log_odds_chosen": 0.4262884855270386, "log_odds_ratio": -0.5959907174110413, "logits/chosen": -0.9972478747367859, "logits/rejected": -0.8187488317489624, "logps/chosen": -0.9344501495361328, "logps/rejected": -1.187240481376648, "loss": 1.1637, "nll_loss": 1.006104826927185, "rewards/accuracies": 0.5, "rewards/chosen": -0.09344501793384552, "rewards/margins": 0.025279026478528976, "rewards/rejected": -0.1187240481376648, "step": 945 }, { "epoch": 0.5770931828580144, "grad_norm": 1.7097828388214111, "learning_rate": 7.102510716472749e-06, "log_odds_chosen": 1.1663768291473389, "log_odds_ratio": -0.4905804395675659, "logits/chosen": -0.7845653891563416, "logits/rejected": -0.7674596309661865, "logps/chosen": -0.8674860596656799, "logps/rejected": -1.7067174911499023, "loss": 1.1557, "nll_loss": 1.1433930397033691, "rewards/accuracies": 0.75, "rewards/chosen": -0.08674861490726471, "rewards/margins": 0.08392316102981567, "rewards/rejected": -0.1706717610359192, "step": 946 }, { "epoch": 0.5777032179350312, "grad_norm": 1.7166634798049927, "learning_rate": 7.101530924678505e-06, "log_odds_chosen": 0.6440680623054504, "log_odds_ratio": -0.515186071395874, "logits/chosen": -0.9226597547531128, "logits/rejected": -0.72896808385849, "logps/chosen": -0.9134957790374756, "logps/rejected": -1.353563904762268, "loss": 1.0034, "nll_loss": 1.1285712718963623, "rewards/accuracies": 0.75, "rewards/chosen": -0.09134958684444427, "rewards/margins": 0.04400680959224701, "rewards/rejected": -0.13535639643669128, "step": 947 }, { "epoch": 0.5783132530120482, "grad_norm": 1.8609793186187744, "learning_rate": 7.100551132884262e-06, "log_odds_chosen": 0.3209403455257416, "log_odds_ratio": -0.6055920720100403, "logits/chosen": -1.1382086277008057, "logits/rejected": -1.054444670677185, "logps/chosen": -0.9333734512329102, "logps/rejected": -1.1251527070999146, "loss": 1.1547, "nll_loss": 1.1418439149856567, "rewards/accuracies": 0.5, "rewards/chosen": -0.09333734214305878, "rewards/margins": 0.019177930429577827, "rewards/rejected": -0.11251527070999146, "step": 948 }, { "epoch": 0.5789232880890651, "grad_norm": 1.9610267877578735, "learning_rate": 7.099571341090018e-06, "log_odds_chosen": 0.9959710836410522, "log_odds_ratio": -0.3913271725177765, "logits/chosen": -0.8573408126831055, "logits/rejected": -0.8077797889709473, "logps/chosen": -0.6617723703384399, "logps/rejected": -1.2572078704833984, "loss": 0.9796, "nll_loss": 0.87016761302948, "rewards/accuracies": 0.875, "rewards/chosen": -0.06617723405361176, "rewards/margins": 0.05954354628920555, "rewards/rejected": -0.1257207840681076, "step": 949 }, { "epoch": 0.5795333231660821, "grad_norm": 1.5368386507034302, "learning_rate": 7.098591549295774e-06, "log_odds_chosen": 0.5329275131225586, "log_odds_ratio": -0.5900324583053589, "logits/chosen": -0.9863919019699097, "logits/rejected": -0.8882894515991211, "logps/chosen": -0.8937471508979797, "logps/rejected": -1.1984610557556152, "loss": 1.3141, "nll_loss": 1.1344701051712036, "rewards/accuracies": 0.75, "rewards/chosen": -0.08937472105026245, "rewards/margins": 0.030471399426460266, "rewards/rejected": -0.11984612047672272, "step": 950 }, { "epoch": 0.5801433582430989, "grad_norm": 1.4589015245437622, "learning_rate": 7.0976117575015305e-06, "log_odds_chosen": 0.6831369400024414, "log_odds_ratio": -0.4882126748561859, "logits/chosen": -0.6717537641525269, "logits/rejected": -0.6291049718856812, "logps/chosen": -0.6722227931022644, "logps/rejected": -1.070947527885437, "loss": 1.0, "nll_loss": 0.9645553827285767, "rewards/accuracies": 0.75, "rewards/chosen": -0.06722228229045868, "rewards/margins": 0.03987246751785278, "rewards/rejected": -0.10709474980831146, "step": 951 }, { "epoch": 0.5807533933201159, "grad_norm": 3.0837624073028564, "learning_rate": 7.096631965707287e-06, "log_odds_chosen": 0.9248738884925842, "log_odds_ratio": -0.40449678897857666, "logits/chosen": -0.926517128944397, "logits/rejected": -0.5819160342216492, "logps/chosen": -0.8675790429115295, "logps/rejected": -1.516924262046814, "loss": 1.1344, "nll_loss": 0.9644500613212585, "rewards/accuracies": 0.875, "rewards/chosen": -0.08675791323184967, "rewards/margins": 0.06493452936410904, "rewards/rejected": -0.1516924351453781, "step": 952 }, { "epoch": 0.5813634283971328, "grad_norm": 1.9406250715255737, "learning_rate": 7.095652173913043e-06, "log_odds_chosen": 0.5991811156272888, "log_odds_ratio": -0.5552321076393127, "logits/chosen": -0.8609848022460938, "logits/rejected": -0.7181614637374878, "logps/chosen": -0.9620858430862427, "logps/rejected": -1.3915296792984009, "loss": 1.0613, "nll_loss": 1.0458732843399048, "rewards/accuracies": 0.75, "rewards/chosen": -0.0962085872888565, "rewards/margins": 0.042944375425577164, "rewards/rejected": -0.13915295898914337, "step": 953 }, { "epoch": 0.5819734634741498, "grad_norm": 2.6924874782562256, "learning_rate": 7.0946723821188e-06, "log_odds_chosen": 0.21781352162361145, "log_odds_ratio": -0.6357624530792236, "logits/chosen": -0.7733423113822937, "logits/rejected": -0.5902336239814758, "logps/chosen": -0.8048123717308044, "logps/rejected": -0.8439970016479492, "loss": 1.0997, "nll_loss": 0.996517539024353, "rewards/accuracies": 0.625, "rewards/chosen": -0.08048123866319656, "rewards/margins": 0.003918468952178955, "rewards/rejected": -0.08439970016479492, "step": 954 }, { "epoch": 0.5825834985511666, "grad_norm": 1.9865005016326904, "learning_rate": 7.093692590324556e-06, "log_odds_chosen": 0.19875727593898773, "log_odds_ratio": -0.7437726259231567, "logits/chosen": -0.8867332339286804, "logits/rejected": -0.8864989280700684, "logps/chosen": -0.9790133237838745, "logps/rejected": -1.0844963788986206, "loss": 1.2016, "nll_loss": 1.1284457445144653, "rewards/accuracies": 0.375, "rewards/chosen": -0.09790132939815521, "rewards/margins": 0.010548314079642296, "rewards/rejected": -0.10844964534044266, "step": 955 }, { "epoch": 0.5831935336281836, "grad_norm": 1.8952524662017822, "learning_rate": 7.092712798530312e-06, "log_odds_chosen": 0.24325576424598694, "log_odds_ratio": -0.6970798969268799, "logits/chosen": -0.9389818906784058, "logits/rejected": -0.855191171169281, "logps/chosen": -0.8466158509254456, "logps/rejected": -0.9400831460952759, "loss": 1.2491, "nll_loss": 0.9454874992370605, "rewards/accuracies": 0.625, "rewards/chosen": -0.0846615880727768, "rewards/margins": 0.009346730075776577, "rewards/rejected": -0.09400831907987595, "step": 956 }, { "epoch": 0.5838035687052006, "grad_norm": 3.179466485977173, "learning_rate": 7.091733006736068e-06, "log_odds_chosen": 1.2663847208023071, "log_odds_ratio": -0.42398130893707275, "logits/chosen": -0.690721869468689, "logits/rejected": -0.40770429372787476, "logps/chosen": -0.7399487495422363, "logps/rejected": -1.4881219863891602, "loss": 1.0287, "nll_loss": 0.8097256422042847, "rewards/accuracies": 0.875, "rewards/chosen": -0.07399487495422363, "rewards/margins": 0.07481732964515686, "rewards/rejected": -0.1488122045993805, "step": 957 }, { "epoch": 0.5844136037822175, "grad_norm": 7.210777282714844, "learning_rate": 7.090753214941824e-06, "log_odds_chosen": 0.7524954080581665, "log_odds_ratio": -0.614420473575592, "logits/chosen": -0.5891788005828857, "logits/rejected": -0.7534737586975098, "logps/chosen": -0.7908124923706055, "logps/rejected": -1.1167080402374268, "loss": 1.1643, "nll_loss": 0.975845992565155, "rewards/accuracies": 0.625, "rewards/chosen": -0.07908125221729279, "rewards/margins": 0.03258955851197243, "rewards/rejected": -0.11167080700397491, "step": 958 }, { "epoch": 0.5850236388592344, "grad_norm": 1.9654366970062256, "learning_rate": 7.08977342314758e-06, "log_odds_chosen": 0.7813896536827087, "log_odds_ratio": -0.5211148858070374, "logits/chosen": -1.0175762176513672, "logits/rejected": -0.8494956493377686, "logps/chosen": -0.854768693447113, "logps/rejected": -1.2075636386871338, "loss": 1.0921, "nll_loss": 1.0546749830245972, "rewards/accuracies": 0.75, "rewards/chosen": -0.08547687530517578, "rewards/margins": 0.03527948632836342, "rewards/rejected": -0.1207563504576683, "step": 959 }, { "epoch": 0.5856336739362513, "grad_norm": 1.7710118293762207, "learning_rate": 7.088793631353337e-06, "log_odds_chosen": 0.6242284774780273, "log_odds_ratio": -0.5082129240036011, "logits/chosen": -1.114320158958435, "logits/rejected": -0.8926303386688232, "logps/chosen": -0.8898941278457642, "logps/rejected": -1.298567533493042, "loss": 1.2225, "nll_loss": 1.2117880582809448, "rewards/accuracies": 0.625, "rewards/chosen": -0.08898940682411194, "rewards/margins": 0.04086734354496002, "rewards/rejected": -0.12985675036907196, "step": 960 }, { "epoch": 0.5862437090132683, "grad_norm": 1.291918396949768, "learning_rate": 7.0878138395590935e-06, "log_odds_chosen": 0.9563599824905396, "log_odds_ratio": -0.38050365447998047, "logits/chosen": -0.7038010954856873, "logits/rejected": -0.6116915345191956, "logps/chosen": -0.6217937469482422, "logps/rejected": -1.1458210945129395, "loss": 0.9408, "nll_loss": 0.8747814297676086, "rewards/accuracies": 1.0, "rewards/chosen": -0.06217937916517258, "rewards/margins": 0.052402742207050323, "rewards/rejected": -0.1145821213722229, "step": 961 }, { "epoch": 0.5868537440902852, "grad_norm": 2.180586576461792, "learning_rate": 7.0868340477648504e-06, "log_odds_chosen": 0.5749225616455078, "log_odds_ratio": -0.5382081270217896, "logits/chosen": -0.7582452297210693, "logits/rejected": -0.6472825407981873, "logps/chosen": -0.827700138092041, "logps/rejected": -1.2116308212280273, "loss": 1.0467, "nll_loss": 0.9889674782752991, "rewards/accuracies": 0.625, "rewards/chosen": -0.08277001231908798, "rewards/margins": 0.03839307278394699, "rewards/rejected": -0.12116309255361557, "step": 962 }, { "epoch": 0.5874637791673021, "grad_norm": 1.3875391483306885, "learning_rate": 7.085854255970606e-06, "log_odds_chosen": 0.5077694654464722, "log_odds_ratio": -0.5478377342224121, "logits/chosen": -0.8406616449356079, "logits/rejected": -0.6973280310630798, "logps/chosen": -0.9471422433853149, "logps/rejected": -1.2322112321853638, "loss": 1.1843, "nll_loss": 1.0893476009368896, "rewards/accuracies": 0.75, "rewards/chosen": -0.0947142094373703, "rewards/margins": 0.028506899252533913, "rewards/rejected": -0.12322111427783966, "step": 963 }, { "epoch": 0.588073814244319, "grad_norm": 1.7430098056793213, "learning_rate": 7.084874464176362e-06, "log_odds_chosen": 0.742387056350708, "log_odds_ratio": -0.47888919711112976, "logits/chosen": -0.8178930282592773, "logits/rejected": -0.7460458278656006, "logps/chosen": -0.9281522035598755, "logps/rejected": -1.4339570999145508, "loss": 1.1605, "nll_loss": 1.130693793296814, "rewards/accuracies": 0.75, "rewards/chosen": -0.09281522780656815, "rewards/margins": 0.05058048665523529, "rewards/rejected": -0.14339570701122284, "step": 964 }, { "epoch": 0.588683849321336, "grad_norm": 1.3501993417739868, "learning_rate": 7.083894672382119e-06, "log_odds_chosen": 0.8270265460014343, "log_odds_ratio": -0.5408130884170532, "logits/chosen": -1.0563693046569824, "logits/rejected": -0.8375090956687927, "logps/chosen": -0.98796147108078, "logps/rejected": -1.541185975074768, "loss": 1.1539, "nll_loss": 1.2521710395812988, "rewards/accuracies": 0.625, "rewards/chosen": -0.09879614412784576, "rewards/margins": 0.05532245337963104, "rewards/rejected": -0.1541185975074768, "step": 965 }, { "epoch": 0.589293884398353, "grad_norm": 1.792694091796875, "learning_rate": 7.082914880587875e-06, "log_odds_chosen": -0.1271015703678131, "log_odds_ratio": -0.787772536277771, "logits/chosen": -0.781807541847229, "logits/rejected": -0.8588495254516602, "logps/chosen": -0.8022798299789429, "logps/rejected": -0.7006468772888184, "loss": 1.0405, "nll_loss": 1.0103895664215088, "rewards/accuracies": 0.5, "rewards/chosen": -0.08022797852754593, "rewards/margins": -0.010163291357457638, "rewards/rejected": -0.07006468623876572, "step": 966 }, { "epoch": 0.5899039194753698, "grad_norm": 2.077949285507202, "learning_rate": 7.081935088793631e-06, "log_odds_chosen": 0.906253457069397, "log_odds_ratio": -0.3940291106700897, "logits/chosen": -1.0880682468414307, "logits/rejected": -0.8069441914558411, "logps/chosen": -0.9790130853652954, "logps/rejected": -1.5782217979431152, "loss": 1.1241, "nll_loss": 1.175973653793335, "rewards/accuracies": 0.875, "rewards/chosen": -0.09790131449699402, "rewards/margins": 0.05992087721824646, "rewards/rejected": -0.15782217681407928, "step": 967 }, { "epoch": 0.5905139545523868, "grad_norm": 2.5442049503326416, "learning_rate": 7.080955296999387e-06, "log_odds_chosen": -0.10662616789340973, "log_odds_ratio": -0.8553794622421265, "logits/chosen": -1.0471255779266357, "logits/rejected": -0.9683523774147034, "logps/chosen": -0.9377217292785645, "logps/rejected": -0.9751043915748596, "loss": 1.2197, "nll_loss": 1.3078807592391968, "rewards/accuracies": 0.375, "rewards/chosen": -0.09377218037843704, "rewards/margins": 0.003738253377377987, "rewards/rejected": -0.0975104346871376, "step": 968 }, { "epoch": 0.5911239896294037, "grad_norm": 1.0179710388183594, "learning_rate": 7.079975505205143e-06, "log_odds_chosen": 0.5325528383255005, "log_odds_ratio": -0.6644622087478638, "logits/chosen": -0.9777438640594482, "logits/rejected": -0.8896290063858032, "logps/chosen": -1.0059069395065308, "logps/rejected": -1.3607902526855469, "loss": 1.2181, "nll_loss": 1.1800386905670166, "rewards/accuracies": 0.5, "rewards/chosen": -0.10059069842100143, "rewards/margins": 0.03548834100365639, "rewards/rejected": -0.13607902824878693, "step": 969 }, { "epoch": 0.5917340247064207, "grad_norm": 1.2848868370056152, "learning_rate": 7.078995713410899e-06, "log_odds_chosen": 0.39878061413764954, "log_odds_ratio": -0.5751363635063171, "logits/chosen": -1.0670281648635864, "logits/rejected": -0.9494330883026123, "logps/chosen": -0.8745943307876587, "logps/rejected": -1.1010360717773438, "loss": 1.1756, "nll_loss": 1.022917628288269, "rewards/accuracies": 0.625, "rewards/chosen": -0.08745943009853363, "rewards/margins": 0.022644173353910446, "rewards/rejected": -0.11010359972715378, "step": 970 }, { "epoch": 0.5923440597834375, "grad_norm": 1.2622010707855225, "learning_rate": 7.078015921616656e-06, "log_odds_chosen": 0.7454142570495605, "log_odds_ratio": -0.5477339625358582, "logits/chosen": -1.0076366662979126, "logits/rejected": -0.9505211710929871, "logps/chosen": -0.8802158236503601, "logps/rejected": -1.3264445066452026, "loss": 1.2154, "nll_loss": 1.2486361265182495, "rewards/accuracies": 0.625, "rewards/chosen": -0.08802159130573273, "rewards/margins": 0.04462286829948425, "rewards/rejected": -0.1326444447040558, "step": 971 }, { "epoch": 0.5929540948604545, "grad_norm": 1.647493600845337, "learning_rate": 7.0770361298224125e-06, "log_odds_chosen": 0.627500593662262, "log_odds_ratio": -0.564361572265625, "logits/chosen": -0.755413830280304, "logits/rejected": -0.6813266277313232, "logps/chosen": -0.8867936134338379, "logps/rejected": -1.185732126235962, "loss": 1.1747, "nll_loss": 1.1136012077331543, "rewards/accuracies": 0.625, "rewards/chosen": -0.08867935836315155, "rewards/margins": 0.02989386022090912, "rewards/rejected": -0.11857321858406067, "step": 972 }, { "epoch": 0.5935641299374714, "grad_norm": 1.9086012840270996, "learning_rate": 7.076056338028169e-06, "log_odds_chosen": 0.9084018468856812, "log_odds_ratio": -0.5250840187072754, "logits/chosen": -0.8980162739753723, "logits/rejected": -0.8309838771820068, "logps/chosen": -0.7865515351295471, "logps/rejected": -1.437220573425293, "loss": 1.2323, "nll_loss": 1.2873533964157104, "rewards/accuracies": 0.625, "rewards/chosen": -0.07865515351295471, "rewards/margins": 0.06506690382957458, "rewards/rejected": -0.1437220573425293, "step": 973 }, { "epoch": 0.5941741650144884, "grad_norm": 1.4003793001174927, "learning_rate": 7.075076546233925e-06, "log_odds_chosen": 0.15145650506019592, "log_odds_ratio": -0.6621705889701843, "logits/chosen": -1.0108222961425781, "logits/rejected": -1.0434377193450928, "logps/chosen": -0.9851406812667847, "logps/rejected": -1.0694531202316284, "loss": 1.3545, "nll_loss": 1.41334867477417, "rewards/accuracies": 0.625, "rewards/chosen": -0.09851406514644623, "rewards/margins": 0.008431239984929562, "rewards/rejected": -0.10694531351327896, "step": 974 }, { "epoch": 0.5947842000915052, "grad_norm": 1.9558759927749634, "learning_rate": 7.074096754439681e-06, "log_odds_chosen": 0.7158800363540649, "log_odds_ratio": -0.6029306650161743, "logits/chosen": -1.0041155815124512, "logits/rejected": -1.0082142353057861, "logps/chosen": -1.1559717655181885, "logps/rejected": -1.587602138519287, "loss": 1.2089, "nll_loss": 1.4881422519683838, "rewards/accuracies": 0.625, "rewards/chosen": -0.11559715867042542, "rewards/margins": 0.04316304624080658, "rewards/rejected": -0.158760204911232, "step": 975 }, { "epoch": 0.5953942351685222, "grad_norm": 2.7865004539489746, "learning_rate": 7.073116962645438e-06, "log_odds_chosen": -0.3739645481109619, "log_odds_ratio": -0.9926085472106934, "logits/chosen": -0.8273966908454895, "logits/rejected": -0.7835572957992554, "logps/chosen": -1.157597303390503, "logps/rejected": -0.8859093189239502, "loss": 1.0707, "nll_loss": 1.2265613079071045, "rewards/accuracies": 0.375, "rewards/chosen": -0.11575973778963089, "rewards/margins": -0.027168795466423035, "rewards/rejected": -0.08859094232320786, "step": 976 }, { "epoch": 0.5960042702455391, "grad_norm": 9.606987953186035, "learning_rate": 7.072137170851194e-06, "log_odds_chosen": 0.7714087963104248, "log_odds_ratio": -0.4460846185684204, "logits/chosen": -0.9552637338638306, "logits/rejected": -0.8622552752494812, "logps/chosen": -0.8718659281730652, "logps/rejected": -1.3848873376846313, "loss": 1.1523, "nll_loss": 1.1705576181411743, "rewards/accuracies": 0.875, "rewards/chosen": -0.08718658983707428, "rewards/margins": 0.05130213871598244, "rewards/rejected": -0.1384887397289276, "step": 977 }, { "epoch": 0.596614305322556, "grad_norm": 3.103308916091919, "learning_rate": 7.07115737905695e-06, "log_odds_chosen": 0.271185040473938, "log_odds_ratio": -0.7022688388824463, "logits/chosen": -0.9338957071304321, "logits/rejected": -0.9365085959434509, "logps/chosen": -0.9796250462532043, "logps/rejected": -1.1287353038787842, "loss": 1.2327, "nll_loss": 1.496147632598877, "rewards/accuracies": 0.5, "rewards/chosen": -0.09796249866485596, "rewards/margins": 0.014911022037267685, "rewards/rejected": -0.11287352442741394, "step": 978 }, { "epoch": 0.597224340399573, "grad_norm": 4.832726955413818, "learning_rate": 7.070177587262707e-06, "log_odds_chosen": 0.6882843375205994, "log_odds_ratio": -0.4815453886985779, "logits/chosen": -1.0505439043045044, "logits/rejected": -0.6906454563140869, "logps/chosen": -0.9930926561355591, "logps/rejected": -1.4327689409255981, "loss": 1.1256, "nll_loss": 1.272856593132019, "rewards/accuracies": 0.75, "rewards/chosen": -0.09930926561355591, "rewards/margins": 0.043967630714178085, "rewards/rejected": -0.1432769000530243, "step": 979 }, { "epoch": 0.5978343754765899, "grad_norm": 2.387749433517456, "learning_rate": 7.069197795468462e-06, "log_odds_chosen": 1.1375536918640137, "log_odds_ratio": -0.43940046429634094, "logits/chosen": -0.9661591053009033, "logits/rejected": -0.8652776479721069, "logps/chosen": -0.7532519698143005, "logps/rejected": -1.4985570907592773, "loss": 1.1445, "nll_loss": 0.9223760366439819, "rewards/accuracies": 0.75, "rewards/chosen": -0.07532519847154617, "rewards/margins": 0.07453051209449768, "rewards/rejected": -0.14985570311546326, "step": 980 }, { "epoch": 0.5984444105536069, "grad_norm": 1.345227837562561, "learning_rate": 7.0682180036742184e-06, "log_odds_chosen": 0.7126580476760864, "log_odds_ratio": -0.4574570655822754, "logits/chosen": -0.6462136507034302, "logits/rejected": -0.5109070539474487, "logps/chosen": -0.9968214631080627, "logps/rejected": -1.479539155960083, "loss": 1.0097, "nll_loss": 0.8943560123443604, "rewards/accuracies": 0.875, "rewards/chosen": -0.09968214482069016, "rewards/margins": 0.048271775245666504, "rewards/rejected": -0.14795392751693726, "step": 981 }, { "epoch": 0.5990544456306237, "grad_norm": 4.1322922706604, "learning_rate": 7.067238211879975e-06, "log_odds_chosen": 0.19188246130943298, "log_odds_ratio": -0.6656349897384644, "logits/chosen": -0.6910341382026672, "logits/rejected": -0.6990981101989746, "logps/chosen": -0.8586798906326294, "logps/rejected": -1.0112309455871582, "loss": 1.0929, "nll_loss": 1.0708816051483154, "rewards/accuracies": 0.25, "rewards/chosen": -0.0858679935336113, "rewards/margins": 0.01525510661303997, "rewards/rejected": -0.10112310200929642, "step": 982 }, { "epoch": 0.5996644807076407, "grad_norm": 2.688654899597168, "learning_rate": 7.0662584200857315e-06, "log_odds_chosen": 0.6812440156936646, "log_odds_ratio": -0.6201260685920715, "logits/chosen": -1.063975214958191, "logits/rejected": -0.8386345505714417, "logps/chosen": -1.2395000457763672, "logps/rejected": -1.7982845306396484, "loss": 1.4423, "nll_loss": 1.8232911825180054, "rewards/accuracies": 0.75, "rewards/chosen": -0.12395000457763672, "rewards/margins": 0.05587845295667648, "rewards/rejected": -0.1798284500837326, "step": 983 }, { "epoch": 0.6002745157846576, "grad_norm": 2.346491575241089, "learning_rate": 7.065278628291488e-06, "log_odds_chosen": 0.03903937339782715, "log_odds_ratio": -0.7660312652587891, "logits/chosen": -0.7115103006362915, "logits/rejected": -0.7001587748527527, "logps/chosen": -0.7448747754096985, "logps/rejected": -0.7442691922187805, "loss": 1.1597, "nll_loss": 1.0242233276367188, "rewards/accuracies": 0.375, "rewards/chosen": -0.07448747754096985, "rewards/margins": -6.055459380149841e-05, "rewards/rejected": -0.07442691922187805, "step": 984 }, { "epoch": 0.6008845508616746, "grad_norm": 1.1622436046600342, "learning_rate": 7.064298836497245e-06, "log_odds_chosen": 1.1524122953414917, "log_odds_ratio": -0.41055846214294434, "logits/chosen": -0.7170193791389465, "logits/rejected": -0.8062136769294739, "logps/chosen": -0.6692878007888794, "logps/rejected": -1.398362636566162, "loss": 0.9452, "nll_loss": 0.832301139831543, "rewards/accuracies": 0.75, "rewards/chosen": -0.06692878156900406, "rewards/margins": 0.0729074701666832, "rewards/rejected": -0.13983625173568726, "step": 985 }, { "epoch": 0.6014945859386914, "grad_norm": 2.3861372470855713, "learning_rate": 7.063319044703e-06, "log_odds_chosen": 0.6447304487228394, "log_odds_ratio": -0.6880738139152527, "logits/chosen": -0.5255555510520935, "logits/rejected": -0.5042622685432434, "logps/chosen": -0.8620471954345703, "logps/rejected": -1.1446644067764282, "loss": 1.1015, "nll_loss": 0.79322350025177, "rewards/accuracies": 0.75, "rewards/chosen": -0.08620471507310867, "rewards/margins": 0.028261732310056686, "rewards/rejected": -0.11446644365787506, "step": 986 }, { "epoch": 0.6021046210157084, "grad_norm": 3.467224359512329, "learning_rate": 7.062339252908756e-06, "log_odds_chosen": 0.6569777131080627, "log_odds_ratio": -0.5167554020881653, "logits/chosen": -0.9218953251838684, "logits/rejected": -0.7814686894416809, "logps/chosen": -1.117397665977478, "logps/rejected": -1.582042932510376, "loss": 1.1053, "nll_loss": 1.19459068775177, "rewards/accuracies": 0.75, "rewards/chosen": -0.11173976957798004, "rewards/margins": 0.046464525163173676, "rewards/rejected": -0.15820428729057312, "step": 987 }, { "epoch": 0.6027146560927253, "grad_norm": 2.0743932723999023, "learning_rate": 7.061359461114513e-06, "log_odds_chosen": 1.3465405702590942, "log_odds_ratio": -0.39925670623779297, "logits/chosen": -0.902229905128479, "logits/rejected": -0.7884283661842346, "logps/chosen": -0.6497548818588257, "logps/rejected": -1.3264940977096558, "loss": 1.2938, "nll_loss": 1.046431303024292, "rewards/accuracies": 0.625, "rewards/chosen": -0.06497548520565033, "rewards/margins": 0.0676739290356636, "rewards/rejected": -0.13264942169189453, "step": 988 }, { "epoch": 0.6033246911697423, "grad_norm": 2.892817258834839, "learning_rate": 7.060379669320269e-06, "log_odds_chosen": -0.09720972180366516, "log_odds_ratio": -0.8766791820526123, "logits/chosen": -0.9613239765167236, "logits/rejected": -0.7720186114311218, "logps/chosen": -1.067038893699646, "logps/rejected": -1.0494983196258545, "loss": 1.3573, "nll_loss": 1.2585086822509766, "rewards/accuracies": 0.5, "rewards/chosen": -0.10670389235019684, "rewards/margins": -0.0017540520057082176, "rewards/rejected": -0.10494983196258545, "step": 989 }, { "epoch": 0.6039347262467591, "grad_norm": 1.4436408281326294, "learning_rate": 7.059399877526026e-06, "log_odds_chosen": 0.3073093593120575, "log_odds_ratio": -0.587813138961792, "logits/chosen": -0.8427678346633911, "logits/rejected": -0.625676691532135, "logps/chosen": -0.7755841612815857, "logps/rejected": -0.9328668117523193, "loss": 1.0811, "nll_loss": 1.0562217235565186, "rewards/accuracies": 0.75, "rewards/chosen": -0.07755842059850693, "rewards/margins": 0.015728265047073364, "rewards/rejected": -0.09328668564558029, "step": 990 }, { "epoch": 0.6045447613237761, "grad_norm": 2.1200592517852783, "learning_rate": 7.058420085731781e-06, "log_odds_chosen": -0.1351112723350525, "log_odds_ratio": -0.776327908039093, "logits/chosen": -0.8483791351318359, "logits/rejected": -0.8300155401229858, "logps/chosen": -0.8319553136825562, "logps/rejected": -0.7744994759559631, "loss": 1.3171, "nll_loss": 1.2574374675750732, "rewards/accuracies": 0.375, "rewards/chosen": -0.0831955298781395, "rewards/margins": -0.005745578557252884, "rewards/rejected": -0.07744994759559631, "step": 991 }, { "epoch": 0.6051547964007931, "grad_norm": 1.553694248199463, "learning_rate": 7.0574402939375375e-06, "log_odds_chosen": 1.534972906112671, "log_odds_ratio": -0.369064062833786, "logits/chosen": -0.5315718650817871, "logits/rejected": -0.7587119340896606, "logps/chosen": -0.6847862601280212, "logps/rejected": -1.8523516654968262, "loss": 0.9968, "nll_loss": 0.7913486957550049, "rewards/accuracies": 0.875, "rewards/chosen": -0.06847862899303436, "rewards/margins": 0.11675653606653214, "rewards/rejected": -0.1852351725101471, "step": 992 }, { "epoch": 0.60576483147781, "grad_norm": 2.1772091388702393, "learning_rate": 7.0564605021432945e-06, "log_odds_chosen": 0.42389726638793945, "log_odds_ratio": -0.6323736310005188, "logits/chosen": -0.9817061424255371, "logits/rejected": -1.0371136665344238, "logps/chosen": -0.8599461317062378, "logps/rejected": -1.0915340185165405, "loss": 1.164, "nll_loss": 1.2603288888931274, "rewards/accuracies": 0.75, "rewards/chosen": -0.08599460870027542, "rewards/margins": 0.023158783093094826, "rewards/rejected": -0.1091533899307251, "step": 993 }, { "epoch": 0.6063748665548269, "grad_norm": 1.9935593605041504, "learning_rate": 7.055480710349051e-06, "log_odds_chosen": 1.3688385486602783, "log_odds_ratio": -0.28932178020477295, "logits/chosen": -0.683745265007019, "logits/rejected": -0.4747011065483093, "logps/chosen": -0.7181228399276733, "logps/rejected": -1.600672960281372, "loss": 1.2076, "nll_loss": 0.939706563949585, "rewards/accuracies": 1.0, "rewards/chosen": -0.07181227952241898, "rewards/margins": 0.08825501054525375, "rewards/rejected": -0.16006729006767273, "step": 994 }, { "epoch": 0.6069849016318438, "grad_norm": 1.8692251443862915, "learning_rate": 7.054500918554807e-06, "log_odds_chosen": 0.6962469816207886, "log_odds_ratio": -0.5330129265785217, "logits/chosen": -0.7463474273681641, "logits/rejected": -0.7900652885437012, "logps/chosen": -0.6531740427017212, "logps/rejected": -1.0479053258895874, "loss": 1.2298, "nll_loss": 0.9983140230178833, "rewards/accuracies": 0.625, "rewards/chosen": -0.06531740725040436, "rewards/margins": 0.03947313502430916, "rewards/rejected": -0.10479053854942322, "step": 995 }, { "epoch": 0.6075949367088608, "grad_norm": 1.5032103061676025, "learning_rate": 7.053521126760564e-06, "log_odds_chosen": 0.4815455675125122, "log_odds_ratio": -0.5587382912635803, "logits/chosen": -1.1439197063446045, "logits/rejected": -1.0443572998046875, "logps/chosen": -0.985160231590271, "logps/rejected": -1.3026707172393799, "loss": 1.1464, "nll_loss": 1.1888225078582764, "rewards/accuracies": 0.625, "rewards/chosen": -0.09851603209972382, "rewards/margins": 0.03175104781985283, "rewards/rejected": -0.13026708364486694, "step": 996 }, { "epoch": 0.6082049717858777, "grad_norm": 1.4921954870224, "learning_rate": 7.052541334966319e-06, "log_odds_chosen": 1.3835558891296387, "log_odds_ratio": -0.41532227396965027, "logits/chosen": -0.6031211018562317, "logits/rejected": -0.6853234767913818, "logps/chosen": -0.6687721610069275, "logps/rejected": -1.561295986175537, "loss": 1.0655, "nll_loss": 0.9167606830596924, "rewards/accuracies": 0.75, "rewards/chosen": -0.06687722355127335, "rewards/margins": 0.08925239741802216, "rewards/rejected": -0.1561295986175537, "step": 997 }, { "epoch": 0.6088150068628946, "grad_norm": 2.2171034812927246, "learning_rate": 7.051561543172075e-06, "log_odds_chosen": 0.5838836431503296, "log_odds_ratio": -0.6630174517631531, "logits/chosen": -0.8086068630218506, "logits/rejected": -0.6363025903701782, "logps/chosen": -0.8676319122314453, "logps/rejected": -1.333775281906128, "loss": 1.3179, "nll_loss": 1.0539100170135498, "rewards/accuracies": 0.75, "rewards/chosen": -0.08676318824291229, "rewards/margins": 0.046614352613687515, "rewards/rejected": -0.1333775371313095, "step": 998 }, { "epoch": 0.6094250419399115, "grad_norm": 1.7899550199508667, "learning_rate": 7.050581751377832e-06, "log_odds_chosen": 0.595251739025116, "log_odds_ratio": -0.6569170951843262, "logits/chosen": -1.1057252883911133, "logits/rejected": -1.0324571132659912, "logps/chosen": -0.7666757702827454, "logps/rejected": -1.3269281387329102, "loss": 1.1842, "nll_loss": 1.1855748891830444, "rewards/accuracies": 0.625, "rewards/chosen": -0.07666757702827454, "rewards/margins": 0.05602522939443588, "rewards/rejected": -0.13269281387329102, "step": 999 }, { "epoch": 0.6100350770169285, "grad_norm": 1.9322763681411743, "learning_rate": 7.049601959583588e-06, "log_odds_chosen": 0.5744527578353882, "log_odds_ratio": -0.5086389780044556, "logits/chosen": -0.9153187870979309, "logits/rejected": -0.8376616835594177, "logps/chosen": -0.8797860741615295, "logps/rejected": -1.226257085800171, "loss": 1.1493, "nll_loss": 1.0561747550964355, "rewards/accuracies": 0.625, "rewards/chosen": -0.08797860145568848, "rewards/margins": 0.03464711084961891, "rewards/rejected": -0.12262571603059769, "step": 1000 }, { "epoch": 0.6106451120939455, "grad_norm": 1.153019905090332, "learning_rate": 7.048622167789345e-06, "log_odds_chosen": 0.32580968737602234, "log_odds_ratio": -0.6459565758705139, "logits/chosen": -0.922773003578186, "logits/rejected": -0.9506137371063232, "logps/chosen": -0.9446224570274353, "logps/rejected": -1.1144195795059204, "loss": 1.1638, "nll_loss": 1.2106338739395142, "rewards/accuracies": 0.375, "rewards/chosen": -0.09446224570274353, "rewards/margins": 0.016979709267616272, "rewards/rejected": -0.1114419549703598, "step": 1001 }, { "epoch": 0.6112551471709623, "grad_norm": 1.979092001914978, "learning_rate": 7.047642375995101e-06, "log_odds_chosen": 1.1022462844848633, "log_odds_ratio": -0.39893630146980286, "logits/chosen": -0.9579074382781982, "logits/rejected": -0.8641552925109863, "logps/chosen": -0.7999275922775269, "logps/rejected": -1.5823431015014648, "loss": 1.226, "nll_loss": 0.9748208522796631, "rewards/accuracies": 0.75, "rewards/chosen": -0.07999276369810104, "rewards/margins": 0.07824154198169708, "rewards/rejected": -0.15823429822921753, "step": 1002 }, { "epoch": 0.6118651822479793, "grad_norm": 1.3580812215805054, "learning_rate": 7.0466625842008565e-06, "log_odds_chosen": 0.9237748980522156, "log_odds_ratio": -0.4730624854564667, "logits/chosen": -0.9132469892501831, "logits/rejected": -0.9575519561767578, "logps/chosen": -0.8359541296958923, "logps/rejected": -1.4164397716522217, "loss": 1.168, "nll_loss": 1.1233006715774536, "rewards/accuracies": 0.625, "rewards/chosen": -0.08359541743993759, "rewards/margins": 0.05804856866598129, "rewards/rejected": -0.14164398610591888, "step": 1003 }, { "epoch": 0.6124752173249962, "grad_norm": 1.7017054557800293, "learning_rate": 7.0456827924066135e-06, "log_odds_chosen": -0.41274261474609375, "log_odds_ratio": -0.9874147772789001, "logits/chosen": -1.1621567010879517, "logits/rejected": -1.0201717615127563, "logps/chosen": -0.9383151531219482, "logps/rejected": -0.7429279685020447, "loss": 1.3185, "nll_loss": 1.1830766201019287, "rewards/accuracies": 0.25, "rewards/chosen": -0.09383151680231094, "rewards/margins": -0.019538719207048416, "rewards/rejected": -0.07429279386997223, "step": 1004 }, { "epoch": 0.6130852524020132, "grad_norm": 1.5111842155456543, "learning_rate": 7.04470300061237e-06, "log_odds_chosen": 0.8073736429214478, "log_odds_ratio": -0.5514466762542725, "logits/chosen": -0.9429285526275635, "logits/rejected": -0.7943370938301086, "logps/chosen": -0.8169924020767212, "logps/rejected": -1.3234373331069946, "loss": 1.0822, "nll_loss": 0.8778235912322998, "rewards/accuracies": 0.5, "rewards/chosen": -0.08169924467802048, "rewards/margins": 0.050644490867853165, "rewards/rejected": -0.13234372437000275, "step": 1005 }, { "epoch": 0.61369528747903, "grad_norm": 2.9420788288116455, "learning_rate": 7.043723208818126e-06, "log_odds_chosen": -0.2842126190662384, "log_odds_ratio": -0.8644106388092041, "logits/chosen": -1.1531442403793335, "logits/rejected": -1.1028804779052734, "logps/chosen": -1.1577813625335693, "logps/rejected": -0.9794958233833313, "loss": 1.2082, "nll_loss": 1.2749801874160767, "rewards/accuracies": 0.375, "rewards/chosen": -0.11577814817428589, "rewards/margins": -0.01782855950295925, "rewards/rejected": -0.09794958680868149, "step": 1006 }, { "epoch": 0.614305322556047, "grad_norm": 1.493105173110962, "learning_rate": 7.042743417023883e-06, "log_odds_chosen": 0.3566416800022125, "log_odds_ratio": -0.6899948120117188, "logits/chosen": -1.022821068763733, "logits/rejected": -0.8746054172515869, "logps/chosen": -0.9831328392028809, "logps/rejected": -1.1687817573547363, "loss": 1.1567, "nll_loss": 1.2417585849761963, "rewards/accuracies": 0.375, "rewards/chosen": -0.09831328690052032, "rewards/margins": 0.018564894795417786, "rewards/rejected": -0.11687818169593811, "step": 1007 }, { "epoch": 0.6149153576330639, "grad_norm": 2.507999897003174, "learning_rate": 7.041763625229638e-06, "log_odds_chosen": 0.32548844814300537, "log_odds_ratio": -0.7253118753433228, "logits/chosen": -1.0264521837234497, "logits/rejected": -1.0267301797866821, "logps/chosen": -1.0999966859817505, "logps/rejected": -1.2474889755249023, "loss": 1.1897, "nll_loss": 1.197097659111023, "rewards/accuracies": 0.5, "rewards/chosen": -0.10999967157840729, "rewards/margins": 0.014749223366379738, "rewards/rejected": -0.12474888563156128, "step": 1008 }, { "epoch": 0.6155253927100808, "grad_norm": 2.194005012512207, "learning_rate": 7.040783833435394e-06, "log_odds_chosen": 0.5233120322227478, "log_odds_ratio": -0.6055772304534912, "logits/chosen": -1.1227478981018066, "logits/rejected": -0.9849333763122559, "logps/chosen": -1.0381356477737427, "logps/rejected": -1.3421138525009155, "loss": 1.1833, "nll_loss": 1.1331562995910645, "rewards/accuracies": 0.75, "rewards/chosen": -0.10381357371807098, "rewards/margins": 0.030397823080420494, "rewards/rejected": -0.13421139121055603, "step": 1009 }, { "epoch": 0.6161354277870977, "grad_norm": 3.1113388538360596, "learning_rate": 7.039804041641151e-06, "log_odds_chosen": 0.3642026484012604, "log_odds_ratio": -0.6013919115066528, "logits/chosen": -0.9482816457748413, "logits/rejected": -0.9657381772994995, "logps/chosen": -0.8516219854354858, "logps/rejected": -1.0652894973754883, "loss": 1.2144, "nll_loss": 1.0238301753997803, "rewards/accuracies": 0.625, "rewards/chosen": -0.0851622000336647, "rewards/margins": 0.021366752684116364, "rewards/rejected": -0.10652895271778107, "step": 1010 }, { "epoch": 0.6167454628641147, "grad_norm": 1.5569278001785278, "learning_rate": 7.038824249846907e-06, "log_odds_chosen": 0.786908745765686, "log_odds_ratio": -0.5583793520927429, "logits/chosen": -1.1029428243637085, "logits/rejected": -0.9531729221343994, "logps/chosen": -0.8611459732055664, "logps/rejected": -1.482433795928955, "loss": 0.9904, "nll_loss": 1.1615171432495117, "rewards/accuracies": 0.625, "rewards/chosen": -0.08611460030078888, "rewards/margins": 0.062128785997629166, "rewards/rejected": -0.14824339747428894, "step": 1011 }, { "epoch": 0.6173554979411316, "grad_norm": 1.836535930633545, "learning_rate": 7.037844458052663e-06, "log_odds_chosen": 0.41145092248916626, "log_odds_ratio": -0.615347146987915, "logits/chosen": -1.1212538480758667, "logits/rejected": -1.0650794506072998, "logps/chosen": -0.8182975053787231, "logps/rejected": -1.0475977659225464, "loss": 1.3261, "nll_loss": 1.2295408248901367, "rewards/accuracies": 0.625, "rewards/chosen": -0.0818297490477562, "rewards/margins": 0.022930029779672623, "rewards/rejected": -0.10475978255271912, "step": 1012 }, { "epoch": 0.6179655330181485, "grad_norm": 1.6774051189422607, "learning_rate": 7.03686466625842e-06, "log_odds_chosen": 0.4236123263835907, "log_odds_ratio": -0.5844899415969849, "logits/chosen": -1.0290371179580688, "logits/rejected": -1.059090495109558, "logps/chosen": -0.8058210611343384, "logps/rejected": -0.9924815893173218, "loss": 0.9154, "nll_loss": 1.0769811868667603, "rewards/accuracies": 0.875, "rewards/chosen": -0.08058211207389832, "rewards/margins": 0.018666047602891922, "rewards/rejected": -0.09924815595149994, "step": 1013 }, { "epoch": 0.6185755680951655, "grad_norm": 1.1849706172943115, "learning_rate": 7.0358848744641756e-06, "log_odds_chosen": 0.7315831780433655, "log_odds_ratio": -0.5067752599716187, "logits/chosen": -0.9683271646499634, "logits/rejected": -0.8947197794914246, "logps/chosen": -0.843186616897583, "logps/rejected": -1.1926418542861938, "loss": 1.0276, "nll_loss": 0.896513044834137, "rewards/accuracies": 0.625, "rewards/chosen": -0.08431866019964218, "rewards/margins": 0.034945521503686905, "rewards/rejected": -0.11926417797803879, "step": 1014 }, { "epoch": 0.6191856031721824, "grad_norm": 1.1185277700424194, "learning_rate": 7.034905082669932e-06, "log_odds_chosen": 0.17590948939323425, "log_odds_ratio": -0.6979453563690186, "logits/chosen": -1.1755082607269287, "logits/rejected": -1.0596647262573242, "logps/chosen": -0.8965093493461609, "logps/rejected": -1.002290964126587, "loss": 1.1385, "nll_loss": 1.1465712785720825, "rewards/accuracies": 0.375, "rewards/chosen": -0.08965093642473221, "rewards/margins": 0.010578164830803871, "rewards/rejected": -0.10022909939289093, "step": 1015 }, { "epoch": 0.6197956382491994, "grad_norm": 2.3471202850341797, "learning_rate": 7.033925290875689e-06, "log_odds_chosen": 0.6544620990753174, "log_odds_ratio": -0.5633448958396912, "logits/chosen": -1.125057339668274, "logits/rejected": -1.0747525691986084, "logps/chosen": -0.8360836505889893, "logps/rejected": -1.3328657150268555, "loss": 1.0646, "nll_loss": 1.1522467136383057, "rewards/accuracies": 0.5, "rewards/chosen": -0.08360837399959564, "rewards/margins": 0.04967820644378662, "rewards/rejected": -0.13328658044338226, "step": 1016 }, { "epoch": 0.6204056733262162, "grad_norm": 1.730272650718689, "learning_rate": 7.032945499081445e-06, "log_odds_chosen": 0.6790673136711121, "log_odds_ratio": -0.5619771480560303, "logits/chosen": -0.7334851026535034, "logits/rejected": -0.8891196250915527, "logps/chosen": -0.7503423094749451, "logps/rejected": -1.1632680892944336, "loss": 1.1621, "nll_loss": 0.9173837900161743, "rewards/accuracies": 0.5, "rewards/chosen": -0.0750342309474945, "rewards/margins": 0.04129257798194885, "rewards/rejected": -0.11632680892944336, "step": 1017 }, { "epoch": 0.6210157084032332, "grad_norm": 3.511399269104004, "learning_rate": 7.031965707287202e-06, "log_odds_chosen": 0.711990475654602, "log_odds_ratio": -0.5163097381591797, "logits/chosen": -0.9985424876213074, "logits/rejected": -0.9704810380935669, "logps/chosen": -0.9182778596878052, "logps/rejected": -1.4088858366012573, "loss": 1.0473, "nll_loss": 1.1719262599945068, "rewards/accuracies": 0.75, "rewards/chosen": -0.09182779490947723, "rewards/margins": 0.04906078428030014, "rewards/rejected": -0.14088857173919678, "step": 1018 }, { "epoch": 0.6216257434802501, "grad_norm": 4.475054740905762, "learning_rate": 7.030985915492958e-06, "log_odds_chosen": 1.1891136169433594, "log_odds_ratio": -0.3526861071586609, "logits/chosen": -0.8578138947486877, "logits/rejected": -0.8584084510803223, "logps/chosen": -0.649745523929596, "logps/rejected": -1.3936957120895386, "loss": 1.1, "nll_loss": 0.8333926200866699, "rewards/accuracies": 0.75, "rewards/chosen": -0.06497455388307571, "rewards/margins": 0.07439503073692322, "rewards/rejected": -0.13936957716941833, "step": 1019 }, { "epoch": 0.6222357785572671, "grad_norm": 1.6971911191940308, "learning_rate": 7.030006123698713e-06, "log_odds_chosen": 0.794848620891571, "log_odds_ratio": -0.5142409205436707, "logits/chosen": -0.6793743371963501, "logits/rejected": -0.7840151786804199, "logps/chosen": -0.7211571931838989, "logps/rejected": -1.1106749773025513, "loss": 1.1336, "nll_loss": 0.8592008352279663, "rewards/accuracies": 0.625, "rewards/chosen": -0.07211571931838989, "rewards/margins": 0.03895178437232971, "rewards/rejected": -0.1110675036907196, "step": 1020 }, { "epoch": 0.6228458136342839, "grad_norm": 2.0892767906188965, "learning_rate": 7.02902633190447e-06, "log_odds_chosen": 1.6129378080368042, "log_odds_ratio": -0.3367805778980255, "logits/chosen": -0.972527265548706, "logits/rejected": -0.9666719436645508, "logps/chosen": -0.7277695536613464, "logps/rejected": -1.823781967163086, "loss": 1.0588, "nll_loss": 0.9319698810577393, "rewards/accuracies": 0.875, "rewards/chosen": -0.07277695834636688, "rewards/margins": 0.10960123687982559, "rewards/rejected": -0.18237820267677307, "step": 1021 }, { "epoch": 0.6234558487113009, "grad_norm": 1.5107128620147705, "learning_rate": 7.028046540110226e-06, "log_odds_chosen": 0.7830079197883606, "log_odds_ratio": -0.4691263437271118, "logits/chosen": -1.0660871267318726, "logits/rejected": -0.9676420092582703, "logps/chosen": -0.7600122690200806, "logps/rejected": -1.1940298080444336, "loss": 1.2245, "nll_loss": 1.1696090698242188, "rewards/accuracies": 0.75, "rewards/chosen": -0.07600122690200806, "rewards/margins": 0.04340174421668053, "rewards/rejected": -0.11940297484397888, "step": 1022 }, { "epoch": 0.6240658837883178, "grad_norm": 2.161759376525879, "learning_rate": 7.027066748315982e-06, "log_odds_chosen": 1.2027004957199097, "log_odds_ratio": -0.40801993012428284, "logits/chosen": -0.8924084305763245, "logits/rejected": -0.9752118587493896, "logps/chosen": -0.6129487752914429, "logps/rejected": -1.2803938388824463, "loss": 1.0472, "nll_loss": 0.9723347425460815, "rewards/accuracies": 0.75, "rewards/chosen": -0.061294883489608765, "rewards/margins": 0.06674449890851974, "rewards/rejected": -0.1280393898487091, "step": 1023 }, { "epoch": 0.6246759188653348, "grad_norm": 1.1929072141647339, "learning_rate": 7.026086956521739e-06, "log_odds_chosen": 0.5248597264289856, "log_odds_ratio": -0.5227106809616089, "logits/chosen": -0.8742238879203796, "logits/rejected": -0.8021509051322937, "logps/chosen": -0.9464627504348755, "logps/rejected": -1.2222849130630493, "loss": 1.0391, "nll_loss": 1.0606482028961182, "rewards/accuracies": 0.875, "rewards/chosen": -0.09464628249406815, "rewards/margins": 0.02758222073316574, "rewards/rejected": -0.12222849577665329, "step": 1024 }, { "epoch": 0.6252859539423516, "grad_norm": 1.1543364524841309, "learning_rate": 7.025107164727495e-06, "log_odds_chosen": 0.5720182657241821, "log_odds_ratio": -0.5306106805801392, "logits/chosen": -1.0454415082931519, "logits/rejected": -0.8140748739242554, "logps/chosen": -0.9839603900909424, "logps/rejected": -1.387133002281189, "loss": 1.0857, "nll_loss": 1.0769011974334717, "rewards/accuracies": 0.875, "rewards/chosen": -0.09839604049921036, "rewards/margins": 0.04031726345419884, "rewards/rejected": -0.1387133002281189, "step": 1025 }, { "epoch": 0.6258959890193686, "grad_norm": 5.1750264167785645, "learning_rate": 7.024127372933251e-06, "log_odds_chosen": 0.6184946894645691, "log_odds_ratio": -0.5531610250473022, "logits/chosen": -0.6898423433303833, "logits/rejected": -0.7022363543510437, "logps/chosen": -0.971220850944519, "logps/rejected": -1.3440967798233032, "loss": 1.2282, "nll_loss": 0.9343023896217346, "rewards/accuracies": 0.625, "rewards/chosen": -0.09712208807468414, "rewards/margins": 0.03728759288787842, "rewards/rejected": -0.13440968096256256, "step": 1026 }, { "epoch": 0.6265060240963856, "grad_norm": 1.110626220703125, "learning_rate": 7.023147581139008e-06, "log_odds_chosen": 0.6376667618751526, "log_odds_ratio": -0.5516895055770874, "logits/chosen": -1.0983542203903198, "logits/rejected": -1.032567024230957, "logps/chosen": -1.0910292863845825, "logps/rejected": -1.496758222579956, "loss": 1.2944, "nll_loss": 1.3857587575912476, "rewards/accuracies": 0.75, "rewards/chosen": -0.10910293459892273, "rewards/margins": 0.04057289659976959, "rewards/rejected": -0.14967583119869232, "step": 1027 }, { "epoch": 0.6271160591734025, "grad_norm": 1.5197699069976807, "learning_rate": 7.022167789344764e-06, "log_odds_chosen": 0.9907110929489136, "log_odds_ratio": -0.5905585885047913, "logits/chosen": -0.6220281720161438, "logits/rejected": -0.6503004431724548, "logps/chosen": -0.7186261415481567, "logps/rejected": -1.3571546077728271, "loss": 1.1728, "nll_loss": 0.9090418815612793, "rewards/accuracies": 0.625, "rewards/chosen": -0.0718626156449318, "rewards/margins": 0.06385285407304764, "rewards/rejected": -0.13571545481681824, "step": 1028 }, { "epoch": 0.6277260942504194, "grad_norm": 2.318859338760376, "learning_rate": 7.021187997550521e-06, "log_odds_chosen": 0.3914673626422882, "log_odds_ratio": -0.6729711294174194, "logits/chosen": -0.933266818523407, "logits/rejected": -0.8775621056556702, "logps/chosen": -1.1801875829696655, "logps/rejected": -1.4261542558670044, "loss": 1.1391, "nll_loss": 1.1894588470458984, "rewards/accuracies": 0.625, "rewards/chosen": -0.11801876127719879, "rewards/margins": 0.024596665054559708, "rewards/rejected": -0.1426154375076294, "step": 1029 }, { "epoch": 0.6283361293274363, "grad_norm": 1.548087239265442, "learning_rate": 7.020208205756277e-06, "log_odds_chosen": 0.3058635890483856, "log_odds_ratio": -0.6633766889572144, "logits/chosen": -0.924217939376831, "logits/rejected": -0.7901015281677246, "logps/chosen": -0.7707209587097168, "logps/rejected": -0.9721770286560059, "loss": 0.9767, "nll_loss": 0.914206862449646, "rewards/accuracies": 0.625, "rewards/chosen": -0.07707209885120392, "rewards/margins": 0.020145602524280548, "rewards/rejected": -0.09721769392490387, "step": 1030 }, { "epoch": 0.6289461644044533, "grad_norm": 2.576824903488159, "learning_rate": 7.019228413962032e-06, "log_odds_chosen": 0.2023247480392456, "log_odds_ratio": -0.6259798407554626, "logits/chosen": -1.0340174436569214, "logits/rejected": -0.9330251216888428, "logps/chosen": -1.0274749994277954, "logps/rejected": -1.1782987117767334, "loss": 1.3221, "nll_loss": 1.098908543586731, "rewards/accuracies": 0.625, "rewards/chosen": -0.10274749994277954, "rewards/margins": 0.015082375146448612, "rewards/rejected": -0.11782987415790558, "step": 1031 }, { "epoch": 0.6295561994814702, "grad_norm": 1.4201258420944214, "learning_rate": 7.018248622167789e-06, "log_odds_chosen": 0.4064794182777405, "log_odds_ratio": -0.6504154205322266, "logits/chosen": -1.0001740455627441, "logits/rejected": -0.9679412245750427, "logps/chosen": -1.0732511281967163, "logps/rejected": -1.462838053703308, "loss": 1.0733, "nll_loss": 1.006564736366272, "rewards/accuracies": 0.625, "rewards/chosen": -0.10732511430978775, "rewards/margins": 0.03895869851112366, "rewards/rejected": -0.146283820271492, "step": 1032 }, { "epoch": 0.6301662345584871, "grad_norm": 1.9479377269744873, "learning_rate": 7.017268830373545e-06, "log_odds_chosen": 0.6781850457191467, "log_odds_ratio": -0.5985616445541382, "logits/chosen": -0.8710761666297913, "logits/rejected": -0.7879846096038818, "logps/chosen": -0.9134092330932617, "logps/rejected": -1.40542733669281, "loss": 1.0687, "nll_loss": 0.9511291980743408, "rewards/accuracies": 0.625, "rewards/chosen": -0.09134091436862946, "rewards/margins": 0.04920181259512901, "rewards/rejected": -0.14054273068904877, "step": 1033 }, { "epoch": 0.630776269635504, "grad_norm": 1.1920779943466187, "learning_rate": 7.016289038579301e-06, "log_odds_chosen": 0.3382757306098938, "log_odds_ratio": -0.610386312007904, "logits/chosen": -1.0200824737548828, "logits/rejected": -0.9129770398139954, "logps/chosen": -0.9705511927604675, "logps/rejected": -1.211596965789795, "loss": 1.1184, "nll_loss": 1.210343360900879, "rewards/accuracies": 0.625, "rewards/chosen": -0.09705512225627899, "rewards/margins": 0.024104589596390724, "rewards/rejected": -0.12115970999002457, "step": 1034 }, { "epoch": 0.631386304712521, "grad_norm": 1.4850915670394897, "learning_rate": 7.015309246785058e-06, "log_odds_chosen": 0.7802917957305908, "log_odds_ratio": -0.5474395751953125, "logits/chosen": -0.8689448237419128, "logits/rejected": -0.9282759428024292, "logps/chosen": -0.6856533288955688, "logps/rejected": -1.1672464609146118, "loss": 1.0783, "nll_loss": 0.9422691464424133, "rewards/accuracies": 0.625, "rewards/chosen": -0.06856532394886017, "rewards/margins": 0.04815932363271713, "rewards/rejected": -0.1167246475815773, "step": 1035 }, { "epoch": 0.631996339789538, "grad_norm": 2.0133273601531982, "learning_rate": 7.0143294549908145e-06, "log_odds_chosen": 0.6318105459213257, "log_odds_ratio": -0.498998761177063, "logits/chosen": -0.8263574242591858, "logits/rejected": -0.5654610395431519, "logps/chosen": -0.7873425483703613, "logps/rejected": -1.1811347007751465, "loss": 1.149, "nll_loss": 0.980331301689148, "rewards/accuracies": 0.625, "rewards/chosen": -0.07873426377773285, "rewards/margins": 0.03937920182943344, "rewards/rejected": -0.11811346560716629, "step": 1036 }, { "epoch": 0.6326063748665548, "grad_norm": 2.977553606033325, "learning_rate": 7.01334966319657e-06, "log_odds_chosen": 0.7814171314239502, "log_odds_ratio": -0.4681134819984436, "logits/chosen": -0.9900726079940796, "logits/rejected": -0.807068943977356, "logps/chosen": -0.9224368333816528, "logps/rejected": -1.449718713760376, "loss": 1.2025, "nll_loss": 1.0327811241149902, "rewards/accuracies": 0.625, "rewards/chosen": -0.09224367886781693, "rewards/margins": 0.05272817984223366, "rewards/rejected": -0.14497186243534088, "step": 1037 }, { "epoch": 0.6332164099435718, "grad_norm": 2.3414647579193115, "learning_rate": 7.012369871402327e-06, "log_odds_chosen": 0.9559918642044067, "log_odds_ratio": -0.4881397485733032, "logits/chosen": -0.7488853931427002, "logits/rejected": -0.8616431951522827, "logps/chosen": -0.7481063008308411, "logps/rejected": -1.2722433805465698, "loss": 0.9615, "nll_loss": 0.7037456035614014, "rewards/accuracies": 0.625, "rewards/chosen": -0.07481063157320023, "rewards/margins": 0.0524136982858181, "rewards/rejected": -0.12722432613372803, "step": 1038 }, { "epoch": 0.6338264450205887, "grad_norm": 10.312505722045898, "learning_rate": 7.011390079608083e-06, "log_odds_chosen": 0.6069276332855225, "log_odds_ratio": -0.5292661190032959, "logits/chosen": -0.9358175992965698, "logits/rejected": -0.8709068298339844, "logps/chosen": -0.82916259765625, "logps/rejected": -1.2151788473129272, "loss": 1.1771, "nll_loss": 1.0315548181533813, "rewards/accuracies": 0.875, "rewards/chosen": -0.0829162672162056, "rewards/margins": 0.03860161826014519, "rewards/rejected": -0.12151788175106049, "step": 1039 }, { "epoch": 0.6344364800976056, "grad_norm": 1.2212021350860596, "learning_rate": 7.010410287813839e-06, "log_odds_chosen": 0.6381763219833374, "log_odds_ratio": -0.4861699938774109, "logits/chosen": -1.1566886901855469, "logits/rejected": -0.958441972732544, "logps/chosen": -0.7797699570655823, "logps/rejected": -1.2165942192077637, "loss": 1.0632, "nll_loss": 1.188826560974121, "rewards/accuracies": 0.75, "rewards/chosen": -0.0779770016670227, "rewards/margins": 0.04368242993950844, "rewards/rejected": -0.12165942788124084, "step": 1040 }, { "epoch": 0.6350465151746225, "grad_norm": 3.8667945861816406, "learning_rate": 7.009430496019596e-06, "log_odds_chosen": 0.9216610193252563, "log_odds_ratio": -0.47024738788604736, "logits/chosen": -0.8127771615982056, "logits/rejected": -0.8832744359970093, "logps/chosen": -0.6370726823806763, "logps/rejected": -1.2949957847595215, "loss": 1.0871, "nll_loss": 0.928816556930542, "rewards/accuracies": 0.75, "rewards/chosen": -0.06370726972818375, "rewards/margins": 0.06579230725765228, "rewards/rejected": -0.12949958443641663, "step": 1041 }, { "epoch": 0.6356565502516395, "grad_norm": 1.8993988037109375, "learning_rate": 7.008450704225352e-06, "log_odds_chosen": 1.4414130449295044, "log_odds_ratio": -0.373401403427124, "logits/chosen": -0.9250112771987915, "logits/rejected": -0.7800486087799072, "logps/chosen": -0.7189074754714966, "logps/rejected": -1.7338415384292603, "loss": 1.1094, "nll_loss": 0.8671526312828064, "rewards/accuracies": 0.75, "rewards/chosen": -0.07189075648784637, "rewards/margins": 0.10149341821670532, "rewards/rejected": -0.1733841598033905, "step": 1042 }, { "epoch": 0.6362665853286564, "grad_norm": 2.1286308765411377, "learning_rate": 7.007470912431108e-06, "log_odds_chosen": 0.5240227580070496, "log_odds_ratio": -0.5418524146080017, "logits/chosen": -0.8761534690856934, "logits/rejected": -0.8861596584320068, "logps/chosen": -0.760486364364624, "logps/rejected": -1.0619556903839111, "loss": 1.0507, "nll_loss": 1.1421030759811401, "rewards/accuracies": 0.75, "rewards/chosen": -0.07604864239692688, "rewards/margins": 0.030146926641464233, "rewards/rejected": -0.10619556903839111, "step": 1043 }, { "epoch": 0.6368766204056733, "grad_norm": 1.787360429763794, "learning_rate": 7.006491120636864e-06, "log_odds_chosen": 0.2517321705818176, "log_odds_ratio": -0.6955653429031372, "logits/chosen": -1.188262939453125, "logits/rejected": -1.0580917596817017, "logps/chosen": -1.0077672004699707, "logps/rejected": -1.2069541215896606, "loss": 1.3267, "nll_loss": 1.3334828615188599, "rewards/accuracies": 0.5, "rewards/chosen": -0.10077671706676483, "rewards/margins": 0.01991869881749153, "rewards/rejected": -0.12069541215896606, "step": 1044 }, { "epoch": 0.6374866554826902, "grad_norm": 2.7852182388305664, "learning_rate": 7.0055113288426204e-06, "log_odds_chosen": 0.9080294966697693, "log_odds_ratio": -0.4445717930793762, "logits/chosen": -0.6323826909065247, "logits/rejected": -0.7734313607215881, "logps/chosen": -0.7795454263687134, "logps/rejected": -1.3464542627334595, "loss": 1.1167, "nll_loss": 0.9545661211013794, "rewards/accuracies": 0.75, "rewards/chosen": -0.07795454561710358, "rewards/margins": 0.05669087916612625, "rewards/rejected": -0.13464543223381042, "step": 1045 }, { "epoch": 0.6380966905597072, "grad_norm": 2.941551923751831, "learning_rate": 7.004531537048377e-06, "log_odds_chosen": 1.1770638227462769, "log_odds_ratio": -0.3728043735027313, "logits/chosen": -0.8040805459022522, "logits/rejected": -0.7080007195472717, "logps/chosen": -0.6963561773300171, "logps/rejected": -1.521034836769104, "loss": 1.1448, "nll_loss": 0.9902427196502686, "rewards/accuracies": 0.875, "rewards/chosen": -0.06963562220335007, "rewards/margins": 0.08246785402297974, "rewards/rejected": -0.1521034836769104, "step": 1046 }, { "epoch": 0.6387067256367241, "grad_norm": 1.7768937349319458, "learning_rate": 7.0035517452541335e-06, "log_odds_chosen": 0.7289992570877075, "log_odds_ratio": -0.5511199235916138, "logits/chosen": -0.9101812243461609, "logits/rejected": -1.0045459270477295, "logps/chosen": -0.7310895919799805, "logps/rejected": -1.0698317289352417, "loss": 1.0823, "nll_loss": 1.1962785720825195, "rewards/accuracies": 0.625, "rewards/chosen": -0.07310895621776581, "rewards/margins": 0.03387422114610672, "rewards/rejected": -0.10698316991329193, "step": 1047 }, { "epoch": 0.639316760713741, "grad_norm": 1.2270172834396362, "learning_rate": 7.002571953459889e-06, "log_odds_chosen": 1.2288095951080322, "log_odds_ratio": -0.36511388421058655, "logits/chosen": -0.5574251413345337, "logits/rejected": -0.6943822503089905, "logps/chosen": -0.7905837297439575, "logps/rejected": -1.5253125429153442, "loss": 1.0417, "nll_loss": 0.805362343788147, "rewards/accuracies": 1.0, "rewards/chosen": -0.07905837148427963, "rewards/margins": 0.07347289472818375, "rewards/rejected": -0.15253126621246338, "step": 1048 }, { "epoch": 0.639926795790758, "grad_norm": 1.1754062175750732, "learning_rate": 7.001592161665646e-06, "log_odds_chosen": 0.723698079586029, "log_odds_ratio": -0.44178783893585205, "logits/chosen": -1.0492280721664429, "logits/rejected": -0.9610827565193176, "logps/chosen": -0.8169659376144409, "logps/rejected": -1.240581750869751, "loss": 1.2492, "nll_loss": 1.2723215818405151, "rewards/accuracies": 0.875, "rewards/chosen": -0.08169659972190857, "rewards/margins": 0.04236157611012459, "rewards/rejected": -0.12405817955732346, "step": 1049 }, { "epoch": 0.6405368308677749, "grad_norm": 2.0058701038360596, "learning_rate": 7.000612369871402e-06, "log_odds_chosen": 0.4312594532966614, "log_odds_ratio": -0.6745761632919312, "logits/chosen": -0.9872571229934692, "logits/rejected": -0.839690089225769, "logps/chosen": -0.9490063190460205, "logps/rejected": -1.137291669845581, "loss": 1.2253, "nll_loss": 1.3646464347839355, "rewards/accuracies": 0.5, "rewards/chosen": -0.09490063041448593, "rewards/margins": 0.01882854476571083, "rewards/rejected": -0.11372917890548706, "step": 1050 }, { "epoch": 0.6411468659447919, "grad_norm": 1.1077539920806885, "learning_rate": 6.999632578077158e-06, "log_odds_chosen": 0.9580885171890259, "log_odds_ratio": -0.40172144770622253, "logits/chosen": -0.8619572520256042, "logits/rejected": -0.8617777824401855, "logps/chosen": -0.7062225341796875, "logps/rejected": -1.289135217666626, "loss": 1.2083, "nll_loss": 0.9618723392486572, "rewards/accuracies": 0.875, "rewards/chosen": -0.07062225788831711, "rewards/margins": 0.058291275054216385, "rewards/rejected": -0.1289135217666626, "step": 1051 }, { "epoch": 0.6417569010218087, "grad_norm": 2.520923137664795, "learning_rate": 6.998652786282915e-06, "log_odds_chosen": 0.3507387638092041, "log_odds_ratio": -0.679262638092041, "logits/chosen": -1.1710691452026367, "logits/rejected": -1.047145962715149, "logps/chosen": -1.0473878383636475, "logps/rejected": -1.3125123977661133, "loss": 1.2442, "nll_loss": 1.2495917081832886, "rewards/accuracies": 0.5, "rewards/chosen": -0.10473878681659698, "rewards/margins": 0.026512445881962776, "rewards/rejected": -0.1312512308359146, "step": 1052 }, { "epoch": 0.6423669360988257, "grad_norm": 1.6258785724639893, "learning_rate": 6.997672994488671e-06, "log_odds_chosen": 0.9182783365249634, "log_odds_ratio": -0.45726388692855835, "logits/chosen": -0.9706111550331116, "logits/rejected": -1.0727794170379639, "logps/chosen": -0.7218595743179321, "logps/rejected": -1.342881441116333, "loss": 1.1969, "nll_loss": 1.2861965894699097, "rewards/accuracies": 0.75, "rewards/chosen": -0.07218596339225769, "rewards/margins": 0.06210218369960785, "rewards/rejected": -0.13428814709186554, "step": 1053 }, { "epoch": 0.6429769711758426, "grad_norm": 1.4144153594970703, "learning_rate": 6.996693202694426e-06, "log_odds_chosen": 0.33975327014923096, "log_odds_ratio": -0.5896779894828796, "logits/chosen": -0.7880275249481201, "logits/rejected": -0.7739027142524719, "logps/chosen": -0.7953935861587524, "logps/rejected": -1.0168204307556152, "loss": 1.1054, "nll_loss": 0.818257212638855, "rewards/accuracies": 0.75, "rewards/chosen": -0.07953935861587524, "rewards/margins": 0.022142687812447548, "rewards/rejected": -0.10168204456567764, "step": 1054 }, { "epoch": 0.6435870062528596, "grad_norm": 1.056406855583191, "learning_rate": 6.995713410900183e-06, "log_odds_chosen": 0.880889892578125, "log_odds_ratio": -0.5016034245491028, "logits/chosen": -0.952045202255249, "logits/rejected": -0.972622275352478, "logps/chosen": -0.9364957809448242, "logps/rejected": -1.605541706085205, "loss": 1.1709, "nll_loss": 1.029375672340393, "rewards/accuracies": 0.75, "rewards/chosen": -0.09364958107471466, "rewards/margins": 0.06690458953380585, "rewards/rejected": -0.1605541706085205, "step": 1055 }, { "epoch": 0.6441970413298764, "grad_norm": 5.274961471557617, "learning_rate": 6.9947336191059395e-06, "log_odds_chosen": 0.0692799761891365, "log_odds_ratio": -0.7534735202789307, "logits/chosen": -1.1455684900283813, "logits/rejected": -1.0720266103744507, "logps/chosen": -1.3260891437530518, "logps/rejected": -1.3229143619537354, "loss": 1.4219, "nll_loss": 1.5206208229064941, "rewards/accuracies": 0.25, "rewards/chosen": -0.13260890543460846, "rewards/margins": -0.0003174860030412674, "rewards/rejected": -0.13229142129421234, "step": 1056 }, { "epoch": 0.6448070764068934, "grad_norm": 1.3684666156768799, "learning_rate": 6.9937538273116964e-06, "log_odds_chosen": 0.12072619050741196, "log_odds_ratio": -0.790400505065918, "logits/chosen": -0.8973181247711182, "logits/rejected": -0.995625376701355, "logps/chosen": -0.8914533257484436, "logps/rejected": -0.9553607702255249, "loss": 1.1883, "nll_loss": 1.349694848060608, "rewards/accuracies": 0.5, "rewards/chosen": -0.08914533257484436, "rewards/margins": 0.006390746682882309, "rewards/rejected": -0.09553608298301697, "step": 1057 }, { "epoch": 0.6454171114839103, "grad_norm": 1.8185040950775146, "learning_rate": 6.9927740355174526e-06, "log_odds_chosen": 1.2384843826293945, "log_odds_ratio": -0.334646999835968, "logits/chosen": -0.9658390283584595, "logits/rejected": -0.8049201965332031, "logps/chosen": -0.853907585144043, "logps/rejected": -1.6776349544525146, "loss": 1.1879, "nll_loss": 1.0592105388641357, "rewards/accuracies": 0.875, "rewards/chosen": -0.08539076149463654, "rewards/margins": 0.0823727399110794, "rewards/rejected": -0.16776350140571594, "step": 1058 }, { "epoch": 0.6460271465609273, "grad_norm": 1.170155644416809, "learning_rate": 6.991794243723209e-06, "log_odds_chosen": 0.8269898891448975, "log_odds_ratio": -0.4501647651195526, "logits/chosen": -0.7329310178756714, "logits/rejected": -0.8719196319580078, "logps/chosen": -0.6897600293159485, "logps/rejected": -1.2019155025482178, "loss": 0.978, "nll_loss": 0.9054034948348999, "rewards/accuracies": 0.875, "rewards/chosen": -0.06897599995136261, "rewards/margins": 0.05121555179357529, "rewards/rejected": -0.1201915591955185, "step": 1059 }, { "epoch": 0.6466371816379441, "grad_norm": 0.9159780740737915, "learning_rate": 6.990814451928965e-06, "log_odds_chosen": 1.0353792905807495, "log_odds_ratio": -0.44345131516456604, "logits/chosen": -1.0993586778640747, "logits/rejected": -1.0506550073623657, "logps/chosen": -0.8205570578575134, "logps/rejected": -1.4700047969818115, "loss": 1.1575, "nll_loss": 1.2580071687698364, "rewards/accuracies": 0.75, "rewards/chosen": -0.0820557102560997, "rewards/margins": 0.06494476646184921, "rewards/rejected": -0.1470004767179489, "step": 1060 }, { "epoch": 0.6472472167149611, "grad_norm": 1.0493944883346558, "learning_rate": 6.989834660134721e-06, "log_odds_chosen": 1.0367692708969116, "log_odds_ratio": -0.40635165572166443, "logits/chosen": -0.8357670307159424, "logits/rejected": -0.8564531803131104, "logps/chosen": -0.6710343956947327, "logps/rejected": -1.308941125869751, "loss": 0.992, "nll_loss": 1.0472352504730225, "rewards/accuracies": 0.75, "rewards/chosen": -0.06710344552993774, "rewards/margins": 0.06379067152738571, "rewards/rejected": -0.13089410960674286, "step": 1061 }, { "epoch": 0.6478572517919781, "grad_norm": 2.9226083755493164, "learning_rate": 6.988854868340477e-06, "log_odds_chosen": 1.1680701971054077, "log_odds_ratio": -0.38325411081314087, "logits/chosen": -0.7817193865776062, "logits/rejected": -0.6757265329360962, "logps/chosen": -0.6372658014297485, "logps/rejected": -1.318381428718567, "loss": 1.1139, "nll_loss": 0.8669332265853882, "rewards/accuracies": 0.75, "rewards/chosen": -0.06372658163309097, "rewards/margins": 0.06811156123876572, "rewards/rejected": -0.1318381428718567, "step": 1062 }, { "epoch": 0.648467286868995, "grad_norm": 1.4144176244735718, "learning_rate": 6.987875076546234e-06, "log_odds_chosen": 1.0037404298782349, "log_odds_ratio": -0.38630545139312744, "logits/chosen": -0.7553423643112183, "logits/rejected": -0.9201920032501221, "logps/chosen": -0.8612457513809204, "logps/rejected": -1.5261070728302002, "loss": 1.2081, "nll_loss": 1.0270612239837646, "rewards/accuracies": 1.0, "rewards/chosen": -0.08612458407878876, "rewards/margins": 0.06648612022399902, "rewards/rejected": -0.15261070430278778, "step": 1063 }, { "epoch": 0.6490773219460119, "grad_norm": 1.7904455661773682, "learning_rate": 6.98689528475199e-06, "log_odds_chosen": 0.895864725112915, "log_odds_ratio": -0.4768511652946472, "logits/chosen": -1.1738545894622803, "logits/rejected": -0.9770313501358032, "logps/chosen": -0.9530377388000488, "logps/rejected": -1.5947104692459106, "loss": 1.1223, "nll_loss": 1.1668074131011963, "rewards/accuracies": 0.75, "rewards/chosen": -0.09530377388000488, "rewards/margins": 0.06416727602481842, "rewards/rejected": -0.1594710499048233, "step": 1064 }, { "epoch": 0.6496873570230288, "grad_norm": 1.7508074045181274, "learning_rate": 6.985915492957745e-06, "log_odds_chosen": 0.9087488651275635, "log_odds_ratio": -0.38388872146606445, "logits/chosen": -0.7187662720680237, "logits/rejected": -0.808743417263031, "logps/chosen": -0.6786420345306396, "logps/rejected": -1.2062432765960693, "loss": 1.1793, "nll_loss": 0.9619721174240112, "rewards/accuracies": 0.875, "rewards/chosen": -0.06786420196294785, "rewards/margins": 0.05276012048125267, "rewards/rejected": -0.12062431871891022, "step": 1065 }, { "epoch": 0.6502973921000458, "grad_norm": 1.8762832880020142, "learning_rate": 6.984935701163502e-06, "log_odds_chosen": 0.5093404054641724, "log_odds_ratio": -0.5937227606773376, "logits/chosen": -0.8710527420043945, "logits/rejected": -0.8552173972129822, "logps/chosen": -0.8231183290481567, "logps/rejected": -1.1584887504577637, "loss": 1.2123, "nll_loss": 1.2336783409118652, "rewards/accuracies": 0.5, "rewards/chosen": -0.08231183141469955, "rewards/margins": 0.03353704512119293, "rewards/rejected": -0.11584886908531189, "step": 1066 }, { "epoch": 0.6509074271770627, "grad_norm": 1.4510655403137207, "learning_rate": 6.9839559093692585e-06, "log_odds_chosen": -0.27165013551712036, "log_odds_ratio": -0.9690085649490356, "logits/chosen": -1.0097843408584595, "logits/rejected": -0.9557676315307617, "logps/chosen": -0.9329320192337036, "logps/rejected": -0.7973079681396484, "loss": 1.1481, "nll_loss": 1.3243457078933716, "rewards/accuracies": 0.375, "rewards/chosen": -0.0932932049036026, "rewards/margins": -0.01356241013854742, "rewards/rejected": -0.0797307938337326, "step": 1067 }, { "epoch": 0.6515174622540796, "grad_norm": 2.5527942180633545, "learning_rate": 6.982976117575015e-06, "log_odds_chosen": 0.6100050806999207, "log_odds_ratio": -0.5359834432601929, "logits/chosen": -0.8783476948738098, "logits/rejected": -0.7851946353912354, "logps/chosen": -0.8280900716781616, "logps/rejected": -1.189116358757019, "loss": 1.1818, "nll_loss": 1.0138241052627563, "rewards/accuracies": 0.625, "rewards/chosen": -0.08280900865793228, "rewards/margins": 0.036102619022130966, "rewards/rejected": -0.11891162395477295, "step": 1068 }, { "epoch": 0.6521274973310965, "grad_norm": 2.140019178390503, "learning_rate": 6.981996325780772e-06, "log_odds_chosen": 0.40075796842575073, "log_odds_ratio": -0.6110103130340576, "logits/chosen": -0.7801520824432373, "logits/rejected": -0.8466013669967651, "logps/chosen": -0.7116097211837769, "logps/rejected": -0.9671156406402588, "loss": 0.8899, "nll_loss": 0.9169503450393677, "rewards/accuracies": 0.625, "rewards/chosen": -0.07116097211837769, "rewards/margins": 0.025550590828061104, "rewards/rejected": -0.09671156853437424, "step": 1069 }, { "epoch": 0.6527375324081135, "grad_norm": 3.562331438064575, "learning_rate": 6.981016533986528e-06, "log_odds_chosen": 1.3665509223937988, "log_odds_ratio": -0.4616950750350952, "logits/chosen": -0.9315096139907837, "logits/rejected": -0.7915965914726257, "logps/chosen": -0.7233586311340332, "logps/rejected": -1.6107704639434814, "loss": 1.0043, "nll_loss": 0.9855670928955078, "rewards/accuracies": 0.75, "rewards/chosen": -0.0723358690738678, "rewards/margins": 0.08874118328094482, "rewards/rejected": -0.16107703745365143, "step": 1070 }, { "epoch": 0.6533475674851303, "grad_norm": 1.312448263168335, "learning_rate": 6.980036742192284e-06, "log_odds_chosen": 0.9518453478813171, "log_odds_ratio": -0.4374743103981018, "logits/chosen": -0.9113318920135498, "logits/rejected": -0.8858892321586609, "logps/chosen": -0.8166566491127014, "logps/rejected": -1.4106475114822388, "loss": 1.1462, "nll_loss": 0.9398202896118164, "rewards/accuracies": 0.75, "rewards/chosen": -0.08166566491127014, "rewards/margins": 0.0593990832567215, "rewards/rejected": -0.14106474816799164, "step": 1071 }, { "epoch": 0.6539576025621473, "grad_norm": 1.5945980548858643, "learning_rate": 6.97905695039804e-06, "log_odds_chosen": 0.4959043860435486, "log_odds_ratio": -0.6049567461013794, "logits/chosen": -1.0032711029052734, "logits/rejected": -0.8343000411987305, "logps/chosen": -0.8536239862442017, "logps/rejected": -1.127203106880188, "loss": 1.1456, "nll_loss": 1.025400161743164, "rewards/accuracies": 0.75, "rewards/chosen": -0.08536240458488464, "rewards/margins": 0.027357913553714752, "rewards/rejected": -0.1127203106880188, "step": 1072 }, { "epoch": 0.6545676376391643, "grad_norm": 2.1908788681030273, "learning_rate": 6.978077158603796e-06, "log_odds_chosen": 0.6306928396224976, "log_odds_ratio": -0.47116196155548096, "logits/chosen": -0.7966935038566589, "logits/rejected": -0.8138699531555176, "logps/chosen": -0.7848304510116577, "logps/rejected": -1.171337366104126, "loss": 0.9931, "nll_loss": 1.08894681930542, "rewards/accuracies": 0.75, "rewards/chosen": -0.07848304510116577, "rewards/margins": 0.03865070268511772, "rewards/rejected": -0.1171337366104126, "step": 1073 }, { "epoch": 0.6551776727161812, "grad_norm": 2.2275888919830322, "learning_rate": 6.977097366809553e-06, "log_odds_chosen": -0.0063627660274505615, "log_odds_ratio": -0.7204772233963013, "logits/chosen": -0.881367564201355, "logits/rejected": -0.9467364549636841, "logps/chosen": -0.9670398831367493, "logps/rejected": -0.9881949424743652, "loss": 1.0916, "nll_loss": 1.170764684677124, "rewards/accuracies": 0.5, "rewards/chosen": -0.09670399129390717, "rewards/margins": 0.0021155085414648056, "rewards/rejected": -0.09881949424743652, "step": 1074 }, { "epoch": 0.6557877077931981, "grad_norm": 1.329728126525879, "learning_rate": 6.976117575015309e-06, "log_odds_chosen": 0.3612045347690582, "log_odds_ratio": -0.5773850679397583, "logits/chosen": -0.8205929398536682, "logits/rejected": -0.897664487361908, "logps/chosen": -0.9266102910041809, "logps/rejected": -1.1506731510162354, "loss": 1.2649, "nll_loss": 1.2355821132659912, "rewards/accuracies": 0.625, "rewards/chosen": -0.09266103059053421, "rewards/margins": 0.02240629307925701, "rewards/rejected": -0.11506731808185577, "step": 1075 }, { "epoch": 0.656397742870215, "grad_norm": 1.7913811206817627, "learning_rate": 6.975137783221065e-06, "log_odds_chosen": 1.3988823890686035, "log_odds_ratio": -0.3127322494983673, "logits/chosen": -0.718970000743866, "logits/rejected": -0.7796708941459656, "logps/chosen": -0.6374071836471558, "logps/rejected": -1.5393868684768677, "loss": 1.094, "nll_loss": 0.8659354448318481, "rewards/accuracies": 1.0, "rewards/chosen": -0.06374072283506393, "rewards/margins": 0.09019796550273895, "rewards/rejected": -0.1539386808872223, "step": 1076 }, { "epoch": 0.657007777947232, "grad_norm": 1.8700838088989258, "learning_rate": 6.9741579914268214e-06, "log_odds_chosen": 0.3673263490200043, "log_odds_ratio": -0.6619488000869751, "logits/chosen": -0.8173439502716064, "logits/rejected": -0.8091148138046265, "logps/chosen": -0.8751758337020874, "logps/rejected": -1.112183928489685, "loss": 1.0763, "nll_loss": 0.8858508467674255, "rewards/accuracies": 0.375, "rewards/chosen": -0.08751758188009262, "rewards/margins": 0.023700814694166183, "rewards/rejected": -0.1112184002995491, "step": 1077 }, { "epoch": 0.6576178130242489, "grad_norm": 1.969247817993164, "learning_rate": 6.9731781996325776e-06, "log_odds_chosen": 1.0337505340576172, "log_odds_ratio": -0.48283785581588745, "logits/chosen": -0.8947832584381104, "logits/rejected": -0.8461629152297974, "logps/chosen": -0.6447712182998657, "logps/rejected": -1.0626933574676514, "loss": 1.1408, "nll_loss": 0.9079880714416504, "rewards/accuracies": 0.75, "rewards/chosen": -0.06447713077068329, "rewards/margins": 0.04179222881793976, "rewards/rejected": -0.10626935213804245, "step": 1078 }, { "epoch": 0.6582278481012658, "grad_norm": 3.5185747146606445, "learning_rate": 6.972198407838334e-06, "log_odds_chosen": 0.7768051624298096, "log_odds_ratio": -0.4544866979122162, "logits/chosen": -0.8042610883712769, "logits/rejected": -0.7162127494812012, "logps/chosen": -0.7186687588691711, "logps/rejected": -1.2518131732940674, "loss": 1.1659, "nll_loss": 0.9120057821273804, "rewards/accuracies": 0.625, "rewards/chosen": -0.07186686992645264, "rewards/margins": 0.053314439952373505, "rewards/rejected": -0.12518131732940674, "step": 1079 }, { "epoch": 0.6588378831782827, "grad_norm": 1.6888045072555542, "learning_rate": 6.971218616044091e-06, "log_odds_chosen": 0.3096123933792114, "log_odds_ratio": -0.8455010056495667, "logits/chosen": -0.81528639793396, "logits/rejected": -0.6748599410057068, "logps/chosen": -1.2084578275680542, "logps/rejected": -1.3325438499450684, "loss": 1.1515, "nll_loss": 1.315333366394043, "rewards/accuracies": 0.5, "rewards/chosen": -0.12084579467773438, "rewards/margins": 0.012408599257469177, "rewards/rejected": -0.13325437903404236, "step": 1080 }, { "epoch": 0.6594479182552997, "grad_norm": 0.9527907371520996, "learning_rate": 6.970238824249847e-06, "log_odds_chosen": 1.005726933479309, "log_odds_ratio": -0.43811506032943726, "logits/chosen": -0.9564969539642334, "logits/rejected": -0.8484285473823547, "logps/chosen": -0.8670969009399414, "logps/rejected": -1.5534701347351074, "loss": 1.1341, "nll_loss": 1.0356734991073608, "rewards/accuracies": 0.75, "rewards/chosen": -0.08670969307422638, "rewards/margins": 0.06863731145858765, "rewards/rejected": -0.15534701943397522, "step": 1081 }, { "epoch": 0.6600579533323166, "grad_norm": 4.732676982879639, "learning_rate": 6.969259032455602e-06, "log_odds_chosen": 0.8616839647293091, "log_odds_ratio": -0.43464332818984985, "logits/chosen": -0.7489621043205261, "logits/rejected": -0.8052102327346802, "logps/chosen": -0.7009851932525635, "logps/rejected": -1.1828463077545166, "loss": 1.2047, "nll_loss": 1.2934198379516602, "rewards/accuracies": 0.875, "rewards/chosen": -0.07009852677583694, "rewards/margins": 0.048186108469963074, "rewards/rejected": -0.11828464269638062, "step": 1082 }, { "epoch": 0.6606679884093335, "grad_norm": 2.5865139961242676, "learning_rate": 6.968279240661359e-06, "log_odds_chosen": 0.4839699864387512, "log_odds_ratio": -0.689195990562439, "logits/chosen": -0.9762754440307617, "logits/rejected": -0.9092730283737183, "logps/chosen": -0.9953213930130005, "logps/rejected": -1.3232245445251465, "loss": 1.09, "nll_loss": 1.114122986793518, "rewards/accuracies": 0.5, "rewards/chosen": -0.09953214228153229, "rewards/margins": 0.03279032185673714, "rewards/rejected": -0.13232247531414032, "step": 1083 }, { "epoch": 0.6612780234863505, "grad_norm": 4.359467506408691, "learning_rate": 6.967299448867115e-06, "log_odds_chosen": 0.348430335521698, "log_odds_ratio": -0.5959970951080322, "logits/chosen": -1.0813767910003662, "logits/rejected": -1.060728907585144, "logps/chosen": -1.0421111583709717, "logps/rejected": -1.2800345420837402, "loss": 1.2323, "nll_loss": 1.138902187347412, "rewards/accuracies": 0.625, "rewards/chosen": -0.10421110689640045, "rewards/margins": 0.023792332038283348, "rewards/rejected": -0.12800344824790955, "step": 1084 }, { "epoch": 0.6618880585633674, "grad_norm": 1.6959041357040405, "learning_rate": 6.966319657072872e-06, "log_odds_chosen": 1.0182075500488281, "log_odds_ratio": -0.4677150547504425, "logits/chosen": -0.8958009481430054, "logits/rejected": -0.8990307450294495, "logps/chosen": -0.8513599634170532, "logps/rejected": -1.5457881689071655, "loss": 1.1136, "nll_loss": 1.0875037908554077, "rewards/accuracies": 0.625, "rewards/chosen": -0.08513600379228592, "rewards/margins": 0.06944281607866287, "rewards/rejected": -0.1545788049697876, "step": 1085 }, { "epoch": 0.6624980936403844, "grad_norm": 1.4826717376708984, "learning_rate": 6.965339865278628e-06, "log_odds_chosen": 1.4631638526916504, "log_odds_ratio": -0.343364417552948, "logits/chosen": -0.791878879070282, "logits/rejected": -0.9878970384597778, "logps/chosen": -0.6854878664016724, "logps/rejected": -1.6829227209091187, "loss": 1.0836, "nll_loss": 1.105088233947754, "rewards/accuracies": 0.75, "rewards/chosen": -0.068548783659935, "rewards/margins": 0.09974348545074463, "rewards/rejected": -0.16829226911067963, "step": 1086 }, { "epoch": 0.6631081287174012, "grad_norm": 2.0963387489318848, "learning_rate": 6.964360073484384e-06, "log_odds_chosen": 1.029110312461853, "log_odds_ratio": -0.45947209000587463, "logits/chosen": -0.9536857008934021, "logits/rejected": -0.9819352626800537, "logps/chosen": -0.9915754795074463, "logps/rejected": -1.7415940761566162, "loss": 1.0592, "nll_loss": 1.089918851852417, "rewards/accuracies": 0.875, "rewards/chosen": -0.09915755689144135, "rewards/margins": 0.07500186562538147, "rewards/rejected": -0.17415942251682281, "step": 1087 }, { "epoch": 0.6637181637944182, "grad_norm": 1.9415518045425415, "learning_rate": 6.9633802816901405e-06, "log_odds_chosen": 0.5864768028259277, "log_odds_ratio": -0.5827445387840271, "logits/chosen": -0.9188864231109619, "logits/rejected": -0.7730840444564819, "logps/chosen": -0.9579471349716187, "logps/rejected": -1.2922412157058716, "loss": 1.2191, "nll_loss": 1.2963382005691528, "rewards/accuracies": 0.5, "rewards/chosen": -0.09579471498727798, "rewards/margins": 0.03342941403388977, "rewards/rejected": -0.12922413647174835, "step": 1088 }, { "epoch": 0.6643281988714351, "grad_norm": 1.2430858612060547, "learning_rate": 6.962400489895897e-06, "log_odds_chosen": 1.9798901081085205, "log_odds_ratio": -0.17898082733154297, "logits/chosen": -0.9105274677276611, "logits/rejected": -0.706400454044342, "logps/chosen": -0.44136083126068115, "logps/rejected": -1.6226205825805664, "loss": 1.1406, "nll_loss": 0.8751038312911987, "rewards/accuracies": 1.0, "rewards/chosen": -0.044136084616184235, "rewards/margins": 0.11812599003314972, "rewards/rejected": -0.16226206719875336, "step": 1089 }, { "epoch": 0.6649382339484521, "grad_norm": 1.259446620941162, "learning_rate": 6.961420698101653e-06, "log_odds_chosen": 1.2262580394744873, "log_odds_ratio": -0.46149373054504395, "logits/chosen": -1.0230478048324585, "logits/rejected": -1.102954626083374, "logps/chosen": -0.8685773611068726, "logps/rejected": -1.707432746887207, "loss": 1.1124, "nll_loss": 1.1374644041061401, "rewards/accuracies": 0.75, "rewards/chosen": -0.08685773611068726, "rewards/margins": 0.08388553559780121, "rewards/rejected": -0.17074327170848846, "step": 1090 }, { "epoch": 0.6655482690254689, "grad_norm": 1.5978769063949585, "learning_rate": 6.96044090630741e-06, "log_odds_chosen": 0.37609654664993286, "log_odds_ratio": -0.7366656064987183, "logits/chosen": -1.1649696826934814, "logits/rejected": -1.1748486757278442, "logps/chosen": -1.1443532705307007, "logps/rejected": -1.5426206588745117, "loss": 1.2157, "nll_loss": 1.3392064571380615, "rewards/accuracies": 0.5, "rewards/chosen": -0.1144353374838829, "rewards/margins": 0.039826732128858566, "rewards/rejected": -0.15426206588745117, "step": 1091 }, { "epoch": 0.6661583041024859, "grad_norm": 3.375828742980957, "learning_rate": 6.959461114513166e-06, "log_odds_chosen": 0.6729937195777893, "log_odds_ratio": -0.5277542471885681, "logits/chosen": -0.9171279072761536, "logits/rejected": -0.929256021976471, "logps/chosen": -0.8560669422149658, "logps/rejected": -1.279416799545288, "loss": 1.1334, "nll_loss": 0.9520672559738159, "rewards/accuracies": 0.625, "rewards/chosen": -0.08560669422149658, "rewards/margins": 0.042334988713264465, "rewards/rejected": -0.12794168293476105, "step": 1092 }, { "epoch": 0.6667683391795028, "grad_norm": 1.5034936666488647, "learning_rate": 6.958481322718922e-06, "log_odds_chosen": 0.7085092067718506, "log_odds_ratio": -0.5374107956886292, "logits/chosen": -0.905116081237793, "logits/rejected": -0.9101864695549011, "logps/chosen": -0.7398056983947754, "logps/rejected": -1.1358344554901123, "loss": 1.1956, "nll_loss": 1.1134164333343506, "rewards/accuracies": 0.625, "rewards/chosen": -0.07398056238889694, "rewards/margins": 0.03960288316011429, "rewards/rejected": -0.11358345299959183, "step": 1093 }, { "epoch": 0.6673783742565198, "grad_norm": 2.990673780441284, "learning_rate": 6.957501530924678e-06, "log_odds_chosen": 0.9249264001846313, "log_odds_ratio": -0.45289894938468933, "logits/chosen": -0.9775279760360718, "logits/rejected": -1.044213056564331, "logps/chosen": -0.836868166923523, "logps/rejected": -1.4151750802993774, "loss": 1.3176, "nll_loss": 1.1266412734985352, "rewards/accuracies": 0.625, "rewards/chosen": -0.08368682116270065, "rewards/margins": 0.05783068388700485, "rewards/rejected": -0.1415175199508667, "step": 1094 }, { "epoch": 0.6679884093335366, "grad_norm": 1.991523265838623, "learning_rate": 6.956521739130434e-06, "log_odds_chosen": 0.2516958713531494, "log_odds_ratio": -0.6444699168205261, "logits/chosen": -0.9608302116394043, "logits/rejected": -1.036831259727478, "logps/chosen": -0.9013481736183167, "logps/rejected": -1.0549205541610718, "loss": 1.2017, "nll_loss": 1.2185488939285278, "rewards/accuracies": 0.75, "rewards/chosen": -0.09013482183218002, "rewards/margins": 0.01535723451524973, "rewards/rejected": -0.10549205541610718, "step": 1095 }, { "epoch": 0.6685984444105536, "grad_norm": 2.1111950874328613, "learning_rate": 6.95554194733619e-06, "log_odds_chosen": 0.3024968206882477, "log_odds_ratio": -0.6920565366744995, "logits/chosen": -1.079178810119629, "logits/rejected": -1.0675573348999023, "logps/chosen": -0.7340569496154785, "logps/rejected": -0.7951209545135498, "loss": 1.1262, "nll_loss": 0.9894618391990662, "rewards/accuracies": 0.375, "rewards/chosen": -0.07340569794178009, "rewards/margins": 0.006106400862336159, "rewards/rejected": -0.0795121043920517, "step": 1096 }, { "epoch": 0.6692084794875706, "grad_norm": 2.3502776622772217, "learning_rate": 6.954562155541947e-06, "log_odds_chosen": 0.5843790173530579, "log_odds_ratio": -0.673125147819519, "logits/chosen": -0.9382731914520264, "logits/rejected": -0.9454036951065063, "logps/chosen": -1.0064764022827148, "logps/rejected": -1.4691306352615356, "loss": 1.162, "nll_loss": 1.2013224363327026, "rewards/accuracies": 0.5, "rewards/chosen": -0.10064764320850372, "rewards/margins": 0.04626542702317238, "rewards/rejected": -0.1469130665063858, "step": 1097 }, { "epoch": 0.6698185145645874, "grad_norm": 3.5611941814422607, "learning_rate": 6.953582363747703e-06, "log_odds_chosen": 0.8774391412734985, "log_odds_ratio": -0.4567355215549469, "logits/chosen": -0.9395297765731812, "logits/rejected": -0.9881553649902344, "logps/chosen": -0.8305085301399231, "logps/rejected": -1.4336330890655518, "loss": 1.1317, "nll_loss": 0.9894365668296814, "rewards/accuracies": 0.625, "rewards/chosen": -0.08305084705352783, "rewards/margins": 0.06031246483325958, "rewards/rejected": -0.1433633267879486, "step": 1098 }, { "epoch": 0.6704285496416044, "grad_norm": 2.3365638256073, "learning_rate": 6.95260257195346e-06, "log_odds_chosen": 0.4328935742378235, "log_odds_ratio": -0.6221247911453247, "logits/chosen": -0.9681868553161621, "logits/rejected": -0.9353679418563843, "logps/chosen": -0.9180067777633667, "logps/rejected": -1.1926758289337158, "loss": 1.0948, "nll_loss": 1.1174871921539307, "rewards/accuracies": 0.5, "rewards/chosen": -0.09180067479610443, "rewards/margins": 0.027466915547847748, "rewards/rejected": -0.11926759779453278, "step": 1099 }, { "epoch": 0.6710385847186213, "grad_norm": 2.5381674766540527, "learning_rate": 6.951622780159216e-06, "log_odds_chosen": 1.2255351543426514, "log_odds_ratio": -0.49153852462768555, "logits/chosen": -0.7912271618843079, "logits/rejected": -0.9017009139060974, "logps/chosen": -0.7455304265022278, "logps/rejected": -1.598872423171997, "loss": 1.1402, "nll_loss": 0.9677419662475586, "rewards/accuracies": 0.75, "rewards/chosen": -0.07455304265022278, "rewards/margins": 0.08533421158790588, "rewards/rejected": -0.15988725423812866, "step": 1100 }, { "epoch": 0.6716486197956383, "grad_norm": 3.2895667552948, "learning_rate": 6.950642988364972e-06, "log_odds_chosen": 1.4319331645965576, "log_odds_ratio": -0.30316534638404846, "logits/chosen": -0.8016763925552368, "logits/rejected": -0.9430943131446838, "logps/chosen": -0.5653068423271179, "logps/rejected": -1.3917977809906006, "loss": 1.3507, "nll_loss": 1.1450982093811035, "rewards/accuracies": 1.0, "rewards/chosen": -0.05653068423271179, "rewards/margins": 0.0826491117477417, "rewards/rejected": -0.1391797959804535, "step": 1101 }, { "epoch": 0.6722586548726551, "grad_norm": 1.5635942220687866, "learning_rate": 6.949663196570729e-06, "log_odds_chosen": 0.6118274927139282, "log_odds_ratio": -0.5790911912918091, "logits/chosen": -0.9361826181411743, "logits/rejected": -1.0691752433776855, "logps/chosen": -0.703133225440979, "logps/rejected": -1.0676360130310059, "loss": 1.166, "nll_loss": 0.9303731918334961, "rewards/accuracies": 0.75, "rewards/chosen": -0.07031332701444626, "rewards/margins": 0.036450281739234924, "rewards/rejected": -0.10676361620426178, "step": 1102 }, { "epoch": 0.6728686899496721, "grad_norm": 1.293082356452942, "learning_rate": 6.948683404776485e-06, "log_odds_chosen": 0.5364777445793152, "log_odds_ratio": -0.5734925270080566, "logits/chosen": -1.2131706476211548, "logits/rejected": -1.1670833826065063, "logps/chosen": -0.9071897268295288, "logps/rejected": -1.1950910091400146, "loss": 1.1472, "nll_loss": 1.2491209506988525, "rewards/accuracies": 0.625, "rewards/chosen": -0.09071897715330124, "rewards/margins": 0.028790125623345375, "rewards/rejected": -0.11950911581516266, "step": 1103 }, { "epoch": 0.673478725026689, "grad_norm": 1.729028582572937, "learning_rate": 6.947703612982241e-06, "log_odds_chosen": 0.056256815791130066, "log_odds_ratio": -0.7385145425796509, "logits/chosen": -0.849764347076416, "logits/rejected": -0.9038540124893188, "logps/chosen": -1.0725843906402588, "logps/rejected": -1.1955982446670532, "loss": 1.2309, "nll_loss": 1.1218359470367432, "rewards/accuracies": 0.375, "rewards/chosen": -0.10725843906402588, "rewards/margins": 0.012301397509872913, "rewards/rejected": -0.11955983191728592, "step": 1104 }, { "epoch": 0.674088760103706, "grad_norm": 1.5180838108062744, "learning_rate": 6.946723821187997e-06, "log_odds_chosen": 0.8009585738182068, "log_odds_ratio": -0.5114440321922302, "logits/chosen": -1.001551628112793, "logits/rejected": -1.0591070652008057, "logps/chosen": -0.9766463041305542, "logps/rejected": -1.497488021850586, "loss": 1.2133, "nll_loss": 1.3155848979949951, "rewards/accuracies": 0.75, "rewards/chosen": -0.09766463190317154, "rewards/margins": 0.05208417400717735, "rewards/rejected": -0.1497488021850586, "step": 1105 }, { "epoch": 0.6746987951807228, "grad_norm": 1.8225994110107422, "learning_rate": 6.945744029393753e-06, "log_odds_chosen": 0.7760636806488037, "log_odds_ratio": -0.6287446618080139, "logits/chosen": -0.9616233110427856, "logits/rejected": -0.8839720487594604, "logps/chosen": -0.7900917530059814, "logps/rejected": -1.3177123069763184, "loss": 1.1906, "nll_loss": 1.0691606998443604, "rewards/accuracies": 0.625, "rewards/chosen": -0.07900917530059814, "rewards/margins": 0.05276203900575638, "rewards/rejected": -0.13177122175693512, "step": 1106 }, { "epoch": 0.6753088302577398, "grad_norm": 1.4489543437957764, "learning_rate": 6.944764237599509e-06, "log_odds_chosen": 0.3459840714931488, "log_odds_ratio": -0.5702568292617798, "logits/chosen": -1.1423221826553345, "logits/rejected": -0.9674311280250549, "logps/chosen": -0.9046181440353394, "logps/rejected": -1.1422312259674072, "loss": 1.1044, "nll_loss": 1.1029155254364014, "rewards/accuracies": 0.625, "rewards/chosen": -0.09046182036399841, "rewards/margins": 0.02376130223274231, "rewards/rejected": -0.11422313004732132, "step": 1107 }, { "epoch": 0.6759188653347568, "grad_norm": 1.3051018714904785, "learning_rate": 6.943784445805266e-06, "log_odds_chosen": 1.4956958293914795, "log_odds_ratio": -0.3116660714149475, "logits/chosen": -0.6816617250442505, "logits/rejected": -0.7228027582168579, "logps/chosen": -0.8040928840637207, "logps/rejected": -1.8302563428878784, "loss": 1.0451, "nll_loss": 0.8326460719108582, "rewards/accuracies": 0.875, "rewards/chosen": -0.08040928840637207, "rewards/margins": 0.10261635482311249, "rewards/rejected": -0.18302564322948456, "step": 1108 }, { "epoch": 0.6765289004117737, "grad_norm": 1.4747549295425415, "learning_rate": 6.9428046540110224e-06, "log_odds_chosen": 0.9252590537071228, "log_odds_ratio": -0.45605888962745667, "logits/chosen": -0.9807040691375732, "logits/rejected": -0.9379989504814148, "logps/chosen": -0.6108251810073853, "logps/rejected": -1.162051796913147, "loss": 1.0489, "nll_loss": 0.8111245632171631, "rewards/accuracies": 1.0, "rewards/chosen": -0.061082519590854645, "rewards/margins": 0.05512265861034393, "rewards/rejected": -0.11620518565177917, "step": 1109 }, { "epoch": 0.6771389354887906, "grad_norm": 1.4192429780960083, "learning_rate": 6.941824862216779e-06, "log_odds_chosen": 1.08197021484375, "log_odds_ratio": -0.5162779092788696, "logits/chosen": -0.9570862650871277, "logits/rejected": -1.162358045578003, "logps/chosen": -0.6454808712005615, "logps/rejected": -1.1538058519363403, "loss": 1.0132, "nll_loss": 0.8747724294662476, "rewards/accuracies": 0.75, "rewards/chosen": -0.06454809010028839, "rewards/margins": 0.050832491368055344, "rewards/rejected": -0.11538059264421463, "step": 1110 }, { "epoch": 0.6777489705658075, "grad_norm": 9.289826393127441, "learning_rate": 6.940845070422535e-06, "log_odds_chosen": 1.4719808101654053, "log_odds_ratio": -0.2767511308193207, "logits/chosen": -0.8046169877052307, "logits/rejected": -0.7349786758422852, "logps/chosen": -0.7922788858413696, "logps/rejected": -1.7304143905639648, "loss": 1.2097, "nll_loss": 0.9351758360862732, "rewards/accuracies": 1.0, "rewards/chosen": -0.07922789454460144, "rewards/margins": 0.09381354600191116, "rewards/rejected": -0.1730414479970932, "step": 1111 }, { "epoch": 0.6783590056428245, "grad_norm": 1.3045730590820312, "learning_rate": 6.939865278628291e-06, "log_odds_chosen": 1.5746209621429443, "log_odds_ratio": -0.3688451945781708, "logits/chosen": -1.2416704893112183, "logits/rejected": -0.9755666255950928, "logps/chosen": -0.8492047786712646, "logps/rejected": -1.776899814605713, "loss": 1.0653, "nll_loss": 1.1681370735168457, "rewards/accuracies": 0.875, "rewards/chosen": -0.0849204733967781, "rewards/margins": 0.09276952594518661, "rewards/rejected": -0.17768999934196472, "step": 1112 }, { "epoch": 0.6789690407198414, "grad_norm": 2.6790380477905273, "learning_rate": 6.938885486834048e-06, "log_odds_chosen": 1.6302828788757324, "log_odds_ratio": -0.2790389657020569, "logits/chosen": -1.0549814701080322, "logits/rejected": -0.9700198173522949, "logps/chosen": -0.7776997089385986, "logps/rejected": -1.939271330833435, "loss": 1.097, "nll_loss": 1.1861075162887573, "rewards/accuracies": 1.0, "rewards/chosen": -0.07776996493339539, "rewards/margins": 0.1161571741104126, "rewards/rejected": -0.19392713904380798, "step": 1113 }, { "epoch": 0.6795790757968583, "grad_norm": 3.8294618129730225, "learning_rate": 6.937905695039804e-06, "log_odds_chosen": 0.3211052119731903, "log_odds_ratio": -0.622495710849762, "logits/chosen": -1.03559148311615, "logits/rejected": -0.7962740659713745, "logps/chosen": -0.8198618292808533, "logps/rejected": -1.036118745803833, "loss": 1.1727, "nll_loss": 1.0845510959625244, "rewards/accuracies": 0.625, "rewards/chosen": -0.08198618143796921, "rewards/margins": 0.021625686436891556, "rewards/rejected": -0.10361187160015106, "step": 1114 }, { "epoch": 0.6801891108738752, "grad_norm": 1.8382973670959473, "learning_rate": 6.93692590324556e-06, "log_odds_chosen": 0.7261638045310974, "log_odds_ratio": -0.5499467849731445, "logits/chosen": -0.860729455947876, "logits/rejected": -0.8938555121421814, "logps/chosen": -0.8251329660415649, "logps/rejected": -1.2309019565582275, "loss": 1.0346, "nll_loss": 0.873737096786499, "rewards/accuracies": 0.75, "rewards/chosen": -0.08251330256462097, "rewards/margins": 0.04057689383625984, "rewards/rejected": -0.12309020757675171, "step": 1115 }, { "epoch": 0.6807991459508922, "grad_norm": 2.419818639755249, "learning_rate": 6.935946111451317e-06, "log_odds_chosen": 1.2792284488677979, "log_odds_ratio": -0.41448646783828735, "logits/chosen": -0.967557966709137, "logits/rejected": -1.104222059249878, "logps/chosen": -0.8620707392692566, "logps/rejected": -1.6116763353347778, "loss": 1.0969, "nll_loss": 1.0456851720809937, "rewards/accuracies": 0.875, "rewards/chosen": -0.0862070769071579, "rewards/margins": 0.07496055960655212, "rewards/rejected": -0.16116763651371002, "step": 1116 }, { "epoch": 0.6814091810279091, "grad_norm": 3.606900691986084, "learning_rate": 6.934966319657072e-06, "log_odds_chosen": 3.1124045848846436, "log_odds_ratio": -0.12225907295942307, "logits/chosen": -0.769172191619873, "logits/rejected": -0.8147187232971191, "logps/chosen": -0.42987167835235596, "logps/rejected": -2.584486961364746, "loss": 1.0438, "nll_loss": 0.8553159236907959, "rewards/accuracies": 1.0, "rewards/chosen": -0.042987167835235596, "rewards/margins": 0.21546155214309692, "rewards/rejected": -0.2584487199783325, "step": 1117 }, { "epoch": 0.682019216104926, "grad_norm": 1.9873948097229004, "learning_rate": 6.933986527862828e-06, "log_odds_chosen": -0.022205624729394913, "log_odds_ratio": -0.8076281547546387, "logits/chosen": -1.0997720956802368, "logits/rejected": -1.0926967859268188, "logps/chosen": -1.2007827758789062, "logps/rejected": -1.121872067451477, "loss": 1.2215, "nll_loss": 1.5385487079620361, "rewards/accuracies": 0.5, "rewards/chosen": -0.12007827311754227, "rewards/margins": -0.007891053333878517, "rewards/rejected": -0.1121872141957283, "step": 1118 }, { "epoch": 0.682629251181943, "grad_norm": 2.524353504180908, "learning_rate": 6.933006736068585e-06, "log_odds_chosen": 0.9700592756271362, "log_odds_ratio": -0.6128915548324585, "logits/chosen": -0.8007985353469849, "logits/rejected": -0.748350203037262, "logps/chosen": -0.8128459453582764, "logps/rejected": -1.5403434038162231, "loss": 1.148, "nll_loss": 1.063049554824829, "rewards/accuracies": 0.5, "rewards/chosen": -0.08128460496664047, "rewards/margins": 0.07274974882602692, "rewards/rejected": -0.1540343463420868, "step": 1119 }, { "epoch": 0.6832392862589599, "grad_norm": 3.283541202545166, "learning_rate": 6.9320269442743415e-06, "log_odds_chosen": 0.24195504188537598, "log_odds_ratio": -0.680282473564148, "logits/chosen": -1.0130982398986816, "logits/rejected": -0.9806647896766663, "logps/chosen": -1.0641098022460938, "logps/rejected": -1.2108632326126099, "loss": 1.1217, "nll_loss": 1.1544079780578613, "rewards/accuracies": 0.375, "rewards/chosen": -0.10641098022460938, "rewards/margins": 0.014675343409180641, "rewards/rejected": -0.12108632922172546, "step": 1120 }, { "epoch": 0.6838493213359769, "grad_norm": 1.525328516960144, "learning_rate": 6.931047152480098e-06, "log_odds_chosen": 0.9332733154296875, "log_odds_ratio": -0.5983851552009583, "logits/chosen": -0.8108730316162109, "logits/rejected": -0.8312375545501709, "logps/chosen": -0.7117083668708801, "logps/rejected": -1.174189805984497, "loss": 1.002, "nll_loss": 0.8740100264549255, "rewards/accuracies": 0.5, "rewards/chosen": -0.07117082923650742, "rewards/margins": 0.046248145401477814, "rewards/rejected": -0.11741897463798523, "step": 1121 }, { "epoch": 0.6844593564129937, "grad_norm": 1.179639220237732, "learning_rate": 6.930067360685854e-06, "log_odds_chosen": 1.614373803138733, "log_odds_ratio": -0.3709481358528137, "logits/chosen": -0.7831040620803833, "logits/rejected": -0.8012334108352661, "logps/chosen": -0.6254026889801025, "logps/rejected": -1.7535006999969482, "loss": 1.0147, "nll_loss": 0.8145773410797119, "rewards/accuracies": 0.75, "rewards/chosen": -0.06254027038812637, "rewards/margins": 0.11280979216098785, "rewards/rejected": -0.17535006999969482, "step": 1122 }, { "epoch": 0.6850693914900107, "grad_norm": 3.3933398723602295, "learning_rate": 6.92908756889161e-06, "log_odds_chosen": 0.09785591065883636, "log_odds_ratio": -0.7122799158096313, "logits/chosen": -0.8395187258720398, "logits/rejected": -0.7709342241287231, "logps/chosen": -0.8533941507339478, "logps/rejected": -0.9329327344894409, "loss": 1.206, "nll_loss": 1.0346007347106934, "rewards/accuracies": 0.5, "rewards/chosen": -0.08533941209316254, "rewards/margins": 0.007953852415084839, "rewards/rejected": -0.09329327195882797, "step": 1123 }, { "epoch": 0.6856794265670276, "grad_norm": 1.9394108057022095, "learning_rate": 6.928107777097367e-06, "log_odds_chosen": -0.016322769224643707, "log_odds_ratio": -0.807707667350769, "logits/chosen": -0.9491612315177917, "logits/rejected": -0.7830949425697327, "logps/chosen": -1.0249018669128418, "logps/rejected": -0.9989628791809082, "loss": 1.2133, "nll_loss": 1.1833728551864624, "rewards/accuracies": 0.375, "rewards/chosen": -0.10249020159244537, "rewards/margins": -0.002593900542706251, "rewards/rejected": -0.09989629685878754, "step": 1124 }, { "epoch": 0.6862894616440446, "grad_norm": 2.084303140640259, "learning_rate": 6.927127985303123e-06, "log_odds_chosen": 0.9951095581054688, "log_odds_ratio": -0.4470161199569702, "logits/chosen": -1.0237088203430176, "logits/rejected": -0.9810500144958496, "logps/chosen": -0.7503847479820251, "logps/rejected": -1.426025629043579, "loss": 1.2447, "nll_loss": 1.1861896514892578, "rewards/accuracies": 0.875, "rewards/chosen": -0.07503847777843475, "rewards/margins": 0.06756410002708435, "rewards/rejected": -0.1426025778055191, "step": 1125 }, { "epoch": 0.6868994967210614, "grad_norm": 2.0091629028320312, "learning_rate": 6.926148193508879e-06, "log_odds_chosen": 0.9154140949249268, "log_odds_ratio": -0.4889998733997345, "logits/chosen": -0.963087797164917, "logits/rejected": -0.9092215299606323, "logps/chosen": -0.7941011190414429, "logps/rejected": -1.3370305299758911, "loss": 1.0531, "nll_loss": 0.9528678059577942, "rewards/accuracies": 0.75, "rewards/chosen": -0.0794101133942604, "rewards/margins": 0.054292935878038406, "rewards/rejected": -0.1337030678987503, "step": 1126 }, { "epoch": 0.6875095317980784, "grad_norm": 2.0981976985931396, "learning_rate": 6.925168401714636e-06, "log_odds_chosen": 0.5634567737579346, "log_odds_ratio": -0.5065612196922302, "logits/chosen": -0.9409902095794678, "logits/rejected": -0.949404239654541, "logps/chosen": -0.7793449759483337, "logps/rejected": -1.0649287700653076, "loss": 1.2265, "nll_loss": 1.008186936378479, "rewards/accuracies": 0.75, "rewards/chosen": -0.07793450355529785, "rewards/margins": 0.02855837345123291, "rewards/rejected": -0.10649287700653076, "step": 1127 }, { "epoch": 0.6881195668750953, "grad_norm": 1.3655141592025757, "learning_rate": 6.924188609920391e-06, "log_odds_chosen": -0.16903626918792725, "log_odds_ratio": -0.8812611699104309, "logits/chosen": -0.9795275926589966, "logits/rejected": -0.8235393166542053, "logps/chosen": -1.0131759643554688, "logps/rejected": -0.941463053226471, "loss": 1.236, "nll_loss": 1.3158587217330933, "rewards/accuracies": 0.375, "rewards/chosen": -0.10131759941577911, "rewards/margins": -0.007171290926635265, "rewards/rejected": -0.09414630383253098, "step": 1128 }, { "epoch": 0.6887296019521122, "grad_norm": 1.5912889242172241, "learning_rate": 6.923208818126147e-06, "log_odds_chosen": 1.1874659061431885, "log_odds_ratio": -0.4635687470436096, "logits/chosen": -0.8151705265045166, "logits/rejected": -0.7091847658157349, "logps/chosen": -0.6328972578048706, "logps/rejected": -1.3641188144683838, "loss": 1.1976, "nll_loss": 0.8317908644676208, "rewards/accuracies": 0.75, "rewards/chosen": -0.06328972429037094, "rewards/margins": 0.07312215119600296, "rewards/rejected": -0.1364118754863739, "step": 1129 }, { "epoch": 0.6893396370291291, "grad_norm": 1.3551477193832397, "learning_rate": 6.922229026331904e-06, "log_odds_chosen": 0.49750927090644836, "log_odds_ratio": -0.5382885932922363, "logits/chosen": -1.006334900856018, "logits/rejected": -1.0663681030273438, "logps/chosen": -0.8887765407562256, "logps/rejected": -1.143773078918457, "loss": 1.1946, "nll_loss": 1.2646323442459106, "rewards/accuracies": 0.875, "rewards/chosen": -0.08887766301631927, "rewards/margins": 0.025499653071165085, "rewards/rejected": -0.11437730491161346, "step": 1130 }, { "epoch": 0.6899496721061461, "grad_norm": 1.4993423223495483, "learning_rate": 6.9212492345376605e-06, "log_odds_chosen": 0.47208940982818604, "log_odds_ratio": -0.6226555109024048, "logits/chosen": -0.8846650123596191, "logits/rejected": -0.6198636889457703, "logps/chosen": -0.9906696677207947, "logps/rejected": -1.3800108432769775, "loss": 1.1552, "nll_loss": 1.2040245532989502, "rewards/accuracies": 0.625, "rewards/chosen": -0.09906697273254395, "rewards/margins": 0.038934122771024704, "rewards/rejected": -0.13800108432769775, "step": 1131 }, { "epoch": 0.6905597071831631, "grad_norm": 1.8272764682769775, "learning_rate": 6.920269442743417e-06, "log_odds_chosen": 1.0193514823913574, "log_odds_ratio": -0.4721424877643585, "logits/chosen": -0.9028899073600769, "logits/rejected": -0.9923677444458008, "logps/chosen": -0.6871592402458191, "logps/rejected": -1.3167330026626587, "loss": 1.1566, "nll_loss": 0.8333790302276611, "rewards/accuracies": 0.5, "rewards/chosen": -0.06871592253446579, "rewards/margins": 0.06295736879110336, "rewards/rejected": -0.13167329132556915, "step": 1132 }, { "epoch": 0.6911697422601799, "grad_norm": 1.7744832038879395, "learning_rate": 6.919289650949174e-06, "log_odds_chosen": 0.44466668367385864, "log_odds_ratio": -0.6660833358764648, "logits/chosen": -0.7830970287322998, "logits/rejected": -0.8946317434310913, "logps/chosen": -0.8284932971000671, "logps/rejected": -1.076999545097351, "loss": 1.1936, "nll_loss": 1.0169456005096436, "rewards/accuracies": 0.625, "rewards/chosen": -0.08284933120012283, "rewards/margins": 0.024850623682141304, "rewards/rejected": -0.10769995301961899, "step": 1133 }, { "epoch": 0.6917797773371969, "grad_norm": 1.9918389320373535, "learning_rate": 6.918309859154929e-06, "log_odds_chosen": 0.7872728705406189, "log_odds_ratio": -0.4935474097728729, "logits/chosen": -1.005946159362793, "logits/rejected": -0.9833360314369202, "logps/chosen": -0.887527585029602, "logps/rejected": -1.3043034076690674, "loss": 1.3831, "nll_loss": 1.562378168106079, "rewards/accuracies": 0.625, "rewards/chosen": -0.08875276893377304, "rewards/margins": 0.0416775718331337, "rewards/rejected": -0.13043034076690674, "step": 1134 }, { "epoch": 0.6923898124142138, "grad_norm": 2.167142391204834, "learning_rate": 6.917330067360685e-06, "log_odds_chosen": 0.20647062361240387, "log_odds_ratio": -0.7828078269958496, "logits/chosen": -0.8370513319969177, "logits/rejected": -0.8675339221954346, "logps/chosen": -1.138312578201294, "logps/rejected": -1.1700172424316406, "loss": 1.1137, "nll_loss": 1.1245441436767578, "rewards/accuracies": 0.5, "rewards/chosen": -0.11383125185966492, "rewards/margins": 0.003170468844473362, "rewards/rejected": -0.1170017272233963, "step": 1135 }, { "epoch": 0.6929998474912308, "grad_norm": 6.886661529541016, "learning_rate": 6.916350275566442e-06, "log_odds_chosen": 0.898117184638977, "log_odds_ratio": -0.4612521529197693, "logits/chosen": -1.0709738731384277, "logits/rejected": -1.0023260116577148, "logps/chosen": -0.87148118019104, "logps/rejected": -1.4787944555282593, "loss": 1.4415, "nll_loss": 1.2912046909332275, "rewards/accuracies": 0.875, "rewards/chosen": -0.08714812248945236, "rewards/margins": 0.060731321573257446, "rewards/rejected": -0.1478794515132904, "step": 1136 }, { "epoch": 0.6936098825682476, "grad_norm": 1.4595998525619507, "learning_rate": 6.915370483772198e-06, "log_odds_chosen": 0.7570406794548035, "log_odds_ratio": -0.5828847289085388, "logits/chosen": -0.8865551948547363, "logits/rejected": -0.8105503916740417, "logps/chosen": -0.6975650191307068, "logps/rejected": -1.1596434116363525, "loss": 0.9101, "nll_loss": 0.8673747181892395, "rewards/accuracies": 0.5, "rewards/chosen": -0.06975650042295456, "rewards/margins": 0.046207837760448456, "rewards/rejected": -0.11596433818340302, "step": 1137 }, { "epoch": 0.6942199176452646, "grad_norm": 1.2898069620132446, "learning_rate": 6.914390691977955e-06, "log_odds_chosen": 1.0798702239990234, "log_odds_ratio": -0.3914317190647125, "logits/chosen": -0.7404386401176453, "logits/rejected": -0.5891646146774292, "logps/chosen": -0.5385486483573914, "logps/rejected": -1.0851774215698242, "loss": 1.0407, "nll_loss": 0.7197567820549011, "rewards/accuracies": 0.75, "rewards/chosen": -0.053854867815971375, "rewards/margins": 0.05466287583112717, "rewards/rejected": -0.10851773619651794, "step": 1138 }, { "epoch": 0.6948299527222815, "grad_norm": 6.303065299987793, "learning_rate": 6.913410900183711e-06, "log_odds_chosen": 1.0838885307312012, "log_odds_ratio": -0.5465443134307861, "logits/chosen": -0.9473835229873657, "logits/rejected": -0.8449313044548035, "logps/chosen": -0.9440591335296631, "logps/rejected": -1.7843947410583496, "loss": 1.2029, "nll_loss": 1.2116371393203735, "rewards/accuracies": 0.625, "rewards/chosen": -0.09440591186285019, "rewards/margins": 0.08403357118368149, "rewards/rejected": -0.17843948304653168, "step": 1139 }, { "epoch": 0.6954399877992985, "grad_norm": 3.3409740924835205, "learning_rate": 6.9124311083894665e-06, "log_odds_chosen": -0.22681459784507751, "log_odds_ratio": -0.8345633745193481, "logits/chosen": -1.2076704502105713, "logits/rejected": -1.0888630151748657, "logps/chosen": -1.1728756427764893, "logps/rejected": -1.0461816787719727, "loss": 1.2472, "nll_loss": 1.342272162437439, "rewards/accuracies": 0.375, "rewards/chosen": -0.11728755384683609, "rewards/margins": -0.012669388204813004, "rewards/rejected": -0.10461817681789398, "step": 1140 }, { "epoch": 0.6960500228763153, "grad_norm": 1.3216488361358643, "learning_rate": 6.911451316595223e-06, "log_odds_chosen": 1.595166563987732, "log_odds_ratio": -0.3030080199241638, "logits/chosen": -0.5470213890075684, "logits/rejected": -0.5886522531509399, "logps/chosen": -0.67692631483078, "logps/rejected": -1.811232566833496, "loss": 0.9919, "nll_loss": 0.804598331451416, "rewards/accuracies": 0.875, "rewards/chosen": -0.06769262999296188, "rewards/margins": 0.11343064159154892, "rewards/rejected": -0.1811232715845108, "step": 1141 }, { "epoch": 0.6966600579533323, "grad_norm": 1.8448153734207153, "learning_rate": 6.9104715248009795e-06, "log_odds_chosen": 0.8518046140670776, "log_odds_ratio": -0.4482008218765259, "logits/chosen": -0.8736380934715271, "logits/rejected": -0.9223020076751709, "logps/chosen": -0.8670827746391296, "logps/rejected": -1.4064319133758545, "loss": 1.1834, "nll_loss": 1.11173677444458, "rewards/accuracies": 0.875, "rewards/chosen": -0.08670827001333237, "rewards/margins": 0.05393492057919502, "rewards/rejected": -0.1406431943178177, "step": 1142 }, { "epoch": 0.6972700930303493, "grad_norm": 1.429465413093567, "learning_rate": 6.909491733006736e-06, "log_odds_chosen": 0.3289705216884613, "log_odds_ratio": -0.6468949913978577, "logits/chosen": -0.927499532699585, "logits/rejected": -0.957896888256073, "logps/chosen": -0.8465698957443237, "logps/rejected": -1.0304596424102783, "loss": 1.2182, "nll_loss": 1.277895450592041, "rewards/accuracies": 0.5, "rewards/chosen": -0.08465699106454849, "rewards/margins": 0.01838897354900837, "rewards/rejected": -0.10304595530033112, "step": 1143 }, { "epoch": 0.6978801281073662, "grad_norm": 3.1430273056030273, "learning_rate": 6.908511941212493e-06, "log_odds_chosen": 0.8979470729827881, "log_odds_ratio": -0.4833334684371948, "logits/chosen": -0.7186274528503418, "logits/rejected": -0.9193049669265747, "logps/chosen": -0.7131045460700989, "logps/rejected": -1.1943780183792114, "loss": 1.2596, "nll_loss": 1.191651701927185, "rewards/accuracies": 0.625, "rewards/chosen": -0.07131046056747437, "rewards/margins": 0.048127345740795135, "rewards/rejected": -0.1194378063082695, "step": 1144 }, { "epoch": 0.6984901631843831, "grad_norm": 3.0075087547302246, "learning_rate": 6.907532149418248e-06, "log_odds_chosen": 0.3880309462547302, "log_odds_ratio": -0.5815356969833374, "logits/chosen": -1.0369378328323364, "logits/rejected": -0.7227840423583984, "logps/chosen": -1.0994868278503418, "logps/rejected": -1.3508620262145996, "loss": 1.348, "nll_loss": 1.3701488971710205, "rewards/accuracies": 0.5, "rewards/chosen": -0.10994868725538254, "rewards/margins": 0.025137513875961304, "rewards/rejected": -0.13508620858192444, "step": 1145 }, { "epoch": 0.6991001982614, "grad_norm": 1.3790825605392456, "learning_rate": 6.906552357624004e-06, "log_odds_chosen": 0.42044028639793396, "log_odds_ratio": -0.6819382309913635, "logits/chosen": -0.8448103070259094, "logits/rejected": -0.7566829919815063, "logps/chosen": -0.839824378490448, "logps/rejected": -1.0830628871917725, "loss": 1.0741, "nll_loss": 0.8864620923995972, "rewards/accuracies": 0.375, "rewards/chosen": -0.0839824378490448, "rewards/margins": 0.0243238415569067, "rewards/rejected": -0.10830628126859665, "step": 1146 }, { "epoch": 0.699710233338417, "grad_norm": 1.586570143699646, "learning_rate": 6.905572565829761e-06, "log_odds_chosen": 0.29128023982048035, "log_odds_ratio": -0.6849155426025391, "logits/chosen": -0.7217405438423157, "logits/rejected": -0.6255213618278503, "logps/chosen": -0.7537519335746765, "logps/rejected": -0.9230844974517822, "loss": 1.2359, "nll_loss": 0.9335112571716309, "rewards/accuracies": 0.5, "rewards/chosen": -0.07537519186735153, "rewards/margins": 0.01693325862288475, "rewards/rejected": -0.09230845421552658, "step": 1147 }, { "epoch": 0.7003202684154339, "grad_norm": 1.8739031553268433, "learning_rate": 6.904592774035517e-06, "log_odds_chosen": 1.6714725494384766, "log_odds_ratio": -0.33741235733032227, "logits/chosen": -0.5975245237350464, "logits/rejected": -0.5481873154640198, "logps/chosen": -0.5993027091026306, "logps/rejected": -1.7354841232299805, "loss": 1.0016, "nll_loss": 0.6577364802360535, "rewards/accuracies": 0.875, "rewards/chosen": -0.05993027240037918, "rewards/margins": 0.1136181429028511, "rewards/rejected": -0.1735484004020691, "step": 1148 }, { "epoch": 0.7009303034924508, "grad_norm": 1.4549977779388428, "learning_rate": 6.903612982241273e-06, "log_odds_chosen": 0.8626666069030762, "log_odds_ratio": -0.4872589111328125, "logits/chosen": -0.7627856731414795, "logits/rejected": -0.7204113006591797, "logps/chosen": -0.83469557762146, "logps/rejected": -1.3008029460906982, "loss": 1.1731, "nll_loss": 0.9176992177963257, "rewards/accuracies": 0.625, "rewards/chosen": -0.08346956968307495, "rewards/margins": 0.04661073163151741, "rewards/rejected": -0.13008028268814087, "step": 1149 }, { "epoch": 0.7015403385694677, "grad_norm": 4.56486701965332, "learning_rate": 6.90263319044703e-06, "log_odds_chosen": 0.5763504505157471, "log_odds_ratio": -0.4932591915130615, "logits/chosen": -1.1102484464645386, "logits/rejected": -0.8845747113227844, "logps/chosen": -1.098737359046936, "logps/rejected": -1.4908690452575684, "loss": 1.1671, "nll_loss": 1.2312813997268677, "rewards/accuracies": 0.875, "rewards/chosen": -0.10987374186515808, "rewards/margins": 0.03921317309141159, "rewards/rejected": -0.14908690750598907, "step": 1150 }, { "epoch": 0.7021503736464847, "grad_norm": 6.19189977645874, "learning_rate": 6.9016533986527855e-06, "log_odds_chosen": 0.31474635004997253, "log_odds_ratio": -0.6535468697547913, "logits/chosen": -0.9530041217803955, "logits/rejected": -0.7221956253051758, "logps/chosen": -1.0349886417388916, "logps/rejected": -1.265528678894043, "loss": 1.0988, "nll_loss": 1.1438319683074951, "rewards/accuracies": 0.5, "rewards/chosen": -0.10349886119365692, "rewards/margins": 0.023054003715515137, "rewards/rejected": -0.12655286490917206, "step": 1151 }, { "epoch": 0.7027604087235017, "grad_norm": 2.010967254638672, "learning_rate": 6.9006736068585425e-06, "log_odds_chosen": 1.8012170791625977, "log_odds_ratio": -0.30055221915245056, "logits/chosen": -0.8777263164520264, "logits/rejected": -0.7562463283538818, "logps/chosen": -0.6887317895889282, "logps/rejected": -1.9074997901916504, "loss": 0.9705, "nll_loss": 0.9103342294692993, "rewards/accuracies": 0.875, "rewards/chosen": -0.06887318193912506, "rewards/margins": 0.1218768060207367, "rewards/rejected": -0.19074997305870056, "step": 1152 }, { "epoch": 0.7033704438005185, "grad_norm": 1.8076486587524414, "learning_rate": 6.899693815064299e-06, "log_odds_chosen": 1.2799792289733887, "log_odds_ratio": -0.3391989469528198, "logits/chosen": -0.7459380030632019, "logits/rejected": -0.7699466943740845, "logps/chosen": -0.7332475185394287, "logps/rejected": -1.524186611175537, "loss": 1.0063, "nll_loss": 0.9294354319572449, "rewards/accuracies": 0.875, "rewards/chosen": -0.07332474738359451, "rewards/margins": 0.07909391820430756, "rewards/rejected": -0.15241867303848267, "step": 1153 }, { "epoch": 0.7039804788775355, "grad_norm": 2.3514113426208496, "learning_rate": 6.898714023270055e-06, "log_odds_chosen": 0.5327104330062866, "log_odds_ratio": -0.5610063076019287, "logits/chosen": -0.7014486193656921, "logits/rejected": -0.7557470202445984, "logps/chosen": -0.7798948884010315, "logps/rejected": -1.0965402126312256, "loss": 1.0904, "nll_loss": 1.05862557888031, "rewards/accuracies": 0.75, "rewards/chosen": -0.07798948884010315, "rewards/margins": 0.031664539128541946, "rewards/rejected": -0.1096540316939354, "step": 1154 }, { "epoch": 0.7045905139545524, "grad_norm": 1.7547465562820435, "learning_rate": 6.897734231475812e-06, "log_odds_chosen": 0.7806822061538696, "log_odds_ratio": -0.544856607913971, "logits/chosen": -1.077763557434082, "logits/rejected": -1.0764389038085938, "logps/chosen": -0.8531606793403625, "logps/rejected": -1.4126975536346436, "loss": 1.1414, "nll_loss": 1.341080904006958, "rewards/accuracies": 0.75, "rewards/chosen": -0.08531607687473297, "rewards/margins": 0.055953674018383026, "rewards/rejected": -0.1412697434425354, "step": 1155 }, { "epoch": 0.7052005490315694, "grad_norm": 1.7944339513778687, "learning_rate": 6.896754439681568e-06, "log_odds_chosen": 0.4909588694572449, "log_odds_ratio": -0.662624180316925, "logits/chosen": -1.0131006240844727, "logits/rejected": -0.8058418035507202, "logps/chosen": -1.044076681137085, "logps/rejected": -1.3913636207580566, "loss": 1.2209, "nll_loss": 1.2997580766677856, "rewards/accuracies": 0.625, "rewards/chosen": -0.1044076681137085, "rewards/margins": 0.034728698432445526, "rewards/rejected": -0.13913635909557343, "step": 1156 }, { "epoch": 0.7058105841085862, "grad_norm": 2.1030499935150146, "learning_rate": 6.895774647887323e-06, "log_odds_chosen": 0.8466812372207642, "log_odds_ratio": -0.4835936427116394, "logits/chosen": -0.6812465190887451, "logits/rejected": -0.7892744541168213, "logps/chosen": -0.7476644515991211, "logps/rejected": -1.271224021911621, "loss": 1.114, "nll_loss": 1.0042812824249268, "rewards/accuracies": 0.75, "rewards/chosen": -0.07476644963026047, "rewards/margins": 0.05235596001148224, "rewards/rejected": -0.1271224021911621, "step": 1157 }, { "epoch": 0.7064206191856032, "grad_norm": 1.8488352298736572, "learning_rate": 6.89479485609308e-06, "log_odds_chosen": 1.3425272703170776, "log_odds_ratio": -0.3842019736766815, "logits/chosen": -0.7539938688278198, "logits/rejected": -0.6999654173851013, "logps/chosen": -0.7902413606643677, "logps/rejected": -1.6277097463607788, "loss": 1.1967, "nll_loss": 0.914810061454773, "rewards/accuracies": 0.875, "rewards/chosen": -0.07902413606643677, "rewards/margins": 0.08374684303998947, "rewards/rejected": -0.16277098655700684, "step": 1158 }, { "epoch": 0.7070306542626201, "grad_norm": 1.9258027076721191, "learning_rate": 6.893815064298836e-06, "log_odds_chosen": 0.7080007791519165, "log_odds_ratio": -0.4816635549068451, "logits/chosen": -0.6775274276733398, "logits/rejected": -0.840533971786499, "logps/chosen": -0.8864907026290894, "logps/rejected": -1.3276437520980835, "loss": 1.0964, "nll_loss": 0.8892369270324707, "rewards/accuracies": 0.75, "rewards/chosen": -0.08864906430244446, "rewards/margins": 0.04411529749631882, "rewards/rejected": -0.13276436924934387, "step": 1159 }, { "epoch": 0.707640689339637, "grad_norm": 3.283407211303711, "learning_rate": 6.892835272504592e-06, "log_odds_chosen": 1.4280391931533813, "log_odds_ratio": -0.3656364679336548, "logits/chosen": -0.8098468780517578, "logits/rejected": -0.9137033820152283, "logps/chosen": -0.5724767446517944, "logps/rejected": -1.3139034509658813, "loss": 1.2154, "nll_loss": 1.2323203086853027, "rewards/accuracies": 0.75, "rewards/chosen": -0.05724767595529556, "rewards/margins": 0.07414265722036362, "rewards/rejected": -0.13139033317565918, "step": 1160 }, { "epoch": 0.7082507244166539, "grad_norm": 2.397559642791748, "learning_rate": 6.891855480710349e-06, "log_odds_chosen": 0.35533785820007324, "log_odds_ratio": -0.7211289405822754, "logits/chosen": -1.135218620300293, "logits/rejected": -1.07131028175354, "logps/chosen": -1.178729772567749, "logps/rejected": -1.4822816848754883, "loss": 1.1933, "nll_loss": 1.3758654594421387, "rewards/accuracies": 0.5, "rewards/chosen": -0.11787299066781998, "rewards/margins": 0.030355190858244896, "rewards/rejected": -0.14822816848754883, "step": 1161 }, { "epoch": 0.7088607594936709, "grad_norm": 1.4860855340957642, "learning_rate": 6.8908756889161045e-06, "log_odds_chosen": 0.6146957278251648, "log_odds_ratio": -0.5287896990776062, "logits/chosen": -1.0157746076583862, "logits/rejected": -1.0288195610046387, "logps/chosen": -0.8936911821365356, "logps/rejected": -1.3080586194992065, "loss": 1.141, "nll_loss": 1.051106572151184, "rewards/accuracies": 0.625, "rewards/chosen": -0.08936911821365356, "rewards/margins": 0.041436754167079926, "rewards/rejected": -0.1308058649301529, "step": 1162 }, { "epoch": 0.7094707945706878, "grad_norm": 1.8741732835769653, "learning_rate": 6.889895897121861e-06, "log_odds_chosen": 1.4872758388519287, "log_odds_ratio": -0.34133926033973694, "logits/chosen": -0.9911133050918579, "logits/rejected": -0.9734009504318237, "logps/chosen": -0.5777038335800171, "logps/rejected": -1.5033214092254639, "loss": 0.9949, "nll_loss": 1.1209570169448853, "rewards/accuracies": 0.875, "rewards/chosen": -0.05777038261294365, "rewards/margins": 0.0925617665052414, "rewards/rejected": -0.15033215284347534, "step": 1163 }, { "epoch": 0.7100808296477047, "grad_norm": 2.366422176361084, "learning_rate": 6.888916105327618e-06, "log_odds_chosen": 0.3135907053947449, "log_odds_ratio": -0.6766057014465332, "logits/chosen": -1.0042227506637573, "logits/rejected": -1.0267349481582642, "logps/chosen": -1.0429993867874146, "logps/rejected": -1.2481911182403564, "loss": 1.2044, "nll_loss": 1.2124170064926147, "rewards/accuracies": 0.625, "rewards/chosen": -0.10429994016885757, "rewards/margins": 0.020519165322184563, "rewards/rejected": -0.12481911480426788, "step": 1164 }, { "epoch": 0.7106908647247216, "grad_norm": 1.4849385023117065, "learning_rate": 6.887936313533374e-06, "log_odds_chosen": 1.4567536115646362, "log_odds_ratio": -0.32570210099220276, "logits/chosen": -0.7191029191017151, "logits/rejected": -0.8674978017807007, "logps/chosen": -0.5345293283462524, "logps/rejected": -1.3321752548217773, "loss": 1.0726, "nll_loss": 0.8647159934043884, "rewards/accuracies": 0.875, "rewards/chosen": -0.05345293506979942, "rewards/margins": 0.07976458966732025, "rewards/rejected": -0.13321752846240997, "step": 1165 }, { "epoch": 0.7113008998017386, "grad_norm": 3.1199018955230713, "learning_rate": 6.886956521739131e-06, "log_odds_chosen": 0.3414866030216217, "log_odds_ratio": -0.5826215147972107, "logits/chosen": -1.113793134689331, "logits/rejected": -0.9813620448112488, "logps/chosen": -0.9859036207199097, "logps/rejected": -1.2576905488967896, "loss": 1.4177, "nll_loss": 1.3896923065185547, "rewards/accuracies": 0.625, "rewards/chosen": -0.09859035909175873, "rewards/margins": 0.027178697288036346, "rewards/rejected": -0.12576904892921448, "step": 1166 }, { "epoch": 0.7119109348787556, "grad_norm": 1.7582900524139404, "learning_rate": 6.885976729944887e-06, "log_odds_chosen": 0.2267785370349884, "log_odds_ratio": -0.6442882418632507, "logits/chosen": -1.243779182434082, "logits/rejected": -1.1782546043395996, "logps/chosen": -1.2726808786392212, "logps/rejected": -1.4603980779647827, "loss": 1.1854, "nll_loss": 1.4485514163970947, "rewards/accuracies": 0.5, "rewards/chosen": -0.12726807594299316, "rewards/margins": 0.018771713599562645, "rewards/rejected": -0.14603978395462036, "step": 1167 }, { "epoch": 0.7125209699557724, "grad_norm": 1.9758727550506592, "learning_rate": 6.884996938150642e-06, "log_odds_chosen": 0.3634287118911743, "log_odds_ratio": -0.6386745572090149, "logits/chosen": -1.0925538539886475, "logits/rejected": -1.031797170639038, "logps/chosen": -0.7534840703010559, "logps/rejected": -0.9540756940841675, "loss": 0.9772, "nll_loss": 1.0656676292419434, "rewards/accuracies": 0.625, "rewards/chosen": -0.07534840703010559, "rewards/margins": 0.020059168338775635, "rewards/rejected": -0.09540757536888123, "step": 1168 }, { "epoch": 0.7131310050327894, "grad_norm": 1.57174551486969, "learning_rate": 6.884017146356399e-06, "log_odds_chosen": 1.0222822427749634, "log_odds_ratio": -0.49725621938705444, "logits/chosen": -0.8222008347511292, "logits/rejected": -0.919611930847168, "logps/chosen": -0.9068228602409363, "logps/rejected": -1.6196680068969727, "loss": 1.3138, "nll_loss": 1.1474509239196777, "rewards/accuracies": 0.625, "rewards/chosen": -0.09068228304386139, "rewards/margins": 0.07128452509641647, "rewards/rejected": -0.16196680068969727, "step": 1169 }, { "epoch": 0.7137410401098063, "grad_norm": 2.1548502445220947, "learning_rate": 6.883037354562155e-06, "log_odds_chosen": 0.3414318263530731, "log_odds_ratio": -0.635293185710907, "logits/chosen": -1.0911192893981934, "logits/rejected": -0.8500733375549316, "logps/chosen": -0.9487711191177368, "logps/rejected": -1.1676826477050781, "loss": 1.1343, "nll_loss": 1.020594835281372, "rewards/accuracies": 0.625, "rewards/chosen": -0.09487711638212204, "rewards/margins": 0.021891150623559952, "rewards/rejected": -0.11676827073097229, "step": 1170 }, { "epoch": 0.7143510751868233, "grad_norm": 4.738702774047852, "learning_rate": 6.882057562767911e-06, "log_odds_chosen": 0.4704131484031677, "log_odds_ratio": -0.5849587321281433, "logits/chosen": -0.9683646559715271, "logits/rejected": -0.9895760416984558, "logps/chosen": -1.1184368133544922, "logps/rejected": -1.4197587966918945, "loss": 1.0566, "nll_loss": 1.2421658039093018, "rewards/accuracies": 0.5, "rewards/chosen": -0.11184369027614594, "rewards/margins": 0.030132198706269264, "rewards/rejected": -0.14197587966918945, "step": 1171 }, { "epoch": 0.7149611102638401, "grad_norm": 2.7971584796905518, "learning_rate": 6.881077770973668e-06, "log_odds_chosen": 0.6847003698348999, "log_odds_ratio": -0.5929617881774902, "logits/chosen": -0.9805426597595215, "logits/rejected": -0.9399664402008057, "logps/chosen": -0.7498248815536499, "logps/rejected": -1.155599594116211, "loss": 1.1853, "nll_loss": 1.1126192808151245, "rewards/accuracies": 0.625, "rewards/chosen": -0.07498247921466827, "rewards/margins": 0.040577489882707596, "rewards/rejected": -0.11555996537208557, "step": 1172 }, { "epoch": 0.7155711453408571, "grad_norm": 2.717926263809204, "learning_rate": 6.880097979179424e-06, "log_odds_chosen": 1.1618428230285645, "log_odds_ratio": -0.5120784044265747, "logits/chosen": -0.9760808348655701, "logits/rejected": -0.910772442817688, "logps/chosen": -0.8157773017883301, "logps/rejected": -1.5571099519729614, "loss": 1.1899, "nll_loss": 1.3754230737686157, "rewards/accuracies": 0.5, "rewards/chosen": -0.08157773315906525, "rewards/margins": 0.07413327693939209, "rewards/rejected": -0.15571101009845734, "step": 1173 }, { "epoch": 0.716181180417874, "grad_norm": 1.6244193315505981, "learning_rate": 6.87911818738518e-06, "log_odds_chosen": 1.6506547927856445, "log_odds_ratio": -0.3452380299568176, "logits/chosen": -0.7933816909790039, "logits/rejected": -0.936977207660675, "logps/chosen": -0.7150061130523682, "logps/rejected": -1.738608956336975, "loss": 0.9886, "nll_loss": 0.9541125893592834, "rewards/accuracies": 0.875, "rewards/chosen": -0.07150059938430786, "rewards/margins": 0.10236029326915741, "rewards/rejected": -0.17386089265346527, "step": 1174 }, { "epoch": 0.716791215494891, "grad_norm": 1.4198729991912842, "learning_rate": 6.878138395590937e-06, "log_odds_chosen": 0.7567418813705444, "log_odds_ratio": -0.5688452124595642, "logits/chosen": -0.9739099740982056, "logits/rejected": -1.0039398670196533, "logps/chosen": -0.8544595241546631, "logps/rejected": -1.4204492568969727, "loss": 1.097, "nll_loss": 0.9849512577056885, "rewards/accuracies": 0.625, "rewards/chosen": -0.08544595539569855, "rewards/margins": 0.0565989725291729, "rewards/rejected": -0.14204493165016174, "step": 1175 }, { "epoch": 0.7174012505719078, "grad_norm": 2.213759183883667, "learning_rate": 6.877158603796693e-06, "log_odds_chosen": 2.0769529342651367, "log_odds_ratio": -0.30057716369628906, "logits/chosen": -0.9703157544136047, "logits/rejected": -0.931053876876831, "logps/chosen": -0.5200855135917664, "logps/rejected": -1.8179082870483398, "loss": 0.9492, "nll_loss": 0.8031119108200073, "rewards/accuracies": 0.875, "rewards/chosen": -0.052008554339408875, "rewards/margins": 0.1297822743654251, "rewards/rejected": -0.1817908138036728, "step": 1176 }, { "epoch": 0.7180112856489248, "grad_norm": 1.1960746049880981, "learning_rate": 6.876178812002449e-06, "log_odds_chosen": 1.7494100332260132, "log_odds_ratio": -0.266752153635025, "logits/chosen": -0.8085479140281677, "logits/rejected": -0.8200073838233948, "logps/chosen": -0.6655157804489136, "logps/rejected": -1.8328776359558105, "loss": 0.9449, "nll_loss": 0.9850106239318848, "rewards/accuracies": 1.0, "rewards/chosen": -0.0665515884757042, "rewards/margins": 0.11673618853092194, "rewards/rejected": -0.18328776955604553, "step": 1177 }, { "epoch": 0.7186213207259418, "grad_norm": 3.3552839756011963, "learning_rate": 6.875199020208206e-06, "log_odds_chosen": 0.44864165782928467, "log_odds_ratio": -0.563926100730896, "logits/chosen": -0.944829523563385, "logits/rejected": -0.9006340503692627, "logps/chosen": -0.8051031827926636, "logps/rejected": -1.0656626224517822, "loss": 1.1404, "nll_loss": 1.0200117826461792, "rewards/accuracies": 0.625, "rewards/chosen": -0.08051031827926636, "rewards/margins": 0.026055937632918358, "rewards/rejected": -0.10656626522541046, "step": 1178 }, { "epoch": 0.7192313558029587, "grad_norm": 1.4237534999847412, "learning_rate": 6.874219228413961e-06, "log_odds_chosen": 1.5536609888076782, "log_odds_ratio": -0.35860350728034973, "logits/chosen": -0.9946091175079346, "logits/rejected": -0.8751067519187927, "logps/chosen": -0.7891784906387329, "logps/rejected": -1.9545249938964844, "loss": 1.1489, "nll_loss": 0.9329702854156494, "rewards/accuracies": 0.875, "rewards/chosen": -0.07891784608364105, "rewards/margins": 0.11653465032577515, "rewards/rejected": -0.1954525113105774, "step": 1179 }, { "epoch": 0.7198413908799756, "grad_norm": 3.038954019546509, "learning_rate": 6.873239436619718e-06, "log_odds_chosen": 0.9940419793128967, "log_odds_ratio": -0.4371674954891205, "logits/chosen": -0.8874846696853638, "logits/rejected": -0.7836609482765198, "logps/chosen": -0.717861533164978, "logps/rejected": -1.2905915975570679, "loss": 1.1373, "nll_loss": 0.9353563785552979, "rewards/accuracies": 0.75, "rewards/chosen": -0.07178615778684616, "rewards/margins": 0.057273007929325104, "rewards/rejected": -0.12905916571617126, "step": 1180 }, { "epoch": 0.7204514259569925, "grad_norm": 1.3577042818069458, "learning_rate": 6.872259644825474e-06, "log_odds_chosen": 1.2512215375900269, "log_odds_ratio": -0.5073304176330566, "logits/chosen": -0.7541602253913879, "logits/rejected": -0.6936522722244263, "logps/chosen": -0.7736330032348633, "logps/rejected": -1.67556893825531, "loss": 1.0673, "nll_loss": 1.1745551824569702, "rewards/accuracies": 0.75, "rewards/chosen": -0.07736329734325409, "rewards/margins": 0.09019359201192856, "rewards/rejected": -0.16755688190460205, "step": 1181 }, { "epoch": 0.7210614610340095, "grad_norm": 1.876141905784607, "learning_rate": 6.87127985303123e-06, "log_odds_chosen": 0.6670271754264832, "log_odds_ratio": -0.6117826700210571, "logits/chosen": -1.076404094696045, "logits/rejected": -0.9977322220802307, "logps/chosen": -0.858473002910614, "logps/rejected": -1.3431501388549805, "loss": 1.009, "nll_loss": 1.0401713848114014, "rewards/accuracies": 0.625, "rewards/chosen": -0.08584730327129364, "rewards/margins": 0.048467714339494705, "rewards/rejected": -0.13431502878665924, "step": 1182 }, { "epoch": 0.7216714961110264, "grad_norm": 2.50405216217041, "learning_rate": 6.870300061236987e-06, "log_odds_chosen": 0.7551093101501465, "log_odds_ratio": -0.6095129251480103, "logits/chosen": -0.9595885872840881, "logits/rejected": -0.9221586585044861, "logps/chosen": -0.99724280834198, "logps/rejected": -1.4557785987854004, "loss": 1.2855, "nll_loss": 1.4363268613815308, "rewards/accuracies": 0.75, "rewards/chosen": -0.09972427785396576, "rewards/margins": 0.04585357755422592, "rewards/rejected": -0.14557786285877228, "step": 1183 }, { "epoch": 0.7222815311880433, "grad_norm": 1.3732678890228271, "learning_rate": 6.8693202694427435e-06, "log_odds_chosen": 0.3132696747779846, "log_odds_ratio": -0.7672315835952759, "logits/chosen": -1.1975774765014648, "logits/rejected": -1.0479542016983032, "logps/chosen": -1.108788251876831, "logps/rejected": -1.3062061071395874, "loss": 1.0961, "nll_loss": 1.388708472251892, "rewards/accuracies": 0.375, "rewards/chosen": -0.1108788326382637, "rewards/margins": 0.019741784781217575, "rewards/rejected": -0.13062062859535217, "step": 1184 }, { "epoch": 0.7228915662650602, "grad_norm": 2.5103375911712646, "learning_rate": 6.868340477648499e-06, "log_odds_chosen": 1.0597783327102661, "log_odds_ratio": -0.43453919887542725, "logits/chosen": -0.9073193669319153, "logits/rejected": -0.9061429500579834, "logps/chosen": -0.8253092765808105, "logps/rejected": -1.6091828346252441, "loss": 1.2048, "nll_loss": 1.1081397533416748, "rewards/accuracies": 0.875, "rewards/chosen": -0.0825309306383133, "rewards/margins": 0.07838735729455948, "rewards/rejected": -0.16091829538345337, "step": 1185 }, { "epoch": 0.7235016013420772, "grad_norm": 6.679915904998779, "learning_rate": 6.867360685854256e-06, "log_odds_chosen": 1.0727654695510864, "log_odds_ratio": -0.4315096139907837, "logits/chosen": -0.825516402721405, "logits/rejected": -0.6656556129455566, "logps/chosen": -0.6294510960578918, "logps/rejected": -1.332694411277771, "loss": 1.0931, "nll_loss": 0.8258570432662964, "rewards/accuracies": 0.625, "rewards/chosen": -0.06294511258602142, "rewards/margins": 0.07032433152198792, "rewards/rejected": -0.13326944410800934, "step": 1186 }, { "epoch": 0.7241116364190942, "grad_norm": 1.61748468875885, "learning_rate": 6.866380894060012e-06, "log_odds_chosen": 1.130505919456482, "log_odds_ratio": -0.4474511742591858, "logits/chosen": -0.8231695890426636, "logits/rejected": -0.8796210289001465, "logps/chosen": -0.8630437254905701, "logps/rejected": -1.6409339904785156, "loss": 1.2102, "nll_loss": 1.3163405656814575, "rewards/accuracies": 0.75, "rewards/chosen": -0.08630437403917313, "rewards/margins": 0.07778902351856232, "rewards/rejected": -0.16409339010715485, "step": 1187 }, { "epoch": 0.724721671496111, "grad_norm": 3.633803367614746, "learning_rate": 6.865401102265768e-06, "log_odds_chosen": 0.3494025468826294, "log_odds_ratio": -0.6587755680084229, "logits/chosen": -1.0346165895462036, "logits/rejected": -0.8621420860290527, "logps/chosen": -0.8877995014190674, "logps/rejected": -1.1254991292953491, "loss": 1.2578, "nll_loss": 0.9932622909545898, "rewards/accuracies": 0.625, "rewards/chosen": -0.08877994120121002, "rewards/margins": 0.02376997098326683, "rewards/rejected": -0.11254991590976715, "step": 1188 }, { "epoch": 0.725331706573128, "grad_norm": 3.295435667037964, "learning_rate": 6.864421310471525e-06, "log_odds_chosen": 0.7792425155639648, "log_odds_ratio": -0.5665645003318787, "logits/chosen": -1.0093905925750732, "logits/rejected": -1.0290647745132446, "logps/chosen": -0.7980021238327026, "logps/rejected": -1.3117070198059082, "loss": 1.2163, "nll_loss": 0.9761629104614258, "rewards/accuracies": 0.625, "rewards/chosen": -0.07980021089315414, "rewards/margins": 0.05137047916650772, "rewards/rejected": -0.13117069005966187, "step": 1189 }, { "epoch": 0.7259417416501449, "grad_norm": 4.353078842163086, "learning_rate": 6.863441518677281e-06, "log_odds_chosen": 1.1312557458877563, "log_odds_ratio": -0.5767092704772949, "logits/chosen": -0.6832240223884583, "logits/rejected": -0.780129611492157, "logps/chosen": -0.8016761541366577, "logps/rejected": -1.7857227325439453, "loss": 1.1599, "nll_loss": 0.9205730557441711, "rewards/accuracies": 0.625, "rewards/chosen": -0.08016762137413025, "rewards/margins": 0.0984046533703804, "rewards/rejected": -0.17857226729393005, "step": 1190 }, { "epoch": 0.7265517767271618, "grad_norm": 2.412384033203125, "learning_rate": 6.862461726883036e-06, "log_odds_chosen": 0.8592194318771362, "log_odds_ratio": -0.5115650296211243, "logits/chosen": -0.9587136507034302, "logits/rejected": -1.027683973312378, "logps/chosen": -0.837211549282074, "logps/rejected": -1.3592345714569092, "loss": 1.2226, "nll_loss": 1.0519622564315796, "rewards/accuracies": 0.75, "rewards/chosen": -0.08372116088867188, "rewards/margins": 0.05220230668783188, "rewards/rejected": -0.13592346012592316, "step": 1191 }, { "epoch": 0.7271618118041787, "grad_norm": 1.350211501121521, "learning_rate": 6.861481935088793e-06, "log_odds_chosen": 0.3534691333770752, "log_odds_ratio": -0.583859384059906, "logits/chosen": -1.1117749214172363, "logits/rejected": -1.086052417755127, "logps/chosen": -0.9367709159851074, "logps/rejected": -1.1421399116516113, "loss": 1.1041, "nll_loss": 1.1571197509765625, "rewards/accuracies": 0.75, "rewards/chosen": -0.0936770960688591, "rewards/margins": 0.02053690329194069, "rewards/rejected": -0.11421399563550949, "step": 1192 }, { "epoch": 0.7277718468811957, "grad_norm": 1.5145357847213745, "learning_rate": 6.860502143294549e-06, "log_odds_chosen": 0.8431099653244019, "log_odds_ratio": -0.45469745993614197, "logits/chosen": -0.893100380897522, "logits/rejected": -0.8827468156814575, "logps/chosen": -0.788567304611206, "logps/rejected": -1.2965327501296997, "loss": 1.0042, "nll_loss": 0.8920559883117676, "rewards/accuracies": 0.875, "rewards/chosen": -0.07885673642158508, "rewards/margins": 0.050796542316675186, "rewards/rejected": -0.12965327501296997, "step": 1193 }, { "epoch": 0.7283818819582126, "grad_norm": 1.847603440284729, "learning_rate": 6.859522351500306e-06, "log_odds_chosen": 0.8211240768432617, "log_odds_ratio": -0.504995584487915, "logits/chosen": -0.7673488855361938, "logits/rejected": -0.8744294047355652, "logps/chosen": -0.7108743190765381, "logps/rejected": -1.2199602127075195, "loss": 0.8987, "nll_loss": 0.8560603857040405, "rewards/accuracies": 0.625, "rewards/chosen": -0.07108743488788605, "rewards/margins": 0.050908591598272324, "rewards/rejected": -0.12199602276086807, "step": 1194 }, { "epoch": 0.7289919170352295, "grad_norm": 1.8378933668136597, "learning_rate": 6.8585425597060625e-06, "log_odds_chosen": 0.2165759801864624, "log_odds_ratio": -0.6379740238189697, "logits/chosen": -1.1229376792907715, "logits/rejected": -1.1216307878494263, "logps/chosen": -1.11286199092865, "logps/rejected": -1.319028377532959, "loss": 1.2046, "nll_loss": 1.362776517868042, "rewards/accuracies": 0.5, "rewards/chosen": -0.1112862080335617, "rewards/margins": 0.020616643130779266, "rewards/rejected": -0.13190284371376038, "step": 1195 }, { "epoch": 0.7296019521122464, "grad_norm": 2.083418846130371, "learning_rate": 6.857562767911819e-06, "log_odds_chosen": 1.4080082178115845, "log_odds_ratio": -0.3556100130081177, "logits/chosen": -0.6106485724449158, "logits/rejected": -0.9457530975341797, "logps/chosen": -0.6249538660049438, "logps/rejected": -1.6230945587158203, "loss": 1.0318, "nll_loss": 0.8285104036331177, "rewards/accuracies": 0.75, "rewards/chosen": -0.06249538064002991, "rewards/margins": 0.09981408715248108, "rewards/rejected": -0.162309467792511, "step": 1196 }, { "epoch": 0.7302119871892634, "grad_norm": 1.5558480024337769, "learning_rate": 6.856582976117575e-06, "log_odds_chosen": 0.39868611097335815, "log_odds_ratio": -0.5627891421318054, "logits/chosen": -0.9228823184967041, "logits/rejected": -0.8879584670066833, "logps/chosen": -0.9196316003799438, "logps/rejected": -1.1631563901901245, "loss": 1.1323, "nll_loss": 1.0139235258102417, "rewards/accuracies": 0.625, "rewards/chosen": -0.09196315705776215, "rewards/margins": 0.02435249090194702, "rewards/rejected": -0.11631564795970917, "step": 1197 }, { "epoch": 0.7308220222662803, "grad_norm": 1.592200517654419, "learning_rate": 6.855603184323331e-06, "log_odds_chosen": 0.6754680275917053, "log_odds_ratio": -0.46883755922317505, "logits/chosen": -1.0667757987976074, "logits/rejected": -0.9535909295082092, "logps/chosen": -0.6896679997444153, "logps/rejected": -1.1485495567321777, "loss": 1.2778, "nll_loss": 1.1129189729690552, "rewards/accuracies": 0.875, "rewards/chosen": -0.068966805934906, "rewards/margins": 0.04588814824819565, "rewards/rejected": -0.11485495418310165, "step": 1198 }, { "epoch": 0.7314320573432972, "grad_norm": 3.597057819366455, "learning_rate": 6.854623392529087e-06, "log_odds_chosen": 0.2878205180168152, "log_odds_ratio": -0.7040200233459473, "logits/chosen": -1.1110519170761108, "logits/rejected": -1.1110697984695435, "logps/chosen": -1.3258812427520752, "logps/rejected": -1.4197741746902466, "loss": 1.3535, "nll_loss": 1.4252266883850098, "rewards/accuracies": 0.625, "rewards/chosen": -0.13258813321590424, "rewards/margins": 0.009389283135533333, "rewards/rejected": -0.1419774293899536, "step": 1199 }, { "epoch": 0.7320420924203141, "grad_norm": 1.075648546218872, "learning_rate": 6.853643600734844e-06, "log_odds_chosen": 0.4823988676071167, "log_odds_ratio": -0.6089485883712769, "logits/chosen": -1.0601474046707153, "logits/rejected": -1.043746829032898, "logps/chosen": -1.139370322227478, "logps/rejected": -1.425706148147583, "loss": 1.2848, "nll_loss": 1.2120661735534668, "rewards/accuracies": 0.875, "rewards/chosen": -0.11393704265356064, "rewards/margins": 0.02863357961177826, "rewards/rejected": -0.1425706148147583, "step": 1200 }, { "epoch": 0.7326521274973311, "grad_norm": 1.2945815324783325, "learning_rate": 6.8526638089406e-06, "log_odds_chosen": 0.7584860324859619, "log_odds_ratio": -0.514245867729187, "logits/chosen": -0.95610111951828, "logits/rejected": -1.1042897701263428, "logps/chosen": -0.931660532951355, "logps/rejected": -1.3890125751495361, "loss": 1.145, "nll_loss": 1.2204866409301758, "rewards/accuracies": 0.5, "rewards/chosen": -0.0931660532951355, "rewards/margins": 0.0457351990044117, "rewards/rejected": -0.1389012485742569, "step": 1201 }, { "epoch": 0.7332621625743481, "grad_norm": 2.492255210876465, "learning_rate": 6.851684017146355e-06, "log_odds_chosen": 1.0313128232955933, "log_odds_ratio": -0.42885899543762207, "logits/chosen": -0.9924832582473755, "logits/rejected": -0.9718155264854431, "logps/chosen": -0.8360809683799744, "logps/rejected": -1.4941366910934448, "loss": 1.2166, "nll_loss": 1.3065414428710938, "rewards/accuracies": 1.0, "rewards/chosen": -0.08360809832811356, "rewards/margins": 0.0658055767416954, "rewards/rejected": -0.14941367506980896, "step": 1202 }, { "epoch": 0.7338721976513649, "grad_norm": 1.4687844514846802, "learning_rate": 6.850704225352112e-06, "log_odds_chosen": 1.5043786764144897, "log_odds_ratio": -0.4845198690891266, "logits/chosen": -0.7857572436332703, "logits/rejected": -0.8556846380233765, "logps/chosen": -0.6058898568153381, "logps/rejected": -1.7313352823257446, "loss": 1.0598, "nll_loss": 0.8213715553283691, "rewards/accuracies": 0.75, "rewards/chosen": -0.060588981956243515, "rewards/margins": 0.11254455149173737, "rewards/rejected": -0.17313352227210999, "step": 1203 }, { "epoch": 0.7344822327283819, "grad_norm": 6.215100288391113, "learning_rate": 6.8497244335578684e-06, "log_odds_chosen": 0.7247559428215027, "log_odds_ratio": -0.5231803059577942, "logits/chosen": -0.986579954624176, "logits/rejected": -0.9635564088821411, "logps/chosen": -0.8810257911682129, "logps/rejected": -1.390437364578247, "loss": 1.125, "nll_loss": 1.151649832725525, "rewards/accuracies": 0.875, "rewards/chosen": -0.08810258656740189, "rewards/margins": 0.05094114691019058, "rewards/rejected": -0.13904371857643127, "step": 1204 }, { "epoch": 0.7350922678053988, "grad_norm": 1.7879221439361572, "learning_rate": 6.8487446417636246e-06, "log_odds_chosen": 0.7220070958137512, "log_odds_ratio": -0.5868832468986511, "logits/chosen": -0.9089726209640503, "logits/rejected": -0.9315413236618042, "logps/chosen": -0.8238685131072998, "logps/rejected": -1.3518379926681519, "loss": 1.1244, "nll_loss": 1.1614066362380981, "rewards/accuracies": 0.5, "rewards/chosen": -0.08238685131072998, "rewards/margins": 0.052796948701143265, "rewards/rejected": -0.13518379628658295, "step": 1205 }, { "epoch": 0.7357023028824158, "grad_norm": 1.3966014385223389, "learning_rate": 6.8477648499693815e-06, "log_odds_chosen": 0.6926497220993042, "log_odds_ratio": -0.522009551525116, "logits/chosen": -1.1185822486877441, "logits/rejected": -1.0466573238372803, "logps/chosen": -0.7244850397109985, "logps/rejected": -1.2174535989761353, "loss": 1.0663, "nll_loss": 0.8747690916061401, "rewards/accuracies": 0.875, "rewards/chosen": -0.0724484995007515, "rewards/margins": 0.04929685592651367, "rewards/rejected": -0.12174535542726517, "step": 1206 }, { "epoch": 0.7363123379594326, "grad_norm": 1.067309021949768, "learning_rate": 6.846785058175138e-06, "log_odds_chosen": 1.2232286930084229, "log_odds_ratio": -0.42930859327316284, "logits/chosen": -0.9686050415039062, "logits/rejected": -0.6990951299667358, "logps/chosen": -0.7669467926025391, "logps/rejected": -1.6336110830307007, "loss": 1.2137, "nll_loss": 0.8744580149650574, "rewards/accuracies": 0.75, "rewards/chosen": -0.07669468969106674, "rewards/margins": 0.08666643500328064, "rewards/rejected": -0.16336111724376678, "step": 1207 }, { "epoch": 0.7369223730364496, "grad_norm": 1.4696295261383057, "learning_rate": 6.845805266380894e-06, "log_odds_chosen": 1.0238181352615356, "log_odds_ratio": -0.4769960045814514, "logits/chosen": -1.1289950609207153, "logits/rejected": -1.0197632312774658, "logps/chosen": -0.8593354225158691, "logps/rejected": -1.5329350233078003, "loss": 1.3488, "nll_loss": 1.4839341640472412, "rewards/accuracies": 0.75, "rewards/chosen": -0.08593355119228363, "rewards/margins": 0.06735996156930923, "rewards/rejected": -0.15329350531101227, "step": 1208 }, { "epoch": 0.7375324081134665, "grad_norm": 2.57099986076355, "learning_rate": 6.84482547458665e-06, "log_odds_chosen": 0.8613711595535278, "log_odds_ratio": -0.4621734023094177, "logits/chosen": -1.059037208557129, "logits/rejected": -0.9933327436447144, "logps/chosen": -0.8521682024002075, "logps/rejected": -1.4062108993530273, "loss": 1.0075, "nll_loss": 1.0420749187469482, "rewards/accuracies": 0.75, "rewards/chosen": -0.08521682024002075, "rewards/margins": 0.055404260754585266, "rewards/rejected": -0.1406210958957672, "step": 1209 }, { "epoch": 0.7381424431904835, "grad_norm": 1.3634835481643677, "learning_rate": 6.843845682792406e-06, "log_odds_chosen": 1.4982765913009644, "log_odds_ratio": -0.40757638216018677, "logits/chosen": -0.9442771077156067, "logits/rejected": -0.9025216102600098, "logps/chosen": -0.7423018217086792, "logps/rejected": -1.8074092864990234, "loss": 1.1643, "nll_loss": 1.0236413478851318, "rewards/accuracies": 0.625, "rewards/chosen": -0.07423018664121628, "rewards/margins": 0.10651074349880219, "rewards/rejected": -0.18074092268943787, "step": 1210 }, { "epoch": 0.7387524782675003, "grad_norm": 5.1550164222717285, "learning_rate": 6.842865890998163e-06, "log_odds_chosen": 0.4947620630264282, "log_odds_ratio": -0.6167159080505371, "logits/chosen": -0.9441032409667969, "logits/rejected": -0.9188375473022461, "logps/chosen": -0.9191725254058838, "logps/rejected": -1.2619149684906006, "loss": 1.1918, "nll_loss": 1.1927374601364136, "rewards/accuracies": 0.5, "rewards/chosen": -0.0919172465801239, "rewards/margins": 0.03427424281835556, "rewards/rejected": -0.12619149684906006, "step": 1211 }, { "epoch": 0.7393625133445173, "grad_norm": 1.7757471799850464, "learning_rate": 6.841886099203919e-06, "log_odds_chosen": 0.6218528747558594, "log_odds_ratio": -0.5891570448875427, "logits/chosen": -0.8885287046432495, "logits/rejected": -0.907221257686615, "logps/chosen": -0.7484859228134155, "logps/rejected": -1.1490111351013184, "loss": 1.0994, "nll_loss": 1.0528494119644165, "rewards/accuracies": 0.75, "rewards/chosen": -0.07484859228134155, "rewards/margins": 0.04005253314971924, "rewards/rejected": -0.11490112543106079, "step": 1212 }, { "epoch": 0.7399725484215343, "grad_norm": 1.2024848461151123, "learning_rate": 6.840906307409675e-06, "log_odds_chosen": 0.9531283378601074, "log_odds_ratio": -0.44372475147247314, "logits/chosen": -0.8060228824615479, "logits/rejected": -0.7237440943717957, "logps/chosen": -0.8701989054679871, "logps/rejected": -1.4453966617584229, "loss": 1.039, "nll_loss": 0.9357895851135254, "rewards/accuracies": 0.875, "rewards/chosen": -0.0870198905467987, "rewards/margins": 0.05751977860927582, "rewards/rejected": -0.14453968405723572, "step": 1213 }, { "epoch": 0.7405825834985512, "grad_norm": 2.24208927154541, "learning_rate": 6.839926515615431e-06, "log_odds_chosen": 1.0643081665039062, "log_odds_ratio": -0.5085029602050781, "logits/chosen": -0.9086275696754456, "logits/rejected": -1.0078065395355225, "logps/chosen": -0.6933543086051941, "logps/rejected": -1.4338428974151611, "loss": 1.0928, "nll_loss": 1.1533888578414917, "rewards/accuracies": 0.625, "rewards/chosen": -0.06933543086051941, "rewards/margins": 0.07404886931180954, "rewards/rejected": -0.14338430762290955, "step": 1214 }, { "epoch": 0.7411926185755681, "grad_norm": 1.4419682025909424, "learning_rate": 6.8389467238211875e-06, "log_odds_chosen": 1.3002136945724487, "log_odds_ratio": -0.4651225209236145, "logits/chosen": -1.0497666597366333, "logits/rejected": -0.9565054774284363, "logps/chosen": -0.8151912689208984, "logps/rejected": -1.8009830713272095, "loss": 1.1652, "nll_loss": 1.0581011772155762, "rewards/accuracies": 0.75, "rewards/chosen": -0.08151912689208984, "rewards/margins": 0.09857918322086334, "rewards/rejected": -0.18009831011295319, "step": 1215 }, { "epoch": 0.741802653652585, "grad_norm": 1.8716686964035034, "learning_rate": 6.837966932026944e-06, "log_odds_chosen": 0.8579006791114807, "log_odds_ratio": -0.41346293687820435, "logits/chosen": -0.8498449325561523, "logits/rejected": -1.0227744579315186, "logps/chosen": -0.7140198349952698, "logps/rejected": -1.1577461957931519, "loss": 1.1742, "nll_loss": 1.1669036149978638, "rewards/accuracies": 0.875, "rewards/chosen": -0.07140198349952698, "rewards/margins": 0.04437262937426567, "rewards/rejected": -0.11577461659908295, "step": 1216 }, { "epoch": 0.742412688729602, "grad_norm": 1.6167633533477783, "learning_rate": 6.8369871402327006e-06, "log_odds_chosen": 1.3570395708084106, "log_odds_ratio": -0.40913790464401245, "logits/chosen": -0.9532581567764282, "logits/rejected": -0.9562643766403198, "logps/chosen": -0.7486782073974609, "logps/rejected": -1.560848593711853, "loss": 1.0582, "nll_loss": 0.873612105846405, "rewards/accuracies": 0.625, "rewards/chosen": -0.07486782222986221, "rewards/margins": 0.08121703565120697, "rewards/rejected": -0.15608486533164978, "step": 1217 }, { "epoch": 0.7430227238066189, "grad_norm": 1.3368059396743774, "learning_rate": 6.836007348438457e-06, "log_odds_chosen": 1.1181678771972656, "log_odds_ratio": -0.478695809841156, "logits/chosen": -1.06923246383667, "logits/rejected": -0.9616477489471436, "logps/chosen": -0.7506374716758728, "logps/rejected": -1.541622281074524, "loss": 1.1414, "nll_loss": 0.9464243650436401, "rewards/accuracies": 0.625, "rewards/chosen": -0.07506375014781952, "rewards/margins": 0.07909848541021347, "rewards/rejected": -0.1541622281074524, "step": 1218 }, { "epoch": 0.7436327588836358, "grad_norm": 1.7251865863800049, "learning_rate": 6.835027556644212e-06, "log_odds_chosen": 1.0887588262557983, "log_odds_ratio": -0.5236433744430542, "logits/chosen": -1.0092885494232178, "logits/rejected": -0.9570202231407166, "logps/chosen": -0.8889316320419312, "logps/rejected": -1.7183854579925537, "loss": 1.054, "nll_loss": 1.0093822479248047, "rewards/accuracies": 0.625, "rewards/chosen": -0.08889316022396088, "rewards/margins": 0.08294539153575897, "rewards/rejected": -0.17183855175971985, "step": 1219 }, { "epoch": 0.7442427939606527, "grad_norm": 1.4851230382919312, "learning_rate": 6.834047764849969e-06, "log_odds_chosen": 1.2724292278289795, "log_odds_ratio": -0.3506382405757904, "logits/chosen": -0.7258316278457642, "logits/rejected": -0.8170667886734009, "logps/chosen": -0.6803751587867737, "logps/rejected": -1.481781005859375, "loss": 1.2079, "nll_loss": 0.9376446008682251, "rewards/accuracies": 1.0, "rewards/chosen": -0.06803752481937408, "rewards/margins": 0.08014058321714401, "rewards/rejected": -0.1481781005859375, "step": 1220 }, { "epoch": 0.7448528290376697, "grad_norm": 1.952075719833374, "learning_rate": 6.833067973055725e-06, "log_odds_chosen": 0.4176218807697296, "log_odds_ratio": -0.6719907522201538, "logits/chosen": -1.0138037204742432, "logits/rejected": -1.0251251459121704, "logps/chosen": -0.857676088809967, "logps/rejected": -1.094146728515625, "loss": 1.0956, "nll_loss": 1.1818616390228271, "rewards/accuracies": 0.625, "rewards/chosen": -0.08576761186122894, "rewards/margins": 0.023647062480449677, "rewards/rejected": -0.10941467434167862, "step": 1221 }, { "epoch": 0.7454628641146865, "grad_norm": 1.317821979522705, "learning_rate": 6.832088181261482e-06, "log_odds_chosen": 1.4861129522323608, "log_odds_ratio": -0.3172144293785095, "logits/chosen": -0.7690767645835876, "logits/rejected": -0.9515212774276733, "logps/chosen": -0.7307243347167969, "logps/rejected": -1.6676275730133057, "loss": 1.0774, "nll_loss": 0.791795551776886, "rewards/accuracies": 0.875, "rewards/chosen": -0.07307244092226028, "rewards/margins": 0.09369032084941864, "rewards/rejected": -0.16676276922225952, "step": 1222 }, { "epoch": 0.7460728991917035, "grad_norm": 2.2409515380859375, "learning_rate": 6.831108389467238e-06, "log_odds_chosen": -0.4490423798561096, "log_odds_ratio": -0.981819748878479, "logits/chosen": -1.1738699674606323, "logits/rejected": -1.0959300994873047, "logps/chosen": -1.0878620147705078, "logps/rejected": -0.8108183741569519, "loss": 1.2621, "nll_loss": 1.1590322256088257, "rewards/accuracies": 0.25, "rewards/chosen": -0.1087861955165863, "rewards/margins": -0.027704361826181412, "rewards/rejected": -0.0810818299651146, "step": 1223 }, { "epoch": 0.7466829342687205, "grad_norm": 1.6447269916534424, "learning_rate": 6.830128597672994e-06, "log_odds_chosen": 0.8096188306808472, "log_odds_ratio": -0.5755710005760193, "logits/chosen": -1.0731163024902344, "logits/rejected": -0.9618271589279175, "logps/chosen": -0.9205623865127563, "logps/rejected": -1.5336809158325195, "loss": 1.2762, "nll_loss": 1.0623698234558105, "rewards/accuracies": 0.5, "rewards/chosen": -0.09205624461174011, "rewards/margins": 0.06131184101104736, "rewards/rejected": -0.15336808562278748, "step": 1224 }, { "epoch": 0.7472929693457374, "grad_norm": 5.499212741851807, "learning_rate": 6.82914880587875e-06, "log_odds_chosen": -0.02546098828315735, "log_odds_ratio": -0.8658964037895203, "logits/chosen": -0.8429945111274719, "logits/rejected": -0.9892422556877136, "logps/chosen": -0.8782151937484741, "logps/rejected": -0.9569973349571228, "loss": 1.1639, "nll_loss": 1.0259430408477783, "rewards/accuracies": 0.625, "rewards/chosen": -0.08782152086496353, "rewards/margins": 0.007878215983510017, "rewards/rejected": -0.0956997349858284, "step": 1225 }, { "epoch": 0.7479030044227543, "grad_norm": 1.32676100730896, "learning_rate": 6.8281690140845065e-06, "log_odds_chosen": 0.9171837568283081, "log_odds_ratio": -0.5375332236289978, "logits/chosen": -1.1387232542037964, "logits/rejected": -0.9218088388442993, "logps/chosen": -0.820766806602478, "logps/rejected": -1.4365718364715576, "loss": 1.0576, "nll_loss": 0.9184183478355408, "rewards/accuracies": 0.5, "rewards/chosen": -0.08207667618989944, "rewards/margins": 0.06158049404621124, "rewards/rejected": -0.14365717768669128, "step": 1226 }, { "epoch": 0.7485130394997712, "grad_norm": 1.9639328718185425, "learning_rate": 6.827189222290263e-06, "log_odds_chosen": 1.752274990081787, "log_odds_ratio": -0.27716338634490967, "logits/chosen": -0.8887650966644287, "logits/rejected": -0.883141279220581, "logps/chosen": -0.5652143955230713, "logps/rejected": -1.8002376556396484, "loss": 1.0642, "nll_loss": 0.9320122003555298, "rewards/accuracies": 1.0, "rewards/chosen": -0.05652143433690071, "rewards/margins": 0.12350234389305115, "rewards/rejected": -0.18002377450466156, "step": 1227 }, { "epoch": 0.7491230745767882, "grad_norm": 1.6160136461257935, "learning_rate": 6.82620943049602e-06, "log_odds_chosen": 0.5007943511009216, "log_odds_ratio": -0.7208665013313293, "logits/chosen": -1.1267238855361938, "logits/rejected": -0.9400234222412109, "logps/chosen": -1.0735572576522827, "logps/rejected": -1.3548593521118164, "loss": 1.2056, "nll_loss": 1.299741268157959, "rewards/accuracies": 0.25, "rewards/chosen": -0.10735572874546051, "rewards/margins": 0.028130214661359787, "rewards/rejected": -0.1354859471321106, "step": 1228 }, { "epoch": 0.7497331096538051, "grad_norm": 1.6047085523605347, "learning_rate": 6.825229638701776e-06, "log_odds_chosen": 0.2817487418651581, "log_odds_ratio": -0.736321747303009, "logits/chosen": -1.0437707901000977, "logits/rejected": -1.038405179977417, "logps/chosen": -0.9091626405715942, "logps/rejected": -1.081868052482605, "loss": 1.2034, "nll_loss": 1.0651307106018066, "rewards/accuracies": 0.375, "rewards/chosen": -0.09091626852750778, "rewards/margins": 0.01727055013179779, "rewards/rejected": -0.10818681120872498, "step": 1229 }, { "epoch": 0.750343144730822, "grad_norm": 1.4285250902175903, "learning_rate": 6.824249846907532e-06, "log_odds_chosen": 1.0961674451828003, "log_odds_ratio": -0.5301420092582703, "logits/chosen": -0.723745584487915, "logits/rejected": -0.8679084777832031, "logps/chosen": -0.7617778182029724, "logps/rejected": -1.4969682693481445, "loss": 1.0956, "nll_loss": 0.940353274345398, "rewards/accuracies": 0.5, "rewards/chosen": -0.07617778331041336, "rewards/margins": 0.07351905107498169, "rewards/rejected": -0.14969685673713684, "step": 1230 }, { "epoch": 0.7509531798078389, "grad_norm": 1.3500897884368896, "learning_rate": 6.823270055113288e-06, "log_odds_chosen": 0.40754157304763794, "log_odds_ratio": -0.578765332698822, "logits/chosen": -0.963153600692749, "logits/rejected": -0.92830491065979, "logps/chosen": -0.9525710940361023, "logps/rejected": -1.1632897853851318, "loss": 1.042, "nll_loss": 1.0489143133163452, "rewards/accuracies": 0.625, "rewards/chosen": -0.09525710344314575, "rewards/margins": 0.02107187733054161, "rewards/rejected": -0.11632898449897766, "step": 1231 }, { "epoch": 0.7515632148848559, "grad_norm": 0.9519506096839905, "learning_rate": 6.822290263319044e-06, "log_odds_chosen": 0.5707884430885315, "log_odds_ratio": -0.5892797708511353, "logits/chosen": -0.9382237792015076, "logits/rejected": -0.8712984323501587, "logps/chosen": -0.7321672439575195, "logps/rejected": -1.1354613304138184, "loss": 1.2616, "nll_loss": 1.0756512880325317, "rewards/accuracies": 0.625, "rewards/chosen": -0.07321672886610031, "rewards/margins": 0.040329400449991226, "rewards/rejected": -0.11354613304138184, "step": 1232 }, { "epoch": 0.7521732499618728, "grad_norm": 1.5446559190750122, "learning_rate": 6.821310471524801e-06, "log_odds_chosen": 1.3658746480941772, "log_odds_ratio": -0.5349934697151184, "logits/chosen": -0.9601951837539673, "logits/rejected": -0.6880537271499634, "logps/chosen": -0.8073532581329346, "logps/rejected": -1.8100848197937012, "loss": 1.1468, "nll_loss": 0.9793766736984253, "rewards/accuracies": 0.625, "rewards/chosen": -0.08073532581329346, "rewards/margins": 0.10027315467596054, "rewards/rejected": -0.1810084730386734, "step": 1233 }, { "epoch": 0.7527832850388897, "grad_norm": 7.411893367767334, "learning_rate": 6.820330679730557e-06, "log_odds_chosen": 0.5109868049621582, "log_odds_ratio": -0.5291246771812439, "logits/chosen": -0.7526789307594299, "logits/rejected": -0.7201439142227173, "logps/chosen": -0.8622456192970276, "logps/rejected": -1.1333715915679932, "loss": 1.1316, "nll_loss": 1.0893713235855103, "rewards/accuracies": 0.625, "rewards/chosen": -0.08622457087039948, "rewards/margins": 0.027112584561109543, "rewards/rejected": -0.11333715170621872, "step": 1234 }, { "epoch": 0.7533933201159067, "grad_norm": 1.4388329982757568, "learning_rate": 6.819350887936313e-06, "log_odds_chosen": 0.5849480032920837, "log_odds_ratio": -0.5820132493972778, "logits/chosen": -1.0635586977005005, "logits/rejected": -1.083251714706421, "logps/chosen": -0.8281629085540771, "logps/rejected": -1.2727560997009277, "loss": 1.1389, "nll_loss": 1.176254391670227, "rewards/accuracies": 0.625, "rewards/chosen": -0.08281629532575607, "rewards/margins": 0.04445930942893028, "rewards/rejected": -0.12727561593055725, "step": 1235 }, { "epoch": 0.7540033551929236, "grad_norm": 1.5419400930404663, "learning_rate": 6.8183710961420694e-06, "log_odds_chosen": 0.7160145044326782, "log_odds_ratio": -0.5318560004234314, "logits/chosen": -0.8018890023231506, "logits/rejected": -0.7849745750427246, "logps/chosen": -0.7852876782417297, "logps/rejected": -1.1760106086730957, "loss": 1.0436, "nll_loss": 0.8760011792182922, "rewards/accuracies": 0.75, "rewards/chosen": -0.07852876931428909, "rewards/margins": 0.03907228261232376, "rewards/rejected": -0.11760105192661285, "step": 1236 }, { "epoch": 0.7546133902699406, "grad_norm": 2.2968504428863525, "learning_rate": 6.8173913043478256e-06, "log_odds_chosen": 0.3539128303527832, "log_odds_ratio": -0.6865154504776001, "logits/chosen": -0.9862709045410156, "logits/rejected": -0.8870136141777039, "logps/chosen": -0.964532196521759, "logps/rejected": -1.229479193687439, "loss": 1.1361, "nll_loss": 1.1737232208251953, "rewards/accuracies": 0.375, "rewards/chosen": -0.0964532196521759, "rewards/margins": 0.026494696736335754, "rewards/rejected": -0.12294792383909225, "step": 1237 }, { "epoch": 0.7552234253469574, "grad_norm": 1.9677997827529907, "learning_rate": 6.816411512553582e-06, "log_odds_chosen": 1.0195807218551636, "log_odds_ratio": -0.4004848003387451, "logits/chosen": -1.0581060647964478, "logits/rejected": -0.8081938028335571, "logps/chosen": -0.8798478841781616, "logps/rejected": -1.5323386192321777, "loss": 1.1958, "nll_loss": 1.260862946510315, "rewards/accuracies": 0.875, "rewards/chosen": -0.08798478543758392, "rewards/margins": 0.06524907797574997, "rewards/rejected": -0.15323388576507568, "step": 1238 }, { "epoch": 0.7558334604239744, "grad_norm": 1.5975639820098877, "learning_rate": 6.815431720759339e-06, "log_odds_chosen": 1.5605748891830444, "log_odds_ratio": -0.4092845320701599, "logits/chosen": -0.6442502737045288, "logits/rejected": -0.7332244515419006, "logps/chosen": -0.6969619989395142, "logps/rejected": -1.815773606300354, "loss": 1.0482, "nll_loss": 0.9819337129592896, "rewards/accuracies": 0.625, "rewards/chosen": -0.06969620287418365, "rewards/margins": 0.11188116669654846, "rewards/rejected": -0.18157736957073212, "step": 1239 }, { "epoch": 0.7564434955009913, "grad_norm": 2.5495693683624268, "learning_rate": 6.814451928965095e-06, "log_odds_chosen": 1.0224251747131348, "log_odds_ratio": -0.4695611596107483, "logits/chosen": -0.9925297498703003, "logits/rejected": -0.975035548210144, "logps/chosen": -0.6777691841125488, "logps/rejected": -1.21194589138031, "loss": 1.1867, "nll_loss": 1.063812494277954, "rewards/accuracies": 0.75, "rewards/chosen": -0.06777691841125488, "rewards/margins": 0.053417667746543884, "rewards/rejected": -0.12119458615779877, "step": 1240 }, { "epoch": 0.7570535305780083, "grad_norm": 2.7763712406158447, "learning_rate": 6.813472137170851e-06, "log_odds_chosen": 0.5487938523292542, "log_odds_ratio": -0.6409933567047119, "logits/chosen": -1.0079047679901123, "logits/rejected": -0.9913309812545776, "logps/chosen": -0.9252363443374634, "logps/rejected": -1.358654260635376, "loss": 1.1663, "nll_loss": 1.0551830530166626, "rewards/accuracies": 0.5, "rewards/chosen": -0.09252364933490753, "rewards/margins": 0.04334178939461708, "rewards/rejected": -0.13586542010307312, "step": 1241 }, { "epoch": 0.7576635656550251, "grad_norm": 10.478577613830566, "learning_rate": 6.812492345376607e-06, "log_odds_chosen": 0.33109620213508606, "log_odds_ratio": -0.5641742944717407, "logits/chosen": -0.8992449641227722, "logits/rejected": -0.8832610249519348, "logps/chosen": -0.9927308559417725, "logps/rejected": -1.1920729875564575, "loss": 1.147, "nll_loss": 1.1945948600769043, "rewards/accuracies": 0.75, "rewards/chosen": -0.09927307814359665, "rewards/margins": 0.019934214651584625, "rewards/rejected": -0.11920729279518127, "step": 1242 }, { "epoch": 0.7582736007320421, "grad_norm": 1.3498849868774414, "learning_rate": 6.811512553582363e-06, "log_odds_chosen": 1.1753326654434204, "log_odds_ratio": -0.5886012315750122, "logits/chosen": -0.8138304948806763, "logits/rejected": -0.7568641901016235, "logps/chosen": -0.831272304058075, "logps/rejected": -1.707273244857788, "loss": 1.1978, "nll_loss": 1.1289796829223633, "rewards/accuracies": 0.625, "rewards/chosen": -0.0831272304058075, "rewards/margins": 0.08760011941194534, "rewards/rejected": -0.17072735726833344, "step": 1243 }, { "epoch": 0.758883635809059, "grad_norm": 4.632528781890869, "learning_rate": 6.810532761788119e-06, "log_odds_chosen": 1.2892645597457886, "log_odds_ratio": -0.3969135284423828, "logits/chosen": -0.7637213468551636, "logits/rejected": -0.9364103078842163, "logps/chosen": -0.7080016136169434, "logps/rejected": -1.549295425415039, "loss": 1.1322, "nll_loss": 0.9769567251205444, "rewards/accuracies": 0.875, "rewards/chosen": -0.07080015540122986, "rewards/margins": 0.08412938565015793, "rewards/rejected": -0.1549295336008072, "step": 1244 }, { "epoch": 0.759493670886076, "grad_norm": 1.5161769390106201, "learning_rate": 6.809552969993876e-06, "log_odds_chosen": 0.6038503646850586, "log_odds_ratio": -0.49986976385116577, "logits/chosen": -0.9872438907623291, "logits/rejected": -0.9454985857009888, "logps/chosen": -0.9348065853118896, "logps/rejected": -1.3024386167526245, "loss": 1.0243, "nll_loss": 1.054766297340393, "rewards/accuracies": 0.75, "rewards/chosen": -0.0934806615114212, "rewards/margins": 0.03676320239901543, "rewards/rejected": -0.13024386763572693, "step": 1245 }, { "epoch": 0.7601037059630928, "grad_norm": 4.183884143829346, "learning_rate": 6.808573178199632e-06, "log_odds_chosen": 1.4682786464691162, "log_odds_ratio": -0.40462058782577515, "logits/chosen": -0.8457642793655396, "logits/rejected": -0.8769279718399048, "logps/chosen": -0.6657716631889343, "logps/rejected": -1.6005866527557373, "loss": 1.2167, "nll_loss": 0.9580233097076416, "rewards/accuracies": 0.75, "rewards/chosen": -0.06657716631889343, "rewards/margins": 0.09348149597644806, "rewards/rejected": -0.1600586473941803, "step": 1246 }, { "epoch": 0.7607137410401098, "grad_norm": 1.1230095624923706, "learning_rate": 6.807593386405389e-06, "log_odds_chosen": 0.5527480840682983, "log_odds_ratio": -0.7261210680007935, "logits/chosen": -1.139204502105713, "logits/rejected": -1.0068446397781372, "logps/chosen": -0.9297029972076416, "logps/rejected": -1.3563416004180908, "loss": 1.1297, "nll_loss": 1.2120940685272217, "rewards/accuracies": 0.5, "rewards/chosen": -0.09297031164169312, "rewards/margins": 0.04266386106610298, "rewards/rejected": -0.1356341689825058, "step": 1247 }, { "epoch": 0.7613237761171268, "grad_norm": 1.4298099279403687, "learning_rate": 6.806613594611145e-06, "log_odds_chosen": 1.6377625465393066, "log_odds_ratio": -0.2289559245109558, "logits/chosen": -0.7962802648544312, "logits/rejected": -0.5589193105697632, "logps/chosen": -0.5120678544044495, "logps/rejected": -1.511116862297058, "loss": 1.1192, "nll_loss": 0.7287614345550537, "rewards/accuracies": 1.0, "rewards/chosen": -0.051206789910793304, "rewards/margins": 0.09990490227937698, "rewards/rejected": -0.1511116921901703, "step": 1248 }, { "epoch": 0.7619338111941437, "grad_norm": 1.7442259788513184, "learning_rate": 6.805633802816901e-06, "log_odds_chosen": 0.9203789234161377, "log_odds_ratio": -0.4199652373790741, "logits/chosen": -0.9000710248947144, "logits/rejected": -0.8364545106887817, "logps/chosen": -0.7863472700119019, "logps/rejected": -1.319075584411621, "loss": 1.3036, "nll_loss": 1.2258416414260864, "rewards/accuracies": 0.875, "rewards/chosen": -0.07863472402095795, "rewards/margins": 0.05327284336090088, "rewards/rejected": -0.13190756738185883, "step": 1249 }, { "epoch": 0.7625438462711606, "grad_norm": 7.052353382110596, "learning_rate": 6.804654011022658e-06, "log_odds_chosen": 1.0469342470169067, "log_odds_ratio": -0.3805442452430725, "logits/chosen": -1.137779951095581, "logits/rejected": -1.074167251586914, "logps/chosen": -0.9281176328659058, "logps/rejected": -1.6920775175094604, "loss": 1.2359, "nll_loss": 1.2150204181671143, "rewards/accuracies": 0.875, "rewards/chosen": -0.09281177073717117, "rewards/margins": 0.07639598101377487, "rewards/rejected": -0.16920775175094604, "step": 1250 }, { "epoch": 0.7631538813481775, "grad_norm": 1.4997386932373047, "learning_rate": 6.803674219228414e-06, "log_odds_chosen": 1.0068814754486084, "log_odds_ratio": -0.4310882091522217, "logits/chosen": -0.9119139909744263, "logits/rejected": -0.9528170824050903, "logps/chosen": -0.7612082362174988, "logps/rejected": -1.3711450099945068, "loss": 1.1348, "nll_loss": 1.0742994546890259, "rewards/accuracies": 0.75, "rewards/chosen": -0.07612082362174988, "rewards/margins": 0.060993678867816925, "rewards/rejected": -0.1371144950389862, "step": 1251 }, { "epoch": 0.7637639164251945, "grad_norm": 1.5866376161575317, "learning_rate": 6.80269442743417e-06, "log_odds_chosen": 0.45400920510292053, "log_odds_ratio": -0.6428136825561523, "logits/chosen": -1.0832723379135132, "logits/rejected": -0.9281458258628845, "logps/chosen": -0.839452862739563, "logps/rejected": -1.1523663997650146, "loss": 1.2083, "nll_loss": 1.3359651565551758, "rewards/accuracies": 0.5, "rewards/chosen": -0.08394529670476913, "rewards/margins": 0.03129134327173233, "rewards/rejected": -0.11523663252592087, "step": 1252 }, { "epoch": 0.7643739515022113, "grad_norm": 1.31661856174469, "learning_rate": 6.801714635639927e-06, "log_odds_chosen": 0.8013429045677185, "log_odds_ratio": -0.47116073966026306, "logits/chosen": -0.7465466856956482, "logits/rejected": -0.831333339214325, "logps/chosen": -0.6861192584037781, "logps/rejected": -1.2046701908111572, "loss": 0.9645, "nll_loss": 0.8864420056343079, "rewards/accuracies": 0.625, "rewards/chosen": -0.06861192733049393, "rewards/margins": 0.05185509845614433, "rewards/rejected": -0.12046702206134796, "step": 1253 }, { "epoch": 0.7649839865792283, "grad_norm": 1.3230125904083252, "learning_rate": 6.800734843845682e-06, "log_odds_chosen": -0.19955861568450928, "log_odds_ratio": -0.9642066955566406, "logits/chosen": -0.8927576541900635, "logits/rejected": -0.887553334236145, "logps/chosen": -0.9130954742431641, "logps/rejected": -0.8287105560302734, "loss": 1.045, "nll_loss": 1.12814199924469, "rewards/accuracies": 0.25, "rewards/chosen": -0.0913095474243164, "rewards/margins": -0.008438493125140667, "rewards/rejected": -0.08287104964256287, "step": 1254 }, { "epoch": 0.7655940216562452, "grad_norm": 2.497358798980713, "learning_rate": 6.799755052051438e-06, "log_odds_chosen": 0.9262201189994812, "log_odds_ratio": -0.5293926000595093, "logits/chosen": -0.9627757668495178, "logits/rejected": -0.9286614060401917, "logps/chosen": -1.0059423446655273, "logps/rejected": -1.5930877923965454, "loss": 1.2388, "nll_loss": 1.2065545320510864, "rewards/accuracies": 0.75, "rewards/chosen": -0.10059423744678497, "rewards/margins": 0.05871454253792763, "rewards/rejected": -0.1593087762594223, "step": 1255 }, { "epoch": 0.7662040567332622, "grad_norm": 2.008523464202881, "learning_rate": 6.798775260257195e-06, "log_odds_chosen": 1.173075556755066, "log_odds_ratio": -0.38528192043304443, "logits/chosen": -0.8537023067474365, "logits/rejected": -0.7918007969856262, "logps/chosen": -0.6993890404701233, "logps/rejected": -1.4505215883255005, "loss": 1.0728, "nll_loss": 0.9011071920394897, "rewards/accuracies": 1.0, "rewards/chosen": -0.06993890553712845, "rewards/margins": 0.07511326670646667, "rewards/rejected": -0.14505216479301453, "step": 1256 }, { "epoch": 0.766814091810279, "grad_norm": 1.1889755725860596, "learning_rate": 6.797795468462951e-06, "log_odds_chosen": 2.328763246536255, "log_odds_ratio": -0.26231318712234497, "logits/chosen": -0.9230266809463501, "logits/rejected": -0.9101105332374573, "logps/chosen": -0.5617315769195557, "logps/rejected": -2.2975635528564453, "loss": 0.9647, "nll_loss": 0.886939287185669, "rewards/accuracies": 0.75, "rewards/chosen": -0.05617315694689751, "rewards/margins": 0.17358320951461792, "rewards/rejected": -0.22975635528564453, "step": 1257 }, { "epoch": 0.767424126887296, "grad_norm": 1.8528523445129395, "learning_rate": 6.7968156766687075e-06, "log_odds_chosen": 1.2759888172149658, "log_odds_ratio": -0.4406035840511322, "logits/chosen": -0.8377573490142822, "logits/rejected": -0.9757012128829956, "logps/chosen": -0.9952411651611328, "logps/rejected": -2.016599178314209, "loss": 1.1668, "nll_loss": 1.2314401865005493, "rewards/accuracies": 0.75, "rewards/chosen": -0.09952412545681, "rewards/margins": 0.10213577747344971, "rewards/rejected": -0.2016599029302597, "step": 1258 }, { "epoch": 0.768034161964313, "grad_norm": 1.4157911539077759, "learning_rate": 6.795835884874464e-06, "log_odds_chosen": 1.6558177471160889, "log_odds_ratio": -0.26634255051612854, "logits/chosen": -0.9132752418518066, "logits/rejected": -0.6183645129203796, "logps/chosen": -0.824556291103363, "logps/rejected": -2.0722243785858154, "loss": 1.1028, "nll_loss": 0.96193927526474, "rewards/accuracies": 0.875, "rewards/chosen": -0.08245562762022018, "rewards/margins": 0.12476681172847748, "rewards/rejected": -0.20722246170043945, "step": 1259 }, { "epoch": 0.7686441970413299, "grad_norm": 1.7281244993209839, "learning_rate": 6.79485609308022e-06, "log_odds_chosen": 0.25505760312080383, "log_odds_ratio": -0.7927278876304626, "logits/chosen": -1.0227365493774414, "logits/rejected": -0.8208057284355164, "logps/chosen": -1.1737264394760132, "logps/rejected": -1.2897624969482422, "loss": 1.2246, "nll_loss": 1.20357084274292, "rewards/accuracies": 0.625, "rewards/chosen": -0.11737264692783356, "rewards/margins": 0.011603601276874542, "rewards/rejected": -0.1289762556552887, "step": 1260 }, { "epoch": 0.7692542321183468, "grad_norm": 1.9025169610977173, "learning_rate": 6.793876301285977e-06, "log_odds_chosen": 1.0630764961242676, "log_odds_ratio": -0.4958150088787079, "logits/chosen": -0.8985300660133362, "logits/rejected": -0.8530818223953247, "logps/chosen": -0.8832361698150635, "logps/rejected": -1.7418875694274902, "loss": 1.327, "nll_loss": 1.296839714050293, "rewards/accuracies": 0.75, "rewards/chosen": -0.08832362294197083, "rewards/margins": 0.08586514741182327, "rewards/rejected": -0.1741887629032135, "step": 1261 }, { "epoch": 0.7698642671953637, "grad_norm": 2.477750301361084, "learning_rate": 6.792896509491733e-06, "log_odds_chosen": 1.0817790031433105, "log_odds_ratio": -0.4789431691169739, "logits/chosen": -1.0097476243972778, "logits/rejected": -0.8642555475234985, "logps/chosen": -0.983481228351593, "logps/rejected": -1.7398216724395752, "loss": 1.1837, "nll_loss": 1.2067279815673828, "rewards/accuracies": 0.75, "rewards/chosen": -0.09834812581539154, "rewards/margins": 0.07563403993844986, "rewards/rejected": -0.173982173204422, "step": 1262 }, { "epoch": 0.7704743022723807, "grad_norm": 1.3721189498901367, "learning_rate": 6.791916717697489e-06, "log_odds_chosen": 1.1624782085418701, "log_odds_ratio": -0.4025757312774658, "logits/chosen": -0.7359796166419983, "logits/rejected": -0.7431483268737793, "logps/chosen": -0.6440922021865845, "logps/rejected": -1.373360514640808, "loss": 1.1741, "nll_loss": 0.8405295610427856, "rewards/accuracies": 0.75, "rewards/chosen": -0.06440922617912292, "rewards/margins": 0.072926826775074, "rewards/rejected": -0.13733604550361633, "step": 1263 }, { "epoch": 0.7710843373493976, "grad_norm": 1.9137805700302124, "learning_rate": 6.790936925903246e-06, "log_odds_chosen": 0.999713122844696, "log_odds_ratio": -0.4777379035949707, "logits/chosen": -0.7550036311149597, "logits/rejected": -0.999810516834259, "logps/chosen": -0.7296668291091919, "logps/rejected": -1.4231325387954712, "loss": 0.9958, "nll_loss": 0.8996728658676147, "rewards/accuracies": 0.75, "rewards/chosen": -0.07296667993068695, "rewards/margins": 0.0693465918302536, "rewards/rejected": -0.14231325685977936, "step": 1264 }, { "epoch": 0.7716943724264145, "grad_norm": 1.7256122827529907, "learning_rate": 6.789957134109001e-06, "log_odds_chosen": 1.377347707748413, "log_odds_ratio": -0.35049182176589966, "logits/chosen": -0.8041257858276367, "logits/rejected": -0.8869597911834717, "logps/chosen": -0.8531670570373535, "logps/rejected": -1.8535746335983276, "loss": 1.0281, "nll_loss": 1.0627636909484863, "rewards/accuracies": 1.0, "rewards/chosen": -0.08531670272350311, "rewards/margins": 0.1000407487154007, "rewards/rejected": -0.185357466340065, "step": 1265 }, { "epoch": 0.7723044075034314, "grad_norm": 1.4207096099853516, "learning_rate": 6.788977342314757e-06, "log_odds_chosen": 2.059678792953491, "log_odds_ratio": -0.2790129780769348, "logits/chosen": -0.7055413722991943, "logits/rejected": -0.8390336036682129, "logps/chosen": -0.6775014400482178, "logps/rejected": -2.0660016536712646, "loss": 0.9558, "nll_loss": 0.8272958993911743, "rewards/accuracies": 0.875, "rewards/chosen": -0.06775014102458954, "rewards/margins": 0.13885001838207245, "rewards/rejected": -0.206600159406662, "step": 1266 }, { "epoch": 0.7729144425804484, "grad_norm": 1.8664073944091797, "learning_rate": 6.787997550520514e-06, "log_odds_chosen": 0.5967732667922974, "log_odds_ratio": -0.5651599168777466, "logits/chosen": -0.9352601766586304, "logits/rejected": -0.8387495279312134, "logps/chosen": -0.7993851900100708, "logps/rejected": -1.2933588027954102, "loss": 0.9517, "nll_loss": 0.9091963768005371, "rewards/accuracies": 0.75, "rewards/chosen": -0.07993852347135544, "rewards/margins": 0.049397364258766174, "rewards/rejected": -0.12933588027954102, "step": 1267 }, { "epoch": 0.7735244776574653, "grad_norm": 1.5329726934432983, "learning_rate": 6.7870177587262704e-06, "log_odds_chosen": 1.2553616762161255, "log_odds_ratio": -0.45948493480682373, "logits/chosen": -1.071832299232483, "logits/rejected": -0.9922913312911987, "logps/chosen": -0.8764066100120544, "logps/rejected": -1.7724554538726807, "loss": 1.1125, "nll_loss": 1.1613085269927979, "rewards/accuracies": 0.75, "rewards/chosen": -0.0876406580209732, "rewards/margins": 0.08960489183664322, "rewards/rejected": -0.17724555730819702, "step": 1268 }, { "epoch": 0.7741345127344822, "grad_norm": 2.1226253509521484, "learning_rate": 6.7860379669320266e-06, "log_odds_chosen": 1.6407954692840576, "log_odds_ratio": -0.31245678663253784, "logits/chosen": -0.9255495071411133, "logits/rejected": -0.9117434024810791, "logps/chosen": -0.5675399303436279, "logps/rejected": -1.504948616027832, "loss": 1.2592, "nll_loss": 1.056009292602539, "rewards/accuracies": 0.75, "rewards/chosen": -0.05675399675965309, "rewards/margins": 0.09374086558818817, "rewards/rejected": -0.15049487352371216, "step": 1269 }, { "epoch": 0.7747445478114992, "grad_norm": 2.4525437355041504, "learning_rate": 6.7850581751377835e-06, "log_odds_chosen": 0.6527649164199829, "log_odds_ratio": -0.6870501637458801, "logits/chosen": -1.1165963411331177, "logits/rejected": -1.1033090353012085, "logps/chosen": -1.003313422203064, "logps/rejected": -1.5099157094955444, "loss": 1.1181, "nll_loss": 1.242093563079834, "rewards/accuracies": 0.375, "rewards/chosen": -0.10033133625984192, "rewards/margins": 0.050660230219364166, "rewards/rejected": -0.1509915590286255, "step": 1270 }, { "epoch": 0.7753545828885161, "grad_norm": 2.250605583190918, "learning_rate": 6.784078383343539e-06, "log_odds_chosen": 0.7048159837722778, "log_odds_ratio": -0.6052714586257935, "logits/chosen": -0.9550564289093018, "logits/rejected": -1.0026196241378784, "logps/chosen": -0.8000485897064209, "logps/rejected": -1.2176382541656494, "loss": 1.2747, "nll_loss": 1.2251489162445068, "rewards/accuracies": 0.5, "rewards/chosen": -0.08000487089157104, "rewards/margins": 0.041758958250284195, "rewards/rejected": -0.12176381796598434, "step": 1271 }, { "epoch": 0.7759646179655331, "grad_norm": 1.1164284944534302, "learning_rate": 6.783098591549295e-06, "log_odds_chosen": 1.1807857751846313, "log_odds_ratio": -0.4042457342147827, "logits/chosen": -0.7846145629882812, "logits/rejected": -0.8968318700790405, "logps/chosen": -0.6105986833572388, "logps/rejected": -1.3602662086486816, "loss": 1.0658, "nll_loss": 0.7518090009689331, "rewards/accuracies": 0.875, "rewards/chosen": -0.0610598661005497, "rewards/margins": 0.07496676594018936, "rewards/rejected": -0.13602663576602936, "step": 1272 }, { "epoch": 0.7765746530425499, "grad_norm": 1.6152141094207764, "learning_rate": 6.782118799755052e-06, "log_odds_chosen": 1.4463176727294922, "log_odds_ratio": -0.4140373170375824, "logits/chosen": -0.8469927906990051, "logits/rejected": -0.873870313167572, "logps/chosen": -0.5429831743240356, "logps/rejected": -1.414610505104065, "loss": 1.0208, "nll_loss": 0.7722726464271545, "rewards/accuracies": 0.75, "rewards/chosen": -0.054298318922519684, "rewards/margins": 0.08716273307800293, "rewards/rejected": -0.14146104454994202, "step": 1273 }, { "epoch": 0.7771846881195669, "grad_norm": 2.169219493865967, "learning_rate": 6.781139007960808e-06, "log_odds_chosen": 1.7630268335342407, "log_odds_ratio": -0.25651735067367554, "logits/chosen": -0.6420461535453796, "logits/rejected": -0.7177380323410034, "logps/chosen": -0.6968342065811157, "logps/rejected": -1.911059021949768, "loss": 1.1161, "nll_loss": 0.8820439577102661, "rewards/accuracies": 1.0, "rewards/chosen": -0.06968341767787933, "rewards/margins": 0.12142248451709747, "rewards/rejected": -0.1911059021949768, "step": 1274 }, { "epoch": 0.7777947231965838, "grad_norm": 1.454458236694336, "learning_rate": 6.780159216166565e-06, "log_odds_chosen": 1.1289238929748535, "log_odds_ratio": -0.460309237241745, "logits/chosen": -0.910642683506012, "logits/rejected": -1.002609372138977, "logps/chosen": -0.6385757923126221, "logps/rejected": -1.4001920223236084, "loss": 1.2463, "nll_loss": 1.1142464876174927, "rewards/accuracies": 0.75, "rewards/chosen": -0.06385758519172668, "rewards/margins": 0.07616162300109863, "rewards/rejected": -0.14001920819282532, "step": 1275 }, { "epoch": 0.7784047582736008, "grad_norm": 3.436091661453247, "learning_rate": 6.77917942437232e-06, "log_odds_chosen": 0.9246940016746521, "log_odds_ratio": -0.5329384207725525, "logits/chosen": -1.0863292217254639, "logits/rejected": -1.0383188724517822, "logps/chosen": -0.8276329040527344, "logps/rejected": -1.5264031887054443, "loss": 1.0916, "nll_loss": 1.330397129058838, "rewards/accuracies": 0.625, "rewards/chosen": -0.08276329189538956, "rewards/margins": 0.069877028465271, "rewards/rejected": -0.15264031291007996, "step": 1276 }, { "epoch": 0.7790147933506176, "grad_norm": 2.7369587421417236, "learning_rate": 6.778199632578076e-06, "log_odds_chosen": 0.8036502599716187, "log_odds_ratio": -0.5666468143463135, "logits/chosen": -1.05802583694458, "logits/rejected": -1.067873239517212, "logps/chosen": -0.8286337852478027, "logps/rejected": -1.374237298965454, "loss": 0.9811, "nll_loss": 1.0266351699829102, "rewards/accuracies": 0.75, "rewards/chosen": -0.08286338299512863, "rewards/margins": 0.054560352116823196, "rewards/rejected": -0.13742373883724213, "step": 1277 }, { "epoch": 0.7796248284276346, "grad_norm": 1.7511359453201294, "learning_rate": 6.777219840783833e-06, "log_odds_chosen": 0.6103162169456482, "log_odds_ratio": -0.5705099701881409, "logits/chosen": -1.066351294517517, "logits/rejected": -0.9255253076553345, "logps/chosen": -0.9667710065841675, "logps/rejected": -1.380812406539917, "loss": 1.2567, "nll_loss": 1.2006185054779053, "rewards/accuracies": 0.625, "rewards/chosen": -0.09667709469795227, "rewards/margins": 0.04140413925051689, "rewards/rejected": -0.13808123767375946, "step": 1278 }, { "epoch": 0.7802348635046515, "grad_norm": 2.846998453140259, "learning_rate": 6.7762400489895895e-06, "log_odds_chosen": 0.727945864200592, "log_odds_ratio": -0.5460273623466492, "logits/chosen": -0.9228030443191528, "logits/rejected": -1.0167967081069946, "logps/chosen": -1.0233266353607178, "logps/rejected": -1.486668586730957, "loss": 1.13, "nll_loss": 1.3134618997573853, "rewards/accuracies": 0.75, "rewards/chosen": -0.10233266651630402, "rewards/margins": 0.04633420333266258, "rewards/rejected": -0.1486668735742569, "step": 1279 }, { "epoch": 0.7808448985816685, "grad_norm": 1.5564478635787964, "learning_rate": 6.775260257195346e-06, "log_odds_chosen": 1.3861522674560547, "log_odds_ratio": -0.3809509873390198, "logits/chosen": -1.0133569240570068, "logits/rejected": -1.0206589698791504, "logps/chosen": -0.8445816040039062, "logps/rejected": -1.8997936248779297, "loss": 0.9362, "nll_loss": 1.1274867057800293, "rewards/accuracies": 0.625, "rewards/chosen": -0.08445816487073898, "rewards/margins": 0.10552121698856354, "rewards/rejected": -0.18997938930988312, "step": 1280 }, { "epoch": 0.7814549336586853, "grad_norm": 2.200601100921631, "learning_rate": 6.7742804654011026e-06, "log_odds_chosen": 1.3130791187286377, "log_odds_ratio": -0.46815478801727295, "logits/chosen": -0.8241785168647766, "logits/rejected": -0.9839587807655334, "logps/chosen": -0.632724404335022, "logps/rejected": -1.508515477180481, "loss": 1.1278, "nll_loss": 0.9116723537445068, "rewards/accuracies": 0.75, "rewards/chosen": -0.06327244639396667, "rewards/margins": 0.08757909387350082, "rewards/rejected": -0.1508515477180481, "step": 1281 }, { "epoch": 0.7820649687357023, "grad_norm": 2.7042582035064697, "learning_rate": 6.773300673606858e-06, "log_odds_chosen": 0.5058876276016235, "log_odds_ratio": -0.5567942261695862, "logits/chosen": -1.1120033264160156, "logits/rejected": -1.0577471256256104, "logps/chosen": -1.0295617580413818, "logps/rejected": -1.3300447463989258, "loss": 1.1516, "nll_loss": 1.06985342502594, "rewards/accuracies": 0.75, "rewards/chosen": -0.10295617580413818, "rewards/margins": 0.030048299580812454, "rewards/rejected": -0.13300448656082153, "step": 1282 }, { "epoch": 0.7826750038127193, "grad_norm": 1.8672176599502563, "learning_rate": 6.772320881812614e-06, "log_odds_chosen": 2.3500585556030273, "log_odds_ratio": -0.2361392229795456, "logits/chosen": -0.6407598853111267, "logits/rejected": -0.9048489332199097, "logps/chosen": -0.5472018122673035, "logps/rejected": -2.1347146034240723, "loss": 0.9502, "nll_loss": 0.7766790390014648, "rewards/accuracies": 0.875, "rewards/chosen": -0.054720181971788406, "rewards/margins": 0.15875130891799927, "rewards/rejected": -0.21347147226333618, "step": 1283 }, { "epoch": 0.7832850388897361, "grad_norm": 3.7212471961975098, "learning_rate": 6.771341090018371e-06, "log_odds_chosen": 0.694406270980835, "log_odds_ratio": -0.6064453125, "logits/chosen": -0.9377741813659668, "logits/rejected": -1.0719377994537354, "logps/chosen": -0.8489385843276978, "logps/rejected": -1.3121436834335327, "loss": 1.2322, "nll_loss": 1.0456849336624146, "rewards/accuracies": 0.625, "rewards/chosen": -0.0848938599228859, "rewards/margins": 0.046320512890815735, "rewards/rejected": -0.13121436536312103, "step": 1284 }, { "epoch": 0.7838950739667531, "grad_norm": 1.3136898279190063, "learning_rate": 6.770361298224127e-06, "log_odds_chosen": 0.511707603931427, "log_odds_ratio": -0.5873465538024902, "logits/chosen": -1.0409936904907227, "logits/rejected": -1.1099443435668945, "logps/chosen": -0.8396823406219482, "logps/rejected": -1.215378761291504, "loss": 1.0735, "nll_loss": 1.0115675926208496, "rewards/accuracies": 0.625, "rewards/chosen": -0.08396822959184647, "rewards/margins": 0.037569645792245865, "rewards/rejected": -0.12153787165880203, "step": 1285 }, { "epoch": 0.78450510904377, "grad_norm": 1.5676450729370117, "learning_rate": 6.769381506429883e-06, "log_odds_chosen": 0.8604487180709839, "log_odds_ratio": -0.5755510330200195, "logits/chosen": -1.0167492628097534, "logits/rejected": -1.1208853721618652, "logps/chosen": -0.8609983325004578, "logps/rejected": -1.4820218086242676, "loss": 1.2673, "nll_loss": 1.1207982301712036, "rewards/accuracies": 0.5, "rewards/chosen": -0.08609983325004578, "rewards/margins": 0.062102336436510086, "rewards/rejected": -0.14820216596126556, "step": 1286 }, { "epoch": 0.785115144120787, "grad_norm": 1.6594849824905396, "learning_rate": 6.76840171463564e-06, "log_odds_chosen": 1.2085111141204834, "log_odds_ratio": -0.38717174530029297, "logits/chosen": -0.7759032845497131, "logits/rejected": -0.8977901339530945, "logps/chosen": -0.8441400527954102, "logps/rejected": -1.630887746810913, "loss": 1.0453, "nll_loss": 0.9746447205543518, "rewards/accuracies": 0.875, "rewards/chosen": -0.08441400527954102, "rewards/margins": 0.07867476344108582, "rewards/rejected": -0.16308876872062683, "step": 1287 }, { "epoch": 0.7857251791978038, "grad_norm": 1.1948434114456177, "learning_rate": 6.767421922841395e-06, "log_odds_chosen": 1.5724828243255615, "log_odds_ratio": -0.29479336738586426, "logits/chosen": -0.9272796511650085, "logits/rejected": -0.8923239707946777, "logps/chosen": -0.6326871514320374, "logps/rejected": -1.6769559383392334, "loss": 1.0779, "nll_loss": 0.7836211323738098, "rewards/accuracies": 0.875, "rewards/chosen": -0.06326871365308762, "rewards/margins": 0.10442689061164856, "rewards/rejected": -0.16769561171531677, "step": 1288 }, { "epoch": 0.7863352142748208, "grad_norm": 3.201749563217163, "learning_rate": 6.766442131047152e-06, "log_odds_chosen": 0.23995690047740936, "log_odds_ratio": -0.6669832468032837, "logits/chosen": -1.0912185907363892, "logits/rejected": -0.9948910474777222, "logps/chosen": -0.8571921586990356, "logps/rejected": -1.0388299226760864, "loss": 1.0444, "nll_loss": 1.0637128353118896, "rewards/accuracies": 0.375, "rewards/chosen": -0.08571922034025192, "rewards/margins": 0.018163762986660004, "rewards/rejected": -0.10388298332691193, "step": 1289 }, { "epoch": 0.7869452493518377, "grad_norm": 2.741136074066162, "learning_rate": 6.7654623392529085e-06, "log_odds_chosen": 1.4329880475997925, "log_odds_ratio": -0.3182365894317627, "logits/chosen": -0.8380365371704102, "logits/rejected": -0.8672981858253479, "logps/chosen": -0.7336521744728088, "logps/rejected": -1.6122987270355225, "loss": 1.0619, "nll_loss": 0.9347797632217407, "rewards/accuracies": 0.875, "rewards/chosen": -0.0733652263879776, "rewards/margins": 0.08786466717720032, "rewards/rejected": -0.16122989356517792, "step": 1290 }, { "epoch": 0.7875552844288547, "grad_norm": 8.971506118774414, "learning_rate": 6.764482547458665e-06, "log_odds_chosen": 0.7148635983467102, "log_odds_ratio": -0.44989949464797974, "logits/chosen": -1.0724228620529175, "logits/rejected": -1.0940827131271362, "logps/chosen": -0.8434203267097473, "logps/rejected": -1.3050544261932373, "loss": 1.0791, "nll_loss": 1.1406872272491455, "rewards/accuracies": 1.0, "rewards/chosen": -0.08434203267097473, "rewards/margins": 0.0461634136736393, "rewards/rejected": -0.13050544261932373, "step": 1291 }, { "epoch": 0.7881653195058715, "grad_norm": 20.258943557739258, "learning_rate": 6.763502755664422e-06, "log_odds_chosen": 0.048382826149463654, "log_odds_ratio": -0.7279548645019531, "logits/chosen": -1.1703201532363892, "logits/rejected": -1.097968578338623, "logps/chosen": -1.027740478515625, "logps/rejected": -1.080357313156128, "loss": 1.2257, "nll_loss": 1.1956201791763306, "rewards/accuracies": 0.5, "rewards/chosen": -0.10277405381202698, "rewards/margins": 0.005261690821498632, "rewards/rejected": -0.10803574323654175, "step": 1292 }, { "epoch": 0.7887753545828885, "grad_norm": 1.330649971961975, "learning_rate": 6.762522963870177e-06, "log_odds_chosen": 0.48673105239868164, "log_odds_ratio": -0.7228789925575256, "logits/chosen": -1.1099656820297241, "logits/rejected": -1.1019208431243896, "logps/chosen": -1.015470027923584, "logps/rejected": -1.4127641916275024, "loss": 1.0839, "nll_loss": 1.08041250705719, "rewards/accuracies": 0.625, "rewards/chosen": -0.1015469953417778, "rewards/margins": 0.03972943127155304, "rewards/rejected": -0.14127641916275024, "step": 1293 }, { "epoch": 0.7893853896599055, "grad_norm": 2.1541755199432373, "learning_rate": 6.761543172075933e-06, "log_odds_chosen": 1.0037950277328491, "log_odds_ratio": -0.42993444204330444, "logits/chosen": -0.9597757458686829, "logits/rejected": -1.0557245016098022, "logps/chosen": -0.7813237905502319, "logps/rejected": -1.5289472341537476, "loss": 1.0592, "nll_loss": 1.1602821350097656, "rewards/accuracies": 0.875, "rewards/chosen": -0.07813237607479095, "rewards/margins": 0.07476234436035156, "rewards/rejected": -0.1528947353363037, "step": 1294 }, { "epoch": 0.7899954247369224, "grad_norm": 1.1312388181686401, "learning_rate": 6.76056338028169e-06, "log_odds_chosen": 1.3635761737823486, "log_odds_ratio": -0.4462679326534271, "logits/chosen": -0.8936147689819336, "logits/rejected": -0.9087058305740356, "logps/chosen": -0.663071870803833, "logps/rejected": -1.6103185415267944, "loss": 1.0808, "nll_loss": 0.7931503057479858, "rewards/accuracies": 0.75, "rewards/chosen": -0.0663071870803833, "rewards/margins": 0.09472465515136719, "rewards/rejected": -0.1610318422317505, "step": 1295 }, { "epoch": 0.7906054598139393, "grad_norm": 1.9283818006515503, "learning_rate": 6.759583588487446e-06, "log_odds_chosen": 1.7241723537445068, "log_odds_ratio": -0.5091837048530579, "logits/chosen": -0.8539417386054993, "logits/rejected": -1.0985435247421265, "logps/chosen": -0.8394866585731506, "logps/rejected": -2.060542583465576, "loss": 1.0561, "nll_loss": 1.0631129741668701, "rewards/accuracies": 0.625, "rewards/chosen": -0.08394867181777954, "rewards/margins": 0.12210558354854584, "rewards/rejected": -0.20605424046516418, "step": 1296 }, { "epoch": 0.7912154948909562, "grad_norm": 0.9109266400337219, "learning_rate": 6.758603796693202e-06, "log_odds_chosen": 0.34968167543411255, "log_odds_ratio": -0.6088711023330688, "logits/chosen": -1.1152780055999756, "logits/rejected": -0.9608190059661865, "logps/chosen": -1.0180257558822632, "logps/rejected": -1.3033766746520996, "loss": 1.1098, "nll_loss": 1.1497609615325928, "rewards/accuracies": 0.5, "rewards/chosen": -0.10180257260799408, "rewards/margins": 0.02853509411215782, "rewards/rejected": -0.1303376704454422, "step": 1297 }, { "epoch": 0.7918255299679732, "grad_norm": 2.636390447616577, "learning_rate": 6.757624004898959e-06, "log_odds_chosen": 0.6707031726837158, "log_odds_ratio": -0.5339587926864624, "logits/chosen": -1.1384799480438232, "logits/rejected": -1.0943869352340698, "logps/chosen": -0.8921812772750854, "logps/rejected": -1.447007417678833, "loss": 1.2263, "nll_loss": 1.3073550462722778, "rewards/accuracies": 0.625, "rewards/chosen": -0.0892181396484375, "rewards/margins": 0.05548261106014252, "rewards/rejected": -0.14470075070858002, "step": 1298 }, { "epoch": 0.7924355650449901, "grad_norm": 1.8064340353012085, "learning_rate": 6.7566442131047145e-06, "log_odds_chosen": 1.1854747533798218, "log_odds_ratio": -0.453471302986145, "logits/chosen": -1.0620760917663574, "logits/rejected": -1.1125297546386719, "logps/chosen": -1.1961991786956787, "logps/rejected": -2.042848587036133, "loss": 1.0762, "nll_loss": 1.3723677396774292, "rewards/accuracies": 0.625, "rewards/chosen": -0.11961992084980011, "rewards/margins": 0.08466494083404541, "rewards/rejected": -0.20428486168384552, "step": 1299 }, { "epoch": 0.793045600122007, "grad_norm": 1.394036889076233, "learning_rate": 6.755664421310471e-06, "log_odds_chosen": 0.713001549243927, "log_odds_ratio": -0.4799776077270508, "logits/chosen": -0.8985599279403687, "logits/rejected": -0.8451971411705017, "logps/chosen": -0.6232743263244629, "logps/rejected": -1.0353193283081055, "loss": 0.9631, "nll_loss": 0.8952054381370544, "rewards/accuracies": 0.75, "rewards/chosen": -0.06232743337750435, "rewards/margins": 0.04120449721813202, "rewards/rejected": -0.10353193432092667, "step": 1300 }, { "epoch": 0.7936556351990239, "grad_norm": 2.4170823097229004, "learning_rate": 6.7546846295162275e-06, "log_odds_chosen": 1.6483705043792725, "log_odds_ratio": -0.3461966812610626, "logits/chosen": -0.691068172454834, "logits/rejected": -0.7483983039855957, "logps/chosen": -0.895738959312439, "logps/rejected": -2.2162866592407227, "loss": 1.1027, "nll_loss": 0.9718185663223267, "rewards/accuracies": 0.875, "rewards/chosen": -0.08957388997077942, "rewards/margins": 0.13205479085445404, "rewards/rejected": -0.22162868082523346, "step": 1301 }, { "epoch": 0.7942656702760409, "grad_norm": 1.5938825607299805, "learning_rate": 6.753704837721984e-06, "log_odds_chosen": 0.6672198176383972, "log_odds_ratio": -0.4939280152320862, "logits/chosen": -1.0226341485977173, "logits/rejected": -0.9659593105316162, "logps/chosen": -0.6960126757621765, "logps/rejected": -1.02862548828125, "loss": 1.145, "nll_loss": 0.8589034080505371, "rewards/accuracies": 0.75, "rewards/chosen": -0.06960126757621765, "rewards/margins": 0.03326128423213959, "rewards/rejected": -0.10286255180835724, "step": 1302 }, { "epoch": 0.7948757053530578, "grad_norm": 1.4117190837860107, "learning_rate": 6.752725045927741e-06, "log_odds_chosen": 0.7408321499824524, "log_odds_ratio": -0.6720131635665894, "logits/chosen": -1.1413893699645996, "logits/rejected": -1.212834358215332, "logps/chosen": -0.9620422124862671, "logps/rejected": -1.4785572290420532, "loss": 1.1101, "nll_loss": 1.3371844291687012, "rewards/accuracies": 0.5, "rewards/chosen": -0.09620422124862671, "rewards/margins": 0.05165150389075279, "rewards/rejected": -0.1478557288646698, "step": 1303 }, { "epoch": 0.7954857404300747, "grad_norm": 1.722527265548706, "learning_rate": 6.751745254133497e-06, "log_odds_chosen": 1.3231405019760132, "log_odds_ratio": -0.46200934052467346, "logits/chosen": -0.8706014156341553, "logits/rejected": -0.9453005790710449, "logps/chosen": -0.6618320345878601, "logps/rejected": -1.4652750492095947, "loss": 1.0139, "nll_loss": 0.892475962638855, "rewards/accuracies": 0.625, "rewards/chosen": -0.06618320196866989, "rewards/margins": 0.0803442969918251, "rewards/rejected": -0.146527498960495, "step": 1304 }, { "epoch": 0.7960957755070917, "grad_norm": 3.612717390060425, "learning_rate": 6.750765462339252e-06, "log_odds_chosen": 0.5511782765388489, "log_odds_ratio": -0.5980492830276489, "logits/chosen": -0.9578031897544861, "logits/rejected": -1.0519065856933594, "logps/chosen": -1.0450998544692993, "logps/rejected": -1.4017360210418701, "loss": 1.2486, "nll_loss": 1.11586332321167, "rewards/accuracies": 0.625, "rewards/chosen": -0.10450997948646545, "rewards/margins": 0.03566361218690872, "rewards/rejected": -0.14017359912395477, "step": 1305 }, { "epoch": 0.7967058105841086, "grad_norm": 1.983351707458496, "learning_rate": 6.749785670545009e-06, "log_odds_chosen": 0.9030053615570068, "log_odds_ratio": -0.49052947759628296, "logits/chosen": -0.9839359521865845, "logits/rejected": -1.0345373153686523, "logps/chosen": -0.7589867115020752, "logps/rejected": -1.3704888820648193, "loss": 1.0499, "nll_loss": 0.8873736262321472, "rewards/accuracies": 0.625, "rewards/chosen": -0.075898677110672, "rewards/margins": 0.06115021929144859, "rewards/rejected": -0.1370488852262497, "step": 1306 }, { "epoch": 0.7973158456611256, "grad_norm": 1.6320996284484863, "learning_rate": 6.748805878750765e-06, "log_odds_chosen": 0.8978859782218933, "log_odds_ratio": -0.46957463026046753, "logits/chosen": -0.9232217073440552, "logits/rejected": -1.0312443971633911, "logps/chosen": -0.9474601149559021, "logps/rejected": -1.4162871837615967, "loss": 1.0885, "nll_loss": 0.947300374507904, "rewards/accuracies": 0.625, "rewards/chosen": -0.09474600851535797, "rewards/margins": 0.04688272625207901, "rewards/rejected": -0.14162874221801758, "step": 1307 }, { "epoch": 0.7979258807381424, "grad_norm": 1.5137417316436768, "learning_rate": 6.747826086956521e-06, "log_odds_chosen": 0.7523982524871826, "log_odds_ratio": -0.5103393793106079, "logits/chosen": -1.0364770889282227, "logits/rejected": -0.9801818132400513, "logps/chosen": -0.8773798942565918, "logps/rejected": -1.3387137651443481, "loss": 1.2187, "nll_loss": 1.0407766103744507, "rewards/accuracies": 0.75, "rewards/chosen": -0.08773799985647202, "rewards/margins": 0.0461333766579628, "rewards/rejected": -0.13387137651443481, "step": 1308 }, { "epoch": 0.7985359158151594, "grad_norm": 2.0960991382598877, "learning_rate": 6.746846295162278e-06, "log_odds_chosen": 0.36438319087028503, "log_odds_ratio": -0.5296230912208557, "logits/chosen": -1.1638107299804688, "logits/rejected": -1.2453064918518066, "logps/chosen": -0.8415290117263794, "logps/rejected": -1.065016746520996, "loss": 0.9965, "nll_loss": 1.0830090045928955, "rewards/accuracies": 1.0, "rewards/chosen": -0.08415290713310242, "rewards/margins": 0.022348765283823013, "rewards/rejected": -0.10650167614221573, "step": 1309 }, { "epoch": 0.7991459508921763, "grad_norm": 1.3963054418563843, "learning_rate": 6.745866503368034e-06, "log_odds_chosen": 0.4716557264328003, "log_odds_ratio": -0.681419849395752, "logits/chosen": -0.9525138139724731, "logits/rejected": -0.9220751523971558, "logps/chosen": -0.8352946043014526, "logps/rejected": -1.133305311203003, "loss": 1.2091, "nll_loss": 1.0103747844696045, "rewards/accuracies": 0.625, "rewards/chosen": -0.08352946490049362, "rewards/margins": 0.02980106696486473, "rewards/rejected": -0.11333052814006805, "step": 1310 }, { "epoch": 0.7997559859691933, "grad_norm": 2.2669124603271484, "learning_rate": 6.74488671157379e-06, "log_odds_chosen": 0.3937423527240753, "log_odds_ratio": -0.7547153234481812, "logits/chosen": -0.9821072220802307, "logits/rejected": -1.0798171758651733, "logps/chosen": -0.9650622606277466, "logps/rejected": -1.1899718046188354, "loss": 1.1341, "nll_loss": 1.0290507078170776, "rewards/accuracies": 0.5, "rewards/chosen": -0.09650623798370361, "rewards/margins": 0.022490959614515305, "rewards/rejected": -0.11899719387292862, "step": 1311 }, { "epoch": 0.8003660210462101, "grad_norm": 3.8472249507904053, "learning_rate": 6.743906919779547e-06, "log_odds_chosen": 0.6923986077308655, "log_odds_ratio": -0.5813309550285339, "logits/chosen": -1.1409162282943726, "logits/rejected": -1.2049410343170166, "logps/chosen": -0.8670406937599182, "logps/rejected": -1.4346171617507935, "loss": 1.1923, "nll_loss": 1.133327603340149, "rewards/accuracies": 0.625, "rewards/chosen": -0.0867040753364563, "rewards/margins": 0.05675764009356499, "rewards/rejected": -0.1434617042541504, "step": 1312 }, { "epoch": 0.8009760561232271, "grad_norm": 2.7107746601104736, "learning_rate": 6.742927127985303e-06, "log_odds_chosen": 1.353867769241333, "log_odds_ratio": -0.5566213726997375, "logits/chosen": -0.8777472972869873, "logits/rejected": -1.0241743326187134, "logps/chosen": -1.003371000289917, "logps/rejected": -1.9245727062225342, "loss": 1.1021, "nll_loss": 0.8913019895553589, "rewards/accuracies": 0.5, "rewards/chosen": -0.10033710300922394, "rewards/margins": 0.09212017059326172, "rewards/rejected": -0.19245728850364685, "step": 1313 }, { "epoch": 0.801586091200244, "grad_norm": 3.160581350326538, "learning_rate": 6.74194733619106e-06, "log_odds_chosen": 0.408004492521286, "log_odds_ratio": -0.5649124979972839, "logits/chosen": -1.1525111198425293, "logits/rejected": -1.1659770011901855, "logps/chosen": -0.7479057312011719, "logps/rejected": -0.9311686158180237, "loss": 1.1581, "nll_loss": 1.4217513799667358, "rewards/accuracies": 0.75, "rewards/chosen": -0.0747905820608139, "rewards/margins": 0.0183262899518013, "rewards/rejected": -0.0931168720126152, "step": 1314 }, { "epoch": 0.8021961262772609, "grad_norm": 1.5391029119491577, "learning_rate": 6.740967544396816e-06, "log_odds_chosen": 0.5481231212615967, "log_odds_ratio": -0.5790790915489197, "logits/chosen": -0.7980386018753052, "logits/rejected": -0.7009983062744141, "logps/chosen": -0.8123342990875244, "logps/rejected": -1.0759835243225098, "loss": 1.0226, "nll_loss": 0.8306771516799927, "rewards/accuracies": 0.625, "rewards/chosen": -0.0812334269285202, "rewards/margins": 0.026364922523498535, "rewards/rejected": -0.10759835690259933, "step": 1315 }, { "epoch": 0.8028061613542778, "grad_norm": 1.4628068208694458, "learning_rate": 6.739987752602571e-06, "log_odds_chosen": 0.5968589782714844, "log_odds_ratio": -0.6095161437988281, "logits/chosen": -1.0772347450256348, "logits/rejected": -0.9746349453926086, "logps/chosen": -0.9266807436943054, "logps/rejected": -1.3078960180282593, "loss": 1.1242, "nll_loss": 1.1759988069534302, "rewards/accuracies": 0.625, "rewards/chosen": -0.0926680862903595, "rewards/margins": 0.038121528923511505, "rewards/rejected": -0.1307896226644516, "step": 1316 }, { "epoch": 0.8034161964312948, "grad_norm": 3.706861734390259, "learning_rate": 6.739007960808328e-06, "log_odds_chosen": 0.9255368709564209, "log_odds_ratio": -0.5025635957717896, "logits/chosen": -1.161330223083496, "logits/rejected": -1.0648049116134644, "logps/chosen": -1.1018518209457397, "logps/rejected": -1.795325517654419, "loss": 1.2512, "nll_loss": 1.6253669261932373, "rewards/accuracies": 0.75, "rewards/chosen": -0.11018519103527069, "rewards/margins": 0.06934738159179688, "rewards/rejected": -0.17953255772590637, "step": 1317 }, { "epoch": 0.8040262315083118, "grad_norm": 0.9257941842079163, "learning_rate": 6.738028169014084e-06, "log_odds_chosen": 1.3055510520935059, "log_odds_ratio": -0.4009634554386139, "logits/chosen": -0.9508154392242432, "logits/rejected": -0.8912301063537598, "logps/chosen": -0.7573127746582031, "logps/rejected": -1.7221571207046509, "loss": 1.0792, "nll_loss": 0.8970714211463928, "rewards/accuracies": 0.875, "rewards/chosen": -0.07573127746582031, "rewards/margins": 0.09648443758487701, "rewards/rejected": -0.17221571505069733, "step": 1318 }, { "epoch": 0.8046362665853286, "grad_norm": 1.6211109161376953, "learning_rate": 6.73704837721984e-06, "log_odds_chosen": 1.848478078842163, "log_odds_ratio": -0.33235782384872437, "logits/chosen": -0.9372480511665344, "logits/rejected": -0.8329731225967407, "logps/chosen": -0.6171993017196655, "logps/rejected": -1.9266538619995117, "loss": 1.0774, "nll_loss": 0.8875106573104858, "rewards/accuracies": 0.875, "rewards/chosen": -0.061719927936792374, "rewards/margins": 0.13094547390937805, "rewards/rejected": -0.19266539812088013, "step": 1319 }, { "epoch": 0.8052463016623456, "grad_norm": 2.377314805984497, "learning_rate": 6.736068585425597e-06, "log_odds_chosen": 0.5237133502960205, "log_odds_ratio": -0.5874079465866089, "logits/chosen": -1.0856143236160278, "logits/rejected": -1.0807604789733887, "logps/chosen": -0.8625520467758179, "logps/rejected": -1.235710859298706, "loss": 1.0494, "nll_loss": 0.9430092573165894, "rewards/accuracies": 0.375, "rewards/chosen": -0.08625520765781403, "rewards/margins": 0.03731587156653404, "rewards/rejected": -0.12357108294963837, "step": 1320 }, { "epoch": 0.8058563367393625, "grad_norm": 1.588730812072754, "learning_rate": 6.735088793631353e-06, "log_odds_chosen": -0.16673339903354645, "log_odds_ratio": -0.8647449016571045, "logits/chosen": -1.0223592519760132, "logits/rejected": -0.9632348418235779, "logps/chosen": -1.095198631286621, "logps/rejected": -1.0257816314697266, "loss": 1.2854, "nll_loss": 1.610044240951538, "rewards/accuracies": 0.375, "rewards/chosen": -0.10951986908912659, "rewards/margins": -0.006941698957234621, "rewards/rejected": -0.10257817059755325, "step": 1321 }, { "epoch": 0.8064663718163795, "grad_norm": 1.433670163154602, "learning_rate": 6.734109001837109e-06, "log_odds_chosen": 1.0599923133850098, "log_odds_ratio": -0.4907030165195465, "logits/chosen": -0.9988442659378052, "logits/rejected": -0.948617696762085, "logps/chosen": -0.9198707342147827, "logps/rejected": -1.6663135290145874, "loss": 1.267, "nll_loss": 1.1251394748687744, "rewards/accuracies": 0.75, "rewards/chosen": -0.09198708832263947, "rewards/margins": 0.07464426755905151, "rewards/rejected": -0.16663135588169098, "step": 1322 }, { "epoch": 0.8070764068933963, "grad_norm": 2.790811538696289, "learning_rate": 6.733129210042866e-06, "log_odds_chosen": 0.304965078830719, "log_odds_ratio": -0.6270314455032349, "logits/chosen": -1.1311804056167603, "logits/rejected": -0.9304990768432617, "logps/chosen": -0.8909580111503601, "logps/rejected": -1.1458410024642944, "loss": 1.2434, "nll_loss": 1.1924583911895752, "rewards/accuracies": 0.75, "rewards/chosen": -0.08909580111503601, "rewards/margins": 0.025488311424851418, "rewards/rejected": -0.11458411067724228, "step": 1323 }, { "epoch": 0.8076864419704133, "grad_norm": 1.1431022882461548, "learning_rate": 6.732149418248622e-06, "log_odds_chosen": 2.054276466369629, "log_odds_ratio": -0.31202733516693115, "logits/chosen": -0.9021444320678711, "logits/rejected": -0.897655189037323, "logps/chosen": -0.6382509469985962, "logps/rejected": -2.112607955932617, "loss": 1.0287, "nll_loss": 0.9273234009742737, "rewards/accuracies": 0.875, "rewards/chosen": -0.0638250932097435, "rewards/margins": 0.14743570983409882, "rewards/rejected": -0.2112608104944229, "step": 1324 }, { "epoch": 0.8082964770474302, "grad_norm": 1.9374879598617554, "learning_rate": 6.731169626454378e-06, "log_odds_chosen": -0.07196379452943802, "log_odds_ratio": -0.7924830317497253, "logits/chosen": -1.0769953727722168, "logits/rejected": -1.1265983581542969, "logps/chosen": -0.9571069478988647, "logps/rejected": -0.9232685565948486, "loss": 1.21, "nll_loss": 1.2971115112304688, "rewards/accuracies": 0.375, "rewards/chosen": -0.09571069478988647, "rewards/margins": -0.0033838399685919285, "rewards/rejected": -0.09232684969902039, "step": 1325 }, { "epoch": 0.8089065121244472, "grad_norm": 1.0795093774795532, "learning_rate": 6.730189834660135e-06, "log_odds_chosen": 1.7947931289672852, "log_odds_ratio": -0.3576734960079193, "logits/chosen": -0.9268112182617188, "logits/rejected": -1.085315227508545, "logps/chosen": -0.7408013343811035, "logps/rejected": -1.8617634773254395, "loss": 0.9415, "nll_loss": 0.9431003332138062, "rewards/accuracies": 0.875, "rewards/chosen": -0.07408013194799423, "rewards/margins": 0.11209619790315628, "rewards/rejected": -0.1861763298511505, "step": 1326 }, { "epoch": 0.809516547201464, "grad_norm": 2.4031002521514893, "learning_rate": 6.729210042865891e-06, "log_odds_chosen": 0.8936187028884888, "log_odds_ratio": -0.46143388748168945, "logits/chosen": -0.9962900280952454, "logits/rejected": -1.074615716934204, "logps/chosen": -0.8078802824020386, "logps/rejected": -1.3794997930526733, "loss": 1.2098, "nll_loss": 0.9852614402770996, "rewards/accuracies": 0.625, "rewards/chosen": -0.0807880312204361, "rewards/margins": 0.05716196447610855, "rewards/rejected": -0.13794998824596405, "step": 1327 }, { "epoch": 0.810126582278481, "grad_norm": 2.101292371749878, "learning_rate": 6.728230251071646e-06, "log_odds_chosen": 0.4362945556640625, "log_odds_ratio": -0.5427440404891968, "logits/chosen": -1.03361177444458, "logits/rejected": -0.9257545471191406, "logps/chosen": -0.9064551591873169, "logps/rejected": -1.190096378326416, "loss": 1.0026, "nll_loss": 0.9727706909179688, "rewards/accuracies": 0.75, "rewards/chosen": -0.09064552187919617, "rewards/margins": 0.028364116325974464, "rewards/rejected": -0.11900964379310608, "step": 1328 }, { "epoch": 0.810736617355498, "grad_norm": 1.3648725748062134, "learning_rate": 6.727250459277403e-06, "log_odds_chosen": 1.607385516166687, "log_odds_ratio": -0.358988881111145, "logits/chosen": -0.906787097454071, "logits/rejected": -1.0438754558563232, "logps/chosen": -0.622934103012085, "logps/rejected": -1.568457841873169, "loss": 1.0593, "nll_loss": 0.9188051223754883, "rewards/accuracies": 0.875, "rewards/chosen": -0.06229341775178909, "rewards/margins": 0.09455236792564392, "rewards/rejected": -0.15684577822685242, "step": 1329 }, { "epoch": 0.8113466524325149, "grad_norm": 2.8129072189331055, "learning_rate": 6.726270667483159e-06, "log_odds_chosen": 1.0348272323608398, "log_odds_ratio": -0.5533687472343445, "logits/chosen": -1.0114262104034424, "logits/rejected": -0.9166679382324219, "logps/chosen": -0.7318557500839233, "logps/rejected": -1.3368263244628906, "loss": 1.0181, "nll_loss": 1.0582475662231445, "rewards/accuracies": 0.5, "rewards/chosen": -0.07318557798862457, "rewards/margins": 0.06049704924225807, "rewards/rejected": -0.13368262350559235, "step": 1330 }, { "epoch": 0.8119566875095318, "grad_norm": 1.6917933225631714, "learning_rate": 6.725290875688916e-06, "log_odds_chosen": 0.41779500246047974, "log_odds_ratio": -0.6663804054260254, "logits/chosen": -1.1070588827133179, "logits/rejected": -0.9965167045593262, "logps/chosen": -0.7880451679229736, "logps/rejected": -0.9838771224021912, "loss": 1.2, "nll_loss": 1.2297356128692627, "rewards/accuracies": 0.375, "rewards/chosen": -0.07880451530218124, "rewards/margins": 0.019583191722631454, "rewards/rejected": -0.0983877182006836, "step": 1331 }, { "epoch": 0.8125667225865487, "grad_norm": 1.533410906791687, "learning_rate": 6.724311083894672e-06, "log_odds_chosen": 1.230533242225647, "log_odds_ratio": -0.5520339012145996, "logits/chosen": -1.0473841428756714, "logits/rejected": -0.9565062522888184, "logps/chosen": -0.8378148078918457, "logps/rejected": -1.7733800411224365, "loss": 1.0265, "nll_loss": 0.8898705840110779, "rewards/accuracies": 0.75, "rewards/chosen": -0.08378148823976517, "rewards/margins": 0.09355651587247849, "rewards/rejected": -0.17733800411224365, "step": 1332 }, { "epoch": 0.8131767576635657, "grad_norm": 2.552906036376953, "learning_rate": 6.723331292100428e-06, "log_odds_chosen": 1.4696378707885742, "log_odds_ratio": -0.37429144978523254, "logits/chosen": -0.9393641948699951, "logits/rejected": -0.8198233842849731, "logps/chosen": -0.6793010234832764, "logps/rejected": -1.7877321243286133, "loss": 1.1469, "nll_loss": 0.939156174659729, "rewards/accuracies": 0.75, "rewards/chosen": -0.06793010234832764, "rewards/margins": 0.11084311455488205, "rewards/rejected": -0.1787732094526291, "step": 1333 }, { "epoch": 0.8137867927405826, "grad_norm": 1.5600063800811768, "learning_rate": 6.722351500306185e-06, "log_odds_chosen": 0.9639155268669128, "log_odds_ratio": -0.46144577860832214, "logits/chosen": -0.9886378049850464, "logits/rejected": -1.0742535591125488, "logps/chosen": -0.7649453282356262, "logps/rejected": -1.4795193672180176, "loss": 1.1299, "nll_loss": 1.133662223815918, "rewards/accuracies": 0.75, "rewards/chosen": -0.07649452984333038, "rewards/margins": 0.0714574009180069, "rewards/rejected": -0.14795193076133728, "step": 1334 }, { "epoch": 0.8143968278175995, "grad_norm": 1.328800082206726, "learning_rate": 6.721371708511941e-06, "log_odds_chosen": -0.1806788593530655, "log_odds_ratio": -0.8557970523834229, "logits/chosen": -1.1999702453613281, "logits/rejected": -1.114086627960205, "logps/chosen": -1.0879572629928589, "logps/rejected": -0.9416419863700867, "loss": 1.2021, "nll_loss": 1.2896058559417725, "rewards/accuracies": 0.5, "rewards/chosen": -0.10879573971033096, "rewards/margins": -0.014631533995270729, "rewards/rejected": -0.09416420757770538, "step": 1335 }, { "epoch": 0.8150068628946164, "grad_norm": 2.297834634780884, "learning_rate": 6.720391916717697e-06, "log_odds_chosen": 0.6593941450119019, "log_odds_ratio": -0.5522379875183105, "logits/chosen": -0.9433204531669617, "logits/rejected": -0.9234040975570679, "logps/chosen": -0.9113680124282837, "logps/rejected": -1.2892652750015259, "loss": 1.1886, "nll_loss": 1.099571704864502, "rewards/accuracies": 0.5, "rewards/chosen": -0.09113680571317673, "rewards/margins": 0.0377897247672081, "rewards/rejected": -0.12892653048038483, "step": 1336 }, { "epoch": 0.8156168979716334, "grad_norm": 1.517862319946289, "learning_rate": 6.719412124923454e-06, "log_odds_chosen": 0.5879621505737305, "log_odds_ratio": -0.518886923789978, "logits/chosen": -0.9454577565193176, "logits/rejected": -1.0235779285430908, "logps/chosen": -0.880351185798645, "logps/rejected": -1.2355490922927856, "loss": 1.2378, "nll_loss": 1.2230465412139893, "rewards/accuracies": 0.75, "rewards/chosen": -0.08803512156009674, "rewards/margins": 0.035519786179065704, "rewards/rejected": -0.12355491518974304, "step": 1337 }, { "epoch": 0.8162269330486503, "grad_norm": 1.7475699186325073, "learning_rate": 6.71843233312921e-06, "log_odds_chosen": 0.5261459946632385, "log_odds_ratio": -0.678449809551239, "logits/chosen": -1.2207000255584717, "logits/rejected": -1.2304871082305908, "logps/chosen": -0.7979715466499329, "logps/rejected": -1.135928988456726, "loss": 1.1702, "nll_loss": 1.1799993515014648, "rewards/accuracies": 0.625, "rewards/chosen": -0.0797971561551094, "rewards/margins": 0.033795736730098724, "rewards/rejected": -0.11359289288520813, "step": 1338 }, { "epoch": 0.8168369681256672, "grad_norm": 1.2063549757003784, "learning_rate": 6.717452541334965e-06, "log_odds_chosen": 0.09042501449584961, "log_odds_ratio": -0.7190404534339905, "logits/chosen": -1.0694799423217773, "logits/rejected": -1.2393810749053955, "logps/chosen": -0.7812798023223877, "logps/rejected": -0.8368456959724426, "loss": 1.1808, "nll_loss": 1.1383483409881592, "rewards/accuracies": 0.5, "rewards/chosen": -0.07812798023223877, "rewards/margins": 0.005556588526815176, "rewards/rejected": -0.08368456363677979, "step": 1339 }, { "epoch": 0.8174470032026842, "grad_norm": 1.5931040048599243, "learning_rate": 6.716472749540722e-06, "log_odds_chosen": 1.0723258256912231, "log_odds_ratio": -0.5041177272796631, "logits/chosen": -1.0457719564437866, "logits/rejected": -0.9556998610496521, "logps/chosen": -0.6900681257247925, "logps/rejected": -1.4766261577606201, "loss": 1.14, "nll_loss": 0.9916316866874695, "rewards/accuracies": 0.625, "rewards/chosen": -0.06900681555271149, "rewards/margins": 0.07865580171346664, "rewards/rejected": -0.14766260981559753, "step": 1340 }, { "epoch": 0.8180570382797011, "grad_norm": 1.627962589263916, "learning_rate": 6.715492957746478e-06, "log_odds_chosen": 0.9318469762802124, "log_odds_ratio": -0.47086119651794434, "logits/chosen": -1.2542152404785156, "logits/rejected": -1.2194262742996216, "logps/chosen": -0.7354743480682373, "logps/rejected": -1.2822041511535645, "loss": 1.1926, "nll_loss": 1.31376314163208, "rewards/accuracies": 0.75, "rewards/chosen": -0.07354743778705597, "rewards/margins": 0.05467299371957779, "rewards/rejected": -0.12822042405605316, "step": 1341 }, { "epoch": 0.818667073356718, "grad_norm": 1.591133952140808, "learning_rate": 6.714513165952235e-06, "log_odds_chosen": 1.2878527641296387, "log_odds_ratio": -0.3493751287460327, "logits/chosen": -0.7405897378921509, "logits/rejected": -0.915236234664917, "logps/chosen": -0.7861494421958923, "logps/rejected": -1.6502413749694824, "loss": 1.0357, "nll_loss": 0.794049084186554, "rewards/accuracies": 1.0, "rewards/chosen": -0.07861494272947311, "rewards/margins": 0.08640919625759125, "rewards/rejected": -0.16502416133880615, "step": 1342 }, { "epoch": 0.8192771084337349, "grad_norm": 1.534515619277954, "learning_rate": 6.7135333741579915e-06, "log_odds_chosen": 1.7652188539505005, "log_odds_ratio": -0.26420503854751587, "logits/chosen": -0.7653300762176514, "logits/rejected": -0.8208740949630737, "logps/chosen": -0.59844571352005, "logps/rejected": -1.7678648233413696, "loss": 1.1774, "nll_loss": 0.9116598963737488, "rewards/accuracies": 1.0, "rewards/chosen": -0.059844572097063065, "rewards/margins": 0.1169419065117836, "rewards/rejected": -0.17678648233413696, "step": 1343 }, { "epoch": 0.8198871435107519, "grad_norm": 2.931612730026245, "learning_rate": 6.712553582363748e-06, "log_odds_chosen": 1.2752798795700073, "log_odds_ratio": -0.5343047976493835, "logits/chosen": -1.0277276039123535, "logits/rejected": -1.0486481189727783, "logps/chosen": -0.8433962464332581, "logps/rejected": -1.6548030376434326, "loss": 1.1323, "nll_loss": 1.0752354860305786, "rewards/accuracies": 0.75, "rewards/chosen": -0.08433962613344193, "rewards/margins": 0.08114069700241089, "rewards/rejected": -0.16548031568527222, "step": 1344 }, { "epoch": 0.8204971785877688, "grad_norm": 1.8905466794967651, "learning_rate": 6.711573790569504e-06, "log_odds_chosen": 1.2210707664489746, "log_odds_ratio": -0.41748952865600586, "logits/chosen": -0.948765218257904, "logits/rejected": -1.1024647951126099, "logps/chosen": -0.7463363409042358, "logps/rejected": -1.5662615299224854, "loss": 1.0057, "nll_loss": 0.9002948999404907, "rewards/accuracies": 0.75, "rewards/chosen": -0.0746336355805397, "rewards/margins": 0.08199252933263779, "rewards/rejected": -0.1566261649131775, "step": 1345 }, { "epoch": 0.8211072136647857, "grad_norm": 1.6134055852890015, "learning_rate": 6.71059399877526e-06, "log_odds_chosen": -0.31372350454330444, "log_odds_ratio": -1.0072025060653687, "logits/chosen": -1.3011157512664795, "logits/rejected": -1.2022252082824707, "logps/chosen": -1.4133801460266113, "logps/rejected": -1.151479721069336, "loss": 1.2467, "nll_loss": 1.4711652994155884, "rewards/accuracies": 0.5, "rewards/chosen": -0.1413380205631256, "rewards/margins": -0.026190049946308136, "rewards/rejected": -0.11514796316623688, "step": 1346 }, { "epoch": 0.8217172487418026, "grad_norm": 1.8624775409698486, "learning_rate": 6.709614206981016e-06, "log_odds_chosen": 0.6688375473022461, "log_odds_ratio": -0.5439580082893372, "logits/chosen": -0.8978503942489624, "logits/rejected": -1.0514335632324219, "logps/chosen": -0.7391178607940674, "logps/rejected": -1.1596330404281616, "loss": 1.2049, "nll_loss": 1.0447595119476318, "rewards/accuracies": 0.625, "rewards/chosen": -0.07391178607940674, "rewards/margins": 0.042051512748003006, "rewards/rejected": -0.11596329510211945, "step": 1347 }, { "epoch": 0.8223272838188196, "grad_norm": 1.2079209089279175, "learning_rate": 6.708634415186773e-06, "log_odds_chosen": 0.6915109157562256, "log_odds_ratio": -0.5607942342758179, "logits/chosen": -0.9500499963760376, "logits/rejected": -1.061043381690979, "logps/chosen": -0.7518484592437744, "logps/rejected": -1.1790080070495605, "loss": 1.184, "nll_loss": 0.8618786334991455, "rewards/accuracies": 0.5, "rewards/chosen": -0.07518485188484192, "rewards/margins": 0.04271595552563667, "rewards/rejected": -0.1179008036851883, "step": 1348 }, { "epoch": 0.8229373188958365, "grad_norm": 1.4147120714187622, "learning_rate": 6.707654623392529e-06, "log_odds_chosen": 0.0642998069524765, "log_odds_ratio": -0.8645104765892029, "logits/chosen": -1.2879689931869507, "logits/rejected": -1.1885974407196045, "logps/chosen": -1.2161402702331543, "logps/rejected": -1.3086810111999512, "loss": 1.3331, "nll_loss": 1.7039296627044678, "rewards/accuracies": 0.375, "rewards/chosen": -0.1216140165925026, "rewards/margins": 0.009254083037376404, "rewards/rejected": -0.1308681070804596, "step": 1349 }, { "epoch": 0.8235473539728534, "grad_norm": 1.2104415893554688, "learning_rate": 6.706674831598284e-06, "log_odds_chosen": 0.7154700756072998, "log_odds_ratio": -0.5009414553642273, "logits/chosen": -1.1958272457122803, "logits/rejected": -1.1353418827056885, "logps/chosen": -0.8801706433296204, "logps/rejected": -1.3642542362213135, "loss": 1.0306, "nll_loss": 0.9632347822189331, "rewards/accuracies": 0.75, "rewards/chosen": -0.0880170688033104, "rewards/margins": 0.04840836673974991, "rewards/rejected": -0.1364254355430603, "step": 1350 }, { "epoch": 0.8241573890498703, "grad_norm": 1.6158233880996704, "learning_rate": 6.705695039804041e-06, "log_odds_chosen": 0.9449496865272522, "log_odds_ratio": -0.5412594079971313, "logits/chosen": -0.8695111274719238, "logits/rejected": -0.9360852837562561, "logps/chosen": -0.6985865831375122, "logps/rejected": -1.334423303604126, "loss": 1.0976, "nll_loss": 0.8104138374328613, "rewards/accuracies": 0.875, "rewards/chosen": -0.06985865533351898, "rewards/margins": 0.06358367949724197, "rewards/rejected": -0.13344234228134155, "step": 1351 }, { "epoch": 0.8247674241268873, "grad_norm": 1.358108401298523, "learning_rate": 6.704715248009797e-06, "log_odds_chosen": 0.5366283655166626, "log_odds_ratio": -0.6224706768989563, "logits/chosen": -0.8990792036056519, "logits/rejected": -0.981329619884491, "logps/chosen": -0.919713020324707, "logps/rejected": -1.2550616264343262, "loss": 1.0739, "nll_loss": 1.141416311264038, "rewards/accuracies": 0.625, "rewards/chosen": -0.09197130054235458, "rewards/margins": 0.03353486210107803, "rewards/rejected": -0.12550616264343262, "step": 1352 }, { "epoch": 0.8253774592039043, "grad_norm": 1.1530735492706299, "learning_rate": 6.7037354562155535e-06, "log_odds_chosen": 0.3903261423110962, "log_odds_ratio": -0.6836197376251221, "logits/chosen": -1.0620090961456299, "logits/rejected": -1.0996443033218384, "logps/chosen": -0.9287874102592468, "logps/rejected": -1.2062757015228271, "loss": 1.0732, "nll_loss": 1.1805481910705566, "rewards/accuracies": 0.5, "rewards/chosen": -0.09287874400615692, "rewards/margins": 0.027748825028538704, "rewards/rejected": -0.12062756717205048, "step": 1353 }, { "epoch": 0.8259874942809211, "grad_norm": 2.2921831607818604, "learning_rate": 6.7027556644213105e-06, "log_odds_chosen": 1.0209553241729736, "log_odds_ratio": -0.4436020255088806, "logits/chosen": -0.9809552431106567, "logits/rejected": -0.946361243724823, "logps/chosen": -0.9152511358261108, "logps/rejected": -1.6238462924957275, "loss": 1.1959, "nll_loss": 1.0658071041107178, "rewards/accuracies": 0.75, "rewards/chosen": -0.0915251225233078, "rewards/margins": 0.07085952162742615, "rewards/rejected": -0.16238464415073395, "step": 1354 }, { "epoch": 0.8265975293579381, "grad_norm": 2.1706695556640625, "learning_rate": 6.701775872627067e-06, "log_odds_chosen": 0.04585987329483032, "log_odds_ratio": -0.7724087834358215, "logits/chosen": -1.0951308012008667, "logits/rejected": -1.0104683637619019, "logps/chosen": -1.0165457725524902, "logps/rejected": -1.0704164505004883, "loss": 1.1231, "nll_loss": 1.2438057661056519, "rewards/accuracies": 0.5, "rewards/chosen": -0.10165458172559738, "rewards/margins": 0.005387072451412678, "rewards/rejected": -0.10704164952039719, "step": 1355 }, { "epoch": 0.827207564434955, "grad_norm": 2.823467254638672, "learning_rate": 6.700796080832823e-06, "log_odds_chosen": 0.8742354512214661, "log_odds_ratio": -0.6005622148513794, "logits/chosen": -1.302474021911621, "logits/rejected": -1.2187154293060303, "logps/chosen": -1.0669435262680054, "logps/rejected": -1.8227746486663818, "loss": 1.1977, "nll_loss": 1.3112956285476685, "rewards/accuracies": 0.625, "rewards/chosen": -0.10669436305761337, "rewards/margins": 0.07558310031890869, "rewards/rejected": -0.18227745592594147, "step": 1356 }, { "epoch": 0.827817599511972, "grad_norm": 4.047957420349121, "learning_rate": 6.699816289038579e-06, "log_odds_chosen": 0.20832164585590363, "log_odds_ratio": -0.7151592969894409, "logits/chosen": -1.117546558380127, "logits/rejected": -1.1062148809432983, "logps/chosen": -0.779909610748291, "logps/rejected": -0.8927154541015625, "loss": 1.0482, "nll_loss": 1.019484519958496, "rewards/accuracies": 0.25, "rewards/chosen": -0.07799095660448074, "rewards/margins": 0.011280586943030357, "rewards/rejected": -0.08927155286073685, "step": 1357 }, { "epoch": 0.8284276345889888, "grad_norm": 1.9648628234863281, "learning_rate": 6.698836497244335e-06, "log_odds_chosen": 1.0818393230438232, "log_odds_ratio": -0.45950767397880554, "logits/chosen": -0.9703860878944397, "logits/rejected": -0.9471397399902344, "logps/chosen": -0.7686645984649658, "logps/rejected": -1.5883982181549072, "loss": 1.1652, "nll_loss": 0.8889410495758057, "rewards/accuracies": 0.75, "rewards/chosen": -0.07686646282672882, "rewards/margins": 0.0819733589887619, "rewards/rejected": -0.15883982181549072, "step": 1358 }, { "epoch": 0.8290376696660058, "grad_norm": 2.255866050720215, "learning_rate": 6.697856705450092e-06, "log_odds_chosen": 0.019791841506958008, "log_odds_ratio": -0.8002079129219055, "logits/chosen": -1.1953922510147095, "logits/rejected": -1.1880311965942383, "logps/chosen": -1.0751025676727295, "logps/rejected": -1.1053863763809204, "loss": 1.15, "nll_loss": 1.2579874992370605, "rewards/accuracies": 0.375, "rewards/chosen": -0.10751025378704071, "rewards/margins": 0.0030283797532320023, "rewards/rejected": -0.11053863167762756, "step": 1359 }, { "epoch": 0.8296477047430227, "grad_norm": 1.4972524642944336, "learning_rate": 6.696876913655848e-06, "log_odds_chosen": 0.24640966951847076, "log_odds_ratio": -0.6102185845375061, "logits/chosen": -1.2751179933547974, "logits/rejected": -1.2051234245300293, "logps/chosen": -1.2133607864379883, "logps/rejected": -1.4067556858062744, "loss": 1.0802, "nll_loss": 1.416222333908081, "rewards/accuracies": 0.625, "rewards/chosen": -0.12133608013391495, "rewards/margins": 0.01933950185775757, "rewards/rejected": -0.1406755894422531, "step": 1360 }, { "epoch": 0.8302577398200397, "grad_norm": 1.126898169517517, "learning_rate": 6.695897121861604e-06, "log_odds_chosen": 0.3267732262611389, "log_odds_ratio": -0.6385219097137451, "logits/chosen": -1.2118300199508667, "logits/rejected": -1.1905198097229004, "logps/chosen": -0.9518773555755615, "logps/rejected": -1.1754841804504395, "loss": 1.0412, "nll_loss": 1.3185768127441406, "rewards/accuracies": 0.375, "rewards/chosen": -0.09518773853778839, "rewards/margins": 0.022360678762197495, "rewards/rejected": -0.11754842102527618, "step": 1361 }, { "epoch": 0.8308677748970565, "grad_norm": 1.7630037069320679, "learning_rate": 6.69491733006736e-06, "log_odds_chosen": 0.5065999031066895, "log_odds_ratio": -0.6625381112098694, "logits/chosen": -1.197711706161499, "logits/rejected": -1.1357054710388184, "logps/chosen": -0.8551136255264282, "logps/rejected": -1.1997840404510498, "loss": 1.2375, "nll_loss": 1.1374268531799316, "rewards/accuracies": 0.5, "rewards/chosen": -0.08551137149333954, "rewards/margins": 0.03446703404188156, "rewards/rejected": -0.1199783980846405, "step": 1362 }, { "epoch": 0.8314778099740735, "grad_norm": 1.007285714149475, "learning_rate": 6.6939375382731164e-06, "log_odds_chosen": 1.0874191522598267, "log_odds_ratio": -0.48199236392974854, "logits/chosen": -1.1080899238586426, "logits/rejected": -0.8865540027618408, "logps/chosen": -0.7332997918128967, "logps/rejected": -1.4756320714950562, "loss": 1.0976, "nll_loss": 1.0405384302139282, "rewards/accuracies": 0.875, "rewards/chosen": -0.07332998514175415, "rewards/margins": 0.07423323392868042, "rewards/rejected": -0.14756320416927338, "step": 1363 }, { "epoch": 0.8320878450510905, "grad_norm": 2.474644184112549, "learning_rate": 6.6929577464788726e-06, "log_odds_chosen": 0.2674422860145569, "log_odds_ratio": -0.6056761741638184, "logits/chosen": -1.0270390510559082, "logits/rejected": -1.0171562433242798, "logps/chosen": -0.8994272947311401, "logps/rejected": -1.0859664678573608, "loss": 1.1857, "nll_loss": 1.0396982431411743, "rewards/accuracies": 0.5, "rewards/chosen": -0.08994272351264954, "rewards/margins": 0.01865391433238983, "rewards/rejected": -0.10859665274620056, "step": 1364 }, { "epoch": 0.8326978801281074, "grad_norm": 0.9027401804924011, "learning_rate": 6.6919779546846295e-06, "log_odds_chosen": 0.4729253649711609, "log_odds_ratio": -0.609714150428772, "logits/chosen": -1.045021891593933, "logits/rejected": -0.9533630609512329, "logps/chosen": -0.8823007345199585, "logps/rejected": -1.2213059663772583, "loss": 1.046, "nll_loss": 1.0419785976409912, "rewards/accuracies": 0.75, "rewards/chosen": -0.08823007345199585, "rewards/margins": 0.03390052914619446, "rewards/rejected": -0.12213059514760971, "step": 1365 }, { "epoch": 0.8333079152051243, "grad_norm": 3.396958827972412, "learning_rate": 6.690998162890386e-06, "log_odds_chosen": 0.7353827953338623, "log_odds_ratio": -0.5183190107345581, "logits/chosen": -1.0094640254974365, "logits/rejected": -0.9646320343017578, "logps/chosen": -0.9497378468513489, "logps/rejected": -1.4554266929626465, "loss": 1.2012, "nll_loss": 1.1000258922576904, "rewards/accuracies": 0.625, "rewards/chosen": -0.09497378766536713, "rewards/margins": 0.05056887865066528, "rewards/rejected": -0.1455426663160324, "step": 1366 }, { "epoch": 0.8339179502821412, "grad_norm": 2.0035297870635986, "learning_rate": 6.690018371096142e-06, "log_odds_chosen": 1.544753074645996, "log_odds_ratio": -0.34248700737953186, "logits/chosen": -0.7830765247344971, "logits/rejected": -0.9929071664810181, "logps/chosen": -0.6344987750053406, "logps/rejected": -1.6202048063278198, "loss": 1.1701, "nll_loss": 0.7934953570365906, "rewards/accuracies": 0.875, "rewards/chosen": -0.06344987452030182, "rewards/margins": 0.09857060760259628, "rewards/rejected": -0.1620204895734787, "step": 1367 }, { "epoch": 0.8345279853591582, "grad_norm": 1.910728096961975, "learning_rate": 6.689038579301898e-06, "log_odds_chosen": 0.8221548795700073, "log_odds_ratio": -0.5948315858840942, "logits/chosen": -0.8615641593933105, "logits/rejected": -0.9298107624053955, "logps/chosen": -1.0821928977966309, "logps/rejected": -1.6500556468963623, "loss": 0.9933, "nll_loss": 1.033631443977356, "rewards/accuracies": 0.5, "rewards/chosen": -0.10821928828954697, "rewards/margins": 0.05678629130125046, "rewards/rejected": -0.16500557959079742, "step": 1368 }, { "epoch": 0.8351380204361751, "grad_norm": 2.642226457595825, "learning_rate": 6.688058787507654e-06, "log_odds_chosen": 1.1511907577514648, "log_odds_ratio": -0.46496254205703735, "logits/chosen": -1.0579359531402588, "logits/rejected": -0.958902895450592, "logps/chosen": -1.0131721496582031, "logps/rejected": -1.8443642854690552, "loss": 1.1262, "nll_loss": 1.5199400186538696, "rewards/accuracies": 0.875, "rewards/chosen": -0.10131719708442688, "rewards/margins": 0.0831192210316658, "rewards/rejected": -0.18443642556667328, "step": 1369 }, { "epoch": 0.835748055513192, "grad_norm": 1.4449952840805054, "learning_rate": 6.687078995713411e-06, "log_odds_chosen": 1.58213472366333, "log_odds_ratio": -0.3459736406803131, "logits/chosen": -0.7449336051940918, "logits/rejected": -0.6088211536407471, "logps/chosen": -0.6689862012863159, "logps/rejected": -1.7578659057617188, "loss": 1.0723, "nll_loss": 0.7908028960227966, "rewards/accuracies": 0.75, "rewards/chosen": -0.06689862161874771, "rewards/margins": 0.10888797044754028, "rewards/rejected": -0.1757865995168686, "step": 1370 }, { "epoch": 0.8363580905902089, "grad_norm": 1.9875742197036743, "learning_rate": 6.686099203919167e-06, "log_odds_chosen": 0.902639627456665, "log_odds_ratio": -0.46526628732681274, "logits/chosen": -1.1162413358688354, "logits/rejected": -1.04771089553833, "logps/chosen": -1.2166039943695068, "logps/rejected": -1.8535196781158447, "loss": 1.0388, "nll_loss": 1.2729675769805908, "rewards/accuracies": 0.625, "rewards/chosen": -0.12166039645671844, "rewards/margins": 0.06369157135486603, "rewards/rejected": -0.18535196781158447, "step": 1371 }, { "epoch": 0.8369681256672259, "grad_norm": 2.2196691036224365, "learning_rate": 6.685119412124923e-06, "log_odds_chosen": 0.5962851047515869, "log_odds_ratio": -0.5511301159858704, "logits/chosen": -0.9919116497039795, "logits/rejected": -0.8339693546295166, "logps/chosen": -0.8812481164932251, "logps/rejected": -1.201338768005371, "loss": 1.1971, "nll_loss": 1.088413119316101, "rewards/accuracies": 0.75, "rewards/chosen": -0.0881248190999031, "rewards/margins": 0.0320090614259243, "rewards/rejected": -0.1201338842511177, "step": 1372 }, { "epoch": 0.8375781607442427, "grad_norm": 1.4272897243499756, "learning_rate": 6.684139620330679e-06, "log_odds_chosen": -0.2743574380874634, "log_odds_ratio": -0.9154039621353149, "logits/chosen": -1.002967119216919, "logits/rejected": -1.0723344087600708, "logps/chosen": -1.1085002422332764, "logps/rejected": -0.905017614364624, "loss": 1.065, "nll_loss": 1.2799299955368042, "rewards/accuracies": 0.25, "rewards/chosen": -0.11085003614425659, "rewards/margins": -0.020348262041807175, "rewards/rejected": -0.09050176292657852, "step": 1373 }, { "epoch": 0.8381881958212597, "grad_norm": 1.3009519577026367, "learning_rate": 6.6831598285364355e-06, "log_odds_chosen": 0.021954283118247986, "log_odds_ratio": -0.7580701112747192, "logits/chosen": -1.140360713005066, "logits/rejected": -1.0411323308944702, "logps/chosen": -0.9577374458312988, "logps/rejected": -0.9068036079406738, "loss": 1.2205, "nll_loss": 1.3997328281402588, "rewards/accuracies": 0.125, "rewards/chosen": -0.09577374905347824, "rewards/margins": -0.005093391053378582, "rewards/rejected": -0.09068036079406738, "step": 1374 }, { "epoch": 0.8387982308982767, "grad_norm": 1.6319406032562256, "learning_rate": 6.682180036742192e-06, "log_odds_chosen": 0.6865013837814331, "log_odds_ratio": -0.6163798570632935, "logits/chosen": -1.0762577056884766, "logits/rejected": -1.0219166278839111, "logps/chosen": -0.7796590924263, "logps/rejected": -1.1995428800582886, "loss": 1.1857, "nll_loss": 1.1018967628479004, "rewards/accuracies": 0.5, "rewards/chosen": -0.07796590030193329, "rewards/margins": 0.041988372802734375, "rewards/rejected": -0.11995428800582886, "step": 1375 }, { "epoch": 0.8394082659752936, "grad_norm": 2.247121572494507, "learning_rate": 6.6812002449479486e-06, "log_odds_chosen": 0.5146929025650024, "log_odds_ratio": -0.5447264909744263, "logits/chosen": -1.0413005352020264, "logits/rejected": -0.9864171147346497, "logps/chosen": -1.1411699056625366, "logps/rejected": -1.5630369186401367, "loss": 1.1771, "nll_loss": 1.4606034755706787, "rewards/accuracies": 0.625, "rewards/chosen": -0.11411699652671814, "rewards/margins": 0.042186688631772995, "rewards/rejected": -0.15630368888378143, "step": 1376 }, { "epoch": 0.8400183010523105, "grad_norm": 1.4701964855194092, "learning_rate": 6.680220453153705e-06, "log_odds_chosen": 1.0993032455444336, "log_odds_ratio": -0.48715248703956604, "logits/chosen": -0.8329532742500305, "logits/rejected": -0.8964288234710693, "logps/chosen": -0.8256286978721619, "logps/rejected": -1.497387409210205, "loss": 1.1372, "nll_loss": 1.1864854097366333, "rewards/accuracies": 0.875, "rewards/chosen": -0.0825628712773323, "rewards/margins": 0.06717587262392044, "rewards/rejected": -0.14973874390125275, "step": 1377 }, { "epoch": 0.8406283361293274, "grad_norm": 4.159692764282227, "learning_rate": 6.679240661359461e-06, "log_odds_chosen": 0.42385345697402954, "log_odds_ratio": -0.6963935494422913, "logits/chosen": -1.1807798147201538, "logits/rejected": -1.0890896320343018, "logps/chosen": -0.9452576041221619, "logps/rejected": -1.305030345916748, "loss": 1.2626, "nll_loss": 1.264718770980835, "rewards/accuracies": 0.375, "rewards/chosen": -0.0945257619023323, "rewards/margins": 0.035977281630039215, "rewards/rejected": -0.13050305843353271, "step": 1378 }, { "epoch": 0.8412383712063444, "grad_norm": 5.05345344543457, "learning_rate": 6.678260869565217e-06, "log_odds_chosen": 1.0889308452606201, "log_odds_ratio": -0.44436606764793396, "logits/chosen": -1.0145725011825562, "logits/rejected": -1.08574640750885, "logps/chosen": -0.9073658585548401, "logps/rejected": -1.6196726560592651, "loss": 1.1847, "nll_loss": 1.208306908607483, "rewards/accuracies": 0.75, "rewards/chosen": -0.09073659777641296, "rewards/margins": 0.0712306797504425, "rewards/rejected": -0.16196727752685547, "step": 1379 }, { "epoch": 0.8418484062833613, "grad_norm": 2.188303232192993, "learning_rate": 6.677281077770973e-06, "log_odds_chosen": 0.21949346363544464, "log_odds_ratio": -0.7000342607498169, "logits/chosen": -1.029419183731079, "logits/rejected": -0.9107855558395386, "logps/chosen": -1.077488660812378, "logps/rejected": -1.2451565265655518, "loss": 1.2525, "nll_loss": 1.1878753900527954, "rewards/accuracies": 0.5, "rewards/chosen": -0.10774886608123779, "rewards/margins": 0.01676679030060768, "rewards/rejected": -0.12451565265655518, "step": 1380 }, { "epoch": 0.8424584413603782, "grad_norm": 3.581663131713867, "learning_rate": 6.676301285976729e-06, "log_odds_chosen": 1.6514534950256348, "log_odds_ratio": -0.33514535427093506, "logits/chosen": -0.6708265542984009, "logits/rejected": -0.8812049627304077, "logps/chosen": -0.5750015377998352, "logps/rejected": -1.587918996810913, "loss": 0.8809, "nll_loss": 0.6214261651039124, "rewards/accuracies": 0.75, "rewards/chosen": -0.05750015750527382, "rewards/margins": 0.10129175335168839, "rewards/rejected": -0.1587918996810913, "step": 1381 }, { "epoch": 0.8430684764373951, "grad_norm": 3.0396792888641357, "learning_rate": 6.675321494182486e-06, "log_odds_chosen": -0.33509594202041626, "log_odds_ratio": -0.9148310422897339, "logits/chosen": -1.143767237663269, "logits/rejected": -1.0719101428985596, "logps/chosen": -1.3932280540466309, "logps/rejected": -1.134855031967163, "loss": 1.3201, "nll_loss": 1.2729765176773071, "rewards/accuracies": 0.125, "rewards/chosen": -0.13932280242443085, "rewards/margins": -0.02583729662001133, "rewards/rejected": -0.11348550766706467, "step": 1382 }, { "epoch": 0.8436785115144121, "grad_norm": 1.3114112615585327, "learning_rate": 6.674341702388242e-06, "log_odds_chosen": 0.31508368253707886, "log_odds_ratio": -0.5776143670082092, "logits/chosen": -0.9733522534370422, "logits/rejected": -1.0337464809417725, "logps/chosen": -0.9915496110916138, "logps/rejected": -1.1679579019546509, "loss": 1.1519, "nll_loss": 1.1878767013549805, "rewards/accuracies": 0.875, "rewards/chosen": -0.09915496408939362, "rewards/margins": 0.017640825361013412, "rewards/rejected": -0.11679577827453613, "step": 1383 }, { "epoch": 0.844288546591429, "grad_norm": 1.7655433416366577, "learning_rate": 6.673361910593999e-06, "log_odds_chosen": 1.0313401222229004, "log_odds_ratio": -0.41183167695999146, "logits/chosen": -1.0051355361938477, "logits/rejected": -0.9814625978469849, "logps/chosen": -0.6604422330856323, "logps/rejected": -1.2430089712142944, "loss": 1.1626, "nll_loss": 1.183417558670044, "rewards/accuracies": 0.875, "rewards/chosen": -0.06604421883821487, "rewards/margins": 0.05825668200850487, "rewards/rejected": -0.12430089712142944, "step": 1384 }, { "epoch": 0.8448985816684459, "grad_norm": 1.4635742902755737, "learning_rate": 6.6723821187997545e-06, "log_odds_chosen": 0.5632568001747131, "log_odds_ratio": -0.557330846786499, "logits/chosen": -1.0121548175811768, "logits/rejected": -1.0386263132095337, "logps/chosen": -0.6975452899932861, "logps/rejected": -0.9792892932891846, "loss": 1.1688, "nll_loss": 1.1720988750457764, "rewards/accuracies": 0.625, "rewards/chosen": -0.06975452601909637, "rewards/margins": 0.028174402192234993, "rewards/rejected": -0.09792893379926682, "step": 1385 }, { "epoch": 0.8455086167454628, "grad_norm": 2.2578301429748535, "learning_rate": 6.671402327005511e-06, "log_odds_chosen": 0.6798563003540039, "log_odds_ratio": -0.477858304977417, "logits/chosen": -1.0925443172454834, "logits/rejected": -1.040060043334961, "logps/chosen": -0.9174473285675049, "logps/rejected": -1.3764863014221191, "loss": 1.1762, "nll_loss": 0.9362852573394775, "rewards/accuracies": 0.75, "rewards/chosen": -0.09174473583698273, "rewards/margins": 0.04590388759970665, "rewards/rejected": -0.13764861226081848, "step": 1386 }, { "epoch": 0.8461186518224798, "grad_norm": 1.6880576610565186, "learning_rate": 6.670422535211268e-06, "log_odds_chosen": 0.4048916697502136, "log_odds_ratio": -0.6340861320495605, "logits/chosen": -0.8931738138198853, "logits/rejected": -0.8881096839904785, "logps/chosen": -0.8123895525932312, "logps/rejected": -1.1115381717681885, "loss": 1.0533, "nll_loss": 0.9136719703674316, "rewards/accuracies": 0.75, "rewards/chosen": -0.08123895525932312, "rewards/margins": 0.029914861544966698, "rewards/rejected": -0.11115381866693497, "step": 1387 }, { "epoch": 0.8467286868994968, "grad_norm": 2.912047863006592, "learning_rate": 6.669442743417024e-06, "log_odds_chosen": 1.1226491928100586, "log_odds_ratio": -0.509222686290741, "logits/chosen": -0.9819351434707642, "logits/rejected": -0.9957700967788696, "logps/chosen": -0.7751832604408264, "logps/rejected": -1.4139310121536255, "loss": 0.9969, "nll_loss": 1.1899324655532837, "rewards/accuracies": 0.75, "rewards/chosen": -0.07751832902431488, "rewards/margins": 0.06387478113174438, "rewards/rejected": -0.14139311015605927, "step": 1388 }, { "epoch": 0.8473387219765136, "grad_norm": 1.3802413940429688, "learning_rate": 6.66846295162278e-06, "log_odds_chosen": 0.4447908401489258, "log_odds_ratio": -0.6436397433280945, "logits/chosen": -0.804428219795227, "logits/rejected": -0.7241106033325195, "logps/chosen": -0.7894214987754822, "logps/rejected": -1.0692644119262695, "loss": 0.9611, "nll_loss": 0.8467448949813843, "rewards/accuracies": 0.5, "rewards/chosen": -0.07894214987754822, "rewards/margins": 0.027984291315078735, "rewards/rejected": -0.10692644119262695, "step": 1389 }, { "epoch": 0.8479487570535306, "grad_norm": 3.489827871322632, "learning_rate": 6.667483159828536e-06, "log_odds_chosen": 0.9756218791007996, "log_odds_ratio": -0.5686737298965454, "logits/chosen": -1.097404956817627, "logits/rejected": -1.0970691442489624, "logps/chosen": -0.9272086024284363, "logps/rejected": -1.4879539012908936, "loss": 1.2854, "nll_loss": 1.2199656963348389, "rewards/accuracies": 0.625, "rewards/chosen": -0.0927208662033081, "rewards/margins": 0.05607452988624573, "rewards/rejected": -0.14879539608955383, "step": 1390 }, { "epoch": 0.8485587921305475, "grad_norm": 2.0289089679718018, "learning_rate": 6.666503368034292e-06, "log_odds_chosen": 1.3279389142990112, "log_odds_ratio": -0.6300089359283447, "logits/chosen": -1.0866589546203613, "logits/rejected": -1.0934118032455444, "logps/chosen": -0.8027034997940063, "logps/rejected": -1.8548370599746704, "loss": 1.1704, "nll_loss": 1.0575848817825317, "rewards/accuracies": 0.5, "rewards/chosen": -0.08027034997940063, "rewards/margins": 0.10521335154771805, "rewards/rejected": -0.18548369407653809, "step": 1391 }, { "epoch": 0.8491688272075645, "grad_norm": 3.5656540393829346, "learning_rate": 6.665523576240048e-06, "log_odds_chosen": 1.217759132385254, "log_odds_ratio": -0.37892621755599976, "logits/chosen": -0.7053500413894653, "logits/rejected": -0.6104367971420288, "logps/chosen": -0.7265943884849548, "logps/rejected": -1.6150811910629272, "loss": 1.0129, "nll_loss": 0.9292305707931519, "rewards/accuracies": 0.875, "rewards/chosen": -0.0726594403386116, "rewards/margins": 0.08884866535663605, "rewards/rejected": -0.16150811314582825, "step": 1392 }, { "epoch": 0.8497788622845813, "grad_norm": 2.5078110694885254, "learning_rate": 6.664543784445805e-06, "log_odds_chosen": 0.729446291923523, "log_odds_ratio": -0.50773024559021, "logits/chosen": -0.9577001929283142, "logits/rejected": -0.9600822329521179, "logps/chosen": -0.9114651679992676, "logps/rejected": -1.3890516757965088, "loss": 0.9657, "nll_loss": 1.037530541419983, "rewards/accuracies": 0.75, "rewards/chosen": -0.09114652127027512, "rewards/margins": 0.04775865003466606, "rewards/rejected": -0.13890516757965088, "step": 1393 }, { "epoch": 0.8503888973615983, "grad_norm": 1.5702227354049683, "learning_rate": 6.663563992651561e-06, "log_odds_chosen": 0.5987696647644043, "log_odds_ratio": -0.6413683891296387, "logits/chosen": -0.7578064203262329, "logits/rejected": -0.8384538292884827, "logps/chosen": -0.7566162943840027, "logps/rejected": -1.0494494438171387, "loss": 1.1064, "nll_loss": 0.9248702526092529, "rewards/accuracies": 0.625, "rewards/chosen": -0.07566162943840027, "rewards/margins": 0.029283316805958748, "rewards/rejected": -0.10494494438171387, "step": 1394 }, { "epoch": 0.8509989324386152, "grad_norm": 3.07206392288208, "learning_rate": 6.662584200857318e-06, "log_odds_chosen": 1.9781607389450073, "log_odds_ratio": -0.2822903096675873, "logits/chosen": -0.8623048663139343, "logits/rejected": -0.9779251217842102, "logps/chosen": -0.7229533195495605, "logps/rejected": -2.1023850440979004, "loss": 1.1551, "nll_loss": 1.0423367023468018, "rewards/accuracies": 0.75, "rewards/chosen": -0.07229533046483994, "rewards/margins": 0.13794319331645966, "rewards/rejected": -0.210238516330719, "step": 1395 }, { "epoch": 0.8516089675156322, "grad_norm": 9.067244529724121, "learning_rate": 6.6616044090630736e-06, "log_odds_chosen": 0.9540286064147949, "log_odds_ratio": -0.5319257974624634, "logits/chosen": -0.7668647170066833, "logits/rejected": -1.072683334350586, "logps/chosen": -1.1790223121643066, "logps/rejected": -1.7548949718475342, "loss": 1.3168, "nll_loss": 1.5876500606536865, "rewards/accuracies": 0.75, "rewards/chosen": -0.1179022341966629, "rewards/margins": 0.05758725479245186, "rewards/rejected": -0.17548948526382446, "step": 1396 }, { "epoch": 0.852219002592649, "grad_norm": 1.487330436706543, "learning_rate": 6.66062461726883e-06, "log_odds_chosen": 1.510414719581604, "log_odds_ratio": -0.3687741458415985, "logits/chosen": -0.8598390817642212, "logits/rejected": -0.8794710636138916, "logps/chosen": -0.6566622257232666, "logps/rejected": -1.7310009002685547, "loss": 0.9413, "nll_loss": 0.9872356653213501, "rewards/accuracies": 0.75, "rewards/chosen": -0.06566622853279114, "rewards/margins": 0.10743386298418045, "rewards/rejected": -0.173100084066391, "step": 1397 }, { "epoch": 0.852829037669666, "grad_norm": 1.3967899084091187, "learning_rate": 6.659644825474587e-06, "log_odds_chosen": 0.9283736944198608, "log_odds_ratio": -0.5102983713150024, "logits/chosen": -1.0900522470474243, "logits/rejected": -1.1615149974822998, "logps/chosen": -0.8433291912078857, "logps/rejected": -1.4412310123443604, "loss": 1.1218, "nll_loss": 1.1703051328659058, "rewards/accuracies": 0.5, "rewards/chosen": -0.08433292806148529, "rewards/margins": 0.05979017913341522, "rewards/rejected": -0.14412309229373932, "step": 1398 }, { "epoch": 0.853439072746683, "grad_norm": 1.5336462259292603, "learning_rate": 6.658665033680343e-06, "log_odds_chosen": 0.4553503692150116, "log_odds_ratio": -0.6427475214004517, "logits/chosen": -1.102303147315979, "logits/rejected": -0.9375187158584595, "logps/chosen": -0.8130852580070496, "logps/rejected": -1.0689574480056763, "loss": 1.2186, "nll_loss": 0.948177695274353, "rewards/accuracies": 0.375, "rewards/chosen": -0.0813085287809372, "rewards/margins": 0.025587214156985283, "rewards/rejected": -0.10689574480056763, "step": 1399 }, { "epoch": 0.8540491078236999, "grad_norm": 1.1637450456619263, "learning_rate": 6.657685241886099e-06, "log_odds_chosen": 0.5034130215644836, "log_odds_ratio": -0.586217999458313, "logits/chosen": -0.8583945035934448, "logits/rejected": -0.9484912157058716, "logps/chosen": -0.9032069444656372, "logps/rejected": -1.244408130645752, "loss": 0.944, "nll_loss": 1.0849900245666504, "rewards/accuracies": 0.75, "rewards/chosen": -0.09032069891691208, "rewards/margins": 0.03412013500928879, "rewards/rejected": -0.12444083392620087, "step": 1400 }, { "epoch": 0.8546591429007168, "grad_norm": 1.4019323587417603, "learning_rate": 6.656705450091856e-06, "log_odds_chosen": 0.7906240820884705, "log_odds_ratio": -0.5503624677658081, "logits/chosen": -1.0666851997375488, "logits/rejected": -1.0882067680358887, "logps/chosen": -0.8154082298278809, "logps/rejected": -1.4685956239700317, "loss": 1.0616, "nll_loss": 0.9708826541900635, "rewards/accuracies": 0.625, "rewards/chosen": -0.08154082298278809, "rewards/margins": 0.06531873345375061, "rewards/rejected": -0.1468595564365387, "step": 1401 }, { "epoch": 0.8552691779777337, "grad_norm": 1.0775457620620728, "learning_rate": 6.655725658297611e-06, "log_odds_chosen": 1.1577508449554443, "log_odds_ratio": -0.499441921710968, "logits/chosen": -1.0614447593688965, "logits/rejected": -1.1550891399383545, "logps/chosen": -0.7440598607063293, "logps/rejected": -1.5740574598312378, "loss": 1.107, "nll_loss": 0.8827574849128723, "rewards/accuracies": 0.75, "rewards/chosen": -0.07440599054098129, "rewards/margins": 0.08299976587295532, "rewards/rejected": -0.1574057638645172, "step": 1402 }, { "epoch": 0.8558792130547507, "grad_norm": 1.31891930103302, "learning_rate": 6.654745866503367e-06, "log_odds_chosen": 0.0909833088517189, "log_odds_ratio": -0.6947644948959351, "logits/chosen": -0.9636657238006592, "logits/rejected": -0.90308678150177, "logps/chosen": -0.6236017942428589, "logps/rejected": -0.6356879472732544, "loss": 1.178, "nll_loss": 0.9666578769683838, "rewards/accuracies": 0.375, "rewards/chosen": -0.06236018240451813, "rewards/margins": 0.0012086108326911926, "rewards/rejected": -0.06356879323720932, "step": 1403 }, { "epoch": 0.8564892481317675, "grad_norm": 1.8394914865493774, "learning_rate": 6.653766074709124e-06, "log_odds_chosen": 0.25728410482406616, "log_odds_ratio": -0.7033402919769287, "logits/chosen": -1.048463225364685, "logits/rejected": -0.9525495767593384, "logps/chosen": -1.0870076417922974, "logps/rejected": -1.2612606287002563, "loss": 1.3142, "nll_loss": 1.25124192237854, "rewards/accuracies": 0.5, "rewards/chosen": -0.10870076715946198, "rewards/margins": 0.017425302416086197, "rewards/rejected": -0.12612606585025787, "step": 1404 }, { "epoch": 0.8570992832087845, "grad_norm": 1.5275497436523438, "learning_rate": 6.65278628291488e-06, "log_odds_chosen": 0.29322436451911926, "log_odds_ratio": -0.7279667854309082, "logits/chosen": -0.9469975233078003, "logits/rejected": -1.0085062980651855, "logps/chosen": -1.131829023361206, "logps/rejected": -1.314065933227539, "loss": 1.1476, "nll_loss": 1.0850077867507935, "rewards/accuracies": 0.375, "rewards/chosen": -0.1131829023361206, "rewards/margins": 0.01822369545698166, "rewards/rejected": -0.13140660524368286, "step": 1405 }, { "epoch": 0.8577093182858014, "grad_norm": 2.543642520904541, "learning_rate": 6.6518064911206365e-06, "log_odds_chosen": 0.5656499862670898, "log_odds_ratio": -0.6066392064094543, "logits/chosen": -1.1029748916625977, "logits/rejected": -1.0422389507293701, "logps/chosen": -0.9584228992462158, "logps/rejected": -1.4482884407043457, "loss": 1.1066, "nll_loss": 1.082896113395691, "rewards/accuracies": 0.625, "rewards/chosen": -0.09584228694438934, "rewards/margins": 0.04898654296994209, "rewards/rejected": -0.14482884109020233, "step": 1406 }, { "epoch": 0.8583193533628184, "grad_norm": 3.617933750152588, "learning_rate": 6.650826699326393e-06, "log_odds_chosen": 1.2622216939926147, "log_odds_ratio": -0.5006223320960999, "logits/chosen": -1.049891710281372, "logits/rejected": -1.0989632606506348, "logps/chosen": -0.9135825037956238, "logps/rejected": -1.843172550201416, "loss": 1.105, "nll_loss": 1.0182075500488281, "rewards/accuracies": 0.875, "rewards/chosen": -0.0913582444190979, "rewards/margins": 0.09295900911092758, "rewards/rejected": -0.18431724607944489, "step": 1407 }, { "epoch": 0.8589293884398352, "grad_norm": 1.7354941368103027, "learning_rate": 6.649846907532149e-06, "log_odds_chosen": 0.4535694122314453, "log_odds_ratio": -0.6233372092247009, "logits/chosen": -0.9924995303153992, "logits/rejected": -0.9176925420761108, "logps/chosen": -0.7551907300949097, "logps/rejected": -1.0240193605422974, "loss": 1.1227, "nll_loss": 0.9614284038543701, "rewards/accuracies": 0.5, "rewards/chosen": -0.07551907747983932, "rewards/margins": 0.026882866397500038, "rewards/rejected": -0.10240194946527481, "step": 1408 }, { "epoch": 0.8595394235168522, "grad_norm": 1.477320671081543, "learning_rate": 6.648867115737905e-06, "log_odds_chosen": 0.345610111951828, "log_odds_ratio": -0.627954363822937, "logits/chosen": -0.8113425374031067, "logits/rejected": -0.8074018359184265, "logps/chosen": -1.0351145267486572, "logps/rejected": -1.217565894126892, "loss": 1.0978, "nll_loss": 1.012820839881897, "rewards/accuracies": 0.625, "rewards/chosen": -0.10351146012544632, "rewards/margins": 0.018245123326778412, "rewards/rejected": -0.12175658345222473, "step": 1409 }, { "epoch": 0.8601494585938692, "grad_norm": 1.3391507863998413, "learning_rate": 6.647887323943662e-06, "log_odds_chosen": 0.7371962070465088, "log_odds_ratio": -0.6159670352935791, "logits/chosen": -0.9143090844154358, "logits/rejected": -1.016645073890686, "logps/chosen": -0.9949612021446228, "logps/rejected": -1.3263719081878662, "loss": 1.0801, "nll_loss": 1.013930320739746, "rewards/accuracies": 0.5, "rewards/chosen": -0.09949611872434616, "rewards/margins": 0.03314107656478882, "rewards/rejected": -0.13263720273971558, "step": 1410 }, { "epoch": 0.8607594936708861, "grad_norm": 1.8829056024551392, "learning_rate": 6.646907532149418e-06, "log_odds_chosen": 1.5134997367858887, "log_odds_ratio": -0.3445639908313751, "logits/chosen": -0.9785038828849792, "logits/rejected": -1.1949009895324707, "logps/chosen": -0.7454254031181335, "logps/rejected": -1.7816286087036133, "loss": 1.1787, "nll_loss": 1.1416831016540527, "rewards/accuracies": 0.75, "rewards/chosen": -0.07454253733158112, "rewards/margins": 0.10362032800912857, "rewards/rejected": -0.17816287279129028, "step": 1411 }, { "epoch": 0.861369528747903, "grad_norm": 2.22076153755188, "learning_rate": 6.645927740355175e-06, "log_odds_chosen": 1.0612937211990356, "log_odds_ratio": -0.6115090847015381, "logits/chosen": -0.9215760231018066, "logits/rejected": -1.011976957321167, "logps/chosen": -0.9321799278259277, "logps/rejected": -1.6046142578125, "loss": 1.1713, "nll_loss": 1.2675625085830688, "rewards/accuracies": 0.625, "rewards/chosen": -0.09321799874305725, "rewards/margins": 0.06724344193935394, "rewards/rejected": -0.1604614406824112, "step": 1412 }, { "epoch": 0.8619795638249199, "grad_norm": 3.6376001834869385, "learning_rate": 6.64494794856093e-06, "log_odds_chosen": 0.20309001207351685, "log_odds_ratio": -0.7353938221931458, "logits/chosen": -1.1213096380233765, "logits/rejected": -0.8526062369346619, "logps/chosen": -0.9442927837371826, "logps/rejected": -0.9655027985572815, "loss": 1.1256, "nll_loss": 1.260651707649231, "rewards/accuracies": 0.5, "rewards/chosen": -0.09442928433418274, "rewards/margins": 0.0021210000850260258, "rewards/rejected": -0.09655027836561203, "step": 1413 }, { "epoch": 0.8625895989019369, "grad_norm": 2.9083046913146973, "learning_rate": 6.643968156766686e-06, "log_odds_chosen": 1.3177706003189087, "log_odds_ratio": -0.3682430684566498, "logits/chosen": -0.8261640071868896, "logits/rejected": -0.9171598553657532, "logps/chosen": -0.6811001896858215, "logps/rejected": -1.5820796489715576, "loss": 1.0423, "nll_loss": 0.8395195603370667, "rewards/accuracies": 0.875, "rewards/chosen": -0.06811001896858215, "rewards/margins": 0.09009794890880585, "rewards/rejected": -0.158207967877388, "step": 1414 }, { "epoch": 0.8631996339789538, "grad_norm": 1.5853326320648193, "learning_rate": 6.642988364972443e-06, "log_odds_chosen": 0.6280058026313782, "log_odds_ratio": -0.6216966509819031, "logits/chosen": -0.9278050065040588, "logits/rejected": -1.138102650642395, "logps/chosen": -1.066051721572876, "logps/rejected": -1.461315393447876, "loss": 1.1192, "nll_loss": 1.137573003768921, "rewards/accuracies": 0.625, "rewards/chosen": -0.1066051721572876, "rewards/margins": 0.03952636569738388, "rewards/rejected": -0.14613154530525208, "step": 1415 }, { "epoch": 0.8638096690559707, "grad_norm": 1.3736352920532227, "learning_rate": 6.642008573178199e-06, "log_odds_chosen": 0.492732971906662, "log_odds_ratio": -0.7501434087753296, "logits/chosen": -1.1154292821884155, "logits/rejected": -1.0291799306869507, "logps/chosen": -1.1597959995269775, "logps/rejected": -1.542145848274231, "loss": 1.1737, "nll_loss": 1.3570241928100586, "rewards/accuracies": 0.5, "rewards/chosen": -0.11597960442304611, "rewards/margins": 0.03823498263955116, "rewards/rejected": -0.15421459078788757, "step": 1416 }, { "epoch": 0.8644197041329876, "grad_norm": 1.246242880821228, "learning_rate": 6.6410287813839555e-06, "log_odds_chosen": 0.5045374631881714, "log_odds_ratio": -0.6270843744277954, "logits/chosen": -1.009769320487976, "logits/rejected": -1.0065747499465942, "logps/chosen": -0.8753255009651184, "logps/rejected": -1.1002675294876099, "loss": 1.2282, "nll_loss": 1.0144973993301392, "rewards/accuracies": 0.625, "rewards/chosen": -0.08753255009651184, "rewards/margins": 0.022494208067655563, "rewards/rejected": -0.1100267618894577, "step": 1417 }, { "epoch": 0.8650297392100046, "grad_norm": 1.0811042785644531, "learning_rate": 6.6400489895897125e-06, "log_odds_chosen": 0.4091089367866516, "log_odds_ratio": -0.6336852312088013, "logits/chosen": -1.037063479423523, "logits/rejected": -1.0149805545806885, "logps/chosen": -1.1169226169586182, "logps/rejected": -1.3753196001052856, "loss": 1.1214, "nll_loss": 1.4286320209503174, "rewards/accuracies": 0.625, "rewards/chosen": -0.11169225722551346, "rewards/margins": 0.025839712470769882, "rewards/rejected": -0.13753198087215424, "step": 1418 }, { "epoch": 0.8656397742870215, "grad_norm": 2.2183990478515625, "learning_rate": 6.639069197795468e-06, "log_odds_chosen": 2.2391669750213623, "log_odds_ratio": -0.2614355683326721, "logits/chosen": -0.8089970350265503, "logits/rejected": -0.9041407108306885, "logps/chosen": -0.6164162158966064, "logps/rejected": -2.1762170791625977, "loss": 1.0824, "nll_loss": 0.8182101249694824, "rewards/accuracies": 0.75, "rewards/chosen": -0.061641618609428406, "rewards/margins": 0.15598008036613464, "rewards/rejected": -0.21762171387672424, "step": 1419 }, { "epoch": 0.8662498093640384, "grad_norm": 0.8790768980979919, "learning_rate": 6.638089406001224e-06, "log_odds_chosen": 0.7104777693748474, "log_odds_ratio": -0.5415405035018921, "logits/chosen": -0.6238994598388672, "logits/rejected": -0.7807844877243042, "logps/chosen": -0.6557489037513733, "logps/rejected": -1.022188663482666, "loss": 0.9824, "nll_loss": 0.8879281282424927, "rewards/accuracies": 0.75, "rewards/chosen": -0.06557489186525345, "rewards/margins": 0.03664398565888405, "rewards/rejected": -0.1022188737988472, "step": 1420 }, { "epoch": 0.8668598444410553, "grad_norm": 5.925572872161865, "learning_rate": 6.637109614206981e-06, "log_odds_chosen": 0.6937425136566162, "log_odds_ratio": -0.519892692565918, "logits/chosen": -0.8905079364776611, "logits/rejected": -0.9043022394180298, "logps/chosen": -0.8801860809326172, "logps/rejected": -1.3264076709747314, "loss": 1.0897, "nll_loss": 1.0686798095703125, "rewards/accuracies": 0.625, "rewards/chosen": -0.08801861107349396, "rewards/margins": 0.04462215676903725, "rewards/rejected": -0.1326407790184021, "step": 1421 }, { "epoch": 0.8674698795180723, "grad_norm": 4.009912967681885, "learning_rate": 6.636129822412737e-06, "log_odds_chosen": 0.4262439012527466, "log_odds_ratio": -0.5926772952079773, "logits/chosen": -1.0054566860198975, "logits/rejected": -0.9754514694213867, "logps/chosen": -0.8240996599197388, "logps/rejected": -1.0513520240783691, "loss": 1.2505, "nll_loss": 1.0930492877960205, "rewards/accuracies": 0.625, "rewards/chosen": -0.08240996301174164, "rewards/margins": 0.022725237533450127, "rewards/rejected": -0.10513519495725632, "step": 1422 }, { "epoch": 0.8680799145950893, "grad_norm": 3.1495542526245117, "learning_rate": 6.635150030618494e-06, "log_odds_chosen": 0.9092183709144592, "log_odds_ratio": -0.5660773515701294, "logits/chosen": -0.9962736964225769, "logits/rejected": -0.9872628450393677, "logps/chosen": -0.7609419822692871, "logps/rejected": -1.5065813064575195, "loss": 1.2451, "nll_loss": 1.2041573524475098, "rewards/accuracies": 0.625, "rewards/chosen": -0.07609420269727707, "rewards/margins": 0.07456392049789429, "rewards/rejected": -0.15065813064575195, "step": 1423 }, { "epoch": 0.8686899496721061, "grad_norm": 1.8463935852050781, "learning_rate": 6.63417023882425e-06, "log_odds_chosen": 0.42278721928596497, "log_odds_ratio": -0.6485533118247986, "logits/chosen": -1.1470876932144165, "logits/rejected": -1.049983263015747, "logps/chosen": -0.9596710801124573, "logps/rejected": -1.2357550859451294, "loss": 1.0893, "nll_loss": 1.133811354637146, "rewards/accuracies": 0.625, "rewards/chosen": -0.09596709907054901, "rewards/margins": 0.02760840579867363, "rewards/rejected": -0.12357550859451294, "step": 1424 }, { "epoch": 0.8692999847491231, "grad_norm": 1.3780888319015503, "learning_rate": 6.633190447030005e-06, "log_odds_chosen": 0.1328796148300171, "log_odds_ratio": -0.6859986782073975, "logits/chosen": -1.083580732345581, "logits/rejected": -0.9085127115249634, "logps/chosen": -0.9017292261123657, "logps/rejected": -1.0041598081588745, "loss": 1.1619, "nll_loss": 1.1104295253753662, "rewards/accuracies": 0.5, "rewards/chosen": -0.09017292410135269, "rewards/margins": 0.010243050754070282, "rewards/rejected": -0.10041597485542297, "step": 1425 }, { "epoch": 0.86991001982614, "grad_norm": 1.7548508644104004, "learning_rate": 6.632210655235762e-06, "log_odds_chosen": 0.8049589991569519, "log_odds_ratio": -0.501284122467041, "logits/chosen": -0.7069745659828186, "logits/rejected": -0.7863433361053467, "logps/chosen": -0.8269559144973755, "logps/rejected": -1.3067009449005127, "loss": 1.1097, "nll_loss": 0.9178255796432495, "rewards/accuracies": 0.875, "rewards/chosen": -0.08269559592008591, "rewards/margins": 0.04797450825572014, "rewards/rejected": -0.13067010045051575, "step": 1426 }, { "epoch": 0.870520054903157, "grad_norm": 1.1805038452148438, "learning_rate": 6.6312308634415184e-06, "log_odds_chosen": 0.8521842956542969, "log_odds_ratio": -0.47474759817123413, "logits/chosen": -0.617754340171814, "logits/rejected": -0.6846123933792114, "logps/chosen": -0.6283992528915405, "logps/rejected": -1.1226415634155273, "loss": 1.0757, "nll_loss": 0.8119843006134033, "rewards/accuracies": 0.75, "rewards/chosen": -0.06283992528915405, "rewards/margins": 0.04942423477768898, "rewards/rejected": -0.11226416379213333, "step": 1427 }, { "epoch": 0.8711300899801738, "grad_norm": 5.162349700927734, "learning_rate": 6.6302510716472746e-06, "log_odds_chosen": 0.7169758081436157, "log_odds_ratio": -0.5618600845336914, "logits/chosen": -1.0969191789627075, "logits/rejected": -1.0667243003845215, "logps/chosen": -1.1262116432189941, "logps/rejected": -1.6969070434570312, "loss": 1.1034, "nll_loss": 1.3240926265716553, "rewards/accuracies": 0.875, "rewards/chosen": -0.11262115836143494, "rewards/margins": 0.05706954747438431, "rewards/rejected": -0.16969069838523865, "step": 1428 }, { "epoch": 0.8717401250571908, "grad_norm": 1.1437389850616455, "learning_rate": 6.6292712798530315e-06, "log_odds_chosen": 1.447800636291504, "log_odds_ratio": -0.461387038230896, "logits/chosen": -0.9336048364639282, "logits/rejected": -0.9991939067840576, "logps/chosen": -0.685429573059082, "logps/rejected": -1.556382417678833, "loss": 0.9971, "nll_loss": 0.9606143236160278, "rewards/accuracies": 0.625, "rewards/chosen": -0.0685429647564888, "rewards/margins": 0.08709528297185898, "rewards/rejected": -0.15563823282718658, "step": 1429 }, { "epoch": 0.8723501601342077, "grad_norm": 1.3458170890808105, "learning_rate": 6.628291488058787e-06, "log_odds_chosen": 0.7277716994285583, "log_odds_ratio": -0.5899659991264343, "logits/chosen": -0.9511935114860535, "logits/rejected": -0.8634399175643921, "logps/chosen": -0.7887856960296631, "logps/rejected": -1.2764626741409302, "loss": 1.1563, "nll_loss": 1.0187716484069824, "rewards/accuracies": 0.625, "rewards/chosen": -0.07887856662273407, "rewards/margins": 0.04876770079135895, "rewards/rejected": -0.12764626741409302, "step": 1430 }, { "epoch": 0.8729601952112247, "grad_norm": 4.020300388336182, "learning_rate": 6.627311696264543e-06, "log_odds_chosen": 0.8511129021644592, "log_odds_ratio": -0.505054235458374, "logits/chosen": -1.0255378484725952, "logits/rejected": -1.0873823165893555, "logps/chosen": -0.8113240003585815, "logps/rejected": -1.378210425376892, "loss": 1.0916, "nll_loss": 1.0530942678451538, "rewards/accuracies": 0.625, "rewards/chosen": -0.08113239705562592, "rewards/margins": 0.05668864771723747, "rewards/rejected": -0.1378210484981537, "step": 1431 }, { "epoch": 0.8735702302882415, "grad_norm": 1.1075435876846313, "learning_rate": 6.6263319044703e-06, "log_odds_chosen": 1.2739272117614746, "log_odds_ratio": -0.47088074684143066, "logits/chosen": -0.7292001843452454, "logits/rejected": -0.7962381839752197, "logps/chosen": -0.6665883660316467, "logps/rejected": -1.502817153930664, "loss": 1.0097, "nll_loss": 0.9096221327781677, "rewards/accuracies": 0.75, "rewards/chosen": -0.06665883213281631, "rewards/margins": 0.08362287282943726, "rewards/rejected": -0.15028171241283417, "step": 1432 }, { "epoch": 0.8741802653652585, "grad_norm": 1.4790749549865723, "learning_rate": 6.625352112676056e-06, "log_odds_chosen": 1.1375117301940918, "log_odds_ratio": -0.42694368958473206, "logits/chosen": -0.7259787321090698, "logits/rejected": -0.7913621068000793, "logps/chosen": -0.5732370615005493, "logps/rejected": -1.2395542860031128, "loss": 0.9012, "nll_loss": 0.7446396350860596, "rewards/accuracies": 0.75, "rewards/chosen": -0.05732370913028717, "rewards/margins": 0.06663171947002411, "rewards/rejected": -0.12395542860031128, "step": 1433 }, { "epoch": 0.8747903004422755, "grad_norm": 1.5363365411758423, "learning_rate": 6.624372320881812e-06, "log_odds_chosen": 1.7251157760620117, "log_odds_ratio": -0.47539907693862915, "logits/chosen": -0.8876974582672119, "logits/rejected": -0.8368291854858398, "logps/chosen": -0.8647202253341675, "logps/rejected": -2.264643907546997, "loss": 1.1422, "nll_loss": 1.0018306970596313, "rewards/accuracies": 0.75, "rewards/chosen": -0.0864720270037651, "rewards/margins": 0.139992356300354, "rewards/rejected": -0.2264643907546997, "step": 1434 }, { "epoch": 0.8754003355192923, "grad_norm": 1.344645619392395, "learning_rate": 6.623392529087569e-06, "log_odds_chosen": 1.2022629976272583, "log_odds_ratio": -0.4455980062484741, "logits/chosen": -0.8856435418128967, "logits/rejected": -0.956557035446167, "logps/chosen": -0.6641167402267456, "logps/rejected": -1.4865202903747559, "loss": 1.0737, "nll_loss": 0.8400264978408813, "rewards/accuracies": 0.625, "rewards/chosen": -0.06641167402267456, "rewards/margins": 0.08224035799503326, "rewards/rejected": -0.14865203201770782, "step": 1435 }, { "epoch": 0.8760103705963093, "grad_norm": 3.6934101581573486, "learning_rate": 6.622412737293324e-06, "log_odds_chosen": 0.3447827696800232, "log_odds_ratio": -0.6099939942359924, "logits/chosen": -0.9401041269302368, "logits/rejected": -0.8533219695091248, "logps/chosen": -0.8388278484344482, "logps/rejected": -1.0717417001724243, "loss": 1.1776, "nll_loss": 0.9381722807884216, "rewards/accuracies": 0.5, "rewards/chosen": -0.08388278633356094, "rewards/margins": 0.02329137735068798, "rewards/rejected": -0.10717417299747467, "step": 1436 }, { "epoch": 0.8766204056733262, "grad_norm": 1.836344838142395, "learning_rate": 6.621432945499081e-06, "log_odds_chosen": 0.06277982890605927, "log_odds_ratio": -0.7153537273406982, "logits/chosen": -1.2540465593338013, "logits/rejected": -1.1434736251831055, "logps/chosen": -1.0227324962615967, "logps/rejected": -1.0423966646194458, "loss": 1.1732, "nll_loss": 1.3174307346343994, "rewards/accuracies": 0.375, "rewards/chosen": -0.10227325558662415, "rewards/margins": 0.0019664065912365913, "rewards/rejected": -0.10423965752124786, "step": 1437 }, { "epoch": 0.8772304407503432, "grad_norm": 3.2788333892822266, "learning_rate": 6.6204531537048375e-06, "log_odds_chosen": -0.15721291303634644, "log_odds_ratio": -0.8972392678260803, "logits/chosen": -1.0352146625518799, "logits/rejected": -0.9641487002372742, "logps/chosen": -1.1314570903778076, "logps/rejected": -1.0243077278137207, "loss": 1.1529, "nll_loss": 1.3043025732040405, "rewards/accuracies": 0.375, "rewards/chosen": -0.11314570903778076, "rewards/margins": -0.010714935138821602, "rewards/rejected": -0.10243077576160431, "step": 1438 }, { "epoch": 0.87784047582736, "grad_norm": 1.6840189695358276, "learning_rate": 6.619473361910594e-06, "log_odds_chosen": 1.6996548175811768, "log_odds_ratio": -0.31039389967918396, "logits/chosen": -0.7963988184928894, "logits/rejected": -0.8531326055526733, "logps/chosen": -0.6951149106025696, "logps/rejected": -1.894774079322815, "loss": 1.057, "nll_loss": 0.9785455465316772, "rewards/accuracies": 0.875, "rewards/chosen": -0.06951149553060532, "rewards/margins": 0.11996591091156006, "rewards/rejected": -0.18947741389274597, "step": 1439 }, { "epoch": 0.878450510904377, "grad_norm": 1.3148441314697266, "learning_rate": 6.6184935701163506e-06, "log_odds_chosen": 0.12762294709682465, "log_odds_ratio": -0.6816147565841675, "logits/chosen": -1.2452893257141113, "logits/rejected": -1.206868052482605, "logps/chosen": -1.038940668106079, "logps/rejected": -1.149592638015747, "loss": 1.2821, "nll_loss": 1.3733394145965576, "rewards/accuracies": 0.625, "rewards/chosen": -0.10389406979084015, "rewards/margins": 0.01106519065797329, "rewards/rejected": -0.11495926976203918, "step": 1440 }, { "epoch": 0.8790605459813939, "grad_norm": 1.6326403617858887, "learning_rate": 6.617513778322107e-06, "log_odds_chosen": 0.7414739727973938, "log_odds_ratio": -0.5106199979782104, "logits/chosen": -1.215639352798462, "logits/rejected": -1.1712396144866943, "logps/chosen": -1.124226689338684, "logps/rejected": -1.708376169204712, "loss": 1.1757, "nll_loss": 1.329075574874878, "rewards/accuracies": 0.75, "rewards/chosen": -0.11242267489433289, "rewards/margins": 0.058414943516254425, "rewards/rejected": -0.1708376109600067, "step": 1441 }, { "epoch": 0.8796705810584109, "grad_norm": 2.521141529083252, "learning_rate": 6.616533986527862e-06, "log_odds_chosen": 2.0779805183410645, "log_odds_ratio": -0.25599905848503113, "logits/chosen": -0.8977345824241638, "logits/rejected": -0.9311666488647461, "logps/chosen": -0.6846802830696106, "logps/rejected": -2.1953444480895996, "loss": 1.0014, "nll_loss": 0.9198470115661621, "rewards/accuracies": 0.875, "rewards/chosen": -0.06846802681684494, "rewards/margins": 0.15106642246246338, "rewards/rejected": -0.21953445672988892, "step": 1442 }, { "epoch": 0.8802806161354277, "grad_norm": 3.939213514328003, "learning_rate": 6.615554194733619e-06, "log_odds_chosen": 0.38783377408981323, "log_odds_ratio": -0.5856831669807434, "logits/chosen": -1.2440919876098633, "logits/rejected": -1.1088820695877075, "logps/chosen": -1.1025888919830322, "logps/rejected": -1.391247272491455, "loss": 1.2916, "nll_loss": 1.1423767805099487, "rewards/accuracies": 0.75, "rewards/chosen": -0.11025889217853546, "rewards/margins": 0.028865832835435867, "rewards/rejected": -0.13912472128868103, "step": 1443 }, { "epoch": 0.8808906512124447, "grad_norm": 1.54022216796875, "learning_rate": 6.614574402939375e-06, "log_odds_chosen": 1.296541452407837, "log_odds_ratio": -0.4865361452102661, "logits/chosen": -1.1581511497497559, "logits/rejected": -1.0887945890426636, "logps/chosen": -0.9389630556106567, "logps/rejected": -1.810779333114624, "loss": 1.2119, "nll_loss": 1.4128490686416626, "rewards/accuracies": 0.875, "rewards/chosen": -0.09389631450176239, "rewards/margins": 0.08718161284923553, "rewards/rejected": -0.18107792735099792, "step": 1444 }, { "epoch": 0.8815006862894617, "grad_norm": 2.601238965988159, "learning_rate": 6.613594611145131e-06, "log_odds_chosen": 0.7674190402030945, "log_odds_ratio": -0.4112587869167328, "logits/chosen": -1.0501583814620972, "logits/rejected": -0.9886091947555542, "logps/chosen": -0.9007179737091064, "logps/rejected": -1.4337129592895508, "loss": 0.9834, "nll_loss": 1.1535590887069702, "rewards/accuracies": 1.0, "rewards/chosen": -0.09007179737091064, "rewards/margins": 0.053299497812986374, "rewards/rejected": -0.14337129890918732, "step": 1445 }, { "epoch": 0.8821107213664786, "grad_norm": 1.6328588724136353, "learning_rate": 6.612614819350888e-06, "log_odds_chosen": 1.3005367517471313, "log_odds_ratio": -0.46246862411499023, "logits/chosen": -0.8200477361679077, "logits/rejected": -0.9174585342407227, "logps/chosen": -0.6929560899734497, "logps/rejected": -1.5564663410186768, "loss": 1.0646, "nll_loss": 1.0110119581222534, "rewards/accuracies": 0.625, "rewards/chosen": -0.06929560750722885, "rewards/margins": 0.08635102957487106, "rewards/rejected": -0.15564662218093872, "step": 1446 }, { "epoch": 0.8827207564434955, "grad_norm": 2.425339937210083, "learning_rate": 6.611635027556643e-06, "log_odds_chosen": 2.832791328430176, "log_odds_ratio": -0.2677805721759796, "logits/chosen": -0.7771764993667603, "logits/rejected": -0.8914507627487183, "logps/chosen": -0.5500641465187073, "logps/rejected": -2.5341949462890625, "loss": 1.007, "nll_loss": 0.6522924304008484, "rewards/accuracies": 0.875, "rewards/chosen": -0.055006418377161026, "rewards/margins": 0.19841307401657104, "rewards/rejected": -0.25341951847076416, "step": 1447 }, { "epoch": 0.8833307915205124, "grad_norm": 1.4951306581497192, "learning_rate": 6.6106552357623995e-06, "log_odds_chosen": 0.5148866176605225, "log_odds_ratio": -0.5808138251304626, "logits/chosen": -0.8179311752319336, "logits/rejected": -0.7891422510147095, "logps/chosen": -0.968926727771759, "logps/rejected": -1.314605951309204, "loss": 1.0624, "nll_loss": 0.9737633466720581, "rewards/accuracies": 0.625, "rewards/chosen": -0.09689266979694366, "rewards/margins": 0.034567926079034805, "rewards/rejected": -0.13146060705184937, "step": 1448 }, { "epoch": 0.8839408265975294, "grad_norm": 1.5768601894378662, "learning_rate": 6.6096754439681565e-06, "log_odds_chosen": 0.7998414039611816, "log_odds_ratio": -0.4326365888118744, "logits/chosen": -1.1081814765930176, "logits/rejected": -1.019696593284607, "logps/chosen": -0.7228367328643799, "logps/rejected": -1.223563313484192, "loss": 0.9988, "nll_loss": 1.0316509008407593, "rewards/accuracies": 0.875, "rewards/chosen": -0.07228367775678635, "rewards/margins": 0.05007265880703926, "rewards/rejected": -0.1223563402891159, "step": 1449 }, { "epoch": 0.8845508616745463, "grad_norm": 1.2888704538345337, "learning_rate": 6.608695652173913e-06, "log_odds_chosen": 2.0576224327087402, "log_odds_ratio": -0.2858836054801941, "logits/chosen": -0.9650793671607971, "logits/rejected": -1.0442094802856445, "logps/chosen": -0.8197263479232788, "logps/rejected": -2.2706005573272705, "loss": 1.0454, "nll_loss": 1.2286438941955566, "rewards/accuracies": 0.875, "rewards/chosen": -0.0819726288318634, "rewards/margins": 0.14508742094039917, "rewards/rejected": -0.22706004977226257, "step": 1450 }, { "epoch": 0.8851608967515632, "grad_norm": 1.3084216117858887, "learning_rate": 6.60771586037967e-06, "log_odds_chosen": 0.8410958647727966, "log_odds_ratio": -0.5924519300460815, "logits/chosen": -1.1964012384414673, "logits/rejected": -1.1634986400604248, "logps/chosen": -0.8949893712997437, "logps/rejected": -1.499135971069336, "loss": 1.0739, "nll_loss": 1.0379356145858765, "rewards/accuracies": 0.625, "rewards/chosen": -0.08949893712997437, "rewards/margins": 0.06041465699672699, "rewards/rejected": -0.14991357922554016, "step": 1451 }, { "epoch": 0.8857709318285801, "grad_norm": 14.084274291992188, "learning_rate": 6.606736068585426e-06, "log_odds_chosen": 1.2564176321029663, "log_odds_ratio": -0.40925508737564087, "logits/chosen": -0.8579257130622864, "logits/rejected": -0.8581188917160034, "logps/chosen": -0.7293090224266052, "logps/rejected": -1.6322733163833618, "loss": 1.0039, "nll_loss": 0.8226028084754944, "rewards/accuracies": 0.75, "rewards/chosen": -0.07293090224266052, "rewards/margins": 0.0902964323759079, "rewards/rejected": -0.16322734951972961, "step": 1452 }, { "epoch": 0.8863809669055971, "grad_norm": 1.8842936754226685, "learning_rate": 6.605756276791181e-06, "log_odds_chosen": 2.017665147781372, "log_odds_ratio": -0.29848065972328186, "logits/chosen": -1.1504926681518555, "logits/rejected": -1.1278566122055054, "logps/chosen": -0.7729690074920654, "logps/rejected": -2.298342227935791, "loss": 1.2314, "nll_loss": 1.2340099811553955, "rewards/accuracies": 0.875, "rewards/chosen": -0.0772968977689743, "rewards/margins": 0.15253733098506927, "rewards/rejected": -0.22983422875404358, "step": 1453 }, { "epoch": 0.886991001982614, "grad_norm": 3.0539612770080566, "learning_rate": 6.604776484996938e-06, "log_odds_chosen": 1.221775770187378, "log_odds_ratio": -0.4563359022140503, "logits/chosen": -0.9907004833221436, "logits/rejected": -0.9068528413772583, "logps/chosen": -0.8051220178604126, "logps/rejected": -1.6691219806671143, "loss": 1.1371, "nll_loss": 0.965421199798584, "rewards/accuracies": 0.75, "rewards/chosen": -0.08051219582557678, "rewards/margins": 0.08640001714229584, "rewards/rejected": -0.16691219806671143, "step": 1454 }, { "epoch": 0.8876010370596309, "grad_norm": 2.442335844039917, "learning_rate": 6.603796693202694e-06, "log_odds_chosen": 1.282627820968628, "log_odds_ratio": -0.4525657296180725, "logits/chosen": -1.1277563571929932, "logits/rejected": -1.1459205150604248, "logps/chosen": -0.8360750675201416, "logps/rejected": -1.846392273902893, "loss": 1.2033, "nll_loss": 1.1456900835037231, "rewards/accuracies": 0.75, "rewards/chosen": -0.0836075097322464, "rewards/margins": 0.10103172808885574, "rewards/rejected": -0.18463924527168274, "step": 1455 }, { "epoch": 0.8882110721366479, "grad_norm": 2.1543445587158203, "learning_rate": 6.60281690140845e-06, "log_odds_chosen": 0.804595947265625, "log_odds_ratio": -0.5893965363502502, "logits/chosen": -1.0011030435562134, "logits/rejected": -0.9886761903762817, "logps/chosen": -0.8941909670829773, "logps/rejected": -1.5818750858306885, "loss": 1.2124, "nll_loss": 0.953136146068573, "rewards/accuracies": 0.75, "rewards/chosen": -0.08941909670829773, "rewards/margins": 0.06876843422651291, "rewards/rejected": -0.15818752348423004, "step": 1456 }, { "epoch": 0.8888211072136648, "grad_norm": 2.486483573913574, "learning_rate": 6.601837109614207e-06, "log_odds_chosen": 2.101213216781616, "log_odds_ratio": -0.27420327067375183, "logits/chosen": -0.8335511684417725, "logits/rejected": -0.7033529281616211, "logps/chosen": -0.5896657705307007, "logps/rejected": -2.0989203453063965, "loss": 0.926, "nll_loss": 0.7966701984405518, "rewards/accuracies": 1.0, "rewards/chosen": -0.05896657332777977, "rewards/margins": 0.15092548727989197, "rewards/rejected": -0.20989204943180084, "step": 1457 }, { "epoch": 0.8894311422906818, "grad_norm": 2.038106679916382, "learning_rate": 6.600857317819963e-06, "log_odds_chosen": 0.4881751239299774, "log_odds_ratio": -0.5670891404151917, "logits/chosen": -1.0684807300567627, "logits/rejected": -1.132473111152649, "logps/chosen": -1.0125036239624023, "logps/rejected": -1.3539944887161255, "loss": 1.1616, "nll_loss": 1.2635051012039185, "rewards/accuracies": 0.5, "rewards/chosen": -0.10125035792589188, "rewards/margins": 0.03414908051490784, "rewards/rejected": -0.1353994458913803, "step": 1458 }, { "epoch": 0.8900411773676986, "grad_norm": 1.891555666923523, "learning_rate": 6.599877526025719e-06, "log_odds_chosen": 1.0582263469696045, "log_odds_ratio": -0.5112255215644836, "logits/chosen": -1.1480002403259277, "logits/rejected": -1.2051210403442383, "logps/chosen": -0.8406908512115479, "logps/rejected": -1.5297822952270508, "loss": 1.1144, "nll_loss": 1.08251953125, "rewards/accuracies": 0.625, "rewards/chosen": -0.08406908810138702, "rewards/margins": 0.06890915334224701, "rewards/rejected": -0.15297824144363403, "step": 1459 }, { "epoch": 0.8906512124447156, "grad_norm": 1.7149081230163574, "learning_rate": 6.5988977342314756e-06, "log_odds_chosen": 0.14672282338142395, "log_odds_ratio": -0.7073354721069336, "logits/chosen": -1.0181105136871338, "logits/rejected": -1.0713963508605957, "logps/chosen": -1.162067174911499, "logps/rejected": -1.227577805519104, "loss": 1.1468, "nll_loss": 1.3025519847869873, "rewards/accuracies": 0.5, "rewards/chosen": -0.11620672792196274, "rewards/margins": 0.0065510571002960205, "rewards/rejected": -0.12275778502225876, "step": 1460 }, { "epoch": 0.8912612475217325, "grad_norm": 1.360175371170044, "learning_rate": 6.597917942437232e-06, "log_odds_chosen": 0.8562144041061401, "log_odds_ratio": -0.4957732558250427, "logits/chosen": -0.8879480361938477, "logits/rejected": -1.0729748010635376, "logps/chosen": -0.7032002806663513, "logps/rejected": -1.2906253337860107, "loss": 1.1354, "nll_loss": 0.9004684090614319, "rewards/accuracies": 0.75, "rewards/chosen": -0.0703200250864029, "rewards/margins": 0.05874251201748848, "rewards/rejected": -0.12906254827976227, "step": 1461 }, { "epoch": 0.8918712825987495, "grad_norm": 1.5728367567062378, "learning_rate": 6.596938150642988e-06, "log_odds_chosen": 1.0862525701522827, "log_odds_ratio": -0.5539356470108032, "logits/chosen": -0.8143303990364075, "logits/rejected": -0.7374180555343628, "logps/chosen": -0.7222826480865479, "logps/rejected": -1.3769726753234863, "loss": 1.0016, "nll_loss": 0.8655831813812256, "rewards/accuracies": 0.625, "rewards/chosen": -0.07222826778888702, "rewards/margins": 0.06546899676322937, "rewards/rejected": -0.1376972645521164, "step": 1462 }, { "epoch": 0.8924813176757663, "grad_norm": 1.0938278436660767, "learning_rate": 6.595958358848745e-06, "log_odds_chosen": 1.1143282651901245, "log_odds_ratio": -0.4798784554004669, "logits/chosen": -1.0801299810409546, "logits/rejected": -0.9127199053764343, "logps/chosen": -0.8818850517272949, "logps/rejected": -1.7698009014129639, "loss": 1.0964, "nll_loss": 1.1141732931137085, "rewards/accuracies": 0.75, "rewards/chosen": -0.08818850666284561, "rewards/margins": 0.08879159390926361, "rewards/rejected": -0.17698010802268982, "step": 1463 }, { "epoch": 0.8930913527527833, "grad_norm": 1.470458745956421, "learning_rate": 6.594978567054501e-06, "log_odds_chosen": 1.3095048666000366, "log_odds_ratio": -0.45371246337890625, "logits/chosen": -0.8320404291152954, "logits/rejected": -0.8279436826705933, "logps/chosen": -0.7666031122207642, "logps/rejected": -1.5882279872894287, "loss": 0.9866, "nll_loss": 0.8856767416000366, "rewards/accuracies": 0.75, "rewards/chosen": -0.07666031271219254, "rewards/margins": 0.08216248452663422, "rewards/rejected": -0.15882280468940735, "step": 1464 }, { "epoch": 0.8937013878298002, "grad_norm": 2.112379550933838, "learning_rate": 6.593998775260257e-06, "log_odds_chosen": 0.45127907395362854, "log_odds_ratio": -0.6386860609054565, "logits/chosen": -0.9706188440322876, "logits/rejected": -0.9745127558708191, "logps/chosen": -0.9537509083747864, "logps/rejected": -1.3157638311386108, "loss": 1.317, "nll_loss": 1.094677448272705, "rewards/accuracies": 0.5, "rewards/chosen": -0.09537509083747864, "rewards/margins": 0.03620129078626633, "rewards/rejected": -0.13157638907432556, "step": 1465 }, { "epoch": 0.8943114229068171, "grad_norm": 1.1541632413864136, "learning_rate": 6.593018983466013e-06, "log_odds_chosen": 0.4927964210510254, "log_odds_ratio": -0.6499235033988953, "logits/chosen": -0.9487018585205078, "logits/rejected": -1.0204813480377197, "logps/chosen": -0.903855562210083, "logps/rejected": -1.1928002834320068, "loss": 1.0302, "nll_loss": 1.0396864414215088, "rewards/accuracies": 0.5, "rewards/chosen": -0.0903855562210083, "rewards/margins": 0.028894472867250443, "rewards/rejected": -0.11928003281354904, "step": 1466 }, { "epoch": 0.894921457983834, "grad_norm": 1.196380376815796, "learning_rate": 6.592039191671769e-06, "log_odds_chosen": 1.0377209186553955, "log_odds_ratio": -0.5066481232643127, "logits/chosen": -0.7617638111114502, "logits/rejected": -0.8224605321884155, "logps/chosen": -0.6714606881141663, "logps/rejected": -1.4441063404083252, "loss": 0.9317, "nll_loss": 0.7878377437591553, "rewards/accuracies": 0.625, "rewards/chosen": -0.06714607030153275, "rewards/margins": 0.07726456224918365, "rewards/rejected": -0.1444106251001358, "step": 1467 }, { "epoch": 0.895531493060851, "grad_norm": 1.4219926595687866, "learning_rate": 6.591059399877526e-06, "log_odds_chosen": 0.3755621910095215, "log_odds_ratio": -0.6327043175697327, "logits/chosen": -0.9536902904510498, "logits/rejected": -0.9750341176986694, "logps/chosen": -0.9989349842071533, "logps/rejected": -1.1932482719421387, "loss": 1.2064, "nll_loss": 1.1634845733642578, "rewards/accuracies": 0.5, "rewards/chosen": -0.09989350289106369, "rewards/margins": 0.019431322813034058, "rewards/rejected": -0.11932482570409775, "step": 1468 }, { "epoch": 0.896141528137868, "grad_norm": 1.5167250633239746, "learning_rate": 6.590079608083282e-06, "log_odds_chosen": 1.6591384410858154, "log_odds_ratio": -0.4371677041053772, "logits/chosen": -1.035003900527954, "logits/rejected": -1.026563048362732, "logps/chosen": -0.7885551452636719, "logps/rejected": -1.9678232669830322, "loss": 1.3066, "nll_loss": 0.9832535982131958, "rewards/accuracies": 0.625, "rewards/chosen": -0.07885551452636719, "rewards/margins": 0.11792681366205215, "rewards/rejected": -0.19678233563899994, "step": 1469 }, { "epoch": 0.8967515632148848, "grad_norm": 1.8038581609725952, "learning_rate": 6.589099816289038e-06, "log_odds_chosen": 0.9336448311805725, "log_odds_ratio": -0.5762449502944946, "logits/chosen": -0.870816707611084, "logits/rejected": -0.9699428081512451, "logps/chosen": -0.7875005602836609, "logps/rejected": -1.3796131610870361, "loss": 1.0465, "nll_loss": 0.9507405757904053, "rewards/accuracies": 0.5, "rewards/chosen": -0.07875005155801773, "rewards/margins": 0.05921127274632454, "rewards/rejected": -0.13796132802963257, "step": 1470 }, { "epoch": 0.8973615982919018, "grad_norm": 1.3072271347045898, "learning_rate": 6.588120024494795e-06, "log_odds_chosen": 0.45241600275039673, "log_odds_ratio": -0.6435867547988892, "logits/chosen": -1.303856372833252, "logits/rejected": -1.0494372844696045, "logps/chosen": -0.9804626107215881, "logps/rejected": -1.3963713645935059, "loss": 1.1107, "nll_loss": 1.2539738416671753, "rewards/accuracies": 0.625, "rewards/chosen": -0.09804626554250717, "rewards/margins": 0.04159088805317879, "rewards/rejected": -0.13963715732097626, "step": 1471 }, { "epoch": 0.8979716333689187, "grad_norm": 2.266160011291504, "learning_rate": 6.587140232700551e-06, "log_odds_chosen": 0.7078739404678345, "log_odds_ratio": -0.851923942565918, "logits/chosen": -1.158407211303711, "logits/rejected": -1.1139721870422363, "logps/chosen": -1.1090447902679443, "logps/rejected": -1.9431705474853516, "loss": 1.2371, "nll_loss": 1.4480974674224854, "rewards/accuracies": 0.5, "rewards/chosen": -0.11090448498725891, "rewards/margins": 0.08341255784034729, "rewards/rejected": -0.1943170726299286, "step": 1472 }, { "epoch": 0.8985816684459357, "grad_norm": 2.780083179473877, "learning_rate": 6.586160440906307e-06, "log_odds_chosen": 0.5321213603019714, "log_odds_ratio": -0.5944370031356812, "logits/chosen": -0.9711858630180359, "logits/rejected": -0.956062912940979, "logps/chosen": -0.685352087020874, "logps/rejected": -0.9815030097961426, "loss": 1.1246, "nll_loss": 1.047903299331665, "rewards/accuracies": 0.5, "rewards/chosen": -0.0685352087020874, "rewards/margins": 0.029615094885230064, "rewards/rejected": -0.09815031290054321, "step": 1473 }, { "epoch": 0.8991917035229525, "grad_norm": 2.453517436981201, "learning_rate": 6.585180649112064e-06, "log_odds_chosen": 2.4817428588867188, "log_odds_ratio": -0.14913338422775269, "logits/chosen": -0.6302494406700134, "logits/rejected": -0.7099666595458984, "logps/chosen": -0.4705771207809448, "logps/rejected": -1.9946929216384888, "loss": 0.9821, "nll_loss": 0.7691850662231445, "rewards/accuracies": 1.0, "rewards/chosen": -0.04705771058797836, "rewards/margins": 0.1524115800857544, "rewards/rejected": -0.19946929812431335, "step": 1474 }, { "epoch": 0.8998017385999695, "grad_norm": 2.4604239463806152, "learning_rate": 6.58420085731782e-06, "log_odds_chosen": 0.8042159080505371, "log_odds_ratio": -0.6650793552398682, "logits/chosen": -1.0395315885543823, "logits/rejected": -1.0703660249710083, "logps/chosen": -0.9358577132225037, "logps/rejected": -1.5531672239303589, "loss": 1.198, "nll_loss": 1.129841685295105, "rewards/accuracies": 0.625, "rewards/chosen": -0.0935857743024826, "rewards/margins": 0.06173095107078552, "rewards/rejected": -0.15531672537326813, "step": 1475 }, { "epoch": 0.9004117736769864, "grad_norm": 4.287633419036865, "learning_rate": 6.583221065523575e-06, "log_odds_chosen": 0.9134787917137146, "log_odds_ratio": -0.5801113247871399, "logits/chosen": -1.1268248558044434, "logits/rejected": -1.186445951461792, "logps/chosen": -0.8653611540794373, "logps/rejected": -1.5506880283355713, "loss": 1.2572, "nll_loss": 1.1544544696807861, "rewards/accuracies": 0.625, "rewards/chosen": -0.08653611689805984, "rewards/margins": 0.06853269040584564, "rewards/rejected": -0.15506881475448608, "step": 1476 }, { "epoch": 0.9010218087540034, "grad_norm": 2.3215718269348145, "learning_rate": 6.582241273729332e-06, "log_odds_chosen": 1.559342861175537, "log_odds_ratio": -0.2852870225906372, "logits/chosen": -0.7446753978729248, "logits/rejected": -0.8758214116096497, "logps/chosen": -0.6626322865486145, "logps/rejected": -1.7355509996414185, "loss": 1.0741, "nll_loss": 0.985927164554596, "rewards/accuracies": 1.0, "rewards/chosen": -0.06626322865486145, "rewards/margins": 0.10729186981916428, "rewards/rejected": -0.17355510592460632, "step": 1477 }, { "epoch": 0.9016318438310202, "grad_norm": 2.2558181285858154, "learning_rate": 6.581261481935088e-06, "log_odds_chosen": 0.5380185842514038, "log_odds_ratio": -0.7591995596885681, "logits/chosen": -0.9809861183166504, "logits/rejected": -0.88303542137146, "logps/chosen": -1.0370391607284546, "logps/rejected": -1.5144386291503906, "loss": 1.0643, "nll_loss": 1.1734564304351807, "rewards/accuracies": 0.5, "rewards/chosen": -0.10370391607284546, "rewards/margins": 0.04773995280265808, "rewards/rejected": -0.15144385397434235, "step": 1478 }, { "epoch": 0.9022418789080372, "grad_norm": 2.5148990154266357, "learning_rate": 6.580281690140845e-06, "log_odds_chosen": 0.3245880901813507, "log_odds_ratio": -0.5799049139022827, "logits/chosen": -1.108154296875, "logits/rejected": -1.020564079284668, "logps/chosen": -1.1060203313827515, "logps/rejected": -1.3399358987808228, "loss": 1.28, "nll_loss": 1.141456961631775, "rewards/accuracies": 0.625, "rewards/chosen": -0.11060203611850739, "rewards/margins": 0.023391541093587875, "rewards/rejected": -0.13399358093738556, "step": 1479 }, { "epoch": 0.9028519139850542, "grad_norm": 3.8034427165985107, "learning_rate": 6.579301898346601e-06, "log_odds_chosen": 0.2206478863954544, "log_odds_ratio": -0.713421106338501, "logits/chosen": -1.192835807800293, "logits/rejected": -1.133303165435791, "logps/chosen": -0.9693179130554199, "logps/rejected": -1.1260302066802979, "loss": 1.2068, "nll_loss": 1.2028107643127441, "rewards/accuracies": 0.25, "rewards/chosen": -0.09693179279565811, "rewards/margins": 0.015671230852603912, "rewards/rejected": -0.11260302364826202, "step": 1480 }, { "epoch": 0.9034619490620711, "grad_norm": 1.257561445236206, "learning_rate": 6.5783221065523575e-06, "log_odds_chosen": 0.45786988735198975, "log_odds_ratio": -0.6254637837409973, "logits/chosen": -1.1865488290786743, "logits/rejected": -0.8789098858833313, "logps/chosen": -0.9551989436149597, "logps/rejected": -1.1897294521331787, "loss": 1.1835, "nll_loss": 1.4513792991638184, "rewards/accuracies": 0.625, "rewards/chosen": -0.09551989287137985, "rewards/margins": 0.023453054949641228, "rewards/rejected": -0.11897295713424683, "step": 1481 }, { "epoch": 0.904071984139088, "grad_norm": 0.9252704381942749, "learning_rate": 6.577342314758114e-06, "log_odds_chosen": 1.4591346979141235, "log_odds_ratio": -0.312267005443573, "logits/chosen": -1.032113790512085, "logits/rejected": -0.990732729434967, "logps/chosen": -0.6468921303749084, "logps/rejected": -1.512293815612793, "loss": 1.0841, "nll_loss": 0.8230503797531128, "rewards/accuracies": 0.875, "rewards/chosen": -0.06468921899795532, "rewards/margins": 0.08654017746448517, "rewards/rejected": -0.1512293964624405, "step": 1482 }, { "epoch": 0.9046820192161049, "grad_norm": 1.1961872577667236, "learning_rate": 6.57636252296387e-06, "log_odds_chosen": 0.7564437389373779, "log_odds_ratio": -0.5212407112121582, "logits/chosen": -1.1981854438781738, "logits/rejected": -1.2495734691619873, "logps/chosen": -0.816828727722168, "logps/rejected": -1.3311463594436646, "loss": 1.1839, "nll_loss": 1.1815249919891357, "rewards/accuracies": 0.875, "rewards/chosen": -0.08168287575244904, "rewards/margins": 0.051431767642498016, "rewards/rejected": -0.13311463594436646, "step": 1483 }, { "epoch": 0.9052920542931219, "grad_norm": 1.5977848768234253, "learning_rate": 6.575382731169626e-06, "log_odds_chosen": 0.5717309713363647, "log_odds_ratio": -0.600559413433075, "logits/chosen": -0.9588490724563599, "logits/rejected": -0.9191207885742188, "logps/chosen": -0.7983802556991577, "logps/rejected": -1.1395246982574463, "loss": 1.1619, "nll_loss": 1.0552295446395874, "rewards/accuracies": 0.5, "rewards/chosen": -0.07983802258968353, "rewards/margins": 0.03411444276571274, "rewards/rejected": -0.11395246535539627, "step": 1484 }, { "epoch": 0.9059020893701388, "grad_norm": 5.189253807067871, "learning_rate": 6.574402939375383e-06, "log_odds_chosen": 0.6200368404388428, "log_odds_ratio": -0.5067791938781738, "logits/chosen": -0.8813613057136536, "logits/rejected": -1.1441630125045776, "logps/chosen": -0.7361705899238586, "logps/rejected": -1.1262904405593872, "loss": 1.0352, "nll_loss": 0.9927424788475037, "rewards/accuracies": 0.625, "rewards/chosen": -0.07361705601215363, "rewards/margins": 0.039011985063552856, "rewards/rejected": -0.11262904852628708, "step": 1485 }, { "epoch": 0.9065121244471557, "grad_norm": 2.2775015830993652, "learning_rate": 6.573423147581139e-06, "log_odds_chosen": 0.9785517454147339, "log_odds_ratio": -0.4558624029159546, "logits/chosen": -0.9915270805358887, "logits/rejected": -0.8743374347686768, "logps/chosen": -1.1552906036376953, "logps/rejected": -1.929631233215332, "loss": 1.1624, "nll_loss": 1.133954405784607, "rewards/accuracies": 0.75, "rewards/chosen": -0.11552906036376953, "rewards/margins": 0.07743406295776367, "rewards/rejected": -0.1929631233215332, "step": 1486 }, { "epoch": 0.9071221595241726, "grad_norm": 2.2171554565429688, "learning_rate": 6.572443355786894e-06, "log_odds_chosen": 0.5694516897201538, "log_odds_ratio": -0.5090508460998535, "logits/chosen": -1.129941463470459, "logits/rejected": -0.9502127170562744, "logps/chosen": -0.862078070640564, "logps/rejected": -1.1818159818649292, "loss": 1.1893, "nll_loss": 1.147647500038147, "rewards/accuracies": 0.75, "rewards/chosen": -0.0862078070640564, "rewards/margins": 0.031973786652088165, "rewards/rejected": -0.11818159371614456, "step": 1487 }, { "epoch": 0.9077321946011896, "grad_norm": 1.4616377353668213, "learning_rate": 6.571463563992651e-06, "log_odds_chosen": 0.9101369380950928, "log_odds_ratio": -0.5880981683731079, "logits/chosen": -1.0941283702850342, "logits/rejected": -1.057978630065918, "logps/chosen": -0.849696159362793, "logps/rejected": -1.5815033912658691, "loss": 1.1043, "nll_loss": 1.1255848407745361, "rewards/accuracies": 0.625, "rewards/chosen": -0.08496962487697601, "rewards/margins": 0.07318072766065598, "rewards/rejected": -0.1581503450870514, "step": 1488 }, { "epoch": 0.9083422296782065, "grad_norm": 1.7567144632339478, "learning_rate": 6.570483772198407e-06, "log_odds_chosen": 0.2101203054189682, "log_odds_ratio": -0.6779602766036987, "logits/chosen": -1.0767712593078613, "logits/rejected": -1.0660414695739746, "logps/chosen": -0.9780689477920532, "logps/rejected": -1.1491295099258423, "loss": 1.1818, "nll_loss": 1.0931158065795898, "rewards/accuracies": 0.5, "rewards/chosen": -0.0978069007396698, "rewards/margins": 0.017106054350733757, "rewards/rejected": -0.11491294950246811, "step": 1489 }, { "epoch": 0.9089522647552234, "grad_norm": 1.2688664197921753, "learning_rate": 6.5695039804041635e-06, "log_odds_chosen": 0.6917928457260132, "log_odds_ratio": -0.6109912395477295, "logits/chosen": -1.1467243432998657, "logits/rejected": -1.0950038433074951, "logps/chosen": -0.8350560665130615, "logps/rejected": -1.3013702630996704, "loss": 1.0837, "nll_loss": 1.0216937065124512, "rewards/accuracies": 0.625, "rewards/chosen": -0.08350560814142227, "rewards/margins": 0.04663141816854477, "rewards/rejected": -0.13013702630996704, "step": 1490 }, { "epoch": 0.9095622998322404, "grad_norm": 9.518078804016113, "learning_rate": 6.56852418860992e-06, "log_odds_chosen": 1.1208703517913818, "log_odds_ratio": -0.45358723402023315, "logits/chosen": -0.8997596502304077, "logits/rejected": -0.9064932465553284, "logps/chosen": -0.8064699769020081, "logps/rejected": -1.6147973537445068, "loss": 1.1042, "nll_loss": 0.9869530200958252, "rewards/accuracies": 0.75, "rewards/chosen": -0.08064700663089752, "rewards/margins": 0.08083274215459824, "rewards/rejected": -0.16147974133491516, "step": 1491 }, { "epoch": 0.9101723349092573, "grad_norm": 1.71310555934906, "learning_rate": 6.5675443968156765e-06, "log_odds_chosen": 1.061702013015747, "log_odds_ratio": -0.42978736758232117, "logits/chosen": -0.901756763458252, "logits/rejected": -0.8934377431869507, "logps/chosen": -0.7416117191314697, "logps/rejected": -1.4453271627426147, "loss": 0.9546, "nll_loss": 0.9291298985481262, "rewards/accuracies": 0.75, "rewards/chosen": -0.07416117191314697, "rewards/margins": 0.07037154585123062, "rewards/rejected": -0.144532710313797, "step": 1492 }, { "epoch": 0.9107823699862743, "grad_norm": 1.9863909482955933, "learning_rate": 6.566564605021433e-06, "log_odds_chosen": 0.544481098651886, "log_odds_ratio": -0.9419079422950745, "logits/chosen": -0.9019052982330322, "logits/rejected": -0.9527245163917542, "logps/chosen": -1.1861709356307983, "logps/rejected": -1.6365559101104736, "loss": 1.1586, "nll_loss": 1.2859528064727783, "rewards/accuracies": 0.375, "rewards/chosen": -0.11861709505319595, "rewards/margins": 0.045038506388664246, "rewards/rejected": -0.1636556088924408, "step": 1493 }, { "epoch": 0.9113924050632911, "grad_norm": 1.1533316373825073, "learning_rate": 6.565584813227189e-06, "log_odds_chosen": 0.12709172070026398, "log_odds_ratio": -0.7721453309059143, "logits/chosen": -1.1532773971557617, "logits/rejected": -1.1215934753417969, "logps/chosen": -0.832535445690155, "logps/rejected": -0.9401874542236328, "loss": 1.0239, "nll_loss": 0.9920914173126221, "rewards/accuracies": 0.375, "rewards/chosen": -0.08325354754924774, "rewards/margins": 0.010765193030238152, "rewards/rejected": -0.09401874244213104, "step": 1494 }, { "epoch": 0.9120024401403081, "grad_norm": 1.2089747190475464, "learning_rate": 6.564605021432945e-06, "log_odds_chosen": 0.332023024559021, "log_odds_ratio": -0.5996142029762268, "logits/chosen": -0.9664623737335205, "logits/rejected": -0.9063448905944824, "logps/chosen": -0.7799562215805054, "logps/rejected": -0.9918370246887207, "loss": 1.0394, "nll_loss": 1.001218557357788, "rewards/accuracies": 0.625, "rewards/chosen": -0.07799562066793442, "rewards/margins": 0.02118808589875698, "rewards/rejected": -0.09918370842933655, "step": 1495 }, { "epoch": 0.912612475217325, "grad_norm": 3.5903048515319824, "learning_rate": 6.563625229638702e-06, "log_odds_chosen": 1.0986076593399048, "log_odds_ratio": -0.39258068799972534, "logits/chosen": -0.9114586710929871, "logits/rejected": -1.0049090385437012, "logps/chosen": -0.6486481428146362, "logps/rejected": -1.3448562622070312, "loss": 0.9926, "nll_loss": 0.8012422323226929, "rewards/accuracies": 1.0, "rewards/chosen": -0.06486482173204422, "rewards/margins": 0.06962081789970398, "rewards/rejected": -0.1344856321811676, "step": 1496 }, { "epoch": 0.9132225102943419, "grad_norm": 2.182755708694458, "learning_rate": 6.562645437844458e-06, "log_odds_chosen": 0.9430890083312988, "log_odds_ratio": -0.5313423871994019, "logits/chosen": -1.0389214754104614, "logits/rejected": -1.014899492263794, "logps/chosen": -1.0618778467178345, "logps/rejected": -1.5753213167190552, "loss": 1.099, "nll_loss": 1.294402003288269, "rewards/accuracies": 0.75, "rewards/chosen": -0.10618779063224792, "rewards/margins": 0.05134434625506401, "rewards/rejected": -0.15753214061260223, "step": 1497 }, { "epoch": 0.9138325453713588, "grad_norm": 3.785183906555176, "learning_rate": 6.561665646050214e-06, "log_odds_chosen": 0.6184197664260864, "log_odds_ratio": -0.6161119937896729, "logits/chosen": -0.9079425930976868, "logits/rejected": -0.8966100811958313, "logps/chosen": -0.7211639881134033, "logps/rejected": -1.1107779741287231, "loss": 1.1356, "nll_loss": 0.9585392475128174, "rewards/accuracies": 0.5, "rewards/chosen": -0.07211639732122421, "rewards/margins": 0.03896140307188034, "rewards/rejected": -0.11107779294252396, "step": 1498 }, { "epoch": 0.9144425804483758, "grad_norm": 2.9076051712036133, "learning_rate": 6.56068585425597e-06, "log_odds_chosen": 0.3065933883190155, "log_odds_ratio": -0.6216230988502502, "logits/chosen": -0.9895074367523193, "logits/rejected": -1.0531126260757446, "logps/chosen": -1.148467779159546, "logps/rejected": -1.3926228284835815, "loss": 1.2991, "nll_loss": 1.2370247840881348, "rewards/accuracies": 0.625, "rewards/chosen": -0.11484678834676743, "rewards/margins": 0.024415496736764908, "rewards/rejected": -0.13926228880882263, "step": 1499 }, { "epoch": 0.9150526155253927, "grad_norm": 1.794013261795044, "learning_rate": 6.559706062461726e-06, "log_odds_chosen": 0.4605436623096466, "log_odds_ratio": -0.550311803817749, "logits/chosen": -0.8194522857666016, "logits/rejected": -0.8037203550338745, "logps/chosen": -0.7990760207176208, "logps/rejected": -1.0294601917266846, "loss": 1.0211, "nll_loss": 0.9788553714752197, "rewards/accuracies": 0.625, "rewards/chosen": -0.0799076110124588, "rewards/margins": 0.023038409650325775, "rewards/rejected": -0.10294601321220398, "step": 1500 }, { "epoch": 0.9156626506024096, "grad_norm": 1.2392897605895996, "learning_rate": 6.5587262706674825e-06, "log_odds_chosen": 1.4441949129104614, "log_odds_ratio": -0.5006154775619507, "logits/chosen": -0.9828604459762573, "logits/rejected": -0.8533739447593689, "logps/chosen": -0.7958362102508545, "logps/rejected": -1.9149861335754395, "loss": 0.9992, "nll_loss": 1.071223258972168, "rewards/accuracies": 0.625, "rewards/chosen": -0.07958362996578217, "rewards/margins": 0.11191500723361969, "rewards/rejected": -0.19149863719940186, "step": 1501 }, { "epoch": 0.9162726856794265, "grad_norm": 1.4938335418701172, "learning_rate": 6.5577464788732395e-06, "log_odds_chosen": 0.7912643551826477, "log_odds_ratio": -0.4897606670856476, "logits/chosen": -0.8502410650253296, "logits/rejected": -0.8035738468170166, "logps/chosen": -0.6913476586341858, "logps/rejected": -1.0299638509750366, "loss": 1.0653, "nll_loss": 1.0356941223144531, "rewards/accuracies": 0.625, "rewards/chosen": -0.06913476437330246, "rewards/margins": 0.03386162221431732, "rewards/rejected": -0.10299638658761978, "step": 1502 }, { "epoch": 0.9168827207564435, "grad_norm": 1.1806236505508423, "learning_rate": 6.556766687078996e-06, "log_odds_chosen": 1.2068946361541748, "log_odds_ratio": -0.43614089488983154, "logits/chosen": -0.8839513063430786, "logits/rejected": -0.672034740447998, "logps/chosen": -0.6055874824523926, "logps/rejected": -1.4300342798233032, "loss": 1.003, "nll_loss": 0.7917687892913818, "rewards/accuracies": 0.625, "rewards/chosen": -0.0605587512254715, "rewards/margins": 0.0824446827173233, "rewards/rejected": -0.1430034339427948, "step": 1503 }, { "epoch": 0.9174927558334605, "grad_norm": 1.7422071695327759, "learning_rate": 6.555786895284751e-06, "log_odds_chosen": 0.6493856906890869, "log_odds_ratio": -0.4907328486442566, "logits/chosen": -1.0203015804290771, "logits/rejected": -1.020918846130371, "logps/chosen": -0.8431510925292969, "logps/rejected": -1.2749162912368774, "loss": 0.9563, "nll_loss": 0.9714062213897705, "rewards/accuracies": 0.75, "rewards/chosen": -0.08431510627269745, "rewards/margins": 0.04317652806639671, "rewards/rejected": -0.12749162316322327, "step": 1504 }, { "epoch": 0.9181027909104773, "grad_norm": 1.257216453552246, "learning_rate": 6.554807103490508e-06, "log_odds_chosen": 0.7351247072219849, "log_odds_ratio": -0.5883452892303467, "logits/chosen": -1.0798394680023193, "logits/rejected": -0.9098517894744873, "logps/chosen": -0.7479734420776367, "logps/rejected": -1.3372254371643066, "loss": 1.0952, "nll_loss": 0.9834073781967163, "rewards/accuracies": 0.5, "rewards/chosen": -0.07479734718799591, "rewards/margins": 0.058925192803144455, "rewards/rejected": -0.13372254371643066, "step": 1505 }, { "epoch": 0.9187128259874943, "grad_norm": 1.126091480255127, "learning_rate": 6.553827311696264e-06, "log_odds_chosen": 0.9568291306495667, "log_odds_ratio": -0.5533475279808044, "logits/chosen": -1.101815104484558, "logits/rejected": -1.1385116577148438, "logps/chosen": -0.8440772294998169, "logps/rejected": -1.5017187595367432, "loss": 1.1676, "nll_loss": 1.2477785348892212, "rewards/accuracies": 0.625, "rewards/chosen": -0.0844077318906784, "rewards/margins": 0.06576413661241531, "rewards/rejected": -0.15017187595367432, "step": 1506 }, { "epoch": 0.9193228610645112, "grad_norm": 1.4376431703567505, "learning_rate": 6.552847519902021e-06, "log_odds_chosen": 0.8062112331390381, "log_odds_ratio": -0.5485752820968628, "logits/chosen": -0.8288381099700928, "logits/rejected": -0.9095072746276855, "logps/chosen": -0.9556858539581299, "logps/rejected": -1.549451470375061, "loss": 1.1631, "nll_loss": 0.985410213470459, "rewards/accuracies": 0.625, "rewards/chosen": -0.09556858241558075, "rewards/margins": 0.05937657505273819, "rewards/rejected": -0.15494515001773834, "step": 1507 }, { "epoch": 0.9199328961415282, "grad_norm": 2.2302048206329346, "learning_rate": 6.551867728107777e-06, "log_odds_chosen": 1.89375638961792, "log_odds_ratio": -0.2610481381416321, "logits/chosen": -1.0360829830169678, "logits/rejected": -0.8660251498222351, "logps/chosen": -0.8600760698318481, "logps/rejected": -2.315312623977661, "loss": 1.1442, "nll_loss": 1.1064095497131348, "rewards/accuracies": 1.0, "rewards/chosen": -0.08600760996341705, "rewards/margins": 0.14552366733551025, "rewards/rejected": -0.2315312922000885, "step": 1508 }, { "epoch": 0.920542931218545, "grad_norm": 3.7012758255004883, "learning_rate": 6.550887936313533e-06, "log_odds_chosen": 0.47128403186798096, "log_odds_ratio": -0.5726262331008911, "logits/chosen": -1.0273630619049072, "logits/rejected": -0.7596001625061035, "logps/chosen": -0.955092191696167, "logps/rejected": -1.3037480115890503, "loss": 1.0846, "nll_loss": 1.0314955711364746, "rewards/accuracies": 0.625, "rewards/chosen": -0.09550921618938446, "rewards/margins": 0.03486558049917221, "rewards/rejected": -0.13037480413913727, "step": 1509 }, { "epoch": 0.921152966295562, "grad_norm": 3.6972272396087646, "learning_rate": 6.549908144519289e-06, "log_odds_chosen": 1.259592056274414, "log_odds_ratio": -0.3689601719379425, "logits/chosen": -1.033813238143921, "logits/rejected": -0.9005993604660034, "logps/chosen": -0.7845463752746582, "logps/rejected": -1.5895366668701172, "loss": 1.1634, "nll_loss": 1.125245451927185, "rewards/accuracies": 0.875, "rewards/chosen": -0.0784546434879303, "rewards/margins": 0.08049902319908142, "rewards/rejected": -0.15895365178585052, "step": 1510 }, { "epoch": 0.9217630013725789, "grad_norm": 2.1078028678894043, "learning_rate": 6.548928352725045e-06, "log_odds_chosen": 1.1558762788772583, "log_odds_ratio": -0.5059009194374084, "logits/chosen": -0.9455853700637817, "logits/rejected": -1.0188055038452148, "logps/chosen": -0.6584396362304688, "logps/rejected": -1.3605835437774658, "loss": 1.1395, "nll_loss": 1.1593599319458008, "rewards/accuracies": 0.75, "rewards/chosen": -0.06584396958351135, "rewards/margins": 0.07021439075469971, "rewards/rejected": -0.13605836033821106, "step": 1511 }, { "epoch": 0.9223730364495959, "grad_norm": 1.7162251472473145, "learning_rate": 6.5479485609308015e-06, "log_odds_chosen": 0.4647894501686096, "log_odds_ratio": -0.7106590270996094, "logits/chosen": -1.0985016822814941, "logits/rejected": -1.1317172050476074, "logps/chosen": -0.8788753151893616, "logps/rejected": -1.2175973653793335, "loss": 1.2095, "nll_loss": 1.227578043937683, "rewards/accuracies": 0.375, "rewards/chosen": -0.08788753300905228, "rewards/margins": 0.033872202038764954, "rewards/rejected": -0.12175973504781723, "step": 1512 }, { "epoch": 0.9229830715266127, "grad_norm": 1.4086908102035522, "learning_rate": 6.5469687691365585e-06, "log_odds_chosen": 0.7577129602432251, "log_odds_ratio": -0.4577093720436096, "logits/chosen": -0.9216588735580444, "logits/rejected": -1.001157522201538, "logps/chosen": -0.6201026439666748, "logps/rejected": -1.051344394683838, "loss": 1.0508, "nll_loss": 0.9558085203170776, "rewards/accuracies": 0.625, "rewards/chosen": -0.0620102658867836, "rewards/margins": 0.04312417656183243, "rewards/rejected": -0.10513444244861603, "step": 1513 }, { "epoch": 0.9235931066036297, "grad_norm": 2.5824966430664062, "learning_rate": 6.545988977342315e-06, "log_odds_chosen": 0.4131944477558136, "log_odds_ratio": -0.5492684841156006, "logits/chosen": -1.1512776613235474, "logits/rejected": -1.1993343830108643, "logps/chosen": -0.780993640422821, "logps/rejected": -0.9939608573913574, "loss": 1.3065, "nll_loss": 0.9636596441268921, "rewards/accuracies": 0.875, "rewards/chosen": -0.07809937000274658, "rewards/margins": 0.02129671722650528, "rewards/rejected": -0.09939607977867126, "step": 1514 }, { "epoch": 0.9242031416806467, "grad_norm": 1.2512259483337402, "learning_rate": 6.545009185548071e-06, "log_odds_chosen": 1.7813459634780884, "log_odds_ratio": -0.5135937929153442, "logits/chosen": -0.8748586773872375, "logits/rejected": -0.9800803661346436, "logps/chosen": -0.8549656271934509, "logps/rejected": -2.2620534896850586, "loss": 1.0949, "nll_loss": 1.0255703926086426, "rewards/accuracies": 0.5, "rewards/chosen": -0.08549656718969345, "rewards/margins": 0.1407087743282318, "rewards/rejected": -0.22620534896850586, "step": 1515 }, { "epoch": 0.9248131767576636, "grad_norm": 2.0941834449768066, "learning_rate": 6.544029393753827e-06, "log_odds_chosen": 0.9330970644950867, "log_odds_ratio": -0.6566283702850342, "logits/chosen": -0.6557670831680298, "logits/rejected": -0.8170104026794434, "logps/chosen": -0.7970994114875793, "logps/rejected": -1.474007248878479, "loss": 1.1004, "nll_loss": 0.8391571044921875, "rewards/accuracies": 0.5, "rewards/chosen": -0.07970993965864182, "rewards/margins": 0.06769078969955444, "rewards/rejected": -0.14740073680877686, "step": 1516 }, { "epoch": 0.9254232118346805, "grad_norm": 5.496551036834717, "learning_rate": 6.543049601959583e-06, "log_odds_chosen": 0.6297461986541748, "log_odds_ratio": -0.7672339081764221, "logits/chosen": -1.0575120449066162, "logits/rejected": -0.9350992441177368, "logps/chosen": -0.9301233887672424, "logps/rejected": -1.4324688911437988, "loss": 1.1785, "nll_loss": 1.171114206314087, "rewards/accuracies": 0.5, "rewards/chosen": -0.09301234036684036, "rewards/margins": 0.05023455619812012, "rewards/rejected": -0.14324688911437988, "step": 1517 }, { "epoch": 0.9260332469116974, "grad_norm": 2.2559478282928467, "learning_rate": 6.54206981016534e-06, "log_odds_chosen": 1.774928092956543, "log_odds_ratio": -0.40098538994789124, "logits/chosen": -1.2480273246765137, "logits/rejected": -1.1371252536773682, "logps/chosen": -0.731948733329773, "logps/rejected": -2.129767656326294, "loss": 1.3462, "nll_loss": 1.250445008277893, "rewards/accuracies": 0.75, "rewards/chosen": -0.07319488376379013, "rewards/margins": 0.1397818922996521, "rewards/rejected": -0.21297678351402283, "step": 1518 }, { "epoch": 0.9266432819887144, "grad_norm": 1.4896719455718994, "learning_rate": 6.541090018371096e-06, "log_odds_chosen": 2.028672695159912, "log_odds_ratio": -0.23624658584594727, "logits/chosen": -1.2049531936645508, "logits/rejected": -1.141369104385376, "logps/chosen": -0.8509537577629089, "logps/rejected": -2.3240365982055664, "loss": 1.3047, "nll_loss": 1.4912251234054565, "rewards/accuracies": 0.875, "rewards/chosen": -0.0850953757762909, "rewards/margins": 0.14730829000473022, "rewards/rejected": -0.23240366578102112, "step": 1519 }, { "epoch": 0.9272533170657313, "grad_norm": 1.3718068599700928, "learning_rate": 6.540110226576852e-06, "log_odds_chosen": 0.4412913918495178, "log_odds_ratio": -0.58497554063797, "logits/chosen": -1.0944818258285522, "logits/rejected": -1.0995385646820068, "logps/chosen": -0.7449988126754761, "logps/rejected": -0.9789481163024902, "loss": 1.0859, "nll_loss": 0.9909513592720032, "rewards/accuracies": 0.625, "rewards/chosen": -0.07449989020824432, "rewards/margins": 0.023394931107759476, "rewards/rejected": -0.0978948175907135, "step": 1520 }, { "epoch": 0.9278633521427482, "grad_norm": 5.494888782501221, "learning_rate": 6.539130434782609e-06, "log_odds_chosen": 1.3628804683685303, "log_odds_ratio": -0.3630605638027191, "logits/chosen": -0.8946200013160706, "logits/rejected": -0.9951670169830322, "logps/chosen": -0.7144083976745605, "logps/rejected": -1.6494678258895874, "loss": 1.1668, "nll_loss": 0.8952453136444092, "rewards/accuracies": 1.0, "rewards/chosen": -0.07144084572792053, "rewards/margins": 0.09350594878196716, "rewards/rejected": -0.1649467796087265, "step": 1521 }, { "epoch": 0.9284733872197651, "grad_norm": 1.4929908514022827, "learning_rate": 6.5381506429883644e-06, "log_odds_chosen": 2.3117339611053467, "log_odds_ratio": -0.36234286427497864, "logits/chosen": -0.8950282335281372, "logits/rejected": -0.9088782072067261, "logps/chosen": -0.5294355750083923, "logps/rejected": -2.1941206455230713, "loss": 1.2002, "nll_loss": 0.958667516708374, "rewards/accuracies": 0.875, "rewards/chosen": -0.05294355750083923, "rewards/margins": 0.1664685159921646, "rewards/rejected": -0.21941207349300385, "step": 1522 }, { "epoch": 0.9290834222967821, "grad_norm": 2.231144905090332, "learning_rate": 6.5371708511941206e-06, "log_odds_chosen": 0.667698323726654, "log_odds_ratio": -0.5808815956115723, "logits/chosen": -0.9898830056190491, "logits/rejected": -0.8480097055435181, "logps/chosen": -1.1540052890777588, "logps/rejected": -1.570399284362793, "loss": 1.2174, "nll_loss": 1.0753484964370728, "rewards/accuracies": 0.75, "rewards/chosen": -0.1154005378484726, "rewards/margins": 0.04163938760757446, "rewards/rejected": -0.15703992545604706, "step": 1523 }, { "epoch": 0.929693457373799, "grad_norm": 2.100661039352417, "learning_rate": 6.5361910593998775e-06, "log_odds_chosen": 1.094697117805481, "log_odds_ratio": -0.4320843517780304, "logits/chosen": -1.039333701133728, "logits/rejected": -0.9483596086502075, "logps/chosen": -0.6532077789306641, "logps/rejected": -1.349104881286621, "loss": 1.1172, "nll_loss": 0.9230038523674011, "rewards/accuracies": 0.75, "rewards/chosen": -0.06532078236341476, "rewards/margins": 0.06958969682455063, "rewards/rejected": -0.1349104791879654, "step": 1524 }, { "epoch": 0.9303034924508159, "grad_norm": 2.1527493000030518, "learning_rate": 6.535211267605634e-06, "log_odds_chosen": 1.3242731094360352, "log_odds_ratio": -0.4147953391075134, "logits/chosen": -0.814926266670227, "logits/rejected": -0.8309200406074524, "logps/chosen": -0.7825602889060974, "logps/rejected": -1.6949903964996338, "loss": 1.1156, "nll_loss": 0.8963558673858643, "rewards/accuracies": 0.75, "rewards/chosen": -0.0782560259103775, "rewards/margins": 0.09124302119016647, "rewards/rejected": -0.16949903964996338, "step": 1525 }, { "epoch": 0.9309135275278329, "grad_norm": 1.5915203094482422, "learning_rate": 6.53423147581139e-06, "log_odds_chosen": 0.9386541247367859, "log_odds_ratio": -0.6062834858894348, "logits/chosen": -1.1226551532745361, "logits/rejected": -0.9693746566772461, "logps/chosen": -0.9743300080299377, "logps/rejected": -1.6261705160140991, "loss": 1.3559, "nll_loss": 1.470435380935669, "rewards/accuracies": 0.75, "rewards/chosen": -0.09743300080299377, "rewards/margins": 0.06518404930830002, "rewards/rejected": -0.1626170575618744, "step": 1526 }, { "epoch": 0.9315235626048498, "grad_norm": 1.2338554859161377, "learning_rate": 6.533251684017146e-06, "log_odds_chosen": -0.0263570174574852, "log_odds_ratio": -0.7480180263519287, "logits/chosen": -1.0219621658325195, "logits/rejected": -0.8203169107437134, "logps/chosen": -0.8057317733764648, "logps/rejected": -0.8116195201873779, "loss": 0.8976, "nll_loss": 0.9834222793579102, "rewards/accuracies": 0.375, "rewards/chosen": -0.0805731788277626, "rewards/margins": 0.0005887774750590324, "rewards/rejected": -0.08116196095943451, "step": 1527 }, { "epoch": 0.9321335976818667, "grad_norm": 3.0940449237823486, "learning_rate": 6.532271892222902e-06, "log_odds_chosen": 1.392853021621704, "log_odds_ratio": -0.47161391377449036, "logits/chosen": -0.979358434677124, "logits/rejected": -0.956018328666687, "logps/chosen": -0.6817711591720581, "logps/rejected": -1.6634175777435303, "loss": 1.1364, "nll_loss": 0.9916106462478638, "rewards/accuracies": 0.75, "rewards/chosen": -0.06817711889743805, "rewards/margins": 0.0981646403670311, "rewards/rejected": -0.16634175181388855, "step": 1528 }, { "epoch": 0.9327436327588836, "grad_norm": 1.3494758605957031, "learning_rate": 6.531292100428658e-06, "log_odds_chosen": 0.20404544472694397, "log_odds_ratio": -0.7171281576156616, "logits/chosen": -0.9659404754638672, "logits/rejected": -1.0670331716537476, "logps/chosen": -0.8938862085342407, "logps/rejected": -1.0073814392089844, "loss": 1.0707, "nll_loss": 1.0601460933685303, "rewards/accuracies": 0.5, "rewards/chosen": -0.08938862383365631, "rewards/margins": 0.011349514126777649, "rewards/rejected": -0.10073813796043396, "step": 1529 }, { "epoch": 0.9333536678359006, "grad_norm": 2.2640414237976074, "learning_rate": 6.530312308634415e-06, "log_odds_chosen": 0.6634859442710876, "log_odds_ratio": -0.5986598134040833, "logits/chosen": -1.0943679809570312, "logits/rejected": -1.1530368328094482, "logps/chosen": -0.9041319489479065, "logps/rejected": -1.4311201572418213, "loss": 1.1756, "nll_loss": 1.036246418952942, "rewards/accuracies": 0.75, "rewards/chosen": -0.09041319787502289, "rewards/margins": 0.05269881710410118, "rewards/rejected": -0.14311200380325317, "step": 1530 }, { "epoch": 0.9339637029129175, "grad_norm": 1.3533854484558105, "learning_rate": 6.529332516840171e-06, "log_odds_chosen": 0.467329204082489, "log_odds_ratio": -0.6087016463279724, "logits/chosen": -1.0284802913665771, "logits/rejected": -1.1496119499206543, "logps/chosen": -0.9023088812828064, "logps/rejected": -1.115018367767334, "loss": 1.1583, "nll_loss": 1.3410606384277344, "rewards/accuracies": 0.625, "rewards/chosen": -0.09023088961839676, "rewards/margins": 0.021270956844091415, "rewards/rejected": -0.11150185018777847, "step": 1531 }, { "epoch": 0.9345737379899344, "grad_norm": 1.9162389039993286, "learning_rate": 6.528352725045928e-06, "log_odds_chosen": 0.22309325635433197, "log_odds_ratio": -0.6583173274993896, "logits/chosen": -0.9967734813690186, "logits/rejected": -1.0189714431762695, "logps/chosen": -0.9770963788032532, "logps/rejected": -1.1770362854003906, "loss": 1.2037, "nll_loss": 1.113695740699768, "rewards/accuracies": 0.375, "rewards/chosen": -0.09770964831113815, "rewards/margins": 0.019993990659713745, "rewards/rejected": -0.1177036315202713, "step": 1532 }, { "epoch": 0.9351837730669513, "grad_norm": 1.291683316230774, "learning_rate": 6.5273729332516835e-06, "log_odds_chosen": -0.09566305577754974, "log_odds_ratio": -0.7876400947570801, "logits/chosen": -1.0997368097305298, "logits/rejected": -1.027458906173706, "logps/chosen": -1.096579670906067, "logps/rejected": -1.0450575351715088, "loss": 1.0068, "nll_loss": 1.2424595355987549, "rewards/accuracies": 0.5, "rewards/chosen": -0.10965797305107117, "rewards/margins": -0.005152209661900997, "rewards/rejected": -0.104505755007267, "step": 1533 }, { "epoch": 0.9357938081439683, "grad_norm": 1.8501116037368774, "learning_rate": 6.52639314145744e-06, "log_odds_chosen": 0.5702586770057678, "log_odds_ratio": -0.5300538539886475, "logits/chosen": -1.2005856037139893, "logits/rejected": -1.2101900577545166, "logps/chosen": -0.9236264228820801, "logps/rejected": -1.316734790802002, "loss": 1.1533, "nll_loss": 1.2242984771728516, "rewards/accuracies": 0.625, "rewards/chosen": -0.0923626497387886, "rewards/margins": 0.03931083530187607, "rewards/rejected": -0.13167348504066467, "step": 1534 }, { "epoch": 0.9364038432209852, "grad_norm": 1.4475816488265991, "learning_rate": 6.525413349663197e-06, "log_odds_chosen": 0.7186351418495178, "log_odds_ratio": -0.46920862793922424, "logits/chosen": -1.1643893718719482, "logits/rejected": -1.1463388204574585, "logps/chosen": -0.891647219657898, "logps/rejected": -1.3293769359588623, "loss": 1.1138, "nll_loss": 1.0874638557434082, "rewards/accuracies": 0.75, "rewards/chosen": -0.08916471898555756, "rewards/margins": 0.043772969394922256, "rewards/rejected": -0.13293768465518951, "step": 1535 }, { "epoch": 0.9370138782980021, "grad_norm": 1.4926122426986694, "learning_rate": 6.524433557868953e-06, "log_odds_chosen": 0.3649725317955017, "log_odds_ratio": -0.6150185465812683, "logits/chosen": -1.1380622386932373, "logits/rejected": -1.0329076051712036, "logps/chosen": -0.6411396861076355, "logps/rejected": -0.7517315149307251, "loss": 1.0174, "nll_loss": 0.9029486775398254, "rewards/accuracies": 0.5, "rewards/chosen": -0.06411397457122803, "rewards/margins": 0.01105918362736702, "rewards/rejected": -0.07517315447330475, "step": 1536 }, { "epoch": 0.937623913375019, "grad_norm": 1.3019412755966187, "learning_rate": 6.523453766074709e-06, "log_odds_chosen": 1.1163992881774902, "log_odds_ratio": -0.5029264688491821, "logits/chosen": -0.9136865139007568, "logits/rejected": -0.8523301482200623, "logps/chosen": -0.8991246223449707, "logps/rejected": -1.7453399896621704, "loss": 1.175, "nll_loss": 1.1257086992263794, "rewards/accuracies": 0.875, "rewards/chosen": -0.08991245925426483, "rewards/margins": 0.08462154120206833, "rewards/rejected": -0.17453402280807495, "step": 1537 }, { "epoch": 0.938233948452036, "grad_norm": 1.5711852312088013, "learning_rate": 6.522473974280466e-06, "log_odds_chosen": 0.6173648238182068, "log_odds_ratio": -0.48614540696144104, "logits/chosen": -1.0424866676330566, "logits/rejected": -0.8505780696868896, "logps/chosen": -0.8107344508171082, "logps/rejected": -1.172100305557251, "loss": 1.0655, "nll_loss": 1.3147190809249878, "rewards/accuracies": 0.75, "rewards/chosen": -0.08107344806194305, "rewards/margins": 0.03613658994436264, "rewards/rejected": -0.1172100380063057, "step": 1538 }, { "epoch": 0.938843983529053, "grad_norm": 1.1593846082687378, "learning_rate": 6.521494182486221e-06, "log_odds_chosen": 0.6450440883636475, "log_odds_ratio": -0.5827469825744629, "logits/chosen": -1.065112590789795, "logits/rejected": -1.0640044212341309, "logps/chosen": -1.0354504585266113, "logps/rejected": -1.5092236995697021, "loss": 1.2122, "nll_loss": 1.1590074300765991, "rewards/accuracies": 0.75, "rewards/chosen": -0.10354504734277725, "rewards/margins": 0.04737730696797371, "rewards/rejected": -0.15092235803604126, "step": 1539 }, { "epoch": 0.9394540186060698, "grad_norm": 1.1597398519515991, "learning_rate": 6.520514390691977e-06, "log_odds_chosen": 1.284451961517334, "log_odds_ratio": -0.5723639130592346, "logits/chosen": -0.9587189555168152, "logits/rejected": -0.8527402281761169, "logps/chosen": -0.7380387783050537, "logps/rejected": -1.6001988649368286, "loss": 1.0416, "nll_loss": 0.9997466802597046, "rewards/accuracies": 0.625, "rewards/chosen": -0.07380387932062149, "rewards/margins": 0.08621601015329361, "rewards/rejected": -0.1600198745727539, "step": 1540 }, { "epoch": 0.9400640536830868, "grad_norm": 2.116034984588623, "learning_rate": 6.519534598897734e-06, "log_odds_chosen": 0.5133987665176392, "log_odds_ratio": -0.6616507172584534, "logits/chosen": -0.8317111730575562, "logits/rejected": -0.8644552826881409, "logps/chosen": -0.9529913067817688, "logps/rejected": -1.3515746593475342, "loss": 1.2503, "nll_loss": 1.1648244857788086, "rewards/accuracies": 0.5, "rewards/chosen": -0.0952991247177124, "rewards/margins": 0.03985833004117012, "rewards/rejected": -0.13515746593475342, "step": 1541 }, { "epoch": 0.9406740887601037, "grad_norm": 1.5490882396697998, "learning_rate": 6.51855480710349e-06, "log_odds_chosen": 1.260387659072876, "log_odds_ratio": -0.5709865689277649, "logits/chosen": -0.9699113965034485, "logits/rejected": -1.084402322769165, "logps/chosen": -0.9332062005996704, "logps/rejected": -1.8915627002716064, "loss": 1.0766, "nll_loss": 1.0755178928375244, "rewards/accuracies": 0.5, "rewards/chosen": -0.09332062304019928, "rewards/margins": 0.09583563357591629, "rewards/rejected": -0.18915626406669617, "step": 1542 }, { "epoch": 0.9412841238371207, "grad_norm": 1.313974380493164, "learning_rate": 6.517575015309246e-06, "log_odds_chosen": 0.9224734902381897, "log_odds_ratio": -0.47080740332603455, "logits/chosen": -1.0164871215820312, "logits/rejected": -1.0717499256134033, "logps/chosen": -0.9728305339813232, "logps/rejected": -1.601521372795105, "loss": 1.0633, "nll_loss": 1.122583270072937, "rewards/accuracies": 0.75, "rewards/chosen": -0.09728305786848068, "rewards/margins": 0.06286908686161041, "rewards/rejected": -0.1601521372795105, "step": 1543 }, { "epoch": 0.9418941589141375, "grad_norm": 1.3220402002334595, "learning_rate": 6.5165952235150025e-06, "log_odds_chosen": 1.1628034114837646, "log_odds_ratio": -0.4416057765483856, "logits/chosen": -0.9187482595443726, "logits/rejected": -0.9312397241592407, "logps/chosen": -0.655549168586731, "logps/rejected": -1.4148240089416504, "loss": 1.1325, "nll_loss": 0.9045938849449158, "rewards/accuracies": 0.625, "rewards/chosen": -0.0655549168586731, "rewards/margins": 0.0759274810552597, "rewards/rejected": -0.1414823979139328, "step": 1544 }, { "epoch": 0.9425041939911545, "grad_norm": 1.0522186756134033, "learning_rate": 6.515615431720759e-06, "log_odds_chosen": 1.6725537776947021, "log_odds_ratio": -0.23107118904590607, "logits/chosen": -0.57249915599823, "logits/rejected": -0.7072504758834839, "logps/chosen": -0.6944862008094788, "logps/rejected": -1.8027987480163574, "loss": 0.8906, "nll_loss": 0.7786925435066223, "rewards/accuracies": 1.0, "rewards/chosen": -0.06944862008094788, "rewards/margins": 0.11083124577999115, "rewards/rejected": -0.18027988076210022, "step": 1545 }, { "epoch": 0.9431142290681714, "grad_norm": 2.2016775608062744, "learning_rate": 6.514635639926516e-06, "log_odds_chosen": 0.5621732473373413, "log_odds_ratio": -0.5528222322463989, "logits/chosen": -0.7139623165130615, "logits/rejected": -0.8955979943275452, "logps/chosen": -0.8335115909576416, "logps/rejected": -1.1346005201339722, "loss": 1.1541, "nll_loss": 0.9813293814659119, "rewards/accuracies": 0.625, "rewards/chosen": -0.08335116505622864, "rewards/margins": 0.030108889564871788, "rewards/rejected": -0.11346004903316498, "step": 1546 }, { "epoch": 0.9437242641451884, "grad_norm": 2.709784984588623, "learning_rate": 6.513655848132272e-06, "log_odds_chosen": 0.9053638577461243, "log_odds_ratio": -0.5078916549682617, "logits/chosen": -0.9143595099449158, "logits/rejected": -1.0081485509872437, "logps/chosen": -0.8244757652282715, "logps/rejected": -1.451186180114746, "loss": 1.1179, "nll_loss": 1.0350828170776367, "rewards/accuracies": 0.75, "rewards/chosen": -0.0824475884437561, "rewards/margins": 0.06267103552818298, "rewards/rejected": -0.14511863887310028, "step": 1547 }, { "epoch": 0.9443342992222052, "grad_norm": 1.5185507535934448, "learning_rate": 6.512676056338028e-06, "log_odds_chosen": 0.8595970273017883, "log_odds_ratio": -0.5745270848274231, "logits/chosen": -0.9088954925537109, "logits/rejected": -1.0141558647155762, "logps/chosen": -0.9221664667129517, "logps/rejected": -1.3832992315292358, "loss": 1.1975, "nll_loss": 1.3424713611602783, "rewards/accuracies": 0.5, "rewards/chosen": -0.09221664071083069, "rewards/margins": 0.046113271266222, "rewards/rejected": -0.1383299082517624, "step": 1548 }, { "epoch": 0.9449443342992222, "grad_norm": 1.1230460405349731, "learning_rate": 6.511696264543785e-06, "log_odds_chosen": -0.01050463318824768, "log_odds_ratio": -0.8369073867797852, "logits/chosen": -1.1178754568099976, "logits/rejected": -1.0766042470932007, "logps/chosen": -1.113048791885376, "logps/rejected": -1.0456271171569824, "loss": 1.1196, "nll_loss": 1.178741455078125, "rewards/accuracies": 0.25, "rewards/chosen": -0.1113048866391182, "rewards/margins": -0.006742170080542564, "rewards/rejected": -0.10456270724534988, "step": 1549 }, { "epoch": 0.9455543693762392, "grad_norm": 1.4643549919128418, "learning_rate": 6.51071647274954e-06, "log_odds_chosen": 0.9573603868484497, "log_odds_ratio": -0.46119171380996704, "logits/chosen": -0.9829882383346558, "logits/rejected": -1.1339826583862305, "logps/chosen": -0.8375042676925659, "logps/rejected": -1.3994017839431763, "loss": 1.0573, "nll_loss": 1.0565345287322998, "rewards/accuracies": 0.75, "rewards/chosen": -0.08375042676925659, "rewards/margins": 0.056189753115177155, "rewards/rejected": -0.13994017243385315, "step": 1550 }, { "epoch": 0.9461644044532561, "grad_norm": 1.694638729095459, "learning_rate": 6.509736680955296e-06, "log_odds_chosen": 0.34552979469299316, "log_odds_ratio": -0.585957944393158, "logits/chosen": -1.2358852624893188, "logits/rejected": -1.11476469039917, "logps/chosen": -1.0317816734313965, "logps/rejected": -1.2877960205078125, "loss": 1.3009, "nll_loss": 1.3148741722106934, "rewards/accuracies": 0.75, "rewards/chosen": -0.10317815840244293, "rewards/margins": 0.025601446628570557, "rewards/rejected": -0.12877961993217468, "step": 1551 }, { "epoch": 0.946774439530273, "grad_norm": 1.9843833446502686, "learning_rate": 6.508756889161053e-06, "log_odds_chosen": 1.2750647068023682, "log_odds_ratio": -0.42001259326934814, "logits/chosen": -0.6915394067764282, "logits/rejected": -0.7197211980819702, "logps/chosen": -0.6756351590156555, "logps/rejected": -1.4874272346496582, "loss": 0.9038, "nll_loss": 0.6956062316894531, "rewards/accuracies": 0.75, "rewards/chosen": -0.06756351888179779, "rewards/margins": 0.08117921650409698, "rewards/rejected": -0.14874272048473358, "step": 1552 }, { "epoch": 0.9473844746072899, "grad_norm": 1.2998905181884766, "learning_rate": 6.507777097366809e-06, "log_odds_chosen": 0.634911835193634, "log_odds_ratio": -0.600679874420166, "logits/chosen": -0.9171426296234131, "logits/rejected": -0.8279135227203369, "logps/chosen": -0.7461159229278564, "logps/rejected": -1.218915343284607, "loss": 1.1213, "nll_loss": 0.9960734844207764, "rewards/accuracies": 0.5, "rewards/chosen": -0.074611596763134, "rewards/margins": 0.04727993905544281, "rewards/rejected": -0.12189152836799622, "step": 1553 }, { "epoch": 0.9479945096843069, "grad_norm": 3.2805864810943604, "learning_rate": 6.5067973055725654e-06, "log_odds_chosen": 1.2180507183074951, "log_odds_ratio": -0.29577842354774475, "logits/chosen": -0.915813684463501, "logits/rejected": -1.00864839553833, "logps/chosen": -0.8060296773910522, "logps/rejected": -1.658339262008667, "loss": 1.2431, "nll_loss": 1.0447919368743896, "rewards/accuracies": 0.875, "rewards/chosen": -0.0806029662489891, "rewards/margins": 0.08523096889257431, "rewards/rejected": -0.16583393514156342, "step": 1554 }, { "epoch": 0.9486045447613238, "grad_norm": 1.2893019914627075, "learning_rate": 6.505817513778322e-06, "log_odds_chosen": 1.014212727546692, "log_odds_ratio": -0.5249166488647461, "logits/chosen": -1.0480157136917114, "logits/rejected": -0.9734439849853516, "logps/chosen": -0.7048348188400269, "logps/rejected": -1.366440773010254, "loss": 1.0773, "nll_loss": 1.021677017211914, "rewards/accuracies": 0.75, "rewards/chosen": -0.0704834833741188, "rewards/margins": 0.06616058945655823, "rewards/rejected": -0.13664409518241882, "step": 1555 }, { "epoch": 0.9492145798383407, "grad_norm": 1.3143757581710815, "learning_rate": 6.504837721984078e-06, "log_odds_chosen": 0.9813128113746643, "log_odds_ratio": -0.43913835287094116, "logits/chosen": -0.9129269123077393, "logits/rejected": -1.0176827907562256, "logps/chosen": -0.6699444055557251, "logps/rejected": -1.2606050968170166, "loss": 1.0285, "nll_loss": 1.0140533447265625, "rewards/accuracies": 0.875, "rewards/chosen": -0.06699444353580475, "rewards/margins": 0.05906607210636139, "rewards/rejected": -0.12606051564216614, "step": 1556 }, { "epoch": 0.9498246149153576, "grad_norm": 3.1819465160369873, "learning_rate": 6.503857930189834e-06, "log_odds_chosen": 1.242588996887207, "log_odds_ratio": -0.3776906132698059, "logits/chosen": -0.7870678305625916, "logits/rejected": -1.0037139654159546, "logps/chosen": -0.6755571365356445, "logps/rejected": -1.577883243560791, "loss": 1.1063, "nll_loss": 0.8857784271240234, "rewards/accuracies": 0.875, "rewards/chosen": -0.06755571067333221, "rewards/margins": 0.09023261815309525, "rewards/rejected": -0.15778833627700806, "step": 1557 }, { "epoch": 0.9504346499923746, "grad_norm": 1.4949342012405396, "learning_rate": 6.502878138395591e-06, "log_odds_chosen": 0.3785633444786072, "log_odds_ratio": -0.56305330991745, "logits/chosen": -1.0889195203781128, "logits/rejected": -1.0078459978103638, "logps/chosen": -0.8598426580429077, "logps/rejected": -1.0875914096832275, "loss": 1.2436, "nll_loss": 1.250192403793335, "rewards/accuracies": 0.75, "rewards/chosen": -0.08598426729440689, "rewards/margins": 0.022774886339902878, "rewards/rejected": -0.10875915735960007, "step": 1558 }, { "epoch": 0.9510446850693914, "grad_norm": 1.5940901041030884, "learning_rate": 6.501898346601347e-06, "log_odds_chosen": 0.007889915257692337, "log_odds_ratio": -0.7774978280067444, "logits/chosen": -1.0436502695083618, "logits/rejected": -1.0215702056884766, "logps/chosen": -1.0046663284301758, "logps/rejected": -1.0210727453231812, "loss": 1.0049, "nll_loss": 1.0295004844665527, "rewards/accuracies": 0.25, "rewards/chosen": -0.10046662390232086, "rewards/margins": 0.0016406513750553131, "rewards/rejected": -0.10210728645324707, "step": 1559 }, { "epoch": 0.9516547201464084, "grad_norm": 1.271681308746338, "learning_rate": 6.500918554807104e-06, "log_odds_chosen": 1.1381421089172363, "log_odds_ratio": -0.39848047494888306, "logits/chosen": -0.9115217924118042, "logits/rejected": -0.7406131029129028, "logps/chosen": -0.8463451862335205, "logps/rejected": -1.5014839172363281, "loss": 1.1433, "nll_loss": 0.9568911790847778, "rewards/accuracies": 0.75, "rewards/chosen": -0.08463452756404877, "rewards/margins": 0.06551387161016464, "rewards/rejected": -0.150148406624794, "step": 1560 }, { "epoch": 0.9522647552234254, "grad_norm": 2.8882663249969482, "learning_rate": 6.499938763012859e-06, "log_odds_chosen": 0.9823706150054932, "log_odds_ratio": -0.45960235595703125, "logits/chosen": -1.0883986949920654, "logits/rejected": -1.0605084896087646, "logps/chosen": -0.9161726236343384, "logps/rejected": -1.6414754390716553, "loss": 1.3074, "nll_loss": 1.110550045967102, "rewards/accuracies": 0.75, "rewards/chosen": -0.09161727130413055, "rewards/margins": 0.07253026962280273, "rewards/rejected": -0.1641475409269333, "step": 1561 }, { "epoch": 0.9528747903004423, "grad_norm": 1.907858967781067, "learning_rate": 6.498958971218615e-06, "log_odds_chosen": 0.85531085729599, "log_odds_ratio": -0.5616927742958069, "logits/chosen": -1.0613374710083008, "logits/rejected": -1.0030827522277832, "logps/chosen": -0.8763956427574158, "logps/rejected": -1.468863844871521, "loss": 1.1334, "nll_loss": 0.9984713792800903, "rewards/accuracies": 0.625, "rewards/chosen": -0.08763957023620605, "rewards/margins": 0.05924682319164276, "rewards/rejected": -0.14688639342784882, "step": 1562 }, { "epoch": 0.9534848253774592, "grad_norm": 2.2787208557128906, "learning_rate": 6.497979179424372e-06, "log_odds_chosen": 0.8629451990127563, "log_odds_ratio": -0.5753306150436401, "logits/chosen": -1.0802417993545532, "logits/rejected": -1.1144511699676514, "logps/chosen": -0.8163145184516907, "logps/rejected": -1.43040931224823, "loss": 1.1112, "nll_loss": 1.0127224922180176, "rewards/accuracies": 0.5, "rewards/chosen": -0.08163145184516907, "rewards/margins": 0.06140947714447975, "rewards/rejected": -0.14304092526435852, "step": 1563 }, { "epoch": 0.9540948604544761, "grad_norm": 1.4411182403564453, "learning_rate": 6.496999387630128e-06, "log_odds_chosen": 0.7202012538909912, "log_odds_ratio": -0.5951586961746216, "logits/chosen": -1.172644853591919, "logits/rejected": -1.1718947887420654, "logps/chosen": -0.7865864038467407, "logps/rejected": -1.361530065536499, "loss": 1.156, "nll_loss": 1.129956603050232, "rewards/accuracies": 0.625, "rewards/chosen": -0.07865864038467407, "rewards/margins": 0.05749436467885971, "rewards/rejected": -0.13615299761295319, "step": 1564 }, { "epoch": 0.9547048955314931, "grad_norm": 3.6746366024017334, "learning_rate": 6.4960195958358845e-06, "log_odds_chosen": -0.085954949259758, "log_odds_ratio": -0.8744462728500366, "logits/chosen": -1.0063560009002686, "logits/rejected": -1.0355947017669678, "logps/chosen": -1.254012107849121, "logps/rejected": -1.1413195133209229, "loss": 1.1951, "nll_loss": 1.3375345468521118, "rewards/accuracies": 0.625, "rewards/chosen": -0.12540122866630554, "rewards/margins": -0.01126927975565195, "rewards/rejected": -0.11413194239139557, "step": 1565 }, { "epoch": 0.95531493060851, "grad_norm": 1.623704195022583, "learning_rate": 6.4950398040416415e-06, "log_odds_chosen": 0.5241759419441223, "log_odds_ratio": -0.6339350342750549, "logits/chosen": -1.196890115737915, "logits/rejected": -1.0104222297668457, "logps/chosen": -1.1054346561431885, "logps/rejected": -1.5852277278900146, "loss": 1.349, "nll_loss": 1.2961496114730835, "rewards/accuracies": 0.5, "rewards/chosen": -0.11054347455501556, "rewards/margins": 0.04797930642962456, "rewards/rejected": -0.15852276980876923, "step": 1566 }, { "epoch": 0.9559249656855269, "grad_norm": 1.2456319332122803, "learning_rate": 6.494060012247397e-06, "log_odds_chosen": 0.9506069421768188, "log_odds_ratio": -0.42925959825515747, "logits/chosen": -0.9892472624778748, "logits/rejected": -0.9506424069404602, "logps/chosen": -0.7038148641586304, "logps/rejected": -1.2695623636245728, "loss": 1.0008, "nll_loss": 1.1584820747375488, "rewards/accuracies": 0.625, "rewards/chosen": -0.07038148492574692, "rewards/margins": 0.056574758142232895, "rewards/rejected": -0.12695623934268951, "step": 1567 }, { "epoch": 0.9565350007625438, "grad_norm": 3.4601101875305176, "learning_rate": 6.493080220453153e-06, "log_odds_chosen": 0.6918116807937622, "log_odds_ratio": -0.5548208951950073, "logits/chosen": -1.0749346017837524, "logits/rejected": -1.0122053623199463, "logps/chosen": -0.7574754357337952, "logps/rejected": -1.2566319704055786, "loss": 1.0627, "nll_loss": 0.9247705340385437, "rewards/accuracies": 0.625, "rewards/chosen": -0.0757475420832634, "rewards/margins": 0.04991566017270088, "rewards/rejected": -0.12566320598125458, "step": 1568 }, { "epoch": 0.9571450358395608, "grad_norm": 2.020106792449951, "learning_rate": 6.49210042865891e-06, "log_odds_chosen": 1.1627196073532104, "log_odds_ratio": -0.3905891180038452, "logits/chosen": -0.8434866666793823, "logits/rejected": -0.8987736701965332, "logps/chosen": -0.6259303092956543, "logps/rejected": -1.3897593021392822, "loss": 1.1453, "nll_loss": 0.8417953848838806, "rewards/accuracies": 0.875, "rewards/chosen": -0.06259303539991379, "rewards/margins": 0.07638290524482727, "rewards/rejected": -0.13897593319416046, "step": 1569 }, { "epoch": 0.9577550709165777, "grad_norm": 1.4721342325210571, "learning_rate": 6.491120636864666e-06, "log_odds_chosen": 0.0928657203912735, "log_odds_ratio": -0.7395329475402832, "logits/chosen": -1.2257053852081299, "logits/rejected": -1.1542894840240479, "logps/chosen": -1.1404906511306763, "logps/rejected": -1.197771430015564, "loss": 1.029, "nll_loss": 1.2210148572921753, "rewards/accuracies": 0.375, "rewards/chosen": -0.11404906213283539, "rewards/margins": 0.0057280827313661575, "rewards/rejected": -0.1197771430015564, "step": 1570 }, { "epoch": 0.9583651059935946, "grad_norm": 0.9696265459060669, "learning_rate": 6.490140845070422e-06, "log_odds_chosen": 1.657996654510498, "log_odds_ratio": -0.46143320202827454, "logits/chosen": -1.048856496810913, "logits/rejected": -1.1208211183547974, "logps/chosen": -0.7157469987869263, "logps/rejected": -2.0778892040252686, "loss": 1.2508, "nll_loss": 1.0744988918304443, "rewards/accuracies": 0.625, "rewards/chosen": -0.07157470285892487, "rewards/margins": 0.1362142264842987, "rewards/rejected": -0.20778892934322357, "step": 1571 }, { "epoch": 0.9589751410706115, "grad_norm": 1.8306388854980469, "learning_rate": 6.489161053276179e-06, "log_odds_chosen": 0.8995030522346497, "log_odds_ratio": -0.47462037205696106, "logits/chosen": -0.9754583835601807, "logits/rejected": -0.9669779539108276, "logps/chosen": -0.8015357851982117, "logps/rejected": -1.2303012609481812, "loss": 1.0491, "nll_loss": 1.1092793941497803, "rewards/accuracies": 0.75, "rewards/chosen": -0.08015358448028564, "rewards/margins": 0.04287654161453247, "rewards/rejected": -0.12303012609481812, "step": 1572 }, { "epoch": 0.9595851761476285, "grad_norm": 1.1390016078948975, "learning_rate": 6.488181261481934e-06, "log_odds_chosen": 1.2408859729766846, "log_odds_ratio": -0.3931965231895447, "logits/chosen": -0.7455164790153503, "logits/rejected": -0.7417059540748596, "logps/chosen": -0.7903632521629333, "logps/rejected": -1.705509066581726, "loss": 1.0624, "nll_loss": 0.9200388193130493, "rewards/accuracies": 0.875, "rewards/chosen": -0.07903632521629333, "rewards/margins": 0.09151458740234375, "rewards/rejected": -0.17055091261863708, "step": 1573 }, { "epoch": 0.9601952112246455, "grad_norm": 4.659962177276611, "learning_rate": 6.487201469687691e-06, "log_odds_chosen": 0.5292235016822815, "log_odds_ratio": -0.6198203563690186, "logits/chosen": -0.8373744487762451, "logits/rejected": -0.809149980545044, "logps/chosen": -0.7393596172332764, "logps/rejected": -1.03269362449646, "loss": 1.0621, "nll_loss": 0.9201416373252869, "rewards/accuracies": 0.375, "rewards/chosen": -0.07393595576286316, "rewards/margins": 0.029333405196666718, "rewards/rejected": -0.10326936841011047, "step": 1574 }, { "epoch": 0.9608052463016623, "grad_norm": 2.033229112625122, "learning_rate": 6.486221677893447e-06, "log_odds_chosen": 0.5483484864234924, "log_odds_ratio": -0.5343560576438904, "logits/chosen": -0.9651437401771545, "logits/rejected": -0.9203461408615112, "logps/chosen": -0.7296140789985657, "logps/rejected": -1.0689221620559692, "loss": 0.8883, "nll_loss": 0.8692828416824341, "rewards/accuracies": 0.75, "rewards/chosen": -0.07296141237020493, "rewards/margins": 0.033930808305740356, "rewards/rejected": -0.10689222067594528, "step": 1575 }, { "epoch": 0.9614152813786793, "grad_norm": 1.3948137760162354, "learning_rate": 6.4852418860992035e-06, "log_odds_chosen": 0.8309269547462463, "log_odds_ratio": -0.587948203086853, "logits/chosen": -1.0393651723861694, "logits/rejected": -0.8736796975135803, "logps/chosen": -0.8679385781288147, "logps/rejected": -1.3427048921585083, "loss": 1.1122, "nll_loss": 1.0798449516296387, "rewards/accuracies": 0.75, "rewards/chosen": -0.08679386228322983, "rewards/margins": 0.04747661575675011, "rewards/rejected": -0.13427048921585083, "step": 1576 }, { "epoch": 0.9620253164556962, "grad_norm": 1.9035029411315918, "learning_rate": 6.4842620943049605e-06, "log_odds_chosen": 0.4131890535354614, "log_odds_ratio": -0.6016682386398315, "logits/chosen": -0.844577431678772, "logits/rejected": -0.9139933586120605, "logps/chosen": -0.8500407934188843, "logps/rejected": -1.1163008213043213, "loss": 1.1465, "nll_loss": 1.031203269958496, "rewards/accuracies": 0.625, "rewards/chosen": -0.08500409126281738, "rewards/margins": 0.02662600949406624, "rewards/rejected": -0.11163008213043213, "step": 1577 }, { "epoch": 0.9626353515327132, "grad_norm": 0.992185115814209, "learning_rate": 6.483282302510717e-06, "log_odds_chosen": 0.44890978932380676, "log_odds_ratio": -0.6320874691009521, "logits/chosen": -0.8242723345756531, "logits/rejected": -0.8821132183074951, "logps/chosen": -0.9943450689315796, "logps/rejected": -1.260432243347168, "loss": 1.0864, "nll_loss": 1.1479830741882324, "rewards/accuracies": 0.75, "rewards/chosen": -0.0994345173239708, "rewards/margins": 0.026608716696500778, "rewards/rejected": -0.12604323029518127, "step": 1578 }, { "epoch": 0.96324538660973, "grad_norm": 3.278902769088745, "learning_rate": 6.482302510716472e-06, "log_odds_chosen": 2.089822292327881, "log_odds_ratio": -0.32359611988067627, "logits/chosen": -0.7759605646133423, "logits/rejected": -0.7935028076171875, "logps/chosen": -0.6448100805282593, "logps/rejected": -2.0734379291534424, "loss": 1.0414, "nll_loss": 0.8789771795272827, "rewards/accuracies": 0.875, "rewards/chosen": -0.06448100507259369, "rewards/margins": 0.14286281168460846, "rewards/rejected": -0.20734381675720215, "step": 1579 }, { "epoch": 0.963855421686747, "grad_norm": 1.7834352254867554, "learning_rate": 6.481322718922229e-06, "log_odds_chosen": 0.2906210422515869, "log_odds_ratio": -0.6067988872528076, "logits/chosen": -1.1093828678131104, "logits/rejected": -1.0817034244537354, "logps/chosen": -0.9494075775146484, "logps/rejected": -1.142660140991211, "loss": 1.0914, "nll_loss": 1.0816502571105957, "rewards/accuracies": 0.75, "rewards/chosen": -0.09494075924158096, "rewards/margins": 0.019325243309140205, "rewards/rejected": -0.11426600813865662, "step": 1580 }, { "epoch": 0.9644654567637639, "grad_norm": 2.1491990089416504, "learning_rate": 6.480342927127985e-06, "log_odds_chosen": -0.12689904868602753, "log_odds_ratio": -0.9141179919242859, "logits/chosen": -1.1252363920211792, "logits/rejected": -0.9696252942085266, "logps/chosen": -1.0212405920028687, "logps/rejected": -1.018823504447937, "loss": 1.1, "nll_loss": 1.2860389947891235, "rewards/accuracies": 0.5, "rewards/chosen": -0.10212406516075134, "rewards/margins": -0.0002417033538222313, "rewards/rejected": -0.10188236087560654, "step": 1581 }, { "epoch": 0.9650754918407809, "grad_norm": 1.6780269145965576, "learning_rate": 6.479363135333741e-06, "log_odds_chosen": 2.193127393722534, "log_odds_ratio": -0.27032169699668884, "logits/chosen": -0.8060741424560547, "logits/rejected": -0.9827871322631836, "logps/chosen": -0.533229649066925, "logps/rejected": -1.9985946416854858, "loss": 0.977, "nll_loss": 0.6861420273780823, "rewards/accuracies": 0.875, "rewards/chosen": -0.053322963416576385, "rewards/margins": 0.14653651416301727, "rewards/rejected": -0.19985947012901306, "step": 1582 }, { "epoch": 0.9656855269177977, "grad_norm": 1.5132567882537842, "learning_rate": 6.478383343539498e-06, "log_odds_chosen": 0.9936649799346924, "log_odds_ratio": -0.5417370796203613, "logits/chosen": -1.0351420640945435, "logits/rejected": -0.9946777820587158, "logps/chosen": -0.8191772103309631, "logps/rejected": -1.4371691942214966, "loss": 0.912, "nll_loss": 0.9709146022796631, "rewards/accuracies": 0.625, "rewards/chosen": -0.08191771805286407, "rewards/margins": 0.06179920211434364, "rewards/rejected": -0.14371691644191742, "step": 1583 }, { "epoch": 0.9662955619948147, "grad_norm": 1.347930908203125, "learning_rate": 6.477403551745253e-06, "log_odds_chosen": 1.3829090595245361, "log_odds_ratio": -0.44186320900917053, "logits/chosen": -0.8663504123687744, "logits/rejected": -0.9459016919136047, "logps/chosen": -0.9347856044769287, "logps/rejected": -1.907460331916809, "loss": 1.006, "nll_loss": 1.0104784965515137, "rewards/accuracies": 0.75, "rewards/chosen": -0.09347856044769287, "rewards/margins": 0.09726747870445251, "rewards/rejected": -0.19074603915214539, "step": 1584 }, { "epoch": 0.9669055970718317, "grad_norm": 2.616814613342285, "learning_rate": 6.4764237599510095e-06, "log_odds_chosen": 0.378839373588562, "log_odds_ratio": -0.6479204893112183, "logits/chosen": -0.9810655117034912, "logits/rejected": -0.875025749206543, "logps/chosen": -1.0268645286560059, "logps/rejected": -1.2324235439300537, "loss": 1.1408, "nll_loss": 1.1407790184020996, "rewards/accuracies": 0.625, "rewards/chosen": -0.10268644988536835, "rewards/margins": 0.020555898547172546, "rewards/rejected": -0.12324235588312149, "step": 1585 }, { "epoch": 0.9675156321488485, "grad_norm": 1.9262253046035767, "learning_rate": 6.4754439681567664e-06, "log_odds_chosen": 1.3562818765640259, "log_odds_ratio": -0.47075408697128296, "logits/chosen": -0.8568267822265625, "logits/rejected": -0.9509389996528625, "logps/chosen": -0.6357790231704712, "logps/rejected": -1.616296410560608, "loss": 0.9933, "nll_loss": 0.9229382276535034, "rewards/accuracies": 0.625, "rewards/chosen": -0.06357790529727936, "rewards/margins": 0.09805172681808472, "rewards/rejected": -0.16162964701652527, "step": 1586 }, { "epoch": 0.9681256672258655, "grad_norm": 2.998701333999634, "learning_rate": 6.4744641763625226e-06, "log_odds_chosen": 2.232558250427246, "log_odds_ratio": -0.21955031156539917, "logits/chosen": -0.7348307371139526, "logits/rejected": -0.7736248970031738, "logps/chosen": -0.6795996427536011, "logps/rejected": -2.234267234802246, "loss": 1.1094, "nll_loss": 1.0427014827728271, "rewards/accuracies": 1.0, "rewards/chosen": -0.06795996427536011, "rewards/margins": 0.15546676516532898, "rewards/rejected": -0.2234267294406891, "step": 1587 }, { "epoch": 0.9687357023028824, "grad_norm": 13.029882431030273, "learning_rate": 6.4734843845682795e-06, "log_odds_chosen": 1.1201481819152832, "log_odds_ratio": -0.47000786662101746, "logits/chosen": -0.8472220301628113, "logits/rejected": -0.7677755355834961, "logps/chosen": -0.7015767097473145, "logps/rejected": -1.3751425743103027, "loss": 0.9789, "nll_loss": 0.8262069821357727, "rewards/accuracies": 0.625, "rewards/chosen": -0.07015767693519592, "rewards/margins": 0.06735658645629883, "rewards/rejected": -0.13751426339149475, "step": 1588 }, { "epoch": 0.9693457373798994, "grad_norm": 2.8069050312042236, "learning_rate": 6.472504592774036e-06, "log_odds_chosen": 1.2326850891113281, "log_odds_ratio": -0.4584555923938751, "logits/chosen": -0.9707061052322388, "logits/rejected": -0.9089238047599792, "logps/chosen": -0.8457830548286438, "logps/rejected": -1.7859907150268555, "loss": 1.102, "nll_loss": 0.9643535614013672, "rewards/accuracies": 0.625, "rewards/chosen": -0.08457830548286438, "rewards/margins": 0.09402076154947281, "rewards/rejected": -0.17859907448291779, "step": 1589 }, { "epoch": 0.9699557724569162, "grad_norm": 2.0395290851593018, "learning_rate": 6.471524800979791e-06, "log_odds_chosen": 1.0205044746398926, "log_odds_ratio": -0.5729349255561829, "logits/chosen": -0.956525444984436, "logits/rejected": -1.011770248413086, "logps/chosen": -0.7761977910995483, "logps/rejected": -1.4501359462738037, "loss": 1.1981, "nll_loss": 1.2027041912078857, "rewards/accuracies": 0.625, "rewards/chosen": -0.07761978358030319, "rewards/margins": 0.06739381700754166, "rewards/rejected": -0.14501360058784485, "step": 1590 }, { "epoch": 0.9705658075339332, "grad_norm": 2.247498035430908, "learning_rate": 6.470545009185548e-06, "log_odds_chosen": 0.8096214532852173, "log_odds_ratio": -0.5576925873756409, "logits/chosen": -1.018629550933838, "logits/rejected": -0.959138035774231, "logps/chosen": -0.9746545553207397, "logps/rejected": -1.554791808128357, "loss": 1.158, "nll_loss": 1.0603548288345337, "rewards/accuracies": 0.625, "rewards/chosen": -0.09746546298265457, "rewards/margins": 0.05801371484994888, "rewards/rejected": -0.15547917783260345, "step": 1591 }, { "epoch": 0.9711758426109501, "grad_norm": 2.2564597129821777, "learning_rate": 6.469565217391304e-06, "log_odds_chosen": 0.41893690824508667, "log_odds_ratio": -0.7791930437088013, "logits/chosen": -0.8155346512794495, "logits/rejected": -0.7249971032142639, "logps/chosen": -0.8531796932220459, "logps/rejected": -1.0293631553649902, "loss": 1.0993, "nll_loss": 1.005181908607483, "rewards/accuracies": 0.5, "rewards/chosen": -0.08531796932220459, "rewards/margins": 0.017618345096707344, "rewards/rejected": -0.10293632000684738, "step": 1592 }, { "epoch": 0.9717858776879671, "grad_norm": 1.5765775442123413, "learning_rate": 6.46858542559706e-06, "log_odds_chosen": 0.9196377992630005, "log_odds_ratio": -0.5338851809501648, "logits/chosen": -0.9162123799324036, "logits/rejected": -0.866573691368103, "logps/chosen": -0.7281956672668457, "logps/rejected": -1.3629323244094849, "loss": 1.1224, "nll_loss": 0.912224531173706, "rewards/accuracies": 0.5, "rewards/chosen": -0.07281957566738129, "rewards/margins": 0.0634736716747284, "rewards/rejected": -0.1362932324409485, "step": 1593 }, { "epoch": 0.9723959127649839, "grad_norm": 7.969070911407471, "learning_rate": 6.467605633802817e-06, "log_odds_chosen": 0.32641974091529846, "log_odds_ratio": -0.6033743023872375, "logits/chosen": -0.805417537689209, "logits/rejected": -0.7472744584083557, "logps/chosen": -1.0058366060256958, "logps/rejected": -1.2528076171875, "loss": 1.0986, "nll_loss": 1.1558866500854492, "rewards/accuracies": 0.75, "rewards/chosen": -0.10058365762233734, "rewards/margins": 0.024697110056877136, "rewards/rejected": -0.12528076767921448, "step": 1594 }, { "epoch": 0.9730059478420009, "grad_norm": 2.085218667984009, "learning_rate": 6.466625842008573e-06, "log_odds_chosen": 1.9834133386611938, "log_odds_ratio": -0.40343692898750305, "logits/chosen": -0.8600291013717651, "logits/rejected": -0.7583684325218201, "logps/chosen": -0.7050493359565735, "logps/rejected": -2.1425018310546875, "loss": 1.0009, "nll_loss": 0.9395180940628052, "rewards/accuracies": 0.875, "rewards/chosen": -0.07050493359565735, "rewards/margins": 0.14374522864818573, "rewards/rejected": -0.21425017714500427, "step": 1595 }, { "epoch": 0.9736159829190179, "grad_norm": 6.173746109008789, "learning_rate": 6.4656460502143285e-06, "log_odds_chosen": 0.6779859066009521, "log_odds_ratio": -0.8969680070877075, "logits/chosen": -0.7890722751617432, "logits/rejected": -0.7736035585403442, "logps/chosen": -1.3363351821899414, "logps/rejected": -1.749787449836731, "loss": 1.1875, "nll_loss": 1.0220966339111328, "rewards/accuracies": 0.75, "rewards/chosen": -0.13363352417945862, "rewards/margins": 0.04134521633386612, "rewards/rejected": -0.17497874796390533, "step": 1596 }, { "epoch": 0.9742260179960348, "grad_norm": 1.2105324268341064, "learning_rate": 6.4646662584200855e-06, "log_odds_chosen": 1.979524850845337, "log_odds_ratio": -0.39081254601478577, "logits/chosen": -1.0795598030090332, "logits/rejected": -0.8821196556091309, "logps/chosen": -0.7808518409729004, "logps/rejected": -2.2736189365386963, "loss": 0.9462, "nll_loss": 0.9554572105407715, "rewards/accuracies": 0.875, "rewards/chosen": -0.07808518409729004, "rewards/margins": 0.1492767184972763, "rewards/rejected": -0.22736188769340515, "step": 1597 }, { "epoch": 0.9748360530730517, "grad_norm": 1.8042200803756714, "learning_rate": 6.463686466625842e-06, "log_odds_chosen": 1.6649086475372314, "log_odds_ratio": -0.3109215497970581, "logits/chosen": -0.7888253331184387, "logits/rejected": -0.6545913219451904, "logps/chosen": -0.6316285133361816, "logps/rejected": -1.7586590051651, "loss": 0.961, "nll_loss": 0.9147021174430847, "rewards/accuracies": 0.875, "rewards/chosen": -0.06316285580396652, "rewards/margins": 0.11270304769277573, "rewards/rejected": -0.17586590349674225, "step": 1598 }, { "epoch": 0.9754460881500686, "grad_norm": 1.7708359956741333, "learning_rate": 6.4627066748315986e-06, "log_odds_chosen": 2.0365304946899414, "log_odds_ratio": -0.2710496485233307, "logits/chosen": -0.7910709381103516, "logits/rejected": -0.7232854962348938, "logps/chosen": -0.6044620871543884, "logps/rejected": -2.0672943592071533, "loss": 0.9756, "nll_loss": 0.6938060522079468, "rewards/accuracies": 0.875, "rewards/chosen": -0.06044621020555496, "rewards/margins": 0.14628323912620544, "rewards/rejected": -0.2067294418811798, "step": 1599 }, { "epoch": 0.9760561232270856, "grad_norm": 2.1009035110473633, "learning_rate": 6.461726883037355e-06, "log_odds_chosen": 0.49664121866226196, "log_odds_ratio": -0.666098952293396, "logits/chosen": -0.9806991815567017, "logits/rejected": -0.8417461514472961, "logps/chosen": -0.9745231866836548, "logps/rejected": -1.4207701683044434, "loss": 1.2425, "nll_loss": 1.1041473150253296, "rewards/accuracies": 0.5, "rewards/chosen": -0.097452312707901, "rewards/margins": 0.0446246936917305, "rewards/rejected": -0.1420770138502121, "step": 1600 }, { "epoch": 0.9766661583041025, "grad_norm": 1.2843002080917358, "learning_rate": 6.46074709124311e-06, "log_odds_chosen": 0.9499680995941162, "log_odds_ratio": -0.5985073447227478, "logits/chosen": -1.1643240451812744, "logits/rejected": -0.9581229090690613, "logps/chosen": -1.0838007926940918, "logps/rejected": -1.9283435344696045, "loss": 1.2179, "nll_loss": 1.2575112581253052, "rewards/accuracies": 0.625, "rewards/chosen": -0.10838007926940918, "rewards/margins": 0.08445426821708679, "rewards/rejected": -0.19283434748649597, "step": 1601 }, { "epoch": 0.9772761933811194, "grad_norm": 6.15317964553833, "learning_rate": 6.459767299448867e-06, "log_odds_chosen": 1.0228725671768188, "log_odds_ratio": -0.5361090302467346, "logits/chosen": -0.9492722749710083, "logits/rejected": -0.9856749773025513, "logps/chosen": -0.7420119047164917, "logps/rejected": -1.357678771018982, "loss": 1.2227, "nll_loss": 0.9670311212539673, "rewards/accuracies": 0.625, "rewards/chosen": -0.07420118898153305, "rewards/margins": 0.06156668812036514, "rewards/rejected": -0.1357678771018982, "step": 1602 }, { "epoch": 0.9778862284581363, "grad_norm": 1.5804357528686523, "learning_rate": 6.458787507654623e-06, "log_odds_chosen": 1.3317373991012573, "log_odds_ratio": -0.43638646602630615, "logits/chosen": -0.7928769588470459, "logits/rejected": -0.8903645277023315, "logps/chosen": -0.7629896998405457, "logps/rejected": -1.7550848722457886, "loss": 0.9391, "nll_loss": 0.830940306186676, "rewards/accuracies": 0.625, "rewards/chosen": -0.07629896700382233, "rewards/margins": 0.09920951724052429, "rewards/rejected": -0.1755084991455078, "step": 1603 }, { "epoch": 0.9784962635351533, "grad_norm": 2.4180235862731934, "learning_rate": 6.457807715860379e-06, "log_odds_chosen": 0.12421134859323502, "log_odds_ratio": -0.7084619998931885, "logits/chosen": -0.8911659121513367, "logits/rejected": -0.9327605962753296, "logps/chosen": -1.0831537246704102, "logps/rejected": -1.1297965049743652, "loss": 0.963, "nll_loss": 1.2184022665023804, "rewards/accuracies": 0.625, "rewards/chosen": -0.1083153784275055, "rewards/margins": 0.004664282314479351, "rewards/rejected": -0.11297966539859772, "step": 1604 }, { "epoch": 0.9791062986121702, "grad_norm": 2.041914463043213, "learning_rate": 6.456827924066136e-06, "log_odds_chosen": 0.80780428647995, "log_odds_ratio": -0.5420573353767395, "logits/chosen": -0.7415957450866699, "logits/rejected": -0.7480100393295288, "logps/chosen": -1.1739211082458496, "logps/rejected": -1.74662184715271, "loss": 1.1664, "nll_loss": 1.1501047611236572, "rewards/accuracies": 0.5, "rewards/chosen": -0.11739212274551392, "rewards/margins": 0.05727006867527962, "rewards/rejected": -0.17466217279434204, "step": 1605 }, { "epoch": 0.9797163336891871, "grad_norm": 8.417123794555664, "learning_rate": 6.455848132271892e-06, "log_odds_chosen": 1.4233949184417725, "log_odds_ratio": -0.2752983272075653, "logits/chosen": -0.6510565280914307, "logits/rejected": -0.7288137674331665, "logps/chosen": -0.5230060815811157, "logps/rejected": -1.2824499607086182, "loss": 0.9484, "nll_loss": 0.8768186569213867, "rewards/accuracies": 0.875, "rewards/chosen": -0.05230060964822769, "rewards/margins": 0.07594438642263412, "rewards/rejected": -0.12824499607086182, "step": 1606 }, { "epoch": 0.980326368766204, "grad_norm": 2.604839563369751, "learning_rate": 6.4548683404776475e-06, "log_odds_chosen": 0.20374499261379242, "log_odds_ratio": -0.7236039042472839, "logits/chosen": -1.0638959407806396, "logits/rejected": -1.0442678928375244, "logps/chosen": -1.0436235666275024, "logps/rejected": -1.1633168458938599, "loss": 1.1505, "nll_loss": 1.13539457321167, "rewards/accuracies": 0.25, "rewards/chosen": -0.104362353682518, "rewards/margins": 0.011969340965151787, "rewards/rejected": -0.11633169651031494, "step": 1607 }, { "epoch": 0.980936403843221, "grad_norm": 4.227840423583984, "learning_rate": 6.4538885486834045e-06, "log_odds_chosen": 0.6463737487792969, "log_odds_ratio": -0.5278842449188232, "logits/chosen": -1.0313427448272705, "logits/rejected": -0.9439377784729004, "logps/chosen": -0.5842961072921753, "logps/rejected": -0.9647654294967651, "loss": 1.1476, "nll_loss": 1.1116275787353516, "rewards/accuracies": 0.75, "rewards/chosen": -0.05842961370944977, "rewards/margins": 0.038046933710575104, "rewards/rejected": -0.09647655487060547, "step": 1608 }, { "epoch": 0.981546438920238, "grad_norm": 1.8889598846435547, "learning_rate": 6.452908756889161e-06, "log_odds_chosen": 0.18118134140968323, "log_odds_ratio": -0.7485390901565552, "logits/chosen": -1.0819848775863647, "logits/rejected": -1.1050993204116821, "logps/chosen": -0.9647681713104248, "logps/rejected": -1.0809887647628784, "loss": 1.1793, "nll_loss": 1.2912697792053223, "rewards/accuracies": 0.25, "rewards/chosen": -0.09647681564092636, "rewards/margins": 0.011622059158980846, "rewards/rejected": -0.10809887945652008, "step": 1609 }, { "epoch": 0.9821564739972548, "grad_norm": 2.293621301651001, "learning_rate": 6.451928965094917e-06, "log_odds_chosen": 0.8834148645401001, "log_odds_ratio": -0.5415912866592407, "logits/chosen": -0.9363086223602295, "logits/rejected": -0.9153056144714355, "logps/chosen": -0.8947258591651917, "logps/rejected": -1.4744210243225098, "loss": 1.1917, "nll_loss": 0.9638441801071167, "rewards/accuracies": 0.75, "rewards/chosen": -0.08947259187698364, "rewards/margins": 0.05796951800584793, "rewards/rejected": -0.14744210243225098, "step": 1610 }, { "epoch": 0.9827665090742718, "grad_norm": 1.9016789197921753, "learning_rate": 6.450949173300674e-06, "log_odds_chosen": 0.10714755207300186, "log_odds_ratio": -0.7119608521461487, "logits/chosen": -0.674813985824585, "logits/rejected": -0.5866150856018066, "logps/chosen": -0.8562852740287781, "logps/rejected": -0.8602995276451111, "loss": 0.9597, "nll_loss": 0.8529596328735352, "rewards/accuracies": 0.625, "rewards/chosen": -0.08562852442264557, "rewards/margins": 0.00040141912177205086, "rewards/rejected": -0.08602994680404663, "step": 1611 }, { "epoch": 0.9833765441512887, "grad_norm": 1.5873535871505737, "learning_rate": 6.44996938150643e-06, "log_odds_chosen": 2.110668182373047, "log_odds_ratio": -0.20059311389923096, "logits/chosen": -0.7605870366096497, "logits/rejected": -0.8544315099716187, "logps/chosen": -0.6032233238220215, "logps/rejected": -2.01649808883667, "loss": 1.0322, "nll_loss": 0.8337453007698059, "rewards/accuracies": 1.0, "rewards/chosen": -0.06032233685255051, "rewards/margins": 0.14132747054100037, "rewards/rejected": -0.20164981484413147, "step": 1612 }, { "epoch": 0.9839865792283057, "grad_norm": 3.4920766353607178, "learning_rate": 6.448989589712185e-06, "log_odds_chosen": 0.9079462289810181, "log_odds_ratio": -0.47321224212646484, "logits/chosen": -1.092529535293579, "logits/rejected": -1.1582118272781372, "logps/chosen": -0.8861775398254395, "logps/rejected": -1.5343948602676392, "loss": 1.2888, "nll_loss": 1.2101571559906006, "rewards/accuracies": 0.875, "rewards/chosen": -0.08861775696277618, "rewards/margins": 0.06482171267271042, "rewards/rejected": -0.1534394770860672, "step": 1613 }, { "epoch": 0.9845966143053225, "grad_norm": 4.081056118011475, "learning_rate": 6.448009797917942e-06, "log_odds_chosen": 1.440887451171875, "log_odds_ratio": -0.5078411102294922, "logits/chosen": -0.676682710647583, "logits/rejected": -0.9587581157684326, "logps/chosen": -1.064021110534668, "logps/rejected": -2.2667646408081055, "loss": 0.9292, "nll_loss": 1.0393472909927368, "rewards/accuracies": 0.75, "rewards/chosen": -0.10640211403369904, "rewards/margins": 0.12027435749769211, "rewards/rejected": -0.22667646408081055, "step": 1614 }, { "epoch": 0.9852066493823395, "grad_norm": 2.350957155227661, "learning_rate": 6.447030006123698e-06, "log_odds_chosen": 0.3754788339138031, "log_odds_ratio": -0.6518809199333191, "logits/chosen": -0.99676513671875, "logits/rejected": -1.0593008995056152, "logps/chosen": -0.8766900897026062, "logps/rejected": -1.1679208278656006, "loss": 1.2052, "nll_loss": 1.0961300134658813, "rewards/accuracies": 0.625, "rewards/chosen": -0.0876690149307251, "rewards/margins": 0.029123082756996155, "rewards/rejected": -0.11679209768772125, "step": 1615 }, { "epoch": 0.9858166844593564, "grad_norm": 1.9175763130187988, "learning_rate": 6.446050214329455e-06, "log_odds_chosen": 0.3378962278366089, "log_odds_ratio": -0.6653576493263245, "logits/chosen": -0.989741861820221, "logits/rejected": -0.9138915538787842, "logps/chosen": -0.8826771378517151, "logps/rejected": -1.0726454257965088, "loss": 1.1509, "nll_loss": 0.9756893515586853, "rewards/accuracies": 0.375, "rewards/chosen": -0.08826771378517151, "rewards/margins": 0.018996834754943848, "rewards/rejected": -0.10726454854011536, "step": 1616 }, { "epoch": 0.9864267195363733, "grad_norm": 1.3223257064819336, "learning_rate": 6.445070422535211e-06, "log_odds_chosen": -0.25768083333969116, "log_odds_ratio": -0.9022197723388672, "logits/chosen": -1.2183284759521484, "logits/rejected": -1.1529271602630615, "logps/chosen": -1.2667052745819092, "logps/rejected": -1.0790499448776245, "loss": 1.2899, "nll_loss": 1.310368537902832, "rewards/accuracies": 0.375, "rewards/chosen": -0.12667052447795868, "rewards/margins": -0.01876552402973175, "rewards/rejected": -0.10790500044822693, "step": 1617 }, { "epoch": 0.9870367546133902, "grad_norm": 1.644882321357727, "learning_rate": 6.444090630740967e-06, "log_odds_chosen": 0.6277360916137695, "log_odds_ratio": -0.5701389312744141, "logits/chosen": -1.2178196907043457, "logits/rejected": -1.086665153503418, "logps/chosen": -0.9666088223457336, "logps/rejected": -1.4490931034088135, "loss": 1.1676, "nll_loss": 1.2273452281951904, "rewards/accuracies": 0.75, "rewards/chosen": -0.09666088223457336, "rewards/margins": 0.04824843257665634, "rewards/rejected": -0.1449093222618103, "step": 1618 }, { "epoch": 0.9876467896904072, "grad_norm": 1.7194970846176147, "learning_rate": 6.4431108389467236e-06, "log_odds_chosen": 1.0408350229263306, "log_odds_ratio": -0.6212822794914246, "logits/chosen": -0.9740228652954102, "logits/rejected": -1.0199304819107056, "logps/chosen": -0.9786560535430908, "logps/rejected": -1.7977769374847412, "loss": 1.1815, "nll_loss": 1.2323826551437378, "rewards/accuracies": 0.5, "rewards/chosen": -0.09786560386419296, "rewards/margins": 0.081912100315094, "rewards/rejected": -0.17977771162986755, "step": 1619 }, { "epoch": 0.9882568247674242, "grad_norm": 3.1329288482666016, "learning_rate": 6.44213104715248e-06, "log_odds_chosen": 1.0230958461761475, "log_odds_ratio": -0.6118720769882202, "logits/chosen": -0.7519209384918213, "logits/rejected": -0.7407989501953125, "logps/chosen": -0.8829506635665894, "logps/rejected": -1.4185221195220947, "loss": 1.0314, "nll_loss": 0.8936970829963684, "rewards/accuracies": 0.875, "rewards/chosen": -0.08829505741596222, "rewards/margins": 0.053557138890028, "rewards/rejected": -0.14185220003128052, "step": 1620 }, { "epoch": 0.988866859844441, "grad_norm": 1.740858554840088, "learning_rate": 6.441151255358236e-06, "log_odds_chosen": 0.51490718126297, "log_odds_ratio": -0.5092818140983582, "logits/chosen": -1.030043363571167, "logits/rejected": -0.9912199974060059, "logps/chosen": -0.9388914704322815, "logps/rejected": -1.2717655897140503, "loss": 1.253, "nll_loss": 1.157057762145996, "rewards/accuracies": 0.75, "rewards/chosen": -0.09388914704322815, "rewards/margins": 0.0332874171435833, "rewards/rejected": -0.12717656791210175, "step": 1621 }, { "epoch": 0.989476894921458, "grad_norm": 2.051528215408325, "learning_rate": 6.440171463563993e-06, "log_odds_chosen": 0.9723951816558838, "log_odds_ratio": -0.5833614468574524, "logits/chosen": -1.0357439517974854, "logits/rejected": -0.9090323448181152, "logps/chosen": -0.8566206693649292, "logps/rejected": -1.5541062355041504, "loss": 0.9468, "nll_loss": 1.0231451988220215, "rewards/accuracies": 0.625, "rewards/chosen": -0.08566206693649292, "rewards/margins": 0.06974856555461884, "rewards/rejected": -0.15541064739227295, "step": 1622 }, { "epoch": 0.9900869299984749, "grad_norm": 1.433550238609314, "learning_rate": 6.439191671769749e-06, "log_odds_chosen": 1.7081348896026611, "log_odds_ratio": -0.5527487993240356, "logits/chosen": -0.8694688081741333, "logits/rejected": -1.104386806488037, "logps/chosen": -0.5245345830917358, "logps/rejected": -1.8652621507644653, "loss": 0.9736, "nll_loss": 0.8436434864997864, "rewards/accuracies": 0.625, "rewards/chosen": -0.052453458309173584, "rewards/margins": 0.13407276570796967, "rewards/rejected": -0.18652622401714325, "step": 1623 }, { "epoch": 0.9906969650754919, "grad_norm": 4.7596893310546875, "learning_rate": 6.438211879975504e-06, "log_odds_chosen": 2.145230293273926, "log_odds_ratio": -0.3213021159172058, "logits/chosen": -0.9217208623886108, "logits/rejected": -0.9246659874916077, "logps/chosen": -0.7561315298080444, "logps/rejected": -2.3958277702331543, "loss": 0.9882, "nll_loss": 1.0464025735855103, "rewards/accuracies": 0.75, "rewards/chosen": -0.07561315596103668, "rewards/margins": 0.16396963596343994, "rewards/rejected": -0.23958280682563782, "step": 1624 }, { "epoch": 0.9913070001525087, "grad_norm": 2.474722385406494, "learning_rate": 6.437232088181261e-06, "log_odds_chosen": 0.016973093152046204, "log_odds_ratio": -0.7551695108413696, "logits/chosen": -0.9289376735687256, "logits/rejected": -0.870965838432312, "logps/chosen": -1.105986475944519, "logps/rejected": -1.158080816268921, "loss": 1.3478, "nll_loss": 1.2930271625518799, "rewards/accuracies": 0.625, "rewards/chosen": -0.11059864610433578, "rewards/margins": 0.005209426395595074, "rewards/rejected": -0.11580807715654373, "step": 1625 }, { "epoch": 0.9919170352295257, "grad_norm": 1.3142402172088623, "learning_rate": 6.436252296387017e-06, "log_odds_chosen": 0.5126217007637024, "log_odds_ratio": -0.6324549913406372, "logits/chosen": -0.898608922958374, "logits/rejected": -0.964912474155426, "logps/chosen": -0.8213787078857422, "logps/rejected": -1.179796814918518, "loss": 1.0449, "nll_loss": 0.9910069108009338, "rewards/accuracies": 0.625, "rewards/chosen": -0.08213786780834198, "rewards/margins": 0.035841815173625946, "rewards/rejected": -0.11797969043254852, "step": 1626 }, { "epoch": 0.9925270703065426, "grad_norm": 2.594414710998535, "learning_rate": 6.435272504592774e-06, "log_odds_chosen": 0.5252457857131958, "log_odds_ratio": -0.5816487073898315, "logits/chosen": -0.949162483215332, "logits/rejected": -0.9820404052734375, "logps/chosen": -0.7800079584121704, "logps/rejected": -1.0042200088500977, "loss": 0.9597, "nll_loss": 0.9624773859977722, "rewards/accuracies": 0.5, "rewards/chosen": -0.07800079882144928, "rewards/margins": 0.022421203553676605, "rewards/rejected": -0.10042200237512589, "step": 1627 }, { "epoch": 0.9931371053835596, "grad_norm": 5.051064491271973, "learning_rate": 6.43429271279853e-06, "log_odds_chosen": 1.3916761875152588, "log_odds_ratio": -0.3032815158367157, "logits/chosen": -1.0367437601089478, "logits/rejected": -1.0860788822174072, "logps/chosen": -0.6598248481750488, "logps/rejected": -1.5344645977020264, "loss": 1.0237, "nll_loss": 0.9620481729507446, "rewards/accuracies": 1.0, "rewards/chosen": -0.06598248332738876, "rewards/margins": 0.08746398240327835, "rewards/rejected": -0.1534464806318283, "step": 1628 }, { "epoch": 0.9937471404605764, "grad_norm": 2.0617711544036865, "learning_rate": 6.4333129210042865e-06, "log_odds_chosen": -0.4893423616886139, "log_odds_ratio": -0.9915558099746704, "logits/chosen": -1.1366231441497803, "logits/rejected": -1.0746632814407349, "logps/chosen": -1.1865794658660889, "logps/rejected": -0.8803901076316833, "loss": 1.2457, "nll_loss": 1.3560752868652344, "rewards/accuracies": 0.25, "rewards/chosen": -0.11865794658660889, "rewards/margins": -0.030618935823440552, "rewards/rejected": -0.08803901821374893, "step": 1629 }, { "epoch": 0.9943571755375934, "grad_norm": 2.5851383209228516, "learning_rate": 6.432333129210043e-06, "log_odds_chosen": 1.0668503046035767, "log_odds_ratio": -0.4128376245498657, "logits/chosen": -0.6932411193847656, "logits/rejected": -0.6824802756309509, "logps/chosen": -0.7322548627853394, "logps/rejected": -1.3375117778778076, "loss": 0.9247, "nll_loss": 0.7300251126289368, "rewards/accuracies": 0.75, "rewards/chosen": -0.0732254907488823, "rewards/margins": 0.06052567809820175, "rewards/rejected": -0.13375118374824524, "step": 1630 }, { "epoch": 0.9949672106146104, "grad_norm": 2.2820746898651123, "learning_rate": 6.431353337415799e-06, "log_odds_chosen": 0.4123229384422302, "log_odds_ratio": -0.8328788876533508, "logits/chosen": -0.9413003921508789, "logits/rejected": -0.928784966468811, "logps/chosen": -1.0323550701141357, "logps/rejected": -1.4056119918823242, "loss": 1.2163, "nll_loss": 1.396681308746338, "rewards/accuracies": 0.375, "rewards/chosen": -0.10323551297187805, "rewards/margins": 0.037325672805309296, "rewards/rejected": -0.14056117832660675, "step": 1631 }, { "epoch": 0.9955772456916273, "grad_norm": 1.772194266319275, "learning_rate": 6.430373545621555e-06, "log_odds_chosen": 0.47657352685928345, "log_odds_ratio": -0.5828834772109985, "logits/chosen": -0.8882174491882324, "logits/rejected": -1.0236496925354004, "logps/chosen": -0.8947367668151855, "logps/rejected": -1.191900610923767, "loss": 1.055, "nll_loss": 0.9376131296157837, "rewards/accuracies": 0.625, "rewards/chosen": -0.0894736796617508, "rewards/margins": 0.029716385528445244, "rewards/rejected": -0.11919006705284119, "step": 1632 }, { "epoch": 0.9961872807686442, "grad_norm": 0.9427839517593384, "learning_rate": 6.429393753827312e-06, "log_odds_chosen": 0.0980580747127533, "log_odds_ratio": -0.7519798278808594, "logits/chosen": -0.9518871307373047, "logits/rejected": -0.9361542463302612, "logps/chosen": -0.885353147983551, "logps/rejected": -0.9582772850990295, "loss": 1.2631, "nll_loss": 1.2035720348358154, "rewards/accuracies": 0.375, "rewards/chosen": -0.08853531628847122, "rewards/margins": 0.007292415015399456, "rewards/rejected": -0.09582772850990295, "step": 1633 }, { "epoch": 0.9967973158456611, "grad_norm": 2.7337870597839355, "learning_rate": 6.428413962033068e-06, "log_odds_chosen": 1.2749141454696655, "log_odds_ratio": -0.42648690938949585, "logits/chosen": -0.7961999177932739, "logits/rejected": -0.94342041015625, "logps/chosen": -0.7892799973487854, "logps/rejected": -1.7455774545669556, "loss": 1.0397, "nll_loss": 1.020822286605835, "rewards/accuracies": 1.0, "rewards/chosen": -0.07892800867557526, "rewards/margins": 0.0956297367811203, "rewards/rejected": -0.17455774545669556, "step": 1634 }, { "epoch": 0.9974073509226781, "grad_norm": 6.570844650268555, "learning_rate": 6.427434170238824e-06, "log_odds_chosen": 1.0728448629379272, "log_odds_ratio": -0.36864858865737915, "logits/chosen": -0.8321369886398315, "logits/rejected": -0.9078727960586548, "logps/chosen": -0.801960825920105, "logps/rejected": -1.469234585762024, "loss": 1.1707, "nll_loss": 1.0465216636657715, "rewards/accuracies": 0.875, "rewards/chosen": -0.0801960825920105, "rewards/margins": 0.06672737747430801, "rewards/rejected": -0.14692345261573792, "step": 1635 }, { "epoch": 0.998017385999695, "grad_norm": 1.630133032798767, "learning_rate": 6.42645437844458e-06, "log_odds_chosen": 0.5703784227371216, "log_odds_ratio": -0.5456447005271912, "logits/chosen": -0.8405221104621887, "logits/rejected": -0.8284821510314941, "logps/chosen": -0.8089795112609863, "logps/rejected": -1.1730345487594604, "loss": 1.1265, "nll_loss": 1.0834161043167114, "rewards/accuracies": 0.75, "rewards/chosen": -0.08089794218540192, "rewards/margins": 0.03640551120042801, "rewards/rejected": -0.11730345338582993, "step": 1636 }, { "epoch": 0.9986274210767119, "grad_norm": 7.01126766204834, "learning_rate": 6.425474586650336e-06, "log_odds_chosen": 1.059993028640747, "log_odds_ratio": -0.5576409101486206, "logits/chosen": -0.9542994499206543, "logits/rejected": -0.8588439226150513, "logps/chosen": -0.9588567018508911, "logps/rejected": -1.5794644355773926, "loss": 1.1225, "nll_loss": 1.0056465864181519, "rewards/accuracies": 0.625, "rewards/chosen": -0.09588567167520523, "rewards/margins": 0.06206078082323074, "rewards/rejected": -0.15794645249843597, "step": 1637 }, { "epoch": 0.9992374561537288, "grad_norm": 1.5680334568023682, "learning_rate": 6.424494794856092e-06, "log_odds_chosen": 0.708348274230957, "log_odds_ratio": -0.6606940627098083, "logits/chosen": -0.8388473987579346, "logits/rejected": -0.8911296129226685, "logps/chosen": -0.9662216901779175, "logps/rejected": -1.3844859600067139, "loss": 1.126, "nll_loss": 1.1512528657913208, "rewards/accuracies": 0.5, "rewards/chosen": -0.09662218391895294, "rewards/margins": 0.04182642325758934, "rewards/rejected": -0.1384485960006714, "step": 1638 }, { "epoch": 0.9998474912307458, "grad_norm": 1.4403759241104126, "learning_rate": 6.423515003061849e-06, "log_odds_chosen": 0.8790372014045715, "log_odds_ratio": -0.4670657515525818, "logits/chosen": -1.0076642036437988, "logits/rejected": -0.8925111293792725, "logps/chosen": -0.9475148916244507, "logps/rejected": -1.5144188404083252, "loss": 1.2408, "nll_loss": 1.1163034439086914, "rewards/accuracies": 0.875, "rewards/chosen": -0.09475149214267731, "rewards/margins": 0.056690413504838943, "rewards/rejected": -0.15144190192222595, "step": 1639 }, { "epoch": 0.9998474912307458, "eval_log_odds_chosen": 1.0713987350463867, "eval_log_odds_ratio": -0.49873560667037964, "eval_logits/chosen": -0.9312915205955505, "eval_logits/rejected": -0.9217242002487183, "eval_logps/chosen": -0.8497822880744934, "eval_logps/rejected": -1.5919662714004517, "eval_loss": 1.1077635288238525, "eval_nll_loss": 1.101354956626892, "eval_rewards/accuracies": 0.7045454382896423, "eval_rewards/chosen": -0.08497823774814606, "eval_rewards/margins": 0.07421839237213135, "eval_rewards/rejected": -0.1591966152191162, "eval_runtime": 395.6449, "eval_samples_per_second": 0.993, "eval_steps_per_second": 0.167, "step": 1639 }, { "epoch": 1.0004575263077626, "grad_norm": 1.8803726434707642, "learning_rate": 6.4225352112676055e-06, "log_odds_chosen": 0.7572243213653564, "log_odds_ratio": -0.5517594218254089, "logits/chosen": -0.925684928894043, "logits/rejected": -1.0092884302139282, "logps/chosen": -1.033058762550354, "logps/rejected": -1.5098150968551636, "loss": 1.1269, "nll_loss": 1.3111776113510132, "rewards/accuracies": 0.625, "rewards/chosen": -0.1033058688044548, "rewards/margins": 0.047675635665655136, "rewards/rejected": -0.15098151564598083, "step": 1640 }, { "epoch": 1.0010675613847797, "grad_norm": 2.011247158050537, "learning_rate": 6.421555419473362e-06, "log_odds_chosen": 0.5406813621520996, "log_odds_ratio": -0.6492504477500916, "logits/chosen": -1.0092887878417969, "logits/rejected": -1.1070469617843628, "logps/chosen": -0.9702550768852234, "logps/rejected": -1.3390843868255615, "loss": 1.1555, "nll_loss": 1.0130224227905273, "rewards/accuracies": 0.625, "rewards/chosen": -0.09702551364898682, "rewards/margins": 0.0368829220533371, "rewards/rejected": -0.1339084506034851, "step": 1641 }, { "epoch": 1.0016775964617965, "grad_norm": 1.5142245292663574, "learning_rate": 6.420575627679118e-06, "log_odds_chosen": 0.9777643084526062, "log_odds_ratio": -0.5004425048828125, "logits/chosen": -0.952613353729248, "logits/rejected": -0.9985236525535583, "logps/chosen": -0.7368085980415344, "logps/rejected": -1.382053256034851, "loss": 1.1867, "nll_loss": 0.9665805101394653, "rewards/accuracies": 0.625, "rewards/chosen": -0.07368086278438568, "rewards/margins": 0.06452446430921555, "rewards/rejected": -0.13820531964302063, "step": 1642 }, { "epoch": 1.0022876315388134, "grad_norm": 1.9209259748458862, "learning_rate": 6.419595835884874e-06, "log_odds_chosen": 1.4617798328399658, "log_odds_ratio": -0.42782121896743774, "logits/chosen": -1.026137351989746, "logits/rejected": -1.0251970291137695, "logps/chosen": -0.7535276412963867, "logps/rejected": -1.8334883451461792, "loss": 0.9511, "nll_loss": 1.0189849138259888, "rewards/accuracies": 0.875, "rewards/chosen": -0.07535276561975479, "rewards/margins": 0.10799606889486313, "rewards/rejected": -0.18334881961345673, "step": 1643 }, { "epoch": 1.0028976666158305, "grad_norm": 1.7298851013183594, "learning_rate": 6.418616044090631e-06, "log_odds_chosen": 0.6238480806350708, "log_odds_ratio": -0.5236512422561646, "logits/chosen": -1.0215861797332764, "logits/rejected": -0.9743545055389404, "logps/chosen": -0.9354526996612549, "logps/rejected": -1.3574161529541016, "loss": 1.2243, "nll_loss": 1.3822550773620605, "rewards/accuracies": 0.75, "rewards/chosen": -0.09354527294635773, "rewards/margins": 0.04219633340835571, "rewards/rejected": -0.13574160635471344, "step": 1644 }, { "epoch": 1.0035077016928473, "grad_norm": 4.306888580322266, "learning_rate": 6.417636252296387e-06, "log_odds_chosen": 1.7597154378890991, "log_odds_ratio": -0.3975919187068939, "logits/chosen": -0.9182068109512329, "logits/rejected": -0.9868367910385132, "logps/chosen": -0.5764548778533936, "logps/rejected": -1.803532600402832, "loss": 1.2646, "nll_loss": 1.012438178062439, "rewards/accuracies": 0.75, "rewards/chosen": -0.057645488530397415, "rewards/margins": 0.1227077767252922, "rewards/rejected": -0.1803532838821411, "step": 1645 }, { "epoch": 1.0041177367698644, "grad_norm": 4.361793041229248, "learning_rate": 6.416656460502143e-06, "log_odds_chosen": 0.7766858339309692, "log_odds_ratio": -0.6557899713516235, "logits/chosen": -0.9446489214897156, "logits/rejected": -0.9254082441329956, "logps/chosen": -0.9292771816253662, "logps/rejected": -1.4882681369781494, "loss": 1.1193, "nll_loss": 1.2331147193908691, "rewards/accuracies": 0.75, "rewards/chosen": -0.0929277092218399, "rewards/margins": 0.055899109691381454, "rewards/rejected": -0.14882682263851166, "step": 1646 }, { "epoch": 1.0047277718468812, "grad_norm": 2.322730779647827, "learning_rate": 6.415676668707899e-06, "log_odds_chosen": 0.6906884908676147, "log_odds_ratio": -0.5189952254295349, "logits/chosen": -0.9500510692596436, "logits/rejected": -1.0178948640823364, "logps/chosen": -0.903104305267334, "logps/rejected": -1.3543130159378052, "loss": 1.2737, "nll_loss": 1.2737184762954712, "rewards/accuracies": 0.75, "rewards/chosen": -0.09031043201684952, "rewards/margins": 0.045120880007743835, "rewards/rejected": -0.13543130457401276, "step": 1647 }, { "epoch": 1.005337806923898, "grad_norm": 1.9592331647872925, "learning_rate": 6.414696876913655e-06, "log_odds_chosen": 0.4642386734485626, "log_odds_ratio": -0.6923292875289917, "logits/chosen": -0.8373271822929382, "logits/rejected": -0.8008240461349487, "logps/chosen": -0.9612773060798645, "logps/rejected": -1.3346648216247559, "loss": 1.0286, "nll_loss": 0.942825973033905, "rewards/accuracies": 0.625, "rewards/chosen": -0.09612773358821869, "rewards/margins": 0.0373387485742569, "rewards/rejected": -0.13346648216247559, "step": 1648 }, { "epoch": 1.0059478420009151, "grad_norm": 1.4056941270828247, "learning_rate": 6.4137170851194115e-06, "log_odds_chosen": 1.1558470726013184, "log_odds_ratio": -0.515107274055481, "logits/chosen": -1.2152049541473389, "logits/rejected": -1.1651407480239868, "logps/chosen": -0.8073008060455322, "logps/rejected": -1.3761271238327026, "loss": 1.0197, "nll_loss": 1.2379013299942017, "rewards/accuracies": 0.5, "rewards/chosen": -0.08073007315397263, "rewards/margins": 0.05688263475894928, "rewards/rejected": -0.1376127153635025, "step": 1649 }, { "epoch": 1.006557877077932, "grad_norm": 3.0085232257843018, "learning_rate": 6.4127372933251684e-06, "log_odds_chosen": 0.5572444200515747, "log_odds_ratio": -0.7326973676681519, "logits/chosen": -1.1569781303405762, "logits/rejected": -1.2463010549545288, "logps/chosen": -1.0390820503234863, "logps/rejected": -1.3731818199157715, "loss": 1.2447, "nll_loss": 1.1747376918792725, "rewards/accuracies": 0.5, "rewards/chosen": -0.10390821099281311, "rewards/margins": 0.03340998291969299, "rewards/rejected": -0.1373181939125061, "step": 1650 }, { "epoch": 1.0071679121549488, "grad_norm": 1.1713979244232178, "learning_rate": 6.4117575015309245e-06, "log_odds_chosen": -0.039220936596393585, "log_odds_ratio": -0.7603402137756348, "logits/chosen": -1.120255470275879, "logits/rejected": -1.046708106994629, "logps/chosen": -1.0005815029144287, "logps/rejected": -0.9690566062927246, "loss": 1.1386, "nll_loss": 1.2879607677459717, "rewards/accuracies": 0.375, "rewards/chosen": -0.10005815327167511, "rewards/margins": -0.003152491059154272, "rewards/rejected": -0.0969056636095047, "step": 1651 }, { "epoch": 1.007777947231966, "grad_norm": 1.6371978521347046, "learning_rate": 6.410777709736681e-06, "log_odds_chosen": 1.2376797199249268, "log_odds_ratio": -0.4362511932849884, "logits/chosen": -1.0409916639328003, "logits/rejected": -1.0175156593322754, "logps/chosen": -0.8301412463188171, "logps/rejected": -1.7066670656204224, "loss": 1.1205, "nll_loss": 1.083024501800537, "rewards/accuracies": 0.875, "rewards/chosen": -0.08301413059234619, "rewards/margins": 0.08765257894992828, "rewards/rejected": -0.17066670954227448, "step": 1652 }, { "epoch": 1.0083879823089827, "grad_norm": 2.3356127738952637, "learning_rate": 6.409797917942437e-06, "log_odds_chosen": 2.395719289779663, "log_odds_ratio": -0.46156495809555054, "logits/chosen": -0.9396721124649048, "logits/rejected": -0.878736674785614, "logps/chosen": -0.7697539925575256, "logps/rejected": -2.650783061981201, "loss": 1.0406, "nll_loss": 1.0117237567901611, "rewards/accuracies": 0.625, "rewards/chosen": -0.07697540521621704, "rewards/margins": 0.18810290098190308, "rewards/rejected": -0.2650783061981201, "step": 1653 }, { "epoch": 1.0089980173859996, "grad_norm": 1.9314006567001343, "learning_rate": 6.408818126148193e-06, "log_odds_chosen": 0.929100513458252, "log_odds_ratio": -0.555121898651123, "logits/chosen": -1.037650465965271, "logits/rejected": -0.9827899932861328, "logps/chosen": -0.7752183675765991, "logps/rejected": -1.2181602716445923, "loss": 1.114, "nll_loss": 1.1884336471557617, "rewards/accuracies": 0.75, "rewards/chosen": -0.07752183824777603, "rewards/margins": 0.04429418221116066, "rewards/rejected": -0.12181602418422699, "step": 1654 }, { "epoch": 1.0096080524630167, "grad_norm": 1.5193567276000977, "learning_rate": 6.40783833435395e-06, "log_odds_chosen": 1.244825839996338, "log_odds_ratio": -0.46553608775138855, "logits/chosen": -1.11446213722229, "logits/rejected": -1.0678868293762207, "logps/chosen": -0.9313148260116577, "logps/rejected": -1.8741443157196045, "loss": 1.1297, "nll_loss": 1.0025402307510376, "rewards/accuracies": 0.625, "rewards/chosen": -0.09313148260116577, "rewards/margins": 0.09428294748067856, "rewards/rejected": -0.18741443753242493, "step": 1655 }, { "epoch": 1.0102180875400335, "grad_norm": 1.5305211544036865, "learning_rate": 6.406858542559706e-06, "log_odds_chosen": 1.0731010437011719, "log_odds_ratio": -0.495543897151947, "logits/chosen": -1.0216984748840332, "logits/rejected": -1.0179438591003418, "logps/chosen": -0.8400090336799622, "logps/rejected": -1.6170042753219604, "loss": 1.1894, "nll_loss": 1.0482324361801147, "rewards/accuracies": 0.75, "rewards/chosen": -0.08400090783834457, "rewards/margins": 0.07769952714443207, "rewards/rejected": -0.16170042753219604, "step": 1656 }, { "epoch": 1.0108281226170506, "grad_norm": 2.8494088649749756, "learning_rate": 6.405878750765462e-06, "log_odds_chosen": 1.4385225772857666, "log_odds_ratio": -0.5004547834396362, "logits/chosen": -1.050866723060608, "logits/rejected": -1.0910993814468384, "logps/chosen": -0.5672321915626526, "logps/rejected": -1.325664758682251, "loss": 1.1002, "nll_loss": 1.1147536039352417, "rewards/accuracies": 0.625, "rewards/chosen": -0.0567232221364975, "rewards/margins": 0.07584325224161148, "rewards/rejected": -0.13256646692752838, "step": 1657 }, { "epoch": 1.0114381576940674, "grad_norm": 1.5931752920150757, "learning_rate": 6.404898958971218e-06, "log_odds_chosen": 0.9601722359657288, "log_odds_ratio": -0.5351735353469849, "logits/chosen": -1.240482211112976, "logits/rejected": -1.2510724067687988, "logps/chosen": -0.9885051846504211, "logps/rejected": -1.7442436218261719, "loss": 1.123, "nll_loss": 1.3153126239776611, "rewards/accuracies": 0.625, "rewards/chosen": -0.09885052591562271, "rewards/margins": 0.07557384669780731, "rewards/rejected": -0.17442438006401062, "step": 1658 }, { "epoch": 1.0120481927710843, "grad_norm": 4.683197498321533, "learning_rate": 6.403919167176974e-06, "log_odds_chosen": 0.7766141295433044, "log_odds_ratio": -0.6066244840621948, "logits/chosen": -1.042470932006836, "logits/rejected": -1.1217079162597656, "logps/chosen": -0.8838378190994263, "logps/rejected": -1.3759444952011108, "loss": 1.0034, "nll_loss": 1.0224409103393555, "rewards/accuracies": 0.5, "rewards/chosen": -0.08838377892971039, "rewards/margins": 0.049210671335458755, "rewards/rejected": -0.13759446144104004, "step": 1659 }, { "epoch": 1.0126582278481013, "grad_norm": 1.7201329469680786, "learning_rate": 6.4029393753827305e-06, "log_odds_chosen": 0.06408586353063583, "log_odds_ratio": -0.6740603446960449, "logits/chosen": -1.168168067932129, "logits/rejected": -1.0608644485473633, "logps/chosen": -1.0061813592910767, "logps/rejected": -1.0510482788085938, "loss": 1.2312, "nll_loss": 1.3041753768920898, "rewards/accuracies": 0.625, "rewards/chosen": -0.1006181463599205, "rewards/margins": 0.004486694000661373, "rewards/rejected": -0.10510483384132385, "step": 1660 }, { "epoch": 1.0132682629251182, "grad_norm": 1.6262640953063965, "learning_rate": 6.4019595835884875e-06, "log_odds_chosen": 1.8443248271942139, "log_odds_ratio": -0.4483046233654022, "logits/chosen": -0.9511334300041199, "logits/rejected": -0.9556715488433838, "logps/chosen": -0.7926774621009827, "logps/rejected": -2.0525364875793457, "loss": 1.1519, "nll_loss": 1.3046441078186035, "rewards/accuracies": 0.75, "rewards/chosen": -0.07926774024963379, "rewards/margins": 0.12598593533039093, "rewards/rejected": -0.20525367558002472, "step": 1661 }, { "epoch": 1.013878298002135, "grad_norm": 1.6899250745773315, "learning_rate": 6.400979791794244e-06, "log_odds_chosen": 0.8312592506408691, "log_odds_ratio": -0.4809723496437073, "logits/chosen": -1.0323492288589478, "logits/rejected": -1.0176241397857666, "logps/chosen": -0.7714283466339111, "logps/rejected": -1.344472885131836, "loss": 0.9806, "nll_loss": 0.9189913868904114, "rewards/accuracies": 0.875, "rewards/chosen": -0.07714283466339111, "rewards/margins": 0.05730444937944412, "rewards/rejected": -0.13444727659225464, "step": 1662 }, { "epoch": 1.014488333079152, "grad_norm": 1.4610148668289185, "learning_rate": 6.4e-06, "log_odds_chosen": 0.892366886138916, "log_odds_ratio": -0.40624314546585083, "logits/chosen": -0.7958084344863892, "logits/rejected": -0.6425321698188782, "logps/chosen": -0.5355542898178101, "logps/rejected": -0.9663764238357544, "loss": 1.0162, "nll_loss": 0.84840327501297, "rewards/accuracies": 0.875, "rewards/chosen": -0.053555428981781006, "rewards/margins": 0.04308221861720085, "rewards/rejected": -0.09663765132427216, "step": 1663 }, { "epoch": 1.015098368156169, "grad_norm": 2.9896955490112305, "learning_rate": 6.399020208205756e-06, "log_odds_chosen": -0.2229830026626587, "log_odds_ratio": -0.8837847709655762, "logits/chosen": -1.0417394638061523, "logits/rejected": -0.9933497905731201, "logps/chosen": -1.0028326511383057, "logps/rejected": -0.9207131862640381, "loss": 1.1421, "nll_loss": 1.2522797584533691, "rewards/accuracies": 0.125, "rewards/chosen": -0.10028326511383057, "rewards/margins": -0.008211947977542877, "rewards/rejected": -0.09207132458686829, "step": 1664 }, { "epoch": 1.015708403233186, "grad_norm": 1.0407963991165161, "learning_rate": 6.398040416411512e-06, "log_odds_chosen": -0.04046058654785156, "log_odds_ratio": -0.7294895648956299, "logits/chosen": -0.9575178623199463, "logits/rejected": -1.0008647441864014, "logps/chosen": -1.0134918689727783, "logps/rejected": -1.0104995965957642, "loss": 1.2108, "nll_loss": 1.2444868087768555, "rewards/accuracies": 0.375, "rewards/chosen": -0.10134918242692947, "rewards/margins": -0.0002992250956594944, "rewards/rejected": -0.10104995220899582, "step": 1665 }, { "epoch": 1.0163184383102029, "grad_norm": 1.5644713640213013, "learning_rate": 6.397060624617268e-06, "log_odds_chosen": 2.3105180263519287, "log_odds_ratio": -0.18890412151813507, "logits/chosen": -0.8042429685592651, "logits/rejected": -0.8345189690589905, "logps/chosen": -0.5941360592842102, "logps/rejected": -2.0867700576782227, "loss": 0.9309, "nll_loss": 0.959976851940155, "rewards/accuracies": 1.0, "rewards/chosen": -0.0594136118888855, "rewards/margins": 0.149263396859169, "rewards/rejected": -0.2086769938468933, "step": 1666 }, { "epoch": 1.0169284733872197, "grad_norm": 1.4024012088775635, "learning_rate": 6.396080832823025e-06, "log_odds_chosen": 0.6468677520751953, "log_odds_ratio": -0.571658730506897, "logits/chosen": -0.8624612092971802, "logits/rejected": -0.9109867215156555, "logps/chosen": -0.8403258323669434, "logps/rejected": -1.2006434202194214, "loss": 1.2491, "nll_loss": 1.2062851190567017, "rewards/accuracies": 0.625, "rewards/chosen": -0.0840325802564621, "rewards/margins": 0.03603176027536392, "rewards/rejected": -0.12006434053182602, "step": 1667 }, { "epoch": 1.0175385084642368, "grad_norm": 0.9676788449287415, "learning_rate": 6.395101041028781e-06, "log_odds_chosen": 0.10105115175247192, "log_odds_ratio": -0.7305707335472107, "logits/chosen": -1.2407461404800415, "logits/rejected": -1.1181409358978271, "logps/chosen": -1.2064592838287354, "logps/rejected": -1.27272367477417, "loss": 1.0258, "nll_loss": 1.2865792512893677, "rewards/accuracies": 0.25, "rewards/chosen": -0.1206459254026413, "rewards/margins": 0.00662644486874342, "rewards/rejected": -0.127272367477417, "step": 1668 }, { "epoch": 1.0181485435412536, "grad_norm": 1.2417497634887695, "learning_rate": 6.394121249234538e-06, "log_odds_chosen": 0.6299837231636047, "log_odds_ratio": -0.6068235039710999, "logits/chosen": -1.0349384546279907, "logits/rejected": -1.0522865056991577, "logps/chosen": -0.9880645871162415, "logps/rejected": -1.4673779010772705, "loss": 1.0035, "nll_loss": 1.086257815361023, "rewards/accuracies": 0.5, "rewards/chosen": -0.0988064557313919, "rewards/margins": 0.04793133586645126, "rewards/rejected": -0.14673779904842377, "step": 1669 }, { "epoch": 1.0187585786182705, "grad_norm": 1.8199965953826904, "learning_rate": 6.393141457440293e-06, "log_odds_chosen": 0.3974761366844177, "log_odds_ratio": -0.703126072883606, "logits/chosen": -0.7171837091445923, "logits/rejected": -0.7920709848403931, "logps/chosen": -0.8798556327819824, "logps/rejected": -1.055614709854126, "loss": 1.256, "nll_loss": 1.0507700443267822, "rewards/accuracies": 0.5, "rewards/chosen": -0.087985560297966, "rewards/margins": 0.01757589913904667, "rewards/rejected": -0.10556146502494812, "step": 1670 }, { "epoch": 1.0193686136952875, "grad_norm": 1.5253254175186157, "learning_rate": 6.3921616656460495e-06, "log_odds_chosen": 1.7347831726074219, "log_odds_ratio": -0.4028066396713257, "logits/chosen": -0.73872971534729, "logits/rejected": -0.7981241345405579, "logps/chosen": -0.8934840559959412, "logps/rejected": -2.2161083221435547, "loss": 1.2097, "nll_loss": 1.077041506767273, "rewards/accuracies": 0.625, "rewards/chosen": -0.08934841305017471, "rewards/margins": 0.1322624385356903, "rewards/rejected": -0.22161084413528442, "step": 1671 }, { "epoch": 1.0199786487723044, "grad_norm": 1.6154744625091553, "learning_rate": 6.3911818738518065e-06, "log_odds_chosen": 1.2944315671920776, "log_odds_ratio": -0.5067098140716553, "logits/chosen": -0.8699775338172913, "logits/rejected": -0.8245745301246643, "logps/chosen": -0.7970283031463623, "logps/rejected": -1.766497254371643, "loss": 1.0867, "nll_loss": 0.9222815036773682, "rewards/accuracies": 0.75, "rewards/chosen": -0.07970282435417175, "rewards/margins": 0.09694689512252808, "rewards/rejected": -0.17664973437786102, "step": 1672 }, { "epoch": 1.0205886838493214, "grad_norm": 1.5122876167297363, "learning_rate": 6.390202082057563e-06, "log_odds_chosen": 1.8448365926742554, "log_odds_ratio": -0.38601240515708923, "logits/chosen": -0.9375758767127991, "logits/rejected": -0.8952871561050415, "logps/chosen": -0.664992094039917, "logps/rejected": -1.9916188716888428, "loss": 1.0178, "nll_loss": 0.9117758274078369, "rewards/accuracies": 0.75, "rewards/chosen": -0.06649921089410782, "rewards/margins": 0.13266268372535706, "rewards/rejected": -0.19916188716888428, "step": 1673 }, { "epoch": 1.0211987189263383, "grad_norm": 0.961279571056366, "learning_rate": 6.389222290263319e-06, "log_odds_chosen": 1.1549245119094849, "log_odds_ratio": -0.3922046422958374, "logits/chosen": -0.9870456457138062, "logits/rejected": -1.0146151781082153, "logps/chosen": -0.8777214288711548, "logps/rejected": -1.689122200012207, "loss": 1.1078, "nll_loss": 1.187206745147705, "rewards/accuracies": 0.875, "rewards/chosen": -0.08777214586734772, "rewards/margins": 0.08114007860422134, "rewards/rejected": -0.16891220211982727, "step": 1674 }, { "epoch": 1.0218087540033551, "grad_norm": 1.5951080322265625, "learning_rate": 6.388242498469075e-06, "log_odds_chosen": 0.3613610565662384, "log_odds_ratio": -0.6676468849182129, "logits/chosen": -1.0307894945144653, "logits/rejected": -1.0845556259155273, "logps/chosen": -0.8809680938720703, "logps/rejected": -1.085556983947754, "loss": 1.1506, "nll_loss": 0.9465773105621338, "rewards/accuracies": 0.5, "rewards/chosen": -0.08809681236743927, "rewards/margins": 0.020458891987800598, "rewards/rejected": -0.10855569690465927, "step": 1675 }, { "epoch": 1.0224187890803722, "grad_norm": 2.181180000305176, "learning_rate": 6.387262706674831e-06, "log_odds_chosen": 1.1967215538024902, "log_odds_ratio": -0.36190611124038696, "logits/chosen": -0.972741961479187, "logits/rejected": -1.0176291465759277, "logps/chosen": -0.7370730042457581, "logps/rejected": -1.5297071933746338, "loss": 1.0265, "nll_loss": 0.9068787097930908, "rewards/accuracies": 1.0, "rewards/chosen": -0.07370729744434357, "rewards/margins": 0.07926341891288757, "rewards/rejected": -0.15297073125839233, "step": 1676 }, { "epoch": 1.023028824157389, "grad_norm": 1.2495542764663696, "learning_rate": 6.386282914880587e-06, "log_odds_chosen": 0.9396030306816101, "log_odds_ratio": -0.4689776301383972, "logits/chosen": -0.9519705176353455, "logits/rejected": -1.003440260887146, "logps/chosen": -0.8896403908729553, "logps/rejected": -1.5573230981826782, "loss": 1.1123, "nll_loss": 1.0462406873703003, "rewards/accuracies": 0.625, "rewards/chosen": -0.08896403759717941, "rewards/margins": 0.06676827371120453, "rewards/rejected": -0.15573230385780334, "step": 1677 }, { "epoch": 1.023638859234406, "grad_norm": 1.922995924949646, "learning_rate": 6.385303123086344e-06, "log_odds_chosen": 1.239729642868042, "log_odds_ratio": -0.49665239453315735, "logits/chosen": -1.0580576658248901, "logits/rejected": -1.11026930809021, "logps/chosen": -0.7658758759498596, "logps/rejected": -1.5700231790542603, "loss": 1.1242, "nll_loss": 1.1221593618392944, "rewards/accuracies": 0.75, "rewards/chosen": -0.07658758759498596, "rewards/margins": 0.08041473478078842, "rewards/rejected": -0.1570023149251938, "step": 1678 }, { "epoch": 1.024248894311423, "grad_norm": 3.3627960681915283, "learning_rate": 6.3843233312921e-06, "log_odds_chosen": 1.482311487197876, "log_odds_ratio": -0.4110715091228485, "logits/chosen": -0.8024501800537109, "logits/rejected": -0.8039897680282593, "logps/chosen": -0.7670362591743469, "logps/rejected": -1.788851261138916, "loss": 1.194, "nll_loss": 0.9660623669624329, "rewards/accuracies": 0.75, "rewards/chosen": -0.07670362293720245, "rewards/margins": 0.10218149423599243, "rewards/rejected": -0.17888513207435608, "step": 1679 }, { "epoch": 1.0248589293884398, "grad_norm": 1.087221384048462, "learning_rate": 6.383343539497857e-06, "log_odds_chosen": 1.813923954963684, "log_odds_ratio": -0.4226991534233093, "logits/chosen": -0.8295612931251526, "logits/rejected": -0.9044234156608582, "logps/chosen": -0.6280056834220886, "logps/rejected": -2.0308403968811035, "loss": 1.116, "nll_loss": 0.8526089787483215, "rewards/accuracies": 0.75, "rewards/chosen": -0.0628005713224411, "rewards/margins": 0.140283465385437, "rewards/rejected": -0.2030840516090393, "step": 1680 }, { "epoch": 1.0254689644654567, "grad_norm": 1.4756982326507568, "learning_rate": 6.3823637477036125e-06, "log_odds_chosen": 0.9193881154060364, "log_odds_ratio": -0.5004829168319702, "logits/chosen": -1.007720708847046, "logits/rejected": -0.914832592010498, "logps/chosen": -0.8670412302017212, "logps/rejected": -1.5763447284698486, "loss": 1.2479, "nll_loss": 1.309722900390625, "rewards/accuracies": 0.75, "rewards/chosen": -0.08670412003993988, "rewards/margins": 0.0709303468465805, "rewards/rejected": -0.15763446688652039, "step": 1681 }, { "epoch": 1.0260789995424737, "grad_norm": 3.827194929122925, "learning_rate": 6.3813839559093686e-06, "log_odds_chosen": 0.9493703246116638, "log_odds_ratio": -0.4676326513290405, "logits/chosen": -0.8842183351516724, "logits/rejected": -0.9451444745063782, "logps/chosen": -0.8108094334602356, "logps/rejected": -1.4326980113983154, "loss": 0.9172, "nll_loss": 1.11808180809021, "rewards/accuracies": 0.875, "rewards/chosen": -0.08108095079660416, "rewards/margins": 0.062188856303691864, "rewards/rejected": -0.14326980710029602, "step": 1682 }, { "epoch": 1.0266890346194906, "grad_norm": 2.08250093460083, "learning_rate": 6.3804041641151255e-06, "log_odds_chosen": 1.2400273084640503, "log_odds_ratio": -0.4777098596096039, "logits/chosen": -0.997399628162384, "logits/rejected": -1.007453203201294, "logps/chosen": -0.8478072881698608, "logps/rejected": -1.7575182914733887, "loss": 1.0461, "nll_loss": 1.2054306268692017, "rewards/accuracies": 0.75, "rewards/chosen": -0.0847807228565216, "rewards/margins": 0.09097109735012054, "rewards/rejected": -0.17575183510780334, "step": 1683 }, { "epoch": 1.0272990696965076, "grad_norm": 1.6686056852340698, "learning_rate": 6.379424372320882e-06, "log_odds_chosen": 1.2287801504135132, "log_odds_ratio": -0.5506404638290405, "logits/chosen": -0.871421754360199, "logits/rejected": -0.911573052406311, "logps/chosen": -0.83208167552948, "logps/rejected": -1.7803008556365967, "loss": 1.2415, "nll_loss": 1.0526151657104492, "rewards/accuracies": 0.75, "rewards/chosen": -0.08320817351341248, "rewards/margins": 0.09482190757989883, "rewards/rejected": -0.1780301034450531, "step": 1684 }, { "epoch": 1.0279091047735245, "grad_norm": 1.459687352180481, "learning_rate": 6.378444580526638e-06, "log_odds_chosen": 1.4248987436294556, "log_odds_ratio": -0.5154100060462952, "logits/chosen": -0.8631391525268555, "logits/rejected": -0.7954903841018677, "logps/chosen": -0.6537235379219055, "logps/rejected": -1.6787123680114746, "loss": 0.9908, "nll_loss": 0.8743115663528442, "rewards/accuracies": 0.5, "rewards/chosen": -0.06537235528230667, "rewards/margins": 0.10249887406826019, "rewards/rejected": -0.16787122189998627, "step": 1685 }, { "epoch": 1.0285191398505413, "grad_norm": 1.4938430786132812, "learning_rate": 6.377464788732395e-06, "log_odds_chosen": 0.8631067276000977, "log_odds_ratio": -0.5884206295013428, "logits/chosen": -1.1196472644805908, "logits/rejected": -1.0980191230773926, "logps/chosen": -0.8364564180374146, "logps/rejected": -1.342384696006775, "loss": 1.0548, "nll_loss": 1.1305599212646484, "rewards/accuracies": 0.625, "rewards/chosen": -0.08364564180374146, "rewards/margins": 0.05059283599257469, "rewards/rejected": -0.13423848152160645, "step": 1686 }, { "epoch": 1.0291291749275584, "grad_norm": 1.058226466178894, "learning_rate": 6.37648499693815e-06, "log_odds_chosen": 0.6412538886070251, "log_odds_ratio": -0.5769462585449219, "logits/chosen": -1.0073318481445312, "logits/rejected": -1.0496208667755127, "logps/chosen": -0.8282480239868164, "logps/rejected": -1.264066457748413, "loss": 1.0077, "nll_loss": 0.9863777160644531, "rewards/accuracies": 0.625, "rewards/chosen": -0.08282481133937836, "rewards/margins": 0.043581847101449966, "rewards/rejected": -0.12640665471553802, "step": 1687 }, { "epoch": 1.0297392100045752, "grad_norm": 1.5163768529891968, "learning_rate": 6.375505205143906e-06, "log_odds_chosen": 2.09843111038208, "log_odds_ratio": -0.3159441351890564, "logits/chosen": -1.1210511922836304, "logits/rejected": -0.9575161933898926, "logps/chosen": -0.9695944786071777, "logps/rejected": -2.6816015243530273, "loss": 1.2125, "nll_loss": 1.3840281963348389, "rewards/accuracies": 0.875, "rewards/chosen": -0.0969594419002533, "rewards/margins": 0.1712007224559784, "rewards/rejected": -0.2681601643562317, "step": 1688 }, { "epoch": 1.030349245081592, "grad_norm": 1.5545704364776611, "learning_rate": 6.374525413349663e-06, "log_odds_chosen": 1.3743609189987183, "log_odds_ratio": -0.4504600465297699, "logits/chosen": -0.8363815546035767, "logits/rejected": -0.9424604177474976, "logps/chosen": -0.8105116486549377, "logps/rejected": -1.8827004432678223, "loss": 0.9155, "nll_loss": 0.8916815519332886, "rewards/accuracies": 0.75, "rewards/chosen": -0.08105117082595825, "rewards/margins": 0.10721887648105621, "rewards/rejected": -0.18827004730701447, "step": 1689 }, { "epoch": 1.0309592801586092, "grad_norm": 1.6756539344787598, "learning_rate": 6.373545621555419e-06, "log_odds_chosen": 1.900174617767334, "log_odds_ratio": -0.3735773265361786, "logits/chosen": -0.9575885534286499, "logits/rejected": -1.0255134105682373, "logps/chosen": -0.767113447189331, "logps/rejected": -2.3311922550201416, "loss": 0.983, "nll_loss": 0.9008147716522217, "rewards/accuracies": 0.875, "rewards/chosen": -0.07671133428812027, "rewards/margins": 0.15640789270401, "rewards/rejected": -0.23311921954154968, "step": 1690 }, { "epoch": 1.031569315235626, "grad_norm": 2.3242931365966797, "learning_rate": 6.372565829761175e-06, "log_odds_chosen": 0.5069249868392944, "log_odds_ratio": -0.598190426826477, "logits/chosen": -0.9745382070541382, "logits/rejected": -1.2019391059875488, "logps/chosen": -0.872694730758667, "logps/rejected": -1.188470721244812, "loss": 0.9581, "nll_loss": 1.0006773471832275, "rewards/accuracies": 0.625, "rewards/chosen": -0.08726947754621506, "rewards/margins": 0.03157760202884674, "rewards/rejected": -0.1188470721244812, "step": 1691 }, { "epoch": 1.032179350312643, "grad_norm": 2.7488925457000732, "learning_rate": 6.371586037966932e-06, "log_odds_chosen": 1.1386926174163818, "log_odds_ratio": -0.4823906421661377, "logits/chosen": -0.9086184501647949, "logits/rejected": -0.9955198764801025, "logps/chosen": -0.7503646612167358, "logps/rejected": -1.5144792795181274, "loss": 1.1409, "nll_loss": 0.9586318731307983, "rewards/accuracies": 0.5, "rewards/chosen": -0.07503646612167358, "rewards/margins": 0.07641145586967468, "rewards/rejected": -0.15144792199134827, "step": 1692 }, { "epoch": 1.03278938538966, "grad_norm": 1.6653026342391968, "learning_rate": 6.370606246172688e-06, "log_odds_chosen": 0.8839661478996277, "log_odds_ratio": -0.5499696731567383, "logits/chosen": -1.0517053604125977, "logits/rejected": -1.1416059732437134, "logps/chosen": -0.9015588760375977, "logps/rejected": -1.5537545680999756, "loss": 1.1169, "nll_loss": 1.2394006252288818, "rewards/accuracies": 0.625, "rewards/chosen": -0.09015588462352753, "rewards/margins": 0.06521956622600555, "rewards/rejected": -0.15537546575069427, "step": 1693 }, { "epoch": 1.0333994204666768, "grad_norm": 1.1138075590133667, "learning_rate": 6.369626454378444e-06, "log_odds_chosen": 1.7584692239761353, "log_odds_ratio": -0.4056023061275482, "logits/chosen": -0.9330585598945618, "logits/rejected": -0.8895509243011475, "logps/chosen": -0.6986105442047119, "logps/rejected": -2.0580925941467285, "loss": 1.1101, "nll_loss": 0.8444068431854248, "rewards/accuracies": 0.75, "rewards/chosen": -0.06986105442047119, "rewards/margins": 0.13594822585582733, "rewards/rejected": -0.20580928027629852, "step": 1694 }, { "epoch": 1.0340094555436938, "grad_norm": 1.5394455194473267, "learning_rate": 6.368646662584201e-06, "log_odds_chosen": 2.6622138023376465, "log_odds_ratio": -0.25903409719467163, "logits/chosen": -0.6356373429298401, "logits/rejected": -0.8213293552398682, "logps/chosen": -0.531342089176178, "logps/rejected": -2.5001938343048096, "loss": 0.8502, "nll_loss": 0.6521084308624268, "rewards/accuracies": 0.75, "rewards/chosen": -0.05313421040773392, "rewards/margins": 0.19688516855239868, "rewards/rejected": -0.250019371509552, "step": 1695 }, { "epoch": 1.0346194906207107, "grad_norm": 2.623012065887451, "learning_rate": 6.367666870789957e-06, "log_odds_chosen": 1.3814425468444824, "log_odds_ratio": -0.4725302457809448, "logits/chosen": -0.993444561958313, "logits/rejected": -1.0578882694244385, "logps/chosen": -0.7478793859481812, "logps/rejected": -1.7499518394470215, "loss": 1.0123, "nll_loss": 0.8924946188926697, "rewards/accuracies": 0.625, "rewards/chosen": -0.074787937104702, "rewards/margins": 0.10020725429058075, "rewards/rejected": -0.17499519884586334, "step": 1696 }, { "epoch": 1.0352295256977275, "grad_norm": 1.8057819604873657, "learning_rate": 6.366687078995714e-06, "log_odds_chosen": 1.4938554763793945, "log_odds_ratio": -0.4735473394393921, "logits/chosen": -1.0382550954818726, "logits/rejected": -1.0820430517196655, "logps/chosen": -0.7541583776473999, "logps/rejected": -1.667457103729248, "loss": 1.1517, "nll_loss": 1.0698422193527222, "rewards/accuracies": 0.625, "rewards/chosen": -0.07541583478450775, "rewards/margins": 0.09132988005876541, "rewards/rejected": -0.16674572229385376, "step": 1697 }, { "epoch": 1.0358395607747446, "grad_norm": 3.79107928276062, "learning_rate": 6.365707287201469e-06, "log_odds_chosen": 0.9804724454879761, "log_odds_ratio": -0.49990251660346985, "logits/chosen": -0.9015570878982544, "logits/rejected": -0.8956757187843323, "logps/chosen": -0.7363957166671753, "logps/rejected": -1.4862391948699951, "loss": 1.1501, "nll_loss": 1.067262053489685, "rewards/accuracies": 0.75, "rewards/chosen": -0.07363957166671753, "rewards/margins": 0.0749843493103981, "rewards/rejected": -0.14862391352653503, "step": 1698 }, { "epoch": 1.0364495958517614, "grad_norm": 1.3393455743789673, "learning_rate": 6.364727495407225e-06, "log_odds_chosen": 1.3895649909973145, "log_odds_ratio": -0.43880516290664673, "logits/chosen": -0.6684085726737976, "logits/rejected": -0.808588445186615, "logps/chosen": -0.7557362914085388, "logps/rejected": -1.7247400283813477, "loss": 1.0924, "nll_loss": 0.7466180324554443, "rewards/accuracies": 0.75, "rewards/chosen": -0.07557363063097, "rewards/margins": 0.09690037369728088, "rewards/rejected": -0.1724739968776703, "step": 1699 }, { "epoch": 1.0370596309287785, "grad_norm": 1.6721490621566772, "learning_rate": 6.363747703612982e-06, "log_odds_chosen": 1.6331921815872192, "log_odds_ratio": -0.3414229154586792, "logits/chosen": -0.6915038228034973, "logits/rejected": -0.6841705441474915, "logps/chosen": -0.7730079889297485, "logps/rejected": -1.8735958337783813, "loss": 1.0672, "nll_loss": 0.957840621471405, "rewards/accuracies": 0.75, "rewards/chosen": -0.07730080187320709, "rewards/margins": 0.11005876958370209, "rewards/rejected": -0.18735957145690918, "step": 1700 }, { "epoch": 1.0376696660057954, "grad_norm": 1.3284207582473755, "learning_rate": 6.362767911818738e-06, "log_odds_chosen": 0.3643324375152588, "log_odds_ratio": -0.7209042310714722, "logits/chosen": -1.2504303455352783, "logits/rejected": -1.0289206504821777, "logps/chosen": -0.9587897062301636, "logps/rejected": -1.1886298656463623, "loss": 1.012, "nll_loss": 1.0991265773773193, "rewards/accuracies": 0.625, "rewards/chosen": -0.0958789736032486, "rewards/margins": 0.02298402041196823, "rewards/rejected": -0.11886298656463623, "step": 1701 }, { "epoch": 1.0382797010828122, "grad_norm": 1.4001494646072388, "learning_rate": 6.361788120024494e-06, "log_odds_chosen": 0.8231513500213623, "log_odds_ratio": -0.4488367438316345, "logits/chosen": -0.5179519653320312, "logits/rejected": -0.7365611791610718, "logps/chosen": -0.5881554484367371, "logps/rejected": -1.0829352140426636, "loss": 0.9808, "nll_loss": 0.7573093175888062, "rewards/accuracies": 0.875, "rewards/chosen": -0.058815546333789825, "rewards/margins": 0.04947797954082489, "rewards/rejected": -0.10829353332519531, "step": 1702 }, { "epoch": 1.0388897361598293, "grad_norm": 1.4800317287445068, "learning_rate": 6.360808328230251e-06, "log_odds_chosen": 2.2407748699188232, "log_odds_ratio": -0.3240011930465698, "logits/chosen": -1.0287880897521973, "logits/rejected": -1.0652656555175781, "logps/chosen": -0.8465464115142822, "logps/rejected": -2.5013058185577393, "loss": 1.0826, "nll_loss": 1.1339308023452759, "rewards/accuracies": 0.75, "rewards/chosen": -0.08465464413166046, "rewards/margins": 0.16547593474388123, "rewards/rejected": -0.2501305937767029, "step": 1703 }, { "epoch": 1.0394997712368461, "grad_norm": 1.9639980792999268, "learning_rate": 6.359828536436007e-06, "log_odds_chosen": 1.317833662033081, "log_odds_ratio": -0.48658880591392517, "logits/chosen": -0.9458043575286865, "logits/rejected": -1.0613056421279907, "logps/chosen": -0.6812687516212463, "logps/rejected": -1.4950165748596191, "loss": 1.0334, "nll_loss": 1.0635199546813965, "rewards/accuracies": 0.625, "rewards/chosen": -0.0681268721818924, "rewards/margins": 0.08137478679418564, "rewards/rejected": -0.14950165152549744, "step": 1704 }, { "epoch": 1.040109806313863, "grad_norm": 5.134627342224121, "learning_rate": 6.358848744641763e-06, "log_odds_chosen": 0.870992124080658, "log_odds_ratio": -0.5535897016525269, "logits/chosen": -1.0520172119140625, "logits/rejected": -1.072385311126709, "logps/chosen": -0.8639043569564819, "logps/rejected": -1.4090404510498047, "loss": 1.1237, "nll_loss": 1.155638575553894, "rewards/accuracies": 0.5, "rewards/chosen": -0.0863904356956482, "rewards/margins": 0.05451361462473869, "rewards/rejected": -0.14090405404567719, "step": 1705 }, { "epoch": 1.04071984139088, "grad_norm": 1.8230783939361572, "learning_rate": 6.35786895284752e-06, "log_odds_chosen": 1.4584176540374756, "log_odds_ratio": -0.4987388253211975, "logits/chosen": -1.0289560556411743, "logits/rejected": -1.0645267963409424, "logps/chosen": -0.8009985089302063, "logps/rejected": -1.9446825981140137, "loss": 1.0287, "nll_loss": 1.1172542572021484, "rewards/accuracies": 0.75, "rewards/chosen": -0.08009985089302063, "rewards/margins": 0.11436843872070312, "rewards/rejected": -0.19446828961372375, "step": 1706 }, { "epoch": 1.0413298764678969, "grad_norm": 1.2030409574508667, "learning_rate": 6.356889161053276e-06, "log_odds_chosen": 0.1632653772830963, "log_odds_ratio": -0.8192508816719055, "logits/chosen": -1.025270700454712, "logits/rejected": -1.0797019004821777, "logps/chosen": -1.0321143865585327, "logps/rejected": -1.2590816020965576, "loss": 1.1783, "nll_loss": 1.275369644165039, "rewards/accuracies": 0.375, "rewards/chosen": -0.1032114326953888, "rewards/margins": 0.022696735337376595, "rewards/rejected": -0.12590816617012024, "step": 1707 }, { "epoch": 1.041939911544914, "grad_norm": 1.9219868183135986, "learning_rate": 6.355909369259033e-06, "log_odds_chosen": 1.4292516708374023, "log_odds_ratio": -0.39766454696655273, "logits/chosen": -1.1010463237762451, "logits/rejected": -1.0655453205108643, "logps/chosen": -0.7447901964187622, "logps/rejected": -1.6224956512451172, "loss": 1.2831, "nll_loss": 1.3630471229553223, "rewards/accuracies": 0.625, "rewards/chosen": -0.07447902113199234, "rewards/margins": 0.08777053654193878, "rewards/rejected": -0.16224956512451172, "step": 1708 }, { "epoch": 1.0425499466219308, "grad_norm": 2.2451984882354736, "learning_rate": 6.354929577464789e-06, "log_odds_chosen": 0.8138028383255005, "log_odds_ratio": -0.4792519211769104, "logits/chosen": -1.125145673751831, "logits/rejected": -1.1152664422988892, "logps/chosen": -0.9867438077926636, "logps/rejected": -1.6185895204544067, "loss": 1.112, "nll_loss": 1.1608378887176514, "rewards/accuracies": 0.875, "rewards/chosen": -0.09867437928915024, "rewards/margins": 0.06318456679582596, "rewards/rejected": -0.1618589609861374, "step": 1709 }, { "epoch": 1.0431599816989476, "grad_norm": 1.746664047241211, "learning_rate": 6.353949785670544e-06, "log_odds_chosen": 0.8404300212860107, "log_odds_ratio": -0.515887975692749, "logits/chosen": -0.9213602542877197, "logits/rejected": -0.9834131598472595, "logps/chosen": -0.7948319315910339, "logps/rejected": -1.3119173049926758, "loss": 1.0083, "nll_loss": 1.0210999250411987, "rewards/accuracies": 0.625, "rewards/chosen": -0.07948318868875504, "rewards/margins": 0.05170853063464165, "rewards/rejected": -0.13119173049926758, "step": 1710 }, { "epoch": 1.0437700167759647, "grad_norm": 3.2668228149414062, "learning_rate": 6.352969993876301e-06, "log_odds_chosen": 0.21648633480072021, "log_odds_ratio": -0.7192510962486267, "logits/chosen": -1.2161160707473755, "logits/rejected": -1.0687955617904663, "logps/chosen": -0.9690794944763184, "logps/rejected": -1.0813055038452148, "loss": 1.1115, "nll_loss": 1.2755948305130005, "rewards/accuracies": 0.5, "rewards/chosen": -0.09690794348716736, "rewards/margins": 0.011222600936889648, "rewards/rejected": -0.108130544424057, "step": 1711 }, { "epoch": 1.0443800518529816, "grad_norm": 1.6148918867111206, "learning_rate": 6.351990202082057e-06, "log_odds_chosen": 0.20168772339820862, "log_odds_ratio": -0.8613628149032593, "logits/chosen": -0.7802277207374573, "logits/rejected": -0.8385470509529114, "logps/chosen": -1.091214656829834, "logps/rejected": -1.1998796463012695, "loss": 1.0189, "nll_loss": 0.9385936856269836, "rewards/accuracies": 0.625, "rewards/chosen": -0.10912147164344788, "rewards/margins": 0.010866492986679077, "rewards/rejected": -0.11998797208070755, "step": 1712 }, { "epoch": 1.0449900869299984, "grad_norm": 1.8607673645019531, "learning_rate": 6.3510104102878134e-06, "log_odds_chosen": 1.0869345664978027, "log_odds_ratio": -0.47270068526268005, "logits/chosen": -1.0390915870666504, "logits/rejected": -0.9828323125839233, "logps/chosen": -0.842562198638916, "logps/rejected": -1.6490466594696045, "loss": 1.0522, "nll_loss": 1.0524879693984985, "rewards/accuracies": 0.875, "rewards/chosen": -0.08425621688365936, "rewards/margins": 0.08064845204353333, "rewards/rejected": -0.1649046689271927, "step": 1713 }, { "epoch": 1.0456001220070155, "grad_norm": 2.6000421047210693, "learning_rate": 6.35003061849357e-06, "log_odds_chosen": 0.8961905241012573, "log_odds_ratio": -0.6373863816261292, "logits/chosen": -0.9877110719680786, "logits/rejected": -0.894223690032959, "logps/chosen": -0.6998660564422607, "logps/rejected": -1.207962989807129, "loss": 1.0715, "nll_loss": 1.1891392469406128, "rewards/accuracies": 0.625, "rewards/chosen": -0.06998661160469055, "rewards/margins": 0.050809696316719055, "rewards/rejected": -0.1207963079214096, "step": 1714 }, { "epoch": 1.0462101570840323, "grad_norm": 2.3537139892578125, "learning_rate": 6.349050826699326e-06, "log_odds_chosen": 0.06260369718074799, "log_odds_ratio": -0.6922775506973267, "logits/chosen": -1.231246829032898, "logits/rejected": -1.1366961002349854, "logps/chosen": -0.9255940914154053, "logps/rejected": -0.9831283688545227, "loss": 1.3301, "nll_loss": 1.439040184020996, "rewards/accuracies": 0.625, "rewards/chosen": -0.09255939722061157, "rewards/margins": 0.005753433331847191, "rewards/rejected": -0.09831283241510391, "step": 1715 }, { "epoch": 1.0468201921610492, "grad_norm": 1.9773814678192139, "learning_rate": 6.348071034905082e-06, "log_odds_chosen": 1.4909039735794067, "log_odds_ratio": -0.4878677725791931, "logits/chosen": -0.827308714389801, "logits/rejected": -0.9449284076690674, "logps/chosen": -0.5784209966659546, "logps/rejected": -1.633955717086792, "loss": 0.97, "nll_loss": 0.7021434903144836, "rewards/accuracies": 0.75, "rewards/chosen": -0.05784209817647934, "rewards/margins": 0.10555350035429001, "rewards/rejected": -0.16339558362960815, "step": 1716 }, { "epoch": 1.0474302272380662, "grad_norm": 1.6031577587127686, "learning_rate": 6.347091243110839e-06, "log_odds_chosen": 0.8561406135559082, "log_odds_ratio": -0.5805282592773438, "logits/chosen": -1.0857763290405273, "logits/rejected": -0.9803919792175293, "logps/chosen": -0.8403936624526978, "logps/rejected": -1.4315986633300781, "loss": 1.0794, "nll_loss": 1.3584294319152832, "rewards/accuracies": 0.625, "rewards/chosen": -0.08403937518596649, "rewards/margins": 0.05912049859762192, "rewards/rejected": -0.1431598663330078, "step": 1717 }, { "epoch": 1.048040262315083, "grad_norm": 1.2558984756469727, "learning_rate": 6.346111451316595e-06, "log_odds_chosen": 2.2733874320983887, "log_odds_ratio": -0.5174773335456848, "logits/chosen": -1.1645698547363281, "logits/rejected": -1.0153383016586304, "logps/chosen": -0.83565753698349, "logps/rejected": -2.7786293029785156, "loss": 1.0985, "nll_loss": 1.080288052558899, "rewards/accuracies": 0.5, "rewards/chosen": -0.08356575667858124, "rewards/margins": 0.194297194480896, "rewards/rejected": -0.27786293625831604, "step": 1718 }, { "epoch": 1.0486502973921001, "grad_norm": 1.0958659648895264, "learning_rate": 6.345131659522351e-06, "log_odds_chosen": 1.4174638986587524, "log_odds_ratio": -0.5965737700462341, "logits/chosen": -0.9787595272064209, "logits/rejected": -0.9752988815307617, "logps/chosen": -0.8592691421508789, "logps/rejected": -1.8659298419952393, "loss": 1.1237, "nll_loss": 1.029200792312622, "rewards/accuracies": 0.625, "rewards/chosen": -0.08592691272497177, "rewards/margins": 0.10066606849431992, "rewards/rejected": -0.18659299612045288, "step": 1719 }, { "epoch": 1.049260332469117, "grad_norm": 1.9470553398132324, "learning_rate": 6.344151867728108e-06, "log_odds_chosen": 3.3673648834228516, "log_odds_ratio": -0.1294839084148407, "logits/chosen": -0.5674589276313782, "logits/rejected": -0.6901726126670837, "logps/chosen": -0.6040385365486145, "logps/rejected": -3.044307231903076, "loss": 1.0884, "nll_loss": 0.8842755556106567, "rewards/accuracies": 1.0, "rewards/chosen": -0.06040385365486145, "rewards/margins": 0.24402686953544617, "rewards/rejected": -0.3044307231903076, "step": 1720 }, { "epoch": 1.0498703675461338, "grad_norm": 2.8901662826538086, "learning_rate": 6.343172075933863e-06, "log_odds_chosen": 1.5814692974090576, "log_odds_ratio": -0.276974081993103, "logits/chosen": -0.7574598789215088, "logits/rejected": -0.8544406294822693, "logps/chosen": -0.6730154752731323, "logps/rejected": -1.749063491821289, "loss": 0.8913, "nll_loss": 0.7317355871200562, "rewards/accuracies": 0.875, "rewards/chosen": -0.06730154901742935, "rewards/margins": 0.10760480165481567, "rewards/rejected": -0.17490635812282562, "step": 1721 }, { "epoch": 1.050480402623151, "grad_norm": 1.821881651878357, "learning_rate": 6.34219228413962e-06, "log_odds_chosen": 1.0459606647491455, "log_odds_ratio": -0.5003008842468262, "logits/chosen": -1.1884725093841553, "logits/rejected": -1.172908067703247, "logps/chosen": -0.7848759889602661, "logps/rejected": -1.493970274925232, "loss": 1.1625, "nll_loss": 1.2395812273025513, "rewards/accuracies": 0.75, "rewards/chosen": -0.07848759740591049, "rewards/margins": 0.07090942561626434, "rewards/rejected": -0.14939703047275543, "step": 1722 }, { "epoch": 1.0510904377001677, "grad_norm": 1.3824985027313232, "learning_rate": 6.341212492345376e-06, "log_odds_chosen": 1.165449857711792, "log_odds_ratio": -0.38462287187576294, "logits/chosen": -1.0011680126190186, "logits/rejected": -1.0125876665115356, "logps/chosen": -0.8138060569763184, "logps/rejected": -1.6357530355453491, "loss": 1.0278, "nll_loss": 0.9251039028167725, "rewards/accuracies": 0.875, "rewards/chosen": -0.08138061314821243, "rewards/margins": 0.08219470083713531, "rewards/rejected": -0.16357529163360596, "step": 1723 }, { "epoch": 1.0517004727771846, "grad_norm": 1.2551414966583252, "learning_rate": 6.3402327005511325e-06, "log_odds_chosen": 0.5351947546005249, "log_odds_ratio": -0.6061190366744995, "logits/chosen": -1.2247028350830078, "logits/rejected": -1.2356863021850586, "logps/chosen": -0.9468036890029907, "logps/rejected": -1.1268231868743896, "loss": 1.2795, "nll_loss": 1.6652936935424805, "rewards/accuracies": 0.625, "rewards/chosen": -0.09468036890029907, "rewards/margins": 0.018001949414610863, "rewards/rejected": -0.11268231272697449, "step": 1724 }, { "epoch": 1.0523105078542017, "grad_norm": 1.7542117834091187, "learning_rate": 6.3392529087568895e-06, "log_odds_chosen": 0.5888677835464478, "log_odds_ratio": -0.5422514081001282, "logits/chosen": -1.2248518466949463, "logits/rejected": -1.181775450706482, "logps/chosen": -0.9228023290634155, "logps/rejected": -1.321228265762329, "loss": 1.0942, "nll_loss": 1.2434091567993164, "rewards/accuracies": 0.625, "rewards/chosen": -0.09228023141622543, "rewards/margins": 0.03984259068965912, "rewards/rejected": -0.13212281465530396, "step": 1725 }, { "epoch": 1.0529205429312185, "grad_norm": 4.989507675170898, "learning_rate": 6.338273116962646e-06, "log_odds_chosen": 2.1435959339141846, "log_odds_ratio": -0.3420625925064087, "logits/chosen": -1.0026957988739014, "logits/rejected": -1.0532293319702148, "logps/chosen": -0.7948493957519531, "logps/rejected": -2.523404598236084, "loss": 1.0031, "nll_loss": 1.0198938846588135, "rewards/accuracies": 0.875, "rewards/chosen": -0.07948493957519531, "rewards/margins": 0.17285552620887756, "rewards/rejected": -0.2523404657840729, "step": 1726 }, { "epoch": 1.0535305780082356, "grad_norm": 1.4862027168273926, "learning_rate": 6.337293325168401e-06, "log_odds_chosen": 0.45944541692733765, "log_odds_ratio": -0.6712832450866699, "logits/chosen": -1.045798420906067, "logits/rejected": -1.0372986793518066, "logps/chosen": -0.8037136197090149, "logps/rejected": -1.16660737991333, "loss": 1.1077, "nll_loss": 1.0315449237823486, "rewards/accuracies": 0.375, "rewards/chosen": -0.08037136495113373, "rewards/margins": 0.036289382725954056, "rewards/rejected": -0.11666074395179749, "step": 1727 }, { "epoch": 1.0541406130852524, "grad_norm": 1.114606499671936, "learning_rate": 6.336313533374158e-06, "log_odds_chosen": 0.3464049696922302, "log_odds_ratio": -0.6820353269577026, "logits/chosen": -0.9395979046821594, "logits/rejected": -0.9473426342010498, "logps/chosen": -0.9064358472824097, "logps/rejected": -1.0456677675247192, "loss": 1.2825, "nll_loss": 1.226874589920044, "rewards/accuracies": 0.75, "rewards/chosen": -0.09064359217882156, "rewards/margins": 0.01392318494617939, "rewards/rejected": -0.10456676781177521, "step": 1728 }, { "epoch": 1.0547506481622693, "grad_norm": 4.12589693069458, "learning_rate": 6.335333741579914e-06, "log_odds_chosen": 0.8798367381095886, "log_odds_ratio": -0.5489708185195923, "logits/chosen": -1.120577335357666, "logits/rejected": -1.1297516822814941, "logps/chosen": -1.0303364992141724, "logps/rejected": -1.8053407669067383, "loss": 1.1862, "nll_loss": 1.2391856908798218, "rewards/accuracies": 0.75, "rewards/chosen": -0.103033646941185, "rewards/margins": 0.07750042527914047, "rewards/rejected": -0.18053407967090607, "step": 1729 }, { "epoch": 1.0553606832392863, "grad_norm": 2.106458902359009, "learning_rate": 6.33435394978567e-06, "log_odds_chosen": 1.2132916450500488, "log_odds_ratio": -0.40274855494499207, "logits/chosen": -1.0689388513565063, "logits/rejected": -1.1146371364593506, "logps/chosen": -0.9034730195999146, "logps/rejected": -1.7941101789474487, "loss": 1.1781, "nll_loss": 1.062880516052246, "rewards/accuracies": 0.625, "rewards/chosen": -0.0903473049402237, "rewards/margins": 0.08906371891498566, "rewards/rejected": -0.17941102385520935, "step": 1730 }, { "epoch": 1.0559707183163032, "grad_norm": 2.0005104541778564, "learning_rate": 6.333374157991427e-06, "log_odds_chosen": 1.2904579639434814, "log_odds_ratio": -0.44437122344970703, "logits/chosen": -1.1132731437683105, "logits/rejected": -1.0167644023895264, "logps/chosen": -0.9528177380561829, "logps/rejected": -1.873992681503296, "loss": 1.2213, "nll_loss": 1.5477209091186523, "rewards/accuracies": 0.75, "rewards/chosen": -0.09528177231550217, "rewards/margins": 0.09211750328540802, "rewards/rejected": -0.18739929795265198, "step": 1731 }, { "epoch": 1.05658075339332, "grad_norm": 2.344114303588867, "learning_rate": 6.332394366197182e-06, "log_odds_chosen": 0.4277973473072052, "log_odds_ratio": -0.6672731637954712, "logits/chosen": -0.7548996210098267, "logits/rejected": -0.9299074411392212, "logps/chosen": -0.8044107556343079, "logps/rejected": -1.0997748374938965, "loss": 1.0643, "nll_loss": 0.9215213060379028, "rewards/accuracies": 0.625, "rewards/chosen": -0.08044108003377914, "rewards/margins": 0.029536399990320206, "rewards/rejected": -0.10997747629880905, "step": 1732 }, { "epoch": 1.057190788470337, "grad_norm": 2.2079851627349854, "learning_rate": 6.3314145744029384e-06, "log_odds_chosen": 0.6961812973022461, "log_odds_ratio": -0.6746851205825806, "logits/chosen": -1.250468134880066, "logits/rejected": -1.152945637702942, "logps/chosen": -1.1415354013442993, "logps/rejected": -1.7620630264282227, "loss": 1.2648, "nll_loss": 1.3439574241638184, "rewards/accuracies": 0.5, "rewards/chosen": -0.11415354907512665, "rewards/margins": 0.062052756547927856, "rewards/rejected": -0.1762062907218933, "step": 1733 }, { "epoch": 1.057800823547354, "grad_norm": 1.059553623199463, "learning_rate": 6.330434782608695e-06, "log_odds_chosen": 1.1316113471984863, "log_odds_ratio": -0.5106324553489685, "logits/chosen": -0.9826539754867554, "logits/rejected": -0.9988439083099365, "logps/chosen": -0.6114109754562378, "logps/rejected": -1.3039891719818115, "loss": 1.0361, "nll_loss": 1.010216474533081, "rewards/accuracies": 0.625, "rewards/chosen": -0.06114109605550766, "rewards/margins": 0.06925781816244125, "rewards/rejected": -0.1303989142179489, "step": 1734 }, { "epoch": 1.058410858624371, "grad_norm": 3.670433521270752, "learning_rate": 6.3294549908144515e-06, "log_odds_chosen": 2.019815444946289, "log_odds_ratio": -0.17242097854614258, "logits/chosen": -0.8465269804000854, "logits/rejected": -0.9451579451560974, "logps/chosen": -0.7879601716995239, "logps/rejected": -2.1546788215637207, "loss": 0.975, "nll_loss": 0.8564742207527161, "rewards/accuracies": 1.0, "rewards/chosen": -0.07879601418972015, "rewards/margins": 0.13667187094688416, "rewards/rejected": -0.2154678851366043, "step": 1735 }, { "epoch": 1.0590208937013879, "grad_norm": 4.075552463531494, "learning_rate": 6.3284751990202085e-06, "log_odds_chosen": 0.4322297275066376, "log_odds_ratio": -0.6394485235214233, "logits/chosen": -1.0942740440368652, "logits/rejected": -1.2352992296218872, "logps/chosen": -0.8827008605003357, "logps/rejected": -0.9993592500686646, "loss": 1.1569, "nll_loss": 1.417944073677063, "rewards/accuracies": 0.5, "rewards/chosen": -0.08827009052038193, "rewards/margins": 0.011665841564536095, "rewards/rejected": -0.09993593394756317, "step": 1736 }, { "epoch": 1.0596309287784047, "grad_norm": 1.112648367881775, "learning_rate": 6.327495407225965e-06, "log_odds_chosen": 0.3251841366291046, "log_odds_ratio": -0.6245514750480652, "logits/chosen": -0.9766319990158081, "logits/rejected": -0.9011305570602417, "logps/chosen": -0.8609442710876465, "logps/rejected": -1.1164088249206543, "loss": 1.0501, "nll_loss": 1.193691611289978, "rewards/accuracies": 0.5, "rewards/chosen": -0.08609442412853241, "rewards/margins": 0.025546453893184662, "rewards/rejected": -0.11164088547229767, "step": 1737 }, { "epoch": 1.0602409638554218, "grad_norm": 1.4449995756149292, "learning_rate": 6.32651561543172e-06, "log_odds_chosen": 0.06377546489238739, "log_odds_ratio": -0.7434319257736206, "logits/chosen": -1.1971193552017212, "logits/rejected": -1.0456805229187012, "logps/chosen": -0.92952561378479, "logps/rejected": -0.9768179059028625, "loss": 1.2408, "nll_loss": 1.087144374847412, "rewards/accuracies": 0.375, "rewards/chosen": -0.09295256435871124, "rewards/margins": 0.0047292341478168964, "rewards/rejected": -0.09768179804086685, "step": 1738 }, { "epoch": 1.0608509989324386, "grad_norm": 9.27859115600586, "learning_rate": 6.325535823637477e-06, "log_odds_chosen": 0.6754741668701172, "log_odds_ratio": -0.8006361722946167, "logits/chosen": -1.0800836086273193, "logits/rejected": -0.9407399892807007, "logps/chosen": -1.0316524505615234, "logps/rejected": -1.6467914581298828, "loss": 1.1923, "nll_loss": 1.167057752609253, "rewards/accuracies": 0.375, "rewards/chosen": -0.10316524654626846, "rewards/margins": 0.061513908207416534, "rewards/rejected": -0.1646791398525238, "step": 1739 }, { "epoch": 1.0614610340094555, "grad_norm": 8.702642440795898, "learning_rate": 6.324556031843233e-06, "log_odds_chosen": 0.4501548111438751, "log_odds_ratio": -0.6845507025718689, "logits/chosen": -0.7288732528686523, "logits/rejected": -0.8993625640869141, "logps/chosen": -0.7898118495941162, "logps/rejected": -1.0886582136154175, "loss": 1.0988, "nll_loss": 0.8380340337753296, "rewards/accuracies": 0.5, "rewards/chosen": -0.0789811909198761, "rewards/margins": 0.02988463267683983, "rewards/rejected": -0.10886582732200623, "step": 1740 }, { "epoch": 1.0620710690864725, "grad_norm": 1.5533794164657593, "learning_rate": 6.323576240048989e-06, "log_odds_chosen": 0.7283447980880737, "log_odds_ratio": -0.4901295304298401, "logits/chosen": -1.0066345930099487, "logits/rejected": -0.9547040462493896, "logps/chosen": -0.8352618217468262, "logps/rejected": -1.332434058189392, "loss": 0.9801, "nll_loss": 0.9970476627349854, "rewards/accuracies": 0.625, "rewards/chosen": -0.08352617919445038, "rewards/margins": 0.049717217683792114, "rewards/rejected": -0.1332433968782425, "step": 1741 }, { "epoch": 1.0626811041634894, "grad_norm": 1.7704030275344849, "learning_rate": 6.322596448254746e-06, "log_odds_chosen": 2.6873912811279297, "log_odds_ratio": -0.3774373531341553, "logits/chosen": -0.9527771472930908, "logits/rejected": -0.7933036088943481, "logps/chosen": -0.7472444176673889, "logps/rejected": -3.072934150695801, "loss": 1.0382, "nll_loss": 0.9667398929595947, "rewards/accuracies": 0.625, "rewards/chosen": -0.07472445070743561, "rewards/margins": 0.23256897926330566, "rewards/rejected": -0.30729344487190247, "step": 1742 }, { "epoch": 1.0632911392405062, "grad_norm": 1.1877936124801636, "learning_rate": 6.321616656460502e-06, "log_odds_chosen": 1.1609925031661987, "log_odds_ratio": -0.44040483236312866, "logits/chosen": -0.9836363792419434, "logits/rejected": -1.1393322944641113, "logps/chosen": -0.734687089920044, "logps/rejected": -1.4651665687561035, "loss": 0.9845, "nll_loss": 0.8915789127349854, "rewards/accuracies": 0.75, "rewards/chosen": -0.07346871495246887, "rewards/margins": 0.0730479434132576, "rewards/rejected": -0.14651665091514587, "step": 1743 }, { "epoch": 1.0639011743175233, "grad_norm": 1.1181702613830566, "learning_rate": 6.3206368646662575e-06, "log_odds_chosen": 0.3470086455345154, "log_odds_ratio": -0.6050612926483154, "logits/chosen": -0.8646624088287354, "logits/rejected": -0.8545845746994019, "logps/chosen": -0.8106101751327515, "logps/rejected": -1.0425206422805786, "loss": 1.1923, "nll_loss": 1.1305204629898071, "rewards/accuracies": 0.625, "rewards/chosen": -0.08106102049350739, "rewards/margins": 0.023191042244434357, "rewards/rejected": -0.10425206273794174, "step": 1744 }, { "epoch": 1.0645112093945401, "grad_norm": 1.3096140623092651, "learning_rate": 6.3196570728720144e-06, "log_odds_chosen": -0.12206429988145828, "log_odds_ratio": -0.9058246612548828, "logits/chosen": -1.0805504322052002, "logits/rejected": -0.9934561252593994, "logps/chosen": -1.2449183464050293, "logps/rejected": -1.1653037071228027, "loss": 1.3047, "nll_loss": 1.508396029472351, "rewards/accuracies": 0.5, "rewards/chosen": -0.12449183315038681, "rewards/margins": -0.00796146783977747, "rewards/rejected": -0.11653036624193192, "step": 1745 }, { "epoch": 1.0651212444715572, "grad_norm": 1.6742252111434937, "learning_rate": 6.3186772810777706e-06, "log_odds_chosen": 1.9999439716339111, "log_odds_ratio": -0.35164034366607666, "logits/chosen": -0.8947495222091675, "logits/rejected": -0.9500597715377808, "logps/chosen": -0.7487902641296387, "logps/rejected": -2.319725513458252, "loss": 1.0002, "nll_loss": 0.8497788906097412, "rewards/accuracies": 0.75, "rewards/chosen": -0.07487902045249939, "rewards/margins": 0.15709353983402252, "rewards/rejected": -0.2319725751876831, "step": 1746 }, { "epoch": 1.065731279548574, "grad_norm": 2.0266778469085693, "learning_rate": 6.317697489283527e-06, "log_odds_chosen": 0.9907072186470032, "log_odds_ratio": -0.4695045053958893, "logits/chosen": -1.0545320510864258, "logits/rejected": -1.0272786617279053, "logps/chosen": -0.8937914967536926, "logps/rejected": -1.5761053562164307, "loss": 1.0199, "nll_loss": 1.110249400138855, "rewards/accuracies": 0.75, "rewards/chosen": -0.08937914669513702, "rewards/margins": 0.06823138892650604, "rewards/rejected": -0.15761053562164307, "step": 1747 }, { "epoch": 1.066341314625591, "grad_norm": 2.2111546993255615, "learning_rate": 6.316717697489284e-06, "log_odds_chosen": 1.1910841464996338, "log_odds_ratio": -0.4761672616004944, "logits/chosen": -0.9731011390686035, "logits/rejected": -1.1572550535202026, "logps/chosen": -0.7615955471992493, "logps/rejected": -1.5535502433776855, "loss": 1.0983, "nll_loss": 0.9558181762695312, "rewards/accuracies": 0.75, "rewards/chosen": -0.07615955173969269, "rewards/margins": 0.07919546961784363, "rewards/rejected": -0.15535502135753632, "step": 1748 }, { "epoch": 1.066951349702608, "grad_norm": 8.18647289276123, "learning_rate": 6.31573790569504e-06, "log_odds_chosen": 0.21793407201766968, "log_odds_ratio": -0.645000159740448, "logits/chosen": -1.045111060142517, "logits/rejected": -1.0153757333755493, "logps/chosen": -0.9832507967948914, "logps/rejected": -1.09719717502594, "loss": 1.0896, "nll_loss": 1.2729308605194092, "rewards/accuracies": 0.5, "rewards/chosen": -0.09832507371902466, "rewards/margins": 0.011394631117582321, "rewards/rejected": -0.10971970856189728, "step": 1749 }, { "epoch": 1.0675613847796248, "grad_norm": 2.1519815921783447, "learning_rate": 6.314758113900796e-06, "log_odds_chosen": 1.0784687995910645, "log_odds_ratio": -0.3848070502281189, "logits/chosen": -0.7456324100494385, "logits/rejected": -0.8137785196304321, "logps/chosen": -1.0097484588623047, "logps/rejected": -1.7272117137908936, "loss": 1.1239, "nll_loss": 1.0676305294036865, "rewards/accuracies": 0.75, "rewards/chosen": -0.10097484290599823, "rewards/margins": 0.071746326982975, "rewards/rejected": -0.17272117733955383, "step": 1750 }, { "epoch": 1.0681714198566417, "grad_norm": 1.4811372756958008, "learning_rate": 6.313778322106552e-06, "log_odds_chosen": 1.6817491054534912, "log_odds_ratio": -0.24208936095237732, "logits/chosen": -0.8124538660049438, "logits/rejected": -0.6988325715065002, "logps/chosen": -0.6019902229309082, "logps/rejected": -1.7462055683135986, "loss": 0.9234, "nll_loss": 0.8066587448120117, "rewards/accuracies": 0.875, "rewards/chosen": -0.06019902229309082, "rewards/margins": 0.1144215315580368, "rewards/rejected": -0.17462056875228882, "step": 1751 }, { "epoch": 1.0687814549336587, "grad_norm": 4.555011749267578, "learning_rate": 6.312798530312308e-06, "log_odds_chosen": 0.3470829129219055, "log_odds_ratio": -0.6613048315048218, "logits/chosen": -1.262857437133789, "logits/rejected": -1.1530001163482666, "logps/chosen": -0.832526683807373, "logps/rejected": -0.9661528468132019, "loss": 1.1666, "nll_loss": 1.2692011594772339, "rewards/accuracies": 0.625, "rewards/chosen": -0.0832526683807373, "rewards/margins": 0.013362618163228035, "rewards/rejected": -0.09661528468132019, "step": 1752 }, { "epoch": 1.0693914900106756, "grad_norm": 2.466292142868042, "learning_rate": 6.311818738518065e-06, "log_odds_chosen": 0.7258684635162354, "log_odds_ratio": -0.5472490787506104, "logits/chosen": -0.9186747074127197, "logits/rejected": -0.7932735085487366, "logps/chosen": -0.9105655550956726, "logps/rejected": -1.4290499687194824, "loss": 1.2467, "nll_loss": 1.287876009941101, "rewards/accuracies": 0.75, "rewards/chosen": -0.09105656296014786, "rewards/margins": 0.05184842646121979, "rewards/rejected": -0.14290499687194824, "step": 1753 }, { "epoch": 1.0700015250876926, "grad_norm": 1.6166682243347168, "learning_rate": 6.310838946723821e-06, "log_odds_chosen": 0.6067445278167725, "log_odds_ratio": -0.5604883432388306, "logits/chosen": -0.7556600570678711, "logits/rejected": -0.8390296697616577, "logps/chosen": -0.7364567518234253, "logps/rejected": -1.1414055824279785, "loss": 1.1487, "nll_loss": 0.9467371702194214, "rewards/accuracies": 0.75, "rewards/chosen": -0.07364567369222641, "rewards/margins": 0.0404948815703392, "rewards/rejected": -0.11414056271314621, "step": 1754 }, { "epoch": 1.0706115601647095, "grad_norm": 1.3177319765090942, "learning_rate": 6.3098591549295765e-06, "log_odds_chosen": 0.5412392020225525, "log_odds_ratio": -0.618392288684845, "logits/chosen": -1.0466082096099854, "logits/rejected": -1.134914755821228, "logps/chosen": -1.0562814474105835, "logps/rejected": -1.3701422214508057, "loss": 1.1362, "nll_loss": 1.1640729904174805, "rewards/accuracies": 0.5, "rewards/chosen": -0.10562814772129059, "rewards/margins": 0.031386084854602814, "rewards/rejected": -0.1370142251253128, "step": 1755 }, { "epoch": 1.0712215952417263, "grad_norm": 5.748149394989014, "learning_rate": 6.3088793631353335e-06, "log_odds_chosen": 1.6778242588043213, "log_odds_ratio": -0.4549539089202881, "logits/chosen": -0.8125802874565125, "logits/rejected": -0.7891769409179688, "logps/chosen": -0.8390053510665894, "logps/rejected": -2.0983176231384277, "loss": 1.1134, "nll_loss": 0.9853954911231995, "rewards/accuracies": 0.625, "rewards/chosen": -0.08390054106712341, "rewards/margins": 0.12593120336532593, "rewards/rejected": -0.20983174443244934, "step": 1756 }, { "epoch": 1.0718316303187434, "grad_norm": 1.3658298254013062, "learning_rate": 6.30789957134109e-06, "log_odds_chosen": 1.1429640054702759, "log_odds_ratio": -0.5477825403213501, "logits/chosen": -0.9872580766677856, "logits/rejected": -1.1211522817611694, "logps/chosen": -0.9326412677764893, "logps/rejected": -1.8494791984558105, "loss": 1.2199, "nll_loss": 1.153205394744873, "rewards/accuracies": 0.75, "rewards/chosen": -0.0932641327381134, "rewards/margins": 0.09168379008769989, "rewards/rejected": -0.1849479228258133, "step": 1757 }, { "epoch": 1.0724416653957602, "grad_norm": 3.350126028060913, "learning_rate": 6.306919779546846e-06, "log_odds_chosen": 1.6518763303756714, "log_odds_ratio": -0.34760814905166626, "logits/chosen": -0.8433474898338318, "logits/rejected": -1.040771722793579, "logps/chosen": -0.7500995397567749, "logps/rejected": -2.010585308074951, "loss": 0.9647, "nll_loss": 0.9076477289199829, "rewards/accuracies": 0.75, "rewards/chosen": -0.07500995695590973, "rewards/margins": 0.12604857981204987, "rewards/rejected": -0.2010585367679596, "step": 1758 }, { "epoch": 1.073051700472777, "grad_norm": 1.0021895170211792, "learning_rate": 6.305939987752603e-06, "log_odds_chosen": 0.04199250042438507, "log_odds_ratio": -0.7882078289985657, "logits/chosen": -1.1328023672103882, "logits/rejected": -1.0535478591918945, "logps/chosen": -1.0602319240570068, "logps/rejected": -1.1243499517440796, "loss": 1.1446, "nll_loss": 1.1450352668762207, "rewards/accuracies": 0.5, "rewards/chosen": -0.10602319240570068, "rewards/margins": 0.0064118048176169395, "rewards/rejected": -0.1124349981546402, "step": 1759 }, { "epoch": 1.0736617355497942, "grad_norm": 2.6518361568450928, "learning_rate": 6.304960195958359e-06, "log_odds_chosen": 0.7175359129905701, "log_odds_ratio": -0.5679641366004944, "logits/chosen": -0.8630623817443848, "logits/rejected": -0.7409378290176392, "logps/chosen": -0.8596873879432678, "logps/rejected": -1.4171414375305176, "loss": 1.1565, "nll_loss": 1.0858139991760254, "rewards/accuracies": 0.625, "rewards/chosen": -0.0859687402844429, "rewards/margins": 0.055745407938957214, "rewards/rejected": -0.14171414077281952, "step": 1760 }, { "epoch": 1.074271770626811, "grad_norm": 1.8557828664779663, "learning_rate": 6.303980404164114e-06, "log_odds_chosen": 1.4677464962005615, "log_odds_ratio": -0.5345121622085571, "logits/chosen": -0.9842389225959778, "logits/rejected": -1.0857067108154297, "logps/chosen": -0.8134230375289917, "logps/rejected": -1.9819376468658447, "loss": 1.2024, "nll_loss": 1.038560152053833, "rewards/accuracies": 0.5, "rewards/chosen": -0.08134230971336365, "rewards/margins": 0.11685145646333694, "rewards/rejected": -0.1981937736272812, "step": 1761 }, { "epoch": 1.074881805703828, "grad_norm": 3.4329755306243896, "learning_rate": 6.303000612369871e-06, "log_odds_chosen": 1.4261085987091064, "log_odds_ratio": -0.41183826327323914, "logits/chosen": -0.9869244694709778, "logits/rejected": -1.018497347831726, "logps/chosen": -0.7618865966796875, "logps/rejected": -1.701379656791687, "loss": 1.1161, "nll_loss": 1.0441224575042725, "rewards/accuracies": 0.75, "rewards/chosen": -0.07618866115808487, "rewards/margins": 0.0939493179321289, "rewards/rejected": -0.17013797163963318, "step": 1762 }, { "epoch": 1.075491840780845, "grad_norm": 1.6138354539871216, "learning_rate": 6.302020820575627e-06, "log_odds_chosen": 0.7107868194580078, "log_odds_ratio": -0.5736734867095947, "logits/chosen": -0.9902312755584717, "logits/rejected": -0.8726305961608887, "logps/chosen": -0.8656079173088074, "logps/rejected": -1.3178040981292725, "loss": 1.2728, "nll_loss": 1.2085105180740356, "rewards/accuracies": 0.625, "rewards/chosen": -0.08656080067157745, "rewards/margins": 0.04521960765123367, "rewards/rejected": -0.13178041577339172, "step": 1763 }, { "epoch": 1.0761018758578618, "grad_norm": 1.1722254753112793, "learning_rate": 6.301041028781384e-06, "log_odds_chosen": 0.748265266418457, "log_odds_ratio": -0.5905579328536987, "logits/chosen": -1.0568013191223145, "logits/rejected": -0.965682864189148, "logps/chosen": -1.0630340576171875, "logps/rejected": -1.6597650051116943, "loss": 1.1668, "nll_loss": 1.3334661722183228, "rewards/accuracies": 0.625, "rewards/chosen": -0.10630341619253159, "rewards/margins": 0.05967308580875397, "rewards/rejected": -0.16597649455070496, "step": 1764 }, { "epoch": 1.0767119109348788, "grad_norm": 2.9437978267669678, "learning_rate": 6.30006123698714e-06, "log_odds_chosen": 0.15894289314746857, "log_odds_ratio": -0.7676764726638794, "logits/chosen": -1.0716018676757812, "logits/rejected": -0.9333729147911072, "logps/chosen": -1.0400220155715942, "logps/rejected": -1.2171310186386108, "loss": 1.3072, "nll_loss": 1.311797857284546, "rewards/accuracies": 0.375, "rewards/chosen": -0.10400219261646271, "rewards/margins": 0.017710907384753227, "rewards/rejected": -0.12171310186386108, "step": 1765 }, { "epoch": 1.0773219460118957, "grad_norm": 2.131279468536377, "learning_rate": 6.299081445192896e-06, "log_odds_chosen": 2.294781446456909, "log_odds_ratio": -0.24557118117809296, "logits/chosen": -0.8208928108215332, "logits/rejected": -0.9632208943367004, "logps/chosen": -0.7154713273048401, "logps/rejected": -2.288709878921509, "loss": 1.1176, "nll_loss": 0.8426382541656494, "rewards/accuracies": 0.875, "rewards/chosen": -0.07154714316129684, "rewards/margins": 0.15732385218143463, "rewards/rejected": -0.22887098789215088, "step": 1766 }, { "epoch": 1.0779319810889125, "grad_norm": 5.6833906173706055, "learning_rate": 6.2981016533986525e-06, "log_odds_chosen": 0.91648930311203, "log_odds_ratio": -0.44962555170059204, "logits/chosen": -0.7244158983230591, "logits/rejected": -0.7632455229759216, "logps/chosen": -0.5693092346191406, "logps/rejected": -1.1310040950775146, "loss": 1.1409, "nll_loss": 0.710076630115509, "rewards/accuracies": 0.875, "rewards/chosen": -0.05693092197179794, "rewards/margins": 0.05616948753595352, "rewards/rejected": -0.11310041695833206, "step": 1767 }, { "epoch": 1.0785420161659296, "grad_norm": 0.9506612420082092, "learning_rate": 6.297121861604409e-06, "log_odds_chosen": 0.26601073145866394, "log_odds_ratio": -0.6806964874267578, "logits/chosen": -1.0252282619476318, "logits/rejected": -0.9848096966743469, "logps/chosen": -1.03256356716156, "logps/rejected": -1.2265574932098389, "loss": 1.0005, "nll_loss": 1.0555428266525269, "rewards/accuracies": 0.625, "rewards/chosen": -0.10325635969638824, "rewards/margins": 0.01939939334988594, "rewards/rejected": -0.12265575677156448, "step": 1768 }, { "epoch": 1.0791520512429464, "grad_norm": 1.743139624595642, "learning_rate": 6.296142069810165e-06, "log_odds_chosen": 0.8898508548736572, "log_odds_ratio": -0.5246341228485107, "logits/chosen": -1.0267266035079956, "logits/rejected": -0.9802473783493042, "logps/chosen": -0.9615914821624756, "logps/rejected": -1.6469756364822388, "loss": 1.3387, "nll_loss": 1.3066704273223877, "rewards/accuracies": 0.75, "rewards/chosen": -0.09615914523601532, "rewards/margins": 0.06853841245174408, "rewards/rejected": -0.1646975576877594, "step": 1769 }, { "epoch": 1.0797620863199633, "grad_norm": 1.821405053138733, "learning_rate": 6.295162278015922e-06, "log_odds_chosen": 0.26889529824256897, "log_odds_ratio": -0.6957663297653198, "logits/chosen": -0.9092407822608948, "logits/rejected": -0.9101122617721558, "logps/chosen": -0.922699511051178, "logps/rejected": -1.112029790878296, "loss": 0.975, "nll_loss": 1.1097385883331299, "rewards/accuracies": 0.5, "rewards/chosen": -0.09226994961500168, "rewards/margins": 0.018933024257421494, "rewards/rejected": -0.11120297014713287, "step": 1770 }, { "epoch": 1.0803721213969804, "grad_norm": 1.5905117988586426, "learning_rate": 6.294182486221678e-06, "log_odds_chosen": 0.9929666519165039, "log_odds_ratio": -0.49445468187332153, "logits/chosen": -0.8322117924690247, "logits/rejected": -0.8856289982795715, "logps/chosen": -0.6595380902290344, "logps/rejected": -1.2762908935546875, "loss": 1.2456, "nll_loss": 0.9371398687362671, "rewards/accuracies": 0.875, "rewards/chosen": -0.0659538060426712, "rewards/margins": 0.06167527288198471, "rewards/rejected": -0.1276290863752365, "step": 1771 }, { "epoch": 1.0809821564739972, "grad_norm": 2.6899242401123047, "learning_rate": 6.293202694427433e-06, "log_odds_chosen": 1.850128412246704, "log_odds_ratio": -0.36237967014312744, "logits/chosen": -0.7508154511451721, "logits/rejected": -0.903167724609375, "logps/chosen": -0.6517766714096069, "logps/rejected": -1.8404217958450317, "loss": 1.1463, "nll_loss": 0.9073569774627686, "rewards/accuracies": 0.75, "rewards/chosen": -0.06517765671014786, "rewards/margins": 0.1188645213842392, "rewards/rejected": -0.18404218554496765, "step": 1772 }, { "epoch": 1.0815921915510143, "grad_norm": 1.216370701789856, "learning_rate": 6.29222290263319e-06, "log_odds_chosen": 1.1458498239517212, "log_odds_ratio": -0.5881308913230896, "logits/chosen": -0.9783810973167419, "logits/rejected": -0.9755955934524536, "logps/chosen": -0.8218429684638977, "logps/rejected": -1.6083803176879883, "loss": 1.0691, "nll_loss": 1.0566726922988892, "rewards/accuracies": 0.75, "rewards/chosen": -0.08218429237604141, "rewards/margins": 0.0786537453532219, "rewards/rejected": -0.1608380377292633, "step": 1773 }, { "epoch": 1.0822022266280311, "grad_norm": 1.6113824844360352, "learning_rate": 6.291243110838946e-06, "log_odds_chosen": 0.7053662538528442, "log_odds_ratio": -0.4541478753089905, "logits/chosen": -0.6357153654098511, "logits/rejected": -0.7197378873825073, "logps/chosen": -0.877879798412323, "logps/rejected": -1.3460601568222046, "loss": 1.107, "nll_loss": 1.0881092548370361, "rewards/accuracies": 0.875, "rewards/chosen": -0.08778797090053558, "rewards/margins": 0.04681804031133652, "rewards/rejected": -0.1346060186624527, "step": 1774 }, { "epoch": 1.082812261705048, "grad_norm": 1.853151559829712, "learning_rate": 6.290263319044702e-06, "log_odds_chosen": 0.5821682214736938, "log_odds_ratio": -0.5600728392601013, "logits/chosen": -0.9699493646621704, "logits/rejected": -0.9868435859680176, "logps/chosen": -0.7841989994049072, "logps/rejected": -1.1192702054977417, "loss": 1.0432, "nll_loss": 1.1206231117248535, "rewards/accuracies": 0.625, "rewards/chosen": -0.07841989398002625, "rewards/margins": 0.033507127314805984, "rewards/rejected": -0.11192702502012253, "step": 1775 }, { "epoch": 1.083422296782065, "grad_norm": 1.2714881896972656, "learning_rate": 6.289283527250459e-06, "log_odds_chosen": 1.6894583702087402, "log_odds_ratio": -0.39747416973114014, "logits/chosen": -0.903711199760437, "logits/rejected": -0.9375240802764893, "logps/chosen": -0.6944079995155334, "logps/rejected": -1.9930886030197144, "loss": 1.0021, "nll_loss": 1.186120867729187, "rewards/accuracies": 0.875, "rewards/chosen": -0.0694408044219017, "rewards/margins": 0.1298680454492569, "rewards/rejected": -0.1993088573217392, "step": 1776 }, { "epoch": 1.0840323318590819, "grad_norm": 1.291993260383606, "learning_rate": 6.2883037354562154e-06, "log_odds_chosen": 1.553749680519104, "log_odds_ratio": -0.33044469356536865, "logits/chosen": -0.8858358263969421, "logits/rejected": -0.8457117676734924, "logps/chosen": -0.6763467192649841, "logps/rejected": -1.680939793586731, "loss": 1.026, "nll_loss": 0.946224570274353, "rewards/accuracies": 0.875, "rewards/chosen": -0.06763467937707901, "rewards/margins": 0.10045930743217468, "rewards/rejected": -0.1680939793586731, "step": 1777 }, { "epoch": 1.0846423669360987, "grad_norm": 1.7563585042953491, "learning_rate": 6.2873239436619716e-06, "log_odds_chosen": 0.8591490387916565, "log_odds_ratio": -0.5024287700653076, "logits/chosen": -0.9557942748069763, "logits/rejected": -1.0211238861083984, "logps/chosen": -0.8620891571044922, "logps/rejected": -1.5109705924987793, "loss": 1.1418, "nll_loss": 1.096282958984375, "rewards/accuracies": 0.75, "rewards/chosen": -0.08620890974998474, "rewards/margins": 0.06488814949989319, "rewards/rejected": -0.15109707415103912, "step": 1778 }, { "epoch": 1.0852524020131158, "grad_norm": 1.514243483543396, "learning_rate": 6.286344151867728e-06, "log_odds_chosen": 0.10669714957475662, "log_odds_ratio": -0.7040711641311646, "logits/chosen": -1.1104282140731812, "logits/rejected": -1.159978985786438, "logps/chosen": -1.1665654182434082, "logps/rejected": -1.2165513038635254, "loss": 1.1531, "nll_loss": 1.5026829242706299, "rewards/accuracies": 0.625, "rewards/chosen": -0.11665654927492142, "rewards/margins": 0.004998577758669853, "rewards/rejected": -0.12165512144565582, "step": 1779 }, { "epoch": 1.0858624370901326, "grad_norm": 1.360012412071228, "learning_rate": 6.285364360073484e-06, "log_odds_chosen": -0.22026558220386505, "log_odds_ratio": -0.8861227035522461, "logits/chosen": -1.2249250411987305, "logits/rejected": -1.060664415359497, "logps/chosen": -1.0415678024291992, "logps/rejected": -0.9285403490066528, "loss": 1.1691, "nll_loss": 1.1366180181503296, "rewards/accuracies": 0.375, "rewards/chosen": -0.10415679216384888, "rewards/margins": -0.011302750557661057, "rewards/rejected": -0.09285403788089752, "step": 1780 }, { "epoch": 1.0864724721671497, "grad_norm": 1.7941875457763672, "learning_rate": 6.284384568279241e-06, "log_odds_chosen": 2.6058435440063477, "log_odds_ratio": -0.1321781575679779, "logits/chosen": -0.8391338586807251, "logits/rejected": -0.9890427589416504, "logps/chosen": -0.6197397112846375, "logps/rejected": -2.4372923374176025, "loss": 1.1119, "nll_loss": 1.0390548706054688, "rewards/accuracies": 1.0, "rewards/chosen": -0.061973970383405685, "rewards/margins": 0.18175525963306427, "rewards/rejected": -0.24372926354408264, "step": 1781 }, { "epoch": 1.0870825072441666, "grad_norm": 1.1263259649276733, "learning_rate": 6.283404776484997e-06, "log_odds_chosen": 0.7477928400039673, "log_odds_ratio": -0.6117251515388489, "logits/chosen": -1.0361955165863037, "logits/rejected": -1.131170392036438, "logps/chosen": -1.0076459646224976, "logps/rejected": -1.492018461227417, "loss": 1.3544, "nll_loss": 1.2704989910125732, "rewards/accuracies": 0.5, "rewards/chosen": -0.10076460242271423, "rewards/margins": 0.048437245190143585, "rewards/rejected": -0.14920184016227722, "step": 1782 }, { "epoch": 1.0876925423211834, "grad_norm": 1.2042258977890015, "learning_rate": 6.282424984690753e-06, "log_odds_chosen": 1.4522614479064941, "log_odds_ratio": -0.48726963996887207, "logits/chosen": -0.7958258390426636, "logits/rejected": -0.8955973982810974, "logps/chosen": -0.7556885480880737, "logps/rejected": -1.8976318836212158, "loss": 1.1712, "nll_loss": 1.0246881246566772, "rewards/accuracies": 0.625, "rewards/chosen": -0.07556886970996857, "rewards/margins": 0.11419433355331421, "rewards/rejected": -0.18976318836212158, "step": 1783 }, { "epoch": 1.0883025773982005, "grad_norm": 1.2414579391479492, "learning_rate": 6.281445192896509e-06, "log_odds_chosen": 0.8480870723724365, "log_odds_ratio": -0.657759964466095, "logits/chosen": -0.8121491074562073, "logits/rejected": -0.9209799766540527, "logps/chosen": -0.6812921762466431, "logps/rejected": -1.343637466430664, "loss": 0.9784, "nll_loss": 0.791560173034668, "rewards/accuracies": 0.625, "rewards/chosen": -0.06812921911478043, "rewards/margins": 0.0662345290184021, "rewards/rejected": -0.13436375558376312, "step": 1784 }, { "epoch": 1.0889126124752173, "grad_norm": 1.31423819065094, "learning_rate": 6.280465401102265e-06, "log_odds_chosen": 0.7833124399185181, "log_odds_ratio": -0.5402050614356995, "logits/chosen": -0.9273542165756226, "logits/rejected": -0.8841464519500732, "logps/chosen": -0.7073710560798645, "logps/rejected": -1.1867060661315918, "loss": 0.967, "nll_loss": 0.9477024674415588, "rewards/accuracies": 0.5, "rewards/chosen": -0.07073710858821869, "rewards/margins": 0.047933489084243774, "rewards/rejected": -0.11867059767246246, "step": 1785 }, { "epoch": 1.0895226475522342, "grad_norm": 2.5297086238861084, "learning_rate": 6.279485609308021e-06, "log_odds_chosen": 0.721762478351593, "log_odds_ratio": -0.6405494213104248, "logits/chosen": -0.8768067955970764, "logits/rejected": -0.8915804624557495, "logps/chosen": -0.7654779553413391, "logps/rejected": -1.2965214252471924, "loss": 1.086, "nll_loss": 1.0415070056915283, "rewards/accuracies": 0.5, "rewards/chosen": -0.07654780149459839, "rewards/margins": 0.05310434848070145, "rewards/rejected": -0.12965214252471924, "step": 1786 }, { "epoch": 1.0901326826292512, "grad_norm": 1.5199655294418335, "learning_rate": 6.278505817513778e-06, "log_odds_chosen": 1.5116167068481445, "log_odds_ratio": -0.3884049952030182, "logits/chosen": -0.8284149765968323, "logits/rejected": -0.9634189605712891, "logps/chosen": -0.6088589429855347, "logps/rejected": -1.605995774269104, "loss": 0.8987, "nll_loss": 0.7763640880584717, "rewards/accuracies": 0.875, "rewards/chosen": -0.060885898768901825, "rewards/margins": 0.09971367567777634, "rewards/rejected": -0.16059957444667816, "step": 1787 }, { "epoch": 1.090742717706268, "grad_norm": 1.3284012079238892, "learning_rate": 6.2775260257195345e-06, "log_odds_chosen": 0.8433029651641846, "log_odds_ratio": -0.43640321493148804, "logits/chosen": -1.0535041093826294, "logits/rejected": -1.1072497367858887, "logps/chosen": -0.8110131025314331, "logps/rejected": -1.3183262348175049, "loss": 1.0671, "nll_loss": 1.3986709117889404, "rewards/accuracies": 0.875, "rewards/chosen": -0.08110131323337555, "rewards/margins": 0.05073130130767822, "rewards/rejected": -0.13183261454105377, "step": 1788 }, { "epoch": 1.0913527527832851, "grad_norm": 1.8630690574645996, "learning_rate": 6.2765462339252914e-06, "log_odds_chosen": 0.5925914645195007, "log_odds_ratio": -0.6490092277526855, "logits/chosen": -1.0660558938980103, "logits/rejected": -1.0530604124069214, "logps/chosen": -1.1083253622055054, "logps/rejected": -1.6337089538574219, "loss": 1.2771, "nll_loss": 1.2605656385421753, "rewards/accuracies": 0.625, "rewards/chosen": -0.11083253473043442, "rewards/margins": 0.05253836140036583, "rewards/rejected": -0.16337089240550995, "step": 1789 }, { "epoch": 1.091962787860302, "grad_norm": 2.912809371948242, "learning_rate": 6.275566442131047e-06, "log_odds_chosen": 1.4361140727996826, "log_odds_ratio": -0.5076704025268555, "logits/chosen": -0.9341415166854858, "logits/rejected": -0.9589587450027466, "logps/chosen": -0.8774024248123169, "logps/rejected": -1.933058500289917, "loss": 1.1139, "nll_loss": 1.0638947486877441, "rewards/accuracies": 0.5, "rewards/chosen": -0.08774024248123169, "rewards/margins": 0.1055656149983406, "rewards/rejected": -0.1933058500289917, "step": 1790 }, { "epoch": 1.0925728229373188, "grad_norm": 1.3945547342300415, "learning_rate": 6.274586650336803e-06, "log_odds_chosen": 0.5354132056236267, "log_odds_ratio": -0.6151241660118103, "logits/chosen": -1.0961819887161255, "logits/rejected": -0.9957499504089355, "logps/chosen": -0.8801238536834717, "logps/rejected": -1.2947181463241577, "loss": 1.1627, "nll_loss": 1.0855423212051392, "rewards/accuracies": 0.625, "rewards/chosen": -0.08801239728927612, "rewards/margins": 0.04145941883325577, "rewards/rejected": -0.1294718086719513, "step": 1791 }, { "epoch": 1.093182858014336, "grad_norm": 1.6337167024612427, "learning_rate": 6.27360685854256e-06, "log_odds_chosen": 1.4086185693740845, "log_odds_ratio": -0.4722205400466919, "logits/chosen": -1.0369014739990234, "logits/rejected": -1.2169685363769531, "logps/chosen": -0.9742467999458313, "logps/rejected": -2.0237345695495605, "loss": 1.1037, "nll_loss": 1.2037298679351807, "rewards/accuracies": 0.75, "rewards/chosen": -0.09742467105388641, "rewards/margins": 0.10494879633188248, "rewards/rejected": -0.2023734748363495, "step": 1792 }, { "epoch": 1.0937928930913527, "grad_norm": 1.704550862312317, "learning_rate": 6.272627066748316e-06, "log_odds_chosen": 1.1847221851348877, "log_odds_ratio": -0.4735592007637024, "logits/chosen": -0.7317532896995544, "logits/rejected": -0.875051736831665, "logps/chosen": -0.7110099196434021, "logps/rejected": -1.4903994798660278, "loss": 1.0003, "nll_loss": 0.9294511675834656, "rewards/accuracies": 0.75, "rewards/chosen": -0.07110099494457245, "rewards/margins": 0.07793895900249481, "rewards/rejected": -0.14903995394706726, "step": 1793 }, { "epoch": 1.0944029281683696, "grad_norm": 0.8372685313224792, "learning_rate": 6.271647274954072e-06, "log_odds_chosen": 1.205104947090149, "log_odds_ratio": -0.4084761142730713, "logits/chosen": -0.8202754259109497, "logits/rejected": -0.958409309387207, "logps/chosen": -0.7678389549255371, "logps/rejected": -1.517236590385437, "loss": 0.9888, "nll_loss": 0.9013354778289795, "rewards/accuracies": 0.75, "rewards/chosen": -0.07678389549255371, "rewards/margins": 0.07493976503610611, "rewards/rejected": -0.15172365307807922, "step": 1794 }, { "epoch": 1.0950129632453867, "grad_norm": 1.566401481628418, "learning_rate": 6.270667483159828e-06, "log_odds_chosen": 0.9426136612892151, "log_odds_ratio": -0.6083282232284546, "logits/chosen": -0.8822272419929504, "logits/rejected": -0.8240160942077637, "logps/chosen": -0.8924352526664734, "logps/rejected": -1.687887191772461, "loss": 1.2367, "nll_loss": 1.2663377523422241, "rewards/accuracies": 0.5, "rewards/chosen": -0.08924353122711182, "rewards/margins": 0.07954519242048264, "rewards/rejected": -0.16878871619701385, "step": 1795 }, { "epoch": 1.0956229983224035, "grad_norm": 2.020291328430176, "learning_rate": 6.269687691365584e-06, "log_odds_chosen": 2.047863483428955, "log_odds_ratio": -0.34588125348091125, "logits/chosen": -0.8961355090141296, "logits/rejected": -0.7910984754562378, "logps/chosen": -0.7050944566726685, "logps/rejected": -2.269944190979004, "loss": 1.0861, "nll_loss": 0.8728979825973511, "rewards/accuracies": 0.875, "rewards/chosen": -0.07050944119691849, "rewards/margins": 0.1564849615097046, "rewards/rejected": -0.22699441015720367, "step": 1796 }, { "epoch": 1.0962330333994204, "grad_norm": 1.727907419204712, "learning_rate": 6.26870789957134e-06, "log_odds_chosen": 1.4013316631317139, "log_odds_ratio": -0.4845429062843323, "logits/chosen": -1.1039403676986694, "logits/rejected": -1.1471295356750488, "logps/chosen": -0.84747713804245, "logps/rejected": -1.8934160470962524, "loss": 1.2291, "nll_loss": 1.1075739860534668, "rewards/accuracies": 0.625, "rewards/chosen": -0.08474771678447723, "rewards/margins": 0.10459388792514801, "rewards/rejected": -0.18934160470962524, "step": 1797 }, { "epoch": 1.0968430684764374, "grad_norm": 1.1431806087493896, "learning_rate": 6.267728107777097e-06, "log_odds_chosen": 1.7330178022384644, "log_odds_ratio": -0.38708609342575073, "logits/chosen": -0.6880914568901062, "logits/rejected": -0.7372509241104126, "logps/chosen": -0.7745956182479858, "logps/rejected": -2.0370864868164062, "loss": 0.9505, "nll_loss": 1.0397894382476807, "rewards/accuracies": 0.75, "rewards/chosen": -0.07745956629514694, "rewards/margins": 0.12624910473823547, "rewards/rejected": -0.20370864868164062, "step": 1798 }, { "epoch": 1.0974531035534543, "grad_norm": 1.181043028831482, "learning_rate": 6.2667483159828535e-06, "log_odds_chosen": 0.5755528211593628, "log_odds_ratio": -0.5516996383666992, "logits/chosen": -0.8472100496292114, "logits/rejected": -0.6839675307273865, "logps/chosen": -0.9071120023727417, "logps/rejected": -1.296069622039795, "loss": 1.1261, "nll_loss": 1.1186884641647339, "rewards/accuracies": 0.75, "rewards/chosen": -0.09071120619773865, "rewards/margins": 0.038895756006240845, "rewards/rejected": -0.1296069622039795, "step": 1799 }, { "epoch": 1.0980631386304713, "grad_norm": 4.020850658416748, "learning_rate": 6.26576852418861e-06, "log_odds_chosen": 1.018410325050354, "log_odds_ratio": -0.5562149286270142, "logits/chosen": -0.7822019457817078, "logits/rejected": -0.8479874134063721, "logps/chosen": -0.8881343603134155, "logps/rejected": -1.6684601306915283, "loss": 1.1043, "nll_loss": 0.9823254346847534, "rewards/accuracies": 0.625, "rewards/chosen": -0.08881344646215439, "rewards/margins": 0.07803258299827576, "rewards/rejected": -0.16684602200984955, "step": 1800 }, { "epoch": 1.0986731737074882, "grad_norm": 1.2486355304718018, "learning_rate": 6.264788732394366e-06, "log_odds_chosen": 0.5544829964637756, "log_odds_ratio": -0.6340983510017395, "logits/chosen": -0.9033823609352112, "logits/rejected": -0.925360918045044, "logps/chosen": -0.7083885669708252, "logps/rejected": -1.041882872581482, "loss": 0.949, "nll_loss": 1.0108458995819092, "rewards/accuracies": 0.625, "rewards/chosen": -0.07083885371685028, "rewards/margins": 0.033349428325891495, "rewards/rejected": -0.10418829321861267, "step": 1801 }, { "epoch": 1.099283208784505, "grad_norm": 1.734359622001648, "learning_rate": 6.263808940600122e-06, "log_odds_chosen": 0.5758276581764221, "log_odds_ratio": -0.810213029384613, "logits/chosen": -1.2372530698776245, "logits/rejected": -1.0690398216247559, "logps/chosen": -1.2812564373016357, "logps/rejected": -1.7590621709823608, "loss": 1.1529, "nll_loss": 1.2765015363693237, "rewards/accuracies": 0.5, "rewards/chosen": -0.1281256526708603, "rewards/margins": 0.04778056964278221, "rewards/rejected": -0.1759062111377716, "step": 1802 }, { "epoch": 1.099893243861522, "grad_norm": 1.1594752073287964, "learning_rate": 6.262829148805878e-06, "log_odds_chosen": 2.197848081588745, "log_odds_ratio": -0.24795323610305786, "logits/chosen": -0.6078064441680908, "logits/rejected": -0.7464274168014526, "logps/chosen": -0.561617910861969, "logps/rejected": -2.103344440460205, "loss": 0.9911, "nll_loss": 0.72835373878479, "rewards/accuracies": 0.875, "rewards/chosen": -0.0561617910861969, "rewards/margins": 0.15417267382144928, "rewards/rejected": -0.21033446490764618, "step": 1803 }, { "epoch": 1.100503278938539, "grad_norm": 1.280457854270935, "learning_rate": 6.261849357011635e-06, "log_odds_chosen": 1.954250454902649, "log_odds_ratio": -0.5394588708877563, "logits/chosen": -0.9231195449829102, "logits/rejected": -1.012900471687317, "logps/chosen": -0.9978679418563843, "logps/rejected": -2.508434295654297, "loss": 1.0264, "nll_loss": 1.1763322353363037, "rewards/accuracies": 0.75, "rewards/chosen": -0.09978680312633514, "rewards/margins": 0.15105664730072021, "rewards/rejected": -0.25084343552589417, "step": 1804 }, { "epoch": 1.101113314015556, "grad_norm": 1.6512391567230225, "learning_rate": 6.260869565217391e-06, "log_odds_chosen": 1.003224492073059, "log_odds_ratio": -0.4725801944732666, "logits/chosen": -0.8673931360244751, "logits/rejected": -1.0387310981750488, "logps/chosen": -0.798823893070221, "logps/rejected": -1.4405343532562256, "loss": 1.215, "nll_loss": 0.9828977584838867, "rewards/accuracies": 0.75, "rewards/chosen": -0.07988238334655762, "rewards/margins": 0.06417104601860046, "rewards/rejected": -0.14405342936515808, "step": 1805 }, { "epoch": 1.1017233490925729, "grad_norm": 1.3970164060592651, "learning_rate": 6.259889773423148e-06, "log_odds_chosen": 0.9736465215682983, "log_odds_ratio": -0.4967115521430969, "logits/chosen": -0.9962385892868042, "logits/rejected": -1.0365768671035767, "logps/chosen": -0.8393636345863342, "logps/rejected": -1.5327552556991577, "loss": 0.974, "nll_loss": 0.8659040331840515, "rewards/accuracies": 0.625, "rewards/chosen": -0.08393636345863342, "rewards/margins": 0.06933916360139847, "rewards/rejected": -0.1532755196094513, "step": 1806 }, { "epoch": 1.1023333841695897, "grad_norm": 2.3016223907470703, "learning_rate": 6.258909981628903e-06, "log_odds_chosen": 1.4555141925811768, "log_odds_ratio": -0.42430514097213745, "logits/chosen": -0.820584237575531, "logits/rejected": -0.9895553588867188, "logps/chosen": -0.9484481811523438, "logps/rejected": -1.9705893993377686, "loss": 1.2005, "nll_loss": 1.1457750797271729, "rewards/accuracies": 0.75, "rewards/chosen": -0.09484480321407318, "rewards/margins": 0.10221412777900696, "rewards/rejected": -0.19705894589424133, "step": 1807 }, { "epoch": 1.1029434192466068, "grad_norm": 1.522047996520996, "learning_rate": 6.2579301898346595e-06, "log_odds_chosen": 1.6926512718200684, "log_odds_ratio": -0.4458329677581787, "logits/chosen": -0.7508872151374817, "logits/rejected": -0.8410725593566895, "logps/chosen": -0.7092227339744568, "logps/rejected": -1.931619644165039, "loss": 0.9001, "nll_loss": 0.8053614497184753, "rewards/accuracies": 0.625, "rewards/chosen": -0.07092227041721344, "rewards/margins": 0.12223969399929047, "rewards/rejected": -0.1931619793176651, "step": 1808 }, { "epoch": 1.1035534543236236, "grad_norm": 4.338975429534912, "learning_rate": 6.2569503980404164e-06, "log_odds_chosen": 0.4486161768436432, "log_odds_ratio": -0.7958617806434631, "logits/chosen": -0.8461564779281616, "logits/rejected": -0.9059327840805054, "logps/chosen": -1.1280364990234375, "logps/rejected": -1.4736459255218506, "loss": 1.1518, "nll_loss": 1.1313098669052124, "rewards/accuracies": 0.5, "rewards/chosen": -0.11280364543199539, "rewards/margins": 0.034560952335596085, "rewards/rejected": -0.14736460149288177, "step": 1809 }, { "epoch": 1.1041634894006405, "grad_norm": 1.5760177373886108, "learning_rate": 6.2559706062461726e-06, "log_odds_chosen": 1.261535406112671, "log_odds_ratio": -0.36563214659690857, "logits/chosen": -0.8216724991798401, "logits/rejected": -0.8872995376586914, "logps/chosen": -0.7475907802581787, "logps/rejected": -1.615919828414917, "loss": 1.035, "nll_loss": 0.9919652938842773, "rewards/accuracies": 1.0, "rewards/chosen": -0.0747590884566307, "rewards/margins": 0.0868329107761383, "rewards/rejected": -0.16159197688102722, "step": 1810 }, { "epoch": 1.1047735244776575, "grad_norm": 1.153361201286316, "learning_rate": 6.254990814451929e-06, "log_odds_chosen": 1.5170929431915283, "log_odds_ratio": -0.3268665373325348, "logits/chosen": -0.8620573878288269, "logits/rejected": -0.7188432812690735, "logps/chosen": -0.5593810677528381, "logps/rejected": -1.5576543807983398, "loss": 1.0995, "nll_loss": 0.861324667930603, "rewards/accuracies": 0.875, "rewards/chosen": -0.05593810975551605, "rewards/margins": 0.09982731938362122, "rewards/rejected": -0.15576542913913727, "step": 1811 }, { "epoch": 1.1053835595546744, "grad_norm": 1.6737501621246338, "learning_rate": 6.254011022657685e-06, "log_odds_chosen": 1.1028432846069336, "log_odds_ratio": -0.42910486459732056, "logits/chosen": -1.0754685401916504, "logits/rejected": -0.9898039698600769, "logps/chosen": -0.8461483716964722, "logps/rejected": -1.6060597896575928, "loss": 1.2123, "nll_loss": 1.0630429983139038, "rewards/accuracies": 0.75, "rewards/chosen": -0.0846148356795311, "rewards/margins": 0.07599115371704102, "rewards/rejected": -0.1606059968471527, "step": 1812 }, { "epoch": 1.1059935946316912, "grad_norm": 2.1449923515319824, "learning_rate": 6.253031230863441e-06, "log_odds_chosen": 0.9757858514785767, "log_odds_ratio": -0.5956950783729553, "logits/chosen": -0.9746394753456116, "logits/rejected": -0.9652541279792786, "logps/chosen": -0.8428065180778503, "logps/rejected": -1.5335638523101807, "loss": 0.9923, "nll_loss": 1.0056581497192383, "rewards/accuracies": 0.625, "rewards/chosen": -0.08428065478801727, "rewards/margins": 0.06907574087381363, "rewards/rejected": -0.1533564031124115, "step": 1813 }, { "epoch": 1.1066036297087083, "grad_norm": 2.2743723392486572, "learning_rate": 6.252051439069197e-06, "log_odds_chosen": 1.4092907905578613, "log_odds_ratio": -0.38253870606422424, "logits/chosen": -0.9761922955513, "logits/rejected": -1.132870078086853, "logps/chosen": -0.7077215909957886, "logps/rejected": -1.5235965251922607, "loss": 1.2106, "nll_loss": 1.3544285297393799, "rewards/accuracies": 0.75, "rewards/chosen": -0.07077215611934662, "rewards/margins": 0.08158749341964722, "rewards/rejected": -0.15235966444015503, "step": 1814 }, { "epoch": 1.1072136647857251, "grad_norm": 1.5950393676757812, "learning_rate": 6.251071647274954e-06, "log_odds_chosen": 1.3742625713348389, "log_odds_ratio": -0.47964808344841003, "logits/chosen": -0.9013469219207764, "logits/rejected": -0.9559609293937683, "logps/chosen": -0.8390951156616211, "logps/rejected": -1.647504448890686, "loss": 1.1539, "nll_loss": 0.974923312664032, "rewards/accuracies": 0.625, "rewards/chosen": -0.08390951156616211, "rewards/margins": 0.08084093034267426, "rewards/rejected": -0.16475045680999756, "step": 1815 }, { "epoch": 1.1078236998627422, "grad_norm": 2.825700283050537, "learning_rate": 6.25009185548071e-06, "log_odds_chosen": 1.3738548755645752, "log_odds_ratio": -0.4195077121257782, "logits/chosen": -1.022202968597412, "logits/rejected": -1.0946205854415894, "logps/chosen": -0.7541830539703369, "logps/rejected": -1.6245224475860596, "loss": 1.1324, "nll_loss": 1.0739285945892334, "rewards/accuracies": 0.625, "rewards/chosen": -0.07541830837726593, "rewards/margins": 0.0870339423418045, "rewards/rejected": -0.16245225071907043, "step": 1816 }, { "epoch": 1.108433734939759, "grad_norm": 0.9862058162689209, "learning_rate": 6.249112063686467e-06, "log_odds_chosen": 1.2649250030517578, "log_odds_ratio": -0.5879263281822205, "logits/chosen": -0.9030767679214478, "logits/rejected": -0.955039381980896, "logps/chosen": -0.8345728516578674, "logps/rejected": -1.6417381763458252, "loss": 1.0151, "nll_loss": 1.0229498147964478, "rewards/accuracies": 0.5, "rewards/chosen": -0.08345729112625122, "rewards/margins": 0.08071652799844742, "rewards/rejected": -0.16417381167411804, "step": 1817 }, { "epoch": 1.109043770016776, "grad_norm": 1.037484884262085, "learning_rate": 6.248132271892222e-06, "log_odds_chosen": 1.581512212753296, "log_odds_ratio": -0.3409452736377716, "logits/chosen": -0.6556394696235657, "logits/rejected": -0.862643301486969, "logps/chosen": -0.6908441781997681, "logps/rejected": -1.8296787738800049, "loss": 1.1086, "nll_loss": 0.8244026899337769, "rewards/accuracies": 0.875, "rewards/chosen": -0.06908442080020905, "rewards/margins": 0.11388346552848816, "rewards/rejected": -0.1829678863286972, "step": 1818 }, { "epoch": 1.109653805093793, "grad_norm": 3.0790762901306152, "learning_rate": 6.2471524800979785e-06, "log_odds_chosen": 0.9725136160850525, "log_odds_ratio": -0.5050808787345886, "logits/chosen": -1.158374309539795, "logits/rejected": -1.1307815313339233, "logps/chosen": -0.7720130681991577, "logps/rejected": -1.397868037223816, "loss": 1.0529, "nll_loss": 0.9827753901481628, "rewards/accuracies": 0.5, "rewards/chosen": -0.07720130681991577, "rewards/margins": 0.0625855028629303, "rewards/rejected": -0.13978680968284607, "step": 1819 }, { "epoch": 1.1102638401708098, "grad_norm": 1.0579557418823242, "learning_rate": 6.2461726883037355e-06, "log_odds_chosen": 0.8975638151168823, "log_odds_ratio": -0.5453693270683289, "logits/chosen": -0.7784926295280457, "logits/rejected": -0.9377841949462891, "logps/chosen": -0.7526888251304626, "logps/rejected": -1.338752031326294, "loss": 0.8667, "nll_loss": 0.8012913465499878, "rewards/accuracies": 0.5, "rewards/chosen": -0.07526888698339462, "rewards/margins": 0.05860632285475731, "rewards/rejected": -0.13387520611286163, "step": 1820 }, { "epoch": 1.1108738752478267, "grad_norm": 3.105879545211792, "learning_rate": 6.245192896509492e-06, "log_odds_chosen": 1.161759614944458, "log_odds_ratio": -0.46110886335372925, "logits/chosen": -1.0518182516098022, "logits/rejected": -1.116575002670288, "logps/chosen": -0.9394791126251221, "logps/rejected": -1.7528594732284546, "loss": 1.292, "nll_loss": 1.3458898067474365, "rewards/accuracies": 0.625, "rewards/chosen": -0.09394790977239609, "rewards/margins": 0.08133804053068161, "rewards/rejected": -0.1752859652042389, "step": 1821 }, { "epoch": 1.1114839103248437, "grad_norm": 1.1854100227355957, "learning_rate": 6.244213104715248e-06, "log_odds_chosen": 1.370617389678955, "log_odds_ratio": -0.4289873540401459, "logits/chosen": -0.7550190091133118, "logits/rejected": -0.7568802237510681, "logps/chosen": -0.7380600571632385, "logps/rejected": -1.7038724422454834, "loss": 0.9534, "nll_loss": 0.8836385607719421, "rewards/accuracies": 0.875, "rewards/chosen": -0.07380601018667221, "rewards/margins": 0.09658123552799225, "rewards/rejected": -0.17038725316524506, "step": 1822 }, { "epoch": 1.1120939454018606, "grad_norm": 1.8254224061965942, "learning_rate": 6.243233312921005e-06, "log_odds_chosen": 1.027003526687622, "log_odds_ratio": -0.5481360554695129, "logits/chosen": -0.8766764402389526, "logits/rejected": -0.9641904830932617, "logps/chosen": -1.041823387145996, "logps/rejected": -1.8151919841766357, "loss": 1.1864, "nll_loss": 1.1669366359710693, "rewards/accuracies": 0.5, "rewards/chosen": -0.10418233275413513, "rewards/margins": 0.0773368552327156, "rewards/rejected": -0.18151918053627014, "step": 1823 }, { "epoch": 1.1127039804788774, "grad_norm": 1.6770261526107788, "learning_rate": 6.24225352112676e-06, "log_odds_chosen": 1.7826114892959595, "log_odds_ratio": -0.42270034551620483, "logits/chosen": -0.827379584312439, "logits/rejected": -0.8905813694000244, "logps/chosen": -0.7303752303123474, "logps/rejected": -1.7688956260681152, "loss": 1.0787, "nll_loss": 1.232293963432312, "rewards/accuracies": 0.875, "rewards/chosen": -0.0730375200510025, "rewards/margins": 0.1038520485162735, "rewards/rejected": -0.176889568567276, "step": 1824 }, { "epoch": 1.1133140155558945, "grad_norm": 1.041169285774231, "learning_rate": 6.241273729332516e-06, "log_odds_chosen": 1.6184625625610352, "log_odds_ratio": -0.47625455260276794, "logits/chosen": -1.0113604068756104, "logits/rejected": -1.083565354347229, "logps/chosen": -0.8388935327529907, "logps/rejected": -2.1199426651000977, "loss": 1.2334, "nll_loss": 1.1975741386413574, "rewards/accuracies": 0.5, "rewards/chosen": -0.08388935029506683, "rewards/margins": 0.12810494005680084, "rewards/rejected": -0.21199430525302887, "step": 1825 }, { "epoch": 1.1139240506329113, "grad_norm": 1.7475658655166626, "learning_rate": 6.240293937538273e-06, "log_odds_chosen": 0.7577029466629028, "log_odds_ratio": -0.5075356364250183, "logits/chosen": -0.9456686973571777, "logits/rejected": -0.9113441705703735, "logps/chosen": -0.8813697695732117, "logps/rejected": -1.3783749341964722, "loss": 1.138, "nll_loss": 0.8999152779579163, "rewards/accuracies": 0.75, "rewards/chosen": -0.08813697844743729, "rewards/margins": 0.04970052093267441, "rewards/rejected": -0.1378374993801117, "step": 1826 }, { "epoch": 1.1145340857099284, "grad_norm": 1.7546112537384033, "learning_rate": 6.239314145744029e-06, "log_odds_chosen": 0.8034483194351196, "log_odds_ratio": -0.6141762137413025, "logits/chosen": -1.0915770530700684, "logits/rejected": -1.0240023136138916, "logps/chosen": -0.8493975400924683, "logps/rejected": -1.422942876815796, "loss": 1.1924, "nll_loss": 1.2645621299743652, "rewards/accuracies": 0.375, "rewards/chosen": -0.08493975549936295, "rewards/margins": 0.05735454335808754, "rewards/rejected": -0.14229430258274078, "step": 1827 }, { "epoch": 1.1151441207869452, "grad_norm": 2.4373888969421387, "learning_rate": 6.238334353949785e-06, "log_odds_chosen": 1.5508335828781128, "log_odds_ratio": -0.36914026737213135, "logits/chosen": -0.8353328704833984, "logits/rejected": -0.817514181137085, "logps/chosen": -0.766910195350647, "logps/rejected": -1.7399884462356567, "loss": 0.9436, "nll_loss": 0.8909814357757568, "rewards/accuracies": 0.75, "rewards/chosen": -0.07669101655483246, "rewards/margins": 0.09730783849954605, "rewards/rejected": -0.1739988625049591, "step": 1828 }, { "epoch": 1.115754155863962, "grad_norm": 1.6666778326034546, "learning_rate": 6.237354562155541e-06, "log_odds_chosen": 0.48866498470306396, "log_odds_ratio": -0.7644931077957153, "logits/chosen": -0.9194769859313965, "logits/rejected": -1.0156253576278687, "logps/chosen": -0.692436158657074, "logps/rejected": -1.0088049173355103, "loss": 1.1028, "nll_loss": 1.0711123943328857, "rewards/accuracies": 0.75, "rewards/chosen": -0.06924361735582352, "rewards/margins": 0.03163687512278557, "rewards/rejected": -0.10088049620389938, "step": 1829 }, { "epoch": 1.1163641909409792, "grad_norm": 1.3149343729019165, "learning_rate": 6.2363747703612975e-06, "log_odds_chosen": 0.7843335270881653, "log_odds_ratio": -0.41932398080825806, "logits/chosen": -1.0655992031097412, "logits/rejected": -1.0944405794143677, "logps/chosen": -0.9337158799171448, "logps/rejected": -1.5094099044799805, "loss": 1.1465, "nll_loss": 1.1227812767028809, "rewards/accuracies": 0.875, "rewards/chosen": -0.09337158501148224, "rewards/margins": 0.05756940320134163, "rewards/rejected": -0.15094098448753357, "step": 1830 }, { "epoch": 1.116974226017996, "grad_norm": 1.4521820545196533, "learning_rate": 6.2353949785670545e-06, "log_odds_chosen": 2.4853262901306152, "log_odds_ratio": -0.18404076993465424, "logits/chosen": -0.7856154441833496, "logits/rejected": -1.0235298871994019, "logps/chosen": -0.567945122718811, "logps/rejected": -2.4189682006835938, "loss": 1.0724, "nll_loss": 0.8961179256439209, "rewards/accuracies": 1.0, "rewards/chosen": -0.05679451674222946, "rewards/margins": 0.18510231375694275, "rewards/rejected": -0.2418968379497528, "step": 1831 }, { "epoch": 1.117584261095013, "grad_norm": 3.3041324615478516, "learning_rate": 6.234415186772811e-06, "log_odds_chosen": 0.3495689034461975, "log_odds_ratio": -0.6595956683158875, "logits/chosen": -0.9855471849441528, "logits/rejected": -1.0833735466003418, "logps/chosen": -0.8733057975769043, "logps/rejected": -0.9610003232955933, "loss": 1.3239, "nll_loss": 1.3033342361450195, "rewards/accuracies": 0.375, "rewards/chosen": -0.08733058720827103, "rewards/margins": 0.008769446983933449, "rewards/rejected": -0.09610003232955933, "step": 1832 }, { "epoch": 1.11819429617203, "grad_norm": 2.2011048793792725, "learning_rate": 6.233435394978567e-06, "log_odds_chosen": 0.6899380683898926, "log_odds_ratio": -0.7442108392715454, "logits/chosen": -1.0265244245529175, "logits/rejected": -1.0497337579727173, "logps/chosen": -1.0081026554107666, "logps/rejected": -1.4393651485443115, "loss": 1.1067, "nll_loss": 1.1558680534362793, "rewards/accuracies": 0.625, "rewards/chosen": -0.10081025958061218, "rewards/margins": 0.04312625154852867, "rewards/rejected": -0.14393651485443115, "step": 1833 }, { "epoch": 1.1188043312490468, "grad_norm": 1.1343778371810913, "learning_rate": 6.232455603184324e-06, "log_odds_chosen": 2.419170618057251, "log_odds_ratio": -0.34534958004951477, "logits/chosen": -0.4734736680984497, "logits/rejected": -0.8152135610580444, "logps/chosen": -0.48846855759620667, "logps/rejected": -2.105058193206787, "loss": 0.941, "nll_loss": 0.6572362184524536, "rewards/accuracies": 0.75, "rewards/chosen": -0.048846855759620667, "rewards/margins": 0.16165894269943237, "rewards/rejected": -0.21050581336021423, "step": 1834 }, { "epoch": 1.1194143663260638, "grad_norm": 1.0998889207839966, "learning_rate": 6.231475811390079e-06, "log_odds_chosen": 2.0908398628234863, "log_odds_ratio": -0.31640172004699707, "logits/chosen": -0.8789825439453125, "logits/rejected": -0.9151819944381714, "logps/chosen": -0.6246352791786194, "logps/rejected": -2.218803882598877, "loss": 1.0407, "nll_loss": 0.936242938041687, "rewards/accuracies": 0.875, "rewards/chosen": -0.06246352940797806, "rewards/margins": 0.15941688418388367, "rewards/rejected": -0.22188040614128113, "step": 1835 }, { "epoch": 1.1200244014030807, "grad_norm": 1.2506026029586792, "learning_rate": 6.230496019595835e-06, "log_odds_chosen": 1.578460931777954, "log_odds_ratio": -0.334096223115921, "logits/chosen": -0.9380632042884827, "logits/rejected": -0.8944165110588074, "logps/chosen": -0.8543212413787842, "logps/rejected": -2.1061277389526367, "loss": 1.0318, "nll_loss": 0.940406084060669, "rewards/accuracies": 0.75, "rewards/chosen": -0.08543212711811066, "rewards/margins": 0.12518064677715302, "rewards/rejected": -0.21061277389526367, "step": 1836 }, { "epoch": 1.1206344364800975, "grad_norm": 1.1327017545700073, "learning_rate": 6.229516227801592e-06, "log_odds_chosen": 1.1567647457122803, "log_odds_ratio": -0.3939710855484009, "logits/chosen": -0.7170379161834717, "logits/rejected": -0.9262677431106567, "logps/chosen": -0.6384114027023315, "logps/rejected": -1.3850575685501099, "loss": 0.991, "nll_loss": 0.8914657235145569, "rewards/accuracies": 0.875, "rewards/chosen": -0.06384114921092987, "rewards/margins": 0.07466460764408112, "rewards/rejected": -0.138505756855011, "step": 1837 }, { "epoch": 1.1212444715571146, "grad_norm": 6.1064863204956055, "learning_rate": 6.228536436007348e-06, "log_odds_chosen": 0.8247337341308594, "log_odds_ratio": -0.5056567788124084, "logits/chosen": -0.9929734468460083, "logits/rejected": -0.9911219477653503, "logps/chosen": -0.8282707929611206, "logps/rejected": -1.4039090871810913, "loss": 1.0631, "nll_loss": 1.0213711261749268, "rewards/accuracies": 0.625, "rewards/chosen": -0.08282709121704102, "rewards/margins": 0.05756382271647453, "rewards/rejected": -0.14039090275764465, "step": 1838 }, { "epoch": 1.1218545066341314, "grad_norm": 4.7260870933532715, "learning_rate": 6.227556644213104e-06, "log_odds_chosen": 0.8816094398498535, "log_odds_ratio": -0.48569488525390625, "logits/chosen": -1.1143935918807983, "logits/rejected": -1.1382890939712524, "logps/chosen": -0.7756515145301819, "logps/rejected": -1.3390591144561768, "loss": 1.0199, "nll_loss": 1.1539900302886963, "rewards/accuracies": 0.75, "rewards/chosen": -0.07756514847278595, "rewards/margins": 0.05634075403213501, "rewards/rejected": -0.13390591740608215, "step": 1839 }, { "epoch": 1.1224645417111483, "grad_norm": 3.6879396438598633, "learning_rate": 6.226576852418861e-06, "log_odds_chosen": 1.232920527458191, "log_odds_ratio": -0.43883106112480164, "logits/chosen": -1.0186481475830078, "logits/rejected": -1.0236146450042725, "logps/chosen": -0.6669376492500305, "logps/rejected": -1.5703692436218262, "loss": 1.1334, "nll_loss": 1.0383586883544922, "rewards/accuracies": 0.75, "rewards/chosen": -0.06669376790523529, "rewards/margins": 0.09034314751625061, "rewards/rejected": -0.1570369154214859, "step": 1840 }, { "epoch": 1.1230745767881654, "grad_norm": 1.9207144975662231, "learning_rate": 6.225597060624617e-06, "log_odds_chosen": 0.38953614234924316, "log_odds_ratio": -0.684097945690155, "logits/chosen": -0.9463201761245728, "logits/rejected": -1.0112642049789429, "logps/chosen": -0.8600882887840271, "logps/rejected": -0.9998921155929565, "loss": 1.159, "nll_loss": 1.060532569885254, "rewards/accuracies": 0.625, "rewards/chosen": -0.08600883185863495, "rewards/margins": 0.013980381190776825, "rewards/rejected": -0.09998922049999237, "step": 1841 }, { "epoch": 1.1236846118651822, "grad_norm": 2.8733010292053223, "learning_rate": 6.224617268830373e-06, "log_odds_chosen": 0.17523089051246643, "log_odds_ratio": -0.8065007925033569, "logits/chosen": -0.9267977476119995, "logits/rejected": -0.8388856649398804, "logps/chosen": -0.9222136735916138, "logps/rejected": -1.1684786081314087, "loss": 1.1131, "nll_loss": 1.0673329830169678, "rewards/accuracies": 0.625, "rewards/chosen": -0.09222136437892914, "rewards/margins": 0.024626493453979492, "rewards/rejected": -0.11684786528348923, "step": 1842 }, { "epoch": 1.1242946469421993, "grad_norm": 3.2052245140075684, "learning_rate": 6.22363747703613e-06, "log_odds_chosen": 1.2686164379119873, "log_odds_ratio": -0.43303626775741577, "logits/chosen": -0.7159256935119629, "logits/rejected": -0.8684356212615967, "logps/chosen": -0.6890186071395874, "logps/rejected": -1.4413973093032837, "loss": 0.9566, "nll_loss": 0.8762117624282837, "rewards/accuracies": 0.75, "rewards/chosen": -0.06890186667442322, "rewards/margins": 0.07523787021636963, "rewards/rejected": -0.14413973689079285, "step": 1843 }, { "epoch": 1.1249046820192161, "grad_norm": 1.0983701944351196, "learning_rate": 6.222657685241886e-06, "log_odds_chosen": 1.158308744430542, "log_odds_ratio": -0.4483598470687866, "logits/chosen": -0.7999579906463623, "logits/rejected": -0.7718486189842224, "logps/chosen": -0.7440356016159058, "logps/rejected": -1.336965560913086, "loss": 0.9865, "nll_loss": 0.9064253568649292, "rewards/accuracies": 0.75, "rewards/chosen": -0.0744035542011261, "rewards/margins": 0.05929301679134369, "rewards/rejected": -0.1336965709924698, "step": 1844 }, { "epoch": 1.125514717096233, "grad_norm": 1.019162893295288, "learning_rate": 6.221677893447643e-06, "log_odds_chosen": 1.2774827480316162, "log_odds_ratio": -0.4350139796733856, "logits/chosen": -1.0356998443603516, "logits/rejected": -1.0643901824951172, "logps/chosen": -0.8528231978416443, "logps/rejected": -1.8118761777877808, "loss": 1.0819, "nll_loss": 0.9489307999610901, "rewards/accuracies": 0.75, "rewards/chosen": -0.0852823257446289, "rewards/margins": 0.09590531140565872, "rewards/rejected": -0.18118762969970703, "step": 1845 }, { "epoch": 1.12612475217325, "grad_norm": 1.086791753768921, "learning_rate": 6.220698101653399e-06, "log_odds_chosen": 0.86105877161026, "log_odds_ratio": -0.43761736154556274, "logits/chosen": -0.8234812021255493, "logits/rejected": -0.8596507906913757, "logps/chosen": -0.7365468144416809, "logps/rejected": -1.2941319942474365, "loss": 0.9377, "nll_loss": 0.7736819982528687, "rewards/accuracies": 0.75, "rewards/chosen": -0.07365468144416809, "rewards/margins": 0.055758509784936905, "rewards/rejected": -0.1294132024049759, "step": 1846 }, { "epoch": 1.1267347872502669, "grad_norm": 1.9047116041183472, "learning_rate": 6.219718309859154e-06, "log_odds_chosen": 0.23096570372581482, "log_odds_ratio": -0.6828559637069702, "logits/chosen": -0.9754705429077148, "logits/rejected": -0.8951206207275391, "logps/chosen": -0.816851019859314, "logps/rejected": -0.9374262094497681, "loss": 1.1136, "nll_loss": 1.0201621055603027, "rewards/accuracies": 0.5, "rewards/chosen": -0.08168510347604752, "rewards/margins": 0.01205751858651638, "rewards/rejected": -0.09374263137578964, "step": 1847 }, { "epoch": 1.1273448223272837, "grad_norm": 1.0700050592422485, "learning_rate": 6.218738518064911e-06, "log_odds_chosen": 1.8273626565933228, "log_odds_ratio": -0.3690497577190399, "logits/chosen": -0.820634126663208, "logits/rejected": -0.8640314340591431, "logps/chosen": -0.5641850233078003, "logps/rejected": -1.8384095430374146, "loss": 0.9517, "nll_loss": 0.9906688928604126, "rewards/accuracies": 0.875, "rewards/chosen": -0.05641850084066391, "rewards/margins": 0.12742246687412262, "rewards/rejected": -0.18384096026420593, "step": 1848 }, { "epoch": 1.1279548574043008, "grad_norm": 2.207866907119751, "learning_rate": 6.217758726270667e-06, "log_odds_chosen": 1.7462255954742432, "log_odds_ratio": -0.2919868230819702, "logits/chosen": -0.9145758152008057, "logits/rejected": -1.0088486671447754, "logps/chosen": -0.507282555103302, "logps/rejected": -1.262885332107544, "loss": 1.1991, "nll_loss": 1.0565526485443115, "rewards/accuracies": 0.875, "rewards/chosen": -0.05072825774550438, "rewards/margins": 0.07556028664112091, "rewards/rejected": -0.1262885332107544, "step": 1849 }, { "epoch": 1.1285648924813176, "grad_norm": 1.695811152458191, "learning_rate": 6.216778934476423e-06, "log_odds_chosen": 0.8593326807022095, "log_odds_ratio": -0.592713475227356, "logits/chosen": -1.0446856021881104, "logits/rejected": -1.0395026206970215, "logps/chosen": -1.0493193864822388, "logps/rejected": -1.647753357887268, "loss": 1.1358, "nll_loss": 1.2093532085418701, "rewards/accuracies": 0.5, "rewards/chosen": -0.10493193566799164, "rewards/margins": 0.05984339863061905, "rewards/rejected": -0.16477534174919128, "step": 1850 }, { "epoch": 1.1291749275583345, "grad_norm": 2.8990299701690674, "learning_rate": 6.21579914268218e-06, "log_odds_chosen": 0.9906715154647827, "log_odds_ratio": -0.5531163811683655, "logits/chosen": -0.8136277198791504, "logits/rejected": -0.9531344175338745, "logps/chosen": -0.9611769318580627, "logps/rejected": -1.4186832904815674, "loss": 1.2184, "nll_loss": 1.2382100820541382, "rewards/accuracies": 0.625, "rewards/chosen": -0.09611769765615463, "rewards/margins": 0.04575064033269882, "rewards/rejected": -0.14186833798885345, "step": 1851 }, { "epoch": 1.1297849626353516, "grad_norm": 2.1059021949768066, "learning_rate": 6.214819350887936e-06, "log_odds_chosen": 0.7207045555114746, "log_odds_ratio": -0.6790372729301453, "logits/chosen": -1.146634578704834, "logits/rejected": -1.183837652206421, "logps/chosen": -1.1953964233398438, "logps/rejected": -1.7666974067687988, "loss": 1.1225, "nll_loss": 1.1185358762741089, "rewards/accuracies": 0.625, "rewards/chosen": -0.11953964829444885, "rewards/margins": 0.057130102068185806, "rewards/rejected": -0.17666974663734436, "step": 1852 }, { "epoch": 1.1303949977123684, "grad_norm": 1.3323465585708618, "learning_rate": 6.213839559093692e-06, "log_odds_chosen": 1.4043726921081543, "log_odds_ratio": -0.4435831606388092, "logits/chosen": -0.870336651802063, "logits/rejected": -0.892004132270813, "logps/chosen": -0.6113113164901733, "logps/rejected": -1.384272813796997, "loss": 1.0245, "nll_loss": 0.7983102202415466, "rewards/accuracies": 0.625, "rewards/chosen": -0.061131127178668976, "rewards/margins": 0.0772961676120758, "rewards/rejected": -0.13842730224132538, "step": 1853 }, { "epoch": 1.1310050327893855, "grad_norm": 1.524830937385559, "learning_rate": 6.212859767299449e-06, "log_odds_chosen": 0.3005915880203247, "log_odds_ratio": -0.7500922679901123, "logits/chosen": -0.8723500370979309, "logits/rejected": -0.8965516090393066, "logps/chosen": -0.8189831972122192, "logps/rejected": -0.9768264293670654, "loss": 1.1448, "nll_loss": 0.9550945162773132, "rewards/accuracies": 0.5, "rewards/chosen": -0.08189832419157028, "rewards/margins": 0.01578432507812977, "rewards/rejected": -0.0976826399564743, "step": 1854 }, { "epoch": 1.1316150678664023, "grad_norm": 1.1056658029556274, "learning_rate": 6.211879975505205e-06, "log_odds_chosen": 1.1538255214691162, "log_odds_ratio": -0.47025710344314575, "logits/chosen": -1.0323371887207031, "logits/rejected": -0.9380830526351929, "logps/chosen": -0.8555911779403687, "logps/rejected": -1.6148040294647217, "loss": 1.0963, "nll_loss": 1.079131007194519, "rewards/accuracies": 0.625, "rewards/chosen": -0.08555911481380463, "rewards/margins": 0.07592128962278366, "rewards/rejected": -0.1614803969860077, "step": 1855 }, { "epoch": 1.1322251029434192, "grad_norm": 1.312881588935852, "learning_rate": 6.210900183710961e-06, "log_odds_chosen": 1.5737824440002441, "log_odds_ratio": -0.3596564531326294, "logits/chosen": -0.6968920230865479, "logits/rejected": -0.8832237720489502, "logps/chosen": -0.8130943775177002, "logps/rejected": -1.915785789489746, "loss": 1.1259, "nll_loss": 1.1195480823516846, "rewards/accuracies": 0.75, "rewards/chosen": -0.08130943775177002, "rewards/margins": 0.11026912182569504, "rewards/rejected": -0.19157856702804565, "step": 1856 }, { "epoch": 1.1328351380204362, "grad_norm": 1.9345999956130981, "learning_rate": 6.209920391916718e-06, "log_odds_chosen": 1.039406180381775, "log_odds_ratio": -0.5078576803207397, "logits/chosen": -0.9165863990783691, "logits/rejected": -0.9322859048843384, "logps/chosen": -0.9594966173171997, "logps/rejected": -1.6766283512115479, "loss": 1.2381, "nll_loss": 1.0512899160385132, "rewards/accuracies": 0.75, "rewards/chosen": -0.0959496721625328, "rewards/margins": 0.0717131569981575, "rewards/rejected": -0.1676628440618515, "step": 1857 }, { "epoch": 1.133445173097453, "grad_norm": 1.9209811687469482, "learning_rate": 6.208940600122473e-06, "log_odds_chosen": 1.040018081665039, "log_odds_ratio": -0.4794102609157562, "logits/chosen": -0.8872625827789307, "logits/rejected": -1.0682522058486938, "logps/chosen": -0.8075264096260071, "logps/rejected": -1.5830051898956299, "loss": 1.1273, "nll_loss": 1.0282517671585083, "rewards/accuracies": 0.875, "rewards/chosen": -0.08075263351202011, "rewards/margins": 0.07754788547754288, "rewards/rejected": -0.158300518989563, "step": 1858 }, { "epoch": 1.1340552081744701, "grad_norm": 2.5558409690856934, "learning_rate": 6.20796080832823e-06, "log_odds_chosen": 0.520755410194397, "log_odds_ratio": -0.6748015880584717, "logits/chosen": -0.9569551944732666, "logits/rejected": -0.8581650257110596, "logps/chosen": -0.9164974093437195, "logps/rejected": -1.3828147649765015, "loss": 1.2289, "nll_loss": 1.0125489234924316, "rewards/accuracies": 0.5, "rewards/chosen": -0.09164974093437195, "rewards/margins": 0.04663173854351044, "rewards/rejected": -0.1382814645767212, "step": 1859 }, { "epoch": 1.134665243251487, "grad_norm": 1.5365885496139526, "learning_rate": 6.206981016533986e-06, "log_odds_chosen": 0.7108677625656128, "log_odds_ratio": -0.6389864683151245, "logits/chosen": -0.8655378818511963, "logits/rejected": -0.887470006942749, "logps/chosen": -0.8786606788635254, "logps/rejected": -1.2916699647903442, "loss": 1.1524, "nll_loss": 0.936873197555542, "rewards/accuracies": 0.375, "rewards/chosen": -0.08786606788635254, "rewards/margins": 0.041300930082798004, "rewards/rejected": -0.12916699051856995, "step": 1860 }, { "epoch": 1.1352752783285038, "grad_norm": 1.9214850664138794, "learning_rate": 6.206001224739742e-06, "log_odds_chosen": 1.9077008962631226, "log_odds_ratio": -0.36409294605255127, "logits/chosen": -0.9413098692893982, "logits/rejected": -1.008734941482544, "logps/chosen": -0.5403475165367126, "logps/rejected": -1.9311285018920898, "loss": 1.0948, "nll_loss": 1.0268590450286865, "rewards/accuracies": 0.875, "rewards/chosen": -0.054034750908613205, "rewards/margins": 0.13907811045646667, "rewards/rejected": -0.19311286509037018, "step": 1861 }, { "epoch": 1.135885313405521, "grad_norm": 1.3495428562164307, "learning_rate": 6.205021432945499e-06, "log_odds_chosen": 1.1134823560714722, "log_odds_ratio": -0.399405300617218, "logits/chosen": -0.9584141969680786, "logits/rejected": -0.9299060702323914, "logps/chosen": -0.839007556438446, "logps/rejected": -1.622859239578247, "loss": 1.1124, "nll_loss": 1.050830364227295, "rewards/accuracies": 0.875, "rewards/chosen": -0.08390074968338013, "rewards/margins": 0.07838518172502518, "rewards/rejected": -0.1622859388589859, "step": 1862 }, { "epoch": 1.1364953484825377, "grad_norm": 1.9673430919647217, "learning_rate": 6.2040416411512555e-06, "log_odds_chosen": 1.353814721107483, "log_odds_ratio": -0.5851765871047974, "logits/chosen": -0.7940864562988281, "logits/rejected": -0.8434898853302002, "logps/chosen": -1.0586892366409302, "logps/rejected": -2.2444005012512207, "loss": 1.3396, "nll_loss": 1.2127701044082642, "rewards/accuracies": 0.625, "rewards/chosen": -0.10586893558502197, "rewards/margins": 0.11857111752033234, "rewards/rejected": -0.2244400531053543, "step": 1863 }, { "epoch": 1.1371053835595546, "grad_norm": 2.273771286010742, "learning_rate": 6.203061849357011e-06, "log_odds_chosen": 0.8680706024169922, "log_odds_ratio": -0.5531262755393982, "logits/chosen": -0.9652572870254517, "logits/rejected": -1.0305250883102417, "logps/chosen": -0.8469842672348022, "logps/rejected": -1.3546435832977295, "loss": 1.2227, "nll_loss": 1.070144772529602, "rewards/accuracies": 0.625, "rewards/chosen": -0.08469842374324799, "rewards/margins": 0.05076592415571213, "rewards/rejected": -0.1354643553495407, "step": 1864 }, { "epoch": 1.1377154186365717, "grad_norm": 1.5365962982177734, "learning_rate": 6.202082057562768e-06, "log_odds_chosen": 1.9989337921142578, "log_odds_ratio": -0.34960898756980896, "logits/chosen": -0.8413887023925781, "logits/rejected": -0.9653310775756836, "logps/chosen": -0.6399502754211426, "logps/rejected": -1.9297090768814087, "loss": 1.0299, "nll_loss": 0.6937770843505859, "rewards/accuracies": 1.0, "rewards/chosen": -0.06399503350257874, "rewards/margins": 0.12897589802742004, "rewards/rejected": -0.19297091662883759, "step": 1865 }, { "epoch": 1.1383254537135885, "grad_norm": 1.7181816101074219, "learning_rate": 6.201102265768524e-06, "log_odds_chosen": 0.4041581451892853, "log_odds_ratio": -0.6238812208175659, "logits/chosen": -0.9944661855697632, "logits/rejected": -1.0694472789764404, "logps/chosen": -0.9866172075271606, "logps/rejected": -1.2466477155685425, "loss": 1.3037, "nll_loss": 1.436673879623413, "rewards/accuracies": 0.375, "rewards/chosen": -0.09866172075271606, "rewards/margins": 0.026003055274486542, "rewards/rejected": -0.12466476857662201, "step": 1866 }, { "epoch": 1.1389354887906054, "grad_norm": 1.9721821546554565, "learning_rate": 6.20012247397428e-06, "log_odds_chosen": 2.0134339332580566, "log_odds_ratio": -0.29123571515083313, "logits/chosen": -0.6119053363800049, "logits/rejected": -0.7249542474746704, "logps/chosen": -0.604301929473877, "logps/rejected": -1.9464223384857178, "loss": 0.9689, "nll_loss": 0.8665163516998291, "rewards/accuracies": 0.75, "rewards/chosen": -0.060430191457271576, "rewards/margins": 0.13421204686164856, "rewards/rejected": -0.19464224576950073, "step": 1867 }, { "epoch": 1.1395455238676224, "grad_norm": 1.4373723268508911, "learning_rate": 6.199142682180037e-06, "log_odds_chosen": 0.6263667345046997, "log_odds_ratio": -0.465983122587204, "logits/chosen": -0.9582157135009766, "logits/rejected": -0.8746200799942017, "logps/chosen": -1.0475481748580933, "logps/rejected": -1.504184603691101, "loss": 1.0845, "nll_loss": 1.1178655624389648, "rewards/accuracies": 0.875, "rewards/chosen": -0.10475482791662216, "rewards/margins": 0.045663632452487946, "rewards/rejected": -0.1504184752702713, "step": 1868 }, { "epoch": 1.1401555589446393, "grad_norm": 1.2351188659667969, "learning_rate": 6.198162890385792e-06, "log_odds_chosen": 0.1456688642501831, "log_odds_ratio": -0.7436883449554443, "logits/chosen": -1.1288871765136719, "logits/rejected": -0.9932481646537781, "logps/chosen": -1.0026049613952637, "logps/rejected": -1.123643398284912, "loss": 1.1137, "nll_loss": 1.1304811239242554, "rewards/accuracies": 0.375, "rewards/chosen": -0.10026049613952637, "rewards/margins": 0.012103849090635777, "rewards/rejected": -0.11236433684825897, "step": 1869 }, { "epoch": 1.1407655940216563, "grad_norm": 1.5069020986557007, "learning_rate": 6.197183098591548e-06, "log_odds_chosen": 1.6599667072296143, "log_odds_ratio": -0.34425869584083557, "logits/chosen": -0.8473042249679565, "logits/rejected": -0.7732916474342346, "logps/chosen": -0.5966208577156067, "logps/rejected": -1.700257420539856, "loss": 1.0446, "nll_loss": 0.9037070274353027, "rewards/accuracies": 0.875, "rewards/chosen": -0.05966208875179291, "rewards/margins": 0.11036364734172821, "rewards/rejected": -0.17002573609352112, "step": 1870 }, { "epoch": 1.1413756290986732, "grad_norm": 1.029371738433838, "learning_rate": 6.196203306797305e-06, "log_odds_chosen": 0.5889120101928711, "log_odds_ratio": -0.6302324533462524, "logits/chosen": -1.1248784065246582, "logits/rejected": -0.9952324628829956, "logps/chosen": -0.8437801003456116, "logps/rejected": -1.159134864807129, "loss": 1.0443, "nll_loss": 1.1711654663085938, "rewards/accuracies": 0.375, "rewards/chosen": -0.08437801152467728, "rewards/margins": 0.03153548017144203, "rewards/rejected": -0.11591348797082901, "step": 1871 }, { "epoch": 1.14198566417569, "grad_norm": 4.474699020385742, "learning_rate": 6.1952235150030615e-06, "log_odds_chosen": 1.6570382118225098, "log_odds_ratio": -0.46609288454055786, "logits/chosen": -0.8319189548492432, "logits/rejected": -0.9470818042755127, "logps/chosen": -0.8631861209869385, "logps/rejected": -2.0092315673828125, "loss": 1.0204, "nll_loss": 1.0241082906723022, "rewards/accuracies": 0.5, "rewards/chosen": -0.08631862699985504, "rewards/margins": 0.11460451781749725, "rewards/rejected": -0.2009231448173523, "step": 1872 }, { "epoch": 1.142595699252707, "grad_norm": 3.9155313968658447, "learning_rate": 6.194243723208818e-06, "log_odds_chosen": 1.5229874849319458, "log_odds_ratio": -0.4485635757446289, "logits/chosen": -0.946499764919281, "logits/rejected": -0.9044427871704102, "logps/chosen": -0.8476817607879639, "logps/rejected": -2.0384178161621094, "loss": 1.1508, "nll_loss": 0.9983426928520203, "rewards/accuracies": 0.75, "rewards/chosen": -0.08476817607879639, "rewards/margins": 0.11907360702753067, "rewards/rejected": -0.20384179055690765, "step": 1873 }, { "epoch": 1.143205734329724, "grad_norm": 2.3262429237365723, "learning_rate": 6.1932639314145745e-06, "log_odds_chosen": 0.7805324792861938, "log_odds_ratio": -0.7241466045379639, "logits/chosen": -0.9833155870437622, "logits/rejected": -1.0178883075714111, "logps/chosen": -0.7775952816009521, "logps/rejected": -1.2405163049697876, "loss": 1.0019, "nll_loss": 0.8849153518676758, "rewards/accuracies": 0.375, "rewards/chosen": -0.07775953412055969, "rewards/margins": 0.046292103826999664, "rewards/rejected": -0.12405163794755936, "step": 1874 }, { "epoch": 1.143815769406741, "grad_norm": 1.5591329336166382, "learning_rate": 6.19228413962033e-06, "log_odds_chosen": 0.5544247031211853, "log_odds_ratio": -0.5546396374702454, "logits/chosen": -1.0288503170013428, "logits/rejected": -0.9164700508117676, "logps/chosen": -0.886873722076416, "logps/rejected": -1.2386418581008911, "loss": 1.064, "nll_loss": 0.9624952673912048, "rewards/accuracies": 0.75, "rewards/chosen": -0.08868738263845444, "rewards/margins": 0.03517680987715721, "rewards/rejected": -0.12386419624090195, "step": 1875 }, { "epoch": 1.1444258044837579, "grad_norm": 1.6381213665008545, "learning_rate": 6.191304347826087e-06, "log_odds_chosen": 0.35653799772262573, "log_odds_ratio": -0.8136720657348633, "logits/chosen": -1.1617536544799805, "logits/rejected": -1.009071946144104, "logps/chosen": -1.3393131494522095, "logps/rejected": -1.653497576713562, "loss": 1.2758, "nll_loss": 1.4404219388961792, "rewards/accuracies": 0.375, "rewards/chosen": -0.13393130898475647, "rewards/margins": 0.03141844645142555, "rewards/rejected": -0.16534975171089172, "step": 1876 }, { "epoch": 1.1450358395607747, "grad_norm": 1.8710362911224365, "learning_rate": 6.190324556031843e-06, "log_odds_chosen": 0.7757555842399597, "log_odds_ratio": -0.5130202174186707, "logits/chosen": -0.8493529558181763, "logits/rejected": -1.0175596475601196, "logps/chosen": -0.7410321235656738, "logps/rejected": -1.2450919151306152, "loss": 1.137, "nll_loss": 1.0326722860336304, "rewards/accuracies": 0.5, "rewards/chosen": -0.0741032063961029, "rewards/margins": 0.05040598288178444, "rewards/rejected": -0.12450919300317764, "step": 1877 }, { "epoch": 1.1456458746377916, "grad_norm": 2.1155197620391846, "learning_rate": 6.189344764237599e-06, "log_odds_chosen": 1.7878849506378174, "log_odds_ratio": -0.4713693857192993, "logits/chosen": -0.9456501603126526, "logits/rejected": -1.0148061513900757, "logps/chosen": -0.8652978539466858, "logps/rejected": -2.252124786376953, "loss": 1.1622, "nll_loss": 0.9461432695388794, "rewards/accuracies": 0.625, "rewards/chosen": -0.08652978390455246, "rewards/margins": 0.13868270814418793, "rewards/rejected": -0.22521249949932098, "step": 1878 }, { "epoch": 1.1462559097148086, "grad_norm": 1.8521877527236938, "learning_rate": 6.188364972443356e-06, "log_odds_chosen": 0.5860990285873413, "log_odds_ratio": -0.7061346769332886, "logits/chosen": -0.9570705890655518, "logits/rejected": -1.117529273033142, "logps/chosen": -0.9950999617576599, "logps/rejected": -1.434046745300293, "loss": 1.205, "nll_loss": 1.2423752546310425, "rewards/accuracies": 0.375, "rewards/chosen": -0.09950999915599823, "rewards/margins": 0.043894682079553604, "rewards/rejected": -0.14340466260910034, "step": 1879 }, { "epoch": 1.1468659447918255, "grad_norm": 9.279607772827148, "learning_rate": 6.187385180649112e-06, "log_odds_chosen": 1.519971251487732, "log_odds_ratio": -0.37854236364364624, "logits/chosen": -0.9796421527862549, "logits/rejected": -1.169400930404663, "logps/chosen": -0.7197120189666748, "logps/rejected": -1.824576497077942, "loss": 1.1789, "nll_loss": 1.0751789808273315, "rewards/accuracies": 0.875, "rewards/chosen": -0.07197120040655136, "rewards/margins": 0.11048644781112671, "rewards/rejected": -0.18245765566825867, "step": 1880 }, { "epoch": 1.1474759798688425, "grad_norm": 6.074314117431641, "learning_rate": 6.186405388854867e-06, "log_odds_chosen": 1.0037450790405273, "log_odds_ratio": -0.4656254053115845, "logits/chosen": -1.1089000701904297, "logits/rejected": -1.0908305644989014, "logps/chosen": -0.9436542391777039, "logps/rejected": -1.703993558883667, "loss": 1.224, "nll_loss": 1.3239285945892334, "rewards/accuracies": 0.625, "rewards/chosen": -0.0943654254078865, "rewards/margins": 0.07603394240140915, "rewards/rejected": -0.17039936780929565, "step": 1881 }, { "epoch": 1.1480860149458594, "grad_norm": 1.4783776998519897, "learning_rate": 6.185425597060624e-06, "log_odds_chosen": 1.129721999168396, "log_odds_ratio": -0.5107484459877014, "logits/chosen": -0.9028922915458679, "logits/rejected": -0.8726842999458313, "logps/chosen": -0.7584066390991211, "logps/rejected": -1.5537830591201782, "loss": 1.089, "nll_loss": 1.035994052886963, "rewards/accuracies": 0.75, "rewards/chosen": -0.07584066689014435, "rewards/margins": 0.07953764498233795, "rewards/rejected": -0.1553783118724823, "step": 1882 }, { "epoch": 1.1486960500228762, "grad_norm": 1.9197344779968262, "learning_rate": 6.1844458052663805e-06, "log_odds_chosen": 1.7450882196426392, "log_odds_ratio": -0.28286778926849365, "logits/chosen": -0.7957932353019714, "logits/rejected": -0.89167320728302, "logps/chosen": -0.8077200651168823, "logps/rejected": -2.0506253242492676, "loss": 0.9697, "nll_loss": 0.9025224447250366, "rewards/accuracies": 1.0, "rewards/chosen": -0.08077201247215271, "rewards/margins": 0.12429053336381912, "rewards/rejected": -0.20506253838539124, "step": 1883 }, { "epoch": 1.1493060850998933, "grad_norm": 2.092971086502075, "learning_rate": 6.183466013472137e-06, "log_odds_chosen": 0.19126102328300476, "log_odds_ratio": -0.6754552125930786, "logits/chosen": -0.9990147948265076, "logits/rejected": -1.0397837162017822, "logps/chosen": -1.0396873950958252, "logps/rejected": -1.177559733390808, "loss": 1.2169, "nll_loss": 1.1969659328460693, "rewards/accuracies": 0.5, "rewards/chosen": -0.10396874696016312, "rewards/margins": 0.013787232339382172, "rewards/rejected": -0.11775597929954529, "step": 1884 }, { "epoch": 1.1499161201769101, "grad_norm": 4.440672874450684, "learning_rate": 6.182486221677894e-06, "log_odds_chosen": 1.7823928594589233, "log_odds_ratio": -0.3295990526676178, "logits/chosen": -0.8213328719139099, "logits/rejected": -0.8702632188796997, "logps/chosen": -0.6516637802124023, "logps/rejected": -1.9099030494689941, "loss": 1.0656, "nll_loss": 0.880578875541687, "rewards/accuracies": 0.75, "rewards/chosen": -0.06516637653112411, "rewards/margins": 0.12582392990589142, "rewards/rejected": -0.19099032878875732, "step": 1885 }, { "epoch": 1.1505261552539272, "grad_norm": 2.607987880706787, "learning_rate": 6.181506429883649e-06, "log_odds_chosen": 1.6010125875473022, "log_odds_ratio": -0.47251781821250916, "logits/chosen": -0.8214245438575745, "logits/rejected": -0.8215417861938477, "logps/chosen": -0.6150713562965393, "logps/rejected": -1.8352166414260864, "loss": 1.1742, "nll_loss": 1.0368905067443848, "rewards/accuracies": 0.75, "rewards/chosen": -0.06150713935494423, "rewards/margins": 0.12201452255249023, "rewards/rejected": -0.18352165818214417, "step": 1886 }, { "epoch": 1.151136190330944, "grad_norm": 2.1298601627349854, "learning_rate": 6.180526638089406e-06, "log_odds_chosen": 0.9784334897994995, "log_odds_ratio": -0.4783822000026703, "logits/chosen": -0.8418402075767517, "logits/rejected": -0.7941498756408691, "logps/chosen": -0.8354650735855103, "logps/rejected": -1.5387604236602783, "loss": 1.2463, "nll_loss": 1.0166915655136108, "rewards/accuracies": 0.625, "rewards/chosen": -0.08354651182889938, "rewards/margins": 0.07032953947782516, "rewards/rejected": -0.15387605130672455, "step": 1887 }, { "epoch": 1.151746225407961, "grad_norm": 1.508378505706787, "learning_rate": 6.179546846295162e-06, "log_odds_chosen": 0.32964545488357544, "log_odds_ratio": -0.6308181285858154, "logits/chosen": -0.945001482963562, "logits/rejected": -1.0403510332107544, "logps/chosen": -1.2874683141708374, "logps/rejected": -1.4768226146697998, "loss": 1.3236, "nll_loss": 1.415224313735962, "rewards/accuracies": 0.75, "rewards/chosen": -0.12874683737754822, "rewards/margins": 0.01893543265759945, "rewards/rejected": -0.1476822793483734, "step": 1888 }, { "epoch": 1.152356260484978, "grad_norm": 3.5501699447631836, "learning_rate": 6.178567054500918e-06, "log_odds_chosen": 0.428668737411499, "log_odds_ratio": -0.6906299591064453, "logits/chosen": -1.126889705657959, "logits/rejected": -0.9135847091674805, "logps/chosen": -0.8690334558486938, "logps/rejected": -1.2317818403244019, "loss": 1.0139, "nll_loss": 1.0983538627624512, "rewards/accuracies": 0.375, "rewards/chosen": -0.08690334856510162, "rewards/margins": 0.03627484664320946, "rewards/rejected": -0.12317819148302078, "step": 1889 }, { "epoch": 1.1529662955619948, "grad_norm": 1.198387861251831, "learning_rate": 6.177587262706675e-06, "log_odds_chosen": 1.6851338148117065, "log_odds_ratio": -0.4745844304561615, "logits/chosen": -0.7474543452262878, "logits/rejected": -0.7712869644165039, "logps/chosen": -0.8228708505630493, "logps/rejected": -2.122950792312622, "loss": 0.9772, "nll_loss": 1.01353919506073, "rewards/accuracies": 0.75, "rewards/chosen": -0.08228708058595657, "rewards/margins": 0.13000799715518951, "rewards/rejected": -0.21229508519172668, "step": 1890 }, { "epoch": 1.1535763306390117, "grad_norm": 1.872409462928772, "learning_rate": 6.176607470912431e-06, "log_odds_chosen": 0.2724132239818573, "log_odds_ratio": -0.6368765234947205, "logits/chosen": -1.0119744539260864, "logits/rejected": -0.7987739443778992, "logps/chosen": -0.9886149764060974, "logps/rejected": -1.1493197679519653, "loss": 1.0933, "nll_loss": 1.1140917539596558, "rewards/accuracies": 0.625, "rewards/chosen": -0.09886150062084198, "rewards/margins": 0.01607048511505127, "rewards/rejected": -0.11493197828531265, "step": 1891 }, { "epoch": 1.1541863657160287, "grad_norm": 1.7941319942474365, "learning_rate": 6.1756276791181864e-06, "log_odds_chosen": 2.7145044803619385, "log_odds_ratio": -0.2104458212852478, "logits/chosen": -0.6677671670913696, "logits/rejected": -0.8947114944458008, "logps/chosen": -0.6263543367385864, "logps/rejected": -2.511397361755371, "loss": 1.0811, "nll_loss": 0.7706609964370728, "rewards/accuracies": 1.0, "rewards/chosen": -0.06263543665409088, "rewards/margins": 0.18850427865982056, "rewards/rejected": -0.25113973021507263, "step": 1892 }, { "epoch": 1.1547964007930456, "grad_norm": 1.3201128244400024, "learning_rate": 6.174647887323943e-06, "log_odds_chosen": 0.6144051551818848, "log_odds_ratio": -0.5032006502151489, "logits/chosen": -0.48715880513191223, "logits/rejected": -0.6156313419342041, "logps/chosen": -0.7178645133972168, "logps/rejected": -0.9895616173744202, "loss": 1.0223, "nll_loss": 0.6833261847496033, "rewards/accuracies": 0.75, "rewards/chosen": -0.07178644835948944, "rewards/margins": 0.027169711887836456, "rewards/rejected": -0.0989561527967453, "step": 1893 }, { "epoch": 1.1554064358700624, "grad_norm": 2.1249778270721436, "learning_rate": 6.1736680955296995e-06, "log_odds_chosen": 0.6619554758071899, "log_odds_ratio": -0.5376853346824646, "logits/chosen": -0.8996084928512573, "logits/rejected": -0.7938793897628784, "logps/chosen": -0.8608701229095459, "logps/rejected": -1.2759701013565063, "loss": 1.3136, "nll_loss": 1.1110392808914185, "rewards/accuracies": 0.75, "rewards/chosen": -0.08608700335025787, "rewards/margins": 0.041510000824928284, "rewards/rejected": -0.12759701907634735, "step": 1894 }, { "epoch": 1.1560164709470795, "grad_norm": 1.234594464302063, "learning_rate": 6.172688303735456e-06, "log_odds_chosen": 0.6094122529029846, "log_odds_ratio": -0.5079977512359619, "logits/chosen": -0.9286766052246094, "logits/rejected": -0.8266024589538574, "logps/chosen": -0.6569213271141052, "logps/rejected": -0.9102300405502319, "loss": 1.2875, "nll_loss": 1.0827414989471436, "rewards/accuracies": 0.625, "rewards/chosen": -0.06569212675094604, "rewards/margins": 0.025330867618322372, "rewards/rejected": -0.09102300554513931, "step": 1895 }, { "epoch": 1.1566265060240963, "grad_norm": 3.5940911769866943, "learning_rate": 6.171708511941213e-06, "log_odds_chosen": 1.8159606456756592, "log_odds_ratio": -0.4187665581703186, "logits/chosen": -0.7341688871383667, "logits/rejected": -0.7011659145355225, "logps/chosen": -0.7752352356910706, "logps/rejected": -2.1800413131713867, "loss": 1.0069, "nll_loss": 0.9706186056137085, "rewards/accuracies": 0.75, "rewards/chosen": -0.07752352207899094, "rewards/margins": 0.14048060774803162, "rewards/rejected": -0.21800413727760315, "step": 1896 }, { "epoch": 1.1572365411011134, "grad_norm": 3.513883352279663, "learning_rate": 6.170728720146969e-06, "log_odds_chosen": 1.1831560134887695, "log_odds_ratio": -0.5777862071990967, "logits/chosen": -0.936644434928894, "logits/rejected": -1.0874804258346558, "logps/chosen": -0.922227144241333, "logps/rejected": -2.0144901275634766, "loss": 1.1393, "nll_loss": 1.193086862564087, "rewards/accuracies": 0.75, "rewards/chosen": -0.09222272038459778, "rewards/margins": 0.10922630876302719, "rewards/rejected": -0.20144902169704437, "step": 1897 }, { "epoch": 1.1578465761781302, "grad_norm": 1.069838285446167, "learning_rate": 6.169748928352724e-06, "log_odds_chosen": 0.8015347719192505, "log_odds_ratio": -0.49637776613235474, "logits/chosen": -1.0325591564178467, "logits/rejected": -1.1352123022079468, "logps/chosen": -0.7599188089370728, "logps/rejected": -1.2120007276535034, "loss": 1.044, "nll_loss": 1.1138887405395508, "rewards/accuracies": 0.625, "rewards/chosen": -0.07599188387393951, "rewards/margins": 0.045208193361759186, "rewards/rejected": -0.1212000697851181, "step": 1898 }, { "epoch": 1.158456611255147, "grad_norm": 2.051766872406006, "learning_rate": 6.168769136558481e-06, "log_odds_chosen": 2.31754469871521, "log_odds_ratio": -0.20775151252746582, "logits/chosen": -0.8784830570220947, "logits/rejected": -1.0388380289077759, "logps/chosen": -0.5573232769966125, "logps/rejected": -2.1912477016448975, "loss": 1.1331, "nll_loss": 0.9474217891693115, "rewards/accuracies": 1.0, "rewards/chosen": -0.05573233217000961, "rewards/margins": 0.16339245438575745, "rewards/rejected": -0.21912477910518646, "step": 1899 }, { "epoch": 1.1590666463321642, "grad_norm": 7.866484642028809, "learning_rate": 6.167789344764237e-06, "log_odds_chosen": 1.8350121974945068, "log_odds_ratio": -0.37884286046028137, "logits/chosen": -0.7978610992431641, "logits/rejected": -0.8353217840194702, "logps/chosen": -0.6245733499526978, "logps/rejected": -2.0127689838409424, "loss": 0.9696, "nll_loss": 0.911444365978241, "rewards/accuracies": 0.75, "rewards/chosen": -0.062457334250211716, "rewards/margins": 0.1388195902109146, "rewards/rejected": -0.20127692818641663, "step": 1900 }, { "epoch": 1.159676681409181, "grad_norm": 1.2057222127914429, "learning_rate": 6.166809552969994e-06, "log_odds_chosen": 2.160489559173584, "log_odds_ratio": -0.26412248611450195, "logits/chosen": -0.8016148805618286, "logits/rejected": -0.7774183750152588, "logps/chosen": -0.8268200755119324, "logps/rejected": -2.56196928024292, "loss": 1.0524, "nll_loss": 0.9533221125602722, "rewards/accuracies": 1.0, "rewards/chosen": -0.08268201351165771, "rewards/margins": 0.17351491749286652, "rewards/rejected": -0.25619691610336304, "step": 1901 }, { "epoch": 1.160286716486198, "grad_norm": 1.1584596633911133, "learning_rate": 6.16582976117575e-06, "log_odds_chosen": 0.7757169008255005, "log_odds_ratio": -0.5677451491355896, "logits/chosen": -0.9012803435325623, "logits/rejected": -0.9021557569503784, "logps/chosen": -0.7155272960662842, "logps/rejected": -1.252291202545166, "loss": 0.9546, "nll_loss": 0.8719240427017212, "rewards/accuracies": 0.75, "rewards/chosen": -0.07155272364616394, "rewards/margins": 0.053676407784223557, "rewards/rejected": -0.1252291351556778, "step": 1902 }, { "epoch": 1.160896751563215, "grad_norm": 2.011111259460449, "learning_rate": 6.164849969381506e-06, "log_odds_chosen": 0.8400827646255493, "log_odds_ratio": -0.5591062307357788, "logits/chosen": -0.9015997052192688, "logits/rejected": -0.6930168271064758, "logps/chosen": -0.9031659364700317, "logps/rejected": -1.3899331092834473, "loss": 1.2709, "nll_loss": 1.1178019046783447, "rewards/accuracies": 0.625, "rewards/chosen": -0.09031659364700317, "rewards/margins": 0.048676714301109314, "rewards/rejected": -0.1389933079481125, "step": 1903 }, { "epoch": 1.1615067866402318, "grad_norm": 1.4818962812423706, "learning_rate": 6.1638701775872624e-06, "log_odds_chosen": -0.003874644637107849, "log_odds_ratio": -0.8828010559082031, "logits/chosen": -0.9960297346115112, "logits/rejected": -1.0441087484359741, "logps/chosen": -1.0786765813827515, "logps/rejected": -1.167341709136963, "loss": 1.1814, "nll_loss": 1.4482567310333252, "rewards/accuracies": 0.375, "rewards/chosen": -0.10786765813827515, "rewards/margins": 0.008866516873240471, "rewards/rejected": -0.11673416197299957, "step": 1904 }, { "epoch": 1.1621168217172488, "grad_norm": 2.297999620437622, "learning_rate": 6.1628903857930186e-06, "log_odds_chosen": 1.0540745258331299, "log_odds_ratio": -0.4542146921157837, "logits/chosen": -0.8359436988830566, "logits/rejected": -0.7071722745895386, "logps/chosen": -0.8140203356742859, "logps/rejected": -1.5598843097686768, "loss": 1.1292, "nll_loss": 1.0254648923873901, "rewards/accuracies": 0.875, "rewards/chosen": -0.08140204101800919, "rewards/margins": 0.0745863988995552, "rewards/rejected": -0.1559884399175644, "step": 1905 }, { "epoch": 1.1627268567942657, "grad_norm": 1.7503457069396973, "learning_rate": 6.161910593998775e-06, "log_odds_chosen": 0.5622363686561584, "log_odds_ratio": -0.7114524841308594, "logits/chosen": -1.027565836906433, "logits/rejected": -0.8905953764915466, "logps/chosen": -0.9115147590637207, "logps/rejected": -1.4184433221817017, "loss": 1.0435, "nll_loss": 1.2425119876861572, "rewards/accuracies": 0.5, "rewards/chosen": -0.09115147590637207, "rewards/margins": 0.0506928525865078, "rewards/rejected": -0.14184433221817017, "step": 1906 }, { "epoch": 1.1633368918712825, "grad_norm": 1.2640544176101685, "learning_rate": 6.160930802204532e-06, "log_odds_chosen": 3.258756160736084, "log_odds_ratio": -0.16616445779800415, "logits/chosen": -0.723797082901001, "logits/rejected": -0.8213954567909241, "logps/chosen": -0.49804723262786865, "logps/rejected": -2.9387259483337402, "loss": 1.1388, "nll_loss": 0.7390480041503906, "rewards/accuracies": 1.0, "rewards/chosen": -0.049804724752902985, "rewards/margins": 0.24406787753105164, "rewards/rejected": -0.293872594833374, "step": 1907 }, { "epoch": 1.1639469269482996, "grad_norm": 1.9485423564910889, "learning_rate": 6.159951010410288e-06, "log_odds_chosen": 0.6532278060913086, "log_odds_ratio": -0.6790818572044373, "logits/chosen": -0.7044081687927246, "logits/rejected": -0.9030882716178894, "logps/chosen": -0.8153014183044434, "logps/rejected": -1.3425061702728271, "loss": 1.3586, "nll_loss": 0.9460065364837646, "rewards/accuracies": 0.5, "rewards/chosen": -0.0815301388502121, "rewards/margins": 0.05272047221660614, "rewards/rejected": -0.13425061106681824, "step": 1908 }, { "epoch": 1.1645569620253164, "grad_norm": 4.106560707092285, "learning_rate": 6.158971218616043e-06, "log_odds_chosen": 0.4534391760826111, "log_odds_ratio": -0.5332414507865906, "logits/chosen": -0.8352651000022888, "logits/rejected": -0.8289226293563843, "logps/chosen": -0.7352844476699829, "logps/rejected": -0.9996950626373291, "loss": 1.1088, "nll_loss": 1.022517442703247, "rewards/accuracies": 0.625, "rewards/chosen": -0.07352843880653381, "rewards/margins": 0.02644106186926365, "rewards/rejected": -0.09996950626373291, "step": 1909 }, { "epoch": 1.1651669971023333, "grad_norm": 1.6465097665786743, "learning_rate": 6.1579914268218e-06, "log_odds_chosen": 1.3068652153015137, "log_odds_ratio": -0.419045090675354, "logits/chosen": -0.8089885115623474, "logits/rejected": -0.8690946102142334, "logps/chosen": -0.8168615102767944, "logps/rejected": -1.638056993484497, "loss": 1.148, "nll_loss": 1.092693567276001, "rewards/accuracies": 0.875, "rewards/chosen": -0.08168614655733109, "rewards/margins": 0.08211953938007355, "rewards/rejected": -0.16380569338798523, "step": 1910 }, { "epoch": 1.1657770321793504, "grad_norm": 1.3261284828186035, "learning_rate": 6.157011635027556e-06, "log_odds_chosen": 1.3209524154663086, "log_odds_ratio": -0.43927648663520813, "logits/chosen": -0.7995851039886475, "logits/rejected": -0.9726670980453491, "logps/chosen": -0.8001620173454285, "logps/rejected": -1.5799167156219482, "loss": 1.0735, "nll_loss": 0.9560781717300415, "rewards/accuracies": 0.625, "rewards/chosen": -0.08001620322465897, "rewards/margins": 0.07797548174858093, "rewards/rejected": -0.1579916775226593, "step": 1911 }, { "epoch": 1.1663870672563672, "grad_norm": 1.1767388582229614, "learning_rate": 6.156031843233313e-06, "log_odds_chosen": 1.2300820350646973, "log_odds_ratio": -0.4835177958011627, "logits/chosen": -1.078589677810669, "logits/rejected": -1.0506423711776733, "logps/chosen": -0.6394737958908081, "logps/rejected": -1.5293591022491455, "loss": 1.0767, "nll_loss": 0.9010640978813171, "rewards/accuracies": 0.625, "rewards/chosen": -0.06394738703966141, "rewards/margins": 0.08898851275444031, "rewards/rejected": -0.1529359072446823, "step": 1912 }, { "epoch": 1.1669971023333843, "grad_norm": 1.9094674587249756, "learning_rate": 6.155052051439069e-06, "log_odds_chosen": -0.035673707723617554, "log_odds_ratio": -0.7432850003242493, "logits/chosen": -1.107018232345581, "logits/rejected": -1.0750035047531128, "logps/chosen": -1.1238279342651367, "logps/rejected": -1.0874011516571045, "loss": 1.1573, "nll_loss": 1.3147058486938477, "rewards/accuracies": 0.375, "rewards/chosen": -0.11238278448581696, "rewards/margins": -0.003642681520432234, "rewards/rejected": -0.10874010622501373, "step": 1913 }, { "epoch": 1.1676071374104011, "grad_norm": 1.2199220657348633, "learning_rate": 6.154072259644825e-06, "log_odds_chosen": 0.499519407749176, "log_odds_ratio": -0.5575780272483826, "logits/chosen": -0.8435899615287781, "logits/rejected": -0.9705110788345337, "logps/chosen": -0.9126136898994446, "logps/rejected": -1.319682002067566, "loss": 1.1259, "nll_loss": 1.1081459522247314, "rewards/accuracies": 0.625, "rewards/chosen": -0.0912613794207573, "rewards/margins": 0.0407068245112896, "rewards/rejected": -0.1319682002067566, "step": 1914 }, { "epoch": 1.168217172487418, "grad_norm": 1.2594363689422607, "learning_rate": 6.1530924678505815e-06, "log_odds_chosen": 1.4786286354064941, "log_odds_ratio": -0.34108781814575195, "logits/chosen": -0.8854197263717651, "logits/rejected": -1.061411738395691, "logps/chosen": -0.697411060333252, "logps/rejected": -1.6518886089324951, "loss": 1.096, "nll_loss": 0.9234029054641724, "rewards/accuracies": 0.875, "rewards/chosen": -0.06974110752344131, "rewards/margins": 0.09544776380062103, "rewards/rejected": -0.16518887877464294, "step": 1915 }, { "epoch": 1.168827207564435, "grad_norm": 1.6560332775115967, "learning_rate": 6.152112676056338e-06, "log_odds_chosen": 0.2505122721195221, "log_odds_ratio": -0.6586001515388489, "logits/chosen": -0.6978837251663208, "logits/rejected": -0.6807445287704468, "logps/chosen": -0.9055915474891663, "logps/rejected": -0.9909784197807312, "loss": 1.1489, "nll_loss": 1.1241217851638794, "rewards/accuracies": 0.5, "rewards/chosen": -0.09055916219949722, "rewards/margins": 0.008538685739040375, "rewards/rejected": -0.099097840487957, "step": 1916 }, { "epoch": 1.1694372426414519, "grad_norm": 1.5124452114105225, "learning_rate": 6.151132884262094e-06, "log_odds_chosen": 1.1491678953170776, "log_odds_ratio": -0.3976607918739319, "logits/chosen": -0.8102552890777588, "logits/rejected": -0.8365095257759094, "logps/chosen": -0.7600656151771545, "logps/rejected": -1.5647461414337158, "loss": 1.0353, "nll_loss": 1.0286318063735962, "rewards/accuracies": 0.875, "rewards/chosen": -0.07600656151771545, "rewards/margins": 0.0804680585861206, "rewards/rejected": -0.15647462010383606, "step": 1917 }, { "epoch": 1.1700472777184687, "grad_norm": 1.595719337463379, "learning_rate": 6.150153092467851e-06, "log_odds_chosen": 0.3079521656036377, "log_odds_ratio": -0.6058942079544067, "logits/chosen": -1.0470235347747803, "logits/rejected": -0.9846833944320679, "logps/chosen": -0.9990240335464478, "logps/rejected": -1.2126682996749878, "loss": 1.2606, "nll_loss": 1.2194840908050537, "rewards/accuracies": 0.75, "rewards/chosen": -0.09990240633487701, "rewards/margins": 0.021364422515034676, "rewards/rejected": -0.12126682698726654, "step": 1918 }, { "epoch": 1.1706573127954858, "grad_norm": 1.0516843795776367, "learning_rate": 6.149173300673607e-06, "log_odds_chosen": 2.369492769241333, "log_odds_ratio": -0.16003091633319855, "logits/chosen": -0.9285350441932678, "logits/rejected": -0.8798165917396545, "logps/chosen": -0.6502379775047302, "logps/rejected": -2.30570387840271, "loss": 1.2079, "nll_loss": 0.9468501806259155, "rewards/accuracies": 1.0, "rewards/chosen": -0.06502380222082138, "rewards/margins": 0.16554658114910126, "rewards/rejected": -0.23057037591934204, "step": 1919 }, { "epoch": 1.1712673478725026, "grad_norm": 1.1913866996765137, "learning_rate": 6.148193508879363e-06, "log_odds_chosen": 0.442192405462265, "log_odds_ratio": -0.7067618370056152, "logits/chosen": -0.9396578073501587, "logits/rejected": -0.9842542409896851, "logps/chosen": -0.9055806994438171, "logps/rejected": -1.2064776420593262, "loss": 1.0306, "nll_loss": 0.9714670181274414, "rewards/accuracies": 0.5, "rewards/chosen": -0.09055806696414948, "rewards/margins": 0.03008970059454441, "rewards/rejected": -0.12064777314662933, "step": 1920 }, { "epoch": 1.1718773829495195, "grad_norm": 1.338685393333435, "learning_rate": 6.147213717085119e-06, "log_odds_chosen": 2.0609304904937744, "log_odds_ratio": -0.24977312982082367, "logits/chosen": -0.7663528919219971, "logits/rejected": -0.8532339334487915, "logps/chosen": -0.6304246187210083, "logps/rejected": -2.0901362895965576, "loss": 0.9661, "nll_loss": 0.804653525352478, "rewards/accuracies": 0.875, "rewards/chosen": -0.06304246932268143, "rewards/margins": 0.1459711492061615, "rewards/rejected": -0.20901362597942352, "step": 1921 }, { "epoch": 1.1724874180265366, "grad_norm": 1.5769727230072021, "learning_rate": 6.146233925290875e-06, "log_odds_chosen": 1.288865566253662, "log_odds_ratio": -0.5080938935279846, "logits/chosen": -0.935179591178894, "logits/rejected": -1.005270004272461, "logps/chosen": -0.8704359531402588, "logps/rejected": -1.789244532585144, "loss": 1.3843, "nll_loss": 1.0853288173675537, "rewards/accuracies": 0.75, "rewards/chosen": -0.08704359829425812, "rewards/margins": 0.09188086539506912, "rewards/rejected": -0.17892447113990784, "step": 1922 }, { "epoch": 1.1730974531035534, "grad_norm": 1.7906203269958496, "learning_rate": 6.145254133496631e-06, "log_odds_chosen": 0.9621886014938354, "log_odds_ratio": -0.5228516459465027, "logits/chosen": -0.8007124662399292, "logits/rejected": -0.9727472066879272, "logps/chosen": -0.6868284344673157, "logps/rejected": -1.2352091073989868, "loss": 1.1403, "nll_loss": 0.8448562622070312, "rewards/accuracies": 0.75, "rewards/chosen": -0.06868284940719604, "rewards/margins": 0.054838065057992935, "rewards/rejected": -0.12352091073989868, "step": 1923 }, { "epoch": 1.1737074881805705, "grad_norm": 1.728851079940796, "learning_rate": 6.144274341702388e-06, "log_odds_chosen": 1.894347906112671, "log_odds_ratio": -0.3970085382461548, "logits/chosen": -0.8067904710769653, "logits/rejected": -0.8284851312637329, "logps/chosen": -0.6765871047973633, "logps/rejected": -2.1759390830993652, "loss": 0.9686, "nll_loss": 0.9206866025924683, "rewards/accuracies": 0.75, "rewards/chosen": -0.06765870749950409, "rewards/margins": 0.14993520081043243, "rewards/rejected": -0.21759389340877533, "step": 1924 }, { "epoch": 1.1743175232575873, "grad_norm": 4.5695481300354, "learning_rate": 6.143294549908144e-06, "log_odds_chosen": 1.3941409587860107, "log_odds_ratio": -0.46197670698165894, "logits/chosen": -0.9335269927978516, "logits/rejected": -0.9270792007446289, "logps/chosen": -0.9905274510383606, "logps/rejected": -2.0167911052703857, "loss": 1.0365, "nll_loss": 0.9926959276199341, "rewards/accuracies": 0.75, "rewards/chosen": -0.09905274212360382, "rewards/margins": 0.10262637585401535, "rewards/rejected": -0.20167914032936096, "step": 1925 }, { "epoch": 1.1749275583346042, "grad_norm": 1.9261523485183716, "learning_rate": 6.1423147581139e-06, "log_odds_chosen": 0.6869014501571655, "log_odds_ratio": -0.5204841494560242, "logits/chosen": -0.7626921534538269, "logits/rejected": -1.0099079608917236, "logps/chosen": -0.7305060029029846, "logps/rejected": -1.1580500602722168, "loss": 0.9651, "nll_loss": 1.0873346328735352, "rewards/accuracies": 0.75, "rewards/chosen": -0.0730506032705307, "rewards/margins": 0.0427544005215168, "rewards/rejected": -0.1158050000667572, "step": 1926 }, { "epoch": 1.1755375934116212, "grad_norm": 1.1937179565429688, "learning_rate": 6.141334966319657e-06, "log_odds_chosen": 0.7609546780586243, "log_odds_ratio": -0.6358985304832458, "logits/chosen": -1.222616195678711, "logits/rejected": -1.1224918365478516, "logps/chosen": -0.9294871091842651, "logps/rejected": -1.5273782014846802, "loss": 1.0984, "nll_loss": 1.2084898948669434, "rewards/accuracies": 0.5, "rewards/chosen": -0.09294870495796204, "rewards/margins": 0.05978911370038986, "rewards/rejected": -0.1527378261089325, "step": 1927 }, { "epoch": 1.176147628488638, "grad_norm": 2.553398609161377, "learning_rate": 6.140355174525413e-06, "log_odds_chosen": 0.7130773663520813, "log_odds_ratio": -0.5587080717086792, "logits/chosen": -0.8728848099708557, "logits/rejected": -0.9704602956771851, "logps/chosen": -1.0173370838165283, "logps/rejected": -1.5438127517700195, "loss": 1.1853, "nll_loss": 1.390412449836731, "rewards/accuracies": 0.75, "rewards/chosen": -0.10173371434211731, "rewards/margins": 0.052647557109594345, "rewards/rejected": -0.15438127517700195, "step": 1928 }, { "epoch": 1.1767576635656551, "grad_norm": 1.5976982116699219, "learning_rate": 6.13937538273117e-06, "log_odds_chosen": 2.1386702060699463, "log_odds_ratio": -0.3195205330848694, "logits/chosen": -0.9091014266014099, "logits/rejected": -0.9558646082878113, "logps/chosen": -0.5413157939910889, "logps/rejected": -2.0464892387390137, "loss": 0.9937, "nll_loss": 0.6517952680587769, "rewards/accuracies": 0.875, "rewards/chosen": -0.05413157492876053, "rewards/margins": 0.15051734447479248, "rewards/rejected": -0.2046489268541336, "step": 1929 }, { "epoch": 1.177367698642672, "grad_norm": 1.6191023588180542, "learning_rate": 6.138395590936926e-06, "log_odds_chosen": 1.5404475927352905, "log_odds_ratio": -0.4922914206981659, "logits/chosen": -0.9114946126937866, "logits/rejected": -1.022381067276001, "logps/chosen": -0.9539744257926941, "logps/rejected": -2.0976874828338623, "loss": 1.1884, "nll_loss": 1.1765053272247314, "rewards/accuracies": 0.75, "rewards/chosen": -0.09539745002985, "rewards/margins": 0.11437129229307175, "rewards/rejected": -0.20976874232292175, "step": 1930 }, { "epoch": 1.1779777337196888, "grad_norm": 2.611266851425171, "learning_rate": 6.137415799142682e-06, "log_odds_chosen": 0.5312680006027222, "log_odds_ratio": -0.6175152659416199, "logits/chosen": -1.2170883417129517, "logits/rejected": -1.0792485475540161, "logps/chosen": -0.9665811061859131, "logps/rejected": -1.241349697113037, "loss": 1.2691, "nll_loss": 1.5026965141296387, "rewards/accuracies": 0.5, "rewards/chosen": -0.09665811061859131, "rewards/margins": 0.02747686579823494, "rewards/rejected": -0.12413498014211655, "step": 1931 }, { "epoch": 1.178587768796706, "grad_norm": 1.5351500511169434, "learning_rate": 6.136436007348438e-06, "log_odds_chosen": 2.3307032585144043, "log_odds_ratio": -0.215173602104187, "logits/chosen": -1.0066018104553223, "logits/rejected": -0.9490712881088257, "logps/chosen": -0.7282769083976746, "logps/rejected": -2.442257881164551, "loss": 1.1167, "nll_loss": 1.0486412048339844, "rewards/accuracies": 1.0, "rewards/chosen": -0.07282768934965134, "rewards/margins": 0.1713981032371521, "rewards/rejected": -0.24422580003738403, "step": 1932 }, { "epoch": 1.1791978038737227, "grad_norm": 3.821624755859375, "learning_rate": 6.135456215554194e-06, "log_odds_chosen": 0.8032358884811401, "log_odds_ratio": -0.5919657349586487, "logits/chosen": -0.9379093647003174, "logits/rejected": -0.8380845785140991, "logps/chosen": -0.9558459520339966, "logps/rejected": -1.4880609512329102, "loss": 1.1629, "nll_loss": 1.0962061882019043, "rewards/accuracies": 0.5, "rewards/chosen": -0.09558460116386414, "rewards/margins": 0.05322149395942688, "rewards/rejected": -0.14880609512329102, "step": 1933 }, { "epoch": 1.1798078389507396, "grad_norm": 2.5304133892059326, "learning_rate": 6.13447642375995e-06, "log_odds_chosen": 0.2294076830148697, "log_odds_ratio": -0.6646734476089478, "logits/chosen": -1.1491808891296387, "logits/rejected": -1.1588623523712158, "logps/chosen": -1.1449604034423828, "logps/rejected": -1.3111640214920044, "loss": 0.996, "nll_loss": 1.33793044090271, "rewards/accuracies": 0.625, "rewards/chosen": -0.11449604481458664, "rewards/margins": 0.016620371490716934, "rewards/rejected": -0.13111642003059387, "step": 1934 }, { "epoch": 1.1804178740277567, "grad_norm": 1.5006256103515625, "learning_rate": 6.133496631965707e-06, "log_odds_chosen": 0.8140839338302612, "log_odds_ratio": -0.5341989398002625, "logits/chosen": -0.8269205093383789, "logits/rejected": -0.7316461205482483, "logps/chosen": -0.8445327877998352, "logps/rejected": -1.3996027708053589, "loss": 1.0909, "nll_loss": 1.0676805973052979, "rewards/accuracies": 0.625, "rewards/chosen": -0.084453284740448, "rewards/margins": 0.05550700053572655, "rewards/rejected": -0.13996028900146484, "step": 1935 }, { "epoch": 1.1810279091047735, "grad_norm": 1.6025490760803223, "learning_rate": 6.1325168401714634e-06, "log_odds_chosen": 0.04245731234550476, "log_odds_ratio": -0.7338944673538208, "logits/chosen": -1.2478389739990234, "logits/rejected": -1.1660666465759277, "logps/chosen": -1.181914210319519, "logps/rejected": -1.2254343032836914, "loss": 1.0259, "nll_loss": 1.3103969097137451, "rewards/accuracies": 0.5, "rewards/chosen": -0.1181914210319519, "rewards/margins": 0.004352009389549494, "rewards/rejected": -0.12254343181848526, "step": 1936 }, { "epoch": 1.1816379441817904, "grad_norm": 2.3568601608276367, "learning_rate": 6.1315370483772196e-06, "log_odds_chosen": 1.2033905982971191, "log_odds_ratio": -0.4709353446960449, "logits/chosen": -0.9667263031005859, "logits/rejected": -0.9443613290786743, "logps/chosen": -1.0106861591339111, "logps/rejected": -1.8891582489013672, "loss": 1.2031, "nll_loss": 1.2177520990371704, "rewards/accuracies": 0.75, "rewards/chosen": -0.10106860846281052, "rewards/margins": 0.08784722536802292, "rewards/rejected": -0.18891583383083344, "step": 1937 }, { "epoch": 1.1822479792588074, "grad_norm": 2.081113576889038, "learning_rate": 6.130557256582976e-06, "log_odds_chosen": 0.91993248462677, "log_odds_ratio": -0.48179781436920166, "logits/chosen": -0.8747386932373047, "logits/rejected": -0.8238855600357056, "logps/chosen": -0.6656641960144043, "logps/rejected": -1.341505765914917, "loss": 1.2091, "nll_loss": 0.7355939149856567, "rewards/accuracies": 0.75, "rewards/chosen": -0.06656641513109207, "rewards/margins": 0.06758416444063187, "rewards/rejected": -0.13415057957172394, "step": 1938 }, { "epoch": 1.1828580143358243, "grad_norm": 1.606594204902649, "learning_rate": 6.129577464788732e-06, "log_odds_chosen": 1.8145334720611572, "log_odds_ratio": -0.39368826150894165, "logits/chosen": -0.8841876983642578, "logits/rejected": -0.86605304479599, "logps/chosen": -0.8085101842880249, "logps/rejected": -2.2417080402374268, "loss": 1.0262, "nll_loss": 0.9460543394088745, "rewards/accuracies": 0.75, "rewards/chosen": -0.08085101842880249, "rewards/margins": 0.14331980049610138, "rewards/rejected": -0.22417080402374268, "step": 1939 }, { "epoch": 1.1834680494128413, "grad_norm": 1.1179251670837402, "learning_rate": 6.128597672994489e-06, "log_odds_chosen": 2.0314652919769287, "log_odds_ratio": -0.3337571620941162, "logits/chosen": -1.0317531824111938, "logits/rejected": -0.9778656959533691, "logps/chosen": -0.7401791214942932, "logps/rejected": -2.220738410949707, "loss": 1.0462, "nll_loss": 0.8950597047805786, "rewards/accuracies": 0.75, "rewards/chosen": -0.07401791214942932, "rewards/margins": 0.14805592596530914, "rewards/rejected": -0.22207383811473846, "step": 1940 }, { "epoch": 1.1840780844898582, "grad_norm": 2.079566478729248, "learning_rate": 6.127617881200245e-06, "log_odds_chosen": 0.8251074552536011, "log_odds_ratio": -0.7159382700920105, "logits/chosen": -1.062404990196228, "logits/rejected": -1.20791494846344, "logps/chosen": -0.8227519989013672, "logps/rejected": -1.5347310304641724, "loss": 1.3153, "nll_loss": 1.2077713012695312, "rewards/accuracies": 0.375, "rewards/chosen": -0.08227519690990448, "rewards/margins": 0.07119790464639664, "rewards/rejected": -0.1534731090068817, "step": 1941 }, { "epoch": 1.184688119566875, "grad_norm": 1.367769718170166, "learning_rate": 6.126638089406001e-06, "log_odds_chosen": 1.2268859148025513, "log_odds_ratio": -0.48798561096191406, "logits/chosen": -0.896306037902832, "logits/rejected": -0.9407915472984314, "logps/chosen": -0.7093474864959717, "logps/rejected": -1.5747385025024414, "loss": 0.9684, "nll_loss": 0.7967731952667236, "rewards/accuracies": 0.625, "rewards/chosen": -0.07093474268913269, "rewards/margins": 0.08653909713029861, "rewards/rejected": -0.1574738472700119, "step": 1942 }, { "epoch": 1.185298154643892, "grad_norm": 1.6935086250305176, "learning_rate": 6.125658297611757e-06, "log_odds_chosen": 0.2797071635723114, "log_odds_ratio": -0.6132888793945312, "logits/chosen": -1.0853676795959473, "logits/rejected": -1.1095309257507324, "logps/chosen": -0.9187334775924683, "logps/rejected": -1.1241984367370605, "loss": 1.081, "nll_loss": 1.0001795291900635, "rewards/accuracies": 0.625, "rewards/chosen": -0.09187334775924683, "rewards/margins": 0.020546499639749527, "rewards/rejected": -0.11241984367370605, "step": 1943 }, { "epoch": 1.185908189720909, "grad_norm": 1.3103749752044678, "learning_rate": 6.124678505817513e-06, "log_odds_chosen": 0.967552125453949, "log_odds_ratio": -0.6152899861335754, "logits/chosen": -0.9332679510116577, "logits/rejected": -0.9140877723693848, "logps/chosen": -0.916873037815094, "logps/rejected": -1.6830496788024902, "loss": 1.0858, "nll_loss": 1.1176408529281616, "rewards/accuracies": 0.625, "rewards/chosen": -0.09168730676174164, "rewards/margins": 0.07661766558885574, "rewards/rejected": -0.16830497980117798, "step": 1944 }, { "epoch": 1.1865182247979258, "grad_norm": 1.4988511800765991, "learning_rate": 6.123698714023269e-06, "log_odds_chosen": 0.44459980726242065, "log_odds_ratio": -0.7156652212142944, "logits/chosen": -1.1191552877426147, "logits/rejected": -0.9767816662788391, "logps/chosen": -0.8400195240974426, "logps/rejected": -1.1573165655136108, "loss": 1.0646, "nll_loss": 1.1572332382202148, "rewards/accuracies": 0.5, "rewards/chosen": -0.08400195837020874, "rewards/margins": 0.03172970563173294, "rewards/rejected": -0.11573165655136108, "step": 1945 }, { "epoch": 1.1871282598749429, "grad_norm": 1.2258896827697754, "learning_rate": 6.122718922229026e-06, "log_odds_chosen": 0.7018535137176514, "log_odds_ratio": -0.5424052476882935, "logits/chosen": -0.9915263652801514, "logits/rejected": -0.9112012386322021, "logps/chosen": -0.729407787322998, "logps/rejected": -1.1906070709228516, "loss": 0.985, "nll_loss": 0.9504985213279724, "rewards/accuracies": 0.625, "rewards/chosen": -0.07294077426195145, "rewards/margins": 0.04611992835998535, "rewards/rejected": -0.1190607100725174, "step": 1946 }, { "epoch": 1.1877382949519597, "grad_norm": 1.584007978439331, "learning_rate": 6.1217391304347825e-06, "log_odds_chosen": 1.1284626722335815, "log_odds_ratio": -0.47009772062301636, "logits/chosen": -0.8826654553413391, "logits/rejected": -0.9878886938095093, "logps/chosen": -0.6249395608901978, "logps/rejected": -1.330596923828125, "loss": 1.0156, "nll_loss": 0.7558545470237732, "rewards/accuracies": 0.75, "rewards/chosen": -0.062493957579135895, "rewards/margins": 0.07056573033332825, "rewards/rejected": -0.13305969536304474, "step": 1947 }, { "epoch": 1.1883483300289766, "grad_norm": 1.3150564432144165, "learning_rate": 6.120759338640539e-06, "log_odds_chosen": 1.9971957206726074, "log_odds_ratio": -0.345524400472641, "logits/chosen": -0.6046509742736816, "logits/rejected": -0.8234940767288208, "logps/chosen": -0.655547559261322, "logps/rejected": -2.0052170753479004, "loss": 0.9235, "nll_loss": 0.7175650000572205, "rewards/accuracies": 0.75, "rewards/chosen": -0.06555476039648056, "rewards/margins": 0.13496695458889008, "rewards/rejected": -0.20052172243595123, "step": 1948 }, { "epoch": 1.1889583651059936, "grad_norm": 2.6709227561950684, "learning_rate": 6.119779546846295e-06, "log_odds_chosen": 0.9523411989212036, "log_odds_ratio": -0.41695690155029297, "logits/chosen": -1.0766938924789429, "logits/rejected": -1.0429322719573975, "logps/chosen": -0.8889974355697632, "logps/rejected": -1.6035760641098022, "loss": 1.1823, "nll_loss": 1.1720877885818481, "rewards/accuracies": 0.875, "rewards/chosen": -0.08889975398778915, "rewards/margins": 0.0714578628540039, "rewards/rejected": -0.16035762429237366, "step": 1949 }, { "epoch": 1.1895684001830105, "grad_norm": 6.052290916442871, "learning_rate": 6.118799755052051e-06, "log_odds_chosen": 1.078476905822754, "log_odds_ratio": -0.47205302119255066, "logits/chosen": -1.1873202323913574, "logits/rejected": -1.089904546737671, "logps/chosen": -1.0845746994018555, "logps/rejected": -1.8594551086425781, "loss": 1.0202, "nll_loss": 1.2430939674377441, "rewards/accuracies": 0.625, "rewards/chosen": -0.10845747590065002, "rewards/margins": 0.07748804241418839, "rewards/rejected": -0.1859455108642578, "step": 1950 }, { "epoch": 1.1901784352600275, "grad_norm": 1.1529802083969116, "learning_rate": 6.117819963257807e-06, "log_odds_chosen": 1.1220998764038086, "log_odds_ratio": -0.41736602783203125, "logits/chosen": -0.8832045197486877, "logits/rejected": -0.997936487197876, "logps/chosen": -0.7679154872894287, "logps/rejected": -1.5194344520568848, "loss": 1.0359, "nll_loss": 0.8494780659675598, "rewards/accuracies": 0.75, "rewards/chosen": -0.07679154723882675, "rewards/margins": 0.07515189051628113, "rewards/rejected": -0.15194344520568848, "step": 1951 }, { "epoch": 1.1907884703370444, "grad_norm": 1.0881304740905762, "learning_rate": 6.116840171463564e-06, "log_odds_chosen": 0.561901330947876, "log_odds_ratio": -0.4602789580821991, "logits/chosen": -0.9667049646377563, "logits/rejected": -1.0440131425857544, "logps/chosen": -0.7823715806007385, "logps/rejected": -1.1187901496887207, "loss": 1.0896, "nll_loss": 0.9078762531280518, "rewards/accuracies": 1.0, "rewards/chosen": -0.07823716104030609, "rewards/margins": 0.03364185988903046, "rewards/rejected": -0.11187902092933655, "step": 1952 }, { "epoch": 1.1913985054140612, "grad_norm": 1.2519773244857788, "learning_rate": 6.11586037966932e-06, "log_odds_chosen": 0.7235759496688843, "log_odds_ratio": -0.5072814226150513, "logits/chosen": -0.8523759245872498, "logits/rejected": -1.081244707107544, "logps/chosen": -0.7986292243003845, "logps/rejected": -1.229026198387146, "loss": 0.9597, "nll_loss": 0.9045858383178711, "rewards/accuracies": 0.75, "rewards/chosen": -0.07986292243003845, "rewards/margins": 0.04303969070315361, "rewards/rejected": -0.12290261685848236, "step": 1953 }, { "epoch": 1.1920085404910783, "grad_norm": 1.4357839822769165, "learning_rate": 6.114880587875077e-06, "log_odds_chosen": 2.190981388092041, "log_odds_ratio": -0.3127932548522949, "logits/chosen": -1.0237624645233154, "logits/rejected": -1.0291624069213867, "logps/chosen": -0.8276845812797546, "logps/rejected": -2.445770025253296, "loss": 1.1646, "nll_loss": 1.3135440349578857, "rewards/accuracies": 0.875, "rewards/chosen": -0.08276846259832382, "rewards/margins": 0.1618085503578186, "rewards/rejected": -0.24457702040672302, "step": 1954 }, { "epoch": 1.1926185755680951, "grad_norm": 1.7183865308761597, "learning_rate": 6.113900796080832e-06, "log_odds_chosen": -0.25252485275268555, "log_odds_ratio": -0.8753600120544434, "logits/chosen": -1.0347111225128174, "logits/rejected": -1.0281202793121338, "logps/chosen": -0.9677433371543884, "logps/rejected": -0.8481078147888184, "loss": 1.2745, "nll_loss": 1.1631925106048584, "rewards/accuracies": 0.125, "rewards/chosen": -0.09677433967590332, "rewards/margins": -0.011963553726673126, "rewards/rejected": -0.0848107859492302, "step": 1955 }, { "epoch": 1.1932286106451122, "grad_norm": 1.376486897468567, "learning_rate": 6.1129210042865884e-06, "log_odds_chosen": 1.1480060815811157, "log_odds_ratio": -0.3991859257221222, "logits/chosen": -0.9669071435928345, "logits/rejected": -0.9371753931045532, "logps/chosen": -0.7272657155990601, "logps/rejected": -1.5819143056869507, "loss": 1.0999, "nll_loss": 1.1142189502716064, "rewards/accuracies": 0.625, "rewards/chosen": -0.07272657006978989, "rewards/margins": 0.08546487241983414, "rewards/rejected": -0.15819144248962402, "step": 1956 }, { "epoch": 1.193838645722129, "grad_norm": 1.6538811922073364, "learning_rate": 6.111941212492345e-06, "log_odds_chosen": 1.0482113361358643, "log_odds_ratio": -0.5454205870628357, "logits/chosen": -1.170936942100525, "logits/rejected": -1.0529236793518066, "logps/chosen": -1.0362534523010254, "logps/rejected": -1.8800404071807861, "loss": 1.1614, "nll_loss": 1.2069122791290283, "rewards/accuracies": 0.625, "rewards/chosen": -0.1036253571510315, "rewards/margins": 0.0843786895275116, "rewards/rejected": -0.1880040466785431, "step": 1957 }, { "epoch": 1.194448680799146, "grad_norm": 1.5076361894607544, "learning_rate": 6.1109614206981015e-06, "log_odds_chosen": 0.799663782119751, "log_odds_ratio": -0.49876606464385986, "logits/chosen": -0.905352771282196, "logits/rejected": -1.159038782119751, "logps/chosen": -0.82319176197052, "logps/rejected": -1.3670539855957031, "loss": 0.975, "nll_loss": 0.9566832780838013, "rewards/accuracies": 0.75, "rewards/chosen": -0.08231917768716812, "rewards/margins": 0.05438623204827309, "rewards/rejected": -0.1367053985595703, "step": 1958 }, { "epoch": 1.195058715876163, "grad_norm": 2.098236322402954, "learning_rate": 6.109981628903858e-06, "log_odds_chosen": 1.1950790882110596, "log_odds_ratio": -0.44588956236839294, "logits/chosen": -0.8315331935882568, "logits/rejected": -0.925173282623291, "logps/chosen": -0.812121570110321, "logps/rejected": -1.6385037899017334, "loss": 1.2096, "nll_loss": 1.0618349313735962, "rewards/accuracies": 0.75, "rewards/chosen": -0.08121216297149658, "rewards/margins": 0.082638218998909, "rewards/rejected": -0.16385036706924438, "step": 1959 }, { "epoch": 1.1956687509531798, "grad_norm": 1.0736113786697388, "learning_rate": 6.109001837109615e-06, "log_odds_chosen": 0.4670121669769287, "log_odds_ratio": -0.6134769916534424, "logits/chosen": -1.0224950313568115, "logits/rejected": -0.9800776243209839, "logps/chosen": -0.9132804870605469, "logps/rejected": -1.2950440645217896, "loss": 1.0425, "nll_loss": 1.0251898765563965, "rewards/accuracies": 0.625, "rewards/chosen": -0.09132804721593857, "rewards/margins": 0.03817635774612427, "rewards/rejected": -0.12950441241264343, "step": 1960 }, { "epoch": 1.1962787860301967, "grad_norm": 3.109894275665283, "learning_rate": 6.10802204531537e-06, "log_odds_chosen": 2.392195701599121, "log_odds_ratio": -0.2864380478858948, "logits/chosen": -0.8663517236709595, "logits/rejected": -0.9761322140693665, "logps/chosen": -0.6669671535491943, "logps/rejected": -2.3377466201782227, "loss": 1.0941, "nll_loss": 1.0855191946029663, "rewards/accuracies": 0.875, "rewards/chosen": -0.06669671833515167, "rewards/margins": 0.1670779436826706, "rewards/rejected": -0.23377466201782227, "step": 1961 }, { "epoch": 1.1968888211072137, "grad_norm": 1.2243047952651978, "learning_rate": 6.107042253521126e-06, "log_odds_chosen": 1.890148401260376, "log_odds_ratio": -0.28583216667175293, "logits/chosen": -1.0497658252716064, "logits/rejected": -0.9375730752944946, "logps/chosen": -0.8508580923080444, "logps/rejected": -2.290438413619995, "loss": 1.1804, "nll_loss": 1.0341870784759521, "rewards/accuracies": 0.875, "rewards/chosen": -0.08508581668138504, "rewards/margins": 0.14395801723003387, "rewards/rejected": -0.2290438413619995, "step": 1962 }, { "epoch": 1.1974988561842306, "grad_norm": 2.7016756534576416, "learning_rate": 6.106062461726883e-06, "log_odds_chosen": 0.33536046743392944, "log_odds_ratio": -0.6436054110527039, "logits/chosen": -1.0154474973678589, "logits/rejected": -1.0196396112442017, "logps/chosen": -0.7643463611602783, "logps/rejected": -0.996879518032074, "loss": 1.2628, "nll_loss": 1.238913893699646, "rewards/accuracies": 0.625, "rewards/chosen": -0.07643464207649231, "rewards/margins": 0.023253316059708595, "rewards/rejected": -0.09968796372413635, "step": 1963 }, { "epoch": 1.1981088912612474, "grad_norm": 1.4078360795974731, "learning_rate": 6.105082669932639e-06, "log_odds_chosen": 1.9147348403930664, "log_odds_ratio": -0.5793519616127014, "logits/chosen": -0.7416610717773438, "logits/rejected": -0.9416378736495972, "logps/chosen": -1.1138083934783936, "logps/rejected": -2.7400708198547363, "loss": 1.2007, "nll_loss": 1.1651644706726074, "rewards/accuracies": 0.625, "rewards/chosen": -0.11138084530830383, "rewards/margins": 0.1626262366771698, "rewards/rejected": -0.27400708198547363, "step": 1964 }, { "epoch": 1.1987189263382645, "grad_norm": 1.7706632614135742, "learning_rate": 6.104102878138395e-06, "log_odds_chosen": -0.17426711320877075, "log_odds_ratio": -0.8458932042121887, "logits/chosen": -0.9850740432739258, "logits/rejected": -0.8741720914840698, "logps/chosen": -1.3258922100067139, "logps/rejected": -1.2237118482589722, "loss": 1.2724, "nll_loss": 1.3071728944778442, "rewards/accuracies": 0.25, "rewards/chosen": -0.1325892210006714, "rewards/margins": -0.010218030773103237, "rewards/rejected": -0.12237118184566498, "step": 1965 }, { "epoch": 1.1993289614152813, "grad_norm": 1.628000259399414, "learning_rate": 6.103123086344151e-06, "log_odds_chosen": 0.982089638710022, "log_odds_ratio": -0.5828201174736023, "logits/chosen": -1.031315803527832, "logits/rejected": -0.890454888343811, "logps/chosen": -0.7984261512756348, "logps/rejected": -1.4788790941238403, "loss": 1.0734, "nll_loss": 1.0705153942108154, "rewards/accuracies": 0.625, "rewards/chosen": -0.07984261214733124, "rewards/margins": 0.06804528832435608, "rewards/rejected": -0.1478879153728485, "step": 1966 }, { "epoch": 1.1999389964922984, "grad_norm": 4.363484859466553, "learning_rate": 6.1021432945499075e-06, "log_odds_chosen": 0.523685872554779, "log_odds_ratio": -0.6513725519180298, "logits/chosen": -1.0952812433242798, "logits/rejected": -1.008217215538025, "logps/chosen": -1.1827609539031982, "logps/rejected": -1.5974361896514893, "loss": 1.189, "nll_loss": 1.2593830823898315, "rewards/accuracies": 0.625, "rewards/chosen": -0.11827610433101654, "rewards/margins": 0.04146752506494522, "rewards/rejected": -0.15974363684654236, "step": 1967 }, { "epoch": 1.2005490315693153, "grad_norm": 1.500910758972168, "learning_rate": 6.1011635027556644e-06, "log_odds_chosen": 1.843275547027588, "log_odds_ratio": -0.3824741542339325, "logits/chosen": -0.9456383585929871, "logits/rejected": -0.7640315294265747, "logps/chosen": -0.7156391143798828, "logps/rejected": -2.119900703430176, "loss": 1.1761, "nll_loss": 0.8608943819999695, "rewards/accuracies": 0.75, "rewards/chosen": -0.07156391441822052, "rewards/margins": 0.1404261589050293, "rewards/rejected": -0.21199005842208862, "step": 1968 }, { "epoch": 1.201159066646332, "grad_norm": 2.330724000930786, "learning_rate": 6.1001837109614206e-06, "log_odds_chosen": 0.9934747815132141, "log_odds_ratio": -0.4215461015701294, "logits/chosen": -0.7517217397689819, "logits/rejected": -0.8433269262313843, "logps/chosen": -0.7292361259460449, "logps/rejected": -1.3677418231964111, "loss": 0.9893, "nll_loss": 0.8978198766708374, "rewards/accuracies": 0.75, "rewards/chosen": -0.07292361557483673, "rewards/margins": 0.06385056674480438, "rewards/rejected": -0.1367741823196411, "step": 1969 }, { "epoch": 1.2017691017233492, "grad_norm": 1.2539145946502686, "learning_rate": 6.099203919167177e-06, "log_odds_chosen": 1.6346873044967651, "log_odds_ratio": -0.3700158894062042, "logits/chosen": -0.8330330848693848, "logits/rejected": -0.6830340623855591, "logps/chosen": -0.6891742944717407, "logps/rejected": -1.8231642246246338, "loss": 1.1951, "nll_loss": 0.8689568042755127, "rewards/accuracies": 0.875, "rewards/chosen": -0.0689174234867096, "rewards/margins": 0.11339901387691498, "rewards/rejected": -0.18231643736362457, "step": 1970 }, { "epoch": 1.202379136800366, "grad_norm": 1.2826310396194458, "learning_rate": 6.098224127372934e-06, "log_odds_chosen": 1.994745135307312, "log_odds_ratio": -0.31038710474967957, "logits/chosen": -1.0740723609924316, "logits/rejected": -0.9578490257263184, "logps/chosen": -0.8147326707839966, "logps/rejected": -2.3451929092407227, "loss": 1.1443, "nll_loss": 1.0775352716445923, "rewards/accuracies": 0.875, "rewards/chosen": -0.08147326856851578, "rewards/margins": 0.15304601192474365, "rewards/rejected": -0.23451927304267883, "step": 1971 }, { "epoch": 1.2029891718773829, "grad_norm": 4.1620283126831055, "learning_rate": 6.097244335578689e-06, "log_odds_chosen": 1.2340381145477295, "log_odds_ratio": -0.46690014004707336, "logits/chosen": -1.073466420173645, "logits/rejected": -1.102586030960083, "logps/chosen": -0.8732471466064453, "logps/rejected": -1.7164032459259033, "loss": 1.1633, "nll_loss": 1.1097248792648315, "rewards/accuracies": 0.75, "rewards/chosen": -0.08732470870018005, "rewards/margins": 0.08431562781333923, "rewards/rejected": -0.1716403365135193, "step": 1972 }, { "epoch": 1.2035992069544, "grad_norm": 2.7093610763549805, "learning_rate": 6.096264543784445e-06, "log_odds_chosen": 0.8850501775741577, "log_odds_ratio": -0.4899013638496399, "logits/chosen": -1.1001694202423096, "logits/rejected": -1.1297696828842163, "logps/chosen": -0.7601175308227539, "logps/rejected": -1.4079962968826294, "loss": 1.3613, "nll_loss": 1.1887396574020386, "rewards/accuracies": 0.625, "rewards/chosen": -0.07601175457239151, "rewards/margins": 0.06478787958621979, "rewards/rejected": -0.1407996416091919, "step": 1973 }, { "epoch": 1.2042092420314168, "grad_norm": 1.8367919921875, "learning_rate": 6.095284751990202e-06, "log_odds_chosen": 0.8139967918395996, "log_odds_ratio": -0.4393298625946045, "logits/chosen": -0.9530370235443115, "logits/rejected": -0.9906689524650574, "logps/chosen": -0.6829560995101929, "logps/rejected": -1.2062690258026123, "loss": 0.9457, "nll_loss": 1.028821587562561, "rewards/accuracies": 0.75, "rewards/chosen": -0.06829561293125153, "rewards/margins": 0.052331291139125824, "rewards/rejected": -0.12062690407037735, "step": 1974 }, { "epoch": 1.2048192771084336, "grad_norm": 8.285225868225098, "learning_rate": 6.094304960195958e-06, "log_odds_chosen": 1.0181554555892944, "log_odds_ratio": -0.5195052027702332, "logits/chosen": -1.0063135623931885, "logits/rejected": -0.9648904204368591, "logps/chosen": -0.9567058086395264, "logps/rejected": -1.7928991317749023, "loss": 1.1846, "nll_loss": 1.0742093324661255, "rewards/accuracies": 0.625, "rewards/chosen": -0.09567058086395264, "rewards/margins": 0.08361932635307312, "rewards/rejected": -0.17928990721702576, "step": 1975 }, { "epoch": 1.2054293121854507, "grad_norm": 2.152580738067627, "learning_rate": 6.093325168401714e-06, "log_odds_chosen": 0.7128236293792725, "log_odds_ratio": -0.5400813221931458, "logits/chosen": -0.978410005569458, "logits/rejected": -0.9761843085289001, "logps/chosen": -0.8258233070373535, "logps/rejected": -1.363912582397461, "loss": 0.9392, "nll_loss": 1.0585927963256836, "rewards/accuracies": 0.75, "rewards/chosen": -0.08258232474327087, "rewards/margins": 0.05380893498659134, "rewards/rejected": -0.13639125227928162, "step": 1976 }, { "epoch": 1.2060393472624675, "grad_norm": 3.323504686355591, "learning_rate": 6.092345376607471e-06, "log_odds_chosen": 0.6380078792572021, "log_odds_ratio": -0.5310113430023193, "logits/chosen": -1.1275526285171509, "logits/rejected": -0.8433347940444946, "logps/chosen": -1.0115801095962524, "logps/rejected": -1.5219824314117432, "loss": 1.1958, "nll_loss": 1.0781581401824951, "rewards/accuracies": 0.625, "rewards/chosen": -0.101158007979393, "rewards/margins": 0.05104023590683937, "rewards/rejected": -0.15219825506210327, "step": 1977 }, { "epoch": 1.2066493823394846, "grad_norm": 1.9399607181549072, "learning_rate": 6.0913655848132265e-06, "log_odds_chosen": 1.0160398483276367, "log_odds_ratio": -0.4719950556755066, "logits/chosen": -1.1928502321243286, "logits/rejected": -1.087219476699829, "logps/chosen": -0.8706978559494019, "logps/rejected": -1.6574655771255493, "loss": 0.9505, "nll_loss": 0.9929871559143066, "rewards/accuracies": 0.75, "rewards/chosen": -0.0870697870850563, "rewards/margins": 0.07867677509784698, "rewards/rejected": -0.1657465547323227, "step": 1978 }, { "epoch": 1.2072594174165014, "grad_norm": 1.766414761543274, "learning_rate": 6.090385793018983e-06, "log_odds_chosen": 0.32828301191329956, "log_odds_ratio": -0.7356580495834351, "logits/chosen": -1.15386962890625, "logits/rejected": -1.0507252216339111, "logps/chosen": -0.8813153505325317, "logps/rejected": -0.8883544206619263, "loss": 1.2613, "nll_loss": 1.2131708860397339, "rewards/accuracies": 0.375, "rewards/chosen": -0.08813153952360153, "rewards/margins": 0.0007039024494588375, "rewards/rejected": -0.08883544057607651, "step": 1979 }, { "epoch": 1.2078694524935183, "grad_norm": 2.0651214122772217, "learning_rate": 6.08940600122474e-06, "log_odds_chosen": 1.5324896574020386, "log_odds_ratio": -0.429135262966156, "logits/chosen": -0.7309192419052124, "logits/rejected": -0.7527433037757874, "logps/chosen": -0.5987264513969421, "logps/rejected": -1.6534621715545654, "loss": 1.1805, "nll_loss": 0.8168799877166748, "rewards/accuracies": 0.625, "rewards/chosen": -0.059872645884752274, "rewards/margins": 0.10547356307506561, "rewards/rejected": -0.16534622013568878, "step": 1980 }, { "epoch": 1.2084794875705354, "grad_norm": 1.393947958946228, "learning_rate": 6.088426209430496e-06, "log_odds_chosen": 2.2720108032226562, "log_odds_ratio": -0.3043883144855499, "logits/chosen": -0.934955358505249, "logits/rejected": -1.1467055082321167, "logps/chosen": -0.7060542702674866, "logps/rejected": -2.3934292793273926, "loss": 0.8916, "nll_loss": 0.8615755438804626, "rewards/accuracies": 0.875, "rewards/chosen": -0.07060542702674866, "rewards/margins": 0.1687375158071518, "rewards/rejected": -0.23934294283390045, "step": 1981 }, { "epoch": 1.2090895226475522, "grad_norm": 1.45664381980896, "learning_rate": 6.087446417636253e-06, "log_odds_chosen": 0.3427305221557617, "log_odds_ratio": -0.6167619824409485, "logits/chosen": -0.9420807361602783, "logits/rejected": -0.8524360656738281, "logps/chosen": -0.8306441307067871, "logps/rejected": -1.0897386074066162, "loss": 1.1358, "nll_loss": 1.0437465906143188, "rewards/accuracies": 0.625, "rewards/chosen": -0.08306441456079483, "rewards/margins": 0.02590944431722164, "rewards/rejected": -0.10897386074066162, "step": 1982 }, { "epoch": 1.2096995577245693, "grad_norm": 1.8392951488494873, "learning_rate": 6.086466625842008e-06, "log_odds_chosen": 1.6828967332839966, "log_odds_ratio": -0.4130361080169678, "logits/chosen": -0.8032678365707397, "logits/rejected": -0.7317081689834595, "logps/chosen": -0.5300934314727783, "logps/rejected": -1.7312051057815552, "loss": 0.9942, "nll_loss": 0.7073492407798767, "rewards/accuracies": 0.75, "rewards/chosen": -0.05300934612751007, "rewards/margins": 0.12011116743087769, "rewards/rejected": -0.17312052845954895, "step": 1983 }, { "epoch": 1.2103095928015861, "grad_norm": 7.789988040924072, "learning_rate": 6.085486834047764e-06, "log_odds_chosen": 0.5737615823745728, "log_odds_ratio": -0.548052191734314, "logits/chosen": -1.1145780086517334, "logits/rejected": -1.0406320095062256, "logps/chosen": -0.9643700122833252, "logps/rejected": -1.2987768650054932, "loss": 1.1548, "nll_loss": 1.3093537092208862, "rewards/accuracies": 0.5, "rewards/chosen": -0.0964369997382164, "rewards/margins": 0.03344068303704262, "rewards/rejected": -0.12987768650054932, "step": 1984 }, { "epoch": 1.210919627878603, "grad_norm": 1.5071216821670532, "learning_rate": 6.084507042253521e-06, "log_odds_chosen": 0.21780818700790405, "log_odds_ratio": -0.7591634392738342, "logits/chosen": -1.0790916681289673, "logits/rejected": -1.1377313137054443, "logps/chosen": -1.0342057943344116, "logps/rejected": -1.1359026432037354, "loss": 1.1884, "nll_loss": 1.1935603618621826, "rewards/accuracies": 0.5, "rewards/chosen": -0.10342057794332504, "rewards/margins": 0.010169684886932373, "rewards/rejected": -0.11359027028083801, "step": 1985 }, { "epoch": 1.21152966295562, "grad_norm": 1.4517661333084106, "learning_rate": 6.083527250459277e-06, "log_odds_chosen": 0.8538380265235901, "log_odds_ratio": -0.4686465263366699, "logits/chosen": -1.1152058839797974, "logits/rejected": -1.1203029155731201, "logps/chosen": -0.9064021706581116, "logps/rejected": -1.5056486129760742, "loss": 0.9875, "nll_loss": 1.0469691753387451, "rewards/accuracies": 0.75, "rewards/chosen": -0.09064021706581116, "rewards/margins": 0.0599246472120285, "rewards/rejected": -0.15056486427783966, "step": 1986 }, { "epoch": 1.2121396980326369, "grad_norm": 1.4083958864212036, "learning_rate": 6.082547458665033e-06, "log_odds_chosen": 0.763644278049469, "log_odds_ratio": -0.44433414936065674, "logits/chosen": -0.9223996996879578, "logits/rejected": -0.9468894004821777, "logps/chosen": -0.9366763830184937, "logps/rejected": -1.4488980770111084, "loss": 0.9435, "nll_loss": 0.8959858417510986, "rewards/accuracies": 0.875, "rewards/chosen": -0.09366763383150101, "rewards/margins": 0.05122218653559685, "rewards/rejected": -0.14488983154296875, "step": 1987 }, { "epoch": 1.2127497331096537, "grad_norm": 2.372295379638672, "learning_rate": 6.08156766687079e-06, "log_odds_chosen": 0.445109486579895, "log_odds_ratio": -0.6549487709999084, "logits/chosen": -1.012480616569519, "logits/rejected": -1.0050408840179443, "logps/chosen": -0.7195994257926941, "logps/rejected": -0.951012372970581, "loss": 1.1183, "nll_loss": 1.002638578414917, "rewards/accuracies": 0.5, "rewards/chosen": -0.07195994257926941, "rewards/margins": 0.023141292855143547, "rewards/rejected": -0.0951012372970581, "step": 1988 }, { "epoch": 1.2133597681866708, "grad_norm": 1.9420064687728882, "learning_rate": 6.0805878750765455e-06, "log_odds_chosen": 0.6421966552734375, "log_odds_ratio": -0.5563278198242188, "logits/chosen": -0.7841984033584595, "logits/rejected": -1.0276867151260376, "logps/chosen": -0.7622334957122803, "logps/rejected": -1.2256678342819214, "loss": 0.9777, "nll_loss": 0.9286580681800842, "rewards/accuracies": 0.5, "rewards/chosen": -0.07622335851192474, "rewards/margins": 0.046343427151441574, "rewards/rejected": -0.12256678193807602, "step": 1989 }, { "epoch": 1.2139698032636876, "grad_norm": 1.7763512134552002, "learning_rate": 6.079608083282302e-06, "log_odds_chosen": 1.5171747207641602, "log_odds_ratio": -0.3822346031665802, "logits/chosen": -0.8342009782791138, "logits/rejected": -0.8902687430381775, "logps/chosen": -0.7667959332466125, "logps/rejected": -1.8570566177368164, "loss": 1.0479, "nll_loss": 0.9883375763893127, "rewards/accuracies": 0.75, "rewards/chosen": -0.07667958736419678, "rewards/margins": 0.10902607440948486, "rewards/rejected": -0.18570567667484283, "step": 1990 }, { "epoch": 1.2145798383407045, "grad_norm": 1.4861310720443726, "learning_rate": 6.078628291488059e-06, "log_odds_chosen": 0.8683050870895386, "log_odds_ratio": -0.5646497011184692, "logits/chosen": -0.9627691507339478, "logits/rejected": -0.8844781517982483, "logps/chosen": -0.9412206411361694, "logps/rejected": -1.6158915758132935, "loss": 1.0062, "nll_loss": 1.0625860691070557, "rewards/accuracies": 0.5, "rewards/chosen": -0.09412206709384918, "rewards/margins": 0.0674670934677124, "rewards/rejected": -0.16158916056156158, "step": 1991 }, { "epoch": 1.2151898734177216, "grad_norm": 1.8915292024612427, "learning_rate": 6.077648499693815e-06, "log_odds_chosen": 2.347273111343384, "log_odds_ratio": -0.29549866914749146, "logits/chosen": -0.8763899803161621, "logits/rejected": -0.9207156896591187, "logps/chosen": -0.6339168548583984, "logps/rejected": -2.4521677494049072, "loss": 0.972, "nll_loss": 0.8424481153488159, "rewards/accuracies": 0.875, "rewards/chosen": -0.06339168548583984, "rewards/margins": 0.18182510137557983, "rewards/rejected": -0.24521677196025848, "step": 1992 }, { "epoch": 1.2157999084947384, "grad_norm": 2.1897454261779785, "learning_rate": 6.076668707899572e-06, "log_odds_chosen": 0.4655829668045044, "log_odds_ratio": -0.5646304488182068, "logits/chosen": -1.0111123323440552, "logits/rejected": -0.9402689933776855, "logps/chosen": -0.97731614112854, "logps/rejected": -1.279732584953308, "loss": 1.1532, "nll_loss": 1.3281352519989014, "rewards/accuracies": 0.875, "rewards/chosen": -0.09773161262273788, "rewards/margins": 0.030241647735238075, "rewards/rejected": -0.1279732584953308, "step": 1993 }, { "epoch": 1.2164099435717555, "grad_norm": 2.1430253982543945, "learning_rate": 6.075688916105328e-06, "log_odds_chosen": 1.5160505771636963, "log_odds_ratio": -0.36474472284317017, "logits/chosen": -0.9290088415145874, "logits/rejected": -0.9317882061004639, "logps/chosen": -0.662897527217865, "logps/rejected": -1.7493085861206055, "loss": 1.1089, "nll_loss": 0.8860800266265869, "rewards/accuracies": 0.875, "rewards/chosen": -0.0662897527217865, "rewards/margins": 0.1086411103606224, "rewards/rejected": -0.1749308705329895, "step": 1994 }, { "epoch": 1.2170199786487723, "grad_norm": 3.2638132572174072, "learning_rate": 6.074709124311083e-06, "log_odds_chosen": 1.8560779094696045, "log_odds_ratio": -0.36286577582359314, "logits/chosen": -0.8306640386581421, "logits/rejected": -0.9845195412635803, "logps/chosen": -0.8160381317138672, "logps/rejected": -2.2208709716796875, "loss": 1.1687, "nll_loss": 1.0165834426879883, "rewards/accuracies": 0.625, "rewards/chosen": -0.08160381019115448, "rewards/margins": 0.14048327505588531, "rewards/rejected": -0.222087100148201, "step": 1995 }, { "epoch": 1.2176300137257892, "grad_norm": 1.4464168548583984, "learning_rate": 6.07372933251684e-06, "log_odds_chosen": 0.6721298694610596, "log_odds_ratio": -0.6067743897438049, "logits/chosen": -1.0249637365341187, "logits/rejected": -0.9940723180770874, "logps/chosen": -0.8244649767875671, "logps/rejected": -1.3009790182113647, "loss": 1.2108, "nll_loss": 1.023003339767456, "rewards/accuracies": 0.5, "rewards/chosen": -0.08244649320840836, "rewards/margins": 0.04765141382813454, "rewards/rejected": -0.1300979107618332, "step": 1996 }, { "epoch": 1.2182400488028062, "grad_norm": 2.0408592224121094, "learning_rate": 6.072749540722596e-06, "log_odds_chosen": 1.620054006576538, "log_odds_ratio": -0.4451470971107483, "logits/chosen": -0.9525611996650696, "logits/rejected": -1.0206795930862427, "logps/chosen": -0.720880925655365, "logps/rejected": -1.8784127235412598, "loss": 1.1442, "nll_loss": 0.850614607334137, "rewards/accuracies": 0.875, "rewards/chosen": -0.0720880925655365, "rewards/margins": 0.115753173828125, "rewards/rejected": -0.1878412663936615, "step": 1997 }, { "epoch": 1.218850083879823, "grad_norm": 1.460932970046997, "learning_rate": 6.071769748928352e-06, "log_odds_chosen": 0.9385980367660522, "log_odds_ratio": -0.6484776139259338, "logits/chosen": -0.8677377700805664, "logits/rejected": -0.8936175107955933, "logps/chosen": -0.7343937158584595, "logps/rejected": -1.3790690898895264, "loss": 1.0411, "nll_loss": 0.8746741414070129, "rewards/accuracies": 0.5, "rewards/chosen": -0.07343937456607819, "rewards/margins": 0.06446754187345505, "rewards/rejected": -0.13790690898895264, "step": 1998 }, { "epoch": 1.21946011895684, "grad_norm": 5.450629711151123, "learning_rate": 6.070789957134109e-06, "log_odds_chosen": 0.7938932180404663, "log_odds_ratio": -0.5026128888130188, "logits/chosen": -0.9666149616241455, "logits/rejected": -0.753233015537262, "logps/chosen": -1.211431860923767, "logps/rejected": -1.8310362100601196, "loss": 0.974, "nll_loss": 1.258589744567871, "rewards/accuracies": 0.5, "rewards/chosen": -0.12114317715167999, "rewards/margins": 0.06196044012904167, "rewards/rejected": -0.18310362100601196, "step": 1999 }, { "epoch": 1.220070154033857, "grad_norm": 1.2784349918365479, "learning_rate": 6.069810165339865e-06, "log_odds_chosen": 0.5729721784591675, "log_odds_ratio": -0.6826573610305786, "logits/chosen": -0.7254363298416138, "logits/rejected": -0.8405987024307251, "logps/chosen": -0.7804351449012756, "logps/rejected": -1.1058731079101562, "loss": 1.0463, "nll_loss": 0.9697393178939819, "rewards/accuracies": 0.625, "rewards/chosen": -0.07804352045059204, "rewards/margins": 0.032543785870075226, "rewards/rejected": -0.11058729887008667, "step": 2000 }, { "epoch": 1.2206801891108738, "grad_norm": 1.3833919763565063, "learning_rate": 6.068830373545621e-06, "log_odds_chosen": 1.4793826341629028, "log_odds_ratio": -0.4056032598018646, "logits/chosen": -0.9415978193283081, "logits/rejected": -0.9285787343978882, "logps/chosen": -0.7456154823303223, "logps/rejected": -1.8820855617523193, "loss": 0.979, "nll_loss": 0.9659154415130615, "rewards/accuracies": 0.875, "rewards/chosen": -0.07456155121326447, "rewards/margins": 0.11364700645208359, "rewards/rejected": -0.18820856511592865, "step": 2001 }, { "epoch": 1.2212902241878907, "grad_norm": 1.2049596309661865, "learning_rate": 6.067850581751378e-06, "log_odds_chosen": 1.439297080039978, "log_odds_ratio": -0.3877798020839691, "logits/chosen": -0.8407096862792969, "logits/rejected": -0.9260165691375732, "logps/chosen": -0.5827820301055908, "logps/rejected": -1.5832301378250122, "loss": 0.9522, "nll_loss": 0.7399693727493286, "rewards/accuracies": 0.875, "rewards/chosen": -0.05827820301055908, "rewards/margins": 0.10004481673240662, "rewards/rejected": -0.1583230197429657, "step": 2002 }, { "epoch": 1.2219002592649078, "grad_norm": 1.9452033042907715, "learning_rate": 6.066870789957134e-06, "log_odds_chosen": 0.7574986815452576, "log_odds_ratio": -0.5344687700271606, "logits/chosen": -0.8404548168182373, "logits/rejected": -0.6851488351821899, "logps/chosen": -0.8670209646224976, "logps/rejected": -1.2371373176574707, "loss": 1.0535, "nll_loss": 1.0106418132781982, "rewards/accuracies": 0.75, "rewards/chosen": -0.08670210093259811, "rewards/margins": 0.03701164200901985, "rewards/rejected": -0.12371373921632767, "step": 2003 }, { "epoch": 1.2225102943419246, "grad_norm": 2.1266963481903076, "learning_rate": 6.06589099816289e-06, "log_odds_chosen": 2.255779266357422, "log_odds_ratio": -0.5448325872421265, "logits/chosen": -0.7900258898735046, "logits/rejected": -0.665224552154541, "logps/chosen": -0.7569361925125122, "logps/rejected": -2.5767712593078613, "loss": 1.0389, "nll_loss": 0.9183056354522705, "rewards/accuracies": 0.5, "rewards/chosen": -0.07569362223148346, "rewards/margins": 0.18198353052139282, "rewards/rejected": -0.2576771378517151, "step": 2004 }, { "epoch": 1.2231203294189417, "grad_norm": 2.0051732063293457, "learning_rate": 6.064911206368647e-06, "log_odds_chosen": 1.2197446823120117, "log_odds_ratio": -0.561829149723053, "logits/chosen": -1.0515352487564087, "logits/rejected": -1.1694148778915405, "logps/chosen": -0.9432823657989502, "logps/rejected": -1.991100788116455, "loss": 1.0662, "nll_loss": 1.1159709692001343, "rewards/accuracies": 0.5, "rewards/chosen": -0.09432824701070786, "rewards/margins": 0.1047818586230278, "rewards/rejected": -0.19911009073257446, "step": 2005 }, { "epoch": 1.2237303644959585, "grad_norm": 1.4381464719772339, "learning_rate": 6.063931414574402e-06, "log_odds_chosen": 1.1736397743225098, "log_odds_ratio": -0.43799686431884766, "logits/chosen": -1.0165109634399414, "logits/rejected": -0.9879999160766602, "logps/chosen": -0.950040340423584, "logps/rejected": -1.7800425291061401, "loss": 1.0835, "nll_loss": 1.2018221616744995, "rewards/accuracies": 0.75, "rewards/chosen": -0.09500403702259064, "rewards/margins": 0.08300022780895233, "rewards/rejected": -0.17800426483154297, "step": 2006 }, { "epoch": 1.2243403995729754, "grad_norm": 1.7996574640274048, "learning_rate": 6.062951622780158e-06, "log_odds_chosen": 0.29023122787475586, "log_odds_ratio": -0.6510745286941528, "logits/chosen": -0.7891322374343872, "logits/rejected": -0.8072277307510376, "logps/chosen": -0.8441135883331299, "logps/rejected": -0.9862062335014343, "loss": 1.032, "nll_loss": 0.9094617366790771, "rewards/accuracies": 0.625, "rewards/chosen": -0.08441136032342911, "rewards/margins": 0.014209260232746601, "rewards/rejected": -0.09862062335014343, "step": 2007 }, { "epoch": 1.2249504346499924, "grad_norm": 1.075457215309143, "learning_rate": 6.061971830985915e-06, "log_odds_chosen": 0.7176972031593323, "log_odds_ratio": -0.5590854287147522, "logits/chosen": -1.080752968788147, "logits/rejected": -1.1324589252471924, "logps/chosen": -0.8732731938362122, "logps/rejected": -1.374786138534546, "loss": 1.0393, "nll_loss": 1.0359772443771362, "rewards/accuracies": 0.75, "rewards/chosen": -0.08732732385396957, "rewards/margins": 0.05015129595994949, "rewards/rejected": -0.13747861981391907, "step": 2008 }, { "epoch": 1.2255604697270093, "grad_norm": 1.0246561765670776, "learning_rate": 6.060992039191671e-06, "log_odds_chosen": 0.9824770092964172, "log_odds_ratio": -0.591830849647522, "logits/chosen": -1.0230995416641235, "logits/rejected": -1.1644287109375, "logps/chosen": -0.8316381573677063, "logps/rejected": -1.4934649467468262, "loss": 1.1478, "nll_loss": 1.0575640201568604, "rewards/accuracies": 0.625, "rewards/chosen": -0.08316382020711899, "rewards/margins": 0.0661826804280281, "rewards/rejected": -0.1493465006351471, "step": 2009 }, { "epoch": 1.2261705048040263, "grad_norm": 3.7148118019104004, "learning_rate": 6.060012247397428e-06, "log_odds_chosen": 1.217512845993042, "log_odds_ratio": -0.5502748489379883, "logits/chosen": -0.7172484993934631, "logits/rejected": -0.8645513653755188, "logps/chosen": -0.808618426322937, "logps/rejected": -1.735656976699829, "loss": 1.1255, "nll_loss": 0.9697450995445251, "rewards/accuracies": 0.75, "rewards/chosen": -0.08086185157299042, "rewards/margins": 0.09270384907722473, "rewards/rejected": -0.17356570065021515, "step": 2010 }, { "epoch": 1.2267805398810432, "grad_norm": 2.3249423503875732, "learning_rate": 6.0590324556031845e-06, "log_odds_chosen": 0.27644240856170654, "log_odds_ratio": -0.6641644835472107, "logits/chosen": -1.072085976600647, "logits/rejected": -0.9373941421508789, "logps/chosen": -1.220359444618225, "logps/rejected": -1.4434914588928223, "loss": 1.2465, "nll_loss": 1.2764475345611572, "rewards/accuracies": 0.375, "rewards/chosen": -0.12203594297170639, "rewards/margins": 0.022313207387924194, "rewards/rejected": -0.14434914290905, "step": 2011 }, { "epoch": 1.22739057495806, "grad_norm": 1.3708466291427612, "learning_rate": 6.05805266380894e-06, "log_odds_chosen": 1.38519287109375, "log_odds_ratio": -0.39529192447662354, "logits/chosen": -1.0164048671722412, "logits/rejected": -1.0125726461410522, "logps/chosen": -0.7501886487007141, "logps/rejected": -1.6237852573394775, "loss": 1.1073, "nll_loss": 1.0607426166534424, "rewards/accuracies": 0.75, "rewards/chosen": -0.07501885294914246, "rewards/margins": 0.08735965937376022, "rewards/rejected": -0.16237851977348328, "step": 2012 }, { "epoch": 1.228000610035077, "grad_norm": 1.3449592590332031, "learning_rate": 6.057072872014697e-06, "log_odds_chosen": 0.8465437889099121, "log_odds_ratio": -0.45069587230682373, "logits/chosen": -1.0593262910842896, "logits/rejected": -1.0114086866378784, "logps/chosen": -0.922416627407074, "logps/rejected": -1.4813973903656006, "loss": 1.2144, "nll_loss": 1.5667392015457153, "rewards/accuracies": 0.75, "rewards/chosen": -0.09224165976047516, "rewards/margins": 0.05589807406067848, "rewards/rejected": -0.14813974499702454, "step": 2013 }, { "epoch": 1.228610645112094, "grad_norm": 1.5192549228668213, "learning_rate": 6.056093080220453e-06, "log_odds_chosen": 2.4017601013183594, "log_odds_ratio": -0.25060349702835083, "logits/chosen": -1.0773407220840454, "logits/rejected": -1.1036951541900635, "logps/chosen": -0.49304550886154175, "logps/rejected": -1.9952489137649536, "loss": 1.0804, "nll_loss": 1.3408714532852173, "rewards/accuracies": 0.875, "rewards/chosen": -0.049304548650979996, "rewards/margins": 0.1502203494310379, "rewards/rejected": -0.1995249092578888, "step": 2014 }, { "epoch": 1.2292206801891108, "grad_norm": 1.0556950569152832, "learning_rate": 6.055113288426209e-06, "log_odds_chosen": 0.23886877298355103, "log_odds_ratio": -0.6771417260169983, "logits/chosen": -0.9571503400802612, "logits/rejected": -0.944849967956543, "logps/chosen": -0.818652331829071, "logps/rejected": -0.9738494157791138, "loss": 1.089, "nll_loss": 0.9592676758766174, "rewards/accuracies": 0.5, "rewards/chosen": -0.08186523616313934, "rewards/margins": 0.0155197037383914, "rewards/rejected": -0.09738494455814362, "step": 2015 }, { "epoch": 1.2298307152661279, "grad_norm": 1.854976773262024, "learning_rate": 6.054133496631966e-06, "log_odds_chosen": 0.8028749227523804, "log_odds_ratio": -0.6784875988960266, "logits/chosen": -1.02232825756073, "logits/rejected": -1.148437738418579, "logps/chosen": -0.971570611000061, "logps/rejected": -1.5520763397216797, "loss": 0.9794, "nll_loss": 1.0617103576660156, "rewards/accuracies": 0.375, "rewards/chosen": -0.0971570611000061, "rewards/margins": 0.05805055797100067, "rewards/rejected": -0.15520761907100677, "step": 2016 }, { "epoch": 1.2304407503431447, "grad_norm": 1.0857125520706177, "learning_rate": 6.053153704837722e-06, "log_odds_chosen": 1.99190092086792, "log_odds_ratio": -0.358365535736084, "logits/chosen": -0.8984676599502563, "logits/rejected": -1.092151403427124, "logps/chosen": -0.6300029158592224, "logps/rejected": -2.0488014221191406, "loss": 0.9053, "nll_loss": 0.7997983694076538, "rewards/accuracies": 0.75, "rewards/chosen": -0.06300029158592224, "rewards/margins": 0.1418798565864563, "rewards/rejected": -0.20488014817237854, "step": 2017 }, { "epoch": 1.2310507854201616, "grad_norm": 1.429601788520813, "learning_rate": 6.052173913043477e-06, "log_odds_chosen": 1.5349383354187012, "log_odds_ratio": -0.35175949335098267, "logits/chosen": -1.188020944595337, "logits/rejected": -1.0479471683502197, "logps/chosen": -0.8560179471969604, "logps/rejected": -1.8800208568572998, "loss": 0.99, "nll_loss": 1.1750178337097168, "rewards/accuracies": 0.875, "rewards/chosen": -0.0856017917394638, "rewards/margins": 0.1024002879858017, "rewards/rejected": -0.1880020797252655, "step": 2018 }, { "epoch": 1.2316608204971786, "grad_norm": 1.6561596393585205, "learning_rate": 6.051194121249234e-06, "log_odds_chosen": 0.0116143599152565, "log_odds_ratio": -0.7544088363647461, "logits/chosen": -0.9653552770614624, "logits/rejected": -0.9102216958999634, "logps/chosen": -1.0116480588912964, "logps/rejected": -0.9995798468589783, "loss": 1.1164, "nll_loss": 1.352159857749939, "rewards/accuracies": 0.5, "rewards/chosen": -0.1011648178100586, "rewards/margins": -0.001206832006573677, "rewards/rejected": -0.09995798766613007, "step": 2019 }, { "epoch": 1.2322708555741955, "grad_norm": 1.5614758729934692, "learning_rate": 6.05021432945499e-06, "log_odds_chosen": 1.3749561309814453, "log_odds_ratio": -0.42593204975128174, "logits/chosen": -0.9321553111076355, "logits/rejected": -1.0697524547576904, "logps/chosen": -0.8124918937683105, "logps/rejected": -1.839154601097107, "loss": 1.1294, "nll_loss": 0.9973814487457275, "rewards/accuracies": 0.875, "rewards/chosen": -0.0812491923570633, "rewards/margins": 0.10266625881195068, "rewards/rejected": -0.18391543626785278, "step": 2020 }, { "epoch": 1.2328808906512125, "grad_norm": 0.7436054348945618, "learning_rate": 6.049234537660747e-06, "log_odds_chosen": 1.0166516304016113, "log_odds_ratio": -0.5231209397315979, "logits/chosen": -0.9135211706161499, "logits/rejected": -1.019054889678955, "logps/chosen": -0.8197060823440552, "logps/rejected": -1.4277383089065552, "loss": 1.1294, "nll_loss": 0.9194108247756958, "rewards/accuracies": 0.625, "rewards/chosen": -0.08197060972452164, "rewards/margins": 0.06080322712659836, "rewards/rejected": -0.14277383685112, "step": 2021 }, { "epoch": 1.2334909257282294, "grad_norm": 1.6169167757034302, "learning_rate": 6.0482547458665035e-06, "log_odds_chosen": 1.0032955408096313, "log_odds_ratio": -0.4732262194156647, "logits/chosen": -0.7839292287826538, "logits/rejected": -0.7498821020126343, "logps/chosen": -0.7342349290847778, "logps/rejected": -1.3611477613449097, "loss": 1.0992, "nll_loss": 0.9263759255409241, "rewards/accuracies": 0.75, "rewards/chosen": -0.07342348992824554, "rewards/margins": 0.06269128620624542, "rewards/rejected": -0.13611477613449097, "step": 2022 }, { "epoch": 1.2341009608052462, "grad_norm": 1.3892086744308472, "learning_rate": 6.047274954072259e-06, "log_odds_chosen": 1.00031316280365, "log_odds_ratio": -0.4922369718551636, "logits/chosen": -1.0183110237121582, "logits/rejected": -1.0371116399765015, "logps/chosen": -0.7702465057373047, "logps/rejected": -1.4266419410705566, "loss": 1.089, "nll_loss": 1.0729738473892212, "rewards/accuracies": 0.625, "rewards/chosen": -0.07702465355396271, "rewards/margins": 0.06563953310251236, "rewards/rejected": -0.14266419410705566, "step": 2023 }, { "epoch": 1.2347109958822633, "grad_norm": 1.1087552309036255, "learning_rate": 6.046295162278016e-06, "log_odds_chosen": 0.7773559093475342, "log_odds_ratio": -0.50531005859375, "logits/chosen": -1.1297886371612549, "logits/rejected": -1.0721256732940674, "logps/chosen": -0.9495313167572021, "logps/rejected": -1.5191631317138672, "loss": 1.0535, "nll_loss": 1.2392077445983887, "rewards/accuracies": 0.625, "rewards/chosen": -0.09495313465595245, "rewards/margins": 0.05696318671107292, "rewards/rejected": -0.15191632509231567, "step": 2024 }, { "epoch": 1.2353210309592801, "grad_norm": 1.1328095197677612, "learning_rate": 6.045315370483772e-06, "log_odds_chosen": 1.2996426820755005, "log_odds_ratio": -0.5340493321418762, "logits/chosen": -1.018800973892212, "logits/rejected": -0.997709333896637, "logps/chosen": -0.9935813546180725, "logps/rejected": -1.95924973487854, "loss": 1.2322, "nll_loss": 1.1805591583251953, "rewards/accuracies": 0.5, "rewards/chosen": -0.09935814142227173, "rewards/margins": 0.09656684100627899, "rewards/rejected": -0.19592498242855072, "step": 2025 }, { "epoch": 1.2359310660362972, "grad_norm": 2.5616567134857178, "learning_rate": 6.044335578689528e-06, "log_odds_chosen": 1.1040995121002197, "log_odds_ratio": -0.5022065043449402, "logits/chosen": -0.9784831404685974, "logits/rejected": -1.146684169769287, "logps/chosen": -0.9708237648010254, "logps/rejected": -1.7177516222000122, "loss": 1.2551, "nll_loss": 1.1041646003723145, "rewards/accuracies": 0.75, "rewards/chosen": -0.09708236902952194, "rewards/margins": 0.07469280064105988, "rewards/rejected": -0.17177516222000122, "step": 2026 }, { "epoch": 1.236541101113314, "grad_norm": 2.8249523639678955, "learning_rate": 6.043355786895285e-06, "log_odds_chosen": 0.9966164231300354, "log_odds_ratio": -0.44519561529159546, "logits/chosen": -0.9721822738647461, "logits/rejected": -1.0200843811035156, "logps/chosen": -0.8199412822723389, "logps/rejected": -1.538619041442871, "loss": 1.0656, "nll_loss": 1.0816339254379272, "rewards/accuracies": 0.75, "rewards/chosen": -0.08199412375688553, "rewards/margins": 0.07186777889728546, "rewards/rejected": -0.1538619101047516, "step": 2027 }, { "epoch": 1.237151136190331, "grad_norm": 3.6082074642181396, "learning_rate": 6.042375995101041e-06, "log_odds_chosen": 1.06858491897583, "log_odds_ratio": -0.6706027984619141, "logits/chosen": -1.1915721893310547, "logits/rejected": -1.0193004608154297, "logps/chosen": -1.0711849927902222, "logps/rejected": -2.0455501079559326, "loss": 1.2456, "nll_loss": 1.150874376296997, "rewards/accuracies": 0.5, "rewards/chosen": -0.10711848735809326, "rewards/margins": 0.09743651747703552, "rewards/rejected": -0.20455500483512878, "step": 2028 }, { "epoch": 1.2377611712673477, "grad_norm": 2.324622392654419, "learning_rate": 6.041396203306796e-06, "log_odds_chosen": 0.7589336633682251, "log_odds_ratio": -0.45996537804603577, "logits/chosen": -0.5702388882637024, "logits/rejected": -0.8827179670333862, "logps/chosen": -0.7335893511772156, "logps/rejected": -1.1420648097991943, "loss": 1.0781, "nll_loss": 0.9716370105743408, "rewards/accuracies": 0.875, "rewards/chosen": -0.0733589380979538, "rewards/margins": 0.0408475361764431, "rewards/rejected": -0.1142064779996872, "step": 2029 }, { "epoch": 1.2383712063443648, "grad_norm": 1.418982744216919, "learning_rate": 6.040416411512553e-06, "log_odds_chosen": 1.1713300943374634, "log_odds_ratio": -0.443336546421051, "logits/chosen": -1.1192448139190674, "logits/rejected": -1.2352763414382935, "logps/chosen": -0.8768590688705444, "logps/rejected": -1.6719932556152344, "loss": 1.2829, "nll_loss": 1.2870142459869385, "rewards/accuracies": 0.75, "rewards/chosen": -0.08768590539693832, "rewards/margins": 0.07951343059539795, "rewards/rejected": -0.16719934344291687, "step": 2030 }, { "epoch": 1.2389812414213817, "grad_norm": 1.8006644248962402, "learning_rate": 6.0394366197183095e-06, "log_odds_chosen": 1.040906548500061, "log_odds_ratio": -0.39652198553085327, "logits/chosen": -1.0319616794586182, "logits/rejected": -1.1192599534988403, "logps/chosen": -0.9779373407363892, "logps/rejected": -1.7581188678741455, "loss": 1.1815, "nll_loss": 1.1313738822937012, "rewards/accuracies": 0.75, "rewards/chosen": -0.09779374301433563, "rewards/margins": 0.07801815867424011, "rewards/rejected": -0.17581188678741455, "step": 2031 }, { "epoch": 1.2395912764983987, "grad_norm": 2.5864908695220947, "learning_rate": 6.038456827924066e-06, "log_odds_chosen": 1.738217830657959, "log_odds_ratio": -0.38418638706207275, "logits/chosen": -0.8722362518310547, "logits/rejected": -0.9439036250114441, "logps/chosen": -0.6234151124954224, "logps/rejected": -1.9114880561828613, "loss": 0.9693, "nll_loss": 0.9020284414291382, "rewards/accuracies": 0.875, "rewards/chosen": -0.062341511249542236, "rewards/margins": 0.12880730628967285, "rewards/rejected": -0.1911488175392151, "step": 2032 }, { "epoch": 1.2402013115754156, "grad_norm": 1.1686875820159912, "learning_rate": 6.0374770361298225e-06, "log_odds_chosen": 2.3565802574157715, "log_odds_ratio": -0.30094534158706665, "logits/chosen": -0.42663225531578064, "logits/rejected": -0.7689417600631714, "logps/chosen": -0.5671711564064026, "logps/rejected": -2.1668741703033447, "loss": 0.8848, "nll_loss": 0.5814156532287598, "rewards/accuracies": 0.75, "rewards/chosen": -0.05671711638569832, "rewards/margins": 0.15997029840946198, "rewards/rejected": -0.21668741106987, "step": 2033 }, { "epoch": 1.2408113466524324, "grad_norm": 1.0635555982589722, "learning_rate": 6.036497244335579e-06, "log_odds_chosen": 0.6297667026519775, "log_odds_ratio": -0.5185743570327759, "logits/chosen": -1.0059754848480225, "logits/rejected": -1.109569787979126, "logps/chosen": -0.6377572417259216, "logps/rejected": -0.9970512390136719, "loss": 1.1139, "nll_loss": 0.7994505763053894, "rewards/accuracies": 0.75, "rewards/chosen": -0.06377572566270828, "rewards/margins": 0.03592940419912338, "rewards/rejected": -0.09970512986183167, "step": 2034 }, { "epoch": 1.2414213817294495, "grad_norm": 1.4956345558166504, "learning_rate": 6.035517452541335e-06, "log_odds_chosen": 1.4774326086044312, "log_odds_ratio": -0.3818090260028839, "logits/chosen": -1.157140851020813, "logits/rejected": -1.0668315887451172, "logps/chosen": -0.9973210096359253, "logps/rejected": -2.179655075073242, "loss": 1.0572, "nll_loss": 1.191072702407837, "rewards/accuracies": 0.75, "rewards/chosen": -0.09973210096359253, "rewards/margins": 0.11823340505361557, "rewards/rejected": -0.2179655134677887, "step": 2035 }, { "epoch": 1.2420314168064663, "grad_norm": 1.8796788454055786, "learning_rate": 6.034537660747091e-06, "log_odds_chosen": 1.5004749298095703, "log_odds_ratio": -0.39909031987190247, "logits/chosen": -0.7134904861450195, "logits/rejected": -1.0165919065475464, "logps/chosen": -0.6150287389755249, "logps/rejected": -1.5943880081176758, "loss": 0.9822, "nll_loss": 0.7921222448348999, "rewards/accuracies": 0.875, "rewards/chosen": -0.06150287389755249, "rewards/margins": 0.09793592244386673, "rewards/rejected": -0.15943878889083862, "step": 2036 }, { "epoch": 1.2426414518834834, "grad_norm": 1.8247878551483154, "learning_rate": 6.033557868952847e-06, "log_odds_chosen": 1.636528730392456, "log_odds_ratio": -0.5168719291687012, "logits/chosen": -0.9368768930435181, "logits/rejected": -1.1065226793289185, "logps/chosen": -0.8402308225631714, "logps/rejected": -2.0243587493896484, "loss": 1.0797, "nll_loss": 1.0119872093200684, "rewards/accuracies": 0.625, "rewards/chosen": -0.08402308076620102, "rewards/margins": 0.1184128075838089, "rewards/rejected": -0.20243588089942932, "step": 2037 }, { "epoch": 1.2432514869605003, "grad_norm": 5.535706520080566, "learning_rate": 6.032578077158604e-06, "log_odds_chosen": 0.6361961364746094, "log_odds_ratio": -0.6222307682037354, "logits/chosen": -0.9592744708061218, "logits/rejected": -0.9925606846809387, "logps/chosen": -0.9758633375167847, "logps/rejected": -1.4227443933486938, "loss": 1.0954, "nll_loss": 1.0764540433883667, "rewards/accuracies": 0.5, "rewards/chosen": -0.09758632630109787, "rewards/margins": 0.044688109308481216, "rewards/rejected": -0.14227445423603058, "step": 2038 }, { "epoch": 1.243861522037517, "grad_norm": 1.5177301168441772, "learning_rate": 6.03159828536436e-06, "log_odds_chosen": 0.24432501196861267, "log_odds_ratio": -0.7833023071289062, "logits/chosen": -0.9547308087348938, "logits/rejected": -0.9884546399116516, "logps/chosen": -0.9330512285232544, "logps/rejected": -1.0744999647140503, "loss": 1.1452, "nll_loss": 1.0346509218215942, "rewards/accuracies": 0.625, "rewards/chosen": -0.09330512583255768, "rewards/margins": 0.01414487510919571, "rewards/rejected": -0.10745000839233398, "step": 2039 }, { "epoch": 1.2444715571145342, "grad_norm": 3.0442054271698, "learning_rate": 6.030618493570115e-06, "log_odds_chosen": 2.111067295074463, "log_odds_ratio": -0.5680040121078491, "logits/chosen": -0.9100465178489685, "logits/rejected": -1.0464781522750854, "logps/chosen": -0.7028497457504272, "logps/rejected": -2.4723849296569824, "loss": 1.1493, "nll_loss": 1.113272786140442, "rewards/accuracies": 0.5, "rewards/chosen": -0.07028498500585556, "rewards/margins": 0.1769535094499588, "rewards/rejected": -0.24723848700523376, "step": 2040 }, { "epoch": 1.245081592191551, "grad_norm": 1.5687869787216187, "learning_rate": 6.029638701775872e-06, "log_odds_chosen": 1.3629252910614014, "log_odds_ratio": -0.46321722865104675, "logits/chosen": -0.8525218963623047, "logits/rejected": -0.8659440875053406, "logps/chosen": -0.752292275428772, "logps/rejected": -1.6797571182250977, "loss": 1.0526, "nll_loss": 0.8319898843765259, "rewards/accuracies": 0.75, "rewards/chosen": -0.0752292275428772, "rewards/margins": 0.09274648129940033, "rewards/rejected": -0.16797570884227753, "step": 2041 }, { "epoch": 1.2456916272685679, "grad_norm": 3.7987871170043945, "learning_rate": 6.0286589099816285e-06, "log_odds_chosen": 1.4845752716064453, "log_odds_ratio": -0.5339809656143188, "logits/chosen": -0.9915978908538818, "logits/rejected": -1.1068865060806274, "logps/chosen": -0.9165202975273132, "logps/rejected": -2.0161476135253906, "loss": 1.1458, "nll_loss": 1.272807240486145, "rewards/accuracies": 0.625, "rewards/chosen": -0.0916520357131958, "rewards/margins": 0.10996272414922714, "rewards/rejected": -0.20161475241184235, "step": 2042 }, { "epoch": 1.246301662345585, "grad_norm": 2.791027784347534, "learning_rate": 6.027679118187385e-06, "log_odds_chosen": 0.9262441396713257, "log_odds_ratio": -0.6570484638214111, "logits/chosen": -1.0040128231048584, "logits/rejected": -1.0233279466629028, "logps/chosen": -0.9379569888114929, "logps/rejected": -1.723207712173462, "loss": 1.0977, "nll_loss": 1.1133153438568115, "rewards/accuracies": 0.5, "rewards/chosen": -0.09379570186138153, "rewards/margins": 0.07852508127689362, "rewards/rejected": -0.17232078313827515, "step": 2043 }, { "epoch": 1.2469116974226018, "grad_norm": 7.338433265686035, "learning_rate": 6.026699326393142e-06, "log_odds_chosen": 0.03527463600039482, "log_odds_ratio": -0.7104960680007935, "logits/chosen": -1.1338306665420532, "logits/rejected": -1.0275015830993652, "logps/chosen": -0.8889368772506714, "logps/rejected": -0.8375908732414246, "loss": 1.3449, "nll_loss": 1.2731258869171143, "rewards/accuracies": 0.375, "rewards/chosen": -0.08889368921518326, "rewards/margins": -0.00513459974899888, "rewards/rejected": -0.08375908434391022, "step": 2044 }, { "epoch": 1.2475217324996186, "grad_norm": 2.588207721710205, "learning_rate": 6.025719534598898e-06, "log_odds_chosen": 1.0724986791610718, "log_odds_ratio": -0.4228265583515167, "logits/chosen": -0.9325292706489563, "logits/rejected": -0.9069384336471558, "logps/chosen": -0.723293125629425, "logps/rejected": -1.4321343898773193, "loss": 1.1007, "nll_loss": 1.112460970878601, "rewards/accuracies": 0.625, "rewards/chosen": -0.0723293125629425, "rewards/margins": 0.07088412344455719, "rewards/rejected": -0.1432134360074997, "step": 2045 }, { "epoch": 1.2481317675766357, "grad_norm": 1.6810072660446167, "learning_rate": 6.024739742804653e-06, "log_odds_chosen": 1.687903881072998, "log_odds_ratio": -0.3719104528427124, "logits/chosen": -0.8147554993629456, "logits/rejected": -0.993996798992157, "logps/chosen": -0.5864373445510864, "logps/rejected": -1.6632726192474365, "loss": 1.0753, "nll_loss": 0.8400248885154724, "rewards/accuracies": 0.75, "rewards/chosen": -0.058643732219934464, "rewards/margins": 0.10768353193998337, "rewards/rejected": -0.16632726788520813, "step": 2046 }, { "epoch": 1.2487418026536525, "grad_norm": 1.4892091751098633, "learning_rate": 6.02375995101041e-06, "log_odds_chosen": 2.464613199234009, "log_odds_ratio": -0.5287538170814514, "logits/chosen": -0.7962495684623718, "logits/rejected": -0.8971015214920044, "logps/chosen": -0.741214394569397, "logps/rejected": -2.7164862155914307, "loss": 0.9511, "nll_loss": 0.8808614015579224, "rewards/accuracies": 0.625, "rewards/chosen": -0.07412143051624298, "rewards/margins": 0.197527214884758, "rewards/rejected": -0.271648645401001, "step": 2047 }, { "epoch": 1.2493518377306696, "grad_norm": 2.2736563682556152, "learning_rate": 6.022780159216166e-06, "log_odds_chosen": 0.871261477470398, "log_odds_ratio": -0.4984646439552307, "logits/chosen": -0.9047053456306458, "logits/rejected": -0.9265580177307129, "logps/chosen": -0.9132497310638428, "logps/rejected": -1.5003271102905273, "loss": 1.2407, "nll_loss": 1.1826586723327637, "rewards/accuracies": 0.75, "rewards/chosen": -0.09132497012615204, "rewards/margins": 0.058707743883132935, "rewards/rejected": -0.15003272891044617, "step": 2048 }, { "epoch": 1.2499618728076864, "grad_norm": 1.1820698976516724, "learning_rate": 6.021800367421923e-06, "log_odds_chosen": 1.5847333669662476, "log_odds_ratio": -0.4003012478351593, "logits/chosen": -0.8815160393714905, "logits/rejected": -1.0720101594924927, "logps/chosen": -0.850773811340332, "logps/rejected": -2.0508782863616943, "loss": 1.06, "nll_loss": 0.9783060550689697, "rewards/accuracies": 0.875, "rewards/chosen": -0.08507739007472992, "rewards/margins": 0.12001043558120728, "rewards/rejected": -0.2050878405570984, "step": 2049 }, { "epoch": 1.2505719078847033, "grad_norm": 2.8314266204833984, "learning_rate": 6.020820575627679e-06, "log_odds_chosen": 1.695389747619629, "log_odds_ratio": -0.2859249711036682, "logits/chosen": -0.883865475654602, "logits/rejected": -0.937522828578949, "logps/chosen": -0.6293734312057495, "logps/rejected": -1.7699062824249268, "loss": 1.1052, "nll_loss": 1.0142648220062256, "rewards/accuracies": 1.0, "rewards/chosen": -0.06293734163045883, "rewards/margins": 0.11405330151319504, "rewards/rejected": -0.17699064314365387, "step": 2050 }, { "epoch": 1.2511819429617204, "grad_norm": 1.2523897886276245, "learning_rate": 6.019840783833435e-06, "log_odds_chosen": 1.2889264822006226, "log_odds_ratio": -0.5086396336555481, "logits/chosen": -0.806175947189331, "logits/rejected": -0.9862149953842163, "logps/chosen": -0.7886307239532471, "logps/rejected": -1.7447869777679443, "loss": 1.1215, "nll_loss": 0.9258155822753906, "rewards/accuracies": 0.625, "rewards/chosen": -0.07886307686567307, "rewards/margins": 0.09561561048030853, "rewards/rejected": -0.1744786947965622, "step": 2051 }, { "epoch": 1.2517919780387372, "grad_norm": 1.6262078285217285, "learning_rate": 6.018860992039191e-06, "log_odds_chosen": 1.0734115839004517, "log_odds_ratio": -0.46804070472717285, "logits/chosen": -0.9476578235626221, "logits/rejected": -0.9854532480239868, "logps/chosen": -0.7881143093109131, "logps/rejected": -1.4752804040908813, "loss": 0.9448, "nll_loss": 0.931925892829895, "rewards/accuracies": 0.75, "rewards/chosen": -0.07881143689155579, "rewards/margins": 0.0687166079878807, "rewards/rejected": -0.1475280523300171, "step": 2052 }, { "epoch": 1.2524020131157543, "grad_norm": 1.3586331605911255, "learning_rate": 6.0178812002449475e-06, "log_odds_chosen": 0.13205735385417938, "log_odds_ratio": -0.6997319459915161, "logits/chosen": -1.1726489067077637, "logits/rejected": -1.137652039527893, "logps/chosen": -1.1465784311294556, "logps/rejected": -1.2213866710662842, "loss": 1.2307, "nll_loss": 1.386872410774231, "rewards/accuracies": 0.625, "rewards/chosen": -0.11465784907341003, "rewards/margins": 0.007480831351131201, "rewards/rejected": -0.12213867902755737, "step": 2053 }, { "epoch": 1.2530120481927711, "grad_norm": 2.346440315246582, "learning_rate": 6.016901408450704e-06, "log_odds_chosen": 1.7751808166503906, "log_odds_ratio": -0.5057436227798462, "logits/chosen": -0.7276626825332642, "logits/rejected": -0.9954484701156616, "logps/chosen": -0.6109965443611145, "logps/rejected": -1.8242605924606323, "loss": 1.1063, "nll_loss": 0.8298936486244202, "rewards/accuracies": 0.75, "rewards/chosen": -0.06109965592622757, "rewards/margins": 0.12132640928030014, "rewards/rejected": -0.1824260652065277, "step": 2054 }, { "epoch": 1.253622083269788, "grad_norm": 2.2260782718658447, "learning_rate": 6.015921616656461e-06, "log_odds_chosen": 0.7232345342636108, "log_odds_ratio": -0.5158937573432922, "logits/chosen": -1.121761679649353, "logits/rejected": -1.1518723964691162, "logps/chosen": -0.8795366287231445, "logps/rejected": -1.3991544246673584, "loss": 1.1546, "nll_loss": 1.1786086559295654, "rewards/accuracies": 0.75, "rewards/chosen": -0.08795366436243057, "rewards/margins": 0.05196176841855049, "rewards/rejected": -0.13991543650627136, "step": 2055 }, { "epoch": 1.2542321183468048, "grad_norm": 2.191880464553833, "learning_rate": 6.014941824862217e-06, "log_odds_chosen": 0.7609747648239136, "log_odds_ratio": -0.500619649887085, "logits/chosen": -0.7312521934509277, "logits/rejected": -0.8460234999656677, "logps/chosen": -0.7980797290802002, "logps/rejected": -1.248160481452942, "loss": 1.0475, "nll_loss": 1.0389797687530518, "rewards/accuracies": 0.75, "rewards/chosen": -0.07980796694755554, "rewards/margins": 0.04500808194279671, "rewards/rejected": -0.12481605261564255, "step": 2056 }, { "epoch": 1.2548421534238219, "grad_norm": 3.371450662612915, "learning_rate": 6.013962033067972e-06, "log_odds_chosen": 1.1910265684127808, "log_odds_ratio": -0.3888685703277588, "logits/chosen": -1.2680190801620483, "logits/rejected": -1.3737528324127197, "logps/chosen": -0.913715124130249, "logps/rejected": -1.6999289989471436, "loss": 1.3242, "nll_loss": 1.3749058246612549, "rewards/accuracies": 0.875, "rewards/chosen": -0.09137151390314102, "rewards/margins": 0.07862138748168945, "rewards/rejected": -0.16999289393424988, "step": 2057 }, { "epoch": 1.2554521885008387, "grad_norm": 4.5083417892456055, "learning_rate": 6.012982241273729e-06, "log_odds_chosen": 0.3550640046596527, "log_odds_ratio": -0.6487560272216797, "logits/chosen": -0.9030584096908569, "logits/rejected": -1.0298460721969604, "logps/chosen": -1.0829604864120483, "logps/rejected": -1.3353345394134521, "loss": 1.2106, "nll_loss": 1.1396809816360474, "rewards/accuracies": 0.5, "rewards/chosen": -0.10829605162143707, "rewards/margins": 0.02523741126060486, "rewards/rejected": -0.13353346288204193, "step": 2058 }, { "epoch": 1.2560622235778558, "grad_norm": 1.0828999280929565, "learning_rate": 6.012002449479485e-06, "log_odds_chosen": 1.832033634185791, "log_odds_ratio": -0.41862109303474426, "logits/chosen": -1.029413104057312, "logits/rejected": -1.0302767753601074, "logps/chosen": -0.9754337072372437, "logps/rejected": -2.6102185249328613, "loss": 1.0619, "nll_loss": 1.079393982887268, "rewards/accuracies": 0.75, "rewards/chosen": -0.09754336625337601, "rewards/margins": 0.16347849369049072, "rewards/rejected": -0.26102185249328613, "step": 2059 }, { "epoch": 1.2566722586548726, "grad_norm": 1.7301603555679321, "learning_rate": 6.011022657685241e-06, "log_odds_chosen": 1.5516986846923828, "log_odds_ratio": -0.4842934012413025, "logits/chosen": -0.9497770071029663, "logits/rejected": -0.8799492120742798, "logps/chosen": -0.7111964225769043, "logps/rejected": -1.8830795288085938, "loss": 0.9467, "nll_loss": 0.884981632232666, "rewards/accuracies": 0.75, "rewards/chosen": -0.07111964374780655, "rewards/margins": 0.11718831956386566, "rewards/rejected": -0.1883079707622528, "step": 2060 }, { "epoch": 1.2572822937318895, "grad_norm": 1.146623134613037, "learning_rate": 6.010042865890998e-06, "log_odds_chosen": 0.6368117928504944, "log_odds_ratio": -0.5332843661308289, "logits/chosen": -0.7668173909187317, "logits/rejected": -0.8596578240394592, "logps/chosen": -0.835933268070221, "logps/rejected": -1.2141934633255005, "loss": 1.0878, "nll_loss": 0.9332133531570435, "rewards/accuracies": 0.625, "rewards/chosen": -0.08359332382678986, "rewards/margins": 0.03782601282000542, "rewards/rejected": -0.12141934037208557, "step": 2061 }, { "epoch": 1.2578923288089066, "grad_norm": 1.2389243841171265, "learning_rate": 6.009063074096754e-06, "log_odds_chosen": 0.8987215757369995, "log_odds_ratio": -0.4684637784957886, "logits/chosen": -0.9934291243553162, "logits/rejected": -0.9300845861434937, "logps/chosen": -0.7385488152503967, "logps/rejected": -1.1462445259094238, "loss": 1.0863, "nll_loss": 1.35785710811615, "rewards/accuracies": 0.875, "rewards/chosen": -0.07385487854480743, "rewards/margins": 0.04076957702636719, "rewards/rejected": -0.11462445557117462, "step": 2062 }, { "epoch": 1.2585023638859234, "grad_norm": 1.822518229484558, "learning_rate": 6.0080832823025104e-06, "log_odds_chosen": 1.0922404527664185, "log_odds_ratio": -0.4560846984386444, "logits/chosen": -0.826594889163971, "logits/rejected": -0.6458832025527954, "logps/chosen": -0.9326007962226868, "logps/rejected": -1.841054916381836, "loss": 1.2019, "nll_loss": 1.0615679025650024, "rewards/accuracies": 0.75, "rewards/chosen": -0.09326008707284927, "rewards/margins": 0.09084540605545044, "rewards/rejected": -0.18410548567771912, "step": 2063 }, { "epoch": 1.2591123989629405, "grad_norm": 1.942295789718628, "learning_rate": 6.0071034905082666e-06, "log_odds_chosen": 0.18460619449615479, "log_odds_ratio": -0.795590341091156, "logits/chosen": -0.780657172203064, "logits/rejected": -0.6855940818786621, "logps/chosen": -0.9287386536598206, "logps/rejected": -0.9962255954742432, "loss": 0.9368, "nll_loss": 1.0198315382003784, "rewards/accuracies": 0.5, "rewards/chosen": -0.09287386387586594, "rewards/margins": 0.00674869678914547, "rewards/rejected": -0.09962256252765656, "step": 2064 }, { "epoch": 1.2597224340399573, "grad_norm": 3.3168888092041016, "learning_rate": 6.006123698714023e-06, "log_odds_chosen": 1.393623948097229, "log_odds_ratio": -0.3481822907924652, "logits/chosen": -0.7466915845870972, "logits/rejected": -0.8694908618927002, "logps/chosen": -0.741141676902771, "logps/rejected": -1.6154471635818481, "loss": 1.1189, "nll_loss": 0.9941285848617554, "rewards/accuracies": 0.875, "rewards/chosen": -0.07411417365074158, "rewards/margins": 0.08743054419755936, "rewards/rejected": -0.16154472529888153, "step": 2065 }, { "epoch": 1.2603324691169742, "grad_norm": 1.469880223274231, "learning_rate": 6.00514390691978e-06, "log_odds_chosen": 0.8145917654037476, "log_odds_ratio": -0.5944385528564453, "logits/chosen": -0.6555103659629822, "logits/rejected": -0.5401102900505066, "logps/chosen": -0.8054157495498657, "logps/rejected": -1.4909111261367798, "loss": 1.0844, "nll_loss": 1.0451757907867432, "rewards/accuracies": 0.5, "rewards/chosen": -0.08054157346487045, "rewards/margins": 0.06854952871799469, "rewards/rejected": -0.14909110963344574, "step": 2066 }, { "epoch": 1.2609425041939912, "grad_norm": 2.9991648197174072, "learning_rate": 6.004164115125536e-06, "log_odds_chosen": 0.9553672671318054, "log_odds_ratio": -0.7587400674819946, "logits/chosen": -0.9587233662605286, "logits/rejected": -0.9661192893981934, "logps/chosen": -0.7103208303451538, "logps/rejected": -1.5791860818862915, "loss": 0.9249, "nll_loss": 0.8687561750411987, "rewards/accuracies": 0.5, "rewards/chosen": -0.0710320845246315, "rewards/margins": 0.08688652515411377, "rewards/rejected": -0.15791860222816467, "step": 2067 }, { "epoch": 1.261552539271008, "grad_norm": 5.950984477996826, "learning_rate": 6.003184323331292e-06, "log_odds_chosen": 0.14650419354438782, "log_odds_ratio": -0.7008358836174011, "logits/chosen": -0.8640573024749756, "logits/rejected": -0.974051833152771, "logps/chosen": -1.1598901748657227, "logps/rejected": -1.2942287921905518, "loss": 1.2363, "nll_loss": 1.2155065536499023, "rewards/accuracies": 0.625, "rewards/chosen": -0.11598902195692062, "rewards/margins": 0.013433857820928097, "rewards/rejected": -0.1294228732585907, "step": 2068 }, { "epoch": 1.2621625743480251, "grad_norm": 1.7491220235824585, "learning_rate": 6.002204531537048e-06, "log_odds_chosen": 0.5102424025535583, "log_odds_ratio": -0.601906955242157, "logits/chosen": -0.9429954886436462, "logits/rejected": -0.9501675367355347, "logps/chosen": -0.7714251279830933, "logps/rejected": -1.0364515781402588, "loss": 1.155, "nll_loss": 1.0195139646530151, "rewards/accuracies": 0.625, "rewards/chosen": -0.07714250683784485, "rewards/margins": 0.02650264836847782, "rewards/rejected": -0.10364516079425812, "step": 2069 }, { "epoch": 1.262772609425042, "grad_norm": 2.0893940925598145, "learning_rate": 6.001224739742804e-06, "log_odds_chosen": 0.6889505982398987, "log_odds_ratio": -0.6201357245445251, "logits/chosen": -1.0236506462097168, "logits/rejected": -0.9848555326461792, "logps/chosen": -1.1461570262908936, "logps/rejected": -1.7822096347808838, "loss": 1.2462, "nll_loss": 1.3952558040618896, "rewards/accuracies": 0.625, "rewards/chosen": -0.11461570858955383, "rewards/margins": 0.06360524892807007, "rewards/rejected": -0.1782209724187851, "step": 2070 }, { "epoch": 1.2633826445020588, "grad_norm": 2.423114061355591, "learning_rate": 6.00024494794856e-06, "log_odds_chosen": 2.215947151184082, "log_odds_ratio": -0.3945186734199524, "logits/chosen": -0.9236855506896973, "logits/rejected": -1.0314874649047852, "logps/chosen": -0.7529359459877014, "logps/rejected": -2.530550003051758, "loss": 1.1654, "nll_loss": 1.031747817993164, "rewards/accuracies": 0.875, "rewards/chosen": -0.07529360055923462, "rewards/margins": 0.17776142060756683, "rewards/rejected": -0.25305500626564026, "step": 2071 }, { "epoch": 1.2639926795790757, "grad_norm": 3.1172828674316406, "learning_rate": 5.999265156154317e-06, "log_odds_chosen": 1.4398598670959473, "log_odds_ratio": -0.3261106610298157, "logits/chosen": -1.002031683921814, "logits/rejected": -1.1386500597000122, "logps/chosen": -0.8117234706878662, "logps/rejected": -1.8597086668014526, "loss": 1.0536, "nll_loss": 1.0429494380950928, "rewards/accuracies": 0.875, "rewards/chosen": -0.08117234706878662, "rewards/margins": 0.10479853302240372, "rewards/rejected": -0.18597087264060974, "step": 2072 }, { "epoch": 1.2646027146560928, "grad_norm": 2.9351799488067627, "learning_rate": 5.998285364360073e-06, "log_odds_chosen": 0.43515828251838684, "log_odds_ratio": -0.7993111610412598, "logits/chosen": -0.7764827013015747, "logits/rejected": -0.8384947180747986, "logps/chosen": -0.7872901558876038, "logps/rejected": -1.0640361309051514, "loss": 1.1515, "nll_loss": 0.9372248649597168, "rewards/accuracies": 0.5, "rewards/chosen": -0.07872901856899261, "rewards/margins": 0.027674593031406403, "rewards/rejected": -0.10640361905097961, "step": 2073 }, { "epoch": 1.2652127497331096, "grad_norm": 1.3951865434646606, "learning_rate": 5.99730557256583e-06, "log_odds_chosen": 1.0604875087738037, "log_odds_ratio": -0.5802308917045593, "logits/chosen": -0.839790940284729, "logits/rejected": -0.8494554758071899, "logps/chosen": -0.9245245456695557, "logps/rejected": -1.725572943687439, "loss": 1.2669, "nll_loss": 1.118462085723877, "rewards/accuracies": 0.75, "rewards/chosen": -0.09245245903730392, "rewards/margins": 0.08010484278202057, "rewards/rejected": -0.1725572943687439, "step": 2074 }, { "epoch": 1.2658227848101267, "grad_norm": 1.384269118309021, "learning_rate": 5.996325780771586e-06, "log_odds_chosen": 1.6186821460723877, "log_odds_ratio": -0.3626062273979187, "logits/chosen": -0.8293671607971191, "logits/rejected": -0.9427703619003296, "logps/chosen": -0.5660743713378906, "logps/rejected": -1.61758553981781, "loss": 0.9408, "nll_loss": 0.7345062494277954, "rewards/accuracies": 0.875, "rewards/chosen": -0.056607436388731, "rewards/margins": 0.10515112429857254, "rewards/rejected": -0.16175857186317444, "step": 2075 }, { "epoch": 1.2664328198871435, "grad_norm": 1.281851053237915, "learning_rate": 5.995345988977342e-06, "log_odds_chosen": 0.14060181379318237, "log_odds_ratio": -0.659014880657196, "logits/chosen": -1.0060251951217651, "logits/rejected": -0.9629154205322266, "logps/chosen": -0.9027858972549438, "logps/rejected": -1.0108611583709717, "loss": 1.1058, "nll_loss": 1.0222764015197754, "rewards/accuracies": 0.625, "rewards/chosen": -0.09027858078479767, "rewards/margins": 0.010807529091835022, "rewards/rejected": -0.10108611732721329, "step": 2076 }, { "epoch": 1.2670428549641604, "grad_norm": 1.3162789344787598, "learning_rate": 5.994366197183099e-06, "log_odds_chosen": 0.38524991273880005, "log_odds_ratio": -0.5773579478263855, "logits/chosen": -0.9276596903800964, "logits/rejected": -0.9139848947525024, "logps/chosen": -1.0156919956207275, "logps/rejected": -1.2976857423782349, "loss": 1.0957, "nll_loss": 1.2184637784957886, "rewards/accuracies": 0.625, "rewards/chosen": -0.10156920552253723, "rewards/margins": 0.028199370950460434, "rewards/rejected": -0.12976858019828796, "step": 2077 }, { "epoch": 1.2676528900411774, "grad_norm": 1.4185545444488525, "learning_rate": 5.993386405388855e-06, "log_odds_chosen": 1.8111941814422607, "log_odds_ratio": -0.5546393394470215, "logits/chosen": -0.8777459859848022, "logits/rejected": -0.8452116250991821, "logps/chosen": -0.8526684641838074, "logps/rejected": -2.423849582672119, "loss": 1.1583, "nll_loss": 1.0989723205566406, "rewards/accuracies": 0.625, "rewards/chosen": -0.0852668434381485, "rewards/margins": 0.15711811184883118, "rewards/rejected": -0.24238495528697968, "step": 2078 }, { "epoch": 1.2682629251181943, "grad_norm": 1.7295124530792236, "learning_rate": 5.992406613594611e-06, "log_odds_chosen": 2.0035572052001953, "log_odds_ratio": -0.2173355519771576, "logits/chosen": -0.7473598718643188, "logits/rejected": -0.8266887068748474, "logps/chosen": -0.8131016492843628, "logps/rejected": -2.2548105716705322, "loss": 1.1053, "nll_loss": 0.9908686876296997, "rewards/accuracies": 0.875, "rewards/chosen": -0.08131016790866852, "rewards/margins": 0.144170880317688, "rewards/rejected": -0.2254810333251953, "step": 2079 }, { "epoch": 1.2688729601952113, "grad_norm": 1.1924011707305908, "learning_rate": 5.991426821800367e-06, "log_odds_chosen": 0.548260509967804, "log_odds_ratio": -0.6349282264709473, "logits/chosen": -1.031799554824829, "logits/rejected": -0.946582019329071, "logps/chosen": -1.0022131204605103, "logps/rejected": -1.449430227279663, "loss": 1.0416, "nll_loss": 1.1358237266540527, "rewards/accuracies": 0.625, "rewards/chosen": -0.10022132098674774, "rewards/margins": 0.04472169280052185, "rewards/rejected": -0.1449429988861084, "step": 2080 }, { "epoch": 1.2694829952722282, "grad_norm": 2.6018073558807373, "learning_rate": 5.990447030006123e-06, "log_odds_chosen": 1.0904252529144287, "log_odds_ratio": -0.38433295488357544, "logits/chosen": -1.0420892238616943, "logits/rejected": -0.9830237627029419, "logps/chosen": -0.9864157438278198, "logps/rejected": -1.8196909427642822, "loss": 1.2262, "nll_loss": 1.3203680515289307, "rewards/accuracies": 0.625, "rewards/chosen": -0.09864157438278198, "rewards/margins": 0.0833275318145752, "rewards/rejected": -0.18196913599967957, "step": 2081 }, { "epoch": 1.270093030349245, "grad_norm": 1.138053297996521, "learning_rate": 5.989467238211879e-06, "log_odds_chosen": 1.7879844903945923, "log_odds_ratio": -0.38536912202835083, "logits/chosen": -0.9118704795837402, "logits/rejected": -0.9490798711776733, "logps/chosen": -0.8407023549079895, "logps/rejected": -2.2914416790008545, "loss": 1.157, "nll_loss": 1.0998549461364746, "rewards/accuracies": 0.875, "rewards/chosen": -0.08407023549079895, "rewards/margins": 0.14507393538951874, "rewards/rejected": -0.22914418578147888, "step": 2082 }, { "epoch": 1.2707030654262619, "grad_norm": 3.4542131423950195, "learning_rate": 5.988487446417636e-06, "log_odds_chosen": 0.6578030586242676, "log_odds_ratio": -0.6063178777694702, "logits/chosen": -0.8297137022018433, "logits/rejected": -0.9301599264144897, "logps/chosen": -0.8971725106239319, "logps/rejected": -1.352824091911316, "loss": 1.2849, "nll_loss": 1.124497652053833, "rewards/accuracies": 0.5, "rewards/chosen": -0.08971726149320602, "rewards/margins": 0.0455651618540287, "rewards/rejected": -0.13528242707252502, "step": 2083 }, { "epoch": 1.271313100503279, "grad_norm": 2.1826281547546387, "learning_rate": 5.987507654623392e-06, "log_odds_chosen": 1.233465552330017, "log_odds_ratio": -0.456116646528244, "logits/chosen": -0.8398498296737671, "logits/rejected": -0.9716507196426392, "logps/chosen": -0.5596115589141846, "logps/rejected": -1.215827226638794, "loss": 1.0395, "nll_loss": 0.9827751517295837, "rewards/accuracies": 0.75, "rewards/chosen": -0.05596115440130234, "rewards/margins": 0.06562156975269318, "rewards/rejected": -0.12158271670341492, "step": 2084 }, { "epoch": 1.2719231355802958, "grad_norm": 3.682701826095581, "learning_rate": 5.9865278628291485e-06, "log_odds_chosen": 2.399792194366455, "log_odds_ratio": -0.33732712268829346, "logits/chosen": -0.7765485048294067, "logits/rejected": -0.9515568017959595, "logps/chosen": -0.8359071612358093, "logps/rejected": -2.6365857124328613, "loss": 1.1919, "nll_loss": 1.1224610805511475, "rewards/accuracies": 0.75, "rewards/chosen": -0.08359071612358093, "rewards/margins": 0.18006786704063416, "rewards/rejected": -0.2636585831642151, "step": 2085 }, { "epoch": 1.2725331706573129, "grad_norm": 2.2492847442626953, "learning_rate": 5.985548071034905e-06, "log_odds_chosen": 0.6780203580856323, "log_odds_ratio": -0.5536536574363708, "logits/chosen": -0.8453336954116821, "logits/rejected": -0.8395431041717529, "logps/chosen": -0.7849796414375305, "logps/rejected": -1.073188066482544, "loss": 1.1635, "nll_loss": 0.9903268218040466, "rewards/accuracies": 0.625, "rewards/chosen": -0.07849796861410141, "rewards/margins": 0.028820831328630447, "rewards/rejected": -0.10731880366802216, "step": 2086 }, { "epoch": 1.2731432057343297, "grad_norm": 1.7051913738250732, "learning_rate": 5.984568279240661e-06, "log_odds_chosen": 1.33612060546875, "log_odds_ratio": -0.42267319560050964, "logits/chosen": -0.911597728729248, "logits/rejected": -0.9782314300537109, "logps/chosen": -0.8679676651954651, "logps/rejected": -1.8918349742889404, "loss": 1.1234, "nll_loss": 0.9294177293777466, "rewards/accuracies": 0.875, "rewards/chosen": -0.08679676800966263, "rewards/margins": 0.10238674283027649, "rewards/rejected": -0.18918350338935852, "step": 2087 }, { "epoch": 1.2737532408113466, "grad_norm": 2.4943430423736572, "learning_rate": 5.983588487446417e-06, "log_odds_chosen": 1.6518354415893555, "log_odds_ratio": -0.2920061945915222, "logits/chosen": -0.8616944551467896, "logits/rejected": -0.9206452369689941, "logps/chosen": -1.0078222751617432, "logps/rejected": -2.328387498855591, "loss": 1.1287, "nll_loss": 1.2601542472839355, "rewards/accuracies": 0.875, "rewards/chosen": -0.10078223049640656, "rewards/margins": 0.13205653429031372, "rewards/rejected": -0.23283876478672028, "step": 2088 }, { "epoch": 1.2743632758883636, "grad_norm": 1.505120873451233, "learning_rate": 5.982608695652174e-06, "log_odds_chosen": 2.193418264389038, "log_odds_ratio": -0.27041319012641907, "logits/chosen": -0.948406994342804, "logits/rejected": -0.8719758987426758, "logps/chosen": -0.9538190960884094, "logps/rejected": -2.7827084064483643, "loss": 1.2571, "nll_loss": 1.2529144287109375, "rewards/accuracies": 0.875, "rewards/chosen": -0.09538191556930542, "rewards/margins": 0.1828889399766922, "rewards/rejected": -0.2782708406448364, "step": 2089 }, { "epoch": 1.2749733109653805, "grad_norm": 1.1806488037109375, "learning_rate": 5.98162890385793e-06, "log_odds_chosen": 1.2414679527282715, "log_odds_ratio": -0.4383309483528137, "logits/chosen": -0.7915858626365662, "logits/rejected": -0.7950471639633179, "logps/chosen": -0.703925609588623, "logps/rejected": -1.635453701019287, "loss": 0.8849, "nll_loss": 0.8167217969894409, "rewards/accuracies": 0.75, "rewards/chosen": -0.07039256393909454, "rewards/margins": 0.09315282106399536, "rewards/rejected": -0.1635453850030899, "step": 2090 }, { "epoch": 1.2755833460423975, "grad_norm": 1.1912096738815308, "learning_rate": 5.980649112063687e-06, "log_odds_chosen": 3.6134963035583496, "log_odds_ratio": -0.34993624687194824, "logits/chosen": -0.9309799671173096, "logits/rejected": -1.0609731674194336, "logps/chosen": -0.5659412741661072, "logps/rejected": -3.4996509552001953, "loss": 0.9078, "nll_loss": 0.8229589462280273, "rewards/accuracies": 0.75, "rewards/chosen": -0.0565941296517849, "rewards/margins": 0.2933709919452667, "rewards/rejected": -0.3499651253223419, "step": 2091 }, { "epoch": 1.2761933811194144, "grad_norm": 3.881821632385254, "learning_rate": 5.979669320269442e-06, "log_odds_chosen": 1.4235764741897583, "log_odds_ratio": -0.4091641902923584, "logits/chosen": -0.8330656886100769, "logits/rejected": -1.0247926712036133, "logps/chosen": -0.6785928010940552, "logps/rejected": -1.7119742631912231, "loss": 1.1874, "nll_loss": 1.0780060291290283, "rewards/accuracies": 0.875, "rewards/chosen": -0.06785927712917328, "rewards/margins": 0.10333815962076187, "rewards/rejected": -0.17119742929935455, "step": 2092 }, { "epoch": 1.2768034161964312, "grad_norm": 2.162045478820801, "learning_rate": 5.978689528475198e-06, "log_odds_chosen": 0.8049120903015137, "log_odds_ratio": -0.5781786441802979, "logits/chosen": -0.7731239795684814, "logits/rejected": -0.8000776767730713, "logps/chosen": -0.8411957025527954, "logps/rejected": -1.3750866651535034, "loss": 1.0275, "nll_loss": 1.0194309949874878, "rewards/accuracies": 0.75, "rewards/chosen": -0.08411957323551178, "rewards/margins": 0.05338909476995468, "rewards/rejected": -0.13750866055488586, "step": 2093 }, { "epoch": 1.2774134512734483, "grad_norm": 1.4592567682266235, "learning_rate": 5.977709736680955e-06, "log_odds_chosen": 1.757279396057129, "log_odds_ratio": -0.28196385502815247, "logits/chosen": -0.9320039749145508, "logits/rejected": -0.9209992289543152, "logps/chosen": -0.6078919172286987, "logps/rejected": -1.7944391965866089, "loss": 0.9985, "nll_loss": 0.8200271129608154, "rewards/accuracies": 0.875, "rewards/chosen": -0.060789190232753754, "rewards/margins": 0.11865472793579102, "rewards/rejected": -0.17944391071796417, "step": 2094 }, { "epoch": 1.2780234863504651, "grad_norm": 2.8460967540740967, "learning_rate": 5.9767299448867114e-06, "log_odds_chosen": 1.8249598741531372, "log_odds_ratio": -0.2526964843273163, "logits/chosen": -1.1027554273605347, "logits/rejected": -1.0488853454589844, "logps/chosen": -0.8530719876289368, "logps/rejected": -2.267021894454956, "loss": 1.0705, "nll_loss": 0.9777473211288452, "rewards/accuracies": 1.0, "rewards/chosen": -0.08530719578266144, "rewards/margins": 0.14139500260353088, "rewards/rejected": -0.22670218348503113, "step": 2095 }, { "epoch": 1.2786335214274822, "grad_norm": 2.199828624725342, "learning_rate": 5.9757501530924676e-06, "log_odds_chosen": 1.9383221864700317, "log_odds_ratio": -0.34839826822280884, "logits/chosen": -0.8183227777481079, "logits/rejected": -0.8275136351585388, "logps/chosen": -0.6032242774963379, "logps/rejected": -1.9078103303909302, "loss": 0.8741, "nll_loss": 0.8564397096633911, "rewards/accuracies": 0.75, "rewards/chosen": -0.06032242998480797, "rewards/margins": 0.13045862317085266, "rewards/rejected": -0.19078105688095093, "step": 2096 }, { "epoch": 1.279243556504499, "grad_norm": 2.5063107013702393, "learning_rate": 5.974770361298224e-06, "log_odds_chosen": 1.0705920457839966, "log_odds_ratio": -0.5224593281745911, "logits/chosen": -0.7213620543479919, "logits/rejected": -0.791671097278595, "logps/chosen": -0.7330665588378906, "logps/rejected": -1.565130591392517, "loss": 0.998, "nll_loss": 0.9009990096092224, "rewards/accuracies": 0.875, "rewards/chosen": -0.07330664992332458, "rewards/margins": 0.083206407725811, "rewards/rejected": -0.1565130650997162, "step": 2097 }, { "epoch": 1.279853591581516, "grad_norm": 2.286142587661743, "learning_rate": 5.97379056950398e-06, "log_odds_chosen": 1.5597329139709473, "log_odds_ratio": -0.31347566843032837, "logits/chosen": -0.9635800123214722, "logits/rejected": -1.0862858295440674, "logps/chosen": -0.675096333026886, "logps/rejected": -1.792607069015503, "loss": 1.2136, "nll_loss": 1.1314818859100342, "rewards/accuracies": 1.0, "rewards/chosen": -0.06750963628292084, "rewards/margins": 0.11175107955932617, "rewards/rejected": -0.179260715842247, "step": 2098 }, { "epoch": 1.2804636266585327, "grad_norm": 2.167843818664551, "learning_rate": 5.972810777709736e-06, "log_odds_chosen": 0.8622989654541016, "log_odds_ratio": -0.5499354004859924, "logits/chosen": -0.7533364295959473, "logits/rejected": -0.8192092776298523, "logps/chosen": -0.7373591661453247, "logps/rejected": -1.448859691619873, "loss": 0.9782, "nll_loss": 0.8596910834312439, "rewards/accuracies": 0.75, "rewards/chosen": -0.07373592257499695, "rewards/margins": 0.0711500495672226, "rewards/rejected": -0.14488598704338074, "step": 2099 }, { "epoch": 1.2810736617355498, "grad_norm": 3.4837827682495117, "learning_rate": 5.971830985915493e-06, "log_odds_chosen": 1.7967908382415771, "log_odds_ratio": -0.36428624391555786, "logits/chosen": -0.8254123330116272, "logits/rejected": -0.8102679252624512, "logps/chosen": -0.7499963045120239, "logps/rejected": -1.9947881698608398, "loss": 1.1741, "nll_loss": 1.1200604438781738, "rewards/accuracies": 0.75, "rewards/chosen": -0.07499963045120239, "rewards/margins": 0.12447918206453323, "rewards/rejected": -0.19947880506515503, "step": 2100 }, { "epoch": 1.2816836968125667, "grad_norm": 1.1366925239562988, "learning_rate": 5.970851194121249e-06, "log_odds_chosen": 0.7096221446990967, "log_odds_ratio": -0.5182823538780212, "logits/chosen": -0.930925190448761, "logits/rejected": -1.0530152320861816, "logps/chosen": -0.7701876163482666, "logps/rejected": -1.2120957374572754, "loss": 1.0499, "nll_loss": 1.021201729774475, "rewards/accuracies": 0.625, "rewards/chosen": -0.07701876014471054, "rewards/margins": 0.04419080913066864, "rewards/rejected": -0.12120957672595978, "step": 2101 }, { "epoch": 1.2822937318895837, "grad_norm": 2.52902889251709, "learning_rate": 5.969871402327006e-06, "log_odds_chosen": 1.8753137588500977, "log_odds_ratio": -0.29482707381248474, "logits/chosen": -0.8015798330307007, "logits/rejected": -0.8744711875915527, "logps/chosen": -0.6434571146965027, "logps/rejected": -1.8391178846359253, "loss": 1.1015, "nll_loss": 0.9910299777984619, "rewards/accuracies": 1.0, "rewards/chosen": -0.06434571743011475, "rewards/margins": 0.11956607550382614, "rewards/rejected": -0.1839117854833603, "step": 2102 }, { "epoch": 1.2829037669666006, "grad_norm": 2.0856826305389404, "learning_rate": 5.968891610532761e-06, "log_odds_chosen": 0.28045403957366943, "log_odds_ratio": -0.645092248916626, "logits/chosen": -1.1446752548217773, "logits/rejected": -1.0318162441253662, "logps/chosen": -1.033076286315918, "logps/rejected": -1.2605682611465454, "loss": 1.2122, "nll_loss": 1.2778067588806152, "rewards/accuracies": 0.5, "rewards/chosen": -0.10330763459205627, "rewards/margins": 0.022749193012714386, "rewards/rejected": -0.12605682015419006, "step": 2103 }, { "epoch": 1.2835138020436174, "grad_norm": 2.025655746459961, "learning_rate": 5.967911818738517e-06, "log_odds_chosen": 1.749812364578247, "log_odds_ratio": -0.3758692145347595, "logits/chosen": -0.861119270324707, "logits/rejected": -0.833025336265564, "logps/chosen": -0.620735228061676, "logps/rejected": -1.8129676580429077, "loss": 1.1415, "nll_loss": 1.0459097623825073, "rewards/accuracies": 0.75, "rewards/chosen": -0.06207352876663208, "rewards/margins": 0.11922325193881989, "rewards/rejected": -0.18129676580429077, "step": 2104 }, { "epoch": 1.2841238371206345, "grad_norm": 1.3570047616958618, "learning_rate": 5.966932026944274e-06, "log_odds_chosen": 1.305345058441162, "log_odds_ratio": -0.3471776247024536, "logits/chosen": -0.9077119827270508, "logits/rejected": -0.9910491704940796, "logps/chosen": -0.7859475612640381, "logps/rejected": -1.6871907711029053, "loss": 0.8904, "nll_loss": 1.0433248281478882, "rewards/accuracies": 0.75, "rewards/chosen": -0.07859475910663605, "rewards/margins": 0.09012432396411896, "rewards/rejected": -0.168719083070755, "step": 2105 }, { "epoch": 1.2847338721976513, "grad_norm": 1.598168969154358, "learning_rate": 5.9659522351500305e-06, "log_odds_chosen": 0.6370433568954468, "log_odds_ratio": -0.5826823711395264, "logits/chosen": -1.1232268810272217, "logits/rejected": -1.030247688293457, "logps/chosen": -0.7584284543991089, "logps/rejected": -1.1454408168792725, "loss": 1.1438, "nll_loss": 1.1126445531845093, "rewards/accuracies": 0.625, "rewards/chosen": -0.07584284245967865, "rewards/margins": 0.03870124742388725, "rewards/rejected": -0.11454407870769501, "step": 2106 }, { "epoch": 1.2853439072746684, "grad_norm": 2.3607094287872314, "learning_rate": 5.964972443355787e-06, "log_odds_chosen": 0.41236138343811035, "log_odds_ratio": -0.6161808967590332, "logits/chosen": -0.7747939825057983, "logits/rejected": -0.8874839544296265, "logps/chosen": -0.8582727909088135, "logps/rejected": -1.1399849653244019, "loss": 0.9616, "nll_loss": 1.1015737056732178, "rewards/accuracies": 0.625, "rewards/chosen": -0.0858272835612297, "rewards/margins": 0.02817121148109436, "rewards/rejected": -0.11399849504232407, "step": 2107 }, { "epoch": 1.2859539423516853, "grad_norm": 2.1757442951202393, "learning_rate": 5.9639926515615436e-06, "log_odds_chosen": 0.6473197340965271, "log_odds_ratio": -0.5287650227546692, "logits/chosen": -1.0206501483917236, "logits/rejected": -0.9782370328903198, "logps/chosen": -0.7729459404945374, "logps/rejected": -1.1763900518417358, "loss": 1.1549, "nll_loss": 1.0663784742355347, "rewards/accuracies": 0.75, "rewards/chosen": -0.07729458808898926, "rewards/margins": 0.04034440964460373, "rewards/rejected": -0.11763900518417358, "step": 2108 }, { "epoch": 1.286563977428702, "grad_norm": 2.3757455348968506, "learning_rate": 5.963012859767299e-06, "log_odds_chosen": 0.6855674386024475, "log_odds_ratio": -0.6107479333877563, "logits/chosen": -0.9737637639045715, "logits/rejected": -1.007084846496582, "logps/chosen": -1.0052474737167358, "logps/rejected": -1.5354626178741455, "loss": 1.19, "nll_loss": 1.3010969161987305, "rewards/accuracies": 0.625, "rewards/chosen": -0.10052473843097687, "rewards/margins": 0.053021520376205444, "rewards/rejected": -0.1535462737083435, "step": 2109 }, { "epoch": 1.287174012505719, "grad_norm": 2.3269312381744385, "learning_rate": 5.962033067973055e-06, "log_odds_chosen": 1.4956622123718262, "log_odds_ratio": -0.4840918779373169, "logits/chosen": -0.8424680829048157, "logits/rejected": -0.9349056482315063, "logps/chosen": -0.872702956199646, "logps/rejected": -1.964923620223999, "loss": 1.1247, "nll_loss": 1.2868976593017578, "rewards/accuracies": 0.75, "rewards/chosen": -0.08727028965950012, "rewards/margins": 0.10922206938266754, "rewards/rejected": -0.19649237394332886, "step": 2110 }, { "epoch": 1.287784047582736, "grad_norm": 1.2398099899291992, "learning_rate": 5.961053276178812e-06, "log_odds_chosen": 0.8344631195068359, "log_odds_ratio": -0.48233500123023987, "logits/chosen": -1.0654667615890503, "logits/rejected": -1.1229100227355957, "logps/chosen": -0.8088322281837463, "logps/rejected": -1.3618886470794678, "loss": 1.0638, "nll_loss": 1.1149991750717163, "rewards/accuracies": 0.75, "rewards/chosen": -0.08088322728872299, "rewards/margins": 0.055305641144514084, "rewards/rejected": -0.13618886470794678, "step": 2111 }, { "epoch": 1.2883940826597529, "grad_norm": 1.0881704092025757, "learning_rate": 5.960073484384568e-06, "log_odds_chosen": 0.08173520117998123, "log_odds_ratio": -0.7259317636489868, "logits/chosen": -0.9246586561203003, "logits/rejected": -0.913550853729248, "logps/chosen": -0.9532552361488342, "logps/rejected": -0.9375674724578857, "loss": 1.1444, "nll_loss": 1.231886386871338, "rewards/accuracies": 0.5, "rewards/chosen": -0.0953255295753479, "rewards/margins": -0.0015687746927142143, "rewards/rejected": -0.09375675022602081, "step": 2112 }, { "epoch": 1.28900411773677, "grad_norm": 1.235650897026062, "learning_rate": 5.959093692590324e-06, "log_odds_chosen": 1.4548063278198242, "log_odds_ratio": -0.4731515944004059, "logits/chosen": -0.7910346984863281, "logits/rejected": -1.0190476179122925, "logps/chosen": -0.7247359156608582, "logps/rejected": -1.8014546632766724, "loss": 1.0573, "nll_loss": 0.9465491771697998, "rewards/accuracies": 0.625, "rewards/chosen": -0.07247359305620193, "rewards/margins": 0.10767187178134918, "rewards/rejected": -0.1801454722881317, "step": 2113 }, { "epoch": 1.2896141528137868, "grad_norm": 2.1302878856658936, "learning_rate": 5.958113900796081e-06, "log_odds_chosen": 1.5219205617904663, "log_odds_ratio": -0.3213742673397064, "logits/chosen": -0.5296574234962463, "logits/rejected": -0.7583574056625366, "logps/chosen": -0.6522428393363953, "logps/rejected": -1.6426782608032227, "loss": 1.0477, "nll_loss": 0.8105820417404175, "rewards/accuracies": 0.875, "rewards/chosen": -0.065224289894104, "rewards/margins": 0.09904354065656662, "rewards/rejected": -0.16426782310009003, "step": 2114 }, { "epoch": 1.2902241878908036, "grad_norm": 1.6560307741165161, "learning_rate": 5.9571341090018364e-06, "log_odds_chosen": 0.6974208950996399, "log_odds_ratio": -0.7506178617477417, "logits/chosen": -1.0718672275543213, "logits/rejected": -0.9704629182815552, "logps/chosen": -1.0241062641143799, "logps/rejected": -1.6959552764892578, "loss": 1.108, "nll_loss": 1.236401915550232, "rewards/accuracies": 0.5, "rewards/chosen": -0.10241062939167023, "rewards/margins": 0.06718490272760391, "rewards/rejected": -0.16959552466869354, "step": 2115 }, { "epoch": 1.2908342229678207, "grad_norm": 1.3631139993667603, "learning_rate": 5.956154317207593e-06, "log_odds_chosen": 0.4798629879951477, "log_odds_ratio": -0.6239835619926453, "logits/chosen": -1.041783094406128, "logits/rejected": -0.8233596086502075, "logps/chosen": -0.9363858103752136, "logps/rejected": -1.3627500534057617, "loss": 0.9816, "nll_loss": 0.9747976064682007, "rewards/accuracies": 0.5, "rewards/chosen": -0.093638576567173, "rewards/margins": 0.04263642802834511, "rewards/rejected": -0.1362750083208084, "step": 2116 }, { "epoch": 1.2914442580448375, "grad_norm": 1.2486579418182373, "learning_rate": 5.9551745254133495e-06, "log_odds_chosen": 0.41047412157058716, "log_odds_ratio": -0.7052605152130127, "logits/chosen": -0.9493283629417419, "logits/rejected": -0.9694016575813293, "logps/chosen": -0.9633997082710266, "logps/rejected": -1.3109486103057861, "loss": 1.0191, "nll_loss": 1.1137638092041016, "rewards/accuracies": 0.375, "rewards/chosen": -0.09633997082710266, "rewards/margins": 0.03475489839911461, "rewards/rejected": -0.13109487295150757, "step": 2117 }, { "epoch": 1.2920542931218546, "grad_norm": 2.730119466781616, "learning_rate": 5.954194733619106e-06, "log_odds_chosen": 0.06448429077863693, "log_odds_ratio": -0.7072682976722717, "logits/chosen": -1.0312165021896362, "logits/rejected": -1.0671627521514893, "logps/chosen": -0.8905166387557983, "logps/rejected": -0.9438337087631226, "loss": 1.1808, "nll_loss": 1.0242044925689697, "rewards/accuracies": 0.625, "rewards/chosen": -0.08905166387557983, "rewards/margins": 0.005331710446625948, "rewards/rejected": -0.0943833738565445, "step": 2118 }, { "epoch": 1.2926643281988714, "grad_norm": 1.182651400566101, "learning_rate": 5.953214941824863e-06, "log_odds_chosen": 0.38033831119537354, "log_odds_ratio": -0.688475489616394, "logits/chosen": -0.973094642162323, "logits/rejected": -0.933610200881958, "logps/chosen": -0.7726792097091675, "logps/rejected": -1.0318821668624878, "loss": 1.1159, "nll_loss": 1.1249845027923584, "rewards/accuracies": 0.625, "rewards/chosen": -0.07726791501045227, "rewards/margins": 0.02592030167579651, "rewards/rejected": -0.10318821668624878, "step": 2119 }, { "epoch": 1.2932743632758883, "grad_norm": 0.9967605471611023, "learning_rate": 5.952235150030618e-06, "log_odds_chosen": 1.1669039726257324, "log_odds_ratio": -0.48149386048316956, "logits/chosen": -1.0315965414047241, "logits/rejected": -1.171295404434204, "logps/chosen": -0.7535057067871094, "logps/rejected": -1.6395751237869263, "loss": 0.9998, "nll_loss": 0.9715520143508911, "rewards/accuracies": 0.75, "rewards/chosen": -0.0753505751490593, "rewards/margins": 0.08860693871974945, "rewards/rejected": -0.16395750641822815, "step": 2120 }, { "epoch": 1.2938843983529054, "grad_norm": 1.7403347492218018, "learning_rate": 5.951255358236374e-06, "log_odds_chosen": 1.0324366092681885, "log_odds_ratio": -0.5439324378967285, "logits/chosen": -0.8627357482910156, "logits/rejected": -0.9663164615631104, "logps/chosen": -1.0947489738464355, "logps/rejected": -1.8299373388290405, "loss": 1.2231, "nll_loss": 1.174721121788025, "rewards/accuracies": 0.5, "rewards/chosen": -0.10947489738464355, "rewards/margins": 0.07351884245872498, "rewards/rejected": -0.18299373984336853, "step": 2121 }, { "epoch": 1.2944944334299222, "grad_norm": 2.3146328926086426, "learning_rate": 5.950275566442131e-06, "log_odds_chosen": 1.646965742111206, "log_odds_ratio": -0.39587002992630005, "logits/chosen": -0.8878288269042969, "logits/rejected": -0.8217625617980957, "logps/chosen": -0.7262383103370667, "logps/rejected": -1.917722463607788, "loss": 1.162, "nll_loss": 0.8468114733695984, "rewards/accuracies": 0.625, "rewards/chosen": -0.0726238340139389, "rewards/margins": 0.11914840340614319, "rewards/rejected": -0.1917722523212433, "step": 2122 }, { "epoch": 1.2951044685069393, "grad_norm": 1.2082645893096924, "learning_rate": 5.949295774647887e-06, "log_odds_chosen": 2.7137534618377686, "log_odds_ratio": -0.19319933652877808, "logits/chosen": -1.0083129405975342, "logits/rejected": -0.9753155708312988, "logps/chosen": -0.8504301309585571, "logps/rejected": -2.9995384216308594, "loss": 0.9438, "nll_loss": 1.0327743291854858, "rewards/accuracies": 1.0, "rewards/chosen": -0.08504302054643631, "rewards/margins": 0.2149108350276947, "rewards/rejected": -0.2999538779258728, "step": 2123 }, { "epoch": 1.2957145035839561, "grad_norm": 5.696073055267334, "learning_rate": 5.948315982853643e-06, "log_odds_chosen": 0.8377284407615662, "log_odds_ratio": -0.5709266662597656, "logits/chosen": -0.9222772121429443, "logits/rejected": -0.8801543712615967, "logps/chosen": -0.9427767395973206, "logps/rejected": -1.387850046157837, "loss": 1.1488, "nll_loss": 1.1620604991912842, "rewards/accuracies": 0.625, "rewards/chosen": -0.09427767992019653, "rewards/margins": 0.044507332146167755, "rewards/rejected": -0.1387850046157837, "step": 2124 }, { "epoch": 1.296324538660973, "grad_norm": 1.3218092918395996, "learning_rate": 5.9473361910594e-06, "log_odds_chosen": 0.21504652500152588, "log_odds_ratio": -0.7689711451530457, "logits/chosen": -0.9996336698532104, "logits/rejected": -0.9106845259666443, "logps/chosen": -1.1955798864364624, "logps/rejected": -1.3542252779006958, "loss": 1.1497, "nll_loss": 1.3881011009216309, "rewards/accuracies": 0.375, "rewards/chosen": -0.11955797672271729, "rewards/margins": 0.015864545479416847, "rewards/rejected": -0.13542252779006958, "step": 2125 }, { "epoch": 1.2969345737379898, "grad_norm": 2.570345163345337, "learning_rate": 5.9463563992651555e-06, "log_odds_chosen": 1.2226347923278809, "log_odds_ratio": -0.4597695767879486, "logits/chosen": -0.955797553062439, "logits/rejected": -1.111461877822876, "logps/chosen": -0.6404402256011963, "logps/rejected": -1.4561131000518799, "loss": 0.9026, "nll_loss": 0.9804287552833557, "rewards/accuracies": 0.5, "rewards/chosen": -0.0640440285205841, "rewards/margins": 0.08156728744506836, "rewards/rejected": -0.14561131596565247, "step": 2126 }, { "epoch": 1.2975446088150069, "grad_norm": 1.5331473350524902, "learning_rate": 5.945376607470912e-06, "log_odds_chosen": 0.9464298486709595, "log_odds_ratio": -0.5831531882286072, "logits/chosen": -0.9881943464279175, "logits/rejected": -0.9470137357711792, "logps/chosen": -0.9532034397125244, "logps/rejected": -1.5606740713119507, "loss": 1.2456, "nll_loss": 1.1546010971069336, "rewards/accuracies": 0.625, "rewards/chosen": -0.09532035887241364, "rewards/margins": 0.06074705719947815, "rewards/rejected": -0.15606743097305298, "step": 2127 }, { "epoch": 1.2981546438920237, "grad_norm": 2.022319793701172, "learning_rate": 5.9443968156766686e-06, "log_odds_chosen": 1.6474946737289429, "log_odds_ratio": -0.2554081380367279, "logits/chosen": -0.7531256675720215, "logits/rejected": -0.8447847366333008, "logps/chosen": -0.7357672452926636, "logps/rejected": -1.8888791799545288, "loss": 1.126, "nll_loss": 0.8748989701271057, "rewards/accuracies": 1.0, "rewards/chosen": -0.07357672601938248, "rewards/margins": 0.11531119048595428, "rewards/rejected": -0.18888792395591736, "step": 2128 }, { "epoch": 1.2987646789690408, "grad_norm": 1.173346757888794, "learning_rate": 5.943417023882425e-06, "log_odds_chosen": 1.5394071340560913, "log_odds_ratio": -0.42465153336524963, "logits/chosen": -0.7113226652145386, "logits/rejected": -0.7875117063522339, "logps/chosen": -0.7644931077957153, "logps/rejected": -1.9130971431732178, "loss": 1.0619, "nll_loss": 0.9430275559425354, "rewards/accuracies": 0.75, "rewards/chosen": -0.07644931226968765, "rewards/margins": 0.1148604154586792, "rewards/rejected": -0.19130972027778625, "step": 2129 }, { "epoch": 1.2993747140460576, "grad_norm": 1.2687321901321411, "learning_rate": 5.942437232088182e-06, "log_odds_chosen": 1.4111673831939697, "log_odds_ratio": -0.7089614272117615, "logits/chosen": -1.0598645210266113, "logits/rejected": -1.007068395614624, "logps/chosen": -1.1159064769744873, "logps/rejected": -2.199272632598877, "loss": 1.2561, "nll_loss": 1.3669323921203613, "rewards/accuracies": 0.375, "rewards/chosen": -0.11159063875675201, "rewards/margins": 0.10833662003278732, "rewards/rejected": -0.21992725133895874, "step": 2130 }, { "epoch": 1.2999847491230745, "grad_norm": 1.8899943828582764, "learning_rate": 5.941457440293938e-06, "log_odds_chosen": 1.0894107818603516, "log_odds_ratio": -0.6187502145767212, "logits/chosen": -0.7958697080612183, "logits/rejected": -0.7940219640731812, "logps/chosen": -0.8969143629074097, "logps/rejected": -1.7079651355743408, "loss": 1.1744, "nll_loss": 1.0693305730819702, "rewards/accuracies": 0.5, "rewards/chosen": -0.08969143778085709, "rewards/margins": 0.08110508322715759, "rewards/rejected": -0.17079651355743408, "step": 2131 }, { "epoch": 1.3005947842000916, "grad_norm": 3.052769660949707, "learning_rate": 5.940477648499693e-06, "log_odds_chosen": 2.532430648803711, "log_odds_ratio": -0.34266215562820435, "logits/chosen": -0.6644211411476135, "logits/rejected": -0.8298815488815308, "logps/chosen": -0.5568380355834961, "logps/rejected": -2.558443307876587, "loss": 0.9518, "nll_loss": 0.9066282510757446, "rewards/accuracies": 0.75, "rewards/chosen": -0.05568381026387215, "rewards/margins": 0.20016053318977356, "rewards/rejected": -0.2558443248271942, "step": 2132 }, { "epoch": 1.3012048192771084, "grad_norm": 3.4560492038726807, "learning_rate": 5.93949785670545e-06, "log_odds_chosen": 2.1238112449645996, "log_odds_ratio": -0.2221895456314087, "logits/chosen": -0.6733236908912659, "logits/rejected": -0.8004091382026672, "logps/chosen": -0.5412494540214539, "logps/rejected": -1.9547821283340454, "loss": 0.9289, "nll_loss": 0.7646684050559998, "rewards/accuracies": 1.0, "rewards/chosen": -0.054124943912029266, "rewards/margins": 0.1413532793521881, "rewards/rejected": -0.19547820091247559, "step": 2133 }, { "epoch": 1.3018148543541255, "grad_norm": 2.422391176223755, "learning_rate": 5.938518064911206e-06, "log_odds_chosen": 2.0288329124450684, "log_odds_ratio": -0.2948196232318878, "logits/chosen": -0.9864996671676636, "logits/rejected": -0.9382100701332092, "logps/chosen": -0.755202054977417, "logps/rejected": -2.3138973712921143, "loss": 1.1312, "nll_loss": 0.9528970718383789, "rewards/accuracies": 0.875, "rewards/chosen": -0.07552020251750946, "rewards/margins": 0.15586955845355988, "rewards/rejected": -0.23138973116874695, "step": 2134 }, { "epoch": 1.3024248894311423, "grad_norm": 4.506279468536377, "learning_rate": 5.937538273116962e-06, "log_odds_chosen": 0.28295227885246277, "log_odds_ratio": -0.6773762702941895, "logits/chosen": -0.5374891757965088, "logits/rejected": -0.7348595261573792, "logps/chosen": -1.087662935256958, "logps/rejected": -1.3364605903625488, "loss": 1.1656, "nll_loss": 1.0588130950927734, "rewards/accuracies": 0.625, "rewards/chosen": -0.10876628756523132, "rewards/margins": 0.024879761040210724, "rewards/rejected": -0.13364604115486145, "step": 2135 }, { "epoch": 1.3030349245081592, "grad_norm": 1.3700674772262573, "learning_rate": 5.936558481322719e-06, "log_odds_chosen": 2.5097708702087402, "log_odds_ratio": -0.25752031803131104, "logits/chosen": -0.6516819596290588, "logits/rejected": -0.45712757110595703, "logps/chosen": -0.6205065250396729, "logps/rejected": -2.304579734802246, "loss": 1.0132, "nll_loss": 0.8180834650993347, "rewards/accuracies": 0.875, "rewards/chosen": -0.062050655484199524, "rewards/margins": 0.16840730607509613, "rewards/rejected": -0.23045796155929565, "step": 2136 }, { "epoch": 1.303644959585176, "grad_norm": 1.0175244808197021, "learning_rate": 5.9355786895284745e-06, "log_odds_chosen": 0.43869805335998535, "log_odds_ratio": -0.7858507037162781, "logits/chosen": -1.084369421005249, "logits/rejected": -1.0524015426635742, "logps/chosen": -0.9615123867988586, "logps/rejected": -1.2512998580932617, "loss": 1.0731, "nll_loss": 1.1400662660598755, "rewards/accuracies": 0.25, "rewards/chosen": -0.09615124762058258, "rewards/margins": 0.028978753834962845, "rewards/rejected": -0.12512999773025513, "step": 2137 }, { "epoch": 1.304254994662193, "grad_norm": 1.0526399612426758, "learning_rate": 5.934598897734231e-06, "log_odds_chosen": 1.6089756488800049, "log_odds_ratio": -0.43874070048332214, "logits/chosen": -0.8291196823120117, "logits/rejected": -0.8550204634666443, "logps/chosen": -0.8689058423042297, "logps/rejected": -2.0329248905181885, "loss": 1.012, "nll_loss": 0.9418435096740723, "rewards/accuracies": 0.625, "rewards/chosen": -0.08689058572053909, "rewards/margins": 0.11640191078186035, "rewards/rejected": -0.20329248905181885, "step": 2138 }, { "epoch": 1.30486502973921, "grad_norm": 11.774141311645508, "learning_rate": 5.933619105939988e-06, "log_odds_chosen": 0.7115578651428223, "log_odds_ratio": -0.6025295257568359, "logits/chosen": -0.8555532097816467, "logits/rejected": -0.9307010173797607, "logps/chosen": -0.8796902894973755, "logps/rejected": -1.4724712371826172, "loss": 1.0247, "nll_loss": 1.014630913734436, "rewards/accuracies": 0.75, "rewards/chosen": -0.08796902000904083, "rewards/margins": 0.059278085827827454, "rewards/rejected": -0.14724712073802948, "step": 2139 }, { "epoch": 1.305475064816227, "grad_norm": 5.271119594573975, "learning_rate": 5.932639314145744e-06, "log_odds_chosen": 1.717612862586975, "log_odds_ratio": -0.5136016011238098, "logits/chosen": -0.7458939552307129, "logits/rejected": -0.8564097285270691, "logps/chosen": -0.6740074157714844, "logps/rejected": -2.0968337059020996, "loss": 1.1312, "nll_loss": 1.0896981954574585, "rewards/accuracies": 0.625, "rewards/chosen": -0.0674007385969162, "rewards/margins": 0.142282634973526, "rewards/rejected": -0.2096833735704422, "step": 2140 }, { "epoch": 1.3060850998932438, "grad_norm": 1.3595714569091797, "learning_rate": 5.9316595223515e-06, "log_odds_chosen": 0.9022659659385681, "log_odds_ratio": -0.6249160766601562, "logits/chosen": -0.912567675113678, "logits/rejected": -0.9159340858459473, "logps/chosen": -1.0184211730957031, "logps/rejected": -1.7863736152648926, "loss": 1.0793, "nll_loss": 1.0608733892440796, "rewards/accuracies": 0.625, "rewards/chosen": -0.10184212028980255, "rewards/margins": 0.07679525762796402, "rewards/rejected": -0.17863738536834717, "step": 2141 }, { "epoch": 1.3066951349702607, "grad_norm": 0.9978041052818298, "learning_rate": 5.930679730557257e-06, "log_odds_chosen": 1.700132131576538, "log_odds_ratio": -0.3343655467033386, "logits/chosen": -0.5794309377670288, "logits/rejected": -0.72115159034729, "logps/chosen": -0.6779916286468506, "logps/rejected": -1.8816790580749512, "loss": 1.1052, "nll_loss": 0.9998888969421387, "rewards/accuracies": 0.875, "rewards/chosen": -0.0677991658449173, "rewards/margins": 0.12036874145269394, "rewards/rejected": -0.18816792964935303, "step": 2142 }, { "epoch": 1.3073051700472778, "grad_norm": 3.1538074016571045, "learning_rate": 5.929699938763012e-06, "log_odds_chosen": 1.5653934478759766, "log_odds_ratio": -0.5941264629364014, "logits/chosen": -0.8308488726615906, "logits/rejected": -0.8357005715370178, "logps/chosen": -0.8221206665039062, "logps/rejected": -1.9775984287261963, "loss": 1.1557, "nll_loss": 1.3065102100372314, "rewards/accuracies": 0.625, "rewards/chosen": -0.08221206814050674, "rewards/margins": 0.115547776222229, "rewards/rejected": -0.19775986671447754, "step": 2143 }, { "epoch": 1.3079152051242946, "grad_norm": 2.907522678375244, "learning_rate": 5.928720146968769e-06, "log_odds_chosen": 0.6051357388496399, "log_odds_ratio": -0.5706139802932739, "logits/chosen": -0.9710012674331665, "logits/rejected": -0.9416658878326416, "logps/chosen": -0.7398017644882202, "logps/rejected": -1.0928573608398438, "loss": 1.0105, "nll_loss": 0.8539373278617859, "rewards/accuracies": 0.625, "rewards/chosen": -0.0739801749587059, "rewards/margins": 0.03530555218458176, "rewards/rejected": -0.10928573459386826, "step": 2144 }, { "epoch": 1.3085252402013117, "grad_norm": 1.435171365737915, "learning_rate": 5.927740355174525e-06, "log_odds_chosen": 1.708690881729126, "log_odds_ratio": -0.38191285729408264, "logits/chosen": -0.8086074590682983, "logits/rejected": -0.8493276834487915, "logps/chosen": -0.6571851372718811, "logps/rejected": -1.9394214153289795, "loss": 1.078, "nll_loss": 0.9252721667289734, "rewards/accuracies": 0.875, "rewards/chosen": -0.06571850925683975, "rewards/margins": 0.12822364270687103, "rewards/rejected": -0.19394215941429138, "step": 2145 }, { "epoch": 1.3091352752783285, "grad_norm": 2.9040791988372803, "learning_rate": 5.926760563380281e-06, "log_odds_chosen": 1.6281657218933105, "log_odds_ratio": -0.3258216083049774, "logits/chosen": -0.9097169637680054, "logits/rejected": -0.826760470867157, "logps/chosen": -0.7107625603675842, "logps/rejected": -1.7619608640670776, "loss": 1.1994, "nll_loss": 1.2146252393722534, "rewards/accuracies": 0.875, "rewards/chosen": -0.07107625901699066, "rewards/margins": 0.10511982440948486, "rewards/rejected": -0.17619609832763672, "step": 2146 }, { "epoch": 1.3097453103553454, "grad_norm": 2.782897710800171, "learning_rate": 5.925780771586038e-06, "log_odds_chosen": 1.3325453996658325, "log_odds_ratio": -0.4123319089412689, "logits/chosen": -0.9800021648406982, "logits/rejected": -1.1048524379730225, "logps/chosen": -0.8082702159881592, "logps/rejected": -1.666641116142273, "loss": 1.0776, "nll_loss": 1.1187148094177246, "rewards/accuracies": 0.75, "rewards/chosen": -0.0808270126581192, "rewards/margins": 0.08583709597587585, "rewards/rejected": -0.16666412353515625, "step": 2147 }, { "epoch": 1.3103553454323624, "grad_norm": 1.3514825105667114, "learning_rate": 5.924800979791794e-06, "log_odds_chosen": 0.43066057562828064, "log_odds_ratio": -0.5746071934700012, "logits/chosen": -0.8082869052886963, "logits/rejected": -0.9274190068244934, "logps/chosen": -0.8783783912658691, "logps/rejected": -1.0908420085906982, "loss": 1.0948, "nll_loss": 1.3045227527618408, "rewards/accuracies": 0.75, "rewards/chosen": -0.0878378376364708, "rewards/margins": 0.02124636620283127, "rewards/rejected": -0.10908419638872147, "step": 2148 }, { "epoch": 1.3109653805093793, "grad_norm": 1.630440592765808, "learning_rate": 5.92382118799755e-06, "log_odds_chosen": 1.69015371799469, "log_odds_ratio": -0.22206763923168182, "logits/chosen": -0.889275312423706, "logits/rejected": -0.671660304069519, "logps/chosen": -0.6696704626083374, "logps/rejected": -1.851110577583313, "loss": 1.146, "nll_loss": 1.1386903524398804, "rewards/accuracies": 1.0, "rewards/chosen": -0.06696704775094986, "rewards/margins": 0.11814401298761368, "rewards/rejected": -0.18511106073856354, "step": 2149 }, { "epoch": 1.3115754155863963, "grad_norm": 1.0883234739303589, "learning_rate": 5.922841396203307e-06, "log_odds_chosen": 0.09630760550498962, "log_odds_ratio": -0.8497442603111267, "logits/chosen": -1.0299084186553955, "logits/rejected": -0.8585292100906372, "logps/chosen": -1.186216115951538, "logps/rejected": -1.2872776985168457, "loss": 1.2251, "nll_loss": 1.2663862705230713, "rewards/accuracies": 0.25, "rewards/chosen": -0.11862161755561829, "rewards/margins": 0.01010615099221468, "rewards/rejected": -0.1287277787923813, "step": 2150 }, { "epoch": 1.3121854506634132, "grad_norm": 1.146383285522461, "learning_rate": 5.921861604409063e-06, "log_odds_chosen": 1.870405912399292, "log_odds_ratio": -0.3933902978897095, "logits/chosen": -0.7877167463302612, "logits/rejected": -0.8448804020881653, "logps/chosen": -0.6251991391181946, "logps/rejected": -2.0884931087493896, "loss": 1.0244, "nll_loss": 0.9609172940254211, "rewards/accuracies": 0.875, "rewards/chosen": -0.06251991540193558, "rewards/margins": 0.14632941782474518, "rewards/rejected": -0.20884931087493896, "step": 2151 }, { "epoch": 1.31279548574043, "grad_norm": 1.3821486234664917, "learning_rate": 5.920881812614819e-06, "log_odds_chosen": 1.4083694219589233, "log_odds_ratio": -0.38055405020713806, "logits/chosen": -0.7314466834068298, "logits/rejected": -0.8755241632461548, "logps/chosen": -0.6464147567749023, "logps/rejected": -1.6221308708190918, "loss": 0.9789, "nll_loss": 1.1329470872879028, "rewards/accuracies": 0.875, "rewards/chosen": -0.06464147567749023, "rewards/margins": 0.09757161885499954, "rewards/rejected": -0.16221310198307037, "step": 2152 }, { "epoch": 1.3134055208174469, "grad_norm": 1.9005295038223267, "learning_rate": 5.919902020820576e-06, "log_odds_chosen": 0.787128746509552, "log_odds_ratio": -0.6035783886909485, "logits/chosen": -0.9774180054664612, "logits/rejected": -0.968865156173706, "logps/chosen": -0.905447244644165, "logps/rejected": -1.5039360523223877, "loss": 1.0866, "nll_loss": 1.0897489786148071, "rewards/accuracies": 0.625, "rewards/chosen": -0.09054473042488098, "rewards/margins": 0.05984887108206749, "rewards/rejected": -0.15039360523223877, "step": 2153 }, { "epoch": 1.314015555894464, "grad_norm": 3.8323113918304443, "learning_rate": 5.918922229026331e-06, "log_odds_chosen": 1.5301035642623901, "log_odds_ratio": -0.38191694021224976, "logits/chosen": -0.8141208291053772, "logits/rejected": -0.6955301761627197, "logps/chosen": -0.6314278841018677, "logps/rejected": -1.720259189605713, "loss": 1.1167, "nll_loss": 0.8335922956466675, "rewards/accuracies": 1.0, "rewards/chosen": -0.063142791390419, "rewards/margins": 0.10888314247131348, "rewards/rejected": -0.17202593386173248, "step": 2154 }, { "epoch": 1.3146255909714808, "grad_norm": 0.9646884202957153, "learning_rate": 5.917942437232087e-06, "log_odds_chosen": 1.18336820602417, "log_odds_ratio": -0.4420488476753235, "logits/chosen": -0.7851940393447876, "logits/rejected": -0.7719038128852844, "logps/chosen": -0.6772198677062988, "logps/rejected": -1.4986839294433594, "loss": 0.9391, "nll_loss": 0.8062781691551208, "rewards/accuracies": 0.875, "rewards/chosen": -0.06772199273109436, "rewards/margins": 0.08214640617370605, "rewards/rejected": -0.14986839890480042, "step": 2155 }, { "epoch": 1.3152356260484979, "grad_norm": 1.6132712364196777, "learning_rate": 5.916962645437844e-06, "log_odds_chosen": 0.9051122069358826, "log_odds_ratio": -0.6538543701171875, "logits/chosen": -0.9770084619522095, "logits/rejected": -0.946773886680603, "logps/chosen": -0.9710173606872559, "logps/rejected": -1.7768346071243286, "loss": 1.0472, "nll_loss": 1.0684337615966797, "rewards/accuracies": 0.5, "rewards/chosen": -0.09710174053907394, "rewards/margins": 0.08058172464370728, "rewards/rejected": -0.17768345773220062, "step": 2156 }, { "epoch": 1.3158456611255147, "grad_norm": 2.771326780319214, "learning_rate": 5.9159828536436e-06, "log_odds_chosen": 0.9452359676361084, "log_odds_ratio": -0.6043030619621277, "logits/chosen": -1.0728580951690674, "logits/rejected": -1.0075156688690186, "logps/chosen": -1.0219230651855469, "logps/rejected": -1.6634050607681274, "loss": 1.1796, "nll_loss": 1.1619621515274048, "rewards/accuracies": 0.625, "rewards/chosen": -0.10219230502843857, "rewards/margins": 0.06414821743965149, "rewards/rejected": -0.16634051501750946, "step": 2157 }, { "epoch": 1.3164556962025316, "grad_norm": 5.12546443939209, "learning_rate": 5.915003061849357e-06, "log_odds_chosen": 1.8048269748687744, "log_odds_ratio": -0.3378034830093384, "logits/chosen": -0.708325982093811, "logits/rejected": -0.9477869272232056, "logps/chosen": -0.6349039077758789, "logps/rejected": -1.889514684677124, "loss": 0.9245, "nll_loss": 0.8708020448684692, "rewards/accuracies": 0.875, "rewards/chosen": -0.06349039077758789, "rewards/margins": 0.12546107172966003, "rewards/rejected": -0.18895146250724792, "step": 2158 }, { "epoch": 1.3170657312795486, "grad_norm": 1.0892974138259888, "learning_rate": 5.9140232700551134e-06, "log_odds_chosen": 0.264554500579834, "log_odds_ratio": -0.7285789251327515, "logits/chosen": -1.0775846242904663, "logits/rejected": -0.9638126492500305, "logps/chosen": -0.9551974534988403, "logps/rejected": -1.1899993419647217, "loss": 1.1159, "nll_loss": 1.0414330959320068, "rewards/accuracies": 0.375, "rewards/chosen": -0.09551975131034851, "rewards/margins": 0.023480188101530075, "rewards/rejected": -0.11899994313716888, "step": 2159 }, { "epoch": 1.3176757663565655, "grad_norm": 2.5343174934387207, "learning_rate": 5.913043478260869e-06, "log_odds_chosen": 0.37864893674850464, "log_odds_ratio": -0.5854939818382263, "logits/chosen": -1.0726263523101807, "logits/rejected": -1.027753233909607, "logps/chosen": -0.9603984951972961, "logps/rejected": -1.129237174987793, "loss": 1.147, "nll_loss": 1.2639267444610596, "rewards/accuracies": 0.625, "rewards/chosen": -0.09603984653949738, "rewards/margins": 0.01688387431204319, "rewards/rejected": -0.11292372643947601, "step": 2160 }, { "epoch": 1.3182858014335825, "grad_norm": 0.9717926383018494, "learning_rate": 5.912063686466626e-06, "log_odds_chosen": 0.12707281112670898, "log_odds_ratio": -0.8228939771652222, "logits/chosen": -0.8612271547317505, "logits/rejected": -0.964375913143158, "logps/chosen": -0.9818617105484009, "logps/rejected": -0.9997646808624268, "loss": 0.9899, "nll_loss": 0.9180717468261719, "rewards/accuracies": 0.25, "rewards/chosen": -0.09818616509437561, "rewards/margins": 0.0017903130501508713, "rewards/rejected": -0.09997648000717163, "step": 2161 }, { "epoch": 1.3188958365105994, "grad_norm": 1.716295599937439, "learning_rate": 5.911083894672382e-06, "log_odds_chosen": 0.15191087126731873, "log_odds_ratio": -0.7381735444068909, "logits/chosen": -1.001070499420166, "logits/rejected": -0.8967993259429932, "logps/chosen": -0.9085825681686401, "logps/rejected": -1.0338847637176514, "loss": 1.1468, "nll_loss": 1.0492467880249023, "rewards/accuracies": 0.25, "rewards/chosen": -0.09085825085639954, "rewards/margins": 0.012530227191746235, "rewards/rejected": -0.1033884808421135, "step": 2162 }, { "epoch": 1.3195058715876162, "grad_norm": 0.8723849058151245, "learning_rate": 5.910104102878138e-06, "log_odds_chosen": 1.0585767030715942, "log_odds_ratio": -0.427884042263031, "logits/chosen": -0.6285152435302734, "logits/rejected": -0.7320806980133057, "logps/chosen": -0.5954476594924927, "logps/rejected": -1.2148717641830444, "loss": 0.9685, "nll_loss": 0.7282158136367798, "rewards/accuracies": 0.75, "rewards/chosen": -0.05954476445913315, "rewards/margins": 0.06194241717457771, "rewards/rejected": -0.12148717045783997, "step": 2163 }, { "epoch": 1.3201159066646333, "grad_norm": 1.9056675434112549, "learning_rate": 5.909124311083895e-06, "log_odds_chosen": 1.3790942430496216, "log_odds_ratio": -0.4501732885837555, "logits/chosen": -0.8858400583267212, "logits/rejected": -0.7279493808746338, "logps/chosen": -0.7975525856018066, "logps/rejected": -1.7208316326141357, "loss": 1.0451, "nll_loss": 0.8944565057754517, "rewards/accuracies": 0.75, "rewards/chosen": -0.0797552540898323, "rewards/margins": 0.09232790768146515, "rewards/rejected": -0.17208316922187805, "step": 2164 }, { "epoch": 1.3207259417416501, "grad_norm": 2.205005168914795, "learning_rate": 5.908144519289651e-06, "log_odds_chosen": 1.1229735612869263, "log_odds_ratio": -0.4415638744831085, "logits/chosen": -1.022294521331787, "logits/rejected": -0.9330035448074341, "logps/chosen": -1.1380237340927124, "logps/rejected": -1.9306780099868774, "loss": 1.1964, "nll_loss": 1.2823100090026855, "rewards/accuracies": 0.75, "rewards/chosen": -0.11380238085985184, "rewards/margins": 0.07926543802022934, "rewards/rejected": -0.19306781888008118, "step": 2165 }, { "epoch": 1.321335976818667, "grad_norm": 2.5996737480163574, "learning_rate": 5.907164727495406e-06, "log_odds_chosen": 0.6581010222434998, "log_odds_ratio": -0.4934252202510834, "logits/chosen": -1.0733503103256226, "logits/rejected": -0.9884737730026245, "logps/chosen": -1.0401870012283325, "logps/rejected": -1.5645644664764404, "loss": 0.9945, "nll_loss": 1.1150964498519897, "rewards/accuracies": 0.625, "rewards/chosen": -0.10401870310306549, "rewards/margins": 0.05243774130940437, "rewards/rejected": -0.15645644068717957, "step": 2166 }, { "epoch": 1.321946011895684, "grad_norm": 1.477430820465088, "learning_rate": 5.906184935701163e-06, "log_odds_chosen": 1.1470556259155273, "log_odds_ratio": -0.519717812538147, "logits/chosen": -0.8668819665908813, "logits/rejected": -0.9288657307624817, "logps/chosen": -0.8705576062202454, "logps/rejected": -1.6667900085449219, "loss": 1.0367, "nll_loss": 1.2371599674224854, "rewards/accuracies": 0.75, "rewards/chosen": -0.08705577254295349, "rewards/margins": 0.07962323725223541, "rewards/rejected": -0.1666789948940277, "step": 2167 }, { "epoch": 1.322556046972701, "grad_norm": 1.9983882904052734, "learning_rate": 5.905205143906919e-06, "log_odds_chosen": 1.8739228248596191, "log_odds_ratio": -0.2989254295825958, "logits/chosen": -1.0132439136505127, "logits/rejected": -0.9035096168518066, "logps/chosen": -0.8144127130508423, "logps/rejected": -2.1796343326568604, "loss": 1.13, "nll_loss": 1.111640453338623, "rewards/accuracies": 0.875, "rewards/chosen": -0.08144127577543259, "rewards/margins": 0.13652217388153076, "rewards/rejected": -0.21796345710754395, "step": 2168 }, { "epoch": 1.3231660820497178, "grad_norm": 1.7511965036392212, "learning_rate": 5.9042253521126755e-06, "log_odds_chosen": 2.0018420219421387, "log_odds_ratio": -0.47653627395629883, "logits/chosen": -0.946158766746521, "logits/rejected": -0.8121472597122192, "logps/chosen": -0.9106037020683289, "logps/rejected": -2.6141204833984375, "loss": 1.214, "nll_loss": 1.3919334411621094, "rewards/accuracies": 0.625, "rewards/chosen": -0.09106037020683289, "rewards/margins": 0.17035169899463654, "rewards/rejected": -0.2614120841026306, "step": 2169 }, { "epoch": 1.3237761171267348, "grad_norm": 1.0253140926361084, "learning_rate": 5.9032455603184325e-06, "log_odds_chosen": 0.5830503106117249, "log_odds_ratio": -0.6466146111488342, "logits/chosen": -0.9289714097976685, "logits/rejected": -0.9301590919494629, "logps/chosen": -1.0377713441848755, "logps/rejected": -1.5469017028808594, "loss": 1.1502, "nll_loss": 1.2085074186325073, "rewards/accuracies": 0.625, "rewards/chosen": -0.10377714037895203, "rewards/margins": 0.05091303214430809, "rewards/rejected": -0.15469017624855042, "step": 2170 }, { "epoch": 1.3243861522037517, "grad_norm": 1.4152812957763672, "learning_rate": 5.902265768524189e-06, "log_odds_chosen": 0.6257781982421875, "log_odds_ratio": -0.6839464902877808, "logits/chosen": -0.9962184429168701, "logits/rejected": -1.0152599811553955, "logps/chosen": -1.0845355987548828, "logps/rejected": -1.5490875244140625, "loss": 1.1856, "nll_loss": 1.1739332675933838, "rewards/accuracies": 0.375, "rewards/chosen": -0.10845356434583664, "rewards/margins": 0.046455200761556625, "rewards/rejected": -0.15490874648094177, "step": 2171 }, { "epoch": 1.3249961872807687, "grad_norm": 4.356991767883301, "learning_rate": 5.901285976729945e-06, "log_odds_chosen": 2.498591423034668, "log_odds_ratio": -0.23016750812530518, "logits/chosen": -0.5376872420310974, "logits/rejected": -0.6091080904006958, "logps/chosen": -0.7428491711616516, "logps/rejected": -2.7136049270629883, "loss": 0.974, "nll_loss": 0.8321677446365356, "rewards/accuracies": 1.0, "rewards/chosen": -0.07428491860628128, "rewards/margins": 0.19707557559013367, "rewards/rejected": -0.27136051654815674, "step": 2172 }, { "epoch": 1.3256062223577856, "grad_norm": 2.450054407119751, "learning_rate": 5.900306184935701e-06, "log_odds_chosen": 1.2713193893432617, "log_odds_ratio": -0.4385000169277191, "logits/chosen": -0.6627016663551331, "logits/rejected": -0.7535789608955383, "logps/chosen": -0.7653412222862244, "logps/rejected": -1.6546486616134644, "loss": 1.1613, "nll_loss": 1.008916974067688, "rewards/accuracies": 0.625, "rewards/chosen": -0.07653412222862244, "rewards/margins": 0.08893074840307236, "rewards/rejected": -0.1654648780822754, "step": 2173 }, { "epoch": 1.3262162574348024, "grad_norm": 1.272329330444336, "learning_rate": 5.899326393141457e-06, "log_odds_chosen": 0.35253846645355225, "log_odds_ratio": -0.5795521140098572, "logits/chosen": -0.860049843788147, "logits/rejected": -0.7973164319992065, "logps/chosen": -0.8482930660247803, "logps/rejected": -1.103979229927063, "loss": 1.005, "nll_loss": 1.0559557676315308, "rewards/accuracies": 0.625, "rewards/chosen": -0.08482931554317474, "rewards/margins": 0.025568615645170212, "rewards/rejected": -0.11039792001247406, "step": 2174 }, { "epoch": 1.3268262925118195, "grad_norm": 2.370755672454834, "learning_rate": 5.898346601347214e-06, "log_odds_chosen": 1.2964487075805664, "log_odds_ratio": -0.5001427531242371, "logits/chosen": -0.9016985893249512, "logits/rejected": -0.9790381193161011, "logps/chosen": -0.7109994888305664, "logps/rejected": -1.6693288087844849, "loss": 1.2854, "nll_loss": 1.3728817701339722, "rewards/accuracies": 0.75, "rewards/chosen": -0.07109995186328888, "rewards/margins": 0.0958329364657402, "rewards/rejected": -0.1669328659772873, "step": 2175 }, { "epoch": 1.3274363275888363, "grad_norm": 2.701427459716797, "learning_rate": 5.89736680955297e-06, "log_odds_chosen": 0.970658004283905, "log_odds_ratio": -0.5330080986022949, "logits/chosen": -0.8893380761146545, "logits/rejected": -0.855997622013092, "logps/chosen": -0.8142818212509155, "logps/rejected": -1.5028560161590576, "loss": 1.2089, "nll_loss": 1.0925703048706055, "rewards/accuracies": 0.625, "rewards/chosen": -0.08142818510532379, "rewards/margins": 0.06885740906000137, "rewards/rejected": -0.15028560161590576, "step": 2176 }, { "epoch": 1.3280463626658534, "grad_norm": 2.4988856315612793, "learning_rate": 5.896387017758725e-06, "log_odds_chosen": 0.5331727266311646, "log_odds_ratio": -0.5752596855163574, "logits/chosen": -0.6515904664993286, "logits/rejected": -0.6359684467315674, "logps/chosen": -0.842158854007721, "logps/rejected": -1.155572772026062, "loss": 1.0959, "nll_loss": 1.1095380783081055, "rewards/accuracies": 0.5, "rewards/chosen": -0.08421589434146881, "rewards/margins": 0.03134138137102127, "rewards/rejected": -0.11555726826190948, "step": 2177 }, { "epoch": 1.3286563977428703, "grad_norm": 9.359964370727539, "learning_rate": 5.895407225964482e-06, "log_odds_chosen": 2.6210129261016846, "log_odds_ratio": -0.31718361377716064, "logits/chosen": -0.8807947635650635, "logits/rejected": -0.8586338758468628, "logps/chosen": -0.6620334982872009, "logps/rejected": -2.7340896129608154, "loss": 1.058, "nll_loss": 0.9998201727867126, "rewards/accuracies": 0.875, "rewards/chosen": -0.06620335578918457, "rewards/margins": 0.2072056233882904, "rewards/rejected": -0.273408979177475, "step": 2178 }, { "epoch": 1.329266432819887, "grad_norm": 2.41823148727417, "learning_rate": 5.894427434170238e-06, "log_odds_chosen": 0.8516008853912354, "log_odds_ratio": -0.5974605083465576, "logits/chosen": -0.8805670142173767, "logits/rejected": -0.7424733638763428, "logps/chosen": -0.8504729270935059, "logps/rejected": -1.493896245956421, "loss": 1.3871, "nll_loss": 1.1795635223388672, "rewards/accuracies": 0.75, "rewards/chosen": -0.08504728972911835, "rewards/margins": 0.06434233486652374, "rewards/rejected": -0.1493896245956421, "step": 2179 }, { "epoch": 1.329876467896904, "grad_norm": 1.284847378730774, "learning_rate": 5.8934476423759945e-06, "log_odds_chosen": 2.6014821529388428, "log_odds_ratio": -0.14247652888298035, "logits/chosen": -0.5527442693710327, "logits/rejected": -0.7208833694458008, "logps/chosen": -0.4320557117462158, "logps/rejected": -2.007732629776001, "loss": 0.876, "nll_loss": 0.8558809757232666, "rewards/accuracies": 1.0, "rewards/chosen": -0.04320557042956352, "rewards/margins": 0.15756770968437195, "rewards/rejected": -0.20077328383922577, "step": 2180 }, { "epoch": 1.330486502973921, "grad_norm": 2.2959587574005127, "learning_rate": 5.8924678505817515e-06, "log_odds_chosen": 1.0790210962295532, "log_odds_ratio": -0.5515794157981873, "logits/chosen": -0.8850275278091431, "logits/rejected": -0.6675537824630737, "logps/chosen": -0.7214459776878357, "logps/rejected": -1.3839941024780273, "loss": 0.9808, "nll_loss": 0.9113630652427673, "rewards/accuracies": 0.5, "rewards/chosen": -0.07214459776878357, "rewards/margins": 0.06625481694936752, "rewards/rejected": -0.1383994221687317, "step": 2181 }, { "epoch": 1.3310965380509379, "grad_norm": 1.5354762077331543, "learning_rate": 5.891488058787508e-06, "log_odds_chosen": 2.2267580032348633, "log_odds_ratio": -0.34692323207855225, "logits/chosen": -0.8389135599136353, "logits/rejected": -0.5723263621330261, "logps/chosen": -0.583034098148346, "logps/rejected": -2.191209316253662, "loss": 0.8762, "nll_loss": 0.7782399654388428, "rewards/accuracies": 0.75, "rewards/chosen": -0.058303412050008774, "rewards/margins": 0.16081750392913818, "rewards/rejected": -0.21912091970443726, "step": 2182 }, { "epoch": 1.331706573127955, "grad_norm": 1.3045998811721802, "learning_rate": 5.890508266993263e-06, "log_odds_chosen": 1.5801793336868286, "log_odds_ratio": -0.35907232761383057, "logits/chosen": -0.8159648180007935, "logits/rejected": -0.7301837801933289, "logps/chosen": -0.6994892358779907, "logps/rejected": -1.8598859310150146, "loss": 1.058, "nll_loss": 0.8889079689979553, "rewards/accuracies": 0.875, "rewards/chosen": -0.06994891911745071, "rewards/margins": 0.11603967845439911, "rewards/rejected": -0.18598860502243042, "step": 2183 }, { "epoch": 1.3323166082049718, "grad_norm": 1.7677205801010132, "learning_rate": 5.88952847519902e-06, "log_odds_chosen": 0.773006021976471, "log_odds_ratio": -0.5589374303817749, "logits/chosen": -0.7473510503768921, "logits/rejected": -0.6678904891014099, "logps/chosen": -0.8721461296081543, "logps/rejected": -1.3338805437088013, "loss": 1.2229, "nll_loss": 1.087605357170105, "rewards/accuracies": 0.625, "rewards/chosen": -0.08721461147069931, "rewards/margins": 0.04617343470454216, "rewards/rejected": -0.13338805735111237, "step": 2184 }, { "epoch": 1.3329266432819886, "grad_norm": 2.2424070835113525, "learning_rate": 5.888548683404776e-06, "log_odds_chosen": 0.5774070620536804, "log_odds_ratio": -0.7537945508956909, "logits/chosen": -0.8006496429443359, "logits/rejected": -0.6455296277999878, "logps/chosen": -1.0240082740783691, "logps/rejected": -1.4450750350952148, "loss": 1.2452, "nll_loss": 1.2310832738876343, "rewards/accuracies": 0.625, "rewards/chosen": -0.10240083187818527, "rewards/margins": 0.04210667312145233, "rewards/rejected": -0.144507497549057, "step": 2185 }, { "epoch": 1.3335366783590057, "grad_norm": 3.0706677436828613, "learning_rate": 5.887568891610533e-06, "log_odds_chosen": 1.8379716873168945, "log_odds_ratio": -0.2956753969192505, "logits/chosen": -0.8539910316467285, "logits/rejected": -0.7429335117340088, "logps/chosen": -0.7850522994995117, "logps/rejected": -2.1686770915985107, "loss": 1.0739, "nll_loss": 0.9902758002281189, "rewards/accuracies": 0.875, "rewards/chosen": -0.07850523293018341, "rewards/margins": 0.13836246728897095, "rewards/rejected": -0.21686770021915436, "step": 2186 }, { "epoch": 1.3341467134360225, "grad_norm": 2.494065284729004, "learning_rate": 5.886589099816289e-06, "log_odds_chosen": 0.7121623754501343, "log_odds_ratio": -0.5594691038131714, "logits/chosen": -1.017078161239624, "logits/rejected": -0.8119895458221436, "logps/chosen": -0.7642079591751099, "logps/rejected": -1.345358967781067, "loss": 0.9772, "nll_loss": 1.012791633605957, "rewards/accuracies": 0.625, "rewards/chosen": -0.07642079889774323, "rewards/margins": 0.05811510980129242, "rewards/rejected": -0.13453590869903564, "step": 2187 }, { "epoch": 1.3347567485130396, "grad_norm": 1.4739826917648315, "learning_rate": 5.885609308022045e-06, "log_odds_chosen": 2.0628538131713867, "log_odds_ratio": -0.4400041401386261, "logits/chosen": -0.8212138414382935, "logits/rejected": -0.875048816204071, "logps/chosen": -0.6701746582984924, "logps/rejected": -2.2712883949279785, "loss": 1.0588, "nll_loss": 0.9822679162025452, "rewards/accuracies": 0.75, "rewards/chosen": -0.06701746582984924, "rewards/margins": 0.16011139750480652, "rewards/rejected": -0.22712886333465576, "step": 2188 }, { "epoch": 1.3353667835900564, "grad_norm": 1.4146939516067505, "learning_rate": 5.884629516227801e-06, "log_odds_chosen": 1.8223369121551514, "log_odds_ratio": -0.47681647539138794, "logits/chosen": -0.7326204180717468, "logits/rejected": -0.7958992719650269, "logps/chosen": -0.7567592263221741, "logps/rejected": -2.2966630458831787, "loss": 0.872, "nll_loss": 0.8396555185317993, "rewards/accuracies": 0.625, "rewards/chosen": -0.07567591965198517, "rewards/margins": 0.15399038791656494, "rewards/rejected": -0.2296663075685501, "step": 2189 }, { "epoch": 1.3359768186670733, "grad_norm": 1.0571215152740479, "learning_rate": 5.8836497244335575e-06, "log_odds_chosen": 0.9822187423706055, "log_odds_ratio": -0.4871056079864502, "logits/chosen": -0.8124821186065674, "logits/rejected": -0.6859561204910278, "logps/chosen": -0.8348990678787231, "logps/rejected": -1.5480455160140991, "loss": 0.9823, "nll_loss": 1.0281797647476196, "rewards/accuracies": 0.75, "rewards/chosen": -0.08348990976810455, "rewards/margins": 0.07131464034318924, "rewards/rejected": -0.1548045426607132, "step": 2190 }, { "epoch": 1.3365868537440904, "grad_norm": 1.153292179107666, "learning_rate": 5.882669932639314e-06, "log_odds_chosen": 0.21147537231445312, "log_odds_ratio": -0.7102208137512207, "logits/chosen": -0.9928228855133057, "logits/rejected": -0.7175612449645996, "logps/chosen": -0.9506713151931763, "logps/rejected": -1.098215103149414, "loss": 1.2523, "nll_loss": 1.0849777460098267, "rewards/accuracies": 0.375, "rewards/chosen": -0.09506713598966599, "rewards/margins": 0.014754369854927063, "rewards/rejected": -0.10982149839401245, "step": 2191 }, { "epoch": 1.3371968888211072, "grad_norm": 8.513544082641602, "learning_rate": 5.8816901408450706e-06, "log_odds_chosen": 2.2368335723876953, "log_odds_ratio": -0.43142831325531006, "logits/chosen": -0.6552883982658386, "logits/rejected": -0.7946966886520386, "logps/chosen": -0.7365363240242004, "logps/rejected": -2.372152328491211, "loss": 1.0262, "nll_loss": 0.9230271577835083, "rewards/accuracies": 0.625, "rewards/chosen": -0.07365363836288452, "rewards/margins": 0.1635615974664688, "rewards/rejected": -0.23721523582935333, "step": 2192 }, { "epoch": 1.3378069238981243, "grad_norm": 2.301698923110962, "learning_rate": 5.880710349050827e-06, "log_odds_chosen": 0.9456520080566406, "log_odds_ratio": -0.4700053632259369, "logits/chosen": -0.6230521202087402, "logits/rejected": -0.7633495330810547, "logps/chosen": -0.7875230312347412, "logps/rejected": -1.4062341451644897, "loss": 1.0143, "nll_loss": 0.8567672371864319, "rewards/accuracies": 0.875, "rewards/chosen": -0.078752301633358, "rewards/margins": 0.06187111884355545, "rewards/rejected": -0.14062342047691345, "step": 2193 }, { "epoch": 1.3384169589751411, "grad_norm": 1.3106977939605713, "learning_rate": 5.879730557256582e-06, "log_odds_chosen": 1.461535096168518, "log_odds_ratio": -0.38325008749961853, "logits/chosen": -0.7617017030715942, "logits/rejected": -0.7933975458145142, "logps/chosen": -0.5980589389801025, "logps/rejected": -1.6086934804916382, "loss": 0.9723, "nll_loss": 0.6585337519645691, "rewards/accuracies": 0.875, "rewards/chosen": -0.05980589613318443, "rewards/margins": 0.10106345266103745, "rewards/rejected": -0.16086935997009277, "step": 2194 }, { "epoch": 1.339026994052158, "grad_norm": 1.527664065361023, "learning_rate": 5.878750765462339e-06, "log_odds_chosen": 0.5187501311302185, "log_odds_ratio": -0.5067245364189148, "logits/chosen": -0.9230453968048096, "logits/rejected": -0.7205352783203125, "logps/chosen": -0.8611152768135071, "logps/rejected": -1.216967225074768, "loss": 1.094, "nll_loss": 0.9477622509002686, "rewards/accuracies": 0.75, "rewards/chosen": -0.08611153811216354, "rewards/margins": 0.0355851911008358, "rewards/rejected": -0.12169672548770905, "step": 2195 }, { "epoch": 1.3396370291291748, "grad_norm": 1.2704026699066162, "learning_rate": 5.877770973668095e-06, "log_odds_chosen": 2.8102543354034424, "log_odds_ratio": -0.3322182893753052, "logits/chosen": -0.7619637250900269, "logits/rejected": -0.5925220251083374, "logps/chosen": -0.7292778491973877, "logps/rejected": -3.0318784713745117, "loss": 0.9655, "nll_loss": 0.8873058557510376, "rewards/accuracies": 0.75, "rewards/chosen": -0.07292778789997101, "rewards/margins": 0.23026007413864136, "rewards/rejected": -0.30318784713745117, "step": 2196 }, { "epoch": 1.3402470642061919, "grad_norm": 1.6864796876907349, "learning_rate": 5.876791181873852e-06, "log_odds_chosen": 0.8060150146484375, "log_odds_ratio": -0.4963955879211426, "logits/chosen": -0.7509785890579224, "logits/rejected": -0.7379730343818665, "logps/chosen": -0.6929782032966614, "logps/rejected": -1.189340591430664, "loss": 0.9271, "nll_loss": 0.8033883571624756, "rewards/accuracies": 0.875, "rewards/chosen": -0.06929782032966614, "rewards/margins": 0.049636244773864746, "rewards/rejected": -0.11893406510353088, "step": 2197 }, { "epoch": 1.3408570992832087, "grad_norm": 1.267568588256836, "learning_rate": 5.875811390079608e-06, "log_odds_chosen": 1.0714325904846191, "log_odds_ratio": -0.5382555723190308, "logits/chosen": -0.631936252117157, "logits/rejected": -0.7333073616027832, "logps/chosen": -0.6902545094490051, "logps/rejected": -1.1261518001556396, "loss": 1.0635, "nll_loss": 1.0482826232910156, "rewards/accuracies": 0.875, "rewards/chosen": -0.06902545690536499, "rewards/margins": 0.043589718639850616, "rewards/rejected": -0.11261516809463501, "step": 2198 }, { "epoch": 1.3414671343602258, "grad_norm": 0.9320788383483887, "learning_rate": 5.874831598285364e-06, "log_odds_chosen": 1.0555907487869263, "log_odds_ratio": -0.4632602035999298, "logits/chosen": -0.8792203664779663, "logits/rejected": -0.7808269262313843, "logps/chosen": -0.7943131923675537, "logps/rejected": -1.4923031330108643, "loss": 0.9595, "nll_loss": 1.0212782621383667, "rewards/accuracies": 0.75, "rewards/chosen": -0.07943131774663925, "rewards/margins": 0.06979899108409882, "rewards/rejected": -0.14923031628131866, "step": 2199 }, { "epoch": 1.3420771694372426, "grad_norm": 1.1000903844833374, "learning_rate": 5.87385180649112e-06, "log_odds_chosen": 0.9106385111808777, "log_odds_ratio": -0.4986701011657715, "logits/chosen": -0.9816641211509705, "logits/rejected": -0.6945081949234009, "logps/chosen": -0.7756131887435913, "logps/rejected": -1.4283883571624756, "loss": 1.1522, "nll_loss": 1.0629063844680786, "rewards/accuracies": 0.625, "rewards/chosen": -0.07756131887435913, "rewards/margins": 0.06527750194072723, "rewards/rejected": -0.14283883571624756, "step": 2200 }, { "epoch": 1.3426872045142595, "grad_norm": 1.2801345586776733, "learning_rate": 5.8728720146968765e-06, "log_odds_chosen": 2.3185176849365234, "log_odds_ratio": -0.3449845016002655, "logits/chosen": -0.7888214588165283, "logits/rejected": -0.7548151016235352, "logps/chosen": -0.8133923411369324, "logps/rejected": -2.5073771476745605, "loss": 1.251, "nll_loss": 1.028136968612671, "rewards/accuracies": 0.75, "rewards/chosen": -0.08133924007415771, "rewards/margins": 0.1693984866142273, "rewards/rejected": -0.250737726688385, "step": 2201 }, { "epoch": 1.3432972395912766, "grad_norm": 1.5149468183517456, "learning_rate": 5.871892222902633e-06, "log_odds_chosen": 1.9257526397705078, "log_odds_ratio": -0.24445413053035736, "logits/chosen": -0.8523411750793457, "logits/rejected": -0.8132732510566711, "logps/chosen": -0.842968761920929, "logps/rejected": -2.2690186500549316, "loss": 1.1238, "nll_loss": 1.0709145069122314, "rewards/accuracies": 1.0, "rewards/chosen": -0.08429687470197678, "rewards/margins": 0.1426049917936325, "rewards/rejected": -0.22690187394618988, "step": 2202 }, { "epoch": 1.3439072746682934, "grad_norm": 2.7883687019348145, "learning_rate": 5.87091243110839e-06, "log_odds_chosen": 1.0898303985595703, "log_odds_ratio": -0.4275326728820801, "logits/chosen": -0.9118462800979614, "logits/rejected": -0.8370077013969421, "logps/chosen": -0.829910159111023, "logps/rejected": -1.56219482421875, "loss": 0.965, "nll_loss": 0.9085766077041626, "rewards/accuracies": 0.875, "rewards/chosen": -0.08299101144075394, "rewards/margins": 0.07322848588228226, "rewards/rejected": -0.1562194973230362, "step": 2203 }, { "epoch": 1.3445173097453105, "grad_norm": 1.699027180671692, "learning_rate": 5.869932639314146e-06, "log_odds_chosen": 0.6458367109298706, "log_odds_ratio": -0.6061770915985107, "logits/chosen": -0.947791576385498, "logits/rejected": -0.7573228478431702, "logps/chosen": -0.8594518899917603, "logps/rejected": -1.3101005554199219, "loss": 1.2107, "nll_loss": 1.2325546741485596, "rewards/accuracies": 0.625, "rewards/chosen": -0.08594518899917603, "rewards/margins": 0.045064862817525864, "rewards/rejected": -0.1310100555419922, "step": 2204 }, { "epoch": 1.3451273448223273, "grad_norm": 1.6213233470916748, "learning_rate": 5.868952847519902e-06, "log_odds_chosen": 0.06159719079732895, "log_odds_ratio": -0.7085099816322327, "logits/chosen": -0.9326483011245728, "logits/rejected": -0.9260591864585876, "logps/chosen": -0.895497739315033, "logps/rejected": -0.9343775510787964, "loss": 1.1255, "nll_loss": 1.2405338287353516, "rewards/accuracies": 0.625, "rewards/chosen": -0.08954977989196777, "rewards/margins": 0.003887974191457033, "rewards/rejected": -0.09343776106834412, "step": 2205 }, { "epoch": 1.3457373798993442, "grad_norm": 2.272675037384033, "learning_rate": 5.867973055725658e-06, "log_odds_chosen": 1.6082923412322998, "log_odds_ratio": -0.45588046312332153, "logits/chosen": -0.7155619263648987, "logits/rejected": -0.8268578052520752, "logps/chosen": -0.8351123332977295, "logps/rejected": -2.0121304988861084, "loss": 1.1558, "nll_loss": 1.020883321762085, "rewards/accuracies": 0.75, "rewards/chosen": -0.08351123332977295, "rewards/margins": 0.11770182847976685, "rewards/rejected": -0.2012130618095398, "step": 2206 }, { "epoch": 1.346347414976361, "grad_norm": 1.661924123764038, "learning_rate": 5.866993263931414e-06, "log_odds_chosen": 1.1746506690979004, "log_odds_ratio": -0.42714738845825195, "logits/chosen": -0.9436182379722595, "logits/rejected": -0.8268758058547974, "logps/chosen": -0.7986956238746643, "logps/rejected": -1.6354844570159912, "loss": 1.1527, "nll_loss": 1.1062774658203125, "rewards/accuracies": 0.875, "rewards/chosen": -0.07986956834793091, "rewards/margins": 0.08367887884378433, "rewards/rejected": -0.16354843974113464, "step": 2207 }, { "epoch": 1.346957450053378, "grad_norm": 1.319277048110962, "learning_rate": 5.86601347213717e-06, "log_odds_chosen": 1.1657150983810425, "log_odds_ratio": -0.5332887768745422, "logits/chosen": -0.9018818736076355, "logits/rejected": -0.7616963386535645, "logps/chosen": -0.8039796948432922, "logps/rejected": -1.672165870666504, "loss": 1.289, "nll_loss": 0.9943537712097168, "rewards/accuracies": 0.625, "rewards/chosen": -0.08039796352386475, "rewards/margins": 0.08681860566139221, "rewards/rejected": -0.16721658408641815, "step": 2208 }, { "epoch": 1.347567485130395, "grad_norm": 1.7486398220062256, "learning_rate": 5.865033680342927e-06, "log_odds_chosen": 3.7353436946868896, "log_odds_ratio": -0.11060715466737747, "logits/chosen": -0.6029015183448792, "logits/rejected": -0.7597154378890991, "logps/chosen": -0.45553576946258545, "logps/rejected": -3.2271714210510254, "loss": 0.952, "nll_loss": 0.5993818640708923, "rewards/accuracies": 1.0, "rewards/chosen": -0.045553576201200485, "rewards/margins": 0.277163565158844, "rewards/rejected": -0.32271715998649597, "step": 2209 }, { "epoch": 1.348177520207412, "grad_norm": 1.4567089080810547, "learning_rate": 5.864053888548683e-06, "log_odds_chosen": 0.600199818611145, "log_odds_ratio": -0.6084442138671875, "logits/chosen": -0.7416287660598755, "logits/rejected": -0.8509082794189453, "logps/chosen": -0.7712913155555725, "logps/rejected": -0.9179264903068542, "loss": 1.126, "nll_loss": 0.9531316757202148, "rewards/accuracies": 0.75, "rewards/chosen": -0.07712913304567337, "rewards/margins": 0.014663513749837875, "rewards/rejected": -0.09179264307022095, "step": 2210 }, { "epoch": 1.3487875552844288, "grad_norm": 4.636000633239746, "learning_rate": 5.8630740967544386e-06, "log_odds_chosen": 0.7603991031646729, "log_odds_ratio": -0.6480523347854614, "logits/chosen": -0.6887142658233643, "logits/rejected": -0.7594671249389648, "logps/chosen": -0.809868335723877, "logps/rejected": -1.426413655281067, "loss": 0.977, "nll_loss": 0.8637785315513611, "rewards/accuracies": 0.375, "rewards/chosen": -0.08098682761192322, "rewards/margins": 0.06165454164147377, "rewards/rejected": -0.14264138042926788, "step": 2211 }, { "epoch": 1.3493975903614457, "grad_norm": 1.1078417301177979, "learning_rate": 5.8620943049601955e-06, "log_odds_chosen": 1.0740773677825928, "log_odds_ratio": -0.4370757043361664, "logits/chosen": -0.7810068726539612, "logits/rejected": -0.7968840599060059, "logps/chosen": -0.8011103868484497, "logps/rejected": -1.5216379165649414, "loss": 1.0116, "nll_loss": 1.0023624897003174, "rewards/accuracies": 0.75, "rewards/chosen": -0.08011104166507721, "rewards/margins": 0.07205276191234589, "rewards/rejected": -0.1521638035774231, "step": 2212 }, { "epoch": 1.3500076254384628, "grad_norm": 1.428236722946167, "learning_rate": 5.861114513165952e-06, "log_odds_chosen": 1.3637750148773193, "log_odds_ratio": -0.5490926504135132, "logits/chosen": -0.8760374784469604, "logits/rejected": -0.812995433807373, "logps/chosen": -0.8102202415466309, "logps/rejected": -1.700239896774292, "loss": 1.0846, "nll_loss": 1.0921378135681152, "rewards/accuracies": 0.625, "rewards/chosen": -0.08102202415466309, "rewards/margins": 0.08900195360183716, "rewards/rejected": -0.17002397775650024, "step": 2213 }, { "epoch": 1.3506176605154796, "grad_norm": 2.223944902420044, "learning_rate": 5.860134721371709e-06, "log_odds_chosen": 0.6968930959701538, "log_odds_ratio": -0.6009370684623718, "logits/chosen": -0.8389410972595215, "logits/rejected": -0.7713860273361206, "logps/chosen": -0.9387409687042236, "logps/rejected": -1.4344221353530884, "loss": 1.1347, "nll_loss": 1.1473257541656494, "rewards/accuracies": 0.5, "rewards/chosen": -0.09387410432100296, "rewards/margins": 0.049568116664886475, "rewards/rejected": -0.14344221353530884, "step": 2214 }, { "epoch": 1.3512276955924967, "grad_norm": 1.353967547416687, "learning_rate": 5.859154929577465e-06, "log_odds_chosen": 1.3784961700439453, "log_odds_ratio": -0.4981774687767029, "logits/chosen": -0.8348477482795715, "logits/rejected": -0.8751797676086426, "logps/chosen": -0.8092830181121826, "logps/rejected": -1.9598190784454346, "loss": 1.1088, "nll_loss": 0.9808061122894287, "rewards/accuracies": 0.75, "rewards/chosen": -0.08092831075191498, "rewards/margins": 0.11505361646413803, "rewards/rejected": -0.1959819197654724, "step": 2215 }, { "epoch": 1.3518377306695135, "grad_norm": 2.005544424057007, "learning_rate": 5.858175137783221e-06, "log_odds_chosen": 0.9081225991249084, "log_odds_ratio": -0.4456785023212433, "logits/chosen": -0.8279466032981873, "logits/rejected": -1.0112181901931763, "logps/chosen": -0.6986038684844971, "logps/rejected": -1.206161618232727, "loss": 0.9717, "nll_loss": 1.0021648406982422, "rewards/accuracies": 0.75, "rewards/chosen": -0.06986038386821747, "rewards/margins": 0.050755783915519714, "rewards/rejected": -0.12061616778373718, "step": 2216 }, { "epoch": 1.3524477657465304, "grad_norm": 1.2091196775436401, "learning_rate": 5.857195345988977e-06, "log_odds_chosen": 0.5867890119552612, "log_odds_ratio": -0.5395498275756836, "logits/chosen": -1.0742512941360474, "logits/rejected": -0.8640368580818176, "logps/chosen": -1.1253855228424072, "logps/rejected": -1.5586113929748535, "loss": 1.1949, "nll_loss": 1.2988331317901611, "rewards/accuracies": 0.875, "rewards/chosen": -0.11253856122493744, "rewards/margins": 0.04332257807254791, "rewards/rejected": -0.15586113929748535, "step": 2217 }, { "epoch": 1.3530578008235474, "grad_norm": 1.1511787176132202, "learning_rate": 5.856215554194733e-06, "log_odds_chosen": 1.7456365823745728, "log_odds_ratio": -0.5284615159034729, "logits/chosen": -0.7527136206626892, "logits/rejected": -0.6818813681602478, "logps/chosen": -0.6589488387107849, "logps/rejected": -2.0292444229125977, "loss": 1.1194, "nll_loss": 0.863060474395752, "rewards/accuracies": 0.625, "rewards/chosen": -0.06589487940073013, "rewards/margins": 0.13702955842018127, "rewards/rejected": -0.202924445271492, "step": 2218 }, { "epoch": 1.3536678359005643, "grad_norm": 1.680168628692627, "learning_rate": 5.855235762400489e-06, "log_odds_chosen": 0.8448098301887512, "log_odds_ratio": -0.4687224328517914, "logits/chosen": -0.806977391242981, "logits/rejected": -0.7989888787269592, "logps/chosen": -0.757165253162384, "logps/rejected": -1.2302128076553345, "loss": 1.0073, "nll_loss": 1.0664219856262207, "rewards/accuracies": 0.75, "rewards/chosen": -0.0757165253162384, "rewards/margins": 0.04730475693941116, "rewards/rejected": -0.12302128970623016, "step": 2219 }, { "epoch": 1.3542778709775813, "grad_norm": 1.3619134426116943, "learning_rate": 5.854255970606246e-06, "log_odds_chosen": 1.3371665477752686, "log_odds_ratio": -0.667770266532898, "logits/chosen": -1.010612964630127, "logits/rejected": -0.8639817237854004, "logps/chosen": -1.1372920274734497, "logps/rejected": -2.240502119064331, "loss": 1.0797, "nll_loss": 1.3803201913833618, "rewards/accuracies": 0.375, "rewards/chosen": -0.11372920870780945, "rewards/margins": 0.11032100766897202, "rewards/rejected": -0.22405022382736206, "step": 2220 }, { "epoch": 1.3548879060545982, "grad_norm": 1.921562671661377, "learning_rate": 5.853276178812002e-06, "log_odds_chosen": 0.7644928097724915, "log_odds_ratio": -0.4131559729576111, "logits/chosen": -0.9173520803451538, "logits/rejected": -0.8215504884719849, "logps/chosen": -0.7664268612861633, "logps/rejected": -1.2077977657318115, "loss": 1.3196, "nll_loss": 0.9647409915924072, "rewards/accuracies": 1.0, "rewards/chosen": -0.07664269208908081, "rewards/margins": 0.04413708299398422, "rewards/rejected": -0.12077978253364563, "step": 2221 }, { "epoch": 1.355497941131615, "grad_norm": 2.115290880203247, "learning_rate": 5.8522963870177585e-06, "log_odds_chosen": -0.4830726981163025, "log_odds_ratio": -0.9874249696731567, "logits/chosen": -0.9765336513519287, "logits/rejected": -0.9752664566040039, "logps/chosen": -0.8152048587799072, "logps/rejected": -0.6160162687301636, "loss": 1.1348, "nll_loss": 1.0457544326782227, "rewards/accuracies": 0.25, "rewards/chosen": -0.08152048289775848, "rewards/margins": -0.019918862730264664, "rewards/rejected": -0.06160162761807442, "step": 2222 }, { "epoch": 1.3561079762086319, "grad_norm": 1.701383113861084, "learning_rate": 5.8513165952235146e-06, "log_odds_chosen": 1.805100440979004, "log_odds_ratio": -0.37221458554267883, "logits/chosen": -0.6053006649017334, "logits/rejected": -0.828563392162323, "logps/chosen": -0.6295006275177002, "logps/rejected": -1.869977355003357, "loss": 1.1583, "nll_loss": 0.6796203255653381, "rewards/accuracies": 0.75, "rewards/chosen": -0.06295006722211838, "rewards/margins": 0.12404768168926239, "rewards/rejected": -0.18699774146080017, "step": 2223 }, { "epoch": 1.356718011285649, "grad_norm": 1.403197169303894, "learning_rate": 5.850336803429271e-06, "log_odds_chosen": 1.4505953788757324, "log_odds_ratio": -0.4112122058868408, "logits/chosen": -1.0193138122558594, "logits/rejected": -1.0295103788375854, "logps/chosen": -0.8801313638687134, "logps/rejected": -2.022869110107422, "loss": 1.3171, "nll_loss": 1.3768638372421265, "rewards/accuracies": 0.625, "rewards/chosen": -0.08801312744617462, "rewards/margins": 0.11427377909421921, "rewards/rejected": -0.20228691399097443, "step": 2224 }, { "epoch": 1.3573280463626658, "grad_norm": 3.451066493988037, "learning_rate": 5.849357011635028e-06, "log_odds_chosen": 0.9225815534591675, "log_odds_ratio": -0.46816545724868774, "logits/chosen": -0.9597386121749878, "logits/rejected": -0.924709677696228, "logps/chosen": -1.0874441862106323, "logps/rejected": -1.7941417694091797, "loss": 1.1089, "nll_loss": 1.2051522731781006, "rewards/accuracies": 0.75, "rewards/chosen": -0.10874442011117935, "rewards/margins": 0.07066977024078369, "rewards/rejected": -0.17941418290138245, "step": 2225 }, { "epoch": 1.3579380814396829, "grad_norm": 1.4922292232513428, "learning_rate": 5.848377219840784e-06, "log_odds_chosen": 0.3512357473373413, "log_odds_ratio": -0.5919831991195679, "logits/chosen": -1.0611844062805176, "logits/rejected": -1.0726828575134277, "logps/chosen": -0.8415603637695312, "logps/rejected": -1.050673246383667, "loss": 1.0985, "nll_loss": 1.1030986309051514, "rewards/accuracies": 0.625, "rewards/chosen": -0.08415603637695312, "rewards/margins": 0.020911280065774918, "rewards/rejected": -0.10506731271743774, "step": 2226 }, { "epoch": 1.3585481165166997, "grad_norm": 1.212158441543579, "learning_rate": 5.84739742804654e-06, "log_odds_chosen": 1.9463783502578735, "log_odds_ratio": -0.35886698961257935, "logits/chosen": -1.0110880136489868, "logits/rejected": -0.9245651960372925, "logps/chosen": -0.8769474625587463, "logps/rejected": -2.4066965579986572, "loss": 1.1309, "nll_loss": 1.0796124935150146, "rewards/accuracies": 0.875, "rewards/chosen": -0.08769474923610687, "rewards/margins": 0.1529749184846878, "rewards/rejected": -0.24066965281963348, "step": 2227 }, { "epoch": 1.3591581515937166, "grad_norm": 1.3025341033935547, "learning_rate": 5.846417636252297e-06, "log_odds_chosen": 1.7324957847595215, "log_odds_ratio": -0.3043484091758728, "logits/chosen": -0.8430293798446655, "logits/rejected": -0.8638132810592651, "logps/chosen": -0.5132616758346558, "logps/rejected": -1.6305508613586426, "loss": 1.2979, "nll_loss": 1.1953555345535278, "rewards/accuracies": 0.875, "rewards/chosen": -0.051326170563697815, "rewards/margins": 0.11172892153263092, "rewards/rejected": -0.16305509209632874, "step": 2228 }, { "epoch": 1.3597681866707336, "grad_norm": 1.8162657022476196, "learning_rate": 5.845437844458052e-06, "log_odds_chosen": 0.3216267228126526, "log_odds_ratio": -0.60184246301651, "logits/chosen": -0.79172283411026, "logits/rejected": -0.7190717458724976, "logps/chosen": -0.8246687650680542, "logps/rejected": -0.9993400573730469, "loss": 0.9313, "nll_loss": 0.9699311852455139, "rewards/accuracies": 0.625, "rewards/chosen": -0.08246687799692154, "rewards/margins": 0.017467128112912178, "rewards/rejected": -0.09993401169776917, "step": 2229 }, { "epoch": 1.3603782217477505, "grad_norm": 1.7439966201782227, "learning_rate": 5.844458052663808e-06, "log_odds_chosen": 1.1429508924484253, "log_odds_ratio": -0.5160976648330688, "logits/chosen": -0.9709650278091431, "logits/rejected": -1.004760980606079, "logps/chosen": -1.0692553520202637, "logps/rejected": -1.9994229078292847, "loss": 1.1564, "nll_loss": 1.2090601921081543, "rewards/accuracies": 0.75, "rewards/chosen": -0.10692553222179413, "rewards/margins": 0.09301675111055374, "rewards/rejected": -0.19994229078292847, "step": 2230 }, { "epoch": 1.3609882568247675, "grad_norm": 1.7070958614349365, "learning_rate": 5.843478260869565e-06, "log_odds_chosen": 2.234320640563965, "log_odds_ratio": -0.32138791680336, "logits/chosen": -0.8858624696731567, "logits/rejected": -0.9040496349334717, "logps/chosen": -0.6384104490280151, "logps/rejected": -2.272582769393921, "loss": 1.0369, "nll_loss": 0.9355392456054688, "rewards/accuracies": 0.875, "rewards/chosen": -0.06384104490280151, "rewards/margins": 0.163417249917984, "rewards/rejected": -0.22725829482078552, "step": 2231 }, { "epoch": 1.3615982919017844, "grad_norm": 2.9241580963134766, "learning_rate": 5.842498469075321e-06, "log_odds_chosen": 2.3171486854553223, "log_odds_ratio": -0.35862094163894653, "logits/chosen": -0.7773635387420654, "logits/rejected": -0.8192508220672607, "logps/chosen": -0.6334406733512878, "logps/rejected": -2.3764209747314453, "loss": 0.9584, "nll_loss": 0.7196717858314514, "rewards/accuracies": 0.875, "rewards/chosen": -0.0633440688252449, "rewards/margins": 0.1742980182170868, "rewards/rejected": -0.2376420795917511, "step": 2232 }, { "epoch": 1.3622083269788012, "grad_norm": 1.667714238166809, "learning_rate": 5.8415186772810775e-06, "log_odds_chosen": 1.172253966331482, "log_odds_ratio": -0.49397456645965576, "logits/chosen": -1.1293275356292725, "logits/rejected": -1.1008285284042358, "logps/chosen": -0.7836043238639832, "logps/rejected": -1.6369938850402832, "loss": 1.0846, "nll_loss": 1.0185344219207764, "rewards/accuracies": 0.75, "rewards/chosen": -0.07836043834686279, "rewards/margins": 0.08533895015716553, "rewards/rejected": -0.16369938850402832, "step": 2233 }, { "epoch": 1.362818362055818, "grad_norm": 1.940058946609497, "learning_rate": 5.840538885486834e-06, "log_odds_chosen": 1.4421802759170532, "log_odds_ratio": -0.43598151206970215, "logits/chosen": -0.8891519904136658, "logits/rejected": -0.9624162316322327, "logps/chosen": -0.6017307639122009, "logps/rejected": -1.6523747444152832, "loss": 1.2684, "nll_loss": 1.0164440870285034, "rewards/accuracies": 0.75, "rewards/chosen": -0.06017307937145233, "rewards/margins": 0.10506439208984375, "rewards/rejected": -0.16523747146129608, "step": 2234 }, { "epoch": 1.3634283971328351, "grad_norm": 1.1714062690734863, "learning_rate": 5.83955909369259e-06, "log_odds_chosen": 1.5620743036270142, "log_odds_ratio": -0.4123615026473999, "logits/chosen": -0.9150552153587341, "logits/rejected": -0.9657235741615295, "logps/chosen": -0.7225598692893982, "logps/rejected": -1.809746503829956, "loss": 1.1679, "nll_loss": 1.0855116844177246, "rewards/accuracies": 0.625, "rewards/chosen": -0.07225598394870758, "rewards/margins": 0.10871865600347519, "rewards/rejected": -0.18097463250160217, "step": 2235 }, { "epoch": 1.364038432209852, "grad_norm": 1.9652456045150757, "learning_rate": 5.838579301898346e-06, "log_odds_chosen": 0.8827286958694458, "log_odds_ratio": -0.6403334140777588, "logits/chosen": -0.7588618993759155, "logits/rejected": -0.7597247362136841, "logps/chosen": -0.884360671043396, "logps/rejected": -1.552756428718567, "loss": 1.199, "nll_loss": 1.0721238851547241, "rewards/accuracies": 0.5, "rewards/chosen": -0.0884360671043396, "rewards/margins": 0.0668395608663559, "rewards/rejected": -0.1552756279706955, "step": 2236 }, { "epoch": 1.364648467286869, "grad_norm": 1.5820945501327515, "learning_rate": 5.837599510104103e-06, "log_odds_chosen": 1.9467236995697021, "log_odds_ratio": -0.3255322575569153, "logits/chosen": -0.8650515675544739, "logits/rejected": -0.7002971768379211, "logps/chosen": -0.7592948079109192, "logps/rejected": -2.1302287578582764, "loss": 1.0881, "nll_loss": 1.010408639907837, "rewards/accuracies": 0.875, "rewards/chosen": -0.07592948526144028, "rewards/margins": 0.13709339499473572, "rewards/rejected": -0.2130228579044342, "step": 2237 }, { "epoch": 1.365258502363886, "grad_norm": 13.194787979125977, "learning_rate": 5.836619718309859e-06, "log_odds_chosen": 0.9128910303115845, "log_odds_ratio": -0.5916229486465454, "logits/chosen": -1.138301968574524, "logits/rejected": -0.968328595161438, "logps/chosen": -1.329296588897705, "logps/rejected": -2.0668396949768066, "loss": 1.1711, "nll_loss": 1.313730001449585, "rewards/accuracies": 0.625, "rewards/chosen": -0.13292965292930603, "rewards/margins": 0.07375432550907135, "rewards/rejected": -0.2066839635372162, "step": 2238 }, { "epoch": 1.3658685374409028, "grad_norm": 3.65071177482605, "learning_rate": 5.835639926515616e-06, "log_odds_chosen": 0.8299610614776611, "log_odds_ratio": -0.484380841255188, "logits/chosen": -0.8285525441169739, "logits/rejected": -0.7531532645225525, "logps/chosen": -0.8096957802772522, "logps/rejected": -1.3830294609069824, "loss": 1.0559, "nll_loss": 0.9717326164245605, "rewards/accuracies": 0.625, "rewards/chosen": -0.08096958696842194, "rewards/margins": 0.057333365082740784, "rewards/rejected": -0.13830295205116272, "step": 2239 }, { "epoch": 1.3664785725179198, "grad_norm": 1.2788063287734985, "learning_rate": 5.834660134721371e-06, "log_odds_chosen": 0.9233675003051758, "log_odds_ratio": -0.5056461691856384, "logits/chosen": -0.9980387687683105, "logits/rejected": -0.9331468343734741, "logps/chosen": -0.9218146204948425, "logps/rejected": -1.614474892616272, "loss": 1.0437, "nll_loss": 1.0635180473327637, "rewards/accuracies": 0.5, "rewards/chosen": -0.09218145906925201, "rewards/margins": 0.06926602125167847, "rewards/rejected": -0.16144748032093048, "step": 2240 }, { "epoch": 1.3670886075949367, "grad_norm": 1.8207134008407593, "learning_rate": 5.833680342927127e-06, "log_odds_chosen": 1.1119483709335327, "log_odds_ratio": -0.5111438035964966, "logits/chosen": -1.0524370670318604, "logits/rejected": -0.8966277837753296, "logps/chosen": -0.9924684166908264, "logps/rejected": -1.8650747537612915, "loss": 1.0076, "nll_loss": 1.1563620567321777, "rewards/accuracies": 0.75, "rewards/chosen": -0.09924684464931488, "rewards/margins": 0.08726061880588531, "rewards/rejected": -0.1865074783563614, "step": 2241 }, { "epoch": 1.3676986426719537, "grad_norm": 1.1038033962249756, "learning_rate": 5.832700551132884e-06, "log_odds_chosen": 0.4705026149749756, "log_odds_ratio": -0.5656812191009521, "logits/chosen": -0.7938821315765381, "logits/rejected": -0.7321728467941284, "logps/chosen": -0.9959607124328613, "logps/rejected": -1.3118183612823486, "loss": 1.0244, "nll_loss": 1.0119420289993286, "rewards/accuracies": 0.5, "rewards/chosen": -0.09959608316421509, "rewards/margins": 0.03158574551343918, "rewards/rejected": -0.13118183612823486, "step": 2242 }, { "epoch": 1.3683086777489706, "grad_norm": 2.330540895462036, "learning_rate": 5.83172075933864e-06, "log_odds_chosen": 0.7318198680877686, "log_odds_ratio": -0.5334912538528442, "logits/chosen": -0.8661116361618042, "logits/rejected": -0.8707336783409119, "logps/chosen": -0.8584485054016113, "logps/rejected": -1.295844316482544, "loss": 1.1424, "nll_loss": 0.8948735594749451, "rewards/accuracies": 0.75, "rewards/chosen": -0.08584485203027725, "rewards/margins": 0.043739594519138336, "rewards/rejected": -0.1295844316482544, "step": 2243 }, { "epoch": 1.3689187128259874, "grad_norm": 2.024338483810425, "learning_rate": 5.8307409675443965e-06, "log_odds_chosen": 1.8754985332489014, "log_odds_ratio": -0.2539946138858795, "logits/chosen": -0.8979036808013916, "logits/rejected": -0.8276338577270508, "logps/chosen": -0.7847517728805542, "logps/rejected": -2.1877288818359375, "loss": 0.9913, "nll_loss": 0.9908477067947388, "rewards/accuracies": 0.875, "rewards/chosen": -0.07847517728805542, "rewards/margins": 0.14029771089553833, "rewards/rejected": -0.21877288818359375, "step": 2244 }, { "epoch": 1.3695287479030045, "grad_norm": 0.9992026090621948, "learning_rate": 5.8297611757501535e-06, "log_odds_chosen": 0.4363671541213989, "log_odds_ratio": -0.682960033416748, "logits/chosen": -1.1993581056594849, "logits/rejected": -0.7974581122398376, "logps/chosen": -0.9433773756027222, "logps/rejected": -1.211714267730713, "loss": 1.1028, "nll_loss": 1.0565913915634155, "rewards/accuracies": 0.625, "rewards/chosen": -0.09433773159980774, "rewards/margins": 0.026833701878786087, "rewards/rejected": -0.12117143720388412, "step": 2245 }, { "epoch": 1.3701387829800213, "grad_norm": 8.627195358276367, "learning_rate": 5.828781383955909e-06, "log_odds_chosen": 0.3549940288066864, "log_odds_ratio": -0.6208112239837646, "logits/chosen": -0.7793879508972168, "logits/rejected": -1.0151405334472656, "logps/chosen": -1.0826523303985596, "logps/rejected": -1.2099461555480957, "loss": 1.28, "nll_loss": 1.2655905485153198, "rewards/accuracies": 0.5, "rewards/chosen": -0.10826524347066879, "rewards/margins": 0.012729382142424583, "rewards/rejected": -0.12099461257457733, "step": 2246 }, { "epoch": 1.3707488180570384, "grad_norm": 2.6801741123199463, "learning_rate": 5.827801592161665e-06, "log_odds_chosen": 0.7296691536903381, "log_odds_ratio": -0.5521063208580017, "logits/chosen": -0.8221760392189026, "logits/rejected": -0.6961130499839783, "logps/chosen": -0.8521146178245544, "logps/rejected": -1.3547966480255127, "loss": 1.1237, "nll_loss": 1.1112443208694458, "rewards/accuracies": 0.625, "rewards/chosen": -0.08521147072315216, "rewards/margins": 0.050268203020095825, "rewards/rejected": -0.1354796588420868, "step": 2247 }, { "epoch": 1.3713588531340553, "grad_norm": 1.8662080764770508, "learning_rate": 5.826821800367422e-06, "log_odds_chosen": 0.9514580965042114, "log_odds_ratio": -0.5304571390151978, "logits/chosen": -0.9610443115234375, "logits/rejected": -0.865069568157196, "logps/chosen": -0.7593035697937012, "logps/rejected": -1.507460594177246, "loss": 1.1084, "nll_loss": 0.9699898958206177, "rewards/accuracies": 0.5, "rewards/chosen": -0.07593035697937012, "rewards/margins": 0.07481570541858673, "rewards/rejected": -0.15074606239795685, "step": 2248 }, { "epoch": 1.371968888211072, "grad_norm": 1.5654284954071045, "learning_rate": 5.825842008573178e-06, "log_odds_chosen": 1.8620259761810303, "log_odds_ratio": -0.24502825736999512, "logits/chosen": -0.5598326921463013, "logits/rejected": -0.3453269898891449, "logps/chosen": -0.4662347733974457, "logps/rejected": -1.3226419687271118, "loss": 0.9787, "nll_loss": 0.6496054530143738, "rewards/accuracies": 1.0, "rewards/chosen": -0.04662347212433815, "rewards/margins": 0.08564071357250214, "rewards/rejected": -0.13226419687271118, "step": 2249 }, { "epoch": 1.372578923288089, "grad_norm": 3.8598341941833496, "learning_rate": 5.824862216778934e-06, "log_odds_chosen": 3.1476521492004395, "log_odds_ratio": -0.18878822028636932, "logits/chosen": -0.7189927101135254, "logits/rejected": -0.9196062684059143, "logps/chosen": -0.5417569875717163, "logps/rejected": -2.744323492050171, "loss": 1.0034, "nll_loss": 0.9770225286483765, "rewards/accuracies": 0.875, "rewards/chosen": -0.054175689816474915, "rewards/margins": 0.22025667130947113, "rewards/rejected": -0.27443236112594604, "step": 2250 }, { "epoch": 1.373188958365106, "grad_norm": 2.731192111968994, "learning_rate": 5.82388242498469e-06, "log_odds_chosen": 0.598310112953186, "log_odds_ratio": -0.6454042196273804, "logits/chosen": -1.04342782497406, "logits/rejected": -0.9966298937797546, "logps/chosen": -1.095856785774231, "logps/rejected": -1.523773193359375, "loss": 1.2196, "nll_loss": 1.3368937969207764, "rewards/accuracies": 0.625, "rewards/chosen": -0.10958568751811981, "rewards/margins": 0.04279164969921112, "rewards/rejected": -0.15237730741500854, "step": 2251 }, { "epoch": 1.3737989934421229, "grad_norm": 1.8322068452835083, "learning_rate": 5.822902633190446e-06, "log_odds_chosen": 0.7151564359664917, "log_odds_ratio": -0.6281531453132629, "logits/chosen": -0.9222691059112549, "logits/rejected": -0.912231981754303, "logps/chosen": -0.9731929302215576, "logps/rejected": -1.4768880605697632, "loss": 1.1322, "nll_loss": 1.139478325843811, "rewards/accuracies": 0.5, "rewards/chosen": -0.09731929004192352, "rewards/margins": 0.050369516015052795, "rewards/rejected": -0.14768880605697632, "step": 2252 }, { "epoch": 1.37440902851914, "grad_norm": 2.5535337924957275, "learning_rate": 5.821922841396203e-06, "log_odds_chosen": 0.6209951639175415, "log_odds_ratio": -0.6006054282188416, "logits/chosen": -1.0645642280578613, "logits/rejected": -1.0003905296325684, "logps/chosen": -0.8844903707504272, "logps/rejected": -1.3706393241882324, "loss": 1.1132, "nll_loss": 1.083298921585083, "rewards/accuracies": 0.5, "rewards/chosen": -0.08844904601573944, "rewards/margins": 0.04861489683389664, "rewards/rejected": -0.13706395030021667, "step": 2253 }, { "epoch": 1.3750190635961568, "grad_norm": 2.3560142517089844, "learning_rate": 5.8209430496019594e-06, "log_odds_chosen": 2.2502810955047607, "log_odds_ratio": -0.4947863519191742, "logits/chosen": -0.8287709951400757, "logits/rejected": -0.7859522104263306, "logps/chosen": -0.6815084218978882, "logps/rejected": -2.403024673461914, "loss": 1.0013, "nll_loss": 0.9836437106132507, "rewards/accuracies": 0.625, "rewards/chosen": -0.0681508481502533, "rewards/margins": 0.17215164005756378, "rewards/rejected": -0.24030247330665588, "step": 2254 }, { "epoch": 1.3756290986731736, "grad_norm": 1.2997806072235107, "learning_rate": 5.8199632578077156e-06, "log_odds_chosen": 2.1311709880828857, "log_odds_ratio": -0.27171915769577026, "logits/chosen": -0.9144402742385864, "logits/rejected": -0.9042385220527649, "logps/chosen": -0.744372546672821, "logps/rejected": -2.30122709274292, "loss": 0.9492, "nll_loss": 1.0449862480163574, "rewards/accuracies": 0.875, "rewards/chosen": -0.07443725317716599, "rewards/margins": 0.15568548440933228, "rewards/rejected": -0.23012274503707886, "step": 2255 }, { "epoch": 1.3762391337501907, "grad_norm": 1.2360385656356812, "learning_rate": 5.8189834660134725e-06, "log_odds_chosen": 1.6508597135543823, "log_odds_ratio": -0.32706084847450256, "logits/chosen": -1.0923149585723877, "logits/rejected": -0.9859123229980469, "logps/chosen": -0.7794479727745056, "logps/rejected": -2.0332512855529785, "loss": 1.0733, "nll_loss": 1.1475199460983276, "rewards/accuracies": 0.875, "rewards/chosen": -0.0779448002576828, "rewards/margins": 0.12538033723831177, "rewards/rejected": -0.20332515239715576, "step": 2256 }, { "epoch": 1.3768491688272075, "grad_norm": 1.9449710845947266, "learning_rate": 5.818003674219228e-06, "log_odds_chosen": 1.4250760078430176, "log_odds_ratio": -0.3845040500164032, "logits/chosen": -1.1311274766921997, "logits/rejected": -1.074110507965088, "logps/chosen": -1.145554780960083, "logps/rejected": -2.169546127319336, "loss": 1.2673, "nll_loss": 1.385132074356079, "rewards/accuracies": 0.875, "rewards/chosen": -0.1145554780960083, "rewards/margins": 0.10239913314580917, "rewards/rejected": -0.21695461869239807, "step": 2257 }, { "epoch": 1.3774592039042246, "grad_norm": 2.235017776489258, "learning_rate": 5.817023882424984e-06, "log_odds_chosen": 0.07534006237983704, "log_odds_ratio": -0.7751016020774841, "logits/chosen": -1.0928936004638672, "logits/rejected": -0.9599421620368958, "logps/chosen": -1.228048324584961, "logps/rejected": -1.1927402019500732, "loss": 1.286, "nll_loss": 1.640954852104187, "rewards/accuracies": 0.5, "rewards/chosen": -0.12280482798814774, "rewards/margins": -0.003530808724462986, "rewards/rejected": -0.11927402764558792, "step": 2258 }, { "epoch": 1.3780692389812415, "grad_norm": 5.77495002746582, "learning_rate": 5.816044090630741e-06, "log_odds_chosen": 1.3884963989257812, "log_odds_ratio": -0.43342167139053345, "logits/chosen": -0.7670757174491882, "logits/rejected": -0.8138846158981323, "logps/chosen": -0.7056110501289368, "logps/rejected": -1.749243140220642, "loss": 1.0191, "nll_loss": 1.1807518005371094, "rewards/accuracies": 0.75, "rewards/chosen": -0.07056110352277756, "rewards/margins": 0.10436322540044785, "rewards/rejected": -0.1749243140220642, "step": 2259 }, { "epoch": 1.3786792740582583, "grad_norm": 1.563369870185852, "learning_rate": 5.815064298836497e-06, "log_odds_chosen": 0.705750584602356, "log_odds_ratio": -0.5859494805335999, "logits/chosen": -1.1375465393066406, "logits/rejected": -1.0345007181167603, "logps/chosen": -0.8009589910507202, "logps/rejected": -1.2780146598815918, "loss": 1.1615, "nll_loss": 1.1774725914001465, "rewards/accuracies": 0.625, "rewards/chosen": -0.08009590208530426, "rewards/margins": 0.04770556092262268, "rewards/rejected": -0.12780146300792694, "step": 2260 }, { "epoch": 1.3792893091352751, "grad_norm": 1.9440628290176392, "learning_rate": 5.814084507042253e-06, "log_odds_chosen": 0.5071219205856323, "log_odds_ratio": -0.6892820000648499, "logits/chosen": -0.9893202185630798, "logits/rejected": -0.8434873819351196, "logps/chosen": -1.2086987495422363, "logps/rejected": -1.690805196762085, "loss": 1.3691, "nll_loss": 1.3489561080932617, "rewards/accuracies": 0.5, "rewards/chosen": -0.12086988985538483, "rewards/margins": 0.04821065813302994, "rewards/rejected": -0.16908054053783417, "step": 2261 }, { "epoch": 1.3798993442122922, "grad_norm": 3.1978683471679688, "learning_rate": 5.81310471524801e-06, "log_odds_chosen": 0.4914977252483368, "log_odds_ratio": -0.5547615885734558, "logits/chosen": -0.7522488832473755, "logits/rejected": -0.7297075986862183, "logps/chosen": -0.9325006008148193, "logps/rejected": -1.2708688974380493, "loss": 1.0546, "nll_loss": 1.1246953010559082, "rewards/accuracies": 0.625, "rewards/chosen": -0.09325005859136581, "rewards/margins": 0.03383683040738106, "rewards/rejected": -0.12708689272403717, "step": 2262 }, { "epoch": 1.380509379289309, "grad_norm": 2.2089385986328125, "learning_rate": 5.812124923453765e-06, "log_odds_chosen": 0.28761863708496094, "log_odds_ratio": -0.6839699745178223, "logits/chosen": -1.1249849796295166, "logits/rejected": -0.8859840035438538, "logps/chosen": -1.1198605298995972, "logps/rejected": -1.442342758178711, "loss": 1.3264, "nll_loss": 1.306402564048767, "rewards/accuracies": 0.5, "rewards/chosen": -0.11198604851961136, "rewards/margins": 0.032248225063085556, "rewards/rejected": -0.1442342847585678, "step": 2263 }, { "epoch": 1.3811194143663261, "grad_norm": 1.3244802951812744, "learning_rate": 5.8111451316595215e-06, "log_odds_chosen": 0.8930788040161133, "log_odds_ratio": -0.6267358660697937, "logits/chosen": -0.9386757016181946, "logits/rejected": -0.9320673942565918, "logps/chosen": -0.8076683282852173, "logps/rejected": -1.5053315162658691, "loss": 1.0446, "nll_loss": 1.1274222135543823, "rewards/accuracies": 0.5, "rewards/chosen": -0.08076684176921844, "rewards/margins": 0.06976631283760071, "rewards/rejected": -0.15053313970565796, "step": 2264 }, { "epoch": 1.381729449443343, "grad_norm": 1.6756386756896973, "learning_rate": 5.8101653398652785e-06, "log_odds_chosen": 0.6717478036880493, "log_odds_ratio": -0.5590236186981201, "logits/chosen": -0.8973177671432495, "logits/rejected": -0.9436066150665283, "logps/chosen": -0.8099276423454285, "logps/rejected": -1.189635992050171, "loss": 1.1677, "nll_loss": 1.121962308883667, "rewards/accuracies": 0.625, "rewards/chosen": -0.08099276572465897, "rewards/margins": 0.037970833480358124, "rewards/rejected": -0.11896359175443649, "step": 2265 }, { "epoch": 1.3823394845203598, "grad_norm": 1.2297595739364624, "learning_rate": 5.809185548071035e-06, "log_odds_chosen": 1.0298868417739868, "log_odds_ratio": -0.5002329349517822, "logits/chosen": -0.6714497804641724, "logits/rejected": -0.7496261596679688, "logps/chosen": -0.7091062664985657, "logps/rejected": -1.3193717002868652, "loss": 1.0413, "nll_loss": 0.9913464784622192, "rewards/accuracies": 0.625, "rewards/chosen": -0.07091063261032104, "rewards/margins": 0.061026543378829956, "rewards/rejected": -0.131937175989151, "step": 2266 }, { "epoch": 1.3829495195973769, "grad_norm": 2.3787648677825928, "learning_rate": 5.808205756276792e-06, "log_odds_chosen": 1.4115025997161865, "log_odds_ratio": -0.4062540829181671, "logits/chosen": -0.6606249213218689, "logits/rejected": -0.7115967273712158, "logps/chosen": -0.6084219217300415, "logps/rejected": -1.590761661529541, "loss": 0.9036, "nll_loss": 0.7413924932479858, "rewards/accuracies": 0.875, "rewards/chosen": -0.06084219366312027, "rewards/margins": 0.09823396801948547, "rewards/rejected": -0.15907615423202515, "step": 2267 }, { "epoch": 1.3835595546743937, "grad_norm": 1.8483352661132812, "learning_rate": 5.807225964482547e-06, "log_odds_chosen": 0.7797060012817383, "log_odds_ratio": -0.48830658197402954, "logits/chosen": -0.7787712812423706, "logits/rejected": -0.8345603942871094, "logps/chosen": -0.7961904406547546, "logps/rejected": -1.3609426021575928, "loss": 1.0633, "nll_loss": 0.8803527355194092, "rewards/accuracies": 0.75, "rewards/chosen": -0.07961905002593994, "rewards/margins": 0.05647522583603859, "rewards/rejected": -0.13609427213668823, "step": 2268 }, { "epoch": 1.3841695897514108, "grad_norm": 1.7507835626602173, "learning_rate": 5.806246172688303e-06, "log_odds_chosen": 0.6716511845588684, "log_odds_ratio": -0.6766573786735535, "logits/chosen": -0.8157699108123779, "logits/rejected": -0.738804817199707, "logps/chosen": -0.9137717485427856, "logps/rejected": -1.370509147644043, "loss": 1.1825, "nll_loss": 1.0581014156341553, "rewards/accuracies": 0.5, "rewards/chosen": -0.09137717634439468, "rewards/margins": 0.04567374289035797, "rewards/rejected": -0.13705091178417206, "step": 2269 }, { "epoch": 1.3847796248284276, "grad_norm": 1.3528410196304321, "learning_rate": 5.80526638089406e-06, "log_odds_chosen": 3.2620654106140137, "log_odds_ratio": -0.13826286792755127, "logits/chosen": -0.6615185737609863, "logits/rejected": -0.675877034664154, "logps/chosen": -0.4161432087421417, "logps/rejected": -2.6632087230682373, "loss": 1.0281, "nll_loss": 0.7079113721847534, "rewards/accuracies": 1.0, "rewards/chosen": -0.04161432012915611, "rewards/margins": 0.22470656037330627, "rewards/rejected": -0.2663208842277527, "step": 2270 }, { "epoch": 1.3853896599054445, "grad_norm": 1.411777138710022, "learning_rate": 5.804286589099816e-06, "log_odds_chosen": 0.25134244561195374, "log_odds_ratio": -0.7688804864883423, "logits/chosen": -0.8353490233421326, "logits/rejected": -0.8180022835731506, "logps/chosen": -0.9689807891845703, "logps/rejected": -0.9989373087882996, "loss": 1.0406, "nll_loss": 1.073526382446289, "rewards/accuracies": 0.625, "rewards/chosen": -0.09689807891845703, "rewards/margins": 0.002995651215314865, "rewards/rejected": -0.0998937338590622, "step": 2271 }, { "epoch": 1.3859996949824616, "grad_norm": 1.5465502738952637, "learning_rate": 5.803306797305572e-06, "log_odds_chosen": 1.0823094844818115, "log_odds_ratio": -0.5113891363143921, "logits/chosen": -0.6365749835968018, "logits/rejected": -0.8080755472183228, "logps/chosen": -0.6183693408966064, "logps/rejected": -1.2854264974594116, "loss": 1.0062, "nll_loss": 0.8184927105903625, "rewards/accuracies": 0.75, "rewards/chosen": -0.06183692812919617, "rewards/margins": 0.06670572608709335, "rewards/rejected": -0.12854266166687012, "step": 2272 }, { "epoch": 1.3866097300594784, "grad_norm": 1.701366901397705, "learning_rate": 5.802327005511329e-06, "log_odds_chosen": 1.9409642219543457, "log_odds_ratio": -0.521465539932251, "logits/chosen": -1.1058106422424316, "logits/rejected": -1.1385250091552734, "logps/chosen": -0.9158508777618408, "logps/rejected": -2.4365100860595703, "loss": 1.0901, "nll_loss": 1.1198346614837646, "rewards/accuracies": 0.5, "rewards/chosen": -0.09158508479595184, "rewards/margins": 0.1520659327507019, "rewards/rejected": -0.24365101754665375, "step": 2273 }, { "epoch": 1.3872197651364955, "grad_norm": 2.1611061096191406, "learning_rate": 5.8013472137170844e-06, "log_odds_chosen": 0.6645542979240417, "log_odds_ratio": -0.6111166477203369, "logits/chosen": -0.8948742151260376, "logits/rejected": -0.7759416103363037, "logps/chosen": -0.8472101092338562, "logps/rejected": -1.4749977588653564, "loss": 1.0926, "nll_loss": 1.2077670097351074, "rewards/accuracies": 0.375, "rewards/chosen": -0.08472100645303726, "rewards/margins": 0.06277875602245331, "rewards/rejected": -0.14749976992607117, "step": 2274 }, { "epoch": 1.3878298002135123, "grad_norm": 1.3673431873321533, "learning_rate": 5.8003674219228406e-06, "log_odds_chosen": 1.3170804977416992, "log_odds_ratio": -0.5116487741470337, "logits/chosen": -1.066519856452942, "logits/rejected": -1.0403751134872437, "logps/chosen": -0.7281814217567444, "logps/rejected": -1.3951497077941895, "loss": 1.0297, "nll_loss": 1.2699601650238037, "rewards/accuracies": 0.625, "rewards/chosen": -0.07281814515590668, "rewards/margins": 0.06669681519269943, "rewards/rejected": -0.1395149528980255, "step": 2275 }, { "epoch": 1.3884398352905292, "grad_norm": 1.289680004119873, "learning_rate": 5.7993876301285975e-06, "log_odds_chosen": -0.017527371644973755, "log_odds_ratio": -0.781742513179779, "logits/chosen": -1.049154281616211, "logits/rejected": -0.9462293982505798, "logps/chosen": -1.0245659351348877, "logps/rejected": -1.0317481756210327, "loss": 1.1515, "nll_loss": 1.24732506275177, "rewards/accuracies": 0.5, "rewards/chosen": -0.10245658457279205, "rewards/margins": 0.0007182275876402855, "rewards/rejected": -0.10317482054233551, "step": 2276 }, { "epoch": 1.389049870367546, "grad_norm": 1.4528567790985107, "learning_rate": 5.798407838334354e-06, "log_odds_chosen": 0.1836688220500946, "log_odds_ratio": -0.645559549331665, "logits/chosen": -0.9297501444816589, "logits/rejected": -0.8254477977752686, "logps/chosen": -1.2031688690185547, "logps/rejected": -1.2776232957839966, "loss": 0.9804, "nll_loss": 1.1914361715316772, "rewards/accuracies": 0.625, "rewards/chosen": -0.12031689286231995, "rewards/margins": 0.007445436902344227, "rewards/rejected": -0.1277623325586319, "step": 2277 }, { "epoch": 1.389659905444563, "grad_norm": 4.063163757324219, "learning_rate": 5.797428046540111e-06, "log_odds_chosen": 1.8013066053390503, "log_odds_ratio": -0.6052106618881226, "logits/chosen": -0.8863402605056763, "logits/rejected": -0.7420318722724915, "logps/chosen": -0.8544580936431885, "logps/rejected": -2.542741298675537, "loss": 1.172, "nll_loss": 1.1130973100662231, "rewards/accuracies": 0.875, "rewards/chosen": -0.08544580638408661, "rewards/margins": 0.1688283085823059, "rewards/rejected": -0.2542741298675537, "step": 2278 }, { "epoch": 1.39026994052158, "grad_norm": 1.1097283363342285, "learning_rate": 5.796448254745867e-06, "log_odds_chosen": 1.3642091751098633, "log_odds_ratio": -0.4354158043861389, "logits/chosen": -0.791204035282135, "logits/rejected": -0.7731715440750122, "logps/chosen": -0.7084821462631226, "logps/rejected": -1.7014431953430176, "loss": 1.0672, "nll_loss": 0.874543309211731, "rewards/accuracies": 0.625, "rewards/chosen": -0.07084821164608002, "rewards/margins": 0.09929610043764114, "rewards/rejected": -0.17014433443546295, "step": 2279 }, { "epoch": 1.390879975598597, "grad_norm": 1.5147284269332886, "learning_rate": 5.795468462951622e-06, "log_odds_chosen": 1.8800253868103027, "log_odds_ratio": -0.392465204000473, "logits/chosen": -0.9268322587013245, "logits/rejected": -0.9421176910400391, "logps/chosen": -0.7513542771339417, "logps/rejected": -2.1660146713256836, "loss": 0.972, "nll_loss": 0.9443876147270203, "rewards/accuracies": 0.75, "rewards/chosen": -0.07513542473316193, "rewards/margins": 0.14146603643894196, "rewards/rejected": -0.21660146117210388, "step": 2280 }, { "epoch": 1.3914900106756138, "grad_norm": 0.9482801556587219, "learning_rate": 5.794488671157379e-06, "log_odds_chosen": 1.315608024597168, "log_odds_ratio": -0.4592973291873932, "logits/chosen": -0.9896866679191589, "logits/rejected": -0.9069033861160278, "logps/chosen": -0.6855656504631042, "logps/rejected": -1.6815710067749023, "loss": 0.9298, "nll_loss": 0.9181535243988037, "rewards/accuracies": 0.875, "rewards/chosen": -0.06855656206607819, "rewards/margins": 0.09960053861141205, "rewards/rejected": -0.16815710067749023, "step": 2281 }, { "epoch": 1.3921000457526307, "grad_norm": 0.8487209677696228, "learning_rate": 5.793508879363135e-06, "log_odds_chosen": 0.8175863027572632, "log_odds_ratio": -0.44297513365745544, "logits/chosen": -1.0761116743087769, "logits/rejected": -0.7444576025009155, "logps/chosen": -0.9830151796340942, "logps/rejected": -1.6509876251220703, "loss": 0.9962, "nll_loss": 1.1000914573669434, "rewards/accuracies": 0.75, "rewards/chosen": -0.09830151498317719, "rewards/margins": 0.06679724901914597, "rewards/rejected": -0.16509875655174255, "step": 2282 }, { "epoch": 1.3927100808296478, "grad_norm": 1.2606725692749023, "learning_rate": 5.792529087568891e-06, "log_odds_chosen": 0.2104351967573166, "log_odds_ratio": -0.6211009621620178, "logits/chosen": -1.0229401588439941, "logits/rejected": -1.0473744869232178, "logps/chosen": -1.1860299110412598, "logps/rejected": -1.2828617095947266, "loss": 1.1213, "nll_loss": 1.0267499685287476, "rewards/accuracies": 0.75, "rewards/chosen": -0.11860300600528717, "rewards/margins": 0.009683173149824142, "rewards/rejected": -0.12828616797924042, "step": 2283 }, { "epoch": 1.3933201159066646, "grad_norm": 1.2594741582870483, "learning_rate": 5.791549295774648e-06, "log_odds_chosen": 1.6811811923980713, "log_odds_ratio": -0.3861544132232666, "logits/chosen": -0.9338093996047974, "logits/rejected": -0.8583389520645142, "logps/chosen": -0.8231949210166931, "logps/rejected": -2.125089168548584, "loss": 1.0991, "nll_loss": 1.1158229112625122, "rewards/accuracies": 0.75, "rewards/chosen": -0.08231949061155319, "rewards/margins": 0.1301894336938858, "rewards/rejected": -0.2125089317560196, "step": 2284 }, { "epoch": 1.3939301509836817, "grad_norm": 1.1273099184036255, "learning_rate": 5.790569503980404e-06, "log_odds_chosen": 0.5998234748840332, "log_odds_ratio": -0.5958980917930603, "logits/chosen": -0.9322745203971863, "logits/rejected": -1.0010870695114136, "logps/chosen": -0.8357911705970764, "logps/rejected": -1.280827522277832, "loss": 1.0714, "nll_loss": 0.9001524448394775, "rewards/accuracies": 0.625, "rewards/chosen": -0.08357911556959152, "rewards/margins": 0.04450362175703049, "rewards/rejected": -0.1280827522277832, "step": 2285 }, { "epoch": 1.3945401860606985, "grad_norm": 2.135772228240967, "learning_rate": 5.78958971218616e-06, "log_odds_chosen": 0.6797598600387573, "log_odds_ratio": -0.5568233728408813, "logits/chosen": -0.9178913831710815, "logits/rejected": -0.9312237501144409, "logps/chosen": -0.7017077803611755, "logps/rejected": -1.1667852401733398, "loss": 1.0425, "nll_loss": 1.0512595176696777, "rewards/accuracies": 0.625, "rewards/chosen": -0.07017078250646591, "rewards/margins": 0.04650774598121643, "rewards/rejected": -0.11667852103710175, "step": 2286 }, { "epoch": 1.3951502211377154, "grad_norm": 1.270881175994873, "learning_rate": 5.7886099203919166e-06, "log_odds_chosen": 1.3252785205841064, "log_odds_ratio": -0.44833916425704956, "logits/chosen": -0.6204832196235657, "logits/rejected": -0.6795334815979004, "logps/chosen": -0.6686531901359558, "logps/rejected": -1.4728281497955322, "loss": 0.947, "nll_loss": 0.7532225847244263, "rewards/accuracies": 0.75, "rewards/chosen": -0.06686532497406006, "rewards/margins": 0.08041749149560928, "rewards/rejected": -0.14728280901908875, "step": 2287 }, { "epoch": 1.3957602562147322, "grad_norm": 1.1267939805984497, "learning_rate": 5.787630128597673e-06, "log_odds_chosen": 1.1219731569290161, "log_odds_ratio": -0.3947230577468872, "logits/chosen": -0.9803156852722168, "logits/rejected": -0.9720431566238403, "logps/chosen": -0.8492871522903442, "logps/rejected": -1.6644566059112549, "loss": 1.0129, "nll_loss": 0.9521793127059937, "rewards/accuracies": 0.875, "rewards/chosen": -0.0849287137389183, "rewards/margins": 0.08151693642139435, "rewards/rejected": -0.16644565761089325, "step": 2288 }, { "epoch": 1.3963702912917493, "grad_norm": 1.1292873620986938, "learning_rate": 5.786650336803429e-06, "log_odds_chosen": 1.3422354459762573, "log_odds_ratio": -0.5161082744598389, "logits/chosen": -0.9947453141212463, "logits/rejected": -1.0194170475006104, "logps/chosen": -0.8583055138587952, "logps/rejected": -1.8626223802566528, "loss": 1.1522, "nll_loss": 0.9977429509162903, "rewards/accuracies": 0.75, "rewards/chosen": -0.08583056181669235, "rewards/margins": 0.10043168067932129, "rewards/rejected": -0.18626224994659424, "step": 2289 }, { "epoch": 1.3969803263687661, "grad_norm": 2.5106582641601562, "learning_rate": 5.785670545009186e-06, "log_odds_chosen": 0.7132769823074341, "log_odds_ratio": -0.5932155847549438, "logits/chosen": -0.8458859920501709, "logits/rejected": -0.7412476539611816, "logps/chosen": -0.8693448901176453, "logps/rejected": -1.3594660758972168, "loss": 1.0556, "nll_loss": 0.9697139859199524, "rewards/accuracies": 0.625, "rewards/chosen": -0.08693449199199677, "rewards/margins": 0.04901212453842163, "rewards/rejected": -0.1359466016292572, "step": 2290 }, { "epoch": 1.3975903614457832, "grad_norm": 3.2423782348632812, "learning_rate": 5.784690753214941e-06, "log_odds_chosen": 0.6321156620979309, "log_odds_ratio": -0.5181830525398254, "logits/chosen": -0.8467576503753662, "logits/rejected": -0.8117441534996033, "logps/chosen": -0.8022303581237793, "logps/rejected": -1.26860773563385, "loss": 1.0242, "nll_loss": 1.0835082530975342, "rewards/accuracies": 0.75, "rewards/chosen": -0.08022305369377136, "rewards/margins": 0.04663772135972977, "rewards/rejected": -0.12686076760292053, "step": 2291 }, { "epoch": 1.3982003965228, "grad_norm": 1.7593958377838135, "learning_rate": 5.783710961420697e-06, "log_odds_chosen": 2.3053767681121826, "log_odds_ratio": -0.35498732328414917, "logits/chosen": -0.6951073408126831, "logits/rejected": -0.7867634892463684, "logps/chosen": -0.6428266763687134, "logps/rejected": -2.416504144668579, "loss": 1.0651, "nll_loss": 0.8906455636024475, "rewards/accuracies": 0.75, "rewards/chosen": -0.06428267061710358, "rewards/margins": 0.17736774682998657, "rewards/rejected": -0.24165040254592896, "step": 2292 }, { "epoch": 1.3988104315998169, "grad_norm": 6.798411846160889, "learning_rate": 5.782731169626454e-06, "log_odds_chosen": 0.5802478790283203, "log_odds_ratio": -0.6247245073318481, "logits/chosen": -0.9433108568191528, "logits/rejected": -1.0622203350067139, "logps/chosen": -0.9507689476013184, "logps/rejected": -1.4057128429412842, "loss": 1.1196, "nll_loss": 1.0592761039733887, "rewards/accuracies": 0.5, "rewards/chosen": -0.09507689625024796, "rewards/margins": 0.04549439623951912, "rewards/rejected": -0.14057129621505737, "step": 2293 }, { "epoch": 1.399420466676834, "grad_norm": 2.0308661460876465, "learning_rate": 5.78175137783221e-06, "log_odds_chosen": 0.031584203243255615, "log_odds_ratio": -0.7093695402145386, "logits/chosen": -1.0822416543960571, "logits/rejected": -0.9472188949584961, "logps/chosen": -0.9505449533462524, "logps/rejected": -0.957548975944519, "loss": 1.0338, "nll_loss": 1.1106817722320557, "rewards/accuracies": 0.5, "rewards/chosen": -0.095054492354393, "rewards/margins": 0.0007004011422395706, "rewards/rejected": -0.09575490653514862, "step": 2294 }, { "epoch": 1.4000305017538508, "grad_norm": 1.204202651977539, "learning_rate": 5.780771586037967e-06, "log_odds_chosen": 0.6952953338623047, "log_odds_ratio": -0.5731825828552246, "logits/chosen": -0.7858607172966003, "logits/rejected": -0.8180117011070251, "logps/chosen": -0.9621703624725342, "logps/rejected": -1.5510004758834839, "loss": 1.0229, "nll_loss": 1.1355319023132324, "rewards/accuracies": 0.625, "rewards/chosen": -0.09621703624725342, "rewards/margins": 0.05888301879167557, "rewards/rejected": -0.15510006248950958, "step": 2295 }, { "epoch": 1.4006405368308679, "grad_norm": 1.4945778846740723, "learning_rate": 5.779791794243723e-06, "log_odds_chosen": 1.6777100563049316, "log_odds_ratio": -0.4709164500236511, "logits/chosen": -0.6401786804199219, "logits/rejected": -0.7661590576171875, "logps/chosen": -0.7183140516281128, "logps/rejected": -2.008157253265381, "loss": 1.0521, "nll_loss": 0.8773641586303711, "rewards/accuracies": 0.75, "rewards/chosen": -0.07183140516281128, "rewards/margins": 0.12898433208465576, "rewards/rejected": -0.20081573724746704, "step": 2296 }, { "epoch": 1.4012505719078847, "grad_norm": 1.7180695533752441, "learning_rate": 5.778812002449479e-06, "log_odds_chosen": 0.9342995882034302, "log_odds_ratio": -0.6031736135482788, "logits/chosen": -0.9649551510810852, "logits/rejected": -0.9150043725967407, "logps/chosen": -0.8816875219345093, "logps/rejected": -1.5176407098770142, "loss": 1.1065, "nll_loss": 1.1116019487380981, "rewards/accuracies": 0.5, "rewards/chosen": -0.08816875517368317, "rewards/margins": 0.06359530985355377, "rewards/rejected": -0.15176406502723694, "step": 2297 }, { "epoch": 1.4018606069849016, "grad_norm": 1.7493013143539429, "learning_rate": 5.777832210655236e-06, "log_odds_chosen": 2.413839340209961, "log_odds_ratio": -0.22061261534690857, "logits/chosen": -0.5943487882614136, "logits/rejected": -0.6198243498802185, "logps/chosen": -0.7000130414962769, "logps/rejected": -2.5405542850494385, "loss": 0.9278, "nll_loss": 0.8568438291549683, "rewards/accuracies": 0.875, "rewards/chosen": -0.07000130414962769, "rewards/margins": 0.18405413627624512, "rewards/rejected": -0.2540554404258728, "step": 2298 }, { "epoch": 1.4024706420619186, "grad_norm": 2.281619071960449, "learning_rate": 5.776852418860992e-06, "log_odds_chosen": 0.5718551278114319, "log_odds_ratio": -0.5822614431381226, "logits/chosen": -0.8595896363258362, "logits/rejected": -0.7739325165748596, "logps/chosen": -0.8399352431297302, "logps/rejected": -1.2682011127471924, "loss": 1.1323, "nll_loss": 1.3283311128616333, "rewards/accuracies": 0.75, "rewards/chosen": -0.08399352431297302, "rewards/margins": 0.0428265817463398, "rewards/rejected": -0.12682010233402252, "step": 2299 }, { "epoch": 1.4030806771389355, "grad_norm": 1.9547383785247803, "learning_rate": 5.775872627066748e-06, "log_odds_chosen": 2.257497787475586, "log_odds_ratio": -0.36376500129699707, "logits/chosen": -1.0233018398284912, "logits/rejected": -0.9190607070922852, "logps/chosen": -0.7545179128646851, "logps/rejected": -2.6910905838012695, "loss": 1.0063, "nll_loss": 1.1614019870758057, "rewards/accuracies": 0.75, "rewards/chosen": -0.0754517912864685, "rewards/margins": 0.1936572641134262, "rewards/rejected": -0.2691090703010559, "step": 2300 }, { "epoch": 1.4036907122159525, "grad_norm": 1.4919860363006592, "learning_rate": 5.774892835272505e-06, "log_odds_chosen": 1.1982264518737793, "log_odds_ratio": -0.38499078154563904, "logits/chosen": -0.933556079864502, "logits/rejected": -0.7809266448020935, "logps/chosen": -0.821622371673584, "logps/rejected": -1.6995716094970703, "loss": 1.1108, "nll_loss": 1.1192867755889893, "rewards/accuracies": 0.875, "rewards/chosen": -0.08216223120689392, "rewards/margins": 0.0877949446439743, "rewards/rejected": -0.16995717585086823, "step": 2301 }, { "epoch": 1.4043007472929694, "grad_norm": 1.5133273601531982, "learning_rate": 5.773913043478261e-06, "log_odds_chosen": 0.9299768805503845, "log_odds_ratio": -0.44949087500572205, "logits/chosen": -0.8481519222259521, "logits/rejected": -0.7805588841438293, "logps/chosen": -0.6365715265274048, "logps/rejected": -1.2265467643737793, "loss": 1.0788, "nll_loss": 0.812773585319519, "rewards/accuracies": 0.875, "rewards/chosen": -0.06365714222192764, "rewards/margins": 0.05899753421545029, "rewards/rejected": -0.12265469133853912, "step": 2302 }, { "epoch": 1.4049107823699862, "grad_norm": 1.506729245185852, "learning_rate": 5.772933251684016e-06, "log_odds_chosen": 2.2459299564361572, "log_odds_ratio": -0.2922336459159851, "logits/chosen": -0.632405698299408, "logits/rejected": -0.7898154854774475, "logps/chosen": -0.6477810740470886, "logps/rejected": -2.301607131958008, "loss": 1.0276, "nll_loss": 0.8293002843856812, "rewards/accuracies": 0.875, "rewards/chosen": -0.06477810442447662, "rewards/margins": 0.16538262367248535, "rewards/rejected": -0.23016071319580078, "step": 2303 }, { "epoch": 1.405520817447003, "grad_norm": 2.0680432319641113, "learning_rate": 5.771953459889773e-06, "log_odds_chosen": 1.8001103401184082, "log_odds_ratio": -0.2972872853279114, "logits/chosen": -0.650970458984375, "logits/rejected": -0.8021447658538818, "logps/chosen": -0.7486281394958496, "logps/rejected": -2.117537021636963, "loss": 1.1548, "nll_loss": 1.130449891090393, "rewards/accuracies": 0.875, "rewards/chosen": -0.07486281543970108, "rewards/margins": 0.13689088821411133, "rewards/rejected": -0.2117537260055542, "step": 2304 }, { "epoch": 1.4061308525240201, "grad_norm": 1.41811203956604, "learning_rate": 5.770973668095529e-06, "log_odds_chosen": 1.9611051082611084, "log_odds_ratio": -0.3122381269931793, "logits/chosen": -1.020057201385498, "logits/rejected": -0.7943241000175476, "logps/chosen": -0.8631713390350342, "logps/rejected": -2.3591909408569336, "loss": 0.9734, "nll_loss": 1.06706964969635, "rewards/accuracies": 0.875, "rewards/chosen": -0.08631713688373566, "rewards/margins": 0.14960192143917084, "rewards/rejected": -0.2359190732240677, "step": 2305 }, { "epoch": 1.406740887601037, "grad_norm": 2.7756781578063965, "learning_rate": 5.769993876301286e-06, "log_odds_chosen": 0.2693048119544983, "log_odds_ratio": -0.6435359716415405, "logits/chosen": -0.8579459190368652, "logits/rejected": -0.8322874903678894, "logps/chosen": -0.7494988441467285, "logps/rejected": -0.9657964706420898, "loss": 1.2085, "nll_loss": 1.1467187404632568, "rewards/accuracies": 0.5, "rewards/chosen": -0.07494988292455673, "rewards/margins": 0.02162976749241352, "rewards/rejected": -0.0965796560049057, "step": 2306 }, { "epoch": 1.407350922678054, "grad_norm": 1.7598434686660767, "learning_rate": 5.769014084507042e-06, "log_odds_chosen": 0.28962230682373047, "log_odds_ratio": -0.7560871243476868, "logits/chosen": -1.054886817932129, "logits/rejected": -0.9408217668533325, "logps/chosen": -0.878140926361084, "logps/rejected": -1.1802414655685425, "loss": 1.0393, "nll_loss": 0.9760478138923645, "rewards/accuracies": 0.5, "rewards/chosen": -0.087814100086689, "rewards/margins": 0.03021004982292652, "rewards/rejected": -0.11802415549755096, "step": 2307 }, { "epoch": 1.407960957755071, "grad_norm": 1.8506784439086914, "learning_rate": 5.768034292712798e-06, "log_odds_chosen": 1.737925410270691, "log_odds_ratio": -0.5600228309631348, "logits/chosen": -0.7589334845542908, "logits/rejected": -0.9318799376487732, "logps/chosen": -0.826940655708313, "logps/rejected": -2.2957608699798584, "loss": 1.0863, "nll_loss": 1.0027508735656738, "rewards/accuracies": 0.5, "rewards/chosen": -0.08269406855106354, "rewards/margins": 0.14688201248645782, "rewards/rejected": -0.22957608103752136, "step": 2308 }, { "epoch": 1.4085709928320878, "grad_norm": 2.535691976547241, "learning_rate": 5.767054500918555e-06, "log_odds_chosen": 1.8983638286590576, "log_odds_ratio": -0.4233211278915405, "logits/chosen": -0.9362657070159912, "logits/rejected": -0.8835799694061279, "logps/chosen": -0.8784054517745972, "logps/rejected": -2.4377408027648926, "loss": 1.1031, "nll_loss": 1.05672287940979, "rewards/accuracies": 0.75, "rewards/chosen": -0.08784055709838867, "rewards/margins": 0.15593354403972626, "rewards/rejected": -0.24377408623695374, "step": 2309 }, { "epoch": 1.4091810279091048, "grad_norm": 1.952013611793518, "learning_rate": 5.766074709124311e-06, "log_odds_chosen": 1.0611810684204102, "log_odds_ratio": -0.3202510476112366, "logits/chosen": -0.9271910190582275, "logits/rejected": -0.9092862606048584, "logps/chosen": -0.82707679271698, "logps/rejected": -1.5377306938171387, "loss": 1.1801, "nll_loss": 1.068591833114624, "rewards/accuracies": 1.0, "rewards/chosen": -0.08270767331123352, "rewards/margins": 0.07106538116931915, "rewards/rejected": -0.15377305448055267, "step": 2310 }, { "epoch": 1.4097910629861217, "grad_norm": 12.056135177612305, "learning_rate": 5.765094917330067e-06, "log_odds_chosen": 1.1054730415344238, "log_odds_ratio": -0.48956942558288574, "logits/chosen": -0.7450252175331116, "logits/rejected": -0.7580990791320801, "logps/chosen": -0.8079906105995178, "logps/rejected": -1.5495043992996216, "loss": 1.0653, "nll_loss": 1.1803746223449707, "rewards/accuracies": 0.75, "rewards/chosen": -0.08079906553030014, "rewards/margins": 0.07415138930082321, "rewards/rejected": -0.15495043992996216, "step": 2311 }, { "epoch": 1.4104010980631387, "grad_norm": 1.851054310798645, "learning_rate": 5.764115125535824e-06, "log_odds_chosen": 1.4026448726654053, "log_odds_ratio": -0.4174756705760956, "logits/chosen": -1.0686347484588623, "logits/rejected": -1.0650241374969482, "logps/chosen": -0.8657271862030029, "logps/rejected": -1.9756556749343872, "loss": 1.0595, "nll_loss": 1.1496174335479736, "rewards/accuracies": 0.875, "rewards/chosen": -0.08657272160053253, "rewards/margins": 0.11099286377429962, "rewards/rejected": -0.19756558537483215, "step": 2312 }, { "epoch": 1.4110111331401556, "grad_norm": 1.3882890939712524, "learning_rate": 5.76313533374158e-06, "log_odds_chosen": 0.7944706678390503, "log_odds_ratio": -0.5890868902206421, "logits/chosen": -0.8860166668891907, "logits/rejected": -0.8304762840270996, "logps/chosen": -0.8261891007423401, "logps/rejected": -1.3890926837921143, "loss": 0.9689, "nll_loss": 1.0605462789535522, "rewards/accuracies": 0.625, "rewards/chosen": -0.08261890709400177, "rewards/margins": 0.05629035830497742, "rewards/rejected": -0.1389092653989792, "step": 2313 }, { "epoch": 1.4116211682171724, "grad_norm": 1.728498101234436, "learning_rate": 5.762155541947335e-06, "log_odds_chosen": 1.823111891746521, "log_odds_ratio": -0.36597347259521484, "logits/chosen": -0.8025971055030823, "logits/rejected": -0.8671283721923828, "logps/chosen": -0.9748679399490356, "logps/rejected": -2.429640293121338, "loss": 1.2105, "nll_loss": 1.1171579360961914, "rewards/accuracies": 0.75, "rewards/chosen": -0.09748679399490356, "rewards/margins": 0.14547722041606903, "rewards/rejected": -0.2429640144109726, "step": 2314 }, { "epoch": 1.4122312032941895, "grad_norm": 1.6912518739700317, "learning_rate": 5.761175750153092e-06, "log_odds_chosen": 1.2560365200042725, "log_odds_ratio": -0.34683114290237427, "logits/chosen": -0.8365044593811035, "logits/rejected": -0.9128870368003845, "logps/chosen": -0.7617158889770508, "logps/rejected": -1.6040825843811035, "loss": 1.0035, "nll_loss": 0.814083456993103, "rewards/accuracies": 1.0, "rewards/chosen": -0.07617159187793732, "rewards/margins": 0.08423667401075363, "rewards/rejected": -0.16040825843811035, "step": 2315 }, { "epoch": 1.4128412383712063, "grad_norm": 5.601535797119141, "learning_rate": 5.760195958358848e-06, "log_odds_chosen": 0.8796581625938416, "log_odds_ratio": -0.5535196661949158, "logits/chosen": -1.0485637187957764, "logits/rejected": -0.9312844276428223, "logps/chosen": -0.9111117124557495, "logps/rejected": -1.6511995792388916, "loss": 1.1426, "nll_loss": 0.9848850965499878, "rewards/accuracies": 0.75, "rewards/chosen": -0.09111116826534271, "rewards/margins": 0.07400879263877869, "rewards/rejected": -0.1651199758052826, "step": 2316 }, { "epoch": 1.4134512734482234, "grad_norm": 4.102245330810547, "learning_rate": 5.7592161665646045e-06, "log_odds_chosen": 0.814113974571228, "log_odds_ratio": -0.49147161841392517, "logits/chosen": -0.6478776931762695, "logits/rejected": -0.6548057794570923, "logps/chosen": -0.713790237903595, "logps/rejected": -1.2052844762802124, "loss": 1.1333, "nll_loss": 0.7341996431350708, "rewards/accuracies": 0.75, "rewards/chosen": -0.07137902081012726, "rewards/margins": 0.04914942383766174, "rewards/rejected": -0.1205284520983696, "step": 2317 }, { "epoch": 1.4140613085252403, "grad_norm": 1.2166178226470947, "learning_rate": 5.7582363747703614e-06, "log_odds_chosen": 0.9690226316452026, "log_odds_ratio": -0.5651248693466187, "logits/chosen": -0.6850205659866333, "logits/rejected": -0.8013108372688293, "logps/chosen": -0.8686493635177612, "logps/rejected": -1.5533063411712646, "loss": 1.1401, "nll_loss": 0.8887732625007629, "rewards/accuracies": 0.625, "rewards/chosen": -0.08686494082212448, "rewards/margins": 0.0684657171368599, "rewards/rejected": -0.15533065795898438, "step": 2318 }, { "epoch": 1.414671343602257, "grad_norm": 1.0900179147720337, "learning_rate": 5.7572565829761176e-06, "log_odds_chosen": 1.3878374099731445, "log_odds_ratio": -0.4284605383872986, "logits/chosen": -1.01197350025177, "logits/rejected": -0.9287344217300415, "logps/chosen": -0.6933929920196533, "logps/rejected": -1.5682756900787354, "loss": 1.1315, "nll_loss": 1.151711344718933, "rewards/accuracies": 0.75, "rewards/chosen": -0.06933930516242981, "rewards/margins": 0.08748827129602432, "rewards/rejected": -0.15682756900787354, "step": 2319 }, { "epoch": 1.415281378679274, "grad_norm": 1.453311562538147, "learning_rate": 5.756276791181874e-06, "log_odds_chosen": 2.945167303085327, "log_odds_ratio": -0.22055160999298096, "logits/chosen": -0.6965364217758179, "logits/rejected": -0.7182773947715759, "logps/chosen": -0.5661608576774597, "logps/rejected": -2.803346633911133, "loss": 1.022, "nll_loss": 0.7992721796035767, "rewards/accuracies": 0.875, "rewards/chosen": -0.05661609023809433, "rewards/margins": 0.2237185835838318, "rewards/rejected": -0.2803346514701843, "step": 2320 }, { "epoch": 1.415891413756291, "grad_norm": 1.7013534307479858, "learning_rate": 5.75529699938763e-06, "log_odds_chosen": 2.284400463104248, "log_odds_ratio": -0.26212072372436523, "logits/chosen": -0.5780512690544128, "logits/rejected": -0.7483034133911133, "logps/chosen": -0.7779887318611145, "logps/rejected": -2.352686882019043, "loss": 0.9205, "nll_loss": 0.7096840143203735, "rewards/accuracies": 0.875, "rewards/chosen": -0.07779887318611145, "rewards/margins": 0.15746979415416718, "rewards/rejected": -0.23526868224143982, "step": 2321 }, { "epoch": 1.4165014488333079, "grad_norm": 1.4670861959457397, "learning_rate": 5.754317207593386e-06, "log_odds_chosen": 0.10669964551925659, "log_odds_ratio": -0.7143932580947876, "logits/chosen": -0.9409898519515991, "logits/rejected": -0.9829056262969971, "logps/chosen": -1.0636532306671143, "logps/rejected": -1.1305230855941772, "loss": 1.1887, "nll_loss": 1.1506068706512451, "rewards/accuracies": 0.625, "rewards/chosen": -0.10636533051729202, "rewards/margins": 0.006686978042125702, "rewards/rejected": -0.11305232346057892, "step": 2322 }, { "epoch": 1.417111483910325, "grad_norm": 5.57823371887207, "learning_rate": 5.753337415799143e-06, "log_odds_chosen": -0.12321975827217102, "log_odds_ratio": -0.9386101961135864, "logits/chosen": -1.0176008939743042, "logits/rejected": -0.9534282684326172, "logps/chosen": -1.1494323015213013, "logps/rejected": -1.1020348072052002, "loss": 1.1425, "nll_loss": 1.4030237197875977, "rewards/accuracies": 0.25, "rewards/chosen": -0.11494322866201401, "rewards/margins": -0.004739748314023018, "rewards/rejected": -0.11020348221063614, "step": 2323 }, { "epoch": 1.4177215189873418, "grad_norm": 1.940436601638794, "learning_rate": 5.752357624004899e-06, "log_odds_chosen": 1.4203851222991943, "log_odds_ratio": -0.5792679786682129, "logits/chosen": -0.9608607888221741, "logits/rejected": -0.7960203886032104, "logps/chosen": -0.8570407629013062, "logps/rejected": -1.9182463884353638, "loss": 1.1274, "nll_loss": 1.2427663803100586, "rewards/accuracies": 0.625, "rewards/chosen": -0.08570408076047897, "rewards/margins": 0.10612055659294128, "rewards/rejected": -0.19182465970516205, "step": 2324 }, { "epoch": 1.4183315540643586, "grad_norm": 1.0026966333389282, "learning_rate": 5.751377832210654e-06, "log_odds_chosen": 1.702497959136963, "log_odds_ratio": -0.5939577221870422, "logits/chosen": -0.9021096229553223, "logits/rejected": -0.9563713073730469, "logps/chosen": -0.8796548843383789, "logps/rejected": -2.2692322731018066, "loss": 1.096, "nll_loss": 1.09196937084198, "rewards/accuracies": 0.625, "rewards/chosen": -0.08796548843383789, "rewards/margins": 0.13895773887634277, "rewards/rejected": -0.22692322731018066, "step": 2325 }, { "epoch": 1.4189415891413757, "grad_norm": 2.4191908836364746, "learning_rate": 5.750398040416411e-06, "log_odds_chosen": 0.7290643453598022, "log_odds_ratio": -0.7429959177970886, "logits/chosen": -0.7664763331413269, "logits/rejected": -0.8428192734718323, "logps/chosen": -0.9099230170249939, "logps/rejected": -1.1958998441696167, "loss": 0.9908, "nll_loss": 1.1139057874679565, "rewards/accuracies": 0.625, "rewards/chosen": -0.09099230170249939, "rewards/margins": 0.02859768085181713, "rewards/rejected": -0.11958998441696167, "step": 2326 }, { "epoch": 1.4195516242183925, "grad_norm": 1.4205044507980347, "learning_rate": 5.749418248622167e-06, "log_odds_chosen": 0.8119869232177734, "log_odds_ratio": -0.4620319604873657, "logits/chosen": -0.8829960227012634, "logits/rejected": -0.9602673053741455, "logps/chosen": -0.7852360606193542, "logps/rejected": -1.236156940460205, "loss": 1.025, "nll_loss": 1.0916177034378052, "rewards/accuracies": 0.625, "rewards/chosen": -0.07852360606193542, "rewards/margins": 0.04509208723902702, "rewards/rejected": -0.12361568957567215, "step": 2327 }, { "epoch": 1.4201616592954096, "grad_norm": 6.406006813049316, "learning_rate": 5.7484384568279235e-06, "log_odds_chosen": 0.06817877292633057, "log_odds_ratio": -0.8303927779197693, "logits/chosen": -0.8618849515914917, "logits/rejected": -0.9050978422164917, "logps/chosen": -1.038625955581665, "logps/rejected": -1.0830414295196533, "loss": 1.2082, "nll_loss": 1.012818694114685, "rewards/accuracies": 0.625, "rewards/chosen": -0.10386259853839874, "rewards/margins": 0.004441549070179462, "rewards/rejected": -0.10830415785312653, "step": 2328 }, { "epoch": 1.4207716943724265, "grad_norm": 1.2168101072311401, "learning_rate": 5.7474586650336805e-06, "log_odds_chosen": 0.7562825679779053, "log_odds_ratio": -0.6187559366226196, "logits/chosen": -0.9066315293312073, "logits/rejected": -0.8946570158004761, "logps/chosen": -0.6762900948524475, "logps/rejected": -1.238537311553955, "loss": 1.2429, "nll_loss": 0.9551621675491333, "rewards/accuracies": 0.5, "rewards/chosen": -0.06762900948524475, "rewards/margins": 0.05622471496462822, "rewards/rejected": -0.12385372817516327, "step": 2329 }, { "epoch": 1.4213817294494433, "grad_norm": 1.419276475906372, "learning_rate": 5.746478873239437e-06, "log_odds_chosen": 0.5766302347183228, "log_odds_ratio": -0.681349515914917, "logits/chosen": -1.051033616065979, "logits/rejected": -1.0440150499343872, "logps/chosen": -1.1142088174819946, "logps/rejected": -1.5997165441513062, "loss": 1.1234, "nll_loss": 1.2346123456954956, "rewards/accuracies": 0.5, "rewards/chosen": -0.1114208921790123, "rewards/margins": 0.04855075851082802, "rewards/rejected": -0.15997165441513062, "step": 2330 }, { "epoch": 1.4219917645264601, "grad_norm": 1.134458065032959, "learning_rate": 5.745499081445192e-06, "log_odds_chosen": 0.635003387928009, "log_odds_ratio": -0.590783953666687, "logits/chosen": -1.0601940155029297, "logits/rejected": -0.9203646183013916, "logps/chosen": -0.908428966999054, "logps/rejected": -1.3591618537902832, "loss": 1.1482, "nll_loss": 1.0805113315582275, "rewards/accuracies": 0.5, "rewards/chosen": -0.09084290266036987, "rewards/margins": 0.04507328197360039, "rewards/rejected": -0.13591617345809937, "step": 2331 }, { "epoch": 1.4226017996034772, "grad_norm": 7.676752090454102, "learning_rate": 5.744519289650949e-06, "log_odds_chosen": 2.842582941055298, "log_odds_ratio": -0.41642406582832336, "logits/chosen": -0.7048382759094238, "logits/rejected": -0.7999882698059082, "logps/chosen": -0.7584058046340942, "logps/rejected": -2.862673282623291, "loss": 1.2111, "nll_loss": 0.939180850982666, "rewards/accuracies": 0.625, "rewards/chosen": -0.07584058493375778, "rewards/margins": 0.21042677760124207, "rewards/rejected": -0.28626734018325806, "step": 2332 }, { "epoch": 1.423211834680494, "grad_norm": 2.3726983070373535, "learning_rate": 5.743539497856705e-06, "log_odds_chosen": 0.1718563437461853, "log_odds_ratio": -0.6764070987701416, "logits/chosen": -1.098624587059021, "logits/rejected": -0.9950767755508423, "logps/chosen": -1.0470337867736816, "logps/rejected": -1.2136435508728027, "loss": 1.2727, "nll_loss": 1.3306195735931396, "rewards/accuracies": 0.5, "rewards/chosen": -0.1047033816576004, "rewards/margins": 0.016660965979099274, "rewards/rejected": -0.12136434763669968, "step": 2333 }, { "epoch": 1.4238218697575111, "grad_norm": 1.9386296272277832, "learning_rate": 5.742559706062462e-06, "log_odds_chosen": -0.02035931497812271, "log_odds_ratio": -0.8896387219429016, "logits/chosen": -1.0131964683532715, "logits/rejected": -0.9232957363128662, "logps/chosen": -1.1457059383392334, "logps/rejected": -1.1277464628219604, "loss": 1.1736, "nll_loss": 1.369185209274292, "rewards/accuracies": 0.25, "rewards/chosen": -0.11457060277462006, "rewards/margins": -0.0017959480173885822, "rewards/rejected": -0.11277464777231216, "step": 2334 }, { "epoch": 1.424431904834528, "grad_norm": 1.4299219846725464, "learning_rate": 5.741579914268218e-06, "log_odds_chosen": 0.7774779796600342, "log_odds_ratio": -0.49695172905921936, "logits/chosen": -0.7815316915512085, "logits/rejected": -0.989378035068512, "logps/chosen": -0.7673565149307251, "logps/rejected": -1.2315508127212524, "loss": 1.1264, "nll_loss": 1.1846323013305664, "rewards/accuracies": 0.625, "rewards/chosen": -0.07673564553260803, "rewards/margins": 0.046419426798820496, "rewards/rejected": -0.12315508723258972, "step": 2335 }, { "epoch": 1.4250419399115448, "grad_norm": 1.008773684501648, "learning_rate": 5.740600122473974e-06, "log_odds_chosen": 1.26236891746521, "log_odds_ratio": -0.4895968437194824, "logits/chosen": -1.0070340633392334, "logits/rejected": -0.9374409914016724, "logps/chosen": -0.8779326677322388, "logps/rejected": -1.7575788497924805, "loss": 0.9612, "nll_loss": 1.0018576383590698, "rewards/accuracies": 0.75, "rewards/chosen": -0.08779326826334, "rewards/margins": 0.08796461671590805, "rewards/rejected": -0.17575788497924805, "step": 2336 }, { "epoch": 1.4256519749885619, "grad_norm": 1.428442120552063, "learning_rate": 5.73962033067973e-06, "log_odds_chosen": 0.9011542797088623, "log_odds_ratio": -0.5364725589752197, "logits/chosen": -1.0661636590957642, "logits/rejected": -1.0611071586608887, "logps/chosen": -0.9622921347618103, "logps/rejected": -1.641984224319458, "loss": 1.1307, "nll_loss": 1.0923826694488525, "rewards/accuracies": 0.625, "rewards/chosen": -0.09622921794652939, "rewards/margins": 0.06796921044588089, "rewards/rejected": -0.16419842839241028, "step": 2337 }, { "epoch": 1.4262620100655787, "grad_norm": 2.986750364303589, "learning_rate": 5.738640538885486e-06, "log_odds_chosen": 1.366384744644165, "log_odds_ratio": -0.4608480632305145, "logits/chosen": -0.9137927293777466, "logits/rejected": -0.8160091042518616, "logps/chosen": -0.9284685850143433, "logps/rejected": -2.012200117111206, "loss": 1.1626, "nll_loss": 1.1327247619628906, "rewards/accuracies": 0.75, "rewards/chosen": -0.09284686297178268, "rewards/margins": 0.10837314277887344, "rewards/rejected": -0.20122000575065613, "step": 2338 }, { "epoch": 1.4268720451425958, "grad_norm": 1.6157608032226562, "learning_rate": 5.7376607470912425e-06, "log_odds_chosen": 1.4014132022857666, "log_odds_ratio": -0.4201047122478485, "logits/chosen": -0.9155417680740356, "logits/rejected": -0.9947823286056519, "logps/chosen": -0.7360646724700928, "logps/rejected": -1.7136614322662354, "loss": 1.1298, "nll_loss": 0.9657579660415649, "rewards/accuracies": 0.875, "rewards/chosen": -0.0736064612865448, "rewards/margins": 0.0977596864104271, "rewards/rejected": -0.1713661551475525, "step": 2339 }, { "epoch": 1.4274820802196126, "grad_norm": 4.897909641265869, "learning_rate": 5.7366809552969995e-06, "log_odds_chosen": 2.4374516010284424, "log_odds_ratio": -0.49451619386672974, "logits/chosen": -0.9330449104309082, "logits/rejected": -0.9321072697639465, "logps/chosen": -0.9122269153594971, "logps/rejected": -3.0209362506866455, "loss": 1.1789, "nll_loss": 1.0689998865127563, "rewards/accuracies": 0.625, "rewards/chosen": -0.09122269600629807, "rewards/margins": 0.2108709216117859, "rewards/rejected": -0.30209362506866455, "step": 2340 }, { "epoch": 1.4280921152966295, "grad_norm": 1.3840084075927734, "learning_rate": 5.735701163502756e-06, "log_odds_chosen": 1.001219630241394, "log_odds_ratio": -0.44297105073928833, "logits/chosen": -0.7977368831634521, "logits/rejected": -0.7379114627838135, "logps/chosen": -0.7692641615867615, "logps/rejected": -1.478344202041626, "loss": 1.0196, "nll_loss": 0.9113282561302185, "rewards/accuracies": 0.875, "rewards/chosen": -0.07692642509937286, "rewards/margins": 0.07090801000595093, "rewards/rejected": -0.1478344202041626, "step": 2341 }, { "epoch": 1.4287021503736466, "grad_norm": 2.4596996307373047, "learning_rate": 5.734721371708512e-06, "log_odds_chosen": 0.8189254999160767, "log_odds_ratio": -0.5501672029495239, "logits/chosen": -1.1563875675201416, "logits/rejected": -0.956062912940979, "logps/chosen": -0.7835804224014282, "logps/rejected": -1.4047657251358032, "loss": 1.0748, "nll_loss": 1.0573396682739258, "rewards/accuracies": 0.625, "rewards/chosen": -0.07835803925991058, "rewards/margins": 0.0621185377240181, "rewards/rejected": -0.14047658443450928, "step": 2342 }, { "epoch": 1.4293121854506634, "grad_norm": 2.952340841293335, "learning_rate": 5.733741579914268e-06, "log_odds_chosen": 0.77345871925354, "log_odds_ratio": -0.42969614267349243, "logits/chosen": -0.9964877367019653, "logits/rejected": -1.1591918468475342, "logps/chosen": -0.7980438470840454, "logps/rejected": -1.3103282451629639, "loss": 1.2113, "nll_loss": 1.0529149770736694, "rewards/accuracies": 0.875, "rewards/chosen": -0.07980439066886902, "rewards/margins": 0.05122845619916916, "rewards/rejected": -0.13103283941745758, "step": 2343 }, { "epoch": 1.4299222205276805, "grad_norm": 1.7160005569458008, "learning_rate": 5.732761788120024e-06, "log_odds_chosen": 1.5809978246688843, "log_odds_ratio": -0.32966089248657227, "logits/chosen": -0.7170539498329163, "logits/rejected": -0.8449836373329163, "logps/chosen": -0.7238073945045471, "logps/rejected": -1.6897834539413452, "loss": 0.9976, "nll_loss": 0.9519374370574951, "rewards/accuracies": 1.0, "rewards/chosen": -0.07238073647022247, "rewards/margins": 0.09659760445356369, "rewards/rejected": -0.16897834837436676, "step": 2344 }, { "epoch": 1.4305322556046973, "grad_norm": 3.000135660171509, "learning_rate": 5.73178199632578e-06, "log_odds_chosen": 1.4296985864639282, "log_odds_ratio": -0.46867555379867554, "logits/chosen": -0.8068240880966187, "logits/rejected": -0.8407828211784363, "logps/chosen": -0.8072691559791565, "logps/rejected": -1.772663950920105, "loss": 1.1274, "nll_loss": 0.8999676704406738, "rewards/accuracies": 0.625, "rewards/chosen": -0.08072692155838013, "rewards/margins": 0.09653948992490768, "rewards/rejected": -0.17726640403270721, "step": 2345 }, { "epoch": 1.4311422906817142, "grad_norm": 1.4632542133331299, "learning_rate": 5.730802204531537e-06, "log_odds_chosen": 0.2913045883178711, "log_odds_ratio": -0.6460139751434326, "logits/chosen": -0.685863196849823, "logits/rejected": -0.6383848190307617, "logps/chosen": -0.743165910243988, "logps/rejected": -0.8708298206329346, "loss": 1.1399, "nll_loss": 0.963337779045105, "rewards/accuracies": 0.75, "rewards/chosen": -0.0743165910243988, "rewards/margins": 0.012766401283442974, "rewards/rejected": -0.08708299696445465, "step": 2346 }, { "epoch": 1.431752325758731, "grad_norm": 3.610301971435547, "learning_rate": 5.729822412737293e-06, "log_odds_chosen": 0.2240852266550064, "log_odds_ratio": -0.9487441778182983, "logits/chosen": -1.1038271188735962, "logits/rejected": -1.0387455224990845, "logps/chosen": -1.744354486465454, "logps/rejected": -1.627465009689331, "loss": 1.2645, "nll_loss": 1.2559224367141724, "rewards/accuracies": 0.5, "rewards/chosen": -0.17443545162677765, "rewards/margins": -0.011688966304063797, "rewards/rejected": -0.16274647414684296, "step": 2347 }, { "epoch": 1.432362360835748, "grad_norm": 0.7746606469154358, "learning_rate": 5.728842620943049e-06, "log_odds_chosen": 1.077852725982666, "log_odds_ratio": -0.5012678503990173, "logits/chosen": -0.9065784811973572, "logits/rejected": -0.8145415782928467, "logps/chosen": -0.8607285022735596, "logps/rejected": -1.7405463457107544, "loss": 1.1151, "nll_loss": 1.1029925346374512, "rewards/accuracies": 0.625, "rewards/chosen": -0.08607284724712372, "rewards/margins": 0.08798179775476456, "rewards/rejected": -0.17405463755130768, "step": 2348 }, { "epoch": 1.432972395912765, "grad_norm": 2.584568977355957, "learning_rate": 5.7278628291488055e-06, "log_odds_chosen": 0.8396077156066895, "log_odds_ratio": -0.5765513181686401, "logits/chosen": -0.866820216178894, "logits/rejected": -0.9351949095726013, "logps/chosen": -0.7520173192024231, "logps/rejected": -1.324188232421875, "loss": 1.0947, "nll_loss": 1.1935608386993408, "rewards/accuracies": 0.5, "rewards/chosen": -0.07520173490047455, "rewards/margins": 0.05721709504723549, "rewards/rejected": -0.13241882622241974, "step": 2349 }, { "epoch": 1.433582430989782, "grad_norm": 1.3701952695846558, "learning_rate": 5.726883037354562e-06, "log_odds_chosen": 0.5979804992675781, "log_odds_ratio": -0.5168778300285339, "logits/chosen": -0.8954041004180908, "logits/rejected": -0.9211938381195068, "logps/chosen": -0.9313086867332458, "logps/rejected": -1.3865418434143066, "loss": 1.2337, "nll_loss": 1.1947603225708008, "rewards/accuracies": 0.875, "rewards/chosen": -0.09313087165355682, "rewards/margins": 0.04552331566810608, "rewards/rejected": -0.1386541873216629, "step": 2350 }, { "epoch": 1.4341924660667988, "grad_norm": 1.4067221879959106, "learning_rate": 5.7259032455603186e-06, "log_odds_chosen": 0.8933352828025818, "log_odds_ratio": -0.6423656344413757, "logits/chosen": -0.9491850733757019, "logits/rejected": -0.9518918991088867, "logps/chosen": -0.9719679355621338, "logps/rejected": -1.7398266792297363, "loss": 1.1191, "nll_loss": 1.1904629468917847, "rewards/accuracies": 0.625, "rewards/chosen": -0.0971967950463295, "rewards/margins": 0.0767858698964119, "rewards/rejected": -0.1739826649427414, "step": 2351 }, { "epoch": 1.4348025011438157, "grad_norm": 1.6226948499679565, "learning_rate": 5.724923453766075e-06, "log_odds_chosen": 0.3644343316555023, "log_odds_ratio": -0.8065193891525269, "logits/chosen": -1.1049110889434814, "logits/rejected": -0.9921408891677856, "logps/chosen": -1.0460433959960938, "logps/rejected": -1.3654428720474243, "loss": 1.2334, "nll_loss": 1.1242797374725342, "rewards/accuracies": 0.375, "rewards/chosen": -0.1046043410897255, "rewards/margins": 0.03193995729088783, "rewards/rejected": -0.13654428720474243, "step": 2352 }, { "epoch": 1.4354125362208328, "grad_norm": 2.3106720447540283, "learning_rate": 5.723943661971831e-06, "log_odds_chosen": 0.16188976168632507, "log_odds_ratio": -0.6514706611633301, "logits/chosen": -0.9350411891937256, "logits/rejected": -0.9472873210906982, "logps/chosen": -1.0590364933013916, "logps/rejected": -1.1419646739959717, "loss": 1.0905, "nll_loss": 1.226398229598999, "rewards/accuracies": 0.5, "rewards/chosen": -0.10590365529060364, "rewards/margins": 0.00829281471669674, "rewards/rejected": -0.11419646441936493, "step": 2353 }, { "epoch": 1.4360225712978496, "grad_norm": 1.087139368057251, "learning_rate": 5.722963870177587e-06, "log_odds_chosen": 1.460524320602417, "log_odds_ratio": -0.5730536580085754, "logits/chosen": -0.9314327239990234, "logits/rejected": -0.9532114267349243, "logps/chosen": -0.7879713177680969, "logps/rejected": -2.0523340702056885, "loss": 0.9758, "nll_loss": 1.0449516773223877, "rewards/accuracies": 0.5, "rewards/chosen": -0.07879713177680969, "rewards/margins": 0.1264362633228302, "rewards/rejected": -0.2052334100008011, "step": 2354 }, { "epoch": 1.4366326063748667, "grad_norm": 2.1111068725585938, "learning_rate": 5.721984078383343e-06, "log_odds_chosen": 1.6813278198242188, "log_odds_ratio": -0.3803500533103943, "logits/chosen": -0.7742798328399658, "logits/rejected": -0.8546812534332275, "logps/chosen": -0.780527651309967, "logps/rejected": -1.964468240737915, "loss": 1.0695, "nll_loss": 0.8709237575531006, "rewards/accuracies": 0.875, "rewards/chosen": -0.07805276662111282, "rewards/margins": 0.11839406192302704, "rewards/rejected": -0.19644680619239807, "step": 2355 }, { "epoch": 1.4372426414518835, "grad_norm": 3.9453530311584473, "learning_rate": 5.721004286589099e-06, "log_odds_chosen": 0.5228894948959351, "log_odds_ratio": -0.5832595825195312, "logits/chosen": -1.052195429801941, "logits/rejected": -0.9383227229118347, "logps/chosen": -0.9047080874443054, "logps/rejected": -1.3295222520828247, "loss": 1.1623, "nll_loss": 1.025324821472168, "rewards/accuracies": 0.625, "rewards/chosen": -0.0904708132147789, "rewards/margins": 0.04248141497373581, "rewards/rejected": -0.1329522281885147, "step": 2356 }, { "epoch": 1.4378526765289004, "grad_norm": 1.1393662691116333, "learning_rate": 5.720024494794856e-06, "log_odds_chosen": 2.4335885047912598, "log_odds_ratio": -0.48609641194343567, "logits/chosen": -0.9171528816223145, "logits/rejected": -0.8300061821937561, "logps/chosen": -0.6633528470993042, "logps/rejected": -2.676522970199585, "loss": 1.1154, "nll_loss": 0.9111210703849792, "rewards/accuracies": 0.625, "rewards/chosen": -0.0663352906703949, "rewards/margins": 0.20131702721118927, "rewards/rejected": -0.267652302980423, "step": 2357 }, { "epoch": 1.4384627116059172, "grad_norm": 1.1808853149414062, "learning_rate": 5.719044703000612e-06, "log_odds_chosen": 0.6123150587081909, "log_odds_ratio": -0.5468699932098389, "logits/chosen": -1.0220980644226074, "logits/rejected": -1.01571786403656, "logps/chosen": -0.8097245097160339, "logps/rejected": -1.2233823537826538, "loss": 1.245, "nll_loss": 1.2527666091918945, "rewards/accuracies": 0.625, "rewards/chosen": -0.08097244799137115, "rewards/margins": 0.04136577993631363, "rewards/rejected": -0.12233823537826538, "step": 2358 }, { "epoch": 1.4390727466829343, "grad_norm": 1.621217131614685, "learning_rate": 5.718064911206368e-06, "log_odds_chosen": 1.7615816593170166, "log_odds_ratio": -0.27858543395996094, "logits/chosen": -0.7377833724021912, "logits/rejected": -0.8069800734519958, "logps/chosen": -0.8197129368782043, "logps/rejected": -2.0883145332336426, "loss": 1.181, "nll_loss": 0.9906231760978699, "rewards/accuracies": 0.875, "rewards/chosen": -0.08197128772735596, "rewards/margins": 0.12686017155647278, "rewards/rejected": -0.20883145928382874, "step": 2359 }, { "epoch": 1.4396827817599511, "grad_norm": 1.0595183372497559, "learning_rate": 5.7170851194121245e-06, "log_odds_chosen": 2.0476858615875244, "log_odds_ratio": -0.2326754480600357, "logits/chosen": -0.9202700853347778, "logits/rejected": -1.088659644126892, "logps/chosen": -1.017193078994751, "logps/rejected": -2.5845422744750977, "loss": 1.2694, "nll_loss": 1.4727380275726318, "rewards/accuracies": 1.0, "rewards/chosen": -0.10171931236982346, "rewards/margins": 0.15673494338989258, "rewards/rejected": -0.25845426321029663, "step": 2360 }, { "epoch": 1.4402928168369682, "grad_norm": 2.0256407260894775, "learning_rate": 5.716105327617881e-06, "log_odds_chosen": -0.37413668632507324, "log_odds_ratio": -0.9242069721221924, "logits/chosen": -1.0407778024673462, "logits/rejected": -0.9486490488052368, "logps/chosen": -1.1802574396133423, "logps/rejected": -0.9450602531433105, "loss": 1.0415, "nll_loss": 1.2660820484161377, "rewards/accuracies": 0.25, "rewards/chosen": -0.1180257499217987, "rewards/margins": -0.02351972460746765, "rewards/rejected": -0.09450601786375046, "step": 2361 }, { "epoch": 1.440902851913985, "grad_norm": 1.1646168231964111, "learning_rate": 5.715125535823638e-06, "log_odds_chosen": 0.3190098702907562, "log_odds_ratio": -0.6734285354614258, "logits/chosen": -0.7013776302337646, "logits/rejected": -0.7654647827148438, "logps/chosen": -0.7816221714019775, "logps/rejected": -0.9165321588516235, "loss": 0.9777, "nll_loss": 1.0150889158248901, "rewards/accuracies": 0.75, "rewards/chosen": -0.07816222310066223, "rewards/margins": 0.013490994460880756, "rewards/rejected": -0.09165322035551071, "step": 2362 }, { "epoch": 1.4415128869910019, "grad_norm": 4.394710063934326, "learning_rate": 5.714145744029394e-06, "log_odds_chosen": 2.4298765659332275, "log_odds_ratio": -0.38501089811325073, "logits/chosen": -0.8756049275398254, "logits/rejected": -0.8715611100196838, "logps/chosen": -0.7438836097717285, "logps/rejected": -2.6450154781341553, "loss": 1.1698, "nll_loss": 1.1360008716583252, "rewards/accuracies": 0.875, "rewards/chosen": -0.07438835501670837, "rewards/margins": 0.19011318683624268, "rewards/rejected": -0.26450151205062866, "step": 2363 }, { "epoch": 1.442122922068019, "grad_norm": 1.278965950012207, "learning_rate": 5.71316595223515e-06, "log_odds_chosen": 1.5612659454345703, "log_odds_ratio": -0.5008243322372437, "logits/chosen": -1.052526831626892, "logits/rejected": -1.1448222398757935, "logps/chosen": -0.8061537146568298, "logps/rejected": -2.0340442657470703, "loss": 1.152, "nll_loss": 1.372542142868042, "rewards/accuracies": 0.625, "rewards/chosen": -0.08061537146568298, "rewards/margins": 0.12278907746076584, "rewards/rejected": -0.20340445637702942, "step": 2364 }, { "epoch": 1.4427329571450358, "grad_norm": 1.216336727142334, "learning_rate": 5.712186160440906e-06, "log_odds_chosen": 0.8273671865463257, "log_odds_ratio": -0.5158347487449646, "logits/chosen": -0.8431985974311829, "logits/rejected": -0.9036374688148499, "logps/chosen": -0.9455785751342773, "logps/rejected": -1.4809212684631348, "loss": 1.1347, "nll_loss": 1.0770692825317383, "rewards/accuracies": 0.625, "rewards/chosen": -0.09455785900354385, "rewards/margins": 0.05353426933288574, "rewards/rejected": -0.148092120885849, "step": 2365 }, { "epoch": 1.4433429922220529, "grad_norm": 1.919388771057129, "learning_rate": 5.711206368646662e-06, "log_odds_chosen": 1.8473355770111084, "log_odds_ratio": -0.2514527440071106, "logits/chosen": -0.5686947107315063, "logits/rejected": -0.6233241558074951, "logps/chosen": -0.5833421945571899, "logps/rejected": -1.78682279586792, "loss": 0.8764, "nll_loss": 0.7517216205596924, "rewards/accuracies": 0.875, "rewards/chosen": -0.05833422392606735, "rewards/margins": 0.12034805864095688, "rewards/rejected": -0.17868226766586304, "step": 2366 }, { "epoch": 1.4439530272990697, "grad_norm": 2.6703836917877197, "learning_rate": 5.710226576852418e-06, "log_odds_chosen": 1.5571047067642212, "log_odds_ratio": -0.3039594292640686, "logits/chosen": -0.7013258337974548, "logits/rejected": -0.687997043132782, "logps/chosen": -0.6242268085479736, "logps/rejected": -1.645617961883545, "loss": 1.1164, "nll_loss": 0.9429813623428345, "rewards/accuracies": 0.875, "rewards/chosen": -0.06242268532514572, "rewards/margins": 0.10213911533355713, "rewards/rejected": -0.16456180810928345, "step": 2367 }, { "epoch": 1.4445630623760866, "grad_norm": 0.9634466767311096, "learning_rate": 5.709246785058175e-06, "log_odds_chosen": 1.4063501358032227, "log_odds_ratio": -0.4814508259296417, "logits/chosen": -0.7429927587509155, "logits/rejected": -0.9179096817970276, "logps/chosen": -0.6445380449295044, "logps/rejected": -1.7312384843826294, "loss": 0.9951, "nll_loss": 0.7778205871582031, "rewards/accuracies": 0.625, "rewards/chosen": -0.06445381045341492, "rewards/margins": 0.10867004096508026, "rewards/rejected": -0.17312383651733398, "step": 2368 }, { "epoch": 1.4451730974531036, "grad_norm": 2.082319498062134, "learning_rate": 5.708266993263931e-06, "log_odds_chosen": 3.0431270599365234, "log_odds_ratio": -0.269785076379776, "logits/chosen": -0.7092055678367615, "logits/rejected": -0.8328689336776733, "logps/chosen": -0.6564176082611084, "logps/rejected": -3.1233882904052734, "loss": 0.9201, "nll_loss": 0.7882471680641174, "rewards/accuracies": 0.875, "rewards/chosen": -0.06564176082611084, "rewards/margins": 0.2466970682144165, "rewards/rejected": -0.31233882904052734, "step": 2369 }, { "epoch": 1.4457831325301205, "grad_norm": 1.2926158905029297, "learning_rate": 5.707287201469687e-06, "log_odds_chosen": 2.9097108840942383, "log_odds_ratio": -0.12576554715633392, "logits/chosen": -0.4757615029811859, "logits/rejected": -0.7193158864974976, "logps/chosen": -0.44978952407836914, "logps/rejected": -2.4801766872406006, "loss": 0.966, "nll_loss": 0.7895057797431946, "rewards/accuracies": 1.0, "rewards/chosen": -0.04497895389795303, "rewards/margins": 0.20303872227668762, "rewards/rejected": -0.24801766872406006, "step": 2370 }, { "epoch": 1.4463931676071375, "grad_norm": 1.7775014638900757, "learning_rate": 5.7063074096754435e-06, "log_odds_chosen": 0.9624242782592773, "log_odds_ratio": -0.38809406757354736, "logits/chosen": -0.9439576864242554, "logits/rejected": -0.8309004306793213, "logps/chosen": -0.8132913708686829, "logps/rejected": -1.420057773590088, "loss": 0.952, "nll_loss": 0.964489758014679, "rewards/accuracies": 0.875, "rewards/chosen": -0.08132913708686829, "rewards/margins": 0.06067664548754692, "rewards/rejected": -0.1420057862997055, "step": 2371 }, { "epoch": 1.4470032026841544, "grad_norm": 2.582139015197754, "learning_rate": 5.7053276178812e-06, "log_odds_chosen": 0.20246607065200806, "log_odds_ratio": -0.6233424544334412, "logits/chosen": -1.0150985717773438, "logits/rejected": -0.9018446207046509, "logps/chosen": -1.0427318811416626, "logps/rejected": -1.1853609085083008, "loss": 1.1581, "nll_loss": 1.1515557765960693, "rewards/accuracies": 0.5, "rewards/chosen": -0.10427318513393402, "rewards/margins": 0.014262914657592773, "rewards/rejected": -0.1185360997915268, "step": 2372 }, { "epoch": 1.4476132377611712, "grad_norm": 1.5600385665893555, "learning_rate": 5.704347826086956e-06, "log_odds_chosen": 2.117145538330078, "log_odds_ratio": -0.36033591628074646, "logits/chosen": -0.9219189286231995, "logits/rejected": -0.7950201034545898, "logps/chosen": -0.9223781824111938, "logps/rejected": -2.5368757247924805, "loss": 1.0363, "nll_loss": 1.1781091690063477, "rewards/accuracies": 0.75, "rewards/chosen": -0.09223783016204834, "rewards/margins": 0.16144973039627075, "rewards/rejected": -0.2536875605583191, "step": 2373 }, { "epoch": 1.448223272838188, "grad_norm": 1.668790340423584, "learning_rate": 5.703368034292713e-06, "log_odds_chosen": 1.766305685043335, "log_odds_ratio": -0.3019167184829712, "logits/chosen": -0.7940369844436646, "logits/rejected": -0.9081528186798096, "logps/chosen": -0.7832354307174683, "logps/rejected": -2.0761239528656006, "loss": 1.0581, "nll_loss": 0.9668650031089783, "rewards/accuracies": 1.0, "rewards/chosen": -0.07832354307174683, "rewards/margins": 0.12928885221481323, "rewards/rejected": -0.20761241018772125, "step": 2374 }, { "epoch": 1.4488333079152051, "grad_norm": 1.2200839519500732, "learning_rate": 5.702388242498469e-06, "log_odds_chosen": 1.027637004852295, "log_odds_ratio": -0.6308920979499817, "logits/chosen": -1.038395643234253, "logits/rejected": -0.8749858140945435, "logps/chosen": -0.8914446830749512, "logps/rejected": -1.588111400604248, "loss": 1.02, "nll_loss": 1.1493206024169922, "rewards/accuracies": 0.5, "rewards/chosen": -0.08914446830749512, "rewards/margins": 0.06966666877269745, "rewards/rejected": -0.15881113708019257, "step": 2375 }, { "epoch": 1.449443342992222, "grad_norm": 1.5807299613952637, "learning_rate": 5.701408450704226e-06, "log_odds_chosen": 1.3410944938659668, "log_odds_ratio": -0.5838567018508911, "logits/chosen": -0.9100257754325867, "logits/rejected": -0.8499830961227417, "logps/chosen": -0.8321108818054199, "logps/rejected": -1.882372260093689, "loss": 1.017, "nll_loss": 1.0871978998184204, "rewards/accuracies": 0.625, "rewards/chosen": -0.08321108669042587, "rewards/margins": 0.10502614080905914, "rewards/rejected": -0.18823722004890442, "step": 2376 }, { "epoch": 1.450053378069239, "grad_norm": 1.4508867263793945, "learning_rate": 5.700428658909981e-06, "log_odds_chosen": 0.7065391540527344, "log_odds_ratio": -0.6806613206863403, "logits/chosen": -0.9023609757423401, "logits/rejected": -0.7030925750732422, "logps/chosen": -1.2810280323028564, "logps/rejected": -1.8571652173995972, "loss": 1.127, "nll_loss": 1.37559175491333, "rewards/accuracies": 0.5, "rewards/chosen": -0.12810280919075012, "rewards/margins": 0.05761370807886124, "rewards/rejected": -0.18571650981903076, "step": 2377 }, { "epoch": 1.450663413146256, "grad_norm": 4.545784950256348, "learning_rate": 5.699448867115737e-06, "log_odds_chosen": 0.9223697185516357, "log_odds_ratio": -0.5821510553359985, "logits/chosen": -1.0128158330917358, "logits/rejected": -1.011059045791626, "logps/chosen": -0.7939817905426025, "logps/rejected": -1.3893078565597534, "loss": 1.0466, "nll_loss": 1.0244899988174438, "rewards/accuracies": 0.375, "rewards/chosen": -0.07939817756414413, "rewards/margins": 0.059532612562179565, "rewards/rejected": -0.1389307975769043, "step": 2378 }, { "epoch": 1.4512734482232728, "grad_norm": 1.6786243915557861, "learning_rate": 5.698469075321494e-06, "log_odds_chosen": 0.9189170002937317, "log_odds_ratio": -0.5211924910545349, "logits/chosen": -1.0347708463668823, "logits/rejected": -0.9863202571868896, "logps/chosen": -0.9938545823097229, "logps/rejected": -1.6543591022491455, "loss": 1.0967, "nll_loss": 1.3146969079971313, "rewards/accuracies": 0.625, "rewards/chosen": -0.09938546270132065, "rewards/margins": 0.0660504475235939, "rewards/rejected": -0.16543591022491455, "step": 2379 }, { "epoch": 1.4518834833002898, "grad_norm": 4.666314601898193, "learning_rate": 5.69748928352725e-06, "log_odds_chosen": 0.29064178466796875, "log_odds_ratio": -0.720687985420227, "logits/chosen": -0.8436869382858276, "logits/rejected": -0.8747325539588928, "logps/chosen": -0.9062188863754272, "logps/rejected": -1.0506665706634521, "loss": 1.1213, "nll_loss": 0.9699746370315552, "rewards/accuracies": 0.625, "rewards/chosen": -0.09062188118696213, "rewards/margins": 0.014444773085415363, "rewards/rejected": -0.10506665706634521, "step": 2380 }, { "epoch": 1.4524935183773067, "grad_norm": 3.672980546951294, "learning_rate": 5.6965094917330065e-06, "log_odds_chosen": 3.1518404483795166, "log_odds_ratio": -0.21734295785427094, "logits/chosen": -1.1112260818481445, "logits/rejected": -1.1361702680587769, "logps/chosen": -0.6653757095336914, "logps/rejected": -3.159501791000366, "loss": 1.1969, "nll_loss": 1.1752357482910156, "rewards/accuracies": 1.0, "rewards/chosen": -0.06653757393360138, "rewards/margins": 0.2494126260280609, "rewards/rejected": -0.3159501850605011, "step": 2381 }, { "epoch": 1.4531035534543237, "grad_norm": 1.2147471904754639, "learning_rate": 5.695529699938763e-06, "log_odds_chosen": 1.2373520135879517, "log_odds_ratio": -0.4782838821411133, "logits/chosen": -0.9655303359031677, "logits/rejected": -0.9614467620849609, "logps/chosen": -0.7334385514259338, "logps/rejected": -1.6908518075942993, "loss": 0.9959, "nll_loss": 0.8979222774505615, "rewards/accuracies": 0.625, "rewards/chosen": -0.07334385067224503, "rewards/margins": 0.09574133157730103, "rewards/rejected": -0.16908518970012665, "step": 2382 }, { "epoch": 1.4537135885313406, "grad_norm": 2.013897657394409, "learning_rate": 5.694549908144519e-06, "log_odds_chosen": 1.4038622379302979, "log_odds_ratio": -0.44425803422927856, "logits/chosen": -0.9188889861106873, "logits/rejected": -0.9651859402656555, "logps/chosen": -0.8296111822128296, "logps/rejected": -1.9899386167526245, "loss": 1.2055, "nll_loss": 1.015347957611084, "rewards/accuracies": 0.875, "rewards/chosen": -0.08296111226081848, "rewards/margins": 0.11603274941444397, "rewards/rejected": -0.19899387657642365, "step": 2383 }, { "epoch": 1.4543236236083574, "grad_norm": 1.9318978786468506, "learning_rate": 5.693570116350275e-06, "log_odds_chosen": 1.1192885637283325, "log_odds_ratio": -0.565017819404602, "logits/chosen": -1.0206000804901123, "logits/rejected": -0.8987245559692383, "logps/chosen": -1.2625690698623657, "logps/rejected": -2.200420618057251, "loss": 1.1038, "nll_loss": 1.2499324083328247, "rewards/accuracies": 0.75, "rewards/chosen": -0.12625691294670105, "rewards/margins": 0.09378515928983688, "rewards/rejected": -0.22004206478595734, "step": 2384 }, { "epoch": 1.4549336586853743, "grad_norm": 1.2689992189407349, "learning_rate": 5.692590324556032e-06, "log_odds_chosen": 0.43448957800865173, "log_odds_ratio": -0.7359696626663208, "logits/chosen": -0.9571198225021362, "logits/rejected": -1.1045295000076294, "logps/chosen": -1.0791423320770264, "logps/rejected": -1.471218466758728, "loss": 1.2266, "nll_loss": 1.2026716470718384, "rewards/accuracies": 0.5, "rewards/chosen": -0.10791423916816711, "rewards/margins": 0.039207614958286285, "rewards/rejected": -0.1471218466758728, "step": 2385 }, { "epoch": 1.4555436937623913, "grad_norm": 1.4821072816848755, "learning_rate": 5.691610532761788e-06, "log_odds_chosen": 0.8864818811416626, "log_odds_ratio": -0.41355186700820923, "logits/chosen": -0.8382216095924377, "logits/rejected": -0.7973566055297852, "logps/chosen": -0.8632866144180298, "logps/rejected": -1.4127670526504517, "loss": 1.1764, "nll_loss": 1.0882211923599243, "rewards/accuracies": 0.75, "rewards/chosen": -0.0863286629319191, "rewards/margins": 0.054948046803474426, "rewards/rejected": -0.14127670228481293, "step": 2386 }, { "epoch": 1.4561537288394082, "grad_norm": 2.336672782897949, "learning_rate": 5.690630740967545e-06, "log_odds_chosen": 0.7365424633026123, "log_odds_ratio": -0.6682365536689758, "logits/chosen": -1.129683494567871, "logits/rejected": -1.0195586681365967, "logps/chosen": -0.9458786249160767, "logps/rejected": -1.546191692352295, "loss": 1.1708, "nll_loss": 1.2665207386016846, "rewards/accuracies": 0.625, "rewards/chosen": -0.09458785504102707, "rewards/margins": 0.060031309723854065, "rewards/rejected": -0.15461915731430054, "step": 2387 }, { "epoch": 1.4567637639164253, "grad_norm": 1.0251127481460571, "learning_rate": 5.6896509491733e-06, "log_odds_chosen": 1.2156579494476318, "log_odds_ratio": -0.6261614561080933, "logits/chosen": -0.7513916492462158, "logits/rejected": -0.662096381187439, "logps/chosen": -0.9377646446228027, "logps/rejected": -1.7903547286987305, "loss": 0.9268, "nll_loss": 0.8740208148956299, "rewards/accuracies": 0.625, "rewards/chosen": -0.09377647191286087, "rewards/margins": 0.08525900542736053, "rewards/rejected": -0.17903545498847961, "step": 2388 }, { "epoch": 1.457373798993442, "grad_norm": 3.6487011909484863, "learning_rate": 5.688671157379056e-06, "log_odds_chosen": 0.759010910987854, "log_odds_ratio": -0.5862253308296204, "logits/chosen": -0.9386171102523804, "logits/rejected": -0.9389578104019165, "logps/chosen": -0.8811663389205933, "logps/rejected": -1.374758243560791, "loss": 1.1683, "nll_loss": 1.2704989910125732, "rewards/accuracies": 0.5, "rewards/chosen": -0.08811663091182709, "rewards/margins": 0.049359191209077835, "rewards/rejected": -0.13747583329677582, "step": 2389 }, { "epoch": 1.457983834070459, "grad_norm": 2.5307891368865967, "learning_rate": 5.687691365584813e-06, "log_odds_chosen": 0.8068573474884033, "log_odds_ratio": -0.5915623307228088, "logits/chosen": -0.729374349117279, "logits/rejected": -0.6924287676811218, "logps/chosen": -0.8240771293640137, "logps/rejected": -1.3906670808792114, "loss": 1.0441, "nll_loss": 1.019106388092041, "rewards/accuracies": 0.625, "rewards/chosen": -0.08240771293640137, "rewards/margins": 0.05665900558233261, "rewards/rejected": -0.13906671106815338, "step": 2390 }, { "epoch": 1.458593869147476, "grad_norm": 5.1717634201049805, "learning_rate": 5.686711573790569e-06, "log_odds_chosen": 1.0829949378967285, "log_odds_ratio": -0.4752713739871979, "logits/chosen": -0.7988039255142212, "logits/rejected": -0.7759058475494385, "logps/chosen": -0.7904109954833984, "logps/rejected": -1.4484078884124756, "loss": 0.9737, "nll_loss": 1.1379495859146118, "rewards/accuracies": 0.75, "rewards/chosen": -0.07904110103845596, "rewards/margins": 0.06579969078302383, "rewards/rejected": -0.1448407918214798, "step": 2391 }, { "epoch": 1.4592039042244929, "grad_norm": 1.3849372863769531, "learning_rate": 5.6857317819963255e-06, "log_odds_chosen": 0.09079223871231079, "log_odds_ratio": -0.6763738393783569, "logits/chosen": -0.9824151992797852, "logits/rejected": -0.9082952737808228, "logps/chosen": -0.9462427496910095, "logps/rejected": -1.0103676319122314, "loss": 1.1666, "nll_loss": 1.030322790145874, "rewards/accuracies": 0.75, "rewards/chosen": -0.09462427347898483, "rewards/margins": 0.006412481889128685, "rewards/rejected": -0.10103675723075867, "step": 2392 }, { "epoch": 1.45981393930151, "grad_norm": 1.5656081438064575, "learning_rate": 5.6847519902020825e-06, "log_odds_chosen": 1.7474623918533325, "log_odds_ratio": -0.43181806802749634, "logits/chosen": -0.7388595938682556, "logits/rejected": -0.7479391098022461, "logps/chosen": -0.741011381149292, "logps/rejected": -1.926903486251831, "loss": 0.9198, "nll_loss": 0.9712176322937012, "rewards/accuracies": 0.625, "rewards/chosen": -0.07410113513469696, "rewards/margins": 0.11858922243118286, "rewards/rejected": -0.19269034266471863, "step": 2393 }, { "epoch": 1.4604239743785268, "grad_norm": 2.143383741378784, "learning_rate": 5.683772198407838e-06, "log_odds_chosen": 0.6868656873703003, "log_odds_ratio": -0.48793864250183105, "logits/chosen": -0.9470247030258179, "logits/rejected": -0.826324462890625, "logps/chosen": -0.8259517550468445, "logps/rejected": -1.2775397300720215, "loss": 1.078, "nll_loss": 1.2543959617614746, "rewards/accuracies": 0.75, "rewards/chosen": -0.08259516954421997, "rewards/margins": 0.045158810913562775, "rewards/rejected": -0.12775398790836334, "step": 2394 }, { "epoch": 1.4610340094555436, "grad_norm": 1.1500129699707031, "learning_rate": 5.682792406613594e-06, "log_odds_chosen": 0.767892599105835, "log_odds_ratio": -0.5392211675643921, "logits/chosen": -0.5935094952583313, "logits/rejected": -0.6877840161323547, "logps/chosen": -0.6699512600898743, "logps/rejected": -1.1615350246429443, "loss": 1.0163, "nll_loss": 0.9047592878341675, "rewards/accuracies": 0.625, "rewards/chosen": -0.06699512153863907, "rewards/margins": 0.049158383160829544, "rewards/rejected": -0.11615350842475891, "step": 2395 }, { "epoch": 1.4616440445325607, "grad_norm": 1.8038685321807861, "learning_rate": 5.681812614819351e-06, "log_odds_chosen": -0.12521114945411682, "log_odds_ratio": -0.7829742431640625, "logits/chosen": -1.141299843788147, "logits/rejected": -1.061285376548767, "logps/chosen": -1.0659675598144531, "logps/rejected": -1.0135223865509033, "loss": 1.026, "nll_loss": 1.1709649562835693, "rewards/accuracies": 0.375, "rewards/chosen": -0.10659675300121307, "rewards/margins": -0.0052445209585130215, "rewards/rejected": -0.10135223716497421, "step": 2396 }, { "epoch": 1.4622540796095775, "grad_norm": 1.6855262517929077, "learning_rate": 5.680832823025107e-06, "log_odds_chosen": 1.5410029888153076, "log_odds_ratio": -0.41220682859420776, "logits/chosen": -0.9553978443145752, "logits/rejected": -0.9717707633972168, "logps/chosen": -0.8503206372261047, "logps/rejected": -2.0548508167266846, "loss": 1.1624, "nll_loss": 1.0843422412872314, "rewards/accuracies": 0.875, "rewards/chosen": -0.08503206074237823, "rewards/margins": 0.12045302987098694, "rewards/rejected": -0.20548509061336517, "step": 2397 }, { "epoch": 1.4628641146865946, "grad_norm": 1.3749319314956665, "learning_rate": 5.679853031230863e-06, "log_odds_chosen": 2.0395290851593018, "log_odds_ratio": -0.4261258840560913, "logits/chosen": -0.9536216855049133, "logits/rejected": -1.0077202320098877, "logps/chosen": -0.7710169553756714, "logps/rejected": -2.34196138381958, "loss": 1.09, "nll_loss": 1.045833706855774, "rewards/accuracies": 0.75, "rewards/chosen": -0.0771016925573349, "rewards/margins": 0.15709444880485535, "rewards/rejected": -0.23419614136219025, "step": 2398 }, { "epoch": 1.4634741497636115, "grad_norm": 3.066546678543091, "learning_rate": 5.67887323943662e-06, "log_odds_chosen": 1.5881708860397339, "log_odds_ratio": -0.345254123210907, "logits/chosen": -0.8182018995285034, "logits/rejected": -0.842755913734436, "logps/chosen": -0.6054115891456604, "logps/rejected": -1.5720255374908447, "loss": 0.9775, "nll_loss": 0.8472062349319458, "rewards/accuracies": 0.875, "rewards/chosen": -0.06054115295410156, "rewards/margins": 0.09666140377521515, "rewards/rejected": -0.1572025567293167, "step": 2399 }, { "epoch": 1.4640841848406283, "grad_norm": 1.3451340198516846, "learning_rate": 5.677893447642375e-06, "log_odds_chosen": 0.6871253848075867, "log_odds_ratio": -0.5016561150550842, "logits/chosen": -0.8062203526496887, "logits/rejected": -0.8093345165252686, "logps/chosen": -0.9097095131874084, "logps/rejected": -1.3362104892730713, "loss": 1.1468, "nll_loss": 1.0485508441925049, "rewards/accuracies": 0.75, "rewards/chosen": -0.0909709483385086, "rewards/margins": 0.04265010729432106, "rewards/rejected": -0.13362105190753937, "step": 2400 }, { "epoch": 1.4646942199176451, "grad_norm": 1.4235085248947144, "learning_rate": 5.6769136558481314e-06, "log_odds_chosen": 2.6562538146972656, "log_odds_ratio": -0.2159522920846939, "logits/chosen": -0.9871128797531128, "logits/rejected": -1.0344293117523193, "logps/chosen": -0.6781994104385376, "logps/rejected": -2.653221368789673, "loss": 1.0883, "nll_loss": 0.8337131142616272, "rewards/accuracies": 1.0, "rewards/chosen": -0.06781993806362152, "rewards/margins": 0.1975022256374359, "rewards/rejected": -0.26532214879989624, "step": 2401 }, { "epoch": 1.4653042549946622, "grad_norm": 2.0379891395568848, "learning_rate": 5.675933864053888e-06, "log_odds_chosen": 1.6428413391113281, "log_odds_ratio": -0.5973062515258789, "logits/chosen": -0.9185023307800293, "logits/rejected": -0.8843647241592407, "logps/chosen": -0.7575834393501282, "logps/rejected": -2.0090126991271973, "loss": 1.0696, "nll_loss": 1.0199501514434814, "rewards/accuracies": 0.625, "rewards/chosen": -0.07575834542512894, "rewards/margins": 0.12514294683933258, "rewards/rejected": -0.20090128481388092, "step": 2402 }, { "epoch": 1.465914290071679, "grad_norm": 1.3263030052185059, "learning_rate": 5.6749540722596445e-06, "log_odds_chosen": 1.4666626453399658, "log_odds_ratio": -0.5331543684005737, "logits/chosen": -1.0025538206100464, "logits/rejected": -0.9218178987503052, "logps/chosen": -0.9291096925735474, "logps/rejected": -2.1931800842285156, "loss": 1.0139, "nll_loss": 1.135535717010498, "rewards/accuracies": 0.5, "rewards/chosen": -0.09291097521781921, "rewards/margins": 0.12640705704689026, "rewards/rejected": -0.21931803226470947, "step": 2403 }, { "epoch": 1.4665243251486961, "grad_norm": 2.1502034664154053, "learning_rate": 5.6739742804654015e-06, "log_odds_chosen": 0.9834445714950562, "log_odds_ratio": -0.5200545191764832, "logits/chosen": -0.8406655788421631, "logits/rejected": -0.8777607679367065, "logps/chosen": -0.7261959910392761, "logps/rejected": -1.4051008224487305, "loss": 1.0955, "nll_loss": 1.0406012535095215, "rewards/accuracies": 0.625, "rewards/chosen": -0.07261960208415985, "rewards/margins": 0.0678904801607132, "rewards/rejected": -0.14051008224487305, "step": 2404 }, { "epoch": 1.467134360225713, "grad_norm": 1.7393583059310913, "learning_rate": 5.672994488671157e-06, "log_odds_chosen": 2.4135546684265137, "log_odds_ratio": -0.3360852301120758, "logits/chosen": -0.9346811771392822, "logits/rejected": -0.8886123895645142, "logps/chosen": -1.175499439239502, "logps/rejected": -3.1897969245910645, "loss": 1.1237, "nll_loss": 1.2939364910125732, "rewards/accuracies": 0.625, "rewards/chosen": -0.11754994839429855, "rewards/margins": 0.20142972469329834, "rewards/rejected": -0.3189796805381775, "step": 2405 }, { "epoch": 1.4677443953027298, "grad_norm": 8.220067024230957, "learning_rate": 5.672014696876913e-06, "log_odds_chosen": 1.0445778369903564, "log_odds_ratio": -0.46126919984817505, "logits/chosen": -0.9573872685432434, "logits/rejected": -0.9653612375259399, "logps/chosen": -0.878169596195221, "logps/rejected": -1.5951170921325684, "loss": 1.1663, "nll_loss": 1.1395126581192017, "rewards/accuracies": 0.875, "rewards/chosen": -0.08781696110963821, "rewards/margins": 0.0716947540640831, "rewards/rejected": -0.1595117151737213, "step": 2406 }, { "epoch": 1.4683544303797469, "grad_norm": 3.35443377494812, "learning_rate": 5.67103490508267e-06, "log_odds_chosen": 0.5699759721755981, "log_odds_ratio": -0.5419033765792847, "logits/chosen": -0.8602370023727417, "logits/rejected": -0.9010392427444458, "logps/chosen": -0.9034501314163208, "logps/rejected": -1.270571231842041, "loss": 1.0521, "nll_loss": 1.012154221534729, "rewards/accuracies": 0.75, "rewards/chosen": -0.09034501016139984, "rewards/margins": 0.03671211749315262, "rewards/rejected": -0.12705713510513306, "step": 2407 }, { "epoch": 1.4689644654567637, "grad_norm": 1.805701732635498, "learning_rate": 5.670055113288426e-06, "log_odds_chosen": 1.2346296310424805, "log_odds_ratio": -0.5416170954704285, "logits/chosen": -0.84445720911026, "logits/rejected": -0.8483217358589172, "logps/chosen": -0.8693361878395081, "logps/rejected": -1.820110559463501, "loss": 1.0031, "nll_loss": 0.9730386137962341, "rewards/accuracies": 0.5, "rewards/chosen": -0.08693361282348633, "rewards/margins": 0.09507744759321213, "rewards/rejected": -0.18201106786727905, "step": 2408 }, { "epoch": 1.4695745005337808, "grad_norm": 2.1089556217193604, "learning_rate": 5.669075321494182e-06, "log_odds_chosen": 1.548673391342163, "log_odds_ratio": -0.3620005249977112, "logits/chosen": -0.6419262886047363, "logits/rejected": -0.8751057982444763, "logps/chosen": -0.8449109792709351, "logps/rejected": -2.0428545475006104, "loss": 1.2588, "nll_loss": 1.1464349031448364, "rewards/accuracies": 0.875, "rewards/chosen": -0.08449109643697739, "rewards/margins": 0.11979436129331589, "rewards/rejected": -0.20428545773029327, "step": 2409 }, { "epoch": 1.4701845356107976, "grad_norm": 1.1283413171768188, "learning_rate": 5.668095529699939e-06, "log_odds_chosen": 1.5621626377105713, "log_odds_ratio": -0.5137041807174683, "logits/chosen": -0.8044814467430115, "logits/rejected": -0.7514818906784058, "logps/chosen": -0.6562075614929199, "logps/rejected": -1.7395837306976318, "loss": 1.1332, "nll_loss": 0.8590380549430847, "rewards/accuracies": 0.625, "rewards/chosen": -0.06562075763940811, "rewards/margins": 0.10833762586116791, "rewards/rejected": -0.17395837604999542, "step": 2410 }, { "epoch": 1.4707945706878145, "grad_norm": 1.1775716543197632, "learning_rate": 5.667115737905694e-06, "log_odds_chosen": 1.753678798675537, "log_odds_ratio": -0.4827895760536194, "logits/chosen": -0.7098923921585083, "logits/rejected": -0.763947606086731, "logps/chosen": -0.49691087007522583, "logps/rejected": -1.506868600845337, "loss": 0.9416, "nll_loss": 0.8819037675857544, "rewards/accuracies": 0.625, "rewards/chosen": -0.049691092222929, "rewards/margins": 0.10099577158689499, "rewards/rejected": -0.15068687498569489, "step": 2411 }, { "epoch": 1.4714046057648313, "grad_norm": 2.771256446838379, "learning_rate": 5.6661359461114505e-06, "log_odds_chosen": 1.4708153009414673, "log_odds_ratio": -0.40428802371025085, "logits/chosen": -0.8721907138824463, "logits/rejected": -0.8917276859283447, "logps/chosen": -0.9416942596435547, "logps/rejected": -2.026874542236328, "loss": 0.988, "nll_loss": 0.9398423433303833, "rewards/accuracies": 0.75, "rewards/chosen": -0.09416942298412323, "rewards/margins": 0.10851802676916122, "rewards/rejected": -0.20268745720386505, "step": 2412 }, { "epoch": 1.4720146408418484, "grad_norm": 1.0357474088668823, "learning_rate": 5.6651561543172075e-06, "log_odds_chosen": 2.3779897689819336, "log_odds_ratio": -0.23844876885414124, "logits/chosen": -0.9471707344055176, "logits/rejected": -0.9885196089744568, "logps/chosen": -0.7498331665992737, "logps/rejected": -2.463780164718628, "loss": 1.0592, "nll_loss": 1.18943452835083, "rewards/accuracies": 1.0, "rewards/chosen": -0.07498331367969513, "rewards/margins": 0.1713947206735611, "rewards/rejected": -0.24637803435325623, "step": 2413 }, { "epoch": 1.4726246759188653, "grad_norm": 2.612658977508545, "learning_rate": 5.6641763625229636e-06, "log_odds_chosen": 1.2708799839019775, "log_odds_ratio": -0.5338738560676575, "logits/chosen": -0.9227899312973022, "logits/rejected": -0.8633881211280823, "logps/chosen": -1.0374094247817993, "logps/rejected": -1.9680240154266357, "loss": 1.2432, "nll_loss": 1.2696301937103271, "rewards/accuracies": 0.625, "rewards/chosen": -0.10374095290899277, "rewards/margins": 0.09306144714355469, "rewards/rejected": -0.19680239260196686, "step": 2414 }, { "epoch": 1.4732347109958823, "grad_norm": 1.3194775581359863, "learning_rate": 5.6631965707287205e-06, "log_odds_chosen": 0.8707935214042664, "log_odds_ratio": -0.46910396218299866, "logits/chosen": -0.8125808238983154, "logits/rejected": -0.8562300205230713, "logps/chosen": -0.7129109501838684, "logps/rejected": -1.3022339344024658, "loss": 0.9724, "nll_loss": 0.8520782589912415, "rewards/accuracies": 0.75, "rewards/chosen": -0.07129110395908356, "rewards/margins": 0.05893229320645332, "rewards/rejected": -0.13022339344024658, "step": 2415 }, { "epoch": 1.4738447460728992, "grad_norm": 0.9825142621994019, "learning_rate": 5.662216778934477e-06, "log_odds_chosen": 1.3092341423034668, "log_odds_ratio": -0.4609508514404297, "logits/chosen": -0.8475406169891357, "logits/rejected": -0.8958883285522461, "logps/chosen": -0.8235808610916138, "logps/rejected": -1.7835785150527954, "loss": 1.055, "nll_loss": 0.9587126970291138, "rewards/accuracies": 0.625, "rewards/chosen": -0.08235808461904526, "rewards/margins": 0.09599975496530533, "rewards/rejected": -0.17835785448551178, "step": 2416 }, { "epoch": 1.474454781149916, "grad_norm": 2.2610485553741455, "learning_rate": 5.661236987140232e-06, "log_odds_chosen": 1.4990593194961548, "log_odds_ratio": -0.32912224531173706, "logits/chosen": -0.8147575855255127, "logits/rejected": -0.7938371896743774, "logps/chosen": -0.7407137751579285, "logps/rejected": -1.7823255062103271, "loss": 1.1782, "nll_loss": 0.991600513458252, "rewards/accuracies": 0.75, "rewards/chosen": -0.07407138496637344, "rewards/margins": 0.10416116565465927, "rewards/rejected": -0.17823255062103271, "step": 2417 }, { "epoch": 1.475064816226933, "grad_norm": 2.9059276580810547, "learning_rate": 5.660257195345989e-06, "log_odds_chosen": 0.9112158417701721, "log_odds_ratio": -0.5709037780761719, "logits/chosen": -0.7408450841903687, "logits/rejected": -0.6701516509056091, "logps/chosen": -1.0046582221984863, "logps/rejected": -1.6206793785095215, "loss": 1.1345, "nll_loss": 1.07252037525177, "rewards/accuracies": 0.625, "rewards/chosen": -0.1004658117890358, "rewards/margins": 0.06160213053226471, "rewards/rejected": -0.1620679497718811, "step": 2418 }, { "epoch": 1.47567485130395, "grad_norm": 2.142646551132202, "learning_rate": 5.659277403551745e-06, "log_odds_chosen": 1.6398205757141113, "log_odds_ratio": -0.3736706078052521, "logits/chosen": -0.8408427238464355, "logits/rejected": -0.8915632367134094, "logps/chosen": -0.6938611268997192, "logps/rejected": -1.8167998790740967, "loss": 1.1187, "nll_loss": 0.9196740984916687, "rewards/accuracies": 0.875, "rewards/chosen": -0.06938610970973969, "rewards/margins": 0.11229385435581207, "rewards/rejected": -0.18167997896671295, "step": 2419 }, { "epoch": 1.476284886380967, "grad_norm": 1.7533916234970093, "learning_rate": 5.658297611757501e-06, "log_odds_chosen": 0.5191090106964111, "log_odds_ratio": -0.705108642578125, "logits/chosen": -0.9560015201568604, "logits/rejected": -0.9636672735214233, "logps/chosen": -1.1193809509277344, "logps/rejected": -1.4640833139419556, "loss": 1.2257, "nll_loss": 1.094653844833374, "rewards/accuracies": 0.5, "rewards/chosen": -0.11193808913230896, "rewards/margins": 0.03447023406624794, "rewards/rejected": -0.1464083194732666, "step": 2420 }, { "epoch": 1.4768949214579838, "grad_norm": 1.7418501377105713, "learning_rate": 5.657317819963258e-06, "log_odds_chosen": -0.08338403701782227, "log_odds_ratio": -0.9389329552650452, "logits/chosen": -0.936488926410675, "logits/rejected": -0.8733192682266235, "logps/chosen": -1.2997496128082275, "logps/rejected": -1.2801899909973145, "loss": 1.2628, "nll_loss": 1.2430516481399536, "rewards/accuracies": 0.375, "rewards/chosen": -0.12997496128082275, "rewards/margins": -0.0019559748470783234, "rewards/rejected": -0.12801899015903473, "step": 2421 }, { "epoch": 1.4775049565350007, "grad_norm": 4.888741493225098, "learning_rate": 5.656338028169013e-06, "log_odds_chosen": 0.6704644560813904, "log_odds_ratio": -0.4986487925052643, "logits/chosen": -0.7603607773780823, "logits/rejected": -0.7588375210762024, "logps/chosen": -0.7934552431106567, "logps/rejected": -1.1656527519226074, "loss": 1.0537, "nll_loss": 1.0263924598693848, "rewards/accuracies": 0.75, "rewards/chosen": -0.07934553176164627, "rewards/margins": 0.03721974045038223, "rewards/rejected": -0.1165652722120285, "step": 2422 }, { "epoch": 1.4781149916120178, "grad_norm": 1.7424728870391846, "learning_rate": 5.6553582363747695e-06, "log_odds_chosen": 1.0410372018814087, "log_odds_ratio": -0.5965257287025452, "logits/chosen": -1.0732018947601318, "logits/rejected": -0.8458385467529297, "logps/chosen": -1.059332013130188, "logps/rejected": -2.0077247619628906, "loss": 1.1123, "nll_loss": 1.2260253429412842, "rewards/accuracies": 0.5, "rewards/chosen": -0.10593320429325104, "rewards/margins": 0.09483926743268967, "rewards/rejected": -0.2007724642753601, "step": 2423 }, { "epoch": 1.4787250266890346, "grad_norm": 2.1782145500183105, "learning_rate": 5.6543784445805265e-06, "log_odds_chosen": 0.3312355577945709, "log_odds_ratio": -0.7163265943527222, "logits/chosen": -1.1702924966812134, "logits/rejected": -0.9849736094474792, "logps/chosen": -1.2034423351287842, "logps/rejected": -1.457878589630127, "loss": 1.1236, "nll_loss": 1.4140639305114746, "rewards/accuracies": 0.5, "rewards/chosen": -0.12034422159194946, "rewards/margins": 0.025443634018301964, "rewards/rejected": -0.14578786492347717, "step": 2424 }, { "epoch": 1.4793350617660517, "grad_norm": 1.948724627494812, "learning_rate": 5.653398652786283e-06, "log_odds_chosen": 0.6871563196182251, "log_odds_ratio": -0.529737114906311, "logits/chosen": -1.0130125284194946, "logits/rejected": -0.9355250597000122, "logps/chosen": -0.7159390449523926, "logps/rejected": -1.0877553224563599, "loss": 1.0964, "nll_loss": 1.1234415769577026, "rewards/accuracies": 0.75, "rewards/chosen": -0.07159390300512314, "rewards/margins": 0.03718164563179016, "rewards/rejected": -0.1087755411863327, "step": 2425 }, { "epoch": 1.4799450968430685, "grad_norm": 1.5475177764892578, "learning_rate": 5.652418860992039e-06, "log_odds_chosen": 2.022444725036621, "log_odds_ratio": -0.4656362533569336, "logits/chosen": -0.8837157487869263, "logits/rejected": -0.9745906591415405, "logps/chosen": -0.5550538897514343, "logps/rejected": -2.1988935470581055, "loss": 1.1038, "nll_loss": 0.9399548768997192, "rewards/accuracies": 0.625, "rewards/chosen": -0.05550539121031761, "rewards/margins": 0.16438397765159607, "rewards/rejected": -0.21988937258720398, "step": 2426 }, { "epoch": 1.4805551319200854, "grad_norm": 1.3966028690338135, "learning_rate": 5.651439069197796e-06, "log_odds_chosen": 1.56671941280365, "log_odds_ratio": -0.28547579050064087, "logits/chosen": -0.9094043970108032, "logits/rejected": -0.8189091086387634, "logps/chosen": -0.8618515133857727, "logps/rejected": -2.051980972290039, "loss": 1.0413, "nll_loss": 1.023411750793457, "rewards/accuracies": 1.0, "rewards/chosen": -0.08618514984846115, "rewards/margins": 0.11901293694972992, "rewards/rejected": -0.20519809424877167, "step": 2427 }, { "epoch": 1.4811651669971022, "grad_norm": 1.9895342588424683, "learning_rate": 5.650459277403551e-06, "log_odds_chosen": 1.8556948900222778, "log_odds_ratio": -0.42414674162864685, "logits/chosen": -0.7692582607269287, "logits/rejected": -0.8579679727554321, "logps/chosen": -0.5383148193359375, "logps/rejected": -1.8486740589141846, "loss": 0.9428, "nll_loss": 0.6806387305259705, "rewards/accuracies": 0.875, "rewards/chosen": -0.05383148044347763, "rewards/margins": 0.1310359388589859, "rewards/rejected": -0.18486741185188293, "step": 2428 }, { "epoch": 1.4817752020741193, "grad_norm": 1.340840220451355, "learning_rate": 5.649479485609308e-06, "log_odds_chosen": 0.5664774179458618, "log_odds_ratio": -0.6230307817459106, "logits/chosen": -0.792241096496582, "logits/rejected": -0.7912318110466003, "logps/chosen": -0.7892841100692749, "logps/rejected": -1.07563054561615, "loss": 1.0266, "nll_loss": 1.1342189311981201, "rewards/accuracies": 0.5, "rewards/chosen": -0.07892841100692749, "rewards/margins": 0.02863464504480362, "rewards/rejected": -0.10756304860115051, "step": 2429 }, { "epoch": 1.4823852371511361, "grad_norm": 3.202913999557495, "learning_rate": 5.648499693815064e-06, "log_odds_chosen": 1.5651819705963135, "log_odds_ratio": -0.4203501343727112, "logits/chosen": -1.0400818586349487, "logits/rejected": -0.9106249809265137, "logps/chosen": -0.9829927682876587, "logps/rejected": -2.177201271057129, "loss": 1.0869, "nll_loss": 1.181950330734253, "rewards/accuracies": 0.75, "rewards/chosen": -0.09829927980899811, "rewards/margins": 0.11942087113857269, "rewards/rejected": -0.2177201360464096, "step": 2430 }, { "epoch": 1.4829952722281532, "grad_norm": 1.7214431762695312, "learning_rate": 5.64751990202082e-06, "log_odds_chosen": 1.4577460289001465, "log_odds_ratio": -0.3360913395881653, "logits/chosen": -0.7907562255859375, "logits/rejected": -0.7797807455062866, "logps/chosen": -0.6888304948806763, "logps/rejected": -1.6972898244857788, "loss": 1.1737, "nll_loss": 0.936485767364502, "rewards/accuracies": 1.0, "rewards/chosen": -0.06888304650783539, "rewards/margins": 0.10084593296051025, "rewards/rejected": -0.16972897946834564, "step": 2431 }, { "epoch": 1.48360530730517, "grad_norm": 1.0720868110656738, "learning_rate": 5.646540110226577e-06, "log_odds_chosen": 1.6188610792160034, "log_odds_ratio": -0.5436573028564453, "logits/chosen": -0.8368492722511292, "logits/rejected": -0.8935444355010986, "logps/chosen": -0.6986021995544434, "logps/rejected": -1.7675886154174805, "loss": 1.0347, "nll_loss": 0.9325059652328491, "rewards/accuracies": 0.625, "rewards/chosen": -0.06986022740602493, "rewards/margins": 0.10689864307641983, "rewards/rejected": -0.17675887048244476, "step": 2432 }, { "epoch": 1.4842153423821869, "grad_norm": 9.408245086669922, "learning_rate": 5.645560318432333e-06, "log_odds_chosen": 1.0875235795974731, "log_odds_ratio": -0.5272321701049805, "logits/chosen": -0.6805660724639893, "logits/rejected": -0.9048939943313599, "logps/chosen": -0.8049923181533813, "logps/rejected": -1.591433048248291, "loss": 1.0545, "nll_loss": 0.9598747491836548, "rewards/accuracies": 0.625, "rewards/chosen": -0.08049923181533813, "rewards/margins": 0.07864405959844589, "rewards/rejected": -0.15914329886436462, "step": 2433 }, { "epoch": 1.484825377459204, "grad_norm": 4.345757484436035, "learning_rate": 5.6445805266380886e-06, "log_odds_chosen": 0.9453359842300415, "log_odds_ratio": -0.6505447626113892, "logits/chosen": -0.8215595483779907, "logits/rejected": -0.8442747592926025, "logps/chosen": -0.8609486818313599, "logps/rejected": -1.6339879035949707, "loss": 1.2201, "nll_loss": 1.0655714273452759, "rewards/accuracies": 0.625, "rewards/chosen": -0.08609487116336823, "rewards/margins": 0.0773039162158966, "rewards/rejected": -0.16339880228042603, "step": 2434 }, { "epoch": 1.4854354125362208, "grad_norm": 1.1878646612167358, "learning_rate": 5.6436007348438455e-06, "log_odds_chosen": 1.8309428691864014, "log_odds_ratio": -0.4524855613708496, "logits/chosen": -0.7607269287109375, "logits/rejected": -0.6855349540710449, "logps/chosen": -0.7865053415298462, "logps/rejected": -2.1708858013153076, "loss": 1.0727, "nll_loss": 0.907505452632904, "rewards/accuracies": 0.75, "rewards/chosen": -0.07865054160356522, "rewards/margins": 0.13843804597854614, "rewards/rejected": -0.21708858013153076, "step": 2435 }, { "epoch": 1.4860454476132379, "grad_norm": 3.9865641593933105, "learning_rate": 5.642620943049602e-06, "log_odds_chosen": 2.665092945098877, "log_odds_ratio": -0.28322046995162964, "logits/chosen": -0.6488205790519714, "logits/rejected": -0.7541371583938599, "logps/chosen": -0.5984632968902588, "logps/rejected": -2.7100017070770264, "loss": 0.9837, "nll_loss": 0.9730250239372253, "rewards/accuracies": 0.875, "rewards/chosen": -0.05984632670879364, "rewards/margins": 0.21115383505821228, "rewards/rejected": -0.2710001766681671, "step": 2436 }, { "epoch": 1.4866554826902547, "grad_norm": 1.4820218086242676, "learning_rate": 5.641641151255358e-06, "log_odds_chosen": 0.6350419521331787, "log_odds_ratio": -0.541602611541748, "logits/chosen": -0.816063404083252, "logits/rejected": -0.7947125434875488, "logps/chosen": -0.7954459190368652, "logps/rejected": -1.2253859043121338, "loss": 1.1396, "nll_loss": 0.9649029970169067, "rewards/accuracies": 0.75, "rewards/chosen": -0.07954459637403488, "rewards/margins": 0.04299398511648178, "rewards/rejected": -0.12253858894109726, "step": 2437 }, { "epoch": 1.4872655177672716, "grad_norm": 3.0392627716064453, "learning_rate": 5.640661359461115e-06, "log_odds_chosen": 0.5470753908157349, "log_odds_ratio": -0.5983891487121582, "logits/chosen": -0.8548875451087952, "logits/rejected": -0.793736457824707, "logps/chosen": -0.9997090101242065, "logps/rejected": -1.368727445602417, "loss": 1.0669, "nll_loss": 1.0332105159759521, "rewards/accuracies": 0.625, "rewards/chosen": -0.09997090697288513, "rewards/margins": 0.03690185025334358, "rewards/rejected": -0.13687275350093842, "step": 2438 }, { "epoch": 1.4878755528442886, "grad_norm": 1.6528443098068237, "learning_rate": 5.639681567666871e-06, "log_odds_chosen": 0.37148353457450867, "log_odds_ratio": -0.5883814692497253, "logits/chosen": -0.8606992363929749, "logits/rejected": -0.7893591523170471, "logps/chosen": -0.8702883124351501, "logps/rejected": -1.0692028999328613, "loss": 1.1731, "nll_loss": 1.0369855165481567, "rewards/accuracies": 0.625, "rewards/chosen": -0.08702883124351501, "rewards/margins": 0.01989145576953888, "rewards/rejected": -0.1069202870130539, "step": 2439 }, { "epoch": 1.4884855879213055, "grad_norm": 1.9940078258514404, "learning_rate": 5.638701775872626e-06, "log_odds_chosen": 0.9603890776634216, "log_odds_ratio": -0.5413775444030762, "logits/chosen": -0.8261222839355469, "logits/rejected": -0.934895396232605, "logps/chosen": -0.7669447660446167, "logps/rejected": -1.3977853059768677, "loss": 1.1903, "nll_loss": 1.02480149269104, "rewards/accuracies": 0.75, "rewards/chosen": -0.07669448107481003, "rewards/margins": 0.06308405101299286, "rewards/rejected": -0.13977853953838348, "step": 2440 }, { "epoch": 1.4890956229983223, "grad_norm": 2.181445598602295, "learning_rate": 5.637721984078383e-06, "log_odds_chosen": 1.4642550945281982, "log_odds_ratio": -0.38679245114326477, "logits/chosen": -0.7091372013092041, "logits/rejected": -0.774273157119751, "logps/chosen": -0.7611209154129028, "logps/rejected": -1.8256961107254028, "loss": 0.9725, "nll_loss": 0.9885587692260742, "rewards/accuracies": 0.75, "rewards/chosen": -0.07611209154129028, "rewards/margins": 0.10645753145217896, "rewards/rejected": -0.18256962299346924, "step": 2441 }, { "epoch": 1.4897056580753394, "grad_norm": 1.2961082458496094, "learning_rate": 5.636742192284139e-06, "log_odds_chosen": 0.5775202512741089, "log_odds_ratio": -0.5782397985458374, "logits/chosen": -0.9052743911743164, "logits/rejected": -0.87040114402771, "logps/chosen": -1.026760220527649, "logps/rejected": -1.430873155593872, "loss": 1.117, "nll_loss": 1.1813011169433594, "rewards/accuracies": 0.5, "rewards/chosen": -0.10267601907253265, "rewards/margins": 0.04041130095720291, "rewards/rejected": -0.14308732748031616, "step": 2442 }, { "epoch": 1.4903156931523562, "grad_norm": 4.167413711547852, "learning_rate": 5.635762400489896e-06, "log_odds_chosen": 1.853622317314148, "log_odds_ratio": -0.30012422800064087, "logits/chosen": -0.5407915115356445, "logits/rejected": -0.6998962759971619, "logps/chosen": -0.6225944757461548, "logps/rejected": -1.8897981643676758, "loss": 0.9581, "nll_loss": 0.7036458849906921, "rewards/accuracies": 1.0, "rewards/chosen": -0.06225945055484772, "rewards/margins": 0.1267203688621521, "rewards/rejected": -0.18897980451583862, "step": 2443 }, { "epoch": 1.490925728229373, "grad_norm": 1.2498396635055542, "learning_rate": 5.634782608695652e-06, "log_odds_chosen": 0.07998239994049072, "log_odds_ratio": -0.8806940913200378, "logits/chosen": -1.0626300573349, "logits/rejected": -0.9072905778884888, "logps/chosen": -1.030457854270935, "logps/rejected": -1.1652474403381348, "loss": 1.2473, "nll_loss": 1.1871609687805176, "rewards/accuracies": 0.375, "rewards/chosen": -0.10304579138755798, "rewards/margins": 0.01347893476486206, "rewards/rejected": -0.11652473360300064, "step": 2444 }, { "epoch": 1.4915357633063901, "grad_norm": 1.7162089347839355, "learning_rate": 5.633802816901408e-06, "log_odds_chosen": 1.1455302238464355, "log_odds_ratio": -0.41670966148376465, "logits/chosen": -0.9379512071609497, "logits/rejected": -0.8897534608840942, "logps/chosen": -0.8378008604049683, "logps/rejected": -1.7002018690109253, "loss": 1.0876, "nll_loss": 1.1591370105743408, "rewards/accuracies": 0.75, "rewards/chosen": -0.08378008008003235, "rewards/margins": 0.08624010533094406, "rewards/rejected": -0.170020192861557, "step": 2445 }, { "epoch": 1.492145798383407, "grad_norm": 0.982334315776825, "learning_rate": 5.6328230251071646e-06, "log_odds_chosen": 2.73911452293396, "log_odds_ratio": -0.38706955313682556, "logits/chosen": -0.8760119676589966, "logits/rejected": -0.843203067779541, "logps/chosen": -0.7941638827323914, "logps/rejected": -3.0375399589538574, "loss": 1.0047, "nll_loss": 1.026258945465088, "rewards/accuracies": 0.75, "rewards/chosen": -0.07941639423370361, "rewards/margins": 0.2243376076221466, "rewards/rejected": -0.3037540018558502, "step": 2446 }, { "epoch": 1.492755833460424, "grad_norm": 1.4053170680999756, "learning_rate": 5.631843233312921e-06, "log_odds_chosen": 1.7422865629196167, "log_odds_ratio": -0.5489007830619812, "logits/chosen": -0.9769712686538696, "logits/rejected": -0.8549410104751587, "logps/chosen": -0.789635419845581, "logps/rejected": -2.2221975326538086, "loss": 1.0441, "nll_loss": 0.9796972870826721, "rewards/accuracies": 0.625, "rewards/chosen": -0.07896354794502258, "rewards/margins": 0.14325623214244843, "rewards/rejected": -0.22221976518630981, "step": 2447 }, { "epoch": 1.493365868537441, "grad_norm": 1.364544153213501, "learning_rate": 5.630863441518677e-06, "log_odds_chosen": 1.6065611839294434, "log_odds_ratio": -0.27602875232696533, "logits/chosen": -0.7004554271697998, "logits/rejected": -0.7258118391036987, "logps/chosen": -0.5426563024520874, "logps/rejected": -1.5667791366577148, "loss": 0.9539, "nll_loss": 0.740198016166687, "rewards/accuracies": 0.875, "rewards/chosen": -0.05426563322544098, "rewards/margins": 0.10241226851940155, "rewards/rejected": -0.15667790174484253, "step": 2448 }, { "epoch": 1.4939759036144578, "grad_norm": 1.912410855293274, "learning_rate": 5.629883649724434e-06, "log_odds_chosen": 1.249336838722229, "log_odds_ratio": -0.6372274160385132, "logits/chosen": -0.740175724029541, "logits/rejected": -0.7351844310760498, "logps/chosen": -0.6842795014381409, "logps/rejected": -1.6430730819702148, "loss": 0.9915, "nll_loss": 0.7929335832595825, "rewards/accuracies": 0.5, "rewards/chosen": -0.06842795014381409, "rewards/margins": 0.09587936103343964, "rewards/rejected": -0.16430732607841492, "step": 2449 }, { "epoch": 1.4945859386914748, "grad_norm": 1.6858162879943848, "learning_rate": 5.62890385793019e-06, "log_odds_chosen": 1.3917434215545654, "log_odds_ratio": -0.5552867650985718, "logits/chosen": -1.1043930053710938, "logits/rejected": -0.9028702974319458, "logps/chosen": -0.8998458385467529, "logps/rejected": -2.043644666671753, "loss": 1.1427, "nll_loss": 1.0457141399383545, "rewards/accuracies": 0.5, "rewards/chosen": -0.08998458087444305, "rewards/margins": 0.1143798753619194, "rewards/rejected": -0.20436446368694305, "step": 2450 }, { "epoch": 1.4951959737684917, "grad_norm": 3.3206307888031006, "learning_rate": 5.627924066135945e-06, "log_odds_chosen": 1.5460419654846191, "log_odds_ratio": -0.4527661204338074, "logits/chosen": -0.8970120549201965, "logits/rejected": -0.8677760362625122, "logps/chosen": -1.0933417081832886, "logps/rejected": -2.2936863899230957, "loss": 1.2758, "nll_loss": 1.3620094060897827, "rewards/accuracies": 0.875, "rewards/chosen": -0.10933417826890945, "rewards/margins": 0.12003447860479355, "rewards/rejected": -0.2293686419725418, "step": 2451 }, { "epoch": 1.4958060088455087, "grad_norm": 1.9590115547180176, "learning_rate": 5.626944274341702e-06, "log_odds_chosen": 1.1283522844314575, "log_odds_ratio": -0.673829972743988, "logits/chosen": -0.8329994082450867, "logits/rejected": -0.8822349309921265, "logps/chosen": -0.9276076555252075, "logps/rejected": -1.8757340908050537, "loss": 1.0856, "nll_loss": 1.186639428138733, "rewards/accuracies": 0.5, "rewards/chosen": -0.09276077151298523, "rewards/margins": 0.09481265395879745, "rewards/rejected": -0.1875734031200409, "step": 2452 }, { "epoch": 1.4964160439225256, "grad_norm": 3.7700743675231934, "learning_rate": 5.625964482547458e-06, "log_odds_chosen": 1.345693826675415, "log_odds_ratio": -0.4563818573951721, "logits/chosen": -0.948756217956543, "logits/rejected": -0.8657381534576416, "logps/chosen": -0.8064419031143188, "logps/rejected": -1.7820569276809692, "loss": 1.0223, "nll_loss": 1.210689663887024, "rewards/accuracies": 0.625, "rewards/chosen": -0.08064419031143188, "rewards/margins": 0.09756150841712952, "rewards/rejected": -0.1782056987285614, "step": 2453 }, { "epoch": 1.4970260789995424, "grad_norm": 2.523514747619629, "learning_rate": 5.624984690753214e-06, "log_odds_chosen": 1.2483046054840088, "log_odds_ratio": -0.39711880683898926, "logits/chosen": -0.5828320980072021, "logits/rejected": -0.5680110454559326, "logps/chosen": -0.6255226135253906, "logps/rejected": -1.4108119010925293, "loss": 1.1181, "nll_loss": 0.8250494003295898, "rewards/accuracies": 0.875, "rewards/chosen": -0.06255225837230682, "rewards/margins": 0.07852894067764282, "rewards/rejected": -0.14108119904994965, "step": 2454 }, { "epoch": 1.4976361140765593, "grad_norm": 1.6656914949417114, "learning_rate": 5.624004898958971e-06, "log_odds_chosen": 1.0299413204193115, "log_odds_ratio": -0.6113587021827698, "logits/chosen": -0.8267254829406738, "logits/rejected": -0.8191162347793579, "logps/chosen": -0.7827358245849609, "logps/rejected": -1.4864614009857178, "loss": 1.0283, "nll_loss": 0.9013176560401917, "rewards/accuracies": 0.5, "rewards/chosen": -0.07827357947826385, "rewards/margins": 0.0703725591301918, "rewards/rejected": -0.14864614605903625, "step": 2455 }, { "epoch": 1.4982461491535763, "grad_norm": 3.0640578269958496, "learning_rate": 5.6230251071647275e-06, "log_odds_chosen": 0.7404286861419678, "log_odds_ratio": -0.6954370737075806, "logits/chosen": -1.1388006210327148, "logits/rejected": -0.9103729724884033, "logps/chosen": -1.1557159423828125, "logps/rejected": -1.7048543691635132, "loss": 1.1164, "nll_loss": 1.4215093851089478, "rewards/accuracies": 0.625, "rewards/chosen": -0.11557161062955856, "rewards/margins": 0.05491383373737335, "rewards/rejected": -0.17048543691635132, "step": 2456 }, { "epoch": 1.4988561842305932, "grad_norm": 1.0589290857315063, "learning_rate": 5.622045315370484e-06, "log_odds_chosen": 1.5935556888580322, "log_odds_ratio": -0.38289642333984375, "logits/chosen": -0.6784478425979614, "logits/rejected": -0.7036916017532349, "logps/chosen": -0.7119377851486206, "logps/rejected": -1.8486244678497314, "loss": 1.1465, "nll_loss": 0.9859731197357178, "rewards/accuracies": 0.625, "rewards/chosen": -0.07119378447532654, "rewards/margins": 0.11366866528987885, "rewards/rejected": -0.18486246466636658, "step": 2457 }, { "epoch": 1.4994662193076103, "grad_norm": 5.15291690826416, "learning_rate": 5.62106552357624e-06, "log_odds_chosen": 0.4496336579322815, "log_odds_ratio": -0.5506305694580078, "logits/chosen": -0.9181222915649414, "logits/rejected": -0.8553338050842285, "logps/chosen": -0.6718755960464478, "logps/rejected": -0.8784362077713013, "loss": 1.1399, "nll_loss": 0.9915258288383484, "rewards/accuracies": 0.875, "rewards/chosen": -0.06718756258487701, "rewards/margins": 0.020656060427427292, "rewards/rejected": -0.08784361928701401, "step": 2458 }, { "epoch": 1.500076254384627, "grad_norm": 3.544081449508667, "learning_rate": 5.620085731781996e-06, "log_odds_chosen": 0.5233792066574097, "log_odds_ratio": -0.6038194894790649, "logits/chosen": -1.0221401453018188, "logits/rejected": -0.9866894483566284, "logps/chosen": -0.9709274172782898, "logps/rejected": -1.3852262496948242, "loss": 1.1424, "nll_loss": 1.2717636823654175, "rewards/accuracies": 0.75, "rewards/chosen": -0.09709275513887405, "rewards/margins": 0.041429877281188965, "rewards/rejected": -0.13852262496948242, "step": 2459 }, { "epoch": 1.500686289461644, "grad_norm": 2.508686065673828, "learning_rate": 5.619105939987753e-06, "log_odds_chosen": 0.8782012462615967, "log_odds_ratio": -0.5750585198402405, "logits/chosen": -0.6589779853820801, "logits/rejected": -0.824840784072876, "logps/chosen": -0.9134641289710999, "logps/rejected": -1.4999208450317383, "loss": 1.0867, "nll_loss": 1.082150936126709, "rewards/accuracies": 0.625, "rewards/chosen": -0.09134641289710999, "rewards/margins": 0.05864567309617996, "rewards/rejected": -0.14999207854270935, "step": 2460 }, { "epoch": 1.501296324538661, "grad_norm": 1.3055591583251953, "learning_rate": 5.618126148193509e-06, "log_odds_chosen": 1.5307621955871582, "log_odds_ratio": -0.5724409222602844, "logits/chosen": -0.6927217841148376, "logits/rejected": -0.7947099804878235, "logps/chosen": -0.6912854313850403, "logps/rejected": -1.920508861541748, "loss": 1.1445, "nll_loss": 1.0416672229766846, "rewards/accuracies": 0.5, "rewards/chosen": -0.06912854313850403, "rewards/margins": 0.12292234599590302, "rewards/rejected": -0.19205090403556824, "step": 2461 }, { "epoch": 1.5019063596156779, "grad_norm": 3.201228380203247, "learning_rate": 5.617146356399264e-06, "log_odds_chosen": 1.154508352279663, "log_odds_ratio": -0.553947925567627, "logits/chosen": -0.8739033341407776, "logits/rejected": -0.8820139765739441, "logps/chosen": -0.7957345247268677, "logps/rejected": -1.6109905242919922, "loss": 1.196, "nll_loss": 1.258918046951294, "rewards/accuracies": 0.625, "rewards/chosen": -0.07957345247268677, "rewards/margins": 0.08152560889720917, "rewards/rejected": -0.16109906136989594, "step": 2462 }, { "epoch": 1.502516394692695, "grad_norm": 1.0926698446273804, "learning_rate": 5.616166564605021e-06, "log_odds_chosen": 4.823635578155518, "log_odds_ratio": -0.23118221759796143, "logits/chosen": -0.7935900688171387, "logits/rejected": -0.7996759414672852, "logps/chosen": -0.6265155076980591, "logps/rejected": -4.692549705505371, "loss": 1.0251, "nll_loss": 0.8788750767707825, "rewards/accuracies": 0.875, "rewards/chosen": -0.06265154480934143, "rewards/margins": 0.4066034257411957, "rewards/rejected": -0.4692549705505371, "step": 2463 }, { "epoch": 1.5031264297697118, "grad_norm": 1.705115795135498, "learning_rate": 5.615186772810777e-06, "log_odds_chosen": 1.5581822395324707, "log_odds_ratio": -0.5799910426139832, "logits/chosen": -0.7824141979217529, "logits/rejected": -0.8518579006195068, "logps/chosen": -0.9033035039901733, "logps/rejected": -2.0671463012695312, "loss": 1.0946, "nll_loss": 1.117802619934082, "rewards/accuracies": 0.875, "rewards/chosen": -0.0903303474187851, "rewards/margins": 0.11638429015874863, "rewards/rejected": -0.20671464502811432, "step": 2464 }, { "epoch": 1.5037364648467286, "grad_norm": 2.655825138092041, "learning_rate": 5.6142069810165334e-06, "log_odds_chosen": 0.20440074801445007, "log_odds_ratio": -0.633820652961731, "logits/chosen": -0.9318704009056091, "logits/rejected": -0.8941906690597534, "logps/chosen": -0.8919649124145508, "logps/rejected": -1.038327693939209, "loss": 1.2369, "nll_loss": 1.0744627714157104, "rewards/accuracies": 0.625, "rewards/chosen": -0.08919648826122284, "rewards/margins": 0.014636283740401268, "rewards/rejected": -0.10383276641368866, "step": 2465 }, { "epoch": 1.5043464999237455, "grad_norm": 1.5376895666122437, "learning_rate": 5.61322718922229e-06, "log_odds_chosen": 1.0613731145858765, "log_odds_ratio": -0.4586571753025055, "logits/chosen": -0.5862109661102295, "logits/rejected": -0.6615397930145264, "logps/chosen": -0.6968377232551575, "logps/rejected": -1.28315007686615, "loss": 1.0391, "nll_loss": 0.8493620157241821, "rewards/accuracies": 0.75, "rewards/chosen": -0.06968377530574799, "rewards/margins": 0.05863124132156372, "rewards/rejected": -0.1283150166273117, "step": 2466 }, { "epoch": 1.5049565350007625, "grad_norm": 1.660143256187439, "learning_rate": 5.6122473974280465e-06, "log_odds_chosen": 1.6876230239868164, "log_odds_ratio": -0.46584993600845337, "logits/chosen": -1.0048884153366089, "logits/rejected": -0.9421011209487915, "logps/chosen": -0.9869081974029541, "logps/rejected": -2.380436897277832, "loss": 1.0359, "nll_loss": 1.1654783487319946, "rewards/accuracies": 0.75, "rewards/chosen": -0.09869082272052765, "rewards/margins": 0.13935285806655884, "rewards/rejected": -0.23804369568824768, "step": 2467 }, { "epoch": 1.5055665700777796, "grad_norm": 1.2262884378433228, "learning_rate": 5.611267605633802e-06, "log_odds_chosen": 1.670062780380249, "log_odds_ratio": -0.4721340835094452, "logits/chosen": -0.8085103034973145, "logits/rejected": -0.863303005695343, "logps/chosen": -0.6837374567985535, "logps/rejected": -2.021982431411743, "loss": 1.0757, "nll_loss": 0.9422601461410522, "rewards/accuracies": 0.625, "rewards/chosen": -0.06837373971939087, "rewards/margins": 0.13382449746131897, "rewards/rejected": -0.20219823718070984, "step": 2468 }, { "epoch": 1.5061766051547965, "grad_norm": 1.8101307153701782, "learning_rate": 5.610287813839559e-06, "log_odds_chosen": 2.865814208984375, "log_odds_ratio": -0.22224780917167664, "logits/chosen": -0.7844769358634949, "logits/rejected": -0.7389310002326965, "logps/chosen": -0.8294067978858948, "logps/rejected": -3.0738070011138916, "loss": 1.0644, "nll_loss": 0.9167685508728027, "rewards/accuracies": 0.875, "rewards/chosen": -0.08294068276882172, "rewards/margins": 0.22444003820419312, "rewards/rejected": -0.307380735874176, "step": 2469 }, { "epoch": 1.5067866402318133, "grad_norm": 2.8709475994110107, "learning_rate": 5.609308022045315e-06, "log_odds_chosen": 1.327343463897705, "log_odds_ratio": -0.5584751963615417, "logits/chosen": -1.0166832208633423, "logits/rejected": -0.8868072032928467, "logps/chosen": -0.878982663154602, "logps/rejected": -1.9277634620666504, "loss": 1.0547, "nll_loss": 1.169036865234375, "rewards/accuracies": 0.875, "rewards/chosen": -0.08789826929569244, "rewards/margins": 0.10487809777259827, "rewards/rejected": -0.19277635216712952, "step": 2470 }, { "epoch": 1.5073966753088301, "grad_norm": 2.0493760108947754, "learning_rate": 5.608328230251072e-06, "log_odds_chosen": 1.2910518646240234, "log_odds_ratio": -0.45111149549484253, "logits/chosen": -0.6076250076293945, "logits/rejected": -0.6014370918273926, "logps/chosen": -0.8001154661178589, "logps/rejected": -1.7725622653961182, "loss": 1.0324, "nll_loss": 0.8438833355903625, "rewards/accuracies": 0.625, "rewards/chosen": -0.08001155406236649, "rewards/margins": 0.09724467247724533, "rewards/rejected": -0.17725622653961182, "step": 2471 }, { "epoch": 1.5080067103858472, "grad_norm": 1.1629897356033325, "learning_rate": 5.607348438456828e-06, "log_odds_chosen": 0.4313124120235443, "log_odds_ratio": -0.6783521175384521, "logits/chosen": -0.6959164142608643, "logits/rejected": -0.5541479587554932, "logps/chosen": -0.880113959312439, "logps/rejected": -1.1911296844482422, "loss": 1.0291, "nll_loss": 1.0158209800720215, "rewards/accuracies": 0.75, "rewards/chosen": -0.08801139891147614, "rewards/margins": 0.031101562082767487, "rewards/rejected": -0.11911295354366302, "step": 2472 }, { "epoch": 1.508616745462864, "grad_norm": 6.766129493713379, "learning_rate": 5.606368646662584e-06, "log_odds_chosen": 2.323213577270508, "log_odds_ratio": -0.1509312391281128, "logits/chosen": -0.5492175817489624, "logits/rejected": -0.5874276757240295, "logps/chosen": -0.5398715138435364, "logps/rejected": -2.0605201721191406, "loss": 0.8423, "nll_loss": 0.6533176898956299, "rewards/accuracies": 1.0, "rewards/chosen": -0.05398715287446976, "rewards/margins": 0.15206485986709595, "rewards/rejected": -0.2060520201921463, "step": 2473 }, { "epoch": 1.5092267805398811, "grad_norm": 1.1227694749832153, "learning_rate": 5.60538885486834e-06, "log_odds_chosen": 1.31228768825531, "log_odds_ratio": -0.4163232743740082, "logits/chosen": -0.8316411972045898, "logits/rejected": -0.7732307314872742, "logps/chosen": -0.7198516130447388, "logps/rejected": -1.7083604335784912, "loss": 0.9312, "nll_loss": 0.8489744663238525, "rewards/accuracies": 0.75, "rewards/chosen": -0.07198516279459, "rewards/margins": 0.09885086864233017, "rewards/rejected": -0.17083604633808136, "step": 2474 }, { "epoch": 1.509836815616898, "grad_norm": 1.1701562404632568, "learning_rate": 5.604409063074096e-06, "log_odds_chosen": 1.4845802783966064, "log_odds_ratio": -0.3293769657611847, "logits/chosen": -0.7082731127738953, "logits/rejected": -0.5406703948974609, "logps/chosen": -0.6881373524665833, "logps/rejected": -1.6926244497299194, "loss": 0.9718, "nll_loss": 0.7187799215316772, "rewards/accuracies": 0.875, "rewards/chosen": -0.0688137412071228, "rewards/margins": 0.10044869780540466, "rewards/rejected": -0.16926243901252747, "step": 2475 }, { "epoch": 1.5104468506939148, "grad_norm": 1.597030520439148, "learning_rate": 5.6034292712798525e-06, "log_odds_chosen": 2.3073511123657227, "log_odds_ratio": -0.29027265310287476, "logits/chosen": -0.7628093957901001, "logits/rejected": -0.8101411461830139, "logps/chosen": -0.6647030115127563, "logps/rejected": -2.41868257522583, "loss": 0.9821, "nll_loss": 0.9236597418785095, "rewards/accuracies": 0.75, "rewards/chosen": -0.06647031009197235, "rewards/margins": 0.17539797723293304, "rewards/rejected": -0.2418682873249054, "step": 2476 }, { "epoch": 1.5110568857709317, "grad_norm": 1.3974310159683228, "learning_rate": 5.6024494794856094e-06, "log_odds_chosen": 1.3795737028121948, "log_odds_ratio": -0.38949304819107056, "logits/chosen": -0.7007067203521729, "logits/rejected": -0.8128170967102051, "logps/chosen": -0.6377942562103271, "logps/rejected": -1.5721070766448975, "loss": 1.0654, "nll_loss": 0.7398743629455566, "rewards/accuracies": 0.75, "rewards/chosen": -0.06377942860126495, "rewards/margins": 0.09343128651380539, "rewards/rejected": -0.15721070766448975, "step": 2477 }, { "epoch": 1.5116669208479487, "grad_norm": 1.2969510555267334, "learning_rate": 5.6014696876913656e-06, "log_odds_chosen": 0.5549116730690002, "log_odds_ratio": -0.704580545425415, "logits/chosen": -0.9463943243026733, "logits/rejected": -0.8940225839614868, "logps/chosen": -1.0158066749572754, "logps/rejected": -1.3773359060287476, "loss": 1.1975, "nll_loss": 1.41103994846344, "rewards/accuracies": 0.5, "rewards/chosen": -0.1015806794166565, "rewards/margins": 0.0361529216170311, "rewards/rejected": -0.137733593583107, "step": 2478 }, { "epoch": 1.5122769559249658, "grad_norm": 4.160453796386719, "learning_rate": 5.600489895897121e-06, "log_odds_chosen": 1.247454047203064, "log_odds_ratio": -0.43655484914779663, "logits/chosen": -0.6800925135612488, "logits/rejected": -0.48656001687049866, "logps/chosen": -0.7454271912574768, "logps/rejected": -1.6245566606521606, "loss": 1.0932, "nll_loss": 1.0314216613769531, "rewards/accuracies": 0.625, "rewards/chosen": -0.07454272359609604, "rewards/margins": 0.08791294693946838, "rewards/rejected": -0.16245566308498383, "step": 2479 }, { "epoch": 1.5128869910019827, "grad_norm": 2.34921932220459, "learning_rate": 5.599510104102878e-06, "log_odds_chosen": 2.3892722129821777, "log_odds_ratio": -0.30328285694122314, "logits/chosen": -0.6938861012458801, "logits/rejected": -0.8896896243095398, "logps/chosen": -0.6836562156677246, "logps/rejected": -2.568052291870117, "loss": 0.982, "nll_loss": 1.0177565813064575, "rewards/accuracies": 0.875, "rewards/chosen": -0.06836562603712082, "rewards/margins": 0.18843959271907806, "rewards/rejected": -0.2568052113056183, "step": 2480 }, { "epoch": 1.5134970260789995, "grad_norm": 1.5417211055755615, "learning_rate": 5.598530312308634e-06, "log_odds_chosen": 0.9514263868331909, "log_odds_ratio": -0.46422290802001953, "logits/chosen": -0.9830182790756226, "logits/rejected": -0.9019359946250916, "logps/chosen": -0.9106940031051636, "logps/rejected": -1.6305055618286133, "loss": 1.2097, "nll_loss": 1.0666425228118896, "rewards/accuracies": 0.875, "rewards/chosen": -0.09106940031051636, "rewards/margins": 0.07198114693164825, "rewards/rejected": -0.1630505621433258, "step": 2481 }, { "epoch": 1.5141070611560163, "grad_norm": 2.236210584640503, "learning_rate": 5.59755052051439e-06, "log_odds_chosen": 0.9808790683746338, "log_odds_ratio": -0.43954843282699585, "logits/chosen": -0.7834139466285706, "logits/rejected": -0.7923768758773804, "logps/chosen": -0.7857143878936768, "logps/rejected": -1.4750481843948364, "loss": 1.0057, "nll_loss": 0.8781374096870422, "rewards/accuracies": 0.875, "rewards/chosen": -0.07857143133878708, "rewards/margins": 0.0689333900809288, "rewards/rejected": -0.14750482141971588, "step": 2482 }, { "epoch": 1.5147170962330334, "grad_norm": 1.1147682666778564, "learning_rate": 5.596570728720147e-06, "log_odds_chosen": 0.9572777152061462, "log_odds_ratio": -0.5584524869918823, "logits/chosen": -0.9559906125068665, "logits/rejected": -0.860278844833374, "logps/chosen": -0.8578830361366272, "logps/rejected": -1.494244933128357, "loss": 1.0123, "nll_loss": 1.0109213590621948, "rewards/accuracies": 0.625, "rewards/chosen": -0.0857883095741272, "rewards/margins": 0.0636361837387085, "rewards/rejected": -0.1494244784116745, "step": 2483 }, { "epoch": 1.5153271313100505, "grad_norm": 2.562392234802246, "learning_rate": 5.595590936925903e-06, "log_odds_chosen": 0.624492883682251, "log_odds_ratio": -0.5940291881561279, "logits/chosen": -0.8949069976806641, "logits/rejected": -0.7070610523223877, "logps/chosen": -0.8241503238677979, "logps/rejected": -1.2885596752166748, "loss": 1.1089, "nll_loss": 1.007572054862976, "rewards/accuracies": 0.625, "rewards/chosen": -0.08241502940654755, "rewards/margins": 0.046440936625003815, "rewards/rejected": -0.12885597348213196, "step": 2484 }, { "epoch": 1.5159371663870673, "grad_norm": 2.154935598373413, "learning_rate": 5.594611145131659e-06, "log_odds_chosen": 0.9918580651283264, "log_odds_ratio": -0.47511059045791626, "logits/chosen": -0.8526792526245117, "logits/rejected": -0.8954917788505554, "logps/chosen": -0.8266292810440063, "logps/rejected": -1.5438429117202759, "loss": 0.9977, "nll_loss": 0.9261163473129272, "rewards/accuracies": 0.75, "rewards/chosen": -0.08266294002532959, "rewards/margins": 0.07172134518623352, "rewards/rejected": -0.1543842852115631, "step": 2485 }, { "epoch": 1.5165472014640842, "grad_norm": 1.2621729373931885, "learning_rate": 5.593631353337415e-06, "log_odds_chosen": 1.3267743587493896, "log_odds_ratio": -0.5375449061393738, "logits/chosen": -1.001574993133545, "logits/rejected": -0.9394991993904114, "logps/chosen": -1.031950831413269, "logps/rejected": -2.066103935241699, "loss": 1.1196, "nll_loss": 1.0766366720199585, "rewards/accuracies": 0.625, "rewards/chosen": -0.10319508612155914, "rewards/margins": 0.10341531038284302, "rewards/rejected": -0.20661039650440216, "step": 2486 }, { "epoch": 1.517157236541101, "grad_norm": 1.2955657243728638, "learning_rate": 5.5926515615431715e-06, "log_odds_chosen": 1.6407134532928467, "log_odds_ratio": -0.31933125853538513, "logits/chosen": -0.8431398272514343, "logits/rejected": -0.9424628019332886, "logps/chosen": -0.7759358882904053, "logps/rejected": -2.010850429534912, "loss": 1.1162, "nll_loss": 0.8894637823104858, "rewards/accuracies": 0.875, "rewards/chosen": -0.077593594789505, "rewards/margins": 0.12349145114421844, "rewards/rejected": -0.20108506083488464, "step": 2487 }, { "epoch": 1.517767271618118, "grad_norm": 1.2548915147781372, "learning_rate": 5.5916717697489285e-06, "log_odds_chosen": 0.62213534116745, "log_odds_ratio": -0.538665771484375, "logits/chosen": -1.0176143646240234, "logits/rejected": -0.9571881294250488, "logps/chosen": -0.9061287045478821, "logps/rejected": -1.3089516162872314, "loss": 1.1928, "nll_loss": 1.3049932718276978, "rewards/accuracies": 0.75, "rewards/chosen": -0.09061286598443985, "rewards/margins": 0.04028230160474777, "rewards/rejected": -0.13089516758918762, "step": 2488 }, { "epoch": 1.518377306695135, "grad_norm": 1.483144760131836, "learning_rate": 5.590691977954685e-06, "log_odds_chosen": 0.7425680160522461, "log_odds_ratio": -0.6647573709487915, "logits/chosen": -0.9888864755630493, "logits/rejected": -0.9591428637504578, "logps/chosen": -0.9531070590019226, "logps/rejected": -1.6057733297348022, "loss": 1.184, "nll_loss": 1.0721293687820435, "rewards/accuracies": 0.5, "rewards/chosen": -0.09531070291996002, "rewards/margins": 0.06526663899421692, "rewards/rejected": -0.16057734191417694, "step": 2489 }, { "epoch": 1.518987341772152, "grad_norm": 1.2976347208023071, "learning_rate": 5.589712186160441e-06, "log_odds_chosen": 1.1871885061264038, "log_odds_ratio": -0.39515286684036255, "logits/chosen": -0.943938136100769, "logits/rejected": -1.0206414461135864, "logps/chosen": -0.754289984703064, "logps/rejected": -1.615906000137329, "loss": 0.9306, "nll_loss": 0.9066457748413086, "rewards/accuracies": 0.875, "rewards/chosen": -0.07542899996042252, "rewards/margins": 0.08616160601377487, "rewards/rejected": -0.1615906059741974, "step": 2490 }, { "epoch": 1.5195973768491688, "grad_norm": 1.376273274421692, "learning_rate": 5.588732394366197e-06, "log_odds_chosen": 1.13163161277771, "log_odds_ratio": -0.40738385915756226, "logits/chosen": -0.7892360687255859, "logits/rejected": -0.7716497182846069, "logps/chosen": -0.6872042417526245, "logps/rejected": -1.4171558618545532, "loss": 1.0142, "nll_loss": 0.8841676115989685, "rewards/accuracies": 0.875, "rewards/chosen": -0.06872041523456573, "rewards/margins": 0.07299518585205078, "rewards/rejected": -0.1417156159877777, "step": 2491 }, { "epoch": 1.5202074119261857, "grad_norm": 1.1570836305618286, "learning_rate": 5.587752602571953e-06, "log_odds_chosen": 1.7690143585205078, "log_odds_ratio": -0.41738390922546387, "logits/chosen": -0.81022709608078, "logits/rejected": -0.7030981779098511, "logps/chosen": -0.9140681624412537, "logps/rejected": -2.284444808959961, "loss": 1.1604, "nll_loss": 1.246682047843933, "rewards/accuracies": 0.75, "rewards/chosen": -0.09140682220458984, "rewards/margins": 0.13703766465187073, "rewards/rejected": -0.22844450175762177, "step": 2492 }, { "epoch": 1.5208174470032025, "grad_norm": 1.748706340789795, "learning_rate": 5.586772810777709e-06, "log_odds_chosen": 1.782995343208313, "log_odds_ratio": -0.27341428399086, "logits/chosen": -0.751512885093689, "logits/rejected": -0.6428518295288086, "logps/chosen": -0.8584562540054321, "logps/rejected": -2.2051289081573486, "loss": 1.0447, "nll_loss": 0.9069657325744629, "rewards/accuracies": 1.0, "rewards/chosen": -0.08584562689065933, "rewards/margins": 0.1346672922372818, "rewards/rejected": -0.22051292657852173, "step": 2493 }, { "epoch": 1.5214274820802196, "grad_norm": 2.3471839427948, "learning_rate": 5.585793018983466e-06, "log_odds_chosen": 1.3819303512573242, "log_odds_ratio": -0.710601806640625, "logits/chosen": -0.9608615636825562, "logits/rejected": -0.9609988927841187, "logps/chosen": -0.8728699088096619, "logps/rejected": -2.1808629035949707, "loss": 1.119, "nll_loss": 1.0902751684188843, "rewards/accuracies": 0.25, "rewards/chosen": -0.08728699386119843, "rewards/margins": 0.1307993084192276, "rewards/rejected": -0.21808630228042603, "step": 2494 }, { "epoch": 1.5220375171572367, "grad_norm": 2.3907840251922607, "learning_rate": 5.584813227189222e-06, "log_odds_chosen": 1.6052659749984741, "log_odds_ratio": -0.4331895112991333, "logits/chosen": -0.8565149307250977, "logits/rejected": -0.7703676223754883, "logps/chosen": -0.9454232454299927, "logps/rejected": -2.2739739418029785, "loss": 1.0183, "nll_loss": 1.0564560890197754, "rewards/accuracies": 0.625, "rewards/chosen": -0.09454232454299927, "rewards/margins": 0.13285505771636963, "rewards/rejected": -0.2273973822593689, "step": 2495 }, { "epoch": 1.5226475522342535, "grad_norm": 5.770318508148193, "learning_rate": 5.583833435394979e-06, "log_odds_chosen": 2.404970169067383, "log_odds_ratio": -0.15179802477359772, "logits/chosen": -0.5530962944030762, "logits/rejected": -0.6448239684104919, "logps/chosen": -0.4837334156036377, "logps/rejected": -2.1019697189331055, "loss": 0.8369, "nll_loss": 0.540590226650238, "rewards/accuracies": 1.0, "rewards/chosen": -0.04837334156036377, "rewards/margins": 0.16182363033294678, "rewards/rejected": -0.21019697189331055, "step": 2496 }, { "epoch": 1.5232575873112704, "grad_norm": 1.2547228336334229, "learning_rate": 5.5828536436007344e-06, "log_odds_chosen": 1.5077664852142334, "log_odds_ratio": -0.44831931591033936, "logits/chosen": -0.7834994792938232, "logits/rejected": -0.8361940383911133, "logps/chosen": -0.7732446193695068, "logps/rejected": -1.987210750579834, "loss": 1.0314, "nll_loss": 1.1307986974716187, "rewards/accuracies": 0.625, "rewards/chosen": -0.07732446491718292, "rewards/margins": 0.12139661610126495, "rewards/rejected": -0.19872108101844788, "step": 2497 }, { "epoch": 1.5238676223882872, "grad_norm": 3.5512664318084717, "learning_rate": 5.5818738518064905e-06, "log_odds_chosen": 1.1699832677841187, "log_odds_ratio": -0.4005330204963684, "logits/chosen": -0.9317195415496826, "logits/rejected": -0.8421360850334167, "logps/chosen": -0.7958222031593323, "logps/rejected": -1.6113038063049316, "loss": 1.1019, "nll_loss": 1.1710691452026367, "rewards/accuracies": 0.875, "rewards/chosen": -0.07958222180604935, "rewards/margins": 0.08154815435409546, "rewards/rejected": -0.1611303836107254, "step": 2498 }, { "epoch": 1.5244776574653043, "grad_norm": 3.418792247772217, "learning_rate": 5.5808940600122475e-06, "log_odds_chosen": 0.37110498547554016, "log_odds_ratio": -0.5871899724006653, "logits/chosen": -0.970572829246521, "logits/rejected": -0.9178725481033325, "logps/chosen": -0.9590935707092285, "logps/rejected": -1.1881259679794312, "loss": 1.0938, "nll_loss": 1.159150242805481, "rewards/accuracies": 0.625, "rewards/chosen": -0.09590935707092285, "rewards/margins": 0.022903241217136383, "rewards/rejected": -0.11881260573863983, "step": 2499 }, { "epoch": 1.5250876925423213, "grad_norm": 1.6499117612838745, "learning_rate": 5.579914268218004e-06, "log_odds_chosen": 1.7769988775253296, "log_odds_ratio": -0.2623410224914551, "logits/chosen": -0.812545120716095, "logits/rejected": -0.77830570936203, "logps/chosen": -0.8103148341178894, "logps/rejected": -2.136767864227295, "loss": 1.0241, "nll_loss": 1.0493379831314087, "rewards/accuracies": 0.875, "rewards/chosen": -0.08103148639202118, "rewards/margins": 0.13264529407024384, "rewards/rejected": -0.21367676556110382, "step": 2500 }, { "epoch": 1.5256977276193382, "grad_norm": 1.3250510692596436, "learning_rate": 5.57893447642376e-06, "log_odds_chosen": 1.5379501581192017, "log_odds_ratio": -0.4517156779766083, "logits/chosen": -1.1291851997375488, "logits/rejected": -0.9155741333961487, "logps/chosen": -0.8204259276390076, "logps/rejected": -2.0045366287231445, "loss": 1.0621, "nll_loss": 1.1763107776641846, "rewards/accuracies": 0.625, "rewards/chosen": -0.08204259723424911, "rewards/margins": 0.11841106414794922, "rewards/rejected": -0.20045366883277893, "step": 2501 }, { "epoch": 1.526307762696355, "grad_norm": 3.111973762512207, "learning_rate": 5.577954684629516e-06, "log_odds_chosen": 0.4323084354400635, "log_odds_ratio": -0.8010804653167725, "logits/chosen": -0.7151633501052856, "logits/rejected": -0.7468076944351196, "logps/chosen": -0.9168069362640381, "logps/rejected": -1.106744408607483, "loss": 1.2758, "nll_loss": 1.2948259115219116, "rewards/accuracies": 0.375, "rewards/chosen": -0.09168069064617157, "rewards/margins": 0.01899375021457672, "rewards/rejected": -0.11067444086074829, "step": 2502 }, { "epoch": 1.5269177977733719, "grad_norm": 1.5397082567214966, "learning_rate": 5.576974892835272e-06, "log_odds_chosen": 2.4848685264587402, "log_odds_ratio": -0.27302438020706177, "logits/chosen": -0.7123546600341797, "logits/rejected": -0.8750879764556885, "logps/chosen": -0.7377833724021912, "logps/rejected": -2.6708974838256836, "loss": 0.9129, "nll_loss": 0.9171515703201294, "rewards/accuracies": 0.75, "rewards/chosen": -0.07377833873033524, "rewards/margins": 0.1933114230632782, "rewards/rejected": -0.2670897841453552, "step": 2503 }, { "epoch": 1.5275278328503887, "grad_norm": 1.307241439819336, "learning_rate": 5.575995101041028e-06, "log_odds_chosen": 0.9531700611114502, "log_odds_ratio": -0.7732043266296387, "logits/chosen": -0.9514815807342529, "logits/rejected": -1.007678508758545, "logps/chosen": -0.9884435534477234, "logps/rejected": -1.8608589172363281, "loss": 1.1584, "nll_loss": 1.204785943031311, "rewards/accuracies": 0.375, "rewards/chosen": -0.09884435683488846, "rewards/margins": 0.08724153786897659, "rewards/rejected": -0.18608589470386505, "step": 2504 }, { "epoch": 1.5281378679274058, "grad_norm": 0.9166889190673828, "learning_rate": 5.575015309246785e-06, "log_odds_chosen": 1.2908724546432495, "log_odds_ratio": -0.5371776819229126, "logits/chosen": -0.7485888004302979, "logits/rejected": -0.7818034887313843, "logps/chosen": -0.7872840762138367, "logps/rejected": -1.7749793529510498, "loss": 1.1402, "nll_loss": 0.8133730888366699, "rewards/accuracies": 0.625, "rewards/chosen": -0.07872840762138367, "rewards/margins": 0.09876953065395355, "rewards/rejected": -0.1774979531764984, "step": 2505 }, { "epoch": 1.5287479030044229, "grad_norm": 5.774515151977539, "learning_rate": 5.574035517452541e-06, "log_odds_chosen": 1.0314173698425293, "log_odds_ratio": -0.6559295654296875, "logits/chosen": -0.7928462624549866, "logits/rejected": -0.6987476944923401, "logps/chosen": -1.0300381183624268, "logps/rejected": -1.9455502033233643, "loss": 1.2446, "nll_loss": 1.262335181236267, "rewards/accuracies": 0.625, "rewards/chosen": -0.10300381481647491, "rewards/margins": 0.09155119955539703, "rewards/rejected": -0.19455501437187195, "step": 2506 }, { "epoch": 1.5293579380814397, "grad_norm": 1.5134114027023315, "learning_rate": 5.573055725658297e-06, "log_odds_chosen": 2.240988254547119, "log_odds_ratio": -0.6432231664657593, "logits/chosen": -0.7969598770141602, "logits/rejected": -0.7138686180114746, "logps/chosen": -0.8185728788375854, "logps/rejected": -2.725346565246582, "loss": 1.056, "nll_loss": 1.017919659614563, "rewards/accuracies": 0.5, "rewards/chosen": -0.08185729384422302, "rewards/margins": 0.19067737460136414, "rewards/rejected": -0.27253466844558716, "step": 2507 }, { "epoch": 1.5299679731584566, "grad_norm": 1.0990444421768188, "learning_rate": 5.5720759338640535e-06, "log_odds_chosen": 1.1446447372436523, "log_odds_ratio": -0.5230267643928528, "logits/chosen": -0.7504318952560425, "logits/rejected": -0.8037117719650269, "logps/chosen": -0.8336101770401001, "logps/rejected": -1.6939043998718262, "loss": 1.0431, "nll_loss": 1.158864974975586, "rewards/accuracies": 0.75, "rewards/chosen": -0.08336102217435837, "rewards/margins": 0.08602941781282425, "rewards/rejected": -0.16939043998718262, "step": 2508 }, { "epoch": 1.5305780082354734, "grad_norm": 1.8642960786819458, "learning_rate": 5.57109614206981e-06, "log_odds_chosen": 1.5597525835037231, "log_odds_ratio": -0.5264473557472229, "logits/chosen": -0.6368902325630188, "logits/rejected": -0.8442760109901428, "logps/chosen": -0.8350764513015747, "logps/rejected": -1.9636638164520264, "loss": 1.0209, "nll_loss": 0.9402093887329102, "rewards/accuracies": 0.625, "rewards/chosen": -0.08350765705108643, "rewards/margins": 0.11285873502492905, "rewards/rejected": -0.19636636972427368, "step": 2509 }, { "epoch": 1.5311880433124905, "grad_norm": 1.8383256196975708, "learning_rate": 5.5701163502755666e-06, "log_odds_chosen": 1.0408811569213867, "log_odds_ratio": -0.4168727397918701, "logits/chosen": -0.8414854407310486, "logits/rejected": -0.8239859342575073, "logps/chosen": -0.7813014388084412, "logps/rejected": -1.4280656576156616, "loss": 0.9681, "nll_loss": 0.9842291474342346, "rewards/accuracies": 0.875, "rewards/chosen": -0.07813014835119247, "rewards/margins": 0.06467641890048981, "rewards/rejected": -0.14280655980110168, "step": 2510 }, { "epoch": 1.5317980783895075, "grad_norm": 1.1322948932647705, "learning_rate": 5.569136558481323e-06, "log_odds_chosen": 0.6765874028205872, "log_odds_ratio": -0.6630417108535767, "logits/chosen": -0.9698027968406677, "logits/rejected": -0.908319354057312, "logps/chosen": -0.9904581904411316, "logps/rejected": -1.3475016355514526, "loss": 1.2217, "nll_loss": 1.3220114707946777, "rewards/accuracies": 0.5, "rewards/chosen": -0.09904582053422928, "rewards/margins": 0.035704340785741806, "rewards/rejected": -0.1347501575946808, "step": 2511 }, { "epoch": 1.5324081134665244, "grad_norm": 1.7382181882858276, "learning_rate": 5.568156766687079e-06, "log_odds_chosen": 1.3333024978637695, "log_odds_ratio": -0.4003922939300537, "logits/chosen": -0.8943787813186646, "logits/rejected": -0.8280379772186279, "logps/chosen": -0.612337589263916, "logps/rejected": -1.4172872304916382, "loss": 1.0868, "nll_loss": 1.142961859703064, "rewards/accuracies": 0.75, "rewards/chosen": -0.0612337626516819, "rewards/margins": 0.0804949626326561, "rewards/rejected": -0.1417287290096283, "step": 2512 }, { "epoch": 1.5330181485435412, "grad_norm": 1.064823031425476, "learning_rate": 5.567176974892836e-06, "log_odds_chosen": 2.12172269821167, "log_odds_ratio": -0.3909405469894409, "logits/chosen": -0.6017648577690125, "logits/rejected": -0.8215756416320801, "logps/chosen": -0.6450878381729126, "logps/rejected": -2.213796377182007, "loss": 1.0219, "nll_loss": 0.7725179195404053, "rewards/accuracies": 0.625, "rewards/chosen": -0.06450878828763962, "rewards/margins": 0.15687085688114166, "rewards/rejected": -0.22137965261936188, "step": 2513 }, { "epoch": 1.533628183620558, "grad_norm": 1.6869245767593384, "learning_rate": 5.566197183098591e-06, "log_odds_chosen": 1.0930640697479248, "log_odds_ratio": -0.5659945011138916, "logits/chosen": -0.7669646739959717, "logits/rejected": -0.753623366355896, "logps/chosen": -0.7710443735122681, "logps/rejected": -1.6372838020324707, "loss": 1.0008, "nll_loss": 1.0182101726531982, "rewards/accuracies": 0.5, "rewards/chosen": -0.07710443437099457, "rewards/margins": 0.08662395179271698, "rewards/rejected": -0.16372838616371155, "step": 2514 }, { "epoch": 1.5342382186975752, "grad_norm": 1.2883672714233398, "learning_rate": 5.565217391304347e-06, "log_odds_chosen": 0.1534762978553772, "log_odds_ratio": -0.6657649874687195, "logits/chosen": -1.0207346677780151, "logits/rejected": -0.9513400793075562, "logps/chosen": -0.8670065402984619, "logps/rejected": -0.9457370042800903, "loss": 1.1704, "nll_loss": 1.0848435163497925, "rewards/accuracies": 0.5, "rewards/chosen": -0.08670065551996231, "rewards/margins": 0.007873041555285454, "rewards/rejected": -0.09457369148731232, "step": 2515 }, { "epoch": 1.534848253774592, "grad_norm": 1.3350077867507935, "learning_rate": 5.564237599510104e-06, "log_odds_chosen": 3.9312071800231934, "log_odds_ratio": -0.06752195209264755, "logits/chosen": -0.5900403261184692, "logits/rejected": -0.6303979158401489, "logps/chosen": -0.536903977394104, "logps/rejected": -3.5393283367156982, "loss": 0.9469, "nll_loss": 0.6773403882980347, "rewards/accuracies": 1.0, "rewards/chosen": -0.05369040369987488, "rewards/margins": 0.30024242401123047, "rewards/rejected": -0.35393285751342773, "step": 2516 }, { "epoch": 1.535458288851609, "grad_norm": 0.9763394594192505, "learning_rate": 5.56325780771586e-06, "log_odds_chosen": 1.4964501857757568, "log_odds_ratio": -0.4232161045074463, "logits/chosen": -0.7237327098846436, "logits/rejected": -0.6843276619911194, "logps/chosen": -0.8336841464042664, "logps/rejected": -1.8935394287109375, "loss": 0.9319, "nll_loss": 0.8865488767623901, "rewards/accuracies": 0.75, "rewards/chosen": -0.08336842060089111, "rewards/margins": 0.10598553717136383, "rewards/rejected": -0.18935394287109375, "step": 2517 }, { "epoch": 1.536068323928626, "grad_norm": 1.4698517322540283, "learning_rate": 5.562278015921616e-06, "log_odds_chosen": 1.483007788658142, "log_odds_ratio": -0.5680675506591797, "logits/chosen": -0.7592060565948486, "logits/rejected": -0.9527926445007324, "logps/chosen": -0.7995915412902832, "logps/rejected": -1.8880081176757812, "loss": 0.9281, "nll_loss": 0.9647401571273804, "rewards/accuracies": 0.5, "rewards/chosen": -0.07995915412902832, "rewards/margins": 0.108841672539711, "rewards/rejected": -0.18880082666873932, "step": 2518 }, { "epoch": 1.5366783590056428, "grad_norm": 3.274606227874756, "learning_rate": 5.5612982241273725e-06, "log_odds_chosen": 1.2355883121490479, "log_odds_ratio": -0.3486708104610443, "logits/chosen": -0.8588155508041382, "logits/rejected": -0.8363090753555298, "logps/chosen": -0.8131359815597534, "logps/rejected": -1.625938892364502, "loss": 1.151, "nll_loss": 1.0727258920669556, "rewards/accuracies": 0.875, "rewards/chosen": -0.0813135951757431, "rewards/margins": 0.08128029108047485, "rewards/rejected": -0.16259387135505676, "step": 2519 }, { "epoch": 1.5372883940826596, "grad_norm": 1.4233307838439941, "learning_rate": 5.560318432333129e-06, "log_odds_chosen": 1.8355252742767334, "log_odds_ratio": -0.45125746726989746, "logits/chosen": -0.7718793153762817, "logits/rejected": -0.8265688419342041, "logps/chosen": -0.9631248116493225, "logps/rejected": -2.426910400390625, "loss": 1.079, "nll_loss": 1.1103116273880005, "rewards/accuracies": 0.5, "rewards/chosen": -0.09631248563528061, "rewards/margins": 0.14637857675552368, "rewards/rejected": -0.2426910549402237, "step": 2520 }, { "epoch": 1.5378984291596767, "grad_norm": 1.3099603652954102, "learning_rate": 5.559338640538885e-06, "log_odds_chosen": 1.6330102682113647, "log_odds_ratio": -0.5494164824485779, "logits/chosen": -1.0184507369995117, "logits/rejected": -0.9154897332191467, "logps/chosen": -0.9823589324951172, "logps/rejected": -2.293224573135376, "loss": 1.067, "nll_loss": 1.3130890130996704, "rewards/accuracies": 0.75, "rewards/chosen": -0.09823589026927948, "rewards/margins": 0.1310865730047226, "rewards/rejected": -0.22932247817516327, "step": 2521 }, { "epoch": 1.5385084642366937, "grad_norm": 2.0375537872314453, "learning_rate": 5.558358848744642e-06, "log_odds_chosen": 0.29240715503692627, "log_odds_ratio": -0.6323037147521973, "logits/chosen": -0.7763550281524658, "logits/rejected": -0.8973461985588074, "logps/chosen": -0.9121987223625183, "logps/rejected": -1.1163448095321655, "loss": 1.0797, "nll_loss": 1.228305459022522, "rewards/accuracies": 0.625, "rewards/chosen": -0.09121987968683243, "rewards/margins": 0.020414602011442184, "rewards/rejected": -0.11163447797298431, "step": 2522 }, { "epoch": 1.5391184993137106, "grad_norm": 7.530134201049805, "learning_rate": 5.557379056950398e-06, "log_odds_chosen": 1.7031569480895996, "log_odds_ratio": -0.439635306596756, "logits/chosen": -0.7404355406761169, "logits/rejected": -0.7675396203994751, "logps/chosen": -0.9936172366142273, "logps/rejected": -2.3071203231811523, "loss": 1.0384, "nll_loss": 1.0219972133636475, "rewards/accuracies": 0.75, "rewards/chosen": -0.09936171770095825, "rewards/margins": 0.1313503086566925, "rewards/rejected": -0.23071202635765076, "step": 2523 }, { "epoch": 1.5397285343907274, "grad_norm": 1.4257296323776245, "learning_rate": 5.556399265156155e-06, "log_odds_chosen": 0.8851293325424194, "log_odds_ratio": -0.5130696296691895, "logits/chosen": -1.030817985534668, "logits/rejected": -0.9387123584747314, "logps/chosen": -0.8727681636810303, "logps/rejected": -1.6017863750457764, "loss": 1.3028, "nll_loss": 1.3079216480255127, "rewards/accuracies": 0.625, "rewards/chosen": -0.08727681636810303, "rewards/margins": 0.07290181517601013, "rewards/rejected": -0.16017864644527435, "step": 2524 }, { "epoch": 1.5403385694677443, "grad_norm": 1.6901226043701172, "learning_rate": 5.55541947336191e-06, "log_odds_chosen": 2.4177393913269043, "log_odds_ratio": -0.5054854154586792, "logits/chosen": -0.6237314939498901, "logits/rejected": -0.9348790049552917, "logps/chosen": -0.8674442768096924, "logps/rejected": -2.996324062347412, "loss": 1.0085, "nll_loss": 1.1444692611694336, "rewards/accuracies": 0.5, "rewards/chosen": -0.08674442768096924, "rewards/margins": 0.2128879874944687, "rewards/rejected": -0.29963240027427673, "step": 2525 }, { "epoch": 1.5409486045447613, "grad_norm": 2.9667365550994873, "learning_rate": 5.554439681567666e-06, "log_odds_chosen": 2.324035406112671, "log_odds_ratio": -0.3305918276309967, "logits/chosen": -0.8214629292488098, "logits/rejected": -0.8938137292861938, "logps/chosen": -0.7922887802124023, "logps/rejected": -2.694286584854126, "loss": 1.3106, "nll_loss": 1.1795125007629395, "rewards/accuracies": 0.75, "rewards/chosen": -0.07922887802124023, "rewards/margins": 0.19019976258277893, "rewards/rejected": -0.26942864060401917, "step": 2526 }, { "epoch": 1.5415586396217784, "grad_norm": 1.7613900899887085, "learning_rate": 5.553459889773423e-06, "log_odds_chosen": 1.8228323459625244, "log_odds_ratio": -0.23672106862068176, "logits/chosen": -0.8651381134986877, "logits/rejected": -0.9237109422683716, "logps/chosen": -0.5646775960922241, "logps/rejected": -1.5012439489364624, "loss": 1.1368, "nll_loss": 1.0234330892562866, "rewards/accuracies": 1.0, "rewards/chosen": -0.05646775662899017, "rewards/margins": 0.09365664422512054, "rewards/rejected": -0.15012440085411072, "step": 2527 }, { "epoch": 1.5421686746987953, "grad_norm": 1.7730441093444824, "learning_rate": 5.552480097979179e-06, "log_odds_chosen": 1.2942476272583008, "log_odds_ratio": -0.4510766863822937, "logits/chosen": -0.7964100241661072, "logits/rejected": -0.8261784315109253, "logps/chosen": -0.7430090308189392, "logps/rejected": -1.6498215198516846, "loss": 1.1564, "nll_loss": 0.8567453026771545, "rewards/accuracies": 0.875, "rewards/chosen": -0.07430090010166168, "rewards/margins": 0.09068123996257782, "rewards/rejected": -0.1649821400642395, "step": 2528 }, { "epoch": 1.542778709775812, "grad_norm": 1.3010411262512207, "learning_rate": 5.551500306184935e-06, "log_odds_chosen": 0.5740804672241211, "log_odds_ratio": -0.6603923439979553, "logits/chosen": -0.9151546955108643, "logits/rejected": -0.7653339505195618, "logps/chosen": -0.8807555437088013, "logps/rejected": -1.3071517944335938, "loss": 1.0431, "nll_loss": 0.944385290145874, "rewards/accuracies": 0.5, "rewards/chosen": -0.08807554841041565, "rewards/margins": 0.04263962805271149, "rewards/rejected": -0.13071519136428833, "step": 2529 }, { "epoch": 1.543388744852829, "grad_norm": 2.672942876815796, "learning_rate": 5.550520514390692e-06, "log_odds_chosen": 1.9549769163131714, "log_odds_ratio": -0.31178250908851624, "logits/chosen": -0.9008969068527222, "logits/rejected": -0.8674022555351257, "logps/chosen": -0.682195246219635, "logps/rejected": -2.212069034576416, "loss": 1.0972, "nll_loss": 0.9800952672958374, "rewards/accuracies": 0.875, "rewards/chosen": -0.06821952760219574, "rewards/margins": 0.15298740565776825, "rewards/rejected": -0.221206933259964, "step": 2530 }, { "epoch": 1.543998779929846, "grad_norm": 2.9989943504333496, "learning_rate": 5.549540722596448e-06, "log_odds_chosen": 1.7789162397384644, "log_odds_ratio": -0.3184787333011627, "logits/chosen": -0.6871305704116821, "logits/rejected": -0.724483847618103, "logps/chosen": -0.6327769160270691, "logps/rejected": -1.827230453491211, "loss": 0.9976, "nll_loss": 0.917978048324585, "rewards/accuracies": 0.875, "rewards/chosen": -0.06327769160270691, "rewards/margins": 0.11944535374641418, "rewards/rejected": -0.1827230453491211, "step": 2531 }, { "epoch": 1.5446088150068629, "grad_norm": 0.8686668276786804, "learning_rate": 5.548560930802204e-06, "log_odds_chosen": 2.598177671432495, "log_odds_ratio": -0.41702407598495483, "logits/chosen": -0.8292573690414429, "logits/rejected": -0.9212667346000671, "logps/chosen": -0.8792466521263123, "logps/rejected": -3.242070198059082, "loss": 1.003, "nll_loss": 1.0706322193145752, "rewards/accuracies": 0.875, "rewards/chosen": -0.08792466670274734, "rewards/margins": 0.2362823784351349, "rewards/rejected": -0.32420700788497925, "step": 2532 }, { "epoch": 1.54521885008388, "grad_norm": 2.178081750869751, "learning_rate": 5.547581139007961e-06, "log_odds_chosen": 1.7732133865356445, "log_odds_ratio": -0.3612101078033447, "logits/chosen": -0.9049028158187866, "logits/rejected": -0.7214950323104858, "logps/chosen": -0.755341649055481, "logps/rejected": -2.1327149868011475, "loss": 1.1918, "nll_loss": 0.9371128082275391, "rewards/accuracies": 0.75, "rewards/chosen": -0.07553417980670929, "rewards/margins": 0.13773731887340546, "rewards/rejected": -0.21327149868011475, "step": 2533 }, { "epoch": 1.5458288851608968, "grad_norm": 0.9367010593414307, "learning_rate": 5.546601347213717e-06, "log_odds_chosen": 1.5652003288269043, "log_odds_ratio": -0.40664443373680115, "logits/chosen": -0.7830389738082886, "logits/rejected": -0.7987761497497559, "logps/chosen": -0.7704408168792725, "logps/rejected": -1.9129884243011475, "loss": 1.0265, "nll_loss": 1.033433437347412, "rewards/accuracies": 0.625, "rewards/chosen": -0.07704408466815948, "rewards/margins": 0.11425474286079407, "rewards/rejected": -0.19129884243011475, "step": 2534 }, { "epoch": 1.5464389202379136, "grad_norm": 2.159531831741333, "learning_rate": 5.545621555419473e-06, "log_odds_chosen": 1.2490043640136719, "log_odds_ratio": -0.3458406329154968, "logits/chosen": -0.7076967358589172, "logits/rejected": -0.743579626083374, "logps/chosen": -0.8258545398712158, "logps/rejected": -1.713107943534851, "loss": 1.0932, "nll_loss": 0.9610235691070557, "rewards/accuracies": 1.0, "rewards/chosen": -0.08258544653654099, "rewards/margins": 0.08872533589601517, "rewards/rejected": -0.17131078243255615, "step": 2535 }, { "epoch": 1.5470489553149305, "grad_norm": 1.641748309135437, "learning_rate": 5.544641763625229e-06, "log_odds_chosen": 0.9826844930648804, "log_odds_ratio": -0.5201801657676697, "logits/chosen": -1.169465184211731, "logits/rejected": -1.0544862747192383, "logps/chosen": -0.8907891511917114, "logps/rejected": -1.6623615026474, "loss": 1.0545, "nll_loss": 1.1213116645812988, "rewards/accuracies": 0.75, "rewards/chosen": -0.08907891064882278, "rewards/margins": 0.07715723663568497, "rewards/rejected": -0.16623616218566895, "step": 2536 }, { "epoch": 1.5476589903919475, "grad_norm": 1.3653373718261719, "learning_rate": 5.543661971830985e-06, "log_odds_chosen": 0.8202639818191528, "log_odds_ratio": -0.4593036472797394, "logits/chosen": -0.8631136417388916, "logits/rejected": -1.0361170768737793, "logps/chosen": -0.8193888664245605, "logps/rejected": -1.4025869369506836, "loss": 1.1201, "nll_loss": 0.9566749334335327, "rewards/accuracies": 0.75, "rewards/chosen": -0.08193889260292053, "rewards/margins": 0.058319807052612305, "rewards/rejected": -0.14025869965553284, "step": 2537 }, { "epoch": 1.5482690254689646, "grad_norm": 3.4913411140441895, "learning_rate": 5.542682180036742e-06, "log_odds_chosen": 1.1478837728500366, "log_odds_ratio": -0.39347296953201294, "logits/chosen": -0.9324799180030823, "logits/rejected": -0.9799394607543945, "logps/chosen": -0.7900645732879639, "logps/rejected": -1.6246367692947388, "loss": 1.1268, "nll_loss": 0.9825326800346375, "rewards/accuracies": 0.75, "rewards/chosen": -0.07900645583868027, "rewards/margins": 0.08345721662044525, "rewards/rejected": -0.16246366500854492, "step": 2538 }, { "epoch": 1.5488790605459815, "grad_norm": 2.4542739391326904, "learning_rate": 5.541702388242498e-06, "log_odds_chosen": 1.6903098821640015, "log_odds_ratio": -0.5130757093429565, "logits/chosen": -0.9518396258354187, "logits/rejected": -0.9492877125740051, "logps/chosen": -0.7639410495758057, "logps/rejected": -2.1854686737060547, "loss": 1.1294, "nll_loss": 1.0669491291046143, "rewards/accuracies": 0.5, "rewards/chosen": -0.07639411091804504, "rewards/margins": 0.14215274155139923, "rewards/rejected": -0.21854686737060547, "step": 2539 }, { "epoch": 1.5494890956229983, "grad_norm": 5.182666778564453, "learning_rate": 5.5407225964482545e-06, "log_odds_chosen": 2.435574769973755, "log_odds_ratio": -0.3081682324409485, "logits/chosen": -1.0480382442474365, "logits/rejected": -0.8650548458099365, "logps/chosen": -0.858579158782959, "logps/rejected": -2.8573074340820312, "loss": 0.9179, "nll_loss": 0.9976521730422974, "rewards/accuracies": 0.75, "rewards/chosen": -0.08585792779922485, "rewards/margins": 0.19987282156944275, "rewards/rejected": -0.2857307493686676, "step": 2540 }, { "epoch": 1.5500991307000151, "grad_norm": 1.456286072731018, "learning_rate": 5.5397428046540114e-06, "log_odds_chosen": 0.9255800247192383, "log_odds_ratio": -0.5009828805923462, "logits/chosen": -1.029209852218628, "logits/rejected": -0.7910193204879761, "logps/chosen": -1.221233606338501, "logps/rejected": -1.9998271465301514, "loss": 1.1053, "nll_loss": 1.4108309745788574, "rewards/accuracies": 0.75, "rewards/chosen": -0.12212337553501129, "rewards/margins": 0.07785935699939728, "rewards/rejected": -0.19998273253440857, "step": 2541 }, { "epoch": 1.5507091657770322, "grad_norm": 1.8447456359863281, "learning_rate": 5.538763012859767e-06, "log_odds_chosen": 2.4482879638671875, "log_odds_ratio": -0.2185838222503662, "logits/chosen": -0.7493706941604614, "logits/rejected": -0.9098882675170898, "logps/chosen": -0.5935033559799194, "logps/rejected": -2.393423318862915, "loss": 1.0245, "nll_loss": 0.8028548955917358, "rewards/accuracies": 1.0, "rewards/chosen": -0.059350334107875824, "rewards/margins": 0.17999200522899628, "rewards/rejected": -0.2393423467874527, "step": 2542 }, { "epoch": 1.551319200854049, "grad_norm": 3.4737184047698975, "learning_rate": 5.537783221065523e-06, "log_odds_chosen": 2.1631431579589844, "log_odds_ratio": -0.3155973553657532, "logits/chosen": -0.8280279636383057, "logits/rejected": -0.7442380785942078, "logps/chosen": -0.7161019444465637, "logps/rejected": -2.2777466773986816, "loss": 0.9203, "nll_loss": 0.8647473454475403, "rewards/accuracies": 0.75, "rewards/chosen": -0.07161019742488861, "rewards/margins": 0.1561644822359085, "rewards/rejected": -0.22777467966079712, "step": 2543 }, { "epoch": 1.5519292359310661, "grad_norm": 1.4820183515548706, "learning_rate": 5.53680342927128e-06, "log_odds_chosen": 2.3896355628967285, "log_odds_ratio": -0.31840088963508606, "logits/chosen": -0.7135456204414368, "logits/rejected": -0.8038613796234131, "logps/chosen": -0.643345832824707, "logps/rejected": -2.439681053161621, "loss": 1.0305, "nll_loss": 0.9056124687194824, "rewards/accuracies": 0.875, "rewards/chosen": -0.06433458626270294, "rewards/margins": 0.1796335130929947, "rewards/rejected": -0.24396809935569763, "step": 2544 }, { "epoch": 1.552539271008083, "grad_norm": 2.916074275970459, "learning_rate": 5.535823637477036e-06, "log_odds_chosen": 1.1309876441955566, "log_odds_ratio": -0.40410900115966797, "logits/chosen": -0.7720824480056763, "logits/rejected": -0.9140823483467102, "logps/chosen": -0.6867576837539673, "logps/rejected": -1.392543911933899, "loss": 1.0885, "nll_loss": 0.9516808986663818, "rewards/accuracies": 0.875, "rewards/chosen": -0.06867577135562897, "rewards/margins": 0.07057863473892212, "rewards/rejected": -0.1392544060945511, "step": 2545 }, { "epoch": 1.5531493060850998, "grad_norm": 3.2196173667907715, "learning_rate": 5.534843845682792e-06, "log_odds_chosen": 1.474929690361023, "log_odds_ratio": -0.4309034049510956, "logits/chosen": -0.6047282814979553, "logits/rejected": -0.7344374656677246, "logps/chosen": -0.5460704565048218, "logps/rejected": -1.4598114490509033, "loss": 1.1314, "nll_loss": 0.8421685695648193, "rewards/accuracies": 0.625, "rewards/chosen": -0.054607048630714417, "rewards/margins": 0.09137409925460815, "rewards/rejected": -0.14598113298416138, "step": 2546 }, { "epoch": 1.5537593411621167, "grad_norm": 1.2883137464523315, "learning_rate": 5.533864053888549e-06, "log_odds_chosen": 1.3665425777435303, "log_odds_ratio": -0.4068998694419861, "logits/chosen": -0.7248858213424683, "logits/rejected": -0.8294969201087952, "logps/chosen": -0.7382005453109741, "logps/rejected": -1.6237661838531494, "loss": 1.004, "nll_loss": 0.9167613983154297, "rewards/accuracies": 0.75, "rewards/chosen": -0.07382005453109741, "rewards/margins": 0.08855657279491425, "rewards/rejected": -0.16237661242485046, "step": 2547 }, { "epoch": 1.5543693762391337, "grad_norm": 0.9518353939056396, "learning_rate": 5.532884262094304e-06, "log_odds_chosen": 1.5821119546890259, "log_odds_ratio": -0.41732579469680786, "logits/chosen": -0.9362848997116089, "logits/rejected": -0.8680323958396912, "logps/chosen": -1.0556201934814453, "logps/rejected": -2.301617383956909, "loss": 1.1267, "nll_loss": 1.161807894706726, "rewards/accuracies": 0.75, "rewards/chosen": -0.10556202381849289, "rewards/margins": 0.12459972500801086, "rewards/rejected": -0.23016175627708435, "step": 2548 }, { "epoch": 1.5549794113161508, "grad_norm": 1.7191466093063354, "learning_rate": 5.53190447030006e-06, "log_odds_chosen": 0.8847781419754028, "log_odds_ratio": -0.5599172115325928, "logits/chosen": -0.8990005254745483, "logits/rejected": -0.9022780656814575, "logps/chosen": -0.9338212013244629, "logps/rejected": -1.4996343851089478, "loss": 0.9508, "nll_loss": 0.9579650163650513, "rewards/accuracies": 0.75, "rewards/chosen": -0.09338212013244629, "rewards/margins": 0.05658132955431938, "rewards/rejected": -0.14996343851089478, "step": 2549 }, { "epoch": 1.5555894463931677, "grad_norm": 0.944240152835846, "learning_rate": 5.530924678505817e-06, "log_odds_chosen": 1.0663883686065674, "log_odds_ratio": -0.4873148202896118, "logits/chosen": -0.8182233572006226, "logits/rejected": -0.7924298048019409, "logps/chosen": -0.8451813459396362, "logps/rejected": -1.6550153493881226, "loss": 1.1518, "nll_loss": 1.1481579542160034, "rewards/accuracies": 0.75, "rewards/chosen": -0.08451813459396362, "rewards/margins": 0.08098341524600983, "rewards/rejected": -0.16550154983997345, "step": 2550 }, { "epoch": 1.5561994814701845, "grad_norm": 5.571816444396973, "learning_rate": 5.5299448867115735e-06, "log_odds_chosen": 1.449806809425354, "log_odds_ratio": -0.49220362305641174, "logits/chosen": -0.963215172290802, "logits/rejected": -0.9044803380966187, "logps/chosen": -0.8974839448928833, "logps/rejected": -2.0199127197265625, "loss": 1.2123, "nll_loss": 1.029615879058838, "rewards/accuracies": 0.625, "rewards/chosen": -0.08974839746952057, "rewards/margins": 0.11224286258220673, "rewards/rejected": -0.2019912600517273, "step": 2551 }, { "epoch": 1.5568095165472013, "grad_norm": 1.9228911399841309, "learning_rate": 5.5289650949173305e-06, "log_odds_chosen": 1.7605164051055908, "log_odds_ratio": -0.4162086248397827, "logits/chosen": -0.589764416217804, "logits/rejected": -0.7423189282417297, "logps/chosen": -0.5154831409454346, "logps/rejected": -1.7067506313323975, "loss": 0.9619, "nll_loss": 0.7930707931518555, "rewards/accuracies": 0.75, "rewards/chosen": -0.0515483133494854, "rewards/margins": 0.11912676692008972, "rewards/rejected": -0.17067508399486542, "step": 2552 }, { "epoch": 1.5574195516242184, "grad_norm": 1.177893877029419, "learning_rate": 5.527985303123087e-06, "log_odds_chosen": 1.0332447290420532, "log_odds_ratio": -0.5352843999862671, "logits/chosen": -0.6787713766098022, "logits/rejected": -0.7016506195068359, "logps/chosen": -0.7861824035644531, "logps/rejected": -1.4516912698745728, "loss": 1.0256, "nll_loss": 0.8461726903915405, "rewards/accuracies": 0.5, "rewards/chosen": -0.07861823588609695, "rewards/margins": 0.06655088812112808, "rewards/rejected": -0.14516912400722504, "step": 2553 }, { "epoch": 1.5580295867012355, "grad_norm": 5.362849235534668, "learning_rate": 5.527005511328842e-06, "log_odds_chosen": 1.6402838230133057, "log_odds_ratio": -0.4288111925125122, "logits/chosen": -0.8696236610412598, "logits/rejected": -0.9973232746124268, "logps/chosen": -0.8576831817626953, "logps/rejected": -2.217149019241333, "loss": 1.1064, "nll_loss": 1.1558398008346558, "rewards/accuracies": 0.875, "rewards/chosen": -0.08576831221580505, "rewards/margins": 0.1359466016292572, "rewards/rejected": -0.22171492874622345, "step": 2554 }, { "epoch": 1.5586396217782523, "grad_norm": 7.246178150177002, "learning_rate": 5.526025719534599e-06, "log_odds_chosen": 2.238929510116577, "log_odds_ratio": -0.4765285849571228, "logits/chosen": -0.8842220902442932, "logits/rejected": -0.9253867268562317, "logps/chosen": -0.8598819971084595, "logps/rejected": -2.789003610610962, "loss": 1.02, "nll_loss": 1.1941807270050049, "rewards/accuracies": 0.75, "rewards/chosen": -0.08598820120096207, "rewards/margins": 0.19291216135025024, "rewards/rejected": -0.2789003551006317, "step": 2555 }, { "epoch": 1.5592496568552692, "grad_norm": 6.595717430114746, "learning_rate": 5.525045927740355e-06, "log_odds_chosen": 1.4129031896591187, "log_odds_ratio": -0.4676171839237213, "logits/chosen": -0.8892577290534973, "logits/rejected": -0.8623193502426147, "logps/chosen": -0.7562360763549805, "logps/rejected": -1.8283164501190186, "loss": 1.0821, "nll_loss": 0.8370970487594604, "rewards/accuracies": 0.75, "rewards/chosen": -0.07562360912561417, "rewards/margins": 0.10720804333686829, "rewards/rejected": -0.18283164501190186, "step": 2556 }, { "epoch": 1.559859691932286, "grad_norm": 2.1830408573150635, "learning_rate": 5.524066135946111e-06, "log_odds_chosen": 1.9757132530212402, "log_odds_ratio": -0.2883279621601105, "logits/chosen": -0.8008719086647034, "logits/rejected": -0.834000289440155, "logps/chosen": -0.6066874265670776, "logps/rejected": -1.9641838073730469, "loss": 1.2069, "nll_loss": 1.072137475013733, "rewards/accuracies": 0.75, "rewards/chosen": -0.06066875159740448, "rewards/margins": 0.13574963808059692, "rewards/rejected": -0.1964183747768402, "step": 2557 }, { "epoch": 1.560469727009303, "grad_norm": 1.0344172716140747, "learning_rate": 5.523086344151868e-06, "log_odds_chosen": 2.058364152908325, "log_odds_ratio": -0.4044439494609833, "logits/chosen": -0.6158266067504883, "logits/rejected": -0.727750301361084, "logps/chosen": -0.7312882542610168, "logps/rejected": -2.2432961463928223, "loss": 1.1204, "nll_loss": 0.972801685333252, "rewards/accuracies": 0.625, "rewards/chosen": -0.0731288269162178, "rewards/margins": 0.1512007713317871, "rewards/rejected": -0.22432959079742432, "step": 2558 }, { "epoch": 1.56107976208632, "grad_norm": 2.5397186279296875, "learning_rate": 5.522106552357623e-06, "log_odds_chosen": 2.097641944885254, "log_odds_ratio": -0.37941545248031616, "logits/chosen": -0.48673200607299805, "logits/rejected": -0.6822521686553955, "logps/chosen": -0.5891162157058716, "logps/rejected": -2.1625490188598633, "loss": 0.8236, "nll_loss": 0.6358466148376465, "rewards/accuracies": 0.75, "rewards/chosen": -0.05891162529587746, "rewards/margins": 0.1573432981967926, "rewards/rejected": -0.21625491976737976, "step": 2559 }, { "epoch": 1.561689797163337, "grad_norm": 1.860187292098999, "learning_rate": 5.5211267605633794e-06, "log_odds_chosen": 1.5113025903701782, "log_odds_ratio": -0.36385655403137207, "logits/chosen": -0.7290829420089722, "logits/rejected": -0.8477517366409302, "logps/chosen": -0.7481787204742432, "logps/rejected": -1.7638514041900635, "loss": 1.077, "nll_loss": 0.9831631183624268, "rewards/accuracies": 0.75, "rewards/chosen": -0.07481786608695984, "rewards/margins": 0.10156727582216263, "rewards/rejected": -0.17638514935970306, "step": 2560 }, { "epoch": 1.5622998322403538, "grad_norm": 2.3019044399261475, "learning_rate": 5.520146968769136e-06, "log_odds_chosen": 2.1693649291992188, "log_odds_ratio": -0.3102201223373413, "logits/chosen": -0.7105373740196228, "logits/rejected": -0.8414967060089111, "logps/chosen": -0.5525964498519897, "logps/rejected": -2.0174758434295654, "loss": 0.9885, "nll_loss": 0.8283458352088928, "rewards/accuracies": 0.75, "rewards/chosen": -0.055259644985198975, "rewards/margins": 0.14648793637752533, "rewards/rejected": -0.2017475813627243, "step": 2561 }, { "epoch": 1.5629098673173707, "grad_norm": 3.294320821762085, "learning_rate": 5.5191671769748925e-06, "log_odds_chosen": 1.43846595287323, "log_odds_ratio": -0.4856198728084564, "logits/chosen": -0.87413489818573, "logits/rejected": -0.8120351433753967, "logps/chosen": -1.0996688604354858, "logps/rejected": -2.244779586791992, "loss": 1.1587, "nll_loss": 1.2482508420944214, "rewards/accuracies": 0.625, "rewards/chosen": -0.10996688902378082, "rewards/margins": 0.11451109498739243, "rewards/rejected": -0.22447799146175385, "step": 2562 }, { "epoch": 1.5635199023943875, "grad_norm": 1.0880471467971802, "learning_rate": 5.518187385180649e-06, "log_odds_chosen": 1.134243369102478, "log_odds_ratio": -0.5314016938209534, "logits/chosen": -0.8219916820526123, "logits/rejected": -0.6661800742149353, "logps/chosen": -0.6466844081878662, "logps/rejected": -1.494692087173462, "loss": 1.0011, "nll_loss": 0.8446565270423889, "rewards/accuracies": 0.75, "rewards/chosen": -0.0646684467792511, "rewards/margins": 0.08480077981948853, "rewards/rejected": -0.14946922659873962, "step": 2563 }, { "epoch": 1.5641299374714046, "grad_norm": 1.1135368347167969, "learning_rate": 5.517207593386406e-06, "log_odds_chosen": 1.7500355243682861, "log_odds_ratio": -0.4531891345977783, "logits/chosen": -0.9277398586273193, "logits/rejected": -1.0712039470672607, "logps/chosen": -0.909034013748169, "logps/rejected": -2.293046474456787, "loss": 0.9976, "nll_loss": 1.0652668476104736, "rewards/accuracies": 0.75, "rewards/chosen": -0.09090340882539749, "rewards/margins": 0.13840122520923615, "rewards/rejected": -0.22930464148521423, "step": 2564 }, { "epoch": 1.5647399725484217, "grad_norm": 7.365447998046875, "learning_rate": 5.516227801592161e-06, "log_odds_chosen": 2.0378973484039307, "log_odds_ratio": -0.42810195684432983, "logits/chosen": -0.9139009714126587, "logits/rejected": -0.8903024792671204, "logps/chosen": -0.8351738452911377, "logps/rejected": -2.4353551864624023, "loss": 1.0155, "nll_loss": 0.99949711561203, "rewards/accuracies": 0.875, "rewards/chosen": -0.08351738005876541, "rewards/margins": 0.16001813113689423, "rewards/rejected": -0.24353548884391785, "step": 2565 }, { "epoch": 1.5653500076254385, "grad_norm": 1.4118622541427612, "learning_rate": 5.515248009797918e-06, "log_odds_chosen": 1.049253225326538, "log_odds_ratio": -0.558630108833313, "logits/chosen": -0.6684341430664062, "logits/rejected": -0.7663778066635132, "logps/chosen": -0.7341742515563965, "logps/rejected": -1.3904277086257935, "loss": 0.9798, "nll_loss": 0.7836344242095947, "rewards/accuracies": 0.625, "rewards/chosen": -0.07341743260622025, "rewards/margins": 0.06562533974647522, "rewards/rejected": -0.13904276490211487, "step": 2566 }, { "epoch": 1.5659600427024554, "grad_norm": 1.7450803518295288, "learning_rate": 5.514268218003674e-06, "log_odds_chosen": 2.0920915603637695, "log_odds_ratio": -0.4146295487880707, "logits/chosen": -0.7190822958946228, "logits/rejected": -0.8584149479866028, "logps/chosen": -0.7382564544677734, "logps/rejected": -2.218726396560669, "loss": 1.072, "nll_loss": 0.8154364824295044, "rewards/accuracies": 0.75, "rewards/chosen": -0.0738256424665451, "rewards/margins": 0.14804697036743164, "rewards/rejected": -0.22187262773513794, "step": 2567 }, { "epoch": 1.5665700777794722, "grad_norm": 3.109867572784424, "learning_rate": 5.51328842620943e-06, "log_odds_chosen": 0.4329894781112671, "log_odds_ratio": -0.5477924346923828, "logits/chosen": -0.8770626783370972, "logits/rejected": -0.6765099763870239, "logps/chosen": -0.8409799337387085, "logps/rejected": -1.0705900192260742, "loss": 1.0179, "nll_loss": 1.1069070100784302, "rewards/accuracies": 0.625, "rewards/chosen": -0.08409799635410309, "rewards/margins": 0.022961009293794632, "rewards/rejected": -0.10705900192260742, "step": 2568 }, { "epoch": 1.5671801128564893, "grad_norm": 2.0400397777557373, "learning_rate": 5.512308634415187e-06, "log_odds_chosen": 2.8952481746673584, "log_odds_ratio": -0.3043539524078369, "logits/chosen": -0.61414635181427, "logits/rejected": -0.8843346834182739, "logps/chosen": -0.6297029852867126, "logps/rejected": -2.7999563217163086, "loss": 1.1243, "nll_loss": 0.8918410539627075, "rewards/accuracies": 0.875, "rewards/chosen": -0.06297029554843903, "rewards/margins": 0.21702532470226288, "rewards/rejected": -0.2799956202507019, "step": 2569 }, { "epoch": 1.5677901479335061, "grad_norm": 2.682852029800415, "learning_rate": 5.511328842620943e-06, "log_odds_chosen": 0.667709469795227, "log_odds_ratio": -0.5918682813644409, "logits/chosen": -1.1111937761306763, "logits/rejected": -0.8740152716636658, "logps/chosen": -1.0168299674987793, "logps/rejected": -1.5505571365356445, "loss": 1.1535, "nll_loss": 1.0740911960601807, "rewards/accuracies": 0.5, "rewards/chosen": -0.10168299823999405, "rewards/margins": 0.053372710943222046, "rewards/rejected": -0.1550557166337967, "step": 2570 }, { "epoch": 1.5684001830105232, "grad_norm": 2.6252200603485107, "learning_rate": 5.5103490508266985e-06, "log_odds_chosen": 1.9238498210906982, "log_odds_ratio": -0.4662717580795288, "logits/chosen": -0.6646947860717773, "logits/rejected": -0.7251682877540588, "logps/chosen": -0.6675700545310974, "logps/rejected": -1.970879077911377, "loss": 1.0374, "nll_loss": 0.8565446734428406, "rewards/accuracies": 0.625, "rewards/chosen": -0.06675700843334198, "rewards/margins": 0.130330890417099, "rewards/rejected": -0.19708789885044098, "step": 2571 }, { "epoch": 1.56901021808754, "grad_norm": 1.1340456008911133, "learning_rate": 5.5093692590324555e-06, "log_odds_chosen": 0.8802831768989563, "log_odds_ratio": -0.5548490285873413, "logits/chosen": -0.9219682216644287, "logits/rejected": -0.8752164244651794, "logps/chosen": -0.9154893159866333, "logps/rejected": -1.5811617374420166, "loss": 1.0824, "nll_loss": 1.1564887762069702, "rewards/accuracies": 0.75, "rewards/chosen": -0.09154893457889557, "rewards/margins": 0.06656724214553833, "rewards/rejected": -0.1581161767244339, "step": 2572 }, { "epoch": 1.5696202531645569, "grad_norm": 2.9483604431152344, "learning_rate": 5.508389467238212e-06, "log_odds_chosen": 1.1128532886505127, "log_odds_ratio": -0.6043262481689453, "logits/chosen": -0.9314286708831787, "logits/rejected": -0.7561902403831482, "logps/chosen": -0.8013826608657837, "logps/rejected": -1.678429126739502, "loss": 1.0112, "nll_loss": 0.986329972743988, "rewards/accuracies": 0.625, "rewards/chosen": -0.08013826608657837, "rewards/margins": 0.08770464360713959, "rewards/rejected": -0.16784292459487915, "step": 2573 }, { "epoch": 1.5702302882415737, "grad_norm": 1.5609976053237915, "learning_rate": 5.507409675443968e-06, "log_odds_chosen": 1.6916582584381104, "log_odds_ratio": -0.3993774950504303, "logits/chosen": -0.5716679096221924, "logits/rejected": -0.6723162531852722, "logps/chosen": -0.8189375996589661, "logps/rejected": -1.9196438789367676, "loss": 0.9747, "nll_loss": 0.9253197908401489, "rewards/accuracies": 0.75, "rewards/chosen": -0.08189376443624496, "rewards/margins": 0.1100706234574318, "rewards/rejected": -0.19196438789367676, "step": 2574 }, { "epoch": 1.5708403233185908, "grad_norm": 2.5794224739074707, "learning_rate": 5.506429883649725e-06, "log_odds_chosen": 2.1820340156555176, "log_odds_ratio": -0.44838008284568787, "logits/chosen": -0.4442605972290039, "logits/rejected": -0.5910895466804504, "logps/chosen": -0.6562374830245972, "logps/rejected": -2.3851351737976074, "loss": 0.9779, "nll_loss": 0.7855462431907654, "rewards/accuracies": 0.75, "rewards/chosen": -0.06562374532222748, "rewards/margins": 0.17288976907730103, "rewards/rejected": -0.2385135143995285, "step": 2575 }, { "epoch": 1.5714503583956079, "grad_norm": 1.2104318141937256, "learning_rate": 5.50545009185548e-06, "log_odds_chosen": 1.6800706386566162, "log_odds_ratio": -0.21371304988861084, "logits/chosen": -0.5524154901504517, "logits/rejected": -0.7226391434669495, "logps/chosen": -0.52627032995224, "logps/rejected": -1.4238237142562866, "loss": 1.0597, "nll_loss": 0.9811379909515381, "rewards/accuracies": 1.0, "rewards/chosen": -0.05262703448534012, "rewards/margins": 0.0897553488612175, "rewards/rejected": -0.14238238334655762, "step": 2576 }, { "epoch": 1.5720603934726247, "grad_norm": 10.858196258544922, "learning_rate": 5.504470300061236e-06, "log_odds_chosen": 1.2413268089294434, "log_odds_ratio": -0.422580361366272, "logits/chosen": -1.0150063037872314, "logits/rejected": -1.045499324798584, "logps/chosen": -0.8437800407409668, "logps/rejected": -1.7745378017425537, "loss": 1.2228, "nll_loss": 1.107378363609314, "rewards/accuracies": 0.875, "rewards/chosen": -0.08437800407409668, "rewards/margins": 0.09307578951120377, "rewards/rejected": -0.17745378613471985, "step": 2577 }, { "epoch": 1.5726704285496416, "grad_norm": 3.637267589569092, "learning_rate": 5.503490508266993e-06, "log_odds_chosen": 1.0886900424957275, "log_odds_ratio": -0.6371052265167236, "logits/chosen": -0.7703521251678467, "logits/rejected": -0.9609028100967407, "logps/chosen": -0.9396564960479736, "logps/rejected": -1.7965998649597168, "loss": 0.9905, "nll_loss": 1.3084373474121094, "rewards/accuracies": 0.5, "rewards/chosen": -0.09396565705537796, "rewards/margins": 0.0856943279504776, "rewards/rejected": -0.17965996265411377, "step": 2578 }, { "epoch": 1.5732804636266584, "grad_norm": 1.6662194728851318, "learning_rate": 5.502510716472749e-06, "log_odds_chosen": 0.9467204213142395, "log_odds_ratio": -0.46406319737434387, "logits/chosen": -0.9650887846946716, "logits/rejected": -0.9835374355316162, "logps/chosen": -0.8650475740432739, "logps/rejected": -1.5583138465881348, "loss": 1.07, "nll_loss": 1.093822717666626, "rewards/accuracies": 0.75, "rewards/chosen": -0.08650475740432739, "rewards/margins": 0.06932663917541504, "rewards/rejected": -0.15583139657974243, "step": 2579 }, { "epoch": 1.5738904987036755, "grad_norm": 1.3166133165359497, "learning_rate": 5.501530924678506e-06, "log_odds_chosen": 1.3732643127441406, "log_odds_ratio": -0.4761536121368408, "logits/chosen": -0.9443543553352356, "logits/rejected": -0.8407355546951294, "logps/chosen": -1.1139041185379028, "logps/rejected": -2.3391356468200684, "loss": 1.2931, "nll_loss": 1.2641854286193848, "rewards/accuracies": 0.625, "rewards/chosen": -0.11139041185379028, "rewards/margins": 0.12252315878868103, "rewards/rejected": -0.2339135706424713, "step": 2580 }, { "epoch": 1.5745005337806925, "grad_norm": 1.4193365573883057, "learning_rate": 5.500551132884262e-06, "log_odds_chosen": 0.7771318554878235, "log_odds_ratio": -0.4882272779941559, "logits/chosen": -0.8326510787010193, "logits/rejected": -0.7547774910926819, "logps/chosen": -0.9714791178703308, "logps/rejected": -1.5215084552764893, "loss": 1.1943, "nll_loss": 1.1078413724899292, "rewards/accuracies": 0.75, "rewards/chosen": -0.09714790433645248, "rewards/margins": 0.055002935230731964, "rewards/rejected": -0.15215083956718445, "step": 2581 }, { "epoch": 1.5751105688577094, "grad_norm": 7.796485900878906, "learning_rate": 5.4995713410900175e-06, "log_odds_chosen": 1.1686910390853882, "log_odds_ratio": -0.5497726202011108, "logits/chosen": -0.8030738830566406, "logits/rejected": -0.8935097455978394, "logps/chosen": -0.8248045444488525, "logps/rejected": -1.7165565490722656, "loss": 0.9893, "nll_loss": 1.067542552947998, "rewards/accuracies": 0.625, "rewards/chosen": -0.08248044550418854, "rewards/margins": 0.08917520195245743, "rewards/rejected": -0.17165565490722656, "step": 2582 }, { "epoch": 1.5757206039347262, "grad_norm": 1.8754056692123413, "learning_rate": 5.4985915492957745e-06, "log_odds_chosen": 2.225811004638672, "log_odds_ratio": -0.24037136137485504, "logits/chosen": -0.7905761003494263, "logits/rejected": -0.8481170535087585, "logps/chosen": -0.8317685127258301, "logps/rejected": -2.311455726623535, "loss": 1.1081, "nll_loss": 1.0822877883911133, "rewards/accuracies": 1.0, "rewards/chosen": -0.0831768587231636, "rewards/margins": 0.14796873927116394, "rewards/rejected": -0.23114559054374695, "step": 2583 }, { "epoch": 1.576330639011743, "grad_norm": 2.473252296447754, "learning_rate": 5.497611757501531e-06, "log_odds_chosen": 0.6664921641349792, "log_odds_ratio": -0.6144312620162964, "logits/chosen": -0.9441173672676086, "logits/rejected": -0.8921613097190857, "logps/chosen": -1.0458134412765503, "logps/rejected": -1.6306995153427124, "loss": 1.1357, "nll_loss": 1.145222783088684, "rewards/accuracies": 0.375, "rewards/chosen": -0.10458134859800339, "rewards/margins": 0.05848860740661621, "rewards/rejected": -0.1630699634552002, "step": 2584 }, { "epoch": 1.5769406740887602, "grad_norm": 1.441246509552002, "learning_rate": 5.496631965707287e-06, "log_odds_chosen": 0.6668229699134827, "log_odds_ratio": -0.4974778890609741, "logits/chosen": -0.9568098187446594, "logits/rejected": -0.8329535722732544, "logps/chosen": -0.9439405798912048, "logps/rejected": -1.4457110166549683, "loss": 1.1185, "nll_loss": 1.3078668117523193, "rewards/accuracies": 0.75, "rewards/chosen": -0.09439405053853989, "rewards/margins": 0.05017705261707306, "rewards/rejected": -0.14457111060619354, "step": 2585 }, { "epoch": 1.577550709165777, "grad_norm": 2.240116596221924, "learning_rate": 5.495652173913044e-06, "log_odds_chosen": 0.8523387908935547, "log_odds_ratio": -0.6244750022888184, "logits/chosen": -1.037717580795288, "logits/rejected": -0.99564528465271, "logps/chosen": -0.9540178775787354, "logps/rejected": -1.581963300704956, "loss": 1.0944, "nll_loss": 1.1222689151763916, "rewards/accuracies": 0.5, "rewards/chosen": -0.09540179371833801, "rewards/margins": 0.06279454380273819, "rewards/rejected": -0.1581963300704956, "step": 2586 }, { "epoch": 1.578160744242794, "grad_norm": 1.419223666191101, "learning_rate": 5.4946723821188e-06, "log_odds_chosen": 0.43263357877731323, "log_odds_ratio": -0.9263094663619995, "logits/chosen": -0.7900945544242859, "logits/rejected": -0.7773907780647278, "logps/chosen": -1.1987075805664062, "logps/rejected": -1.4912757873535156, "loss": 1.2656, "nll_loss": 1.504512906074524, "rewards/accuracies": 0.5, "rewards/chosen": -0.11987075209617615, "rewards/margins": 0.029256828129291534, "rewards/rejected": -0.14912758767604828, "step": 2587 }, { "epoch": 1.578770779319811, "grad_norm": 1.7334259748458862, "learning_rate": 5.493692590324555e-06, "log_odds_chosen": 2.240302562713623, "log_odds_ratio": -0.4502025842666626, "logits/chosen": -0.8400090932846069, "logits/rejected": -0.8571999073028564, "logps/chosen": -0.7543245553970337, "logps/rejected": -2.3780269622802734, "loss": 1.2384, "nll_loss": 1.0924155712127686, "rewards/accuracies": 0.75, "rewards/chosen": -0.07543245702981949, "rewards/margins": 0.1623702496290207, "rewards/rejected": -0.23780269920825958, "step": 2588 }, { "epoch": 1.5793808143968278, "grad_norm": 3.8018970489501953, "learning_rate": 5.492712798530312e-06, "log_odds_chosen": 0.863866925239563, "log_odds_ratio": -0.5423746109008789, "logits/chosen": -0.9701972603797913, "logits/rejected": -0.975214958190918, "logps/chosen": -0.6742736101150513, "logps/rejected": -1.3568862676620483, "loss": 1.076, "nll_loss": 0.9876387715339661, "rewards/accuracies": 0.625, "rewards/chosen": -0.0674273669719696, "rewards/margins": 0.0682612732052803, "rewards/rejected": -0.1356886327266693, "step": 2589 }, { "epoch": 1.5799908494738446, "grad_norm": 1.419095516204834, "learning_rate": 5.491733006736068e-06, "log_odds_chosen": 1.55169677734375, "log_odds_ratio": -0.49286943674087524, "logits/chosen": -0.8843321800231934, "logits/rejected": -0.9977410435676575, "logps/chosen": -0.9026440382003784, "logps/rejected": -2.021329641342163, "loss": 1.0812, "nll_loss": 1.030110239982605, "rewards/accuracies": 0.75, "rewards/chosen": -0.09026440978050232, "rewards/margins": 0.11186856776475906, "rewards/rejected": -0.20213298499584198, "step": 2590 }, { "epoch": 1.5806008845508617, "grad_norm": 1.2123303413391113, "learning_rate": 5.490753214941825e-06, "log_odds_chosen": 2.381711959838867, "log_odds_ratio": -0.23400650918483734, "logits/chosen": -0.876802921295166, "logits/rejected": -0.9270589351654053, "logps/chosen": -0.6067606806755066, "logps/rejected": -2.4126267433166504, "loss": 1.0138, "nll_loss": 0.9259790778160095, "rewards/accuracies": 1.0, "rewards/chosen": -0.06067606806755066, "rewards/margins": 0.18058662116527557, "rewards/rejected": -0.24126268923282623, "step": 2591 }, { "epoch": 1.5812109196278787, "grad_norm": 1.6809529066085815, "learning_rate": 5.489773423147581e-06, "log_odds_chosen": 1.8327560424804688, "log_odds_ratio": -0.30475425720214844, "logits/chosen": -0.8245010375976562, "logits/rejected": -0.7840898633003235, "logps/chosen": -0.6578329801559448, "logps/rejected": -1.9640766382217407, "loss": 0.8259, "nll_loss": 0.8341517448425293, "rewards/accuracies": 0.875, "rewards/chosen": -0.0657832995057106, "rewards/margins": 0.13062436878681183, "rewards/rejected": -0.19640766084194183, "step": 2592 }, { "epoch": 1.5818209547048956, "grad_norm": 7.535183429718018, "learning_rate": 5.4887936313533366e-06, "log_odds_chosen": 1.1619709730148315, "log_odds_ratio": -0.5782104730606079, "logits/chosen": -0.7407215237617493, "logits/rejected": -0.8694180846214294, "logps/chosen": -0.7732101678848267, "logps/rejected": -1.6339699029922485, "loss": 1.0601, "nll_loss": 0.913931131362915, "rewards/accuracies": 0.5, "rewards/chosen": -0.07732102274894714, "rewards/margins": 0.08607596904039383, "rewards/rejected": -0.16339698433876038, "step": 2593 }, { "epoch": 1.5824309897819124, "grad_norm": 8.890690803527832, "learning_rate": 5.4878138395590935e-06, "log_odds_chosen": 1.6474542617797852, "log_odds_ratio": -0.44251909852027893, "logits/chosen": -0.8322910666465759, "logits/rejected": -0.8892808556556702, "logps/chosen": -0.8861182332038879, "logps/rejected": -2.0181965827941895, "loss": 1.3311, "nll_loss": 1.351182460784912, "rewards/accuracies": 0.75, "rewards/chosen": -0.08861182630062103, "rewards/margins": 0.11320783197879791, "rewards/rejected": -0.20181965827941895, "step": 2594 }, { "epoch": 1.5830410248589293, "grad_norm": 1.1356667280197144, "learning_rate": 5.48683404776485e-06, "log_odds_chosen": 1.2002440690994263, "log_odds_ratio": -0.51466965675354, "logits/chosen": -0.7209022045135498, "logits/rejected": -0.7554358243942261, "logps/chosen": -0.8227611780166626, "logps/rejected": -1.5010892152786255, "loss": 1.2325, "nll_loss": 1.1699419021606445, "rewards/accuracies": 0.75, "rewards/chosen": -0.0822761207818985, "rewards/margins": 0.0678328201174736, "rewards/rejected": -0.1501089334487915, "step": 2595 }, { "epoch": 1.5836510599359463, "grad_norm": 1.5641456842422485, "learning_rate": 5.485854255970606e-06, "log_odds_chosen": 0.9264963865280151, "log_odds_ratio": -0.5815582871437073, "logits/chosen": -0.7434080839157104, "logits/rejected": -0.7721868753433228, "logps/chosen": -0.825089156627655, "logps/rejected": -1.55488920211792, "loss": 1.0721, "nll_loss": 1.1814358234405518, "rewards/accuracies": 0.5, "rewards/chosen": -0.08250892162322998, "rewards/margins": 0.07297998666763306, "rewards/rejected": -0.15548890829086304, "step": 2596 }, { "epoch": 1.5842610950129632, "grad_norm": 2.360501289367676, "learning_rate": 5.484874464176363e-06, "log_odds_chosen": 1.3328993320465088, "log_odds_ratio": -0.5101656317710876, "logits/chosen": -0.8818448781967163, "logits/rejected": -0.9077179431915283, "logps/chosen": -0.9034380912780762, "logps/rejected": -2.0035321712493896, "loss": 1.0746, "nll_loss": 0.9688497185707092, "rewards/accuracies": 0.875, "rewards/chosen": -0.09034381061792374, "rewards/margins": 0.11000940948724747, "rewards/rejected": -0.2003532201051712, "step": 2597 }, { "epoch": 1.5848711300899803, "grad_norm": 3.713366985321045, "learning_rate": 5.483894672382119e-06, "log_odds_chosen": 0.9789985418319702, "log_odds_ratio": -0.599122166633606, "logits/chosen": -0.8544267416000366, "logits/rejected": -0.9059382081031799, "logps/chosen": -0.8537832498550415, "logps/rejected": -1.5209283828735352, "loss": 1.1662, "nll_loss": 1.1567771434783936, "rewards/accuracies": 0.5, "rewards/chosen": -0.08537833392620087, "rewards/margins": 0.06671452522277832, "rewards/rejected": -0.152092844247818, "step": 2598 }, { "epoch": 1.585481165166997, "grad_norm": 1.7693957090377808, "learning_rate": 5.482914880587874e-06, "log_odds_chosen": 2.9425485134124756, "log_odds_ratio": -0.33499252796173096, "logits/chosen": -0.8623477220535278, "logits/rejected": -0.9062983393669128, "logps/chosen": -0.5971286296844482, "logps/rejected": -2.8270680904388428, "loss": 1.075, "nll_loss": 0.937977135181427, "rewards/accuracies": 0.75, "rewards/chosen": -0.059712864458560944, "rewards/margins": 0.22299395501613617, "rewards/rejected": -0.2827067971229553, "step": 2599 }, { "epoch": 1.586091200244014, "grad_norm": 1.9012969732284546, "learning_rate": 5.481935088793631e-06, "log_odds_chosen": 1.6997946500778198, "log_odds_ratio": -0.26885759830474854, "logits/chosen": -0.8540576696395874, "logits/rejected": -0.8175234198570251, "logps/chosen": -0.9117425680160522, "logps/rejected": -2.217479944229126, "loss": 1.1512, "nll_loss": 1.0472358465194702, "rewards/accuracies": 1.0, "rewards/chosen": -0.09117425978183746, "rewards/margins": 0.13057374954223633, "rewards/rejected": -0.2217479944229126, "step": 2600 }, { "epoch": 1.5867012353210308, "grad_norm": 2.1383039951324463, "learning_rate": 5.480955296999387e-06, "log_odds_chosen": 1.9673857688903809, "log_odds_ratio": -0.457133024930954, "logits/chosen": -0.8230267763137817, "logits/rejected": -0.7659047245979309, "logps/chosen": -0.7006456851959229, "logps/rejected": -2.170116424560547, "loss": 1.0948, "nll_loss": 0.8773524761199951, "rewards/accuracies": 0.625, "rewards/chosen": -0.07006456702947617, "rewards/margins": 0.14694708585739136, "rewards/rejected": -0.21701166033744812, "step": 2601 }, { "epoch": 1.5873112703980479, "grad_norm": 1.6780409812927246, "learning_rate": 5.479975505205143e-06, "log_odds_chosen": 0.3476588726043701, "log_odds_ratio": -0.6332027316093445, "logits/chosen": -0.8132901191711426, "logits/rejected": -0.8044226169586182, "logps/chosen": -0.9209284782409668, "logps/rejected": -1.226635456085205, "loss": 1.1611, "nll_loss": 1.1833170652389526, "rewards/accuracies": 0.625, "rewards/chosen": -0.0920928493142128, "rewards/margins": 0.03057069703936577, "rewards/rejected": -0.12266354262828827, "step": 2602 }, { "epoch": 1.587921305475065, "grad_norm": 1.848217248916626, "learning_rate": 5.4789957134109e-06, "log_odds_chosen": 0.7699001431465149, "log_odds_ratio": -0.5830568671226501, "logits/chosen": -0.8180171847343445, "logits/rejected": -0.8181729316711426, "logps/chosen": -1.0337603092193604, "logps/rejected": -1.6899828910827637, "loss": 0.978, "nll_loss": 1.1621320247650146, "rewards/accuracies": 0.625, "rewards/chosen": -0.10337603092193604, "rewards/margins": 0.06562226265668869, "rewards/rejected": -0.16899828612804413, "step": 2603 }, { "epoch": 1.5885313405520818, "grad_norm": 1.564782977104187, "learning_rate": 5.4780159216166565e-06, "log_odds_chosen": 1.4521710872650146, "log_odds_ratio": -0.5754609704017639, "logits/chosen": -0.7509757280349731, "logits/rejected": -0.8942426443099976, "logps/chosen": -0.8462483882904053, "logps/rejected": -1.9991090297698975, "loss": 1.0356, "nll_loss": 1.072988510131836, "rewards/accuracies": 0.5, "rewards/chosen": -0.08462484180927277, "rewards/margins": 0.11528605222702026, "rewards/rejected": -0.19991090893745422, "step": 2604 }, { "epoch": 1.5891413756290986, "grad_norm": 1.4379141330718994, "learning_rate": 5.477036129822412e-06, "log_odds_chosen": 0.25820133090019226, "log_odds_ratio": -0.8401973247528076, "logits/chosen": -1.0258780717849731, "logits/rejected": -1.0347096920013428, "logps/chosen": -1.2527774572372437, "logps/rejected": -1.5256662368774414, "loss": 1.1584, "nll_loss": 1.3228040933609009, "rewards/accuracies": 0.5, "rewards/chosen": -0.12527775764465332, "rewards/margins": 0.02728888764977455, "rewards/rejected": -0.15256664156913757, "step": 2605 }, { "epoch": 1.5897514107061155, "grad_norm": 2.8488006591796875, "learning_rate": 5.476056338028169e-06, "log_odds_chosen": 2.3205976486206055, "log_odds_ratio": -0.2303972840309143, "logits/chosen": -0.6366963386535645, "logits/rejected": -0.8613557815551758, "logps/chosen": -0.6878460645675659, "logps/rejected": -2.144184112548828, "loss": 1.1468, "nll_loss": 0.9633470773696899, "rewards/accuracies": 0.875, "rewards/chosen": -0.06878460943698883, "rewards/margins": 0.14563380181789398, "rewards/rejected": -0.2144184112548828, "step": 2606 }, { "epoch": 1.5903614457831325, "grad_norm": 2.0561881065368652, "learning_rate": 5.475076546233925e-06, "log_odds_chosen": 1.3079198598861694, "log_odds_ratio": -0.36481207609176636, "logits/chosen": -0.7483435869216919, "logits/rejected": -0.9517335295677185, "logps/chosen": -0.8583503365516663, "logps/rejected": -1.818183183670044, "loss": 1.0938, "nll_loss": 0.9118853807449341, "rewards/accuracies": 1.0, "rewards/chosen": -0.0858350470662117, "rewards/margins": 0.09598328173160553, "rewards/rejected": -0.18181830644607544, "step": 2607 }, { "epoch": 1.5909714808601496, "grad_norm": 1.6135681867599487, "learning_rate": 5.474096754439682e-06, "log_odds_chosen": 1.6909058094024658, "log_odds_ratio": -0.4477522373199463, "logits/chosen": -0.9664735794067383, "logits/rejected": -0.94975346326828, "logps/chosen": -0.82813560962677, "logps/rejected": -2.0766327381134033, "loss": 1.1173, "nll_loss": 1.0715949535369873, "rewards/accuracies": 0.625, "rewards/chosen": -0.082813560962677, "rewards/margins": 0.12484971433877945, "rewards/rejected": -0.20766326785087585, "step": 2608 }, { "epoch": 1.5915815159371665, "grad_norm": 1.3805915117263794, "learning_rate": 5.473116962645438e-06, "log_odds_chosen": 0.9851019382476807, "log_odds_ratio": -0.3854188024997711, "logits/chosen": -0.9413793087005615, "logits/rejected": -0.9066373109817505, "logps/chosen": -0.8893339037895203, "logps/rejected": -1.5274784564971924, "loss": 1.1457, "nll_loss": 1.1980053186416626, "rewards/accuracies": 0.75, "rewards/chosen": -0.08893338590860367, "rewards/margins": 0.06381445378065109, "rewards/rejected": -0.15274783968925476, "step": 2609 }, { "epoch": 1.5921915510141833, "grad_norm": 1.3338696956634521, "learning_rate": 5.472137170851194e-06, "log_odds_chosen": 2.361036777496338, "log_odds_ratio": -0.3541642725467682, "logits/chosen": -0.7919530868530273, "logits/rejected": -0.7390632033348083, "logps/chosen": -0.9919770359992981, "logps/rejected": -3.0048422813415527, "loss": 1.1366, "nll_loss": 1.0452700853347778, "rewards/accuracies": 0.75, "rewards/chosen": -0.09919770061969757, "rewards/margins": 0.20128653943538666, "rewards/rejected": -0.30048424005508423, "step": 2610 }, { "epoch": 1.5928015860912002, "grad_norm": 2.027378559112549, "learning_rate": 5.47115737905695e-06, "log_odds_chosen": 0.4432576298713684, "log_odds_ratio": -0.6818237900733948, "logits/chosen": -1.061099886894226, "logits/rejected": -1.0764002799987793, "logps/chosen": -0.8520164489746094, "logps/rejected": -1.2112925052642822, "loss": 1.2044, "nll_loss": 1.3956997394561768, "rewards/accuracies": 0.625, "rewards/chosen": -0.08520164340734482, "rewards/margins": 0.035927608609199524, "rewards/rejected": -0.12112925946712494, "step": 2611 }, { "epoch": 1.5934116211682172, "grad_norm": 3.5010385513305664, "learning_rate": 5.470177587262706e-06, "log_odds_chosen": 2.072025775909424, "log_odds_ratio": -0.3332066535949707, "logits/chosen": -0.701118528842926, "logits/rejected": -0.8015366792678833, "logps/chosen": -0.6771595478057861, "logps/rejected": -2.184096336364746, "loss": 1.1018, "nll_loss": 0.8347341418266296, "rewards/accuracies": 0.75, "rewards/chosen": -0.06771595776081085, "rewards/margins": 0.15069368481636047, "rewards/rejected": -0.21840962767601013, "step": 2612 }, { "epoch": 1.594021656245234, "grad_norm": 2.438830852508545, "learning_rate": 5.469197795468462e-06, "log_odds_chosen": 1.6624923944473267, "log_odds_ratio": -0.4396184980869293, "logits/chosen": -0.8753170967102051, "logits/rejected": -0.9971723556518555, "logps/chosen": -0.6593282222747803, "logps/rejected": -1.8552082777023315, "loss": 1.0868, "nll_loss": 0.9362027645111084, "rewards/accuracies": 0.75, "rewards/chosen": -0.06593281775712967, "rewards/margins": 0.11958802491426468, "rewards/rejected": -0.18552084267139435, "step": 2613 }, { "epoch": 1.5946316913222511, "grad_norm": 1.27996027469635, "learning_rate": 5.468218003674219e-06, "log_odds_chosen": 0.6120678186416626, "log_odds_ratio": -0.5548153519630432, "logits/chosen": -0.7589644193649292, "logits/rejected": -0.7556136250495911, "logps/chosen": -0.7472729086875916, "logps/rejected": -1.1290439367294312, "loss": 0.9913, "nll_loss": 0.9806371927261353, "rewards/accuracies": 0.5, "rewards/chosen": -0.07472728937864304, "rewards/margins": 0.03817710280418396, "rewards/rejected": -0.1129043847322464, "step": 2614 }, { "epoch": 1.595241726399268, "grad_norm": 3.3165628910064697, "learning_rate": 5.4672382118799755e-06, "log_odds_chosen": 0.37860584259033203, "log_odds_ratio": -0.5948148369789124, "logits/chosen": -0.8112127184867859, "logits/rejected": -0.7418652176856995, "logps/chosen": -0.9530637264251709, "logps/rejected": -1.210900902748108, "loss": 1.2222, "nll_loss": 1.3068360090255737, "rewards/accuracies": 0.625, "rewards/chosen": -0.09530637413263321, "rewards/margins": 0.02578371949493885, "rewards/rejected": -0.12109009176492691, "step": 2615 }, { "epoch": 1.5958517614762848, "grad_norm": 1.4643030166625977, "learning_rate": 5.466258420085731e-06, "log_odds_chosen": 1.3871979713439941, "log_odds_ratio": -0.5008931159973145, "logits/chosen": -1.015526294708252, "logits/rejected": -0.95301353931427, "logps/chosen": -0.8392719626426697, "logps/rejected": -1.9231324195861816, "loss": 1.0747, "nll_loss": 1.1843725442886353, "rewards/accuracies": 0.625, "rewards/chosen": -0.0839271992444992, "rewards/margins": 0.10838603228330612, "rewards/rejected": -0.19231322407722473, "step": 2616 }, { "epoch": 1.5964617965533017, "grad_norm": 1.6103819608688354, "learning_rate": 5.465278628291488e-06, "log_odds_chosen": 1.8749456405639648, "log_odds_ratio": -0.43172889947891235, "logits/chosen": -0.9492470622062683, "logits/rejected": -0.9398029446601868, "logps/chosen": -0.8321407437324524, "logps/rejected": -2.209883451461792, "loss": 1.3139, "nll_loss": 1.393563985824585, "rewards/accuracies": 0.75, "rewards/chosen": -0.08321408182382584, "rewards/margins": 0.1377742737531662, "rewards/rejected": -0.22098836302757263, "step": 2617 }, { "epoch": 1.5970718316303187, "grad_norm": 1.931990146636963, "learning_rate": 5.464298836497244e-06, "log_odds_chosen": 1.421033501625061, "log_odds_ratio": -0.46456146240234375, "logits/chosen": -0.8444852232933044, "logits/rejected": -0.632911205291748, "logps/chosen": -0.7147053480148315, "logps/rejected": -1.8599234819412231, "loss": 1.1489, "nll_loss": 1.0662977695465088, "rewards/accuracies": 0.75, "rewards/chosen": -0.07147054374217987, "rewards/margins": 0.1145218014717102, "rewards/rejected": -0.18599234521389008, "step": 2618 }, { "epoch": 1.5976818667073358, "grad_norm": 4.914978981018066, "learning_rate": 5.463319044703001e-06, "log_odds_chosen": 0.5318078398704529, "log_odds_ratio": -0.5586918592453003, "logits/chosen": -0.8476191163063049, "logits/rejected": -0.9036802053451538, "logps/chosen": -0.7849323749542236, "logps/rejected": -1.1193106174468994, "loss": 1.0438, "nll_loss": 0.9170666933059692, "rewards/accuracies": 0.75, "rewards/chosen": -0.07849323749542236, "rewards/margins": 0.0334378257393837, "rewards/rejected": -0.11193107068538666, "step": 2619 }, { "epoch": 1.5982919017843527, "grad_norm": 1.1942028999328613, "learning_rate": 5.462339252908757e-06, "log_odds_chosen": 0.8749598264694214, "log_odds_ratio": -0.7399158477783203, "logits/chosen": -0.9238083958625793, "logits/rejected": -1.0162484645843506, "logps/chosen": -0.9579551219940186, "logps/rejected": -1.6885063648223877, "loss": 1.0574, "nll_loss": 1.1105902194976807, "rewards/accuracies": 0.375, "rewards/chosen": -0.09579551964998245, "rewards/margins": 0.07305511832237244, "rewards/rejected": -0.1688506305217743, "step": 2620 }, { "epoch": 1.5989019368613695, "grad_norm": 1.7200804948806763, "learning_rate": 5.461359461114513e-06, "log_odds_chosen": 1.7857780456542969, "log_odds_ratio": -0.29743844270706177, "logits/chosen": -0.7819284200668335, "logits/rejected": -0.7765580415725708, "logps/chosen": -0.6365269422531128, "logps/rejected": -1.7170008420944214, "loss": 0.9564, "nll_loss": 0.9616667628288269, "rewards/accuracies": 0.875, "rewards/chosen": -0.06365270167589188, "rewards/margins": 0.10804738849401474, "rewards/rejected": -0.1717001050710678, "step": 2621 }, { "epoch": 1.5995119719383863, "grad_norm": 1.613180160522461, "learning_rate": 5.460379669320269e-06, "log_odds_chosen": 1.5721118450164795, "log_odds_ratio": -0.5168807506561279, "logits/chosen": -0.9598037600517273, "logits/rejected": -0.9414520263671875, "logps/chosen": -0.9326207041740417, "logps/rejected": -2.27883243560791, "loss": 1.2313, "nll_loss": 1.094595193862915, "rewards/accuracies": 0.375, "rewards/chosen": -0.09326206892728806, "rewards/margins": 0.13462115824222565, "rewards/rejected": -0.2278832346200943, "step": 2622 }, { "epoch": 1.6001220070154034, "grad_norm": 0.9021689891815186, "learning_rate": 5.459399877526025e-06, "log_odds_chosen": 0.9172555208206177, "log_odds_ratio": -0.6624044179916382, "logits/chosen": -1.102678656578064, "logits/rejected": -0.9869332313537598, "logps/chosen": -0.9581718444824219, "logps/rejected": -1.6554555892944336, "loss": 1.2802, "nll_loss": 1.509353756904602, "rewards/accuracies": 0.625, "rewards/chosen": -0.0958171933889389, "rewards/margins": 0.06972837448120117, "rewards/rejected": -0.16554556787014008, "step": 2623 }, { "epoch": 1.6007320420924203, "grad_norm": 1.969775915145874, "learning_rate": 5.4584200857317814e-06, "log_odds_chosen": 2.2259416580200195, "log_odds_ratio": -0.4812028110027313, "logits/chosen": -0.8501545190811157, "logits/rejected": -0.9783651232719421, "logps/chosen": -0.7839503288269043, "logps/rejected": -2.620736598968506, "loss": 1.0712, "nll_loss": 0.9678223133087158, "rewards/accuracies": 0.625, "rewards/chosen": -0.07839503884315491, "rewards/margins": 0.18367864191532135, "rewards/rejected": -0.26207366585731506, "step": 2624 }, { "epoch": 1.6013420771694373, "grad_norm": 2.0844831466674805, "learning_rate": 5.457440293937538e-06, "log_odds_chosen": 0.8208471536636353, "log_odds_ratio": -0.6913638710975647, "logits/chosen": -0.9732643365859985, "logits/rejected": -0.9375481605529785, "logps/chosen": -0.8492740392684937, "logps/rejected": -1.610071063041687, "loss": 1.0563, "nll_loss": 1.1613353490829468, "rewards/accuracies": 0.375, "rewards/chosen": -0.08492740988731384, "rewards/margins": 0.07607971131801605, "rewards/rejected": -0.1610071063041687, "step": 2625 }, { "epoch": 1.6019521122464542, "grad_norm": 1.695907711982727, "learning_rate": 5.4564605021432945e-06, "log_odds_chosen": 1.3891297578811646, "log_odds_ratio": -0.5332578420639038, "logits/chosen": -0.7435401678085327, "logits/rejected": -0.6701388359069824, "logps/chosen": -0.7186709046363831, "logps/rejected": -1.8326250314712524, "loss": 1.1057, "nll_loss": 0.8832957744598389, "rewards/accuracies": 0.75, "rewards/chosen": -0.07186709344387054, "rewards/margins": 0.11139541864395142, "rewards/rejected": -0.18326251208782196, "step": 2626 }, { "epoch": 1.602562147323471, "grad_norm": 2.696498394012451, "learning_rate": 5.455480710349051e-06, "log_odds_chosen": 0.39006662368774414, "log_odds_ratio": -0.585674524307251, "logits/chosen": -0.9908345341682434, "logits/rejected": -0.9723615646362305, "logps/chosen": -0.9778731465339661, "logps/rejected": -1.1843847036361694, "loss": 1.1901, "nll_loss": 1.3271591663360596, "rewards/accuracies": 0.625, "rewards/chosen": -0.09778732806444168, "rewards/margins": 0.020651141181588173, "rewards/rejected": -0.1184384673833847, "step": 2627 }, { "epoch": 1.6031721824004879, "grad_norm": 3.0594444274902344, "learning_rate": 5.454500918554807e-06, "log_odds_chosen": 2.8255183696746826, "log_odds_ratio": -0.22884945571422577, "logits/chosen": -0.8530361652374268, "logits/rejected": -0.8763452172279358, "logps/chosen": -0.8035987019538879, "logps/rejected": -3.0899837017059326, "loss": 1.1327, "nll_loss": 1.192514181137085, "rewards/accuracies": 1.0, "rewards/chosen": -0.08035986870527267, "rewards/margins": 0.22863851487636566, "rewards/rejected": -0.3089984059333801, "step": 2628 }, { "epoch": 1.603782217477505, "grad_norm": 4.124186038970947, "learning_rate": 5.453521126760563e-06, "log_odds_chosen": 0.40570011734962463, "log_odds_ratio": -0.6513538360595703, "logits/chosen": -0.7983009815216064, "logits/rejected": -0.5665170550346375, "logps/chosen": -0.8895670175552368, "logps/rejected": -1.1502466201782227, "loss": 1.0486, "nll_loss": 1.0789172649383545, "rewards/accuracies": 0.5, "rewards/chosen": -0.08895670622587204, "rewards/margins": 0.026067964732646942, "rewards/rejected": -0.11502467095851898, "step": 2629 }, { "epoch": 1.604392252554522, "grad_norm": 1.3633906841278076, "learning_rate": 5.452541334966319e-06, "log_odds_chosen": 0.44687917828559875, "log_odds_ratio": -0.5475033521652222, "logits/chosen": -0.8913214206695557, "logits/rejected": -0.9823046922683716, "logps/chosen": -0.8401917815208435, "logps/rejected": -1.14474356174469, "loss": 1.1184, "nll_loss": 1.0056883096694946, "rewards/accuracies": 0.875, "rewards/chosen": -0.08401917666196823, "rewards/margins": 0.030455179512500763, "rewards/rejected": -0.114474356174469, "step": 2630 }, { "epoch": 1.6050022876315388, "grad_norm": 1.025444507598877, "learning_rate": 5.451561543172076e-06, "log_odds_chosen": 2.5228254795074463, "log_odds_ratio": -0.3995872735977173, "logits/chosen": -0.6155611276626587, "logits/rejected": -0.7885991930961609, "logps/chosen": -0.5829458236694336, "logps/rejected": -2.686227798461914, "loss": 0.8968, "nll_loss": 0.750337541103363, "rewards/accuracies": 0.625, "rewards/chosen": -0.05829457938671112, "rewards/margins": 0.21032820641994476, "rewards/rejected": -0.2686227858066559, "step": 2631 }, { "epoch": 1.6056123227085557, "grad_norm": 1.777635931968689, "learning_rate": 5.450581751377832e-06, "log_odds_chosen": 1.4082995653152466, "log_odds_ratio": -0.4173334538936615, "logits/chosen": -0.9234285354614258, "logits/rejected": -0.9466781616210938, "logps/chosen": -0.7980602383613586, "logps/rejected": -1.7900261878967285, "loss": 1.0163, "nll_loss": 0.9494756460189819, "rewards/accuracies": 0.875, "rewards/chosen": -0.07980602234601974, "rewards/margins": 0.09919659793376923, "rewards/rejected": -0.17900261282920837, "step": 2632 }, { "epoch": 1.6062223577855725, "grad_norm": 1.8056106567382812, "learning_rate": 5.449601959583588e-06, "log_odds_chosen": 0.389695942401886, "log_odds_ratio": -0.7780151963233948, "logits/chosen": -0.852490246295929, "logits/rejected": -0.7278626561164856, "logps/chosen": -1.0469523668289185, "logps/rejected": -1.3026419878005981, "loss": 1.1505, "nll_loss": 1.117430567741394, "rewards/accuracies": 0.5, "rewards/chosen": -0.10469523072242737, "rewards/margins": 0.025568963959813118, "rewards/rejected": -0.13026419281959534, "step": 2633 }, { "epoch": 1.6068323928625896, "grad_norm": 10.746918678283691, "learning_rate": 5.448622167789344e-06, "log_odds_chosen": 1.078220248222351, "log_odds_ratio": -0.5191407203674316, "logits/chosen": -0.875114917755127, "logits/rejected": -0.9645814895629883, "logps/chosen": -0.7141215801239014, "logps/rejected": -1.454228401184082, "loss": 1.001, "nll_loss": 0.7915502786636353, "rewards/accuracies": 0.75, "rewards/chosen": -0.07141216099262238, "rewards/margins": 0.0740106850862503, "rewards/rejected": -0.14542284607887268, "step": 2634 }, { "epoch": 1.6074424279396067, "grad_norm": 1.8759804964065552, "learning_rate": 5.4476423759951005e-06, "log_odds_chosen": 2.274355888366699, "log_odds_ratio": -0.36428385972976685, "logits/chosen": -0.6715002059936523, "logits/rejected": -0.7366835474967957, "logps/chosen": -0.8021427392959595, "logps/rejected": -2.4714882373809814, "loss": 1.0863, "nll_loss": 0.9860646724700928, "rewards/accuracies": 0.75, "rewards/chosen": -0.08021427690982819, "rewards/margins": 0.16693457961082458, "rewards/rejected": -0.24714884161949158, "step": 2635 }, { "epoch": 1.6080524630166235, "grad_norm": 1.6447696685791016, "learning_rate": 5.4466625842008574e-06, "log_odds_chosen": 1.3484619855880737, "log_odds_ratio": -0.44796866178512573, "logits/chosen": -0.67594313621521, "logits/rejected": -0.7245215177536011, "logps/chosen": -0.698398232460022, "logps/rejected": -1.7374900579452515, "loss": 1.0307, "nll_loss": 0.9682902693748474, "rewards/accuracies": 0.625, "rewards/chosen": -0.06983982026576996, "rewards/margins": 0.10390918701887131, "rewards/rejected": -0.17374901473522186, "step": 2636 }, { "epoch": 1.6086624980936404, "grad_norm": 1.544813871383667, "learning_rate": 5.4456827924066136e-06, "log_odds_chosen": 1.2731232643127441, "log_odds_ratio": -0.5531101822853088, "logits/chosen": -0.7026165127754211, "logits/rejected": -0.6311897039413452, "logps/chosen": -0.7258175611495972, "logps/rejected": -1.5852216482162476, "loss": 1.0884, "nll_loss": 1.0111358165740967, "rewards/accuracies": 0.625, "rewards/chosen": -0.07258175313472748, "rewards/margins": 0.0859404057264328, "rewards/rejected": -0.15852215886116028, "step": 2637 }, { "epoch": 1.6092725331706572, "grad_norm": 1.293685793876648, "learning_rate": 5.44470300061237e-06, "log_odds_chosen": 0.45678532123565674, "log_odds_ratio": -0.6097384691238403, "logits/chosen": -0.7488848567008972, "logits/rejected": -0.787596583366394, "logps/chosen": -0.899842381477356, "logps/rejected": -1.237017273902893, "loss": 1.1428, "nll_loss": 1.1113691329956055, "rewards/accuracies": 0.375, "rewards/chosen": -0.0899842381477356, "rewards/margins": 0.03371749818325043, "rewards/rejected": -0.12370173633098602, "step": 2638 }, { "epoch": 1.6098825682476743, "grad_norm": 5.061835765838623, "learning_rate": 5.443723208818126e-06, "log_odds_chosen": 0.4439396262168884, "log_odds_ratio": -0.7850078344345093, "logits/chosen": -0.6828522682189941, "logits/rejected": -0.7361525893211365, "logps/chosen": -1.1409435272216797, "logps/rejected": -1.39688241481781, "loss": 0.9688, "nll_loss": 0.9188871383666992, "rewards/accuracies": 0.625, "rewards/chosen": -0.11409434676170349, "rewards/margins": 0.025593888014554977, "rewards/rejected": -0.13968823850154877, "step": 2639 }, { "epoch": 1.6104926033246911, "grad_norm": 1.5294852256774902, "learning_rate": 5.442743417023882e-06, "log_odds_chosen": 0.6271345615386963, "log_odds_ratio": -0.5194215774536133, "logits/chosen": -0.8450523018836975, "logits/rejected": -0.7948639988899231, "logps/chosen": -0.9432064294815063, "logps/rejected": -1.3733762502670288, "loss": 1.1859, "nll_loss": 1.0333995819091797, "rewards/accuracies": 0.75, "rewards/chosen": -0.0943206399679184, "rewards/margins": 0.043016981333494186, "rewards/rejected": -0.13733762502670288, "step": 2640 }, { "epoch": 1.6111026384017082, "grad_norm": 2.1431689262390137, "learning_rate": 5.441763625229638e-06, "log_odds_chosen": 3.4024477005004883, "log_odds_ratio": -0.1524641513824463, "logits/chosen": -0.7634443044662476, "logits/rejected": -0.8768596053123474, "logps/chosen": -0.7056323289871216, "logps/rejected": -3.3852734565734863, "loss": 0.935, "nll_loss": 0.8665724396705627, "rewards/accuracies": 1.0, "rewards/chosen": -0.07056322693824768, "rewards/margins": 0.26796412467956543, "rewards/rejected": -0.3385273814201355, "step": 2641 }, { "epoch": 1.611712673478725, "grad_norm": 2.18375563621521, "learning_rate": 5.440783833435395e-06, "log_odds_chosen": 0.49934083223342896, "log_odds_ratio": -0.6681643724441528, "logits/chosen": -0.7986195087432861, "logits/rejected": -0.8547290563583374, "logps/chosen": -0.9113449454307556, "logps/rejected": -1.250035285949707, "loss": 1.1959, "nll_loss": 1.0938812494277954, "rewards/accuracies": 0.625, "rewards/chosen": -0.09113449603319168, "rewards/margins": 0.03386903554201126, "rewards/rejected": -0.12500354647636414, "step": 2642 }, { "epoch": 1.612322708555742, "grad_norm": 10.925097465515137, "learning_rate": 5.439804041641151e-06, "log_odds_chosen": 3.1869056224823, "log_odds_ratio": -0.1179371029138565, "logits/chosen": -0.8119745850563049, "logits/rejected": -0.7719530463218689, "logps/chosen": -0.7775290608406067, "logps/rejected": -3.313075065612793, "loss": 1.0913, "nll_loss": 0.9921322464942932, "rewards/accuracies": 1.0, "rewards/chosen": -0.07775291055440903, "rewards/margins": 0.2535546123981476, "rewards/rejected": -0.3313075006008148, "step": 2643 }, { "epoch": 1.6129327436327587, "grad_norm": 1.6614857912063599, "learning_rate": 5.438824249846907e-06, "log_odds_chosen": 1.87815260887146, "log_odds_ratio": -0.4089028239250183, "logits/chosen": -0.6580256223678589, "logits/rejected": -0.7293339967727661, "logps/chosen": -0.6281273365020752, "logps/rejected": -2.1559953689575195, "loss": 1.1567, "nll_loss": 0.9676709175109863, "rewards/accuracies": 0.875, "rewards/chosen": -0.06281273066997528, "rewards/margins": 0.15278679132461548, "rewards/rejected": -0.21559953689575195, "step": 2644 }, { "epoch": 1.6135427787097758, "grad_norm": 1.1941829919815063, "learning_rate": 5.437844458052663e-06, "log_odds_chosen": 1.9432024955749512, "log_odds_ratio": -0.40395039319992065, "logits/chosen": -0.7767165899276733, "logits/rejected": -0.9349473714828491, "logps/chosen": -0.7393653392791748, "logps/rejected": -2.26100492477417, "loss": 1.0758, "nll_loss": 0.9459221363067627, "rewards/accuracies": 0.875, "rewards/chosen": -0.07393653690814972, "rewards/margins": 0.15216395258903503, "rewards/rejected": -0.22610050439834595, "step": 2645 }, { "epoch": 1.6141528137867929, "grad_norm": 1.399639368057251, "learning_rate": 5.4368646662584195e-06, "log_odds_chosen": 2.2982800006866455, "log_odds_ratio": -0.3461473286151886, "logits/chosen": -0.8436600565910339, "logits/rejected": -0.8876062631607056, "logps/chosen": -0.8970966339111328, "logps/rejected": -2.895616292953491, "loss": 1.1401, "nll_loss": 1.1518895626068115, "rewards/accuracies": 0.75, "rewards/chosen": -0.08970966190099716, "rewards/margins": 0.19985198974609375, "rewards/rejected": -0.2895616590976715, "step": 2646 }, { "epoch": 1.6147628488638097, "grad_norm": 2.1573402881622314, "learning_rate": 5.4358848744641765e-06, "log_odds_chosen": 2.1456758975982666, "log_odds_ratio": -0.3790813684463501, "logits/chosen": -0.7101891040802002, "logits/rejected": -0.8471784591674805, "logps/chosen": -0.8164104223251343, "logps/rejected": -2.621821403503418, "loss": 1.0326, "nll_loss": 0.9746493697166443, "rewards/accuracies": 0.875, "rewards/chosen": -0.08164104074239731, "rewards/margins": 0.18054109811782837, "rewards/rejected": -0.2621821463108063, "step": 2647 }, { "epoch": 1.6153728839408266, "grad_norm": 1.702208161354065, "learning_rate": 5.434905082669933e-06, "log_odds_chosen": 1.3775256872177124, "log_odds_ratio": -0.416368305683136, "logits/chosen": -0.8681955337524414, "logits/rejected": -0.9475646615028381, "logps/chosen": -0.9190174341201782, "logps/rejected": -2.0190067291259766, "loss": 1.0775, "nll_loss": 1.329732894897461, "rewards/accuracies": 0.875, "rewards/chosen": -0.0919017493724823, "rewards/margins": 0.10999894142150879, "rewards/rejected": -0.2019006907939911, "step": 2648 }, { "epoch": 1.6159829190178434, "grad_norm": 1.2893993854522705, "learning_rate": 5.433925290875689e-06, "log_odds_chosen": 1.4156105518341064, "log_odds_ratio": -0.3970262408256531, "logits/chosen": -0.6615968942642212, "logits/rejected": -0.7265824675559998, "logps/chosen": -0.6100510954856873, "logps/rejected": -1.6689209938049316, "loss": 0.9934, "nll_loss": 1.0060633420944214, "rewards/accuracies": 0.625, "rewards/chosen": -0.061005111783742905, "rewards/margins": 0.10588700324296951, "rewards/rejected": -0.16689211130142212, "step": 2649 }, { "epoch": 1.6165929540948605, "grad_norm": 1.4932652711868286, "learning_rate": 5.432945499081445e-06, "log_odds_chosen": 0.8360008001327515, "log_odds_ratio": -0.45249930024147034, "logits/chosen": -0.8909012675285339, "logits/rejected": -0.7814252376556396, "logps/chosen": -0.7930747270584106, "logps/rejected": -1.3384191989898682, "loss": 1.065, "nll_loss": 0.9835062026977539, "rewards/accuracies": 0.875, "rewards/chosen": -0.07930748164653778, "rewards/margins": 0.054534442722797394, "rewards/rejected": -0.13384191691875458, "step": 2650 }, { "epoch": 1.6172029891718775, "grad_norm": 2.9869720935821533, "learning_rate": 5.431965707287201e-06, "log_odds_chosen": 1.459904432296753, "log_odds_ratio": -0.40757569670677185, "logits/chosen": -0.531848132610321, "logits/rejected": -0.41637498140335083, "logps/chosen": -0.5753880143165588, "logps/rejected": -1.4808504581451416, "loss": 1.0862, "nll_loss": 0.7839810848236084, "rewards/accuracies": 0.625, "rewards/chosen": -0.057538799941539764, "rewards/margins": 0.09054624289274216, "rewards/rejected": -0.14808505773544312, "step": 2651 }, { "epoch": 1.6178130242488944, "grad_norm": 2.515695333480835, "learning_rate": 5.430985915492957e-06, "log_odds_chosen": 1.4626717567443848, "log_odds_ratio": -0.4137900769710541, "logits/chosen": -0.699590802192688, "logits/rejected": -0.8589851260185242, "logps/chosen": -0.6890032887458801, "logps/rejected": -1.6472067832946777, "loss": 0.9905, "nll_loss": 0.9031238555908203, "rewards/accuracies": 0.625, "rewards/chosen": -0.06890033185482025, "rewards/margins": 0.0958203449845314, "rewards/rejected": -0.16472068428993225, "step": 2652 }, { "epoch": 1.6184230593259112, "grad_norm": 2.132242441177368, "learning_rate": 5.430006123698714e-06, "log_odds_chosen": 3.24906849861145, "log_odds_ratio": -0.44554805755615234, "logits/chosen": -0.8492903113365173, "logits/rejected": -0.9909859895706177, "logps/chosen": -0.6816325783729553, "logps/rejected": -3.168546199798584, "loss": 1.1659, "nll_loss": 1.1877667903900146, "rewards/accuracies": 0.625, "rewards/chosen": -0.06816326081752777, "rewards/margins": 0.2486913502216339, "rewards/rejected": -0.3168545961380005, "step": 2653 }, { "epoch": 1.619033094402928, "grad_norm": 3.775775671005249, "learning_rate": 5.42902633190447e-06, "log_odds_chosen": 2.4142487049102783, "log_odds_ratio": -0.2693481147289276, "logits/chosen": -0.8424720764160156, "logits/rejected": -0.921898603439331, "logps/chosen": -0.6189187169075012, "logps/rejected": -2.4110162258148193, "loss": 1.0022, "nll_loss": 0.9860739707946777, "rewards/accuracies": 0.875, "rewards/chosen": -0.06189187243580818, "rewards/margins": 0.17920975387096405, "rewards/rejected": -0.24110162258148193, "step": 2654 }, { "epoch": 1.619643129479945, "grad_norm": 1.2403453588485718, "learning_rate": 5.428046540110226e-06, "log_odds_chosen": 3.0476152896881104, "log_odds_ratio": -0.16445893049240112, "logits/chosen": -0.8648727536201477, "logits/rejected": -0.9344479441642761, "logps/chosen": -0.7868419885635376, "logps/rejected": -3.2700998783111572, "loss": 1.2414, "nll_loss": 0.9891548156738281, "rewards/accuracies": 1.0, "rewards/chosen": -0.07868419587612152, "rewards/margins": 0.24832579493522644, "rewards/rejected": -0.32701000571250916, "step": 2655 }, { "epoch": 1.620253164556962, "grad_norm": 1.5927519798278809, "learning_rate": 5.4270667483159824e-06, "log_odds_chosen": 1.026388168334961, "log_odds_ratio": -0.5922088623046875, "logits/chosen": -0.8153365850448608, "logits/rejected": -0.8298490047454834, "logps/chosen": -0.8259839415550232, "logps/rejected": -1.4457542896270752, "loss": 1.0631, "nll_loss": 1.0341923236846924, "rewards/accuracies": 0.375, "rewards/chosen": -0.08259838819503784, "rewards/margins": 0.061977021396160126, "rewards/rejected": -0.14457541704177856, "step": 2656 }, { "epoch": 1.620863199633979, "grad_norm": 2.1531243324279785, "learning_rate": 5.4260869565217386e-06, "log_odds_chosen": 2.0942957401275635, "log_odds_ratio": -0.3465011715888977, "logits/chosen": -0.9829750061035156, "logits/rejected": -1.0023458003997803, "logps/chosen": -0.7167975902557373, "logps/rejected": -2.340703010559082, "loss": 1.0502, "nll_loss": 1.0592141151428223, "rewards/accuracies": 0.875, "rewards/chosen": -0.07167975604534149, "rewards/margins": 0.16239053010940552, "rewards/rejected": -0.234070286154747, "step": 2657 }, { "epoch": 1.621473234710996, "grad_norm": 2.745842933654785, "learning_rate": 5.425107164727495e-06, "log_odds_chosen": 0.9932626485824585, "log_odds_ratio": -0.4450188875198364, "logits/chosen": -0.7876451015472412, "logits/rejected": -0.8739555478096008, "logps/chosen": -0.8656588792800903, "logps/rejected": -1.5170594453811646, "loss": 1.1094, "nll_loss": 1.2544306516647339, "rewards/accuracies": 0.875, "rewards/chosen": -0.08656589686870575, "rewards/margins": 0.06514005362987518, "rewards/rejected": -0.15170595049858093, "step": 2658 }, { "epoch": 1.6220832697880128, "grad_norm": 1.6937053203582764, "learning_rate": 5.424127372933252e-06, "log_odds_chosen": 1.2600539922714233, "log_odds_ratio": -0.47684288024902344, "logits/chosen": -0.7661404013633728, "logits/rejected": -0.7803698778152466, "logps/chosen": -0.7859441041946411, "logps/rejected": -1.6923487186431885, "loss": 1.0941, "nll_loss": 1.2264149188995361, "rewards/accuracies": 0.625, "rewards/chosen": -0.07859441637992859, "rewards/margins": 0.09064046293497086, "rewards/rejected": -0.16923488676548004, "step": 2659 }, { "epoch": 1.6226933048650296, "grad_norm": 2.6507372856140137, "learning_rate": 5.423147581139008e-06, "log_odds_chosen": 0.8690325617790222, "log_odds_ratio": -0.5308202505111694, "logits/chosen": -0.6819126605987549, "logits/rejected": -0.7065168619155884, "logps/chosen": -0.7216345071792603, "logps/rejected": -1.385457992553711, "loss": 1.073, "nll_loss": 0.8589818477630615, "rewards/accuracies": 0.5, "rewards/chosen": -0.07216345518827438, "rewards/margins": 0.06638234108686447, "rewards/rejected": -0.13854579627513885, "step": 2660 }, { "epoch": 1.6233033399420467, "grad_norm": 2.023843765258789, "learning_rate": 5.422167789344765e-06, "log_odds_chosen": 0.2561919391155243, "log_odds_ratio": -0.5819820761680603, "logits/chosen": -0.833949089050293, "logits/rejected": -0.7877292037010193, "logps/chosen": -0.7033746242523193, "logps/rejected": -0.8620610237121582, "loss": 1.0196, "nll_loss": 0.9653475284576416, "rewards/accuracies": 0.875, "rewards/chosen": -0.0703374594449997, "rewards/margins": 0.015868639573454857, "rewards/rejected": -0.0862061083316803, "step": 2661 }, { "epoch": 1.6239133750190637, "grad_norm": 1.5096745491027832, "learning_rate": 5.42118799755052e-06, "log_odds_chosen": 0.4015875458717346, "log_odds_ratio": -0.583277702331543, "logits/chosen": -1.0189039707183838, "logits/rejected": -0.8118681311607361, "logps/chosen": -0.7510931491851807, "logps/rejected": -0.9568696022033691, "loss": 1.0555, "nll_loss": 1.0074448585510254, "rewards/accuracies": 0.625, "rewards/chosen": -0.0751093178987503, "rewards/margins": 0.020577644929289818, "rewards/rejected": -0.09568696469068527, "step": 2662 }, { "epoch": 1.6245234100960806, "grad_norm": 1.6216745376586914, "learning_rate": 5.420208205756276e-06, "log_odds_chosen": 1.8789761066436768, "log_odds_ratio": -0.3611924946308136, "logits/chosen": -0.7672128677368164, "logits/rejected": -0.6674597859382629, "logps/chosen": -0.7682692408561707, "logps/rejected": -2.2796738147735596, "loss": 1.0494, "nll_loss": 0.92249596118927, "rewards/accuracies": 0.625, "rewards/chosen": -0.07682693004608154, "rewards/margins": 0.15114045143127441, "rewards/rejected": -0.22796738147735596, "step": 2663 }, { "epoch": 1.6251334451730974, "grad_norm": 1.5195307731628418, "learning_rate": 5.419228413962033e-06, "log_odds_chosen": 1.716618537902832, "log_odds_ratio": -0.4663499593734741, "logits/chosen": -0.7574124336242676, "logits/rejected": -0.7484862804412842, "logps/chosen": -0.8348486423492432, "logps/rejected": -2.3082027435302734, "loss": 1.2678, "nll_loss": 1.137464165687561, "rewards/accuracies": 0.75, "rewards/chosen": -0.08348485827445984, "rewards/margins": 0.14733541011810303, "rewards/rejected": -0.23082026839256287, "step": 2664 }, { "epoch": 1.6257434802501143, "grad_norm": 1.1240544319152832, "learning_rate": 5.418248622167789e-06, "log_odds_chosen": 1.4496008157730103, "log_odds_ratio": -0.44553327560424805, "logits/chosen": -0.9553337097167969, "logits/rejected": -1.0240402221679688, "logps/chosen": -0.7917934656143188, "logps/rejected": -1.8711329698562622, "loss": 1.1517, "nll_loss": 1.1731623411178589, "rewards/accuracies": 0.75, "rewards/chosen": -0.07917934656143188, "rewards/margins": 0.10793394595384598, "rewards/rejected": -0.18711329996585846, "step": 2665 }, { "epoch": 1.6263535153271313, "grad_norm": 2.138732671737671, "learning_rate": 5.417268830373545e-06, "log_odds_chosen": 2.4821038246154785, "log_odds_ratio": -0.31421560049057007, "logits/chosen": -0.6442356109619141, "logits/rejected": -0.8697600364685059, "logps/chosen": -0.6869537830352783, "logps/rejected": -2.539163589477539, "loss": 1.2968, "nll_loss": 1.1395204067230225, "rewards/accuracies": 0.875, "rewards/chosen": -0.06869538128376007, "rewards/margins": 0.18522101640701294, "rewards/rejected": -0.2539163827896118, "step": 2666 }, { "epoch": 1.6269635504041482, "grad_norm": 1.1676759719848633, "learning_rate": 5.416289038579302e-06, "log_odds_chosen": 1.2102926969528198, "log_odds_ratio": -0.48658204078674316, "logits/chosen": -0.889306902885437, "logits/rejected": -0.7622441053390503, "logps/chosen": -1.1276123523712158, "logps/rejected": -2.1282801628112793, "loss": 1.0312, "nll_loss": 1.1203203201293945, "rewards/accuracies": 0.75, "rewards/chosen": -0.1127612367272377, "rewards/margins": 0.10006680339574814, "rewards/rejected": -0.21282804012298584, "step": 2667 }, { "epoch": 1.6275735854811653, "grad_norm": 2.528953790664673, "learning_rate": 5.415309246785058e-06, "log_odds_chosen": 1.5685508251190186, "log_odds_ratio": -0.36940860748291016, "logits/chosen": -0.7851236462593079, "logits/rejected": -0.8478277921676636, "logps/chosen": -0.8559529781341553, "logps/rejected": -1.9021669626235962, "loss": 1.0245, "nll_loss": 1.0050162076950073, "rewards/accuracies": 0.875, "rewards/chosen": -0.08559529483318329, "rewards/margins": 0.10462139546871185, "rewards/rejected": -0.19021670520305634, "step": 2668 }, { "epoch": 1.628183620558182, "grad_norm": 1.9138705730438232, "learning_rate": 5.414329454990814e-06, "log_odds_chosen": 0.8393606543540955, "log_odds_ratio": -0.523500919342041, "logits/chosen": -0.7709652185440063, "logits/rejected": -0.6836596727371216, "logps/chosen": -0.8748063445091248, "logps/rejected": -1.5158720016479492, "loss": 1.144, "nll_loss": 0.9663925766944885, "rewards/accuracies": 0.75, "rewards/chosen": -0.08748063445091248, "rewards/margins": 0.06410656124353409, "rewards/rejected": -0.15158718824386597, "step": 2669 }, { "epoch": 1.628793655635199, "grad_norm": 3.1080071926116943, "learning_rate": 5.413349663196571e-06, "log_odds_chosen": 0.5877658128738403, "log_odds_ratio": -0.6507399678230286, "logits/chosen": -0.688133180141449, "logits/rejected": -0.8012738227844238, "logps/chosen": -0.97492516040802, "logps/rejected": -1.367433786392212, "loss": 0.9528, "nll_loss": 1.0051839351654053, "rewards/accuracies": 0.75, "rewards/chosen": -0.0974925234913826, "rewards/margins": 0.039250850677490234, "rewards/rejected": -0.13674336671829224, "step": 2670 }, { "epoch": 1.6294036907122158, "grad_norm": 1.2739418745040894, "learning_rate": 5.412369871402327e-06, "log_odds_chosen": 1.9371623992919922, "log_odds_ratio": -0.3317854404449463, "logits/chosen": -0.6574761867523193, "logits/rejected": -0.7741778492927551, "logps/chosen": -0.7790768146514893, "logps/rejected": -2.1838765144348145, "loss": 1.0185, "nll_loss": 0.9902330636978149, "rewards/accuracies": 0.75, "rewards/chosen": -0.07790768146514893, "rewards/margins": 0.14047998189926147, "rewards/rejected": -0.2183876633644104, "step": 2671 }, { "epoch": 1.6300137257892329, "grad_norm": 1.5394138097763062, "learning_rate": 5.411390079608084e-06, "log_odds_chosen": 1.4767334461212158, "log_odds_ratio": -0.23187656700611115, "logits/chosen": -1.0817840099334717, "logits/rejected": -1.1350007057189941, "logps/chosen": -1.0899077653884888, "logps/rejected": -2.1934847831726074, "loss": 1.2621, "nll_loss": 1.577765703201294, "rewards/accuracies": 1.0, "rewards/chosen": -0.10899077355861664, "rewards/margins": 0.11035769432783127, "rewards/rejected": -0.2193484604358673, "step": 2672 }, { "epoch": 1.63062376086625, "grad_norm": 0.962482213973999, "learning_rate": 5.410410287813839e-06, "log_odds_chosen": 1.0598266124725342, "log_odds_ratio": -0.5228284597396851, "logits/chosen": -0.6938991546630859, "logits/rejected": -0.6574380397796631, "logps/chosen": -0.7515935897827148, "logps/rejected": -1.6321794986724854, "loss": 1.0502, "nll_loss": 0.9391773343086243, "rewards/accuracies": 0.625, "rewards/chosen": -0.07515935599803925, "rewards/margins": 0.08805860579013824, "rewards/rejected": -0.1632179617881775, "step": 2673 }, { "epoch": 1.6312337959432668, "grad_norm": 0.9645936489105225, "learning_rate": 5.409430496019595e-06, "log_odds_chosen": 0.07692163437604904, "log_odds_ratio": -0.7993725538253784, "logits/chosen": -0.833658754825592, "logits/rejected": -0.8227490782737732, "logps/chosen": -1.1226080656051636, "logps/rejected": -1.1236095428466797, "loss": 1.2087, "nll_loss": 1.156769037246704, "rewards/accuracies": 0.375, "rewards/chosen": -0.11226081103086472, "rewards/margins": 0.0001001385971903801, "rewards/rejected": -0.11236095428466797, "step": 2674 }, { "epoch": 1.6318438310202836, "grad_norm": 2.0884225368499756, "learning_rate": 5.408450704225352e-06, "log_odds_chosen": 0.8600356578826904, "log_odds_ratio": -0.5287560224533081, "logits/chosen": -0.693288266658783, "logits/rejected": -0.8856412768363953, "logps/chosen": -0.8785294890403748, "logps/rejected": -1.5695054531097412, "loss": 1.0812, "nll_loss": 1.1414881944656372, "rewards/accuracies": 0.625, "rewards/chosen": -0.08785295486450195, "rewards/margins": 0.0690976083278656, "rewards/rejected": -0.15695054829120636, "step": 2675 }, { "epoch": 1.6324538660973005, "grad_norm": 1.8593547344207764, "learning_rate": 5.407470912431108e-06, "log_odds_chosen": 1.3025208711624146, "log_odds_ratio": -0.3561179041862488, "logits/chosen": -0.6160016059875488, "logits/rejected": -0.6803116798400879, "logps/chosen": -0.6997448801994324, "logps/rejected": -1.575197696685791, "loss": 0.9642, "nll_loss": 0.9740628004074097, "rewards/accuracies": 0.875, "rewards/chosen": -0.06997448951005936, "rewards/margins": 0.08754529058933258, "rewards/rejected": -0.15751977264881134, "step": 2676 }, { "epoch": 1.6330639011743175, "grad_norm": 5.022462368011475, "learning_rate": 5.406491120636864e-06, "log_odds_chosen": 0.7348878383636475, "log_odds_ratio": -0.5608474016189575, "logits/chosen": -0.9350990653038025, "logits/rejected": -0.7471616864204407, "logps/chosen": -0.7441871762275696, "logps/rejected": -1.2094329595565796, "loss": 1.0218, "nll_loss": 0.8327001333236694, "rewards/accuracies": 0.75, "rewards/chosen": -0.07441872358322144, "rewards/margins": 0.04652457311749458, "rewards/rejected": -0.12094329297542572, "step": 2677 }, { "epoch": 1.6336739362513346, "grad_norm": 1.9248783588409424, "learning_rate": 5.405511328842621e-06, "log_odds_chosen": 0.4713301956653595, "log_odds_ratio": -0.5733759999275208, "logits/chosen": -0.7311676740646362, "logits/rejected": -0.7179108262062073, "logps/chosen": -0.8600947856903076, "logps/rejected": -1.153578758239746, "loss": 0.9812, "nll_loss": 1.0697940587997437, "rewards/accuracies": 0.625, "rewards/chosen": -0.08600948005914688, "rewards/margins": 0.02934839576482773, "rewards/rejected": -0.11535787582397461, "step": 2678 }, { "epoch": 1.6342839713283515, "grad_norm": 1.5933756828308105, "learning_rate": 5.404531537048377e-06, "log_odds_chosen": 2.3022100925445557, "log_odds_ratio": -0.41253331303596497, "logits/chosen": -0.6115303039550781, "logits/rejected": -0.7652267813682556, "logps/chosen": -0.657856822013855, "logps/rejected": -2.507568359375, "loss": 1.0815, "nll_loss": 0.8827272653579712, "rewards/accuracies": 0.75, "rewards/chosen": -0.06578568369150162, "rewards/margins": 0.1849711537361145, "rewards/rejected": -0.2507568597793579, "step": 2679 }, { "epoch": 1.6348940064053683, "grad_norm": 2.5079362392425537, "learning_rate": 5.403551745254133e-06, "log_odds_chosen": 0.23349803686141968, "log_odds_ratio": -0.6566906571388245, "logits/chosen": -1.1037721633911133, "logits/rejected": -0.9550437331199646, "logps/chosen": -1.0102860927581787, "logps/rejected": -1.2030205726623535, "loss": 1.1358, "nll_loss": 1.1787452697753906, "rewards/accuracies": 0.5, "rewards/chosen": -0.10102860629558563, "rewards/margins": 0.01927345059812069, "rewards/rejected": -0.12030205875635147, "step": 2680 }, { "epoch": 1.6355040414823852, "grad_norm": 1.5851420164108276, "learning_rate": 5.40257195345989e-06, "log_odds_chosen": 1.0139713287353516, "log_odds_ratio": -0.541073739528656, "logits/chosen": -0.7282562851905823, "logits/rejected": -0.7479408979415894, "logps/chosen": -0.6476602554321289, "logps/rejected": -1.314069151878357, "loss": 0.9041, "nll_loss": 0.7961655855178833, "rewards/accuracies": 0.625, "rewards/chosen": -0.06476602703332901, "rewards/margins": 0.06664088368415833, "rewards/rejected": -0.13140691816806793, "step": 2681 }, { "epoch": 1.6361140765594022, "grad_norm": 1.1959797143936157, "learning_rate": 5.401592161665646e-06, "log_odds_chosen": 1.2548329830169678, "log_odds_ratio": -0.4828207492828369, "logits/chosen": -0.9091358780860901, "logits/rejected": -0.9143281579017639, "logps/chosen": -0.7592060565948486, "logps/rejected": -1.6819117069244385, "loss": 1.0887, "nll_loss": 0.8941130638122559, "rewards/accuracies": 0.75, "rewards/chosen": -0.07592060416936874, "rewards/margins": 0.09227055311203003, "rewards/rejected": -0.16819116473197937, "step": 2682 }, { "epoch": 1.636724111636419, "grad_norm": 1.7458113431930542, "learning_rate": 5.400612369871402e-06, "log_odds_chosen": 1.563598871231079, "log_odds_ratio": -0.5458674430847168, "logits/chosen": -0.8910726308822632, "logits/rejected": -0.9725301265716553, "logps/chosen": -0.7919062376022339, "logps/rejected": -2.062270402908325, "loss": 1.1437, "nll_loss": 1.041873812675476, "rewards/accuracies": 0.625, "rewards/chosen": -0.07919062674045563, "rewards/margins": 0.1270364224910736, "rewards/rejected": -0.20622704923152924, "step": 2683 }, { "epoch": 1.6373341467134361, "grad_norm": 2.16595458984375, "learning_rate": 5.399632578077159e-06, "log_odds_chosen": 2.250826120376587, "log_odds_ratio": -0.27210649847984314, "logits/chosen": -0.6101945638656616, "logits/rejected": -0.8333432078361511, "logps/chosen": -0.5993210077285767, "logps/rejected": -2.413494110107422, "loss": 1.1201, "nll_loss": 0.9504544734954834, "rewards/accuracies": 1.0, "rewards/chosen": -0.05993209779262543, "rewards/margins": 0.1814173012971878, "rewards/rejected": -0.24134939908981323, "step": 2684 }, { "epoch": 1.637944181790453, "grad_norm": 1.4679423570632935, "learning_rate": 5.398652786282914e-06, "log_odds_chosen": 1.0679042339324951, "log_odds_ratio": -0.3966764807701111, "logits/chosen": -0.7899295687675476, "logits/rejected": -0.8589627742767334, "logps/chosen": -0.9174652099609375, "logps/rejected": -1.6784284114837646, "loss": 1.0558, "nll_loss": 0.9738070964813232, "rewards/accuracies": 0.75, "rewards/chosen": -0.09174652397632599, "rewards/margins": 0.07609633356332779, "rewards/rejected": -0.16784285008907318, "step": 2685 }, { "epoch": 1.6385542168674698, "grad_norm": 1.1051398515701294, "learning_rate": 5.39767299448867e-06, "log_odds_chosen": 2.0228681564331055, "log_odds_ratio": -0.4691816568374634, "logits/chosen": -0.7110952138900757, "logits/rejected": -0.724595844745636, "logps/chosen": -0.4894842505455017, "logps/rejected": -2.1851985454559326, "loss": 0.9861, "nll_loss": 0.6818794012069702, "rewards/accuracies": 0.625, "rewards/chosen": -0.04894842579960823, "rewards/margins": 0.16957144439220428, "rewards/rejected": -0.21851986646652222, "step": 2686 }, { "epoch": 1.6391642519444867, "grad_norm": 3.4403131008148193, "learning_rate": 5.396693202694427e-06, "log_odds_chosen": 0.8353013396263123, "log_odds_ratio": -0.6185566186904907, "logits/chosen": -0.7650121450424194, "logits/rejected": -0.8118118047714233, "logps/chosen": -0.8052759170532227, "logps/rejected": -1.4932535886764526, "loss": 1.0918, "nll_loss": 0.9381892681121826, "rewards/accuracies": 0.5, "rewards/chosen": -0.08052758872509003, "rewards/margins": 0.0687977597117424, "rewards/rejected": -0.14932537078857422, "step": 2687 }, { "epoch": 1.6397742870215037, "grad_norm": 1.8349751234054565, "learning_rate": 5.3957134109001834e-06, "log_odds_chosen": 2.2155215740203857, "log_odds_ratio": -0.4183523654937744, "logits/chosen": -0.8707659244537354, "logits/rejected": -1.0129085779190063, "logps/chosen": -0.8673060536384583, "logps/rejected": -2.6923530101776123, "loss": 1.2225, "nll_loss": 1.0577961206436157, "rewards/accuracies": 0.75, "rewards/chosen": -0.08673061430454254, "rewards/margins": 0.18250468373298645, "rewards/rejected": -0.2692352831363678, "step": 2688 }, { "epoch": 1.6403843220985208, "grad_norm": 1.7147796154022217, "learning_rate": 5.39473361910594e-06, "log_odds_chosen": 0.6802948117256165, "log_odds_ratio": -0.6168246865272522, "logits/chosen": -0.8220602869987488, "logits/rejected": -0.8431388735771179, "logps/chosen": -0.7782182693481445, "logps/rejected": -1.2349220514297485, "loss": 1.0522, "nll_loss": 0.846868634223938, "rewards/accuracies": 0.375, "rewards/chosen": -0.07782183587551117, "rewards/margins": 0.04567037522792816, "rewards/rejected": -0.12349221110343933, "step": 2689 }, { "epoch": 1.6409943571755377, "grad_norm": 2.008164167404175, "learning_rate": 5.393753827311696e-06, "log_odds_chosen": 1.128441572189331, "log_odds_ratio": -0.5047239065170288, "logits/chosen": -0.8962945938110352, "logits/rejected": -0.8960003852844238, "logps/chosen": -0.8295149207115173, "logps/rejected": -1.6829662322998047, "loss": 1.2084, "nll_loss": 0.9499651193618774, "rewards/accuracies": 0.75, "rewards/chosen": -0.08295149356126785, "rewards/margins": 0.08534513413906097, "rewards/rejected": -0.16829662024974823, "step": 2690 }, { "epoch": 1.6416043922525545, "grad_norm": 2.1087567806243896, "learning_rate": 5.392774035517452e-06, "log_odds_chosen": 1.8067498207092285, "log_odds_ratio": -0.5575054883956909, "logits/chosen": -0.8205009698867798, "logits/rejected": -1.024404764175415, "logps/chosen": -0.6775885820388794, "logps/rejected": -2.2580742835998535, "loss": 1.1659, "nll_loss": 1.127441167831421, "rewards/accuracies": 0.625, "rewards/chosen": -0.06775885820388794, "rewards/margins": 0.1580485701560974, "rewards/rejected": -0.22580742835998535, "step": 2691 }, { "epoch": 1.6422144273295713, "grad_norm": 2.1949424743652344, "learning_rate": 5.391794243723209e-06, "log_odds_chosen": 1.3212785720825195, "log_odds_ratio": -0.6200747489929199, "logits/chosen": -0.7730022072792053, "logits/rejected": -1.0459799766540527, "logps/chosen": -0.9399882555007935, "logps/rejected": -1.9743797779083252, "loss": 1.2108, "nll_loss": 1.0233663320541382, "rewards/accuracies": 0.5, "rewards/chosen": -0.09399882704019547, "rewards/margins": 0.10343916714191437, "rewards/rejected": -0.19743800163269043, "step": 2692 }, { "epoch": 1.6428244624065884, "grad_norm": 2.424053907394409, "learning_rate": 5.390814451928965e-06, "log_odds_chosen": 0.7563986778259277, "log_odds_ratio": -0.4832369089126587, "logits/chosen": -0.7760441303253174, "logits/rejected": -0.9222630262374878, "logps/chosen": -0.7699525356292725, "logps/rejected": -1.243657112121582, "loss": 0.8902, "nll_loss": 0.9044142961502075, "rewards/accuracies": 0.75, "rewards/chosen": -0.07699525356292725, "rewards/margins": 0.04737045243382454, "rewards/rejected": -0.12436570972204208, "step": 2693 }, { "epoch": 1.6434344974836053, "grad_norm": 1.0825763940811157, "learning_rate": 5.389834660134721e-06, "log_odds_chosen": 0.5414077043533325, "log_odds_ratio": -0.6292965412139893, "logits/chosen": -1.0166501998901367, "logits/rejected": -0.961461067199707, "logps/chosen": -0.8629699945449829, "logps/rejected": -1.2606457471847534, "loss": 0.9669, "nll_loss": 0.9641406536102295, "rewards/accuracies": 0.5, "rewards/chosen": -0.08629700541496277, "rewards/margins": 0.039767589420080185, "rewards/rejected": -0.12606459856033325, "step": 2694 }, { "epoch": 1.6440445325606223, "grad_norm": 1.7193331718444824, "learning_rate": 5.388854868340478e-06, "log_odds_chosen": 1.2102303504943848, "log_odds_ratio": -0.4406295120716095, "logits/chosen": -0.6461570858955383, "logits/rejected": -0.8375294208526611, "logps/chosen": -0.6263419389724731, "logps/rejected": -1.4729022979736328, "loss": 1.0312, "nll_loss": 0.8134157061576843, "rewards/accuracies": 0.75, "rewards/chosen": -0.06263419985771179, "rewards/margins": 0.08465602993965149, "rewards/rejected": -0.14729022979736328, "step": 2695 }, { "epoch": 1.6446545676376392, "grad_norm": 1.1814035177230835, "learning_rate": 5.387875076546233e-06, "log_odds_chosen": 2.1094207763671875, "log_odds_ratio": -0.32692980766296387, "logits/chosen": -0.626920223236084, "logits/rejected": -0.7960072755813599, "logps/chosen": -0.6275554895401001, "logps/rejected": -2.132166624069214, "loss": 0.8389, "nll_loss": 0.8487656712532043, "rewards/accuracies": 0.875, "rewards/chosen": -0.06275554746389389, "rewards/margins": 0.1504611074924469, "rewards/rejected": -0.2132166624069214, "step": 2696 }, { "epoch": 1.645264602714656, "grad_norm": 1.9281939268112183, "learning_rate": 5.386895284751989e-06, "log_odds_chosen": 2.800154685974121, "log_odds_ratio": -0.33635714650154114, "logits/chosen": -0.6268557906150818, "logits/rejected": -0.9493340253829956, "logps/chosen": -0.5885058641433716, "logps/rejected": -2.671379327774048, "loss": 0.8096, "nll_loss": 0.7169210910797119, "rewards/accuracies": 0.75, "rewards/chosen": -0.05885059013962746, "rewards/margins": 0.2082873433828354, "rewards/rejected": -0.26713791489601135, "step": 2697 }, { "epoch": 1.6458746377916729, "grad_norm": 2.7755935192108154, "learning_rate": 5.385915492957746e-06, "log_odds_chosen": 1.297674536705017, "log_odds_ratio": -0.3595341145992279, "logits/chosen": -0.9492147564888, "logits/rejected": -0.9901728630065918, "logps/chosen": -0.9716095328330994, "logps/rejected": -1.9645026922225952, "loss": 1.0812, "nll_loss": 1.1573835611343384, "rewards/accuracies": 0.875, "rewards/chosen": -0.09716096520423889, "rewards/margins": 0.09928930550813675, "rewards/rejected": -0.19645026326179504, "step": 2698 }, { "epoch": 1.64648467286869, "grad_norm": 11.04487419128418, "learning_rate": 5.3849357011635025e-06, "log_odds_chosen": 1.6079106330871582, "log_odds_ratio": -0.482662558555603, "logits/chosen": -0.9653670787811279, "logits/rejected": -0.8768274784088135, "logps/chosen": -0.8982203006744385, "logps/rejected": -2.185915231704712, "loss": 1.0488, "nll_loss": 1.0407850742340088, "rewards/accuracies": 0.625, "rewards/chosen": -0.08982203900814056, "rewards/margins": 0.12876947224140167, "rewards/rejected": -0.21859151124954224, "step": 2699 }, { "epoch": 1.647094707945707, "grad_norm": 3.9863481521606445, "learning_rate": 5.3839559093692594e-06, "log_odds_chosen": 0.24460536241531372, "log_odds_ratio": -0.6117894649505615, "logits/chosen": -1.0322355031967163, "logits/rejected": -1.0029613971710205, "logps/chosen": -0.9429116249084473, "logps/rejected": -1.1008875370025635, "loss": 0.9952, "nll_loss": 1.096974492073059, "rewards/accuracies": 0.875, "rewards/chosen": -0.09429116547107697, "rewards/margins": 0.015797585248947144, "rewards/rejected": -0.11008875072002411, "step": 2700 }, { "epoch": 1.6477047430227239, "grad_norm": 1.2935750484466553, "learning_rate": 5.3829761175750156e-06, "log_odds_chosen": 0.914794921875, "log_odds_ratio": -0.6283128261566162, "logits/chosen": -1.0558695793151855, "logits/rejected": -0.9619863033294678, "logps/chosen": -0.9777864217758179, "logps/rejected": -1.6790015697479248, "loss": 1.1024, "nll_loss": 1.1906472444534302, "rewards/accuracies": 0.5, "rewards/chosen": -0.09777864813804626, "rewards/margins": 0.07012149691581726, "rewards/rejected": -0.16790014505386353, "step": 2701 }, { "epoch": 1.6483147780997407, "grad_norm": 2.523677110671997, "learning_rate": 5.381996325780771e-06, "log_odds_chosen": 0.7503154277801514, "log_odds_ratio": -0.5868837237358093, "logits/chosen": -0.6713022589683533, "logits/rejected": -0.6470035314559937, "logps/chosen": -1.0229012966156006, "logps/rejected": -1.574068307876587, "loss": 1.1675, "nll_loss": 1.0271997451782227, "rewards/accuracies": 0.625, "rewards/chosen": -0.10229012370109558, "rewards/margins": 0.05511670932173729, "rewards/rejected": -0.15740683674812317, "step": 2702 }, { "epoch": 1.6489248131767575, "grad_norm": 1.2609738111495972, "learning_rate": 5.381016533986528e-06, "log_odds_chosen": 1.5149623155593872, "log_odds_ratio": -0.4183095693588257, "logits/chosen": -0.7825206518173218, "logits/rejected": -0.962093710899353, "logps/chosen": -0.7347067594528198, "logps/rejected": -1.8243675231933594, "loss": 1.0174, "nll_loss": 0.8195590972900391, "rewards/accuracies": 0.75, "rewards/chosen": -0.07347067445516586, "rewards/margins": 0.10896609723567963, "rewards/rejected": -0.1824367642402649, "step": 2703 }, { "epoch": 1.6495348482537746, "grad_norm": 3.764388084411621, "learning_rate": 5.380036742192284e-06, "log_odds_chosen": 1.0665974617004395, "log_odds_ratio": -0.6978788375854492, "logits/chosen": -1.003991961479187, "logits/rejected": -1.0570317506790161, "logps/chosen": -1.064348578453064, "logps/rejected": -2.0412466526031494, "loss": 1.2031, "nll_loss": 1.2903399467468262, "rewards/accuracies": 0.625, "rewards/chosen": -0.10643486678600311, "rewards/margins": 0.09768979996442795, "rewards/rejected": -0.20412465929985046, "step": 2704 }, { "epoch": 1.6501448833307917, "grad_norm": 2.15378737449646, "learning_rate": 5.37905695039804e-06, "log_odds_chosen": 1.5431418418884277, "log_odds_ratio": -0.41590726375579834, "logits/chosen": -0.9099912047386169, "logits/rejected": -0.9596318006515503, "logps/chosen": -0.969245970249176, "logps/rejected": -2.1843414306640625, "loss": 1.1422, "nll_loss": 1.0896124839782715, "rewards/accuracies": 0.875, "rewards/chosen": -0.09692459553480148, "rewards/margins": 0.12150955200195312, "rewards/rejected": -0.2184341549873352, "step": 2705 }, { "epoch": 1.6507549184078085, "grad_norm": 1.1944831609725952, "learning_rate": 5.378077158603797e-06, "log_odds_chosen": 1.1980879306793213, "log_odds_ratio": -0.6265957355499268, "logits/chosen": -0.8226630687713623, "logits/rejected": -0.6013259887695312, "logps/chosen": -0.839944064617157, "logps/rejected": -1.8590983152389526, "loss": 1.0748, "nll_loss": 0.9965581297874451, "rewards/accuracies": 0.5, "rewards/chosen": -0.08399441093206406, "rewards/margins": 0.10191541910171509, "rewards/rejected": -0.18590983748435974, "step": 2706 }, { "epoch": 1.6513649534848254, "grad_norm": 1.6264369487762451, "learning_rate": 5.377097366809553e-06, "log_odds_chosen": 0.044208794832229614, "log_odds_ratio": -0.8194891810417175, "logits/chosen": -1.0102969408035278, "logits/rejected": -0.8276785612106323, "logps/chosen": -1.1438922882080078, "logps/rejected": -1.1151137351989746, "loss": 0.9542, "nll_loss": 1.050438404083252, "rewards/accuracies": 0.625, "rewards/chosen": -0.11438924074172974, "rewards/margins": -0.0028778668493032455, "rewards/rejected": -0.11151136457920074, "step": 2707 }, { "epoch": 1.6519749885618422, "grad_norm": 1.3121063709259033, "learning_rate": 5.376117575015308e-06, "log_odds_chosen": 1.9527970552444458, "log_odds_ratio": -0.32117727398872375, "logits/chosen": -0.874409019947052, "logits/rejected": -0.8056333065032959, "logps/chosen": -0.77259361743927, "logps/rejected": -2.273743152618408, "loss": 1.2693, "nll_loss": 1.1551344394683838, "rewards/accuracies": 0.875, "rewards/chosen": -0.077259361743927, "rewards/margins": 0.15011493861675262, "rewards/rejected": -0.22737430036067963, "step": 2708 }, { "epoch": 1.6525850236388593, "grad_norm": 1.460106611251831, "learning_rate": 5.375137783221065e-06, "log_odds_chosen": 1.0797110795974731, "log_odds_ratio": -0.44955974817276, "logits/chosen": -0.8321317434310913, "logits/rejected": -0.8058427572250366, "logps/chosen": -0.7560429573059082, "logps/rejected": -1.48757004737854, "loss": 0.9775, "nll_loss": 0.8956151604652405, "rewards/accuracies": 0.875, "rewards/chosen": -0.07560428977012634, "rewards/margins": 0.07315270602703094, "rewards/rejected": -0.1487569957971573, "step": 2709 }, { "epoch": 1.6531950587158761, "grad_norm": 1.1055105924606323, "learning_rate": 5.3741579914268215e-06, "log_odds_chosen": 1.0440338850021362, "log_odds_ratio": -0.6747362017631531, "logits/chosen": -0.7541756629943848, "logits/rejected": -0.7458676099777222, "logps/chosen": -0.774179995059967, "logps/rejected": -1.6708900928497314, "loss": 0.8142, "nll_loss": 0.9387544393539429, "rewards/accuracies": 0.375, "rewards/chosen": -0.0774179995059967, "rewards/margins": 0.08967101573944092, "rewards/rejected": -0.16708901524543762, "step": 2710 }, { "epoch": 1.6538050937928932, "grad_norm": 2.6125881671905518, "learning_rate": 5.373178199632578e-06, "log_odds_chosen": 2.039001941680908, "log_odds_ratio": -0.3623340427875519, "logits/chosen": -0.4776984453201294, "logits/rejected": -0.6375648975372314, "logps/chosen": -0.6845290660858154, "logps/rejected": -2.1220905780792236, "loss": 1.0402, "nll_loss": 0.7896289825439453, "rewards/accuracies": 0.875, "rewards/chosen": -0.06845290958881378, "rewards/margins": 0.14375615119934082, "rewards/rejected": -0.2122090607881546, "step": 2711 }, { "epoch": 1.65441512886991, "grad_norm": 1.3306454420089722, "learning_rate": 5.372198407838335e-06, "log_odds_chosen": 2.8269433975219727, "log_odds_ratio": -0.30246278643608093, "logits/chosen": -0.6983853578567505, "logits/rejected": -0.7319818139076233, "logps/chosen": -0.6410632729530334, "logps/rejected": -2.7436118125915527, "loss": 0.9759, "nll_loss": 0.7636740207672119, "rewards/accuracies": 0.75, "rewards/chosen": -0.06410632282495499, "rewards/margins": 0.21025486290454865, "rewards/rejected": -0.27436119318008423, "step": 2712 }, { "epoch": 1.655025163946927, "grad_norm": 2.6843466758728027, "learning_rate": 5.37121861604409e-06, "log_odds_chosen": 0.5821237564086914, "log_odds_ratio": -0.6728954911231995, "logits/chosen": -0.966423749923706, "logits/rejected": -0.9336968064308167, "logps/chosen": -0.934394896030426, "logps/rejected": -1.3216224908828735, "loss": 1.0881, "nll_loss": 1.0315383672714233, "rewards/accuracies": 0.375, "rewards/chosen": -0.0934394896030426, "rewards/margins": 0.03872275352478027, "rewards/rejected": -0.13216224312782288, "step": 2713 }, { "epoch": 1.6556351990239437, "grad_norm": 1.2726346254348755, "learning_rate": 5.370238824249847e-06, "log_odds_chosen": 0.24564948678016663, "log_odds_ratio": -0.7201197147369385, "logits/chosen": -0.9495034217834473, "logits/rejected": -0.9135363698005676, "logps/chosen": -0.9882038235664368, "logps/rejected": -1.2154444456100464, "loss": 1.1555, "nll_loss": 1.2092890739440918, "rewards/accuracies": 0.5, "rewards/chosen": -0.09882038831710815, "rewards/margins": 0.02272406592965126, "rewards/rejected": -0.12154445052146912, "step": 2714 }, { "epoch": 1.6562452341009608, "grad_norm": 1.1254171133041382, "learning_rate": 5.369259032455603e-06, "log_odds_chosen": 1.0884549617767334, "log_odds_ratio": -0.527512788772583, "logits/chosen": -0.7654579877853394, "logits/rejected": -0.7938538193702698, "logps/chosen": -0.8796117901802063, "logps/rejected": -1.7755523920059204, "loss": 1.069, "nll_loss": 1.051591396331787, "rewards/accuracies": 0.5, "rewards/chosen": -0.08796117454767227, "rewards/margins": 0.08959406614303589, "rewards/rejected": -0.17755524814128876, "step": 2715 }, { "epoch": 1.6568552691779779, "grad_norm": 2.90053129196167, "learning_rate": 5.368279240661359e-06, "log_odds_chosen": -0.09399238973855972, "log_odds_ratio": -0.7982456684112549, "logits/chosen": -1.063506841659546, "logits/rejected": -0.9495699405670166, "logps/chosen": -1.2628543376922607, "logps/rejected": -1.2256587743759155, "loss": 1.2446, "nll_loss": 1.3042725324630737, "rewards/accuracies": 0.375, "rewards/chosen": -0.12628543376922607, "rewards/margins": -0.0037195561453700066, "rewards/rejected": -0.12256588041782379, "step": 2716 }, { "epoch": 1.6574653042549947, "grad_norm": 1.679589867591858, "learning_rate": 5.367299448867116e-06, "log_odds_chosen": 1.5515474081039429, "log_odds_ratio": -0.4385632276535034, "logits/chosen": -0.7739195823669434, "logits/rejected": -0.81986004114151, "logps/chosen": -0.8397651314735413, "logps/rejected": -2.0566468238830566, "loss": 1.0497, "nll_loss": 0.9985994696617126, "rewards/accuracies": 0.75, "rewards/chosen": -0.08397650718688965, "rewards/margins": 0.12168815732002258, "rewards/rejected": -0.20566466450691223, "step": 2717 }, { "epoch": 1.6580753393320116, "grad_norm": 1.1397337913513184, "learning_rate": 5.366319657072872e-06, "log_odds_chosen": 0.7319713830947876, "log_odds_ratio": -0.5886727571487427, "logits/chosen": -0.8975452184677124, "logits/rejected": -1.0066354274749756, "logps/chosen": -0.818332850933075, "logps/rejected": -1.3568140268325806, "loss": 0.9201, "nll_loss": 1.1467194557189941, "rewards/accuracies": 0.75, "rewards/chosen": -0.08183328807353973, "rewards/margins": 0.05384811758995056, "rewards/rejected": -0.1356814056634903, "step": 2718 }, { "epoch": 1.6586853744090284, "grad_norm": 5.957388877868652, "learning_rate": 5.3653398652786275e-06, "log_odds_chosen": 2.8213882446289062, "log_odds_ratio": -0.4517519772052765, "logits/chosen": -0.6002238988876343, "logits/rejected": -0.6695181727409363, "logps/chosen": -0.6271414756774902, "logps/rejected": -2.9062438011169434, "loss": 0.9757, "nll_loss": 0.8087099194526672, "rewards/accuracies": 0.75, "rewards/chosen": -0.06271415203809738, "rewards/margins": 0.22791022062301636, "rewards/rejected": -0.29062438011169434, "step": 2719 }, { "epoch": 1.6592954094860455, "grad_norm": 1.1869087219238281, "learning_rate": 5.364360073484384e-06, "log_odds_chosen": 0.6514809131622314, "log_odds_ratio": -0.6129856705665588, "logits/chosen": -0.9358597993850708, "logits/rejected": -0.9036373496055603, "logps/chosen": -0.8194438219070435, "logps/rejected": -1.209557294845581, "loss": 1.2859, "nll_loss": 1.5881054401397705, "rewards/accuracies": 0.75, "rewards/chosen": -0.08194439113140106, "rewards/margins": 0.03901134803891182, "rewards/rejected": -0.12095573544502258, "step": 2720 }, { "epoch": 1.6599054445630623, "grad_norm": 1.2152537107467651, "learning_rate": 5.3633802816901405e-06, "log_odds_chosen": 1.65958833694458, "log_odds_ratio": -0.3608850836753845, "logits/chosen": -0.7149420976638794, "logits/rejected": -0.9144742488861084, "logps/chosen": -0.7809582352638245, "logps/rejected": -1.7888853549957275, "loss": 0.9749, "nll_loss": 0.9148797988891602, "rewards/accuracies": 0.75, "rewards/chosen": -0.07809583097696304, "rewards/margins": 0.10079272091388702, "rewards/rejected": -0.17888855934143066, "step": 2721 }, { "epoch": 1.6605154796400794, "grad_norm": 1.8844847679138184, "learning_rate": 5.362400489895897e-06, "log_odds_chosen": 0.8331484198570251, "log_odds_ratio": -0.4925658106803894, "logits/chosen": -0.7625762224197388, "logits/rejected": -0.7369213700294495, "logps/chosen": -0.6520861983299255, "logps/rejected": -1.188655138015747, "loss": 1.0469, "nll_loss": 0.8143883943557739, "rewards/accuracies": 0.625, "rewards/chosen": -0.06520862132310867, "rewards/margins": 0.05365690588951111, "rewards/rejected": -0.11886553466320038, "step": 2722 }, { "epoch": 1.6611255147170962, "grad_norm": 1.6586463451385498, "learning_rate": 5.361420698101654e-06, "log_odds_chosen": 1.1548137664794922, "log_odds_ratio": -0.5716269016265869, "logits/chosen": -0.8584438562393188, "logits/rejected": -0.7615184783935547, "logps/chosen": -0.8293440341949463, "logps/rejected": -1.666183352470398, "loss": 1.0188, "nll_loss": 0.8840290904045105, "rewards/accuracies": 0.5, "rewards/chosen": -0.0829344093799591, "rewards/margins": 0.08368393033742905, "rewards/rejected": -0.16661833226680756, "step": 2723 }, { "epoch": 1.661735549794113, "grad_norm": 1.4709208011627197, "learning_rate": 5.36044090630741e-06, "log_odds_chosen": 0.9271186590194702, "log_odds_ratio": -0.44663941860198975, "logits/chosen": -0.7629250884056091, "logits/rejected": -0.7756421566009521, "logps/chosen": -0.6488403677940369, "logps/rejected": -1.207231044769287, "loss": 1.226, "nll_loss": 1.1693741083145142, "rewards/accuracies": 0.75, "rewards/chosen": -0.06488403677940369, "rewards/margins": 0.05583908036351204, "rewards/rejected": -0.12072311341762543, "step": 2724 }, { "epoch": 1.66234558487113, "grad_norm": 2.075838327407837, "learning_rate": 5.359461114513165e-06, "log_odds_chosen": 2.2682597637176514, "log_odds_ratio": -0.4999448359012604, "logits/chosen": -0.9225348830223083, "logits/rejected": -1.0108394622802734, "logps/chosen": -0.8305021524429321, "logps/rejected": -2.6408462524414062, "loss": 1.0613, "nll_loss": 1.05472993850708, "rewards/accuracies": 0.5, "rewards/chosen": -0.08305022120475769, "rewards/margins": 0.1810344159603119, "rewards/rejected": -0.2640846371650696, "step": 2725 }, { "epoch": 1.662955619948147, "grad_norm": 6.280720233917236, "learning_rate": 5.358481322718922e-06, "log_odds_chosen": -0.22325490415096283, "log_odds_ratio": -0.8697505593299866, "logits/chosen": -1.0221729278564453, "logits/rejected": -0.9105631709098816, "logps/chosen": -1.0640259981155396, "logps/rejected": -1.009939432144165, "loss": 1.2604, "nll_loss": 1.3315261602401733, "rewards/accuracies": 0.375, "rewards/chosen": -0.10640259832143784, "rewards/margins": -0.005408651661127806, "rewards/rejected": -0.10099394619464874, "step": 2726 }, { "epoch": 1.663565655025164, "grad_norm": 1.8382198810577393, "learning_rate": 5.357501530924678e-06, "log_odds_chosen": 0.6502119302749634, "log_odds_ratio": -0.6683474183082581, "logits/chosen": -0.9548815488815308, "logits/rejected": -0.9023215770721436, "logps/chosen": -0.8240422010421753, "logps/rejected": -1.250836968421936, "loss": 1.1304, "nll_loss": 0.910707414150238, "rewards/accuracies": 0.5, "rewards/chosen": -0.08240421861410141, "rewards/margins": 0.04267948493361473, "rewards/rejected": -0.12508369982242584, "step": 2727 }, { "epoch": 1.664175690102181, "grad_norm": 1.362196445465088, "learning_rate": 5.356521739130435e-06, "log_odds_chosen": 0.7376496195793152, "log_odds_ratio": -0.4929268956184387, "logits/chosen": -1.0062984228134155, "logits/rejected": -0.9171808362007141, "logps/chosen": -0.9409331679344177, "logps/rejected": -1.4873095750808716, "loss": 1.1654, "nll_loss": 1.0676006078720093, "rewards/accuracies": 0.625, "rewards/chosen": -0.09409331530332565, "rewards/margins": 0.05463763698935509, "rewards/rejected": -0.14873096346855164, "step": 2728 }, { "epoch": 1.6647857251791978, "grad_norm": 1.1286230087280273, "learning_rate": 5.355541947336191e-06, "log_odds_chosen": 0.9956952333450317, "log_odds_ratio": -0.4459002614021301, "logits/chosen": -0.9297113418579102, "logits/rejected": -0.8650578260421753, "logps/chosen": -0.733421802520752, "logps/rejected": -1.3757413625717163, "loss": 1.0411, "nll_loss": 0.869142472743988, "rewards/accuracies": 0.875, "rewards/chosen": -0.07334218174219131, "rewards/margins": 0.06423194706439972, "rewards/rejected": -0.13757413625717163, "step": 2729 }, { "epoch": 1.6653957602562146, "grad_norm": 1.8856807947158813, "learning_rate": 5.3545621555419465e-06, "log_odds_chosen": 0.7016499042510986, "log_odds_ratio": -0.564872682094574, "logits/chosen": -0.8178382515907288, "logits/rejected": -0.8249785304069519, "logps/chosen": -0.9766982793807983, "logps/rejected": -1.3981612920761108, "loss": 1.1642, "nll_loss": 1.01796555519104, "rewards/accuracies": 0.5, "rewards/chosen": -0.0976698249578476, "rewards/margins": 0.04214629530906677, "rewards/rejected": -0.13981612026691437, "step": 2730 }, { "epoch": 1.6660057953332317, "grad_norm": 2.5328938961029053, "learning_rate": 5.3535823637477035e-06, "log_odds_chosen": 2.651491403579712, "log_odds_ratio": -0.2909192442893982, "logits/chosen": -0.46836334466934204, "logits/rejected": -0.6222400665283203, "logps/chosen": -0.6003950238227844, "logps/rejected": -2.674210548400879, "loss": 1.0014, "nll_loss": 0.9109956622123718, "rewards/accuracies": 1.0, "rewards/chosen": -0.06003950163722038, "rewards/margins": 0.20738154649734497, "rewards/rejected": -0.26742106676101685, "step": 2731 }, { "epoch": 1.6666158304102487, "grad_norm": 1.865331768989563, "learning_rate": 5.35260257195346e-06, "log_odds_chosen": 1.9340248107910156, "log_odds_ratio": -0.2798709273338318, "logits/chosen": -0.6768539547920227, "logits/rejected": -0.8861147165298462, "logps/chosen": -0.5646640062332153, "logps/rejected": -1.8526430130004883, "loss": 0.931, "nll_loss": 0.7242026925086975, "rewards/accuracies": 1.0, "rewards/chosen": -0.05646640062332153, "rewards/margins": 0.12879790365695953, "rewards/rejected": -0.18526430428028107, "step": 2732 }, { "epoch": 1.6672258654872656, "grad_norm": 1.212948203086853, "learning_rate": 5.351622780159216e-06, "log_odds_chosen": 0.5162142515182495, "log_odds_ratio": -0.567253828048706, "logits/chosen": -0.9667829871177673, "logits/rejected": -0.9523773193359375, "logps/chosen": -0.9872148036956787, "logps/rejected": -1.339093565940857, "loss": 1.2742, "nll_loss": 1.2777791023254395, "rewards/accuracies": 0.5, "rewards/chosen": -0.09872148931026459, "rewards/margins": 0.035187866538763046, "rewards/rejected": -0.13390934467315674, "step": 2733 }, { "epoch": 1.6678359005642824, "grad_norm": 1.205331802368164, "learning_rate": 5.350642988364973e-06, "log_odds_chosen": 1.2384926080703735, "log_odds_ratio": -0.40868663787841797, "logits/chosen": -0.723347544670105, "logits/rejected": -0.6771788597106934, "logps/chosen": -0.7000570893287659, "logps/rejected": -1.5775160789489746, "loss": 0.9457, "nll_loss": 0.9764299392700195, "rewards/accuracies": 0.625, "rewards/chosen": -0.07000571489334106, "rewards/margins": 0.08774588257074356, "rewards/rejected": -0.15775160491466522, "step": 2734 }, { "epoch": 1.6684459356412993, "grad_norm": 1.201539397239685, "learning_rate": 5.349663196570729e-06, "log_odds_chosen": 3.819117784500122, "log_odds_ratio": -0.14539706707000732, "logits/chosen": -0.555575966835022, "logits/rejected": -0.9026216864585876, "logps/chosen": -0.45923370122909546, "logps/rejected": -3.336766004562378, "loss": 1.075, "nll_loss": 0.950973391532898, "rewards/accuracies": 1.0, "rewards/chosen": -0.04592336714267731, "rewards/margins": 0.28775322437286377, "rewards/rejected": -0.33367660641670227, "step": 2735 }, { "epoch": 1.6690559707183164, "grad_norm": 1.4976338148117065, "learning_rate": 5.348683404776484e-06, "log_odds_chosen": 2.0073516368865967, "log_odds_ratio": -0.323306679725647, "logits/chosen": -0.6835958361625671, "logits/rejected": -0.7916488647460938, "logps/chosen": -0.6594816446304321, "logps/rejected": -2.1193795204162598, "loss": 1.0933, "nll_loss": 0.8523029088973999, "rewards/accuracies": 0.875, "rewards/chosen": -0.06594815850257874, "rewards/margins": 0.145989790558815, "rewards/rejected": -0.21193796396255493, "step": 2736 }, { "epoch": 1.6696660057953332, "grad_norm": 1.765023946762085, "learning_rate": 5.347703612982241e-06, "log_odds_chosen": -0.09267117083072662, "log_odds_ratio": -0.7832914590835571, "logits/chosen": -0.9619423747062683, "logits/rejected": -0.837533712387085, "logps/chosen": -1.1374140977859497, "logps/rejected": -1.0730854272842407, "loss": 1.1704, "nll_loss": 1.4243409633636475, "rewards/accuracies": 0.625, "rewards/chosen": -0.11374141275882721, "rewards/margins": -0.006432864814996719, "rewards/rejected": -0.10730855166912079, "step": 2737 }, { "epoch": 1.6702760408723503, "grad_norm": 1.8020386695861816, "learning_rate": 5.346723821187997e-06, "log_odds_chosen": 2.2151689529418945, "log_odds_ratio": -0.42021867632865906, "logits/chosen": -0.6479965448379517, "logits/rejected": -0.8015814423561096, "logps/chosen": -0.5135161280632019, "logps/rejected": -2.227790117263794, "loss": 1.1751, "nll_loss": 0.9085818529129028, "rewards/accuracies": 0.875, "rewards/chosen": -0.05135161429643631, "rewards/margins": 0.17142736911773682, "rewards/rejected": -0.22277899086475372, "step": 2738 }, { "epoch": 1.6708860759493671, "grad_norm": 1.5714904069900513, "learning_rate": 5.345744029393753e-06, "log_odds_chosen": 0.28842365741729736, "log_odds_ratio": -0.6027372479438782, "logits/chosen": -0.7598439455032349, "logits/rejected": -0.7155526876449585, "logps/chosen": -0.8412559032440186, "logps/rejected": -1.0266497135162354, "loss": 1.2132, "nll_loss": 0.9444253444671631, "rewards/accuracies": 0.625, "rewards/chosen": -0.0841255858540535, "rewards/margins": 0.018539387732744217, "rewards/rejected": -0.10266498476266861, "step": 2739 }, { "epoch": 1.671496111026384, "grad_norm": 1.1327557563781738, "learning_rate": 5.34476423759951e-06, "log_odds_chosen": 0.8647260665893555, "log_odds_ratio": -0.43894675374031067, "logits/chosen": -1.0436344146728516, "logits/rejected": -0.7965308427810669, "logps/chosen": -0.9962621927261353, "logps/rejected": -1.6712898015975952, "loss": 1.0738, "nll_loss": 1.0920791625976562, "rewards/accuracies": 0.875, "rewards/chosen": -0.09962620586156845, "rewards/margins": 0.06750277429819107, "rewards/rejected": -0.16712898015975952, "step": 2740 }, { "epoch": 1.6721061461034008, "grad_norm": 2.3808393478393555, "learning_rate": 5.343784445805266e-06, "log_odds_chosen": 1.8990358114242554, "log_odds_ratio": -0.24401040375232697, "logits/chosen": -0.928792417049408, "logits/rejected": -1.03458833694458, "logps/chosen": -0.8254041075706482, "logps/rejected": -2.26942777633667, "loss": 1.2526, "nll_loss": 1.3050265312194824, "rewards/accuracies": 0.875, "rewards/chosen": -0.08254041522741318, "rewards/margins": 0.1444023698568344, "rewards/rejected": -0.226942777633667, "step": 2741 }, { "epoch": 1.6727161811804179, "grad_norm": 2.2517828941345215, "learning_rate": 5.3428046540110225e-06, "log_odds_chosen": 0.638297975063324, "log_odds_ratio": -0.5657376050949097, "logits/chosen": -0.9357471466064453, "logits/rejected": -1.0126808881759644, "logps/chosen": -1.1965610980987549, "logps/rejected": -1.6742773056030273, "loss": 1.324, "nll_loss": 1.637054204940796, "rewards/accuracies": 0.5, "rewards/chosen": -0.11965611577033997, "rewards/margins": 0.04777161777019501, "rewards/rejected": -0.16742773354053497, "step": 2742 }, { "epoch": 1.673326216257435, "grad_norm": 0.9029905796051025, "learning_rate": 5.341824862216779e-06, "log_odds_chosen": 1.3487753868103027, "log_odds_ratio": -0.3538605570793152, "logits/chosen": -0.5599895119667053, "logits/rejected": -0.6976974010467529, "logps/chosen": -0.8222344517707825, "logps/rejected": -1.7852154970169067, "loss": 0.9381, "nll_loss": 0.9784561395645142, "rewards/accuracies": 0.875, "rewards/chosen": -0.08222344517707825, "rewards/margins": 0.09629809856414795, "rewards/rejected": -0.1785215437412262, "step": 2743 }, { "epoch": 1.6739362513344518, "grad_norm": 0.9485909342765808, "learning_rate": 5.340845070422535e-06, "log_odds_chosen": 1.601290225982666, "log_odds_ratio": -0.4191494882106781, "logits/chosen": -0.7712669372558594, "logits/rejected": -0.8043485879898071, "logps/chosen": -0.7037328481674194, "logps/rejected": -1.8628747463226318, "loss": 0.9769, "nll_loss": 0.9806486964225769, "rewards/accuracies": 0.875, "rewards/chosen": -0.0703732892870903, "rewards/margins": 0.11591421067714691, "rewards/rejected": -0.18628749251365662, "step": 2744 }, { "epoch": 1.6745462864114686, "grad_norm": 1.3765180110931396, "learning_rate": 5.339865278628292e-06, "log_odds_chosen": 1.6617070436477661, "log_odds_ratio": -0.3451383709907532, "logits/chosen": -0.6786142587661743, "logits/rejected": -0.6132914423942566, "logps/chosen": -0.5172152519226074, "logps/rejected": -1.558161973953247, "loss": 1.0788, "nll_loss": 0.6659436225891113, "rewards/accuracies": 0.75, "rewards/chosen": -0.05172152817249298, "rewards/margins": 0.10409467667341232, "rewards/rejected": -0.1558161973953247, "step": 2745 }, { "epoch": 1.6751563214884855, "grad_norm": 1.8324472904205322, "learning_rate": 5.338885486834048e-06, "log_odds_chosen": 0.46041521430015564, "log_odds_ratio": -0.7519144415855408, "logits/chosen": -0.5521873235702515, "logits/rejected": -0.6628503799438477, "logps/chosen": -0.8081446886062622, "logps/rejected": -1.0720691680908203, "loss": 1.1749, "nll_loss": 1.1479182243347168, "rewards/accuracies": 0.375, "rewards/chosen": -0.08081447333097458, "rewards/margins": 0.026392439380288124, "rewards/rejected": -0.10720691084861755, "step": 2746 }, { "epoch": 1.6757663565655025, "grad_norm": 1.8167102336883545, "learning_rate": 5.337905695039803e-06, "log_odds_chosen": 1.5960662364959717, "log_odds_ratio": -0.4166620969772339, "logits/chosen": -0.5734879970550537, "logits/rejected": -0.5967326164245605, "logps/chosen": -0.6107507348060608, "logps/rejected": -1.797349452972412, "loss": 1.0865, "nll_loss": 1.08604896068573, "rewards/accuracies": 0.75, "rewards/chosen": -0.06107507646083832, "rewards/margins": 0.1186598688364029, "rewards/rejected": -0.1797349452972412, "step": 2747 }, { "epoch": 1.6763763916425194, "grad_norm": 1.3843400478363037, "learning_rate": 5.33692590324556e-06, "log_odds_chosen": 1.1206289529800415, "log_odds_ratio": -0.48849013447761536, "logits/chosen": -0.7095973491668701, "logits/rejected": -0.73507159948349, "logps/chosen": -0.852568507194519, "logps/rejected": -1.6586289405822754, "loss": 1.2002, "nll_loss": 1.3324077129364014, "rewards/accuracies": 0.75, "rewards/chosen": -0.08525684475898743, "rewards/margins": 0.08060605078935623, "rewards/rejected": -0.16586290299892426, "step": 2748 }, { "epoch": 1.6769864267195365, "grad_norm": 4.193239688873291, "learning_rate": 5.335946111451316e-06, "log_odds_chosen": 1.672337532043457, "log_odds_ratio": -0.37548333406448364, "logits/chosen": -0.7059224843978882, "logits/rejected": -0.9515314698219299, "logps/chosen": -0.886870265007019, "logps/rejected": -2.0499494075775146, "loss": 1.2396, "nll_loss": 1.3415648937225342, "rewards/accuracies": 0.75, "rewards/chosen": -0.08868702501058578, "rewards/margins": 0.11630791425704956, "rewards/rejected": -0.20499493181705475, "step": 2749 }, { "epoch": 1.6775964617965533, "grad_norm": 1.3013737201690674, "learning_rate": 5.334966319657072e-06, "log_odds_chosen": 1.155466079711914, "log_odds_ratio": -0.6284981966018677, "logits/chosen": -0.7281285524368286, "logits/rejected": -0.9391955733299255, "logps/chosen": -0.9093168377876282, "logps/rejected": -1.7629518508911133, "loss": 1.087, "nll_loss": 1.1037085056304932, "rewards/accuracies": 0.625, "rewards/chosen": -0.09093168377876282, "rewards/margins": 0.08536350727081299, "rewards/rejected": -0.1762951910495758, "step": 2750 }, { "epoch": 1.6782064968735702, "grad_norm": 0.9775970578193665, "learning_rate": 5.333986527862829e-06, "log_odds_chosen": 1.189887523651123, "log_odds_ratio": -0.5937631726264954, "logits/chosen": -0.898434042930603, "logits/rejected": -0.8505975008010864, "logps/chosen": -0.8111134171485901, "logps/rejected": -1.698091745376587, "loss": 0.9505, "nll_loss": 0.9334631562232971, "rewards/accuracies": 0.5, "rewards/chosen": -0.0811113491654396, "rewards/margins": 0.08869782090187073, "rewards/rejected": -0.16980916261672974, "step": 2751 }, { "epoch": 1.678816531950587, "grad_norm": 1.1045308113098145, "learning_rate": 5.333006736068585e-06, "log_odds_chosen": 1.2571390867233276, "log_odds_ratio": -0.5604572892189026, "logits/chosen": -0.9345829486846924, "logits/rejected": -0.8980340957641602, "logps/chosen": -0.7909297943115234, "logps/rejected": -1.8304134607315063, "loss": 1.0179, "nll_loss": 1.110541820526123, "rewards/accuracies": 0.5, "rewards/chosen": -0.07909297943115234, "rewards/margins": 0.10394836962223053, "rewards/rejected": -0.18304134905338287, "step": 2752 }, { "epoch": 1.679426567027604, "grad_norm": 2.881589651107788, "learning_rate": 5.332026944274341e-06, "log_odds_chosen": 1.1608364582061768, "log_odds_ratio": -0.48171406984329224, "logits/chosen": -0.6499819159507751, "logits/rejected": -0.5374346375465393, "logps/chosen": -0.8954433798789978, "logps/rejected": -1.7418988943099976, "loss": 0.9909, "nll_loss": 1.0684195756912231, "rewards/accuracies": 0.75, "rewards/chosen": -0.08954434096813202, "rewards/margins": 0.08464555442333221, "rewards/rejected": -0.17418989539146423, "step": 2753 }, { "epoch": 1.6800366021046211, "grad_norm": 1.1269716024398804, "learning_rate": 5.331047152480098e-06, "log_odds_chosen": 1.0025525093078613, "log_odds_ratio": -0.7413411140441895, "logits/chosen": -0.7286022901535034, "logits/rejected": -0.8716866970062256, "logps/chosen": -0.8719608783721924, "logps/rejected": -1.6175057888031006, "loss": 0.9707, "nll_loss": 0.9444453716278076, "rewards/accuracies": 0.5, "rewards/chosen": -0.08719608932733536, "rewards/margins": 0.07455448806285858, "rewards/rejected": -0.16175056993961334, "step": 2754 }, { "epoch": 1.680646637181638, "grad_norm": 0.9303513169288635, "learning_rate": 5.330067360685854e-06, "log_odds_chosen": 1.1149874925613403, "log_odds_ratio": -0.5553916692733765, "logits/chosen": -0.7403858304023743, "logits/rejected": -0.8309726715087891, "logps/chosen": -0.8520300984382629, "logps/rejected": -1.6686348915100098, "loss": 0.9443, "nll_loss": 1.035712718963623, "rewards/accuracies": 0.625, "rewards/chosen": -0.08520301431417465, "rewards/margins": 0.08166047930717468, "rewards/rejected": -0.16686350107192993, "step": 2755 }, { "epoch": 1.6812566722586548, "grad_norm": 0.9516505002975464, "learning_rate": 5.329087568891611e-06, "log_odds_chosen": 3.3803024291992188, "log_odds_ratio": -0.30434292554855347, "logits/chosen": -0.7304121255874634, "logits/rejected": -0.8840508460998535, "logps/chosen": -0.5978593826293945, "logps/rejected": -3.3686342239379883, "loss": 1.1161, "nll_loss": 0.9204068183898926, "rewards/accuracies": 0.75, "rewards/chosen": -0.05978593975305557, "rewards/margins": 0.2770775258541107, "rewards/rejected": -0.3368634581565857, "step": 2756 }, { "epoch": 1.6818667073356717, "grad_norm": 12.177606582641602, "learning_rate": 5.328107777097367e-06, "log_odds_chosen": 0.16502848267555237, "log_odds_ratio": -0.7674413323402405, "logits/chosen": -0.8281761407852173, "logits/rejected": -0.7697843313217163, "logps/chosen": -0.8847551941871643, "logps/rejected": -1.050293207168579, "loss": 1.1174, "nll_loss": 1.0033700466156006, "rewards/accuracies": 0.75, "rewards/chosen": -0.08847552537918091, "rewards/margins": 0.016553806141018867, "rewards/rejected": -0.10502932965755463, "step": 2757 }, { "epoch": 1.6824767424126887, "grad_norm": 1.2628051042556763, "learning_rate": 5.327127985303123e-06, "log_odds_chosen": 1.7738511562347412, "log_odds_ratio": -0.5262364149093628, "logits/chosen": -0.8806589841842651, "logits/rejected": -0.9637264609336853, "logps/chosen": -0.7887566685676575, "logps/rejected": -2.156006097793579, "loss": 1.0889, "nll_loss": 1.1533679962158203, "rewards/accuracies": 0.625, "rewards/chosen": -0.07887566834688187, "rewards/margins": 0.13672494888305664, "rewards/rejected": -0.2156006097793579, "step": 2758 }, { "epoch": 1.6830867774897058, "grad_norm": 1.3391382694244385, "learning_rate": 5.326148193508879e-06, "log_odds_chosen": 1.8719054460525513, "log_odds_ratio": -0.3393075466156006, "logits/chosen": -0.6388946771621704, "logits/rejected": -0.621522843837738, "logps/chosen": -0.571297287940979, "logps/rejected": -1.938596248626709, "loss": 1.143, "nll_loss": 0.6822472810745239, "rewards/accuracies": 0.625, "rewards/chosen": -0.05712973326444626, "rewards/margins": 0.1367298811674118, "rewards/rejected": -0.19385960698127747, "step": 2759 }, { "epoch": 1.6836968125667227, "grad_norm": 2.1373980045318604, "learning_rate": 5.325168401714635e-06, "log_odds_chosen": 0.7865924835205078, "log_odds_ratio": -0.5733562707901001, "logits/chosen": -1.0835483074188232, "logits/rejected": -0.9184428453445435, "logps/chosen": -0.9159454703330994, "logps/rejected": -1.4853602647781372, "loss": 1.1073, "nll_loss": 1.1044907569885254, "rewards/accuracies": 0.75, "rewards/chosen": -0.09159454703330994, "rewards/margins": 0.056941479444503784, "rewards/rejected": -0.14853602647781372, "step": 2760 }, { "epoch": 1.6843068476437395, "grad_norm": 2.3939669132232666, "learning_rate": 5.324188609920391e-06, "log_odds_chosen": 2.3062121868133545, "log_odds_ratio": -0.2564782500267029, "logits/chosen": -0.7112226486206055, "logits/rejected": -0.8487898111343384, "logps/chosen": -0.7341263294219971, "logps/rejected": -2.4183268547058105, "loss": 1.1675, "nll_loss": 1.0730903148651123, "rewards/accuracies": 0.875, "rewards/chosen": -0.07341262698173523, "rewards/margins": 0.16842004656791687, "rewards/rejected": -0.2418326735496521, "step": 2761 }, { "epoch": 1.6849168827207563, "grad_norm": 1.8820430040359497, "learning_rate": 5.323208818126148e-06, "log_odds_chosen": 1.708844542503357, "log_odds_ratio": -0.3077698349952698, "logits/chosen": -0.7960489988327026, "logits/rejected": -0.9482330083847046, "logps/chosen": -0.8703707456588745, "logps/rejected": -2.2017226219177246, "loss": 1.1963, "nll_loss": 1.3746812343597412, "rewards/accuracies": 0.875, "rewards/chosen": -0.08703707158565521, "rewards/margins": 0.13313519954681396, "rewards/rejected": -0.22017225623130798, "step": 2762 }, { "epoch": 1.6855269177977734, "grad_norm": 1.7384376525878906, "learning_rate": 5.3222290263319045e-06, "log_odds_chosen": 0.9518798589706421, "log_odds_ratio": -0.4454325735569, "logits/chosen": -0.8700229525566101, "logits/rejected": -0.8646662831306458, "logps/chosen": -0.8713592290878296, "logps/rejected": -1.5392725467681885, "loss": 0.9939, "nll_loss": 1.134446620941162, "rewards/accuracies": 0.75, "rewards/chosen": -0.0871359258890152, "rewards/margins": 0.06679131835699081, "rewards/rejected": -0.1539272665977478, "step": 2763 }, { "epoch": 1.6861369528747903, "grad_norm": 2.5773329734802246, "learning_rate": 5.3212492345376606e-06, "log_odds_chosen": 0.6509724855422974, "log_odds_ratio": -0.6198165416717529, "logits/chosen": -0.8962138891220093, "logits/rejected": -0.7723129391670227, "logps/chosen": -0.9288305640220642, "logps/rejected": -1.5031582117080688, "loss": 1.048, "nll_loss": 1.115673542022705, "rewards/accuracies": 0.5, "rewards/chosen": -0.09288306534290314, "rewards/margins": 0.05743276700377464, "rewards/rejected": -0.15031582117080688, "step": 2764 }, { "epoch": 1.6867469879518073, "grad_norm": 1.1906558275222778, "learning_rate": 5.320269442743417e-06, "log_odds_chosen": 1.718498706817627, "log_odds_ratio": -0.28853750228881836, "logits/chosen": -0.6328994035720825, "logits/rejected": -0.7623311281204224, "logps/chosen": -0.7520368099212646, "logps/rejected": -1.8838016986846924, "loss": 1.0273, "nll_loss": 0.9755165576934814, "rewards/accuracies": 0.875, "rewards/chosen": -0.07520367950201035, "rewards/margins": 0.11317649483680725, "rewards/rejected": -0.1883801817893982, "step": 2765 }, { "epoch": 1.6873570230288242, "grad_norm": 1.2063719034194946, "learning_rate": 5.319289650949173e-06, "log_odds_chosen": 0.9385203123092651, "log_odds_ratio": -0.46970540285110474, "logits/chosen": -0.8342958092689514, "logits/rejected": -0.8350346684455872, "logps/chosen": -0.6445662975311279, "logps/rejected": -1.106752634048462, "loss": 1.1544, "nll_loss": 1.1903228759765625, "rewards/accuracies": 0.625, "rewards/chosen": -0.06445663422346115, "rewards/margins": 0.0462186262011528, "rewards/rejected": -0.11067526042461395, "step": 2766 }, { "epoch": 1.687967058105841, "grad_norm": 1.4253206253051758, "learning_rate": 5.318309859154929e-06, "log_odds_chosen": 1.1572767496109009, "log_odds_ratio": -0.4093613624572754, "logits/chosen": -1.019899845123291, "logits/rejected": -1.086020588874817, "logps/chosen": -0.8931871652603149, "logps/rejected": -1.7502775192260742, "loss": 1.0987, "nll_loss": 1.0192714929580688, "rewards/accuracies": 0.875, "rewards/chosen": -0.08931872248649597, "rewards/margins": 0.08570902794599533, "rewards/rejected": -0.1750277429819107, "step": 2767 }, { "epoch": 1.6885770931828579, "grad_norm": 2.584963798522949, "learning_rate": 5.317330067360686e-06, "log_odds_chosen": 0.5755178928375244, "log_odds_ratio": -0.7116554379463196, "logits/chosen": -0.9457694888114929, "logits/rejected": -0.8074489831924438, "logps/chosen": -0.9135655164718628, "logps/rejected": -1.4012812376022339, "loss": 0.9771, "nll_loss": 1.030942440032959, "rewards/accuracies": 0.5, "rewards/chosen": -0.0913565531373024, "rewards/margins": 0.04877157509326935, "rewards/rejected": -0.14012812077999115, "step": 2768 }, { "epoch": 1.689187128259875, "grad_norm": 4.098773956298828, "learning_rate": 5.316350275566442e-06, "log_odds_chosen": 2.21555757522583, "log_odds_ratio": -0.3396691679954529, "logits/chosen": -0.8222780227661133, "logits/rejected": -0.8406798243522644, "logps/chosen": -0.6639728546142578, "logps/rejected": -2.3962881565093994, "loss": 0.9818, "nll_loss": 0.8563815355300903, "rewards/accuracies": 0.875, "rewards/chosen": -0.0663972869515419, "rewards/margins": 0.17323154211044312, "rewards/rejected": -0.23962882161140442, "step": 2769 }, { "epoch": 1.689797163336892, "grad_norm": 2.310720682144165, "learning_rate": 5.315370483772198e-06, "log_odds_chosen": 1.873073697090149, "log_odds_ratio": -0.25241273641586304, "logits/chosen": -0.756070613861084, "logits/rejected": -0.9640892744064331, "logps/chosen": -0.8210798501968384, "logps/rejected": -2.227872848510742, "loss": 0.9608, "nll_loss": 0.952482283115387, "rewards/accuracies": 1.0, "rewards/chosen": -0.08210799098014832, "rewards/margins": 0.14067929983139038, "rewards/rejected": -0.2227872908115387, "step": 2770 }, { "epoch": 1.6904071984139089, "grad_norm": 2.1880598068237305, "learning_rate": 5.314390691977954e-06, "log_odds_chosen": 1.2804219722747803, "log_odds_ratio": -0.676307737827301, "logits/chosen": -0.6936706304550171, "logits/rejected": -0.6834840774536133, "logps/chosen": -0.8802645802497864, "logps/rejected": -1.6059303283691406, "loss": 1.0108, "nll_loss": 1.0460336208343506, "rewards/accuracies": 0.5, "rewards/chosen": -0.08802645653486252, "rewards/margins": 0.07256658375263214, "rewards/rejected": -0.16059303283691406, "step": 2771 }, { "epoch": 1.6910172334909257, "grad_norm": 3.0822014808654785, "learning_rate": 5.31341090018371e-06, "log_odds_chosen": 1.0021761655807495, "log_odds_ratio": -0.4732077419757843, "logits/chosen": -0.8053500652313232, "logits/rejected": -0.879194974899292, "logps/chosen": -0.8597350120544434, "logps/rejected": -1.5288982391357422, "loss": 1.1928, "nll_loss": 0.9918496608734131, "rewards/accuracies": 0.625, "rewards/chosen": -0.08597350120544434, "rewards/margins": 0.0669163167476654, "rewards/rejected": -0.15288983285427094, "step": 2772 }, { "epoch": 1.6916272685679425, "grad_norm": 1.9777010679244995, "learning_rate": 5.312431108389467e-06, "log_odds_chosen": 1.183390736579895, "log_odds_ratio": -0.4700350761413574, "logits/chosen": -0.8654969930648804, "logits/rejected": -0.6724052429199219, "logps/chosen": -0.835809588432312, "logps/rejected": -1.6438186168670654, "loss": 0.9193, "nll_loss": 0.958568811416626, "rewards/accuracies": 0.625, "rewards/chosen": -0.08358096331357956, "rewards/margins": 0.08080089837312698, "rewards/rejected": -0.16438186168670654, "step": 2773 }, { "epoch": 1.6922373036449596, "grad_norm": 1.271889328956604, "learning_rate": 5.3114513165952235e-06, "log_odds_chosen": 1.2554330825805664, "log_odds_ratio": -0.6234678030014038, "logits/chosen": -0.9383845329284668, "logits/rejected": -0.9053272008895874, "logps/chosen": -0.809859573841095, "logps/rejected": -2.006774663925171, "loss": 1.2425, "nll_loss": 1.1233707666397095, "rewards/accuracies": 0.75, "rewards/chosen": -0.08098596334457397, "rewards/margins": 0.11969150602817535, "rewards/rejected": -0.20067745447158813, "step": 2774 }, { "epoch": 1.6928473387219767, "grad_norm": 1.2467941045761108, "learning_rate": 5.31047152480098e-06, "log_odds_chosen": 0.07997351139783859, "log_odds_ratio": -0.691266655921936, "logits/chosen": -1.029811143875122, "logits/rejected": -0.842435359954834, "logps/chosen": -0.97102952003479, "logps/rejected": -1.0308837890625, "loss": 1.0117, "nll_loss": 1.0798444747924805, "rewards/accuracies": 0.625, "rewards/chosen": -0.09710295498371124, "rewards/margins": 0.005985427647829056, "rewards/rejected": -0.1030883863568306, "step": 2775 }, { "epoch": 1.6934573737989935, "grad_norm": 1.7196073532104492, "learning_rate": 5.309491733006736e-06, "log_odds_chosen": 0.25756025314331055, "log_odds_ratio": -0.849455714225769, "logits/chosen": -0.9029291272163391, "logits/rejected": -0.9427890777587891, "logps/chosen": -1.0926735401153564, "logps/rejected": -1.2567123174667358, "loss": 1.2311, "nll_loss": 1.2587709426879883, "rewards/accuracies": 0.25, "rewards/chosen": -0.10926736146211624, "rewards/margins": 0.01640387251973152, "rewards/rejected": -0.12567123770713806, "step": 2776 }, { "epoch": 1.6940674088760104, "grad_norm": 3.48403000831604, "learning_rate": 5.308511941212492e-06, "log_odds_chosen": 2.0813863277435303, "log_odds_ratio": -0.4097210168838501, "logits/chosen": -0.6219509840011597, "logits/rejected": -0.8011910319328308, "logps/chosen": -0.6937106847763062, "logps/rejected": -2.2237794399261475, "loss": 1.0771, "nll_loss": 0.845399796962738, "rewards/accuracies": 0.75, "rewards/chosen": -0.06937107443809509, "rewards/margins": 0.1530068814754486, "rewards/rejected": -0.2223779559135437, "step": 2777 }, { "epoch": 1.6946774439530272, "grad_norm": 2.1261842250823975, "learning_rate": 5.307532149418248e-06, "log_odds_chosen": 1.0265017747879028, "log_odds_ratio": -0.48514118790626526, "logits/chosen": -0.801636278629303, "logits/rejected": -0.711544394493103, "logps/chosen": -0.7876126766204834, "logps/rejected": -1.571906328201294, "loss": 1.0165, "nll_loss": 0.9755597114562988, "rewards/accuracies": 0.625, "rewards/chosen": -0.0787612646818161, "rewards/margins": 0.07842937111854553, "rewards/rejected": -0.15719063580036163, "step": 2778 }, { "epoch": 1.695287479030044, "grad_norm": 1.4744808673858643, "learning_rate": 5.306552357624005e-06, "log_odds_chosen": 1.777113437652588, "log_odds_ratio": -0.48491841554641724, "logits/chosen": -0.9200356006622314, "logits/rejected": -0.8815619945526123, "logps/chosen": -0.6080329418182373, "logps/rejected": -1.7349793910980225, "loss": 1.0856, "nll_loss": 1.0105040073394775, "rewards/accuracies": 0.625, "rewards/chosen": -0.06080329418182373, "rewards/margins": 0.112694650888443, "rewards/rejected": -0.17349794507026672, "step": 2779 }, { "epoch": 1.6958975141070611, "grad_norm": 1.3918802738189697, "learning_rate": 5.305572565829761e-06, "log_odds_chosen": 1.128379464149475, "log_odds_ratio": -0.45835185050964355, "logits/chosen": -0.7946621775627136, "logits/rejected": -0.8006933927536011, "logps/chosen": -0.7300557494163513, "logps/rejected": -1.5108927488327026, "loss": 1.185, "nll_loss": 0.9184452891349792, "rewards/accuracies": 0.75, "rewards/chosen": -0.07300557941198349, "rewards/margins": 0.07808370143175125, "rewards/rejected": -0.15108928084373474, "step": 2780 }, { "epoch": 1.6965075491840782, "grad_norm": 1.534116268157959, "learning_rate": 5.304592774035518e-06, "log_odds_chosen": 0.824465274810791, "log_odds_ratio": -0.6382598876953125, "logits/chosen": -1.0157461166381836, "logits/rejected": -0.9516314268112183, "logps/chosen": -0.8336345553398132, "logps/rejected": -1.5519516468048096, "loss": 1.0981, "nll_loss": 1.215736746788025, "rewards/accuracies": 0.5, "rewards/chosen": -0.08336345851421356, "rewards/margins": 0.07183169573545456, "rewards/rejected": -0.15519516170024872, "step": 2781 }, { "epoch": 1.697117584261095, "grad_norm": 5.148177623748779, "learning_rate": 5.303612982241273e-06, "log_odds_chosen": 1.9804718494415283, "log_odds_ratio": -0.4468397796154022, "logits/chosen": -0.8446058034896851, "logits/rejected": -0.8241176009178162, "logps/chosen": -0.8100568056106567, "logps/rejected": -2.460662364959717, "loss": 0.9791, "nll_loss": 0.9291671514511108, "rewards/accuracies": 0.625, "rewards/chosen": -0.08100569248199463, "rewards/margins": 0.16506054997444153, "rewards/rejected": -0.24606622755527496, "step": 2782 }, { "epoch": 1.697727619338112, "grad_norm": 2.593640089035034, "learning_rate": 5.3026331904470294e-06, "log_odds_chosen": 2.2705607414245605, "log_odds_ratio": -0.26773324608802795, "logits/chosen": -0.7832115888595581, "logits/rejected": -1.0112448930740356, "logps/chosen": -0.6211471557617188, "logps/rejected": -2.2746803760528564, "loss": 1.0144, "nll_loss": 0.776494562625885, "rewards/accuracies": 1.0, "rewards/chosen": -0.06211471930146217, "rewards/margins": 0.16535332798957825, "rewards/rejected": -0.22746805846691132, "step": 2783 }, { "epoch": 1.6983376544151287, "grad_norm": 2.035249948501587, "learning_rate": 5.301653398652786e-06, "log_odds_chosen": 1.3704913854599, "log_odds_ratio": -0.4709094166755676, "logits/chosen": -0.9769405126571655, "logits/rejected": -1.0362513065338135, "logps/chosen": -0.9074689745903015, "logps/rejected": -2.0105438232421875, "loss": 1.131, "nll_loss": 1.1111764907836914, "rewards/accuracies": 0.75, "rewards/chosen": -0.09074689447879791, "rewards/margins": 0.1103074699640274, "rewards/rejected": -0.2010543793439865, "step": 2784 }, { "epoch": 1.6989476894921458, "grad_norm": 2.3448095321655273, "learning_rate": 5.3006736068585425e-06, "log_odds_chosen": 2.306980609893799, "log_odds_ratio": -0.2645193934440613, "logits/chosen": -0.804880678653717, "logits/rejected": -0.8743611574172974, "logps/chosen": -0.665048360824585, "logps/rejected": -2.3096752166748047, "loss": 1.0041, "nll_loss": 1.0032869577407837, "rewards/accuracies": 0.875, "rewards/chosen": -0.0665048360824585, "rewards/margins": 0.16446268558502197, "rewards/rejected": -0.23096752166748047, "step": 2785 }, { "epoch": 1.6995577245691629, "grad_norm": 1.2935504913330078, "learning_rate": 5.299693815064299e-06, "log_odds_chosen": 1.8410426378250122, "log_odds_ratio": -0.430039644241333, "logits/chosen": -1.003649115562439, "logits/rejected": -0.86826092004776, "logps/chosen": -0.9685797691345215, "logps/rejected": -2.421210765838623, "loss": 1.1593, "nll_loss": 1.0844918489456177, "rewards/accuracies": 0.75, "rewards/chosen": -0.09685797989368439, "rewards/margins": 0.14526310563087463, "rewards/rejected": -0.24212107062339783, "step": 2786 }, { "epoch": 1.7001677596461797, "grad_norm": 1.2753238677978516, "learning_rate": 5.298714023270055e-06, "log_odds_chosen": 1.2749357223510742, "log_odds_ratio": -0.35773026943206787, "logits/chosen": -0.834159255027771, "logits/rejected": -1.0493372678756714, "logps/chosen": -0.7852953672409058, "logps/rejected": -1.656749963760376, "loss": 1.0292, "nll_loss": 0.9017449021339417, "rewards/accuracies": 0.875, "rewards/chosen": -0.07852953672409058, "rewards/margins": 0.08714547753334045, "rewards/rejected": -0.16567501425743103, "step": 2787 }, { "epoch": 1.7007777947231966, "grad_norm": 1.3185087442398071, "learning_rate": 5.297734231475811e-06, "log_odds_chosen": 1.3912056684494019, "log_odds_ratio": -0.4963046908378601, "logits/chosen": -0.695421040058136, "logits/rejected": -0.8774069547653198, "logps/chosen": -0.6601350903511047, "logps/rejected": -1.5201424360275269, "loss": 1.0307, "nll_loss": 1.0641320943832397, "rewards/accuracies": 0.625, "rewards/chosen": -0.06601350754499435, "rewards/margins": 0.08600073307752609, "rewards/rejected": -0.15201424062252045, "step": 2788 }, { "epoch": 1.7013878298002134, "grad_norm": 1.7403210401535034, "learning_rate": 5.296754439681567e-06, "log_odds_chosen": 1.0763860940933228, "log_odds_ratio": -0.36632049083709717, "logits/chosen": -0.5053740739822388, "logits/rejected": -0.7642527222633362, "logps/chosen": -0.6844410300254822, "logps/rejected": -1.2856595516204834, "loss": 1.0146, "nll_loss": 0.8417565822601318, "rewards/accuracies": 0.75, "rewards/chosen": -0.06844410300254822, "rewards/margins": 0.06012185662984848, "rewards/rejected": -0.1285659670829773, "step": 2789 }, { "epoch": 1.7019978648772305, "grad_norm": 1.6509512662887573, "learning_rate": 5.295774647887324e-06, "log_odds_chosen": 2.225905179977417, "log_odds_ratio": -0.46634307503700256, "logits/chosen": -0.7316932678222656, "logits/rejected": -0.9290206432342529, "logps/chosen": -0.5522904992103577, "logps/rejected": -2.30845308303833, "loss": 1.0473, "nll_loss": 0.8441755771636963, "rewards/accuracies": 0.625, "rewards/chosen": -0.05522904917597771, "rewards/margins": 0.17561626434326172, "rewards/rejected": -0.23084530234336853, "step": 2790 }, { "epoch": 1.7026078999542473, "grad_norm": 1.9343478679656982, "learning_rate": 5.29479485609308e-06, "log_odds_chosen": 1.3495593070983887, "log_odds_ratio": -0.4719836115837097, "logits/chosen": -0.9393724203109741, "logits/rejected": -0.9788820743560791, "logps/chosen": -0.7160208821296692, "logps/rejected": -1.724480390548706, "loss": 1.0619, "nll_loss": 0.9016902446746826, "rewards/accuracies": 0.625, "rewards/chosen": -0.07160209119319916, "rewards/margins": 0.10084596276283264, "rewards/rejected": -0.1724480539560318, "step": 2791 }, { "epoch": 1.7032179350312644, "grad_norm": 1.5562278032302856, "learning_rate": 5.293815064298836e-06, "log_odds_chosen": 1.0572963953018188, "log_odds_ratio": -0.5838200449943542, "logits/chosen": -0.7870614528656006, "logits/rejected": -0.6770976781845093, "logps/chosen": -0.8343592882156372, "logps/rejected": -1.6265519857406616, "loss": 1.1346, "nll_loss": 0.949266254901886, "rewards/accuracies": 0.625, "rewards/chosen": -0.08343592286109924, "rewards/margins": 0.0792192742228508, "rewards/rejected": -0.16265518963336945, "step": 2792 }, { "epoch": 1.7038279701082812, "grad_norm": 1.9618518352508545, "learning_rate": 5.292835272504592e-06, "log_odds_chosen": 3.2779765129089355, "log_odds_ratio": -0.2741166055202484, "logits/chosen": -0.8339167833328247, "logits/rejected": -1.0127347707748413, "logps/chosen": -0.5744446516036987, "logps/rejected": -3.190849781036377, "loss": 0.9632, "nll_loss": 0.7816007137298584, "rewards/accuracies": 0.75, "rewards/chosen": -0.05744446814060211, "rewards/margins": 0.2616405189037323, "rewards/rejected": -0.3190849721431732, "step": 2793 }, { "epoch": 1.704438005185298, "grad_norm": 1.7288246154785156, "learning_rate": 5.2918554807103485e-06, "log_odds_chosen": 0.7464392185211182, "log_odds_ratio": -0.4983115792274475, "logits/chosen": -0.9470498561859131, "logits/rejected": -0.9362658262252808, "logps/chosen": -1.1111907958984375, "logps/rejected": -1.7091251611709595, "loss": 1.0885, "nll_loss": 1.2570648193359375, "rewards/accuracies": 0.625, "rewards/chosen": -0.1111190915107727, "rewards/margins": 0.05979343131184578, "rewards/rejected": -0.17091251909732819, "step": 2794 }, { "epoch": 1.705048040262315, "grad_norm": 1.5005249977111816, "learning_rate": 5.2908756889161054e-06, "log_odds_chosen": 1.4262232780456543, "log_odds_ratio": -0.3534543216228485, "logits/chosen": -0.7121496796607971, "logits/rejected": -0.7495580911636353, "logps/chosen": -0.8361847400665283, "logps/rejected": -1.8120675086975098, "loss": 0.9546, "nll_loss": 0.8359470367431641, "rewards/accuracies": 0.875, "rewards/chosen": -0.08361846953630447, "rewards/margins": 0.09758829325437546, "rewards/rejected": -0.18120676279067993, "step": 2795 }, { "epoch": 1.705658075339332, "grad_norm": 1.4432960748672485, "learning_rate": 5.2898958971218616e-06, "log_odds_chosen": 0.6071670055389404, "log_odds_ratio": -0.7734783887863159, "logits/chosen": -0.811576783657074, "logits/rejected": -0.9113979935646057, "logps/chosen": -0.9776575565338135, "logps/rejected": -1.571086049079895, "loss": 1.0021, "nll_loss": 1.0663690567016602, "rewards/accuracies": 0.375, "rewards/chosen": -0.09776575863361359, "rewards/margins": 0.05934286117553711, "rewards/rejected": -0.1571086198091507, "step": 2796 }, { "epoch": 1.706268110416349, "grad_norm": 1.475090742111206, "learning_rate": 5.288916105327618e-06, "log_odds_chosen": 2.3749098777770996, "log_odds_ratio": -0.44626057147979736, "logits/chosen": -0.7635031938552856, "logits/rejected": -0.9094822406768799, "logps/chosen": -0.6144595146179199, "logps/rejected": -2.451451301574707, "loss": 0.9024, "nll_loss": 0.6694636940956116, "rewards/accuracies": 0.625, "rewards/chosen": -0.06144595146179199, "rewards/margins": 0.18369919061660767, "rewards/rejected": -0.24514514207839966, "step": 2797 }, { "epoch": 1.706878145493366, "grad_norm": 1.7703474760055542, "learning_rate": 5.287936313533375e-06, "log_odds_chosen": 0.9408376216888428, "log_odds_ratio": -1.0142638683319092, "logits/chosen": -0.5266956090927124, "logits/rejected": -0.6412793397903442, "logps/chosen": -0.9970877766609192, "logps/rejected": -1.6913615465164185, "loss": 1.0158, "nll_loss": 0.9119540452957153, "rewards/accuracies": 0.625, "rewards/chosen": -0.09970878809690475, "rewards/margins": 0.06942737102508545, "rewards/rejected": -0.1691361516714096, "step": 2798 }, { "epoch": 1.7074881805703828, "grad_norm": 1.8408417701721191, "learning_rate": 5.28695652173913e-06, "log_odds_chosen": 0.24758988618850708, "log_odds_ratio": -0.6762844920158386, "logits/chosen": -0.6736915707588196, "logits/rejected": -0.7272194623947144, "logps/chosen": -1.1437568664550781, "logps/rejected": -1.2376621961593628, "loss": 1.2253, "nll_loss": 1.190023422241211, "rewards/accuracies": 0.5, "rewards/chosen": -0.11437568068504333, "rewards/margins": 0.009390540421009064, "rewards/rejected": -0.1237662136554718, "step": 2799 }, { "epoch": 1.7080982156473996, "grad_norm": 1.9495512247085571, "learning_rate": 5.285976729944886e-06, "log_odds_chosen": 2.4287643432617188, "log_odds_ratio": -0.3639467656612396, "logits/chosen": -0.7258942723274231, "logits/rejected": -1.0333694219589233, "logps/chosen": -0.8980337381362915, "logps/rejected": -2.9575936794281006, "loss": 1.196, "nll_loss": 1.1346995830535889, "rewards/accuracies": 0.75, "rewards/chosen": -0.08980338275432587, "rewards/margins": 0.20595599710941315, "rewards/rejected": -0.295759379863739, "step": 2800 }, { "epoch": 1.7087082507244167, "grad_norm": 1.1540052890777588, "learning_rate": 5.284996938150643e-06, "log_odds_chosen": 2.4627180099487305, "log_odds_ratio": -0.19352364540100098, "logits/chosen": -0.7092860341072083, "logits/rejected": -0.834132194519043, "logps/chosen": -0.6284768581390381, "logps/rejected": -2.3576955795288086, "loss": 1.1392, "nll_loss": 0.9293639063835144, "rewards/accuracies": 0.875, "rewards/chosen": -0.06284768134355545, "rewards/margins": 0.17292189598083496, "rewards/rejected": -0.23576956987380981, "step": 2801 }, { "epoch": 1.7093182858014337, "grad_norm": 1.6289503574371338, "learning_rate": 5.284017146356399e-06, "log_odds_chosen": 1.1065020561218262, "log_odds_ratio": -0.6348105669021606, "logits/chosen": -0.9315631985664368, "logits/rejected": -0.985313355922699, "logps/chosen": -0.9262785315513611, "logps/rejected": -1.8168284893035889, "loss": 1.1293, "nll_loss": 1.1542491912841797, "rewards/accuracies": 0.5, "rewards/chosen": -0.09262785315513611, "rewards/margins": 0.08905500918626785, "rewards/rejected": -0.18168285489082336, "step": 2802 }, { "epoch": 1.7099283208784506, "grad_norm": 1.195595145225525, "learning_rate": 5.283037354562155e-06, "log_odds_chosen": 0.11003702878952026, "log_odds_ratio": -0.9899422526359558, "logits/chosen": -0.8116940259933472, "logits/rejected": -0.8443759679794312, "logps/chosen": -1.2446445226669312, "logps/rejected": -1.2954903841018677, "loss": 1.0586, "nll_loss": 1.1523990631103516, "rewards/accuracies": 0.375, "rewards/chosen": -0.12446445226669312, "rewards/margins": 0.005084585398435593, "rewards/rejected": -0.1295490264892578, "step": 2803 }, { "epoch": 1.7105383559554674, "grad_norm": 1.5161741971969604, "learning_rate": 5.282057562767911e-06, "log_odds_chosen": 1.4467922449111938, "log_odds_ratio": -0.46233731508255005, "logits/chosen": -0.7170931100845337, "logits/rejected": -0.7769380807876587, "logps/chosen": -0.8042218089103699, "logps/rejected": -1.9380862712860107, "loss": 1.0491, "nll_loss": 1.0528349876403809, "rewards/accuracies": 0.625, "rewards/chosen": -0.08042219281196594, "rewards/margins": 0.11338643729686737, "rewards/rejected": -0.19380861520767212, "step": 2804 }, { "epoch": 1.7111483910324843, "grad_norm": 5.814593315124512, "learning_rate": 5.2810777709736675e-06, "log_odds_chosen": 0.8187794089317322, "log_odds_ratio": -0.5698307752609253, "logits/chosen": -1.0052783489227295, "logits/rejected": -0.9687407612800598, "logps/chosen": -1.0488879680633545, "logps/rejected": -1.6851012706756592, "loss": 1.1541, "nll_loss": 1.1345064640045166, "rewards/accuracies": 0.625, "rewards/chosen": -0.10488880425691605, "rewards/margins": 0.06362133473157883, "rewards/rejected": -0.16851013898849487, "step": 2805 }, { "epoch": 1.7117584261095011, "grad_norm": 1.725322961807251, "learning_rate": 5.280097979179424e-06, "log_odds_chosen": 0.26403236389160156, "log_odds_ratio": -0.721917986869812, "logits/chosen": -1.156463384628296, "logits/rejected": -1.0865873098373413, "logps/chosen": -1.4203035831451416, "logps/rejected": -1.6446597576141357, "loss": 1.3174, "nll_loss": 1.5763213634490967, "rewards/accuracies": 0.375, "rewards/chosen": -0.14203035831451416, "rewards/margins": 0.022435616701841354, "rewards/rejected": -0.1644659787416458, "step": 2806 }, { "epoch": 1.7123684611865182, "grad_norm": 2.534454345703125, "learning_rate": 5.279118187385181e-06, "log_odds_chosen": 0.8629266023635864, "log_odds_ratio": -0.4550536274909973, "logits/chosen": -0.6170623302459717, "logits/rejected": -0.8900168538093567, "logps/chosen": -0.7037513256072998, "logps/rejected": -1.1781139373779297, "loss": 0.9983, "nll_loss": 0.8830133080482483, "rewards/accuracies": 0.75, "rewards/chosen": -0.07037512958049774, "rewards/margins": 0.04743627458810806, "rewards/rejected": -0.1178114041686058, "step": 2807 }, { "epoch": 1.7129784962635353, "grad_norm": 1.3015272617340088, "learning_rate": 5.278138395590937e-06, "log_odds_chosen": 0.9223639369010925, "log_odds_ratio": -0.651095449924469, "logits/chosen": -0.7533515095710754, "logits/rejected": -0.7161415815353394, "logps/chosen": -0.8114967346191406, "logps/rejected": -1.558468222618103, "loss": 1.0963, "nll_loss": 0.9152194857597351, "rewards/accuracies": 0.625, "rewards/chosen": -0.08114967495203018, "rewards/margins": 0.07469714432954788, "rewards/rejected": -0.15584681928157806, "step": 2808 }, { "epoch": 1.7135885313405521, "grad_norm": 2.4923720359802246, "learning_rate": 5.277158603796694e-06, "log_odds_chosen": 0.029667675495147705, "log_odds_ratio": -0.7567608952522278, "logits/chosen": -1.0092180967330933, "logits/rejected": -0.9254239797592163, "logps/chosen": -1.0074090957641602, "logps/rejected": -1.1004900932312012, "loss": 1.0833, "nll_loss": 1.0981388092041016, "rewards/accuracies": 0.375, "rewards/chosen": -0.10074090957641602, "rewards/margins": 0.009308092296123505, "rewards/rejected": -0.11004900932312012, "step": 2809 }, { "epoch": 1.714198566417569, "grad_norm": 1.3110692501068115, "learning_rate": 5.276178812002449e-06, "log_odds_chosen": 0.9892473816871643, "log_odds_ratio": -0.4657340347766876, "logits/chosen": -0.930747389793396, "logits/rejected": -0.7749007940292358, "logps/chosen": -0.8628420829772949, "logps/rejected": -1.6639759540557861, "loss": 1.1589, "nll_loss": 1.1265745162963867, "rewards/accuracies": 0.875, "rewards/chosen": -0.08628420531749725, "rewards/margins": 0.08011339604854584, "rewards/rejected": -0.1663976013660431, "step": 2810 }, { "epoch": 1.7148086014945858, "grad_norm": 1.4606083631515503, "learning_rate": 5.275199020208205e-06, "log_odds_chosen": 0.7685062289237976, "log_odds_ratio": -0.5451632142066956, "logits/chosen": -1.0425753593444824, "logits/rejected": -0.9922940135002136, "logps/chosen": -1.0820953845977783, "logps/rejected": -1.6814429759979248, "loss": 1.1071, "nll_loss": 1.2316515445709229, "rewards/accuracies": 0.5, "rewards/chosen": -0.10820954293012619, "rewards/margins": 0.059934765100479126, "rewards/rejected": -0.1681443154811859, "step": 2811 }, { "epoch": 1.7154186365716029, "grad_norm": 8.314311027526855, "learning_rate": 5.274219228413962e-06, "log_odds_chosen": 1.9187124967575073, "log_odds_ratio": -0.34191247820854187, "logits/chosen": -0.718193769454956, "logits/rejected": -0.8661776185035706, "logps/chosen": -0.8331862092018127, "logps/rejected": -2.3493783473968506, "loss": 1.1884, "nll_loss": 0.9773688912391663, "rewards/accuracies": 0.75, "rewards/chosen": -0.08331862092018127, "rewards/margins": 0.15161922574043274, "rewards/rejected": -0.234937846660614, "step": 2812 }, { "epoch": 1.71602867164862, "grad_norm": 1.8490289449691772, "learning_rate": 5.273239436619718e-06, "log_odds_chosen": 0.8851927518844604, "log_odds_ratio": -0.4916200339794159, "logits/chosen": -1.041567325592041, "logits/rejected": -1.126624584197998, "logps/chosen": -0.8991339802742004, "logps/rejected": -1.4499027729034424, "loss": 1.0881, "nll_loss": 1.0668612718582153, "rewards/accuracies": 0.625, "rewards/chosen": -0.08991339802742004, "rewards/margins": 0.055076874792575836, "rewards/rejected": -0.14499028027057648, "step": 2813 }, { "epoch": 1.7166387067256368, "grad_norm": 5.786545276641846, "learning_rate": 5.272259644825474e-06, "log_odds_chosen": 0.49176520109176636, "log_odds_ratio": -0.5569891929626465, "logits/chosen": -1.0999658107757568, "logits/rejected": -1.0818406343460083, "logps/chosen": -1.0667694807052612, "logps/rejected": -1.5039561986923218, "loss": 1.2495, "nll_loss": 1.2943506240844727, "rewards/accuracies": 0.5, "rewards/chosen": -0.10667693614959717, "rewards/margins": 0.04371868073940277, "rewards/rejected": -0.15039561688899994, "step": 2814 }, { "epoch": 1.7172487418026536, "grad_norm": 1.9854822158813477, "learning_rate": 5.271279853031231e-06, "log_odds_chosen": 0.8465070724487305, "log_odds_ratio": -0.5210913419723511, "logits/chosen": -0.8597835302352905, "logits/rejected": -0.9013993144035339, "logps/chosen": -0.8382281064987183, "logps/rejected": -1.424358606338501, "loss": 0.9632, "nll_loss": 0.9665998220443726, "rewards/accuracies": 0.75, "rewards/chosen": -0.0838228166103363, "rewards/margins": 0.05861304700374603, "rewards/rejected": -0.14243584871292114, "step": 2815 }, { "epoch": 1.7178587768796705, "grad_norm": 1.8281813859939575, "learning_rate": 5.2703000612369866e-06, "log_odds_chosen": 1.8811135292053223, "log_odds_ratio": -0.4639696478843689, "logits/chosen": -0.5590851306915283, "logits/rejected": -0.7583044767379761, "logps/chosen": -0.8224055767059326, "logps/rejected": -2.257131576538086, "loss": 1.2035, "nll_loss": 1.1285701990127563, "rewards/accuracies": 0.75, "rewards/chosen": -0.08224056661128998, "rewards/margins": 0.1434726119041443, "rewards/rejected": -0.22571316361427307, "step": 2816 }, { "epoch": 1.7184688119566875, "grad_norm": 1.1853227615356445, "learning_rate": 5.269320269442743e-06, "log_odds_chosen": 0.235669806599617, "log_odds_ratio": -0.6823386549949646, "logits/chosen": -0.8578324913978577, "logits/rejected": -0.7622467875480652, "logps/chosen": -0.7528314590454102, "logps/rejected": -0.8985012769699097, "loss": 1.0973, "nll_loss": 1.0740337371826172, "rewards/accuracies": 0.5, "rewards/chosen": -0.07528315484523773, "rewards/margins": 0.014566978439688683, "rewards/rejected": -0.08985012769699097, "step": 2817 }, { "epoch": 1.7190788470337044, "grad_norm": 2.6349751949310303, "learning_rate": 5.2683404776485e-06, "log_odds_chosen": 1.1694238185882568, "log_odds_ratio": -0.5572564005851746, "logits/chosen": -0.6278109550476074, "logits/rejected": -0.6767251491546631, "logps/chosen": -0.8057659268379211, "logps/rejected": -1.6008532047271729, "loss": 1.1783, "nll_loss": 0.9200262427330017, "rewards/accuracies": 0.5, "rewards/chosen": -0.08057659864425659, "rewards/margins": 0.07950873672962189, "rewards/rejected": -0.16008533537387848, "step": 2818 }, { "epoch": 1.7196888821107215, "grad_norm": 2.0657408237457275, "learning_rate": 5.267360685854256e-06, "log_odds_chosen": 1.1847703456878662, "log_odds_ratio": -0.5104296803474426, "logits/chosen": -0.7409685254096985, "logits/rejected": -0.731627345085144, "logps/chosen": -0.9257452487945557, "logps/rejected": -1.887953758239746, "loss": 1.0368, "nll_loss": 1.1665871143341064, "rewards/accuracies": 0.625, "rewards/chosen": -0.09257452934980392, "rewards/margins": 0.09622084349393845, "rewards/rejected": -0.18879537284374237, "step": 2819 }, { "epoch": 1.7202989171877383, "grad_norm": 2.023468017578125, "learning_rate": 5.266380894060012e-06, "log_odds_chosen": 1.2268694639205933, "log_odds_ratio": -0.4445848762989044, "logits/chosen": -0.8469048738479614, "logits/rejected": -0.8427537083625793, "logps/chosen": -0.9097914099693298, "logps/rejected": -1.838196873664856, "loss": 1.1057, "nll_loss": 1.0781970024108887, "rewards/accuracies": 0.875, "rewards/chosen": -0.09097914397716522, "rewards/margins": 0.09284055978059769, "rewards/rejected": -0.1838196963071823, "step": 2820 }, { "epoch": 1.7209089522647552, "grad_norm": 1.6516622304916382, "learning_rate": 5.265401102265769e-06, "log_odds_chosen": 2.634082794189453, "log_odds_ratio": -0.4224047064781189, "logits/chosen": -0.7360752820968628, "logits/rejected": -0.8844661712646484, "logps/chosen": -1.0850324630737305, "logps/rejected": -3.34854793548584, "loss": 1.1969, "nll_loss": 1.1142228841781616, "rewards/accuracies": 0.75, "rewards/chosen": -0.10850325226783752, "rewards/margins": 0.2263515144586563, "rewards/rejected": -0.33485478162765503, "step": 2821 }, { "epoch": 1.721518987341772, "grad_norm": 1.0365216732025146, "learning_rate": 5.264421310471524e-06, "log_odds_chosen": 0.847777783870697, "log_odds_ratio": -0.5827749967575073, "logits/chosen": -0.7813401222229004, "logits/rejected": -0.8518154621124268, "logps/chosen": -0.7441768646240234, "logps/rejected": -1.277956247329712, "loss": 1.0582, "nll_loss": 1.1001709699630737, "rewards/accuracies": 0.5, "rewards/chosen": -0.07441769540309906, "rewards/margins": 0.05337793380022049, "rewards/rejected": -0.12779562175273895, "step": 2822 }, { "epoch": 1.722129022418789, "grad_norm": 13.145355224609375, "learning_rate": 5.263441518677281e-06, "log_odds_chosen": 1.5109546184539795, "log_odds_ratio": -0.3435021936893463, "logits/chosen": -0.2629179060459137, "logits/rejected": -0.6228183507919312, "logps/chosen": -0.8405666351318359, "logps/rejected": -1.8628075122833252, "loss": 1.0333, "nll_loss": 0.9268733263015747, "rewards/accuracies": 0.875, "rewards/chosen": -0.08405666798353195, "rewards/margins": 0.10222408920526505, "rewards/rejected": -0.186280757188797, "step": 2823 }, { "epoch": 1.7227390574958061, "grad_norm": 1.1433790922164917, "learning_rate": 5.262461726883037e-06, "log_odds_chosen": 0.17356190085411072, "log_odds_ratio": -0.7787324786186218, "logits/chosen": -0.7434707880020142, "logits/rejected": -0.6213522553443909, "logps/chosen": -0.8032696843147278, "logps/rejected": -0.9296444654464722, "loss": 0.9988, "nll_loss": 0.868262529373169, "rewards/accuracies": 0.5, "rewards/chosen": -0.08032697439193726, "rewards/margins": 0.01263747364282608, "rewards/rejected": -0.09296444058418274, "step": 2824 }, { "epoch": 1.723349092572823, "grad_norm": 2.4804463386535645, "learning_rate": 5.261481935088793e-06, "log_odds_chosen": 1.6021851301193237, "log_odds_ratio": -0.3710736632347107, "logits/chosen": -0.9262493848800659, "logits/rejected": -0.9533915519714355, "logps/chosen": -0.8656114935874939, "logps/rejected": -2.019618511199951, "loss": 1.0001, "nll_loss": 1.2536007165908813, "rewards/accuracies": 0.75, "rewards/chosen": -0.08656114339828491, "rewards/margins": 0.11540068686008453, "rewards/rejected": -0.20196183025836945, "step": 2825 }, { "epoch": 1.7239591276498398, "grad_norm": 4.554933071136475, "learning_rate": 5.26050214329455e-06, "log_odds_chosen": 0.4779738783836365, "log_odds_ratio": -0.5982098579406738, "logits/chosen": -0.8582546710968018, "logits/rejected": -0.7930506467819214, "logps/chosen": -0.8882185220718384, "logps/rejected": -1.243881344795227, "loss": 1.0625, "nll_loss": 1.0273722410202026, "rewards/accuracies": 0.625, "rewards/chosen": -0.08882184326648712, "rewards/margins": 0.0355662927031517, "rewards/rejected": -0.12438814342021942, "step": 2826 }, { "epoch": 1.7245691627268567, "grad_norm": 2.322932004928589, "learning_rate": 5.259522351500306e-06, "log_odds_chosen": 1.3013434410095215, "log_odds_ratio": -0.5718357563018799, "logits/chosen": -0.8188011646270752, "logits/rejected": -0.750889003276825, "logps/chosen": -0.9182877540588379, "logps/rejected": -1.980238676071167, "loss": 1.1039, "nll_loss": 1.0482752323150635, "rewards/accuracies": 0.625, "rewards/chosen": -0.09182877838611603, "rewards/margins": 0.10619509220123291, "rewards/rejected": -0.19802387058734894, "step": 2827 }, { "epoch": 1.7251791978038737, "grad_norm": 2.142857313156128, "learning_rate": 5.258542559706062e-06, "log_odds_chosen": 2.548672914505005, "log_odds_ratio": -0.3406829833984375, "logits/chosen": -0.779271125793457, "logits/rejected": -0.7405529022216797, "logps/chosen": -0.6613498330116272, "logps/rejected": -2.796997547149658, "loss": 1.0552, "nll_loss": 1.0315511226654053, "rewards/accuracies": 0.75, "rewards/chosen": -0.0661349892616272, "rewards/margins": 0.21356476843357086, "rewards/rejected": -0.27969974279403687, "step": 2828 }, { "epoch": 1.7257892328808908, "grad_norm": 8.210741996765137, "learning_rate": 5.257562767911819e-06, "log_odds_chosen": 1.6405725479125977, "log_odds_ratio": -0.4568236470222473, "logits/chosen": -0.7882771492004395, "logits/rejected": -0.7662296295166016, "logps/chosen": -1.0921710729599, "logps/rejected": -2.5352869033813477, "loss": 0.9416, "nll_loss": 1.0882009267807007, "rewards/accuracies": 0.75, "rewards/chosen": -0.10921711474657059, "rewards/margins": 0.1443115919828415, "rewards/rejected": -0.2535287141799927, "step": 2829 }, { "epoch": 1.7263992679579077, "grad_norm": 1.2048274278640747, "learning_rate": 5.256582976117575e-06, "log_odds_chosen": 0.2470969259738922, "log_odds_ratio": -0.6515891551971436, "logits/chosen": -0.7366290092468262, "logits/rejected": -0.629479706287384, "logps/chosen": -0.9538973569869995, "logps/rejected": -1.0751140117645264, "loss": 1.1548, "nll_loss": 0.9796732664108276, "rewards/accuracies": 0.375, "rewards/chosen": -0.09538973122835159, "rewards/margins": 0.012121673673391342, "rewards/rejected": -0.10751140862703323, "step": 2830 }, { "epoch": 1.7270093030349245, "grad_norm": 1.7921929359436035, "learning_rate": 5.255603184323331e-06, "log_odds_chosen": 1.4574832916259766, "log_odds_ratio": -0.578372597694397, "logits/chosen": -0.7065088748931885, "logits/rejected": -0.7270188927650452, "logps/chosen": -1.1108324527740479, "logps/rejected": -2.385547161102295, "loss": 1.1117, "nll_loss": 1.2441824674606323, "rewards/accuracies": 0.5, "rewards/chosen": -0.1110832542181015, "rewards/margins": 0.12747147679328918, "rewards/rejected": -0.23855473101139069, "step": 2831 }, { "epoch": 1.7276193381119413, "grad_norm": 1.43605637550354, "learning_rate": 5.254623392529088e-06, "log_odds_chosen": 1.4291878938674927, "log_odds_ratio": -0.4515186548233032, "logits/chosen": -0.7556346654891968, "logits/rejected": -0.8299382925033569, "logps/chosen": -0.808157205581665, "logps/rejected": -1.9107742309570312, "loss": 1.1715, "nll_loss": 0.9400620460510254, "rewards/accuracies": 0.625, "rewards/chosen": -0.08081571757793427, "rewards/margins": 0.1102616935968399, "rewards/rejected": -0.19107742607593536, "step": 2832 }, { "epoch": 1.7282293731889584, "grad_norm": 1.3771392107009888, "learning_rate": 5.253643600734843e-06, "log_odds_chosen": 1.2734261751174927, "log_odds_ratio": -0.42987164855003357, "logits/chosen": -0.8029624819755554, "logits/rejected": -0.7046810388565063, "logps/chosen": -0.9173661470413208, "logps/rejected": -1.9026124477386475, "loss": 1.0967, "nll_loss": 0.9971457123756409, "rewards/accuracies": 0.875, "rewards/chosen": -0.09173661470413208, "rewards/margins": 0.09852464497089386, "rewards/rejected": -0.19026124477386475, "step": 2833 }, { "epoch": 1.7288394082659753, "grad_norm": 2.336082696914673, "learning_rate": 5.252663808940599e-06, "log_odds_chosen": 0.7343360185623169, "log_odds_ratio": -0.5371416807174683, "logits/chosen": -0.8485663533210754, "logits/rejected": -0.8170729875564575, "logps/chosen": -1.063692569732666, "logps/rejected": -1.6330595016479492, "loss": 1.191, "nll_loss": 1.2712193727493286, "rewards/accuracies": 0.75, "rewards/chosen": -0.1063692644238472, "rewards/margins": 0.05693669244647026, "rewards/rejected": -0.16330595314502716, "step": 2834 }, { "epoch": 1.7294494433429923, "grad_norm": 1.1361052989959717, "learning_rate": 5.251684017146356e-06, "log_odds_chosen": 0.9869979619979858, "log_odds_ratio": -0.655692458152771, "logits/chosen": -0.5236486196517944, "logits/rejected": -0.5289434790611267, "logps/chosen": -0.882683515548706, "logps/rejected": -1.6556620597839355, "loss": 0.9819, "nll_loss": 0.9163020849227905, "rewards/accuracies": 0.625, "rewards/chosen": -0.08826835453510284, "rewards/margins": 0.07729785144329071, "rewards/rejected": -0.16556620597839355, "step": 2835 }, { "epoch": 1.7300594784200092, "grad_norm": 3.4448633193969727, "learning_rate": 5.250704225352112e-06, "log_odds_chosen": 2.132436752319336, "log_odds_ratio": -0.34389784932136536, "logits/chosen": -0.6596208214759827, "logits/rejected": -0.6892653703689575, "logps/chosen": -0.731278657913208, "logps/rejected": -2.3110947608947754, "loss": 0.9277, "nll_loss": 0.8630212545394897, "rewards/accuracies": 0.75, "rewards/chosen": -0.0731278657913208, "rewards/margins": 0.15798160433769226, "rewards/rejected": -0.23110947012901306, "step": 2836 }, { "epoch": 1.730669513497026, "grad_norm": 2.2485315799713135, "learning_rate": 5.249724433557869e-06, "log_odds_chosen": 0.4680759906768799, "log_odds_ratio": -0.6478707790374756, "logits/chosen": -0.841579794883728, "logits/rejected": -0.8975107073783875, "logps/chosen": -1.0729634761810303, "logps/rejected": -1.4860451221466064, "loss": 1.1073, "nll_loss": 1.2271710634231567, "rewards/accuracies": 0.625, "rewards/chosen": -0.10729634761810303, "rewards/margins": 0.04130816459655762, "rewards/rejected": -0.14860451221466064, "step": 2837 }, { "epoch": 1.7312795485740429, "grad_norm": 1.0632843971252441, "learning_rate": 5.2487446417636255e-06, "log_odds_chosen": 1.0125677585601807, "log_odds_ratio": -0.6195001006126404, "logits/chosen": -0.6580902934074402, "logits/rejected": -0.6816972494125366, "logps/chosen": -0.9980877637863159, "logps/rejected": -1.6593117713928223, "loss": 1.0643, "nll_loss": 1.1928088665008545, "rewards/accuracies": 0.75, "rewards/chosen": -0.09980877488851547, "rewards/margins": 0.0661223977804184, "rewards/rejected": -0.16593118011951447, "step": 2838 }, { "epoch": 1.73188958365106, "grad_norm": 3.9720523357391357, "learning_rate": 5.247764849969381e-06, "log_odds_chosen": 0.5233697891235352, "log_odds_ratio": -0.6665458083152771, "logits/chosen": -0.9562082290649414, "logits/rejected": -0.8952009677886963, "logps/chosen": -0.9201028943061829, "logps/rejected": -1.2149852514266968, "loss": 1.0212, "nll_loss": 1.0626826286315918, "rewards/accuracies": 0.5, "rewards/chosen": -0.09201028943061829, "rewards/margins": 0.02948824129998684, "rewards/rejected": -0.12149852514266968, "step": 2839 }, { "epoch": 1.732499618728077, "grad_norm": 1.4964076280593872, "learning_rate": 5.246785058175138e-06, "log_odds_chosen": 1.954458236694336, "log_odds_ratio": -0.2698972821235657, "logits/chosen": -0.6122575998306274, "logits/rejected": -0.6112472414970398, "logps/chosen": -0.6993948221206665, "logps/rejected": -2.0709288120269775, "loss": 1.0117, "nll_loss": 0.9375929832458496, "rewards/accuracies": 0.875, "rewards/chosen": -0.06993948668241501, "rewards/margins": 0.13715338706970215, "rewards/rejected": -0.20709288120269775, "step": 2840 }, { "epoch": 1.7331096538050939, "grad_norm": 1.5732942819595337, "learning_rate": 5.245805266380894e-06, "log_odds_chosen": 1.6438674926757812, "log_odds_ratio": -0.35952824354171753, "logits/chosen": -0.697788417339325, "logits/rejected": -0.6628966331481934, "logps/chosen": -0.7170608639717102, "logps/rejected": -1.8957490921020508, "loss": 1.033, "nll_loss": 0.9321151971817017, "rewards/accuracies": 0.875, "rewards/chosen": -0.07170609384775162, "rewards/margins": 0.1178688257932663, "rewards/rejected": -0.18957491219043732, "step": 2841 }, { "epoch": 1.7337196888821107, "grad_norm": 1.1505569219589233, "learning_rate": 5.24482547458665e-06, "log_odds_chosen": 0.08588597178459167, "log_odds_ratio": -0.719416618347168, "logits/chosen": -0.7519586682319641, "logits/rejected": -0.7265371084213257, "logps/chosen": -1.1214313507080078, "logps/rejected": -1.173675298690796, "loss": 1.1868, "nll_loss": 1.0970721244812012, "rewards/accuracies": 0.5, "rewards/chosen": -0.1121431440114975, "rewards/margins": 0.005224378779530525, "rewards/rejected": -0.11736752092838287, "step": 2842 }, { "epoch": 1.7343297239591275, "grad_norm": 3.1352124214172363, "learning_rate": 5.243845682792407e-06, "log_odds_chosen": 0.015780285000801086, "log_odds_ratio": -0.7333073616027832, "logits/chosen": -0.8062366247177124, "logits/rejected": -0.799223005771637, "logps/chosen": -1.0132360458374023, "logps/rejected": -0.9584834575653076, "loss": 1.0817, "nll_loss": 1.1590335369110107, "rewards/accuracies": 0.375, "rewards/chosen": -0.10132358968257904, "rewards/margins": -0.005475259386003017, "rewards/rejected": -0.09584833681583405, "step": 2843 }, { "epoch": 1.7349397590361446, "grad_norm": 1.460900902748108, "learning_rate": 5.242865890998162e-06, "log_odds_chosen": 1.593283772468567, "log_odds_ratio": -0.4162641167640686, "logits/chosen": -0.5818689465522766, "logits/rejected": -0.6473700404167175, "logps/chosen": -0.6095595359802246, "logps/rejected": -1.804094910621643, "loss": 1.0449, "nll_loss": 0.8968101143836975, "rewards/accuracies": 0.75, "rewards/chosen": -0.0609559528529644, "rewards/margins": 0.1194535493850708, "rewards/rejected": -0.1804094910621643, "step": 2844 }, { "epoch": 1.7355497941131615, "grad_norm": 1.631714105606079, "learning_rate": 5.241886099203918e-06, "log_odds_chosen": 1.7030119895935059, "log_odds_ratio": -0.3570600748062134, "logits/chosen": -0.896950900554657, "logits/rejected": -0.934830367565155, "logps/chosen": -1.1769206523895264, "logps/rejected": -2.533618450164795, "loss": 1.1041, "nll_loss": 1.2717725038528442, "rewards/accuracies": 0.875, "rewards/chosen": -0.11769207566976547, "rewards/margins": 0.1356697678565979, "rewards/rejected": -0.25336185097694397, "step": 2845 }, { "epoch": 1.7361598291901785, "grad_norm": 1.2996251583099365, "learning_rate": 5.240906307409675e-06, "log_odds_chosen": 2.129690170288086, "log_odds_ratio": -0.2848163843154907, "logits/chosen": -0.6099296808242798, "logits/rejected": -0.6982994675636292, "logps/chosen": -0.7489544153213501, "logps/rejected": -2.3278002738952637, "loss": 1.0598, "nll_loss": 0.8858548402786255, "rewards/accuracies": 0.875, "rewards/chosen": -0.07489543408155441, "rewards/margins": 0.1578845977783203, "rewards/rejected": -0.23278003931045532, "step": 2846 }, { "epoch": 1.7367698642671954, "grad_norm": 2.513396739959717, "learning_rate": 5.2399265156154314e-06, "log_odds_chosen": 1.5385510921478271, "log_odds_ratio": -0.594028890132904, "logits/chosen": -0.7318214774131775, "logits/rejected": -0.5997973680496216, "logps/chosen": -0.951115608215332, "logps/rejected": -2.2230851650238037, "loss": 1.138, "nll_loss": 1.0400751829147339, "rewards/accuracies": 0.625, "rewards/chosen": -0.09511157125234604, "rewards/margins": 0.12719695270061493, "rewards/rejected": -0.22230851650238037, "step": 2847 }, { "epoch": 1.7373798993442122, "grad_norm": 1.775207281112671, "learning_rate": 5.2389467238211876e-06, "log_odds_chosen": 1.2084457874298096, "log_odds_ratio": -0.475993812084198, "logits/chosen": -0.5215975046157837, "logits/rejected": -0.6986162662506104, "logps/chosen": -0.7716225981712341, "logps/rejected": -1.4867123365402222, "loss": 1.014, "nll_loss": 1.0986344814300537, "rewards/accuracies": 0.75, "rewards/chosen": -0.0771622583270073, "rewards/margins": 0.0715089738368988, "rewards/rejected": -0.1486712396144867, "step": 2848 }, { "epoch": 1.737989934421229, "grad_norm": 1.3763047456741333, "learning_rate": 5.2379669320269445e-06, "log_odds_chosen": 1.7935734987258911, "log_odds_ratio": -0.4433932602405548, "logits/chosen": -0.730626106262207, "logits/rejected": -0.5854634046554565, "logps/chosen": -0.8236598968505859, "logps/rejected": -2.0377392768859863, "loss": 1.0844, "nll_loss": 0.9870719313621521, "rewards/accuracies": 0.625, "rewards/chosen": -0.0823659896850586, "rewards/margins": 0.12140792608261108, "rewards/rejected": -0.20377391576766968, "step": 2849 }, { "epoch": 1.7385999694982461, "grad_norm": 1.3015775680541992, "learning_rate": 5.2369871402327e-06, "log_odds_chosen": 1.5654538869857788, "log_odds_ratio": -0.40367013216018677, "logits/chosen": -0.657669186592102, "logits/rejected": -0.7343337535858154, "logps/chosen": -0.6543533802032471, "logps/rejected": -1.6544438600540161, "loss": 0.9769, "nll_loss": 0.982063889503479, "rewards/accuracies": 0.75, "rewards/chosen": -0.06543533504009247, "rewards/margins": 0.10000903159379959, "rewards/rejected": -0.16544437408447266, "step": 2850 }, { "epoch": 1.7392100045752632, "grad_norm": 3.234314203262329, "learning_rate": 5.236007348438457e-06, "log_odds_chosen": 0.4668397605419159, "log_odds_ratio": -0.6646698713302612, "logits/chosen": -0.9008992910385132, "logits/rejected": -0.8726464509963989, "logps/chosen": -1.0095393657684326, "logps/rejected": -1.2791457176208496, "loss": 1.0744, "nll_loss": 1.2466506958007812, "rewards/accuracies": 0.625, "rewards/chosen": -0.10095393657684326, "rewards/margins": 0.02696063183248043, "rewards/rejected": -0.12791456282138824, "step": 2851 }, { "epoch": 1.73982003965228, "grad_norm": 1.1505566835403442, "learning_rate": 5.235027556644213e-06, "log_odds_chosen": 2.257448673248291, "log_odds_ratio": -0.2637196481227875, "logits/chosen": -0.6071388721466064, "logits/rejected": -0.821049690246582, "logps/chosen": -0.6864190101623535, "logps/rejected": -2.4336633682250977, "loss": 1.0051, "nll_loss": 0.8617395162582397, "rewards/accuracies": 1.0, "rewards/chosen": -0.06864190101623535, "rewards/margins": 0.17472445964813232, "rewards/rejected": -0.24336636066436768, "step": 2852 }, { "epoch": 1.740430074729297, "grad_norm": 1.5693644285202026, "learning_rate": 5.234047764849969e-06, "log_odds_chosen": 1.3215208053588867, "log_odds_ratio": -0.40064531564712524, "logits/chosen": -1.065708875656128, "logits/rejected": -0.7777706384658813, "logps/chosen": -1.1777154207229614, "logps/rejected": -2.3292417526245117, "loss": 1.1516, "nll_loss": 1.300482153892517, "rewards/accuracies": 0.875, "rewards/chosen": -0.11777153611183167, "rewards/margins": 0.11515264958143234, "rewards/rejected": -0.2329241931438446, "step": 2853 }, { "epoch": 1.7410401098063137, "grad_norm": 1.4753046035766602, "learning_rate": 5.233067973055726e-06, "log_odds_chosen": 1.2268892526626587, "log_odds_ratio": -0.34843066334724426, "logits/chosen": -0.5398301482200623, "logits/rejected": -0.5980101227760315, "logps/chosen": -0.7978343963623047, "logps/rejected": -1.6558266878128052, "loss": 0.9516, "nll_loss": 0.8303232192993164, "rewards/accuracies": 0.875, "rewards/chosen": -0.07978343963623047, "rewards/margins": 0.08579923212528229, "rewards/rejected": -0.16558267176151276, "step": 2854 }, { "epoch": 1.7416501448833308, "grad_norm": 1.0616084337234497, "learning_rate": 5.232088181261482e-06, "log_odds_chosen": 2.035031318664551, "log_odds_ratio": -0.5578932762145996, "logits/chosen": -0.9925424456596375, "logits/rejected": -0.8530890345573425, "logps/chosen": -0.9609582424163818, "logps/rejected": -2.869859218597412, "loss": 1.2049, "nll_loss": 1.156950831413269, "rewards/accuracies": 0.5, "rewards/chosen": -0.09609582275152206, "rewards/margins": 0.1908901035785675, "rewards/rejected": -0.28698593378067017, "step": 2855 }, { "epoch": 1.7422601799603479, "grad_norm": 1.1992548704147339, "learning_rate": 5.231108389467237e-06, "log_odds_chosen": 0.7620038986206055, "log_odds_ratio": -0.45913198590278625, "logits/chosen": -0.8407225608825684, "logits/rejected": -0.6873990893363953, "logps/chosen": -0.8480520248413086, "logps/rejected": -1.3228802680969238, "loss": 1.0786, "nll_loss": 1.0376062393188477, "rewards/accuracies": 0.875, "rewards/chosen": -0.0848052054643631, "rewards/margins": 0.04748282954096794, "rewards/rejected": -0.13228803873062134, "step": 2856 }, { "epoch": 1.7428702150373647, "grad_norm": 1.4948838949203491, "learning_rate": 5.230128597672994e-06, "log_odds_chosen": 2.231945753097534, "log_odds_ratio": -0.28636011481285095, "logits/chosen": -0.7282116413116455, "logits/rejected": -0.8546703457832336, "logps/chosen": -0.6856443881988525, "logps/rejected": -2.2128007411956787, "loss": 0.9955, "nll_loss": 1.0387738943099976, "rewards/accuracies": 0.75, "rewards/chosen": -0.06856444478034973, "rewards/margins": 0.15271563827991486, "rewards/rejected": -0.2212800830602646, "step": 2857 }, { "epoch": 1.7434802501143816, "grad_norm": 1.1596713066101074, "learning_rate": 5.2291488058787505e-06, "log_odds_chosen": 0.20626266300678253, "log_odds_ratio": -0.7060211896896362, "logits/chosen": -1.0271977186203003, "logits/rejected": -0.9624778032302856, "logps/chosen": -1.010383129119873, "logps/rejected": -1.1856292486190796, "loss": 1.0397, "nll_loss": 1.1065372228622437, "rewards/accuracies": 0.5, "rewards/chosen": -0.10103832185268402, "rewards/margins": 0.017524614930152893, "rewards/rejected": -0.11856293678283691, "step": 2858 }, { "epoch": 1.7440902851913984, "grad_norm": 4.224849700927734, "learning_rate": 5.228169014084507e-06, "log_odds_chosen": 3.186093807220459, "log_odds_ratio": -0.17276419699192047, "logits/chosen": -0.7219542264938354, "logits/rejected": -0.6836525201797485, "logps/chosen": -0.6549166440963745, "logps/rejected": -3.020735263824463, "loss": 1.0486, "nll_loss": 0.7463926076889038, "rewards/accuracies": 1.0, "rewards/chosen": -0.06549166887998581, "rewards/margins": 0.23658187687397003, "rewards/rejected": -0.30207353830337524, "step": 2859 }, { "epoch": 1.7447003202684155, "grad_norm": 1.3903557062149048, "learning_rate": 5.2271892222902636e-06, "log_odds_chosen": 2.1337454319000244, "log_odds_ratio": -0.46732330322265625, "logits/chosen": -0.9239389896392822, "logits/rejected": -0.6729450225830078, "logps/chosen": -0.7829688787460327, "logps/rejected": -2.656906843185425, "loss": 1.0169, "nll_loss": 0.916033148765564, "rewards/accuracies": 0.875, "rewards/chosen": -0.07829688489437103, "rewards/margins": 0.18739379942417145, "rewards/rejected": -0.2656906843185425, "step": 2860 }, { "epoch": 1.7453103553454323, "grad_norm": 4.554537296295166, "learning_rate": 5.226209430496019e-06, "log_odds_chosen": 0.4678671956062317, "log_odds_ratio": -0.7290868759155273, "logits/chosen": -0.7495357394218445, "logits/rejected": -0.7789904475212097, "logps/chosen": -0.9878890514373779, "logps/rejected": -1.3810454607009888, "loss": 1.1477, "nll_loss": 1.08686363697052, "rewards/accuracies": 0.375, "rewards/chosen": -0.09878890961408615, "rewards/margins": 0.03931563347578049, "rewards/rejected": -0.13810452818870544, "step": 2861 }, { "epoch": 1.7459203904224494, "grad_norm": 2.443755626678467, "learning_rate": 5.225229638701775e-06, "log_odds_chosen": 2.808471202850342, "log_odds_ratio": -0.3491913080215454, "logits/chosen": -0.7442360520362854, "logits/rejected": -0.8464252948760986, "logps/chosen": -0.6926993727684021, "logps/rejected": -2.8613901138305664, "loss": 1.0689, "nll_loss": 1.1259968280792236, "rewards/accuracies": 0.875, "rewards/chosen": -0.06926993280649185, "rewards/margins": 0.2168690711259842, "rewards/rejected": -0.28613901138305664, "step": 2862 }, { "epoch": 1.7465304254994662, "grad_norm": 3.9234964847564697, "learning_rate": 5.224249846907532e-06, "log_odds_chosen": 1.7468862533569336, "log_odds_ratio": -0.35084137320518494, "logits/chosen": -0.5506841540336609, "logits/rejected": -0.6945980191230774, "logps/chosen": -0.5979211330413818, "logps/rejected": -1.8207846879959106, "loss": 0.9768, "nll_loss": 0.7503006458282471, "rewards/accuracies": 0.75, "rewards/chosen": -0.059792112559080124, "rewards/margins": 0.122286356985569, "rewards/rejected": -0.18207845091819763, "step": 2863 }, { "epoch": 1.747140460576483, "grad_norm": 1.093285322189331, "learning_rate": 5.223270055113288e-06, "log_odds_chosen": 2.371424436569214, "log_odds_ratio": -0.5409382581710815, "logits/chosen": -0.9308104515075684, "logits/rejected": -0.8965738415718079, "logps/chosen": -1.0018759965896606, "logps/rejected": -3.1604158878326416, "loss": 1.0136, "nll_loss": 1.2189664840698242, "rewards/accuracies": 0.5, "rewards/chosen": -0.10018760710954666, "rewards/margins": 0.2158540040254593, "rewards/rejected": -0.31604158878326416, "step": 2864 }, { "epoch": 1.7477504956535, "grad_norm": 1.892634391784668, "learning_rate": 5.222290263319045e-06, "log_odds_chosen": 1.3598543405532837, "log_odds_ratio": -0.4721706807613373, "logits/chosen": -0.9680887460708618, "logits/rejected": -0.8537699580192566, "logps/chosen": -1.0037171840667725, "logps/rejected": -2.115060806274414, "loss": 1.0978, "nll_loss": 1.1736202239990234, "rewards/accuracies": 0.75, "rewards/chosen": -0.10037171840667725, "rewards/margins": 0.1111343652009964, "rewards/rejected": -0.21150608360767365, "step": 2865 }, { "epoch": 1.748360530730517, "grad_norm": 1.3740370273590088, "learning_rate": 5.221310471524801e-06, "log_odds_chosen": 1.081595778465271, "log_odds_ratio": -0.5965090990066528, "logits/chosen": -0.6784170269966125, "logits/rejected": -0.7462942600250244, "logps/chosen": -0.5882166624069214, "logps/rejected": -1.3521255254745483, "loss": 1.0062, "nll_loss": 0.9870063066482544, "rewards/accuracies": 0.5, "rewards/chosen": -0.0588216707110405, "rewards/margins": 0.07639087736606598, "rewards/rejected": -0.13521254062652588, "step": 2866 }, { "epoch": 1.748970565807534, "grad_norm": 1.0485466718673706, "learning_rate": 5.220330679730556e-06, "log_odds_chosen": 1.4761972427368164, "log_odds_ratio": -0.34427410364151, "logits/chosen": -0.538876473903656, "logits/rejected": -0.5529094934463501, "logps/chosen": -0.6276366114616394, "logps/rejected": -1.5939385890960693, "loss": 1.0721, "nll_loss": 0.8424514532089233, "rewards/accuracies": 0.875, "rewards/chosen": -0.06276366114616394, "rewards/margins": 0.09663020074367523, "rewards/rejected": -0.15939386188983917, "step": 2867 }, { "epoch": 1.749580600884551, "grad_norm": 1.428765058517456, "learning_rate": 5.219350887936313e-06, "log_odds_chosen": 0.8051208257675171, "log_odds_ratio": -0.5641360878944397, "logits/chosen": -0.7369301915168762, "logits/rejected": -0.7505521178245544, "logps/chosen": -0.7200945019721985, "logps/rejected": -1.2440563440322876, "loss": 0.9935, "nll_loss": 0.8503973484039307, "rewards/accuracies": 0.5, "rewards/chosen": -0.07200945168733597, "rewards/margins": 0.05239618197083473, "rewards/rejected": -0.124405637383461, "step": 2868 }, { "epoch": 1.7501906359615678, "grad_norm": 3.9119865894317627, "learning_rate": 5.2183710961420695e-06, "log_odds_chosen": 1.4724622964859009, "log_odds_ratio": -0.49602171778678894, "logits/chosen": -0.8076095581054688, "logits/rejected": -0.7485111355781555, "logps/chosen": -1.024775743484497, "logps/rejected": -2.033925771713257, "loss": 1.039, "nll_loss": 1.0051367282867432, "rewards/accuracies": 0.625, "rewards/chosen": -0.10247758030891418, "rewards/margins": 0.10091499984264374, "rewards/rejected": -0.20339259505271912, "step": 2869 }, { "epoch": 1.7508006710385846, "grad_norm": 1.4037412405014038, "learning_rate": 5.217391304347826e-06, "log_odds_chosen": 2.004321575164795, "log_odds_ratio": -0.4342685341835022, "logits/chosen": -0.5425178408622742, "logits/rejected": -0.7092487215995789, "logps/chosen": -0.8012481927871704, "logps/rejected": -2.3367769718170166, "loss": 0.9493, "nll_loss": 0.8761541843414307, "rewards/accuracies": 0.625, "rewards/chosen": -0.08012481778860092, "rewards/margins": 0.15355288982391357, "rewards/rejected": -0.2336777150630951, "step": 2870 }, { "epoch": 1.7514107061156017, "grad_norm": 1.17167329788208, "learning_rate": 5.216411512553583e-06, "log_odds_chosen": 0.9012905955314636, "log_odds_ratio": -0.5289103984832764, "logits/chosen": -0.6211526393890381, "logits/rejected": -0.5858347415924072, "logps/chosen": -0.7972731590270996, "logps/rejected": -1.4858341217041016, "loss": 1.0823, "nll_loss": 1.0199339389801025, "rewards/accuracies": 0.625, "rewards/chosen": -0.07972732186317444, "rewards/margins": 0.06885608285665512, "rewards/rejected": -0.14858339726924896, "step": 2871 }, { "epoch": 1.7520207411926185, "grad_norm": 1.2882899045944214, "learning_rate": 5.215431720759339e-06, "log_odds_chosen": 2.117464303970337, "log_odds_ratio": -0.45918169617652893, "logits/chosen": -0.74165940284729, "logits/rejected": -0.8168250322341919, "logps/chosen": -0.8472241163253784, "logps/rejected": -2.6359376907348633, "loss": 1.0237, "nll_loss": 1.0020701885223389, "rewards/accuracies": 0.75, "rewards/chosen": -0.08472239971160889, "rewards/margins": 0.17887136340141296, "rewards/rejected": -0.26359376311302185, "step": 2872 }, { "epoch": 1.7526307762696356, "grad_norm": 1.2658283710479736, "learning_rate": 5.214451928965094e-06, "log_odds_chosen": 0.24488785862922668, "log_odds_ratio": -0.6592289209365845, "logits/chosen": -0.9438077211380005, "logits/rejected": -0.8712363243103027, "logps/chosen": -1.018729567527771, "logps/rejected": -1.1578326225280762, "loss": 1.1263, "nll_loss": 1.259194254875183, "rewards/accuracies": 0.75, "rewards/chosen": -0.10187296569347382, "rewards/margins": 0.013910309411585331, "rewards/rejected": -0.11578326672315598, "step": 2873 }, { "epoch": 1.7532408113466524, "grad_norm": 4.238931655883789, "learning_rate": 5.213472137170851e-06, "log_odds_chosen": 0.800741970539093, "log_odds_ratio": -0.5046523213386536, "logits/chosen": -0.9305271506309509, "logits/rejected": -0.8603220582008362, "logps/chosen": -0.8183624148368835, "logps/rejected": -1.3476455211639404, "loss": 1.0681, "nll_loss": 0.9478530883789062, "rewards/accuracies": 0.625, "rewards/chosen": -0.08183625340461731, "rewards/margins": 0.05292830243706703, "rewards/rejected": -0.13476455211639404, "step": 2874 }, { "epoch": 1.7538508464236693, "grad_norm": 1.9713166952133179, "learning_rate": 5.212492345376607e-06, "log_odds_chosen": 0.296065092086792, "log_odds_ratio": -0.6523987054824829, "logits/chosen": -0.8426145315170288, "logits/rejected": -0.7096506357192993, "logps/chosen": -0.8403913378715515, "logps/rejected": -1.03107750415802, "loss": 0.9477, "nll_loss": 1.0011835098266602, "rewards/accuracies": 0.5, "rewards/chosen": -0.08403913676738739, "rewards/margins": 0.01906862109899521, "rewards/rejected": -0.103107750415802, "step": 2875 }, { "epoch": 1.7544608815006861, "grad_norm": 1.7108392715454102, "learning_rate": 5.211512553582364e-06, "log_odds_chosen": 2.3021092414855957, "log_odds_ratio": -0.29617562890052795, "logits/chosen": -0.8846980333328247, "logits/rejected": -0.8402022123336792, "logps/chosen": -0.7448737025260925, "logps/rejected": -2.562804698944092, "loss": 1.1723, "nll_loss": 1.0898144245147705, "rewards/accuracies": 0.875, "rewards/chosen": -0.07448737323284149, "rewards/margins": 0.18179309368133545, "rewards/rejected": -0.25628045201301575, "step": 2876 }, { "epoch": 1.7550709165777032, "grad_norm": 1.500512719154358, "learning_rate": 5.21053276178812e-06, "log_odds_chosen": 1.6237610578536987, "log_odds_ratio": -0.5222574472427368, "logits/chosen": -0.5767965316772461, "logits/rejected": -0.5546849966049194, "logps/chosen": -0.7818055748939514, "logps/rejected": -2.2643895149230957, "loss": 1.0788, "nll_loss": 1.0055851936340332, "rewards/accuracies": 0.625, "rewards/chosen": -0.07818055897951126, "rewards/margins": 0.14825841784477234, "rewards/rejected": -0.226438969373703, "step": 2877 }, { "epoch": 1.7556809516547203, "grad_norm": 4.660475254058838, "learning_rate": 5.209552969993876e-06, "log_odds_chosen": 1.8663387298583984, "log_odds_ratio": -0.5052477121353149, "logits/chosen": -0.8325438499450684, "logits/rejected": -0.8835018873214722, "logps/chosen": -0.5699436068534851, "logps/rejected": -1.8400328159332275, "loss": 1.2443, "nll_loss": 1.1545662879943848, "rewards/accuracies": 0.5, "rewards/chosen": -0.05699436366558075, "rewards/margins": 0.12700891494750977, "rewards/rejected": -0.1840032935142517, "step": 2878 }, { "epoch": 1.7562909867317371, "grad_norm": 1.6671183109283447, "learning_rate": 5.2085731781996324e-06, "log_odds_chosen": 1.8140581846237183, "log_odds_ratio": -0.40180903673171997, "logits/chosen": -0.8820016384124756, "logits/rejected": -0.9752891063690186, "logps/chosen": -0.6703055500984192, "logps/rejected": -1.9590106010437012, "loss": 0.9755, "nll_loss": 0.725144624710083, "rewards/accuracies": 0.875, "rewards/chosen": -0.06703054904937744, "rewards/margins": 0.12887051701545715, "rewards/rejected": -0.1959010660648346, "step": 2879 }, { "epoch": 1.756901021808754, "grad_norm": 7.141870498657227, "learning_rate": 5.2075933864053885e-06, "log_odds_chosen": 3.208047866821289, "log_odds_ratio": -0.25149843096733093, "logits/chosen": -0.6413882374763489, "logits/rejected": -0.7736809253692627, "logps/chosen": -0.6651246547698975, "logps/rejected": -3.1204800605773926, "loss": 1.0655, "nll_loss": 0.9347347021102905, "rewards/accuracies": 1.0, "rewards/chosen": -0.06651246547698975, "rewards/margins": 0.24553555250167847, "rewards/rejected": -0.3120480477809906, "step": 2880 }, { "epoch": 1.7575110568857708, "grad_norm": 10.886828422546387, "learning_rate": 5.206613594611145e-06, "log_odds_chosen": 0.36824357509613037, "log_odds_ratio": -0.6394404172897339, "logits/chosen": -0.9287501573562622, "logits/rejected": -0.816276490688324, "logps/chosen": -0.7112684845924377, "logps/rejected": -0.9122549295425415, "loss": 1.2251, "nll_loss": 1.0806081295013428, "rewards/accuracies": 0.625, "rewards/chosen": -0.07112684845924377, "rewards/margins": 0.020098645240068436, "rewards/rejected": -0.09122549742460251, "step": 2881 }, { "epoch": 1.7581210919627879, "grad_norm": 2.4883036613464355, "learning_rate": 5.205633802816902e-06, "log_odds_chosen": 1.8340911865234375, "log_odds_ratio": -0.5001726150512695, "logits/chosen": -0.8353720307350159, "logits/rejected": -0.8387104868888855, "logps/chosen": -0.7939103245735168, "logps/rejected": -2.287517547607422, "loss": 0.9999, "nll_loss": 1.001212477684021, "rewards/accuracies": 0.625, "rewards/chosen": -0.07939103245735168, "rewards/margins": 0.1493607461452484, "rewards/rejected": -0.2287517637014389, "step": 2882 }, { "epoch": 1.758731127039805, "grad_norm": 2.979196548461914, "learning_rate": 5.204654011022658e-06, "log_odds_chosen": 0.8028197288513184, "log_odds_ratio": -0.504662275314331, "logits/chosen": -0.7814989686012268, "logits/rejected": -0.9149587154388428, "logps/chosen": -0.8072163462638855, "logps/rejected": -1.3994847536087036, "loss": 1.0831, "nll_loss": 0.9920943975448608, "rewards/accuracies": 0.625, "rewards/chosen": -0.08072163909673691, "rewards/margins": 0.05922684445977211, "rewards/rejected": -0.13994848728179932, "step": 2883 }, { "epoch": 1.7593411621168218, "grad_norm": 1.3517953157424927, "learning_rate": 5.203674219228413e-06, "log_odds_chosen": 1.0696163177490234, "log_odds_ratio": -0.41940101981163025, "logits/chosen": -1.028432846069336, "logits/rejected": -1.0203380584716797, "logps/chosen": -0.9149559140205383, "logps/rejected": -1.7037718296051025, "loss": 1.176, "nll_loss": 1.0166395902633667, "rewards/accuracies": 0.75, "rewards/chosen": -0.0914955884218216, "rewards/margins": 0.07888159155845642, "rewards/rejected": -0.17037717998027802, "step": 2884 }, { "epoch": 1.7599511971938386, "grad_norm": 2.2216134071350098, "learning_rate": 5.20269442743417e-06, "log_odds_chosen": 0.7535603046417236, "log_odds_ratio": -0.5828113555908203, "logits/chosen": -1.0372278690338135, "logits/rejected": -0.9913288950920105, "logps/chosen": -0.9082623720169067, "logps/rejected": -1.5366114377975464, "loss": 1.1704, "nll_loss": 1.085996150970459, "rewards/accuracies": 0.75, "rewards/chosen": -0.09082623571157455, "rewards/margins": 0.06283490359783173, "rewards/rejected": -0.15366114675998688, "step": 2885 }, { "epoch": 1.7605612322708555, "grad_norm": 1.0508677959442139, "learning_rate": 5.201714635639926e-06, "log_odds_chosen": 1.0553940534591675, "log_odds_ratio": -0.5915548205375671, "logits/chosen": -0.8284364938735962, "logits/rejected": -1.008589506149292, "logps/chosen": -1.1400033235549927, "logps/rejected": -2.0598347187042236, "loss": 1.216, "nll_loss": 1.2583216428756714, "rewards/accuracies": 0.75, "rewards/chosen": -0.1140003353357315, "rewards/margins": 0.0919831246137619, "rewards/rejected": -0.2059834599494934, "step": 2886 }, { "epoch": 1.7611712673478725, "grad_norm": 2.035022735595703, "learning_rate": 5.200734843845682e-06, "log_odds_chosen": 1.376861333847046, "log_odds_ratio": -0.6792669892311096, "logits/chosen": -1.1035884618759155, "logits/rejected": -0.9778581261634827, "logps/chosen": -1.0201843976974487, "logps/rejected": -2.3309974670410156, "loss": 1.1736, "nll_loss": 1.40687096118927, "rewards/accuracies": 0.5, "rewards/chosen": -0.10201844573020935, "rewards/margins": 0.13108132779598236, "rewards/rejected": -0.2330997735261917, "step": 2887 }, { "epoch": 1.7617813024248894, "grad_norm": 2.8408615589141846, "learning_rate": 5.199755052051439e-06, "log_odds_chosen": 1.9722062349319458, "log_odds_ratio": -0.20419280230998993, "logits/chosen": -0.855825662612915, "logits/rejected": -0.8186862468719482, "logps/chosen": -0.8019193410873413, "logps/rejected": -2.0182600021362305, "loss": 1.0512, "nll_loss": 1.0691808462142944, "rewards/accuracies": 1.0, "rewards/chosen": -0.08019193261861801, "rewards/margins": 0.12163405865430832, "rewards/rejected": -0.20182597637176514, "step": 2888 }, { "epoch": 1.7623913375019065, "grad_norm": 3.1836538314819336, "learning_rate": 5.198775260257195e-06, "log_odds_chosen": 1.4230337142944336, "log_odds_ratio": -0.43335291743278503, "logits/chosen": -0.7777609825134277, "logits/rejected": -0.8370321989059448, "logps/chosen": -0.8964954614639282, "logps/rejected": -1.9910117387771606, "loss": 0.9757, "nll_loss": 1.0665206909179688, "rewards/accuracies": 0.625, "rewards/chosen": -0.08964955061674118, "rewards/margins": 0.10945162922143936, "rewards/rejected": -0.19910117983818054, "step": 2889 }, { "epoch": 1.7630013725789233, "grad_norm": 1.3483836650848389, "learning_rate": 5.197795468462951e-06, "log_odds_chosen": 2.1123340129852295, "log_odds_ratio": -0.38016921281814575, "logits/chosen": -0.6546502709388733, "logits/rejected": -0.8518440127372742, "logps/chosen": -0.8091632127761841, "logps/rejected": -2.5624935626983643, "loss": 1.0976, "nll_loss": 1.0176124572753906, "rewards/accuracies": 0.875, "rewards/chosen": -0.08091632276773453, "rewards/margins": 0.17533302307128906, "rewards/rejected": -0.256249338388443, "step": 2890 }, { "epoch": 1.7636114076559402, "grad_norm": 1.2263764142990112, "learning_rate": 5.196815676668708e-06, "log_odds_chosen": 0.33935993909835815, "log_odds_ratio": -0.6939378380775452, "logits/chosen": -1.0080286264419556, "logits/rejected": -1.0097103118896484, "logps/chosen": -1.0396826267242432, "logps/rejected": -1.3088781833648682, "loss": 1.175, "nll_loss": 1.268835425376892, "rewards/accuracies": 0.625, "rewards/chosen": -0.10396827757358551, "rewards/margins": 0.026919543743133545, "rewards/rejected": -0.13088780641555786, "step": 2891 }, { "epoch": 1.764221442732957, "grad_norm": 1.583805799484253, "learning_rate": 5.195835884874464e-06, "log_odds_chosen": 1.5639642477035522, "log_odds_ratio": -0.3928127586841583, "logits/chosen": -0.9003719687461853, "logits/rejected": -0.9507914781570435, "logps/chosen": -0.7887111306190491, "logps/rejected": -1.8422214984893799, "loss": 1.0796, "nll_loss": 1.075090765953064, "rewards/accuracies": 0.625, "rewards/chosen": -0.07887111604213715, "rewards/margins": 0.105351023375988, "rewards/rejected": -0.18422213196754456, "step": 2892 }, { "epoch": 1.764831477809974, "grad_norm": 1.5063040256500244, "learning_rate": 5.194856093080221e-06, "log_odds_chosen": 1.47993004322052, "log_odds_ratio": -0.46056145429611206, "logits/chosen": -0.7738922834396362, "logits/rejected": -0.7571676969528198, "logps/chosen": -0.8032389879226685, "logps/rejected": -2.0145599842071533, "loss": 1.0808, "nll_loss": 1.046202540397644, "rewards/accuracies": 0.75, "rewards/chosen": -0.08032390475273132, "rewards/margins": 0.12113211303949356, "rewards/rejected": -0.2014560103416443, "step": 2893 }, { "epoch": 1.7654415128869911, "grad_norm": 1.2379897832870483, "learning_rate": 5.193876301285977e-06, "log_odds_chosen": 1.0394415855407715, "log_odds_ratio": -0.4479685127735138, "logits/chosen": -0.8334277868270874, "logits/rejected": -0.9282788038253784, "logps/chosen": -0.727055549621582, "logps/rejected": -1.4498393535614014, "loss": 0.9868, "nll_loss": 0.8727952837944031, "rewards/accuracies": 0.875, "rewards/chosen": -0.07270555198192596, "rewards/margins": 0.07227838784456253, "rewards/rejected": -0.1449839323759079, "step": 2894 }, { "epoch": 1.766051547964008, "grad_norm": 2.9700777530670166, "learning_rate": 5.192896509491733e-06, "log_odds_chosen": 0.5244893431663513, "log_odds_ratio": -0.6230535507202148, "logits/chosen": -0.7704585790634155, "logits/rejected": -0.7909706830978394, "logps/chosen": -0.8963903188705444, "logps/rejected": -1.3047534227371216, "loss": 0.9845, "nll_loss": 1.1960030794143677, "rewards/accuracies": 0.625, "rewards/chosen": -0.08963903784751892, "rewards/margins": 0.04083631560206413, "rewards/rejected": -0.13047534227371216, "step": 2895 }, { "epoch": 1.7666615830410248, "grad_norm": 4.107356548309326, "learning_rate": 5.191916717697489e-06, "log_odds_chosen": 1.9165518283843994, "log_odds_ratio": -0.380613774061203, "logits/chosen": -0.8244099617004395, "logits/rejected": -0.9122971296310425, "logps/chosen": -0.8489360809326172, "logps/rejected": -2.380675792694092, "loss": 1.1238, "nll_loss": 1.0153852701187134, "rewards/accuracies": 0.875, "rewards/chosen": -0.0848936066031456, "rewards/margins": 0.15317396819591522, "rewards/rejected": -0.2380675971508026, "step": 2896 }, { "epoch": 1.7672716181180417, "grad_norm": 1.3863224983215332, "learning_rate": 5.190936925903245e-06, "log_odds_chosen": 1.505859613418579, "log_odds_ratio": -0.3446590304374695, "logits/chosen": -0.8856205940246582, "logits/rejected": -0.9184154272079468, "logps/chosen": -0.9038151502609253, "logps/rejected": -2.0389561653137207, "loss": 1.17, "nll_loss": 1.0581862926483154, "rewards/accuracies": 0.875, "rewards/chosen": -0.09038151055574417, "rewards/margins": 0.11351409554481506, "rewards/rejected": -0.20389559864997864, "step": 2897 }, { "epoch": 1.7678816531950587, "grad_norm": 8.990239143371582, "learning_rate": 5.189957134109001e-06, "log_odds_chosen": 0.9903564453125, "log_odds_ratio": -0.5209202766418457, "logits/chosen": -0.5821977853775024, "logits/rejected": -0.7359964847564697, "logps/chosen": -0.7517663836479187, "logps/rejected": -1.5271177291870117, "loss": 1.2808, "nll_loss": 0.9188941121101379, "rewards/accuracies": 0.75, "rewards/chosen": -0.07517664134502411, "rewards/margins": 0.07753513008356094, "rewards/rejected": -0.15271176397800446, "step": 2898 }, { "epoch": 1.7684916882720756, "grad_norm": 1.818809986114502, "learning_rate": 5.188977342314758e-06, "log_odds_chosen": 1.2925779819488525, "log_odds_ratio": -0.5322363376617432, "logits/chosen": -0.5647917985916138, "logits/rejected": -0.7030653953552246, "logps/chosen": -0.7664379477500916, "logps/rejected": -1.7842919826507568, "loss": 1.0201, "nll_loss": 0.8424157500267029, "rewards/accuracies": 0.5, "rewards/chosen": -0.07664379477500916, "rewards/margins": 0.10178540647029877, "rewards/rejected": -0.17842920124530792, "step": 2899 }, { "epoch": 1.7691017233490927, "grad_norm": 1.8456041812896729, "learning_rate": 5.187997550520514e-06, "log_odds_chosen": 1.9529728889465332, "log_odds_ratio": -0.36471328139305115, "logits/chosen": -0.60445636510849, "logits/rejected": -0.7801661491394043, "logps/chosen": -0.6420395970344543, "logps/rejected": -2.1579840183258057, "loss": 1.0245, "nll_loss": 0.8196278214454651, "rewards/accuracies": 0.875, "rewards/chosen": -0.06420396268367767, "rewards/margins": 0.15159444510936737, "rewards/rejected": -0.21579840779304504, "step": 2900 }, { "epoch": 1.7697117584261095, "grad_norm": 1.182024359703064, "learning_rate": 5.18701775872627e-06, "log_odds_chosen": 0.2977723479270935, "log_odds_ratio": -0.7159024477005005, "logits/chosen": -0.9331671595573425, "logits/rejected": -1.0286741256713867, "logps/chosen": -1.1354827880859375, "logps/rejected": -1.391234278678894, "loss": 1.1119, "nll_loss": 1.2239859104156494, "rewards/accuracies": 0.5, "rewards/chosen": -0.11354828625917435, "rewards/margins": 0.025575140491127968, "rewards/rejected": -0.13912343978881836, "step": 2901 }, { "epoch": 1.7703217935031264, "grad_norm": 2.6860947608947754, "learning_rate": 5.186037966932027e-06, "log_odds_chosen": 0.30922821164131165, "log_odds_ratio": -0.6208102107048035, "logits/chosen": -0.7234599590301514, "logits/rejected": -0.8563635349273682, "logps/chosen": -0.9793540239334106, "logps/rejected": -1.1283342838287354, "loss": 1.1524, "nll_loss": 1.2565447092056274, "rewards/accuracies": 0.5, "rewards/chosen": -0.09793542325496674, "rewards/margins": 0.014898017048835754, "rewards/rejected": -0.1128334254026413, "step": 2902 }, { "epoch": 1.7709318285801432, "grad_norm": 1.5775165557861328, "learning_rate": 5.185058175137783e-06, "log_odds_chosen": 1.630103588104248, "log_odds_ratio": -0.5701373815536499, "logits/chosen": -0.6742690801620483, "logits/rejected": -0.5379921197891235, "logps/chosen": -0.8437085151672363, "logps/rejected": -2.1833930015563965, "loss": 0.9876, "nll_loss": 0.9189921617507935, "rewards/accuracies": 0.625, "rewards/chosen": -0.08437085151672363, "rewards/margins": 0.13396844267845154, "rewards/rejected": -0.21833929419517517, "step": 2903 }, { "epoch": 1.7715418636571603, "grad_norm": 1.3808735609054565, "learning_rate": 5.18407838334354e-06, "log_odds_chosen": 0.9855117797851562, "log_odds_ratio": -0.5453112125396729, "logits/chosen": -0.9259577989578247, "logits/rejected": -0.8963829874992371, "logps/chosen": -0.9191602468490601, "logps/rejected": -1.659285306930542, "loss": 1.016, "nll_loss": 1.0285630226135254, "rewards/accuracies": 0.625, "rewards/chosen": -0.091916024684906, "rewards/margins": 0.07401251047849655, "rewards/rejected": -0.16592854261398315, "step": 2904 }, { "epoch": 1.7721518987341773, "grad_norm": 4.866032123565674, "learning_rate": 5.183098591549296e-06, "log_odds_chosen": 1.6641970872879028, "log_odds_ratio": -0.3675066828727722, "logits/chosen": -0.7935699224472046, "logits/rejected": -0.7696541547775269, "logps/chosen": -0.9020227193832397, "logps/rejected": -2.2285995483398438, "loss": 0.9845, "nll_loss": 1.106420874595642, "rewards/accuracies": 0.875, "rewards/chosen": -0.09020227193832397, "rewards/margins": 0.13265767693519592, "rewards/rejected": -0.2228599488735199, "step": 2905 }, { "epoch": 1.7727619338111942, "grad_norm": 2.1639328002929688, "learning_rate": 5.182118799755052e-06, "log_odds_chosen": 1.5474672317504883, "log_odds_ratio": -0.41785484552383423, "logits/chosen": -0.7668356895446777, "logits/rejected": -0.7682759761810303, "logps/chosen": -0.8040107488632202, "logps/rejected": -2.009514808654785, "loss": 1.0517, "nll_loss": 0.8430502414703369, "rewards/accuracies": 0.75, "rewards/chosen": -0.08040107041597366, "rewards/margins": 0.12055040895938873, "rewards/rejected": -0.200951486825943, "step": 2906 }, { "epoch": 1.773371968888211, "grad_norm": 1.2338974475860596, "learning_rate": 5.181139007960808e-06, "log_odds_chosen": 0.23039156198501587, "log_odds_ratio": -0.751397967338562, "logits/chosen": -0.8943483829498291, "logits/rejected": -0.8117963075637817, "logps/chosen": -1.0686742067337036, "logps/rejected": -1.2196617126464844, "loss": 0.9534, "nll_loss": 1.0725346803665161, "rewards/accuracies": 0.375, "rewards/chosen": -0.10686741769313812, "rewards/margins": 0.01509873941540718, "rewards/rejected": -0.1219661682844162, "step": 2907 }, { "epoch": 1.7739820039652279, "grad_norm": 2.7687721252441406, "learning_rate": 5.180159216166564e-06, "log_odds_chosen": 1.582676887512207, "log_odds_ratio": -0.44640815258026123, "logits/chosen": -0.6533383131027222, "logits/rejected": -0.6432872414588928, "logps/chosen": -0.8106944561004639, "logps/rejected": -2.0412416458129883, "loss": 1.1836, "nll_loss": 1.0392776727676392, "rewards/accuracies": 0.625, "rewards/chosen": -0.0810694545507431, "rewards/margins": 0.12305472791194916, "rewards/rejected": -0.20412416756153107, "step": 2908 }, { "epoch": 1.774592039042245, "grad_norm": 1.4839409589767456, "learning_rate": 5.17917942437232e-06, "log_odds_chosen": 1.7430510520935059, "log_odds_ratio": -0.32283180952072144, "logits/chosen": -0.811693012714386, "logits/rejected": -0.8528302311897278, "logps/chosen": -0.7316653728485107, "logps/rejected": -1.9989535808563232, "loss": 1.0495, "nll_loss": 0.9477723836898804, "rewards/accuracies": 0.875, "rewards/chosen": -0.07316653430461884, "rewards/margins": 0.126728817820549, "rewards/rejected": -0.19989535212516785, "step": 2909 }, { "epoch": 1.775202074119262, "grad_norm": 3.622046709060669, "learning_rate": 5.178199632578077e-06, "log_odds_chosen": 0.8339945077896118, "log_odds_ratio": -0.4405965507030487, "logits/chosen": -0.6599256992340088, "logits/rejected": -0.6226097345352173, "logps/chosen": -0.6706104874610901, "logps/rejected": -1.181053876876831, "loss": 1.1687, "nll_loss": 0.8696236610412598, "rewards/accuracies": 0.875, "rewards/chosen": -0.06706105172634125, "rewards/margins": 0.051044344902038574, "rewards/rejected": -0.11810539662837982, "step": 2910 }, { "epoch": 1.7758121091962789, "grad_norm": 1.162786602973938, "learning_rate": 5.177219840783833e-06, "log_odds_chosen": -0.05490206182003021, "log_odds_ratio": -0.7709468603134155, "logits/chosen": -0.9896905422210693, "logits/rejected": -0.8962510228157043, "logps/chosen": -0.8129956126213074, "logps/rejected": -0.7985374927520752, "loss": 1.0555, "nll_loss": 1.0836735963821411, "rewards/accuracies": 0.5, "rewards/chosen": -0.0812995582818985, "rewards/margins": -0.0014458163641393185, "rewards/rejected": -0.07985374331474304, "step": 2911 }, { "epoch": 1.7764221442732957, "grad_norm": 1.3767755031585693, "learning_rate": 5.1762400489895895e-06, "log_odds_chosen": 1.1924489736557007, "log_odds_ratio": -0.5512272715568542, "logits/chosen": -0.7918281555175781, "logits/rejected": -0.6116968393325806, "logps/chosen": -0.967780590057373, "logps/rejected": -1.9581036567687988, "loss": 1.1783, "nll_loss": 1.1767584085464478, "rewards/accuracies": 0.75, "rewards/chosen": -0.09677806496620178, "rewards/margins": 0.09903229027986526, "rewards/rejected": -0.19581034779548645, "step": 2912 }, { "epoch": 1.7770321793503125, "grad_norm": 4.037641525268555, "learning_rate": 5.175260257195346e-06, "log_odds_chosen": 1.0589252710342407, "log_odds_ratio": -0.5052337646484375, "logits/chosen": -0.8582881689071655, "logits/rejected": -0.9035068154335022, "logps/chosen": -0.8404357433319092, "logps/rejected": -1.5902290344238281, "loss": 1.0457, "nll_loss": 1.137831211090088, "rewards/accuracies": 0.625, "rewards/chosen": -0.08404357731342316, "rewards/margins": 0.07497932016849518, "rewards/rejected": -0.15902289748191833, "step": 2913 }, { "epoch": 1.7776422144273296, "grad_norm": 1.4685122966766357, "learning_rate": 5.174280465401102e-06, "log_odds_chosen": 2.279693126678467, "log_odds_ratio": -0.2729325592517853, "logits/chosen": -0.8695191740989685, "logits/rejected": -0.8837803602218628, "logps/chosen": -0.9039450883865356, "logps/rejected": -2.788970708847046, "loss": 1.1797, "nll_loss": 1.0841355323791504, "rewards/accuracies": 0.875, "rewards/chosen": -0.0903945118188858, "rewards/margins": 0.18850257992744446, "rewards/rejected": -0.27889707684516907, "step": 2914 }, { "epoch": 1.7782522495043465, "grad_norm": 1.5704399347305298, "learning_rate": 5.173300673606858e-06, "log_odds_chosen": 1.4801325798034668, "log_odds_ratio": -0.4003617763519287, "logits/chosen": -0.8708837032318115, "logits/rejected": -0.870071291923523, "logps/chosen": -0.9582995176315308, "logps/rejected": -2.241668224334717, "loss": 1.0126, "nll_loss": 1.0813913345336914, "rewards/accuracies": 0.75, "rewards/chosen": -0.09582994878292084, "rewards/margins": 0.12833687663078308, "rewards/rejected": -0.2241668403148651, "step": 2915 }, { "epoch": 1.7788622845813635, "grad_norm": 1.0458952188491821, "learning_rate": 5.172320881812615e-06, "log_odds_chosen": 1.4478175640106201, "log_odds_ratio": -0.4872777760028839, "logits/chosen": -0.950136125087738, "logits/rejected": -0.956336498260498, "logps/chosen": -0.7171552181243896, "logps/rejected": -1.8214025497436523, "loss": 1.1078, "nll_loss": 1.1325972080230713, "rewards/accuracies": 0.75, "rewards/chosen": -0.07171551883220673, "rewards/margins": 0.11042473465204239, "rewards/rejected": -0.1821402609348297, "step": 2916 }, { "epoch": 1.7794723196583804, "grad_norm": 6.079827785491943, "learning_rate": 5.171341090018371e-06, "log_odds_chosen": 0.5518981218338013, "log_odds_ratio": -0.6418969631195068, "logits/chosen": -0.8062268495559692, "logits/rejected": -0.7387372255325317, "logps/chosen": -0.8445768356323242, "logps/rejected": -1.2677013874053955, "loss": 1.1529, "nll_loss": 1.2200528383255005, "rewards/accuracies": 0.625, "rewards/chosen": -0.08445768803358078, "rewards/margins": 0.04231245070695877, "rewards/rejected": -0.12677013874053955, "step": 2917 }, { "epoch": 1.7800823547353972, "grad_norm": 4.154146194458008, "learning_rate": 5.170361298224127e-06, "log_odds_chosen": 2.9198074340820312, "log_odds_ratio": -0.26803645491600037, "logits/chosen": -0.7116149067878723, "logits/rejected": -0.7580006718635559, "logps/chosen": -0.7706268429756165, "logps/rejected": -3.1089940071105957, "loss": 1.2042, "nll_loss": 1.2411479949951172, "rewards/accuracies": 0.875, "rewards/chosen": -0.0770626962184906, "rewards/margins": 0.23383669555187225, "rewards/rejected": -0.31089937686920166, "step": 2918 }, { "epoch": 1.780692389812414, "grad_norm": 1.0297720432281494, "learning_rate": 5.169381506429883e-06, "log_odds_chosen": 1.7068567276000977, "log_odds_ratio": -0.5929688811302185, "logits/chosen": -0.816508948802948, "logits/rejected": -0.8766548037528992, "logps/chosen": -0.8820651173591614, "logps/rejected": -2.099360466003418, "loss": 1.2325, "nll_loss": 1.1767092943191528, "rewards/accuracies": 0.5, "rewards/chosen": -0.08820651471614838, "rewards/margins": 0.1217295378446579, "rewards/rejected": -0.20993605256080627, "step": 2919 }, { "epoch": 1.7813024248894311, "grad_norm": 1.216280460357666, "learning_rate": 5.168401714635639e-06, "log_odds_chosen": 1.4498999118804932, "log_odds_ratio": -0.3994389772415161, "logits/chosen": -0.7963591814041138, "logits/rejected": -0.8912091255187988, "logps/chosen": -0.8440485000610352, "logps/rejected": -1.7852197885513306, "loss": 1.1726, "nll_loss": 1.2051922082901, "rewards/accuracies": 0.75, "rewards/chosen": -0.084404855966568, "rewards/margins": 0.09411713480949402, "rewards/rejected": -0.178521990776062, "step": 2920 }, { "epoch": 1.7819124599664482, "grad_norm": 1.4508111476898193, "learning_rate": 5.167421922841396e-06, "log_odds_chosen": 0.9356955289840698, "log_odds_ratio": -0.5913479328155518, "logits/chosen": -0.6266791820526123, "logits/rejected": -0.7401588559150696, "logps/chosen": -0.9650602340698242, "logps/rejected": -1.6851933002471924, "loss": 1.1474, "nll_loss": 1.1245390176773071, "rewards/accuracies": 0.5, "rewards/chosen": -0.0965060293674469, "rewards/margins": 0.07201330363750458, "rewards/rejected": -0.16851931810379028, "step": 2921 }, { "epoch": 1.782522495043465, "grad_norm": 1.4052762985229492, "learning_rate": 5.1664421310471525e-06, "log_odds_chosen": 1.0088753700256348, "log_odds_ratio": -0.5564680695533752, "logits/chosen": -0.769660472869873, "logits/rejected": -0.8228495717048645, "logps/chosen": -0.7292946577072144, "logps/rejected": -1.4805538654327393, "loss": 1.1164, "nll_loss": 1.167697548866272, "rewards/accuracies": 0.625, "rewards/chosen": -0.07292946428060532, "rewards/margins": 0.07512591779232025, "rewards/rejected": -0.14805537462234497, "step": 2922 }, { "epoch": 1.783132530120482, "grad_norm": 5.38071346282959, "learning_rate": 5.165462339252909e-06, "log_odds_chosen": 1.3162099123001099, "log_odds_ratio": -0.44487160444259644, "logits/chosen": -0.6443517208099365, "logits/rejected": -0.5236511826515198, "logps/chosen": -0.7658140063285828, "logps/rejected": -1.78691565990448, "loss": 1.2212, "nll_loss": 1.105115532875061, "rewards/accuracies": 0.625, "rewards/chosen": -0.07658139616250992, "rewards/margins": 0.10211016237735748, "rewards/rejected": -0.178691565990448, "step": 2923 }, { "epoch": 1.7837425651974987, "grad_norm": 1.0945972204208374, "learning_rate": 5.164482547458665e-06, "log_odds_chosen": 0.40928807854652405, "log_odds_ratio": -0.6429958343505859, "logits/chosen": -0.7977845668792725, "logits/rejected": -0.7889995574951172, "logps/chosen": -0.7855854034423828, "logps/rejected": -1.0639463663101196, "loss": 0.9503, "nll_loss": 0.8387025594711304, "rewards/accuracies": 0.625, "rewards/chosen": -0.0785585418343544, "rewards/margins": 0.027836104854941368, "rewards/rejected": -0.10639464110136032, "step": 2924 }, { "epoch": 1.7843526002745158, "grad_norm": 2.375345230102539, "learning_rate": 5.163502755664421e-06, "log_odds_chosen": 1.3635787963867188, "log_odds_ratio": -0.4344332814216614, "logits/chosen": -0.753714919090271, "logits/rejected": -0.8586221933364868, "logps/chosen": -0.8999950885772705, "logps/rejected": -1.8571982383728027, "loss": 1.0927, "nll_loss": 0.9976084232330322, "rewards/accuracies": 0.5, "rewards/chosen": -0.08999951183795929, "rewards/margins": 0.0957203134894371, "rewards/rejected": -0.185719832777977, "step": 2925 }, { "epoch": 1.7849626353515329, "grad_norm": 1.8828760385513306, "learning_rate": 5.162522963870177e-06, "log_odds_chosen": 1.2306687831878662, "log_odds_ratio": -0.47931256890296936, "logits/chosen": -0.5253058671951294, "logits/rejected": -0.7653928995132446, "logps/chosen": -0.828447163105011, "logps/rejected": -1.7607916593551636, "loss": 0.9649, "nll_loss": 0.802476167678833, "rewards/accuracies": 0.625, "rewards/chosen": -0.08284471929073334, "rewards/margins": 0.09323446452617645, "rewards/rejected": -0.1760791838169098, "step": 2926 }, { "epoch": 1.7855726704285497, "grad_norm": 1.1281814575195312, "learning_rate": 5.161543172075934e-06, "log_odds_chosen": 2.1707324981689453, "log_odds_ratio": -0.38517940044403076, "logits/chosen": -0.8292037844657898, "logits/rejected": -0.825893223285675, "logps/chosen": -0.763306736946106, "logps/rejected": -2.4573378562927246, "loss": 1.1157, "nll_loss": 1.088668704032898, "rewards/accuracies": 0.625, "rewards/chosen": -0.07633067667484283, "rewards/margins": 0.1694031059741974, "rewards/rejected": -0.24573378264904022, "step": 2927 }, { "epoch": 1.7861827055055666, "grad_norm": 2.140221118927002, "learning_rate": 5.16056338028169e-06, "log_odds_chosen": 1.8590589761734009, "log_odds_ratio": -0.3886086046695709, "logits/chosen": -0.6951204538345337, "logits/rejected": -0.6601829528808594, "logps/chosen": -0.5994358062744141, "logps/rejected": -2.0043652057647705, "loss": 1.0022, "nll_loss": 0.8264967203140259, "rewards/accuracies": 0.75, "rewards/chosen": -0.05994357913732529, "rewards/margins": 0.14049294590950012, "rewards/rejected": -0.2004365175962448, "step": 2928 }, { "epoch": 1.7867927405825834, "grad_norm": 9.82405948638916, "learning_rate": 5.159583588487446e-06, "log_odds_chosen": 1.1376161575317383, "log_odds_ratio": -0.47222161293029785, "logits/chosen": -0.8876988291740417, "logits/rejected": -0.8906995058059692, "logps/chosen": -0.9426632523536682, "logps/rejected": -1.7977895736694336, "loss": 1.1793, "nll_loss": 1.1926554441452026, "rewards/accuracies": 0.625, "rewards/chosen": -0.09426632523536682, "rewards/margins": 0.08551263064146042, "rewards/rejected": -0.17977896332740784, "step": 2929 }, { "epoch": 1.7874027756596003, "grad_norm": 1.354168176651001, "learning_rate": 5.158603796693202e-06, "log_odds_chosen": 1.0525386333465576, "log_odds_ratio": -0.5049943327903748, "logits/chosen": -0.8693111538887024, "logits/rejected": -0.7834113240242004, "logps/chosen": -0.9935169219970703, "logps/rejected": -1.678153157234192, "loss": 1.1167, "nll_loss": 1.179654598236084, "rewards/accuracies": 0.625, "rewards/chosen": -0.09935169667005539, "rewards/margins": 0.06846362352371216, "rewards/rejected": -0.16781531274318695, "step": 2930 }, { "epoch": 1.7880128107366173, "grad_norm": 1.990675687789917, "learning_rate": 5.157624004898958e-06, "log_odds_chosen": 1.4785743951797485, "log_odds_ratio": -0.5080987215042114, "logits/chosen": -0.7417181730270386, "logits/rejected": -0.786120593547821, "logps/chosen": -0.8982926607131958, "logps/rejected": -2.0795207023620605, "loss": 1.1811, "nll_loss": 1.0512620210647583, "rewards/accuracies": 0.5, "rewards/chosen": -0.08982926607131958, "rewards/margins": 0.11812280118465424, "rewards/rejected": -0.207952082157135, "step": 2931 }, { "epoch": 1.7886228458136344, "grad_norm": 0.927378237247467, "learning_rate": 5.156644213104715e-06, "log_odds_chosen": 1.8631980419158936, "log_odds_ratio": -0.3151105046272278, "logits/chosen": -0.48428788781166077, "logits/rejected": -0.6336583495140076, "logps/chosen": -0.5790415406227112, "logps/rejected": -1.8494150638580322, "loss": 1.0191, "nll_loss": 0.7178119421005249, "rewards/accuracies": 0.875, "rewards/chosen": -0.05790415778756142, "rewards/margins": 0.12703734636306763, "rewards/rejected": -0.18494150042533875, "step": 2932 }, { "epoch": 1.7892328808906512, "grad_norm": 1.2161006927490234, "learning_rate": 5.1556644213104715e-06, "log_odds_chosen": 0.583537220954895, "log_odds_ratio": -0.5698044300079346, "logits/chosen": -1.0140717029571533, "logits/rejected": -0.6713557839393616, "logps/chosen": -0.7967157363891602, "logps/rejected": -1.2369394302368164, "loss": 0.9528, "nll_loss": 0.9144582748413086, "rewards/accuracies": 0.625, "rewards/chosen": -0.07967156916856766, "rewards/margins": 0.044022370129823685, "rewards/rejected": -0.12369394302368164, "step": 2933 }, { "epoch": 1.789842915967668, "grad_norm": 1.7530063390731812, "learning_rate": 5.154684629516228e-06, "log_odds_chosen": 2.4773385524749756, "log_odds_ratio": -0.293657511472702, "logits/chosen": -0.8202243447303772, "logits/rejected": -0.8255250453948975, "logps/chosen": -0.5065011978149414, "logps/rejected": -2.2494187355041504, "loss": 1.0982, "nll_loss": 1.20009183883667, "rewards/accuracies": 0.875, "rewards/chosen": -0.05065012723207474, "rewards/margins": 0.17429175972938538, "rewards/rejected": -0.22494187951087952, "step": 2934 }, { "epoch": 1.790452951044685, "grad_norm": 1.9755882024765015, "learning_rate": 5.153704837721985e-06, "log_odds_chosen": 1.4819910526275635, "log_odds_ratio": -0.5091464519500732, "logits/chosen": -1.0093458890914917, "logits/rejected": -0.9424651265144348, "logps/chosen": -0.9757708311080933, "logps/rejected": -2.1631007194519043, "loss": 1.1931, "nll_loss": 1.280226469039917, "rewards/accuracies": 0.625, "rewards/chosen": -0.09757708758115768, "rewards/margins": 0.11873296648263931, "rewards/rejected": -0.2163100689649582, "step": 2935 }, { "epoch": 1.791062986121702, "grad_norm": 1.3357114791870117, "learning_rate": 5.15272504592774e-06, "log_odds_chosen": 0.5919325947761536, "log_odds_ratio": -0.6415827870368958, "logits/chosen": -1.0757191181182861, "logits/rejected": -0.9461754560470581, "logps/chosen": -0.9261596202850342, "logps/rejected": -1.3235933780670166, "loss": 1.0329, "nll_loss": 1.1638509035110474, "rewards/accuracies": 0.5, "rewards/chosen": -0.09261596947908401, "rewards/margins": 0.03974337875843048, "rewards/rejected": -0.1323593258857727, "step": 2936 }, { "epoch": 1.791673021198719, "grad_norm": 1.5572420358657837, "learning_rate": 5.151745254133496e-06, "log_odds_chosen": 2.6403985023498535, "log_odds_ratio": -0.2943672835826874, "logits/chosen": -0.740085244178772, "logits/rejected": -0.7181538343429565, "logps/chosen": -0.9230130910873413, "logps/rejected": -3.14985990524292, "loss": 1.1326, "nll_loss": 1.0637528896331787, "rewards/accuracies": 0.875, "rewards/chosen": -0.09230130910873413, "rewards/margins": 0.22268471121788025, "rewards/rejected": -0.3149860203266144, "step": 2937 }, { "epoch": 1.792283056275736, "grad_norm": 2.113203763961792, "learning_rate": 5.150765462339253e-06, "log_odds_chosen": 1.375572681427002, "log_odds_ratio": -0.39129868149757385, "logits/chosen": -0.674334704875946, "logits/rejected": -0.56045001745224, "logps/chosen": -0.6084682941436768, "logps/rejected": -1.510810375213623, "loss": 1.2804, "nll_loss": 1.2502555847167969, "rewards/accuracies": 1.0, "rewards/chosen": -0.06084683537483215, "rewards/margins": 0.09023420512676239, "rewards/rejected": -0.15108104050159454, "step": 2938 }, { "epoch": 1.7928930913527528, "grad_norm": 1.620143175125122, "learning_rate": 5.149785670545009e-06, "log_odds_chosen": 0.8821297883987427, "log_odds_ratio": -0.4996594488620758, "logits/chosen": -0.7629134058952332, "logits/rejected": -0.8260897994041443, "logps/chosen": -1.0152021646499634, "logps/rejected": -1.6995048522949219, "loss": 1.0974, "nll_loss": 1.1236144304275513, "rewards/accuracies": 0.625, "rewards/chosen": -0.10152021795511246, "rewards/margins": 0.06843025982379913, "rewards/rejected": -0.169950470328331, "step": 2939 }, { "epoch": 1.7935031264297696, "grad_norm": 1.9207645654678345, "learning_rate": 5.148805878750765e-06, "log_odds_chosen": 1.3198416233062744, "log_odds_ratio": -0.4395029544830322, "logits/chosen": -0.7789291143417358, "logits/rejected": -0.6252316236495972, "logps/chosen": -0.7671699523925781, "logps/rejected": -1.734437346458435, "loss": 0.902, "nll_loss": 0.8757250308990479, "rewards/accuracies": 0.625, "rewards/chosen": -0.07671699672937393, "rewards/margins": 0.09672674536705017, "rewards/rejected": -0.1734437346458435, "step": 2940 }, { "epoch": 1.7941131615067867, "grad_norm": 1.6800490617752075, "learning_rate": 5.147826086956521e-06, "log_odds_chosen": 2.5040173530578613, "log_odds_ratio": -0.3062761723995209, "logits/chosen": -0.8286259174346924, "logits/rejected": -0.8888179063796997, "logps/chosen": -0.7287958860397339, "logps/rejected": -2.730111598968506, "loss": 1.083, "nll_loss": 1.0096670389175415, "rewards/accuracies": 0.75, "rewards/chosen": -0.07287958264350891, "rewards/margins": 0.2001315802335739, "rewards/rejected": -0.27301114797592163, "step": 2941 }, { "epoch": 1.7947231965838035, "grad_norm": 1.618739366531372, "learning_rate": 5.1468462951622774e-06, "log_odds_chosen": 1.0982378721237183, "log_odds_ratio": -0.5652427077293396, "logits/chosen": -0.819580614566803, "logits/rejected": -0.8115131855010986, "logps/chosen": -0.7048932313919067, "logps/rejected": -1.5699529647827148, "loss": 0.9802, "nll_loss": 0.8651342391967773, "rewards/accuracies": 0.625, "rewards/chosen": -0.0704893246293068, "rewards/margins": 0.0865059569478035, "rewards/rejected": -0.1569952815771103, "step": 2942 }, { "epoch": 1.7953332316608206, "grad_norm": 1.6312886476516724, "learning_rate": 5.1458665033680336e-06, "log_odds_chosen": 2.5097320079803467, "log_odds_ratio": -0.34345531463623047, "logits/chosen": -0.6896557211875916, "logits/rejected": -0.7013655304908752, "logps/chosen": -0.8498448133468628, "logps/rejected": -2.9121885299682617, "loss": 1.003, "nll_loss": 0.8921125531196594, "rewards/accuracies": 0.625, "rewards/chosen": -0.08498449623584747, "rewards/margins": 0.2062343806028366, "rewards/rejected": -0.2912188470363617, "step": 2943 }, { "epoch": 1.7959432667378374, "grad_norm": 1.7616972923278809, "learning_rate": 5.1448867115737905e-06, "log_odds_chosen": 0.2055184543132782, "log_odds_ratio": -0.7759057879447937, "logits/chosen": -0.9711250066757202, "logits/rejected": -1.044983983039856, "logps/chosen": -1.059657335281372, "logps/rejected": -1.2097854614257812, "loss": 1.1755, "nll_loss": 1.1468555927276611, "rewards/accuracies": 0.375, "rewards/chosen": -0.10596573352813721, "rewards/margins": 0.015012817457318306, "rewards/rejected": -0.12097854912281036, "step": 2944 }, { "epoch": 1.7965533018148543, "grad_norm": 2.1677780151367188, "learning_rate": 5.143906919779547e-06, "log_odds_chosen": 0.9941016435623169, "log_odds_ratio": -0.4956027865409851, "logits/chosen": -1.0675371885299683, "logits/rejected": -1.0238770246505737, "logps/chosen": -1.004828691482544, "logps/rejected": -1.5392886400222778, "loss": 1.2192, "nll_loss": 1.1979725360870361, "rewards/accuracies": 0.75, "rewards/chosen": -0.10048285871744156, "rewards/margins": 0.053446006029844284, "rewards/rejected": -0.15392887592315674, "step": 2945 }, { "epoch": 1.7971633368918711, "grad_norm": 1.1806973218917847, "learning_rate": 5.142927127985304e-06, "log_odds_chosen": 1.745128870010376, "log_odds_ratio": -0.3551446199417114, "logits/chosen": -0.7293444871902466, "logits/rejected": -0.8037762641906738, "logps/chosen": -0.6980605125427246, "logps/rejected": -1.930190920829773, "loss": 0.9438, "nll_loss": 1.0750771760940552, "rewards/accuracies": 0.75, "rewards/chosen": -0.0698060542345047, "rewards/margins": 0.12321304529905319, "rewards/rejected": -0.1930191069841385, "step": 2946 }, { "epoch": 1.7977733719688882, "grad_norm": 2.284905195236206, "learning_rate": 5.141947336191059e-06, "log_odds_chosen": 2.573244571685791, "log_odds_ratio": -0.390586793422699, "logits/chosen": -0.6817971467971802, "logits/rejected": -0.7818523049354553, "logps/chosen": -0.8668539524078369, "logps/rejected": -3.000962257385254, "loss": 1.2504, "nll_loss": 1.1543259620666504, "rewards/accuracies": 0.625, "rewards/chosen": -0.08668539673089981, "rewards/margins": 0.21341082453727722, "rewards/rejected": -0.30009621381759644, "step": 2947 }, { "epoch": 1.7983834070459053, "grad_norm": 2.0172581672668457, "learning_rate": 5.140967544396815e-06, "log_odds_chosen": 0.7568084597587585, "log_odds_ratio": -0.4627995491027832, "logits/chosen": -1.059289813041687, "logits/rejected": -1.1272352933883667, "logps/chosen": -0.8608397245407104, "logps/rejected": -1.4153386354446411, "loss": 1.1954, "nll_loss": 0.9648970365524292, "rewards/accuracies": 0.75, "rewards/chosen": -0.08608397841453552, "rewards/margins": 0.055449895560741425, "rewards/rejected": -0.14153388142585754, "step": 2948 }, { "epoch": 1.7989934421229221, "grad_norm": 2.9143409729003906, "learning_rate": 5.139987752602572e-06, "log_odds_chosen": 0.24945829808712006, "log_odds_ratio": -0.7000762224197388, "logits/chosen": -0.928499698638916, "logits/rejected": -0.9230220317840576, "logps/chosen": -0.926162838935852, "logps/rejected": -1.1012424230575562, "loss": 1.1462, "nll_loss": 0.9728093147277832, "rewards/accuracies": 0.625, "rewards/chosen": -0.09261628985404968, "rewards/margins": 0.01750795543193817, "rewards/rejected": -0.11012424528598785, "step": 2949 }, { "epoch": 1.799603477199939, "grad_norm": 1.8559484481811523, "learning_rate": 5.139007960808328e-06, "log_odds_chosen": 0.961126446723938, "log_odds_ratio": -0.5546029806137085, "logits/chosen": -0.752964437007904, "logits/rejected": -0.8047241568565369, "logps/chosen": -0.7366553544998169, "logps/rejected": -1.4856977462768555, "loss": 1.1443, "nll_loss": 0.9582391977310181, "rewards/accuracies": 0.625, "rewards/chosen": -0.07366553694009781, "rewards/margins": 0.07490424811840057, "rewards/rejected": -0.14856979250907898, "step": 2950 }, { "epoch": 1.8002135122769558, "grad_norm": 9.540377616882324, "learning_rate": 5.138028169014084e-06, "log_odds_chosen": 1.1951898336410522, "log_odds_ratio": -0.6322383880615234, "logits/chosen": -0.7362903952598572, "logits/rejected": -0.8038493394851685, "logps/chosen": -0.9444335699081421, "logps/rejected": -1.7273845672607422, "loss": 1.0987, "nll_loss": 0.8607026934623718, "rewards/accuracies": 0.5, "rewards/chosen": -0.09444335848093033, "rewards/margins": 0.07829511165618896, "rewards/rejected": -0.1727384626865387, "step": 2951 }, { "epoch": 1.8008235473539729, "grad_norm": 1.7360458374023438, "learning_rate": 5.137048377219841e-06, "log_odds_chosen": 2.2317073345184326, "log_odds_ratio": -0.35055866837501526, "logits/chosen": -0.8598236441612244, "logits/rejected": -0.9274926781654358, "logps/chosen": -0.6486058235168457, "logps/rejected": -2.3321869373321533, "loss": 0.9352, "nll_loss": 0.8572848439216614, "rewards/accuracies": 0.875, "rewards/chosen": -0.06486058235168457, "rewards/margins": 0.16835811734199524, "rewards/rejected": -0.2332186996936798, "step": 2952 }, { "epoch": 1.80143358243099, "grad_norm": 3.5036375522613525, "learning_rate": 5.1360685854255965e-06, "log_odds_chosen": 2.928119659423828, "log_odds_ratio": -0.2521659731864929, "logits/chosen": -0.5040413737297058, "logits/rejected": -0.6146899461746216, "logps/chosen": -0.5676016807556152, "logps/rejected": -2.7645812034606934, "loss": 0.991, "nll_loss": 0.6925290822982788, "rewards/accuracies": 0.875, "rewards/chosen": -0.05676016956567764, "rewards/margins": 0.219697967171669, "rewards/rejected": -0.27645814418792725, "step": 2953 }, { "epoch": 1.8020436175080068, "grad_norm": 2.3459017276763916, "learning_rate": 5.135088793631353e-06, "log_odds_chosen": 1.2323733568191528, "log_odds_ratio": -0.599845290184021, "logits/chosen": -0.9314367771148682, "logits/rejected": -0.8810322880744934, "logps/chosen": -0.8589271903038025, "logps/rejected": -1.7946902513504028, "loss": 1.213, "nll_loss": 1.1283059120178223, "rewards/accuracies": 0.5, "rewards/chosen": -0.08589272201061249, "rewards/margins": 0.09357630461454391, "rewards/rejected": -0.1794690191745758, "step": 2954 }, { "epoch": 1.8026536525850236, "grad_norm": 2.0647964477539062, "learning_rate": 5.1341090018371096e-06, "log_odds_chosen": 2.298661708831787, "log_odds_ratio": -0.2716250717639923, "logits/chosen": -0.7687997817993164, "logits/rejected": -0.8785980939865112, "logps/chosen": -0.7807949781417847, "logps/rejected": -2.4123170375823975, "loss": 0.9528, "nll_loss": 1.0752066373825073, "rewards/accuracies": 0.875, "rewards/chosen": -0.07807949185371399, "rewards/margins": 0.16315221786499023, "rewards/rejected": -0.24123170971870422, "step": 2955 }, { "epoch": 1.8032636876620405, "grad_norm": 2.5690953731536865, "learning_rate": 5.133129210042866e-06, "log_odds_chosen": 2.3730580806732178, "log_odds_ratio": -0.33622345328330994, "logits/chosen": -0.8330021500587463, "logits/rejected": -0.8963409662246704, "logps/chosen": -0.7365045547485352, "logps/rejected": -2.57574462890625, "loss": 1.0468, "nll_loss": 1.0930655002593994, "rewards/accuracies": 1.0, "rewards/chosen": -0.07365045696496964, "rewards/margins": 0.18392400443553925, "rewards/rejected": -0.2575744688510895, "step": 2956 }, { "epoch": 1.8038737227390576, "grad_norm": 1.5711312294006348, "learning_rate": 5.132149418248622e-06, "log_odds_chosen": 2.3231470584869385, "log_odds_ratio": -0.3895111083984375, "logits/chosen": -0.801520824432373, "logits/rejected": -0.8482531309127808, "logps/chosen": -0.7385958433151245, "logps/rejected": -2.6310112476348877, "loss": 0.9783, "nll_loss": 0.9914120435714722, "rewards/accuracies": 0.625, "rewards/chosen": -0.07385958731174469, "rewards/margins": 0.18924154341220856, "rewards/rejected": -0.26310113072395325, "step": 2957 }, { "epoch": 1.8044837578160744, "grad_norm": 1.9398818016052246, "learning_rate": 5.131169626454378e-06, "log_odds_chosen": 0.6628403067588806, "log_odds_ratio": -0.49501973390579224, "logits/chosen": -0.934070348739624, "logits/rejected": -0.9601234793663025, "logps/chosen": -0.9244493246078491, "logps/rejected": -1.3398287296295166, "loss": 1.1509, "nll_loss": 1.1201446056365967, "rewards/accuracies": 0.75, "rewards/chosen": -0.09244492650032043, "rewards/margins": 0.041537944227457047, "rewards/rejected": -0.13398286700248718, "step": 2958 }, { "epoch": 1.8050937928930915, "grad_norm": 3.4987053871154785, "learning_rate": 5.130189834660134e-06, "log_odds_chosen": 2.5206756591796875, "log_odds_ratio": -0.2447880208492279, "logits/chosen": -0.5490060448646545, "logits/rejected": -0.6332288384437561, "logps/chosen": -0.7099505662918091, "logps/rejected": -2.634146213531494, "loss": 0.8784, "nll_loss": 0.8516290783882141, "rewards/accuracies": 1.0, "rewards/chosen": -0.07099506258964539, "rewards/margins": 0.19241958856582642, "rewards/rejected": -0.2634146511554718, "step": 2959 }, { "epoch": 1.8057038279701083, "grad_norm": 0.8949787020683289, "learning_rate": 5.129210042865891e-06, "log_odds_chosen": 0.5332297086715698, "log_odds_ratio": -0.7571908235549927, "logits/chosen": -1.072551965713501, "logits/rejected": -0.9988390207290649, "logps/chosen": -1.127834677696228, "logps/rejected": -1.6303389072418213, "loss": 1.002, "nll_loss": 1.1545324325561523, "rewards/accuracies": 0.375, "rewards/chosen": -0.11278347671031952, "rewards/margins": 0.050250403583049774, "rewards/rejected": -0.1630338728427887, "step": 2960 }, { "epoch": 1.8063138630471252, "grad_norm": 1.749839186668396, "learning_rate": 5.128230251071647e-06, "log_odds_chosen": 1.375421166419983, "log_odds_ratio": -0.6038702726364136, "logits/chosen": -0.8456982970237732, "logits/rejected": -0.9319944381713867, "logps/chosen": -0.9008198976516724, "logps/rejected": -1.9667171239852905, "loss": 1.1024, "nll_loss": 1.068326473236084, "rewards/accuracies": 0.5, "rewards/chosen": -0.09008198976516724, "rewards/margins": 0.10658973455429077, "rewards/rejected": -0.196671724319458, "step": 2961 }, { "epoch": 1.806923898124142, "grad_norm": 1.0707907676696777, "learning_rate": 5.127250459277403e-06, "log_odds_chosen": 0.13367429375648499, "log_odds_ratio": -0.8553130626678467, "logits/chosen": -0.9619296789169312, "logits/rejected": -0.9654186367988586, "logps/chosen": -0.8682041168212891, "logps/rejected": -0.9970457553863525, "loss": 1.0807, "nll_loss": 0.9615072011947632, "rewards/accuracies": 0.375, "rewards/chosen": -0.08682041615247726, "rewards/margins": 0.012884162366390228, "rewards/rejected": -0.0997045710682869, "step": 2962 }, { "epoch": 1.807533933201159, "grad_norm": 1.585201382637024, "learning_rate": 5.12627066748316e-06, "log_odds_chosen": 1.4348535537719727, "log_odds_ratio": -0.46973779797554016, "logits/chosen": -0.6675055623054504, "logits/rejected": -0.7200373411178589, "logps/chosen": -0.6398396492004395, "logps/rejected": -1.6321115493774414, "loss": 0.954, "nll_loss": 0.8515964150428772, "rewards/accuracies": 0.75, "rewards/chosen": -0.0639839693903923, "rewards/margins": 0.0992271900177002, "rewards/rejected": -0.1632111668586731, "step": 2963 }, { "epoch": 1.8081439682781761, "grad_norm": 2.2358007431030273, "learning_rate": 5.1252908756889155e-06, "log_odds_chosen": 1.0949046611785889, "log_odds_ratio": -0.5587061643600464, "logits/chosen": -0.9817756414413452, "logits/rejected": -0.966905415058136, "logps/chosen": -0.7195446491241455, "logps/rejected": -1.4943228960037231, "loss": 0.9897, "nll_loss": 0.9080955982208252, "rewards/accuracies": 0.75, "rewards/chosen": -0.07195447385311127, "rewards/margins": 0.07747782766819, "rewards/rejected": -0.14943230152130127, "step": 2964 }, { "epoch": 1.808754003355193, "grad_norm": 1.4357359409332275, "learning_rate": 5.124311083894672e-06, "log_odds_chosen": 2.0855984687805176, "log_odds_ratio": -0.3726327419281006, "logits/chosen": -0.9016175866127014, "logits/rejected": -1.0330671072006226, "logps/chosen": -0.7629484534263611, "logps/rejected": -2.3131613731384277, "loss": 1.0543, "nll_loss": 0.8895127177238464, "rewards/accuracies": 0.875, "rewards/chosen": -0.07629484683275223, "rewards/margins": 0.1550213098526001, "rewards/rejected": -0.23131614923477173, "step": 2965 }, { "epoch": 1.8093640384322098, "grad_norm": 5.7211809158325195, "learning_rate": 5.123331292100429e-06, "log_odds_chosen": 0.5251819491386414, "log_odds_ratio": -0.5613450407981873, "logits/chosen": -0.7567147612571716, "logits/rejected": -0.8053407073020935, "logps/chosen": -1.0739930868148804, "logps/rejected": -1.4624528884887695, "loss": 1.1751, "nll_loss": 1.4044791460037231, "rewards/accuracies": 0.625, "rewards/chosen": -0.10739930719137192, "rewards/margins": 0.0388459675014019, "rewards/rejected": -0.14624527096748352, "step": 2966 }, { "epoch": 1.8099740735092267, "grad_norm": 1.1628599166870117, "learning_rate": 5.122351500306185e-06, "log_odds_chosen": 2.5360050201416016, "log_odds_ratio": -0.41029027104377747, "logits/chosen": -0.6762598752975464, "logits/rejected": -0.5766629576683044, "logps/chosen": -0.6350914239883423, "logps/rejected": -2.398092031478882, "loss": 0.9576, "nll_loss": 0.7376944422721863, "rewards/accuracies": 0.75, "rewards/chosen": -0.06350914388895035, "rewards/margins": 0.1763000786304474, "rewards/rejected": -0.23980921506881714, "step": 2967 }, { "epoch": 1.8105841085862437, "grad_norm": 3.2510793209075928, "learning_rate": 5.121371708511941e-06, "log_odds_chosen": 1.1312589645385742, "log_odds_ratio": -0.3590957522392273, "logits/chosen": -0.6559758186340332, "logits/rejected": -0.642560601234436, "logps/chosen": -0.589009165763855, "logps/rejected": -1.2459907531738281, "loss": 0.996, "nll_loss": 0.85263592004776, "rewards/accuracies": 1.0, "rewards/chosen": -0.05890091508626938, "rewards/margins": 0.06569816172122955, "rewards/rejected": -0.12459906935691833, "step": 2968 }, { "epoch": 1.8111941436632606, "grad_norm": 2.460404396057129, "learning_rate": 5.120391916717698e-06, "log_odds_chosen": 2.998833656311035, "log_odds_ratio": -0.41621479392051697, "logits/chosen": -0.9052519798278809, "logits/rejected": -0.8587925434112549, "logps/chosen": -0.7006587386131287, "logps/rejected": -3.2186970710754395, "loss": 1.0549, "nll_loss": 0.8604191541671753, "rewards/accuracies": 0.625, "rewards/chosen": -0.07006587088108063, "rewards/margins": 0.25180381536483765, "rewards/rejected": -0.3218696713447571, "step": 2969 }, { "epoch": 1.8118041787402777, "grad_norm": 1.171470284461975, "learning_rate": 5.119412124923453e-06, "log_odds_chosen": 1.6399742364883423, "log_odds_ratio": -0.37519383430480957, "logits/chosen": -0.8840672969818115, "logits/rejected": -0.9142745137214661, "logps/chosen": -0.8559198975563049, "logps/rejected": -2.1273975372314453, "loss": 0.9589, "nll_loss": 0.9149918556213379, "rewards/accuracies": 0.875, "rewards/chosen": -0.08559199422597885, "rewards/margins": 0.12714776396751404, "rewards/rejected": -0.2127397507429123, "step": 2970 }, { "epoch": 1.8124142138172945, "grad_norm": 2.716352939605713, "learning_rate": 5.118432333129209e-06, "log_odds_chosen": 1.7184653282165527, "log_odds_ratio": -0.37973567843437195, "logits/chosen": -0.9021450281143188, "logits/rejected": -0.7435963749885559, "logps/chosen": -0.8124907612800598, "logps/rejected": -2.0692100524902344, "loss": 1.0139, "nll_loss": 1.0274171829223633, "rewards/accuracies": 0.75, "rewards/chosen": -0.08124907314777374, "rewards/margins": 0.12567193806171417, "rewards/rejected": -0.20692101120948792, "step": 2971 }, { "epoch": 1.8130242488943114, "grad_norm": 2.6685023307800293, "learning_rate": 5.117452541334966e-06, "log_odds_chosen": 0.472709596157074, "log_odds_ratio": -0.549335241317749, "logits/chosen": -0.842244565486908, "logits/rejected": -0.7879679203033447, "logps/chosen": -0.8587955832481384, "logps/rejected": -1.120192527770996, "loss": 1.167, "nll_loss": 1.2177196741104126, "rewards/accuracies": 0.75, "rewards/chosen": -0.08587955683469772, "rewards/margins": 0.026139702647924423, "rewards/rejected": -0.11201926320791245, "step": 2972 }, { "epoch": 1.8136342839713282, "grad_norm": 1.9637128114700317, "learning_rate": 5.116472749540722e-06, "log_odds_chosen": 1.969498634338379, "log_odds_ratio": -0.44613635540008545, "logits/chosen": -1.0552411079406738, "logits/rejected": -0.9338759779930115, "logps/chosen": -0.956843912601471, "logps/rejected": -2.547384738922119, "loss": 1.0056, "nll_loss": 1.1057984828948975, "rewards/accuracies": 0.75, "rewards/chosen": -0.09568438678979874, "rewards/margins": 0.15905410051345825, "rewards/rejected": -0.2547384798526764, "step": 2973 }, { "epoch": 1.8142443190483453, "grad_norm": 1.1458172798156738, "learning_rate": 5.115492957746479e-06, "log_odds_chosen": 3.454230785369873, "log_odds_ratio": -0.19202947616577148, "logits/chosen": -0.6113846898078918, "logits/rejected": -0.6783907413482666, "logps/chosen": -0.6496522426605225, "logps/rejected": -3.358109951019287, "loss": 0.9287, "nll_loss": 0.7026536464691162, "rewards/accuracies": 1.0, "rewards/chosen": -0.06496522575616837, "rewards/margins": 0.27084577083587646, "rewards/rejected": -0.33581095933914185, "step": 2974 }, { "epoch": 1.8148543541253623, "grad_norm": 5.48870325088501, "learning_rate": 5.1145131659522346e-06, "log_odds_chosen": 1.2934529781341553, "log_odds_ratio": -0.4775673449039459, "logits/chosen": -0.8825253248214722, "logits/rejected": -0.9191895723342896, "logps/chosen": -0.6642112731933594, "logps/rejected": -1.4654362201690674, "loss": 1.0971, "nll_loss": 0.8716492652893066, "rewards/accuracies": 0.625, "rewards/chosen": -0.06642112135887146, "rewards/margins": 0.08012249320745468, "rewards/rejected": -0.14654362201690674, "step": 2975 }, { "epoch": 1.8154643892023792, "grad_norm": 4.367747783660889, "learning_rate": 5.113533374157991e-06, "log_odds_chosen": 2.7433409690856934, "log_odds_ratio": -0.2688903212547302, "logits/chosen": -0.7912608981132507, "logits/rejected": -0.7954492568969727, "logps/chosen": -0.6807457804679871, "logps/rejected": -2.91403865814209, "loss": 1.1044, "nll_loss": 1.2983421087265015, "rewards/accuracies": 0.875, "rewards/chosen": -0.06807457655668259, "rewards/margins": 0.2233293056488037, "rewards/rejected": -0.2914038896560669, "step": 2976 }, { "epoch": 1.816074424279396, "grad_norm": 1.2407524585723877, "learning_rate": 5.112553582363748e-06, "log_odds_chosen": -0.03138226270675659, "log_odds_ratio": -0.7568391561508179, "logits/chosen": -1.0282227993011475, "logits/rejected": -0.9908006191253662, "logps/chosen": -0.9392699003219604, "logps/rejected": -0.9341061115264893, "loss": 1.1098, "nll_loss": 1.125540852546692, "rewards/accuracies": 0.625, "rewards/chosen": -0.09392698854207993, "rewards/margins": -0.0005163843743503094, "rewards/rejected": -0.09341060370206833, "step": 2977 }, { "epoch": 1.8166844593564129, "grad_norm": 5.352575302124023, "learning_rate": 5.111573790569504e-06, "log_odds_chosen": 3.0822548866271973, "log_odds_ratio": -0.17286093533039093, "logits/chosen": -0.7440487146377563, "logits/rejected": -0.8591834902763367, "logps/chosen": -0.6343057155609131, "logps/rejected": -2.9853017330169678, "loss": 1.0732, "nll_loss": 0.9791899919509888, "rewards/accuracies": 1.0, "rewards/chosen": -0.06343057751655579, "rewards/margins": 0.23509959876537323, "rewards/rejected": -0.2985301613807678, "step": 2978 }, { "epoch": 1.81729449443343, "grad_norm": 3.018937110900879, "learning_rate": 5.11059399877526e-06, "log_odds_chosen": 2.8670926094055176, "log_odds_ratio": -0.22071126103401184, "logits/chosen": -0.7760010361671448, "logits/rejected": -0.7946492433547974, "logps/chosen": -0.6317049860954285, "logps/rejected": -2.8663458824157715, "loss": 1.055, "nll_loss": 0.8560076951980591, "rewards/accuracies": 0.875, "rewards/chosen": -0.06317050009965897, "rewards/margins": 0.22346410155296326, "rewards/rejected": -0.2866345942020416, "step": 2979 }, { "epoch": 1.817904529510447, "grad_norm": 4.912167549133301, "learning_rate": 5.109614206981017e-06, "log_odds_chosen": 2.770448684692383, "log_odds_ratio": -0.3137018084526062, "logits/chosen": -0.6867722272872925, "logits/rejected": -0.8096891641616821, "logps/chosen": -0.8847754597663879, "logps/rejected": -2.915587902069092, "loss": 1.0412, "nll_loss": 0.8468528985977173, "rewards/accuracies": 0.75, "rewards/chosen": -0.08847755193710327, "rewards/margins": 0.20308125019073486, "rewards/rejected": -0.29155880212783813, "step": 2980 }, { "epoch": 1.8185145645874639, "grad_norm": 1.6663923263549805, "learning_rate": 5.108634415186772e-06, "log_odds_chosen": 2.12064790725708, "log_odds_ratio": -0.33887428045272827, "logits/chosen": -0.839634120464325, "logits/rejected": -0.9366447925567627, "logps/chosen": -0.8225195407867432, "logps/rejected": -2.4784040451049805, "loss": 1.0276, "nll_loss": 0.9578025341033936, "rewards/accuracies": 0.75, "rewards/chosen": -0.08225195854902267, "rewards/margins": 0.16558845341205597, "rewards/rejected": -0.24784040451049805, "step": 2981 }, { "epoch": 1.8191245996644807, "grad_norm": 1.4712259769439697, "learning_rate": 5.107654623392528e-06, "log_odds_chosen": -0.13131090998649597, "log_odds_ratio": -0.7951453924179077, "logits/chosen": -0.7356204986572266, "logits/rejected": -0.9282455444335938, "logps/chosen": -0.9974793791770935, "logps/rejected": -0.9004091620445251, "loss": 1.1857, "nll_loss": 1.0949467420578003, "rewards/accuracies": 0.375, "rewards/chosen": -0.09974794834852219, "rewards/margins": -0.009707028046250343, "rewards/rejected": -0.09004092216491699, "step": 2982 }, { "epoch": 1.8197346347414975, "grad_norm": 1.77046537399292, "learning_rate": 5.106674831598285e-06, "log_odds_chosen": 1.7614690065383911, "log_odds_ratio": -0.45946604013442993, "logits/chosen": -0.7796222567558289, "logits/rejected": -0.843155562877655, "logps/chosen": -0.7543547749519348, "logps/rejected": -2.0561187267303467, "loss": 1.0635, "nll_loss": 1.0411269664764404, "rewards/accuracies": 0.625, "rewards/chosen": -0.07543547451496124, "rewards/margins": 0.1301763951778412, "rewards/rejected": -0.20561188459396362, "step": 2983 }, { "epoch": 1.8203446698185146, "grad_norm": 1.3255261182785034, "learning_rate": 5.105695039804041e-06, "log_odds_chosen": 0.23097264766693115, "log_odds_ratio": -0.620193362236023, "logits/chosen": -1.0073105096817017, "logits/rejected": -0.9041935205459595, "logps/chosen": -1.0284346342086792, "logps/rejected": -1.1711711883544922, "loss": 1.223, "nll_loss": 1.1582155227661133, "rewards/accuracies": 0.625, "rewards/chosen": -0.10284346342086792, "rewards/margins": 0.014273658394813538, "rewards/rejected": -0.11711712926626205, "step": 2984 }, { "epoch": 1.8209547048955315, "grad_norm": 1.056805968284607, "learning_rate": 5.104715248009798e-06, "log_odds_chosen": 1.5211087465286255, "log_odds_ratio": -0.43579351902008057, "logits/chosen": -0.6697575449943542, "logits/rejected": -0.5048307776451111, "logps/chosen": -0.7386199831962585, "logps/rejected": -1.8532671928405762, "loss": 0.9585, "nll_loss": 0.9037507772445679, "rewards/accuracies": 0.75, "rewards/chosen": -0.0738620012998581, "rewards/margins": 0.111464723944664, "rewards/rejected": -0.1853267252445221, "step": 2985 }, { "epoch": 1.8215647399725485, "grad_norm": 5.65871524810791, "learning_rate": 5.1037354562155544e-06, "log_odds_chosen": 2.6736555099487305, "log_odds_ratio": -0.2671881318092346, "logits/chosen": -0.8612191081047058, "logits/rejected": -0.7661202549934387, "logps/chosen": -0.6722230911254883, "logps/rejected": -2.783869981765747, "loss": 0.9433, "nll_loss": 0.7742234468460083, "rewards/accuracies": 0.75, "rewards/chosen": -0.06722231209278107, "rewards/margins": 0.2111646831035614, "rewards/rejected": -0.27838701009750366, "step": 2986 }, { "epoch": 1.8221747750495654, "grad_norm": 4.160833358764648, "learning_rate": 5.10275566442131e-06, "log_odds_chosen": 2.199249029159546, "log_odds_ratio": -0.409278929233551, "logits/chosen": -0.8228086829185486, "logits/rejected": -0.9212770462036133, "logps/chosen": -0.7814897894859314, "logps/rejected": -2.494539737701416, "loss": 0.9933, "nll_loss": 1.0239752531051636, "rewards/accuracies": 0.75, "rewards/chosen": -0.0781489834189415, "rewards/margins": 0.17130498588085175, "rewards/rejected": -0.24945396184921265, "step": 2987 }, { "epoch": 1.8227848101265822, "grad_norm": 1.265574336051941, "learning_rate": 5.101775872627067e-06, "log_odds_chosen": 1.1701785326004028, "log_odds_ratio": -0.4159320294857025, "logits/chosen": -0.6912726759910583, "logits/rejected": -0.7272073030471802, "logps/chosen": -0.6559022665023804, "logps/rejected": -1.4471485614776611, "loss": 0.992, "nll_loss": 1.057967185974121, "rewards/accuracies": 0.875, "rewards/chosen": -0.06559022516012192, "rewards/margins": 0.07912465184926987, "rewards/rejected": -0.14471489191055298, "step": 2988 }, { "epoch": 1.823394845203599, "grad_norm": 1.662248969078064, "learning_rate": 5.100796080832823e-06, "log_odds_chosen": 1.9997409582138062, "log_odds_ratio": -0.3398248553276062, "logits/chosen": -0.6741729974746704, "logits/rejected": -0.7751017212867737, "logps/chosen": -0.7200632095336914, "logps/rejected": -2.2377729415893555, "loss": 1.1722, "nll_loss": 1.0371506214141846, "rewards/accuracies": 0.75, "rewards/chosen": -0.07200632989406586, "rewards/margins": 0.1517709493637085, "rewards/rejected": -0.22377727925777435, "step": 2989 }, { "epoch": 1.8240048802806161, "grad_norm": 1.1552932262420654, "learning_rate": 5.099816289038579e-06, "log_odds_chosen": 1.154458999633789, "log_odds_ratio": -0.4187602996826172, "logits/chosen": -0.6879180669784546, "logits/rejected": -0.8074720501899719, "logps/chosen": -0.8581211566925049, "logps/rejected": -1.6577351093292236, "loss": 0.9971, "nll_loss": 0.9898157119750977, "rewards/accuracies": 0.625, "rewards/chosen": -0.08581212162971497, "rewards/margins": 0.07996140420436859, "rewards/rejected": -0.16577354073524475, "step": 2990 }, { "epoch": 1.8246149153576332, "grad_norm": 1.1464427709579468, "learning_rate": 5.098836497244336e-06, "log_odds_chosen": 2.320535182952881, "log_odds_ratio": -0.4359983205795288, "logits/chosen": -0.7762720584869385, "logits/rejected": -0.8269885778427124, "logps/chosen": -0.8421376943588257, "logps/rejected": -2.7299957275390625, "loss": 0.8644, "nll_loss": 0.9477337598800659, "rewards/accuracies": 0.75, "rewards/chosen": -0.08421377837657928, "rewards/margins": 0.18878582119941711, "rewards/rejected": -0.2729995846748352, "step": 2991 }, { "epoch": 1.82522495043465, "grad_norm": 2.559039354324341, "learning_rate": 5.097856705450092e-06, "log_odds_chosen": 2.233649253845215, "log_odds_ratio": -0.5373110771179199, "logits/chosen": -0.8577374815940857, "logits/rejected": -0.7605445981025696, "logps/chosen": -0.8181635141372681, "logps/rejected": -2.5339419841766357, "loss": 1.1345, "nll_loss": 1.0998194217681885, "rewards/accuracies": 0.5, "rewards/chosen": -0.08181635290384293, "rewards/margins": 0.17157787084579468, "rewards/rejected": -0.253394216299057, "step": 2992 }, { "epoch": 1.825834985511667, "grad_norm": 1.5555963516235352, "learning_rate": 5.096876913655847e-06, "log_odds_chosen": 0.29590100049972534, "log_odds_ratio": -0.6829304695129395, "logits/chosen": -0.6915087699890137, "logits/rejected": -0.7083662748336792, "logps/chosen": -0.9176799654960632, "logps/rejected": -1.1298794746398926, "loss": 1.1034, "nll_loss": 1.018903374671936, "rewards/accuracies": 0.75, "rewards/chosen": -0.09176799654960632, "rewards/margins": 0.021219950169324875, "rewards/rejected": -0.1129879504442215, "step": 2993 }, { "epoch": 1.8264450205886837, "grad_norm": 1.2561097145080566, "learning_rate": 5.095897121861604e-06, "log_odds_chosen": 2.2658748626708984, "log_odds_ratio": -0.2501346468925476, "logits/chosen": -0.8401373624801636, "logits/rejected": -0.8621656894683838, "logps/chosen": -0.7611908316612244, "logps/rejected": -2.5456202030181885, "loss": 0.8426, "nll_loss": 0.8626778721809387, "rewards/accuracies": 0.875, "rewards/chosen": -0.0761190876364708, "rewards/margins": 0.17844294011592865, "rewards/rejected": -0.25456202030181885, "step": 2994 }, { "epoch": 1.8270550556657008, "grad_norm": 1.2747247219085693, "learning_rate": 5.09491733006736e-06, "log_odds_chosen": 2.4870212078094482, "log_odds_ratio": -0.3511427044868469, "logits/chosen": -0.8227078914642334, "logits/rejected": -0.8243185877799988, "logps/chosen": -0.7092076539993286, "logps/rejected": -2.712435483932495, "loss": 1.0782, "nll_loss": 0.8575756549835205, "rewards/accuracies": 0.75, "rewards/chosen": -0.07092076539993286, "rewards/margins": 0.20032279193401337, "rewards/rejected": -0.27124354243278503, "step": 2995 }, { "epoch": 1.8276650907427177, "grad_norm": 1.9921104907989502, "learning_rate": 5.0939375382731165e-06, "log_odds_chosen": 2.009878635406494, "log_odds_ratio": -0.3078121244907379, "logits/chosen": -0.8596560955047607, "logits/rejected": -0.9035004377365112, "logps/chosen": -0.7071245908737183, "logps/rejected": -2.2219343185424805, "loss": 1.1157, "nll_loss": 1.1193944215774536, "rewards/accuracies": 0.75, "rewards/chosen": -0.07071246206760406, "rewards/margins": 0.15148098766803741, "rewards/rejected": -0.22219343483448029, "step": 2996 }, { "epoch": 1.8282751258197347, "grad_norm": 4.671427249908447, "learning_rate": 5.0929577464788735e-06, "log_odds_chosen": 1.849366307258606, "log_odds_ratio": -0.634708046913147, "logits/chosen": -0.9386723041534424, "logits/rejected": -0.9008952379226685, "logps/chosen": -1.0720889568328857, "logps/rejected": -2.8434090614318848, "loss": 1.2717, "nll_loss": 1.340910792350769, "rewards/accuracies": 0.5, "rewards/chosen": -0.10720889270305634, "rewards/margins": 0.1771320104598999, "rewards/rejected": -0.28434091806411743, "step": 2997 }, { "epoch": 1.8288851608967516, "grad_norm": 1.2048568725585938, "learning_rate": 5.091977954684629e-06, "log_odds_chosen": 1.3709179162979126, "log_odds_ratio": -0.35156047344207764, "logits/chosen": -0.8580374121665955, "logits/rejected": -0.8624914884567261, "logps/chosen": -0.8167555332183838, "logps/rejected": -1.8252158164978027, "loss": 1.2364, "nll_loss": 1.0269334316253662, "rewards/accuracies": 0.625, "rewards/chosen": -0.08167555928230286, "rewards/margins": 0.10084602981805801, "rewards/rejected": -0.18252159655094147, "step": 2998 }, { "epoch": 1.8294951959737684, "grad_norm": 1.4907997846603394, "learning_rate": 5.090998162890385e-06, "log_odds_chosen": 2.7088260650634766, "log_odds_ratio": -0.42405444383621216, "logits/chosen": -0.8701107501983643, "logits/rejected": -0.7944515347480774, "logps/chosen": -0.6618197560310364, "logps/rejected": -2.892458915710449, "loss": 0.9897, "nll_loss": 0.7289825677871704, "rewards/accuracies": 0.625, "rewards/chosen": -0.0661819726228714, "rewards/margins": 0.22306394577026367, "rewards/rejected": -0.2892459034919739, "step": 2999 }, { "epoch": 1.8301052310507853, "grad_norm": 1.6460765600204468, "learning_rate": 5.090018371096142e-06, "log_odds_chosen": 1.0631115436553955, "log_odds_ratio": -0.5510998368263245, "logits/chosen": -0.7195731997489929, "logits/rejected": -0.6961040496826172, "logps/chosen": -0.6664726138114929, "logps/rejected": -1.5564587116241455, "loss": 1.1268, "nll_loss": 1.0695154666900635, "rewards/accuracies": 0.5, "rewards/chosen": -0.06664726138114929, "rewards/margins": 0.08899862319231033, "rewards/rejected": -0.15564587712287903, "step": 3000 }, { "epoch": 1.8307152661278023, "grad_norm": 1.8710094690322876, "learning_rate": 5.089038579301898e-06, "log_odds_chosen": 0.9998834133148193, "log_odds_ratio": -0.5257665514945984, "logits/chosen": -0.832033634185791, "logits/rejected": -0.8506331443786621, "logps/chosen": -0.7883210778236389, "logps/rejected": -1.5172518491744995, "loss": 1.1149, "nll_loss": 0.935732364654541, "rewards/accuracies": 0.5, "rewards/chosen": -0.07883210480213165, "rewards/margins": 0.07289309054613113, "rewards/rejected": -0.15172520279884338, "step": 3001 }, { "epoch": 1.8313253012048194, "grad_norm": 1.7938501834869385, "learning_rate": 5.088058787507655e-06, "log_odds_chosen": 0.46218207478523254, "log_odds_ratio": -0.7834254503250122, "logits/chosen": -0.8927503824234009, "logits/rejected": -1.0045702457427979, "logps/chosen": -1.0818623304367065, "logps/rejected": -1.5478235483169556, "loss": 1.1741, "nll_loss": 1.3069936037063599, "rewards/accuracies": 0.375, "rewards/chosen": -0.10818623006343842, "rewards/margins": 0.04659612104296684, "rewards/rejected": -0.15478235483169556, "step": 3002 }, { "epoch": 1.8319353362818362, "grad_norm": 1.9105820655822754, "learning_rate": 5.087078995713411e-06, "log_odds_chosen": 0.946422815322876, "log_odds_ratio": -0.5837032198905945, "logits/chosen": -0.8547594547271729, "logits/rejected": -0.8192530870437622, "logps/chosen": -0.8170840740203857, "logps/rejected": -1.4659655094146729, "loss": 1.1074, "nll_loss": 1.1053798198699951, "rewards/accuracies": 0.5, "rewards/chosen": -0.0817084088921547, "rewards/margins": 0.06488816440105438, "rewards/rejected": -0.14659656584262848, "step": 3003 }, { "epoch": 1.832545371358853, "grad_norm": 2.9886882305145264, "learning_rate": 5.086099203919166e-06, "log_odds_chosen": 0.3421021103858948, "log_odds_ratio": -0.7293146848678589, "logits/chosen": -1.0511667728424072, "logits/rejected": -0.8887174129486084, "logps/chosen": -0.997678816318512, "logps/rejected": -1.2766931056976318, "loss": 1.1825, "nll_loss": 1.1065654754638672, "rewards/accuracies": 0.5, "rewards/chosen": -0.09976787865161896, "rewards/margins": 0.027901429682970047, "rewards/rejected": -0.1276693046092987, "step": 3004 }, { "epoch": 1.83315540643587, "grad_norm": 2.123518228530884, "learning_rate": 5.085119412124923e-06, "log_odds_chosen": 1.2123992443084717, "log_odds_ratio": -0.5681841373443604, "logits/chosen": -0.8629352450370789, "logits/rejected": -1.00568687915802, "logps/chosen": -0.9361317157745361, "logps/rejected": -1.7522510290145874, "loss": 0.9639, "nll_loss": 1.0802428722381592, "rewards/accuracies": 0.75, "rewards/chosen": -0.09361317753791809, "rewards/margins": 0.08161193132400513, "rewards/rejected": -0.17522510886192322, "step": 3005 }, { "epoch": 1.833765441512887, "grad_norm": 3.967414379119873, "learning_rate": 5.0841396203306794e-06, "log_odds_chosen": 0.8627570867538452, "log_odds_ratio": -0.6777989268302917, "logits/chosen": -0.8403773307800293, "logits/rejected": -0.890794038772583, "logps/chosen": -0.8896954655647278, "logps/rejected": -1.5286659002304077, "loss": 1.1707, "nll_loss": 1.341338872909546, "rewards/accuracies": 0.625, "rewards/chosen": -0.08896955102682114, "rewards/margins": 0.063897043466568, "rewards/rejected": -0.15286658704280853, "step": 3006 }, { "epoch": 1.834375476589904, "grad_norm": 2.5629703998565674, "learning_rate": 5.0831598285364356e-06, "log_odds_chosen": 1.6146396398544312, "log_odds_ratio": -0.5707791447639465, "logits/chosen": -0.9829496741294861, "logits/rejected": -1.0586308240890503, "logps/chosen": -0.9991370439529419, "logps/rejected": -2.384813070297241, "loss": 0.9894, "nll_loss": 1.1956232786178589, "rewards/accuracies": 0.75, "rewards/chosen": -0.09991370886564255, "rewards/margins": 0.13856759667396545, "rewards/rejected": -0.2384813129901886, "step": 3007 }, { "epoch": 1.834985511666921, "grad_norm": 1.9352511167526245, "learning_rate": 5.0821800367421925e-06, "log_odds_chosen": 2.0210485458374023, "log_odds_ratio": -0.3212203085422516, "logits/chosen": -0.9123569130897522, "logits/rejected": -0.9437052011489868, "logps/chosen": -0.9401422739028931, "logps/rejected": -2.5541157722473145, "loss": 1.0998, "nll_loss": 1.0365371704101562, "rewards/accuracies": 0.875, "rewards/chosen": -0.0940142273902893, "rewards/margins": 0.16139735281467438, "rewards/rejected": -0.2554115951061249, "step": 3008 }, { "epoch": 1.8355955467439378, "grad_norm": 1.6014946699142456, "learning_rate": 5.081200244947949e-06, "log_odds_chosen": 1.9576148986816406, "log_odds_ratio": -0.3667522072792053, "logits/chosen": -0.7504858374595642, "logits/rejected": -0.8429199457168579, "logps/chosen": -0.8435834646224976, "logps/rejected": -2.3523221015930176, "loss": 1.1586, "nll_loss": 0.859542727470398, "rewards/accuracies": 0.75, "rewards/chosen": -0.084358349442482, "rewards/margins": 0.15087386965751648, "rewards/rejected": -0.23523220419883728, "step": 3009 }, { "epoch": 1.8362055818209546, "grad_norm": 1.74567449092865, "learning_rate": 5.080220453153704e-06, "log_odds_chosen": 0.6538408398628235, "log_odds_ratio": -0.5702823400497437, "logits/chosen": -0.6766505241394043, "logits/rejected": -0.836948037147522, "logps/chosen": -0.7671520709991455, "logps/rejected": -1.219835638999939, "loss": 1.0243, "nll_loss": 0.9195014834403992, "rewards/accuracies": 0.625, "rewards/chosen": -0.07671521604061127, "rewards/margins": 0.04526834189891815, "rewards/rejected": -0.12198355793952942, "step": 3010 }, { "epoch": 1.8368156168979717, "grad_norm": 4.693711280822754, "learning_rate": 5.079240661359461e-06, "log_odds_chosen": 2.5320513248443604, "log_odds_ratio": -0.1901015341281891, "logits/chosen": -0.920814037322998, "logits/rejected": -1.0196101665496826, "logps/chosen": -0.6286860704421997, "logps/rejected": -2.5496625900268555, "loss": 1.0907, "nll_loss": 1.0429718494415283, "rewards/accuracies": 1.0, "rewards/chosen": -0.06286860257387161, "rewards/margins": 0.19209764897823334, "rewards/rejected": -0.25496625900268555, "step": 3011 }, { "epoch": 1.8374256519749885, "grad_norm": 1.5466289520263672, "learning_rate": 5.078260869565217e-06, "log_odds_chosen": 2.901353359222412, "log_odds_ratio": -0.21610118448734283, "logits/chosen": -0.7990923523902893, "logits/rejected": -0.9168449640274048, "logps/chosen": -0.656634509563446, "logps/rejected": -2.946192741394043, "loss": 1.0511, "nll_loss": 0.8459024429321289, "rewards/accuracies": 0.875, "rewards/chosen": -0.06566344946622849, "rewards/margins": 0.22895580530166626, "rewards/rejected": -0.29461926221847534, "step": 3012 }, { "epoch": 1.8380356870520056, "grad_norm": 9.265486717224121, "learning_rate": 5.077281077770974e-06, "log_odds_chosen": 1.6396981477737427, "log_odds_ratio": -0.5054362416267395, "logits/chosen": -0.9019023776054382, "logits/rejected": -0.9526298642158508, "logps/chosen": -0.8366364240646362, "logps/rejected": -2.0971410274505615, "loss": 1.1984, "nll_loss": 1.24265456199646, "rewards/accuracies": 0.75, "rewards/chosen": -0.08366364985704422, "rewards/margins": 0.1260504573583603, "rewards/rejected": -0.2097141146659851, "step": 3013 }, { "epoch": 1.8386457221290224, "grad_norm": 7.171846389770508, "learning_rate": 5.07630128597673e-06, "log_odds_chosen": 0.27150675654411316, "log_odds_ratio": -0.5991933941841125, "logits/chosen": -0.9197561144828796, "logits/rejected": -0.9414098262786865, "logps/chosen": -1.0236142873764038, "logps/rejected": -1.2262853384017944, "loss": 1.1579, "nll_loss": 1.2270429134368896, "rewards/accuracies": 0.625, "rewards/chosen": -0.10236142575740814, "rewards/margins": 0.020267104730010033, "rewards/rejected": -0.12262853980064392, "step": 3014 }, { "epoch": 1.8392557572060393, "grad_norm": 4.517622947692871, "learning_rate": 5.075321494182485e-06, "log_odds_chosen": 2.1770620346069336, "log_odds_ratio": -0.268943727016449, "logits/chosen": -1.0094457864761353, "logits/rejected": -1.069291114807129, "logps/chosen": -0.8511171340942383, "logps/rejected": -2.5985660552978516, "loss": 1.0839, "nll_loss": 1.0614330768585205, "rewards/accuracies": 0.875, "rewards/chosen": -0.08511170744895935, "rewards/margins": 0.17474490404129028, "rewards/rejected": -0.25985661149024963, "step": 3015 }, { "epoch": 1.8398657922830561, "grad_norm": 3.46014666557312, "learning_rate": 5.074341702388242e-06, "log_odds_chosen": 1.1975854635238647, "log_odds_ratio": -0.33387160301208496, "logits/chosen": -1.0593698024749756, "logits/rejected": -0.9990262389183044, "logps/chosen": -1.012239694595337, "logps/rejected": -1.8983701467514038, "loss": 1.0924, "nll_loss": 1.124783992767334, "rewards/accuracies": 1.0, "rewards/chosen": -0.10122396051883698, "rewards/margins": 0.08861306309700012, "rewards/rejected": -0.1898370236158371, "step": 3016 }, { "epoch": 1.8404758273600732, "grad_norm": 1.4367693662643433, "learning_rate": 5.0733619105939985e-06, "log_odds_chosen": 1.4161834716796875, "log_odds_ratio": -0.47980743646621704, "logits/chosen": -0.7129634022712708, "logits/rejected": -0.8231196999549866, "logps/chosen": -0.9771373271942139, "logps/rejected": -2.11716365814209, "loss": 1.1106, "nll_loss": 0.9188846349716187, "rewards/accuracies": 0.625, "rewards/chosen": -0.09771373122930527, "rewards/margins": 0.11400263756513596, "rewards/rejected": -0.21171635389328003, "step": 3017 }, { "epoch": 1.8410858624370903, "grad_norm": 1.230628252029419, "learning_rate": 5.072382118799755e-06, "log_odds_chosen": 0.6859130263328552, "log_odds_ratio": -0.6797612905502319, "logits/chosen": -0.7405944466590881, "logits/rejected": -0.7268997430801392, "logps/chosen": -0.787199854850769, "logps/rejected": -1.329554557800293, "loss": 1.0732, "nll_loss": 0.9511052370071411, "rewards/accuracies": 0.5, "rewards/chosen": -0.07871998846530914, "rewards/margins": 0.05423547327518463, "rewards/rejected": -0.13295546174049377, "step": 3018 }, { "epoch": 1.8416958975141071, "grad_norm": 5.467814922332764, "learning_rate": 5.0714023270055116e-06, "log_odds_chosen": 1.0396754741668701, "log_odds_ratio": -0.5210738182067871, "logits/chosen": -1.0016883611679077, "logits/rejected": -1.0292547941207886, "logps/chosen": -0.9850153923034668, "logps/rejected": -1.8101472854614258, "loss": 1.1881, "nll_loss": 1.0619968175888062, "rewards/accuracies": 0.625, "rewards/chosen": -0.0985015407204628, "rewards/margins": 0.08251319080591202, "rewards/rejected": -0.18101473152637482, "step": 3019 }, { "epoch": 1.842305932591124, "grad_norm": 1.1777561902999878, "learning_rate": 5.070422535211268e-06, "log_odds_chosen": 0.7338858246803284, "log_odds_ratio": -0.6115418672561646, "logits/chosen": -0.9163930416107178, "logits/rejected": -0.9182088375091553, "logps/chosen": -0.9869023561477661, "logps/rejected": -1.5824120044708252, "loss": 1.1692, "nll_loss": 1.165074348449707, "rewards/accuracies": 0.625, "rewards/chosen": -0.09869024157524109, "rewards/margins": 0.059550970792770386, "rewards/rejected": -0.15824121236801147, "step": 3020 }, { "epoch": 1.8429159676681408, "grad_norm": 1.5123928785324097, "learning_rate": 5.069442743417023e-06, "log_odds_chosen": 1.1436938047409058, "log_odds_ratio": -0.444936603307724, "logits/chosen": -0.6156934499740601, "logits/rejected": -0.665770411491394, "logps/chosen": -0.7316117286682129, "logps/rejected": -1.3836535215377808, "loss": 0.817, "nll_loss": 0.9053364992141724, "rewards/accuracies": 0.875, "rewards/chosen": -0.07316116988658905, "rewards/margins": 0.0652041882276535, "rewards/rejected": -0.13836535811424255, "step": 3021 }, { "epoch": 1.8435260027451579, "grad_norm": 1.1821212768554688, "learning_rate": 5.06846295162278e-06, "log_odds_chosen": 1.8503634929656982, "log_odds_ratio": -0.5049134492874146, "logits/chosen": -0.7413020133972168, "logits/rejected": -0.7025440335273743, "logps/chosen": -0.9146373271942139, "logps/rejected": -2.3076086044311523, "loss": 0.9539, "nll_loss": 1.1249946355819702, "rewards/accuracies": 0.75, "rewards/chosen": -0.09146372973918915, "rewards/margins": 0.13929715752601624, "rewards/rejected": -0.2307608723640442, "step": 3022 }, { "epoch": 1.8441360378221747, "grad_norm": 1.998679757118225, "learning_rate": 5.067483159828536e-06, "log_odds_chosen": 0.38342463970184326, "log_odds_ratio": -0.6763701438903809, "logits/chosen": -1.0646657943725586, "logits/rejected": -1.009609341621399, "logps/chosen": -0.9354146122932434, "logps/rejected": -1.2829692363739014, "loss": 1.2039, "nll_loss": 1.1372311115264893, "rewards/accuracies": 0.625, "rewards/chosen": -0.0935414656996727, "rewards/margins": 0.03475547209382057, "rewards/rejected": -0.12829694151878357, "step": 3023 }, { "epoch": 1.8447460728991918, "grad_norm": 3.0035924911499023, "learning_rate": 5.066503368034292e-06, "log_odds_chosen": 2.2298245429992676, "log_odds_ratio": -0.4280835688114166, "logits/chosen": -0.8180320858955383, "logits/rejected": -0.7687985301017761, "logps/chosen": -0.8066033720970154, "logps/rejected": -2.318979263305664, "loss": 0.986, "nll_loss": 0.9668058156967163, "rewards/accuracies": 0.75, "rewards/chosen": -0.08066034317016602, "rewards/margins": 0.1512375771999359, "rewards/rejected": -0.23189792037010193, "step": 3024 }, { "epoch": 1.8453561079762086, "grad_norm": 1.0138039588928223, "learning_rate": 5.065523576240049e-06, "log_odds_chosen": 2.0072684288024902, "log_odds_ratio": -0.33191901445388794, "logits/chosen": -0.664387583732605, "logits/rejected": -0.7366880178451538, "logps/chosen": -0.7155565619468689, "logps/rejected": -2.1789040565490723, "loss": 0.8745, "nll_loss": 0.8243201971054077, "rewards/accuracies": 0.875, "rewards/chosen": -0.07155565917491913, "rewards/margins": 0.14633473753929138, "rewards/rejected": -0.21789038181304932, "step": 3025 }, { "epoch": 1.8459661430532255, "grad_norm": 1.0203485488891602, "learning_rate": 5.064543784445805e-06, "log_odds_chosen": 1.1013989448547363, "log_odds_ratio": -0.5871909260749817, "logits/chosen": -0.8570480942726135, "logits/rejected": -0.7864934206008911, "logps/chosen": -0.7603434324264526, "logps/rejected": -1.5641669034957886, "loss": 1.0144, "nll_loss": 0.8936570882797241, "rewards/accuracies": 0.5, "rewards/chosen": -0.07603433728218079, "rewards/margins": 0.08038235455751419, "rewards/rejected": -0.15641669929027557, "step": 3026 }, { "epoch": 1.8465761781302423, "grad_norm": 1.4499753713607788, "learning_rate": 5.063563992651561e-06, "log_odds_chosen": 0.8313055038452148, "log_odds_ratio": -0.6461098194122314, "logits/chosen": -0.8462370038032532, "logits/rejected": -0.9052601456642151, "logps/chosen": -0.8546938300132751, "logps/rejected": -1.5106520652770996, "loss": 1.1128, "nll_loss": 0.9577297568321228, "rewards/accuracies": 0.375, "rewards/chosen": -0.08546938002109528, "rewards/margins": 0.0655958354473114, "rewards/rejected": -0.15106523036956787, "step": 3027 }, { "epoch": 1.8471862132072594, "grad_norm": 2.0091588497161865, "learning_rate": 5.0625842008573175e-06, "log_odds_chosen": 1.037306308746338, "log_odds_ratio": -0.5381232500076294, "logits/chosen": -0.840961217880249, "logits/rejected": -0.9211387634277344, "logps/chosen": -0.8726826906204224, "logps/rejected": -1.5976828336715698, "loss": 1.1, "nll_loss": 1.131192684173584, "rewards/accuracies": 0.75, "rewards/chosen": -0.08726827055215836, "rewards/margins": 0.07250000536441803, "rewards/rejected": -0.15976828336715698, "step": 3028 }, { "epoch": 1.8477962482842765, "grad_norm": 1.3032320737838745, "learning_rate": 5.061604409063074e-06, "log_odds_chosen": 1.6861008405685425, "log_odds_ratio": -0.38189268112182617, "logits/chosen": -0.7927966117858887, "logits/rejected": -0.8835961818695068, "logps/chosen": -0.7390729784965515, "logps/rejected": -1.971135139465332, "loss": 0.9531, "nll_loss": 1.2194437980651855, "rewards/accuracies": 0.875, "rewards/chosen": -0.07390730082988739, "rewards/margins": 0.12320621311664581, "rewards/rejected": -0.1971135139465332, "step": 3029 }, { "epoch": 1.8484062833612933, "grad_norm": 1.282676339149475, "learning_rate": 5.060624617268831e-06, "log_odds_chosen": 0.6675347089767456, "log_odds_ratio": -0.6211506128311157, "logits/chosen": -0.905949056148529, "logits/rejected": -0.830947756767273, "logps/chosen": -1.0379830598831177, "logps/rejected": -1.5884687900543213, "loss": 1.2848, "nll_loss": 1.2387112379074097, "rewards/accuracies": 0.5, "rewards/chosen": -0.10379830747842789, "rewards/margins": 0.055048566311597824, "rewards/rejected": -0.1588468700647354, "step": 3030 }, { "epoch": 1.8490163184383102, "grad_norm": 1.5136891603469849, "learning_rate": 5.059644825474587e-06, "log_odds_chosen": 2.8316075801849365, "log_odds_ratio": -0.22933095693588257, "logits/chosen": -0.6287264823913574, "logits/rejected": -0.6843090057373047, "logps/chosen": -0.5464013814926147, "logps/rejected": -2.6196587085723877, "loss": 0.9119, "nll_loss": 0.7686491012573242, "rewards/accuracies": 0.875, "rewards/chosen": -0.05464014410972595, "rewards/margins": 0.20732572674751282, "rewards/rejected": -0.26196587085723877, "step": 3031 }, { "epoch": 1.849626353515327, "grad_norm": 7.155033111572266, "learning_rate": 5.058665033680343e-06, "log_odds_chosen": 1.3673324584960938, "log_odds_ratio": -0.6468624472618103, "logits/chosen": -0.8614370822906494, "logits/rejected": -0.7776086926460266, "logps/chosen": -0.9643814563751221, "logps/rejected": -2.128802537918091, "loss": 0.9673, "nll_loss": 0.884160578250885, "rewards/accuracies": 0.625, "rewards/chosen": -0.09643814712762833, "rewards/margins": 0.11644209921360016, "rewards/rejected": -0.21288025379180908, "step": 3032 }, { "epoch": 1.850236388592344, "grad_norm": 1.9219892024993896, "learning_rate": 5.057685241886099e-06, "log_odds_chosen": 1.777302861213684, "log_odds_ratio": -0.3057252764701843, "logits/chosen": -0.7923510670661926, "logits/rejected": -0.7367969751358032, "logps/chosen": -0.7511862516403198, "logps/rejected": -2.0211470127105713, "loss": 1.0094, "nll_loss": 0.9542790651321411, "rewards/accuracies": 1.0, "rewards/chosen": -0.07511862367391586, "rewards/margins": 0.12699607014656067, "rewards/rejected": -0.20211470127105713, "step": 3033 }, { "epoch": 1.8508464236693611, "grad_norm": 4.278196811676025, "learning_rate": 5.056705450091855e-06, "log_odds_chosen": 1.3436359167099, "log_odds_ratio": -0.3964274525642395, "logits/chosen": -0.8223505020141602, "logits/rejected": -0.8251715302467346, "logps/chosen": -0.8723235726356506, "logps/rejected": -1.9550800323486328, "loss": 1.0705, "nll_loss": 1.070104718208313, "rewards/accuracies": 0.875, "rewards/chosen": -0.08723235875368118, "rewards/margins": 0.1082756444811821, "rewards/rejected": -0.19550800323486328, "step": 3034 }, { "epoch": 1.851456458746378, "grad_norm": 1.1698060035705566, "learning_rate": 5.055725658297611e-06, "log_odds_chosen": 0.2506679892539978, "log_odds_ratio": -0.636601448059082, "logits/chosen": -1.0186511278152466, "logits/rejected": -0.7767103910446167, "logps/chosen": -0.955335259437561, "logps/rejected": -1.1220896244049072, "loss": 1.0083, "nll_loss": 0.9788526892662048, "rewards/accuracies": 0.625, "rewards/chosen": -0.09553353488445282, "rewards/margins": 0.0166754312813282, "rewards/rejected": -0.11220897734165192, "step": 3035 }, { "epoch": 1.8520664938233948, "grad_norm": 3.671557903289795, "learning_rate": 5.054745866503368e-06, "log_odds_chosen": 2.4562439918518066, "log_odds_ratio": -0.4758749008178711, "logits/chosen": -0.6777955293655396, "logits/rejected": -0.8219020366668701, "logps/chosen": -0.7972472310066223, "logps/rejected": -2.946927070617676, "loss": 1.0712, "nll_loss": 0.9084374308586121, "rewards/accuracies": 0.75, "rewards/chosen": -0.07972472906112671, "rewards/margins": 0.2149680256843567, "rewards/rejected": -0.294692724943161, "step": 3036 }, { "epoch": 1.8526765289004117, "grad_norm": 3.101372480392456, "learning_rate": 5.053766074709124e-06, "log_odds_chosen": 0.7929514646530151, "log_odds_ratio": -0.5336900949478149, "logits/chosen": -0.7478688359260559, "logits/rejected": -0.6623824834823608, "logps/chosen": -0.8568079471588135, "logps/rejected": -1.445236086845398, "loss": 1.1115, "nll_loss": 0.9897908568382263, "rewards/accuracies": 0.625, "rewards/chosen": -0.08568079769611359, "rewards/margins": 0.05884281545877457, "rewards/rejected": -0.14452362060546875, "step": 3037 }, { "epoch": 1.8532865639774287, "grad_norm": 1.825990080833435, "learning_rate": 5.05278628291488e-06, "log_odds_chosen": 2.2196173667907715, "log_odds_ratio": -0.39016032218933105, "logits/chosen": -0.878081202507019, "logits/rejected": -0.853940486907959, "logps/chosen": -0.7743427157402039, "logps/rejected": -2.621798515319824, "loss": 1.1109, "nll_loss": 0.9425161480903625, "rewards/accuracies": 0.75, "rewards/chosen": -0.07743427157402039, "rewards/margins": 0.18474559485912323, "rewards/rejected": -0.2621798515319824, "step": 3038 }, { "epoch": 1.8538965990544456, "grad_norm": 1.5968928337097168, "learning_rate": 5.0518064911206366e-06, "log_odds_chosen": 0.1721574366092682, "log_odds_ratio": -0.6773583889007568, "logits/chosen": -0.9980648756027222, "logits/rejected": -0.8983988761901855, "logps/chosen": -1.0285871028900146, "logps/rejected": -1.154665231704712, "loss": 1.0491, "nll_loss": 1.0945767164230347, "rewards/accuracies": 0.5, "rewards/chosen": -0.10285870730876923, "rewards/margins": 0.012607818469405174, "rewards/rejected": -0.11546652764081955, "step": 3039 }, { "epoch": 1.8545066341314627, "grad_norm": 1.9407243728637695, "learning_rate": 5.050826699326393e-06, "log_odds_chosen": 2.5336387157440186, "log_odds_ratio": -0.4392881989479065, "logits/chosen": -0.8900256752967834, "logits/rejected": -0.7606527805328369, "logps/chosen": -0.8480169177055359, "logps/rejected": -2.8040056228637695, "loss": 1.113, "nll_loss": 1.0209087133407593, "rewards/accuracies": 0.75, "rewards/chosen": -0.08480168879032135, "rewards/margins": 0.19559890031814575, "rewards/rejected": -0.2804005742073059, "step": 3040 }, { "epoch": 1.8551166692084795, "grad_norm": 3.740570545196533, "learning_rate": 5.04984690753215e-06, "log_odds_chosen": 0.6085376739501953, "log_odds_ratio": -0.6688642501831055, "logits/chosen": -0.9847375154495239, "logits/rejected": -0.8960614800453186, "logps/chosen": -0.8607829213142395, "logps/rejected": -1.2815252542495728, "loss": 1.0065, "nll_loss": 1.0281376838684082, "rewards/accuracies": 0.25, "rewards/chosen": -0.08607830107212067, "rewards/margins": 0.04207422584295273, "rewards/rejected": -0.1281525194644928, "step": 3041 }, { "epoch": 1.8557267042854964, "grad_norm": 1.8737130165100098, "learning_rate": 5.048867115737906e-06, "log_odds_chosen": 0.6185060739517212, "log_odds_ratio": -0.7225363254547119, "logits/chosen": -0.9797121286392212, "logits/rejected": -0.9842094779014587, "logps/chosen": -0.8435788154602051, "logps/rejected": -1.4906857013702393, "loss": 1.0485, "nll_loss": 1.0161900520324707, "rewards/accuracies": 0.75, "rewards/chosen": -0.08435788005590439, "rewards/margins": 0.06471069157123566, "rewards/rejected": -0.14906857907772064, "step": 3042 }, { "epoch": 1.8563367393625132, "grad_norm": 2.46337628364563, "learning_rate": 5.047887323943662e-06, "log_odds_chosen": 0.7854076623916626, "log_odds_ratio": -0.5892115831375122, "logits/chosen": -0.9852485656738281, "logits/rejected": -1.0112031698226929, "logps/chosen": -0.7920181751251221, "logps/rejected": -1.3567708730697632, "loss": 0.9999, "nll_loss": 1.0543558597564697, "rewards/accuracies": 0.625, "rewards/chosen": -0.0792018249630928, "rewards/margins": 0.05647527053952217, "rewards/rejected": -0.13567709922790527, "step": 3043 }, { "epoch": 1.8569467744395303, "grad_norm": 1.0957664251327515, "learning_rate": 5.046907532149418e-06, "log_odds_chosen": 0.6277957558631897, "log_odds_ratio": -0.5202026963233948, "logits/chosen": -0.8988180160522461, "logits/rejected": -0.8589369058609009, "logps/chosen": -0.6384439468383789, "logps/rejected": -1.0134131908416748, "loss": 1.0067, "nll_loss": 0.8965312242507935, "rewards/accuracies": 0.875, "rewards/chosen": -0.06384439021348953, "rewards/margins": 0.03749692812561989, "rewards/rejected": -0.10134131461381912, "step": 3044 }, { "epoch": 1.8575568095165473, "grad_norm": 3.2350375652313232, "learning_rate": 5.045927740355174e-06, "log_odds_chosen": 2.4438207149505615, "log_odds_ratio": -0.4030582904815674, "logits/chosen": -0.7525489330291748, "logits/rejected": -0.7790641784667969, "logps/chosen": -0.6956459283828735, "logps/rejected": -2.6639394760131836, "loss": 0.9238, "nll_loss": 0.7967538237571716, "rewards/accuracies": 0.625, "rewards/chosen": -0.06956459581851959, "rewards/margins": 0.19682937860488892, "rewards/rejected": -0.2663939595222473, "step": 3045 }, { "epoch": 1.8581668445935642, "grad_norm": 1.9294383525848389, "learning_rate": 5.04494794856093e-06, "log_odds_chosen": 3.3176283836364746, "log_odds_ratio": -0.22383293509483337, "logits/chosen": -1.1015732288360596, "logits/rejected": -1.187252163887024, "logps/chosen": -0.8162018060684204, "logps/rejected": -3.523367166519165, "loss": 1.0967, "nll_loss": 1.2434481382369995, "rewards/accuracies": 0.875, "rewards/chosen": -0.08162017911672592, "rewards/margins": 0.27071651816368103, "rewards/rejected": -0.35233670473098755, "step": 3046 }, { "epoch": 1.858776879670581, "grad_norm": 2.4533469676971436, "learning_rate": 5.043968156766687e-06, "log_odds_chosen": 0.6472408175468445, "log_odds_ratio": -0.6467485427856445, "logits/chosen": -0.8549516797065735, "logits/rejected": -0.8655805587768555, "logps/chosen": -1.057373046875, "logps/rejected": -1.5552418231964111, "loss": 1.0459, "nll_loss": 1.07627272605896, "rewards/accuracies": 0.625, "rewards/chosen": -0.10573731362819672, "rewards/margins": 0.04978685826063156, "rewards/rejected": -0.15552416443824768, "step": 3047 }, { "epoch": 1.8593869147475979, "grad_norm": 1.4291110038757324, "learning_rate": 5.042988364972443e-06, "log_odds_chosen": 2.3080825805664062, "log_odds_ratio": -0.42535486817359924, "logits/chosen": -0.9701317548751831, "logits/rejected": -1.0070924758911133, "logps/chosen": -0.9137327671051025, "logps/rejected": -2.6779701709747314, "loss": 1.11, "nll_loss": 1.0878010988235474, "rewards/accuracies": 0.75, "rewards/chosen": -0.09137327969074249, "rewards/margins": 0.17642372846603394, "rewards/rejected": -0.2677970230579376, "step": 3048 }, { "epoch": 1.859996949824615, "grad_norm": 1.719043254852295, "learning_rate": 5.0420085731781995e-06, "log_odds_chosen": 1.9377392530441284, "log_odds_ratio": -0.43991801142692566, "logits/chosen": -0.8684865832328796, "logits/rejected": -0.9512128829956055, "logps/chosen": -0.7006718516349792, "logps/rejected": -2.164924144744873, "loss": 1.0732, "nll_loss": 0.9999555349349976, "rewards/accuracies": 0.875, "rewards/chosen": -0.07006718218326569, "rewards/margins": 0.14642523229122162, "rewards/rejected": -0.2164924144744873, "step": 3049 }, { "epoch": 1.8606069849016318, "grad_norm": 1.2982127666473389, "learning_rate": 5.041028781383956e-06, "log_odds_chosen": 1.7264931201934814, "log_odds_ratio": -0.4839388132095337, "logits/chosen": -0.9463480710983276, "logits/rejected": -0.9185802936553955, "logps/chosen": -0.6898120045661926, "logps/rejected": -2.0772364139556885, "loss": 1.2777, "nll_loss": 1.202204704284668, "rewards/accuracies": 0.625, "rewards/chosen": -0.06898120045661926, "rewards/margins": 0.13874243199825287, "rewards/rejected": -0.20772363245487213, "step": 3050 }, { "epoch": 1.8612170199786489, "grad_norm": 1.533760905265808, "learning_rate": 5.040048989589712e-06, "log_odds_chosen": 1.09413480758667, "log_odds_ratio": -0.4700016677379608, "logits/chosen": -0.6154019236564636, "logits/rejected": -0.7742198705673218, "logps/chosen": -0.7451950311660767, "logps/rejected": -1.5417957305908203, "loss": 1.0336, "nll_loss": 0.9162818789482117, "rewards/accuracies": 0.625, "rewards/chosen": -0.07451950013637543, "rewards/margins": 0.07966006547212601, "rewards/rejected": -0.15417957305908203, "step": 3051 }, { "epoch": 1.8618270550556657, "grad_norm": 2.908468723297119, "learning_rate": 5.039069197795468e-06, "log_odds_chosen": 0.5108066201210022, "log_odds_ratio": -0.53066486120224, "logits/chosen": -0.9540765285491943, "logits/rejected": -0.8417340517044067, "logps/chosen": -1.0344775915145874, "logps/rejected": -1.426945447921753, "loss": 1.1695, "nll_loss": 1.1282130479812622, "rewards/accuracies": 0.625, "rewards/chosen": -0.10344775021076202, "rewards/margins": 0.03924679011106491, "rewards/rejected": -0.14269454777240753, "step": 3052 }, { "epoch": 1.8624370901326825, "grad_norm": 1.2760519981384277, "learning_rate": 5.038089406001225e-06, "log_odds_chosen": 0.2020975947380066, "log_odds_ratio": -0.6751426458358765, "logits/chosen": -0.9442094564437866, "logits/rejected": -0.8926259875297546, "logps/chosen": -0.9017063975334167, "logps/rejected": -1.0038586854934692, "loss": 1.0824, "nll_loss": 1.171350121498108, "rewards/accuracies": 0.5, "rewards/chosen": -0.09017064422369003, "rewards/margins": 0.010215231217443943, "rewards/rejected": -0.1003858745098114, "step": 3053 }, { "epoch": 1.8630471252096994, "grad_norm": 1.3727787733078003, "learning_rate": 5.037109614206981e-06, "log_odds_chosen": 3.5764729976654053, "log_odds_ratio": -0.12982964515686035, "logits/chosen": -0.5455223321914673, "logits/rejected": -0.6635432839393616, "logps/chosen": -0.6852380037307739, "logps/rejected": -3.5528907775878906, "loss": 0.9828, "nll_loss": 0.7131038904190063, "rewards/accuracies": 0.875, "rewards/chosen": -0.06852380186319351, "rewards/margins": 0.28676527738571167, "rewards/rejected": -0.355289101600647, "step": 3054 }, { "epoch": 1.8636571602867165, "grad_norm": 1.0238083600997925, "learning_rate": 5.036129822412737e-06, "log_odds_chosen": 1.784414529800415, "log_odds_ratio": -0.45621970295906067, "logits/chosen": -0.680909276008606, "logits/rejected": -0.8550099730491638, "logps/chosen": -0.6631513833999634, "logps/rejected": -2.029780387878418, "loss": 0.9943, "nll_loss": 0.8906533718109131, "rewards/accuracies": 0.75, "rewards/chosen": -0.06631514430046082, "rewards/margins": 0.13666290044784546, "rewards/rejected": -0.20297804474830627, "step": 3055 }, { "epoch": 1.8642671953637335, "grad_norm": 1.6160807609558105, "learning_rate": 5.035150030618493e-06, "log_odds_chosen": 1.0354558229446411, "log_odds_ratio": -0.5752743482589722, "logits/chosen": -0.7110251188278198, "logits/rejected": -0.8609368801116943, "logps/chosen": -0.7181642055511475, "logps/rejected": -1.430101990699768, "loss": 1.0523, "nll_loss": 0.8391668796539307, "rewards/accuracies": 0.375, "rewards/chosen": -0.07181642204523087, "rewards/margins": 0.07119378447532654, "rewards/rejected": -0.1430101990699768, "step": 3056 }, { "epoch": 1.8648772304407504, "grad_norm": 1.7625982761383057, "learning_rate": 5.034170238824249e-06, "log_odds_chosen": 0.644830584526062, "log_odds_ratio": -0.5945575833320618, "logits/chosen": -0.7345211505889893, "logits/rejected": -0.6900432705879211, "logps/chosen": -0.8986930847167969, "logps/rejected": -1.4458674192428589, "loss": 1.0617, "nll_loss": 0.9189138412475586, "rewards/accuracies": 0.875, "rewards/chosen": -0.08986930549144745, "rewards/margins": 0.05471744388341904, "rewards/rejected": -0.1445867419242859, "step": 3057 }, { "epoch": 1.8654872655177672, "grad_norm": 7.432365417480469, "learning_rate": 5.033190447030006e-06, "log_odds_chosen": 0.7714844346046448, "log_odds_ratio": -0.5460910797119141, "logits/chosen": -0.9023177027702332, "logits/rejected": -0.8134728074073792, "logps/chosen": -0.8950166702270508, "logps/rejected": -1.5814870595932007, "loss": 1.1522, "nll_loss": 1.0205384492874146, "rewards/accuracies": 0.5, "rewards/chosen": -0.08950166404247284, "rewards/margins": 0.06864702701568604, "rewards/rejected": -0.15814870595932007, "step": 3058 }, { "epoch": 1.866097300594784, "grad_norm": 1.0651148557662964, "learning_rate": 5.032210655235762e-06, "log_odds_chosen": 0.05068434774875641, "log_odds_ratio": -0.7400567531585693, "logits/chosen": -0.813219428062439, "logits/rejected": -0.6489352583885193, "logps/chosen": -0.8290866017341614, "logps/rejected": -0.8052581548690796, "loss": 1.0726, "nll_loss": 1.0249723196029663, "rewards/accuracies": 0.625, "rewards/chosen": -0.08290866017341614, "rewards/margins": -0.002382843755185604, "rewards/rejected": -0.08052581548690796, "step": 3059 }, { "epoch": 1.8667073356718011, "grad_norm": 2.1303560733795166, "learning_rate": 5.0312308634415185e-06, "log_odds_chosen": 1.0683560371398926, "log_odds_ratio": -0.533109188079834, "logits/chosen": -1.0255111455917358, "logits/rejected": -1.0041905641555786, "logps/chosen": -0.947597861289978, "logps/rejected": -1.8190157413482666, "loss": 1.1401, "nll_loss": 1.1430318355560303, "rewards/accuracies": 0.75, "rewards/chosen": -0.09475979208946228, "rewards/margins": 0.08714177459478378, "rewards/rejected": -0.18190157413482666, "step": 3060 }, { "epoch": 1.8673173707488182, "grad_norm": 4.586140155792236, "learning_rate": 5.030251071647275e-06, "log_odds_chosen": 0.21661683917045593, "log_odds_ratio": -0.8149321675300598, "logits/chosen": -0.7526832818984985, "logits/rejected": -0.7827191352844238, "logps/chosen": -1.0817234516143799, "logps/rejected": -1.2399730682373047, "loss": 0.9608, "nll_loss": 0.9032350778579712, "rewards/accuracies": 0.75, "rewards/chosen": -0.10817234218120575, "rewards/margins": 0.01582496613264084, "rewards/rejected": -0.12399730831384659, "step": 3061 }, { "epoch": 1.867927405825835, "grad_norm": 1.608465313911438, "learning_rate": 5.029271279853031e-06, "log_odds_chosen": 1.036215901374817, "log_odds_ratio": -0.4798668622970581, "logits/chosen": -0.8295052647590637, "logits/rejected": -0.7834423184394836, "logps/chosen": -0.9417099952697754, "logps/rejected": -1.4408859014511108, "loss": 1.1644, "nll_loss": 1.1157772541046143, "rewards/accuracies": 0.625, "rewards/chosen": -0.09417100250720978, "rewards/margins": 0.04991758614778519, "rewards/rejected": -0.14408859610557556, "step": 3062 }, { "epoch": 1.868537440902852, "grad_norm": 1.298065423965454, "learning_rate": 5.028291488058787e-06, "log_odds_chosen": 3.971240520477295, "log_odds_ratio": -0.07379348576068878, "logits/chosen": -0.4867945611476898, "logits/rejected": -0.8699672818183899, "logps/chosen": -0.48251983523368835, "logps/rejected": -3.4262378215789795, "loss": 0.7807, "nll_loss": 0.5681151747703552, "rewards/accuracies": 1.0, "rewards/chosen": -0.048251986503601074, "rewards/margins": 0.2943717837333679, "rewards/rejected": -0.3426238000392914, "step": 3063 }, { "epoch": 1.8691474759798687, "grad_norm": 1.6554330587387085, "learning_rate": 5.027311696264544e-06, "log_odds_chosen": 1.2698256969451904, "log_odds_ratio": -0.48128241300582886, "logits/chosen": -0.8637083768844604, "logits/rejected": -0.9370256662368774, "logps/chosen": -0.5412660241127014, "logps/rejected": -1.2170629501342773, "loss": 1.015, "nll_loss": 1.1074714660644531, "rewards/accuracies": 0.75, "rewards/chosen": -0.05412659794092178, "rewards/margins": 0.06757969409227371, "rewards/rejected": -0.1217062920331955, "step": 3064 }, { "epoch": 1.8697575110568858, "grad_norm": 1.6985712051391602, "learning_rate": 5.0263319044703e-06, "log_odds_chosen": 0.1376495659351349, "log_odds_ratio": -0.7068400382995605, "logits/chosen": -0.8252453804016113, "logits/rejected": -0.7440705299377441, "logps/chosen": -0.9821791052818298, "logps/rejected": -1.0110145807266235, "loss": 1.0984, "nll_loss": 1.0281853675842285, "rewards/accuracies": 0.625, "rewards/chosen": -0.0982179120182991, "rewards/margins": 0.002883540466427803, "rewards/rejected": -0.10110145807266235, "step": 3065 }, { "epoch": 1.8703675461339027, "grad_norm": 1.0849140882492065, "learning_rate": 5.025352112676057e-06, "log_odds_chosen": 2.168200731277466, "log_odds_ratio": -0.3702467083930969, "logits/chosen": -0.8071864247322083, "logits/rejected": -0.8271828889846802, "logps/chosen": -0.784838855266571, "logps/rejected": -2.3539841175079346, "loss": 1.0381, "nll_loss": 1.1232428550720215, "rewards/accuracies": 0.625, "rewards/chosen": -0.07848387956619263, "rewards/margins": 0.15691453218460083, "rewards/rejected": -0.23539841175079346, "step": 3066 }, { "epoch": 1.8709775812109197, "grad_norm": 3.606569528579712, "learning_rate": 5.024372320881812e-06, "log_odds_chosen": 0.8631864190101624, "log_odds_ratio": -0.689984917640686, "logits/chosen": -0.8802719712257385, "logits/rejected": -0.791390061378479, "logps/chosen": -0.8936011791229248, "logps/rejected": -1.578566312789917, "loss": 0.9631, "nll_loss": 1.1092253923416138, "rewards/accuracies": 0.375, "rewards/chosen": -0.08936011791229248, "rewards/margins": 0.06849651038646698, "rewards/rejected": -0.15785662829875946, "step": 3067 }, { "epoch": 1.8715876162879366, "grad_norm": 1.3822964429855347, "learning_rate": 5.023392529087568e-06, "log_odds_chosen": 0.6471914052963257, "log_odds_ratio": -0.5660396814346313, "logits/chosen": -0.9282853603363037, "logits/rejected": -0.918527364730835, "logps/chosen": -0.7859587669372559, "logps/rejected": -1.2502368688583374, "loss": 1.0875, "nll_loss": 0.9586237072944641, "rewards/accuracies": 0.625, "rewards/chosen": -0.07859588414430618, "rewards/margins": 0.046427808701992035, "rewards/rejected": -0.12502369284629822, "step": 3068 }, { "epoch": 1.8721976513649534, "grad_norm": 3.615614891052246, "learning_rate": 5.022412737293325e-06, "log_odds_chosen": 2.572971820831299, "log_odds_ratio": -0.16616666316986084, "logits/chosen": -0.7614405751228333, "logits/rejected": -0.9423866271972656, "logps/chosen": -0.6934431195259094, "logps/rejected": -2.6567702293395996, "loss": 0.9518, "nll_loss": 0.871392548084259, "rewards/accuracies": 1.0, "rewards/chosen": -0.06934431195259094, "rewards/margins": 0.19633272290229797, "rewards/rejected": -0.2656770348548889, "step": 3069 }, { "epoch": 1.8728076864419703, "grad_norm": 1.4465070962905884, "learning_rate": 5.021432945499081e-06, "log_odds_chosen": 0.0816081166267395, "log_odds_ratio": -0.7315609455108643, "logits/chosen": -0.9043357372283936, "logits/rejected": -0.781208872795105, "logps/chosen": -0.9436861276626587, "logps/rejected": -1.0332072973251343, "loss": 1.1561, "nll_loss": 1.2307912111282349, "rewards/accuracies": 0.375, "rewards/chosen": -0.09436860680580139, "rewards/margins": 0.008952120319008827, "rewards/rejected": -0.10332073271274567, "step": 3070 }, { "epoch": 1.8734177215189873, "grad_norm": 1.740090250968933, "learning_rate": 5.0204531537048375e-06, "log_odds_chosen": 1.033820629119873, "log_odds_ratio": -0.7525190711021423, "logits/chosen": -0.7893511056900024, "logits/rejected": -0.7670328617095947, "logps/chosen": -0.9967296123504639, "logps/rejected": -1.838235855102539, "loss": 1.1232, "nll_loss": 1.0347673892974854, "rewards/accuracies": 0.375, "rewards/chosen": -0.09967296570539474, "rewards/margins": 0.08415062725543976, "rewards/rejected": -0.1838235855102539, "step": 3071 }, { "epoch": 1.8740277565960044, "grad_norm": 5.065492630004883, "learning_rate": 5.019473361910594e-06, "log_odds_chosen": 1.5549089908599854, "log_odds_ratio": -0.3425801396369934, "logits/chosen": -0.8686631321907043, "logits/rejected": -0.8956814408302307, "logps/chosen": -0.7225348949432373, "logps/rejected": -1.7697391510009766, "loss": 1.0433, "nll_loss": 0.9881555438041687, "rewards/accuracies": 0.875, "rewards/chosen": -0.07225349545478821, "rewards/margins": 0.10472041368484497, "rewards/rejected": -0.17697390913963318, "step": 3072 }, { "epoch": 1.8746377916730212, "grad_norm": 1.6752902269363403, "learning_rate": 5.01849357011635e-06, "log_odds_chosen": 0.1701178252696991, "log_odds_ratio": -0.6435218453407288, "logits/chosen": -0.8724584579467773, "logits/rejected": -0.8695675134658813, "logps/chosen": -0.8838928937911987, "logps/rejected": -0.9769651293754578, "loss": 1.1969, "nll_loss": 1.1954772472381592, "rewards/accuracies": 0.5, "rewards/chosen": -0.08838928490877151, "rewards/margins": 0.009307222440838814, "rewards/rejected": -0.09769650548696518, "step": 3073 }, { "epoch": 1.875247826750038, "grad_norm": 1.1827689409255981, "learning_rate": 5.017513778322106e-06, "log_odds_chosen": 1.5432002544403076, "log_odds_ratio": -0.45165473222732544, "logits/chosen": -0.9397053718566895, "logits/rejected": -0.9250054955482483, "logps/chosen": -0.8450435400009155, "logps/rejected": -1.960172414779663, "loss": 1.0813, "nll_loss": 1.050112247467041, "rewards/accuracies": 0.75, "rewards/chosen": -0.08450435847043991, "rewards/margins": 0.11151289194822311, "rewards/rejected": -0.19601723551750183, "step": 3074 }, { "epoch": 1.875857861827055, "grad_norm": 1.4309146404266357, "learning_rate": 5.016533986527863e-06, "log_odds_chosen": 0.7499575614929199, "log_odds_ratio": -0.5976966023445129, "logits/chosen": -0.7376729846000671, "logits/rejected": -0.8117738962173462, "logps/chosen": -0.890538215637207, "logps/rejected": -1.4888064861297607, "loss": 0.9627, "nll_loss": 1.082892894744873, "rewards/accuracies": 0.625, "rewards/chosen": -0.08905380964279175, "rewards/margins": 0.05982682853937149, "rewards/rejected": -0.14888064563274384, "step": 3075 }, { "epoch": 1.876467896904072, "grad_norm": 2.972282886505127, "learning_rate": 5.015554194733619e-06, "log_odds_chosen": 1.3656176328659058, "log_odds_ratio": -0.42146575450897217, "logits/chosen": -0.9620993137359619, "logits/rejected": -0.8618299961090088, "logps/chosen": -0.7854351997375488, "logps/rejected": -1.7137870788574219, "loss": 0.8929, "nll_loss": 0.8537344932556152, "rewards/accuracies": 0.75, "rewards/chosen": -0.078543521463871, "rewards/margins": 0.0928351879119873, "rewards/rejected": -0.1713787019252777, "step": 3076 }, { "epoch": 1.877077931981089, "grad_norm": 5.2917609214782715, "learning_rate": 5.014574402939375e-06, "log_odds_chosen": 0.6413893699645996, "log_odds_ratio": -0.5330682992935181, "logits/chosen": -0.7529528141021729, "logits/rejected": -0.6846252679824829, "logps/chosen": -0.8476547598838806, "logps/rejected": -1.2734217643737793, "loss": 1.0113, "nll_loss": 0.9991766810417175, "rewards/accuracies": 0.625, "rewards/chosen": -0.0847654789686203, "rewards/margins": 0.042576707899570465, "rewards/rejected": -0.12734217941761017, "step": 3077 }, { "epoch": 1.877687967058106, "grad_norm": 1.1729364395141602, "learning_rate": 5.013594611145131e-06, "log_odds_chosen": 0.4817134141921997, "log_odds_ratio": -0.7016167640686035, "logits/chosen": -0.8067553639411926, "logits/rejected": -0.7570465207099915, "logps/chosen": -0.9330253005027771, "logps/rejected": -1.3016963005065918, "loss": 1.1754, "nll_loss": 1.057870864868164, "rewards/accuracies": 0.625, "rewards/chosen": -0.09330253303050995, "rewards/margins": 0.036867111921310425, "rewards/rejected": -0.13016964495182037, "step": 3078 }, { "epoch": 1.8782980021351228, "grad_norm": 2.3206546306610107, "learning_rate": 5.012614819350887e-06, "log_odds_chosen": 1.160102128982544, "log_odds_ratio": -0.6307894587516785, "logits/chosen": -0.9801912307739258, "logits/rejected": -0.9354003667831421, "logps/chosen": -0.9201633930206299, "logps/rejected": -1.864961862564087, "loss": 1.3196, "nll_loss": 1.0540378093719482, "rewards/accuracies": 0.375, "rewards/chosen": -0.09201633930206299, "rewards/margins": 0.09447984397411346, "rewards/rejected": -0.18649618327617645, "step": 3079 }, { "epoch": 1.8789080372121396, "grad_norm": 1.4937635660171509, "learning_rate": 5.0116350275566435e-06, "log_odds_chosen": 1.1679718494415283, "log_odds_ratio": -0.6543703079223633, "logits/chosen": -0.9077333211898804, "logits/rejected": -0.7829538583755493, "logps/chosen": -0.964630126953125, "logps/rejected": -1.99945068359375, "loss": 1.1301, "nll_loss": 1.2827284336090088, "rewards/accuracies": 0.625, "rewards/chosen": -0.09646301716566086, "rewards/margins": 0.10348204523324966, "rewards/rejected": -0.19994506239891052, "step": 3080 }, { "epoch": 1.8795180722891565, "grad_norm": 1.487392544746399, "learning_rate": 5.0106552357624005e-06, "log_odds_chosen": 2.235037088394165, "log_odds_ratio": -0.34044331312179565, "logits/chosen": -0.880115270614624, "logits/rejected": -0.8139536380767822, "logps/chosen": -0.7172335386276245, "logps/rejected": -2.4671883583068848, "loss": 1.1823, "nll_loss": 1.0123370885849, "rewards/accuracies": 0.875, "rewards/chosen": -0.07172334939241409, "rewards/margins": 0.17499549686908722, "rewards/rejected": -0.24671883881092072, "step": 3081 }, { "epoch": 1.8801281073661735, "grad_norm": 1.233587622642517, "learning_rate": 5.009675443968157e-06, "log_odds_chosen": 1.1845629215240479, "log_odds_ratio": -0.4664000868797302, "logits/chosen": -0.8348090648651123, "logits/rejected": -0.8603161573410034, "logps/chosen": -0.6897218227386475, "logps/rejected": -1.4797672033309937, "loss": 1.0093, "nll_loss": 0.7558538913726807, "rewards/accuracies": 0.875, "rewards/chosen": -0.06897218525409698, "rewards/margins": 0.07900453358888626, "rewards/rejected": -0.14797672629356384, "step": 3082 }, { "epoch": 1.8807381424431906, "grad_norm": 2.787956953048706, "learning_rate": 5.0086956521739136e-06, "log_odds_chosen": 1.64885413646698, "log_odds_ratio": -0.570650041103363, "logits/chosen": -0.8727083802223206, "logits/rejected": -0.8282932043075562, "logps/chosen": -0.7758157253265381, "logps/rejected": -2.2551932334899902, "loss": 1.1584, "nll_loss": 1.0936594009399414, "rewards/accuracies": 0.75, "rewards/chosen": -0.07758156955242157, "rewards/margins": 0.14793774485588074, "rewards/rejected": -0.2255193293094635, "step": 3083 }, { "epoch": 1.8813481775202074, "grad_norm": 1.5030632019042969, "learning_rate": 5.007715860379669e-06, "log_odds_chosen": 0.571088969707489, "log_odds_ratio": -0.5816898345947266, "logits/chosen": -0.859082043170929, "logits/rejected": -0.8141895532608032, "logps/chosen": -0.9733942747116089, "logps/rejected": -1.3842359781265259, "loss": 1.1632, "nll_loss": 1.3094955682754517, "rewards/accuracies": 0.5, "rewards/chosen": -0.0973394364118576, "rewards/margins": 0.0410841666162014, "rewards/rejected": -0.1384236067533493, "step": 3084 }, { "epoch": 1.8819582125972243, "grad_norm": 1.3898754119873047, "learning_rate": 5.006736068585425e-06, "log_odds_chosen": 0.07060765475034714, "log_odds_ratio": -0.7688431739807129, "logits/chosen": -0.8943886756896973, "logits/rejected": -0.7679469585418701, "logps/chosen": -1.077566146850586, "logps/rejected": -1.1507052183151245, "loss": 1.3066, "nll_loss": 1.0959358215332031, "rewards/accuracies": 0.375, "rewards/chosen": -0.10775662213563919, "rewards/margins": 0.007313898764550686, "rewards/rejected": -0.11507052183151245, "step": 3085 }, { "epoch": 1.8825682476742411, "grad_norm": 1.7910126447677612, "learning_rate": 5.005756276791182e-06, "log_odds_chosen": 1.1637176275253296, "log_odds_ratio": -0.427318274974823, "logits/chosen": -0.4117010235786438, "logits/rejected": -0.4884478449821472, "logps/chosen": -0.503838300704956, "logps/rejected": -1.105342149734497, "loss": 1.0133, "nll_loss": 0.8801697492599487, "rewards/accuracies": 0.875, "rewards/chosen": -0.050383832305669785, "rewards/margins": 0.0601503923535347, "rewards/rejected": -0.11053422093391418, "step": 3086 }, { "epoch": 1.8831782827512582, "grad_norm": 1.159293532371521, "learning_rate": 5.004776484996938e-06, "log_odds_chosen": 2.5890841484069824, "log_odds_ratio": -0.3411708176136017, "logits/chosen": -0.6546015739440918, "logits/rejected": -0.7651278972625732, "logps/chosen": -0.629628598690033, "logps/rejected": -2.6700174808502197, "loss": 1.1363, "nll_loss": 0.7712187170982361, "rewards/accuracies": 0.75, "rewards/chosen": -0.06296286731958389, "rewards/margins": 0.20403890311717987, "rewards/rejected": -0.26700177788734436, "step": 3087 }, { "epoch": 1.8837883178282753, "grad_norm": 2.028836965560913, "learning_rate": 5.003796693202694e-06, "log_odds_chosen": 1.0247882604599, "log_odds_ratio": -0.6027184128761292, "logits/chosen": -0.658991277217865, "logits/rejected": -0.7261846661567688, "logps/chosen": -0.8136229515075684, "logps/rejected": -1.663457989692688, "loss": 1.226, "nll_loss": 1.1826624870300293, "rewards/accuracies": 0.625, "rewards/chosen": -0.0813622921705246, "rewards/margins": 0.08498352020978928, "rewards/rejected": -0.16634581983089447, "step": 3088 }, { "epoch": 1.8843983529052921, "grad_norm": 1.486404299736023, "learning_rate": 5.002816901408451e-06, "log_odds_chosen": 1.0919415950775146, "log_odds_ratio": -0.40385669469833374, "logits/chosen": -0.7659770250320435, "logits/rejected": -0.9229094982147217, "logps/chosen": -0.7076411843299866, "logps/rejected": -1.3613437414169312, "loss": 0.9168, "nll_loss": 0.87945955991745, "rewards/accuracies": 0.875, "rewards/chosen": -0.07076410949230194, "rewards/margins": 0.06537026166915894, "rewards/rejected": -0.13613437116146088, "step": 3089 }, { "epoch": 1.885008387982309, "grad_norm": 1.8209099769592285, "learning_rate": 5.001837109614206e-06, "log_odds_chosen": 1.7634351253509521, "log_odds_ratio": -0.3743172883987427, "logits/chosen": -0.6304529309272766, "logits/rejected": -0.6863961219787598, "logps/chosen": -0.6631146669387817, "logps/rejected": -1.9601783752441406, "loss": 1.0105, "nll_loss": 0.9261713027954102, "rewards/accuracies": 0.75, "rewards/chosen": -0.06631147116422653, "rewards/margins": 0.12970635294914246, "rewards/rejected": -0.19601783156394958, "step": 3090 }, { "epoch": 1.8856184230593258, "grad_norm": 2.4470040798187256, "learning_rate": 5.0008573178199625e-06, "log_odds_chosen": 1.7839553356170654, "log_odds_ratio": -0.40410274267196655, "logits/chosen": -0.6969171166419983, "logits/rejected": -0.6537843346595764, "logps/chosen": -0.8062025904655457, "logps/rejected": -2.0343177318573, "loss": 1.1061, "nll_loss": 1.0945091247558594, "rewards/accuracies": 0.875, "rewards/chosen": -0.08062026649713516, "rewards/margins": 0.12281151115894318, "rewards/rejected": -0.20343177020549774, "step": 3091 }, { "epoch": 1.8862284581363429, "grad_norm": 2.7444050312042236, "learning_rate": 4.9998775260257195e-06, "log_odds_chosen": 1.3305292129516602, "log_odds_ratio": -0.3122386038303375, "logits/chosen": -0.6344490051269531, "logits/rejected": -0.6156343221664429, "logps/chosen": -0.7624972462654114, "logps/rejected": -1.5499732494354248, "loss": 1.0047, "nll_loss": 0.9179905652999878, "rewards/accuracies": 0.75, "rewards/chosen": -0.07624972611665726, "rewards/margins": 0.07874760776758194, "rewards/rejected": -0.1549973338842392, "step": 3092 }, { "epoch": 1.8868384932133597, "grad_norm": 4.446104049682617, "learning_rate": 4.998897734231476e-06, "log_odds_chosen": 1.3507564067840576, "log_odds_ratio": -0.43496790528297424, "logits/chosen": -0.6951199769973755, "logits/rejected": -0.6354362368583679, "logps/chosen": -0.6630753874778748, "logps/rejected": -1.5221912860870361, "loss": 0.862, "nll_loss": 0.7867337465286255, "rewards/accuracies": 0.75, "rewards/chosen": -0.06630753725767136, "rewards/margins": 0.0859115868806839, "rewards/rejected": -0.15221913158893585, "step": 3093 }, { "epoch": 1.8874485282903768, "grad_norm": 3.8544352054595947, "learning_rate": 4.997917942437233e-06, "log_odds_chosen": 2.1849021911621094, "log_odds_ratio": -0.3360343277454376, "logits/chosen": -0.839824378490448, "logits/rejected": -0.8097922801971436, "logps/chosen": -0.787276029586792, "logps/rejected": -2.4808709621429443, "loss": 0.932, "nll_loss": 0.9853015542030334, "rewards/accuracies": 0.75, "rewards/chosen": -0.0787276029586792, "rewards/margins": 0.1693595051765442, "rewards/rejected": -0.2480871081352234, "step": 3094 }, { "epoch": 1.8880585633673936, "grad_norm": 10.614739418029785, "learning_rate": 4.996938150642988e-06, "log_odds_chosen": 1.6515644788742065, "log_odds_ratio": -0.832756757736206, "logits/chosen": -0.6074020266532898, "logits/rejected": -0.6498210430145264, "logps/chosen": -1.2780020236968994, "logps/rejected": -2.76879620552063, "loss": 1.2435, "nll_loss": 1.2262852191925049, "rewards/accuracies": 0.5, "rewards/chosen": -0.12780021131038666, "rewards/margins": 0.14907942712306976, "rewards/rejected": -0.27687960863113403, "step": 3095 }, { "epoch": 1.8886685984444105, "grad_norm": 2.001159429550171, "learning_rate": 4.995958358848744e-06, "log_odds_chosen": 1.4376846551895142, "log_odds_ratio": -0.3666246235370636, "logits/chosen": -0.6358508467674255, "logits/rejected": -0.802239179611206, "logps/chosen": -0.6896984577178955, "logps/rejected": -1.6799718141555786, "loss": 1.1168, "nll_loss": 0.8797380328178406, "rewards/accuracies": 0.875, "rewards/chosen": -0.06896984577178955, "rewards/margins": 0.09902734309434891, "rewards/rejected": -0.16799718141555786, "step": 3096 }, { "epoch": 1.8892786335214273, "grad_norm": 1.167773723602295, "learning_rate": 4.994978567054501e-06, "log_odds_chosen": 1.0837630033493042, "log_odds_ratio": -0.6212328672409058, "logits/chosen": -0.7143028974533081, "logits/rejected": -0.7431017756462097, "logps/chosen": -0.8662492036819458, "logps/rejected": -1.6262049674987793, "loss": 1.1368, "nll_loss": 0.9049906730651855, "rewards/accuracies": 0.75, "rewards/chosen": -0.08662492036819458, "rewards/margins": 0.07599557936191559, "rewards/rejected": -0.16262048482894897, "step": 3097 }, { "epoch": 1.8898886685984444, "grad_norm": 0.9983831644058228, "learning_rate": 4.993998775260257e-06, "log_odds_chosen": 0.37686872482299805, "log_odds_ratio": -0.5718812346458435, "logits/chosen": -0.8282232880592346, "logits/rejected": -0.8090528249740601, "logps/chosen": -0.7633427381515503, "logps/rejected": -1.0227067470550537, "loss": 1.0234, "nll_loss": 0.9228087067604065, "rewards/accuracies": 0.625, "rewards/chosen": -0.07633427530527115, "rewards/margins": 0.025936400517821312, "rewards/rejected": -0.10227067023515701, "step": 3098 }, { "epoch": 1.8904987036754615, "grad_norm": 1.0269343852996826, "learning_rate": 4.993018983466013e-06, "log_odds_chosen": 1.6080518960952759, "log_odds_ratio": -0.44222238659858704, "logits/chosen": -0.9137828946113586, "logits/rejected": -0.9407883882522583, "logps/chosen": -0.7502617835998535, "logps/rejected": -2.028841495513916, "loss": 1.0371, "nll_loss": 0.9564977884292603, "rewards/accuracies": 0.625, "rewards/chosen": -0.07502618432044983, "rewards/margins": 0.127857968211174, "rewards/rejected": -0.20288415253162384, "step": 3099 }, { "epoch": 1.8911087387524783, "grad_norm": 1.9115971326828003, "learning_rate": 4.99203919167177e-06, "log_odds_chosen": 0.21573001146316528, "log_odds_ratio": -0.6628099679946899, "logits/chosen": -1.15871000289917, "logits/rejected": -1.0017887353897095, "logps/chosen": -0.9104055166244507, "logps/rejected": -1.0425317287445068, "loss": 1.0839, "nll_loss": 1.2871994972229004, "rewards/accuracies": 0.5, "rewards/chosen": -0.09104055166244507, "rewards/margins": 0.013212625868618488, "rewards/rejected": -0.10425317287445068, "step": 3100 }, { "epoch": 1.8917187738294952, "grad_norm": 2.010695219039917, "learning_rate": 4.9910593998775254e-06, "log_odds_chosen": 1.7953298091888428, "log_odds_ratio": -0.3780418038368225, "logits/chosen": -0.49850499629974365, "logits/rejected": -0.6908271312713623, "logps/chosen": -0.5879048109054565, "logps/rejected": -1.8466846942901611, "loss": 0.9401, "nll_loss": 0.7186830043792725, "rewards/accuracies": 0.875, "rewards/chosen": -0.058790478855371475, "rewards/margins": 0.1258780062198639, "rewards/rejected": -0.18466848134994507, "step": 3101 }, { "epoch": 1.892328808906512, "grad_norm": 2.5650269985198975, "learning_rate": 4.9900796080832816e-06, "log_odds_chosen": 1.5843498706817627, "log_odds_ratio": -0.5023996233940125, "logits/chosen": -0.9317445158958435, "logits/rejected": -0.8805923461914062, "logps/chosen": -0.9861603379249573, "logps/rejected": -2.083725690841675, "loss": 1.0361, "nll_loss": 1.3015401363372803, "rewards/accuracies": 0.625, "rewards/chosen": -0.09861603379249573, "rewards/margins": 0.10975654423236847, "rewards/rejected": -0.2083725780248642, "step": 3102 }, { "epoch": 1.892938843983529, "grad_norm": 1.6076738834381104, "learning_rate": 4.9890998162890385e-06, "log_odds_chosen": 0.44293439388275146, "log_odds_ratio": -0.5917074084281921, "logits/chosen": -0.8814066052436829, "logits/rejected": -0.8507541418075562, "logps/chosen": -0.8695300817489624, "logps/rejected": -1.0988280773162842, "loss": 1.1473, "nll_loss": 1.078629493713379, "rewards/accuracies": 0.625, "rewards/chosen": -0.08695301413536072, "rewards/margins": 0.022929806262254715, "rewards/rejected": -0.10988282412290573, "step": 3103 }, { "epoch": 1.8935488790605461, "grad_norm": 1.144450306892395, "learning_rate": 4.988120024494795e-06, "log_odds_chosen": 0.8488739728927612, "log_odds_ratio": -0.6903359889984131, "logits/chosen": -0.6996299028396606, "logits/rejected": -0.8001128435134888, "logps/chosen": -0.7323846220970154, "logps/rejected": -1.1830556392669678, "loss": 0.9803, "nll_loss": 1.097570538520813, "rewards/accuracies": 0.375, "rewards/chosen": -0.07323846220970154, "rewards/margins": 0.045067112892866135, "rewards/rejected": -0.11830556392669678, "step": 3104 }, { "epoch": 1.894158914137563, "grad_norm": 2.725477457046509, "learning_rate": 4.987140232700551e-06, "log_odds_chosen": 1.612292766571045, "log_odds_ratio": -0.46747395396232605, "logits/chosen": -0.8426423668861389, "logits/rejected": -0.8377816081047058, "logps/chosen": -0.8233970999717712, "logps/rejected": -1.9887194633483887, "loss": 1.0614, "nll_loss": 1.0328665971755981, "rewards/accuracies": 0.75, "rewards/chosen": -0.08233971148729324, "rewards/margins": 0.11653224378824234, "rewards/rejected": -0.19887195527553558, "step": 3105 }, { "epoch": 1.8947689492145798, "grad_norm": 1.7694131135940552, "learning_rate": 4.986160440906308e-06, "log_odds_chosen": 2.3066394329071045, "log_odds_ratio": -0.2683447301387787, "logits/chosen": -0.6574403047561646, "logits/rejected": -0.7987154722213745, "logps/chosen": -0.7993061542510986, "logps/rejected": -2.4690492153167725, "loss": 1.0588, "nll_loss": 0.853409469127655, "rewards/accuracies": 0.875, "rewards/chosen": -0.07993060350418091, "rewards/margins": 0.16697433590888977, "rewards/rejected": -0.24690493941307068, "step": 3106 }, { "epoch": 1.8953789842915967, "grad_norm": 1.7601349353790283, "learning_rate": 4.985180649112063e-06, "log_odds_chosen": 1.4274041652679443, "log_odds_ratio": -0.42458197474479675, "logits/chosen": -0.9940718412399292, "logits/rejected": -1.0134599208831787, "logps/chosen": -0.8979209661483765, "logps/rejected": -2.0853686332702637, "loss": 0.944, "nll_loss": 1.0096102952957153, "rewards/accuracies": 0.875, "rewards/chosen": -0.08979210257530212, "rewards/margins": 0.11874476820230484, "rewards/rejected": -0.20853686332702637, "step": 3107 }, { "epoch": 1.8959890193686137, "grad_norm": 2.6981894969940186, "learning_rate": 4.98420085731782e-06, "log_odds_chosen": 0.4996534287929535, "log_odds_ratio": -0.7083269357681274, "logits/chosen": -0.5305708050727844, "logits/rejected": -0.662231981754303, "logps/chosen": -0.8413428068161011, "logps/rejected": -1.231959342956543, "loss": 1.1846, "nll_loss": 1.1210155487060547, "rewards/accuracies": 0.5, "rewards/chosen": -0.08413428068161011, "rewards/margins": 0.03906165063381195, "rewards/rejected": -0.12319593876600266, "step": 3108 }, { "epoch": 1.8965990544456306, "grad_norm": 1.1036423444747925, "learning_rate": 4.983221065523576e-06, "log_odds_chosen": 1.396701455116272, "log_odds_ratio": -0.3307158946990967, "logits/chosen": -0.7809146642684937, "logits/rejected": -0.7045212388038635, "logps/chosen": -0.6483873128890991, "logps/rejected": -1.6342594623565674, "loss": 0.9021, "nll_loss": 0.9116294384002686, "rewards/accuracies": 0.875, "rewards/chosen": -0.06483873724937439, "rewards/margins": 0.09858722984790802, "rewards/rejected": -0.16342595219612122, "step": 3109 }, { "epoch": 1.8972090895226477, "grad_norm": 1.4543074369430542, "learning_rate": 4.982241273729332e-06, "log_odds_chosen": 2.863093852996826, "log_odds_ratio": -0.24639886617660522, "logits/chosen": -0.68210369348526, "logits/rejected": -0.6863276958465576, "logps/chosen": -0.5817000269889832, "logps/rejected": -2.7478036880493164, "loss": 1.0817, "nll_loss": 0.9908312559127808, "rewards/accuracies": 0.875, "rewards/chosen": -0.058170005679130554, "rewards/margins": 0.2166103571653366, "rewards/rejected": -0.27478039264678955, "step": 3110 }, { "epoch": 1.8978191245996645, "grad_norm": 1.372982144355774, "learning_rate": 4.981261481935089e-06, "log_odds_chosen": 1.7266346216201782, "log_odds_ratio": -0.47822096943855286, "logits/chosen": -0.9837566614151001, "logits/rejected": -0.9529657959938049, "logps/chosen": -0.7757121324539185, "logps/rejected": -2.1830763816833496, "loss": 1.3402, "nll_loss": 1.3043475151062012, "rewards/accuracies": 0.625, "rewards/chosen": -0.07757122069597244, "rewards/margins": 0.1407364308834076, "rewards/rejected": -0.21830767393112183, "step": 3111 }, { "epoch": 1.8984291596766814, "grad_norm": 1.6456106901168823, "learning_rate": 4.9802816901408445e-06, "log_odds_chosen": 1.57908296585083, "log_odds_ratio": -0.38117170333862305, "logits/chosen": -0.9878475666046143, "logits/rejected": -0.8424707651138306, "logps/chosen": -0.9501291513442993, "logps/rejected": -2.148228883743286, "loss": 1.0908, "nll_loss": 1.092951774597168, "rewards/accuracies": 0.75, "rewards/chosen": -0.09501291811466217, "rewards/margins": 0.11980998516082764, "rewards/rejected": -0.214822918176651, "step": 3112 }, { "epoch": 1.8990391947536982, "grad_norm": 1.041991114616394, "learning_rate": 4.979301898346601e-06, "log_odds_chosen": 1.4939090013504028, "log_odds_ratio": -0.41668301820755005, "logits/chosen": -0.7982405424118042, "logits/rejected": -0.8715484142303467, "logps/chosen": -0.9194163680076599, "logps/rejected": -2.1079132556915283, "loss": 1.0053, "nll_loss": 0.9587635397911072, "rewards/accuracies": 0.75, "rewards/chosen": -0.09194164723157883, "rewards/margins": 0.11884968727827072, "rewards/rejected": -0.21079133450984955, "step": 3113 }, { "epoch": 1.8996492298307153, "grad_norm": 0.8963333964347839, "learning_rate": 4.978322106552358e-06, "log_odds_chosen": 1.1086771488189697, "log_odds_ratio": -0.47627773880958557, "logits/chosen": -0.8840665817260742, "logits/rejected": -0.7497166395187378, "logps/chosen": -0.9092026352882385, "logps/rejected": -1.73862624168396, "loss": 1.0725, "nll_loss": 1.0325913429260254, "rewards/accuracies": 0.625, "rewards/chosen": -0.09092025458812714, "rewards/margins": 0.08294236660003662, "rewards/rejected": -0.17386263608932495, "step": 3114 }, { "epoch": 1.9002592649077323, "grad_norm": 3.3764939308166504, "learning_rate": 4.977342314758114e-06, "log_odds_chosen": 1.4967553615570068, "log_odds_ratio": -0.43764233589172363, "logits/chosen": -0.9298742413520813, "logits/rejected": -0.959436297416687, "logps/chosen": -0.7926472425460815, "logps/rejected": -1.9503216743469238, "loss": 1.1874, "nll_loss": 1.1341567039489746, "rewards/accuracies": 0.625, "rewards/chosen": -0.07926471531391144, "rewards/margins": 0.11576744168996811, "rewards/rejected": -0.19503216445446014, "step": 3115 }, { "epoch": 1.9008692999847492, "grad_norm": 1.0309571027755737, "learning_rate": 4.97636252296387e-06, "log_odds_chosen": 1.3133344650268555, "log_odds_ratio": -0.44030022621154785, "logits/chosen": -0.86174076795578, "logits/rejected": -0.9100048542022705, "logps/chosen": -0.6730870008468628, "logps/rejected": -1.6564983129501343, "loss": 1.0006, "nll_loss": 0.9445192813873291, "rewards/accuracies": 0.75, "rewards/chosen": -0.0673087015748024, "rewards/margins": 0.09834113717079163, "rewards/rejected": -0.16564983129501343, "step": 3116 }, { "epoch": 1.901479335061766, "grad_norm": 1.3488041162490845, "learning_rate": 4.975382731169627e-06, "log_odds_chosen": 1.6462938785552979, "log_odds_ratio": -0.3392092287540436, "logits/chosen": -0.7993651628494263, "logits/rejected": -0.9257432222366333, "logps/chosen": -0.6987612247467041, "logps/rejected": -1.8117579221725464, "loss": 1.1388, "nll_loss": 1.3478074073791504, "rewards/accuracies": 1.0, "rewards/chosen": -0.06987612694501877, "rewards/margins": 0.11129966378211975, "rewards/rejected": -0.18117579817771912, "step": 3117 }, { "epoch": 1.9020893701387829, "grad_norm": 1.2940374612808228, "learning_rate": 4.974402939375382e-06, "log_odds_chosen": 1.8459879159927368, "log_odds_ratio": -0.5629435777664185, "logits/chosen": -0.7783734798431396, "logits/rejected": -0.9381085634231567, "logps/chosen": -0.7289265394210815, "logps/rejected": -2.3027610778808594, "loss": 0.9313, "nll_loss": 0.8550704121589661, "rewards/accuracies": 0.625, "rewards/chosen": -0.07289265096187592, "rewards/margins": 0.15738345682621002, "rewards/rejected": -0.23027610778808594, "step": 3118 }, { "epoch": 1.9026994052158, "grad_norm": 1.9131754636764526, "learning_rate": 4.973423147581138e-06, "log_odds_chosen": 1.2428830862045288, "log_odds_ratio": -0.5625555515289307, "logits/chosen": -0.7781592607498169, "logits/rejected": -0.897244393825531, "logps/chosen": -0.9005133509635925, "logps/rejected": -1.8229272365570068, "loss": 1.1591, "nll_loss": 1.1912832260131836, "rewards/accuracies": 0.5, "rewards/chosen": -0.09005133807659149, "rewards/margins": 0.09224138408899307, "rewards/rejected": -0.18229271471500397, "step": 3119 }, { "epoch": 1.9033094402928168, "grad_norm": 3.008697986602783, "learning_rate": 4.972443355786895e-06, "log_odds_chosen": 1.2170697450637817, "log_odds_ratio": -0.5342682003974915, "logits/chosen": -0.9684756994247437, "logits/rejected": -0.9845564365386963, "logps/chosen": -0.9974519610404968, "logps/rejected": -1.9792520999908447, "loss": 1.2148, "nll_loss": 1.1303131580352783, "rewards/accuracies": 0.75, "rewards/chosen": -0.09974519163370132, "rewards/margins": 0.09818002581596375, "rewards/rejected": -0.19792522490024567, "step": 3120 }, { "epoch": 1.9039194753698339, "grad_norm": 0.9925045967102051, "learning_rate": 4.971463563992651e-06, "log_odds_chosen": 2.0157923698425293, "log_odds_ratio": -0.39953187108039856, "logits/chosen": -0.8717406988143921, "logits/rejected": -0.977114200592041, "logps/chosen": -0.7102333307266235, "logps/rejected": -1.9756450653076172, "loss": 1.0129, "nll_loss": 1.1028903722763062, "rewards/accuracies": 0.75, "rewards/chosen": -0.07102333754301071, "rewards/margins": 0.12654118239879608, "rewards/rejected": -0.1975645273923874, "step": 3121 }, { "epoch": 1.9045295104468507, "grad_norm": 1.8254666328430176, "learning_rate": 4.970483772198408e-06, "log_odds_chosen": 1.589573860168457, "log_odds_ratio": -0.5583274960517883, "logits/chosen": -0.79278963804245, "logits/rejected": -0.8173049688339233, "logps/chosen": -0.745556116104126, "logps/rejected": -1.9006686210632324, "loss": 1.0564, "nll_loss": 1.2279318571090698, "rewards/accuracies": 0.625, "rewards/chosen": -0.07455562055110931, "rewards/margins": 0.11551126092672348, "rewards/rejected": -0.1900668740272522, "step": 3122 }, { "epoch": 1.9051395455238676, "grad_norm": 1.861801028251648, "learning_rate": 4.969503980404164e-06, "log_odds_chosen": 0.5087692141532898, "log_odds_ratio": -0.5246050357818604, "logits/chosen": -0.903908371925354, "logits/rejected": -0.8422043919563293, "logps/chosen": -0.5836141109466553, "logps/rejected": -0.8059465885162354, "loss": 0.9846, "nll_loss": 0.8977361917495728, "rewards/accuracies": 0.875, "rewards/chosen": -0.05836141109466553, "rewards/margins": 0.022233251482248306, "rewards/rejected": -0.08059465885162354, "step": 3123 }, { "epoch": 1.9057495806008844, "grad_norm": 2.462092161178589, "learning_rate": 4.96852418860992e-06, "log_odds_chosen": 1.0004650354385376, "log_odds_ratio": -0.5767329931259155, "logits/chosen": -0.8830769062042236, "logits/rejected": -1.1009202003479004, "logps/chosen": -0.7996739149093628, "logps/rejected": -1.4959591627120972, "loss": 1.0027, "nll_loss": 1.0510239601135254, "rewards/accuracies": 0.5, "rewards/chosen": -0.07996739447116852, "rewards/margins": 0.0696285292506218, "rewards/rejected": -0.14959591627120972, "step": 3124 }, { "epoch": 1.9063596156779015, "grad_norm": 1.5291292667388916, "learning_rate": 4.967544396815677e-06, "log_odds_chosen": 4.122159004211426, "log_odds_ratio": -0.27048078179359436, "logits/chosen": -0.7209489345550537, "logits/rejected": -0.928615391254425, "logps/chosen": -0.7187943458557129, "logps/rejected": -4.301602363586426, "loss": 1.0684, "nll_loss": 0.8328904509544373, "rewards/accuracies": 0.875, "rewards/chosen": -0.07187943905591965, "rewards/margins": 0.35828080773353577, "rewards/rejected": -0.4301602244377136, "step": 3125 }, { "epoch": 1.9069696507549185, "grad_norm": 1.7233415842056274, "learning_rate": 4.966564605021433e-06, "log_odds_chosen": 0.10123051702976227, "log_odds_ratio": -0.8096752762794495, "logits/chosen": -1.0359735488891602, "logits/rejected": -0.8991436958312988, "logps/chosen": -0.8004545569419861, "logps/rejected": -0.9650855660438538, "loss": 1.2078, "nll_loss": 1.0554438829421997, "rewards/accuracies": 0.5, "rewards/chosen": -0.08004546165466309, "rewards/margins": 0.01646309345960617, "rewards/rejected": -0.09650854766368866, "step": 3126 }, { "epoch": 1.9075796858319354, "grad_norm": 1.9019414186477661, "learning_rate": 4.965584813227189e-06, "log_odds_chosen": 0.3123680353164673, "log_odds_ratio": -0.5745226144790649, "logits/chosen": -0.9470477104187012, "logits/rejected": -0.9085545539855957, "logps/chosen": -1.1007815599441528, "logps/rejected": -1.3360590934753418, "loss": 1.1791, "nll_loss": 1.1825942993164062, "rewards/accuracies": 0.75, "rewards/chosen": -0.11007815599441528, "rewards/margins": 0.023527752608060837, "rewards/rejected": -0.13360591232776642, "step": 3127 }, { "epoch": 1.9081897209089522, "grad_norm": 1.4392772912979126, "learning_rate": 4.964605021432946e-06, "log_odds_chosen": 2.560276508331299, "log_odds_ratio": -0.16707780957221985, "logits/chosen": -0.7798561453819275, "logits/rejected": -0.8626532554626465, "logps/chosen": -0.6290644407272339, "logps/rejected": -2.5410690307617188, "loss": 0.9998, "nll_loss": 0.786663830280304, "rewards/accuracies": 1.0, "rewards/chosen": -0.06290645152330399, "rewards/margins": 0.19120046496391296, "rewards/rejected": -0.25410690903663635, "step": 3128 }, { "epoch": 1.908799755985969, "grad_norm": 1.0247198343276978, "learning_rate": 4.963625229638701e-06, "log_odds_chosen": 2.4535837173461914, "log_odds_ratio": -0.3439459204673767, "logits/chosen": -0.7311643362045288, "logits/rejected": -0.8644229769706726, "logps/chosen": -0.5642033219337463, "logps/rejected": -2.343188762664795, "loss": 0.99, "nll_loss": 0.8223299384117126, "rewards/accuracies": 0.875, "rewards/chosen": -0.05642033740878105, "rewards/margins": 0.17789854109287262, "rewards/rejected": -0.23431886732578278, "step": 3129 }, { "epoch": 1.9094097910629861, "grad_norm": 1.6312602758407593, "learning_rate": 4.962645437844457e-06, "log_odds_chosen": 0.7425573468208313, "log_odds_ratio": -0.6123043298721313, "logits/chosen": -0.7323912978172302, "logits/rejected": -0.7543686628341675, "logps/chosen": -0.7401857376098633, "logps/rejected": -1.189343810081482, "loss": 1.0228, "nll_loss": 0.7910636067390442, "rewards/accuracies": 0.375, "rewards/chosen": -0.07401858270168304, "rewards/margins": 0.04491580277681351, "rewards/rejected": -0.11893437802791595, "step": 3130 }, { "epoch": 1.9100198261400032, "grad_norm": 0.9889516234397888, "learning_rate": 4.961665646050214e-06, "log_odds_chosen": 0.6209444403648376, "log_odds_ratio": -0.6572861075401306, "logits/chosen": -1.0984163284301758, "logits/rejected": -1.064525842666626, "logps/chosen": -0.923153281211853, "logps/rejected": -1.4198870658874512, "loss": 0.8715, "nll_loss": 1.0290710926055908, "rewards/accuracies": 0.625, "rewards/chosen": -0.09231533110141754, "rewards/margins": 0.049673378467559814, "rewards/rejected": -0.14198869466781616, "step": 3131 }, { "epoch": 1.91062986121702, "grad_norm": 1.7700145244598389, "learning_rate": 4.96068585425597e-06, "log_odds_chosen": 4.245697021484375, "log_odds_ratio": -0.12835577130317688, "logits/chosen": -1.029173731803894, "logits/rejected": -1.0733747482299805, "logps/chosen": -0.8803904056549072, "logps/rejected": -4.588660717010498, "loss": 1.0444, "nll_loss": 1.1030949354171753, "rewards/accuracies": 1.0, "rewards/chosen": -0.08803904056549072, "rewards/margins": 0.3708270788192749, "rewards/rejected": -0.45886605978012085, "step": 3132 }, { "epoch": 1.911239896294037, "grad_norm": 4.762248992919922, "learning_rate": 4.9597060624617264e-06, "log_odds_chosen": 1.7799826860427856, "log_odds_ratio": -0.2785181403160095, "logits/chosen": -0.8402552008628845, "logits/rejected": -0.9567870497703552, "logps/chosen": -0.8267343044281006, "logps/rejected": -2.0374014377593994, "loss": 1.0781, "nll_loss": 0.8981594443321228, "rewards/accuracies": 0.875, "rewards/chosen": -0.08267343044281006, "rewards/margins": 0.12106671184301376, "rewards/rejected": -0.20374014973640442, "step": 3133 }, { "epoch": 1.9118499313710537, "grad_norm": 2.463991165161133, "learning_rate": 4.958726270667483e-06, "log_odds_chosen": 1.743660807609558, "log_odds_ratio": -0.3848278522491455, "logits/chosen": -0.7822061777114868, "logits/rejected": -0.8310481309890747, "logps/chosen": -0.7522734999656677, "logps/rejected": -2.0412251949310303, "loss": 1.0305, "nll_loss": 0.8830463886260986, "rewards/accuracies": 0.75, "rewards/chosen": -0.07522734999656677, "rewards/margins": 0.12889517843723297, "rewards/rejected": -0.20412252843379974, "step": 3134 }, { "epoch": 1.9124599664480708, "grad_norm": 3.6943423748016357, "learning_rate": 4.957746478873239e-06, "log_odds_chosen": 1.1379508972167969, "log_odds_ratio": -0.4935861825942993, "logits/chosen": -0.9246867895126343, "logits/rejected": -1.007528305053711, "logps/chosen": -0.9275496006011963, "logps/rejected": -1.7459684610366821, "loss": 1.2567, "nll_loss": 1.1929882764816284, "rewards/accuracies": 0.75, "rewards/chosen": -0.09275496751070023, "rewards/margins": 0.08184187859296799, "rewards/rejected": -0.17459683120250702, "step": 3135 }, { "epoch": 1.9130700015250877, "grad_norm": 1.5920087099075317, "learning_rate": 4.956766687078996e-06, "log_odds_chosen": 1.0224111080169678, "log_odds_ratio": -0.46522337198257446, "logits/chosen": -0.9102801084518433, "logits/rejected": -0.9137706756591797, "logps/chosen": -0.6542743444442749, "logps/rejected": -1.3154566287994385, "loss": 0.9972, "nll_loss": 0.7658584117889404, "rewards/accuracies": 0.75, "rewards/chosen": -0.06542743742465973, "rewards/margins": 0.06611824035644531, "rewards/rejected": -0.13154567778110504, "step": 3136 }, { "epoch": 1.9136800366021047, "grad_norm": 2.7968409061431885, "learning_rate": 4.955786895284752e-06, "log_odds_chosen": 2.1476962566375732, "log_odds_ratio": -0.3452984094619751, "logits/chosen": -0.8147406578063965, "logits/rejected": -0.8982431888580322, "logps/chosen": -0.7489731907844543, "logps/rejected": -2.376631021499634, "loss": 1.0321, "nll_loss": 0.7868669033050537, "rewards/accuracies": 0.875, "rewards/chosen": -0.07489731907844543, "rewards/margins": 0.16276578605175018, "rewards/rejected": -0.23766310513019562, "step": 3137 }, { "epoch": 1.9142900716791216, "grad_norm": 1.3598216772079468, "learning_rate": 4.954807103490508e-06, "log_odds_chosen": 0.8509190082550049, "log_odds_ratio": -0.44073227047920227, "logits/chosen": -0.9822085499763489, "logits/rejected": -1.0089985132217407, "logps/chosen": -0.8474953770637512, "logps/rejected": -1.3715382814407349, "loss": 1.2269, "nll_loss": 1.0295785665512085, "rewards/accuracies": 0.875, "rewards/chosen": -0.08474954217672348, "rewards/margins": 0.052404291927814484, "rewards/rejected": -0.13715381920337677, "step": 3138 }, { "epoch": 1.9149001067561384, "grad_norm": 5.586964130401611, "learning_rate": 4.953827311696265e-06, "log_odds_chosen": 1.860768437385559, "log_odds_ratio": -0.49836495518684387, "logits/chosen": -0.6645160913467407, "logits/rejected": -0.7913675308227539, "logps/chosen": -0.6687631011009216, "logps/rejected": -1.9662597179412842, "loss": 0.9897, "nll_loss": 0.7987092733383179, "rewards/accuracies": 0.625, "rewards/chosen": -0.06687631458044052, "rewards/margins": 0.12974967062473297, "rewards/rejected": -0.1966259777545929, "step": 3139 }, { "epoch": 1.9155101418331553, "grad_norm": 12.606969833374023, "learning_rate": 4.952847519902021e-06, "log_odds_chosen": 1.3279236555099487, "log_odds_ratio": -0.3735448718070984, "logits/chosen": -0.8235529661178589, "logits/rejected": -0.7480250597000122, "logps/chosen": -0.8797754049301147, "logps/rejected": -1.82024347782135, "loss": 1.3317, "nll_loss": 1.1825051307678223, "rewards/accuracies": 0.875, "rewards/chosen": -0.08797754347324371, "rewards/margins": 0.09404680132865906, "rewards/rejected": -0.18202432990074158, "step": 3140 }, { "epoch": 1.9161201769101723, "grad_norm": 1.4992649555206299, "learning_rate": 4.951867728107776e-06, "log_odds_chosen": 2.0929033756256104, "log_odds_ratio": -0.41905951499938965, "logits/chosen": -0.770135760307312, "logits/rejected": -0.9605997800827026, "logps/chosen": -0.8069016933441162, "logps/rejected": -2.4505562782287598, "loss": 0.9552, "nll_loss": 1.000745177268982, "rewards/accuracies": 0.75, "rewards/chosen": -0.0806901752948761, "rewards/margins": 0.16436545550823212, "rewards/rejected": -0.24505561590194702, "step": 3141 }, { "epoch": 1.9167302119871894, "grad_norm": 6.83734655380249, "learning_rate": 4.950887936313533e-06, "log_odds_chosen": 2.155233383178711, "log_odds_ratio": -0.31363996863365173, "logits/chosen": -0.583670973777771, "logits/rejected": -0.9596277475357056, "logps/chosen": -0.6685458421707153, "logps/rejected": -2.3297109603881836, "loss": 1.1356, "nll_loss": 1.0457419157028198, "rewards/accuracies": 0.875, "rewards/chosen": -0.0668545812368393, "rewards/margins": 0.16611652076244354, "rewards/rejected": -0.23297110199928284, "step": 3142 }, { "epoch": 1.9173402470642062, "grad_norm": 1.215949535369873, "learning_rate": 4.949908144519289e-06, "log_odds_chosen": 2.3641929626464844, "log_odds_ratio": -0.3859333395957947, "logits/chosen": -0.6586625576019287, "logits/rejected": -0.9264925718307495, "logps/chosen": -0.7175196409225464, "logps/rejected": -2.4003183841705322, "loss": 0.9454, "nll_loss": 0.9367694854736328, "rewards/accuracies": 0.625, "rewards/chosen": -0.07175195962190628, "rewards/margins": 0.16827988624572754, "rewards/rejected": -0.24003185331821442, "step": 3143 }, { "epoch": 1.917950282141223, "grad_norm": 1.4010049104690552, "learning_rate": 4.9489283527250455e-06, "log_odds_chosen": 1.9362090826034546, "log_odds_ratio": -0.36302217841148376, "logits/chosen": -0.8268527984619141, "logits/rejected": -0.9607956409454346, "logps/chosen": -0.8497634530067444, "logps/rejected": -2.303905487060547, "loss": 1.0772, "nll_loss": 0.9780779480934143, "rewards/accuracies": 0.875, "rewards/chosen": -0.08497634530067444, "rewards/margins": 0.14541421830654144, "rewards/rejected": -0.23039056360721588, "step": 3144 }, { "epoch": 1.91856031721824, "grad_norm": 1.1533609628677368, "learning_rate": 4.9479485609308025e-06, "log_odds_chosen": 2.392282485961914, "log_odds_ratio": -0.1558973789215088, "logits/chosen": -0.5807636380195618, "logits/rejected": -0.7832938432693481, "logps/chosen": -0.6944335103034973, "logps/rejected": -2.4493589401245117, "loss": 1.0869, "nll_loss": 0.9335077404975891, "rewards/accuracies": 1.0, "rewards/chosen": -0.06944335252046585, "rewards/margins": 0.1754925698041916, "rewards/rejected": -0.24493591487407684, "step": 3145 }, { "epoch": 1.919170352295257, "grad_norm": 2.11582088470459, "learning_rate": 4.9469687691365586e-06, "log_odds_chosen": 0.8708480000495911, "log_odds_ratio": -0.5095453262329102, "logits/chosen": -0.6460840702056885, "logits/rejected": -0.7310110926628113, "logps/chosen": -0.9365864992141724, "logps/rejected": -1.4240741729736328, "loss": 1.1313, "nll_loss": 1.2380472421646118, "rewards/accuracies": 0.75, "rewards/chosen": -0.09365865588188171, "rewards/margins": 0.04874876141548157, "rewards/rejected": -0.14240741729736328, "step": 3146 }, { "epoch": 1.9197803873722739, "grad_norm": 4.340248107910156, "learning_rate": 4.945988977342314e-06, "log_odds_chosen": 1.0054831504821777, "log_odds_ratio": -0.5234043002128601, "logits/chosen": -1.2841143608093262, "logits/rejected": -1.0159869194030762, "logps/chosen": -1.0051864385604858, "logps/rejected": -1.726499319076538, "loss": 1.2831, "nll_loss": 1.2537659406661987, "rewards/accuracies": 0.625, "rewards/chosen": -0.10051865130662918, "rewards/margins": 0.07213129103183746, "rewards/rejected": -0.17264994978904724, "step": 3147 }, { "epoch": 1.920390422449291, "grad_norm": 2.2756948471069336, "learning_rate": 4.945009185548071e-06, "log_odds_chosen": 2.1003403663635254, "log_odds_ratio": -0.26888376474380493, "logits/chosen": -0.5829803347587585, "logits/rejected": -0.8456088900566101, "logps/chosen": -0.6387683153152466, "logps/rejected": -2.2000601291656494, "loss": 0.9497, "nll_loss": 0.8609205484390259, "rewards/accuracies": 0.75, "rewards/chosen": -0.06387683004140854, "rewards/margins": 0.15612918138504028, "rewards/rejected": -0.22000601887702942, "step": 3148 }, { "epoch": 1.9210004575263078, "grad_norm": 2.980327844619751, "learning_rate": 4.944029393753827e-06, "log_odds_chosen": -0.10320791602134705, "log_odds_ratio": -0.8158032298088074, "logits/chosen": -1.1507753133773804, "logits/rejected": -1.0483711957931519, "logps/chosen": -0.8738209009170532, "logps/rejected": -0.8317074775695801, "loss": 1.1084, "nll_loss": 1.1336065530776978, "rewards/accuracies": 0.5, "rewards/chosen": -0.08738208562135696, "rewards/margins": -0.004211340099573135, "rewards/rejected": -0.08317074924707413, "step": 3149 }, { "epoch": 1.9216104926033246, "grad_norm": 4.659781455993652, "learning_rate": 4.943049601959584e-06, "log_odds_chosen": 1.2904804944992065, "log_odds_ratio": -0.5101473331451416, "logits/chosen": -0.8119325637817383, "logits/rejected": -0.7975358366966248, "logps/chosen": -0.7273733615875244, "logps/rejected": -1.6930345296859741, "loss": 1.0605, "nll_loss": 0.9951928853988647, "rewards/accuracies": 0.625, "rewards/chosen": -0.07273733615875244, "rewards/margins": 0.09656611829996109, "rewards/rejected": -0.16930347681045532, "step": 3150 }, { "epoch": 1.9222205276803415, "grad_norm": 1.246057391166687, "learning_rate": 4.94206981016534e-06, "log_odds_chosen": 2.0885140895843506, "log_odds_ratio": -0.29092469811439514, "logits/chosen": -0.8988709449768066, "logits/rejected": -0.966149091720581, "logps/chosen": -0.6664321422576904, "logps/rejected": -2.29710054397583, "loss": 1.1705, "nll_loss": 0.9583810567855835, "rewards/accuracies": 1.0, "rewards/chosen": -0.06664320826530457, "rewards/margins": 0.1630668342113495, "rewards/rejected": -0.22971005737781525, "step": 3151 }, { "epoch": 1.9228305627573585, "grad_norm": 2.756107807159424, "learning_rate": 4.941090018371095e-06, "log_odds_chosen": 1.6515151262283325, "log_odds_ratio": -0.2710059881210327, "logits/chosen": -0.858121395111084, "logits/rejected": -0.805014967918396, "logps/chosen": -0.8719300031661987, "logps/rejected": -1.9581387042999268, "loss": 1.0129, "nll_loss": 1.1380667686462402, "rewards/accuracies": 1.0, "rewards/chosen": -0.08719299733638763, "rewards/margins": 0.10862088203430176, "rewards/rejected": -0.1958138793706894, "step": 3152 }, { "epoch": 1.9234405978343756, "grad_norm": 3.5380172729492188, "learning_rate": 4.940110226576852e-06, "log_odds_chosen": 1.3043782711029053, "log_odds_ratio": -0.34760168194770813, "logits/chosen": -0.8810082077980042, "logits/rejected": -0.7686072587966919, "logps/chosen": -0.7658665776252747, "logps/rejected": -1.6576309204101562, "loss": 1.0391, "nll_loss": 1.0454579591751099, "rewards/accuracies": 0.875, "rewards/chosen": -0.07658665627241135, "rewards/margins": 0.08917643874883652, "rewards/rejected": -0.16576310992240906, "step": 3153 }, { "epoch": 1.9240506329113924, "grad_norm": 1.4198052883148193, "learning_rate": 4.939130434782608e-06, "log_odds_chosen": 2.6330387592315674, "log_odds_ratio": -0.1795198619365692, "logits/chosen": -0.4998961389064789, "logits/rejected": -0.7473652362823486, "logps/chosen": -0.6674098968505859, "logps/rejected": -2.6288411617279053, "loss": 0.922, "nll_loss": 0.7502346038818359, "rewards/accuracies": 1.0, "rewards/chosen": -0.0667409896850586, "rewards/margins": 0.19614313542842865, "rewards/rejected": -0.26288414001464844, "step": 3154 }, { "epoch": 1.9246606679884093, "grad_norm": 4.5191192626953125, "learning_rate": 4.9381506429883645e-06, "log_odds_chosen": 1.3016316890716553, "log_odds_ratio": -0.4482145309448242, "logits/chosen": -0.5586649775505066, "logits/rejected": -0.9194546341896057, "logps/chosen": -0.747636079788208, "logps/rejected": -1.579880952835083, "loss": 1.2076, "nll_loss": 1.1313681602478027, "rewards/accuracies": 0.75, "rewards/chosen": -0.07476361095905304, "rewards/margins": 0.08322447538375854, "rewards/rejected": -0.15798808634281158, "step": 3155 }, { "epoch": 1.9252707030654261, "grad_norm": 1.624089241027832, "learning_rate": 4.9371708511941215e-06, "log_odds_chosen": 3.083402633666992, "log_odds_ratio": -0.43693122267723083, "logits/chosen": -0.40240049362182617, "logits/rejected": -0.6921334266662598, "logps/chosen": -0.6072592735290527, "logps/rejected": -3.033235788345337, "loss": 0.9732, "nll_loss": 0.6457287073135376, "rewards/accuracies": 0.875, "rewards/chosen": -0.06072592735290527, "rewards/margins": 0.24259766936302185, "rewards/rejected": -0.3033236265182495, "step": 3156 }, { "epoch": 1.9258807381424432, "grad_norm": 1.4770969152450562, "learning_rate": 4.936191059399878e-06, "log_odds_chosen": 0.6010091304779053, "log_odds_ratio": -0.5990605354309082, "logits/chosen": -0.73883056640625, "logits/rejected": -0.7504190802574158, "logps/chosen": -0.8419026136398315, "logps/rejected": -1.162363886833191, "loss": 1.0252, "nll_loss": 1.1916338205337524, "rewards/accuracies": 0.625, "rewards/chosen": -0.0841902568936348, "rewards/margins": 0.0320461243391037, "rewards/rejected": -0.1162363812327385, "step": 3157 }, { "epoch": 1.9264907732194603, "grad_norm": 2.8128035068511963, "learning_rate": 4.935211267605633e-06, "log_odds_chosen": 2.1513328552246094, "log_odds_ratio": -0.21309307217597961, "logits/chosen": -0.7920081615447998, "logits/rejected": -0.7989377975463867, "logps/chosen": -0.9829623699188232, "logps/rejected": -2.728203296661377, "loss": 1.0672, "nll_loss": 1.1602402925491333, "rewards/accuracies": 1.0, "rewards/chosen": -0.09829624742269516, "rewards/margins": 0.17452411353588104, "rewards/rejected": -0.2728203535079956, "step": 3158 }, { "epoch": 1.9271008082964771, "grad_norm": 1.4814914464950562, "learning_rate": 4.93423147581139e-06, "log_odds_chosen": 0.5391937494277954, "log_odds_ratio": -0.5855231881141663, "logits/chosen": -1.014336109161377, "logits/rejected": -1.0072870254516602, "logps/chosen": -1.0342706441879272, "logps/rejected": -1.3653826713562012, "loss": 1.1982, "nll_loss": 1.4040110111236572, "rewards/accuracies": 0.625, "rewards/chosen": -0.10342705994844437, "rewards/margins": 0.03311118856072426, "rewards/rejected": -0.13653826713562012, "step": 3159 }, { "epoch": 1.927710843373494, "grad_norm": 2.959571123123169, "learning_rate": 4.933251684017146e-06, "log_odds_chosen": 0.8015685081481934, "log_odds_ratio": -0.6560080051422119, "logits/chosen": -0.8869588971138, "logits/rejected": -0.786641001701355, "logps/chosen": -0.8273866176605225, "logps/rejected": -1.2788784503936768, "loss": 1.1375, "nll_loss": 1.1473958492279053, "rewards/accuracies": 0.625, "rewards/chosen": -0.08273866027593613, "rewards/margins": 0.045149192214012146, "rewards/rejected": -0.12788784503936768, "step": 3160 }, { "epoch": 1.9283208784505108, "grad_norm": 3.8143062591552734, "learning_rate": 4.932271892222902e-06, "log_odds_chosen": 1.7877475023269653, "log_odds_ratio": -0.26348334550857544, "logits/chosen": -0.615976095199585, "logits/rejected": -0.9160779714584351, "logps/chosen": -0.6993962526321411, "logps/rejected": -1.9852029085159302, "loss": 0.9306, "nll_loss": 0.834240198135376, "rewards/accuracies": 0.875, "rewards/chosen": -0.06993962824344635, "rewards/margins": 0.12858065962791443, "rewards/rejected": -0.19852027297019958, "step": 3161 }, { "epoch": 1.9289309135275279, "grad_norm": 3.934633493423462, "learning_rate": 4.931292100428659e-06, "log_odds_chosen": 2.0557541847229004, "log_odds_ratio": -0.24300479888916016, "logits/chosen": -0.7633740901947021, "logits/rejected": -1.008605718612671, "logps/chosen": -0.5242742300033569, "logps/rejected": -1.9536634683609009, "loss": 1.0984, "nll_loss": 1.0462121963500977, "rewards/accuracies": 0.875, "rewards/chosen": -0.05242742970585823, "rewards/margins": 0.14293891191482544, "rewards/rejected": -0.19536633789539337, "step": 3162 }, { "epoch": 1.9295409486045447, "grad_norm": 1.5712863206863403, "learning_rate": 4.930312308634415e-06, "log_odds_chosen": 3.340121269226074, "log_odds_ratio": -0.15501564741134644, "logits/chosen": -0.511011004447937, "logits/rejected": -0.7432045340538025, "logps/chosen": -0.7874373197555542, "logps/rejected": -3.2730836868286133, "loss": 0.8723, "nll_loss": 0.9534663558006287, "rewards/accuracies": 0.875, "rewards/chosen": -0.07874373346567154, "rewards/margins": 0.24856463074684143, "rewards/rejected": -0.3273083567619324, "step": 3163 }, { "epoch": 1.9301509836815618, "grad_norm": 1.2220479249954224, "learning_rate": 4.929332516840171e-06, "log_odds_chosen": 1.069966435432434, "log_odds_ratio": -0.469706267118454, "logits/chosen": -0.5523710250854492, "logits/rejected": -0.50087970495224, "logps/chosen": -0.56525057554245, "logps/rejected": -1.1298251152038574, "loss": 1.2282, "nll_loss": 0.718788743019104, "rewards/accuracies": 0.5, "rewards/chosen": -0.056525055319070816, "rewards/margins": 0.056457459926605225, "rewards/rejected": -0.11298251152038574, "step": 3164 }, { "epoch": 1.9307610187585786, "grad_norm": 2.13407039642334, "learning_rate": 4.9283527250459274e-06, "log_odds_chosen": 1.690009355545044, "log_odds_ratio": -0.4174119532108307, "logits/chosen": -0.977914035320282, "logits/rejected": -1.1172536611557007, "logps/chosen": -1.0644960403442383, "logps/rejected": -2.434394598007202, "loss": 1.1255, "nll_loss": 1.2805155515670776, "rewards/accuracies": 0.875, "rewards/chosen": -0.10644960403442383, "rewards/margins": 0.13698989152908325, "rewards/rejected": -0.2434394657611847, "step": 3165 }, { "epoch": 1.9313710538355955, "grad_norm": 1.7559219598770142, "learning_rate": 4.9273729332516836e-06, "log_odds_chosen": 1.099539041519165, "log_odds_ratio": -0.4685564935207367, "logits/chosen": -0.725652813911438, "logits/rejected": -0.6361986398696899, "logps/chosen": -0.7071871757507324, "logps/rejected": -1.566387414932251, "loss": 1.0564, "nll_loss": 0.8590087294578552, "rewards/accuracies": 0.875, "rewards/chosen": -0.07071871310472488, "rewards/margins": 0.08592003583908081, "rewards/rejected": -0.1566387414932251, "step": 3166 }, { "epoch": 1.9319810889126123, "grad_norm": 4.701375961303711, "learning_rate": 4.9263931414574405e-06, "log_odds_chosen": 0.9270994067192078, "log_odds_ratio": -0.5159339308738708, "logits/chosen": -0.9116350412368774, "logits/rejected": -0.8611557483673096, "logps/chosen": -0.9209694862365723, "logps/rejected": -1.6017382144927979, "loss": 1.1524, "nll_loss": 1.1075341701507568, "rewards/accuracies": 0.75, "rewards/chosen": -0.09209693968296051, "rewards/margins": 0.06807689368724823, "rewards/rejected": -0.16017383337020874, "step": 3167 }, { "epoch": 1.9325911239896294, "grad_norm": 5.287292957305908, "learning_rate": 4.925413349663197e-06, "log_odds_chosen": 0.1924104392528534, "log_odds_ratio": -0.7003645896911621, "logits/chosen": -1.0030629634857178, "logits/rejected": -1.1204462051391602, "logps/chosen": -1.0345416069030762, "logps/rejected": -1.2308077812194824, "loss": 1.1542, "nll_loss": 1.1655527353286743, "rewards/accuracies": 0.5, "rewards/chosen": -0.10345415025949478, "rewards/margins": 0.01962662860751152, "rewards/rejected": -0.1230807825922966, "step": 3168 }, { "epoch": 1.9332011590666465, "grad_norm": 1.674017071723938, "learning_rate": 4.924433557868952e-06, "log_odds_chosen": 1.3636666536331177, "log_odds_ratio": -0.5962859392166138, "logits/chosen": -0.7212989330291748, "logits/rejected": -1.0039527416229248, "logps/chosen": -0.9144542813301086, "logps/rejected": -1.657508373260498, "loss": 1.1598, "nll_loss": 1.3415374755859375, "rewards/accuracies": 0.5, "rewards/chosen": -0.0914454311132431, "rewards/margins": 0.07430542260408401, "rewards/rejected": -0.16575086116790771, "step": 3169 }, { "epoch": 1.9338111941436633, "grad_norm": 3.0137174129486084, "learning_rate": 4.923453766074709e-06, "log_odds_chosen": 1.3343467712402344, "log_odds_ratio": -0.3315051794052124, "logits/chosen": -0.8860174417495728, "logits/rejected": -0.9289869666099548, "logps/chosen": -0.8508375287055969, "logps/rejected": -1.8772891759872437, "loss": 1.0595, "nll_loss": 1.0623286962509155, "rewards/accuracies": 1.0, "rewards/chosen": -0.08508375287055969, "rewards/margins": 0.1026451587677002, "rewards/rejected": -0.1877289116382599, "step": 3170 }, { "epoch": 1.9344212292206802, "grad_norm": 2.0388760566711426, "learning_rate": 4.922473974280465e-06, "log_odds_chosen": 1.168283224105835, "log_odds_ratio": -0.500024139881134, "logits/chosen": -0.8601956367492676, "logits/rejected": -1.0111173391342163, "logps/chosen": -0.8038344383239746, "logps/rejected": -1.722222924232483, "loss": 1.1148, "nll_loss": 1.0924947261810303, "rewards/accuracies": 0.625, "rewards/chosen": -0.08038343489170074, "rewards/margins": 0.09183886647224426, "rewards/rejected": -0.1722223162651062, "step": 3171 }, { "epoch": 1.935031264297697, "grad_norm": 3.359619617462158, "learning_rate": 4.921494182486221e-06, "log_odds_chosen": 0.5068312287330627, "log_odds_ratio": -0.5486159324645996, "logits/chosen": -0.7649770975112915, "logits/rejected": -0.7788947820663452, "logps/chosen": -0.7329954504966736, "logps/rejected": -1.0508193969726562, "loss": 1.1672, "nll_loss": 1.050999641418457, "rewards/accuracies": 0.625, "rewards/chosen": -0.07329954206943512, "rewards/margins": 0.03178238868713379, "rewards/rejected": -0.10508193075656891, "step": 3172 }, { "epoch": 1.935641299374714, "grad_norm": 1.0527452230453491, "learning_rate": 4.920514390691978e-06, "log_odds_chosen": 2.5906553268432617, "log_odds_ratio": -0.21081186830997467, "logits/chosen": -0.758280336856842, "logits/rejected": -0.9577261209487915, "logps/chosen": -0.7060192823410034, "logps/rejected": -2.600688934326172, "loss": 1.1131, "nll_loss": 0.936324954032898, "rewards/accuracies": 1.0, "rewards/chosen": -0.0706019252538681, "rewards/margins": 0.18946696817874908, "rewards/rejected": -0.2600688934326172, "step": 3173 }, { "epoch": 1.936251334451731, "grad_norm": 1.5538877248764038, "learning_rate": 4.919534598897734e-06, "log_odds_chosen": 2.9766416549682617, "log_odds_ratio": -0.2410883903503418, "logits/chosen": -0.6140612363815308, "logits/rejected": -0.7271786332130432, "logps/chosen": -0.5189554691314697, "logps/rejected": -2.6356422901153564, "loss": 0.923, "nll_loss": 0.7978644371032715, "rewards/accuracies": 0.875, "rewards/chosen": -0.051895543932914734, "rewards/margins": 0.2116686999797821, "rewards/rejected": -0.26356422901153564, "step": 3174 }, { "epoch": 1.936861369528748, "grad_norm": 3.1498091220855713, "learning_rate": 4.9185548071034895e-06, "log_odds_chosen": 2.8434407711029053, "log_odds_ratio": -0.22196418046951294, "logits/chosen": -0.4410017430782318, "logits/rejected": -0.575050950050354, "logps/chosen": -0.3769187033176422, "logps/rejected": -2.3319411277770996, "loss": 0.8632, "nll_loss": 0.44709479808807373, "rewards/accuracies": 1.0, "rewards/chosen": -0.0376918688416481, "rewards/margins": 0.19550225138664246, "rewards/rejected": -0.23319412767887115, "step": 3175 }, { "epoch": 1.9374714046057648, "grad_norm": 1.5594862699508667, "learning_rate": 4.9175750153092465e-06, "log_odds_chosen": 0.28100234270095825, "log_odds_ratio": -0.677182674407959, "logits/chosen": -0.7781860828399658, "logits/rejected": -0.8881880044937134, "logps/chosen": -0.9342055320739746, "logps/rejected": -1.1075400114059448, "loss": 1.0524, "nll_loss": 1.149880290031433, "rewards/accuracies": 0.625, "rewards/chosen": -0.09342055767774582, "rewards/margins": 0.017333442345261574, "rewards/rejected": -0.11075399816036224, "step": 3176 }, { "epoch": 1.9380814396827817, "grad_norm": 1.8864648342132568, "learning_rate": 4.916595223515003e-06, "log_odds_chosen": 3.3975772857666016, "log_odds_ratio": -0.22024300694465637, "logits/chosen": -0.5547648668289185, "logits/rejected": -0.761220395565033, "logps/chosen": -0.5698922872543335, "logps/rejected": -3.2451133728027344, "loss": 0.9535, "nll_loss": 0.728908360004425, "rewards/accuracies": 0.875, "rewards/chosen": -0.05698923021554947, "rewards/margins": 0.2675221562385559, "rewards/rejected": -0.3245113492012024, "step": 3177 }, { "epoch": 1.9386914747597985, "grad_norm": 1.3692580461502075, "learning_rate": 4.9156154317207596e-06, "log_odds_chosen": 0.6287575364112854, "log_odds_ratio": -0.5808652639389038, "logits/chosen": -0.8999735116958618, "logits/rejected": -0.7423825263977051, "logps/chosen": -0.9855577945709229, "logps/rejected": -1.4760764837265015, "loss": 1.1181, "nll_loss": 1.1470654010772705, "rewards/accuracies": 0.75, "rewards/chosen": -0.098555788397789, "rewards/margins": 0.04905186966061592, "rewards/rejected": -0.14760765433311462, "step": 3178 }, { "epoch": 1.9393015098368156, "grad_norm": 2.1140635013580322, "learning_rate": 4.914635639926516e-06, "log_odds_chosen": 1.8617181777954102, "log_odds_ratio": -0.44465216994285583, "logits/chosen": -0.8778839707374573, "logits/rejected": -0.8471603393554688, "logps/chosen": -1.0037109851837158, "logps/rejected": -2.601992130279541, "loss": 1.2413, "nll_loss": 1.1128637790679932, "rewards/accuracies": 0.75, "rewards/chosen": -0.1003711074590683, "rewards/margins": 0.1598280966281891, "rewards/rejected": -0.2601992189884186, "step": 3179 }, { "epoch": 1.9399115449138327, "grad_norm": 1.7708163261413574, "learning_rate": 4.913655848132272e-06, "log_odds_chosen": 0.3259720206260681, "log_odds_ratio": -0.6780352592468262, "logits/chosen": -0.8088346123695374, "logits/rejected": -0.8591753840446472, "logps/chosen": -0.8676255941390991, "logps/rejected": -1.1579747200012207, "loss": 1.0286, "nll_loss": 1.151445984840393, "rewards/accuracies": 0.5, "rewards/chosen": -0.08676256239414215, "rewards/margins": 0.029034912586212158, "rewards/rejected": -0.11579747498035431, "step": 3180 }, { "epoch": 1.9405215799908495, "grad_norm": 2.998415470123291, "learning_rate": 4.912676056338028e-06, "log_odds_chosen": 1.2501883506774902, "log_odds_ratio": -0.4790070056915283, "logits/chosen": -0.6925077438354492, "logits/rejected": -0.662095844745636, "logps/chosen": -0.7485864162445068, "logps/rejected": -1.3465088605880737, "loss": 1.0941, "nll_loss": 1.0597963333129883, "rewards/accuracies": 0.75, "rewards/chosen": -0.0748586431145668, "rewards/margins": 0.059792254120111465, "rewards/rejected": -0.13465090095996857, "step": 3181 }, { "epoch": 1.9411316150678664, "grad_norm": 3.138136386871338, "learning_rate": 4.911696264543784e-06, "log_odds_chosen": 1.5063170194625854, "log_odds_ratio": -0.46385902166366577, "logits/chosen": -0.9160669445991516, "logits/rejected": -1.0013158321380615, "logps/chosen": -0.7111062407493591, "logps/rejected": -1.5423156023025513, "loss": 1.0773, "nll_loss": 1.1175401210784912, "rewards/accuracies": 0.75, "rewards/chosen": -0.07111062854528427, "rewards/margins": 0.0831209272146225, "rewards/rejected": -0.15423157811164856, "step": 3182 }, { "epoch": 1.9417416501448832, "grad_norm": 1.6232273578643799, "learning_rate": 4.91071647274954e-06, "log_odds_chosen": 1.8932316303253174, "log_odds_ratio": -0.32369133830070496, "logits/chosen": -0.8873392939567566, "logits/rejected": -0.8924078941345215, "logps/chosen": -0.8856519460678101, "logps/rejected": -2.369767665863037, "loss": 0.9931, "nll_loss": 1.0940171480178833, "rewards/accuracies": 0.875, "rewards/chosen": -0.08856519311666489, "rewards/margins": 0.1484115868806839, "rewards/rejected": -0.2369767725467682, "step": 3183 }, { "epoch": 1.9423516852219003, "grad_norm": 1.4614300727844238, "learning_rate": 4.909736680955297e-06, "log_odds_chosen": 0.35442042350769043, "log_odds_ratio": -0.7019535303115845, "logits/chosen": -0.9204188585281372, "logits/rejected": -0.8475841283798218, "logps/chosen": -0.9405575394630432, "logps/rejected": -1.263206958770752, "loss": 1.293, "nll_loss": 1.164302110671997, "rewards/accuracies": 0.375, "rewards/chosen": -0.09405575692653656, "rewards/margins": 0.03226494789123535, "rewards/rejected": -0.1263207048177719, "step": 3184 }, { "epoch": 1.9429617202989173, "grad_norm": 1.878021001815796, "learning_rate": 4.908756889161053e-06, "log_odds_chosen": 1.8394229412078857, "log_odds_ratio": -0.5251709222793579, "logits/chosen": -0.8143972158432007, "logits/rejected": -0.8121471405029297, "logps/chosen": -0.6871980428695679, "logps/rejected": -2.0490200519561768, "loss": 0.968, "nll_loss": 1.0148981809616089, "rewards/accuracies": 0.625, "rewards/chosen": -0.06871980428695679, "rewards/margins": 0.13618220388889313, "rewards/rejected": -0.20490199327468872, "step": 3185 }, { "epoch": 1.9435717553759342, "grad_norm": 1.760735273361206, "learning_rate": 4.9077770973668085e-06, "log_odds_chosen": 1.5233654975891113, "log_odds_ratio": -0.42624032497406006, "logits/chosen": -1.0330052375793457, "logits/rejected": -1.0088446140289307, "logps/chosen": -0.960136353969574, "logps/rejected": -2.1928815841674805, "loss": 1.0999, "nll_loss": 1.148252248764038, "rewards/accuracies": 0.75, "rewards/chosen": -0.0960136353969574, "rewards/margins": 0.12327451258897781, "rewards/rejected": -0.21928814053535461, "step": 3186 }, { "epoch": 1.944181790452951, "grad_norm": 3.591243028640747, "learning_rate": 4.9067973055725655e-06, "log_odds_chosen": 0.42706936597824097, "log_odds_ratio": -0.5912795662879944, "logits/chosen": -0.6895887851715088, "logits/rejected": -0.7021631002426147, "logps/chosen": -0.837996244430542, "logps/rejected": -1.110823392868042, "loss": 1.0926, "nll_loss": 0.9966909885406494, "rewards/accuracies": 0.5, "rewards/chosen": -0.08379962295293808, "rewards/margins": 0.02728271298110485, "rewards/rejected": -0.11108234524726868, "step": 3187 }, { "epoch": 1.9447918255299679, "grad_norm": 1.0718600749969482, "learning_rate": 4.905817513778322e-06, "log_odds_chosen": 1.251011848449707, "log_odds_ratio": -0.3508518636226654, "logits/chosen": -0.9545785784721375, "logits/rejected": -0.8839952945709229, "logps/chosen": -0.9332192540168762, "logps/rejected": -1.9171028137207031, "loss": 1.1653, "nll_loss": 0.9715815186500549, "rewards/accuracies": 0.875, "rewards/chosen": -0.09332192689180374, "rewards/margins": 0.09838837385177612, "rewards/rejected": -0.19171029329299927, "step": 3188 }, { "epoch": 1.945401860606985, "grad_norm": 1.5406725406646729, "learning_rate": 4.904837721984079e-06, "log_odds_chosen": 0.4468013048171997, "log_odds_ratio": -0.5639792680740356, "logits/chosen": -0.6660869717597961, "logits/rejected": -0.6732889413833618, "logps/chosen": -0.7417452931404114, "logps/rejected": -1.054589867591858, "loss": 1.0701, "nll_loss": 0.8923450708389282, "rewards/accuracies": 0.75, "rewards/chosen": -0.07417452335357666, "rewards/margins": 0.03128446638584137, "rewards/rejected": -0.10545898973941803, "step": 3189 }, { "epoch": 1.9460118956840018, "grad_norm": 1.5302225351333618, "learning_rate": 4.903857930189835e-06, "log_odds_chosen": 3.057016611099243, "log_odds_ratio": -0.2980351448059082, "logits/chosen": -0.8076933026313782, "logits/rejected": -0.8506377935409546, "logps/chosen": -0.6716271638870239, "logps/rejected": -3.0409963130950928, "loss": 1.1, "nll_loss": 0.9642805457115173, "rewards/accuracies": 0.875, "rewards/chosen": -0.06716272234916687, "rewards/margins": 0.23693694174289703, "rewards/rejected": -0.3040996491909027, "step": 3190 }, { "epoch": 1.9466219307610189, "grad_norm": 1.036439299583435, "learning_rate": 4.902878138395591e-06, "log_odds_chosen": 0.48272064328193665, "log_odds_ratio": -0.5613093972206116, "logits/chosen": -1.0242825746536255, "logits/rejected": -0.908515453338623, "logps/chosen": -0.9978452920913696, "logps/rejected": -1.3789491653442383, "loss": 1.1115, "nll_loss": 1.1524816751480103, "rewards/accuracies": 0.75, "rewards/chosen": -0.09978453069925308, "rewards/margins": 0.0381103940308094, "rewards/rejected": -0.13789492845535278, "step": 3191 }, { "epoch": 1.9472319658380357, "grad_norm": 4.773731231689453, "learning_rate": 4.901898346601347e-06, "log_odds_chosen": 1.7169010639190674, "log_odds_ratio": -0.37584421038627625, "logits/chosen": -0.6939708590507507, "logits/rejected": -0.7236474752426147, "logps/chosen": -0.6211909055709839, "logps/rejected": -1.9247632026672363, "loss": 0.9332, "nll_loss": 0.7136380076408386, "rewards/accuracies": 0.75, "rewards/chosen": -0.06211909279227257, "rewards/margins": 0.13035723567008972, "rewards/rejected": -0.1924763321876526, "step": 3192 }, { "epoch": 1.9478420009150526, "grad_norm": 7.275735378265381, "learning_rate": 4.900918554807103e-06, "log_odds_chosen": 0.31533730030059814, "log_odds_ratio": -0.7412481307983398, "logits/chosen": -0.8320909142494202, "logits/rejected": -0.8021373152732849, "logps/chosen": -0.8683454990386963, "logps/rejected": -1.0796890258789062, "loss": 1.1068, "nll_loss": 1.0532011985778809, "rewards/accuracies": 0.5, "rewards/chosen": -0.08683454990386963, "rewards/margins": 0.021134350448846817, "rewards/rejected": -0.10796890407800674, "step": 3193 }, { "epoch": 1.9484520359920694, "grad_norm": 2.328042507171631, "learning_rate": 4.899938763012859e-06, "log_odds_chosen": 1.8074781894683838, "log_odds_ratio": -0.5754228234291077, "logits/chosen": -0.9995144009590149, "logits/rejected": -0.8802316784858704, "logps/chosen": -0.7615374326705933, "logps/rejected": -2.355921506881714, "loss": 1.0725, "nll_loss": 0.9911291599273682, "rewards/accuracies": 0.625, "rewards/chosen": -0.07615374773740768, "rewards/margins": 0.15943841636180878, "rewards/rejected": -0.23559215664863586, "step": 3194 }, { "epoch": 1.9490620710690865, "grad_norm": 3.0985195636749268, "learning_rate": 4.898958971218616e-06, "log_odds_chosen": 1.7995893955230713, "log_odds_ratio": -0.3587972819805145, "logits/chosen": -0.7755189538002014, "logits/rejected": -0.7969903349876404, "logps/chosen": -0.7260501384735107, "logps/rejected": -2.10017728805542, "loss": 0.9668, "nll_loss": 0.8607136011123657, "rewards/accuracies": 0.75, "rewards/chosen": -0.07260501384735107, "rewards/margins": 0.13741269707679749, "rewards/rejected": -0.21001771092414856, "step": 3195 }, { "epoch": 1.9496721061461035, "grad_norm": 2.067699432373047, "learning_rate": 4.897979179424372e-06, "log_odds_chosen": 1.8455960750579834, "log_odds_ratio": -0.4840793013572693, "logits/chosen": -0.722282886505127, "logits/rejected": -0.8746249079704285, "logps/chosen": -0.7449204921722412, "logps/rejected": -2.1046295166015625, "loss": 1.0836, "nll_loss": 0.8713068962097168, "rewards/accuracies": 0.5, "rewards/chosen": -0.07449205964803696, "rewards/margins": 0.13597089052200317, "rewards/rejected": -0.21046295762062073, "step": 3196 }, { "epoch": 1.9502821412231204, "grad_norm": 1.2665482759475708, "learning_rate": 4.8969993876301284e-06, "log_odds_chosen": 2.0745062828063965, "log_odds_ratio": -0.3204595148563385, "logits/chosen": -0.746198832988739, "logits/rejected": -0.8009994029998779, "logps/chosen": -0.9407910704612732, "logps/rejected": -2.6087167263031006, "loss": 0.9868, "nll_loss": 1.0400019884109497, "rewards/accuracies": 0.875, "rewards/chosen": -0.09407910704612732, "rewards/margins": 0.16679254174232483, "rewards/rejected": -0.26087167859077454, "step": 3197 }, { "epoch": 1.9508921763001372, "grad_norm": 2.1168594360351562, "learning_rate": 4.8960195958358846e-06, "log_odds_chosen": 1.0522069931030273, "log_odds_ratio": -0.5052448511123657, "logits/chosen": -0.6244784593582153, "logits/rejected": -0.7500911951065063, "logps/chosen": -0.9132036566734314, "logps/rejected": -1.6497418880462646, "loss": 1.1254, "nll_loss": 1.0134190320968628, "rewards/accuracies": 0.5, "rewards/chosen": -0.09132036566734314, "rewards/margins": 0.07365381717681885, "rewards/rejected": -0.164974182844162, "step": 3198 }, { "epoch": 1.951502211377154, "grad_norm": 1.4995861053466797, "learning_rate": 4.895039804041641e-06, "log_odds_chosen": 0.9788792133331299, "log_odds_ratio": -0.4525911211967468, "logits/chosen": -0.7003226280212402, "logits/rejected": -0.8094329833984375, "logps/chosen": -0.7825883030891418, "logps/rejected": -1.4038703441619873, "loss": 0.9528, "nll_loss": 1.036588191986084, "rewards/accuracies": 0.625, "rewards/chosen": -0.07825882732868195, "rewards/margins": 0.062128208577632904, "rewards/rejected": -0.14038704335689545, "step": 3199 }, { "epoch": 1.9521122464541711, "grad_norm": 1.1700741052627563, "learning_rate": 4.894060012247397e-06, "log_odds_chosen": 1.490844488143921, "log_odds_ratio": -0.36600741744041443, "logits/chosen": -0.7403532862663269, "logits/rejected": -0.7299853563308716, "logps/chosen": -0.8158112168312073, "logps/rejected": -1.9553041458129883, "loss": 1.0622, "nll_loss": 1.0207839012145996, "rewards/accuracies": 0.875, "rewards/chosen": -0.08158112317323685, "rewards/margins": 0.11394929140806198, "rewards/rejected": -0.19553041458129883, "step": 3200 }, { "epoch": 1.9527222815311882, "grad_norm": 4.238185405731201, "learning_rate": 4.893080220453154e-06, "log_odds_chosen": 1.2295076847076416, "log_odds_ratio": -0.6553114652633667, "logits/chosen": -0.9109560251235962, "logits/rejected": -0.9247994422912598, "logps/chosen": -1.0081838369369507, "logps/rejected": -1.8268835544586182, "loss": 1.3154, "nll_loss": 1.481229305267334, "rewards/accuracies": 0.5, "rewards/chosen": -0.10081838071346283, "rewards/margins": 0.08186997473239899, "rewards/rejected": -0.18268835544586182, "step": 3201 }, { "epoch": 1.953332316608205, "grad_norm": 1.0511316061019897, "learning_rate": 4.89210042865891e-06, "log_odds_chosen": 1.2593462467193604, "log_odds_ratio": -0.5409144163131714, "logits/chosen": -0.7737048268318176, "logits/rejected": -0.7867151498794556, "logps/chosen": -1.196808099746704, "logps/rejected": -2.2436327934265137, "loss": 1.115, "nll_loss": 1.1325616836547852, "rewards/accuracies": 0.625, "rewards/chosen": -0.11968080699443817, "rewards/margins": 0.10468244552612305, "rewards/rejected": -0.22436325252056122, "step": 3202 }, { "epoch": 1.953942351685222, "grad_norm": 1.262508511543274, "learning_rate": 4.891120636864667e-06, "log_odds_chosen": 0.46468234062194824, "log_odds_ratio": -0.6458843350410461, "logits/chosen": -0.6631366610527039, "logits/rejected": -0.7352367639541626, "logps/chosen": -0.8222184181213379, "logps/rejected": -1.1674081087112427, "loss": 1.0205, "nll_loss": 1.1632930040359497, "rewards/accuracies": 0.5, "rewards/chosen": -0.08222184330224991, "rewards/margins": 0.03451897203922272, "rewards/rejected": -0.11674080789089203, "step": 3203 }, { "epoch": 1.9545523867622387, "grad_norm": 1.5941752195358276, "learning_rate": 4.890140845070422e-06, "log_odds_chosen": 2.933870553970337, "log_odds_ratio": -0.27730998396873474, "logits/chosen": -0.7978775501251221, "logits/rejected": -0.8629483580589294, "logps/chosen": -0.6121662259101868, "logps/rejected": -3.027305841445923, "loss": 0.9884, "nll_loss": 0.835247278213501, "rewards/accuracies": 0.875, "rewards/chosen": -0.061216626316308975, "rewards/margins": 0.2415139526128769, "rewards/rejected": -0.30273059010505676, "step": 3204 }, { "epoch": 1.9551624218392556, "grad_norm": 1.1251732110977173, "learning_rate": 4.889161053276178e-06, "log_odds_chosen": 0.5869818925857544, "log_odds_ratio": -0.4641333222389221, "logits/chosen": -0.7128545045852661, "logits/rejected": -0.4944385886192322, "logps/chosen": -0.900959312915802, "logps/rejected": -1.3048083782196045, "loss": 1.1749, "nll_loss": 1.0071394443511963, "rewards/accuracies": 1.0, "rewards/chosen": -0.09009592980146408, "rewards/margins": 0.04038490727543831, "rewards/rejected": -0.1304808259010315, "step": 3205 }, { "epoch": 1.9557724569162727, "grad_norm": 6.999551773071289, "learning_rate": 4.888181261481935e-06, "log_odds_chosen": 2.7712550163269043, "log_odds_ratio": -0.25695693492889404, "logits/chosen": -0.840820848941803, "logits/rejected": -0.794863224029541, "logps/chosen": -0.8131837844848633, "logps/rejected": -3.0995848178863525, "loss": 1.2262, "nll_loss": 1.1300218105316162, "rewards/accuracies": 1.0, "rewards/chosen": -0.08131837844848633, "rewards/margins": 0.2286400943994522, "rewards/rejected": -0.30995845794677734, "step": 3206 }, { "epoch": 1.9563824919932897, "grad_norm": 1.3370522260665894, "learning_rate": 4.887201469687691e-06, "log_odds_chosen": 2.5624756813049316, "log_odds_ratio": -0.3599144518375397, "logits/chosen": -0.8251229524612427, "logits/rejected": -0.8912021517753601, "logps/chosen": -0.780997633934021, "logps/rejected": -2.887242555618286, "loss": 1.0329, "nll_loss": 0.8880524039268494, "rewards/accuracies": 0.75, "rewards/chosen": -0.07809976488351822, "rewards/margins": 0.21062450110912323, "rewards/rejected": -0.28872424364089966, "step": 3207 }, { "epoch": 1.9569925270703066, "grad_norm": 2.544060707092285, "learning_rate": 4.8862216778934475e-06, "log_odds_chosen": 1.0365121364593506, "log_odds_ratio": -0.9745888710021973, "logits/chosen": -0.8735173940658569, "logits/rejected": -0.8209006190299988, "logps/chosen": -0.7986536026000977, "logps/rejected": -2.0536398887634277, "loss": 1.1778, "nll_loss": 0.9871491193771362, "rewards/accuracies": 0.375, "rewards/chosen": -0.07986535876989365, "rewards/margins": 0.12549863755702972, "rewards/rejected": -0.20536400377750397, "step": 3208 }, { "epoch": 1.9576025621473234, "grad_norm": 4.047481536865234, "learning_rate": 4.885241886099204e-06, "log_odds_chosen": 1.0240010023117065, "log_odds_ratio": -0.3830515444278717, "logits/chosen": -1.0100955963134766, "logits/rejected": -0.9230256080627441, "logps/chosen": -0.8292093276977539, "logps/rejected": -1.5307703018188477, "loss": 1.1653, "nll_loss": 1.0096502304077148, "rewards/accuracies": 0.875, "rewards/chosen": -0.08292093127965927, "rewards/margins": 0.07015610486268997, "rewards/rejected": -0.15307703614234924, "step": 3209 }, { "epoch": 1.9582125972243403, "grad_norm": 0.9467303156852722, "learning_rate": 4.88426209430496e-06, "log_odds_chosen": 1.5406250953674316, "log_odds_ratio": -0.36050349473953247, "logits/chosen": -0.8781349062919617, "logits/rejected": -0.9095138311386108, "logps/chosen": -0.7025279998779297, "logps/rejected": -1.8003405332565308, "loss": 0.9038, "nll_loss": 0.795314610004425, "rewards/accuracies": 0.875, "rewards/chosen": -0.07025279849767685, "rewards/margins": 0.10978125780820847, "rewards/rejected": -0.18003405630588531, "step": 3210 }, { "epoch": 1.9588226323013573, "grad_norm": 1.5507445335388184, "learning_rate": 4.883282302510716e-06, "log_odds_chosen": 2.0498523712158203, "log_odds_ratio": -0.29126405715942383, "logits/chosen": -0.8114604949951172, "logits/rejected": -0.9401339292526245, "logps/chosen": -0.7148810029029846, "logps/rejected": -2.2934353351593018, "loss": 1.2198, "nll_loss": 0.9772394895553589, "rewards/accuracies": 0.875, "rewards/chosen": -0.07148809731006622, "rewards/margins": 0.15785543620586395, "rewards/rejected": -0.22934354841709137, "step": 3211 }, { "epoch": 1.9594326673783744, "grad_norm": 2.219651699066162, "learning_rate": 4.882302510716473e-06, "log_odds_chosen": 2.371840238571167, "log_odds_ratio": -0.4185417890548706, "logits/chosen": -0.7459859848022461, "logits/rejected": -0.6971405744552612, "logps/chosen": -0.7136883735656738, "logps/rejected": -2.622779369354248, "loss": 0.9365, "nll_loss": 0.7483582496643066, "rewards/accuracies": 0.75, "rewards/chosen": -0.07136883586645126, "rewards/margins": 0.19090911746025085, "rewards/rejected": -0.2622779607772827, "step": 3212 }, { "epoch": 1.9600427024553913, "grad_norm": 1.8731876611709595, "learning_rate": 4.881322718922229e-06, "log_odds_chosen": 1.2079612016677856, "log_odds_ratio": -0.4409840703010559, "logits/chosen": -0.8525122404098511, "logits/rejected": -0.8291592001914978, "logps/chosen": -0.8041057586669922, "logps/rejected": -1.4941744804382324, "loss": 1.054, "nll_loss": 1.1703872680664062, "rewards/accuracies": 0.75, "rewards/chosen": -0.08041057735681534, "rewards/margins": 0.06900688260793686, "rewards/rejected": -0.1494174599647522, "step": 3213 }, { "epoch": 1.960652737532408, "grad_norm": 2.6086344718933105, "learning_rate": 4.880342927127985e-06, "log_odds_chosen": 1.8751754760742188, "log_odds_ratio": -0.2623186707496643, "logits/chosen": -0.6690616607666016, "logits/rejected": -0.7643951177597046, "logps/chosen": -0.5936283469200134, "logps/rejected": -1.8844690322875977, "loss": 1.1432, "nll_loss": 0.7498822808265686, "rewards/accuracies": 1.0, "rewards/chosen": -0.059362832456827164, "rewards/margins": 0.12908408045768738, "rewards/rejected": -0.18844690918922424, "step": 3214 }, { "epoch": 1.961262772609425, "grad_norm": 1.352926254272461, "learning_rate": 4.879363135333741e-06, "log_odds_chosen": 0.923136293888092, "log_odds_ratio": -0.5991853475570679, "logits/chosen": -1.0427842140197754, "logits/rejected": -0.9933752417564392, "logps/chosen": -0.9344190955162048, "logps/rejected": -1.7041707038879395, "loss": 1.1124, "nll_loss": 1.2264328002929688, "rewards/accuracies": 0.5, "rewards/chosen": -0.0934419110417366, "rewards/margins": 0.07697516679763794, "rewards/rejected": -0.17041708528995514, "step": 3215 }, { "epoch": 1.961872807686442, "grad_norm": 1.355675220489502, "learning_rate": 4.878383343539497e-06, "log_odds_chosen": 1.9533064365386963, "log_odds_ratio": -0.26949843764305115, "logits/chosen": -0.8076946139335632, "logits/rejected": -0.8958978652954102, "logps/chosen": -0.9423877000808716, "logps/rejected": -2.4688282012939453, "loss": 1.1205, "nll_loss": 0.9970598220825195, "rewards/accuracies": 0.875, "rewards/chosen": -0.0942387804389, "rewards/margins": 0.15264403820037842, "rewards/rejected": -0.246882826089859, "step": 3216 }, { "epoch": 1.9624828427634589, "grad_norm": 1.5485767126083374, "learning_rate": 4.877403551745254e-06, "log_odds_chosen": 1.2646245956420898, "log_odds_ratio": -0.5902014374732971, "logits/chosen": -0.8948424458503723, "logits/rejected": -0.8877366781234741, "logps/chosen": -0.9411200284957886, "logps/rejected": -1.8924123048782349, "loss": 1.1613, "nll_loss": 1.2053231000900269, "rewards/accuracies": 0.625, "rewards/chosen": -0.09411200881004333, "rewards/margins": 0.09512921422719955, "rewards/rejected": -0.1892412304878235, "step": 3217 }, { "epoch": 1.963092877840476, "grad_norm": 1.3522553443908691, "learning_rate": 4.87642375995101e-06, "log_odds_chosen": 2.165116786956787, "log_odds_ratio": -0.2104148268699646, "logits/chosen": -0.8007245659828186, "logits/rejected": -0.840011477470398, "logps/chosen": -0.7169246673583984, "logps/rejected": -2.2876555919647217, "loss": 1.0414, "nll_loss": 0.7465946674346924, "rewards/accuracies": 1.0, "rewards/chosen": -0.07169246673583984, "rewards/margins": 0.15707311034202576, "rewards/rejected": -0.2287655770778656, "step": 3218 }, { "epoch": 1.9637029129174928, "grad_norm": 1.4685786962509155, "learning_rate": 4.8754439681567665e-06, "log_odds_chosen": 1.1238529682159424, "log_odds_ratio": -0.4184931516647339, "logits/chosen": -0.7113537788391113, "logits/rejected": -0.617323637008667, "logps/chosen": -0.8110948801040649, "logps/rejected": -1.6082608699798584, "loss": 0.9824, "nll_loss": 0.9050337672233582, "rewards/accuracies": 0.75, "rewards/chosen": -0.08110949397087097, "rewards/margins": 0.07971660047769547, "rewards/rejected": -0.16082608699798584, "step": 3219 }, { "epoch": 1.9643129479945096, "grad_norm": 3.038734197616577, "learning_rate": 4.8744641763625235e-06, "log_odds_chosen": 1.377671718597412, "log_odds_ratio": -0.4272859990596771, "logits/chosen": -0.7255908846855164, "logits/rejected": -0.8572998046875, "logps/chosen": -0.8180264234542847, "logps/rejected": -1.660461187362671, "loss": 1.0868, "nll_loss": 1.3106410503387451, "rewards/accuracies": 0.75, "rewards/chosen": -0.08180265128612518, "rewards/margins": 0.08424346894025803, "rewards/rejected": -0.1660461276769638, "step": 3220 }, { "epoch": 1.9649229830715265, "grad_norm": 1.4001636505126953, "learning_rate": 4.873484384568279e-06, "log_odds_chosen": 3.175379753112793, "log_odds_ratio": -0.4255547523498535, "logits/chosen": -0.9303005933761597, "logits/rejected": -1.01693594455719, "logps/chosen": -0.7841024994850159, "logps/rejected": -3.5151712894439697, "loss": 1.0408, "nll_loss": 0.9050654172897339, "rewards/accuracies": 0.625, "rewards/chosen": -0.07841025292873383, "rewards/margins": 0.2731068730354309, "rewards/rejected": -0.3515171408653259, "step": 3221 }, { "epoch": 1.9655330181485435, "grad_norm": 8.289233207702637, "learning_rate": 4.872504592774035e-06, "log_odds_chosen": 0.6016110181808472, "log_odds_ratio": -0.6109645366668701, "logits/chosen": -0.6422958374023438, "logits/rejected": -0.7261682152748108, "logps/chosen": -0.8989226818084717, "logps/rejected": -1.3341447114944458, "loss": 1.0516, "nll_loss": 0.9746118783950806, "rewards/accuracies": 0.5, "rewards/chosen": -0.08989226073026657, "rewards/margins": 0.04352221637964249, "rewards/rejected": -0.13341447710990906, "step": 3222 }, { "epoch": 1.9661430532255606, "grad_norm": 3.901681661605835, "learning_rate": 4.871524800979792e-06, "log_odds_chosen": 1.8835477828979492, "log_odds_ratio": -0.35205793380737305, "logits/chosen": -0.7826775908470154, "logits/rejected": -0.8397346138954163, "logps/chosen": -0.81146639585495, "logps/rejected": -2.2661709785461426, "loss": 0.9399, "nll_loss": 0.9446791410446167, "rewards/accuracies": 0.75, "rewards/chosen": -0.08114664256572723, "rewards/margins": 0.14547045528888702, "rewards/rejected": -0.22661708295345306, "step": 3223 }, { "epoch": 1.9667530883025774, "grad_norm": 10.657815933227539, "learning_rate": 4.870545009185548e-06, "log_odds_chosen": 1.6831095218658447, "log_odds_ratio": -0.6867842674255371, "logits/chosen": -0.7195713520050049, "logits/rejected": -0.7657673358917236, "logps/chosen": -0.9009013772010803, "logps/rejected": -2.1868414878845215, "loss": 1.2357, "nll_loss": 0.9971362352371216, "rewards/accuracies": 0.625, "rewards/chosen": -0.09009014070034027, "rewards/margins": 0.1285940408706665, "rewards/rejected": -0.21868416666984558, "step": 3224 }, { "epoch": 1.9673631233795943, "grad_norm": 1.237013339996338, "learning_rate": 4.869565217391304e-06, "log_odds_chosen": 1.577815055847168, "log_odds_ratio": -0.5029335021972656, "logits/chosen": -1.065629243850708, "logits/rejected": -1.0858213901519775, "logps/chosen": -0.9713888764381409, "logps/rejected": -2.3171730041503906, "loss": 1.2282, "nll_loss": 1.2883886098861694, "rewards/accuracies": 0.75, "rewards/chosen": -0.09713888168334961, "rewards/margins": 0.1345784068107605, "rewards/rejected": -0.2317172735929489, "step": 3225 }, { "epoch": 1.9679731584566111, "grad_norm": 2.02291202545166, "learning_rate": 4.86858542559706e-06, "log_odds_chosen": 1.724044680595398, "log_odds_ratio": -0.3289507031440735, "logits/chosen": -0.9110183119773865, "logits/rejected": -0.896538257598877, "logps/chosen": -1.175551414489746, "logps/rejected": -2.547114372253418, "loss": 1.0734, "nll_loss": 1.2551857233047485, "rewards/accuracies": 0.875, "rewards/chosen": -0.11755514144897461, "rewards/margins": 0.13715630769729614, "rewards/rejected": -0.25471144914627075, "step": 3226 }, { "epoch": 1.9685831935336282, "grad_norm": 1.9894335269927979, "learning_rate": 4.867605633802816e-06, "log_odds_chosen": 2.4756414890289307, "log_odds_ratio": -0.3102203905582428, "logits/chosen": -0.7566052079200745, "logits/rejected": -0.7808799743652344, "logps/chosen": -0.6285523176193237, "logps/rejected": -2.5646026134490967, "loss": 0.9148, "nll_loss": 0.7792279720306396, "rewards/accuracies": 0.75, "rewards/chosen": -0.06285522878170013, "rewards/margins": 0.19360502064228058, "rewards/rejected": -0.2564602494239807, "step": 3227 }, { "epoch": 1.9691932286106453, "grad_norm": 3.7103545665740967, "learning_rate": 4.8666258420085725e-06, "log_odds_chosen": 2.7837467193603516, "log_odds_ratio": -0.27721744775772095, "logits/chosen": -0.5569703578948975, "logits/rejected": -0.7129952907562256, "logps/chosen": -0.6003292798995972, "logps/rejected": -2.824406385421753, "loss": 1.0881, "nll_loss": 0.9253441095352173, "rewards/accuracies": 0.75, "rewards/chosen": -0.060032930225133896, "rewards/margins": 0.222407728433609, "rewards/rejected": -0.2824406921863556, "step": 3228 }, { "epoch": 1.9698032636876621, "grad_norm": 1.6280471086502075, "learning_rate": 4.8656460502143294e-06, "log_odds_chosen": 0.8079237937927246, "log_odds_ratio": -0.46414291858673096, "logits/chosen": -0.6135032176971436, "logits/rejected": -0.7849127054214478, "logps/chosen": -0.7960331439971924, "logps/rejected": -1.2920501232147217, "loss": 1.1239, "nll_loss": 0.8811429738998413, "rewards/accuracies": 0.75, "rewards/chosen": -0.07960331439971924, "rewards/margins": 0.04960170388221741, "rewards/rejected": -0.12920501828193665, "step": 3229 }, { "epoch": 1.970413298764679, "grad_norm": 1.4543043375015259, "learning_rate": 4.8646662584200855e-06, "log_odds_chosen": 0.3851696848869324, "log_odds_ratio": -0.6085725426673889, "logits/chosen": -0.9993644952774048, "logits/rejected": -0.9331170320510864, "logps/chosen": -0.9281848669052124, "logps/rejected": -1.184309720993042, "loss": 1.1315, "nll_loss": 1.1682305335998535, "rewards/accuracies": 0.75, "rewards/chosen": -0.0928184911608696, "rewards/margins": 0.025612488389015198, "rewards/rejected": -0.1184309795498848, "step": 3230 }, { "epoch": 1.9710233338416958, "grad_norm": 1.4858181476593018, "learning_rate": 4.8636864666258425e-06, "log_odds_chosen": 0.3990520238876343, "log_odds_ratio": -0.5753686428070068, "logits/chosen": -0.9875926971435547, "logits/rejected": -0.8041112422943115, "logps/chosen": -0.840648353099823, "logps/rejected": -1.1199679374694824, "loss": 1.1484, "nll_loss": 0.937920868396759, "rewards/accuracies": 0.625, "rewards/chosen": -0.08406484127044678, "rewards/margins": 0.02793196216225624, "rewards/rejected": -0.11199679970741272, "step": 3231 }, { "epoch": 1.9716333689187129, "grad_norm": 2.1172242164611816, "learning_rate": 4.862706674831598e-06, "log_odds_chosen": 2.037118434906006, "log_odds_ratio": -0.22691906988620758, "logits/chosen": -0.616887092590332, "logits/rejected": -0.9526239633560181, "logps/chosen": -0.8100337982177734, "logps/rejected": -2.3527274131774902, "loss": 1.2559, "nll_loss": 1.0263035297393799, "rewards/accuracies": 1.0, "rewards/chosen": -0.08100338280200958, "rewards/margins": 0.15426938235759735, "rewards/rejected": -0.23527276515960693, "step": 3232 }, { "epoch": 1.9722434039957297, "grad_norm": 1.563450813293457, "learning_rate": 4.861726883037354e-06, "log_odds_chosen": 1.7745740413665771, "log_odds_ratio": -0.3982160687446594, "logits/chosen": -0.7257934808731079, "logits/rejected": -0.7864668965339661, "logps/chosen": -0.6533870697021484, "logps/rejected": -1.8997178077697754, "loss": 0.9202, "nll_loss": 0.9263691902160645, "rewards/accuracies": 0.75, "rewards/chosen": -0.06533870846033096, "rewards/margins": 0.1246330738067627, "rewards/rejected": -0.18997178971767426, "step": 3233 }, { "epoch": 1.9728534390727468, "grad_norm": 1.5493459701538086, "learning_rate": 4.860747091243111e-06, "log_odds_chosen": 2.418735980987549, "log_odds_ratio": -0.4752868413925171, "logits/chosen": -0.8161255717277527, "logits/rejected": -0.824981153011322, "logps/chosen": -0.9389989972114563, "logps/rejected": -3.124495506286621, "loss": 0.944, "nll_loss": 0.9139696955680847, "rewards/accuracies": 0.75, "rewards/chosen": -0.09389990568161011, "rewards/margins": 0.21854963898658752, "rewards/rejected": -0.31244951486587524, "step": 3234 }, { "epoch": 1.9734634741497636, "grad_norm": 9.386765480041504, "learning_rate": 4.859767299448867e-06, "log_odds_chosen": 0.7941097021102905, "log_odds_ratio": -0.535788893699646, "logits/chosen": -0.7695637941360474, "logits/rejected": -0.7705100774765015, "logps/chosen": -0.8576263785362244, "logps/rejected": -1.3483960628509521, "loss": 1.2201, "nll_loss": 0.9279859662055969, "rewards/accuracies": 0.625, "rewards/chosen": -0.0857626274228096, "rewards/margins": 0.049076974391937256, "rewards/rejected": -0.13483962416648865, "step": 3235 }, { "epoch": 1.9740735092267805, "grad_norm": 3.5255258083343506, "learning_rate": 4.858787507654623e-06, "log_odds_chosen": 3.3725953102111816, "log_odds_ratio": -0.17428945004940033, "logits/chosen": -0.5973767638206482, "logits/rejected": -0.8611962795257568, "logps/chosen": -0.8548794984817505, "logps/rejected": -3.6634035110473633, "loss": 1.0591, "nll_loss": 0.9733943343162537, "rewards/accuracies": 1.0, "rewards/chosen": -0.08548794686794281, "rewards/margins": 0.28085243701934814, "rewards/rejected": -0.36634036898612976, "step": 3236 }, { "epoch": 1.9746835443037973, "grad_norm": 2.980924367904663, "learning_rate": 4.85780771586038e-06, "log_odds_chosen": 1.099050760269165, "log_odds_ratio": -0.4485261142253876, "logits/chosen": -0.8792245388031006, "logits/rejected": -0.7934010624885559, "logps/chosen": -0.8318290710449219, "logps/rejected": -1.5794771909713745, "loss": 1.0492, "nll_loss": 1.1014196872711182, "rewards/accuracies": 0.625, "rewards/chosen": -0.0831829160451889, "rewards/margins": 0.07476481050252914, "rewards/rejected": -0.15794771909713745, "step": 3237 }, { "epoch": 1.9752935793808144, "grad_norm": 1.2318390607833862, "learning_rate": 4.856827924066135e-06, "log_odds_chosen": 0.5561680793762207, "log_odds_ratio": -0.5452020168304443, "logits/chosen": -0.9781268835067749, "logits/rejected": -0.8911054730415344, "logps/chosen": -0.803911030292511, "logps/rejected": -1.114537239074707, "loss": 0.9754, "nll_loss": 0.8909404873847961, "rewards/accuracies": 0.625, "rewards/chosen": -0.08039110153913498, "rewards/margins": 0.031062619760632515, "rewards/rejected": -0.11145372688770294, "step": 3238 }, { "epoch": 1.9759036144578315, "grad_norm": 3.9675252437591553, "learning_rate": 4.8558481322718915e-06, "log_odds_chosen": 2.303959608078003, "log_odds_ratio": -0.2976524233818054, "logits/chosen": -0.954808235168457, "logits/rejected": -0.7687167525291443, "logps/chosen": -1.056075930595398, "logps/rejected": -3.049531936645508, "loss": 1.2305, "nll_loss": 1.1393611431121826, "rewards/accuracies": 0.875, "rewards/chosen": -0.10560759156942368, "rewards/margins": 0.19934560358524323, "rewards/rejected": -0.3049532175064087, "step": 3239 }, { "epoch": 1.9765136495348483, "grad_norm": 3.3684537410736084, "learning_rate": 4.8548683404776485e-06, "log_odds_chosen": 0.6190025806427002, "log_odds_ratio": -0.5620608329772949, "logits/chosen": -0.7573691606521606, "logits/rejected": -0.8642758131027222, "logps/chosen": -0.9585903882980347, "logps/rejected": -1.3239293098449707, "loss": 1.0054, "nll_loss": 1.231374740600586, "rewards/accuracies": 0.5, "rewards/chosen": -0.09585903584957123, "rewards/margins": 0.0365338996052742, "rewards/rejected": -0.13239294290542603, "step": 3240 }, { "epoch": 1.9771236846118652, "grad_norm": 1.066178798675537, "learning_rate": 4.853888548683405e-06, "log_odds_chosen": 2.1802914142608643, "log_odds_ratio": -0.37710919976234436, "logits/chosen": -0.6888167858123779, "logits/rejected": -0.7951495051383972, "logps/chosen": -0.7298429012298584, "logps/rejected": -2.341949462890625, "loss": 1.0281, "nll_loss": 0.899688720703125, "rewards/accuracies": 0.75, "rewards/chosen": -0.07298428565263748, "rewards/margins": 0.16121067106723785, "rewards/rejected": -0.23419494926929474, "step": 3241 }, { "epoch": 1.977733719688882, "grad_norm": 3.474036693572998, "learning_rate": 4.852908756889161e-06, "log_odds_chosen": 0.7997348308563232, "log_odds_ratio": -0.7508952617645264, "logits/chosen": -0.8107732534408569, "logits/rejected": -0.9650845527648926, "logps/chosen": -0.9351159334182739, "logps/rejected": -1.527159333229065, "loss": 1.1025, "nll_loss": 1.3259329795837402, "rewards/accuracies": 0.5, "rewards/chosen": -0.09351160377264023, "rewards/margins": 0.05920432507991791, "rewards/rejected": -0.15271592140197754, "step": 3242 }, { "epoch": 1.978343754765899, "grad_norm": 1.7885491847991943, "learning_rate": 4.851928965094917e-06, "log_odds_chosen": 1.8477939367294312, "log_odds_ratio": -0.4538092315196991, "logits/chosen": -0.668686032295227, "logits/rejected": -0.7325792908668518, "logps/chosen": -1.089127779006958, "logps/rejected": -2.6914560794830322, "loss": 1.0588, "nll_loss": 1.1785404682159424, "rewards/accuracies": 0.875, "rewards/chosen": -0.10891278088092804, "rewards/margins": 0.16023282706737518, "rewards/rejected": -0.2691456079483032, "step": 3243 }, { "epoch": 1.978953789842916, "grad_norm": 7.268209457397461, "learning_rate": 4.850949173300673e-06, "log_odds_chosen": 0.5557130575180054, "log_odds_ratio": -0.6331322193145752, "logits/chosen": -1.0144078731536865, "logits/rejected": -0.9480323791503906, "logps/chosen": -0.8146464824676514, "logps/rejected": -1.1968247890472412, "loss": 1.0666, "nll_loss": 1.2348895072937012, "rewards/accuracies": 0.625, "rewards/chosen": -0.08146466314792633, "rewards/margins": 0.03821781650185585, "rewards/rejected": -0.11968247592449188, "step": 3244 }, { "epoch": 1.979563824919933, "grad_norm": 9.443563461303711, "learning_rate": 4.84996938150643e-06, "log_odds_chosen": 2.208803415298462, "log_odds_ratio": -0.37443429231643677, "logits/chosen": -0.6008838415145874, "logits/rejected": -0.5471155643463135, "logps/chosen": -0.6559154987335205, "logps/rejected": -2.272305488586426, "loss": 1.0138, "nll_loss": 0.8867379426956177, "rewards/accuracies": 0.75, "rewards/chosen": -0.06559155136346817, "rewards/margins": 0.161639004945755, "rewards/rejected": -0.22723054885864258, "step": 3245 }, { "epoch": 1.9801738599969498, "grad_norm": 3.6503634452819824, "learning_rate": 4.848989589712186e-06, "log_odds_chosen": 1.4711847305297852, "log_odds_ratio": -0.46665677428245544, "logits/chosen": -0.6486777663230896, "logits/rejected": -0.7309650182723999, "logps/chosen": -0.7365027666091919, "logps/rejected": -1.7672674655914307, "loss": 1.2315, "nll_loss": 1.0766396522521973, "rewards/accuracies": 0.75, "rewards/chosen": -0.07365027070045471, "rewards/margins": 0.10307648777961731, "rewards/rejected": -0.17672675848007202, "step": 3246 }, { "epoch": 1.9807838950739667, "grad_norm": 1.5384984016418457, "learning_rate": 4.848009797917942e-06, "log_odds_chosen": 1.955332636833191, "log_odds_ratio": -0.39699631929397583, "logits/chosen": -0.6498295664787292, "logits/rejected": -0.7714557647705078, "logps/chosen": -0.7667102217674255, "logps/rejected": -2.3366196155548096, "loss": 0.9928, "nll_loss": 0.8721123933792114, "rewards/accuracies": 0.75, "rewards/chosen": -0.07667101919651031, "rewards/margins": 0.15699094533920288, "rewards/rejected": -0.233661949634552, "step": 3247 }, { "epoch": 1.9813939301509835, "grad_norm": 1.544640064239502, "learning_rate": 4.847030006123699e-06, "log_odds_chosen": 3.141206979751587, "log_odds_ratio": -0.16606804728507996, "logits/chosen": -0.7776825428009033, "logits/rejected": -0.8117425441741943, "logps/chosen": -0.7468591928482056, "logps/rejected": -3.252465009689331, "loss": 1.0546, "nll_loss": 1.2456032037734985, "rewards/accuracies": 1.0, "rewards/chosen": -0.07468591630458832, "rewards/margins": 0.25056055188179016, "rewards/rejected": -0.32524651288986206, "step": 3248 }, { "epoch": 1.9820039652280006, "grad_norm": 1.496017336845398, "learning_rate": 4.846050214329454e-06, "log_odds_chosen": 1.0165055990219116, "log_odds_ratio": -0.7640368342399597, "logits/chosen": -0.745673656463623, "logits/rejected": -0.7035654783248901, "logps/chosen": -0.8907541036605835, "logps/rejected": -1.7821999788284302, "loss": 1.0545, "nll_loss": 0.9971440434455872, "rewards/accuracies": 0.375, "rewards/chosen": -0.08907541632652283, "rewards/margins": 0.08914460241794586, "rewards/rejected": -0.1782200038433075, "step": 3249 }, { "epoch": 1.9826140003050177, "grad_norm": 1.5903308391571045, "learning_rate": 4.8450704225352105e-06, "log_odds_chosen": 0.18204356729984283, "log_odds_ratio": -0.6488069891929626, "logits/chosen": -0.876334547996521, "logits/rejected": -0.909410834312439, "logps/chosen": -0.8908795118331909, "logps/rejected": -1.000595211982727, "loss": 1.2146, "nll_loss": 1.1569205522537231, "rewards/accuracies": 0.375, "rewards/chosen": -0.08908794820308685, "rewards/margins": 0.010971570387482643, "rewards/rejected": -0.10005952417850494, "step": 3250 }, { "epoch": 1.9832240353820345, "grad_norm": 4.136093616485596, "learning_rate": 4.8440906307409675e-06, "log_odds_chosen": 1.2928118705749512, "log_odds_ratio": -0.39241644740104675, "logits/chosen": -0.7764843702316284, "logits/rejected": -0.9379758238792419, "logps/chosen": -0.8878496885299683, "logps/rejected": -1.7186732292175293, "loss": 1.2579, "nll_loss": 1.296691656112671, "rewards/accuracies": 0.875, "rewards/chosen": -0.08878496289253235, "rewards/margins": 0.08308236300945282, "rewards/rejected": -0.17186734080314636, "step": 3251 }, { "epoch": 1.9838340704590514, "grad_norm": 1.3834688663482666, "learning_rate": 4.843110838946724e-06, "log_odds_chosen": 0.8930197954177856, "log_odds_ratio": -0.5405827760696411, "logits/chosen": -0.6635875701904297, "logits/rejected": -0.6006404161453247, "logps/chosen": -0.7872273325920105, "logps/rejected": -1.423034906387329, "loss": 1.0948, "nll_loss": 0.9974344968795776, "rewards/accuracies": 0.75, "rewards/chosen": -0.07872273027896881, "rewards/margins": 0.06358076632022858, "rewards/rejected": -0.1423034965991974, "step": 3252 }, { "epoch": 1.9844441055360682, "grad_norm": 1.4155157804489136, "learning_rate": 4.84213104715248e-06, "log_odds_chosen": 1.7410809993743896, "log_odds_ratio": -0.39865463972091675, "logits/chosen": -0.8281679153442383, "logits/rejected": -0.8373910188674927, "logps/chosen": -0.842870831489563, "logps/rejected": -2.102503776550293, "loss": 1.2587, "nll_loss": 1.0279247760772705, "rewards/accuracies": 0.75, "rewards/chosen": -0.08428707718849182, "rewards/margins": 0.12596330046653748, "rewards/rejected": -0.2102503776550293, "step": 3253 }, { "epoch": 1.9850541406130853, "grad_norm": 4.3024678230285645, "learning_rate": 4.841151255358237e-06, "log_odds_chosen": 1.142642617225647, "log_odds_ratio": -0.4287029206752777, "logits/chosen": -0.5505132079124451, "logits/rejected": -0.5774904489517212, "logps/chosen": -0.8638915419578552, "logps/rejected": -1.6446881294250488, "loss": 1.0126, "nll_loss": 0.9184305667877197, "rewards/accuracies": 0.875, "rewards/chosen": -0.08638915419578552, "rewards/margins": 0.07807967066764832, "rewards/rejected": -0.16446882486343384, "step": 3254 }, { "epoch": 1.9856641756901023, "grad_norm": 2.5715725421905518, "learning_rate": 4.840171463563992e-06, "log_odds_chosen": 1.9485316276550293, "log_odds_ratio": -0.39924925565719604, "logits/chosen": -0.6475752592086792, "logits/rejected": -0.7213708758354187, "logps/chosen": -0.5733436346054077, "logps/rejected": -1.8640162944793701, "loss": 0.9658, "nll_loss": 0.6974142789840698, "rewards/accuracies": 0.625, "rewards/chosen": -0.05733436346054077, "rewards/margins": 0.12906727194786072, "rewards/rejected": -0.1864016205072403, "step": 3255 }, { "epoch": 1.9862742107671192, "grad_norm": 1.2188355922698975, "learning_rate": 4.839191671769748e-06, "log_odds_chosen": 0.8610986471176147, "log_odds_ratio": -0.4889141619205475, "logits/chosen": -0.8602010011672974, "logits/rejected": -0.8445929288864136, "logps/chosen": -0.792215883731842, "logps/rejected": -1.3953135013580322, "loss": 1.0931, "nll_loss": 0.8395626544952393, "rewards/accuracies": 0.75, "rewards/chosen": -0.07922159880399704, "rewards/margins": 0.0603097565472126, "rewards/rejected": -0.13953134417533875, "step": 3256 }, { "epoch": 1.986884245844136, "grad_norm": 1.8274967670440674, "learning_rate": 4.838211879975505e-06, "log_odds_chosen": 0.737577497959137, "log_odds_ratio": -0.6186408400535583, "logits/chosen": -1.0530390739440918, "logits/rejected": -0.8870104551315308, "logps/chosen": -0.7939327955245972, "logps/rejected": -1.3341468572616577, "loss": 1.0311, "nll_loss": 1.0615437030792236, "rewards/accuracies": 0.5, "rewards/chosen": -0.07939328253269196, "rewards/margins": 0.054021406918764114, "rewards/rejected": -0.13341468572616577, "step": 3257 }, { "epoch": 1.9874942809211529, "grad_norm": 2.159026861190796, "learning_rate": 4.837232088181261e-06, "log_odds_chosen": 0.7573711276054382, "log_odds_ratio": -0.5604733228683472, "logits/chosen": -0.6400688290596008, "logits/rejected": -0.651044487953186, "logps/chosen": -0.8386270999908447, "logps/rejected": -1.3679949045181274, "loss": 1.0742, "nll_loss": 0.9261248111724854, "rewards/accuracies": 0.75, "rewards/chosen": -0.08386270701885223, "rewards/margins": 0.05293677747249603, "rewards/rejected": -0.13679948449134827, "step": 3258 }, { "epoch": 1.98810431599817, "grad_norm": 1.7438949346542358, "learning_rate": 4.836252296387018e-06, "log_odds_chosen": 1.721986174583435, "log_odds_ratio": -0.5711618661880493, "logits/chosen": -0.8315178155899048, "logits/rejected": -0.9061145782470703, "logps/chosen": -0.7985076904296875, "logps/rejected": -2.2766146659851074, "loss": 1.1728, "nll_loss": 1.1058491468429565, "rewards/accuracies": 0.625, "rewards/chosen": -0.07985077798366547, "rewards/margins": 0.147810697555542, "rewards/rejected": -0.22766144573688507, "step": 3259 }, { "epoch": 1.9887143510751868, "grad_norm": 2.8341450691223145, "learning_rate": 4.835272504592774e-06, "log_odds_chosen": 3.3895606994628906, "log_odds_ratio": -0.22311937808990479, "logits/chosen": -0.6919516324996948, "logits/rejected": -0.7621110081672668, "logps/chosen": -0.7240586280822754, "logps/rejected": -3.4424667358398438, "loss": 1.0522, "nll_loss": 0.9704632759094238, "rewards/accuracies": 0.875, "rewards/chosen": -0.0724058598279953, "rewards/margins": 0.27184081077575684, "rewards/rejected": -0.34424668550491333, "step": 3260 }, { "epoch": 1.9893243861522039, "grad_norm": 1.3257094621658325, "learning_rate": 4.8342927127985296e-06, "log_odds_chosen": 2.108927011489868, "log_odds_ratio": -0.2720298767089844, "logits/chosen": -0.3772435784339905, "logits/rejected": -0.42836394906044006, "logps/chosen": -0.505379855632782, "logps/rejected": -2.0071263313293457, "loss": 0.9688, "nll_loss": 0.5965878963470459, "rewards/accuracies": 0.875, "rewards/chosen": -0.05053798854351044, "rewards/margins": 0.15017464756965637, "rewards/rejected": -0.2007126361131668, "step": 3261 }, { "epoch": 1.9899344212292207, "grad_norm": 1.8186709880828857, "learning_rate": 4.8333129210042865e-06, "log_odds_chosen": 0.8904392719268799, "log_odds_ratio": -0.6185429096221924, "logits/chosen": -0.8122639060020447, "logits/rejected": -0.8085033893585205, "logps/chosen": -0.7874140739440918, "logps/rejected": -1.4408042430877686, "loss": 0.9521, "nll_loss": 0.9416394233703613, "rewards/accuracies": 0.625, "rewards/chosen": -0.0787414088845253, "rewards/margins": 0.06533902883529663, "rewards/rejected": -0.14408043026924133, "step": 3262 }, { "epoch": 1.9905444563062376, "grad_norm": 1.967345118522644, "learning_rate": 4.832333129210043e-06, "log_odds_chosen": 3.370875597000122, "log_odds_ratio": -0.21038362383842468, "logits/chosen": -0.7362838387489319, "logits/rejected": -0.7402755618095398, "logps/chosen": -0.6650819778442383, "logps/rejected": -3.428406000137329, "loss": 0.9855, "nll_loss": 1.0492663383483887, "rewards/accuracies": 1.0, "rewards/chosen": -0.06650819629430771, "rewards/margins": 0.27633243799209595, "rewards/rejected": -0.34284061193466187, "step": 3263 }, { "epoch": 1.9911544913832544, "grad_norm": 3.056523084640503, "learning_rate": 4.831353337415799e-06, "log_odds_chosen": 2.7811694145202637, "log_odds_ratio": -0.26181527972221375, "logits/chosen": -0.8735873103141785, "logits/rejected": -0.89757239818573, "logps/chosen": -0.8071427345275879, "logps/rejected": -3.1520748138427734, "loss": 1.3207, "nll_loss": 1.174939513206482, "rewards/accuracies": 0.875, "rewards/chosen": -0.08071427047252655, "rewards/margins": 0.234493225812912, "rewards/rejected": -0.31520748138427734, "step": 3264 }, { "epoch": 1.9917645264602715, "grad_norm": 1.7722649574279785, "learning_rate": 4.830373545621556e-06, "log_odds_chosen": 1.5348637104034424, "log_odds_ratio": -0.5683839321136475, "logits/chosen": -0.6060521006584167, "logits/rejected": -0.579740047454834, "logps/chosen": -0.694651186466217, "logps/rejected": -1.9353114366531372, "loss": 1.0984, "nll_loss": 0.9244412183761597, "rewards/accuracies": 0.625, "rewards/chosen": -0.06946512311697006, "rewards/margins": 0.12406601011753082, "rewards/rejected": -0.1935311257839203, "step": 3265 }, { "epoch": 1.9923745615372885, "grad_norm": 2.5637223720550537, "learning_rate": 4.829393753827311e-06, "log_odds_chosen": 0.9611517190933228, "log_odds_ratio": -0.7196532487869263, "logits/chosen": -0.913231611251831, "logits/rejected": -0.7444536089897156, "logps/chosen": -0.9118808507919312, "logps/rejected": -1.7941454648971558, "loss": 0.9622, "nll_loss": 0.9335172772407532, "rewards/accuracies": 0.375, "rewards/chosen": -0.09118808805942535, "rewards/margins": 0.08822645246982574, "rewards/rejected": -0.1794145405292511, "step": 3266 }, { "epoch": 1.9929845966143054, "grad_norm": 1.646619439125061, "learning_rate": 4.828413962033067e-06, "log_odds_chosen": 0.3676445484161377, "log_odds_ratio": -0.8170697689056396, "logits/chosen": -0.9327845573425293, "logits/rejected": -0.8731573820114136, "logps/chosen": -1.0898219347000122, "logps/rejected": -1.4413763284683228, "loss": 1.07, "nll_loss": 1.1956225633621216, "rewards/accuracies": 0.375, "rewards/chosen": -0.10898219048976898, "rewards/margins": 0.03515544533729553, "rewards/rejected": -0.14413763582706451, "step": 3267 }, { "epoch": 1.9935946316913222, "grad_norm": 1.2164191007614136, "learning_rate": 4.827434170238824e-06, "log_odds_chosen": 0.9019318222999573, "log_odds_ratio": -0.48653337359428406, "logits/chosen": -0.7955577373504639, "logits/rejected": -0.6907128691673279, "logps/chosen": -0.8920221328735352, "logps/rejected": -1.5074204206466675, "loss": 1.2427, "nll_loss": 1.213369607925415, "rewards/accuracies": 0.625, "rewards/chosen": -0.08920222520828247, "rewards/margins": 0.06153983250260353, "rewards/rejected": -0.1507420539855957, "step": 3268 }, { "epoch": 1.994204666768339, "grad_norm": 2.791943311691284, "learning_rate": 4.82645437844458e-06, "log_odds_chosen": 1.1111518144607544, "log_odds_ratio": -0.52290278673172, "logits/chosen": -0.8814469575881958, "logits/rejected": -0.8165335655212402, "logps/chosen": -0.9908477067947388, "logps/rejected": -1.8784211874008179, "loss": 0.9575, "nll_loss": 1.1361935138702393, "rewards/accuracies": 0.625, "rewards/chosen": -0.0990847647190094, "rewards/margins": 0.08875734359025955, "rewards/rejected": -0.18784210085868835, "step": 3269 }, { "epoch": 1.9948147018453561, "grad_norm": 3.6156394481658936, "learning_rate": 4.825474586650337e-06, "log_odds_chosen": 1.6219290494918823, "log_odds_ratio": -0.46916627883911133, "logits/chosen": -0.7343230247497559, "logits/rejected": -0.9016472697257996, "logps/chosen": -0.6780942678451538, "logps/rejected": -1.8572560548782349, "loss": 1.0931, "nll_loss": 1.1033304929733276, "rewards/accuracies": 0.75, "rewards/chosen": -0.06780943274497986, "rewards/margins": 0.11791618168354034, "rewards/rejected": -0.1857256144285202, "step": 3270 }, { "epoch": 1.995424736922373, "grad_norm": 2.252110481262207, "learning_rate": 4.824494794856093e-06, "log_odds_chosen": 3.5269758701324463, "log_odds_ratio": -0.22461384534835815, "logits/chosen": -0.8520441055297852, "logits/rejected": -0.9794242978096008, "logps/chosen": -0.7129347324371338, "logps/rejected": -3.622051477432251, "loss": 1.167, "nll_loss": 0.8366997241973877, "rewards/accuracies": 0.875, "rewards/chosen": -0.07129347324371338, "rewards/margins": 0.2909117043018341, "rewards/rejected": -0.3622051775455475, "step": 3271 }, { "epoch": 1.99603477199939, "grad_norm": 1.2172850370407104, "learning_rate": 4.823515003061849e-06, "log_odds_chosen": 0.3189871907234192, "log_odds_ratio": -0.6888537406921387, "logits/chosen": -0.8396056890487671, "logits/rejected": -0.7690985202789307, "logps/chosen": -1.2032076120376587, "logps/rejected": -1.4787455797195435, "loss": 1.1604, "nll_loss": 1.1267650127410889, "rewards/accuracies": 0.625, "rewards/chosen": -0.12032076716423035, "rewards/margins": 0.027553800493478775, "rewards/rejected": -0.14787456393241882, "step": 3272 }, { "epoch": 1.996644807076407, "grad_norm": 1.1536118984222412, "learning_rate": 4.822535211267606e-06, "log_odds_chosen": 0.7067020535469055, "log_odds_ratio": -0.545540452003479, "logits/chosen": -0.6439371109008789, "logits/rejected": -0.7243137955665588, "logps/chosen": -0.780156672000885, "logps/rejected": -1.1721735000610352, "loss": 0.9296, "nll_loss": 0.9983596801757812, "rewards/accuracies": 0.625, "rewards/chosen": -0.07801567018032074, "rewards/margins": 0.039201684296131134, "rewards/rejected": -0.11721735447645187, "step": 3273 }, { "epoch": 1.9972548421534237, "grad_norm": 2.2026262283325195, "learning_rate": 4.821555419473362e-06, "log_odds_chosen": 0.9759377241134644, "log_odds_ratio": -0.7959539294242859, "logits/chosen": -0.7300096750259399, "logits/rejected": -0.8329513072967529, "logps/chosen": -1.024641990661621, "logps/rejected": -1.9063496589660645, "loss": 0.9906, "nll_loss": 1.09557044506073, "rewards/accuracies": 0.375, "rewards/chosen": -0.10246418416500092, "rewards/margins": 0.08817078173160553, "rewards/rejected": -0.19063496589660645, "step": 3274 }, { "epoch": 1.9978648772304406, "grad_norm": 1.0549386739730835, "learning_rate": 4.820575627679118e-06, "log_odds_chosen": 0.7640473246574402, "log_odds_ratio": -0.5560879707336426, "logits/chosen": -0.5841600894927979, "logits/rejected": -0.5845310688018799, "logps/chosen": -0.6518443822860718, "logps/rejected": -1.1318137645721436, "loss": 0.8234, "nll_loss": 0.783392608165741, "rewards/accuracies": 0.5, "rewards/chosen": -0.06518443673849106, "rewards/margins": 0.04799693822860718, "rewards/rejected": -0.11318138241767883, "step": 3275 }, { "epoch": 1.9984749123074577, "grad_norm": 9.802926063537598, "learning_rate": 4.819595835884875e-06, "log_odds_chosen": 0.5468245148658752, "log_odds_ratio": -0.725161075592041, "logits/chosen": -0.6571686267852783, "logits/rejected": -0.6891529560089111, "logps/chosen": -0.9780949354171753, "logps/rejected": -1.442976474761963, "loss": 1.1246, "nll_loss": 1.1194853782653809, "rewards/accuracies": 0.375, "rewards/chosen": -0.09780950099229813, "rewards/margins": 0.046488165855407715, "rewards/rejected": -0.14429765939712524, "step": 3276 }, { "epoch": 1.9990849473844747, "grad_norm": 1.414963960647583, "learning_rate": 4.818616044090631e-06, "log_odds_chosen": 2.2288427352905273, "log_odds_ratio": -0.24958741664886475, "logits/chosen": -0.671459972858429, "logits/rejected": -0.6782407760620117, "logps/chosen": -0.5882902145385742, "logps/rejected": -2.1795480251312256, "loss": 1.0565, "nll_loss": 1.0094528198242188, "rewards/accuracies": 0.875, "rewards/chosen": -0.05882902070879936, "rewards/margins": 0.15912578999996185, "rewards/rejected": -0.2179548144340515, "step": 3277 }, { "epoch": 1.9996949824614916, "grad_norm": 1.12418532371521, "learning_rate": 4.817636252296386e-06, "log_odds_chosen": 0.2066415697336197, "log_odds_ratio": -0.7083804607391357, "logits/chosen": -0.9323784112930298, "logits/rejected": -0.8938809633255005, "logps/chosen": -0.8774722814559937, "logps/rejected": -1.025930404663086, "loss": 1.2158, "nll_loss": 1.135040283203125, "rewards/accuracies": 0.5, "rewards/chosen": -0.0877472311258316, "rewards/margins": 0.014845798723399639, "rewards/rejected": -0.10259303450584412, "step": 3278 }, { "epoch": 1.9996949824614916, "eval_log_odds_chosen": 1.5271199941635132, "eval_log_odds_ratio": -0.45621296763420105, "eval_logits/chosen": -0.7771740555763245, "eval_logits/rejected": -0.7535930871963501, "eval_logps/chosen": -0.8183403611183167, "eval_logps/rejected": -1.9612712860107422, "eval_loss": 1.0768041610717773, "eval_nll_loss": 1.072601318359375, "eval_rewards/accuracies": 0.7272727489471436, "eval_rewards/chosen": -0.08183404058218002, "eval_rewards/margins": 0.11429310590028763, "eval_rewards/rejected": -0.19612717628479004, "eval_runtime": 395.7132, "eval_samples_per_second": 0.993, "eval_steps_per_second": 0.167, "step": 3278 }, { "epoch": 2.0003050175385084, "grad_norm": 1.4573866128921509, "learning_rate": 4.816656460502143e-06, "log_odds_chosen": 2.8617935180664062, "log_odds_ratio": -0.24416612088680267, "logits/chosen": -0.6154521703720093, "logits/rejected": -0.7318934202194214, "logps/chosen": -0.6062767505645752, "logps/rejected": -2.6455578804016113, "loss": 1.1478, "nll_loss": 0.891433835029602, "rewards/accuracies": 0.875, "rewards/chosen": -0.06062767654657364, "rewards/margins": 0.2039281129837036, "rewards/rejected": -0.26455581188201904, "step": 3279 }, { "epoch": 2.0009150526155253, "grad_norm": 1.9084159135818481, "learning_rate": 4.815676668707899e-06, "log_odds_chosen": 1.8134428262710571, "log_odds_ratio": -0.4870414435863495, "logits/chosen": -0.7111732363700867, "logits/rejected": -0.7466248273849487, "logps/chosen": -0.6338663697242737, "logps/rejected": -2.0065810680389404, "loss": 0.9243, "nll_loss": 0.7812319993972778, "rewards/accuracies": 0.625, "rewards/chosen": -0.06338663399219513, "rewards/margins": 0.1372714787721634, "rewards/rejected": -0.20065811276435852, "step": 3280 }, { "epoch": 2.001525087692542, "grad_norm": 4.340580940246582, "learning_rate": 4.814696876913655e-06, "log_odds_chosen": 0.5503770709037781, "log_odds_ratio": -0.6212989091873169, "logits/chosen": -0.7191370129585266, "logits/rejected": -0.5612338781356812, "logps/chosen": -0.8417502641677856, "logps/rejected": -1.1084176301956177, "loss": 1.1596, "nll_loss": 1.119375228881836, "rewards/accuracies": 0.75, "rewards/chosen": -0.08417503535747528, "rewards/margins": 0.026666734367609024, "rewards/rejected": -0.110841765999794, "step": 3281 }, { "epoch": 2.0021351227695594, "grad_norm": 1.250541090965271, "learning_rate": 4.813717085119412e-06, "log_odds_chosen": 1.1509151458740234, "log_odds_ratio": -0.4400990605354309, "logits/chosen": -0.5229468941688538, "logits/rejected": -0.43968209624290466, "logps/chosen": -0.6518921852111816, "logps/rejected": -1.433708667755127, "loss": 1.0353, "nll_loss": 0.7921472787857056, "rewards/accuracies": 0.75, "rewards/chosen": -0.06518922001123428, "rewards/margins": 0.07818164676427841, "rewards/rejected": -0.1433708667755127, "step": 3282 }, { "epoch": 2.0027451578465763, "grad_norm": 1.964855432510376, "learning_rate": 4.812737293325168e-06, "log_odds_chosen": 1.685609221458435, "log_odds_ratio": -0.39747390151023865, "logits/chosen": -0.7597888112068176, "logits/rejected": -0.6280618906021118, "logps/chosen": -0.7484870553016663, "logps/rejected": -2.0488851070404053, "loss": 1.0762, "nll_loss": 0.8495073318481445, "rewards/accuracies": 0.75, "rewards/chosen": -0.0748487114906311, "rewards/margins": 0.13003979623317719, "rewards/rejected": -0.20488852262496948, "step": 3283 }, { "epoch": 2.003355192923593, "grad_norm": 1.5613808631896973, "learning_rate": 4.811757501530924e-06, "log_odds_chosen": 1.2668284177780151, "log_odds_ratio": -0.4457826316356659, "logits/chosen": -0.6506568789482117, "logits/rejected": -0.6604580879211426, "logps/chosen": -0.6695379018783569, "logps/rejected": -1.6692378520965576, "loss": 0.9406, "nll_loss": 0.8142275810241699, "rewards/accuracies": 0.75, "rewards/chosen": -0.06695379316806793, "rewards/margins": 0.09996999800205231, "rewards/rejected": -0.16692379117012024, "step": 3284 }, { "epoch": 2.00396522800061, "grad_norm": 1.3008967638015747, "learning_rate": 4.810777709736681e-06, "log_odds_chosen": 0.09256654977798462, "log_odds_ratio": -0.7299067378044128, "logits/chosen": -0.7314487099647522, "logits/rejected": -0.8274887204170227, "logps/chosen": -0.9828022718429565, "logps/rejected": -0.9632931351661682, "loss": 1.1961, "nll_loss": 1.244313359260559, "rewards/accuracies": 0.375, "rewards/chosen": -0.09828022122383118, "rewards/margins": -0.0019509047269821167, "rewards/rejected": -0.09632933139801025, "step": 3285 }, { "epoch": 2.004575263077627, "grad_norm": 1.5414695739746094, "learning_rate": 4.809797917942437e-06, "log_odds_chosen": 1.138850450515747, "log_odds_ratio": -0.534193754196167, "logits/chosen": -0.7351304888725281, "logits/rejected": -0.6691275835037231, "logps/chosen": -0.8147356510162354, "logps/rejected": -1.553418517112732, "loss": 1.0649, "nll_loss": 1.1320977210998535, "rewards/accuracies": 0.5, "rewards/chosen": -0.08147356659173965, "rewards/margins": 0.0738682672381401, "rewards/rejected": -0.15534184873104095, "step": 3286 }, { "epoch": 2.005185298154644, "grad_norm": 1.1926161050796509, "learning_rate": 4.808818126148194e-06, "log_odds_chosen": 2.4324159622192383, "log_odds_ratio": -0.32688289880752563, "logits/chosen": -0.7240574359893799, "logits/rejected": -0.6497575044631958, "logps/chosen": -0.6709302663803101, "logps/rejected": -2.560734272003174, "loss": 1.1043, "nll_loss": 0.9507809281349182, "rewards/accuracies": 0.75, "rewards/chosen": -0.06709302961826324, "rewards/margins": 0.18898040056228638, "rewards/rejected": -0.2560734450817108, "step": 3287 }, { "epoch": 2.005795333231661, "grad_norm": 2.357236623764038, "learning_rate": 4.80783833435395e-06, "log_odds_chosen": 0.9716489315032959, "log_odds_ratio": -0.7908122539520264, "logits/chosen": -0.795477032661438, "logits/rejected": -0.7032697200775146, "logps/chosen": -0.9879416823387146, "logps/rejected": -1.807971715927124, "loss": 1.2585, "nll_loss": 1.1874226331710815, "rewards/accuracies": 0.5, "rewards/chosen": -0.09879416227340698, "rewards/margins": 0.08200301229953766, "rewards/rejected": -0.18079717457294464, "step": 3288 }, { "epoch": 2.0064053683086778, "grad_norm": 8.973374366760254, "learning_rate": 4.806858542559705e-06, "log_odds_chosen": 2.785457134246826, "log_odds_ratio": -0.326595664024353, "logits/chosen": -0.6860159635543823, "logits/rejected": -0.8874212503433228, "logps/chosen": -0.5858861207962036, "logps/rejected": -2.800034761428833, "loss": 1.1055, "nll_loss": 0.9341662526130676, "rewards/accuracies": 0.75, "rewards/chosen": -0.05858860909938812, "rewards/margins": 0.22141484916210175, "rewards/rejected": -0.28000345826148987, "step": 3289 }, { "epoch": 2.0070154033856946, "grad_norm": 1.1743862628936768, "learning_rate": 4.805878750765462e-06, "log_odds_chosen": 1.3761497735977173, "log_odds_ratio": -0.4127432703971863, "logits/chosen": -0.7808564901351929, "logits/rejected": -0.77964186668396, "logps/chosen": -1.1030869483947754, "logps/rejected": -2.2407073974609375, "loss": 1.3085, "nll_loss": 1.2409042119979858, "rewards/accuracies": 0.75, "rewards/chosen": -0.1103086993098259, "rewards/margins": 0.1137620210647583, "rewards/rejected": -0.2240707278251648, "step": 3290 }, { "epoch": 2.0076254384627115, "grad_norm": 1.1962844133377075, "learning_rate": 4.804898958971218e-06, "log_odds_chosen": 1.5550614595413208, "log_odds_ratio": -0.3479894995689392, "logits/chosen": -0.42170578241348267, "logits/rejected": -0.4956011176109314, "logps/chosen": -0.576023280620575, "logps/rejected": -1.5587862730026245, "loss": 1.0044, "nll_loss": 0.8230005502700806, "rewards/accuracies": 0.875, "rewards/chosen": -0.057602327316999435, "rewards/margins": 0.0982763022184372, "rewards/rejected": -0.15587863326072693, "step": 3291 }, { "epoch": 2.0082354735397288, "grad_norm": 5.478481769561768, "learning_rate": 4.8039191671769744e-06, "log_odds_chosen": 1.042442798614502, "log_odds_ratio": -0.40750813484191895, "logits/chosen": -1.1981706619262695, "logits/rejected": -1.0291587114334106, "logps/chosen": -1.0475056171417236, "logps/rejected": -1.9006119966506958, "loss": 1.1985, "nll_loss": 1.4043159484863281, "rewards/accuracies": 0.875, "rewards/chosen": -0.10475055128335953, "rewards/margins": 0.08531065285205841, "rewards/rejected": -0.19006121158599854, "step": 3292 }, { "epoch": 2.0088455086167456, "grad_norm": 1.6747673749923706, "learning_rate": 4.802939375382731e-06, "log_odds_chosen": 0.8800872564315796, "log_odds_ratio": -0.49084794521331787, "logits/chosen": -1.1695096492767334, "logits/rejected": -0.9243176579475403, "logps/chosen": -1.0191044807434082, "logps/rejected": -1.7004445791244507, "loss": 0.9665, "nll_loss": 1.144699215888977, "rewards/accuracies": 0.625, "rewards/chosen": -0.10191044211387634, "rewards/margins": 0.06813400983810425, "rewards/rejected": -0.17004446685314178, "step": 3293 }, { "epoch": 2.0094555436937624, "grad_norm": 1.1235448122024536, "learning_rate": 4.8019595835884875e-06, "log_odds_chosen": 2.692610502243042, "log_odds_ratio": -0.15500064194202423, "logits/chosen": -0.630707859992981, "logits/rejected": -0.7111387848854065, "logps/chosen": -0.64983069896698, "logps/rejected": -2.6194958686828613, "loss": 0.8476, "nll_loss": 0.7777450084686279, "rewards/accuracies": 0.875, "rewards/chosen": -0.064983069896698, "rewards/margins": 0.1969664990901947, "rewards/rejected": -0.2619495689868927, "step": 3294 }, { "epoch": 2.0100655787707793, "grad_norm": 1.5707921981811523, "learning_rate": 4.800979791794243e-06, "log_odds_chosen": 1.2308064699172974, "log_odds_ratio": -0.5275776982307434, "logits/chosen": -0.7466309070587158, "logits/rejected": -0.6519231796264648, "logps/chosen": -0.7300728559494019, "logps/rejected": -1.8009647130966187, "loss": 0.9965, "nll_loss": 0.9840091466903687, "rewards/accuracies": 0.75, "rewards/chosen": -0.07300728559494019, "rewards/margins": 0.10708919167518616, "rewards/rejected": -0.18009647727012634, "step": 3295 }, { "epoch": 2.010675613847796, "grad_norm": 2.168229103088379, "learning_rate": 4.8e-06, "log_odds_chosen": 1.4897352457046509, "log_odds_ratio": -0.49352896213531494, "logits/chosen": -0.9371669292449951, "logits/rejected": -0.8213059902191162, "logps/chosen": -0.9791871905326843, "logps/rejected": -2.102595567703247, "loss": 1.1795, "nll_loss": 1.1675626039505005, "rewards/accuracies": 0.75, "rewards/chosen": -0.09791871905326843, "rewards/margins": 0.11234083026647568, "rewards/rejected": -0.2102595567703247, "step": 3296 }, { "epoch": 2.011285648924813, "grad_norm": 1.645989179611206, "learning_rate": 4.799020208205756e-06, "log_odds_chosen": 0.563978374004364, "log_odds_ratio": -0.6752762794494629, "logits/chosen": -1.077004313468933, "logits/rejected": -0.8713219165802002, "logps/chosen": -0.9989764094352722, "logps/rejected": -1.4415464401245117, "loss": 1.116, "nll_loss": 1.1519087553024292, "rewards/accuracies": 0.625, "rewards/chosen": -0.09989764541387558, "rewards/margins": 0.044257014989852905, "rewards/rejected": -0.1441546529531479, "step": 3297 }, { "epoch": 2.0118956840018303, "grad_norm": 1.3796085119247437, "learning_rate": 4.798040416411513e-06, "log_odds_chosen": 2.601128101348877, "log_odds_ratio": -0.3905752897262573, "logits/chosen": -1.0141966342926025, "logits/rejected": -0.8601998090744019, "logps/chosen": -0.8301111459732056, "logps/rejected": -3.0682897567749023, "loss": 0.9667, "nll_loss": 0.9002969861030579, "rewards/accuracies": 0.75, "rewards/chosen": -0.08301111310720444, "rewards/margins": 0.2238178849220276, "rewards/rejected": -0.3068290054798126, "step": 3298 }, { "epoch": 2.012505719078847, "grad_norm": 2.461461067199707, "learning_rate": 4.797060624617269e-06, "log_odds_chosen": 1.675866723060608, "log_odds_ratio": -0.33751749992370605, "logits/chosen": -0.8822318911552429, "logits/rejected": -0.8957215547561646, "logps/chosen": -0.8204292058944702, "logps/rejected": -2.054260730743408, "loss": 0.9544, "nll_loss": 1.0312858819961548, "rewards/accuracies": 0.875, "rewards/chosen": -0.08204291760921478, "rewards/margins": 0.12338315695524216, "rewards/rejected": -0.20542608201503754, "step": 3299 }, { "epoch": 2.013115754155864, "grad_norm": 1.3273178339004517, "learning_rate": 4.796080832823024e-06, "log_odds_chosen": 2.244758367538452, "log_odds_ratio": -0.2574893534183502, "logits/chosen": -0.8448606729507446, "logits/rejected": -0.9395595192909241, "logps/chosen": -0.6523261666297913, "logps/rejected": -2.2363545894622803, "loss": 1.1312, "nll_loss": 1.2218047380447388, "rewards/accuracies": 1.0, "rewards/chosen": -0.06523261964321136, "rewards/margins": 0.15840286016464233, "rewards/rejected": -0.2236354500055313, "step": 3300 }, { "epoch": 2.013725789232881, "grad_norm": 1.315082311630249, "learning_rate": 4.795101041028781e-06, "log_odds_chosen": 1.7232763767242432, "log_odds_ratio": -0.39114224910736084, "logits/chosen": -0.6949218511581421, "logits/rejected": -0.7849221229553223, "logps/chosen": -0.7858583331108093, "logps/rejected": -2.13006854057312, "loss": 0.9803, "nll_loss": 0.9891105890274048, "rewards/accuracies": 0.625, "rewards/chosen": -0.07858584076166153, "rewards/margins": 0.13442102074623108, "rewards/rejected": -0.213006854057312, "step": 3301 }, { "epoch": 2.0143358243098977, "grad_norm": 2.177734375, "learning_rate": 4.794121249234537e-06, "log_odds_chosen": 1.2281441688537598, "log_odds_ratio": -0.4328947067260742, "logits/chosen": -0.8536553978919983, "logits/rejected": -0.8487932682037354, "logps/chosen": -0.5757482051849365, "logps/rejected": -1.3909955024719238, "loss": 1.1267, "nll_loss": 0.9762325286865234, "rewards/accuracies": 0.75, "rewards/chosen": -0.057574816048145294, "rewards/margins": 0.08152473717927933, "rewards/rejected": -0.13909955322742462, "step": 3302 }, { "epoch": 2.014945859386915, "grad_norm": 1.732443928718567, "learning_rate": 4.7931414574402935e-06, "log_odds_chosen": 0.8578935861587524, "log_odds_ratio": -0.6361792087554932, "logits/chosen": -0.8859715461730957, "logits/rejected": -0.898522138595581, "logps/chosen": -0.7566874027252197, "logps/rejected": -1.3946828842163086, "loss": 0.9394, "nll_loss": 0.9507178664207458, "rewards/accuracies": 0.25, "rewards/chosen": -0.07566875219345093, "rewards/margins": 0.06379954516887665, "rewards/rejected": -0.13946828246116638, "step": 3303 }, { "epoch": 2.015555894463932, "grad_norm": 1.371157169342041, "learning_rate": 4.7921616656460505e-06, "log_odds_chosen": 2.165132761001587, "log_odds_ratio": -0.305015504360199, "logits/chosen": -0.5995827317237854, "logits/rejected": -0.719261109828949, "logps/chosen": -0.5841732621192932, "logps/rejected": -2.080789566040039, "loss": 1.0653, "nll_loss": 0.8358729481697083, "rewards/accuracies": 0.875, "rewards/chosen": -0.05841732770204544, "rewards/margins": 0.14966164529323578, "rewards/rejected": -0.20807895064353943, "step": 3304 }, { "epoch": 2.0161659295409486, "grad_norm": 1.01297128200531, "learning_rate": 4.791181873851807e-06, "log_odds_chosen": 1.4381396770477295, "log_odds_ratio": -0.39246124029159546, "logits/chosen": -0.7655603885650635, "logits/rejected": -0.8263437151908875, "logps/chosen": -0.6899067759513855, "logps/rejected": -1.7733429670333862, "loss": 1.0159, "nll_loss": 1.0086973905563354, "rewards/accuracies": 0.75, "rewards/chosen": -0.06899068504571915, "rewards/margins": 0.10834360867738724, "rewards/rejected": -0.17733430862426758, "step": 3305 }, { "epoch": 2.0167759646179655, "grad_norm": 2.71789288520813, "learning_rate": 4.790202082057562e-06, "log_odds_chosen": 1.0496383905410767, "log_odds_ratio": -0.5019396543502808, "logits/chosen": -1.070315957069397, "logits/rejected": -0.8996802568435669, "logps/chosen": -0.8489416241645813, "logps/rejected": -1.6198627948760986, "loss": 1.1318, "nll_loss": 1.1865369081497192, "rewards/accuracies": 0.75, "rewards/chosen": -0.08489416539669037, "rewards/margins": 0.07709211111068726, "rewards/rejected": -0.16198627650737762, "step": 3306 }, { "epoch": 2.0173859996949823, "grad_norm": 1.6446505784988403, "learning_rate": 4.789222290263319e-06, "log_odds_chosen": 2.117056369781494, "log_odds_ratio": -0.317310631275177, "logits/chosen": -0.5075703859329224, "logits/rejected": -0.7884924411773682, "logps/chosen": -0.6799415349960327, "logps/rejected": -2.1726620197296143, "loss": 0.911, "nll_loss": 0.7129213809967041, "rewards/accuracies": 0.875, "rewards/chosen": -0.0679941475391388, "rewards/margins": 0.14927208423614502, "rewards/rejected": -0.2172662317752838, "step": 3307 }, { "epoch": 2.017996034771999, "grad_norm": 3.996856212615967, "learning_rate": 4.788242498469075e-06, "log_odds_chosen": 1.1752465963363647, "log_odds_ratio": -0.36636412143707275, "logits/chosen": -0.5978670716285706, "logits/rejected": -0.7608232498168945, "logps/chosen": -0.6887293457984924, "logps/rejected": -1.4205197095870972, "loss": 0.9689, "nll_loss": 0.8077383041381836, "rewards/accuracies": 0.75, "rewards/chosen": -0.06887292861938477, "rewards/margins": 0.07317903637886047, "rewards/rejected": -0.14205196499824524, "step": 3308 }, { "epoch": 2.0186060698490165, "grad_norm": 1.1581614017486572, "learning_rate": 4.787262706674831e-06, "log_odds_chosen": 1.5101144313812256, "log_odds_ratio": -0.4272575378417969, "logits/chosen": -0.8692445158958435, "logits/rejected": -0.8548500537872314, "logps/chosen": -0.9102600812911987, "logps/rejected": -2.1246695518493652, "loss": 1.1025, "nll_loss": 1.3297065496444702, "rewards/accuracies": 0.75, "rewards/chosen": -0.09102600812911987, "rewards/margins": 0.12144097685813904, "rewards/rejected": -0.2124669849872589, "step": 3309 }, { "epoch": 2.0192161049260333, "grad_norm": 4.605230331420898, "learning_rate": 4.786282914880588e-06, "log_odds_chosen": 2.585413932800293, "log_odds_ratio": -0.2508610486984253, "logits/chosen": -0.6042652130126953, "logits/rejected": -0.8290063738822937, "logps/chosen": -0.6289401650428772, "logps/rejected": -2.526897668838501, "loss": 0.9918, "nll_loss": 0.7442548871040344, "rewards/accuracies": 0.875, "rewards/chosen": -0.06289401650428772, "rewards/margins": 0.18979576230049133, "rewards/rejected": -0.25268977880477905, "step": 3310 }, { "epoch": 2.01982614000305, "grad_norm": 1.572887659072876, "learning_rate": 4.785303123086344e-06, "log_odds_chosen": 2.032620668411255, "log_odds_ratio": -0.45000168681144714, "logits/chosen": -1.0485835075378418, "logits/rejected": -0.9624040126800537, "logps/chosen": -1.0610363483428955, "logps/rejected": -2.74558687210083, "loss": 1.0233, "nll_loss": 1.4946199655532837, "rewards/accuracies": 0.625, "rewards/chosen": -0.10610363632440567, "rewards/margins": 0.16845504939556122, "rewards/rejected": -0.2745587229728699, "step": 3311 }, { "epoch": 2.020436175080067, "grad_norm": 1.1074045896530151, "learning_rate": 4.7843233312921e-06, "log_odds_chosen": 1.5199682712554932, "log_odds_ratio": -0.4538496732711792, "logits/chosen": -0.8902978301048279, "logits/rejected": -0.9213417172431946, "logps/chosen": -0.8875598311424255, "logps/rejected": -2.106989860534668, "loss": 1.2525, "nll_loss": 1.1113046407699585, "rewards/accuracies": 0.625, "rewards/chosen": -0.08875598013401031, "rewards/margins": 0.12194301187992096, "rewards/rejected": -0.21069900691509247, "step": 3312 }, { "epoch": 2.021046210157084, "grad_norm": 1.2460567951202393, "learning_rate": 4.783343539497856e-06, "log_odds_chosen": 0.30118700861930847, "log_odds_ratio": -0.6502537727355957, "logits/chosen": -1.0287882089614868, "logits/rejected": -0.9753747582435608, "logps/chosen": -0.843268632888794, "logps/rejected": -1.0905356407165527, "loss": 1.2332, "nll_loss": 1.1055387258529663, "rewards/accuracies": 0.625, "rewards/chosen": -0.0843268558382988, "rewards/margins": 0.02472670003771782, "rewards/rejected": -0.10905355960130692, "step": 3313 }, { "epoch": 2.021656245234101, "grad_norm": 3.3185319900512695, "learning_rate": 4.7823637477036125e-06, "log_odds_chosen": 0.4675048887729645, "log_odds_ratio": -0.6805012226104736, "logits/chosen": -0.8376014828681946, "logits/rejected": -0.9536040425300598, "logps/chosen": -0.8830006122589111, "logps/rejected": -1.2437067031860352, "loss": 1.0896, "nll_loss": 1.0725336074829102, "rewards/accuracies": 0.375, "rewards/chosen": -0.08830006420612335, "rewards/margins": 0.03607060760259628, "rewards/rejected": -0.12437066435813904, "step": 3314 }, { "epoch": 2.022266280311118, "grad_norm": 2.1737430095672607, "learning_rate": 4.7813839559093695e-06, "log_odds_chosen": 1.3552380800247192, "log_odds_ratio": -0.4392698109149933, "logits/chosen": -0.9618107676506042, "logits/rejected": -0.9858677983283997, "logps/chosen": -0.8682811260223389, "logps/rejected": -1.8720378875732422, "loss": 0.9944, "nll_loss": 1.2743656635284424, "rewards/accuracies": 0.75, "rewards/chosen": -0.08682811260223389, "rewards/margins": 0.10037568211555481, "rewards/rejected": -0.1872037947177887, "step": 3315 }, { "epoch": 2.022876315388135, "grad_norm": 1.2803035974502563, "learning_rate": 4.780404164115126e-06, "log_odds_chosen": 2.1742868423461914, "log_odds_ratio": -0.47823232412338257, "logits/chosen": -0.9868505001068115, "logits/rejected": -0.873837411403656, "logps/chosen": -0.812690258026123, "logps/rejected": -2.6188817024230957, "loss": 1.0032, "nll_loss": 0.9772591590881348, "rewards/accuracies": 0.625, "rewards/chosen": -0.0812690258026123, "rewards/margins": 0.18061915040016174, "rewards/rejected": -0.26188817620277405, "step": 3316 }, { "epoch": 2.0234863504651517, "grad_norm": 2.3862483501434326, "learning_rate": 4.779424372320882e-06, "log_odds_chosen": 1.7785395383834839, "log_odds_ratio": -0.5599672198295593, "logits/chosen": -0.8222084045410156, "logits/rejected": -0.7394610047340393, "logps/chosen": -0.7536519765853882, "logps/rejected": -2.181690216064453, "loss": 0.9789, "nll_loss": 0.8991715908050537, "rewards/accuracies": 0.625, "rewards/chosen": -0.07536520063877106, "rewards/margins": 0.14280381798744202, "rewards/rejected": -0.21816901862621307, "step": 3317 }, { "epoch": 2.0240963855421685, "grad_norm": 1.2834339141845703, "learning_rate": 4.778444580526638e-06, "log_odds_chosen": 5.552341938018799, "log_odds_ratio": -0.05023808404803276, "logits/chosen": -0.5836939215660095, "logits/rejected": -0.8069566488265991, "logps/chosen": -0.4238468110561371, "logps/rejected": -4.88125467300415, "loss": 1.0261, "nll_loss": 0.8291033506393433, "rewards/accuracies": 1.0, "rewards/chosen": -0.04238468408584595, "rewards/margins": 0.44574078917503357, "rewards/rejected": -0.4881254732608795, "step": 3318 }, { "epoch": 2.024706420619186, "grad_norm": 2.9340660572052, "learning_rate": 4.777464788732394e-06, "log_odds_chosen": 2.536498785018921, "log_odds_ratio": -0.3859230577945709, "logits/chosen": -0.8989280462265015, "logits/rejected": -0.84382164478302, "logps/chosen": -0.8122622966766357, "logps/rejected": -2.9386110305786133, "loss": 0.9952, "nll_loss": 0.9441635012626648, "rewards/accuracies": 0.875, "rewards/chosen": -0.08122622966766357, "rewards/margins": 0.2126348614692688, "rewards/rejected": -0.2938610911369324, "step": 3319 }, { "epoch": 2.0253164556962027, "grad_norm": 1.4516990184783936, "learning_rate": 4.77648499693815e-06, "log_odds_chosen": 0.9398118853569031, "log_odds_ratio": -0.49432051181793213, "logits/chosen": -0.9286905527114868, "logits/rejected": -0.7233937978744507, "logps/chosen": -0.8102116584777832, "logps/rejected": -1.477845311164856, "loss": 1.0242, "nll_loss": 0.955079197883606, "rewards/accuracies": 0.75, "rewards/chosen": -0.08102115988731384, "rewards/margins": 0.06676336377859116, "rewards/rejected": -0.1477845311164856, "step": 3320 }, { "epoch": 2.0259264907732195, "grad_norm": 1.4594216346740723, "learning_rate": 4.775505205143907e-06, "log_odds_chosen": 1.3708851337432861, "log_odds_ratio": -0.41532474756240845, "logits/chosen": -0.8603397607803345, "logits/rejected": -0.775130033493042, "logps/chosen": -0.8894894123077393, "logps/rejected": -1.9643017053604126, "loss": 1.0797, "nll_loss": 1.0144941806793213, "rewards/accuracies": 0.875, "rewards/chosen": -0.08894894272089005, "rewards/margins": 0.10748124122619629, "rewards/rejected": -0.19643017649650574, "step": 3321 }, { "epoch": 2.0265365258502364, "grad_norm": 0.9735987186431885, "learning_rate": 4.774525413349663e-06, "log_odds_chosen": 0.9708886742591858, "log_odds_ratio": -0.5979281663894653, "logits/chosen": -0.8050706386566162, "logits/rejected": -0.8699665069580078, "logps/chosen": -0.6802713871002197, "logps/rejected": -1.4534900188446045, "loss": 0.8946, "nll_loss": 0.7386515736579895, "rewards/accuracies": 0.75, "rewards/chosen": -0.06802713871002197, "rewards/margins": 0.0773218646645546, "rewards/rejected": -0.14534901082515717, "step": 3322 }, { "epoch": 2.027146560927253, "grad_norm": 1.5456293821334839, "learning_rate": 4.7735456215554185e-06, "log_odds_chosen": 0.35892271995544434, "log_odds_ratio": -0.7808266282081604, "logits/chosen": -0.9674046039581299, "logits/rejected": -0.7552224397659302, "logps/chosen": -1.125458002090454, "logps/rejected": -1.3244850635528564, "loss": 1.0274, "nll_loss": 1.239645004272461, "rewards/accuracies": 0.5, "rewards/chosen": -0.11254580318927765, "rewards/margins": 0.01990271545946598, "rewards/rejected": -0.13244852423667908, "step": 3323 }, { "epoch": 2.02775659600427, "grad_norm": 4.009816646575928, "learning_rate": 4.7725658297611754e-06, "log_odds_chosen": 0.24887454509735107, "log_odds_ratio": -0.813612163066864, "logits/chosen": -0.8095759153366089, "logits/rejected": -0.8014136552810669, "logps/chosen": -0.7264891862869263, "logps/rejected": -0.9858429431915283, "loss": 1.0628, "nll_loss": 0.9744747877120972, "rewards/accuracies": 0.5, "rewards/chosen": -0.07264892756938934, "rewards/margins": 0.025935377925634384, "rewards/rejected": -0.09858430176973343, "step": 3324 }, { "epoch": 2.0283666310812873, "grad_norm": 3.0509727001190186, "learning_rate": 4.7715860379669316e-06, "log_odds_chosen": 3.756739616394043, "log_odds_ratio": -0.09900423884391785, "logits/chosen": -0.6785666942596436, "logits/rejected": -0.9784849882125854, "logps/chosen": -0.4862014651298523, "logps/rejected": -3.305267810821533, "loss": 0.9635, "nll_loss": 0.9337749481201172, "rewards/accuracies": 1.0, "rewards/chosen": -0.04862014204263687, "rewards/margins": 0.2819066345691681, "rewards/rejected": -0.33052676916122437, "step": 3325 }, { "epoch": 2.028976666158304, "grad_norm": 0.9460153579711914, "learning_rate": 4.7706062461726885e-06, "log_odds_chosen": 1.534253716468811, "log_odds_ratio": -0.312348335981369, "logits/chosen": -0.8190928101539612, "logits/rejected": -0.9323708415031433, "logps/chosen": -0.6548188924789429, "logps/rejected": -1.7289694547653198, "loss": 0.9646, "nll_loss": 0.8531889915466309, "rewards/accuracies": 1.0, "rewards/chosen": -0.06548189371824265, "rewards/margins": 0.10741505771875381, "rewards/rejected": -0.17289695143699646, "step": 3326 }, { "epoch": 2.029586701235321, "grad_norm": 1.444297194480896, "learning_rate": 4.769626454378445e-06, "log_odds_chosen": 1.6470669507980347, "log_odds_ratio": -0.5562753081321716, "logits/chosen": -0.9213936924934387, "logits/rejected": -0.8264152407646179, "logps/chosen": -0.7825149297714233, "logps/rejected": -2.0002455711364746, "loss": 1.0741, "nll_loss": 1.0929646492004395, "rewards/accuracies": 0.5, "rewards/chosen": -0.07825150340795517, "rewards/margins": 0.12177307903766632, "rewards/rejected": -0.2000245749950409, "step": 3327 }, { "epoch": 2.030196736312338, "grad_norm": 1.212943196296692, "learning_rate": 4.768646662584201e-06, "log_odds_chosen": 2.9574310779571533, "log_odds_ratio": -0.15882055461406708, "logits/chosen": -0.669527530670166, "logits/rejected": -0.8137344121932983, "logps/chosen": -0.5602372884750366, "logps/rejected": -2.7311935424804688, "loss": 1.0221, "nll_loss": 0.6864817142486572, "rewards/accuracies": 1.0, "rewards/chosen": -0.0560237318277359, "rewards/margins": 0.21709564328193665, "rewards/rejected": -0.27311936020851135, "step": 3328 }, { "epoch": 2.0308067713893547, "grad_norm": 2.1526618003845215, "learning_rate": 4.767666870789957e-06, "log_odds_chosen": 0.7598338723182678, "log_odds_ratio": -0.7245975136756897, "logits/chosen": -0.8314036726951599, "logits/rejected": -0.9028393626213074, "logps/chosen": -1.0686640739440918, "logps/rejected": -1.6428018808364868, "loss": 1.2435, "nll_loss": 1.277374505996704, "rewards/accuracies": 0.375, "rewards/chosen": -0.10686640441417694, "rewards/margins": 0.05741378292441368, "rewards/rejected": -0.16428019106388092, "step": 3329 }, { "epoch": 2.031416806466372, "grad_norm": 1.4576493501663208, "learning_rate": 4.766687078995713e-06, "log_odds_chosen": 1.786247968673706, "log_odds_ratio": -0.4287133812904358, "logits/chosen": -0.7338982224464417, "logits/rejected": -0.939186692237854, "logps/chosen": -0.7343932390213013, "logps/rejected": -2.043269157409668, "loss": 1.0598, "nll_loss": 0.8449286222457886, "rewards/accuracies": 0.875, "rewards/chosen": -0.0734393298625946, "rewards/margins": 0.13088759779930115, "rewards/rejected": -0.20432692766189575, "step": 3330 }, { "epoch": 2.032026841543389, "grad_norm": 1.2439541816711426, "learning_rate": 4.765707287201469e-06, "log_odds_chosen": 0.6773325204849243, "log_odds_ratio": -0.571659505367279, "logits/chosen": -0.9238535761833191, "logits/rejected": -0.9872488379478455, "logps/chosen": -0.8214558362960815, "logps/rejected": -1.34645414352417, "loss": 0.9685, "nll_loss": 0.9527511596679688, "rewards/accuracies": 0.625, "rewards/chosen": -0.0821455866098404, "rewards/margins": 0.05249984189867973, "rewards/rejected": -0.13464541733264923, "step": 3331 }, { "epoch": 2.0326368766204057, "grad_norm": 2.6882615089416504, "learning_rate": 4.764727495407226e-06, "log_odds_chosen": 1.8716087341308594, "log_odds_ratio": -0.40881258249282837, "logits/chosen": -0.8268192410469055, "logits/rejected": -0.8439706563949585, "logps/chosen": -0.7976162433624268, "logps/rejected": -2.3046913146972656, "loss": 1.093, "nll_loss": 0.9377488493919373, "rewards/accuracies": 0.625, "rewards/chosen": -0.07976162433624268, "rewards/margins": 0.15070748329162598, "rewards/rejected": -0.23046912252902985, "step": 3332 }, { "epoch": 2.0332469116974226, "grad_norm": 2.8116295337677, "learning_rate": 4.763747703612982e-06, "log_odds_chosen": 1.4293584823608398, "log_odds_ratio": -0.5323713421821594, "logits/chosen": -0.7646288871765137, "logits/rejected": -0.8883991241455078, "logps/chosen": -0.8434984683990479, "logps/rejected": -1.8393253087997437, "loss": 1.0722, "nll_loss": 1.1231542825698853, "rewards/accuracies": 0.5, "rewards/chosen": -0.0843498483300209, "rewards/margins": 0.09958268702030182, "rewards/rejected": -0.18393254280090332, "step": 3333 }, { "epoch": 2.0338569467744394, "grad_norm": 1.1737818717956543, "learning_rate": 4.762767911818738e-06, "log_odds_chosen": 0.9572833180427551, "log_odds_ratio": -0.646823525428772, "logits/chosen": -1.0818537473678589, "logits/rejected": -0.9453969597816467, "logps/chosen": -1.0511767864227295, "logps/rejected": -1.8559067249298096, "loss": 1.1793, "nll_loss": 1.2069370746612549, "rewards/accuracies": 0.5, "rewards/chosen": -0.10511768609285355, "rewards/margins": 0.08047299087047577, "rewards/rejected": -0.18559066951274872, "step": 3334 }, { "epoch": 2.0344669818514562, "grad_norm": 1.488215684890747, "learning_rate": 4.7617881200244945e-06, "log_odds_chosen": 0.8357565402984619, "log_odds_ratio": -0.595829963684082, "logits/chosen": -0.9904341101646423, "logits/rejected": -0.9445765614509583, "logps/chosen": -0.9043703079223633, "logps/rejected": -1.6313832998275757, "loss": 1.0727, "nll_loss": 1.3102928400039673, "rewards/accuracies": 0.375, "rewards/chosen": -0.09043703973293304, "rewards/margins": 0.07270129770040512, "rewards/rejected": -0.16313832998275757, "step": 3335 }, { "epoch": 2.0350770169284735, "grad_norm": 1.1685377359390259, "learning_rate": 4.760808328230251e-06, "log_odds_chosen": 2.2679522037506104, "log_odds_ratio": -0.2556418776512146, "logits/chosen": -0.6776978373527527, "logits/rejected": -0.8564092516899109, "logps/chosen": -0.7105318307876587, "logps/rejected": -2.436448574066162, "loss": 1.004, "nll_loss": 0.8597302436828613, "rewards/accuracies": 0.875, "rewards/chosen": -0.07105318456888199, "rewards/margins": 0.1725916564464569, "rewards/rejected": -0.2436448484659195, "step": 3336 }, { "epoch": 2.0356870520054904, "grad_norm": 1.7074198722839355, "learning_rate": 4.759828536436007e-06, "log_odds_chosen": 0.6642356514930725, "log_odds_ratio": -0.6412957906723022, "logits/chosen": -0.7368584871292114, "logits/rejected": -0.8108037710189819, "logps/chosen": -0.8190281987190247, "logps/rejected": -1.3708134889602661, "loss": 0.9253, "nll_loss": 0.8602015376091003, "rewards/accuracies": 0.75, "rewards/chosen": -0.08190282434225082, "rewards/margins": 0.05517851933836937, "rewards/rejected": -0.1370813548564911, "step": 3337 }, { "epoch": 2.0362970870825072, "grad_norm": 1.3133931159973145, "learning_rate": 4.758848744641764e-06, "log_odds_chosen": 1.72239089012146, "log_odds_ratio": -0.37088602781295776, "logits/chosen": -0.9636951684951782, "logits/rejected": -0.9352770447731018, "logps/chosen": -0.8671882748603821, "logps/rejected": -2.162400960922241, "loss": 1.1364, "nll_loss": 1.184581995010376, "rewards/accuracies": 0.75, "rewards/chosen": -0.08671882003545761, "rewards/margins": 0.12952128052711487, "rewards/rejected": -0.21624010801315308, "step": 3338 }, { "epoch": 2.036907122159524, "grad_norm": 5.297682762145996, "learning_rate": 4.75786895284752e-06, "log_odds_chosen": 0.6334404945373535, "log_odds_ratio": -0.4638219177722931, "logits/chosen": -0.880454957485199, "logits/rejected": -0.9377495646476746, "logps/chosen": -1.0571753978729248, "logps/rejected": -1.525261640548706, "loss": 1.2334, "nll_loss": 1.3333781957626343, "rewards/accuracies": 0.875, "rewards/chosen": -0.10571753233671188, "rewards/margins": 0.0468086339533329, "rewards/rejected": -0.15252617001533508, "step": 3339 }, { "epoch": 2.037517157236541, "grad_norm": 1.7220866680145264, "learning_rate": 4.756889161053276e-06, "log_odds_chosen": 1.502831220626831, "log_odds_ratio": -0.4223432242870331, "logits/chosen": -0.9554048180580139, "logits/rejected": -0.8976925611495972, "logps/chosen": -1.14565908908844, "logps/rejected": -2.3897793292999268, "loss": 1.2399, "nll_loss": 1.2899006605148315, "rewards/accuracies": 0.75, "rewards/chosen": -0.114565908908844, "rewards/margins": 0.12441202253103256, "rewards/rejected": -0.23897793889045715, "step": 3340 }, { "epoch": 2.038127192313558, "grad_norm": 6.815831661224365, "learning_rate": 4.755909369259032e-06, "log_odds_chosen": 1.0310015678405762, "log_odds_ratio": -0.4302423596382141, "logits/chosen": -0.9421552419662476, "logits/rejected": -0.9831014275550842, "logps/chosen": -0.778502881526947, "logps/rejected": -1.513695240020752, "loss": 1.1037, "nll_loss": 1.0861749649047852, "rewards/accuracies": 0.75, "rewards/chosen": -0.07785028964281082, "rewards/margins": 0.07351923733949661, "rewards/rejected": -0.15136951208114624, "step": 3341 }, { "epoch": 2.038737227390575, "grad_norm": 1.5985466241836548, "learning_rate": 4.754929577464788e-06, "log_odds_chosen": 1.8895552158355713, "log_odds_ratio": -0.41413450241088867, "logits/chosen": -0.5140672922134399, "logits/rejected": -0.6684013605117798, "logps/chosen": -0.873878002166748, "logps/rejected": -2.2380998134613037, "loss": 1.1435, "nll_loss": 1.1219314336776733, "rewards/accuracies": 0.75, "rewards/chosen": -0.0873878002166748, "rewards/margins": 0.13642217218875885, "rewards/rejected": -0.22380998730659485, "step": 3342 }, { "epoch": 2.039347262467592, "grad_norm": 1.77402925491333, "learning_rate": 4.753949785670545e-06, "log_odds_chosen": 0.23541192710399628, "log_odds_ratio": -0.7211547493934631, "logits/chosen": -1.0180336236953735, "logits/rejected": -0.9751753807067871, "logps/chosen": -0.803249716758728, "logps/rejected": -0.9513666033744812, "loss": 1.1904, "nll_loss": 1.1028342247009277, "rewards/accuracies": 0.375, "rewards/chosen": -0.08032497763633728, "rewards/margins": 0.014811679720878601, "rewards/rejected": -0.09513665735721588, "step": 3343 }, { "epoch": 2.0399572975446087, "grad_norm": 3.605665445327759, "learning_rate": 4.752969993876301e-06, "log_odds_chosen": 0.48361966013908386, "log_odds_ratio": -0.567979097366333, "logits/chosen": -1.0756157636642456, "logits/rejected": -0.9025520086288452, "logps/chosen": -1.0018824338912964, "logps/rejected": -1.3724980354309082, "loss": 1.1891, "nll_loss": 1.176647663116455, "rewards/accuracies": 0.625, "rewards/chosen": -0.1001882404088974, "rewards/margins": 0.03706156462430954, "rewards/rejected": -0.13724979758262634, "step": 3344 }, { "epoch": 2.0405673326216256, "grad_norm": 1.6063852310180664, "learning_rate": 4.751990202082057e-06, "log_odds_chosen": -0.18957413733005524, "log_odds_ratio": -0.8817780017852783, "logits/chosen": -0.8305633068084717, "logits/rejected": -0.763999342918396, "logps/chosen": -1.0140037536621094, "logps/rejected": -0.9302712678909302, "loss": 1.0136, "nll_loss": 1.11980140209198, "rewards/accuracies": 0.375, "rewards/chosen": -0.10140036791563034, "rewards/margins": -0.008373243734240532, "rewards/rejected": -0.09302712976932526, "step": 3345 }, { "epoch": 2.041177367698643, "grad_norm": 1.4987478256225586, "learning_rate": 4.7510104102878135e-06, "log_odds_chosen": 2.3668031692504883, "log_odds_ratio": -0.22787486016750336, "logits/chosen": -0.7347167730331421, "logits/rejected": -0.873073160648346, "logps/chosen": -0.6913093328475952, "logps/rejected": -2.3722262382507324, "loss": 1.0833, "nll_loss": 0.8778835535049438, "rewards/accuracies": 0.875, "rewards/chosen": -0.06913094222545624, "rewards/margins": 0.16809169948101044, "rewards/rejected": -0.23722264170646667, "step": 3346 }, { "epoch": 2.0417874027756597, "grad_norm": 1.3648673295974731, "learning_rate": 4.75003061849357e-06, "log_odds_chosen": 0.5881509780883789, "log_odds_ratio": -0.579301655292511, "logits/chosen": -1.0017368793487549, "logits/rejected": -0.9215659499168396, "logps/chosen": -0.8859582543373108, "logps/rejected": -1.3007484674453735, "loss": 1.1907, "nll_loss": 1.2212458848953247, "rewards/accuracies": 0.625, "rewards/chosen": -0.08859582245349884, "rewards/margins": 0.041479021310806274, "rewards/rejected": -0.13007484376430511, "step": 3347 }, { "epoch": 2.0423974378526766, "grad_norm": 1.3829901218414307, "learning_rate": 4.749050826699326e-06, "log_odds_chosen": 2.9326531887054443, "log_odds_ratio": -0.21470928192138672, "logits/chosen": -1.0784761905670166, "logits/rejected": -1.0843260288238525, "logps/chosen": -0.8888137936592102, "logps/rejected": -3.3165087699890137, "loss": 1.023, "nll_loss": 1.2258464097976685, "rewards/accuracies": 1.0, "rewards/chosen": -0.08888138085603714, "rewards/margins": 0.2427694946527481, "rewards/rejected": -0.33165085315704346, "step": 3348 }, { "epoch": 2.0430074729296934, "grad_norm": 1.7756242752075195, "learning_rate": 4.748071034905083e-06, "log_odds_chosen": 1.0413813591003418, "log_odds_ratio": -0.40777096152305603, "logits/chosen": -1.1103520393371582, "logits/rejected": -0.9635636210441589, "logps/chosen": -0.8613606691360474, "logps/rejected": -1.6071724891662598, "loss": 1.0499, "nll_loss": 1.0819261074066162, "rewards/accuracies": 0.625, "rewards/chosen": -0.08613606542348862, "rewards/margins": 0.07458119094371796, "rewards/rejected": -0.16071724891662598, "step": 3349 }, { "epoch": 2.0436175080067103, "grad_norm": 2.0750527381896973, "learning_rate": 4.747091243110839e-06, "log_odds_chosen": 2.1396727561950684, "log_odds_ratio": -0.22027629613876343, "logits/chosen": -0.48344865441322327, "logits/rejected": -0.5952343940734863, "logps/chosen": -0.6991540193557739, "logps/rejected": -2.1187584400177, "loss": 0.9586, "nll_loss": 0.7510977387428284, "rewards/accuracies": 1.0, "rewards/chosen": -0.06991540640592575, "rewards/margins": 0.14196044206619263, "rewards/rejected": -0.21187584102153778, "step": 3350 }, { "epoch": 2.044227543083727, "grad_norm": 1.3483778238296509, "learning_rate": 4.746111451316596e-06, "log_odds_chosen": 1.4519481658935547, "log_odds_ratio": -0.36531373858451843, "logits/chosen": -0.9207762479782104, "logits/rejected": -0.99809730052948, "logps/chosen": -0.9775614738464355, "logps/rejected": -2.1382064819335938, "loss": 1.0294, "nll_loss": 1.1044892072677612, "rewards/accuracies": 0.875, "rewards/chosen": -0.09775615483522415, "rewards/margins": 0.11606451123952866, "rewards/rejected": -0.2138206660747528, "step": 3351 }, { "epoch": 2.0448375781607444, "grad_norm": 1.3490172624588013, "learning_rate": 4.745131659522351e-06, "log_odds_chosen": 1.5169883966445923, "log_odds_ratio": -0.2864552438259125, "logits/chosen": -1.073367714881897, "logits/rejected": -1.0164921283721924, "logps/chosen": -0.7845557332038879, "logps/rejected": -1.7275218963623047, "loss": 1.202, "nll_loss": 1.1058851480484009, "rewards/accuracies": 1.0, "rewards/chosen": -0.07845556735992432, "rewards/margins": 0.09429661929607391, "rewards/rejected": -0.17275220155715942, "step": 3352 }, { "epoch": 2.0454476132377613, "grad_norm": 6.551699161529541, "learning_rate": 4.744151867728107e-06, "log_odds_chosen": 2.8237836360931396, "log_odds_ratio": -0.27652043104171753, "logits/chosen": -0.6957963109016418, "logits/rejected": -0.7811368703842163, "logps/chosen": -0.6748140454292297, "logps/rejected": -2.9549126625061035, "loss": 1.0874, "nll_loss": 0.8630077838897705, "rewards/accuracies": 0.875, "rewards/chosen": -0.06748140603303909, "rewards/margins": 0.22800984978675842, "rewards/rejected": -0.2954912483692169, "step": 3353 }, { "epoch": 2.046057648314778, "grad_norm": 1.2639482021331787, "learning_rate": 4.743172075933864e-06, "log_odds_chosen": 1.5890313386917114, "log_odds_ratio": -0.33599337935447693, "logits/chosen": -0.6940735578536987, "logits/rejected": -0.7855267524719238, "logps/chosen": -0.7960492372512817, "logps/rejected": -1.9097115993499756, "loss": 1.0188, "nll_loss": 1.1255114078521729, "rewards/accuracies": 0.875, "rewards/chosen": -0.07960493117570877, "rewards/margins": 0.11136624962091446, "rewards/rejected": -0.19097116589546204, "step": 3354 }, { "epoch": 2.046667683391795, "grad_norm": 2.026780605316162, "learning_rate": 4.74219228413962e-06, "log_odds_chosen": 1.4903626441955566, "log_odds_ratio": -0.4214814305305481, "logits/chosen": -0.7477064728736877, "logits/rejected": -0.8415327668190002, "logps/chosen": -0.7843739986419678, "logps/rejected": -1.6347095966339111, "loss": 1.1257, "nll_loss": 1.2083505392074585, "rewards/accuracies": 0.875, "rewards/chosen": -0.07843740284442902, "rewards/margins": 0.08503356575965881, "rewards/rejected": -0.16347096860408783, "step": 3355 }, { "epoch": 2.047277718468812, "grad_norm": 1.920900821685791, "learning_rate": 4.7412124923453764e-06, "log_odds_chosen": 1.6412657499313354, "log_odds_ratio": -0.4462743401527405, "logits/chosen": -0.832330584526062, "logits/rejected": -1.0110036134719849, "logps/chosen": -0.8005251884460449, "logps/rejected": -2.1030161380767822, "loss": 1.1161, "nll_loss": 0.9542414546012878, "rewards/accuracies": 0.625, "rewards/chosen": -0.08005252480506897, "rewards/margins": 0.13024909794330597, "rewards/rejected": -0.21030160784721375, "step": 3356 }, { "epoch": 2.047887753545829, "grad_norm": 1.441785454750061, "learning_rate": 4.740232700551133e-06, "log_odds_chosen": 0.014057785272598267, "log_odds_ratio": -0.7633894681930542, "logits/chosen": -1.0025725364685059, "logits/rejected": -0.9100174307823181, "logps/chosen": -1.012108564376831, "logps/rejected": -1.0301027297973633, "loss": 1.1386, "nll_loss": 1.1199803352355957, "rewards/accuracies": 0.25, "rewards/chosen": -0.10121086239814758, "rewards/margins": 0.0017994139343500137, "rewards/rejected": -0.10301028192043304, "step": 3357 }, { "epoch": 2.048497788622846, "grad_norm": 1.2187868356704712, "learning_rate": 4.739252908756889e-06, "log_odds_chosen": 1.5116772651672363, "log_odds_ratio": -0.4588967561721802, "logits/chosen": -0.734912633895874, "logits/rejected": -0.7018141746520996, "logps/chosen": -0.7127405405044556, "logps/rejected": -1.8455047607421875, "loss": 1.0235, "nll_loss": 1.0559566020965576, "rewards/accuracies": 0.75, "rewards/chosen": -0.0712740570306778, "rewards/margins": 0.113276407122612, "rewards/rejected": -0.1845504641532898, "step": 3358 }, { "epoch": 2.0491078236998628, "grad_norm": 2.711277723312378, "learning_rate": 4.738273116962645e-06, "log_odds_chosen": 1.8917309045791626, "log_odds_ratio": -0.39709019660949707, "logits/chosen": -0.9482991099357605, "logits/rejected": -0.9054345488548279, "logps/chosen": -0.8639664649963379, "logps/rejected": -2.1399753093719482, "loss": 1.1819, "nll_loss": 1.2512397766113281, "rewards/accuracies": 0.625, "rewards/chosen": -0.08639664947986603, "rewards/margins": 0.12760087847709656, "rewards/rejected": -0.21399752795696259, "step": 3359 }, { "epoch": 2.0497178587768796, "grad_norm": 1.2913262844085693, "learning_rate": 4.737293325168402e-06, "log_odds_chosen": 1.8541960716247559, "log_odds_ratio": -0.43627113103866577, "logits/chosen": -0.7855002880096436, "logits/rejected": -0.9375516772270203, "logps/chosen": -0.6916579604148865, "logps/rejected": -2.1086463928222656, "loss": 1.1272, "nll_loss": 0.8398441672325134, "rewards/accuracies": 0.75, "rewards/chosen": -0.06916580349206924, "rewards/margins": 0.14169885218143463, "rewards/rejected": -0.21086466312408447, "step": 3360 }, { "epoch": 2.0503278938538965, "grad_norm": 4.202425956726074, "learning_rate": 4.736313533374158e-06, "log_odds_chosen": 1.4868428707122803, "log_odds_ratio": -0.4122202396392822, "logits/chosen": -0.6783283948898315, "logits/rejected": -0.7723677754402161, "logps/chosen": -0.5849062204360962, "logps/rejected": -1.519732117652893, "loss": 1.0396, "nll_loss": 0.8086789846420288, "rewards/accuracies": 0.75, "rewards/chosen": -0.05849061906337738, "rewards/margins": 0.0934825986623764, "rewards/rejected": -0.15197321772575378, "step": 3361 }, { "epoch": 2.0509379289309133, "grad_norm": 2.993971109390259, "learning_rate": 4.735333741579914e-06, "log_odds_chosen": 3.6121022701263428, "log_odds_ratio": -0.10147519409656525, "logits/chosen": -0.5523157119750977, "logits/rejected": -0.8532407283782959, "logps/chosen": -0.5136462450027466, "logps/rejected": -3.181925058364868, "loss": 0.9121, "nll_loss": 0.7057712078094482, "rewards/accuracies": 1.0, "rewards/chosen": -0.051364630460739136, "rewards/margins": 0.26682785153388977, "rewards/rejected": -0.3181924819946289, "step": 3362 }, { "epoch": 2.0515479640079306, "grad_norm": 1.7809185981750488, "learning_rate": 4.73435394978567e-06, "log_odds_chosen": 0.5858202576637268, "log_odds_ratio": -0.6349130868911743, "logits/chosen": -1.070469617843628, "logits/rejected": -0.9748386144638062, "logps/chosen": -1.0751351118087769, "logps/rejected": -1.4556487798690796, "loss": 1.2423, "nll_loss": 1.3218982219696045, "rewards/accuracies": 0.625, "rewards/chosen": -0.10751352459192276, "rewards/margins": 0.038051359355449677, "rewards/rejected": -0.14556488394737244, "step": 3363 }, { "epoch": 2.0521579990849474, "grad_norm": 2.998276472091675, "learning_rate": 4.733374157991426e-06, "log_odds_chosen": 1.4825448989868164, "log_odds_ratio": -0.4077956974506378, "logits/chosen": -0.8944116830825806, "logits/rejected": -0.9091695547103882, "logps/chosen": -0.7265413999557495, "logps/rejected": -1.8081036806106567, "loss": 1.0845, "nll_loss": 1.1151947975158691, "rewards/accuracies": 0.875, "rewards/chosen": -0.07265414297580719, "rewards/margins": 0.108156219124794, "rewards/rejected": -0.1808103770017624, "step": 3364 }, { "epoch": 2.0527680341619643, "grad_norm": 8.756694793701172, "learning_rate": 4.732394366197182e-06, "log_odds_chosen": 1.3932290077209473, "log_odds_ratio": -0.2624816298484802, "logits/chosen": -0.9512516856193542, "logits/rejected": -0.8482272624969482, "logps/chosen": -0.8851184248924255, "logps/rejected": -1.8710392713546753, "loss": 1.0709, "nll_loss": 1.1670302152633667, "rewards/accuracies": 1.0, "rewards/chosen": -0.08851184695959091, "rewards/margins": 0.09859207272529602, "rewards/rejected": -0.18710392713546753, "step": 3365 }, { "epoch": 2.053378069238981, "grad_norm": 4.24714469909668, "learning_rate": 4.731414574402939e-06, "log_odds_chosen": 1.3698029518127441, "log_odds_ratio": -0.3395390212535858, "logits/chosen": -0.7378455400466919, "logits/rejected": -0.869381844997406, "logps/chosen": -0.5935160517692566, "logps/rejected": -1.3920197486877441, "loss": 1.0642, "nll_loss": 0.7654874324798584, "rewards/accuracies": 0.875, "rewards/chosen": -0.0593516044318676, "rewards/margins": 0.07985036820173264, "rewards/rejected": -0.13920198380947113, "step": 3366 }, { "epoch": 2.053988104315998, "grad_norm": 1.289550542831421, "learning_rate": 4.7304347826086955e-06, "log_odds_chosen": 1.683426856994629, "log_odds_ratio": -0.37662559747695923, "logits/chosen": -0.6196346282958984, "logits/rejected": -0.7578267455101013, "logps/chosen": -0.7315376400947571, "logps/rejected": -2.1081576347351074, "loss": 1.0201, "nll_loss": 0.8640579581260681, "rewards/accuracies": 0.75, "rewards/chosen": -0.0731537714600563, "rewards/margins": 0.13766199350357056, "rewards/rejected": -0.21081575751304626, "step": 3367 }, { "epoch": 2.0545981393930153, "grad_norm": 5.435867786407471, "learning_rate": 4.7294549908144524e-06, "log_odds_chosen": 2.009110450744629, "log_odds_ratio": -0.5156393051147461, "logits/chosen": -0.9650190472602844, "logits/rejected": -0.8700199723243713, "logps/chosen": -0.9924151301383972, "logps/rejected": -2.6868197917938232, "loss": 1.059, "nll_loss": 1.1224411725997925, "rewards/accuracies": 0.625, "rewards/chosen": -0.09924152493476868, "rewards/margins": 0.16944046318531036, "rewards/rejected": -0.26868200302124023, "step": 3368 }, { "epoch": 2.055208174470032, "grad_norm": 2.1879537105560303, "learning_rate": 4.728475199020208e-06, "log_odds_chosen": 2.4891786575317383, "log_odds_ratio": -0.38831770420074463, "logits/chosen": -0.5489276647567749, "logits/rejected": -0.6467500925064087, "logps/chosen": -0.6555076837539673, "logps/rejected": -2.5934343338012695, "loss": 0.9699, "nll_loss": 0.802727460861206, "rewards/accuracies": 0.75, "rewards/chosen": -0.0655507743358612, "rewards/margins": 0.1937926709651947, "rewards/rejected": -0.2593434453010559, "step": 3369 }, { "epoch": 2.055818209547049, "grad_norm": 5.383404731750488, "learning_rate": 4.727495407225964e-06, "log_odds_chosen": 1.6003775596618652, "log_odds_ratio": -0.4886636435985565, "logits/chosen": -0.643420398235321, "logits/rejected": -0.6270260810852051, "logps/chosen": -0.5837981700897217, "logps/rejected": -1.8878464698791504, "loss": 0.9589, "nll_loss": 0.7610239386558533, "rewards/accuracies": 0.75, "rewards/chosen": -0.05837981775403023, "rewards/margins": 0.13040482997894287, "rewards/rejected": -0.1887846738100052, "step": 3370 }, { "epoch": 2.056428244624066, "grad_norm": 2.746520757675171, "learning_rate": 4.726515615431721e-06, "log_odds_chosen": 0.8954005241394043, "log_odds_ratio": -0.487186998128891, "logits/chosen": -0.7269982099533081, "logits/rejected": -0.7731778621673584, "logps/chosen": -0.6737625598907471, "logps/rejected": -1.2764322757720947, "loss": 1.0349, "nll_loss": 0.852385401725769, "rewards/accuracies": 0.875, "rewards/chosen": -0.06737625598907471, "rewards/margins": 0.060266975313425064, "rewards/rejected": -0.12764322757720947, "step": 3371 }, { "epoch": 2.0570382797010827, "grad_norm": 2.9098494052886963, "learning_rate": 4.725535823637477e-06, "log_odds_chosen": 1.5838160514831543, "log_odds_ratio": -0.41868850588798523, "logits/chosen": -0.9026504755020142, "logits/rejected": -0.8203778266906738, "logps/chosen": -0.7531280517578125, "logps/rejected": -1.7811791896820068, "loss": 1.0153, "nll_loss": 1.151008129119873, "rewards/accuracies": 0.875, "rewards/chosen": -0.07531280815601349, "rewards/margins": 0.10280512273311615, "rewards/rejected": -0.17811793088912964, "step": 3372 }, { "epoch": 2.0576483147781, "grad_norm": 1.9418095350265503, "learning_rate": 4.724556031843233e-06, "log_odds_chosen": 1.7998278141021729, "log_odds_ratio": -0.49121421575546265, "logits/chosen": -0.6979025602340698, "logits/rejected": -0.7481311559677124, "logps/chosen": -0.6651706695556641, "logps/rejected": -2.0228490829467773, "loss": 0.9557, "nll_loss": 0.7924827933311462, "rewards/accuracies": 0.625, "rewards/chosen": -0.06651706993579865, "rewards/margins": 0.1357678472995758, "rewards/rejected": -0.20228490233421326, "step": 3373 }, { "epoch": 2.058258349855117, "grad_norm": 1.4369548559188843, "learning_rate": 4.72357624004899e-06, "log_odds_chosen": 1.351480484008789, "log_odds_ratio": -0.5848489999771118, "logits/chosen": -1.0263890027999878, "logits/rejected": -1.0292751789093018, "logps/chosen": -0.8005304336547852, "logps/rejected": -1.6485222578048706, "loss": 1.0483, "nll_loss": 1.1792855262756348, "rewards/accuracies": 0.625, "rewards/chosen": -0.08005304634571075, "rewards/margins": 0.08479918539524078, "rewards/rejected": -0.16485223174095154, "step": 3374 }, { "epoch": 2.0588683849321336, "grad_norm": 1.4121661186218262, "learning_rate": 4.722596448254745e-06, "log_odds_chosen": 0.5015830993652344, "log_odds_ratio": -0.6595718264579773, "logits/chosen": -0.9272125959396362, "logits/rejected": -0.9687085151672363, "logps/chosen": -0.8980482220649719, "logps/rejected": -1.3523013591766357, "loss": 0.9826, "nll_loss": 0.9814669489860535, "rewards/accuracies": 0.5, "rewards/chosen": -0.08980482071638107, "rewards/margins": 0.04542531073093414, "rewards/rejected": -0.13523012399673462, "step": 3375 }, { "epoch": 2.0594784200091505, "grad_norm": 3.741492509841919, "learning_rate": 4.721616656460501e-06, "log_odds_chosen": 1.9993430376052856, "log_odds_ratio": -0.4210563898086548, "logits/chosen": -0.5552987456321716, "logits/rejected": -0.7438369989395142, "logps/chosen": -0.8350949287414551, "logps/rejected": -2.438164472579956, "loss": 0.998, "nll_loss": 0.8721805810928345, "rewards/accuracies": 0.75, "rewards/chosen": -0.08350948989391327, "rewards/margins": 0.16030699014663696, "rewards/rejected": -0.24381648004055023, "step": 3376 }, { "epoch": 2.0600884550861673, "grad_norm": 1.6557515859603882, "learning_rate": 4.720636864666258e-06, "log_odds_chosen": 2.3515706062316895, "log_odds_ratio": -0.40208524465560913, "logits/chosen": -0.7600526213645935, "logits/rejected": -0.815265417098999, "logps/chosen": -0.6912788152694702, "logps/rejected": -2.48193359375, "loss": 0.9573, "nll_loss": 0.9031609296798706, "rewards/accuracies": 0.625, "rewards/chosen": -0.0691278874874115, "rewards/margins": 0.17906546592712402, "rewards/rejected": -0.24819336831569672, "step": 3377 }, { "epoch": 2.060698490163184, "grad_norm": 3.1684253215789795, "learning_rate": 4.7196570728720145e-06, "log_odds_chosen": 0.947928786277771, "log_odds_ratio": -0.4262479245662689, "logits/chosen": -0.6792212128639221, "logits/rejected": -0.8136683702468872, "logps/chosen": -0.7866467833518982, "logps/rejected": -1.4281566143035889, "loss": 1.1736, "nll_loss": 1.1283316612243652, "rewards/accuracies": 0.875, "rewards/chosen": -0.07866468280553818, "rewards/margins": 0.06415098160505295, "rewards/rejected": -0.14281566441059113, "step": 3378 }, { "epoch": 2.0613085252402015, "grad_norm": 1.1856048107147217, "learning_rate": 4.7186772810777715e-06, "log_odds_chosen": 2.2327373027801514, "log_odds_ratio": -0.29664215445518494, "logits/chosen": -0.764958918094635, "logits/rejected": -0.8702648878097534, "logps/chosen": -0.6523483991622925, "logps/rejected": -2.3202075958251953, "loss": 1.0669, "nll_loss": 0.954582929611206, "rewards/accuracies": 0.875, "rewards/chosen": -0.06523483991622925, "rewards/margins": 0.16678592562675476, "rewards/rejected": -0.232020765542984, "step": 3379 }, { "epoch": 2.0619185603172183, "grad_norm": 1.3440418243408203, "learning_rate": 4.717697489283527e-06, "log_odds_chosen": 2.5106358528137207, "log_odds_ratio": -0.3758419156074524, "logits/chosen": -0.4983315169811249, "logits/rejected": -0.46421515941619873, "logps/chosen": -0.8509825468063354, "logps/rejected": -2.8948535919189453, "loss": 0.9051, "nll_loss": 0.9262841939926147, "rewards/accuracies": 0.625, "rewards/chosen": -0.0850982517004013, "rewards/margins": 0.2043871134519577, "rewards/rejected": -0.289485365152359, "step": 3380 }, { "epoch": 2.062528595394235, "grad_norm": 1.1662672758102417, "learning_rate": 4.716717697489283e-06, "log_odds_chosen": 1.0350817441940308, "log_odds_ratio": -0.363997220993042, "logits/chosen": -0.8117042779922485, "logits/rejected": -0.7288194894790649, "logps/chosen": -0.8274434804916382, "logps/rejected": -1.4936201572418213, "loss": 1.0951, "nll_loss": 1.024479627609253, "rewards/accuracies": 0.875, "rewards/chosen": -0.08274435997009277, "rewards/margins": 0.06661766022443771, "rewards/rejected": -0.14936202764511108, "step": 3381 }, { "epoch": 2.063138630471252, "grad_norm": 2.1260569095611572, "learning_rate": 4.71573790569504e-06, "log_odds_chosen": 2.734433174133301, "log_odds_ratio": -0.3180646598339081, "logits/chosen": -0.7982807755470276, "logits/rejected": -0.7645038962364197, "logps/chosen": -0.8266090154647827, "logps/rejected": -3.031365394592285, "loss": 1.0601, "nll_loss": 0.8764634132385254, "rewards/accuracies": 0.875, "rewards/chosen": -0.08266090601682663, "rewards/margins": 0.22047561407089233, "rewards/rejected": -0.30313652753829956, "step": 3382 }, { "epoch": 2.063748665548269, "grad_norm": 1.1683650016784668, "learning_rate": 4.714758113900796e-06, "log_odds_chosen": 1.4311354160308838, "log_odds_ratio": -0.3901083767414093, "logits/chosen": -0.7885444760322571, "logits/rejected": -0.7759038209915161, "logps/chosen": -0.6415730714797974, "logps/rejected": -1.5968453884124756, "loss": 0.8882, "nll_loss": 0.9511884450912476, "rewards/accuracies": 0.875, "rewards/chosen": -0.06415730714797974, "rewards/margins": 0.09552721679210663, "rewards/rejected": -0.15968453884124756, "step": 3383 }, { "epoch": 2.064358700625286, "grad_norm": 2.496107816696167, "learning_rate": 4.713778322106552e-06, "log_odds_chosen": 2.0606374740600586, "log_odds_ratio": -0.2832179367542267, "logits/chosen": -0.8834967017173767, "logits/rejected": -0.8238196969032288, "logps/chosen": -0.7494481205940247, "logps/rejected": -2.2494606971740723, "loss": 0.9742, "nll_loss": 0.8679856657981873, "rewards/accuracies": 0.875, "rewards/chosen": -0.07494480907917023, "rewards/margins": 0.15000125765800476, "rewards/rejected": -0.22494608163833618, "step": 3384 }, { "epoch": 2.064968735702303, "grad_norm": 2.4370381832122803, "learning_rate": 4.712798530312309e-06, "log_odds_chosen": 1.886852741241455, "log_odds_ratio": -0.4007924199104309, "logits/chosen": -0.7292571067810059, "logits/rejected": -0.5782760977745056, "logps/chosen": -0.7181040644645691, "logps/rejected": -1.9989961385726929, "loss": 0.9914, "nll_loss": 0.8559372425079346, "rewards/accuracies": 0.75, "rewards/chosen": -0.07181040197610855, "rewards/margins": 0.12808920443058014, "rewards/rejected": -0.1998996138572693, "step": 3385 }, { "epoch": 2.06557877077932, "grad_norm": 1.583221197128296, "learning_rate": 4.711818738518064e-06, "log_odds_chosen": 1.158209204673767, "log_odds_ratio": -0.39965134859085083, "logits/chosen": -0.8547422289848328, "logits/rejected": -0.7609108686447144, "logps/chosen": -0.9170470237731934, "logps/rejected": -1.6953492164611816, "loss": 1.0344, "nll_loss": 0.89805006980896, "rewards/accuracies": 0.75, "rewards/chosen": -0.09170470386743546, "rewards/margins": 0.07783021032810211, "rewards/rejected": -0.16953492164611816, "step": 3386 }, { "epoch": 2.0661888058563367, "grad_norm": 1.6235074996948242, "learning_rate": 4.7108389467238205e-06, "log_odds_chosen": 0.43295717239379883, "log_odds_ratio": -0.6269890666007996, "logits/chosen": -0.5581961870193481, "logits/rejected": -0.5881048440933228, "logps/chosen": -0.7883639931678772, "logps/rejected": -1.0481367111206055, "loss": 0.9829, "nll_loss": 0.8564642667770386, "rewards/accuracies": 0.5, "rewards/chosen": -0.07883639633655548, "rewards/margins": 0.025977272540330887, "rewards/rejected": -0.10481366515159607, "step": 3387 }, { "epoch": 2.0667988409333535, "grad_norm": 1.1959424018859863, "learning_rate": 4.7098591549295774e-06, "log_odds_chosen": 1.506369709968567, "log_odds_ratio": -0.48156964778900146, "logits/chosen": -0.676231324672699, "logits/rejected": -0.6317439079284668, "logps/chosen": -0.7087423801422119, "logps/rejected": -1.777357816696167, "loss": 1.0619, "nll_loss": 1.0738253593444824, "rewards/accuracies": 0.875, "rewards/chosen": -0.07087424397468567, "rewards/margins": 0.10686154663562775, "rewards/rejected": -0.17773579061031342, "step": 3388 }, { "epoch": 2.0674088760103704, "grad_norm": 2.1420557498931885, "learning_rate": 4.7088793631353336e-06, "log_odds_chosen": 1.3431059122085571, "log_odds_ratio": -0.4211176335811615, "logits/chosen": -0.7893017530441284, "logits/rejected": -0.6877216100692749, "logps/chosen": -0.7866301536560059, "logps/rejected": -1.8107787370681763, "loss": 0.9488, "nll_loss": 0.8507710695266724, "rewards/accuracies": 0.75, "rewards/chosen": -0.07866302132606506, "rewards/margins": 0.10241486132144928, "rewards/rejected": -0.18107789754867554, "step": 3389 }, { "epoch": 2.0680189110873877, "grad_norm": 2.867380142211914, "learning_rate": 4.70789957134109e-06, "log_odds_chosen": 1.0077574253082275, "log_odds_ratio": -0.4882905185222626, "logits/chosen": -0.838939368724823, "logits/rejected": -0.6962980031967163, "logps/chosen": -0.7788057327270508, "logps/rejected": -1.5315706729888916, "loss": 1.0704, "nll_loss": 1.1150237321853638, "rewards/accuracies": 0.75, "rewards/chosen": -0.07788057625293732, "rewards/margins": 0.07527647912502289, "rewards/rejected": -0.1531570702791214, "step": 3390 }, { "epoch": 2.0686289461644045, "grad_norm": 1.1507129669189453, "learning_rate": 4.706919779546847e-06, "log_odds_chosen": 0.8162946105003357, "log_odds_ratio": -0.4971082806587219, "logits/chosen": -0.8679473400115967, "logits/rejected": -0.8637828230857849, "logps/chosen": -0.7435000538825989, "logps/rejected": -1.2881580591201782, "loss": 0.9744, "nll_loss": 1.1337836980819702, "rewards/accuracies": 0.625, "rewards/chosen": -0.07435000687837601, "rewards/margins": 0.054465800523757935, "rewards/rejected": -0.12881579995155334, "step": 3391 }, { "epoch": 2.0692389812414214, "grad_norm": 1.624038577079773, "learning_rate": 4.705939987752602e-06, "log_odds_chosen": 0.45839476585388184, "log_odds_ratio": -0.7217825651168823, "logits/chosen": -0.8240686655044556, "logits/rejected": -0.8407875895500183, "logps/chosen": -0.9299455881118774, "logps/rejected": -1.2882907390594482, "loss": 1.0095, "nll_loss": 1.0665158033370972, "rewards/accuracies": 0.5, "rewards/chosen": -0.0929945707321167, "rewards/margins": 0.03583451732993126, "rewards/rejected": -0.12882909178733826, "step": 3392 }, { "epoch": 2.069849016318438, "grad_norm": 3.2452633380889893, "learning_rate": 4.704960195958359e-06, "log_odds_chosen": 1.4250540733337402, "log_odds_ratio": -0.4551590383052826, "logits/chosen": -0.8160351514816284, "logits/rejected": -0.7566376328468323, "logps/chosen": -0.833827793598175, "logps/rejected": -1.9094135761260986, "loss": 1.026, "nll_loss": 1.0262459516525269, "rewards/accuracies": 0.75, "rewards/chosen": -0.08338278532028198, "rewards/margins": 0.10755858570337296, "rewards/rejected": -0.19094136357307434, "step": 3393 }, { "epoch": 2.070459051395455, "grad_norm": 1.4534666538238525, "learning_rate": 4.703980404164115e-06, "log_odds_chosen": 1.4141974449157715, "log_odds_ratio": -0.4183448553085327, "logits/chosen": -0.6798227429389954, "logits/rejected": -0.7308428287506104, "logps/chosen": -0.8905957937240601, "logps/rejected": -1.7932723760604858, "loss": 0.9881, "nll_loss": 1.1214523315429688, "rewards/accuracies": 0.75, "rewards/chosen": -0.08905958384275436, "rewards/margins": 0.09026767313480377, "rewards/rejected": -0.17932724952697754, "step": 3394 }, { "epoch": 2.0710690864724723, "grad_norm": 0.9683372974395752, "learning_rate": 4.703000612369871e-06, "log_odds_chosen": 0.835002064704895, "log_odds_ratio": -0.4573400616645813, "logits/chosen": -0.9316987991333008, "logits/rejected": -0.9333744049072266, "logps/chosen": -0.7562950849533081, "logps/rejected": -1.2999622821807861, "loss": 1.1258, "nll_loss": 1.133978009223938, "rewards/accuracies": 0.875, "rewards/chosen": -0.07562950998544693, "rewards/margins": 0.054366737604141235, "rewards/rejected": -0.12999624013900757, "step": 3395 }, { "epoch": 2.071679121549489, "grad_norm": 1.7993981838226318, "learning_rate": 4.702020820575628e-06, "log_odds_chosen": 1.9604696035385132, "log_odds_ratio": -0.37554997205734253, "logits/chosen": -1.0053825378417969, "logits/rejected": -0.9366999268531799, "logps/chosen": -0.7291748523712158, "logps/rejected": -2.279240608215332, "loss": 0.9146, "nll_loss": 1.0355439186096191, "rewards/accuracies": 0.75, "rewards/chosen": -0.07291749119758606, "rewards/margins": 0.15500658750534058, "rewards/rejected": -0.22792406380176544, "step": 3396 }, { "epoch": 2.072289156626506, "grad_norm": 1.4990994930267334, "learning_rate": 4.701041028781383e-06, "log_odds_chosen": 2.9506869316101074, "log_odds_ratio": -0.13326510787010193, "logits/chosen": -0.9146518707275391, "logits/rejected": -0.8955420255661011, "logps/chosen": -0.6493118405342102, "logps/rejected": -2.9656152725219727, "loss": 1.0426, "nll_loss": 0.9701952934265137, "rewards/accuracies": 1.0, "rewards/chosen": -0.06493118405342102, "rewards/margins": 0.2316303551197052, "rewards/rejected": -0.2965615391731262, "step": 3397 }, { "epoch": 2.072899191703523, "grad_norm": 1.0458641052246094, "learning_rate": 4.7000612369871395e-06, "log_odds_chosen": 1.128053903579712, "log_odds_ratio": -0.38890209794044495, "logits/chosen": -0.8213348984718323, "logits/rejected": -0.8150337934494019, "logps/chosen": -0.6972925662994385, "logps/rejected": -1.347238302230835, "loss": 1.0493, "nll_loss": 0.94046550989151, "rewards/accuracies": 0.875, "rewards/chosen": -0.06972925364971161, "rewards/margins": 0.06499457359313965, "rewards/rejected": -0.13472382724285126, "step": 3398 }, { "epoch": 2.0735092267805397, "grad_norm": 2.39219069480896, "learning_rate": 4.6990814451928965e-06, "log_odds_chosen": 1.1577708721160889, "log_odds_ratio": -0.5517385005950928, "logits/chosen": -0.7297322750091553, "logits/rejected": -0.6448463797569275, "logps/chosen": -0.7526196837425232, "logps/rejected": -1.551237940788269, "loss": 0.9844, "nll_loss": 0.8375670909881592, "rewards/accuracies": 0.75, "rewards/chosen": -0.07526196539402008, "rewards/margins": 0.0798618346452713, "rewards/rejected": -0.15512380003929138, "step": 3399 }, { "epoch": 2.074119261857557, "grad_norm": 1.31736421585083, "learning_rate": 4.698101653398653e-06, "log_odds_chosen": 1.702304482460022, "log_odds_ratio": -0.3756539523601532, "logits/chosen": -0.973698616027832, "logits/rejected": -1.0157370567321777, "logps/chosen": -0.7321118116378784, "logps/rejected": -1.9554088115692139, "loss": 0.9913, "nll_loss": 1.1431546211242676, "rewards/accuracies": 0.75, "rewards/chosen": -0.0732111930847168, "rewards/margins": 0.12232968211174011, "rewards/rejected": -0.1955408751964569, "step": 3400 }, { "epoch": 2.074729296934574, "grad_norm": 1.7255903482437134, "learning_rate": 4.697121861604409e-06, "log_odds_chosen": 0.9588639736175537, "log_odds_ratio": -0.738635241985321, "logits/chosen": -1.0503946542739868, "logits/rejected": -0.9486242532730103, "logps/chosen": -1.1204683780670166, "logps/rejected": -2.103104591369629, "loss": 1.1755, "nll_loss": 1.1667845249176025, "rewards/accuracies": 0.5, "rewards/chosen": -0.11204685270786285, "rewards/margins": 0.09826359897851944, "rewards/rejected": -0.2103104591369629, "step": 3401 }, { "epoch": 2.0753393320115907, "grad_norm": 5.10311222076416, "learning_rate": 4.696142069810166e-06, "log_odds_chosen": 1.6157156229019165, "log_odds_ratio": -0.42239508032798767, "logits/chosen": -0.9360043406486511, "logits/rejected": -0.9434387683868408, "logps/chosen": -0.8280152678489685, "logps/rejected": -2.0192642211914062, "loss": 1.1041, "nll_loss": 1.0642387866973877, "rewards/accuracies": 0.625, "rewards/chosen": -0.08280153572559357, "rewards/margins": 0.1191248744726181, "rewards/rejected": -0.20192641019821167, "step": 3402 }, { "epoch": 2.0759493670886076, "grad_norm": 4.6187944412231445, "learning_rate": 4.695162278015921e-06, "log_odds_chosen": 2.89532732963562, "log_odds_ratio": -0.2950778603553772, "logits/chosen": -0.642992377281189, "logits/rejected": -0.6278515458106995, "logps/chosen": -0.5993418097496033, "logps/rejected": -2.91406512260437, "loss": 1.0081, "nll_loss": 0.8111323118209839, "rewards/accuracies": 0.75, "rewards/chosen": -0.059934183955192566, "rewards/margins": 0.23147234320640564, "rewards/rejected": -0.291406512260437, "step": 3403 }, { "epoch": 2.0765594021656244, "grad_norm": 3.648627281188965, "learning_rate": 4.694182486221677e-06, "log_odds_chosen": 1.9283709526062012, "log_odds_ratio": -0.39716362953186035, "logits/chosen": -0.8990431427955627, "logits/rejected": -0.8422478437423706, "logps/chosen": -0.857670247554779, "logps/rejected": -2.3439383506774902, "loss": 1.0679, "nll_loss": 1.1269499063491821, "rewards/accuracies": 0.875, "rewards/chosen": -0.08576702326536179, "rewards/margins": 0.14862680435180664, "rewards/rejected": -0.23439383506774902, "step": 3404 }, { "epoch": 2.0771694372426412, "grad_norm": 1.3070603609085083, "learning_rate": 4.693202694427434e-06, "log_odds_chosen": 0.6333695650100708, "log_odds_ratio": -0.586082398891449, "logits/chosen": -0.7416192293167114, "logits/rejected": -0.787216305732727, "logps/chosen": -0.7651244401931763, "logps/rejected": -1.2751482725143433, "loss": 1.0323, "nll_loss": 0.8215000033378601, "rewards/accuracies": 0.5, "rewards/chosen": -0.07651244848966599, "rewards/margins": 0.0510023757815361, "rewards/rejected": -0.1275148242712021, "step": 3405 }, { "epoch": 2.0777794723196585, "grad_norm": 1.696018934249878, "learning_rate": 4.69222290263319e-06, "log_odds_chosen": 0.475538045167923, "log_odds_ratio": -0.6789229512214661, "logits/chosen": -0.8299585580825806, "logits/rejected": -0.8009642958641052, "logps/chosen": -1.0349762439727783, "logps/rejected": -1.4060583114624023, "loss": 1.0111, "nll_loss": 1.0213124752044678, "rewards/accuracies": 0.5, "rewards/chosen": -0.10349763184785843, "rewards/margins": 0.037108197808265686, "rewards/rejected": -0.14060582220554352, "step": 3406 }, { "epoch": 2.0783895073966754, "grad_norm": 3.4434444904327393, "learning_rate": 4.691243110838947e-06, "log_odds_chosen": 0.40817856788635254, "log_odds_ratio": -0.6054181456565857, "logits/chosen": -0.8651957511901855, "logits/rejected": -0.8243716359138489, "logps/chosen": -0.9024112224578857, "logps/rejected": -1.1032180786132812, "loss": 1.1605, "nll_loss": 1.1735050678253174, "rewards/accuracies": 0.5, "rewards/chosen": -0.09024112671613693, "rewards/margins": 0.020080674439668655, "rewards/rejected": -0.11032180488109589, "step": 3407 }, { "epoch": 2.0789995424736922, "grad_norm": 1.1859513521194458, "learning_rate": 4.690263319044703e-06, "log_odds_chosen": 0.6961778998374939, "log_odds_ratio": -0.7476730942726135, "logits/chosen": -0.9234545230865479, "logits/rejected": -0.8597779273986816, "logps/chosen": -0.9146542549133301, "logps/rejected": -1.5239814519882202, "loss": 1.2607, "nll_loss": 1.0256377458572388, "rewards/accuracies": 0.625, "rewards/chosen": -0.09146542847156525, "rewards/margins": 0.06093272566795349, "rewards/rejected": -0.15239815413951874, "step": 3408 }, { "epoch": 2.079609577550709, "grad_norm": 4.0981550216674805, "learning_rate": 4.6892835272504585e-06, "log_odds_chosen": 2.6339426040649414, "log_odds_ratio": -0.3112924098968506, "logits/chosen": -0.7454679012298584, "logits/rejected": -0.6871376037597656, "logps/chosen": -0.8176096081733704, "logps/rejected": -2.9761013984680176, "loss": 1.0625, "nll_loss": 0.8005833625793457, "rewards/accuracies": 0.875, "rewards/chosen": -0.0817609578371048, "rewards/margins": 0.21584919095039368, "rewards/rejected": -0.29761016368865967, "step": 3409 }, { "epoch": 2.080219612627726, "grad_norm": 1.4615005254745483, "learning_rate": 4.6883037354562155e-06, "log_odds_chosen": 1.7644875049591064, "log_odds_ratio": -0.4759752154350281, "logits/chosen": -0.9145323634147644, "logits/rejected": -0.8388063311576843, "logps/chosen": -0.7840425968170166, "logps/rejected": -2.169719696044922, "loss": 0.9509, "nll_loss": 1.0105836391448975, "rewards/accuracies": 0.625, "rewards/chosen": -0.0784042626619339, "rewards/margins": 0.1385677009820938, "rewards/rejected": -0.2169719636440277, "step": 3410 }, { "epoch": 2.080829647704743, "grad_norm": 1.9158673286437988, "learning_rate": 4.687323943661972e-06, "log_odds_chosen": 0.37295523285865784, "log_odds_ratio": -0.5939763784408569, "logits/chosen": -0.9635352492332458, "logits/rejected": -0.8518361449241638, "logps/chosen": -0.8974242210388184, "logps/rejected": -1.1823716163635254, "loss": 1.035, "nll_loss": 1.015730857849121, "rewards/accuracies": 0.75, "rewards/chosen": -0.08974242210388184, "rewards/margins": 0.02849474735558033, "rewards/rejected": -0.11823716759681702, "step": 3411 }, { "epoch": 2.08143968278176, "grad_norm": 4.051777362823486, "learning_rate": 4.686344151867728e-06, "log_odds_chosen": 1.6030369997024536, "log_odds_ratio": -0.49755990505218506, "logits/chosen": -0.7434818148612976, "logits/rejected": -0.8735827207565308, "logps/chosen": -0.9261771440505981, "logps/rejected": -2.1478240489959717, "loss": 1.1252, "nll_loss": 1.299412727355957, "rewards/accuracies": 0.75, "rewards/chosen": -0.0926177054643631, "rewards/margins": 0.12216468900442123, "rewards/rejected": -0.21478238701820374, "step": 3412 }, { "epoch": 2.082049717858777, "grad_norm": 2.0778942108154297, "learning_rate": 4.685364360073485e-06, "log_odds_chosen": 1.533784031867981, "log_odds_ratio": -0.3594488501548767, "logits/chosen": -0.5806247591972351, "logits/rejected": -0.5840065479278564, "logps/chosen": -0.7637319564819336, "logps/rejected": -1.7704707384109497, "loss": 0.9646, "nll_loss": 0.811363935470581, "rewards/accuracies": 0.75, "rewards/chosen": -0.07637319713830948, "rewards/margins": 0.1006738692522049, "rewards/rejected": -0.17704707384109497, "step": 3413 }, { "epoch": 2.0826597529357938, "grad_norm": 1.541343331336975, "learning_rate": 4.684384568279241e-06, "log_odds_chosen": 2.8123879432678223, "log_odds_ratio": -0.5295149683952332, "logits/chosen": -0.6710271835327148, "logits/rejected": -0.729675829410553, "logps/chosen": -1.0116119384765625, "logps/rejected": -3.479135751724243, "loss": 0.883, "nll_loss": 1.1389750242233276, "rewards/accuracies": 0.625, "rewards/chosen": -0.10116119682788849, "rewards/margins": 0.24675238132476807, "rewards/rejected": -0.34791356325149536, "step": 3414 }, { "epoch": 2.0832697880128106, "grad_norm": 1.4634196758270264, "learning_rate": 4.683404776484996e-06, "log_odds_chosen": 3.0013675689697266, "log_odds_ratio": -0.282185435295105, "logits/chosen": -1.0358445644378662, "logits/rejected": -1.0158766508102417, "logps/chosen": -0.9170434474945068, "logps/rejected": -3.5036733150482178, "loss": 1.1569, "nll_loss": 1.1106864213943481, "rewards/accuracies": 0.75, "rewards/chosen": -0.0917043462395668, "rewards/margins": 0.25866299867630005, "rewards/rejected": -0.35036733746528625, "step": 3415 }, { "epoch": 2.083879823089828, "grad_norm": 1.8678531646728516, "learning_rate": 4.682424984690753e-06, "log_odds_chosen": 0.5887812376022339, "log_odds_ratio": -0.7036893963813782, "logits/chosen": -0.8473566770553589, "logits/rejected": -1.0057106018066406, "logps/chosen": -1.0086581707000732, "logps/rejected": -1.566709041595459, "loss": 1.2391, "nll_loss": 1.2665352821350098, "rewards/accuracies": 0.75, "rewards/chosen": -0.10086581856012344, "rewards/margins": 0.055805087089538574, "rewards/rejected": -0.15667089819908142, "step": 3416 }, { "epoch": 2.0844898581668447, "grad_norm": 2.1604580879211426, "learning_rate": 4.681445192896509e-06, "log_odds_chosen": 1.4883571863174438, "log_odds_ratio": -0.5295505523681641, "logits/chosen": -1.1259057521820068, "logits/rejected": -1.0184599161148071, "logps/chosen": -1.010178804397583, "logps/rejected": -2.2125508785247803, "loss": 1.1979, "nll_loss": 1.1472066640853882, "rewards/accuracies": 0.5, "rewards/chosen": -0.10101788491010666, "rewards/margins": 0.12023722380399704, "rewards/rejected": -0.2212551087141037, "step": 3417 }, { "epoch": 2.0850998932438616, "grad_norm": 2.563009262084961, "learning_rate": 4.680465401102265e-06, "log_odds_chosen": 1.521393060684204, "log_odds_ratio": -0.32833316922187805, "logits/chosen": -0.756629228591919, "logits/rejected": -0.6870940923690796, "logps/chosen": -0.6417667865753174, "logps/rejected": -1.5687637329101562, "loss": 0.9896, "nll_loss": 0.8054777383804321, "rewards/accuracies": 0.875, "rewards/chosen": -0.06417667865753174, "rewards/margins": 0.09269969910383224, "rewards/rejected": -0.15687638521194458, "step": 3418 }, { "epoch": 2.0857099283208784, "grad_norm": 3.2958931922912598, "learning_rate": 4.679485609308022e-06, "log_odds_chosen": 3.289433002471924, "log_odds_ratio": -0.23341813683509827, "logits/chosen": -0.6811901330947876, "logits/rejected": -0.808265209197998, "logps/chosen": -0.7029356360435486, "logps/rejected": -3.40657377243042, "loss": 0.9997, "nll_loss": 1.06989324092865, "rewards/accuracies": 0.875, "rewards/chosen": -0.07029356807470322, "rewards/margins": 0.27036380767822266, "rewards/rejected": -0.34065738320350647, "step": 3419 }, { "epoch": 2.0863199633978953, "grad_norm": 3.3513176441192627, "learning_rate": 4.678505817513778e-06, "log_odds_chosen": 3.2529244422912598, "log_odds_ratio": -0.2009323090314865, "logits/chosen": -0.6183388233184814, "logits/rejected": -0.8039370775222778, "logps/chosen": -0.8105467557907104, "logps/rejected": -3.4694886207580566, "loss": 0.9486, "nll_loss": 0.9619777202606201, "rewards/accuracies": 0.875, "rewards/chosen": -0.0810546800494194, "rewards/margins": 0.26589417457580566, "rewards/rejected": -0.34694889187812805, "step": 3420 }, { "epoch": 2.086929998474912, "grad_norm": 1.38987135887146, "learning_rate": 4.6775260257195345e-06, "log_odds_chosen": 0.8151965737342834, "log_odds_ratio": -0.46751540899276733, "logits/chosen": -0.9853817224502563, "logits/rejected": -0.8407749533653259, "logps/chosen": -0.9004522562026978, "logps/rejected": -1.4837069511413574, "loss": 1.1286, "nll_loss": 1.210533618927002, "rewards/accuracies": 0.875, "rewards/chosen": -0.09004522114992142, "rewards/margins": 0.058325476944446564, "rewards/rejected": -0.14837069809436798, "step": 3421 }, { "epoch": 2.0875400335519294, "grad_norm": 1.7558733224868774, "learning_rate": 4.676546233925291e-06, "log_odds_chosen": 1.0299257040023804, "log_odds_ratio": -0.5674062371253967, "logits/chosen": -0.7171289920806885, "logits/rejected": -0.7298529744148254, "logps/chosen": -0.6614772081375122, "logps/rejected": -1.467688798904419, "loss": 1.0692, "nll_loss": 0.8376420736312866, "rewards/accuracies": 0.5, "rewards/chosen": -0.06614772230386734, "rewards/margins": 0.0806211531162262, "rewards/rejected": -0.14676886796951294, "step": 3422 }, { "epoch": 2.0881500686289463, "grad_norm": 3.424827814102173, "learning_rate": 4.675566442131047e-06, "log_odds_chosen": 1.646737813949585, "log_odds_ratio": -0.541104257106781, "logits/chosen": -0.789707362651825, "logits/rejected": -0.8178156018257141, "logps/chosen": -0.8206555843353271, "logps/rejected": -2.063225507736206, "loss": 0.9703, "nll_loss": 0.970398485660553, "rewards/accuracies": 0.75, "rewards/chosen": -0.08206556737422943, "rewards/margins": 0.12425698339939117, "rewards/rejected": -0.2063225358724594, "step": 3423 }, { "epoch": 2.088760103705963, "grad_norm": 1.6860285997390747, "learning_rate": 4.674586650336804e-06, "log_odds_chosen": 1.5425302982330322, "log_odds_ratio": -0.42640072107315063, "logits/chosen": -0.9188007712364197, "logits/rejected": -0.9384064674377441, "logps/chosen": -0.8403767347335815, "logps/rejected": -2.0656070709228516, "loss": 0.8856, "nll_loss": 0.9905270338058472, "rewards/accuracies": 0.625, "rewards/chosen": -0.08403768390417099, "rewards/margins": 0.12252301722764969, "rewards/rejected": -0.20656070113182068, "step": 3424 }, { "epoch": 2.08937013878298, "grad_norm": 2.259687662124634, "learning_rate": 4.67360685854256e-06, "log_odds_chosen": 2.385317802429199, "log_odds_ratio": -0.3236914873123169, "logits/chosen": -0.7760266065597534, "logits/rejected": -0.8331184387207031, "logps/chosen": -0.6085249781608582, "logps/rejected": -2.3718676567077637, "loss": 1.106, "nll_loss": 0.8989355564117432, "rewards/accuracies": 0.75, "rewards/chosen": -0.060852501541376114, "rewards/margins": 0.17633426189422607, "rewards/rejected": -0.2371867597103119, "step": 3425 }, { "epoch": 2.089980173859997, "grad_norm": 2.5406174659729004, "learning_rate": 4.672627066748315e-06, "log_odds_chosen": 2.0977697372436523, "log_odds_ratio": -0.2772500216960907, "logits/chosen": -0.9111077785491943, "logits/rejected": -0.7758520841598511, "logps/chosen": -0.8889107704162598, "logps/rejected": -2.5676074028015137, "loss": 0.9744, "nll_loss": 1.0380902290344238, "rewards/accuracies": 0.875, "rewards/chosen": -0.08889108151197433, "rewards/margins": 0.1678696572780609, "rewards/rejected": -0.25676071643829346, "step": 3426 }, { "epoch": 2.090590208937014, "grad_norm": 1.4933546781539917, "learning_rate": 4.671647274954072e-06, "log_odds_chosen": 1.7771694660186768, "log_odds_ratio": -0.40691936016082764, "logits/chosen": -0.7860090732574463, "logits/rejected": -0.7250915169715881, "logps/chosen": -0.741866409778595, "logps/rejected": -2.135502815246582, "loss": 1.0164, "nll_loss": 0.8041539788246155, "rewards/accuracies": 0.75, "rewards/chosen": -0.07418663799762726, "rewards/margins": 0.139363631606102, "rewards/rejected": -0.21355026960372925, "step": 3427 }, { "epoch": 2.091200244014031, "grad_norm": 2.0435523986816406, "learning_rate": 4.670667483159828e-06, "log_odds_chosen": 2.593143939971924, "log_odds_ratio": -0.30554091930389404, "logits/chosen": -0.9188857078552246, "logits/rejected": -0.8665510416030884, "logps/chosen": -0.9364505410194397, "logps/rejected": -3.106891632080078, "loss": 1.2998, "nll_loss": 1.1650320291519165, "rewards/accuracies": 0.75, "rewards/chosen": -0.09364505857229233, "rewards/margins": 0.21704411506652832, "rewards/rejected": -0.31068915128707886, "step": 3428 }, { "epoch": 2.0918102790910478, "grad_norm": 1.2391277551651, "learning_rate": 4.669687691365584e-06, "log_odds_chosen": 1.1609984636306763, "log_odds_ratio": -0.5753551721572876, "logits/chosen": -0.784819483757019, "logits/rejected": -0.8184065818786621, "logps/chosen": -0.7684091329574585, "logps/rejected": -1.4346060752868652, "loss": 1.1528, "nll_loss": 1.0456658601760864, "rewards/accuracies": 0.5, "rewards/chosen": -0.07684091478586197, "rewards/margins": 0.06661969423294067, "rewards/rejected": -0.14346061646938324, "step": 3429 }, { "epoch": 2.0924203141680646, "grad_norm": 1.7877888679504395, "learning_rate": 4.668707899571341e-06, "log_odds_chosen": 0.967564582824707, "log_odds_ratio": -0.5862137675285339, "logits/chosen": -0.9629517793655396, "logits/rejected": -0.9745800495147705, "logps/chosen": -1.0228309631347656, "logps/rejected": -1.8852550983428955, "loss": 1.1598, "nll_loss": 1.0812314748764038, "rewards/accuracies": 0.75, "rewards/chosen": -0.10228311270475388, "rewards/margins": 0.08624240010976791, "rewards/rejected": -0.1885254979133606, "step": 3430 }, { "epoch": 2.0930303492450815, "grad_norm": 5.347431659698486, "learning_rate": 4.6677281077770975e-06, "log_odds_chosen": 0.5802560448646545, "log_odds_ratio": -0.5800502300262451, "logits/chosen": -0.6924103498458862, "logits/rejected": -0.6693403124809265, "logps/chosen": -0.5303856134414673, "logps/rejected": -0.9669095277786255, "loss": 1.0422, "nll_loss": 0.7996801733970642, "rewards/accuracies": 0.625, "rewards/chosen": -0.05303855985403061, "rewards/margins": 0.04365239292383194, "rewards/rejected": -0.09669095277786255, "step": 3431 }, { "epoch": 2.0936403843220983, "grad_norm": 1.5682873725891113, "learning_rate": 4.666748315982853e-06, "log_odds_chosen": 2.0620369911193848, "log_odds_ratio": -0.2867644429206848, "logits/chosen": -0.9059122800827026, "logits/rejected": -0.8594101071357727, "logps/chosen": -0.7955828905105591, "logps/rejected": -2.4442155361175537, "loss": 1.0955, "nll_loss": 0.8858511447906494, "rewards/accuracies": 0.75, "rewards/chosen": -0.07955829054117203, "rewards/margins": 0.16486325860023499, "rewards/rejected": -0.2444215565919876, "step": 3432 }, { "epoch": 2.0942504193991156, "grad_norm": 1.0657508373260498, "learning_rate": 4.66576852418861e-06, "log_odds_chosen": 1.3140616416931152, "log_odds_ratio": -0.4883476495742798, "logits/chosen": -0.8000844717025757, "logits/rejected": -0.7287721037864685, "logps/chosen": -0.7921160459518433, "logps/rejected": -1.8055696487426758, "loss": 1.0803, "nll_loss": 1.004786729812622, "rewards/accuracies": 0.625, "rewards/chosen": -0.07921160757541656, "rewards/margins": 0.10134535282850266, "rewards/rejected": -0.18055696785449982, "step": 3433 }, { "epoch": 2.0948604544761324, "grad_norm": 3.282809257507324, "learning_rate": 4.664788732394366e-06, "log_odds_chosen": 2.583451747894287, "log_odds_ratio": -0.39592212438583374, "logits/chosen": -1.1037806272506714, "logits/rejected": -0.9987195134162903, "logps/chosen": -0.9262784123420715, "logps/rejected": -3.2322840690612793, "loss": 1.1546, "nll_loss": 1.3911001682281494, "rewards/accuracies": 0.875, "rewards/chosen": -0.09262784570455551, "rewards/margins": 0.2306005358695984, "rewards/rejected": -0.3232283890247345, "step": 3434 }, { "epoch": 2.0954704895531493, "grad_norm": 1.5210293531417847, "learning_rate": 4.663808940600123e-06, "log_odds_chosen": 1.5070691108703613, "log_odds_ratio": -0.42320600152015686, "logits/chosen": -0.8205026388168335, "logits/rejected": -0.8847484588623047, "logps/chosen": -0.7941902279853821, "logps/rejected": -1.864396095275879, "loss": 0.9494, "nll_loss": 1.0612385272979736, "rewards/accuracies": 0.75, "rewards/chosen": -0.07941903173923492, "rewards/margins": 0.10702059417963028, "rewards/rejected": -0.1864396035671234, "step": 3435 }, { "epoch": 2.096080524630166, "grad_norm": 1.4034675359725952, "learning_rate": 4.662829148805879e-06, "log_odds_chosen": 1.6949223279953003, "log_odds_ratio": -0.3588288128376007, "logits/chosen": -1.0698803663253784, "logits/rejected": -0.9583494663238525, "logps/chosen": -1.0423533916473389, "logps/rejected": -2.4835472106933594, "loss": 1.2953, "nll_loss": 1.3311259746551514, "rewards/accuracies": 0.75, "rewards/chosen": -0.10423533618450165, "rewards/margins": 0.14411939680576324, "rewards/rejected": -0.2483547180891037, "step": 3436 }, { "epoch": 2.096690559707183, "grad_norm": 4.62075138092041, "learning_rate": 4.661849357011634e-06, "log_odds_chosen": 1.0627655982971191, "log_odds_ratio": -0.41791683435440063, "logits/chosen": -1.134454607963562, "logits/rejected": -0.8935819268226624, "logps/chosen": -0.9695844054222107, "logps/rejected": -1.7709085941314697, "loss": 1.0283, "nll_loss": 1.1621577739715576, "rewards/accuracies": 0.75, "rewards/chosen": -0.09695844352245331, "rewards/margins": 0.08013242483139038, "rewards/rejected": -0.1770908534526825, "step": 3437 }, { "epoch": 2.0973005947842003, "grad_norm": 1.7299400568008423, "learning_rate": 4.660869565217391e-06, "log_odds_chosen": 1.6153167486190796, "log_odds_ratio": -0.5220927000045776, "logits/chosen": -0.8387751579284668, "logits/rejected": -0.8702648282051086, "logps/chosen": -0.7174173593521118, "logps/rejected": -2.077920436859131, "loss": 1.0688, "nll_loss": 1.0134501457214355, "rewards/accuracies": 0.625, "rewards/chosen": -0.0717417299747467, "rewards/margins": 0.13605031371116638, "rewards/rejected": -0.20779204368591309, "step": 3438 }, { "epoch": 2.097910629861217, "grad_norm": 2.677706241607666, "learning_rate": 4.659889773423147e-06, "log_odds_chosen": 1.7819974422454834, "log_odds_ratio": -0.28227823972702026, "logits/chosen": -0.7002838850021362, "logits/rejected": -0.6130955219268799, "logps/chosen": -0.7404521703720093, "logps/rejected": -2.0752758979797363, "loss": 1.1081, "nll_loss": 0.9057707786560059, "rewards/accuracies": 1.0, "rewards/chosen": -0.07404521107673645, "rewards/margins": 0.13348238170146942, "rewards/rejected": -0.20752759277820587, "step": 3439 }, { "epoch": 2.098520664938234, "grad_norm": 3.8849992752075195, "learning_rate": 4.658909981628903e-06, "log_odds_chosen": 3.8058981895446777, "log_odds_ratio": -0.2294573038816452, "logits/chosen": -0.6674071550369263, "logits/rejected": -0.7004518508911133, "logps/chosen": -0.6698341965675354, "logps/rejected": -3.6700196266174316, "loss": 1.0079, "nll_loss": 0.8345171809196472, "rewards/accuracies": 0.875, "rewards/chosen": -0.0669834166765213, "rewards/margins": 0.3000185489654541, "rewards/rejected": -0.3670019805431366, "step": 3440 }, { "epoch": 2.099130700015251, "grad_norm": 1.4124467372894287, "learning_rate": 4.65793018983466e-06, "log_odds_chosen": 1.328834056854248, "log_odds_ratio": -0.5195116400718689, "logits/chosen": -0.781893253326416, "logits/rejected": -0.7160043716430664, "logps/chosen": -0.8143520355224609, "logps/rejected": -1.957564115524292, "loss": 0.993, "nll_loss": 0.9915516376495361, "rewards/accuracies": 0.5, "rewards/chosen": -0.0814351961016655, "rewards/margins": 0.11432121694087982, "rewards/rejected": -0.19575642049312592, "step": 3441 }, { "epoch": 2.0997407350922677, "grad_norm": 11.706282615661621, "learning_rate": 4.6569503980404165e-06, "log_odds_chosen": 2.8111038208007812, "log_odds_ratio": -0.35265225172042847, "logits/chosen": -0.8109731078147888, "logits/rejected": -0.8200172781944275, "logps/chosen": -0.6548792719841003, "logps/rejected": -2.931333065032959, "loss": 1.0222, "nll_loss": 0.7986090183258057, "rewards/accuracies": 0.875, "rewards/chosen": -0.06548792868852615, "rewards/margins": 0.2276453673839569, "rewards/rejected": -0.29313331842422485, "step": 3442 }, { "epoch": 2.1003507701692845, "grad_norm": 4.094052314758301, "learning_rate": 4.655970606246172e-06, "log_odds_chosen": 0.266127347946167, "log_odds_ratio": -0.7090519666671753, "logits/chosen": -0.9467118382453918, "logits/rejected": -1.0291357040405273, "logps/chosen": -1.0887553691864014, "logps/rejected": -1.1495739221572876, "loss": 1.1024, "nll_loss": 1.238197684288025, "rewards/accuracies": 0.625, "rewards/chosen": -0.10887554287910461, "rewards/margins": 0.006081863306462765, "rewards/rejected": -0.11495740711688995, "step": 3443 }, { "epoch": 2.100960805246302, "grad_norm": 1.1218974590301514, "learning_rate": 4.654990814451929e-06, "log_odds_chosen": 1.2826378345489502, "log_odds_ratio": -0.5264571309089661, "logits/chosen": -0.7122917175292969, "logits/rejected": -0.868762195110321, "logps/chosen": -0.7635124921798706, "logps/rejected": -1.6935240030288696, "loss": 0.9461, "nll_loss": 0.9129049181938171, "rewards/accuracies": 0.75, "rewards/chosen": -0.07635124027729034, "rewards/margins": 0.09300114959478378, "rewards/rejected": -0.16935239732265472, "step": 3444 }, { "epoch": 2.1015708403233186, "grad_norm": 1.4740772247314453, "learning_rate": 4.654011022657685e-06, "log_odds_chosen": 2.2474334239959717, "log_odds_ratio": -0.2717689871788025, "logits/chosen": -0.6955758333206177, "logits/rejected": -0.8533936738967896, "logps/chosen": -0.7658345699310303, "logps/rejected": -2.503567695617676, "loss": 0.9219, "nll_loss": 0.8761427402496338, "rewards/accuracies": 1.0, "rewards/chosen": -0.07658345997333527, "rewards/margins": 0.17377331852912903, "rewards/rejected": -0.2503567636013031, "step": 3445 }, { "epoch": 2.1021808754003355, "grad_norm": 1.2696937322616577, "learning_rate": 4.653031230863441e-06, "log_odds_chosen": 0.7501980066299438, "log_odds_ratio": -0.4886636435985565, "logits/chosen": -0.9438859224319458, "logits/rejected": -0.799977719783783, "logps/chosen": -0.8815085291862488, "logps/rejected": -1.4174823760986328, "loss": 0.987, "nll_loss": 0.9117724299430847, "rewards/accuracies": 0.75, "rewards/chosen": -0.08815085887908936, "rewards/margins": 0.05359739065170288, "rewards/rejected": -0.14174824953079224, "step": 3446 }, { "epoch": 2.1027909104773523, "grad_norm": 1.9104325771331787, "learning_rate": 4.652051439069198e-06, "log_odds_chosen": 0.658094584941864, "log_odds_ratio": -0.5359553694725037, "logits/chosen": -0.8915557265281677, "logits/rejected": -0.7944828271865845, "logps/chosen": -0.7990496158599854, "logps/rejected": -1.2974121570587158, "loss": 0.8861, "nll_loss": 0.8954119682312012, "rewards/accuracies": 0.625, "rewards/chosen": -0.0799049586057663, "rewards/margins": 0.04983625188469887, "rewards/rejected": -0.12974122166633606, "step": 3447 }, { "epoch": 2.103400945554369, "grad_norm": 4.814967632293701, "learning_rate": 4.651071647274954e-06, "log_odds_chosen": 0.39006519317626953, "log_odds_ratio": -0.5577057600021362, "logits/chosen": -0.7762300968170166, "logits/rejected": -0.7589071989059448, "logps/chosen": -0.7978861331939697, "logps/rejected": -1.021101951599121, "loss": 1.1027, "nll_loss": 0.9521746039390564, "rewards/accuracies": 0.75, "rewards/chosen": -0.07978861778974533, "rewards/margins": 0.022321585565805435, "rewards/rejected": -0.10211019963026047, "step": 3448 }, { "epoch": 2.1040109806313865, "grad_norm": 3.743791341781616, "learning_rate": 4.65009185548071e-06, "log_odds_chosen": 0.7252000570297241, "log_odds_ratio": -0.5924098491668701, "logits/chosen": -0.9901183843612671, "logits/rejected": -0.9365497827529907, "logps/chosen": -0.7708336114883423, "logps/rejected": -1.2238364219665527, "loss": 1.0247, "nll_loss": 1.1273905038833618, "rewards/accuracies": 0.5, "rewards/chosen": -0.07708335667848587, "rewards/margins": 0.04530029371380806, "rewards/rejected": -0.12238365411758423, "step": 3449 }, { "epoch": 2.1046210157084033, "grad_norm": 1.4264426231384277, "learning_rate": 4.649112063686466e-06, "log_odds_chosen": 2.6007580757141113, "log_odds_ratio": -0.32544153928756714, "logits/chosen": -0.8827750086784363, "logits/rejected": -0.7993157505989075, "logps/chosen": -0.8554539680480957, "logps/rejected": -2.8904144763946533, "loss": 1.1264, "nll_loss": 1.0306518077850342, "rewards/accuracies": 1.0, "rewards/chosen": -0.08554539084434509, "rewards/margins": 0.203496053814888, "rewards/rejected": -0.2890414297580719, "step": 3450 }, { "epoch": 2.10523105078542, "grad_norm": 1.185423731803894, "learning_rate": 4.6481322718922225e-06, "log_odds_chosen": 2.4847168922424316, "log_odds_ratio": -0.39077961444854736, "logits/chosen": -0.9407099485397339, "logits/rejected": -0.9480397701263428, "logps/chosen": -0.7931902408599854, "logps/rejected": -2.6866350173950195, "loss": 1.1293, "nll_loss": 0.9968500137329102, "rewards/accuracies": 0.875, "rewards/chosen": -0.07931902259588242, "rewards/margins": 0.18934446573257446, "rewards/rejected": -0.2686634957790375, "step": 3451 }, { "epoch": 2.105841085862437, "grad_norm": 1.7996524572372437, "learning_rate": 4.647152480097979e-06, "log_odds_chosen": 1.7912609577178955, "log_odds_ratio": -0.4941343069076538, "logits/chosen": -0.9650744795799255, "logits/rejected": -0.9490246772766113, "logps/chosen": -0.8158069252967834, "logps/rejected": -2.2621538639068604, "loss": 1.2443, "nll_loss": 1.0295839309692383, "rewards/accuracies": 0.625, "rewards/chosen": -0.08158070594072342, "rewards/margins": 0.1446346938610077, "rewards/rejected": -0.2262153923511505, "step": 3452 }, { "epoch": 2.106451120939454, "grad_norm": 1.5842984914779663, "learning_rate": 4.6461726883037355e-06, "log_odds_chosen": 2.245237350463867, "log_odds_ratio": -0.296215295791626, "logits/chosen": -0.8187314867973328, "logits/rejected": -0.8284119963645935, "logps/chosen": -0.5228824615478516, "logps/rejected": -2.1454689502716064, "loss": 0.9549, "nll_loss": 0.7550396919250488, "rewards/accuracies": 1.0, "rewards/chosen": -0.0522882454097271, "rewards/margins": 0.16225865483283997, "rewards/rejected": -0.21454691886901855, "step": 3453 }, { "epoch": 2.107061156016471, "grad_norm": 1.0110045671463013, "learning_rate": 4.645192896509491e-06, "log_odds_chosen": 0.3279355764389038, "log_odds_ratio": -0.5760331153869629, "logits/chosen": -0.7537808418273926, "logits/rejected": -0.7548210620880127, "logps/chosen": -0.8822599649429321, "logps/rejected": -1.064255952835083, "loss": 1.0811, "nll_loss": 1.0259674787521362, "rewards/accuracies": 0.75, "rewards/chosen": -0.08822599053382874, "rewards/margins": 0.018199613317847252, "rewards/rejected": -0.10642560571432114, "step": 3454 }, { "epoch": 2.107671191093488, "grad_norm": 1.0732041597366333, "learning_rate": 4.644213104715248e-06, "log_odds_chosen": 1.6490869522094727, "log_odds_ratio": -0.4150310754776001, "logits/chosen": -0.9960216283798218, "logits/rejected": -0.8530455231666565, "logps/chosen": -0.7778338193893433, "logps/rejected": -2.071009635925293, "loss": 0.963, "nll_loss": 0.8245631456375122, "rewards/accuracies": 0.75, "rewards/chosen": -0.07778337597846985, "rewards/margins": 0.12931758165359497, "rewards/rejected": -0.20710094273090363, "step": 3455 }, { "epoch": 2.108281226170505, "grad_norm": 1.2533801794052124, "learning_rate": 4.643233312921004e-06, "log_odds_chosen": 2.8440332412719727, "log_odds_ratio": -0.26801520586013794, "logits/chosen": -0.5398253798484802, "logits/rejected": -0.5517732501029968, "logps/chosen": -0.6151379942893982, "logps/rejected": -2.719202756881714, "loss": 0.9944, "nll_loss": 0.7601838111877441, "rewards/accuracies": 0.875, "rewards/chosen": -0.06151380389928818, "rewards/margins": 0.21040646731853485, "rewards/rejected": -0.27192026376724243, "step": 3456 }, { "epoch": 2.1088912612475217, "grad_norm": 1.5990021228790283, "learning_rate": 4.64225352112676e-06, "log_odds_chosen": 0.40080922842025757, "log_odds_ratio": -0.5359519720077515, "logits/chosen": -0.8652513027191162, "logits/rejected": -0.7843136787414551, "logps/chosen": -0.6961172819137573, "logps/rejected": -0.9315497279167175, "loss": 0.9553, "nll_loss": 0.9154911637306213, "rewards/accuracies": 0.875, "rewards/chosen": -0.06961173564195633, "rewards/margins": 0.023543238639831543, "rewards/rejected": -0.09315498173236847, "step": 3457 }, { "epoch": 2.1095012963245385, "grad_norm": 1.94535231590271, "learning_rate": 4.641273729332517e-06, "log_odds_chosen": 1.3194389343261719, "log_odds_ratio": -0.5591382384300232, "logits/chosen": -0.9667230844497681, "logits/rejected": -0.9238499402999878, "logps/chosen": -0.868605375289917, "logps/rejected": -1.987409234046936, "loss": 1.0932, "nll_loss": 1.112922191619873, "rewards/accuracies": 0.5, "rewards/chosen": -0.0868605375289917, "rewards/margins": 0.11188039183616638, "rewards/rejected": -0.19874092936515808, "step": 3458 }, { "epoch": 2.1101113314015554, "grad_norm": 1.1663848161697388, "learning_rate": 4.640293937538273e-06, "log_odds_chosen": 3.642271041870117, "log_odds_ratio": -0.30774635076522827, "logits/chosen": -0.7696238160133362, "logits/rejected": -0.7938554883003235, "logps/chosen": -0.681857705116272, "logps/rejected": -3.7139079570770264, "loss": 1.0681, "nll_loss": 0.8775579333305359, "rewards/accuracies": 0.75, "rewards/chosen": -0.06818576902151108, "rewards/margins": 0.3032050132751465, "rewards/rejected": -0.37139081954956055, "step": 3459 }, { "epoch": 2.1107213664785727, "grad_norm": 2.922379970550537, "learning_rate": 4.639314145744028e-06, "log_odds_chosen": 0.2348158061504364, "log_odds_ratio": -0.8258494734764099, "logits/chosen": -0.8813740015029907, "logits/rejected": -0.8932250142097473, "logps/chosen": -1.0872631072998047, "logps/rejected": -1.0988965034484863, "loss": 1.0489, "nll_loss": 0.8880813121795654, "rewards/accuracies": 0.625, "rewards/chosen": -0.10872631520032883, "rewards/margins": 0.0011633271351456642, "rewards/rejected": -0.10988964140415192, "step": 3460 }, { "epoch": 2.1113314015555895, "grad_norm": 1.7936896085739136, "learning_rate": 4.638334353949785e-06, "log_odds_chosen": 2.828016757965088, "log_odds_ratio": -0.25759366154670715, "logits/chosen": -0.6350826621055603, "logits/rejected": -0.7620657682418823, "logps/chosen": -0.5176504850387573, "logps/rejected": -2.4878551959991455, "loss": 1.0962, "nll_loss": 0.8253413438796997, "rewards/accuracies": 0.75, "rewards/chosen": -0.05176505446434021, "rewards/margins": 0.1970204859972, "rewards/rejected": -0.24878552556037903, "step": 3461 }, { "epoch": 2.1119414366326064, "grad_norm": 4.130039215087891, "learning_rate": 4.6373545621555415e-06, "log_odds_chosen": 1.3446530103683472, "log_odds_ratio": -0.3814004957675934, "logits/chosen": -0.818640410900116, "logits/rejected": -0.6663857698440552, "logps/chosen": -0.7477964758872986, "logps/rejected": -1.708168387413025, "loss": 1.1588, "nll_loss": 0.8835543394088745, "rewards/accuracies": 0.75, "rewards/chosen": -0.07477964460849762, "rewards/margins": 0.09603719413280487, "rewards/rejected": -0.1708168387413025, "step": 3462 }, { "epoch": 2.112551471709623, "grad_norm": 1.6249234676361084, "learning_rate": 4.6363747703612985e-06, "log_odds_chosen": 3.4462454319000244, "log_odds_ratio": -0.15196387469768524, "logits/chosen": -0.7396771311759949, "logits/rejected": -0.8913955092430115, "logps/chosen": -0.5283348560333252, "logps/rejected": -3.0997729301452637, "loss": 0.8954, "nll_loss": 0.6812947988510132, "rewards/accuracies": 1.0, "rewards/chosen": -0.05283348634839058, "rewards/margins": 0.2571437954902649, "rewards/rejected": -0.30997729301452637, "step": 3463 }, { "epoch": 2.11316150678664, "grad_norm": 1.0676486492156982, "learning_rate": 4.635394978567055e-06, "log_odds_chosen": 0.3584381341934204, "log_odds_ratio": -0.6094081997871399, "logits/chosen": -1.0845953226089478, "logits/rejected": -0.9639327526092529, "logps/chosen": -0.9462476968765259, "logps/rejected": -1.1617188453674316, "loss": 1.1363, "nll_loss": 1.0651658773422241, "rewards/accuracies": 0.75, "rewards/chosen": -0.09462476521730423, "rewards/margins": 0.0215471088886261, "rewards/rejected": -0.11617187410593033, "step": 3464 }, { "epoch": 2.1137715418636573, "grad_norm": 1.542076587677002, "learning_rate": 4.634415186772811e-06, "log_odds_chosen": 1.7034320831298828, "log_odds_ratio": -0.3735343813896179, "logits/chosen": -0.9637048244476318, "logits/rejected": -0.9919798374176025, "logps/chosen": -0.7220070362091064, "logps/rejected": -1.941695213317871, "loss": 0.8974, "nll_loss": 0.9784156084060669, "rewards/accuracies": 0.75, "rewards/chosen": -0.0722007006406784, "rewards/margins": 0.1219688206911087, "rewards/rejected": -0.19416950643062592, "step": 3465 }, { "epoch": 2.114381576940674, "grad_norm": 2.2766878604888916, "learning_rate": 4.633435394978567e-06, "log_odds_chosen": 0.6496603488922119, "log_odds_ratio": -0.5248321890830994, "logits/chosen": -1.0114092826843262, "logits/rejected": -0.9786829352378845, "logps/chosen": -0.9349936246871948, "logps/rejected": -1.3791120052337646, "loss": 1.2004, "nll_loss": 1.3035194873809814, "rewards/accuracies": 0.625, "rewards/chosen": -0.09349936246871948, "rewards/margins": 0.04441182315349579, "rewards/rejected": -0.13791118562221527, "step": 3466 }, { "epoch": 2.114991612017691, "grad_norm": 1.425133228302002, "learning_rate": 4.632455603184323e-06, "log_odds_chosen": 1.579370379447937, "log_odds_ratio": -0.4902091324329376, "logits/chosen": -0.8793547749519348, "logits/rejected": -0.9449479579925537, "logps/chosen": -0.8107865452766418, "logps/rejected": -1.919676661491394, "loss": 1.0546, "nll_loss": 0.9016145467758179, "rewards/accuracies": 0.625, "rewards/chosen": -0.0810786560177803, "rewards/margins": 0.1108890175819397, "rewards/rejected": -0.1919676661491394, "step": 3467 }, { "epoch": 2.115601647094708, "grad_norm": 1.8021358251571655, "learning_rate": 4.631475811390079e-06, "log_odds_chosen": 1.171524167060852, "log_odds_ratio": -0.4411817789077759, "logits/chosen": -0.8396528363227844, "logits/rejected": -0.9598815441131592, "logps/chosen": -0.7437320947647095, "logps/rejected": -1.5951652526855469, "loss": 1.0967, "nll_loss": 0.9323896169662476, "rewards/accuracies": 0.875, "rewards/chosen": -0.07437321543693542, "rewards/margins": 0.0851433202624321, "rewards/rejected": -0.15951652824878693, "step": 3468 }, { "epoch": 2.1162116821717247, "grad_norm": 2.8757197856903076, "learning_rate": 4.630496019595836e-06, "log_odds_chosen": 1.775836706161499, "log_odds_ratio": -0.568576455116272, "logits/chosen": -0.8862267136573792, "logits/rejected": -0.8289909958839417, "logps/chosen": -0.852263331413269, "logps/rejected": -2.3077354431152344, "loss": 1.0803, "nll_loss": 0.9436986446380615, "rewards/accuracies": 0.5, "rewards/chosen": -0.08522632718086243, "rewards/margins": 0.14554722607135773, "rewards/rejected": -0.23077356815338135, "step": 3469 }, { "epoch": 2.116821717248742, "grad_norm": 1.4647737741470337, "learning_rate": 4.629516227801592e-06, "log_odds_chosen": 1.3604680299758911, "log_odds_ratio": -0.4884680509567261, "logits/chosen": -0.796331524848938, "logits/rejected": -0.9292788505554199, "logps/chosen": -0.9001736640930176, "logps/rejected": -1.8453844785690308, "loss": 1.0272, "nll_loss": 1.035989761352539, "rewards/accuracies": 0.75, "rewards/chosen": -0.09001736342906952, "rewards/margins": 0.09452107548713684, "rewards/rejected": -0.18453843891620636, "step": 3470 }, { "epoch": 2.117431752325759, "grad_norm": 2.5273592472076416, "learning_rate": 4.628536436007348e-06, "log_odds_chosen": 0.9419946670532227, "log_odds_ratio": -0.5137043595314026, "logits/chosen": -0.841253399848938, "logits/rejected": -0.8289709091186523, "logps/chosen": -0.6578493118286133, "logps/rejected": -1.3660565614700317, "loss": 0.9342, "nll_loss": 0.8060392141342163, "rewards/accuracies": 0.75, "rewards/chosen": -0.06578493118286133, "rewards/margins": 0.07082073390483856, "rewards/rejected": -0.1366056650876999, "step": 3471 }, { "epoch": 2.1180417874027757, "grad_norm": 1.6888355016708374, "learning_rate": 4.627556644213104e-06, "log_odds_chosen": 1.3390848636627197, "log_odds_ratio": -0.5259073972702026, "logits/chosen": -0.9297871589660645, "logits/rejected": -0.9984228014945984, "logps/chosen": -0.8526566028594971, "logps/rejected": -1.9119452238082886, "loss": 1.0126, "nll_loss": 1.0008660554885864, "rewards/accuracies": 0.75, "rewards/chosen": -0.08526566624641418, "rewards/margins": 0.10592885315418243, "rewards/rejected": -0.19119451940059662, "step": 3472 }, { "epoch": 2.1186518224797926, "grad_norm": 3.54485821723938, "learning_rate": 4.6265768524188605e-06, "log_odds_chosen": 3.5944411754608154, "log_odds_ratio": -0.3374304175376892, "logits/chosen": -0.6560598015785217, "logits/rejected": -0.9301100969314575, "logps/chosen": -0.6088846325874329, "logps/rejected": -3.465744972229004, "loss": 1.0127, "nll_loss": 0.8206021785736084, "rewards/accuracies": 0.875, "rewards/chosen": -0.06088846176862717, "rewards/margins": 0.28568607568740845, "rewards/rejected": -0.3465745151042938, "step": 3473 }, { "epoch": 2.1192618575568094, "grad_norm": 1.516389012336731, "learning_rate": 4.6255970606246175e-06, "log_odds_chosen": 2.0697951316833496, "log_odds_ratio": -0.2699970006942749, "logits/chosen": -0.6704572439193726, "logits/rejected": -0.6265944242477417, "logps/chosen": -0.7399497032165527, "logps/rejected": -2.230802536010742, "loss": 1.143, "nll_loss": 1.0130950212478638, "rewards/accuracies": 0.875, "rewards/chosen": -0.07399497181177139, "rewards/margins": 0.14908528327941895, "rewards/rejected": -0.22308024764060974, "step": 3474 }, { "epoch": 2.1198718926338262, "grad_norm": 1.2131381034851074, "learning_rate": 4.624617268830374e-06, "log_odds_chosen": 1.901688575744629, "log_odds_ratio": -0.3448683023452759, "logits/chosen": -0.944391131401062, "logits/rejected": -1.0177630186080933, "logps/chosen": -0.8097535967826843, "logps/rejected": -2.243508815765381, "loss": 1.0138, "nll_loss": 0.9213222861289978, "rewards/accuracies": 0.75, "rewards/chosen": -0.08097535371780396, "rewards/margins": 0.14337551593780518, "rewards/rejected": -0.22435086965560913, "step": 3475 }, { "epoch": 2.1204819277108435, "grad_norm": 1.9952768087387085, "learning_rate": 4.62363747703613e-06, "log_odds_chosen": 1.3262168169021606, "log_odds_ratio": -0.4227803945541382, "logits/chosen": -0.8089631795883179, "logits/rejected": -0.785707950592041, "logps/chosen": -0.7397491931915283, "logps/rejected": -1.6555356979370117, "loss": 1.1471, "nll_loss": 0.9219493269920349, "rewards/accuracies": 0.75, "rewards/chosen": -0.07397492229938507, "rewards/margins": 0.0915786623954773, "rewards/rejected": -0.16555358469486237, "step": 3476 }, { "epoch": 2.1210919627878604, "grad_norm": 1.9113959074020386, "learning_rate": 4.622657685241886e-06, "log_odds_chosen": 0.7136847972869873, "log_odds_ratio": -0.6187278628349304, "logits/chosen": -0.8503879308700562, "logits/rejected": -0.8177233934402466, "logps/chosen": -0.833283543586731, "logps/rejected": -1.2931612730026245, "loss": 1.3069, "nll_loss": 1.0750243663787842, "rewards/accuracies": 0.5, "rewards/chosen": -0.08332835137844086, "rewards/margins": 0.045987773686647415, "rewards/rejected": -0.12931612133979797, "step": 3477 }, { "epoch": 2.1217019978648772, "grad_norm": 1.419236660003662, "learning_rate": 4.621677893447642e-06, "log_odds_chosen": -0.1253965198993683, "log_odds_ratio": -0.7963637709617615, "logits/chosen": -1.0780984163284302, "logits/rejected": -0.9037010073661804, "logps/chosen": -1.0083986520767212, "logps/rejected": -0.9221310615539551, "loss": 1.1298, "nll_loss": 1.1034184694290161, "rewards/accuracies": 0.625, "rewards/chosen": -0.10083986818790436, "rewards/margins": -0.008626759052276611, "rewards/rejected": -0.09221310913562775, "step": 3478 }, { "epoch": 2.122312032941894, "grad_norm": 1.3433946371078491, "learning_rate": 4.620698101653398e-06, "log_odds_chosen": 1.166944980621338, "log_odds_ratio": -0.6466816663742065, "logits/chosen": -1.1662676334381104, "logits/rejected": -1.1239399909973145, "logps/chosen": -0.9592186212539673, "logps/rejected": -2.098825693130493, "loss": 1.0364, "nll_loss": 1.085984230041504, "rewards/accuracies": 0.5, "rewards/chosen": -0.09592185914516449, "rewards/margins": 0.11396069824695587, "rewards/rejected": -0.20988255739212036, "step": 3479 }, { "epoch": 2.122922068018911, "grad_norm": 2.308880090713501, "learning_rate": 4.619718309859155e-06, "log_odds_chosen": -0.07756815105676651, "log_odds_ratio": -0.8288689851760864, "logits/chosen": -0.8497066497802734, "logits/rejected": -0.8118457794189453, "logps/chosen": -1.0884590148925781, "logps/rejected": -1.0818694829940796, "loss": 0.9337, "nll_loss": 1.1155810356140137, "rewards/accuracies": 0.25, "rewards/chosen": -0.10884591192007065, "rewards/margins": -0.0006589507684111595, "rewards/rejected": -0.10818696022033691, "step": 3480 }, { "epoch": 2.123532103095928, "grad_norm": 1.762345552444458, "learning_rate": 4.618738518064911e-06, "log_odds_chosen": 1.0693035125732422, "log_odds_ratio": -0.4355458915233612, "logits/chosen": -0.6821069717407227, "logits/rejected": -0.6337220072746277, "logps/chosen": -0.6401296257972717, "logps/rejected": -1.2647520303726196, "loss": 1.0026, "nll_loss": 0.8667131662368774, "rewards/accuracies": 0.75, "rewards/chosen": -0.06401295959949493, "rewards/margins": 0.06246224045753479, "rewards/rejected": -0.12647520005702972, "step": 3481 }, { "epoch": 2.124142138172945, "grad_norm": 1.0960516929626465, "learning_rate": 4.617758726270667e-06, "log_odds_chosen": 3.288168430328369, "log_odds_ratio": -0.13853497803211212, "logits/chosen": -0.766974151134491, "logits/rejected": -0.9797167778015137, "logps/chosen": -0.612662672996521, "logps/rejected": -3.0716166496276855, "loss": 0.9835, "nll_loss": 0.7576842308044434, "rewards/accuracies": 1.0, "rewards/chosen": -0.06126626580953598, "rewards/margins": 0.2458954155445099, "rewards/rejected": -0.3071616590023041, "step": 3482 }, { "epoch": 2.124752173249962, "grad_norm": 2.913738250732422, "learning_rate": 4.6167789344764234e-06, "log_odds_chosen": 1.9861737489700317, "log_odds_ratio": -0.2861028015613556, "logits/chosen": -0.8786911368370056, "logits/rejected": -1.0248948335647583, "logps/chosen": -0.7050269246101379, "logps/rejected": -2.1517372131347656, "loss": 1.08, "nll_loss": 0.9878689646720886, "rewards/accuracies": 0.875, "rewards/chosen": -0.07050269097089767, "rewards/margins": 0.14467103779315948, "rewards/rejected": -0.21517372131347656, "step": 3483 }, { "epoch": 2.1253622083269788, "grad_norm": 1.9748375415802002, "learning_rate": 4.6157991426821796e-06, "log_odds_chosen": 5.051239967346191, "log_odds_ratio": -0.10002100467681885, "logits/chosen": -0.8265080451965332, "logits/rejected": -0.9437551498413086, "logps/chosen": -0.5808967351913452, "logps/rejected": -4.7913103103637695, "loss": 0.9657, "nll_loss": 0.8553664088249207, "rewards/accuracies": 1.0, "rewards/chosen": -0.05808967351913452, "rewards/margins": 0.4210413098335266, "rewards/rejected": -0.4791310131549835, "step": 3484 }, { "epoch": 2.1259722434039956, "grad_norm": 1.9645415544509888, "learning_rate": 4.614819350887936e-06, "log_odds_chosen": 2.610640287399292, "log_odds_ratio": -0.24973051249980927, "logits/chosen": -0.6741077303886414, "logits/rejected": -0.8695501089096069, "logps/chosen": -0.5549731254577637, "logps/rejected": -2.5582473278045654, "loss": 1.1499, "nll_loss": 1.0096518993377686, "rewards/accuracies": 1.0, "rewards/chosen": -0.05549730733036995, "rewards/margins": 0.20032739639282227, "rewards/rejected": -0.2558247148990631, "step": 3485 }, { "epoch": 2.1265822784810124, "grad_norm": 2.925459861755371, "learning_rate": 4.613839559093693e-06, "log_odds_chosen": 0.7035825848579407, "log_odds_ratio": -0.5008127689361572, "logits/chosen": -1.0119739770889282, "logits/rejected": -0.9154252409934998, "logps/chosen": -0.7147429585456848, "logps/rejected": -1.1638715267181396, "loss": 0.9204, "nll_loss": 0.9000409245491028, "rewards/accuracies": 0.75, "rewards/chosen": -0.07147429883480072, "rewards/margins": 0.04491285979747772, "rewards/rejected": -0.11638715863227844, "step": 3486 }, { "epoch": 2.1271923135580297, "grad_norm": 1.8548672199249268, "learning_rate": 4.612859767299449e-06, "log_odds_chosen": 1.4932146072387695, "log_odds_ratio": -0.32054904103279114, "logits/chosen": -0.8757796287536621, "logits/rejected": -0.9362252950668335, "logps/chosen": -0.8206523060798645, "logps/rejected": -1.8755464553833008, "loss": 1.1368, "nll_loss": 1.247817039489746, "rewards/accuracies": 0.875, "rewards/chosen": -0.08206523954868317, "rewards/margins": 0.10548941791057587, "rewards/rejected": -0.18755465745925903, "step": 3487 }, { "epoch": 2.1278023486350466, "grad_norm": 3.036442995071411, "learning_rate": 4.611879975505206e-06, "log_odds_chosen": 1.3468681573867798, "log_odds_ratio": -0.4592304825782776, "logits/chosen": -0.9472706913948059, "logits/rejected": -1.0661437511444092, "logps/chosen": -0.9376252889633179, "logps/rejected": -1.8853367567062378, "loss": 1.0482, "nll_loss": 1.1278502941131592, "rewards/accuracies": 0.75, "rewards/chosen": -0.09376253187656403, "rewards/margins": 0.09477114677429199, "rewards/rejected": -0.18853366374969482, "step": 3488 }, { "epoch": 2.1284123837120634, "grad_norm": 1.6983087062835693, "learning_rate": 4.610900183710961e-06, "log_odds_chosen": 1.5304038524627686, "log_odds_ratio": -0.4000445604324341, "logits/chosen": -0.8241803050041199, "logits/rejected": -0.7344163656234741, "logps/chosen": -0.7213878631591797, "logps/rejected": -1.7712607383728027, "loss": 1.0816, "nll_loss": 0.934829831123352, "rewards/accuracies": 0.625, "rewards/chosen": -0.07213878631591797, "rewards/margins": 0.10498729348182678, "rewards/rejected": -0.17712607979774475, "step": 3489 }, { "epoch": 2.1290224187890803, "grad_norm": 5.741508483886719, "learning_rate": 4.609920391916717e-06, "log_odds_chosen": 2.7263057231903076, "log_odds_ratio": -0.229047954082489, "logits/chosen": -0.765278697013855, "logits/rejected": -0.9112639427185059, "logps/chosen": -0.7295370101928711, "logps/rejected": -2.8444273471832275, "loss": 0.9532, "nll_loss": 0.9039618968963623, "rewards/accuracies": 1.0, "rewards/chosen": -0.07295370101928711, "rewards/margins": 0.2114890068769455, "rewards/rejected": -0.2844427227973938, "step": 3490 }, { "epoch": 2.129632453866097, "grad_norm": 5.674871921539307, "learning_rate": 4.608940600122474e-06, "log_odds_chosen": 3.684034585952759, "log_odds_ratio": -0.2764902412891388, "logits/chosen": -0.7654911279678345, "logits/rejected": -0.7856529951095581, "logps/chosen": -0.7204781174659729, "logps/rejected": -3.8587045669555664, "loss": 0.9488, "nll_loss": 0.9543349742889404, "rewards/accuracies": 0.875, "rewards/chosen": -0.07204780727624893, "rewards/margins": 0.3138226866722107, "rewards/rejected": -0.38587045669555664, "step": 3491 }, { "epoch": 2.1302424889431144, "grad_norm": 4.581456661224365, "learning_rate": 4.60796080832823e-06, "log_odds_chosen": 1.3325002193450928, "log_odds_ratio": -0.32937541604042053, "logits/chosen": -0.8077207207679749, "logits/rejected": -0.833543062210083, "logps/chosen": -0.545384407043457, "logps/rejected": -1.0305776596069336, "loss": 0.9716, "nll_loss": 1.0450830459594727, "rewards/accuracies": 0.875, "rewards/chosen": -0.054538436233997345, "rewards/margins": 0.04851933568716049, "rewards/rejected": -0.10305777192115784, "step": 3492 }, { "epoch": 2.1308525240201313, "grad_norm": 2.3660888671875, "learning_rate": 4.606981016533986e-06, "log_odds_chosen": 1.3942515850067139, "log_odds_ratio": -0.41677144169807434, "logits/chosen": -0.831719160079956, "logits/rejected": -0.8547002673149109, "logps/chosen": -0.7430360913276672, "logps/rejected": -1.7553563117980957, "loss": 1.058, "nll_loss": 0.9292411804199219, "rewards/accuracies": 0.875, "rewards/chosen": -0.07430360466241837, "rewards/margins": 0.10123202949762344, "rewards/rejected": -0.175535649061203, "step": 3493 }, { "epoch": 2.131462559097148, "grad_norm": 4.188648223876953, "learning_rate": 4.6060012247397425e-06, "log_odds_chosen": 0.7031679153442383, "log_odds_ratio": -0.6631630063056946, "logits/chosen": -0.8545958399772644, "logits/rejected": -0.7555155754089355, "logps/chosen": -1.0758254528045654, "logps/rejected": -1.701664924621582, "loss": 0.9688, "nll_loss": 1.1174170970916748, "rewards/accuracies": 0.625, "rewards/chosen": -0.10758255422115326, "rewards/margins": 0.06258393824100494, "rewards/rejected": -0.1701664924621582, "step": 3494 }, { "epoch": 2.132072594174165, "grad_norm": 10.811227798461914, "learning_rate": 4.605021432945499e-06, "log_odds_chosen": 1.7999944686889648, "log_odds_ratio": -0.40861600637435913, "logits/chosen": -0.6192060708999634, "logits/rejected": -0.710408091545105, "logps/chosen": -0.5038986802101135, "logps/rejected": -1.4879355430603027, "loss": 0.9714, "nll_loss": 0.9672909379005432, "rewards/accuracies": 0.625, "rewards/chosen": -0.05038987472653389, "rewards/margins": 0.098403699696064, "rewards/rejected": -0.148793563246727, "step": 3495 }, { "epoch": 2.132682629251182, "grad_norm": 1.488586664199829, "learning_rate": 4.604041641151255e-06, "log_odds_chosen": 1.8912379741668701, "log_odds_ratio": -0.3055988550186157, "logits/chosen": -0.8353520035743713, "logits/rejected": -0.7357980012893677, "logps/chosen": -0.8721171021461487, "logps/rejected": -2.4090113639831543, "loss": 0.9752, "nll_loss": 1.1007702350616455, "rewards/accuracies": 0.875, "rewards/chosen": -0.0872117131948471, "rewards/margins": 0.1536894291639328, "rewards/rejected": -0.2409011274576187, "step": 3496 }, { "epoch": 2.1332926643281986, "grad_norm": 1.4526978731155396, "learning_rate": 4.603061849357012e-06, "log_odds_chosen": 0.5077281594276428, "log_odds_ratio": -0.6739729642868042, "logits/chosen": -0.8902709484100342, "logits/rejected": -0.8958572149276733, "logps/chosen": -0.904820442199707, "logps/rejected": -1.2619105577468872, "loss": 1.1392, "nll_loss": 1.0533841848373413, "rewards/accuracies": 0.625, "rewards/chosen": -0.09048204869031906, "rewards/margins": 0.035709016025066376, "rewards/rejected": -0.12619106471538544, "step": 3497 }, { "epoch": 2.133902699405216, "grad_norm": 1.4072051048278809, "learning_rate": 4.602082057562768e-06, "log_odds_chosen": 1.346817135810852, "log_odds_ratio": -0.5003939867019653, "logits/chosen": -0.6137968301773071, "logits/rejected": -0.79316645860672, "logps/chosen": -0.7260687947273254, "logps/rejected": -1.675229787826538, "loss": 1.18, "nll_loss": 0.8324342966079712, "rewards/accuracies": 0.75, "rewards/chosen": -0.0726068764925003, "rewards/margins": 0.09491609036922455, "rewards/rejected": -0.16752296686172485, "step": 3498 }, { "epoch": 2.1345127344822328, "grad_norm": 1.3128143548965454, "learning_rate": 4.601102265768524e-06, "log_odds_chosen": 0.649333119392395, "log_odds_ratio": -0.705940842628479, "logits/chosen": -0.8855282068252563, "logits/rejected": -0.8463894128799438, "logps/chosen": -0.8370097875595093, "logps/rejected": -1.4105645418167114, "loss": 1.1205, "nll_loss": 1.0287394523620605, "rewards/accuracies": 0.5, "rewards/chosen": -0.08370097726583481, "rewards/margins": 0.05735547095537186, "rewards/rejected": -0.14105644822120667, "step": 3499 }, { "epoch": 2.1351227695592496, "grad_norm": 2.3803508281707764, "learning_rate": 4.60012247397428e-06, "log_odds_chosen": 2.0380871295928955, "log_odds_ratio": -0.46839386224746704, "logits/chosen": -0.728166937828064, "logits/rejected": -0.7380020022392273, "logps/chosen": -0.7569708824157715, "logps/rejected": -2.3340840339660645, "loss": 1.1651, "nll_loss": 1.035120964050293, "rewards/accuracies": 0.75, "rewards/chosen": -0.07569709420204163, "rewards/margins": 0.15771132707595825, "rewards/rejected": -0.23340842127799988, "step": 3500 }, { "epoch": 2.1357328046362665, "grad_norm": 1.2528076171875, "learning_rate": 4.599142682180036e-06, "log_odds_chosen": 0.6854296922683716, "log_odds_ratio": -0.510977029800415, "logits/chosen": -0.8610137104988098, "logits/rejected": -0.7549944519996643, "logps/chosen": -0.7492125630378723, "logps/rejected": -1.2385141849517822, "loss": 1.068, "nll_loss": 1.2735940217971802, "rewards/accuracies": 0.75, "rewards/chosen": -0.07492125034332275, "rewards/margins": 0.04893018305301666, "rewards/rejected": -0.12385143339633942, "step": 3501 }, { "epoch": 2.1363428397132833, "grad_norm": 1.4879698753356934, "learning_rate": 4.598162890385793e-06, "log_odds_chosen": 1.0141719579696655, "log_odds_ratio": -0.510560154914856, "logits/chosen": -0.906570315361023, "logits/rejected": -0.9191558361053467, "logps/chosen": -0.9057604670524597, "logps/rejected": -1.6616564989089966, "loss": 1.1012, "nll_loss": 1.134972095489502, "rewards/accuracies": 0.75, "rewards/chosen": -0.09057605266571045, "rewards/margins": 0.07558959722518921, "rewards/rejected": -0.16616564989089966, "step": 3502 }, { "epoch": 2.1369528747903006, "grad_norm": 1.4176011085510254, "learning_rate": 4.597183098591549e-06, "log_odds_chosen": 2.0433621406555176, "log_odds_ratio": -0.22269171476364136, "logits/chosen": -0.9088412523269653, "logits/rejected": -0.9291037321090698, "logps/chosen": -0.647533118724823, "logps/rejected": -2.136803150177002, "loss": 1.1079, "nll_loss": 0.931524395942688, "rewards/accuracies": 1.0, "rewards/chosen": -0.06475330889225006, "rewards/margins": 0.1489270031452179, "rewards/rejected": -0.21368031203746796, "step": 3503 }, { "epoch": 2.1375629098673175, "grad_norm": 1.3139665126800537, "learning_rate": 4.596203306797305e-06, "log_odds_chosen": 1.9876797199249268, "log_odds_ratio": -0.6778281331062317, "logits/chosen": -0.9306566715240479, "logits/rejected": -0.89950031042099, "logps/chosen": -0.9632483720779419, "logps/rejected": -2.8330159187316895, "loss": 1.1247, "nll_loss": 1.065902590751648, "rewards/accuracies": 0.625, "rewards/chosen": -0.09632483869791031, "rewards/margins": 0.18697676062583923, "rewards/rejected": -0.28330159187316895, "step": 3504 }, { "epoch": 2.1381729449443343, "grad_norm": 3.0493688583374023, "learning_rate": 4.595223515003062e-06, "log_odds_chosen": 2.172644853591919, "log_odds_ratio": -0.4439929723739624, "logits/chosen": -0.8791118860244751, "logits/rejected": -0.766995370388031, "logps/chosen": -0.8787919282913208, "logps/rejected": -2.6886026859283447, "loss": 1.0911, "nll_loss": 1.0585963726043701, "rewards/accuracies": 0.625, "rewards/chosen": -0.08787919580936432, "rewards/margins": 0.18098106980323792, "rewards/rejected": -0.26886025071144104, "step": 3505 }, { "epoch": 2.138782980021351, "grad_norm": 1.4406371116638184, "learning_rate": 4.594243723208818e-06, "log_odds_chosen": 1.8828994035720825, "log_odds_ratio": -0.37749606370925903, "logits/chosen": -0.8565412759780884, "logits/rejected": -0.9708782434463501, "logps/chosen": -1.0926792621612549, "logps/rejected": -2.6300129890441895, "loss": 1.0104, "nll_loss": 1.080291509628296, "rewards/accuracies": 0.75, "rewards/chosen": -0.10926792025566101, "rewards/margins": 0.15373340249061584, "rewards/rejected": -0.26300132274627686, "step": 3506 }, { "epoch": 2.139393015098368, "grad_norm": 1.3978939056396484, "learning_rate": 4.593263931414574e-06, "log_odds_chosen": 3.1059765815734863, "log_odds_ratio": -0.3357903063297272, "logits/chosen": -0.6651207208633423, "logits/rejected": -0.8049830198287964, "logps/chosen": -0.553101658821106, "logps/rejected": -3.110114097595215, "loss": 1.1924, "nll_loss": 1.0935580730438232, "rewards/accuracies": 0.75, "rewards/chosen": -0.055310167372226715, "rewards/margins": 0.2557012438774109, "rewards/rejected": -0.311011403799057, "step": 3507 }, { "epoch": 2.1400030501753853, "grad_norm": 1.1672453880310059, "learning_rate": 4.592284139620331e-06, "log_odds_chosen": 2.542039394378662, "log_odds_ratio": -0.20088964700698853, "logits/chosen": -0.8373396992683411, "logits/rejected": -0.8990054726600647, "logps/chosen": -0.6464366912841797, "logps/rejected": -2.5519461631774902, "loss": 0.8871, "nll_loss": 0.743105947971344, "rewards/accuracies": 1.0, "rewards/chosen": -0.06464367359876633, "rewards/margins": 0.19055093824863434, "rewards/rejected": -0.25519460439682007, "step": 3508 }, { "epoch": 2.140613085252402, "grad_norm": 1.0157456398010254, "learning_rate": 4.591304347826087e-06, "log_odds_chosen": 1.3902947902679443, "log_odds_ratio": -0.5060617923736572, "logits/chosen": -0.8618115782737732, "logits/rejected": -0.8070640563964844, "logps/chosen": -0.7664268612861633, "logps/rejected": -1.8210581541061401, "loss": 0.9794, "nll_loss": 1.0988208055496216, "rewards/accuracies": 0.625, "rewards/chosen": -0.07664269208908081, "rewards/margins": 0.10546313226222992, "rewards/rejected": -0.18210580945014954, "step": 3509 }, { "epoch": 2.141223120329419, "grad_norm": 3.021850109100342, "learning_rate": 4.590324556031843e-06, "log_odds_chosen": 2.984531879425049, "log_odds_ratio": -0.4895516037940979, "logits/chosen": -0.8243690729141235, "logits/rejected": -0.8861367702484131, "logps/chosen": -0.9312424063682556, "logps/rejected": -3.5670251846313477, "loss": 1.0988, "nll_loss": 1.1752018928527832, "rewards/accuracies": 0.625, "rewards/chosen": -0.09312425553798676, "rewards/margins": 0.26357829570770264, "rewards/rejected": -0.3567025661468506, "step": 3510 }, { "epoch": 2.141833155406436, "grad_norm": 0.92038494348526, "learning_rate": 4.589344764237599e-06, "log_odds_chosen": 2.4693422317504883, "log_odds_ratio": -0.2734067440032959, "logits/chosen": -0.8603216409683228, "logits/rejected": -0.8262818455696106, "logps/chosen": -0.7251901626586914, "logps/rejected": -2.7061944007873535, "loss": 0.9814, "nll_loss": 0.9925925731658936, "rewards/accuracies": 1.0, "rewards/chosen": -0.07251901179552078, "rewards/margins": 0.19810043275356293, "rewards/rejected": -0.2706194519996643, "step": 3511 }, { "epoch": 2.1424431904834527, "grad_norm": 1.12490713596344, "learning_rate": 4.588364972443355e-06, "log_odds_chosen": 2.80483078956604, "log_odds_ratio": -0.2906314730644226, "logits/chosen": -0.8057085871696472, "logits/rejected": -0.8238980174064636, "logps/chosen": -0.6673732399940491, "logps/rejected": -2.482145071029663, "loss": 0.8044, "nll_loss": 0.8136192560195923, "rewards/accuracies": 0.75, "rewards/chosen": -0.06673732399940491, "rewards/margins": 0.1814771592617035, "rewards/rejected": -0.2482144981622696, "step": 3512 }, { "epoch": 2.14305322556047, "grad_norm": 1.8976248502731323, "learning_rate": 4.587385180649111e-06, "log_odds_chosen": 2.3923654556274414, "log_odds_ratio": -0.5558648705482483, "logits/chosen": -0.9267922639846802, "logits/rejected": -0.9709519743919373, "logps/chosen": -0.824485719203949, "logps/rejected": -2.919443130493164, "loss": 1.1838, "nll_loss": 1.0158498287200928, "rewards/accuracies": 0.5, "rewards/chosen": -0.0824485719203949, "rewards/margins": 0.20949572324752808, "rewards/rejected": -0.291944295167923, "step": 3513 }, { "epoch": 2.143663260637487, "grad_norm": 5.217020034790039, "learning_rate": 4.586405388854868e-06, "log_odds_chosen": 2.0412471294403076, "log_odds_ratio": -0.24891796708106995, "logits/chosen": -0.8143822550773621, "logits/rejected": -0.7304022312164307, "logps/chosen": -0.6998772621154785, "logps/rejected": -2.1405274868011475, "loss": 0.952, "nll_loss": 0.9077810049057007, "rewards/accuracies": 0.875, "rewards/chosen": -0.06998772919178009, "rewards/margins": 0.1440650224685669, "rewards/rejected": -0.2140527367591858, "step": 3514 }, { "epoch": 2.1442732957145036, "grad_norm": 1.7347537279129028, "learning_rate": 4.5854255970606244e-06, "log_odds_chosen": 1.8275766372680664, "log_odds_ratio": -0.5046308636665344, "logits/chosen": -1.0198071002960205, "logits/rejected": -0.7676573991775513, "logps/chosen": -0.8775067329406738, "logps/rejected": -2.1945064067840576, "loss": 1.025, "nll_loss": 1.178114414215088, "rewards/accuracies": 0.625, "rewards/chosen": -0.08775067329406738, "rewards/margins": 0.13169996440410614, "rewards/rejected": -0.21945063769817352, "step": 3515 }, { "epoch": 2.1448833307915205, "grad_norm": 2.684458017349243, "learning_rate": 4.584445805266381e-06, "log_odds_chosen": 1.1489970684051514, "log_odds_ratio": -0.5918782949447632, "logits/chosen": -1.0159111022949219, "logits/rejected": -0.9761629104614258, "logps/chosen": -1.0687323808670044, "logps/rejected": -2.0534369945526123, "loss": 1.4051, "nll_loss": 1.6318492889404297, "rewards/accuracies": 0.625, "rewards/chosen": -0.10687323659658432, "rewards/margins": 0.09847046434879303, "rewards/rejected": -0.20534370839595795, "step": 3516 }, { "epoch": 2.1454933658685373, "grad_norm": 3.063627004623413, "learning_rate": 4.583466013472137e-06, "log_odds_chosen": 1.0865681171417236, "log_odds_ratio": -0.4312746822834015, "logits/chosen": -0.6864496469497681, "logits/rejected": -0.6910355091094971, "logps/chosen": -0.9075138568878174, "logps/rejected": -1.6440865993499756, "loss": 1.0222, "nll_loss": 0.938197135925293, "rewards/accuracies": 0.75, "rewards/chosen": -0.09075137972831726, "rewards/margins": 0.07365728914737701, "rewards/rejected": -0.16440865397453308, "step": 3517 }, { "epoch": 2.146103400945554, "grad_norm": 2.4147181510925293, "learning_rate": 4.582486221677893e-06, "log_odds_chosen": 2.56562876701355, "log_odds_ratio": -0.29492953419685364, "logits/chosen": -0.6181319952011108, "logits/rejected": -0.6820223331451416, "logps/chosen": -0.5908501744270325, "logps/rejected": -2.5268781185150146, "loss": 1.1195, "nll_loss": 0.8871641159057617, "rewards/accuracies": 0.75, "rewards/chosen": -0.059085018932819366, "rewards/margins": 0.1936028003692627, "rewards/rejected": -0.25268781185150146, "step": 3518 }, { "epoch": 2.1467134360225715, "grad_norm": 3.319223403930664, "learning_rate": 4.58150642988365e-06, "log_odds_chosen": 1.3785488605499268, "log_odds_ratio": -0.4283841848373413, "logits/chosen": -0.9868256449699402, "logits/rejected": -0.8400087356567383, "logps/chosen": -0.9560381770133972, "logps/rejected": -1.9318225383758545, "loss": 1.0919, "nll_loss": 1.2586195468902588, "rewards/accuracies": 0.75, "rewards/chosen": -0.0956038162112236, "rewards/margins": 0.09757844358682632, "rewards/rejected": -0.19318225979804993, "step": 3519 }, { "epoch": 2.1473234710995883, "grad_norm": 8.300591468811035, "learning_rate": 4.580526638089406e-06, "log_odds_chosen": 1.5805656909942627, "log_odds_ratio": -0.4441927373409271, "logits/chosen": -0.7757086157798767, "logits/rejected": -0.7045128345489502, "logps/chosen": -0.700340986251831, "logps/rejected": -1.9129738807678223, "loss": 0.9698, "nll_loss": 1.0525012016296387, "rewards/accuracies": 0.75, "rewards/chosen": -0.07003410160541534, "rewards/margins": 0.1212632954120636, "rewards/rejected": -0.19129739701747894, "step": 3520 }, { "epoch": 2.147933506176605, "grad_norm": 1.6175674200057983, "learning_rate": 4.579546846295162e-06, "log_odds_chosen": 0.9292752146720886, "log_odds_ratio": -0.5392324328422546, "logits/chosen": -0.8971130847930908, "logits/rejected": -0.8914729952812195, "logps/chosen": -0.8764070272445679, "logps/rejected": -1.5259943008422852, "loss": 1.0311, "nll_loss": 1.0107965469360352, "rewards/accuracies": 0.625, "rewards/chosen": -0.08764071017503738, "rewards/margins": 0.06495871394872665, "rewards/rejected": -0.15259942412376404, "step": 3521 }, { "epoch": 2.148543541253622, "grad_norm": 1.9462463855743408, "learning_rate": 4.578567054500919e-06, "log_odds_chosen": 1.7045886516571045, "log_odds_ratio": -0.30118706822395325, "logits/chosen": -0.9869673848152161, "logits/rejected": -0.9615561962127686, "logps/chosen": -0.9751867651939392, "logps/rejected": -2.279447555541992, "loss": 1.1466, "nll_loss": 1.320174217224121, "rewards/accuracies": 0.875, "rewards/chosen": -0.0975186824798584, "rewards/margins": 0.1304260790348053, "rewards/rejected": -0.2279447466135025, "step": 3522 }, { "epoch": 2.149153576330639, "grad_norm": 1.8402345180511475, "learning_rate": 4.577587262706674e-06, "log_odds_chosen": 1.5276439189910889, "log_odds_ratio": -0.2876100540161133, "logits/chosen": -0.7995697259902954, "logits/rejected": -0.8164690136909485, "logps/chosen": -0.6665523052215576, "logps/rejected": -1.7295113801956177, "loss": 1.0622, "nll_loss": 0.7606010437011719, "rewards/accuracies": 1.0, "rewards/chosen": -0.066655233502388, "rewards/margins": 0.10629589855670929, "rewards/rejected": -0.1729511320590973, "step": 3523 }, { "epoch": 2.149763611407656, "grad_norm": 2.1517176628112793, "learning_rate": 4.57660747091243e-06, "log_odds_chosen": 1.2724859714508057, "log_odds_ratio": -0.3406447768211365, "logits/chosen": -0.6223829388618469, "logits/rejected": -0.7968330383300781, "logps/chosen": -0.7304019927978516, "logps/rejected": -1.5942927598953247, "loss": 1.0774, "nll_loss": 0.9584707021713257, "rewards/accuracies": 0.875, "rewards/chosen": -0.0730402022600174, "rewards/margins": 0.08638907968997955, "rewards/rejected": -0.15942928194999695, "step": 3524 }, { "epoch": 2.150373646484673, "grad_norm": 1.263150930404663, "learning_rate": 4.575627679118187e-06, "log_odds_chosen": 1.6440037488937378, "log_odds_ratio": -0.3712606728076935, "logits/chosen": -0.6584304571151733, "logits/rejected": -0.7998344302177429, "logps/chosen": -0.7497207522392273, "logps/rejected": -1.9549123048782349, "loss": 1.0944, "nll_loss": 0.9885454773902893, "rewards/accuracies": 0.625, "rewards/chosen": -0.07497207075357437, "rewards/margins": 0.12051916122436523, "rewards/rejected": -0.195491224527359, "step": 3525 }, { "epoch": 2.15098368156169, "grad_norm": 6.398556709289551, "learning_rate": 4.5746478873239435e-06, "log_odds_chosen": 1.043879508972168, "log_odds_ratio": -0.5362920761108398, "logits/chosen": -0.7700615525245667, "logits/rejected": -0.8378276228904724, "logps/chosen": -0.751507043838501, "logps/rejected": -1.4083411693572998, "loss": 1.1073, "nll_loss": 0.9506282806396484, "rewards/accuracies": 0.625, "rewards/chosen": -0.07515071332454681, "rewards/margins": 0.06568340957164764, "rewards/rejected": -0.14083412289619446, "step": 3526 }, { "epoch": 2.1515937166387067, "grad_norm": 2.458069324493408, "learning_rate": 4.5736680955297e-06, "log_odds_chosen": 1.6614755392074585, "log_odds_ratio": -0.3779751658439636, "logits/chosen": -0.834068775177002, "logits/rejected": -0.8316590785980225, "logps/chosen": -0.7582569122314453, "logps/rejected": -1.7798432111740112, "loss": 1.0789, "nll_loss": 1.0896220207214355, "rewards/accuracies": 0.875, "rewards/chosen": -0.07582569122314453, "rewards/margins": 0.10215861350297928, "rewards/rejected": -0.1779842972755432, "step": 3527 }, { "epoch": 2.1522037517157235, "grad_norm": 2.206874132156372, "learning_rate": 4.5726883037354566e-06, "log_odds_chosen": 0.9117926955223083, "log_odds_ratio": -0.4451037049293518, "logits/chosen": -0.9407510161399841, "logits/rejected": -0.9375834465026855, "logps/chosen": -0.8826543688774109, "logps/rejected": -1.5680108070373535, "loss": 1.1749, "nll_loss": 1.1901216506958008, "rewards/accuracies": 0.75, "rewards/chosen": -0.08826543390750885, "rewards/margins": 0.06853563338518143, "rewards/rejected": -0.15680107474327087, "step": 3528 }, { "epoch": 2.1528137867927404, "grad_norm": 3.4214253425598145, "learning_rate": 4.571708511941212e-06, "log_odds_chosen": 1.7305972576141357, "log_odds_ratio": -0.35501524806022644, "logits/chosen": -0.7414453029632568, "logits/rejected": -0.6958703398704529, "logps/chosen": -0.7381864190101624, "logps/rejected": -1.8618102073669434, "loss": 0.8941, "nll_loss": 0.7627942562103271, "rewards/accuracies": 0.75, "rewards/chosen": -0.0738186463713646, "rewards/margins": 0.11236236989498138, "rewards/rejected": -0.18618100881576538, "step": 3529 }, { "epoch": 2.1534238218697577, "grad_norm": 8.169677734375, "learning_rate": 4.570728720146969e-06, "log_odds_chosen": 0.888323187828064, "log_odds_ratio": -0.5169301629066467, "logits/chosen": -0.8289172649383545, "logits/rejected": -0.8928090333938599, "logps/chosen": -0.8701575994491577, "logps/rejected": -1.4556888341903687, "loss": 1.1305, "nll_loss": 1.0934250354766846, "rewards/accuracies": 0.75, "rewards/chosen": -0.08701576292514801, "rewards/margins": 0.058553118258714676, "rewards/rejected": -0.1455688774585724, "step": 3530 }, { "epoch": 2.1540338569467745, "grad_norm": 2.5015907287597656, "learning_rate": 4.569748928352725e-06, "log_odds_chosen": 1.72701096534729, "log_odds_ratio": -0.5021153688430786, "logits/chosen": -0.8521434664726257, "logits/rejected": -0.9564333558082581, "logps/chosen": -0.9325957894325256, "logps/rejected": -2.3171374797821045, "loss": 1.0923, "nll_loss": 1.1164385080337524, "rewards/accuracies": 0.75, "rewards/chosen": -0.09325958043336868, "rewards/margins": 0.13845418393611908, "rewards/rejected": -0.23171375691890717, "step": 3531 }, { "epoch": 2.1546438920237914, "grad_norm": 2.2142906188964844, "learning_rate": 4.568769136558481e-06, "log_odds_chosen": 0.8357073664665222, "log_odds_ratio": -0.7061159014701843, "logits/chosen": -0.9635995626449585, "logits/rejected": -0.8903313875198364, "logps/chosen": -0.9869629740715027, "logps/rejected": -1.6699000597000122, "loss": 1.1898, "nll_loss": 1.1981053352355957, "rewards/accuracies": 0.5, "rewards/chosen": -0.09869629889726639, "rewards/margins": 0.06829370558261871, "rewards/rejected": -0.1669900119304657, "step": 3532 }, { "epoch": 2.155253927100808, "grad_norm": 1.2444604635238647, "learning_rate": 4.567789344764238e-06, "log_odds_chosen": 2.0697031021118164, "log_odds_ratio": -0.31288599967956543, "logits/chosen": -0.6871200799942017, "logits/rejected": -0.6758948564529419, "logps/chosen": -0.653121829032898, "logps/rejected": -2.1803622245788574, "loss": 0.9968, "nll_loss": 0.8424069881439209, "rewards/accuracies": 0.75, "rewards/chosen": -0.06531218439340591, "rewards/margins": 0.15272404253482819, "rewards/rejected": -0.2180362343788147, "step": 3533 }, { "epoch": 2.155863962177825, "grad_norm": 1.4226925373077393, "learning_rate": 4.566809552969993e-06, "log_odds_chosen": 1.5428686141967773, "log_odds_ratio": -0.3233960270881653, "logits/chosen": -0.6236007809638977, "logits/rejected": -0.782355010509491, "logps/chosen": -0.7143815755844116, "logps/rejected": -1.7906067371368408, "loss": 0.984, "nll_loss": 0.8868401646614075, "rewards/accuracies": 0.875, "rewards/chosen": -0.07143815606832504, "rewards/margins": 0.10762251913547516, "rewards/rejected": -0.1790606826543808, "step": 3534 }, { "epoch": 2.1564739972548423, "grad_norm": 1.3505401611328125, "learning_rate": 4.5658297611757494e-06, "log_odds_chosen": 1.1075936555862427, "log_odds_ratio": -0.39024585485458374, "logits/chosen": -0.7663408517837524, "logits/rejected": -0.8418033123016357, "logps/chosen": -0.5611000061035156, "logps/rejected": -1.207915186882019, "loss": 1.1969, "nll_loss": 1.1780184507369995, "rewards/accuracies": 0.875, "rewards/chosen": -0.05610999837517738, "rewards/margins": 0.0646815225481987, "rewards/rejected": -0.12079152464866638, "step": 3535 }, { "epoch": 2.157084032331859, "grad_norm": 1.731648564338684, "learning_rate": 4.564849969381506e-06, "log_odds_chosen": 0.5821787118911743, "log_odds_ratio": -0.6520670652389526, "logits/chosen": -1.0679136514663696, "logits/rejected": -0.9608190059661865, "logps/chosen": -0.9877040982246399, "logps/rejected": -1.4685510396957397, "loss": 1.2056, "nll_loss": 1.1276768445968628, "rewards/accuracies": 0.625, "rewards/chosen": -0.09877040982246399, "rewards/margins": 0.04808470606803894, "rewards/rejected": -0.14685511589050293, "step": 3536 }, { "epoch": 2.157694067408876, "grad_norm": 2.649017572402954, "learning_rate": 4.5638701775872625e-06, "log_odds_chosen": 1.5936310291290283, "log_odds_ratio": -0.5130800008773804, "logits/chosen": -0.6392936110496521, "logits/rejected": -0.5008270144462585, "logps/chosen": -0.5264378190040588, "logps/rejected": -1.639039158821106, "loss": 0.9204, "nll_loss": 0.8356564044952393, "rewards/accuracies": 0.625, "rewards/chosen": -0.052643783390522, "rewards/margins": 0.11126013100147247, "rewards/rejected": -0.16390392184257507, "step": 3537 }, { "epoch": 2.158304102485893, "grad_norm": 3.0688343048095703, "learning_rate": 4.562890385793019e-06, "log_odds_chosen": 0.9540299773216248, "log_odds_ratio": -0.7113113403320312, "logits/chosen": -0.9667414426803589, "logits/rejected": -0.8425436019897461, "logps/chosen": -1.184516191482544, "logps/rejected": -1.9881736040115356, "loss": 1.1416, "nll_loss": 1.1892099380493164, "rewards/accuracies": 0.375, "rewards/chosen": -0.11845161020755768, "rewards/margins": 0.08036575466394424, "rewards/rejected": -0.19881737232208252, "step": 3538 }, { "epoch": 2.1589141375629097, "grad_norm": 1.1070973873138428, "learning_rate": 4.561910593998776e-06, "log_odds_chosen": 1.4853003025054932, "log_odds_ratio": -0.46087902784347534, "logits/chosen": -0.7217894196510315, "logits/rejected": -0.761663556098938, "logps/chosen": -0.7019019722938538, "logps/rejected": -1.6775195598602295, "loss": 0.9751, "nll_loss": 0.9466738700866699, "rewards/accuracies": 0.75, "rewards/chosen": -0.0701901987195015, "rewards/margins": 0.09756174683570862, "rewards/rejected": -0.1677519530057907, "step": 3539 }, { "epoch": 2.1595241726399266, "grad_norm": 2.0189549922943115, "learning_rate": 4.560930802204531e-06, "log_odds_chosen": 1.7565041780471802, "log_odds_ratio": -0.45272910594940186, "logits/chosen": -0.7980658411979675, "logits/rejected": -0.7335249185562134, "logps/chosen": -0.9345518946647644, "logps/rejected": -2.3452582359313965, "loss": 1.1609, "nll_loss": 1.2716898918151855, "rewards/accuracies": 0.625, "rewards/chosen": -0.09345519542694092, "rewards/margins": 0.1410706341266632, "rewards/rejected": -0.23452582955360413, "step": 3540 }, { "epoch": 2.160134207716944, "grad_norm": 1.5847893953323364, "learning_rate": 4.559951010410287e-06, "log_odds_chosen": 1.2278786897659302, "log_odds_ratio": -0.423973023891449, "logits/chosen": -0.8156846761703491, "logits/rejected": -0.7037732601165771, "logps/chosen": -0.8727303743362427, "logps/rejected": -1.7210572957992554, "loss": 1.0032, "nll_loss": 1.024782657623291, "rewards/accuracies": 0.875, "rewards/chosen": -0.08727303892374039, "rewards/margins": 0.08483269065618515, "rewards/rejected": -0.17210572957992554, "step": 3541 }, { "epoch": 2.1607442427939607, "grad_norm": 1.4214473962783813, "learning_rate": 4.558971218616044e-06, "log_odds_chosen": 0.8714544773101807, "log_odds_ratio": -0.7052428722381592, "logits/chosen": -0.7422268986701965, "logits/rejected": -0.8054996132850647, "logps/chosen": -0.7979256510734558, "logps/rejected": -1.5221104621887207, "loss": 1.0423, "nll_loss": 1.152319073677063, "rewards/accuracies": 0.625, "rewards/chosen": -0.0797925740480423, "rewards/margins": 0.07241848111152649, "rewards/rejected": -0.1522110402584076, "step": 3542 }, { "epoch": 2.1613542778709776, "grad_norm": 1.1589595079421997, "learning_rate": 4.5579914268218e-06, "log_odds_chosen": 3.005713701248169, "log_odds_ratio": -0.22537800669670105, "logits/chosen": -0.7043172121047974, "logits/rejected": -0.8065255284309387, "logps/chosen": -0.514315664768219, "logps/rejected": -2.8110499382019043, "loss": 0.8688, "nll_loss": 0.7219991683959961, "rewards/accuracies": 0.875, "rewards/chosen": -0.0514315664768219, "rewards/margins": 0.22967343032360077, "rewards/rejected": -0.2811049818992615, "step": 3543 }, { "epoch": 2.1619643129479944, "grad_norm": 1.5864605903625488, "learning_rate": 4.557011635027557e-06, "log_odds_chosen": 2.1638965606689453, "log_odds_ratio": -0.28117242455482483, "logits/chosen": -0.5492685437202454, "logits/rejected": -0.8168894052505493, "logps/chosen": -0.5347668528556824, "logps/rejected": -2.064962387084961, "loss": 0.9247, "nll_loss": 0.6460897326469421, "rewards/accuracies": 1.0, "rewards/chosen": -0.053476691246032715, "rewards/margins": 0.15301954746246338, "rewards/rejected": -0.2064962387084961, "step": 3544 }, { "epoch": 2.1625743480250113, "grad_norm": 3.2685699462890625, "learning_rate": 4.556031843233313e-06, "log_odds_chosen": 0.479825496673584, "log_odds_ratio": -0.6563087105751038, "logits/chosen": -1.0134403705596924, "logits/rejected": -0.9437315464019775, "logps/chosen": -1.117142677307129, "logps/rejected": -1.5148358345031738, "loss": 1.208, "nll_loss": 1.3891682624816895, "rewards/accuracies": 0.625, "rewards/chosen": -0.11171427369117737, "rewards/margins": 0.039769310504198074, "rewards/rejected": -0.15148359537124634, "step": 3545 }, { "epoch": 2.1631843831020285, "grad_norm": 1.6650058031082153, "learning_rate": 4.5550520514390685e-06, "log_odds_chosen": 1.6562166213989258, "log_odds_ratio": -0.37477654218673706, "logits/chosen": -0.7021735906600952, "logits/rejected": -0.6663492321968079, "logps/chosen": -0.6026681661605835, "logps/rejected": -1.7445564270019531, "loss": 0.9766, "nll_loss": 0.840761125087738, "rewards/accuracies": 0.75, "rewards/chosen": -0.06026682257652283, "rewards/margins": 0.11418882012367249, "rewards/rejected": -0.1744556576013565, "step": 3546 }, { "epoch": 2.1637944181790454, "grad_norm": 1.3246252536773682, "learning_rate": 4.5540722596448254e-06, "log_odds_chosen": 1.0691746473312378, "log_odds_ratio": -0.44527876377105713, "logits/chosen": -0.843429446220398, "logits/rejected": -0.7981292605400085, "logps/chosen": -0.9576410055160522, "logps/rejected": -1.7287925481796265, "loss": 1.0288, "nll_loss": 1.1540014743804932, "rewards/accuracies": 0.625, "rewards/chosen": -0.09576410055160522, "rewards/margins": 0.07711514830589294, "rewards/rejected": -0.17287924885749817, "step": 3547 }, { "epoch": 2.1644044532560622, "grad_norm": 2.1327712535858154, "learning_rate": 4.5530924678505816e-06, "log_odds_chosen": 1.3495707511901855, "log_odds_ratio": -0.5021937489509583, "logits/chosen": -0.900180459022522, "logits/rejected": -0.688385546207428, "logps/chosen": -1.0525627136230469, "logps/rejected": -2.2699599266052246, "loss": 1.1571, "nll_loss": 1.3814034461975098, "rewards/accuracies": 0.75, "rewards/chosen": -0.10525627434253693, "rewards/margins": 0.1217397153377533, "rewards/rejected": -0.22699600458145142, "step": 3548 }, { "epoch": 2.165014488333079, "grad_norm": 1.0200083255767822, "learning_rate": 4.552112676056338e-06, "log_odds_chosen": 1.646933674812317, "log_odds_ratio": -0.6902823448181152, "logits/chosen": -0.7877004146575928, "logits/rejected": -0.9473247528076172, "logps/chosen": -0.9317548274993896, "logps/rejected": -2.3990938663482666, "loss": 1.0625, "nll_loss": 1.164808750152588, "rewards/accuracies": 0.5, "rewards/chosen": -0.0931754782795906, "rewards/margins": 0.14673392474651337, "rewards/rejected": -0.23990941047668457, "step": 3549 }, { "epoch": 2.165624523410096, "grad_norm": 2.588656425476074, "learning_rate": 4.551132884262095e-06, "log_odds_chosen": 0.6548011302947998, "log_odds_ratio": -0.5968676209449768, "logits/chosen": -0.8198246955871582, "logits/rejected": -0.8020837306976318, "logps/chosen": -0.9632056355476379, "logps/rejected": -1.4818593263626099, "loss": 1.1685, "nll_loss": 1.213685154914856, "rewards/accuracies": 0.5, "rewards/chosen": -0.09632056951522827, "rewards/margins": 0.05186537653207779, "rewards/rejected": -0.14818593859672546, "step": 3550 }, { "epoch": 2.166234558487113, "grad_norm": 5.186868667602539, "learning_rate": 4.55015309246785e-06, "log_odds_chosen": 2.233032703399658, "log_odds_ratio": -0.32748645544052124, "logits/chosen": -0.6450175046920776, "logits/rejected": -0.6528424024581909, "logps/chosen": -0.6303138732910156, "logps/rejected": -2.339120388031006, "loss": 0.9901, "nll_loss": 0.8417753577232361, "rewards/accuracies": 0.75, "rewards/chosen": -0.0630313903093338, "rewards/margins": 0.17088066041469574, "rewards/rejected": -0.23391205072402954, "step": 3551 }, { "epoch": 2.16684459356413, "grad_norm": 2.3590118885040283, "learning_rate": 4.549173300673606e-06, "log_odds_chosen": 1.8947497606277466, "log_odds_ratio": -0.29886311292648315, "logits/chosen": -0.914411187171936, "logits/rejected": -0.9043592214584351, "logps/chosen": -0.7045863270759583, "logps/rejected": -2.002044916152954, "loss": 1.1549, "nll_loss": 1.1745105981826782, "rewards/accuracies": 1.0, "rewards/chosen": -0.07045863568782806, "rewards/margins": 0.12974585592746735, "rewards/rejected": -0.2002044916152954, "step": 3552 }, { "epoch": 2.167454628641147, "grad_norm": 1.0955365896224976, "learning_rate": 4.548193508879363e-06, "log_odds_chosen": 0.2660047113895416, "log_odds_ratio": -0.6264297962188721, "logits/chosen": -0.9910409450531006, "logits/rejected": -0.9883280992507935, "logps/chosen": -0.939012348651886, "logps/rejected": -1.144871711730957, "loss": 1.083, "nll_loss": 1.0502127408981323, "rewards/accuracies": 0.625, "rewards/chosen": -0.09390123188495636, "rewards/margins": 0.020585939288139343, "rewards/rejected": -0.1144871711730957, "step": 3553 }, { "epoch": 2.1680646637181638, "grad_norm": 1.1756845712661743, "learning_rate": 4.547213717085119e-06, "log_odds_chosen": 1.87306547164917, "log_odds_ratio": -0.4323086142539978, "logits/chosen": -0.5661885142326355, "logits/rejected": -0.7327972054481506, "logps/chosen": -0.6193844079971313, "logps/rejected": -1.7768940925598145, "loss": 0.9936, "nll_loss": 0.830540657043457, "rewards/accuracies": 0.75, "rewards/chosen": -0.061938442289829254, "rewards/margins": 0.11575096845626831, "rewards/rejected": -0.17768940329551697, "step": 3554 }, { "epoch": 2.1686746987951806, "grad_norm": 1.5492526292800903, "learning_rate": 4.546233925290875e-06, "log_odds_chosen": 1.6583757400512695, "log_odds_ratio": -0.5692494511604309, "logits/chosen": -0.9006234407424927, "logits/rejected": -0.8628569841384888, "logps/chosen": -0.8087714910507202, "logps/rejected": -2.133646249771118, "loss": 1.0777, "nll_loss": 1.08535897731781, "rewards/accuracies": 0.625, "rewards/chosen": -0.0808771550655365, "rewards/margins": 0.1324874609708786, "rewards/rejected": -0.2133646160364151, "step": 3555 }, { "epoch": 2.1692847338721974, "grad_norm": 3.509323835372925, "learning_rate": 4.545254133496632e-06, "log_odds_chosen": 2.681166887283325, "log_odds_ratio": -0.34311747550964355, "logits/chosen": -0.6655282378196716, "logits/rejected": -0.8427386283874512, "logps/chosen": -1.0268323421478271, "logps/rejected": -3.2380030155181885, "loss": 1.0077, "nll_loss": 1.2816706895828247, "rewards/accuracies": 0.75, "rewards/chosen": -0.10268323123455048, "rewards/margins": 0.22111709415912628, "rewards/rejected": -0.32380032539367676, "step": 3556 }, { "epoch": 2.1698947689492147, "grad_norm": 1.7712432146072388, "learning_rate": 4.5442743417023875e-06, "log_odds_chosen": 1.7678017616271973, "log_odds_ratio": -0.3846222460269928, "logits/chosen": -0.8990625143051147, "logits/rejected": -0.7960261702537537, "logps/chosen": -0.8236939311027527, "logps/rejected": -2.191190004348755, "loss": 1.0744, "nll_loss": 0.961333692073822, "rewards/accuracies": 0.625, "rewards/chosen": -0.08236938714981079, "rewards/margins": 0.13674962520599365, "rewards/rejected": -0.21911901235580444, "step": 3557 }, { "epoch": 2.1705048040262316, "grad_norm": 1.2580485343933105, "learning_rate": 4.5432945499081445e-06, "log_odds_chosen": 2.451768398284912, "log_odds_ratio": -0.3727963864803314, "logits/chosen": -0.8061018586158752, "logits/rejected": -0.8845597505569458, "logps/chosen": -0.6410644054412842, "logps/rejected": -2.4634463787078857, "loss": 1.1001, "nll_loss": 0.9982565641403198, "rewards/accuracies": 0.75, "rewards/chosen": -0.06410644203424454, "rewards/margins": 0.18223817646503448, "rewards/rejected": -0.2463446408510208, "step": 3558 }, { "epoch": 2.1711148391032484, "grad_norm": 1.649017095565796, "learning_rate": 4.542314758113901e-06, "log_odds_chosen": 0.7406863570213318, "log_odds_ratio": -0.5742886066436768, "logits/chosen": -0.7346946597099304, "logits/rejected": -0.8289762735366821, "logps/chosen": -0.7307462692260742, "logps/rejected": -1.282340407371521, "loss": 0.9838, "nll_loss": 0.8616088628768921, "rewards/accuracies": 0.5, "rewards/chosen": -0.07307463139295578, "rewards/margins": 0.05515940487384796, "rewards/rejected": -0.12823404371738434, "step": 3559 }, { "epoch": 2.1717248741802653, "grad_norm": 1.466245412826538, "learning_rate": 4.541334966319657e-06, "log_odds_chosen": 0.6237879991531372, "log_odds_ratio": -0.4812958240509033, "logits/chosen": -0.7801098823547363, "logits/rejected": -0.6538912057876587, "logps/chosen": -0.8877075910568237, "logps/rejected": -1.3369121551513672, "loss": 1.1503, "nll_loss": 1.0078880786895752, "rewards/accuracies": 0.875, "rewards/chosen": -0.08877076208591461, "rewards/margins": 0.04492046684026718, "rewards/rejected": -0.1336912214756012, "step": 3560 }, { "epoch": 2.172334909257282, "grad_norm": 2.57149600982666, "learning_rate": 4.540355174525414e-06, "log_odds_chosen": 2.990601062774658, "log_odds_ratio": -0.4155052900314331, "logits/chosen": -0.9431986808776855, "logits/rejected": -0.8921677470207214, "logps/chosen": -0.7786799073219299, "logps/rejected": -3.3291802406311035, "loss": 1.2233, "nll_loss": 1.3361141681671143, "rewards/accuracies": 0.75, "rewards/chosen": -0.07786799222230911, "rewards/margins": 0.25505006313323975, "rewards/rejected": -0.33291804790496826, "step": 3561 }, { "epoch": 2.1729449443342994, "grad_norm": 1.4142224788665771, "learning_rate": 4.53937538273117e-06, "log_odds_chosen": 2.0451443195343018, "log_odds_ratio": -0.21877184510231018, "logits/chosen": -0.8886977434158325, "logits/rejected": -0.6993908286094666, "logps/chosen": -0.9004145264625549, "logps/rejected": -2.442709445953369, "loss": 1.1794, "nll_loss": 1.0600510835647583, "rewards/accuracies": 1.0, "rewards/chosen": -0.09004145115613937, "rewards/margins": 0.15422949194908142, "rewards/rejected": -0.2442709505558014, "step": 3562 }, { "epoch": 2.1735549794113163, "grad_norm": 1.9808473587036133, "learning_rate": 4.538395590936925e-06, "log_odds_chosen": 2.5890841484069824, "log_odds_ratio": -0.3088991940021515, "logits/chosen": -0.7033481001853943, "logits/rejected": -0.8303686380386353, "logps/chosen": -0.6839879751205444, "logps/rejected": -2.749556303024292, "loss": 0.9535, "nll_loss": 0.8377826809883118, "rewards/accuracies": 0.875, "rewards/chosen": -0.06839879602193832, "rewards/margins": 0.20655684173107147, "rewards/rejected": -0.2749556303024292, "step": 3563 }, { "epoch": 2.174165014488333, "grad_norm": 1.1490390300750732, "learning_rate": 4.537415799142682e-06, "log_odds_chosen": 0.24043866991996765, "log_odds_ratio": -0.7197868227958679, "logits/chosen": -1.0654125213623047, "logits/rejected": -0.9053770899772644, "logps/chosen": -1.256986141204834, "logps/rejected": -1.3693699836730957, "loss": 1.1424, "nll_loss": 1.2530064582824707, "rewards/accuracies": 0.75, "rewards/chosen": -0.12569862604141235, "rewards/margins": 0.011238380335271358, "rewards/rejected": -0.1369369924068451, "step": 3564 }, { "epoch": 2.17477504956535, "grad_norm": 1.7293369770050049, "learning_rate": 4.536436007348438e-06, "log_odds_chosen": 1.61704421043396, "log_odds_ratio": -0.47607576847076416, "logits/chosen": -0.8749569654464722, "logits/rejected": -0.947170615196228, "logps/chosen": -0.7477642893791199, "logps/rejected": -1.6113624572753906, "loss": 0.9246, "nll_loss": 1.0464524030685425, "rewards/accuracies": 0.75, "rewards/chosen": -0.07477643340826035, "rewards/margins": 0.08635979890823364, "rewards/rejected": -0.16113623976707458, "step": 3565 }, { "epoch": 2.175385084642367, "grad_norm": 14.308876037597656, "learning_rate": 4.535456215554194e-06, "log_odds_chosen": 1.1607505083084106, "log_odds_ratio": -0.506434977054596, "logits/chosen": -0.8166356682777405, "logits/rejected": -0.8346385955810547, "logps/chosen": -0.7899380326271057, "logps/rejected": -1.5232291221618652, "loss": 1.0659, "nll_loss": 0.9550034999847412, "rewards/accuracies": 0.625, "rewards/chosen": -0.07899380475282669, "rewards/margins": 0.07332910597324371, "rewards/rejected": -0.152322918176651, "step": 3566 }, { "epoch": 2.175995119719384, "grad_norm": 0.9938893914222717, "learning_rate": 4.534476423759951e-06, "log_odds_chosen": 1.8997678756713867, "log_odds_ratio": -0.43469569087028503, "logits/chosen": -0.7878732681274414, "logits/rejected": -0.7268337607383728, "logps/chosen": -0.9630633592605591, "logps/rejected": -2.3605542182922363, "loss": 0.9684, "nll_loss": 0.9768039584159851, "rewards/accuracies": 0.625, "rewards/chosen": -0.09630633890628815, "rewards/margins": 0.13974909484386444, "rewards/rejected": -0.2360554337501526, "step": 3567 }, { "epoch": 2.176605154796401, "grad_norm": 3.2870547771453857, "learning_rate": 4.5334966319657065e-06, "log_odds_chosen": 1.7515363693237305, "log_odds_ratio": -0.3614405691623688, "logits/chosen": -0.6294447779655457, "logits/rejected": -0.7657275199890137, "logps/chosen": -0.6911345720291138, "logps/rejected": -2.0452940464019775, "loss": 0.9768, "nll_loss": 0.8052015900611877, "rewards/accuracies": 0.875, "rewards/chosen": -0.06911345571279526, "rewards/margins": 0.1354159712791443, "rewards/rejected": -0.20452941954135895, "step": 3568 }, { "epoch": 2.1772151898734178, "grad_norm": 1.089194893836975, "learning_rate": 4.532516840171463e-06, "log_odds_chosen": 0.9188060760498047, "log_odds_ratio": -0.7027682065963745, "logits/chosen": -0.8207849860191345, "logits/rejected": -0.7147032022476196, "logps/chosen": -0.879585862159729, "logps/rejected": -1.7051907777786255, "loss": 1.0537, "nll_loss": 1.1123683452606201, "rewards/accuracies": 0.375, "rewards/chosen": -0.08795858919620514, "rewards/margins": 0.08256049454212189, "rewards/rejected": -0.17051908373832703, "step": 3569 }, { "epoch": 2.1778252249504346, "grad_norm": 3.8880302906036377, "learning_rate": 4.53153704837722e-06, "log_odds_chosen": 1.3568947315216064, "log_odds_ratio": -0.5187653303146362, "logits/chosen": -0.9122829437255859, "logits/rejected": -0.8493151068687439, "logps/chosen": -0.8836626410484314, "logps/rejected": -1.935511589050293, "loss": 1.0814, "nll_loss": 1.1389997005462646, "rewards/accuracies": 0.625, "rewards/chosen": -0.08836627006530762, "rewards/margins": 0.10518490523099899, "rewards/rejected": -0.1935511827468872, "step": 3570 }, { "epoch": 2.1784352600274515, "grad_norm": 1.5120514631271362, "learning_rate": 4.530557256582976e-06, "log_odds_chosen": 1.2077112197875977, "log_odds_ratio": -0.5049259066581726, "logits/chosen": -0.9222643375396729, "logits/rejected": -0.9505664110183716, "logps/chosen": -0.7288529872894287, "logps/rejected": -1.7269608974456787, "loss": 1.0637, "nll_loss": 0.9545345902442932, "rewards/accuracies": 0.625, "rewards/chosen": -0.07288529723882675, "rewards/margins": 0.09981080889701843, "rewards/rejected": -0.17269611358642578, "step": 3571 }, { "epoch": 2.1790452951044683, "grad_norm": 1.6141940355300903, "learning_rate": 4.529577464788733e-06, "log_odds_chosen": 2.0262274742126465, "log_odds_ratio": -0.3394275903701782, "logits/chosen": -0.6374714374542236, "logits/rejected": -0.6580438017845154, "logps/chosen": -0.7038159370422363, "logps/rejected": -2.198241949081421, "loss": 1.0037, "nll_loss": 0.9465727210044861, "rewards/accuracies": 0.875, "rewards/chosen": -0.07038159668445587, "rewards/margins": 0.14944259822368622, "rewards/rejected": -0.2198241949081421, "step": 3572 }, { "epoch": 2.1796553301814856, "grad_norm": 2.178496837615967, "learning_rate": 4.528597672994489e-06, "log_odds_chosen": 2.0272529125213623, "log_odds_ratio": -0.33324867486953735, "logits/chosen": -0.7638037204742432, "logits/rejected": -0.9194791913032532, "logps/chosen": -0.6339024305343628, "logps/rejected": -2.109901189804077, "loss": 1.0445, "nll_loss": 1.0143933296203613, "rewards/accuracies": 0.875, "rewards/chosen": -0.06339024752378464, "rewards/margins": 0.14759986102581024, "rewards/rejected": -0.21099010109901428, "step": 3573 }, { "epoch": 2.1802653652585025, "grad_norm": 1.2696062326431274, "learning_rate": 4.527617881200244e-06, "log_odds_chosen": 3.0593442916870117, "log_odds_ratio": -0.27893751859664917, "logits/chosen": -0.6761045455932617, "logits/rejected": -0.7647181749343872, "logps/chosen": -0.7134596109390259, "logps/rejected": -3.2085680961608887, "loss": 1.0806, "nll_loss": 0.9276824593544006, "rewards/accuracies": 0.875, "rewards/chosen": -0.07134596258401871, "rewards/margins": 0.24951088428497314, "rewards/rejected": -0.32085683941841125, "step": 3574 }, { "epoch": 2.1808754003355193, "grad_norm": 3.832258701324463, "learning_rate": 4.526638089406001e-06, "log_odds_chosen": 1.296990156173706, "log_odds_ratio": -0.4538114666938782, "logits/chosen": -1.0757466554641724, "logits/rejected": -0.9797813296318054, "logps/chosen": -0.9324936270713806, "logps/rejected": -1.9197649955749512, "loss": 1.277, "nll_loss": 1.1098594665527344, "rewards/accuracies": 0.875, "rewards/chosen": -0.0932493656873703, "rewards/margins": 0.09872713685035706, "rewards/rejected": -0.19197648763656616, "step": 3575 }, { "epoch": 2.181485435412536, "grad_norm": 2.20961594581604, "learning_rate": 4.525658297611757e-06, "log_odds_chosen": 1.9855425357818604, "log_odds_ratio": -0.38715460896492004, "logits/chosen": -0.7846944332122803, "logits/rejected": -0.8661292791366577, "logps/chosen": -0.780236005783081, "logps/rejected": -2.4019289016723633, "loss": 1.0644, "nll_loss": 1.0709606409072876, "rewards/accuracies": 0.875, "rewards/chosen": -0.07802359759807587, "rewards/margins": 0.16216927766799927, "rewards/rejected": -0.24019289016723633, "step": 3576 }, { "epoch": 2.182095470489553, "grad_norm": 1.5059562921524048, "learning_rate": 4.524678505817513e-06, "log_odds_chosen": 0.9154419302940369, "log_odds_ratio": -0.4844823479652405, "logits/chosen": -0.8603423833847046, "logits/rejected": -0.8107329607009888, "logps/chosen": -0.8958427309989929, "logps/rejected": -1.5497760772705078, "loss": 1.062, "nll_loss": 1.1056110858917236, "rewards/accuracies": 0.625, "rewards/chosen": -0.08958427608013153, "rewards/margins": 0.0653933435678482, "rewards/rejected": -0.15497761964797974, "step": 3577 }, { "epoch": 2.1827055055665703, "grad_norm": 2.1853256225585938, "learning_rate": 4.52369871402327e-06, "log_odds_chosen": 2.2124814987182617, "log_odds_ratio": -0.4543658494949341, "logits/chosen": -0.7164528369903564, "logits/rejected": -0.7078519463539124, "logps/chosen": -0.7422207593917847, "logps/rejected": -2.3117029666900635, "loss": 1.1173, "nll_loss": 0.9583108425140381, "rewards/accuracies": 0.625, "rewards/chosen": -0.07422208040952682, "rewards/margins": 0.15694822371006012, "rewards/rejected": -0.23117029666900635, "step": 3578 }, { "epoch": 2.183315540643587, "grad_norm": 1.6294958591461182, "learning_rate": 4.5227189222290264e-06, "log_odds_chosen": 0.5820363759994507, "log_odds_ratio": -0.5140280723571777, "logits/chosen": -1.0026458501815796, "logits/rejected": -0.954942524433136, "logps/chosen": -0.7427814602851868, "logps/rejected": -1.0598690509796143, "loss": 1.0533, "nll_loss": 1.2644307613372803, "rewards/accuracies": 0.75, "rewards/chosen": -0.07427814602851868, "rewards/margins": 0.031708769500255585, "rewards/rejected": -0.10598690807819366, "step": 3579 }, { "epoch": 2.183925575720604, "grad_norm": 1.5176670551300049, "learning_rate": 4.521739130434782e-06, "log_odds_chosen": 3.0360655784606934, "log_odds_ratio": -0.22770515084266663, "logits/chosen": -0.8956266641616821, "logits/rejected": -0.8589643836021423, "logps/chosen": -0.7090831995010376, "logps/rejected": -3.0769901275634766, "loss": 1.126, "nll_loss": 0.8497103452682495, "rewards/accuracies": 1.0, "rewards/chosen": -0.0709083303809166, "rewards/margins": 0.23679065704345703, "rewards/rejected": -0.30769896507263184, "step": 3580 }, { "epoch": 2.184535610797621, "grad_norm": 1.291896104812622, "learning_rate": 4.520759338640539e-06, "log_odds_chosen": 0.43501031398773193, "log_odds_ratio": -0.5577082633972168, "logits/chosen": -0.8149794340133667, "logits/rejected": -0.7639133930206299, "logps/chosen": -0.8418320417404175, "logps/rejected": -1.0673584938049316, "loss": 1.3359, "nll_loss": 1.1350359916687012, "rewards/accuracies": 0.75, "rewards/chosen": -0.08418320119380951, "rewards/margins": 0.022552642971277237, "rewards/rejected": -0.10673585534095764, "step": 3581 }, { "epoch": 2.1851456458746377, "grad_norm": 1.760605812072754, "learning_rate": 4.519779546846295e-06, "log_odds_chosen": 1.1967540979385376, "log_odds_ratio": -0.4150036871433258, "logits/chosen": -0.6959535479545593, "logits/rejected": -0.6754670143127441, "logps/chosen": -0.6170510649681091, "logps/rejected": -1.3148622512817383, "loss": 1.1017, "nll_loss": 0.8041478991508484, "rewards/accuracies": 0.875, "rewards/chosen": -0.06170510873198509, "rewards/margins": 0.06978113204240799, "rewards/rejected": -0.13148623704910278, "step": 3582 }, { "epoch": 2.1857556809516545, "grad_norm": 2.5191736221313477, "learning_rate": 4.518799755052052e-06, "log_odds_chosen": 0.5406358242034912, "log_odds_ratio": -0.7334242463111877, "logits/chosen": -0.837864875793457, "logits/rejected": -0.837773323059082, "logps/chosen": -0.9347772598266602, "logps/rejected": -1.3827141523361206, "loss": 1.132, "nll_loss": 1.1443681716918945, "rewards/accuracies": 0.375, "rewards/chosen": -0.09347772598266602, "rewards/margins": 0.044793687760829926, "rewards/rejected": -0.13827142119407654, "step": 3583 }, { "epoch": 2.186365716028672, "grad_norm": 2.3796253204345703, "learning_rate": 4.517819963257808e-06, "log_odds_chosen": 1.3855185508728027, "log_odds_ratio": -0.5192061066627502, "logits/chosen": -0.7293837666511536, "logits/rejected": -0.7261666059494019, "logps/chosen": -0.7000110149383545, "logps/rejected": -1.7564845085144043, "loss": 1.0199, "nll_loss": 0.9315966367721558, "rewards/accuracies": 0.75, "rewards/chosen": -0.07000111043453217, "rewards/margins": 0.10564734041690826, "rewards/rejected": -0.17564846575260162, "step": 3584 }, { "epoch": 2.1869757511056886, "grad_norm": 1.5167648792266846, "learning_rate": 4.516840171463564e-06, "log_odds_chosen": 1.97948157787323, "log_odds_ratio": -0.4489331841468811, "logits/chosen": -0.5878053903579712, "logits/rejected": -0.7824012041091919, "logps/chosen": -0.5744813680648804, "logps/rejected": -1.937971591949463, "loss": 1.0056, "nll_loss": 0.8040847778320312, "rewards/accuracies": 0.875, "rewards/chosen": -0.057448141276836395, "rewards/margins": 0.13634900748729706, "rewards/rejected": -0.19379714131355286, "step": 3585 }, { "epoch": 2.1875857861827055, "grad_norm": 1.3548227548599243, "learning_rate": 4.51586037966932e-06, "log_odds_chosen": 2.8011069297790527, "log_odds_ratio": -0.2949381470680237, "logits/chosen": -0.6258941888809204, "logits/rejected": -0.9062691330909729, "logps/chosen": -0.5249759554862976, "logps/rejected": -2.3548388481140137, "loss": 1.053, "nll_loss": 0.7699929475784302, "rewards/accuracies": 0.875, "rewards/chosen": -0.05249759927392006, "rewards/margins": 0.1829862743616104, "rewards/rejected": -0.23548386991024017, "step": 3586 }, { "epoch": 2.1881958212597223, "grad_norm": 1.6424498558044434, "learning_rate": 4.514880587875076e-06, "log_odds_chosen": 0.9723494648933411, "log_odds_ratio": -0.5147125124931335, "logits/chosen": -0.9457560181617737, "logits/rejected": -0.9320165514945984, "logps/chosen": -0.8192435503005981, "logps/rejected": -1.301836609840393, "loss": 1.137, "nll_loss": 1.1355334520339966, "rewards/accuracies": 0.625, "rewards/chosen": -0.08192435652017593, "rewards/margins": 0.04825930297374725, "rewards/rejected": -0.1301836520433426, "step": 3587 }, { "epoch": 2.188805856336739, "grad_norm": 1.741439700126648, "learning_rate": 4.513900796080832e-06, "log_odds_chosen": 0.6955939531326294, "log_odds_ratio": -0.4556194543838501, "logits/chosen": -0.9725375771522522, "logits/rejected": -0.858777642250061, "logps/chosen": -0.8294489979743958, "logps/rejected": -1.3070951700210571, "loss": 1.1006, "nll_loss": 1.0330322980880737, "rewards/accuracies": 0.75, "rewards/chosen": -0.08294489979743958, "rewards/margins": 0.047764625400304794, "rewards/rejected": -0.13070952892303467, "step": 3588 }, { "epoch": 2.1894158914137565, "grad_norm": 2.261018753051758, "learning_rate": 4.512921004286589e-06, "log_odds_chosen": 1.0126465559005737, "log_odds_ratio": -0.43359822034835815, "logits/chosen": -0.8716158866882324, "logits/rejected": -0.9184987545013428, "logps/chosen": -0.7804638147354126, "logps/rejected": -1.4230997562408447, "loss": 1.1601, "nll_loss": 0.9790230989456177, "rewards/accuracies": 0.875, "rewards/chosen": -0.07804638892412186, "rewards/margins": 0.06426358222961426, "rewards/rejected": -0.14230996370315552, "step": 3589 }, { "epoch": 2.1900259264907733, "grad_norm": 1.478464126586914, "learning_rate": 4.5119412124923455e-06, "log_odds_chosen": 0.7818297147750854, "log_odds_ratio": -0.4709579646587372, "logits/chosen": -0.7617764472961426, "logits/rejected": -0.8865084052085876, "logps/chosen": -0.7867529392242432, "logps/rejected": -1.2969220876693726, "loss": 1.0089, "nll_loss": 0.9067614674568176, "rewards/accuracies": 0.75, "rewards/chosen": -0.0786752998828888, "rewards/margins": 0.0510169193148613, "rewards/rejected": -0.1296922117471695, "step": 3590 }, { "epoch": 2.19063596156779, "grad_norm": 1.3910552263259888, "learning_rate": 4.510961420698101e-06, "log_odds_chosen": 2.1663355827331543, "log_odds_ratio": -0.46632057428359985, "logits/chosen": -0.9544036984443665, "logits/rejected": -0.8724913597106934, "logps/chosen": -0.7782682776451111, "logps/rejected": -2.3975491523742676, "loss": 1.0056, "nll_loss": 1.2018072605133057, "rewards/accuracies": 0.625, "rewards/chosen": -0.07782682776451111, "rewards/margins": 0.16192808747291565, "rewards/rejected": -0.23975491523742676, "step": 3591 }, { "epoch": 2.191245996644807, "grad_norm": 1.5407140254974365, "learning_rate": 4.509981628903858e-06, "log_odds_chosen": 2.0617640018463135, "log_odds_ratio": -0.4260709583759308, "logits/chosen": -0.9623674154281616, "logits/rejected": -0.9288516640663147, "logps/chosen": -0.8741122484207153, "logps/rejected": -2.5957770347595215, "loss": 1.1641, "nll_loss": 1.3849211931228638, "rewards/accuracies": 0.5, "rewards/chosen": -0.08741122484207153, "rewards/margins": 0.17216648161411285, "rewards/rejected": -0.2595776915550232, "step": 3592 }, { "epoch": 2.191856031721824, "grad_norm": 3.14279842376709, "learning_rate": 4.509001837109614e-06, "log_odds_chosen": 1.9004435539245605, "log_odds_ratio": -0.40526801347732544, "logits/chosen": -0.6658283472061157, "logits/rejected": -0.8411211967468262, "logps/chosen": -0.6707230806350708, "logps/rejected": -2.041830539703369, "loss": 1.0549, "nll_loss": 0.8902910351753235, "rewards/accuracies": 0.75, "rewards/chosen": -0.0670723021030426, "rewards/margins": 0.13711073994636536, "rewards/rejected": -0.20418304204940796, "step": 3593 }, { "epoch": 2.1924660667988407, "grad_norm": 1.1981080770492554, "learning_rate": 4.50802204531537e-06, "log_odds_chosen": 2.206360340118408, "log_odds_ratio": -0.27300935983657837, "logits/chosen": -0.7702865600585938, "logits/rejected": -0.9281109571456909, "logps/chosen": -0.7703056931495667, "logps/rejected": -2.340212106704712, "loss": 0.9957, "nll_loss": 0.7839416265487671, "rewards/accuracies": 0.875, "rewards/chosen": -0.07703057676553726, "rewards/margins": 0.156990647315979, "rewards/rejected": -0.23402124643325806, "step": 3594 }, { "epoch": 2.193076101875858, "grad_norm": 1.7422683238983154, "learning_rate": 4.507042253521127e-06, "log_odds_chosen": 0.7800799012184143, "log_odds_ratio": -0.608971118927002, "logits/chosen": -0.7887578010559082, "logits/rejected": -0.8324671387672424, "logps/chosen": -0.9199795722961426, "logps/rejected": -1.5287855863571167, "loss": 1.139, "nll_loss": 1.0715826749801636, "rewards/accuracies": 0.375, "rewards/chosen": -0.09199795126914978, "rewards/margins": 0.06088060885667801, "rewards/rejected": -0.1528785675764084, "step": 3595 }, { "epoch": 2.193686136952875, "grad_norm": 3.6837563514709473, "learning_rate": 4.506062461726883e-06, "log_odds_chosen": 1.506434440612793, "log_odds_ratio": -0.5467410683631897, "logits/chosen": -0.8768728375434875, "logits/rejected": -0.9628888368606567, "logps/chosen": -0.8818525075912476, "logps/rejected": -2.1450083255767822, "loss": 1.1596, "nll_loss": 1.1729806661605835, "rewards/accuracies": 0.625, "rewards/chosen": -0.08818525075912476, "rewards/margins": 0.12631559371948242, "rewards/rejected": -0.21450084447860718, "step": 3596 }, { "epoch": 2.1942961720298917, "grad_norm": 1.599999189376831, "learning_rate": 4.505082669932638e-06, "log_odds_chosen": 1.5206470489501953, "log_odds_ratio": -0.45566514134407043, "logits/chosen": -0.8737106919288635, "logits/rejected": -0.8963059782981873, "logps/chosen": -0.870306134223938, "logps/rejected": -2.0237057209014893, "loss": 1.1749, "nll_loss": 1.0131382942199707, "rewards/accuracies": 0.625, "rewards/chosen": -0.08703060448169708, "rewards/margins": 0.11533994972705841, "rewards/rejected": -0.2023705691099167, "step": 3597 }, { "epoch": 2.1949062071069085, "grad_norm": 1.6461323499679565, "learning_rate": 4.504102878138395e-06, "log_odds_chosen": 2.678699016571045, "log_odds_ratio": -0.40421199798583984, "logits/chosen": -0.8833870887756348, "logits/rejected": -0.9618496298789978, "logps/chosen": -0.8018258810043335, "logps/rejected": -3.0482430458068848, "loss": 0.921, "nll_loss": 0.88811194896698, "rewards/accuracies": 0.875, "rewards/chosen": -0.08018258213996887, "rewards/margins": 0.22464174032211304, "rewards/rejected": -0.3048243522644043, "step": 3598 }, { "epoch": 2.1955162421839254, "grad_norm": 2.045227289199829, "learning_rate": 4.503123086344151e-06, "log_odds_chosen": 1.365424633026123, "log_odds_ratio": -0.4100055992603302, "logits/chosen": -0.8849034905433655, "logits/rejected": -1.0153576135635376, "logps/chosen": -0.7047306895256042, "logps/rejected": -1.7206045389175415, "loss": 1.0248, "nll_loss": 0.898064136505127, "rewards/accuracies": 0.75, "rewards/chosen": -0.0704730674624443, "rewards/margins": 0.10158738493919373, "rewards/rejected": -0.17206045985221863, "step": 3599 }, { "epoch": 2.1961262772609427, "grad_norm": 1.4184519052505493, "learning_rate": 4.502143294549908e-06, "log_odds_chosen": 1.4036263227462769, "log_odds_ratio": -0.4195699095726013, "logits/chosen": -0.6882082223892212, "logits/rejected": -0.665357768535614, "logps/chosen": -0.600639820098877, "logps/rejected": -1.534245252609253, "loss": 0.9297, "nll_loss": 0.8637109994888306, "rewards/accuracies": 0.75, "rewards/chosen": -0.06006397679448128, "rewards/margins": 0.0933605507016182, "rewards/rejected": -0.15342453122138977, "step": 3600 }, { "epoch": 2.1967363123379595, "grad_norm": 3.486921548843384, "learning_rate": 4.5011635027556645e-06, "log_odds_chosen": 1.5606584548950195, "log_odds_ratio": -0.5093634128570557, "logits/chosen": -0.9688276052474976, "logits/rejected": -0.994727373123169, "logps/chosen": -0.9274137616157532, "logps/rejected": -2.1123509407043457, "loss": 1.0948, "nll_loss": 1.2127312421798706, "rewards/accuracies": 0.75, "rewards/chosen": -0.09274137020111084, "rewards/margins": 0.1184937134385109, "rewards/rejected": -0.21123507618904114, "step": 3601 }, { "epoch": 2.1973463474149764, "grad_norm": 2.2711329460144043, "learning_rate": 4.500183710961421e-06, "log_odds_chosen": 1.3826048374176025, "log_odds_ratio": -0.570753812789917, "logits/chosen": -0.948574423789978, "logits/rejected": -0.8364682793617249, "logps/chosen": -1.1193181276321411, "logps/rejected": -2.424072027206421, "loss": 1.1382, "nll_loss": 1.3113601207733154, "rewards/accuracies": 0.625, "rewards/chosen": -0.11193181574344635, "rewards/margins": 0.13047540187835693, "rewards/rejected": -0.24240721762180328, "step": 3602 }, { "epoch": 2.197956382491993, "grad_norm": 1.268588662147522, "learning_rate": 4.499203919167177e-06, "log_odds_chosen": 0.3700123429298401, "log_odds_ratio": -0.6035534143447876, "logits/chosen": -1.0151598453521729, "logits/rejected": -0.7960637807846069, "logps/chosen": -0.9295044541358948, "logps/rejected": -1.1989399194717407, "loss": 1.1953, "nll_loss": 1.0162781476974487, "rewards/accuracies": 0.5, "rewards/chosen": -0.09295044839382172, "rewards/margins": 0.026943549513816833, "rewards/rejected": -0.11989399045705795, "step": 3603 }, { "epoch": 2.19856641756901, "grad_norm": 9.227442741394043, "learning_rate": 4.498224127372933e-06, "log_odds_chosen": 1.851568579673767, "log_odds_ratio": -0.3015502691268921, "logits/chosen": -0.7386249899864197, "logits/rejected": -0.7162646055221558, "logps/chosen": -0.7070053219795227, "logps/rejected": -1.960293173789978, "loss": 1.0846, "nll_loss": 0.8972446918487549, "rewards/accuracies": 1.0, "rewards/chosen": -0.07070054113864899, "rewards/margins": 0.12532876431941986, "rewards/rejected": -0.19602930545806885, "step": 3604 }, { "epoch": 2.1991764526460273, "grad_norm": 6.115756034851074, "learning_rate": 4.497244335578689e-06, "log_odds_chosen": 2.2071962356567383, "log_odds_ratio": -0.402875691652298, "logits/chosen": -0.9928845763206482, "logits/rejected": -0.9939806461334229, "logps/chosen": -0.6873190402984619, "logps/rejected": -2.270388603210449, "loss": 1.1465, "nll_loss": 0.9416934251785278, "rewards/accuracies": 0.625, "rewards/chosen": -0.06873190402984619, "rewards/margins": 0.15830697119235992, "rewards/rejected": -0.22703886032104492, "step": 3605 }, { "epoch": 2.199786487723044, "grad_norm": 1.4779549837112427, "learning_rate": 4.496264543784446e-06, "log_odds_chosen": 2.335766077041626, "log_odds_ratio": -0.31021302938461304, "logits/chosen": -0.6804679036140442, "logits/rejected": -0.6988809704780579, "logps/chosen": -0.6371190547943115, "logps/rejected": -2.4016716480255127, "loss": 1.1125, "nll_loss": 0.8655698895454407, "rewards/accuracies": 0.75, "rewards/chosen": -0.06371190398931503, "rewards/margins": 0.1764552891254425, "rewards/rejected": -0.24016718566417694, "step": 3606 }, { "epoch": 2.200396522800061, "grad_norm": 2.505781412124634, "learning_rate": 4.495284751990202e-06, "log_odds_chosen": 1.5093865394592285, "log_odds_ratio": -0.3006477355957031, "logits/chosen": -0.9224273562431335, "logits/rejected": -0.9866790771484375, "logps/chosen": -0.9332199096679688, "logps/rejected": -2.0343410968780518, "loss": 1.1652, "nll_loss": 1.2885851860046387, "rewards/accuracies": 0.875, "rewards/chosen": -0.09332199394702911, "rewards/margins": 0.11011212319135666, "rewards/rejected": -0.20343410968780518, "step": 3607 }, { "epoch": 2.201006557877078, "grad_norm": 1.3977774381637573, "learning_rate": 4.494304960195957e-06, "log_odds_chosen": 0.9966185092926025, "log_odds_ratio": -0.6923024654388428, "logits/chosen": -0.8559908866882324, "logits/rejected": -0.8154733180999756, "logps/chosen": -1.0044264793395996, "logps/rejected": -1.8243136405944824, "loss": 1.1499, "nll_loss": 1.1181838512420654, "rewards/accuracies": 0.375, "rewards/chosen": -0.10044265538454056, "rewards/margins": 0.08198872953653336, "rewards/rejected": -0.18243137001991272, "step": 3608 }, { "epoch": 2.2016165929540947, "grad_norm": 2.1030118465423584, "learning_rate": 4.493325168401714e-06, "log_odds_chosen": 1.6760609149932861, "log_odds_ratio": -0.387469083070755, "logits/chosen": -0.8573908805847168, "logits/rejected": -0.9094594120979309, "logps/chosen": -0.8178248405456543, "logps/rejected": -2.0852818489074707, "loss": 1.1477, "nll_loss": 1.007206678390503, "rewards/accuracies": 0.75, "rewards/chosen": -0.08178247511386871, "rewards/margins": 0.12674573063850403, "rewards/rejected": -0.20852820575237274, "step": 3609 }, { "epoch": 2.202226628031112, "grad_norm": 1.0932040214538574, "learning_rate": 4.4923453766074705e-06, "log_odds_chosen": 1.375175952911377, "log_odds_ratio": -0.5277397632598877, "logits/chosen": -0.9017250537872314, "logits/rejected": -0.890508770942688, "logps/chosen": -0.9317548274993896, "logps/rejected": -2.0310635566711426, "loss": 0.964, "nll_loss": 1.0531210899353027, "rewards/accuracies": 0.5, "rewards/chosen": -0.0931754782795906, "rewards/margins": 0.10993087291717529, "rewards/rejected": -0.2031063735485077, "step": 3610 }, { "epoch": 2.202836663108129, "grad_norm": 1.3952184915542603, "learning_rate": 4.4913655848132274e-06, "log_odds_chosen": 1.1212717294692993, "log_odds_ratio": -0.40039074420928955, "logits/chosen": -0.837817907333374, "logits/rejected": -0.8727848529815674, "logps/chosen": -0.7974585294723511, "logps/rejected": -1.4922362565994263, "loss": 1.0203, "nll_loss": 0.8704149127006531, "rewards/accuracies": 0.875, "rewards/chosen": -0.07974585890769958, "rewards/margins": 0.06947776675224304, "rewards/rejected": -0.14922362565994263, "step": 3611 }, { "epoch": 2.2034466981851457, "grad_norm": 2.3453776836395264, "learning_rate": 4.4903857930189835e-06, "log_odds_chosen": 1.6810686588287354, "log_odds_ratio": -0.6111759543418884, "logits/chosen": -0.8507667779922485, "logits/rejected": -0.749036431312561, "logps/chosen": -0.9174715280532837, "logps/rejected": -2.3199594020843506, "loss": 1.097, "nll_loss": 1.1770480871200562, "rewards/accuracies": 0.625, "rewards/chosen": -0.09174714237451553, "rewards/margins": 0.14024877548217773, "rewards/rejected": -0.23199594020843506, "step": 3612 }, { "epoch": 2.2040567332621626, "grad_norm": 2.070225715637207, "learning_rate": 4.48940600122474e-06, "log_odds_chosen": 0.2132885456085205, "log_odds_ratio": -0.6674293279647827, "logits/chosen": -1.0403119325637817, "logits/rejected": -0.8848094940185547, "logps/chosen": -0.9759516716003418, "logps/rejected": -1.1874806880950928, "loss": 1.0861, "nll_loss": 1.125356674194336, "rewards/accuracies": 0.5, "rewards/chosen": -0.09759517759084702, "rewards/margins": 0.021152887493371964, "rewards/rejected": -0.11874805390834808, "step": 3613 }, { "epoch": 2.2046667683391794, "grad_norm": 2.151047468185425, "learning_rate": 4.488426209430496e-06, "log_odds_chosen": 1.2364163398742676, "log_odds_ratio": -0.485628604888916, "logits/chosen": -1.1649270057678223, "logits/rejected": -1.0125035047531128, "logps/chosen": -1.1938954591751099, "logps/rejected": -2.197019338607788, "loss": 1.0708, "nll_loss": 1.2651469707489014, "rewards/accuracies": 0.875, "rewards/chosen": -0.11938954889774323, "rewards/margins": 0.10031238198280334, "rewards/rejected": -0.21970193088054657, "step": 3614 }, { "epoch": 2.2052768034161963, "grad_norm": 1.4732005596160889, "learning_rate": 4.487446417636252e-06, "log_odds_chosen": 1.8477693796157837, "log_odds_ratio": -0.43111974000930786, "logits/chosen": -0.8739327192306519, "logits/rejected": -0.9303836822509766, "logps/chosen": -0.6352973580360413, "logps/rejected": -2.0238289833068848, "loss": 0.8484, "nll_loss": 0.7195987701416016, "rewards/accuracies": 0.625, "rewards/chosen": -0.06352973729372025, "rewards/margins": 0.13885316252708435, "rewards/rejected": -0.202382892370224, "step": 3615 }, { "epoch": 2.2058868384932135, "grad_norm": 1.1392453908920288, "learning_rate": 4.486466625842008e-06, "log_odds_chosen": 1.321172833442688, "log_odds_ratio": -0.2933143377304077, "logits/chosen": -0.9610804319381714, "logits/rejected": -0.9655401110649109, "logps/chosen": -0.5018186569213867, "logps/rejected": -1.1890567541122437, "loss": 0.9464, "nll_loss": 0.8612639307975769, "rewards/accuracies": 1.0, "rewards/chosen": -0.05018187314271927, "rewards/margins": 0.06872381269931793, "rewards/rejected": -0.1189056783914566, "step": 3616 }, { "epoch": 2.2064968735702304, "grad_norm": 1.5326086282730103, "learning_rate": 4.485486834047765e-06, "log_odds_chosen": 3.670013904571533, "log_odds_ratio": -0.15824279189109802, "logits/chosen": -0.840965211391449, "logits/rejected": -0.7990272045135498, "logps/chosen": -0.6170030236244202, "logps/rejected": -3.5966341495513916, "loss": 1.009, "nll_loss": 0.8719863295555115, "rewards/accuracies": 1.0, "rewards/chosen": -0.061700306832790375, "rewards/margins": 0.29796311259269714, "rewards/rejected": -0.3596634566783905, "step": 3617 }, { "epoch": 2.2071069086472472, "grad_norm": 1.4939061403274536, "learning_rate": 4.484507042253521e-06, "log_odds_chosen": 0.4407321810722351, "log_odds_ratio": -0.7846670150756836, "logits/chosen": -0.8549028038978577, "logits/rejected": -0.9773977994918823, "logps/chosen": -1.164876937866211, "logps/rejected": -1.5288687944412231, "loss": 1.2627, "nll_loss": 1.3290396928787231, "rewards/accuracies": 0.375, "rewards/chosen": -0.11648769676685333, "rewards/margins": 0.036399178206920624, "rewards/rejected": -0.15288688242435455, "step": 3618 }, { "epoch": 2.207716943724264, "grad_norm": 4.8830108642578125, "learning_rate": 4.483527250459277e-06, "log_odds_chosen": 2.7278149127960205, "log_odds_ratio": -0.22967711091041565, "logits/chosen": -0.542675256729126, "logits/rejected": -0.7496830821037292, "logps/chosen": -0.7290111780166626, "logps/rejected": -2.9054977893829346, "loss": 1.0306, "nll_loss": 0.7695097923278809, "rewards/accuracies": 0.875, "rewards/chosen": -0.07290112972259521, "rewards/margins": 0.2176486849784851, "rewards/rejected": -0.2905498147010803, "step": 3619 }, { "epoch": 2.208326978801281, "grad_norm": 1.9735180139541626, "learning_rate": 4.482547458665033e-06, "log_odds_chosen": 2.8704304695129395, "log_odds_ratio": -0.16954882442951202, "logits/chosen": -0.8104048371315002, "logits/rejected": -0.8576940298080444, "logps/chosen": -0.65889573097229, "logps/rejected": -2.801323890686035, "loss": 1.0955, "nll_loss": 0.7754921913146973, "rewards/accuracies": 0.875, "rewards/chosen": -0.06588957458734512, "rewards/margins": 0.21424280107021332, "rewards/rejected": -0.28013235330581665, "step": 3620 }, { "epoch": 2.208937013878298, "grad_norm": 1.6978024244308472, "learning_rate": 4.4815676668707895e-06, "log_odds_chosen": 0.2060796618461609, "log_odds_ratio": -0.6936774253845215, "logits/chosen": -0.8402299880981445, "logits/rejected": -0.8509794473648071, "logps/chosen": -0.7799228429794312, "logps/rejected": -0.913648247718811, "loss": 1.0096, "nll_loss": 0.9674701690673828, "rewards/accuracies": 0.375, "rewards/chosen": -0.07799229770898819, "rewards/margins": 0.013372535817325115, "rewards/rejected": -0.09136483073234558, "step": 3621 }, { "epoch": 2.209547048955315, "grad_norm": 1.5592482089996338, "learning_rate": 4.480587875076546e-06, "log_odds_chosen": 0.6855846047401428, "log_odds_ratio": -0.513511598110199, "logits/chosen": -0.4620532989501953, "logits/rejected": -0.5139445662498474, "logps/chosen": -0.6865431666374207, "logps/rejected": -1.0416979789733887, "loss": 1.0275, "nll_loss": 0.7722153663635254, "rewards/accuracies": 0.75, "rewards/chosen": -0.06865432858467102, "rewards/margins": 0.035515472292900085, "rewards/rejected": -0.10416979342699051, "step": 3622 }, { "epoch": 2.210157084032332, "grad_norm": 3.28816294670105, "learning_rate": 4.479608083282303e-06, "log_odds_chosen": 0.34057170152664185, "log_odds_ratio": -0.6862726211547852, "logits/chosen": -0.9841274619102478, "logits/rejected": -1.026142954826355, "logps/chosen": -0.7629203796386719, "logps/rejected": -0.9499984979629517, "loss": 1.1452, "nll_loss": 1.423534631729126, "rewards/accuracies": 0.625, "rewards/chosen": -0.07629203796386719, "rewards/margins": 0.018707802519202232, "rewards/rejected": -0.09499984234571457, "step": 3623 }, { "epoch": 2.2107671191093488, "grad_norm": 1.5348634719848633, "learning_rate": 4.478628291488059e-06, "log_odds_chosen": 1.0293622016906738, "log_odds_ratio": -0.5750959515571594, "logits/chosen": -0.8879637122154236, "logits/rejected": -0.8184065222740173, "logps/chosen": -0.8167032599449158, "logps/rejected": -1.3246709108352661, "loss": 1.2836, "nll_loss": 1.216996192932129, "rewards/accuracies": 0.625, "rewards/chosen": -0.08167032152414322, "rewards/margins": 0.050796762108802795, "rewards/rejected": -0.1324670910835266, "step": 3624 }, { "epoch": 2.2113771541863656, "grad_norm": 1.2838283777236938, "learning_rate": 4.477648499693815e-06, "log_odds_chosen": 0.5668579339981079, "log_odds_ratio": -0.6423830986022949, "logits/chosen": -0.9579189419746399, "logits/rejected": -0.9317425489425659, "logps/chosen": -0.8591387867927551, "logps/rejected": -1.379080057144165, "loss": 1.0831, "nll_loss": 1.1697415113449097, "rewards/accuracies": 0.375, "rewards/chosen": -0.08591388165950775, "rewards/margins": 0.051994141191244125, "rewards/rejected": -0.13790802657604218, "step": 3625 }, { "epoch": 2.2119871892633824, "grad_norm": 5.125545024871826, "learning_rate": 4.476668707899571e-06, "log_odds_chosen": 1.634615421295166, "log_odds_ratio": -0.7725652456283569, "logits/chosen": -0.9131223559379578, "logits/rejected": -1.0331153869628906, "logps/chosen": -1.2551078796386719, "logps/rejected": -2.418287515640259, "loss": 1.3282, "nll_loss": 1.0859028100967407, "rewards/accuracies": 0.75, "rewards/chosen": -0.1255107820034027, "rewards/margins": 0.11631796509027481, "rewards/rejected": -0.24182875454425812, "step": 3626 }, { "epoch": 2.2125972243403997, "grad_norm": 1.0168758630752563, "learning_rate": 4.475688916105327e-06, "log_odds_chosen": 2.7653799057006836, "log_odds_ratio": -0.35585811734199524, "logits/chosen": -0.5982940196990967, "logits/rejected": -0.7926957011222839, "logps/chosen": -0.6250382661819458, "logps/rejected": -2.7958590984344482, "loss": 0.9361, "nll_loss": 0.7719594240188599, "rewards/accuracies": 0.75, "rewards/chosen": -0.06250382959842682, "rewards/margins": 0.21708208322525024, "rewards/rejected": -0.27958592772483826, "step": 3627 }, { "epoch": 2.2132072594174166, "grad_norm": 1.7025887966156006, "learning_rate": 4.474709124311084e-06, "log_odds_chosen": 0.8439710736274719, "log_odds_ratio": -0.49303263425827026, "logits/chosen": -0.9609402418136597, "logits/rejected": -0.9559491872787476, "logps/chosen": -0.9224273562431335, "logps/rejected": -1.5323965549468994, "loss": 0.9897, "nll_loss": 1.0381882190704346, "rewards/accuracies": 0.75, "rewards/chosen": -0.09224274754524231, "rewards/margins": 0.06099691241979599, "rewards/rejected": -0.1532396674156189, "step": 3628 }, { "epoch": 2.2138172944944334, "grad_norm": 1.2219300270080566, "learning_rate": 4.47372933251684e-06, "log_odds_chosen": 1.6553006172180176, "log_odds_ratio": -0.41719168424606323, "logits/chosen": -0.7019492387771606, "logits/rejected": -0.6873200535774231, "logps/chosen": -0.7089115381240845, "logps/rejected": -1.7975449562072754, "loss": 0.9404, "nll_loss": 0.8671728372573853, "rewards/accuracies": 0.75, "rewards/chosen": -0.07089115679264069, "rewards/margins": 0.10886334627866745, "rewards/rejected": -0.17975449562072754, "step": 3629 }, { "epoch": 2.2144273295714503, "grad_norm": 2.2963171005249023, "learning_rate": 4.472749540722596e-06, "log_odds_chosen": 1.3355860710144043, "log_odds_ratio": -0.42116373777389526, "logits/chosen": -0.7387257814407349, "logits/rejected": -0.7904590368270874, "logps/chosen": -0.6670739650726318, "logps/rejected": -1.6307640075683594, "loss": 0.9013, "nll_loss": 0.7915517091751099, "rewards/accuracies": 0.75, "rewards/chosen": -0.06670740246772766, "rewards/margins": 0.09636899828910828, "rewards/rejected": -0.16307640075683594, "step": 3630 }, { "epoch": 2.215037364648467, "grad_norm": 4.3195881843566895, "learning_rate": 4.471769748928352e-06, "log_odds_chosen": 1.8596680164337158, "log_odds_ratio": -0.522706151008606, "logits/chosen": -0.8120279908180237, "logits/rejected": -0.9950725436210632, "logps/chosen": -0.834516167640686, "logps/rejected": -2.2548699378967285, "loss": 1.0892, "nll_loss": 1.0262632369995117, "rewards/accuracies": 0.75, "rewards/chosen": -0.08345162123441696, "rewards/margins": 0.14203539490699768, "rewards/rejected": -0.22548700869083405, "step": 3631 }, { "epoch": 2.2156473997254844, "grad_norm": 3.995307207107544, "learning_rate": 4.4707899571341085e-06, "log_odds_chosen": 0.8724214434623718, "log_odds_ratio": -0.5234646797180176, "logits/chosen": -0.9165623784065247, "logits/rejected": -0.8614815473556519, "logps/chosen": -0.8935644626617432, "logps/rejected": -1.5930112600326538, "loss": 1.0099, "nll_loss": 1.0434495210647583, "rewards/accuracies": 0.625, "rewards/chosen": -0.0893564522266388, "rewards/margins": 0.06994467973709106, "rewards/rejected": -0.15930113196372986, "step": 3632 }, { "epoch": 2.2162574348025013, "grad_norm": 3.8339269161224365, "learning_rate": 4.469810165339865e-06, "log_odds_chosen": 0.35238325595855713, "log_odds_ratio": -0.6847411394119263, "logits/chosen": -0.9498417973518372, "logits/rejected": -0.9245414733886719, "logps/chosen": -0.7851099967956543, "logps/rejected": -1.124670386314392, "loss": 1.0852, "nll_loss": 1.1977601051330566, "rewards/accuracies": 0.5, "rewards/chosen": -0.07851099967956543, "rewards/margins": 0.033956050872802734, "rewards/rejected": -0.11246705055236816, "step": 3633 }, { "epoch": 2.216867469879518, "grad_norm": 1.4841492176055908, "learning_rate": 4.468830373545622e-06, "log_odds_chosen": 1.507542610168457, "log_odds_ratio": -0.37123793363571167, "logits/chosen": -0.8278884291648865, "logits/rejected": -0.8318625688552856, "logps/chosen": -0.9571640491485596, "logps/rejected": -2.095550060272217, "loss": 1.1328, "nll_loss": 1.091375470161438, "rewards/accuracies": 1.0, "rewards/chosen": -0.09571640938520432, "rewards/margins": 0.11383859068155289, "rewards/rejected": -0.2095550149679184, "step": 3634 }, { "epoch": 2.217477504956535, "grad_norm": 1.353764533996582, "learning_rate": 4.467850581751378e-06, "log_odds_chosen": 1.252927303314209, "log_odds_ratio": -0.578714907169342, "logits/chosen": -1.0142252445220947, "logits/rejected": -1.0983778238296509, "logps/chosen": -0.9052004218101501, "logps/rejected": -1.872369408607483, "loss": 1.1249, "nll_loss": 1.086920976638794, "rewards/accuracies": 0.625, "rewards/chosen": -0.09052005410194397, "rewards/margins": 0.09671691060066223, "rewards/rejected": -0.1872369647026062, "step": 3635 }, { "epoch": 2.218087540033552, "grad_norm": 2.0578479766845703, "learning_rate": 4.466870789957134e-06, "log_odds_chosen": 2.784320116043091, "log_odds_ratio": -0.2220071256160736, "logits/chosen": -0.9996500611305237, "logits/rejected": -0.9579164981842041, "logps/chosen": -0.7076944708824158, "logps/rejected": -2.8793482780456543, "loss": 1.0616, "nll_loss": 0.9721775054931641, "rewards/accuracies": 0.875, "rewards/chosen": -0.07076944410800934, "rewards/margins": 0.21716538071632385, "rewards/rejected": -0.2879348397254944, "step": 3636 }, { "epoch": 2.2186975751105686, "grad_norm": 1.780898928642273, "learning_rate": 4.46589099816289e-06, "log_odds_chosen": 1.0119600296020508, "log_odds_ratio": -0.5590735673904419, "logits/chosen": -0.9112780094146729, "logits/rejected": -0.9079651832580566, "logps/chosen": -1.016160488128662, "logps/rejected": -1.7640687227249146, "loss": 1.1577, "nll_loss": 1.4209706783294678, "rewards/accuracies": 0.5, "rewards/chosen": -0.10161606222391129, "rewards/margins": 0.074790820479393, "rewards/rejected": -0.1764068752527237, "step": 3637 }, { "epoch": 2.219307610187586, "grad_norm": 1.761168122291565, "learning_rate": 4.464911206368646e-06, "log_odds_chosen": 2.7945570945739746, "log_odds_ratio": -0.4236215353012085, "logits/chosen": -0.6088904142379761, "logits/rejected": -0.7874215841293335, "logps/chosen": -0.7488322257995605, "logps/rejected": -3.105116844177246, "loss": 0.9732, "nll_loss": 1.0218952894210815, "rewards/accuracies": 0.75, "rewards/chosen": -0.07488323003053665, "rewards/margins": 0.23562848567962646, "rewards/rejected": -0.3105117082595825, "step": 3638 }, { "epoch": 2.219917645264603, "grad_norm": 6.554170608520508, "learning_rate": 4.463931414574403e-06, "log_odds_chosen": 2.3746345043182373, "log_odds_ratio": -0.2917974591255188, "logits/chosen": -0.8241680264472961, "logits/rejected": -0.9457731246948242, "logps/chosen": -0.827670693397522, "logps/rejected": -2.6703922748565674, "loss": 1.0747, "nll_loss": 1.0018665790557861, "rewards/accuracies": 0.875, "rewards/chosen": -0.0827670693397522, "rewards/margins": 0.1842721700668335, "rewards/rejected": -0.2670392394065857, "step": 3639 }, { "epoch": 2.2205276803416196, "grad_norm": 1.5185614824295044, "learning_rate": 4.462951622780159e-06, "log_odds_chosen": 0.030401602387428284, "log_odds_ratio": -0.9811215400695801, "logits/chosen": -0.9497411251068115, "logits/rejected": -0.8426184058189392, "logps/chosen": -1.2909833192825317, "logps/rejected": -1.4653139114379883, "loss": 1.2597, "nll_loss": 1.4662779569625854, "rewards/accuracies": 0.25, "rewards/chosen": -0.1290983259677887, "rewards/margins": 0.01743306592106819, "rewards/rejected": -0.14653140306472778, "step": 3640 }, { "epoch": 2.2211377154186365, "grad_norm": 1.1256128549575806, "learning_rate": 4.461971830985915e-06, "log_odds_chosen": 2.5669984817504883, "log_odds_ratio": -0.320464551448822, "logits/chosen": -0.7273551225662231, "logits/rejected": -0.8363503217697144, "logps/chosen": -0.7273396253585815, "logps/rejected": -2.8639931678771973, "loss": 0.9599, "nll_loss": 0.9095445871353149, "rewards/accuracies": 0.875, "rewards/chosen": -0.07273396849632263, "rewards/margins": 0.21366535127162933, "rewards/rejected": -0.28639930486679077, "step": 3641 }, { "epoch": 2.2217477504956533, "grad_norm": 2.244572639465332, "learning_rate": 4.460992039191672e-06, "log_odds_chosen": 2.2603211402893066, "log_odds_ratio": -0.24341288208961487, "logits/chosen": -0.6435690522193909, "logits/rejected": -0.7269929647445679, "logps/chosen": -0.7105200886726379, "logps/rejected": -2.4513866901397705, "loss": 1.0533, "nll_loss": 0.8449592590332031, "rewards/accuracies": 0.875, "rewards/chosen": -0.07105200737714767, "rewards/margins": 0.17408664524555206, "rewards/rejected": -0.24513867497444153, "step": 3642 }, { "epoch": 2.2223577855726706, "grad_norm": 2.2268576622009277, "learning_rate": 4.4600122473974276e-06, "log_odds_chosen": 2.091378927230835, "log_odds_ratio": -0.43304651975631714, "logits/chosen": -0.8376953601837158, "logits/rejected": -1.0341293811798096, "logps/chosen": -0.7548791170120239, "logps/rejected": -2.260450839996338, "loss": 1.0679, "nll_loss": 0.9545304775238037, "rewards/accuracies": 0.625, "rewards/chosen": -0.07548791915178299, "rewards/margins": 0.15055719017982483, "rewards/rejected": -0.22604511678218842, "step": 3643 }, { "epoch": 2.2229678206496875, "grad_norm": 3.809037208557129, "learning_rate": 4.459032455603184e-06, "log_odds_chosen": 2.5571506023406982, "log_odds_ratio": -0.3165692985057831, "logits/chosen": -1.0185388326644897, "logits/rejected": -1.0161492824554443, "logps/chosen": -0.7656204104423523, "logps/rejected": -2.8242807388305664, "loss": 1.2196, "nll_loss": 1.0650925636291504, "rewards/accuracies": 0.875, "rewards/chosen": -0.07656203955411911, "rewards/margins": 0.2058660238981247, "rewards/rejected": -0.2824280858039856, "step": 3644 }, { "epoch": 2.2235778557267043, "grad_norm": 1.6875975131988525, "learning_rate": 4.458052663808941e-06, "log_odds_chosen": 2.089240789413452, "log_odds_ratio": -0.35957199335098267, "logits/chosen": -0.8203725814819336, "logits/rejected": -0.9286679029464722, "logps/chosen": -0.7112411856651306, "logps/rejected": -2.341493606567383, "loss": 1.1078, "nll_loss": 0.9398941993713379, "rewards/accuracies": 0.75, "rewards/chosen": -0.0711241215467453, "rewards/margins": 0.16302524507045746, "rewards/rejected": -0.23414935171604156, "step": 3645 }, { "epoch": 2.224187890803721, "grad_norm": 1.447194218635559, "learning_rate": 4.457072872014697e-06, "log_odds_chosen": 0.6074912548065186, "log_odds_ratio": -0.7239988446235657, "logits/chosen": -1.0632396936416626, "logits/rejected": -0.9851881265640259, "logps/chosen": -1.17300283908844, "logps/rejected": -1.788891077041626, "loss": 1.0387, "nll_loss": 1.2709605693817139, "rewards/accuracies": 0.25, "rewards/chosen": -0.11730027943849564, "rewards/margins": 0.0615888312458992, "rewards/rejected": -0.17888911068439484, "step": 3646 }, { "epoch": 2.224797925880738, "grad_norm": 1.3058396577835083, "learning_rate": 4.456093080220453e-06, "log_odds_chosen": 0.7855527400970459, "log_odds_ratio": -0.6132136583328247, "logits/chosen": -0.7448775172233582, "logits/rejected": -0.8564314842224121, "logps/chosen": -0.734271764755249, "logps/rejected": -1.2444801330566406, "loss": 1.1903, "nll_loss": 1.0312894582748413, "rewards/accuracies": 0.5, "rewards/chosen": -0.07342717796564102, "rewards/margins": 0.051020845770835876, "rewards/rejected": -0.1244480162858963, "step": 3647 }, { "epoch": 2.225407960957755, "grad_norm": 2.0017731189727783, "learning_rate": 4.455113288426209e-06, "log_odds_chosen": 0.48700952529907227, "log_odds_ratio": -0.6777213215827942, "logits/chosen": -0.9274716377258301, "logits/rejected": -0.8604868054389954, "logps/chosen": -0.9036245346069336, "logps/rejected": -1.238013505935669, "loss": 1.0309, "nll_loss": 1.0233029127120972, "rewards/accuracies": 0.625, "rewards/chosen": -0.09036245197057724, "rewards/margins": 0.03343890234827995, "rewards/rejected": -0.1238013505935669, "step": 3648 }, { "epoch": 2.226017996034772, "grad_norm": 2.7570109367370605, "learning_rate": 4.454133496631965e-06, "log_odds_chosen": 1.7154369354248047, "log_odds_ratio": -0.45229125022888184, "logits/chosen": -0.7972027659416199, "logits/rejected": -0.7212133407592773, "logps/chosen": -0.9312183856964111, "logps/rejected": -2.2597625255584717, "loss": 1.1, "nll_loss": 0.9901058077812195, "rewards/accuracies": 0.5, "rewards/chosen": -0.09312184154987335, "rewards/margins": 0.1328544020652771, "rewards/rejected": -0.22597624361515045, "step": 3649 }, { "epoch": 2.226628031111789, "grad_norm": 1.1848416328430176, "learning_rate": 4.453153704837721e-06, "log_odds_chosen": 0.9910850524902344, "log_odds_ratio": -0.38584059476852417, "logits/chosen": -0.6269676089286804, "logits/rejected": -0.6724063158035278, "logps/chosen": -0.9691085815429688, "logps/rejected": -1.7422785758972168, "loss": 1.2256, "nll_loss": 1.252776861190796, "rewards/accuracies": 0.75, "rewards/chosen": -0.09691085666418076, "rewards/margins": 0.077317014336586, "rewards/rejected": -0.17422786355018616, "step": 3650 }, { "epoch": 2.227238066188806, "grad_norm": 10.175954818725586, "learning_rate": 4.452173913043478e-06, "log_odds_chosen": 2.2154171466827393, "log_odds_ratio": -0.29558396339416504, "logits/chosen": -0.5529034733772278, "logits/rejected": -0.712926983833313, "logps/chosen": -0.8166603446006775, "logps/rejected": -2.362093210220337, "loss": 1.1099, "nll_loss": 0.9072473645210266, "rewards/accuracies": 0.875, "rewards/chosen": -0.08166603744029999, "rewards/margins": 0.15454328060150146, "rewards/rejected": -0.23620931804180145, "step": 3651 }, { "epoch": 2.2278481012658227, "grad_norm": 2.0885674953460693, "learning_rate": 4.451194121249234e-06, "log_odds_chosen": 1.4844621419906616, "log_odds_ratio": -0.5202593803405762, "logits/chosen": -0.7299773693084717, "logits/rejected": -0.8434908390045166, "logps/chosen": -0.7129049301147461, "logps/rejected": -1.824326515197754, "loss": 1.2019, "nll_loss": 1.1164357662200928, "rewards/accuracies": 0.625, "rewards/chosen": -0.07129049301147461, "rewards/margins": 0.11114215850830078, "rewards/rejected": -0.18243266642093658, "step": 3652 }, { "epoch": 2.2284581363428395, "grad_norm": 1.3540900945663452, "learning_rate": 4.450214329454991e-06, "log_odds_chosen": 0.317064106464386, "log_odds_ratio": -0.6580198407173157, "logits/chosen": -0.7077003121376038, "logits/rejected": -0.6669837832450867, "logps/chosen": -0.7019420266151428, "logps/rejected": -0.8400650024414062, "loss": 1.0701, "nll_loss": 1.0201284885406494, "rewards/accuracies": 0.375, "rewards/chosen": -0.07019419968128204, "rewards/margins": 0.013812299817800522, "rewards/rejected": -0.08400650322437286, "step": 3653 }, { "epoch": 2.229068171419857, "grad_norm": 1.3018712997436523, "learning_rate": 4.449234537660747e-06, "log_odds_chosen": 0.6460211277008057, "log_odds_ratio": -0.5983914732933044, "logits/chosen": -0.9238421320915222, "logits/rejected": -0.7952417135238647, "logps/chosen": -0.9544610381126404, "logps/rejected": -1.4487184286117554, "loss": 1.1098, "nll_loss": 1.1503679752349854, "rewards/accuracies": 0.5, "rewards/chosen": -0.09544610977172852, "rewards/margins": 0.049425750970840454, "rewards/rejected": -0.14487186074256897, "step": 3654 }, { "epoch": 2.2296782064968736, "grad_norm": 2.2345550060272217, "learning_rate": 4.448254745866503e-06, "log_odds_chosen": 1.124933123588562, "log_odds_ratio": -0.4729061722755432, "logits/chosen": -0.7342696189880371, "logits/rejected": -0.6881695985794067, "logps/chosen": -0.8728382587432861, "logps/rejected": -1.689795732498169, "loss": 1.2023, "nll_loss": 1.1887567043304443, "rewards/accuracies": 0.625, "rewards/chosen": -0.08728383481502533, "rewards/margins": 0.08169575035572052, "rewards/rejected": -0.16897958517074585, "step": 3655 }, { "epoch": 2.2302882415738905, "grad_norm": 2.352786064147949, "learning_rate": 4.44727495407226e-06, "log_odds_chosen": 1.0835812091827393, "log_odds_ratio": -0.52813321352005, "logits/chosen": -0.7590537667274475, "logits/rejected": -0.7404001355171204, "logps/chosen": -0.8217451572418213, "logps/rejected": -1.6766283512115479, "loss": 1.0194, "nll_loss": 1.1719622611999512, "rewards/accuracies": 0.625, "rewards/chosen": -0.08217451721429825, "rewards/margins": 0.08548832684755325, "rewards/rejected": -0.1676628291606903, "step": 3656 }, { "epoch": 2.2308982766509073, "grad_norm": 1.3115208148956299, "learning_rate": 4.446295162278016e-06, "log_odds_chosen": 1.213385820388794, "log_odds_ratio": -0.46028032898902893, "logits/chosen": -0.8426735401153564, "logits/rejected": -0.8376807570457458, "logps/chosen": -0.6992343068122864, "logps/rejected": -1.5485050678253174, "loss": 0.9736, "nll_loss": 0.9341418743133545, "rewards/accuracies": 0.75, "rewards/chosen": -0.06992343068122864, "rewards/margins": 0.08492709696292877, "rewards/rejected": -0.1548505276441574, "step": 3657 }, { "epoch": 2.231508311727924, "grad_norm": 1.6628819704055786, "learning_rate": 4.445315370483772e-06, "log_odds_chosen": 2.181142807006836, "log_odds_ratio": -0.273511677980423, "logits/chosen": -0.39971989393234253, "logits/rejected": -0.5686305165290833, "logps/chosen": -0.5035005211830139, "logps/rejected": -2.0307488441467285, "loss": 0.7542, "nll_loss": 0.5935463905334473, "rewards/accuracies": 0.875, "rewards/chosen": -0.05035005509853363, "rewards/margins": 0.15272483229637146, "rewards/rejected": -0.2030748724937439, "step": 3658 }, { "epoch": 2.2321183468049415, "grad_norm": 1.1643227338790894, "learning_rate": 4.444335578689529e-06, "log_odds_chosen": 0.20137718319892883, "log_odds_ratio": -0.7357843518257141, "logits/chosen": -0.6803021430969238, "logits/rejected": -0.6845685243606567, "logps/chosen": -0.7966328859329224, "logps/rejected": -0.9148931503295898, "loss": 1.1209, "nll_loss": 0.8841718435287476, "rewards/accuracies": 0.625, "rewards/chosen": -0.07966329157352448, "rewards/margins": 0.01182602345943451, "rewards/rejected": -0.09148931503295898, "step": 3659 }, { "epoch": 2.2327283818819583, "grad_norm": 1.7493438720703125, "learning_rate": 4.443355786895284e-06, "log_odds_chosen": 2.0894274711608887, "log_odds_ratio": -0.25254660844802856, "logits/chosen": -0.8898342847824097, "logits/rejected": -0.811692476272583, "logps/chosen": -1.0246411561965942, "logps/rejected": -2.7068393230438232, "loss": 1.2723, "nll_loss": 1.2738823890686035, "rewards/accuracies": 1.0, "rewards/chosen": -0.10246411710977554, "rewards/margins": 0.16821981966495514, "rewards/rejected": -0.2706839442253113, "step": 3660 }, { "epoch": 2.233338416958975, "grad_norm": 1.4755513668060303, "learning_rate": 4.44237599510104e-06, "log_odds_chosen": 2.3213248252868652, "log_odds_ratio": -0.346135675907135, "logits/chosen": -0.7317697405815125, "logits/rejected": -0.8402038216590881, "logps/chosen": -0.6889616250991821, "logps/rejected": -2.448817253112793, "loss": 1.0205, "nll_loss": 0.9081838130950928, "rewards/accuracies": 0.875, "rewards/chosen": -0.06889616698026657, "rewards/margins": 0.17598557472229004, "rewards/rejected": -0.2448817491531372, "step": 3661 }, { "epoch": 2.233948452035992, "grad_norm": 1.6401270627975464, "learning_rate": 4.441396203306797e-06, "log_odds_chosen": 0.9893078207969666, "log_odds_ratio": -0.5375961065292358, "logits/chosen": -0.9100623726844788, "logits/rejected": -0.9014368057250977, "logps/chosen": -0.8113498687744141, "logps/rejected": -1.5517759323120117, "loss": 1.185, "nll_loss": 1.4238102436065674, "rewards/accuracies": 0.625, "rewards/chosen": -0.08113498985767365, "rewards/margins": 0.07404260337352753, "rewards/rejected": -0.15517759323120117, "step": 3662 }, { "epoch": 2.234558487113009, "grad_norm": 5.854127407073975, "learning_rate": 4.440416411512553e-06, "log_odds_chosen": 1.0311709642410278, "log_odds_ratio": -0.5213012099266052, "logits/chosen": -0.8464331030845642, "logits/rejected": -0.8644919395446777, "logps/chosen": -0.7294811010360718, "logps/rejected": -1.4230982065200806, "loss": 1.0047, "nll_loss": 0.8430011868476868, "rewards/accuracies": 0.625, "rewards/chosen": -0.07294810563325882, "rewards/margins": 0.06936170905828476, "rewards/rejected": -0.14230982959270477, "step": 3663 }, { "epoch": 2.235168522190026, "grad_norm": 1.198409914970398, "learning_rate": 4.43943661971831e-06, "log_odds_chosen": 2.7772860527038574, "log_odds_ratio": -0.2843320965766907, "logits/chosen": -0.774822473526001, "logits/rejected": -1.0416383743286133, "logps/chosen": -0.881908118724823, "logps/rejected": -3.184443235397339, "loss": 1.1396, "nll_loss": 1.1372654438018799, "rewards/accuracies": 0.75, "rewards/chosen": -0.08819081634283066, "rewards/margins": 0.23025351762771606, "rewards/rejected": -0.3184443414211273, "step": 3664 }, { "epoch": 2.235778557267043, "grad_norm": 2.8420703411102295, "learning_rate": 4.438456827924066e-06, "log_odds_chosen": 1.61692214012146, "log_odds_ratio": -0.558256983757019, "logits/chosen": -0.5322059392929077, "logits/rejected": -0.5546191334724426, "logps/chosen": -0.7852010130882263, "logps/rejected": -1.9785418510437012, "loss": 1.0358, "nll_loss": 0.8225569725036621, "rewards/accuracies": 0.5, "rewards/chosen": -0.07852010428905487, "rewards/margins": 0.11933407932519913, "rewards/rejected": -0.1978541910648346, "step": 3665 }, { "epoch": 2.23638859234406, "grad_norm": 1.5210731029510498, "learning_rate": 4.437477036129822e-06, "log_odds_chosen": 2.0571706295013428, "log_odds_ratio": -0.6091500520706177, "logits/chosen": -0.8167483806610107, "logits/rejected": -0.8435706496238708, "logps/chosen": -0.7673311233520508, "logps/rejected": -2.438335657119751, "loss": 1.055, "nll_loss": 1.1206042766571045, "rewards/accuracies": 0.5, "rewards/chosen": -0.07673311233520508, "rewards/margins": 0.1671004444360733, "rewards/rejected": -0.24383355677127838, "step": 3666 }, { "epoch": 2.2369986274210767, "grad_norm": 1.190500020980835, "learning_rate": 4.436497244335579e-06, "log_odds_chosen": 1.7466036081314087, "log_odds_ratio": -0.2795974314212799, "logits/chosen": -0.821578860282898, "logits/rejected": -0.9220658540725708, "logps/chosen": -0.7964269518852234, "logps/rejected": -2.034531593322754, "loss": 1.0501, "nll_loss": 0.9159253239631653, "rewards/accuracies": 1.0, "rewards/chosen": -0.07964269816875458, "rewards/margins": 0.12381047010421753, "rewards/rejected": -0.2034531533718109, "step": 3667 }, { "epoch": 2.2376086624980935, "grad_norm": 1.3852373361587524, "learning_rate": 4.435517452541335e-06, "log_odds_chosen": 1.7565228939056396, "log_odds_ratio": -0.4475376307964325, "logits/chosen": -0.6939830183982849, "logits/rejected": -0.7954548597335815, "logps/chosen": -0.8187403678894043, "logps/rejected": -2.0288686752319336, "loss": 1.0365, "nll_loss": 0.992929995059967, "rewards/accuracies": 0.875, "rewards/chosen": -0.08187403529882431, "rewards/margins": 0.1210128515958786, "rewards/rejected": -0.20288687944412231, "step": 3668 }, { "epoch": 2.2382186975751104, "grad_norm": 1.2507822513580322, "learning_rate": 4.434537660747091e-06, "log_odds_chosen": 1.3953663110733032, "log_odds_ratio": -0.5637639760971069, "logits/chosen": -0.8153934478759766, "logits/rejected": -0.7426707744598389, "logps/chosen": -0.9512243866920471, "logps/rejected": -2.075479507446289, "loss": 1.2185, "nll_loss": 1.1690782308578491, "rewards/accuracies": 0.5, "rewards/chosen": -0.09512244164943695, "rewards/margins": 0.11242550611495972, "rewards/rejected": -0.20754796266555786, "step": 3669 }, { "epoch": 2.2388287326521277, "grad_norm": 2.1668319702148438, "learning_rate": 4.433557868952848e-06, "log_odds_chosen": 2.7788004875183105, "log_odds_ratio": -0.38810989260673523, "logits/chosen": -0.8566645383834839, "logits/rejected": -1.0040541887283325, "logps/chosen": -0.9999309182167053, "logps/rejected": -3.1389060020446777, "loss": 1.1374, "nll_loss": 1.3194918632507324, "rewards/accuracies": 0.75, "rewards/chosen": -0.09999309480190277, "rewards/margins": 0.21389751136302948, "rewards/rejected": -0.31389060616493225, "step": 3670 }, { "epoch": 2.2394387677291445, "grad_norm": 8.723959922790527, "learning_rate": 4.432578077158603e-06, "log_odds_chosen": 0.4252946972846985, "log_odds_ratio": -0.6514432430267334, "logits/chosen": -0.8366045355796814, "logits/rejected": -0.8131610155105591, "logps/chosen": -0.878344714641571, "logps/rejected": -1.2041544914245605, "loss": 1.0476, "nll_loss": 0.9314035177230835, "rewards/accuracies": 0.625, "rewards/chosen": -0.08783447742462158, "rewards/margins": 0.03258097171783447, "rewards/rejected": -0.12041543424129486, "step": 3671 }, { "epoch": 2.2400488028061614, "grad_norm": 4.128843784332275, "learning_rate": 4.431598285364359e-06, "log_odds_chosen": 1.2729952335357666, "log_odds_ratio": -0.4299512803554535, "logits/chosen": -0.7716963887214661, "logits/rejected": -0.8665793538093567, "logps/chosen": -0.8068802356719971, "logps/rejected": -1.6880033016204834, "loss": 0.9429, "nll_loss": 0.9083400964736938, "rewards/accuracies": 0.75, "rewards/chosen": -0.08068802952766418, "rewards/margins": 0.08811230212450027, "rewards/rejected": -0.16880033910274506, "step": 3672 }, { "epoch": 2.240658837883178, "grad_norm": 2.8822412490844727, "learning_rate": 4.430618493570116e-06, "log_odds_chosen": 0.8673656582832336, "log_odds_ratio": -0.5015956163406372, "logits/chosen": -0.8095037341117859, "logits/rejected": -0.8494553565979004, "logps/chosen": -0.7495286464691162, "logps/rejected": -1.2754234075546265, "loss": 1.2057, "nll_loss": 0.9658111333847046, "rewards/accuracies": 0.75, "rewards/chosen": -0.0749528557062149, "rewards/margins": 0.052589479833841324, "rewards/rejected": -0.12754234671592712, "step": 3673 }, { "epoch": 2.241268872960195, "grad_norm": 2.12506103515625, "learning_rate": 4.4296387017758724e-06, "log_odds_chosen": 1.6061375141143799, "log_odds_ratio": -0.2777023911476135, "logits/chosen": -0.592521071434021, "logits/rejected": -0.6849977374076843, "logps/chosen": -0.6359518766403198, "logps/rejected": -1.6183021068572998, "loss": 0.9628, "nll_loss": 0.6405145525932312, "rewards/accuracies": 0.875, "rewards/chosen": -0.06359519064426422, "rewards/margins": 0.09823501110076904, "rewards/rejected": -0.16183021664619446, "step": 3674 }, { "epoch": 2.2418789080372123, "grad_norm": 2.020704507827759, "learning_rate": 4.4286589099816286e-06, "log_odds_chosen": 1.5236737728118896, "log_odds_ratio": -0.3845515251159668, "logits/chosen": -0.8919179439544678, "logits/rejected": -0.9549755454063416, "logps/chosen": -0.7773796916007996, "logps/rejected": -1.794252872467041, "loss": 1.1898, "nll_loss": 1.2228659391403198, "rewards/accuracies": 0.75, "rewards/chosen": -0.0777379721403122, "rewards/margins": 0.10168731212615967, "rewards/rejected": -0.17942528426647186, "step": 3675 }, { "epoch": 2.242488943114229, "grad_norm": 1.319076657295227, "learning_rate": 4.4276791181873855e-06, "log_odds_chosen": 1.6192456483840942, "log_odds_ratio": -0.35675203800201416, "logits/chosen": -0.9683201909065247, "logits/rejected": -0.8963865637779236, "logps/chosen": -0.7859050035476685, "logps/rejected": -2.0195059776306152, "loss": 1.002, "nll_loss": 0.9883821606636047, "rewards/accuracies": 0.875, "rewards/chosen": -0.0785904973745346, "rewards/margins": 0.12336012721061707, "rewards/rejected": -0.20195060968399048, "step": 3676 }, { "epoch": 2.243098978191246, "grad_norm": 1.4718996286392212, "learning_rate": 4.426699326393141e-06, "log_odds_chosen": 1.3664448261260986, "log_odds_ratio": -0.3960570693016052, "logits/chosen": -0.8407243490219116, "logits/rejected": -0.926510751247406, "logps/chosen": -0.7465761303901672, "logps/rejected": -1.7253373861312866, "loss": 1.082, "nll_loss": 1.043776273727417, "rewards/accuracies": 0.75, "rewards/chosen": -0.0746576189994812, "rewards/margins": 0.09787611663341522, "rewards/rejected": -0.17253375053405762, "step": 3677 }, { "epoch": 2.243709013268263, "grad_norm": 1.2572742700576782, "learning_rate": 4.425719534598897e-06, "log_odds_chosen": 0.903182327747345, "log_odds_ratio": -0.4802512526512146, "logits/chosen": -0.8943805694580078, "logits/rejected": -0.8251007795333862, "logps/chosen": -0.8851925730705261, "logps/rejected": -1.4896554946899414, "loss": 1.0021, "nll_loss": 1.0241392850875854, "rewards/accuracies": 0.75, "rewards/chosen": -0.08851926028728485, "rewards/margins": 0.06044628471136093, "rewards/rejected": -0.14896553754806519, "step": 3678 }, { "epoch": 2.2443190483452797, "grad_norm": 1.0079941749572754, "learning_rate": 4.424739742804654e-06, "log_odds_chosen": 0.5881235599517822, "log_odds_ratio": -0.8567600846290588, "logits/chosen": -0.8299847841262817, "logits/rejected": -0.8316963911056519, "logps/chosen": -1.0347192287445068, "logps/rejected": -1.5881311893463135, "loss": 1.0616, "nll_loss": 1.1722005605697632, "rewards/accuracies": 0.5, "rewards/chosen": -0.10347191989421844, "rewards/margins": 0.05534118786454201, "rewards/rejected": -0.15881311893463135, "step": 3679 }, { "epoch": 2.2449290834222966, "grad_norm": 1.4825290441513062, "learning_rate": 4.42375995101041e-06, "log_odds_chosen": 1.8699779510498047, "log_odds_ratio": -0.38879144191741943, "logits/chosen": -0.7407248020172119, "logits/rejected": -0.8330116868019104, "logps/chosen": -0.7343224287033081, "logps/rejected": -2.1904449462890625, "loss": 1.098, "nll_loss": 1.0603413581848145, "rewards/accuracies": 0.875, "rewards/chosen": -0.07343223690986633, "rewards/margins": 0.14561226963996887, "rewards/rejected": -0.2190445065498352, "step": 3680 }, { "epoch": 2.245539118499314, "grad_norm": 1.433786153793335, "learning_rate": 4.422780159216167e-06, "log_odds_chosen": 1.4171106815338135, "log_odds_ratio": -0.48779767751693726, "logits/chosen": -0.9422720670700073, "logits/rejected": -0.8517917394638062, "logps/chosen": -0.8121074438095093, "logps/rejected": -1.8428549766540527, "loss": 1.0056, "nll_loss": 1.0474529266357422, "rewards/accuracies": 0.625, "rewards/chosen": -0.08121074736118317, "rewards/margins": 0.10307474434375763, "rewards/rejected": -0.1842854917049408, "step": 3681 }, { "epoch": 2.2461491535763307, "grad_norm": 2.435925006866455, "learning_rate": 4.421800367421923e-06, "log_odds_chosen": 1.5441555976867676, "log_odds_ratio": -0.4680396616458893, "logits/chosen": -0.7260335087776184, "logits/rejected": -0.7147573828697205, "logps/chosen": -0.7347360253334045, "logps/rejected": -1.7855249643325806, "loss": 0.9169, "nll_loss": 0.9065122604370117, "rewards/accuracies": 0.625, "rewards/chosen": -0.07347360253334045, "rewards/margins": 0.10507889091968536, "rewards/rejected": -0.178552508354187, "step": 3682 }, { "epoch": 2.2467591886533476, "grad_norm": 1.278171181678772, "learning_rate": 4.420820575627678e-06, "log_odds_chosen": 0.8571873307228088, "log_odds_ratio": -0.5970165729522705, "logits/chosen": -0.8411440849304199, "logits/rejected": -0.847093403339386, "logps/chosen": -0.83689945936203, "logps/rejected": -1.4919817447662354, "loss": 1.0208, "nll_loss": 0.9343882203102112, "rewards/accuracies": 0.5, "rewards/chosen": -0.08368995040655136, "rewards/margins": 0.06550822407007217, "rewards/rejected": -0.14919817447662354, "step": 3683 }, { "epoch": 2.2473692237303644, "grad_norm": 3.109191656112671, "learning_rate": 4.419840783833435e-06, "log_odds_chosen": 2.8412671089172363, "log_odds_ratio": -0.3888739347457886, "logits/chosen": -0.8684310913085938, "logits/rejected": -1.0216081142425537, "logps/chosen": -0.7003990411758423, "logps/rejected": -3.1289334297180176, "loss": 1.1064, "nll_loss": 1.0335729122161865, "rewards/accuracies": 0.75, "rewards/chosen": -0.07003989815711975, "rewards/margins": 0.24285344779491425, "rewards/rejected": -0.3128933310508728, "step": 3684 }, { "epoch": 2.2479792588073813, "grad_norm": 3.611781358718872, "learning_rate": 4.4188609920391915e-06, "log_odds_chosen": 1.2560291290283203, "log_odds_ratio": -0.390347421169281, "logits/chosen": -0.8833474516868591, "logits/rejected": -0.9876581430435181, "logps/chosen": -0.7599651217460632, "logps/rejected": -1.6503081321716309, "loss": 1.0195, "nll_loss": 0.9649044275283813, "rewards/accuracies": 0.875, "rewards/chosen": -0.0759965181350708, "rewards/margins": 0.089034304022789, "rewards/rejected": -0.1650308221578598, "step": 3685 }, { "epoch": 2.2485892938843985, "grad_norm": 2.336167573928833, "learning_rate": 4.417881200244948e-06, "log_odds_chosen": 3.3251373767852783, "log_odds_ratio": -0.24323789775371552, "logits/chosen": -0.987727165222168, "logits/rejected": -1.0913071632385254, "logps/chosen": -0.7275193333625793, "logps/rejected": -3.4470958709716797, "loss": 1.0129, "nll_loss": 1.077406883239746, "rewards/accuracies": 0.875, "rewards/chosen": -0.07275193184614182, "rewards/margins": 0.2719576954841614, "rewards/rejected": -0.3447096347808838, "step": 3686 }, { "epoch": 2.2491993289614154, "grad_norm": 3.1425840854644775, "learning_rate": 4.4169014084507046e-06, "log_odds_chosen": 2.017396926879883, "log_odds_ratio": -0.6113195419311523, "logits/chosen": -0.8585768938064575, "logits/rejected": -0.8504894971847534, "logps/chosen": -0.9379143118858337, "logps/rejected": -2.7177019119262695, "loss": 1.1426, "nll_loss": 1.1575794219970703, "rewards/accuracies": 0.625, "rewards/chosen": -0.0937914326786995, "rewards/margins": 0.1779787689447403, "rewards/rejected": -0.2717702388763428, "step": 3687 }, { "epoch": 2.2498093640384322, "grad_norm": 1.9154090881347656, "learning_rate": 4.41592161665646e-06, "log_odds_chosen": 0.8691943883895874, "log_odds_ratio": -0.5824052691459656, "logits/chosen": -0.9065130352973938, "logits/rejected": -0.9874013662338257, "logps/chosen": -0.9518378973007202, "logps/rejected": -1.5332518815994263, "loss": 0.95, "nll_loss": 1.1739808320999146, "rewards/accuracies": 0.75, "rewards/chosen": -0.09518378973007202, "rewards/margins": 0.05814139172434807, "rewards/rejected": -0.1533251851797104, "step": 3688 }, { "epoch": 2.250419399115449, "grad_norm": 1.8054903745651245, "learning_rate": 4.414941824862216e-06, "log_odds_chosen": 1.6273586750030518, "log_odds_ratio": -0.3987812101840973, "logits/chosen": -0.8446712493896484, "logits/rejected": -0.987667441368103, "logps/chosen": -0.6648598909378052, "logps/rejected": -1.8460133075714111, "loss": 1.0081, "nll_loss": 0.872954249382019, "rewards/accuracies": 0.625, "rewards/chosen": -0.06648599356412888, "rewards/margins": 0.11811535060405731, "rewards/rejected": -0.18460135161876678, "step": 3689 }, { "epoch": 2.251029434192466, "grad_norm": 2.472923994064331, "learning_rate": 4.413962033067973e-06, "log_odds_chosen": 0.2400076985359192, "log_odds_ratio": -0.6614056825637817, "logits/chosen": -0.8445185422897339, "logits/rejected": -0.8646679520606995, "logps/chosen": -0.9009891748428345, "logps/rejected": -1.0467355251312256, "loss": 1.2265, "nll_loss": 1.127732515335083, "rewards/accuracies": 0.625, "rewards/chosen": -0.09009892493486404, "rewards/margins": 0.014574636705219746, "rewards/rejected": -0.10467355698347092, "step": 3690 }, { "epoch": 2.2516394692694828, "grad_norm": 1.264128565788269, "learning_rate": 4.412982241273729e-06, "log_odds_chosen": 1.9906611442565918, "log_odds_ratio": -0.37962374091148376, "logits/chosen": -0.8585606813430786, "logits/rejected": -0.8916982412338257, "logps/chosen": -0.7125940322875977, "logps/rejected": -2.155705690383911, "loss": 1.0476, "nll_loss": 0.8805764317512512, "rewards/accuracies": 0.75, "rewards/chosen": -0.07125940918922424, "rewards/margins": 0.14431115984916687, "rewards/rejected": -0.2155705690383911, "step": 3691 }, { "epoch": 2.2522495043465, "grad_norm": 1.9755804538726807, "learning_rate": 4.412002449479486e-06, "log_odds_chosen": 2.5692434310913086, "log_odds_ratio": -0.3441678583621979, "logits/chosen": -0.6391957402229309, "logits/rejected": -0.7527121305465698, "logps/chosen": -0.481869637966156, "logps/rejected": -2.4261674880981445, "loss": 0.9699, "nll_loss": 0.8648095726966858, "rewards/accuracies": 0.75, "rewards/chosen": -0.04818696528673172, "rewards/margins": 0.19442981481552124, "rewards/rejected": -0.24261675775051117, "step": 3692 }, { "epoch": 2.252859539423517, "grad_norm": 1.6460448503494263, "learning_rate": 4.411022657685242e-06, "log_odds_chosen": 1.8250237703323364, "log_odds_ratio": -0.5300679802894592, "logits/chosen": -1.0818569660186768, "logits/rejected": -1.0053772926330566, "logps/chosen": -0.8159835934638977, "logps/rejected": -2.4394187927246094, "loss": 1.1802, "nll_loss": 1.266209363937378, "rewards/accuracies": 0.75, "rewards/chosen": -0.08159835636615753, "rewards/margins": 0.16234351694583893, "rewards/rejected": -0.24394187331199646, "step": 3693 }, { "epoch": 2.2534695745005338, "grad_norm": 1.703680157661438, "learning_rate": 4.4100428658909974e-06, "log_odds_chosen": 1.1480004787445068, "log_odds_ratio": -0.5753116011619568, "logits/chosen": -0.8819245100021362, "logits/rejected": -0.9101800918579102, "logps/chosen": -0.912768542766571, "logps/rejected": -1.6927632093429565, "loss": 0.9884, "nll_loss": 1.1606804132461548, "rewards/accuracies": 0.5, "rewards/chosen": -0.0912768542766571, "rewards/margins": 0.07799945771694183, "rewards/rejected": -0.16927632689476013, "step": 3694 }, { "epoch": 2.2540796095775506, "grad_norm": 0.974498987197876, "learning_rate": 4.409063074096754e-06, "log_odds_chosen": 1.4460428953170776, "log_odds_ratio": -0.5857232809066772, "logits/chosen": -0.8822861909866333, "logits/rejected": -0.9634264707565308, "logps/chosen": -0.6263403296470642, "logps/rejected": -1.6366513967514038, "loss": 0.9905, "nll_loss": 1.120901346206665, "rewards/accuracies": 0.625, "rewards/chosen": -0.06263403594493866, "rewards/margins": 0.1010311171412468, "rewards/rejected": -0.16366514563560486, "step": 3695 }, { "epoch": 2.2546896446545674, "grad_norm": 1.8764909505844116, "learning_rate": 4.4080832823025105e-06, "log_odds_chosen": 0.781329333782196, "log_odds_ratio": -0.45805731415748596, "logits/chosen": -0.7168333530426025, "logits/rejected": -0.7343552112579346, "logps/chosen": -0.7832685112953186, "logps/rejected": -1.2288401126861572, "loss": 1.1137, "nll_loss": 1.0221134424209595, "rewards/accuracies": 0.875, "rewards/chosen": -0.07832685112953186, "rewards/margins": 0.04455716162919998, "rewards/rejected": -0.12288402020931244, "step": 3696 }, { "epoch": 2.2552996797315847, "grad_norm": 1.3283498287200928, "learning_rate": 4.407103490508267e-06, "log_odds_chosen": 2.9248266220092773, "log_odds_ratio": -0.34967049956321716, "logits/chosen": -0.546234130859375, "logits/rejected": -0.7213085293769836, "logps/chosen": -0.44601818919181824, "logps/rejected": -2.614644765853882, "loss": 0.9199, "nll_loss": 0.6687975525856018, "rewards/accuracies": 0.75, "rewards/chosen": -0.044601816684007645, "rewards/margins": 0.21686264872550964, "rewards/rejected": -0.2614644467830658, "step": 3697 }, { "epoch": 2.2559097148086016, "grad_norm": 1.2988574504852295, "learning_rate": 4.406123698714024e-06, "log_odds_chosen": 1.125391960144043, "log_odds_ratio": -0.48038217425346375, "logits/chosen": -0.7962017059326172, "logits/rejected": -0.9166330099105835, "logps/chosen": -1.010825276374817, "logps/rejected": -1.898496150970459, "loss": 1.0524, "nll_loss": 1.1465809345245361, "rewards/accuracies": 0.625, "rewards/chosen": -0.10108252614736557, "rewards/margins": 0.08876708894968033, "rewards/rejected": -0.1898496150970459, "step": 3698 }, { "epoch": 2.2565197498856184, "grad_norm": 1.7018357515335083, "learning_rate": 4.40514390691978e-06, "log_odds_chosen": 0.9253621101379395, "log_odds_ratio": -0.4545353949069977, "logits/chosen": -0.7534382343292236, "logits/rejected": -0.796380877494812, "logps/chosen": -0.7905114889144897, "logps/rejected": -1.3794875144958496, "loss": 1.1339, "nll_loss": 0.9976596832275391, "rewards/accuracies": 0.75, "rewards/chosen": -0.07905115187168121, "rewards/margins": 0.05889759212732315, "rewards/rejected": -0.13794875144958496, "step": 3699 }, { "epoch": 2.2571297849626353, "grad_norm": 1.825243592262268, "learning_rate": 4.404164115125535e-06, "log_odds_chosen": 0.7246125340461731, "log_odds_ratio": -0.5234280824661255, "logits/chosen": -0.9746597409248352, "logits/rejected": -0.8765170574188232, "logps/chosen": -0.9624531269073486, "logps/rejected": -1.4553377628326416, "loss": 1.1672, "nll_loss": 1.1636359691619873, "rewards/accuracies": 0.75, "rewards/chosen": -0.09624531865119934, "rewards/margins": 0.04928847402334213, "rewards/rejected": -0.14553378522396088, "step": 3700 }, { "epoch": 2.257739820039652, "grad_norm": 1.5717897415161133, "learning_rate": 4.403184323331292e-06, "log_odds_chosen": 1.5185017585754395, "log_odds_ratio": -0.4450508952140808, "logits/chosen": -0.6819480061531067, "logits/rejected": -0.7774963974952698, "logps/chosen": -0.7755427360534668, "logps/rejected": -1.9299014806747437, "loss": 1.0721, "nll_loss": 0.9278290867805481, "rewards/accuracies": 0.75, "rewards/chosen": -0.07755427062511444, "rewards/margins": 0.1154358759522438, "rewards/rejected": -0.19299015402793884, "step": 3701 }, { "epoch": 2.258349855116669, "grad_norm": 1.4222602844238281, "learning_rate": 4.402204531537048e-06, "log_odds_chosen": 1.2703778743743896, "log_odds_ratio": -0.4543271064758301, "logits/chosen": -0.9576435685157776, "logits/rejected": -0.9032017588615417, "logps/chosen": -0.814435601234436, "logps/rejected": -1.7928879261016846, "loss": 0.9427, "nll_loss": 1.0615239143371582, "rewards/accuracies": 0.875, "rewards/chosen": -0.08144355565309525, "rewards/margins": 0.09784524142742157, "rewards/rejected": -0.1792888045310974, "step": 3702 }, { "epoch": 2.2589598901936863, "grad_norm": 2.3834388256073, "learning_rate": 4.401224739742804e-06, "log_odds_chosen": 1.7530320882797241, "log_odds_ratio": -0.3682537078857422, "logits/chosen": -0.8693124055862427, "logits/rejected": -0.764482855796814, "logps/chosen": -0.9975926876068115, "logps/rejected": -2.4275074005126953, "loss": 1.0638, "nll_loss": 1.1063406467437744, "rewards/accuracies": 0.75, "rewards/chosen": -0.09975926578044891, "rewards/margins": 0.14299146831035614, "rewards/rejected": -0.24275073409080505, "step": 3703 }, { "epoch": 2.259569925270703, "grad_norm": 1.458580493927002, "learning_rate": 4.400244947948561e-06, "log_odds_chosen": 2.7702507972717285, "log_odds_ratio": -0.3112994134426117, "logits/chosen": -0.7814972996711731, "logits/rejected": -0.9230218529701233, "logps/chosen": -0.5744720697402954, "logps/rejected": -2.5766940116882324, "loss": 1.0283, "nll_loss": 0.7135895490646362, "rewards/accuracies": 0.75, "rewards/chosen": -0.05744720622897148, "rewards/margins": 0.2002221941947937, "rewards/rejected": -0.2576693892478943, "step": 3704 }, { "epoch": 2.26017996034772, "grad_norm": 4.216365814208984, "learning_rate": 4.3992651561543165e-06, "log_odds_chosen": 1.4671295881271362, "log_odds_ratio": -0.45805904269218445, "logits/chosen": -0.7678511142730713, "logits/rejected": -0.7162355184555054, "logps/chosen": -0.8226409554481506, "logps/rejected": -1.9431737661361694, "loss": 0.9228, "nll_loss": 0.942594051361084, "rewards/accuracies": 0.75, "rewards/chosen": -0.08226409554481506, "rewards/margins": 0.1120532900094986, "rewards/rejected": -0.19431738555431366, "step": 3705 }, { "epoch": 2.260789995424737, "grad_norm": 1.3225038051605225, "learning_rate": 4.3982853643600734e-06, "log_odds_chosen": 3.143564462661743, "log_odds_ratio": -0.16987675428390503, "logits/chosen": -0.9623255133628845, "logits/rejected": -1.0430036783218384, "logps/chosen": -0.5293115377426147, "logps/rejected": -2.9523191452026367, "loss": 1.0652, "nll_loss": 1.049861192703247, "rewards/accuracies": 1.0, "rewards/chosen": -0.052931152284145355, "rewards/margins": 0.24230076372623444, "rewards/rejected": -0.2952319383621216, "step": 3706 }, { "epoch": 2.261400030501754, "grad_norm": 1.2746291160583496, "learning_rate": 4.3973055725658296e-06, "log_odds_chosen": 1.7505297660827637, "log_odds_ratio": -0.3959841728210449, "logits/chosen": -0.819804310798645, "logits/rejected": -0.8263553380966187, "logps/chosen": -0.8014824986457825, "logps/rejected": -2.1057558059692383, "loss": 1.1339, "nll_loss": 1.0116461515426636, "rewards/accuracies": 0.75, "rewards/chosen": -0.08014824986457825, "rewards/margins": 0.13042734563350677, "rewards/rejected": -0.21057559549808502, "step": 3707 }, { "epoch": 2.262010065578771, "grad_norm": 4.798282146453857, "learning_rate": 4.396325780771586e-06, "log_odds_chosen": 2.3997113704681396, "log_odds_ratio": -0.3429030179977417, "logits/chosen": -0.7028525471687317, "logits/rejected": -0.8854658603668213, "logps/chosen": -0.657450795173645, "logps/rejected": -2.500652313232422, "loss": 1.0199, "nll_loss": 0.7837889790534973, "rewards/accuracies": 0.875, "rewards/chosen": -0.06574508547782898, "rewards/margins": 0.1843201369047165, "rewards/rejected": -0.25006523728370667, "step": 3708 }, { "epoch": 2.262620100655788, "grad_norm": 1.3431289196014404, "learning_rate": 4.395345988977343e-06, "log_odds_chosen": 2.6461122035980225, "log_odds_ratio": -0.4247104525566101, "logits/chosen": -0.6863726377487183, "logits/rejected": -0.7279415726661682, "logps/chosen": -0.6552637219429016, "logps/rejected": -2.7004690170288086, "loss": 1.048, "nll_loss": 0.8847620487213135, "rewards/accuracies": 0.75, "rewards/chosen": -0.06552636623382568, "rewards/margins": 0.20452052354812622, "rewards/rejected": -0.2700468897819519, "step": 3709 }, { "epoch": 2.2632301357328046, "grad_norm": 4.774994850158691, "learning_rate": 4.394366197183099e-06, "log_odds_chosen": 0.3138084411621094, "log_odds_ratio": -0.743620753288269, "logits/chosen": -0.6764736175537109, "logits/rejected": -0.6831073760986328, "logps/chosen": -0.860261082649231, "logps/rejected": -1.177202820777893, "loss": 0.9945, "nll_loss": 0.9335365295410156, "rewards/accuracies": 0.5, "rewards/chosen": -0.08602610230445862, "rewards/margins": 0.031694184988737106, "rewards/rejected": -0.11772028356790543, "step": 3710 }, { "epoch": 2.2638401708098215, "grad_norm": 1.4511500597000122, "learning_rate": 4.393386405388854e-06, "log_odds_chosen": 2.503082513809204, "log_odds_ratio": -0.41301101446151733, "logits/chosen": -0.8774224519729614, "logits/rejected": -0.9921035766601562, "logps/chosen": -0.8111387491226196, "logps/rejected": -2.914813756942749, "loss": 0.9524, "nll_loss": 0.950342059135437, "rewards/accuracies": 0.875, "rewards/chosen": -0.08111388236284256, "rewards/margins": 0.21036747097969055, "rewards/rejected": -0.2914813756942749, "step": 3711 }, { "epoch": 2.2644502058868383, "grad_norm": 1.4227674007415771, "learning_rate": 4.392406613594611e-06, "log_odds_chosen": 1.2369736433029175, "log_odds_ratio": -0.39977937936782837, "logits/chosen": -0.7095423936843872, "logits/rejected": -0.8241422772407532, "logps/chosen": -0.9626367092132568, "logps/rejected": -1.9443261623382568, "loss": 1.0913, "nll_loss": 1.0184261798858643, "rewards/accuracies": 0.875, "rewards/chosen": -0.09626366943120956, "rewards/margins": 0.09816896170377731, "rewards/rejected": -0.19443263113498688, "step": 3712 }, { "epoch": 2.2650602409638556, "grad_norm": 6.162407875061035, "learning_rate": 4.391426821800367e-06, "log_odds_chosen": 3.981684923171997, "log_odds_ratio": -0.2472333014011383, "logits/chosen": -0.7474602460861206, "logits/rejected": -0.7093945145606995, "logps/chosen": -0.5914533138275146, "logps/rejected": -3.6038320064544678, "loss": 1.0397, "nll_loss": 0.83526211977005, "rewards/accuracies": 0.75, "rewards/chosen": -0.059145331382751465, "rewards/margins": 0.30123788118362427, "rewards/rejected": -0.36038321256637573, "step": 3713 }, { "epoch": 2.2656702760408725, "grad_norm": 1.1827518939971924, "learning_rate": 4.390447030006123e-06, "log_odds_chosen": 1.04658043384552, "log_odds_ratio": -0.47317835688591003, "logits/chosen": -0.777808666229248, "logits/rejected": -0.7886457443237305, "logps/chosen": -0.719168484210968, "logps/rejected": -1.4658806324005127, "loss": 1.0191, "nll_loss": 0.9973258376121521, "rewards/accuracies": 0.75, "rewards/chosen": -0.0719168484210968, "rewards/margins": 0.07467120885848999, "rewards/rejected": -0.1465880572795868, "step": 3714 }, { "epoch": 2.2662803111178893, "grad_norm": 2.3564789295196533, "learning_rate": 4.38946723821188e-06, "log_odds_chosen": 0.5845195055007935, "log_odds_ratio": -0.5084731578826904, "logits/chosen": -0.8754169344902039, "logits/rejected": -0.9222370386123657, "logps/chosen": -0.8674908876419067, "logps/rejected": -1.248355507850647, "loss": 1.0452, "nll_loss": 1.0156867504119873, "rewards/accuracies": 0.75, "rewards/chosen": -0.08674909919500351, "rewards/margins": 0.038086455315351486, "rewards/rejected": -0.1248355507850647, "step": 3715 }, { "epoch": 2.266890346194906, "grad_norm": 1.4600365161895752, "learning_rate": 4.388487446417636e-06, "log_odds_chosen": 0.2913641929626465, "log_odds_ratio": -0.6350716948509216, "logits/chosen": -0.7456468939781189, "logits/rejected": -0.8441720008850098, "logps/chosen": -0.8902613520622253, "logps/rejected": -1.080402135848999, "loss": 1.0964, "nll_loss": 0.9657348990440369, "rewards/accuracies": 0.625, "rewards/chosen": -0.08902613818645477, "rewards/margins": 0.019014079123735428, "rewards/rejected": -0.1080402135848999, "step": 3716 }, { "epoch": 2.267500381271923, "grad_norm": 1.9814214706420898, "learning_rate": 4.387507654623392e-06, "log_odds_chosen": 1.6728649139404297, "log_odds_ratio": -0.32903167605400085, "logits/chosen": -0.6816893815994263, "logits/rejected": -0.7611936926841736, "logps/chosen": -0.517220139503479, "logps/rejected": -1.6284029483795166, "loss": 0.9731, "nll_loss": 0.8227657079696655, "rewards/accuracies": 0.875, "rewards/chosen": -0.05172201246023178, "rewards/margins": 0.11111827939748764, "rewards/rejected": -0.16284029185771942, "step": 3717 }, { "epoch": 2.2681104163489403, "grad_norm": 1.7323614358901978, "learning_rate": 4.386527862829149e-06, "log_odds_chosen": 1.4296433925628662, "log_odds_ratio": -0.5640126466751099, "logits/chosen": -0.8590665459632874, "logits/rejected": -0.8430265188217163, "logps/chosen": -0.8733019232749939, "logps/rejected": -1.9704055786132812, "loss": 1.1424, "nll_loss": 1.039838433265686, "rewards/accuracies": 0.5, "rewards/chosen": -0.08733019232749939, "rewards/margins": 0.10971038043498993, "rewards/rejected": -0.19704055786132812, "step": 3718 }, { "epoch": 2.268720451425957, "grad_norm": 0.9839211702346802, "learning_rate": 4.385548071034905e-06, "log_odds_chosen": 0.8422211408615112, "log_odds_ratio": -0.5386780500411987, "logits/chosen": -0.7297782897949219, "logits/rejected": -0.7482080459594727, "logps/chosen": -0.6582612991333008, "logps/rejected": -1.2366759777069092, "loss": 1.014, "nll_loss": 0.9384346008300781, "rewards/accuracies": 0.625, "rewards/chosen": -0.06582613289356232, "rewards/margins": 0.057841457426548004, "rewards/rejected": -0.12366759032011032, "step": 3719 }, { "epoch": 2.269330486502974, "grad_norm": 1.4087622165679932, "learning_rate": 4.384568279240662e-06, "log_odds_chosen": 1.184127688407898, "log_odds_ratio": -0.4504169523715973, "logits/chosen": -0.8351390957832336, "logits/rejected": -0.994612455368042, "logps/chosen": -0.9079486727714539, "logps/rejected": -1.7386953830718994, "loss": 1.117, "nll_loss": 1.1259313821792603, "rewards/accuracies": 0.75, "rewards/chosen": -0.0907948687672615, "rewards/margins": 0.0830746665596962, "rewards/rejected": -0.1738695353269577, "step": 3720 }, { "epoch": 2.269940521579991, "grad_norm": 1.3279675245285034, "learning_rate": 4.383588487446418e-06, "log_odds_chosen": 1.8683127164840698, "log_odds_ratio": -0.5070036053657532, "logits/chosen": -0.7655463218688965, "logits/rejected": -0.8928954005241394, "logps/chosen": -0.6868885159492493, "logps/rejected": -2.130429267883301, "loss": 1.1369, "nll_loss": 1.0383129119873047, "rewards/accuracies": 0.625, "rewards/chosen": -0.06868885457515717, "rewards/margins": 0.14435409009456635, "rewards/rejected": -0.2130429446697235, "step": 3721 }, { "epoch": 2.2705505566570077, "grad_norm": 1.2614514827728271, "learning_rate": 4.382608695652173e-06, "log_odds_chosen": 1.7931853532791138, "log_odds_ratio": -0.4500766694545746, "logits/chosen": -0.9301499724388123, "logits/rejected": -0.9422235488891602, "logps/chosen": -0.808906614780426, "logps/rejected": -2.2197978496551514, "loss": 1.0583, "nll_loss": 1.217150330543518, "rewards/accuracies": 0.75, "rewards/chosen": -0.08089066296815872, "rewards/margins": 0.14108914136886597, "rewards/rejected": -0.2219797968864441, "step": 3722 }, { "epoch": 2.2711605917340245, "grad_norm": 2.4145970344543457, "learning_rate": 4.38162890385793e-06, "log_odds_chosen": 0.08310970664024353, "log_odds_ratio": -0.675741970539093, "logits/chosen": -0.8226214647293091, "logits/rejected": -0.7576106190681458, "logps/chosen": -0.8492965698242188, "logps/rejected": -0.8851979970932007, "loss": 1.0294, "nll_loss": 0.9500854015350342, "rewards/accuracies": 0.5, "rewards/chosen": -0.08492965996265411, "rewards/margins": 0.0035901404917240143, "rewards/rejected": -0.08851979672908783, "step": 3723 }, { "epoch": 2.271770626811042, "grad_norm": 2.6983566284179688, "learning_rate": 4.380649112063686e-06, "log_odds_chosen": 1.6066950559616089, "log_odds_ratio": -0.36004552245140076, "logits/chosen": -0.8628699779510498, "logits/rejected": -0.9081717729568481, "logps/chosen": -0.706709086894989, "logps/rejected": -1.8539669513702393, "loss": 1.1837, "nll_loss": 1.2350395917892456, "rewards/accuracies": 0.875, "rewards/chosen": -0.07067091017961502, "rewards/margins": 0.11472579091787338, "rewards/rejected": -0.1853967010974884, "step": 3724 }, { "epoch": 2.2723806618880587, "grad_norm": 1.2848973274230957, "learning_rate": 4.379669320269442e-06, "log_odds_chosen": 0.2801355719566345, "log_odds_ratio": -0.7292940616607666, "logits/chosen": -0.7962721586227417, "logits/rejected": -0.8613890409469604, "logps/chosen": -0.9624870419502258, "logps/rejected": -1.2101190090179443, "loss": 0.9819, "nll_loss": 1.0558936595916748, "rewards/accuracies": 0.5, "rewards/chosen": -0.09624870121479034, "rewards/margins": 0.0247632022947073, "rewards/rejected": -0.12101190537214279, "step": 3725 }, { "epoch": 2.2729906969650755, "grad_norm": 1.0923031568527222, "learning_rate": 4.378689528475199e-06, "log_odds_chosen": 0.8556982278823853, "log_odds_ratio": -0.625925600528717, "logits/chosen": -0.7897664308547974, "logits/rejected": -0.7006006240844727, "logps/chosen": -0.8263569474220276, "logps/rejected": -1.5866721868515015, "loss": 0.9826, "nll_loss": 0.9183844327926636, "rewards/accuracies": 0.5, "rewards/chosen": -0.08263570070266724, "rewards/margins": 0.07603151351213455, "rewards/rejected": -0.1586672067642212, "step": 3726 }, { "epoch": 2.2736007320420923, "grad_norm": 1.5970934629440308, "learning_rate": 4.377709736680955e-06, "log_odds_chosen": 2.603829860687256, "log_odds_ratio": -0.20330041646957397, "logits/chosen": -0.794780969619751, "logits/rejected": -0.8217865228652954, "logps/chosen": -0.6652761101722717, "logps/rejected": -2.6425068378448486, "loss": 1.0378, "nll_loss": 0.9747176170349121, "rewards/accuracies": 1.0, "rewards/chosen": -0.06652761995792389, "rewards/margins": 0.19772309064865112, "rewards/rejected": -0.2642506957054138, "step": 3727 }, { "epoch": 2.274210767119109, "grad_norm": 1.9983155727386475, "learning_rate": 4.376729944886711e-06, "log_odds_chosen": 1.897302269935608, "log_odds_ratio": -0.44210270047187805, "logits/chosen": -0.7279578447341919, "logits/rejected": -0.789553701877594, "logps/chosen": -0.7126216888427734, "logps/rejected": -2.1870720386505127, "loss": 1.1448, "nll_loss": 0.7873445153236389, "rewards/accuracies": 0.625, "rewards/chosen": -0.0712621733546257, "rewards/margins": 0.14744503796100616, "rewards/rejected": -0.21870721876621246, "step": 3728 }, { "epoch": 2.2748208021961265, "grad_norm": 1.2689520120620728, "learning_rate": 4.375750153092468e-06, "log_odds_chosen": 1.9162805080413818, "log_odds_ratio": -0.3110729455947876, "logits/chosen": -0.9596072435379028, "logits/rejected": -1.0098671913146973, "logps/chosen": -0.8395361304283142, "logps/rejected": -2.3462765216827393, "loss": 0.9803, "nll_loss": 0.991958737373352, "rewards/accuracies": 0.875, "rewards/chosen": -0.0839536115527153, "rewards/margins": 0.15067405998706818, "rewards/rejected": -0.23462766408920288, "step": 3729 }, { "epoch": 2.2754308372731433, "grad_norm": 1.9300481081008911, "learning_rate": 4.374770361298224e-06, "log_odds_chosen": 2.2272911071777344, "log_odds_ratio": -0.29126620292663574, "logits/chosen": -0.8167418241500854, "logits/rejected": -0.9529044032096863, "logps/chosen": -0.6455971598625183, "logps/rejected": -2.296278953552246, "loss": 1.0701, "nll_loss": 1.0912845134735107, "rewards/accuracies": 0.875, "rewards/chosen": -0.06455972045660019, "rewards/margins": 0.16506817936897278, "rewards/rejected": -0.22962789237499237, "step": 3730 }, { "epoch": 2.27604087235016, "grad_norm": 1.8666210174560547, "learning_rate": 4.37379056950398e-06, "log_odds_chosen": 0.5671815872192383, "log_odds_ratio": -0.820713996887207, "logits/chosen": -0.8390407562255859, "logits/rejected": -0.8048071265220642, "logps/chosen": -1.0195661783218384, "logps/rejected": -1.4886057376861572, "loss": 1.1169, "nll_loss": 1.1725951433181763, "rewards/accuracies": 0.25, "rewards/chosen": -0.10195661336183548, "rewards/margins": 0.04690397530794144, "rewards/rejected": -0.14886058866977692, "step": 3731 }, { "epoch": 2.276650907427177, "grad_norm": 2.717575788497925, "learning_rate": 4.372810777709737e-06, "log_odds_chosen": 1.3877620697021484, "log_odds_ratio": -0.4492849111557007, "logits/chosen": -0.835929811000824, "logits/rejected": -0.882710337638855, "logps/chosen": -0.785742998123169, "logps/rejected": -1.8488168716430664, "loss": 1.1174, "nll_loss": 1.0982270240783691, "rewards/accuracies": 0.875, "rewards/chosen": -0.0785742998123169, "rewards/margins": 0.10630738735198975, "rewards/rejected": -0.18488168716430664, "step": 3732 }, { "epoch": 2.277260942504194, "grad_norm": 2.0974819660186768, "learning_rate": 4.371830985915493e-06, "log_odds_chosen": 1.9137463569641113, "log_odds_ratio": -0.5032889246940613, "logits/chosen": -0.7837751507759094, "logits/rejected": -0.9313108921051025, "logps/chosen": -0.851247251033783, "logps/rejected": -2.5371034145355225, "loss": 1.1745, "nll_loss": 1.0230637788772583, "rewards/accuracies": 0.75, "rewards/chosen": -0.08512473106384277, "rewards/margins": 0.1685856282711029, "rewards/rejected": -0.2537103593349457, "step": 3733 }, { "epoch": 2.2778709775812107, "grad_norm": 1.6130162477493286, "learning_rate": 4.370851194121249e-06, "log_odds_chosen": 1.060652256011963, "log_odds_ratio": -0.49087417125701904, "logits/chosen": -1.045884609222412, "logits/rejected": -0.9454553723335266, "logps/chosen": -0.7737364172935486, "logps/rejected": -1.5825632810592651, "loss": 0.9899, "nll_loss": 1.1115825176239014, "rewards/accuracies": 0.625, "rewards/chosen": -0.07737363874912262, "rewards/margins": 0.08088269829750061, "rewards/rejected": -0.15825633704662323, "step": 3734 }, { "epoch": 2.278481012658228, "grad_norm": 2.8489434719085693, "learning_rate": 4.369871402327005e-06, "log_odds_chosen": 0.8005168437957764, "log_odds_ratio": -0.7903232574462891, "logits/chosen": -0.8431577086448669, "logits/rejected": -0.7942468523979187, "logps/chosen": -1.1553593873977661, "logps/rejected": -1.7728736400604248, "loss": 1.1969, "nll_loss": 1.315569281578064, "rewards/accuracies": 0.375, "rewards/chosen": -0.11553594470024109, "rewards/margins": 0.06175142899155617, "rewards/rejected": -0.17728736996650696, "step": 3735 }, { "epoch": 2.279091047735245, "grad_norm": 2.550269365310669, "learning_rate": 4.368891610532761e-06, "log_odds_chosen": -0.11494921147823334, "log_odds_ratio": -0.9372630715370178, "logits/chosen": -0.7896199822425842, "logits/rejected": -0.6853040456771851, "logps/chosen": -1.2046704292297363, "logps/rejected": -1.1009278297424316, "loss": 1.1155, "nll_loss": 1.127431869506836, "rewards/accuracies": 0.5, "rewards/chosen": -0.12046704441308975, "rewards/margins": -0.010374271310865879, "rewards/rejected": -0.11009277403354645, "step": 3736 }, { "epoch": 2.2797010828122617, "grad_norm": 1.3819458484649658, "learning_rate": 4.367911818738518e-06, "log_odds_chosen": 1.4388576745986938, "log_odds_ratio": -0.3615233898162842, "logits/chosen": -0.8404486775398254, "logits/rejected": -0.8518436551094055, "logps/chosen": -0.934868574142456, "logps/rejected": -2.0612285137176514, "loss": 1.209, "nll_loss": 0.9933860301971436, "rewards/accuracies": 1.0, "rewards/chosen": -0.09348684549331665, "rewards/margins": 0.11263599991798401, "rewards/rejected": -0.20612284541130066, "step": 3737 }, { "epoch": 2.2803111178892785, "grad_norm": 6.281861305236816, "learning_rate": 4.3669320269442744e-06, "log_odds_chosen": 1.2584564685821533, "log_odds_ratio": -0.6920324563980103, "logits/chosen": -0.8921384215354919, "logits/rejected": -0.96225905418396, "logps/chosen": -0.7553830146789551, "logps/rejected": -1.8726006746292114, "loss": 1.142, "nll_loss": 0.9838807582855225, "rewards/accuracies": 0.5, "rewards/chosen": -0.07553829997777939, "rewards/margins": 0.11172176152467728, "rewards/rejected": -0.18726006150245667, "step": 3738 }, { "epoch": 2.2809211529662954, "grad_norm": 12.739323616027832, "learning_rate": 4.3659522351500306e-06, "log_odds_chosen": 0.9942141771316528, "log_odds_ratio": -0.4374317526817322, "logits/chosen": -1.0279161930084229, "logits/rejected": -0.8751434087753296, "logps/chosen": -0.8657218813896179, "logps/rejected": -1.5832931995391846, "loss": 1.0425, "nll_loss": 1.0147688388824463, "rewards/accuracies": 0.75, "rewards/chosen": -0.08657218515872955, "rewards/margins": 0.07175713777542114, "rewards/rejected": -0.1583293080329895, "step": 3739 }, { "epoch": 2.2815311880433127, "grad_norm": 1.6023887395858765, "learning_rate": 4.364972443355787e-06, "log_odds_chosen": 1.4947388172149658, "log_odds_ratio": -0.4616397023200989, "logits/chosen": -0.5068562030792236, "logits/rejected": -0.7000172734260559, "logps/chosen": -0.7095311880111694, "logps/rejected": -1.7697182893753052, "loss": 0.9693, "nll_loss": 0.7881331443786621, "rewards/accuracies": 0.625, "rewards/chosen": -0.0709531158208847, "rewards/margins": 0.10601872205734253, "rewards/rejected": -0.17697185277938843, "step": 3740 }, { "epoch": 2.2821412231203295, "grad_norm": 2.853823184967041, "learning_rate": 4.363992651561543e-06, "log_odds_chosen": 2.452381134033203, "log_odds_ratio": -0.36506807804107666, "logits/chosen": -0.9808054566383362, "logits/rejected": -1.038214921951294, "logps/chosen": -0.8519949913024902, "logps/rejected": -2.8864169120788574, "loss": 1.1293, "nll_loss": 1.0491721630096436, "rewards/accuracies": 0.75, "rewards/chosen": -0.08519949018955231, "rewards/margins": 0.20344217121601105, "rewards/rejected": -0.28864166140556335, "step": 3741 }, { "epoch": 2.2827512581973464, "grad_norm": 1.4074498414993286, "learning_rate": 4.363012859767299e-06, "log_odds_chosen": 1.089653491973877, "log_odds_ratio": -0.508573055267334, "logits/chosen": -0.8123052716255188, "logits/rejected": -0.7888678908348083, "logps/chosen": -0.8279938697814941, "logps/rejected": -1.7632123231887817, "loss": 1.1781, "nll_loss": 0.9580183029174805, "rewards/accuracies": 0.75, "rewards/chosen": -0.08279938995838165, "rewards/margins": 0.093521848320961, "rewards/rejected": -0.17632123827934265, "step": 3742 }, { "epoch": 2.283361293274363, "grad_norm": 2.549227237701416, "learning_rate": 4.362033067973056e-06, "log_odds_chosen": 1.5825104713439941, "log_odds_ratio": -0.3725789785385132, "logits/chosen": -0.6730577945709229, "logits/rejected": -0.8266639113426208, "logps/chosen": -0.5969563722610474, "logps/rejected": -1.5677781105041504, "loss": 0.9655, "nll_loss": 0.8228529691696167, "rewards/accuracies": 0.875, "rewards/chosen": -0.059695638716220856, "rewards/margins": 0.09708216786384583, "rewards/rejected": -0.15677781403064728, "step": 3743 }, { "epoch": 2.28397132835138, "grad_norm": 1.9886802434921265, "learning_rate": 4.361053276178812e-06, "log_odds_chosen": 1.9843207597732544, "log_odds_ratio": -0.4548552632331848, "logits/chosen": -0.9426446557044983, "logits/rejected": -1.080180048942566, "logps/chosen": -0.848497748374939, "logps/rejected": -2.3117127418518066, "loss": 1.1471, "nll_loss": 1.473226547241211, "rewards/accuracies": 0.75, "rewards/chosen": -0.0848497822880745, "rewards/margins": 0.14632149040699005, "rewards/rejected": -0.23117126524448395, "step": 3744 }, { "epoch": 2.284581363428397, "grad_norm": 1.6536129713058472, "learning_rate": 4.360073484384567e-06, "log_odds_chosen": 2.8990797996520996, "log_odds_ratio": -0.3985127806663513, "logits/chosen": -0.7129489183425903, "logits/rejected": -0.8763777017593384, "logps/chosen": -0.6701765060424805, "logps/rejected": -2.8183224201202393, "loss": 1.0173, "nll_loss": 0.9608619213104248, "rewards/accuracies": 0.75, "rewards/chosen": -0.06701765954494476, "rewards/margins": 0.21481460332870483, "rewards/rejected": -0.2818322479724884, "step": 3745 }, { "epoch": 2.285191398505414, "grad_norm": 1.312872290611267, "learning_rate": 4.359093692590324e-06, "log_odds_chosen": 1.7996430397033691, "log_odds_ratio": -0.5003947615623474, "logits/chosen": -0.8749751448631287, "logits/rejected": -0.7829938530921936, "logps/chosen": -0.7549422383308411, "logps/rejected": -2.1119253635406494, "loss": 1.0735, "nll_loss": 1.1302032470703125, "rewards/accuracies": 0.75, "rewards/chosen": -0.07549422234296799, "rewards/margins": 0.1356983333826065, "rewards/rejected": -0.2111925482749939, "step": 3746 }, { "epoch": 2.285801433582431, "grad_norm": 1.5765080451965332, "learning_rate": 4.35811390079608e-06, "log_odds_chosen": 0.7505800724029541, "log_odds_ratio": -0.5293620824813843, "logits/chosen": -1.1817713975906372, "logits/rejected": -1.1163513660430908, "logps/chosen": -0.7978935241699219, "logps/rejected": -1.2453358173370361, "loss": 1.2818, "nll_loss": 1.4672980308532715, "rewards/accuracies": 0.625, "rewards/chosen": -0.07978934794664383, "rewards/margins": 0.04474423825740814, "rewards/rejected": -0.12453359365463257, "step": 3747 }, { "epoch": 2.286411468659448, "grad_norm": 1.4171218872070312, "learning_rate": 4.357134109001837e-06, "log_odds_chosen": 1.5492703914642334, "log_odds_ratio": -0.5783823132514954, "logits/chosen": -0.778279721736908, "logits/rejected": -0.7021008729934692, "logps/chosen": -0.859455943107605, "logps/rejected": -2.0752296447753906, "loss": 1.028, "nll_loss": 1.0343369245529175, "rewards/accuracies": 0.5, "rewards/chosen": -0.08594558387994766, "rewards/margins": 0.12157737463712692, "rewards/rejected": -0.2075229436159134, "step": 3748 }, { "epoch": 2.2870215037364647, "grad_norm": 2.346627712249756, "learning_rate": 4.3561543172075935e-06, "log_odds_chosen": 1.7317603826522827, "log_odds_ratio": -0.5674578547477722, "logits/chosen": -0.5205991864204407, "logits/rejected": -0.7620295882225037, "logps/chosen": -0.8816468715667725, "logps/rejected": -2.1890175342559814, "loss": 1.233, "nll_loss": 1.2785643339157104, "rewards/accuracies": 0.625, "rewards/chosen": -0.08816468715667725, "rewards/margins": 0.1307370662689209, "rewards/rejected": -0.21890176832675934, "step": 3749 }, { "epoch": 2.287631538813482, "grad_norm": 2.041597843170166, "learning_rate": 4.35517452541335e-06, "log_odds_chosen": 0.435589075088501, "log_odds_ratio": -0.6803147196769714, "logits/chosen": -1.115968108177185, "logits/rejected": -0.9884642362594604, "logps/chosen": -1.0068581104278564, "logps/rejected": -1.460046410560608, "loss": 1.0408, "nll_loss": 1.4128024578094482, "rewards/accuracies": 0.5, "rewards/chosen": -0.10068580508232117, "rewards/margins": 0.045318834483623505, "rewards/rejected": -0.14600464701652527, "step": 3750 }, { "epoch": 2.288241573890499, "grad_norm": 6.208978176116943, "learning_rate": 4.354194733619106e-06, "log_odds_chosen": 1.915225863456726, "log_odds_ratio": -0.32291099429130554, "logits/chosen": -0.8461465239524841, "logits/rejected": -0.7557649612426758, "logps/chosen": -0.6489026546478271, "logps/rejected": -2.120901346206665, "loss": 1.0534, "nll_loss": 1.162636160850525, "rewards/accuracies": 1.0, "rewards/chosen": -0.06489026546478271, "rewards/margins": 0.14719988405704498, "rewards/rejected": -0.2120901495218277, "step": 3751 }, { "epoch": 2.2888516089675157, "grad_norm": 2.1639397144317627, "learning_rate": 4.353214941824862e-06, "log_odds_chosen": 1.0843607187271118, "log_odds_ratio": -0.5001974105834961, "logits/chosen": -0.5688600540161133, "logits/rejected": -0.7294479608535767, "logps/chosen": -0.775425910949707, "logps/rejected": -1.3907345533370972, "loss": 1.144, "nll_loss": 0.989589273929596, "rewards/accuracies": 0.625, "rewards/chosen": -0.07754258811473846, "rewards/margins": 0.061530862003564835, "rewards/rejected": -0.139073446393013, "step": 3752 }, { "epoch": 2.2894616440445326, "grad_norm": 1.1413930654525757, "learning_rate": 4.352235150030618e-06, "log_odds_chosen": 1.8798623085021973, "log_odds_ratio": -0.35559073090553284, "logits/chosen": -0.7777539491653442, "logits/rejected": -0.7033095359802246, "logps/chosen": -0.6260694265365601, "logps/rejected": -2.00836181640625, "loss": 0.9592, "nll_loss": 0.7097015380859375, "rewards/accuracies": 0.875, "rewards/chosen": -0.06260694563388824, "rewards/margins": 0.13822923600673676, "rewards/rejected": -0.2008361667394638, "step": 3753 }, { "epoch": 2.2900716791215494, "grad_norm": 1.169597864151001, "learning_rate": 4.351255358236375e-06, "log_odds_chosen": 2.6627979278564453, "log_odds_ratio": -0.31013309955596924, "logits/chosen": -0.9488793611526489, "logits/rejected": -1.005329966545105, "logps/chosen": -0.8155645132064819, "logps/rejected": -2.9971752166748047, "loss": 1.0988, "nll_loss": 0.9546079635620117, "rewards/accuracies": 1.0, "rewards/chosen": -0.08155645430088043, "rewards/margins": 0.21816106140613556, "rewards/rejected": -0.299717515707016, "step": 3754 }, { "epoch": 2.2906817141985663, "grad_norm": 1.062658429145813, "learning_rate": 4.350275566442131e-06, "log_odds_chosen": 1.232064962387085, "log_odds_ratio": -0.3752515912055969, "logits/chosen": -0.8911031484603882, "logits/rejected": -0.9769709706306458, "logps/chosen": -0.8870492577552795, "logps/rejected": -1.7649489641189575, "loss": 1.1067, "nll_loss": 1.1083619594573975, "rewards/accuracies": 0.75, "rewards/chosen": -0.08870492875576019, "rewards/margins": 0.08778997510671616, "rewards/rejected": -0.17649489641189575, "step": 3755 }, { "epoch": 2.291291749275583, "grad_norm": 3.5899698734283447, "learning_rate": 4.349295774647887e-06, "log_odds_chosen": 2.0969812870025635, "log_odds_ratio": -0.38877272605895996, "logits/chosen": -0.6513837575912476, "logits/rejected": -0.8249632120132446, "logps/chosen": -0.6424543857574463, "logps/rejected": -2.1229116916656494, "loss": 0.9519, "nll_loss": 0.8345134258270264, "rewards/accuracies": 0.75, "rewards/chosen": -0.06424544006586075, "rewards/margins": 0.14804573357105255, "rewards/rejected": -0.2122911810874939, "step": 3756 }, { "epoch": 2.2919017843526004, "grad_norm": 1.1134836673736572, "learning_rate": 4.348315982853643e-06, "log_odds_chosen": 0.672759473323822, "log_odds_ratio": -0.6270250082015991, "logits/chosen": -0.764182984828949, "logits/rejected": -0.7343789935112, "logps/chosen": -0.703567624092102, "logps/rejected": -1.1633849143981934, "loss": 1.059, "nll_loss": 0.9985894560813904, "rewards/accuracies": 0.5, "rewards/chosen": -0.07035677134990692, "rewards/margins": 0.04598172754049301, "rewards/rejected": -0.11633849143981934, "step": 3757 }, { "epoch": 2.2925118194296172, "grad_norm": 4.119210243225098, "learning_rate": 4.347336191059399e-06, "log_odds_chosen": 2.52191424369812, "log_odds_ratio": -0.18328969180583954, "logits/chosen": -0.8807802200317383, "logits/rejected": -0.9126054048538208, "logps/chosen": -0.7050777673721313, "logps/rejected": -2.644463539123535, "loss": 0.7895, "nll_loss": 0.8064006567001343, "rewards/accuracies": 1.0, "rewards/chosen": -0.07050777971744537, "rewards/margins": 0.19393856823444366, "rewards/rejected": -0.26444634795188904, "step": 3758 }, { "epoch": 2.293121854506634, "grad_norm": 1.6142359972000122, "learning_rate": 4.3463563992651555e-06, "log_odds_chosen": 0.027126222848892212, "log_odds_ratio": -0.7319813966751099, "logits/chosen": -1.0009262561798096, "logits/rejected": -1.065736174583435, "logps/chosen": -0.943824052810669, "logps/rejected": -0.9214168190956116, "loss": 1.0757, "nll_loss": 1.059380292892456, "rewards/accuracies": 0.25, "rewards/chosen": -0.0943824052810669, "rewards/margins": -0.0022407229989767075, "rewards/rejected": -0.09214168041944504, "step": 3759 }, { "epoch": 2.293731889583651, "grad_norm": 4.427926540374756, "learning_rate": 4.3453766074709125e-06, "log_odds_chosen": 1.7906537055969238, "log_odds_ratio": -0.25154101848602295, "logits/chosen": -0.8190262913703918, "logits/rejected": -0.794649064540863, "logps/chosen": -0.5781269073486328, "logps/rejected": -1.761847972869873, "loss": 1.2373, "nll_loss": 1.213414192199707, "rewards/accuracies": 0.875, "rewards/chosen": -0.05781269073486328, "rewards/margins": 0.1183721050620079, "rewards/rejected": -0.1761847883462906, "step": 3760 }, { "epoch": 2.294341924660668, "grad_norm": 1.6555149555206299, "learning_rate": 4.344396815676669e-06, "log_odds_chosen": 2.6655311584472656, "log_odds_ratio": -0.2076980620622635, "logits/chosen": -0.5519404411315918, "logits/rejected": -0.7374356985092163, "logps/chosen": -0.592959463596344, "logps/rejected": -2.6336259841918945, "loss": 0.9891, "nll_loss": 0.8711815476417542, "rewards/accuracies": 1.0, "rewards/chosen": -0.05929594114422798, "rewards/margins": 0.2040666788816452, "rewards/rejected": -0.2633626163005829, "step": 3761 }, { "epoch": 2.294951959737685, "grad_norm": 1.8686178922653198, "learning_rate": 4.343417023882425e-06, "log_odds_chosen": 2.388383626937866, "log_odds_ratio": -0.3116331696510315, "logits/chosen": -0.9505226612091064, "logits/rejected": -0.9399609565734863, "logps/chosen": -1.0208935737609863, "logps/rejected": -2.7300164699554443, "loss": 1.1392, "nll_loss": 1.2056167125701904, "rewards/accuracies": 0.75, "rewards/chosen": -0.10208936780691147, "rewards/margins": 0.1709122657775879, "rewards/rejected": -0.27300167083740234, "step": 3762 }, { "epoch": 2.295561994814702, "grad_norm": 2.0189261436462402, "learning_rate": 4.342437232088181e-06, "log_odds_chosen": 0.9016528129577637, "log_odds_ratio": -0.48200365900993347, "logits/chosen": -0.6890827417373657, "logits/rejected": -0.9041939973831177, "logps/chosen": -0.7474868893623352, "logps/rejected": -1.2282524108886719, "loss": 0.9147, "nll_loss": 0.9270815849304199, "rewards/accuracies": 0.625, "rewards/chosen": -0.0747486874461174, "rewards/margins": 0.048076558858156204, "rewards/rejected": -0.1228252425789833, "step": 3763 }, { "epoch": 2.2961720298917188, "grad_norm": 2.0404438972473145, "learning_rate": 4.341457440293937e-06, "log_odds_chosen": 1.6996232271194458, "log_odds_ratio": -0.3679838478565216, "logits/chosen": -0.8726053237915039, "logits/rejected": -0.9345843195915222, "logps/chosen": -0.8130528926849365, "logps/rejected": -2.098543643951416, "loss": 1.0097, "nll_loss": 0.965394914150238, "rewards/accuracies": 0.875, "rewards/chosen": -0.08130529522895813, "rewards/margins": 0.12854906916618347, "rewards/rejected": -0.2098543643951416, "step": 3764 }, { "epoch": 2.2967820649687356, "grad_norm": 1.2244808673858643, "learning_rate": 4.340477648499694e-06, "log_odds_chosen": 2.1162142753601074, "log_odds_ratio": -0.3161145746707916, "logits/chosen": -1.0097646713256836, "logits/rejected": -1.0260841846466064, "logps/chosen": -0.8065990805625916, "logps/rejected": -2.4158849716186523, "loss": 1.1005, "nll_loss": 1.0495078563690186, "rewards/accuracies": 0.875, "rewards/chosen": -0.0806599110364914, "rewards/margins": 0.1609286069869995, "rewards/rejected": -0.2415885329246521, "step": 3765 }, { "epoch": 2.2973921000457524, "grad_norm": 1.8122962713241577, "learning_rate": 4.33949785670545e-06, "log_odds_chosen": 3.9806265830993652, "log_odds_ratio": -0.36330366134643555, "logits/chosen": -0.7847560048103333, "logits/rejected": -0.8644284009933472, "logps/chosen": -0.7335242629051208, "logps/rejected": -3.990570068359375, "loss": 0.9828, "nll_loss": 0.9950694441795349, "rewards/accuracies": 0.75, "rewards/chosen": -0.07335242629051208, "rewards/margins": 0.32570457458496094, "rewards/rejected": -0.3990570306777954, "step": 3766 }, { "epoch": 2.2980021351227697, "grad_norm": 4.613654136657715, "learning_rate": 4.338518064911206e-06, "log_odds_chosen": 2.5103249549865723, "log_odds_ratio": -0.18163982033729553, "logits/chosen": -0.9244089126586914, "logits/rejected": -0.9927042722702026, "logps/chosen": -0.8167665004730225, "logps/rejected": -2.731504440307617, "loss": 1.0226, "nll_loss": 0.9735623598098755, "rewards/accuracies": 1.0, "rewards/chosen": -0.0816766545176506, "rewards/margins": 0.1914738267660141, "rewards/rejected": -0.2731504738330841, "step": 3767 }, { "epoch": 2.2986121701997866, "grad_norm": 13.474587440490723, "learning_rate": 4.337538273116962e-06, "log_odds_chosen": 0.15537050366401672, "log_odds_ratio": -0.6410900354385376, "logits/chosen": -0.7361705303192139, "logits/rejected": -0.742283284664154, "logps/chosen": -1.0777639150619507, "logps/rejected": -1.1615314483642578, "loss": 1.167, "nll_loss": 1.0759090185165405, "rewards/accuracies": 0.75, "rewards/chosen": -0.10777639597654343, "rewards/margins": 0.008376761339604855, "rewards/rejected": -0.11615315079689026, "step": 3768 }, { "epoch": 2.2992222052768034, "grad_norm": 0.9225742220878601, "learning_rate": 4.3365584813227185e-06, "log_odds_chosen": 1.1306819915771484, "log_odds_ratio": -0.595379114151001, "logits/chosen": -0.9332530498504639, "logits/rejected": -0.8765233755111694, "logps/chosen": -0.9222348928451538, "logps/rejected": -1.894237756729126, "loss": 1.0106, "nll_loss": 1.0807496309280396, "rewards/accuracies": 0.5, "rewards/chosen": -0.09222348034381866, "rewards/margins": 0.09720029681921005, "rewards/rejected": -0.18942378461360931, "step": 3769 }, { "epoch": 2.2998322403538203, "grad_norm": 1.175924301147461, "learning_rate": 4.335578689528475e-06, "log_odds_chosen": 1.1235829591751099, "log_odds_ratio": -0.43334510922431946, "logits/chosen": -0.76809161901474, "logits/rejected": -0.6936110258102417, "logps/chosen": -0.6162145137786865, "logps/rejected": -1.3935056924819946, "loss": 0.9326, "nll_loss": 0.706428050994873, "rewards/accuracies": 0.75, "rewards/chosen": -0.06162144988775253, "rewards/margins": 0.07772910594940186, "rewards/rejected": -0.13935056328773499, "step": 3770 }, { "epoch": 2.300442275430837, "grad_norm": 7.254495143890381, "learning_rate": 4.3345988977342315e-06, "log_odds_chosen": 2.996108055114746, "log_odds_ratio": -0.27186793088912964, "logits/chosen": -0.6774404048919678, "logits/rejected": -0.9056857228279114, "logps/chosen": -0.721163272857666, "logps/rejected": -3.1769375801086426, "loss": 0.935, "nll_loss": 0.846819281578064, "rewards/accuracies": 0.875, "rewards/chosen": -0.07211633026599884, "rewards/margins": 0.24557742476463318, "rewards/rejected": -0.3176937699317932, "step": 3771 }, { "epoch": 2.3010523105078544, "grad_norm": 5.332807540893555, "learning_rate": 4.333619105939988e-06, "log_odds_chosen": 2.1322784423828125, "log_odds_ratio": -0.5243881344795227, "logits/chosen": -0.7882369160652161, "logits/rejected": -0.7903752326965332, "logps/chosen": -0.7244365215301514, "logps/rejected": -2.3707423210144043, "loss": 1.0632, "nll_loss": 1.1598834991455078, "rewards/accuracies": 0.5, "rewards/chosen": -0.0724436491727829, "rewards/margins": 0.16463060677051544, "rewards/rejected": -0.23707425594329834, "step": 3772 }, { "epoch": 2.3016623455848713, "grad_norm": 1.7652599811553955, "learning_rate": 4.332639314145745e-06, "log_odds_chosen": 3.083907127380371, "log_odds_ratio": -0.41019535064697266, "logits/chosen": -0.8661225438117981, "logits/rejected": -0.9966081380844116, "logps/chosen": -0.7148052453994751, "logps/rejected": -3.307352304458618, "loss": 1.1031, "nll_loss": 1.030392050743103, "rewards/accuracies": 0.75, "rewards/chosen": -0.07148052752017975, "rewards/margins": 0.25925469398498535, "rewards/rejected": -0.3307352066040039, "step": 3773 }, { "epoch": 2.302272380661888, "grad_norm": 1.611073613166809, "learning_rate": 4.3316595223515e-06, "log_odds_chosen": 1.8502259254455566, "log_odds_ratio": -0.505987823009491, "logits/chosen": -0.9772500991821289, "logits/rejected": -0.9648323059082031, "logps/chosen": -0.889685869216919, "logps/rejected": -2.5420541763305664, "loss": 1.1185, "nll_loss": 1.120327115058899, "rewards/accuracies": 0.5, "rewards/chosen": -0.08896858245134354, "rewards/margins": 0.16523683071136475, "rewards/rejected": -0.2542054057121277, "step": 3774 }, { "epoch": 2.302882415738905, "grad_norm": 1.5905410051345825, "learning_rate": 4.330679730557256e-06, "log_odds_chosen": 1.1365655660629272, "log_odds_ratio": -0.5177655816078186, "logits/chosen": -0.8539026379585266, "logits/rejected": -0.820368230342865, "logps/chosen": -0.6538571715354919, "logps/rejected": -1.591965675354004, "loss": 1.2166, "nll_loss": 1.0111799240112305, "rewards/accuracies": 0.625, "rewards/chosen": -0.06538571417331696, "rewards/margins": 0.09381085634231567, "rewards/rejected": -0.15919657051563263, "step": 3775 }, { "epoch": 2.303492450815922, "grad_norm": 1.664535641670227, "learning_rate": 4.329699938763013e-06, "log_odds_chosen": 1.6833536624908447, "log_odds_ratio": -0.4433320164680481, "logits/chosen": -0.8809607028961182, "logits/rejected": -0.9346604347229004, "logps/chosen": -1.003766417503357, "logps/rejected": -2.490144729614258, "loss": 1.094, "nll_loss": 1.2930375337600708, "rewards/accuracies": 0.625, "rewards/chosen": -0.10037663578987122, "rewards/margins": 0.14863784611225128, "rewards/rejected": -0.2490144670009613, "step": 3776 }, { "epoch": 2.3041024858929386, "grad_norm": 1.6130462884902954, "learning_rate": 4.328720146968769e-06, "log_odds_chosen": 1.5836050510406494, "log_odds_ratio": -0.3668753504753113, "logits/chosen": -0.9084547162055969, "logits/rejected": -0.8479844331741333, "logps/chosen": -0.7351653575897217, "logps/rejected": -1.8665080070495605, "loss": 1.0691, "nll_loss": 0.9729228019714355, "rewards/accuracies": 1.0, "rewards/chosen": -0.07351653277873993, "rewards/margins": 0.11313425004482269, "rewards/rejected": -0.18665079772472382, "step": 3777 }, { "epoch": 2.304712520969956, "grad_norm": 5.066157341003418, "learning_rate": 4.327740355174525e-06, "log_odds_chosen": 0.7199095487594604, "log_odds_ratio": -0.531818151473999, "logits/chosen": -0.7890702486038208, "logits/rejected": -0.8062787055969238, "logps/chosen": -0.9486857652664185, "logps/rejected": -1.5074481964111328, "loss": 0.9889, "nll_loss": 1.1060715913772583, "rewards/accuracies": 0.625, "rewards/chosen": -0.09486857801675797, "rewards/margins": 0.05587625503540039, "rewards/rejected": -0.15074482560157776, "step": 3778 }, { "epoch": 2.305322556046973, "grad_norm": 2.904792308807373, "learning_rate": 4.326760563380281e-06, "log_odds_chosen": 2.11842942237854, "log_odds_ratio": -0.3871428966522217, "logits/chosen": -0.9442986249923706, "logits/rejected": -0.9579176902770996, "logps/chosen": -0.9548863172531128, "logps/rejected": -2.744216203689575, "loss": 1.0639, "nll_loss": 1.0101618766784668, "rewards/accuracies": 0.75, "rewards/chosen": -0.09548863023519516, "rewards/margins": 0.17893297970294952, "rewards/rejected": -0.2744216024875641, "step": 3779 }, { "epoch": 2.3059325911239896, "grad_norm": 1.2271355390548706, "learning_rate": 4.3257807715860375e-06, "log_odds_chosen": 1.1503405570983887, "log_odds_ratio": -0.45438626408576965, "logits/chosen": -0.9789530038833618, "logits/rejected": -0.9859372973442078, "logps/chosen": -0.937502920627594, "logps/rejected": -1.8717737197875977, "loss": 1.0757, "nll_loss": 1.2443264722824097, "rewards/accuracies": 0.75, "rewards/chosen": -0.09375029802322388, "rewards/margins": 0.09342706203460693, "rewards/rejected": -0.1871773600578308, "step": 3780 }, { "epoch": 2.3065426262010065, "grad_norm": 1.289538025856018, "learning_rate": 4.324800979791794e-06, "log_odds_chosen": 0.8863908052444458, "log_odds_ratio": -0.3830094337463379, "logits/chosen": -1.0964828729629517, "logits/rejected": -0.950339138507843, "logps/chosen": -1.0435813665390015, "logps/rejected": -1.5972926616668701, "loss": 1.2783, "nll_loss": 1.351351022720337, "rewards/accuracies": 1.0, "rewards/chosen": -0.10435813665390015, "rewards/margins": 0.05537114292383194, "rewards/rejected": -0.1597292721271515, "step": 3781 }, { "epoch": 2.3071526612780233, "grad_norm": 2.043077230453491, "learning_rate": 4.323821187997551e-06, "log_odds_chosen": 2.9566640853881836, "log_odds_ratio": -0.19957773387432098, "logits/chosen": -0.9683152437210083, "logits/rejected": -0.8650219440460205, "logps/chosen": -0.8599345088005066, "logps/rejected": -3.2596023082733154, "loss": 1.1026, "nll_loss": 1.2691667079925537, "rewards/accuracies": 0.875, "rewards/chosen": -0.0859934464097023, "rewards/margins": 0.2399667650461197, "rewards/rejected": -0.3259601891040802, "step": 3782 }, { "epoch": 2.3077626963550406, "grad_norm": 2.7754738330841064, "learning_rate": 4.322841396203307e-06, "log_odds_chosen": 1.9337379932403564, "log_odds_ratio": -0.3123589754104614, "logits/chosen": -1.0067657232284546, "logits/rejected": -0.926807165145874, "logps/chosen": -0.9279003739356995, "logps/rejected": -2.505129098892212, "loss": 1.0891, "nll_loss": 1.0003420114517212, "rewards/accuracies": 0.75, "rewards/chosen": -0.09279003739356995, "rewards/margins": 0.1577228605747223, "rewards/rejected": -0.25051289796829224, "step": 3783 }, { "epoch": 2.3083727314320575, "grad_norm": 2.9990622997283936, "learning_rate": 4.321861604409063e-06, "log_odds_chosen": 0.8770504593849182, "log_odds_ratio": -0.5875844359397888, "logits/chosen": -0.8067348599433899, "logits/rejected": -0.7916969060897827, "logps/chosen": -0.9878105521202087, "logps/rejected": -1.6381585597991943, "loss": 1.0356, "nll_loss": 1.0806785821914673, "rewards/accuracies": 0.625, "rewards/chosen": -0.098781056702137, "rewards/margins": 0.06503480672836304, "rewards/rejected": -0.16381585597991943, "step": 3784 }, { "epoch": 2.3089827665090743, "grad_norm": 5.163374423980713, "learning_rate": 4.320881812614819e-06, "log_odds_chosen": 1.005416750907898, "log_odds_ratio": -0.49956291913986206, "logits/chosen": -0.7830510139465332, "logits/rejected": -0.7588286399841309, "logps/chosen": -0.7637386322021484, "logps/rejected": -1.4564403295516968, "loss": 1.1243, "nll_loss": 0.9600640535354614, "rewards/accuracies": 0.625, "rewards/chosen": -0.0763738602399826, "rewards/margins": 0.06927017867565155, "rewards/rejected": -0.14564403891563416, "step": 3785 }, { "epoch": 2.309592801586091, "grad_norm": 2.387300968170166, "learning_rate": 4.319902020820575e-06, "log_odds_chosen": 2.362259864807129, "log_odds_ratio": -0.3997268080711365, "logits/chosen": -0.7686911821365356, "logits/rejected": -0.8513123989105225, "logps/chosen": -0.686875581741333, "logps/rejected": -2.6173887252807617, "loss": 0.9673, "nll_loss": 1.0151478052139282, "rewards/accuracies": 0.75, "rewards/chosen": -0.0686875581741333, "rewards/margins": 0.19305133819580078, "rewards/rejected": -0.2617388963699341, "step": 3786 }, { "epoch": 2.310202836663108, "grad_norm": 5.343902587890625, "learning_rate": 4.318922229026332e-06, "log_odds_chosen": 1.905019998550415, "log_odds_ratio": -0.41669532656669617, "logits/chosen": -0.8566033840179443, "logits/rejected": -0.9007030129432678, "logps/chosen": -0.6774653792381287, "logps/rejected": -2.265603542327881, "loss": 0.9677, "nll_loss": 0.9609557390213013, "rewards/accuracies": 0.875, "rewards/chosen": -0.06774653494358063, "rewards/margins": 0.15881380438804626, "rewards/rejected": -0.22656035423278809, "step": 3787 }, { "epoch": 2.310812871740125, "grad_norm": 0.9642329216003418, "learning_rate": 4.317942437232088e-06, "log_odds_chosen": 1.4764564037322998, "log_odds_ratio": -0.3235543966293335, "logits/chosen": -0.6932666301727295, "logits/rejected": -0.6670259237289429, "logps/chosen": -0.6505281329154968, "logps/rejected": -1.6208336353302002, "loss": 1.1524, "nll_loss": 0.9619959592819214, "rewards/accuracies": 0.875, "rewards/chosen": -0.0650528147816658, "rewards/margins": 0.09703055024147034, "rewards/rejected": -0.16208335757255554, "step": 3788 }, { "epoch": 2.311422906817142, "grad_norm": 3.5451416969299316, "learning_rate": 4.316962645437844e-06, "log_odds_chosen": 0.3987189531326294, "log_odds_ratio": -0.6525441408157349, "logits/chosen": -0.9426546096801758, "logits/rejected": -0.8661746978759766, "logps/chosen": -0.9967726469039917, "logps/rejected": -1.2675623893737793, "loss": 1.1336, "nll_loss": 1.117775797843933, "rewards/accuracies": 0.625, "rewards/chosen": -0.09967727214097977, "rewards/margins": 0.027078978717327118, "rewards/rejected": -0.12675625085830688, "step": 3789 }, { "epoch": 2.312032941894159, "grad_norm": 3.806891679763794, "learning_rate": 4.315982853643601e-06, "log_odds_chosen": 2.394606590270996, "log_odds_ratio": -0.5039646625518799, "logits/chosen": -0.8516750931739807, "logits/rejected": -0.8639001250267029, "logps/chosen": -0.5588906407356262, "logps/rejected": -2.3806488513946533, "loss": 0.9701, "nll_loss": 0.832456111907959, "rewards/accuracies": 0.75, "rewards/chosen": -0.0558890700340271, "rewards/margins": 0.18217585980892181, "rewards/rejected": -0.23806491494178772, "step": 3790 }, { "epoch": 2.312642976971176, "grad_norm": 1.3844401836395264, "learning_rate": 4.3150030618493565e-06, "log_odds_chosen": 2.5592222213745117, "log_odds_ratio": -0.2988951802253723, "logits/chosen": -0.7907050848007202, "logits/rejected": -0.8578824400901794, "logps/chosen": -0.5962704420089722, "logps/rejected": -2.458012342453003, "loss": 0.9624, "nll_loss": 1.030160665512085, "rewards/accuracies": 0.875, "rewards/chosen": -0.05962704122066498, "rewards/margins": 0.1861741840839386, "rewards/rejected": -0.24580124020576477, "step": 3791 }, { "epoch": 2.3132530120481927, "grad_norm": 1.6955199241638184, "learning_rate": 4.314023270055113e-06, "log_odds_chosen": 2.84183931350708, "log_odds_ratio": -0.20560108125209808, "logits/chosen": -0.6562494039535522, "logits/rejected": -0.8680815100669861, "logps/chosen": -0.6452372074127197, "logps/rejected": -2.743687868118286, "loss": 0.9636, "nll_loss": 0.7837105989456177, "rewards/accuracies": 1.0, "rewards/chosen": -0.06452371925115585, "rewards/margins": 0.20984503626823425, "rewards/rejected": -0.2743687629699707, "step": 3792 }, { "epoch": 2.3138630471252095, "grad_norm": 1.305993676185608, "learning_rate": 4.31304347826087e-06, "log_odds_chosen": 1.0311203002929688, "log_odds_ratio": -0.4247323274612427, "logits/chosen": -0.684756875038147, "logits/rejected": -0.5864530205726624, "logps/chosen": -0.6776829361915588, "logps/rejected": -1.2135190963745117, "loss": 0.8975, "nll_loss": 0.8372725248336792, "rewards/accuracies": 0.875, "rewards/chosen": -0.06776829063892365, "rewards/margins": 0.05358362942934036, "rewards/rejected": -0.12135191261768341, "step": 3793 }, { "epoch": 2.314473082202227, "grad_norm": 1.2797093391418457, "learning_rate": 4.312063686466626e-06, "log_odds_chosen": 1.854402780532837, "log_odds_ratio": -0.32680490612983704, "logits/chosen": -0.928786039352417, "logits/rejected": -0.9031205177307129, "logps/chosen": -0.7808973789215088, "logps/rejected": -2.1809303760528564, "loss": 0.9194, "nll_loss": 0.8645766377449036, "rewards/accuracies": 0.875, "rewards/chosen": -0.07808974385261536, "rewards/margins": 0.1400032937526703, "rewards/rejected": -0.21809303760528564, "step": 3794 }, { "epoch": 2.3150831172792437, "grad_norm": 1.2737531661987305, "learning_rate": 4.311083894672382e-06, "log_odds_chosen": 2.016731023788452, "log_odds_ratio": -0.2750368118286133, "logits/chosen": -0.889729380607605, "logits/rejected": -0.8788928389549255, "logps/chosen": -0.6530545949935913, "logps/rejected": -2.0758485794067383, "loss": 0.9342, "nll_loss": 0.803379237651825, "rewards/accuracies": 0.75, "rewards/chosen": -0.06530545651912689, "rewards/margins": 0.14227940142154694, "rewards/rejected": -0.20758485794067383, "step": 3795 }, { "epoch": 2.3156931523562605, "grad_norm": 1.105695366859436, "learning_rate": 4.310104102878139e-06, "log_odds_chosen": 0.733875572681427, "log_odds_ratio": -0.5955312848091125, "logits/chosen": -0.8489227890968323, "logits/rejected": -0.8136996030807495, "logps/chosen": -0.8602747917175293, "logps/rejected": -1.4947841167449951, "loss": 1.0326, "nll_loss": 1.041451334953308, "rewards/accuracies": 0.625, "rewards/chosen": -0.08602748066186905, "rewards/margins": 0.06345093995332718, "rewards/rejected": -0.14947842061519623, "step": 3796 }, { "epoch": 2.3163031874332773, "grad_norm": 1.1340608596801758, "learning_rate": 4.309124311083894e-06, "log_odds_chosen": 2.267773389816284, "log_odds_ratio": -0.3999975621700287, "logits/chosen": -0.8597730398178101, "logits/rejected": -0.9899814128875732, "logps/chosen": -0.6398810148239136, "logps/rejected": -2.408201217651367, "loss": 1.0647, "nll_loss": 1.0222197771072388, "rewards/accuracies": 0.625, "rewards/chosen": -0.0639881044626236, "rewards/margins": 0.17683202028274536, "rewards/rejected": -0.24082012474536896, "step": 3797 }, { "epoch": 2.316913222510294, "grad_norm": 0.9941070079803467, "learning_rate": 4.30814451928965e-06, "log_odds_chosen": 0.8747661113739014, "log_odds_ratio": -0.5240099430084229, "logits/chosen": -0.780516505241394, "logits/rejected": -0.883562445640564, "logps/chosen": -0.7413321733474731, "logps/rejected": -1.2966845035552979, "loss": 1.2852, "nll_loss": 1.340566635131836, "rewards/accuracies": 0.75, "rewards/chosen": -0.07413321733474731, "rewards/margins": 0.055535245686769485, "rewards/rejected": -0.1296684592962265, "step": 3798 }, { "epoch": 2.317523257587311, "grad_norm": 1.329936146736145, "learning_rate": 4.307164727495407e-06, "log_odds_chosen": 0.42583954334259033, "log_odds_ratio": -0.6259796023368835, "logits/chosen": -0.8753276467323303, "logits/rejected": -0.8290300369262695, "logps/chosen": -0.7177606821060181, "logps/rejected": -0.970879316329956, "loss": 1.0868, "nll_loss": 0.8972570300102234, "rewards/accuracies": 0.625, "rewards/chosen": -0.07177606970071793, "rewards/margins": 0.025311868637800217, "rewards/rejected": -0.09708793461322784, "step": 3799 }, { "epoch": 2.3181332926643283, "grad_norm": 1.801285982131958, "learning_rate": 4.306184935701163e-06, "log_odds_chosen": 2.0466063022613525, "log_odds_ratio": -0.26203933358192444, "logits/chosen": -0.6213898062705994, "logits/rejected": -0.8434267640113831, "logps/chosen": -0.583098292350769, "logps/rejected": -1.9826396703720093, "loss": 0.974, "nll_loss": 0.7731451392173767, "rewards/accuracies": 1.0, "rewards/chosen": -0.058309830725193024, "rewards/margins": 0.13995414972305298, "rewards/rejected": -0.1982639729976654, "step": 3800 }, { "epoch": 2.318743327741345, "grad_norm": 1.5000470876693726, "learning_rate": 4.30520514390692e-06, "log_odds_chosen": 2.6409108638763428, "log_odds_ratio": -0.4167112112045288, "logits/chosen": -0.8119515776634216, "logits/rejected": -0.9013205170631409, "logps/chosen": -0.6438989043235779, "logps/rejected": -2.893054246902466, "loss": 0.9882, "nll_loss": 0.7496641874313354, "rewards/accuracies": 0.625, "rewards/chosen": -0.06438989192247391, "rewards/margins": 0.2249155342578888, "rewards/rejected": -0.2893054187297821, "step": 3801 }, { "epoch": 2.319353362818362, "grad_norm": 2.3596458435058594, "learning_rate": 4.3042253521126756e-06, "log_odds_chosen": 2.018223524093628, "log_odds_ratio": -0.3721444308757782, "logits/chosen": -0.6388235688209534, "logits/rejected": -0.6991946697235107, "logps/chosen": -0.5927003622055054, "logps/rejected": -2.0086398124694824, "loss": 0.9878, "nll_loss": 0.7290210723876953, "rewards/accuracies": 0.625, "rewards/chosen": -0.05927003175020218, "rewards/margins": 0.14159394800662994, "rewards/rejected": -0.20086398720741272, "step": 3802 }, { "epoch": 2.319963397895379, "grad_norm": 1.4232516288757324, "learning_rate": 4.303245560318432e-06, "log_odds_chosen": 1.6332106590270996, "log_odds_ratio": -0.4321562349796295, "logits/chosen": -0.6842277646064758, "logits/rejected": -0.7544980645179749, "logps/chosen": -0.4653874337673187, "logps/rejected": -1.5301892757415771, "loss": 0.9842, "nll_loss": 0.8460519313812256, "rewards/accuracies": 0.625, "rewards/chosen": -0.046538740396499634, "rewards/margins": 0.1064801961183548, "rewards/rejected": -0.15301892161369324, "step": 3803 }, { "epoch": 2.320573432972396, "grad_norm": 7.528347492218018, "learning_rate": 4.302265768524189e-06, "log_odds_chosen": 2.9254965782165527, "log_odds_ratio": -0.3982014060020447, "logits/chosen": -0.9482356309890747, "logits/rejected": -1.0247235298156738, "logps/chosen": -0.8178841471672058, "logps/rejected": -3.32590389251709, "loss": 1.1287, "nll_loss": 0.993261456489563, "rewards/accuracies": 0.75, "rewards/chosen": -0.08178841322660446, "rewards/margins": 0.2508019506931305, "rewards/rejected": -0.33259037137031555, "step": 3804 }, { "epoch": 2.321183468049413, "grad_norm": 1.2160003185272217, "learning_rate": 4.301285976729945e-06, "log_odds_chosen": 2.0663115978240967, "log_odds_ratio": -0.4146362841129303, "logits/chosen": -0.7118147015571594, "logits/rejected": -0.7314068078994751, "logps/chosen": -0.6770027875900269, "logps/rejected": -2.362683057785034, "loss": 1.0732, "nll_loss": 0.8938395380973816, "rewards/accuracies": 0.625, "rewards/chosen": -0.06770028918981552, "rewards/margins": 0.16856801509857178, "rewards/rejected": -0.2362682968378067, "step": 3805 }, { "epoch": 2.32179350312643, "grad_norm": 8.927750587463379, "learning_rate": 4.300306184935701e-06, "log_odds_chosen": 3.0114738941192627, "log_odds_ratio": -0.1051827147603035, "logits/chosen": -0.8457134962081909, "logits/rejected": -0.9435209035873413, "logps/chosen": -0.6983883380889893, "logps/rejected": -3.059911012649536, "loss": 0.9635, "nll_loss": 0.7764533758163452, "rewards/accuracies": 1.0, "rewards/chosen": -0.06983883678913116, "rewards/margins": 0.2361522763967514, "rewards/rejected": -0.30599111318588257, "step": 3806 }, { "epoch": 2.3224035382034467, "grad_norm": 0.9844527244567871, "learning_rate": 4.299326393141458e-06, "log_odds_chosen": 1.501029133796692, "log_odds_ratio": -0.38569778203964233, "logits/chosen": -1.0226123332977295, "logits/rejected": -1.034989833831787, "logps/chosen": -0.6762649416923523, "logps/rejected": -1.627354621887207, "loss": 1.015, "nll_loss": 1.1946632862091064, "rewards/accuracies": 0.75, "rewards/chosen": -0.06762649863958359, "rewards/margins": 0.09510897099971771, "rewards/rejected": -0.1627354621887207, "step": 3807 }, { "epoch": 2.3230135732804635, "grad_norm": 3.8119451999664307, "learning_rate": 4.298346601347213e-06, "log_odds_chosen": 1.058890461921692, "log_odds_ratio": -0.5361051559448242, "logits/chosen": -1.1362686157226562, "logits/rejected": -1.0573432445526123, "logps/chosen": -0.9364434480667114, "logps/rejected": -1.858987808227539, "loss": 1.1459, "nll_loss": 1.2403740882873535, "rewards/accuracies": 0.75, "rewards/chosen": -0.09364435076713562, "rewards/margins": 0.09225442260503769, "rewards/rejected": -0.1858987808227539, "step": 3808 }, { "epoch": 2.3236236083574804, "grad_norm": 1.2787455320358276, "learning_rate": 4.297366809552969e-06, "log_odds_chosen": 2.3024706840515137, "log_odds_ratio": -0.288973331451416, "logits/chosen": -0.7547962665557861, "logits/rejected": -0.7787744402885437, "logps/chosen": -0.6329952478408813, "logps/rejected": -2.401036500930786, "loss": 0.9713, "nll_loss": 0.7765597701072693, "rewards/accuracies": 1.0, "rewards/chosen": -0.0632995218038559, "rewards/margins": 0.17680412530899048, "rewards/rejected": -0.24010364711284637, "step": 3809 }, { "epoch": 2.3242336434344977, "grad_norm": 1.1615328788757324, "learning_rate": 4.296387017758726e-06, "log_odds_chosen": 2.242203712463379, "log_odds_ratio": -0.30531245470046997, "logits/chosen": -0.837925374507904, "logits/rejected": -0.8562459945678711, "logps/chosen": -0.7605957984924316, "logps/rejected": -2.471280097961426, "loss": 0.9438, "nll_loss": 1.0070899724960327, "rewards/accuracies": 0.875, "rewards/chosen": -0.07605957984924316, "rewards/margins": 0.17106842994689941, "rewards/rejected": -0.24712800979614258, "step": 3810 }, { "epoch": 2.3248436785115145, "grad_norm": 1.483852744102478, "learning_rate": 4.295407225964482e-06, "log_odds_chosen": 1.4906022548675537, "log_odds_ratio": -0.3825669586658478, "logits/chosen": -1.0647996664047241, "logits/rejected": -0.9502236843109131, "logps/chosen": -0.6764675378799438, "logps/rejected": -1.802983283996582, "loss": 0.9861, "nll_loss": 0.8490424156188965, "rewards/accuracies": 0.75, "rewards/chosen": -0.06764675676822662, "rewards/margins": 0.11265158653259277, "rewards/rejected": -0.1802983433008194, "step": 3811 }, { "epoch": 2.3254537135885314, "grad_norm": 2.2400028705596924, "learning_rate": 4.2944274341702385e-06, "log_odds_chosen": 1.1711369752883911, "log_odds_ratio": -0.4440813660621643, "logits/chosen": -0.7649149894714355, "logits/rejected": -0.8191266059875488, "logps/chosen": -0.6429809331893921, "logps/rejected": -1.4861626625061035, "loss": 1.0782, "nll_loss": 1.0129963159561157, "rewards/accuracies": 0.625, "rewards/chosen": -0.06429809331893921, "rewards/margins": 0.08431817591190338, "rewards/rejected": -0.1486162543296814, "step": 3812 }, { "epoch": 2.326063748665548, "grad_norm": 1.1600279808044434, "learning_rate": 4.2934476423759955e-06, "log_odds_chosen": 1.476876139640808, "log_odds_ratio": -0.4222206473350525, "logits/chosen": -0.8851209878921509, "logits/rejected": -0.991608738899231, "logps/chosen": -0.7105883359909058, "logps/rejected": -1.7116425037384033, "loss": 1.0145, "nll_loss": 1.1950738430023193, "rewards/accuracies": 0.75, "rewards/chosen": -0.07105883210897446, "rewards/margins": 0.10010542720556259, "rewards/rejected": -0.17116425931453705, "step": 3813 }, { "epoch": 2.326673783742565, "grad_norm": 2.083770513534546, "learning_rate": 4.292467850581751e-06, "log_odds_chosen": 0.23158425092697144, "log_odds_ratio": -0.8101614713668823, "logits/chosen": -0.9107954502105713, "logits/rejected": -0.8364979028701782, "logps/chosen": -1.1349396705627441, "logps/rejected": -1.3347762823104858, "loss": 1.1477, "nll_loss": 1.1558438539505005, "rewards/accuracies": 0.5, "rewards/chosen": -0.11349394917488098, "rewards/margins": 0.019983666017651558, "rewards/rejected": -0.13347762823104858, "step": 3814 }, { "epoch": 2.3272838188195824, "grad_norm": 1.1841634511947632, "learning_rate": 4.291488058787508e-06, "log_odds_chosen": 1.5454403162002563, "log_odds_ratio": -0.4452863335609436, "logits/chosen": -0.8584579229354858, "logits/rejected": -0.8951631784439087, "logps/chosen": -0.796069860458374, "logps/rejected": -2.0098533630371094, "loss": 1.0571, "nll_loss": 1.078478455543518, "rewards/accuracies": 0.75, "rewards/chosen": -0.07960698753595352, "rewards/margins": 0.1213783472776413, "rewards/rejected": -0.20098534226417542, "step": 3815 }, { "epoch": 2.327893853896599, "grad_norm": 1.979535698890686, "learning_rate": 4.290508266993264e-06, "log_odds_chosen": 1.910531997680664, "log_odds_ratio": -0.5080084204673767, "logits/chosen": -0.9377456903457642, "logits/rejected": -0.8284542560577393, "logps/chosen": -0.9231626391410828, "logps/rejected": -2.6780033111572266, "loss": 0.8922, "nll_loss": 0.93421471118927, "rewards/accuracies": 0.75, "rewards/chosen": -0.09231626242399216, "rewards/margins": 0.1754840910434723, "rewards/rejected": -0.26780036091804504, "step": 3816 }, { "epoch": 2.328503888973616, "grad_norm": 1.2139252424240112, "learning_rate": 4.28952847519902e-06, "log_odds_chosen": 0.6103600263595581, "log_odds_ratio": -0.6937113404273987, "logits/chosen": -0.8069350719451904, "logits/rejected": -0.8609499931335449, "logps/chosen": -0.8284063339233398, "logps/rejected": -1.3075295686721802, "loss": 1.1844, "nll_loss": 1.2098660469055176, "rewards/accuracies": 0.5, "rewards/chosen": -0.08284063637256622, "rewards/margins": 0.04791231453418732, "rewards/rejected": -0.13075295090675354, "step": 3817 }, { "epoch": 2.329113924050633, "grad_norm": 4.06757926940918, "learning_rate": 4.288548683404777e-06, "log_odds_chosen": 1.7017742395401, "log_odds_ratio": -0.44921624660491943, "logits/chosen": -0.5909831523895264, "logits/rejected": -0.7683712840080261, "logps/chosen": -0.7724026441574097, "logps/rejected": -2.0681161880493164, "loss": 1.0226, "nll_loss": 0.897942066192627, "rewards/accuracies": 0.75, "rewards/chosen": -0.07724025845527649, "rewards/margins": 0.12957139313220978, "rewards/rejected": -0.20681163668632507, "step": 3818 }, { "epoch": 2.3297239591276497, "grad_norm": 1.546921730041504, "learning_rate": 4.287568891610532e-06, "log_odds_chosen": 2.452500820159912, "log_odds_ratio": -0.24062904715538025, "logits/chosen": -0.8939720392227173, "logits/rejected": -1.0516517162322998, "logps/chosen": -0.6961843967437744, "logps/rejected": -2.593967914581299, "loss": 1.0683, "nll_loss": 0.9442591667175293, "rewards/accuracies": 0.875, "rewards/chosen": -0.06961844861507416, "rewards/margins": 0.18977835774421692, "rewards/rejected": -0.2593967914581299, "step": 3819 }, { "epoch": 2.3303339942046666, "grad_norm": 0.8939404487609863, "learning_rate": 4.286589099816288e-06, "log_odds_chosen": 0.5727902054786682, "log_odds_ratio": -0.7858390808105469, "logits/chosen": -0.8655822277069092, "logits/rejected": -0.8149731755256653, "logps/chosen": -0.9634789228439331, "logps/rejected": -1.4279141426086426, "loss": 1.0834, "nll_loss": 1.095695972442627, "rewards/accuracies": 0.5, "rewards/chosen": -0.09634789824485779, "rewards/margins": 0.04644351452589035, "rewards/rejected": -0.14279140532016754, "step": 3820 }, { "epoch": 2.330944029281684, "grad_norm": 2.0867910385131836, "learning_rate": 4.285609308022045e-06, "log_odds_chosen": 0.8014987111091614, "log_odds_ratio": -0.6046007871627808, "logits/chosen": -0.8926500678062439, "logits/rejected": -0.7895166873931885, "logps/chosen": -0.8822468519210815, "logps/rejected": -1.442956566810608, "loss": 1.2565, "nll_loss": 0.9827859997749329, "rewards/accuracies": 0.75, "rewards/chosen": -0.08822469413280487, "rewards/margins": 0.0560709685087204, "rewards/rejected": -0.14429566264152527, "step": 3821 }, { "epoch": 2.3315540643587007, "grad_norm": 1.037606120109558, "learning_rate": 4.284629516227801e-06, "log_odds_chosen": 1.0358574390411377, "log_odds_ratio": -0.5297369360923767, "logits/chosen": -0.6588783860206604, "logits/rejected": -0.7835967540740967, "logps/chosen": -0.738440752029419, "logps/rejected": -1.4954237937927246, "loss": 0.9594, "nll_loss": 0.7490825653076172, "rewards/accuracies": 0.625, "rewards/chosen": -0.0738440752029419, "rewards/margins": 0.07569828629493713, "rewards/rejected": -0.14954237639904022, "step": 3822 }, { "epoch": 2.3321640994357176, "grad_norm": 1.5271083116531372, "learning_rate": 4.2836497244335575e-06, "log_odds_chosen": 1.8782997131347656, "log_odds_ratio": -0.25713449716567993, "logits/chosen": -1.0100258588790894, "logits/rejected": -1.0118074417114258, "logps/chosen": -0.9797115921974182, "logps/rejected": -2.493072032928467, "loss": 1.1186, "nll_loss": 1.002108097076416, "rewards/accuracies": 0.875, "rewards/chosen": -0.09797116369009018, "rewards/margins": 0.15133604407310486, "rewards/rejected": -0.24930720031261444, "step": 3823 }, { "epoch": 2.3327741345127344, "grad_norm": 1.9233778715133667, "learning_rate": 4.2826699326393145e-06, "log_odds_chosen": 1.6108264923095703, "log_odds_ratio": -0.3404298424720764, "logits/chosen": -0.7116779088973999, "logits/rejected": -0.7535459399223328, "logps/chosen": -0.7287690043449402, "logps/rejected": -1.7844953536987305, "loss": 1.1036, "nll_loss": 0.8222283124923706, "rewards/accuracies": 0.75, "rewards/chosen": -0.07287690043449402, "rewards/margins": 0.10557263344526291, "rewards/rejected": -0.17844954133033752, "step": 3824 }, { "epoch": 2.3333841695897513, "grad_norm": 6.102280139923096, "learning_rate": 4.28169014084507e-06, "log_odds_chosen": 1.9477555751800537, "log_odds_ratio": -0.3067387044429779, "logits/chosen": -0.8814351558685303, "logits/rejected": -0.792596697807312, "logps/chosen": -0.8150956034660339, "logps/rejected": -2.221151351928711, "loss": 0.9543, "nll_loss": 1.1102877855300903, "rewards/accuracies": 0.875, "rewards/chosen": -0.0815095603466034, "rewards/margins": 0.1406055986881256, "rewards/rejected": -0.222115159034729, "step": 3825 }, { "epoch": 2.3339942046667685, "grad_norm": 1.2908469438552856, "learning_rate": 4.280710349050826e-06, "log_odds_chosen": 0.4100114703178406, "log_odds_ratio": -0.5862939357757568, "logits/chosen": -1.1405904293060303, "logits/rejected": -0.9004709124565125, "logps/chosen": -0.9369367957115173, "logps/rejected": -1.2364777326583862, "loss": 1.1158, "nll_loss": 1.2386521100997925, "rewards/accuracies": 0.625, "rewards/chosen": -0.09369368106126785, "rewards/margins": 0.029954100027680397, "rewards/rejected": -0.1236477792263031, "step": 3826 }, { "epoch": 2.3346042397437854, "grad_norm": 1.530360460281372, "learning_rate": 4.279730557256583e-06, "log_odds_chosen": 0.40390729904174805, "log_odds_ratio": -0.6311410665512085, "logits/chosen": -0.9034016132354736, "logits/rejected": -0.9170619249343872, "logps/chosen": -0.9828474521636963, "logps/rejected": -1.2495579719543457, "loss": 1.2231, "nll_loss": 1.1546990871429443, "rewards/accuracies": 0.5, "rewards/chosen": -0.0982847511768341, "rewards/margins": 0.026671050116419792, "rewards/rejected": -0.12495578825473785, "step": 3827 }, { "epoch": 2.3352142748208022, "grad_norm": 1.22455632686615, "learning_rate": 4.278750765462339e-06, "log_odds_chosen": 3.0073254108428955, "log_odds_ratio": -0.15223807096481323, "logits/chosen": -0.6404058933258057, "logits/rejected": -0.7797352075576782, "logps/chosen": -0.4144424796104431, "logps/rejected": -2.4557056427001953, "loss": 1.0291, "nll_loss": 1.0295941829681396, "rewards/accuracies": 1.0, "rewards/chosen": -0.04144425317645073, "rewards/margins": 0.20412632822990417, "rewards/rejected": -0.245570570230484, "step": 3828 }, { "epoch": 2.335824309897819, "grad_norm": 3.802536964416504, "learning_rate": 4.277770973668096e-06, "log_odds_chosen": 1.666757583618164, "log_odds_ratio": -0.4670165777206421, "logits/chosen": -0.8289409875869751, "logits/rejected": -1.0401865243911743, "logps/chosen": -0.765066385269165, "logps/rejected": -2.0696001052856445, "loss": 1.0436, "nll_loss": 0.9460814595222473, "rewards/accuracies": 0.75, "rewards/chosen": -0.07650664448738098, "rewards/margins": 0.13045337796211243, "rewards/rejected": -0.2069600224494934, "step": 3829 }, { "epoch": 2.336434344974836, "grad_norm": 0.9213159680366516, "learning_rate": 4.276791181873852e-06, "log_odds_chosen": -0.05688214302062988, "log_odds_ratio": -0.8334795236587524, "logits/chosen": -1.0179612636566162, "logits/rejected": -0.9147520065307617, "logps/chosen": -0.924744725227356, "logps/rejected": -0.9012904167175293, "loss": 1.0118, "nll_loss": 1.1484040021896362, "rewards/accuracies": 0.375, "rewards/chosen": -0.09247447550296783, "rewards/margins": -0.002345440909266472, "rewards/rejected": -0.09012904018163681, "step": 3830 }, { "epoch": 2.3370443800518528, "grad_norm": 2.4891302585601807, "learning_rate": 4.275811390079607e-06, "log_odds_chosen": 0.009993091225624084, "log_odds_ratio": -0.7376406192779541, "logits/chosen": -0.7932602763175964, "logits/rejected": -0.896813690662384, "logps/chosen": -0.9421353936195374, "logps/rejected": -0.9592188000679016, "loss": 0.9888, "nll_loss": 1.0526505708694458, "rewards/accuracies": 0.25, "rewards/chosen": -0.09421353787183762, "rewards/margins": 0.00170834269374609, "rewards/rejected": -0.09592188149690628, "step": 3831 }, { "epoch": 2.33765441512887, "grad_norm": 1.4823436737060547, "learning_rate": 4.274831598285364e-06, "log_odds_chosen": 1.41469144821167, "log_odds_ratio": -0.6456068158149719, "logits/chosen": -0.7940420508384705, "logits/rejected": -0.9212964773178101, "logps/chosen": -1.0841543674468994, "logps/rejected": -1.8845465183258057, "loss": 1.0527, "nll_loss": 1.1515601873397827, "rewards/accuracies": 0.5, "rewards/chosen": -0.10841543972492218, "rewards/margins": 0.08003921061754227, "rewards/rejected": -0.18845464289188385, "step": 3832 }, { "epoch": 2.338264450205887, "grad_norm": 0.9212853908538818, "learning_rate": 4.2738518064911204e-06, "log_odds_chosen": 0.9305362105369568, "log_odds_ratio": -0.41575053334236145, "logits/chosen": -0.7491858005523682, "logits/rejected": -0.8106008172035217, "logps/chosen": -0.69617760181427, "logps/rejected": -1.1876246929168701, "loss": 1.0986, "nll_loss": 0.9464604258537292, "rewards/accuracies": 0.75, "rewards/chosen": -0.06961776316165924, "rewards/margins": 0.049144718796014786, "rewards/rejected": -0.11876247823238373, "step": 3833 }, { "epoch": 2.3388744852829038, "grad_norm": 1.7558716535568237, "learning_rate": 4.2728720146968766e-06, "log_odds_chosen": 1.301233172416687, "log_odds_ratio": -0.5064469575881958, "logits/chosen": -0.8460436463356018, "logits/rejected": -0.9211156964302063, "logps/chosen": -0.698133111000061, "logps/rejected": -1.5450853109359741, "loss": 1.222, "nll_loss": 0.960405707359314, "rewards/accuracies": 0.625, "rewards/chosen": -0.0698133111000061, "rewards/margins": 0.08469521999359131, "rewards/rejected": -0.1545085310935974, "step": 3834 }, { "epoch": 2.3394845203599206, "grad_norm": 3.3276898860931396, "learning_rate": 4.2718922229026335e-06, "log_odds_chosen": 0.8836581707000732, "log_odds_ratio": -0.6300619840621948, "logits/chosen": -1.0038436651229858, "logits/rejected": -1.0547280311584473, "logps/chosen": -1.1495243310928345, "logps/rejected": -1.9047305583953857, "loss": 1.1539, "nll_loss": 1.2993099689483643, "rewards/accuracies": 0.5, "rewards/chosen": -0.1149524375796318, "rewards/margins": 0.07552063465118408, "rewards/rejected": -0.19047307968139648, "step": 3835 }, { "epoch": 2.3400945554369375, "grad_norm": 2.117260217666626, "learning_rate": 4.270912431108389e-06, "log_odds_chosen": 0.3478768467903137, "log_odds_ratio": -0.6632562279701233, "logits/chosen": -0.9185119867324829, "logits/rejected": -0.8364837765693665, "logps/chosen": -0.8745166063308716, "logps/rejected": -1.1687119007110596, "loss": 1.1303, "nll_loss": 1.0622996091842651, "rewards/accuracies": 0.375, "rewards/chosen": -0.08745165914297104, "rewards/margins": 0.02941952459514141, "rewards/rejected": -0.1168711856007576, "step": 3836 }, { "epoch": 2.3407045905139547, "grad_norm": 1.2813127040863037, "learning_rate": 4.269932639314145e-06, "log_odds_chosen": 1.0546964406967163, "log_odds_ratio": -0.3694326877593994, "logits/chosen": -0.862117350101471, "logits/rejected": -0.8609915971755981, "logps/chosen": -0.9096320271492004, "logps/rejected": -1.5629448890686035, "loss": 1.1492, "nll_loss": 0.8461524248123169, "rewards/accuracies": 0.875, "rewards/chosen": -0.0909631997346878, "rewards/margins": 0.06533131003379822, "rewards/rejected": -0.15629449486732483, "step": 3837 }, { "epoch": 2.3413146255909716, "grad_norm": 1.1313921213150024, "learning_rate": 4.268952847519902e-06, "log_odds_chosen": 1.3489419221878052, "log_odds_ratio": -0.32900890707969666, "logits/chosen": -0.7944619059562683, "logits/rejected": -0.8477065563201904, "logps/chosen": -0.8711955547332764, "logps/rejected": -1.9075862169265747, "loss": 1.0581, "nll_loss": 0.9578561782836914, "rewards/accuracies": 0.875, "rewards/chosen": -0.08711955696344376, "rewards/margins": 0.10363905131816864, "rewards/rejected": -0.190758615732193, "step": 3838 }, { "epoch": 2.3419246606679884, "grad_norm": 1.2025504112243652, "learning_rate": 4.267973055725658e-06, "log_odds_chosen": 1.9181028604507446, "log_odds_ratio": -0.32248222827911377, "logits/chosen": -0.6721473932266235, "logits/rejected": -0.784743070602417, "logps/chosen": -0.8534338474273682, "logps/rejected": -2.294114351272583, "loss": 0.9266, "nll_loss": 0.8789671659469604, "rewards/accuracies": 0.875, "rewards/chosen": -0.0853433832526207, "rewards/margins": 0.14406804740428925, "rewards/rejected": -0.22941142320632935, "step": 3839 }, { "epoch": 2.3425346957450053, "grad_norm": 1.6518372297286987, "learning_rate": 4.266993263931414e-06, "log_odds_chosen": 0.5060199499130249, "log_odds_ratio": -0.7021706104278564, "logits/chosen": -0.7656505107879639, "logits/rejected": -0.623945951461792, "logps/chosen": -1.0571153163909912, "logps/rejected": -1.3737554550170898, "loss": 1.0152, "nll_loss": 1.0936282873153687, "rewards/accuracies": 0.625, "rewards/chosen": -0.10571151971817017, "rewards/margins": 0.03166402876377106, "rewards/rejected": -0.13737556338310242, "step": 3840 }, { "epoch": 2.343144730822022, "grad_norm": 2.6263134479522705, "learning_rate": 4.266013472137171e-06, "log_odds_chosen": 0.27013933658599854, "log_odds_ratio": -0.6091958284378052, "logits/chosen": -0.782029390335083, "logits/rejected": -0.6711163520812988, "logps/chosen": -0.8395928144454956, "logps/rejected": -1.013667345046997, "loss": 1.0796, "nll_loss": 1.0993791818618774, "rewards/accuracies": 0.75, "rewards/chosen": -0.08395928889513016, "rewards/margins": 0.017407454550266266, "rewards/rejected": -0.10136673599481583, "step": 3841 }, { "epoch": 2.343754765899039, "grad_norm": 1.4309176206588745, "learning_rate": 4.265033680342926e-06, "log_odds_chosen": 2.2427821159362793, "log_odds_ratio": -0.4453708231449127, "logits/chosen": -0.7726026773452759, "logits/rejected": -0.9742847681045532, "logps/chosen": -0.7973431944847107, "logps/rejected": -2.6811065673828125, "loss": 0.9993, "nll_loss": 0.9749939441680908, "rewards/accuracies": 0.75, "rewards/chosen": -0.07973431795835495, "rewards/margins": 0.18837635219097137, "rewards/rejected": -0.2681106626987457, "step": 3842 }, { "epoch": 2.3443648009760563, "grad_norm": 1.4811112880706787, "learning_rate": 4.264053888548683e-06, "log_odds_chosen": 1.3712512254714966, "log_odds_ratio": -0.3724123239517212, "logits/chosen": -0.6897038221359253, "logits/rejected": -0.6951648592948914, "logps/chosen": -0.6404494047164917, "logps/rejected": -1.5328927040100098, "loss": 0.8655, "nll_loss": 0.8174915313720703, "rewards/accuracies": 0.875, "rewards/chosen": -0.06404494494199753, "rewards/margins": 0.08924433588981628, "rewards/rejected": -0.1532892882823944, "step": 3843 }, { "epoch": 2.344974836053073, "grad_norm": 1.4202202558517456, "learning_rate": 4.2630740967544395e-06, "log_odds_chosen": 2.0143189430236816, "log_odds_ratio": -0.33846163749694824, "logits/chosen": -0.7285637259483337, "logits/rejected": -0.9954649806022644, "logps/chosen": -0.8152353763580322, "logps/rejected": -2.1335721015930176, "loss": 1.1267, "nll_loss": 1.2023000717163086, "rewards/accuracies": 0.875, "rewards/chosen": -0.08152353763580322, "rewards/margins": 0.13183368742465973, "rewards/rejected": -0.21335721015930176, "step": 3844 }, { "epoch": 2.34558487113009, "grad_norm": 1.8485195636749268, "learning_rate": 4.262094304960196e-06, "log_odds_chosen": 1.1491186618804932, "log_odds_ratio": -0.4473736882209778, "logits/chosen": -0.8425145745277405, "logits/rejected": -0.7337931990623474, "logps/chosen": -1.0026031732559204, "logps/rejected": -1.8866081237792969, "loss": 1.1551, "nll_loss": 1.3808622360229492, "rewards/accuracies": 0.875, "rewards/chosen": -0.10026031732559204, "rewards/margins": 0.08840048313140869, "rewards/rejected": -0.18866080045700073, "step": 3845 }, { "epoch": 2.346194906207107, "grad_norm": 2.3680522441864014, "learning_rate": 4.261114513165953e-06, "log_odds_chosen": 2.0553407669067383, "log_odds_ratio": -0.44438570737838745, "logits/chosen": -0.7959437370300293, "logits/rejected": -0.8006775975227356, "logps/chosen": -0.8331616520881653, "logps/rejected": -2.491041898727417, "loss": 1.1598, "nll_loss": 1.0281133651733398, "rewards/accuracies": 0.75, "rewards/chosen": -0.08331616967916489, "rewards/margins": 0.16578802466392517, "rewards/rejected": -0.24910420179367065, "step": 3846 }, { "epoch": 2.3468049412841236, "grad_norm": 4.051345348358154, "learning_rate": 4.260134721371709e-06, "log_odds_chosen": 1.265262484550476, "log_odds_ratio": -0.5084211230278015, "logits/chosen": -0.99423748254776, "logits/rejected": -0.900798499584198, "logps/chosen": -0.7797771692276001, "logps/rejected": -1.743417739868164, "loss": 1.2089, "nll_loss": 0.9728839993476868, "rewards/accuracies": 0.625, "rewards/chosen": -0.07797771692276001, "rewards/margins": 0.09636405855417252, "rewards/rejected": -0.17434176802635193, "step": 3847 }, { "epoch": 2.347414976361141, "grad_norm": 1.547951340675354, "learning_rate": 4.259154929577464e-06, "log_odds_chosen": 1.7415672540664673, "log_odds_ratio": -0.620467483997345, "logits/chosen": -0.9944146871566772, "logits/rejected": -0.9978964328765869, "logps/chosen": -0.912631094455719, "logps/rejected": -2.3759772777557373, "loss": 1.269, "nll_loss": 1.148667812347412, "rewards/accuracies": 0.625, "rewards/chosen": -0.09126310795545578, "rewards/margins": 0.14633463323116302, "rewards/rejected": -0.2375977337360382, "step": 3848 }, { "epoch": 2.348025011438158, "grad_norm": 9.766520500183105, "learning_rate": 4.258175137783221e-06, "log_odds_chosen": 1.2736462354660034, "log_odds_ratio": -0.4326373338699341, "logits/chosen": -0.7549967169761658, "logits/rejected": -0.628485918045044, "logps/chosen": -0.5685153007507324, "logps/rejected": -1.437710165977478, "loss": 1.0422, "nll_loss": 0.9539195895195007, "rewards/accuracies": 0.875, "rewards/chosen": -0.05685152858495712, "rewards/margins": 0.08691948652267456, "rewards/rejected": -0.14377102255821228, "step": 3849 }, { "epoch": 2.3486350465151746, "grad_norm": 2.060091018676758, "learning_rate": 4.257195345988977e-06, "log_odds_chosen": 1.420576810836792, "log_odds_ratio": -0.5852246284484863, "logits/chosen": -0.8725477457046509, "logits/rejected": -1.0075340270996094, "logps/chosen": -0.9857507348060608, "logps/rejected": -2.204810857772827, "loss": 1.1443, "nll_loss": 1.238048791885376, "rewards/accuracies": 0.75, "rewards/chosen": -0.09857507050037384, "rewards/margins": 0.12190600484609604, "rewards/rejected": -0.22048108279705048, "step": 3850 }, { "epoch": 2.3492450815921915, "grad_norm": 2.114603281021118, "learning_rate": 4.256215554194733e-06, "log_odds_chosen": 1.0988738536834717, "log_odds_ratio": -0.472099244594574, "logits/chosen": -0.8192664384841919, "logits/rejected": -0.753589928150177, "logps/chosen": -0.700926661491394, "logps/rejected": -1.574902057647705, "loss": 1.0357, "nll_loss": 0.8627696633338928, "rewards/accuracies": 0.875, "rewards/chosen": -0.07009267061948776, "rewards/margins": 0.08739753812551498, "rewards/rejected": -0.15749022364616394, "step": 3851 }, { "epoch": 2.3498551166692083, "grad_norm": 2.233225107192993, "learning_rate": 4.25523576240049e-06, "log_odds_chosen": 1.7331277132034302, "log_odds_ratio": -0.43684712052345276, "logits/chosen": -0.7623686194419861, "logits/rejected": -0.7910452485084534, "logps/chosen": -0.8192145824432373, "logps/rejected": -2.199730634689331, "loss": 1.1441, "nll_loss": 0.9818141460418701, "rewards/accuracies": 0.75, "rewards/chosen": -0.08192145824432373, "rewards/margins": 0.1380515992641449, "rewards/rejected": -0.21997307240962982, "step": 3852 }, { "epoch": 2.350465151746225, "grad_norm": 1.238228440284729, "learning_rate": 4.254255970606246e-06, "log_odds_chosen": 1.336456298828125, "log_odds_ratio": -0.3362882137298584, "logits/chosen": -0.7671328783035278, "logits/rejected": -0.826056957244873, "logps/chosen": -0.5311759114265442, "logps/rejected": -1.3534917831420898, "loss": 1.0176, "nll_loss": 0.7402997612953186, "rewards/accuracies": 0.875, "rewards/chosen": -0.05311759561300278, "rewards/margins": 0.08223158121109009, "rewards/rejected": -0.13534918427467346, "step": 3853 }, { "epoch": 2.3510751868232425, "grad_norm": 9.129517555236816, "learning_rate": 4.2532761788120016e-06, "log_odds_chosen": 1.8121485710144043, "log_odds_ratio": -0.36436575651168823, "logits/chosen": -0.8821384906768799, "logits/rejected": -0.9261325001716614, "logps/chosen": -0.8715810775756836, "logps/rejected": -2.364777088165283, "loss": 1.0781, "nll_loss": 1.1345489025115967, "rewards/accuracies": 0.875, "rewards/chosen": -0.08715811371803284, "rewards/margins": 0.1493196040391922, "rewards/rejected": -0.23647770285606384, "step": 3854 }, { "epoch": 2.3516852219002593, "grad_norm": 1.1679260730743408, "learning_rate": 4.2522963870177585e-06, "log_odds_chosen": 2.4801905155181885, "log_odds_ratio": -0.617263674736023, "logits/chosen": -0.698650598526001, "logits/rejected": -0.7337030172348022, "logps/chosen": -0.8064042925834656, "logps/rejected": -2.952080726623535, "loss": 1.1242, "nll_loss": 0.9557286500930786, "rewards/accuracies": 0.625, "rewards/chosen": -0.08064042031764984, "rewards/margins": 0.214567631483078, "rewards/rejected": -0.29520806670188904, "step": 3855 }, { "epoch": 2.352295256977276, "grad_norm": 2.7306597232818604, "learning_rate": 4.251316595223515e-06, "log_odds_chosen": 1.4381344318389893, "log_odds_ratio": -0.3418954610824585, "logits/chosen": -1.0308235883712769, "logits/rejected": -0.9328039884567261, "logps/chosen": -0.7195460200309753, "logps/rejected": -1.7044298648834229, "loss": 1.0856, "nll_loss": 1.0028715133666992, "rewards/accuracies": 0.875, "rewards/chosen": -0.07195460051298141, "rewards/margins": 0.09848837554454803, "rewards/rejected": -0.17044298350811005, "step": 3856 }, { "epoch": 2.352905292054293, "grad_norm": 1.2303394079208374, "learning_rate": 4.250336803429272e-06, "log_odds_chosen": 1.085838794708252, "log_odds_ratio": -0.5267061591148376, "logits/chosen": -0.9161326885223389, "logits/rejected": -0.7530105113983154, "logps/chosen": -0.8343712091445923, "logps/rejected": -1.679144263267517, "loss": 0.9959, "nll_loss": 0.9805300235748291, "rewards/accuracies": 0.5, "rewards/chosen": -0.08343712240457535, "rewards/margins": 0.08447732031345367, "rewards/rejected": -0.16791443526744843, "step": 3857 }, { "epoch": 2.3535153271313103, "grad_norm": 1.8116437196731567, "learning_rate": 4.249357011635028e-06, "log_odds_chosen": 1.4518811702728271, "log_odds_ratio": -0.498374342918396, "logits/chosen": -0.8036307096481323, "logits/rejected": -0.915189266204834, "logps/chosen": -1.1250537633895874, "logps/rejected": -2.138615131378174, "loss": 1.1187, "nll_loss": 1.0999900102615356, "rewards/accuracies": 0.625, "rewards/chosen": -0.11250537633895874, "rewards/margins": 0.1013561338186264, "rewards/rejected": -0.21386151015758514, "step": 3858 }, { "epoch": 2.354125362208327, "grad_norm": 2.0400564670562744, "learning_rate": 4.248377219840783e-06, "log_odds_chosen": 1.5482385158538818, "log_odds_ratio": -0.4597189724445343, "logits/chosen": -0.7123672962188721, "logits/rejected": -0.807030975818634, "logps/chosen": -0.7041927576065063, "logps/rejected": -1.8891587257385254, "loss": 0.9706, "nll_loss": 0.8322288393974304, "rewards/accuracies": 0.625, "rewards/chosen": -0.07041928172111511, "rewards/margins": 0.1184965968132019, "rewards/rejected": -0.18891587853431702, "step": 3859 }, { "epoch": 2.354735397285344, "grad_norm": 7.4789018630981445, "learning_rate": 4.24739742804654e-06, "log_odds_chosen": 1.0578861236572266, "log_odds_ratio": -0.643048107624054, "logits/chosen": -0.8207041025161743, "logits/rejected": -0.8833701610565186, "logps/chosen": -1.0657079219818115, "logps/rejected": -2.105663776397705, "loss": 1.1193, "nll_loss": 1.2948424816131592, "rewards/accuracies": 0.75, "rewards/chosen": -0.10657079517841339, "rewards/margins": 0.10399559140205383, "rewards/rejected": -0.21056637167930603, "step": 3860 }, { "epoch": 2.355345432362361, "grad_norm": 12.878621101379395, "learning_rate": 4.246417636252296e-06, "log_odds_chosen": 0.5795618891716003, "log_odds_ratio": -0.5231304168701172, "logits/chosen": -1.0298899412155151, "logits/rejected": -0.9556430578231812, "logps/chosen": -0.8840954303741455, "logps/rejected": -1.3478333950042725, "loss": 1.0329, "nll_loss": 0.9996918439865112, "rewards/accuracies": 0.5, "rewards/chosen": -0.08840955048799515, "rewards/margins": 0.046373799443244934, "rewards/rejected": -0.13478335738182068, "step": 3861 }, { "epoch": 2.3559554674393777, "grad_norm": 1.4383584260940552, "learning_rate": 4.245437844458052e-06, "log_odds_chosen": 1.6710593700408936, "log_odds_ratio": -0.4441390931606293, "logits/chosen": -0.7296159267425537, "logits/rejected": -0.7705780267715454, "logps/chosen": -0.7915953397750854, "logps/rejected": -2.1512386798858643, "loss": 0.9099, "nll_loss": 0.9185308814048767, "rewards/accuracies": 0.5, "rewards/chosen": -0.07915952801704407, "rewards/margins": 0.13596436381340027, "rewards/rejected": -0.21512387692928314, "step": 3862 }, { "epoch": 2.3565655025163945, "grad_norm": 3.534628391265869, "learning_rate": 4.244458052663809e-06, "log_odds_chosen": 1.1446055173873901, "log_odds_ratio": -0.6129634976387024, "logits/chosen": -0.7089569568634033, "logits/rejected": -0.7741296291351318, "logps/chosen": -0.7114595174789429, "logps/rejected": -1.6693685054779053, "loss": 1.1633, "nll_loss": 0.8948661088943481, "rewards/accuracies": 0.5, "rewards/chosen": -0.07114595174789429, "rewards/margins": 0.09579089283943176, "rewards/rejected": -0.16693685948848724, "step": 3863 }, { "epoch": 2.357175537593412, "grad_norm": 5.92274284362793, "learning_rate": 4.243478260869565e-06, "log_odds_chosen": 1.314561367034912, "log_odds_ratio": -0.4125615358352661, "logits/chosen": -0.8228933811187744, "logits/rejected": -0.8924185037612915, "logps/chosen": -0.8164547085762024, "logps/rejected": -1.7887744903564453, "loss": 0.9018, "nll_loss": 0.8964249491691589, "rewards/accuracies": 0.75, "rewards/chosen": -0.08164547383785248, "rewards/margins": 0.09723199158906937, "rewards/rejected": -0.17887745797634125, "step": 3864 }, { "epoch": 2.3577855726704287, "grad_norm": 3.7207398414611816, "learning_rate": 4.242498469075321e-06, "log_odds_chosen": 3.518805503845215, "log_odds_ratio": -0.4133675694465637, "logits/chosen": -0.8041255474090576, "logits/rejected": -1.1210761070251465, "logps/chosen": -0.7451958656311035, "logps/rejected": -3.7981300354003906, "loss": 1.0555, "nll_loss": 0.968660295009613, "rewards/accuracies": 0.75, "rewards/chosen": -0.07451959699392319, "rewards/margins": 0.3052934408187866, "rewards/rejected": -0.379813015460968, "step": 3865 }, { "epoch": 2.3583956077474455, "grad_norm": 1.464617133140564, "learning_rate": 4.2415186772810776e-06, "log_odds_chosen": 1.9733353853225708, "log_odds_ratio": -0.24028244614601135, "logits/chosen": -0.7646255493164062, "logits/rejected": -0.9270279407501221, "logps/chosen": -0.592795193195343, "logps/rejected": -1.8257240056991577, "loss": 0.8772, "nll_loss": 0.9379678964614868, "rewards/accuracies": 1.0, "rewards/chosen": -0.05927952378988266, "rewards/margins": 0.12329288572072983, "rewards/rejected": -0.1825723946094513, "step": 3866 }, { "epoch": 2.3590056428244623, "grad_norm": 1.5363352298736572, "learning_rate": 4.240538885486834e-06, "log_odds_chosen": 1.6142308712005615, "log_odds_ratio": -0.3386822044849396, "logits/chosen": -0.9381616711616516, "logits/rejected": -0.8537185192108154, "logps/chosen": -0.6198549866676331, "logps/rejected": -1.7884401082992554, "loss": 1.0475, "nll_loss": 0.7563760280609131, "rewards/accuracies": 1.0, "rewards/chosen": -0.061985500156879425, "rewards/margins": 0.11685851216316223, "rewards/rejected": -0.17884401977062225, "step": 3867 }, { "epoch": 2.359615677901479, "grad_norm": 1.407214879989624, "learning_rate": 4.239559093692591e-06, "log_odds_chosen": 2.1058731079101562, "log_odds_ratio": -0.32502180337905884, "logits/chosen": -0.9270694255828857, "logits/rejected": -0.9761348366737366, "logps/chosen": -0.6668249368667603, "logps/rejected": -2.2107625007629395, "loss": 1.1143, "nll_loss": 0.9696822166442871, "rewards/accuracies": 0.875, "rewards/chosen": -0.06668249517679214, "rewards/margins": 0.1543937623500824, "rewards/rejected": -0.22107625007629395, "step": 3868 }, { "epoch": 2.3602257129784965, "grad_norm": 1.626758337020874, "learning_rate": 4.238579301898347e-06, "log_odds_chosen": 1.2460641860961914, "log_odds_ratio": -0.4881402254104614, "logits/chosen": -0.8472775220870972, "logits/rejected": -0.8974766731262207, "logps/chosen": -0.8258445262908936, "logps/rejected": -1.677988052368164, "loss": 0.9837, "nll_loss": 0.8069911003112793, "rewards/accuracies": 0.625, "rewards/chosen": -0.0825844556093216, "rewards/margins": 0.08521434664726257, "rewards/rejected": -0.16779880225658417, "step": 3869 }, { "epoch": 2.3608357480555133, "grad_norm": 10.484962463378906, "learning_rate": 4.237599510104103e-06, "log_odds_chosen": 0.8301517963409424, "log_odds_ratio": -0.38711556792259216, "logits/chosen": -0.9745680093765259, "logits/rejected": -0.9641396403312683, "logps/chosen": -1.0699129104614258, "logps/rejected": -1.6444218158721924, "loss": 1.0608, "nll_loss": 1.0463013648986816, "rewards/accuracies": 1.0, "rewards/chosen": -0.10699129104614258, "rewards/margins": 0.05745089426636696, "rewards/rejected": -0.16444218158721924, "step": 3870 }, { "epoch": 2.36144578313253, "grad_norm": 1.5309923887252808, "learning_rate": 4.236619718309859e-06, "log_odds_chosen": 0.08309070765972137, "log_odds_ratio": -0.7112706303596497, "logits/chosen": -1.0120073556900024, "logits/rejected": -0.9875375628471375, "logps/chosen": -1.0270895957946777, "logps/rejected": -1.078128695487976, "loss": 1.0993, "nll_loss": 1.2410616874694824, "rewards/accuracies": 0.625, "rewards/chosen": -0.10270895808935165, "rewards/margins": 0.005103913601487875, "rewards/rejected": -0.10781286656856537, "step": 3871 }, { "epoch": 2.362055818209547, "grad_norm": 1.818657398223877, "learning_rate": 4.235639926515615e-06, "log_odds_chosen": 0.4292556643486023, "log_odds_ratio": -0.5381692051887512, "logits/chosen": -0.9916304349899292, "logits/rejected": -0.7164685726165771, "logps/chosen": -0.9940678477287292, "logps/rejected": -1.2889299392700195, "loss": 0.9461, "nll_loss": 1.1519371271133423, "rewards/accuracies": 0.75, "rewards/chosen": -0.09940677881240845, "rewards/margins": 0.02948623150587082, "rewards/rejected": -0.12889301776885986, "step": 3872 }, { "epoch": 2.362665853286564, "grad_norm": 1.1097090244293213, "learning_rate": 4.234660134721371e-06, "log_odds_chosen": 2.0003159046173096, "log_odds_ratio": -0.5165330767631531, "logits/chosen": -0.9645727872848511, "logits/rejected": -1.0411382913589478, "logps/chosen": -0.8851012587547302, "logps/rejected": -2.4886677265167236, "loss": 0.8362, "nll_loss": 1.0530623197555542, "rewards/accuracies": 0.625, "rewards/chosen": -0.08851012587547302, "rewards/margins": 0.16035665571689606, "rewards/rejected": -0.24886678159236908, "step": 3873 }, { "epoch": 2.3632758883635807, "grad_norm": 2.904163122177124, "learning_rate": 4.233680342927128e-06, "log_odds_chosen": 2.1164157390594482, "log_odds_ratio": -0.47543373703956604, "logits/chosen": -0.873790442943573, "logits/rejected": -1.0244669914245605, "logps/chosen": -0.7056785821914673, "logps/rejected": -2.4526238441467285, "loss": 1.162, "nll_loss": 1.0211200714111328, "rewards/accuracies": 0.5, "rewards/chosen": -0.07056786119937897, "rewards/margins": 0.17469453811645508, "rewards/rejected": -0.24526238441467285, "step": 3874 }, { "epoch": 2.363885923440598, "grad_norm": 10.629159927368164, "learning_rate": 4.232700551132884e-06, "log_odds_chosen": -0.09548310935497284, "log_odds_ratio": -0.762832522392273, "logits/chosen": -0.8541202545166016, "logits/rejected": -0.8148660659790039, "logps/chosen": -0.9352942109107971, "logps/rejected": -0.8700515031814575, "loss": 1.0819, "nll_loss": 0.9712916612625122, "rewards/accuracies": 0.375, "rewards/chosen": -0.09352941811084747, "rewards/margins": -0.006524272263050079, "rewards/rejected": -0.0870051458477974, "step": 3875 }, { "epoch": 2.364495958517615, "grad_norm": 1.078884243965149, "learning_rate": 4.23172075933864e-06, "log_odds_chosen": 1.2744169235229492, "log_odds_ratio": -0.4783787131309509, "logits/chosen": -0.6697185039520264, "logits/rejected": -0.8144369125366211, "logps/chosen": -0.8113970756530762, "logps/rejected": -1.8281199932098389, "loss": 1.0882, "nll_loss": 0.8930909633636475, "rewards/accuracies": 0.875, "rewards/chosen": -0.0811396986246109, "rewards/margins": 0.10167229175567627, "rewards/rejected": -0.18281199038028717, "step": 3876 }, { "epoch": 2.3651059935946317, "grad_norm": 2.0692636966705322, "learning_rate": 4.230740967544397e-06, "log_odds_chosen": 1.9310615062713623, "log_odds_ratio": -0.37664780020713806, "logits/chosen": -0.8383450508117676, "logits/rejected": -0.8988173604011536, "logps/chosen": -0.6996256113052368, "logps/rejected": -2.2273988723754883, "loss": 0.9522, "nll_loss": 0.890518069267273, "rewards/accuracies": 0.75, "rewards/chosen": -0.06996256113052368, "rewards/margins": 0.15277734398841858, "rewards/rejected": -0.22273990511894226, "step": 3877 }, { "epoch": 2.3657160286716485, "grad_norm": 1.4189658164978027, "learning_rate": 4.229761175750153e-06, "log_odds_chosen": 0.800984263420105, "log_odds_ratio": -0.6363456845283508, "logits/chosen": -0.9795562028884888, "logits/rejected": -0.9531080722808838, "logps/chosen": -1.0245840549468994, "logps/rejected": -1.5778555870056152, "loss": 1.0885, "nll_loss": 1.2279053926467896, "rewards/accuracies": 0.75, "rewards/chosen": -0.1024584174156189, "rewards/margins": 0.0553271509706974, "rewards/rejected": -0.157785564661026, "step": 3878 }, { "epoch": 2.3663260637486654, "grad_norm": 1.3479377031326294, "learning_rate": 4.228781383955909e-06, "log_odds_chosen": 0.20027735829353333, "log_odds_ratio": -0.7733340263366699, "logits/chosen": -1.1560369729995728, "logits/rejected": -1.043057918548584, "logps/chosen": -1.2052288055419922, "logps/rejected": -1.4299273490905762, "loss": 1.1591, "nll_loss": 1.1575604677200317, "rewards/accuracies": 0.5, "rewards/chosen": -0.1205228865146637, "rewards/margins": 0.022469839081168175, "rewards/rejected": -0.14299273490905762, "step": 3879 }, { "epoch": 2.3669360988256827, "grad_norm": 1.6155016422271729, "learning_rate": 4.227801592161666e-06, "log_odds_chosen": 0.6438573598861694, "log_odds_ratio": -0.6387315988540649, "logits/chosen": -0.6862471699714661, "logits/rejected": -0.7202239036560059, "logps/chosen": -0.7965254783630371, "logps/rejected": -1.2011499404907227, "loss": 1.0285, "nll_loss": 0.8981741666793823, "rewards/accuracies": 0.5, "rewards/chosen": -0.07965254783630371, "rewards/margins": 0.04046245291829109, "rewards/rejected": -0.1201149970293045, "step": 3880 }, { "epoch": 2.3675461339026995, "grad_norm": 0.8709188103675842, "learning_rate": 4.226821800367422e-06, "log_odds_chosen": 1.1765973567962646, "log_odds_ratio": -0.500275731086731, "logits/chosen": -1.0677306652069092, "logits/rejected": -0.9193627834320068, "logps/chosen": -0.8629252910614014, "logps/rejected": -1.6816670894622803, "loss": 1.1876, "nll_loss": 1.1482336521148682, "rewards/accuracies": 0.75, "rewards/chosen": -0.08629253506660461, "rewards/margins": 0.08187417685985565, "rewards/rejected": -0.16816671192646027, "step": 3881 }, { "epoch": 2.3681561689797164, "grad_norm": 1.9657397270202637, "learning_rate": 4.225842008573177e-06, "log_odds_chosen": 0.40670347213745117, "log_odds_ratio": -0.9172256588935852, "logits/chosen": -1.0095089673995972, "logits/rejected": -0.9837193489074707, "logps/chosen": -1.3850646018981934, "logps/rejected": -1.7376245260238647, "loss": 1.2156, "nll_loss": 1.4956557750701904, "rewards/accuracies": 0.5, "rewards/chosen": -0.1385064572095871, "rewards/margins": 0.035256002098321915, "rewards/rejected": -0.1737624555826187, "step": 3882 }, { "epoch": 2.368766204056733, "grad_norm": 1.578853964805603, "learning_rate": 4.224862216778934e-06, "log_odds_chosen": 1.167975664138794, "log_odds_ratio": -0.5629892945289612, "logits/chosen": -0.8997477889060974, "logits/rejected": -0.750585675239563, "logps/chosen": -0.774610698223114, "logps/rejected": -1.601219892501831, "loss": 1.0767, "nll_loss": 1.0378259420394897, "rewards/accuracies": 0.5, "rewards/chosen": -0.07746107131242752, "rewards/margins": 0.08266092091798782, "rewards/rejected": -0.16012199223041534, "step": 3883 }, { "epoch": 2.36937623913375, "grad_norm": 1.6708678007125854, "learning_rate": 4.22388242498469e-06, "log_odds_chosen": 1.9717779159545898, "log_odds_ratio": -0.30058616399765015, "logits/chosen": -0.6713961958885193, "logits/rejected": -0.7198128700256348, "logps/chosen": -0.6870705485343933, "logps/rejected": -2.065290689468384, "loss": 1.0151, "nll_loss": 0.8275880813598633, "rewards/accuracies": 0.875, "rewards/chosen": -0.06870706379413605, "rewards/margins": 0.1378220021724701, "rewards/rejected": -0.20652905106544495, "step": 3884 }, { "epoch": 2.369986274210767, "grad_norm": 1.0996973514556885, "learning_rate": 4.222902633190447e-06, "log_odds_chosen": 1.6287643909454346, "log_odds_ratio": -0.3502918481826782, "logits/chosen": -0.6627320051193237, "logits/rejected": -0.8633120059967041, "logps/chosen": -0.8440263867378235, "logps/rejected": -2.057950973510742, "loss": 1.1078, "nll_loss": 0.9847897887229919, "rewards/accuracies": 0.875, "rewards/chosen": -0.08440263569355011, "rewards/margins": 0.12139247357845306, "rewards/rejected": -0.20579510927200317, "step": 3885 }, { "epoch": 2.370596309287784, "grad_norm": 1.3187719583511353, "learning_rate": 4.221922841396203e-06, "log_odds_chosen": 3.692185401916504, "log_odds_ratio": -0.2022382766008377, "logits/chosen": -0.8100305199623108, "logits/rejected": -0.9432787299156189, "logps/chosen": -0.49308285117149353, "logps/rejected": -3.5072779655456543, "loss": 0.9012, "nll_loss": 0.6525975465774536, "rewards/accuracies": 0.875, "rewards/chosen": -0.04930828511714935, "rewards/margins": 0.30141952633857727, "rewards/rejected": -0.35072779655456543, "step": 3886 }, { "epoch": 2.371206344364801, "grad_norm": 1.7448571920394897, "learning_rate": 4.2209430496019595e-06, "log_odds_chosen": -0.004912368953227997, "log_odds_ratio": -0.7086331844329834, "logits/chosen": -0.8154033422470093, "logits/rejected": -0.7883826494216919, "logps/chosen": -0.8523698449134827, "logps/rejected": -0.8647733330726624, "loss": 1.0973, "nll_loss": 1.1746220588684082, "rewards/accuracies": 0.375, "rewards/chosen": -0.08523698151111603, "rewards/margins": 0.0012403512373566628, "rewards/rejected": -0.08647733926773071, "step": 3887 }, { "epoch": 2.371816379441818, "grad_norm": 1.5778409242630005, "learning_rate": 4.219963257807716e-06, "log_odds_chosen": 2.025012969970703, "log_odds_ratio": -0.4089454710483551, "logits/chosen": -0.9680811762809753, "logits/rejected": -0.8029263019561768, "logps/chosen": -0.9485001564025879, "logps/rejected": -2.649458885192871, "loss": 1.2032, "nll_loss": 1.1188937425613403, "rewards/accuracies": 0.625, "rewards/chosen": -0.09485001862049103, "rewards/margins": 0.17009587585926056, "rewards/rejected": -0.2649458944797516, "step": 3888 }, { "epoch": 2.3724264145188347, "grad_norm": 1.6128817796707153, "learning_rate": 4.218983466013472e-06, "log_odds_chosen": 1.2816627025604248, "log_odds_ratio": -0.43441537022590637, "logits/chosen": -0.7794024348258972, "logits/rejected": -0.9886749982833862, "logps/chosen": -0.9016965627670288, "logps/rejected": -1.6462013721466064, "loss": 1.1461, "nll_loss": 1.2701106071472168, "rewards/accuracies": 0.75, "rewards/chosen": -0.09016966074705124, "rewards/margins": 0.07445047795772552, "rewards/rejected": -0.16462013125419617, "step": 3889 }, { "epoch": 2.3730364495958516, "grad_norm": 15.09008502960205, "learning_rate": 4.218003674219228e-06, "log_odds_chosen": 0.2240377962589264, "log_odds_ratio": -0.7072640657424927, "logits/chosen": -0.7864174246788025, "logits/rejected": -0.8899081945419312, "logps/chosen": -1.0929090976715088, "logps/rejected": -1.231745958328247, "loss": 1.1868, "nll_loss": 1.1742900609970093, "rewards/accuracies": 0.5, "rewards/chosen": -0.10929092019796371, "rewards/margins": 0.013883685693144798, "rewards/rejected": -0.12317460030317307, "step": 3890 }, { "epoch": 2.373646484672869, "grad_norm": 1.478873610496521, "learning_rate": 4.217023882424985e-06, "log_odds_chosen": 0.7162947058677673, "log_odds_ratio": -0.5756965279579163, "logits/chosen": -0.9043248891830444, "logits/rejected": -0.9088051319122314, "logps/chosen": -1.1554783582687378, "logps/rejected": -1.6912037134170532, "loss": 1.279, "nll_loss": 1.2679433822631836, "rewards/accuracies": 0.625, "rewards/chosen": -0.11554783582687378, "rewards/margins": 0.05357252433896065, "rewards/rejected": -0.16912035644054413, "step": 3891 }, { "epoch": 2.3742565197498857, "grad_norm": 1.2705981731414795, "learning_rate": 4.216044090630741e-06, "log_odds_chosen": 1.8734192848205566, "log_odds_ratio": -0.4946618974208832, "logits/chosen": -0.7060058116912842, "logits/rejected": -0.966965913772583, "logps/chosen": -0.8345456123352051, "logps/rejected": -2.311215400695801, "loss": 1.0464, "nll_loss": 1.0875418186187744, "rewards/accuracies": 0.5, "rewards/chosen": -0.08345456421375275, "rewards/margins": 0.14766699075698853, "rewards/rejected": -0.23112156987190247, "step": 3892 }, { "epoch": 2.3748665548269026, "grad_norm": 1.4452253580093384, "learning_rate": 4.215064298836496e-06, "log_odds_chosen": 0.0488973893225193, "log_odds_ratio": -0.7251821756362915, "logits/chosen": -0.8656131029129028, "logits/rejected": -0.8090947866439819, "logps/chosen": -0.7220322489738464, "logps/rejected": -0.7546747922897339, "loss": 1.0567, "nll_loss": 1.0144639015197754, "rewards/accuracies": 0.5, "rewards/chosen": -0.07220322638750076, "rewards/margins": 0.003264258150011301, "rewards/rejected": -0.07546748220920563, "step": 3893 }, { "epoch": 2.3754765899039194, "grad_norm": 1.1813918352127075, "learning_rate": 4.214084507042253e-06, "log_odds_chosen": 1.773909568786621, "log_odds_ratio": -0.41537049412727356, "logits/chosen": -0.8757885694503784, "logits/rejected": -0.9143730401992798, "logps/chosen": -1.0130892992019653, "logps/rejected": -2.4426584243774414, "loss": 1.1107, "nll_loss": 1.1627657413482666, "rewards/accuracies": 0.75, "rewards/chosen": -0.1013089269399643, "rewards/margins": 0.1429569125175476, "rewards/rejected": -0.2442658543586731, "step": 3894 }, { "epoch": 2.3760866249809363, "grad_norm": 1.442916989326477, "learning_rate": 4.213104715248009e-06, "log_odds_chosen": 1.4657025337219238, "log_odds_ratio": -0.42781564593315125, "logits/chosen": -0.8654043078422546, "logits/rejected": -0.9179784655570984, "logps/chosen": -0.7607423067092896, "logps/rejected": -1.8294868469238281, "loss": 0.9242, "nll_loss": 0.9673962593078613, "rewards/accuracies": 0.875, "rewards/chosen": -0.07607423514127731, "rewards/margins": 0.10687445849180222, "rewards/rejected": -0.18294867873191833, "step": 3895 }, { "epoch": 2.376696660057953, "grad_norm": 6.003244400024414, "learning_rate": 4.212124923453766e-06, "log_odds_chosen": 1.6110620498657227, "log_odds_ratio": -0.4190259277820587, "logits/chosen": -0.4977237582206726, "logits/rejected": -0.709253191947937, "logps/chosen": -0.6394940614700317, "logps/rejected": -1.8840103149414062, "loss": 0.879, "nll_loss": 0.7565564513206482, "rewards/accuracies": 0.875, "rewards/chosen": -0.06394940614700317, "rewards/margins": 0.12445162236690521, "rewards/rejected": -0.1884010285139084, "step": 3896 }, { "epoch": 2.3773066951349704, "grad_norm": 3.192255735397339, "learning_rate": 4.2111451316595224e-06, "log_odds_chosen": 2.745823860168457, "log_odds_ratio": -0.7861001491546631, "logits/chosen": -0.6678766012191772, "logits/rejected": -0.8035784959793091, "logps/chosen": -0.7890718579292297, "logps/rejected": -3.2465901374816895, "loss": 1.0897, "nll_loss": 0.9427876472473145, "rewards/accuracies": 0.5, "rewards/chosen": -0.07890719175338745, "rewards/margins": 0.24575181305408478, "rewards/rejected": -0.3246590197086334, "step": 3897 }, { "epoch": 2.3779167302119872, "grad_norm": 7.886340141296387, "learning_rate": 4.2101653398652786e-06, "log_odds_chosen": 2.7294816970825195, "log_odds_ratio": -0.37691038846969604, "logits/chosen": -0.704139769077301, "logits/rejected": -0.8176714181900024, "logps/chosen": -0.5274269580841064, "logps/rejected": -2.6516575813293457, "loss": 0.9054, "nll_loss": 0.747061014175415, "rewards/accuracies": 0.75, "rewards/chosen": -0.052742697298526764, "rewards/margins": 0.21242307126522064, "rewards/rejected": -0.2651657462120056, "step": 3898 }, { "epoch": 2.378526765289004, "grad_norm": 1.7467358112335205, "learning_rate": 4.209185548071035e-06, "log_odds_chosen": 3.6054136753082275, "log_odds_ratio": -0.39132097363471985, "logits/chosen": -0.6237008571624756, "logits/rejected": -0.7683576345443726, "logps/chosen": -0.869312047958374, "logps/rejected": -4.028870582580566, "loss": 1.0599, "nll_loss": 1.0391548871994019, "rewards/accuracies": 0.75, "rewards/chosen": -0.08693120628595352, "rewards/margins": 0.3159558176994324, "rewards/rejected": -0.4028870463371277, "step": 3899 }, { "epoch": 2.379136800366021, "grad_norm": 1.2772184610366821, "learning_rate": 4.208205756276791e-06, "log_odds_chosen": 3.0938820838928223, "log_odds_ratio": -0.47211575508117676, "logits/chosen": -0.9262301325798035, "logits/rejected": -0.9131186008453369, "logps/chosen": -0.8200598359107971, "logps/rejected": -3.396549701690674, "loss": 1.0507, "nll_loss": 1.0489307641983032, "rewards/accuracies": 0.625, "rewards/chosen": -0.08200598508119583, "rewards/margins": 0.2576489746570587, "rewards/rejected": -0.33965495228767395, "step": 3900 }, { "epoch": 2.379746835443038, "grad_norm": 1.471700668334961, "learning_rate": 4.207225964482547e-06, "log_odds_chosen": 0.9613568186759949, "log_odds_ratio": -0.41576820611953735, "logits/chosen": -0.8676627278327942, "logits/rejected": -0.8630779385566711, "logps/chosen": -0.700423538684845, "logps/rejected": -1.2697081565856934, "loss": 1.1542, "nll_loss": 0.881440281867981, "rewards/accuracies": 0.75, "rewards/chosen": -0.07004235684871674, "rewards/margins": 0.05692845582962036, "rewards/rejected": -0.1269708126783371, "step": 3901 }, { "epoch": 2.380356870520055, "grad_norm": 4.225200176239014, "learning_rate": 4.206246172688304e-06, "log_odds_chosen": 4.090996742248535, "log_odds_ratio": -0.15509666502475739, "logits/chosen": -0.5897617340087891, "logits/rejected": -0.8706395626068115, "logps/chosen": -0.8021934032440186, "logps/rejected": -4.118764400482178, "loss": 0.847, "nll_loss": 0.7800190448760986, "rewards/accuracies": 1.0, "rewards/chosen": -0.0802193433046341, "rewards/margins": 0.3316571116447449, "rewards/rejected": -0.41187649965286255, "step": 3902 }, { "epoch": 2.380966905597072, "grad_norm": 1.2983006238937378, "learning_rate": 4.20526638089406e-06, "log_odds_chosen": 2.031148910522461, "log_odds_ratio": -0.5501842498779297, "logits/chosen": -1.0377097129821777, "logits/rejected": -1.0379894971847534, "logps/chosen": -0.9636944532394409, "logps/rejected": -2.740114450454712, "loss": 1.2306, "nll_loss": 1.1511249542236328, "rewards/accuracies": 0.75, "rewards/chosen": -0.09636944532394409, "rewards/margins": 0.17764201760292053, "rewards/rejected": -0.2740114629268646, "step": 3903 }, { "epoch": 2.3815769406740888, "grad_norm": 2.7991526126861572, "learning_rate": 4.204286589099816e-06, "log_odds_chosen": 1.0784709453582764, "log_odds_ratio": -0.47951531410217285, "logits/chosen": -0.7304609417915344, "logits/rejected": -0.8789843320846558, "logps/chosen": -1.0605663061141968, "logps/rejected": -1.8951497077941895, "loss": 1.0112, "nll_loss": 1.166139841079712, "rewards/accuracies": 0.75, "rewards/chosen": -0.10605663061141968, "rewards/margins": 0.08345834165811539, "rewards/rejected": -0.18951496481895447, "step": 3904 }, { "epoch": 2.3821869757511056, "grad_norm": 1.1746649742126465, "learning_rate": 4.203306797305572e-06, "log_odds_chosen": 1.5495786666870117, "log_odds_ratio": -0.42176732420921326, "logits/chosen": -0.8574966192245483, "logits/rejected": -0.9281332492828369, "logps/chosen": -0.619789719581604, "logps/rejected": -1.694551944732666, "loss": 1.0119, "nll_loss": 0.939698338508606, "rewards/accuracies": 0.75, "rewards/chosen": -0.06197896972298622, "rewards/margins": 0.10747623443603516, "rewards/rejected": -0.16945520043373108, "step": 3905 }, { "epoch": 2.3827970108281225, "grad_norm": 1.2722382545471191, "learning_rate": 4.202327005511328e-06, "log_odds_chosen": 0.9563171863555908, "log_odds_ratio": -0.5036914944648743, "logits/chosen": -1.00098717212677, "logits/rejected": -0.9523628950119019, "logps/chosen": -0.9639416933059692, "logps/rejected": -1.7577909231185913, "loss": 1.0449, "nll_loss": 1.0876661539077759, "rewards/accuracies": 0.625, "rewards/chosen": -0.09639417380094528, "rewards/margins": 0.07938491553068161, "rewards/rejected": -0.1757790893316269, "step": 3906 }, { "epoch": 2.3834070459051393, "grad_norm": 1.269568681716919, "learning_rate": 4.2013472137170845e-06, "log_odds_chosen": 1.3518189191818237, "log_odds_ratio": -0.4855145215988159, "logits/chosen": -1.0739576816558838, "logits/rejected": -0.9110101461410522, "logps/chosen": -0.9264966249465942, "logps/rejected": -2.010223627090454, "loss": 1.1093, "nll_loss": 1.2475051879882812, "rewards/accuracies": 0.75, "rewards/chosen": -0.0926496684551239, "rewards/margins": 0.10837268084287643, "rewards/rejected": -0.20102235674858093, "step": 3907 }, { "epoch": 2.3840170809821566, "grad_norm": 4.101064682006836, "learning_rate": 4.2003674219228415e-06, "log_odds_chosen": 2.891984224319458, "log_odds_ratio": -0.2270069420337677, "logits/chosen": -0.7918058037757874, "logits/rejected": -0.8751806616783142, "logps/chosen": -0.4115256369113922, "logps/rejected": -2.5219624042510986, "loss": 0.8736, "nll_loss": 0.6572765707969666, "rewards/accuracies": 0.875, "rewards/chosen": -0.04115256294608116, "rewards/margins": 0.21104365587234497, "rewards/rejected": -0.2521962523460388, "step": 3908 }, { "epoch": 2.3846271160591734, "grad_norm": 1.4798392057418823, "learning_rate": 4.199387630128598e-06, "log_odds_chosen": 2.581329107284546, "log_odds_ratio": -0.21266457438468933, "logits/chosen": -0.7611092329025269, "logits/rejected": -0.8745107650756836, "logps/chosen": -0.6725620031356812, "logps/rejected": -2.56766414642334, "loss": 1.0385, "nll_loss": 0.8708239793777466, "rewards/accuracies": 0.875, "rewards/chosen": -0.06725619733333588, "rewards/margins": 0.18951021134853363, "rewards/rejected": -0.2567664384841919, "step": 3909 }, { "epoch": 2.3852371511361903, "grad_norm": 1.3607810735702515, "learning_rate": 4.1984078383343546e-06, "log_odds_chosen": 1.776300311088562, "log_odds_ratio": -0.3240668773651123, "logits/chosen": -0.8424259424209595, "logits/rejected": -0.8657300472259521, "logps/chosen": -0.5528539419174194, "logps/rejected": -1.4851875305175781, "loss": 1.0401, "nll_loss": 1.086155652999878, "rewards/accuracies": 0.875, "rewards/chosen": -0.05528539419174194, "rewards/margins": 0.09323335438966751, "rewards/rejected": -0.14851875603199005, "step": 3910 }, { "epoch": 2.385847186213207, "grad_norm": 2.0672264099121094, "learning_rate": 4.19742804654011e-06, "log_odds_chosen": 2.363469362258911, "log_odds_ratio": -0.1915355771780014, "logits/chosen": -0.7876918911933899, "logits/rejected": -0.9069675803184509, "logps/chosen": -0.8426257371902466, "logps/rejected": -2.7245540618896484, "loss": 1.0345, "nll_loss": 0.8395590782165527, "rewards/accuracies": 1.0, "rewards/chosen": -0.08426257967948914, "rewards/margins": 0.18819284439086914, "rewards/rejected": -0.27245545387268066, "step": 3911 }, { "epoch": 2.3864572212902244, "grad_norm": 3.1281371116638184, "learning_rate": 4.196448254745866e-06, "log_odds_chosen": 1.5428797006607056, "log_odds_ratio": -0.5888376832008362, "logits/chosen": -0.9320118427276611, "logits/rejected": -1.1155189275741577, "logps/chosen": -0.8824830055236816, "logps/rejected": -2.0699944496154785, "loss": 1.1721, "nll_loss": 1.123833417892456, "rewards/accuracies": 0.625, "rewards/chosen": -0.08824830502271652, "rewards/margins": 0.1187511458992958, "rewards/rejected": -0.20699943602085114, "step": 3912 }, { "epoch": 2.3870672563672413, "grad_norm": 1.6113911867141724, "learning_rate": 4.195468462951623e-06, "log_odds_chosen": 1.2436271905899048, "log_odds_ratio": -0.47690120339393616, "logits/chosen": -0.6461934447288513, "logits/rejected": -0.7265094518661499, "logps/chosen": -0.6182629466056824, "logps/rejected": -1.4318169355392456, "loss": 1.0448, "nll_loss": 0.9190176129341125, "rewards/accuracies": 0.875, "rewards/chosen": -0.06182629615068436, "rewards/margins": 0.08135540038347244, "rewards/rejected": -0.1431816965341568, "step": 3913 }, { "epoch": 2.387677291444258, "grad_norm": 1.2794703245162964, "learning_rate": 4.194488671157379e-06, "log_odds_chosen": 1.8446475267410278, "log_odds_ratio": -0.45958036184310913, "logits/chosen": -1.0190509557724, "logits/rejected": -1.0183699131011963, "logps/chosen": -0.9200867414474487, "logps/rejected": -2.422441005706787, "loss": 1.0282, "nll_loss": 1.0375043153762817, "rewards/accuracies": 0.75, "rewards/chosen": -0.09200866520404816, "rewards/margins": 0.15023542940616608, "rewards/rejected": -0.24224409461021423, "step": 3914 }, { "epoch": 2.388287326521275, "grad_norm": 1.7759052515029907, "learning_rate": 4.193508879363135e-06, "log_odds_chosen": 1.017541766166687, "log_odds_ratio": -0.5617265105247498, "logits/chosen": -0.9229756593704224, "logits/rejected": -0.9285171031951904, "logps/chosen": -0.9914008975028992, "logps/rejected": -1.709825873374939, "loss": 1.2815, "nll_loss": 1.1738301515579224, "rewards/accuracies": 0.75, "rewards/chosen": -0.09914009273052216, "rewards/margins": 0.0718424916267395, "rewards/rejected": -0.17098259925842285, "step": 3915 }, { "epoch": 2.388897361598292, "grad_norm": 1.1150010824203491, "learning_rate": 4.192529087568891e-06, "log_odds_chosen": 3.008553981781006, "log_odds_ratio": -0.32439956068992615, "logits/chosen": -0.4770296812057495, "logits/rejected": -0.667560875415802, "logps/chosen": -0.6712393164634705, "logps/rejected": -3.153752326965332, "loss": 1.0661, "nll_loss": 0.8221766948699951, "rewards/accuracies": 0.875, "rewards/chosen": -0.06712393462657928, "rewards/margins": 0.2482513189315796, "rewards/rejected": -0.3153752386569977, "step": 3916 }, { "epoch": 2.3895073966753086, "grad_norm": 3.207195281982422, "learning_rate": 4.191549295774647e-06, "log_odds_chosen": 1.876122236251831, "log_odds_ratio": -0.31638553738594055, "logits/chosen": -0.6979920864105225, "logits/rejected": -0.8405364155769348, "logps/chosen": -0.6911320090293884, "logps/rejected": -1.6638209819793701, "loss": 0.9786, "nll_loss": 0.9361172318458557, "rewards/accuracies": 0.875, "rewards/chosen": -0.06911320239305496, "rewards/margins": 0.09726891666650772, "rewards/rejected": -0.1663821041584015, "step": 3917 }, { "epoch": 2.390117431752326, "grad_norm": 4.2810187339782715, "learning_rate": 4.1905695039804035e-06, "log_odds_chosen": 1.7371734380722046, "log_odds_ratio": -0.39678502082824707, "logits/chosen": -0.9118887782096863, "logits/rejected": -0.9355485439300537, "logps/chosen": -0.9742792844772339, "logps/rejected": -2.4663214683532715, "loss": 1.1855, "nll_loss": 1.0543098449707031, "rewards/accuracies": 0.75, "rewards/chosen": -0.09742792695760727, "rewards/margins": 0.14920422434806824, "rewards/rejected": -0.2466321587562561, "step": 3918 }, { "epoch": 2.390727466829343, "grad_norm": 2.2684977054595947, "learning_rate": 4.1895897121861605e-06, "log_odds_chosen": 0.37202024459838867, "log_odds_ratio": -0.6785159707069397, "logits/chosen": -0.9136311411857605, "logits/rejected": -0.7648258209228516, "logps/chosen": -0.9191599488258362, "logps/rejected": -1.0353193283081055, "loss": 1.1277, "nll_loss": 1.2304203510284424, "rewards/accuracies": 0.5, "rewards/chosen": -0.09191599488258362, "rewards/margins": 0.011615936644375324, "rewards/rejected": -0.10353193432092667, "step": 3919 }, { "epoch": 2.3913375019063596, "grad_norm": 1.0271799564361572, "learning_rate": 4.188609920391917e-06, "log_odds_chosen": 1.36980140209198, "log_odds_ratio": -0.5375928282737732, "logits/chosen": -0.8558515310287476, "logits/rejected": -0.8618645071983337, "logps/chosen": -0.8639935851097107, "logps/rejected": -1.73883056640625, "loss": 1.0099, "nll_loss": 1.1486537456512451, "rewards/accuracies": 0.625, "rewards/chosen": -0.08639935404062271, "rewards/margins": 0.08748369663953781, "rewards/rejected": -0.17388306558132172, "step": 3920 }, { "epoch": 2.3919475369833765, "grad_norm": 1.3050252199172974, "learning_rate": 4.187630128597673e-06, "log_odds_chosen": 2.470252752304077, "log_odds_ratio": -0.28498390316963196, "logits/chosen": -0.5985651612281799, "logits/rejected": -0.7575374841690063, "logps/chosen": -0.5541539788246155, "logps/rejected": -2.389108419418335, "loss": 1.0049, "nll_loss": 0.8700408935546875, "rewards/accuracies": 0.875, "rewards/chosen": -0.05541539564728737, "rewards/margins": 0.18349546194076538, "rewards/rejected": -0.23891085386276245, "step": 3921 }, { "epoch": 2.3925575720603933, "grad_norm": 2.328806161880493, "learning_rate": 4.186650336803429e-06, "log_odds_chosen": 1.5340546369552612, "log_odds_ratio": -0.42452549934387207, "logits/chosen": -0.8017457723617554, "logits/rejected": -0.7791569232940674, "logps/chosen": -0.7706159353256226, "logps/rejected": -1.7253895998001099, "loss": 1.0403, "nll_loss": 0.9335912466049194, "rewards/accuracies": 0.75, "rewards/chosen": -0.07706159353256226, "rewards/margins": 0.09547737240791321, "rewards/rejected": -0.17253896594047546, "step": 3922 }, { "epoch": 2.3931676071374106, "grad_norm": 2.0526809692382812, "learning_rate": 4.185670545009185e-06, "log_odds_chosen": 1.1592954397201538, "log_odds_ratio": -0.6603236198425293, "logits/chosen": -0.9533284902572632, "logits/rejected": -0.9671717882156372, "logps/chosen": -1.0682014226913452, "logps/rejected": -2.006535768508911, "loss": 1.0492, "nll_loss": 1.1211726665496826, "rewards/accuracies": 0.5, "rewards/chosen": -0.10682014375925064, "rewards/margins": 0.09383343905210495, "rewards/rejected": -0.2006535828113556, "step": 3923 }, { "epoch": 2.3937776422144275, "grad_norm": 3.2242636680603027, "learning_rate": 4.184690753214942e-06, "log_odds_chosen": 1.1454863548278809, "log_odds_ratio": -0.5775333642959595, "logits/chosen": -0.9173632860183716, "logits/rejected": -0.9116469621658325, "logps/chosen": -0.9807479977607727, "logps/rejected": -1.8530616760253906, "loss": 1.053, "nll_loss": 0.9451025724411011, "rewards/accuracies": 0.625, "rewards/chosen": -0.09807479381561279, "rewards/margins": 0.08723136782646179, "rewards/rejected": -0.18530616164207458, "step": 3924 }, { "epoch": 2.3943876772914443, "grad_norm": 3.2201194763183594, "learning_rate": 4.183710961420698e-06, "log_odds_chosen": 2.054379940032959, "log_odds_ratio": -0.31533342599868774, "logits/chosen": -0.8703881502151489, "logits/rejected": -1.0181772708892822, "logps/chosen": -0.8072379231452942, "logps/rejected": -2.3688786029815674, "loss": 0.9383, "nll_loss": 1.0552207231521606, "rewards/accuracies": 0.75, "rewards/chosen": -0.08072379231452942, "rewards/margins": 0.15616407990455627, "rewards/rejected": -0.2368878722190857, "step": 3925 }, { "epoch": 2.394997712368461, "grad_norm": 2.520585775375366, "learning_rate": 4.182731169626454e-06, "log_odds_chosen": 1.5641671419143677, "log_odds_ratio": -0.3659813702106476, "logits/chosen": -0.9178447723388672, "logits/rejected": -0.9911339282989502, "logps/chosen": -0.6749070882797241, "logps/rejected": -1.7211588621139526, "loss": 0.964, "nll_loss": 0.7985630035400391, "rewards/accuracies": 0.875, "rewards/chosen": -0.06749071180820465, "rewards/margins": 0.10462518036365509, "rewards/rejected": -0.17211589217185974, "step": 3926 }, { "epoch": 2.395607747445478, "grad_norm": 9.465228080749512, "learning_rate": 4.181751377832211e-06, "log_odds_chosen": 1.1824623346328735, "log_odds_ratio": -0.9053522944450378, "logits/chosen": -0.849376380443573, "logits/rejected": -0.9093557596206665, "logps/chosen": -1.1228621006011963, "logps/rejected": -2.0749597549438477, "loss": 1.1581, "nll_loss": 1.1512925624847412, "rewards/accuracies": 0.5, "rewards/chosen": -0.11228621006011963, "rewards/margins": 0.0952097550034523, "rewards/rejected": -0.20749597251415253, "step": 3927 }, { "epoch": 2.396217782522495, "grad_norm": 1.771892786026001, "learning_rate": 4.1807715860379665e-06, "log_odds_chosen": 0.9470299482345581, "log_odds_ratio": -0.40442484617233276, "logits/chosen": -0.9348737597465515, "logits/rejected": -0.8765934109687805, "logps/chosen": -0.7970561981201172, "logps/rejected": -1.4502410888671875, "loss": 0.9651, "nll_loss": 0.928965151309967, "rewards/accuracies": 0.625, "rewards/chosen": -0.0797056257724762, "rewards/margins": 0.06531848013401031, "rewards/rejected": -0.1450241059064865, "step": 3928 }, { "epoch": 2.396827817599512, "grad_norm": 1.4239708185195923, "learning_rate": 4.179791794243723e-06, "log_odds_chosen": 2.8515446186065674, "log_odds_ratio": -0.2857133448123932, "logits/chosen": -0.70588219165802, "logits/rejected": -0.9311230182647705, "logps/chosen": -0.6056065559387207, "logps/rejected": -2.819439649581909, "loss": 0.9013, "nll_loss": 0.8553131818771362, "rewards/accuracies": 0.75, "rewards/chosen": -0.06056065484881401, "rewards/margins": 0.22138334810733795, "rewards/rejected": -0.2819439768791199, "step": 3929 }, { "epoch": 2.397437852676529, "grad_norm": 1.1110016107559204, "learning_rate": 4.1788120024494796e-06, "log_odds_chosen": 1.0839290618896484, "log_odds_ratio": -0.5001997947692871, "logits/chosen": -1.0814673900604248, "logits/rejected": -1.0617749691009521, "logps/chosen": -1.0412108898162842, "logps/rejected": -1.994509220123291, "loss": 1.2678, "nll_loss": 1.2470844984054565, "rewards/accuracies": 0.75, "rewards/chosen": -0.10412108898162842, "rewards/margins": 0.09532984346151352, "rewards/rejected": -0.19945093989372253, "step": 3930 }, { "epoch": 2.398047887753546, "grad_norm": 1.2952866554260254, "learning_rate": 4.177832210655236e-06, "log_odds_chosen": 0.6636565923690796, "log_odds_ratio": -0.6481530666351318, "logits/chosen": -0.745581865310669, "logits/rejected": -0.9219915270805359, "logps/chosen": -0.729758620262146, "logps/rejected": -1.1486923694610596, "loss": 0.9026, "nll_loss": 0.8237044811248779, "rewards/accuracies": 0.5, "rewards/chosen": -0.07297585904598236, "rewards/margins": 0.041893370449543, "rewards/rejected": -0.11486922949552536, "step": 3931 }, { "epoch": 2.3986579228305627, "grad_norm": 1.4416712522506714, "learning_rate": 4.176852418860992e-06, "log_odds_chosen": 3.0700926780700684, "log_odds_ratio": -0.2236626148223877, "logits/chosen": -0.7268919348716736, "logits/rejected": -0.9543445110321045, "logps/chosen": -0.5795446634292603, "logps/rejected": -2.937012195587158, "loss": 0.8958, "nll_loss": 0.8657371997833252, "rewards/accuracies": 0.875, "rewards/chosen": -0.057954464107751846, "rewards/margins": 0.23574674129486084, "rewards/rejected": -0.2937012016773224, "step": 3932 }, { "epoch": 2.3992679579075795, "grad_norm": 8.939238548278809, "learning_rate": 4.175872627066748e-06, "log_odds_chosen": 1.819286584854126, "log_odds_ratio": -0.3656074106693268, "logits/chosen": -0.728431761264801, "logits/rejected": -0.8604109287261963, "logps/chosen": -0.6136094927787781, "logps/rejected": -1.8385367393493652, "loss": 1.0109, "nll_loss": 0.7478933930397034, "rewards/accuracies": 0.75, "rewards/chosen": -0.06136094778776169, "rewards/margins": 0.1224927306175232, "rewards/rejected": -0.18385367095470428, "step": 3933 }, { "epoch": 2.399877992984597, "grad_norm": 3.881653308868408, "learning_rate": 4.174892835272504e-06, "log_odds_chosen": 2.6852364540100098, "log_odds_ratio": -0.2528154253959656, "logits/chosen": -0.9280350208282471, "logits/rejected": -1.0481446981430054, "logps/chosen": -0.720953106880188, "logps/rejected": -2.7411396503448486, "loss": 0.9791, "nll_loss": 0.9457122087478638, "rewards/accuracies": 0.875, "rewards/chosen": -0.07209530472755432, "rewards/margins": 0.20201867818832397, "rewards/rejected": -0.2741139829158783, "step": 3934 }, { "epoch": 2.4004880280616137, "grad_norm": 4.701658725738525, "learning_rate": 4.17391304347826e-06, "log_odds_chosen": 1.0185083150863647, "log_odds_ratio": -0.6490785479545593, "logits/chosen": -0.9535336494445801, "logits/rejected": -0.7029327154159546, "logps/chosen": -1.0056777000427246, "logps/rejected": -1.9800100326538086, "loss": 1.1887, "nll_loss": 1.2359362840652466, "rewards/accuracies": 0.625, "rewards/chosen": -0.1005677729845047, "rewards/margins": 0.09743323922157288, "rewards/rejected": -0.19800101220607758, "step": 3935 }, { "epoch": 2.4010980631386305, "grad_norm": 2.594896078109741, "learning_rate": 4.172933251684017e-06, "log_odds_chosen": 0.8716482520103455, "log_odds_ratio": -0.5228211879730225, "logits/chosen": -1.0029512643814087, "logits/rejected": -0.8831565380096436, "logps/chosen": -0.9316202402114868, "logps/rejected": -1.550235390663147, "loss": 1.1999, "nll_loss": 1.074547290802002, "rewards/accuracies": 0.625, "rewards/chosen": -0.09316202253103256, "rewards/margins": 0.06186152994632721, "rewards/rejected": -0.15502354502677917, "step": 3936 }, { "epoch": 2.4017080982156473, "grad_norm": 1.291258692741394, "learning_rate": 4.171953459889773e-06, "log_odds_chosen": 1.5765583515167236, "log_odds_ratio": -0.44619220495224, "logits/chosen": -0.8200530409812927, "logits/rejected": -0.9655821323394775, "logps/chosen": -0.7178801894187927, "logps/rejected": -1.8370695114135742, "loss": 0.9639, "nll_loss": 0.8518433570861816, "rewards/accuracies": 0.75, "rewards/chosen": -0.07178802043199539, "rewards/margins": 0.11191892623901367, "rewards/rejected": -0.18370695412158966, "step": 3937 }, { "epoch": 2.402318133292664, "grad_norm": 1.546505331993103, "learning_rate": 4.17097366809553e-06, "log_odds_chosen": 1.8888070583343506, "log_odds_ratio": -0.3516363203525543, "logits/chosen": -0.8401468992233276, "logits/rejected": -0.9540204405784607, "logps/chosen": -0.6117013692855835, "logps/rejected": -1.910367727279663, "loss": 1.0992, "nll_loss": 0.839520275592804, "rewards/accuracies": 0.875, "rewards/chosen": -0.06117013841867447, "rewards/margins": 0.12986664474010468, "rewards/rejected": -0.19103679060935974, "step": 3938 }, { "epoch": 2.402928168369681, "grad_norm": 8.412747383117676, "learning_rate": 4.1699938763012855e-06, "log_odds_chosen": -0.15318617224693298, "log_odds_ratio": -0.9429612159729004, "logits/chosen": -1.011349081993103, "logits/rejected": -0.947752058506012, "logps/chosen": -1.2613706588745117, "logps/rejected": -1.163679838180542, "loss": 1.185, "nll_loss": 1.3709607124328613, "rewards/accuracies": 0.5, "rewards/chosen": -0.12613706290721893, "rewards/margins": -0.009769079275429249, "rewards/rejected": -0.11636798083782196, "step": 3939 }, { "epoch": 2.4035382034466983, "grad_norm": 2.7778995037078857, "learning_rate": 4.169014084507042e-06, "log_odds_chosen": 1.825218915939331, "log_odds_ratio": -0.4049835801124573, "logits/chosen": -0.6851568818092346, "logits/rejected": -0.8006423711776733, "logps/chosen": -0.7091966867446899, "logps/rejected": -2.1051692962646484, "loss": 1.0287, "nll_loss": 0.8414514660835266, "rewards/accuracies": 0.625, "rewards/chosen": -0.07091967016458511, "rewards/margins": 0.13959729671478271, "rewards/rejected": -0.21051695942878723, "step": 3940 }, { "epoch": 2.404148238523715, "grad_norm": 1.7059158086776733, "learning_rate": 4.168034292712799e-06, "log_odds_chosen": 0.9644880294799805, "log_odds_ratio": -0.47287023067474365, "logits/chosen": -0.8549529314041138, "logits/rejected": -0.840908944606781, "logps/chosen": -0.6627872586250305, "logps/rejected": -1.3911755084991455, "loss": 0.9468, "nll_loss": 0.9231002330780029, "rewards/accuracies": 0.625, "rewards/chosen": -0.06627872586250305, "rewards/margins": 0.07283882051706314, "rewards/rejected": -0.1391175538301468, "step": 3941 }, { "epoch": 2.404758273600732, "grad_norm": 1.8372011184692383, "learning_rate": 4.167054500918555e-06, "log_odds_chosen": 1.5428071022033691, "log_odds_ratio": -0.4786812663078308, "logits/chosen": -0.8827941417694092, "logits/rejected": -0.861477255821228, "logps/chosen": -0.7419000864028931, "logps/rejected": -1.9097455739974976, "loss": 0.9548, "nll_loss": 0.8394606709480286, "rewards/accuracies": 0.75, "rewards/chosen": -0.07419000566005707, "rewards/margins": 0.11678454279899597, "rewards/rejected": -0.19097456336021423, "step": 3942 }, { "epoch": 2.405368308677749, "grad_norm": 1.555417776107788, "learning_rate": 4.166074709124311e-06, "log_odds_chosen": 1.5370711088180542, "log_odds_ratio": -0.5513356924057007, "logits/chosen": -0.9656710624694824, "logits/rejected": -1.0129880905151367, "logps/chosen": -0.7137662172317505, "logps/rejected": -1.8413877487182617, "loss": 1.1188, "nll_loss": 1.0490849018096924, "rewards/accuracies": 0.75, "rewards/chosen": -0.07137662172317505, "rewards/margins": 0.11276214569807053, "rewards/rejected": -0.18413877487182617, "step": 3943 }, { "epoch": 2.4059783437547657, "grad_norm": 1.3235162496566772, "learning_rate": 4.165094917330068e-06, "log_odds_chosen": 2.130303382873535, "log_odds_ratio": -0.3526371121406555, "logits/chosen": -0.637824296951294, "logits/rejected": -0.7300437092781067, "logps/chosen": -0.6561853885650635, "logps/rejected": -2.2041592597961426, "loss": 0.9949, "nll_loss": 0.8862555027008057, "rewards/accuracies": 0.75, "rewards/chosen": -0.06561853736639023, "rewards/margins": 0.15479739010334015, "rewards/rejected": -0.22041592001914978, "step": 3944 }, { "epoch": 2.406588378831783, "grad_norm": 1.569880723953247, "learning_rate": 4.164115125535823e-06, "log_odds_chosen": 1.5867708921432495, "log_odds_ratio": -0.38823288679122925, "logits/chosen": -0.8833070993423462, "logits/rejected": -0.971686065196991, "logps/chosen": -0.6715250611305237, "logps/rejected": -1.7765374183654785, "loss": 1.0929, "nll_loss": 1.0071766376495361, "rewards/accuracies": 0.75, "rewards/chosen": -0.06715250760316849, "rewards/margins": 0.1105012595653534, "rewards/rejected": -0.17765375971794128, "step": 3945 }, { "epoch": 2.4071984139088, "grad_norm": 1.2614200115203857, "learning_rate": 4.163135333741579e-06, "log_odds_chosen": 1.09515380859375, "log_odds_ratio": -0.6595960855484009, "logits/chosen": -1.1363048553466797, "logits/rejected": -0.8904391527175903, "logps/chosen": -0.9962357878684998, "logps/rejected": -1.8793933391571045, "loss": 1.0777, "nll_loss": 1.187451958656311, "rewards/accuracies": 0.625, "rewards/chosen": -0.09962357580661774, "rewards/margins": 0.08831577003002167, "rewards/rejected": -0.1879393458366394, "step": 3946 }, { "epoch": 2.4078084489858167, "grad_norm": 2.3985722064971924, "learning_rate": 4.162155541947336e-06, "log_odds_chosen": 0.9331408143043518, "log_odds_ratio": -0.6156731247901917, "logits/chosen": -0.8484128713607788, "logits/rejected": -0.7836604714393616, "logps/chosen": -0.8983463048934937, "logps/rejected": -1.6640679836273193, "loss": 1.0681, "nll_loss": 1.0368926525115967, "rewards/accuracies": 0.5, "rewards/chosen": -0.08983463793992996, "rewards/margins": 0.07657216489315033, "rewards/rejected": -0.1664068102836609, "step": 3947 }, { "epoch": 2.4084184840628335, "grad_norm": 2.257876396179199, "learning_rate": 4.161175750153092e-06, "log_odds_chosen": 0.47177523374557495, "log_odds_ratio": -0.6171818375587463, "logits/chosen": -0.9078033566474915, "logits/rejected": -0.8790246248245239, "logps/chosen": -0.9996058344841003, "logps/rejected": -1.281135082244873, "loss": 1.1805, "nll_loss": 1.1609865427017212, "rewards/accuracies": 0.375, "rewards/chosen": -0.09996059536933899, "rewards/margins": 0.028152916580438614, "rewards/rejected": -0.1281135082244873, "step": 3948 }, { "epoch": 2.4090285191398504, "grad_norm": 2.0846707820892334, "learning_rate": 4.160195958358849e-06, "log_odds_chosen": 1.2476191520690918, "log_odds_ratio": -0.5245611071586609, "logits/chosen": -0.9080413579940796, "logits/rejected": -0.983295202255249, "logps/chosen": -0.8676263093948364, "logps/rejected": -1.830873727798462, "loss": 0.9466, "nll_loss": 1.001164436340332, "rewards/accuracies": 0.75, "rewards/chosen": -0.08676262944936752, "rewards/margins": 0.09632475674152374, "rewards/rejected": -0.18308737874031067, "step": 3949 }, { "epoch": 2.4096385542168672, "grad_norm": 8.00775146484375, "learning_rate": 4.1592161665646045e-06, "log_odds_chosen": 1.8601844310760498, "log_odds_ratio": -0.3973293900489807, "logits/chosen": -0.9951446652412415, "logits/rejected": -1.056185007095337, "logps/chosen": -0.842542290687561, "logps/rejected": -2.3385305404663086, "loss": 1.1479, "nll_loss": 1.0806617736816406, "rewards/accuracies": 0.75, "rewards/chosen": -0.08425422012805939, "rewards/margins": 0.1495988517999649, "rewards/rejected": -0.2338530719280243, "step": 3950 }, { "epoch": 2.4102485892938845, "grad_norm": 1.4961978197097778, "learning_rate": 4.158236374770361e-06, "log_odds_chosen": 1.2303285598754883, "log_odds_ratio": -0.42390137910842896, "logits/chosen": -0.5036446452140808, "logits/rejected": -0.5416945815086365, "logps/chosen": -0.651196300983429, "logps/rejected": -1.4523003101348877, "loss": 1.1757, "nll_loss": 0.830580472946167, "rewards/accuracies": 0.75, "rewards/chosen": -0.06511963158845901, "rewards/margins": 0.08011040091514587, "rewards/rejected": -0.1452300250530243, "step": 3951 }, { "epoch": 2.4108586243709014, "grad_norm": 21.335325241088867, "learning_rate": 4.157256582976118e-06, "log_odds_chosen": 1.847318410873413, "log_odds_ratio": -0.2786933183670044, "logits/chosen": -0.8913816213607788, "logits/rejected": -0.9215169548988342, "logps/chosen": -0.7035033702850342, "logps/rejected": -1.9671579599380493, "loss": 1.1022, "nll_loss": 1.0095036029815674, "rewards/accuracies": 0.875, "rewards/chosen": -0.07035034149885178, "rewards/margins": 0.12636546790599823, "rewards/rejected": -0.1967158019542694, "step": 3952 }, { "epoch": 2.411468659447918, "grad_norm": 6.039161205291748, "learning_rate": 4.156276791181874e-06, "log_odds_chosen": 0.9947299957275391, "log_odds_ratio": -0.5458722710609436, "logits/chosen": -0.9818813800811768, "logits/rejected": -1.1370927095413208, "logps/chosen": -1.0067222118377686, "logps/rejected": -1.9165709018707275, "loss": 1.163, "nll_loss": 1.1744160652160645, "rewards/accuracies": 0.625, "rewards/chosen": -0.10067223012447357, "rewards/margins": 0.09098486602306366, "rewards/rejected": -0.19165709614753723, "step": 3953 }, { "epoch": 2.412078694524935, "grad_norm": 1.6415960788726807, "learning_rate": 4.15529699938763e-06, "log_odds_chosen": 1.6608507633209229, "log_odds_ratio": -0.37162238359451294, "logits/chosen": -0.8170780539512634, "logits/rejected": -0.8018803000450134, "logps/chosen": -0.6629860401153564, "logps/rejected": -1.8462722301483154, "loss": 1.0413, "nll_loss": 1.0111860036849976, "rewards/accuracies": 0.75, "rewards/chosen": -0.06629861146211624, "rewards/margins": 0.11832861602306366, "rewards/rejected": -0.1846272200345993, "step": 3954 }, { "epoch": 2.4126887296019524, "grad_norm": 11.94420051574707, "learning_rate": 4.154317207593387e-06, "log_odds_chosen": 1.1980311870574951, "log_odds_ratio": -0.5348905324935913, "logits/chosen": -1.0068204402923584, "logits/rejected": -0.9349639415740967, "logps/chosen": -0.9081258773803711, "logps/rejected": -1.7552645206451416, "loss": 1.1477, "nll_loss": 1.3552733659744263, "rewards/accuracies": 0.625, "rewards/chosen": -0.0908125787973404, "rewards/margins": 0.0847138836979866, "rewards/rejected": -0.1755264699459076, "step": 3955 }, { "epoch": 2.413298764678969, "grad_norm": 1.674168348312378, "learning_rate": 4.153337415799142e-06, "log_odds_chosen": 0.9822320938110352, "log_odds_ratio": -0.5027878284454346, "logits/chosen": -0.8545803427696228, "logits/rejected": -0.8188158273696899, "logps/chosen": -0.8254404664039612, "logps/rejected": -1.5356873273849487, "loss": 1.0356, "nll_loss": 1.1227315664291382, "rewards/accuracies": 0.625, "rewards/chosen": -0.08254404366016388, "rewards/margins": 0.07102467864751816, "rewards/rejected": -0.15356871485710144, "step": 3956 }, { "epoch": 2.413908799755986, "grad_norm": 2.311084508895874, "learning_rate": 4.152357624004898e-06, "log_odds_chosen": 2.9596643447875977, "log_odds_ratio": -0.2578417956829071, "logits/chosen": -0.6898974180221558, "logits/rejected": -0.8970990777015686, "logps/chosen": -0.5069757103919983, "logps/rejected": -2.773819923400879, "loss": 0.9768, "nll_loss": 0.6692302823066711, "rewards/accuracies": 0.875, "rewards/chosen": -0.05069757252931595, "rewards/margins": 0.22668440639972687, "rewards/rejected": -0.277381956577301, "step": 3957 }, { "epoch": 2.414518834833003, "grad_norm": 3.0980899333953857, "learning_rate": 4.151377832210655e-06, "log_odds_chosen": 2.1461234092712402, "log_odds_ratio": -0.38486042618751526, "logits/chosen": -0.5985862016677856, "logits/rejected": -0.7449844479560852, "logps/chosen": -0.6583192348480225, "logps/rejected": -2.2573628425598145, "loss": 0.9189, "nll_loss": 0.7985731363296509, "rewards/accuracies": 0.875, "rewards/chosen": -0.06583192199468613, "rewards/margins": 0.159904345870018, "rewards/rejected": -0.22573626041412354, "step": 3958 }, { "epoch": 2.4151288699100197, "grad_norm": 1.2964355945587158, "learning_rate": 4.150398040416411e-06, "log_odds_chosen": 2.6346263885498047, "log_odds_ratio": -0.19452139735221863, "logits/chosen": -0.8428938388824463, "logits/rejected": -0.9386836290359497, "logps/chosen": -0.6289932727813721, "logps/rejected": -2.663546562194824, "loss": 0.9921, "nll_loss": 0.8347455859184265, "rewards/accuracies": 1.0, "rewards/chosen": -0.06289932876825333, "rewards/margins": 0.2034553438425064, "rewards/rejected": -0.26635468006134033, "step": 3959 }, { "epoch": 2.4157389049870366, "grad_norm": 1.2655318975448608, "learning_rate": 4.1494182486221675e-06, "log_odds_chosen": 0.9804688692092896, "log_odds_ratio": -0.4647696614265442, "logits/chosen": -0.8815598487854004, "logits/rejected": -0.9707659482955933, "logps/chosen": -0.789036214351654, "logps/rejected": -1.4375871419906616, "loss": 0.9965, "nll_loss": 1.0619224309921265, "rewards/accuracies": 0.75, "rewards/chosen": -0.07890362292528152, "rewards/margins": 0.06485508382320404, "rewards/rejected": -0.14375871419906616, "step": 3960 }, { "epoch": 2.416348940064054, "grad_norm": 1.5972391366958618, "learning_rate": 4.1484384568279244e-06, "log_odds_chosen": 2.011672019958496, "log_odds_ratio": -0.3909597396850586, "logits/chosen": -0.6462357640266418, "logits/rejected": -0.9737759828567505, "logps/chosen": -0.6658480763435364, "logps/rejected": -2.283992290496826, "loss": 0.8861, "nll_loss": 0.7958141565322876, "rewards/accuracies": 0.75, "rewards/chosen": -0.06658481061458588, "rewards/margins": 0.16181442141532898, "rewards/rejected": -0.22839923202991486, "step": 3961 }, { "epoch": 2.4169589751410707, "grad_norm": 1.0415985584259033, "learning_rate": 4.14745866503368e-06, "log_odds_chosen": 1.4399073123931885, "log_odds_ratio": -0.5480465292930603, "logits/chosen": -1.001936674118042, "logits/rejected": -0.9643756151199341, "logps/chosen": -0.8734875917434692, "logps/rejected": -1.8555963039398193, "loss": 1.297, "nll_loss": 1.160888910293579, "rewards/accuracies": 0.5, "rewards/chosen": -0.08734877407550812, "rewards/margins": 0.09821085631847382, "rewards/rejected": -0.18555963039398193, "step": 3962 }, { "epoch": 2.4175690102180876, "grad_norm": 1.4986286163330078, "learning_rate": 4.146478873239436e-06, "log_odds_chosen": 1.5612634420394897, "log_odds_ratio": -0.30157122015953064, "logits/chosen": -0.8502963781356812, "logits/rejected": -0.8151975870132446, "logps/chosen": -1.0710595846176147, "logps/rejected": -2.3400378227233887, "loss": 1.1048, "nll_loss": 1.194467544555664, "rewards/accuracies": 1.0, "rewards/chosen": -0.10710595548152924, "rewards/margins": 0.12689784169197083, "rewards/rejected": -0.23400379717350006, "step": 3963 }, { "epoch": 2.4181790452951044, "grad_norm": 1.8232232332229614, "learning_rate": 4.145499081445193e-06, "log_odds_chosen": 2.3004534244537354, "log_odds_ratio": -0.26061803102493286, "logits/chosen": -0.9010784029960632, "logits/rejected": -0.968770444393158, "logps/chosen": -0.8780102729797363, "logps/rejected": -2.6640214920043945, "loss": 1.1631, "nll_loss": 0.9696353673934937, "rewards/accuracies": 0.875, "rewards/chosen": -0.08780103921890259, "rewards/margins": 0.17860111594200134, "rewards/rejected": -0.26640212535858154, "step": 3964 }, { "epoch": 2.4187890803721213, "grad_norm": 1.9646986722946167, "learning_rate": 4.144519289650949e-06, "log_odds_chosen": 1.733018159866333, "log_odds_ratio": -0.43329769372940063, "logits/chosen": -0.756569504737854, "logits/rejected": -0.7405564785003662, "logps/chosen": -0.8362756967544556, "logps/rejected": -2.3195178508758545, "loss": 1.0417, "nll_loss": 1.1524276733398438, "rewards/accuracies": 0.625, "rewards/chosen": -0.08362757414579391, "rewards/margins": 0.14832422137260437, "rewards/rejected": -0.2319517731666565, "step": 3965 }, { "epoch": 2.4193991154491385, "grad_norm": 2.2055413722991943, "learning_rate": 4.143539497856706e-06, "log_odds_chosen": 1.6649343967437744, "log_odds_ratio": -0.44047683477401733, "logits/chosen": -0.7587037682533264, "logits/rejected": -0.9775071144104004, "logps/chosen": -1.0877407789230347, "logps/rejected": -2.4717957973480225, "loss": 1.0615, "nll_loss": 1.1139405965805054, "rewards/accuracies": 0.625, "rewards/chosen": -0.1087740808725357, "rewards/margins": 0.13840550184249878, "rewards/rejected": -0.24717959761619568, "step": 3966 }, { "epoch": 2.4200091505261554, "grad_norm": 1.4618638753890991, "learning_rate": 4.142559706062462e-06, "log_odds_chosen": 1.0668305158615112, "log_odds_ratio": -0.6179268956184387, "logits/chosen": -0.8423967361450195, "logits/rejected": -0.7791250944137573, "logps/chosen": -0.7671372294425964, "logps/rejected": -1.6257517337799072, "loss": 1.0833, "nll_loss": 1.097561240196228, "rewards/accuracies": 0.75, "rewards/chosen": -0.07671371847391129, "rewards/margins": 0.08586146682500839, "rewards/rejected": -0.16257518529891968, "step": 3967 }, { "epoch": 2.4206191856031722, "grad_norm": 1.186140537261963, "learning_rate": 4.141579914268217e-06, "log_odds_chosen": 0.16600562632083893, "log_odds_ratio": -0.7311100363731384, "logits/chosen": -1.0190958976745605, "logits/rejected": -0.9621322154998779, "logps/chosen": -1.025681495666504, "logps/rejected": -1.0332525968551636, "loss": 1.2801, "nll_loss": 1.486778736114502, "rewards/accuracies": 0.375, "rewards/chosen": -0.10256814956665039, "rewards/margins": 0.0007571219466626644, "rewards/rejected": -0.1033252701163292, "step": 3968 }, { "epoch": 2.421229220680189, "grad_norm": 0.969561755657196, "learning_rate": 4.140600122473974e-06, "log_odds_chosen": 1.4427728652954102, "log_odds_ratio": -0.5834457278251648, "logits/chosen": -1.0250942707061768, "logits/rejected": -1.1051199436187744, "logps/chosen": -0.7097722291946411, "logps/rejected": -1.6952301263809204, "loss": 1.0665, "nll_loss": 1.064967155456543, "rewards/accuracies": 0.625, "rewards/chosen": -0.07097722589969635, "rewards/margins": 0.09854577481746674, "rewards/rejected": -0.1695229858160019, "step": 3969 }, { "epoch": 2.421839255757206, "grad_norm": 1.2623181343078613, "learning_rate": 4.13962033067973e-06, "log_odds_chosen": 1.8972855806350708, "log_odds_ratio": -0.3150244951248169, "logits/chosen": -0.899439811706543, "logits/rejected": -0.870725154876709, "logps/chosen": -0.6995154619216919, "logps/rejected": -2.049734354019165, "loss": 1.0627, "nll_loss": 0.9567804336547852, "rewards/accuracies": 0.875, "rewards/chosen": -0.06995154917240143, "rewards/margins": 0.1350218951702118, "rewards/rejected": -0.20497342944145203, "step": 3970 }, { "epoch": 2.422449290834223, "grad_norm": 2.9498229026794434, "learning_rate": 4.1386405388854865e-06, "log_odds_chosen": 0.8998129963874817, "log_odds_ratio": -0.6412692666053772, "logits/chosen": -1.0679014921188354, "logits/rejected": -1.0237420797348022, "logps/chosen": -0.9972176551818848, "logps/rejected": -1.798455834388733, "loss": 1.2094, "nll_loss": 1.384864330291748, "rewards/accuracies": 0.5, "rewards/chosen": -0.09972177445888519, "rewards/margins": 0.08012381196022034, "rewards/rejected": -0.17984558641910553, "step": 3971 }, { "epoch": 2.42305932591124, "grad_norm": 1.0324434041976929, "learning_rate": 4.1376607470912435e-06, "log_odds_chosen": 1.4957690238952637, "log_odds_ratio": -0.3607153296470642, "logits/chosen": -0.8688327074050903, "logits/rejected": -1.0214004516601562, "logps/chosen": -0.7492775321006775, "logps/rejected": -1.8159456253051758, "loss": 1.1129, "nll_loss": 1.2851450443267822, "rewards/accuracies": 0.875, "rewards/chosen": -0.07492775470018387, "rewards/margins": 0.10666681826114655, "rewards/rejected": -0.18159456551074982, "step": 3972 }, { "epoch": 2.423669360988257, "grad_norm": 4.800498962402344, "learning_rate": 4.136680955296999e-06, "log_odds_chosen": 1.4492677450180054, "log_odds_ratio": -0.36386626958847046, "logits/chosen": -0.8716745376586914, "logits/rejected": -0.8839867115020752, "logps/chosen": -0.6891593337059021, "logps/rejected": -1.7606340646743774, "loss": 0.9837, "nll_loss": 0.9491456747055054, "rewards/accuracies": 0.875, "rewards/chosen": -0.06891593337059021, "rewards/margins": 0.1071474701166153, "rewards/rejected": -0.1760634183883667, "step": 3973 }, { "epoch": 2.4242793960652738, "grad_norm": 1.3645238876342773, "learning_rate": 4.135701163502755e-06, "log_odds_chosen": 2.483628034591675, "log_odds_ratio": -0.29885202646255493, "logits/chosen": -0.8632800579071045, "logits/rejected": -0.9464362859725952, "logps/chosen": -0.5137407779693604, "logps/rejected": -2.3335320949554443, "loss": 1.0627, "nll_loss": 1.0235769748687744, "rewards/accuracies": 0.875, "rewards/chosen": -0.051374077796936035, "rewards/margins": 0.18197913467884064, "rewards/rejected": -0.23335321247577667, "step": 3974 }, { "epoch": 2.4248894311422906, "grad_norm": 1.7335505485534668, "learning_rate": 4.134721371708512e-06, "log_odds_chosen": 2.2128405570983887, "log_odds_ratio": -0.31389349699020386, "logits/chosen": -0.7556325793266296, "logits/rejected": -0.8443374633789062, "logps/chosen": -0.7172579169273376, "logps/rejected": -2.458615303039551, "loss": 1.0243, "nll_loss": 0.9441617727279663, "rewards/accuracies": 0.875, "rewards/chosen": -0.07172580063343048, "rewards/margins": 0.1741357445716858, "rewards/rejected": -0.24586156010627747, "step": 3975 }, { "epoch": 2.4254994662193075, "grad_norm": 3.4177322387695312, "learning_rate": 4.133741579914268e-06, "log_odds_chosen": 1.068317174911499, "log_odds_ratio": -0.5266236066818237, "logits/chosen": -1.157911777496338, "logits/rejected": -1.0849432945251465, "logps/chosen": -1.068916916847229, "logps/rejected": -1.9164413213729858, "loss": 1.1572, "nll_loss": 1.2356655597686768, "rewards/accuracies": 0.75, "rewards/chosen": -0.1068917065858841, "rewards/margins": 0.08475242555141449, "rewards/rejected": -0.19164413213729858, "step": 3976 }, { "epoch": 2.4261095012963247, "grad_norm": 4.959366798400879, "learning_rate": 4.132761788120025e-06, "log_odds_chosen": 1.463215947151184, "log_odds_ratio": -0.4183636009693146, "logits/chosen": -0.7905042171478271, "logits/rejected": -0.7901585102081299, "logps/chosen": -0.9100571274757385, "logps/rejected": -2.0428473949432373, "loss": 1.2081, "nll_loss": 1.06667160987854, "rewards/accuracies": 0.75, "rewards/chosen": -0.09100571274757385, "rewards/margins": 0.11327902972698212, "rewards/rejected": -0.20428474247455597, "step": 3977 }, { "epoch": 2.4267195363733416, "grad_norm": 3.941763401031494, "learning_rate": 4.131781996325781e-06, "log_odds_chosen": 1.2651288509368896, "log_odds_ratio": -0.7277805209159851, "logits/chosen": -0.8726680278778076, "logits/rejected": -0.9015929698944092, "logps/chosen": -1.0920864343643188, "logps/rejected": -1.9224019050598145, "loss": 0.9938, "nll_loss": 1.0891895294189453, "rewards/accuracies": 0.75, "rewards/chosen": -0.10920864343643188, "rewards/margins": 0.0830315575003624, "rewards/rejected": -0.19224020838737488, "step": 3978 }, { "epoch": 2.4273295714503584, "grad_norm": 1.5058780908584595, "learning_rate": 4.130802204531536e-06, "log_odds_chosen": 1.043298363685608, "log_odds_ratio": -0.6637181043624878, "logits/chosen": -0.9378571510314941, "logits/rejected": -0.9721625447273254, "logps/chosen": -0.9272719025611877, "logps/rejected": -1.6724363565444946, "loss": 1.0983, "nll_loss": 1.086143136024475, "rewards/accuracies": 0.5, "rewards/chosen": -0.0927271917462349, "rewards/margins": 0.07451644539833069, "rewards/rejected": -0.16724362969398499, "step": 3979 }, { "epoch": 2.4279396065273753, "grad_norm": 1.4491368532180786, "learning_rate": 4.129822412737293e-06, "log_odds_chosen": 1.0134552717208862, "log_odds_ratio": -0.5758593082427979, "logits/chosen": -0.9842895269393921, "logits/rejected": -0.8329980373382568, "logps/chosen": -0.7382550835609436, "logps/rejected": -1.4335088729858398, "loss": 0.9768, "nll_loss": 0.8386445045471191, "rewards/accuracies": 0.625, "rewards/chosen": -0.07382550835609436, "rewards/margins": 0.06952537596225739, "rewards/rejected": -0.14335088431835175, "step": 3980 }, { "epoch": 2.428549641604392, "grad_norm": 1.4022204875946045, "learning_rate": 4.128842620943049e-06, "log_odds_chosen": 1.1144866943359375, "log_odds_ratio": -0.46404752135276794, "logits/chosen": -0.9635143876075745, "logits/rejected": -0.9511457681655884, "logps/chosen": -0.7881898880004883, "logps/rejected": -1.5744531154632568, "loss": 1.2166, "nll_loss": 1.0290449857711792, "rewards/accuracies": 0.75, "rewards/chosen": -0.07881899178028107, "rewards/margins": 0.07862631976604462, "rewards/rejected": -0.15744531154632568, "step": 3981 }, { "epoch": 2.429159676681409, "grad_norm": 1.2422698736190796, "learning_rate": 4.1278628291488055e-06, "log_odds_chosen": 2.6338319778442383, "log_odds_ratio": -0.3933468759059906, "logits/chosen": -0.7998634576797485, "logits/rejected": -1.0294865369796753, "logps/chosen": -0.7747669816017151, "logps/rejected": -2.983154296875, "loss": 0.9901, "nll_loss": 0.8545289039611816, "rewards/accuracies": 0.75, "rewards/chosen": -0.07747670263051987, "rewards/margins": 0.220838725566864, "rewards/rejected": -0.2983154356479645, "step": 3982 }, { "epoch": 2.4297697117584263, "grad_norm": 1.663756251335144, "learning_rate": 4.1268830373545625e-06, "log_odds_chosen": 0.6710303425788879, "log_odds_ratio": -0.5836065411567688, "logits/chosen": -1.0822863578796387, "logits/rejected": -0.9876830577850342, "logps/chosen": -1.0685571432113647, "logps/rejected": -1.3568871021270752, "loss": 0.9577, "nll_loss": 1.2728254795074463, "rewards/accuracies": 0.875, "rewards/chosen": -0.10685570538043976, "rewards/margins": 0.028832996264100075, "rewards/rejected": -0.13568870723247528, "step": 3983 }, { "epoch": 2.430379746835443, "grad_norm": 1.6076663732528687, "learning_rate": 4.125903245560319e-06, "log_odds_chosen": 2.3216845989227295, "log_odds_ratio": -0.28633207082748413, "logits/chosen": -0.7970588803291321, "logits/rejected": -1.010600209236145, "logps/chosen": -0.6940172910690308, "logps/rejected": -2.4948554039001465, "loss": 1.0243, "nll_loss": 0.7996928691864014, "rewards/accuracies": 0.875, "rewards/chosen": -0.06940172612667084, "rewards/margins": 0.18008384108543396, "rewards/rejected": -0.2494855672121048, "step": 3984 }, { "epoch": 2.43098978191246, "grad_norm": 3.113330364227295, "learning_rate": 4.124923453766074e-06, "log_odds_chosen": 2.340449333190918, "log_odds_ratio": -0.41634315252304077, "logits/chosen": -0.5666605234146118, "logits/rejected": -0.7347721457481384, "logps/chosen": -0.6386579275131226, "logps/rejected": -2.531240463256836, "loss": 0.9842, "nll_loss": 0.8672448396682739, "rewards/accuracies": 0.75, "rewards/chosen": -0.0638657957315445, "rewards/margins": 0.18925823271274567, "rewards/rejected": -0.25312405824661255, "step": 3985 }, { "epoch": 2.431599816989477, "grad_norm": 1.0897691249847412, "learning_rate": 4.123943661971831e-06, "log_odds_chosen": 0.46300965547561646, "log_odds_ratio": -0.6183956861495972, "logits/chosen": -0.9824340343475342, "logits/rejected": -0.9201133251190186, "logps/chosen": -0.8394055366516113, "logps/rejected": -1.177737832069397, "loss": 0.9558, "nll_loss": 0.9284405708312988, "rewards/accuracies": 0.5, "rewards/chosen": -0.0839405506849289, "rewards/margins": 0.033833228051662445, "rewards/rejected": -0.11777378618717194, "step": 3986 }, { "epoch": 2.4322098520664936, "grad_norm": 10.71863842010498, "learning_rate": 4.122963870177587e-06, "log_odds_chosen": 1.3502123355865479, "log_odds_ratio": -0.49724406003952026, "logits/chosen": -0.9661071300506592, "logits/rejected": -0.8142659664154053, "logps/chosen": -0.9688727855682373, "logps/rejected": -2.111569881439209, "loss": 1.0504, "nll_loss": 1.0731712579727173, "rewards/accuracies": 0.625, "rewards/chosen": -0.09688727557659149, "rewards/margins": 0.11426973342895508, "rewards/rejected": -0.21115699410438538, "step": 3987 }, { "epoch": 2.432819887143511, "grad_norm": 6.880799770355225, "learning_rate": 4.121984078383343e-06, "log_odds_chosen": 0.7822170257568359, "log_odds_ratio": -0.5135849714279175, "logits/chosen": -0.9453437328338623, "logits/rejected": -0.9205343723297119, "logps/chosen": -0.7784498929977417, "logps/rejected": -1.2076539993286133, "loss": 0.9834, "nll_loss": 1.004684329032898, "rewards/accuracies": 0.75, "rewards/chosen": -0.07784498482942581, "rewards/margins": 0.04292040318250656, "rewards/rejected": -0.12076538801193237, "step": 3988 }, { "epoch": 2.433429922220528, "grad_norm": 1.7938510179519653, "learning_rate": 4.1210042865891e-06, "log_odds_chosen": 2.984121084213257, "log_odds_ratio": -0.2056972235441208, "logits/chosen": -1.0406832695007324, "logits/rejected": -1.0296525955200195, "logps/chosen": -0.7236111164093018, "logps/rejected": -3.1500134468078613, "loss": 1.161, "nll_loss": 0.9256296157836914, "rewards/accuracies": 1.0, "rewards/chosen": -0.07236111164093018, "rewards/margins": 0.24264027178287506, "rewards/rejected": -0.31500136852264404, "step": 3989 }, { "epoch": 2.4340399572975446, "grad_norm": 1.577618956565857, "learning_rate": 4.120024494794855e-06, "log_odds_chosen": 0.3158932626247406, "log_odds_ratio": -0.683213472366333, "logits/chosen": -0.9836444854736328, "logits/rejected": -1.1048989295959473, "logps/chosen": -1.0371630191802979, "logps/rejected": -1.373929500579834, "loss": 1.0776, "nll_loss": 1.1510307788848877, "rewards/accuracies": 0.5, "rewards/chosen": -0.10371631383895874, "rewards/margins": 0.0336766391992569, "rewards/rejected": -0.13739293813705444, "step": 3990 }, { "epoch": 2.4346499923745615, "grad_norm": 3.809664011001587, "learning_rate": 4.119044703000612e-06, "log_odds_chosen": 1.514732837677002, "log_odds_ratio": -0.4077833294868469, "logits/chosen": -0.7110183238983154, "logits/rejected": -0.7692201733589172, "logps/chosen": -0.8736640214920044, "logps/rejected": -1.8556677103042603, "loss": 1.0732, "nll_loss": 1.1574654579162598, "rewards/accuracies": 0.875, "rewards/chosen": -0.08736640214920044, "rewards/margins": 0.09820037335157394, "rewards/rejected": -0.18556678295135498, "step": 3991 }, { "epoch": 2.4352600274515783, "grad_norm": 1.9818062782287598, "learning_rate": 4.1180649112063685e-06, "log_odds_chosen": 0.6685363054275513, "log_odds_ratio": -0.5672813057899475, "logits/chosen": -0.7583861947059631, "logits/rejected": -0.776763379573822, "logps/chosen": -0.69366455078125, "logps/rejected": -1.1499985456466675, "loss": 0.9447, "nll_loss": 0.7949637174606323, "rewards/accuracies": 0.625, "rewards/chosen": -0.069366455078125, "rewards/margins": 0.045633405447006226, "rewards/rejected": -0.11499986052513123, "step": 3992 }, { "epoch": 2.435870062528595, "grad_norm": 2.1227731704711914, "learning_rate": 4.1170851194121246e-06, "log_odds_chosen": 2.633737325668335, "log_odds_ratio": -0.3222983181476593, "logits/chosen": -0.8052576184272766, "logits/rejected": -0.9749875664710999, "logps/chosen": -0.6653146147727966, "logps/rejected": -2.6565961837768555, "loss": 1.0028, "nll_loss": 0.8895390033721924, "rewards/accuracies": 0.875, "rewards/chosen": -0.0665314644575119, "rewards/margins": 0.1991281509399414, "rewards/rejected": -0.2656596302986145, "step": 3993 }, { "epoch": 2.4364800976056125, "grad_norm": 1.2290446758270264, "learning_rate": 4.1161053276178815e-06, "log_odds_chosen": 1.170596957206726, "log_odds_ratio": -0.48540225625038147, "logits/chosen": -0.9545691013336182, "logits/rejected": -0.9764495491981506, "logps/chosen": -0.8500955700874329, "logps/rejected": -1.621840238571167, "loss": 1.1456, "nll_loss": 1.0872783660888672, "rewards/accuracies": 0.625, "rewards/chosen": -0.08500956743955612, "rewards/margins": 0.07717446982860565, "rewards/rejected": -0.16218402981758118, "step": 3994 }, { "epoch": 2.4370901326826293, "grad_norm": 1.5350295305252075, "learning_rate": 4.115125535823638e-06, "log_odds_chosen": 3.2311770915985107, "log_odds_ratio": -0.33318978548049927, "logits/chosen": -0.7598448395729065, "logits/rejected": -1.0065032243728638, "logps/chosen": -0.7406067848205566, "logps/rejected": -3.4531986713409424, "loss": 1.1183, "nll_loss": 0.9546404480934143, "rewards/accuracies": 0.75, "rewards/chosen": -0.07406068593263626, "rewards/margins": 0.2712591588497162, "rewards/rejected": -0.34531983733177185, "step": 3995 }, { "epoch": 2.437700167759646, "grad_norm": 1.7401049137115479, "learning_rate": 4.114145744029393e-06, "log_odds_chosen": 1.7076122760772705, "log_odds_ratio": -0.358569860458374, "logits/chosen": -0.6769351959228516, "logits/rejected": -0.8346085548400879, "logps/chosen": -0.8393694758415222, "logps/rejected": -2.176830291748047, "loss": 1.0406, "nll_loss": 1.1066629886627197, "rewards/accuracies": 0.875, "rewards/chosen": -0.08393694460391998, "rewards/margins": 0.13374607264995575, "rewards/rejected": -0.21768301725387573, "step": 3996 }, { "epoch": 2.438310202836663, "grad_norm": 1.804842472076416, "learning_rate": 4.11316595223515e-06, "log_odds_chosen": 2.5041723251342773, "log_odds_ratio": -0.6255578994750977, "logits/chosen": -0.7956425547599792, "logits/rejected": -0.9126222133636475, "logps/chosen": -0.9300618767738342, "logps/rejected": -3.1033377647399902, "loss": 1.064, "nll_loss": 1.21225106716156, "rewards/accuracies": 0.5, "rewards/chosen": -0.0930061936378479, "rewards/margins": 0.21732762455940247, "rewards/rejected": -0.31033381819725037, "step": 3997 }, { "epoch": 2.43892023791368, "grad_norm": 1.733553409576416, "learning_rate": 4.112186160440906e-06, "log_odds_chosen": 2.6390466690063477, "log_odds_ratio": -0.2755109667778015, "logits/chosen": -0.6111873388290405, "logits/rejected": -0.780420184135437, "logps/chosen": -0.8007665872573853, "logps/rejected": -2.867398738861084, "loss": 1.0606, "nll_loss": 0.8865332007408142, "rewards/accuracies": 0.75, "rewards/chosen": -0.0800766572356224, "rewards/margins": 0.20666322112083435, "rewards/rejected": -0.28673988580703735, "step": 3998 }, { "epoch": 2.439530272990697, "grad_norm": 1.046318531036377, "learning_rate": 4.111206368646662e-06, "log_odds_chosen": 1.2777811288833618, "log_odds_ratio": -0.4541223645210266, "logits/chosen": -0.9518057107925415, "logits/rejected": -0.9079644680023193, "logps/chosen": -0.9398811459541321, "logps/rejected": -1.892717957496643, "loss": 0.9771, "nll_loss": 1.094894289970398, "rewards/accuracies": 0.75, "rewards/chosen": -0.09398810565471649, "rewards/margins": 0.09528367966413498, "rewards/rejected": -0.18927179276943207, "step": 3999 }, { "epoch": 2.440140308067714, "grad_norm": 1.7794406414031982, "learning_rate": 4.110226576852419e-06, "log_odds_chosen": 3.6040337085723877, "log_odds_ratio": -0.36204448342323303, "logits/chosen": -0.7401628494262695, "logits/rejected": -0.8654237389564514, "logps/chosen": -0.7010796070098877, "logps/rejected": -3.7798917293548584, "loss": 0.9459, "nll_loss": 0.9073655605316162, "rewards/accuracies": 0.75, "rewards/chosen": -0.07010795921087265, "rewards/margins": 0.307881236076355, "rewards/rejected": -0.37798917293548584, "step": 4000 }, { "epoch": 2.440750343144731, "grad_norm": 4.001279354095459, "learning_rate": 4.109246785058175e-06, "log_odds_chosen": 1.0134694576263428, "log_odds_ratio": -0.46846669912338257, "logits/chosen": -0.6030614972114563, "logits/rejected": -0.7921934127807617, "logps/chosen": -0.7262508273124695, "logps/rejected": -1.4053523540496826, "loss": 0.9753, "nll_loss": 0.7613164186477661, "rewards/accuracies": 0.875, "rewards/chosen": -0.07262507826089859, "rewards/margins": 0.06791015714406967, "rewards/rejected": -0.14053525030612946, "step": 4001 }, { "epoch": 2.4413603782217477, "grad_norm": 1.2013052701950073, "learning_rate": 4.1082669932639305e-06, "log_odds_chosen": 1.2842609882354736, "log_odds_ratio": -0.9509759545326233, "logits/chosen": -0.6197727918624878, "logits/rejected": -0.5221917629241943, "logps/chosen": -0.9579405784606934, "logps/rejected": -2.047283411026001, "loss": 0.9885, "nll_loss": 0.7473157644271851, "rewards/accuracies": 0.75, "rewards/chosen": -0.09579405933618546, "rewards/margins": 0.10893429070711136, "rewards/rejected": -0.20472835004329681, "step": 4002 }, { "epoch": 2.4419704132987645, "grad_norm": 1.8148691654205322, "learning_rate": 4.1072872014696875e-06, "log_odds_chosen": 2.0473110675811768, "log_odds_ratio": -0.25877392292022705, "logits/chosen": -1.0308094024658203, "logits/rejected": -1.2246909141540527, "logps/chosen": -0.8604073524475098, "logps/rejected": -2.4674718379974365, "loss": 0.9977, "nll_loss": 1.032224178314209, "rewards/accuracies": 1.0, "rewards/chosen": -0.08604073524475098, "rewards/margins": 0.16070644557476044, "rewards/rejected": -0.2467471957206726, "step": 4003 }, { "epoch": 2.4425804483757814, "grad_norm": 2.6221649646759033, "learning_rate": 4.106307409675444e-06, "log_odds_chosen": -0.12559828162193298, "log_odds_ratio": -0.794310450553894, "logits/chosen": -1.0561041831970215, "logits/rejected": -0.9877917766571045, "logps/chosen": -1.0609560012817383, "logps/rejected": -0.9888412952423096, "loss": 1.0777, "nll_loss": 1.2491004467010498, "rewards/accuracies": 0.375, "rewards/chosen": -0.10609559714794159, "rewards/margins": -0.007211461663246155, "rewards/rejected": -0.09888413548469543, "step": 4004 }, { "epoch": 2.4431904834527987, "grad_norm": 1.6220606565475464, "learning_rate": 4.105327617881201e-06, "log_odds_chosen": 1.0894029140472412, "log_odds_ratio": -0.38796138763427734, "logits/chosen": -0.9788026809692383, "logits/rejected": -0.9486875534057617, "logps/chosen": -0.9308581352233887, "logps/rejected": -1.7693440914154053, "loss": 1.1026, "nll_loss": 0.9890075922012329, "rewards/accuracies": 0.75, "rewards/chosen": -0.09308580309152603, "rewards/margins": 0.08384861052036285, "rewards/rejected": -0.17693442106246948, "step": 4005 }, { "epoch": 2.4438005185298155, "grad_norm": 2.2510221004486084, "learning_rate": 4.104347826086957e-06, "log_odds_chosen": 0.10411562025547028, "log_odds_ratio": -0.8102810382843018, "logits/chosen": -1.0161075592041016, "logits/rejected": -0.8905754685401917, "logps/chosen": -1.3602670431137085, "logps/rejected": -1.398389220237732, "loss": 1.1633, "nll_loss": 1.3879711627960205, "rewards/accuracies": 0.625, "rewards/chosen": -0.13602671027183533, "rewards/margins": 0.0038122106343507767, "rewards/rejected": -0.13983893394470215, "step": 4006 }, { "epoch": 2.4444105536068323, "grad_norm": 1.2194105386734009, "learning_rate": 4.103368034292713e-06, "log_odds_chosen": 1.4136393070220947, "log_odds_ratio": -0.36366885900497437, "logits/chosen": -0.9341058731079102, "logits/rejected": -0.9585076570510864, "logps/chosen": -0.9017707109451294, "logps/rejected": -2.0238592624664307, "loss": 0.9605, "nll_loss": 1.0898829698562622, "rewards/accuracies": 0.875, "rewards/chosen": -0.09017706662416458, "rewards/margins": 0.11220887303352356, "rewards/rejected": -0.20238593220710754, "step": 4007 }, { "epoch": 2.445020588683849, "grad_norm": 3.3967268466949463, "learning_rate": 4.102388242498469e-06, "log_odds_chosen": 0.5713521242141724, "log_odds_ratio": -0.7069587111473083, "logits/chosen": -1.070551872253418, "logits/rejected": -0.9901330471038818, "logps/chosen": -1.1810989379882812, "logps/rejected": -1.6925289630889893, "loss": 1.0651, "nll_loss": 1.323093056678772, "rewards/accuracies": 0.625, "rewards/chosen": -0.11810989677906036, "rewards/margins": 0.05114298313856125, "rewards/rejected": -0.16925287246704102, "step": 4008 }, { "epoch": 2.4456306237608665, "grad_norm": 13.03530216217041, "learning_rate": 4.101408450704225e-06, "log_odds_chosen": 1.5889270305633545, "log_odds_ratio": -0.3513872027397156, "logits/chosen": -0.9613492488861084, "logits/rejected": -1.018190622329712, "logps/chosen": -0.8098403811454773, "logps/rejected": -1.9185543060302734, "loss": 1.0109, "nll_loss": 0.9222970008850098, "rewards/accuracies": 1.0, "rewards/chosen": -0.08098404109477997, "rewards/margins": 0.11087140440940857, "rewards/rejected": -0.19185544550418854, "step": 4009 }, { "epoch": 2.4462406588378833, "grad_norm": 2.0526368618011475, "learning_rate": 4.100428658909981e-06, "log_odds_chosen": 2.5308964252471924, "log_odds_ratio": -0.34644997119903564, "logits/chosen": -0.7465721368789673, "logits/rejected": -0.7653748989105225, "logps/chosen": -0.5721865892410278, "logps/rejected": -2.3754069805145264, "loss": 1.004, "nll_loss": 0.9127488136291504, "rewards/accuracies": 0.75, "rewards/chosen": -0.05721866339445114, "rewards/margins": 0.18032206594944, "rewards/rejected": -0.23754072189331055, "step": 4010 }, { "epoch": 2.4468506939149, "grad_norm": 2.602477788925171, "learning_rate": 4.099448867115738e-06, "log_odds_chosen": 2.4472827911376953, "log_odds_ratio": -0.27536582946777344, "logits/chosen": -0.8755006790161133, "logits/rejected": -0.9111487865447998, "logps/chosen": -0.9546583890914917, "logps/rejected": -2.960759401321411, "loss": 1.0425, "nll_loss": 1.084059238433838, "rewards/accuracies": 0.875, "rewards/chosen": -0.09546583890914917, "rewards/margins": 0.20061010122299194, "rewards/rejected": -0.2960759401321411, "step": 4011 }, { "epoch": 2.447460728991917, "grad_norm": 1.367177963256836, "learning_rate": 4.098469075321494e-06, "log_odds_chosen": 1.6197320222854614, "log_odds_ratio": -0.2501586079597473, "logits/chosen": -0.821234941482544, "logits/rejected": -0.8828185796737671, "logps/chosen": -0.6364636421203613, "logps/rejected": -1.5162639617919922, "loss": 1.1, "nll_loss": 0.9917424917221069, "rewards/accuracies": 1.0, "rewards/chosen": -0.0636463612318039, "rewards/margins": 0.08798003196716309, "rewards/rejected": -0.15162639319896698, "step": 4012 }, { "epoch": 2.448070764068934, "grad_norm": 1.4251728057861328, "learning_rate": 4.0974892835272496e-06, "log_odds_chosen": 1.6616922616958618, "log_odds_ratio": -0.42782220244407654, "logits/chosen": -0.8438749313354492, "logits/rejected": -0.887122392654419, "logps/chosen": -0.9450898170471191, "logps/rejected": -2.157555103302002, "loss": 1.0386, "nll_loss": 1.1604385375976562, "rewards/accuracies": 0.75, "rewards/chosen": -0.09450897574424744, "rewards/margins": 0.12124653905630112, "rewards/rejected": -0.21575552225112915, "step": 4013 }, { "epoch": 2.4486807991459507, "grad_norm": 1.808130145072937, "learning_rate": 4.0965094917330065e-06, "log_odds_chosen": 1.318291425704956, "log_odds_ratio": -0.5550962686538696, "logits/chosen": -1.0093481540679932, "logits/rejected": -1.009937047958374, "logps/chosen": -0.6173197627067566, "logps/rejected": -1.7085511684417725, "loss": 1.1041, "nll_loss": 0.9991564750671387, "rewards/accuracies": 0.75, "rewards/chosen": -0.0617319792509079, "rewards/margins": 0.10912314057350159, "rewards/rejected": -0.17085511982440948, "step": 4014 }, { "epoch": 2.449290834222968, "grad_norm": 1.413088083267212, "learning_rate": 4.095529699938763e-06, "log_odds_chosen": 0.5788511633872986, "log_odds_ratio": -0.5004204511642456, "logits/chosen": -0.9091132879257202, "logits/rejected": -0.8118089437484741, "logps/chosen": -0.8368625640869141, "logps/rejected": -1.192859411239624, "loss": 1.0132, "nll_loss": 1.2281107902526855, "rewards/accuracies": 0.75, "rewards/chosen": -0.08368626236915588, "rewards/margins": 0.035599686205387115, "rewards/rejected": -0.1192859411239624, "step": 4015 }, { "epoch": 2.449900869299985, "grad_norm": 3.551441192626953, "learning_rate": 4.094549908144519e-06, "log_odds_chosen": 1.9936681985855103, "log_odds_ratio": -0.46841907501220703, "logits/chosen": -1.0485081672668457, "logits/rejected": -1.0909966230392456, "logps/chosen": -0.8299684524536133, "logps/rejected": -2.3654162883758545, "loss": 1.0109, "nll_loss": 1.0607134103775024, "rewards/accuracies": 0.5, "rewards/chosen": -0.08299686014652252, "rewards/margins": 0.15354476869106293, "rewards/rejected": -0.23654161393642426, "step": 4016 }, { "epoch": 2.4505109043770017, "grad_norm": 4.15151309967041, "learning_rate": 4.093570116350276e-06, "log_odds_chosen": 2.278794050216675, "log_odds_ratio": -0.2881375551223755, "logits/chosen": -0.8652855157852173, "logits/rejected": -1.0286619663238525, "logps/chosen": -0.7683653831481934, "logps/rejected": -2.548848867416382, "loss": 1.0624, "nll_loss": 0.9534700512886047, "rewards/accuracies": 0.875, "rewards/chosen": -0.07683654129505157, "rewards/margins": 0.17804834246635437, "rewards/rejected": -0.25488486886024475, "step": 4017 }, { "epoch": 2.4511209394540185, "grad_norm": 2.277754068374634, "learning_rate": 4.092590324556032e-06, "log_odds_chosen": 2.897136926651001, "log_odds_ratio": -0.37611329555511475, "logits/chosen": -1.1396636962890625, "logits/rejected": -1.0244221687316895, "logps/chosen": -1.0189411640167236, "logps/rejected": -3.590630054473877, "loss": 1.3009, "nll_loss": 1.2650833129882812, "rewards/accuracies": 0.625, "rewards/chosen": -0.10189412534236908, "rewards/margins": 0.25716888904571533, "rewards/rejected": -0.3590630292892456, "step": 4018 }, { "epoch": 2.4517309745310354, "grad_norm": 1.4964464902877808, "learning_rate": 4.091610532761788e-06, "log_odds_chosen": 1.479604721069336, "log_odds_ratio": -0.30550262331962585, "logits/chosen": -0.6212495565414429, "logits/rejected": -0.6067937612533569, "logps/chosen": -0.5380028486251831, "logps/rejected": -1.4887292385101318, "loss": 0.8494, "nll_loss": 0.7407189011573792, "rewards/accuracies": 1.0, "rewards/chosen": -0.05380028486251831, "rewards/margins": 0.09507264196872711, "rewards/rejected": -0.14887292683124542, "step": 4019 }, { "epoch": 2.4523410096080527, "grad_norm": 1.4916707277297974, "learning_rate": 4.090630740967544e-06, "log_odds_chosen": 2.89483904838562, "log_odds_ratio": -0.2767585515975952, "logits/chosen": -0.7793004512786865, "logits/rejected": -0.9295477867126465, "logps/chosen": -0.6614230275154114, "logps/rejected": -2.868286609649658, "loss": 0.9986, "nll_loss": 0.7470055222511292, "rewards/accuracies": 0.875, "rewards/chosen": -0.06614230573177338, "rewards/margins": 0.22068636119365692, "rewards/rejected": -0.2868286669254303, "step": 4020 }, { "epoch": 2.4529510446850695, "grad_norm": 1.4685413837432861, "learning_rate": 4.0896509491733e-06, "log_odds_chosen": 1.0895435810089111, "log_odds_ratio": -0.6304906010627747, "logits/chosen": -0.9462602138519287, "logits/rejected": -0.8817389607429504, "logps/chosen": -1.0549284219741821, "logps/rejected": -1.9464054107666016, "loss": 1.1179, "nll_loss": 1.448959231376648, "rewards/accuracies": 0.5, "rewards/chosen": -0.10549283772706985, "rewards/margins": 0.08914770931005478, "rewards/rejected": -0.19464054703712463, "step": 4021 }, { "epoch": 2.4535610797620864, "grad_norm": 1.691564679145813, "learning_rate": 4.088671157379057e-06, "log_odds_chosen": 0.6532695889472961, "log_odds_ratio": -0.6264371871948242, "logits/chosen": -0.7641379833221436, "logits/rejected": -0.8558540344238281, "logps/chosen": -0.7808912992477417, "logps/rejected": -1.2758175134658813, "loss": 1.0216, "nll_loss": 1.092684268951416, "rewards/accuracies": 0.625, "rewards/chosen": -0.078089140355587, "rewards/margins": 0.04949261248111725, "rewards/rejected": -0.12758174538612366, "step": 4022 }, { "epoch": 2.454171114839103, "grad_norm": 1.8373697996139526, "learning_rate": 4.087691365584813e-06, "log_odds_chosen": 1.5403125286102295, "log_odds_ratio": -0.5249049067497253, "logits/chosen": -0.9699171781539917, "logits/rejected": -0.8825682401657104, "logps/chosen": -0.81756591796875, "logps/rejected": -1.942610740661621, "loss": 1.1064, "nll_loss": 1.0873925685882568, "rewards/accuracies": 0.625, "rewards/chosen": -0.0817565992474556, "rewards/margins": 0.11250448226928711, "rewards/rejected": -0.1942610740661621, "step": 4023 }, { "epoch": 2.45478114991612, "grad_norm": 1.6870523691177368, "learning_rate": 4.0867115737905694e-06, "log_odds_chosen": 1.1808868646621704, "log_odds_ratio": -0.5334165096282959, "logits/chosen": -0.9627009630203247, "logits/rejected": -0.9040465950965881, "logps/chosen": -0.7737298011779785, "logps/rejected": -1.273960828781128, "loss": 1.0528, "nll_loss": 1.223268985748291, "rewards/accuracies": 0.625, "rewards/chosen": -0.07737298309803009, "rewards/margins": 0.05002310872077942, "rewards/rejected": -0.1273960918188095, "step": 4024 }, { "epoch": 2.455391184993137, "grad_norm": 1.9396545886993408, "learning_rate": 4.0857317819963256e-06, "log_odds_chosen": 1.4997541904449463, "log_odds_ratio": -0.43804463744163513, "logits/chosen": -0.8844562768936157, "logits/rejected": -1.0439709424972534, "logps/chosen": -0.9015432596206665, "logps/rejected": -2.1237778663635254, "loss": 1.2292, "nll_loss": 0.9716801643371582, "rewards/accuracies": 0.75, "rewards/chosen": -0.09015433490276337, "rewards/margins": 0.12222344428300858, "rewards/rejected": -0.21237777173519135, "step": 4025 }, { "epoch": 2.456001220070154, "grad_norm": 1.7539026737213135, "learning_rate": 4.084751990202082e-06, "log_odds_chosen": 0.06698693335056305, "log_odds_ratio": -0.7963563203811646, "logits/chosen": -1.203851580619812, "logits/rejected": -1.0974006652832031, "logps/chosen": -1.2570353746414185, "logps/rejected": -1.3423813581466675, "loss": 1.145, "nll_loss": 1.3397212028503418, "rewards/accuracies": 0.375, "rewards/chosen": -0.12570352852344513, "rewards/margins": 0.008534605614840984, "rewards/rejected": -0.13423815369606018, "step": 4026 }, { "epoch": 2.456611255147171, "grad_norm": 1.9686800241470337, "learning_rate": 4.083772198407838e-06, "log_odds_chosen": 1.014597773551941, "log_odds_ratio": -0.6165768504142761, "logits/chosen": -0.9548628330230713, "logits/rejected": -1.0363909006118774, "logps/chosen": -0.8072081208229065, "logps/rejected": -1.5450801849365234, "loss": 0.9405, "nll_loss": 0.9381282329559326, "rewards/accuracies": 0.375, "rewards/chosen": -0.08072081208229065, "rewards/margins": 0.07378719747066498, "rewards/rejected": -0.15450802445411682, "step": 4027 }, { "epoch": 2.457221290224188, "grad_norm": 2.9120137691497803, "learning_rate": 4.082792406613595e-06, "log_odds_chosen": 1.7147550582885742, "log_odds_ratio": -0.33865731954574585, "logits/chosen": -0.7847563028335571, "logits/rejected": -0.8558672666549683, "logps/chosen": -0.554776668548584, "logps/rejected": -1.8337897062301636, "loss": 0.9633, "nll_loss": 0.6950790882110596, "rewards/accuracies": 0.75, "rewards/chosen": -0.05547766387462616, "rewards/margins": 0.12790131568908691, "rewards/rejected": -0.18337899446487427, "step": 4028 }, { "epoch": 2.4578313253012047, "grad_norm": 1.8588953018188477, "learning_rate": 4.081812614819351e-06, "log_odds_chosen": 2.0390403270721436, "log_odds_ratio": -0.41394680738449097, "logits/chosen": -0.9114466905593872, "logits/rejected": -0.9647508859634399, "logps/chosen": -0.7644202709197998, "logps/rejected": -2.365729808807373, "loss": 1.2193, "nll_loss": 0.904374361038208, "rewards/accuracies": 0.625, "rewards/chosen": -0.07644203305244446, "rewards/margins": 0.16013096272945404, "rewards/rejected": -0.2365729957818985, "step": 4029 }, { "epoch": 2.4584413603782216, "grad_norm": 7.570423126220703, "learning_rate": 4.080832823025106e-06, "log_odds_chosen": 1.100374460220337, "log_odds_ratio": -0.5289479494094849, "logits/chosen": -1.0559443235397339, "logits/rejected": -1.0764756202697754, "logps/chosen": -0.8693832159042358, "logps/rejected": -1.7584726810455322, "loss": 1.101, "nll_loss": 1.1347894668579102, "rewards/accuracies": 0.5, "rewards/chosen": -0.08693831413984299, "rewards/margins": 0.08890894800424576, "rewards/rejected": -0.17584727704524994, "step": 4030 }, { "epoch": 2.459051395455239, "grad_norm": 0.8781692981719971, "learning_rate": 4.079853031230863e-06, "log_odds_chosen": 1.6920706033706665, "log_odds_ratio": -0.3475218117237091, "logits/chosen": -0.8749645948410034, "logits/rejected": -0.924328625202179, "logps/chosen": -0.7020057439804077, "logps/rejected": -1.9054136276245117, "loss": 0.8783, "nll_loss": 0.809404194355011, "rewards/accuracies": 0.875, "rewards/chosen": -0.07020057737827301, "rewards/margins": 0.12034077942371368, "rewards/rejected": -0.1905413568019867, "step": 4031 }, { "epoch": 2.4596614305322557, "grad_norm": 1.7945913076400757, "learning_rate": 4.078873239436619e-06, "log_odds_chosen": 1.2169877290725708, "log_odds_ratio": -0.4674476981163025, "logits/chosen": -0.8956844806671143, "logits/rejected": -0.941929280757904, "logps/chosen": -0.753358006477356, "logps/rejected": -1.50649893283844, "loss": 0.9676, "nll_loss": 1.0141271352767944, "rewards/accuracies": 0.75, "rewards/chosen": -0.0753358006477356, "rewards/margins": 0.07531410455703735, "rewards/rejected": -0.15064990520477295, "step": 4032 }, { "epoch": 2.4602714656092726, "grad_norm": 1.6105698347091675, "learning_rate": 4.077893447642376e-06, "log_odds_chosen": 0.651914119720459, "log_odds_ratio": -0.4604053199291229, "logits/chosen": -0.8783145546913147, "logits/rejected": -0.9755892753601074, "logps/chosen": -0.8285588622093201, "logps/rejected": -1.21000075340271, "loss": 1.1542, "nll_loss": 1.0984140634536743, "rewards/accuracies": 0.875, "rewards/chosen": -0.08285589516162872, "rewards/margins": 0.03814418613910675, "rewards/rejected": -0.12100008130073547, "step": 4033 }, { "epoch": 2.4608815006862894, "grad_norm": 1.1622825860977173, "learning_rate": 4.076913655848132e-06, "log_odds_chosen": 1.5683484077453613, "log_odds_ratio": -0.38996604084968567, "logits/chosen": -0.9307002425193787, "logits/rejected": -0.9596481323242188, "logps/chosen": -0.8777084350585938, "logps/rejected": -2.1444568634033203, "loss": 1.1023, "nll_loss": 1.0673460960388184, "rewards/accuracies": 0.75, "rewards/chosen": -0.08777084201574326, "rewards/margins": 0.1266748607158661, "rewards/rejected": -0.21444568037986755, "step": 4034 }, { "epoch": 2.4614915357633063, "grad_norm": 1.884616732597351, "learning_rate": 4.0759338640538885e-06, "log_odds_chosen": 2.1337873935699463, "log_odds_ratio": -0.5805399417877197, "logits/chosen": -1.0396853685379028, "logits/rejected": -1.129543423652649, "logps/chosen": -0.8960810899734497, "logps/rejected": -2.8250017166137695, "loss": 1.1708, "nll_loss": 1.03822922706604, "rewards/accuracies": 0.625, "rewards/chosen": -0.0896081030368805, "rewards/margins": 0.19289207458496094, "rewards/rejected": -0.28250017762184143, "step": 4035 }, { "epoch": 2.462101570840323, "grad_norm": 1.8577287197113037, "learning_rate": 4.074954072259645e-06, "log_odds_chosen": 1.3350636959075928, "log_odds_ratio": -0.40916070342063904, "logits/chosen": -0.6000884175300598, "logits/rejected": -0.6464594602584839, "logps/chosen": -0.7454444766044617, "logps/rejected": -1.6656264066696167, "loss": 0.9873, "nll_loss": 0.881924033164978, "rewards/accuracies": 0.75, "rewards/chosen": -0.07454444468021393, "rewards/margins": 0.09201818704605103, "rewards/rejected": -0.16656263172626495, "step": 4036 }, { "epoch": 2.4627116059173404, "grad_norm": 4.708134174346924, "learning_rate": 4.073974280465401e-06, "log_odds_chosen": 2.5083110332489014, "log_odds_ratio": -0.19998261332511902, "logits/chosen": -0.6978232860565186, "logits/rejected": -0.8096421957015991, "logps/chosen": -0.5740676522254944, "logps/rejected": -1.8934763669967651, "loss": 1.1212, "nll_loss": 0.954816997051239, "rewards/accuracies": 0.875, "rewards/chosen": -0.05740676820278168, "rewards/margins": 0.13194087147712708, "rewards/rejected": -0.18934763967990875, "step": 4037 }, { "epoch": 2.4633216409943572, "grad_norm": 1.7490564584732056, "learning_rate": 4.072994488671157e-06, "log_odds_chosen": 1.7702997922897339, "log_odds_ratio": -0.4565422832965851, "logits/chosen": -0.7934812903404236, "logits/rejected": -0.8288364410400391, "logps/chosen": -0.6509184241294861, "logps/rejected": -1.9712655544281006, "loss": 1.1197, "nll_loss": 0.9061003923416138, "rewards/accuracies": 0.625, "rewards/chosen": -0.06509184837341309, "rewards/margins": 0.13203470408916473, "rewards/rejected": -0.197126567363739, "step": 4038 }, { "epoch": 2.463931676071374, "grad_norm": 1.6579201221466064, "learning_rate": 4.072014696876914e-06, "log_odds_chosen": 0.39352285861968994, "log_odds_ratio": -0.5680838823318481, "logits/chosen": -0.8548651337623596, "logits/rejected": -0.7754751443862915, "logps/chosen": -0.9198557138442993, "logps/rejected": -1.160121202468872, "loss": 0.9198, "nll_loss": 1.048030972480774, "rewards/accuracies": 0.75, "rewards/chosen": -0.09198557585477829, "rewards/margins": 0.024026552215218544, "rewards/rejected": -0.11601212620735168, "step": 4039 }, { "epoch": 2.464541711148391, "grad_norm": 1.9866904020309448, "learning_rate": 4.07103490508267e-06, "log_odds_chosen": 1.3951106071472168, "log_odds_ratio": -0.4181610941886902, "logits/chosen": -0.8466808795928955, "logits/rejected": -0.9060536026954651, "logps/chosen": -0.8574527502059937, "logps/rejected": -1.9843862056732178, "loss": 1.0025, "nll_loss": 1.1341686248779297, "rewards/accuracies": 0.875, "rewards/chosen": -0.08574527502059937, "rewards/margins": 0.11269335448741913, "rewards/rejected": -0.1984386146068573, "step": 4040 }, { "epoch": 2.465151746225408, "grad_norm": 2.1123886108398438, "learning_rate": 4.070055113288426e-06, "log_odds_chosen": 2.132603168487549, "log_odds_ratio": -0.39060384035110474, "logits/chosen": -0.6407643556594849, "logits/rejected": -0.7278345227241516, "logps/chosen": -0.7528643608093262, "logps/rejected": -2.357888698577881, "loss": 1.0913, "nll_loss": 0.9499194025993347, "rewards/accuracies": 0.625, "rewards/chosen": -0.07528643310070038, "rewards/margins": 0.16050246357917786, "rewards/rejected": -0.23578888177871704, "step": 4041 }, { "epoch": 2.465761781302425, "grad_norm": 1.6541348695755005, "learning_rate": 4.069075321494182e-06, "log_odds_chosen": 1.9900572299957275, "log_odds_ratio": -0.3618365526199341, "logits/chosen": -0.9570668935775757, "logits/rejected": -0.8995953798294067, "logps/chosen": -0.9016464948654175, "logps/rejected": -2.4834392070770264, "loss": 1.0031, "nll_loss": 1.0274884700775146, "rewards/accuracies": 0.75, "rewards/chosen": -0.09016464650630951, "rewards/margins": 0.15817926824092865, "rewards/rejected": -0.24834391474723816, "step": 4042 }, { "epoch": 2.466371816379442, "grad_norm": 8.52053451538086, "learning_rate": 4.068095529699938e-06, "log_odds_chosen": 2.0770065784454346, "log_odds_ratio": -0.3232158422470093, "logits/chosen": -0.6665194630622864, "logits/rejected": -1.013284683227539, "logps/chosen": -0.6567776203155518, "logps/rejected": -2.179237127304077, "loss": 0.9836, "nll_loss": 0.8101468086242676, "rewards/accuracies": 0.875, "rewards/chosen": -0.06567776203155518, "rewards/margins": 0.15224596858024597, "rewards/rejected": -0.21792373061180115, "step": 4043 }, { "epoch": 2.4669818514564588, "grad_norm": 1.8400464057922363, "learning_rate": 4.0671157379056944e-06, "log_odds_chosen": 3.048130989074707, "log_odds_ratio": -0.36827531456947327, "logits/chosen": -0.8536072969436646, "logits/rejected": -1.016485333442688, "logps/chosen": -0.6350059509277344, "logps/rejected": -3.1732945442199707, "loss": 1.1023, "nll_loss": 0.9138284921646118, "rewards/accuracies": 0.75, "rewards/chosen": -0.06350059807300568, "rewards/margins": 0.25382882356643677, "rewards/rejected": -0.31732943654060364, "step": 4044 }, { "epoch": 2.4675918865334756, "grad_norm": 1.8396601676940918, "learning_rate": 4.066135946111451e-06, "log_odds_chosen": 2.1494789123535156, "log_odds_ratio": -0.31427305936813354, "logits/chosen": -0.7845752239227295, "logits/rejected": -0.9885300397872925, "logps/chosen": -0.7570037245750427, "logps/rejected": -2.398794174194336, "loss": 1.0475, "nll_loss": 0.9387339949607849, "rewards/accuracies": 1.0, "rewards/chosen": -0.07570037245750427, "rewards/margins": 0.16417905688285828, "rewards/rejected": -0.23987941443920135, "step": 4045 }, { "epoch": 2.4682019216104925, "grad_norm": 1.4086743593215942, "learning_rate": 4.0651561543172075e-06, "log_odds_chosen": 1.8242194652557373, "log_odds_ratio": -0.34864312410354614, "logits/chosen": -0.8388205766677856, "logits/rejected": -0.9844834804534912, "logps/chosen": -0.6220178604125977, "logps/rejected": -2.0261662006378174, "loss": 0.9983, "nll_loss": 0.7098769545555115, "rewards/accuracies": 0.875, "rewards/chosen": -0.06220178306102753, "rewards/margins": 0.14041483402252197, "rewards/rejected": -0.2026166170835495, "step": 4046 }, { "epoch": 2.4688119566875093, "grad_norm": 2.9421298503875732, "learning_rate": 4.064176362522964e-06, "log_odds_chosen": 1.3758081197738647, "log_odds_ratio": -0.5423816442489624, "logits/chosen": -0.7385363578796387, "logits/rejected": -0.8820555210113525, "logps/chosen": -0.7960091829299927, "logps/rejected": -1.9393850564956665, "loss": 1.0851, "nll_loss": 0.9923474192619324, "rewards/accuracies": 0.75, "rewards/chosen": -0.07960091531276703, "rewards/margins": 0.11433760076761246, "rewards/rejected": -0.1939385086297989, "step": 4047 }, { "epoch": 2.4694219917645266, "grad_norm": 5.744729518890381, "learning_rate": 4.06319657072872e-06, "log_odds_chosen": 2.8702468872070312, "log_odds_ratio": -0.35816383361816406, "logits/chosen": -0.9256716370582581, "logits/rejected": -1.0363879203796387, "logps/chosen": -0.8099595904350281, "logps/rejected": -3.337716817855835, "loss": 1.2074, "nll_loss": 0.9427430629730225, "rewards/accuracies": 0.875, "rewards/chosen": -0.08099596202373505, "rewards/margins": 0.25277572870254517, "rewards/rejected": -0.333771675825119, "step": 4048 }, { "epoch": 2.4700320268415434, "grad_norm": 1.7586870193481445, "learning_rate": 4.062216778934476e-06, "log_odds_chosen": 2.6942296028137207, "log_odds_ratio": -0.3366156816482544, "logits/chosen": -0.6850230693817139, "logits/rejected": -1.0035194158554077, "logps/chosen": -0.8148418664932251, "logps/rejected": -2.9133872985839844, "loss": 0.9312, "nll_loss": 0.8504127264022827, "rewards/accuracies": 0.75, "rewards/chosen": -0.08148418366909027, "rewards/margins": 0.20985457301139832, "rewards/rejected": -0.2913387715816498, "step": 4049 }, { "epoch": 2.4706420619185603, "grad_norm": 1.5875835418701172, "learning_rate": 4.061236987140233e-06, "log_odds_chosen": 1.849531650543213, "log_odds_ratio": -0.4204341769218445, "logits/chosen": -0.9371914267539978, "logits/rejected": -1.055673360824585, "logps/chosen": -0.8556787371635437, "logps/rejected": -1.9808404445648193, "loss": 1.0037, "nll_loss": 1.2104252576828003, "rewards/accuracies": 0.75, "rewards/chosen": -0.08556786924600601, "rewards/margins": 0.11251617968082428, "rewards/rejected": -0.1980840414762497, "step": 4050 }, { "epoch": 2.471252096995577, "grad_norm": 1.8141061067581177, "learning_rate": 4.060257195345989e-06, "log_odds_chosen": 0.4264218211174011, "log_odds_ratio": -0.5811172723770142, "logits/chosen": -0.9079562425613403, "logits/rejected": -0.869163990020752, "logps/chosen": -0.8424203991889954, "logps/rejected": -1.0283859968185425, "loss": 1.2504, "nll_loss": 1.1346684694290161, "rewards/accuracies": 0.625, "rewards/chosen": -0.08424204587936401, "rewards/margins": 0.018596554175019264, "rewards/rejected": -0.10283860564231873, "step": 4051 }, { "epoch": 2.4718621320725944, "grad_norm": 2.049067497253418, "learning_rate": 4.059277403551745e-06, "log_odds_chosen": 1.1685152053833008, "log_odds_ratio": -0.650091290473938, "logits/chosen": -0.8513847589492798, "logits/rejected": -0.8937761187553406, "logps/chosen": -0.8445690870285034, "logps/rejected": -1.7783689498901367, "loss": 1.1656, "nll_loss": 1.0595126152038574, "rewards/accuracies": 0.5, "rewards/chosen": -0.0844569131731987, "rewards/margins": 0.09337998926639557, "rewards/rejected": -0.17783689498901367, "step": 4052 }, { "epoch": 2.4724721671496113, "grad_norm": 1.446376085281372, "learning_rate": 4.058297611757501e-06, "log_odds_chosen": 1.057381272315979, "log_odds_ratio": -0.6141161918640137, "logits/chosen": -0.738277792930603, "logits/rejected": -0.9180088043212891, "logps/chosen": -0.9049057960510254, "logps/rejected": -1.6986899375915527, "loss": 1.1152, "nll_loss": 1.0278862714767456, "rewards/accuracies": 0.625, "rewards/chosen": -0.09049057960510254, "rewards/margins": 0.07937842607498169, "rewards/rejected": -0.16986900568008423, "step": 4053 }, { "epoch": 2.473082202226628, "grad_norm": 1.2603100538253784, "learning_rate": 4.057317819963257e-06, "log_odds_chosen": 0.6933577060699463, "log_odds_ratio": -0.5395489931106567, "logits/chosen": -0.6686989068984985, "logits/rejected": -0.8781963586807251, "logps/chosen": -0.810943603515625, "logps/rejected": -1.2424136400222778, "loss": 1.0483, "nll_loss": 1.0391793251037598, "rewards/accuracies": 0.75, "rewards/chosen": -0.08109436184167862, "rewards/margins": 0.043146997690200806, "rewards/rejected": -0.12424135208129883, "step": 4054 }, { "epoch": 2.473692237303645, "grad_norm": 0.9734103679656982, "learning_rate": 4.0563380281690135e-06, "log_odds_chosen": 3.192476272583008, "log_odds_ratio": -0.14403080940246582, "logits/chosen": -0.6123138666152954, "logits/rejected": -0.7468070983886719, "logps/chosen": -0.4306514263153076, "logps/rejected": -2.7292981147766113, "loss": 0.9323, "nll_loss": 0.632351815700531, "rewards/accuracies": 1.0, "rewards/chosen": -0.043065138161182404, "rewards/margins": 0.2298646867275238, "rewards/rejected": -0.2729298174381256, "step": 4055 }, { "epoch": 2.474302272380662, "grad_norm": 1.3009674549102783, "learning_rate": 4.0553582363747704e-06, "log_odds_chosen": 2.4311904907226562, "log_odds_ratio": -0.6051723957061768, "logits/chosen": -0.9515325427055359, "logits/rejected": -1.018188714981079, "logps/chosen": -0.9977748990058899, "logps/rejected": -3.284090757369995, "loss": 1.1512, "nll_loss": 1.0857129096984863, "rewards/accuracies": 0.5, "rewards/chosen": -0.09977748245000839, "rewards/margins": 0.2286316156387329, "rewards/rejected": -0.3284091055393219, "step": 4056 }, { "epoch": 2.4749123074576787, "grad_norm": 1.7571955919265747, "learning_rate": 4.0543784445805266e-06, "log_odds_chosen": 1.0097781419754028, "log_odds_ratio": -0.39314907789230347, "logits/chosen": -0.6114814877510071, "logits/rejected": -0.5627289414405823, "logps/chosen": -0.7223908305168152, "logps/rejected": -1.2813255786895752, "loss": 1.2521, "nll_loss": 1.1799204349517822, "rewards/accuracies": 0.75, "rewards/chosen": -0.07223907858133316, "rewards/margins": 0.055893465876579285, "rewards/rejected": -0.12813255190849304, "step": 4057 }, { "epoch": 2.4755223425346955, "grad_norm": 1.7273145914077759, "learning_rate": 4.0533986527862835e-06, "log_odds_chosen": 2.171827793121338, "log_odds_ratio": -0.2630775570869446, "logits/chosen": -0.7298201322555542, "logits/rejected": -0.917039155960083, "logps/chosen": -0.6334376931190491, "logps/rejected": -2.181410074234009, "loss": 1.0198, "nll_loss": 0.772412896156311, "rewards/accuracies": 1.0, "rewards/chosen": -0.06334377080202103, "rewards/margins": 0.1547972410917282, "rewards/rejected": -0.21814101934432983, "step": 4058 }, { "epoch": 2.476132377611713, "grad_norm": 1.5739574432373047, "learning_rate": 4.052418860992039e-06, "log_odds_chosen": 1.202353596687317, "log_odds_ratio": -0.5496606826782227, "logits/chosen": -0.9411571025848389, "logits/rejected": -0.8987220525741577, "logps/chosen": -0.9619967937469482, "logps/rejected": -1.8975671529769897, "loss": 1.0588, "nll_loss": 1.2190335988998413, "rewards/accuracies": 0.75, "rewards/chosen": -0.09619967639446259, "rewards/margins": 0.09355702996253967, "rewards/rejected": -0.18975672125816345, "step": 4059 }, { "epoch": 2.4767424126887296, "grad_norm": 1.1419973373413086, "learning_rate": 4.051439069197795e-06, "log_odds_chosen": 2.1963207721710205, "log_odds_ratio": -0.48754703998565674, "logits/chosen": -1.0199183225631714, "logits/rejected": -1.0372804403305054, "logps/chosen": -0.869949460029602, "logps/rejected": -2.696629047393799, "loss": 0.9863, "nll_loss": 1.0669498443603516, "rewards/accuracies": 0.75, "rewards/chosen": -0.0869949460029602, "rewards/margins": 0.18266795575618744, "rewards/rejected": -0.26966291666030884, "step": 4060 }, { "epoch": 2.4773524477657465, "grad_norm": 2.657069444656372, "learning_rate": 4.050459277403552e-06, "log_odds_chosen": 2.1051158905029297, "log_odds_ratio": -0.4166651666164398, "logits/chosen": -0.62224280834198, "logits/rejected": -0.8778536915779114, "logps/chosen": -0.7426272034645081, "logps/rejected": -2.3948311805725098, "loss": 0.8957, "nll_loss": 0.7355575561523438, "rewards/accuracies": 0.75, "rewards/chosen": -0.07426272332668304, "rewards/margins": 0.16522040963172913, "rewards/rejected": -0.23948311805725098, "step": 4061 }, { "epoch": 2.4779624828427633, "grad_norm": 3.4892184734344482, "learning_rate": 4.049479485609308e-06, "log_odds_chosen": 0.5272315740585327, "log_odds_ratio": -0.558782696723938, "logits/chosen": -1.0865864753723145, "logits/rejected": -1.0741119384765625, "logps/chosen": -1.1633065938949585, "logps/rejected": -1.6079890727996826, "loss": 1.2528, "nll_loss": 1.3060317039489746, "rewards/accuracies": 0.75, "rewards/chosen": -0.11633065342903137, "rewards/margins": 0.04446825757622719, "rewards/rejected": -0.16079890727996826, "step": 4062 }, { "epoch": 2.4785725179197806, "grad_norm": 2.5926880836486816, "learning_rate": 4.048499693815064e-06, "log_odds_chosen": 0.6435266733169556, "log_odds_ratio": -0.5684666037559509, "logits/chosen": -0.890674352645874, "logits/rejected": -0.9085339903831482, "logps/chosen": -0.7822591662406921, "logps/rejected": -1.2470494508743286, "loss": 1.0067, "nll_loss": 0.8792884349822998, "rewards/accuracies": 0.75, "rewards/chosen": -0.07822591066360474, "rewards/margins": 0.046479031443595886, "rewards/rejected": -0.12470494210720062, "step": 4063 }, { "epoch": 2.4791825529967975, "grad_norm": 1.5515046119689941, "learning_rate": 4.047519902020821e-06, "log_odds_chosen": 2.7620391845703125, "log_odds_ratio": -0.341571569442749, "logits/chosen": -0.7516696453094482, "logits/rejected": -0.8822001814842224, "logps/chosen": -0.5302518606185913, "logps/rejected": -2.630919933319092, "loss": 0.9706, "nll_loss": 0.7158744931221008, "rewards/accuracies": 0.875, "rewards/chosen": -0.05302518606185913, "rewards/margins": 0.2100668102502823, "rewards/rejected": -0.2630919814109802, "step": 4064 }, { "epoch": 2.4797925880738143, "grad_norm": 1.1534090042114258, "learning_rate": 4.046540110226576e-06, "log_odds_chosen": 1.121524453163147, "log_odds_ratio": -0.45477941632270813, "logits/chosen": -0.8160515427589417, "logits/rejected": -0.9806548357009888, "logps/chosen": -0.8350478410720825, "logps/rejected": -1.6848275661468506, "loss": 0.9257, "nll_loss": 0.8610063791275024, "rewards/accuracies": 0.75, "rewards/chosen": -0.08350478112697601, "rewards/margins": 0.08497797697782516, "rewards/rejected": -0.16848275065422058, "step": 4065 }, { "epoch": 2.480402623150831, "grad_norm": 1.1880056858062744, "learning_rate": 4.0455603184323325e-06, "log_odds_chosen": 0.3124449849128723, "log_odds_ratio": -0.6511313915252686, "logits/chosen": -0.6853986382484436, "logits/rejected": -0.7399997711181641, "logps/chosen": -1.0334514379501343, "logps/rejected": -1.2715685367584229, "loss": 1.2339, "nll_loss": 1.2863529920578003, "rewards/accuracies": 0.375, "rewards/chosen": -0.10334514081478119, "rewards/margins": 0.023811718448996544, "rewards/rejected": -0.12715685367584229, "step": 4066 }, { "epoch": 2.481012658227848, "grad_norm": 1.9609904289245605, "learning_rate": 4.0445805266380895e-06, "log_odds_chosen": 2.0009477138519287, "log_odds_ratio": -0.47158971428871155, "logits/chosen": -0.7531700134277344, "logits/rejected": -0.8658089637756348, "logps/chosen": -0.6286449432373047, "logps/rejected": -2.143780469894409, "loss": 0.9585, "nll_loss": 0.8703737854957581, "rewards/accuracies": 0.75, "rewards/chosen": -0.06286448985338211, "rewards/margins": 0.15151356160640717, "rewards/rejected": -0.21437805891036987, "step": 4067 }, { "epoch": 2.481622693304865, "grad_norm": 1.6595964431762695, "learning_rate": 4.043600734843846e-06, "log_odds_chosen": 0.546998143196106, "log_odds_ratio": -0.6131468415260315, "logits/chosen": -0.914935827255249, "logits/rejected": -0.7984838485717773, "logps/chosen": -0.7920410633087158, "logps/rejected": -1.223425269126892, "loss": 1.1121, "nll_loss": 1.1907802820205688, "rewards/accuracies": 0.625, "rewards/chosen": -0.07920410484075546, "rewards/margins": 0.04313841834664345, "rewards/rejected": -0.12234252691268921, "step": 4068 }, { "epoch": 2.482232728381882, "grad_norm": 1.914587140083313, "learning_rate": 4.042620943049602e-06, "log_odds_chosen": 2.4130849838256836, "log_odds_ratio": -0.2564052939414978, "logits/chosen": -0.7473115921020508, "logits/rejected": -0.8906888961791992, "logps/chosen": -0.614694356918335, "logps/rejected": -2.3689703941345215, "loss": 1.0928, "nll_loss": 0.7965954542160034, "rewards/accuracies": 0.875, "rewards/chosen": -0.061469435691833496, "rewards/margins": 0.1754276305437088, "rewards/rejected": -0.2368970513343811, "step": 4069 }, { "epoch": 2.482842763458899, "grad_norm": 2.644312620162964, "learning_rate": 4.041641151255358e-06, "log_odds_chosen": 1.6338093280792236, "log_odds_ratio": -0.395338237285614, "logits/chosen": -0.9455209374427795, "logits/rejected": -0.8735820055007935, "logps/chosen": -0.8741368651390076, "logps/rejected": -2.1837105751037598, "loss": 1.168, "nll_loss": 1.1351882219314575, "rewards/accuracies": 0.75, "rewards/chosen": -0.08741368353366852, "rewards/margins": 0.13095739483833313, "rewards/rejected": -0.21837106347084045, "step": 4070 }, { "epoch": 2.483452798535916, "grad_norm": 1.1405091285705566, "learning_rate": 4.040661359461114e-06, "log_odds_chosen": 1.4538686275482178, "log_odds_ratio": -0.3287408649921417, "logits/chosen": -0.9184456467628479, "logits/rejected": -0.9364739060401917, "logps/chosen": -0.8297708630561829, "logps/rejected": -1.936697244644165, "loss": 1.0697, "nll_loss": 1.0663129091262817, "rewards/accuracies": 1.0, "rewards/chosen": -0.08297708630561829, "rewards/margins": 0.11069264262914658, "rewards/rejected": -0.19366973638534546, "step": 4071 }, { "epoch": 2.4840628336129327, "grad_norm": 4.174835681915283, "learning_rate": 4.039681567666871e-06, "log_odds_chosen": 1.9604750871658325, "log_odds_ratio": -0.4233759641647339, "logits/chosen": -0.9347819089889526, "logits/rejected": -1.08634614944458, "logps/chosen": -0.9025164246559143, "logps/rejected": -2.2873802185058594, "loss": 1.2304, "nll_loss": 1.3207473754882812, "rewards/accuracies": 0.875, "rewards/chosen": -0.09025164693593979, "rewards/margins": 0.1384863704442978, "rewards/rejected": -0.2287379950284958, "step": 4072 }, { "epoch": 2.4846728686899495, "grad_norm": 1.7894620895385742, "learning_rate": 4.038701775872627e-06, "log_odds_chosen": 1.506756067276001, "log_odds_ratio": -0.49398696422576904, "logits/chosen": -0.9421275854110718, "logits/rejected": -0.8924733400344849, "logps/chosen": -0.8823264837265015, "logps/rejected": -1.9880303144454956, "loss": 1.0905, "nll_loss": 1.3018157482147217, "rewards/accuracies": 0.75, "rewards/chosen": -0.08823265135288239, "rewards/margins": 0.11057039350271225, "rewards/rejected": -0.19880303740501404, "step": 4073 }, { "epoch": 2.485282903766967, "grad_norm": 1.8076672554016113, "learning_rate": 4.037721984078383e-06, "log_odds_chosen": 1.942472219467163, "log_odds_ratio": -0.5376855134963989, "logits/chosen": -0.8014484643936157, "logits/rejected": -0.799098551273346, "logps/chosen": -0.6361316442489624, "logps/rejected": -2.1009833812713623, "loss": 0.9868, "nll_loss": 1.0245147943496704, "rewards/accuracies": 0.75, "rewards/chosen": -0.0636131688952446, "rewards/margins": 0.14648517966270447, "rewards/rejected": -0.21009835600852966, "step": 4074 }, { "epoch": 2.4858929388439837, "grad_norm": 0.9254853129386902, "learning_rate": 4.03674219228414e-06, "log_odds_chosen": 0.6727989912033081, "log_odds_ratio": -0.5859856605529785, "logits/chosen": -0.9365510940551758, "logits/rejected": -0.8936387300491333, "logps/chosen": -0.8014878630638123, "logps/rejected": -1.2887539863586426, "loss": 1.213, "nll_loss": 1.1199191808700562, "rewards/accuracies": 0.75, "rewards/chosen": -0.08014879375696182, "rewards/margins": 0.04872661828994751, "rewards/rejected": -0.12887540459632874, "step": 4075 }, { "epoch": 2.4865029739210005, "grad_norm": 4.024227142333984, "learning_rate": 4.0357624004898954e-06, "log_odds_chosen": 1.8025827407836914, "log_odds_ratio": -0.3304700255393982, "logits/chosen": -0.9198892712593079, "logits/rejected": -0.9439499378204346, "logps/chosen": -0.8332804441452026, "logps/rejected": -2.1153030395507812, "loss": 0.9813, "nll_loss": 0.9362738132476807, "rewards/accuracies": 0.875, "rewards/chosen": -0.08332803845405579, "rewards/margins": 0.12820225954055786, "rewards/rejected": -0.21153029799461365, "step": 4076 }, { "epoch": 2.4871130089980173, "grad_norm": 1.5458407402038574, "learning_rate": 4.0347826086956515e-06, "log_odds_chosen": 1.6540014743804932, "log_odds_ratio": -0.5119696259498596, "logits/chosen": -0.9717764258384705, "logits/rejected": -0.9864884614944458, "logps/chosen": -0.8302393555641174, "logps/rejected": -2.1056442260742188, "loss": 1.1729, "nll_loss": 0.9658359289169312, "rewards/accuracies": 0.5, "rewards/chosen": -0.08302393555641174, "rewards/margins": 0.1275404691696167, "rewards/rejected": -0.21056443452835083, "step": 4077 }, { "epoch": 2.487723044075034, "grad_norm": 1.5025829076766968, "learning_rate": 4.0338028169014085e-06, "log_odds_chosen": 0.7733012437820435, "log_odds_ratio": -0.6108724474906921, "logits/chosen": -1.019778847694397, "logits/rejected": -1.0319987535476685, "logps/chosen": -1.0650057792663574, "logps/rejected": -1.67189621925354, "loss": 1.1546, "nll_loss": 1.2175875902175903, "rewards/accuracies": 0.5, "rewards/chosen": -0.10650058090686798, "rewards/margins": 0.0606890469789505, "rewards/rejected": -0.16718962788581848, "step": 4078 }, { "epoch": 2.488333079152051, "grad_norm": 1.2164602279663086, "learning_rate": 4.032823025107165e-06, "log_odds_chosen": 3.032418727874756, "log_odds_ratio": -0.2608933746814728, "logits/chosen": -0.8848727941513062, "logits/rejected": -1.0707513093948364, "logps/chosen": -0.742598295211792, "logps/rejected": -3.219479560852051, "loss": 1.1657, "nll_loss": 1.1764909029006958, "rewards/accuracies": 0.875, "rewards/chosen": -0.07425982505083084, "rewards/margins": 0.24768811464309692, "rewards/rejected": -0.32194799184799194, "step": 4079 }, { "epoch": 2.4889431142290683, "grad_norm": 1.1417735815048218, "learning_rate": 4.031843233312921e-06, "log_odds_chosen": 1.513311505317688, "log_odds_ratio": -0.3811899423599243, "logits/chosen": -0.8118539452552795, "logits/rejected": -0.7441635131835938, "logps/chosen": -0.5853819847106934, "logps/rejected": -1.5901963710784912, "loss": 1.0924, "nll_loss": 0.8317167162895203, "rewards/accuracies": 0.875, "rewards/chosen": -0.05853819102048874, "rewards/margins": 0.10048143565654755, "rewards/rejected": -0.15901963412761688, "step": 4080 }, { "epoch": 2.489553149306085, "grad_norm": 1.564510464668274, "learning_rate": 4.030863441518678e-06, "log_odds_chosen": 1.5849673748016357, "log_odds_ratio": -0.5473985075950623, "logits/chosen": -0.9309645891189575, "logits/rejected": -0.8684384822845459, "logps/chosen": -0.7890084385871887, "logps/rejected": -1.9774259328842163, "loss": 1.0882, "nll_loss": 1.0676130056381226, "rewards/accuracies": 0.75, "rewards/chosen": -0.07890083640813828, "rewards/margins": 0.11884176731109619, "rewards/rejected": -0.19774259626865387, "step": 4081 }, { "epoch": 2.490163184383102, "grad_norm": 1.4215574264526367, "learning_rate": 4.029883649724433e-06, "log_odds_chosen": 3.8152661323547363, "log_odds_ratio": -0.22716690599918365, "logits/chosen": -0.7995450496673584, "logits/rejected": -0.9749985933303833, "logps/chosen": -0.653313398361206, "logps/rejected": -3.7914657592773438, "loss": 0.9338, "nll_loss": 0.8783109784126282, "rewards/accuracies": 0.875, "rewards/chosen": -0.0653313398361206, "rewards/margins": 0.31381526589393616, "rewards/rejected": -0.37914660573005676, "step": 4082 }, { "epoch": 2.490773219460119, "grad_norm": 1.2482017278671265, "learning_rate": 4.028903857930189e-06, "log_odds_chosen": 0.16050943732261658, "log_odds_ratio": -0.9524829983711243, "logits/chosen": -1.1694047451019287, "logits/rejected": -0.9718747138977051, "logps/chosen": -1.1338679790496826, "logps/rejected": -1.2811001539230347, "loss": 1.2344, "nll_loss": 1.3978419303894043, "rewards/accuracies": 0.375, "rewards/chosen": -0.11338678747415543, "rewards/margins": 0.014723224565386772, "rewards/rejected": -0.12811002135276794, "step": 4083 }, { "epoch": 2.4913832545371357, "grad_norm": 1.4173362255096436, "learning_rate": 4.027924066135946e-06, "log_odds_chosen": 2.398996353149414, "log_odds_ratio": -0.4435286521911621, "logits/chosen": -0.8835177421569824, "logits/rejected": -1.013183832168579, "logps/chosen": -0.9293235540390015, "logps/rejected": -2.90480375289917, "loss": 1.1647, "nll_loss": 1.0972588062286377, "rewards/accuracies": 0.625, "rewards/chosen": -0.09293235838413239, "rewards/margins": 0.1975480318069458, "rewards/rejected": -0.290480375289917, "step": 4084 }, { "epoch": 2.491993289614153, "grad_norm": 2.26491379737854, "learning_rate": 4.026944274341702e-06, "log_odds_chosen": 0.7996444702148438, "log_odds_ratio": -0.48673492670059204, "logits/chosen": -0.9967496991157532, "logits/rejected": -0.9896301627159119, "logps/chosen": -0.7204980850219727, "logps/rejected": -1.2039365768432617, "loss": 1.0572, "nll_loss": 1.181893229484558, "rewards/accuracies": 0.75, "rewards/chosen": -0.0720498114824295, "rewards/margins": 0.04834384471178055, "rewards/rejected": -0.12039365619421005, "step": 4085 }, { "epoch": 2.49260332469117, "grad_norm": 12.981096267700195, "learning_rate": 4.025964482547459e-06, "log_odds_chosen": 0.3689829111099243, "log_odds_ratio": -0.7813987731933594, "logits/chosen": -0.9525724649429321, "logits/rejected": -0.9516865611076355, "logps/chosen": -0.9386154413223267, "logps/rejected": -1.2414836883544922, "loss": 1.1215, "nll_loss": 1.3114198446273804, "rewards/accuracies": 0.375, "rewards/chosen": -0.09386155009269714, "rewards/margins": 0.030286822468042374, "rewards/rejected": -0.12414836883544922, "step": 4086 }, { "epoch": 2.4932133597681867, "grad_norm": 5.869980812072754, "learning_rate": 4.0249846907532145e-06, "log_odds_chosen": 3.273062229156494, "log_odds_ratio": -0.2768348753452301, "logits/chosen": -0.8472413420677185, "logits/rejected": -1.0219545364379883, "logps/chosen": -0.7229042053222656, "logps/rejected": -3.3740878105163574, "loss": 1.1219, "nll_loss": 0.8654844760894775, "rewards/accuracies": 0.875, "rewards/chosen": -0.07229042053222656, "rewards/margins": 0.26511839032173157, "rewards/rejected": -0.33740881085395813, "step": 4087 }, { "epoch": 2.4938233948452035, "grad_norm": 0.920221209526062, "learning_rate": 4.024004898958971e-06, "log_odds_chosen": 1.9937078952789307, "log_odds_ratio": -0.34872978925704956, "logits/chosen": -0.8275197744369507, "logits/rejected": -1.04161536693573, "logps/chosen": -0.8235939145088196, "logps/rejected": -2.364927291870117, "loss": 1.1201, "nll_loss": 0.9990072250366211, "rewards/accuracies": 0.875, "rewards/chosen": -0.08235939592123032, "rewards/margins": 0.15413333475589752, "rewards/rejected": -0.23649270832538605, "step": 4088 }, { "epoch": 2.4944334299222204, "grad_norm": 1.2228682041168213, "learning_rate": 4.0230251071647276e-06, "log_odds_chosen": 0.8776513934135437, "log_odds_ratio": -0.5192204713821411, "logits/chosen": -0.9242272973060608, "logits/rejected": -0.9563170075416565, "logps/chosen": -0.7470776438713074, "logps/rejected": -1.3555983304977417, "loss": 1.1372, "nll_loss": 1.0416162014007568, "rewards/accuracies": 0.625, "rewards/chosen": -0.0747077688574791, "rewards/margins": 0.060852065682411194, "rewards/rejected": -0.1355598270893097, "step": 4089 }, { "epoch": 2.4950434649992372, "grad_norm": 1.3654975891113281, "learning_rate": 4.022045315370484e-06, "log_odds_chosen": 1.6573188304901123, "log_odds_ratio": -0.4636983275413513, "logits/chosen": -0.953395426273346, "logits/rejected": -0.8064755797386169, "logps/chosen": -0.7617553472518921, "logps/rejected": -2.10587739944458, "loss": 0.8965, "nll_loss": 0.9059407711029053, "rewards/accuracies": 0.875, "rewards/chosen": -0.07617554068565369, "rewards/margins": 0.13441218435764313, "rewards/rejected": -0.21058772504329681, "step": 4090 }, { "epoch": 2.4956535000762545, "grad_norm": 1.3602275848388672, "learning_rate": 4.02106552357624e-06, "log_odds_chosen": 0.6335718631744385, "log_odds_ratio": -0.5438922643661499, "logits/chosen": -1.0232503414154053, "logits/rejected": -0.9879089593887329, "logps/chosen": -0.9706613421440125, "logps/rejected": -1.3042889833450317, "loss": 1.0726, "nll_loss": 1.2990450859069824, "rewards/accuracies": 0.625, "rewards/chosen": -0.09706614166498184, "rewards/margins": 0.03336275741457939, "rewards/rejected": -0.13042889535427094, "step": 4091 }, { "epoch": 2.4962635351532714, "grad_norm": 3.4944911003112793, "learning_rate": 4.020085731781997e-06, "log_odds_chosen": 1.9086089134216309, "log_odds_ratio": -0.3230477571487427, "logits/chosen": -0.6733097434043884, "logits/rejected": -0.7581685781478882, "logps/chosen": -0.6056452393531799, "logps/rejected": -1.8691035509109497, "loss": 0.8948, "nll_loss": 0.7285788655281067, "rewards/accuracies": 0.75, "rewards/chosen": -0.06056452542543411, "rewards/margins": 0.12634584307670593, "rewards/rejected": -0.18691036105155945, "step": 4092 }, { "epoch": 2.496873570230288, "grad_norm": 1.1783689260482788, "learning_rate": 4.019105939987752e-06, "log_odds_chosen": 1.3225672245025635, "log_odds_ratio": -0.32058948278427124, "logits/chosen": -0.9013558626174927, "logits/rejected": -0.8958519697189331, "logps/chosen": -0.6907659769058228, "logps/rejected": -1.486008644104004, "loss": 0.9677, "nll_loss": 0.8475992679595947, "rewards/accuracies": 1.0, "rewards/chosen": -0.06907659769058228, "rewards/margins": 0.07952426373958588, "rewards/rejected": -0.14860086143016815, "step": 4093 }, { "epoch": 2.497483605307305, "grad_norm": 3.3554587364196777, "learning_rate": 4.018126148193508e-06, "log_odds_chosen": 1.7780439853668213, "log_odds_ratio": -0.38737863302230835, "logits/chosen": -0.9525744318962097, "logits/rejected": -0.9080438613891602, "logps/chosen": -0.8918598890304565, "logps/rejected": -2.2505295276641846, "loss": 1.0864, "nll_loss": 1.1716551780700684, "rewards/accuracies": 0.75, "rewards/chosen": -0.08918599784374237, "rewards/margins": 0.13586698472499847, "rewards/rejected": -0.22505296766757965, "step": 4094 }, { "epoch": 2.498093640384322, "grad_norm": 2.0420632362365723, "learning_rate": 4.017146356399265e-06, "log_odds_chosen": 2.081449031829834, "log_odds_ratio": -0.3895919919013977, "logits/chosen": -0.6541274785995483, "logits/rejected": -0.8633551001548767, "logps/chosen": -0.6010918021202087, "logps/rejected": -2.1782584190368652, "loss": 0.873, "nll_loss": 0.6996574997901917, "rewards/accuracies": 0.75, "rewards/chosen": -0.06010918319225311, "rewards/margins": 0.15771666169166565, "rewards/rejected": -0.21782584488391876, "step": 4095 }, { "epoch": 2.498703675461339, "grad_norm": 1.1908814907073975, "learning_rate": 4.016166564605021e-06, "log_odds_chosen": 1.7945270538330078, "log_odds_ratio": -0.3242560923099518, "logits/chosen": -0.538341760635376, "logits/rejected": -0.7118349075317383, "logps/chosen": -0.8118665218353271, "logps/rejected": -2.153679132461548, "loss": 0.9484, "nll_loss": 0.8243551254272461, "rewards/accuracies": 0.875, "rewards/chosen": -0.08118665218353271, "rewards/margins": 0.13418123126029968, "rewards/rejected": -0.21536791324615479, "step": 4096 }, { "epoch": 2.499313710538356, "grad_norm": 7.200560569763184, "learning_rate": 4.015186772810777e-06, "log_odds_chosen": 2.390232801437378, "log_odds_ratio": -0.5317727327346802, "logits/chosen": -0.9198863506317139, "logits/rejected": -0.9154952168464661, "logps/chosen": -0.7975595593452454, "logps/rejected": -2.8034374713897705, "loss": 1.0883, "nll_loss": 0.9920641183853149, "rewards/accuracies": 0.625, "rewards/chosen": -0.07975596189498901, "rewards/margins": 0.20058779418468475, "rewards/rejected": -0.2803437411785126, "step": 4097 }, { "epoch": 2.499923745615373, "grad_norm": 3.384366035461426, "learning_rate": 4.014206981016534e-06, "log_odds_chosen": 2.7903456687927246, "log_odds_ratio": -0.29153603315353394, "logits/chosen": -0.6262416243553162, "logits/rejected": -0.736205518245697, "logps/chosen": -0.5965214371681213, "logps/rejected": -2.6774322986602783, "loss": 0.9517, "nll_loss": 0.7972829341888428, "rewards/accuracies": 0.75, "rewards/chosen": -0.059652142226696014, "rewards/margins": 0.20809108018875122, "rewards/rejected": -0.2677432596683502, "step": 4098 }, { "epoch": 2.5005337806923897, "grad_norm": 1.8244209289550781, "learning_rate": 4.01322718922229e-06, "log_odds_chosen": 1.1185922622680664, "log_odds_ratio": -0.48879367113113403, "logits/chosen": -0.8558545112609863, "logits/rejected": -0.8222580552101135, "logps/chosen": -0.9096997976303101, "logps/rejected": -1.867173671722412, "loss": 1.1011, "nll_loss": 1.0941988229751587, "rewards/accuracies": 0.75, "rewards/chosen": -0.090969979763031, "rewards/margins": 0.09574739634990692, "rewards/rejected": -0.18671739101409912, "step": 4099 }, { "epoch": 2.5011438157694066, "grad_norm": 1.7944093942642212, "learning_rate": 4.012247397428047e-06, "log_odds_chosen": 1.503610610961914, "log_odds_ratio": -0.41905832290649414, "logits/chosen": -0.9643744826316833, "logits/rejected": -1.0418366193771362, "logps/chosen": -1.0111137628555298, "logps/rejected": -2.0745816230773926, "loss": 1.1068, "nll_loss": 1.3064215183258057, "rewards/accuracies": 0.75, "rewards/chosen": -0.10111138224601746, "rewards/margins": 0.10634677112102509, "rewards/rejected": -0.20745813846588135, "step": 4100 }, { "epoch": 2.5017538508464234, "grad_norm": 1.255360722541809, "learning_rate": 4.011267605633803e-06, "log_odds_chosen": 1.7968655824661255, "log_odds_ratio": -0.537670910358429, "logits/chosen": -0.6011017560958862, "logits/rejected": -0.9160091876983643, "logps/chosen": -0.9215040802955627, "logps/rejected": -2.0873775482177734, "loss": 1.0506, "nll_loss": 1.0513648986816406, "rewards/accuracies": 0.625, "rewards/chosen": -0.09215040504932404, "rewards/margins": 0.11658735573291779, "rewards/rejected": -0.20873776078224182, "step": 4101 }, { "epoch": 2.5023638859234407, "grad_norm": 2.208232879638672, "learning_rate": 4.010287813839559e-06, "log_odds_chosen": 1.9346991777420044, "log_odds_ratio": -0.3104979693889618, "logits/chosen": -0.8889306783676147, "logits/rejected": -0.9316507577896118, "logps/chosen": -0.8108016848564148, "logps/rejected": -2.316986083984375, "loss": 0.9476, "nll_loss": 0.8996526002883911, "rewards/accuracies": 0.875, "rewards/chosen": -0.08108016848564148, "rewards/margins": 0.15061843395233154, "rewards/rejected": -0.2316986322402954, "step": 4102 }, { "epoch": 2.5029739210004576, "grad_norm": 2.5254604816436768, "learning_rate": 4.009308022045316e-06, "log_odds_chosen": 2.1713614463806152, "log_odds_ratio": -0.21773415803909302, "logits/chosen": -0.7219863533973694, "logits/rejected": -0.851367175579071, "logps/chosen": -0.6375049352645874, "logps/rejected": -2.2036452293395996, "loss": 1.0545, "nll_loss": 0.961268424987793, "rewards/accuracies": 0.875, "rewards/chosen": -0.0637504979968071, "rewards/margins": 0.1566140204668045, "rewards/rejected": -0.220364511013031, "step": 4103 }, { "epoch": 2.5035839560774744, "grad_norm": 21.21269416809082, "learning_rate": 4.008328230251071e-06, "log_odds_chosen": 3.731675624847412, "log_odds_ratio": -0.2990073561668396, "logits/chosen": -1.0735383033752441, "logits/rejected": -1.0998303890228271, "logps/chosen": -0.7532882690429688, "logps/rejected": -3.6916470527648926, "loss": 1.1203, "nll_loss": 0.9975498914718628, "rewards/accuracies": 0.875, "rewards/chosen": -0.07532882690429688, "rewards/margins": 0.29383593797683716, "rewards/rejected": -0.36916473507881165, "step": 4104 }, { "epoch": 2.5041939911544913, "grad_norm": 1.272322177886963, "learning_rate": 4.007348438456827e-06, "log_odds_chosen": 2.735808849334717, "log_odds_ratio": -0.19981494545936584, "logits/chosen": -0.749907910823822, "logits/rejected": -0.8111873865127563, "logps/chosen": -0.5035747289657593, "logps/rejected": -2.5550503730773926, "loss": 0.8968, "nll_loss": 0.6289169788360596, "rewards/accuracies": 0.875, "rewards/chosen": -0.05035747215151787, "rewards/margins": 0.20514756441116333, "rewards/rejected": -0.2555050253868103, "step": 4105 }, { "epoch": 2.5048040262315086, "grad_norm": 4.557440280914307, "learning_rate": 4.006368646662584e-06, "log_odds_chosen": 2.332275867462158, "log_odds_ratio": -0.38121268153190613, "logits/chosen": -0.6918360590934753, "logits/rejected": -1.0655219554901123, "logps/chosen": -0.8801933526992798, "logps/rejected": -2.99807071685791, "loss": 1.1798, "nll_loss": 1.058303952217102, "rewards/accuracies": 0.875, "rewards/chosen": -0.08801934123039246, "rewards/margins": 0.21178773045539856, "rewards/rejected": -0.299807071685791, "step": 4106 }, { "epoch": 2.5054140613085254, "grad_norm": 1.6349303722381592, "learning_rate": 4.00538885486834e-06, "log_odds_chosen": 1.1041638851165771, "log_odds_ratio": -0.315248966217041, "logits/chosen": -1.0183340311050415, "logits/rejected": -0.9017088413238525, "logps/chosen": -0.981339693069458, "logps/rejected": -1.7341252565383911, "loss": 1.2054, "nll_loss": 1.319954752922058, "rewards/accuracies": 1.0, "rewards/chosen": -0.09813397377729416, "rewards/margins": 0.07527855038642883, "rewards/rejected": -0.1734125316143036, "step": 4107 }, { "epoch": 2.5060240963855422, "grad_norm": 2.707289457321167, "learning_rate": 4.004409063074096e-06, "log_odds_chosen": 2.940321922302246, "log_odds_ratio": -0.39155155420303345, "logits/chosen": -0.8588260412216187, "logits/rejected": -1.0279736518859863, "logps/chosen": -0.7579211592674255, "logps/rejected": -3.2484281063079834, "loss": 1.1857, "nll_loss": 1.0976581573486328, "rewards/accuracies": 0.625, "rewards/chosen": -0.07579211890697479, "rewards/margins": 0.24905070662498474, "rewards/rejected": -0.32484281063079834, "step": 4108 }, { "epoch": 2.506634131462559, "grad_norm": 3.323255777359009, "learning_rate": 4.003429271279853e-06, "log_odds_chosen": 0.9889498949050903, "log_odds_ratio": -0.5406979322433472, "logits/chosen": -1.0152570009231567, "logits/rejected": -0.8634828329086304, "logps/chosen": -0.7926012277603149, "logps/rejected": -1.4674341678619385, "loss": 1.0074, "nll_loss": 0.8856024742126465, "rewards/accuracies": 0.625, "rewards/chosen": -0.07926011085510254, "rewards/margins": 0.06748330593109131, "rewards/rejected": -0.14674341678619385, "step": 4109 }, { "epoch": 2.507244166539576, "grad_norm": 1.6366723775863647, "learning_rate": 4.002449479485609e-06, "log_odds_chosen": 2.844527244567871, "log_odds_ratio": -0.2721644639968872, "logits/chosen": -0.754359245300293, "logits/rejected": -1.067887783050537, "logps/chosen": -0.5365798473358154, "logps/rejected": -2.6913278102874756, "loss": 1.0513, "nll_loss": 1.0508848428726196, "rewards/accuracies": 0.875, "rewards/chosen": -0.053657982498407364, "rewards/margins": 0.21547478437423706, "rewards/rejected": -0.2691327929496765, "step": 4110 }, { "epoch": 2.507854201616593, "grad_norm": 1.9568382501602173, "learning_rate": 4.001469687691365e-06, "log_odds_chosen": 2.178316593170166, "log_odds_ratio": -0.21227632462978363, "logits/chosen": -0.5687956809997559, "logits/rejected": -0.7737438082695007, "logps/chosen": -0.7094936966896057, "logps/rejected": -2.2608652114868164, "loss": 1.058, "nll_loss": 0.8013638257980347, "rewards/accuracies": 1.0, "rewards/chosen": -0.07094937562942505, "rewards/margins": 0.15513715147972107, "rewards/rejected": -0.22608651220798492, "step": 4111 }, { "epoch": 2.5084642366936096, "grad_norm": 2.565941095352173, "learning_rate": 4.000489895897122e-06, "log_odds_chosen": 1.8198715448379517, "log_odds_ratio": -0.5141490697860718, "logits/chosen": -0.7829645872116089, "logits/rejected": -0.9217333197593689, "logps/chosen": -0.7614209651947021, "logps/rejected": -2.2617075443267822, "loss": 1.0977, "nll_loss": 0.9980984926223755, "rewards/accuracies": 0.5, "rewards/chosen": -0.07614210247993469, "rewards/margins": 0.15002867579460144, "rewards/rejected": -0.22617076337337494, "step": 4112 }, { "epoch": 2.509074271770627, "grad_norm": 1.4765092134475708, "learning_rate": 3.999510104102878e-06, "log_odds_chosen": 1.2886927127838135, "log_odds_ratio": -0.4927735924720764, "logits/chosen": -0.6758869886398315, "logits/rejected": -0.7731425762176514, "logps/chosen": -0.6922303438186646, "logps/rejected": -1.586815357208252, "loss": 0.9605, "nll_loss": 0.7300761342048645, "rewards/accuracies": 0.5, "rewards/chosen": -0.06922303140163422, "rewards/margins": 0.08945849537849426, "rewards/rejected": -0.15868151187896729, "step": 4113 }, { "epoch": 2.5096843068476438, "grad_norm": 1.3805876970291138, "learning_rate": 3.998530312308634e-06, "log_odds_chosen": 0.6816613674163818, "log_odds_ratio": -0.6206083297729492, "logits/chosen": -0.935661792755127, "logits/rejected": -0.933840274810791, "logps/chosen": -0.8466029167175293, "logps/rejected": -1.36869215965271, "loss": 1.0371, "nll_loss": 0.8808960914611816, "rewards/accuracies": 0.5, "rewards/chosen": -0.08466028422117233, "rewards/margins": 0.052208926528692245, "rewards/rejected": -0.13686920702457428, "step": 4114 }, { "epoch": 2.5102943419246606, "grad_norm": 3.402757406234741, "learning_rate": 3.99755052051439e-06, "log_odds_chosen": 3.093557357788086, "log_odds_ratio": -0.31311726570129395, "logits/chosen": -0.9543958902359009, "logits/rejected": -1.1223928928375244, "logps/chosen": -0.7941919565200806, "logps/rejected": -3.3074686527252197, "loss": 1.0737, "nll_loss": 1.2243220806121826, "rewards/accuracies": 0.75, "rewards/chosen": -0.07941919565200806, "rewards/margins": 0.2513276934623718, "rewards/rejected": -0.3307468593120575, "step": 4115 }, { "epoch": 2.5109043770016775, "grad_norm": 6.114466190338135, "learning_rate": 3.996570728720147e-06, "log_odds_chosen": 0.7074544429779053, "log_odds_ratio": -0.6403741240501404, "logits/chosen": -0.86482173204422, "logits/rejected": -0.9204225540161133, "logps/chosen": -1.1550540924072266, "logps/rejected": -1.7515579462051392, "loss": 1.1563, "nll_loss": 1.2874749898910522, "rewards/accuracies": 0.5, "rewards/chosen": -0.1155054122209549, "rewards/margins": 0.05965039134025574, "rewards/rejected": -0.17515578866004944, "step": 4116 }, { "epoch": 2.5115144120786947, "grad_norm": 3.3822219371795654, "learning_rate": 3.995590936925903e-06, "log_odds_chosen": 1.556119441986084, "log_odds_ratio": -0.4254322648048401, "logits/chosen": -0.9282780289649963, "logits/rejected": -0.9535027146339417, "logps/chosen": -0.936444103717804, "logps/rejected": -2.1928188800811768, "loss": 0.956, "nll_loss": 0.9824773669242859, "rewards/accuracies": 0.75, "rewards/chosen": -0.0936444103717804, "rewards/margins": 0.1256374716758728, "rewards/rejected": -0.2192818820476532, "step": 4117 }, { "epoch": 2.5121244471557116, "grad_norm": 1.217574119567871, "learning_rate": 3.994611145131659e-06, "log_odds_chosen": 0.6235042810440063, "log_odds_ratio": -0.5129632353782654, "logits/chosen": -0.8078041076660156, "logits/rejected": -0.8225162625312805, "logps/chosen": -0.9810131788253784, "logps/rejected": -1.3964614868164062, "loss": 0.9925, "nll_loss": 1.0901176929473877, "rewards/accuracies": 0.625, "rewards/chosen": -0.09810133278369904, "rewards/margins": 0.041544828563928604, "rewards/rejected": -0.13964615762233734, "step": 4118 }, { "epoch": 2.5127344822327284, "grad_norm": 1.578619360923767, "learning_rate": 3.9936313533374155e-06, "log_odds_chosen": 1.339190125465393, "log_odds_ratio": -0.44684433937072754, "logits/chosen": -0.8048251867294312, "logits/rejected": -0.96775221824646, "logps/chosen": -0.7984940409660339, "logps/rejected": -1.7065703868865967, "loss": 0.9681, "nll_loss": 0.9725348353385925, "rewards/accuracies": 0.75, "rewards/chosen": -0.07984940707683563, "rewards/margins": 0.09080763906240463, "rewards/rejected": -0.17065703868865967, "step": 4119 }, { "epoch": 2.5133445173097453, "grad_norm": 2.156785488128662, "learning_rate": 3.992651561543172e-06, "log_odds_chosen": 1.4104634523391724, "log_odds_ratio": -0.3920776844024658, "logits/chosen": -0.919641375541687, "logits/rejected": -0.7786227464675903, "logps/chosen": -0.7159587144851685, "logps/rejected": -1.6573091745376587, "loss": 0.9969, "nll_loss": 0.965118408203125, "rewards/accuracies": 0.875, "rewards/chosen": -0.07159587740898132, "rewards/margins": 0.09413505345582962, "rewards/rejected": -0.16573093831539154, "step": 4120 }, { "epoch": 2.513954552386762, "grad_norm": 1.0743855237960815, "learning_rate": 3.9916717697489286e-06, "log_odds_chosen": 0.9948606491088867, "log_odds_ratio": -0.4551941752433777, "logits/chosen": -0.948927104473114, "logits/rejected": -0.9492130875587463, "logps/chosen": -0.7311415076255798, "logps/rejected": -1.4178588390350342, "loss": 1.0565, "nll_loss": 0.8520095348358154, "rewards/accuracies": 0.625, "rewards/chosen": -0.07311415672302246, "rewards/margins": 0.06867173314094543, "rewards/rejected": -0.1417858749628067, "step": 4121 }, { "epoch": 2.514564587463779, "grad_norm": 1.4351052045822144, "learning_rate": 3.990691977954685e-06, "log_odds_chosen": 1.212223768234253, "log_odds_ratio": -0.5468041896820068, "logits/chosen": -0.8611093163490295, "logits/rejected": -0.8701375126838684, "logps/chosen": -0.8742102384567261, "logps/rejected": -1.7148014307022095, "loss": 1.0624, "nll_loss": 1.0061535835266113, "rewards/accuracies": 0.75, "rewards/chosen": -0.08742102980613708, "rewards/margins": 0.08405911177396774, "rewards/rejected": -0.17148013412952423, "step": 4122 }, { "epoch": 2.515174622540796, "grad_norm": 1.9563367366790771, "learning_rate": 3.989712186160441e-06, "log_odds_chosen": 1.8817811012268066, "log_odds_ratio": -0.4112701416015625, "logits/chosen": -0.8826255202293396, "logits/rejected": -1.1413718461990356, "logps/chosen": -0.679650068283081, "logps/rejected": -2.084932565689087, "loss": 1.2088, "nll_loss": 1.1563414335250854, "rewards/accuracies": 0.75, "rewards/chosen": -0.06796500086784363, "rewards/margins": 0.14052826166152954, "rewards/rejected": -0.20849326252937317, "step": 4123 }, { "epoch": 2.515784657617813, "grad_norm": 1.2828693389892578, "learning_rate": 3.988732394366197e-06, "log_odds_chosen": 2.3396334648132324, "log_odds_ratio": -0.3542991876602173, "logits/chosen": -0.602498471736908, "logits/rejected": -0.9581469893455505, "logps/chosen": -0.5520969033241272, "logps/rejected": -2.298063039779663, "loss": 0.9394, "nll_loss": 0.802059531211853, "rewards/accuracies": 0.875, "rewards/chosen": -0.0552096962928772, "rewards/margins": 0.17459659278392792, "rewards/rejected": -0.22980627417564392, "step": 4124 }, { "epoch": 2.51639469269483, "grad_norm": 1.1384990215301514, "learning_rate": 3.987752602571953e-06, "log_odds_chosen": 1.781823992729187, "log_odds_ratio": -0.347540020942688, "logits/chosen": -0.7572017908096313, "logits/rejected": -0.809616208076477, "logps/chosen": -0.5399618744850159, "logps/rejected": -1.8653571605682373, "loss": 0.8444, "nll_loss": 0.6751418709754944, "rewards/accuracies": 0.875, "rewards/chosen": -0.053996190428733826, "rewards/margins": 0.1325395107269287, "rewards/rejected": -0.18653571605682373, "step": 4125 }, { "epoch": 2.517004727771847, "grad_norm": 7.512204647064209, "learning_rate": 3.986772810777709e-06, "log_odds_chosen": 2.488003730773926, "log_odds_ratio": -0.12483396381139755, "logits/chosen": -0.9489365816116333, "logits/rejected": -1.0693106651306152, "logps/chosen": -0.7263028025627136, "logps/rejected": -2.6130311489105225, "loss": 0.9032, "nll_loss": 0.9879274964332581, "rewards/accuracies": 1.0, "rewards/chosen": -0.07263027876615524, "rewards/margins": 0.18867284059524536, "rewards/rejected": -0.2613031268119812, "step": 4126 }, { "epoch": 2.5176147628488637, "grad_norm": 3.4171693325042725, "learning_rate": 3.985793018983466e-06, "log_odds_chosen": 2.2545480728149414, "log_odds_ratio": -0.4993036985397339, "logits/chosen": -0.8553626537322998, "logits/rejected": -1.0722349882125854, "logps/chosen": -0.7295087575912476, "logps/rejected": -2.611076593399048, "loss": 1.1477, "nll_loss": 0.9160129427909851, "rewards/accuracies": 0.625, "rewards/chosen": -0.07295088469982147, "rewards/margins": 0.18815676867961884, "rewards/rejected": -0.2611076235771179, "step": 4127 }, { "epoch": 2.518224797925881, "grad_norm": 1.9523769617080688, "learning_rate": 3.984813227189222e-06, "log_odds_chosen": 2.395085096359253, "log_odds_ratio": -0.41289812326431274, "logits/chosen": -1.068604826927185, "logits/rejected": -1.1045161485671997, "logps/chosen": -0.7615983486175537, "logps/rejected": -2.5903823375701904, "loss": 1.1888, "nll_loss": 1.2538537979125977, "rewards/accuracies": 0.75, "rewards/chosen": -0.07615983486175537, "rewards/margins": 0.18287841975688934, "rewards/rejected": -0.2590382695198059, "step": 4128 }, { "epoch": 2.518834833002898, "grad_norm": 1.565352439880371, "learning_rate": 3.983833435394978e-06, "log_odds_chosen": 1.5347075462341309, "log_odds_ratio": -0.5873543620109558, "logits/chosen": -0.883831799030304, "logits/rejected": -0.9063749313354492, "logps/chosen": -0.9217765927314758, "logps/rejected": -2.1385183334350586, "loss": 1.0959, "nll_loss": 1.0287750959396362, "rewards/accuracies": 0.5, "rewards/chosen": -0.09217765182256699, "rewards/margins": 0.12167415767908096, "rewards/rejected": -0.21385180950164795, "step": 4129 }, { "epoch": 2.5194448680799146, "grad_norm": 4.94135856628418, "learning_rate": 3.9828536436007345e-06, "log_odds_chosen": 2.4977004528045654, "log_odds_ratio": -0.2837618589401245, "logits/chosen": -0.7529959678649902, "logits/rejected": -0.897914707660675, "logps/chosen": -0.7105385661125183, "logps/rejected": -2.6591625213623047, "loss": 0.9051, "nll_loss": 0.9795678853988647, "rewards/accuracies": 0.875, "rewards/chosen": -0.07105385512113571, "rewards/margins": 0.19486242532730103, "rewards/rejected": -0.26591625809669495, "step": 4130 }, { "epoch": 2.5200549031569315, "grad_norm": 3.4681296348571777, "learning_rate": 3.9818738518064915e-06, "log_odds_chosen": 0.8024028539657593, "log_odds_ratio": -0.5153229236602783, "logits/chosen": -0.9256219863891602, "logits/rejected": -0.948013424873352, "logps/chosen": -0.8763175010681152, "logps/rejected": -1.4198799133300781, "loss": 1.0855, "nll_loss": 0.9863279461860657, "rewards/accuracies": 0.625, "rewards/chosen": -0.08763174712657928, "rewards/margins": 0.05435624346137047, "rewards/rejected": -0.14198799431324005, "step": 4131 }, { "epoch": 2.5206649382339483, "grad_norm": 1.4997576475143433, "learning_rate": 3.980894060012247e-06, "log_odds_chosen": 1.5071512460708618, "log_odds_ratio": -0.4309529960155487, "logits/chosen": -0.7718334197998047, "logits/rejected": -0.7761368751525879, "logps/chosen": -0.6574702262878418, "logps/rejected": -1.7607500553131104, "loss": 0.9466, "nll_loss": 0.8261359333992004, "rewards/accuracies": 0.75, "rewards/chosen": -0.06574702262878418, "rewards/margins": 0.11032798886299133, "rewards/rejected": -0.1760750114917755, "step": 4132 }, { "epoch": 2.521274973310965, "grad_norm": 1.1578329801559448, "learning_rate": 3.979914268218004e-06, "log_odds_chosen": 0.41544294357299805, "log_odds_ratio": -0.6294691562652588, "logits/chosen": -1.0741971731185913, "logits/rejected": -0.9523533582687378, "logps/chosen": -1.0846775770187378, "logps/rejected": -1.4454447031021118, "loss": 1.1284, "nll_loss": 1.2741978168487549, "rewards/accuracies": 0.5, "rewards/chosen": -0.10846775025129318, "rewards/margins": 0.03607672080397606, "rewards/rejected": -0.14454448223114014, "step": 4133 }, { "epoch": 2.5218850083879825, "grad_norm": 1.5281131267547607, "learning_rate": 3.97893447642376e-06, "log_odds_chosen": 0.8865963816642761, "log_odds_ratio": -0.49079886078834534, "logits/chosen": -0.7418683171272278, "logits/rejected": -0.8872207999229431, "logps/chosen": -0.7205767631530762, "logps/rejected": -1.3228144645690918, "loss": 1.0057, "nll_loss": 0.8644158840179443, "rewards/accuracies": 0.75, "rewards/chosen": -0.07205767929553986, "rewards/margins": 0.0602237693965435, "rewards/rejected": -0.13228145241737366, "step": 4134 }, { "epoch": 2.5224950434649993, "grad_norm": 1.9563648700714111, "learning_rate": 3.977954684629516e-06, "log_odds_chosen": 1.9223146438598633, "log_odds_ratio": -0.5116190910339355, "logits/chosen": -0.937934935092926, "logits/rejected": -1.075236439704895, "logps/chosen": -0.7637705206871033, "logps/rejected": -2.354210376739502, "loss": 1.1367, "nll_loss": 0.9322328567504883, "rewards/accuracies": 0.75, "rewards/chosen": -0.07637704908847809, "rewards/margins": 0.15904399752616882, "rewards/rejected": -0.2354210615158081, "step": 4135 }, { "epoch": 2.523105078542016, "grad_norm": 3.9456770420074463, "learning_rate": 3.976974892835272e-06, "log_odds_chosen": 2.756007671356201, "log_odds_ratio": -0.29483091831207275, "logits/chosen": -0.9253081679344177, "logits/rejected": -1.0112836360931396, "logps/chosen": -0.784767210483551, "logps/rejected": -3.0473990440368652, "loss": 1.0175, "nll_loss": 0.856964647769928, "rewards/accuracies": 0.75, "rewards/chosen": -0.07847671955823898, "rewards/margins": 0.226263165473938, "rewards/rejected": -0.30473989248275757, "step": 4136 }, { "epoch": 2.523715113619033, "grad_norm": 1.332659125328064, "learning_rate": 3.975995101041028e-06, "log_odds_chosen": 2.9171600341796875, "log_odds_ratio": -0.2673128545284271, "logits/chosen": -0.8513657450675964, "logits/rejected": -0.9198672771453857, "logps/chosen": -0.4951804280281067, "logps/rejected": -2.6740810871124268, "loss": 1.0009, "nll_loss": 0.8154115080833435, "rewards/accuracies": 0.75, "rewards/chosen": -0.04951804131269455, "rewards/margins": 0.21789006888866425, "rewards/rejected": -0.2674081027507782, "step": 4137 }, { "epoch": 2.5243251486960503, "grad_norm": 5.169435501098633, "learning_rate": 3.975015309246785e-06, "log_odds_chosen": 1.829771876335144, "log_odds_ratio": -0.6865577101707458, "logits/chosen": -0.9281217455863953, "logits/rejected": -0.9347831010818481, "logps/chosen": -1.0689165592193604, "logps/rejected": -2.5847079753875732, "loss": 1.1137, "nll_loss": 1.01523756980896, "rewards/accuracies": 0.75, "rewards/chosen": -0.10689166188240051, "rewards/margins": 0.15157915651798248, "rewards/rejected": -0.2584708034992218, "step": 4138 }, { "epoch": 2.524935183773067, "grad_norm": 5.229574680328369, "learning_rate": 3.974035517452541e-06, "log_odds_chosen": 2.6205313205718994, "log_odds_ratio": -0.4543832838535309, "logits/chosen": -0.8623225092887878, "logits/rejected": -1.1507993936538696, "logps/chosen": -0.7568978667259216, "logps/rejected": -2.8403494358062744, "loss": 1.01, "nll_loss": 1.1336435079574585, "rewards/accuracies": 0.625, "rewards/chosen": -0.07568979263305664, "rewards/margins": 0.20834515988826752, "rewards/rejected": -0.28403496742248535, "step": 4139 }, { "epoch": 2.525545218850084, "grad_norm": 2.719438076019287, "learning_rate": 3.973055725658297e-06, "log_odds_chosen": 0.5890160799026489, "log_odds_ratio": -0.6651669144630432, "logits/chosen": -0.9530224204063416, "logits/rejected": -1.084890365600586, "logps/chosen": -0.9051130414009094, "logps/rejected": -1.4526996612548828, "loss": 0.948, "nll_loss": 1.192832112312317, "rewards/accuracies": 0.375, "rewards/chosen": -0.09051130712032318, "rewards/margins": 0.05475866049528122, "rewards/rejected": -0.1452699601650238, "step": 4140 }, { "epoch": 2.526155253927101, "grad_norm": 1.634015679359436, "learning_rate": 3.9720759338640535e-06, "log_odds_chosen": 2.4276161193847656, "log_odds_ratio": -0.4255770444869995, "logits/chosen": -0.8208301067352295, "logits/rejected": -0.9783159494400024, "logps/chosen": -0.7566803097724915, "logps/rejected": -2.641514778137207, "loss": 1.0299, "nll_loss": 0.985118567943573, "rewards/accuracies": 0.875, "rewards/chosen": -0.07566803693771362, "rewards/margins": 0.18848346173763275, "rewards/rejected": -0.2641514837741852, "step": 4141 }, { "epoch": 2.5267652890041177, "grad_norm": 1.9219554662704468, "learning_rate": 3.9710961420698105e-06, "log_odds_chosen": 2.906083106994629, "log_odds_ratio": -0.18959711492061615, "logits/chosen": -0.7077503204345703, "logits/rejected": -0.8976181745529175, "logps/chosen": -0.6891345381736755, "logps/rejected": -2.9525206089019775, "loss": 1.0226, "nll_loss": 0.934844970703125, "rewards/accuracies": 1.0, "rewards/chosen": -0.06891345232725143, "rewards/margins": 0.22633861005306244, "rewards/rejected": -0.2952520549297333, "step": 4142 }, { "epoch": 2.5273753240811345, "grad_norm": 2.1736061573028564, "learning_rate": 3.970116350275566e-06, "log_odds_chosen": 0.33617112040519714, "log_odds_ratio": -0.7588559985160828, "logits/chosen": -1.1110285520553589, "logits/rejected": -1.1005284786224365, "logps/chosen": -1.1635663509368896, "logps/rejected": -1.4059040546417236, "loss": 1.0717, "nll_loss": 1.327322244644165, "rewards/accuracies": 0.375, "rewards/chosen": -0.11635662615299225, "rewards/margins": 0.024233775213360786, "rewards/rejected": -0.14059039950370789, "step": 4143 }, { "epoch": 2.5279853591581514, "grad_norm": 6.205873966217041, "learning_rate": 3.969136558481323e-06, "log_odds_chosen": 2.865283966064453, "log_odds_ratio": -0.21842698752880096, "logits/chosen": -0.6984732151031494, "logits/rejected": -0.9635521769523621, "logps/chosen": -0.5786105394363403, "logps/rejected": -2.709693431854248, "loss": 0.9945, "nll_loss": 0.6450576782226562, "rewards/accuracies": 0.875, "rewards/chosen": -0.057861052453517914, "rewards/margins": 0.21310828626155853, "rewards/rejected": -0.27096936106681824, "step": 4144 }, { "epoch": 2.5285953942351687, "grad_norm": 2.031893491744995, "learning_rate": 3.968156766687079e-06, "log_odds_chosen": 0.9765085577964783, "log_odds_ratio": -0.43150055408477783, "logits/chosen": -0.969318151473999, "logits/rejected": -0.9842785596847534, "logps/chosen": -0.6968529224395752, "logps/rejected": -1.2623496055603027, "loss": 1.0712, "nll_loss": 1.0862013101577759, "rewards/accuracies": 0.75, "rewards/chosen": -0.06968529522418976, "rewards/margins": 0.056549668312072754, "rewards/rejected": -0.1262349635362625, "step": 4145 }, { "epoch": 2.5292054293121855, "grad_norm": 1.4845901727676392, "learning_rate": 3.967176974892835e-06, "log_odds_chosen": 2.381354331970215, "log_odds_ratio": -0.2296498417854309, "logits/chosen": -0.5845699906349182, "logits/rejected": -0.6494708061218262, "logps/chosen": -0.5114250779151917, "logps/rejected": -2.2337112426757812, "loss": 1.0053, "nll_loss": 0.8688222169876099, "rewards/accuracies": 0.875, "rewards/chosen": -0.051142510026693344, "rewards/margins": 0.1722286343574524, "rewards/rejected": -0.22337113320827484, "step": 4146 }, { "epoch": 2.5298154643892024, "grad_norm": 1.6641038656234741, "learning_rate": 3.966197183098591e-06, "log_odds_chosen": 2.232421636581421, "log_odds_ratio": -0.5201412439346313, "logits/chosen": -0.9719429612159729, "logits/rejected": -1.0049033164978027, "logps/chosen": -0.8048704862594604, "logps/rejected": -2.638176918029785, "loss": 1.0578, "nll_loss": 0.9317361116409302, "rewards/accuracies": 0.5, "rewards/chosen": -0.08048705756664276, "rewards/margins": 0.18333062529563904, "rewards/rejected": -0.263817697763443, "step": 4147 }, { "epoch": 2.530425499466219, "grad_norm": 1.2446773052215576, "learning_rate": 3.965217391304348e-06, "log_odds_chosen": 1.7625269889831543, "log_odds_ratio": -0.3352925181388855, "logits/chosen": -0.8626649379730225, "logits/rejected": -0.918272852897644, "logps/chosen": -0.7731441259384155, "logps/rejected": -2.1021132469177246, "loss": 0.8698, "nll_loss": 0.9022460579872131, "rewards/accuracies": 0.875, "rewards/chosen": -0.07731440663337708, "rewards/margins": 0.13289691507816315, "rewards/rejected": -0.21021133661270142, "step": 4148 }, { "epoch": 2.5310355345432365, "grad_norm": 1.3719571828842163, "learning_rate": 3.964237599510104e-06, "log_odds_chosen": 0.5380282402038574, "log_odds_ratio": -0.5388577580451965, "logits/chosen": -0.9691401124000549, "logits/rejected": -0.8616471290588379, "logps/chosen": -1.0190720558166504, "logps/rejected": -1.4420051574707031, "loss": 1.1229, "nll_loss": 1.2082849740982056, "rewards/accuracies": 0.75, "rewards/chosen": -0.10190720111131668, "rewards/margins": 0.04229331761598587, "rewards/rejected": -0.14420051872730255, "step": 4149 }, { "epoch": 2.5316455696202533, "grad_norm": 1.7789478302001953, "learning_rate": 3.96325780771586e-06, "log_odds_chosen": 0.9077023267745972, "log_odds_ratio": -0.5273083448410034, "logits/chosen": -0.8688972592353821, "logits/rejected": -0.708382248878479, "logps/chosen": -0.8412443995475769, "logps/rejected": -1.4021052122116089, "loss": 1.1592, "nll_loss": 1.0920140743255615, "rewards/accuracies": 0.625, "rewards/chosen": -0.08412444591522217, "rewards/margins": 0.05608607828617096, "rewards/rejected": -0.14021050930023193, "step": 4150 }, { "epoch": 2.53225560469727, "grad_norm": 1.8153611421585083, "learning_rate": 3.9622780159216165e-06, "log_odds_chosen": 0.785057544708252, "log_odds_ratio": -0.4977974593639374, "logits/chosen": -0.95161372423172, "logits/rejected": -1.0698168277740479, "logps/chosen": -0.9814590811729431, "logps/rejected": -1.2282562255859375, "loss": 1.1272, "nll_loss": 1.2646490335464478, "rewards/accuracies": 0.75, "rewards/chosen": -0.09814590960741043, "rewards/margins": 0.024679705500602722, "rewards/rejected": -0.12282562255859375, "step": 4151 }, { "epoch": 2.532865639774287, "grad_norm": 1.6450926065444946, "learning_rate": 3.961298224127373e-06, "log_odds_chosen": 1.070631504058838, "log_odds_ratio": -0.48522937297821045, "logits/chosen": -0.9028592109680176, "logits/rejected": -0.7937788963317871, "logps/chosen": -0.8096661567687988, "logps/rejected": -1.4865376949310303, "loss": 1.1972, "nll_loss": 1.1223310232162476, "rewards/accuracies": 0.875, "rewards/chosen": -0.08096661418676376, "rewards/margins": 0.06768715381622314, "rewards/rejected": -0.1486537754535675, "step": 4152 }, { "epoch": 2.533475674851304, "grad_norm": 1.192082405090332, "learning_rate": 3.960318432333129e-06, "log_odds_chosen": 1.8912882804870605, "log_odds_ratio": -0.4131898880004883, "logits/chosen": -0.792519211769104, "logits/rejected": -0.857016921043396, "logps/chosen": -0.714087963104248, "logps/rejected": -2.209613561630249, "loss": 1.0463, "nll_loss": 0.8387101888656616, "rewards/accuracies": 0.75, "rewards/chosen": -0.07140879333019257, "rewards/margins": 0.14955256879329681, "rewards/rejected": -0.22096136212348938, "step": 4153 }, { "epoch": 2.5340857099283207, "grad_norm": 1.3865411281585693, "learning_rate": 3.959338640538885e-06, "log_odds_chosen": 1.6059638261795044, "log_odds_ratio": -0.3962802290916443, "logits/chosen": -0.6068856120109558, "logits/rejected": -0.8333061933517456, "logps/chosen": -0.6658645868301392, "logps/rejected": -1.795587420463562, "loss": 0.9267, "nll_loss": 0.990943193435669, "rewards/accuracies": 0.875, "rewards/chosen": -0.0665864646434784, "rewards/margins": 0.11297228932380676, "rewards/rejected": -0.17955875396728516, "step": 4154 }, { "epoch": 2.5346957450053376, "grad_norm": 0.963574230670929, "learning_rate": 3.958358848744642e-06, "log_odds_chosen": 3.2096691131591797, "log_odds_ratio": -0.3139381408691406, "logits/chosen": -0.6835896968841553, "logits/rejected": -0.7156420946121216, "logps/chosen": -0.5615019202232361, "logps/rejected": -3.1762197017669678, "loss": 0.9808, "nll_loss": 0.7204404473304749, "rewards/accuracies": 0.875, "rewards/chosen": -0.05615019053220749, "rewards/margins": 0.26147180795669556, "rewards/rejected": -0.31762200593948364, "step": 4155 }, { "epoch": 2.535305780082355, "grad_norm": 1.8987820148468018, "learning_rate": 3.957379056950398e-06, "log_odds_chosen": 1.2669111490249634, "log_odds_ratio": -0.3663789927959442, "logits/chosen": -0.9622598886489868, "logits/rejected": -0.9640395045280457, "logps/chosen": -1.0023324489593506, "logps/rejected": -1.9198706150054932, "loss": 1.0871, "nll_loss": 1.425404667854309, "rewards/accuracies": 0.875, "rewards/chosen": -0.10023324191570282, "rewards/margins": 0.09175381809473038, "rewards/rejected": -0.1919870674610138, "step": 4156 }, { "epoch": 2.5359158151593717, "grad_norm": 1.2595552206039429, "learning_rate": 3.956399265156154e-06, "log_odds_chosen": 1.601881980895996, "log_odds_ratio": -0.44694459438323975, "logits/chosen": -1.0700539350509644, "logits/rejected": -0.9520388841629028, "logps/chosen": -0.9113222360610962, "logps/rejected": -2.1182703971862793, "loss": 1.053, "nll_loss": 0.971705973148346, "rewards/accuracies": 0.75, "rewards/chosen": -0.09113222360610962, "rewards/margins": 0.12069482356309891, "rewards/rejected": -0.21182703971862793, "step": 4157 }, { "epoch": 2.5365258502363885, "grad_norm": 7.217296600341797, "learning_rate": 3.95541947336191e-06, "log_odds_chosen": 0.9061082601547241, "log_odds_ratio": -0.4032119810581207, "logits/chosen": -0.6418133974075317, "logits/rejected": -0.628031849861145, "logps/chosen": -0.899371325969696, "logps/rejected": -1.4591509103775024, "loss": 1.0861, "nll_loss": 0.8878644704818726, "rewards/accuracies": 0.875, "rewards/chosen": -0.08993712812662125, "rewards/margins": 0.0559779554605484, "rewards/rejected": -0.14591509103775024, "step": 4158 }, { "epoch": 2.5371358853134054, "grad_norm": 9.70667552947998, "learning_rate": 3.954439681567667e-06, "log_odds_chosen": 3.889052391052246, "log_odds_ratio": -0.44994425773620605, "logits/chosen": -0.9193305373191833, "logits/rejected": -0.8744369745254517, "logps/chosen": -0.8162088990211487, "logps/rejected": -4.36594295501709, "loss": 1.1614, "nll_loss": 0.9797870516777039, "rewards/accuracies": 0.75, "rewards/chosen": -0.08162088692188263, "rewards/margins": 0.3549734354019165, "rewards/rejected": -0.43659430742263794, "step": 4159 }, { "epoch": 2.5377459203904227, "grad_norm": 1.6681774854660034, "learning_rate": 3.953459889773422e-06, "log_odds_chosen": 1.5008496046066284, "log_odds_ratio": -0.3817215859889984, "logits/chosen": -1.100631594657898, "logits/rejected": -0.8957332372665405, "logps/chosen": -0.8423924446105957, "logps/rejected": -2.032390832901001, "loss": 1.1209, "nll_loss": 1.1393009424209595, "rewards/accuracies": 0.75, "rewards/chosen": -0.08423924446105957, "rewards/margins": 0.11899985373020172, "rewards/rejected": -0.2032390832901001, "step": 4160 }, { "epoch": 2.5383559554674395, "grad_norm": 1.7879029512405396, "learning_rate": 3.952480097979179e-06, "log_odds_chosen": 0.9773683547973633, "log_odds_ratio": -0.47099775075912476, "logits/chosen": -1.0847876071929932, "logits/rejected": -1.0375807285308838, "logps/chosen": -0.8829308748245239, "logps/rejected": -1.6780130863189697, "loss": 1.3273, "nll_loss": 1.184272050857544, "rewards/accuracies": 0.75, "rewards/chosen": -0.08829308301210403, "rewards/margins": 0.0795082375407219, "rewards/rejected": -0.16780132055282593, "step": 4161 }, { "epoch": 2.5389659905444564, "grad_norm": 8.32424259185791, "learning_rate": 3.9515003061849355e-06, "log_odds_chosen": 0.6756426095962524, "log_odds_ratio": -0.6216462850570679, "logits/chosen": -1.0497827529907227, "logits/rejected": -0.8398311138153076, "logps/chosen": -1.0345089435577393, "logps/rejected": -1.654508113861084, "loss": 1.0837, "nll_loss": 1.215991497039795, "rewards/accuracies": 0.5, "rewards/chosen": -0.10345088690519333, "rewards/margins": 0.061999931931495667, "rewards/rejected": -0.1654508113861084, "step": 4162 }, { "epoch": 2.539576025621473, "grad_norm": 1.576033592224121, "learning_rate": 3.950520514390692e-06, "log_odds_chosen": 1.2131390571594238, "log_odds_ratio": -0.6072754859924316, "logits/chosen": -0.8636053204536438, "logits/rejected": -0.9230526685714722, "logps/chosen": -0.8012614250183105, "logps/rejected": -1.7120403051376343, "loss": 0.9845, "nll_loss": 0.9213119745254517, "rewards/accuracies": 0.5, "rewards/chosen": -0.08012614399194717, "rewards/margins": 0.09107789397239685, "rewards/rejected": -0.17120404541492462, "step": 4163 }, { "epoch": 2.54018606069849, "grad_norm": 2.0600662231445312, "learning_rate": 3.949540722596448e-06, "log_odds_chosen": 2.489549398422241, "log_odds_ratio": -0.27500540018081665, "logits/chosen": -0.6600114107131958, "logits/rejected": -0.9393389225006104, "logps/chosen": -0.4765912592411041, "logps/rejected": -2.129901170730591, "loss": 0.9223, "nll_loss": 0.7258341312408447, "rewards/accuracies": 0.75, "rewards/chosen": -0.04765912517905235, "rewards/margins": 0.16533097624778748, "rewards/rejected": -0.21299010515213013, "step": 4164 }, { "epoch": 2.540796095775507, "grad_norm": 2.3088672161102295, "learning_rate": 3.948560930802205e-06, "log_odds_chosen": 1.6462172269821167, "log_odds_ratio": -0.2500483989715576, "logits/chosen": -0.8345540165901184, "logits/rejected": -0.7199392318725586, "logps/chosen": -0.7637144923210144, "logps/rejected": -2.0109963417053223, "loss": 1.217, "nll_loss": 1.2372825145721436, "rewards/accuracies": 1.0, "rewards/chosen": -0.0763714462518692, "rewards/margins": 0.12472819536924362, "rewards/rejected": -0.20109963417053223, "step": 4165 }, { "epoch": 2.5414061308525238, "grad_norm": 0.9412242770195007, "learning_rate": 3.947581139007961e-06, "log_odds_chosen": 1.7215198278427124, "log_odds_ratio": -0.49651581048965454, "logits/chosen": -1.116815209388733, "logits/rejected": -1.0397613048553467, "logps/chosen": -1.005892038345337, "logps/rejected": -2.373490810394287, "loss": 1.1339, "nll_loss": 1.0882999897003174, "rewards/accuracies": 0.625, "rewards/chosen": -0.10058921575546265, "rewards/margins": 0.13675987720489502, "rewards/rejected": -0.23734909296035767, "step": 4166 }, { "epoch": 2.542016165929541, "grad_norm": 1.328145980834961, "learning_rate": 3.946601347213717e-06, "log_odds_chosen": 1.638857364654541, "log_odds_ratio": -0.45099538564682007, "logits/chosen": -0.7498159408569336, "logits/rejected": -0.8039496541023254, "logps/chosen": -0.7304568886756897, "logps/rejected": -1.9733465909957886, "loss": 0.9852, "nll_loss": 0.9069414138793945, "rewards/accuracies": 0.75, "rewards/chosen": -0.07304568588733673, "rewards/margins": 0.12428897619247437, "rewards/rejected": -0.1973346769809723, "step": 4167 }, { "epoch": 2.542626201006558, "grad_norm": 1.1057873964309692, "learning_rate": 3.945621555419473e-06, "log_odds_chosen": 2.623203992843628, "log_odds_ratio": -0.22345943748950958, "logits/chosen": -0.5492790937423706, "logits/rejected": -0.7230823636054993, "logps/chosen": -0.48253947496414185, "logps/rejected": -2.467193603515625, "loss": 0.9669, "nll_loss": 0.7292593717575073, "rewards/accuracies": 1.0, "rewards/chosen": -0.048253946006298065, "rewards/margins": 0.19846540689468384, "rewards/rejected": -0.2467193603515625, "step": 4168 }, { "epoch": 2.5432362360835747, "grad_norm": 1.296573519706726, "learning_rate": 3.944641763625229e-06, "log_odds_chosen": 2.574342727661133, "log_odds_ratio": -0.49329236149787903, "logits/chosen": -0.7627863883972168, "logits/rejected": -0.8491413593292236, "logps/chosen": -0.7489345669746399, "logps/rejected": -2.8940062522888184, "loss": 0.9727, "nll_loss": 0.9772471189498901, "rewards/accuracies": 0.625, "rewards/chosen": -0.07489345222711563, "rewards/margins": 0.21450717747211456, "rewards/rejected": -0.2894006371498108, "step": 4169 }, { "epoch": 2.5438462711605916, "grad_norm": 1.634945273399353, "learning_rate": 3.943661971830986e-06, "log_odds_chosen": 2.310506820678711, "log_odds_ratio": -0.34104228019714355, "logits/chosen": -0.7489246726036072, "logits/rejected": -0.8986712694168091, "logps/chosen": -0.5473818778991699, "logps/rejected": -2.290900707244873, "loss": 0.9054, "nll_loss": 0.6737406849861145, "rewards/accuracies": 0.75, "rewards/chosen": -0.05473819375038147, "rewards/margins": 0.17435190081596375, "rewards/rejected": -0.22909007966518402, "step": 4170 }, { "epoch": 2.544456306237609, "grad_norm": 1.5409451723098755, "learning_rate": 3.9426821800367414e-06, "log_odds_chosen": 1.0055413246154785, "log_odds_ratio": -0.5479235649108887, "logits/chosen": -0.8773300647735596, "logits/rejected": -0.9142717123031616, "logps/chosen": -0.8432860374450684, "logps/rejected": -1.4194765090942383, "loss": 1.1475, "nll_loss": 1.044052243232727, "rewards/accuracies": 0.375, "rewards/chosen": -0.08432860672473907, "rewards/margins": 0.05761905759572983, "rewards/rejected": -0.1419476568698883, "step": 4171 }, { "epoch": 2.5450663413146257, "grad_norm": 1.362488031387329, "learning_rate": 3.941702388242498e-06, "log_odds_chosen": 0.39858150482177734, "log_odds_ratio": -0.7520700693130493, "logits/chosen": -0.732404351234436, "logits/rejected": -0.8619400262832642, "logps/chosen": -0.9511789083480835, "logps/rejected": -1.31315279006958, "loss": 1.0828, "nll_loss": 1.0829110145568848, "rewards/accuracies": 0.5, "rewards/chosen": -0.09511788934469223, "rewards/margins": 0.03619739040732384, "rewards/rejected": -0.13131529092788696, "step": 4172 }, { "epoch": 2.5456763763916426, "grad_norm": 1.1052916049957275, "learning_rate": 3.9407225964482545e-06, "log_odds_chosen": 1.0098717212677002, "log_odds_ratio": -0.5254815816879272, "logits/chosen": -0.8143911361694336, "logits/rejected": -0.7519314885139465, "logps/chosen": -0.6469461917877197, "logps/rejected": -1.361802577972412, "loss": 0.8712, "nll_loss": 0.8163330554962158, "rewards/accuracies": 0.75, "rewards/chosen": -0.06469462066888809, "rewards/margins": 0.07148562371730804, "rewards/rejected": -0.13618025183677673, "step": 4173 }, { "epoch": 2.5462864114686594, "grad_norm": 1.398115634918213, "learning_rate": 3.939742804654011e-06, "log_odds_chosen": 2.4557809829711914, "log_odds_ratio": -0.2673778533935547, "logits/chosen": -0.7615528106689453, "logits/rejected": -0.8776780962944031, "logps/chosen": -0.652098536491394, "logps/rejected": -2.537775993347168, "loss": 1.0144, "nll_loss": 0.8579955101013184, "rewards/accuracies": 1.0, "rewards/chosen": -0.06520985066890717, "rewards/margins": 0.18856775760650635, "rewards/rejected": -0.2537776231765747, "step": 4174 }, { "epoch": 2.5468964465456763, "grad_norm": 1.1471205949783325, "learning_rate": 3.938763012859767e-06, "log_odds_chosen": 1.314962387084961, "log_odds_ratio": -0.37704604864120483, "logits/chosen": -0.6816293001174927, "logits/rejected": -0.6371253728866577, "logps/chosen": -0.6303907632827759, "logps/rejected": -1.5043423175811768, "loss": 0.9038, "nll_loss": 0.7450307011604309, "rewards/accuracies": 0.875, "rewards/chosen": -0.06303907930850983, "rewards/margins": 0.08739516139030457, "rewards/rejected": -0.1504342406988144, "step": 4175 }, { "epoch": 2.547506481622693, "grad_norm": 3.08194899559021, "learning_rate": 3.937783221065524e-06, "log_odds_chosen": 1.3726532459259033, "log_odds_ratio": -0.357270747423172, "logits/chosen": -0.9061486721038818, "logits/rejected": -1.091093897819519, "logps/chosen": -0.6669043302536011, "logps/rejected": -1.6321489810943604, "loss": 1.0385, "nll_loss": 0.9602667689323425, "rewards/accuracies": 0.75, "rewards/chosen": -0.06669043004512787, "rewards/margins": 0.09652446955442429, "rewards/rejected": -0.16321489214897156, "step": 4176 }, { "epoch": 2.5481165166997104, "grad_norm": 1.2413831949234009, "learning_rate": 3.93680342927128e-06, "log_odds_chosen": -0.25564154982566833, "log_odds_ratio": -0.8828330039978027, "logits/chosen": -0.9748116731643677, "logits/rejected": -0.9070778489112854, "logps/chosen": -1.150896668434143, "logps/rejected": -1.0064667463302612, "loss": 1.1127, "nll_loss": 1.2878150939941406, "rewards/accuracies": 0.375, "rewards/chosen": -0.11508966982364655, "rewards/margins": -0.01444298680871725, "rewards/rejected": -0.10064667463302612, "step": 4177 }, { "epoch": 2.5487265517767272, "grad_norm": 0.9476168751716614, "learning_rate": 3.935823637477036e-06, "log_odds_chosen": 1.3579293489456177, "log_odds_ratio": -0.4941452741622925, "logits/chosen": -0.9778878688812256, "logits/rejected": -0.8317373991012573, "logps/chosen": -0.8645960092544556, "logps/rejected": -1.8997552394866943, "loss": 1.0677, "nll_loss": 1.2294739484786987, "rewards/accuracies": 0.625, "rewards/chosen": -0.08645959943532944, "rewards/margins": 0.10351593047380447, "rewards/rejected": -0.18997551500797272, "step": 4178 }, { "epoch": 2.549336586853744, "grad_norm": 3.2170791625976562, "learning_rate": 3.934843845682792e-06, "log_odds_chosen": 0.4884047508239746, "log_odds_ratio": -0.6821712255477905, "logits/chosen": -0.8926100730895996, "logits/rejected": -1.031188726425171, "logps/chosen": -1.1756856441497803, "logps/rejected": -1.3820191621780396, "loss": 1.4896, "nll_loss": 1.6600098609924316, "rewards/accuracies": 0.625, "rewards/chosen": -0.11756856739521027, "rewards/margins": 0.02063334546983242, "rewards/rejected": -0.13820192217826843, "step": 4179 }, { "epoch": 2.549946621930761, "grad_norm": 1.8116874694824219, "learning_rate": 3.933864053888548e-06, "log_odds_chosen": 2.8744287490844727, "log_odds_ratio": -0.3572484254837036, "logits/chosen": -0.8477271795272827, "logits/rejected": -0.9073882102966309, "logps/chosen": -0.9834985136985779, "logps/rejected": -3.4200804233551025, "loss": 1.0595, "nll_loss": 1.140825629234314, "rewards/accuracies": 0.75, "rewards/chosen": -0.09834984689950943, "rewards/margins": 0.2436581701040268, "rewards/rejected": -0.34200799465179443, "step": 4180 }, { "epoch": 2.550556657007778, "grad_norm": 2.3878328800201416, "learning_rate": 3.932884262094305e-06, "log_odds_chosen": -0.0899352878332138, "log_odds_ratio": -1.0258963108062744, "logits/chosen": -1.034486174583435, "logits/rejected": -0.7798066139221191, "logps/chosen": -1.2453995943069458, "logps/rejected": -1.3913037776947021, "loss": 1.2344, "nll_loss": 1.4295247793197632, "rewards/accuracies": 0.375, "rewards/chosen": -0.12453995645046234, "rewards/margins": 0.014590414240956306, "rewards/rejected": -0.1391303837299347, "step": 4181 }, { "epoch": 2.551166692084795, "grad_norm": 1.4785773754119873, "learning_rate": 3.931904470300061e-06, "log_odds_chosen": 0.18384668231010437, "log_odds_ratio": -0.6784385442733765, "logits/chosen": -1.0559303760528564, "logits/rejected": -1.0274370908737183, "logps/chosen": -1.0324556827545166, "logps/rejected": -1.1674789190292358, "loss": 1.0259, "nll_loss": 1.1776330471038818, "rewards/accuracies": 0.5, "rewards/chosen": -0.1032455712556839, "rewards/margins": 0.013502329587936401, "rewards/rejected": -0.1167479008436203, "step": 4182 }, { "epoch": 2.551776727161812, "grad_norm": 1.131071925163269, "learning_rate": 3.9309246785058175e-06, "log_odds_chosen": 1.5736846923828125, "log_odds_ratio": -0.31987500190734863, "logits/chosen": -0.8336557149887085, "logits/rejected": -0.8617940545082092, "logps/chosen": -0.6503047943115234, "logps/rejected": -1.7545180320739746, "loss": 0.883, "nll_loss": 0.827487587928772, "rewards/accuracies": 0.875, "rewards/chosen": -0.06503048539161682, "rewards/margins": 0.1104213297367096, "rewards/rejected": -0.17545181512832642, "step": 4183 }, { "epoch": 2.5523867622388288, "grad_norm": 1.458877444267273, "learning_rate": 3.9299448867115736e-06, "log_odds_chosen": 1.3626537322998047, "log_odds_ratio": -0.3293217122554779, "logits/chosen": -0.8066684603691101, "logits/rejected": -0.9169577360153198, "logps/chosen": -0.8725988864898682, "logps/rejected": -1.8394019603729248, "loss": 1.1633, "nll_loss": 1.2213454246520996, "rewards/accuracies": 0.875, "rewards/chosen": -0.08725990355014801, "rewards/margins": 0.09668028354644775, "rewards/rejected": -0.18394017219543457, "step": 4184 }, { "epoch": 2.5529967973158456, "grad_norm": 1.471006155014038, "learning_rate": 3.92896509491733e-06, "log_odds_chosen": 0.5730631947517395, "log_odds_ratio": -0.587590754032135, "logits/chosen": -0.8090131282806396, "logits/rejected": -0.8649828433990479, "logps/chosen": -0.7815670371055603, "logps/rejected": -1.1638253927230835, "loss": 0.9691, "nll_loss": 1.0198078155517578, "rewards/accuracies": 0.625, "rewards/chosen": -0.07815670222043991, "rewards/margins": 0.038225844502449036, "rewards/rejected": -0.11638254672288895, "step": 4185 }, { "epoch": 2.5536068323928625, "grad_norm": 1.140868067741394, "learning_rate": 3.927985303123086e-06, "log_odds_chosen": 0.8226673603057861, "log_odds_ratio": -0.4615999758243561, "logits/chosen": -0.6765662431716919, "logits/rejected": -0.7751455307006836, "logps/chosen": -0.6347438097000122, "logps/rejected": -0.9568694829940796, "loss": 1.1105, "nll_loss": 0.8055134415626526, "rewards/accuracies": 0.75, "rewards/chosen": -0.0634743869304657, "rewards/margins": 0.032212574034929276, "rewards/rejected": -0.09568695724010468, "step": 4186 }, { "epoch": 2.5542168674698793, "grad_norm": 1.4111474752426147, "learning_rate": 3.927005511328843e-06, "log_odds_chosen": 1.2346895933151245, "log_odds_ratio": -0.6043556332588196, "logits/chosen": -0.9781941175460815, "logits/rejected": -0.9439852237701416, "logps/chosen": -1.0027687549591064, "logps/rejected": -1.952676773071289, "loss": 1.1612, "nll_loss": 1.149927020072937, "rewards/accuracies": 0.625, "rewards/chosen": -0.100276879966259, "rewards/margins": 0.0949908196926117, "rewards/rejected": -0.1952676922082901, "step": 4187 }, { "epoch": 2.5548269025468966, "grad_norm": 14.31124496459961, "learning_rate": 3.926025719534599e-06, "log_odds_chosen": 1.2456145286560059, "log_odds_ratio": -0.4243888854980469, "logits/chosen": -0.7815847992897034, "logits/rejected": -0.9327859282493591, "logps/chosen": -0.7191357612609863, "logps/rejected": -1.5969150066375732, "loss": 1.2114, "nll_loss": 1.1557033061981201, "rewards/accuracies": 0.875, "rewards/chosen": -0.07191357761621475, "rewards/margins": 0.08777792751789093, "rewards/rejected": -0.15969151258468628, "step": 4188 }, { "epoch": 2.5554369376239134, "grad_norm": 2.029388904571533, "learning_rate": 3.925045927740355e-06, "log_odds_chosen": 0.7329237461090088, "log_odds_ratio": -0.7004457712173462, "logits/chosen": -0.8574128746986389, "logits/rejected": -0.8448939323425293, "logps/chosen": -0.9649739861488342, "logps/rejected": -1.5610923767089844, "loss": 1.1136, "nll_loss": 1.0731794834136963, "rewards/accuracies": 0.5, "rewards/chosen": -0.09649740159511566, "rewards/margins": 0.059611834585666656, "rewards/rejected": -0.15610924363136292, "step": 4189 }, { "epoch": 2.5560469727009303, "grad_norm": 3.516435146331787, "learning_rate": 3.924066135946111e-06, "log_odds_chosen": 1.865659475326538, "log_odds_ratio": -0.3605910837650299, "logits/chosen": -0.8562592267990112, "logits/rejected": -0.7934409379959106, "logps/chosen": -0.8814261555671692, "logps/rejected": -2.429046392440796, "loss": 1.1381, "nll_loss": 1.1219379901885986, "rewards/accuracies": 0.75, "rewards/chosen": -0.08814261853694916, "rewards/margins": 0.15476204454898834, "rewards/rejected": -0.2429046630859375, "step": 4190 }, { "epoch": 2.556657007777947, "grad_norm": 1.2391977310180664, "learning_rate": 3.923086344151867e-06, "log_odds_chosen": 1.2402657270431519, "log_odds_ratio": -0.423449844121933, "logits/chosen": -1.0266187191009521, "logits/rejected": -1.0483672618865967, "logps/chosen": -0.7679582834243774, "logps/rejected": -1.6787068843841553, "loss": 1.0244, "nll_loss": 0.9280811548233032, "rewards/accuracies": 0.75, "rewards/chosen": -0.07679583132266998, "rewards/margins": 0.0910748615860939, "rewards/rejected": -0.1678706705570221, "step": 4191 }, { "epoch": 2.5572670428549644, "grad_norm": 2.7411997318267822, "learning_rate": 3.922106552357623e-06, "log_odds_chosen": -0.14740145206451416, "log_odds_ratio": -0.8196048736572266, "logits/chosen": -0.9810113906860352, "logits/rejected": -1.005530834197998, "logps/chosen": -1.3081369400024414, "logps/rejected": -1.2088871002197266, "loss": 1.3222, "nll_loss": 1.3081988096237183, "rewards/accuracies": 0.5, "rewards/chosen": -0.13081370294094086, "rewards/margins": -0.009924990125000477, "rewards/rejected": -0.12088870257139206, "step": 4192 }, { "epoch": 2.5578770779319813, "grad_norm": 2.2300331592559814, "learning_rate": 3.92112676056338e-06, "log_odds_chosen": 1.9811968803405762, "log_odds_ratio": -0.4247745871543884, "logits/chosen": -0.9229683876037598, "logits/rejected": -0.9898291826248169, "logps/chosen": -0.7891424298286438, "logps/rejected": -2.2933249473571777, "loss": 1.0147, "nll_loss": 0.9676156044006348, "rewards/accuracies": 0.625, "rewards/chosen": -0.07891424745321274, "rewards/margins": 0.1504182517528534, "rewards/rejected": -0.22933250665664673, "step": 4193 }, { "epoch": 2.558487113008998, "grad_norm": 2.1369051933288574, "learning_rate": 3.9201469687691365e-06, "log_odds_chosen": 1.086173176765442, "log_odds_ratio": -0.8545637726783752, "logits/chosen": -0.9574476480484009, "logits/rejected": -1.0884212255477905, "logps/chosen": -0.9165035486221313, "logps/rejected": -1.954477071762085, "loss": 1.0891, "nll_loss": 1.1214896440505981, "rewards/accuracies": 0.625, "rewards/chosen": -0.0916503518819809, "rewards/margins": 0.10379735380411148, "rewards/rejected": -0.19544771313667297, "step": 4194 }, { "epoch": 2.559097148086015, "grad_norm": 1.7629715204238892, "learning_rate": 3.919167176974893e-06, "log_odds_chosen": 2.6766388416290283, "log_odds_ratio": -0.3758946657180786, "logits/chosen": -0.6179187297821045, "logits/rejected": -0.8389079570770264, "logps/chosen": -0.6660175323486328, "logps/rejected": -2.7418715953826904, "loss": 1.0559, "nll_loss": 0.8043167591094971, "rewards/accuracies": 0.75, "rewards/chosen": -0.06660174578428268, "rewards/margins": 0.2075854390859604, "rewards/rejected": -0.2741871774196625, "step": 4195 }, { "epoch": 2.559707183163032, "grad_norm": 4.557766914367676, "learning_rate": 3.918187385180649e-06, "log_odds_chosen": 2.539262533187866, "log_odds_ratio": -0.4118049740791321, "logits/chosen": -0.8902665972709656, "logits/rejected": -0.9627267718315125, "logps/chosen": -0.7660367488861084, "logps/rejected": -2.8643200397491455, "loss": 1.0069, "nll_loss": 0.8936452865600586, "rewards/accuracies": 0.75, "rewards/chosen": -0.07660368084907532, "rewards/margins": 0.20982836186885834, "rewards/rejected": -0.28643202781677246, "step": 4196 }, { "epoch": 2.5603172182400487, "grad_norm": 1.8727693557739258, "learning_rate": 3.917207593386405e-06, "log_odds_chosen": 1.1860918998718262, "log_odds_ratio": -0.46984946727752686, "logits/chosen": -0.5350436568260193, "logits/rejected": -0.7260606288909912, "logps/chosen": -0.6128615736961365, "logps/rejected": -1.3033932447433472, "loss": 1.098, "nll_loss": 0.9080812335014343, "rewards/accuracies": 0.625, "rewards/chosen": -0.06128615885972977, "rewards/margins": 0.06905317306518555, "rewards/rejected": -0.13033932447433472, "step": 4197 }, { "epoch": 2.5609272533170655, "grad_norm": 2.2467563152313232, "learning_rate": 3.916227801592162e-06, "log_odds_chosen": 0.4563276767730713, "log_odds_ratio": -0.7367886304855347, "logits/chosen": -0.7973420023918152, "logits/rejected": -0.9016693830490112, "logps/chosen": -0.9051620364189148, "logps/rejected": -1.22718346118927, "loss": 1.1394, "nll_loss": 1.0378713607788086, "rewards/accuracies": 0.375, "rewards/chosen": -0.09051620960235596, "rewards/margins": 0.03220214322209358, "rewards/rejected": -0.12271834909915924, "step": 4198 }, { "epoch": 2.561537288394083, "grad_norm": 1.6883608102798462, "learning_rate": 3.915248009797918e-06, "log_odds_chosen": 0.5642668604850769, "log_odds_ratio": -0.6038212776184082, "logits/chosen": -0.8875705003738403, "logits/rejected": -0.9664009213447571, "logps/chosen": -0.7476341128349304, "logps/rejected": -1.1290935277938843, "loss": 1.1798, "nll_loss": 1.221034288406372, "rewards/accuracies": 0.625, "rewards/chosen": -0.07476341724395752, "rewards/margins": 0.03814593702554703, "rewards/rejected": -0.11290934681892395, "step": 4199 }, { "epoch": 2.5621473234710996, "grad_norm": 0.9623265862464905, "learning_rate": 3.914268218003674e-06, "log_odds_chosen": 3.7830421924591064, "log_odds_ratio": -0.31772714853286743, "logits/chosen": -0.7683814167976379, "logits/rejected": -0.9671238660812378, "logps/chosen": -0.6219972968101501, "logps/rejected": -3.800868034362793, "loss": 1.0312, "nll_loss": 0.8660599589347839, "rewards/accuracies": 0.75, "rewards/chosen": -0.06219973415136337, "rewards/margins": 0.3178870975971222, "rewards/rejected": -0.3800868093967438, "step": 4200 }, { "epoch": 2.5627573585481165, "grad_norm": 4.391965389251709, "learning_rate": 3.91328842620943e-06, "log_odds_chosen": 1.3012884855270386, "log_odds_ratio": -0.7471364736557007, "logits/chosen": -1.0423814058303833, "logits/rejected": -1.0037585496902466, "logps/chosen": -1.0413408279418945, "logps/rejected": -2.313582420349121, "loss": 1.1689, "nll_loss": 1.2076808214187622, "rewards/accuracies": 0.25, "rewards/chosen": -0.10413409769535065, "rewards/margins": 0.1272241473197937, "rewards/rejected": -0.23135825991630554, "step": 4201 }, { "epoch": 2.5633673936251333, "grad_norm": 2.4421606063842773, "learning_rate": 3.912308634415187e-06, "log_odds_chosen": 2.706871747970581, "log_odds_ratio": -0.39454740285873413, "logits/chosen": -0.8922981023788452, "logits/rejected": -1.0014426708221436, "logps/chosen": -0.7170555591583252, "logps/rejected": -3.0553462505340576, "loss": 0.9534, "nll_loss": 0.928521990776062, "rewards/accuracies": 0.625, "rewards/chosen": -0.07170554995536804, "rewards/margins": 0.2338290810585022, "rewards/rejected": -0.30553463101387024, "step": 4202 }, { "epoch": 2.5639774287021506, "grad_norm": 1.691514015197754, "learning_rate": 3.9113288426209424e-06, "log_odds_chosen": 3.481081485748291, "log_odds_ratio": -0.31461310386657715, "logits/chosen": -0.9714505672454834, "logits/rejected": -0.9857861995697021, "logps/chosen": -0.9303921461105347, "logps/rejected": -3.892129898071289, "loss": 1.1478, "nll_loss": 1.1795730590820312, "rewards/accuracies": 0.75, "rewards/chosen": -0.09303921461105347, "rewards/margins": 0.2961737811565399, "rewards/rejected": -0.38921302556991577, "step": 4203 }, { "epoch": 2.5645874637791675, "grad_norm": 4.118432521820068, "learning_rate": 3.910349050826699e-06, "log_odds_chosen": 2.6763179302215576, "log_odds_ratio": -0.3628229796886444, "logits/chosen": -0.8493756651878357, "logits/rejected": -0.8793034553527832, "logps/chosen": -0.8145999908447266, "logps/rejected": -2.997434616088867, "loss": 1.0532, "nll_loss": 0.9031227827072144, "rewards/accuracies": 0.75, "rewards/chosen": -0.08145999908447266, "rewards/margins": 0.21828347444534302, "rewards/rejected": -0.2997434437274933, "step": 4204 }, { "epoch": 2.5651974988561843, "grad_norm": 4.55180025100708, "learning_rate": 3.9093692590324555e-06, "log_odds_chosen": 1.2800395488739014, "log_odds_ratio": -0.48652249574661255, "logits/chosen": -0.8538457751274109, "logits/rejected": -0.8851788640022278, "logps/chosen": -0.8495097160339355, "logps/rejected": -1.7696161270141602, "loss": 1.0614, "nll_loss": 1.1502480506896973, "rewards/accuracies": 0.625, "rewards/chosen": -0.08495096862316132, "rewards/margins": 0.09201064705848694, "rewards/rejected": -0.17696160078048706, "step": 4205 }, { "epoch": 2.565807533933201, "grad_norm": 1.8262901306152344, "learning_rate": 3.908389467238212e-06, "log_odds_chosen": 2.208916664123535, "log_odds_ratio": -0.22446653246879578, "logits/chosen": -0.7374923229217529, "logits/rejected": -0.7936844825744629, "logps/chosen": -0.6420237421989441, "logps/rejected": -2.2539474964141846, "loss": 0.8382, "nll_loss": 0.7690433859825134, "rewards/accuracies": 0.875, "rewards/chosen": -0.06420237571001053, "rewards/margins": 0.1611923724412918, "rewards/rejected": -0.22539475560188293, "step": 4206 }, { "epoch": 2.566417569010218, "grad_norm": 5.472377777099609, "learning_rate": 3.907409675443968e-06, "log_odds_chosen": 2.0164732933044434, "log_odds_ratio": -0.33251166343688965, "logits/chosen": -0.7257487773895264, "logits/rejected": -0.8668457269668579, "logps/chosen": -0.6807957887649536, "logps/rejected": -2.113723039627075, "loss": 0.9881, "nll_loss": 1.0419186353683472, "rewards/accuracies": 0.875, "rewards/chosen": -0.06807957589626312, "rewards/margins": 0.14329272508621216, "rewards/rejected": -0.21137231588363647, "step": 4207 }, { "epoch": 2.567027604087235, "grad_norm": 1.5518684387207031, "learning_rate": 3.906429883649725e-06, "log_odds_chosen": 2.4440038204193115, "log_odds_ratio": -0.5080345273017883, "logits/chosen": -1.0494199991226196, "logits/rejected": -1.0713722705841064, "logps/chosen": -1.0002055168151855, "logps/rejected": -3.1695799827575684, "loss": 0.962, "nll_loss": 1.2997486591339111, "rewards/accuracies": 0.5, "rewards/chosen": -0.10002054274082184, "rewards/margins": 0.21693746745586395, "rewards/rejected": -0.3169580399990082, "step": 4208 }, { "epoch": 2.5676376391642517, "grad_norm": 4.78380823135376, "learning_rate": 3.905450091855481e-06, "log_odds_chosen": 0.5856422781944275, "log_odds_ratio": -0.5164691209793091, "logits/chosen": -1.0665680170059204, "logits/rejected": -1.0863478183746338, "logps/chosen": -1.059637427330017, "logps/rejected": -1.527961015701294, "loss": 1.1896, "nll_loss": 1.3090659379959106, "rewards/accuracies": 0.75, "rewards/chosen": -0.10596375167369843, "rewards/margins": 0.04683235287666321, "rewards/rejected": -0.15279610455036163, "step": 4209 }, { "epoch": 2.568247674241269, "grad_norm": 2.9780445098876953, "learning_rate": 3.904470300061237e-06, "log_odds_chosen": 2.2777748107910156, "log_odds_ratio": -0.4242507517337799, "logits/chosen": -0.9385533332824707, "logits/rejected": -0.91972815990448, "logps/chosen": -0.771190881729126, "logps/rejected": -2.5151896476745605, "loss": 1.0565, "nll_loss": 1.029404640197754, "rewards/accuracies": 0.75, "rewards/chosen": -0.07711908221244812, "rewards/margins": 0.17439989745616913, "rewards/rejected": -0.25151896476745605, "step": 4210 }, { "epoch": 2.568857709318286, "grad_norm": 1.192165493965149, "learning_rate": 3.903490508266993e-06, "log_odds_chosen": 1.6787232160568237, "log_odds_ratio": -0.3558674156665802, "logits/chosen": -0.8720102310180664, "logits/rejected": -0.913337230682373, "logps/chosen": -0.668601930141449, "logps/rejected": -1.816113829612732, "loss": 1.0017, "nll_loss": 0.8867664337158203, "rewards/accuracies": 0.625, "rewards/chosen": -0.06686019152402878, "rewards/margins": 0.1147511899471283, "rewards/rejected": -0.18161138892173767, "step": 4211 }, { "epoch": 2.5694677443953027, "grad_norm": 1.8710702657699585, "learning_rate": 3.902510716472749e-06, "log_odds_chosen": 3.1189522743225098, "log_odds_ratio": -0.36509427428245544, "logits/chosen": -0.8718132376670837, "logits/rejected": -0.9646251201629639, "logps/chosen": -0.7136286497116089, "logps/rejected": -3.239816665649414, "loss": 1.1466, "nll_loss": 0.9801811575889587, "rewards/accuracies": 0.625, "rewards/chosen": -0.07136286795139313, "rewards/margins": 0.25261878967285156, "rewards/rejected": -0.3239816725254059, "step": 4212 }, { "epoch": 2.5700777794723195, "grad_norm": 4.382176399230957, "learning_rate": 3.901530924678505e-06, "log_odds_chosen": 1.5484538078308105, "log_odds_ratio": -0.5571126937866211, "logits/chosen": -1.1340234279632568, "logits/rejected": -1.0330623388290405, "logps/chosen": -0.8429380655288696, "logps/rejected": -2.175462245941162, "loss": 1.1535, "nll_loss": 1.1550722122192383, "rewards/accuracies": 0.5, "rewards/chosen": -0.08429381251335144, "rewards/margins": 0.13325242698192596, "rewards/rejected": -0.2175462245941162, "step": 4213 }, { "epoch": 2.570687814549337, "grad_norm": 1.3656123876571655, "learning_rate": 3.9005511328842615e-06, "log_odds_chosen": 0.9798117876052856, "log_odds_ratio": -0.574055552482605, "logits/chosen": -1.037463665008545, "logits/rejected": -1.1299176216125488, "logps/chosen": -0.8715523481369019, "logps/rejected": -1.572011947631836, "loss": 1.1678, "nll_loss": 1.2064650058746338, "rewards/accuracies": 0.75, "rewards/chosen": -0.08715523779392242, "rewards/margins": 0.07004596292972565, "rewards/rejected": -0.15720121562480927, "step": 4214 }, { "epoch": 2.5712978496263537, "grad_norm": 2.5000314712524414, "learning_rate": 3.8995713410900184e-06, "log_odds_chosen": 1.9486526250839233, "log_odds_ratio": -0.3660843074321747, "logits/chosen": -0.7226138114929199, "logits/rejected": -0.7533018589019775, "logps/chosen": -0.6670228838920593, "logps/rejected": -1.940826177597046, "loss": 1.0248, "nll_loss": 0.8979339599609375, "rewards/accuracies": 0.875, "rewards/chosen": -0.06670229136943817, "rewards/margins": 0.12738032639026642, "rewards/rejected": -0.1940826028585434, "step": 4215 }, { "epoch": 2.5719078847033705, "grad_norm": 1.7736936807632446, "learning_rate": 3.8985915492957746e-06, "log_odds_chosen": 1.40361750125885, "log_odds_ratio": -0.37624412775039673, "logits/chosen": -0.9150797128677368, "logits/rejected": -1.0133849382400513, "logps/chosen": -0.9359891414642334, "logps/rejected": -2.056155204772949, "loss": 1.1201, "nll_loss": 1.0792961120605469, "rewards/accuracies": 0.875, "rewards/chosen": -0.09359891712665558, "rewards/margins": 0.11201660335063934, "rewards/rejected": -0.20561552047729492, "step": 4216 }, { "epoch": 2.5725179197803874, "grad_norm": 1.5517159700393677, "learning_rate": 3.897611757501531e-06, "log_odds_chosen": 2.1395936012268066, "log_odds_ratio": -0.4538750946521759, "logits/chosen": -0.547059178352356, "logits/rejected": -0.6472848057746887, "logps/chosen": -0.6109744310379028, "logps/rejected": -2.27836275100708, "loss": 1.1169, "nll_loss": 0.9830483198165894, "rewards/accuracies": 0.75, "rewards/chosen": -0.06109745055437088, "rewards/margins": 0.166738823056221, "rewards/rejected": -0.2278362661600113, "step": 4217 }, { "epoch": 2.573127954857404, "grad_norm": 1.483891487121582, "learning_rate": 3.896631965707287e-06, "log_odds_chosen": 3.0328030586242676, "log_odds_ratio": -0.40154150128364563, "logits/chosen": -0.7325986623764038, "logits/rejected": -0.9182659387588501, "logps/chosen": -0.7173712253570557, "logps/rejected": -3.2090580463409424, "loss": 1.0068, "nll_loss": 0.756966769695282, "rewards/accuracies": 0.625, "rewards/chosen": -0.0717371255159378, "rewards/margins": 0.24916867911815643, "rewards/rejected": -0.32090580463409424, "step": 4218 }, { "epoch": 2.573737989934421, "grad_norm": 1.2082206010818481, "learning_rate": 3.895652173913044e-06, "log_odds_chosen": 1.6846102476119995, "log_odds_ratio": -0.34163588285446167, "logits/chosen": -0.8969293832778931, "logits/rejected": -0.958670973777771, "logps/chosen": -0.8702859878540039, "logps/rejected": -2.1018872261047363, "loss": 0.9689, "nll_loss": 0.990764319896698, "rewards/accuracies": 0.875, "rewards/chosen": -0.08702860027551651, "rewards/margins": 0.12316011637449265, "rewards/rejected": -0.21018873155117035, "step": 4219 }, { "epoch": 2.574348025011438, "grad_norm": 1.573950171470642, "learning_rate": 3.894672382118799e-06, "log_odds_chosen": 1.1449189186096191, "log_odds_ratio": -0.5043259859085083, "logits/chosen": -0.8491343259811401, "logits/rejected": -0.6830222606658936, "logps/chosen": -0.715827465057373, "logps/rejected": -1.5907307863235474, "loss": 1.0737, "nll_loss": 0.9046357870101929, "rewards/accuracies": 0.625, "rewards/chosen": -0.07158274948596954, "rewards/margins": 0.08749033510684967, "rewards/rejected": -0.1590730845928192, "step": 4220 }, { "epoch": 2.574958060088455, "grad_norm": 0.9345664381980896, "learning_rate": 3.893692590324556e-06, "log_odds_chosen": 1.3008759021759033, "log_odds_ratio": -0.41818690299987793, "logits/chosen": -0.7229029536247253, "logits/rejected": -0.7779220938682556, "logps/chosen": -0.7727938890457153, "logps/rejected": -1.705386757850647, "loss": 0.9605, "nll_loss": 0.8012012839317322, "rewards/accuracies": 0.75, "rewards/chosen": -0.07727938890457153, "rewards/margins": 0.0932593047618866, "rewards/rejected": -0.17053867876529694, "step": 4221 }, { "epoch": 2.575568095165472, "grad_norm": 1.3810536861419678, "learning_rate": 3.892712798530312e-06, "log_odds_chosen": 1.3816301822662354, "log_odds_ratio": -0.4351022243499756, "logits/chosen": -0.9606115221977234, "logits/rejected": -0.9928082227706909, "logps/chosen": -0.7446951866149902, "logps/rejected": -1.7132912874221802, "loss": 0.9744, "nll_loss": 1.0944589376449585, "rewards/accuracies": 0.75, "rewards/chosen": -0.07446952164173126, "rewards/margins": 0.09685960412025452, "rewards/rejected": -0.17132914066314697, "step": 4222 }, { "epoch": 2.576178130242489, "grad_norm": 1.4460933208465576, "learning_rate": 3.891733006736068e-06, "log_odds_chosen": 2.1105031967163086, "log_odds_ratio": -0.39413267374038696, "logits/chosen": -0.813779354095459, "logits/rejected": -1.0927485227584839, "logps/chosen": -0.786779522895813, "logps/rejected": -2.2059485912323, "loss": 1.1893, "nll_loss": 1.1967506408691406, "rewards/accuracies": 0.625, "rewards/chosen": -0.0786779522895813, "rewards/margins": 0.1419169008731842, "rewards/rejected": -0.2205948680639267, "step": 4223 }, { "epoch": 2.5767881653195057, "grad_norm": 5.66734504699707, "learning_rate": 3.890753214941824e-06, "log_odds_chosen": 1.5934665203094482, "log_odds_ratio": -0.5080231428146362, "logits/chosen": -0.9422160983085632, "logits/rejected": -0.9418798089027405, "logps/chosen": -0.9937832355499268, "logps/rejected": -2.2050721645355225, "loss": 1.1146, "nll_loss": 1.0343642234802246, "rewards/accuracies": 0.75, "rewards/chosen": -0.09937833249568939, "rewards/margins": 0.12112888693809509, "rewards/rejected": -0.2205072045326233, "step": 4224 }, { "epoch": 2.577398200396523, "grad_norm": 1.0664716958999634, "learning_rate": 3.889773423147581e-06, "log_odds_chosen": 0.8080174326896667, "log_odds_ratio": -0.45174479484558105, "logits/chosen": -0.8796035051345825, "logits/rejected": -1.00448739528656, "logps/chosen": -0.7957153916358948, "logps/rejected": -1.2658841609954834, "loss": 0.8614, "nll_loss": 0.8519796133041382, "rewards/accuracies": 0.75, "rewards/chosen": -0.07957153767347336, "rewards/margins": 0.047016892582178116, "rewards/rejected": -0.12658843398094177, "step": 4225 }, { "epoch": 2.57800823547354, "grad_norm": 2.047703266143799, "learning_rate": 3.8887936313533375e-06, "log_odds_chosen": 1.7968056201934814, "log_odds_ratio": -0.40753957629203796, "logits/chosen": -0.9289348125457764, "logits/rejected": -0.8957046270370483, "logps/chosen": -0.917960524559021, "logps/rejected": -2.1492114067077637, "loss": 1.1572, "nll_loss": 0.9861158132553101, "rewards/accuracies": 0.625, "rewards/chosen": -0.09179605543613434, "rewards/margins": 0.1231250911951065, "rewards/rejected": -0.21492114663124084, "step": 4226 }, { "epoch": 2.5786182705505567, "grad_norm": 1.2219730615615845, "learning_rate": 3.887813839559094e-06, "log_odds_chosen": 1.4461511373519897, "log_odds_ratio": -0.3680523633956909, "logits/chosen": -0.9155468344688416, "logits/rejected": -0.9555140733718872, "logps/chosen": -0.9045972228050232, "logps/rejected": -2.0264179706573486, "loss": 1.0727, "nll_loss": 1.2095719575881958, "rewards/accuracies": 0.75, "rewards/chosen": -0.09045971930027008, "rewards/margins": 0.11218208074569702, "rewards/rejected": -0.2026418149471283, "step": 4227 }, { "epoch": 2.5792283056275735, "grad_norm": 1.3525866270065308, "learning_rate": 3.88683404776485e-06, "log_odds_chosen": 2.7409281730651855, "log_odds_ratio": -0.292404443025589, "logits/chosen": -1.014453411102295, "logits/rejected": -1.0137194395065308, "logps/chosen": -0.6871867775917053, "logps/rejected": -2.782658100128174, "loss": 1.1512, "nll_loss": 1.0777254104614258, "rewards/accuracies": 0.875, "rewards/chosen": -0.06871867924928665, "rewards/margins": 0.20954710245132446, "rewards/rejected": -0.2782657742500305, "step": 4228 }, { "epoch": 2.5798383407045904, "grad_norm": 1.484947919845581, "learning_rate": 3.885854255970606e-06, "log_odds_chosen": 0.9133118391036987, "log_odds_ratio": -0.5787093043327332, "logits/chosen": -0.8734330534934998, "logits/rejected": -0.8774327039718628, "logps/chosen": -0.9949415922164917, "logps/rejected": -1.730285406112671, "loss": 1.1824, "nll_loss": 1.146622896194458, "rewards/accuracies": 0.5, "rewards/chosen": -0.09949415922164917, "rewards/margins": 0.07353438436985016, "rewards/rejected": -0.17302854359149933, "step": 4229 }, { "epoch": 2.5804483757816072, "grad_norm": 2.5829918384552, "learning_rate": 3.884874464176363e-06, "log_odds_chosen": 2.365797281265259, "log_odds_ratio": -0.3694819509983063, "logits/chosen": -0.8771257400512695, "logits/rejected": -0.8494343161582947, "logps/chosen": -0.6432590484619141, "logps/rejected": -2.36385440826416, "loss": 1.056, "nll_loss": 1.0091552734375, "rewards/accuracies": 0.75, "rewards/chosen": -0.06432590633630753, "rewards/margins": 0.1720595359802246, "rewards/rejected": -0.23638543486595154, "step": 4230 }, { "epoch": 2.5810584108586245, "grad_norm": 1.2042254209518433, "learning_rate": 3.883894672382118e-06, "log_odds_chosen": 1.0030570030212402, "log_odds_ratio": -0.49648019671440125, "logits/chosen": -0.9084956645965576, "logits/rejected": -1.052351951599121, "logps/chosen": -0.7501696944236755, "logps/rejected": -1.3568894863128662, "loss": 1.0436, "nll_loss": 1.0498145818710327, "rewards/accuracies": 0.5, "rewards/chosen": -0.07501697540283203, "rewards/margins": 0.060671962797641754, "rewards/rejected": -0.13568894565105438, "step": 4231 }, { "epoch": 2.5816684459356414, "grad_norm": 1.0293999910354614, "learning_rate": 3.882914880587875e-06, "log_odds_chosen": 0.3898460566997528, "log_odds_ratio": -0.6426286697387695, "logits/chosen": -0.8154678344726562, "logits/rejected": -0.7947645783424377, "logps/chosen": -0.8039875030517578, "logps/rejected": -1.007995843887329, "loss": 1.0842, "nll_loss": 0.9215748906135559, "rewards/accuracies": 0.375, "rewards/chosen": -0.08039875328540802, "rewards/margins": 0.020400840789079666, "rewards/rejected": -0.10079959034919739, "step": 4232 }, { "epoch": 2.5822784810126582, "grad_norm": 1.5073882341384888, "learning_rate": 3.881935088793631e-06, "log_odds_chosen": 2.358356475830078, "log_odds_ratio": -0.4528135359287262, "logits/chosen": -0.7706970572471619, "logits/rejected": -0.936108410358429, "logps/chosen": -0.6520084738731384, "logps/rejected": -2.556206226348877, "loss": 1.2068, "nll_loss": 1.1859208345413208, "rewards/accuracies": 0.75, "rewards/chosen": -0.06520084291696548, "rewards/margins": 0.1904197782278061, "rewards/rejected": -0.2556206285953522, "step": 4233 }, { "epoch": 2.582888516089675, "grad_norm": 2.0786614418029785, "learning_rate": 3.880955296999387e-06, "log_odds_chosen": 0.7781984806060791, "log_odds_ratio": -0.6004945635795593, "logits/chosen": -1.0964620113372803, "logits/rejected": -1.0790023803710938, "logps/chosen": -1.0380889177322388, "logps/rejected": -1.7142155170440674, "loss": 1.2203, "nll_loss": 1.3435885906219482, "rewards/accuracies": 0.5, "rewards/chosen": -0.10380889475345612, "rewards/margins": 0.0676126703619957, "rewards/rejected": -0.17142155766487122, "step": 4234 }, { "epoch": 2.5834985511666924, "grad_norm": 4.2371015548706055, "learning_rate": 3.8799755052051434e-06, "log_odds_chosen": 1.0016942024230957, "log_odds_ratio": -0.5196270942687988, "logits/chosen": -0.9837183356285095, "logits/rejected": -1.0271822214126587, "logps/chosen": -0.6892567276954651, "logps/rejected": -1.4192613363265991, "loss": 0.9864, "nll_loss": 0.9997676014900208, "rewards/accuracies": 0.625, "rewards/chosen": -0.06892567127943039, "rewards/margins": 0.073000468313694, "rewards/rejected": -0.1419261395931244, "step": 4235 }, { "epoch": 2.584108586243709, "grad_norm": 1.1952685117721558, "learning_rate": 3.8789957134109e-06, "log_odds_chosen": 0.6928735971450806, "log_odds_ratio": -0.6140403747558594, "logits/chosen": -1.1736862659454346, "logits/rejected": -1.0262938737869263, "logps/chosen": -1.0534930229187012, "logps/rejected": -1.6080172061920166, "loss": 1.2168, "nll_loss": 1.276003122329712, "rewards/accuracies": 0.75, "rewards/chosen": -0.10534931719303131, "rewards/margins": 0.05545240640640259, "rewards/rejected": -0.1608017235994339, "step": 4236 }, { "epoch": 2.584718621320726, "grad_norm": 0.9575546383857727, "learning_rate": 3.8780159216166565e-06, "log_odds_chosen": 2.1169466972351074, "log_odds_ratio": -0.3386126160621643, "logits/chosen": -0.8944172263145447, "logits/rejected": -1.1035116910934448, "logps/chosen": -0.649846076965332, "logps/rejected": -2.2013449668884277, "loss": 1.0229, "nll_loss": 0.7590599060058594, "rewards/accuracies": 0.75, "rewards/chosen": -0.06498460471630096, "rewards/margins": 0.1551498919725418, "rewards/rejected": -0.22013451159000397, "step": 4237 }, { "epoch": 2.585328656397743, "grad_norm": 1.7304216623306274, "learning_rate": 3.877036129822413e-06, "log_odds_chosen": 0.9748691320419312, "log_odds_ratio": -0.5266681909561157, "logits/chosen": -1.1669654846191406, "logits/rejected": -1.0441848039627075, "logps/chosen": -0.9306682348251343, "logps/rejected": -1.7116777896881104, "loss": 1.0647, "nll_loss": 1.034813642501831, "rewards/accuracies": 0.625, "rewards/chosen": -0.09306682646274567, "rewards/margins": 0.07810097187757492, "rewards/rejected": -0.17116779088974, "step": 4238 }, { "epoch": 2.5859386914747597, "grad_norm": 2.290419578552246, "learning_rate": 3.876056338028169e-06, "log_odds_chosen": 1.8279321193695068, "log_odds_ratio": -0.3957347273826599, "logits/chosen": -0.8914743661880493, "logits/rejected": -0.9522004127502441, "logps/chosen": -0.8108446598052979, "logps/rejected": -2.264981985092163, "loss": 1.0053, "nll_loss": 1.0153851509094238, "rewards/accuracies": 0.625, "rewards/chosen": -0.08108446002006531, "rewards/margins": 0.14541374146938324, "rewards/rejected": -0.22649820148944855, "step": 4239 }, { "epoch": 2.5865487265517766, "grad_norm": 2.273953676223755, "learning_rate": 3.875076546233925e-06, "log_odds_chosen": 1.783050537109375, "log_odds_ratio": -0.4590241014957428, "logits/chosen": -0.833108127117157, "logits/rejected": -0.9644286632537842, "logps/chosen": -0.6738054156303406, "logps/rejected": -1.8926951885223389, "loss": 1.0023, "nll_loss": 0.970906138420105, "rewards/accuracies": 0.625, "rewards/chosen": -0.06738054752349854, "rewards/margins": 0.12188898026943207, "rewards/rejected": -0.1892695277929306, "step": 4240 }, { "epoch": 2.5871587616287934, "grad_norm": 5.944943428039551, "learning_rate": 3.874096754439681e-06, "log_odds_chosen": 2.1067605018615723, "log_odds_ratio": -0.700459897518158, "logits/chosen": -0.9781517386436462, "logits/rejected": -0.9930105805397034, "logps/chosen": -0.9324536919593811, "logps/rejected": -2.740626811981201, "loss": 1.0656, "nll_loss": 0.9982568025588989, "rewards/accuracies": 0.5, "rewards/chosen": -0.09324537217617035, "rewards/margins": 0.18081732094287872, "rewards/rejected": -0.2740626931190491, "step": 4241 }, { "epoch": 2.5877687967058107, "grad_norm": 2.303795576095581, "learning_rate": 3.873116962645438e-06, "log_odds_chosen": 2.7440624237060547, "log_odds_ratio": -0.3264999985694885, "logits/chosen": -0.908795952796936, "logits/rejected": -1.043658971786499, "logps/chosen": -0.6883617639541626, "logps/rejected": -2.856809377670288, "loss": 1.1849, "nll_loss": 1.3046331405639648, "rewards/accuracies": 0.875, "rewards/chosen": -0.06883618235588074, "rewards/margins": 0.21684475243091583, "rewards/rejected": -0.2856809198856354, "step": 4242 }, { "epoch": 2.5883788317828276, "grad_norm": 1.2469550371170044, "learning_rate": 3.872137170851194e-06, "log_odds_chosen": 2.4546573162078857, "log_odds_ratio": -0.319068044424057, "logits/chosen": -0.7615734338760376, "logits/rejected": -0.8557273149490356, "logps/chosen": -0.6588907241821289, "logps/rejected": -2.6133217811584473, "loss": 1.0668, "nll_loss": 0.9263849258422852, "rewards/accuracies": 0.75, "rewards/chosen": -0.06588907539844513, "rewards/margins": 0.19544309377670288, "rewards/rejected": -0.2613321542739868, "step": 4243 }, { "epoch": 2.5889888668598444, "grad_norm": 2.2391581535339355, "learning_rate": 3.87115737905695e-06, "log_odds_chosen": 1.2568930387496948, "log_odds_ratio": -0.5215985774993896, "logits/chosen": -0.753142237663269, "logits/rejected": -0.8336951732635498, "logps/chosen": -0.7546430826187134, "logps/rejected": -1.7401282787322998, "loss": 1.1096, "nll_loss": 0.9038730263710022, "rewards/accuracies": 0.625, "rewards/chosen": -0.07546430826187134, "rewards/margins": 0.098548524081707, "rewards/rejected": -0.17401283979415894, "step": 4244 }, { "epoch": 2.5895989019368613, "grad_norm": 3.036450147628784, "learning_rate": 3.870177587262706e-06, "log_odds_chosen": 0.8400329947471619, "log_odds_ratio": -0.6469112634658813, "logits/chosen": -1.0813093185424805, "logits/rejected": -1.0768760442733765, "logps/chosen": -0.868790328502655, "logps/rejected": -1.5883193016052246, "loss": 1.0157, "nll_loss": 0.9396427273750305, "rewards/accuracies": 0.5, "rewards/chosen": -0.08687903732061386, "rewards/margins": 0.07195290178060532, "rewards/rejected": -0.15883192420005798, "step": 4245 }, { "epoch": 2.5902089370138786, "grad_norm": 3.171949625015259, "learning_rate": 3.8691977954684625e-06, "log_odds_chosen": 0.23873105645179749, "log_odds_ratio": -0.6779953241348267, "logits/chosen": -1.1177773475646973, "logits/rejected": -1.0368762016296387, "logps/chosen": -0.9473074078559875, "logps/rejected": -1.1349835395812988, "loss": 1.0782, "nll_loss": 1.0136940479278564, "rewards/accuracies": 0.625, "rewards/chosen": -0.09473074972629547, "rewards/margins": 0.018767613917589188, "rewards/rejected": -0.11349835991859436, "step": 4246 }, { "epoch": 2.5908189720908954, "grad_norm": 3.358757734298706, "learning_rate": 3.8682180036742194e-06, "log_odds_chosen": 1.3058212995529175, "log_odds_ratio": -0.49281835556030273, "logits/chosen": -0.9920964241027832, "logits/rejected": -1.0580570697784424, "logps/chosen": -1.0847541093826294, "logps/rejected": -2.1123032569885254, "loss": 1.2607, "nll_loss": 1.4158538579940796, "rewards/accuracies": 0.625, "rewards/chosen": -0.10847540199756622, "rewards/margins": 0.10275492072105408, "rewards/rejected": -0.2112303227186203, "step": 4247 }, { "epoch": 2.5914290071679122, "grad_norm": 5.152890682220459, "learning_rate": 3.867238211879975e-06, "log_odds_chosen": 0.6850304007530212, "log_odds_ratio": -0.6294436454772949, "logits/chosen": -0.7901198863983154, "logits/rejected": -0.8012063503265381, "logps/chosen": -1.0198676586151123, "logps/rejected": -1.5546021461486816, "loss": 1.1548, "nll_loss": 1.0692987442016602, "rewards/accuracies": 0.25, "rewards/chosen": -0.10198676586151123, "rewards/margins": 0.053473442792892456, "rewards/rejected": -0.1554602086544037, "step": 4248 }, { "epoch": 2.592039042244929, "grad_norm": 2.779430627822876, "learning_rate": 3.866258420085732e-06, "log_odds_chosen": 3.9308860301971436, "log_odds_ratio": -0.26029688119888306, "logits/chosen": -0.7890030741691589, "logits/rejected": -1.0638006925582886, "logps/chosen": -0.636652410030365, "logps/rejected": -3.848447561264038, "loss": 0.9873, "nll_loss": 0.7634432315826416, "rewards/accuracies": 0.875, "rewards/chosen": -0.0636652410030365, "rewards/margins": 0.3211795389652252, "rewards/rejected": -0.38484475016593933, "step": 4249 }, { "epoch": 2.592649077321946, "grad_norm": 5.3035759925842285, "learning_rate": 3.865278628291488e-06, "log_odds_chosen": 2.995617389678955, "log_odds_ratio": -0.30983471870422363, "logits/chosen": -0.8878329396247864, "logits/rejected": -1.0444811582565308, "logps/chosen": -0.7937213778495789, "logps/rejected": -3.241745948791504, "loss": 0.9717, "nll_loss": 0.9208175539970398, "rewards/accuracies": 0.625, "rewards/chosen": -0.07937213778495789, "rewards/margins": 0.24480244517326355, "rewards/rejected": -0.32417458295822144, "step": 4250 }, { "epoch": 2.593259112398963, "grad_norm": 2.86444354057312, "learning_rate": 3.864298836497244e-06, "log_odds_chosen": 1.2394812107086182, "log_odds_ratio": -0.5008701682090759, "logits/chosen": -0.9190319776535034, "logits/rejected": -0.889564037322998, "logps/chosen": -0.7083532214164734, "logps/rejected": -1.7662808895111084, "loss": 1.1412, "nll_loss": 0.9037683010101318, "rewards/accuracies": 0.75, "rewards/chosen": -0.07083532959222794, "rewards/margins": 0.10579276829957962, "rewards/rejected": -0.17662808299064636, "step": 4251 }, { "epoch": 2.5938691474759796, "grad_norm": 1.1267359256744385, "learning_rate": 3.863319044703e-06, "log_odds_chosen": 3.036813497543335, "log_odds_ratio": -0.4597627520561218, "logits/chosen": -0.8231545686721802, "logits/rejected": -1.0442830324172974, "logps/chosen": -0.7551353573799133, "logps/rejected": -3.3272838592529297, "loss": 1.1302, "nll_loss": 1.0187652111053467, "rewards/accuracies": 0.5, "rewards/chosen": -0.07551353424787521, "rewards/margins": 0.25721484422683716, "rewards/rejected": -0.33272838592529297, "step": 4252 }, { "epoch": 2.594479182552997, "grad_norm": 1.1803367137908936, "learning_rate": 3.862339252908757e-06, "log_odds_chosen": 2.4404382705688477, "log_odds_ratio": -0.35897618532180786, "logits/chosen": -0.6050529479980469, "logits/rejected": -0.9606463313102722, "logps/chosen": -0.6265166997909546, "logps/rejected": -2.356736898422241, "loss": 1.0102, "nll_loss": 0.8087305426597595, "rewards/accuracies": 0.875, "rewards/chosen": -0.06265167146921158, "rewards/margins": 0.17302201688289642, "rewards/rejected": -0.2356736809015274, "step": 4253 }, { "epoch": 2.5950892176300138, "grad_norm": 1.5080984830856323, "learning_rate": 3.861359461114513e-06, "log_odds_chosen": 2.0526180267333984, "log_odds_ratio": -0.3458113670349121, "logits/chosen": -1.0316559076309204, "logits/rejected": -0.9701136946678162, "logps/chosen": -0.6494516134262085, "logps/rejected": -2.1437954902648926, "loss": 1.1947, "nll_loss": 1.1884493827819824, "rewards/accuracies": 0.75, "rewards/chosen": -0.06494516134262085, "rewards/margins": 0.1494343876838684, "rewards/rejected": -0.21437954902648926, "step": 4254 }, { "epoch": 2.5956992527070306, "grad_norm": 1.4770194292068481, "learning_rate": 3.860379669320269e-06, "log_odds_chosen": 2.0844874382019043, "log_odds_ratio": -0.31258055567741394, "logits/chosen": -0.7113442420959473, "logits/rejected": -0.8483548760414124, "logps/chosen": -0.6230851411819458, "logps/rejected": -2.0267674922943115, "loss": 0.9815, "nll_loss": 0.8850222826004028, "rewards/accuracies": 0.875, "rewards/chosen": -0.06230851635336876, "rewards/margins": 0.14036822319030762, "rewards/rejected": -0.20267674326896667, "step": 4255 }, { "epoch": 2.5963092877840475, "grad_norm": 1.7534003257751465, "learning_rate": 3.859399877526025e-06, "log_odds_chosen": 2.088817596435547, "log_odds_ratio": -0.4839492440223694, "logits/chosen": -0.851058304309845, "logits/rejected": -1.0379289388656616, "logps/chosen": -0.9742376804351807, "logps/rejected": -2.838157892227173, "loss": 1.1821, "nll_loss": 1.2700817584991455, "rewards/accuracies": 0.625, "rewards/chosen": -0.09742376208305359, "rewards/margins": 0.18639200925827026, "rewards/rejected": -0.28381580114364624, "step": 4256 }, { "epoch": 2.5969193228610647, "grad_norm": 1.5115976333618164, "learning_rate": 3.8584200857317815e-06, "log_odds_chosen": 2.4801995754241943, "log_odds_ratio": -0.2631043791770935, "logits/chosen": -0.4850093126296997, "logits/rejected": -0.8252257108688354, "logps/chosen": -0.6343066096305847, "logps/rejected": -2.4696712493896484, "loss": 0.8654, "nll_loss": 0.7852708101272583, "rewards/accuracies": 0.875, "rewards/chosen": -0.06343066692352295, "rewards/margins": 0.18353646993637085, "rewards/rejected": -0.2469671219587326, "step": 4257 }, { "epoch": 2.5975293579380816, "grad_norm": 1.121177315711975, "learning_rate": 3.8574402939375385e-06, "log_odds_chosen": 1.5574215650558472, "log_odds_ratio": -0.46601635217666626, "logits/chosen": -0.7287129759788513, "logits/rejected": -0.8634572625160217, "logps/chosen": -0.829498291015625, "logps/rejected": -2.047025442123413, "loss": 0.8675, "nll_loss": 0.8458718657493591, "rewards/accuracies": 0.75, "rewards/chosen": -0.08294983208179474, "rewards/margins": 0.12175270915031433, "rewards/rejected": -0.20470255613327026, "step": 4258 }, { "epoch": 2.5981393930150984, "grad_norm": 6.598846435546875, "learning_rate": 3.856460502143295e-06, "log_odds_chosen": 0.4529258906841278, "log_odds_ratio": -0.5533697009086609, "logits/chosen": -0.48820096254348755, "logits/rejected": -0.7530921697616577, "logps/chosen": -0.7218910455703735, "logps/rejected": -0.9982673525810242, "loss": 1.0716, "nll_loss": 0.8574215769767761, "rewards/accuracies": 0.75, "rewards/chosen": -0.07218910753726959, "rewards/margins": 0.027637630701065063, "rewards/rejected": -0.09982673823833466, "step": 4259 }, { "epoch": 2.5987494280921153, "grad_norm": 3.49289608001709, "learning_rate": 3.855480710349051e-06, "log_odds_chosen": 2.617832660675049, "log_odds_ratio": -0.206495001912117, "logits/chosen": -0.8493295907974243, "logits/rejected": -0.9818099737167358, "logps/chosen": -0.7580441832542419, "logps/rejected": -2.781111478805542, "loss": 1.1818, "nll_loss": 1.0195878744125366, "rewards/accuracies": 1.0, "rewards/chosen": -0.07580441981554031, "rewards/margins": 0.20230674743652344, "rewards/rejected": -0.27811115980148315, "step": 4260 }, { "epoch": 2.599359463169132, "grad_norm": 2.0756068229675293, "learning_rate": 3.854500918554807e-06, "log_odds_chosen": 1.9006495475769043, "log_odds_ratio": -0.40348345041275024, "logits/chosen": -0.6346002817153931, "logits/rejected": -0.7024588584899902, "logps/chosen": -1.1561859846115112, "logps/rejected": -2.69575834274292, "loss": 1.0297, "nll_loss": 1.0233807563781738, "rewards/accuracies": 0.75, "rewards/chosen": -0.11561860144138336, "rewards/margins": 0.15395724773406982, "rewards/rejected": -0.269575834274292, "step": 4261 }, { "epoch": 2.599969498246149, "grad_norm": 3.6971943378448486, "learning_rate": 3.853521126760564e-06, "log_odds_chosen": 2.112997055053711, "log_odds_ratio": -0.4035312235355377, "logits/chosen": -0.9638786315917969, "logits/rejected": -1.0302029848098755, "logps/chosen": -0.8395925164222717, "logps/rejected": -2.596853733062744, "loss": 1.0325, "nll_loss": 1.046790361404419, "rewards/accuracies": 0.75, "rewards/chosen": -0.08395925909280777, "rewards/margins": 0.17572611570358276, "rewards/rejected": -0.25968536734580994, "step": 4262 }, { "epoch": 2.600579533323166, "grad_norm": 1.3604868650436401, "learning_rate": 3.852541334966319e-06, "log_odds_chosen": 2.370387554168701, "log_odds_ratio": -0.39657655358314514, "logits/chosen": -0.8501912355422974, "logits/rejected": -1.0301603078842163, "logps/chosen": -0.6905839443206787, "logps/rejected": -2.440767288208008, "loss": 1.0746, "nll_loss": 0.9040796756744385, "rewards/accuracies": 0.75, "rewards/chosen": -0.06905840337276459, "rewards/margins": 0.1750183403491974, "rewards/rejected": -0.24407672882080078, "step": 4263 }, { "epoch": 2.601189568400183, "grad_norm": 3.29518723487854, "learning_rate": 3.851561543172076e-06, "log_odds_chosen": 2.661831855773926, "log_odds_ratio": -0.46173030138015747, "logits/chosen": -0.8728258013725281, "logits/rejected": -0.9159009456634521, "logps/chosen": -0.7436725497245789, "logps/rejected": -3.025975465774536, "loss": 1.083, "nll_loss": 0.9148054122924805, "rewards/accuracies": 0.625, "rewards/chosen": -0.07436726242303848, "rewards/margins": 0.2282302975654602, "rewards/rejected": -0.3025975525379181, "step": 4264 }, { "epoch": 2.6017996034772, "grad_norm": 1.5955638885498047, "learning_rate": 3.850581751377832e-06, "log_odds_chosen": 1.2607808113098145, "log_odds_ratio": -0.47785788774490356, "logits/chosen": -0.6581249833106995, "logits/rejected": -0.7459889650344849, "logps/chosen": -0.7456508874893188, "logps/rejected": -1.711310863494873, "loss": 1.0534, "nll_loss": 1.0165314674377441, "rewards/accuracies": 0.625, "rewards/chosen": -0.07456508278846741, "rewards/margins": 0.09656598418951035, "rewards/rejected": -0.17113108932971954, "step": 4265 }, { "epoch": 2.602409638554217, "grad_norm": 1.484634518623352, "learning_rate": 3.849601959583588e-06, "log_odds_chosen": 1.1876134872436523, "log_odds_ratio": -0.41867560148239136, "logits/chosen": -0.7392091155052185, "logits/rejected": -0.7471413612365723, "logps/chosen": -0.6340216398239136, "logps/rejected": -1.3373342752456665, "loss": 0.9537, "nll_loss": 0.8972835540771484, "rewards/accuracies": 0.875, "rewards/chosen": -0.06340216845273972, "rewards/margins": 0.07033126801252365, "rewards/rejected": -0.13373343646526337, "step": 4266 }, { "epoch": 2.6030196736312337, "grad_norm": 1.090478539466858, "learning_rate": 3.8486221677893444e-06, "log_odds_chosen": -0.08156244456768036, "log_odds_ratio": -0.8461716175079346, "logits/chosen": -0.9168824553489685, "logits/rejected": -0.8610710501670837, "logps/chosen": -1.1049354076385498, "logps/rejected": -1.0845853090286255, "loss": 1.1485, "nll_loss": 1.2665956020355225, "rewards/accuracies": 0.375, "rewards/chosen": -0.11049355566501617, "rewards/margins": -0.002035028301179409, "rewards/rejected": -0.10845853388309479, "step": 4267 }, { "epoch": 2.603629708708251, "grad_norm": 1.5810205936431885, "learning_rate": 3.8476423759951005e-06, "log_odds_chosen": 0.8730342388153076, "log_odds_ratio": -0.5787173509597778, "logits/chosen": -0.8980653882026672, "logits/rejected": -0.9580508470535278, "logps/chosen": -0.7321243286132812, "logps/rejected": -1.3325234651565552, "loss": 0.9204, "nll_loss": 0.8319602012634277, "rewards/accuracies": 0.625, "rewards/chosen": -0.07321243733167648, "rewards/margins": 0.06003992259502411, "rewards/rejected": -0.13325235247612, "step": 4268 }, { "epoch": 2.604239743785268, "grad_norm": 1.7234370708465576, "learning_rate": 3.8466625842008575e-06, "log_odds_chosen": 0.7990055084228516, "log_odds_ratio": -0.5074607133865356, "logits/chosen": -0.9479701519012451, "logits/rejected": -0.9073817133903503, "logps/chosen": -0.6557734608650208, "logps/rejected": -1.2909291982650757, "loss": 1.0277, "nll_loss": 0.8753173351287842, "rewards/accuracies": 0.75, "rewards/chosen": -0.06557735055685043, "rewards/margins": 0.0635155662894249, "rewards/rejected": -0.12909291684627533, "step": 4269 }, { "epoch": 2.6048497788622846, "grad_norm": 1.3879239559173584, "learning_rate": 3.845682792406614e-06, "log_odds_chosen": 2.6422224044799805, "log_odds_ratio": -0.18045540153980255, "logits/chosen": -0.849098801612854, "logits/rejected": -0.9363636374473572, "logps/chosen": -0.5482141971588135, "logps/rejected": -2.3335914611816406, "loss": 0.9451, "nll_loss": 0.827081561088562, "rewards/accuracies": 1.0, "rewards/chosen": -0.05482141673564911, "rewards/margins": 0.17853772640228271, "rewards/rejected": -0.23335914313793182, "step": 4270 }, { "epoch": 2.6054598139393015, "grad_norm": 2.090266227722168, "learning_rate": 3.84470300061237e-06, "log_odds_chosen": 1.2411799430847168, "log_odds_ratio": -0.43692874908447266, "logits/chosen": -0.886072039604187, "logits/rejected": -1.0681953430175781, "logps/chosen": -0.8481515645980835, "logps/rejected": -1.7637213468551636, "loss": 1.0308, "nll_loss": 1.033575415611267, "rewards/accuracies": 0.625, "rewards/chosen": -0.08481515198945999, "rewards/margins": 0.09155698120594025, "rewards/rejected": -0.17637214064598083, "step": 4271 }, { "epoch": 2.6060698490163183, "grad_norm": 9.57577133178711, "learning_rate": 3.843723208818126e-06, "log_odds_chosen": 2.2233238220214844, "log_odds_ratio": -0.2795405089855194, "logits/chosen": -0.8111647963523865, "logits/rejected": -0.8496673703193665, "logps/chosen": -0.8034272789955139, "logps/rejected": -2.347712516784668, "loss": 1.049, "nll_loss": 1.080721378326416, "rewards/accuracies": 0.875, "rewards/chosen": -0.08034272491931915, "rewards/margins": 0.15442852675914764, "rewards/rejected": -0.2347712516784668, "step": 4272 }, { "epoch": 2.606679884093335, "grad_norm": 1.290582537651062, "learning_rate": 3.842743417023882e-06, "log_odds_chosen": 0.48584434390068054, "log_odds_ratio": -0.6456649899482727, "logits/chosen": -0.8462085127830505, "logits/rejected": -0.8701174259185791, "logps/chosen": -0.995675802230835, "logps/rejected": -1.3537570238113403, "loss": 1.0284, "nll_loss": 1.1268393993377686, "rewards/accuracies": 0.5, "rewards/chosen": -0.09956758469343185, "rewards/margins": 0.03580811247229576, "rewards/rejected": -0.13537569344043732, "step": 4273 }, { "epoch": 2.607289919170352, "grad_norm": 1.0513043403625488, "learning_rate": 3.841763625229638e-06, "log_odds_chosen": 2.2810847759246826, "log_odds_ratio": -0.33963483572006226, "logits/chosen": -0.8741494417190552, "logits/rejected": -1.0810617208480835, "logps/chosen": -0.6982393860816956, "logps/rejected": -2.345808982849121, "loss": 1.0471, "nll_loss": 1.0006846189498901, "rewards/accuracies": 0.75, "rewards/chosen": -0.06982394307851791, "rewards/margins": 0.16475698351860046, "rewards/rejected": -0.23458091914653778, "step": 4274 }, { "epoch": 2.6078999542473693, "grad_norm": 1.4805519580841064, "learning_rate": 3.840783833435395e-06, "log_odds_chosen": 0.43570676445961, "log_odds_ratio": -0.633265495300293, "logits/chosen": -0.8883519172668457, "logits/rejected": -0.9608461260795593, "logps/chosen": -0.7370352745056152, "logps/rejected": -1.0575767755508423, "loss": 1.1837, "nll_loss": 0.9110208749771118, "rewards/accuracies": 0.5, "rewards/chosen": -0.07370351999998093, "rewards/margins": 0.03205415979027748, "rewards/rejected": -0.1057576835155487, "step": 4275 }, { "epoch": 2.608509989324386, "grad_norm": 2.0585124492645264, "learning_rate": 3.839804041641151e-06, "log_odds_chosen": 1.555392861366272, "log_odds_ratio": -0.3952018618583679, "logits/chosen": -0.7104570865631104, "logits/rejected": -0.7386967539787292, "logps/chosen": -0.6579642295837402, "logps/rejected": -1.7210191488265991, "loss": 0.95, "nll_loss": 0.810612678527832, "rewards/accuracies": 0.75, "rewards/chosen": -0.06579641997814178, "rewards/margins": 0.10630550980567932, "rewards/rejected": -0.1721019446849823, "step": 4276 }, { "epoch": 2.609120024401403, "grad_norm": 1.2683560848236084, "learning_rate": 3.838824249846907e-06, "log_odds_chosen": 0.7304707169532776, "log_odds_ratio": -0.48063793778419495, "logits/chosen": -0.9880160689353943, "logits/rejected": -0.9434150457382202, "logps/chosen": -1.0459355115890503, "logps/rejected": -1.6590149402618408, "loss": 1.167, "nll_loss": 1.271380066871643, "rewards/accuracies": 0.75, "rewards/chosen": -0.10459354519844055, "rewards/margins": 0.06130794808268547, "rewards/rejected": -0.16590148210525513, "step": 4277 }, { "epoch": 2.60973005947842, "grad_norm": 1.6092942953109741, "learning_rate": 3.8378444580526635e-06, "log_odds_chosen": 1.0106871128082275, "log_odds_ratio": -0.45071181654930115, "logits/chosen": -1.0267071723937988, "logits/rejected": -0.979241132736206, "logps/chosen": -0.9179009199142456, "logps/rejected": -1.5843645334243774, "loss": 1.2043, "nll_loss": 1.0606762170791626, "rewards/accuracies": 0.75, "rewards/chosen": -0.0917900875210762, "rewards/margins": 0.06664636731147766, "rewards/rejected": -0.15843644738197327, "step": 4278 }, { "epoch": 2.610340094555437, "grad_norm": 1.8555220365524292, "learning_rate": 3.8368646662584204e-06, "log_odds_chosen": 1.352363109588623, "log_odds_ratio": -0.6810935735702515, "logits/chosen": -0.9288434982299805, "logits/rejected": -0.9806183576583862, "logps/chosen": -1.0374255180358887, "logps/rejected": -2.1680939197540283, "loss": 1.1969, "nll_loss": 1.3906134366989136, "rewards/accuracies": 0.375, "rewards/chosen": -0.1037425622344017, "rewards/margins": 0.11306684464216232, "rewards/rejected": -0.21680939197540283, "step": 4279 }, { "epoch": 2.610950129632454, "grad_norm": 1.4788315296173096, "learning_rate": 3.835884874464176e-06, "log_odds_chosen": 0.9769262075424194, "log_odds_ratio": -0.6482688188552856, "logits/chosen": -0.9370673894882202, "logits/rejected": -0.8726034164428711, "logps/chosen": -0.7934006452560425, "logps/rejected": -1.6800869703292847, "loss": 0.9072, "nll_loss": 0.9516374468803406, "rewards/accuracies": 0.375, "rewards/chosen": -0.07934007048606873, "rewards/margins": 0.08866862952709198, "rewards/rejected": -0.1680087000131607, "step": 4280 }, { "epoch": 2.611560164709471, "grad_norm": 0.9680260419845581, "learning_rate": 3.834905082669933e-06, "log_odds_chosen": 0.8972123861312866, "log_odds_ratio": -0.7579661011695862, "logits/chosen": -1.1402757167816162, "logits/rejected": -1.1035877466201782, "logps/chosen": -1.1646510362625122, "logps/rejected": -1.891223669052124, "loss": 1.1035, "nll_loss": 1.1659654378890991, "rewards/accuracies": 0.5, "rewards/chosen": -0.11646510660648346, "rewards/margins": 0.0726572722196579, "rewards/rejected": -0.18912237882614136, "step": 4281 }, { "epoch": 2.6121701997864877, "grad_norm": 1.786242127418518, "learning_rate": 3.833925290875689e-06, "log_odds_chosen": 0.3241123855113983, "log_odds_ratio": -0.7517644762992859, "logits/chosen": -0.769120454788208, "logits/rejected": -0.7298657894134521, "logps/chosen": -0.8699147701263428, "logps/rejected": -1.1066274642944336, "loss": 1.0523, "nll_loss": 0.9386612176895142, "rewards/accuracies": 0.5, "rewards/chosen": -0.08699148148298264, "rewards/margins": 0.023671265691518784, "rewards/rejected": -0.11066274344921112, "step": 4282 }, { "epoch": 2.6127802348635045, "grad_norm": 8.240397453308105, "learning_rate": 3.832945499081445e-06, "log_odds_chosen": 2.1444427967071533, "log_odds_ratio": -0.3680310547351837, "logits/chosen": -0.8825807571411133, "logits/rejected": -1.0093348026275635, "logps/chosen": -0.6883128881454468, "logps/rejected": -2.3609414100646973, "loss": 1.1263, "nll_loss": 0.8406698703765869, "rewards/accuracies": 0.875, "rewards/chosen": -0.06883127987384796, "rewards/margins": 0.16726286709308624, "rewards/rejected": -0.2360941469669342, "step": 4283 }, { "epoch": 2.6133902699405214, "grad_norm": 1.4000197649002075, "learning_rate": 3.831965707287201e-06, "log_odds_chosen": 1.7452143430709839, "log_odds_ratio": -0.32770657539367676, "logits/chosen": -0.7783811688423157, "logits/rejected": -0.7771419882774353, "logps/chosen": -0.678298830986023, "logps/rejected": -1.7434674501419067, "loss": 1.0037, "nll_loss": 0.8787227272987366, "rewards/accuracies": 0.875, "rewards/chosen": -0.06782989203929901, "rewards/margins": 0.10651685297489166, "rewards/rejected": -0.17434674501419067, "step": 4284 }, { "epoch": 2.6140003050175387, "grad_norm": 1.6536085605621338, "learning_rate": 3.830985915492957e-06, "log_odds_chosen": 1.5649161338806152, "log_odds_ratio": -0.4279210865497589, "logits/chosen": -1.0366051197052002, "logits/rejected": -1.0399333238601685, "logps/chosen": -0.8274667263031006, "logps/rejected": -1.8896235227584839, "loss": 0.9646, "nll_loss": 0.8800519704818726, "rewards/accuracies": 0.75, "rewards/chosen": -0.08274666965007782, "rewards/margins": 0.10621567815542221, "rewards/rejected": -0.18896235525608063, "step": 4285 }, { "epoch": 2.6146103400945555, "grad_norm": 1.2207481861114502, "learning_rate": 3.830006123698714e-06, "log_odds_chosen": 1.399378776550293, "log_odds_ratio": -0.4498305916786194, "logits/chosen": -0.9577309489250183, "logits/rejected": -0.8943331241607666, "logps/chosen": -0.9156191349029541, "logps/rejected": -1.946484923362732, "loss": 0.9209, "nll_loss": 1.0016967058181763, "rewards/accuracies": 0.625, "rewards/chosen": -0.09156192094087601, "rewards/margins": 0.10308659821748734, "rewards/rejected": -0.19464850425720215, "step": 4286 }, { "epoch": 2.6152203751715724, "grad_norm": 2.0943405628204346, "learning_rate": 3.82902633190447e-06, "log_odds_chosen": 0.6981187462806702, "log_odds_ratio": -0.7055518627166748, "logits/chosen": -0.9764264822006226, "logits/rejected": -1.010900616645813, "logps/chosen": -1.1948659420013428, "logps/rejected": -1.7809603214263916, "loss": 1.167, "nll_loss": 1.37489652633667, "rewards/accuracies": 0.375, "rewards/chosen": -0.11948660016059875, "rewards/margins": 0.05860944464802742, "rewards/rejected": -0.17809604108333588, "step": 4287 }, { "epoch": 2.615830410248589, "grad_norm": 4.047237873077393, "learning_rate": 3.828046540110226e-06, "log_odds_chosen": 1.03007173538208, "log_odds_ratio": -0.6803976893424988, "logits/chosen": -0.9339951276779175, "logits/rejected": -1.0788099765777588, "logps/chosen": -0.9657323360443115, "logps/rejected": -1.6256132125854492, "loss": 1.0995, "nll_loss": 1.0539650917053223, "rewards/accuracies": 0.5, "rewards/chosen": -0.09657324105501175, "rewards/margins": 0.06598809361457825, "rewards/rejected": -0.1625613421201706, "step": 4288 }, { "epoch": 2.6164404453256065, "grad_norm": 2.8407464027404785, "learning_rate": 3.8270667483159825e-06, "log_odds_chosen": 0.5477027893066406, "log_odds_ratio": -0.6784915924072266, "logits/chosen": -1.0066337585449219, "logits/rejected": -1.0311970710754395, "logps/chosen": -0.9362190961837769, "logps/rejected": -1.4674896001815796, "loss": 1.1102, "nll_loss": 1.12869131565094, "rewards/accuracies": 0.5, "rewards/chosen": -0.09362191706895828, "rewards/margins": 0.053127046674489975, "rewards/rejected": -0.14674896001815796, "step": 4289 }, { "epoch": 2.6170504804026233, "grad_norm": 1.5603076219558716, "learning_rate": 3.8260869565217395e-06, "log_odds_chosen": 0.9941888451576233, "log_odds_ratio": -0.46581751108169556, "logits/chosen": -0.978069543838501, "logits/rejected": -1.1477917432785034, "logps/chosen": -0.6202304363250732, "logps/rejected": -1.2574923038482666, "loss": 0.9429, "nll_loss": 0.913118839263916, "rewards/accuracies": 0.75, "rewards/chosen": -0.062023043632507324, "rewards/margins": 0.06372619420289993, "rewards/rejected": -0.12574924528598785, "step": 4290 }, { "epoch": 2.61766051547964, "grad_norm": 1.3919775485992432, "learning_rate": 3.825107164727495e-06, "log_odds_chosen": 0.985512375831604, "log_odds_ratio": -0.47896578907966614, "logits/chosen": -0.9881184101104736, "logits/rejected": -1.0259218215942383, "logps/chosen": -0.7118866443634033, "logps/rejected": -1.302778959274292, "loss": 0.975, "nll_loss": 0.9782640933990479, "rewards/accuracies": 0.75, "rewards/chosen": -0.07118866592645645, "rewards/margins": 0.059089239686727524, "rewards/rejected": -0.13027790188789368, "step": 4291 }, { "epoch": 2.618270550556657, "grad_norm": 1.7045300006866455, "learning_rate": 3.824127372933252e-06, "log_odds_chosen": 2.3028910160064697, "log_odds_ratio": -0.15443876385688782, "logits/chosen": -0.6773271560668945, "logits/rejected": -0.7854901552200317, "logps/chosen": -0.5874820947647095, "logps/rejected": -2.1151278018951416, "loss": 0.8563, "nll_loss": 0.679283618927002, "rewards/accuracies": 1.0, "rewards/chosen": -0.058748215436935425, "rewards/margins": 0.15276455879211426, "rewards/rejected": -0.21151278913021088, "step": 4292 }, { "epoch": 2.618880585633674, "grad_norm": 1.5356271266937256, "learning_rate": 3.823147581139008e-06, "log_odds_chosen": 2.802994728088379, "log_odds_ratio": -0.2281034141778946, "logits/chosen": -0.7477926015853882, "logits/rejected": -1.0437638759613037, "logps/chosen": -0.9932451248168945, "logps/rejected": -3.2804818153381348, "loss": 1.0406, "nll_loss": 1.0877625942230225, "rewards/accuracies": 0.875, "rewards/chosen": -0.09932450950145721, "rewards/margins": 0.2287236452102661, "rewards/rejected": -0.3280481696128845, "step": 4293 }, { "epoch": 2.6194906207106907, "grad_norm": 1.8909926414489746, "learning_rate": 3.822167789344764e-06, "log_odds_chosen": 1.3060331344604492, "log_odds_ratio": -0.5510847568511963, "logits/chosen": -0.9890987873077393, "logits/rejected": -0.8427395820617676, "logps/chosen": -0.804561197757721, "logps/rejected": -1.852162480354309, "loss": 0.9862, "nll_loss": 1.0266600847244263, "rewards/accuracies": 0.625, "rewards/chosen": -0.08045611530542374, "rewards/margins": 0.10476012527942657, "rewards/rejected": -0.1852162480354309, "step": 4294 }, { "epoch": 2.6201006557877076, "grad_norm": 1.1643284559249878, "learning_rate": 3.82118799755052e-06, "log_odds_chosen": 1.8931066989898682, "log_odds_ratio": -0.6561031341552734, "logits/chosen": -0.8314132690429688, "logits/rejected": -0.9126268625259399, "logps/chosen": -1.0224400758743286, "logps/rejected": -2.7942519187927246, "loss": 0.9972, "nll_loss": 1.0698809623718262, "rewards/accuracies": 0.375, "rewards/chosen": -0.10224401205778122, "rewards/margins": 0.1771811693906784, "rewards/rejected": -0.2794252038002014, "step": 4295 }, { "epoch": 2.620710690864725, "grad_norm": 2.7769992351531982, "learning_rate": 3.820208205756277e-06, "log_odds_chosen": 0.454311728477478, "log_odds_ratio": -0.5960217714309692, "logits/chosen": -0.9574064016342163, "logits/rejected": -1.006616234779358, "logps/chosen": -0.918907105922699, "logps/rejected": -1.2283576726913452, "loss": 1.1525, "nll_loss": 0.957976758480072, "rewards/accuracies": 0.5, "rewards/chosen": -0.09189070761203766, "rewards/margins": 0.03094504401087761, "rewards/rejected": -0.12283575534820557, "step": 4296 }, { "epoch": 2.6213207259417417, "grad_norm": 1.2938547134399414, "learning_rate": 3.819228413962033e-06, "log_odds_chosen": 1.1657536029815674, "log_odds_ratio": -0.608656644821167, "logits/chosen": -0.7599802613258362, "logits/rejected": -0.8073288202285767, "logps/chosen": -0.8355046510696411, "logps/rejected": -1.7525455951690674, "loss": 1.0057, "nll_loss": 1.0768691301345825, "rewards/accuracies": 0.375, "rewards/chosen": -0.08355046808719635, "rewards/margins": 0.0917041003704071, "rewards/rejected": -0.17525456845760345, "step": 4297 }, { "epoch": 2.6219307610187585, "grad_norm": 1.454646348953247, "learning_rate": 3.818248622167789e-06, "log_odds_chosen": 2.375389575958252, "log_odds_ratio": -0.2751104235649109, "logits/chosen": -0.7045317888259888, "logits/rejected": -0.941986620426178, "logps/chosen": -0.716353714466095, "logps/rejected": -2.605565071105957, "loss": 1.0439, "nll_loss": 0.8645015358924866, "rewards/accuracies": 0.875, "rewards/chosen": -0.07163536548614502, "rewards/margins": 0.18892115354537964, "rewards/rejected": -0.26055651903152466, "step": 4298 }, { "epoch": 2.6225407960957754, "grad_norm": 1.7652748823165894, "learning_rate": 3.817268830373545e-06, "log_odds_chosen": 2.1663553714752197, "log_odds_ratio": -0.24046742916107178, "logits/chosen": -0.6765834093093872, "logits/rejected": -0.6763708591461182, "logps/chosen": -0.6607190370559692, "logps/rejected": -2.30545973777771, "loss": 0.9866, "nll_loss": 0.8088389039039612, "rewards/accuracies": 1.0, "rewards/chosen": -0.06607190519571304, "rewards/margins": 0.16447408497333527, "rewards/rejected": -0.2305459827184677, "step": 4299 }, { "epoch": 2.6231508311727927, "grad_norm": 1.325985074043274, "learning_rate": 3.8162890385793015e-06, "log_odds_chosen": 3.30576753616333, "log_odds_ratio": -0.3325986862182617, "logits/chosen": -0.9717140197753906, "logits/rejected": -1.0226945877075195, "logps/chosen": -0.6970207691192627, "logps/rejected": -3.2327141761779785, "loss": 0.9898, "nll_loss": 0.9933268427848816, "rewards/accuracies": 0.75, "rewards/chosen": -0.06970207393169403, "rewards/margins": 0.2535693645477295, "rewards/rejected": -0.32327142357826233, "step": 4300 }, { "epoch": 2.6237608662498095, "grad_norm": 1.5466047525405884, "learning_rate": 3.815309246785058e-06, "log_odds_chosen": 2.929245948791504, "log_odds_ratio": -0.2344098836183548, "logits/chosen": -0.8819136023521423, "logits/rejected": -0.9971124529838562, "logps/chosen": -0.6852356791496277, "logps/rejected": -2.94745135307312, "loss": 0.8731, "nll_loss": 0.8265594244003296, "rewards/accuracies": 0.875, "rewards/chosen": -0.06852357089519501, "rewards/margins": 0.22622156143188477, "rewards/rejected": -0.29474514722824097, "step": 4301 }, { "epoch": 2.6243709013268264, "grad_norm": 1.8472239971160889, "learning_rate": 3.8143294549908146e-06, "log_odds_chosen": 1.4299870729446411, "log_odds_ratio": -0.6864186525344849, "logits/chosen": -1.060816764831543, "logits/rejected": -1.0377939939498901, "logps/chosen": -1.051045536994934, "logps/rejected": -2.3684892654418945, "loss": 1.1127, "nll_loss": 1.1409389972686768, "rewards/accuracies": 0.5, "rewards/chosen": -0.10510455071926117, "rewards/margins": 0.1317443698644638, "rewards/rejected": -0.23684893548488617, "step": 4302 }, { "epoch": 2.6249809364038432, "grad_norm": 1.284190058708191, "learning_rate": 3.8133496631965703e-06, "log_odds_chosen": 0.8727121353149414, "log_odds_ratio": -0.5914239883422852, "logits/chosen": -1.1552541255950928, "logits/rejected": -1.0468900203704834, "logps/chosen": -1.0563312768936157, "logps/rejected": -1.7836668491363525, "loss": 1.1708, "nll_loss": 1.1561328172683716, "rewards/accuracies": 0.5, "rewards/chosen": -0.10563312470912933, "rewards/margins": 0.072733573615551, "rewards/rejected": -0.17836669087409973, "step": 4303 }, { "epoch": 2.62559097148086, "grad_norm": 6.865411281585693, "learning_rate": 3.812369871402327e-06, "log_odds_chosen": 1.2438688278198242, "log_odds_ratio": -0.4625371992588043, "logits/chosen": -0.9833946228027344, "logits/rejected": -1.008864402770996, "logps/chosen": -0.9337056279182434, "logps/rejected": -1.874528169631958, "loss": 0.9875, "nll_loss": 1.1194934844970703, "rewards/accuracies": 0.75, "rewards/chosen": -0.09337057173252106, "rewards/margins": 0.09408225119113922, "rewards/rejected": -0.18745283782482147, "step": 4304 }, { "epoch": 2.626201006557877, "grad_norm": 1.1825164556503296, "learning_rate": 3.811390079608083e-06, "log_odds_chosen": 1.4225646257400513, "log_odds_ratio": -0.37659433484077454, "logits/chosen": -0.844785213470459, "logits/rejected": -0.7540754079818726, "logps/chosen": -0.7307537794113159, "logps/rejected": -1.7237166166305542, "loss": 1.0998, "nll_loss": 0.9231897592544556, "rewards/accuracies": 0.75, "rewards/chosen": -0.07307538390159607, "rewards/margins": 0.09929628670215607, "rewards/rejected": -0.17237165570259094, "step": 4305 }, { "epoch": 2.6268110416348938, "grad_norm": 1.1250429153442383, "learning_rate": 3.810410287813839e-06, "log_odds_chosen": 1.2144014835357666, "log_odds_ratio": -0.46274304389953613, "logits/chosen": -0.8485167026519775, "logits/rejected": -0.8668034076690674, "logps/chosen": -0.7639851570129395, "logps/rejected": -1.6097521781921387, "loss": 1.0411, "nll_loss": 0.8749001622200012, "rewards/accuracies": 0.75, "rewards/chosen": -0.07639852166175842, "rewards/margins": 0.08457671105861664, "rewards/rejected": -0.16097523272037506, "step": 4306 }, { "epoch": 2.627421076711911, "grad_norm": 4.321530818939209, "learning_rate": 3.8094304960195957e-06, "log_odds_chosen": 0.17797882854938507, "log_odds_ratio": -0.9013264775276184, "logits/chosen": -0.9679991006851196, "logits/rejected": -1.04341459274292, "logps/chosen": -1.2939646244049072, "logps/rejected": -1.3845475912094116, "loss": 1.1499, "nll_loss": 1.2825431823730469, "rewards/accuracies": 0.375, "rewards/chosen": -0.1293964684009552, "rewards/margins": 0.009058298543095589, "rewards/rejected": -0.13845475018024445, "step": 4307 }, { "epoch": 2.628031111788928, "grad_norm": 1.4014770984649658, "learning_rate": 3.808450704225352e-06, "log_odds_chosen": 0.952739417552948, "log_odds_ratio": -0.608336865901947, "logits/chosen": -0.8030543923377991, "logits/rejected": -0.9363179206848145, "logps/chosen": -0.9021302461624146, "logps/rejected": -1.6011196374893188, "loss": 0.9706, "nll_loss": 0.917059600353241, "rewards/accuracies": 0.5, "rewards/chosen": -0.09021303057670593, "rewards/margins": 0.06989894807338715, "rewards/rejected": -0.16011196374893188, "step": 4308 }, { "epoch": 2.6286411468659447, "grad_norm": 2.3774943351745605, "learning_rate": 3.8074709124311083e-06, "log_odds_chosen": 1.60835599899292, "log_odds_ratio": -0.43913769721984863, "logits/chosen": -1.027570366859436, "logits/rejected": -1.0772053003311157, "logps/chosen": -0.6403782367706299, "logps/rejected": -1.734181523323059, "loss": 1.0727, "nll_loss": 1.0693244934082031, "rewards/accuracies": 0.75, "rewards/chosen": -0.06403782218694687, "rewards/margins": 0.1093803271651268, "rewards/rejected": -0.17341814935207367, "step": 4309 }, { "epoch": 2.6292511819429616, "grad_norm": 1.245965600013733, "learning_rate": 3.8064911206368645e-06, "log_odds_chosen": 0.6913995742797852, "log_odds_ratio": -0.5409126281738281, "logits/chosen": -0.9841939806938171, "logits/rejected": -0.9209650754928589, "logps/chosen": -0.925737738609314, "logps/rejected": -1.4885081052780151, "loss": 1.0678, "nll_loss": 1.0120723247528076, "rewards/accuracies": 0.75, "rewards/chosen": -0.09257376939058304, "rewards/margins": 0.05627703666687012, "rewards/rejected": -0.14885081350803375, "step": 4310 }, { "epoch": 2.629861217019979, "grad_norm": 2.5971829891204834, "learning_rate": 3.8055113288426206e-06, "log_odds_chosen": 1.5013482570648193, "log_odds_ratio": -0.4337393045425415, "logits/chosen": -0.9938806295394897, "logits/rejected": -1.0575480461120605, "logps/chosen": -0.7711938619613647, "logps/rejected": -1.9021148681640625, "loss": 1.0292, "nll_loss": 0.827460765838623, "rewards/accuracies": 0.75, "rewards/chosen": -0.0771193876862526, "rewards/margins": 0.1130920946598053, "rewards/rejected": -0.1902114897966385, "step": 4311 }, { "epoch": 2.6304712520969957, "grad_norm": 4.044504642486572, "learning_rate": 3.804531537048377e-06, "log_odds_chosen": 2.9255149364471436, "log_odds_ratio": -0.3017886281013489, "logits/chosen": -0.9214194416999817, "logits/rejected": -1.1315014362335205, "logps/chosen": -0.661960244178772, "logps/rejected": -2.9974989891052246, "loss": 1.0278, "nll_loss": 0.8131468296051025, "rewards/accuracies": 0.875, "rewards/chosen": -0.0661960244178772, "rewards/margins": 0.23355385661125183, "rewards/rejected": -0.29974985122680664, "step": 4312 }, { "epoch": 2.6310812871740126, "grad_norm": 2.659839153289795, "learning_rate": 3.8035517452541337e-06, "log_odds_chosen": 2.8608250617980957, "log_odds_ratio": -0.24526627361774445, "logits/chosen": -0.6899465918540955, "logits/rejected": -0.8370599150657654, "logps/chosen": -0.6072534322738647, "logps/rejected": -2.7526450157165527, "loss": 0.9201, "nll_loss": 0.7558178305625916, "rewards/accuracies": 1.0, "rewards/chosen": -0.060725342482328415, "rewards/margins": 0.21453917026519775, "rewards/rejected": -0.2752645015716553, "step": 4313 }, { "epoch": 2.6316913222510294, "grad_norm": 1.10564386844635, "learning_rate": 3.8025719534598894e-06, "log_odds_chosen": 0.8486140966415405, "log_odds_ratio": -0.4855315685272217, "logits/chosen": -1.0103888511657715, "logits/rejected": -1.0119779109954834, "logps/chosen": -0.873291552066803, "logps/rejected": -1.5470634698867798, "loss": 1.1435, "nll_loss": 0.9758396148681641, "rewards/accuracies": 0.75, "rewards/chosen": -0.08732916414737701, "rewards/margins": 0.06737718731164932, "rewards/rejected": -0.15470635890960693, "step": 4314 }, { "epoch": 2.6323013573280463, "grad_norm": 1.080673336982727, "learning_rate": 3.801592161665646e-06, "log_odds_chosen": 4.684230804443359, "log_odds_ratio": -0.19552448391914368, "logits/chosen": -0.8631407022476196, "logits/rejected": -1.0342953205108643, "logps/chosen": -0.6844770908355713, "logps/rejected": -4.7306809425354, "loss": 0.9459, "nll_loss": 0.9365978240966797, "rewards/accuracies": 1.0, "rewards/chosen": -0.06844770908355713, "rewards/margins": 0.4046204090118408, "rewards/rejected": -0.47306811809539795, "step": 4315 }, { "epoch": 2.632911392405063, "grad_norm": 2.00773286819458, "learning_rate": 3.8006123698714025e-06, "log_odds_chosen": 2.582200765609741, "log_odds_ratio": -0.3739944100379944, "logits/chosen": -0.8754522204399109, "logits/rejected": -0.9188742637634277, "logps/chosen": -0.6907854080200195, "logps/rejected": -2.639443874359131, "loss": 1.023, "nll_loss": 0.9287771582603455, "rewards/accuracies": 0.75, "rewards/chosen": -0.06907853484153748, "rewards/margins": 0.1948658525943756, "rewards/rejected": -0.2639443874359131, "step": 4316 }, { "epoch": 2.63352142748208, "grad_norm": 1.760422945022583, "learning_rate": 3.799632578077158e-06, "log_odds_chosen": 2.4770150184631348, "log_odds_ratio": -0.2931733727455139, "logits/chosen": -0.7687503695487976, "logits/rejected": -0.9373614192008972, "logps/chosen": -0.9978625774383545, "logps/rejected": -3.0776827335357666, "loss": 1.0438, "nll_loss": 1.0166244506835938, "rewards/accuracies": 0.875, "rewards/chosen": -0.09978625923395157, "rewards/margins": 0.20798200368881226, "rewards/rejected": -0.3077682554721832, "step": 4317 }, { "epoch": 2.6341314625590972, "grad_norm": 1.7178680896759033, "learning_rate": 3.7986527862829147e-06, "log_odds_chosen": 1.9895626306533813, "log_odds_ratio": -0.4824838638305664, "logits/chosen": -0.9108842611312866, "logits/rejected": -1.0280975103378296, "logps/chosen": -0.8176158666610718, "logps/rejected": -2.386648416519165, "loss": 1.0831, "nll_loss": 1.0038378238677979, "rewards/accuracies": 0.625, "rewards/chosen": -0.08176159113645554, "rewards/margins": 0.15690326690673828, "rewards/rejected": -0.23866485059261322, "step": 4318 }, { "epoch": 2.634741497636114, "grad_norm": 1.4350371360778809, "learning_rate": 3.7976729944886713e-06, "log_odds_chosen": 1.5719548463821411, "log_odds_ratio": -0.4305034279823303, "logits/chosen": -1.0508649349212646, "logits/rejected": -1.0717662572860718, "logps/chosen": -1.0237005949020386, "logps/rejected": -2.3496079444885254, "loss": 1.1116, "nll_loss": 1.0502344369888306, "rewards/accuracies": 0.75, "rewards/chosen": -0.10237006843090057, "rewards/margins": 0.13259075582027435, "rewards/rejected": -0.23496080935001373, "step": 4319 }, { "epoch": 2.635351532713131, "grad_norm": 2.8200790882110596, "learning_rate": 3.7966932026944274e-06, "log_odds_chosen": 2.5289487838745117, "log_odds_ratio": -0.31704917550086975, "logits/chosen": -0.8670428991317749, "logits/rejected": -0.9995999932289124, "logps/chosen": -0.8145701289176941, "logps/rejected": -2.7305264472961426, "loss": 1.1306, "nll_loss": 0.9361762404441833, "rewards/accuracies": 0.75, "rewards/chosen": -0.08145701140165329, "rewards/margins": 0.19159561395645142, "rewards/rejected": -0.2730526328086853, "step": 4320 }, { "epoch": 2.635961567790148, "grad_norm": 1.3736186027526855, "learning_rate": 3.7957134109001835e-06, "log_odds_chosen": 3.0889949798583984, "log_odds_ratio": -0.22593359649181366, "logits/chosen": -0.795387327671051, "logits/rejected": -1.0707801580429077, "logps/chosen": -0.7123534679412842, "logps/rejected": -3.181637763977051, "loss": 1.1157, "nll_loss": 0.8092257976531982, "rewards/accuracies": 1.0, "rewards/chosen": -0.07123535126447678, "rewards/margins": 0.24692845344543457, "rewards/rejected": -0.31816381216049194, "step": 4321 }, { "epoch": 2.636571602867165, "grad_norm": 1.8812499046325684, "learning_rate": 3.79473361910594e-06, "log_odds_chosen": 1.1200798749923706, "log_odds_ratio": -0.422206312417984, "logits/chosen": -0.8081752061843872, "logits/rejected": -0.8907952904701233, "logps/chosen": -0.6603524684906006, "logps/rejected": -1.3768129348754883, "loss": 0.9793, "nll_loss": 0.8481268882751465, "rewards/accuracies": 0.875, "rewards/chosen": -0.06603525578975677, "rewards/margins": 0.0716460645198822, "rewards/rejected": -0.13768132030963898, "step": 4322 }, { "epoch": 2.637181637944182, "grad_norm": 4.983442783355713, "learning_rate": 3.793753827311696e-06, "log_odds_chosen": 1.9469764232635498, "log_odds_ratio": -0.36568596959114075, "logits/chosen": -0.7739536762237549, "logits/rejected": -0.9298064708709717, "logps/chosen": -0.7568433284759521, "logps/rejected": -2.2462656497955322, "loss": 0.9894, "nll_loss": 0.935234785079956, "rewards/accuracies": 0.75, "rewards/chosen": -0.07568433880805969, "rewards/margins": 0.148942232131958, "rewards/rejected": -0.2246265709400177, "step": 4323 }, { "epoch": 2.6377916730211988, "grad_norm": 1.5800052881240845, "learning_rate": 3.7927740355174523e-06, "log_odds_chosen": 0.9482339024543762, "log_odds_ratio": -0.47512108087539673, "logits/chosen": -0.7832314372062683, "logits/rejected": -0.7484384775161743, "logps/chosen": -0.7473793029785156, "logps/rejected": -1.3928990364074707, "loss": 1.0565, "nll_loss": 0.9896079301834106, "rewards/accuracies": 0.75, "rewards/chosen": -0.07473792880773544, "rewards/margins": 0.06455197185277939, "rewards/rejected": -0.13928988575935364, "step": 4324 }, { "epoch": 2.6384017080982156, "grad_norm": 1.600174903869629, "learning_rate": 3.7917942437232084e-06, "log_odds_chosen": 1.5144097805023193, "log_odds_ratio": -0.47448521852493286, "logits/chosen": -0.9673032760620117, "logits/rejected": -1.0064975023269653, "logps/chosen": -0.7847546339035034, "logps/rejected": -1.9590024948120117, "loss": 1.0851, "nll_loss": 1.0564042329788208, "rewards/accuracies": 0.75, "rewards/chosen": -0.0784754678606987, "rewards/margins": 0.11742477864027023, "rewards/rejected": -0.19590023159980774, "step": 4325 }, { "epoch": 2.6390117431752325, "grad_norm": 4.323711395263672, "learning_rate": 3.790814451928965e-06, "log_odds_chosen": 2.389491558074951, "log_odds_ratio": -0.27778512239456177, "logits/chosen": -0.7076393961906433, "logits/rejected": -0.7552371621131897, "logps/chosen": -0.5931438207626343, "logps/rejected": -2.420198917388916, "loss": 0.9779, "nll_loss": 0.676929771900177, "rewards/accuracies": 0.875, "rewards/chosen": -0.05931438133120537, "rewards/margins": 0.18270552158355713, "rewards/rejected": -0.2420198917388916, "step": 4326 }, { "epoch": 2.6396217782522493, "grad_norm": 1.2512120008468628, "learning_rate": 3.7898346601347215e-06, "log_odds_chosen": 1.1125969886779785, "log_odds_ratio": -0.6111111640930176, "logits/chosen": -1.0947930812835693, "logits/rejected": -1.0794695615768433, "logps/chosen": -1.009637475013733, "logps/rejected": -1.9370954036712646, "loss": 1.0384, "nll_loss": 0.936180830001831, "rewards/accuracies": 0.625, "rewards/chosen": -0.10096374899148941, "rewards/margins": 0.09274580329656601, "rewards/rejected": -0.19370955228805542, "step": 4327 }, { "epoch": 2.6402318133292666, "grad_norm": 1.527764916419983, "learning_rate": 3.788854868340477e-06, "log_odds_chosen": 2.6743462085723877, "log_odds_ratio": -0.343749076128006, "logits/chosen": -0.842073917388916, "logits/rejected": -0.8806486129760742, "logps/chosen": -0.8061806559562683, "logps/rejected": -2.9728453159332275, "loss": 1.1247, "nll_loss": 0.8646117448806763, "rewards/accuracies": 0.875, "rewards/chosen": -0.08061806112527847, "rewards/margins": 0.21666646003723145, "rewards/rejected": -0.2972845435142517, "step": 4328 }, { "epoch": 2.6408418484062834, "grad_norm": 1.547241449356079, "learning_rate": 3.7878750765462337e-06, "log_odds_chosen": 0.9556318521499634, "log_odds_ratio": -0.5431177616119385, "logits/chosen": -0.8537694215774536, "logits/rejected": -0.8797580003738403, "logps/chosen": -0.8908974528312683, "logps/rejected": -1.6218187808990479, "loss": 1.1209, "nll_loss": 0.9823822975158691, "rewards/accuracies": 0.75, "rewards/chosen": -0.08908975124359131, "rewards/margins": 0.07309213280677795, "rewards/rejected": -0.16218188405036926, "step": 4329 }, { "epoch": 2.6414518834833003, "grad_norm": 2.10744047164917, "learning_rate": 3.7868952847519903e-06, "log_odds_chosen": 1.3668320178985596, "log_odds_ratio": -0.37708020210266113, "logits/chosen": -0.9167557954788208, "logits/rejected": -0.9371655583381653, "logps/chosen": -0.9118241667747498, "logps/rejected": -1.9140794277191162, "loss": 1.0679, "nll_loss": 1.0530967712402344, "rewards/accuracies": 0.75, "rewards/chosen": -0.0911824107170105, "rewards/margins": 0.1002255380153656, "rewards/rejected": -0.1914079487323761, "step": 4330 }, { "epoch": 2.642061918560317, "grad_norm": 2.1950576305389404, "learning_rate": 3.785915492957746e-06, "log_odds_chosen": 1.6409895420074463, "log_odds_ratio": -0.2892477512359619, "logits/chosen": -0.8490453958511353, "logits/rejected": -0.89803147315979, "logps/chosen": -0.6716797351837158, "logps/rejected": -1.8809348344802856, "loss": 0.9676, "nll_loss": 1.08074152469635, "rewards/accuracies": 0.875, "rewards/chosen": -0.0671679750084877, "rewards/margins": 0.12092552334070206, "rewards/rejected": -0.18809348344802856, "step": 4331 }, { "epoch": 2.642671953637334, "grad_norm": 1.6689045429229736, "learning_rate": 3.7849357011635025e-06, "log_odds_chosen": 0.5125511884689331, "log_odds_ratio": -0.963519811630249, "logits/chosen": -1.0504134893417358, "logits/rejected": -1.1470489501953125, "logps/chosen": -0.8385409116744995, "logps/rejected": -1.3846473693847656, "loss": 1.3085, "nll_loss": 1.532053828239441, "rewards/accuracies": 0.625, "rewards/chosen": -0.08385409414768219, "rewards/margins": 0.054610639810562134, "rewards/rejected": -0.13846473395824432, "step": 4332 }, { "epoch": 2.6432819887143513, "grad_norm": 1.0944995880126953, "learning_rate": 3.783955909369259e-06, "log_odds_chosen": 1.4724640846252441, "log_odds_ratio": -0.39024367928504944, "logits/chosen": -0.8824821710586548, "logits/rejected": -0.9695615768432617, "logps/chosen": -0.7867274284362793, "logps/rejected": -1.8795124292373657, "loss": 1.0413, "nll_loss": 0.9868341088294983, "rewards/accuracies": 0.875, "rewards/chosen": -0.07867274433374405, "rewards/margins": 0.10927850008010864, "rewards/rejected": -0.1879512369632721, "step": 4333 }, { "epoch": 2.643892023791368, "grad_norm": 1.4483096599578857, "learning_rate": 3.782976117575015e-06, "log_odds_chosen": 2.621945381164551, "log_odds_ratio": -0.3528577387332916, "logits/chosen": -0.7976052165031433, "logits/rejected": -0.9479904174804688, "logps/chosen": -0.6455783247947693, "logps/rejected": -2.6779017448425293, "loss": 0.9055, "nll_loss": 0.8089226484298706, "rewards/accuracies": 0.75, "rewards/chosen": -0.06455783545970917, "rewards/margins": 0.20323236286640167, "rewards/rejected": -0.26779019832611084, "step": 4334 }, { "epoch": 2.644502058868385, "grad_norm": 3.9306249618530273, "learning_rate": 3.7819963257807713e-06, "log_odds_chosen": 1.0214083194732666, "log_odds_ratio": -0.41318151354789734, "logits/chosen": -1.0718919038772583, "logits/rejected": -1.16939115524292, "logps/chosen": -1.049367904663086, "logps/rejected": -1.8684552907943726, "loss": 0.9661, "nll_loss": 1.0815224647521973, "rewards/accuracies": 0.75, "rewards/chosen": -0.10493678599596024, "rewards/margins": 0.08190874755382538, "rewards/rejected": -0.1868455410003662, "step": 4335 }, { "epoch": 2.645112093945402, "grad_norm": 2.8448004722595215, "learning_rate": 3.781016533986528e-06, "log_odds_chosen": 0.45069262385368347, "log_odds_ratio": -0.5982218980789185, "logits/chosen": -1.117911458015442, "logits/rejected": -0.9924985766410828, "logps/chosen": -1.0034980773925781, "logps/rejected": -1.3748211860656738, "loss": 1.1279, "nll_loss": 1.1484094858169556, "rewards/accuracies": 0.625, "rewards/chosen": -0.10034981369972229, "rewards/margins": 0.03713230788707733, "rewards/rejected": -0.13748210668563843, "step": 4336 }, { "epoch": 2.6457221290224187, "grad_norm": 3.1127376556396484, "learning_rate": 3.780036742192284e-06, "log_odds_chosen": 1.5651142597198486, "log_odds_ratio": -0.5205975770950317, "logits/chosen": -0.9148764610290527, "logits/rejected": -1.0759103298187256, "logps/chosen": -0.9797171354293823, "logps/rejected": -2.1782729625701904, "loss": 1.0478, "nll_loss": 1.1275320053100586, "rewards/accuracies": 0.625, "rewards/chosen": -0.09797171503305435, "rewards/margins": 0.11985559016466141, "rewards/rejected": -0.21782732009887695, "step": 4337 }, { "epoch": 2.6463321640994355, "grad_norm": 3.182668924331665, "learning_rate": 3.77905695039804e-06, "log_odds_chosen": 1.794600248336792, "log_odds_ratio": -0.35823625326156616, "logits/chosen": -0.9191215634346008, "logits/rejected": -1.015852928161621, "logps/chosen": -0.8178318738937378, "logps/rejected": -2.1486740112304688, "loss": 1.0142, "nll_loss": 1.0254337787628174, "rewards/accuracies": 0.75, "rewards/chosen": -0.08178319036960602, "rewards/margins": 0.133084237575531, "rewards/rejected": -0.21486742794513702, "step": 4338 }, { "epoch": 2.646942199176453, "grad_norm": 1.5204564332962036, "learning_rate": 3.7780771586037967e-06, "log_odds_chosen": 0.517778754234314, "log_odds_ratio": -0.6182154417037964, "logits/chosen": -0.9211323261260986, "logits/rejected": -0.8835620880126953, "logps/chosen": -1.0499207973480225, "logps/rejected": -1.4894922971725464, "loss": 1.0426, "nll_loss": 1.3217251300811768, "rewards/accuracies": 0.375, "rewards/chosen": -0.1049920916557312, "rewards/margins": 0.04395715892314911, "rewards/rejected": -0.14894923567771912, "step": 4339 }, { "epoch": 2.6475522342534696, "grad_norm": 3.8216288089752197, "learning_rate": 3.7770973668095528e-06, "log_odds_chosen": 0.2512761354446411, "log_odds_ratio": -0.6689318418502808, "logits/chosen": -1.1142773628234863, "logits/rejected": -1.102760672569275, "logps/chosen": -1.0144306421279907, "logps/rejected": -1.0088860988616943, "loss": 0.9973, "nll_loss": 1.2107855081558228, "rewards/accuracies": 0.75, "rewards/chosen": -0.10144306719303131, "rewards/margins": -0.0005544601008296013, "rewards/rejected": -0.10088860988616943, "step": 4340 }, { "epoch": 2.6481622693304865, "grad_norm": 1.3465176820755005, "learning_rate": 3.7761175750153093e-06, "log_odds_chosen": 1.1240568161010742, "log_odds_ratio": -0.4774973392486572, "logits/chosen": -0.8885214328765869, "logits/rejected": -1.0214773416519165, "logps/chosen": -0.7280882596969604, "logps/rejected": -1.4288914203643799, "loss": 1.08, "nll_loss": 1.070205807685852, "rewards/accuracies": 0.875, "rewards/chosen": -0.07280882447957993, "rewards/margins": 0.07008033245801926, "rewards/rejected": -0.14288917183876038, "step": 4341 }, { "epoch": 2.6487723044075033, "grad_norm": 3.140531301498413, "learning_rate": 3.775137783221065e-06, "log_odds_chosen": 2.1407241821289062, "log_odds_ratio": -0.439456582069397, "logits/chosen": -0.9182896614074707, "logits/rejected": -0.9857196807861328, "logps/chosen": -0.8239448070526123, "logps/rejected": -2.5604522228240967, "loss": 1.0968, "nll_loss": 0.9923908710479736, "rewards/accuracies": 0.75, "rewards/chosen": -0.08239448070526123, "rewards/margins": 0.17365074157714844, "rewards/rejected": -0.25604522228240967, "step": 4342 }, { "epoch": 2.6493823394845206, "grad_norm": 1.194327712059021, "learning_rate": 3.7741579914268216e-06, "log_odds_chosen": 4.257928371429443, "log_odds_ratio": -0.2629965543746948, "logits/chosen": -0.8014714121818542, "logits/rejected": -0.9975565075874329, "logps/chosen": -0.5633390545845032, "logps/rejected": -4.189605712890625, "loss": 0.9575, "nll_loss": 0.7595271468162537, "rewards/accuracies": 0.75, "rewards/chosen": -0.056333910673856735, "rewards/margins": 0.36262667179107666, "rewards/rejected": -0.4189605712890625, "step": 4343 }, { "epoch": 2.6499923745615375, "grad_norm": 9.862737655639648, "learning_rate": 3.773178199632578e-06, "log_odds_chosen": 1.2620577812194824, "log_odds_ratio": -0.40094858407974243, "logits/chosen": -0.6462253332138062, "logits/rejected": -0.5125226378440857, "logps/chosen": -0.6831158399581909, "logps/rejected": -1.4734283685684204, "loss": 1.0245, "nll_loss": 0.8186550140380859, "rewards/accuracies": 0.875, "rewards/chosen": -0.06831158697605133, "rewards/margins": 0.07903125882148743, "rewards/rejected": -0.14734283089637756, "step": 4344 }, { "epoch": 2.6506024096385543, "grad_norm": 1.1559430360794067, "learning_rate": 3.772198407838334e-06, "log_odds_chosen": 1.5516631603240967, "log_odds_ratio": -0.3776124119758606, "logits/chosen": -0.8253043293952942, "logits/rejected": -0.766663134098053, "logps/chosen": -0.9169600009918213, "logps/rejected": -1.9671990871429443, "loss": 1.098, "nll_loss": 1.198885440826416, "rewards/accuracies": 0.875, "rewards/chosen": -0.09169600903987885, "rewards/margins": 0.10502389818429947, "rewards/rejected": -0.19671989977359772, "step": 4345 }, { "epoch": 2.651212444715571, "grad_norm": 2.073884963989258, "learning_rate": 3.7712186160440904e-06, "log_odds_chosen": 1.1312721967697144, "log_odds_ratio": -0.5131248235702515, "logits/chosen": -0.9065155982971191, "logits/rejected": -0.9337223172187805, "logps/chosen": -0.7417041063308716, "logps/rejected": -1.544487476348877, "loss": 0.8724, "nll_loss": 0.85063236951828, "rewards/accuracies": 0.75, "rewards/chosen": -0.07417041063308716, "rewards/margins": 0.08027833700180054, "rewards/rejected": -0.1544487625360489, "step": 4346 }, { "epoch": 2.651822479792588, "grad_norm": 2.0067026615142822, "learning_rate": 3.770238824249847e-06, "log_odds_chosen": 0.915162980556488, "log_odds_ratio": -0.5842760801315308, "logits/chosen": -0.8180620670318604, "logits/rejected": -0.8225654363632202, "logps/chosen": -0.6859641075134277, "logps/rejected": -1.3631178140640259, "loss": 1.0714, "nll_loss": 1.0844271183013916, "rewards/accuracies": 0.625, "rewards/chosen": -0.06859641522169113, "rewards/margins": 0.06771537661552429, "rewards/rejected": -0.13631178438663483, "step": 4347 }, { "epoch": 2.652432514869605, "grad_norm": 3.28074312210083, "learning_rate": 3.769259032455603e-06, "log_odds_chosen": 0.9369056224822998, "log_odds_ratio": -0.39320802688598633, "logits/chosen": -0.8681821823120117, "logits/rejected": -0.7977508306503296, "logps/chosen": -0.618854284286499, "logps/rejected": -1.1163926124572754, "loss": 0.9509, "nll_loss": 0.6768167018890381, "rewards/accuracies": 0.875, "rewards/chosen": -0.06188543140888214, "rewards/margins": 0.049753837287425995, "rewards/rejected": -0.11163926869630814, "step": 4348 }, { "epoch": 2.6530425499466217, "grad_norm": 1.3256735801696777, "learning_rate": 3.768279240661359e-06, "log_odds_chosen": 1.0924280881881714, "log_odds_ratio": -0.4068170189857483, "logits/chosen": -0.9283684492111206, "logits/rejected": -0.9284360408782959, "logps/chosen": -0.6970769166946411, "logps/rejected": -1.4571000337600708, "loss": 0.981, "nll_loss": 1.0075054168701172, "rewards/accuracies": 0.75, "rewards/chosen": -0.06970768421888351, "rewards/margins": 0.07600231468677521, "rewards/rejected": -0.14571000635623932, "step": 4349 }, { "epoch": 2.653652585023639, "grad_norm": 2.6034159660339355, "learning_rate": 3.7672994488671157e-06, "log_odds_chosen": 1.1416776180267334, "log_odds_ratio": -0.4985728859901428, "logits/chosen": -0.748456597328186, "logits/rejected": -0.7631492018699646, "logps/chosen": -0.8454088568687439, "logps/rejected": -1.7538673877716064, "loss": 1.0852, "nll_loss": 0.9456540942192078, "rewards/accuracies": 0.875, "rewards/chosen": -0.08454088866710663, "rewards/margins": 0.09084586054086685, "rewards/rejected": -0.17538674175739288, "step": 4350 }, { "epoch": 2.654262620100656, "grad_norm": 2.6438074111938477, "learning_rate": 3.766319657072872e-06, "log_odds_chosen": 3.7514028549194336, "log_odds_ratio": -0.19982948899269104, "logits/chosen": -0.7380114793777466, "logits/rejected": -0.8573394417762756, "logps/chosen": -0.6295289993286133, "logps/rejected": -3.5337905883789062, "loss": 0.9993, "nll_loss": 0.7223499417304993, "rewards/accuracies": 0.875, "rewards/chosen": -0.06295289844274521, "rewards/margins": 0.2904261350631714, "rewards/rejected": -0.3533790707588196, "step": 4351 }, { "epoch": 2.6548726551776727, "grad_norm": 1.2892870903015137, "learning_rate": 3.765339865278628e-06, "log_odds_chosen": 1.8144569396972656, "log_odds_ratio": -0.41413652896881104, "logits/chosen": -0.9547433257102966, "logits/rejected": -1.0742909908294678, "logps/chosen": -0.721676230430603, "logps/rejected": -2.1241633892059326, "loss": 1.078, "nll_loss": 1.058242917060852, "rewards/accuracies": 0.75, "rewards/chosen": -0.07216762751340866, "rewards/margins": 0.14024873077869415, "rewards/rejected": -0.21241635084152222, "step": 4352 }, { "epoch": 2.6554826902546895, "grad_norm": 1.3917648792266846, "learning_rate": 3.7643600734843845e-06, "log_odds_chosen": 2.4643402099609375, "log_odds_ratio": -0.32064488530158997, "logits/chosen": -0.7438335418701172, "logits/rejected": -0.9212195873260498, "logps/chosen": -0.7522989511489868, "logps/rejected": -2.63754940032959, "loss": 0.9401, "nll_loss": 0.9107028245925903, "rewards/accuracies": 0.75, "rewards/chosen": -0.07522989809513092, "rewards/margins": 0.18852505087852478, "rewards/rejected": -0.2637549340724945, "step": 4353 }, { "epoch": 2.656092725331707, "grad_norm": 1.2460453510284424, "learning_rate": 3.7633802816901406e-06, "log_odds_chosen": 2.8862197399139404, "log_odds_ratio": -0.22711682319641113, "logits/chosen": -0.7899826765060425, "logits/rejected": -0.8614601492881775, "logps/chosen": -0.7337198257446289, "logps/rejected": -3.0160417556762695, "loss": 1.0885, "nll_loss": 0.9936674237251282, "rewards/accuracies": 0.875, "rewards/chosen": -0.07337197661399841, "rewards/margins": 0.22823219001293182, "rewards/rejected": -0.30160418152809143, "step": 4354 }, { "epoch": 2.6567027604087237, "grad_norm": 2.4920437335968018, "learning_rate": 3.762400489895897e-06, "log_odds_chosen": 1.0343806743621826, "log_odds_ratio": -0.5379432439804077, "logits/chosen": -1.0058019161224365, "logits/rejected": -0.9386177062988281, "logps/chosen": -0.6918373107910156, "logps/rejected": -1.479814052581787, "loss": 1.1242, "nll_loss": 1.4196207523345947, "rewards/accuracies": 0.75, "rewards/chosen": -0.06918373703956604, "rewards/margins": 0.07879765331745148, "rewards/rejected": -0.1479814052581787, "step": 4355 }, { "epoch": 2.6573127954857405, "grad_norm": 2.6965105533599854, "learning_rate": 3.7614206981016533e-06, "log_odds_chosen": 2.5615286827087402, "log_odds_ratio": -0.37740781903266907, "logits/chosen": -0.6717629432678223, "logits/rejected": -0.9165585041046143, "logps/chosen": -0.8643337488174438, "logps/rejected": -2.83878231048584, "loss": 1.1309, "nll_loss": 1.0631107091903687, "rewards/accuracies": 0.875, "rewards/chosen": -0.08643338084220886, "rewards/margins": 0.1974448710680008, "rewards/rejected": -0.28387823700904846, "step": 4356 }, { "epoch": 2.6579228305627574, "grad_norm": 1.5889278650283813, "learning_rate": 3.7604409063074094e-06, "log_odds_chosen": 1.9151362180709839, "log_odds_ratio": -0.47641250491142273, "logits/chosen": -0.9381727576255798, "logits/rejected": -0.9351475834846497, "logps/chosen": -0.7354552149772644, "logps/rejected": -1.95868718624115, "loss": 1.082, "nll_loss": 1.0294885635375977, "rewards/accuracies": 0.5, "rewards/chosen": -0.07354553043842316, "rewards/margins": 0.1223231852054596, "rewards/rejected": -0.19586871564388275, "step": 4357 }, { "epoch": 2.658532865639774, "grad_norm": 2.3707258701324463, "learning_rate": 3.759461114513166e-06, "log_odds_chosen": 2.8305068016052246, "log_odds_ratio": -0.31637388467788696, "logits/chosen": -0.8381847143173218, "logits/rejected": -1.0434484481811523, "logps/chosen": -0.8349602222442627, "logps/rejected": -3.0901739597320557, "loss": 1.242, "nll_loss": 0.9609147310256958, "rewards/accuracies": 0.875, "rewards/chosen": -0.08349602669477463, "rewards/margins": 0.22552138566970825, "rewards/rejected": -0.3090174198150635, "step": 4358 }, { "epoch": 2.659142900716791, "grad_norm": 3.3720040321350098, "learning_rate": 3.758481322718922e-06, "log_odds_chosen": 0.8953544497489929, "log_odds_ratio": -0.6938380002975464, "logits/chosen": -0.9269965887069702, "logits/rejected": -1.0151804685592651, "logps/chosen": -0.9585267305374146, "logps/rejected": -1.7830920219421387, "loss": 1.2771, "nll_loss": 1.0948302745819092, "rewards/accuracies": 0.5, "rewards/chosen": -0.09585267305374146, "rewards/margins": 0.08245652914047241, "rewards/rejected": -0.17830920219421387, "step": 4359 }, { "epoch": 2.659752935793808, "grad_norm": 4.898334980010986, "learning_rate": 3.757501530924678e-06, "log_odds_chosen": 1.9502475261688232, "log_odds_ratio": -0.2763998508453369, "logits/chosen": -0.590175449848175, "logits/rejected": -0.9126476645469666, "logps/chosen": -0.552227795124054, "logps/rejected": -1.8915245532989502, "loss": 0.7761, "nll_loss": 0.6828387379646301, "rewards/accuracies": 1.0, "rewards/chosen": -0.055222783237695694, "rewards/margins": 0.13392966985702515, "rewards/rejected": -0.18915246427059174, "step": 4360 }, { "epoch": 2.660362970870825, "grad_norm": 2.7662770748138428, "learning_rate": 3.7565217391304347e-06, "log_odds_chosen": 1.7320806980133057, "log_odds_ratio": -0.3702657222747803, "logits/chosen": -0.7821387052536011, "logits/rejected": -1.00055992603302, "logps/chosen": -0.48416799306869507, "logps/rejected": -1.5148457288742065, "loss": 1.0516, "nll_loss": 0.8929967284202576, "rewards/accuracies": 0.625, "rewards/chosen": -0.04841679707169533, "rewards/margins": 0.1030677929520607, "rewards/rejected": -0.15148459374904633, "step": 4361 }, { "epoch": 2.660973005947842, "grad_norm": 1.1241071224212646, "learning_rate": 3.755541947336191e-06, "log_odds_chosen": 1.565847396850586, "log_odds_ratio": -0.2780519425868988, "logits/chosen": -0.9002790451049805, "logits/rejected": -1.0371921062469482, "logps/chosen": -0.7368654012680054, "logps/rejected": -1.6910080909729004, "loss": 1.0355, "nll_loss": 1.0318760871887207, "rewards/accuracies": 1.0, "rewards/chosen": -0.07368654012680054, "rewards/margins": 0.09541426599025726, "rewards/rejected": -0.169100821018219, "step": 4362 }, { "epoch": 2.661583041024859, "grad_norm": 2.395890951156616, "learning_rate": 3.754562155541947e-06, "log_odds_chosen": 2.380992889404297, "log_odds_ratio": -0.33027487993240356, "logits/chosen": -0.9915406703948975, "logits/rejected": -1.0179122686386108, "logps/chosen": -0.8087150454521179, "logps/rejected": -2.6171352863311768, "loss": 1.1094, "nll_loss": 1.0948450565338135, "rewards/accuracies": 0.875, "rewards/chosen": -0.08087150752544403, "rewards/margins": 0.18084204196929932, "rewards/rejected": -0.26171356439590454, "step": 4363 }, { "epoch": 2.6621930761018757, "grad_norm": 3.9437873363494873, "learning_rate": 3.7535823637477035e-06, "log_odds_chosen": 1.9057070016860962, "log_odds_ratio": -0.30673477053642273, "logits/chosen": -0.7138227224349976, "logits/rejected": -1.0165152549743652, "logps/chosen": -0.6919161081314087, "logps/rejected": -2.133122444152832, "loss": 1.0801, "nll_loss": 0.926823616027832, "rewards/accuracies": 0.875, "rewards/chosen": -0.06919161230325699, "rewards/margins": 0.14412064850330353, "rewards/rejected": -0.21331225335597992, "step": 4364 }, { "epoch": 2.662803111178893, "grad_norm": 1.5581824779510498, "learning_rate": 3.7526025719534597e-06, "log_odds_chosen": 2.6814045906066895, "log_odds_ratio": -0.343692421913147, "logits/chosen": -0.6399871706962585, "logits/rejected": -0.9538302421569824, "logps/chosen": -0.698454737663269, "logps/rejected": -2.7661478519439697, "loss": 0.96, "nll_loss": 0.9371519088745117, "rewards/accuracies": 0.875, "rewards/chosen": -0.06984547525644302, "rewards/margins": 0.20676930248737335, "rewards/rejected": -0.2766147553920746, "step": 4365 }, { "epoch": 2.66341314625591, "grad_norm": 3.8915083408355713, "learning_rate": 3.7516227801592158e-06, "log_odds_chosen": 2.946842908859253, "log_odds_ratio": -0.1581391543149948, "logits/chosen": -0.5585944652557373, "logits/rejected": -0.7477718591690063, "logps/chosen": -0.6306973695755005, "logps/rejected": -2.8572511672973633, "loss": 1.0579, "nll_loss": 0.9084234237670898, "rewards/accuracies": 1.0, "rewards/chosen": -0.06306973844766617, "rewards/margins": 0.22265538573265076, "rewards/rejected": -0.28572511672973633, "step": 4366 }, { "epoch": 2.6640231813329267, "grad_norm": 1.897136926651001, "learning_rate": 3.7506429883649723e-06, "log_odds_chosen": 3.858272075653076, "log_odds_ratio": -0.12724554538726807, "logits/chosen": -0.8559594750404358, "logits/rejected": -1.0579266548156738, "logps/chosen": -0.7178375720977783, "logps/rejected": -3.7352142333984375, "loss": 1.1111, "nll_loss": 1.146005392074585, "rewards/accuracies": 0.875, "rewards/chosen": -0.07178375124931335, "rewards/margins": 0.3017376959323883, "rewards/rejected": -0.37352144718170166, "step": 4367 }, { "epoch": 2.6646332164099436, "grad_norm": 1.5900250673294067, "learning_rate": 3.7496631965707284e-06, "log_odds_chosen": 0.8013154864311218, "log_odds_ratio": -0.5116013288497925, "logits/chosen": -0.8226814866065979, "logits/rejected": -0.7287607192993164, "logps/chosen": -0.7790849804878235, "logps/rejected": -1.4153684377670288, "loss": 1.0028, "nll_loss": 0.8319218754768372, "rewards/accuracies": 0.625, "rewards/chosen": -0.07790850102901459, "rewards/margins": 0.06362833827733994, "rewards/rejected": -0.14153683185577393, "step": 4368 }, { "epoch": 2.6652432514869604, "grad_norm": 7.274078845977783, "learning_rate": 3.748683404776485e-06, "log_odds_chosen": 0.693545937538147, "log_odds_ratio": -0.666753888130188, "logits/chosen": -0.8120841979980469, "logits/rejected": -0.7727931141853333, "logps/chosen": -0.7428162693977356, "logps/rejected": -1.259629726409912, "loss": 1.1188, "nll_loss": 0.9458833932876587, "rewards/accuracies": 0.375, "rewards/chosen": -0.07428163290023804, "rewards/margins": 0.05168134719133377, "rewards/rejected": -0.1259629875421524, "step": 4369 }, { "epoch": 2.6658532865639772, "grad_norm": 1.655907392501831, "learning_rate": 3.747703612982241e-06, "log_odds_chosen": 4.035366058349609, "log_odds_ratio": -0.24246616661548615, "logits/chosen": -0.7072524428367615, "logits/rejected": -0.9085046052932739, "logps/chosen": -0.6871044039726257, "logps/rejected": -4.006105899810791, "loss": 0.9771, "nll_loss": 0.7193679809570312, "rewards/accuracies": 0.875, "rewards/chosen": -0.06871043145656586, "rewards/margins": 0.33190014958381653, "rewards/rejected": -0.4006105959415436, "step": 4370 }, { "epoch": 2.666463321640994, "grad_norm": 0.9877184629440308, "learning_rate": 3.7467238211879972e-06, "log_odds_chosen": 1.7790989875793457, "log_odds_ratio": -0.5526408553123474, "logits/chosen": -0.8028721213340759, "logits/rejected": -0.8442952632904053, "logps/chosen": -0.8371407389640808, "logps/rejected": -2.3987135887145996, "loss": 1.0424, "nll_loss": 0.8957552909851074, "rewards/accuracies": 0.5, "rewards/chosen": -0.0837140679359436, "rewards/margins": 0.15615728497505188, "rewards/rejected": -0.23987135291099548, "step": 4371 }, { "epoch": 2.6670733567180114, "grad_norm": 1.0803930759429932, "learning_rate": 3.7457440293937538e-06, "log_odds_chosen": 1.612257957458496, "log_odds_ratio": -0.5774139165878296, "logits/chosen": -0.7011865377426147, "logits/rejected": -0.762587308883667, "logps/chosen": -0.7630406618118286, "logps/rejected": -1.939942479133606, "loss": 1.0511, "nll_loss": 0.9041007161140442, "rewards/accuracies": 0.625, "rewards/chosen": -0.07630406320095062, "rewards/margins": 0.11769017577171326, "rewards/rejected": -0.19399425387382507, "step": 4372 }, { "epoch": 2.6676833917950282, "grad_norm": 1.4711657762527466, "learning_rate": 3.74476423759951e-06, "log_odds_chosen": 0.848235547542572, "log_odds_ratio": -0.5473598837852478, "logits/chosen": -0.8715870976448059, "logits/rejected": -1.0163147449493408, "logps/chosen": -0.8616805672645569, "logps/rejected": -1.5070698261260986, "loss": 1.1065, "nll_loss": 1.1175518035888672, "rewards/accuracies": 0.625, "rewards/chosen": -0.08616805821657181, "rewards/margins": 0.06453893333673477, "rewards/rejected": -0.15070697665214539, "step": 4373 }, { "epoch": 2.668293426872045, "grad_norm": 3.1315505504608154, "learning_rate": 3.743784445805266e-06, "log_odds_chosen": 1.462742567062378, "log_odds_ratio": -0.5183407664299011, "logits/chosen": -0.6189045310020447, "logits/rejected": -0.9195361137390137, "logps/chosen": -0.8744962215423584, "logps/rejected": -2.030709743499756, "loss": 1.1264, "nll_loss": 0.9956908822059631, "rewards/accuracies": 0.625, "rewards/chosen": -0.08744961768388748, "rewards/margins": 0.11562135815620422, "rewards/rejected": -0.2030709832906723, "step": 4374 }, { "epoch": 2.668903461949062, "grad_norm": 1.3986557722091675, "learning_rate": 3.7428046540110226e-06, "log_odds_chosen": 0.8816866278648376, "log_odds_ratio": -0.555584192276001, "logits/chosen": -0.83400958776474, "logits/rejected": -0.9473493099212646, "logps/chosen": -0.8109970092773438, "logps/rejected": -1.4888989925384521, "loss": 0.9796, "nll_loss": 0.9445011019706726, "rewards/accuracies": 0.625, "rewards/chosen": -0.08109970390796661, "rewards/margins": 0.0677901953458786, "rewards/rejected": -0.14888989925384521, "step": 4375 }, { "epoch": 2.669513497026079, "grad_norm": 1.501466155052185, "learning_rate": 3.741824862216779e-06, "log_odds_chosen": 2.8048384189605713, "log_odds_ratio": -0.14368784427642822, "logits/chosen": -0.7220557928085327, "logits/rejected": -0.8662272691726685, "logps/chosen": -0.5841342210769653, "logps/rejected": -2.5429885387420654, "loss": 0.91, "nll_loss": 0.9701061248779297, "rewards/accuracies": 1.0, "rewards/chosen": -0.05841342359781265, "rewards/margins": 0.19588543474674225, "rewards/rejected": -0.2542988657951355, "step": 4376 }, { "epoch": 2.670123532103096, "grad_norm": 1.5435148477554321, "learning_rate": 3.740845070422535e-06, "log_odds_chosen": 1.4635446071624756, "log_odds_ratio": -0.4486619830131531, "logits/chosen": -0.9222122430801392, "logits/rejected": -1.0226380825042725, "logps/chosen": -0.7639739513397217, "logps/rejected": -1.8786563873291016, "loss": 1.2326, "nll_loss": 1.3029189109802246, "rewards/accuracies": 0.625, "rewards/chosen": -0.07639738917350769, "rewards/margins": 0.11146826297044754, "rewards/rejected": -0.18786564469337463, "step": 4377 }, { "epoch": 2.670733567180113, "grad_norm": 1.2857383489608765, "learning_rate": 3.7398652786282914e-06, "log_odds_chosen": 1.162928581237793, "log_odds_ratio": -0.6646714806556702, "logits/chosen": -0.531128466129303, "logits/rejected": -0.7090317010879517, "logps/chosen": -0.5701175928115845, "logps/rejected": -1.4425771236419678, "loss": 1.0344, "nll_loss": 0.7910910844802856, "rewards/accuracies": 0.75, "rewards/chosen": -0.05701176077127457, "rewards/margins": 0.08724596351385117, "rewards/rejected": -0.14425772428512573, "step": 4378 }, { "epoch": 2.6713436022571297, "grad_norm": 1.764115810394287, "learning_rate": 3.738885486834048e-06, "log_odds_chosen": 1.140366554260254, "log_odds_ratio": -0.4522993564605713, "logits/chosen": -0.6620327830314636, "logits/rejected": -0.7680350542068481, "logps/chosen": -0.5755372047424316, "logps/rejected": -1.3159549236297607, "loss": 1.0013, "nll_loss": 0.7766373157501221, "rewards/accuracies": 0.875, "rewards/chosen": -0.0575537234544754, "rewards/margins": 0.07404176145792007, "rewards/rejected": -0.13159549236297607, "step": 4379 }, { "epoch": 2.6719536373341466, "grad_norm": 2.003338098526001, "learning_rate": 3.7379056950398036e-06, "log_odds_chosen": 2.8498544692993164, "log_odds_ratio": -0.49605971574783325, "logits/chosen": -0.8933020830154419, "logits/rejected": -1.031538963317871, "logps/chosen": -0.8536369800567627, "logps/rejected": -3.3208329677581787, "loss": 1.1964, "nll_loss": 1.0375555753707886, "rewards/accuracies": 0.625, "rewards/chosen": -0.08536370098590851, "rewards/margins": 0.2467195987701416, "rewards/rejected": -0.3320832848548889, "step": 4380 }, { "epoch": 2.6725636724111634, "grad_norm": 4.319491386413574, "learning_rate": 3.73692590324556e-06, "log_odds_chosen": 0.7105764150619507, "log_odds_ratio": -0.8128957748413086, "logits/chosen": -0.9817913770675659, "logits/rejected": -0.9402179718017578, "logps/chosen": -1.323430061340332, "logps/rejected": -1.9749929904937744, "loss": 1.0561, "nll_loss": 1.3413207530975342, "rewards/accuracies": 0.625, "rewards/chosen": -0.13234300911426544, "rewards/margins": 0.06515629589557648, "rewards/rejected": -0.19749930500984192, "step": 4381 }, { "epoch": 2.6731737074881807, "grad_norm": 2.7983510494232178, "learning_rate": 3.7359461114513163e-06, "log_odds_chosen": 1.1841551065444946, "log_odds_ratio": -0.37522652745246887, "logits/chosen": -0.820339024066925, "logits/rejected": -0.886151909828186, "logps/chosen": -1.022482991218567, "logps/rejected": -1.968036413192749, "loss": 1.082, "nll_loss": 1.1202880144119263, "rewards/accuracies": 1.0, "rewards/chosen": -0.10224829614162445, "rewards/margins": 0.0945553332567215, "rewards/rejected": -0.19680362939834595, "step": 4382 }, { "epoch": 2.6737837425651976, "grad_norm": 1.9530137777328491, "learning_rate": 3.734966319657073e-06, "log_odds_chosen": 0.9052623510360718, "log_odds_ratio": -0.6282283067703247, "logits/chosen": -0.9469878673553467, "logits/rejected": -0.9600527286529541, "logps/chosen": -0.6793879270553589, "logps/rejected": -1.426180124282837, "loss": 1.1465, "nll_loss": 1.1564021110534668, "rewards/accuracies": 0.5, "rewards/chosen": -0.06793878972530365, "rewards/margins": 0.07467924058437347, "rewards/rejected": -0.14261803030967712, "step": 4383 }, { "epoch": 2.6743937776422144, "grad_norm": 3.6408650875091553, "learning_rate": 3.733986527862829e-06, "log_odds_chosen": 1.7958732843399048, "log_odds_ratio": -0.36281296610832214, "logits/chosen": -0.5754194259643555, "logits/rejected": -0.8131192922592163, "logps/chosen": -0.6044325232505798, "logps/rejected": -1.8694770336151123, "loss": 0.9024, "nll_loss": 0.7249722480773926, "rewards/accuracies": 0.75, "rewards/chosen": -0.06044325605034828, "rewards/margins": 0.12650445103645325, "rewards/rejected": -0.18694770336151123, "step": 4384 }, { "epoch": 2.6750038127192313, "grad_norm": 1.4514060020446777, "learning_rate": 3.733006736068585e-06, "log_odds_chosen": 1.0642808675765991, "log_odds_ratio": -0.45933374762535095, "logits/chosen": -0.807403564453125, "logits/rejected": -0.8446710109710693, "logps/chosen": -0.605580747127533, "logps/rejected": -1.231487512588501, "loss": 1.1276, "nll_loss": 0.9624357223510742, "rewards/accuracies": 0.75, "rewards/chosen": -0.060558073222637177, "rewards/margins": 0.06259067356586456, "rewards/rejected": -0.12314875423908234, "step": 4385 }, { "epoch": 2.6756138477962486, "grad_norm": 1.4233856201171875, "learning_rate": 3.7320269442743416e-06, "log_odds_chosen": 0.5380359888076782, "log_odds_ratio": -0.5508726835250854, "logits/chosen": -0.6727069020271301, "logits/rejected": -0.8399202823638916, "logps/chosen": -0.8581053018569946, "logps/rejected": -1.2135679721832275, "loss": 1.0017, "nll_loss": 0.83103346824646, "rewards/accuracies": 0.75, "rewards/chosen": -0.08581052720546722, "rewards/margins": 0.03554626181721687, "rewards/rejected": -0.1213567927479744, "step": 4386 }, { "epoch": 2.6762238828732654, "grad_norm": 4.206045627593994, "learning_rate": 3.7310471524800977e-06, "log_odds_chosen": 1.923285961151123, "log_odds_ratio": -0.3556278944015503, "logits/chosen": -0.6522616744041443, "logits/rejected": -0.8917393684387207, "logps/chosen": -0.599693238735199, "logps/rejected": -2.0694684982299805, "loss": 1.0558, "nll_loss": 0.7703824043273926, "rewards/accuracies": 0.875, "rewards/chosen": -0.059969328343868256, "rewards/margins": 0.14697754383087158, "rewards/rejected": -0.20694684982299805, "step": 4387 }, { "epoch": 2.6768339179502822, "grad_norm": 1.3673248291015625, "learning_rate": 3.730067360685854e-06, "log_odds_chosen": 0.5804578065872192, "log_odds_ratio": -0.5219569802284241, "logits/chosen": -1.177783489227295, "logits/rejected": -1.1767277717590332, "logps/chosen": -0.913098931312561, "logps/rejected": -1.2592498064041138, "loss": 1.0233, "nll_loss": 1.054650068283081, "rewards/accuracies": 0.75, "rewards/chosen": -0.09130989015102386, "rewards/margins": 0.034615080803632736, "rewards/rejected": -0.1259249746799469, "step": 4388 }, { "epoch": 2.677443953027299, "grad_norm": 1.1696394681930542, "learning_rate": 3.7290875688916104e-06, "log_odds_chosen": 2.6369683742523193, "log_odds_ratio": -0.2865242063999176, "logits/chosen": -0.693132221698761, "logits/rejected": -0.861467182636261, "logps/chosen": -0.7438933253288269, "logps/rejected": -2.7426881790161133, "loss": 0.9829, "nll_loss": 0.8431147933006287, "rewards/accuracies": 0.875, "rewards/chosen": -0.07438933104276657, "rewards/margins": 0.1998794972896576, "rewards/rejected": -0.2742688059806824, "step": 4389 }, { "epoch": 2.678053988104316, "grad_norm": 1.846335530281067, "learning_rate": 3.728107777097367e-06, "log_odds_chosen": 1.081903100013733, "log_odds_ratio": -0.5611627101898193, "logits/chosen": -1.0120346546173096, "logits/rejected": -1.014125108718872, "logps/chosen": -0.8112868070602417, "logps/rejected": -1.6245335340499878, "loss": 0.9627, "nll_loss": 0.8950269818305969, "rewards/accuracies": 0.625, "rewards/chosen": -0.08112867921590805, "rewards/margins": 0.08132469654083252, "rewards/rejected": -0.16245336830615997, "step": 4390 }, { "epoch": 2.678664023181333, "grad_norm": 10.689168930053711, "learning_rate": 3.7271279853031226e-06, "log_odds_chosen": 0.6428983807563782, "log_odds_ratio": -0.6158397197723389, "logits/chosen": -0.7978099584579468, "logits/rejected": -0.8253604173660278, "logps/chosen": -0.7682254910469055, "logps/rejected": -1.2407830953598022, "loss": 1.0383, "nll_loss": 1.0290188789367676, "rewards/accuracies": 0.5, "rewards/chosen": -0.07682254910469055, "rewards/margins": 0.04725576564669609, "rewards/rejected": -0.12407830357551575, "step": 4391 }, { "epoch": 2.6792740582583496, "grad_norm": 3.4888579845428467, "learning_rate": 3.726148193508879e-06, "log_odds_chosen": 1.9240467548370361, "log_odds_ratio": -0.3479262888431549, "logits/chosen": -0.8014129400253296, "logits/rejected": -0.8010944128036499, "logps/chosen": -0.7526861429214478, "logps/rejected": -2.214245319366455, "loss": 1.0019, "nll_loss": 0.8364051580429077, "rewards/accuracies": 0.875, "rewards/chosen": -0.07526861131191254, "rewards/margins": 0.1461559236049652, "rewards/rejected": -0.22142454981803894, "step": 4392 }, { "epoch": 2.679884093335367, "grad_norm": 1.3999085426330566, "learning_rate": 3.7251684017146357e-06, "log_odds_chosen": 1.2351927757263184, "log_odds_ratio": -0.5011584758758545, "logits/chosen": -0.9538778066635132, "logits/rejected": -1.0409098863601685, "logps/chosen": -0.8257084488868713, "logps/rejected": -1.7649047374725342, "loss": 0.9628, "nll_loss": 0.9452778100967407, "rewards/accuracies": 0.75, "rewards/chosen": -0.08257084339857101, "rewards/margins": 0.09391963481903076, "rewards/rejected": -0.17649048566818237, "step": 4393 }, { "epoch": 2.6804941284123838, "grad_norm": 2.8785321712493896, "learning_rate": 3.7241886099203914e-06, "log_odds_chosen": 3.379549980163574, "log_odds_ratio": -0.23944687843322754, "logits/chosen": -0.8155472278594971, "logits/rejected": -0.9587117433547974, "logps/chosen": -0.8008551597595215, "logps/rejected": -3.6168766021728516, "loss": 0.9042, "nll_loss": 0.9914500713348389, "rewards/accuracies": 0.875, "rewards/chosen": -0.08008551597595215, "rewards/margins": 0.281602144241333, "rewards/rejected": -0.36168766021728516, "step": 4394 }, { "epoch": 2.6811041634894006, "grad_norm": 2.512943744659424, "learning_rate": 3.723208818126148e-06, "log_odds_chosen": 2.475294589996338, "log_odds_ratio": -0.4672020375728607, "logits/chosen": -0.5943355560302734, "logits/rejected": -0.8787784576416016, "logps/chosen": -0.7001583576202393, "logps/rejected": -2.623988628387451, "loss": 1.0504, "nll_loss": 0.9147723913192749, "rewards/accuracies": 0.625, "rewards/chosen": -0.07001583278179169, "rewards/margins": 0.1923830211162567, "rewards/rejected": -0.2623988389968872, "step": 4395 }, { "epoch": 2.6817141985664175, "grad_norm": 1.0493817329406738, "learning_rate": 3.7222290263319045e-06, "log_odds_chosen": 1.5867995023727417, "log_odds_ratio": -0.4242839515209198, "logits/chosen": -0.7172738313674927, "logits/rejected": -0.8616428375244141, "logps/chosen": -0.7348746061325073, "logps/rejected": -1.854063630104065, "loss": 0.9358, "nll_loss": 0.9421438574790955, "rewards/accuracies": 0.75, "rewards/chosen": -0.07348746061325073, "rewards/margins": 0.11191890388727188, "rewards/rejected": -0.18540635704994202, "step": 4396 }, { "epoch": 2.6823242336434348, "grad_norm": 3.2244412899017334, "learning_rate": 3.7212492345376606e-06, "log_odds_chosen": 1.2573566436767578, "log_odds_ratio": -0.45653867721557617, "logits/chosen": -0.7690261602401733, "logits/rejected": -0.7538637518882751, "logps/chosen": -0.8735183477401733, "logps/rejected": -1.6997323036193848, "loss": 1.0059, "nll_loss": 0.9088296890258789, "rewards/accuracies": 0.625, "rewards/chosen": -0.08735183626413345, "rewards/margins": 0.08262140303850174, "rewards/rejected": -0.1699732393026352, "step": 4397 }, { "epoch": 2.6829342687204516, "grad_norm": 1.7476717233657837, "learning_rate": 3.7202694427434168e-06, "log_odds_chosen": 1.2551075220108032, "log_odds_ratio": -0.3788347840309143, "logits/chosen": -0.8349683880805969, "logits/rejected": -0.8660027384757996, "logps/chosen": -0.7204898595809937, "logps/rejected": -1.6782615184783936, "loss": 1.0023, "nll_loss": 0.872180163860321, "rewards/accuracies": 0.875, "rewards/chosen": -0.07204897701740265, "rewards/margins": 0.09577717632055283, "rewards/rejected": -0.16782614588737488, "step": 4398 }, { "epoch": 2.6835443037974684, "grad_norm": 2.7102601528167725, "learning_rate": 3.7192896509491733e-06, "log_odds_chosen": 1.415192723274231, "log_odds_ratio": -0.39164429903030396, "logits/chosen": -0.8612205982208252, "logits/rejected": -0.678766131401062, "logps/chosen": -0.9270170331001282, "logps/rejected": -2.0026111602783203, "loss": 0.9784, "nll_loss": 0.9466615915298462, "rewards/accuracies": 0.875, "rewards/chosen": -0.09270170331001282, "rewards/margins": 0.10755939781665802, "rewards/rejected": -0.20026111602783203, "step": 4399 }, { "epoch": 2.6841543388744853, "grad_norm": 1.7746880054473877, "learning_rate": 3.7183098591549294e-06, "log_odds_chosen": 1.0328354835510254, "log_odds_ratio": -0.45959240198135376, "logits/chosen": -1.1097896099090576, "logits/rejected": -0.9485723376274109, "logps/chosen": -1.2494864463806152, "logps/rejected": -2.1749374866485596, "loss": 1.3301, "nll_loss": 1.3902132511138916, "rewards/accuracies": 0.75, "rewards/chosen": -0.1249486431479454, "rewards/margins": 0.09254513680934906, "rewards/rejected": -0.21749377250671387, "step": 4400 }, { "epoch": 2.684764373951502, "grad_norm": 1.30569589138031, "learning_rate": 3.7173300673606856e-06, "log_odds_chosen": 2.6638708114624023, "log_odds_ratio": -0.3291108012199402, "logits/chosen": -0.7437934279441833, "logits/rejected": -0.876939058303833, "logps/chosen": -0.7539932131767273, "logps/rejected": -3.002586841583252, "loss": 0.9281, "nll_loss": 0.8784502148628235, "rewards/accuracies": 1.0, "rewards/chosen": -0.07539932429790497, "rewards/margins": 0.22485937178134918, "rewards/rejected": -0.30025869607925415, "step": 4401 }, { "epoch": 2.685374409028519, "grad_norm": 2.133754253387451, "learning_rate": 3.7163502755664417e-06, "log_odds_chosen": 1.1870050430297852, "log_odds_ratio": -0.5046124458312988, "logits/chosen": -0.8925517797470093, "logits/rejected": -0.7967447638511658, "logps/chosen": -0.8747571706771851, "logps/rejected": -1.800440788269043, "loss": 1.1164, "nll_loss": 1.0056129693984985, "rewards/accuracies": 0.75, "rewards/chosen": -0.0874757170677185, "rewards/margins": 0.09256835281848907, "rewards/rejected": -0.18004408478736877, "step": 4402 }, { "epoch": 2.685984444105536, "grad_norm": 1.4066191911697388, "learning_rate": 3.7153704837721982e-06, "log_odds_chosen": 1.4138457775115967, "log_odds_ratio": -0.40108731389045715, "logits/chosen": -0.9455428719520569, "logits/rejected": -0.7907027006149292, "logps/chosen": -0.7859198451042175, "logps/rejected": -1.9560577869415283, "loss": 0.8625, "nll_loss": 0.9532078504562378, "rewards/accuracies": 0.75, "rewards/chosen": -0.07859198749065399, "rewards/margins": 0.11701381206512451, "rewards/rejected": -0.1956057846546173, "step": 4403 }, { "epoch": 2.686594479182553, "grad_norm": 4.6078972816467285, "learning_rate": 3.7143906919779548e-06, "log_odds_chosen": 3.416785478591919, "log_odds_ratio": -0.2676742672920227, "logits/chosen": -0.7615482807159424, "logits/rejected": -0.9309329986572266, "logps/chosen": -0.5541825294494629, "logps/rejected": -3.3303189277648926, "loss": 1.1164, "nll_loss": 0.7378754019737244, "rewards/accuracies": 0.875, "rewards/chosen": -0.05541825294494629, "rewards/margins": 0.27761363983154297, "rewards/rejected": -0.33303189277648926, "step": 4404 }, { "epoch": 2.68720451425957, "grad_norm": 1.1201245784759521, "learning_rate": 3.7134109001837105e-06, "log_odds_chosen": 1.8997000455856323, "log_odds_ratio": -0.36426013708114624, "logits/chosen": -0.8838050365447998, "logits/rejected": -0.9785639047622681, "logps/chosen": -0.8720571994781494, "logps/rejected": -2.3649182319641113, "loss": 1.0394, "nll_loss": 1.0987355709075928, "rewards/accuracies": 0.875, "rewards/chosen": -0.08720572292804718, "rewards/margins": 0.14928610622882843, "rewards/rejected": -0.23649181425571442, "step": 4405 }, { "epoch": 2.687814549336587, "grad_norm": 1.2794032096862793, "learning_rate": 3.712431108389467e-06, "log_odds_chosen": 2.749162197113037, "log_odds_ratio": -0.4273983836174011, "logits/chosen": -0.9444890022277832, "logits/rejected": -0.9636658430099487, "logps/chosen": -1.0086448192596436, "logps/rejected": -3.412539482116699, "loss": 1.1511, "nll_loss": 1.1157243251800537, "rewards/accuracies": 0.75, "rewards/chosen": -0.1008644849061966, "rewards/margins": 0.24038949608802795, "rewards/rejected": -0.34125399589538574, "step": 4406 }, { "epoch": 2.6884245844136037, "grad_norm": 1.2370274066925049, "learning_rate": 3.7114513165952236e-06, "log_odds_chosen": 0.6044016480445862, "log_odds_ratio": -0.7656800746917725, "logits/chosen": -0.8682717680931091, "logits/rejected": -0.8553405404090881, "logps/chosen": -0.8596307039260864, "logps/rejected": -1.4156274795532227, "loss": 1.0929, "nll_loss": 1.1354575157165527, "rewards/accuracies": 0.5, "rewards/chosen": -0.08596307039260864, "rewards/margins": 0.05559968948364258, "rewards/rejected": -0.14156275987625122, "step": 4407 }, { "epoch": 2.689034619490621, "grad_norm": 1.250407099723816, "learning_rate": 3.7104715248009793e-06, "log_odds_chosen": 0.40897780656814575, "log_odds_ratio": -0.7497586011886597, "logits/chosen": -0.7855738401412964, "logits/rejected": -0.8878930807113647, "logps/chosen": -1.032310962677002, "logps/rejected": -1.2907953262329102, "loss": 1.0025, "nll_loss": 1.1239910125732422, "rewards/accuracies": 0.5, "rewards/chosen": -0.10323110222816467, "rewards/margins": 0.02584844082593918, "rewards/rejected": -0.12907955050468445, "step": 4408 }, { "epoch": 2.689644654567638, "grad_norm": 1.6922749280929565, "learning_rate": 3.709491733006736e-06, "log_odds_chosen": 0.8437761068344116, "log_odds_ratio": -0.45854657888412476, "logits/chosen": -0.6958388686180115, "logits/rejected": -0.5368339419364929, "logps/chosen": -0.9688377380371094, "logps/rejected": -1.6364834308624268, "loss": 1.0535, "nll_loss": 1.0092263221740723, "rewards/accuracies": 0.75, "rewards/chosen": -0.09688377380371094, "rewards/margins": 0.06676456332206726, "rewards/rejected": -0.1636483520269394, "step": 4409 }, { "epoch": 2.6902546896446546, "grad_norm": 1.4033222198486328, "learning_rate": 3.7085119412124924e-06, "log_odds_chosen": 2.0407321453094482, "log_odds_ratio": -0.3296979069709778, "logits/chosen": -0.8974820971488953, "logits/rejected": -0.8946845531463623, "logps/chosen": -0.7751495242118835, "logps/rejected": -2.3234260082244873, "loss": 0.8991, "nll_loss": 1.0317249298095703, "rewards/accuracies": 0.75, "rewards/chosen": -0.07751496136188507, "rewards/margins": 0.15482762455940247, "rewards/rejected": -0.23234260082244873, "step": 4410 }, { "epoch": 2.6908647247216715, "grad_norm": 2.008974313735962, "learning_rate": 3.7075321494182485e-06, "log_odds_chosen": 2.251250743865967, "log_odds_ratio": -0.42829030752182007, "logits/chosen": -0.9349535703659058, "logits/rejected": -0.9641931653022766, "logps/chosen": -0.7630736827850342, "logps/rejected": -2.6766183376312256, "loss": 1.1307, "nll_loss": 1.0474852323532104, "rewards/accuracies": 0.625, "rewards/chosen": -0.07630736380815506, "rewards/margins": 0.19135448336601257, "rewards/rejected": -0.26766183972358704, "step": 4411 }, { "epoch": 2.6914747597986883, "grad_norm": 1.0123907327651978, "learning_rate": 3.7065523576240046e-06, "log_odds_chosen": 1.633804440498352, "log_odds_ratio": -0.36727678775787354, "logits/chosen": -0.8229658603668213, "logits/rejected": -0.7537292838096619, "logps/chosen": -0.742462158203125, "logps/rejected": -2.0361995697021484, "loss": 0.9943, "nll_loss": 0.9628243446350098, "rewards/accuracies": 0.875, "rewards/chosen": -0.07424622774124146, "rewards/margins": 0.12937375903129578, "rewards/rejected": -0.20361997187137604, "step": 4412 }, { "epoch": 2.692084794875705, "grad_norm": 2.5000815391540527, "learning_rate": 3.705572565829761e-06, "log_odds_chosen": 0.32533738017082214, "log_odds_ratio": -0.7160861492156982, "logits/chosen": -1.1321626901626587, "logits/rejected": -1.0948947668075562, "logps/chosen": -1.1415425539016724, "logps/rejected": -1.478288173675537, "loss": 1.0662, "nll_loss": 1.2278614044189453, "rewards/accuracies": 0.375, "rewards/chosen": -0.11415426433086395, "rewards/margins": 0.03367455676198006, "rewards/rejected": -0.1478288173675537, "step": 4413 }, { "epoch": 2.692694829952722, "grad_norm": 0.8077138066291809, "learning_rate": 3.7045927740355173e-06, "log_odds_chosen": 0.9496026039123535, "log_odds_ratio": -0.6421158313751221, "logits/chosen": -0.6867570877075195, "logits/rejected": -0.7551963329315186, "logps/chosen": -0.7882866859436035, "logps/rejected": -1.5564935207366943, "loss": 1.0124, "nll_loss": 0.9708767533302307, "rewards/accuracies": 0.375, "rewards/chosen": -0.07882867008447647, "rewards/margins": 0.07682071626186371, "rewards/rejected": -0.15564937889575958, "step": 4414 }, { "epoch": 2.6933048650297393, "grad_norm": 2.3251142501831055, "learning_rate": 3.703612982241274e-06, "log_odds_chosen": 1.8212562799453735, "log_odds_ratio": -0.26524803042411804, "logits/chosen": -0.8321303129196167, "logits/rejected": -0.9810293912887573, "logps/chosen": -0.6383237838745117, "logps/rejected": -1.8977413177490234, "loss": 1.091, "nll_loss": 0.7553739547729492, "rewards/accuracies": 1.0, "rewards/chosen": -0.06383238732814789, "rewards/margins": 0.12594175338745117, "rewards/rejected": -0.18977415561676025, "step": 4415 }, { "epoch": 2.693914900106756, "grad_norm": 7.617587089538574, "learning_rate": 3.70263319044703e-06, "log_odds_chosen": 1.2949178218841553, "log_odds_ratio": -0.42387500405311584, "logits/chosen": -0.8299238085746765, "logits/rejected": -0.9064676761627197, "logps/chosen": -0.8755204677581787, "logps/rejected": -1.769439458847046, "loss": 1.0609, "nll_loss": 0.9119693636894226, "rewards/accuracies": 0.875, "rewards/chosen": -0.0875520408153534, "rewards/margins": 0.08939190208911896, "rewards/rejected": -0.17694395780563354, "step": 4416 }, { "epoch": 2.694524935183773, "grad_norm": 2.5757288932800293, "learning_rate": 3.701653398652786e-06, "log_odds_chosen": 0.5975179672241211, "log_odds_ratio": -0.5587666034698486, "logits/chosen": -0.9513684511184692, "logits/rejected": -0.9510844349861145, "logps/chosen": -1.0265111923217773, "logps/rejected": -1.493787169456482, "loss": 1.1595, "nll_loss": 1.3449004888534546, "rewards/accuracies": 0.625, "rewards/chosen": -0.10265112668275833, "rewards/margins": 0.04672759771347046, "rewards/rejected": -0.1493787318468094, "step": 4417 }, { "epoch": 2.69513497026079, "grad_norm": 1.2271558046340942, "learning_rate": 3.7006736068585426e-06, "log_odds_chosen": 1.1166883707046509, "log_odds_ratio": -0.4938865900039673, "logits/chosen": -0.6864132881164551, "logits/rejected": -0.7408261299133301, "logps/chosen": -0.8332912921905518, "logps/rejected": -1.4651305675506592, "loss": 1.1151, "nll_loss": 1.0209519863128662, "rewards/accuracies": 0.625, "rewards/chosen": -0.08332912623882294, "rewards/margins": 0.06318393349647522, "rewards/rejected": -0.14651305973529816, "step": 4418 }, { "epoch": 2.695745005337807, "grad_norm": 1.8606196641921997, "learning_rate": 3.6996938150642983e-06, "log_odds_chosen": 2.2311527729034424, "log_odds_ratio": -0.3826408088207245, "logits/chosen": -0.5276340246200562, "logits/rejected": -0.6617755889892578, "logps/chosen": -0.5843714475631714, "logps/rejected": -2.3476333618164062, "loss": 0.9526, "nll_loss": 0.8350970149040222, "rewards/accuracies": 0.75, "rewards/chosen": -0.05843714252114296, "rewards/margins": 0.1763262003660202, "rewards/rejected": -0.23476333916187286, "step": 4419 }, { "epoch": 2.696355040414824, "grad_norm": 2.4253528118133545, "learning_rate": 3.698714023270055e-06, "log_odds_chosen": 1.3937910795211792, "log_odds_ratio": -0.3872240483760834, "logits/chosen": -0.7668134570121765, "logits/rejected": -0.9035968780517578, "logps/chosen": -0.7501927614212036, "logps/rejected": -1.7150325775146484, "loss": 1.1077, "nll_loss": 1.107981562614441, "rewards/accuracies": 0.75, "rewards/chosen": -0.07501927018165588, "rewards/margins": 0.0964839830994606, "rewards/rejected": -0.17150326073169708, "step": 4420 }, { "epoch": 2.696965075491841, "grad_norm": 8.465909957885742, "learning_rate": 3.6977342314758114e-06, "log_odds_chosen": 0.45107749104499817, "log_odds_ratio": -0.8028210401535034, "logits/chosen": -0.9422730207443237, "logits/rejected": -0.9019773602485657, "logps/chosen": -0.8806025385856628, "logps/rejected": -1.3367067575454712, "loss": 1.1787, "nll_loss": 0.9566043019294739, "rewards/accuracies": 0.5, "rewards/chosen": -0.08806024491786957, "rewards/margins": 0.045610420405864716, "rewards/rejected": -0.13367067277431488, "step": 4421 }, { "epoch": 2.6975751105688577, "grad_norm": 1.0544682741165161, "learning_rate": 3.696754439681567e-06, "log_odds_chosen": 2.066260814666748, "log_odds_ratio": -0.23083963990211487, "logits/chosen": -0.6105170845985413, "logits/rejected": -0.814321756362915, "logps/chosen": -0.8322767019271851, "logps/rejected": -2.3822195529937744, "loss": 1.1781, "nll_loss": 1.1231478452682495, "rewards/accuracies": 0.875, "rewards/chosen": -0.08322766423225403, "rewards/margins": 0.15499427914619446, "rewards/rejected": -0.23822195827960968, "step": 4422 }, { "epoch": 2.6981851456458745, "grad_norm": 1.5493842363357544, "learning_rate": 3.6957746478873236e-06, "log_odds_chosen": 2.2179815769195557, "log_odds_ratio": -0.44040125608444214, "logits/chosen": -0.775060772895813, "logits/rejected": -0.8233072757720947, "logps/chosen": -0.771487295627594, "logps/rejected": -2.497586250305176, "loss": 1.1221, "nll_loss": 1.2104809284210205, "rewards/accuracies": 0.75, "rewards/chosen": -0.07714872807264328, "rewards/margins": 0.1726098656654358, "rewards/rejected": -0.24975860118865967, "step": 4423 }, { "epoch": 2.6987951807228914, "grad_norm": 1.3700158596038818, "learning_rate": 3.69479485609308e-06, "log_odds_chosen": 3.174297332763672, "log_odds_ratio": -0.2810102105140686, "logits/chosen": -0.6640496253967285, "logits/rejected": -0.7503975033760071, "logps/chosen": -0.6482633352279663, "logps/rejected": -2.990732192993164, "loss": 0.8685, "nll_loss": 0.6570616960525513, "rewards/accuracies": 0.875, "rewards/chosen": -0.06482633948326111, "rewards/margins": 0.2342468649148941, "rewards/rejected": -0.2990732192993164, "step": 4424 }, { "epoch": 2.699405215799908, "grad_norm": 1.313860535621643, "learning_rate": 3.6938150642988363e-06, "log_odds_chosen": 2.098665237426758, "log_odds_ratio": -0.5313171744346619, "logits/chosen": -0.8319831490516663, "logits/rejected": -0.9448023438453674, "logps/chosen": -0.748663067817688, "logps/rejected": -2.4317009449005127, "loss": 1.1923, "nll_loss": 0.8812450170516968, "rewards/accuracies": 0.5, "rewards/chosen": -0.07486630976200104, "rewards/margins": 0.16830381751060486, "rewards/rejected": -0.2431701123714447, "step": 4425 }, { "epoch": 2.7000152508769255, "grad_norm": 1.912057638168335, "learning_rate": 3.6928352725045924e-06, "log_odds_chosen": 3.0843255519866943, "log_odds_ratio": -0.24706560373306274, "logits/chosen": -0.5675683617591858, "logits/rejected": -0.5994886755943298, "logps/chosen": -0.4704433083534241, "logps/rejected": -2.739927291870117, "loss": 0.8726, "nll_loss": 0.5720129013061523, "rewards/accuracies": 0.875, "rewards/chosen": -0.047044333070516586, "rewards/margins": 0.22694841027259827, "rewards/rejected": -0.27399274706840515, "step": 4426 }, { "epoch": 2.7006252859539424, "grad_norm": 1.8466966152191162, "learning_rate": 3.691855480710349e-06, "log_odds_chosen": 2.9413046836853027, "log_odds_ratio": -0.5453616976737976, "logits/chosen": -0.9683343172073364, "logits/rejected": -1.0442007780075073, "logps/chosen": -0.8228227496147156, "logps/rejected": -3.3437209129333496, "loss": 1.1199, "nll_loss": 0.9597274661064148, "rewards/accuracies": 0.5, "rewards/chosen": -0.08228228241205215, "rewards/margins": 0.25208979845046997, "rewards/rejected": -0.3343721032142639, "step": 4427 }, { "epoch": 2.701235321030959, "grad_norm": 2.968371629714966, "learning_rate": 3.690875688916105e-06, "log_odds_chosen": 0.5023868680000305, "log_odds_ratio": -0.5679609179496765, "logits/chosen": -0.8354208469390869, "logits/rejected": -0.8072556257247925, "logps/chosen": -0.8661590218544006, "logps/rejected": -1.0939340591430664, "loss": 1.1294, "nll_loss": 1.2879964113235474, "rewards/accuracies": 0.75, "rewards/chosen": -0.0866159051656723, "rewards/margins": 0.022777503356337547, "rewards/rejected": -0.1093934029340744, "step": 4428 }, { "epoch": 2.701845356107976, "grad_norm": 1.5662323236465454, "learning_rate": 3.6898958971218616e-06, "log_odds_chosen": 1.1550116539001465, "log_odds_ratio": -0.5231361985206604, "logits/chosen": -1.1076778173446655, "logits/rejected": -0.9096482992172241, "logps/chosen": -0.964493453502655, "logps/rejected": -1.9751371145248413, "loss": 1.0276, "nll_loss": 1.1260411739349365, "rewards/accuracies": 0.625, "rewards/chosen": -0.0964493453502655, "rewards/margins": 0.10106436908245087, "rewards/rejected": -0.19751371443271637, "step": 4429 }, { "epoch": 2.7024553911849933, "grad_norm": 7.761834144592285, "learning_rate": 3.6889161053276178e-06, "log_odds_chosen": 1.1062960624694824, "log_odds_ratio": -0.7177045941352844, "logits/chosen": -0.9326927661895752, "logits/rejected": -1.0519750118255615, "logps/chosen": -0.819433331489563, "logps/rejected": -1.9364869594573975, "loss": 1.2311, "nll_loss": 1.1940802335739136, "rewards/accuracies": 0.625, "rewards/chosen": -0.0819433331489563, "rewards/margins": 0.11170537024736404, "rewards/rejected": -0.19364869594573975, "step": 4430 }, { "epoch": 2.70306542626201, "grad_norm": 1.0463136434555054, "learning_rate": 3.687936313533374e-06, "log_odds_chosen": 2.760523796081543, "log_odds_ratio": -0.4571135342121124, "logits/chosen": -0.8570244312286377, "logits/rejected": -0.8577710390090942, "logps/chosen": -0.8618326783180237, "logps/rejected": -3.3245646953582764, "loss": 1.1391, "nll_loss": 1.1665748357772827, "rewards/accuracies": 0.625, "rewards/chosen": -0.08618327975273132, "rewards/margins": 0.2462732046842575, "rewards/rejected": -0.33245646953582764, "step": 4431 }, { "epoch": 2.703675461339027, "grad_norm": 1.6970924139022827, "learning_rate": 3.6869565217391304e-06, "log_odds_chosen": 3.4962878227233887, "log_odds_ratio": -0.3678181767463684, "logits/chosen": -0.7729078531265259, "logits/rejected": -0.8813779354095459, "logps/chosen": -0.9129643440246582, "logps/rejected": -3.986176013946533, "loss": 1.0882, "nll_loss": 1.020623803138733, "rewards/accuracies": 0.75, "rewards/chosen": -0.09129642695188522, "rewards/margins": 0.3073211908340454, "rewards/rejected": -0.39861756563186646, "step": 4432 }, { "epoch": 2.704285496416044, "grad_norm": 1.6328572034835815, "learning_rate": 3.6859767299448866e-06, "log_odds_chosen": 1.7430176734924316, "log_odds_ratio": -0.36690884828567505, "logits/chosen": -0.8902820348739624, "logits/rejected": -0.891082763671875, "logps/chosen": -0.7014136910438538, "logps/rejected": -2.058375120162964, "loss": 0.942, "nll_loss": 0.9974381923675537, "rewards/accuracies": 0.875, "rewards/chosen": -0.07014137506484985, "rewards/margins": 0.135696142911911, "rewards/rejected": -0.20583751797676086, "step": 4433 }, { "epoch": 2.7048955314930607, "grad_norm": 8.862496376037598, "learning_rate": 3.6849969381506427e-06, "log_odds_chosen": 0.6552358865737915, "log_odds_ratio": -0.575311541557312, "logits/chosen": -0.93709796667099, "logits/rejected": -0.9961144328117371, "logps/chosen": -0.9947443008422852, "logps/rejected": -1.4805368185043335, "loss": 1.058, "nll_loss": 1.2823904752731323, "rewards/accuracies": 0.75, "rewards/chosen": -0.09947443008422852, "rewards/margins": 0.048579245805740356, "rewards/rejected": -0.14805367588996887, "step": 4434 }, { "epoch": 2.7055055665700776, "grad_norm": 1.332453966140747, "learning_rate": 3.6840171463563992e-06, "log_odds_chosen": 1.1354435682296753, "log_odds_ratio": -0.5291705131530762, "logits/chosen": -0.9127392768859863, "logits/rejected": -0.9227162599563599, "logps/chosen": -0.9702967405319214, "logps/rejected": -1.8710212707519531, "loss": 1.0146, "nll_loss": 1.1338766813278198, "rewards/accuracies": 0.625, "rewards/chosen": -0.0970296710729599, "rewards/margins": 0.09007246047258377, "rewards/rejected": -0.18710212409496307, "step": 4435 }, { "epoch": 2.706115601647095, "grad_norm": 0.9593508839607239, "learning_rate": 3.6830373545621558e-06, "log_odds_chosen": 1.2254009246826172, "log_odds_ratio": -0.46106448769569397, "logits/chosen": -1.0053452253341675, "logits/rejected": -1.0330479145050049, "logps/chosen": -1.0978156328201294, "logps/rejected": -2.1076548099517822, "loss": 1.2912, "nll_loss": 1.412764072418213, "rewards/accuracies": 0.625, "rewards/chosen": -0.10978156328201294, "rewards/margins": 0.10098393261432648, "rewards/rejected": -0.2107655107975006, "step": 4436 }, { "epoch": 2.7067256367241117, "grad_norm": 1.0416537523269653, "learning_rate": 3.6820575627679115e-06, "log_odds_chosen": 1.0920720100402832, "log_odds_ratio": -0.458276629447937, "logits/chosen": -0.8704215288162231, "logits/rejected": -0.8815720081329346, "logps/chosen": -0.6992002725601196, "logps/rejected": -1.5440189838409424, "loss": 1.0767, "nll_loss": 0.8754663467407227, "rewards/accuracies": 0.75, "rewards/chosen": -0.06992002576589584, "rewards/margins": 0.08448188006877899, "rewards/rejected": -0.15440189838409424, "step": 4437 }, { "epoch": 2.7073356718011286, "grad_norm": 1.9049086570739746, "learning_rate": 3.681077770973668e-06, "log_odds_chosen": 0.6602987051010132, "log_odds_ratio": -0.5089491009712219, "logits/chosen": -1.0757431983947754, "logits/rejected": -0.9126026630401611, "logps/chosen": -0.9350653886795044, "logps/rejected": -1.4381587505340576, "loss": 1.2405, "nll_loss": 1.1423470973968506, "rewards/accuracies": 0.875, "rewards/chosen": -0.09350653737783432, "rewards/margins": 0.05030934512615204, "rewards/rejected": -0.14381587505340576, "step": 4438 }, { "epoch": 2.7079457068781454, "grad_norm": 2.464017152786255, "learning_rate": 3.680097979179424e-06, "log_odds_chosen": 2.5228519439697266, "log_odds_ratio": -0.22879327833652496, "logits/chosen": -0.5245581269264221, "logits/rejected": -0.8478410243988037, "logps/chosen": -0.41524216532707214, "logps/rejected": -2.1807641983032227, "loss": 0.8897, "nll_loss": 0.5122471451759338, "rewards/accuracies": 0.875, "rewards/chosen": -0.041524216532707214, "rewards/margins": 0.1765522062778473, "rewards/rejected": -0.2180764079093933, "step": 4439 }, { "epoch": 2.7085557419551627, "grad_norm": 2.594904899597168, "learning_rate": 3.6791181873851803e-06, "log_odds_chosen": 1.2467474937438965, "log_odds_ratio": -0.5003774762153625, "logits/chosen": -0.6654026508331299, "logits/rejected": -0.7858073711395264, "logps/chosen": -0.7880250215530396, "logps/rejected": -1.686564564704895, "loss": 1.0813, "nll_loss": 1.0705472230911255, "rewards/accuracies": 0.875, "rewards/chosen": -0.07880251109600067, "rewards/margins": 0.08985394984483719, "rewards/rejected": -0.16865645349025726, "step": 4440 }, { "epoch": 2.7091657770321795, "grad_norm": 1.5081714391708374, "learning_rate": 3.678138395590937e-06, "log_odds_chosen": 0.548672616481781, "log_odds_ratio": -0.6747919917106628, "logits/chosen": -0.7994535565376282, "logits/rejected": -0.8723305463790894, "logps/chosen": -0.8458497524261475, "logps/rejected": -1.294018030166626, "loss": 1.1181, "nll_loss": 0.9754525423049927, "rewards/accuracies": 0.375, "rewards/chosen": -0.08458498120307922, "rewards/margins": 0.04481682926416397, "rewards/rejected": -0.1294018030166626, "step": 4441 }, { "epoch": 2.7097758121091964, "grad_norm": 2.475918769836426, "learning_rate": 3.677158603796693e-06, "log_odds_chosen": 2.6266491413116455, "log_odds_ratio": -0.4630874991416931, "logits/chosen": -0.9623966217041016, "logits/rejected": -1.0230369567871094, "logps/chosen": -0.9620869159698486, "logps/rejected": -3.284158706665039, "loss": 1.1486, "nll_loss": 1.1438496112823486, "rewards/accuracies": 0.625, "rewards/chosen": -0.09620868414640427, "rewards/margins": 0.23220719397068024, "rewards/rejected": -0.3284158706665039, "step": 4442 }, { "epoch": 2.7103858471862132, "grad_norm": 2.3788692951202393, "learning_rate": 3.6761788120024495e-06, "log_odds_chosen": 1.7067595720291138, "log_odds_ratio": -0.4352279007434845, "logits/chosen": -0.9061071872711182, "logits/rejected": -0.9559793472290039, "logps/chosen": -0.778108537197113, "logps/rejected": -2.1394765377044678, "loss": 1.0701, "nll_loss": 1.0399378538131714, "rewards/accuracies": 0.875, "rewards/chosen": -0.0778108537197113, "rewards/margins": 0.13613680005073547, "rewards/rejected": -0.21394765377044678, "step": 4443 }, { "epoch": 2.71099588226323, "grad_norm": 2.982194423675537, "learning_rate": 3.6751990202082056e-06, "log_odds_chosen": 0.8434372544288635, "log_odds_ratio": -0.5488470792770386, "logits/chosen": -0.8462612628936768, "logits/rejected": -0.7847635746002197, "logps/chosen": -0.8205058574676514, "logps/rejected": -1.3392375707626343, "loss": 1.1689, "nll_loss": 1.2198758125305176, "rewards/accuracies": 0.625, "rewards/chosen": -0.08205059170722961, "rewards/margins": 0.05187314748764038, "rewards/rejected": -0.13392373919487, "step": 4444 }, { "epoch": 2.711605917340247, "grad_norm": 1.7767103910446167, "learning_rate": 3.6742192284139617e-06, "log_odds_chosen": 2.9581167697906494, "log_odds_ratio": -0.1751786470413208, "logits/chosen": -0.7036725282669067, "logits/rejected": -0.8331671953201294, "logps/chosen": -0.6767486333847046, "logps/rejected": -2.942681312561035, "loss": 0.8884, "nll_loss": 0.8479268550872803, "rewards/accuracies": 1.0, "rewards/chosen": -0.06767486780881882, "rewards/margins": 0.2265932708978653, "rewards/rejected": -0.2942681312561035, "step": 4445 }, { "epoch": 2.7122159524172638, "grad_norm": 1.4028937816619873, "learning_rate": 3.6732394366197183e-06, "log_odds_chosen": 1.555546522140503, "log_odds_ratio": -0.3339868485927582, "logits/chosen": -0.794013261795044, "logits/rejected": -0.840587854385376, "logps/chosen": -0.8238359689712524, "logps/rejected": -1.7682982683181763, "loss": 0.9943, "nll_loss": 1.0322449207305908, "rewards/accuracies": 0.875, "rewards/chosen": -0.08238360285758972, "rewards/margins": 0.09444624185562134, "rewards/rejected": -0.17682984471321106, "step": 4446 }, { "epoch": 2.712825987494281, "grad_norm": 4.409276962280273, "learning_rate": 3.6722596448254744e-06, "log_odds_chosen": 1.880901575088501, "log_odds_ratio": -0.35377222299575806, "logits/chosen": -0.7418601512908936, "logits/rejected": -0.8923046588897705, "logps/chosen": -0.7040871381759644, "logps/rejected": -2.1828222274780273, "loss": 1.0067, "nll_loss": 0.7988340854644775, "rewards/accuracies": 0.875, "rewards/chosen": -0.07040871679782867, "rewards/margins": 0.14787352085113525, "rewards/rejected": -0.21828222274780273, "step": 4447 }, { "epoch": 2.713436022571298, "grad_norm": 1.9813685417175293, "learning_rate": 3.6712798530312305e-06, "log_odds_chosen": -0.05144292116165161, "log_odds_ratio": -0.7690070867538452, "logits/chosen": -0.8466781377792358, "logits/rejected": -0.8597879409790039, "logps/chosen": -0.9176885485649109, "logps/rejected": -0.9565753936767578, "loss": 0.9837, "nll_loss": 1.067893385887146, "rewards/accuracies": 0.375, "rewards/chosen": -0.09176885336637497, "rewards/margins": 0.0038886861875653267, "rewards/rejected": -0.09565754234790802, "step": 4448 }, { "epoch": 2.7140460576483147, "grad_norm": 1.3968310356140137, "learning_rate": 3.670300061236987e-06, "log_odds_chosen": 1.0831990242004395, "log_odds_ratio": -0.4830138385295868, "logits/chosen": -0.8636944890022278, "logits/rejected": -0.8080408573150635, "logps/chosen": -0.8058101534843445, "logps/rejected": -1.483994483947754, "loss": 1.0505, "nll_loss": 0.9196840524673462, "rewards/accuracies": 0.75, "rewards/chosen": -0.08058101683855057, "rewards/margins": 0.06781841814517975, "rewards/rejected": -0.1483994424343109, "step": 4449 }, { "epoch": 2.7146560927253316, "grad_norm": 1.0578447580337524, "learning_rate": 3.6693202694427436e-06, "log_odds_chosen": 1.2251250743865967, "log_odds_ratio": -0.53802490234375, "logits/chosen": -0.7258704304695129, "logits/rejected": -0.9372312426567078, "logps/chosen": -1.0498460531234741, "logps/rejected": -2.025214195251465, "loss": 1.0151, "nll_loss": 1.244964838027954, "rewards/accuracies": 0.75, "rewards/chosen": -0.10498461127281189, "rewards/margins": 0.09753680229187012, "rewards/rejected": -0.202521413564682, "step": 4450 }, { "epoch": 2.715266127802349, "grad_norm": 1.800227403640747, "learning_rate": 3.6683404776484993e-06, "log_odds_chosen": 1.121239423751831, "log_odds_ratio": -0.4305558204650879, "logits/chosen": -0.8995213508605957, "logits/rejected": -0.8259276151657104, "logps/chosen": -0.8992313146591187, "logps/rejected": -1.6173503398895264, "loss": 1.0195, "nll_loss": 1.0096913576126099, "rewards/accuracies": 0.75, "rewards/chosen": -0.08992312848567963, "rewards/margins": 0.07181190699338913, "rewards/rejected": -0.16173504292964935, "step": 4451 }, { "epoch": 2.7158761628793657, "grad_norm": 2.062002658843994, "learning_rate": 3.667360685854256e-06, "log_odds_chosen": 2.7157366275787354, "log_odds_ratio": -0.3025878667831421, "logits/chosen": -0.7422151565551758, "logits/rejected": -0.9865715503692627, "logps/chosen": -0.7405424118041992, "logps/rejected": -2.9058330059051514, "loss": 1.086, "nll_loss": 1.091027855873108, "rewards/accuracies": 0.875, "rewards/chosen": -0.07405424118041992, "rewards/margins": 0.21652907133102417, "rewards/rejected": -0.2905833125114441, "step": 4452 }, { "epoch": 2.7164861979563826, "grad_norm": 13.465529441833496, "learning_rate": 3.6663808940600124e-06, "log_odds_chosen": 1.9118987321853638, "log_odds_ratio": -0.45275017619132996, "logits/chosen": -0.7519775629043579, "logits/rejected": -0.8900268077850342, "logps/chosen": -0.8078352808952332, "logps/rejected": -2.3681130409240723, "loss": 1.1976, "nll_loss": 1.019054889678955, "rewards/accuracies": 0.75, "rewards/chosen": -0.08078353106975555, "rewards/margins": 0.15602776408195496, "rewards/rejected": -0.2368113100528717, "step": 4453 }, { "epoch": 2.7170962330333994, "grad_norm": 2.5722885131835938, "learning_rate": 3.665401102265768e-06, "log_odds_chosen": 0.8311448097229004, "log_odds_ratio": -0.5818160772323608, "logits/chosen": -0.7760289907455444, "logits/rejected": -0.8490530252456665, "logps/chosen": -0.7131444811820984, "logps/rejected": -1.3014224767684937, "loss": 0.9632, "nll_loss": 0.9039320945739746, "rewards/accuracies": 0.625, "rewards/chosen": -0.07131444662809372, "rewards/margins": 0.05882780998945236, "rewards/rejected": -0.13014225661754608, "step": 4454 }, { "epoch": 2.7177062681104163, "grad_norm": 2.453801393508911, "learning_rate": 3.6644213104715246e-06, "log_odds_chosen": 2.030982494354248, "log_odds_ratio": -0.3562391698360443, "logits/chosen": -0.8401120901107788, "logits/rejected": -0.8034669160842896, "logps/chosen": -0.7852703332901001, "logps/rejected": -2.4193811416625977, "loss": 1.2024, "nll_loss": 1.2241929769515991, "rewards/accuracies": 0.875, "rewards/chosen": -0.07852703332901001, "rewards/margins": 0.16341109573841095, "rewards/rejected": -0.24193812906742096, "step": 4455 }, { "epoch": 2.718316303187433, "grad_norm": 8.789965629577637, "learning_rate": 3.663441518677281e-06, "log_odds_chosen": 0.925214409828186, "log_odds_ratio": -0.392800509929657, "logits/chosen": -1.0167874097824097, "logits/rejected": -0.9931206107139587, "logps/chosen": -1.1675491333007812, "logps/rejected": -1.8018335103988647, "loss": 1.1589, "nll_loss": 1.2273198366165161, "rewards/accuracies": 0.875, "rewards/chosen": -0.1167549267411232, "rewards/margins": 0.06342842429876328, "rewards/rejected": -0.18018335103988647, "step": 4456 }, { "epoch": 2.71892633826445, "grad_norm": 2.8832015991210938, "learning_rate": 3.6624617268830373e-06, "log_odds_chosen": 2.412332534790039, "log_odds_ratio": -0.23545661568641663, "logits/chosen": -0.7782719135284424, "logits/rejected": -0.8704694509506226, "logps/chosen": -0.5144084095954895, "logps/rejected": -2.120337963104248, "loss": 1.0491, "nll_loss": 0.9113154411315918, "rewards/accuracies": 0.875, "rewards/chosen": -0.05144084244966507, "rewards/margins": 0.1605929583311081, "rewards/rejected": -0.21203379333019257, "step": 4457 }, { "epoch": 2.7195363733414673, "grad_norm": 3.350093126296997, "learning_rate": 3.6614819350887934e-06, "log_odds_chosen": 2.0641348361968994, "log_odds_ratio": -0.35713502764701843, "logits/chosen": -0.6049674153327942, "logits/rejected": -0.9276546239852905, "logps/chosen": -0.7588516473770142, "logps/rejected": -2.318242311477661, "loss": 1.0063, "nll_loss": 0.9065924882888794, "rewards/accuracies": 0.75, "rewards/chosen": -0.07588517665863037, "rewards/margins": 0.15593905746936798, "rewards/rejected": -0.23182421922683716, "step": 4458 }, { "epoch": 2.720146408418484, "grad_norm": 2.52968430519104, "learning_rate": 3.6605021432945495e-06, "log_odds_chosen": 0.2911939024925232, "log_odds_ratio": -0.6485556364059448, "logits/chosen": -0.8097641468048096, "logits/rejected": -1.0410996675491333, "logps/chosen": -1.0931850671768188, "logps/rejected": -1.3296971321105957, "loss": 1.116, "nll_loss": 1.1671178340911865, "rewards/accuracies": 0.5, "rewards/chosen": -0.10931850969791412, "rewards/margins": 0.023651206865906715, "rewards/rejected": -0.1329697221517563, "step": 4459 }, { "epoch": 2.720756443495501, "grad_norm": 1.6028423309326172, "learning_rate": 3.659522351500306e-06, "log_odds_chosen": 0.5587695240974426, "log_odds_ratio": -0.5981687903404236, "logits/chosen": -0.8847002983093262, "logits/rejected": -0.8836068511009216, "logps/chosen": -0.8901692628860474, "logps/rejected": -1.2666233777999878, "loss": 1.0248, "nll_loss": 1.1091258525848389, "rewards/accuracies": 0.75, "rewards/chosen": -0.08901692926883698, "rewards/margins": 0.03764541447162628, "rewards/rejected": -0.12666234374046326, "step": 4460 }, { "epoch": 2.721366478572518, "grad_norm": 2.4117214679718018, "learning_rate": 3.6585425597060622e-06, "log_odds_chosen": 1.3113200664520264, "log_odds_ratio": -0.5648108124732971, "logits/chosen": -0.8987307548522949, "logits/rejected": -1.0011199712753296, "logps/chosen": -1.001611590385437, "logps/rejected": -2.0777132511138916, "loss": 1.0619, "nll_loss": 1.2228174209594727, "rewards/accuracies": 0.5, "rewards/chosen": -0.10016116499900818, "rewards/margins": 0.10761015862226486, "rewards/rejected": -0.20777131617069244, "step": 4461 }, { "epoch": 2.721976513649535, "grad_norm": 1.5214807987213135, "learning_rate": 3.6575627679118183e-06, "log_odds_chosen": 0.7323627471923828, "log_odds_ratio": -0.4660937488079071, "logits/chosen": -1.1531614065170288, "logits/rejected": -1.025775671005249, "logps/chosen": -0.806472659111023, "logps/rejected": -1.3208515644073486, "loss": 1.1123, "nll_loss": 1.0845935344696045, "rewards/accuracies": 0.75, "rewards/chosen": -0.08064725995063782, "rewards/margins": 0.05143789201974869, "rewards/rejected": -0.1320851594209671, "step": 4462 }, { "epoch": 2.722586548726552, "grad_norm": 1.1051548719406128, "learning_rate": 3.656582976117575e-06, "log_odds_chosen": 1.166309118270874, "log_odds_ratio": -0.4304657280445099, "logits/chosen": -0.9199652075767517, "logits/rejected": -0.8833692073822021, "logps/chosen": -0.9487044811248779, "logps/rejected": -1.8783650398254395, "loss": 1.0809, "nll_loss": 1.214051604270935, "rewards/accuracies": 0.875, "rewards/chosen": -0.09487044811248779, "rewards/margins": 0.09296604990959167, "rewards/rejected": -0.18783649802207947, "step": 4463 }, { "epoch": 2.7231965838035688, "grad_norm": 6.448940753936768, "learning_rate": 3.6556031843233314e-06, "log_odds_chosen": 2.167851686477661, "log_odds_ratio": -0.41110894083976746, "logits/chosen": -0.7906157374382019, "logits/rejected": -0.9180101156234741, "logps/chosen": -0.9149738550186157, "logps/rejected": -2.564533233642578, "loss": 1.0397, "nll_loss": 1.0685545206069946, "rewards/accuracies": 0.75, "rewards/chosen": -0.09149739146232605, "rewards/margins": 0.16495594382286072, "rewards/rejected": -0.25645333528518677, "step": 4464 }, { "epoch": 2.7238066188805856, "grad_norm": 2.9476685523986816, "learning_rate": 3.654623392529087e-06, "log_odds_chosen": 1.935175895690918, "log_odds_ratio": -0.42639005184173584, "logits/chosen": -0.9554523229598999, "logits/rejected": -1.0360603332519531, "logps/chosen": -0.9265608787536621, "logps/rejected": -2.4357411861419678, "loss": 1.097, "nll_loss": 1.1988167762756348, "rewards/accuracies": 0.875, "rewards/chosen": -0.09265609085559845, "rewards/margins": 0.15091803669929504, "rewards/rejected": -0.2435741126537323, "step": 4465 }, { "epoch": 2.7244166539576025, "grad_norm": 3.3573360443115234, "learning_rate": 3.6536436007348437e-06, "log_odds_chosen": 2.5001142024993896, "log_odds_ratio": -0.30335718393325806, "logits/chosen": -0.7914509773254395, "logits/rejected": -0.9689391851425171, "logps/chosen": -0.6958924531936646, "logps/rejected": -2.556892156600952, "loss": 1.1059, "nll_loss": 1.0301506519317627, "rewards/accuracies": 0.875, "rewards/chosen": -0.06958924233913422, "rewards/margins": 0.18609997630119324, "rewards/rejected": -0.25568920373916626, "step": 4466 }, { "epoch": 2.7250266890346193, "grad_norm": 2.909147024154663, "learning_rate": 3.6526638089406002e-06, "log_odds_chosen": 1.8994699716567993, "log_odds_ratio": -0.2522818446159363, "logits/chosen": -0.6179653406143188, "logits/rejected": -0.8099607229232788, "logps/chosen": -0.6383575797080994, "logps/rejected": -2.0095810890197754, "loss": 1.0946, "nll_loss": 0.9413196444511414, "rewards/accuracies": 1.0, "rewards/chosen": -0.0638357549905777, "rewards/margins": 0.13712236285209656, "rewards/rejected": -0.20095811784267426, "step": 4467 }, { "epoch": 2.725636724111636, "grad_norm": 2.3078975677490234, "learning_rate": 3.651684017146356e-06, "log_odds_chosen": 1.9777907133102417, "log_odds_ratio": -0.31226277351379395, "logits/chosen": -0.724919319152832, "logits/rejected": -0.8451602458953857, "logps/chosen": -0.8656022548675537, "logps/rejected": -2.4633007049560547, "loss": 1.1227, "nll_loss": 1.1072051525115967, "rewards/accuracies": 0.875, "rewards/chosen": -0.0865602195262909, "rewards/margins": 0.15976984798908234, "rewards/rejected": -0.24633006751537323, "step": 4468 }, { "epoch": 2.7262467591886534, "grad_norm": 8.047093391418457, "learning_rate": 3.6507042253521125e-06, "log_odds_chosen": 0.847011387348175, "log_odds_ratio": -0.6699075698852539, "logits/chosen": -1.1420891284942627, "logits/rejected": -1.0153281688690186, "logps/chosen": -1.1229407787322998, "logps/rejected": -1.8293429613113403, "loss": 1.0359, "nll_loss": 1.2362949848175049, "rewards/accuracies": 0.375, "rewards/chosen": -0.11229406297206879, "rewards/margins": 0.07064023613929749, "rewards/rejected": -0.18293431401252747, "step": 4469 }, { "epoch": 2.7268567942656703, "grad_norm": 1.0455408096313477, "learning_rate": 3.649724433557869e-06, "log_odds_chosen": 1.844180941581726, "log_odds_ratio": -0.3429078161716461, "logits/chosen": -0.7890971302986145, "logits/rejected": -0.7517831325531006, "logps/chosen": -0.7588518261909485, "logps/rejected": -2.1727538108825684, "loss": 0.8457, "nll_loss": 0.903579592704773, "rewards/accuracies": 0.75, "rewards/chosen": -0.07588518410921097, "rewards/margins": 0.14139017462730408, "rewards/rejected": -0.21727538108825684, "step": 4470 }, { "epoch": 2.727466829342687, "grad_norm": 2.0285189151763916, "learning_rate": 3.648744641763625e-06, "log_odds_chosen": 2.6000070571899414, "log_odds_ratio": -0.5907086730003357, "logits/chosen": -0.8358389735221863, "logits/rejected": -0.9766702651977539, "logps/chosen": -0.6573663353919983, "logps/rejected": -2.813845157623291, "loss": 1.0127, "nll_loss": 0.9626476764678955, "rewards/accuracies": 0.625, "rewards/chosen": -0.06573663651943207, "rewards/margins": 0.2156478762626648, "rewards/rejected": -0.28138449788093567, "step": 4471 }, { "epoch": 2.728076864419704, "grad_norm": 5.241933345794678, "learning_rate": 3.6477648499693813e-06, "log_odds_chosen": -0.31503212451934814, "log_odds_ratio": -0.9437001943588257, "logits/chosen": -0.7270970344543457, "logits/rejected": -0.5509023070335388, "logps/chosen": -1.022447109222412, "logps/rejected": -0.8068273067474365, "loss": 1.1453, "nll_loss": 1.0641778707504272, "rewards/accuracies": 0.25, "rewards/chosen": -0.10224471986293793, "rewards/margins": -0.021561987698078156, "rewards/rejected": -0.08068273216485977, "step": 4472 }, { "epoch": 2.7286868994967213, "grad_norm": 1.0268871784210205, "learning_rate": 3.646785058175138e-06, "log_odds_chosen": 0.0966569036245346, "log_odds_ratio": -0.6987439393997192, "logits/chosen": -0.7697193622589111, "logits/rejected": -0.8333250284194946, "logps/chosen": -0.9059802293777466, "logps/rejected": -1.0016940832138062, "loss": 1.0929, "nll_loss": 1.211730718612671, "rewards/accuracies": 0.375, "rewards/chosen": -0.09059801697731018, "rewards/margins": 0.009571390226483345, "rewards/rejected": -0.10016941279172897, "step": 4473 }, { "epoch": 2.729296934573738, "grad_norm": 2.8090450763702393, "learning_rate": 3.645805266380894e-06, "log_odds_chosen": 2.642162799835205, "log_odds_ratio": -0.22911834716796875, "logits/chosen": -0.6829041242599487, "logits/rejected": -0.9701558351516724, "logps/chosen": -0.6147117614746094, "logps/rejected": -2.629624605178833, "loss": 0.9688, "nll_loss": 0.7603474855422974, "rewards/accuracies": 1.0, "rewards/chosen": -0.061471179127693176, "rewards/margins": 0.20149126648902893, "rewards/rejected": -0.2629624605178833, "step": 4474 }, { "epoch": 2.729906969650755, "grad_norm": 1.5081437826156616, "learning_rate": 3.64482547458665e-06, "log_odds_chosen": 0.47784075140953064, "log_odds_ratio": -0.5840345025062561, "logits/chosen": -0.7683609127998352, "logits/rejected": -0.7269346117973328, "logps/chosen": -0.8884011507034302, "logps/rejected": -1.115222692489624, "loss": 1.1031, "nll_loss": 1.2057654857635498, "rewards/accuracies": 0.625, "rewards/chosen": -0.08884011209011078, "rewards/margins": 0.022682148963212967, "rewards/rejected": -0.11152227222919464, "step": 4475 }, { "epoch": 2.730517004727772, "grad_norm": 2.221606492996216, "learning_rate": 3.643845682792406e-06, "log_odds_chosen": 0.11339728534221649, "log_odds_ratio": -0.765921413898468, "logits/chosen": -0.9321063756942749, "logits/rejected": -0.7460247874259949, "logps/chosen": -0.9957643151283264, "logps/rejected": -1.1011719703674316, "loss": 1.1422, "nll_loss": 1.2984939813613892, "rewards/accuracies": 0.375, "rewards/chosen": -0.099576435983181, "rewards/margins": 0.01054076012223959, "rewards/rejected": -0.11011719703674316, "step": 4476 }, { "epoch": 2.7311270398047887, "grad_norm": 1.2353569269180298, "learning_rate": 3.6428658909981627e-06, "log_odds_chosen": 1.0884250402450562, "log_odds_ratio": -0.5373618006706238, "logits/chosen": -0.7058932781219482, "logits/rejected": -0.7001562714576721, "logps/chosen": -0.7378255128860474, "logps/rejected": -1.4770257472991943, "loss": 1.0835, "nll_loss": 1.2121477127075195, "rewards/accuracies": 0.5, "rewards/chosen": -0.0737825557589531, "rewards/margins": 0.07392002642154694, "rewards/rejected": -0.14770257472991943, "step": 4477 }, { "epoch": 2.7317370748818055, "grad_norm": 2.1137611865997314, "learning_rate": 3.6418860992039193e-06, "log_odds_chosen": 0.8079016208648682, "log_odds_ratio": -0.5303670167922974, "logits/chosen": -0.8174623847007751, "logits/rejected": -0.8283852338790894, "logps/chosen": -1.115647554397583, "logps/rejected": -1.6084620952606201, "loss": 1.194, "nll_loss": 1.1935036182403564, "rewards/accuracies": 0.75, "rewards/chosen": -0.1115647554397583, "rewards/margins": 0.049281470477581024, "rewards/rejected": -0.16084623336791992, "step": 4478 }, { "epoch": 2.732347109958823, "grad_norm": 1.3159180879592896, "learning_rate": 3.640906307409675e-06, "log_odds_chosen": 1.2604930400848389, "log_odds_ratio": -0.6344633102416992, "logits/chosen": -0.9776784777641296, "logits/rejected": -0.8979732990264893, "logps/chosen": -0.9027295112609863, "logps/rejected": -1.9377378225326538, "loss": 1.0744, "nll_loss": 1.105906367301941, "rewards/accuracies": 0.625, "rewards/chosen": -0.09027295559644699, "rewards/margins": 0.1035008355975151, "rewards/rejected": -0.1937737911939621, "step": 4479 }, { "epoch": 2.7329571450358396, "grad_norm": 1.5256706476211548, "learning_rate": 3.6399265156154315e-06, "log_odds_chosen": 2.601935863494873, "log_odds_ratio": -0.2868964672088623, "logits/chosen": -0.6024302244186401, "logits/rejected": -0.7840237021446228, "logps/chosen": -0.5187587738037109, "logps/rejected": -2.357363224029541, "loss": 0.857, "nll_loss": 0.7960096597671509, "rewards/accuracies": 0.875, "rewards/chosen": -0.05187588185071945, "rewards/margins": 0.1838604211807251, "rewards/rejected": -0.23573632538318634, "step": 4480 }, { "epoch": 2.7335671801128565, "grad_norm": 2.5430972576141357, "learning_rate": 3.638946723821188e-06, "log_odds_chosen": 1.1873711347579956, "log_odds_ratio": -0.47877976298332214, "logits/chosen": -0.7606877088546753, "logits/rejected": -0.8404984474182129, "logps/chosen": -0.8031490445137024, "logps/rejected": -1.6368248462677002, "loss": 1.099, "nll_loss": 0.9381202459335327, "rewards/accuracies": 0.75, "rewards/chosen": -0.08031490445137024, "rewards/margins": 0.08336757868528366, "rewards/rejected": -0.1636824905872345, "step": 4481 }, { "epoch": 2.7341772151898733, "grad_norm": 2.043013095855713, "learning_rate": 3.6379669320269437e-06, "log_odds_chosen": 0.7480227947235107, "log_odds_ratio": -0.5522167682647705, "logits/chosen": -0.8647995591163635, "logits/rejected": -0.6371312737464905, "logps/chosen": -0.8168506622314453, "logps/rejected": -1.2963759899139404, "loss": 1.0458, "nll_loss": 1.0412970781326294, "rewards/accuracies": 0.625, "rewards/chosen": -0.08168507367372513, "rewards/margins": 0.04795253276824951, "rewards/rejected": -0.12963759899139404, "step": 4482 }, { "epoch": 2.7347872502668906, "grad_norm": 2.7812654972076416, "learning_rate": 3.6369871402327003e-06, "log_odds_chosen": 3.54390549659729, "log_odds_ratio": -0.24046871066093445, "logits/chosen": -0.5493175387382507, "logits/rejected": -0.9422698020935059, "logps/chosen": -0.5977143049240112, "logps/rejected": -3.2880146503448486, "loss": 1.2378, "nll_loss": 0.9949950575828552, "rewards/accuracies": 0.875, "rewards/chosen": -0.059771426022052765, "rewards/margins": 0.26903003454208374, "rewards/rejected": -0.3288014531135559, "step": 4483 }, { "epoch": 2.7353972853439075, "grad_norm": 2.171708106994629, "learning_rate": 3.636007348438457e-06, "log_odds_chosen": 3.3300912380218506, "log_odds_ratio": -0.3100324273109436, "logits/chosen": -0.8477009534835815, "logits/rejected": -1.1503069400787354, "logps/chosen": -0.850055992603302, "logps/rejected": -3.653330087661743, "loss": 1.1162, "nll_loss": 1.0775973796844482, "rewards/accuracies": 0.875, "rewards/chosen": -0.08500559628009796, "rewards/margins": 0.2803274095058441, "rewards/rejected": -0.36533302068710327, "step": 4484 }, { "epoch": 2.7360073204209243, "grad_norm": 1.7314302921295166, "learning_rate": 3.635027556644213e-06, "log_odds_chosen": 3.173983573913574, "log_odds_ratio": -0.36608952283859253, "logits/chosen": -0.6930524110794067, "logits/rejected": -0.8857035636901855, "logps/chosen": -0.6551303267478943, "logps/rejected": -3.2564384937286377, "loss": 0.9544, "nll_loss": 0.7919254899024963, "rewards/accuracies": 0.875, "rewards/chosen": -0.06551303714513779, "rewards/margins": 0.26013079285621643, "rewards/rejected": -0.3256438374519348, "step": 4485 }, { "epoch": 2.736617355497941, "grad_norm": 2.8835599422454834, "learning_rate": 3.634047764849969e-06, "log_odds_chosen": 1.8972266912460327, "log_odds_ratio": -0.34478747844696045, "logits/chosen": -0.7659354209899902, "logits/rejected": -0.8282055854797363, "logps/chosen": -0.7701879739761353, "logps/rejected": -2.145284414291382, "loss": 1.0698, "nll_loss": 0.9124386310577393, "rewards/accuracies": 0.75, "rewards/chosen": -0.07701879739761353, "rewards/margins": 0.13750965893268585, "rewards/rejected": -0.21452844142913818, "step": 4486 }, { "epoch": 2.737227390574958, "grad_norm": 1.6962029933929443, "learning_rate": 3.6330679730557256e-06, "log_odds_chosen": 2.054133653640747, "log_odds_ratio": -0.31260547041893005, "logits/chosen": -0.6896231174468994, "logits/rejected": -0.9455153942108154, "logps/chosen": -0.8528178334236145, "logps/rejected": -2.3802309036254883, "loss": 0.9942, "nll_loss": 0.9854002594947815, "rewards/accuracies": 0.75, "rewards/chosen": -0.08528178185224533, "rewards/margins": 0.15274131298065186, "rewards/rejected": -0.2380230873823166, "step": 4487 }, { "epoch": 2.737837425651975, "grad_norm": 2.4818198680877686, "learning_rate": 3.6320881812614818e-06, "log_odds_chosen": 0.6496773958206177, "log_odds_ratio": -0.5077558755874634, "logits/chosen": -0.6832705736160278, "logits/rejected": -0.6802708506584167, "logps/chosen": -0.691271185874939, "logps/rejected": -1.1040711402893066, "loss": 1.1267, "nll_loss": 0.9048608541488647, "rewards/accuracies": 0.625, "rewards/chosen": -0.06912711262702942, "rewards/margins": 0.041280001401901245, "rewards/rejected": -0.11040712147951126, "step": 4488 }, { "epoch": 2.7384474607289917, "grad_norm": 1.7548445463180542, "learning_rate": 3.631108389467238e-06, "log_odds_chosen": 1.1607953310012817, "log_odds_ratio": -0.36393260955810547, "logits/chosen": -0.7861928939819336, "logits/rejected": -0.9610849022865295, "logps/chosen": -0.7361633777618408, "logps/rejected": -1.3948132991790771, "loss": 1.1564, "nll_loss": 1.335228443145752, "rewards/accuracies": 0.875, "rewards/chosen": -0.07361634075641632, "rewards/margins": 0.06586501002311707, "rewards/rejected": -0.1394813358783722, "step": 4489 }, { "epoch": 2.739057495806009, "grad_norm": 14.346057891845703, "learning_rate": 3.6301285976729944e-06, "log_odds_chosen": 1.6888835430145264, "log_odds_ratio": -0.43973565101623535, "logits/chosen": -0.6206508278846741, "logits/rejected": -0.7788050174713135, "logps/chosen": -0.7816049456596375, "logps/rejected": -2.140326738357544, "loss": 0.8969, "nll_loss": 0.8867336511611938, "rewards/accuracies": 0.625, "rewards/chosen": -0.07816050201654434, "rewards/margins": 0.13587217032909393, "rewards/rejected": -0.21403267979621887, "step": 4490 }, { "epoch": 2.739667530883026, "grad_norm": 8.882654190063477, "learning_rate": 3.6291488058787505e-06, "log_odds_chosen": 3.084550380706787, "log_odds_ratio": -0.15221111476421356, "logits/chosen": -0.8217269778251648, "logits/rejected": -1.0664379596710205, "logps/chosen": -0.6060687303543091, "logps/rejected": -2.9825801849365234, "loss": 0.9731, "nll_loss": 0.9561965465545654, "rewards/accuracies": 1.0, "rewards/chosen": -0.060606878250837326, "rewards/margins": 0.23765115439891815, "rewards/rejected": -0.2982580363750458, "step": 4491 }, { "epoch": 2.7402775659600427, "grad_norm": 1.1341873407363892, "learning_rate": 3.628169014084507e-06, "log_odds_chosen": 2.485133647918701, "log_odds_ratio": -0.29979515075683594, "logits/chosen": -0.7282264232635498, "logits/rejected": -0.7794475555419922, "logps/chosen": -0.7075701951980591, "logps/rejected": -2.71655011177063, "loss": 1.0196, "nll_loss": 0.8814171552658081, "rewards/accuracies": 0.75, "rewards/chosen": -0.07075701653957367, "rewards/margins": 0.20089800655841827, "rewards/rejected": -0.27165502309799194, "step": 4492 }, { "epoch": 2.7408876010370595, "grad_norm": 0.9033165574073792, "learning_rate": 3.627189222290263e-06, "log_odds_chosen": 1.099015235900879, "log_odds_ratio": -0.3981391191482544, "logits/chosen": -0.8834443688392639, "logits/rejected": -0.962395429611206, "logps/chosen": -0.9727477431297302, "logps/rejected": -1.678841233253479, "loss": 1.0889, "nll_loss": 1.086031198501587, "rewards/accuracies": 0.75, "rewards/chosen": -0.0972747653722763, "rewards/margins": 0.07060935348272324, "rewards/rejected": -0.16788412630558014, "step": 4493 }, { "epoch": 2.741497636114077, "grad_norm": 2.5490736961364746, "learning_rate": 3.6262094304960193e-06, "log_odds_chosen": 3.160247325897217, "log_odds_ratio": -0.37584951519966125, "logits/chosen": -0.9363213777542114, "logits/rejected": -1.028510570526123, "logps/chosen": -0.6904697418212891, "logps/rejected": -3.2181644439697266, "loss": 1.0062, "nll_loss": 0.8079980611801147, "rewards/accuracies": 0.875, "rewards/chosen": -0.0690469741821289, "rewards/margins": 0.25276947021484375, "rewards/rejected": -0.32181644439697266, "step": 4494 }, { "epoch": 2.7421076711910937, "grad_norm": 2.778822898864746, "learning_rate": 3.625229638701776e-06, "log_odds_chosen": 1.1784229278564453, "log_odds_ratio": -0.5708942413330078, "logits/chosen": -0.7808802723884583, "logits/rejected": -0.904869556427002, "logps/chosen": -0.7065691947937012, "logps/rejected": -1.6523351669311523, "loss": 1.051, "nll_loss": 0.8292941451072693, "rewards/accuracies": 0.5, "rewards/chosen": -0.070656917989254, "rewards/margins": 0.09457659721374512, "rewards/rejected": -0.16523350775241852, "step": 4495 }, { "epoch": 2.7427177062681105, "grad_norm": 1.1709293127059937, "learning_rate": 3.6242498469075316e-06, "log_odds_chosen": 0.6370337605476379, "log_odds_ratio": -0.5804184079170227, "logits/chosen": -0.9198912978172302, "logits/rejected": -1.03302800655365, "logps/chosen": -0.8666607141494751, "logps/rejected": -1.316300630569458, "loss": 1.0125, "nll_loss": 1.0694761276245117, "rewards/accuracies": 0.5, "rewards/chosen": -0.08666607737541199, "rewards/margins": 0.044963981956243515, "rewards/rejected": -0.1316300630569458, "step": 4496 }, { "epoch": 2.7433277413451274, "grad_norm": 0.8607810139656067, "learning_rate": 3.623270055113288e-06, "log_odds_chosen": 1.4018179178237915, "log_odds_ratio": -0.8726822137832642, "logits/chosen": -0.9336815476417542, "logits/rejected": -0.9848121404647827, "logps/chosen": -1.111356258392334, "logps/rejected": -2.3069586753845215, "loss": 1.1741, "nll_loss": 1.3782947063446045, "rewards/accuracies": 0.375, "rewards/chosen": -0.11113561689853668, "rewards/margins": 0.11956024914979935, "rewards/rejected": -0.23069587349891663, "step": 4497 }, { "epoch": 2.743937776422144, "grad_norm": 1.4570499658584595, "learning_rate": 3.6222902633190447e-06, "log_odds_chosen": 1.4447070360183716, "log_odds_ratio": -0.5287810564041138, "logits/chosen": -0.89622962474823, "logits/rejected": -0.8918663263320923, "logps/chosen": -0.8736134171485901, "logps/rejected": -2.0140552520751953, "loss": 1.082, "nll_loss": 0.9393613338470459, "rewards/accuracies": 0.625, "rewards/chosen": -0.08736133575439453, "rewards/margins": 0.1140441745519638, "rewards/rejected": -0.20140551030635834, "step": 4498 }, { "epoch": 2.744547811499161, "grad_norm": 1.1924904584884644, "learning_rate": 3.621310471524801e-06, "log_odds_chosen": 1.8779691457748413, "log_odds_ratio": -0.4502089321613312, "logits/chosen": -0.7681685090065002, "logits/rejected": -0.8252345323562622, "logps/chosen": -0.5854964256286621, "logps/rejected": -2.048164129257202, "loss": 1.0248, "nll_loss": 0.8777456283569336, "rewards/accuracies": 0.875, "rewards/chosen": -0.05854965001344681, "rewards/margins": 0.14626675844192505, "rewards/rejected": -0.20481641590595245, "step": 4499 }, { "epoch": 2.745157846576178, "grad_norm": 1.2250064611434937, "learning_rate": 3.620330679730557e-06, "log_odds_chosen": 0.7942379117012024, "log_odds_ratio": -0.6226769089698792, "logits/chosen": -1.0091309547424316, "logits/rejected": -1.0065921545028687, "logps/chosen": -0.9425297975540161, "logps/rejected": -1.6066398620605469, "loss": 1.0475, "nll_loss": 1.0994445085525513, "rewards/accuracies": 0.5, "rewards/chosen": -0.09425297379493713, "rewards/margins": 0.06641100347042084, "rewards/rejected": -0.16066397726535797, "step": 4500 }, { "epoch": 2.745767881653195, "grad_norm": 1.945708155632019, "learning_rate": 3.6193508879363135e-06, "log_odds_chosen": 1.1877366304397583, "log_odds_ratio": -0.4409082531929016, "logits/chosen": -1.0070401430130005, "logits/rejected": -0.9900612831115723, "logps/chosen": -1.0711156129837036, "logps/rejected": -1.9404445886611938, "loss": 1.0066, "nll_loss": 1.3280229568481445, "rewards/accuracies": 0.75, "rewards/chosen": -0.10711156576871872, "rewards/margins": 0.08693289756774902, "rewards/rejected": -0.19404447078704834, "step": 4501 }, { "epoch": 2.746377916730212, "grad_norm": 3.8329713344573975, "learning_rate": 3.6183710961420696e-06, "log_odds_chosen": 3.0994205474853516, "log_odds_ratio": -0.3254117965698242, "logits/chosen": -0.7375513315200806, "logits/rejected": -0.9333488941192627, "logps/chosen": -0.7560895681381226, "logps/rejected": -3.2731971740722656, "loss": 1.1289, "nll_loss": 0.9874796867370605, "rewards/accuracies": 0.875, "rewards/chosen": -0.07560896128416061, "rewards/margins": 0.25171077251434326, "rewards/rejected": -0.3273197412490845, "step": 4502 }, { "epoch": 2.746987951807229, "grad_norm": 9.587895393371582, "learning_rate": 3.6173913043478257e-06, "log_odds_chosen": 0.9426158666610718, "log_odds_ratio": -0.6069592237472534, "logits/chosen": -1.0938515663146973, "logits/rejected": -1.0284932851791382, "logps/chosen": -0.9172128438949585, "logps/rejected": -1.7457664012908936, "loss": 1.1341, "nll_loss": 1.2895222902297974, "rewards/accuracies": 0.625, "rewards/chosen": -0.09172128140926361, "rewards/margins": 0.08285535871982574, "rewards/rejected": -0.17457664012908936, "step": 4503 }, { "epoch": 2.7475979868842457, "grad_norm": 4.368526935577393, "learning_rate": 3.6164115125535822e-06, "log_odds_chosen": 1.1262519359588623, "log_odds_ratio": -0.6103991270065308, "logits/chosen": -0.8267950415611267, "logits/rejected": -0.8659642934799194, "logps/chosen": -1.1412359476089478, "logps/rejected": -2.1220717430114746, "loss": 1.2404, "nll_loss": 1.250167965888977, "rewards/accuracies": 0.5, "rewards/chosen": -0.11412359774112701, "rewards/margins": 0.09808357059955597, "rewards/rejected": -0.21220716834068298, "step": 4504 }, { "epoch": 2.748208021961263, "grad_norm": 1.4943859577178955, "learning_rate": 3.6154317207593384e-06, "log_odds_chosen": 1.9775876998901367, "log_odds_ratio": -0.49636852741241455, "logits/chosen": -0.7824665307998657, "logits/rejected": -0.8700551390647888, "logps/chosen": -0.593062162399292, "logps/rejected": -1.9814116954803467, "loss": 0.9376, "nll_loss": 0.97324538230896, "rewards/accuracies": 0.625, "rewards/chosen": -0.05930621176958084, "rewards/margins": 0.13883495330810547, "rewards/rejected": -0.1981411725282669, "step": 4505 }, { "epoch": 2.74881805703828, "grad_norm": 1.4452522993087769, "learning_rate": 3.614451928965095e-06, "log_odds_chosen": 1.602756381034851, "log_odds_ratio": -0.2953494191169739, "logits/chosen": -0.6598522663116455, "logits/rejected": -0.8368754982948303, "logps/chosen": -0.6628307104110718, "logps/rejected": -1.5918912887573242, "loss": 0.977, "nll_loss": 0.982444703578949, "rewards/accuracies": 0.875, "rewards/chosen": -0.06628307700157166, "rewards/margins": 0.09290607273578644, "rewards/rejected": -0.1591891348361969, "step": 4506 }, { "epoch": 2.7494280921152967, "grad_norm": 2.677161455154419, "learning_rate": 3.613472137170851e-06, "log_odds_chosen": 1.810810923576355, "log_odds_ratio": -0.42400261759757996, "logits/chosen": -0.9784546494483948, "logits/rejected": -0.9299613237380981, "logps/chosen": -1.038899540901184, "logps/rejected": -2.6177735328674316, "loss": 1.0918, "nll_loss": 1.1828253269195557, "rewards/accuracies": 0.75, "rewards/chosen": -0.10388995707035065, "rewards/margins": 0.15788739919662476, "rewards/rejected": -0.2617773711681366, "step": 4507 }, { "epoch": 2.7500381271923136, "grad_norm": 1.5391865968704224, "learning_rate": 3.612492345376607e-06, "log_odds_chosen": 3.8475685119628906, "log_odds_ratio": -0.09845415502786636, "logits/chosen": -0.6067884564399719, "logits/rejected": -0.7597990036010742, "logps/chosen": -0.5141251087188721, "logps/rejected": -3.5111818313598633, "loss": 0.9863, "nll_loss": 0.8334500193595886, "rewards/accuracies": 1.0, "rewards/chosen": -0.051412515342235565, "rewards/margins": 0.2997056543827057, "rewards/rejected": -0.35111817717552185, "step": 4508 }, { "epoch": 2.7506481622693304, "grad_norm": 1.3570008277893066, "learning_rate": 3.6115125535823637e-06, "log_odds_chosen": 1.7578868865966797, "log_odds_ratio": -0.5478554964065552, "logits/chosen": -1.0469081401824951, "logits/rejected": -1.0339797735214233, "logps/chosen": -1.1341595649719238, "logps/rejected": -2.79055118560791, "loss": 1.0649, "nll_loss": 1.144494652748108, "rewards/accuracies": 0.75, "rewards/chosen": -0.11341595649719238, "rewards/margins": 0.16563914716243744, "rewards/rejected": -0.279055118560791, "step": 4509 }, { "epoch": 2.7512581973463472, "grad_norm": 2.924931287765503, "learning_rate": 3.6105327617881203e-06, "log_odds_chosen": 2.148858070373535, "log_odds_ratio": -0.31447792053222656, "logits/chosen": -0.8882300853729248, "logits/rejected": -0.8999150991439819, "logps/chosen": -0.9186769127845764, "logps/rejected": -2.5811853408813477, "loss": 1.1491, "nll_loss": 1.1445363759994507, "rewards/accuracies": 0.75, "rewards/chosen": -0.09186768531799316, "rewards/margins": 0.16625085473060608, "rewards/rejected": -0.25811854004859924, "step": 4510 }, { "epoch": 2.751868232423364, "grad_norm": 1.5590356588363647, "learning_rate": 3.609552969993876e-06, "log_odds_chosen": 2.2341794967651367, "log_odds_ratio": -0.2933844327926636, "logits/chosen": -0.7597595453262329, "logits/rejected": -1.0340603590011597, "logps/chosen": -0.6198563575744629, "logps/rejected": -2.343630313873291, "loss": 0.9412, "nll_loss": 0.7252757549285889, "rewards/accuracies": 0.875, "rewards/chosen": -0.06198563426733017, "rewards/margins": 0.17237740755081177, "rewards/rejected": -0.23436304926872253, "step": 4511 }, { "epoch": 2.7524782675003814, "grad_norm": 1.9893547296524048, "learning_rate": 3.6085731781996325e-06, "log_odds_chosen": 2.7439732551574707, "log_odds_ratio": -0.24028468132019043, "logits/chosen": -0.3522855043411255, "logits/rejected": -0.6319018602371216, "logps/chosen": -0.5745861530303955, "logps/rejected": -2.580169200897217, "loss": 1.0047, "nll_loss": 0.7927063703536987, "rewards/accuracies": 0.875, "rewards/chosen": -0.05745861679315567, "rewards/margins": 0.20055833458900452, "rewards/rejected": -0.2580169439315796, "step": 4512 }, { "epoch": 2.7530883025773982, "grad_norm": 1.5082303285598755, "learning_rate": 3.607593386405389e-06, "log_odds_chosen": 2.1143741607666016, "log_odds_ratio": -0.5146304965019226, "logits/chosen": -0.8366101980209351, "logits/rejected": -0.8691914081573486, "logps/chosen": -0.8750694990158081, "logps/rejected": -2.4861721992492676, "loss": 1.0979, "nll_loss": 1.022247076034546, "rewards/accuracies": 0.625, "rewards/chosen": -0.08750694990158081, "rewards/margins": 0.1611102670431137, "rewards/rejected": -0.24861721694469452, "step": 4513 }, { "epoch": 2.753698337654415, "grad_norm": 6.933584213256836, "learning_rate": 3.6066135946111447e-06, "log_odds_chosen": 2.226975917816162, "log_odds_ratio": -0.4425618052482605, "logits/chosen": -0.8543750047683716, "logits/rejected": -0.8911954164505005, "logps/chosen": -0.9561160802841187, "logps/rejected": -2.9173314571380615, "loss": 1.1129, "nll_loss": 1.1665562391281128, "rewards/accuracies": 0.75, "rewards/chosen": -0.09561161696910858, "rewards/margins": 0.19612154364585876, "rewards/rejected": -0.29173314571380615, "step": 4514 }, { "epoch": 2.754308372731432, "grad_norm": 1.7538228034973145, "learning_rate": 3.6056338028169013e-06, "log_odds_chosen": 1.486985445022583, "log_odds_ratio": -0.43669453263282776, "logits/chosen": -0.4142135977745056, "logits/rejected": -0.6014870405197144, "logps/chosen": -0.5388955473899841, "logps/rejected": -1.5423829555511475, "loss": 1.164, "nll_loss": 0.9319841861724854, "rewards/accuracies": 0.75, "rewards/chosen": -0.05388955771923065, "rewards/margins": 0.10034874826669693, "rewards/rejected": -0.15423829853534698, "step": 4515 }, { "epoch": 2.754918407808449, "grad_norm": 1.2753528356552124, "learning_rate": 3.6046540110226574e-06, "log_odds_chosen": 1.0771085023880005, "log_odds_ratio": -0.5184186100959778, "logits/chosen": -0.7063491344451904, "logits/rejected": -0.9379674792289734, "logps/chosen": -0.8023345470428467, "logps/rejected": -1.5105377435684204, "loss": 1.0237, "nll_loss": 1.107638955116272, "rewards/accuracies": 0.625, "rewards/chosen": -0.08023344725370407, "rewards/margins": 0.07082032412290573, "rewards/rejected": -0.1510537713766098, "step": 4516 }, { "epoch": 2.755528442885466, "grad_norm": 2.5882463455200195, "learning_rate": 3.603674219228414e-06, "log_odds_chosen": 1.3974175453186035, "log_odds_ratio": -0.6896685361862183, "logits/chosen": -0.8276250958442688, "logits/rejected": -0.904334545135498, "logps/chosen": -0.7192967534065247, "logps/rejected": -1.9596989154815674, "loss": 1.2363, "nll_loss": 1.3392261266708374, "rewards/accuracies": 0.375, "rewards/chosen": -0.0719296783208847, "rewards/margins": 0.12404021620750427, "rewards/rejected": -0.19596989452838898, "step": 4517 }, { "epoch": 2.756138477962483, "grad_norm": 2.909822702407837, "learning_rate": 3.60269442743417e-06, "log_odds_chosen": 1.2466657161712646, "log_odds_ratio": -0.5089503526687622, "logits/chosen": -0.7587242722511292, "logits/rejected": -0.807147204875946, "logps/chosen": -0.7558231949806213, "logps/rejected": -1.5200400352478027, "loss": 0.9455, "nll_loss": 0.9944794178009033, "rewards/accuracies": 0.625, "rewards/chosen": -0.07558232545852661, "rewards/margins": 0.07642168551683426, "rewards/rejected": -0.15200400352478027, "step": 4518 }, { "epoch": 2.7567485130394997, "grad_norm": 1.70335853099823, "learning_rate": 3.601714635639926e-06, "log_odds_chosen": 1.9801647663116455, "log_odds_ratio": -0.565919041633606, "logits/chosen": -1.0351601839065552, "logits/rejected": -1.0502231121063232, "logps/chosen": -0.8952322006225586, "logps/rejected": -2.579008102416992, "loss": 1.1561, "nll_loss": 1.2084815502166748, "rewards/accuracies": 0.5, "rewards/chosen": -0.08952322602272034, "rewards/margins": 0.1683775782585144, "rewards/rejected": -0.25790083408355713, "step": 4519 }, { "epoch": 2.7573585481165166, "grad_norm": 1.218206524848938, "learning_rate": 3.6007348438456827e-06, "log_odds_chosen": 1.9534732103347778, "log_odds_ratio": -0.4192626476287842, "logits/chosen": -0.8390029668807983, "logits/rejected": -0.9400256872177124, "logps/chosen": -0.7264819741249084, "logps/rejected": -2.2651941776275635, "loss": 1.0906, "nll_loss": 0.938290536403656, "rewards/accuracies": 0.75, "rewards/chosen": -0.07264819741249084, "rewards/margins": 0.15387120842933655, "rewards/rejected": -0.2265194207429886, "step": 4520 }, { "epoch": 2.7579685831935334, "grad_norm": 1.9499318599700928, "learning_rate": 3.599755052051439e-06, "log_odds_chosen": 1.8356988430023193, "log_odds_ratio": -0.3193868398666382, "logits/chosen": -0.6619105339050293, "logits/rejected": -0.6826986074447632, "logps/chosen": -0.6505159735679626, "logps/rejected": -1.9631340503692627, "loss": 0.8569, "nll_loss": 0.7004393339157104, "rewards/accuracies": 0.875, "rewards/chosen": -0.0650515928864479, "rewards/margins": 0.13126182556152344, "rewards/rejected": -0.19631341099739075, "step": 4521 }, { "epoch": 2.7585786182705503, "grad_norm": 1.5305603742599487, "learning_rate": 3.598775260257195e-06, "log_odds_chosen": 2.651669979095459, "log_odds_ratio": -0.3458634316921234, "logits/chosen": -0.6124020218849182, "logits/rejected": -0.7723791599273682, "logps/chosen": -0.5607059001922607, "logps/rejected": -2.4885385036468506, "loss": 0.8906, "nll_loss": 0.6501026749610901, "rewards/accuracies": 0.75, "rewards/chosen": -0.05607059597969055, "rewards/margins": 0.19278325140476227, "rewards/rejected": -0.24885384738445282, "step": 4522 }, { "epoch": 2.7591886533475676, "grad_norm": 2.6727941036224365, "learning_rate": 3.5977954684629515e-06, "log_odds_chosen": 2.3936166763305664, "log_odds_ratio": -0.36252954602241516, "logits/chosen": -0.8271429538726807, "logits/rejected": -0.9168968200683594, "logps/chosen": -0.8060967326164246, "logps/rejected": -2.6566081047058105, "loss": 1.001, "nll_loss": 0.9392107725143433, "rewards/accuracies": 0.875, "rewards/chosen": -0.08060967177152634, "rewards/margins": 0.18505114316940308, "rewards/rejected": -0.26566082239151, "step": 4523 }, { "epoch": 2.7597986884245844, "grad_norm": 1.3623950481414795, "learning_rate": 3.596815676668708e-06, "log_odds_chosen": 1.5181090831756592, "log_odds_ratio": -0.45304396748542786, "logits/chosen": -0.6845727562904358, "logits/rejected": -0.8524795174598694, "logps/chosen": -0.6597716212272644, "logps/rejected": -1.6425708532333374, "loss": 1.0665, "nll_loss": 0.9422961473464966, "rewards/accuracies": 0.75, "rewards/chosen": -0.06597715616226196, "rewards/margins": 0.0982799232006073, "rewards/rejected": -0.16425709426403046, "step": 4524 }, { "epoch": 2.7604087235016013, "grad_norm": 3.8954226970672607, "learning_rate": 3.5958358848744638e-06, "log_odds_chosen": 2.507065773010254, "log_odds_ratio": -0.5097953081130981, "logits/chosen": -0.9803866744041443, "logits/rejected": -1.058709740638733, "logps/chosen": -0.6180095672607422, "logps/rejected": -2.829530954360962, "loss": 1.1367, "nll_loss": 1.0827385187149048, "rewards/accuracies": 0.625, "rewards/chosen": -0.06180095672607422, "rewards/margins": 0.2211521565914154, "rewards/rejected": -0.28295308351516724, "step": 4525 }, { "epoch": 2.761018758578618, "grad_norm": 1.7701985836029053, "learning_rate": 3.5948560930802203e-06, "log_odds_chosen": 0.623394250869751, "log_odds_ratio": -0.5680480599403381, "logits/chosen": -0.8969308137893677, "logits/rejected": -0.8223096132278442, "logps/chosen": -0.946172833442688, "logps/rejected": -1.5305454730987549, "loss": 1.083, "nll_loss": 1.1165908575057983, "rewards/accuracies": 0.75, "rewards/chosen": -0.09461728483438492, "rewards/margins": 0.05843725800514221, "rewards/rejected": -0.15305453538894653, "step": 4526 }, { "epoch": 2.7616287936556354, "grad_norm": 1.7447423934936523, "learning_rate": 3.593876301285977e-06, "log_odds_chosen": 2.1003880500793457, "log_odds_ratio": -0.3177216053009033, "logits/chosen": -1.0318636894226074, "logits/rejected": -1.0583295822143555, "logps/chosen": -0.9574956297874451, "logps/rejected": -2.794179916381836, "loss": 1.1013, "nll_loss": 1.2024731636047363, "rewards/accuracies": 1.0, "rewards/chosen": -0.09574956446886063, "rewards/margins": 0.18366843461990356, "rewards/rejected": -0.2794179916381836, "step": 4527 }, { "epoch": 2.7622388287326523, "grad_norm": 1.8857698440551758, "learning_rate": 3.5928965094917326e-06, "log_odds_chosen": 0.7781990766525269, "log_odds_ratio": -0.4375361502170563, "logits/chosen": -0.9950112700462341, "logits/rejected": -1.0023435354232788, "logps/chosen": -0.9884351491928101, "logps/rejected": -1.5752623081207275, "loss": 1.0359, "nll_loss": 1.2079648971557617, "rewards/accuracies": 0.625, "rewards/chosen": -0.098843514919281, "rewards/margins": 0.05868270993232727, "rewards/rejected": -0.15752622485160828, "step": 4528 }, { "epoch": 2.762848863809669, "grad_norm": 7.8846025466918945, "learning_rate": 3.591916717697489e-06, "log_odds_chosen": 0.09056302905082703, "log_odds_ratio": -0.870563268661499, "logits/chosen": -0.9257006645202637, "logits/rejected": -0.77197265625, "logps/chosen": -1.3446078300476074, "logps/rejected": -1.3380866050720215, "loss": 1.288, "nll_loss": 1.2668044567108154, "rewards/accuracies": 0.375, "rewards/chosen": -0.13446079194545746, "rewards/margins": -0.0006521381437778473, "rewards/rejected": -0.13380864262580872, "step": 4529 }, { "epoch": 2.763458898886686, "grad_norm": 1.2183431386947632, "learning_rate": 3.5909369259032457e-06, "log_odds_chosen": 0.527089536190033, "log_odds_ratio": -0.6620395183563232, "logits/chosen": -1.0475729703903198, "logits/rejected": -1.122289776802063, "logps/chosen": -0.8891842365264893, "logps/rejected": -1.2682240009307861, "loss": 0.9332, "nll_loss": 1.1180570125579834, "rewards/accuracies": 0.5, "rewards/chosen": -0.08891842514276505, "rewards/margins": 0.03790396824479103, "rewards/rejected": -0.12682238221168518, "step": 4530 }, { "epoch": 2.764068933963703, "grad_norm": 1.6344434022903442, "learning_rate": 3.5899571341090018e-06, "log_odds_chosen": 2.92734432220459, "log_odds_ratio": -0.13162639737129211, "logits/chosen": -0.847109317779541, "logits/rejected": -0.9454329013824463, "logps/chosen": -0.5543597936630249, "logps/rejected": -2.581223487854004, "loss": 0.9664, "nll_loss": 0.7473626136779785, "rewards/accuracies": 1.0, "rewards/chosen": -0.05543598532676697, "rewards/margins": 0.2026863694190979, "rewards/rejected": -0.25812235474586487, "step": 4531 }, { "epoch": 2.7646789690407196, "grad_norm": 1.0922812223434448, "learning_rate": 3.588977342314758e-06, "log_odds_chosen": 1.795035481452942, "log_odds_ratio": -0.5220034122467041, "logits/chosen": -1.000386357307434, "logits/rejected": -1.0779139995574951, "logps/chosen": -0.7847307920455933, "logps/rejected": -2.1996192932128906, "loss": 1.0249, "nll_loss": 0.914612889289856, "rewards/accuracies": 0.5, "rewards/chosen": -0.07847307622432709, "rewards/margins": 0.14148886501789093, "rewards/rejected": -0.21996192634105682, "step": 4532 }, { "epoch": 2.765289004117737, "grad_norm": 3.391521692276001, "learning_rate": 3.5879975505205145e-06, "log_odds_chosen": 2.542367935180664, "log_odds_ratio": -0.22153574228286743, "logits/chosen": -0.7244137525558472, "logits/rejected": -0.8111954927444458, "logps/chosen": -0.6343016624450684, "logps/rejected": -2.5829708576202393, "loss": 1.0522, "nll_loss": 0.748681366443634, "rewards/accuracies": 1.0, "rewards/chosen": -0.06343016773462296, "rewards/margins": 0.19486692547798157, "rewards/rejected": -0.2582970857620239, "step": 4533 }, { "epoch": 2.7658990391947538, "grad_norm": 1.3396157026290894, "learning_rate": 3.5870177587262706e-06, "log_odds_chosen": 3.1485447883605957, "log_odds_ratio": -0.15851616859436035, "logits/chosen": -0.5451605319976807, "logits/rejected": -0.6692653298377991, "logps/chosen": -0.5118240118026733, "logps/rejected": -2.6415064334869385, "loss": 1.124, "nll_loss": 0.7853658199310303, "rewards/accuracies": 1.0, "rewards/chosen": -0.051182400435209274, "rewards/margins": 0.21296826004981995, "rewards/rejected": -0.2641506493091583, "step": 4534 }, { "epoch": 2.7665090742717706, "grad_norm": 2.1863584518432617, "learning_rate": 3.5860379669320267e-06, "log_odds_chosen": 4.158267974853516, "log_odds_ratio": -0.2168424427509308, "logits/chosen": -0.8857746124267578, "logits/rejected": -1.0432629585266113, "logps/chosen": -0.7036932706832886, "logps/rejected": -4.13435173034668, "loss": 0.9658, "nll_loss": 1.0073822736740112, "rewards/accuracies": 0.75, "rewards/chosen": -0.07036933302879333, "rewards/margins": 0.3430658280849457, "rewards/rejected": -0.413435161113739, "step": 4535 }, { "epoch": 2.7671191093487875, "grad_norm": 5.203588485717773, "learning_rate": 3.585058175137783e-06, "log_odds_chosen": 1.2830708026885986, "log_odds_ratio": -0.39319610595703125, "logits/chosen": -0.562900722026825, "logits/rejected": -0.7361935377120972, "logps/chosen": -0.5906674861907959, "logps/rejected": -1.438920021057129, "loss": 0.9057, "nll_loss": 0.7218616008758545, "rewards/accuracies": 1.0, "rewards/chosen": -0.05906674638390541, "rewards/margins": 0.08482526242733002, "rewards/rejected": -0.14389200508594513, "step": 4536 }, { "epoch": 2.7677291444258048, "grad_norm": 1.8859379291534424, "learning_rate": 3.5840783833435394e-06, "log_odds_chosen": 1.3112865686416626, "log_odds_ratio": -0.43597227334976196, "logits/chosen": -0.9743025302886963, "logits/rejected": -1.0595043897628784, "logps/chosen": -0.7287963032722473, "logps/rejected": -1.5608407258987427, "loss": 1.0811, "nll_loss": 1.1030123233795166, "rewards/accuracies": 0.75, "rewards/chosen": -0.07287963479757309, "rewards/margins": 0.08320443332195282, "rewards/rejected": -0.1560840755701065, "step": 4537 }, { "epoch": 2.7683391795028216, "grad_norm": 1.2549508810043335, "learning_rate": 3.583098591549296e-06, "log_odds_chosen": 0.6859893202781677, "log_odds_ratio": -0.7239716649055481, "logits/chosen": -0.9622832536697388, "logits/rejected": -1.0290114879608154, "logps/chosen": -1.0935969352722168, "logps/rejected": -1.730798363685608, "loss": 1.0554, "nll_loss": 1.07367742061615, "rewards/accuracies": 0.625, "rewards/chosen": -0.10935969650745392, "rewards/margins": 0.06372014433145523, "rewards/rejected": -0.17307984828948975, "step": 4538 }, { "epoch": 2.7689492145798384, "grad_norm": 1.777579665184021, "learning_rate": 3.5821187997550516e-06, "log_odds_chosen": 1.870033621788025, "log_odds_ratio": -0.3073546290397644, "logits/chosen": -0.787729024887085, "logits/rejected": -0.9575214385986328, "logps/chosen": -1.1672649383544922, "logps/rejected": -2.85192608833313, "loss": 1.0924, "nll_loss": 1.2602100372314453, "rewards/accuracies": 0.875, "rewards/chosen": -0.11672650277614594, "rewards/margins": 0.16846610605716705, "rewards/rejected": -0.285192608833313, "step": 4539 }, { "epoch": 2.7695592496568553, "grad_norm": 1.587547779083252, "learning_rate": 3.581139007960808e-06, "log_odds_chosen": 1.9894392490386963, "log_odds_ratio": -0.3580130934715271, "logits/chosen": -0.8844032287597656, "logits/rejected": -0.8705729842185974, "logps/chosen": -0.5617483258247375, "logps/rejected": -1.9359561204910278, "loss": 0.9918, "nll_loss": 0.9837920069694519, "rewards/accuracies": 0.75, "rewards/chosen": -0.056174833327531815, "rewards/margins": 0.13742077350616455, "rewards/rejected": -0.19359560310840607, "step": 4540 }, { "epoch": 2.770169284733872, "grad_norm": 1.2901427745819092, "learning_rate": 3.5801592161665647e-06, "log_odds_chosen": 1.3080912828445435, "log_odds_ratio": -0.3852362632751465, "logits/chosen": -0.7795307040214539, "logits/rejected": -0.8067098259925842, "logps/chosen": -0.6548045873641968, "logps/rejected": -1.351622462272644, "loss": 1.0844, "nll_loss": 1.1283602714538574, "rewards/accuracies": 0.75, "rewards/chosen": -0.06548045575618744, "rewards/margins": 0.06968178600072861, "rewards/rejected": -0.13516224920749664, "step": 4541 }, { "epoch": 2.770779319810889, "grad_norm": 1.480312466621399, "learning_rate": 3.5791794243723204e-06, "log_odds_chosen": 1.016926884651184, "log_odds_ratio": -0.5403215289115906, "logits/chosen": -0.9771369695663452, "logits/rejected": -0.9820676445960999, "logps/chosen": -0.9441990256309509, "logps/rejected": -1.7163149118423462, "loss": 1.2177, "nll_loss": 1.1631685495376587, "rewards/accuracies": 0.625, "rewards/chosen": -0.09441990405321121, "rewards/margins": 0.07721159607172012, "rewards/rejected": -0.17163150012493134, "step": 4542 }, { "epoch": 2.771389354887906, "grad_norm": 1.1389350891113281, "learning_rate": 3.578199632578077e-06, "log_odds_chosen": 1.2573798894882202, "log_odds_ratio": -0.49962401390075684, "logits/chosen": -0.833600640296936, "logits/rejected": -0.7816867232322693, "logps/chosen": -0.6910656690597534, "logps/rejected": -1.5591496229171753, "loss": 1.2254, "nll_loss": 1.1281641721725464, "rewards/accuracies": 0.625, "rewards/chosen": -0.0691065788269043, "rewards/margins": 0.08680839836597443, "rewards/rejected": -0.15591497719287872, "step": 4543 }, { "epoch": 2.771999389964923, "grad_norm": 1.2853455543518066, "learning_rate": 3.5772198407838335e-06, "log_odds_chosen": 1.6331647634506226, "log_odds_ratio": -0.44398558139801025, "logits/chosen": -0.8870428800582886, "logits/rejected": -0.864324688911438, "logps/chosen": -0.9834945201873779, "logps/rejected": -2.2057607173919678, "loss": 1.1398, "nll_loss": 1.0793217420578003, "rewards/accuracies": 0.625, "rewards/chosen": -0.0983494445681572, "rewards/margins": 0.12222663313150406, "rewards/rejected": -0.22057607769966125, "step": 4544 }, { "epoch": 2.77260942504194, "grad_norm": 1.4812661409378052, "learning_rate": 3.5762400489895896e-06, "log_odds_chosen": 3.3275012969970703, "log_odds_ratio": -0.1487162709236145, "logits/chosen": -0.5463894605636597, "logits/rejected": -0.47831225395202637, "logps/chosen": -0.5091013312339783, "logps/rejected": -2.9909725189208984, "loss": 0.9791, "nll_loss": 0.7359068393707275, "rewards/accuracies": 0.875, "rewards/chosen": -0.050910137593746185, "rewards/margins": 0.2481871247291565, "rewards/rejected": -0.2990972697734833, "step": 4545 }, { "epoch": 2.773219460118957, "grad_norm": 1.8788909912109375, "learning_rate": 3.5752602571953457e-06, "log_odds_chosen": 2.9788453578948975, "log_odds_ratio": -0.32703253626823425, "logits/chosen": -0.9591073989868164, "logits/rejected": -1.0616525411605835, "logps/chosen": -0.8584623336791992, "logps/rejected": -3.4636788368225098, "loss": 1.2485, "nll_loss": 1.0844330787658691, "rewards/accuracies": 0.875, "rewards/chosen": -0.08584623783826828, "rewards/margins": 0.26052168011665344, "rewards/rejected": -0.34636789560317993, "step": 4546 }, { "epoch": 2.7738294951959737, "grad_norm": 2.0150365829467773, "learning_rate": 3.5742804654011023e-06, "log_odds_chosen": 2.607832193374634, "log_odds_ratio": -0.4225206673145294, "logits/chosen": -0.922831118106842, "logits/rejected": -0.8604767322540283, "logps/chosen": -1.1810132265090942, "logps/rejected": -3.508011817932129, "loss": 1.004, "nll_loss": 1.275389313697815, "rewards/accuracies": 0.875, "rewards/chosen": -0.1181013286113739, "rewards/margins": 0.23269987106323242, "rewards/rejected": -0.3508012294769287, "step": 4547 }, { "epoch": 2.774439530272991, "grad_norm": 1.5244245529174805, "learning_rate": 3.5733006736068584e-06, "log_odds_chosen": 3.8149797916412354, "log_odds_ratio": -0.26355791091918945, "logits/chosen": -0.8463948965072632, "logits/rejected": -0.9648719429969788, "logps/chosen": -0.7107446789741516, "logps/rejected": -3.94020414352417, "loss": 1.2651, "nll_loss": 1.093646764755249, "rewards/accuracies": 0.875, "rewards/chosen": -0.0710744708776474, "rewards/margins": 0.3229459524154663, "rewards/rejected": -0.3940204381942749, "step": 4548 }, { "epoch": 2.775049565350008, "grad_norm": 1.2296674251556396, "learning_rate": 3.5723208818126145e-06, "log_odds_chosen": 1.183410406112671, "log_odds_ratio": -0.39868348836898804, "logits/chosen": -0.9263755083084106, "logits/rejected": -0.911853551864624, "logps/chosen": -0.9347360730171204, "logps/rejected": -1.817705750465393, "loss": 1.1245, "nll_loss": 1.2261191606521606, "rewards/accuracies": 0.75, "rewards/chosen": -0.09347359836101532, "rewards/margins": 0.08829696476459503, "rewards/rejected": -0.18177056312561035, "step": 4549 }, { "epoch": 2.7756596004270246, "grad_norm": 1.4380195140838623, "learning_rate": 3.571341090018371e-06, "log_odds_chosen": 1.976351022720337, "log_odds_ratio": -0.4286503195762634, "logits/chosen": -0.8062112331390381, "logits/rejected": -0.9096532464027405, "logps/chosen": -0.8374493718147278, "logps/rejected": -2.3820197582244873, "loss": 1.1043, "nll_loss": 1.0091313123703003, "rewards/accuracies": 0.75, "rewards/chosen": -0.08374494314193726, "rewards/margins": 0.15445704758167267, "rewards/rejected": -0.23820199072360992, "step": 4550 }, { "epoch": 2.7762696355040415, "grad_norm": 2.031420946121216, "learning_rate": 3.570361298224127e-06, "log_odds_chosen": 1.878968596458435, "log_odds_ratio": -0.4737188518047333, "logits/chosen": -0.8873773813247681, "logits/rejected": -1.0328552722930908, "logps/chosen": -0.6280158162117004, "logps/rejected": -1.6482475996017456, "loss": 1.1637, "nll_loss": 1.1034300327301025, "rewards/accuracies": 0.75, "rewards/chosen": -0.06280158460140228, "rewards/margins": 0.102023184299469, "rewards/rejected": -0.16482476890087128, "step": 4551 }, { "epoch": 2.7768796705810583, "grad_norm": 1.6876853704452515, "learning_rate": 3.5693815064298837e-06, "log_odds_chosen": 1.461168885231018, "log_odds_ratio": -0.26541125774383545, "logits/chosen": -0.9836598038673401, "logits/rejected": -0.8810696005821228, "logps/chosen": -0.8511406183242798, "logps/rejected": -1.8539783954620361, "loss": 1.3334, "nll_loss": 1.154299259185791, "rewards/accuracies": 1.0, "rewards/chosen": -0.08511406183242798, "rewards/margins": 0.10028377920389175, "rewards/rejected": -0.18539784848690033, "step": 4552 }, { "epoch": 2.777489705658075, "grad_norm": 2.5232911109924316, "learning_rate": 3.5684017146356394e-06, "log_odds_chosen": 0.20671889185905457, "log_odds_ratio": -0.6226191520690918, "logits/chosen": -0.994296133518219, "logits/rejected": -1.0532238483428955, "logps/chosen": -0.9877943396568298, "logps/rejected": -1.1683109998703003, "loss": 1.1766, "nll_loss": 1.2688758373260498, "rewards/accuracies": 0.75, "rewards/chosen": -0.09877942502498627, "rewards/margins": 0.01805167831480503, "rewards/rejected": -0.11683110892772675, "step": 4553 }, { "epoch": 2.778099740735092, "grad_norm": 1.5673260688781738, "learning_rate": 3.567421922841396e-06, "log_odds_chosen": 2.071455240249634, "log_odds_ratio": -0.4133453965187073, "logits/chosen": -0.9138885736465454, "logits/rejected": -1.008493185043335, "logps/chosen": -0.7849181294441223, "logps/rejected": -2.468146562576294, "loss": 1.1566, "nll_loss": 1.236158013343811, "rewards/accuracies": 0.625, "rewards/chosen": -0.07849181443452835, "rewards/margins": 0.1683228313922882, "rewards/rejected": -0.24681465327739716, "step": 4554 }, { "epoch": 2.7787097758121093, "grad_norm": 10.388256072998047, "learning_rate": 3.5664421310471525e-06, "log_odds_chosen": 3.861790180206299, "log_odds_ratio": -0.14100883901119232, "logits/chosen": -0.7198415994644165, "logits/rejected": -0.8077958226203918, "logps/chosen": -0.4629320502281189, "logps/rejected": -3.315211296081543, "loss": 1.0188, "nll_loss": 0.756039023399353, "rewards/accuracies": 0.875, "rewards/chosen": -0.04629320651292801, "rewards/margins": 0.2852279245853424, "rewards/rejected": -0.3315211534500122, "step": 4555 }, { "epoch": 2.779319810889126, "grad_norm": 4.021124839782715, "learning_rate": 3.5654623392529082e-06, "log_odds_chosen": 1.6837003231048584, "log_odds_ratio": -0.37003374099731445, "logits/chosen": -1.0072600841522217, "logits/rejected": -0.8807778358459473, "logps/chosen": -0.7004007697105408, "logps/rejected": -1.8592426776885986, "loss": 1.0284, "nll_loss": 0.9670175313949585, "rewards/accuracies": 0.75, "rewards/chosen": -0.07004007697105408, "rewards/margins": 0.1158841922879219, "rewards/rejected": -0.18592426180839539, "step": 4556 }, { "epoch": 2.779929845966143, "grad_norm": 6.193094253540039, "learning_rate": 3.5644825474586648e-06, "log_odds_chosen": 1.1528801918029785, "log_odds_ratio": -0.48576128482818604, "logits/chosen": -0.898776650428772, "logits/rejected": -0.9584951400756836, "logps/chosen": -0.8015755414962769, "logps/rejected": -1.7869486808776855, "loss": 0.9209, "nll_loss": 0.8965306282043457, "rewards/accuracies": 0.75, "rewards/chosen": -0.0801575556397438, "rewards/margins": 0.09853731095790863, "rewards/rejected": -0.17869487404823303, "step": 4557 }, { "epoch": 2.78053988104316, "grad_norm": 4.0463972091674805, "learning_rate": 3.5635027556644213e-06, "log_odds_chosen": 1.8521928787231445, "log_odds_ratio": -0.47113722562789917, "logits/chosen": -1.0040099620819092, "logits/rejected": -1.0005252361297607, "logps/chosen": -0.9045025706291199, "logps/rejected": -2.530259370803833, "loss": 1.0521, "nll_loss": 1.0973495244979858, "rewards/accuracies": 0.625, "rewards/chosen": -0.09045025706291199, "rewards/margins": 0.1625756472349167, "rewards/rejected": -0.25302591919898987, "step": 4558 }, { "epoch": 2.781149916120177, "grad_norm": 1.2965688705444336, "learning_rate": 3.5625229638701774e-06, "log_odds_chosen": 0.7127085328102112, "log_odds_ratio": -0.6062552332878113, "logits/chosen": -1.0606164932250977, "logits/rejected": -1.140924334526062, "logps/chosen": -0.9231458902359009, "logps/rejected": -1.5434648990631104, "loss": 1.1397, "nll_loss": 0.956640899181366, "rewards/accuracies": 0.625, "rewards/chosen": -0.09231460094451904, "rewards/margins": 0.06203188747167587, "rewards/rejected": -0.1543464958667755, "step": 4559 }, { "epoch": 2.781759951197194, "grad_norm": 5.654443264007568, "learning_rate": 3.5615431720759336e-06, "log_odds_chosen": 2.158400774002075, "log_odds_ratio": -0.6113011240959167, "logits/chosen": -0.7155947685241699, "logits/rejected": -0.8997183442115784, "logps/chosen": -0.806977391242981, "logps/rejected": -2.746730327606201, "loss": 0.9534, "nll_loss": 1.0926918983459473, "rewards/accuracies": 0.75, "rewards/chosen": -0.08069773763418198, "rewards/margins": 0.1939753144979477, "rewards/rejected": -0.2746730446815491, "step": 4560 }, { "epoch": 2.782369986274211, "grad_norm": 8.010490417480469, "learning_rate": 3.56056338028169e-06, "log_odds_chosen": 2.497561454772949, "log_odds_ratio": -0.27476081252098083, "logits/chosen": -0.7772759199142456, "logits/rejected": -0.9216095209121704, "logps/chosen": -0.6526785492897034, "logps/rejected": -2.5369200706481934, "loss": 0.9369, "nll_loss": 0.6720603704452515, "rewards/accuracies": 0.875, "rewards/chosen": -0.06526785343885422, "rewards/margins": 0.18842414021492004, "rewards/rejected": -0.25369200110435486, "step": 4561 }, { "epoch": 2.7829800213512277, "grad_norm": 2.584014654159546, "learning_rate": 3.5595835884874462e-06, "log_odds_chosen": 1.6977401971817017, "log_odds_ratio": -0.27191123366355896, "logits/chosen": -0.6277018785476685, "logits/rejected": -0.6766328811645508, "logps/chosen": -0.5676354169845581, "logps/rejected": -1.6190625429153442, "loss": 0.9526, "nll_loss": 0.7819744348526001, "rewards/accuracies": 1.0, "rewards/chosen": -0.05676354467868805, "rewards/margins": 0.10514270514249802, "rewards/rejected": -0.16190627217292786, "step": 4562 }, { "epoch": 2.7835900564282445, "grad_norm": 2.002624034881592, "learning_rate": 3.5586037966932024e-06, "log_odds_chosen": 0.707289457321167, "log_odds_ratio": -0.5863057971000671, "logits/chosen": -0.8004872798919678, "logits/rejected": -0.8531005382537842, "logps/chosen": -0.9239407181739807, "logps/rejected": -1.417255163192749, "loss": 1.0132, "nll_loss": 0.9789122343063354, "rewards/accuracies": 0.625, "rewards/chosen": -0.09239407628774643, "rewards/margins": 0.04933144152164459, "rewards/rejected": -0.14172551035881042, "step": 4563 }, { "epoch": 2.7842000915052614, "grad_norm": 2.886502742767334, "learning_rate": 3.557624004898959e-06, "log_odds_chosen": 0.8965796232223511, "log_odds_ratio": -0.4922782778739929, "logits/chosen": -0.799526035785675, "logits/rejected": -0.8264497518539429, "logps/chosen": -0.6798049211502075, "logps/rejected": -1.282987356185913, "loss": 1.0635, "nll_loss": 0.8468412160873413, "rewards/accuracies": 0.75, "rewards/chosen": -0.06798049062490463, "rewards/margins": 0.0603182390332222, "rewards/rejected": -0.12829872965812683, "step": 4564 }, { "epoch": 2.7848101265822782, "grad_norm": 1.8433289527893066, "learning_rate": 3.556644213104715e-06, "log_odds_chosen": 2.399331569671631, "log_odds_ratio": -0.430006206035614, "logits/chosen": -0.717725396156311, "logits/rejected": -0.8913372159004211, "logps/chosen": -0.7577763795852661, "logps/rejected": -2.5956430435180664, "loss": 0.9624, "nll_loss": 0.8491607308387756, "rewards/accuracies": 0.625, "rewards/chosen": -0.07577764242887497, "rewards/margins": 0.18378666043281555, "rewards/rejected": -0.2595643103122711, "step": 4565 }, { "epoch": 2.7854201616592955, "grad_norm": 1.3583197593688965, "learning_rate": 3.5556644213104716e-06, "log_odds_chosen": 0.655288815498352, "log_odds_ratio": -0.49759140610694885, "logits/chosen": -1.0906336307525635, "logits/rejected": -0.9402968883514404, "logps/chosen": -0.9094911813735962, "logps/rejected": -1.4222533702850342, "loss": 1.0406, "nll_loss": 1.1006736755371094, "rewards/accuracies": 0.75, "rewards/chosen": -0.09094911813735962, "rewards/margins": 0.05127622187137604, "rewards/rejected": -0.14222534000873566, "step": 4566 }, { "epoch": 2.7860301967363124, "grad_norm": 1.3050018548965454, "learning_rate": 3.5546846295162277e-06, "log_odds_chosen": 1.261324405670166, "log_odds_ratio": -0.6255044937133789, "logits/chosen": -0.8895741105079651, "logits/rejected": -0.892801821231842, "logps/chosen": -0.9533258080482483, "logps/rejected": -1.994689702987671, "loss": 1.1393, "nll_loss": 1.1330863237380981, "rewards/accuracies": 0.625, "rewards/chosen": -0.09533257782459259, "rewards/margins": 0.1041364073753357, "rewards/rejected": -0.19946898519992828, "step": 4567 }, { "epoch": 2.786640231813329, "grad_norm": 2.428196430206299, "learning_rate": 3.553704837721984e-06, "log_odds_chosen": 2.058671474456787, "log_odds_ratio": -0.5023720264434814, "logits/chosen": -0.9404873847961426, "logits/rejected": -1.008202075958252, "logps/chosen": -0.7456199526786804, "logps/rejected": -2.2919366359710693, "loss": 1.0156, "nll_loss": 0.994042158126831, "rewards/accuracies": 0.625, "rewards/chosen": -0.07456199824810028, "rewards/margins": 0.15463165938854218, "rewards/rejected": -0.22919367253780365, "step": 4568 }, { "epoch": 2.787250266890346, "grad_norm": 1.612205982208252, "learning_rate": 3.5527250459277404e-06, "log_odds_chosen": 1.5187132358551025, "log_odds_ratio": -0.41902780532836914, "logits/chosen": -0.919215202331543, "logits/rejected": -0.9518993496894836, "logps/chosen": -0.6990038156509399, "logps/rejected": -1.6995790004730225, "loss": 1.054, "nll_loss": 1.1920483112335205, "rewards/accuracies": 0.75, "rewards/chosen": -0.06990037858486176, "rewards/margins": 0.10005752742290497, "rewards/rejected": -0.16995790600776672, "step": 4569 }, { "epoch": 2.7878603019673633, "grad_norm": 3.918445348739624, "learning_rate": 3.5517452541334965e-06, "log_odds_chosen": 0.2344338595867157, "log_odds_ratio": -0.6217668056488037, "logits/chosen": -0.8902565240859985, "logits/rejected": -0.9734443426132202, "logps/chosen": -0.9071965217590332, "logps/rejected": -1.0369739532470703, "loss": 1.0695, "nll_loss": 1.20992910861969, "rewards/accuracies": 0.75, "rewards/chosen": -0.09071965515613556, "rewards/margins": 0.012977737002074718, "rewards/rejected": -0.10369738936424255, "step": 4570 }, { "epoch": 2.78847033704438, "grad_norm": 2.25938081741333, "learning_rate": 3.5507654623392526e-06, "log_odds_chosen": 0.16685420274734497, "log_odds_ratio": -0.7105417251586914, "logits/chosen": -0.9165809154510498, "logits/rejected": -0.9118040204048157, "logps/chosen": -0.913062572479248, "logps/rejected": -0.969923198223114, "loss": 1.2221, "nll_loss": 1.2329990863800049, "rewards/accuracies": 0.5, "rewards/chosen": -0.09130626171827316, "rewards/margins": 0.00568606611341238, "rewards/rejected": -0.09699232876300812, "step": 4571 }, { "epoch": 2.789080372121397, "grad_norm": 1.8755427598953247, "learning_rate": 3.549785670545009e-06, "log_odds_chosen": 1.8937163352966309, "log_odds_ratio": -0.3137744069099426, "logits/chosen": -0.7146977782249451, "logits/rejected": -1.0282206535339355, "logps/chosen": -0.7802125215530396, "logps/rejected": -2.0569019317626953, "loss": 1.0799, "nll_loss": 1.158018708229065, "rewards/accuracies": 0.875, "rewards/chosen": -0.07802124321460724, "rewards/margins": 0.12766894698143005, "rewards/rejected": -0.2056901752948761, "step": 4572 }, { "epoch": 2.789690407198414, "grad_norm": 1.7268140316009521, "learning_rate": 3.5488058787507653e-06, "log_odds_chosen": 2.312687873840332, "log_odds_ratio": -0.3866792917251587, "logits/chosen": -0.9344550371170044, "logits/rejected": -0.9831703901290894, "logps/chosen": -0.7650423049926758, "logps/rejected": -2.676865816116333, "loss": 1.1293, "nll_loss": 1.1512267589569092, "rewards/accuracies": 0.75, "rewards/chosen": -0.07650423049926758, "rewards/margins": 0.19118234515190125, "rewards/rejected": -0.2676865756511688, "step": 4573 }, { "epoch": 2.7903004422754307, "grad_norm": 7.244848728179932, "learning_rate": 3.5478260869565214e-06, "log_odds_chosen": 2.2857067584991455, "log_odds_ratio": -0.28003498911857605, "logits/chosen": -0.7526319026947021, "logits/rejected": -1.0233051776885986, "logps/chosen": -0.7568298578262329, "logps/rejected": -2.5444912910461426, "loss": 1.0252, "nll_loss": 0.9506750106811523, "rewards/accuracies": 0.875, "rewards/chosen": -0.07568299770355225, "rewards/margins": 0.1787661463022232, "rewards/rejected": -0.25444915890693665, "step": 4574 }, { "epoch": 2.7909104773524476, "grad_norm": 1.2551825046539307, "learning_rate": 3.546846295162278e-06, "log_odds_chosen": 1.5376386642456055, "log_odds_ratio": -0.46835222840309143, "logits/chosen": -0.7893139123916626, "logits/rejected": -0.9330486059188843, "logps/chosen": -0.7398779392242432, "logps/rejected": -1.8067831993103027, "loss": 1.0846, "nll_loss": 0.8400064706802368, "rewards/accuracies": 0.75, "rewards/chosen": -0.07398779690265656, "rewards/margins": 0.10669053345918655, "rewards/rejected": -0.1806783229112625, "step": 4575 }, { "epoch": 2.7915205124294644, "grad_norm": 7.149937152862549, "learning_rate": 3.545866503368034e-06, "log_odds_chosen": 1.5328069925308228, "log_odds_ratio": -0.34663403034210205, "logits/chosen": -0.7732487320899963, "logits/rejected": -0.8011109828948975, "logps/chosen": -0.7858079671859741, "logps/rejected": -1.9436668157577515, "loss": 0.9908, "nll_loss": 1.0316362380981445, "rewards/accuracies": 0.75, "rewards/chosen": -0.07858079671859741, "rewards/margins": 0.1157858744263649, "rewards/rejected": -0.1943666785955429, "step": 4576 }, { "epoch": 2.7921305475064817, "grad_norm": 1.7817331552505493, "learning_rate": 3.54488671157379e-06, "log_odds_chosen": 1.8269364833831787, "log_odds_ratio": -0.5072425603866577, "logits/chosen": -0.876991331577301, "logits/rejected": -0.920551061630249, "logps/chosen": -0.8065057992935181, "logps/rejected": -2.411381721496582, "loss": 1.0509, "nll_loss": 0.9193452596664429, "rewards/accuracies": 0.75, "rewards/chosen": -0.08065058290958405, "rewards/margins": 0.1604875922203064, "rewards/rejected": -0.24113817512989044, "step": 4577 }, { "epoch": 2.7927405825834986, "grad_norm": 1.9240859746932983, "learning_rate": 3.5439069197795467e-06, "log_odds_chosen": 1.4783669710159302, "log_odds_ratio": -0.37368717789649963, "logits/chosen": -1.0077333450317383, "logits/rejected": -0.8561992645263672, "logps/chosen": -0.9134595394134521, "logps/rejected": -2.082601547241211, "loss": 1.2252, "nll_loss": 1.1495394706726074, "rewards/accuracies": 0.875, "rewards/chosen": -0.09134595841169357, "rewards/margins": 0.11691419780254364, "rewards/rejected": -0.2082601636648178, "step": 4578 }, { "epoch": 2.7933506176605154, "grad_norm": 1.409832239151001, "learning_rate": 3.542927127985303e-06, "log_odds_chosen": 0.7058961987495422, "log_odds_ratio": -0.6832676529884338, "logits/chosen": -0.8827328085899353, "logits/rejected": -0.9009372591972351, "logps/chosen": -0.8428775668144226, "logps/rejected": -1.3070940971374512, "loss": 1.1004, "nll_loss": 1.1887000799179077, "rewards/accuracies": 0.625, "rewards/chosen": -0.08428776264190674, "rewards/margins": 0.04642166197299957, "rewards/rejected": -0.1307094246149063, "step": 4579 }, { "epoch": 2.7939606527375322, "grad_norm": 1.4492703676223755, "learning_rate": 3.5419473361910594e-06, "log_odds_chosen": 2.526034116744995, "log_odds_ratio": -0.2930094599723816, "logits/chosen": -0.7804581522941589, "logits/rejected": -0.9026815891265869, "logps/chosen": -0.5833466053009033, "logps/rejected": -2.399257183074951, "loss": 1.1249, "nll_loss": 0.7211278676986694, "rewards/accuracies": 0.875, "rewards/chosen": -0.05833466351032257, "rewards/margins": 0.18159106373786926, "rewards/rejected": -0.23992571234703064, "step": 4580 }, { "epoch": 2.7945706878145495, "grad_norm": 1.8698740005493164, "learning_rate": 3.5409675443968155e-06, "log_odds_chosen": 2.586071491241455, "log_odds_ratio": -0.3004104495048523, "logits/chosen": -0.9304153323173523, "logits/rejected": -1.0252677202224731, "logps/chosen": -0.7716103792190552, "logps/rejected": -2.7920804023742676, "loss": 1.0708, "nll_loss": 1.082411289215088, "rewards/accuracies": 0.75, "rewards/chosen": -0.07716104388237, "rewards/margins": 0.20204699039459229, "rewards/rejected": -0.2792080342769623, "step": 4581 }, { "epoch": 2.7951807228915664, "grad_norm": 1.2982414960861206, "learning_rate": 3.5399877526025716e-06, "log_odds_chosen": 1.3128271102905273, "log_odds_ratio": -0.5664510726928711, "logits/chosen": -0.9478745460510254, "logits/rejected": -0.8672124147415161, "logps/chosen": -0.8632339239120483, "logps/rejected": -1.8509222269058228, "loss": 1.0637, "nll_loss": 1.0853739976882935, "rewards/accuracies": 0.75, "rewards/chosen": -0.08632338792085648, "rewards/margins": 0.09876883774995804, "rewards/rejected": -0.18509221076965332, "step": 4582 }, { "epoch": 2.7957907579685832, "grad_norm": 5.581195831298828, "learning_rate": 3.539007960808328e-06, "log_odds_chosen": 2.5460665225982666, "log_odds_ratio": -0.29529863595962524, "logits/chosen": -0.8773337602615356, "logits/rejected": -0.961058497428894, "logps/chosen": -0.7548249363899231, "logps/rejected": -2.7094216346740723, "loss": 1.2365, "nll_loss": 1.1615347862243652, "rewards/accuracies": 0.875, "rewards/chosen": -0.07548248767852783, "rewards/margins": 0.19545969367027283, "rewards/rejected": -0.27094218134880066, "step": 4583 }, { "epoch": 2.7964007930456, "grad_norm": 2.0253255367279053, "learning_rate": 3.5380281690140843e-06, "log_odds_chosen": 1.2254376411437988, "log_odds_ratio": -0.5140825510025024, "logits/chosen": -0.9303646087646484, "logits/rejected": -0.9021521806716919, "logps/chosen": -0.9692491292953491, "logps/rejected": -1.9266467094421387, "loss": 1.0148, "nll_loss": 1.1004852056503296, "rewards/accuracies": 0.75, "rewards/chosen": -0.09692491590976715, "rewards/margins": 0.09573975950479507, "rewards/rejected": -0.19266465306282043, "step": 4584 }, { "epoch": 2.797010828122617, "grad_norm": 8.94979476928711, "learning_rate": 3.5370483772198404e-06, "log_odds_chosen": 2.133230447769165, "log_odds_ratio": -0.36579984426498413, "logits/chosen": -0.8808322548866272, "logits/rejected": -0.9323955774307251, "logps/chosen": -0.7089189291000366, "logps/rejected": -2.3242528438568115, "loss": 1.0013, "nll_loss": 0.8198012709617615, "rewards/accuracies": 0.75, "rewards/chosen": -0.07089188694953918, "rewards/margins": 0.1615334153175354, "rewards/rejected": -0.23242530226707458, "step": 4585 }, { "epoch": 2.7976208631996338, "grad_norm": 1.406409740447998, "learning_rate": 3.536068585425597e-06, "log_odds_chosen": 1.145572304725647, "log_odds_ratio": -0.42588675022125244, "logits/chosen": -1.118964672088623, "logits/rejected": -1.0140490531921387, "logps/chosen": -0.978073000907898, "logps/rejected": -1.800602912902832, "loss": 1.2611, "nll_loss": 1.3975799083709717, "rewards/accuracies": 0.75, "rewards/chosen": -0.09780730307102203, "rewards/margins": 0.08225299417972565, "rewards/rejected": -0.18006029725074768, "step": 4586 }, { "epoch": 2.798230898276651, "grad_norm": 2.0574710369110107, "learning_rate": 3.5350887936313535e-06, "log_odds_chosen": 3.062258243560791, "log_odds_ratio": -0.25887390971183777, "logits/chosen": -0.8664813041687012, "logits/rejected": -1.0452443361282349, "logps/chosen": -0.6832561492919922, "logps/rejected": -3.1351776123046875, "loss": 1.031, "nll_loss": 0.8837224245071411, "rewards/accuracies": 1.0, "rewards/chosen": -0.06832562386989594, "rewards/margins": 0.24519217014312744, "rewards/rejected": -0.3135177791118622, "step": 4587 }, { "epoch": 2.798840933353668, "grad_norm": 5.727626323699951, "learning_rate": 3.5341090018371092e-06, "log_odds_chosen": 0.787623405456543, "log_odds_ratio": -0.9026882648468018, "logits/chosen": -0.8820435404777527, "logits/rejected": -0.9358164072036743, "logps/chosen": -0.9136137962341309, "logps/rejected": -1.8366022109985352, "loss": 1.2335, "nll_loss": 1.091134786605835, "rewards/accuracies": 0.625, "rewards/chosen": -0.0913613885641098, "rewards/margins": 0.09229883551597595, "rewards/rejected": -0.18366022408008575, "step": 4588 }, { "epoch": 2.7994509684306847, "grad_norm": 1.1236281394958496, "learning_rate": 3.5331292100428658e-06, "log_odds_chosen": 3.2501413822174072, "log_odds_ratio": -0.225172221660614, "logits/chosen": -0.718166172504425, "logits/rejected": -0.9827253222465515, "logps/chosen": -0.6424246430397034, "logps/rejected": -3.230747699737549, "loss": 0.9172, "nll_loss": 0.7291791439056396, "rewards/accuracies": 1.0, "rewards/chosen": -0.06424246728420258, "rewards/margins": 0.25883230566978455, "rewards/rejected": -0.3230747580528259, "step": 4589 }, { "epoch": 2.8000610035077016, "grad_norm": 2.422722816467285, "learning_rate": 3.5321494182486223e-06, "log_odds_chosen": 0.6292151212692261, "log_odds_ratio": -0.5580044984817505, "logits/chosen": -0.9575191736221313, "logits/rejected": -1.1205899715423584, "logps/chosen": -0.8699160814285278, "logps/rejected": -1.3409638404846191, "loss": 0.9597, "nll_loss": 0.9995851516723633, "rewards/accuracies": 0.75, "rewards/chosen": -0.08699160814285278, "rewards/margins": 0.04710479453206062, "rewards/rejected": -0.1340963989496231, "step": 4590 }, { "epoch": 2.800671038584719, "grad_norm": 1.830186367034912, "learning_rate": 3.531169626454378e-06, "log_odds_chosen": 1.899606466293335, "log_odds_ratio": -0.4287419319152832, "logits/chosen": -0.7252404093742371, "logits/rejected": -0.7892300486564636, "logps/chosen": -0.8702185750007629, "logps/rejected": -2.3901617527008057, "loss": 1.045, "nll_loss": 0.9924789667129517, "rewards/accuracies": 0.625, "rewards/chosen": -0.0870218575000763, "rewards/margins": 0.15199433267116547, "rewards/rejected": -0.23901620507240295, "step": 4591 }, { "epoch": 2.8012810736617357, "grad_norm": 2.075721025466919, "learning_rate": 3.5301898346601346e-06, "log_odds_chosen": 0.8952003121376038, "log_odds_ratio": -0.5850459337234497, "logits/chosen": -0.9172037839889526, "logits/rejected": -1.0946325063705444, "logps/chosen": -0.9578063488006592, "logps/rejected": -1.669020175933838, "loss": 1.1505, "nll_loss": 1.0720242261886597, "rewards/accuracies": 0.625, "rewards/chosen": -0.0957806259393692, "rewards/margins": 0.07112140208482742, "rewards/rejected": -0.16690202057361603, "step": 4592 }, { "epoch": 2.8018911087387526, "grad_norm": 1.663589596748352, "learning_rate": 3.5292100428658907e-06, "log_odds_chosen": 1.7417672872543335, "log_odds_ratio": -0.33586055040359497, "logits/chosen": -0.979616641998291, "logits/rejected": -0.9697377681732178, "logps/chosen": -0.7340712547302246, "logps/rejected": -2.0923311710357666, "loss": 0.9408, "nll_loss": 0.8840280771255493, "rewards/accuracies": 0.75, "rewards/chosen": -0.0734071210026741, "rewards/margins": 0.1358260065317154, "rewards/rejected": -0.2092331349849701, "step": 4593 }, { "epoch": 2.8025011438157694, "grad_norm": 2.595062017440796, "learning_rate": 3.5282302510716472e-06, "log_odds_chosen": 3.0104050636291504, "log_odds_ratio": -0.211675763130188, "logits/chosen": -0.5449081063270569, "logits/rejected": -0.7750283479690552, "logps/chosen": -0.8095179796218872, "logps/rejected": -3.045283317565918, "loss": 0.879, "nll_loss": 0.7388530969619751, "rewards/accuracies": 0.875, "rewards/chosen": -0.08095180243253708, "rewards/margins": 0.22357654571533203, "rewards/rejected": -0.3045283555984497, "step": 4594 }, { "epoch": 2.8031111788927863, "grad_norm": 2.692456007003784, "learning_rate": 3.5272504592774034e-06, "log_odds_chosen": 1.3916783332824707, "log_odds_ratio": -0.3937346637248993, "logits/chosen": -0.6953920125961304, "logits/rejected": -0.9001004695892334, "logps/chosen": -0.7543208599090576, "logps/rejected": -1.9079333543777466, "loss": 1.145, "nll_loss": 0.9945464134216309, "rewards/accuracies": 0.75, "rewards/chosen": -0.07543209195137024, "rewards/margins": 0.11536124348640442, "rewards/rejected": -0.19079333543777466, "step": 4595 }, { "epoch": 2.803721213969803, "grad_norm": 6.289001941680908, "learning_rate": 3.5262706674831595e-06, "log_odds_chosen": 2.813988208770752, "log_odds_ratio": -0.19288960099220276, "logits/chosen": -0.6196948885917664, "logits/rejected": -0.8618354797363281, "logps/chosen": -0.8263394832611084, "logps/rejected": -3.0161004066467285, "loss": 1.0326, "nll_loss": 0.7223997712135315, "rewards/accuracies": 1.0, "rewards/chosen": -0.08263394981622696, "rewards/margins": 0.21897611021995544, "rewards/rejected": -0.3016100525856018, "step": 4596 }, { "epoch": 2.80433124904682, "grad_norm": 3.095808744430542, "learning_rate": 3.525290875688916e-06, "log_odds_chosen": 1.3778295516967773, "log_odds_ratio": -0.404123455286026, "logits/chosen": -0.960473358631134, "logits/rejected": -0.8768594264984131, "logps/chosen": -0.7586333751678467, "logps/rejected": -1.6922733783721924, "loss": 1.0655, "nll_loss": 0.8885015249252319, "rewards/accuracies": 0.875, "rewards/chosen": -0.07586333155632019, "rewards/margins": 0.09336400032043457, "rewards/rejected": -0.16922733187675476, "step": 4597 }, { "epoch": 2.8049412841238373, "grad_norm": 2.509230852127075, "learning_rate": 3.5243110838946726e-06, "log_odds_chosen": 2.236525774002075, "log_odds_ratio": -0.2046475112438202, "logits/chosen": -0.9104713201522827, "logits/rejected": -1.0851163864135742, "logps/chosen": -0.9871777296066284, "logps/rejected": -2.674022674560547, "loss": 1.054, "nll_loss": 1.185581088066101, "rewards/accuracies": 0.875, "rewards/chosen": -0.09871777892112732, "rewards/margins": 0.16868449747562408, "rewards/rejected": -0.2674022912979126, "step": 4598 }, { "epoch": 2.805551319200854, "grad_norm": 1.464465618133545, "learning_rate": 3.5233312921004283e-06, "log_odds_chosen": 2.3893914222717285, "log_odds_ratio": -0.3170245289802551, "logits/chosen": -0.6101686358451843, "logits/rejected": -0.7786216139793396, "logps/chosen": -0.5756654143333435, "logps/rejected": -2.208406448364258, "loss": 0.9707, "nll_loss": 0.6188188791275024, "rewards/accuracies": 0.875, "rewards/chosen": -0.05756654217839241, "rewards/margins": 0.1632741093635559, "rewards/rejected": -0.22084064781665802, "step": 4599 }, { "epoch": 2.806161354277871, "grad_norm": 1.5035293102264404, "learning_rate": 3.522351500306185e-06, "log_odds_chosen": 1.4355108737945557, "log_odds_ratio": -0.5012818574905396, "logits/chosen": -0.7958577871322632, "logits/rejected": -0.8769001364707947, "logps/chosen": -0.7120696306228638, "logps/rejected": -1.7207847833633423, "loss": 1.1175, "nll_loss": 0.8591261506080627, "rewards/accuracies": 0.625, "rewards/chosen": -0.0712069571018219, "rewards/margins": 0.1008715033531189, "rewards/rejected": -0.1720784604549408, "step": 4600 }, { "epoch": 2.806771389354888, "grad_norm": 1.8596620559692383, "learning_rate": 3.5213717085119414e-06, "log_odds_chosen": 0.003052506595849991, "log_odds_ratio": -0.7534589171409607, "logits/chosen": -0.7731214761734009, "logits/rejected": -0.8086563348770142, "logps/chosen": -1.0027663707733154, "logps/rejected": -0.9889946579933167, "loss": 1.2746, "nll_loss": 1.5168132781982422, "rewards/accuracies": 0.5, "rewards/chosen": -0.1002766340970993, "rewards/margins": -0.0013771643862128258, "rewards/rejected": -0.0988994687795639, "step": 4601 }, { "epoch": 2.807381424431905, "grad_norm": 2.822084665298462, "learning_rate": 3.520391916717697e-06, "log_odds_chosen": 0.6946489214897156, "log_odds_ratio": -0.5490732789039612, "logits/chosen": -0.7007548809051514, "logits/rejected": -0.864270806312561, "logps/chosen": -0.9551100730895996, "logps/rejected": -1.45902681350708, "loss": 1.1269, "nll_loss": 1.1167256832122803, "rewards/accuracies": 0.75, "rewards/chosen": -0.09551101177930832, "rewards/margins": 0.05039167404174805, "rewards/rejected": -0.14590269327163696, "step": 4602 }, { "epoch": 2.807991459508922, "grad_norm": 3.5107381343841553, "learning_rate": 3.5194121249234536e-06, "log_odds_chosen": 1.2750704288482666, "log_odds_ratio": -0.6211341023445129, "logits/chosen": -0.8327772617340088, "logits/rejected": -0.9192023873329163, "logps/chosen": -0.8914084434509277, "logps/rejected": -2.007932662963867, "loss": 1.009, "nll_loss": 1.052034616470337, "rewards/accuracies": 0.75, "rewards/chosen": -0.08914084732532501, "rewards/margins": 0.1116524189710617, "rewards/rejected": -0.20079326629638672, "step": 4603 }, { "epoch": 2.8086014945859388, "grad_norm": 4.236861705780029, "learning_rate": 3.51843233312921e-06, "log_odds_chosen": 1.5297560691833496, "log_odds_ratio": -0.4328307509422302, "logits/chosen": -0.9180805683135986, "logits/rejected": -0.9154550433158875, "logps/chosen": -0.8212149143218994, "logps/rejected": -2.0070276260375977, "loss": 1.0726, "nll_loss": 1.027488112449646, "rewards/accuracies": 0.625, "rewards/chosen": -0.08212149143218994, "rewards/margins": 0.11858128011226654, "rewards/rejected": -0.2007027566432953, "step": 4604 }, { "epoch": 2.8092115296629556, "grad_norm": 2.6746456623077393, "learning_rate": 3.517452541334966e-06, "log_odds_chosen": 0.7448223829269409, "log_odds_ratio": -0.7607100009918213, "logits/chosen": -0.9635290503501892, "logits/rejected": -0.8172487616539001, "logps/chosen": -1.0288583040237427, "logps/rejected": -1.8847111463546753, "loss": 1.2534, "nll_loss": 1.3370274305343628, "rewards/accuracies": 0.375, "rewards/chosen": -0.10288582742214203, "rewards/margins": 0.08558528870344162, "rewards/rejected": -0.18847112357616425, "step": 4605 }, { "epoch": 2.8098215647399725, "grad_norm": 1.638036847114563, "learning_rate": 3.5164727495407224e-06, "log_odds_chosen": 3.0373947620391846, "log_odds_ratio": -0.24125415086746216, "logits/chosen": -0.7746074795722961, "logits/rejected": -0.946466863155365, "logps/chosen": -0.7026630640029907, "logps/rejected": -3.1938042640686035, "loss": 0.9973, "nll_loss": 0.7962906956672668, "rewards/accuracies": 1.0, "rewards/chosen": -0.07026630640029907, "rewards/margins": 0.24911411106586456, "rewards/rejected": -0.3193804621696472, "step": 4606 }, { "epoch": 2.8104315998169893, "grad_norm": 1.6779296398162842, "learning_rate": 3.515492957746479e-06, "log_odds_chosen": 2.162703514099121, "log_odds_ratio": -0.310272753238678, "logits/chosen": -0.8123397827148438, "logits/rejected": -0.9883620738983154, "logps/chosen": -0.880297839641571, "logps/rejected": -2.5232348442077637, "loss": 1.0399, "nll_loss": 1.1390604972839355, "rewards/accuracies": 0.875, "rewards/chosen": -0.08802978694438934, "rewards/margins": 0.16429370641708374, "rewards/rejected": -0.2523235082626343, "step": 4607 }, { "epoch": 2.811041634894006, "grad_norm": 1.88515305519104, "learning_rate": 3.514513165952235e-06, "log_odds_chosen": 0.9340249300003052, "log_odds_ratio": -0.5064300894737244, "logits/chosen": -0.9257491827011108, "logits/rejected": -0.872599720954895, "logps/chosen": -0.8819334506988525, "logps/rejected": -1.6489160060882568, "loss": 1.0791, "nll_loss": 1.110409140586853, "rewards/accuracies": 0.75, "rewards/chosen": -0.08819334208965302, "rewards/margins": 0.07669824361801147, "rewards/rejected": -0.16489160060882568, "step": 4608 }, { "epoch": 2.8116516699710234, "grad_norm": 8.123322486877441, "learning_rate": 3.513533374157991e-06, "log_odds_chosen": 0.33604544401168823, "log_odds_ratio": -0.6718294024467468, "logits/chosen": -0.9053847193717957, "logits/rejected": -0.8996737003326416, "logps/chosen": -0.8018099665641785, "logps/rejected": -0.9780439138412476, "loss": 1.2164, "nll_loss": 0.9847091436386108, "rewards/accuracies": 0.625, "rewards/chosen": -0.08018099516630173, "rewards/margins": 0.017623398452997208, "rewards/rejected": -0.09780439734458923, "step": 4609 }, { "epoch": 2.8122617050480403, "grad_norm": 5.752862930297852, "learning_rate": 3.5125535823637473e-06, "log_odds_chosen": 2.7566752433776855, "log_odds_ratio": -0.3576279878616333, "logits/chosen": -0.7871303558349609, "logits/rejected": -1.037324070930481, "logps/chosen": -0.7055904865264893, "logps/rejected": -2.903940200805664, "loss": 0.9865, "nll_loss": 0.9057683348655701, "rewards/accuracies": 0.75, "rewards/chosen": -0.0705590546131134, "rewards/margins": 0.21983496844768524, "rewards/rejected": -0.29039400815963745, "step": 4610 }, { "epoch": 2.812871740125057, "grad_norm": 1.8611780405044556, "learning_rate": 3.511573790569504e-06, "log_odds_chosen": 0.3309323787689209, "log_odds_ratio": -0.708836019039154, "logits/chosen": -0.8635066747665405, "logits/rejected": -0.8122258186340332, "logps/chosen": -0.9833297729492188, "logps/rejected": -1.2147984504699707, "loss": 1.1247, "nll_loss": 1.242789626121521, "rewards/accuracies": 0.5, "rewards/chosen": -0.0983329713344574, "rewards/margins": 0.023146875202655792, "rewards/rejected": -0.12147984653711319, "step": 4611 }, { "epoch": 2.813481775202074, "grad_norm": 1.2390289306640625, "learning_rate": 3.5105939987752604e-06, "log_odds_chosen": 1.4688736200332642, "log_odds_ratio": -0.4492489695549011, "logits/chosen": -0.9031569957733154, "logits/rejected": -0.7992827296257019, "logps/chosen": -0.8369815349578857, "logps/rejected": -2.0367016792297363, "loss": 1.0763, "nll_loss": 0.9530199766159058, "rewards/accuracies": 0.625, "rewards/chosen": -0.08369815349578857, "rewards/margins": 0.11997202038764954, "rewards/rejected": -0.2036702036857605, "step": 4612 }, { "epoch": 2.8140918102790913, "grad_norm": 1.7058889865875244, "learning_rate": 3.509614206981016e-06, "log_odds_chosen": 1.5659897327423096, "log_odds_ratio": -0.3629278838634491, "logits/chosen": -0.7172155380249023, "logits/rejected": -0.5997174978256226, "logps/chosen": -0.735806941986084, "logps/rejected": -1.862443208694458, "loss": 1.1304, "nll_loss": 0.9824657440185547, "rewards/accuracies": 0.875, "rewards/chosen": -0.07358069717884064, "rewards/margins": 0.1126636266708374, "rewards/rejected": -0.18624433875083923, "step": 4613 }, { "epoch": 2.814701845356108, "grad_norm": 1.5765093564987183, "learning_rate": 3.5086344151867726e-06, "log_odds_chosen": 1.7147576808929443, "log_odds_ratio": -0.6673266887664795, "logits/chosen": -0.7841256260871887, "logits/rejected": -0.9101564288139343, "logps/chosen": -0.8262299299240112, "logps/rejected": -2.2215676307678223, "loss": 0.9732, "nll_loss": 0.9351962208747864, "rewards/accuracies": 0.5, "rewards/chosen": -0.08262299001216888, "rewards/margins": 0.13953378796577454, "rewards/rejected": -0.22215677797794342, "step": 4614 }, { "epoch": 2.815311880433125, "grad_norm": 1.9843647480010986, "learning_rate": 3.507654623392529e-06, "log_odds_chosen": 0.24499869346618652, "log_odds_ratio": -0.682486891746521, "logits/chosen": -0.9117059111595154, "logits/rejected": -0.8282357454299927, "logps/chosen": -0.8926354050636292, "logps/rejected": -1.1219265460968018, "loss": 1.0915, "nll_loss": 1.0644299983978271, "rewards/accuracies": 0.375, "rewards/chosen": -0.08926354348659515, "rewards/margins": 0.022929107770323753, "rewards/rejected": -0.11219264566898346, "step": 4615 }, { "epoch": 2.815921915510142, "grad_norm": 1.5774366855621338, "learning_rate": 3.506674831598285e-06, "log_odds_chosen": 2.3184101581573486, "log_odds_ratio": -0.4553179442882538, "logits/chosen": -0.7063978910446167, "logits/rejected": -0.8209701180458069, "logps/chosen": -0.5659362077713013, "logps/rejected": -2.3914082050323486, "loss": 0.8637, "nll_loss": 0.7605077028274536, "rewards/accuracies": 0.625, "rewards/chosen": -0.05659361928701401, "rewards/margins": 0.1825472116470337, "rewards/rejected": -0.2391408234834671, "step": 4616 }, { "epoch": 2.8165319505871587, "grad_norm": 1.4317446947097778, "learning_rate": 3.5056950398040414e-06, "log_odds_chosen": 0.8280052542686462, "log_odds_ratio": -0.6208096146583557, "logits/chosen": -1.1057687997817993, "logits/rejected": -1.0445047616958618, "logps/chosen": -0.9960114359855652, "logps/rejected": -1.3207426071166992, "loss": 1.3051, "nll_loss": 1.430357575416565, "rewards/accuracies": 0.625, "rewards/chosen": -0.09960115700960159, "rewards/margins": 0.032473124563694, "rewards/rejected": -0.1320742666721344, "step": 4617 }, { "epoch": 2.8171419856641755, "grad_norm": 1.1907134056091309, "learning_rate": 3.504715248009798e-06, "log_odds_chosen": 1.6690630912780762, "log_odds_ratio": -0.4494004249572754, "logits/chosen": -1.020179271697998, "logits/rejected": -1.0205014944076538, "logps/chosen": -0.7729852199554443, "logps/rejected": -2.061411142349243, "loss": 0.9937, "nll_loss": 0.9099340438842773, "rewards/accuracies": 0.75, "rewards/chosen": -0.07729852199554443, "rewards/margins": 0.1288425773382187, "rewards/rejected": -0.20614111423492432, "step": 4618 }, { "epoch": 2.8177520207411924, "grad_norm": 1.3103151321411133, "learning_rate": 3.503735456215554e-06, "log_odds_chosen": 1.9446834325790405, "log_odds_ratio": -0.3860318958759308, "logits/chosen": -1.0710115432739258, "logits/rejected": -1.073918342590332, "logps/chosen": -0.8659080266952515, "logps/rejected": -2.481705904006958, "loss": 1.1629, "nll_loss": 1.0118883848190308, "rewards/accuracies": 0.875, "rewards/chosen": -0.08659079670906067, "rewards/margins": 0.16157981753349304, "rewards/rejected": -0.2481706142425537, "step": 4619 }, { "epoch": 2.8183620558182096, "grad_norm": 1.5880826711654663, "learning_rate": 3.5027556644213102e-06, "log_odds_chosen": 1.6919422149658203, "log_odds_ratio": -0.3632825016975403, "logits/chosen": -0.9112833738327026, "logits/rejected": -1.0701377391815186, "logps/chosen": -0.8612338304519653, "logps/rejected": -2.112147808074951, "loss": 0.9932, "nll_loss": 1.0058778524398804, "rewards/accuracies": 0.75, "rewards/chosen": -0.08612339198589325, "rewards/margins": 0.12509138882160187, "rewards/rejected": -0.2112147957086563, "step": 4620 }, { "epoch": 2.8189720908952265, "grad_norm": 1.018266201019287, "learning_rate": 3.5017758726270668e-06, "log_odds_chosen": 2.08954119682312, "log_odds_ratio": -0.4319981634616852, "logits/chosen": -0.6182552576065063, "logits/rejected": -0.8752074837684631, "logps/chosen": -0.5906977653503418, "logps/rejected": -2.217874765396118, "loss": 0.9531, "nll_loss": 0.7524846196174622, "rewards/accuracies": 0.625, "rewards/chosen": -0.05906978249549866, "rewards/margins": 0.16271768510341644, "rewards/rejected": -0.2217874675989151, "step": 4621 }, { "epoch": 2.8195821259722433, "grad_norm": 1.3991966247558594, "learning_rate": 3.500796080832823e-06, "log_odds_chosen": 1.4122552871704102, "log_odds_ratio": -0.4370081424713135, "logits/chosen": -1.0365793704986572, "logits/rejected": -1.0661280155181885, "logps/chosen": -1.1139777898788452, "logps/rejected": -2.339614152908325, "loss": 1.1155, "nll_loss": 1.279041051864624, "rewards/accuracies": 0.875, "rewards/chosen": -0.11139778792858124, "rewards/margins": 0.12256363034248352, "rewards/rejected": -0.23396141827106476, "step": 4622 }, { "epoch": 2.82019216104926, "grad_norm": 2.459563732147217, "learning_rate": 3.499816289038579e-06, "log_odds_chosen": 0.9191660284996033, "log_odds_ratio": -0.5262426733970642, "logits/chosen": -0.7832268476486206, "logits/rejected": -0.8130401968955994, "logps/chosen": -0.7723280787467957, "logps/rejected": -1.4342409372329712, "loss": 0.9426, "nll_loss": 0.928542971611023, "rewards/accuracies": 0.625, "rewards/chosen": -0.07723280787467957, "rewards/margins": 0.06619128584861755, "rewards/rejected": -0.14342409372329712, "step": 4623 }, { "epoch": 2.8208021961262775, "grad_norm": 1.5880310535430908, "learning_rate": 3.4988364972443356e-06, "log_odds_chosen": 1.178919792175293, "log_odds_ratio": -0.7247116565704346, "logits/chosen": -0.9636845588684082, "logits/rejected": -0.9653195142745972, "logps/chosen": -0.8926889896392822, "logps/rejected": -1.4190880060195923, "loss": 1.1555, "nll_loss": 1.079925298690796, "rewards/accuracies": 0.375, "rewards/chosen": -0.08926889300346375, "rewards/margins": 0.052639905363321304, "rewards/rejected": -0.14190879464149475, "step": 4624 }, { "epoch": 2.8214122312032943, "grad_norm": 7.85286808013916, "learning_rate": 3.4978567054500917e-06, "log_odds_chosen": 0.6083558201789856, "log_odds_ratio": -0.47179850935935974, "logits/chosen": -1.0263426303863525, "logits/rejected": -1.0821146965026855, "logps/chosen": -1.1520018577575684, "logps/rejected": -1.5751532316207886, "loss": 1.273, "nll_loss": 1.2621204853057861, "rewards/accuracies": 0.875, "rewards/chosen": -0.11520017683506012, "rewards/margins": 0.04231514781713486, "rewards/rejected": -0.15751531720161438, "step": 4625 }, { "epoch": 2.822022266280311, "grad_norm": 1.651427984237671, "learning_rate": 3.4968769136558482e-06, "log_odds_chosen": 1.6681967973709106, "log_odds_ratio": -0.47870442271232605, "logits/chosen": -0.5139245986938477, "logits/rejected": -0.5682601928710938, "logps/chosen": -0.7408879399299622, "logps/rejected": -1.8925200700759888, "loss": 1.0129, "nll_loss": 0.7964959740638733, "rewards/accuracies": 0.625, "rewards/chosen": -0.07408878952264786, "rewards/margins": 0.11516322195529938, "rewards/rejected": -0.18925201892852783, "step": 4626 }, { "epoch": 2.822632301357328, "grad_norm": 2.7963333129882812, "learning_rate": 3.4958971218616043e-06, "log_odds_chosen": 3.150871515274048, "log_odds_ratio": -0.18464231491088867, "logits/chosen": -0.7719716429710388, "logits/rejected": -0.8547443151473999, "logps/chosen": -0.8276288509368896, "logps/rejected": -3.4127578735351562, "loss": 1.0556, "nll_loss": 1.1194638013839722, "rewards/accuracies": 1.0, "rewards/chosen": -0.08276288211345673, "rewards/margins": 0.2585129141807556, "rewards/rejected": -0.34127575159072876, "step": 4627 }, { "epoch": 2.823242336434345, "grad_norm": 2.132835865020752, "learning_rate": 3.4949173300673605e-06, "log_odds_chosen": 1.6792681217193604, "log_odds_ratio": -0.4075867235660553, "logits/chosen": -0.9352469444274902, "logits/rejected": -0.940011739730835, "logps/chosen": -0.8646723031997681, "logps/rejected": -2.125005006790161, "loss": 1.2366, "nll_loss": 1.062908411026001, "rewards/accuracies": 0.75, "rewards/chosen": -0.08646723628044128, "rewards/margins": 0.1260332614183426, "rewards/rejected": -0.21250051259994507, "step": 4628 }, { "epoch": 2.8238523715113617, "grad_norm": 1.4287797212600708, "learning_rate": 3.493937538273117e-06, "log_odds_chosen": 1.1126165390014648, "log_odds_ratio": -0.5483852624893188, "logits/chosen": -0.692092776298523, "logits/rejected": -0.7594295740127563, "logps/chosen": -0.8620854020118713, "logps/rejected": -1.695978045463562, "loss": 0.986, "nll_loss": 0.8269087076187134, "rewards/accuracies": 0.75, "rewards/chosen": -0.0862085372209549, "rewards/margins": 0.0833892747759819, "rewards/rejected": -0.1695978045463562, "step": 4629 }, { "epoch": 2.824462406588379, "grad_norm": 2.2045536041259766, "learning_rate": 3.4929577464788727e-06, "log_odds_chosen": 1.2966853380203247, "log_odds_ratio": -0.682610034942627, "logits/chosen": -0.8423665165901184, "logits/rejected": -0.8613373637199402, "logps/chosen": -0.8015178442001343, "logps/rejected": -1.8824350833892822, "loss": 1.0896, "nll_loss": 0.9837902784347534, "rewards/accuracies": 0.625, "rewards/chosen": -0.08015178889036179, "rewards/margins": 0.10809171944856644, "rewards/rejected": -0.18824350833892822, "step": 4630 }, { "epoch": 2.825072441665396, "grad_norm": 2.5472006797790527, "learning_rate": 3.4919779546846293e-06, "log_odds_chosen": 1.7165724039077759, "log_odds_ratio": -0.5198041200637817, "logits/chosen": -1.008506417274475, "logits/rejected": -0.9417762160301208, "logps/chosen": -0.8433024883270264, "logps/rejected": -2.378091335296631, "loss": 1.0577, "nll_loss": 1.034313440322876, "rewards/accuracies": 0.75, "rewards/chosen": -0.0843302458524704, "rewards/margins": 0.15347887575626373, "rewards/rejected": -0.23780912160873413, "step": 4631 }, { "epoch": 2.8256824767424127, "grad_norm": 1.875594139099121, "learning_rate": 3.490998162890386e-06, "log_odds_chosen": 3.117136240005493, "log_odds_ratio": -0.4029403626918793, "logits/chosen": -0.6555665731430054, "logits/rejected": -0.83842933177948, "logps/chosen": -0.6456608772277832, "logps/rejected": -3.2453203201293945, "loss": 1.1119, "nll_loss": 0.8536845445632935, "rewards/accuracies": 0.625, "rewards/chosen": -0.06456609070301056, "rewards/margins": 0.2599659562110901, "rewards/rejected": -0.32453203201293945, "step": 4632 }, { "epoch": 2.8262925118194295, "grad_norm": 3.6190006732940674, "learning_rate": 3.490018371096142e-06, "log_odds_chosen": 1.9704338312149048, "log_odds_ratio": -0.5470579862594604, "logits/chosen": -0.8765645623207092, "logits/rejected": -0.9607752561569214, "logps/chosen": -0.7570199966430664, "logps/rejected": -2.3527719974517822, "loss": 0.8881, "nll_loss": 0.9118293523788452, "rewards/accuracies": 0.625, "rewards/chosen": -0.075702004134655, "rewards/margins": 0.15957516431808472, "rewards/rejected": -0.2352771759033203, "step": 4633 }, { "epoch": 2.826902546896447, "grad_norm": 1.2473512887954712, "learning_rate": 3.489038579301898e-06, "log_odds_chosen": 1.2804293632507324, "log_odds_ratio": -0.4708091616630554, "logits/chosen": -0.680458664894104, "logits/rejected": -0.7443772554397583, "logps/chosen": -0.8731750249862671, "logps/rejected": -1.8930814266204834, "loss": 1.0177, "nll_loss": 0.8661898374557495, "rewards/accuracies": 0.75, "rewards/chosen": -0.08731749653816223, "rewards/margins": 0.10199064761400223, "rewards/rejected": -0.18930815160274506, "step": 4634 }, { "epoch": 2.8275125819734637, "grad_norm": 1.5085910558700562, "learning_rate": 3.4880587875076546e-06, "log_odds_chosen": 0.7513722777366638, "log_odds_ratio": -0.4787384867668152, "logits/chosen": -0.9548038244247437, "logits/rejected": -0.9846645593643188, "logps/chosen": -0.8182184100151062, "logps/rejected": -1.408191204071045, "loss": 1.061, "nll_loss": 0.9907821416854858, "rewards/accuracies": 0.625, "rewards/chosen": -0.08182183653116226, "rewards/margins": 0.05899728834629059, "rewards/rejected": -0.14081913232803345, "step": 4635 }, { "epoch": 2.8281226170504805, "grad_norm": 1.3373528718948364, "learning_rate": 3.4870789957134107e-06, "log_odds_chosen": 2.3192148208618164, "log_odds_ratio": -0.3330930173397064, "logits/chosen": -0.6772927045822144, "logits/rejected": -0.7817517518997192, "logps/chosen": -0.6762402057647705, "logps/rejected": -2.3647923469543457, "loss": 0.8159, "nll_loss": 0.8215938210487366, "rewards/accuracies": 0.875, "rewards/chosen": -0.06762401759624481, "rewards/margins": 0.1688552051782608, "rewards/rejected": -0.23647922277450562, "step": 4636 }, { "epoch": 2.8287326521274974, "grad_norm": 2.88202166557312, "learning_rate": 3.486099203919167e-06, "log_odds_chosen": 2.34074330329895, "log_odds_ratio": -0.48105427622795105, "logits/chosen": -0.8005273938179016, "logits/rejected": -0.7966349124908447, "logps/chosen": -0.7043920159339905, "logps/rejected": -2.518932342529297, "loss": 1.125, "nll_loss": 0.8999406695365906, "rewards/accuracies": 0.625, "rewards/chosen": -0.07043920457363129, "rewards/margins": 0.18145403265953064, "rewards/rejected": -0.25189322233200073, "step": 4637 }, { "epoch": 2.829342687204514, "grad_norm": 2.343454360961914, "learning_rate": 3.4851194121249234e-06, "log_odds_chosen": 2.8965489864349365, "log_odds_ratio": -0.31124550104141235, "logits/chosen": -0.6418617367744446, "logits/rejected": -0.9790821075439453, "logps/chosen": -0.5668884515762329, "logps/rejected": -2.829651117324829, "loss": 0.8867, "nll_loss": 0.6284676790237427, "rewards/accuracies": 0.875, "rewards/chosen": -0.05668884888291359, "rewards/margins": 0.22627627849578857, "rewards/rejected": -0.28296512365341187, "step": 4638 }, { "epoch": 2.829952722281531, "grad_norm": 1.6269359588623047, "learning_rate": 3.4841396203306795e-06, "log_odds_chosen": 1.3638978004455566, "log_odds_ratio": -0.48127344250679016, "logits/chosen": -0.752869725227356, "logits/rejected": -0.8561464548110962, "logps/chosen": -0.8464473485946655, "logps/rejected": -1.7272385358810425, "loss": 1.1964, "nll_loss": 0.9938735365867615, "rewards/accuracies": 0.75, "rewards/chosen": -0.08464473485946655, "rewards/margins": 0.08807911723852158, "rewards/rejected": -0.17272385954856873, "step": 4639 }, { "epoch": 2.830562757358548, "grad_norm": 3.2352359294891357, "learning_rate": 3.483159828536436e-06, "log_odds_chosen": 2.102930784225464, "log_odds_ratio": -0.23030820488929749, "logits/chosen": -0.6839175820350647, "logits/rejected": -0.8087441921234131, "logps/chosen": -0.721922755241394, "logps/rejected": -2.2618484497070312, "loss": 1.2335, "nll_loss": 1.143981695175171, "rewards/accuracies": 1.0, "rewards/chosen": -0.07219227403402328, "rewards/margins": 0.15399256348609924, "rewards/rejected": -0.22618484497070312, "step": 4640 }, { "epoch": 2.831172792435565, "grad_norm": 1.646774411201477, "learning_rate": 3.482180036742192e-06, "log_odds_chosen": 2.8463518619537354, "log_odds_ratio": -0.16389347612857819, "logits/chosen": -0.6762018203735352, "logits/rejected": -1.04180109500885, "logps/chosen": -1.000279188156128, "logps/rejected": -3.2972939014434814, "loss": 1.1596, "nll_loss": 1.1102544069290161, "rewards/accuracies": 1.0, "rewards/chosen": -0.10002791881561279, "rewards/margins": 0.22970148921012878, "rewards/rejected": -0.3297294080257416, "step": 4641 }, { "epoch": 2.831782827512582, "grad_norm": 1.7773993015289307, "learning_rate": 3.4812002449479483e-06, "log_odds_chosen": 1.8407025337219238, "log_odds_ratio": -0.4313456118106842, "logits/chosen": -0.8236867785453796, "logits/rejected": -0.886755108833313, "logps/chosen": -0.831298828125, "logps/rejected": -2.284090757369995, "loss": 0.9497, "nll_loss": 0.9133415222167969, "rewards/accuracies": 0.625, "rewards/chosen": -0.0831298828125, "rewards/margins": 0.1452791690826416, "rewards/rejected": -0.2284090667963028, "step": 4642 }, { "epoch": 2.832392862589599, "grad_norm": 1.350496530532837, "learning_rate": 3.480220453153705e-06, "log_odds_chosen": 0.6723432540893555, "log_odds_ratio": -0.5833415985107422, "logits/chosen": -0.9088931679725647, "logits/rejected": -0.9084885120391846, "logps/chosen": -0.9012933969497681, "logps/rejected": -1.3967721462249756, "loss": 0.9837, "nll_loss": 1.0251457691192627, "rewards/accuracies": 0.625, "rewards/chosen": -0.09012934565544128, "rewards/margins": 0.04954787343740463, "rewards/rejected": -0.13967721164226532, "step": 4643 }, { "epoch": 2.8330028976666157, "grad_norm": 1.935200572013855, "learning_rate": 3.479240661359461e-06, "log_odds_chosen": 1.3765922784805298, "log_odds_ratio": -0.508991003036499, "logits/chosen": -0.7976958751678467, "logits/rejected": -0.9048987030982971, "logps/chosen": -0.9532507061958313, "logps/rejected": -2.0206680297851562, "loss": 1.0283, "nll_loss": 1.0031914710998535, "rewards/accuracies": 0.75, "rewards/chosen": -0.09532507508993149, "rewards/margins": 0.10674172639846802, "rewards/rejected": -0.2020668089389801, "step": 4644 }, { "epoch": 2.833612932743633, "grad_norm": 1.718027114868164, "learning_rate": 3.478260869565217e-06, "log_odds_chosen": 1.4771071672439575, "log_odds_ratio": -0.5849807262420654, "logits/chosen": -1.0080690383911133, "logits/rejected": -0.9238336086273193, "logps/chosen": -0.8287710547447205, "logps/rejected": -2.094132900238037, "loss": 0.9986, "nll_loss": 0.9152686595916748, "rewards/accuracies": 0.75, "rewards/chosen": -0.08287710696458817, "rewards/margins": 0.12653620541095734, "rewards/rejected": -0.2094133049249649, "step": 4645 }, { "epoch": 2.83422296782065, "grad_norm": 2.5091726779937744, "learning_rate": 3.4772810777709736e-06, "log_odds_chosen": 0.6259745359420776, "log_odds_ratio": -0.5877653360366821, "logits/chosen": -1.1053128242492676, "logits/rejected": -1.043248176574707, "logps/chosen": -0.8954771757125854, "logps/rejected": -1.3094673156738281, "loss": 1.1965, "nll_loss": 1.3507905006408691, "rewards/accuracies": 0.5, "rewards/chosen": -0.08954771608114243, "rewards/margins": 0.0413990244269371, "rewards/rejected": -0.13094674050807953, "step": 4646 }, { "epoch": 2.8348330028976667, "grad_norm": 1.574626088142395, "learning_rate": 3.47630128597673e-06, "log_odds_chosen": 2.0056660175323486, "log_odds_ratio": -0.31580930948257446, "logits/chosen": -0.578697144985199, "logits/rejected": -0.5100284218788147, "logps/chosen": -0.5627830624580383, "logps/rejected": -2.0011017322540283, "loss": 1.0603, "nll_loss": 0.6961191296577454, "rewards/accuracies": 1.0, "rewards/chosen": -0.056278303265571594, "rewards/margins": 0.14383187890052795, "rewards/rejected": -0.20011018216609955, "step": 4647 }, { "epoch": 2.8354430379746836, "grad_norm": 2.197462797164917, "learning_rate": 3.475321494182486e-06, "log_odds_chosen": 3.1897337436676025, "log_odds_ratio": -0.2868579030036926, "logits/chosen": -0.7607067823410034, "logits/rejected": -1.035091757774353, "logps/chosen": -0.7526108622550964, "logps/rejected": -3.2826809883117676, "loss": 0.9693, "nll_loss": 0.9247549772262573, "rewards/accuracies": 0.75, "rewards/chosen": -0.07526107877492905, "rewards/margins": 0.25300702452659607, "rewards/rejected": -0.3282681107521057, "step": 4648 }, { "epoch": 2.8360530730517004, "grad_norm": 5.468931198120117, "learning_rate": 3.4743417023882424e-06, "log_odds_chosen": 0.3790980577468872, "log_odds_ratio": -0.695898175239563, "logits/chosen": -0.8182048797607422, "logits/rejected": -0.8546777963638306, "logps/chosen": -0.9755640029907227, "logps/rejected": -1.3992465734481812, "loss": 1.0282, "nll_loss": 1.0809251070022583, "rewards/accuracies": 0.625, "rewards/chosen": -0.09755639731884003, "rewards/margins": 0.04236824810504913, "rewards/rejected": -0.13992464542388916, "step": 4649 }, { "epoch": 2.8366631081287172, "grad_norm": 1.2225533723831177, "learning_rate": 3.4733619105939985e-06, "log_odds_chosen": 2.8016610145568848, "log_odds_ratio": -0.24456238746643066, "logits/chosen": -0.49347078800201416, "logits/rejected": -0.8319264650344849, "logps/chosen": -0.4902530610561371, "logps/rejected": -2.5951948165893555, "loss": 0.8012, "nll_loss": 0.6974688172340393, "rewards/accuracies": 1.0, "rewards/chosen": -0.04902530461549759, "rewards/margins": 0.21049417555332184, "rewards/rejected": -0.2595194876194, "step": 4650 }, { "epoch": 2.837273143205734, "grad_norm": 2.30283522605896, "learning_rate": 3.4723821187997547e-06, "log_odds_chosen": 2.423614501953125, "log_odds_ratio": -0.3838559091091156, "logits/chosen": -0.6847015619277954, "logits/rejected": -0.9174224138259888, "logps/chosen": -0.752544641494751, "logps/rejected": -2.7808141708374023, "loss": 1.1447, "nll_loss": 0.932357668876648, "rewards/accuracies": 0.875, "rewards/chosen": -0.07525446265935898, "rewards/margins": 0.20282696187496185, "rewards/rejected": -0.27808141708374023, "step": 4651 }, { "epoch": 2.8378831782827514, "grad_norm": 1.702959418296814, "learning_rate": 3.4714023270055112e-06, "log_odds_chosen": 1.813982367515564, "log_odds_ratio": -0.4212331175804138, "logits/chosen": -0.8780667781829834, "logits/rejected": -0.9788643717765808, "logps/chosen": -0.8509489893913269, "logps/rejected": -2.0941507816314697, "loss": 1.0234, "nll_loss": 1.2262195348739624, "rewards/accuracies": 0.625, "rewards/chosen": -0.0850948914885521, "rewards/margins": 0.12432019412517548, "rewards/rejected": -0.20941510796546936, "step": 4652 }, { "epoch": 2.8384932133597682, "grad_norm": 1.8405424356460571, "learning_rate": 3.4704225352112673e-06, "log_odds_chosen": 0.46173524856567383, "log_odds_ratio": -0.5916864275932312, "logits/chosen": -0.9556266069412231, "logits/rejected": -0.9918477535247803, "logps/chosen": -0.9979019165039062, "logps/rejected": -1.3362846374511719, "loss": 1.0141, "nll_loss": 1.12202787399292, "rewards/accuracies": 0.5, "rewards/chosen": -0.09979019314050674, "rewards/margins": 0.03383827582001686, "rewards/rejected": -0.1336284726858139, "step": 4653 }, { "epoch": 2.839103248436785, "grad_norm": 2.449151039123535, "learning_rate": 3.469442743417024e-06, "log_odds_chosen": 1.8961622714996338, "log_odds_ratio": -0.4499339759349823, "logits/chosen": -0.961737871170044, "logits/rejected": -1.1648380756378174, "logps/chosen": -1.1407999992370605, "logps/rejected": -2.6565027236938477, "loss": 0.9331, "nll_loss": 1.0612685680389404, "rewards/accuracies": 0.75, "rewards/chosen": -0.11407999694347382, "rewards/margins": 0.15157032012939453, "rewards/rejected": -0.26565030217170715, "step": 4654 }, { "epoch": 2.839713283513802, "grad_norm": 2.5445656776428223, "learning_rate": 3.46846295162278e-06, "log_odds_chosen": 3.1251988410949707, "log_odds_ratio": -0.2677082419395447, "logits/chosen": -0.5703871250152588, "logits/rejected": -0.8330269455909729, "logps/chosen": -0.7809240818023682, "logps/rejected": -3.4296371936798096, "loss": 0.9983, "nll_loss": 1.0144753456115723, "rewards/accuracies": 0.875, "rewards/chosen": -0.07809240370988846, "rewards/margins": 0.2648712992668152, "rewards/rejected": -0.3429637551307678, "step": 4655 }, { "epoch": 2.840323318590819, "grad_norm": 1.668667197227478, "learning_rate": 3.467483159828536e-06, "log_odds_chosen": 3.2319605350494385, "log_odds_ratio": -0.31961363554000854, "logits/chosen": -0.9476999044418335, "logits/rejected": -1.054500937461853, "logps/chosen": -0.7845903635025024, "logps/rejected": -3.5067756175994873, "loss": 1.1398, "nll_loss": 1.0937010049819946, "rewards/accuracies": 0.75, "rewards/chosen": -0.07845903933048248, "rewards/margins": 0.2722185254096985, "rewards/rejected": -0.35067757964134216, "step": 4656 }, { "epoch": 2.840933353667836, "grad_norm": 1.618456482887268, "learning_rate": 3.4665033680342927e-06, "log_odds_chosen": 2.408134937286377, "log_odds_ratio": -0.42890802025794983, "logits/chosen": -0.9138023257255554, "logits/rejected": -0.9294195771217346, "logps/chosen": -0.7020872235298157, "logps/rejected": -2.573848247528076, "loss": 1.1384, "nll_loss": 1.263524055480957, "rewards/accuracies": 0.875, "rewards/chosen": -0.07020872831344604, "rewards/margins": 0.18717610836029053, "rewards/rejected": -0.2573848366737366, "step": 4657 }, { "epoch": 2.841543388744853, "grad_norm": 1.1810322999954224, "learning_rate": 3.465523576240049e-06, "log_odds_chosen": 1.7912821769714355, "log_odds_ratio": -0.3706918954849243, "logits/chosen": -0.8259691596031189, "logits/rejected": -0.8212527632713318, "logps/chosen": -0.7646366953849792, "logps/rejected": -2.1431498527526855, "loss": 0.8817, "nll_loss": 0.9201536178588867, "rewards/accuracies": 0.75, "rewards/chosen": -0.07646366953849792, "rewards/margins": 0.13785132765769958, "rewards/rejected": -0.2143149971961975, "step": 4658 }, { "epoch": 2.8421534238218698, "grad_norm": 1.3915072679519653, "learning_rate": 3.464543784445805e-06, "log_odds_chosen": 3.880291700363159, "log_odds_ratio": -0.24331825971603394, "logits/chosen": -0.8344465494155884, "logits/rejected": -1.1413898468017578, "logps/chosen": -0.6954692006111145, "logps/rejected": -4.039295673370361, "loss": 1.0972, "nll_loss": 0.9976822733879089, "rewards/accuracies": 0.875, "rewards/chosen": -0.06954692304134369, "rewards/margins": 0.3343826234340668, "rewards/rejected": -0.40392956137657166, "step": 4659 }, { "epoch": 2.8427634588988866, "grad_norm": 1.222909688949585, "learning_rate": 3.4635639926515615e-06, "log_odds_chosen": 2.5290584564208984, "log_odds_ratio": -0.3377207815647125, "logits/chosen": -0.8373589515686035, "logits/rejected": -0.9557527303695679, "logps/chosen": -0.739123523235321, "logps/rejected": -2.7498672008514404, "loss": 0.9933, "nll_loss": 0.8681974411010742, "rewards/accuracies": 0.75, "rewards/chosen": -0.0739123523235321, "rewards/margins": 0.20107439160346985, "rewards/rejected": -0.27498674392700195, "step": 4660 }, { "epoch": 2.8433734939759034, "grad_norm": 2.6881117820739746, "learning_rate": 3.462584200857318e-06, "log_odds_chosen": 3.0827677249908447, "log_odds_ratio": -0.26096054911613464, "logits/chosen": -0.7810370922088623, "logits/rejected": -0.9241983890533447, "logps/chosen": -0.6447095274925232, "logps/rejected": -3.126241683959961, "loss": 1.079, "nll_loss": 1.2768291234970093, "rewards/accuracies": 1.0, "rewards/chosen": -0.06447095423936844, "rewards/margins": 0.2481532096862793, "rewards/rejected": -0.31262415647506714, "step": 4661 }, { "epoch": 2.8439835290529203, "grad_norm": 4.929953098297119, "learning_rate": 3.4616044090630737e-06, "log_odds_chosen": 2.7078051567077637, "log_odds_ratio": -0.4281885325908661, "logits/chosen": -0.9081869721412659, "logits/rejected": -1.0841844081878662, "logps/chosen": -0.7567518949508667, "logps/rejected": -3.0320446491241455, "loss": 1.0402, "nll_loss": 0.9727827906608582, "rewards/accuracies": 0.625, "rewards/chosen": -0.07567518949508667, "rewards/margins": 0.22752927243709564, "rewards/rejected": -0.3032044768333435, "step": 4662 }, { "epoch": 2.8445935641299376, "grad_norm": 4.256538391113281, "learning_rate": 3.4606246172688303e-06, "log_odds_chosen": 1.5320804119110107, "log_odds_ratio": -0.6338683366775513, "logits/chosen": -0.8450276255607605, "logits/rejected": -0.8731849193572998, "logps/chosen": -0.7599750757217407, "logps/rejected": -2.027496337890625, "loss": 1.2049, "nll_loss": 1.0010725259780884, "rewards/accuracies": 0.625, "rewards/chosen": -0.07599750906229019, "rewards/margins": 0.12675213813781738, "rewards/rejected": -0.20274963974952698, "step": 4663 }, { "epoch": 2.8452035992069544, "grad_norm": 3.964792013168335, "learning_rate": 3.459644825474587e-06, "log_odds_chosen": 1.7350040674209595, "log_odds_ratio": -0.3134107291698456, "logits/chosen": -0.883538007736206, "logits/rejected": -0.9654916524887085, "logps/chosen": -0.977534294128418, "logps/rejected": -2.2868645191192627, "loss": 1.2576, "nll_loss": 1.1628124713897705, "rewards/accuracies": 0.875, "rewards/chosen": -0.09775342792272568, "rewards/margins": 0.13093301653862, "rewards/rejected": -0.22868645191192627, "step": 4664 }, { "epoch": 2.8458136342839713, "grad_norm": 8.4296875, "learning_rate": 3.4586650336803425e-06, "log_odds_chosen": 2.1692326068878174, "log_odds_ratio": -0.3147423565387726, "logits/chosen": -0.7465489506721497, "logits/rejected": -0.7912113070487976, "logps/chosen": -0.6295842528343201, "logps/rejected": -2.2702460289001465, "loss": 1.0564, "nll_loss": 0.7962971329689026, "rewards/accuracies": 0.875, "rewards/chosen": -0.06295841932296753, "rewards/margins": 0.16406619548797607, "rewards/rejected": -0.2270246148109436, "step": 4665 }, { "epoch": 2.846423669360988, "grad_norm": 1.8431190252304077, "learning_rate": 3.457685241886099e-06, "log_odds_chosen": 3.2879106998443604, "log_odds_ratio": -0.22447913885116577, "logits/chosen": -0.6591951847076416, "logits/rejected": -0.847791850566864, "logps/chosen": -0.5865495204925537, "logps/rejected": -3.040092945098877, "loss": 1.0377, "nll_loss": 1.0482271909713745, "rewards/accuracies": 1.0, "rewards/chosen": -0.05865495651960373, "rewards/margins": 0.24535433948040009, "rewards/rejected": -0.3040092885494232, "step": 4666 }, { "epoch": 2.8470337044380054, "grad_norm": 2.0622057914733887, "learning_rate": 3.4567054500918556e-06, "log_odds_chosen": 3.112989664077759, "log_odds_ratio": -0.4220215678215027, "logits/chosen": -0.8961992859840393, "logits/rejected": -1.0411893129348755, "logps/chosen": -0.828693151473999, "logps/rejected": -3.7027828693389893, "loss": 1.1403, "nll_loss": 1.0086448192596436, "rewards/accuracies": 0.75, "rewards/chosen": -0.08286930620670319, "rewards/margins": 0.2874089479446411, "rewards/rejected": -0.3702782690525055, "step": 4667 }, { "epoch": 2.8476437395150223, "grad_norm": 6.17278528213501, "learning_rate": 3.4557256582976117e-06, "log_odds_chosen": 0.9066886901855469, "log_odds_ratio": -0.62295001745224, "logits/chosen": -0.8237586617469788, "logits/rejected": -0.7235990166664124, "logps/chosen": -1.075059413909912, "logps/rejected": -1.864037275314331, "loss": 0.9575, "nll_loss": 1.1731611490249634, "rewards/accuracies": 0.625, "rewards/chosen": -0.10750594735145569, "rewards/margins": 0.07889778167009354, "rewards/rejected": -0.18640372157096863, "step": 4668 }, { "epoch": 2.848253774592039, "grad_norm": 2.355715274810791, "learning_rate": 3.454745866503368e-06, "log_odds_chosen": 4.179841041564941, "log_odds_ratio": -0.45849424600601196, "logits/chosen": -0.8126861453056335, "logits/rejected": -0.9765125513076782, "logps/chosen": -0.7838845252990723, "logps/rejected": -4.654775619506836, "loss": 1.1412, "nll_loss": 1.1761728525161743, "rewards/accuracies": 0.75, "rewards/chosen": -0.07838845998048782, "rewards/margins": 0.3870891332626343, "rewards/rejected": -0.4654775857925415, "step": 4669 }, { "epoch": 2.848863809669056, "grad_norm": 2.236241340637207, "learning_rate": 3.453766074709124e-06, "log_odds_chosen": 2.3231253623962402, "log_odds_ratio": -0.3342353403568268, "logits/chosen": -0.7936918139457703, "logits/rejected": -1.0527108907699585, "logps/chosen": -0.626011848449707, "logps/rejected": -2.370281457901001, "loss": 1.0491, "nll_loss": 1.0313345193862915, "rewards/accuracies": 0.75, "rewards/chosen": -0.06260118633508682, "rewards/margins": 0.17442697286605835, "rewards/rejected": -0.23702815175056458, "step": 4670 }, { "epoch": 2.849473844746073, "grad_norm": 0.9638728499412537, "learning_rate": 3.4527862829148805e-06, "log_odds_chosen": 2.3332958221435547, "log_odds_ratio": -0.4078437089920044, "logits/chosen": -0.8317798376083374, "logits/rejected": -0.9836122989654541, "logps/chosen": -0.7401646375656128, "logps/rejected": -2.6174399852752686, "loss": 1.065, "nll_loss": 0.9002243280410767, "rewards/accuracies": 0.75, "rewards/chosen": -0.07401645928621292, "rewards/margins": 0.18772754073143005, "rewards/rejected": -0.2617439925670624, "step": 4671 }, { "epoch": 2.8500838798230896, "grad_norm": 3.2638652324676514, "learning_rate": 3.4518064911206366e-06, "log_odds_chosen": 0.45698195695877075, "log_odds_ratio": -0.7629906535148621, "logits/chosen": -1.0021077394485474, "logits/rejected": -0.9696801900863647, "logps/chosen": -0.9264622926712036, "logps/rejected": -1.218898057937622, "loss": 1.3246, "nll_loss": 1.383286714553833, "rewards/accuracies": 0.375, "rewards/chosen": -0.09264623373746872, "rewards/margins": 0.029243575409054756, "rewards/rejected": -0.12188980728387833, "step": 4672 }, { "epoch": 2.8506939149001065, "grad_norm": 5.16361665725708, "learning_rate": 3.4508266993263927e-06, "log_odds_chosen": 2.1871862411499023, "log_odds_ratio": -0.5487390160560608, "logits/chosen": -0.8912314176559448, "logits/rejected": -1.025808334350586, "logps/chosen": -0.7390639781951904, "logps/rejected": -2.562331199645996, "loss": 0.9589, "nll_loss": 0.9518666863441467, "rewards/accuracies": 0.5, "rewards/chosen": -0.07390639930963516, "rewards/margins": 0.18232674896717072, "rewards/rejected": -0.2562331557273865, "step": 4673 }, { "epoch": 2.8513039499771238, "grad_norm": 1.264174461364746, "learning_rate": 3.4498469075321493e-06, "log_odds_chosen": 0.9863030910491943, "log_odds_ratio": -0.5471272468566895, "logits/chosen": -0.8838680982589722, "logits/rejected": -0.8246203660964966, "logps/chosen": -0.6051691770553589, "logps/rejected": -1.1389892101287842, "loss": 0.9111, "nll_loss": 0.9681228995323181, "rewards/accuracies": 0.5, "rewards/chosen": -0.06051691994071007, "rewards/margins": 0.05338200554251671, "rewards/rejected": -0.11389892548322678, "step": 4674 }, { "epoch": 2.8519139850541406, "grad_norm": 1.3201795816421509, "learning_rate": 3.448867115737906e-06, "log_odds_chosen": 0.7623894214630127, "log_odds_ratio": -0.44196707010269165, "logits/chosen": -1.0056968927383423, "logits/rejected": -0.8614119291305542, "logps/chosen": -1.0060713291168213, "logps/rejected": -1.4218940734863281, "loss": 1.131, "nll_loss": 1.2441725730895996, "rewards/accuracies": 0.75, "rewards/chosen": -0.10060712695121765, "rewards/margins": 0.041582271456718445, "rewards/rejected": -0.1421893984079361, "step": 4675 }, { "epoch": 2.8525240201311575, "grad_norm": 1.3927552700042725, "learning_rate": 3.4478873239436615e-06, "log_odds_chosen": 1.7214815616607666, "log_odds_ratio": -0.30965566635131836, "logits/chosen": -0.7960861325263977, "logits/rejected": -0.9625847339630127, "logps/chosen": -0.5943416357040405, "logps/rejected": -1.834753394126892, "loss": 0.929, "nll_loss": 0.9321002960205078, "rewards/accuracies": 1.0, "rewards/chosen": -0.05943416804075241, "rewards/margins": 0.12404116988182068, "rewards/rejected": -0.1834753453731537, "step": 4676 }, { "epoch": 2.8531340552081743, "grad_norm": 1.3838340044021606, "learning_rate": 3.446907532149418e-06, "log_odds_chosen": 2.4053521156311035, "log_odds_ratio": -0.3757248520851135, "logits/chosen": -0.6673524379730225, "logits/rejected": -0.8847540616989136, "logps/chosen": -0.8648390769958496, "logps/rejected": -2.7033140659332275, "loss": 1.0912, "nll_loss": 0.8672690391540527, "rewards/accuracies": 0.875, "rewards/chosen": -0.0864839106798172, "rewards/margins": 0.18384748697280884, "rewards/rejected": -0.27033141255378723, "step": 4677 }, { "epoch": 2.8537440902851916, "grad_norm": 1.6255464553833008, "learning_rate": 3.4459277403551746e-06, "log_odds_chosen": 1.5019142627716064, "log_odds_ratio": -0.5306975245475769, "logits/chosen": -0.8484649658203125, "logits/rejected": -0.8248252272605896, "logps/chosen": -0.854148268699646, "logps/rejected": -2.101569890975952, "loss": 1.0906, "nll_loss": 1.061193585395813, "rewards/accuracies": 0.625, "rewards/chosen": -0.0854148343205452, "rewards/margins": 0.12474215030670166, "rewards/rejected": -0.21015700697898865, "step": 4678 }, { "epoch": 2.8543541253622084, "grad_norm": 2.3625528812408447, "learning_rate": 3.4449479485609303e-06, "log_odds_chosen": 2.75003719329834, "log_odds_ratio": -0.27546897530555725, "logits/chosen": -0.8678892254829407, "logits/rejected": -1.048187017440796, "logps/chosen": -0.5252588391304016, "logps/rejected": -2.682528495788574, "loss": 1.1469, "nll_loss": 0.8304341435432434, "rewards/accuracies": 0.875, "rewards/chosen": -0.05252588540315628, "rewards/margins": 0.21572697162628174, "rewards/rejected": -0.2682528495788574, "step": 4679 }, { "epoch": 2.8549641604392253, "grad_norm": 1.1931571960449219, "learning_rate": 3.443968156766687e-06, "log_odds_chosen": 1.9076772928237915, "log_odds_ratio": -0.7688924670219421, "logits/chosen": -0.9276978373527527, "logits/rejected": -1.004381775856018, "logps/chosen": -1.089669108390808, "logps/rejected": -2.806502342224121, "loss": 1.1445, "nll_loss": 1.4213000535964966, "rewards/accuracies": 0.5, "rewards/chosen": -0.10896691679954529, "rewards/margins": 0.17168331146240234, "rewards/rejected": -0.28065022826194763, "step": 4680 }, { "epoch": 2.855574195516242, "grad_norm": 2.0019724369049072, "learning_rate": 3.4429883649724434e-06, "log_odds_chosen": 2.0557303428649902, "log_odds_ratio": -0.30573591589927673, "logits/chosen": -0.8061159253120422, "logits/rejected": -0.9111135601997375, "logps/chosen": -0.7175772786140442, "logps/rejected": -2.186728000640869, "loss": 1.0577, "nll_loss": 0.9842110872268677, "rewards/accuracies": 0.875, "rewards/chosen": -0.0717577263712883, "rewards/margins": 0.14691506326198578, "rewards/rejected": -0.21867281198501587, "step": 4681 }, { "epoch": 2.856184230593259, "grad_norm": 1.4326648712158203, "learning_rate": 3.4420085731781995e-06, "log_odds_chosen": 2.8967106342315674, "log_odds_ratio": -0.4941931962966919, "logits/chosen": -0.9479913115501404, "logits/rejected": -0.9658833742141724, "logps/chosen": -0.7727950811386108, "logps/rejected": -3.3843657970428467, "loss": 1.1383, "nll_loss": 0.9803435206413269, "rewards/accuracies": 0.625, "rewards/chosen": -0.07727950811386108, "rewards/margins": 0.2611571252346039, "rewards/rejected": -0.33843663334846497, "step": 4682 }, { "epoch": 2.856794265670276, "grad_norm": 6.158348560333252, "learning_rate": 3.4410287813839557e-06, "log_odds_chosen": 0.2903623580932617, "log_odds_ratio": -0.6775332689285278, "logits/chosen": -0.7930701971054077, "logits/rejected": -0.8246480226516724, "logps/chosen": -0.9566033482551575, "logps/rejected": -1.1914218664169312, "loss": 0.9836, "nll_loss": 1.0734816789627075, "rewards/accuracies": 0.25, "rewards/chosen": -0.09566032886505127, "rewards/margins": 0.023481857031583786, "rewards/rejected": -0.11914218962192535, "step": 4683 }, { "epoch": 2.857404300747293, "grad_norm": 8.725096702575684, "learning_rate": 3.440048989589712e-06, "log_odds_chosen": 0.9810271263122559, "log_odds_ratio": -0.5623549818992615, "logits/chosen": -0.903126060962677, "logits/rejected": -0.9256725907325745, "logps/chosen": -1.0007882118225098, "logps/rejected": -1.5644811391830444, "loss": 0.9025, "nll_loss": 0.9370281100273132, "rewards/accuracies": 0.625, "rewards/chosen": -0.10007882118225098, "rewards/margins": 0.056369297206401825, "rewards/rejected": -0.1564481258392334, "step": 4684 }, { "epoch": 2.85801433582431, "grad_norm": 4.9005208015441895, "learning_rate": 3.4390691977954683e-06, "log_odds_chosen": 2.0075109004974365, "log_odds_ratio": -0.34166640043258667, "logits/chosen": -0.9081461429595947, "logits/rejected": -0.9399739503860474, "logps/chosen": -0.5940542221069336, "logps/rejected": -2.0132060050964355, "loss": 1.1131, "nll_loss": 0.8169875144958496, "rewards/accuracies": 0.75, "rewards/chosen": -0.05940542742609978, "rewards/margins": 0.1419151872396469, "rewards/rejected": -0.2013206034898758, "step": 4685 }, { "epoch": 2.858624370901327, "grad_norm": 1.6984214782714844, "learning_rate": 3.4380894060012245e-06, "log_odds_chosen": 1.1342931985855103, "log_odds_ratio": -0.6037877202033997, "logits/chosen": -0.894875705242157, "logits/rejected": -0.9502763748168945, "logps/chosen": -1.197944164276123, "logps/rejected": -1.9775904417037964, "loss": 1.1118, "nll_loss": 1.188557744026184, "rewards/accuracies": 0.625, "rewards/chosen": -0.11979441344738007, "rewards/margins": 0.07796463370323181, "rewards/rejected": -0.19775904715061188, "step": 4686 }, { "epoch": 2.8592344059783437, "grad_norm": 1.3215863704681396, "learning_rate": 3.4371096142069806e-06, "log_odds_chosen": 1.4889822006225586, "log_odds_ratio": -0.6075829267501831, "logits/chosen": -1.098363995552063, "logits/rejected": -1.2298107147216797, "logps/chosen": -1.0596299171447754, "logps/rejected": -2.3691110610961914, "loss": 1.1168, "nll_loss": 1.23405122756958, "rewards/accuracies": 0.5, "rewards/chosen": -0.10596299916505814, "rewards/margins": 0.13094811141490936, "rewards/rejected": -0.2369111031293869, "step": 4687 }, { "epoch": 2.859844441055361, "grad_norm": 1.5836786031723022, "learning_rate": 3.436129822412737e-06, "log_odds_chosen": 2.5288145542144775, "log_odds_ratio": -0.48292210698127747, "logits/chosen": -0.7422360181808472, "logits/rejected": -0.8451856374740601, "logps/chosen": -0.7977527379989624, "logps/rejected": -2.5930776596069336, "loss": 0.9591, "nll_loss": 1.0186009407043457, "rewards/accuracies": 0.75, "rewards/chosen": -0.07977528125047684, "rewards/margins": 0.1795324981212616, "rewards/rejected": -0.25930777192115784, "step": 4688 }, { "epoch": 2.860454476132378, "grad_norm": 2.2312119007110596, "learning_rate": 3.4351500306184937e-06, "log_odds_chosen": 1.8129774332046509, "log_odds_ratio": -0.5143333673477173, "logits/chosen": -1.2381844520568848, "logits/rejected": -1.2029526233673096, "logps/chosen": -0.9239212274551392, "logps/rejected": -2.474047899246216, "loss": 1.1802, "nll_loss": 1.2132740020751953, "rewards/accuracies": 0.625, "rewards/chosen": -0.09239212423563004, "rewards/margins": 0.15501268208026886, "rewards/rejected": -0.2474047839641571, "step": 4689 }, { "epoch": 2.8610645112093946, "grad_norm": 6.615963935852051, "learning_rate": 3.4341702388242494e-06, "log_odds_chosen": 1.0636147260665894, "log_odds_ratio": -0.4600248336791992, "logits/chosen": -0.9013940691947937, "logits/rejected": -0.8733210563659668, "logps/chosen": -0.9252864718437195, "logps/rejected": -1.604377031326294, "loss": 1.0246, "nll_loss": 0.9786298274993896, "rewards/accuracies": 0.75, "rewards/chosen": -0.09252864867448807, "rewards/margins": 0.06790905445814133, "rewards/rejected": -0.1604377031326294, "step": 4690 }, { "epoch": 2.8616745462864115, "grad_norm": 3.783719539642334, "learning_rate": 3.433190447030006e-06, "log_odds_chosen": 1.9537609815597534, "log_odds_ratio": -0.5238694548606873, "logits/chosen": -0.8163110017776489, "logits/rejected": -0.9705025553703308, "logps/chosen": -1.017116904258728, "logps/rejected": -2.7607314586639404, "loss": 0.9843, "nll_loss": 0.9640817046165466, "rewards/accuracies": 0.75, "rewards/chosen": -0.1017116978764534, "rewards/margins": 0.1743614673614502, "rewards/rejected": -0.276073157787323, "step": 4691 }, { "epoch": 2.8622845813634283, "grad_norm": 4.175072193145752, "learning_rate": 3.4322106552357625e-06, "log_odds_chosen": 1.5024809837341309, "log_odds_ratio": -0.4259398579597473, "logits/chosen": -0.9546334743499756, "logits/rejected": -1.0015912055969238, "logps/chosen": -0.6901112198829651, "logps/rejected": -1.7283425331115723, "loss": 1.0303, "nll_loss": 1.0194271802902222, "rewards/accuracies": 0.875, "rewards/chosen": -0.06901112198829651, "rewards/margins": 0.10382314026355743, "rewards/rejected": -0.17283426225185394, "step": 4692 }, { "epoch": 2.862894616440445, "grad_norm": 2.004225492477417, "learning_rate": 3.431230863441518e-06, "log_odds_chosen": -0.03177507966756821, "log_odds_ratio": -0.7715054154396057, "logits/chosen": -0.9100933074951172, "logits/rejected": -0.8917633295059204, "logps/chosen": -0.9189304113388062, "logps/rejected": -0.883758544921875, "loss": 1.1212, "nll_loss": 1.0775911808013916, "rewards/accuracies": 0.375, "rewards/chosen": -0.0918930396437645, "rewards/margins": -0.0035171890631318092, "rewards/rejected": -0.08837585151195526, "step": 4693 }, { "epoch": 2.863504651517462, "grad_norm": 1.2244431972503662, "learning_rate": 3.4302510716472747e-06, "log_odds_chosen": 1.6472811698913574, "log_odds_ratio": -0.3488924503326416, "logits/chosen": -0.6743465662002563, "logits/rejected": -0.6960147023200989, "logps/chosen": -0.5812556147575378, "logps/rejected": -1.773805022239685, "loss": 0.9666, "nll_loss": 0.7471112012863159, "rewards/accuracies": 0.875, "rewards/chosen": -0.058125562965869904, "rewards/margins": 0.1192549392580986, "rewards/rejected": -0.1773805022239685, "step": 4694 }, { "epoch": 2.8641146865944793, "grad_norm": 1.1613484621047974, "learning_rate": 3.4292712798530312e-06, "log_odds_chosen": 0.7779655456542969, "log_odds_ratio": -0.6784584522247314, "logits/chosen": -0.9523123502731323, "logits/rejected": -0.9495630860328674, "logps/chosen": -0.9519649147987366, "logps/rejected": -1.5544304847717285, "loss": 1.1895, "nll_loss": 1.1558839082717896, "rewards/accuracies": 0.5, "rewards/chosen": -0.09519649296998978, "rewards/margins": 0.0602465495467186, "rewards/rejected": -0.15544304251670837, "step": 4695 }, { "epoch": 2.864724721671496, "grad_norm": 1.992547869682312, "learning_rate": 3.4282914880587874e-06, "log_odds_chosen": 2.5306124687194824, "log_odds_ratio": -0.2270248979330063, "logits/chosen": -0.6394033432006836, "logits/rejected": -0.778357207775116, "logps/chosen": -0.5981929898262024, "logps/rejected": -2.4132182598114014, "loss": 0.8237, "nll_loss": 0.6308414936065674, "rewards/accuracies": 1.0, "rewards/chosen": -0.059819296002388, "rewards/margins": 0.1815025359392166, "rewards/rejected": -0.2413218468427658, "step": 4696 }, { "epoch": 2.865334756748513, "grad_norm": 3.2926971912384033, "learning_rate": 3.4273116962645435e-06, "log_odds_chosen": 2.41764497756958, "log_odds_ratio": -0.35417690873146057, "logits/chosen": -0.9891049861907959, "logits/rejected": -1.0535837411880493, "logps/chosen": -0.7858739495277405, "logps/rejected": -2.848698854446411, "loss": 1.3454, "nll_loss": 1.0529379844665527, "rewards/accuracies": 0.75, "rewards/chosen": -0.07858739793300629, "rewards/margins": 0.20628249645233154, "rewards/rejected": -0.28486987948417664, "step": 4697 }, { "epoch": 2.86594479182553, "grad_norm": 1.2284090518951416, "learning_rate": 3.4263319044703e-06, "log_odds_chosen": 1.7488017082214355, "log_odds_ratio": -0.46456268429756165, "logits/chosen": -0.900454044342041, "logits/rejected": -0.991001307964325, "logps/chosen": -0.8287988901138306, "logps/rejected": -2.2441296577453613, "loss": 0.8136, "nll_loss": 0.9800689816474915, "rewards/accuracies": 0.5, "rewards/chosen": -0.08287989348173141, "rewards/margins": 0.14153307676315308, "rewards/rejected": -0.2244129627943039, "step": 4698 }, { "epoch": 2.866554826902547, "grad_norm": 2.005932331085205, "learning_rate": 3.425352112676056e-06, "log_odds_chosen": 0.9862151145935059, "log_odds_ratio": -0.577093243598938, "logits/chosen": -0.9369755387306213, "logits/rejected": -1.0366472005844116, "logps/chosen": -0.9120532870292664, "logps/rejected": -1.650511622428894, "loss": 1.2574, "nll_loss": 1.2476723194122314, "rewards/accuracies": 0.625, "rewards/chosen": -0.09120532870292664, "rewards/margins": 0.07384583353996277, "rewards/rejected": -0.1650511771440506, "step": 4699 }, { "epoch": 2.867164861979564, "grad_norm": 1.4671223163604736, "learning_rate": 3.4243723208818123e-06, "log_odds_chosen": 1.640152096748352, "log_odds_ratio": -0.5837361812591553, "logits/chosen": -0.8685855269432068, "logits/rejected": -0.8641096353530884, "logps/chosen": -0.716651976108551, "logps/rejected": -2.0538225173950195, "loss": 0.9839, "nll_loss": 0.9334584474563599, "rewards/accuracies": 0.5, "rewards/chosen": -0.0716651976108551, "rewards/margins": 0.13371708989143372, "rewards/rejected": -0.20538227260112762, "step": 4700 }, { "epoch": 2.867774897056581, "grad_norm": 3.1346919536590576, "learning_rate": 3.423392529087569e-06, "log_odds_chosen": 1.9728121757507324, "log_odds_ratio": -0.39077329635620117, "logits/chosen": -0.8563783168792725, "logits/rejected": -0.9173707962036133, "logps/chosen": -0.785367488861084, "logps/rejected": -2.2454957962036133, "loss": 1.1342, "nll_loss": 1.2180988788604736, "rewards/accuracies": 0.75, "rewards/chosen": -0.0785367488861084, "rewards/margins": 0.1460128277540207, "rewards/rejected": -0.2245495617389679, "step": 4701 }, { "epoch": 2.8683849321335977, "grad_norm": 2.147937059402466, "learning_rate": 3.422412737293325e-06, "log_odds_chosen": 3.3388242721557617, "log_odds_ratio": -0.2696288824081421, "logits/chosen": -0.636171817779541, "logits/rejected": -0.858873724937439, "logps/chosen": -0.5364861488342285, "logps/rejected": -3.0616986751556396, "loss": 1.0655, "nll_loss": 0.8180774450302124, "rewards/accuracies": 0.875, "rewards/chosen": -0.05364862084388733, "rewards/margins": 0.252521276473999, "rewards/rejected": -0.3061698377132416, "step": 4702 }, { "epoch": 2.8689949672106145, "grad_norm": 1.1808602809906006, "learning_rate": 3.4214329454990815e-06, "log_odds_chosen": 1.7974518537521362, "log_odds_ratio": -0.31891030073165894, "logits/chosen": -0.9171550273895264, "logits/rejected": -0.8455202579498291, "logps/chosen": -0.5921459197998047, "logps/rejected": -1.7518235445022583, "loss": 1.0577, "nll_loss": 1.1138995885849, "rewards/accuracies": 0.875, "rewards/chosen": -0.05921459197998047, "rewards/margins": 0.115967757999897, "rewards/rejected": -0.17518235743045807, "step": 4703 }, { "epoch": 2.8696050022876314, "grad_norm": 1.1631684303283691, "learning_rate": 3.4204531537048376e-06, "log_odds_chosen": 2.1184632778167725, "log_odds_ratio": -0.40310660004615784, "logits/chosen": -0.9302569627761841, "logits/rejected": -0.9921072721481323, "logps/chosen": -0.7305153608322144, "logps/rejected": -2.4067070484161377, "loss": 0.9622, "nll_loss": 0.8212149143218994, "rewards/accuracies": 0.875, "rewards/chosen": -0.07305154204368591, "rewards/margins": 0.16761915385723114, "rewards/rejected": -0.24067068099975586, "step": 4704 }, { "epoch": 2.8702150373646482, "grad_norm": 1.2273516654968262, "learning_rate": 3.4194733619105937e-06, "log_odds_chosen": 2.184800148010254, "log_odds_ratio": -0.4226730763912201, "logits/chosen": -0.9661226272583008, "logits/rejected": -1.0551948547363281, "logps/chosen": -0.7807604074478149, "logps/rejected": -2.6152563095092773, "loss": 1.0255, "nll_loss": 1.0516704320907593, "rewards/accuracies": 0.625, "rewards/chosen": -0.07807604223489761, "rewards/margins": 0.18344958126544952, "rewards/rejected": -0.26152563095092773, "step": 4705 }, { "epoch": 2.8708250724416655, "grad_norm": 2.068350076675415, "learning_rate": 3.4184935701163503e-06, "log_odds_chosen": 1.4648563861846924, "log_odds_ratio": -0.5579783320426941, "logits/chosen": -0.7919281125068665, "logits/rejected": -0.8630630373954773, "logps/chosen": -0.8133754134178162, "logps/rejected": -1.9783337116241455, "loss": 1.2149, "nll_loss": 1.0044512748718262, "rewards/accuracies": 0.75, "rewards/chosen": -0.08133754879236221, "rewards/margins": 0.11649583280086517, "rewards/rejected": -0.19783338904380798, "step": 4706 }, { "epoch": 2.8714351075186824, "grad_norm": 3.241168737411499, "learning_rate": 3.417513778322106e-06, "log_odds_chosen": 1.2648468017578125, "log_odds_ratio": -0.5087724924087524, "logits/chosen": -0.7592592239379883, "logits/rejected": -0.8758662939071655, "logps/chosen": -0.8352450728416443, "logps/rejected": -1.7637978792190552, "loss": 0.959, "nll_loss": 0.9008581638336182, "rewards/accuracies": 0.75, "rewards/chosen": -0.08352451026439667, "rewards/margins": 0.09285527467727661, "rewards/rejected": -0.17637979984283447, "step": 4707 }, { "epoch": 2.872045142595699, "grad_norm": 2.0899555683135986, "learning_rate": 3.4165339865278625e-06, "log_odds_chosen": 1.6945654153823853, "log_odds_ratio": -0.5057550668716431, "logits/chosen": -0.9177724123001099, "logits/rejected": -0.9612771272659302, "logps/chosen": -0.7433786988258362, "logps/rejected": -2.0450990200042725, "loss": 1.0804, "nll_loss": 1.0352253913879395, "rewards/accuracies": 0.75, "rewards/chosen": -0.07433787733316422, "rewards/margins": 0.1301720142364502, "rewards/rejected": -0.204509899020195, "step": 4708 }, { "epoch": 2.872655177672716, "grad_norm": 1.6045303344726562, "learning_rate": 3.415554194733619e-06, "log_odds_chosen": 0.08752691745758057, "log_odds_ratio": -0.7697327136993408, "logits/chosen": -0.7208578586578369, "logits/rejected": -0.6818495392799377, "logps/chosen": -0.8861687183380127, "logps/rejected": -0.9394378662109375, "loss": 1.0801, "nll_loss": 1.0638444423675537, "rewards/accuracies": 0.375, "rewards/chosen": -0.08861686289310455, "rewards/margins": 0.00532691553235054, "rewards/rejected": -0.09394378960132599, "step": 4709 }, { "epoch": 2.8732652127497333, "grad_norm": 1.28934645652771, "learning_rate": 3.414574402939375e-06, "log_odds_chosen": 1.4333337545394897, "log_odds_ratio": -0.3396431803703308, "logits/chosen": -0.9008985757827759, "logits/rejected": -0.8859957456588745, "logps/chosen": -0.665996253490448, "logps/rejected": -1.6078269481658936, "loss": 1.1044, "nll_loss": 0.9130215048789978, "rewards/accuracies": 0.875, "rewards/chosen": -0.06659962981939316, "rewards/margins": 0.0941830724477768, "rewards/rejected": -0.16078269481658936, "step": 4710 }, { "epoch": 2.87387524782675, "grad_norm": 2.0454046726226807, "learning_rate": 3.4135946111451313e-06, "log_odds_chosen": 2.892747640609741, "log_odds_ratio": -0.22659556567668915, "logits/chosen": -0.7709900140762329, "logits/rejected": -1.048037052154541, "logps/chosen": -0.5517873167991638, "logps/rejected": -2.7552874088287354, "loss": 1.0877, "nll_loss": 0.92402184009552, "rewards/accuracies": 1.0, "rewards/chosen": -0.05517873167991638, "rewards/margins": 0.2203500121831894, "rewards/rejected": -0.2755287289619446, "step": 4711 }, { "epoch": 2.874485282903767, "grad_norm": 1.6523751020431519, "learning_rate": 3.412614819350888e-06, "log_odds_chosen": 2.5365240573883057, "log_odds_ratio": -0.29749634861946106, "logits/chosen": -0.7293123006820679, "logits/rejected": -0.8989863991737366, "logps/chosen": -0.5464845895767212, "logps/rejected": -2.390789031982422, "loss": 0.886, "nll_loss": 0.8008603453636169, "rewards/accuracies": 0.875, "rewards/chosen": -0.05464845895767212, "rewards/margins": 0.18443045020103455, "rewards/rejected": -0.23907890915870667, "step": 4712 }, { "epoch": 2.875095317980784, "grad_norm": 1.3660664558410645, "learning_rate": 3.411635027556644e-06, "log_odds_chosen": 1.4073171615600586, "log_odds_ratio": -0.3954405188560486, "logits/chosen": -0.7910098433494568, "logits/rejected": -0.8069605827331543, "logps/chosen": -0.7620192170143127, "logps/rejected": -1.5761263370513916, "loss": 1.041, "nll_loss": 1.015087604522705, "rewards/accuracies": 0.75, "rewards/chosen": -0.0762019231915474, "rewards/margins": 0.08141070604324341, "rewards/rejected": -0.1576126217842102, "step": 4713 }, { "epoch": 2.8757053530578007, "grad_norm": 1.9466838836669922, "learning_rate": 3.4106552357624005e-06, "log_odds_chosen": 0.02388634905219078, "log_odds_ratio": -0.8820239901542664, "logits/chosen": -0.8164577484130859, "logits/rejected": -0.8361741304397583, "logps/chosen": -1.2934234142303467, "logps/rejected": -1.361907958984375, "loss": 1.1549, "nll_loss": 1.3244532346725464, "rewards/accuracies": 0.375, "rewards/chosen": -0.12934233248233795, "rewards/margins": 0.006848445162177086, "rewards/rejected": -0.13619078695774078, "step": 4714 }, { "epoch": 2.8763153881348176, "grad_norm": 1.4106849431991577, "learning_rate": 3.4096754439681567e-06, "log_odds_chosen": 0.2532317638397217, "log_odds_ratio": -0.7393937110900879, "logits/chosen": -0.9981967210769653, "logits/rejected": -0.9571364521980286, "logps/chosen": -1.1250061988830566, "logps/rejected": -1.3322831392288208, "loss": 1.2752, "nll_loss": 1.3680751323699951, "rewards/accuracies": 0.375, "rewards/chosen": -0.1125006303191185, "rewards/margins": 0.020727690309286118, "rewards/rejected": -0.1332283318042755, "step": 4715 }, { "epoch": 2.8769254232118344, "grad_norm": 1.485632300376892, "learning_rate": 3.4086956521739128e-06, "log_odds_chosen": 4.61478853225708, "log_odds_ratio": -0.13006997108459473, "logits/chosen": -0.653380811214447, "logits/rejected": -0.8953192830085754, "logps/chosen": -0.6192740797996521, "logps/rejected": -4.359984397888184, "loss": 0.8536, "nll_loss": 0.8295533657073975, "rewards/accuracies": 1.0, "rewards/chosen": -0.06192741170525551, "rewards/margins": 0.37407106161117554, "rewards/rejected": -0.43599846959114075, "step": 4716 }, { "epoch": 2.8775354582888517, "grad_norm": 2.0695464611053467, "learning_rate": 3.4077158603796693e-06, "log_odds_chosen": 1.7166333198547363, "log_odds_ratio": -0.3881967067718506, "logits/chosen": -0.971187949180603, "logits/rejected": -1.040330410003662, "logps/chosen": -0.724795937538147, "logps/rejected": -1.9264590740203857, "loss": 1.1772, "nll_loss": 1.2597887516021729, "rewards/accuracies": 0.625, "rewards/chosen": -0.07247959077358246, "rewards/margins": 0.12016630917787552, "rewards/rejected": -0.19264590740203857, "step": 4717 }, { "epoch": 2.8781454933658686, "grad_norm": 1.1436223983764648, "learning_rate": 3.4067360685854254e-06, "log_odds_chosen": 0.2542616128921509, "log_odds_ratio": -0.7636817097663879, "logits/chosen": -1.0595178604125977, "logits/rejected": -1.018101453781128, "logps/chosen": -1.0097721815109253, "logps/rejected": -1.297200083732605, "loss": 1.1656, "nll_loss": 1.5223182439804077, "rewards/accuracies": 0.375, "rewards/chosen": -0.10097722709178925, "rewards/margins": 0.02874279022216797, "rewards/rejected": -0.12972001731395721, "step": 4718 }, { "epoch": 2.8787555284428854, "grad_norm": 1.4514521360397339, "learning_rate": 3.4057562767911816e-06, "log_odds_chosen": 1.546411156654358, "log_odds_ratio": -0.5036541223526001, "logits/chosen": -1.107366919517517, "logits/rejected": -1.1131150722503662, "logps/chosen": -0.836184024810791, "logps/rejected": -2.0230066776275635, "loss": 1.1217, "nll_loss": 1.0133973360061646, "rewards/accuracies": 0.75, "rewards/chosen": -0.0836184024810791, "rewards/margins": 0.11868226528167725, "rewards/rejected": -0.20230068266391754, "step": 4719 }, { "epoch": 2.8793655635199022, "grad_norm": 1.466038703918457, "learning_rate": 3.404776484996938e-06, "log_odds_chosen": 0.14193135499954224, "log_odds_ratio": -0.8516265153884888, "logits/chosen": -0.8672055602073669, "logits/rejected": -0.7565562725067139, "logps/chosen": -1.075535774230957, "logps/rejected": -1.2395490407943726, "loss": 1.1601, "nll_loss": 1.4590396881103516, "rewards/accuracies": 0.25, "rewards/chosen": -0.10755357891321182, "rewards/margins": 0.01640133187174797, "rewards/rejected": -0.1239549070596695, "step": 4720 }, { "epoch": 2.8799755985969195, "grad_norm": 1.6287139654159546, "learning_rate": 3.4037966932026947e-06, "log_odds_chosen": 0.5785365700721741, "log_odds_ratio": -0.6139374375343323, "logits/chosen": -1.061280369758606, "logits/rejected": -1.061516523361206, "logps/chosen": -0.8931009769439697, "logps/rejected": -1.3273358345031738, "loss": 1.1006, "nll_loss": 1.0451512336730957, "rewards/accuracies": 0.625, "rewards/chosen": -0.08931010216474533, "rewards/margins": 0.043423496186733246, "rewards/rejected": -0.13273358345031738, "step": 4721 }, { "epoch": 2.8805856336739364, "grad_norm": 1.4094974994659424, "learning_rate": 3.4028169014084504e-06, "log_odds_chosen": 1.0765860080718994, "log_odds_ratio": -0.5244206190109253, "logits/chosen": -0.7399511337280273, "logits/rejected": -0.9091610312461853, "logps/chosen": -0.8019551038742065, "logps/rejected": -1.487330675125122, "loss": 0.8642, "nll_loss": 0.998112142086029, "rewards/accuracies": 0.5, "rewards/chosen": -0.08019551634788513, "rewards/margins": 0.06853756308555603, "rewards/rejected": -0.14873307943344116, "step": 4722 }, { "epoch": 2.8811956687509532, "grad_norm": 1.8266035318374634, "learning_rate": 3.401837109614207e-06, "log_odds_chosen": 1.318951964378357, "log_odds_ratio": -0.5525655150413513, "logits/chosen": -1.0177744626998901, "logits/rejected": -1.0668509006500244, "logps/chosen": -0.9920110702514648, "logps/rejected": -2.00368595123291, "loss": 1.1731, "nll_loss": 1.306743860244751, "rewards/accuracies": 0.75, "rewards/chosen": -0.09920110553503036, "rewards/margins": 0.10116750001907349, "rewards/rejected": -0.20036859810352325, "step": 4723 }, { "epoch": 2.88180570382797, "grad_norm": 2.880094528198242, "learning_rate": 3.4008573178199635e-06, "log_odds_chosen": 1.100372076034546, "log_odds_ratio": -0.4413215219974518, "logits/chosen": -0.8671665787696838, "logits/rejected": -0.9340614676475525, "logps/chosen": -0.8487256765365601, "logps/rejected": -1.5816314220428467, "loss": 1.1486, "nll_loss": 1.111268162727356, "rewards/accuracies": 0.75, "rewards/chosen": -0.08487257361412048, "rewards/margins": 0.07329057157039642, "rewards/rejected": -0.1581631302833557, "step": 4724 }, { "epoch": 2.882415738904987, "grad_norm": 12.112432479858398, "learning_rate": 3.399877526025719e-06, "log_odds_chosen": 3.1393651962280273, "log_odds_ratio": -0.28845110535621643, "logits/chosen": -0.79048091173172, "logits/rejected": -0.8985104560852051, "logps/chosen": -0.6166641712188721, "logps/rejected": -3.1218509674072266, "loss": 1.0008, "nll_loss": 0.8249455690383911, "rewards/accuracies": 0.875, "rewards/chosen": -0.06166641786694527, "rewards/margins": 0.25051867961883545, "rewards/rejected": -0.3121851086616516, "step": 4725 }, { "epoch": 2.8830257739820038, "grad_norm": 2.7635879516601562, "learning_rate": 3.3988977342314757e-06, "log_odds_chosen": 2.391937732696533, "log_odds_ratio": -0.2685554027557373, "logits/chosen": -0.7086179256439209, "logits/rejected": -0.8354445099830627, "logps/chosen": -0.5653945207595825, "logps/rejected": -2.2573320865631104, "loss": 1.1282, "nll_loss": 0.9560825228691101, "rewards/accuracies": 0.875, "rewards/chosen": -0.05653945729136467, "rewards/margins": 0.16919374465942383, "rewards/rejected": -0.22573322057724, "step": 4726 }, { "epoch": 2.883635809059021, "grad_norm": 2.4235522747039795, "learning_rate": 3.397917942437232e-06, "log_odds_chosen": 2.3404409885406494, "log_odds_ratio": -0.4142339527606964, "logits/chosen": -0.9065968990325928, "logits/rejected": -0.8595061898231506, "logps/chosen": -0.6824890375137329, "logps/rejected": -2.3640410900115967, "loss": 1.0438, "nll_loss": 0.8824769258499146, "rewards/accuracies": 0.75, "rewards/chosen": -0.06824889779090881, "rewards/margins": 0.16815517842769623, "rewards/rejected": -0.23640410602092743, "step": 4727 }, { "epoch": 2.884245844136038, "grad_norm": 2.0300543308258057, "learning_rate": 3.3969381506429884e-06, "log_odds_chosen": 1.0943528413772583, "log_odds_ratio": -0.6285711526870728, "logits/chosen": -1.0749386548995972, "logits/rejected": -1.0367672443389893, "logps/chosen": -0.9328356981277466, "logps/rejected": -1.9479789733886719, "loss": 1.051, "nll_loss": 1.0828049182891846, "rewards/accuracies": 0.75, "rewards/chosen": -0.09328357130289078, "rewards/margins": 0.10151432454586029, "rewards/rejected": -0.19479790329933167, "step": 4728 }, { "epoch": 2.8848558792130548, "grad_norm": 1.5610260963439941, "learning_rate": 3.3959583588487445e-06, "log_odds_chosen": 2.351072311401367, "log_odds_ratio": -0.3911028802394867, "logits/chosen": -0.6168713569641113, "logits/rejected": -0.9182933568954468, "logps/chosen": -0.7263619899749756, "logps/rejected": -2.6814777851104736, "loss": 1.0413, "nll_loss": 0.8463257551193237, "rewards/accuracies": 0.75, "rewards/chosen": -0.0726362019777298, "rewards/margins": 0.1955116093158722, "rewards/rejected": -0.2681477963924408, "step": 4729 }, { "epoch": 2.8854659142900716, "grad_norm": 1.7844033241271973, "learning_rate": 3.3949785670545006e-06, "log_odds_chosen": 0.7562109231948853, "log_odds_ratio": -0.5852581858634949, "logits/chosen": -0.932918131351471, "logits/rejected": -0.9424854516983032, "logps/chosen": -0.7452408671379089, "logps/rejected": -1.277402400970459, "loss": 1.0636, "nll_loss": 1.0006959438323975, "rewards/accuracies": 0.5, "rewards/chosen": -0.07452408224344254, "rewards/margins": 0.05321615934371948, "rewards/rejected": -0.12774024903774261, "step": 4730 }, { "epoch": 2.8860759493670884, "grad_norm": 5.022831916809082, "learning_rate": 3.393998775260257e-06, "log_odds_chosen": 4.0116963386535645, "log_odds_ratio": -0.2729090750217438, "logits/chosen": -0.7595937848091125, "logits/rejected": -1.0339664220809937, "logps/chosen": -0.6902104616165161, "logps/rejected": -4.025644779205322, "loss": 1.1031, "nll_loss": 1.0806455612182617, "rewards/accuracies": 0.875, "rewards/chosen": -0.06902104616165161, "rewards/margins": 0.33354341983795166, "rewards/rejected": -0.40256449580192566, "step": 4731 }, { "epoch": 2.8866859844441057, "grad_norm": 2.2592155933380127, "learning_rate": 3.3930189834660133e-06, "log_odds_chosen": 1.8787448406219482, "log_odds_ratio": -0.41141366958618164, "logits/chosen": -0.7971740365028381, "logits/rejected": -0.904830813407898, "logps/chosen": -0.9603631496429443, "logps/rejected": -2.3516547679901123, "loss": 1.1178, "nll_loss": 0.9411830306053162, "rewards/accuracies": 0.75, "rewards/chosen": -0.09603632241487503, "rewards/margins": 0.1391291320323944, "rewards/rejected": -0.23516547679901123, "step": 4732 }, { "epoch": 2.8872960195211226, "grad_norm": 3.7329201698303223, "learning_rate": 3.3920391916717694e-06, "log_odds_chosen": 1.9966344833374023, "log_odds_ratio": -0.4287639856338501, "logits/chosen": -0.693267822265625, "logits/rejected": -0.9110099673271179, "logps/chosen": -0.6262754201889038, "logps/rejected": -2.0659360885620117, "loss": 1.2833, "nll_loss": 0.9545282125473022, "rewards/accuracies": 0.75, "rewards/chosen": -0.06262754648923874, "rewards/margins": 0.14396604895591736, "rewards/rejected": -0.2065936028957367, "step": 4733 }, { "epoch": 2.8879060545981394, "grad_norm": 1.9050614833831787, "learning_rate": 3.391059399877526e-06, "log_odds_chosen": 1.4113235473632812, "log_odds_ratio": -0.45528358221054077, "logits/chosen": -0.7219952940940857, "logits/rejected": -0.8210629224777222, "logps/chosen": -0.7729086875915527, "logps/rejected": -1.7141553163528442, "loss": 1.0877, "nll_loss": 0.8924177289009094, "rewards/accuracies": 0.875, "rewards/chosen": -0.0772908627986908, "rewards/margins": 0.0941246822476387, "rewards/rejected": -0.1714155524969101, "step": 4734 }, { "epoch": 2.8885160896751563, "grad_norm": 1.5678890943527222, "learning_rate": 3.3900796080832825e-06, "log_odds_chosen": 0.5601399540901184, "log_odds_ratio": -0.5231906175613403, "logits/chosen": -1.015480875968933, "logits/rejected": -0.9961552619934082, "logps/chosen": -0.8596231341362, "logps/rejected": -1.260749340057373, "loss": 1.1071, "nll_loss": 1.1173341274261475, "rewards/accuracies": 0.75, "rewards/chosen": -0.08596231788396835, "rewards/margins": 0.04011262580752373, "rewards/rejected": -0.12607495486736298, "step": 4735 }, { "epoch": 2.889126124752173, "grad_norm": 3.13980770111084, "learning_rate": 3.389099816289038e-06, "log_odds_chosen": 1.3000705242156982, "log_odds_ratio": -0.3854198157787323, "logits/chosen": -0.9563151597976685, "logits/rejected": -0.9833891987800598, "logps/chosen": -0.7997165322303772, "logps/rejected": -1.7514475584030151, "loss": 1.0661, "nll_loss": 1.0978641510009766, "rewards/accuracies": 1.0, "rewards/chosen": -0.07997165620326996, "rewards/margins": 0.09517308324575424, "rewards/rejected": -0.175144761800766, "step": 4736 }, { "epoch": 2.88973615982919, "grad_norm": 1.125618815422058, "learning_rate": 3.3881200244947947e-06, "log_odds_chosen": 1.609930157661438, "log_odds_ratio": -0.39144167304039, "logits/chosen": -0.7267487645149231, "logits/rejected": -0.7076935768127441, "logps/chosen": -0.6674519777297974, "logps/rejected": -1.8493446111679077, "loss": 0.9956, "nll_loss": 0.7582682371139526, "rewards/accuracies": 0.625, "rewards/chosen": -0.06674520671367645, "rewards/margins": 0.1181892529129982, "rewards/rejected": -0.18493446707725525, "step": 4737 }, { "epoch": 2.8903461949062073, "grad_norm": 1.6232423782348633, "learning_rate": 3.3871402327005513e-06, "log_odds_chosen": 0.4045642614364624, "log_odds_ratio": -0.594048023223877, "logits/chosen": -1.0922642946243286, "logits/rejected": -1.02213454246521, "logps/chosen": -0.919509768486023, "logps/rejected": -1.2876425981521606, "loss": 1.0435, "nll_loss": 1.1505337953567505, "rewards/accuracies": 0.625, "rewards/chosen": -0.09195098280906677, "rewards/margins": 0.03681328520178795, "rewards/rejected": -0.12876427173614502, "step": 4738 }, { "epoch": 2.890956229983224, "grad_norm": 2.953639030456543, "learning_rate": 3.386160440906307e-06, "log_odds_chosen": 0.9574987292289734, "log_odds_ratio": -0.6298791170120239, "logits/chosen": -0.8599046468734741, "logits/rejected": -0.9098286032676697, "logps/chosen": -0.946361780166626, "logps/rejected": -1.7167377471923828, "loss": 1.014, "nll_loss": 1.0391409397125244, "rewards/accuracies": 0.5, "rewards/chosen": -0.09463617950677872, "rewards/margins": 0.07703760266304016, "rewards/rejected": -0.17167378962039948, "step": 4739 }, { "epoch": 2.891566265060241, "grad_norm": 1.3367615938186646, "learning_rate": 3.3851806491120635e-06, "log_odds_chosen": 1.2534765005111694, "log_odds_ratio": -0.5038865208625793, "logits/chosen": -0.6965076327323914, "logits/rejected": -0.8718680143356323, "logps/chosen": -0.8481910228729248, "logps/rejected": -1.8273600339889526, "loss": 1.2614, "nll_loss": 1.1582893133163452, "rewards/accuracies": 0.625, "rewards/chosen": -0.08481910079717636, "rewards/margins": 0.09791690856218338, "rewards/rejected": -0.18273602426052094, "step": 4740 }, { "epoch": 2.892176300137258, "grad_norm": 3.540973663330078, "learning_rate": 3.38420085731782e-06, "log_odds_chosen": 1.4500017166137695, "log_odds_ratio": -0.35597914457321167, "logits/chosen": -0.8133242130279541, "logits/rejected": -0.89577317237854, "logps/chosen": -0.6066544651985168, "logps/rejected": -1.6026110649108887, "loss": 0.9782, "nll_loss": 0.8423811793327332, "rewards/accuracies": 0.875, "rewards/chosen": -0.060665447264909744, "rewards/margins": 0.09959565848112106, "rewards/rejected": -0.1602611094713211, "step": 4741 }, { "epoch": 2.892786335214275, "grad_norm": 1.1665931940078735, "learning_rate": 3.383221065523576e-06, "log_odds_chosen": 3.532846212387085, "log_odds_ratio": -0.2398923635482788, "logits/chosen": -0.8982301354408264, "logits/rejected": -0.9113270044326782, "logps/chosen": -0.608112096786499, "logps/rejected": -3.3976645469665527, "loss": 0.8698, "nll_loss": 1.003556489944458, "rewards/accuracies": 0.875, "rewards/chosen": -0.06081121414899826, "rewards/margins": 0.27895525097846985, "rewards/rejected": -0.3397664427757263, "step": 4742 }, { "epoch": 2.893396370291292, "grad_norm": 1.7519224882125854, "learning_rate": 3.3822412737293323e-06, "log_odds_chosen": 1.8734440803527832, "log_odds_ratio": -0.43679022789001465, "logits/chosen": -0.9770117998123169, "logits/rejected": -0.916967511177063, "logps/chosen": -0.9196800589561462, "logps/rejected": -2.4125938415527344, "loss": 1.0645, "nll_loss": 1.1268633604049683, "rewards/accuracies": 0.75, "rewards/chosen": -0.09196799993515015, "rewards/margins": 0.14929139614105225, "rewards/rejected": -0.2412593960762024, "step": 4743 }, { "epoch": 2.8940064053683088, "grad_norm": 1.3373126983642578, "learning_rate": 3.3812614819350884e-06, "log_odds_chosen": 2.1218740940093994, "log_odds_ratio": -0.24642008543014526, "logits/chosen": -0.8441072702407837, "logits/rejected": -0.8720407485961914, "logps/chosen": -0.5739393830299377, "logps/rejected": -1.942392110824585, "loss": 1.0927, "nll_loss": 0.8416792750358582, "rewards/accuracies": 0.875, "rewards/chosen": -0.057393938302993774, "rewards/margins": 0.13684529066085815, "rewards/rejected": -0.19423922896385193, "step": 4744 }, { "epoch": 2.8946164404453256, "grad_norm": 11.20059871673584, "learning_rate": 3.380281690140845e-06, "log_odds_chosen": 0.8508636355400085, "log_odds_ratio": -0.4738547205924988, "logits/chosen": -1.1311662197113037, "logits/rejected": -0.9839645624160767, "logps/chosen": -1.0267928838729858, "logps/rejected": -1.7306020259857178, "loss": 1.1563, "nll_loss": 1.289095401763916, "rewards/accuracies": 0.625, "rewards/chosen": -0.1026792824268341, "rewards/margins": 0.07038092613220215, "rewards/rejected": -0.17306020855903625, "step": 4745 }, { "epoch": 2.8952264755223425, "grad_norm": 2.493865966796875, "learning_rate": 3.379301898346601e-06, "log_odds_chosen": 1.1811717748641968, "log_odds_ratio": -0.5351030230522156, "logits/chosen": -0.8924424648284912, "logits/rejected": -1.0124127864837646, "logps/chosen": -0.9991220235824585, "logps/rejected": -1.8429107666015625, "loss": 1.194, "nll_loss": 1.0110063552856445, "rewards/accuracies": 0.625, "rewards/chosen": -0.09991219639778137, "rewards/margins": 0.08437886834144592, "rewards/rejected": -0.1842910796403885, "step": 4746 }, { "epoch": 2.8958365105993593, "grad_norm": 2.3006386756896973, "learning_rate": 3.3783221065523572e-06, "log_odds_chosen": 3.36918306350708, "log_odds_ratio": -0.13122466206550598, "logits/chosen": -0.6335106492042542, "logits/rejected": -0.9865122437477112, "logps/chosen": -0.6294904947280884, "logps/rejected": -3.1847503185272217, "loss": 0.9171, "nll_loss": 0.7055355906486511, "rewards/accuracies": 1.0, "rewards/chosen": -0.0629490464925766, "rewards/margins": 0.25552597641944885, "rewards/rejected": -0.31847503781318665, "step": 4747 }, { "epoch": 2.896446545676376, "grad_norm": 10.425902366638184, "learning_rate": 3.3773423147581138e-06, "log_odds_chosen": 0.6960405111312866, "log_odds_ratio": -0.589335560798645, "logits/chosen": -0.9141902327537537, "logits/rejected": -0.9559448957443237, "logps/chosen": -0.7532109022140503, "logps/rejected": -1.200148344039917, "loss": 0.9236, "nll_loss": 1.0192033052444458, "rewards/accuracies": 0.375, "rewards/chosen": -0.07532109320163727, "rewards/margins": 0.04469374567270279, "rewards/rejected": -0.12001483142375946, "step": 4748 }, { "epoch": 2.8970565807533935, "grad_norm": 4.280609607696533, "learning_rate": 3.3763625229638703e-06, "log_odds_chosen": 0.9414117336273193, "log_odds_ratio": -0.4929215610027313, "logits/chosen": -1.0148897171020508, "logits/rejected": -1.0326323509216309, "logps/chosen": -0.8268463611602783, "logps/rejected": -1.5048682689666748, "loss": 1.0997, "nll_loss": 1.0811958312988281, "rewards/accuracies": 0.75, "rewards/chosen": -0.08268462866544724, "rewards/margins": 0.06780219823122025, "rewards/rejected": -0.15048682689666748, "step": 4749 }, { "epoch": 2.8976666158304103, "grad_norm": 1.8349827527999878, "learning_rate": 3.375382731169626e-06, "log_odds_chosen": 2.8594937324523926, "log_odds_ratio": -0.32062506675720215, "logits/chosen": -0.776113748550415, "logits/rejected": -0.9785255193710327, "logps/chosen": -0.8286738991737366, "logps/rejected": -3.1468751430511475, "loss": 0.9844, "nll_loss": 1.0756431818008423, "rewards/accuracies": 0.75, "rewards/chosen": -0.08286738395690918, "rewards/margins": 0.23182013630867004, "rewards/rejected": -0.3146875202655792, "step": 4750 }, { "epoch": 2.898276650907427, "grad_norm": 2.743907928466797, "learning_rate": 3.3744029393753826e-06, "log_odds_chosen": 2.4766929149627686, "log_odds_ratio": -0.4346751570701599, "logits/chosen": -0.6108776330947876, "logits/rejected": -0.904836118221283, "logps/chosen": -0.6726042628288269, "logps/rejected": -2.5995397567749023, "loss": 1.077, "nll_loss": 0.7665913701057434, "rewards/accuracies": 0.625, "rewards/chosen": -0.06726042181253433, "rewards/margins": 0.1926935613155365, "rewards/rejected": -0.25995397567749023, "step": 4751 }, { "epoch": 2.898886685984444, "grad_norm": 6.903102874755859, "learning_rate": 3.373423147581139e-06, "log_odds_chosen": 0.6216468811035156, "log_odds_ratio": -0.7331575751304626, "logits/chosen": -0.9464941620826721, "logits/rejected": -1.0370240211486816, "logps/chosen": -1.0501856803894043, "logps/rejected": -1.45285964012146, "loss": 1.1143, "nll_loss": 1.0684592723846436, "rewards/accuracies": 0.375, "rewards/chosen": -0.10501855611801147, "rewards/margins": 0.04026740789413452, "rewards/rejected": -0.145285964012146, "step": 4752 }, { "epoch": 2.8994967210614613, "grad_norm": 1.2215994596481323, "learning_rate": 3.372443355786895e-06, "log_odds_chosen": 0.56248939037323, "log_odds_ratio": -0.5841179490089417, "logits/chosen": -0.8558278679847717, "logits/rejected": -0.788386344909668, "logps/chosen": -0.8703799247741699, "logps/rejected": -1.2486652135849, "loss": 1.051, "nll_loss": 0.8949192762374878, "rewards/accuracies": 0.5, "rewards/chosen": -0.08703799545764923, "rewards/margins": 0.03782852739095688, "rewards/rejected": -0.12486651539802551, "step": 4753 }, { "epoch": 2.900106756138478, "grad_norm": 2.9230191707611084, "learning_rate": 3.3714635639926514e-06, "log_odds_chosen": 3.154456615447998, "log_odds_ratio": -0.1190040111541748, "logits/chosen": -0.6466965079307556, "logits/rejected": -0.853285551071167, "logps/chosen": -0.646902322769165, "logps/rejected": -3.042445659637451, "loss": 0.9344, "nll_loss": 0.7618298530578613, "rewards/accuracies": 1.0, "rewards/chosen": -0.0646902322769165, "rewards/margins": 0.23955434560775757, "rewards/rejected": -0.3042445778846741, "step": 4754 }, { "epoch": 2.900716791215495, "grad_norm": 4.142613410949707, "learning_rate": 3.370483772198408e-06, "log_odds_chosen": 1.7674522399902344, "log_odds_ratio": -0.459653377532959, "logits/chosen": -0.7956022024154663, "logits/rejected": -0.7416963577270508, "logps/chosen": -1.0177229642868042, "logps/rejected": -2.5598530769348145, "loss": 0.9719, "nll_loss": 1.0887858867645264, "rewards/accuracies": 0.75, "rewards/chosen": -0.10177230089902878, "rewards/margins": 0.15421301126480103, "rewards/rejected": -0.255985289812088, "step": 4755 }, { "epoch": 2.901326826292512, "grad_norm": 1.4553037881851196, "learning_rate": 3.369503980404164e-06, "log_odds_chosen": 1.7365885972976685, "log_odds_ratio": -0.523688554763794, "logits/chosen": -0.8319167494773865, "logits/rejected": -0.9655473232269287, "logps/chosen": -0.7747630476951599, "logps/rejected": -2.0854499340057373, "loss": 1.0958, "nll_loss": 0.9294390678405762, "rewards/accuracies": 0.5, "rewards/chosen": -0.07747630774974823, "rewards/margins": 0.1310686618089676, "rewards/rejected": -0.20854498445987701, "step": 4756 }, { "epoch": 2.9019368613695287, "grad_norm": 1.5332973003387451, "learning_rate": 3.36852418860992e-06, "log_odds_chosen": 1.2971999645233154, "log_odds_ratio": -0.5095484852790833, "logits/chosen": -0.7501163482666016, "logits/rejected": -0.7246420383453369, "logps/chosen": -0.8809540867805481, "logps/rejected": -1.9362139701843262, "loss": 0.9618, "nll_loss": 1.0499076843261719, "rewards/accuracies": 0.625, "rewards/chosen": -0.08809540420770645, "rewards/margins": 0.10552599281072617, "rewards/rejected": -0.1936214119195938, "step": 4757 }, { "epoch": 2.9025468964465455, "grad_norm": 1.469464659690857, "learning_rate": 3.3675443968156767e-06, "log_odds_chosen": 1.2669837474822998, "log_odds_ratio": -0.46847057342529297, "logits/chosen": -0.9831708073616028, "logits/rejected": -1.0099904537200928, "logps/chosen": -0.8254308700561523, "logps/rejected": -1.660840392112732, "loss": 1.1153, "nll_loss": 1.006227731704712, "rewards/accuracies": 0.5, "rewards/chosen": -0.08254308998584747, "rewards/margins": 0.08354094624519348, "rewards/rejected": -0.16608403623104095, "step": 4758 }, { "epoch": 2.9031569315235624, "grad_norm": 1.301555871963501, "learning_rate": 3.366564605021433e-06, "log_odds_chosen": 2.2448105812072754, "log_odds_ratio": -0.34056878089904785, "logits/chosen": -0.9070044755935669, "logits/rejected": -0.9741243124008179, "logps/chosen": -0.6843456029891968, "logps/rejected": -2.465592384338379, "loss": 1.064, "nll_loss": 1.1204469203948975, "rewards/accuracies": 0.875, "rewards/chosen": -0.06843456625938416, "rewards/margins": 0.17812469601631165, "rewards/rejected": -0.2465592473745346, "step": 4759 }, { "epoch": 2.9037669666005796, "grad_norm": 1.4112651348114014, "learning_rate": 3.365584813227189e-06, "log_odds_chosen": 2.6897196769714355, "log_odds_ratio": -0.3771132230758667, "logits/chosen": -0.8920122385025024, "logits/rejected": -1.1598024368286133, "logps/chosen": -0.7347649335861206, "logps/rejected": -2.972031354904175, "loss": 1.0458, "nll_loss": 1.091897964477539, "rewards/accuracies": 0.75, "rewards/chosen": -0.07347649335861206, "rewards/margins": 0.22372663021087646, "rewards/rejected": -0.2972031533718109, "step": 4760 }, { "epoch": 2.9043770016775965, "grad_norm": 2.172961950302124, "learning_rate": 3.3646050214329455e-06, "log_odds_chosen": 1.7346158027648926, "log_odds_ratio": -0.27806782722473145, "logits/chosen": -0.6277830600738525, "logits/rejected": -0.8376847505569458, "logps/chosen": -0.8531488180160522, "logps/rejected": -2.1270644664764404, "loss": 1.0225, "nll_loss": 1.1546756029129028, "rewards/accuracies": 0.875, "rewards/chosen": -0.08531488478183746, "rewards/margins": 0.12739156186580658, "rewards/rejected": -0.21270644664764404, "step": 4761 }, { "epoch": 2.9049870367546133, "grad_norm": 1.690873384475708, "learning_rate": 3.3636252296387016e-06, "log_odds_chosen": 0.7958390116691589, "log_odds_ratio": -0.6395783424377441, "logits/chosen": -1.1333180665969849, "logits/rejected": -1.1358156204223633, "logps/chosen": -1.0406574010849, "logps/rejected": -1.748426914215088, "loss": 1.211, "nll_loss": 1.2459605932235718, "rewards/accuracies": 0.5, "rewards/chosen": -0.10406573116779327, "rewards/margins": 0.07077696174383163, "rewards/rejected": -0.1748427003622055, "step": 4762 }, { "epoch": 2.90559707183163, "grad_norm": 1.6919496059417725, "learning_rate": 3.362645437844458e-06, "log_odds_chosen": 0.7831149101257324, "log_odds_ratio": -0.5065041780471802, "logits/chosen": -0.9894770383834839, "logits/rejected": -0.9659428000450134, "logps/chosen": -1.013952374458313, "logps/rejected": -1.5824602842330933, "loss": 1.054, "nll_loss": 1.1448748111724854, "rewards/accuracies": 0.875, "rewards/chosen": -0.10139524191617966, "rewards/margins": 0.05685078352689743, "rewards/rejected": -0.1582460254430771, "step": 4763 }, { "epoch": 2.9062071069086475, "grad_norm": 1.5225799083709717, "learning_rate": 3.361665646050214e-06, "log_odds_chosen": 2.398130178451538, "log_odds_ratio": -0.43458104133605957, "logits/chosen": -1.0004404783248901, "logits/rejected": -1.1215795278549194, "logps/chosen": -0.8546941876411438, "logps/rejected": -2.7843503952026367, "loss": 1.1454, "nll_loss": 1.0479581356048584, "rewards/accuracies": 0.625, "rewards/chosen": -0.08546940982341766, "rewards/margins": 0.19296559691429138, "rewards/rejected": -0.27843502163887024, "step": 4764 }, { "epoch": 2.9068171419856643, "grad_norm": 1.5288233757019043, "learning_rate": 3.3606858542559704e-06, "log_odds_chosen": 1.651491641998291, "log_odds_ratio": -0.5188111662864685, "logits/chosen": -0.7359309196472168, "logits/rejected": -0.8269407153129578, "logps/chosen": -0.757707953453064, "logps/rejected": -2.2668049335479736, "loss": 1.0633, "nll_loss": 1.1289939880371094, "rewards/accuracies": 0.625, "rewards/chosen": -0.0757707953453064, "rewards/margins": 0.1509096920490265, "rewards/rejected": -0.22668048739433289, "step": 4765 }, { "epoch": 2.907427177062681, "grad_norm": 3.1883459091186523, "learning_rate": 3.359706062461727e-06, "log_odds_chosen": 0.7483567595481873, "log_odds_ratio": -0.5240861177444458, "logits/chosen": -0.7080726623535156, "logits/rejected": -0.8886737823486328, "logps/chosen": -0.9209480285644531, "logps/rejected": -1.4848589897155762, "loss": 1.013, "nll_loss": 1.152472972869873, "rewards/accuracies": 0.875, "rewards/chosen": -0.09209480881690979, "rewards/margins": 0.05639110505580902, "rewards/rejected": -0.1484859138727188, "step": 4766 }, { "epoch": 2.908037212139698, "grad_norm": 1.6583352088928223, "learning_rate": 3.3587262706674826e-06, "log_odds_chosen": 2.965360164642334, "log_odds_ratio": -0.26611825823783875, "logits/chosen": -0.954267680644989, "logits/rejected": -0.9845597743988037, "logps/chosen": -0.6937453746795654, "logps/rejected": -2.7775325775146484, "loss": 1.2685, "nll_loss": 1.268875002861023, "rewards/accuracies": 0.875, "rewards/chosen": -0.06937453150749207, "rewards/margins": 0.20837874710559845, "rewards/rejected": -0.2777532935142517, "step": 4767 }, { "epoch": 2.908647247216715, "grad_norm": 1.3940755128860474, "learning_rate": 3.357746478873239e-06, "log_odds_chosen": 1.9562065601348877, "log_odds_ratio": -0.4813709855079651, "logits/chosen": -0.697091817855835, "logits/rejected": -0.8141980171203613, "logps/chosen": -0.7232810258865356, "logps/rejected": -2.3026437759399414, "loss": 1.0114, "nll_loss": 0.9727750420570374, "rewards/accuracies": 0.5, "rewards/chosen": -0.0723280981183052, "rewards/margins": 0.15793627500534058, "rewards/rejected": -0.23026438057422638, "step": 4768 }, { "epoch": 2.9092572822937317, "grad_norm": 1.629351258277893, "learning_rate": 3.3567666870789957e-06, "log_odds_chosen": 1.504311203956604, "log_odds_ratio": -0.4988557696342468, "logits/chosen": -0.9564955234527588, "logits/rejected": -1.0518262386322021, "logps/chosen": -0.8901631832122803, "logps/rejected": -2.073941707611084, "loss": 1.1031, "nll_loss": 1.0978302955627441, "rewards/accuracies": 0.75, "rewards/chosen": -0.08901631832122803, "rewards/margins": 0.11837784200906754, "rewards/rejected": -0.20739416778087616, "step": 4769 }, { "epoch": 2.9098673173707486, "grad_norm": 3.6867995262145996, "learning_rate": 3.355786895284752e-06, "log_odds_chosen": 2.9022722244262695, "log_odds_ratio": -0.17361482977867126, "logits/chosen": -0.8939052224159241, "logits/rejected": -1.058830976486206, "logps/chosen": -1.0321669578552246, "logps/rejected": -3.3735928535461426, "loss": 1.1515, "nll_loss": 1.178359031677246, "rewards/accuracies": 1.0, "rewards/chosen": -0.10321670770645142, "rewards/margins": 0.23414260149002075, "rewards/rejected": -0.33735930919647217, "step": 4770 }, { "epoch": 2.910477352447766, "grad_norm": 6.028931617736816, "learning_rate": 3.354807103490508e-06, "log_odds_chosen": 1.2901456356048584, "log_odds_ratio": -0.583540678024292, "logits/chosen": -0.9757789373397827, "logits/rejected": -0.9661762714385986, "logps/chosen": -0.8743569254875183, "logps/rejected": -2.0104057788848877, "loss": 1.0809, "nll_loss": 0.9488933086395264, "rewards/accuracies": 0.5, "rewards/chosen": -0.08743569999933243, "rewards/margins": 0.11360489577054977, "rewards/rejected": -0.2010405957698822, "step": 4771 }, { "epoch": 2.9110873875247827, "grad_norm": 4.300200462341309, "learning_rate": 3.3538273116962645e-06, "log_odds_chosen": 1.0874714851379395, "log_odds_ratio": -0.47427743673324585, "logits/chosen": -0.8670899271965027, "logits/rejected": -1.0182112455368042, "logps/chosen": -0.8267953991889954, "logps/rejected": -1.6420693397521973, "loss": 1.0564, "nll_loss": 1.3131659030914307, "rewards/accuracies": 0.75, "rewards/chosen": -0.08267953991889954, "rewards/margins": 0.08152741193771362, "rewards/rejected": -0.16420695185661316, "step": 4772 }, { "epoch": 2.9116974226017995, "grad_norm": 11.21308708190918, "learning_rate": 3.3528475199020206e-06, "log_odds_chosen": 1.6441242694854736, "log_odds_ratio": -0.38208767771720886, "logits/chosen": -0.844397246837616, "logits/rejected": -0.9871001839637756, "logps/chosen": -0.9618821144104004, "logps/rejected": -2.3179244995117188, "loss": 1.159, "nll_loss": 1.100605845451355, "rewards/accuracies": 0.75, "rewards/chosen": -0.09618821740150452, "rewards/margins": 0.13560424745082855, "rewards/rejected": -0.23179247975349426, "step": 4773 }, { "epoch": 2.9123074576788164, "grad_norm": 9.374794006347656, "learning_rate": 3.3518677281077768e-06, "log_odds_chosen": 1.8691089153289795, "log_odds_ratio": -0.3769293427467346, "logits/chosen": -0.9255266785621643, "logits/rejected": -0.9119673371315002, "logps/chosen": -0.7370073795318604, "logps/rejected": -2.1039767265319824, "loss": 1.1451, "nll_loss": 0.8991936445236206, "rewards/accuracies": 0.75, "rewards/chosen": -0.07370074093341827, "rewards/margins": 0.1366969496011734, "rewards/rejected": -0.21039769053459167, "step": 4774 }, { "epoch": 2.9129174927558337, "grad_norm": 1.7513734102249146, "learning_rate": 3.3508879363135333e-06, "log_odds_chosen": 0.7365535497665405, "log_odds_ratio": -0.5303347110748291, "logits/chosen": -0.7918338775634766, "logits/rejected": -0.7456274628639221, "logps/chosen": -0.6890318393707275, "logps/rejected": -1.1272307634353638, "loss": 1.0973, "nll_loss": 0.8887357115745544, "rewards/accuracies": 0.625, "rewards/chosen": -0.06890318542718887, "rewards/margins": 0.043819889426231384, "rewards/rejected": -0.11272308230400085, "step": 4775 }, { "epoch": 2.9135275278328505, "grad_norm": 1.4234050512313843, "learning_rate": 3.3499081445192894e-06, "log_odds_chosen": 1.9846203327178955, "log_odds_ratio": -0.3390839993953705, "logits/chosen": -1.0296645164489746, "logits/rejected": -0.9810591340065002, "logps/chosen": -0.8187562227249146, "logps/rejected": -2.394314765930176, "loss": 1.2588, "nll_loss": 1.1527504920959473, "rewards/accuracies": 0.75, "rewards/chosen": -0.08187562227249146, "rewards/margins": 0.15755584836006165, "rewards/rejected": -0.2394314706325531, "step": 4776 }, { "epoch": 2.9141375629098674, "grad_norm": 1.435205101966858, "learning_rate": 3.348928352725046e-06, "log_odds_chosen": 1.0699657201766968, "log_odds_ratio": -0.4985114634037018, "logits/chosen": -0.9239969849586487, "logits/rejected": -0.9886114001274109, "logps/chosen": -0.8355600833892822, "logps/rejected": -1.4061847925186157, "loss": 1.2961, "nll_loss": 1.192215919494629, "rewards/accuracies": 0.875, "rewards/chosen": -0.08355601131916046, "rewards/margins": 0.057062458246946335, "rewards/rejected": -0.1406184732913971, "step": 4777 }, { "epoch": 2.914747597986884, "grad_norm": 2.4559073448181152, "learning_rate": 3.347948560930802e-06, "log_odds_chosen": 3.485872268676758, "log_odds_ratio": -0.10510797798633575, "logits/chosen": -0.7741485238075256, "logits/rejected": -0.9718379974365234, "logps/chosen": -0.5948939323425293, "logps/rejected": -3.2142810821533203, "loss": 0.9493, "nll_loss": 1.1780494451522827, "rewards/accuracies": 1.0, "rewards/chosen": -0.05948939174413681, "rewards/margins": 0.2619387209415436, "rewards/rejected": -0.321428120136261, "step": 4778 }, { "epoch": 2.915357633063901, "grad_norm": 2.7528412342071533, "learning_rate": 3.3469687691365582e-06, "log_odds_chosen": 1.438461422920227, "log_odds_ratio": -0.4190666079521179, "logits/chosen": -1.0756527185440063, "logits/rejected": -1.0164254903793335, "logps/chosen": -0.8889896869659424, "logps/rejected": -2.0410943031311035, "loss": 1.2187, "nll_loss": 1.0237241983413696, "rewards/accuracies": 0.75, "rewards/chosen": -0.08889897167682648, "rewards/margins": 0.11521043628454208, "rewards/rejected": -0.20410941541194916, "step": 4779 }, { "epoch": 2.915967668140918, "grad_norm": 1.716099500656128, "learning_rate": 3.3459889773423148e-06, "log_odds_chosen": 0.9275381565093994, "log_odds_ratio": -0.5677670836448669, "logits/chosen": -1.0665671825408936, "logits/rejected": -1.1281538009643555, "logps/chosen": -0.7636202573776245, "logps/rejected": -1.5060611963272095, "loss": 1.0679, "nll_loss": 0.961529016494751, "rewards/accuracies": 0.5, "rewards/chosen": -0.07636202871799469, "rewards/margins": 0.07424411177635193, "rewards/rejected": -0.15060614049434662, "step": 4780 }, { "epoch": 2.916577703217935, "grad_norm": 1.8416962623596191, "learning_rate": 3.345009185548071e-06, "log_odds_chosen": 3.561570167541504, "log_odds_ratio": -0.21828484535217285, "logits/chosen": -0.739621639251709, "logits/rejected": -0.7942239046096802, "logps/chosen": -0.5004427433013916, "logps/rejected": -3.1873669624328613, "loss": 0.8878, "nll_loss": 0.8600137233734131, "rewards/accuracies": 0.875, "rewards/chosen": -0.05004427209496498, "rewards/margins": 0.2686924338340759, "rewards/rejected": -0.3187367022037506, "step": 4781 }, { "epoch": 2.917187738294952, "grad_norm": 1.916634202003479, "learning_rate": 3.344029393753827e-06, "log_odds_chosen": 0.9684755206108093, "log_odds_ratio": -0.5106104016304016, "logits/chosen": -0.9239197373390198, "logits/rejected": -0.9296591281890869, "logps/chosen": -0.9455941915512085, "logps/rejected": -1.729851484298706, "loss": 1.2286, "nll_loss": 1.0396106243133545, "rewards/accuracies": 0.625, "rewards/chosen": -0.09455941617488861, "rewards/margins": 0.07842573523521423, "rewards/rejected": -0.17298515141010284, "step": 4782 }, { "epoch": 2.917797773371969, "grad_norm": 5.891666412353516, "learning_rate": 3.3430496019595836e-06, "log_odds_chosen": 0.8977935910224915, "log_odds_ratio": -0.9989359378814697, "logits/chosen": -0.8944525718688965, "logits/rejected": -1.11726713180542, "logps/chosen": -2.076307535171509, "logps/rejected": -2.5555739402770996, "loss": 1.0266, "nll_loss": 0.9417458176612854, "rewards/accuracies": 0.875, "rewards/chosen": -0.20763073861598969, "rewards/margins": 0.047926634550094604, "rewards/rejected": -0.2555573880672455, "step": 4783 }, { "epoch": 2.9184078084489857, "grad_norm": 1.1165658235549927, "learning_rate": 3.3420698101653397e-06, "log_odds_chosen": 1.7288806438446045, "log_odds_ratio": -0.42258012294769287, "logits/chosen": -0.8231194019317627, "logits/rejected": -1.1167585849761963, "logps/chosen": -0.5778706073760986, "logps/rejected": -1.7402969598770142, "loss": 0.8988, "nll_loss": 0.7356243133544922, "rewards/accuracies": 0.75, "rewards/chosen": -0.057787057012319565, "rewards/margins": 0.11624264717102051, "rewards/rejected": -0.17402970790863037, "step": 4784 }, { "epoch": 2.919017843526003, "grad_norm": 1.3721319437026978, "learning_rate": 3.341090018371096e-06, "log_odds_chosen": 1.2573177814483643, "log_odds_ratio": -0.4287697970867157, "logits/chosen": -0.9499119520187378, "logits/rejected": -0.8638588786125183, "logps/chosen": -0.6290727853775024, "logps/rejected": -1.2617650032043457, "loss": 0.9806, "nll_loss": 0.9739575982093811, "rewards/accuracies": 0.625, "rewards/chosen": -0.06290727108716965, "rewards/margins": 0.0632692277431488, "rewards/rejected": -0.12617650628089905, "step": 4785 }, { "epoch": 2.91962787860302, "grad_norm": 1.0159916877746582, "learning_rate": 3.3401102265768523e-06, "log_odds_chosen": 0.1122220903635025, "log_odds_ratio": -0.6590901017189026, "logits/chosen": -1.127236008644104, "logits/rejected": -0.9719440937042236, "logps/chosen": -1.1559069156646729, "logps/rejected": -1.238651156425476, "loss": 0.9721, "nll_loss": 1.2524702548980713, "rewards/accuracies": 0.625, "rewards/chosen": -0.11559068411588669, "rewards/margins": 0.00827442854642868, "rewards/rejected": -0.12386511266231537, "step": 4786 }, { "epoch": 2.9202379136800367, "grad_norm": 8.452188491821289, "learning_rate": 3.3391304347826085e-06, "log_odds_chosen": 1.4024031162261963, "log_odds_ratio": -0.41905421018600464, "logits/chosen": -0.898745596408844, "logits/rejected": -0.8480711579322815, "logps/chosen": -0.8846216201782227, "logps/rejected": -2.0096633434295654, "loss": 1.0716, "nll_loss": 1.0214284658432007, "rewards/accuracies": 0.75, "rewards/chosen": -0.08846215903759003, "rewards/margins": 0.11250416934490204, "rewards/rejected": -0.20096632838249207, "step": 4787 }, { "epoch": 2.9208479487570536, "grad_norm": 1.8612278699874878, "learning_rate": 3.3381506429883646e-06, "log_odds_chosen": 2.1603407859802246, "log_odds_ratio": -0.39469459652900696, "logits/chosen": -0.6937416791915894, "logits/rejected": -0.9045759439468384, "logps/chosen": -0.6673786640167236, "logps/rejected": -2.4320995807647705, "loss": 1.1001, "nll_loss": 0.9033281803131104, "rewards/accuracies": 0.75, "rewards/chosen": -0.06673786789178848, "rewards/margins": 0.17647209763526917, "rewards/rejected": -0.24320995807647705, "step": 4788 }, { "epoch": 2.9214579838340704, "grad_norm": 1.2566897869110107, "learning_rate": 3.337170851194121e-06, "log_odds_chosen": 0.2975664734840393, "log_odds_ratio": -0.6644152402877808, "logits/chosen": -0.8620727062225342, "logits/rejected": -0.9199676513671875, "logps/chosen": -0.8974134922027588, "logps/rejected": -1.145593285560608, "loss": 1.0719, "nll_loss": 1.0815315246582031, "rewards/accuracies": 0.5, "rewards/chosen": -0.08974134922027588, "rewards/margins": 0.02481798455119133, "rewards/rejected": -0.11455933004617691, "step": 4789 }, { "epoch": 2.9220680189110873, "grad_norm": 1.0480904579162598, "learning_rate": 3.3361910593998773e-06, "log_odds_chosen": 2.255396604537964, "log_odds_ratio": -0.3568071126937866, "logits/chosen": -0.7940506935119629, "logits/rejected": -0.8692226409912109, "logps/chosen": -0.6602178812026978, "logps/rejected": -2.4057440757751465, "loss": 0.9982, "nll_loss": 0.7976883053779602, "rewards/accuracies": 0.75, "rewards/chosen": -0.06602180004119873, "rewards/margins": 0.17455258965492249, "rewards/rejected": -0.2405744045972824, "step": 4790 }, { "epoch": 2.922678053988104, "grad_norm": 6.154791355133057, "learning_rate": 3.335211267605634e-06, "log_odds_chosen": 2.026837110519409, "log_odds_ratio": -0.23107677698135376, "logits/chosen": -0.9247945547103882, "logits/rejected": -0.9863263368606567, "logps/chosen": -0.6788723468780518, "logps/rejected": -2.102564811706543, "loss": 0.9758, "nll_loss": 0.9005675315856934, "rewards/accuracies": 1.0, "rewards/chosen": -0.06788723915815353, "rewards/margins": 0.14236924052238464, "rewards/rejected": -0.2102564573287964, "step": 4791 }, { "epoch": 2.9232880890651214, "grad_norm": 1.0483574867248535, "learning_rate": 3.33423147581139e-06, "log_odds_chosen": 1.6165690422058105, "log_odds_ratio": -0.4693780243396759, "logits/chosen": -0.8573173880577087, "logits/rejected": -1.0071626901626587, "logps/chosen": -0.918242871761322, "logps/rejected": -2.279676914215088, "loss": 1.0277, "nll_loss": 1.1133174896240234, "rewards/accuracies": 0.75, "rewards/chosen": -0.09182428568601608, "rewards/margins": 0.13614338636398315, "rewards/rejected": -0.22796767950057983, "step": 4792 }, { "epoch": 2.9238981241421382, "grad_norm": 3.6338253021240234, "learning_rate": 3.333251684017146e-06, "log_odds_chosen": 2.4696602821350098, "log_odds_ratio": -0.42261528968811035, "logits/chosen": -0.6690637469291687, "logits/rejected": -0.8449132442474365, "logps/chosen": -0.6579002737998962, "logps/rejected": -2.620025634765625, "loss": 1.0584, "nll_loss": 0.9547600150108337, "rewards/accuracies": 0.625, "rewards/chosen": -0.06579003483057022, "rewards/margins": 0.1962125599384308, "rewards/rejected": -0.2620025873184204, "step": 4793 }, { "epoch": 2.924508159219155, "grad_norm": 1.2881275415420532, "learning_rate": 3.3322718922229026e-06, "log_odds_chosen": 1.4580717086791992, "log_odds_ratio": -0.4163821041584015, "logits/chosen": -0.7472584247589111, "logits/rejected": -0.797284722328186, "logps/chosen": -0.7165665030479431, "logps/rejected": -1.7202372550964355, "loss": 1.0865, "nll_loss": 0.9171136617660522, "rewards/accuracies": 0.75, "rewards/chosen": -0.07165665179491043, "rewards/margins": 0.10036706179380417, "rewards/rejected": -0.1720237135887146, "step": 4794 }, { "epoch": 2.925118194296172, "grad_norm": 2.2788543701171875, "learning_rate": 3.331292100428659e-06, "log_odds_chosen": 1.6793346405029297, "log_odds_ratio": -0.3641664683818817, "logits/chosen": -0.8750975131988525, "logits/rejected": -0.9370388388633728, "logps/chosen": -0.5301632881164551, "logps/rejected": -1.4687740802764893, "loss": 1.2387, "nll_loss": 1.2920233011245728, "rewards/accuracies": 0.75, "rewards/chosen": -0.05301632359623909, "rewards/margins": 0.09386108815670013, "rewards/rejected": -0.14687742292881012, "step": 4795 }, { "epoch": 2.925728229373189, "grad_norm": 1.3200019598007202, "learning_rate": 3.330312308634415e-06, "log_odds_chosen": 0.5785581469535828, "log_odds_ratio": -0.674020528793335, "logits/chosen": -0.9073840379714966, "logits/rejected": -1.0066412687301636, "logps/chosen": -0.9064794182777405, "logps/rejected": -1.296506404876709, "loss": 1.0403, "nll_loss": 1.062078595161438, "rewards/accuracies": 0.5, "rewards/chosen": -0.09064795076847076, "rewards/margins": 0.03900269791483879, "rewards/rejected": -0.12965063750743866, "step": 4796 }, { "epoch": 2.926338264450206, "grad_norm": 1.4388656616210938, "learning_rate": 3.3293325168401714e-06, "log_odds_chosen": 1.3378708362579346, "log_odds_ratio": -0.32687053084373474, "logits/chosen": -0.825036346912384, "logits/rejected": -0.9577150344848633, "logps/chosen": -0.9375726580619812, "logps/rejected": -1.8424938917160034, "loss": 1.1802, "nll_loss": 1.0868709087371826, "rewards/accuracies": 1.0, "rewards/chosen": -0.0937572717666626, "rewards/margins": 0.0904921144247055, "rewards/rejected": -0.1842493861913681, "step": 4797 }, { "epoch": 2.926948299527223, "grad_norm": 1.5678306818008423, "learning_rate": 3.328352725045928e-06, "log_odds_chosen": 2.1737747192382812, "log_odds_ratio": -0.30462923645973206, "logits/chosen": -0.9173963069915771, "logits/rejected": -0.999774694442749, "logps/chosen": -0.7071649432182312, "logps/rejected": -2.346696615219116, "loss": 0.9732, "nll_loss": 0.8505021333694458, "rewards/accuracies": 0.75, "rewards/chosen": -0.070716492831707, "rewards/margins": 0.16395317018032074, "rewards/rejected": -0.23466967046260834, "step": 4798 }, { "epoch": 2.9275583346042398, "grad_norm": 1.432209849357605, "learning_rate": 3.3273729332516836e-06, "log_odds_chosen": 0.8048866987228394, "log_odds_ratio": -0.4952443838119507, "logits/chosen": -0.9593924880027771, "logits/rejected": -0.871242880821228, "logps/chosen": -1.1227235794067383, "logps/rejected": -1.8383381366729736, "loss": 1.2956, "nll_loss": 1.3501849174499512, "rewards/accuracies": 0.75, "rewards/chosen": -0.11227235943078995, "rewards/margins": 0.07156147062778473, "rewards/rejected": -0.18383383750915527, "step": 4799 }, { "epoch": 2.9281683696812566, "grad_norm": 1.6988672018051147, "learning_rate": 3.32639314145744e-06, "log_odds_chosen": 1.3656868934631348, "log_odds_ratio": -0.5832986831665039, "logits/chosen": -0.6906796097755432, "logits/rejected": -0.7223570346832275, "logps/chosen": -0.744448721408844, "logps/rejected": -1.8427866697311401, "loss": 1.0392, "nll_loss": 0.9262392520904541, "rewards/accuracies": 0.625, "rewards/chosen": -0.07444487512111664, "rewards/margins": 0.10983379185199738, "rewards/rejected": -0.184278666973114, "step": 4800 }, { "epoch": 2.9287784047582734, "grad_norm": 1.9494640827178955, "learning_rate": 3.3254133496631963e-06, "log_odds_chosen": 2.0163731575012207, "log_odds_ratio": -0.22633324563503265, "logits/chosen": -1.04128897190094, "logits/rejected": -1.0213313102722168, "logps/chosen": -0.731980562210083, "logps/rejected": -2.1465001106262207, "loss": 1.0339, "nll_loss": 1.1326534748077393, "rewards/accuracies": 1.0, "rewards/chosen": -0.07319805026054382, "rewards/margins": 0.14145193994045258, "rewards/rejected": -0.2146500051021576, "step": 4801 }, { "epoch": 2.9293884398352903, "grad_norm": 2.0907890796661377, "learning_rate": 3.3244335578689524e-06, "log_odds_chosen": 1.0566630363464355, "log_odds_ratio": -0.5177095532417297, "logits/chosen": -1.1017166376113892, "logits/rejected": -1.105726718902588, "logps/chosen": -0.897610604763031, "logps/rejected": -1.6635794639587402, "loss": 0.9928, "nll_loss": 0.9721555709838867, "rewards/accuracies": 0.625, "rewards/chosen": -0.08976106345653534, "rewards/margins": 0.07659686356782913, "rewards/rejected": -0.16635793447494507, "step": 4802 }, { "epoch": 2.9299984749123076, "grad_norm": 3.5574638843536377, "learning_rate": 3.323453766074709e-06, "log_odds_chosen": 2.420621871948242, "log_odds_ratio": -0.3730299174785614, "logits/chosen": -0.790854811668396, "logits/rejected": -0.795825719833374, "logps/chosen": -0.7221320867538452, "logps/rejected": -2.465526580810547, "loss": 1.1354, "nll_loss": 0.852657675743103, "rewards/accuracies": 0.625, "rewards/chosen": -0.07221321016550064, "rewards/margins": 0.17433945834636688, "rewards/rejected": -0.24655267596244812, "step": 4803 }, { "epoch": 2.9306085099893244, "grad_norm": 7.534546375274658, "learning_rate": 3.322473974280465e-06, "log_odds_chosen": 2.1049070358276367, "log_odds_ratio": -0.3614235818386078, "logits/chosen": -1.0438902378082275, "logits/rejected": -1.0956817865371704, "logps/chosen": -0.8332732915878296, "logps/rejected": -2.5194716453552246, "loss": 1.1754, "nll_loss": 0.983485221862793, "rewards/accuracies": 0.75, "rewards/chosen": -0.08332733809947968, "rewards/margins": 0.16861982643604279, "rewards/rejected": -0.25194716453552246, "step": 4804 }, { "epoch": 2.9312185450663413, "grad_norm": 1.4901138544082642, "learning_rate": 3.3214941824862216e-06, "log_odds_chosen": 1.3942451477050781, "log_odds_ratio": -0.48672857880592346, "logits/chosen": -1.0630580186843872, "logits/rejected": -1.158242106437683, "logps/chosen": -0.979323148727417, "logps/rejected": -2.1567468643188477, "loss": 1.0054, "nll_loss": 1.1406950950622559, "rewards/accuracies": 0.875, "rewards/chosen": -0.09793232381343842, "rewards/margins": 0.1177423745393753, "rewards/rejected": -0.21567469835281372, "step": 4805 }, { "epoch": 2.931828580143358, "grad_norm": 1.220507025718689, "learning_rate": 3.3205143906919778e-06, "log_odds_chosen": 0.670073390007019, "log_odds_ratio": -0.5858919620513916, "logits/chosen": -0.8534173965454102, "logits/rejected": -0.862397313117981, "logps/chosen": -0.917203426361084, "logps/rejected": -1.3646095991134644, "loss": 1.0493, "nll_loss": 0.9476531744003296, "rewards/accuracies": 0.625, "rewards/chosen": -0.0917203426361084, "rewards/margins": 0.04474061727523804, "rewards/rejected": -0.13646095991134644, "step": 4806 }, { "epoch": 2.9324386152203754, "grad_norm": 1.4489740133285522, "learning_rate": 3.319534598897734e-06, "log_odds_chosen": 4.649693965911865, "log_odds_ratio": -0.2848890423774719, "logits/chosen": -0.9035449028015137, "logits/rejected": -0.8689231872558594, "logps/chosen": -0.5990315675735474, "logps/rejected": -4.5671706199646, "loss": 0.966, "nll_loss": 1.047696828842163, "rewards/accuracies": 0.875, "rewards/chosen": -0.059903163462877274, "rewards/margins": 0.39681389927864075, "rewards/rejected": -0.45671704411506653, "step": 4807 }, { "epoch": 2.9330486502973923, "grad_norm": 1.5656315088272095, "learning_rate": 3.3185548071034904e-06, "log_odds_chosen": 2.709106683731079, "log_odds_ratio": -0.3279728591442108, "logits/chosen": -0.9273431301116943, "logits/rejected": -1.026426911354065, "logps/chosen": -0.6057923436164856, "logps/rejected": -2.725586414337158, "loss": 0.9454, "nll_loss": 0.8785603642463684, "rewards/accuracies": 0.875, "rewards/chosen": -0.06057924032211304, "rewards/margins": 0.21197938919067383, "rewards/rejected": -0.27255862951278687, "step": 4808 }, { "epoch": 2.933658685374409, "grad_norm": 1.8108227252960205, "learning_rate": 3.317575015309247e-06, "log_odds_chosen": 1.3928799629211426, "log_odds_ratio": -0.4410783648490906, "logits/chosen": -0.9050549268722534, "logits/rejected": -1.0302143096923828, "logps/chosen": -0.6297612190246582, "logps/rejected": -1.5171393156051636, "loss": 1.0432, "nll_loss": 0.9883219599723816, "rewards/accuracies": 0.75, "rewards/chosen": -0.06297612190246582, "rewards/margins": 0.08873780071735382, "rewards/rejected": -0.15171392261981964, "step": 4809 }, { "epoch": 2.934268720451426, "grad_norm": 1.2244197130203247, "learning_rate": 3.3165952235150027e-06, "log_odds_chosen": 1.7423242330551147, "log_odds_ratio": -0.46504464745521545, "logits/chosen": -0.8333728313446045, "logits/rejected": -0.860974907875061, "logps/chosen": -0.6470747590065002, "logps/rejected": -1.8634998798370361, "loss": 0.9397, "nll_loss": 0.8597415685653687, "rewards/accuracies": 0.875, "rewards/chosen": -0.06470747292041779, "rewards/margins": 0.12164251506328583, "rewards/rejected": -0.1863499879837036, "step": 4810 }, { "epoch": 2.934878755528443, "grad_norm": 1.4369778633117676, "learning_rate": 3.3156154317207592e-06, "log_odds_chosen": 1.4611661434173584, "log_odds_ratio": -0.44788238406181335, "logits/chosen": -1.0554949045181274, "logits/rejected": -1.1675416231155396, "logps/chosen": -1.1792924404144287, "logps/rejected": -2.4078640937805176, "loss": 1.217, "nll_loss": 1.4302120208740234, "rewards/accuracies": 0.625, "rewards/chosen": -0.11792925745248795, "rewards/margins": 0.12285717576742172, "rewards/rejected": -0.24078643321990967, "step": 4811 }, { "epoch": 2.9354887906054596, "grad_norm": 1.997085690498352, "learning_rate": 3.3146356399265158e-06, "log_odds_chosen": 2.922128677368164, "log_odds_ratio": -0.5784552693367004, "logits/chosen": -0.6927484273910522, "logits/rejected": -0.9395272135734558, "logps/chosen": -0.7002187371253967, "logps/rejected": -3.0540528297424316, "loss": 1.1396, "nll_loss": 1.010956048965454, "rewards/accuracies": 0.625, "rewards/chosen": -0.07002188265323639, "rewards/margins": 0.23538342118263245, "rewards/rejected": -0.30540528893470764, "step": 4812 }, { "epoch": 2.9360988256824765, "grad_norm": 1.5978668928146362, "learning_rate": 3.3136558481322715e-06, "log_odds_chosen": 1.6717060804367065, "log_odds_ratio": -0.3798084557056427, "logits/chosen": -0.9563499689102173, "logits/rejected": -1.1253395080566406, "logps/chosen": -0.7424737215042114, "logps/rejected": -1.926335096359253, "loss": 0.956, "nll_loss": 0.9531779289245605, "rewards/accuracies": 0.75, "rewards/chosen": -0.07424737513065338, "rewards/margins": 0.11838614195585251, "rewards/rejected": -0.1926335096359253, "step": 4813 }, { "epoch": 2.9367088607594938, "grad_norm": 1.9703288078308105, "learning_rate": 3.312676056338028e-06, "log_odds_chosen": 2.68284273147583, "log_odds_ratio": -0.28681373596191406, "logits/chosen": -0.8152806162834167, "logits/rejected": -1.0124677419662476, "logps/chosen": -0.803546667098999, "logps/rejected": -2.959381580352783, "loss": 1.0398, "nll_loss": 0.9808831214904785, "rewards/accuracies": 0.875, "rewards/chosen": -0.08035466820001602, "rewards/margins": 0.21558350324630737, "rewards/rejected": -0.2959381341934204, "step": 4814 }, { "epoch": 2.9373188958365106, "grad_norm": 1.941032886505127, "learning_rate": 3.3116962645437846e-06, "log_odds_chosen": 0.8370131850242615, "log_odds_ratio": -0.6381669044494629, "logits/chosen": -0.8525350093841553, "logits/rejected": -0.9925979375839233, "logps/chosen": -0.7864370942115784, "logps/rejected": -1.5113739967346191, "loss": 0.9849, "nll_loss": 0.9480661153793335, "rewards/accuracies": 0.5, "rewards/chosen": -0.07864370942115784, "rewards/margins": 0.07249370217323303, "rewards/rejected": -0.15113741159439087, "step": 4815 }, { "epoch": 2.9379289309135275, "grad_norm": 1.4022949934005737, "learning_rate": 3.3107164727495407e-06, "log_odds_chosen": 3.0965206623077393, "log_odds_ratio": -0.2998640239238739, "logits/chosen": -1.0427989959716797, "logits/rejected": -1.2160191535949707, "logps/chosen": -0.9241361618041992, "logps/rejected": -3.5724093914031982, "loss": 1.2006, "nll_loss": 1.1128660440444946, "rewards/accuracies": 0.875, "rewards/chosen": -0.09241361171007156, "rewards/margins": 0.2648273706436157, "rewards/rejected": -0.3572409749031067, "step": 4816 }, { "epoch": 2.9385389659905443, "grad_norm": 4.293566703796387, "learning_rate": 3.309736680955297e-06, "log_odds_chosen": 1.2907228469848633, "log_odds_ratio": -0.45300036668777466, "logits/chosen": -0.9213336110115051, "logits/rejected": -0.9949852228164673, "logps/chosen": -0.8144038915634155, "logps/rejected": -1.6997179985046387, "loss": 0.8903, "nll_loss": 0.9758756756782532, "rewards/accuracies": 0.75, "rewards/chosen": -0.08144039660692215, "rewards/margins": 0.08853141218423843, "rewards/rejected": -0.16997180879116058, "step": 4817 }, { "epoch": 2.9391490010675616, "grad_norm": 1.5453808307647705, "learning_rate": 3.3087568891610533e-06, "log_odds_chosen": 2.958700180053711, "log_odds_ratio": -0.1945856660604477, "logits/chosen": -0.8408765196800232, "logits/rejected": -0.8721216917037964, "logps/chosen": -0.6290261149406433, "logps/rejected": -2.8424153327941895, "loss": 0.9758, "nll_loss": 0.9057367444038391, "rewards/accuracies": 0.875, "rewards/chosen": -0.06290261447429657, "rewards/margins": 0.2213389128446579, "rewards/rejected": -0.28424152731895447, "step": 4818 }, { "epoch": 2.9397590361445785, "grad_norm": 1.2209107875823975, "learning_rate": 3.3077770973668095e-06, "log_odds_chosen": 2.4943950176239014, "log_odds_ratio": -0.2709553837776184, "logits/chosen": -0.8019095659255981, "logits/rejected": -1.00727379322052, "logps/chosen": -0.8331654071807861, "logps/rejected": -2.882200241088867, "loss": 1.0005, "nll_loss": 1.023582100868225, "rewards/accuracies": 1.0, "rewards/chosen": -0.08331653475761414, "rewards/margins": 0.2049034684896469, "rewards/rejected": -0.28821998834609985, "step": 4819 }, { "epoch": 2.9403690712215953, "grad_norm": 6.545192241668701, "learning_rate": 3.3067973055725656e-06, "log_odds_chosen": 1.687136173248291, "log_odds_ratio": -0.3422250747680664, "logits/chosen": -0.874937891960144, "logits/rejected": -0.9595038890838623, "logps/chosen": -0.8702359199523926, "logps/rejected": -2.2829883098602295, "loss": 1.0726, "nll_loss": 1.0469744205474854, "rewards/accuracies": 0.875, "rewards/chosen": -0.08702360093593597, "rewards/margins": 0.14127522706985474, "rewards/rejected": -0.2282988280057907, "step": 4820 }, { "epoch": 2.940979106298612, "grad_norm": 1.8841675519943237, "learning_rate": 3.3058175137783217e-06, "log_odds_chosen": 2.650219440460205, "log_odds_ratio": -0.37000805139541626, "logits/chosen": -0.9229704141616821, "logits/rejected": -1.1703931093215942, "logps/chosen": -0.7265105247497559, "logps/rejected": -2.813690662384033, "loss": 1.0224, "nll_loss": 0.9505466222763062, "rewards/accuracies": 0.625, "rewards/chosen": -0.07265105098485947, "rewards/margins": 0.20871801674365997, "rewards/rejected": -0.28136909008026123, "step": 4821 }, { "epoch": 2.941589141375629, "grad_norm": 9.858010292053223, "learning_rate": 3.3048377219840783e-06, "log_odds_chosen": 1.1732361316680908, "log_odds_ratio": -0.5245269536972046, "logits/chosen": -1.027198076248169, "logits/rejected": -1.0301979780197144, "logps/chosen": -0.8315369486808777, "logps/rejected": -1.8106653690338135, "loss": 0.9653, "nll_loss": 0.9226566553115845, "rewards/accuracies": 0.625, "rewards/chosen": -0.08315370231866837, "rewards/margins": 0.09791283309459686, "rewards/rejected": -0.18106654286384583, "step": 4822 }, { "epoch": 2.942199176452646, "grad_norm": 1.381584882736206, "learning_rate": 3.303857930189835e-06, "log_odds_chosen": 2.144866943359375, "log_odds_ratio": -0.4409351348876953, "logits/chosen": -0.8782771825790405, "logits/rejected": -0.9140549898147583, "logps/chosen": -0.8335056304931641, "logps/rejected": -2.3039536476135254, "loss": 1.0634, "nll_loss": 1.1207345724105835, "rewards/accuracies": 0.625, "rewards/chosen": -0.08335056900978088, "rewards/margins": 0.1470448076725006, "rewards/rejected": -0.2303953766822815, "step": 4823 }, { "epoch": 2.9428092115296627, "grad_norm": 7.873528003692627, "learning_rate": 3.3028781383955905e-06, "log_odds_chosen": 0.8580037355422974, "log_odds_ratio": -0.7438103556632996, "logits/chosen": -0.9614191651344299, "logits/rejected": -0.9686498045921326, "logps/chosen": -0.8345732688903809, "logps/rejected": -1.6895512342453003, "loss": 1.042, "nll_loss": 1.104688286781311, "rewards/accuracies": 0.375, "rewards/chosen": -0.08345732092857361, "rewards/margins": 0.08549780398607254, "rewards/rejected": -0.16895513236522675, "step": 4824 }, { "epoch": 2.94341924660668, "grad_norm": 1.809743046760559, "learning_rate": 3.301898346601347e-06, "log_odds_chosen": 2.104984998703003, "log_odds_ratio": -0.32131433486938477, "logits/chosen": -0.8257946968078613, "logits/rejected": -0.947047233581543, "logps/chosen": -0.7322955131530762, "logps/rejected": -2.3186450004577637, "loss": 0.9935, "nll_loss": 0.9036574959754944, "rewards/accuracies": 0.875, "rewards/chosen": -0.07322955131530762, "rewards/margins": 0.15863493084907532, "rewards/rejected": -0.23186448216438293, "step": 4825 }, { "epoch": 2.944029281683697, "grad_norm": 5.773399829864502, "learning_rate": 3.3009185548071036e-06, "log_odds_chosen": 1.4865915775299072, "log_odds_ratio": -0.5074082612991333, "logits/chosen": -0.8712942600250244, "logits/rejected": -0.9484997391700745, "logps/chosen": -0.742583155632019, "logps/rejected": -1.9524834156036377, "loss": 1.1178, "nll_loss": 0.9344189167022705, "rewards/accuracies": 0.625, "rewards/chosen": -0.07425831258296967, "rewards/margins": 0.12099002301692963, "rewards/rejected": -0.1952483355998993, "step": 4826 }, { "epoch": 2.9446393167607137, "grad_norm": 5.81112003326416, "learning_rate": 3.2999387630128593e-06, "log_odds_chosen": 2.8748936653137207, "log_odds_ratio": -0.2126019150018692, "logits/chosen": -0.7862632274627686, "logits/rejected": -0.927570641040802, "logps/chosen": -0.6658556461334229, "logps/rejected": -2.970885753631592, "loss": 1.0372, "nll_loss": 1.0402719974517822, "rewards/accuracies": 1.0, "rewards/chosen": -0.06658557057380676, "rewards/margins": 0.23050302267074585, "rewards/rejected": -0.2970885634422302, "step": 4827 }, { "epoch": 2.9452493518377305, "grad_norm": 0.9496036171913147, "learning_rate": 3.298958971218616e-06, "log_odds_chosen": 1.6977035999298096, "log_odds_ratio": -0.39259013533592224, "logits/chosen": -0.5683395862579346, "logits/rejected": -0.5730608701705933, "logps/chosen": -0.5535447597503662, "logps/rejected": -1.7991186380386353, "loss": 0.9764, "nll_loss": 0.616466760635376, "rewards/accuracies": 0.875, "rewards/chosen": -0.05535447597503662, "rewards/margins": 0.12455738335847855, "rewards/rejected": -0.17991185188293457, "step": 4828 }, { "epoch": 2.945859386914748, "grad_norm": 3.7555532455444336, "learning_rate": 3.2979791794243724e-06, "log_odds_chosen": 1.7516865730285645, "log_odds_ratio": -0.3344641327857971, "logits/chosen": -1.0677564144134521, "logits/rejected": -0.8595576286315918, "logps/chosen": -0.9110859632492065, "logps/rejected": -2.2647202014923096, "loss": 1.0778, "nll_loss": 1.0699865818023682, "rewards/accuracies": 0.875, "rewards/chosen": -0.09110859036445618, "rewards/margins": 0.1353634148836136, "rewards/rejected": -0.22647200524806976, "step": 4829 }, { "epoch": 2.9464694219917646, "grad_norm": 1.6562864780426025, "learning_rate": 3.2969993876301285e-06, "log_odds_chosen": 1.4243395328521729, "log_odds_ratio": -0.47903284430503845, "logits/chosen": -0.8638156652450562, "logits/rejected": -1.0250202417373657, "logps/chosen": -0.7972966432571411, "logps/rejected": -1.8218425512313843, "loss": 1.0793, "nll_loss": 1.1990249156951904, "rewards/accuracies": 0.625, "rewards/chosen": -0.07972966134548187, "rewards/margins": 0.10245460271835327, "rewards/rejected": -0.18218426406383514, "step": 4830 }, { "epoch": 2.9470794570687815, "grad_norm": 1.9778645038604736, "learning_rate": 3.2960195958358846e-06, "log_odds_chosen": 1.0083261728286743, "log_odds_ratio": -0.43729862570762634, "logits/chosen": -1.025683879852295, "logits/rejected": -1.09165358543396, "logps/chosen": -0.9327347874641418, "logps/rejected": -1.6934789419174194, "loss": 0.9003, "nll_loss": 1.000160574913025, "rewards/accuracies": 0.75, "rewards/chosen": -0.09327347576618195, "rewards/margins": 0.07607442140579224, "rewards/rejected": -0.16934789717197418, "step": 4831 }, { "epoch": 2.9476894921457983, "grad_norm": 2.697903633117676, "learning_rate": 3.295039804041641e-06, "log_odds_chosen": 2.0785984992980957, "log_odds_ratio": -0.21190349757671356, "logits/chosen": -0.8830929398536682, "logits/rejected": -0.948867917060852, "logps/chosen": -0.6708933115005493, "logps/rejected": -2.1071863174438477, "loss": 1.1921, "nll_loss": 0.9120824337005615, "rewards/accuracies": 1.0, "rewards/chosen": -0.06708932667970657, "rewards/margins": 0.14362932741641998, "rewards/rejected": -0.21071866154670715, "step": 4832 }, { "epoch": 2.948299527222815, "grad_norm": 1.2018969058990479, "learning_rate": 3.2940600122473973e-06, "log_odds_chosen": 1.1949636936187744, "log_odds_ratio": -0.3571511507034302, "logits/chosen": -0.8216193914413452, "logits/rejected": -0.8309022784233093, "logps/chosen": -0.773327112197876, "logps/rejected": -1.5340633392333984, "loss": 1.0093, "nll_loss": 1.0907135009765625, "rewards/accuracies": 0.875, "rewards/chosen": -0.07733271270990372, "rewards/margins": 0.07607361674308777, "rewards/rejected": -0.15340633690357208, "step": 4833 }, { "epoch": 2.948909562299832, "grad_norm": 3.6499733924865723, "learning_rate": 3.2930802204531534e-06, "log_odds_chosen": 0.8036466836929321, "log_odds_ratio": -0.6866418719291687, "logits/chosen": -0.8619742393493652, "logits/rejected": -0.9047386646270752, "logps/chosen": -0.790668249130249, "logps/rejected": -1.3064990043640137, "loss": 1.0545, "nll_loss": 1.1259725093841553, "rewards/accuracies": 0.5, "rewards/chosen": -0.07906682789325714, "rewards/margins": 0.05158306658267975, "rewards/rejected": -0.1306498944759369, "step": 4834 }, { "epoch": 2.9495195973768493, "grad_norm": 2.824176788330078, "learning_rate": 3.29210042865891e-06, "log_odds_chosen": 4.210100173950195, "log_odds_ratio": -0.108241967856884, "logits/chosen": -0.8048100471496582, "logits/rejected": -1.0806646347045898, "logps/chosen": -0.45345044136047363, "logps/rejected": -3.605440139770508, "loss": 1.0175, "nll_loss": 0.7876901030540466, "rewards/accuracies": 1.0, "rewards/chosen": -0.04534504562616348, "rewards/margins": 0.31519895792007446, "rewards/rejected": -0.36054402589797974, "step": 4835 }, { "epoch": 2.950129632453866, "grad_norm": 1.7045284509658813, "learning_rate": 3.291120636864666e-06, "log_odds_chosen": 0.9805537462234497, "log_odds_ratio": -0.4807613492012024, "logits/chosen": -0.9235484600067139, "logits/rejected": -1.0100746154785156, "logps/chosen": -0.9398963451385498, "logps/rejected": -1.6926521062850952, "loss": 1.1434, "nll_loss": 1.0374810695648193, "rewards/accuracies": 0.75, "rewards/chosen": -0.09398964047431946, "rewards/margins": 0.07527557760477066, "rewards/rejected": -0.16926521062850952, "step": 4836 }, { "epoch": 2.950739667530883, "grad_norm": 1.7442548274993896, "learning_rate": 3.2901408450704226e-06, "log_odds_chosen": 2.468972682952881, "log_odds_ratio": -0.3373379111289978, "logits/chosen": -0.8863984942436218, "logits/rejected": -1.050168752670288, "logps/chosen": -0.728547215461731, "logps/rejected": -2.633087635040283, "loss": 0.9365, "nll_loss": 0.8440902233123779, "rewards/accuracies": 0.75, "rewards/chosen": -0.07285472750663757, "rewards/margins": 0.19045403599739075, "rewards/rejected": -0.2633087635040283, "step": 4837 }, { "epoch": 2.9513497026079, "grad_norm": 1.2903450727462769, "learning_rate": 3.2891610532761788e-06, "log_odds_chosen": 1.0400567054748535, "log_odds_ratio": -0.5551160573959351, "logits/chosen": -1.0934576988220215, "logits/rejected": -0.9112232327461243, "logps/chosen": -1.2045139074325562, "logps/rejected": -2.0937387943267822, "loss": 1.1598, "nll_loss": 1.3148479461669922, "rewards/accuracies": 0.625, "rewards/chosen": -0.12045140564441681, "rewards/margins": 0.08892248570919037, "rewards/rejected": -0.20937387645244598, "step": 4838 }, { "epoch": 2.951959737684917, "grad_norm": 1.815185308456421, "learning_rate": 3.288181261481935e-06, "log_odds_chosen": 0.3874950110912323, "log_odds_ratio": -0.5739637613296509, "logits/chosen": -0.9500253200531006, "logits/rejected": -0.9601253271102905, "logps/chosen": -0.8101381063461304, "logps/rejected": -1.0647035837173462, "loss": 1.0917, "nll_loss": 1.0048531293869019, "rewards/accuracies": 0.75, "rewards/chosen": -0.08101381361484528, "rewards/margins": 0.02545654959976673, "rewards/rejected": -0.10647035390138626, "step": 4839 }, { "epoch": 2.952569772761934, "grad_norm": 9.878759384155273, "learning_rate": 3.2872014696876914e-06, "log_odds_chosen": 1.1562762260437012, "log_odds_ratio": -0.6420925855636597, "logits/chosen": -0.7987712621688843, "logits/rejected": -0.8349318504333496, "logps/chosen": -0.8488391041755676, "logps/rejected": -1.7644422054290771, "loss": 1.0523, "nll_loss": 0.9196742177009583, "rewards/accuracies": 0.5, "rewards/chosen": -0.084883913397789, "rewards/margins": 0.09156031906604767, "rewards/rejected": -0.17644423246383667, "step": 4840 }, { "epoch": 2.953179807838951, "grad_norm": 1.8425339460372925, "learning_rate": 3.286221677893447e-06, "log_odds_chosen": 0.7878081798553467, "log_odds_ratio": -0.5217243432998657, "logits/chosen": -1.0234723091125488, "logits/rejected": -0.760826826095581, "logps/chosen": -1.2373669147491455, "logps/rejected": -1.8504893779754639, "loss": 1.2827, "nll_loss": 1.2725857496261597, "rewards/accuracies": 0.5, "rewards/chosen": -0.12373669445514679, "rewards/margins": 0.0613122433423996, "rewards/rejected": -0.1850489377975464, "step": 4841 }, { "epoch": 2.9537898429159677, "grad_norm": 2.519357204437256, "learning_rate": 3.2852418860992037e-06, "log_odds_chosen": 0.934662938117981, "log_odds_ratio": -0.5409574508666992, "logits/chosen": -0.8560566306114197, "logits/rejected": -0.9311399459838867, "logps/chosen": -0.7930974960327148, "logps/rejected": -1.4285874366760254, "loss": 0.9846, "nll_loss": 1.0059897899627686, "rewards/accuracies": 0.75, "rewards/chosen": -0.07930974662303925, "rewards/margins": 0.0635489970445633, "rewards/rejected": -0.14285874366760254, "step": 4842 }, { "epoch": 2.9543998779929845, "grad_norm": 3.360849380493164, "learning_rate": 3.28426209430496e-06, "log_odds_chosen": 1.5104897022247314, "log_odds_ratio": -0.4855635166168213, "logits/chosen": -0.7859467267990112, "logits/rejected": -0.8448532819747925, "logps/chosen": -0.6504095792770386, "logps/rejected": -1.5441536903381348, "loss": 1.0689, "nll_loss": 1.0249006748199463, "rewards/accuracies": 0.75, "rewards/chosen": -0.0650409609079361, "rewards/margins": 0.08937440067529678, "rewards/rejected": -0.15441536903381348, "step": 4843 }, { "epoch": 2.9550099130700014, "grad_norm": 1.2115063667297363, "learning_rate": 3.2832823025107163e-06, "log_odds_chosen": 2.595158576965332, "log_odds_ratio": -0.4858609139919281, "logits/chosen": -0.7036570310592651, "logits/rejected": -0.8539401888847351, "logps/chosen": -0.9336864948272705, "logps/rejected": -3.0436673164367676, "loss": 1.0878, "nll_loss": 1.1006433963775635, "rewards/accuracies": 0.625, "rewards/chosen": -0.09336864948272705, "rewards/margins": 0.21099808812141418, "rewards/rejected": -0.30436673760414124, "step": 4844 }, { "epoch": 2.9556199481470182, "grad_norm": 1.5194796323776245, "learning_rate": 3.2823025107164725e-06, "log_odds_chosen": 1.1286003589630127, "log_odds_ratio": -0.5204211473464966, "logits/chosen": -0.8660851716995239, "logits/rejected": -1.0054959058761597, "logps/chosen": -0.8564012050628662, "logps/rejected": -1.7507963180541992, "loss": 1.0591, "nll_loss": 0.9257715344429016, "rewards/accuracies": 0.5, "rewards/chosen": -0.08564011752605438, "rewards/margins": 0.0894395187497139, "rewards/rejected": -0.17507962882518768, "step": 4845 }, { "epoch": 2.9562299832240355, "grad_norm": 2.185706615447998, "learning_rate": 3.281322718922229e-06, "log_odds_chosen": 1.597198247909546, "log_odds_ratio": -0.629595160484314, "logits/chosen": -0.9157786965370178, "logits/rejected": -0.9670588374137878, "logps/chosen": -0.8604176640510559, "logps/rejected": -2.153038263320923, "loss": 1.1914, "nll_loss": 1.0115679502487183, "rewards/accuracies": 0.5, "rewards/chosen": -0.08604177087545395, "rewards/margins": 0.12926208972930908, "rewards/rejected": -0.21530385315418243, "step": 4846 }, { "epoch": 2.9568400183010524, "grad_norm": 1.531349778175354, "learning_rate": 3.280342927127985e-06, "log_odds_chosen": 1.8654568195343018, "log_odds_ratio": -0.37934789061546326, "logits/chosen": -0.9012335538864136, "logits/rejected": -1.0931764841079712, "logps/chosen": -0.8099726438522339, "logps/rejected": -2.248765707015991, "loss": 1.0863, "nll_loss": 1.0973684787750244, "rewards/accuracies": 0.875, "rewards/chosen": -0.08099726587533951, "rewards/margins": 0.14387929439544678, "rewards/rejected": -0.2248765528202057, "step": 4847 }, { "epoch": 2.957450053378069, "grad_norm": 1.61969792842865, "learning_rate": 3.2793631353337412e-06, "log_odds_chosen": 1.0242475271224976, "log_odds_ratio": -0.6353286504745483, "logits/chosen": -0.7890360355377197, "logits/rejected": -0.7580097317695618, "logps/chosen": -0.9189114570617676, "logps/rejected": -1.751204252243042, "loss": 1.1212, "nll_loss": 1.0935685634613037, "rewards/accuracies": 0.625, "rewards/chosen": -0.09189115464687347, "rewards/margins": 0.08322928845882416, "rewards/rejected": -0.17512044310569763, "step": 4848 }, { "epoch": 2.958060088455086, "grad_norm": 2.8512184619903564, "learning_rate": 3.278383343539498e-06, "log_odds_chosen": 0.9106915593147278, "log_odds_ratio": -0.4350036382675171, "logits/chosen": -1.000522494316101, "logits/rejected": -0.8668465614318848, "logps/chosen": -0.6638549566268921, "logps/rejected": -1.270817756652832, "loss": 0.8712, "nll_loss": 0.8784564733505249, "rewards/accuracies": 0.75, "rewards/chosen": -0.06638550758361816, "rewards/margins": 0.06069627031683922, "rewards/rejected": -0.12708178162574768, "step": 4849 }, { "epoch": 2.9586701235321033, "grad_norm": 1.2065584659576416, "learning_rate": 3.277403551745254e-06, "log_odds_chosen": 2.6259045600891113, "log_odds_ratio": -0.3218516409397125, "logits/chosen": -0.6605961322784424, "logits/rejected": -0.7913293242454529, "logps/chosen": -0.5970744490623474, "logps/rejected": -2.5806829929351807, "loss": 0.9769, "nll_loss": 0.7715495824813843, "rewards/accuracies": 0.875, "rewards/chosen": -0.05970745533704758, "rewards/margins": 0.1983608454465866, "rewards/rejected": -0.258068323135376, "step": 4850 }, { "epoch": 2.95928015860912, "grad_norm": 1.7954803705215454, "learning_rate": 3.2764237599510105e-06, "log_odds_chosen": 1.7372114658355713, "log_odds_ratio": -0.5295813083648682, "logits/chosen": -0.8953279852867126, "logits/rejected": -0.9960646033287048, "logps/chosen": -0.9946457743644714, "logps/rejected": -2.365812063217163, "loss": 1.1481, "nll_loss": 1.323824167251587, "rewards/accuracies": 0.75, "rewards/chosen": -0.09946458041667938, "rewards/margins": 0.13711662590503693, "rewards/rejected": -0.2365812063217163, "step": 4851 }, { "epoch": 2.959890193686137, "grad_norm": 2.5300323963165283, "learning_rate": 3.2754439681567666e-06, "log_odds_chosen": 2.014310836791992, "log_odds_ratio": -0.3194791376590729, "logits/chosen": -0.6602624654769897, "logits/rejected": -0.9212468862533569, "logps/chosen": -0.5604242086410522, "logps/rejected": -2.031996726989746, "loss": 0.9603, "nll_loss": 0.6438473463058472, "rewards/accuracies": 0.75, "rewards/chosen": -0.056042421609163284, "rewards/margins": 0.14715725183486938, "rewards/rejected": -0.20319965481758118, "step": 4852 }, { "epoch": 2.960500228763154, "grad_norm": 1.2763797044754028, "learning_rate": 3.2744641763625227e-06, "log_odds_chosen": 1.3969144821166992, "log_odds_ratio": -0.48528414964675903, "logits/chosen": -0.8702142238616943, "logits/rejected": -0.9155288934707642, "logps/chosen": -0.8857113122940063, "logps/rejected": -2.0268263816833496, "loss": 1.0572, "nll_loss": 1.0810229778289795, "rewards/accuracies": 0.75, "rewards/chosen": -0.08857113122940063, "rewards/margins": 0.1141115203499794, "rewards/rejected": -0.20268267393112183, "step": 4853 }, { "epoch": 2.9611102638401707, "grad_norm": 4.064289569854736, "learning_rate": 3.2734843845682793e-06, "log_odds_chosen": 2.156733512878418, "log_odds_ratio": -0.27262890338897705, "logits/chosen": -0.7646633982658386, "logits/rejected": -1.0087226629257202, "logps/chosen": -0.595020592212677, "logps/rejected": -2.1122536659240723, "loss": 0.9302, "nll_loss": 0.8148499131202698, "rewards/accuracies": 1.0, "rewards/chosen": -0.05950205773115158, "rewards/margins": 0.15172331035137177, "rewards/rejected": -0.21122537553310394, "step": 4854 }, { "epoch": 2.9617202989171876, "grad_norm": 2.1718060970306396, "learning_rate": 3.2725045927740354e-06, "log_odds_chosen": 1.6360963582992554, "log_odds_ratio": -0.4947907626628876, "logits/chosen": -0.7920633554458618, "logits/rejected": -0.9135392904281616, "logps/chosen": -0.7372804880142212, "logps/rejected": -1.6237059831619263, "loss": 1.295, "nll_loss": 1.0978162288665771, "rewards/accuracies": 0.75, "rewards/chosen": -0.073728047311306, "rewards/margins": 0.08864254504442215, "rewards/rejected": -0.16237060725688934, "step": 4855 }, { "epoch": 2.9623303339942044, "grad_norm": 1.5102280378341675, "learning_rate": 3.2715248009797915e-06, "log_odds_chosen": 1.3673555850982666, "log_odds_ratio": -0.4528788924217224, "logits/chosen": -0.8929373621940613, "logits/rejected": -1.128244400024414, "logps/chosen": -0.7919518947601318, "logps/rejected": -1.6395962238311768, "loss": 0.9682, "nll_loss": 0.8356786966323853, "rewards/accuracies": 0.625, "rewards/chosen": -0.07919518649578094, "rewards/margins": 0.08476442843675613, "rewards/rejected": -0.16395962238311768, "step": 4856 }, { "epoch": 2.9629403690712217, "grad_norm": 1.5121707916259766, "learning_rate": 3.270545009185548e-06, "log_odds_chosen": 2.3980443477630615, "log_odds_ratio": -0.2294667661190033, "logits/chosen": -0.7542887330055237, "logits/rejected": -0.8005400896072388, "logps/chosen": -0.6646111607551575, "logps/rejected": -2.4182701110839844, "loss": 0.8206, "nll_loss": 0.7946910262107849, "rewards/accuracies": 1.0, "rewards/chosen": -0.06646112352609634, "rewards/margins": 0.1753658950328827, "rewards/rejected": -0.24182701110839844, "step": 4857 }, { "epoch": 2.9635504041482386, "grad_norm": 8.406465530395508, "learning_rate": 3.2695652173913046e-06, "log_odds_chosen": 1.1639912128448486, "log_odds_ratio": -0.34131723642349243, "logits/chosen": -1.0627634525299072, "logits/rejected": -1.017975091934204, "logps/chosen": -0.9432194232940674, "logps/rejected": -1.8282115459442139, "loss": 1.0297, "nll_loss": 1.0727659463882446, "rewards/accuracies": 0.75, "rewards/chosen": -0.09432194381952286, "rewards/margins": 0.08849921822547913, "rewards/rejected": -0.1828211545944214, "step": 4858 }, { "epoch": 2.9641604392252554, "grad_norm": 7.800058364868164, "learning_rate": 3.2685854255970603e-06, "log_odds_chosen": 1.8894240856170654, "log_odds_ratio": -0.2909250557422638, "logits/chosen": -0.6225313544273376, "logits/rejected": -0.87204909324646, "logps/chosen": -0.7765974998474121, "logps/rejected": -2.147308826446533, "loss": 1.2792, "nll_loss": 0.9465028047561646, "rewards/accuracies": 1.0, "rewards/chosen": -0.07765974849462509, "rewards/margins": 0.1370711475610733, "rewards/rejected": -0.2147308886051178, "step": 4859 }, { "epoch": 2.9647704743022723, "grad_norm": 8.025651931762695, "learning_rate": 3.267605633802817e-06, "log_odds_chosen": 0.7669002413749695, "log_odds_ratio": -0.5968100428581238, "logits/chosen": -0.75031977891922, "logits/rejected": -0.8237887620925903, "logps/chosen": -0.8136406540870667, "logps/rejected": -1.2918627262115479, "loss": 0.9025, "nll_loss": 0.8729413747787476, "rewards/accuracies": 0.625, "rewards/chosen": -0.08136407285928726, "rewards/margins": 0.04782218486070633, "rewards/rejected": -0.12918627262115479, "step": 4860 }, { "epoch": 2.9653805093792895, "grad_norm": 2.3132429122924805, "learning_rate": 3.266625842008573e-06, "log_odds_chosen": 2.130002737045288, "log_odds_ratio": -0.4827238917350769, "logits/chosen": -1.0726181268692017, "logits/rejected": -1.1425023078918457, "logps/chosen": -0.9772014617919922, "logps/rejected": -2.7494466304779053, "loss": 1.2295, "nll_loss": 1.1642963886260986, "rewards/accuracies": 0.625, "rewards/chosen": -0.09772015362977982, "rewards/margins": 0.1772245168685913, "rewards/rejected": -0.2749446630477905, "step": 4861 }, { "epoch": 2.9659905444563064, "grad_norm": 6.567968845367432, "learning_rate": 3.265646050214329e-06, "log_odds_chosen": 2.458827257156372, "log_odds_ratio": -0.3380853235721588, "logits/chosen": -1.038453221321106, "logits/rejected": -1.127061128616333, "logps/chosen": -0.9598541259765625, "logps/rejected": -2.926685333251953, "loss": 1.0698, "nll_loss": 1.1692379713058472, "rewards/accuracies": 0.75, "rewards/chosen": -0.09598540514707565, "rewards/margins": 0.1966831088066101, "rewards/rejected": -0.29266855120658875, "step": 4862 }, { "epoch": 2.9666005795333232, "grad_norm": 5.553539276123047, "learning_rate": 3.2646662584200856e-06, "log_odds_chosen": 0.7249870300292969, "log_odds_ratio": -0.6057037115097046, "logits/chosen": -1.1372582912445068, "logits/rejected": -0.9916942119598389, "logps/chosen": -1.254367709159851, "logps/rejected": -1.916345238685608, "loss": 1.2803, "nll_loss": 1.4679259061813354, "rewards/accuracies": 0.5, "rewards/chosen": -0.12543676793575287, "rewards/margins": 0.06619774550199509, "rewards/rejected": -0.19163452088832855, "step": 4863 }, { "epoch": 2.96721061461034, "grad_norm": 7.720835208892822, "learning_rate": 3.2636864666258417e-06, "log_odds_chosen": 2.1141128540039062, "log_odds_ratio": -0.29039266705513, "logits/chosen": -0.7397752404212952, "logits/rejected": -0.9343430995941162, "logps/chosen": -0.7924602031707764, "logps/rejected": -2.336515426635742, "loss": 1.0419, "nll_loss": 0.908173680305481, "rewards/accuracies": 0.875, "rewards/chosen": -0.07924602925777435, "rewards/margins": 0.15440550446510315, "rewards/rejected": -0.2336515337228775, "step": 4864 }, { "epoch": 2.967820649687357, "grad_norm": 1.2675782442092896, "learning_rate": 3.2627066748315983e-06, "log_odds_chosen": 0.9509222507476807, "log_odds_ratio": -0.613166093826294, "logits/chosen": -1.0553950071334839, "logits/rejected": -1.069899559020996, "logps/chosen": -0.9372855424880981, "logps/rejected": -1.6680529117584229, "loss": 1.1888, "nll_loss": 1.2088041305541992, "rewards/accuracies": 0.5, "rewards/chosen": -0.09372854977846146, "rewards/margins": 0.07307673245668411, "rewards/rejected": -0.16680529713630676, "step": 4865 }, { "epoch": 2.9684306847643738, "grad_norm": 3.1482512950897217, "learning_rate": 3.2617268830373544e-06, "log_odds_chosen": 0.09879939258098602, "log_odds_ratio": -0.7283488512039185, "logits/chosen": -1.0387252569198608, "logits/rejected": -0.9714870452880859, "logps/chosen": -1.12458074092865, "logps/rejected": -1.1566388607025146, "loss": 1.0539, "nll_loss": 1.2951010465621948, "rewards/accuracies": 0.625, "rewards/chosen": -0.11245808005332947, "rewards/margins": 0.0032058190554380417, "rewards/rejected": -0.11566388607025146, "step": 4866 }, { "epoch": 2.9690407198413906, "grad_norm": 2.943378210067749, "learning_rate": 3.2607470912431105e-06, "log_odds_chosen": 1.2860685586929321, "log_odds_ratio": -0.3534575402736664, "logits/chosen": -0.6716070175170898, "logits/rejected": -0.6528385877609253, "logps/chosen": -0.6923785209655762, "logps/rejected": -1.5405431985855103, "loss": 1.2065, "nll_loss": 0.8748067617416382, "rewards/accuracies": 0.875, "rewards/chosen": -0.0692378580570221, "rewards/margins": 0.08481645584106445, "rewards/rejected": -0.15405431389808655, "step": 4867 }, { "epoch": 2.969650754918408, "grad_norm": 1.6999804973602295, "learning_rate": 3.259767299448867e-06, "log_odds_chosen": 1.5786888599395752, "log_odds_ratio": -0.8286340236663818, "logits/chosen": -0.5531415343284607, "logits/rejected": -0.6638076305389404, "logps/chosen": -1.0935333967208862, "logps/rejected": -2.335139751434326, "loss": 0.873, "nll_loss": 0.8245905637741089, "rewards/accuracies": 0.625, "rewards/chosen": -0.10935334116220474, "rewards/margins": 0.12416061758995056, "rewards/rejected": -0.2335139662027359, "step": 4868 }, { "epoch": 2.9702607899954248, "grad_norm": 1.6418648958206177, "learning_rate": 3.258787507654623e-06, "log_odds_chosen": 0.4409913420677185, "log_odds_ratio": -0.5615706443786621, "logits/chosen": -1.0515666007995605, "logits/rejected": -0.7380869388580322, "logps/chosen": -0.9902651309967041, "logps/rejected": -1.2957594394683838, "loss": 1.147, "nll_loss": 1.115297794342041, "rewards/accuracies": 0.625, "rewards/chosen": -0.09902651607990265, "rewards/margins": 0.030549431219697, "rewards/rejected": -0.1295759379863739, "step": 4869 }, { "epoch": 2.9708708250724416, "grad_norm": 2.826744794845581, "learning_rate": 3.2578077158603793e-06, "log_odds_chosen": 0.45187512040138245, "log_odds_ratio": -0.6126095056533813, "logits/chosen": -0.9880850315093994, "logits/rejected": -1.066874384880066, "logps/chosen": -1.0828804969787598, "logps/rejected": -1.4511022567749023, "loss": 1.1241, "nll_loss": 1.3094326257705688, "rewards/accuracies": 0.5, "rewards/chosen": -0.10828804969787598, "rewards/margins": 0.03682217746973038, "rewards/rejected": -0.14511023461818695, "step": 4870 }, { "epoch": 2.9714808601494584, "grad_norm": 1.774644136428833, "learning_rate": 3.256827924066136e-06, "log_odds_chosen": 1.9580687284469604, "log_odds_ratio": -0.33778053522109985, "logits/chosen": -0.7943147420883179, "logits/rejected": -0.822879433631897, "logps/chosen": -0.7303998470306396, "logps/rejected": -2.165095806121826, "loss": 0.8068, "nll_loss": 0.7927841544151306, "rewards/accuracies": 0.875, "rewards/chosen": -0.07303999364376068, "rewards/margins": 0.14346960186958313, "rewards/rejected": -0.2165095955133438, "step": 4871 }, { "epoch": 2.9720908952264757, "grad_norm": 1.3829240798950195, "learning_rate": 3.2558481322718924e-06, "log_odds_chosen": 0.41320377588272095, "log_odds_ratio": -0.5943480134010315, "logits/chosen": -0.6410343647003174, "logits/rejected": -0.6598290205001831, "logps/chosen": -0.7467052340507507, "logps/rejected": -0.9834355115890503, "loss": 1.0022, "nll_loss": 0.977484941482544, "rewards/accuracies": 0.625, "rewards/chosen": -0.07467053085565567, "rewards/margins": 0.023673029616475105, "rewards/rejected": -0.09834355860948563, "step": 4872 }, { "epoch": 2.9727009303034926, "grad_norm": 2.0779757499694824, "learning_rate": 3.254868340477648e-06, "log_odds_chosen": 1.9242959022521973, "log_odds_ratio": -0.44007593393325806, "logits/chosen": -0.81223464012146, "logits/rejected": -0.793850302696228, "logps/chosen": -0.7488360404968262, "logps/rejected": -2.252030849456787, "loss": 1.1386, "nll_loss": 0.8500738739967346, "rewards/accuracies": 0.75, "rewards/chosen": -0.0748836100101471, "rewards/margins": 0.15031947195529938, "rewards/rejected": -0.22520309686660767, "step": 4873 }, { "epoch": 2.9733109653805094, "grad_norm": 1.5856691598892212, "learning_rate": 3.2538885486834047e-06, "log_odds_chosen": 0.8312318921089172, "log_odds_ratio": -0.5233563780784607, "logits/chosen": -0.777064323425293, "logits/rejected": -0.8107140064239502, "logps/chosen": -0.9658978581428528, "logps/rejected": -1.6106576919555664, "loss": 1.2559, "nll_loss": 1.1134824752807617, "rewards/accuracies": 0.75, "rewards/chosen": -0.09658978879451752, "rewards/margins": 0.06447598338127136, "rewards/rejected": -0.16106577217578888, "step": 4874 }, { "epoch": 2.9739210004575263, "grad_norm": 1.180316686630249, "learning_rate": 3.252908756889161e-06, "log_odds_chosen": 1.7186909914016724, "log_odds_ratio": -0.3509565591812134, "logits/chosen": -0.8297553062438965, "logits/rejected": -0.6955644488334656, "logps/chosen": -0.7370148301124573, "logps/rejected": -2.078728437423706, "loss": 1.1291, "nll_loss": 0.8978313207626343, "rewards/accuracies": 0.75, "rewards/chosen": -0.07370148599147797, "rewards/margins": 0.13417135179042816, "rewards/rejected": -0.20787283778190613, "step": 4875 }, { "epoch": 2.974531035534543, "grad_norm": 1.3597133159637451, "learning_rate": 3.251928965094917e-06, "log_odds_chosen": 0.8508962392807007, "log_odds_ratio": -0.43540751934051514, "logits/chosen": -0.779914140701294, "logits/rejected": -0.7145382761955261, "logps/chosen": -0.6014061570167542, "logps/rejected": -1.057704210281372, "loss": 0.9312, "nll_loss": 0.9904599189758301, "rewards/accuracies": 0.875, "rewards/chosen": -0.060140613466501236, "rewards/margins": 0.04562981054186821, "rewards/rejected": -0.10577043890953064, "step": 4876 }, { "epoch": 2.97514107061156, "grad_norm": 1.5461382865905762, "learning_rate": 3.2509491733006735e-06, "log_odds_chosen": 1.5363188982009888, "log_odds_ratio": -0.4977855086326599, "logits/chosen": -0.9874885082244873, "logits/rejected": -0.9743735194206238, "logps/chosen": -0.7105984687805176, "logps/rejected": -1.559553861618042, "loss": 1.0302, "nll_loss": 1.065517783164978, "rewards/accuracies": 0.625, "rewards/chosen": -0.07105984538793564, "rewards/margins": 0.0848955512046814, "rewards/rejected": -0.15595537424087524, "step": 4877 }, { "epoch": 2.9757511056885773, "grad_norm": 1.1142332553863525, "learning_rate": 3.2499693815064296e-06, "log_odds_chosen": 2.010812282562256, "log_odds_ratio": -0.4365130066871643, "logits/chosen": -0.7250129580497742, "logits/rejected": -0.8801360130310059, "logps/chosen": -0.7564879059791565, "logps/rejected": -2.3384616374969482, "loss": 0.9646, "nll_loss": 0.81296306848526, "rewards/accuracies": 0.875, "rewards/chosen": -0.07564879208803177, "rewards/margins": 0.15819740295410156, "rewards/rejected": -0.23384618759155273, "step": 4878 }, { "epoch": 2.976361140765594, "grad_norm": 1.2890334129333496, "learning_rate": 3.248989589712186e-06, "log_odds_chosen": 1.16954505443573, "log_odds_ratio": -0.41914820671081543, "logits/chosen": -0.9752185344696045, "logits/rejected": -0.9339443445205688, "logps/chosen": -0.8758605718612671, "logps/rejected": -1.816281795501709, "loss": 0.982, "nll_loss": 1.0354878902435303, "rewards/accuracies": 0.75, "rewards/chosen": -0.08758606016635895, "rewards/margins": 0.09404211491346359, "rewards/rejected": -0.18162818253040314, "step": 4879 }, { "epoch": 2.976971175842611, "grad_norm": 1.3516136407852173, "learning_rate": 3.2480097979179422e-06, "log_odds_chosen": 0.3640380799770355, "log_odds_ratio": -0.5858931541442871, "logits/chosen": -1.0162410736083984, "logits/rejected": -0.933708906173706, "logps/chosen": -0.7744171619415283, "logps/rejected": -0.9567978382110596, "loss": 1.0402, "nll_loss": 1.046962857246399, "rewards/accuracies": 0.75, "rewards/chosen": -0.07744172215461731, "rewards/margins": 0.01823805831372738, "rewards/rejected": -0.09567978978157043, "step": 4880 }, { "epoch": 2.977581210919628, "grad_norm": 1.4019215106964111, "learning_rate": 3.2470300061236984e-06, "log_odds_chosen": 1.567336916923523, "log_odds_ratio": -0.4596669375896454, "logits/chosen": -0.7928130626678467, "logits/rejected": -0.8585178852081299, "logps/chosen": -0.8756906986236572, "logps/rejected": -2.1062777042388916, "loss": 1.1435, "nll_loss": 1.0470530986785889, "rewards/accuracies": 0.5, "rewards/chosen": -0.08756907284259796, "rewards/margins": 0.12305870652198792, "rewards/rejected": -0.21062777936458588, "step": 4881 }, { "epoch": 2.9781912459966446, "grad_norm": 1.3472031354904175, "learning_rate": 3.246050214329455e-06, "log_odds_chosen": 2.296114683151245, "log_odds_ratio": -0.44612663984298706, "logits/chosen": -0.887832522392273, "logits/rejected": -0.955232560634613, "logps/chosen": -1.0536997318267822, "logps/rejected": -2.833289861679077, "loss": 1.0962, "nll_loss": 1.1758780479431152, "rewards/accuracies": 0.75, "rewards/chosen": -0.10536997765302658, "rewards/margins": 0.17795899510383606, "rewards/rejected": -0.28332898020744324, "step": 4882 }, { "epoch": 2.978801281073662, "grad_norm": 1.388408899307251, "learning_rate": 3.245070422535211e-06, "log_odds_chosen": 4.934619903564453, "log_odds_ratio": -0.11421473324298859, "logits/chosen": -0.711776614189148, "logits/rejected": -1.042311429977417, "logps/chosen": -0.5183042287826538, "logps/rejected": -4.592946529388428, "loss": 0.9156, "nll_loss": 0.7939397096633911, "rewards/accuracies": 1.0, "rewards/chosen": -0.05183042213320732, "rewards/margins": 0.4074642062187195, "rewards/rejected": -0.4592946171760559, "step": 4883 }, { "epoch": 2.979411316150679, "grad_norm": 1.5054526329040527, "learning_rate": 3.244090630740967e-06, "log_odds_chosen": 2.272327184677124, "log_odds_ratio": -0.4416441321372986, "logits/chosen": -0.6154462099075317, "logits/rejected": -0.757910966873169, "logps/chosen": -0.7912980318069458, "logps/rejected": -2.5937719345092773, "loss": 1.0568, "nll_loss": 0.9068636894226074, "rewards/accuracies": 0.875, "rewards/chosen": -0.07912980020046234, "rewards/margins": 0.18024739623069763, "rewards/rejected": -0.2593771815299988, "step": 4884 }, { "epoch": 2.9800213512276956, "grad_norm": 1.4269253015518188, "learning_rate": 3.2431108389467237e-06, "log_odds_chosen": 1.5150090456008911, "log_odds_ratio": -0.38943177461624146, "logits/chosen": -0.7916573882102966, "logits/rejected": -0.9235184192657471, "logps/chosen": -0.5226448178291321, "logps/rejected": -1.570530652999878, "loss": 1.0939, "nll_loss": 1.1137768030166626, "rewards/accuracies": 0.75, "rewards/chosen": -0.05226448178291321, "rewards/margins": 0.10478858649730682, "rewards/rejected": -0.15705305337905884, "step": 4885 }, { "epoch": 2.9806313863047125, "grad_norm": 1.5325191020965576, "learning_rate": 3.2421310471524802e-06, "log_odds_chosen": 2.027414083480835, "log_odds_ratio": -0.3292660117149353, "logits/chosen": -0.8525388240814209, "logits/rejected": -0.9700536727905273, "logps/chosen": -0.8063336610794067, "logps/rejected": -2.406620979309082, "loss": 1.1163, "nll_loss": 1.1215062141418457, "rewards/accuracies": 0.875, "rewards/chosen": -0.08063337206840515, "rewards/margins": 0.16002871096134186, "rewards/rejected": -0.2406620979309082, "step": 4886 }, { "epoch": 2.9812414213817293, "grad_norm": 16.993621826171875, "learning_rate": 3.241151255358236e-06, "log_odds_chosen": 0.7981768846511841, "log_odds_ratio": -0.6048048734664917, "logits/chosen": -0.9974437355995178, "logits/rejected": -0.9701586961746216, "logps/chosen": -0.8838219046592712, "logps/rejected": -1.5987818241119385, "loss": 0.944, "nll_loss": 1.0176081657409668, "rewards/accuracies": 0.5, "rewards/chosen": -0.08838219195604324, "rewards/margins": 0.07149600237607956, "rewards/rejected": -0.1598781943321228, "step": 4887 }, { "epoch": 2.981851456458746, "grad_norm": 1.3396779298782349, "learning_rate": 3.2401714635639925e-06, "log_odds_chosen": 1.3742387294769287, "log_odds_ratio": -0.4389042556285858, "logits/chosen": -0.9713752269744873, "logits/rejected": -1.0463449954986572, "logps/chosen": -0.8824223279953003, "logps/rejected": -1.942298412322998, "loss": 0.9842, "nll_loss": 0.973652720451355, "rewards/accuracies": 0.75, "rewards/chosen": -0.08824223279953003, "rewards/margins": 0.10598760098218918, "rewards/rejected": -0.1942298412322998, "step": 4888 }, { "epoch": 2.9824614915357635, "grad_norm": 1.1488820314407349, "learning_rate": 3.239191671769749e-06, "log_odds_chosen": 1.8316091299057007, "log_odds_ratio": -0.5442516803741455, "logits/chosen": -0.7980718612670898, "logits/rejected": -0.9416595697402954, "logps/chosen": -0.8897987008094788, "logps/rejected": -2.539926290512085, "loss": 1.1314, "nll_loss": 1.1918449401855469, "rewards/accuracies": 0.625, "rewards/chosen": -0.08897987008094788, "rewards/margins": 0.16501277685165405, "rewards/rejected": -0.25399264693260193, "step": 4889 }, { "epoch": 2.9830715266127803, "grad_norm": 1.6764696836471558, "learning_rate": 3.2382118799755047e-06, "log_odds_chosen": 2.4310927391052246, "log_odds_ratio": -0.2469853162765503, "logits/chosen": -0.8774464130401611, "logits/rejected": -1.0028409957885742, "logps/chosen": -0.959686279296875, "logps/rejected": -2.8840696811676025, "loss": 0.9246, "nll_loss": 1.0992310047149658, "rewards/accuracies": 0.875, "rewards/chosen": -0.09596864134073257, "rewards/margins": 0.19243836402893066, "rewards/rejected": -0.28840696811676025, "step": 4890 }, { "epoch": 2.983681561689797, "grad_norm": 1.2411415576934814, "learning_rate": 3.2372320881812613e-06, "log_odds_chosen": 3.800516366958618, "log_odds_ratio": -0.1865340918302536, "logits/chosen": -0.7496153712272644, "logits/rejected": -1.002709150314331, "logps/chosen": -0.5502765774726868, "logps/rejected": -3.557633638381958, "loss": 0.9123, "nll_loss": 0.6316575407981873, "rewards/accuracies": 0.875, "rewards/chosen": -0.055027663707733154, "rewards/margins": 0.3007357120513916, "rewards/rejected": -0.35576337575912476, "step": 4891 }, { "epoch": 2.984291596766814, "grad_norm": 2.9590959548950195, "learning_rate": 3.236252296387018e-06, "log_odds_chosen": 1.7186172008514404, "log_odds_ratio": -0.5222885012626648, "logits/chosen": -0.7461270093917847, "logits/rejected": -0.8630859851837158, "logps/chosen": -0.8226017951965332, "logps/rejected": -2.229008197784424, "loss": 1.1158, "nll_loss": 1.3078502416610718, "rewards/accuracies": 0.5, "rewards/chosen": -0.08226018399000168, "rewards/margins": 0.14064064621925354, "rewards/rejected": -0.22290082275867462, "step": 4892 }, { "epoch": 2.9849016318438313, "grad_norm": 8.661893844604492, "learning_rate": 3.235272504592774e-06, "log_odds_chosen": 1.5893396139144897, "log_odds_ratio": -0.6217318177223206, "logits/chosen": -0.9542398452758789, "logits/rejected": -0.8833538889884949, "logps/chosen": -1.1995338201522827, "logps/rejected": -2.4889326095581055, "loss": 1.1946, "nll_loss": 1.0222668647766113, "rewards/accuracies": 0.625, "rewards/chosen": -0.11995338648557663, "rewards/margins": 0.1289398968219757, "rewards/rejected": -0.24889327585697174, "step": 4893 }, { "epoch": 2.985511666920848, "grad_norm": 4.82615327835083, "learning_rate": 3.23429271279853e-06, "log_odds_chosen": 2.7838551998138428, "log_odds_ratio": -0.30844828486442566, "logits/chosen": -0.8534476161003113, "logits/rejected": -0.97606360912323, "logps/chosen": -0.7891836166381836, "logps/rejected": -3.027700185775757, "loss": 0.9821, "nll_loss": 1.0281569957733154, "rewards/accuracies": 0.875, "rewards/chosen": -0.07891836762428284, "rewards/margins": 0.22385166585445404, "rewards/rejected": -0.3027700185775757, "step": 4894 }, { "epoch": 2.986121701997865, "grad_norm": 4.475761413574219, "learning_rate": 3.2333129210042866e-06, "log_odds_chosen": 3.2157344818115234, "log_odds_ratio": -0.31018295884132385, "logits/chosen": -0.8348293900489807, "logits/rejected": -0.9909166097640991, "logps/chosen": -0.5963627099990845, "logps/rejected": -3.1406049728393555, "loss": 1.1328, "nll_loss": 0.9652240872383118, "rewards/accuracies": 0.875, "rewards/chosen": -0.05963627249002457, "rewards/margins": 0.25442421436309814, "rewards/rejected": -0.3140604794025421, "step": 4895 }, { "epoch": 2.986731737074882, "grad_norm": 2.3580634593963623, "learning_rate": 3.2323331292100427e-06, "log_odds_chosen": 3.822744369506836, "log_odds_ratio": -0.17883560061454773, "logits/chosen": -0.7663987278938293, "logits/rejected": -0.9937692880630493, "logps/chosen": -0.4351636469364166, "logps/rejected": -3.128049850463867, "loss": 0.8708, "nll_loss": 0.8669878244400024, "rewards/accuracies": 1.0, "rewards/chosen": -0.0435163676738739, "rewards/margins": 0.2692885994911194, "rewards/rejected": -0.3128049969673157, "step": 4896 }, { "epoch": 2.9873417721518987, "grad_norm": 5.283348560333252, "learning_rate": 3.2313533374157993e-06, "log_odds_chosen": 0.7001841068267822, "log_odds_ratio": -0.4710647761821747, "logits/chosen": -0.6793218851089478, "logits/rejected": -0.8934013843536377, "logps/chosen": -0.8342393636703491, "logps/rejected": -1.2970232963562012, "loss": 1.1483, "nll_loss": 0.9902875423431396, "rewards/accuracies": 0.75, "rewards/chosen": -0.08342394232749939, "rewards/margins": 0.04627837985754013, "rewards/rejected": -0.12970232963562012, "step": 4897 }, { "epoch": 2.9879518072289155, "grad_norm": 1.1130198240280151, "learning_rate": 3.230373545621555e-06, "log_odds_chosen": 3.1339166164398193, "log_odds_ratio": -0.2498762309551239, "logits/chosen": -0.9927182197570801, "logits/rejected": -0.9763695001602173, "logps/chosen": -0.7894715070724487, "logps/rejected": -3.4732859134674072, "loss": 0.9111, "nll_loss": 0.8042999505996704, "rewards/accuracies": 0.875, "rewards/chosen": -0.07894715666770935, "rewards/margins": 0.2683814764022827, "rewards/rejected": -0.34732863306999207, "step": 4898 }, { "epoch": 2.9885618423059324, "grad_norm": 4.711206436157227, "learning_rate": 3.2293937538273115e-06, "log_odds_chosen": 2.1728968620300293, "log_odds_ratio": -0.23667359352111816, "logits/chosen": -0.6291505098342896, "logits/rejected": -0.9018863439559937, "logps/chosen": -0.6063982248306274, "logps/rejected": -2.1546099185943604, "loss": 0.8969, "nll_loss": 0.7987089157104492, "rewards/accuracies": 1.0, "rewards/chosen": -0.060639820992946625, "rewards/margins": 0.15482115745544434, "rewards/rejected": -0.21546098589897156, "step": 4899 }, { "epoch": 2.9891718773829496, "grad_norm": 1.2084771394729614, "learning_rate": 3.228413962033068e-06, "log_odds_chosen": 2.704697608947754, "log_odds_ratio": -0.3213077783584595, "logits/chosen": -1.0160142183303833, "logits/rejected": -1.0572818517684937, "logps/chosen": -0.75685054063797, "logps/rejected": -2.813084602355957, "loss": 1.0125, "nll_loss": 0.98489910364151, "rewards/accuracies": 0.75, "rewards/chosen": -0.075685054063797, "rewards/margins": 0.20562344789505005, "rewards/rejected": -0.28130847215652466, "step": 4900 }, { "epoch": 2.9897819124599665, "grad_norm": 10.036919593811035, "learning_rate": 3.2274341702388238e-06, "log_odds_chosen": 1.2714784145355225, "log_odds_ratio": -0.5180152654647827, "logits/chosen": -1.2227071523666382, "logits/rejected": -1.1411899328231812, "logps/chosen": -0.9973597526550293, "logps/rejected": -2.0479421615600586, "loss": 1.0449, "nll_loss": 1.079493522644043, "rewards/accuracies": 0.625, "rewards/chosen": -0.09973597526550293, "rewards/margins": 0.10505823791027069, "rewards/rejected": -0.20479421317577362, "step": 4901 }, { "epoch": 2.9903919475369833, "grad_norm": 1.6402466297149658, "learning_rate": 3.2264543784445803e-06, "log_odds_chosen": 1.4196834564208984, "log_odds_ratio": -0.6001706123352051, "logits/chosen": -1.103159785270691, "logits/rejected": -1.0290040969848633, "logps/chosen": -1.1431498527526855, "logps/rejected": -2.2103466987609863, "loss": 1.256, "nll_loss": 1.2494189739227295, "rewards/accuracies": 0.75, "rewards/chosen": -0.1143149882555008, "rewards/margins": 0.10671967267990112, "rewards/rejected": -0.2210346758365631, "step": 4902 }, { "epoch": 2.991001982614, "grad_norm": 1.860298991203308, "learning_rate": 3.225474586650337e-06, "log_odds_chosen": 1.9095726013183594, "log_odds_ratio": -0.42768001556396484, "logits/chosen": -0.8261963725090027, "logits/rejected": -0.9590787887573242, "logps/chosen": -0.6080938577651978, "logps/rejected": -2.085984230041504, "loss": 1.1241, "nll_loss": 0.9196467995643616, "rewards/accuracies": 0.75, "rewards/chosen": -0.060809388756752014, "rewards/margins": 0.14778903126716614, "rewards/rejected": -0.20859842002391815, "step": 4903 }, { "epoch": 2.9916120176910175, "grad_norm": 2.2934744358062744, "learning_rate": 3.2244947948560926e-06, "log_odds_chosen": 0.4939782917499542, "log_odds_ratio": -0.8113268613815308, "logits/chosen": -0.9472629427909851, "logits/rejected": -0.8472943902015686, "logps/chosen": -0.9595620632171631, "logps/rejected": -1.4177203178405762, "loss": 1.2547, "nll_loss": 1.1093401908874512, "rewards/accuracies": 0.5, "rewards/chosen": -0.0959562137722969, "rewards/margins": 0.04581582546234131, "rewards/rejected": -0.14177203178405762, "step": 4904 }, { "epoch": 2.9922220527680343, "grad_norm": 1.5825501680374146, "learning_rate": 3.223515003061849e-06, "log_odds_chosen": 1.2877057790756226, "log_odds_ratio": -0.39170041680336, "logits/chosen": -0.751084566116333, "logits/rejected": -0.7601046562194824, "logps/chosen": -0.8968788385391235, "logps/rejected": -1.7817462682724, "loss": 1.1053, "nll_loss": 1.229689121246338, "rewards/accuracies": 0.75, "rewards/chosen": -0.08968788385391235, "rewards/margins": 0.08848673850297928, "rewards/rejected": -0.17817461490631104, "step": 4905 }, { "epoch": 2.992832087845051, "grad_norm": 1.6528565883636475, "learning_rate": 3.2225352112676057e-06, "log_odds_chosen": 1.5020594596862793, "log_odds_ratio": -0.4200068712234497, "logits/chosen": -0.9339576959609985, "logits/rejected": -0.8754795789718628, "logps/chosen": -0.7515038847923279, "logps/rejected": -1.865199327468872, "loss": 1.1784, "nll_loss": 0.9228218197822571, "rewards/accuracies": 0.875, "rewards/chosen": -0.07515039294958115, "rewards/margins": 0.1113695353269577, "rewards/rejected": -0.18651993572711945, "step": 4906 }, { "epoch": 2.993442122922068, "grad_norm": 1.9310216903686523, "learning_rate": 3.2215554194733618e-06, "log_odds_chosen": 2.1351237297058105, "log_odds_ratio": -0.3774487376213074, "logits/chosen": -0.9093563556671143, "logits/rejected": -0.9172890186309814, "logps/chosen": -0.7661854028701782, "logps/rejected": -2.481107711791992, "loss": 0.8962, "nll_loss": 0.9611358046531677, "rewards/accuracies": 0.875, "rewards/chosen": -0.07661854475736618, "rewards/margins": 0.17149224877357483, "rewards/rejected": -0.2481108009815216, "step": 4907 }, { "epoch": 2.994052157999085, "grad_norm": 2.0207157135009766, "learning_rate": 3.220575627679118e-06, "log_odds_chosen": 0.8912925720214844, "log_odds_ratio": -0.5114323496818542, "logits/chosen": -0.6189631819725037, "logits/rejected": -0.6780674457550049, "logps/chosen": -0.6409304141998291, "logps/rejected": -1.1518878936767578, "loss": 0.9615, "nll_loss": 0.7918765544891357, "rewards/accuracies": 0.625, "rewards/chosen": -0.06409304589033127, "rewards/margins": 0.05109575390815735, "rewards/rejected": -0.11518879234790802, "step": 4908 }, { "epoch": 2.9946621930761017, "grad_norm": 0.9558258056640625, "learning_rate": 3.2195958358848744e-06, "log_odds_chosen": 1.5560741424560547, "log_odds_ratio": -0.43711110949516296, "logits/chosen": -0.9130182266235352, "logits/rejected": -0.9596701860427856, "logps/chosen": -0.7240701913833618, "logps/rejected": -1.980628490447998, "loss": 0.9842, "nll_loss": 1.028296709060669, "rewards/accuracies": 0.625, "rewards/chosen": -0.07240702211856842, "rewards/margins": 0.12565582990646362, "rewards/rejected": -0.19806285202503204, "step": 4909 }, { "epoch": 2.9952722281531186, "grad_norm": 2.029181718826294, "learning_rate": 3.2186160440906306e-06, "log_odds_chosen": 0.1255130022764206, "log_odds_ratio": -0.6538256406784058, "logits/chosen": -1.0642509460449219, "logits/rejected": -0.9736146926879883, "logps/chosen": -0.9441372752189636, "logps/rejected": -1.0178076028823853, "loss": 1.0039, "nll_loss": 1.1031608581542969, "rewards/accuracies": 0.75, "rewards/chosen": -0.09441374242305756, "rewards/margins": 0.007367022801190615, "rewards/rejected": -0.10178075730800629, "step": 4910 }, { "epoch": 2.995882263230136, "grad_norm": 1.4029748439788818, "learning_rate": 3.217636252296387e-06, "log_odds_chosen": 2.8281846046447754, "log_odds_ratio": -0.38122063875198364, "logits/chosen": -0.9000416994094849, "logits/rejected": -1.027403473854065, "logps/chosen": -0.6640075445175171, "logps/rejected": -3.008114814758301, "loss": 0.996, "nll_loss": 0.9520153999328613, "rewards/accuracies": 0.75, "rewards/chosen": -0.06640075892210007, "rewards/margins": 0.23441073298454285, "rewards/rejected": -0.3008114993572235, "step": 4911 }, { "epoch": 2.9964922983071527, "grad_norm": 2.974912643432617, "learning_rate": 3.2166564605021432e-06, "log_odds_chosen": 0.6977576613426208, "log_odds_ratio": -0.5183599591255188, "logits/chosen": -0.6907418370246887, "logits/rejected": -0.6340208053588867, "logps/chosen": -0.6458691358566284, "logps/rejected": -1.147668480873108, "loss": 1.0187, "nll_loss": 0.8297388553619385, "rewards/accuracies": 0.75, "rewards/chosen": -0.06458691507577896, "rewards/margins": 0.05017993599176407, "rewards/rejected": -0.11476685106754303, "step": 4912 }, { "epoch": 2.9971023333841695, "grad_norm": 1.1717171669006348, "learning_rate": 3.2156766687078994e-06, "log_odds_chosen": 1.5746264457702637, "log_odds_ratio": -0.4782944917678833, "logits/chosen": -0.8074749708175659, "logits/rejected": -0.8461215496063232, "logps/chosen": -0.7438133358955383, "logps/rejected": -1.9776934385299683, "loss": 1.0567, "nll_loss": 1.0474002361297607, "rewards/accuracies": 0.75, "rewards/chosen": -0.07438133656978607, "rewards/margins": 0.12338799983263016, "rewards/rejected": -0.19776932895183563, "step": 4913 }, { "epoch": 2.9977123684611864, "grad_norm": 9.223176956176758, "learning_rate": 3.214696876913656e-06, "log_odds_chosen": 2.3279294967651367, "log_odds_ratio": -0.36078739166259766, "logits/chosen": -0.9938856363296509, "logits/rejected": -0.9920907616615295, "logps/chosen": -0.8873246908187866, "logps/rejected": -2.861325263977051, "loss": 1.0624, "nll_loss": 1.239408016204834, "rewards/accuracies": 0.875, "rewards/chosen": -0.08873246610164642, "rewards/margins": 0.1974000781774521, "rewards/rejected": -0.2861325442790985, "step": 4914 }, { "epoch": 2.9983224035382037, "grad_norm": 1.473063349723816, "learning_rate": 3.213717085119412e-06, "log_odds_chosen": 1.1451547145843506, "log_odds_ratio": -0.5893722772598267, "logits/chosen": -0.8710072040557861, "logits/rejected": -0.9527702927589417, "logps/chosen": -0.8866913318634033, "logps/rejected": -1.7487295866012573, "loss": 0.9764, "nll_loss": 1.0425255298614502, "rewards/accuracies": 0.625, "rewards/chosen": -0.08866912871599197, "rewards/margins": 0.08620382100343704, "rewards/rejected": -0.17487293481826782, "step": 4915 }, { "epoch": 2.9989324386152205, "grad_norm": 3.033438205718994, "learning_rate": 3.212737293325168e-06, "log_odds_chosen": 0.8890178203582764, "log_odds_ratio": -0.5164030194282532, "logits/chosen": -1.0603370666503906, "logits/rejected": -1.08357572555542, "logps/chosen": -0.9524250030517578, "logps/rejected": -1.6484442949295044, "loss": 1.2164, "nll_loss": 1.4710931777954102, "rewards/accuracies": 0.75, "rewards/chosen": -0.09524250030517578, "rewards/margins": 0.06960193067789078, "rewards/rejected": -0.16484442353248596, "step": 4916 }, { "epoch": 2.9995424736922374, "grad_norm": 1.2985563278198242, "learning_rate": 3.2117575015309247e-06, "log_odds_chosen": 0.6616852283477783, "log_odds_ratio": -0.7329928874969482, "logits/chosen": -1.002493977546692, "logits/rejected": -0.8630609512329102, "logps/chosen": -0.8956108093261719, "logps/rejected": -1.5183264017105103, "loss": 1.0891, "nll_loss": 1.025158166885376, "rewards/accuracies": 0.5, "rewards/chosen": -0.0895610824227333, "rewards/margins": 0.06227156147360802, "rewards/rejected": -0.15183264017105103, "step": 4917 }, { "epoch": 2.9995424736922374, "eval_log_odds_chosen": 1.7376518249511719, "eval_log_odds_ratio": -0.4457593262195587, "eval_logits/chosen": -0.8876088261604309, "eval_logits/rejected": -0.935762345790863, "eval_logps/chosen": -0.819965124130249, "eval_logps/rejected": -2.184474468231201, "eval_loss": 1.0654408931732178, "eval_nll_loss": 1.064808964729309, "eval_rewards/accuracies": 0.7196969985961914, "eval_rewards/chosen": -0.08199651539325714, "eval_rewards/margins": 0.13645091652870178, "eval_rewards/rejected": -0.21844741702079773, "eval_runtime": 398.0123, "eval_samples_per_second": 0.987, "eval_steps_per_second": 0.166, "step": 4917 }, { "epoch": 3.000152508769254, "grad_norm": 1.1898502111434937, "learning_rate": 3.210777709736681e-06, "log_odds_chosen": 1.4699441194534302, "log_odds_ratio": -0.6499964594841003, "logits/chosen": -0.8346318006515503, "logits/rejected": -0.8334183096885681, "logps/chosen": -0.8326268196105957, "logps/rejected": -1.9695184230804443, "loss": 1.0465, "nll_loss": 1.0568921566009521, "rewards/accuracies": 0.625, "rewards/chosen": -0.08326268196105957, "rewards/margins": 0.11368916183710098, "rewards/rejected": -0.19695186614990234, "step": 4918 }, { "epoch": 3.000762543846271, "grad_norm": 1.607332706451416, "learning_rate": 3.209797917942437e-06, "log_odds_chosen": 1.7598731517791748, "log_odds_ratio": -0.39547282457351685, "logits/chosen": -0.8309085965156555, "logits/rejected": -0.7717685103416443, "logps/chosen": -0.6019936800003052, "logps/rejected": -1.7401514053344727, "loss": 1.0635, "nll_loss": 0.9707585573196411, "rewards/accuracies": 0.875, "rewards/chosen": -0.06019936501979828, "rewards/margins": 0.1138157844543457, "rewards/rejected": -0.17401514947414398, "step": 4919 }, { "epoch": 3.001372578923288, "grad_norm": 2.605469226837158, "learning_rate": 3.2088181261481935e-06, "log_odds_chosen": 0.7761913537979126, "log_odds_ratio": -0.47780942916870117, "logits/chosen": -1.108676552772522, "logits/rejected": -1.0277667045593262, "logps/chosen": -0.7611902356147766, "logps/rejected": -1.2747286558151245, "loss": 1.0887, "nll_loss": 1.1698682308197021, "rewards/accuracies": 0.875, "rewards/chosen": -0.07611902803182602, "rewards/margins": 0.05135384202003479, "rewards/rejected": -0.1274728775024414, "step": 4920 }, { "epoch": 3.001982614000305, "grad_norm": 2.7400734424591064, "learning_rate": 3.2078383343539496e-06, "log_odds_chosen": 2.2988452911376953, "log_odds_ratio": -0.4238918423652649, "logits/chosen": -0.8947460651397705, "logits/rejected": -0.9898868799209595, "logps/chosen": -0.8045933246612549, "logps/rejected": -2.6922168731689453, "loss": 1.3315, "nll_loss": 1.0040504932403564, "rewards/accuracies": 0.625, "rewards/chosen": -0.08045932650566101, "rewards/margins": 0.1887623518705368, "rewards/rejected": -0.2692216634750366, "step": 4921 }, { "epoch": 3.002592649077322, "grad_norm": 1.0492019653320312, "learning_rate": 3.2068585425597057e-06, "log_odds_chosen": 2.5220935344696045, "log_odds_ratio": -0.3131621479988098, "logits/chosen": -0.6213685274124146, "logits/rejected": -0.9532631635665894, "logps/chosen": -0.6018701791763306, "logps/rejected": -2.3043148517608643, "loss": 0.9139, "nll_loss": 0.9026821851730347, "rewards/accuracies": 0.875, "rewards/chosen": -0.060187019407749176, "rewards/margins": 0.1702444702386856, "rewards/rejected": -0.2304314821958542, "step": 4922 }, { "epoch": 3.003202684154339, "grad_norm": 1.6421105861663818, "learning_rate": 3.2058787507654623e-06, "log_odds_chosen": 1.376564621925354, "log_odds_ratio": -0.3955537676811218, "logits/chosen": -1.0258361101150513, "logits/rejected": -0.9274687767028809, "logps/chosen": -0.7917766571044922, "logps/rejected": -1.856205940246582, "loss": 1.169, "nll_loss": 1.0841553211212158, "rewards/accuracies": 0.75, "rewards/chosen": -0.07917767018079758, "rewards/margins": 0.10644292086362839, "rewards/rejected": -0.18562057614326477, "step": 4923 }, { "epoch": 3.0038127192313557, "grad_norm": 1.666890025138855, "learning_rate": 3.2048989589712184e-06, "log_odds_chosen": -0.15512971580028534, "log_odds_ratio": -0.8260604739189148, "logits/chosen": -0.8785232305526733, "logits/rejected": -0.904746949672699, "logps/chosen": -1.222271203994751, "logps/rejected": -1.1097447872161865, "loss": 1.0124, "nll_loss": 1.2956598997116089, "rewards/accuracies": 0.5, "rewards/chosen": -0.12222711741924286, "rewards/margins": -0.011252639815211296, "rewards/rejected": -0.11097448319196701, "step": 4924 }, { "epoch": 3.0044227543083726, "grad_norm": 1.1648799180984497, "learning_rate": 3.203919167176975e-06, "log_odds_chosen": 0.44345539808273315, "log_odds_ratio": -0.7790898680686951, "logits/chosen": -0.8983368873596191, "logits/rejected": -0.8669072389602661, "logps/chosen": -1.1156038045883179, "logps/rejected": -1.4800351858139038, "loss": 1.1157, "nll_loss": 1.1452280282974243, "rewards/accuracies": 0.5, "rewards/chosen": -0.1115603819489479, "rewards/margins": 0.036443132907152176, "rewards/rejected": -0.1480035036802292, "step": 4925 }, { "epoch": 3.00503278938539, "grad_norm": 3.858041524887085, "learning_rate": 3.202939375382731e-06, "log_odds_chosen": 1.7198193073272705, "log_odds_ratio": -0.4109947383403778, "logits/chosen": -0.8919107913970947, "logits/rejected": -0.8643028736114502, "logps/chosen": -0.7042514085769653, "logps/rejected": -1.9278614521026611, "loss": 0.9905, "nll_loss": 0.8094480037689209, "rewards/accuracies": 0.875, "rewards/chosen": -0.0704251378774643, "rewards/margins": 0.12236099690198898, "rewards/rejected": -0.19278614223003387, "step": 4926 }, { "epoch": 3.0056428244624067, "grad_norm": 1.5012507438659668, "learning_rate": 3.201959583588487e-06, "log_odds_chosen": 1.8118665218353271, "log_odds_ratio": -0.21505196392536163, "logits/chosen": -0.8852642774581909, "logits/rejected": -0.7378966808319092, "logps/chosen": -0.7028917074203491, "logps/rejected": -1.8673861026763916, "loss": 0.8765, "nll_loss": 0.9126564264297485, "rewards/accuracies": 0.875, "rewards/chosen": -0.07028916478157043, "rewards/margins": 0.11644944548606873, "rewards/rejected": -0.18673861026763916, "step": 4927 }, { "epoch": 3.0062528595394236, "grad_norm": 1.8486056327819824, "learning_rate": 3.2009797917942437e-06, "log_odds_chosen": 2.0794148445129395, "log_odds_ratio": -0.25137123465538025, "logits/chosen": -0.7506874799728394, "logits/rejected": -0.7583230137825012, "logps/chosen": -0.8152272701263428, "logps/rejected": -2.2980728149414062, "loss": 0.9712, "nll_loss": 0.8738969564437866, "rewards/accuracies": 0.875, "rewards/chosen": -0.08152273297309875, "rewards/margins": 0.14828458428382874, "rewards/rejected": -0.2298073172569275, "step": 4928 }, { "epoch": 3.0068628946164404, "grad_norm": 2.1860556602478027, "learning_rate": 3.2e-06, "log_odds_chosen": 3.284158229827881, "log_odds_ratio": -0.3798062205314636, "logits/chosen": -0.7229036092758179, "logits/rejected": -0.9431737661361694, "logps/chosen": -0.4335641860961914, "logps/rejected": -2.5680699348449707, "loss": 1.1272, "nll_loss": 0.8301723599433899, "rewards/accuracies": 0.875, "rewards/chosen": -0.04335641860961914, "rewards/margins": 0.2134505808353424, "rewards/rejected": -0.25680699944496155, "step": 4929 }, { "epoch": 3.0074729296934573, "grad_norm": 7.744995594024658, "learning_rate": 3.199020208205756e-06, "log_odds_chosen": 1.4657107591629028, "log_odds_ratio": -0.5849572420120239, "logits/chosen": -1.117300271987915, "logits/rejected": -1.081380844116211, "logps/chosen": -0.9872791767120361, "logps/rejected": -2.204197645187378, "loss": 1.1996, "nll_loss": 1.1972607374191284, "rewards/accuracies": 0.5, "rewards/chosen": -0.09872792661190033, "rewards/margins": 0.12169183790683746, "rewards/rejected": -0.2204197645187378, "step": 4930 }, { "epoch": 3.008082964770474, "grad_norm": 3.1598715782165527, "learning_rate": 3.1980404164115125e-06, "log_odds_chosen": 1.754946231842041, "log_odds_ratio": -0.36719590425491333, "logits/chosen": -0.7533156871795654, "logits/rejected": -0.7765923142433167, "logps/chosen": -0.6474908590316772, "logps/rejected": -1.9573695659637451, "loss": 1.0267, "nll_loss": 0.7811576724052429, "rewards/accuracies": 0.75, "rewards/chosen": -0.0647490844130516, "rewards/margins": 0.13098786771297455, "rewards/rejected": -0.19573695957660675, "step": 4931 }, { "epoch": 3.0086929998474914, "grad_norm": 1.5446544885635376, "learning_rate": 3.197060624617269e-06, "log_odds_chosen": 3.029913902282715, "log_odds_ratio": -0.2734760344028473, "logits/chosen": -0.8706800937652588, "logits/rejected": -0.9380597472190857, "logps/chosen": -0.47776249051094055, "logps/rejected": -2.524892807006836, "loss": 0.9717, "nll_loss": 0.9404703378677368, "rewards/accuracies": 0.875, "rewards/chosen": -0.047776248306035995, "rewards/margins": 0.20471304655075073, "rewards/rejected": -0.252489298582077, "step": 4932 }, { "epoch": 3.0093030349245082, "grad_norm": 6.068483352661133, "learning_rate": 3.1960808328230248e-06, "log_odds_chosen": 1.615199089050293, "log_odds_ratio": -0.3985949158668518, "logits/chosen": -0.7490785121917725, "logits/rejected": -0.864770233631134, "logps/chosen": -0.6450095176696777, "logps/rejected": -1.8674980401992798, "loss": 0.9461, "nll_loss": 0.854804515838623, "rewards/accuracies": 0.875, "rewards/chosen": -0.06450095027685165, "rewards/margins": 0.12224885821342468, "rewards/rejected": -0.18674978613853455, "step": 4933 }, { "epoch": 3.009913070001525, "grad_norm": 1.5541859865188599, "learning_rate": 3.1951010410287813e-06, "log_odds_chosen": 2.5663821697235107, "log_odds_ratio": -0.3090021014213562, "logits/chosen": -1.1159988641738892, "logits/rejected": -1.007141351699829, "logps/chosen": -0.8602287173271179, "logps/rejected": -2.916525363922119, "loss": 1.0413, "nll_loss": 1.304964542388916, "rewards/accuracies": 0.875, "rewards/chosen": -0.08602287620306015, "rewards/margins": 0.20562966167926788, "rewards/rejected": -0.29165253043174744, "step": 4934 }, { "epoch": 3.010523105078542, "grad_norm": 1.0660947561264038, "learning_rate": 3.1941212492345374e-06, "log_odds_chosen": 1.822280764579773, "log_odds_ratio": -0.5326336026191711, "logits/chosen": -0.9798436164855957, "logits/rejected": -1.0157470703125, "logps/chosen": -0.7977702617645264, "logps/rejected": -2.360224962234497, "loss": 1.0105, "nll_loss": 1.0384140014648438, "rewards/accuracies": 0.5, "rewards/chosen": -0.07977702468633652, "rewards/margins": 0.15624547004699707, "rewards/rejected": -0.23602250218391418, "step": 4935 }, { "epoch": 3.0111331401555588, "grad_norm": 0.9980874061584473, "learning_rate": 3.1931414574402936e-06, "log_odds_chosen": 2.8791844844818115, "log_odds_ratio": -0.16373957693576813, "logits/chosen": -0.7009381055831909, "logits/rejected": -0.829226016998291, "logps/chosen": -0.5028443336486816, "logps/rejected": -2.6102867126464844, "loss": 0.805, "nll_loss": 0.6467465162277222, "rewards/accuracies": 0.875, "rewards/chosen": -0.050284434109926224, "rewards/margins": 0.2107442319393158, "rewards/rejected": -0.2610286772251129, "step": 4936 }, { "epoch": 3.011743175232576, "grad_norm": 1.5190242528915405, "learning_rate": 3.19216166564605e-06, "log_odds_chosen": 1.7095597982406616, "log_odds_ratio": -0.35255661606788635, "logits/chosen": -0.677271842956543, "logits/rejected": -0.8080757260322571, "logps/chosen": -0.536872386932373, "logps/rejected": -1.700950026512146, "loss": 1.164, "nll_loss": 0.6397180557250977, "rewards/accuracies": 0.75, "rewards/chosen": -0.05368723347783089, "rewards/margins": 0.11640775948762894, "rewards/rejected": -0.17009499669075012, "step": 4937 }, { "epoch": 3.012353210309593, "grad_norm": 1.2003309726715088, "learning_rate": 3.1911818738518062e-06, "log_odds_chosen": 2.9661197662353516, "log_odds_ratio": -0.4558051526546478, "logits/chosen": -0.8990151882171631, "logits/rejected": -0.9823621511459351, "logps/chosen": -0.8744748830795288, "logps/rejected": -3.489680290222168, "loss": 1.1152, "nll_loss": 1.2827180624008179, "rewards/accuracies": 0.75, "rewards/chosen": -0.08744748681783676, "rewards/margins": 0.26152053475379944, "rewards/rejected": -0.3489680588245392, "step": 4938 }, { "epoch": 3.0129632453866098, "grad_norm": 11.450394630432129, "learning_rate": 3.1902020820575628e-06, "log_odds_chosen": 2.6691932678222656, "log_odds_ratio": -0.2898483872413635, "logits/chosen": -0.7362123727798462, "logits/rejected": -0.8143603801727295, "logps/chosen": -0.648476243019104, "logps/rejected": -2.721303939819336, "loss": 0.9193, "nll_loss": 0.8757283091545105, "rewards/accuracies": 0.75, "rewards/chosen": -0.06484763324260712, "rewards/margins": 0.20728279650211334, "rewards/rejected": -0.27213042974472046, "step": 4939 }, { "epoch": 3.0135732804636266, "grad_norm": 1.8705973625183105, "learning_rate": 3.189222290263319e-06, "log_odds_chosen": 0.9421157836914062, "log_odds_ratio": -0.6661231517791748, "logits/chosen": -0.7902493476867676, "logits/rejected": -0.8252354860305786, "logps/chosen": -0.938473641872406, "logps/rejected": -1.7925342321395874, "loss": 1.1848, "nll_loss": 1.003887414932251, "rewards/accuracies": 0.5, "rewards/chosen": -0.0938473641872406, "rewards/margins": 0.08540605008602142, "rewards/rejected": -0.17925342917442322, "step": 4940 }, { "epoch": 3.0141833155406434, "grad_norm": 4.3500871658325195, "learning_rate": 3.188242498469075e-06, "log_odds_chosen": 1.1773377656936646, "log_odds_ratio": -0.6365994215011597, "logits/chosen": -0.9812909364700317, "logits/rejected": -0.9669719338417053, "logps/chosen": -1.0608415603637695, "logps/rejected": -1.9301214218139648, "loss": 1.1445, "nll_loss": 1.5008141994476318, "rewards/accuracies": 0.625, "rewards/chosen": -0.10608415305614471, "rewards/margins": 0.08692798763513565, "rewards/rejected": -0.19301214814186096, "step": 4941 }, { "epoch": 3.0147933506176603, "grad_norm": 1.2689934968948364, "learning_rate": 3.1872627066748316e-06, "log_odds_chosen": 2.289210796356201, "log_odds_ratio": -0.2990967035293579, "logits/chosen": -0.8770766258239746, "logits/rejected": -0.8922395706176758, "logps/chosen": -0.6173539161682129, "logps/rejected": -2.220099687576294, "loss": 1.0278, "nll_loss": 0.9493553638458252, "rewards/accuracies": 0.75, "rewards/chosen": -0.06173539534211159, "rewards/margins": 0.16027458012104034, "rewards/rejected": -0.22200998663902283, "step": 4942 }, { "epoch": 3.0154033856946776, "grad_norm": 1.327340006828308, "learning_rate": 3.1862829148805877e-06, "log_odds_chosen": 1.7217774391174316, "log_odds_ratio": -0.41821610927581787, "logits/chosen": -1.1088018417358398, "logits/rejected": -1.1468441486358643, "logps/chosen": -0.6356823444366455, "logps/rejected": -1.781189203262329, "loss": 1.0866, "nll_loss": 1.1900420188903809, "rewards/accuracies": 0.75, "rewards/chosen": -0.06356823444366455, "rewards/margins": 0.11455068737268448, "rewards/rejected": -0.17811892926692963, "step": 4943 }, { "epoch": 3.0160134207716944, "grad_norm": 5.223083019256592, "learning_rate": 3.185303123086344e-06, "log_odds_chosen": 2.8814640045166016, "log_odds_ratio": -0.2048913538455963, "logits/chosen": -0.757312536239624, "logits/rejected": -0.9074101448059082, "logps/chosen": -0.5986824631690979, "logps/rejected": -2.550309658050537, "loss": 0.8833, "nll_loss": 0.9506354331970215, "rewards/accuracies": 1.0, "rewards/chosen": -0.05986824631690979, "rewards/margins": 0.19516272842884064, "rewards/rejected": -0.25503095984458923, "step": 4944 }, { "epoch": 3.0166234558487113, "grad_norm": 1.725629448890686, "learning_rate": 3.1843233312921004e-06, "log_odds_chosen": 2.4801950454711914, "log_odds_ratio": -0.33909446001052856, "logits/chosen": -0.8406269550323486, "logits/rejected": -0.962624728679657, "logps/chosen": -0.8865099549293518, "logps/rejected": -2.9368557929992676, "loss": 1.1238, "nll_loss": 1.0982567071914673, "rewards/accuracies": 0.875, "rewards/chosen": -0.08865099400281906, "rewards/margins": 0.20503458380699158, "rewards/rejected": -0.29368558526039124, "step": 4945 }, { "epoch": 3.017233490925728, "grad_norm": 1.4074652194976807, "learning_rate": 3.183343539497857e-06, "log_odds_chosen": 2.0591137409210205, "log_odds_ratio": -0.2873331904411316, "logits/chosen": -0.7528835535049438, "logits/rejected": -0.8916141986846924, "logps/chosen": -0.5935476422309875, "logps/rejected": -2.009915351867676, "loss": 0.9073, "nll_loss": 0.7268477082252502, "rewards/accuracies": 0.875, "rewards/chosen": -0.059354763478040695, "rewards/margins": 0.14163677394390106, "rewards/rejected": -0.20099154114723206, "step": 4946 }, { "epoch": 3.017843526002745, "grad_norm": 1.6432667970657349, "learning_rate": 3.1823637477036126e-06, "log_odds_chosen": 1.4895658493041992, "log_odds_ratio": -0.417028546333313, "logits/chosen": -0.9497747421264648, "logits/rejected": -1.0717720985412598, "logps/chosen": -0.6994892358779907, "logps/rejected": -1.7365652322769165, "loss": 1.2307, "nll_loss": 1.2876231670379639, "rewards/accuracies": 0.75, "rewards/chosen": -0.06994892656803131, "rewards/margins": 0.10370760411024094, "rewards/rejected": -0.17365652322769165, "step": 4947 }, { "epoch": 3.0184535610797623, "grad_norm": 1.3275636434555054, "learning_rate": 3.181383955909369e-06, "log_odds_chosen": 1.2079567909240723, "log_odds_ratio": -0.434627503156662, "logits/chosen": -0.8049144148826599, "logits/rejected": -0.9666392207145691, "logps/chosen": -0.9726524353027344, "logps/rejected": -1.91628098487854, "loss": 1.1373, "nll_loss": 1.3665926456451416, "rewards/accuracies": 0.875, "rewards/chosen": -0.09726524353027344, "rewards/margins": 0.09436285495758057, "rewards/rejected": -0.191628098487854, "step": 4948 }, { "epoch": 3.019063596156779, "grad_norm": 2.7518210411071777, "learning_rate": 3.1804041641151257e-06, "log_odds_chosen": 1.123425841331482, "log_odds_ratio": -0.45496851205825806, "logits/chosen": -0.913894534111023, "logits/rejected": -0.9731325507164001, "logps/chosen": -0.9582021236419678, "logps/rejected": -1.827385425567627, "loss": 1.0814, "nll_loss": 1.0006060600280762, "rewards/accuracies": 0.625, "rewards/chosen": -0.09582021087408066, "rewards/margins": 0.08691832423210144, "rewards/rejected": -0.1827385425567627, "step": 4949 }, { "epoch": 3.019673631233796, "grad_norm": 1.261513590812683, "learning_rate": 3.1794243723208814e-06, "log_odds_chosen": 1.8746185302734375, "log_odds_ratio": -0.39314088225364685, "logits/chosen": -0.9497609734535217, "logits/rejected": -0.9255026578903198, "logps/chosen": -0.9488433003425598, "logps/rejected": -2.451096296310425, "loss": 1.1082, "nll_loss": 1.0875424146652222, "rewards/accuracies": 0.875, "rewards/chosen": -0.09488433599472046, "rewards/margins": 0.15022531151771545, "rewards/rejected": -0.24510961771011353, "step": 4950 }, { "epoch": 3.020283666310813, "grad_norm": 2.0770986080169678, "learning_rate": 3.178444580526638e-06, "log_odds_chosen": 1.5608731508255005, "log_odds_ratio": -0.5848995447158813, "logits/chosen": -1.0418351888656616, "logits/rejected": -1.081512689590454, "logps/chosen": -0.9707687497138977, "logps/rejected": -2.320744276046753, "loss": 1.0933, "nll_loss": 1.2749295234680176, "rewards/accuracies": 0.5, "rewards/chosen": -0.09707687795162201, "rewards/margins": 0.13499754667282104, "rewards/rejected": -0.23207442462444305, "step": 4951 }, { "epoch": 3.0208937013878296, "grad_norm": 2.3894457817077637, "learning_rate": 3.1774647887323945e-06, "log_odds_chosen": 3.3651819229125977, "log_odds_ratio": -0.19815132021903992, "logits/chosen": -0.7876156568527222, "logits/rejected": -0.9462695121765137, "logps/chosen": -0.810049295425415, "logps/rejected": -3.6036577224731445, "loss": 1.0906, "nll_loss": 0.9932507276535034, "rewards/accuracies": 1.0, "rewards/chosen": -0.08100493252277374, "rewards/margins": 0.2793608605861664, "rewards/rejected": -0.3603658080101013, "step": 4952 }, { "epoch": 3.021503736464847, "grad_norm": 1.8094217777252197, "learning_rate": 3.1764849969381506e-06, "log_odds_chosen": -0.12196817994117737, "log_odds_ratio": -0.9020590782165527, "logits/chosen": -1.0203635692596436, "logits/rejected": -0.9110013246536255, "logps/chosen": -0.9020804762840271, "logps/rejected": -0.8040512204170227, "loss": 1.13, "nll_loss": 1.0385956764221191, "rewards/accuracies": 0.25, "rewards/chosen": -0.09020805358886719, "rewards/margins": -0.009802930057048798, "rewards/rejected": -0.08040512353181839, "step": 4953 }, { "epoch": 3.022113771541864, "grad_norm": 1.7260241508483887, "learning_rate": 3.1755052051439067e-06, "log_odds_chosen": 1.7239376306533813, "log_odds_ratio": -0.3910010755062103, "logits/chosen": -0.7207404375076294, "logits/rejected": -0.6967737674713135, "logps/chosen": -0.8396707773208618, "logps/rejected": -2.2492012977600098, "loss": 1.0273, "nll_loss": 0.859461784362793, "rewards/accuracies": 0.75, "rewards/chosen": -0.08396708220243454, "rewards/margins": 0.14095307886600494, "rewards/rejected": -0.2249201387166977, "step": 4954 }, { "epoch": 3.0227238066188806, "grad_norm": 7.30714750289917, "learning_rate": 3.174525413349663e-06, "log_odds_chosen": 0.9329549074172974, "log_odds_ratio": -0.5417575240135193, "logits/chosen": -0.7028746604919434, "logits/rejected": -0.7282919883728027, "logps/chosen": -0.5997312068939209, "logps/rejected": -1.0710337162017822, "loss": 0.9458, "nll_loss": 1.1144297122955322, "rewards/accuracies": 0.75, "rewards/chosen": -0.05997312068939209, "rewards/margins": 0.047130241990089417, "rewards/rejected": -0.1071033626794815, "step": 4955 }, { "epoch": 3.0233338416958975, "grad_norm": 1.8851450681686401, "learning_rate": 3.1735456215554194e-06, "log_odds_chosen": 1.1213645935058594, "log_odds_ratio": -0.44007641077041626, "logits/chosen": -0.9445505738258362, "logits/rejected": -0.9590056538581848, "logps/chosen": -0.9992137551307678, "logps/rejected": -1.9258050918579102, "loss": 1.2561, "nll_loss": 1.214930534362793, "rewards/accuracies": 0.625, "rewards/chosen": -0.09992137551307678, "rewards/margins": 0.092659130692482, "rewards/rejected": -0.19258052110671997, "step": 4956 }, { "epoch": 3.0239438767729143, "grad_norm": 1.5320318937301636, "learning_rate": 3.1725658297611755e-06, "log_odds_chosen": 1.0506916046142578, "log_odds_ratio": -0.48714810609817505, "logits/chosen": -0.720093846321106, "logits/rejected": -0.8462737202644348, "logps/chosen": -0.7103281021118164, "logps/rejected": -1.5569723844528198, "loss": 1.166, "nll_loss": 0.8237037062644958, "rewards/accuracies": 0.5, "rewards/chosen": -0.07103281468153, "rewards/margins": 0.08466442674398422, "rewards/rejected": -0.15569725632667542, "step": 4957 }, { "epoch": 3.024553911849931, "grad_norm": 5.211421012878418, "learning_rate": 3.1715860379669316e-06, "log_odds_chosen": 1.9079198837280273, "log_odds_ratio": -0.4153927266597748, "logits/chosen": -0.8710997700691223, "logits/rejected": -0.9523888826370239, "logps/chosen": -0.9195334315299988, "logps/rejected": -2.448323965072632, "loss": 1.027, "nll_loss": 1.0826032161712646, "rewards/accuracies": 0.75, "rewards/chosen": -0.091953344643116, "rewards/margins": 0.1528790444135666, "rewards/rejected": -0.24483239650726318, "step": 4958 }, { "epoch": 3.0251639469269485, "grad_norm": 1.3446531295776367, "learning_rate": 3.170606246172688e-06, "log_odds_chosen": 1.1584806442260742, "log_odds_ratio": -0.5848710536956787, "logits/chosen": -0.6530307531356812, "logits/rejected": -0.9228252172470093, "logps/chosen": -0.6687362194061279, "logps/rejected": -1.4053282737731934, "loss": 1.0246, "nll_loss": 0.9748769998550415, "rewards/accuracies": 0.5, "rewards/chosen": -0.06687362492084503, "rewards/margins": 0.07365920394659042, "rewards/rejected": -0.14053283631801605, "step": 4959 }, { "epoch": 3.0257739820039653, "grad_norm": 9.597723007202148, "learning_rate": 3.1696264543784447e-06, "log_odds_chosen": 1.0718498229980469, "log_odds_ratio": -0.40486499667167664, "logits/chosen": -0.9302632808685303, "logits/rejected": -0.880085825920105, "logps/chosen": -1.0227222442626953, "logps/rejected": -1.8715639114379883, "loss": 1.0842, "nll_loss": 1.209063172340393, "rewards/accuracies": 0.75, "rewards/chosen": -0.10227223485708237, "rewards/margins": 0.0848841741681099, "rewards/rejected": -0.18715640902519226, "step": 4960 }, { "epoch": 3.026384017080982, "grad_norm": 2.0791127681732178, "learning_rate": 3.1686466625842004e-06, "log_odds_chosen": 3.5773117542266846, "log_odds_ratio": -0.1642717868089676, "logits/chosen": -0.8224488496780396, "logits/rejected": -1.0141133069992065, "logps/chosen": -0.6367626190185547, "logps/rejected": -3.4059667587280273, "loss": 1.1012, "nll_loss": 0.7921059131622314, "rewards/accuracies": 0.875, "rewards/chosen": -0.06367626786231995, "rewards/margins": 0.2769204378128052, "rewards/rejected": -0.34059667587280273, "step": 4961 }, { "epoch": 3.026994052157999, "grad_norm": 16.235111236572266, "learning_rate": 3.167666870789957e-06, "log_odds_chosen": 1.8594799041748047, "log_odds_ratio": -0.4570285379886627, "logits/chosen": -0.9263443350791931, "logits/rejected": -0.9922032356262207, "logps/chosen": -0.8672940135002136, "logps/rejected": -2.4311153888702393, "loss": 1.2958, "nll_loss": 1.21427583694458, "rewards/accuracies": 0.625, "rewards/chosen": -0.08672939985990524, "rewards/margins": 0.15638214349746704, "rewards/rejected": -0.24311155080795288, "step": 4962 }, { "epoch": 3.027604087235016, "grad_norm": 3.5693209171295166, "learning_rate": 3.1666870789957135e-06, "log_odds_chosen": 1.6487890481948853, "log_odds_ratio": -0.32691490650177, "logits/chosen": -0.7866601347923279, "logits/rejected": -0.7583284974098206, "logps/chosen": -0.7334364652633667, "logps/rejected": -1.824615240097046, "loss": 0.9616, "nll_loss": 0.9332761764526367, "rewards/accuracies": 0.875, "rewards/chosen": -0.07334363460540771, "rewards/margins": 0.10911789536476135, "rewards/rejected": -0.18246152997016907, "step": 4963 }, { "epoch": 3.028214122312033, "grad_norm": 5.2939043045043945, "learning_rate": 3.1657072872014692e-06, "log_odds_chosen": 1.1712371110916138, "log_odds_ratio": -0.44148701429367065, "logits/chosen": -0.8973272442817688, "logits/rejected": -0.9074759483337402, "logps/chosen": -0.7240614891052246, "logps/rejected": -1.5714110136032104, "loss": 1.1196, "nll_loss": 1.2120236158370972, "rewards/accuracies": 0.75, "rewards/chosen": -0.07240614295005798, "rewards/margins": 0.0847349688410759, "rewards/rejected": -0.15714111924171448, "step": 4964 }, { "epoch": 3.02882415738905, "grad_norm": 9.269549369812012, "learning_rate": 3.1647274954072258e-06, "log_odds_chosen": 2.122382402420044, "log_odds_ratio": -0.2817859947681427, "logits/chosen": -0.7093862295150757, "logits/rejected": -0.9588615894317627, "logps/chosen": -1.0351232290267944, "logps/rejected": -2.6091270446777344, "loss": 1.2886, "nll_loss": 1.3110214471817017, "rewards/accuracies": 0.875, "rewards/chosen": -0.10351233929395676, "rewards/margins": 0.15740038454532623, "rewards/rejected": -0.2609127163887024, "step": 4965 }, { "epoch": 3.029434192466067, "grad_norm": 1.6594195365905762, "learning_rate": 3.1637477036129823e-06, "log_odds_chosen": 2.58034086227417, "log_odds_ratio": -0.3700487017631531, "logits/chosen": -0.9556893706321716, "logits/rejected": -0.9760269522666931, "logps/chosen": -0.5987797975540161, "logps/rejected": -2.675473928451538, "loss": 1.1059, "nll_loss": 0.851033091545105, "rewards/accuracies": 0.75, "rewards/chosen": -0.05987798050045967, "rewards/margins": 0.20766940712928772, "rewards/rejected": -0.2675473988056183, "step": 4966 }, { "epoch": 3.0300442275430837, "grad_norm": 2.1172244548797607, "learning_rate": 3.1627679118187384e-06, "log_odds_chosen": 4.358253479003906, "log_odds_ratio": -0.15400327742099762, "logits/chosen": -0.8498712182044983, "logits/rejected": -0.9694263935089111, "logps/chosen": -0.5895863771438599, "logps/rejected": -4.28387451171875, "loss": 0.9796, "nll_loss": 0.7978204488754272, "rewards/accuracies": 1.0, "rewards/chosen": -0.058958638459444046, "rewards/margins": 0.36942875385284424, "rewards/rejected": -0.4283874034881592, "step": 4967 }, { "epoch": 3.0306542626201005, "grad_norm": 1.6013010740280151, "learning_rate": 3.1617881200244946e-06, "log_odds_chosen": 1.4078336954116821, "log_odds_ratio": -0.49834951758384705, "logits/chosen": -0.6390555500984192, "logits/rejected": -0.6375341415405273, "logps/chosen": -0.6349731087684631, "logps/rejected": -1.7151578664779663, "loss": 0.9669, "nll_loss": 0.9073747396469116, "rewards/accuracies": 0.75, "rewards/chosen": -0.06349731981754303, "rewards/margins": 0.10801847279071808, "rewards/rejected": -0.1715157926082611, "step": 4968 }, { "epoch": 3.031264297697118, "grad_norm": 2.037795305252075, "learning_rate": 3.160808328230251e-06, "log_odds_chosen": 1.9857547283172607, "log_odds_ratio": -0.3315506875514984, "logits/chosen": -0.7232720255851746, "logits/rejected": -0.8628029823303223, "logps/chosen": -0.6375343799591064, "logps/rejected": -2.0145790576934814, "loss": 1.0385, "nll_loss": 0.8443596363067627, "rewards/accuracies": 0.875, "rewards/chosen": -0.06375343352556229, "rewards/margins": 0.13770447671413422, "rewards/rejected": -0.2014579176902771, "step": 4969 }, { "epoch": 3.0318743327741347, "grad_norm": 1.4417986869812012, "learning_rate": 3.1598285364360072e-06, "log_odds_chosen": 1.5473618507385254, "log_odds_ratio": -0.48436418175697327, "logits/chosen": -0.7380294799804688, "logits/rejected": -0.7955379486083984, "logps/chosen": -0.8627285957336426, "logps/rejected": -2.1260476112365723, "loss": 1.2029, "nll_loss": 0.9438456296920776, "rewards/accuracies": 0.75, "rewards/chosen": -0.08627285808324814, "rewards/margins": 0.12633191049098969, "rewards/rejected": -0.21260476112365723, "step": 4970 }, { "epoch": 3.0324843678511515, "grad_norm": 1.4639408588409424, "learning_rate": 3.1588487446417633e-06, "log_odds_chosen": 2.209105968475342, "log_odds_ratio": -0.2808101773262024, "logits/chosen": -0.6959212422370911, "logits/rejected": -0.7022867202758789, "logps/chosen": -0.6223023533821106, "logps/rejected": -2.067110300064087, "loss": 1.021, "nll_loss": 0.8812348246574402, "rewards/accuracies": 0.875, "rewards/chosen": -0.06223023310303688, "rewards/margins": 0.14448080956935883, "rewards/rejected": -0.2067110389471054, "step": 4971 }, { "epoch": 3.0330944029281683, "grad_norm": 12.059218406677246, "learning_rate": 3.15786895284752e-06, "log_odds_chosen": 4.274146556854248, "log_odds_ratio": -0.3073597550392151, "logits/chosen": -0.7434501647949219, "logits/rejected": -0.783324658870697, "logps/chosen": -0.7425639629364014, "logps/rejected": -4.535885810852051, "loss": 1.0643, "nll_loss": 1.0094460248947144, "rewards/accuracies": 0.875, "rewards/chosen": -0.07425639033317566, "rewards/margins": 0.37933218479156494, "rewards/rejected": -0.453588604927063, "step": 4972 }, { "epoch": 3.033704438005185, "grad_norm": 1.3551522493362427, "learning_rate": 3.156889161053276e-06, "log_odds_chosen": 2.996361255645752, "log_odds_ratio": -0.29767292737960815, "logits/chosen": -0.8421284556388855, "logits/rejected": -0.909601628780365, "logps/chosen": -0.7391233444213867, "logps/rejected": -3.219947338104248, "loss": 0.986, "nll_loss": 1.0752571821212769, "rewards/accuracies": 0.875, "rewards/chosen": -0.07391233742237091, "rewards/margins": 0.24808239936828613, "rewards/rejected": -0.32199472188949585, "step": 4973 }, { "epoch": 3.034314473082202, "grad_norm": 1.8066201210021973, "learning_rate": 3.1559093692590326e-06, "log_odds_chosen": 2.826401948928833, "log_odds_ratio": -0.4538624882698059, "logits/chosen": -0.8533370494842529, "logits/rejected": -0.9049588441848755, "logps/chosen": -0.6914659738540649, "logps/rejected": -3.1546976566314697, "loss": 1.012, "nll_loss": 0.8753753900527954, "rewards/accuracies": 0.75, "rewards/chosen": -0.06914660334587097, "rewards/margins": 0.24632315337657928, "rewards/rejected": -0.31546974182128906, "step": 4974 }, { "epoch": 3.0349245081592193, "grad_norm": 1.9768391847610474, "learning_rate": 3.1549295774647883e-06, "log_odds_chosen": 1.4686638116836548, "log_odds_ratio": -0.6280522346496582, "logits/chosen": -0.891771674156189, "logits/rejected": -0.9419561624526978, "logps/chosen": -0.8928375840187073, "logps/rejected": -2.172555923461914, "loss": 1.1559, "nll_loss": 1.0318071842193604, "rewards/accuracies": 0.5, "rewards/chosen": -0.0892837643623352, "rewards/margins": 0.127971813082695, "rewards/rejected": -0.2172555923461914, "step": 4975 }, { "epoch": 3.035534543236236, "grad_norm": 3.473900079727173, "learning_rate": 3.153949785670545e-06, "log_odds_chosen": 2.4483275413513184, "log_odds_ratio": -0.517727255821228, "logits/chosen": -0.6390873193740845, "logits/rejected": -0.877497673034668, "logps/chosen": -0.8414788842201233, "logps/rejected": -2.901898145675659, "loss": 1.106, "nll_loss": 1.019982933998108, "rewards/accuracies": 0.75, "rewards/chosen": -0.08414789289236069, "rewards/margins": 0.20604191720485687, "rewards/rejected": -0.29018980264663696, "step": 4976 }, { "epoch": 3.036144578313253, "grad_norm": 1.583245873451233, "learning_rate": 3.1529699938763013e-06, "log_odds_chosen": 0.3422935903072357, "log_odds_ratio": -0.6465251445770264, "logits/chosen": -0.8440215587615967, "logits/rejected": -0.9521117806434631, "logps/chosen": -0.9866615533828735, "logps/rejected": -1.1971702575683594, "loss": 1.112, "nll_loss": 1.1909830570220947, "rewards/accuracies": 0.5, "rewards/chosen": -0.09866615384817123, "rewards/margins": 0.021050864830613136, "rewards/rejected": -0.11971701681613922, "step": 4977 }, { "epoch": 3.03675461339027, "grad_norm": 0.953410804271698, "learning_rate": 3.151990202082057e-06, "log_odds_chosen": 1.5136971473693848, "log_odds_ratio": -0.5427061915397644, "logits/chosen": -0.921216607093811, "logits/rejected": -1.0689817667007446, "logps/chosen": -0.7089710235595703, "logps/rejected": -1.932616949081421, "loss": 0.9351, "nll_loss": 0.8823891282081604, "rewards/accuracies": 0.625, "rewards/chosen": -0.07089710235595703, "rewards/margins": 0.1223645806312561, "rewards/rejected": -0.19326168298721313, "step": 4978 }, { "epoch": 3.0373646484672867, "grad_norm": 1.636165738105774, "learning_rate": 3.1510104102878136e-06, "log_odds_chosen": 0.018784940242767334, "log_odds_ratio": -0.6935749053955078, "logits/chosen": -0.9898192882537842, "logits/rejected": -0.8451769351959229, "logps/chosen": -0.9399664998054504, "logps/rejected": -0.9529874324798584, "loss": 0.9445, "nll_loss": 1.2477831840515137, "rewards/accuracies": 0.375, "rewards/chosen": -0.09399665892124176, "rewards/margins": 0.0013020862825214863, "rewards/rejected": -0.09529874473810196, "step": 4979 }, { "epoch": 3.037974683544304, "grad_norm": 21.42072296142578, "learning_rate": 3.15003061849357e-06, "log_odds_chosen": 1.7361117601394653, "log_odds_ratio": -0.417287141084671, "logits/chosen": -0.7753388285636902, "logits/rejected": -0.9118174314498901, "logps/chosen": -0.7099658250808716, "logps/rejected": -1.773275375366211, "loss": 1.0621, "nll_loss": 0.9863951206207275, "rewards/accuracies": 0.75, "rewards/chosen": -0.07099658250808716, "rewards/margins": 0.10633096098899841, "rewards/rejected": -0.17732754349708557, "step": 4980 }, { "epoch": 3.038584718621321, "grad_norm": 1.8591419458389282, "learning_rate": 3.1490508266993263e-06, "log_odds_chosen": 1.2615313529968262, "log_odds_ratio": -0.437669962644577, "logits/chosen": -1.0366501808166504, "logits/rejected": -0.8810442090034485, "logps/chosen": -0.9423984289169312, "logps/rejected": -1.9605205059051514, "loss": 1.0467, "nll_loss": 1.0391907691955566, "rewards/accuracies": 0.75, "rewards/chosen": -0.09423984587192535, "rewards/margins": 0.1018121987581253, "rewards/rejected": -0.19605204463005066, "step": 4981 }, { "epoch": 3.0391947536983377, "grad_norm": 1.8733729124069214, "learning_rate": 3.1480710349050824e-06, "log_odds_chosen": 0.6092901825904846, "log_odds_ratio": -0.6481662392616272, "logits/chosen": -1.1275463104248047, "logits/rejected": -1.1017985343933105, "logps/chosen": -1.1117103099822998, "logps/rejected": -1.6134718656539917, "loss": 1.2133, "nll_loss": 1.3113462924957275, "rewards/accuracies": 0.5, "rewards/chosen": -0.11117103695869446, "rewards/margins": 0.05017614737153053, "rewards/rejected": -0.1613471806049347, "step": 4982 }, { "epoch": 3.0398047887753545, "grad_norm": 1.038704752922058, "learning_rate": 3.147091243110839e-06, "log_odds_chosen": 1.5454171895980835, "log_odds_ratio": -0.38492918014526367, "logits/chosen": -0.9279383420944214, "logits/rejected": -1.0424171686172485, "logps/chosen": -0.9955200552940369, "logps/rejected": -2.2559309005737305, "loss": 1.092, "nll_loss": 1.0987982749938965, "rewards/accuracies": 0.75, "rewards/chosen": -0.09955199807882309, "rewards/margins": 0.12604105472564697, "rewards/rejected": -0.22559306025505066, "step": 4983 }, { "epoch": 3.0404148238523714, "grad_norm": 1.6246654987335205, "learning_rate": 3.146111451316595e-06, "log_odds_chosen": 2.460707187652588, "log_odds_ratio": -0.36930814385414124, "logits/chosen": -0.9219419956207275, "logits/rejected": -0.9558863639831543, "logps/chosen": -0.9128700494766235, "logps/rejected": -2.951979637145996, "loss": 1.093, "nll_loss": 1.1358362436294556, "rewards/accuracies": 0.625, "rewards/chosen": -0.09128700941801071, "rewards/margins": 0.2039109468460083, "rewards/rejected": -0.2951979637145996, "step": 4984 }, { "epoch": 3.0410248589293882, "grad_norm": 3.498009443283081, "learning_rate": 3.145131659522351e-06, "log_odds_chosen": 2.79797625541687, "log_odds_ratio": -0.3476807475090027, "logits/chosen": -0.7970938682556152, "logits/rejected": -0.9802331328392029, "logps/chosen": -0.751252293586731, "logps/rejected": -2.96513295173645, "loss": 1.1427, "nll_loss": 0.919460654258728, "rewards/accuracies": 0.75, "rewards/chosen": -0.07512522488832474, "rewards/margins": 0.2213880717754364, "rewards/rejected": -0.29651328921318054, "step": 4985 }, { "epoch": 3.0416348940064055, "grad_norm": 5.497838973999023, "learning_rate": 3.1441518677281077e-06, "log_odds_chosen": 2.2008700370788574, "log_odds_ratio": -0.2700187861919403, "logits/chosen": -0.8305935263633728, "logits/rejected": -0.8966069221496582, "logps/chosen": -0.6260586380958557, "logps/rejected": -2.1545095443725586, "loss": 1.167, "nll_loss": 1.0882948637008667, "rewards/accuracies": 0.875, "rewards/chosen": -0.06260586529970169, "rewards/margins": 0.1528450846672058, "rewards/rejected": -0.2154509425163269, "step": 4986 }, { "epoch": 3.0422449290834224, "grad_norm": 1.522801399230957, "learning_rate": 3.143172075933864e-06, "log_odds_chosen": 1.9379223585128784, "log_odds_ratio": -0.3877575993537903, "logits/chosen": -0.9539203643798828, "logits/rejected": -0.9156345725059509, "logps/chosen": -0.9291280508041382, "logps/rejected": -2.555851936340332, "loss": 1.2152, "nll_loss": 1.0519254207611084, "rewards/accuracies": 0.625, "rewards/chosen": -0.09291280806064606, "rewards/margins": 0.16267240047454834, "rewards/rejected": -0.2555851936340332, "step": 4987 }, { "epoch": 3.042854964160439, "grad_norm": 2.1691396236419678, "learning_rate": 3.1421922841396204e-06, "log_odds_chosen": 0.6206974983215332, "log_odds_ratio": -0.8512907028198242, "logits/chosen": -0.7546459436416626, "logits/rejected": -0.8066065907478333, "logps/chosen": -0.8252506256103516, "logps/rejected": -1.496195912361145, "loss": 1.0012, "nll_loss": 1.0105953216552734, "rewards/accuracies": 0.25, "rewards/chosen": -0.08252506703138351, "rewards/margins": 0.06709452718496323, "rewards/rejected": -0.14961959421634674, "step": 4988 }, { "epoch": 3.043464999237456, "grad_norm": 2.6523377895355225, "learning_rate": 3.1412124923453765e-06, "log_odds_chosen": 3.571683406829834, "log_odds_ratio": -0.33814188838005066, "logits/chosen": -0.8118712902069092, "logits/rejected": -1.0495495796203613, "logps/chosen": -0.6134248375892639, "logps/rejected": -3.462308168411255, "loss": 0.9882, "nll_loss": 0.8705291748046875, "rewards/accuracies": 0.75, "rewards/chosen": -0.06134248897433281, "rewards/margins": 0.2848883271217346, "rewards/rejected": -0.34623080492019653, "step": 4989 }, { "epoch": 3.044075034314473, "grad_norm": 1.2038310766220093, "learning_rate": 3.1402327005511326e-06, "log_odds_chosen": 1.6394802331924438, "log_odds_ratio": -0.3438255786895752, "logits/chosen": -0.7980719208717346, "logits/rejected": -0.7433940768241882, "logps/chosen": -0.6528036594390869, "logps/rejected": -1.8303970098495483, "loss": 1.0557, "nll_loss": 0.9223655462265015, "rewards/accuracies": 0.75, "rewards/chosen": -0.06528037041425705, "rewards/margins": 0.1177593395113945, "rewards/rejected": -0.18303970992565155, "step": 4990 }, { "epoch": 3.04468506939149, "grad_norm": 1.8921537399291992, "learning_rate": 3.139252908756889e-06, "log_odds_chosen": 1.8870275020599365, "log_odds_ratio": -0.4857456088066101, "logits/chosen": -0.7046087384223938, "logits/rejected": -0.8407449722290039, "logps/chosen": -0.6444116830825806, "logps/rejected": -2.0815329551696777, "loss": 1.0954, "nll_loss": 0.8789899349212646, "rewards/accuracies": 0.75, "rewards/chosen": -0.06444116681814194, "rewards/margins": 0.1437121331691742, "rewards/rejected": -0.20815330743789673, "step": 4991 }, { "epoch": 3.045295104468507, "grad_norm": 1.3153921365737915, "learning_rate": 3.1382731169626457e-06, "log_odds_chosen": 0.14272111654281616, "log_odds_ratio": -0.6740310192108154, "logits/chosen": -0.8811245560646057, "logits/rejected": -0.8396850824356079, "logps/chosen": -0.8510178327560425, "logps/rejected": -0.8926229476928711, "loss": 1.0949, "nll_loss": 1.1833481788635254, "rewards/accuracies": 0.5, "rewards/chosen": -0.08510178327560425, "rewards/margins": 0.0041605099104344845, "rewards/rejected": -0.08926229178905487, "step": 4992 }, { "epoch": 3.045905139545524, "grad_norm": 1.4522908926010132, "learning_rate": 3.1372933251684014e-06, "log_odds_chosen": 2.901240348815918, "log_odds_ratio": -0.3711056709289551, "logits/chosen": -0.8734182119369507, "logits/rejected": -0.9906802177429199, "logps/chosen": -0.6092513799667358, "logps/rejected": -2.8838372230529785, "loss": 1.031, "nll_loss": 0.9144290685653687, "rewards/accuracies": 0.875, "rewards/chosen": -0.06092514097690582, "rewards/margins": 0.22745856642723083, "rewards/rejected": -0.28838372230529785, "step": 4993 }, { "epoch": 3.0465151746225407, "grad_norm": 0.9983800649642944, "learning_rate": 3.136313533374158e-06, "log_odds_chosen": 1.8358099460601807, "log_odds_ratio": -0.31551650166511536, "logits/chosen": -0.7894419431686401, "logits/rejected": -0.9460996985435486, "logps/chosen": -0.8007339835166931, "logps/rejected": -2.192723512649536, "loss": 0.8676, "nll_loss": 0.8895871639251709, "rewards/accuracies": 0.875, "rewards/chosen": -0.08007340133190155, "rewards/margins": 0.13919895887374878, "rewards/rejected": -0.21927234530448914, "step": 4994 }, { "epoch": 3.0471252096995576, "grad_norm": 1.206328272819519, "learning_rate": 3.135333741579914e-06, "log_odds_chosen": 0.8426005840301514, "log_odds_ratio": -0.457237184047699, "logits/chosen": -1.0928919315338135, "logits/rejected": -0.953061580657959, "logps/chosen": -0.9168171882629395, "logps/rejected": -1.5278265476226807, "loss": 1.1576, "nll_loss": 0.9668106436729431, "rewards/accuracies": 0.75, "rewards/chosen": -0.09168172627687454, "rewards/margins": 0.061100929975509644, "rewards/rejected": -0.1527826488018036, "step": 4995 }, { "epoch": 3.047735244776575, "grad_norm": 11.307072639465332, "learning_rate": 3.13435394978567e-06, "log_odds_chosen": 2.9270358085632324, "log_odds_ratio": -0.17190143465995789, "logits/chosen": -0.7395726442337036, "logits/rejected": -0.9084271192550659, "logps/chosen": -0.6699376702308655, "logps/rejected": -2.899057626724243, "loss": 1.2039, "nll_loss": 0.9576888084411621, "rewards/accuracies": 1.0, "rewards/chosen": -0.06699375808238983, "rewards/margins": 0.22291199862957, "rewards/rejected": -0.2899057865142822, "step": 4996 }, { "epoch": 3.0483452798535917, "grad_norm": 1.298688292503357, "learning_rate": 3.1333741579914268e-06, "log_odds_chosen": 1.0325850248336792, "log_odds_ratio": -0.5288753509521484, "logits/chosen": -1.0436220169067383, "logits/rejected": -1.0719510316848755, "logps/chosen": -0.9119967222213745, "logps/rejected": -1.414371132850647, "loss": 1.0746, "nll_loss": 1.3449122905731201, "rewards/accuracies": 0.625, "rewards/chosen": -0.09119968116283417, "rewards/margins": 0.05023743584752083, "rewards/rejected": -0.1414371132850647, "step": 4997 }, { "epoch": 3.0489553149306086, "grad_norm": 1.8709793090820312, "learning_rate": 3.132394366197183e-06, "log_odds_chosen": 0.8826436996459961, "log_odds_ratio": -0.3953776955604553, "logits/chosen": -0.8632683157920837, "logits/rejected": -0.9591469764709473, "logps/chosen": -0.8894138336181641, "logps/rejected": -1.553946614265442, "loss": 1.1032, "nll_loss": 1.1023410558700562, "rewards/accuracies": 0.875, "rewards/chosen": -0.08894138783216476, "rewards/margins": 0.06645327806472778, "rewards/rejected": -0.15539465844631195, "step": 4998 }, { "epoch": 3.0495653500076254, "grad_norm": 3.2169179916381836, "learning_rate": 3.131414574402939e-06, "log_odds_chosen": 1.9629429578781128, "log_odds_ratio": -0.34663301706314087, "logits/chosen": -0.8319047689437866, "logits/rejected": -0.910114049911499, "logps/chosen": -0.8827087879180908, "logps/rejected": -2.531158924102783, "loss": 1.0719, "nll_loss": 1.130934238433838, "rewards/accuracies": 0.625, "rewards/chosen": -0.0882708728313446, "rewards/margins": 0.1648450344800949, "rewards/rejected": -0.2531158924102783, "step": 4999 }, { "epoch": 3.0501753850846423, "grad_norm": 1.7537375688552856, "learning_rate": 3.1304347826086955e-06, "log_odds_chosen": 2.077732563018799, "log_odds_ratio": -0.3498692810535431, "logits/chosen": -0.723231852054596, "logits/rejected": -0.7735257744789124, "logps/chosen": -0.7810612916946411, "logps/rejected": -2.3655519485473633, "loss": 1.0099, "nll_loss": 0.897499680519104, "rewards/accuracies": 0.75, "rewards/chosen": -0.07810613512992859, "rewards/margins": 0.15844905376434326, "rewards/rejected": -0.23655518889427185, "step": 5000 }, { "epoch": 3.050785420161659, "grad_norm": 1.7226284742355347, "learning_rate": 3.1294549908144517e-06, "log_odds_chosen": 1.2075414657592773, "log_odds_ratio": -0.5876161456108093, "logits/chosen": -0.7964913845062256, "logits/rejected": -0.8314827680587769, "logps/chosen": -0.8829922080039978, "logps/rejected": -1.7848315238952637, "loss": 1.0234, "nll_loss": 0.9641479253768921, "rewards/accuracies": 0.625, "rewards/chosen": -0.0882992222905159, "rewards/margins": 0.09018394351005554, "rewards/rejected": -0.17848315834999084, "step": 5001 }, { "epoch": 3.0513954552386764, "grad_norm": 1.837415337562561, "learning_rate": 3.1284751990202082e-06, "log_odds_chosen": 2.5493462085723877, "log_odds_ratio": -0.4005067050457001, "logits/chosen": -0.5825221538543701, "logits/rejected": -0.7573596239089966, "logps/chosen": -0.7100476026535034, "logps/rejected": -2.7863106727600098, "loss": 1.08, "nll_loss": 0.8283225893974304, "rewards/accuracies": 0.75, "rewards/chosen": -0.07100476324558258, "rewards/margins": 0.2076263129711151, "rewards/rejected": -0.2786310911178589, "step": 5002 }, { "epoch": 3.0520054903156932, "grad_norm": 1.3148211240768433, "learning_rate": 3.1274954072259643e-06, "log_odds_chosen": 3.15077805519104, "log_odds_ratio": -0.23580145835876465, "logits/chosen": -0.7690356373786926, "logits/rejected": -0.9674313068389893, "logps/chosen": -0.649151623249054, "logps/rejected": -3.120314121246338, "loss": 0.9648, "nll_loss": 0.7527847290039062, "rewards/accuracies": 0.875, "rewards/chosen": -0.06491515785455704, "rewards/margins": 0.24711626768112183, "rewards/rejected": -0.3120313882827759, "step": 5003 }, { "epoch": 3.05261552539271, "grad_norm": 3.2883803844451904, "learning_rate": 3.1265156154317205e-06, "log_odds_chosen": 1.9179497957229614, "log_odds_ratio": -0.33704593777656555, "logits/chosen": -0.8136590123176575, "logits/rejected": -0.8793003559112549, "logps/chosen": -0.6736499667167664, "logps/rejected": -2.0498483180999756, "loss": 1.112, "nll_loss": 0.9550099968910217, "rewards/accuracies": 0.75, "rewards/chosen": -0.06736499071121216, "rewards/margins": 0.13761985301971436, "rewards/rejected": -0.2049848437309265, "step": 5004 }, { "epoch": 3.053225560469727, "grad_norm": 1.376264214515686, "learning_rate": 3.125535823637477e-06, "log_odds_chosen": 1.2712078094482422, "log_odds_ratio": -0.4581676125526428, "logits/chosen": -0.7501168251037598, "logits/rejected": -0.8214942812919617, "logps/chosen": -0.8570780754089355, "logps/rejected": -1.877981185913086, "loss": 0.9247, "nll_loss": 0.8932302594184875, "rewards/accuracies": 0.875, "rewards/chosen": -0.08570781350135803, "rewards/margins": 0.10209032148122787, "rewards/rejected": -0.1877981424331665, "step": 5005 }, { "epoch": 3.0538355955467438, "grad_norm": 1.2246074676513672, "learning_rate": 3.1245560318432336e-06, "log_odds_chosen": 3.326260566711426, "log_odds_ratio": -0.20185495913028717, "logits/chosen": -0.8046316504478455, "logits/rejected": -1.0897750854492188, "logps/chosen": -0.7301343679428101, "logps/rejected": -3.4517040252685547, "loss": 1.0097, "nll_loss": 0.8840990662574768, "rewards/accuracies": 1.0, "rewards/chosen": -0.07301343977451324, "rewards/margins": 0.2721569538116455, "rewards/rejected": -0.34517040848731995, "step": 5006 }, { "epoch": 3.054445630623761, "grad_norm": 4.35747766494751, "learning_rate": 3.1235762400489893e-06, "log_odds_chosen": 3.4834415912628174, "log_odds_ratio": -0.27454128861427307, "logits/chosen": -0.7776957750320435, "logits/rejected": -0.9865610599517822, "logps/chosen": -0.7544407248497009, "logps/rejected": -3.7362489700317383, "loss": 1.119, "nll_loss": 0.9830819368362427, "rewards/accuracies": 0.875, "rewards/chosen": -0.07544407248497009, "rewards/margins": 0.29818081855773926, "rewards/rejected": -0.37362489104270935, "step": 5007 }, { "epoch": 3.055055665700778, "grad_norm": 1.7556185722351074, "learning_rate": 3.122596448254746e-06, "log_odds_chosen": 0.9407529830932617, "log_odds_ratio": -0.516637921333313, "logits/chosen": -0.9596071243286133, "logits/rejected": -0.9387978315353394, "logps/chosen": -0.9175330400466919, "logps/rejected": -1.6405303478240967, "loss": 1.2158, "nll_loss": 1.0320773124694824, "rewards/accuracies": 0.625, "rewards/chosen": -0.09175329655408859, "rewards/margins": 0.07229973375797272, "rewards/rejected": -0.1640530377626419, "step": 5008 }, { "epoch": 3.0556657007777948, "grad_norm": 5.641814231872559, "learning_rate": 3.1216166564605023e-06, "log_odds_chosen": 0.03505522385239601, "log_odds_ratio": -0.7040306329727173, "logits/chosen": -0.8811950087547302, "logits/rejected": -0.9022301435470581, "logps/chosen": -0.9045842289924622, "logps/rejected": -0.8979241251945496, "loss": 1.0184, "nll_loss": 1.010945439338684, "rewards/accuracies": 0.375, "rewards/chosen": -0.09045842289924622, "rewards/margins": -0.0006660115905106068, "rewards/rejected": -0.0897924154996872, "step": 5009 }, { "epoch": 3.0562757358548116, "grad_norm": 1.0152355432510376, "learning_rate": 3.120636864666258e-06, "log_odds_chosen": 3.1653385162353516, "log_odds_ratio": -0.12748262286186218, "logits/chosen": -0.6543938517570496, "logits/rejected": -0.8716398477554321, "logps/chosen": -0.42035287618637085, "logps/rejected": -2.6596922874450684, "loss": 0.9624, "nll_loss": 0.7598531246185303, "rewards/accuracies": 1.0, "rewards/chosen": -0.042035289108753204, "rewards/margins": 0.22393393516540527, "rewards/rejected": -0.2659692168235779, "step": 5010 }, { "epoch": 3.0568857709318284, "grad_norm": 2.102684736251831, "learning_rate": 3.1196570728720146e-06, "log_odds_chosen": 2.5535471439361572, "log_odds_ratio": -0.3427750766277313, "logits/chosen": -0.8590352535247803, "logits/rejected": -1.1605300903320312, "logps/chosen": -0.7140403389930725, "logps/rejected": -2.7308340072631836, "loss": 0.9891, "nll_loss": 0.9215086698532104, "rewards/accuracies": 0.875, "rewards/chosen": -0.07140403985977173, "rewards/margins": 0.20167936384677887, "rewards/rejected": -0.2730833888053894, "step": 5011 }, { "epoch": 3.0574958060088453, "grad_norm": 9.391961097717285, "learning_rate": 3.1186772810777707e-06, "log_odds_chosen": 1.7269821166992188, "log_odds_ratio": -0.3380466103553772, "logits/chosen": -0.8090907335281372, "logits/rejected": -0.944191575050354, "logps/chosen": -0.8431322574615479, "logps/rejected": -2.099403142929077, "loss": 1.0321, "nll_loss": 0.8508956432342529, "rewards/accuracies": 0.875, "rewards/chosen": -0.08431322872638702, "rewards/margins": 0.1256270855665207, "rewards/rejected": -0.20994031429290771, "step": 5012 }, { "epoch": 3.0581058410858626, "grad_norm": 1.2166550159454346, "learning_rate": 3.1176974892835273e-06, "log_odds_chosen": 3.4502451419830322, "log_odds_ratio": -0.20441004633903503, "logits/chosen": -1.0148550271987915, "logits/rejected": -0.9942858815193176, "logps/chosen": -0.9335817098617554, "logps/rejected": -3.8493764400482178, "loss": 1.1337, "nll_loss": 1.1007211208343506, "rewards/accuracies": 0.875, "rewards/chosen": -0.09335818141698837, "rewards/margins": 0.2915794849395752, "rewards/rejected": -0.3849376440048218, "step": 5013 }, { "epoch": 3.0587158761628794, "grad_norm": 1.429335117340088, "learning_rate": 3.1167176974892834e-06, "log_odds_chosen": 2.679603099822998, "log_odds_ratio": -0.15637733042240143, "logits/chosen": -0.7821323871612549, "logits/rejected": -0.9213329553604126, "logps/chosen": -0.751448929309845, "logps/rejected": -2.813283920288086, "loss": 1.0181, "nll_loss": 0.8013722896575928, "rewards/accuracies": 1.0, "rewards/chosen": -0.07514489442110062, "rewards/margins": 0.20618347823619843, "rewards/rejected": -0.28132838010787964, "step": 5014 }, { "epoch": 3.0593259112398963, "grad_norm": 1.340155005455017, "learning_rate": 3.1157379056950395e-06, "log_odds_chosen": 1.2378337383270264, "log_odds_ratio": -0.5209591388702393, "logits/chosen": -0.8848108053207397, "logits/rejected": -0.8967524766921997, "logps/chosen": -1.002807855606079, "logps/rejected": -2.0057411193847656, "loss": 1.0846, "nll_loss": 1.060018539428711, "rewards/accuracies": 0.625, "rewards/chosen": -0.10028079152107239, "rewards/margins": 0.10029332339763641, "rewards/rejected": -0.2005741000175476, "step": 5015 }, { "epoch": 3.059935946316913, "grad_norm": 11.880914688110352, "learning_rate": 3.114758113900796e-06, "log_odds_chosen": 1.340040922164917, "log_odds_ratio": -0.5532643795013428, "logits/chosen": -0.8285213112831116, "logits/rejected": -0.8285306692123413, "logps/chosen": -0.9229875802993774, "logps/rejected": -1.9082064628601074, "loss": 1.0775, "nll_loss": 0.9808127880096436, "rewards/accuracies": 0.625, "rewards/chosen": -0.09229875355958939, "rewards/margins": 0.09852190315723419, "rewards/rejected": -0.19082064926624298, "step": 5016 }, { "epoch": 3.06054598139393, "grad_norm": 3.3850162029266357, "learning_rate": 3.113778322106552e-06, "log_odds_chosen": 1.5369048118591309, "log_odds_ratio": -0.3342438042163849, "logits/chosen": -0.740493655204773, "logits/rejected": -0.7421120405197144, "logps/chosen": -0.5543796420097351, "logps/rejected": -1.5297733545303345, "loss": 1.0111, "nll_loss": 0.8630493879318237, "rewards/accuracies": 0.875, "rewards/chosen": -0.05543796718120575, "rewards/margins": 0.09753937274217606, "rewards/rejected": -0.1529773324728012, "step": 5017 }, { "epoch": 3.0611560164709473, "grad_norm": 1.6739205121994019, "learning_rate": 3.1127985303123083e-06, "log_odds_chosen": 1.9520676136016846, "log_odds_ratio": -0.2622412443161011, "logits/chosen": -0.6772134304046631, "logits/rejected": -0.620206892490387, "logps/chosen": -0.7479479312896729, "logps/rejected": -2.242682456970215, "loss": 1.0801, "nll_loss": 0.9372918605804443, "rewards/accuracies": 0.875, "rewards/chosen": -0.07479479908943176, "rewards/margins": 0.14947345852851868, "rewards/rejected": -0.22426825761795044, "step": 5018 }, { "epoch": 3.061766051547964, "grad_norm": 1.295582890510559, "learning_rate": 3.111818738518065e-06, "log_odds_chosen": 2.456442356109619, "log_odds_ratio": -0.3587714433670044, "logits/chosen": -0.8335614204406738, "logits/rejected": -1.0004947185516357, "logps/chosen": -0.7697939872741699, "logps/rejected": -2.8852591514587402, "loss": 0.9485, "nll_loss": 0.8589441776275635, "rewards/accuracies": 0.875, "rewards/chosen": -0.07697939872741699, "rewards/margins": 0.21154654026031494, "rewards/rejected": -0.28852593898773193, "step": 5019 }, { "epoch": 3.062376086624981, "grad_norm": 1.6037973165512085, "learning_rate": 3.1108389467238214e-06, "log_odds_chosen": 2.1446800231933594, "log_odds_ratio": -0.5103134512901306, "logits/chosen": -1.047810435295105, "logits/rejected": -0.9792622327804565, "logps/chosen": -0.8214226961135864, "logps/rejected": -2.582174777984619, "loss": 1.1097, "nll_loss": 1.2527289390563965, "rewards/accuracies": 0.625, "rewards/chosen": -0.08214227855205536, "rewards/margins": 0.17607519030570984, "rewards/rejected": -0.258217453956604, "step": 5020 }, { "epoch": 3.062986121701998, "grad_norm": 1.5784465074539185, "learning_rate": 3.109859154929577e-06, "log_odds_chosen": 0.4476648271083832, "log_odds_ratio": -0.5916194915771484, "logits/chosen": -0.9559286832809448, "logits/rejected": -0.9111578464508057, "logps/chosen": -0.9543319344520569, "logps/rejected": -1.327946662902832, "loss": 1.0195, "nll_loss": 1.0914103984832764, "rewards/accuracies": 0.625, "rewards/chosen": -0.09543319046497345, "rewards/margins": 0.03736145794391632, "rewards/rejected": -0.13279464840888977, "step": 5021 }, { "epoch": 3.0635961567790146, "grad_norm": 2.8764140605926514, "learning_rate": 3.1088793631353336e-06, "log_odds_chosen": 0.5851728916168213, "log_odds_ratio": -0.5625337958335876, "logits/chosen": -0.8241981267929077, "logits/rejected": -1.0147194862365723, "logps/chosen": -0.8395765423774719, "logps/rejected": -1.1058356761932373, "loss": 0.952, "nll_loss": 0.8974244594573975, "rewards/accuracies": 0.625, "rewards/chosen": -0.08395765721797943, "rewards/margins": 0.02662590891122818, "rewards/rejected": -0.11058357357978821, "step": 5022 }, { "epoch": 3.064206191856032, "grad_norm": 5.55601167678833, "learning_rate": 3.10789957134109e-06, "log_odds_chosen": 3.1389784812927246, "log_odds_ratio": -0.3447410762310028, "logits/chosen": -0.8015625476837158, "logits/rejected": -0.9566601514816284, "logps/chosen": -0.7129113078117371, "logps/rejected": -3.2834668159484863, "loss": 1.0134, "nll_loss": 0.9411541223526001, "rewards/accuracies": 0.75, "rewards/chosen": -0.07129113376140594, "rewards/margins": 0.2570555806159973, "rewards/rejected": -0.32834669947624207, "step": 5023 }, { "epoch": 3.064816226933049, "grad_norm": 1.470041275024414, "learning_rate": 3.106919779546846e-06, "log_odds_chosen": 2.603695869445801, "log_odds_ratio": -0.36630040407180786, "logits/chosen": -0.8702600598335266, "logits/rejected": -1.0603880882263184, "logps/chosen": -0.6731639504432678, "logps/rejected": -2.708552360534668, "loss": 1.012, "nll_loss": 1.0230669975280762, "rewards/accuracies": 0.75, "rewards/chosen": -0.06731639802455902, "rewards/margins": 0.20353883504867554, "rewards/rejected": -0.27085524797439575, "step": 5024 }, { "epoch": 3.0654262620100656, "grad_norm": 1.1594520807266235, "learning_rate": 3.1059399877526024e-06, "log_odds_chosen": 1.9343068599700928, "log_odds_ratio": -0.32187697291374207, "logits/chosen": -0.8514140844345093, "logits/rejected": -0.9864822626113892, "logps/chosen": -0.7419384717941284, "logps/rejected": -2.026791572570801, "loss": 1.0263, "nll_loss": 1.206439733505249, "rewards/accuracies": 0.75, "rewards/chosen": -0.07419384270906448, "rewards/margins": 0.1284853219985962, "rewards/rejected": -0.20267915725708008, "step": 5025 }, { "epoch": 3.0660362970870825, "grad_norm": 2.0531976222991943, "learning_rate": 3.104960195958359e-06, "log_odds_chosen": 2.1805741786956787, "log_odds_ratio": -0.3042903244495392, "logits/chosen": -0.8449004888534546, "logits/rejected": -0.9163124561309814, "logps/chosen": -0.8314845561981201, "logps/rejected": -2.5284533500671387, "loss": 0.948, "nll_loss": 0.8394470810890198, "rewards/accuracies": 1.0, "rewards/chosen": -0.08314846456050873, "rewards/margins": 0.1696968674659729, "rewards/rejected": -0.2528453469276428, "step": 5026 }, { "epoch": 3.0666463321640993, "grad_norm": 6.216227054595947, "learning_rate": 3.103980404164115e-06, "log_odds_chosen": 1.4566619396209717, "log_odds_ratio": -0.6017309427261353, "logits/chosen": -1.1034152507781982, "logits/rejected": -1.0525180101394653, "logps/chosen": -0.953518807888031, "logps/rejected": -1.99961519241333, "loss": 0.9671, "nll_loss": 1.3895522356033325, "rewards/accuracies": 0.5, "rewards/chosen": -0.09535187482833862, "rewards/margins": 0.10460963100194931, "rewards/rejected": -0.19996151328086853, "step": 5027 }, { "epoch": 3.067256367241116, "grad_norm": 1.352256178855896, "learning_rate": 3.103000612369871e-06, "log_odds_chosen": 3.1272964477539062, "log_odds_ratio": -0.3449418246746063, "logits/chosen": -0.7494794130325317, "logits/rejected": -0.8737326264381409, "logps/chosen": -0.7558685541152954, "logps/rejected": -3.3845016956329346, "loss": 1.0272, "nll_loss": 0.9456308484077454, "rewards/accuracies": 0.875, "rewards/chosen": -0.07558684796094894, "rewards/margins": 0.2628633379936218, "rewards/rejected": -0.338450163602829, "step": 5028 }, { "epoch": 3.0678664023181335, "grad_norm": 1.442280888557434, "learning_rate": 3.1020208205756278e-06, "log_odds_chosen": 1.2206487655639648, "log_odds_ratio": -0.48160722851753235, "logits/chosen": -0.6845626831054688, "logits/rejected": -0.7384864091873169, "logps/chosen": -0.8303192257881165, "logps/rejected": -1.690494418144226, "loss": 0.998, "nll_loss": 1.0100616216659546, "rewards/accuracies": 0.75, "rewards/chosen": -0.08303192257881165, "rewards/margins": 0.08601751923561096, "rewards/rejected": -0.1690494269132614, "step": 5029 }, { "epoch": 3.0684764373951503, "grad_norm": 1.5113900899887085, "learning_rate": 3.101041028781384e-06, "log_odds_chosen": 1.891507863998413, "log_odds_ratio": -0.5056371092796326, "logits/chosen": -0.6567791104316711, "logits/rejected": -0.8929837942123413, "logps/chosen": -0.9397050738334656, "logps/rejected": -2.369489908218384, "loss": 1.0157, "nll_loss": 0.8913664817810059, "rewards/accuracies": 0.75, "rewards/chosen": -0.09397050738334656, "rewards/margins": 0.1429784893989563, "rewards/rejected": -0.23694899678230286, "step": 5030 }, { "epoch": 3.069086472472167, "grad_norm": 1.3317389488220215, "learning_rate": 3.10006123698714e-06, "log_odds_chosen": 1.6320037841796875, "log_odds_ratio": -0.3946835696697235, "logits/chosen": -0.7185840606689453, "logits/rejected": -0.943711519241333, "logps/chosen": -0.5862258672714233, "logps/rejected": -1.8137874603271484, "loss": 0.9494, "nll_loss": 0.7565898299217224, "rewards/accuracies": 0.875, "rewards/chosen": -0.058622587472200394, "rewards/margins": 0.12275616824626923, "rewards/rejected": -0.18137875199317932, "step": 5031 }, { "epoch": 3.069696507549184, "grad_norm": 1.1033003330230713, "learning_rate": 3.099081445192896e-06, "log_odds_chosen": 1.3377509117126465, "log_odds_ratio": -0.5203658938407898, "logits/chosen": -0.7690330147743225, "logits/rejected": -0.8260080218315125, "logps/chosen": -0.8229930400848389, "logps/rejected": -2.00418758392334, "loss": 0.9837, "nll_loss": 1.0288480520248413, "rewards/accuracies": 0.75, "rewards/chosen": -0.08229930698871613, "rewards/margins": 0.11811944842338562, "rewards/rejected": -0.20041875541210175, "step": 5032 }, { "epoch": 3.070306542626201, "grad_norm": 1.8952354192733765, "learning_rate": 3.0981016533986527e-06, "log_odds_chosen": 1.6333203315734863, "log_odds_ratio": -0.3741213083267212, "logits/chosen": -0.8829275369644165, "logits/rejected": -0.8593401312828064, "logps/chosen": -0.6828253269195557, "logps/rejected": -1.7955594062805176, "loss": 0.9675, "nll_loss": 0.9680202603340149, "rewards/accuracies": 0.75, "rewards/chosen": -0.06828253716230392, "rewards/margins": 0.11127340793609619, "rewards/rejected": -0.1795559525489807, "step": 5033 }, { "epoch": 3.070916577703218, "grad_norm": 1.4228190183639526, "learning_rate": 3.097121861604409e-06, "log_odds_chosen": 1.75355863571167, "log_odds_ratio": -0.38945096731185913, "logits/chosen": -0.9069770574569702, "logits/rejected": -0.8996409177780151, "logps/chosen": -0.7269598245620728, "logps/rejected": -2.050786256790161, "loss": 0.8963, "nll_loss": 0.8084067702293396, "rewards/accuracies": 0.75, "rewards/chosen": -0.07269598543643951, "rewards/margins": 0.13238264620304108, "rewards/rejected": -0.2050786167383194, "step": 5034 }, { "epoch": 3.071526612780235, "grad_norm": 3.2699642181396484, "learning_rate": 3.096142069810165e-06, "log_odds_chosen": 3.2682440280914307, "log_odds_ratio": -0.29735326766967773, "logits/chosen": -0.6969347596168518, "logits/rejected": -0.8151232004165649, "logps/chosen": -0.6910203099250793, "logps/rejected": -3.432602882385254, "loss": 0.8956, "nll_loss": 0.8823796510696411, "rewards/accuracies": 0.875, "rewards/chosen": -0.06910202652215958, "rewards/margins": 0.274158239364624, "rewards/rejected": -0.3432602882385254, "step": 5035 }, { "epoch": 3.072136647857252, "grad_norm": 20.854793548583984, "learning_rate": 3.0951622780159215e-06, "log_odds_chosen": 1.4061239957809448, "log_odds_ratio": -0.42079392075538635, "logits/chosen": -0.6977524757385254, "logits/rejected": -0.803993821144104, "logps/chosen": -0.6548309326171875, "logps/rejected": -1.5847351551055908, "loss": 0.9682, "nll_loss": 0.8719446063041687, "rewards/accuracies": 0.625, "rewards/chosen": -0.06548309326171875, "rewards/margins": 0.09299042820930481, "rewards/rejected": -0.15847353637218475, "step": 5036 }, { "epoch": 3.0727466829342687, "grad_norm": 1.2137285470962524, "learning_rate": 3.094182486221678e-06, "log_odds_chosen": 1.53824782371521, "log_odds_ratio": -0.4511450529098511, "logits/chosen": -0.7633188962936401, "logits/rejected": -0.9072188138961792, "logps/chosen": -0.7564697265625, "logps/rejected": -2.005258321762085, "loss": 1.1224, "nll_loss": 1.073473334312439, "rewards/accuracies": 0.75, "rewards/chosen": -0.07564697414636612, "rewards/margins": 0.12487886846065521, "rewards/rejected": -0.20052583515644073, "step": 5037 }, { "epoch": 3.0733567180112855, "grad_norm": 1.6890952587127686, "learning_rate": 3.0932026944274337e-06, "log_odds_chosen": 3.785813331604004, "log_odds_ratio": -0.22906818985939026, "logits/chosen": -0.8025496602058411, "logits/rejected": -0.8780767321586609, "logps/chosen": -0.6134918928146362, "logps/rejected": -3.767432928085327, "loss": 1.0641, "nll_loss": 0.8656125068664551, "rewards/accuracies": 1.0, "rewards/chosen": -0.061349183320999146, "rewards/margins": 0.3153941333293915, "rewards/rejected": -0.37674325704574585, "step": 5038 }, { "epoch": 3.0739667530883024, "grad_norm": 2.073901414871216, "learning_rate": 3.0922229026331902e-06, "log_odds_chosen": 1.4358327388763428, "log_odds_ratio": -0.4274672567844391, "logits/chosen": -0.8447828888893127, "logits/rejected": -1.018876075744629, "logps/chosen": -0.7344810366630554, "logps/rejected": -1.7936015129089355, "loss": 0.8382, "nll_loss": 0.846571683883667, "rewards/accuracies": 0.75, "rewards/chosen": -0.07344810664653778, "rewards/margins": 0.10591205954551697, "rewards/rejected": -0.17936016619205475, "step": 5039 }, { "epoch": 3.0745767881653197, "grad_norm": 4.136985778808594, "learning_rate": 3.091243110838947e-06, "log_odds_chosen": 1.888845443725586, "log_odds_ratio": -0.3225017786026001, "logits/chosen": -0.6705211400985718, "logits/rejected": -0.8458828926086426, "logps/chosen": -0.6817461252212524, "logps/rejected": -2.007664442062378, "loss": 1.0892, "nll_loss": 1.0039973258972168, "rewards/accuracies": 0.875, "rewards/chosen": -0.06817461550235748, "rewards/margins": 0.1325918436050415, "rewards/rejected": -0.20076647400856018, "step": 5040 }, { "epoch": 3.0751868232423365, "grad_norm": 3.162351608276367, "learning_rate": 3.090263319044703e-06, "log_odds_chosen": 1.1107782125473022, "log_odds_ratio": -0.5555549263954163, "logits/chosen": -0.8275716304779053, "logits/rejected": -0.7924134135246277, "logps/chosen": -0.8279497027397156, "logps/rejected": -1.570244312286377, "loss": 1.0289, "nll_loss": 1.0095266103744507, "rewards/accuracies": 0.5, "rewards/chosen": -0.08279496431350708, "rewards/margins": 0.07422947138547897, "rewards/rejected": -0.15702444314956665, "step": 5041 }, { "epoch": 3.0757968583193533, "grad_norm": 1.8949577808380127, "learning_rate": 3.089283527250459e-06, "log_odds_chosen": 2.0267982482910156, "log_odds_ratio": -0.3622362017631531, "logits/chosen": -0.9079402685165405, "logits/rejected": -0.9044054746627808, "logps/chosen": -0.8638793230056763, "logps/rejected": -2.571730136871338, "loss": 1.2011, "nll_loss": 1.1544889211654663, "rewards/accuracies": 1.0, "rewards/chosen": -0.08638792484998703, "rewards/margins": 0.1707850992679596, "rewards/rejected": -0.2571730315685272, "step": 5042 }, { "epoch": 3.07640689339637, "grad_norm": 1.2677898406982422, "learning_rate": 3.0883037354562156e-06, "log_odds_chosen": 2.143770933151245, "log_odds_ratio": -0.2678200304508209, "logits/chosen": -1.0222797393798828, "logits/rejected": -0.9234030842781067, "logps/chosen": -0.9931734800338745, "logps/rejected": -2.790609359741211, "loss": 1.0481, "nll_loss": 1.2068486213684082, "rewards/accuracies": 1.0, "rewards/chosen": -0.09931734949350357, "rewards/margins": 0.17974361777305603, "rewards/rejected": -0.279060959815979, "step": 5043 }, { "epoch": 3.077016928473387, "grad_norm": 3.135143518447876, "learning_rate": 3.0873239436619717e-06, "log_odds_chosen": 3.9192025661468506, "log_odds_ratio": -0.14353252947330475, "logits/chosen": -0.775141716003418, "logits/rejected": -0.9445226788520813, "logps/chosen": -0.5817908048629761, "logps/rejected": -3.6577372550964355, "loss": 0.9326, "nll_loss": 0.8343989253044128, "rewards/accuracies": 1.0, "rewards/chosen": -0.05817908048629761, "rewards/margins": 0.3075946569442749, "rewards/rejected": -0.3657737374305725, "step": 5044 }, { "epoch": 3.0776269635504043, "grad_norm": 2.8859786987304688, "learning_rate": 3.086344151867728e-06, "log_odds_chosen": 0.8291893005371094, "log_odds_ratio": -0.5691192150115967, "logits/chosen": -0.947693407535553, "logits/rejected": -0.9682251214981079, "logps/chosen": -0.9634765386581421, "logps/rejected": -1.4482684135437012, "loss": 1.036, "nll_loss": 1.196488380432129, "rewards/accuracies": 0.75, "rewards/chosen": -0.09634765237569809, "rewards/margins": 0.04847918078303337, "rewards/rejected": -0.14482682943344116, "step": 5045 }, { "epoch": 3.078236998627421, "grad_norm": 4.425553321838379, "learning_rate": 3.0853643600734844e-06, "log_odds_chosen": 2.1051132678985596, "log_odds_ratio": -0.5229255557060242, "logits/chosen": -0.8605856895446777, "logits/rejected": -1.0526355504989624, "logps/chosen": -0.8792538642883301, "logps/rejected": -2.5218045711517334, "loss": 1.0693, "nll_loss": 0.9821076989173889, "rewards/accuracies": 0.625, "rewards/chosen": -0.08792538940906525, "rewards/margins": 0.1642550528049469, "rewards/rejected": -0.25218045711517334, "step": 5046 }, { "epoch": 3.078847033704438, "grad_norm": 1.2540473937988281, "learning_rate": 3.0843845682792405e-06, "log_odds_chosen": 1.5823153257369995, "log_odds_ratio": -0.49256837368011475, "logits/chosen": -0.5763301849365234, "logits/rejected": -0.686821699142456, "logps/chosen": -0.5722947716712952, "logps/rejected": -1.7291271686553955, "loss": 0.8152, "nll_loss": 0.7185899019241333, "rewards/accuracies": 0.625, "rewards/chosen": -0.05722947418689728, "rewards/margins": 0.11568323522806168, "rewards/rejected": -0.17291270196437836, "step": 5047 }, { "epoch": 3.079457068781455, "grad_norm": 5.3205156326293945, "learning_rate": 3.083404776484997e-06, "log_odds_chosen": 1.8513669967651367, "log_odds_ratio": -0.3863631784915924, "logits/chosen": -0.6261555552482605, "logits/rejected": -0.7694931030273438, "logps/chosen": -0.7267150282859802, "logps/rejected": -2.0547356605529785, "loss": 0.9234, "nll_loss": 0.7953771948814392, "rewards/accuracies": 0.625, "rewards/chosen": -0.07267151027917862, "rewards/margins": 0.13280203938484192, "rewards/rejected": -0.20547354221343994, "step": 5048 }, { "epoch": 3.0800671038584717, "grad_norm": 2.246403932571411, "learning_rate": 3.082424984690753e-06, "log_odds_chosen": 0.5350439548492432, "log_odds_ratio": -0.6485811471939087, "logits/chosen": -1.1374918222427368, "logits/rejected": -1.0143778324127197, "logps/chosen": -0.9376518130302429, "logps/rejected": -1.2476301193237305, "loss": 1.0714, "nll_loss": 1.1180113554000854, "rewards/accuracies": 0.625, "rewards/chosen": -0.09376517683267593, "rewards/margins": 0.03099784255027771, "rewards/rejected": -0.12476302683353424, "step": 5049 }, { "epoch": 3.080677138935489, "grad_norm": 1.1822779178619385, "learning_rate": 3.0814451928965093e-06, "log_odds_chosen": 3.0459442138671875, "log_odds_ratio": -0.2040378898382187, "logits/chosen": -0.8817787170410156, "logits/rejected": -0.9883777499198914, "logps/chosen": -0.764522910118103, "logps/rejected": -3.2189879417419434, "loss": 0.9787, "nll_loss": 1.051072120666504, "rewards/accuracies": 1.0, "rewards/chosen": -0.07645229250192642, "rewards/margins": 0.24544650316238403, "rewards/rejected": -0.32189881801605225, "step": 5050 }, { "epoch": 3.081287174012506, "grad_norm": 1.4465075731277466, "learning_rate": 3.080465401102266e-06, "log_odds_chosen": 0.15478003025054932, "log_odds_ratio": -0.658197283744812, "logits/chosen": -1.0154778957366943, "logits/rejected": -0.9677445292472839, "logps/chosen": -0.9376264810562134, "logps/rejected": -1.0681926012039185, "loss": 1.0465, "nll_loss": 1.0189664363861084, "rewards/accuracies": 0.5, "rewards/chosen": -0.09376265108585358, "rewards/margins": 0.013056603260338306, "rewards/rejected": -0.1068192571401596, "step": 5051 }, { "epoch": 3.0818972090895227, "grad_norm": 8.472233772277832, "learning_rate": 3.0794856093080215e-06, "log_odds_chosen": 1.2665178775787354, "log_odds_ratio": -0.7485118508338928, "logits/chosen": -0.8741052746772766, "logits/rejected": -0.9738471508026123, "logps/chosen": -1.1064870357513428, "logps/rejected": -2.0747663974761963, "loss": 1.1296, "nll_loss": 1.2685763835906982, "rewards/accuracies": 0.5, "rewards/chosen": -0.11064870655536652, "rewards/margins": 0.096827931702137, "rewards/rejected": -0.2074766308069229, "step": 5052 }, { "epoch": 3.0825072441665395, "grad_norm": 5.728667736053467, "learning_rate": 3.078505817513778e-06, "log_odds_chosen": 3.203568935394287, "log_odds_ratio": -0.17195925116539001, "logits/chosen": -0.74483722448349, "logits/rejected": -0.7335371971130371, "logps/chosen": -0.6066513061523438, "logps/rejected": -2.916253089904785, "loss": 0.9658, "nll_loss": 0.8786826133728027, "rewards/accuracies": 1.0, "rewards/chosen": -0.06066513434052467, "rewards/margins": 0.2309601902961731, "rewards/rejected": -0.29162532091140747, "step": 5053 }, { "epoch": 3.0831172792435564, "grad_norm": 6.600225925445557, "learning_rate": 3.0775260257195346e-06, "log_odds_chosen": 1.2501938343048096, "log_odds_ratio": -0.4105709195137024, "logits/chosen": -0.8185510039329529, "logits/rejected": -1.034158706665039, "logps/chosen": -0.7804936170578003, "logps/rejected": -1.6584498882293701, "loss": 1.1311, "nll_loss": 1.0484366416931152, "rewards/accuracies": 0.75, "rewards/chosen": -0.07804935425519943, "rewards/margins": 0.08779563754796982, "rewards/rejected": -0.16584499180316925, "step": 5054 }, { "epoch": 3.0837273143205732, "grad_norm": 2.0027847290039062, "learning_rate": 3.0765462339252907e-06, "log_odds_chosen": 2.076998233795166, "log_odds_ratio": -0.39337122440338135, "logits/chosen": -0.923281192779541, "logits/rejected": -0.885924220085144, "logps/chosen": -0.8514391183853149, "logps/rejected": -2.6046085357666016, "loss": 0.9398, "nll_loss": 0.9480834007263184, "rewards/accuracies": 0.875, "rewards/chosen": -0.08514390885829926, "rewards/margins": 0.1753169298171997, "rewards/rejected": -0.26046085357666016, "step": 5055 }, { "epoch": 3.0843373493975905, "grad_norm": 3.262641191482544, "learning_rate": 3.075566442131047e-06, "log_odds_chosen": 1.9281883239746094, "log_odds_ratio": -0.5344812870025635, "logits/chosen": -0.8670839667320251, "logits/rejected": -1.1631934642791748, "logps/chosen": -0.8122028112411499, "logps/rejected": -2.3218798637390137, "loss": 1.0891, "nll_loss": 0.9910035729408264, "rewards/accuracies": 0.625, "rewards/chosen": -0.08122027665376663, "rewards/margins": 0.15096770226955414, "rewards/rejected": -0.23218798637390137, "step": 5056 }, { "epoch": 3.0849473844746074, "grad_norm": 1.6574008464813232, "learning_rate": 3.0745866503368034e-06, "log_odds_chosen": 3.68668532371521, "log_odds_ratio": -0.2746756374835968, "logits/chosen": -0.5992919206619263, "logits/rejected": -0.801547110080719, "logps/chosen": -0.5328525304794312, "logps/rejected": -3.2176196575164795, "loss": 0.9684, "nll_loss": 1.0707597732543945, "rewards/accuracies": 0.75, "rewards/chosen": -0.053285256028175354, "rewards/margins": 0.2684767246246338, "rewards/rejected": -0.32176196575164795, "step": 5057 }, { "epoch": 3.085557419551624, "grad_norm": 2.4837963581085205, "learning_rate": 3.0736068585425595e-06, "log_odds_chosen": 1.6462764739990234, "log_odds_ratio": -0.4198344349861145, "logits/chosen": -0.8173702359199524, "logits/rejected": -0.8734355568885803, "logps/chosen": -1.0559393167495728, "logps/rejected": -2.2722864151000977, "loss": 1.0835, "nll_loss": 1.1418429613113403, "rewards/accuracies": 0.75, "rewards/chosen": -0.10559392720460892, "rewards/margins": 0.12163471430540085, "rewards/rejected": -0.22722864151000977, "step": 5058 }, { "epoch": 3.086167454628641, "grad_norm": 1.2967981100082397, "learning_rate": 3.0726270667483157e-06, "log_odds_chosen": 1.3169219493865967, "log_odds_ratio": -0.4784654378890991, "logits/chosen": -0.704279899597168, "logits/rejected": -0.7358739376068115, "logps/chosen": -0.5733767747879028, "logps/rejected": -1.49592924118042, "loss": 1.0496, "nll_loss": 0.869128406047821, "rewards/accuracies": 0.625, "rewards/chosen": -0.057337686419487, "rewards/margins": 0.09225523471832275, "rewards/rejected": -0.14959292113780975, "step": 5059 }, { "epoch": 3.086777489705658, "grad_norm": 13.62529182434082, "learning_rate": 3.071647274954072e-06, "log_odds_chosen": 1.0681507587432861, "log_odds_ratio": -0.39149558544158936, "logits/chosen": -0.7385997772216797, "logits/rejected": -0.5698907375335693, "logps/chosen": -0.7094893455505371, "logps/rejected": -1.3558719158172607, "loss": 1.0963, "nll_loss": 0.7639282941818237, "rewards/accuracies": 0.875, "rewards/chosen": -0.07094893604516983, "rewards/margins": 0.06463824212551117, "rewards/rejected": -0.1355871856212616, "step": 5060 }, { "epoch": 3.087387524782675, "grad_norm": 2.8874518871307373, "learning_rate": 3.0706674831598283e-06, "log_odds_chosen": 3.3176493644714355, "log_odds_ratio": -0.12210194766521454, "logits/chosen": -0.6451645493507385, "logits/rejected": -0.6076580286026001, "logps/chosen": -0.6884219646453857, "logps/rejected": -3.387115478515625, "loss": 1.003, "nll_loss": 0.9053457975387573, "rewards/accuracies": 1.0, "rewards/chosen": -0.06884220242500305, "rewards/margins": 0.2698693573474884, "rewards/rejected": -0.33871155977249146, "step": 5061 }, { "epoch": 3.087997559859692, "grad_norm": 1.9284974336624146, "learning_rate": 3.069687691365585e-06, "log_odds_chosen": 4.190817832946777, "log_odds_ratio": -0.24319933354854584, "logits/chosen": -0.6441627740859985, "logits/rejected": -0.9656357765197754, "logps/chosen": -0.6726329326629639, "logps/rejected": -4.200684547424316, "loss": 1.0087, "nll_loss": 0.7698261737823486, "rewards/accuracies": 0.875, "rewards/chosen": -0.06726330518722534, "rewards/margins": 0.35280513763427734, "rewards/rejected": -0.4200684428215027, "step": 5062 }, { "epoch": 3.088607594936709, "grad_norm": 1.5525469779968262, "learning_rate": 3.068707899571341e-06, "log_odds_chosen": 1.056462049484253, "log_odds_ratio": -0.3519328534603119, "logits/chosen": -0.9620053172111511, "logits/rejected": -0.7883215546607971, "logps/chosen": -0.6548969745635986, "logps/rejected": -1.336272120475769, "loss": 1.1655, "nll_loss": 0.9087266325950623, "rewards/accuracies": 0.875, "rewards/chosen": -0.06548970937728882, "rewards/margins": 0.06813749670982361, "rewards/rejected": -0.13362720608711243, "step": 5063 }, { "epoch": 3.0892176300137257, "grad_norm": 4.872971057891846, "learning_rate": 3.067728107777097e-06, "log_odds_chosen": 2.1947708129882812, "log_odds_ratio": -0.32799726724624634, "logits/chosen": -0.9011232852935791, "logits/rejected": -1.0000779628753662, "logps/chosen": -0.814491868019104, "logps/rejected": -2.6032919883728027, "loss": 1.0132, "nll_loss": 1.0630903244018555, "rewards/accuracies": 1.0, "rewards/chosen": -0.08144918829202652, "rewards/margins": 0.17888003587722778, "rewards/rejected": -0.2603292167186737, "step": 5064 }, { "epoch": 3.0898276650907426, "grad_norm": 1.6425001621246338, "learning_rate": 3.0667483159828537e-06, "log_odds_chosen": 2.1198484897613525, "log_odds_ratio": -0.3649917244911194, "logits/chosen": -0.8341246843338013, "logits/rejected": -1.0503325462341309, "logps/chosen": -0.7455154061317444, "logps/rejected": -2.387916326522827, "loss": 1.0547, "nll_loss": 0.8839727640151978, "rewards/accuracies": 0.75, "rewards/chosen": -0.0745515450835228, "rewards/margins": 0.16424010694026947, "rewards/rejected": -0.23879164457321167, "step": 5065 }, { "epoch": 3.09043770016776, "grad_norm": 2.5090415477752686, "learning_rate": 3.0657685241886098e-06, "log_odds_chosen": 0.6494825482368469, "log_odds_ratio": -0.8297072649002075, "logits/chosen": -1.0508091449737549, "logits/rejected": -1.1271135807037354, "logps/chosen": -1.211146593093872, "logps/rejected": -1.681291937828064, "loss": 1.1399, "nll_loss": 1.2516939640045166, "rewards/accuracies": 0.5, "rewards/chosen": -0.12111466377973557, "rewards/margins": 0.04701453819870949, "rewards/rejected": -0.16812920570373535, "step": 5066 }, { "epoch": 3.0910477352447767, "grad_norm": 2.0140984058380127, "learning_rate": 3.064788732394366e-06, "log_odds_chosen": 3.3960063457489014, "log_odds_ratio": -0.19167804718017578, "logits/chosen": -0.6078466773033142, "logits/rejected": -0.6413880586624146, "logps/chosen": -0.5112602710723877, "logps/rejected": -3.1764109134674072, "loss": 1.1344, "nll_loss": 0.6530513167381287, "rewards/accuracies": 1.0, "rewards/chosen": -0.05112603306770325, "rewards/margins": 0.2665150761604309, "rewards/rejected": -0.31764107942581177, "step": 5067 }, { "epoch": 3.0916577703217936, "grad_norm": 1.9681767225265503, "learning_rate": 3.0638089406001224e-06, "log_odds_chosen": 2.124521255493164, "log_odds_ratio": -0.4960330128669739, "logits/chosen": -0.9367611408233643, "logits/rejected": -1.0965940952301025, "logps/chosen": -0.8730366826057434, "logps/rejected": -2.6260461807250977, "loss": 1.0849, "nll_loss": 1.0322542190551758, "rewards/accuracies": 0.5, "rewards/chosen": -0.08730366826057434, "rewards/margins": 0.1753009408712387, "rewards/rejected": -0.26260462403297424, "step": 5068 }, { "epoch": 3.0922678053988104, "grad_norm": 1.4134056568145752, "learning_rate": 3.0628291488058786e-06, "log_odds_chosen": 1.602292537689209, "log_odds_ratio": -0.6135996580123901, "logits/chosen": -0.774796724319458, "logits/rejected": -0.8383324146270752, "logps/chosen": -0.8866123557090759, "logps/rejected": -2.0999650955200195, "loss": 1.0213, "nll_loss": 1.086984395980835, "rewards/accuracies": 0.625, "rewards/chosen": -0.08866123855113983, "rewards/margins": 0.12133529037237167, "rewards/rejected": -0.2099965214729309, "step": 5069 }, { "epoch": 3.0928778404758273, "grad_norm": 3.7349281311035156, "learning_rate": 3.0618493570116347e-06, "log_odds_chosen": 2.324662923812866, "log_odds_ratio": -0.2733142077922821, "logits/chosen": -0.6366047263145447, "logits/rejected": -0.7979249954223633, "logps/chosen": -0.5923905372619629, "logps/rejected": -2.302267551422119, "loss": 0.9324, "nll_loss": 0.8682342767715454, "rewards/accuracies": 0.875, "rewards/chosen": -0.05923905596137047, "rewards/margins": 0.17098772525787354, "rewards/rejected": -0.2302267700433731, "step": 5070 }, { "epoch": 3.093487875552844, "grad_norm": 1.413948893547058, "learning_rate": 3.0608695652173912e-06, "log_odds_chosen": 2.5677571296691895, "log_odds_ratio": -0.386143296957016, "logits/chosen": -0.7613853812217712, "logits/rejected": -0.9967575073242188, "logps/chosen": -0.706479549407959, "logps/rejected": -2.742497682571411, "loss": 0.9206, "nll_loss": 0.8194441199302673, "rewards/accuracies": 0.75, "rewards/chosen": -0.0706479549407959, "rewards/margins": 0.20360180735588074, "rewards/rejected": -0.274249792098999, "step": 5071 }, { "epoch": 3.0940979106298614, "grad_norm": 2.1594746112823486, "learning_rate": 3.0598897734231474e-06, "log_odds_chosen": 1.1121855974197388, "log_odds_ratio": -0.4848896265029907, "logits/chosen": -0.8242749571800232, "logits/rejected": -0.732287585735321, "logps/chosen": -0.9410380721092224, "logps/rejected": -1.745375156402588, "loss": 1.1503, "nll_loss": 1.1851365566253662, "rewards/accuracies": 0.75, "rewards/chosen": -0.09410380572080612, "rewards/margins": 0.08043372631072998, "rewards/rejected": -0.1745375245809555, "step": 5072 }, { "epoch": 3.0947079457068782, "grad_norm": 1.0432888269424438, "learning_rate": 3.0589099816289035e-06, "log_odds_chosen": 2.5102810859680176, "log_odds_ratio": -0.3053060472011566, "logits/chosen": -0.9473860263824463, "logits/rejected": -0.9186074733734131, "logps/chosen": -0.6197341680526733, "logps/rejected": -2.245283603668213, "loss": 0.9906, "nll_loss": 0.8658828139305115, "rewards/accuracies": 0.75, "rewards/chosen": -0.06197342276573181, "rewards/margins": 0.16255493462085724, "rewards/rejected": -0.22452835738658905, "step": 5073 }, { "epoch": 3.095317980783895, "grad_norm": 1.3091603517532349, "learning_rate": 3.05793018983466e-06, "log_odds_chosen": 2.795703649520874, "log_odds_ratio": -0.33479273319244385, "logits/chosen": -0.7464160919189453, "logits/rejected": -0.9067438840866089, "logps/chosen": -0.671588659286499, "logps/rejected": -2.7415637969970703, "loss": 1.0398, "nll_loss": 0.8801860809326172, "rewards/accuracies": 0.625, "rewards/chosen": -0.06715887039899826, "rewards/margins": 0.20699752867221832, "rewards/rejected": -0.274156391620636, "step": 5074 }, { "epoch": 3.095928015860912, "grad_norm": 1.8400936126708984, "learning_rate": 3.056950398040416e-06, "log_odds_chosen": 0.4511687457561493, "log_odds_ratio": -0.5957619547843933, "logits/chosen": -1.0465562343597412, "logits/rejected": -0.9547059535980225, "logps/chosen": -0.9579454660415649, "logps/rejected": -1.311826467514038, "loss": 1.1371, "nll_loss": 1.0737779140472412, "rewards/accuracies": 0.5, "rewards/chosen": -0.09579454362392426, "rewards/margins": 0.03538810461759567, "rewards/rejected": -0.13118265569210052, "step": 5075 }, { "epoch": 3.0965380509379288, "grad_norm": 1.178949236869812, "learning_rate": 3.0559706062461727e-06, "log_odds_chosen": 3.3925042152404785, "log_odds_ratio": -0.2608066201210022, "logits/chosen": -0.5988292098045349, "logits/rejected": -0.7695165276527405, "logps/chosen": -0.5045552253723145, "logps/rejected": -3.1408114433288574, "loss": 0.9012, "nll_loss": 0.7441425323486328, "rewards/accuracies": 0.875, "rewards/chosen": -0.050455521792173386, "rewards/margins": 0.2636256217956543, "rewards/rejected": -0.3140811324119568, "step": 5076 }, { "epoch": 3.097148086014946, "grad_norm": 1.7928365468978882, "learning_rate": 3.054990814451929e-06, "log_odds_chosen": 2.7365119457244873, "log_odds_ratio": -0.3627958595752716, "logits/chosen": -0.5362926721572876, "logits/rejected": -0.7390307188034058, "logps/chosen": -0.6942285895347595, "logps/rejected": -3.0136475563049316, "loss": 1.0227, "nll_loss": 0.7914987802505493, "rewards/accuracies": 0.75, "rewards/chosen": -0.06942285597324371, "rewards/margins": 0.23194190859794617, "rewards/rejected": -0.3013647198677063, "step": 5077 }, { "epoch": 3.097758121091963, "grad_norm": 4.174391269683838, "learning_rate": 3.054011022657685e-06, "log_odds_chosen": 1.8248984813690186, "log_odds_ratio": -0.34308040142059326, "logits/chosen": -0.8186473846435547, "logits/rejected": -1.0619629621505737, "logps/chosen": -0.7493066787719727, "logps/rejected": -2.208709478378296, "loss": 1.0009, "nll_loss": 0.8765891790390015, "rewards/accuracies": 0.75, "rewards/chosen": -0.07493066787719727, "rewards/margins": 0.14594027400016785, "rewards/rejected": -0.2208709418773651, "step": 5078 }, { "epoch": 3.0983681561689798, "grad_norm": 1.3552207946777344, "learning_rate": 3.0530312308634415e-06, "log_odds_chosen": 2.4926795959472656, "log_odds_ratio": -0.19755896925926208, "logits/chosen": -0.7703432440757751, "logits/rejected": -0.9182677865028381, "logps/chosen": -0.6036447882652283, "logps/rejected": -2.4299840927124023, "loss": 0.9624, "nll_loss": 0.7215560674667358, "rewards/accuracies": 0.875, "rewards/chosen": -0.06036447733640671, "rewards/margins": 0.1826339215040207, "rewards/rejected": -0.2429983913898468, "step": 5079 }, { "epoch": 3.0989781912459966, "grad_norm": 1.0629299879074097, "learning_rate": 3.0520514390691976e-06, "log_odds_chosen": 1.3832069635391235, "log_odds_ratio": -0.3860763907432556, "logits/chosen": -0.6848229169845581, "logits/rejected": -0.7595943808555603, "logps/chosen": -0.8856396675109863, "logps/rejected": -1.9517629146575928, "loss": 1.0866, "nll_loss": 1.023511290550232, "rewards/accuracies": 0.875, "rewards/chosen": -0.08856397122144699, "rewards/margins": 0.10661231726408005, "rewards/rejected": -0.19517628848552704, "step": 5080 }, { "epoch": 3.0995882263230135, "grad_norm": 1.3754099607467651, "learning_rate": 3.0510716472749537e-06, "log_odds_chosen": 1.100940465927124, "log_odds_ratio": -0.548202395439148, "logits/chosen": -0.6210988759994507, "logits/rejected": -0.6871371269226074, "logps/chosen": -0.6927694082260132, "logps/rejected": -1.4557404518127441, "loss": 0.9939, "nll_loss": 0.9229635000228882, "rewards/accuracies": 0.625, "rewards/chosen": -0.06927694380283356, "rewards/margins": 0.0762971043586731, "rewards/rejected": -0.14557406306266785, "step": 5081 }, { "epoch": 3.1001982614000303, "grad_norm": 3.1629414558410645, "learning_rate": 3.0500918554807103e-06, "log_odds_chosen": 2.092048168182373, "log_odds_ratio": -0.3085451126098633, "logits/chosen": -0.7554140090942383, "logits/rejected": -0.8467750549316406, "logps/chosen": -0.7851930260658264, "logps/rejected": -2.3261284828186035, "loss": 1.1575, "nll_loss": 0.9370446801185608, "rewards/accuracies": 0.875, "rewards/chosen": -0.078519307076931, "rewards/margins": 0.15409354865550995, "rewards/rejected": -0.23261284828186035, "step": 5082 }, { "epoch": 3.1008082964770476, "grad_norm": 1.280292272567749, "learning_rate": 3.049112063686467e-06, "log_odds_chosen": 2.3844268321990967, "log_odds_ratio": -0.22551634907722473, "logits/chosen": -0.5337933301925659, "logits/rejected": -0.6847100257873535, "logps/chosen": -0.42461344599723816, "logps/rejected": -1.9349359273910522, "loss": 0.9419, "nll_loss": 0.6103715896606445, "rewards/accuracies": 1.0, "rewards/chosen": -0.04246135056018829, "rewards/margins": 0.1510322540998459, "rewards/rejected": -0.19349361956119537, "step": 5083 }, { "epoch": 3.1014183315540644, "grad_norm": 1.728039264678955, "learning_rate": 3.0481322718922225e-06, "log_odds_chosen": 2.6000750064849854, "log_odds_ratio": -0.4113786816596985, "logits/chosen": -0.6809301972389221, "logits/rejected": -0.6822388172149658, "logps/chosen": -0.7345830798149109, "logps/rejected": -2.9530415534973145, "loss": 1.026, "nll_loss": 0.8381963968276978, "rewards/accuracies": 0.75, "rewards/chosen": -0.07345831394195557, "rewards/margins": 0.2218458503484726, "rewards/rejected": -0.29530417919158936, "step": 5084 }, { "epoch": 3.1020283666310813, "grad_norm": 1.3325003385543823, "learning_rate": 3.047152480097979e-06, "log_odds_chosen": 1.8871936798095703, "log_odds_ratio": -0.4786628186702728, "logits/chosen": -0.7669770121574402, "logits/rejected": -0.8534095883369446, "logps/chosen": -0.8164882063865662, "logps/rejected": -2.258718729019165, "loss": 1.243, "nll_loss": 1.0933082103729248, "rewards/accuracies": 0.625, "rewards/chosen": -0.0816488265991211, "rewards/margins": 0.14422303438186646, "rewards/rejected": -0.22587186098098755, "step": 5085 }, { "epoch": 3.102638401708098, "grad_norm": 1.247584342956543, "learning_rate": 3.0461726883037356e-06, "log_odds_chosen": 1.8264563083648682, "log_odds_ratio": -0.2797894775867462, "logits/chosen": -0.7269372940063477, "logits/rejected": -0.6893735527992249, "logps/chosen": -0.5495851039886475, "logps/rejected": -1.7670682668685913, "loss": 0.9152, "nll_loss": 0.7425586581230164, "rewards/accuracies": 1.0, "rewards/chosen": -0.054958511143922806, "rewards/margins": 0.12174832075834274, "rewards/rejected": -0.17670683562755585, "step": 5086 }, { "epoch": 3.103248436785115, "grad_norm": 9.334939002990723, "learning_rate": 3.0451928965094913e-06, "log_odds_chosen": 2.5696654319763184, "log_odds_ratio": -0.4093623757362366, "logits/chosen": -1.0390335321426392, "logits/rejected": -1.0927952527999878, "logps/chosen": -1.0438437461853027, "logps/rejected": -3.452877998352051, "loss": 0.9858, "nll_loss": 1.247730016708374, "rewards/accuracies": 0.625, "rewards/chosen": -0.1043843924999237, "rewards/margins": 0.24090343713760376, "rewards/rejected": -0.3452877998352051, "step": 5087 }, { "epoch": 3.1038584718621323, "grad_norm": 2.164463520050049, "learning_rate": 3.044213104715248e-06, "log_odds_chosen": 3.042813777923584, "log_odds_ratio": -0.29757067561149597, "logits/chosen": -0.7506632804870605, "logits/rejected": -0.8724937438964844, "logps/chosen": -0.6236224174499512, "logps/rejected": -3.0226402282714844, "loss": 1.0873, "nll_loss": 0.881216824054718, "rewards/accuracies": 0.75, "rewards/chosen": -0.062362246215343475, "rewards/margins": 0.23990179598331451, "rewards/rejected": -0.3022640347480774, "step": 5088 }, { "epoch": 3.104468506939149, "grad_norm": 5.32657527923584, "learning_rate": 3.043233312921004e-06, "log_odds_chosen": 3.459040641784668, "log_odds_ratio": -0.32847267389297485, "logits/chosen": -0.944305956363678, "logits/rejected": -1.0451358556747437, "logps/chosen": -0.7149814963340759, "logps/rejected": -3.4671757221221924, "loss": 1.0695, "nll_loss": 0.8750016689300537, "rewards/accuracies": 0.75, "rewards/chosen": -0.07149815559387207, "rewards/margins": 0.2752194404602051, "rewards/rejected": -0.34671759605407715, "step": 5089 }, { "epoch": 3.105078542016166, "grad_norm": 1.87451171875, "learning_rate": 3.0422535211267605e-06, "log_odds_chosen": 1.6580593585968018, "log_odds_ratio": -0.4006965756416321, "logits/chosen": -0.7624974846839905, "logits/rejected": -0.9726693034172058, "logps/chosen": -0.7430541515350342, "logps/rejected": -2.0122437477111816, "loss": 1.049, "nll_loss": 1.0949106216430664, "rewards/accuracies": 0.75, "rewards/chosen": -0.07430541515350342, "rewards/margins": 0.1269189566373825, "rewards/rejected": -0.20122437179088593, "step": 5090 }, { "epoch": 3.105688577093183, "grad_norm": 1.6564006805419922, "learning_rate": 3.0412737293325166e-06, "log_odds_chosen": 2.1608684062957764, "log_odds_ratio": -0.44164490699768066, "logits/chosen": -0.9145231246948242, "logits/rejected": -1.004833459854126, "logps/chosen": -0.6891407370567322, "logps/rejected": -2.143089771270752, "loss": 1.0457, "nll_loss": 0.9239751100540161, "rewards/accuracies": 0.625, "rewards/chosen": -0.06891407072544098, "rewards/margins": 0.14539489150047302, "rewards/rejected": -0.2143089920282364, "step": 5091 }, { "epoch": 3.1062986121701996, "grad_norm": 1.9837257862091064, "learning_rate": 3.0402939375382728e-06, "log_odds_chosen": 0.5980833172798157, "log_odds_ratio": -0.575246274471283, "logits/chosen": -1.1314409971237183, "logits/rejected": -1.1335703134536743, "logps/chosen": -0.92948979139328, "logps/rejected": -1.3504348993301392, "loss": 1.102, "nll_loss": 1.4107379913330078, "rewards/accuracies": 0.5, "rewards/chosen": -0.09294898062944412, "rewards/margins": 0.04209449887275696, "rewards/rejected": -0.13504348695278168, "step": 5092 }, { "epoch": 3.1069086472472165, "grad_norm": 8.267404556274414, "learning_rate": 3.0393141457440293e-06, "log_odds_chosen": 1.0481116771697998, "log_odds_ratio": -0.47789686918258667, "logits/chosen": -0.6348435282707214, "logits/rejected": -0.863659143447876, "logps/chosen": -0.945125937461853, "logps/rejected": -1.515188217163086, "loss": 1.1952, "nll_loss": 1.1829400062561035, "rewards/accuracies": 0.625, "rewards/chosen": -0.09451259672641754, "rewards/margins": 0.05700623616576195, "rewards/rejected": -0.1515188217163086, "step": 5093 }, { "epoch": 3.107518682324234, "grad_norm": 2.9783711433410645, "learning_rate": 3.038334353949786e-06, "log_odds_chosen": 1.5360921621322632, "log_odds_ratio": -0.445618212223053, "logits/chosen": -0.672008752822876, "logits/rejected": -0.8251999616622925, "logps/chosen": -0.7461434602737427, "logps/rejected": -1.8965893983840942, "loss": 1.0911, "nll_loss": 1.0639785528182983, "rewards/accuracies": 0.75, "rewards/chosen": -0.07461433857679367, "rewards/margins": 0.11504460126161575, "rewards/rejected": -0.18965893983840942, "step": 5094 }, { "epoch": 3.1081287174012506, "grad_norm": 2.8685312271118164, "learning_rate": 3.0373545621555416e-06, "log_odds_chosen": 1.9736658334732056, "log_odds_ratio": -0.36873558163642883, "logits/chosen": -1.0121897459030151, "logits/rejected": -0.8667607307434082, "logps/chosen": -0.7156100273132324, "logps/rejected": -1.9111944437026978, "loss": 1.0331, "nll_loss": 1.1246576309204102, "rewards/accuracies": 0.75, "rewards/chosen": -0.07156100124120712, "rewards/margins": 0.1195584386587143, "rewards/rejected": -0.19111944735050201, "step": 5095 }, { "epoch": 3.1087387524782675, "grad_norm": 1.318132996559143, "learning_rate": 3.036374770361298e-06, "log_odds_chosen": 0.8738923072814941, "log_odds_ratio": -0.6045516729354858, "logits/chosen": -0.8772634267807007, "logits/rejected": -1.0240967273712158, "logps/chosen": -0.9181817173957825, "logps/rejected": -1.5156831741333008, "loss": 0.9893, "nll_loss": 1.1751480102539062, "rewards/accuracies": 0.375, "rewards/chosen": -0.09181816875934601, "rewards/margins": 0.059750158339738846, "rewards/rejected": -0.15156832337379456, "step": 5096 }, { "epoch": 3.1093487875552843, "grad_norm": 1.4816560745239258, "learning_rate": 3.0353949785670547e-06, "log_odds_chosen": 1.174790382385254, "log_odds_ratio": -0.6327708959579468, "logits/chosen": -0.7538289427757263, "logits/rejected": -0.8322831392288208, "logps/chosen": -0.904703676700592, "logps/rejected": -1.8471570014953613, "loss": 0.9515, "nll_loss": 1.1252188682556152, "rewards/accuracies": 0.625, "rewards/chosen": -0.09047037363052368, "rewards/margins": 0.09424534440040588, "rewards/rejected": -0.18471571803092957, "step": 5097 }, { "epoch": 3.109958822632301, "grad_norm": 1.5183770656585693, "learning_rate": 3.0344151867728104e-06, "log_odds_chosen": 1.9010488986968994, "log_odds_ratio": -0.48808690905570984, "logits/chosen": -0.9870692491531372, "logits/rejected": -1.1543464660644531, "logps/chosen": -0.6784521341323853, "logps/rejected": -2.2909741401672363, "loss": 1.1782, "nll_loss": 1.3020455837249756, "rewards/accuracies": 0.625, "rewards/chosen": -0.06784521788358688, "rewards/margins": 0.1612521857023239, "rewards/rejected": -0.2290973961353302, "step": 5098 }, { "epoch": 3.1105688577093185, "grad_norm": 6.945102691650391, "learning_rate": 3.033435394978567e-06, "log_odds_chosen": 1.392661213874817, "log_odds_ratio": -0.40666380524635315, "logits/chosen": -0.6508589386940002, "logits/rejected": -0.7530546188354492, "logps/chosen": -0.7539491653442383, "logps/rejected": -1.8343931436538696, "loss": 1.0349, "nll_loss": 0.8837084174156189, "rewards/accuracies": 0.75, "rewards/chosen": -0.07539492100477219, "rewards/margins": 0.10804440081119537, "rewards/rejected": -0.18343931436538696, "step": 5099 }, { "epoch": 3.1111788927863353, "grad_norm": 1.450015664100647, "learning_rate": 3.0324556031843234e-06, "log_odds_chosen": 1.7013764381408691, "log_odds_ratio": -0.4672665297985077, "logits/chosen": -0.8360433578491211, "logits/rejected": -0.8854487538337708, "logps/chosen": -0.8570458889007568, "logps/rejected": -1.9553288221359253, "loss": 1.0505, "nll_loss": 0.9271873235702515, "rewards/accuracies": 0.75, "rewards/chosen": -0.08570458739995956, "rewards/margins": 0.10982829332351685, "rewards/rejected": -0.195532888174057, "step": 5100 }, { "epoch": 3.111788927863352, "grad_norm": 2.956646203994751, "learning_rate": 3.031475811390079e-06, "log_odds_chosen": 1.3860266208648682, "log_odds_ratio": -0.43261921405792236, "logits/chosen": -0.8970502614974976, "logits/rejected": -0.9532362818717957, "logps/chosen": -0.7431718707084656, "logps/rejected": -1.7965152263641357, "loss": 1.2088, "nll_loss": 1.2292885780334473, "rewards/accuracies": 0.75, "rewards/chosen": -0.07431718707084656, "rewards/margins": 0.1053343191742897, "rewards/rejected": -0.17965151369571686, "step": 5101 }, { "epoch": 3.112398962940369, "grad_norm": 1.947547435760498, "learning_rate": 3.0304960195958357e-06, "log_odds_chosen": 3.0567054748535156, "log_odds_ratio": -0.4187115430831909, "logits/chosen": -0.7920055389404297, "logits/rejected": -0.9738267660140991, "logps/chosen": -0.6982634663581848, "logps/rejected": -3.2930784225463867, "loss": 1.1666, "nll_loss": 0.9095696806907654, "rewards/accuracies": 0.625, "rewards/chosen": -0.06982634961605072, "rewards/margins": 0.2594814896583557, "rewards/rejected": -0.3293078541755676, "step": 5102 }, { "epoch": 3.113008998017386, "grad_norm": 2.136018991470337, "learning_rate": 3.0295162278015922e-06, "log_odds_chosen": 2.5697875022888184, "log_odds_ratio": -0.35972315073013306, "logits/chosen": -0.7028923034667969, "logits/rejected": -0.9513263702392578, "logps/chosen": -0.9540582895278931, "logps/rejected": -3.0866920948028564, "loss": 1.1261, "nll_loss": 0.8720755577087402, "rewards/accuracies": 0.875, "rewards/chosen": -0.09540582448244095, "rewards/margins": 0.2132634073495865, "rewards/rejected": -0.30866923928260803, "step": 5103 }, { "epoch": 3.113619033094403, "grad_norm": 1.4163695573806763, "learning_rate": 3.0285364360073484e-06, "log_odds_chosen": 2.9241414070129395, "log_odds_ratio": -0.31026196479797363, "logits/chosen": -0.8356508016586304, "logits/rejected": -0.8997957110404968, "logps/chosen": -0.6597015261650085, "logps/rejected": -2.8668107986450195, "loss": 0.9899, "nll_loss": 0.8472966551780701, "rewards/accuracies": 0.75, "rewards/chosen": -0.06597016006708145, "rewards/margins": 0.22071093320846558, "rewards/rejected": -0.2866811156272888, "step": 5104 }, { "epoch": 3.11422906817142, "grad_norm": 1.4893800020217896, "learning_rate": 3.0275566442131045e-06, "log_odds_chosen": 2.36194109916687, "log_odds_ratio": -0.4477502107620239, "logits/chosen": -0.7752140760421753, "logits/rejected": -0.8681477904319763, "logps/chosen": -0.6552597880363464, "logps/rejected": -2.562161445617676, "loss": 0.9956, "nll_loss": 0.8206685781478882, "rewards/accuracies": 0.75, "rewards/chosen": -0.06552597880363464, "rewards/margins": 0.19069015979766846, "rewards/rejected": -0.2562161386013031, "step": 5105 }, { "epoch": 3.114839103248437, "grad_norm": 2.1739583015441895, "learning_rate": 3.026576852418861e-06, "log_odds_chosen": 2.6958465576171875, "log_odds_ratio": -0.29626378417015076, "logits/chosen": -0.6879646182060242, "logits/rejected": -0.9912704229354858, "logps/chosen": -0.8664794564247131, "logps/rejected": -3.102936267852783, "loss": 0.8781, "nll_loss": 0.9111055135726929, "rewards/accuracies": 0.75, "rewards/chosen": -0.08664794266223907, "rewards/margins": 0.22364571690559387, "rewards/rejected": -0.31029367446899414, "step": 5106 }, { "epoch": 3.1154491383254537, "grad_norm": 1.2378785610198975, "learning_rate": 3.025597060624617e-06, "log_odds_chosen": 2.1645843982696533, "log_odds_ratio": -0.29136592149734497, "logits/chosen": -0.8000178933143616, "logits/rejected": -0.8958705067634583, "logps/chosen": -0.7700140476226807, "logps/rejected": -2.463191032409668, "loss": 0.9435, "nll_loss": 0.8227252960205078, "rewards/accuracies": 0.875, "rewards/chosen": -0.0770014077425003, "rewards/margins": 0.16931770741939545, "rewards/rejected": -0.24631911516189575, "step": 5107 }, { "epoch": 3.1160591734024705, "grad_norm": 1.7606157064437866, "learning_rate": 3.0246172688303737e-06, "log_odds_chosen": 2.3180198669433594, "log_odds_ratio": -0.2007269561290741, "logits/chosen": -0.9022965431213379, "logits/rejected": -0.8401433825492859, "logps/chosen": -0.6270990371704102, "logps/rejected": -2.3079893589019775, "loss": 0.8749, "nll_loss": 0.7527830600738525, "rewards/accuracies": 1.0, "rewards/chosen": -0.06270990520715714, "rewards/margins": 0.16808903217315674, "rewards/rejected": -0.23079892992973328, "step": 5108 }, { "epoch": 3.1166692084794874, "grad_norm": 4.759308815002441, "learning_rate": 3.0236374770361294e-06, "log_odds_chosen": 0.26861077547073364, "log_odds_ratio": -0.6874147653579712, "logits/chosen": -1.064910650253296, "logits/rejected": -1.1219720840454102, "logps/chosen": -1.119523525238037, "logps/rejected": -1.366792917251587, "loss": 1.262, "nll_loss": 1.3543965816497803, "rewards/accuracies": 0.5, "rewards/chosen": -0.11195236444473267, "rewards/margins": 0.02472694404423237, "rewards/rejected": -0.13667930662631989, "step": 5109 }, { "epoch": 3.1172792435565047, "grad_norm": 1.485750436782837, "learning_rate": 3.022657685241886e-06, "log_odds_chosen": -0.06772930920124054, "log_odds_ratio": -0.7380803227424622, "logits/chosen": -1.0465245246887207, "logits/rejected": -0.9595943689346313, "logps/chosen": -0.7854699492454529, "logps/rejected": -0.7386096715927124, "loss": 1.2191, "nll_loss": 0.8904505968093872, "rewards/accuracies": 0.625, "rewards/chosen": -0.07854699343442917, "rewards/margins": -0.004686023574322462, "rewards/rejected": -0.07386097311973572, "step": 5110 }, { "epoch": 3.1178892786335215, "grad_norm": 1.3432680368423462, "learning_rate": 3.0216778934476425e-06, "log_odds_chosen": 1.211213231086731, "log_odds_ratio": -0.3947939872741699, "logits/chosen": -0.7498316168785095, "logits/rejected": -0.8633997440338135, "logps/chosen": -0.6072479486465454, "logps/rejected": -1.4135499000549316, "loss": 0.8801, "nll_loss": 0.8856425285339355, "rewards/accuracies": 0.75, "rewards/chosen": -0.06072479486465454, "rewards/margins": 0.08063020557165146, "rewards/rejected": -0.1413550078868866, "step": 5111 }, { "epoch": 3.1184993137105383, "grad_norm": 1.1351196765899658, "learning_rate": 3.020698101653398e-06, "log_odds_chosen": 3.5432515144348145, "log_odds_ratio": -0.31413760781288147, "logits/chosen": -0.9694756269454956, "logits/rejected": -0.9544309377670288, "logps/chosen": -0.8082435727119446, "logps/rejected": -3.8649446964263916, "loss": 1.0199, "nll_loss": 0.9575474262237549, "rewards/accuracies": 0.875, "rewards/chosen": -0.0808243602514267, "rewards/margins": 0.3056701123714447, "rewards/rejected": -0.3864944577217102, "step": 5112 }, { "epoch": 3.119109348787555, "grad_norm": 1.0888957977294922, "learning_rate": 3.0197183098591547e-06, "log_odds_chosen": 0.8485894203186035, "log_odds_ratio": -0.5813930034637451, "logits/chosen": -0.8981796503067017, "logits/rejected": -0.9415454268455505, "logps/chosen": -0.7593032121658325, "logps/rejected": -1.2774019241333008, "loss": 1.0536, "nll_loss": 0.8983508348464966, "rewards/accuracies": 0.5, "rewards/chosen": -0.07593032717704773, "rewards/margins": 0.051809877157211304, "rewards/rejected": -0.12774018943309784, "step": 5113 }, { "epoch": 3.119719383864572, "grad_norm": 2.337172269821167, "learning_rate": 3.0187385180649113e-06, "log_odds_chosen": 1.3602886199951172, "log_odds_ratio": -0.50443035364151, "logits/chosen": -0.8631823062896729, "logits/rejected": -1.0362635850906372, "logps/chosen": -0.9316712021827698, "logps/rejected": -1.9227399826049805, "loss": 1.1548, "nll_loss": 1.2395724058151245, "rewards/accuracies": 0.625, "rewards/chosen": -0.09316712617874146, "rewards/margins": 0.09910689294338226, "rewards/rejected": -0.19227401912212372, "step": 5114 }, { "epoch": 3.1203294189415893, "grad_norm": 1.8242459297180176, "learning_rate": 3.0177587262706674e-06, "log_odds_chosen": 1.3374755382537842, "log_odds_ratio": -0.5321460962295532, "logits/chosen": -0.8488272428512573, "logits/rejected": -1.0678249597549438, "logps/chosen": -0.9883109331130981, "logps/rejected": -2.0600743293762207, "loss": 1.2441, "nll_loss": 1.3011566400527954, "rewards/accuracies": 0.625, "rewards/chosen": -0.09883110225200653, "rewards/margins": 0.10717633366584778, "rewards/rejected": -0.2060074359178543, "step": 5115 }, { "epoch": 3.120939454018606, "grad_norm": 1.0585664510726929, "learning_rate": 3.0167789344764235e-06, "log_odds_chosen": 1.8062591552734375, "log_odds_ratio": -0.3448241353034973, "logits/chosen": -0.7764334082603455, "logits/rejected": -0.8903437852859497, "logps/chosen": -0.8801350593566895, "logps/rejected": -2.3246288299560547, "loss": 1.1896, "nll_loss": 0.9936448335647583, "rewards/accuracies": 0.875, "rewards/chosen": -0.08801349997520447, "rewards/margins": 0.144449383020401, "rewards/rejected": -0.23246288299560547, "step": 5116 }, { "epoch": 3.121549489095623, "grad_norm": 2.716953992843628, "learning_rate": 3.01579914268218e-06, "log_odds_chosen": 2.357067584991455, "log_odds_ratio": -0.4118598401546478, "logits/chosen": -0.9023452401161194, "logits/rejected": -1.0253452062606812, "logps/chosen": -0.731583833694458, "logps/rejected": -2.4567384719848633, "loss": 1.0166, "nll_loss": 0.914818525314331, "rewards/accuracies": 0.75, "rewards/chosen": -0.0731583833694458, "rewards/margins": 0.17251545190811157, "rewards/rejected": -0.24567383527755737, "step": 5117 }, { "epoch": 3.12215952417264, "grad_norm": 8.78939437866211, "learning_rate": 3.014819350887936e-06, "log_odds_chosen": 1.3831837177276611, "log_odds_ratio": -0.4515923261642456, "logits/chosen": -0.886603593826294, "logits/rejected": -0.9577159881591797, "logps/chosen": -0.9001257419586182, "logps/rejected": -1.9425923824310303, "loss": 0.9993, "nll_loss": 1.081949234008789, "rewards/accuracies": 0.625, "rewards/chosen": -0.0900125727057457, "rewards/margins": 0.10424666106700897, "rewards/rejected": -0.19425922632217407, "step": 5118 }, { "epoch": 3.1227695592496567, "grad_norm": 8.185564041137695, "learning_rate": 3.0138395590936923e-06, "log_odds_chosen": 1.4631497859954834, "log_odds_ratio": -0.421051025390625, "logits/chosen": -0.721166729927063, "logits/rejected": -0.6617461442947388, "logps/chosen": -0.603396475315094, "logps/rejected": -1.6892682313919067, "loss": 0.9208, "nll_loss": 0.8546321988105774, "rewards/accuracies": 0.75, "rewards/chosen": -0.06033964455127716, "rewards/margins": 0.10858719050884247, "rewards/rejected": -0.16892683506011963, "step": 5119 }, { "epoch": 3.123379594326674, "grad_norm": 1.3782436847686768, "learning_rate": 3.012859767299449e-06, "log_odds_chosen": 0.2129354327917099, "log_odds_ratio": -0.7625203132629395, "logits/chosen": -0.8784829378128052, "logits/rejected": -0.9407099485397339, "logps/chosen": -1.010939359664917, "logps/rejected": -1.095043420791626, "loss": 1.0783, "nll_loss": 1.090559482574463, "rewards/accuracies": 0.5, "rewards/chosen": -0.10109394043684006, "rewards/margins": 0.008410410955548286, "rewards/rejected": -0.1095043495297432, "step": 5120 }, { "epoch": 3.123989629403691, "grad_norm": 4.215287208557129, "learning_rate": 3.011879975505205e-06, "log_odds_chosen": 2.0189223289489746, "log_odds_ratio": -0.3761979341506958, "logits/chosen": -1.0467331409454346, "logits/rejected": -1.1346704959869385, "logps/chosen": -0.7988793849945068, "logps/rejected": -2.4242069721221924, "loss": 1.0761, "nll_loss": 1.004477858543396, "rewards/accuracies": 0.75, "rewards/chosen": -0.07988792657852173, "rewards/margins": 0.1625327616930008, "rewards/rejected": -0.24242070317268372, "step": 5121 }, { "epoch": 3.1245996644807077, "grad_norm": 2.231184482574463, "learning_rate": 3.0109001837109615e-06, "log_odds_chosen": 3.0132639408111572, "log_odds_ratio": -0.31036701798439026, "logits/chosen": -0.8340508937835693, "logits/rejected": -0.8718458414077759, "logps/chosen": -0.8982928991317749, "logps/rejected": -3.1711957454681396, "loss": 1.1364, "nll_loss": 1.0475459098815918, "rewards/accuracies": 0.875, "rewards/chosen": -0.08982928842306137, "rewards/margins": 0.2272903025150299, "rewards/rejected": -0.3171195685863495, "step": 5122 }, { "epoch": 3.1252096995577245, "grad_norm": 1.2849829196929932, "learning_rate": 3.0099203919167176e-06, "log_odds_chosen": 2.6193366050720215, "log_odds_ratio": -0.2999333143234253, "logits/chosen": -0.9203155636787415, "logits/rejected": -1.0116126537322998, "logps/chosen": -0.9564510583877563, "logps/rejected": -3.144813299179077, "loss": 1.1255, "nll_loss": 1.1052591800689697, "rewards/accuracies": 0.875, "rewards/chosen": -0.09564509987831116, "rewards/margins": 0.2188362181186676, "rewards/rejected": -0.31448131799697876, "step": 5123 }, { "epoch": 3.1258197346347414, "grad_norm": 6.707756996154785, "learning_rate": 3.0089406001224738e-06, "log_odds_chosen": 2.745718479156494, "log_odds_ratio": -0.4604232907295227, "logits/chosen": -0.9869169592857361, "logits/rejected": -1.0693902969360352, "logps/chosen": -0.9253412485122681, "logps/rejected": -3.3443548679351807, "loss": 0.9989, "nll_loss": 1.2166131734848022, "rewards/accuracies": 0.625, "rewards/chosen": -0.0925341248512268, "rewards/margins": 0.24190136790275574, "rewards/rejected": -0.33443549275398254, "step": 5124 }, { "epoch": 3.1264297697117582, "grad_norm": 6.41657018661499, "learning_rate": 3.0079608083282303e-06, "log_odds_chosen": 2.0269436836242676, "log_odds_ratio": -0.3053199052810669, "logits/chosen": -0.8402188420295715, "logits/rejected": -0.8055839538574219, "logps/chosen": -0.7298591136932373, "logps/rejected": -2.260695457458496, "loss": 0.8125, "nll_loss": 0.9171254634857178, "rewards/accuracies": 0.875, "rewards/chosen": -0.07298591732978821, "rewards/margins": 0.15308362245559692, "rewards/rejected": -0.22606955468654633, "step": 5125 }, { "epoch": 3.1270398047887755, "grad_norm": 1.163196325302124, "learning_rate": 3.006981016533986e-06, "log_odds_chosen": 1.7899034023284912, "log_odds_ratio": -0.3872978687286377, "logits/chosen": -1.1143614053726196, "logits/rejected": -1.11418616771698, "logps/chosen": -0.9550392031669617, "logps/rejected": -2.4094977378845215, "loss": 1.0488, "nll_loss": 1.0971732139587402, "rewards/accuracies": 0.75, "rewards/chosen": -0.09550392627716064, "rewards/margins": 0.1454458385705948, "rewards/rejected": -0.24094976484775543, "step": 5126 }, { "epoch": 3.1276498398657924, "grad_norm": 1.347622036933899, "learning_rate": 3.0060012247397426e-06, "log_odds_chosen": 2.319827079772949, "log_odds_ratio": -0.36823567748069763, "logits/chosen": -0.9095262885093689, "logits/rejected": -1.016728162765503, "logps/chosen": -0.7848877906799316, "logps/rejected": -2.620114326477051, "loss": 0.9784, "nll_loss": 0.921718180179596, "rewards/accuracies": 0.875, "rewards/chosen": -0.0784887820482254, "rewards/margins": 0.18352267146110535, "rewards/rejected": -0.26201146841049194, "step": 5127 }, { "epoch": 3.128259874942809, "grad_norm": 2.7908833026885986, "learning_rate": 3.005021432945499e-06, "log_odds_chosen": 2.140876293182373, "log_odds_ratio": -0.39268630743026733, "logits/chosen": -1.046433687210083, "logits/rejected": -1.1120338439941406, "logps/chosen": -0.933256983757019, "logps/rejected": -2.7565343379974365, "loss": 1.0108, "nll_loss": 1.0058109760284424, "rewards/accuracies": 0.625, "rewards/chosen": -0.09332568943500519, "rewards/margins": 0.1823277324438095, "rewards/rejected": -0.2756534218788147, "step": 5128 }, { "epoch": 3.128869910019826, "grad_norm": 1.6248414516448975, "learning_rate": 3.0040416411512552e-06, "log_odds_chosen": 2.4542064666748047, "log_odds_ratio": -0.3064683675765991, "logits/chosen": -0.8203898668289185, "logits/rejected": -0.9948813319206238, "logps/chosen": -0.6298954486846924, "logps/rejected": -2.3826608657836914, "loss": 1.1335, "nll_loss": 0.8825152516365051, "rewards/accuracies": 0.875, "rewards/chosen": -0.06298954039812088, "rewards/margins": 0.17527654767036438, "rewards/rejected": -0.23826609551906586, "step": 5129 }, { "epoch": 3.129479945096843, "grad_norm": 1.9285660982131958, "learning_rate": 3.0030618493570113e-06, "log_odds_chosen": 1.4044480323791504, "log_odds_ratio": -0.4226020574569702, "logits/chosen": -0.889318585395813, "logits/rejected": -0.9503248333930969, "logps/chosen": -0.529177725315094, "logps/rejected": -1.3809177875518799, "loss": 1.1145, "nll_loss": 0.9873753190040588, "rewards/accuracies": 0.875, "rewards/chosen": -0.05291777849197388, "rewards/margins": 0.08517400175333023, "rewards/rejected": -0.1380917727947235, "step": 5130 }, { "epoch": 3.13008998017386, "grad_norm": 1.5105849504470825, "learning_rate": 3.002082057562768e-06, "log_odds_chosen": 0.2296772003173828, "log_odds_ratio": -0.6238940954208374, "logits/chosen": -0.9704166650772095, "logits/rejected": -1.0654759407043457, "logps/chosen": -0.7158913612365723, "logps/rejected": -0.8464287519454956, "loss": 0.9957, "nll_loss": 1.018710732460022, "rewards/accuracies": 0.625, "rewards/chosen": -0.0715891420841217, "rewards/margins": 0.013053737580776215, "rewards/rejected": -0.08464288711547852, "step": 5131 }, { "epoch": 3.130700015250877, "grad_norm": 1.0698060989379883, "learning_rate": 3.001102265768524e-06, "log_odds_chosen": 1.8429960012435913, "log_odds_ratio": -0.27817925810813904, "logits/chosen": -0.8356949090957642, "logits/rejected": -0.904705286026001, "logps/chosen": -0.673822820186615, "logps/rejected": -1.9311103820800781, "loss": 0.9948, "nll_loss": 0.7624354362487793, "rewards/accuracies": 0.875, "rewards/chosen": -0.06738227605819702, "rewards/margins": 0.1257287561893463, "rewards/rejected": -0.19311104714870453, "step": 5132 }, { "epoch": 3.131310050327894, "grad_norm": 3.2781200408935547, "learning_rate": 3.00012247397428e-06, "log_odds_chosen": 1.6867018938064575, "log_odds_ratio": -0.4608609974384308, "logits/chosen": -0.9301633834838867, "logits/rejected": -1.0078051090240479, "logps/chosen": -0.8584692478179932, "logps/rejected": -2.2350196838378906, "loss": 0.9078, "nll_loss": 0.9940599203109741, "rewards/accuracies": 0.5, "rewards/chosen": -0.0858469232916832, "rewards/margins": 0.13765506446361542, "rewards/rejected": -0.22350198030471802, "step": 5133 }, { "epoch": 3.1319200854049107, "grad_norm": 2.6496269702911377, "learning_rate": 2.9991426821800367e-06, "log_odds_chosen": 2.7144346237182617, "log_odds_ratio": -0.636286735534668, "logits/chosen": -0.7736201882362366, "logits/rejected": -0.9365168809890747, "logps/chosen": -0.9564855098724365, "logps/rejected": -3.5017287731170654, "loss": 1.1618, "nll_loss": 1.2459197044372559, "rewards/accuracies": 0.5, "rewards/chosen": -0.09564854949712753, "rewards/margins": 0.2545243501663208, "rewards/rejected": -0.35017287731170654, "step": 5134 }, { "epoch": 3.1325301204819276, "grad_norm": 1.5141043663024902, "learning_rate": 2.998162890385793e-06, "log_odds_chosen": 3.132539749145508, "log_odds_ratio": -0.3782809376716614, "logits/chosen": -0.8338289260864258, "logits/rejected": -1.0669749975204468, "logps/chosen": -0.9052932262420654, "logps/rejected": -3.506495952606201, "loss": 1.0953, "nll_loss": 1.1492135524749756, "rewards/accuracies": 0.75, "rewards/chosen": -0.09052932262420654, "rewards/margins": 0.26012030243873596, "rewards/rejected": -0.3506495952606201, "step": 5135 }, { "epoch": 3.1331401555589444, "grad_norm": 4.764245986938477, "learning_rate": 2.9971830985915494e-06, "log_odds_chosen": 2.2609753608703613, "log_odds_ratio": -0.30492132902145386, "logits/chosen": -0.9661178588867188, "logits/rejected": -1.1446226835250854, "logps/chosen": -0.5770156979560852, "logps/rejected": -2.225142240524292, "loss": 1.0728, "nll_loss": 1.1003882884979248, "rewards/accuracies": 1.0, "rewards/chosen": -0.05770156532526016, "rewards/margins": 0.16481263935565948, "rewards/rejected": -0.22251421213150024, "step": 5136 }, { "epoch": 3.1337501906359617, "grad_norm": 2.03812575340271, "learning_rate": 2.9962033067973055e-06, "log_odds_chosen": 2.1175384521484375, "log_odds_ratio": -0.5655169486999512, "logits/chosen": -0.7539054751396179, "logits/rejected": -0.9558128714561462, "logps/chosen": -0.9686181545257568, "logps/rejected": -2.5308475494384766, "loss": 1.0437, "nll_loss": 0.9137629270553589, "rewards/accuracies": 0.875, "rewards/chosen": -0.0968618169426918, "rewards/margins": 0.15622295439243317, "rewards/rejected": -0.25308477878570557, "step": 5137 }, { "epoch": 3.1343602257129786, "grad_norm": 1.6484103202819824, "learning_rate": 2.9952235150030616e-06, "log_odds_chosen": 2.1158812046051025, "log_odds_ratio": -0.40242525935173035, "logits/chosen": -0.8455353379249573, "logits/rejected": -0.9380855560302734, "logps/chosen": -0.7435352802276611, "logps/rejected": -2.3822429180145264, "loss": 1.0069, "nll_loss": 0.8919154405593872, "rewards/accuracies": 0.75, "rewards/chosen": -0.07435353100299835, "rewards/margins": 0.16387076675891876, "rewards/rejected": -0.23822429776191711, "step": 5138 }, { "epoch": 3.1349702607899954, "grad_norm": 1.1709063053131104, "learning_rate": 2.994243723208818e-06, "log_odds_chosen": 3.497809648513794, "log_odds_ratio": -0.35313552618026733, "logits/chosen": -1.013060450553894, "logits/rejected": -1.0387928485870361, "logps/chosen": -0.6414785385131836, "logps/rejected": -3.535038471221924, "loss": 1.007, "nll_loss": 0.8895703554153442, "rewards/accuracies": 0.75, "rewards/chosen": -0.06414785981178284, "rewards/margins": 0.289355993270874, "rewards/rejected": -0.3535038232803345, "step": 5139 }, { "epoch": 3.1355802958670123, "grad_norm": 4.1698808670043945, "learning_rate": 2.9932639314145743e-06, "log_odds_chosen": 2.5979559421539307, "log_odds_ratio": -0.38117748498916626, "logits/chosen": -0.8146370649337769, "logits/rejected": -0.9046045541763306, "logps/chosen": -0.7949982285499573, "logps/rejected": -2.7659523487091064, "loss": 1.189, "nll_loss": 1.1267380714416504, "rewards/accuracies": 0.75, "rewards/chosen": -0.07949983328580856, "rewards/margins": 0.19709540903568268, "rewards/rejected": -0.27659523487091064, "step": 5140 }, { "epoch": 3.136190330944029, "grad_norm": 9.095477104187012, "learning_rate": 2.9922841396203304e-06, "log_odds_chosen": 0.7403733730316162, "log_odds_ratio": -0.6784858107566833, "logits/chosen": -1.0974044799804688, "logits/rejected": -1.072553277015686, "logps/chosen": -1.116271734237671, "logps/rejected": -1.7072205543518066, "loss": 1.1417, "nll_loss": 1.1870136260986328, "rewards/accuracies": 0.5, "rewards/chosen": -0.11162716150283813, "rewards/margins": 0.0590948760509491, "rewards/rejected": -0.17072203755378723, "step": 5141 }, { "epoch": 3.1368003660210464, "grad_norm": 4.635860443115234, "learning_rate": 2.991304347826087e-06, "log_odds_chosen": 2.8810081481933594, "log_odds_ratio": -0.4399039149284363, "logits/chosen": -0.7523051500320435, "logits/rejected": -0.8770943880081177, "logps/chosen": -0.7468430995941162, "logps/rejected": -3.136460542678833, "loss": 1.0995, "nll_loss": 0.9189373254776001, "rewards/accuracies": 0.625, "rewards/chosen": -0.07468430697917938, "rewards/margins": 0.23896172642707825, "rewards/rejected": -0.3136460483074188, "step": 5142 }, { "epoch": 3.1374104010980632, "grad_norm": 1.28861665725708, "learning_rate": 2.9903245560318435e-06, "log_odds_chosen": 2.2684836387634277, "log_odds_ratio": -0.3961094915866852, "logits/chosen": -0.9188209176063538, "logits/rejected": -0.9181138277053833, "logps/chosen": -0.8643922805786133, "logps/rejected": -2.7457032203674316, "loss": 0.8244, "nll_loss": 0.9762371778488159, "rewards/accuracies": 0.75, "rewards/chosen": -0.08643923699855804, "rewards/margins": 0.18813109397888184, "rewards/rejected": -0.2745703160762787, "step": 5143 }, { "epoch": 3.13802043617508, "grad_norm": 2.820962429046631, "learning_rate": 2.989344764237599e-06, "log_odds_chosen": 1.5151113271713257, "log_odds_ratio": -0.43100351095199585, "logits/chosen": -1.1177167892456055, "logits/rejected": -1.0829026699066162, "logps/chosen": -0.9700800776481628, "logps/rejected": -2.1167845726013184, "loss": 1.4094, "nll_loss": 1.4166642427444458, "rewards/accuracies": 0.75, "rewards/chosen": -0.09700801223516464, "rewards/margins": 0.11467045545578003, "rewards/rejected": -0.21167844533920288, "step": 5144 }, { "epoch": 3.138630471252097, "grad_norm": 1.0683014392852783, "learning_rate": 2.9883649724433557e-06, "log_odds_chosen": 3.1864566802978516, "log_odds_ratio": -0.31288591027259827, "logits/chosen": -0.767266571521759, "logits/rejected": -0.8726018071174622, "logps/chosen": -0.6349487900733948, "logps/rejected": -3.200793743133545, "loss": 0.8674, "nll_loss": 0.7768643498420715, "rewards/accuracies": 0.75, "rewards/chosen": -0.06349487602710724, "rewards/margins": 0.256584495306015, "rewards/rejected": -0.32007935643196106, "step": 5145 }, { "epoch": 3.1392405063291138, "grad_norm": 1.1797279119491577, "learning_rate": 2.987385180649112e-06, "log_odds_chosen": 3.027933120727539, "log_odds_ratio": -0.5570515990257263, "logits/chosen": -0.986693263053894, "logits/rejected": -1.1464455127716064, "logps/chosen": -0.8150613307952881, "logps/rejected": -3.448493003845215, "loss": 1.1022, "nll_loss": 1.1359736919403076, "rewards/accuracies": 0.625, "rewards/chosen": -0.0815061405301094, "rewards/margins": 0.2633431553840637, "rewards/rejected": -0.34484928846359253, "step": 5146 }, { "epoch": 3.1398505414061306, "grad_norm": 17.1816463470459, "learning_rate": 2.986405388854868e-06, "log_odds_chosen": 1.1203281879425049, "log_odds_ratio": -0.5747584700584412, "logits/chosen": -0.8124012351036072, "logits/rejected": -0.9999939203262329, "logps/chosen": -1.4964344501495361, "logps/rejected": -2.2885351181030273, "loss": 1.0355, "nll_loss": 1.2319955825805664, "rewards/accuracies": 0.75, "rewards/chosen": -0.1496434509754181, "rewards/margins": 0.0792100727558136, "rewards/rejected": -0.2288535237312317, "step": 5147 }, { "epoch": 3.140460576483148, "grad_norm": 1.158886432647705, "learning_rate": 2.9854255970606245e-06, "log_odds_chosen": 2.1328279972076416, "log_odds_ratio": -0.30966877937316895, "logits/chosen": -0.8685429692268372, "logits/rejected": -0.9688458442687988, "logps/chosen": -0.7133013010025024, "logps/rejected": -2.4243574142456055, "loss": 0.9576, "nll_loss": 0.8726247549057007, "rewards/accuracies": 0.75, "rewards/chosen": -0.07133013755083084, "rewards/margins": 0.17110562324523926, "rewards/rejected": -0.2424357533454895, "step": 5148 }, { "epoch": 3.1410706115601648, "grad_norm": 1.572585105895996, "learning_rate": 2.9844458052663806e-06, "log_odds_chosen": 2.5665342807769775, "log_odds_ratio": -0.2341846078634262, "logits/chosen": -0.8834898471832275, "logits/rejected": -1.0104038715362549, "logps/chosen": -0.8712205290794373, "logps/rejected": -3.0491628646850586, "loss": 1.059, "nll_loss": 1.0977506637573242, "rewards/accuracies": 0.875, "rewards/chosen": -0.08712206035852432, "rewards/margins": 0.2177942395210266, "rewards/rejected": -0.3049163222312927, "step": 5149 }, { "epoch": 3.1416806466371816, "grad_norm": 2.4076273441314697, "learning_rate": 2.983466013472137e-06, "log_odds_chosen": 0.8726130723953247, "log_odds_ratio": -0.4325167238712311, "logits/chosen": -0.7381879091262817, "logits/rejected": -0.7911152243614197, "logps/chosen": -0.8217266798019409, "logps/rejected": -1.2748514413833618, "loss": 1.0567, "nll_loss": 1.080180048942566, "rewards/accuracies": 0.625, "rewards/chosen": -0.08217266947031021, "rewards/margins": 0.04531247168779373, "rewards/rejected": -0.12748515605926514, "step": 5150 }, { "epoch": 3.1422906817141985, "grad_norm": 1.2019730806350708, "learning_rate": 2.9824862216778933e-06, "log_odds_chosen": 1.845363974571228, "log_odds_ratio": -0.533876895904541, "logits/chosen": -0.7386031150817871, "logits/rejected": -0.8071293234825134, "logps/chosen": -0.8501307368278503, "logps/rejected": -2.2962889671325684, "loss": 0.8896, "nll_loss": 1.0319933891296387, "rewards/accuracies": 0.625, "rewards/chosen": -0.08501306921243668, "rewards/margins": 0.14461582899093628, "rewards/rejected": -0.22962889075279236, "step": 5151 }, { "epoch": 3.1429007167912153, "grad_norm": 2.1108181476593018, "learning_rate": 2.9815064298836494e-06, "log_odds_chosen": 0.8103082776069641, "log_odds_ratio": -0.5406897068023682, "logits/chosen": -0.6577283143997192, "logits/rejected": -0.6251532435417175, "logps/chosen": -0.8895541429519653, "logps/rejected": -1.527902603149414, "loss": 1.0395, "nll_loss": 1.1046253442764282, "rewards/accuracies": 0.75, "rewards/chosen": -0.08895541727542877, "rewards/margins": 0.06383483856916428, "rewards/rejected": -0.15279024839401245, "step": 5152 }, { "epoch": 3.1435107518682326, "grad_norm": 1.334126591682434, "learning_rate": 2.980526638089406e-06, "log_odds_chosen": 0.9105122685432434, "log_odds_ratio": -0.49644193053245544, "logits/chosen": -0.7231563925743103, "logits/rejected": -0.817907989025116, "logps/chosen": -0.7564490437507629, "logps/rejected": -1.3960264921188354, "loss": 1.0797, "nll_loss": 0.8270285129547119, "rewards/accuracies": 0.625, "rewards/chosen": -0.07564491033554077, "rewards/margins": 0.06395774334669113, "rewards/rejected": -0.1396026611328125, "step": 5153 }, { "epoch": 3.1441207869452494, "grad_norm": 2.007343053817749, "learning_rate": 2.979546846295162e-06, "log_odds_chosen": 1.912931203842163, "log_odds_ratio": -0.5381449460983276, "logits/chosen": -0.7261461615562439, "logits/rejected": -1.0084894895553589, "logps/chosen": -0.9719688892364502, "logps/rejected": -2.6218371391296387, "loss": 1.1041, "nll_loss": 1.0768444538116455, "rewards/accuracies": 0.75, "rewards/chosen": -0.09719687700271606, "rewards/margins": 0.16498681902885437, "rewards/rejected": -0.2621837258338928, "step": 5154 }, { "epoch": 3.1447308220222663, "grad_norm": 1.589457392692566, "learning_rate": 2.9785670545009182e-06, "log_odds_chosen": 1.5828839540481567, "log_odds_ratio": -0.57936692237854, "logits/chosen": -0.9467740058898926, "logits/rejected": -0.9790751934051514, "logps/chosen": -0.5972586870193481, "logps/rejected": -2.0603456497192383, "loss": 0.8918, "nll_loss": 0.8203606605529785, "rewards/accuracies": 0.625, "rewards/chosen": -0.05972587317228317, "rewards/margins": 0.14630869030952454, "rewards/rejected": -0.2060345709323883, "step": 5155 }, { "epoch": 3.145340857099283, "grad_norm": 1.2959647178649902, "learning_rate": 2.9775872627066748e-06, "log_odds_chosen": 1.3395249843597412, "log_odds_ratio": -0.3686363697052002, "logits/chosen": -0.8782100677490234, "logits/rejected": -0.9387053847312927, "logps/chosen": -0.857665479183197, "logps/rejected": -1.782908320426941, "loss": 1.039, "nll_loss": 0.9766759872436523, "rewards/accuracies": 0.75, "rewards/chosen": -0.08576654642820358, "rewards/margins": 0.09252429008483887, "rewards/rejected": -0.17829084396362305, "step": 5156 }, { "epoch": 3.1459508921763, "grad_norm": 1.1541928052902222, "learning_rate": 2.9766074709124313e-06, "log_odds_chosen": 3.4617409706115723, "log_odds_ratio": -0.16647566854953766, "logits/chosen": -0.71734619140625, "logits/rejected": -0.7732012271881104, "logps/chosen": -0.6714533567428589, "logps/rejected": -3.412161111831665, "loss": 0.9466, "nll_loss": 0.8294177055358887, "rewards/accuracies": 0.875, "rewards/chosen": -0.06714533269405365, "rewards/margins": 0.27407076954841614, "rewards/rejected": -0.341216117143631, "step": 5157 }, { "epoch": 3.1465609272533173, "grad_norm": 2.351053237915039, "learning_rate": 2.975627679118187e-06, "log_odds_chosen": 2.0324323177337646, "log_odds_ratio": -0.37911155819892883, "logits/chosen": -0.8665239810943604, "logits/rejected": -1.044960618019104, "logps/chosen": -0.853209376335144, "logps/rejected": -2.445481777191162, "loss": 1.0921, "nll_loss": 1.2926537990570068, "rewards/accuracies": 0.625, "rewards/chosen": -0.08532094955444336, "rewards/margins": 0.15922725200653076, "rewards/rejected": -0.24454818665981293, "step": 5158 }, { "epoch": 3.147170962330334, "grad_norm": 2.1389596462249756, "learning_rate": 2.9746478873239436e-06, "log_odds_chosen": 1.3011376857757568, "log_odds_ratio": -0.6480362415313721, "logits/chosen": -1.0360766649246216, "logits/rejected": -0.8834859132766724, "logps/chosen": -0.9780762195587158, "logps/rejected": -2.1016368865966797, "loss": 1.1153, "nll_loss": 1.0624088048934937, "rewards/accuracies": 0.5, "rewards/chosen": -0.0978076159954071, "rewards/margins": 0.11235608160495758, "rewards/rejected": -0.2101636826992035, "step": 5159 }, { "epoch": 3.147780997407351, "grad_norm": 1.465004563331604, "learning_rate": 2.9736680955297e-06, "log_odds_chosen": 3.486894369125366, "log_odds_ratio": -0.26855039596557617, "logits/chosen": -0.7961711287498474, "logits/rejected": -0.9080029726028442, "logps/chosen": -0.6266242861747742, "logps/rejected": -3.447943687438965, "loss": 1.0295, "nll_loss": 0.7650086283683777, "rewards/accuracies": 0.875, "rewards/chosen": -0.06266242265701294, "rewards/margins": 0.28213194012641907, "rewards/rejected": -0.344794362783432, "step": 5160 }, { "epoch": 3.148391032484368, "grad_norm": 2.4167394638061523, "learning_rate": 2.972688303735456e-06, "log_odds_chosen": 1.0559662580490112, "log_odds_ratio": -0.5550988912582397, "logits/chosen": -0.8383009433746338, "logits/rejected": -0.7598767876625061, "logps/chosen": -0.8350972533226013, "logps/rejected": -1.6473214626312256, "loss": 1.0063, "nll_loss": 1.0278677940368652, "rewards/accuracies": 0.625, "rewards/chosen": -0.08350972831249237, "rewards/margins": 0.08122242242097855, "rewards/rejected": -0.16473212838172913, "step": 5161 }, { "epoch": 3.1490010675613846, "grad_norm": 1.3650215864181519, "learning_rate": 2.9717085119412123e-06, "log_odds_chosen": 2.6746020317077637, "log_odds_ratio": -0.3398559093475342, "logits/chosen": -0.8621645569801331, "logits/rejected": -0.9713520407676697, "logps/chosen": -0.7607632875442505, "logps/rejected": -2.8915646076202393, "loss": 0.9457, "nll_loss": 0.8952064514160156, "rewards/accuracies": 0.875, "rewards/chosen": -0.07607633620500565, "rewards/margins": 0.21308012306690216, "rewards/rejected": -0.289156436920166, "step": 5162 }, { "epoch": 3.149611102638402, "grad_norm": 1.3235218524932861, "learning_rate": 2.970728720146969e-06, "log_odds_chosen": 0.7114483118057251, "log_odds_ratio": -0.6835523843765259, "logits/chosen": -0.8876070976257324, "logits/rejected": -0.9070032238960266, "logps/chosen": -0.8484089374542236, "logps/rejected": -1.457220196723938, "loss": 1.097, "nll_loss": 1.035991907119751, "rewards/accuracies": 0.5, "rewards/chosen": -0.08484089374542236, "rewards/margins": 0.06088113412261009, "rewards/rejected": -0.14572201669216156, "step": 5163 }, { "epoch": 3.150221137715419, "grad_norm": 2.3354952335357666, "learning_rate": 2.969748928352725e-06, "log_odds_chosen": -0.28725481033325195, "log_odds_ratio": -0.8899528980255127, "logits/chosen": -0.8803884983062744, "logits/rejected": -0.8922635912895203, "logps/chosen": -1.1413367986679077, "logps/rejected": -0.9829005599021912, "loss": 1.1645, "nll_loss": 1.2370113134384155, "rewards/accuracies": 0.375, "rewards/chosen": -0.11413367837667465, "rewards/margins": -0.01584361493587494, "rewards/rejected": -0.09829006344079971, "step": 5164 }, { "epoch": 3.1508311727924356, "grad_norm": 3.480926752090454, "learning_rate": 2.968769136558481e-06, "log_odds_chosen": 1.7551586627960205, "log_odds_ratio": -0.20998510718345642, "logits/chosen": -0.8515954613685608, "logits/rejected": -0.8873243927955627, "logps/chosen": -0.6440733075141907, "logps/rejected": -1.7835016250610352, "loss": 1.0665, "nll_loss": 0.8725242018699646, "rewards/accuracies": 1.0, "rewards/chosen": -0.0644073337316513, "rewards/margins": 0.11394283175468445, "rewards/rejected": -0.17835015058517456, "step": 5165 }, { "epoch": 3.1514412078694525, "grad_norm": 1.3161799907684326, "learning_rate": 2.9677893447642373e-06, "log_odds_chosen": 2.3741672039031982, "log_odds_ratio": -0.2712503671646118, "logits/chosen": -0.8708600997924805, "logits/rejected": -0.864778995513916, "logps/chosen": -0.6020269393920898, "logps/rejected": -2.470099687576294, "loss": 0.9117, "nll_loss": 0.8271295428276062, "rewards/accuracies": 0.875, "rewards/chosen": -0.060202695429325104, "rewards/margins": 0.1868072748184204, "rewards/rejected": -0.24700996279716492, "step": 5166 }, { "epoch": 3.1520512429464693, "grad_norm": 1.306288480758667, "learning_rate": 2.966809552969994e-06, "log_odds_chosen": 2.029280185699463, "log_odds_ratio": -0.4623958468437195, "logits/chosen": -0.7485517859458923, "logits/rejected": -0.8318686485290527, "logps/chosen": -0.7139982581138611, "logps/rejected": -2.217953681945801, "loss": 0.9656, "nll_loss": 0.9461005926132202, "rewards/accuracies": 0.625, "rewards/chosen": -0.07139982283115387, "rewards/margins": 0.15039554238319397, "rewards/rejected": -0.22179536521434784, "step": 5167 }, { "epoch": 3.152661278023486, "grad_norm": 1.1874903440475464, "learning_rate": 2.96582976117575e-06, "log_odds_chosen": 3.2598979473114014, "log_odds_ratio": -0.39655566215515137, "logits/chosen": -0.6661738157272339, "logits/rejected": -0.8262375593185425, "logps/chosen": -0.7256501913070679, "logps/rejected": -3.538771629333496, "loss": 0.851, "nll_loss": 0.8082137107849121, "rewards/accuracies": 0.625, "rewards/chosen": -0.07256501913070679, "rewards/margins": 0.28131216764450073, "rewards/rejected": -0.3538771867752075, "step": 5168 }, { "epoch": 3.1532713131005035, "grad_norm": 1.4641791582107544, "learning_rate": 2.964849969381506e-06, "log_odds_chosen": 1.0100517272949219, "log_odds_ratio": -0.5322434902191162, "logits/chosen": -0.8106289505958557, "logits/rejected": -0.8632163405418396, "logps/chosen": -0.6797780990600586, "logps/rejected": -1.371166467666626, "loss": 1.0875, "nll_loss": 0.9937666654586792, "rewards/accuracies": 0.75, "rewards/chosen": -0.06797781586647034, "rewards/margins": 0.06913883239030838, "rewards/rejected": -0.13711664080619812, "step": 5169 }, { "epoch": 3.1538813481775203, "grad_norm": 6.479657173156738, "learning_rate": 2.9638701775872626e-06, "log_odds_chosen": 1.2403850555419922, "log_odds_ratio": -0.34637463092803955, "logits/chosen": -0.933832049369812, "logits/rejected": -0.980029284954071, "logps/chosen": -0.7232283353805542, "logps/rejected": -1.536489486694336, "loss": 1.0225, "nll_loss": 0.938607931137085, "rewards/accuracies": 0.875, "rewards/chosen": -0.07232283800840378, "rewards/margins": 0.08132612705230713, "rewards/rejected": -0.1536489576101303, "step": 5170 }, { "epoch": 3.154491383254537, "grad_norm": 3.257840394973755, "learning_rate": 2.962890385793019e-06, "log_odds_chosen": 1.3167939186096191, "log_odds_ratio": -0.36192476749420166, "logits/chosen": -0.7248916625976562, "logits/rejected": -0.7831689119338989, "logps/chosen": -0.7497563362121582, "logps/rejected": -1.6487751007080078, "loss": 1.1839, "nll_loss": 0.9649220108985901, "rewards/accuracies": 0.75, "rewards/chosen": -0.0749756395816803, "rewards/margins": 0.0899018794298172, "rewards/rejected": -0.1648775339126587, "step": 5171 }, { "epoch": 3.155101418331554, "grad_norm": 1.8981770277023315, "learning_rate": 2.961910593998775e-06, "log_odds_chosen": 1.4842349290847778, "log_odds_ratio": -0.36235707998275757, "logits/chosen": -0.9361624717712402, "logits/rejected": -1.0681084394454956, "logps/chosen": -0.782526969909668, "logps/rejected": -1.8208961486816406, "loss": 1.0834, "nll_loss": 0.9057798981666565, "rewards/accuracies": 0.75, "rewards/chosen": -0.07825270295143127, "rewards/margins": 0.10383690893650055, "rewards/rejected": -0.18208959698677063, "step": 5172 }, { "epoch": 3.155711453408571, "grad_norm": 1.1325024366378784, "learning_rate": 2.9609308022045314e-06, "log_odds_chosen": 3.351240873336792, "log_odds_ratio": -0.2273980975151062, "logits/chosen": -0.4578641951084137, "logits/rejected": -0.6964112520217896, "logps/chosen": -0.47655731439590454, "logps/rejected": -3.0810251235961914, "loss": 0.9487, "nll_loss": 0.7516523599624634, "rewards/accuracies": 1.0, "rewards/chosen": -0.047655731439590454, "rewards/margins": 0.26044678688049316, "rewards/rejected": -0.308102548122406, "step": 5173 }, { "epoch": 3.156321488485588, "grad_norm": 1.7512195110321045, "learning_rate": 2.959951010410288e-06, "log_odds_chosen": 1.5129287242889404, "log_odds_ratio": -0.3428341746330261, "logits/chosen": -0.5139127373695374, "logits/rejected": -0.6805283427238464, "logps/chosen": -0.7388912439346313, "logps/rejected": -1.783440113067627, "loss": 1.1001, "nll_loss": 1.24976646900177, "rewards/accuracies": 0.875, "rewards/chosen": -0.07388913631439209, "rewards/margins": 0.1044548749923706, "rewards/rejected": -0.1783440113067627, "step": 5174 }, { "epoch": 3.156931523562605, "grad_norm": 1.6540244817733765, "learning_rate": 2.9589712186160436e-06, "log_odds_chosen": 2.508014678955078, "log_odds_ratio": -0.3074493110179901, "logits/chosen": -0.6402447819709778, "logits/rejected": -0.8087347745895386, "logps/chosen": -0.6557104587554932, "logps/rejected": -2.652503490447998, "loss": 0.9432, "nll_loss": 0.7321615219116211, "rewards/accuracies": 0.875, "rewards/chosen": -0.06557105481624603, "rewards/margins": 0.19967928528785706, "rewards/rejected": -0.2652503252029419, "step": 5175 }, { "epoch": 3.157541558639622, "grad_norm": 2.363992929458618, "learning_rate": 2.9579914268218e-06, "log_odds_chosen": 2.0872573852539062, "log_odds_ratio": -0.2731325328350067, "logits/chosen": -1.027618646621704, "logits/rejected": -1.0278587341308594, "logps/chosen": -0.825092613697052, "logps/rejected": -2.471755027770996, "loss": 1.1369, "nll_loss": 0.9215302467346191, "rewards/accuracies": 0.75, "rewards/chosen": -0.08250926434993744, "rewards/margins": 0.16466626524925232, "rewards/rejected": -0.24717551469802856, "step": 5176 }, { "epoch": 3.1581515937166387, "grad_norm": 2.149103879928589, "learning_rate": 2.9570116350275567e-06, "log_odds_chosen": 0.9420883059501648, "log_odds_ratio": -0.5876454710960388, "logits/chosen": -1.0055242776870728, "logits/rejected": -0.9777757525444031, "logps/chosen": -0.9114223718643188, "logps/rejected": -1.6937566995620728, "loss": 1.0552, "nll_loss": 1.2528518438339233, "rewards/accuracies": 0.625, "rewards/chosen": -0.09114224463701248, "rewards/margins": 0.07823343575000763, "rewards/rejected": -0.16937567293643951, "step": 5177 }, { "epoch": 3.1587616287936555, "grad_norm": 1.810546636581421, "learning_rate": 2.956031843233313e-06, "log_odds_chosen": 1.3435373306274414, "log_odds_ratio": -0.550388514995575, "logits/chosen": -1.0632814168930054, "logits/rejected": -1.2521779537200928, "logps/chosen": -1.1508827209472656, "logps/rejected": -2.3406949043273926, "loss": 1.3448, "nll_loss": 1.5671557188034058, "rewards/accuracies": 0.625, "rewards/chosen": -0.11508826911449432, "rewards/margins": 0.11898121237754822, "rewards/rejected": -0.23406948149204254, "step": 5178 }, { "epoch": 3.1593716638706724, "grad_norm": 1.6409350633621216, "learning_rate": 2.955052051439069e-06, "log_odds_chosen": 1.8911240100860596, "log_odds_ratio": -0.3152238726615906, "logits/chosen": -0.890573263168335, "logits/rejected": -1.153979778289795, "logps/chosen": -0.6707313060760498, "logps/rejected": -1.8260713815689087, "loss": 0.9864, "nll_loss": 0.9304918646812439, "rewards/accuracies": 0.875, "rewards/chosen": -0.06707313656806946, "rewards/margins": 0.11553400754928589, "rewards/rejected": -0.18260714411735535, "step": 5179 }, { "epoch": 3.1599816989476897, "grad_norm": 6.934014797210693, "learning_rate": 2.9540722596448255e-06, "log_odds_chosen": 1.7766728401184082, "log_odds_ratio": -0.324056476354599, "logits/chosen": -0.6646411418914795, "logits/rejected": -0.9657532572746277, "logps/chosen": -0.7509655952453613, "logps/rejected": -2.0591466426849365, "loss": 1.0358, "nll_loss": 0.8586969375610352, "rewards/accuracies": 0.875, "rewards/chosen": -0.07509656250476837, "rewards/margins": 0.13081809878349304, "rewards/rejected": -0.2059146761894226, "step": 5180 }, { "epoch": 3.1605917340247065, "grad_norm": 1.726792573928833, "learning_rate": 2.9530924678505816e-06, "log_odds_chosen": 3.9430744647979736, "log_odds_ratio": -0.11477351933717728, "logits/chosen": -0.795642614364624, "logits/rejected": -0.8649086952209473, "logps/chosen": -0.6902153491973877, "logps/rejected": -3.816227436065674, "loss": 0.971, "nll_loss": 0.9538556337356567, "rewards/accuracies": 0.875, "rewards/chosen": -0.0690215453505516, "rewards/margins": 0.3126012086868286, "rewards/rejected": -0.3816227614879608, "step": 5181 }, { "epoch": 3.1612017691017233, "grad_norm": 1.295377254486084, "learning_rate": 2.9521126760563378e-06, "log_odds_chosen": 0.7895658016204834, "log_odds_ratio": -0.7649874091148376, "logits/chosen": -0.8185021281242371, "logits/rejected": -0.8767770528793335, "logps/chosen": -0.9497706294059753, "logps/rejected": -1.7120132446289062, "loss": 1.0465, "nll_loss": 1.0768177509307861, "rewards/accuracies": 0.5, "rewards/chosen": -0.09497706592082977, "rewards/margins": 0.07622426003217697, "rewards/rejected": -0.17120131850242615, "step": 5182 }, { "epoch": 3.16181180417874, "grad_norm": 2.447467803955078, "learning_rate": 2.9511328842620943e-06, "log_odds_chosen": 3.0387933254241943, "log_odds_ratio": -0.285677969455719, "logits/chosen": -0.7870697975158691, "logits/rejected": -1.0081524848937988, "logps/chosen": -1.0714695453643799, "logps/rejected": -3.643679618835449, "loss": 1.152, "nll_loss": 1.1060681343078613, "rewards/accuracies": 0.875, "rewards/chosen": -0.10714695602655411, "rewards/margins": 0.2572210431098938, "rewards/rejected": -0.3643679916858673, "step": 5183 }, { "epoch": 3.162421839255757, "grad_norm": 1.3779661655426025, "learning_rate": 2.9501530924678504e-06, "log_odds_chosen": 1.6451740264892578, "log_odds_ratio": -0.5850219130516052, "logits/chosen": -0.9973316788673401, "logits/rejected": -0.9446406364440918, "logps/chosen": -1.1204766035079956, "logps/rejected": -2.536837577819824, "loss": 1.1486, "nll_loss": 1.3495877981185913, "rewards/accuracies": 0.625, "rewards/chosen": -0.11204766482114792, "rewards/margins": 0.14163607358932495, "rewards/rejected": -0.25368374586105347, "step": 5184 }, { "epoch": 3.1630318743327743, "grad_norm": 2.19781494140625, "learning_rate": 2.949173300673607e-06, "log_odds_chosen": 3.878169298171997, "log_odds_ratio": -0.17961539328098297, "logits/chosen": -0.8530905842781067, "logits/rejected": -1.0230553150177002, "logps/chosen": -0.6583949327468872, "logps/rejected": -3.86102557182312, "loss": 1.1393, "nll_loss": 0.9288949966430664, "rewards/accuracies": 1.0, "rewards/chosen": -0.0658394992351532, "rewards/margins": 0.3202630877494812, "rewards/rejected": -0.3861025869846344, "step": 5185 }, { "epoch": 3.163641909409791, "grad_norm": 2.056602954864502, "learning_rate": 2.9481935088793627e-06, "log_odds_chosen": 3.052605628967285, "log_odds_ratio": -0.22282984852790833, "logits/chosen": -0.8228010535240173, "logits/rejected": -0.9272502660751343, "logps/chosen": -0.5172231197357178, "logps/rejected": -2.8792648315429688, "loss": 1.0596, "nll_loss": 0.8728957176208496, "rewards/accuracies": 1.0, "rewards/chosen": -0.051722317934036255, "rewards/margins": 0.23620416224002838, "rewards/rejected": -0.28792649507522583, "step": 5186 }, { "epoch": 3.164251944486808, "grad_norm": 4.044860363006592, "learning_rate": 2.947213717085119e-06, "log_odds_chosen": 0.7686893939971924, "log_odds_ratio": -0.6352999806404114, "logits/chosen": -0.5508925318717957, "logits/rejected": -0.5329676270484924, "logps/chosen": -0.6309259533882141, "logps/rejected": -1.0958046913146973, "loss": 1.0741, "nll_loss": 0.8583884239196777, "rewards/accuracies": 0.75, "rewards/chosen": -0.06309259682893753, "rewards/margins": 0.046487875282764435, "rewards/rejected": -0.10958047211170197, "step": 5187 }, { "epoch": 3.164861979563825, "grad_norm": 1.3028416633605957, "learning_rate": 2.9462339252908758e-06, "log_odds_chosen": 1.8700740337371826, "log_odds_ratio": -0.3725700378417969, "logits/chosen": -0.8007323145866394, "logits/rejected": -0.9886246919631958, "logps/chosen": -0.9485933780670166, "logps/rejected": -2.380009651184082, "loss": 1.0004, "nll_loss": 1.0298662185668945, "rewards/accuracies": 0.75, "rewards/chosen": -0.09485934674739838, "rewards/margins": 0.14314161241054535, "rewards/rejected": -0.23800095915794373, "step": 5188 }, { "epoch": 3.1654720146408417, "grad_norm": 4.784588813781738, "learning_rate": 2.9452541334966315e-06, "log_odds_chosen": 3.197162628173828, "log_odds_ratio": -0.4119582772254944, "logits/chosen": -0.9441164135932922, "logits/rejected": -1.027252435684204, "logps/chosen": -0.6719658374786377, "logps/rejected": -3.4163875579833984, "loss": 1.1396, "nll_loss": 0.9003635048866272, "rewards/accuracies": 0.625, "rewards/chosen": -0.06719658523797989, "rewards/margins": 0.274442195892334, "rewards/rejected": -0.34163880348205566, "step": 5189 }, { "epoch": 3.1660820497178586, "grad_norm": 1.612433671951294, "learning_rate": 2.944274341702388e-06, "log_odds_chosen": 2.5782713890075684, "log_odds_ratio": -0.3298890292644501, "logits/chosen": -0.9462325572967529, "logits/rejected": -0.8537013530731201, "logps/chosen": -0.700709879398346, "logps/rejected": -2.9024505615234375, "loss": 1.0206, "nll_loss": 0.907866358757019, "rewards/accuracies": 0.875, "rewards/chosen": -0.07007098942995071, "rewards/margins": 0.22017410397529602, "rewards/rejected": -0.29024508595466614, "step": 5190 }, { "epoch": 3.166692084794876, "grad_norm": 1.8326339721679688, "learning_rate": 2.9432945499081445e-06, "log_odds_chosen": 1.5378201007843018, "log_odds_ratio": -0.37205931544303894, "logits/chosen": -0.7775801420211792, "logits/rejected": -0.8392303586006165, "logps/chosen": -0.7589441537857056, "logps/rejected": -1.7753263711929321, "loss": 0.9766, "nll_loss": 0.881985068321228, "rewards/accuracies": 0.875, "rewards/chosen": -0.07589442282915115, "rewards/margins": 0.10163822025060654, "rewards/rejected": -0.1775326430797577, "step": 5191 }, { "epoch": 3.1673021198718927, "grad_norm": 1.8625397682189941, "learning_rate": 2.9423147581139007e-06, "log_odds_chosen": 2.7775967121124268, "log_odds_ratio": -0.6645247936248779, "logits/chosen": -0.7890557646751404, "logits/rejected": -0.9405907392501831, "logps/chosen": -0.8545684814453125, "logps/rejected": -3.1405394077301025, "loss": 0.9892, "nll_loss": 0.8428565263748169, "rewards/accuracies": 0.5, "rewards/chosen": -0.08545684814453125, "rewards/margins": 0.22859710454940796, "rewards/rejected": -0.3140539526939392, "step": 5192 }, { "epoch": 3.1679121549489095, "grad_norm": 1.8200862407684326, "learning_rate": 2.941334966319657e-06, "log_odds_chosen": 2.459076404571533, "log_odds_ratio": -0.3858538269996643, "logits/chosen": -0.9763649702072144, "logits/rejected": -0.9472166299819946, "logps/chosen": -0.8206873536109924, "logps/rejected": -2.857325553894043, "loss": 1.1716, "nll_loss": 1.108366847038269, "rewards/accuracies": 0.75, "rewards/chosen": -0.08206874132156372, "rewards/margins": 0.20366384088993073, "rewards/rejected": -0.28573256731033325, "step": 5193 }, { "epoch": 3.1685221900259264, "grad_norm": 1.6347362995147705, "learning_rate": 2.9403551745254133e-06, "log_odds_chosen": 3.0898094177246094, "log_odds_ratio": -0.1309632807970047, "logits/chosen": -0.6635905504226685, "logits/rejected": -0.847059428691864, "logps/chosen": -0.7242697477340698, "logps/rejected": -3.134949207305908, "loss": 0.9264, "nll_loss": 0.7933440208435059, "rewards/accuracies": 1.0, "rewards/chosen": -0.07242697477340698, "rewards/margins": 0.24106797575950623, "rewards/rejected": -0.3134949505329132, "step": 5194 }, { "epoch": 3.1691322251029432, "grad_norm": 1.4249751567840576, "learning_rate": 2.9393753827311695e-06, "log_odds_chosen": 1.8619003295898438, "log_odds_ratio": -0.19737237691879272, "logits/chosen": -0.932268500328064, "logits/rejected": -0.9661437273025513, "logps/chosen": -0.7226811647415161, "logps/rejected": -2.01104998588562, "loss": 1.0672, "nll_loss": 0.9410737752914429, "rewards/accuracies": 1.0, "rewards/chosen": -0.07226811349391937, "rewards/margins": 0.12883688509464264, "rewards/rejected": -0.201104998588562, "step": 5195 }, { "epoch": 3.1697422601799605, "grad_norm": 2.915233850479126, "learning_rate": 2.938395590936926e-06, "log_odds_chosen": 1.347141981124878, "log_odds_ratio": -0.4112619161605835, "logits/chosen": -0.865487813949585, "logits/rejected": -0.8790403604507446, "logps/chosen": -0.7843362092971802, "logps/rejected": -1.7629280090332031, "loss": 1.108, "nll_loss": 1.0433841943740845, "rewards/accuracies": 0.75, "rewards/chosen": -0.07843361794948578, "rewards/margins": 0.09785918891429901, "rewards/rejected": -0.1762928068637848, "step": 5196 }, { "epoch": 3.1703522952569774, "grad_norm": 1.1891459226608276, "learning_rate": 2.937415799142682e-06, "log_odds_chosen": 2.0866634845733643, "log_odds_ratio": -0.5987263321876526, "logits/chosen": -0.8952418565750122, "logits/rejected": -0.8965234160423279, "logps/chosen": -0.9042173624038696, "logps/rejected": -2.625638008117676, "loss": 1.061, "nll_loss": 0.9969745874404907, "rewards/accuracies": 0.625, "rewards/chosen": -0.09042174369096756, "rewards/margins": 0.17214205861091614, "rewards/rejected": -0.2625637948513031, "step": 5197 }, { "epoch": 3.170962330333994, "grad_norm": 11.61276912689209, "learning_rate": 2.9364360073484382e-06, "log_odds_chosen": 2.6900105476379395, "log_odds_ratio": -0.2809433043003082, "logits/chosen": -0.9957898259162903, "logits/rejected": -1.1284127235412598, "logps/chosen": -0.8678994178771973, "logps/rejected": -3.1003596782684326, "loss": 1.0318, "nll_loss": 1.0999529361724854, "rewards/accuracies": 0.75, "rewards/chosen": -0.08678993582725525, "rewards/margins": 0.2232460379600525, "rewards/rejected": -0.3100360035896301, "step": 5198 }, { "epoch": 3.171572365411011, "grad_norm": 3.6816532611846924, "learning_rate": 2.935456215554195e-06, "log_odds_chosen": 1.563831090927124, "log_odds_ratio": -0.3785034418106079, "logits/chosen": -0.8952064514160156, "logits/rejected": -0.9260364770889282, "logps/chosen": -0.8257867097854614, "logps/rejected": -1.9387528896331787, "loss": 1.0995, "nll_loss": 1.2530139684677124, "rewards/accuracies": 0.75, "rewards/chosen": -0.08257866650819778, "rewards/margins": 0.1112966388463974, "rewards/rejected": -0.1938752979040146, "step": 5199 }, { "epoch": 3.172182400488028, "grad_norm": 1.0690315961837769, "learning_rate": 2.934476423759951e-06, "log_odds_chosen": 1.353041410446167, "log_odds_ratio": -0.525737464427948, "logits/chosen": -0.6832368969917297, "logits/rejected": -0.7590909004211426, "logps/chosen": -0.6225607395172119, "logps/rejected": -1.602957010269165, "loss": 0.9074, "nll_loss": 0.7496194243431091, "rewards/accuracies": 0.625, "rewards/chosen": -0.06225607544183731, "rewards/margins": 0.0980396419763565, "rewards/rejected": -0.16029572486877441, "step": 5200 }, { "epoch": 3.1727924355650448, "grad_norm": 3.2767796516418457, "learning_rate": 2.933496631965707e-06, "log_odds_chosen": 2.92144775390625, "log_odds_ratio": -0.31198567152023315, "logits/chosen": -0.8064612150192261, "logits/rejected": -0.9510201811790466, "logps/chosen": -0.5611656308174133, "logps/rejected": -2.8093512058258057, "loss": 0.9773, "nll_loss": 0.730144739151001, "rewards/accuracies": 0.875, "rewards/chosen": -0.056116558611392975, "rewards/margins": 0.22481858730316162, "rewards/rejected": -0.2809351086616516, "step": 5201 }, { "epoch": 3.173402470642062, "grad_norm": 4.167745113372803, "learning_rate": 2.9325168401714636e-06, "log_odds_chosen": 1.641371726989746, "log_odds_ratio": -0.26926806569099426, "logits/chosen": -0.9190576672554016, "logits/rejected": -0.9410756826400757, "logps/chosen": -0.8016030788421631, "logps/rejected": -1.9618620872497559, "loss": 0.9556, "nll_loss": 1.0180151462554932, "rewards/accuracies": 1.0, "rewards/chosen": -0.08016031235456467, "rewards/margins": 0.1160259023308754, "rewards/rejected": -0.19618619978427887, "step": 5202 }, { "epoch": 3.174012505719079, "grad_norm": 1.7402113676071167, "learning_rate": 2.9315370483772193e-06, "log_odds_chosen": 0.43734222650527954, "log_odds_ratio": -0.6666383743286133, "logits/chosen": -0.9506077766418457, "logits/rejected": -0.9421715140342712, "logps/chosen": -1.1620172262191772, "logps/rejected": -1.561396837234497, "loss": 1.1131, "nll_loss": 1.3032392263412476, "rewards/accuracies": 0.5, "rewards/chosen": -0.1162017285823822, "rewards/margins": 0.039937958121299744, "rewards/rejected": -0.15613970160484314, "step": 5203 }, { "epoch": 3.1746225407960957, "grad_norm": 1.249146819114685, "learning_rate": 2.930557256582976e-06, "log_odds_chosen": 2.9442412853240967, "log_odds_ratio": -0.281561940908432, "logits/chosen": -0.8603087663650513, "logits/rejected": -0.9180976152420044, "logps/chosen": -0.7826751470565796, "logps/rejected": -3.3180971145629883, "loss": 1.1015, "nll_loss": 0.8706911206245422, "rewards/accuracies": 0.875, "rewards/chosen": -0.07826751470565796, "rewards/margins": 0.2535421550273895, "rewards/rejected": -0.3318096697330475, "step": 5204 }, { "epoch": 3.1752325758731126, "grad_norm": 2.839510202407837, "learning_rate": 2.9295774647887324e-06, "log_odds_chosen": 0.9912203550338745, "log_odds_ratio": -0.6951519846916199, "logits/chosen": -0.7196991443634033, "logits/rejected": -0.5919457674026489, "logps/chosen": -0.8151069283485413, "logps/rejected": -1.4949707984924316, "loss": 1.0272, "nll_loss": 1.0378352403640747, "rewards/accuracies": 0.75, "rewards/chosen": -0.08151069283485413, "rewards/margins": 0.0679863765835762, "rewards/rejected": -0.14949706196784973, "step": 5205 }, { "epoch": 3.1758426109501294, "grad_norm": 1.692530870437622, "learning_rate": 2.9285976729944885e-06, "log_odds_chosen": 1.5318984985351562, "log_odds_ratio": -0.39593473076820374, "logits/chosen": -0.808799147605896, "logits/rejected": -0.897742509841919, "logps/chosen": -0.8542280793190002, "logps/rejected": -1.9708151817321777, "loss": 1.1438, "nll_loss": 1.0828269720077515, "rewards/accuracies": 0.625, "rewards/chosen": -0.0854228138923645, "rewards/margins": 0.11165871471166611, "rewards/rejected": -0.19708152115345, "step": 5206 }, { "epoch": 3.1764526460271467, "grad_norm": 1.4305102825164795, "learning_rate": 2.9276178812002446e-06, "log_odds_chosen": 2.4508237838745117, "log_odds_ratio": -0.2988339066505432, "logits/chosen": -1.0805160999298096, "logits/rejected": -1.1367275714874268, "logps/chosen": -0.6681208610534668, "logps/rejected": -2.647207260131836, "loss": 1.0934, "nll_loss": 1.2443552017211914, "rewards/accuracies": 0.875, "rewards/chosen": -0.06681208312511444, "rewards/margins": 0.1979086697101593, "rewards/rejected": -0.26472073793411255, "step": 5207 }, { "epoch": 3.1770626811041636, "grad_norm": 3.1794395446777344, "learning_rate": 2.926638089406001e-06, "log_odds_chosen": 1.3045995235443115, "log_odds_ratio": -0.5015503764152527, "logits/chosen": -0.684162974357605, "logits/rejected": -0.6654871702194214, "logps/chosen": -0.7294676303863525, "logps/rejected": -1.617276906967163, "loss": 0.9348, "nll_loss": 0.9528622627258301, "rewards/accuracies": 0.75, "rewards/chosen": -0.07294676452875137, "rewards/margins": 0.08878093957901001, "rewards/rejected": -0.1617276966571808, "step": 5208 }, { "epoch": 3.1776727161811804, "grad_norm": 3.352339744567871, "learning_rate": 2.9256582976117573e-06, "log_odds_chosen": 1.378169059753418, "log_odds_ratio": -0.4998350739479065, "logits/chosen": -0.6151556372642517, "logits/rejected": -0.9213453531265259, "logps/chosen": -0.9075959324836731, "logps/rejected": -1.9582374095916748, "loss": 1.0698, "nll_loss": 1.2008248567581177, "rewards/accuracies": 0.75, "rewards/chosen": -0.09075959026813507, "rewards/margins": 0.10506413877010345, "rewards/rejected": -0.19582372903823853, "step": 5209 }, { "epoch": 3.1782827512581973, "grad_norm": 7.384481906890869, "learning_rate": 2.924678505817514e-06, "log_odds_chosen": 0.605492889881134, "log_odds_ratio": -0.46130600571632385, "logits/chosen": -0.7528156638145447, "logits/rejected": -0.8343275189399719, "logps/chosen": -0.7814923524856567, "logps/rejected": -1.1413424015045166, "loss": 0.9652, "nll_loss": 0.8893325924873352, "rewards/accuracies": 0.875, "rewards/chosen": -0.0781492367386818, "rewards/margins": 0.03598500043153763, "rewards/rejected": -0.11413423717021942, "step": 5210 }, { "epoch": 3.178892786335214, "grad_norm": 1.5001368522644043, "learning_rate": 2.92369871402327e-06, "log_odds_chosen": 1.6999315023422241, "log_odds_ratio": -0.49101537466049194, "logits/chosen": -0.7870901226997375, "logits/rejected": -0.8308137655258179, "logps/chosen": -0.7753726243972778, "logps/rejected": -2.0495786666870117, "loss": 0.8916, "nll_loss": 0.8875053524971008, "rewards/accuracies": 0.625, "rewards/chosen": -0.07753726094961166, "rewards/margins": 0.1274206042289734, "rewards/rejected": -0.20495787262916565, "step": 5211 }, { "epoch": 3.1795028214122314, "grad_norm": 2.229844570159912, "learning_rate": 2.922718922229026e-06, "log_odds_chosen": 1.2526214122772217, "log_odds_ratio": -0.4682758152484894, "logits/chosen": -0.6753073334693909, "logits/rejected": -0.8060885667800903, "logps/chosen": -0.8733488321304321, "logps/rejected": -1.7920012474060059, "loss": 1.0946, "nll_loss": 1.1132723093032837, "rewards/accuracies": 0.625, "rewards/chosen": -0.08733487874269485, "rewards/margins": 0.09186524897813797, "rewards/rejected": -0.17920014262199402, "step": 5212 }, { "epoch": 3.1801128564892482, "grad_norm": 4.366857051849365, "learning_rate": 2.9217391304347826e-06, "log_odds_chosen": 2.4080944061279297, "log_odds_ratio": -0.276418000459671, "logits/chosen": -0.7750339508056641, "logits/rejected": -0.907699704170227, "logps/chosen": -0.6403343081474304, "logps/rejected": -2.4825198650360107, "loss": 1.1035, "nll_loss": 0.9357646703720093, "rewards/accuracies": 0.875, "rewards/chosen": -0.06403343379497528, "rewards/margins": 0.18421857059001923, "rewards/rejected": -0.2482519894838333, "step": 5213 }, { "epoch": 3.180722891566265, "grad_norm": 6.6094584465026855, "learning_rate": 2.9207593386405387e-06, "log_odds_chosen": 0.6863172054290771, "log_odds_ratio": -0.5936153531074524, "logits/chosen": -1.0770174264907837, "logits/rejected": -1.0285673141479492, "logps/chosen": -1.0912940502166748, "logps/rejected": -1.6938838958740234, "loss": 1.1509, "nll_loss": 1.2679911851882935, "rewards/accuracies": 0.625, "rewards/chosen": -0.109129399061203, "rewards/margins": 0.06025898456573486, "rewards/rejected": -0.16938838362693787, "step": 5214 }, { "epoch": 3.181332926643282, "grad_norm": 10.91470718383789, "learning_rate": 2.919779546846295e-06, "log_odds_chosen": 0.4567202925682068, "log_odds_ratio": -0.5423440933227539, "logits/chosen": -1.1255923509597778, "logits/rejected": -1.0878766775131226, "logps/chosen": -1.1993248462677002, "logps/rejected": -1.5788438320159912, "loss": 1.0725, "nll_loss": 1.255323886871338, "rewards/accuracies": 0.75, "rewards/chosen": -0.11993248760700226, "rewards/margins": 0.03795189410448074, "rewards/rejected": -0.1578843891620636, "step": 5215 }, { "epoch": 3.181942961720299, "grad_norm": 1.4614735841751099, "learning_rate": 2.9187997550520514e-06, "log_odds_chosen": 0.45713695883750916, "log_odds_ratio": -0.5995869636535645, "logits/chosen": -1.1695961952209473, "logits/rejected": -1.0070171356201172, "logps/chosen": -0.9442415833473206, "logps/rejected": -1.154935598373413, "loss": 1.0234, "nll_loss": 1.187458872795105, "rewards/accuracies": 0.625, "rewards/chosen": -0.09442415833473206, "rewards/margins": 0.021069401875138283, "rewards/rejected": -0.11549355834722519, "step": 5216 }, { "epoch": 3.182552996797316, "grad_norm": 1.795011281967163, "learning_rate": 2.917819963257808e-06, "log_odds_chosen": 3.3191912174224854, "log_odds_ratio": -0.27324455976486206, "logits/chosen": -0.9315754175186157, "logits/rejected": -1.0549684762954712, "logps/chosen": -0.7989220023155212, "logps/rejected": -3.5235421657562256, "loss": 1.0367, "nll_loss": 1.0575724840164185, "rewards/accuracies": 0.875, "rewards/chosen": -0.07989220321178436, "rewards/margins": 0.27246204018592834, "rewards/rejected": -0.3523542582988739, "step": 5217 }, { "epoch": 3.183163031874333, "grad_norm": 2.267122745513916, "learning_rate": 2.9168401714635637e-06, "log_odds_chosen": 1.8159241676330566, "log_odds_ratio": -0.48042237758636475, "logits/chosen": -1.0173367261886597, "logits/rejected": -0.9101452827453613, "logps/chosen": -0.8631036281585693, "logps/rejected": -2.475277900695801, "loss": 1.062, "nll_loss": 1.0106531381607056, "rewards/accuracies": 0.875, "rewards/chosen": -0.08631037175655365, "rewards/margins": 0.16121745109558105, "rewards/rejected": -0.2475278228521347, "step": 5218 }, { "epoch": 3.1837730669513498, "grad_norm": 2.196580171585083, "learning_rate": 2.91586037966932e-06, "log_odds_chosen": 0.7426198720932007, "log_odds_ratio": -0.6027919054031372, "logits/chosen": -0.9502677917480469, "logits/rejected": -0.8882174491882324, "logps/chosen": -0.7022039294242859, "logps/rejected": -1.248051404953003, "loss": 1.0857, "nll_loss": 1.0828969478607178, "rewards/accuracies": 0.625, "rewards/chosen": -0.07022039592266083, "rewards/margins": 0.05458473786711693, "rewards/rejected": -0.12480513751506805, "step": 5219 }, { "epoch": 3.1843831020283666, "grad_norm": 3.683189868927002, "learning_rate": 2.9148805878750768e-06, "log_odds_chosen": 1.9958442449569702, "log_odds_ratio": -0.4017777144908905, "logits/chosen": -0.9125787019729614, "logits/rejected": -0.9313567876815796, "logps/chosen": -0.8117228746414185, "logps/rejected": -2.3759677410125732, "loss": 1.1794, "nll_loss": 1.0106544494628906, "rewards/accuracies": 0.625, "rewards/chosen": -0.08117229491472244, "rewards/margins": 0.15642449259757996, "rewards/rejected": -0.2375967800617218, "step": 5220 }, { "epoch": 3.1849931371053835, "grad_norm": 2.88515567779541, "learning_rate": 2.9139007960808324e-06, "log_odds_chosen": 1.6049323081970215, "log_odds_ratio": -0.7041085958480835, "logits/chosen": -1.0267572402954102, "logits/rejected": -0.9996914863586426, "logps/chosen": -0.9536518454551697, "logps/rejected": -2.170879364013672, "loss": 1.1035, "nll_loss": 1.2077256441116333, "rewards/accuracies": 0.375, "rewards/chosen": -0.09536518901586533, "rewards/margins": 0.1217227503657341, "rewards/rejected": -0.21708793938159943, "step": 5221 }, { "epoch": 3.1856031721824003, "grad_norm": 1.5442136526107788, "learning_rate": 2.912921004286589e-06, "log_odds_chosen": 2.233825206756592, "log_odds_ratio": -0.3089803457260132, "logits/chosen": -0.9533677101135254, "logits/rejected": -0.9207462072372437, "logps/chosen": -0.7634835243225098, "logps/rejected": -2.506472110748291, "loss": 1.0235, "nll_loss": 1.0337468385696411, "rewards/accuracies": 0.75, "rewards/chosen": -0.07634834945201874, "rewards/margins": 0.17429885268211365, "rewards/rejected": -0.2506472170352936, "step": 5222 }, { "epoch": 3.1862132072594176, "grad_norm": 1.5810943841934204, "learning_rate": 2.911941212492345e-06, "log_odds_chosen": 1.2874436378479004, "log_odds_ratio": -0.5382354259490967, "logits/chosen": -0.8750460147857666, "logits/rejected": -0.8078410625457764, "logps/chosen": -0.9775516986846924, "logps/rejected": -2.096951484680176, "loss": 1.1152, "nll_loss": 1.0157166719436646, "rewards/accuracies": 0.625, "rewards/chosen": -0.09775517135858536, "rewards/margins": 0.11193997412919998, "rewards/rejected": -0.20969516038894653, "step": 5223 }, { "epoch": 3.1868232423364344, "grad_norm": 1.3510926961898804, "learning_rate": 2.9109614206981017e-06, "log_odds_chosen": 1.6391204595565796, "log_odds_ratio": -0.5730462074279785, "logits/chosen": -0.8568528294563293, "logits/rejected": -0.8902758359909058, "logps/chosen": -0.8544948101043701, "logps/rejected": -2.25148606300354, "loss": 1.0792, "nll_loss": 0.9873340725898743, "rewards/accuracies": 0.625, "rewards/chosen": -0.0854494720697403, "rewards/margins": 0.13969914615154266, "rewards/rejected": -0.22514861822128296, "step": 5224 }, { "epoch": 3.1874332774134513, "grad_norm": 1.5330020189285278, "learning_rate": 2.9099816289038578e-06, "log_odds_chosen": 0.7051745653152466, "log_odds_ratio": -0.5302734971046448, "logits/chosen": -0.970415472984314, "logits/rejected": -0.9455622434616089, "logps/chosen": -0.915701150894165, "logps/rejected": -1.446242332458496, "loss": 0.9987, "nll_loss": 1.006824016571045, "rewards/accuracies": 0.75, "rewards/chosen": -0.09157012403011322, "rewards/margins": 0.05305413156747818, "rewards/rejected": -0.1446242481470108, "step": 5225 }, { "epoch": 3.188043312490468, "grad_norm": 8.226213455200195, "learning_rate": 2.909001837109614e-06, "log_odds_chosen": 2.2655370235443115, "log_odds_ratio": -0.22968600690364838, "logits/chosen": -0.6569182276725769, "logits/rejected": -0.8126925230026245, "logps/chosen": -0.6991700530052185, "logps/rejected": -2.36624813079834, "loss": 0.8985, "nll_loss": 0.8269941806793213, "rewards/accuracies": 0.875, "rewards/chosen": -0.06991700828075409, "rewards/margins": 0.1667078286409378, "rewards/rejected": -0.2366248220205307, "step": 5226 }, { "epoch": 3.188653347567485, "grad_norm": 1.3980712890625, "learning_rate": 2.9080220453153705e-06, "log_odds_chosen": 0.4755643308162689, "log_odds_ratio": -0.5301123261451721, "logits/chosen": -0.8126871585845947, "logits/rejected": -0.8306537866592407, "logps/chosen": -0.8270919919013977, "logps/rejected": -1.1098933219909668, "loss": 1.1338, "nll_loss": 1.0592975616455078, "rewards/accuracies": 0.75, "rewards/chosen": -0.08270920813083649, "rewards/margins": 0.028280135244131088, "rewards/rejected": -0.11098933964967728, "step": 5227 }, { "epoch": 3.1892633826445023, "grad_norm": 8.060858726501465, "learning_rate": 2.9070422535211266e-06, "log_odds_chosen": 0.32120347023010254, "log_odds_ratio": -0.7660413384437561, "logits/chosen": -1.1955392360687256, "logits/rejected": -1.1219546794891357, "logps/chosen": -1.0722275972366333, "logps/rejected": -1.4321883916854858, "loss": 1.1871, "nll_loss": 1.2005298137664795, "rewards/accuracies": 0.5, "rewards/chosen": -0.10722276568412781, "rewards/margins": 0.035996079444885254, "rewards/rejected": -0.14321884512901306, "step": 5228 }, { "epoch": 3.189873417721519, "grad_norm": 1.1092467308044434, "learning_rate": 2.9060624617268827e-06, "log_odds_chosen": 3.1173720359802246, "log_odds_ratio": -0.1287641078233719, "logits/chosen": -0.9235846996307373, "logits/rejected": -0.9008380174636841, "logps/chosen": -0.6088712215423584, "logps/rejected": -2.893939971923828, "loss": 1.0341, "nll_loss": 0.9509974718093872, "rewards/accuracies": 1.0, "rewards/chosen": -0.06088712811470032, "rewards/margins": 0.2285068929195404, "rewards/rejected": -0.2893940210342407, "step": 5229 }, { "epoch": 3.190483452798536, "grad_norm": 1.5913597345352173, "learning_rate": 2.9050826699326392e-06, "log_odds_chosen": 2.156235456466675, "log_odds_ratio": -0.3256828188896179, "logits/chosen": -0.8426254391670227, "logits/rejected": -0.9207778573036194, "logps/chosen": -0.624343752861023, "logps/rejected": -2.1128361225128174, "loss": 1.0343, "nll_loss": 1.1379752159118652, "rewards/accuracies": 0.75, "rewards/chosen": -0.06243437901139259, "rewards/margins": 0.1488492339849472, "rewards/rejected": -0.2112836241722107, "step": 5230 }, { "epoch": 3.191093487875553, "grad_norm": 6.2190961837768555, "learning_rate": 2.904102878138396e-06, "log_odds_chosen": 0.4550783038139343, "log_odds_ratio": -0.5952231884002686, "logits/chosen": -0.8044532537460327, "logits/rejected": -0.7302860617637634, "logps/chosen": -0.7571310997009277, "logps/rejected": -0.9736766815185547, "loss": 0.9749, "nll_loss": 0.9922508001327515, "rewards/accuracies": 0.5, "rewards/chosen": -0.07571311295032501, "rewards/margins": 0.021654557436704636, "rewards/rejected": -0.09736767411231995, "step": 5231 }, { "epoch": 3.1917035229525696, "grad_norm": 2.346400260925293, "learning_rate": 2.9031230863441515e-06, "log_odds_chosen": 2.3719332218170166, "log_odds_ratio": -0.4775121808052063, "logits/chosen": -0.8865454196929932, "logits/rejected": -0.9080864191055298, "logps/chosen": -0.9676631689071655, "logps/rejected": -2.9658150672912598, "loss": 1.0582, "nll_loss": 1.3077181577682495, "rewards/accuracies": 0.875, "rewards/chosen": -0.09676632285118103, "rewards/margins": 0.19981518387794495, "rewards/rejected": -0.29658153653144836, "step": 5232 }, { "epoch": 3.1923135580295865, "grad_norm": 1.4280827045440674, "learning_rate": 2.902143294549908e-06, "log_odds_chosen": 0.9667121171951294, "log_odds_ratio": -0.5732414722442627, "logits/chosen": -0.8862158060073853, "logits/rejected": -0.9108377695083618, "logps/chosen": -0.7923892736434937, "logps/rejected": -1.4776272773742676, "loss": 1.042, "nll_loss": 0.9537333250045776, "rewards/accuracies": 0.625, "rewards/chosen": -0.07923892885446548, "rewards/margins": 0.06852379441261292, "rewards/rejected": -0.1477627158164978, "step": 5233 }, { "epoch": 3.192923593106604, "grad_norm": 6.617705345153809, "learning_rate": 2.9011635027556646e-06, "log_odds_chosen": 2.198500156402588, "log_odds_ratio": -0.31199169158935547, "logits/chosen": -1.0181862115859985, "logits/rejected": -1.1064376831054688, "logps/chosen": -0.8938628435134888, "logps/rejected": -2.7171530723571777, "loss": 1.2115, "nll_loss": 1.3288383483886719, "rewards/accuracies": 1.0, "rewards/chosen": -0.08938628435134888, "rewards/margins": 0.18232902884483337, "rewards/rejected": -0.27171531319618225, "step": 5234 }, { "epoch": 3.1935336281836206, "grad_norm": 5.545228004455566, "learning_rate": 2.9001837109614203e-06, "log_odds_chosen": 2.3814425468444824, "log_odds_ratio": -0.48281580209732056, "logits/chosen": -0.7624762058258057, "logits/rejected": -0.8058243989944458, "logps/chosen": -0.7117631435394287, "logps/rejected": -2.7601518630981445, "loss": 1.0738, "nll_loss": 0.9455329179763794, "rewards/accuracies": 0.875, "rewards/chosen": -0.07117632031440735, "rewards/margins": 0.2048388570547104, "rewards/rejected": -0.27601516246795654, "step": 5235 }, { "epoch": 3.1941436632606375, "grad_norm": 1.8469873666763306, "learning_rate": 2.899203919167177e-06, "log_odds_chosen": 1.9316576719284058, "log_odds_ratio": -0.4601064622402191, "logits/chosen": -0.9811204671859741, "logits/rejected": -0.9204468727111816, "logps/chosen": -0.8884814381599426, "logps/rejected": -2.574584484100342, "loss": 1.2283, "nll_loss": 1.0687801837921143, "rewards/accuracies": 0.75, "rewards/chosen": -0.08884815126657486, "rewards/margins": 0.1686103194952011, "rewards/rejected": -0.2574584484100342, "step": 5236 }, { "epoch": 3.1947536983376543, "grad_norm": 2.615128517150879, "learning_rate": 2.8982241273729334e-06, "log_odds_chosen": 2.8004915714263916, "log_odds_ratio": -0.4670504927635193, "logits/chosen": -0.6911793947219849, "logits/rejected": -0.8462613821029663, "logps/chosen": -0.5067012310028076, "logps/rejected": -2.687887191772461, "loss": 0.9123, "nll_loss": 0.9250399470329285, "rewards/accuracies": 0.5, "rewards/chosen": -0.05067012831568718, "rewards/margins": 0.2181185781955719, "rewards/rejected": -0.2687886953353882, "step": 5237 }, { "epoch": 3.195363733414671, "grad_norm": 22.557594299316406, "learning_rate": 2.8972443355786895e-06, "log_odds_chosen": 1.772165298461914, "log_odds_ratio": -0.595710277557373, "logits/chosen": -1.0191444158554077, "logits/rejected": -1.101779580116272, "logps/chosen": -0.8866990208625793, "logps/rejected": -2.239328145980835, "loss": 1.1927, "nll_loss": 1.2687644958496094, "rewards/accuracies": 0.375, "rewards/chosen": -0.08866989612579346, "rewards/margins": 0.13526292145252228, "rewards/rejected": -0.22393281757831573, "step": 5238 }, { "epoch": 3.1959737684916885, "grad_norm": 2.1616415977478027, "learning_rate": 2.8962645437844456e-06, "log_odds_chosen": 0.25621289014816284, "log_odds_ratio": -0.6226025819778442, "logits/chosen": -0.7313964366912842, "logits/rejected": -0.6858241558074951, "logps/chosen": -0.967674195766449, "logps/rejected": -1.164510726928711, "loss": 1.112, "nll_loss": 1.0956764221191406, "rewards/accuracies": 0.75, "rewards/chosen": -0.09676742553710938, "rewards/margins": 0.01968364603817463, "rewards/rejected": -0.11645108461380005, "step": 5239 }, { "epoch": 3.1965838035687053, "grad_norm": 21.23512077331543, "learning_rate": 2.895284751990202e-06, "log_odds_chosen": 1.8896033763885498, "log_odds_ratio": -0.4904125928878784, "logits/chosen": -1.004183053970337, "logits/rejected": -1.125313401222229, "logps/chosen": -0.7724963426589966, "logps/rejected": -2.362489700317383, "loss": 1.0965, "nll_loss": 1.1953909397125244, "rewards/accuracies": 0.625, "rewards/chosen": -0.07724963873624802, "rewards/margins": 0.15899932384490967, "rewards/rejected": -0.2362489551305771, "step": 5240 }, { "epoch": 3.197193838645722, "grad_norm": 6.643962383270264, "learning_rate": 2.8943049601959583e-06, "log_odds_chosen": 1.9910094738006592, "log_odds_ratio": -0.4265705943107605, "logits/chosen": -0.8984790444374084, "logits/rejected": -0.9982408285140991, "logps/chosen": -0.9199107885360718, "logps/rejected": -2.539804458618164, "loss": 0.9942, "nll_loss": 1.1007664203643799, "rewards/accuracies": 0.75, "rewards/chosen": -0.09199108183383942, "rewards/margins": 0.16198936104774475, "rewards/rejected": -0.25398045778274536, "step": 5241 }, { "epoch": 3.197803873722739, "grad_norm": 5.365699291229248, "learning_rate": 2.8933251684017144e-06, "log_odds_chosen": 1.5322266817092896, "log_odds_ratio": -0.42792731523513794, "logits/chosen": -0.7698755860328674, "logits/rejected": -0.8028361797332764, "logps/chosen": -0.7742779850959778, "logps/rejected": -2.0177440643310547, "loss": 0.8845, "nll_loss": 0.8818178176879883, "rewards/accuracies": 0.625, "rewards/chosen": -0.07742779701948166, "rewards/margins": 0.12434662878513336, "rewards/rejected": -0.20177443325519562, "step": 5242 }, { "epoch": 3.198413908799756, "grad_norm": 1.7656042575836182, "learning_rate": 2.8923453766074705e-06, "log_odds_chosen": 1.369739294052124, "log_odds_ratio": -0.4998169243335724, "logits/chosen": -0.8355231285095215, "logits/rejected": -0.9928176403045654, "logps/chosen": -0.8940972685813904, "logps/rejected": -1.9201438426971436, "loss": 0.8952, "nll_loss": 1.1110458374023438, "rewards/accuracies": 0.625, "rewards/chosen": -0.0894097238779068, "rewards/margins": 0.10260464251041412, "rewards/rejected": -0.19201438128948212, "step": 5243 }, { "epoch": 3.1990239438767727, "grad_norm": 2.077130079269409, "learning_rate": 2.891365584813227e-06, "log_odds_chosen": 1.2710630893707275, "log_odds_ratio": -0.2956332266330719, "logits/chosen": -0.6704909801483154, "logits/rejected": -0.717126190662384, "logps/chosen": -0.8712713718414307, "logps/rejected": -1.6641700267791748, "loss": 0.8915, "nll_loss": 0.7970779538154602, "rewards/accuracies": 1.0, "rewards/chosen": -0.08712714165449142, "rewards/margins": 0.07928984612226486, "rewards/rejected": -0.1664169877767563, "step": 5244 }, { "epoch": 3.19963397895379, "grad_norm": 1.7797188758850098, "learning_rate": 2.8903857930189836e-06, "log_odds_chosen": 2.310431718826294, "log_odds_ratio": -0.2995638847351074, "logits/chosen": -0.7410991191864014, "logits/rejected": -0.8450039029121399, "logps/chosen": -0.7790893316268921, "logps/rejected": -2.5204358100891113, "loss": 0.918, "nll_loss": 0.8719139099121094, "rewards/accuracies": 0.75, "rewards/chosen": -0.0779089406132698, "rewards/margins": 0.17413467168807983, "rewards/rejected": -0.25204360485076904, "step": 5245 }, { "epoch": 3.200244014030807, "grad_norm": 1.4559298753738403, "learning_rate": 2.8894060012247393e-06, "log_odds_chosen": 0.4797735810279846, "log_odds_ratio": -0.7007105946540833, "logits/chosen": -0.7383304834365845, "logits/rejected": -0.7261844873428345, "logps/chosen": -0.746590256690979, "logps/rejected": -0.9970229268074036, "loss": 1.0725, "nll_loss": 1.0660463571548462, "rewards/accuracies": 0.625, "rewards/chosen": -0.07465902715921402, "rewards/margins": 0.025043271481990814, "rewards/rejected": -0.09970229864120483, "step": 5246 }, { "epoch": 3.2008540491078237, "grad_norm": 7.392157077789307, "learning_rate": 2.888426209430496e-06, "log_odds_chosen": 2.961517095565796, "log_odds_ratio": -0.3130077123641968, "logits/chosen": -0.7359114289283752, "logits/rejected": -0.7779026627540588, "logps/chosen": -0.7106184959411621, "logps/rejected": -3.113673448562622, "loss": 0.9287, "nll_loss": 1.0435665845870972, "rewards/accuracies": 0.875, "rewards/chosen": -0.07106184959411621, "rewards/margins": 0.24030549824237823, "rewards/rejected": -0.31136733293533325, "step": 5247 }, { "epoch": 3.2014640841848405, "grad_norm": 1.4748774766921997, "learning_rate": 2.8874464176362524e-06, "log_odds_chosen": 1.752246618270874, "log_odds_ratio": -0.3608344793319702, "logits/chosen": -0.7795906066894531, "logits/rejected": -0.9316256046295166, "logps/chosen": -0.7027473449707031, "logps/rejected": -2.0269532203674316, "loss": 0.9808, "nll_loss": 0.8929761052131653, "rewards/accuracies": 0.875, "rewards/chosen": -0.07027474045753479, "rewards/margins": 0.13242056965827942, "rewards/rejected": -0.2026953101158142, "step": 5248 }, { "epoch": 3.2020741192618574, "grad_norm": 10.824453353881836, "learning_rate": 2.886466625842008e-06, "log_odds_chosen": 0.9140655994415283, "log_odds_ratio": -0.4310217499732971, "logits/chosen": -1.0003668069839478, "logits/rejected": -0.9028078317642212, "logps/chosen": -0.8376797437667847, "logps/rejected": -1.2872731685638428, "loss": 1.0512, "nll_loss": 1.2117191553115845, "rewards/accuracies": 0.875, "rewards/chosen": -0.08376798033714294, "rewards/margins": 0.04495933651924133, "rewards/rejected": -0.12872733175754547, "step": 5249 }, { "epoch": 3.2026841543388747, "grad_norm": 1.933058738708496, "learning_rate": 2.8854868340477647e-06, "log_odds_chosen": 0.3771056532859802, "log_odds_ratio": -0.6222563982009888, "logits/chosen": -0.8935707807540894, "logits/rejected": -0.8966525197029114, "logps/chosen": -0.6898336410522461, "logps/rejected": -0.9545416831970215, "loss": 0.9526, "nll_loss": 1.0624839067459106, "rewards/accuracies": 0.625, "rewards/chosen": -0.06898336857557297, "rewards/margins": 0.02647080458700657, "rewards/rejected": -0.09545417129993439, "step": 5250 }, { "epoch": 3.2032941894158915, "grad_norm": 1.6338037252426147, "learning_rate": 2.884507042253521e-06, "log_odds_chosen": 1.556493878364563, "log_odds_ratio": -0.5009588003158569, "logits/chosen": -0.8638187646865845, "logits/rejected": -0.8970826864242554, "logps/chosen": -0.8499996662139893, "logps/rejected": -2.0109739303588867, "loss": 0.9799, "nll_loss": 1.0323753356933594, "rewards/accuracies": 0.75, "rewards/chosen": -0.08499995619058609, "rewards/margins": 0.11609743535518646, "rewards/rejected": -0.20109739899635315, "step": 5251 }, { "epoch": 3.2039042244929083, "grad_norm": 1.6940579414367676, "learning_rate": 2.8835272504592773e-06, "log_odds_chosen": 1.633563756942749, "log_odds_ratio": -0.42750802636146545, "logits/chosen": -0.7933313250541687, "logits/rejected": -1.0455459356307983, "logps/chosen": -0.7148865461349487, "logps/rejected": -1.9996966123580933, "loss": 1.0418, "nll_loss": 0.8650795221328735, "rewards/accuracies": 0.75, "rewards/chosen": -0.07148865610361099, "rewards/margins": 0.12848100066184998, "rewards/rejected": -0.19996966421604156, "step": 5252 }, { "epoch": 3.204514259569925, "grad_norm": 1.2493290901184082, "learning_rate": 2.8825474586650334e-06, "log_odds_chosen": 0.5357732176780701, "log_odds_ratio": -0.7023864984512329, "logits/chosen": -0.7813192009925842, "logits/rejected": -0.8161248564720154, "logps/chosen": -0.8868170976638794, "logps/rejected": -1.3461717367172241, "loss": 1.1893, "nll_loss": 1.0833040475845337, "rewards/accuracies": 0.5, "rewards/chosen": -0.08868170529603958, "rewards/margins": 0.04593547433614731, "rewards/rejected": -0.1346171796321869, "step": 5253 }, { "epoch": 3.205124294646942, "grad_norm": 1.6777284145355225, "learning_rate": 2.88156766687079e-06, "log_odds_chosen": 1.0777859687805176, "log_odds_ratio": -0.4417742192745209, "logits/chosen": -0.9266016483306885, "logits/rejected": -0.8744122982025146, "logps/chosen": -0.8386722803115845, "logps/rejected": -1.627533197402954, "loss": 0.9642, "nll_loss": 0.9773927330970764, "rewards/accuracies": 0.875, "rewards/chosen": -0.08386722952127457, "rewards/margins": 0.07888607680797577, "rewards/rejected": -0.16275331377983093, "step": 5254 }, { "epoch": 3.2057343297239593, "grad_norm": 1.702142596244812, "learning_rate": 2.880587875076546e-06, "log_odds_chosen": 1.0805318355560303, "log_odds_ratio": -0.6226728558540344, "logits/chosen": -0.7432928681373596, "logits/rejected": -0.7800390720367432, "logps/chosen": -0.8824272751808167, "logps/rejected": -1.832504153251648, "loss": 1.0351, "nll_loss": 1.0202720165252686, "rewards/accuracies": 0.5, "rewards/chosen": -0.08824272453784943, "rewards/margins": 0.09500770270824432, "rewards/rejected": -0.18325041234493256, "step": 5255 }, { "epoch": 3.206344364800976, "grad_norm": 1.239691138267517, "learning_rate": 2.8796080832823022e-06, "log_odds_chosen": 2.7321982383728027, "log_odds_ratio": -0.240232914686203, "logits/chosen": -0.9353814125061035, "logits/rejected": -0.9803382158279419, "logps/chosen": -0.645269513130188, "logps/rejected": -2.76873779296875, "loss": 1.0629, "nll_loss": 0.8650951385498047, "rewards/accuracies": 0.875, "rewards/chosen": -0.06452696025371552, "rewards/margins": 0.21234683692455292, "rewards/rejected": -0.27687379717826843, "step": 5256 }, { "epoch": 3.206954399877993, "grad_norm": 3.646544933319092, "learning_rate": 2.8786282914880588e-06, "log_odds_chosen": 1.5629602670669556, "log_odds_ratio": -0.4669512212276459, "logits/chosen": -0.8111485242843628, "logits/rejected": -0.9932855367660522, "logps/chosen": -0.8713724613189697, "logps/rejected": -2.1986594200134277, "loss": 1.1365, "nll_loss": 0.9916259050369263, "rewards/accuracies": 0.875, "rewards/chosen": -0.08713725209236145, "rewards/margins": 0.1327286809682846, "rewards/rejected": -0.21986593306064606, "step": 5257 }, { "epoch": 3.20756443495501, "grad_norm": 1.3724669218063354, "learning_rate": 2.877648499693815e-06, "log_odds_chosen": 3.987867832183838, "log_odds_ratio": -0.18594682216644287, "logits/chosen": -0.810981810092926, "logits/rejected": -0.9794423580169678, "logps/chosen": -0.6438318490982056, "logps/rejected": -3.9052977561950684, "loss": 1.022, "nll_loss": 0.964447021484375, "rewards/accuracies": 1.0, "rewards/chosen": -0.06438319385051727, "rewards/margins": 0.32614657282829285, "rewards/rejected": -0.3905297517776489, "step": 5258 }, { "epoch": 3.2081744700320267, "grad_norm": 2.4589874744415283, "learning_rate": 2.8766687078995714e-06, "log_odds_chosen": 2.7079696655273438, "log_odds_ratio": -0.2523862421512604, "logits/chosen": -0.8163625597953796, "logits/rejected": -0.8587184548377991, "logps/chosen": -0.6047278642654419, "logps/rejected": -2.6325345039367676, "loss": 1.0689, "nll_loss": 0.8415941596031189, "rewards/accuracies": 0.875, "rewards/chosen": -0.06047278642654419, "rewards/margins": 0.20278066396713257, "rewards/rejected": -0.26325345039367676, "step": 5259 }, { "epoch": 3.208784505109044, "grad_norm": 2.222848415374756, "learning_rate": 2.875688916105327e-06, "log_odds_chosen": 0.23179280757904053, "log_odds_ratio": -0.7382699251174927, "logits/chosen": -0.9309415817260742, "logits/rejected": -0.8784521818161011, "logps/chosen": -0.8145459294319153, "logps/rejected": -0.9312921762466431, "loss": 1.0686, "nll_loss": 1.3848580121994019, "rewards/accuracies": 0.75, "rewards/chosen": -0.08145458996295929, "rewards/margins": 0.011674625799059868, "rewards/rejected": -0.0931292176246643, "step": 5260 }, { "epoch": 3.209394540186061, "grad_norm": 1.308384656906128, "learning_rate": 2.8747091243110837e-06, "log_odds_chosen": 1.3180034160614014, "log_odds_ratio": -0.3611070215702057, "logits/chosen": -0.8545817136764526, "logits/rejected": -0.9268274307250977, "logps/chosen": -0.6656770706176758, "logps/rejected": -1.5088074207305908, "loss": 1.1575, "nll_loss": 1.0510735511779785, "rewards/accuracies": 0.75, "rewards/chosen": -0.06656771153211594, "rewards/margins": 0.08431302756071091, "rewards/rejected": -0.15088073909282684, "step": 5261 }, { "epoch": 3.2100045752630777, "grad_norm": 5.188671112060547, "learning_rate": 2.8737293325168402e-06, "log_odds_chosen": 1.9398431777954102, "log_odds_ratio": -0.3811863660812378, "logits/chosen": -0.6336593627929688, "logits/rejected": -0.7958030700683594, "logps/chosen": -0.7066395282745361, "logps/rejected": -2.1885182857513428, "loss": 0.9579, "nll_loss": 0.8180146813392639, "rewards/accuracies": 0.75, "rewards/chosen": -0.0706639438867569, "rewards/margins": 0.14818786084651947, "rewards/rejected": -0.21885180473327637, "step": 5262 }, { "epoch": 3.2106146103400945, "grad_norm": 14.823465347290039, "learning_rate": 2.872749540722596e-06, "log_odds_chosen": 3.007903814315796, "log_odds_ratio": -0.2472265064716339, "logits/chosen": -0.5321751832962036, "logits/rejected": -0.7821246981620789, "logps/chosen": -0.6004180312156677, "logps/rejected": -2.908878803253174, "loss": 0.9102, "nll_loss": 0.7416083812713623, "rewards/accuracies": 0.875, "rewards/chosen": -0.06004180386662483, "rewards/margins": 0.2308460772037506, "rewards/rejected": -0.29088789224624634, "step": 5263 }, { "epoch": 3.2112246454171114, "grad_norm": 6.120779991149902, "learning_rate": 2.8717697489283525e-06, "log_odds_chosen": 1.154792070388794, "log_odds_ratio": -0.49653276801109314, "logits/chosen": -1.161039113998413, "logits/rejected": -1.0701408386230469, "logps/chosen": -0.9970839023590088, "logps/rejected": -1.9857226610183716, "loss": 1.0514, "nll_loss": 1.1873785257339478, "rewards/accuracies": 0.625, "rewards/chosen": -0.09970838576555252, "rewards/margins": 0.0988638773560524, "rewards/rejected": -0.1985722780227661, "step": 5264 }, { "epoch": 3.2118346804941282, "grad_norm": 3.0719950199127197, "learning_rate": 2.870789957134109e-06, "log_odds_chosen": -0.10735541582107544, "log_odds_ratio": -0.9013897180557251, "logits/chosen": -1.03466796875, "logits/rejected": -0.9202397465705872, "logps/chosen": -1.1495094299316406, "logps/rejected": -1.0678470134735107, "loss": 1.245, "nll_loss": 1.205047369003296, "rewards/accuracies": 0.375, "rewards/chosen": -0.11495094001293182, "rewards/margins": -0.0081662368029356, "rewards/rejected": -0.10678471624851227, "step": 5265 }, { "epoch": 3.2124447155711455, "grad_norm": 1.2504369020462036, "learning_rate": 2.869810165339865e-06, "log_odds_chosen": 1.6281646490097046, "log_odds_ratio": -0.40722566843032837, "logits/chosen": -0.7103508114814758, "logits/rejected": -0.7928757071495056, "logps/chosen": -0.5587190985679626, "logps/rejected": -1.5274643898010254, "loss": 1.0509, "nll_loss": 0.9334213137626648, "rewards/accuracies": 0.875, "rewards/chosen": -0.05587191507220268, "rewards/margins": 0.09687453508377075, "rewards/rejected": -0.15274643898010254, "step": 5266 }, { "epoch": 3.2130547506481624, "grad_norm": 1.854434609413147, "learning_rate": 2.8688303735456213e-06, "log_odds_chosen": 0.6942054629325867, "log_odds_ratio": -0.6313186883926392, "logits/chosen": -0.9770596623420715, "logits/rejected": -0.964213490486145, "logps/chosen": -0.6844717264175415, "logps/rejected": -1.2172350883483887, "loss": 1.1105, "nll_loss": 0.947515070438385, "rewards/accuracies": 0.5, "rewards/chosen": -0.06844717264175415, "rewards/margins": 0.053276341408491135, "rewards/rejected": -0.12172351032495499, "step": 5267 }, { "epoch": 3.213664785725179, "grad_norm": 1.633907437324524, "learning_rate": 2.867850581751378e-06, "log_odds_chosen": 2.0882408618927, "log_odds_ratio": -0.2972721457481384, "logits/chosen": -0.9204705953598022, "logits/rejected": -0.9356672763824463, "logps/chosen": -0.8120228052139282, "logps/rejected": -2.5240583419799805, "loss": 1.0104, "nll_loss": 1.0369209051132202, "rewards/accuracies": 0.75, "rewards/chosen": -0.08120228350162506, "rewards/margins": 0.17120355367660522, "rewards/rejected": -0.2524058222770691, "step": 5268 }, { "epoch": 3.214274820802196, "grad_norm": 3.3247950077056885, "learning_rate": 2.866870789957134e-06, "log_odds_chosen": 0.9406732320785522, "log_odds_ratio": -0.4798242449760437, "logits/chosen": -0.798214316368103, "logits/rejected": -1.022504210472107, "logps/chosen": -0.7646045684814453, "logps/rejected": -1.3657104969024658, "loss": 1.0583, "nll_loss": 0.9029687643051147, "rewards/accuracies": 0.75, "rewards/chosen": -0.07646046578884125, "rewards/margins": 0.06011059135198593, "rewards/rejected": -0.13657104969024658, "step": 5269 }, { "epoch": 3.214884855879213, "grad_norm": 1.0790597200393677, "learning_rate": 2.86589099816289e-06, "log_odds_chosen": 1.220888376235962, "log_odds_ratio": -0.4247463047504425, "logits/chosen": -0.6693600416183472, "logits/rejected": -0.8285657167434692, "logps/chosen": -0.6665696501731873, "logps/rejected": -1.453216791152954, "loss": 0.8959, "nll_loss": 0.8733562231063843, "rewards/accuracies": 0.875, "rewards/chosen": -0.06665696203708649, "rewards/margins": 0.07866472005844116, "rewards/rejected": -0.14532168209552765, "step": 5270 }, { "epoch": 3.21549489095623, "grad_norm": 1.4011189937591553, "learning_rate": 2.8649112063686466e-06, "log_odds_chosen": 0.7447492480278015, "log_odds_ratio": -0.5392171144485474, "logits/chosen": -0.793160617351532, "logits/rejected": -0.8362297415733337, "logps/chosen": -0.8548462986946106, "logps/rejected": -1.4182353019714355, "loss": 0.8995, "nll_loss": 0.9193792939186096, "rewards/accuracies": 0.625, "rewards/chosen": -0.08548463135957718, "rewards/margins": 0.05633889511227608, "rewards/rejected": -0.14182353019714355, "step": 5271 }, { "epoch": 3.216104926033247, "grad_norm": 13.896239280700684, "learning_rate": 2.8639314145744027e-06, "log_odds_chosen": 0.5273005366325378, "log_odds_ratio": -0.511911153793335, "logits/chosen": -0.8513701558113098, "logits/rejected": -0.8990094661712646, "logps/chosen": -0.8380562663078308, "logps/rejected": -1.2080248594284058, "loss": 1.0994, "nll_loss": 1.1804763078689575, "rewards/accuracies": 0.875, "rewards/chosen": -0.0838056281208992, "rewards/margins": 0.03699686378240585, "rewards/rejected": -0.12080249190330505, "step": 5272 }, { "epoch": 3.216714961110264, "grad_norm": 1.3853089809417725, "learning_rate": 2.8629516227801593e-06, "log_odds_chosen": 3.18215274810791, "log_odds_ratio": -0.22644853591918945, "logits/chosen": -0.7330713272094727, "logits/rejected": -0.9163022637367249, "logps/chosen": -0.681307315826416, "logps/rejected": -3.184420347213745, "loss": 0.9881, "nll_loss": 0.8358750343322754, "rewards/accuracies": 0.75, "rewards/chosen": -0.0681307315826416, "rewards/margins": 0.25031131505966187, "rewards/rejected": -0.31844204664230347, "step": 5273 }, { "epoch": 3.2173249961872807, "grad_norm": 16.10178565979004, "learning_rate": 2.8619718309859154e-06, "log_odds_chosen": 0.5064001083374023, "log_odds_ratio": -0.7337918281555176, "logits/chosen": -0.8405877351760864, "logits/rejected": -0.8403795957565308, "logps/chosen": -1.0142828226089478, "logps/rejected": -1.4221924543380737, "loss": 1.03, "nll_loss": 1.2487618923187256, "rewards/accuracies": 0.5, "rewards/chosen": -0.10142829269170761, "rewards/margins": 0.040790949016809464, "rewards/rejected": -0.14221923053264618, "step": 5274 }, { "epoch": 3.2179350312642976, "grad_norm": 1.5385639667510986, "learning_rate": 2.8609920391916715e-06, "log_odds_chosen": 1.9067528247833252, "log_odds_ratio": -0.40534284710884094, "logits/chosen": -0.5590643882751465, "logits/rejected": -0.712341845035553, "logps/chosen": -0.6408448815345764, "logps/rejected": -2.158597469329834, "loss": 0.9977, "nll_loss": 0.7476738095283508, "rewards/accuracies": 0.75, "rewards/chosen": -0.0640844851732254, "rewards/margins": 0.1517752707004547, "rewards/rejected": -0.21585975587368011, "step": 5275 }, { "epoch": 3.2185450663413144, "grad_norm": 2.2073416709899902, "learning_rate": 2.860012247397428e-06, "log_odds_chosen": 0.9774666428565979, "log_odds_ratio": -0.5228627920150757, "logits/chosen": -0.8056591749191284, "logits/rejected": -0.8410103917121887, "logps/chosen": -0.7190601825714111, "logps/rejected": -1.4870284795761108, "loss": 1.1531, "nll_loss": 0.8469510078430176, "rewards/accuracies": 0.5, "rewards/chosen": -0.07190601527690887, "rewards/margins": 0.07679682970046997, "rewards/rejected": -0.14870285987854004, "step": 5276 }, { "epoch": 3.2191551014183317, "grad_norm": 1.780898928642273, "learning_rate": 2.859032455603184e-06, "log_odds_chosen": 2.2716224193573, "log_odds_ratio": -0.429294615983963, "logits/chosen": -0.7637495398521423, "logits/rejected": -0.9348592758178711, "logps/chosen": -0.8655236959457397, "logps/rejected": -2.753016948699951, "loss": 1.1076, "nll_loss": 1.1215627193450928, "rewards/accuracies": 0.75, "rewards/chosen": -0.08655236661434174, "rewards/margins": 0.18874934315681458, "rewards/rejected": -0.2753017246723175, "step": 5277 }, { "epoch": 3.2197651364953486, "grad_norm": 3.3042569160461426, "learning_rate": 2.8580526638089403e-06, "log_odds_chosen": 1.2746983766555786, "log_odds_ratio": -0.5385913252830505, "logits/chosen": -0.9516963362693787, "logits/rejected": -0.8690142035484314, "logps/chosen": -1.0556795597076416, "logps/rejected": -2.18912672996521, "loss": 1.132, "nll_loss": 1.2189642190933228, "rewards/accuracies": 0.5, "rewards/chosen": -0.10556794703006744, "rewards/margins": 0.1133447140455246, "rewards/rejected": -0.21891267597675323, "step": 5278 }, { "epoch": 3.2203751715723654, "grad_norm": 2.082143545150757, "learning_rate": 2.857072872014697e-06, "log_odds_chosen": 1.4874331951141357, "log_odds_ratio": -0.6700258255004883, "logits/chosen": -0.8133066892623901, "logits/rejected": -0.9981197118759155, "logps/chosen": -0.8072245717048645, "logps/rejected": -1.9145290851593018, "loss": 1.2326, "nll_loss": 0.9547088742256165, "rewards/accuracies": 0.625, "rewards/chosen": -0.08072245866060257, "rewards/margins": 0.11073045432567596, "rewards/rejected": -0.19145290553569794, "step": 5279 }, { "epoch": 3.2209852066493823, "grad_norm": 1.522862434387207, "learning_rate": 2.856093080220453e-06, "log_odds_chosen": 1.6772626638412476, "log_odds_ratio": -0.4953421354293823, "logits/chosen": -0.9781115055084229, "logits/rejected": -1.0193090438842773, "logps/chosen": -0.9824774265289307, "logps/rejected": -2.2785863876342773, "loss": 1.1908, "nll_loss": 1.2646548748016357, "rewards/accuracies": 0.625, "rewards/chosen": -0.09824774414300919, "rewards/margins": 0.12961089611053467, "rewards/rejected": -0.22785863280296326, "step": 5280 }, { "epoch": 3.221595241726399, "grad_norm": 1.2369002103805542, "learning_rate": 2.855113288426209e-06, "log_odds_chosen": 1.0976344347000122, "log_odds_ratio": -0.3261021673679352, "logits/chosen": -0.9127238988876343, "logits/rejected": -1.0033084154129028, "logps/chosen": -0.7971272468566895, "logps/rejected": -1.5214033126831055, "loss": 1.119, "nll_loss": 1.1963996887207031, "rewards/accuracies": 1.0, "rewards/chosen": -0.07971271872520447, "rewards/margins": 0.07242761552333832, "rewards/rejected": -0.15214034914970398, "step": 5281 }, { "epoch": 3.2222052768034164, "grad_norm": 1.7913429737091064, "learning_rate": 2.8541334966319656e-06, "log_odds_chosen": 3.059762477874756, "log_odds_ratio": -0.18018031120300293, "logits/chosen": -0.6949053406715393, "logits/rejected": -0.7787365913391113, "logps/chosen": -0.6011068820953369, "logps/rejected": -2.949911594390869, "loss": 1.0024, "nll_loss": 0.8707407712936401, "rewards/accuracies": 0.875, "rewards/chosen": -0.06011068820953369, "rewards/margins": 0.2348804920911789, "rewards/rejected": -0.2949911952018738, "step": 5282 }, { "epoch": 3.2228153118804332, "grad_norm": 1.8542057275772095, "learning_rate": 2.8531537048377218e-06, "log_odds_chosen": 1.245503544807434, "log_odds_ratio": -0.40630000829696655, "logits/chosen": -0.8853281736373901, "logits/rejected": -0.9065679311752319, "logps/chosen": -0.8896346092224121, "logps/rejected": -1.6613261699676514, "loss": 1.2085, "nll_loss": 1.3226253986358643, "rewards/accuracies": 0.75, "rewards/chosen": -0.08896345645189285, "rewards/margins": 0.07716915011405945, "rewards/rejected": -0.1661326140165329, "step": 5283 }, { "epoch": 3.22342534695745, "grad_norm": 1.5749174356460571, "learning_rate": 2.852173913043478e-06, "log_odds_chosen": 3.38679838180542, "log_odds_ratio": -0.12269540131092072, "logits/chosen": -0.5131347179412842, "logits/rejected": -0.7045733332633972, "logps/chosen": -0.5580167174339294, "logps/rejected": -3.146122932434082, "loss": 0.8414, "nll_loss": 0.6193755865097046, "rewards/accuracies": 1.0, "rewards/chosen": -0.055801670998334885, "rewards/margins": 0.2588106095790863, "rewards/rejected": -0.3146122694015503, "step": 5284 }, { "epoch": 3.224035382034467, "grad_norm": 1.2456670999526978, "learning_rate": 2.8511941212492344e-06, "log_odds_chosen": 1.21320378780365, "log_odds_ratio": -0.48666417598724365, "logits/chosen": -0.8081539273262024, "logits/rejected": -0.8660670518875122, "logps/chosen": -0.7150124311447144, "logps/rejected": -1.6716790199279785, "loss": 1.2267, "nll_loss": 1.1199090480804443, "rewards/accuracies": 0.75, "rewards/chosen": -0.0715012475848198, "rewards/margins": 0.09566664695739746, "rewards/rejected": -0.16716790199279785, "step": 5285 }, { "epoch": 3.224645417111484, "grad_norm": 1.89462149143219, "learning_rate": 2.8502143294549906e-06, "log_odds_chosen": 2.0867693424224854, "log_odds_ratio": -0.349509596824646, "logits/chosen": -1.0314966440200806, "logits/rejected": -1.143122911453247, "logps/chosen": -0.7990580201148987, "logps/rejected": -2.491217613220215, "loss": 0.9889, "nll_loss": 1.2290904521942139, "rewards/accuracies": 0.875, "rewards/chosen": -0.07990580052137375, "rewards/margins": 0.16921596229076385, "rewards/rejected": -0.249121755361557, "step": 5286 }, { "epoch": 3.2252554521885006, "grad_norm": 1.4989186525344849, "learning_rate": 2.849234537660747e-06, "log_odds_chosen": 3.0164756774902344, "log_odds_ratio": -0.34398481249809265, "logits/chosen": -0.6802572011947632, "logits/rejected": -0.7697704434394836, "logps/chosen": -0.756171703338623, "logps/rejected": -3.1518301963806152, "loss": 1.2357, "nll_loss": 1.1289324760437012, "rewards/accuracies": 0.75, "rewards/chosen": -0.07561717182397842, "rewards/margins": 0.23956584930419922, "rewards/rejected": -0.31518301367759705, "step": 5287 }, { "epoch": 3.225865487265518, "grad_norm": 4.089122295379639, "learning_rate": 2.8482547458665032e-06, "log_odds_chosen": 1.738452434539795, "log_odds_ratio": -0.3254935145378113, "logits/chosen": -0.9277039766311646, "logits/rejected": -0.7988137006759644, "logps/chosen": -0.846623420715332, "logps/rejected": -2.2857506275177, "loss": 1.0153, "nll_loss": 0.9719566106796265, "rewards/accuracies": 1.0, "rewards/chosen": -0.08466234803199768, "rewards/margins": 0.14391274750232697, "rewards/rejected": -0.22857508063316345, "step": 5288 }, { "epoch": 3.2264755223425348, "grad_norm": 1.3684183359146118, "learning_rate": 2.8472749540722594e-06, "log_odds_chosen": 0.3040347099304199, "log_odds_ratio": -0.5956916809082031, "logits/chosen": -1.0067105293273926, "logits/rejected": -0.9767612218856812, "logps/chosen": -1.0442020893096924, "logps/rejected": -1.278458595275879, "loss": 1.1449, "nll_loss": 1.2900065183639526, "rewards/accuracies": 0.75, "rewards/chosen": -0.10442020744085312, "rewards/margins": 0.023425646126270294, "rewards/rejected": -0.1278458535671234, "step": 5289 }, { "epoch": 3.2270855574195516, "grad_norm": 1.1697899103164673, "learning_rate": 2.846295162278016e-06, "log_odds_chosen": 2.488861083984375, "log_odds_ratio": -0.26908639073371887, "logits/chosen": -0.8185520768165588, "logits/rejected": -0.9130454063415527, "logps/chosen": -0.7160892486572266, "logps/rejected": -2.713624954223633, "loss": 0.9779, "nll_loss": 0.8678081035614014, "rewards/accuracies": 0.875, "rewards/chosen": -0.07160893082618713, "rewards/margins": 0.1997535526752472, "rewards/rejected": -0.2713624835014343, "step": 5290 }, { "epoch": 3.2276955924965685, "grad_norm": 1.1163315773010254, "learning_rate": 2.8453153704837724e-06, "log_odds_chosen": 1.2880797386169434, "log_odds_ratio": -0.4400192201137543, "logits/chosen": -0.832024097442627, "logits/rejected": -0.7995629906654358, "logps/chosen": -0.7638654708862305, "logps/rejected": -1.6209770441055298, "loss": 1.1379, "nll_loss": 1.0473967790603638, "rewards/accuracies": 0.75, "rewards/chosen": -0.07638655602931976, "rewards/margins": 0.08571115136146545, "rewards/rejected": -0.1620977222919464, "step": 5291 }, { "epoch": 3.2283056275735853, "grad_norm": 1.521972894668579, "learning_rate": 2.844335578689528e-06, "log_odds_chosen": 1.5694243907928467, "log_odds_ratio": -0.3381754159927368, "logits/chosen": -0.8194374442100525, "logits/rejected": -1.0345327854156494, "logps/chosen": -0.564324140548706, "logps/rejected": -1.5422308444976807, "loss": 1.0699, "nll_loss": 0.9063951969146729, "rewards/accuracies": 1.0, "rewards/chosen": -0.056432418525218964, "rewards/margins": 0.0977906584739685, "rewards/rejected": -0.15422308444976807, "step": 5292 }, { "epoch": 3.2289156626506026, "grad_norm": 1.408730149269104, "learning_rate": 2.8433557868952847e-06, "log_odds_chosen": 2.922163724899292, "log_odds_ratio": -0.3646637797355652, "logits/chosen": -0.9391013979911804, "logits/rejected": -1.073852777481079, "logps/chosen": -0.9177907705307007, "logps/rejected": -3.5571377277374268, "loss": 1.2498, "nll_loss": 1.2412476539611816, "rewards/accuracies": 0.875, "rewards/chosen": -0.09177907556295395, "rewards/margins": 0.2639347314834595, "rewards/rejected": -0.35571378469467163, "step": 5293 }, { "epoch": 3.2295256977276194, "grad_norm": 1.8934693336486816, "learning_rate": 2.8423759951010412e-06, "log_odds_chosen": 0.5931646227836609, "log_odds_ratio": -0.5729337930679321, "logits/chosen": -0.9113110303878784, "logits/rejected": -0.9374070167541504, "logps/chosen": -0.9921802282333374, "logps/rejected": -1.4955685138702393, "loss": 1.0947, "nll_loss": 1.219085931777954, "rewards/accuracies": 0.625, "rewards/chosen": -0.09921801835298538, "rewards/margins": 0.05033884197473526, "rewards/rejected": -0.14955686032772064, "step": 5294 }, { "epoch": 3.2301357328046363, "grad_norm": 1.21827232837677, "learning_rate": 2.841396203306797e-06, "log_odds_chosen": 2.2959580421447754, "log_odds_ratio": -0.2747516334056854, "logits/chosen": -0.9257878065109253, "logits/rejected": -0.917017936706543, "logps/chosen": -0.9311026930809021, "logps/rejected": -2.8350467681884766, "loss": 1.0747, "nll_loss": 1.075186848640442, "rewards/accuracies": 1.0, "rewards/chosen": -0.09311027824878693, "rewards/margins": 0.19039440155029297, "rewards/rejected": -0.2835046648979187, "step": 5295 }, { "epoch": 3.230745767881653, "grad_norm": 3.5771262645721436, "learning_rate": 2.8404164115125535e-06, "log_odds_chosen": 1.8839272260665894, "log_odds_ratio": -0.46414825320243835, "logits/chosen": -0.8881401419639587, "logits/rejected": -0.8951535820960999, "logps/chosen": -0.860715389251709, "logps/rejected": -2.3507981300354004, "loss": 1.1381, "nll_loss": 1.0365196466445923, "rewards/accuracies": 0.75, "rewards/chosen": -0.08607153594493866, "rewards/margins": 0.14900828897953033, "rewards/rejected": -0.235079824924469, "step": 5296 }, { "epoch": 3.23135580295867, "grad_norm": 3.8317739963531494, "learning_rate": 2.83943661971831e-06, "log_odds_chosen": 2.2545862197875977, "log_odds_ratio": -0.4141803979873657, "logits/chosen": -0.8393037915229797, "logits/rejected": -0.9510856866836548, "logps/chosen": -0.7408799529075623, "logps/rejected": -2.5026891231536865, "loss": 1.0257, "nll_loss": 1.0158891677856445, "rewards/accuracies": 0.625, "rewards/chosen": -0.07408799231052399, "rewards/margins": 0.1761809140443802, "rewards/rejected": -0.2502689063549042, "step": 5297 }, { "epoch": 3.231965838035687, "grad_norm": 2.3546645641326904, "learning_rate": 2.8384568279240657e-06, "log_odds_chosen": 2.494159698486328, "log_odds_ratio": -0.20001789927482605, "logits/chosen": -0.8421167731285095, "logits/rejected": -0.908860445022583, "logps/chosen": -0.5804139971733093, "logps/rejected": -2.197342872619629, "loss": 0.9764, "nll_loss": 1.0265026092529297, "rewards/accuracies": 1.0, "rewards/chosen": -0.05804140120744705, "rewards/margins": 0.16169288754463196, "rewards/rejected": -0.2197342813014984, "step": 5298 }, { "epoch": 3.232575873112704, "grad_norm": 3.4510903358459473, "learning_rate": 2.8374770361298223e-06, "log_odds_chosen": 1.8023213148117065, "log_odds_ratio": -0.4961071014404297, "logits/chosen": -0.8728665709495544, "logits/rejected": -1.0075688362121582, "logps/chosen": -0.9842805862426758, "logps/rejected": -2.4654223918914795, "loss": 0.9582, "nll_loss": 1.1425042152404785, "rewards/accuracies": 0.5, "rewards/chosen": -0.09842806309461594, "rewards/margins": 0.14811420440673828, "rewards/rejected": -0.24654226005077362, "step": 5299 }, { "epoch": 3.233185908189721, "grad_norm": 1.4740955829620361, "learning_rate": 2.8364972443355784e-06, "log_odds_chosen": 2.557213544845581, "log_odds_ratio": -0.25614747405052185, "logits/chosen": -0.8119806051254272, "logits/rejected": -0.9174424409866333, "logps/chosen": -0.5741275548934937, "logps/rejected": -2.4745585918426514, "loss": 0.9606, "nll_loss": 0.9240796566009521, "rewards/accuracies": 0.875, "rewards/chosen": -0.057412754744291306, "rewards/margins": 0.1900431215763092, "rewards/rejected": -0.2474558800458908, "step": 5300 }, { "epoch": 3.233795943266738, "grad_norm": 1.5472087860107422, "learning_rate": 2.835517452541335e-06, "log_odds_chosen": 2.943972110748291, "log_odds_ratio": -0.1992790549993515, "logits/chosen": -0.8044444918632507, "logits/rejected": -1.0057744979858398, "logps/chosen": -0.674952507019043, "logps/rejected": -2.8884100914001465, "loss": 1.0296, "nll_loss": 0.9103179574012756, "rewards/accuracies": 1.0, "rewards/chosen": -0.06749525666236877, "rewards/margins": 0.22134578227996826, "rewards/rejected": -0.28884103894233704, "step": 5301 }, { "epoch": 3.2344059783437547, "grad_norm": 17.11214256286621, "learning_rate": 2.834537660747091e-06, "log_odds_chosen": 2.109767436981201, "log_odds_ratio": -0.4073215425014496, "logits/chosen": -0.7656852602958679, "logits/rejected": -0.7940067052841187, "logps/chosen": -0.7318344116210938, "logps/rejected": -2.3475546836853027, "loss": 1.0598, "nll_loss": 0.9836649298667908, "rewards/accuracies": 0.75, "rewards/chosen": -0.07318343967199326, "rewards/margins": 0.16157202422618866, "rewards/rejected": -0.2347554862499237, "step": 5302 }, { "epoch": 3.2350160134207715, "grad_norm": 1.934442162513733, "learning_rate": 2.833557868952847e-06, "log_odds_chosen": 2.9450159072875977, "log_odds_ratio": -0.376467227935791, "logits/chosen": -0.9576958417892456, "logits/rejected": -0.9379009008407593, "logps/chosen": -0.9153679609298706, "logps/rejected": -3.282454490661621, "loss": 1.3544, "nll_loss": 1.8961408138275146, "rewards/accuracies": 0.75, "rewards/chosen": -0.09153679758310318, "rewards/margins": 0.2367086559534073, "rewards/rejected": -0.3282454311847687, "step": 5303 }, { "epoch": 3.235626048497789, "grad_norm": 1.610317349433899, "learning_rate": 2.8325780771586037e-06, "log_odds_chosen": 1.681792974472046, "log_odds_ratio": -0.3722953498363495, "logits/chosen": -0.8781378269195557, "logits/rejected": -0.9265446662902832, "logps/chosen": -0.7411586046218872, "logps/rejected": -1.98905348777771, "loss": 1.0145, "nll_loss": 0.8502840399742126, "rewards/accuracies": 0.75, "rewards/chosen": -0.07411586493253708, "rewards/margins": 0.12478949129581451, "rewards/rejected": -0.198905348777771, "step": 5304 }, { "epoch": 3.2362360835748056, "grad_norm": 7.778625011444092, "learning_rate": 2.8315982853643603e-06, "log_odds_chosen": 2.849801540374756, "log_odds_ratio": -0.17174378037452698, "logits/chosen": -0.7910802364349365, "logits/rejected": -0.8984032869338989, "logps/chosen": -0.7268410921096802, "logps/rejected": -2.842540979385376, "loss": 0.9757, "nll_loss": 0.9765936136245728, "rewards/accuracies": 0.875, "rewards/chosen": -0.07268410921096802, "rewards/margins": 0.21157002449035645, "rewards/rejected": -0.28425413370132446, "step": 5305 }, { "epoch": 3.2368461186518225, "grad_norm": 5.806548118591309, "learning_rate": 2.830618493570116e-06, "log_odds_chosen": 2.1259379386901855, "log_odds_ratio": -0.5846376419067383, "logits/chosen": -0.7947754263877869, "logits/rejected": -0.866086483001709, "logps/chosen": -0.8391254544258118, "logps/rejected": -2.3662149906158447, "loss": 1.1065, "nll_loss": 0.9768745303153992, "rewards/accuracies": 0.5, "rewards/chosen": -0.08391255140304565, "rewards/margins": 0.15270894765853882, "rewards/rejected": -0.23662149906158447, "step": 5306 }, { "epoch": 3.2374561537288393, "grad_norm": 18.855546951293945, "learning_rate": 2.8296387017758725e-06, "log_odds_chosen": 0.932157039642334, "log_odds_ratio": -0.46378839015960693, "logits/chosen": -0.7808893918991089, "logits/rejected": -0.8048419952392578, "logps/chosen": -0.7686307430267334, "logps/rejected": -1.447765588760376, "loss": 1.2772, "nll_loss": 1.0489583015441895, "rewards/accuracies": 0.625, "rewards/chosen": -0.07686307281255722, "rewards/margins": 0.06791350245475769, "rewards/rejected": -0.1447765827178955, "step": 5307 }, { "epoch": 3.238066188805856, "grad_norm": 4.131369590759277, "learning_rate": 2.828658909981629e-06, "log_odds_chosen": 1.7812881469726562, "log_odds_ratio": -0.4847058057785034, "logits/chosen": -0.8134013414382935, "logits/rejected": -0.8421605825424194, "logps/chosen": -1.0222197771072388, "logps/rejected": -2.4010941982269287, "loss": 1.2603, "nll_loss": 1.12165367603302, "rewards/accuracies": 0.5, "rewards/chosen": -0.10222197324037552, "rewards/margins": 0.13788744807243347, "rewards/rejected": -0.2401094138622284, "step": 5308 }, { "epoch": 3.2386762238828735, "grad_norm": 1.6573048830032349, "learning_rate": 2.8276791181873848e-06, "log_odds_chosen": 1.7006546258926392, "log_odds_ratio": -0.41585272550582886, "logits/chosen": -0.8406500220298767, "logits/rejected": -1.0072987079620361, "logps/chosen": -0.9274799823760986, "logps/rejected": -2.3848540782928467, "loss": 1.0283, "nll_loss": 0.9123321771621704, "rewards/accuracies": 0.875, "rewards/chosen": -0.0927480012178421, "rewards/margins": 0.1457373946905136, "rewards/rejected": -0.2384853959083557, "step": 5309 }, { "epoch": 3.2392862589598903, "grad_norm": 5.048223972320557, "learning_rate": 2.8266993263931413e-06, "log_odds_chosen": 3.5427894592285156, "log_odds_ratio": -0.2754366397857666, "logits/chosen": -0.836341381072998, "logits/rejected": -0.8319472670555115, "logps/chosen": -0.8165490627288818, "logps/rejected": -3.843332052230835, "loss": 1.0682, "nll_loss": 0.9485085010528564, "rewards/accuracies": 0.875, "rewards/chosen": -0.08165490627288818, "rewards/margins": 0.30267834663391113, "rewards/rejected": -0.3843332529067993, "step": 5310 }, { "epoch": 3.239896294036907, "grad_norm": 2.204075813293457, "learning_rate": 2.825719534598898e-06, "log_odds_chosen": 1.4519857168197632, "log_odds_ratio": -0.3815152049064636, "logits/chosen": -0.5500681400299072, "logits/rejected": -0.6157656908035278, "logps/chosen": -0.6971203684806824, "logps/rejected": -1.7681347131729126, "loss": 1.0228, "nll_loss": 0.8444242477416992, "rewards/accuracies": 0.75, "rewards/chosen": -0.06971204280853271, "rewards/margins": 0.1071014329791069, "rewards/rejected": -0.17681348323822021, "step": 5311 }, { "epoch": 3.240506329113924, "grad_norm": 2.040693998336792, "learning_rate": 2.824739742804654e-06, "log_odds_chosen": 2.54196834564209, "log_odds_ratio": -0.3114803433418274, "logits/chosen": -1.0061333179473877, "logits/rejected": -0.9986178874969482, "logps/chosen": -0.844251811504364, "logps/rejected": -2.9274842739105225, "loss": 1.1454, "nll_loss": 1.2346445322036743, "rewards/accuracies": 1.0, "rewards/chosen": -0.0844251811504364, "rewards/margins": 0.20832327008247375, "rewards/rejected": -0.29274842143058777, "step": 5312 }, { "epoch": 3.241116364190941, "grad_norm": 5.262094497680664, "learning_rate": 2.82375995101041e-06, "log_odds_chosen": 1.585147738456726, "log_odds_ratio": -0.4616933763027191, "logits/chosen": -0.8315984606742859, "logits/rejected": -0.8911837339401245, "logps/chosen": -0.6979182958602905, "logps/rejected": -1.9152328968048096, "loss": 0.8935, "nll_loss": 0.8171243667602539, "rewards/accuracies": 0.625, "rewards/chosen": -0.06979183107614517, "rewards/margins": 0.1217314600944519, "rewards/rejected": -0.19152331352233887, "step": 5313 }, { "epoch": 3.241726399267958, "grad_norm": 1.8080470561981201, "learning_rate": 2.8227801592161666e-06, "log_odds_chosen": 2.582761526107788, "log_odds_ratio": -0.23220670223236084, "logits/chosen": -0.7628998160362244, "logits/rejected": -0.8785710334777832, "logps/chosen": -0.5300241708755493, "logps/rejected": -2.3518080711364746, "loss": 0.9175, "nll_loss": 0.6865599155426025, "rewards/accuracies": 0.875, "rewards/chosen": -0.05300241708755493, "rewards/margins": 0.18217837810516357, "rewards/rejected": -0.2351807951927185, "step": 5314 }, { "epoch": 3.242336434344975, "grad_norm": 1.6153085231781006, "learning_rate": 2.8218003674219228e-06, "log_odds_chosen": 1.014992594718933, "log_odds_ratio": -0.48405683040618896, "logits/chosen": -0.8186734914779663, "logits/rejected": -0.9261109828948975, "logps/chosen": -0.7954727411270142, "logps/rejected": -1.4156802892684937, "loss": 1.0139, "nll_loss": 0.890308141708374, "rewards/accuracies": 0.875, "rewards/chosen": -0.07954727858304977, "rewards/margins": 0.06202076002955437, "rewards/rejected": -0.14156803488731384, "step": 5315 }, { "epoch": 3.242946469421992, "grad_norm": 1.134783387184143, "learning_rate": 2.820820575627679e-06, "log_odds_chosen": 3.431922197341919, "log_odds_ratio": -0.20373857021331787, "logits/chosen": -0.6719055771827698, "logits/rejected": -0.8537219762802124, "logps/chosen": -0.673430860042572, "logps/rejected": -3.396047592163086, "loss": 0.9409, "nll_loss": 0.9060925245285034, "rewards/accuracies": 0.875, "rewards/chosen": -0.0673430860042572, "rewards/margins": 0.27226167917251587, "rewards/rejected": -0.3396047353744507, "step": 5316 }, { "epoch": 3.2435565044990087, "grad_norm": 9.0626802444458, "learning_rate": 2.8198407838334354e-06, "log_odds_chosen": 0.4809810519218445, "log_odds_ratio": -0.7413861155509949, "logits/chosen": -1.0134575366973877, "logits/rejected": -1.0650180578231812, "logps/chosen": -0.9517126083374023, "logps/rejected": -1.344785213470459, "loss": 1.1378, "nll_loss": 1.239569902420044, "rewards/accuracies": 0.5, "rewards/chosen": -0.095171257853508, "rewards/margins": 0.03930725157260895, "rewards/rejected": -0.13447850942611694, "step": 5317 }, { "epoch": 3.2441665395760255, "grad_norm": 4.285978317260742, "learning_rate": 2.8188609920391916e-06, "log_odds_chosen": 0.9323340058326721, "log_odds_ratio": -0.5477355718612671, "logits/chosen": -0.9444729685783386, "logits/rejected": -1.0034229755401611, "logps/chosen": -0.9301179051399231, "logps/rejected": -1.759641170501709, "loss": 1.0472, "nll_loss": 1.1211639642715454, "rewards/accuracies": 0.5, "rewards/chosen": -0.09301178902387619, "rewards/margins": 0.0829523354768753, "rewards/rejected": -0.1759641170501709, "step": 5318 }, { "epoch": 3.2447765746530424, "grad_norm": 3.637897253036499, "learning_rate": 2.817881200244948e-06, "log_odds_chosen": 1.3919328451156616, "log_odds_ratio": -0.5789701342582703, "logits/chosen": -0.9776443839073181, "logits/rejected": -0.9093843102455139, "logps/chosen": -1.1422104835510254, "logps/rejected": -2.242109775543213, "loss": 1.1373, "nll_loss": 1.410766363143921, "rewards/accuracies": 0.625, "rewards/chosen": -0.11422105133533478, "rewards/margins": 0.10998991131782532, "rewards/rejected": -0.2242109775543213, "step": 5319 }, { "epoch": 3.2453866097300597, "grad_norm": 4.916977882385254, "learning_rate": 2.816901408450704e-06, "log_odds_chosen": 2.0259640216827393, "log_odds_ratio": -0.35915225744247437, "logits/chosen": -0.7261342406272888, "logits/rejected": -0.867480993270874, "logps/chosen": -0.6302580833435059, "logps/rejected": -2.136624336242676, "loss": 0.9221, "nll_loss": 0.722263753414154, "rewards/accuracies": 0.75, "rewards/chosen": -0.0630258098244667, "rewards/margins": 0.15063661336898804, "rewards/rejected": -0.21366241574287415, "step": 5320 }, { "epoch": 3.2459966448070765, "grad_norm": 11.612189292907715, "learning_rate": 2.8159216166564603e-06, "log_odds_chosen": 2.3318848609924316, "log_odds_ratio": -0.24952340126037598, "logits/chosen": -0.6009300351142883, "logits/rejected": -0.4736185073852539, "logps/chosen": -0.6453317403793335, "logps/rejected": -2.4662556648254395, "loss": 0.9788, "nll_loss": 0.82608962059021, "rewards/accuracies": 0.875, "rewards/chosen": -0.06453317403793335, "rewards/margins": 0.18209238350391388, "rewards/rejected": -0.24662555754184723, "step": 5321 }, { "epoch": 3.2466066798840933, "grad_norm": 1.2424739599227905, "learning_rate": 2.814941824862217e-06, "log_odds_chosen": 2.7762560844421387, "log_odds_ratio": -0.3556663393974304, "logits/chosen": -0.9358558654785156, "logits/rejected": -1.0764729976654053, "logps/chosen": -0.8124984502792358, "logps/rejected": -3.1618614196777344, "loss": 1.1425, "nll_loss": 1.2539300918579102, "rewards/accuracies": 0.75, "rewards/chosen": -0.08124984055757523, "rewards/margins": 0.23493632674217224, "rewards/rejected": -0.3161861300468445, "step": 5322 }, { "epoch": 3.24721671496111, "grad_norm": 1.3381427526474, "learning_rate": 2.8139620330679726e-06, "log_odds_chosen": 0.23381800949573517, "log_odds_ratio": -0.6262885332107544, "logits/chosen": -0.8841953277587891, "logits/rejected": -0.8182538747787476, "logps/chosen": -1.1962089538574219, "logps/rejected": -1.3702025413513184, "loss": 1.1397, "nll_loss": 1.2848914861679077, "rewards/accuracies": 0.625, "rewards/chosen": -0.11962088942527771, "rewards/margins": 0.017399372532963753, "rewards/rejected": -0.1370202600955963, "step": 5323 }, { "epoch": 3.247826750038127, "grad_norm": 1.7929441928863525, "learning_rate": 2.812982241273729e-06, "log_odds_chosen": 3.7341763973236084, "log_odds_ratio": -0.14001469314098358, "logits/chosen": -0.6068781614303589, "logits/rejected": -0.9094390869140625, "logps/chosen": -0.49076566100120544, "logps/rejected": -3.0501532554626465, "loss": 1.0092, "nll_loss": 0.8151305913925171, "rewards/accuracies": 1.0, "rewards/chosen": -0.049076564610004425, "rewards/margins": 0.2559387683868408, "rewards/rejected": -0.30501532554626465, "step": 5324 }, { "epoch": 3.2484367851151443, "grad_norm": 2.1481714248657227, "learning_rate": 2.8120024494794857e-06, "log_odds_chosen": 0.7334581613540649, "log_odds_ratio": -0.5143907070159912, "logits/chosen": -0.8265061974525452, "logits/rejected": -0.7511695623397827, "logps/chosen": -0.8392223119735718, "logps/rejected": -1.3599474430084229, "loss": 1.0534, "nll_loss": 1.0378702878952026, "rewards/accuracies": 0.75, "rewards/chosen": -0.08392222970724106, "rewards/margins": 0.052072517573833466, "rewards/rejected": -0.13599474728107452, "step": 5325 }, { "epoch": 3.249046820192161, "grad_norm": 1.336599588394165, "learning_rate": 2.811022657685242e-06, "log_odds_chosen": 1.0585252046585083, "log_odds_ratio": -0.4392661154270172, "logits/chosen": -0.7602550387382507, "logits/rejected": -0.7502375841140747, "logps/chosen": -1.0497673749923706, "logps/rejected": -1.9268670082092285, "loss": 1.2274, "nll_loss": 1.1436516046524048, "rewards/accuracies": 0.875, "rewards/chosen": -0.10497672855854034, "rewards/margins": 0.08770997077226639, "rewards/rejected": -0.19268670678138733, "step": 5326 }, { "epoch": 3.249656855269178, "grad_norm": 2.6736292839050293, "learning_rate": 2.810042865890998e-06, "log_odds_chosen": 3.1302390098571777, "log_odds_ratio": -0.32307446002960205, "logits/chosen": -0.7023378610610962, "logits/rejected": -0.7998052835464478, "logps/chosen": -0.5955849885940552, "logps/rejected": -3.0311570167541504, "loss": 0.9669, "nll_loss": 0.7603069543838501, "rewards/accuracies": 0.875, "rewards/chosen": -0.05955849215388298, "rewards/margins": 0.24355721473693848, "rewards/rejected": -0.30311572551727295, "step": 5327 }, { "epoch": 3.250266890346195, "grad_norm": 1.969546914100647, "learning_rate": 2.8090630740967545e-06, "log_odds_chosen": 1.5422312021255493, "log_odds_ratio": -0.4724350571632385, "logits/chosen": -0.839667558670044, "logits/rejected": -0.84931480884552, "logps/chosen": -0.7562123537063599, "logps/rejected": -1.7747776508331299, "loss": 0.9927, "nll_loss": 0.8857874870300293, "rewards/accuracies": 0.75, "rewards/chosen": -0.07562123239040375, "rewards/margins": 0.1018565446138382, "rewards/rejected": -0.17747777700424194, "step": 5328 }, { "epoch": 3.2508769254232117, "grad_norm": 1.269417643547058, "learning_rate": 2.8080832823025106e-06, "log_odds_chosen": 0.7121718525886536, "log_odds_ratio": -0.7156690955162048, "logits/chosen": -0.7841957211494446, "logits/rejected": -0.7905450463294983, "logps/chosen": -0.7718432545661926, "logps/rejected": -1.2992684841156006, "loss": 1.1137, "nll_loss": 1.1280860900878906, "rewards/accuracies": 0.5, "rewards/chosen": -0.07718431949615479, "rewards/margins": 0.052742525935173035, "rewards/rejected": -0.12992684543132782, "step": 5329 }, { "epoch": 3.2514869605002286, "grad_norm": 2.2780392169952393, "learning_rate": 2.8071034905082667e-06, "log_odds_chosen": 1.2304649353027344, "log_odds_ratio": -0.32616323232650757, "logits/chosen": -0.7769527435302734, "logits/rejected": -0.9450758695602417, "logps/chosen": -0.6718422174453735, "logps/rejected": -1.492053747177124, "loss": 1.0944, "nll_loss": 1.0010263919830322, "rewards/accuracies": 1.0, "rewards/chosen": -0.06718422472476959, "rewards/margins": 0.08202114701271057, "rewards/rejected": -0.14920537173748016, "step": 5330 }, { "epoch": 3.252096995577246, "grad_norm": 1.5011959075927734, "learning_rate": 2.8061236987140233e-06, "log_odds_chosen": 0.3914565443992615, "log_odds_ratio": -0.5747191905975342, "logits/chosen": -0.9890692234039307, "logits/rejected": -0.7416624426841736, "logps/chosen": -0.9275494813919067, "logps/rejected": -1.1688921451568604, "loss": 1.122, "nll_loss": 1.0997989177703857, "rewards/accuracies": 0.75, "rewards/chosen": -0.09275494515895844, "rewards/margins": 0.024134261533617973, "rewards/rejected": -0.11688920855522156, "step": 5331 }, { "epoch": 3.2527070306542627, "grad_norm": 1.41228449344635, "learning_rate": 2.8051439069197794e-06, "log_odds_chosen": 0.7228356599807739, "log_odds_ratio": -0.674803614616394, "logits/chosen": -0.9410646557807922, "logits/rejected": -1.0613524913787842, "logps/chosen": -0.950340211391449, "logps/rejected": -1.5582023859024048, "loss": 1.0424, "nll_loss": 1.0881919860839844, "rewards/accuracies": 0.375, "rewards/chosen": -0.09503402560949326, "rewards/margins": 0.06078620254993439, "rewards/rejected": -0.15582023561000824, "step": 5332 }, { "epoch": 3.2533170657312795, "grad_norm": 4.4488725662231445, "learning_rate": 2.804164115125536e-06, "log_odds_chosen": 0.9163354635238647, "log_odds_ratio": -0.6025019884109497, "logits/chosen": -0.8360943794250488, "logits/rejected": -0.8978934288024902, "logps/chosen": -0.8079644441604614, "logps/rejected": -1.5379276275634766, "loss": 1.0804, "nll_loss": 0.9151771068572998, "rewards/accuracies": 0.625, "rewards/chosen": -0.08079645037651062, "rewards/margins": 0.07299631834030151, "rewards/rejected": -0.15379276871681213, "step": 5333 }, { "epoch": 3.2539271008082964, "grad_norm": 1.3459161520004272, "learning_rate": 2.803184323331292e-06, "log_odds_chosen": 3.781834602355957, "log_odds_ratio": -0.11969975382089615, "logits/chosen": -0.5112481713294983, "logits/rejected": -0.7635785937309265, "logps/chosen": -0.5695881843566895, "logps/rejected": -3.5849030017852783, "loss": 0.9927, "nll_loss": 0.6917669773101807, "rewards/accuracies": 1.0, "rewards/chosen": -0.05695881322026253, "rewards/margins": 0.30153152346611023, "rewards/rejected": -0.35849034786224365, "step": 5334 }, { "epoch": 3.2545371358853132, "grad_norm": 1.2225252389907837, "learning_rate": 2.802204531537048e-06, "log_odds_chosen": 2.422168493270874, "log_odds_ratio": -0.48411235213279724, "logits/chosen": -0.782656729221344, "logits/rejected": -0.7998809814453125, "logps/chosen": -0.8056914806365967, "logps/rejected": -2.7723708152770996, "loss": 1.0132, "nll_loss": 1.074210524559021, "rewards/accuracies": 0.5, "rewards/chosen": -0.08056914806365967, "rewards/margins": 0.19666790962219238, "rewards/rejected": -0.27723705768585205, "step": 5335 }, { "epoch": 3.2551471709623305, "grad_norm": 1.1562297344207764, "learning_rate": 2.8012247397428047e-06, "log_odds_chosen": 3.1564455032348633, "log_odds_ratio": -0.2993450164794922, "logits/chosen": -0.8890203237533569, "logits/rejected": -1.0878986120224, "logps/chosen": -0.8477940559387207, "logps/rejected": -3.452949047088623, "loss": 0.9916, "nll_loss": 1.0305665731430054, "rewards/accuracies": 0.75, "rewards/chosen": -0.08477941155433655, "rewards/margins": 0.2605155110359192, "rewards/rejected": -0.34529492259025574, "step": 5336 }, { "epoch": 3.2557572060393474, "grad_norm": 1.9751818180084229, "learning_rate": 2.8002449479485604e-06, "log_odds_chosen": 2.535116672515869, "log_odds_ratio": -0.30340349674224854, "logits/chosen": -0.7892428636550903, "logits/rejected": -0.8721539378166199, "logps/chosen": -0.7072921395301819, "logps/rejected": -2.597698926925659, "loss": 1.0851, "nll_loss": 0.9161875247955322, "rewards/accuracies": 0.875, "rewards/chosen": -0.07072921842336655, "rewards/margins": 0.18904069066047668, "rewards/rejected": -0.25976991653442383, "step": 5337 }, { "epoch": 3.256367241116364, "grad_norm": 5.075812816619873, "learning_rate": 2.799265156154317e-06, "log_odds_chosen": 1.5472337007522583, "log_odds_ratio": -0.26448994874954224, "logits/chosen": -0.9528371691703796, "logits/rejected": -0.9083882570266724, "logps/chosen": -0.7757745981216431, "logps/rejected": -1.9169926643371582, "loss": 0.9461, "nll_loss": 0.8696854114532471, "rewards/accuracies": 1.0, "rewards/chosen": -0.07757746428251266, "rewards/margins": 0.11412180960178375, "rewards/rejected": -0.19169926643371582, "step": 5338 }, { "epoch": 3.256977276193381, "grad_norm": 1.9221524000167847, "learning_rate": 2.7982853643600735e-06, "log_odds_chosen": 1.3047088384628296, "log_odds_ratio": -0.7144249677658081, "logits/chosen": -0.9315266609191895, "logits/rejected": -1.0148159265518188, "logps/chosen": -1.1716631650924683, "logps/rejected": -2.326190233230591, "loss": 1.1756, "nll_loss": 1.3635823726654053, "rewards/accuracies": 0.5, "rewards/chosen": -0.11716631054878235, "rewards/margins": 0.11545271426439285, "rewards/rejected": -0.2326190173625946, "step": 5339 }, { "epoch": 3.257587311270398, "grad_norm": 1.750005841255188, "learning_rate": 2.7973055725658296e-06, "log_odds_chosen": 3.288689613342285, "log_odds_ratio": -0.16291043162345886, "logits/chosen": -0.7672866582870483, "logits/rejected": -0.9668040871620178, "logps/chosen": -0.5969848036766052, "logps/rejected": -3.1865615844726562, "loss": 0.9467, "nll_loss": 1.0103744268417358, "rewards/accuracies": 1.0, "rewards/chosen": -0.05969848483800888, "rewards/margins": 0.25895771384239197, "rewards/rejected": -0.31865617632865906, "step": 5340 }, { "epoch": 3.2581973463474148, "grad_norm": 1.3018887042999268, "learning_rate": 2.7963257807715858e-06, "log_odds_chosen": 0.9987658262252808, "log_odds_ratio": -0.6435685157775879, "logits/chosen": -0.6649775505065918, "logits/rejected": -0.8639492988586426, "logps/chosen": -0.7605584859848022, "logps/rejected": -1.3703677654266357, "loss": 1.0879, "nll_loss": 0.9798009395599365, "rewards/accuracies": 0.5, "rewards/chosen": -0.0760558471083641, "rewards/margins": 0.06098093092441559, "rewards/rejected": -0.1370367705821991, "step": 5341 }, { "epoch": 3.258807381424432, "grad_norm": 4.143359661102295, "learning_rate": 2.7953459889773423e-06, "log_odds_chosen": 1.8317430019378662, "log_odds_ratio": -0.5695464611053467, "logits/chosen": -0.7559982538223267, "logits/rejected": -0.9120362401008606, "logps/chosen": -0.7028396129608154, "logps/rejected": -2.067755699157715, "loss": 1.0455, "nll_loss": 0.9019556045532227, "rewards/accuracies": 0.875, "rewards/chosen": -0.07028395682573318, "rewards/margins": 0.136491596698761, "rewards/rejected": -0.20677556097507477, "step": 5342 }, { "epoch": 3.259417416501449, "grad_norm": 1.0699490308761597, "learning_rate": 2.7943661971830984e-06, "log_odds_chosen": 2.4310574531555176, "log_odds_ratio": -0.4363940954208374, "logits/chosen": -0.8187634944915771, "logits/rejected": -0.9800096750259399, "logps/chosen": -0.6821726560592651, "logps/rejected": -2.6896138191223145, "loss": 0.9227, "nll_loss": 1.0082125663757324, "rewards/accuracies": 0.75, "rewards/chosen": -0.06821726262569427, "rewards/margins": 0.2007441371679306, "rewards/rejected": -0.2689613997936249, "step": 5343 }, { "epoch": 3.2600274515784657, "grad_norm": 1.577664852142334, "learning_rate": 2.7933864053888545e-06, "log_odds_chosen": 0.7090001106262207, "log_odds_ratio": -0.4799021780490875, "logits/chosen": -0.908435583114624, "logits/rejected": -0.9930640459060669, "logps/chosen": -0.9006075859069824, "logps/rejected": -1.3607583045959473, "loss": 1.195, "nll_loss": 1.3501739501953125, "rewards/accuracies": 0.75, "rewards/chosen": -0.090060755610466, "rewards/margins": 0.04601508751511574, "rewards/rejected": -0.13607585430145264, "step": 5344 }, { "epoch": 3.2606374866554826, "grad_norm": 11.167492866516113, "learning_rate": 2.792406613594611e-06, "log_odds_chosen": 1.3347402811050415, "log_odds_ratio": -0.4383835196495056, "logits/chosen": -0.982938826084137, "logits/rejected": -0.9817333221435547, "logps/chosen": -0.8140391707420349, "logps/rejected": -1.902724266052246, "loss": 1.0923, "nll_loss": 0.9550771713256836, "rewards/accuracies": 0.875, "rewards/chosen": -0.08140391856431961, "rewards/margins": 0.10886852443218231, "rewards/rejected": -0.19027243554592133, "step": 5345 }, { "epoch": 3.2612475217324994, "grad_norm": 1.6996405124664307, "learning_rate": 2.7914268218003672e-06, "log_odds_chosen": 0.6836504936218262, "log_odds_ratio": -0.6224755048751831, "logits/chosen": -0.952404260635376, "logits/rejected": -0.8951119184494019, "logps/chosen": -0.8210586905479431, "logps/rejected": -1.3117319345474243, "loss": 0.9994, "nll_loss": 0.9265857934951782, "rewards/accuracies": 0.75, "rewards/chosen": -0.08210587501525879, "rewards/margins": 0.049067310988903046, "rewards/rejected": -0.13117317855358124, "step": 5346 }, { "epoch": 3.2618575568095167, "grad_norm": 6.4075703620910645, "learning_rate": 2.7904470300061238e-06, "log_odds_chosen": 1.8668951988220215, "log_odds_ratio": -0.5458729267120361, "logits/chosen": -0.7355472445487976, "logits/rejected": -0.9302995204925537, "logps/chosen": -0.6050325036048889, "logps/rejected": -2.0081965923309326, "loss": 0.9331, "nll_loss": 0.7550772428512573, "rewards/accuracies": 0.625, "rewards/chosen": -0.06050325185060501, "rewards/margins": 0.1403164118528366, "rewards/rejected": -0.20081965625286102, "step": 5347 }, { "epoch": 3.2624675918865336, "grad_norm": 1.472883939743042, "learning_rate": 2.78946723821188e-06, "log_odds_chosen": 1.3517074584960938, "log_odds_ratio": -0.4993647336959839, "logits/chosen": -0.7689411640167236, "logits/rejected": -0.8344646096229553, "logps/chosen": -0.7031881809234619, "logps/rejected": -1.737833023071289, "loss": 0.8259, "nll_loss": 0.9627457857131958, "rewards/accuracies": 0.625, "rewards/chosen": -0.07031881809234619, "rewards/margins": 0.10346449166536331, "rewards/rejected": -0.1737833023071289, "step": 5348 }, { "epoch": 3.2630776269635504, "grad_norm": 3.899413585662842, "learning_rate": 2.788487446417636e-06, "log_odds_chosen": 1.1700637340545654, "log_odds_ratio": -0.5594789981842041, "logits/chosen": -0.707958459854126, "logits/rejected": -0.8403403759002686, "logps/chosen": -0.8211629390716553, "logps/rejected": -1.6869884729385376, "loss": 0.9008, "nll_loss": 0.9316526055335999, "rewards/accuracies": 0.625, "rewards/chosen": -0.08211629092693329, "rewards/margins": 0.08658255636692047, "rewards/rejected": -0.16869884729385376, "step": 5349 }, { "epoch": 3.2636876620405673, "grad_norm": 1.480776309967041, "learning_rate": 2.7875076546233925e-06, "log_odds_chosen": 1.111031174659729, "log_odds_ratio": -0.4284476339817047, "logits/chosen": -1.0544899702072144, "logits/rejected": -1.0489850044250488, "logps/chosen": -0.9118690490722656, "logps/rejected": -1.7413043975830078, "loss": 1.2304, "nll_loss": 1.109977126121521, "rewards/accuracies": 0.75, "rewards/chosen": -0.09118690341711044, "rewards/margins": 0.08294352889060974, "rewards/rejected": -0.1741304248571396, "step": 5350 }, { "epoch": 3.264297697117584, "grad_norm": 2.6971147060394287, "learning_rate": 2.7865278628291487e-06, "log_odds_chosen": 0.8923896551132202, "log_odds_ratio": -0.5329187512397766, "logits/chosen": -0.9927841424942017, "logits/rejected": -1.003695011138916, "logps/chosen": -0.8319364786148071, "logps/rejected": -1.4670993089675903, "loss": 1.2803, "nll_loss": 1.220892310142517, "rewards/accuracies": 0.875, "rewards/chosen": -0.08319365233182907, "rewards/margins": 0.0635162740945816, "rewards/rejected": -0.14670991897583008, "step": 5351 }, { "epoch": 3.264907732194601, "grad_norm": 1.5614290237426758, "learning_rate": 2.785548071034905e-06, "log_odds_chosen": 2.3879683017730713, "log_odds_ratio": -0.33502089977264404, "logits/chosen": -0.9063187837600708, "logits/rejected": -0.9832438230514526, "logps/chosen": -0.8831814527511597, "logps/rejected": -2.8374197483062744, "loss": 1.1977, "nll_loss": 1.0549523830413818, "rewards/accuracies": 0.875, "rewards/chosen": -0.08831814676523209, "rewards/margins": 0.19542385637760162, "rewards/rejected": -0.2837419807910919, "step": 5352 }, { "epoch": 3.2655177672716182, "grad_norm": 2.35662579536438, "learning_rate": 2.7845682792406613e-06, "log_odds_chosen": 0.8426693081855774, "log_odds_ratio": -0.5846157073974609, "logits/chosen": -0.895354688167572, "logits/rejected": -0.9451843500137329, "logps/chosen": -1.1472052335739136, "logps/rejected": -1.84768545627594, "loss": 1.0525, "nll_loss": 1.1069997549057007, "rewards/accuracies": 0.75, "rewards/chosen": -0.11472052335739136, "rewards/margins": 0.0700480192899704, "rewards/rejected": -0.18476855754852295, "step": 5353 }, { "epoch": 3.266127802348635, "grad_norm": 1.41842520236969, "learning_rate": 2.783588487446418e-06, "log_odds_chosen": 1.752676010131836, "log_odds_ratio": -0.42249152064323425, "logits/chosen": -0.6525842547416687, "logits/rejected": -0.7867809534072876, "logps/chosen": -0.600042462348938, "logps/rejected": -1.85305917263031, "loss": 0.968, "nll_loss": 1.0360044240951538, "rewards/accuracies": 0.75, "rewards/chosen": -0.06000424921512604, "rewards/margins": 0.12530165910720825, "rewards/rejected": -0.1853059083223343, "step": 5354 }, { "epoch": 3.266737837425652, "grad_norm": 1.3066221475601196, "learning_rate": 2.7826086956521736e-06, "log_odds_chosen": 0.9577739238739014, "log_odds_ratio": -0.5319620370864868, "logits/chosen": -0.8703485131263733, "logits/rejected": -0.8992832899093628, "logps/chosen": -0.8876402974128723, "logps/rejected": -1.5260214805603027, "loss": 0.9688, "nll_loss": 0.8910182118415833, "rewards/accuracies": 0.625, "rewards/chosen": -0.08876403421163559, "rewards/margins": 0.06383809447288513, "rewards/rejected": -0.15260212123394012, "step": 5355 }, { "epoch": 3.267347872502669, "grad_norm": 6.72017765045166, "learning_rate": 2.78162890385793e-06, "log_odds_chosen": 0.9687013626098633, "log_odds_ratio": -0.6488915085792542, "logits/chosen": -0.9456973075866699, "logits/rejected": -0.9347814321517944, "logps/chosen": -0.8332106471061707, "logps/rejected": -1.5951471328735352, "loss": 1.1065, "nll_loss": 0.9832184314727783, "rewards/accuracies": 0.375, "rewards/chosen": -0.08332106471061707, "rewards/margins": 0.07619364559650421, "rewards/rejected": -0.15951471030712128, "step": 5356 }, { "epoch": 3.267957907579686, "grad_norm": 1.0896481275558472, "learning_rate": 2.7806491120636863e-06, "log_odds_chosen": 2.531864881515503, "log_odds_ratio": -0.26572301983833313, "logits/chosen": -0.8734234571456909, "logits/rejected": -0.884610652923584, "logps/chosen": -0.6881752610206604, "logps/rejected": -2.6477770805358887, "loss": 0.9444, "nll_loss": 0.9523307085037231, "rewards/accuracies": 0.875, "rewards/chosen": -0.06881752610206604, "rewards/margins": 0.19596019387245178, "rewards/rejected": -0.2647777199745178, "step": 5357 }, { "epoch": 3.268567942656703, "grad_norm": 1.2351436614990234, "learning_rate": 2.7796693202694424e-06, "log_odds_chosen": 0.9851536750793457, "log_odds_ratio": -0.5247161388397217, "logits/chosen": -0.8589463829994202, "logits/rejected": -0.9542745351791382, "logps/chosen": -0.9694035053253174, "logps/rejected": -1.5998029708862305, "loss": 1.3243, "nll_loss": 1.169438362121582, "rewards/accuracies": 0.625, "rewards/chosen": -0.09694035351276398, "rewards/margins": 0.06303994357585907, "rewards/rejected": -0.15998029708862305, "step": 5358 }, { "epoch": 3.2691779777337198, "grad_norm": 2.2189369201660156, "learning_rate": 2.778689528475199e-06, "log_odds_chosen": 2.0053374767303467, "log_odds_ratio": -0.47946596145629883, "logits/chosen": -1.0709747076034546, "logits/rejected": -1.0496082305908203, "logps/chosen": -0.9436163306236267, "logps/rejected": -2.6620066165924072, "loss": 1.1814, "nll_loss": 1.0763065814971924, "rewards/accuracies": 0.625, "rewards/chosen": -0.09436164051294327, "rewards/margins": 0.17183902859687805, "rewards/rejected": -0.2662006616592407, "step": 5359 }, { "epoch": 3.2697880128107366, "grad_norm": 4.31366491317749, "learning_rate": 2.777709736680955e-06, "log_odds_chosen": 2.4124655723571777, "log_odds_ratio": -0.25833654403686523, "logits/chosen": -0.49605029821395874, "logits/rejected": -0.7650982737541199, "logps/chosen": -0.5126473307609558, "logps/rejected": -2.1646134853363037, "loss": 0.9543, "nll_loss": 0.7319859862327576, "rewards/accuracies": 0.875, "rewards/chosen": -0.05126474052667618, "rewards/margins": 0.16519662737846375, "rewards/rejected": -0.21646136045455933, "step": 5360 }, { "epoch": 3.2703980478877535, "grad_norm": 2.0802974700927734, "learning_rate": 2.7767299448867116e-06, "log_odds_chosen": 3.8946783542633057, "log_odds_ratio": -0.21204634010791779, "logits/chosen": -0.8233678936958313, "logits/rejected": -1.0210323333740234, "logps/chosen": -0.5121549963951111, "logps/rejected": -3.654942512512207, "loss": 1.0947, "nll_loss": 1.072761058807373, "rewards/accuracies": 0.875, "rewards/chosen": -0.05121549963951111, "rewards/margins": 0.314278781414032, "rewards/rejected": -0.3654942512512207, "step": 5361 }, { "epoch": 3.2710080829647703, "grad_norm": 2.128049373626709, "learning_rate": 2.7757501530924677e-06, "log_odds_chosen": 1.0762003660202026, "log_odds_ratio": -0.539750874042511, "logits/chosen": -0.8933512568473816, "logits/rejected": -0.9283502101898193, "logps/chosen": -0.8623824715614319, "logps/rejected": -1.6471096277236938, "loss": 1.1664, "nll_loss": 1.1138869524002075, "rewards/accuracies": 0.625, "rewards/chosen": -0.08623825013637543, "rewards/margins": 0.07847271859645844, "rewards/rejected": -0.16471095383167267, "step": 5362 }, { "epoch": 3.2716181180417876, "grad_norm": 1.3435779809951782, "learning_rate": 2.774770361298224e-06, "log_odds_chosen": 2.113370656967163, "log_odds_ratio": -0.4680776000022888, "logits/chosen": -0.8450828790664673, "logits/rejected": -0.9995782375335693, "logps/chosen": -0.8658205270767212, "logps/rejected": -2.665719509124756, "loss": 1.1149, "nll_loss": 1.05099618434906, "rewards/accuracies": 0.75, "rewards/chosen": -0.08658206462860107, "rewards/margins": 0.17998990416526794, "rewards/rejected": -0.26657193899154663, "step": 5363 }, { "epoch": 3.2722281531188044, "grad_norm": 1.146378517150879, "learning_rate": 2.7737905695039804e-06, "log_odds_chosen": 2.971794366836548, "log_odds_ratio": -0.36861494183540344, "logits/chosen": -0.8830721974372864, "logits/rejected": -0.910677433013916, "logps/chosen": -0.6167485117912292, "logps/rejected": -3.101710557937622, "loss": 0.9264, "nll_loss": 0.7960633039474487, "rewards/accuracies": 0.75, "rewards/chosen": -0.061674851924180984, "rewards/margins": 0.24849621951580048, "rewards/rejected": -0.31017106771469116, "step": 5364 }, { "epoch": 3.2728381881958213, "grad_norm": 1.1939905881881714, "learning_rate": 2.7728107777097365e-06, "log_odds_chosen": 2.3025190830230713, "log_odds_ratio": -0.4333244562149048, "logits/chosen": -0.7812435626983643, "logits/rejected": -0.9845229387283325, "logps/chosen": -0.9028137922286987, "logps/rejected": -2.6428141593933105, "loss": 1.1534, "nll_loss": 1.116891622543335, "rewards/accuracies": 0.875, "rewards/chosen": -0.09028137475252151, "rewards/margins": 0.17400002479553223, "rewards/rejected": -0.26428139209747314, "step": 5365 }, { "epoch": 3.273448223272838, "grad_norm": 1.3223775625228882, "learning_rate": 2.7718309859154926e-06, "log_odds_chosen": 2.294060707092285, "log_odds_ratio": -0.3838854134082794, "logits/chosen": -0.6460787057876587, "logits/rejected": -0.7703782916069031, "logps/chosen": -0.5945838093757629, "logps/rejected": -2.2710225582122803, "loss": 0.9517, "nll_loss": 0.844446063041687, "rewards/accuracies": 0.75, "rewards/chosen": -0.059458378702402115, "rewards/margins": 0.16764388978481293, "rewards/rejected": -0.22710226476192474, "step": 5366 }, { "epoch": 3.274058258349855, "grad_norm": 5.207531452178955, "learning_rate": 2.770851194121249e-06, "log_odds_chosen": 3.075364828109741, "log_odds_ratio": -0.38265281915664673, "logits/chosen": -0.7173244953155518, "logits/rejected": -0.9006651639938354, "logps/chosen": -0.6632040739059448, "logps/rejected": -3.2461113929748535, "loss": 0.8859, "nll_loss": 0.8916524052619934, "rewards/accuracies": 0.875, "rewards/chosen": -0.06632040441036224, "rewards/margins": 0.25829076766967773, "rewards/rejected": -0.32461118698120117, "step": 5367 }, { "epoch": 3.2746682934268723, "grad_norm": 2.572885274887085, "learning_rate": 2.7698714023270057e-06, "log_odds_chosen": 3.37760853767395, "log_odds_ratio": -0.27261266112327576, "logits/chosen": -0.8037218451499939, "logits/rejected": -1.001466155052185, "logps/chosen": -0.7178636193275452, "logps/rejected": -3.4753081798553467, "loss": 0.9821, "nll_loss": 0.8359450697898865, "rewards/accuracies": 0.875, "rewards/chosen": -0.07178636640310287, "rewards/margins": 0.2757444381713867, "rewards/rejected": -0.3475308418273926, "step": 5368 }, { "epoch": 3.275278328503889, "grad_norm": 1.2371516227722168, "learning_rate": 2.7688916105327614e-06, "log_odds_chosen": 1.7609598636627197, "log_odds_ratio": -0.472221702337265, "logits/chosen": -0.8279004096984863, "logits/rejected": -1.0439361333847046, "logps/chosen": -0.8773021697998047, "logps/rejected": -2.1830554008483887, "loss": 0.9694, "nll_loss": 0.9472818374633789, "rewards/accuracies": 0.5, "rewards/chosen": -0.08773021399974823, "rewards/margins": 0.13057531416416168, "rewards/rejected": -0.2183055281639099, "step": 5369 }, { "epoch": 3.275888363580906, "grad_norm": 2.3308722972869873, "learning_rate": 2.767911818738518e-06, "log_odds_chosen": 2.1571309566497803, "log_odds_ratio": -0.368978887796402, "logits/chosen": -0.8718763589859009, "logits/rejected": -0.866232693195343, "logps/chosen": -0.8520171642303467, "logps/rejected": -2.44523549079895, "loss": 1.0966, "nll_loss": 1.0961426496505737, "rewards/accuracies": 0.875, "rewards/chosen": -0.08520171046257019, "rewards/margins": 0.1593218296766281, "rewards/rejected": -0.2445235401391983, "step": 5370 }, { "epoch": 3.276498398657923, "grad_norm": 1.0310211181640625, "learning_rate": 2.7669320269442745e-06, "log_odds_chosen": 3.5609211921691895, "log_odds_ratio": -0.21817386150360107, "logits/chosen": -0.8750433921813965, "logits/rejected": -0.958240270614624, "logps/chosen": -0.633469820022583, "logps/rejected": -3.5551295280456543, "loss": 0.9893, "nll_loss": 0.7721836566925049, "rewards/accuracies": 0.875, "rewards/chosen": -0.0633469894528389, "rewards/margins": 0.29216596484184265, "rewards/rejected": -0.35551297664642334, "step": 5371 }, { "epoch": 3.2771084337349397, "grad_norm": 1.45914626121521, "learning_rate": 2.76595223515003e-06, "log_odds_chosen": 2.3239693641662598, "log_odds_ratio": -0.30947360396385193, "logits/chosen": -0.8693090677261353, "logits/rejected": -0.8309482932090759, "logps/chosen": -0.6995657682418823, "logps/rejected": -2.4747509956359863, "loss": 1.0745, "nll_loss": 0.8792802691459656, "rewards/accuracies": 0.875, "rewards/chosen": -0.06995657086372375, "rewards/margins": 0.17751853168010712, "rewards/rejected": -0.24747510254383087, "step": 5372 }, { "epoch": 3.2777184688119565, "grad_norm": 2.778656005859375, "learning_rate": 2.7649724433557867e-06, "log_odds_chosen": 1.5877186059951782, "log_odds_ratio": -0.5584349632263184, "logits/chosen": -0.935014545917511, "logits/rejected": -1.0229897499084473, "logps/chosen": -1.017068862915039, "logps/rejected": -2.4093856811523438, "loss": 1.046, "nll_loss": 1.1188515424728394, "rewards/accuracies": 0.75, "rewards/chosen": -0.10170689225196838, "rewards/margins": 0.13923169672489166, "rewards/rejected": -0.24093860387802124, "step": 5373 }, { "epoch": 3.278328503888974, "grad_norm": 2.51202130317688, "learning_rate": 2.7639926515615433e-06, "log_odds_chosen": 3.835512161254883, "log_odds_ratio": -0.4139070510864258, "logits/chosen": -0.7964801788330078, "logits/rejected": -0.95046466588974, "logps/chosen": -0.8691607713699341, "logps/rejected": -4.169208526611328, "loss": 1.034, "nll_loss": 0.9306057095527649, "rewards/accuracies": 0.625, "rewards/chosen": -0.08691607415676117, "rewards/margins": 0.3300047516822815, "rewards/rejected": -0.41692084074020386, "step": 5374 }, { "epoch": 3.2789385389659906, "grad_norm": 17.04872703552246, "learning_rate": 2.7630128597672994e-06, "log_odds_chosen": 0.40380051732063293, "log_odds_ratio": -0.5499721765518188, "logits/chosen": -1.0088930130004883, "logits/rejected": -1.0032497644424438, "logps/chosen": -0.9358929991722107, "logps/rejected": -1.1825470924377441, "loss": 1.0987, "nll_loss": 1.205493688583374, "rewards/accuracies": 0.75, "rewards/chosen": -0.09358930587768555, "rewards/margins": 0.024665409699082375, "rewards/rejected": -0.11825470626354218, "step": 5375 }, { "epoch": 3.2795485740430075, "grad_norm": 2.509164810180664, "learning_rate": 2.7620330679730555e-06, "log_odds_chosen": 1.5157849788665771, "log_odds_ratio": -0.43266957998275757, "logits/chosen": -0.7662723064422607, "logits/rejected": -0.8185716867446899, "logps/chosen": -0.7940539121627808, "logps/rejected": -1.885545015335083, "loss": 1.1269, "nll_loss": 1.020386815071106, "rewards/accuracies": 0.875, "rewards/chosen": -0.07940539717674255, "rewards/margins": 0.10914911329746246, "rewards/rejected": -0.18855451047420502, "step": 5376 }, { "epoch": 3.2801586091200243, "grad_norm": 2.1056058406829834, "learning_rate": 2.7610532761788117e-06, "log_odds_chosen": 1.3529722690582275, "log_odds_ratio": -0.45516449213027954, "logits/chosen": -0.7190539836883545, "logits/rejected": -0.8469681143760681, "logps/chosen": -0.830141007900238, "logps/rejected": -1.8066747188568115, "loss": 1.161, "nll_loss": 0.9742962121963501, "rewards/accuracies": 0.625, "rewards/chosen": -0.0830141007900238, "rewards/margins": 0.09765337407588959, "rewards/rejected": -0.1806674748659134, "step": 5377 }, { "epoch": 3.280768644197041, "grad_norm": 1.692979335784912, "learning_rate": 2.760073484384568e-06, "log_odds_chosen": 2.548771381378174, "log_odds_ratio": -0.675954282283783, "logits/chosen": -0.7887508869171143, "logits/rejected": -0.8004952669143677, "logps/chosen": -0.819922924041748, "logps/rejected": -2.768542766571045, "loss": 0.962, "nll_loss": 1.1778440475463867, "rewards/accuracies": 0.5, "rewards/chosen": -0.08199229836463928, "rewards/margins": 0.1948619931936264, "rewards/rejected": -0.2768542766571045, "step": 5378 }, { "epoch": 3.2813786792740585, "grad_norm": 3.2168397903442383, "learning_rate": 2.7590936925903243e-06, "log_odds_chosen": 1.9975714683532715, "log_odds_ratio": -0.36581191420555115, "logits/chosen": -0.8272811770439148, "logits/rejected": -0.8839319944381714, "logps/chosen": -0.6353793144226074, "logps/rejected": -2.1335644721984863, "loss": 0.9426, "nll_loss": 0.693122148513794, "rewards/accuracies": 0.875, "rewards/chosen": -0.06353793293237686, "rewards/margins": 0.1498185396194458, "rewards/rejected": -0.21335646510124207, "step": 5379 }, { "epoch": 3.2819887143510753, "grad_norm": 10.277728080749512, "learning_rate": 2.7581139007960805e-06, "log_odds_chosen": 2.1078455448150635, "log_odds_ratio": -0.4696626663208008, "logits/chosen": -0.823340654373169, "logits/rejected": -0.9480631351470947, "logps/chosen": -0.7592016458511353, "logps/rejected": -2.6320974826812744, "loss": 0.9519, "nll_loss": 0.9262634515762329, "rewards/accuracies": 0.625, "rewards/chosen": -0.07592016458511353, "rewards/margins": 0.18728956580162048, "rewards/rejected": -0.2632097601890564, "step": 5380 }, { "epoch": 3.282598749428092, "grad_norm": 1.8081475496292114, "learning_rate": 2.757134109001837e-06, "log_odds_chosen": 1.748624563217163, "log_odds_ratio": -0.3315226137638092, "logits/chosen": -0.782660722732544, "logits/rejected": -0.9639140367507935, "logps/chosen": -1.007157564163208, "logps/rejected": -2.485485315322876, "loss": 1.2381, "nll_loss": 1.0725791454315186, "rewards/accuracies": 0.875, "rewards/chosen": -0.100715771317482, "rewards/margins": 0.1478327512741089, "rewards/rejected": -0.24854853749275208, "step": 5381 }, { "epoch": 3.283208784505109, "grad_norm": 2.2424705028533936, "learning_rate": 2.7561543172075935e-06, "log_odds_chosen": 2.3997843265533447, "log_odds_ratio": -0.2913181483745575, "logits/chosen": -0.6830918788909912, "logits/rejected": -0.8040779829025269, "logps/chosen": -0.6935676336288452, "logps/rejected": -2.5812902450561523, "loss": 1.0199, "nll_loss": 0.9963843822479248, "rewards/accuracies": 1.0, "rewards/chosen": -0.069356769323349, "rewards/margins": 0.1887722611427307, "rewards/rejected": -0.2581290304660797, "step": 5382 }, { "epoch": 3.283818819582126, "grad_norm": 0.9334893226623535, "learning_rate": 2.7551745254133492e-06, "log_odds_chosen": 2.215424060821533, "log_odds_ratio": -0.42714419960975647, "logits/chosen": -0.785993218421936, "logits/rejected": -0.9337886571884155, "logps/chosen": -0.8200294971466064, "logps/rejected": -2.5781755447387695, "loss": 0.8919, "nll_loss": 0.9714635014533997, "rewards/accuracies": 0.75, "rewards/chosen": -0.08200294524431229, "rewards/margins": 0.17581462860107422, "rewards/rejected": -0.2578175663948059, "step": 5383 }, { "epoch": 3.2844288546591427, "grad_norm": 1.5729602575302124, "learning_rate": 2.754194733619106e-06, "log_odds_chosen": 1.1024906635284424, "log_odds_ratio": -0.3567703366279602, "logits/chosen": -1.0666377544403076, "logits/rejected": -1.0626518726348877, "logps/chosen": -1.0061033964157104, "logps/rejected": -1.8102834224700928, "loss": 1.1844, "nll_loss": 1.1587419509887695, "rewards/accuracies": 0.875, "rewards/chosen": -0.10061033815145493, "rewards/margins": 0.08041800558567047, "rewards/rejected": -0.181028351187706, "step": 5384 }, { "epoch": 3.28503888973616, "grad_norm": 2.1492767333984375, "learning_rate": 2.7532149418248623e-06, "log_odds_chosen": 1.0931074619293213, "log_odds_ratio": -0.461635947227478, "logits/chosen": -0.8377208113670349, "logits/rejected": -0.9462915658950806, "logps/chosen": -0.8342674970626831, "logps/rejected": -1.6517298221588135, "loss": 1.1552, "nll_loss": 1.025471806526184, "rewards/accuracies": 0.75, "rewards/chosen": -0.08342675119638443, "rewards/margins": 0.08174622058868408, "rewards/rejected": -0.1651729792356491, "step": 5385 }, { "epoch": 3.285648924813177, "grad_norm": 9.738831520080566, "learning_rate": 2.752235150030618e-06, "log_odds_chosen": 0.5841323137283325, "log_odds_ratio": -0.5775467753410339, "logits/chosen": -1.01880943775177, "logits/rejected": -1.0777270793914795, "logps/chosen": -0.8947165608406067, "logps/rejected": -1.4129105806350708, "loss": 0.9507, "nll_loss": 1.006968379020691, "rewards/accuracies": 0.5, "rewards/chosen": -0.08947166055440903, "rewards/margins": 0.05181939899921417, "rewards/rejected": -0.1412910521030426, "step": 5386 }, { "epoch": 3.2862589598901937, "grad_norm": 1.658961534500122, "learning_rate": 2.7512553582363746e-06, "log_odds_chosen": 1.8285000324249268, "log_odds_ratio": -0.21708893775939941, "logits/chosen": -0.6724575757980347, "logits/rejected": -0.7999380230903625, "logps/chosen": -0.7591685652732849, "logps/rejected": -2.083630084991455, "loss": 1.1741, "nll_loss": 1.00076425075531, "rewards/accuracies": 1.0, "rewards/chosen": -0.07591685652732849, "rewards/margins": 0.13244614005088806, "rewards/rejected": -0.20836299657821655, "step": 5387 }, { "epoch": 3.2868689949672105, "grad_norm": 2.5558080673217773, "learning_rate": 2.750275566442131e-06, "log_odds_chosen": 2.0842835903167725, "log_odds_ratio": -0.33346128463745117, "logits/chosen": -0.8518863916397095, "logits/rejected": -1.0653377771377563, "logps/chosen": -0.7246246337890625, "logps/rejected": -1.9931433200836182, "loss": 1.2175, "nll_loss": 1.094992756843567, "rewards/accuracies": 0.875, "rewards/chosen": -0.07246246933937073, "rewards/margins": 0.1268518716096878, "rewards/rejected": -0.19931432604789734, "step": 5388 }, { "epoch": 3.2874790300442274, "grad_norm": 1.9600108861923218, "learning_rate": 2.7492957746478872e-06, "log_odds_chosen": 3.1036643981933594, "log_odds_ratio": -0.22914747893810272, "logits/chosen": -0.8056217432022095, "logits/rejected": -0.9117739200592041, "logps/chosen": -0.6555630564689636, "logps/rejected": -3.1600522994995117, "loss": 0.9793, "nll_loss": 0.8490555286407471, "rewards/accuracies": 0.875, "rewards/chosen": -0.06555631011724472, "rewards/margins": 0.25044894218444824, "rewards/rejected": -0.31600522994995117, "step": 5389 }, { "epoch": 3.2880890651212447, "grad_norm": 4.952900409698486, "learning_rate": 2.7483159828536434e-06, "log_odds_chosen": 1.786528468132019, "log_odds_ratio": -0.2937711477279663, "logits/chosen": -0.9064613580703735, "logits/rejected": -1.0042014122009277, "logps/chosen": -0.803053617477417, "logps/rejected": -2.1613597869873047, "loss": 1.0632, "nll_loss": 1.104083776473999, "rewards/accuracies": 1.0, "rewards/chosen": -0.08030535280704498, "rewards/margins": 0.1358306109905243, "rewards/rejected": -0.21613597869873047, "step": 5390 }, { "epoch": 3.2886991001982615, "grad_norm": 1.8831353187561035, "learning_rate": 2.7473361910594e-06, "log_odds_chosen": 4.199331283569336, "log_odds_ratio": -0.2138209491968155, "logits/chosen": -0.7727087140083313, "logits/rejected": -0.9965744614601135, "logps/chosen": -0.5718764066696167, "logps/rejected": -4.041705131530762, "loss": 0.9732, "nll_loss": 0.7456108331680298, "rewards/accuracies": 0.875, "rewards/chosen": -0.05718763917684555, "rewards/margins": 0.34698283672332764, "rewards/rejected": -0.4041704535484314, "step": 5391 }, { "epoch": 3.2893091352752784, "grad_norm": 2.0808773040771484, "learning_rate": 2.746356399265156e-06, "log_odds_chosen": 1.4246230125427246, "log_odds_ratio": -0.5622143745422363, "logits/chosen": -0.8376891613006592, "logits/rejected": -0.9424586296081543, "logps/chosen": -0.7675173282623291, "logps/rejected": -1.9443625211715698, "loss": 0.8794, "nll_loss": 0.808942437171936, "rewards/accuracies": 0.5, "rewards/chosen": -0.07675173878669739, "rewards/margins": 0.11768452823162079, "rewards/rejected": -0.19443625211715698, "step": 5392 }, { "epoch": 3.289919170352295, "grad_norm": 1.504795789718628, "learning_rate": 2.7453766074709126e-06, "log_odds_chosen": 1.0053960084915161, "log_odds_ratio": -0.49019521474838257, "logits/chosen": -0.6621255278587341, "logits/rejected": -0.8354756236076355, "logps/chosen": -0.8028436899185181, "logps/rejected": -1.4130804538726807, "loss": 1.13, "nll_loss": 1.0290563106536865, "rewards/accuracies": 0.75, "rewards/chosen": -0.08028437197208405, "rewards/margins": 0.061023663729429245, "rewards/rejected": -0.1413080394268036, "step": 5393 }, { "epoch": 3.290529205429312, "grad_norm": 2.094576597213745, "learning_rate": 2.7443968156766683e-06, "log_odds_chosen": 1.8408324718475342, "log_odds_ratio": -0.3438231348991394, "logits/chosen": -0.641279935836792, "logits/rejected": -0.7968094348907471, "logps/chosen": -0.6351543664932251, "logps/rejected": -1.9246619939804077, "loss": 1.019, "nll_loss": 0.730666995048523, "rewards/accuracies": 0.875, "rewards/chosen": -0.06351543962955475, "rewards/margins": 0.12895077466964722, "rewards/rejected": -0.19246619939804077, "step": 5394 }, { "epoch": 3.291139240506329, "grad_norm": 5.081132888793945, "learning_rate": 2.743417023882425e-06, "log_odds_chosen": 0.7204855680465698, "log_odds_ratio": -0.5608106851577759, "logits/chosen": -0.9838646054267883, "logits/rejected": -0.9093261361122131, "logps/chosen": -0.8919625282287598, "logps/rejected": -1.4377479553222656, "loss": 1.0952, "nll_loss": 1.1156834363937378, "rewards/accuracies": 0.625, "rewards/chosen": -0.08919624984264374, "rewards/margins": 0.054578542709350586, "rewards/rejected": -0.14377479255199432, "step": 5395 }, { "epoch": 3.291749275583346, "grad_norm": 3.9197187423706055, "learning_rate": 2.7424372320881814e-06, "log_odds_chosen": 2.596522331237793, "log_odds_ratio": -0.3699207305908203, "logits/chosen": -0.7680335640907288, "logits/rejected": -0.9314780235290527, "logps/chosen": -0.6528229117393494, "logps/rejected": -2.7577245235443115, "loss": 0.9875, "nll_loss": 0.7733019590377808, "rewards/accuracies": 0.875, "rewards/chosen": -0.06528228521347046, "rewards/margins": 0.2104901671409607, "rewards/rejected": -0.27577245235443115, "step": 5396 }, { "epoch": 3.292359310660363, "grad_norm": 1.2771425247192383, "learning_rate": 2.741457440293937e-06, "log_odds_chosen": 3.4006106853485107, "log_odds_ratio": -0.3026576638221741, "logits/chosen": -0.6445019245147705, "logits/rejected": -1.0294685363769531, "logps/chosen": -0.5796379446983337, "logps/rejected": -3.3163881301879883, "loss": 1.055, "nll_loss": 0.793481171131134, "rewards/accuracies": 0.75, "rewards/chosen": -0.05796379595994949, "rewards/margins": 0.27367502450942993, "rewards/rejected": -0.33163881301879883, "step": 5397 }, { "epoch": 3.29296934573738, "grad_norm": 1.8811581134796143, "learning_rate": 2.7404776484996936e-06, "log_odds_chosen": 3.1454272270202637, "log_odds_ratio": -0.3503376245498657, "logits/chosen": -0.9437389969825745, "logits/rejected": -1.023689866065979, "logps/chosen": -0.7600514888763428, "logps/rejected": -3.443039894104004, "loss": 0.9948, "nll_loss": 1.1124725341796875, "rewards/accuracies": 0.75, "rewards/chosen": -0.07600514590740204, "rewards/margins": 0.268298864364624, "rewards/rejected": -0.34430399537086487, "step": 5398 }, { "epoch": 3.2935793808143967, "grad_norm": 4.505350112915039, "learning_rate": 2.73949785670545e-06, "log_odds_chosen": 1.1712442636489868, "log_odds_ratio": -0.37735515832901, "logits/chosen": -0.8478203415870667, "logits/rejected": -0.8952591419219971, "logps/chosen": -0.6693508625030518, "logps/rejected": -1.3640079498291016, "loss": 1.0598, "nll_loss": 0.9130735993385315, "rewards/accuracies": 0.75, "rewards/chosen": -0.06693509221076965, "rewards/margins": 0.06946570426225662, "rewards/rejected": -0.13640078902244568, "step": 5399 }, { "epoch": 3.2941894158914136, "grad_norm": 1.9927566051483154, "learning_rate": 2.738518064911206e-06, "log_odds_chosen": 2.150569200515747, "log_odds_ratio": -0.3052597939968109, "logits/chosen": -0.8263399600982666, "logits/rejected": -0.7637810111045837, "logps/chosen": -0.7869405150413513, "logps/rejected": -2.459554672241211, "loss": 0.9647, "nll_loss": 0.997028112411499, "rewards/accuracies": 0.875, "rewards/chosen": -0.07869406044483185, "rewards/margins": 0.16726142168045044, "rewards/rejected": -0.2459554672241211, "step": 5400 }, { "epoch": 3.294799450968431, "grad_norm": 1.4251348972320557, "learning_rate": 2.7375382731169624e-06, "log_odds_chosen": 1.1639375686645508, "log_odds_ratio": -0.3873664140701294, "logits/chosen": -0.9425604343414307, "logits/rejected": -0.9352021217346191, "logps/chosen": -0.924604058265686, "logps/rejected": -1.729053020477295, "loss": 1.1011, "nll_loss": 1.1124285459518433, "rewards/accuracies": 0.875, "rewards/chosen": -0.09246040135622025, "rewards/margins": 0.08044490218162537, "rewards/rejected": -0.17290529608726501, "step": 5401 }, { "epoch": 3.2954094860454477, "grad_norm": 1.148172378540039, "learning_rate": 2.736558481322719e-06, "log_odds_chosen": 1.6533122062683105, "log_odds_ratio": -0.3698602616786957, "logits/chosen": -0.5838254690170288, "logits/rejected": -0.6308227777481079, "logps/chosen": -0.6703345775604248, "logps/rejected": -1.7552279233932495, "loss": 1.0464, "nll_loss": 0.8307027220726013, "rewards/accuracies": 0.875, "rewards/chosen": -0.06703345477581024, "rewards/margins": 0.10848934203386307, "rewards/rejected": -0.1755227893590927, "step": 5402 }, { "epoch": 3.2960195211224645, "grad_norm": 1.19149911403656, "learning_rate": 2.735578689528475e-06, "log_odds_chosen": 1.7491376399993896, "log_odds_ratio": -0.38230836391448975, "logits/chosen": -0.7565479874610901, "logits/rejected": -0.9285036325454712, "logps/chosen": -0.599506139755249, "logps/rejected": -1.819010615348816, "loss": 0.8913, "nll_loss": 0.8229342699050903, "rewards/accuracies": 0.875, "rewards/chosen": -0.05995061621069908, "rewards/margins": 0.12195045500993729, "rewards/rejected": -0.18190106749534607, "step": 5403 }, { "epoch": 3.2966295561994814, "grad_norm": 16.029455184936523, "learning_rate": 2.734598897734231e-06, "log_odds_chosen": 1.286390781402588, "log_odds_ratio": -0.47478029131889343, "logits/chosen": -1.11894953250885, "logits/rejected": -1.0802860260009766, "logps/chosen": -1.1346702575683594, "logps/rejected": -2.220517635345459, "loss": 1.1136, "nll_loss": 1.3351123332977295, "rewards/accuracies": 0.625, "rewards/chosen": -0.1134670227766037, "rewards/margins": 0.10858474671840668, "rewards/rejected": -0.22205176949501038, "step": 5404 }, { "epoch": 3.2972395912764982, "grad_norm": 2.335757255554199, "learning_rate": 2.7336191059399877e-06, "log_odds_chosen": 3.5031983852386475, "log_odds_ratio": -0.1772359311580658, "logits/chosen": -0.844321072101593, "logits/rejected": -1.0075894594192505, "logps/chosen": -0.7144412398338318, "logps/rejected": -3.562211036682129, "loss": 0.94, "nll_loss": 1.0019875764846802, "rewards/accuracies": 0.875, "rewards/chosen": -0.07144412398338318, "rewards/margins": 0.2847769856452942, "rewards/rejected": -0.35622113943099976, "step": 5405 }, { "epoch": 3.297849626353515, "grad_norm": 3.3559796810150146, "learning_rate": 2.732639314145744e-06, "log_odds_chosen": 3.8005294799804688, "log_odds_ratio": -0.36088627576828003, "logits/chosen": -0.8278176188468933, "logits/rejected": -0.9970249533653259, "logps/chosen": -0.6223996877670288, "logps/rejected": -3.6942379474639893, "loss": 1.2153, "nll_loss": 1.1081334352493286, "rewards/accuracies": 0.75, "rewards/chosen": -0.06223996728658676, "rewards/margins": 0.3071838617324829, "rewards/rejected": -0.3694237768650055, "step": 5406 }, { "epoch": 3.2984596614305324, "grad_norm": 1.2220118045806885, "learning_rate": 2.7316595223515004e-06, "log_odds_chosen": 1.4105613231658936, "log_odds_ratio": -0.516788899898529, "logits/chosen": -0.8022699356079102, "logits/rejected": -0.7900376319885254, "logps/chosen": -0.7331545352935791, "logps/rejected": -1.8402390480041504, "loss": 0.9701, "nll_loss": 0.8621882200241089, "rewards/accuracies": 0.875, "rewards/chosen": -0.07331544905900955, "rewards/margins": 0.11070846021175385, "rewards/rejected": -0.184023916721344, "step": 5407 }, { "epoch": 3.299069696507549, "grad_norm": 2.126697063446045, "learning_rate": 2.7306797305572565e-06, "log_odds_chosen": 1.0293471813201904, "log_odds_ratio": -0.46613508462905884, "logits/chosen": -0.7072993516921997, "logits/rejected": -0.6194051504135132, "logps/chosen": -0.8849667310714722, "logps/rejected": -1.641262173652649, "loss": 1.084, "nll_loss": 1.0625030994415283, "rewards/accuracies": 0.875, "rewards/chosen": -0.08849667012691498, "rewards/margins": 0.07562955468893051, "rewards/rejected": -0.1641262173652649, "step": 5408 }, { "epoch": 3.299679731584566, "grad_norm": 4.28006649017334, "learning_rate": 2.7296999387630127e-06, "log_odds_chosen": 1.2309174537658691, "log_odds_ratio": -0.32486769556999207, "logits/chosen": -0.8696344494819641, "logits/rejected": -0.8972780704498291, "logps/chosen": -0.687993586063385, "logps/rejected": -1.4069546461105347, "loss": 1.1442, "nll_loss": 1.0699477195739746, "rewards/accuracies": 0.875, "rewards/chosen": -0.06879936158657074, "rewards/margins": 0.07189610600471497, "rewards/rejected": -0.1406954675912857, "step": 5409 }, { "epoch": 3.300289766661583, "grad_norm": 3.645631790161133, "learning_rate": 2.728720146968769e-06, "log_odds_chosen": 0.9796773195266724, "log_odds_ratio": -0.45296379923820496, "logits/chosen": -0.8040157556533813, "logits/rejected": -0.9270674586296082, "logps/chosen": -1.1649868488311768, "logps/rejected": -1.923734188079834, "loss": 1.093, "nll_loss": 1.3651325702667236, "rewards/accuracies": 0.75, "rewards/chosen": -0.1164986863732338, "rewards/margins": 0.07587473094463348, "rewards/rejected": -0.19237343966960907, "step": 5410 }, { "epoch": 3.3008998017386, "grad_norm": 2.0260705947875977, "learning_rate": 2.7277403551745253e-06, "log_odds_chosen": 0.20703431963920593, "log_odds_ratio": -0.6259726285934448, "logits/chosen": -0.7349278926849365, "logits/rejected": -0.757383406162262, "logps/chosen": -0.91663658618927, "logps/rejected": -1.0503578186035156, "loss": 1.0109, "nll_loss": 1.1146385669708252, "rewards/accuracies": 0.75, "rewards/chosen": -0.091663658618927, "rewards/margins": 0.013372127898037434, "rewards/rejected": -0.10503578931093216, "step": 5411 }, { "epoch": 3.301509836815617, "grad_norm": 9.050243377685547, "learning_rate": 2.7267605633802814e-06, "log_odds_chosen": 1.386803388595581, "log_odds_ratio": -0.4270876944065094, "logits/chosen": -0.9712374210357666, "logits/rejected": -0.8249582052230835, "logps/chosen": -0.8491820096969604, "logps/rejected": -1.843205213546753, "loss": 1.0002, "nll_loss": 1.054888129234314, "rewards/accuracies": 0.875, "rewards/chosen": -0.08491820096969604, "rewards/margins": 0.09940231591463089, "rewards/rejected": -0.18432050943374634, "step": 5412 }, { "epoch": 3.302119871892634, "grad_norm": 5.5758514404296875, "learning_rate": 2.725780771586038e-06, "log_odds_chosen": 1.4539591073989868, "log_odds_ratio": -0.48761940002441406, "logits/chosen": -0.6969399452209473, "logits/rejected": -0.9094028472900391, "logps/chosen": -0.7685642242431641, "logps/rejected": -1.8669216632843018, "loss": 1.0446, "nll_loss": 0.9148930311203003, "rewards/accuracies": 0.625, "rewards/chosen": -0.07685642689466476, "rewards/margins": 0.10983574390411377, "rewards/rejected": -0.18669216334819794, "step": 5413 }, { "epoch": 3.3027299069696507, "grad_norm": 2.5075321197509766, "learning_rate": 2.724800979791794e-06, "log_odds_chosen": 3.172727584838867, "log_odds_ratio": -0.19646582007408142, "logits/chosen": -0.9863031506538391, "logits/rejected": -1.16087007522583, "logps/chosen": -0.6621160507202148, "logps/rejected": -2.989326000213623, "loss": 0.9083, "nll_loss": 1.1209664344787598, "rewards/accuracies": 1.0, "rewards/chosen": -0.06621161103248596, "rewards/margins": 0.2327209860086441, "rewards/rejected": -0.29893261194229126, "step": 5414 }, { "epoch": 3.3033399420466676, "grad_norm": 1.954584002494812, "learning_rate": 2.7238211879975502e-06, "log_odds_chosen": 0.42872869968414307, "log_odds_ratio": -0.6217036247253418, "logits/chosen": -0.9027721881866455, "logits/rejected": -0.8514136075973511, "logps/chosen": -0.8575995564460754, "logps/rejected": -1.151677131652832, "loss": 1.068, "nll_loss": 1.13789701461792, "rewards/accuracies": 0.75, "rewards/chosen": -0.0857599601149559, "rewards/margins": 0.029407760128378868, "rewards/rejected": -0.11516772210597992, "step": 5415 }, { "epoch": 3.3039499771236844, "grad_norm": 1.5560967922210693, "learning_rate": 2.7228413962033068e-06, "log_odds_chosen": 1.6881475448608398, "log_odds_ratio": -0.6001825928688049, "logits/chosen": -0.7185373306274414, "logits/rejected": -0.8707603812217712, "logps/chosen": -0.8450779318809509, "logps/rejected": -2.272284746170044, "loss": 0.925, "nll_loss": 0.8633263111114502, "rewards/accuracies": 0.5, "rewards/chosen": -0.08450779318809509, "rewards/margins": 0.14272069931030273, "rewards/rejected": -0.22722849249839783, "step": 5416 }, { "epoch": 3.3045600122007017, "grad_norm": 1.4468715190887451, "learning_rate": 2.721861604409063e-06, "log_odds_chosen": 2.1505134105682373, "log_odds_ratio": -0.3389269709587097, "logits/chosen": -0.7773854732513428, "logits/rejected": -0.7571609020233154, "logps/chosen": -0.7890282869338989, "logps/rejected": -2.4601564407348633, "loss": 1.0258, "nll_loss": 0.8785132169723511, "rewards/accuracies": 0.75, "rewards/chosen": -0.07890283316373825, "rewards/margins": 0.167112797498703, "rewards/rejected": -0.24601563811302185, "step": 5417 }, { "epoch": 3.3051700472777186, "grad_norm": 2.3367035388946533, "learning_rate": 2.720881812614819e-06, "log_odds_chosen": 1.0405480861663818, "log_odds_ratio": -0.5391548871994019, "logits/chosen": -0.6887476444244385, "logits/rejected": -0.7850629091262817, "logps/chosen": -1.0291757583618164, "logps/rejected": -1.7336786985397339, "loss": 1.1545, "nll_loss": 1.0878241062164307, "rewards/accuracies": 0.5, "rewards/chosen": -0.10291758179664612, "rewards/margins": 0.07045028358697891, "rewards/rejected": -0.17336785793304443, "step": 5418 }, { "epoch": 3.3057800823547354, "grad_norm": 1.308440089225769, "learning_rate": 2.7199020208205756e-06, "log_odds_chosen": 1.4907399415969849, "log_odds_ratio": -0.5373146533966064, "logits/chosen": -0.7709952592849731, "logits/rejected": -0.793524980545044, "logps/chosen": -0.6975153684616089, "logps/rejected": -1.8141424655914307, "loss": 1.0293, "nll_loss": 0.920828104019165, "rewards/accuracies": 0.5, "rewards/chosen": -0.06975153088569641, "rewards/margins": 0.11166271567344666, "rewards/rejected": -0.18141423165798187, "step": 5419 }, { "epoch": 3.3063901174317523, "grad_norm": 3.394652843475342, "learning_rate": 2.7189222290263317e-06, "log_odds_chosen": 2.058954954147339, "log_odds_ratio": -0.4095514416694641, "logits/chosen": -0.748863697052002, "logits/rejected": -0.7950426340103149, "logps/chosen": -0.7490272521972656, "logps/rejected": -2.331563949584961, "loss": 1.1932, "nll_loss": 0.8184778690338135, "rewards/accuracies": 0.5, "rewards/chosen": -0.0749027281999588, "rewards/margins": 0.15825368463993073, "rewards/rejected": -0.23315638303756714, "step": 5420 }, { "epoch": 3.307000152508769, "grad_norm": 19.202383041381836, "learning_rate": 2.7179424372320882e-06, "log_odds_chosen": 1.0655090808868408, "log_odds_ratio": -0.4274356961250305, "logits/chosen": -0.9343639016151428, "logits/rejected": -0.989311933517456, "logps/chosen": -0.8926388025283813, "logps/rejected": -1.7420216798782349, "loss": 1.1238, "nll_loss": 1.0273737907409668, "rewards/accuracies": 0.625, "rewards/chosen": -0.08926387876272202, "rewards/margins": 0.08493828773498535, "rewards/rejected": -0.17420215904712677, "step": 5421 }, { "epoch": 3.3076101875857864, "grad_norm": 4.5589823722839355, "learning_rate": 2.7169626454378444e-06, "log_odds_chosen": 2.3838322162628174, "log_odds_ratio": -0.46757200360298157, "logits/chosen": -0.8204914927482605, "logits/rejected": -0.8615053296089172, "logps/chosen": -0.7364327311515808, "logps/rejected": -2.6257693767547607, "loss": 1.0029, "nll_loss": 0.8108437657356262, "rewards/accuracies": 0.625, "rewards/chosen": -0.0736432746052742, "rewards/margins": 0.18893365561962128, "rewards/rejected": -0.2625769376754761, "step": 5422 }, { "epoch": 3.3082202226628032, "grad_norm": 2.008258819580078, "learning_rate": 2.7159828536436005e-06, "log_odds_chosen": 1.3630874156951904, "log_odds_ratio": -0.3731398284435272, "logits/chosen": -0.8380830883979797, "logits/rejected": -0.9602451920509338, "logps/chosen": -0.8373363018035889, "logps/rejected": -1.8407483100891113, "loss": 1.103, "nll_loss": 1.0671041011810303, "rewards/accuracies": 0.75, "rewards/chosen": -0.08373363316059113, "rewards/margins": 0.10034121572971344, "rewards/rejected": -0.18407484889030457, "step": 5423 }, { "epoch": 3.30883025773982, "grad_norm": 1.2846624851226807, "learning_rate": 2.715003061849357e-06, "log_odds_chosen": 1.0885120630264282, "log_odds_ratio": -0.3758910298347473, "logits/chosen": -0.8429349064826965, "logits/rejected": -0.8132842183113098, "logps/chosen": -0.7496780157089233, "logps/rejected": -1.2889926433563232, "loss": 0.8508, "nll_loss": 1.001381278038025, "rewards/accuracies": 0.75, "rewards/chosen": -0.07496779412031174, "rewards/margins": 0.05393148213624954, "rewards/rejected": -0.12889927625656128, "step": 5424 }, { "epoch": 3.309440292816837, "grad_norm": 2.901200294494629, "learning_rate": 2.714023270055113e-06, "log_odds_chosen": 1.8999897241592407, "log_odds_ratio": -0.37356895208358765, "logits/chosen": -0.8709656596183777, "logits/rejected": -0.9178562164306641, "logps/chosen": -0.7663681507110596, "logps/rejected": -2.216409683227539, "loss": 0.967, "nll_loss": 0.8863043785095215, "rewards/accuracies": 0.75, "rewards/chosen": -0.07663682103157043, "rewards/margins": 0.14500415325164795, "rewards/rejected": -0.22164097428321838, "step": 5425 }, { "epoch": 3.310050327893854, "grad_norm": 4.541430950164795, "learning_rate": 2.7130434782608693e-06, "log_odds_chosen": 2.0881402492523193, "log_odds_ratio": -0.4188839793205261, "logits/chosen": -1.003350853919983, "logits/rejected": -0.9802843928337097, "logps/chosen": -0.6689202785491943, "logps/rejected": -2.388151168823242, "loss": 0.9395, "nll_loss": 0.9324792623519897, "rewards/accuracies": 0.875, "rewards/chosen": -0.06689202040433884, "rewards/margins": 0.17192308604717255, "rewards/rejected": -0.23881511390209198, "step": 5426 }, { "epoch": 3.3106603629708706, "grad_norm": 3.7984838485717773, "learning_rate": 2.712063686466626e-06, "log_odds_chosen": 3.118557929992676, "log_odds_ratio": -0.21887606382369995, "logits/chosen": -0.6425793170928955, "logits/rejected": -0.829556405544281, "logps/chosen": -0.7138844728469849, "logps/rejected": -3.229123592376709, "loss": 0.991, "nll_loss": 0.8218833804130554, "rewards/accuracies": 0.875, "rewards/chosen": -0.07138845324516296, "rewards/margins": 0.25152388215065, "rewards/rejected": -0.3229123651981354, "step": 5427 }, { "epoch": 3.311270398047888, "grad_norm": 1.334054708480835, "learning_rate": 2.7110838946723824e-06, "log_odds_chosen": 2.0175342559814453, "log_odds_ratio": -0.2589098811149597, "logits/chosen": -0.7618581652641296, "logits/rejected": -0.8026235699653625, "logps/chosen": -0.6776161193847656, "logps/rejected": -2.1184487342834473, "loss": 1.0433, "nll_loss": 1.043080449104309, "rewards/accuracies": 0.875, "rewards/chosen": -0.0677616074681282, "rewards/margins": 0.14408326148986816, "rewards/rejected": -0.21184487640857697, "step": 5428 }, { "epoch": 3.3118804331249048, "grad_norm": 2.2353737354278564, "learning_rate": 2.710104102878138e-06, "log_odds_chosen": 2.629711151123047, "log_odds_ratio": -0.2173960655927658, "logits/chosen": -0.9598743915557861, "logits/rejected": -0.9553635120391846, "logps/chosen": -0.7384307980537415, "logps/rejected": -2.7959227561950684, "loss": 1.1007, "nll_loss": 1.0178399085998535, "rewards/accuracies": 0.875, "rewards/chosen": -0.0738430768251419, "rewards/margins": 0.20574918389320374, "rewards/rejected": -0.27959224581718445, "step": 5429 }, { "epoch": 3.3124904682019216, "grad_norm": 1.8081260919570923, "learning_rate": 2.7091243110838946e-06, "log_odds_chosen": 1.8918167352676392, "log_odds_ratio": -0.5093816518783569, "logits/chosen": -0.597030758857727, "logits/rejected": -0.8632514476776123, "logps/chosen": -0.7508243322372437, "logps/rejected": -2.3107106685638428, "loss": 1.0665, "nll_loss": 0.8691364526748657, "rewards/accuracies": 0.625, "rewards/chosen": -0.0750824362039566, "rewards/margins": 0.1559886336326599, "rewards/rejected": -0.23107106983661652, "step": 5430 }, { "epoch": 3.3131005032789385, "grad_norm": 5.1954240798950195, "learning_rate": 2.708144519289651e-06, "log_odds_chosen": 2.860487461090088, "log_odds_ratio": -0.24086253345012665, "logits/chosen": -0.6360041499137878, "logits/rejected": -0.8796170949935913, "logps/chosen": -0.6487436294555664, "logps/rejected": -2.912917137145996, "loss": 1.2814, "nll_loss": 0.8541589975357056, "rewards/accuracies": 0.875, "rewards/chosen": -0.06487436592578888, "rewards/margins": 0.22641736268997192, "rewards/rejected": -0.2912917137145996, "step": 5431 }, { "epoch": 3.3137105383559553, "grad_norm": 6.69340705871582, "learning_rate": 2.707164727495407e-06, "log_odds_chosen": 0.5352472066879272, "log_odds_ratio": -0.6741379499435425, "logits/chosen": -0.7402506470680237, "logits/rejected": -0.8122294545173645, "logps/chosen": -0.982933759689331, "logps/rejected": -1.402738332748413, "loss": 1.1039, "nll_loss": 1.1203967332839966, "rewards/accuracies": 0.375, "rewards/chosen": -0.09829337894916534, "rewards/margins": 0.041980449110269547, "rewards/rejected": -0.1402738243341446, "step": 5432 }, { "epoch": 3.3143205734329726, "grad_norm": 1.907341480255127, "learning_rate": 2.7061849357011634e-06, "log_odds_chosen": 1.294917345046997, "log_odds_ratio": -0.5393082499504089, "logits/chosen": -0.8696781396865845, "logits/rejected": -0.7179408669471741, "logps/chosen": -0.6433417797088623, "logps/rejected": -1.595552682876587, "loss": 1.0022, "nll_loss": 0.8538893461227417, "rewards/accuracies": 0.625, "rewards/chosen": -0.06433417648077011, "rewards/margins": 0.09522108733654022, "rewards/rejected": -0.15955525636672974, "step": 5433 }, { "epoch": 3.3149306085099894, "grad_norm": 1.7674223184585571, "learning_rate": 2.7052051439069195e-06, "log_odds_chosen": 1.3330895900726318, "log_odds_ratio": -0.5147973895072937, "logits/chosen": -0.4641875624656677, "logits/rejected": -0.5111833810806274, "logps/chosen": -0.594599187374115, "logps/rejected": -1.4075016975402832, "loss": 0.9361, "nll_loss": 0.6941754817962646, "rewards/accuracies": 0.875, "rewards/chosen": -0.05945991724729538, "rewards/margins": 0.08129024505615234, "rewards/rejected": -0.14075016975402832, "step": 5434 }, { "epoch": 3.3155406435870063, "grad_norm": 2.875666856765747, "learning_rate": 2.704225352112676e-06, "log_odds_chosen": 0.9882709383964539, "log_odds_ratio": -0.4509400725364685, "logits/chosen": -0.737787663936615, "logits/rejected": -0.8292946219444275, "logps/chosen": -0.760166585445404, "logps/rejected": -1.4129831790924072, "loss": 0.9406, "nll_loss": 1.1091762781143188, "rewards/accuracies": 0.75, "rewards/chosen": -0.07601665705442429, "rewards/margins": 0.06528165936470032, "rewards/rejected": -0.1412983238697052, "step": 5435 }, { "epoch": 3.316150678664023, "grad_norm": 1.4871379137039185, "learning_rate": 2.703245560318432e-06, "log_odds_chosen": 2.574636459350586, "log_odds_ratio": -0.2599790394306183, "logits/chosen": -0.5279076099395752, "logits/rejected": -0.766202449798584, "logps/chosen": -0.48949146270751953, "logps/rejected": -2.109729290008545, "loss": 0.9212, "nll_loss": 0.7402371168136597, "rewards/accuracies": 0.875, "rewards/chosen": -0.048949144780635834, "rewards/margins": 0.16202378273010254, "rewards/rejected": -0.21097293496131897, "step": 5436 }, { "epoch": 3.31676071374104, "grad_norm": 3.2247936725616455, "learning_rate": 2.7022657685241883e-06, "log_odds_chosen": 2.291332721710205, "log_odds_ratio": -0.37004756927490234, "logits/chosen": -0.6746982932090759, "logits/rejected": -0.8771324753761292, "logps/chosen": -0.9224933981895447, "logps/rejected": -2.8000664710998535, "loss": 1.0818, "nll_loss": 1.0384995937347412, "rewards/accuracies": 0.875, "rewards/chosen": -0.09224934130907059, "rewards/margins": 0.18775729835033417, "rewards/rejected": -0.28000661730766296, "step": 5437 }, { "epoch": 3.317370748818057, "grad_norm": 2.784555673599243, "learning_rate": 2.701285976729945e-06, "log_odds_chosen": 1.365513563156128, "log_odds_ratio": -0.5186707973480225, "logits/chosen": -0.795467734336853, "logits/rejected": -0.9390823841094971, "logps/chosen": -0.7806920409202576, "logps/rejected": -1.943550944328308, "loss": 1.0666, "nll_loss": 0.9462815523147583, "rewards/accuracies": 0.875, "rewards/chosen": -0.07806921005249023, "rewards/margins": 0.11628589779138565, "rewards/rejected": -0.1943551003932953, "step": 5438 }, { "epoch": 3.317980783895074, "grad_norm": 1.1999409198760986, "learning_rate": 2.700306184935701e-06, "log_odds_chosen": 3.7302355766296387, "log_odds_ratio": -0.2894693613052368, "logits/chosen": -0.5225744247436523, "logits/rejected": -0.9447658061981201, "logps/chosen": -0.5629143118858337, "logps/rejected": -3.495927333831787, "loss": 0.8923, "nll_loss": 0.6907123923301697, "rewards/accuracies": 0.75, "rewards/chosen": -0.05629143491387367, "rewards/margins": 0.2933012843132019, "rewards/rejected": -0.3495927155017853, "step": 5439 }, { "epoch": 3.318590818972091, "grad_norm": 1.5121601819992065, "learning_rate": 2.699326393141457e-06, "log_odds_chosen": 1.6255385875701904, "log_odds_ratio": -0.4377356767654419, "logits/chosen": -0.9762561917304993, "logits/rejected": -1.0033273696899414, "logps/chosen": -0.825011670589447, "logps/rejected": -2.1762142181396484, "loss": 1.0621, "nll_loss": 1.100434422492981, "rewards/accuracies": 0.75, "rewards/chosen": -0.08250117301940918, "rewards/margins": 0.13512025773525238, "rewards/rejected": -0.21762141585350037, "step": 5440 }, { "epoch": 3.319200854049108, "grad_norm": 4.669336318969727, "learning_rate": 2.6983466013472137e-06, "log_odds_chosen": 0.9799679517745972, "log_odds_ratio": -0.4639197289943695, "logits/chosen": -0.770332932472229, "logits/rejected": -0.9394867420196533, "logps/chosen": -0.8755922317504883, "logps/rejected": -1.5886187553405762, "loss": 1.0723, "nll_loss": 1.0827492475509644, "rewards/accuracies": 0.75, "rewards/chosen": -0.08755922317504883, "rewards/margins": 0.07130266726016998, "rewards/rejected": -0.15886187553405762, "step": 5441 }, { "epoch": 3.3198108891261247, "grad_norm": 3.9989452362060547, "learning_rate": 2.69736680955297e-06, "log_odds_chosen": 1.1039668321609497, "log_odds_ratio": -0.5464693307876587, "logits/chosen": -0.8999801874160767, "logits/rejected": -0.853601336479187, "logps/chosen": -0.6738420724868774, "logps/rejected": -1.1463730335235596, "loss": 1.0071, "nll_loss": 0.9728829860687256, "rewards/accuracies": 0.75, "rewards/chosen": -0.06738421320915222, "rewards/margins": 0.04725309833884239, "rewards/rejected": -0.11463731527328491, "step": 5442 }, { "epoch": 3.3204209242031415, "grad_norm": 4.121947288513184, "learning_rate": 2.696387017758726e-06, "log_odds_chosen": 1.0553853511810303, "log_odds_ratio": -0.5965169072151184, "logits/chosen": -0.9452572464942932, "logits/rejected": -0.9599323272705078, "logps/chosen": -0.9814473390579224, "logps/rejected": -1.8527119159698486, "loss": 1.1214, "nll_loss": 1.1927462816238403, "rewards/accuracies": 0.625, "rewards/chosen": -0.09814473241567612, "rewards/margins": 0.08712644875049591, "rewards/rejected": -0.18527117371559143, "step": 5443 }, { "epoch": 3.321030959280159, "grad_norm": 1.97111976146698, "learning_rate": 2.6954072259644824e-06, "log_odds_chosen": 1.3706462383270264, "log_odds_ratio": -0.4248221218585968, "logits/chosen": -1.0358116626739502, "logits/rejected": -1.076289176940918, "logps/chosen": -0.9634107947349548, "logps/rejected": -2.013965129852295, "loss": 1.045, "nll_loss": 1.2244287729263306, "rewards/accuracies": 0.875, "rewards/chosen": -0.0963410809636116, "rewards/margins": 0.10505543649196625, "rewards/rejected": -0.20139651000499725, "step": 5444 }, { "epoch": 3.3216409943571756, "grad_norm": 1.2294362783432007, "learning_rate": 2.694427434170239e-06, "log_odds_chosen": 2.156493663787842, "log_odds_ratio": -0.30496564507484436, "logits/chosen": -0.8976548910140991, "logits/rejected": -0.8551735877990723, "logps/chosen": -0.9594241380691528, "logps/rejected": -2.7392759323120117, "loss": 1.0698, "nll_loss": 1.0880141258239746, "rewards/accuracies": 0.875, "rewards/chosen": -0.0959424152970314, "rewards/margins": 0.17798517644405365, "rewards/rejected": -0.27392759919166565, "step": 5445 }, { "epoch": 3.3222510294341925, "grad_norm": 1.2648379802703857, "learning_rate": 2.6934476423759947e-06, "log_odds_chosen": 1.8463850021362305, "log_odds_ratio": -0.3672184348106384, "logits/chosen": -0.7122713327407837, "logits/rejected": -0.948324978351593, "logps/chosen": -1.0171520709991455, "logps/rejected": -2.4359397888183594, "loss": 0.9885, "nll_loss": 1.1109567880630493, "rewards/accuracies": 0.875, "rewards/chosen": -0.10171520709991455, "rewards/margins": 0.14187878370285034, "rewards/rejected": -0.2435939908027649, "step": 5446 }, { "epoch": 3.3228610645112093, "grad_norm": 8.789155006408691, "learning_rate": 2.6924678505817512e-06, "log_odds_chosen": 2.851466178894043, "log_odds_ratio": -0.2729991674423218, "logits/chosen": -0.7402122020721436, "logits/rejected": -0.9716955423355103, "logps/chosen": -0.7267653346061707, "logps/rejected": -3.0569300651550293, "loss": 0.9617, "nll_loss": 0.9138252139091492, "rewards/accuracies": 0.875, "rewards/chosen": -0.07267653942108154, "rewards/margins": 0.2330164909362793, "rewards/rejected": -0.30569300055503845, "step": 5447 }, { "epoch": 3.323471099588226, "grad_norm": 9.58335018157959, "learning_rate": 2.6914880587875078e-06, "log_odds_chosen": 3.2947731018066406, "log_odds_ratio": -0.3589739501476288, "logits/chosen": -0.8009464740753174, "logits/rejected": -0.9417043924331665, "logps/chosen": -0.7690584659576416, "logps/rejected": -3.550096273422241, "loss": 1.0588, "nll_loss": 0.9805853962898254, "rewards/accuracies": 0.875, "rewards/chosen": -0.07690584659576416, "rewards/margins": 0.278103768825531, "rewards/rejected": -0.35500961542129517, "step": 5448 }, { "epoch": 3.324081134665243, "grad_norm": 3.21555495262146, "learning_rate": 2.690508266993264e-06, "log_odds_chosen": 0.6876837015151978, "log_odds_ratio": -0.5507512092590332, "logits/chosen": -0.7845026850700378, "logits/rejected": -0.781012773513794, "logps/chosen": -0.8366854190826416, "logps/rejected": -1.1629290580749512, "loss": 0.9636, "nll_loss": 1.0873504877090454, "rewards/accuracies": 0.5, "rewards/chosen": -0.0836685374379158, "rewards/margins": 0.03262436017394066, "rewards/rejected": -0.11629289388656616, "step": 5449 }, { "epoch": 3.3246911697422603, "grad_norm": 1.2710192203521729, "learning_rate": 2.68952847519902e-06, "log_odds_chosen": 2.2208874225616455, "log_odds_ratio": -0.5339493751525879, "logits/chosen": -0.8800817131996155, "logits/rejected": -0.8500584363937378, "logps/chosen": -0.8388484716415405, "logps/rejected": -2.7248780727386475, "loss": 0.9095, "nll_loss": 0.9343535304069519, "rewards/accuracies": 0.5, "rewards/chosen": -0.08388485759496689, "rewards/margins": 0.18860295414924622, "rewards/rejected": -0.2724878191947937, "step": 5450 }, { "epoch": 3.325301204819277, "grad_norm": 1.4975641965866089, "learning_rate": 2.6885486834047766e-06, "log_odds_chosen": 1.0170234441757202, "log_odds_ratio": -0.38561582565307617, "logits/chosen": -1.0637234449386597, "logits/rejected": -0.9307048320770264, "logps/chosen": -0.9263051748275757, "logps/rejected": -1.6980555057525635, "loss": 1.3229, "nll_loss": 1.0590453147888184, "rewards/accuracies": 1.0, "rewards/chosen": -0.09263052046298981, "rewards/margins": 0.07717502862215042, "rewards/rejected": -0.16980555653572083, "step": 5451 }, { "epoch": 3.325911239896294, "grad_norm": 2.037876605987549, "learning_rate": 2.6875688916105327e-06, "log_odds_chosen": 1.3531584739685059, "log_odds_ratio": -0.4335094392299652, "logits/chosen": -0.9398515224456787, "logits/rejected": -1.031895399093628, "logps/chosen": -0.6508457660675049, "logps/rejected": -1.5940035581588745, "loss": 0.9465, "nll_loss": 0.7473663687705994, "rewards/accuracies": 0.625, "rewards/chosen": -0.06508457660675049, "rewards/margins": 0.0943157821893692, "rewards/rejected": -0.1594003587961197, "step": 5452 }, { "epoch": 3.326521274973311, "grad_norm": 1.484467625617981, "learning_rate": 2.686589099816289e-06, "log_odds_chosen": 1.5389151573181152, "log_odds_ratio": -0.544908881187439, "logits/chosen": -0.6721194386482239, "logits/rejected": -0.8846744298934937, "logps/chosen": -0.8193135261535645, "logps/rejected": -1.9367283582687378, "loss": 1.1571, "nll_loss": 1.048827886581421, "rewards/accuracies": 0.625, "rewards/chosen": -0.08193135261535645, "rewards/margins": 0.11174148321151733, "rewards/rejected": -0.19367283582687378, "step": 5453 }, { "epoch": 3.327131310050328, "grad_norm": 1.286847472190857, "learning_rate": 2.685609308022045e-06, "log_odds_chosen": 0.995360255241394, "log_odds_ratio": -0.5619853138923645, "logits/chosen": -0.7463164925575256, "logits/rejected": -0.7926505208015442, "logps/chosen": -0.8654636144638062, "logps/rejected": -1.7367579936981201, "loss": 1.0656, "nll_loss": 0.9835556149482727, "rewards/accuracies": 0.5, "rewards/chosen": -0.08654636144638062, "rewards/margins": 0.08712942898273468, "rewards/rejected": -0.1736758053302765, "step": 5454 }, { "epoch": 3.327741345127345, "grad_norm": 1.2578201293945312, "learning_rate": 2.6846295162278015e-06, "log_odds_chosen": 1.3072376251220703, "log_odds_ratio": -0.6080542802810669, "logits/chosen": -0.8973045945167542, "logits/rejected": -0.8590763211250305, "logps/chosen": -0.817379355430603, "logps/rejected": -1.9957791566848755, "loss": 0.9652, "nll_loss": 0.9776780009269714, "rewards/accuracies": 0.625, "rewards/chosen": -0.0817379355430603, "rewards/margins": 0.11783997714519501, "rewards/rejected": -0.1995779275894165, "step": 5455 }, { "epoch": 3.328351380204362, "grad_norm": 1.358795166015625, "learning_rate": 2.683649724433558e-06, "log_odds_chosen": 2.580568313598633, "log_odds_ratio": -0.18244493007659912, "logits/chosen": -0.8400678634643555, "logits/rejected": -1.0309606790542603, "logps/chosen": -0.6526367664337158, "logps/rejected": -2.560877799987793, "loss": 1.0716, "nll_loss": 0.7503101825714111, "rewards/accuracies": 1.0, "rewards/chosen": -0.06526368111371994, "rewards/margins": 0.19082410633563995, "rewards/rejected": -0.2560877799987793, "step": 5456 }, { "epoch": 3.3289614152813787, "grad_norm": 2.283942461013794, "learning_rate": 2.6826699326393137e-06, "log_odds_chosen": 1.4949607849121094, "log_odds_ratio": -0.33438146114349365, "logits/chosen": -0.5805569887161255, "logits/rejected": -0.6196020841598511, "logps/chosen": -0.6801563501358032, "logps/rejected": -1.6353318691253662, "loss": 1.1068, "nll_loss": 0.8271991014480591, "rewards/accuracies": 0.875, "rewards/chosen": -0.06801563501358032, "rewards/margins": 0.09551756829023361, "rewards/rejected": -0.16353321075439453, "step": 5457 }, { "epoch": 3.3295714503583955, "grad_norm": 6.558530807495117, "learning_rate": 2.6816901408450703e-06, "log_odds_chosen": 3.190699577331543, "log_odds_ratio": -0.2540540397167206, "logits/chosen": -0.5936174392700195, "logits/rejected": -0.7186769843101501, "logps/chosen": -0.6599417924880981, "logps/rejected": -3.307640790939331, "loss": 0.795, "nll_loss": 0.7647586464881897, "rewards/accuracies": 0.875, "rewards/chosen": -0.06599418073892593, "rewards/margins": 0.26476991176605225, "rewards/rejected": -0.3307640850543976, "step": 5458 }, { "epoch": 3.3301814854354124, "grad_norm": 1.3551692962646484, "learning_rate": 2.680710349050827e-06, "log_odds_chosen": 2.222337245941162, "log_odds_ratio": -0.3319571316242218, "logits/chosen": -0.6476256251335144, "logits/rejected": -0.7641023993492126, "logps/chosen": -0.5797178745269775, "logps/rejected": -2.2892754077911377, "loss": 0.976, "nll_loss": 0.7362563610076904, "rewards/accuracies": 0.75, "rewards/chosen": -0.057971786707639694, "rewards/margins": 0.17095574736595154, "rewards/rejected": -0.22892755270004272, "step": 5459 }, { "epoch": 3.330791520512429, "grad_norm": 1.9705886840820312, "learning_rate": 2.6797305572565825e-06, "log_odds_chosen": 1.419214129447937, "log_odds_ratio": -0.4135032296180725, "logits/chosen": -0.6748795509338379, "logits/rejected": -0.7769575119018555, "logps/chosen": -0.7161589860916138, "logps/rejected": -1.655975580215454, "loss": 1.1159, "nll_loss": 0.9355428218841553, "rewards/accuracies": 0.875, "rewards/chosen": -0.07161589711904526, "rewards/margins": 0.09398166835308075, "rewards/rejected": -0.1655975729227066, "step": 5460 }, { "epoch": 3.3314015555894465, "grad_norm": 2.534827470779419, "learning_rate": 2.678750765462339e-06, "log_odds_chosen": 0.6463217735290527, "log_odds_ratio": -0.6206586360931396, "logits/chosen": -0.9657507538795471, "logits/rejected": -0.8249464631080627, "logps/chosen": -0.8475192189216614, "logps/rejected": -1.4216110706329346, "loss": 1.1125, "nll_loss": 0.9798840284347534, "rewards/accuracies": 0.75, "rewards/chosen": -0.0847519263625145, "rewards/margins": 0.05740918219089508, "rewards/rejected": -0.14216110110282898, "step": 5461 }, { "epoch": 3.3320115906664634, "grad_norm": 1.6788415908813477, "learning_rate": 2.6777709736680956e-06, "log_odds_chosen": 3.1526100635528564, "log_odds_ratio": -0.2672305107116699, "logits/chosen": -0.7361298203468323, "logits/rejected": -0.8569818139076233, "logps/chosen": -0.7962135076522827, "logps/rejected": -3.4953391551971436, "loss": 0.9512, "nll_loss": 0.9360005855560303, "rewards/accuracies": 0.875, "rewards/chosen": -0.07962135970592499, "rewards/margins": 0.2699125409126282, "rewards/rejected": -0.34953391551971436, "step": 5462 }, { "epoch": 3.33262162574348, "grad_norm": 19.811473846435547, "learning_rate": 2.6767911818738517e-06, "log_odds_chosen": 1.7868436574935913, "log_odds_ratio": -0.31119292974472046, "logits/chosen": -0.39330190420150757, "logits/rejected": -0.5973047614097595, "logps/chosen": -0.6029655337333679, "logps/rejected": -1.8043662309646606, "loss": 0.8834, "nll_loss": 0.6802865266799927, "rewards/accuracies": 0.875, "rewards/chosen": -0.06029655411839485, "rewards/margins": 0.12014006823301315, "rewards/rejected": -0.1804366260766983, "step": 5463 }, { "epoch": 3.333231660820497, "grad_norm": 2.8431808948516846, "learning_rate": 2.675811390079608e-06, "log_odds_chosen": 1.0835970640182495, "log_odds_ratio": -0.393497496843338, "logits/chosen": -1.0986387729644775, "logits/rejected": -1.111392855644226, "logps/chosen": -0.7581393718719482, "logps/rejected": -1.471687912940979, "loss": 0.9986, "nll_loss": 0.9559326171875, "rewards/accuracies": 0.75, "rewards/chosen": -0.07581394165754318, "rewards/margins": 0.07135485112667084, "rewards/rejected": -0.14716878533363342, "step": 5464 }, { "epoch": 3.3338416958975143, "grad_norm": 1.6515320539474487, "learning_rate": 2.6748315982853644e-06, "log_odds_chosen": 2.76408314704895, "log_odds_ratio": -0.23544467985630035, "logits/chosen": -0.9781838655471802, "logits/rejected": -1.0238503217697144, "logps/chosen": -1.0689603090286255, "logps/rejected": -3.3777663707733154, "loss": 1.0969, "nll_loss": 1.2173900604248047, "rewards/accuracies": 0.875, "rewards/chosen": -0.1068960428237915, "rewards/margins": 0.23088058829307556, "rewards/rejected": -0.33777666091918945, "step": 5465 }, { "epoch": 3.334451730974531, "grad_norm": 0.922160804271698, "learning_rate": 2.6738518064911205e-06, "log_odds_chosen": 1.2283377647399902, "log_odds_ratio": -0.4542860984802246, "logits/chosen": -0.807510495185852, "logits/rejected": -0.739383339881897, "logps/chosen": -0.8625266551971436, "logps/rejected": -1.7920961380004883, "loss": 0.9689, "nll_loss": 1.0109994411468506, "rewards/accuracies": 0.75, "rewards/chosen": -0.08625267446041107, "rewards/margins": 0.09295694530010223, "rewards/rejected": -0.1792096048593521, "step": 5466 }, { "epoch": 3.335061766051548, "grad_norm": 5.862128734588623, "learning_rate": 2.6728720146968766e-06, "log_odds_chosen": 2.5407400131225586, "log_odds_ratio": -0.32126858830451965, "logits/chosen": -0.7742797136306763, "logits/rejected": -0.922195315361023, "logps/chosen": -0.7101877927780151, "logps/rejected": -2.6338930130004883, "loss": 1.1804, "nll_loss": 1.2664544582366943, "rewards/accuracies": 0.75, "rewards/chosen": -0.07101878523826599, "rewards/margins": 0.19237053394317627, "rewards/rejected": -0.2633892893791199, "step": 5467 }, { "epoch": 3.335671801128565, "grad_norm": 1.7572301626205444, "learning_rate": 2.671892222902633e-06, "log_odds_chosen": 1.488074541091919, "log_odds_ratio": -0.3817160725593567, "logits/chosen": -0.9299548268318176, "logits/rejected": -0.8809030055999756, "logps/chosen": -0.7548158764839172, "logps/rejected": -1.7838828563690186, "loss": 0.9655, "nll_loss": 1.0857006311416626, "rewards/accuracies": 0.875, "rewards/chosen": -0.0754815936088562, "rewards/margins": 0.10290670394897461, "rewards/rejected": -0.1783882975578308, "step": 5468 }, { "epoch": 3.3362818362055817, "grad_norm": 4.384343147277832, "learning_rate": 2.6709124311083893e-06, "log_odds_chosen": 1.9193944931030273, "log_odds_ratio": -0.36803296208381653, "logits/chosen": -0.8276362419128418, "logits/rejected": -1.040036916732788, "logps/chosen": -0.705865740776062, "logps/rejected": -2.129302501678467, "loss": 1.0305, "nll_loss": 0.9658318758010864, "rewards/accuracies": 0.75, "rewards/chosen": -0.07058657705783844, "rewards/margins": 0.1423436850309372, "rewards/rejected": -0.21293026208877563, "step": 5469 }, { "epoch": 3.3368918712825986, "grad_norm": 1.7969778776168823, "learning_rate": 2.669932639314146e-06, "log_odds_chosen": 3.5322818756103516, "log_odds_ratio": -0.20167098939418793, "logits/chosen": -0.4532390534877777, "logits/rejected": -0.8564715385437012, "logps/chosen": -0.5187807083129883, "logps/rejected": -3.3047986030578613, "loss": 0.9894, "nll_loss": 0.5493654012680054, "rewards/accuracies": 1.0, "rewards/chosen": -0.05187806859612465, "rewards/margins": 0.2786017954349518, "rewards/rejected": -0.33047986030578613, "step": 5470 }, { "epoch": 3.337501906359616, "grad_norm": 1.6346900463104248, "learning_rate": 2.6689528475199016e-06, "log_odds_chosen": 1.8079497814178467, "log_odds_ratio": -0.3191988468170166, "logits/chosen": -1.164955973625183, "logits/rejected": -1.099422812461853, "logps/chosen": -0.858766496181488, "logps/rejected": -2.246694564819336, "loss": 1.0301, "nll_loss": 1.148430585861206, "rewards/accuracies": 0.75, "rewards/chosen": -0.08587665110826492, "rewards/margins": 0.13879279792308807, "rewards/rejected": -0.2246694564819336, "step": 5471 }, { "epoch": 3.3381119414366327, "grad_norm": 1.9603092670440674, "learning_rate": 2.667973055725658e-06, "log_odds_chosen": 1.7020432949066162, "log_odds_ratio": -0.5103854537010193, "logits/chosen": -0.7848063111305237, "logits/rejected": -0.8403785824775696, "logps/chosen": -0.7769057750701904, "logps/rejected": -2.2035508155822754, "loss": 1.0235, "nll_loss": 0.8585220575332642, "rewards/accuracies": 0.625, "rewards/chosen": -0.07769057899713516, "rewards/margins": 0.14266449213027954, "rewards/rejected": -0.2203550636768341, "step": 5472 }, { "epoch": 3.3387219765136495, "grad_norm": 1.3877146244049072, "learning_rate": 2.6669932639314146e-06, "log_odds_chosen": 2.819646120071411, "log_odds_ratio": -0.26248979568481445, "logits/chosen": -0.7124980688095093, "logits/rejected": -0.9933079481124878, "logps/chosen": -0.7586475610733032, "logps/rejected": -3.009093999862671, "loss": 1.0545, "nll_loss": 1.0428810119628906, "rewards/accuracies": 1.0, "rewards/chosen": -0.0758647620677948, "rewards/margins": 0.2250446379184723, "rewards/rejected": -0.3009093999862671, "step": 5473 }, { "epoch": 3.3393320115906664, "grad_norm": 1.4999643564224243, "learning_rate": 2.6660134721371703e-06, "log_odds_chosen": 1.5812313556671143, "log_odds_ratio": -0.43825921416282654, "logits/chosen": -0.8021678924560547, "logits/rejected": -0.7797814607620239, "logps/chosen": -0.8921382427215576, "logps/rejected": -2.222925901412964, "loss": 0.9829, "nll_loss": 0.9940978288650513, "rewards/accuracies": 0.625, "rewards/chosen": -0.08921382576227188, "rewards/margins": 0.13307878375053406, "rewards/rejected": -0.22229260206222534, "step": 5474 }, { "epoch": 3.3399420466676832, "grad_norm": 2.54026460647583, "learning_rate": 2.665033680342927e-06, "log_odds_chosen": 2.851294755935669, "log_odds_ratio": -0.32796013355255127, "logits/chosen": -1.1253095865249634, "logits/rejected": -1.166724681854248, "logps/chosen": -0.8647353649139404, "logps/rejected": -3.209702968597412, "loss": 1.1285, "nll_loss": 1.208395004272461, "rewards/accuracies": 0.875, "rewards/chosen": -0.08647353947162628, "rewards/margins": 0.23449672758579254, "rewards/rejected": -0.3209702670574188, "step": 5475 }, { "epoch": 3.3405520817447005, "grad_norm": 2.3463377952575684, "learning_rate": 2.6640538885486834e-06, "log_odds_chosen": 2.956173896789551, "log_odds_ratio": -0.30172720551490784, "logits/chosen": -0.8962218761444092, "logits/rejected": -0.9864035844802856, "logps/chosen": -0.7545950412750244, "logps/rejected": -3.1269278526306152, "loss": 1.0225, "nll_loss": 0.8789636492729187, "rewards/accuracies": 0.75, "rewards/chosen": -0.07545951008796692, "rewards/margins": 0.23723329603672028, "rewards/rejected": -0.3126928210258484, "step": 5476 }, { "epoch": 3.3411621168217174, "grad_norm": 5.143762111663818, "learning_rate": 2.6630740967544396e-06, "log_odds_chosen": 2.294745445251465, "log_odds_ratio": -0.44755446910858154, "logits/chosen": -0.7378626465797424, "logits/rejected": -0.8344031572341919, "logps/chosen": -0.6200477480888367, "logps/rejected": -2.4218766689300537, "loss": 0.9171, "nll_loss": 0.7245779633522034, "rewards/accuracies": 0.625, "rewards/chosen": -0.06200477480888367, "rewards/margins": 0.18018287420272827, "rewards/rejected": -0.24218767881393433, "step": 5477 }, { "epoch": 3.3417721518987342, "grad_norm": 2.9278347492218018, "learning_rate": 2.6620943049601957e-06, "log_odds_chosen": 2.942133903503418, "log_odds_ratio": -0.30710291862487793, "logits/chosen": -0.8799505829811096, "logits/rejected": -0.7698130011558533, "logps/chosen": -0.8353697061538696, "logps/rejected": -3.3835949897766113, "loss": 1.0525, "nll_loss": 0.9653452634811401, "rewards/accuracies": 0.75, "rewards/chosen": -0.08353698253631592, "rewards/margins": 0.2548225224018097, "rewards/rejected": -0.3383595049381256, "step": 5478 }, { "epoch": 3.342382186975751, "grad_norm": 1.3579152822494507, "learning_rate": 2.6611145131659522e-06, "log_odds_chosen": 0.9152815341949463, "log_odds_ratio": -0.4839972257614136, "logits/chosen": -0.7903721332550049, "logits/rejected": -0.8692915439605713, "logps/chosen": -0.8860891461372375, "logps/rejected": -1.5848329067230225, "loss": 0.9241, "nll_loss": 0.9149001836776733, "rewards/accuracies": 0.75, "rewards/chosen": -0.08860891312360764, "rewards/margins": 0.06987437605857849, "rewards/rejected": -0.15848329663276672, "step": 5479 }, { "epoch": 3.342992222052768, "grad_norm": 8.053631782531738, "learning_rate": 2.6601347213717083e-06, "log_odds_chosen": 1.4933223724365234, "log_odds_ratio": -0.45336252450942993, "logits/chosen": -0.8635882139205933, "logits/rejected": -0.9795339107513428, "logps/chosen": -0.762122631072998, "logps/rejected": -1.9002976417541504, "loss": 1.0458, "nll_loss": 1.1259795427322388, "rewards/accuracies": 0.625, "rewards/chosen": -0.07621225714683533, "rewards/margins": 0.11381750553846359, "rewards/rejected": -0.19002975523471832, "step": 5480 }, { "epoch": 3.3436022571297848, "grad_norm": 2.458906650543213, "learning_rate": 2.6591549295774645e-06, "log_odds_chosen": 1.5315017700195312, "log_odds_ratio": -0.5922720432281494, "logits/chosen": -0.8962048888206482, "logits/rejected": -0.9026093482971191, "logps/chosen": -0.7847844362258911, "logps/rejected": -1.992059588432312, "loss": 1.0401, "nll_loss": 1.0144670009613037, "rewards/accuracies": 0.375, "rewards/chosen": -0.07847844809293747, "rewards/margins": 0.12072750926017761, "rewards/rejected": -0.19920596480369568, "step": 5481 }, { "epoch": 3.344212292206802, "grad_norm": 1.527251958847046, "learning_rate": 2.658175137783221e-06, "log_odds_chosen": 1.7295114994049072, "log_odds_ratio": -0.4722040891647339, "logits/chosen": -0.7147181034088135, "logits/rejected": -0.6748997569084167, "logps/chosen": -0.713068425655365, "logps/rejected": -1.8413094282150269, "loss": 1.0224, "nll_loss": 0.9785425066947937, "rewards/accuracies": 0.75, "rewards/chosen": -0.07130684703588486, "rewards/margins": 0.11282410472631454, "rewards/rejected": -0.1841309368610382, "step": 5482 }, { "epoch": 3.344822327283819, "grad_norm": 1.143698811531067, "learning_rate": 2.657195345988977e-06, "log_odds_chosen": 1.709416389465332, "log_odds_ratio": -0.4948608875274658, "logits/chosen": -0.7232112884521484, "logits/rejected": -0.8470561504364014, "logps/chosen": -0.7551184296607971, "logps/rejected": -2.0982866287231445, "loss": 1.0661, "nll_loss": 0.8649240732192993, "rewards/accuracies": 0.625, "rewards/chosen": -0.07551184296607971, "rewards/margins": 0.1343168169260025, "rewards/rejected": -0.20982865989208221, "step": 5483 }, { "epoch": 3.3454323623608357, "grad_norm": 1.4568636417388916, "learning_rate": 2.6562155541947337e-06, "log_odds_chosen": 1.964526653289795, "log_odds_ratio": -0.4954219460487366, "logits/chosen": -0.8733076453208923, "logits/rejected": -0.9848282337188721, "logps/chosen": -0.6981375217437744, "logps/rejected": -2.1886892318725586, "loss": 1.025, "nll_loss": 1.0779138803482056, "rewards/accuracies": 0.75, "rewards/chosen": -0.06981375813484192, "rewards/margins": 0.14905515313148499, "rewards/rejected": -0.2188688963651657, "step": 5484 }, { "epoch": 3.3460423974378526, "grad_norm": 2.4388175010681152, "learning_rate": 2.65523576240049e-06, "log_odds_chosen": 1.1486620903015137, "log_odds_ratio": -0.626546323299408, "logits/chosen": -1.010924220085144, "logits/rejected": -0.9357869625091553, "logps/chosen": -0.8602744340896606, "logps/rejected": -1.9098875522613525, "loss": 1.092, "nll_loss": 1.0723611116409302, "rewards/accuracies": 0.5, "rewards/chosen": -0.08602744340896606, "rewards/margins": 0.10496131330728531, "rewards/rejected": -0.19098874926567078, "step": 5485 }, { "epoch": 3.3466524325148694, "grad_norm": 1.6339852809906006, "learning_rate": 2.654255970606246e-06, "log_odds_chosen": 1.2626627683639526, "log_odds_ratio": -0.37524986267089844, "logits/chosen": -0.8689796924591064, "logits/rejected": -0.8460570573806763, "logps/chosen": -0.8768980503082275, "logps/rejected": -1.865081787109375, "loss": 1.1733, "nll_loss": 1.1592950820922852, "rewards/accuracies": 0.75, "rewards/chosen": -0.08768980205059052, "rewards/margins": 0.09881836920976639, "rewards/rejected": -0.1865081787109375, "step": 5486 }, { "epoch": 3.3472624675918867, "grad_norm": 1.2133550643920898, "learning_rate": 2.6532761788120025e-06, "log_odds_chosen": 2.3357815742492676, "log_odds_ratio": -0.3900904655456543, "logits/chosen": -0.8395351767539978, "logits/rejected": -0.9955193996429443, "logps/chosen": -0.9354180693626404, "logps/rejected": -2.868767261505127, "loss": 1.0, "nll_loss": 1.0381534099578857, "rewards/accuracies": 0.875, "rewards/chosen": -0.09354180097579956, "rewards/margins": 0.1933349370956421, "rewards/rejected": -0.28687670826911926, "step": 5487 }, { "epoch": 3.3478725026689036, "grad_norm": 2.485476016998291, "learning_rate": 2.652296387017759e-06, "log_odds_chosen": 1.3294261693954468, "log_odds_ratio": -0.535697340965271, "logits/chosen": -0.8715786337852478, "logits/rejected": -1.0390899181365967, "logps/chosen": -0.8990916609764099, "logps/rejected": -2.0433311462402344, "loss": 1.0296, "nll_loss": 1.2391244173049927, "rewards/accuracies": 0.5, "rewards/chosen": -0.08990916609764099, "rewards/margins": 0.1144239529967308, "rewards/rejected": -0.2043331265449524, "step": 5488 }, { "epoch": 3.3484825377459204, "grad_norm": 4.530276298522949, "learning_rate": 2.6513165952235147e-06, "log_odds_chosen": 3.5676605701446533, "log_odds_ratio": -0.15683428943157196, "logits/chosen": -0.7744134068489075, "logits/rejected": -1.0610295534133911, "logps/chosen": -0.7326109409332275, "logps/rejected": -3.552097797393799, "loss": 1.045, "nll_loss": 0.8016675114631653, "rewards/accuracies": 1.0, "rewards/chosen": -0.07326109707355499, "rewards/margins": 0.2819487154483795, "rewards/rejected": -0.3552097976207733, "step": 5489 }, { "epoch": 3.3490925728229373, "grad_norm": 1.1942088603973389, "learning_rate": 2.6503368034292713e-06, "log_odds_chosen": 1.4376063346862793, "log_odds_ratio": -0.3664393424987793, "logits/chosen": -0.7228289842605591, "logits/rejected": -0.8426766991615295, "logps/chosen": -0.7099093794822693, "logps/rejected": -1.8103995323181152, "loss": 1.0089, "nll_loss": 0.916056752204895, "rewards/accuracies": 0.75, "rewards/chosen": -0.07099093496799469, "rewards/margins": 0.11004902422428131, "rewards/rejected": -0.1810399740934372, "step": 5490 }, { "epoch": 3.349702607899954, "grad_norm": 1.1280615329742432, "learning_rate": 2.6493570116350274e-06, "log_odds_chosen": 1.7744877338409424, "log_odds_ratio": -0.39612239599227905, "logits/chosen": -0.8693268299102783, "logits/rejected": -1.0077375173568726, "logps/chosen": -0.6609651446342468, "logps/rejected": -1.8949050903320312, "loss": 1.1071, "nll_loss": 1.2257075309753418, "rewards/accuracies": 0.75, "rewards/chosen": -0.06609651446342468, "rewards/margins": 0.12339401245117188, "rewards/rejected": -0.18949052691459656, "step": 5491 }, { "epoch": 3.350312642976971, "grad_norm": 3.507798433303833, "learning_rate": 2.6483772198407835e-06, "log_odds_chosen": 2.8038277626037598, "log_odds_ratio": -0.4934028685092926, "logits/chosen": -1.0342220067977905, "logits/rejected": -1.0131430625915527, "logps/chosen": -1.2972939014434814, "logps/rejected": -3.939401149749756, "loss": 1.2085, "nll_loss": 1.265567421913147, "rewards/accuracies": 0.5, "rewards/chosen": -0.12972939014434814, "rewards/margins": 0.26421070098876953, "rewards/rejected": -0.39394015073776245, "step": 5492 }, { "epoch": 3.3509226780539882, "grad_norm": 1.3159195184707642, "learning_rate": 2.64739742804654e-06, "log_odds_chosen": 2.141721248626709, "log_odds_ratio": -0.4857408404350281, "logits/chosen": -0.6472417116165161, "logits/rejected": -0.9051687121391296, "logps/chosen": -0.8650286197662354, "logps/rejected": -2.569915771484375, "loss": 1.001, "nll_loss": 0.8250566124916077, "rewards/accuracies": 0.75, "rewards/chosen": -0.08650286495685577, "rewards/margins": 0.17048871517181396, "rewards/rejected": -0.25699159502983093, "step": 5493 }, { "epoch": 3.351532713131005, "grad_norm": 7.099554538726807, "learning_rate": 2.646417636252296e-06, "log_odds_chosen": 1.7772395610809326, "log_odds_ratio": -0.4239324927330017, "logits/chosen": -0.9265121817588806, "logits/rejected": -1.038095235824585, "logps/chosen": -0.7328394651412964, "logps/rejected": -1.7525968551635742, "loss": 1.1235, "nll_loss": 0.9829492568969727, "rewards/accuracies": 0.75, "rewards/chosen": -0.07328394800424576, "rewards/margins": 0.10197575390338898, "rewards/rejected": -0.17525967955589294, "step": 5494 }, { "epoch": 3.352142748208022, "grad_norm": 2.5012481212615967, "learning_rate": 2.6454378444580527e-06, "log_odds_chosen": 2.0618932247161865, "log_odds_ratio": -0.4815300703048706, "logits/chosen": -0.4847283661365509, "logits/rejected": -0.7352962493896484, "logps/chosen": -0.8677140474319458, "logps/rejected": -2.553945779800415, "loss": 1.0733, "nll_loss": 1.112572193145752, "rewards/accuracies": 0.875, "rewards/chosen": -0.0867714062333107, "rewards/margins": 0.16862313449382782, "rewards/rejected": -0.2553945481777191, "step": 5495 }, { "epoch": 3.352752783285039, "grad_norm": 1.5590343475341797, "learning_rate": 2.644458052663809e-06, "log_odds_chosen": 1.0757310390472412, "log_odds_ratio": -0.4761417806148529, "logits/chosen": -0.692244827747345, "logits/rejected": -0.6710897088050842, "logps/chosen": -1.0063250064849854, "logps/rejected": -1.85311758518219, "loss": 1.1574, "nll_loss": 1.0522152185440063, "rewards/accuracies": 0.75, "rewards/chosen": -0.10063249617815018, "rewards/margins": 0.0846792683005333, "rewards/rejected": -0.18531176447868347, "step": 5496 }, { "epoch": 3.3533628183620556, "grad_norm": 1.309618592262268, "learning_rate": 2.643478260869565e-06, "log_odds_chosen": 2.804703712463379, "log_odds_ratio": -0.3510504961013794, "logits/chosen": -0.8744979500770569, "logits/rejected": -0.8098491430282593, "logps/chosen": -0.7436525225639343, "logps/rejected": -3.0404446125030518, "loss": 1.061, "nll_loss": 0.9830434322357178, "rewards/accuracies": 0.75, "rewards/chosen": -0.07436525076627731, "rewards/margins": 0.22967921197414398, "rewards/rejected": -0.3040444850921631, "step": 5497 }, { "epoch": 3.353972853439073, "grad_norm": 3.378868341445923, "learning_rate": 2.6424984690753215e-06, "log_odds_chosen": 2.391367197036743, "log_odds_ratio": -0.4270284175872803, "logits/chosen": -0.9539152979850769, "logits/rejected": -1.0398567914962769, "logps/chosen": -0.78091961145401, "logps/rejected": -2.7496700286865234, "loss": 1.081, "nll_loss": 0.9312037229537964, "rewards/accuracies": 0.75, "rewards/chosen": -0.07809196412563324, "rewards/margins": 0.19687502086162567, "rewards/rejected": -0.2749669849872589, "step": 5498 }, { "epoch": 3.3545828885160898, "grad_norm": 1.909407615661621, "learning_rate": 2.6415186772810776e-06, "log_odds_chosen": 2.6752867698669434, "log_odds_ratio": -0.2699861228466034, "logits/chosen": -0.7028778195381165, "logits/rejected": -0.8491177558898926, "logps/chosen": -0.6881945729255676, "logps/rejected": -2.7327864170074463, "loss": 0.9591, "nll_loss": 0.7965745329856873, "rewards/accuracies": 0.875, "rewards/chosen": -0.06881946325302124, "rewards/margins": 0.20445919036865234, "rewards/rejected": -0.2732786536216736, "step": 5499 }, { "epoch": 3.3551929235931066, "grad_norm": 1.966651439666748, "learning_rate": 2.6405388854868338e-06, "log_odds_chosen": 1.3750364780426025, "log_odds_ratio": -0.4411996603012085, "logits/chosen": -0.9668617248535156, "logits/rejected": -0.9845678806304932, "logps/chosen": -0.8541150093078613, "logps/rejected": -1.8599278926849365, "loss": 1.2557, "nll_loss": 1.0568934679031372, "rewards/accuracies": 0.625, "rewards/chosen": -0.08541150391101837, "rewards/margins": 0.10058128833770752, "rewards/rejected": -0.18599280714988708, "step": 5500 }, { "epoch": 3.3558029586701235, "grad_norm": 2.4151711463928223, "learning_rate": 2.6395590936925903e-06, "log_odds_chosen": 2.852221965789795, "log_odds_ratio": -0.18156388401985168, "logits/chosen": -0.9235326647758484, "logits/rejected": -1.0882779359817505, "logps/chosen": -0.8353275060653687, "logps/rejected": -3.0541458129882812, "loss": 1.0829, "nll_loss": 1.0638948678970337, "rewards/accuracies": 1.0, "rewards/chosen": -0.08353275060653687, "rewards/margins": 0.22188180685043335, "rewards/rejected": -0.3054145872592926, "step": 5501 }, { "epoch": 3.3564129937471403, "grad_norm": 7.720128536224365, "learning_rate": 2.638579301898347e-06, "log_odds_chosen": 2.6301488876342773, "log_odds_ratio": -0.417348712682724, "logits/chosen": -0.5802515149116516, "logits/rejected": -0.819223940372467, "logps/chosen": -0.590269923210144, "logps/rejected": -2.6881775856018066, "loss": 0.9857, "nll_loss": 0.8329427242279053, "rewards/accuracies": 0.625, "rewards/chosen": -0.059026993811130524, "rewards/margins": 0.20979078114032745, "rewards/rejected": -0.2688177824020386, "step": 5502 }, { "epoch": 3.357023028824157, "grad_norm": 1.9286739826202393, "learning_rate": 2.6375995101041025e-06, "log_odds_chosen": 0.6793534159660339, "log_odds_ratio": -0.5798478126525879, "logits/chosen": -1.0494461059570312, "logits/rejected": -1.067429542541504, "logps/chosen": -0.9237079620361328, "logps/rejected": -1.5076172351837158, "loss": 1.1791, "nll_loss": 1.0783767700195312, "rewards/accuracies": 0.625, "rewards/chosen": -0.09237079322338104, "rewards/margins": 0.05839092284440994, "rewards/rejected": -0.15076172351837158, "step": 5503 }, { "epoch": 3.3576330639011744, "grad_norm": 8.39452075958252, "learning_rate": 2.636619718309859e-06, "log_odds_chosen": 2.729419469833374, "log_odds_ratio": -0.28468772768974304, "logits/chosen": -0.6512508988380432, "logits/rejected": -0.6939810514450073, "logps/chosen": -0.6797791123390198, "logps/rejected": -2.9387550354003906, "loss": 1.0522, "nll_loss": 0.8304843306541443, "rewards/accuracies": 0.875, "rewards/chosen": -0.0679779201745987, "rewards/margins": 0.22589761018753052, "rewards/rejected": -0.293875515460968, "step": 5504 }, { "epoch": 3.3582430989781913, "grad_norm": 1.1795587539672852, "learning_rate": 2.6356399265156156e-06, "log_odds_chosen": 0.7090799808502197, "log_odds_ratio": -0.5651956796646118, "logits/chosen": -0.7673899531364441, "logits/rejected": -0.7337958216667175, "logps/chosen": -0.5802328586578369, "logps/rejected": -1.0048596858978271, "loss": 1.031, "nll_loss": 1.033815860748291, "rewards/accuracies": 0.75, "rewards/chosen": -0.05802328884601593, "rewards/margins": 0.04246268421411514, "rewards/rejected": -0.10048598051071167, "step": 5505 }, { "epoch": 3.358853134055208, "grad_norm": 4.835409164428711, "learning_rate": 2.6346601347213713e-06, "log_odds_chosen": 0.7147835493087769, "log_odds_ratio": -0.6092800498008728, "logits/chosen": -0.9600237607955933, "logits/rejected": -0.8767123818397522, "logps/chosen": -1.3146519660949707, "logps/rejected": -1.829148769378662, "loss": 1.1682, "nll_loss": 1.0882275104522705, "rewards/accuracies": 0.625, "rewards/chosen": -0.13146519660949707, "rewards/margins": 0.05144966393709183, "rewards/rejected": -0.1829148679971695, "step": 5506 }, { "epoch": 3.359463169132225, "grad_norm": 2.5216801166534424, "learning_rate": 2.633680342927128e-06, "log_odds_chosen": 1.0191134214401245, "log_odds_ratio": -0.5902688503265381, "logits/chosen": -1.055067539215088, "logits/rejected": -1.0966086387634277, "logps/chosen": -0.9829084873199463, "logps/rejected": -1.7069735527038574, "loss": 1.2096, "nll_loss": 1.2187554836273193, "rewards/accuracies": 0.5, "rewards/chosen": -0.09829085320234299, "rewards/margins": 0.07240650057792664, "rewards/rejected": -0.17069734632968903, "step": 5507 }, { "epoch": 3.3600732042092423, "grad_norm": 2.9618980884552, "learning_rate": 2.6327005511328844e-06, "log_odds_chosen": 2.646587610244751, "log_odds_ratio": -0.355055034160614, "logits/chosen": -0.7060889601707458, "logits/rejected": -0.915034830570221, "logps/chosen": -0.6905530691146851, "logps/rejected": -2.8679749965667725, "loss": 1.1714, "nll_loss": 0.8793874382972717, "rewards/accuracies": 0.75, "rewards/chosen": -0.06905530393123627, "rewards/margins": 0.2177422046661377, "rewards/rejected": -0.28679749369621277, "step": 5508 }, { "epoch": 3.360683239286259, "grad_norm": 4.8035407066345215, "learning_rate": 2.6317207593386406e-06, "log_odds_chosen": 1.5532944202423096, "log_odds_ratio": -0.4171006977558136, "logits/chosen": -0.9098466038703918, "logits/rejected": -0.9395831823348999, "logps/chosen": -0.7765766382217407, "logps/rejected": -1.887993574142456, "loss": 1.1687, "nll_loss": 1.0301859378814697, "rewards/accuracies": 0.625, "rewards/chosen": -0.07765766233205795, "rewards/margins": 0.11114169657230377, "rewards/rejected": -0.18879936635494232, "step": 5509 }, { "epoch": 3.361293274363276, "grad_norm": 9.070928573608398, "learning_rate": 2.6307409675443967e-06, "log_odds_chosen": 1.5229637622833252, "log_odds_ratio": -0.47263163328170776, "logits/chosen": -0.8741611242294312, "logits/rejected": -0.9711387753486633, "logps/chosen": -0.8569476008415222, "logps/rejected": -1.858306646347046, "loss": 0.9621, "nll_loss": 0.9234128594398499, "rewards/accuracies": 0.75, "rewards/chosen": -0.08569476008415222, "rewards/margins": 0.10013590008020401, "rewards/rejected": -0.18583065271377563, "step": 5510 }, { "epoch": 3.361903309440293, "grad_norm": 1.3803707361221313, "learning_rate": 2.629761175750153e-06, "log_odds_chosen": 1.7123422622680664, "log_odds_ratio": -0.4202399253845215, "logits/chosen": -0.78232342004776, "logits/rejected": -0.9002007246017456, "logps/chosen": -0.6568209528923035, "logps/rejected": -1.8390307426452637, "loss": 0.9241, "nll_loss": 0.819164514541626, "rewards/accuracies": 0.75, "rewards/chosen": -0.06568209826946259, "rewards/margins": 0.11822096258401871, "rewards/rejected": -0.1839030683040619, "step": 5511 }, { "epoch": 3.3625133445173097, "grad_norm": 6.221075057983398, "learning_rate": 2.6287813839559093e-06, "log_odds_chosen": 2.537092447280884, "log_odds_ratio": -0.3891215920448303, "logits/chosen": -0.8134760856628418, "logits/rejected": -0.9051710367202759, "logps/chosen": -0.6083277463912964, "logps/rejected": -2.3390204906463623, "loss": 1.1526, "nll_loss": 1.0875649452209473, "rewards/accuracies": 0.625, "rewards/chosen": -0.06083277612924576, "rewards/margins": 0.1730692833662033, "rewards/rejected": -0.23390203714370728, "step": 5512 }, { "epoch": 3.3631233795943265, "grad_norm": 1.2704699039459229, "learning_rate": 2.6278015921616655e-06, "log_odds_chosen": 3.2694196701049805, "log_odds_ratio": -0.30018943548202515, "logits/chosen": -0.8396258354187012, "logits/rejected": -0.9685434699058533, "logps/chosen": -0.6022566556930542, "logps/rejected": -3.041304111480713, "loss": 1.0456, "nll_loss": 1.0583696365356445, "rewards/accuracies": 0.875, "rewards/chosen": -0.06022566556930542, "rewards/margins": 0.24390475451946259, "rewards/rejected": -0.3041304349899292, "step": 5513 }, { "epoch": 3.363733414671344, "grad_norm": 3.426952838897705, "learning_rate": 2.6268218003674216e-06, "log_odds_chosen": 1.4378855228424072, "log_odds_ratio": -0.39884936809539795, "logits/chosen": -0.8548320531845093, "logits/rejected": -0.9450076818466187, "logps/chosen": -0.626919686794281, "logps/rejected": -1.5554065704345703, "loss": 1.0737, "nll_loss": 1.0176969766616821, "rewards/accuracies": 0.75, "rewards/chosen": -0.06269196420907974, "rewards/margins": 0.09284868836402893, "rewards/rejected": -0.15554064512252808, "step": 5514 }, { "epoch": 3.3643434497483606, "grad_norm": 3.1244850158691406, "learning_rate": 2.625842008573178e-06, "log_odds_chosen": 1.6130993366241455, "log_odds_ratio": -0.5417219996452332, "logits/chosen": -1.027280569076538, "logits/rejected": -1.0185861587524414, "logps/chosen": -0.7818018794059753, "logps/rejected": -2.0638749599456787, "loss": 1.0493, "nll_loss": 0.9649473428726196, "rewards/accuracies": 0.5, "rewards/chosen": -0.07818019390106201, "rewards/margins": 0.12820731103420258, "rewards/rejected": -0.2063875049352646, "step": 5515 }, { "epoch": 3.3649534848253775, "grad_norm": 1.662808895111084, "learning_rate": 2.6248622167789347e-06, "log_odds_chosen": 2.1807589530944824, "log_odds_ratio": -0.2933397591114044, "logits/chosen": -1.0224785804748535, "logits/rejected": -1.0639842748641968, "logps/chosen": -0.7724584341049194, "logps/rejected": -2.312196731567383, "loss": 1.0942, "nll_loss": 1.0917339324951172, "rewards/accuracies": 1.0, "rewards/chosen": -0.07724584639072418, "rewards/margins": 0.15397383272647858, "rewards/rejected": -0.23121967911720276, "step": 5516 }, { "epoch": 3.3655635199023943, "grad_norm": 2.0537283420562744, "learning_rate": 2.6238824249846904e-06, "log_odds_chosen": 1.6855542659759521, "log_odds_ratio": -0.45612069964408875, "logits/chosen": -0.7996035218238831, "logits/rejected": -0.9866446852684021, "logps/chosen": -0.7085420489311218, "logps/rejected": -1.9968602657318115, "loss": 1.0064, "nll_loss": 0.8229162096977234, "rewards/accuracies": 0.75, "rewards/chosen": -0.07085420936346054, "rewards/margins": 0.12883181869983673, "rewards/rejected": -0.19968602061271667, "step": 5517 }, { "epoch": 3.366173554979411, "grad_norm": 1.3989055156707764, "learning_rate": 2.622902633190447e-06, "log_odds_chosen": 0.7668298482894897, "log_odds_ratio": -0.6674430966377258, "logits/chosen": -0.8560097217559814, "logits/rejected": -0.7766686677932739, "logps/chosen": -0.830839991569519, "logps/rejected": -1.4667625427246094, "loss": 1.1394, "nll_loss": 0.9671751260757446, "rewards/accuracies": 0.375, "rewards/chosen": -0.08308399468660355, "rewards/margins": 0.06359225511550903, "rewards/rejected": -0.14667625725269318, "step": 5518 }, { "epoch": 3.3667835900564285, "grad_norm": 2.379193067550659, "learning_rate": 2.6219228413962035e-06, "log_odds_chosen": 2.3342814445495605, "log_odds_ratio": -0.37711694836616516, "logits/chosen": -0.9503291845321655, "logits/rejected": -0.9289045333862305, "logps/chosen": -0.9743406772613525, "logps/rejected": -2.835909366607666, "loss": 1.1211, "nll_loss": 1.1518127918243408, "rewards/accuracies": 0.75, "rewards/chosen": -0.09743407368659973, "rewards/margins": 0.18615686893463135, "rewards/rejected": -0.2835909426212311, "step": 5519 }, { "epoch": 3.3673936251334453, "grad_norm": 1.9137972593307495, "learning_rate": 2.620943049601959e-06, "log_odds_chosen": 4.16303825378418, "log_odds_ratio": -0.3136157691478729, "logits/chosen": -0.9974429607391357, "logits/rejected": -1.127023696899414, "logps/chosen": -0.8698678016662598, "logps/rejected": -4.556449890136719, "loss": 1.1773, "nll_loss": 1.1679463386535645, "rewards/accuracies": 0.875, "rewards/chosen": -0.08698678016662598, "rewards/margins": 0.36865824460983276, "rewards/rejected": -0.45564499497413635, "step": 5520 }, { "epoch": 3.368003660210462, "grad_norm": 1.0827844142913818, "learning_rate": 2.6199632578077157e-06, "log_odds_chosen": 2.274256467819214, "log_odds_ratio": -0.5547729730606079, "logits/chosen": -0.8707205057144165, "logits/rejected": -0.9252108931541443, "logps/chosen": -0.8029240369796753, "logps/rejected": -2.6930506229400635, "loss": 1.1022, "nll_loss": 1.0339818000793457, "rewards/accuracies": 0.625, "rewards/chosen": -0.08029241114854813, "rewards/margins": 0.18901266157627106, "rewards/rejected": -0.2693050503730774, "step": 5521 }, { "epoch": 3.368613695287479, "grad_norm": 0.9327929615974426, "learning_rate": 2.6189834660134723e-06, "log_odds_chosen": 1.8649075031280518, "log_odds_ratio": -0.31597819924354553, "logits/chosen": -0.8628564476966858, "logits/rejected": -0.8008468747138977, "logps/chosen": -0.7565181255340576, "logps/rejected": -2.0949885845184326, "loss": 1.0977, "nll_loss": 0.9925503134727478, "rewards/accuracies": 0.875, "rewards/chosen": -0.07565181702375412, "rewards/margins": 0.13384705781936646, "rewards/rejected": -0.20949888229370117, "step": 5522 }, { "epoch": 3.369223730364496, "grad_norm": 2.885589599609375, "learning_rate": 2.6180036742192284e-06, "log_odds_chosen": 4.533404350280762, "log_odds_ratio": -0.29582497477531433, "logits/chosen": -0.8188472986221313, "logits/rejected": -1.1162620782852173, "logps/chosen": -0.7843796610832214, "logps/rejected": -4.832258224487305, "loss": 1.1674, "nll_loss": 1.1405935287475586, "rewards/accuracies": 0.875, "rewards/chosen": -0.07843796908855438, "rewards/margins": 0.4047878384590149, "rewards/rejected": -0.48322582244873047, "step": 5523 }, { "epoch": 3.3698337654415127, "grad_norm": 1.5528367757797241, "learning_rate": 2.6170238824249845e-06, "log_odds_chosen": 2.5165722370147705, "log_odds_ratio": -0.2964506149291992, "logits/chosen": -0.7543116807937622, "logits/rejected": -0.9141069054603577, "logps/chosen": -0.6321741938591003, "logps/rejected": -2.5753426551818848, "loss": 0.9608, "nll_loss": 0.7616019248962402, "rewards/accuracies": 0.875, "rewards/chosen": -0.0632174164056778, "rewards/margins": 0.19431687891483307, "rewards/rejected": -0.25753429532051086, "step": 5524 }, { "epoch": 3.37044380051853, "grad_norm": 7.424290657043457, "learning_rate": 2.616044090630741e-06, "log_odds_chosen": 1.6818732023239136, "log_odds_ratio": -0.48315340280532837, "logits/chosen": -0.7117959260940552, "logits/rejected": -0.7708232998847961, "logps/chosen": -0.7414166331291199, "logps/rejected": -1.9946882724761963, "loss": 1.023, "nll_loss": 0.8402062654495239, "rewards/accuracies": 0.625, "rewards/chosen": -0.07414165884256363, "rewards/margins": 0.12532715499401093, "rewards/rejected": -0.19946882128715515, "step": 5525 }, { "epoch": 3.371053835595547, "grad_norm": 4.173002243041992, "learning_rate": 2.615064298836497e-06, "log_odds_chosen": 2.3252036571502686, "log_odds_ratio": -0.2904968857765198, "logits/chosen": -0.8806329965591431, "logits/rejected": -0.9051473140716553, "logps/chosen": -0.6874780654907227, "logps/rejected": -2.5845415592193604, "loss": 0.9268, "nll_loss": 1.0056275129318237, "rewards/accuracies": 0.875, "rewards/chosen": -0.06874780356884003, "rewards/margins": 0.18970635533332825, "rewards/rejected": -0.2584541440010071, "step": 5526 }, { "epoch": 3.3716638706725637, "grad_norm": 1.5384457111358643, "learning_rate": 2.6140845070422533e-06, "log_odds_chosen": 1.7701340913772583, "log_odds_ratio": -0.3340911567211151, "logits/chosen": -0.8560837507247925, "logits/rejected": -0.9663399457931519, "logps/chosen": -0.7840635776519775, "logps/rejected": -2.17576265335083, "loss": 1.1234, "nll_loss": 0.9782588481903076, "rewards/accuracies": 0.875, "rewards/chosen": -0.07840635627508163, "rewards/margins": 0.13916991651058197, "rewards/rejected": -0.2175762802362442, "step": 5527 }, { "epoch": 3.3722739057495805, "grad_norm": 1.6206779479980469, "learning_rate": 2.6131047152480094e-06, "log_odds_chosen": 1.3234906196594238, "log_odds_ratio": -0.33855903148651123, "logits/chosen": -0.8980298042297363, "logits/rejected": -0.8779760599136353, "logps/chosen": -0.9508010745048523, "logps/rejected": -1.9946650266647339, "loss": 1.2225, "nll_loss": 1.2305264472961426, "rewards/accuracies": 0.875, "rewards/chosen": -0.09508010745048523, "rewards/margins": 0.10438640415668488, "rewards/rejected": -0.1994665116071701, "step": 5528 }, { "epoch": 3.3728839408265974, "grad_norm": 1.3692538738250732, "learning_rate": 2.612124923453766e-06, "log_odds_chosen": 0.10957664251327515, "log_odds_ratio": -0.746317446231842, "logits/chosen": -1.0685135126113892, "logits/rejected": -1.030937910079956, "logps/chosen": -1.075553297996521, "logps/rejected": -1.1391572952270508, "loss": 1.0803, "nll_loss": 1.3236984014511108, "rewards/accuracies": 0.625, "rewards/chosen": -0.1075553372502327, "rewards/margins": 0.006360403727740049, "rewards/rejected": -0.11391572654247284, "step": 5529 }, { "epoch": 3.3734939759036147, "grad_norm": 1.531111717224121, "learning_rate": 2.6111451316595225e-06, "log_odds_chosen": 2.736849784851074, "log_odds_ratio": -0.3235665559768677, "logits/chosen": -0.8526817560195923, "logits/rejected": -0.9805841445922852, "logps/chosen": -0.7225952744483948, "logps/rejected": -2.9371042251586914, "loss": 1.2129, "nll_loss": 1.1995190382003784, "rewards/accuracies": 0.75, "rewards/chosen": -0.07225953042507172, "rewards/margins": 0.22145089507102966, "rewards/rejected": -0.2937104105949402, "step": 5530 }, { "epoch": 3.3741040109806315, "grad_norm": 2.2932722568511963, "learning_rate": 2.610165339865278e-06, "log_odds_chosen": 0.07967662811279297, "log_odds_ratio": -0.7276903986930847, "logits/chosen": -0.6490169167518616, "logits/rejected": -0.7568039894104004, "logps/chosen": -0.9351391196250916, "logps/rejected": -1.012709140777588, "loss": 1.0836, "nll_loss": 1.0880087614059448, "rewards/accuracies": 0.625, "rewards/chosen": -0.09351391345262527, "rewards/margins": 0.007756996899843216, "rewards/rejected": -0.10127091407775879, "step": 5531 }, { "epoch": 3.3747140460576484, "grad_norm": 1.1045029163360596, "learning_rate": 2.6091855480710348e-06, "log_odds_chosen": 1.7256174087524414, "log_odds_ratio": -0.31215283274650574, "logits/chosen": -0.6806026697158813, "logits/rejected": -0.8365857601165771, "logps/chosen": -0.5194698572158813, "logps/rejected": -1.4664855003356934, "loss": 0.8788, "nll_loss": 0.6484960317611694, "rewards/accuracies": 0.875, "rewards/chosen": -0.051946986466646194, "rewards/margins": 0.09470157325267792, "rewards/rejected": -0.1466485559940338, "step": 5532 }, { "epoch": 3.375324081134665, "grad_norm": 1.7908176183700562, "learning_rate": 2.6082057562767913e-06, "log_odds_chosen": 2.1962087154388428, "log_odds_ratio": -0.36157214641571045, "logits/chosen": -0.8842642903327942, "logits/rejected": -0.9999662637710571, "logps/chosen": -0.92903071641922, "logps/rejected": -2.722231864929199, "loss": 1.0168, "nll_loss": 1.1438848972320557, "rewards/accuracies": 0.875, "rewards/chosen": -0.09290307015180588, "rewards/margins": 0.17932014167308807, "rewards/rejected": -0.27222320437431335, "step": 5533 }, { "epoch": 3.375934116211682, "grad_norm": 1.188358187675476, "learning_rate": 2.607225964482547e-06, "log_odds_chosen": 1.4172168970108032, "log_odds_ratio": -0.41215282678604126, "logits/chosen": -0.8513492345809937, "logits/rejected": -1.0060361623764038, "logps/chosen": -0.9349526166915894, "logps/rejected": -2.1752123832702637, "loss": 1.1798, "nll_loss": 1.0486156940460205, "rewards/accuracies": 0.625, "rewards/chosen": -0.09349526464939117, "rewards/margins": 0.12402597069740295, "rewards/rejected": -0.21752123534679413, "step": 5534 }, { "epoch": 3.376544151288699, "grad_norm": 1.3905162811279297, "learning_rate": 2.6062461726883035e-06, "log_odds_chosen": 0.928672730922699, "log_odds_ratio": -0.5970157980918884, "logits/chosen": -1.0005512237548828, "logits/rejected": -0.9864437580108643, "logps/chosen": -0.933803379535675, "logps/rejected": -1.7914048433303833, "loss": 1.1005, "nll_loss": 1.0937767028808594, "rewards/accuracies": 0.625, "rewards/chosen": -0.09338033199310303, "rewards/margins": 0.08576014637947083, "rewards/rejected": -0.17914047837257385, "step": 5535 }, { "epoch": 3.377154186365716, "grad_norm": 1.3247122764587402, "learning_rate": 2.60526638089406e-06, "log_odds_chosen": 3.949486255645752, "log_odds_ratio": -0.16568118333816528, "logits/chosen": -0.615241527557373, "logits/rejected": -0.8738563060760498, "logps/chosen": -0.49981755018234253, "logps/rejected": -3.5996642112731934, "loss": 0.9011, "nll_loss": 0.6986044645309448, "rewards/accuracies": 0.875, "rewards/chosen": -0.049981750547885895, "rewards/margins": 0.3099846839904785, "rewards/rejected": -0.3599664568901062, "step": 5536 }, { "epoch": 3.377764221442733, "grad_norm": 2.2383713722229004, "learning_rate": 2.6042865890998162e-06, "log_odds_chosen": 1.3715229034423828, "log_odds_ratio": -0.3763769268989563, "logits/chosen": -1.0243709087371826, "logits/rejected": -1.0055162906646729, "logps/chosen": -0.7138384580612183, "logps/rejected": -1.6132234334945679, "loss": 0.9842, "nll_loss": 0.9552574157714844, "rewards/accuracies": 0.75, "rewards/chosen": -0.07138384878635406, "rewards/margins": 0.08993849158287048, "rewards/rejected": -0.16132234036922455, "step": 5537 }, { "epoch": 3.37837425651975, "grad_norm": 4.318953037261963, "learning_rate": 2.6033067973055723e-06, "log_odds_chosen": 3.810457229614258, "log_odds_ratio": -0.2087632566690445, "logits/chosen": -0.5370723605155945, "logits/rejected": -0.891024112701416, "logps/chosen": -0.612345814704895, "logps/rejected": -3.576841115951538, "loss": 0.8356, "nll_loss": 0.8016868829727173, "rewards/accuracies": 0.75, "rewards/chosen": -0.06123458221554756, "rewards/margins": 0.29644954204559326, "rewards/rejected": -0.3576841354370117, "step": 5538 }, { "epoch": 3.3789842915967667, "grad_norm": 2.0876500606536865, "learning_rate": 2.602327005511329e-06, "log_odds_chosen": 0.6192969083786011, "log_odds_ratio": -0.6132292747497559, "logits/chosen": -1.187943458557129, "logits/rejected": -1.0700193643569946, "logps/chosen": -1.0931766033172607, "logps/rejected": -1.5388256311416626, "loss": 1.2817, "nll_loss": 1.1888947486877441, "rewards/accuracies": 0.75, "rewards/chosen": -0.10931767523288727, "rewards/margins": 0.04456489533185959, "rewards/rejected": -0.15388256311416626, "step": 5539 }, { "epoch": 3.3795943266737836, "grad_norm": 2.089484691619873, "learning_rate": 2.601347213717085e-06, "log_odds_chosen": 1.8643040657043457, "log_odds_ratio": -0.3968067467212677, "logits/chosen": -0.6236811876296997, "logits/rejected": -0.7535360455513, "logps/chosen": -0.6218658089637756, "logps/rejected": -1.7406809329986572, "loss": 1.034, "nll_loss": 0.7909625768661499, "rewards/accuracies": 0.75, "rewards/chosen": -0.062186580151319504, "rewards/margins": 0.11188150942325592, "rewards/rejected": -0.17406809329986572, "step": 5540 }, { "epoch": 3.380204361750801, "grad_norm": 1.162411093711853, "learning_rate": 2.600367421922841e-06, "log_odds_chosen": 2.12184476852417, "log_odds_ratio": -0.26015621423721313, "logits/chosen": -0.7449513673782349, "logits/rejected": -0.9867855310440063, "logps/chosen": -0.7041627168655396, "logps/rejected": -2.283478260040283, "loss": 0.9975, "nll_loss": 0.9604008197784424, "rewards/accuracies": 0.875, "rewards/chosen": -0.07041627168655396, "rewards/margins": 0.15793156623840332, "rewards/rejected": -0.22834783792495728, "step": 5541 }, { "epoch": 3.3808143968278177, "grad_norm": 9.925496101379395, "learning_rate": 2.5993876301285977e-06, "log_odds_chosen": 2.2181003093719482, "log_odds_ratio": -0.36764955520629883, "logits/chosen": -0.929093599319458, "logits/rejected": -1.073434591293335, "logps/chosen": -0.9281847476959229, "logps/rejected": -2.797952890396118, "loss": 1.0069, "nll_loss": 0.9162227511405945, "rewards/accuracies": 0.875, "rewards/chosen": -0.09281846880912781, "rewards/margins": 0.1869768500328064, "rewards/rejected": -0.2797952890396118, "step": 5542 }, { "epoch": 3.3814244319048345, "grad_norm": 2.164205551147461, "learning_rate": 2.598407838334354e-06, "log_odds_chosen": 4.003256797790527, "log_odds_ratio": -0.14345142245292664, "logits/chosen": -0.6272488832473755, "logits/rejected": -0.7841767072677612, "logps/chosen": -0.5879627466201782, "logps/rejected": -3.689546823501587, "loss": 1.0232, "nll_loss": 0.787190854549408, "rewards/accuracies": 0.875, "rewards/chosen": -0.05879627913236618, "rewards/margins": 0.3101584315299988, "rewards/rejected": -0.36895471811294556, "step": 5543 }, { "epoch": 3.3820344669818514, "grad_norm": 1.2023663520812988, "learning_rate": 2.5974280465401103e-06, "log_odds_chosen": 0.8252608180046082, "log_odds_ratio": -0.6042184233665466, "logits/chosen": -0.8888680934906006, "logits/rejected": -0.8677664399147034, "logps/chosen": -0.9898806810379028, "logps/rejected": -1.7241764068603516, "loss": 0.9466, "nll_loss": 1.04690420627594, "rewards/accuracies": 0.625, "rewards/chosen": -0.09898807108402252, "rewards/margins": 0.07342957705259323, "rewards/rejected": -0.17241765558719635, "step": 5544 }, { "epoch": 3.3826445020588682, "grad_norm": 2.0582447052001953, "learning_rate": 2.5964482547458665e-06, "log_odds_chosen": 1.6412404775619507, "log_odds_ratio": -0.3701510727405548, "logits/chosen": -0.6156145334243774, "logits/rejected": -0.6040535569190979, "logps/chosen": -0.4996034502983093, "logps/rejected": -1.5538067817687988, "loss": 0.8657, "nll_loss": 0.8162713646888733, "rewards/accuracies": 0.75, "rewards/chosen": -0.04996034502983093, "rewards/margins": 0.10542032122612, "rewards/rejected": -0.15538068115711212, "step": 5545 }, { "epoch": 3.383254537135885, "grad_norm": 1.33351731300354, "learning_rate": 2.5954684629516226e-06, "log_odds_chosen": 3.254909038543701, "log_odds_ratio": -0.23499347269535065, "logits/chosen": -1.0694348812103271, "logits/rejected": -1.042090892791748, "logps/chosen": -0.8026701211929321, "logps/rejected": -3.5326714515686035, "loss": 1.1424, "nll_loss": 1.0704911947250366, "rewards/accuracies": 1.0, "rewards/chosen": -0.08026701211929321, "rewards/margins": 0.2730001211166382, "rewards/rejected": -0.3532671332359314, "step": 5546 }, { "epoch": 3.3838645722129024, "grad_norm": 2.3533737659454346, "learning_rate": 2.594488671157379e-06, "log_odds_chosen": 2.5265660285949707, "log_odds_ratio": -0.2200731337070465, "logits/chosen": -0.7071070671081543, "logits/rejected": -1.0294939279556274, "logps/chosen": -0.5821807384490967, "logps/rejected": -2.444087505340576, "loss": 0.9747, "nll_loss": 0.8618221879005432, "rewards/accuracies": 0.875, "rewards/chosen": -0.05821807682514191, "rewards/margins": 0.186190664768219, "rewards/rejected": -0.2444087266921997, "step": 5547 }, { "epoch": 3.3844746072899192, "grad_norm": 10.972349166870117, "learning_rate": 2.593508879363135e-06, "log_odds_chosen": 1.208418369293213, "log_odds_ratio": -0.6307264566421509, "logits/chosen": -0.9498875141143799, "logits/rejected": -0.9447134733200073, "logps/chosen": -1.0234193801879883, "logps/rejected": -2.1287403106689453, "loss": 1.2094, "nll_loss": 1.3442695140838623, "rewards/accuracies": 0.625, "rewards/chosen": -0.10234193503856659, "rewards/margins": 0.11053209006786346, "rewards/rejected": -0.21287401020526886, "step": 5548 }, { "epoch": 3.385084642366936, "grad_norm": 7.917000770568848, "learning_rate": 2.5925290875688914e-06, "log_odds_chosen": 2.113020658493042, "log_odds_ratio": -0.4713388979434967, "logits/chosen": -0.8054783344268799, "logits/rejected": -0.8261154890060425, "logps/chosen": -0.7724425792694092, "logps/rejected": -2.4621784687042236, "loss": 0.8908, "nll_loss": 0.918195366859436, "rewards/accuracies": 0.625, "rewards/chosen": -0.07724425941705704, "rewards/margins": 0.16897359490394592, "rewards/rejected": -0.24621786177158356, "step": 5549 }, { "epoch": 3.385694677443953, "grad_norm": 3.222062826156616, "learning_rate": 2.591549295774648e-06, "log_odds_chosen": 2.458756685256958, "log_odds_ratio": -0.4661068618297577, "logits/chosen": -0.7218976020812988, "logits/rejected": -0.8589943647384644, "logps/chosen": -0.8522411584854126, "logps/rejected": -2.863410472869873, "loss": 1.0176, "nll_loss": 0.8802281618118286, "rewards/accuracies": 0.75, "rewards/chosen": -0.08522411435842514, "rewards/margins": 0.20111696422100067, "rewards/rejected": -0.2863410711288452, "step": 5550 }, { "epoch": 3.3863047125209698, "grad_norm": 1.5732194185256958, "learning_rate": 2.590569503980404e-06, "log_odds_chosen": 0.3650677800178528, "log_odds_ratio": -0.7072744369506836, "logits/chosen": -0.7746750116348267, "logits/rejected": -0.6498277187347412, "logps/chosen": -1.0180494785308838, "logps/rejected": -1.4061945676803589, "loss": 1.3153, "nll_loss": 1.158193588256836, "rewards/accuracies": 0.5, "rewards/chosen": -0.1018049567937851, "rewards/margins": 0.03881451115012169, "rewards/rejected": -0.1406194567680359, "step": 5551 }, { "epoch": 3.386914747597987, "grad_norm": 8.726583480834961, "learning_rate": 2.58958971218616e-06, "log_odds_chosen": 1.7346115112304688, "log_odds_ratio": -0.3668268322944641, "logits/chosen": -0.8946663737297058, "logits/rejected": -0.9937244653701782, "logps/chosen": -0.8800444006919861, "logps/rejected": -2.306243658065796, "loss": 1.0657, "nll_loss": 1.1526285409927368, "rewards/accuracies": 0.875, "rewards/chosen": -0.08800444006919861, "rewards/margins": 0.14261993765830994, "rewards/rejected": -0.23062434792518616, "step": 5552 }, { "epoch": 3.387524782675004, "grad_norm": 4.5947065353393555, "learning_rate": 2.5886099203919167e-06, "log_odds_chosen": 1.2325044870376587, "log_odds_ratio": -0.5914332866668701, "logits/chosen": -0.7874244451522827, "logits/rejected": -0.914121150970459, "logps/chosen": -0.7446761727333069, "logps/rejected": -1.7191734313964844, "loss": 1.0921, "nll_loss": 0.9792124629020691, "rewards/accuracies": 0.5, "rewards/chosen": -0.07446761429309845, "rewards/margins": 0.09744971990585327, "rewards/rejected": -0.17191733419895172, "step": 5553 }, { "epoch": 3.3881348177520207, "grad_norm": 1.4436529874801636, "learning_rate": 2.587630128597673e-06, "log_odds_chosen": 2.5694522857666016, "log_odds_ratio": -0.22416798770427704, "logits/chosen": -0.768230676651001, "logits/rejected": -0.9870695471763611, "logps/chosen": -0.6039870977401733, "logps/rejected": -2.4758501052856445, "loss": 0.9315, "nll_loss": 0.7672685384750366, "rewards/accuracies": 0.875, "rewards/chosen": -0.060398709028959274, "rewards/margins": 0.1871863156557083, "rewards/rejected": -0.24758502840995789, "step": 5554 }, { "epoch": 3.3887448528290376, "grad_norm": 0.9666545391082764, "learning_rate": 2.586650336803429e-06, "log_odds_chosen": 1.9376461505889893, "log_odds_ratio": -0.3783230781555176, "logits/chosen": -0.8006868958473206, "logits/rejected": -0.8368579149246216, "logps/chosen": -0.7990250587463379, "logps/rejected": -2.368567943572998, "loss": 1.0689, "nll_loss": 0.9382026195526123, "rewards/accuracies": 0.875, "rewards/chosen": -0.07990249991416931, "rewards/margins": 0.1569543033838272, "rewards/rejected": -0.23685680329799652, "step": 5555 }, { "epoch": 3.3893548879060544, "grad_norm": 2.103835344314575, "learning_rate": 2.5856705450091855e-06, "log_odds_chosen": 1.2721387147903442, "log_odds_ratio": -0.5111202001571655, "logits/chosen": -1.059536337852478, "logits/rejected": -0.9572086334228516, "logps/chosen": -1.042351245880127, "logps/rejected": -2.1612601280212402, "loss": 1.2036, "nll_loss": 1.1341785192489624, "rewards/accuracies": 0.75, "rewards/chosen": -0.10423512756824493, "rewards/margins": 0.11189089715480804, "rewards/rejected": -0.21612602472305298, "step": 5556 }, { "epoch": 3.3899649229830713, "grad_norm": 5.116487979888916, "learning_rate": 2.5846907532149416e-06, "log_odds_chosen": 2.779911518096924, "log_odds_ratio": -0.32163411378860474, "logits/chosen": -1.0158765316009521, "logits/rejected": -1.116048812866211, "logps/chosen": -0.7346978783607483, "logps/rejected": -2.715644359588623, "loss": 1.0895, "nll_loss": 1.149973750114441, "rewards/accuracies": 0.75, "rewards/chosen": -0.07346979528665543, "rewards/margins": 0.19809463620185852, "rewards/rejected": -0.27156442403793335, "step": 5557 }, { "epoch": 3.3905749580600886, "grad_norm": 1.4786183834075928, "learning_rate": 2.583710961420698e-06, "log_odds_chosen": 3.7001307010650635, "log_odds_ratio": -0.34780389070510864, "logits/chosen": -0.6998282670974731, "logits/rejected": -0.8066428303718567, "logps/chosen": -0.48426175117492676, "logps/rejected": -3.5503087043762207, "loss": 1.0807, "nll_loss": 0.7610787749290466, "rewards/accuracies": 0.875, "rewards/chosen": -0.04842618107795715, "rewards/margins": 0.30660468339920044, "rewards/rejected": -0.3550308644771576, "step": 5558 }, { "epoch": 3.3911849931371054, "grad_norm": 1.7367146015167236, "learning_rate": 2.5827311696264543e-06, "log_odds_chosen": 1.320959210395813, "log_odds_ratio": -0.31564927101135254, "logits/chosen": -0.9272871017456055, "logits/rejected": -0.9210240840911865, "logps/chosen": -0.8798616528511047, "logps/rejected": -1.8690190315246582, "loss": 0.9417, "nll_loss": 1.2214298248291016, "rewards/accuracies": 1.0, "rewards/chosen": -0.08798617124557495, "rewards/margins": 0.09891574084758759, "rewards/rejected": -0.18690191209316254, "step": 5559 }, { "epoch": 3.3917950282141223, "grad_norm": 3.103484630584717, "learning_rate": 2.5817513778322104e-06, "log_odds_chosen": 1.5717109441757202, "log_odds_ratio": -0.5432840585708618, "logits/chosen": -0.9830276966094971, "logits/rejected": -0.9146060943603516, "logps/chosen": -1.004770040512085, "logps/rejected": -2.0526862144470215, "loss": 1.0156, "nll_loss": 1.3344895839691162, "rewards/accuracies": 0.625, "rewards/chosen": -0.10047701746225357, "rewards/margins": 0.10479160398244858, "rewards/rejected": -0.20526860654354095, "step": 5560 }, { "epoch": 3.392405063291139, "grad_norm": 1.8116939067840576, "learning_rate": 2.580771586037967e-06, "log_odds_chosen": 1.0740654468536377, "log_odds_ratio": -0.45959609746932983, "logits/chosen": -0.8801507949829102, "logits/rejected": -0.7801679372787476, "logps/chosen": -0.7008312940597534, "logps/rejected": -1.3593782186508179, "loss": 0.9545, "nll_loss": 0.8137656450271606, "rewards/accuracies": 0.875, "rewards/chosen": -0.0700831264257431, "rewards/margins": 0.06585469096899033, "rewards/rejected": -0.13593782484531403, "step": 5561 }, { "epoch": 3.3930150983681564, "grad_norm": 2.2797842025756836, "learning_rate": 2.579791794243723e-06, "log_odds_chosen": -0.257089763879776, "log_odds_ratio": -0.8650472164154053, "logits/chosen": -1.08560049533844, "logits/rejected": -0.9256237745285034, "logps/chosen": -1.0166089534759521, "logps/rejected": -0.8619821071624756, "loss": 1.2567, "nll_loss": 1.0949890613555908, "rewards/accuracies": 0.375, "rewards/chosen": -0.10166090726852417, "rewards/margins": -0.015462689101696014, "rewards/rejected": -0.08619821071624756, "step": 5562 }, { "epoch": 3.3936251334451732, "grad_norm": 1.5859720706939697, "learning_rate": 2.578812002449479e-06, "log_odds_chosen": 1.9746179580688477, "log_odds_ratio": -0.32809245586395264, "logits/chosen": -0.8851659297943115, "logits/rejected": -0.9784505367279053, "logps/chosen": -0.9699101448059082, "logps/rejected": -2.5537590980529785, "loss": 1.0004, "nll_loss": 1.0616953372955322, "rewards/accuracies": 0.875, "rewards/chosen": -0.09699101746082306, "rewards/margins": 0.15838488936424255, "rewards/rejected": -0.2553759217262268, "step": 5563 }, { "epoch": 3.39423516852219, "grad_norm": 1.7331558465957642, "learning_rate": 2.5778322106552357e-06, "log_odds_chosen": 3.1092891693115234, "log_odds_ratio": -0.30717146396636963, "logits/chosen": -0.8425977826118469, "logits/rejected": -0.9996858835220337, "logps/chosen": -0.7689403891563416, "logps/rejected": -3.199467658996582, "loss": 1.0417, "nll_loss": 0.9880304336547852, "rewards/accuracies": 0.75, "rewards/chosen": -0.07689404487609863, "rewards/margins": 0.24305272102355957, "rewards/rejected": -0.3199467658996582, "step": 5564 }, { "epoch": 3.394845203599207, "grad_norm": 1.8178377151489258, "learning_rate": 2.5768524188609923e-06, "log_odds_chosen": 1.9906268119812012, "log_odds_ratio": -0.3779791593551636, "logits/chosen": -0.7736772894859314, "logits/rejected": -0.9497182369232178, "logps/chosen": -0.592365026473999, "logps/rejected": -2.005629539489746, "loss": 1.0161, "nll_loss": 0.7879798412322998, "rewards/accuracies": 0.75, "rewards/chosen": -0.05923650413751602, "rewards/margins": 0.141326442360878, "rewards/rejected": -0.2005629539489746, "step": 5565 }, { "epoch": 3.395455238676224, "grad_norm": 1.8440320491790771, "learning_rate": 2.575872627066748e-06, "log_odds_chosen": 1.3033843040466309, "log_odds_ratio": -0.43148326873779297, "logits/chosen": -0.947819709777832, "logits/rejected": -1.1057229042053223, "logps/chosen": -0.8736670613288879, "logps/rejected": -1.894296646118164, "loss": 1.1541, "nll_loss": 1.0348559617996216, "rewards/accuracies": 1.0, "rewards/chosen": -0.08736670762300491, "rewards/margins": 0.10206296294927597, "rewards/rejected": -0.18942967057228088, "step": 5566 }, { "epoch": 3.3960652737532406, "grad_norm": 4.302739143371582, "learning_rate": 2.5748928352725045e-06, "log_odds_chosen": 2.0593626499176025, "log_odds_ratio": -0.2895318865776062, "logits/chosen": -0.6910961866378784, "logits/rejected": -0.834753692150116, "logps/chosen": -0.9588765501976013, "logps/rejected": -2.5345330238342285, "loss": 1.125, "nll_loss": 1.0266247987747192, "rewards/accuracies": 0.875, "rewards/chosen": -0.09588766098022461, "rewards/margins": 0.1575656533241272, "rewards/rejected": -0.2534533143043518, "step": 5567 }, { "epoch": 3.396675308830258, "grad_norm": 2.5561916828155518, "learning_rate": 2.5739130434782607e-06, "log_odds_chosen": 0.9737884402275085, "log_odds_ratio": -0.5607162714004517, "logits/chosen": -0.8612494468688965, "logits/rejected": -0.8885070085525513, "logps/chosen": -0.792860746383667, "logps/rejected": -1.5094883441925049, "loss": 1.0331, "nll_loss": 0.9635978937149048, "rewards/accuracies": 0.375, "rewards/chosen": -0.07928607612848282, "rewards/margins": 0.07166275382041931, "rewards/rejected": -0.15094882249832153, "step": 5568 }, { "epoch": 3.3972853439072748, "grad_norm": 2.60412335395813, "learning_rate": 2.5729332516840168e-06, "log_odds_chosen": 2.034212827682495, "log_odds_ratio": -0.47057628631591797, "logits/chosen": -1.0154404640197754, "logits/rejected": -0.999747097492218, "logps/chosen": -0.8189100027084351, "logps/rejected": -2.4042389392852783, "loss": 1.2321, "nll_loss": 1.0624332427978516, "rewards/accuracies": 0.75, "rewards/chosen": -0.0818910002708435, "rewards/margins": 0.15853288769721985, "rewards/rejected": -0.24042388796806335, "step": 5569 }, { "epoch": 3.3978953789842916, "grad_norm": 1.6642218828201294, "learning_rate": 2.5719534598897733e-06, "log_odds_chosen": 2.4622159004211426, "log_odds_ratio": -0.3233990967273712, "logits/chosen": -0.7820917963981628, "logits/rejected": -0.8573411703109741, "logps/chosen": -0.5814996361732483, "logps/rejected": -2.3345515727996826, "loss": 1.0389, "nll_loss": 0.9250189065933228, "rewards/accuracies": 0.875, "rewards/chosen": -0.05814996361732483, "rewards/margins": 0.17530521750450134, "rewards/rejected": -0.23345518112182617, "step": 5570 }, { "epoch": 3.3985054140613085, "grad_norm": 1.925134301185608, "learning_rate": 2.5709736680955295e-06, "log_odds_chosen": 2.0130221843719482, "log_odds_ratio": -0.42699766159057617, "logits/chosen": -0.9637731909751892, "logits/rejected": -0.9913003444671631, "logps/chosen": -0.7123849987983704, "logps/rejected": -2.2991178035736084, "loss": 0.9191, "nll_loss": 0.8298051953315735, "rewards/accuracies": 0.5, "rewards/chosen": -0.07123850286006927, "rewards/margins": 0.1586732566356659, "rewards/rejected": -0.22991175949573517, "step": 5571 }, { "epoch": 3.3991154491383253, "grad_norm": 1.8006832599639893, "learning_rate": 2.569993876301286e-06, "log_odds_chosen": 2.3236618041992188, "log_odds_ratio": -0.3207976520061493, "logits/chosen": -0.8740415573120117, "logits/rejected": -0.9365559816360474, "logps/chosen": -0.518834114074707, "logps/rejected": -2.0312085151672363, "loss": 1.0609, "nll_loss": 0.8432709574699402, "rewards/accuracies": 0.875, "rewards/chosen": -0.051883406937122345, "rewards/margins": 0.15123742818832397, "rewards/rejected": -0.20312082767486572, "step": 5572 }, { "epoch": 3.3997254842153426, "grad_norm": 1.771925926208496, "learning_rate": 2.569014084507042e-06, "log_odds_chosen": 1.9077728986740112, "log_odds_ratio": -0.30648118257522583, "logits/chosen": -0.9972409009933472, "logits/rejected": -0.7877696752548218, "logps/chosen": -0.6200263500213623, "logps/rejected": -1.8860738277435303, "loss": 1.0449, "nll_loss": 1.1629688739776611, "rewards/accuracies": 0.75, "rewards/chosen": -0.06200263649225235, "rewards/margins": 0.12660475075244904, "rewards/rejected": -0.18860739469528198, "step": 5573 }, { "epoch": 3.4003355192923594, "grad_norm": 6.3109517097473145, "learning_rate": 2.5680342927127982e-06, "log_odds_chosen": 2.151703357696533, "log_odds_ratio": -0.35178399085998535, "logits/chosen": -0.8730570673942566, "logits/rejected": -0.9632227420806885, "logps/chosen": -0.7777673602104187, "logps/rejected": -2.3896005153656006, "loss": 0.9961, "nll_loss": 0.9199435710906982, "rewards/accuracies": 0.75, "rewards/chosen": -0.07777673006057739, "rewards/margins": 0.16118331253528595, "rewards/rejected": -0.23896005749702454, "step": 5574 }, { "epoch": 3.4009455543693763, "grad_norm": 4.213218688964844, "learning_rate": 2.5670545009185548e-06, "log_odds_chosen": 2.6825103759765625, "log_odds_ratio": -0.21174180507659912, "logits/chosen": -0.8422057628631592, "logits/rejected": -1.0609898567199707, "logps/chosen": -0.7219701409339905, "logps/rejected": -2.634204864501953, "loss": 1.0672, "nll_loss": 1.1267571449279785, "rewards/accuracies": 1.0, "rewards/chosen": -0.07219701260328293, "rewards/margins": 0.19122344255447388, "rewards/rejected": -0.2634204626083374, "step": 5575 }, { "epoch": 3.401555589446393, "grad_norm": 4.171882629394531, "learning_rate": 2.566074709124311e-06, "log_odds_chosen": 2.27864408493042, "log_odds_ratio": -0.3543476462364197, "logits/chosen": -0.8893657326698303, "logits/rejected": -0.8394794464111328, "logps/chosen": -0.6753052473068237, "logps/rejected": -2.5239105224609375, "loss": 0.9648, "nll_loss": 0.7884320020675659, "rewards/accuracies": 0.75, "rewards/chosen": -0.06753052771091461, "rewards/margins": 0.18486055731773376, "rewards/rejected": -0.2523910701274872, "step": 5576 }, { "epoch": 3.40216562452341, "grad_norm": 1.7261216640472412, "learning_rate": 2.565094917330067e-06, "log_odds_chosen": 1.9993746280670166, "log_odds_ratio": -0.4184282422065735, "logits/chosen": -0.6878737807273865, "logits/rejected": -0.8196125626564026, "logps/chosen": -0.7045551538467407, "logps/rejected": -2.015427350997925, "loss": 1.039, "nll_loss": 0.9028109312057495, "rewards/accuracies": 0.875, "rewards/chosen": -0.07045550644397736, "rewards/margins": 0.13108721375465393, "rewards/rejected": -0.20154273509979248, "step": 5577 }, { "epoch": 3.402775659600427, "grad_norm": 4.236235618591309, "learning_rate": 2.5641151255358236e-06, "log_odds_chosen": 1.723721981048584, "log_odds_ratio": -0.3569411039352417, "logits/chosen": -0.6864261627197266, "logits/rejected": -0.806806206703186, "logps/chosen": -0.946206271648407, "logps/rejected": -2.435403347015381, "loss": 1.1677, "nll_loss": 1.361860990524292, "rewards/accuracies": 0.75, "rewards/chosen": -0.09462063014507294, "rewards/margins": 0.1489197164773941, "rewards/rejected": -0.24354033172130585, "step": 5578 }, { "epoch": 3.403385694677444, "grad_norm": 2.9448044300079346, "learning_rate": 2.56313533374158e-06, "log_odds_chosen": 1.258334994316101, "log_odds_ratio": -0.4260070025920868, "logits/chosen": -1.1251606941223145, "logits/rejected": -1.0616772174835205, "logps/chosen": -0.9052664041519165, "logps/rejected": -1.8040618896484375, "loss": 1.1157, "nll_loss": 1.435863971710205, "rewards/accuracies": 0.75, "rewards/chosen": -0.09052664786577225, "rewards/margins": 0.08987954258918762, "rewards/rejected": -0.18040618300437927, "step": 5579 }, { "epoch": 3.403995729754461, "grad_norm": 1.1766180992126465, "learning_rate": 2.562155541947336e-06, "log_odds_chosen": 1.6854947805404663, "log_odds_ratio": -0.4597291648387909, "logits/chosen": -0.807370662689209, "logits/rejected": -1.0152255296707153, "logps/chosen": -0.7251759171485901, "logps/rejected": -2.0568244457244873, "loss": 0.9254, "nll_loss": 1.0361919403076172, "rewards/accuracies": 0.875, "rewards/chosen": -0.07251759618520737, "rewards/margins": 0.13316483795642853, "rewards/rejected": -0.2056824415922165, "step": 5580 }, { "epoch": 3.404605764831478, "grad_norm": 3.634206771850586, "learning_rate": 2.5611757501530924e-06, "log_odds_chosen": 1.8046457767486572, "log_odds_ratio": -0.4599425494670868, "logits/chosen": -0.9431333541870117, "logits/rejected": -1.1376852989196777, "logps/chosen": -0.7858373522758484, "logps/rejected": -2.2034401893615723, "loss": 1.2109, "nll_loss": 1.0045362710952759, "rewards/accuracies": 0.625, "rewards/chosen": -0.0785837322473526, "rewards/margins": 0.14176030457019806, "rewards/rejected": -0.22034403681755066, "step": 5581 }, { "epoch": 3.4052157999084947, "grad_norm": 1.887891173362732, "learning_rate": 2.560195958358849e-06, "log_odds_chosen": 4.1134443283081055, "log_odds_ratio": -0.1055607870221138, "logits/chosen": -0.5903834104537964, "logits/rejected": -0.992542028427124, "logps/chosen": -0.4380878210067749, "logps/rejected": -3.580526113510132, "loss": 0.8784, "nll_loss": 0.5704505443572998, "rewards/accuracies": 1.0, "rewards/chosen": -0.04380878061056137, "rewards/margins": 0.31424379348754883, "rewards/rejected": -0.3580526113510132, "step": 5582 }, { "epoch": 3.4058258349855115, "grad_norm": 1.068700909614563, "learning_rate": 2.5592161665646046e-06, "log_odds_chosen": 1.5618724822998047, "log_odds_ratio": -0.3520381450653076, "logits/chosen": -0.7534518241882324, "logits/rejected": -0.8642761707305908, "logps/chosen": -0.7043777108192444, "logps/rejected": -1.7858757972717285, "loss": 0.9049, "nll_loss": 0.9170675277709961, "rewards/accuracies": 0.875, "rewards/chosen": -0.07043777406215668, "rewards/margins": 0.10814981907606125, "rewards/rejected": -0.17858757078647614, "step": 5583 }, { "epoch": 3.406435870062529, "grad_norm": 2.843433380126953, "learning_rate": 2.558236374770361e-06, "log_odds_chosen": 3.535769462585449, "log_odds_ratio": -0.283812940120697, "logits/chosen": -0.6042120456695557, "logits/rejected": -0.9819627404212952, "logps/chosen": -0.7217949628829956, "logps/rejected": -3.6657166481018066, "loss": 0.9418, "nll_loss": 0.9696769118309021, "rewards/accuracies": 0.75, "rewards/chosen": -0.07217949628829956, "rewards/margins": 0.2943921685218811, "rewards/rejected": -0.36657166481018066, "step": 5584 }, { "epoch": 3.4070459051395456, "grad_norm": 2.3647263050079346, "learning_rate": 2.5572565829761173e-06, "log_odds_chosen": 1.5400116443634033, "log_odds_ratio": -0.4771324396133423, "logits/chosen": -0.7894826531410217, "logits/rejected": -0.9837905168533325, "logps/chosen": -1.0387938022613525, "logps/rejected": -2.196070909500122, "loss": 1.0161, "nll_loss": 0.994668185710907, "rewards/accuracies": 0.75, "rewards/chosen": -0.10387938469648361, "rewards/margins": 0.11572771519422531, "rewards/rejected": -0.21960708498954773, "step": 5585 }, { "epoch": 3.4076559402165625, "grad_norm": 1.243654489517212, "learning_rate": 2.556276791181874e-06, "log_odds_chosen": 3.5486249923706055, "log_odds_ratio": -0.278881698846817, "logits/chosen": -0.9107975363731384, "logits/rejected": -0.9056828022003174, "logps/chosen": -1.1250985860824585, "logps/rejected": -4.308182716369629, "loss": 1.0326, "nll_loss": 1.1402051448822021, "rewards/accuracies": 0.875, "rewards/chosen": -0.11250986158847809, "rewards/margins": 0.31830841302871704, "rewards/rejected": -0.43081825971603394, "step": 5586 }, { "epoch": 3.4082659752935793, "grad_norm": 11.462150573730469, "learning_rate": 2.55529699938763e-06, "log_odds_chosen": 2.7006311416625977, "log_odds_ratio": -0.25637087225914, "logits/chosen": -0.8918862342834473, "logits/rejected": -0.8652607202529907, "logps/chosen": -0.673343300819397, "logps/rejected": -2.446840763092041, "loss": 0.9068, "nll_loss": 0.8932065367698669, "rewards/accuracies": 0.875, "rewards/chosen": -0.06733433157205582, "rewards/margins": 0.1773497462272644, "rewards/rejected": -0.24468407034873962, "step": 5587 }, { "epoch": 3.408876010370596, "grad_norm": 1.887091040611267, "learning_rate": 2.554317207593386e-06, "log_odds_chosen": -0.30370569229125977, "log_odds_ratio": -0.9423675537109375, "logits/chosen": -1.0040013790130615, "logits/rejected": -1.0776140689849854, "logps/chosen": -1.1961214542388916, "logps/rejected": -0.9974625110626221, "loss": 1.2314, "nll_loss": 1.4570250511169434, "rewards/accuracies": 0.375, "rewards/chosen": -0.11961215734481812, "rewards/margins": -0.01986590027809143, "rewards/rejected": -0.09974625706672668, "step": 5588 }, { "epoch": 3.409486045447613, "grad_norm": 2.132307291030884, "learning_rate": 2.5533374157991426e-06, "log_odds_chosen": 2.616140842437744, "log_odds_ratio": -0.3406372666358948, "logits/chosen": -0.7079059481620789, "logits/rejected": -0.9960806369781494, "logps/chosen": -0.5133188962936401, "logps/rejected": -2.520193576812744, "loss": 1.0497, "nll_loss": 0.7917045950889587, "rewards/accuracies": 0.875, "rewards/chosen": -0.051331888884305954, "rewards/margins": 0.2006874829530716, "rewards/rejected": -0.25201937556266785, "step": 5589 }, { "epoch": 3.4100960805246303, "grad_norm": 1.343593955039978, "learning_rate": 2.552357624004899e-06, "log_odds_chosen": 3.1763916015625, "log_odds_ratio": -0.22489479184150696, "logits/chosen": -0.6928169131278992, "logits/rejected": -0.9400689601898193, "logps/chosen": -0.8341473340988159, "logps/rejected": -3.509692907333374, "loss": 0.9989, "nll_loss": 1.0520154237747192, "rewards/accuracies": 0.875, "rewards/chosen": -0.08341473340988159, "rewards/margins": 0.26755455136299133, "rewards/rejected": -0.3509693145751953, "step": 5590 }, { "epoch": 3.410706115601647, "grad_norm": 3.6783456802368164, "learning_rate": 2.551377832210655e-06, "log_odds_chosen": 2.2042336463928223, "log_odds_ratio": -0.23598292469978333, "logits/chosen": -0.6795018911361694, "logits/rejected": -0.8849910497665405, "logps/chosen": -0.6650888919830322, "logps/rejected": -2.2317676544189453, "loss": 0.9897, "nll_loss": 0.909393310546875, "rewards/accuracies": 0.875, "rewards/chosen": -0.06650888919830322, "rewards/margins": 0.15666787326335907, "rewards/rejected": -0.2231767475605011, "step": 5591 }, { "epoch": 3.411316150678664, "grad_norm": 10.012283325195312, "learning_rate": 2.5503980404164114e-06, "log_odds_chosen": 2.2514989376068115, "log_odds_ratio": -0.352003276348114, "logits/chosen": -1.0514092445373535, "logits/rejected": -0.9307074546813965, "logps/chosen": -0.8619369268417358, "logps/rejected": -2.849332094192505, "loss": 1.0062, "nll_loss": 1.0096588134765625, "rewards/accuracies": 0.875, "rewards/chosen": -0.08619369566440582, "rewards/margins": 0.19873951375484467, "rewards/rejected": -0.2849332094192505, "step": 5592 }, { "epoch": 3.411926185755681, "grad_norm": 8.524900436401367, "learning_rate": 2.549418248622168e-06, "log_odds_chosen": 2.053044319152832, "log_odds_ratio": -0.22163107991218567, "logits/chosen": -0.9591123461723328, "logits/rejected": -0.9455863237380981, "logps/chosen": -0.7590674161911011, "logps/rejected": -2.0913829803466797, "loss": 1.1281, "nll_loss": 1.081950068473816, "rewards/accuracies": 0.875, "rewards/chosen": -0.07590674608945847, "rewards/margins": 0.13323155045509338, "rewards/rejected": -0.20913828909397125, "step": 5593 }, { "epoch": 3.4125362208326977, "grad_norm": 1.9906671047210693, "learning_rate": 2.5484384568279237e-06, "log_odds_chosen": 2.856996536254883, "log_odds_ratio": -0.3959762454032898, "logits/chosen": -0.8281656503677368, "logits/rejected": -0.950590968132019, "logps/chosen": -0.8448929786682129, "logps/rejected": -3.341991662979126, "loss": 1.1669, "nll_loss": 1.150104284286499, "rewards/accuracies": 0.875, "rewards/chosen": -0.08448929339647293, "rewards/margins": 0.2497098743915558, "rewards/rejected": -0.3341991603374481, "step": 5594 }, { "epoch": 3.413146255909715, "grad_norm": 1.5408833026885986, "learning_rate": 2.54745866503368e-06, "log_odds_chosen": 1.532973051071167, "log_odds_ratio": -0.5375243425369263, "logits/chosen": -0.920741081237793, "logits/rejected": -0.9589384198188782, "logps/chosen": -1.057950735092163, "logps/rejected": -2.4109230041503906, "loss": 1.0767, "nll_loss": 1.3327476978302002, "rewards/accuracies": 0.5, "rewards/chosen": -0.10579507052898407, "rewards/margins": 0.1352972388267517, "rewards/rejected": -0.24109229445457458, "step": 5595 }, { "epoch": 3.413756290986732, "grad_norm": 2.5901949405670166, "learning_rate": 2.5464788732394367e-06, "log_odds_chosen": 2.855719566345215, "log_odds_ratio": -0.40391913056373596, "logits/chosen": -0.8961732387542725, "logits/rejected": -1.0225489139556885, "logps/chosen": -0.7308380007743835, "logps/rejected": -3.105548143386841, "loss": 1.0623, "nll_loss": 1.1270016431808472, "rewards/accuracies": 0.625, "rewards/chosen": -0.0730838030576706, "rewards/margins": 0.23747102916240692, "rewards/rejected": -0.3105548322200775, "step": 5596 }, { "epoch": 3.4143663260637487, "grad_norm": 1.5455747842788696, "learning_rate": 2.5454990814451924e-06, "log_odds_chosen": 2.0350682735443115, "log_odds_ratio": -0.33584871888160706, "logits/chosen": -0.8662658333778381, "logits/rejected": -0.8896121978759766, "logps/chosen": -0.6218252778053284, "logps/rejected": -2.1492257118225098, "loss": 1.0049, "nll_loss": 0.7300329208374023, "rewards/accuracies": 0.875, "rewards/chosen": -0.06218252703547478, "rewards/margins": 0.15274003148078918, "rewards/rejected": -0.21492257714271545, "step": 5597 }, { "epoch": 3.4149763611407655, "grad_norm": 3.4032247066497803, "learning_rate": 2.544519289650949e-06, "log_odds_chosen": 3.262693405151367, "log_odds_ratio": -0.3274608552455902, "logits/chosen": -0.8185223340988159, "logits/rejected": -1.0127217769622803, "logps/chosen": -0.8886139392852783, "logps/rejected": -3.4035019874572754, "loss": 1.0614, "nll_loss": 1.2146482467651367, "rewards/accuracies": 0.875, "rewards/chosen": -0.0888613909482956, "rewards/margins": 0.2514888048171997, "rewards/rejected": -0.3403502106666565, "step": 5598 }, { "epoch": 3.4155863962177824, "grad_norm": 1.943085789680481, "learning_rate": 2.5435394978567055e-06, "log_odds_chosen": 0.9266323447227478, "log_odds_ratio": -0.4041934013366699, "logits/chosen": -1.0426602363586426, "logits/rejected": -0.9962755441665649, "logps/chosen": -1.1477766036987305, "logps/rejected": -1.9010435342788696, "loss": 1.1809, "nll_loss": 1.273012638092041, "rewards/accuracies": 0.875, "rewards/chosen": -0.11477766931056976, "rewards/margins": 0.07532668113708496, "rewards/rejected": -0.19010436534881592, "step": 5599 }, { "epoch": 3.416196431294799, "grad_norm": 5.11128568649292, "learning_rate": 2.5425597060624617e-06, "log_odds_chosen": 2.00129771232605, "log_odds_ratio": -0.500890851020813, "logits/chosen": -0.8852340579032898, "logits/rejected": -0.927636981010437, "logps/chosen": -0.9072577953338623, "logps/rejected": -2.5214004516601562, "loss": 1.1677, "nll_loss": 1.2208480834960938, "rewards/accuracies": 0.625, "rewards/chosen": -0.09072577953338623, "rewards/margins": 0.1614142656326294, "rewards/rejected": -0.2521400451660156, "step": 5600 }, { "epoch": 3.4168064663718165, "grad_norm": 18.574705123901367, "learning_rate": 2.5415799142682178e-06, "log_odds_chosen": 2.5795798301696777, "log_odds_ratio": -0.28656330704689026, "logits/chosen": -0.8128678202629089, "logits/rejected": -0.997946560382843, "logps/chosen": -0.8656256794929504, "logps/rejected": -3.009382486343384, "loss": 0.9908, "nll_loss": 1.035650372505188, "rewards/accuracies": 0.875, "rewards/chosen": -0.08656256645917892, "rewards/margins": 0.21437567472457886, "rewards/rejected": -0.3009382486343384, "step": 5601 }, { "epoch": 3.4174165014488334, "grad_norm": 1.7283302545547485, "learning_rate": 2.5406001224739743e-06, "log_odds_chosen": 2.894026279449463, "log_odds_ratio": -0.2641352713108063, "logits/chosen": -0.8111082315444946, "logits/rejected": -0.9604469537734985, "logps/chosen": -0.5752874612808228, "logps/rejected": -2.631289482116699, "loss": 0.9504, "nll_loss": 0.886383593082428, "rewards/accuracies": 0.875, "rewards/chosen": -0.057528745383024216, "rewards/margins": 0.20560020208358765, "rewards/rejected": -0.26312893629074097, "step": 5602 }, { "epoch": 3.41802653652585, "grad_norm": 1.8097894191741943, "learning_rate": 2.5396203306797304e-06, "log_odds_chosen": 1.9048230648040771, "log_odds_ratio": -0.3484158217906952, "logits/chosen": -0.9916386008262634, "logits/rejected": -0.9787912368774414, "logps/chosen": -0.8332041501998901, "logps/rejected": -2.311177968978882, "loss": 1.1849, "nll_loss": 1.104611873626709, "rewards/accuracies": 0.75, "rewards/chosen": -0.08332041651010513, "rewards/margins": 0.1477973759174347, "rewards/rejected": -0.23111781477928162, "step": 5603 }, { "epoch": 3.418636571602867, "grad_norm": 1.5547583103179932, "learning_rate": 2.538640538885487e-06, "log_odds_chosen": -0.015811137855052948, "log_odds_ratio": -0.7415004968643188, "logits/chosen": -0.6929345726966858, "logits/rejected": -0.7662214040756226, "logps/chosen": -0.9698567390441895, "logps/rejected": -1.0001626014709473, "loss": 0.9923, "nll_loss": 1.0575127601623535, "rewards/accuracies": 0.375, "rewards/chosen": -0.09698568284511566, "rewards/margins": 0.003030586987733841, "rewards/rejected": -0.1000162735581398, "step": 5604 }, { "epoch": 3.4192466066798843, "grad_norm": 2.3516199588775635, "learning_rate": 2.5376607470912427e-06, "log_odds_chosen": 1.6422616243362427, "log_odds_ratio": -0.4089652895927429, "logits/chosen": -0.8059630393981934, "logits/rejected": -0.9056928753852844, "logps/chosen": -0.6677768230438232, "logps/rejected": -1.9131685495376587, "loss": 1.0193, "nll_loss": 0.9428877830505371, "rewards/accuracies": 0.875, "rewards/chosen": -0.06677769124507904, "rewards/margins": 0.12453915178775787, "rewards/rejected": -0.1913168579339981, "step": 5605 }, { "epoch": 3.419856641756901, "grad_norm": 2.121039390563965, "learning_rate": 2.5366809552969992e-06, "log_odds_chosen": 1.1896913051605225, "log_odds_ratio": -0.5043113231658936, "logits/chosen": -0.7347859740257263, "logits/rejected": -0.6477348208427429, "logps/chosen": -0.7843768000602722, "logps/rejected": -1.5841326713562012, "loss": 1.1391, "nll_loss": 0.9297871589660645, "rewards/accuracies": 0.5, "rewards/chosen": -0.0784376785159111, "rewards/margins": 0.07997559010982513, "rewards/rejected": -0.15841326117515564, "step": 5606 }, { "epoch": 3.420466676833918, "grad_norm": 1.4313846826553345, "learning_rate": 2.5357011635027558e-06, "log_odds_chosen": 1.5605154037475586, "log_odds_ratio": -0.43721601366996765, "logits/chosen": -0.5837439894676208, "logits/rejected": -0.6916192770004272, "logps/chosen": -0.5505927801132202, "logps/rejected": -1.6323497295379639, "loss": 0.8861, "nll_loss": 0.7835208773612976, "rewards/accuracies": 0.625, "rewards/chosen": -0.0550592839717865, "rewards/margins": 0.10817569494247437, "rewards/rejected": -0.16323497891426086, "step": 5607 }, { "epoch": 3.421076711910935, "grad_norm": 4.0716328620910645, "learning_rate": 2.5347213717085115e-06, "log_odds_chosen": 0.7889392375946045, "log_odds_ratio": -0.7415151596069336, "logits/chosen": -0.99769127368927, "logits/rejected": -0.9830111265182495, "logps/chosen": -1.10231614112854, "logps/rejected": -1.7600525617599487, "loss": 1.1179, "nll_loss": 1.204903244972229, "rewards/accuracies": 0.625, "rewards/chosen": -0.11023160815238953, "rewards/margins": 0.06577363610267639, "rewards/rejected": -0.17600524425506592, "step": 5608 }, { "epoch": 3.4216867469879517, "grad_norm": 1.7823340892791748, "learning_rate": 2.533741579914268e-06, "log_odds_chosen": 1.8421541452407837, "log_odds_ratio": -0.3352839946746826, "logits/chosen": -0.7771919965744019, "logits/rejected": -0.7303757071495056, "logps/chosen": -0.5763311386108398, "logps/rejected": -1.9238137006759644, "loss": 1.0131, "nll_loss": 1.058167576789856, "rewards/accuracies": 0.875, "rewards/chosen": -0.057633113116025925, "rewards/margins": 0.13474826514720917, "rewards/rejected": -0.1923813819885254, "step": 5609 }, { "epoch": 3.4222967820649686, "grad_norm": 2.1556308269500732, "learning_rate": 2.5327617881200246e-06, "log_odds_chosen": 2.1427175998687744, "log_odds_ratio": -0.29005634784698486, "logits/chosen": -0.8795598745346069, "logits/rejected": -0.9780963659286499, "logps/chosen": -0.8744196891784668, "logps/rejected": -2.553936243057251, "loss": 1.0503, "nll_loss": 0.8960421681404114, "rewards/accuracies": 0.875, "rewards/chosen": -0.08744198083877563, "rewards/margins": 0.16795167326927185, "rewards/rejected": -0.2553936541080475, "step": 5610 }, { "epoch": 3.422906817141986, "grad_norm": 2.0561764240264893, "learning_rate": 2.5317819963257807e-06, "log_odds_chosen": 1.8528839349746704, "log_odds_ratio": -0.4009658396244049, "logits/chosen": -0.8042210340499878, "logits/rejected": -0.9498423337936401, "logps/chosen": -0.772591769695282, "logps/rejected": -2.007627010345459, "loss": 1.2096, "nll_loss": 1.186633586883545, "rewards/accuracies": 0.625, "rewards/chosen": -0.07725917547941208, "rewards/margins": 0.12350353598594666, "rewards/rejected": -0.20076271891593933, "step": 5611 }, { "epoch": 3.4235168522190027, "grad_norm": 2.168956756591797, "learning_rate": 2.530802204531537e-06, "log_odds_chosen": 1.6523150205612183, "log_odds_ratio": -0.39524418115615845, "logits/chosen": -0.7718392014503479, "logits/rejected": -0.9582618474960327, "logps/chosen": -0.682119607925415, "logps/rejected": -1.8752998113632202, "loss": 1.1043, "nll_loss": 0.8481709361076355, "rewards/accuracies": 0.75, "rewards/chosen": -0.06821197271347046, "rewards/margins": 0.11931800097227097, "rewards/rejected": -0.18752998113632202, "step": 5612 }, { "epoch": 3.4241268872960196, "grad_norm": 2.2393722534179688, "learning_rate": 2.5298224127372934e-06, "log_odds_chosen": 3.13107967376709, "log_odds_ratio": -0.41581863164901733, "logits/chosen": -0.8555763959884644, "logits/rejected": -0.8548799753189087, "logps/chosen": -0.8947049379348755, "logps/rejected": -3.611440896987915, "loss": 1.0069, "nll_loss": 1.099714756011963, "rewards/accuracies": 0.625, "rewards/chosen": -0.08947049826383591, "rewards/margins": 0.2716735899448395, "rewards/rejected": -0.3611440658569336, "step": 5613 }, { "epoch": 3.4247369223730364, "grad_norm": 4.297885417938232, "learning_rate": 2.5288426209430495e-06, "log_odds_chosen": 1.7973934412002563, "log_odds_ratio": -0.39609017968177795, "logits/chosen": -0.7103071808815002, "logits/rejected": -0.7470450401306152, "logps/chosen": -0.7613310813903809, "logps/rejected": -2.1136374473571777, "loss": 1.0451, "nll_loss": 0.8525496125221252, "rewards/accuracies": 0.625, "rewards/chosen": -0.0761331096291542, "rewards/margins": 0.1352306306362152, "rewards/rejected": -0.21136373281478882, "step": 5614 }, { "epoch": 3.4253469574500532, "grad_norm": 1.6842209100723267, "learning_rate": 2.5278628291488056e-06, "log_odds_chosen": 1.7561806440353394, "log_odds_ratio": -0.30429890751838684, "logits/chosen": -0.7398115396499634, "logits/rejected": -0.9562726020812988, "logps/chosen": -0.8307405710220337, "logps/rejected": -2.131995439529419, "loss": 1.0223, "nll_loss": 0.9928127527236938, "rewards/accuracies": 0.875, "rewards/chosen": -0.08307406306266785, "rewards/margins": 0.130125492811203, "rewards/rejected": -0.21319955587387085, "step": 5615 }, { "epoch": 3.4259569925270705, "grad_norm": 14.429130554199219, "learning_rate": 2.526883037354562e-06, "log_odds_chosen": 1.3851549625396729, "log_odds_ratio": -0.5642078518867493, "logits/chosen": -0.8783438801765442, "logits/rejected": -0.9474769830703735, "logps/chosen": -0.9429863691329956, "logps/rejected": -2.139946222305298, "loss": 1.0275, "nll_loss": 0.9684069156646729, "rewards/accuracies": 0.625, "rewards/chosen": -0.09429863095283508, "rewards/margins": 0.1196959912776947, "rewards/rejected": -0.21399462223052979, "step": 5616 }, { "epoch": 3.4265670276040874, "grad_norm": 1.826422095298767, "learning_rate": 2.5259032455603183e-06, "log_odds_chosen": 1.067572832107544, "log_odds_ratio": -0.4939124286174774, "logits/chosen": -0.9531588554382324, "logits/rejected": -0.9798916578292847, "logps/chosen": -0.6850095391273499, "logps/rejected": -1.2228569984436035, "loss": 1.0337, "nll_loss": 1.0529630184173584, "rewards/accuracies": 0.625, "rewards/chosen": -0.06850095093250275, "rewards/margins": 0.053784750401973724, "rewards/rejected": -0.12228570878505707, "step": 5617 }, { "epoch": 3.4271770626811042, "grad_norm": 3.5245747566223145, "learning_rate": 2.524923453766075e-06, "log_odds_chosen": 1.1724680662155151, "log_odds_ratio": -0.5315419435501099, "logits/chosen": -0.936805009841919, "logits/rejected": -0.9537410736083984, "logps/chosen": -0.9720275402069092, "logps/rejected": -1.9663300514221191, "loss": 1.1304, "nll_loss": 1.1318233013153076, "rewards/accuracies": 0.75, "rewards/chosen": -0.09720276296138763, "rewards/margins": 0.09943024814128876, "rewards/rejected": -0.1966330111026764, "step": 5618 }, { "epoch": 3.427787097758121, "grad_norm": 1.9100579023361206, "learning_rate": 2.523943661971831e-06, "log_odds_chosen": 2.0093424320220947, "log_odds_ratio": -0.5454968214035034, "logits/chosen": -0.798326313495636, "logits/rejected": -0.8765964508056641, "logps/chosen": -0.8418947458267212, "logps/rejected": -2.4832959175109863, "loss": 1.0164, "nll_loss": 0.958862841129303, "rewards/accuracies": 0.625, "rewards/chosen": -0.08418947458267212, "rewards/margins": 0.16414013504981995, "rewards/rejected": -0.24832962453365326, "step": 5619 }, { "epoch": 3.428397132835138, "grad_norm": 4.173308849334717, "learning_rate": 2.522963870177587e-06, "log_odds_chosen": 0.3803718388080597, "log_odds_ratio": -0.5806607604026794, "logits/chosen": -0.8821232318878174, "logits/rejected": -0.9134973287582397, "logps/chosen": -0.9957897663116455, "logps/rejected": -1.2770371437072754, "loss": 1.0951, "nll_loss": 1.1292402744293213, "rewards/accuracies": 0.625, "rewards/chosen": -0.09957896918058395, "rewards/margins": 0.028124742209911346, "rewards/rejected": -0.1277037113904953, "step": 5620 }, { "epoch": 3.4290071679121548, "grad_norm": 6.888010501861572, "learning_rate": 2.5219840783833436e-06, "log_odds_chosen": 2.855059862136841, "log_odds_ratio": -0.27958357334136963, "logits/chosen": -0.8090479373931885, "logits/rejected": -0.9739763736724854, "logps/chosen": -1.004604697227478, "logps/rejected": -3.380742073059082, "loss": 0.9995, "nll_loss": 0.9712921977043152, "rewards/accuracies": 0.875, "rewards/chosen": -0.1004604771733284, "rewards/margins": 0.2376137524843216, "rewards/rejected": -0.3380742371082306, "step": 5621 }, { "epoch": 3.429617202989172, "grad_norm": 1.3078032732009888, "learning_rate": 2.5210042865890997e-06, "log_odds_chosen": 2.3786354064941406, "log_odds_ratio": -0.3307918608188629, "logits/chosen": -0.7540591359138489, "logits/rejected": -1.0315041542053223, "logps/chosen": -0.6704798936843872, "logps/rejected": -2.3937172889709473, "loss": 1.0688, "nll_loss": 0.8070820569992065, "rewards/accuracies": 0.75, "rewards/chosen": -0.06704799085855484, "rewards/margins": 0.17232376337051392, "rewards/rejected": -0.23937174677848816, "step": 5622 }, { "epoch": 3.430227238066189, "grad_norm": 2.033118486404419, "learning_rate": 2.520024494794856e-06, "log_odds_chosen": 0.9337511658668518, "log_odds_ratio": -0.5006064772605896, "logits/chosen": -0.841026782989502, "logits/rejected": -0.9402709007263184, "logps/chosen": -0.6896334886550903, "logps/rejected": -1.2537779808044434, "loss": 1.0325, "nll_loss": 1.112133264541626, "rewards/accuracies": 0.75, "rewards/chosen": -0.06896334886550903, "rewards/margins": 0.05641445145010948, "rewards/rejected": -0.1253778040409088, "step": 5623 }, { "epoch": 3.4308372731432057, "grad_norm": 1.9877718687057495, "learning_rate": 2.5190447030006124e-06, "log_odds_chosen": 2.8031277656555176, "log_odds_ratio": -0.3230224847793579, "logits/chosen": -0.6359393000602722, "logits/rejected": -0.8784733414649963, "logps/chosen": -0.5789452195167542, "logps/rejected": -2.7443649768829346, "loss": 0.9878, "nll_loss": 0.8823436498641968, "rewards/accuracies": 0.875, "rewards/chosen": -0.057894524186849594, "rewards/margins": 0.21654199063777924, "rewards/rejected": -0.2744365334510803, "step": 5624 }, { "epoch": 3.4314473082202226, "grad_norm": 1.7250391244888306, "learning_rate": 2.5180649112063685e-06, "log_odds_chosen": 1.3205680847167969, "log_odds_ratio": -0.38798707723617554, "logits/chosen": -0.9889893531799316, "logits/rejected": -1.027849555015564, "logps/chosen": -0.9655271172523499, "logps/rejected": -1.9835388660430908, "loss": 1.1852, "nll_loss": 1.1106419563293457, "rewards/accuracies": 1.0, "rewards/chosen": -0.09655271470546722, "rewards/margins": 0.10180117189884186, "rewards/rejected": -0.19835388660430908, "step": 5625 }, { "epoch": 3.4320573432972394, "grad_norm": 2.704827308654785, "learning_rate": 2.5170851194121246e-06, "log_odds_chosen": 1.2366259098052979, "log_odds_ratio": -0.5299646854400635, "logits/chosen": -0.810200572013855, "logits/rejected": -0.9488860368728638, "logps/chosen": -0.8655447363853455, "logps/rejected": -1.7919726371765137, "loss": 1.0108, "nll_loss": 1.0051243305206299, "rewards/accuracies": 0.375, "rewards/chosen": -0.08655448257923126, "rewards/margins": 0.09264279901981354, "rewards/rejected": -0.1791972815990448, "step": 5626 }, { "epoch": 3.4326673783742567, "grad_norm": 9.730889320373535, "learning_rate": 2.516105327617881e-06, "log_odds_chosen": 0.15507981181144714, "log_odds_ratio": -0.6950841546058655, "logits/chosen": -0.8471488356590271, "logits/rejected": -0.8982276320457458, "logps/chosen": -0.9071993827819824, "logps/rejected": -0.9608293175697327, "loss": 1.2069, "nll_loss": 1.0825918912887573, "rewards/accuracies": 0.625, "rewards/chosen": -0.09071995317935944, "rewards/margins": 0.005362994037568569, "rewards/rejected": -0.09608293324708939, "step": 5627 }, { "epoch": 3.4332774134512736, "grad_norm": 2.4801127910614014, "learning_rate": 2.5151255358236373e-06, "log_odds_chosen": 1.3931612968444824, "log_odds_ratio": -0.4115862548351288, "logits/chosen": -0.7993291616439819, "logits/rejected": -0.9091290235519409, "logps/chosen": -0.7607916593551636, "logps/rejected": -1.5847067832946777, "loss": 1.1605, "nll_loss": 1.0257136821746826, "rewards/accuracies": 0.875, "rewards/chosen": -0.07607915997505188, "rewards/margins": 0.08239152282476425, "rewards/rejected": -0.15847069025039673, "step": 5628 }, { "epoch": 3.4338874485282904, "grad_norm": 1.702040195465088, "learning_rate": 2.5141457440293934e-06, "log_odds_chosen": 1.1489307880401611, "log_odds_ratio": -0.3968455195426941, "logits/chosen": -0.7326833009719849, "logits/rejected": -0.8062812089920044, "logps/chosen": -0.6316267251968384, "logps/rejected": -1.4362379312515259, "loss": 0.9691, "nll_loss": 1.0015466213226318, "rewards/accuracies": 0.875, "rewards/chosen": -0.0631626695394516, "rewards/margins": 0.08046112954616547, "rewards/rejected": -0.14362381398677826, "step": 5629 }, { "epoch": 3.4344974836053073, "grad_norm": 1.7933330535888672, "learning_rate": 2.51316595223515e-06, "log_odds_chosen": 0.7251556515693665, "log_odds_ratio": -0.5128795504570007, "logits/chosen": -1.0308542251586914, "logits/rejected": -1.0470012426376343, "logps/chosen": -1.0239458084106445, "logps/rejected": -1.5067105293273926, "loss": 1.1659, "nll_loss": 1.2491776943206787, "rewards/accuracies": 0.75, "rewards/chosen": -0.10239458829164505, "rewards/margins": 0.04827647656202316, "rewards/rejected": -0.1506710648536682, "step": 5630 }, { "epoch": 3.435107518682324, "grad_norm": 1.4396249055862427, "learning_rate": 2.512186160440906e-06, "log_odds_chosen": 3.327244758605957, "log_odds_ratio": -0.1828894317150116, "logits/chosen": -0.8098413348197937, "logits/rejected": -1.0814814567565918, "logps/chosen": -0.558281660079956, "logps/rejected": -3.1356334686279297, "loss": 0.7947, "nll_loss": 0.6890925765037537, "rewards/accuracies": 0.875, "rewards/chosen": -0.055828165262937546, "rewards/margins": 0.2577351927757263, "rewards/rejected": -0.31356334686279297, "step": 5631 }, { "epoch": 3.435717553759341, "grad_norm": 1.737694263458252, "learning_rate": 2.5112063686466627e-06, "log_odds_chosen": 1.2793560028076172, "log_odds_ratio": -0.4067292809486389, "logits/chosen": -0.7132540941238403, "logits/rejected": -0.8645719289779663, "logps/chosen": -0.7319786548614502, "logps/rejected": -1.602108359336853, "loss": 0.9491, "nll_loss": 1.0581228733062744, "rewards/accuracies": 0.625, "rewards/chosen": -0.0731978639960289, "rewards/margins": 0.08701296150684357, "rewards/rejected": -0.16021083295345306, "step": 5632 }, { "epoch": 3.4363275888363582, "grad_norm": 2.5316004753112793, "learning_rate": 2.5102265768524188e-06, "log_odds_chosen": 2.561695098876953, "log_odds_ratio": -0.3540493845939636, "logits/chosen": -0.7560078501701355, "logits/rejected": -0.9325546026229858, "logps/chosen": -0.5145149827003479, "logps/rejected": -2.2457427978515625, "loss": 1.1856, "nll_loss": 1.1454949378967285, "rewards/accuracies": 0.625, "rewards/chosen": -0.05145149677991867, "rewards/margins": 0.17312276363372803, "rewards/rejected": -0.2245742678642273, "step": 5633 }, { "epoch": 3.436937623913375, "grad_norm": 2.877856731414795, "learning_rate": 2.509246785058175e-06, "log_odds_chosen": 1.539395809173584, "log_odds_ratio": -0.36945655941963196, "logits/chosen": -0.9072182774543762, "logits/rejected": -0.9885152578353882, "logps/chosen": -0.8635424375534058, "logps/rejected": -1.9792436361312866, "loss": 0.9451, "nll_loss": 0.9390491247177124, "rewards/accuracies": 0.875, "rewards/chosen": -0.08635424822568893, "rewards/margins": 0.11157011985778809, "rewards/rejected": -0.19792436063289642, "step": 5634 }, { "epoch": 3.437547658990392, "grad_norm": 1.9692825078964233, "learning_rate": 2.5082669932639314e-06, "log_odds_chosen": 0.4386102557182312, "log_odds_ratio": -0.779018759727478, "logits/chosen": -0.9529127478599548, "logits/rejected": -1.083364725112915, "logps/chosen": -1.1626567840576172, "logps/rejected": -1.5444687604904175, "loss": 1.0992, "nll_loss": 1.4022564888000488, "rewards/accuracies": 0.375, "rewards/chosen": -0.1162656843662262, "rewards/margins": 0.03818119689822197, "rewards/rejected": -0.15444689989089966, "step": 5635 }, { "epoch": 3.438157694067409, "grad_norm": 2.5323362350463867, "learning_rate": 2.5072872014696876e-06, "log_odds_chosen": 2.923124313354492, "log_odds_ratio": -0.37918925285339355, "logits/chosen": -0.9363693594932556, "logits/rejected": -0.9144501686096191, "logps/chosen": -1.1366857290267944, "logps/rejected": -3.819417715072632, "loss": 1.0017, "nll_loss": 1.129327654838562, "rewards/accuracies": 0.75, "rewards/chosen": -0.11366858333349228, "rewards/margins": 0.2682732045650482, "rewards/rejected": -0.3819417357444763, "step": 5636 }, { "epoch": 3.4387677291444256, "grad_norm": 1.3303897380828857, "learning_rate": 2.5063074096754437e-06, "log_odds_chosen": 2.7790732383728027, "log_odds_ratio": -0.37628173828125, "logits/chosen": -0.8452703952789307, "logits/rejected": -1.0603989362716675, "logps/chosen": -0.9164390563964844, "logps/rejected": -3.205270767211914, "loss": 1.1516, "nll_loss": 1.0345877408981323, "rewards/accuracies": 0.875, "rewards/chosen": -0.09164390712976456, "rewards/margins": 0.22888320684432983, "rewards/rejected": -0.3205271065235138, "step": 5637 }, { "epoch": 3.439377764221443, "grad_norm": 2.556248188018799, "learning_rate": 2.5053276178812002e-06, "log_odds_chosen": 4.452503204345703, "log_odds_ratio": -0.22077903151512146, "logits/chosen": -0.6981009840965271, "logits/rejected": -0.9175707101821899, "logps/chosen": -0.8766227960586548, "logps/rejected": -4.718942642211914, "loss": 1.0473, "nll_loss": 0.8940126895904541, "rewards/accuracies": 1.0, "rewards/chosen": -0.08766227960586548, "rewards/margins": 0.3842319846153259, "rewards/rejected": -0.4718942642211914, "step": 5638 }, { "epoch": 3.4399877992984598, "grad_norm": 3.3082756996154785, "learning_rate": 2.5043478260869568e-06, "log_odds_chosen": 1.1609476804733276, "log_odds_ratio": -0.38349324464797974, "logits/chosen": -0.7154301404953003, "logits/rejected": -0.9675799012184143, "logps/chosen": -0.7999920845031738, "logps/rejected": -1.5921449661254883, "loss": 1.0476, "nll_loss": 1.1664149761199951, "rewards/accuracies": 0.875, "rewards/chosen": -0.07999920845031738, "rewards/margins": 0.07921528816223145, "rewards/rejected": -0.15921451151371002, "step": 5639 }, { "epoch": 3.4405978343754766, "grad_norm": 14.39570426940918, "learning_rate": 2.5033680342927125e-06, "log_odds_chosen": 2.021237850189209, "log_odds_ratio": -0.5728819370269775, "logits/chosen": -0.6810837984085083, "logits/rejected": -0.8475151658058167, "logps/chosen": -0.8434185981750488, "logps/rejected": -2.4925758838653564, "loss": 1.0587, "nll_loss": 1.013122320175171, "rewards/accuracies": 0.625, "rewards/chosen": -0.0843418538570404, "rewards/margins": 0.16491571068763733, "rewards/rejected": -0.24925756454467773, "step": 5640 }, { "epoch": 3.4412078694524935, "grad_norm": 5.4754157066345215, "learning_rate": 2.502388242498469e-06, "log_odds_chosen": 1.1535589694976807, "log_odds_ratio": -0.37054747343063354, "logits/chosen": -0.9931522011756897, "logits/rejected": -1.05873441696167, "logps/chosen": -1.0702348947525024, "logps/rejected": -2.0124869346618652, "loss": 0.9803, "nll_loss": 1.121875524520874, "rewards/accuracies": 0.875, "rewards/chosen": -0.10702349245548248, "rewards/margins": 0.09422518312931061, "rewards/rejected": -0.2012486755847931, "step": 5641 }, { "epoch": 3.4418179045295103, "grad_norm": 4.904305934906006, "learning_rate": 2.5014084507042256e-06, "log_odds_chosen": 1.0177136659622192, "log_odds_ratio": -0.4814983010292053, "logits/chosen": -0.9976903796195984, "logits/rejected": -1.0486851930618286, "logps/chosen": -0.7297229170799255, "logps/rejected": -1.3662821054458618, "loss": 1.0117, "nll_loss": 1.0866715908050537, "rewards/accuracies": 0.625, "rewards/chosen": -0.07297229021787643, "rewards/margins": 0.06365591287612915, "rewards/rejected": -0.13662821054458618, "step": 5642 }, { "epoch": 3.442427939606527, "grad_norm": 1.536444067955017, "learning_rate": 2.5004286589099813e-06, "log_odds_chosen": 2.126798152923584, "log_odds_ratio": -0.38977542519569397, "logits/chosen": -0.5660824775695801, "logits/rejected": -0.7972195148468018, "logps/chosen": -0.7004609107971191, "logps/rejected": -2.4032769203186035, "loss": 0.945, "nll_loss": 0.8828909397125244, "rewards/accuracies": 0.75, "rewards/chosen": -0.07004609704017639, "rewards/margins": 0.17028160393238068, "rewards/rejected": -0.24032771587371826, "step": 5643 }, { "epoch": 3.4430379746835444, "grad_norm": 3.1644177436828613, "learning_rate": 2.499448867115738e-06, "log_odds_chosen": 0.7455447912216187, "log_odds_ratio": -0.6236059665679932, "logits/chosen": -0.9390392899513245, "logits/rejected": -0.9607279300689697, "logps/chosen": -0.8825060129165649, "logps/rejected": -1.3885459899902344, "loss": 0.9552, "nll_loss": 1.0577785968780518, "rewards/accuracies": 0.5, "rewards/chosen": -0.08825060725212097, "rewards/margins": 0.050603993237018585, "rewards/rejected": -0.13885459303855896, "step": 5644 }, { "epoch": 3.4436480097605613, "grad_norm": 1.405167818069458, "learning_rate": 2.498469075321494e-06, "log_odds_chosen": 1.413701057434082, "log_odds_ratio": -0.6276629567146301, "logits/chosen": -0.8017858862876892, "logits/rejected": -0.8384419679641724, "logps/chosen": -0.9832407236099243, "logps/rejected": -2.110675573348999, "loss": 1.0976, "nll_loss": 1.2590692043304443, "rewards/accuracies": 0.5, "rewards/chosen": -0.09832407534122467, "rewards/margins": 0.11274348199367523, "rewards/rejected": -0.2110675573348999, "step": 5645 }, { "epoch": 3.444258044837578, "grad_norm": 4.341750621795654, "learning_rate": 2.4974892835272505e-06, "log_odds_chosen": 1.1971311569213867, "log_odds_ratio": -0.449718713760376, "logits/chosen": -0.8788666725158691, "logits/rejected": -1.0751360654830933, "logps/chosen": -0.7478901147842407, "logps/rejected": -1.4064300060272217, "loss": 1.0517, "nll_loss": 1.0518906116485596, "rewards/accuracies": 0.75, "rewards/chosen": -0.07478901743888855, "rewards/margins": 0.06585400551557541, "rewards/rejected": -0.14064301550388336, "step": 5646 }, { "epoch": 3.444868079914595, "grad_norm": 1.2672165632247925, "learning_rate": 2.4965094917330066e-06, "log_odds_chosen": 1.4264129400253296, "log_odds_ratio": -0.6031501293182373, "logits/chosen": -0.7984573841094971, "logits/rejected": -0.985893189907074, "logps/chosen": -0.8553723096847534, "logps/rejected": -2.0179808139801025, "loss": 1.124, "nll_loss": 1.3176164627075195, "rewards/accuracies": 0.5, "rewards/chosen": -0.08553722500801086, "rewards/margins": 0.11626085638999939, "rewards/rejected": -0.20179809629917145, "step": 5647 }, { "epoch": 3.445478114991612, "grad_norm": 1.2069405317306519, "learning_rate": 2.4955296999387627e-06, "log_odds_chosen": 1.174476146697998, "log_odds_ratio": -0.7245378494262695, "logits/chosen": -0.7334039211273193, "logits/rejected": -0.923890233039856, "logps/chosen": -1.0248732566833496, "logps/rejected": -2.0683627128601074, "loss": 1.148, "nll_loss": 1.091300368309021, "rewards/accuracies": 0.5, "rewards/chosen": -0.10248733311891556, "rewards/margins": 0.10434893518686295, "rewards/rejected": -0.2068362832069397, "step": 5648 }, { "epoch": 3.446088150068629, "grad_norm": 1.1548140048980713, "learning_rate": 2.4945499081445193e-06, "log_odds_chosen": 2.211805820465088, "log_odds_ratio": -0.312974214553833, "logits/chosen": -0.752055287361145, "logits/rejected": -0.8416714668273926, "logps/chosen": -0.533737301826477, "logps/rejected": -2.084223985671997, "loss": 0.9765, "nll_loss": 0.9099007844924927, "rewards/accuracies": 0.875, "rewards/chosen": -0.053373731672763824, "rewards/margins": 0.1550486832857132, "rewards/rejected": -0.20842242240905762, "step": 5649 }, { "epoch": 3.446698185145646, "grad_norm": 2.792133331298828, "learning_rate": 2.4935701163502754e-06, "log_odds_chosen": 1.4773855209350586, "log_odds_ratio": -0.5886059403419495, "logits/chosen": -1.0240237712860107, "logits/rejected": -0.9574434161186218, "logps/chosen": -0.9969604015350342, "logps/rejected": -2.2903544902801514, "loss": 1.0418, "nll_loss": 1.2361536026000977, "rewards/accuracies": 0.5, "rewards/chosen": -0.09969604015350342, "rewards/margins": 0.12933941185474396, "rewards/rejected": -0.22903545200824738, "step": 5650 }, { "epoch": 3.447308220222663, "grad_norm": 3.231935977935791, "learning_rate": 2.4925903245560315e-06, "log_odds_chosen": 3.987683057785034, "log_odds_ratio": -0.2932942509651184, "logits/chosen": -0.7878016233444214, "logits/rejected": -0.7948318719863892, "logps/chosen": -0.696914792060852, "logps/rejected": -4.066899299621582, "loss": 1.0833, "nll_loss": 0.972718358039856, "rewards/accuracies": 0.875, "rewards/chosen": -0.0696914792060852, "rewards/margins": 0.33699849247932434, "rewards/rejected": -0.40668994188308716, "step": 5651 }, { "epoch": 3.4479182552996797, "grad_norm": 6.28695011138916, "learning_rate": 2.491610532761788e-06, "log_odds_chosen": 2.1651105880737305, "log_odds_ratio": -0.32004669308662415, "logits/chosen": -0.6655739545822144, "logits/rejected": -0.8324452042579651, "logps/chosen": -0.7023603916168213, "logps/rejected": -2.2064530849456787, "loss": 0.9921, "nll_loss": 0.9300968647003174, "rewards/accuracies": 0.875, "rewards/chosen": -0.07023604214191437, "rewards/margins": 0.1504092812538147, "rewards/rejected": -0.22064530849456787, "step": 5652 }, { "epoch": 3.4485282903766965, "grad_norm": 1.855450987815857, "learning_rate": 2.4906307409675446e-06, "log_odds_chosen": 2.306443214416504, "log_odds_ratio": -0.32143527269363403, "logits/chosen": -0.7168757319450378, "logits/rejected": -0.770862877368927, "logps/chosen": -0.6458565592765808, "logps/rejected": -2.196932315826416, "loss": 0.949, "nll_loss": 0.9744654893875122, "rewards/accuracies": 0.875, "rewards/chosen": -0.06458565592765808, "rewards/margins": 0.15510758757591248, "rewards/rejected": -0.21969322860240936, "step": 5653 }, { "epoch": 3.4491383254537133, "grad_norm": 1.4647008180618286, "learning_rate": 2.4896509491733003e-06, "log_odds_chosen": 1.529662847518921, "log_odds_ratio": -0.5357871055603027, "logits/chosen": -0.765033483505249, "logits/rejected": -0.8475483655929565, "logps/chosen": -0.856452465057373, "logps/rejected": -2.046220302581787, "loss": 1.1168, "nll_loss": 0.9620333313941956, "rewards/accuracies": 0.75, "rewards/chosen": -0.08564525097608566, "rewards/margins": 0.11897678673267365, "rewards/rejected": -0.2046220302581787, "step": 5654 }, { "epoch": 3.4497483605307306, "grad_norm": 1.0763626098632812, "learning_rate": 2.488671157379057e-06, "log_odds_chosen": 1.288062572479248, "log_odds_ratio": -0.4606631100177765, "logits/chosen": -0.9408506155014038, "logits/rejected": -0.9597338438034058, "logps/chosen": -0.7462801933288574, "logps/rejected": -1.6306697130203247, "loss": 1.1051, "nll_loss": 1.152761697769165, "rewards/accuracies": 0.75, "rewards/chosen": -0.07462801784276962, "rewards/margins": 0.08843895047903061, "rewards/rejected": -0.16306696832180023, "step": 5655 }, { "epoch": 3.4503583956077475, "grad_norm": 1.8492215871810913, "learning_rate": 2.4876913655848134e-06, "log_odds_chosen": 0.7788654565811157, "log_odds_ratio": -0.5957063436508179, "logits/chosen": -0.7263781428337097, "logits/rejected": -0.8446336388587952, "logps/chosen": -0.9297829866409302, "logps/rejected": -1.5828572511672974, "loss": 1.1064, "nll_loss": 1.1915361881256104, "rewards/accuracies": 0.5, "rewards/chosen": -0.09297829866409302, "rewards/margins": 0.06530742347240448, "rewards/rejected": -0.1582857072353363, "step": 5656 }, { "epoch": 3.4509684306847643, "grad_norm": 2.078859567642212, "learning_rate": 2.486711573790569e-06, "log_odds_chosen": 1.3601841926574707, "log_odds_ratio": -0.5847318768501282, "logits/chosen": -0.745743989944458, "logits/rejected": -0.8308242559432983, "logps/chosen": -0.7453395128250122, "logps/rejected": -1.9217041730880737, "loss": 1.0732, "nll_loss": 1.0632551908493042, "rewards/accuracies": 0.625, "rewards/chosen": -0.07453394681215286, "rewards/margins": 0.11763647198677063, "rewards/rejected": -0.1921704113483429, "step": 5657 }, { "epoch": 3.451578465761781, "grad_norm": 1.5158611536026, "learning_rate": 2.4857317819963256e-06, "log_odds_chosen": 1.4755995273590088, "log_odds_ratio": -0.4380269944667816, "logits/chosen": -0.893572211265564, "logits/rejected": -0.9226732850074768, "logps/chosen": -0.8641572594642639, "logps/rejected": -1.9805957078933716, "loss": 0.9877, "nll_loss": 0.9841121435165405, "rewards/accuracies": 0.75, "rewards/chosen": -0.08641573041677475, "rewards/margins": 0.11164384335279465, "rewards/rejected": -0.1980595737695694, "step": 5658 }, { "epoch": 3.4521885008387985, "grad_norm": 1.3473361730575562, "learning_rate": 2.484751990202082e-06, "log_odds_chosen": 1.3965173959732056, "log_odds_ratio": -0.45754924416542053, "logits/chosen": -0.8583574295043945, "logits/rejected": -0.7373091578483582, "logps/chosen": -0.7377995848655701, "logps/rejected": -1.7263574600219727, "loss": 1.0793, "nll_loss": 1.0188716650009155, "rewards/accuracies": 0.625, "rewards/chosen": -0.07377995550632477, "rewards/margins": 0.09885578602552414, "rewards/rejected": -0.1726357340812683, "step": 5659 }, { "epoch": 3.4527985359158153, "grad_norm": 1.7474217414855957, "learning_rate": 2.4837721984078383e-06, "log_odds_chosen": 2.4241747856140137, "log_odds_ratio": -0.34543314576148987, "logits/chosen": -0.7218945622444153, "logits/rejected": -0.7976060509681702, "logps/chosen": -0.7165358662605286, "logps/rejected": -2.651634454727173, "loss": 0.9412, "nll_loss": 0.9236943125724792, "rewards/accuracies": 1.0, "rewards/chosen": -0.0716535896062851, "rewards/margins": 0.19350986182689667, "rewards/rejected": -0.26516345143318176, "step": 5660 }, { "epoch": 3.453408570992832, "grad_norm": 8.211970329284668, "learning_rate": 2.4827924066135944e-06, "log_odds_chosen": 1.9318839311599731, "log_odds_ratio": -0.29940468072891235, "logits/chosen": -1.1327763795852661, "logits/rejected": -1.0879288911819458, "logps/chosen": -0.5638644695281982, "logps/rejected": -1.7700626850128174, "loss": 0.9653, "nll_loss": 0.9580549001693726, "rewards/accuracies": 1.0, "rewards/chosen": -0.056386448442935944, "rewards/margins": 0.12061981856822968, "rewards/rejected": -0.17700627446174622, "step": 5661 }, { "epoch": 3.454018606069849, "grad_norm": 1.6840623617172241, "learning_rate": 2.4818126148193506e-06, "log_odds_chosen": 0.7092313766479492, "log_odds_ratio": -0.5581384897232056, "logits/chosen": -0.9040209054946899, "logits/rejected": -0.875422477722168, "logps/chosen": -0.9139267206192017, "logps/rejected": -1.5027527809143066, "loss": 0.9958, "nll_loss": 1.0768985748291016, "rewards/accuracies": 0.625, "rewards/chosen": -0.09139267355203629, "rewards/margins": 0.05888260900974274, "rewards/rejected": -0.15027529001235962, "step": 5662 }, { "epoch": 3.454628641146866, "grad_norm": 1.83250093460083, "learning_rate": 2.480832823025107e-06, "log_odds_chosen": 3.603938341140747, "log_odds_ratio": -0.23574687540531158, "logits/chosen": -0.7715451717376709, "logits/rejected": -0.892959475517273, "logps/chosen": -0.7679080963134766, "logps/rejected": -3.751133441925049, "loss": 1.0443, "nll_loss": 0.9404995441436768, "rewards/accuracies": 1.0, "rewards/chosen": -0.07679080963134766, "rewards/margins": 0.29832252860069275, "rewards/rejected": -0.3751133680343628, "step": 5663 }, { "epoch": 3.4552386762238827, "grad_norm": 1.396842122077942, "learning_rate": 2.4798530312308632e-06, "log_odds_chosen": 0.9777612090110779, "log_odds_ratio": -0.5728054046630859, "logits/chosen": -1.0072855949401855, "logits/rejected": -0.9716774225234985, "logps/chosen": -0.7963563203811646, "logps/rejected": -1.525394320487976, "loss": 1.0105, "nll_loss": 0.9831520318984985, "rewards/accuracies": 0.75, "rewards/chosen": -0.0796356350183487, "rewards/margins": 0.07290379703044891, "rewards/rejected": -0.1525394320487976, "step": 5664 }, { "epoch": 3.4558487113009, "grad_norm": 2.9020814895629883, "learning_rate": 2.4788732394366193e-06, "log_odds_chosen": 1.6434203386306763, "log_odds_ratio": -0.42664337158203125, "logits/chosen": -0.977983832359314, "logits/rejected": -0.9963216781616211, "logps/chosen": -1.0596823692321777, "logps/rejected": -2.2878847122192383, "loss": 1.2643, "nll_loss": 1.024953007698059, "rewards/accuracies": 0.75, "rewards/chosen": -0.10596825182437897, "rewards/margins": 0.12282022833824158, "rewards/rejected": -0.22878849506378174, "step": 5665 }, { "epoch": 3.456458746377917, "grad_norm": 1.3247284889221191, "learning_rate": 2.477893447642376e-06, "log_odds_chosen": 3.1420671939849854, "log_odds_ratio": -0.1776687502861023, "logits/chosen": -0.5233482122421265, "logits/rejected": -0.8567463159561157, "logps/chosen": -0.7887874245643616, "logps/rejected": -3.278515338897705, "loss": 1.0556, "nll_loss": 1.0028811693191528, "rewards/accuracies": 1.0, "rewards/chosen": -0.0788787454366684, "rewards/margins": 0.2489727884531021, "rewards/rejected": -0.3278515338897705, "step": 5666 }, { "epoch": 3.4570687814549337, "grad_norm": 1.2461247444152832, "learning_rate": 2.4769136558481324e-06, "log_odds_chosen": 1.6538578271865845, "log_odds_ratio": -0.48354706168174744, "logits/chosen": -0.9954962730407715, "logits/rejected": -1.0109567642211914, "logps/chosen": -0.9458460211753845, "logps/rejected": -2.17962646484375, "loss": 1.1363, "nll_loss": 1.0712087154388428, "rewards/accuracies": 0.875, "rewards/chosen": -0.09458460658788681, "rewards/margins": 0.12337803095579147, "rewards/rejected": -0.2179626226425171, "step": 5667 }, { "epoch": 3.4576788165319505, "grad_norm": 1.6361558437347412, "learning_rate": 2.475933864053888e-06, "log_odds_chosen": 2.3245890140533447, "log_odds_ratio": -0.3171674609184265, "logits/chosen": -0.7179577946662903, "logits/rejected": -0.7704459428787231, "logps/chosen": -0.7444534301757812, "logps/rejected": -2.605762004852295, "loss": 0.9612, "nll_loss": 0.9393619298934937, "rewards/accuracies": 0.875, "rewards/chosen": -0.07444534450769424, "rewards/margins": 0.18613089621067047, "rewards/rejected": -0.2605762481689453, "step": 5668 }, { "epoch": 3.4582888516089674, "grad_norm": 1.325890302658081, "learning_rate": 2.4749540722596447e-06, "log_odds_chosen": 4.01033878326416, "log_odds_ratio": -0.31415048241615295, "logits/chosen": -0.6793546080589294, "logits/rejected": -0.886013925075531, "logps/chosen": -0.4922027587890625, "logps/rejected": -3.7775614261627197, "loss": 0.8957, "nll_loss": 0.6408526301383972, "rewards/accuracies": 0.875, "rewards/chosen": -0.04922027513384819, "rewards/margins": 0.32853585481643677, "rewards/rejected": -0.37775614857673645, "step": 5669 }, { "epoch": 3.4588988866859847, "grad_norm": 1.9600626230239868, "learning_rate": 2.4739742804654012e-06, "log_odds_chosen": 1.1164071559906006, "log_odds_ratio": -0.5592554807662964, "logits/chosen": -0.7979459762573242, "logits/rejected": -0.8006236553192139, "logps/chosen": -0.7768588066101074, "logps/rejected": -1.4997782707214355, "loss": 0.9663, "nll_loss": 1.0099844932556152, "rewards/accuracies": 0.625, "rewards/chosen": -0.0776858776807785, "rewards/margins": 0.07229194045066833, "rewards/rejected": -0.14997783303260803, "step": 5670 }, { "epoch": 3.4595089217630015, "grad_norm": 1.3463157415390015, "learning_rate": 2.472994488671157e-06, "log_odds_chosen": 2.072608470916748, "log_odds_ratio": -0.4933691918849945, "logits/chosen": -0.9154437184333801, "logits/rejected": -0.8779310584068298, "logps/chosen": -0.8780736923217773, "logps/rejected": -2.7378101348876953, "loss": 0.9831, "nll_loss": 1.1804563999176025, "rewards/accuracies": 0.75, "rewards/chosen": -0.08780737221240997, "rewards/margins": 0.1859736442565918, "rewards/rejected": -0.27378103137016296, "step": 5671 }, { "epoch": 3.4601189568400184, "grad_norm": 1.1648553609848022, "learning_rate": 2.4720146968769135e-06, "log_odds_chosen": 1.7119685411453247, "log_odds_ratio": -0.38071709871292114, "logits/chosen": -0.9875668287277222, "logits/rejected": -0.9662169218063354, "logps/chosen": -0.7693982720375061, "logps/rejected": -1.9368562698364258, "loss": 1.0225, "nll_loss": 1.0460315942764282, "rewards/accuracies": 0.75, "rewards/chosen": -0.07693982869386673, "rewards/margins": 0.11674582213163376, "rewards/rejected": -0.1936856508255005, "step": 5672 }, { "epoch": 3.460728991917035, "grad_norm": 2.283705949783325, "learning_rate": 2.47103490508267e-06, "log_odds_chosen": 1.1830224990844727, "log_odds_ratio": -0.48784661293029785, "logits/chosen": -0.6792764663696289, "logits/rejected": -0.7956187725067139, "logps/chosen": -0.7656296491622925, "logps/rejected": -1.6591507196426392, "loss": 1.0049, "nll_loss": 0.9269499778747559, "rewards/accuracies": 0.5, "rewards/chosen": -0.07656297087669373, "rewards/margins": 0.08935210108757019, "rewards/rejected": -0.1659150868654251, "step": 5673 }, { "epoch": 3.461339026994052, "grad_norm": 6.955241680145264, "learning_rate": 2.470055113288426e-06, "log_odds_chosen": 0.9404508471488953, "log_odds_ratio": -0.5729359984397888, "logits/chosen": -0.8892073035240173, "logits/rejected": -0.930275022983551, "logps/chosen": -0.9610000252723694, "logps/rejected": -1.4092272520065308, "loss": 0.8734, "nll_loss": 1.1187928915023804, "rewards/accuracies": 0.5, "rewards/chosen": -0.09610000252723694, "rewards/margins": 0.04482271894812584, "rewards/rejected": -0.14092272520065308, "step": 5674 }, { "epoch": 3.461949062071069, "grad_norm": 1.6245479583740234, "learning_rate": 2.4690753214941823e-06, "log_odds_chosen": 1.7510931491851807, "log_odds_ratio": -0.40806126594543457, "logits/chosen": -0.7631148099899292, "logits/rejected": -0.8360472321510315, "logps/chosen": -0.6724851727485657, "logps/rejected": -1.9700145721435547, "loss": 0.8988, "nll_loss": 0.8897761106491089, "rewards/accuracies": 0.875, "rewards/chosen": -0.06724852323532104, "rewards/margins": 0.12975293397903442, "rewards/rejected": -0.19700145721435547, "step": 5675 }, { "epoch": 3.462559097148086, "grad_norm": 2.1938116550445557, "learning_rate": 2.468095529699939e-06, "log_odds_chosen": 1.9448399543762207, "log_odds_ratio": -0.6169581413269043, "logits/chosen": -0.9168050289154053, "logits/rejected": -0.9962214231491089, "logps/chosen": -0.6305866837501526, "logps/rejected": -2.3042497634887695, "loss": 1.0459, "nll_loss": 1.0027825832366943, "rewards/accuracies": 0.875, "rewards/chosen": -0.06305867433547974, "rewards/margins": 0.16736629605293274, "rewards/rejected": -0.23042498528957367, "step": 5676 }, { "epoch": 3.463169132225103, "grad_norm": 2.7726495265960693, "learning_rate": 2.467115737905695e-06, "log_odds_chosen": 0.8915854692459106, "log_odds_ratio": -0.5231316685676575, "logits/chosen": -0.786024272441864, "logits/rejected": -0.884426474571228, "logps/chosen": -0.9441605806350708, "logps/rejected": -1.6567661762237549, "loss": 1.0703, "nll_loss": 1.2725104093551636, "rewards/accuracies": 0.625, "rewards/chosen": -0.0944160670042038, "rewards/margins": 0.07126054912805557, "rewards/rejected": -0.16567662358283997, "step": 5677 }, { "epoch": 3.46377916730212, "grad_norm": 2.3439583778381348, "learning_rate": 2.466135946111451e-06, "log_odds_chosen": 1.9798818826675415, "log_odds_ratio": -0.7742393016815186, "logits/chosen": -0.8963021039962769, "logits/rejected": -0.9249455332756042, "logps/chosen": -1.1310441493988037, "logps/rejected": -2.842282295227051, "loss": 1.1386, "nll_loss": 0.9558913707733154, "rewards/accuracies": 0.625, "rewards/chosen": -0.11310441792011261, "rewards/margins": 0.17112378776073456, "rewards/rejected": -0.28422820568084717, "step": 5678 }, { "epoch": 3.4643892023791367, "grad_norm": 2.9467759132385254, "learning_rate": 2.4651561543172076e-06, "log_odds_chosen": 2.302769184112549, "log_odds_ratio": -0.3504619300365448, "logits/chosen": -0.8488277196884155, "logits/rejected": -1.0660717487335205, "logps/chosen": -0.7439219951629639, "logps/rejected": -2.4818456172943115, "loss": 1.0722, "nll_loss": 0.9840868711471558, "rewards/accuracies": 0.75, "rewards/chosen": -0.07439219951629639, "rewards/margins": 0.17379236221313477, "rewards/rejected": -0.24818456172943115, "step": 5679 }, { "epoch": 3.4649992374561536, "grad_norm": 1.8220258951187134, "learning_rate": 2.4641763625229637e-06, "log_odds_chosen": 0.2374744415283203, "log_odds_ratio": -0.7172274589538574, "logits/chosen": -0.7821000218391418, "logits/rejected": -0.7939098477363586, "logps/chosen": -0.8164602518081665, "logps/rejected": -0.9494145512580872, "loss": 1.1543, "nll_loss": 0.9161615967750549, "rewards/accuracies": 0.5, "rewards/chosen": -0.08164602518081665, "rewards/margins": 0.013295425102114677, "rewards/rejected": -0.09494145214557648, "step": 5680 }, { "epoch": 3.465609272533171, "grad_norm": 1.6031461954116821, "learning_rate": 2.4631965707287203e-06, "log_odds_chosen": 1.1524980068206787, "log_odds_ratio": -0.49743151664733887, "logits/chosen": -0.9952198266983032, "logits/rejected": -1.1202694177627563, "logps/chosen": -0.8597848415374756, "logps/rejected": -1.569434404373169, "loss": 1.3334, "nll_loss": 1.4565871953964233, "rewards/accuracies": 0.75, "rewards/chosen": -0.08597849309444427, "rewards/margins": 0.07096496969461441, "rewards/rejected": -0.15694347023963928, "step": 5681 }, { "epoch": 3.4662193076101877, "grad_norm": 7.221279621124268, "learning_rate": 2.462216778934476e-06, "log_odds_chosen": 1.8415277004241943, "log_odds_ratio": -0.30150383710861206, "logits/chosen": -0.7738024592399597, "logits/rejected": -0.8887265920639038, "logps/chosen": -0.7148610949516296, "logps/rejected": -2.0579090118408203, "loss": 1.0236, "nll_loss": 0.8706693649291992, "rewards/accuracies": 1.0, "rewards/chosen": -0.07148610800504684, "rewards/margins": 0.13430479168891907, "rewards/rejected": -0.2057909071445465, "step": 5682 }, { "epoch": 3.4668293426872046, "grad_norm": 1.596444010734558, "learning_rate": 2.4612369871402325e-06, "log_odds_chosen": 0.813880443572998, "log_odds_ratio": -0.46299993991851807, "logits/chosen": -0.751190721988678, "logits/rejected": -0.9030935764312744, "logps/chosen": -0.8766014575958252, "logps/rejected": -1.4518134593963623, "loss": 0.9439, "nll_loss": 0.9655896425247192, "rewards/accuracies": 0.75, "rewards/chosen": -0.08766014873981476, "rewards/margins": 0.05752120167016983, "rewards/rejected": -0.14518135786056519, "step": 5683 }, { "epoch": 3.4674393777642214, "grad_norm": 1.6528213024139404, "learning_rate": 2.460257195345989e-06, "log_odds_chosen": 2.907000780105591, "log_odds_ratio": -0.24469143152236938, "logits/chosen": -0.674504280090332, "logits/rejected": -0.8534402847290039, "logps/chosen": -0.4758394956588745, "logps/rejected": -2.6723549365997314, "loss": 0.872, "nll_loss": 0.6645895838737488, "rewards/accuracies": 1.0, "rewards/chosen": -0.04758394882082939, "rewards/margins": 0.21965155005455017, "rewards/rejected": -0.26723548769950867, "step": 5684 }, { "epoch": 3.4680494128412382, "grad_norm": 1.3487884998321533, "learning_rate": 2.4592774035517448e-06, "log_odds_chosen": 2.151556968688965, "log_odds_ratio": -0.4091357886791229, "logits/chosen": -0.5648366212844849, "logits/rejected": -0.8215439319610596, "logps/chosen": -0.5054832696914673, "logps/rejected": -1.7861430644989014, "loss": 0.8732, "nll_loss": 0.8477118015289307, "rewards/accuracies": 0.875, "rewards/chosen": -0.05054832622408867, "rewards/margins": 0.12806597352027893, "rewards/rejected": -0.1786143034696579, "step": 5685 }, { "epoch": 3.468659447918255, "grad_norm": 4.419350624084473, "learning_rate": 2.4582976117575013e-06, "log_odds_chosen": 2.998619794845581, "log_odds_ratio": -0.2791440486907959, "logits/chosen": -0.6340393424034119, "logits/rejected": -0.727413535118103, "logps/chosen": -0.8183243870735168, "logps/rejected": -3.2053184509277344, "loss": 0.9802, "nll_loss": 0.8065455555915833, "rewards/accuracies": 0.75, "rewards/chosen": -0.08183243870735168, "rewards/margins": 0.23869942128658295, "rewards/rejected": -0.3205318748950958, "step": 5686 }, { "epoch": 3.4692694829952724, "grad_norm": 1.6994786262512207, "learning_rate": 2.457317819963258e-06, "log_odds_chosen": 2.5743250846862793, "log_odds_ratio": -0.36418256163597107, "logits/chosen": -0.675195038318634, "logits/rejected": -0.7921756505966187, "logps/chosen": -0.687160849571228, "logps/rejected": -2.6814157962799072, "loss": 1.1221, "nll_loss": 0.9868111610412598, "rewards/accuracies": 0.75, "rewards/chosen": -0.06871608644723892, "rewards/margins": 0.19942548871040344, "rewards/rejected": -0.26814156770706177, "step": 5687 }, { "epoch": 3.4698795180722892, "grad_norm": 2.067812204360962, "learning_rate": 2.456338028169014e-06, "log_odds_chosen": 3.4232072830200195, "log_odds_ratio": -0.14664679765701294, "logits/chosen": -0.7741586565971375, "logits/rejected": -0.9982568025588989, "logps/chosen": -0.721829891204834, "logps/rejected": -3.4727048873901367, "loss": 1.1192, "nll_loss": 1.0688623189926147, "rewards/accuracies": 0.875, "rewards/chosen": -0.07218298316001892, "rewards/margins": 0.27508750557899475, "rewards/rejected": -0.34727048873901367, "step": 5688 }, { "epoch": 3.470489553149306, "grad_norm": 7.281960964202881, "learning_rate": 2.45535823637477e-06, "log_odds_chosen": 1.364386796951294, "log_odds_ratio": -0.5789908170700073, "logits/chosen": -0.8589946031570435, "logits/rejected": -0.8528145551681519, "logps/chosen": -0.7777918577194214, "logps/rejected": -1.8550782203674316, "loss": 1.0769, "nll_loss": 0.9166344404220581, "rewards/accuracies": 0.5, "rewards/chosen": -0.07777918875217438, "rewards/margins": 0.10772864520549774, "rewards/rejected": -0.18550783395767212, "step": 5689 }, { "epoch": 3.471099588226323, "grad_norm": 0.9884326457977295, "learning_rate": 2.4543784445805266e-06, "log_odds_chosen": 0.04335235059261322, "log_odds_ratio": -0.735214352607727, "logits/chosen": -1.0448670387268066, "logits/rejected": -1.0472952127456665, "logps/chosen": -1.0107002258300781, "logps/rejected": -1.077654242515564, "loss": 1.0841, "nll_loss": 1.1558805704116821, "rewards/accuracies": 0.5, "rewards/chosen": -0.10107003897428513, "rewards/margins": 0.006695393938571215, "rewards/rejected": -0.10776542872190475, "step": 5690 }, { "epoch": 3.4717096233033398, "grad_norm": 1.2942637205123901, "learning_rate": 2.4533986527862828e-06, "log_odds_chosen": 3.0441744327545166, "log_odds_ratio": -0.2872562110424042, "logits/chosen": -0.4582103490829468, "logits/rejected": -0.6808663010597229, "logps/chosen": -0.48425009846687317, "logps/rejected": -2.422797918319702, "loss": 0.7114, "nll_loss": 0.6284603476524353, "rewards/accuracies": 0.75, "rewards/chosen": -0.048425011336803436, "rewards/margins": 0.19385480880737305, "rewards/rejected": -0.2422797977924347, "step": 5691 }, { "epoch": 3.472319658380357, "grad_norm": 7.608437538146973, "learning_rate": 2.4524188609920393e-06, "log_odds_chosen": 2.4489710330963135, "log_odds_ratio": -0.20582164824008942, "logits/chosen": -0.8159630298614502, "logits/rejected": -0.9353612661361694, "logps/chosen": -0.6490134000778198, "logps/rejected": -2.397869348526001, "loss": 0.9755, "nll_loss": 0.9394122362136841, "rewards/accuracies": 1.0, "rewards/chosen": -0.06490133702754974, "rewards/margins": 0.1748856008052826, "rewards/rejected": -0.23978695273399353, "step": 5692 }, { "epoch": 3.472929693457374, "grad_norm": 1.2375622987747192, "learning_rate": 2.4514390691977954e-06, "log_odds_chosen": 2.826509475708008, "log_odds_ratio": -0.334179162979126, "logits/chosen": -0.8947285413742065, "logits/rejected": -0.9306166172027588, "logps/chosen": -0.7239466905593872, "logps/rejected": -3.0338938236236572, "loss": 1.0412, "nll_loss": 0.8718292713165283, "rewards/accuracies": 0.75, "rewards/chosen": -0.07239466905593872, "rewards/margins": 0.23099470138549805, "rewards/rejected": -0.30338937044143677, "step": 5693 }, { "epoch": 3.4735397285343907, "grad_norm": 2.098329782485962, "learning_rate": 2.4504592774035515e-06, "log_odds_chosen": 2.4780311584472656, "log_odds_ratio": -0.1875331699848175, "logits/chosen": -0.7777809500694275, "logits/rejected": -0.9420934915542603, "logps/chosen": -0.6147752404212952, "logps/rejected": -2.3932838439941406, "loss": 1.0074, "nll_loss": 0.7404577732086182, "rewards/accuracies": 1.0, "rewards/chosen": -0.06147752329707146, "rewards/margins": 0.1778508871793747, "rewards/rejected": -0.23932839930057526, "step": 5694 }, { "epoch": 3.4741497636114076, "grad_norm": 1.6361950635910034, "learning_rate": 2.449479485609308e-06, "log_odds_chosen": 2.454118013381958, "log_odds_ratio": -0.631146252155304, "logits/chosen": -0.992311954498291, "logits/rejected": -0.9839263558387756, "logps/chosen": -0.8251558542251587, "logps/rejected": -2.8631234169006348, "loss": 1.1718, "nll_loss": 1.3938173055648804, "rewards/accuracies": 0.625, "rewards/chosen": -0.08251558244228363, "rewards/margins": 0.20379677414894104, "rewards/rejected": -0.2863123416900635, "step": 5695 }, { "epoch": 3.4747597986884244, "grad_norm": 1.0399624109268188, "learning_rate": 2.4484996938150642e-06, "log_odds_chosen": 3.9855191707611084, "log_odds_ratio": -0.383853554725647, "logits/chosen": -0.8320348858833313, "logits/rejected": -0.9473786354064941, "logps/chosen": -0.615531325340271, "logps/rejected": -4.189545154571533, "loss": 0.7903, "nll_loss": 0.8324975371360779, "rewards/accuracies": 0.75, "rewards/chosen": -0.06155312433838844, "rewards/margins": 0.35740140080451965, "rewards/rejected": -0.4189545214176178, "step": 5696 }, { "epoch": 3.4753698337654413, "grad_norm": 5.1884636878967285, "learning_rate": 2.4475199020208203e-06, "log_odds_chosen": 3.765850782394409, "log_odds_ratio": -0.0894700288772583, "logits/chosen": -0.8140859603881836, "logits/rejected": -1.063921332359314, "logps/chosen": -0.7207291126251221, "logps/rejected": -3.799771308898926, "loss": 1.001, "nll_loss": 0.8436349034309387, "rewards/accuracies": 1.0, "rewards/chosen": -0.07207291573286057, "rewards/margins": 0.3079042136669159, "rewards/rejected": -0.37997713685035706, "step": 5697 }, { "epoch": 3.4759798688424586, "grad_norm": 2.2642016410827637, "learning_rate": 2.446540110226577e-06, "log_odds_chosen": 2.084169387817383, "log_odds_ratio": -0.3339196741580963, "logits/chosen": -1.0156962871551514, "logits/rejected": -1.0665154457092285, "logps/chosen": -0.8712797164916992, "logps/rejected": -2.555211067199707, "loss": 1.0673, "nll_loss": 0.9939318895339966, "rewards/accuracies": 0.75, "rewards/chosen": -0.08712796866893768, "rewards/margins": 0.16839313507080078, "rewards/rejected": -0.25552108883857727, "step": 5698 }, { "epoch": 3.4765899039194754, "grad_norm": 3.4203948974609375, "learning_rate": 2.4455603184323334e-06, "log_odds_chosen": 2.0986297130584717, "log_odds_ratio": -0.4616345465183258, "logits/chosen": -0.7614368796348572, "logits/rejected": -0.8288854360580444, "logps/chosen": -0.761961042881012, "logps/rejected": -2.38059663772583, "loss": 1.1113, "nll_loss": 0.8578165769577026, "rewards/accuracies": 0.75, "rewards/chosen": -0.0761961042881012, "rewards/margins": 0.16186358034610748, "rewards/rejected": -0.23805968463420868, "step": 5699 }, { "epoch": 3.4771999389964923, "grad_norm": 2.034196615219116, "learning_rate": 2.444580526638089e-06, "log_odds_chosen": 1.9849953651428223, "log_odds_ratio": -0.4113946855068207, "logits/chosen": -1.0488089323043823, "logits/rejected": -0.9186111688613892, "logps/chosen": -0.988066554069519, "logps/rejected": -2.7368974685668945, "loss": 1.0544, "nll_loss": 1.089111089706421, "rewards/accuracies": 0.875, "rewards/chosen": -0.09880664944648743, "rewards/margins": 0.17488311231136322, "rewards/rejected": -0.27368974685668945, "step": 5700 }, { "epoch": 3.477809974073509, "grad_norm": 6.300088405609131, "learning_rate": 2.4436007348438457e-06, "log_odds_chosen": 3.003047227859497, "log_odds_ratio": -0.20516304671764374, "logits/chosen": -1.0626051425933838, "logits/rejected": -1.2055375576019287, "logps/chosen": -0.6966778039932251, "logps/rejected": -3.142355442047119, "loss": 1.0755, "nll_loss": 1.1107200384140015, "rewards/accuracies": 0.875, "rewards/chosen": -0.0696677714586258, "rewards/margins": 0.24456781148910522, "rewards/rejected": -0.3142355978488922, "step": 5701 }, { "epoch": 3.4784200091505264, "grad_norm": 3.5738918781280518, "learning_rate": 2.442620943049602e-06, "log_odds_chosen": 2.1197640895843506, "log_odds_ratio": -0.38471221923828125, "logits/chosen": -1.004983901977539, "logits/rejected": -0.966221034526825, "logps/chosen": -0.7667790651321411, "logps/rejected": -2.2865166664123535, "loss": 1.097, "nll_loss": 0.957954466342926, "rewards/accuracies": 0.75, "rewards/chosen": -0.07667791843414307, "rewards/margins": 0.15197373926639557, "rewards/rejected": -0.22865165770053864, "step": 5702 }, { "epoch": 3.4790300442275433, "grad_norm": 1.4301868677139282, "learning_rate": 2.441641151255358e-06, "log_odds_chosen": 1.4360830783843994, "log_odds_ratio": -0.4780098497867584, "logits/chosen": -0.8239114284515381, "logits/rejected": -0.8668212294578552, "logps/chosen": -0.9320773482322693, "logps/rejected": -2.1499948501586914, "loss": 0.9784, "nll_loss": 0.931609034538269, "rewards/accuracies": 0.625, "rewards/chosen": -0.09320774674415588, "rewards/margins": 0.12179173529148102, "rewards/rejected": -0.2149994671344757, "step": 5703 }, { "epoch": 3.47964007930456, "grad_norm": 1.5265624523162842, "learning_rate": 2.4406613594611145e-06, "log_odds_chosen": 3.148832082748413, "log_odds_ratio": -0.3588497042655945, "logits/chosen": -1.0581533908843994, "logits/rejected": -1.0179519653320312, "logps/chosen": -0.7339282631874084, "logps/rejected": -3.416849136352539, "loss": 1.2978, "nll_loss": 1.1068205833435059, "rewards/accuracies": 0.875, "rewards/chosen": -0.0733928233385086, "rewards/margins": 0.268292099237442, "rewards/rejected": -0.3416849374771118, "step": 5704 }, { "epoch": 3.480250114381577, "grad_norm": 2.285503387451172, "learning_rate": 2.4396815676668706e-06, "log_odds_chosen": 0.7897269129753113, "log_odds_ratio": -0.6527748703956604, "logits/chosen": -0.886517345905304, "logits/rejected": -0.6787385940551758, "logps/chosen": -0.9347124695777893, "logps/rejected": -1.5794436931610107, "loss": 0.9779, "nll_loss": 1.134169101715088, "rewards/accuracies": 0.625, "rewards/chosen": -0.09347125142812729, "rewards/margins": 0.06447311490774155, "rewards/rejected": -0.15794436633586884, "step": 5705 }, { "epoch": 3.480860149458594, "grad_norm": 4.512049674987793, "learning_rate": 2.438701775872627e-06, "log_odds_chosen": 2.109159231185913, "log_odds_ratio": -0.3657073974609375, "logits/chosen": -0.9481857419013977, "logits/rejected": -1.0051478147506714, "logps/chosen": -0.7511960864067078, "logps/rejected": -2.512131929397583, "loss": 0.9294, "nll_loss": 1.0643218755722046, "rewards/accuracies": 0.75, "rewards/chosen": -0.07511960715055466, "rewards/margins": 0.17609357833862305, "rewards/rejected": -0.2512131929397583, "step": 5706 }, { "epoch": 3.4814701845356106, "grad_norm": 1.813515305519104, "learning_rate": 2.4377219840783833e-06, "log_odds_chosen": 1.6944997310638428, "log_odds_ratio": -0.4407922029495239, "logits/chosen": -0.7446385622024536, "logits/rejected": -0.8906947374343872, "logps/chosen": -0.8429614305496216, "logps/rejected": -1.8805837631225586, "loss": 0.9763, "nll_loss": 0.935179591178894, "rewards/accuracies": 0.625, "rewards/chosen": -0.08429615199565887, "rewards/margins": 0.10376222431659698, "rewards/rejected": -0.18805837631225586, "step": 5707 }, { "epoch": 3.4820802196126275, "grad_norm": 1.8104870319366455, "learning_rate": 2.4367421922841394e-06, "log_odds_chosen": 0.5951454639434814, "log_odds_ratio": -0.6768234372138977, "logits/chosen": -1.0455942153930664, "logits/rejected": -0.9530064463615417, "logps/chosen": -0.8935706615447998, "logps/rejected": -1.2729201316833496, "loss": 1.1182, "nll_loss": 1.178244948387146, "rewards/accuracies": 0.625, "rewards/chosen": -0.08935706317424774, "rewards/margins": 0.03793494403362274, "rewards/rejected": -0.12729200720787048, "step": 5708 }, { "epoch": 3.4826902546896448, "grad_norm": 1.3715742826461792, "learning_rate": 2.435762400489896e-06, "log_odds_chosen": 2.2832555770874023, "log_odds_ratio": -0.299161821603775, "logits/chosen": -0.7703639268875122, "logits/rejected": -0.7522781491279602, "logps/chosen": -0.614529013633728, "logps/rejected": -2.2039906978607178, "loss": 1.1289, "nll_loss": 1.0725345611572266, "rewards/accuracies": 0.875, "rewards/chosen": -0.061452895402908325, "rewards/margins": 0.1589461863040924, "rewards/rejected": -0.22039908170700073, "step": 5709 }, { "epoch": 3.4833002897666616, "grad_norm": 1.4575315713882446, "learning_rate": 2.434782608695652e-06, "log_odds_chosen": 1.4085355997085571, "log_odds_ratio": -0.5802662372589111, "logits/chosen": -1.0928868055343628, "logits/rejected": -1.0285505056381226, "logps/chosen": -1.0486515760421753, "logps/rejected": -2.304826498031616, "loss": 1.356, "nll_loss": 1.2924604415893555, "rewards/accuracies": 0.625, "rewards/chosen": -0.1048651710152626, "rewards/margins": 0.12561748921871185, "rewards/rejected": -0.23048266768455505, "step": 5710 }, { "epoch": 3.4839103248436785, "grad_norm": 1.1712138652801514, "learning_rate": 2.433802816901408e-06, "log_odds_chosen": 2.3777806758880615, "log_odds_ratio": -0.3186661899089813, "logits/chosen": -0.5937851667404175, "logits/rejected": -0.7371053099632263, "logps/chosen": -0.642730712890625, "logps/rejected": -2.3678362369537354, "loss": 0.9409, "nll_loss": 0.8293855786323547, "rewards/accuracies": 0.875, "rewards/chosen": -0.06427306681871414, "rewards/margins": 0.17251056432724, "rewards/rejected": -0.23678362369537354, "step": 5711 }, { "epoch": 3.4845203599206953, "grad_norm": 0.945859432220459, "learning_rate": 2.4328230251071647e-06, "log_odds_chosen": 0.7767860293388367, "log_odds_ratio": -0.48664993047714233, "logits/chosen": -0.822820782661438, "logits/rejected": -0.7787538766860962, "logps/chosen": -0.8704317808151245, "logps/rejected": -1.3963087797164917, "loss": 1.1389, "nll_loss": 1.0525949001312256, "rewards/accuracies": 0.75, "rewards/chosen": -0.0870431661605835, "rewards/margins": 0.05258769914507866, "rewards/rejected": -0.13963088393211365, "step": 5712 }, { "epoch": 3.4851303949977126, "grad_norm": 2.097381591796875, "learning_rate": 2.4318432333129213e-06, "log_odds_chosen": 1.3636239767074585, "log_odds_ratio": -0.41244202852249146, "logits/chosen": -0.7296606302261353, "logits/rejected": -0.9374501705169678, "logps/chosen": -0.8567546010017395, "logps/rejected": -1.926711082458496, "loss": 1.0656, "nll_loss": 0.9294597506523132, "rewards/accuracies": 0.75, "rewards/chosen": -0.085675448179245, "rewards/margins": 0.10699565708637238, "rewards/rejected": -0.19267112016677856, "step": 5713 }, { "epoch": 3.4857404300747294, "grad_norm": 1.3380752801895142, "learning_rate": 2.430863441518677e-06, "log_odds_chosen": 2.5488386154174805, "log_odds_ratio": -0.21565857529640198, "logits/chosen": -0.918419599533081, "logits/rejected": -0.9332132339477539, "logps/chosen": -0.7258684635162354, "logps/rejected": -2.6146788597106934, "loss": 0.9926, "nll_loss": 1.0257214307785034, "rewards/accuracies": 0.875, "rewards/chosen": -0.07258684933185577, "rewards/margins": 0.1888810396194458, "rewards/rejected": -0.2614678740501404, "step": 5714 }, { "epoch": 3.4863504651517463, "grad_norm": 2.467808723449707, "learning_rate": 2.4298836497244335e-06, "log_odds_chosen": 4.070096969604492, "log_odds_ratio": -0.2548842430114746, "logits/chosen": -0.8089014887809753, "logits/rejected": -0.940652072429657, "logps/chosen": -0.5407333374023438, "logps/rejected": -3.7471039295196533, "loss": 1.1225, "nll_loss": 0.8537405729293823, "rewards/accuracies": 0.875, "rewards/chosen": -0.05407334119081497, "rewards/margins": 0.320637047290802, "rewards/rejected": -0.37471041083335876, "step": 5715 }, { "epoch": 3.486960500228763, "grad_norm": 2.037551164627075, "learning_rate": 2.42890385793019e-06, "log_odds_chosen": 1.8772869110107422, "log_odds_ratio": -0.5392505526542664, "logits/chosen": -0.915020227432251, "logits/rejected": -0.7441916465759277, "logps/chosen": -1.0723180770874023, "logps/rejected": -2.7235395908355713, "loss": 1.2225, "nll_loss": 1.1507673263549805, "rewards/accuracies": 0.625, "rewards/chosen": -0.10723181813955307, "rewards/margins": 0.1651221215724945, "rewards/rejected": -0.2723539471626282, "step": 5716 }, { "epoch": 3.48757053530578, "grad_norm": 2.2825586795806885, "learning_rate": 2.4279240661359457e-06, "log_odds_chosen": 1.974771499633789, "log_odds_ratio": -0.501809298992157, "logits/chosen": -0.8226719498634338, "logits/rejected": -0.940669059753418, "logps/chosen": -0.7199953198432922, "logps/rejected": -2.177398443222046, "loss": 0.9074, "nll_loss": 0.7600936889648438, "rewards/accuracies": 0.875, "rewards/chosen": -0.07199953496456146, "rewards/margins": 0.1457403153181076, "rewards/rejected": -0.21773983538150787, "step": 5717 }, { "epoch": 3.488180570382797, "grad_norm": 2.157888650894165, "learning_rate": 2.4269442743417023e-06, "log_odds_chosen": 1.3620684146881104, "log_odds_ratio": -0.35770681500434875, "logits/chosen": -0.683070719242096, "logits/rejected": -0.7455954551696777, "logps/chosen": -0.6591852307319641, "logps/rejected": -1.5097315311431885, "loss": 0.9058, "nll_loss": 1.0264829397201538, "rewards/accuracies": 0.875, "rewards/chosen": -0.06591852009296417, "rewards/margins": 0.08505465090274811, "rewards/rejected": -0.15097317099571228, "step": 5718 }, { "epoch": 3.488790605459814, "grad_norm": 4.048610687255859, "learning_rate": 2.4259644825474584e-06, "log_odds_chosen": 2.0725021362304688, "log_odds_ratio": -0.7600564360618591, "logits/chosen": -0.856698215007782, "logits/rejected": -0.8401203751564026, "logps/chosen": -0.984651505947113, "logps/rejected": -2.683818817138672, "loss": 1.0175, "nll_loss": 0.9276562333106995, "rewards/accuracies": 0.875, "rewards/chosen": -0.09846515208482742, "rewards/margins": 0.16991674900054932, "rewards/rejected": -0.26838189363479614, "step": 5719 }, { "epoch": 3.489400640536831, "grad_norm": 1.4033019542694092, "learning_rate": 2.424984690753215e-06, "log_odds_chosen": 2.434221029281616, "log_odds_ratio": -0.34620827436447144, "logits/chosen": -0.7901880145072937, "logits/rejected": -0.948972225189209, "logps/chosen": -0.752609372138977, "logps/rejected": -2.5070977210998535, "loss": 1.1673, "nll_loss": 1.0840309858322144, "rewards/accuracies": 0.875, "rewards/chosen": -0.0752609372138977, "rewards/margins": 0.17544884979724884, "rewards/rejected": -0.25070977210998535, "step": 5720 }, { "epoch": 3.490010675613848, "grad_norm": 1.3944865465164185, "learning_rate": 2.424004898958971e-06, "log_odds_chosen": 1.6639058589935303, "log_odds_ratio": -0.3877846896648407, "logits/chosen": -0.7415013313293457, "logits/rejected": -0.8554671406745911, "logps/chosen": -0.7095319032669067, "logps/rejected": -1.8913458585739136, "loss": 0.8232, "nll_loss": 0.8358112573623657, "rewards/accuracies": 0.75, "rewards/chosen": -0.07095319032669067, "rewards/margins": 0.11818140745162964, "rewards/rejected": -0.1891345977783203, "step": 5721 }, { "epoch": 3.4906207106908647, "grad_norm": 2.168733835220337, "learning_rate": 2.423025107164727e-06, "log_odds_chosen": 1.118371605873108, "log_odds_ratio": -0.49409303069114685, "logits/chosen": -1.1424462795257568, "logits/rejected": -1.0947411060333252, "logps/chosen": -0.8450043201446533, "logps/rejected": -1.7485454082489014, "loss": 1.1879, "nll_loss": 1.306422472000122, "rewards/accuracies": 0.625, "rewards/chosen": -0.08450043201446533, "rewards/margins": 0.09035412222146988, "rewards/rejected": -0.17485454678535461, "step": 5722 }, { "epoch": 3.4912307457678815, "grad_norm": 1.8713912963867188, "learning_rate": 2.4220453153704838e-06, "log_odds_chosen": 1.1143330335617065, "log_odds_ratio": -0.453127920627594, "logits/chosen": -1.0636341571807861, "logits/rejected": -0.9217574596405029, "logps/chosen": -0.8174307346343994, "logps/rejected": -1.584928035736084, "loss": 1.1175, "nll_loss": 1.0654264688491821, "rewards/accuracies": 0.875, "rewards/chosen": -0.08174307644367218, "rewards/margins": 0.07674972712993622, "rewards/rejected": -0.1584928035736084, "step": 5723 }, { "epoch": 3.491840780844899, "grad_norm": 1.5458495616912842, "learning_rate": 2.42106552357624e-06, "log_odds_chosen": 1.7829490900039673, "log_odds_ratio": -0.6045647859573364, "logits/chosen": -1.008729100227356, "logits/rejected": -1.1405576467514038, "logps/chosen": -0.9635155200958252, "logps/rejected": -2.6025443077087402, "loss": 1.203, "nll_loss": 1.2704764604568481, "rewards/accuracies": 0.5, "rewards/chosen": -0.09635155647993088, "rewards/margins": 0.1639028787612915, "rewards/rejected": -0.260254442691803, "step": 5724 }, { "epoch": 3.4924508159219156, "grad_norm": 1.2317960262298584, "learning_rate": 2.420085731781996e-06, "log_odds_chosen": 1.0101913213729858, "log_odds_ratio": -0.458856463432312, "logits/chosen": -1.0133885145187378, "logits/rejected": -0.9473148584365845, "logps/chosen": -0.9465727210044861, "logps/rejected": -1.6717414855957031, "loss": 1.1022, "nll_loss": 1.087644100189209, "rewards/accuracies": 0.625, "rewards/chosen": -0.09465727210044861, "rewards/margins": 0.07251686602830887, "rewards/rejected": -0.16717414557933807, "step": 5725 }, { "epoch": 3.4930608509989325, "grad_norm": 4.122796535491943, "learning_rate": 2.4191059399877525e-06, "log_odds_chosen": 3.7447166442871094, "log_odds_ratio": -0.12409698218107224, "logits/chosen": -0.5666991472244263, "logits/rejected": -0.803266167640686, "logps/chosen": -0.5903024077415466, "logps/rejected": -3.611949920654297, "loss": 0.9717, "nll_loss": 0.7508353590965271, "rewards/accuracies": 1.0, "rewards/chosen": -0.05903024226427078, "rewards/margins": 0.3021647334098816, "rewards/rejected": -0.3611949682235718, "step": 5726 }, { "epoch": 3.4936708860759493, "grad_norm": 1.3441513776779175, "learning_rate": 2.418126148193509e-06, "log_odds_chosen": 1.5961790084838867, "log_odds_ratio": -0.49587804079055786, "logits/chosen": -0.772924542427063, "logits/rejected": -0.8939212560653687, "logps/chosen": -0.6296804547309875, "logps/rejected": -1.7128612995147705, "loss": 1.0209, "nll_loss": 0.9878009557723999, "rewards/accuracies": 0.625, "rewards/chosen": -0.06296804547309875, "rewards/margins": 0.10831809043884277, "rewards/rejected": -0.17128613591194153, "step": 5727 }, { "epoch": 3.494280921152966, "grad_norm": 1.8579593896865845, "learning_rate": 2.4171463563992648e-06, "log_odds_chosen": 1.0247207880020142, "log_odds_ratio": -0.5634825825691223, "logits/chosen": -0.8339881896972656, "logits/rejected": -0.7174192667007446, "logps/chosen": -0.641432523727417, "logps/rejected": -1.221308946609497, "loss": 1.0217, "nll_loss": 1.056269645690918, "rewards/accuracies": 0.5, "rewards/chosen": -0.06414325535297394, "rewards/margins": 0.05798764526844025, "rewards/rejected": -0.12213089317083359, "step": 5728 }, { "epoch": 3.494890956229983, "grad_norm": 6.696702003479004, "learning_rate": 2.4161665646050213e-06, "log_odds_chosen": 4.054511547088623, "log_odds_ratio": -0.23459097743034363, "logits/chosen": -0.7940192222595215, "logits/rejected": -0.8833945989608765, "logps/chosen": -0.8240609765052795, "logps/rejected": -4.3172736167907715, "loss": 0.9731, "nll_loss": 0.9886944890022278, "rewards/accuracies": 0.875, "rewards/chosen": -0.08240609616041183, "rewards/margins": 0.34932127594947815, "rewards/rejected": -0.4317273795604706, "step": 5729 }, { "epoch": 3.4955009913070003, "grad_norm": 1.75182044506073, "learning_rate": 2.415186772810778e-06, "log_odds_chosen": 0.08673955500125885, "log_odds_ratio": -0.6747146844863892, "logits/chosen": -0.8377970457077026, "logits/rejected": -0.7985935211181641, "logps/chosen": -0.7226853370666504, "logps/rejected": -0.7937706708908081, "loss": 1.0113, "nll_loss": 1.0776509046554565, "rewards/accuracies": 0.625, "rewards/chosen": -0.0722685381770134, "rewards/margins": 0.007108527701348066, "rewards/rejected": -0.07937707006931305, "step": 5730 }, { "epoch": 3.496111026384017, "grad_norm": 1.5499786138534546, "learning_rate": 2.4142069810165336e-06, "log_odds_chosen": 0.8719148635864258, "log_odds_ratio": -0.6295483112335205, "logits/chosen": -1.0137332677841187, "logits/rejected": -0.8343386650085449, "logps/chosen": -0.9425976276397705, "logps/rejected": -1.6498816013336182, "loss": 1.1463, "nll_loss": 1.1961023807525635, "rewards/accuracies": 0.625, "rewards/chosen": -0.09425976872444153, "rewards/margins": 0.07072839140892029, "rewards/rejected": -0.16498816013336182, "step": 5731 }, { "epoch": 3.496721061461034, "grad_norm": 2.005927562713623, "learning_rate": 2.41322718922229e-06, "log_odds_chosen": 2.36710786819458, "log_odds_ratio": -0.21259810030460358, "logits/chosen": -0.8138324618339539, "logits/rejected": -0.8874285221099854, "logps/chosen": -0.6601486206054688, "logps/rejected": -2.3983042240142822, "loss": 1.0813, "nll_loss": 1.2500766515731812, "rewards/accuracies": 0.875, "rewards/chosen": -0.0660148561000824, "rewards/margins": 0.17381557822227478, "rewards/rejected": -0.23983043432235718, "step": 5732 }, { "epoch": 3.497331096538051, "grad_norm": 2.0087156295776367, "learning_rate": 2.4122473974280467e-06, "log_odds_chosen": 3.0993385314941406, "log_odds_ratio": -0.29711443185806274, "logits/chosen": -0.7654940485954285, "logits/rejected": -1.0399279594421387, "logps/chosen": -0.7627529501914978, "logps/rejected": -3.372948169708252, "loss": 0.9525, "nll_loss": 0.9430761337280273, "rewards/accuracies": 0.875, "rewards/chosen": -0.0762753039598465, "rewards/margins": 0.2610195279121399, "rewards/rejected": -0.3372948169708252, "step": 5733 }, { "epoch": 3.4979411316150677, "grad_norm": 1.2022533416748047, "learning_rate": 2.411267605633803e-06, "log_odds_chosen": 1.9688938856124878, "log_odds_ratio": -0.3899465501308441, "logits/chosen": -0.7889968156814575, "logits/rejected": -0.8584678173065186, "logps/chosen": -0.769818902015686, "logps/rejected": -2.3453307151794434, "loss": 1.0679, "nll_loss": 0.9490201473236084, "rewards/accuracies": 0.875, "rewards/chosen": -0.07698188722133636, "rewards/margins": 0.15755118429660797, "rewards/rejected": -0.23453307151794434, "step": 5734 }, { "epoch": 3.498551166692085, "grad_norm": 1.350428581237793, "learning_rate": 2.410287813839559e-06, "log_odds_chosen": 0.4256228804588318, "log_odds_ratio": -0.5773046016693115, "logits/chosen": -0.6176588535308838, "logits/rejected": -0.6530188918113708, "logps/chosen": -0.7164524793624878, "logps/rejected": -0.9914523363113403, "loss": 0.9391, "nll_loss": 0.8602108359336853, "rewards/accuracies": 0.5, "rewards/chosen": -0.07164525240659714, "rewards/margins": 0.02749999240040779, "rewards/rejected": -0.09914524108171463, "step": 5735 }, { "epoch": 3.499161201769102, "grad_norm": 1.3785345554351807, "learning_rate": 2.4093080220453155e-06, "log_odds_chosen": 2.634242057800293, "log_odds_ratio": -0.44598186016082764, "logits/chosen": -0.9655189514160156, "logits/rejected": -1.068710446357727, "logps/chosen": -0.8340339064598083, "logps/rejected": -3.187213659286499, "loss": 1.0678, "nll_loss": 0.9831897616386414, "rewards/accuracies": 0.75, "rewards/chosen": -0.08340338617563248, "rewards/margins": 0.23531800508499146, "rewards/rejected": -0.31872138381004333, "step": 5736 }, { "epoch": 3.4997712368461187, "grad_norm": 1.5354655981063843, "learning_rate": 2.4083282302510716e-06, "log_odds_chosen": 3.1610913276672363, "log_odds_ratio": -0.27361953258514404, "logits/chosen": -0.7175038456916809, "logits/rejected": -0.9148739576339722, "logps/chosen": -0.5442905426025391, "logps/rejected": -3.011967897415161, "loss": 0.9641, "nll_loss": 0.9055534601211548, "rewards/accuracies": 1.0, "rewards/chosen": -0.054429054260253906, "rewards/margins": 0.24676772952079773, "rewards/rejected": -0.301196813583374, "step": 5737 }, { "epoch": 3.5003812719231355, "grad_norm": 1.9216325283050537, "learning_rate": 2.4073484384568277e-06, "log_odds_chosen": 0.1804232895374298, "log_odds_ratio": -0.7221109867095947, "logits/chosen": -0.9235749244689941, "logits/rejected": -0.9265038371086121, "logps/chosen": -1.0777143239974976, "logps/rejected": -1.2667391300201416, "loss": 1.1457, "nll_loss": 1.1153795719146729, "rewards/accuracies": 0.5, "rewards/chosen": -0.10777144134044647, "rewards/margins": 0.01890246570110321, "rewards/rejected": -0.12667390704154968, "step": 5738 }, { "epoch": 3.5009913070001524, "grad_norm": 1.5120351314544678, "learning_rate": 2.406368646662584e-06, "log_odds_chosen": 3.01572585105896, "log_odds_ratio": -0.24098755419254303, "logits/chosen": -0.5299030542373657, "logits/rejected": -0.7651487588882446, "logps/chosen": -0.5729340314865112, "logps/rejected": -2.936685085296631, "loss": 0.823, "nll_loss": 0.7222907543182373, "rewards/accuracies": 0.875, "rewards/chosen": -0.057293400168418884, "rewards/margins": 0.23637515306472778, "rewards/rejected": -0.2936685085296631, "step": 5739 }, { "epoch": 3.501601342077169, "grad_norm": 3.471466541290283, "learning_rate": 2.4053888548683404e-06, "log_odds_chosen": 2.6393985748291016, "log_odds_ratio": -0.262239933013916, "logits/chosen": -0.7788829207420349, "logits/rejected": -1.0017495155334473, "logps/chosen": -0.5791606903076172, "logps/rejected": -2.5240468978881836, "loss": 0.8947, "nll_loss": 0.8578484058380127, "rewards/accuracies": 0.875, "rewards/chosen": -0.0579160712659359, "rewards/margins": 0.19448862969875336, "rewards/rejected": -0.25240471959114075, "step": 5740 }, { "epoch": 3.5022113771541865, "grad_norm": 1.3935614824295044, "learning_rate": 2.404409063074097e-06, "log_odds_chosen": 1.0801572799682617, "log_odds_ratio": -0.46350693702697754, "logits/chosen": -0.8110985159873962, "logits/rejected": -0.7868990302085876, "logps/chosen": -0.8671576380729675, "logps/rejected": -1.6364275217056274, "loss": 0.941, "nll_loss": 1.0760833024978638, "rewards/accuracies": 0.75, "rewards/chosen": -0.08671576529741287, "rewards/margins": 0.07692698389291763, "rewards/rejected": -0.1636427640914917, "step": 5741 }, { "epoch": 3.5028214122312034, "grad_norm": 3.4068899154663086, "learning_rate": 2.4034292712798526e-06, "log_odds_chosen": 2.506409168243408, "log_odds_ratio": -0.30017751455307007, "logits/chosen": -0.6415352821350098, "logits/rejected": -0.9196262359619141, "logps/chosen": -0.6847105622291565, "logps/rejected": -2.563544988632202, "loss": 0.9555, "nll_loss": 0.7955541014671326, "rewards/accuracies": 0.875, "rewards/chosen": -0.06847105920314789, "rewards/margins": 0.18788345158100128, "rewards/rejected": -0.25635451078414917, "step": 5742 }, { "epoch": 3.50343144730822, "grad_norm": 4.762058258056641, "learning_rate": 2.402449479485609e-06, "log_odds_chosen": 1.645798683166504, "log_odds_ratio": -0.5833711624145508, "logits/chosen": -1.033772349357605, "logits/rejected": -1.2319132089614868, "logps/chosen": -0.9248160719871521, "logps/rejected": -2.3462884426116943, "loss": 1.3007, "nll_loss": 1.7604070901870728, "rewards/accuracies": 0.75, "rewards/chosen": -0.09248160570859909, "rewards/margins": 0.1421472579240799, "rewards/rejected": -0.23462887108325958, "step": 5743 }, { "epoch": 3.504041482385237, "grad_norm": 1.5106627941131592, "learning_rate": 2.4014696876913657e-06, "log_odds_chosen": 2.955353021621704, "log_odds_ratio": -0.2950681149959564, "logits/chosen": -0.7672443985939026, "logits/rejected": -1.0067946910858154, "logps/chosen": -0.7670876979827881, "logps/rejected": -3.046895980834961, "loss": 1.2653, "nll_loss": 1.0261857509613037, "rewards/accuracies": 0.875, "rewards/chosen": -0.07670877128839493, "rewards/margins": 0.2279808074235916, "rewards/rejected": -0.30468958616256714, "step": 5744 }, { "epoch": 3.5046515174622543, "grad_norm": 2.133901834487915, "learning_rate": 2.4004898958971214e-06, "log_odds_chosen": 0.6998412609100342, "log_odds_ratio": -0.4500149190425873, "logits/chosen": -0.7510459423065186, "logits/rejected": -0.7424134016036987, "logps/chosen": -0.7817412614822388, "logps/rejected": -1.120229959487915, "loss": 0.9949, "nll_loss": 1.0866382122039795, "rewards/accuracies": 0.75, "rewards/chosen": -0.07817412167787552, "rewards/margins": 0.03384887054562569, "rewards/rejected": -0.1120229959487915, "step": 5745 }, { "epoch": 3.505261552539271, "grad_norm": 3.9100520610809326, "learning_rate": 2.399510104102878e-06, "log_odds_chosen": 3.5978119373321533, "log_odds_ratio": -0.41129013895988464, "logits/chosen": -0.5969852805137634, "logits/rejected": -0.7934772968292236, "logps/chosen": -0.8500030040740967, "logps/rejected": -4.048742294311523, "loss": 1.1352, "nll_loss": 1.0361875295639038, "rewards/accuracies": 0.75, "rewards/chosen": -0.08500029146671295, "rewards/margins": 0.31987395882606506, "rewards/rejected": -0.4048742651939392, "step": 5746 }, { "epoch": 3.505871587616288, "grad_norm": 1.6632413864135742, "learning_rate": 2.3985303123086345e-06, "log_odds_chosen": 1.4395546913146973, "log_odds_ratio": -0.654457688331604, "logits/chosen": -0.7508809566497803, "logits/rejected": -0.7890879511833191, "logps/chosen": -0.714935302734375, "logps/rejected": -1.9513750076293945, "loss": 0.9977, "nll_loss": 0.977620005607605, "rewards/accuracies": 0.875, "rewards/chosen": -0.07149352878332138, "rewards/margins": 0.12364397943019867, "rewards/rejected": -0.19513751566410065, "step": 5747 }, { "epoch": 3.506481622693305, "grad_norm": 1.0103360414505005, "learning_rate": 2.3975505205143906e-06, "log_odds_chosen": 2.1847331523895264, "log_odds_ratio": -0.3945035934448242, "logits/chosen": -0.7411497831344604, "logits/rejected": -0.8847759366035461, "logps/chosen": -0.7874917387962341, "logps/rejected": -2.5597946643829346, "loss": 1.023, "nll_loss": 0.9704504013061523, "rewards/accuracies": 0.75, "rewards/chosen": -0.0787491723895073, "rewards/margins": 0.17723028361797333, "rewards/rejected": -0.2559794783592224, "step": 5748 }, { "epoch": 3.5070916577703217, "grad_norm": 2.9174082279205322, "learning_rate": 2.3965707287201467e-06, "log_odds_chosen": 0.9072583317756653, "log_odds_ratio": -0.5968359112739563, "logits/chosen": -0.9940952062606812, "logits/rejected": -1.0388474464416504, "logps/chosen": -1.0316249132156372, "logps/rejected": -1.7532374858856201, "loss": 1.103, "nll_loss": 1.3348407745361328, "rewards/accuracies": 0.5, "rewards/chosen": -0.103162482380867, "rewards/margins": 0.0721612498164177, "rewards/rejected": -0.1753237396478653, "step": 5749 }, { "epoch": 3.5077016928473386, "grad_norm": 1.5325568914413452, "learning_rate": 2.3955909369259033e-06, "log_odds_chosen": 2.5447216033935547, "log_odds_ratio": -0.2865976095199585, "logits/chosen": -0.6897954344749451, "logits/rejected": -0.8278074860572815, "logps/chosen": -0.8578627109527588, "logps/rejected": -2.853863000869751, "loss": 0.99, "nll_loss": 0.7898193001747131, "rewards/accuracies": 0.875, "rewards/chosen": -0.08578627556562424, "rewards/margins": 0.19960002601146698, "rewards/rejected": -0.285386323928833, "step": 5750 }, { "epoch": 3.5083117279243554, "grad_norm": 2.8514113426208496, "learning_rate": 2.3946111451316594e-06, "log_odds_chosen": 1.8072426319122314, "log_odds_ratio": -0.5111616253852844, "logits/chosen": -1.003101110458374, "logits/rejected": -1.1006296873092651, "logps/chosen": -0.757139265537262, "logps/rejected": -2.1761114597320557, "loss": 0.9051, "nll_loss": 0.8404116630554199, "rewards/accuracies": 0.75, "rewards/chosen": -0.07571392506361008, "rewards/margins": 0.14189721643924713, "rewards/rejected": -0.2176111340522766, "step": 5751 }, { "epoch": 3.5089217630013727, "grad_norm": 1.3665211200714111, "learning_rate": 2.3936313533374155e-06, "log_odds_chosen": 1.0102763175964355, "log_odds_ratio": -0.5047630071640015, "logits/chosen": -1.063993215560913, "logits/rejected": -0.9865540266036987, "logps/chosen": -0.8198266625404358, "logps/rejected": -1.5069124698638916, "loss": 1.0418, "nll_loss": 1.224222183227539, "rewards/accuracies": 0.75, "rewards/chosen": -0.08198267221450806, "rewards/margins": 0.06870858371257782, "rewards/rejected": -0.15069125592708588, "step": 5752 }, { "epoch": 3.5095317980783896, "grad_norm": 1.2223472595214844, "learning_rate": 2.392651561543172e-06, "log_odds_chosen": 0.9986308217048645, "log_odds_ratio": -0.4841228723526001, "logits/chosen": -0.8560081124305725, "logits/rejected": -0.8265750408172607, "logps/chosen": -0.7403113842010498, "logps/rejected": -1.4881805181503296, "loss": 0.9443, "nll_loss": 1.0036516189575195, "rewards/accuracies": 0.75, "rewards/chosen": -0.07403114438056946, "rewards/margins": 0.07478691637516022, "rewards/rejected": -0.14881806075572968, "step": 5753 }, { "epoch": 3.5101418331554064, "grad_norm": 1.9084316492080688, "learning_rate": 2.391671769748928e-06, "log_odds_chosen": 2.2953317165374756, "log_odds_ratio": -0.37043529748916626, "logits/chosen": -0.8330533504486084, "logits/rejected": -0.8613204956054688, "logps/chosen": -0.5649933218955994, "logps/rejected": -2.084590435028076, "loss": 0.9859, "nll_loss": 1.0004585981369019, "rewards/accuracies": 0.625, "rewards/chosen": -0.056499332189559937, "rewards/margins": 0.15195973217487335, "rewards/rejected": -0.2084590643644333, "step": 5754 }, { "epoch": 3.5107518682324232, "grad_norm": 7.16130256652832, "learning_rate": 2.3906919779546847e-06, "log_odds_chosen": 2.2230184078216553, "log_odds_ratio": -0.34343791007995605, "logits/chosen": -0.7101759314537048, "logits/rejected": -0.7289727330207825, "logps/chosen": -0.9155513048171997, "logps/rejected": -2.7737913131713867, "loss": 1.2415, "nll_loss": 0.9819543361663818, "rewards/accuracies": 0.75, "rewards/chosen": -0.09155513346195221, "rewards/margins": 0.18582400679588318, "rewards/rejected": -0.2773791551589966, "step": 5755 }, { "epoch": 3.5113619033094405, "grad_norm": 1.1981825828552246, "learning_rate": 2.389712186160441e-06, "log_odds_chosen": 1.9954774379730225, "log_odds_ratio": -0.44506537914276123, "logits/chosen": -0.8584541082382202, "logits/rejected": -0.8900192379951477, "logps/chosen": -0.7621168494224548, "logps/rejected": -2.3538503646850586, "loss": 1.2267, "nll_loss": 1.1083273887634277, "rewards/accuracies": 0.625, "rewards/chosen": -0.07621168345212936, "rewards/margins": 0.1591733694076538, "rewards/rejected": -0.23538506031036377, "step": 5756 }, { "epoch": 3.5119719383864574, "grad_norm": 6.7275166511535645, "learning_rate": 2.388732394366197e-06, "log_odds_chosen": 1.000161051750183, "log_odds_ratio": -0.45324185490608215, "logits/chosen": -0.7753067016601562, "logits/rejected": -0.6452480554580688, "logps/chosen": -0.7336984872817993, "logps/rejected": -1.288575530052185, "loss": 1.1582, "nll_loss": 1.018149733543396, "rewards/accuracies": 0.75, "rewards/chosen": -0.07336985319852829, "rewards/margins": 0.055487699806690216, "rewards/rejected": -0.1288575530052185, "step": 5757 }, { "epoch": 3.5125819734634742, "grad_norm": 1.3973227739334106, "learning_rate": 2.3877526025719535e-06, "log_odds_chosen": 2.2797577381134033, "log_odds_ratio": -0.37440162897109985, "logits/chosen": -1.0015138387680054, "logits/rejected": -1.0580919981002808, "logps/chosen": -0.9006911516189575, "logps/rejected": -2.794353723526001, "loss": 0.951, "nll_loss": 1.0622292757034302, "rewards/accuracies": 0.75, "rewards/chosen": -0.09006912261247635, "rewards/margins": 0.18936628103256226, "rewards/rejected": -0.279435396194458, "step": 5758 }, { "epoch": 3.513192008540491, "grad_norm": 1.7897759675979614, "learning_rate": 2.3867728107777092e-06, "log_odds_chosen": 1.2689040899276733, "log_odds_ratio": -0.6371469497680664, "logits/chosen": -1.0832743644714355, "logits/rejected": -1.0121357440948486, "logps/chosen": -1.0776320695877075, "logps/rejected": -2.2645859718322754, "loss": 1.0149, "nll_loss": 1.121111273765564, "rewards/accuracies": 0.625, "rewards/chosen": -0.10776320099830627, "rewards/margins": 0.11869540065526962, "rewards/rejected": -0.2264586091041565, "step": 5759 }, { "epoch": 3.513802043617508, "grad_norm": 1.3908573389053345, "learning_rate": 2.3857930189834658e-06, "log_odds_chosen": 3.591064453125, "log_odds_ratio": -0.4297131597995758, "logits/chosen": -1.158839225769043, "logits/rejected": -1.0605815649032593, "logps/chosen": -1.0102713108062744, "logps/rejected": -4.346130847930908, "loss": 1.0929, "nll_loss": 1.1906121969223022, "rewards/accuracies": 0.625, "rewards/chosen": -0.10102713853120804, "rewards/margins": 0.3335859775543213, "rewards/rejected": -0.43461310863494873, "step": 5760 }, { "epoch": 3.5144120786945248, "grad_norm": 1.7687909603118896, "learning_rate": 2.3848132271892223e-06, "log_odds_chosen": 1.5669598579406738, "log_odds_ratio": -0.40586036443710327, "logits/chosen": -0.6358143091201782, "logits/rejected": -0.605492115020752, "logps/chosen": -0.7181675434112549, "logps/rejected": -1.435018539428711, "loss": 0.9905, "nll_loss": 0.9651561975479126, "rewards/accuracies": 0.625, "rewards/chosen": -0.07181675732135773, "rewards/margins": 0.07168509811162949, "rewards/rejected": -0.14350184798240662, "step": 5761 }, { "epoch": 3.5150221137715416, "grad_norm": 1.2979282140731812, "learning_rate": 2.3838334353949784e-06, "log_odds_chosen": 0.7521874308586121, "log_odds_ratio": -0.576697587966919, "logits/chosen": -0.9817047119140625, "logits/rejected": -1.0241858959197998, "logps/chosen": -0.9707831144332886, "logps/rejected": -1.5648672580718994, "loss": 1.0868, "nll_loss": 1.1152315139770508, "rewards/accuracies": 0.625, "rewards/chosen": -0.09707832336425781, "rewards/margins": 0.059408415108919144, "rewards/rejected": -0.15648673474788666, "step": 5762 }, { "epoch": 3.515632148848559, "grad_norm": 7.1058478355407715, "learning_rate": 2.3828536436007346e-06, "log_odds_chosen": 1.9997553825378418, "log_odds_ratio": -0.3236050009727478, "logits/chosen": -0.554958164691925, "logits/rejected": -0.6654596328735352, "logps/chosen": -0.459354966878891, "logps/rejected": -1.8747810125350952, "loss": 1.0993, "nll_loss": 0.8921706676483154, "rewards/accuracies": 0.875, "rewards/chosen": -0.0459355004131794, "rewards/margins": 0.14154259860515594, "rewards/rejected": -0.18747809529304504, "step": 5763 }, { "epoch": 3.5162421839255757, "grad_norm": 1.3037117719650269, "learning_rate": 2.381873851806491e-06, "log_odds_chosen": 0.7556501626968384, "log_odds_ratio": -0.6256756782531738, "logits/chosen": -1.0395193099975586, "logits/rejected": -0.8895723223686218, "logps/chosen": -0.9873907566070557, "logps/rejected": -1.5970423221588135, "loss": 0.9975, "nll_loss": 1.233154058456421, "rewards/accuracies": 0.625, "rewards/chosen": -0.09873908013105392, "rewards/margins": 0.0609651580452919, "rewards/rejected": -0.15970423817634583, "step": 5764 }, { "epoch": 3.5168522190025926, "grad_norm": 1.6289011240005493, "learning_rate": 2.3808940600122472e-06, "log_odds_chosen": 1.7101631164550781, "log_odds_ratio": -0.3841676712036133, "logits/chosen": -0.9563059210777283, "logits/rejected": -0.9850496053695679, "logps/chosen": -0.8605579137802124, "logps/rejected": -2.038233518600464, "loss": 1.152, "nll_loss": 1.058590054512024, "rewards/accuracies": 0.625, "rewards/chosen": -0.08605580031871796, "rewards/margins": 0.1177675724029541, "rewards/rejected": -0.20382335782051086, "step": 5765 }, { "epoch": 3.5174622540796094, "grad_norm": 2.761246919631958, "learning_rate": 2.3799142682180034e-06, "log_odds_chosen": 1.8332509994506836, "log_odds_ratio": -0.4915458559989929, "logits/chosen": -0.93975430727005, "logits/rejected": -1.0060080289840698, "logps/chosen": -0.7853153944015503, "logps/rejected": -2.290095806121826, "loss": 1.0047, "nll_loss": 1.0444316864013672, "rewards/accuracies": 0.5, "rewards/chosen": -0.07853154093027115, "rewards/margins": 0.1504780352115631, "rewards/rejected": -0.22900956869125366, "step": 5766 }, { "epoch": 3.5180722891566267, "grad_norm": 2.0577728748321533, "learning_rate": 2.37893447642376e-06, "log_odds_chosen": 2.424539089202881, "log_odds_ratio": -0.31515005230903625, "logits/chosen": -0.7687896490097046, "logits/rejected": -0.9643175601959229, "logps/chosen": -0.6988274455070496, "logps/rejected": -2.745479106903076, "loss": 1.0819, "nll_loss": 1.0128939151763916, "rewards/accuracies": 0.875, "rewards/chosen": -0.06988274306058884, "rewards/margins": 0.2046651691198349, "rewards/rejected": -0.27454790472984314, "step": 5767 }, { "epoch": 3.5186823242336436, "grad_norm": 1.4829559326171875, "learning_rate": 2.377954684629516e-06, "log_odds_chosen": 1.1626297235488892, "log_odds_ratio": -0.40097880363464355, "logits/chosen": -0.9011956453323364, "logits/rejected": -0.7464808821678162, "logps/chosen": -0.7904651165008545, "logps/rejected": -1.662610411643982, "loss": 1.0317, "nll_loss": 1.004298210144043, "rewards/accuracies": 0.875, "rewards/chosen": -0.07904651015996933, "rewards/margins": 0.08721453696489334, "rewards/rejected": -0.16626104712486267, "step": 5768 }, { "epoch": 3.5192923593106604, "grad_norm": 2.865088939666748, "learning_rate": 2.3769748928352726e-06, "log_odds_chosen": 2.4707632064819336, "log_odds_ratio": -0.3870689272880554, "logits/chosen": -0.8665060997009277, "logits/rejected": -0.9576132893562317, "logps/chosen": -0.8008850812911987, "logps/rejected": -2.7665069103240967, "loss": 1.216, "nll_loss": 1.127351999282837, "rewards/accuracies": 0.875, "rewards/chosen": -0.08008851110935211, "rewards/margins": 0.19656220078468323, "rewards/rejected": -0.27665069699287415, "step": 5769 }, { "epoch": 3.5199023943876773, "grad_norm": 1.955454707145691, "learning_rate": 2.3759951010410287e-06, "log_odds_chosen": 0.7723754048347473, "log_odds_ratio": -0.5893567204475403, "logits/chosen": -1.0460827350616455, "logits/rejected": -0.7569010257720947, "logps/chosen": -0.792807936668396, "logps/rejected": -1.2845125198364258, "loss": 1.1722, "nll_loss": 0.9304349422454834, "rewards/accuracies": 0.5, "rewards/chosen": -0.0792807936668396, "rewards/margins": 0.04917046055197716, "rewards/rejected": -0.12845125794410706, "step": 5770 }, { "epoch": 3.520512429464694, "grad_norm": 3.5461411476135254, "learning_rate": 2.375015309246785e-06, "log_odds_chosen": 2.2493720054626465, "log_odds_ratio": -0.35630378127098083, "logits/chosen": -0.6765172481536865, "logits/rejected": -0.8468063473701477, "logps/chosen": -0.6503623723983765, "logps/rejected": -2.440877676010132, "loss": 0.9352, "nll_loss": 0.8238803148269653, "rewards/accuracies": 1.0, "rewards/chosen": -0.06503623723983765, "rewards/margins": 0.17905153334140778, "rewards/rejected": -0.24408778548240662, "step": 5771 }, { "epoch": 3.521122464541711, "grad_norm": 1.664808750152588, "learning_rate": 2.3740355174525414e-06, "log_odds_chosen": 2.384197235107422, "log_odds_ratio": -0.3322703540325165, "logits/chosen": -0.9403049945831299, "logits/rejected": -1.0446040630340576, "logps/chosen": -0.7906728982925415, "logps/rejected": -2.774731159210205, "loss": 1.0521, "nll_loss": 0.9709972143173218, "rewards/accuracies": 0.875, "rewards/chosen": -0.07906728237867355, "rewards/margins": 0.19840580224990845, "rewards/rejected": -0.2774730920791626, "step": 5772 }, { "epoch": 3.521732499618728, "grad_norm": 1.5810216665267944, "learning_rate": 2.373055725658298e-06, "log_odds_chosen": 2.523721933364868, "log_odds_ratio": -0.42072322964668274, "logits/chosen": -0.7821703553199768, "logits/rejected": -1.012600302696228, "logps/chosen": -0.8628053069114685, "logps/rejected": -2.7465829849243164, "loss": 1.0302, "nll_loss": 1.0311168432235718, "rewards/accuracies": 0.75, "rewards/chosen": -0.08628053218126297, "rewards/margins": 0.1883777379989624, "rewards/rejected": -0.2746582627296448, "step": 5773 }, { "epoch": 3.522342534695745, "grad_norm": 1.3009555339813232, "learning_rate": 2.3720759338640536e-06, "log_odds_chosen": 0.6581109166145325, "log_odds_ratio": -0.6147133111953735, "logits/chosen": -0.968171238899231, "logits/rejected": -0.9838390350341797, "logps/chosen": -0.9031755328178406, "logps/rejected": -1.3285512924194336, "loss": 0.9071, "nll_loss": 0.9577902555465698, "rewards/accuracies": 0.625, "rewards/chosen": -0.09031756222248077, "rewards/margins": 0.04253757745027542, "rewards/rejected": -0.1328551471233368, "step": 5774 }, { "epoch": 3.522952569772762, "grad_norm": 1.5513246059417725, "learning_rate": 2.37109614206981e-06, "log_odds_chosen": 2.702812910079956, "log_odds_ratio": -0.36371511220932007, "logits/chosen": -0.7294642329216003, "logits/rejected": -0.874722957611084, "logps/chosen": -0.6811054944992065, "logps/rejected": -2.567075729370117, "loss": 1.0178, "nll_loss": 1.0767909288406372, "rewards/accuracies": 0.875, "rewards/chosen": -0.06811055541038513, "rewards/margins": 0.18859705328941345, "rewards/rejected": -0.2567076086997986, "step": 5775 }, { "epoch": 3.523562604849779, "grad_norm": 3.1871445178985596, "learning_rate": 2.3701163502755667e-06, "log_odds_chosen": 1.3081281185150146, "log_odds_ratio": -0.5827027559280396, "logits/chosen": -0.7772207856178284, "logits/rejected": -0.9070736169815063, "logps/chosen": -0.8592334389686584, "logps/rejected": -1.863701581954956, "loss": 1.1441, "nll_loss": 1.1429749727249146, "rewards/accuracies": 0.625, "rewards/chosen": -0.08592334389686584, "rewards/margins": 0.10044681280851364, "rewards/rejected": -0.18637016415596008, "step": 5776 }, { "epoch": 3.5241726399267956, "grad_norm": 7.758515357971191, "learning_rate": 2.3691365584813224e-06, "log_odds_chosen": 1.0099529027938843, "log_odds_ratio": -0.4470921754837036, "logits/chosen": -0.8508726954460144, "logits/rejected": -0.9238959550857544, "logps/chosen": -1.049487829208374, "logps/rejected": -1.9081906080245972, "loss": 1.1135, "nll_loss": 1.268528699874878, "rewards/accuracies": 0.875, "rewards/chosen": -0.10494878888130188, "rewards/margins": 0.08587028086185455, "rewards/rejected": -0.19081908464431763, "step": 5777 }, { "epoch": 3.524782675003813, "grad_norm": 1.289114236831665, "learning_rate": 2.368156766687079e-06, "log_odds_chosen": 4.663220405578613, "log_odds_ratio": -0.18470299243927002, "logits/chosen": -0.7286839485168457, "logits/rejected": -1.0207257270812988, "logps/chosen": -0.6489932537078857, "logps/rejected": -4.573887825012207, "loss": 1.0419, "nll_loss": 0.7571126222610474, "rewards/accuracies": 0.875, "rewards/chosen": -0.06489932537078857, "rewards/margins": 0.392489492893219, "rewards/rejected": -0.45738881826400757, "step": 5778 }, { "epoch": 3.5253927100808298, "grad_norm": 9.813863754272461, "learning_rate": 2.367176974892835e-06, "log_odds_chosen": 1.1931501626968384, "log_odds_ratio": -0.5649059414863586, "logits/chosen": -0.800485372543335, "logits/rejected": -0.7452658414840698, "logps/chosen": -0.7788376808166504, "logps/rejected": -1.7897839546203613, "loss": 1.276, "nll_loss": 1.2130956649780273, "rewards/accuracies": 0.625, "rewards/chosen": -0.0778837725520134, "rewards/margins": 0.10109463334083557, "rewards/rejected": -0.17897838354110718, "step": 5779 }, { "epoch": 3.5260027451578466, "grad_norm": 2.1781845092773438, "learning_rate": 2.366197183098591e-06, "log_odds_chosen": 2.5394885540008545, "log_odds_ratio": -0.4808395802974701, "logits/chosen": -1.1215596199035645, "logits/rejected": -1.1655644178390503, "logps/chosen": -1.0288219451904297, "logps/rejected": -3.1079118251800537, "loss": 1.2503, "nll_loss": 1.277034878730774, "rewards/accuracies": 0.75, "rewards/chosen": -0.10288220643997192, "rewards/margins": 0.2079089879989624, "rewards/rejected": -0.3107911944389343, "step": 5780 }, { "epoch": 3.5266127802348635, "grad_norm": 2.0839710235595703, "learning_rate": 2.3652173913043477e-06, "log_odds_chosen": 3.360058307647705, "log_odds_ratio": -0.21549290418624878, "logits/chosen": -0.8341379761695862, "logits/rejected": -1.1593971252441406, "logps/chosen": -0.495037317276001, "logps/rejected": -2.9176993370056152, "loss": 0.8998, "nll_loss": 0.7866654396057129, "rewards/accuracies": 1.0, "rewards/chosen": -0.049503736197948456, "rewards/margins": 0.2422662079334259, "rewards/rejected": -0.29176992177963257, "step": 5781 }, { "epoch": 3.5272228153118803, "grad_norm": 1.2572816610336304, "learning_rate": 2.364237599510104e-06, "log_odds_chosen": 1.8928406238555908, "log_odds_ratio": -0.5057631731033325, "logits/chosen": -0.8956714272499084, "logits/rejected": -0.9229623675346375, "logps/chosen": -0.6375929117202759, "logps/rejected": -2.1348111629486084, "loss": 1.0685, "nll_loss": 0.9828881025314331, "rewards/accuracies": 0.5, "rewards/chosen": -0.06375928968191147, "rewards/margins": 0.14972181618213654, "rewards/rejected": -0.2134811282157898, "step": 5782 }, { "epoch": 3.527832850388897, "grad_norm": 1.0786346197128296, "learning_rate": 2.3632578077158604e-06, "log_odds_chosen": 0.8486709594726562, "log_odds_ratio": -0.4939180016517639, "logits/chosen": -1.002240777015686, "logits/rejected": -1.1023340225219727, "logps/chosen": -0.9406625032424927, "logps/rejected": -1.5173759460449219, "loss": 1.2243, "nll_loss": 1.0347427129745483, "rewards/accuracies": 0.625, "rewards/chosen": -0.09406626224517822, "rewards/margins": 0.057671334594488144, "rewards/rejected": -0.15173760056495667, "step": 5783 }, { "epoch": 3.5284428854659144, "grad_norm": 1.109370470046997, "learning_rate": 2.3622780159216165e-06, "log_odds_chosen": 3.587952136993408, "log_odds_ratio": -0.15528151392936707, "logits/chosen": -0.614722728729248, "logits/rejected": -1.0299004316329956, "logps/chosen": -0.49516820907592773, "logps/rejected": -3.2102913856506348, "loss": 0.9772, "nll_loss": 0.6399188041687012, "rewards/accuracies": 1.0, "rewards/chosen": -0.049516819417476654, "rewards/margins": 0.27151229977607727, "rewards/rejected": -0.3210291266441345, "step": 5784 }, { "epoch": 3.5290529205429313, "grad_norm": 2.1750495433807373, "learning_rate": 2.3612982241273727e-06, "log_odds_chosen": 0.6580496430397034, "log_odds_ratio": -0.6290104389190674, "logits/chosen": -1.058100700378418, "logits/rejected": -0.9521109461784363, "logps/chosen": -1.0734124183654785, "logps/rejected": -1.5207480192184448, "loss": 1.223, "nll_loss": 1.2486779689788818, "rewards/accuracies": 0.75, "rewards/chosen": -0.10734124481678009, "rewards/margins": 0.04473356530070305, "rewards/rejected": -0.15207479894161224, "step": 5785 }, { "epoch": 3.529662955619948, "grad_norm": 17.335735321044922, "learning_rate": 2.360318432333129e-06, "log_odds_chosen": 2.2578141689300537, "log_odds_ratio": -0.35399913787841797, "logits/chosen": -0.753444492816925, "logits/rejected": -0.8747996091842651, "logps/chosen": -0.5167132616043091, "logps/rejected": -2.222022533416748, "loss": 1.0147, "nll_loss": 0.7948046922683716, "rewards/accuracies": 0.875, "rewards/chosen": -0.05167132988572121, "rewards/margins": 0.17053091526031494, "rewards/rejected": -0.22220224142074585, "step": 5786 }, { "epoch": 3.530272990696965, "grad_norm": 6.413908004760742, "learning_rate": 2.3593386405388857e-06, "log_odds_chosen": 2.0940682888031006, "log_odds_ratio": -0.33914169669151306, "logits/chosen": -0.6523807048797607, "logits/rejected": -0.9237854480743408, "logps/chosen": -0.6439217329025269, "logps/rejected": -2.2767794132232666, "loss": 0.8843, "nll_loss": 0.7566554546356201, "rewards/accuracies": 0.875, "rewards/chosen": -0.06439217180013657, "rewards/margins": 0.1632857769727707, "rewards/rejected": -0.22767795622348785, "step": 5787 }, { "epoch": 3.5308830257739823, "grad_norm": 3.876974582672119, "learning_rate": 2.3583588487446414e-06, "log_odds_chosen": 1.3394932746887207, "log_odds_ratio": -0.3144131600856781, "logits/chosen": -0.7219918370246887, "logits/rejected": -0.8581528663635254, "logps/chosen": -0.7737135887145996, "logps/rejected": -1.6912599802017212, "loss": 0.93, "nll_loss": 0.8963099718093872, "rewards/accuracies": 0.875, "rewards/chosen": -0.07737136632204056, "rewards/margins": 0.09175463765859604, "rewards/rejected": -0.1691259890794754, "step": 5788 }, { "epoch": 3.531493060850999, "grad_norm": 1.1357316970825195, "learning_rate": 2.357379056950398e-06, "log_odds_chosen": 1.8407998085021973, "log_odds_ratio": -0.4958499073982239, "logits/chosen": -0.8102920055389404, "logits/rejected": -0.8793896436691284, "logps/chosen": -0.8071353435516357, "logps/rejected": -2.3495683670043945, "loss": 0.8732, "nll_loss": 0.9985498189926147, "rewards/accuracies": 0.625, "rewards/chosen": -0.08071352541446686, "rewards/margins": 0.15424329042434692, "rewards/rejected": -0.23495683073997498, "step": 5789 }, { "epoch": 3.532103095928016, "grad_norm": 12.709980964660645, "learning_rate": 2.3563992651561545e-06, "log_odds_chosen": 1.4517531394958496, "log_odds_ratio": -0.5049448609352112, "logits/chosen": -1.0681856870651245, "logits/rejected": -1.0806841850280762, "logps/chosen": -1.1604335308074951, "logps/rejected": -2.3363163471221924, "loss": 1.2181, "nll_loss": 1.3030450344085693, "rewards/accuracies": 0.75, "rewards/chosen": -0.11604335159063339, "rewards/margins": 0.11758831143379211, "rewards/rejected": -0.2336316555738449, "step": 5790 }, { "epoch": 3.532713131005033, "grad_norm": 1.5235052108764648, "learning_rate": 2.3554194733619102e-06, "log_odds_chosen": 2.1809186935424805, "log_odds_ratio": -0.30324822664260864, "logits/chosen": -0.8209834694862366, "logits/rejected": -1.057734727859497, "logps/chosen": -0.6675618886947632, "logps/rejected": -2.2395431995391846, "loss": 1.2211, "nll_loss": 0.8828112483024597, "rewards/accuracies": 0.75, "rewards/chosen": -0.06675618886947632, "rewards/margins": 0.15719811618328094, "rewards/rejected": -0.22395430505275726, "step": 5791 }, { "epoch": 3.5333231660820497, "grad_norm": 1.3549230098724365, "learning_rate": 2.3544396815676668e-06, "log_odds_chosen": 3.4210925102233887, "log_odds_ratio": -0.19866858422756195, "logits/chosen": -0.8746600151062012, "logits/rejected": -1.1430230140686035, "logps/chosen": -0.6270956993103027, "logps/rejected": -3.2633352279663086, "loss": 1.0724, "nll_loss": 0.9518591165542603, "rewards/accuracies": 0.875, "rewards/chosen": -0.06270956993103027, "rewards/margins": 0.2636239230632782, "rewards/rejected": -0.32633352279663086, "step": 5792 }, { "epoch": 3.5339332011590665, "grad_norm": 1.462868094444275, "learning_rate": 2.3534598897734233e-06, "log_odds_chosen": 2.299680471420288, "log_odds_ratio": -0.4745331108570099, "logits/chosen": -0.8635115623474121, "logits/rejected": -0.8787470459938049, "logps/chosen": -0.6667590737342834, "logps/rejected": -2.3344662189483643, "loss": 1.1067, "nll_loss": 1.0220129489898682, "rewards/accuracies": 0.5, "rewards/chosen": -0.06667590886354446, "rewards/margins": 0.16677071154117584, "rewards/rejected": -0.2334466278553009, "step": 5793 }, { "epoch": 3.5345432362360834, "grad_norm": 1.0914041996002197, "learning_rate": 2.3524800979791794e-06, "log_odds_chosen": 0.0817214697599411, "log_odds_ratio": -0.712746262550354, "logits/chosen": -0.8615940809249878, "logits/rejected": -0.8446167707443237, "logps/chosen": -0.9121575951576233, "logps/rejected": -0.9719245433807373, "loss": 1.1557, "nll_loss": 1.0043706893920898, "rewards/accuracies": 0.375, "rewards/chosen": -0.09121576696634293, "rewards/margins": 0.0059766932390630245, "rewards/rejected": -0.09719245880842209, "step": 5794 }, { "epoch": 3.5351532713131006, "grad_norm": 22.450090408325195, "learning_rate": 2.3515003061849356e-06, "log_odds_chosen": 0.16463804244995117, "log_odds_ratio": -0.9800240993499756, "logits/chosen": -1.2989606857299805, "logits/rejected": -1.2392356395721436, "logps/chosen": -2.052781820297241, "logps/rejected": -2.093846559524536, "loss": 1.149, "nll_loss": 1.338444709777832, "rewards/accuracies": 0.5, "rewards/chosen": -0.2052781879901886, "rewards/margins": 0.004106465727090836, "rewards/rejected": -0.20938466489315033, "step": 5795 }, { "epoch": 3.5357633063901175, "grad_norm": 4.698184013366699, "learning_rate": 2.3505205143906917e-06, "log_odds_chosen": 1.296976089477539, "log_odds_ratio": -0.7053055167198181, "logits/chosen": -1.0010433197021484, "logits/rejected": -0.9646135568618774, "logps/chosen": -0.806525707244873, "logps/rejected": -1.8796424865722656, "loss": 1.0487, "nll_loss": 0.8970664143562317, "rewards/accuracies": 0.5, "rewards/chosen": -0.08065256476402283, "rewards/margins": 0.1073116809129715, "rewards/rejected": -0.18796426057815552, "step": 5796 }, { "epoch": 3.5363733414671343, "grad_norm": 1.671051025390625, "learning_rate": 2.3495407225964482e-06, "log_odds_chosen": 2.000865936279297, "log_odds_ratio": -0.25583693385124207, "logits/chosen": -0.9928364157676697, "logits/rejected": -1.0914798974990845, "logps/chosen": -0.8649153709411621, "logps/rejected": -2.3310389518737793, "loss": 1.0762, "nll_loss": 1.042576551437378, "rewards/accuracies": 0.875, "rewards/chosen": -0.08649154007434845, "rewards/margins": 0.14661237597465515, "rewards/rejected": -0.2331039160490036, "step": 5797 }, { "epoch": 3.536983376544151, "grad_norm": 1.1703377962112427, "learning_rate": 2.3485609308022044e-06, "log_odds_chosen": 1.9258251190185547, "log_odds_ratio": -0.46422073245048523, "logits/chosen": -0.9049502611160278, "logits/rejected": -0.9099352359771729, "logps/chosen": -0.7032716870307922, "logps/rejected": -1.998671293258667, "loss": 1.0178, "nll_loss": 0.9558264017105103, "rewards/accuracies": 0.625, "rewards/chosen": -0.07032717764377594, "rewards/margins": 0.12953996658325195, "rewards/rejected": -0.1998671591281891, "step": 5798 }, { "epoch": 3.5375934116211685, "grad_norm": 6.889951229095459, "learning_rate": 2.3475811390079605e-06, "log_odds_chosen": 3.113832950592041, "log_odds_ratio": -0.3232836425304413, "logits/chosen": -0.8703113794326782, "logits/rejected": -1.1202943325042725, "logps/chosen": -0.6888643503189087, "logps/rejected": -3.2951502799987793, "loss": 1.066, "nll_loss": 0.9533346891403198, "rewards/accuracies": 0.75, "rewards/chosen": -0.06888643652200699, "rewards/margins": 0.2606286108493805, "rewards/rejected": -0.3295150399208069, "step": 5799 }, { "epoch": 3.5382034466981853, "grad_norm": 1.0871777534484863, "learning_rate": 2.346601347213717e-06, "log_odds_chosen": 3.1388955116271973, "log_odds_ratio": -0.1878184676170349, "logits/chosen": -0.8277575969696045, "logits/rejected": -1.0058600902557373, "logps/chosen": -0.56720370054245, "logps/rejected": -2.800851821899414, "loss": 0.9934, "nll_loss": 0.8559573292732239, "rewards/accuracies": 0.875, "rewards/chosen": -0.056720372289419174, "rewards/margins": 0.22336483001708984, "rewards/rejected": -0.28008517622947693, "step": 5800 }, { "epoch": 3.538813481775202, "grad_norm": 1.310677409172058, "learning_rate": 2.3456215554194736e-06, "log_odds_chosen": 2.076417922973633, "log_odds_ratio": -0.3246081471443176, "logits/chosen": -0.9993353486061096, "logits/rejected": -1.0778968334197998, "logps/chosen": -0.8822025060653687, "logps/rejected": -2.571195363998413, "loss": 1.0097, "nll_loss": 1.1294755935668945, "rewards/accuracies": 0.875, "rewards/chosen": -0.0882202535867691, "rewards/margins": 0.1688992828130722, "rewards/rejected": -0.2571195363998413, "step": 5801 }, { "epoch": 3.539423516852219, "grad_norm": 3.042391061782837, "learning_rate": 2.3446417636252293e-06, "log_odds_chosen": 3.8316376209259033, "log_odds_ratio": -0.1683279573917389, "logits/chosen": -0.8538018465042114, "logits/rejected": -0.9070019721984863, "logps/chosen": -0.7407950162887573, "logps/rejected": -4.046375274658203, "loss": 1.0686, "nll_loss": 0.9197564125061035, "rewards/accuracies": 0.875, "rewards/chosen": -0.0740794986486435, "rewards/margins": 0.33055803179740906, "rewards/rejected": -0.40463751554489136, "step": 5802 }, { "epoch": 3.540033551929236, "grad_norm": 1.7192661762237549, "learning_rate": 2.343661971830986e-06, "log_odds_chosen": 1.3019040822982788, "log_odds_ratio": -0.554972767829895, "logits/chosen": -0.936761736869812, "logits/rejected": -1.000488519668579, "logps/chosen": -0.8472257256507874, "logps/rejected": -1.867708444595337, "loss": 0.9835, "nll_loss": 1.0002429485321045, "rewards/accuracies": 0.5, "rewards/chosen": -0.08472257107496262, "rewards/margins": 0.10204826295375824, "rewards/rejected": -0.18677084147930145, "step": 5803 }, { "epoch": 3.5406435870062527, "grad_norm": 11.55678653717041, "learning_rate": 2.3426821800367424e-06, "log_odds_chosen": 1.5505552291870117, "log_odds_ratio": -0.37268608808517456, "logits/chosen": -0.906534731388092, "logits/rejected": -1.0020079612731934, "logps/chosen": -1.0038868188858032, "logps/rejected": -2.1851749420166016, "loss": 1.0856, "nll_loss": 1.1992684602737427, "rewards/accuracies": 0.875, "rewards/chosen": -0.10038868337869644, "rewards/margins": 0.11812880635261536, "rewards/rejected": -0.2185174822807312, "step": 5804 }, { "epoch": 3.5412536220832695, "grad_norm": 2.8568179607391357, "learning_rate": 2.341702388242498e-06, "log_odds_chosen": 1.3495864868164062, "log_odds_ratio": -0.5021141171455383, "logits/chosen": -0.9578899145126343, "logits/rejected": -1.0614084005355835, "logps/chosen": -0.7589659094810486, "logps/rejected": -1.9388536214828491, "loss": 1.1365, "nll_loss": 1.1292531490325928, "rewards/accuracies": 0.75, "rewards/chosen": -0.07589659094810486, "rewards/margins": 0.11798878014087677, "rewards/rejected": -0.19388538599014282, "step": 5805 }, { "epoch": 3.541863657160287, "grad_norm": 1.6410913467407227, "learning_rate": 2.3407225964482546e-06, "log_odds_chosen": 0.07058486342430115, "log_odds_ratio": -0.8408215641975403, "logits/chosen": -0.9824702739715576, "logits/rejected": -0.8225592374801636, "logps/chosen": -0.8363519906997681, "logps/rejected": -1.0353329181671143, "loss": 0.9842, "nll_loss": 0.9471269249916077, "rewards/accuracies": 0.25, "rewards/chosen": -0.08363520354032516, "rewards/margins": 0.01989808864891529, "rewards/rejected": -0.1035332977771759, "step": 5806 }, { "epoch": 3.5424736922373037, "grad_norm": 1.7197996377944946, "learning_rate": 2.339742804654011e-06, "log_odds_chosen": 2.873021125793457, "log_odds_ratio": -0.40952709317207336, "logits/chosen": -0.9277706146240234, "logits/rejected": -1.0570151805877686, "logps/chosen": -0.7637256383895874, "logps/rejected": -3.0273265838623047, "loss": 1.0311, "nll_loss": 0.9578406810760498, "rewards/accuracies": 0.875, "rewards/chosen": -0.07637257128953934, "rewards/margins": 0.22636011242866516, "rewards/rejected": -0.3027326464653015, "step": 5807 }, { "epoch": 3.5430837273143205, "grad_norm": 1.4573792219161987, "learning_rate": 2.3387630128597673e-06, "log_odds_chosen": 2.528191566467285, "log_odds_ratio": -0.3687712550163269, "logits/chosen": -0.7095882892608643, "logits/rejected": -0.792915403842926, "logps/chosen": -0.655399739742279, "logps/rejected": -2.6537022590637207, "loss": 1.1163, "nll_loss": 0.7409659624099731, "rewards/accuracies": 0.75, "rewards/chosen": -0.06553997099399567, "rewards/margins": 0.19983026385307312, "rewards/rejected": -0.26537024974823, "step": 5808 }, { "epoch": 3.5436937623913374, "grad_norm": 2.607299566268921, "learning_rate": 2.3377832210655234e-06, "log_odds_chosen": 1.332409381866455, "log_odds_ratio": -0.38351762294769287, "logits/chosen": -0.9158064126968384, "logits/rejected": -1.11924147605896, "logps/chosen": -0.8984943628311157, "logps/rejected": -1.9658700227737427, "loss": 1.1512, "nll_loss": 1.038462519645691, "rewards/accuracies": 0.75, "rewards/chosen": -0.08984943479299545, "rewards/margins": 0.10673756897449493, "rewards/rejected": -0.19658701121807098, "step": 5809 }, { "epoch": 3.5443037974683547, "grad_norm": 1.82845139503479, "learning_rate": 2.33680342927128e-06, "log_odds_chosen": 1.7642326354980469, "log_odds_ratio": -0.27713051438331604, "logits/chosen": -0.7021180391311646, "logits/rejected": -0.9057585000991821, "logps/chosen": -0.782584547996521, "logps/rejected": -2.062300443649292, "loss": 0.8868, "nll_loss": 0.8736913800239563, "rewards/accuracies": 0.875, "rewards/chosen": -0.0782584547996521, "rewards/margins": 0.1279715746641159, "rewards/rejected": -0.206230029463768, "step": 5810 }, { "epoch": 3.5449138325453715, "grad_norm": 5.749746799468994, "learning_rate": 2.335823637477036e-06, "log_odds_chosen": 1.6769561767578125, "log_odds_ratio": -0.4733145534992218, "logits/chosen": -0.9573508501052856, "logits/rejected": -1.0089082717895508, "logps/chosen": -0.8950332999229431, "logps/rejected": -2.264537811279297, "loss": 1.2811, "nll_loss": 1.1059417724609375, "rewards/accuracies": 0.625, "rewards/chosen": -0.08950333297252655, "rewards/margins": 0.13695044815540314, "rewards/rejected": -0.22645379602909088, "step": 5811 }, { "epoch": 3.5455238676223884, "grad_norm": 1.2120530605316162, "learning_rate": 2.334843845682792e-06, "log_odds_chosen": 1.2202253341674805, "log_odds_ratio": -0.561785101890564, "logits/chosen": -0.8776059150695801, "logits/rejected": -0.8712230920791626, "logps/chosen": -0.6409386396408081, "logps/rejected": -1.5902942419052124, "loss": 0.9396, "nll_loss": 0.8100012540817261, "rewards/accuracies": 0.625, "rewards/chosen": -0.06409385800361633, "rewards/margins": 0.09493556618690491, "rewards/rejected": -0.15902942419052124, "step": 5812 }, { "epoch": 3.546133902699405, "grad_norm": 1.9298282861709595, "learning_rate": 2.3338640538885487e-06, "log_odds_chosen": 1.253665566444397, "log_odds_ratio": -0.458665132522583, "logits/chosen": -0.6883963346481323, "logits/rejected": -0.907753586769104, "logps/chosen": -0.7783396244049072, "logps/rejected": -1.7289021015167236, "loss": 1.0175, "nll_loss": 0.9330142736434937, "rewards/accuracies": 0.75, "rewards/chosen": -0.07783395797014236, "rewards/margins": 0.09505625814199448, "rewards/rejected": -0.17289021611213684, "step": 5813 }, { "epoch": 3.546743937776422, "grad_norm": 2.085933208465576, "learning_rate": 2.332884262094305e-06, "log_odds_chosen": 2.682403087615967, "log_odds_ratio": -0.2728480398654938, "logits/chosen": -0.5577784776687622, "logits/rejected": -0.7303935289382935, "logps/chosen": -0.5810406804084778, "logps/rejected": -2.598443031311035, "loss": 0.8662, "nll_loss": 0.9033282995223999, "rewards/accuracies": 1.0, "rewards/chosen": -0.05810406804084778, "rewards/margins": 0.20174022018909454, "rewards/rejected": -0.25984427332878113, "step": 5814 }, { "epoch": 3.547353972853439, "grad_norm": 3.7762722969055176, "learning_rate": 2.3319044703000614e-06, "log_odds_chosen": 3.108750343322754, "log_odds_ratio": -0.33765512704849243, "logits/chosen": -0.8497110605239868, "logits/rejected": -0.9715264439582825, "logps/chosen": -0.783284604549408, "logps/rejected": -3.3487513065338135, "loss": 0.8563, "nll_loss": 0.9149709939956665, "rewards/accuracies": 0.75, "rewards/chosen": -0.0783284604549408, "rewards/margins": 0.25654667615890503, "rewards/rejected": -0.33487510681152344, "step": 5815 }, { "epoch": 3.5479640079304557, "grad_norm": 3.2725183963775635, "learning_rate": 2.330924678505817e-06, "log_odds_chosen": 3.0297398567199707, "log_odds_ratio": -0.21379342675209045, "logits/chosen": -0.6624878644943237, "logits/rejected": -0.8498721122741699, "logps/chosen": -0.6242004036903381, "logps/rejected": -3.06522274017334, "loss": 0.9411, "nll_loss": 0.7108701467514038, "rewards/accuracies": 1.0, "rewards/chosen": -0.06242004409432411, "rewards/margins": 0.24410223960876465, "rewards/rejected": -0.30652227997779846, "step": 5816 }, { "epoch": 3.548574043007473, "grad_norm": 1.8275057077407837, "learning_rate": 2.3299448867115736e-06, "log_odds_chosen": 1.8739678859710693, "log_odds_ratio": -0.6832237839698792, "logits/chosen": -0.9014930725097656, "logits/rejected": -0.9148685932159424, "logps/chosen": -0.95931476354599, "logps/rejected": -2.4659860134124756, "loss": 1.0876, "nll_loss": 1.1280211210250854, "rewards/accuracies": 0.625, "rewards/chosen": -0.09593147784471512, "rewards/margins": 0.15066711604595184, "rewards/rejected": -0.24659860134124756, "step": 5817 }, { "epoch": 3.54918407808449, "grad_norm": 1.7645184993743896, "learning_rate": 2.32896509491733e-06, "log_odds_chosen": 1.8489296436309814, "log_odds_ratio": -0.4716700315475464, "logits/chosen": -0.8643901348114014, "logits/rejected": -1.0047047138214111, "logps/chosen": -0.6792114973068237, "logps/rejected": -2.108753204345703, "loss": 1.0107, "nll_loss": 1.0312790870666504, "rewards/accuracies": 0.625, "rewards/chosen": -0.06792115420103073, "rewards/margins": 0.14295417070388794, "rewards/rejected": -0.21087531745433807, "step": 5818 }, { "epoch": 3.5497941131615067, "grad_norm": 1.1648521423339844, "learning_rate": 2.327985303123086e-06, "log_odds_chosen": 2.5963728427886963, "log_odds_ratio": -0.34245336055755615, "logits/chosen": -0.7262487411499023, "logits/rejected": -0.9370322823524475, "logps/chosen": -0.6692706346511841, "logps/rejected": -2.7179861068725586, "loss": 0.968, "nll_loss": 0.8186504244804382, "rewards/accuracies": 0.75, "rewards/chosen": -0.06692706048488617, "rewards/margins": 0.2048715204000473, "rewards/rejected": -0.27179858088493347, "step": 5819 }, { "epoch": 3.5504041482385236, "grad_norm": 5.392085552215576, "learning_rate": 2.3270055113288424e-06, "log_odds_chosen": 1.1780377626419067, "log_odds_ratio": -0.4772561192512512, "logits/chosen": -1.0238356590270996, "logits/rejected": -0.9461379051208496, "logps/chosen": -0.9836810827255249, "logps/rejected": -1.9112300872802734, "loss": 1.2204, "nll_loss": 1.2892955541610718, "rewards/accuracies": 0.75, "rewards/chosen": -0.09836810827255249, "rewards/margins": 0.09275491535663605, "rewards/rejected": -0.19112302362918854, "step": 5820 }, { "epoch": 3.551014183315541, "grad_norm": 1.2184958457946777, "learning_rate": 2.326025719534599e-06, "log_odds_chosen": 0.8949154019355774, "log_odds_ratio": -0.5663996934890747, "logits/chosen": -1.1608338356018066, "logits/rejected": -1.1263272762298584, "logps/chosen": -0.8562855124473572, "logps/rejected": -1.5407686233520508, "loss": 1.0409, "nll_loss": 0.9104480743408203, "rewards/accuracies": 0.625, "rewards/chosen": -0.08562855422496796, "rewards/margins": 0.06844832748174667, "rewards/rejected": -0.15407687425613403, "step": 5821 }, { "epoch": 3.5516242183925577, "grad_norm": 1.2611840963363647, "learning_rate": 2.325045927740355e-06, "log_odds_chosen": 2.6489856243133545, "log_odds_ratio": -0.4774553179740906, "logits/chosen": -0.7822491526603699, "logits/rejected": -0.9515939950942993, "logps/chosen": -0.8355252742767334, "logps/rejected": -3.211915969848633, "loss": 1.0904, "nll_loss": 1.0155385732650757, "rewards/accuracies": 0.75, "rewards/chosen": -0.0835525244474411, "rewards/margins": 0.23763906955718994, "rewards/rejected": -0.32119157910346985, "step": 5822 }, { "epoch": 3.5522342534695746, "grad_norm": 1.4511263370513916, "learning_rate": 2.3240661359461112e-06, "log_odds_chosen": -0.05315914750099182, "log_odds_ratio": -0.7431648969650269, "logits/chosen": -0.8015668392181396, "logits/rejected": -0.8054871559143066, "logps/chosen": -0.9482720494270325, "logps/rejected": -0.9634392261505127, "loss": 0.9612, "nll_loss": 1.0689197778701782, "rewards/accuracies": 0.625, "rewards/chosen": -0.09482720494270325, "rewards/margins": 0.0015167118981480598, "rewards/rejected": -0.09634391963481903, "step": 5823 }, { "epoch": 3.5528442885465914, "grad_norm": 1.7675598859786987, "learning_rate": 2.3230863441518678e-06, "log_odds_chosen": 1.3431793451309204, "log_odds_ratio": -0.5780745148658752, "logits/chosen": -0.8315805792808533, "logits/rejected": -0.8289713263511658, "logps/chosen": -0.7281217575073242, "logps/rejected": -1.7741566896438599, "loss": 1.0816, "nll_loss": 0.9876453876495361, "rewards/accuracies": 0.375, "rewards/chosen": -0.07281216979026794, "rewards/margins": 0.10460349917411804, "rewards/rejected": -0.177415668964386, "step": 5824 }, { "epoch": 3.5534543236236082, "grad_norm": 1.508131504058838, "learning_rate": 2.322106552357624e-06, "log_odds_chosen": 2.6268224716186523, "log_odds_ratio": -0.3653588891029358, "logits/chosen": -1.0124309062957764, "logits/rejected": -1.0389539003372192, "logps/chosen": -0.7496333122253418, "logps/rejected": -2.8576912879943848, "loss": 0.958, "nll_loss": 1.0805463790893555, "rewards/accuracies": 0.625, "rewards/chosen": -0.07496333122253418, "rewards/margins": 0.21080580353736877, "rewards/rejected": -0.28576913475990295, "step": 5825 }, { "epoch": 3.554064358700625, "grad_norm": 1.281356930732727, "learning_rate": 2.32112676056338e-06, "log_odds_chosen": 1.6980235576629639, "log_odds_ratio": -0.547547459602356, "logits/chosen": -0.9377895593643188, "logits/rejected": -0.945071816444397, "logps/chosen": -0.878764271736145, "logps/rejected": -2.1064343452453613, "loss": 1.0958, "nll_loss": 1.2388166189193726, "rewards/accuracies": 0.625, "rewards/chosen": -0.08787643909454346, "rewards/margins": 0.12276700884103775, "rewards/rejected": -0.2106434404850006, "step": 5826 }, { "epoch": 3.554674393777642, "grad_norm": 6.426202297210693, "learning_rate": 2.3201469687691366e-06, "log_odds_chosen": 2.232365131378174, "log_odds_ratio": -0.34854093194007874, "logits/chosen": -0.6253860592842102, "logits/rejected": -0.8355454206466675, "logps/chosen": -0.7532036304473877, "logps/rejected": -2.492136001586914, "loss": 1.075, "nll_loss": 1.0803190469741821, "rewards/accuracies": 0.625, "rewards/chosen": -0.07532037049531937, "rewards/margins": 0.17389324307441711, "rewards/rejected": -0.24921360611915588, "step": 5827 }, { "epoch": 3.5552844288546592, "grad_norm": 4.212855815887451, "learning_rate": 2.3191671769748927e-06, "log_odds_chosen": 5.27256965637207, "log_odds_ratio": -0.263346403837204, "logits/chosen": -0.9357593655586243, "logits/rejected": -1.098534107208252, "logps/chosen": -0.8148595094680786, "logps/rejected": -5.59464168548584, "loss": 1.0884, "nll_loss": 1.0164284706115723, "rewards/accuracies": 0.875, "rewards/chosen": -0.08148594200611115, "rewards/margins": 0.4779782295227051, "rewards/rejected": -0.559464156627655, "step": 5828 }, { "epoch": 3.555894463931676, "grad_norm": 2.131999969482422, "learning_rate": 2.3181873851806492e-06, "log_odds_chosen": 3.3498146533966064, "log_odds_ratio": -0.15239934623241425, "logits/chosen": -0.513317346572876, "logits/rejected": -0.9160867929458618, "logps/chosen": -0.6715905666351318, "logps/rejected": -3.2965497970581055, "loss": 0.9804, "nll_loss": 0.8857431411743164, "rewards/accuracies": 1.0, "rewards/chosen": -0.06715905666351318, "rewards/margins": 0.2624959349632263, "rewards/rejected": -0.3296549916267395, "step": 5829 }, { "epoch": 3.556504499008693, "grad_norm": 1.5276892185211182, "learning_rate": 2.3172075933864054e-06, "log_odds_chosen": 0.8573366403579712, "log_odds_ratio": -0.5726525783538818, "logits/chosen": -0.9077444076538086, "logits/rejected": -0.911548376083374, "logps/chosen": -0.908176064491272, "logps/rejected": -1.4904948472976685, "loss": 0.8994, "nll_loss": 0.9666548371315002, "rewards/accuracies": 0.75, "rewards/chosen": -0.09081760048866272, "rewards/margins": 0.058231886476278305, "rewards/rejected": -0.14904949069023132, "step": 5830 }, { "epoch": 3.5571145340857098, "grad_norm": 1.7134085893630981, "learning_rate": 2.3162278015921615e-06, "log_odds_chosen": 3.001542091369629, "log_odds_ratio": -0.2803334593772888, "logits/chosen": -0.796908974647522, "logits/rejected": -0.9931675791740417, "logps/chosen": -0.7348463535308838, "logps/rejected": -3.178246021270752, "loss": 1.1423, "nll_loss": 0.9965466260910034, "rewards/accuracies": 0.875, "rewards/chosen": -0.0734846368432045, "rewards/margins": 0.2443399876356125, "rewards/rejected": -0.3178246021270752, "step": 5831 }, { "epoch": 3.557724569162727, "grad_norm": 1.5533608198165894, "learning_rate": 2.315248009797918e-06, "log_odds_chosen": 3.3007683753967285, "log_odds_ratio": -0.128642737865448, "logits/chosen": -0.8295389413833618, "logits/rejected": -1.067853331565857, "logps/chosen": -0.7908291816711426, "logps/rejected": -3.472100257873535, "loss": 0.7874, "nll_loss": 0.9071162939071655, "rewards/accuracies": 0.875, "rewards/chosen": -0.0790829211473465, "rewards/margins": 0.26812708377838135, "rewards/rejected": -0.34721001982688904, "step": 5832 }, { "epoch": 3.558334604239744, "grad_norm": 1.3388994932174683, "learning_rate": 2.314268218003674e-06, "log_odds_chosen": 0.3942199945449829, "log_odds_ratio": -0.5941461324691772, "logits/chosen": -0.9458574652671814, "logits/rejected": -0.9972512722015381, "logps/chosen": -0.8876393437385559, "logps/rejected": -1.1276841163635254, "loss": 1.0382, "nll_loss": 1.1365500688552856, "rewards/accuracies": 0.5, "rewards/chosen": -0.08876393735408783, "rewards/margins": 0.024004481732845306, "rewards/rejected": -0.11276841163635254, "step": 5833 }, { "epoch": 3.5589446393167607, "grad_norm": 5.794464588165283, "learning_rate": 2.3132884262094303e-06, "log_odds_chosen": 3.2924885749816895, "log_odds_ratio": -0.3531684875488281, "logits/chosen": -0.7511997222900391, "logits/rejected": -0.9822000861167908, "logps/chosen": -0.8044165968894958, "logps/rejected": -3.6247482299804688, "loss": 1.0881, "nll_loss": 1.118804693222046, "rewards/accuracies": 0.875, "rewards/chosen": -0.0804416611790657, "rewards/margins": 0.28203317523002625, "rewards/rejected": -0.36247482895851135, "step": 5834 }, { "epoch": 3.5595546743937776, "grad_norm": 1.233386754989624, "learning_rate": 2.312308634415187e-06, "log_odds_chosen": 1.5172032117843628, "log_odds_ratio": -0.49225082993507385, "logits/chosen": -1.1122214794158936, "logits/rejected": -1.0263340473175049, "logps/chosen": -1.0009057521820068, "logps/rejected": -2.3134734630584717, "loss": 1.1706, "nll_loss": 1.1828703880310059, "rewards/accuracies": 0.625, "rewards/chosen": -0.10009057819843292, "rewards/margins": 0.13125677406787872, "rewards/rejected": -0.23134736716747284, "step": 5835 }, { "epoch": 3.5601647094707944, "grad_norm": 13.733217239379883, "learning_rate": 2.311328842620943e-06, "log_odds_chosen": 2.091602087020874, "log_odds_ratio": -0.3933337330818176, "logits/chosen": -0.8434298038482666, "logits/rejected": -1.047177791595459, "logps/chosen": -1.2563215494155884, "logps/rejected": -2.982769250869751, "loss": 1.1301, "nll_loss": 1.178960919380188, "rewards/accuracies": 0.875, "rewards/chosen": -0.1256321668624878, "rewards/margins": 0.17264476418495178, "rewards/rejected": -0.29827696084976196, "step": 5836 }, { "epoch": 3.5607747445478113, "grad_norm": 12.154495239257812, "learning_rate": 2.310349050826699e-06, "log_odds_chosen": 2.4705681800842285, "log_odds_ratio": -0.18662306666374207, "logits/chosen": -1.0358902215957642, "logits/rejected": -0.9979461431503296, "logps/chosen": -0.7514280676841736, "logps/rejected": -2.6663565635681152, "loss": 1.1115, "nll_loss": 0.8774601221084595, "rewards/accuracies": 1.0, "rewards/chosen": -0.07514280825853348, "rewards/margins": 0.19149284064769745, "rewards/rejected": -0.2666356563568115, "step": 5837 }, { "epoch": 3.5613847796248286, "grad_norm": 2.7514801025390625, "learning_rate": 2.3093692590324556e-06, "log_odds_chosen": 1.4089252948760986, "log_odds_ratio": -0.4127340316772461, "logits/chosen": -0.8081427812576294, "logits/rejected": -0.7451174259185791, "logps/chosen": -0.8313758373260498, "logps/rejected": -1.8910367488861084, "loss": 0.9772, "nll_loss": 0.8985010385513306, "rewards/accuracies": 0.75, "rewards/chosen": -0.08313758671283722, "rewards/margins": 0.10596609115600586, "rewards/rejected": -0.18910367786884308, "step": 5838 }, { "epoch": 3.5619948147018454, "grad_norm": 2.911752462387085, "learning_rate": 2.3083894672382117e-06, "log_odds_chosen": 0.9012391567230225, "log_odds_ratio": -0.5961203575134277, "logits/chosen": -0.745734453201294, "logits/rejected": -0.6782469153404236, "logps/chosen": -0.8167965412139893, "logps/rejected": -1.4859750270843506, "loss": 1.0266, "nll_loss": 1.156234860420227, "rewards/accuracies": 0.5, "rewards/chosen": -0.08167964965105057, "rewards/margins": 0.06691785156726837, "rewards/rejected": -0.14859750866889954, "step": 5839 }, { "epoch": 3.5626048497788623, "grad_norm": 7.388120651245117, "learning_rate": 2.307409675443968e-06, "log_odds_chosen": 1.9641926288604736, "log_odds_ratio": -0.3663146197795868, "logits/chosen": -0.7807362079620361, "logits/rejected": -0.8609014749526978, "logps/chosen": -0.5412613749504089, "logps/rejected": -1.8975088596343994, "loss": 0.9588, "nll_loss": 0.7338206171989441, "rewards/accuracies": 0.75, "rewards/chosen": -0.05412614345550537, "rewards/margins": 0.1356247514486313, "rewards/rejected": -0.18975089490413666, "step": 5840 }, { "epoch": 3.563214884855879, "grad_norm": 1.2656265497207642, "learning_rate": 2.3064298836497244e-06, "log_odds_chosen": 0.9577856659889221, "log_odds_ratio": -0.4470944106578827, "logits/chosen": -0.7192546129226685, "logits/rejected": -0.7858467698097229, "logps/chosen": -0.7098172903060913, "logps/rejected": -1.322881817817688, "loss": 0.9386, "nll_loss": 0.9323900938034058, "rewards/accuracies": 0.75, "rewards/chosen": -0.07098172605037689, "rewards/margins": 0.06130645424127579, "rewards/rejected": -0.13228818774223328, "step": 5841 }, { "epoch": 3.5638249199328964, "grad_norm": 2.187793493270874, "learning_rate": 2.3054500918554805e-06, "log_odds_chosen": 2.034764289855957, "log_odds_ratio": -0.30459460616111755, "logits/chosen": -0.8860567212104797, "logits/rejected": -0.9066094756126404, "logps/chosen": -0.7883706092834473, "logps/rejected": -2.201342821121216, "loss": 1.1551, "nll_loss": 0.9335885047912598, "rewards/accuracies": 0.75, "rewards/chosen": -0.0788370668888092, "rewards/margins": 0.14129722118377686, "rewards/rejected": -0.22013425827026367, "step": 5842 }, { "epoch": 3.5644349550099133, "grad_norm": 5.269262790679932, "learning_rate": 2.304470300061237e-06, "log_odds_chosen": 2.1052236557006836, "log_odds_ratio": -0.39149993658065796, "logits/chosen": -0.650530219078064, "logits/rejected": -0.8877713084220886, "logps/chosen": -0.7826154232025146, "logps/rejected": -2.3932485580444336, "loss": 1.0908, "nll_loss": 0.980975866317749, "rewards/accuracies": 0.875, "rewards/chosen": -0.07826153934001923, "rewards/margins": 0.1610633134841919, "rewards/rejected": -0.23932486772537231, "step": 5843 }, { "epoch": 3.56504499008693, "grad_norm": 1.492300033569336, "learning_rate": 2.303490508266993e-06, "log_odds_chosen": 0.6754403710365295, "log_odds_ratio": -0.6367628574371338, "logits/chosen": -0.9644027948379517, "logits/rejected": -0.7897579669952393, "logps/chosen": -1.0363490581512451, "logps/rejected": -1.4993642568588257, "loss": 0.9724, "nll_loss": 1.0606777667999268, "rewards/accuracies": 0.75, "rewards/chosen": -0.10363490879535675, "rewards/margins": 0.04630153253674507, "rewards/rejected": -0.14993643760681152, "step": 5844 }, { "epoch": 3.565655025163947, "grad_norm": 1.5982789993286133, "learning_rate": 2.3025107164727493e-06, "log_odds_chosen": 1.9747681617736816, "log_odds_ratio": -0.3193110525608063, "logits/chosen": -1.04957115650177, "logits/rejected": -1.1119325160980225, "logps/chosen": -0.9475910663604736, "logps/rejected": -2.5131547451019287, "loss": 1.076, "nll_loss": 1.1449428796768188, "rewards/accuracies": 0.75, "rewards/chosen": -0.09475910663604736, "rewards/margins": 0.15655635297298431, "rewards/rejected": -0.25131547451019287, "step": 5845 }, { "epoch": 3.566265060240964, "grad_norm": 2.093785285949707, "learning_rate": 2.301530924678506e-06, "log_odds_chosen": 2.2895021438598633, "log_odds_ratio": -0.31811612844467163, "logits/chosen": -0.7586017847061157, "logits/rejected": -0.7972850799560547, "logps/chosen": -0.661137580871582, "logps/rejected": -2.205566883087158, "loss": 0.8377, "nll_loss": 0.8083661794662476, "rewards/accuracies": 0.75, "rewards/chosen": -0.06611375510692596, "rewards/margins": 0.1544429361820221, "rewards/rejected": -0.22055670619010925, "step": 5846 }, { "epoch": 3.5668750953179806, "grad_norm": 2.327685832977295, "learning_rate": 2.300551132884262e-06, "log_odds_chosen": 2.1738626956939697, "log_odds_ratio": -0.4955669939517975, "logits/chosen": -0.8437925577163696, "logits/rejected": -0.9981250762939453, "logps/chosen": -0.6369434595108032, "logps/rejected": -2.434156656265259, "loss": 1.0641, "nll_loss": 0.8044531941413879, "rewards/accuracies": 0.75, "rewards/chosen": -0.06369433552026749, "rewards/margins": 0.17972132563591003, "rewards/rejected": -0.24341565370559692, "step": 5847 }, { "epoch": 3.5674851303949975, "grad_norm": 1.5429775714874268, "learning_rate": 2.299571341090018e-06, "log_odds_chosen": 1.1671638488769531, "log_odds_ratio": -0.33986711502075195, "logits/chosen": -0.803421676158905, "logits/rejected": -0.8566328287124634, "logps/chosen": -0.8069356083869934, "logps/rejected": -1.6745418310165405, "loss": 1.006, "nll_loss": 0.9592315554618835, "rewards/accuracies": 0.875, "rewards/chosen": -0.0806935578584671, "rewards/margins": 0.08676061779260635, "rewards/rejected": -0.16745416820049286, "step": 5848 }, { "epoch": 3.5680951654720148, "grad_norm": 1.860947608947754, "learning_rate": 2.2985915492957746e-06, "log_odds_chosen": 1.9848811626434326, "log_odds_ratio": -0.3610619008541107, "logits/chosen": -0.7358028888702393, "logits/rejected": -0.8308045268058777, "logps/chosen": -0.8334196209907532, "logps/rejected": -2.4471168518066406, "loss": 0.8848, "nll_loss": 0.862042248249054, "rewards/accuracies": 0.875, "rewards/chosen": -0.08334196358919144, "rewards/margins": 0.16136972606182098, "rewards/rejected": -0.24471169710159302, "step": 5849 }, { "epoch": 3.5687052005490316, "grad_norm": 3.5663137435913086, "learning_rate": 2.297611757501531e-06, "log_odds_chosen": 1.988393783569336, "log_odds_ratio": -0.4167781174182892, "logits/chosen": -0.7543445229530334, "logits/rejected": -0.6724734306335449, "logps/chosen": -0.7112642526626587, "logps/rejected": -2.3056952953338623, "loss": 1.0, "nll_loss": 0.761733889579773, "rewards/accuracies": 0.75, "rewards/chosen": -0.07112642377614975, "rewards/margins": 0.15944311022758484, "rewards/rejected": -0.23056954145431519, "step": 5850 }, { "epoch": 3.5693152356260485, "grad_norm": 1.188519835472107, "learning_rate": 2.296631965707287e-06, "log_odds_chosen": 1.587650179862976, "log_odds_ratio": -0.47756263613700867, "logits/chosen": -1.0718177556991577, "logits/rejected": -1.165418267250061, "logps/chosen": -0.9037472009658813, "logps/rejected": -2.1527647972106934, "loss": 0.9828, "nll_loss": 1.0508356094360352, "rewards/accuracies": 0.75, "rewards/chosen": -0.09037472307682037, "rewards/margins": 0.12490177899599075, "rewards/rejected": -0.21527647972106934, "step": 5851 }, { "epoch": 3.5699252707030653, "grad_norm": 3.357790470123291, "learning_rate": 2.2956521739130434e-06, "log_odds_chosen": 3.605992317199707, "log_odds_ratio": -0.16760611534118652, "logits/chosen": -0.7614198923110962, "logits/rejected": -0.7945520877838135, "logps/chosen": -0.6502278447151184, "logps/rejected": -3.5200035572052, "loss": 0.9747, "nll_loss": 0.7983739972114563, "rewards/accuracies": 0.875, "rewards/chosen": -0.0650227814912796, "rewards/margins": 0.2869775891304016, "rewards/rejected": -0.35200035572052, "step": 5852 }, { "epoch": 3.5705353057800826, "grad_norm": 1.9125882387161255, "learning_rate": 2.2946723821187996e-06, "log_odds_chosen": 2.603008508682251, "log_odds_ratio": -0.31663766503334045, "logits/chosen": -0.8761885166168213, "logits/rejected": -0.9799183011054993, "logps/chosen": -0.726299524307251, "logps/rejected": -2.8658769130706787, "loss": 0.9879, "nll_loss": 0.840155303478241, "rewards/accuracies": 0.875, "rewards/chosen": -0.07262995094060898, "rewards/margins": 0.2139577567577362, "rewards/rejected": -0.2865877151489258, "step": 5853 }, { "epoch": 3.5711453408570994, "grad_norm": 1.0400553941726685, "learning_rate": 2.2936925903245557e-06, "log_odds_chosen": 1.8429200649261475, "log_odds_ratio": -0.5612293481826782, "logits/chosen": -1.008842945098877, "logits/rejected": -1.0973520278930664, "logps/chosen": -0.913509726524353, "logps/rejected": -2.531533718109131, "loss": 1.0555, "nll_loss": 1.030432939529419, "rewards/accuracies": 0.625, "rewards/chosen": -0.0913509726524353, "rewards/margins": 0.16180239617824554, "rewards/rejected": -0.25315335392951965, "step": 5854 }, { "epoch": 3.5717553759341163, "grad_norm": 2.635606527328491, "learning_rate": 2.2927127985303122e-06, "log_odds_chosen": 1.5646885633468628, "log_odds_ratio": -0.5189229846000671, "logits/chosen": -1.0146816968917847, "logits/rejected": -1.0155234336853027, "logps/chosen": -0.9514157772064209, "logps/rejected": -2.2390120029449463, "loss": 1.0471, "nll_loss": 1.093542218208313, "rewards/accuracies": 0.5, "rewards/chosen": -0.09514158219099045, "rewards/margins": 0.12875962257385254, "rewards/rejected": -0.22390121221542358, "step": 5855 }, { "epoch": 3.572365411011133, "grad_norm": 1.4097795486450195, "learning_rate": 2.2917330067360683e-06, "log_odds_chosen": 1.8161386251449585, "log_odds_ratio": -0.6473606824874878, "logits/chosen": -1.0091633796691895, "logits/rejected": -0.8589906692504883, "logps/chosen": -1.0194636583328247, "logps/rejected": -2.7104806900024414, "loss": 1.1229, "nll_loss": 1.2157692909240723, "rewards/accuracies": 0.625, "rewards/chosen": -0.10194636881351471, "rewards/margins": 0.16910172998905182, "rewards/rejected": -0.27104809880256653, "step": 5856 }, { "epoch": 3.57297544608815, "grad_norm": 1.6089199781417847, "learning_rate": 2.290753214941825e-06, "log_odds_chosen": 2.1527585983276367, "log_odds_ratio": -0.40025684237480164, "logits/chosen": -0.8715190887451172, "logits/rejected": -0.9600790739059448, "logps/chosen": -0.832693338394165, "logps/rejected": -2.594675302505493, "loss": 0.9083, "nll_loss": 1.0286598205566406, "rewards/accuracies": 0.625, "rewards/chosen": -0.08326932787895203, "rewards/margins": 0.17619821429252625, "rewards/rejected": -0.25946754217147827, "step": 5857 }, { "epoch": 3.573585481165167, "grad_norm": 2.0845468044281006, "learning_rate": 2.289773423147581e-06, "log_odds_chosen": 2.04180908203125, "log_odds_ratio": -0.6257026195526123, "logits/chosen": -0.9417647123336792, "logits/rejected": -0.9460214972496033, "logps/chosen": -0.893665611743927, "logps/rejected": -2.7512288093566895, "loss": 1.1868, "nll_loss": 1.0291879177093506, "rewards/accuracies": 0.625, "rewards/chosen": -0.08936656266450882, "rewards/margins": 0.18575632572174072, "rewards/rejected": -0.27512291073799133, "step": 5858 }, { "epoch": 3.5741955162421837, "grad_norm": 1.5477973222732544, "learning_rate": 2.288793631353337e-06, "log_odds_chosen": 0.1586083173751831, "log_odds_ratio": -0.7464019060134888, "logits/chosen": -0.7696759700775146, "logits/rejected": -0.7153298854827881, "logps/chosen": -0.794365644454956, "logps/rejected": -0.9209839701652527, "loss": 1.0402, "nll_loss": 1.0780318975448608, "rewards/accuracies": 0.625, "rewards/chosen": -0.07943655550479889, "rewards/margins": 0.012661841697990894, "rewards/rejected": -0.09209839999675751, "step": 5859 }, { "epoch": 3.574805551319201, "grad_norm": 1.4462721347808838, "learning_rate": 2.2878138395590937e-06, "log_odds_chosen": 1.5081347227096558, "log_odds_ratio": -0.41512608528137207, "logits/chosen": -0.8251739144325256, "logits/rejected": -0.9741151332855225, "logps/chosen": -0.8383392095565796, "logps/rejected": -1.9634219408035278, "loss": 1.007, "nll_loss": 0.9557750821113586, "rewards/accuracies": 0.875, "rewards/chosen": -0.08383392542600632, "rewards/margins": 0.11250828206539154, "rewards/rejected": -0.19634220004081726, "step": 5860 }, { "epoch": 3.575415586396218, "grad_norm": 1.2084052562713623, "learning_rate": 2.28683404776485e-06, "log_odds_chosen": 1.4799401760101318, "log_odds_ratio": -0.29105961322784424, "logits/chosen": -0.9268123507499695, "logits/rejected": -1.0150172710418701, "logps/chosen": -0.7074111104011536, "logps/rejected": -1.7296419143676758, "loss": 0.9812, "nll_loss": 0.7638890147209167, "rewards/accuracies": 0.875, "rewards/chosen": -0.07074110954999924, "rewards/margins": 0.10222308337688446, "rewards/rejected": -0.1729642003774643, "step": 5861 }, { "epoch": 3.5760256214732347, "grad_norm": 1.3366236686706543, "learning_rate": 2.285854255970606e-06, "log_odds_chosen": 1.786020040512085, "log_odds_ratio": -0.41404297947883606, "logits/chosen": -0.8622359037399292, "logits/rejected": -0.9503847360610962, "logps/chosen": -0.7973697185516357, "logps/rejected": -2.0894618034362793, "loss": 0.9947, "nll_loss": 1.014997959136963, "rewards/accuracies": 0.75, "rewards/chosen": -0.07973698526620865, "rewards/margins": 0.1292092204093933, "rewards/rejected": -0.20894618332386017, "step": 5862 }, { "epoch": 3.5766356565502515, "grad_norm": 1.7089526653289795, "learning_rate": 2.2848744641763625e-06, "log_odds_chosen": 2.7452375888824463, "log_odds_ratio": -0.2775886356830597, "logits/chosen": -0.6930223703384399, "logits/rejected": -0.8902336955070496, "logps/chosen": -0.6565287709236145, "logps/rejected": -2.814311981201172, "loss": 0.9702, "nll_loss": 1.0071340799331665, "rewards/accuracies": 0.875, "rewards/chosen": -0.06565287709236145, "rewards/margins": 0.21577835083007812, "rewards/rejected": -0.2814312279224396, "step": 5863 }, { "epoch": 3.577245691627269, "grad_norm": 1.5398218631744385, "learning_rate": 2.283894672382119e-06, "log_odds_chosen": 2.594148635864258, "log_odds_ratio": -0.3292214274406433, "logits/chosen": -0.7146479487419128, "logits/rejected": -1.0421550273895264, "logps/chosen": -0.6663370132446289, "logps/rejected": -2.774153709411621, "loss": 0.9019, "nll_loss": 0.8063458800315857, "rewards/accuracies": 0.625, "rewards/chosen": -0.06663370132446289, "rewards/margins": 0.21078169345855713, "rewards/rejected": -0.27741536498069763, "step": 5864 }, { "epoch": 3.5778557267042856, "grad_norm": 1.8736859560012817, "learning_rate": 2.2829148805878747e-06, "log_odds_chosen": 0.1992020159959793, "log_odds_ratio": -0.7641220688819885, "logits/chosen": -1.0053168535232544, "logits/rejected": -1.0391608476638794, "logps/chosen": -0.8445939421653748, "logps/rejected": -0.9412207007408142, "loss": 1.1639, "nll_loss": 1.1089694499969482, "rewards/accuracies": 0.375, "rewards/chosen": -0.08445940166711807, "rewards/margins": 0.009662676602602005, "rewards/rejected": -0.09412206709384918, "step": 5865 }, { "epoch": 3.5784657617813025, "grad_norm": 3.559035539627075, "learning_rate": 2.2819350887936313e-06, "log_odds_chosen": 1.3952481746673584, "log_odds_ratio": -0.4268485903739929, "logits/chosen": -0.8486894965171814, "logits/rejected": -0.9732072353363037, "logps/chosen": -0.6544516086578369, "logps/rejected": -1.5616044998168945, "loss": 0.8967, "nll_loss": 0.6961134672164917, "rewards/accuracies": 0.75, "rewards/chosen": -0.06544516980648041, "rewards/margins": 0.09071527421474457, "rewards/rejected": -0.15616044402122498, "step": 5866 }, { "epoch": 3.5790757968583193, "grad_norm": 3.40104341506958, "learning_rate": 2.280955296999388e-06, "log_odds_chosen": 2.817680835723877, "log_odds_ratio": -0.5163362622261047, "logits/chosen": -0.7865713834762573, "logits/rejected": -0.8392462730407715, "logps/chosen": -0.7330323457717896, "logps/rejected": -3.0496490001678467, "loss": 1.0148, "nll_loss": 1.0595234632492065, "rewards/accuracies": 0.625, "rewards/chosen": -0.0733032375574112, "rewards/margins": 0.23166164755821228, "rewards/rejected": -0.30496490001678467, "step": 5867 }, { "epoch": 3.579685831935336, "grad_norm": 7.174677848815918, "learning_rate": 2.2799755052051435e-06, "log_odds_chosen": 1.3351116180419922, "log_odds_ratio": -0.40884602069854736, "logits/chosen": -0.8780698776245117, "logits/rejected": -0.9186185598373413, "logps/chosen": -0.5801185369491577, "logps/rejected": -1.5235395431518555, "loss": 0.9651, "nll_loss": 0.8308749198913574, "rewards/accuracies": 0.875, "rewards/chosen": -0.05801185965538025, "rewards/margins": 0.09434210509061813, "rewards/rejected": -0.15235395729541779, "step": 5868 }, { "epoch": 3.580295867012353, "grad_norm": 9.128684043884277, "learning_rate": 2.2789957134109e-06, "log_odds_chosen": 1.077834129333496, "log_odds_ratio": -0.5679425001144409, "logits/chosen": -0.882581353187561, "logits/rejected": -0.9241166114807129, "logps/chosen": -0.8548665642738342, "logps/rejected": -1.6559597253799438, "loss": 1.1543, "nll_loss": 1.2327994108200073, "rewards/accuracies": 0.5, "rewards/chosen": -0.08548665791749954, "rewards/margins": 0.08010930567979813, "rewards/rejected": -0.16559596359729767, "step": 5869 }, { "epoch": 3.58090590208937, "grad_norm": 2.972714424133301, "learning_rate": 2.2780159216166566e-06, "log_odds_chosen": 0.939015805721283, "log_odds_ratio": -0.5192795395851135, "logits/chosen": -0.45372533798217773, "logits/rejected": -0.7187602519989014, "logps/chosen": -0.8867430686950684, "logps/rejected": -1.4547381401062012, "loss": 1.0124, "nll_loss": 0.9507543444633484, "rewards/accuracies": 0.875, "rewards/chosen": -0.08867431432008743, "rewards/margins": 0.056799501180648804, "rewards/rejected": -0.14547380805015564, "step": 5870 }, { "epoch": 3.581515937166387, "grad_norm": 3.5926058292388916, "learning_rate": 2.2770361298224127e-06, "log_odds_chosen": 1.7018072605133057, "log_odds_ratio": -0.5073419809341431, "logits/chosen": -1.037809133529663, "logits/rejected": -1.0616530179977417, "logps/chosen": -1.154270052909851, "logps/rejected": -2.4804699420928955, "loss": 1.2389, "nll_loss": 1.4591801166534424, "rewards/accuracies": 0.625, "rewards/chosen": -0.11542700976133347, "rewards/margins": 0.13261999189853668, "rewards/rejected": -0.24804702401161194, "step": 5871 }, { "epoch": 3.582125972243404, "grad_norm": 1.549219012260437, "learning_rate": 2.276056338028169e-06, "log_odds_chosen": 1.9047214984893799, "log_odds_ratio": -0.4382380545139313, "logits/chosen": -0.8525521159172058, "logits/rejected": -0.798160195350647, "logps/chosen": -0.8509263396263123, "logps/rejected": -2.397469997406006, "loss": 1.1064, "nll_loss": 1.0913136005401611, "rewards/accuracies": 0.75, "rewards/chosen": -0.08509263396263123, "rewards/margins": 0.1546543836593628, "rewards/rejected": -0.23974701762199402, "step": 5872 }, { "epoch": 3.582736007320421, "grad_norm": 2.0551722049713135, "learning_rate": 2.275076546233925e-06, "log_odds_chosen": 3.1068403720855713, "log_odds_ratio": -0.12355975806713104, "logits/chosen": -0.7976651787757874, "logits/rejected": -0.8679568767547607, "logps/chosen": -0.578497052192688, "logps/rejected": -2.8427319526672363, "loss": 1.1159, "nll_loss": 0.7743862867355347, "rewards/accuracies": 1.0, "rewards/chosen": -0.0578497014939785, "rewards/margins": 0.2264234870672226, "rewards/rejected": -0.2842732071876526, "step": 5873 }, { "epoch": 3.5833460423974377, "grad_norm": 2.2778282165527344, "learning_rate": 2.2740967544396815e-06, "log_odds_chosen": 2.3332691192626953, "log_odds_ratio": -0.28541287779808044, "logits/chosen": -0.7072362899780273, "logits/rejected": -0.9636934995651245, "logps/chosen": -0.6789381504058838, "logps/rejected": -2.5232460498809814, "loss": 0.9404, "nll_loss": 1.0396573543548584, "rewards/accuracies": 0.875, "rewards/chosen": -0.06789381802082062, "rewards/margins": 0.1844307780265808, "rewards/rejected": -0.2523246109485626, "step": 5874 }, { "epoch": 3.583956077474455, "grad_norm": 1.5005855560302734, "learning_rate": 2.2731169626454376e-06, "log_odds_chosen": 1.6941291093826294, "log_odds_ratio": -0.4165480136871338, "logits/chosen": -0.8245286345481873, "logits/rejected": -0.8136383295059204, "logps/chosen": -0.6783242225646973, "logps/rejected": -2.01523494720459, "loss": 0.8712, "nll_loss": 0.7818940877914429, "rewards/accuracies": 0.75, "rewards/chosen": -0.06783242523670197, "rewards/margins": 0.13369105756282806, "rewards/rejected": -0.20152348279953003, "step": 5875 }, { "epoch": 3.584566112551472, "grad_norm": 2.7629590034484863, "learning_rate": 2.2721371708511938e-06, "log_odds_chosen": 3.228553533554077, "log_odds_ratio": -0.2877703905105591, "logits/chosen": -0.9107642769813538, "logits/rejected": -1.0211986303329468, "logps/chosen": -0.643852710723877, "logps/rejected": -3.3178482055664062, "loss": 1.0356, "nll_loss": 0.9701650142669678, "rewards/accuracies": 0.875, "rewards/chosen": -0.06438528001308441, "rewards/margins": 0.26739954948425293, "rewards/rejected": -0.33178484439849854, "step": 5876 }, { "epoch": 3.5851761476284887, "grad_norm": 1.3974246978759766, "learning_rate": 2.2711573790569503e-06, "log_odds_chosen": 0.5184213519096375, "log_odds_ratio": -0.5608779191970825, "logits/chosen": -0.8463943004608154, "logits/rejected": -0.9504046440124512, "logps/chosen": -0.8396597504615784, "logps/rejected": -1.228882908821106, "loss": 0.9532, "nll_loss": 1.0079494714736938, "rewards/accuracies": 0.625, "rewards/chosen": -0.0839659720659256, "rewards/margins": 0.03892231732606888, "rewards/rejected": -0.12288829684257507, "step": 5877 }, { "epoch": 3.5857861827055055, "grad_norm": 2.5413405895233154, "learning_rate": 2.270177587262707e-06, "log_odds_chosen": 0.8818342685699463, "log_odds_ratio": -0.606155514717102, "logits/chosen": -0.7131138443946838, "logits/rejected": -0.7714530825614929, "logps/chosen": -0.8971362113952637, "logps/rejected": -1.6158279180526733, "loss": 1.0169, "nll_loss": 1.0354821681976318, "rewards/accuracies": 0.5, "rewards/chosen": -0.08971361815929413, "rewards/margins": 0.07186916470527649, "rewards/rejected": -0.16158278286457062, "step": 5878 }, { "epoch": 3.5863962177825224, "grad_norm": 1.3540927171707153, "learning_rate": 2.2691977954684625e-06, "log_odds_chosen": 2.252507209777832, "log_odds_ratio": -0.4148448407649994, "logits/chosen": -0.7576284408569336, "logits/rejected": -0.9634252786636353, "logps/chosen": -0.6838427782058716, "logps/rejected": -2.4486136436462402, "loss": 1.0833, "nll_loss": 1.1084537506103516, "rewards/accuracies": 0.625, "rewards/chosen": -0.06838427484035492, "rewards/margins": 0.1764770895242691, "rewards/rejected": -0.24486136436462402, "step": 5879 }, { "epoch": 3.5870062528595392, "grad_norm": 1.4511637687683105, "learning_rate": 2.268218003674219e-06, "log_odds_chosen": 1.0589896440505981, "log_odds_ratio": -0.6082285642623901, "logits/chosen": -0.929908275604248, "logits/rejected": -1.0476429462432861, "logps/chosen": -0.9423914551734924, "logps/rejected": -1.8482060432434082, "loss": 0.9489, "nll_loss": 1.0796831846237183, "rewards/accuracies": 0.75, "rewards/chosen": -0.09423913806676865, "rewards/margins": 0.09058146178722382, "rewards/rejected": -0.18482059240341187, "step": 5880 }, { "epoch": 3.5876162879365565, "grad_norm": 2.054839849472046, "learning_rate": 2.2672382118799756e-06, "log_odds_chosen": 2.863062858581543, "log_odds_ratio": -0.2359614223241806, "logits/chosen": -0.9987438321113586, "logits/rejected": -1.0458829402923584, "logps/chosen": -0.7439277172088623, "logps/rejected": -3.0351243019104004, "loss": 0.9232, "nll_loss": 0.955890417098999, "rewards/accuracies": 0.875, "rewards/chosen": -0.07439278066158295, "rewards/margins": 0.22911964356899261, "rewards/rejected": -0.30351242423057556, "step": 5881 }, { "epoch": 3.5882263230135734, "grad_norm": 1.7649879455566406, "learning_rate": 2.2662584200857313e-06, "log_odds_chosen": 1.5932588577270508, "log_odds_ratio": -0.35911789536476135, "logits/chosen": -0.8984501957893372, "logits/rejected": -1.0686352252960205, "logps/chosen": -0.6755180954933167, "logps/rejected": -1.775282382965088, "loss": 0.9919, "nll_loss": 0.9478867053985596, "rewards/accuracies": 0.875, "rewards/chosen": -0.06755180656909943, "rewards/margins": 0.10997643321752548, "rewards/rejected": -0.1775282323360443, "step": 5882 }, { "epoch": 3.58883635809059, "grad_norm": 1.7705926895141602, "learning_rate": 2.265278628291488e-06, "log_odds_chosen": 1.784754991531372, "log_odds_ratio": -0.4529011845588684, "logits/chosen": -0.9446800947189331, "logits/rejected": -1.0376524925231934, "logps/chosen": -1.0685460567474365, "logps/rejected": -2.535202980041504, "loss": 1.2169, "nll_loss": 1.3149151802062988, "rewards/accuracies": 0.625, "rewards/chosen": -0.10685461759567261, "rewards/margins": 0.14666570723056793, "rewards/rejected": -0.25352030992507935, "step": 5883 }, { "epoch": 3.589446393167607, "grad_norm": 1.2272629737854004, "learning_rate": 2.2642988364972444e-06, "log_odds_chosen": 1.2892541885375977, "log_odds_ratio": -0.48691147565841675, "logits/chosen": -0.7452046871185303, "logits/rejected": -0.8658472299575806, "logps/chosen": -0.8276722431182861, "logps/rejected": -1.7273309230804443, "loss": 1.0656, "nll_loss": 0.944622278213501, "rewards/accuracies": 0.75, "rewards/chosen": -0.08276722580194473, "rewards/margins": 0.08996586501598358, "rewards/rejected": -0.1727330982685089, "step": 5884 }, { "epoch": 3.5900564282446243, "grad_norm": 1.7148306369781494, "learning_rate": 2.2633190447030005e-06, "log_odds_chosen": 1.9121922254562378, "log_odds_ratio": -0.2676045894622803, "logits/chosen": -0.8297986388206482, "logits/rejected": -0.8526351451873779, "logps/chosen": -0.6763778924942017, "logps/rejected": -1.8272632360458374, "loss": 1.1324, "nll_loss": 0.9605299234390259, "rewards/accuracies": 0.875, "rewards/chosen": -0.06763778626918793, "rewards/margins": 0.11508853733539581, "rewards/rejected": -0.18272632360458374, "step": 5885 }, { "epoch": 3.590666463321641, "grad_norm": 1.3016268014907837, "learning_rate": 2.2623392529087567e-06, "log_odds_chosen": 1.1303207874298096, "log_odds_ratio": -0.5870875716209412, "logits/chosen": -1.1390140056610107, "logits/rejected": -1.0552353858947754, "logps/chosen": -0.9753910303115845, "logps/rejected": -1.9860987663269043, "loss": 1.1358, "nll_loss": 1.1214566230773926, "rewards/accuracies": 0.5, "rewards/chosen": -0.09753910452127457, "rewards/margins": 0.10107077658176422, "rewards/rejected": -0.19860988855361938, "step": 5886 }, { "epoch": 3.591276498398658, "grad_norm": 3.6985740661621094, "learning_rate": 2.2613594611145132e-06, "log_odds_chosen": 1.638311743736267, "log_odds_ratio": -0.5170255899429321, "logits/chosen": -1.0970581769943237, "logits/rejected": -1.1572203636169434, "logps/chosen": -1.1580579280853271, "logps/rejected": -2.5633769035339355, "loss": 1.0558, "nll_loss": 1.2313340902328491, "rewards/accuracies": 0.75, "rewards/chosen": -0.11580579727888107, "rewards/margins": 0.14053191244602203, "rewards/rejected": -0.2563377022743225, "step": 5887 }, { "epoch": 3.591886533475675, "grad_norm": 1.2375855445861816, "learning_rate": 2.2603796693202693e-06, "log_odds_chosen": 0.3404739499092102, "log_odds_ratio": -0.6191802024841309, "logits/chosen": -0.8540502786636353, "logits/rejected": -0.8592092990875244, "logps/chosen": -0.8105144500732422, "logps/rejected": -1.0651310682296753, "loss": 1.0007, "nll_loss": 0.9795886874198914, "rewards/accuracies": 0.5, "rewards/chosen": -0.08105144649744034, "rewards/margins": 0.02546166256070137, "rewards/rejected": -0.106513112783432, "step": 5888 }, { "epoch": 3.5924965685526917, "grad_norm": 2.4662811756134033, "learning_rate": 2.259399877526026e-06, "log_odds_chosen": 3.5748636722564697, "log_odds_ratio": -0.403864324092865, "logits/chosen": -0.796984076499939, "logits/rejected": -1.0036042928695679, "logps/chosen": -0.8217307925224304, "logps/rejected": -4.057476043701172, "loss": 1.0294, "nll_loss": 0.8090205788612366, "rewards/accuracies": 0.75, "rewards/chosen": -0.08217307925224304, "rewards/margins": 0.3235745131969452, "rewards/rejected": -0.40574756264686584, "step": 5889 }, { "epoch": 3.5931066036297086, "grad_norm": 1.6978498697280884, "learning_rate": 2.258420085731782e-06, "log_odds_chosen": 6.046841144561768, "log_odds_ratio": -0.09230127930641174, "logits/chosen": -0.8880317807197571, "logits/rejected": -1.1604080200195312, "logps/chosen": -0.5773038268089294, "logps/rejected": -5.82423734664917, "loss": 1.0601, "nll_loss": 0.8656066656112671, "rewards/accuracies": 1.0, "rewards/chosen": -0.057730384171009064, "rewards/margins": 0.5246933698654175, "rewards/rejected": -0.582423746585846, "step": 5890 }, { "epoch": 3.5937166387067254, "grad_norm": 5.911113262176514, "learning_rate": 2.257440293937538e-06, "log_odds_chosen": 3.1439108848571777, "log_odds_ratio": -0.23919376730918884, "logits/chosen": -1.0000523328781128, "logits/rejected": -1.0326225757598877, "logps/chosen": -0.6601160764694214, "logps/rejected": -3.1943037509918213, "loss": 0.9567, "nll_loss": 0.9142619371414185, "rewards/accuracies": 0.875, "rewards/chosen": -0.06601160764694214, "rewards/margins": 0.25341880321502686, "rewards/rejected": -0.3194303810596466, "step": 5891 }, { "epoch": 3.5943266737837427, "grad_norm": 8.867149353027344, "learning_rate": 2.2564605021432947e-06, "log_odds_chosen": 0.4662828743457794, "log_odds_ratio": -0.6803224682807922, "logits/chosen": -0.9251279830932617, "logits/rejected": -0.9159085750579834, "logps/chosen": -1.0875303745269775, "logps/rejected": -1.47145676612854, "loss": 1.1462, "nll_loss": 1.143325924873352, "rewards/accuracies": 0.5, "rewards/chosen": -0.10875304043292999, "rewards/margins": 0.03839264065027237, "rewards/rejected": -0.14714567363262177, "step": 5892 }, { "epoch": 3.5949367088607596, "grad_norm": 3.40609073638916, "learning_rate": 2.2554807103490504e-06, "log_odds_chosen": 1.2180957794189453, "log_odds_ratio": -0.4815176725387573, "logits/chosen": -1.0856256484985352, "logits/rejected": -1.0808545351028442, "logps/chosen": -1.000952959060669, "logps/rejected": -2.0611917972564697, "loss": 1.195, "nll_loss": 1.0608091354370117, "rewards/accuracies": 0.5, "rewards/chosen": -0.10009530186653137, "rewards/margins": 0.1060238778591156, "rewards/rejected": -0.20611917972564697, "step": 5893 }, { "epoch": 3.5955467439377764, "grad_norm": 11.239224433898926, "learning_rate": 2.254500918554807e-06, "log_odds_chosen": 2.857421875, "log_odds_ratio": -0.35853293538093567, "logits/chosen": -0.6324220299720764, "logits/rejected": -0.8821828365325928, "logps/chosen": -0.6275067329406738, "logps/rejected": -2.8256568908691406, "loss": 0.9486, "nll_loss": 0.7410706281661987, "rewards/accuracies": 0.75, "rewards/chosen": -0.0627506673336029, "rewards/margins": 0.21981503069400787, "rewards/rejected": -0.282565712928772, "step": 5894 }, { "epoch": 3.5961567790147932, "grad_norm": 1.2782626152038574, "learning_rate": 2.2535211267605635e-06, "log_odds_chosen": 0.6687310934066772, "log_odds_ratio": -0.6576011180877686, "logits/chosen": -0.9094185829162598, "logits/rejected": -1.0053279399871826, "logps/chosen": -0.8567368388175964, "logps/rejected": -1.3771016597747803, "loss": 1.1161, "nll_loss": 1.1545597314834595, "rewards/accuracies": 0.625, "rewards/chosen": -0.08567368239164352, "rewards/margins": 0.05203649029135704, "rewards/rejected": -0.13771018385887146, "step": 5895 }, { "epoch": 3.5967668140918105, "grad_norm": 1.4698289632797241, "learning_rate": 2.252541334966319e-06, "log_odds_chosen": 1.990412712097168, "log_odds_ratio": -0.48652154207229614, "logits/chosen": -0.6738299131393433, "logits/rejected": -0.6938323378562927, "logps/chosen": -0.8324469923973083, "logps/rejected": -2.5329928398132324, "loss": 1.0668, "nll_loss": 1.0927276611328125, "rewards/accuracies": 0.625, "rewards/chosen": -0.0832446962594986, "rewards/margins": 0.1700545996427536, "rewards/rejected": -0.2532992959022522, "step": 5896 }, { "epoch": 3.5973768491688274, "grad_norm": 1.4597711563110352, "learning_rate": 2.2515615431720757e-06, "log_odds_chosen": 0.861426591873169, "log_odds_ratio": -0.658203125, "logits/chosen": -1.0781989097595215, "logits/rejected": -1.1557364463806152, "logps/chosen": -1.0275081396102905, "logps/rejected": -1.7195056676864624, "loss": 1.2216, "nll_loss": 1.117222785949707, "rewards/accuracies": 0.625, "rewards/chosen": -0.10275081545114517, "rewards/margins": 0.06919975578784943, "rewards/rejected": -0.171950563788414, "step": 5897 }, { "epoch": 3.5979868842458442, "grad_norm": 3.4495420455932617, "learning_rate": 2.2505817513778323e-06, "log_odds_chosen": 2.598853588104248, "log_odds_ratio": -0.29056981205940247, "logits/chosen": -0.9646036624908447, "logits/rejected": -1.185847520828247, "logps/chosen": -0.6694201231002808, "logps/rejected": -2.748457670211792, "loss": 1.131, "nll_loss": 0.9206010103225708, "rewards/accuracies": 0.75, "rewards/chosen": -0.0669420138001442, "rewards/margins": 0.20790375769138336, "rewards/rejected": -0.27484574913978577, "step": 5898 }, { "epoch": 3.598596919322861, "grad_norm": 1.7820409536361694, "learning_rate": 2.2496019595835884e-06, "log_odds_chosen": 1.36174738407135, "log_odds_ratio": -0.44464462995529175, "logits/chosen": -0.8342437744140625, "logits/rejected": -1.0068374872207642, "logps/chosen": -0.6537423133850098, "logps/rejected": -1.649371862411499, "loss": 1.0952, "nll_loss": 1.2399296760559082, "rewards/accuracies": 0.75, "rewards/chosen": -0.06537424027919769, "rewards/margins": 0.09956294298171997, "rewards/rejected": -0.16493718326091766, "step": 5899 }, { "epoch": 3.599206954399878, "grad_norm": 1.1147032976150513, "learning_rate": 2.2486221677893445e-06, "log_odds_chosen": 2.568009853363037, "log_odds_ratio": -0.24353545904159546, "logits/chosen": -0.8119993209838867, "logits/rejected": -0.943772554397583, "logps/chosen": -0.6132498979568481, "logps/rejected": -2.537062168121338, "loss": 0.8217, "nll_loss": 0.881149172782898, "rewards/accuracies": 1.0, "rewards/chosen": -0.061324987560510635, "rewards/margins": 0.19238126277923584, "rewards/rejected": -0.2537062466144562, "step": 5900 }, { "epoch": 3.5998169894768948, "grad_norm": 2.08834171295166, "learning_rate": 2.247642375995101e-06, "log_odds_chosen": 2.146479606628418, "log_odds_ratio": -0.2593497037887573, "logits/chosen": -0.678918719291687, "logits/rejected": -0.8318811058998108, "logps/chosen": -0.6455127596855164, "logps/rejected": -2.2292165756225586, "loss": 0.8994, "nll_loss": 0.8665307760238647, "rewards/accuracies": 0.875, "rewards/chosen": -0.06455127894878387, "rewards/margins": 0.15837037563323975, "rewards/rejected": -0.22292166948318481, "step": 5901 }, { "epoch": 3.6004270245539116, "grad_norm": 1.4304076433181763, "learning_rate": 2.246662584200857e-06, "log_odds_chosen": 2.079663038253784, "log_odds_ratio": -0.4297875761985779, "logits/chosen": -0.7239981293678284, "logits/rejected": -0.8163785934448242, "logps/chosen": -0.6556222438812256, "logps/rejected": -2.3355307579040527, "loss": 0.855, "nll_loss": 0.8078171014785767, "rewards/accuracies": 0.75, "rewards/chosen": -0.06556221842765808, "rewards/margins": 0.16799083352088928, "rewards/rejected": -0.23355308175086975, "step": 5902 }, { "epoch": 3.601037059630929, "grad_norm": 2.3994181156158447, "learning_rate": 2.2456827924066137e-06, "log_odds_chosen": 2.673642873764038, "log_odds_ratio": -0.41972172260284424, "logits/chosen": -0.5907368063926697, "logits/rejected": -0.6934075951576233, "logps/chosen": -0.6125222444534302, "logps/rejected": -2.7791433334350586, "loss": 1.0467, "nll_loss": 0.7408241629600525, "rewards/accuracies": 0.625, "rewards/chosen": -0.061252228915691376, "rewards/margins": 0.21666209399700165, "rewards/rejected": -0.2779143154621124, "step": 5903 }, { "epoch": 3.6016470947079458, "grad_norm": 1.3501675128936768, "learning_rate": 2.24470300061237e-06, "log_odds_chosen": 1.8296709060668945, "log_odds_ratio": -0.4447724223136902, "logits/chosen": -0.8598431348800659, "logits/rejected": -0.9980877637863159, "logps/chosen": -0.6943714618682861, "logps/rejected": -2.016671657562256, "loss": 1.067, "nll_loss": 0.9582430124282837, "rewards/accuracies": 0.75, "rewards/chosen": -0.06943715363740921, "rewards/margins": 0.1322299838066101, "rewards/rejected": -0.2016671597957611, "step": 5904 }, { "epoch": 3.6022571297849626, "grad_norm": 1.6050680875778198, "learning_rate": 2.243723208818126e-06, "log_odds_chosen": 1.7457116842269897, "log_odds_ratio": -0.5017004013061523, "logits/chosen": -0.9279063940048218, "logits/rejected": -0.9777218103408813, "logps/chosen": -0.7176691293716431, "logps/rejected": -1.9328938722610474, "loss": 1.1049, "nll_loss": 1.0102992057800293, "rewards/accuracies": 0.625, "rewards/chosen": -0.0717669203877449, "rewards/margins": 0.12152247875928879, "rewards/rejected": -0.1932893991470337, "step": 5905 }, { "epoch": 3.6028671648619794, "grad_norm": 1.849629282951355, "learning_rate": 2.2427434170238825e-06, "log_odds_chosen": 2.337552070617676, "log_odds_ratio": -0.24034424126148224, "logits/chosen": -0.8329153060913086, "logits/rejected": -0.8398118019104004, "logps/chosen": -0.6109002828598022, "logps/rejected": -2.28953218460083, "loss": 0.8642, "nll_loss": 0.797328531742096, "rewards/accuracies": 1.0, "rewards/chosen": -0.06109003722667694, "rewards/margins": 0.16786320507526398, "rewards/rejected": -0.22895322740077972, "step": 5906 }, { "epoch": 3.6034771999389967, "grad_norm": 1.9150142669677734, "learning_rate": 2.2417636252296386e-06, "log_odds_chosen": 1.383575677871704, "log_odds_ratio": -0.45271891355514526, "logits/chosen": -0.724122166633606, "logits/rejected": -0.8692367672920227, "logps/chosen": -0.6678415536880493, "logps/rejected": -1.4039802551269531, "loss": 1.206, "nll_loss": 0.9254686832427979, "rewards/accuracies": 0.625, "rewards/chosen": -0.06678415834903717, "rewards/margins": 0.07361388206481934, "rewards/rejected": -0.1403980255126953, "step": 5907 }, { "epoch": 3.6040872350160136, "grad_norm": 8.823319435119629, "learning_rate": 2.2407838334353947e-06, "log_odds_chosen": 1.5891014337539673, "log_odds_ratio": -0.459292471408844, "logits/chosen": -0.8728445172309875, "logits/rejected": -1.171709418296814, "logps/chosen": -0.8864514827728271, "logps/rejected": -2.163297653198242, "loss": 1.151, "nll_loss": 0.9948947429656982, "rewards/accuracies": 0.75, "rewards/chosen": -0.08864515274763107, "rewards/margins": 0.12768462300300598, "rewards/rejected": -0.21632978320121765, "step": 5908 }, { "epoch": 3.6046972700930304, "grad_norm": 1.455538034439087, "learning_rate": 2.2398040416411513e-06, "log_odds_chosen": 3.2198524475097656, "log_odds_ratio": -0.13688541948795319, "logits/chosen": -0.7454224228858948, "logits/rejected": -1.0474833250045776, "logps/chosen": -0.5494553446769714, "logps/rejected": -3.00278377532959, "loss": 0.8866, "nll_loss": 0.9929791688919067, "rewards/accuracies": 1.0, "rewards/chosen": -0.05494553595781326, "rewards/margins": 0.24533286690711975, "rewards/rejected": -0.3002783954143524, "step": 5909 }, { "epoch": 3.6053073051700473, "grad_norm": 1.9451582431793213, "learning_rate": 2.2388242498469074e-06, "log_odds_chosen": 3.0772454738616943, "log_odds_ratio": -0.3045521378517151, "logits/chosen": -0.7452974915504456, "logits/rejected": -1.0552845001220703, "logps/chosen": -0.7570458650588989, "logps/rejected": -3.297341823577881, "loss": 0.9528, "nll_loss": 0.8540856242179871, "rewards/accuracies": 0.75, "rewards/chosen": -0.07570458948612213, "rewards/margins": 0.2540295720100403, "rewards/rejected": -0.3297341763973236, "step": 5910 }, { "epoch": 3.605917340247064, "grad_norm": 1.6349283456802368, "learning_rate": 2.2378444580526635e-06, "log_odds_chosen": 0.5548321008682251, "log_odds_ratio": -0.5772460699081421, "logits/chosen": -0.9507330656051636, "logits/rejected": -0.948259174823761, "logps/chosen": -0.8574953675270081, "logps/rejected": -1.2850291728973389, "loss": 1.0848, "nll_loss": 0.9737560749053955, "rewards/accuracies": 0.375, "rewards/chosen": -0.0857495367527008, "rewards/margins": 0.042753394693136215, "rewards/rejected": -0.12850292026996613, "step": 5911 }, { "epoch": 3.606527375324081, "grad_norm": 2.0691778659820557, "learning_rate": 2.23686466625842e-06, "log_odds_chosen": 2.417724132537842, "log_odds_ratio": -0.4253726005554199, "logits/chosen": -0.8762483596801758, "logits/rejected": -0.9916267991065979, "logps/chosen": -0.972350001335144, "logps/rejected": -2.998622179031372, "loss": 1.1234, "nll_loss": 1.1146292686462402, "rewards/accuracies": 0.75, "rewards/chosen": -0.09723500162363052, "rewards/margins": 0.20262722671031952, "rewards/rejected": -0.29986220598220825, "step": 5912 }, { "epoch": 3.607137410401098, "grad_norm": 1.1649446487426758, "learning_rate": 2.235884874464176e-06, "log_odds_chosen": 3.6703314781188965, "log_odds_ratio": -0.23905308544635773, "logits/chosen": -0.8972105979919434, "logits/rejected": -1.0763870477676392, "logps/chosen": -0.6133965253829956, "logps/rejected": -3.559447765350342, "loss": 1.0173, "nll_loss": 0.8227100372314453, "rewards/accuracies": 0.875, "rewards/chosen": -0.06133965775370598, "rewards/margins": 0.2946051359176636, "rewards/rejected": -0.35594478249549866, "step": 5913 }, { "epoch": 3.607747445478115, "grad_norm": 3.1617369651794434, "learning_rate": 2.2349050826699323e-06, "log_odds_chosen": 0.9538275599479675, "log_odds_ratio": -0.6637975573539734, "logits/chosen": -0.9193437099456787, "logits/rejected": -0.9264008402824402, "logps/chosen": -0.8840188980102539, "logps/rejected": -1.5910340547561646, "loss": 1.1372, "nll_loss": 1.1098421812057495, "rewards/accuracies": 0.375, "rewards/chosen": -0.08840189129114151, "rewards/margins": 0.07070151716470718, "rewards/rejected": -0.1591034084558487, "step": 5914 }, { "epoch": 3.608357480555132, "grad_norm": 1.4604123830795288, "learning_rate": 2.233925290875689e-06, "log_odds_chosen": 1.1713969707489014, "log_odds_ratio": -0.44528305530548096, "logits/chosen": -0.9268007874488831, "logits/rejected": -1.0471538305282593, "logps/chosen": -0.9561316967010498, "logps/rejected": -1.8247454166412354, "loss": 1.1734, "nll_loss": 1.1373634338378906, "rewards/accuracies": 0.875, "rewards/chosen": -0.09561317414045334, "rewards/margins": 0.08686139434576035, "rewards/rejected": -0.18247456848621368, "step": 5915 }, { "epoch": 3.608967515632149, "grad_norm": 3.3316004276275635, "learning_rate": 2.232945499081445e-06, "log_odds_chosen": 2.1634275913238525, "log_odds_ratio": -0.3462265729904175, "logits/chosen": -0.9609093070030212, "logits/rejected": -0.9776684045791626, "logps/chosen": -0.9302636384963989, "logps/rejected": -2.7735917568206787, "loss": 1.031, "nll_loss": 1.1013903617858887, "rewards/accuracies": 0.875, "rewards/chosen": -0.09302636981010437, "rewards/margins": 0.18433283269405365, "rewards/rejected": -0.2773591876029968, "step": 5916 }, { "epoch": 3.6095775507091656, "grad_norm": 1.0516841411590576, "learning_rate": 2.2319657072872015e-06, "log_odds_chosen": 2.268864631652832, "log_odds_ratio": -0.5141199231147766, "logits/chosen": -0.8474218845367432, "logits/rejected": -1.0278359651565552, "logps/chosen": -0.942326545715332, "logps/rejected": -2.7132110595703125, "loss": 1.0103, "nll_loss": 1.038466215133667, "rewards/accuracies": 0.625, "rewards/chosen": -0.09423265606164932, "rewards/margins": 0.1770884394645691, "rewards/rejected": -0.2713211178779602, "step": 5917 }, { "epoch": 3.610187585786183, "grad_norm": 2.042133331298828, "learning_rate": 2.2309859154929577e-06, "log_odds_chosen": 0.4351298213005066, "log_odds_ratio": -0.5679761171340942, "logits/chosen": -0.9942854046821594, "logits/rejected": -0.9804692268371582, "logps/chosen": -0.8923236131668091, "logps/rejected": -1.1111527681350708, "loss": 1.1264, "nll_loss": 1.1146771907806396, "rewards/accuracies": 0.75, "rewards/chosen": -0.08923235535621643, "rewards/margins": 0.02188291773200035, "rewards/rejected": -0.11111528426408768, "step": 5918 }, { "epoch": 3.6107976208631998, "grad_norm": 2.8562166690826416, "learning_rate": 2.2300061236987138e-06, "log_odds_chosen": 3.346940279006958, "log_odds_ratio": -0.30675023794174194, "logits/chosen": -0.7386519908905029, "logits/rejected": -1.0785598754882812, "logps/chosen": -0.8378807306289673, "logps/rejected": -3.7207448482513428, "loss": 1.2176, "nll_loss": 1.2695727348327637, "rewards/accuracies": 0.875, "rewards/chosen": -0.08378806710243225, "rewards/margins": 0.288286417722702, "rewards/rejected": -0.37207451462745667, "step": 5919 }, { "epoch": 3.6114076559402166, "grad_norm": 8.274909019470215, "learning_rate": 2.2290263319044703e-06, "log_odds_chosen": 1.0184495449066162, "log_odds_ratio": -0.5256000757217407, "logits/chosen": -0.872275710105896, "logits/rejected": -0.8282980918884277, "logps/chosen": -1.0361301898956299, "logps/rejected": -1.9546480178833008, "loss": 1.2308, "nll_loss": 1.2287968397140503, "rewards/accuracies": 0.625, "rewards/chosen": -0.10361302644014359, "rewards/margins": 0.09185178577899933, "rewards/rejected": -0.19546480476856232, "step": 5920 }, { "epoch": 3.6120176910172335, "grad_norm": 1.316064476966858, "learning_rate": 2.2280465401102265e-06, "log_odds_chosen": 1.9519938230514526, "log_odds_ratio": -0.34544095396995544, "logits/chosen": -0.7306277751922607, "logits/rejected": -0.9218084216117859, "logps/chosen": -0.8505634069442749, "logps/rejected": -2.4097726345062256, "loss": 1.1095, "nll_loss": 1.0853514671325684, "rewards/accuracies": 0.75, "rewards/chosen": -0.08505633473396301, "rewards/margins": 0.15592092275619507, "rewards/rejected": -0.24097725749015808, "step": 5921 }, { "epoch": 3.6126277260942503, "grad_norm": 10.37867259979248, "learning_rate": 2.2270667483159826e-06, "log_odds_chosen": 1.0526552200317383, "log_odds_ratio": -0.76997971534729, "logits/chosen": -1.0712454319000244, "logits/rejected": -0.9790011644363403, "logps/chosen": -1.146188497543335, "logps/rejected": -2.1122231483459473, "loss": 1.1001, "nll_loss": 1.3317465782165527, "rewards/accuracies": 0.375, "rewards/chosen": -0.11461885273456573, "rewards/margins": 0.09660347551107407, "rewards/rejected": -0.211222305893898, "step": 5922 }, { "epoch": 3.613237761171267, "grad_norm": 4.144874095916748, "learning_rate": 2.226086956521739e-06, "log_odds_chosen": 1.8144944906234741, "log_odds_ratio": -0.47261542081832886, "logits/chosen": -0.7900594472885132, "logits/rejected": -0.8543762564659119, "logps/chosen": -0.7883983850479126, "logps/rejected": -2.251452684402466, "loss": 0.9605, "nll_loss": 0.925952136516571, "rewards/accuracies": 0.625, "rewards/chosen": -0.07883983850479126, "rewards/margins": 0.14630545675754547, "rewards/rejected": -0.22514531016349792, "step": 5923 }, { "epoch": 3.613847796248284, "grad_norm": 2.3822433948516846, "learning_rate": 2.2251071647274957e-06, "log_odds_chosen": 2.0655627250671387, "log_odds_ratio": -0.4157233238220215, "logits/chosen": -0.7930173277854919, "logits/rejected": -0.967381477355957, "logps/chosen": -0.8267627954483032, "logps/rejected": -2.379750967025757, "loss": 1.0249, "nll_loss": 1.1412014961242676, "rewards/accuracies": 0.75, "rewards/chosen": -0.08267629146575928, "rewards/margins": 0.15529879927635193, "rewards/rejected": -0.2379750907421112, "step": 5924 }, { "epoch": 3.6144578313253013, "grad_norm": 1.4234123229980469, "learning_rate": 2.2241273729332514e-06, "log_odds_chosen": 3.0058491230010986, "log_odds_ratio": -0.13260410726070404, "logits/chosen": -0.8019647002220154, "logits/rejected": -0.8954802751541138, "logps/chosen": -0.5640184879302979, "logps/rejected": -2.633606195449829, "loss": 0.912, "nll_loss": 0.9399752616882324, "rewards/accuracies": 1.0, "rewards/chosen": -0.056401848793029785, "rewards/margins": 0.2069588005542755, "rewards/rejected": -0.2633606493473053, "step": 5925 }, { "epoch": 3.615067866402318, "grad_norm": 1.5741466283798218, "learning_rate": 2.223147581139008e-06, "log_odds_chosen": 1.36299729347229, "log_odds_ratio": -0.5333306789398193, "logits/chosen": -0.7577078342437744, "logits/rejected": -0.97584068775177, "logps/chosen": -0.9039797186851501, "logps/rejected": -1.9225609302520752, "loss": 1.1421, "nll_loss": 1.4668946266174316, "rewards/accuracies": 0.5, "rewards/chosen": -0.09039797633886337, "rewards/margins": 0.10185811668634415, "rewards/rejected": -0.19225607812404633, "step": 5926 }, { "epoch": 3.615677901479335, "grad_norm": 1.9863287210464478, "learning_rate": 2.2221677893447645e-06, "log_odds_chosen": 2.809213161468506, "log_odds_ratio": -0.5139245390892029, "logits/chosen": -0.6695857048034668, "logits/rejected": -0.8280805349349976, "logps/chosen": -0.7591515183448792, "logps/rejected": -3.1667985916137695, "loss": 1.2141, "nll_loss": 0.953027069568634, "rewards/accuracies": 0.625, "rewards/chosen": -0.0759151503443718, "rewards/margins": 0.24076469242572784, "rewards/rejected": -0.31667983531951904, "step": 5927 }, { "epoch": 3.616287936556352, "grad_norm": 8.659728050231934, "learning_rate": 2.22118799755052e-06, "log_odds_chosen": 2.0127625465393066, "log_odds_ratio": -0.5822925567626953, "logits/chosen": -0.811784565448761, "logits/rejected": -0.8552203178405762, "logps/chosen": -0.9012919068336487, "logps/rejected": -2.695467710494995, "loss": 1.0231, "nll_loss": 1.1804180145263672, "rewards/accuracies": 0.5, "rewards/chosen": -0.09012918919324875, "rewards/margins": 0.17941758036613464, "rewards/rejected": -0.2695467472076416, "step": 5928 }, { "epoch": 3.616897971633369, "grad_norm": 1.097532033920288, "learning_rate": 2.2202082057562767e-06, "log_odds_chosen": 1.3964331150054932, "log_odds_ratio": -0.34759509563446045, "logits/chosen": -0.9095524549484253, "logits/rejected": -0.9577370882034302, "logps/chosen": -0.7098751068115234, "logps/rejected": -1.6047402620315552, "loss": 1.0022, "nll_loss": 1.1119911670684814, "rewards/accuracies": 0.875, "rewards/chosen": -0.0709875151515007, "rewards/margins": 0.0894865095615387, "rewards/rejected": -0.16047403216362, "step": 5929 }, { "epoch": 3.617508006710386, "grad_norm": 2.251648187637329, "learning_rate": 2.219228413962033e-06, "log_odds_chosen": 3.0416200160980225, "log_odds_ratio": -0.32479041814804077, "logits/chosen": -0.7373819351196289, "logits/rejected": -0.8390540480613708, "logps/chosen": -0.6619753241539001, "logps/rejected": -3.0998494625091553, "loss": 1.0075, "nll_loss": 0.8392966389656067, "rewards/accuracies": 0.75, "rewards/chosen": -0.06619752943515778, "rewards/margins": 0.24378742277622223, "rewards/rejected": -0.30998495221138, "step": 5930 }, { "epoch": 3.618118041787403, "grad_norm": 1.7109907865524292, "learning_rate": 2.2182486221677894e-06, "log_odds_chosen": 0.8619248270988464, "log_odds_ratio": -0.46617037057876587, "logits/chosen": -0.7057691216468811, "logits/rejected": -0.6739813685417175, "logps/chosen": -0.8711727857589722, "logps/rejected": -1.4782259464263916, "loss": 1.1234, "nll_loss": 1.1285834312438965, "rewards/accuracies": 0.75, "rewards/chosen": -0.0871172845363617, "rewards/margins": 0.06070532277226448, "rewards/rejected": -0.14782260358333588, "step": 5931 }, { "epoch": 3.6187280768644197, "grad_norm": 1.641077995300293, "learning_rate": 2.2172688303735455e-06, "log_odds_chosen": 0.5225983262062073, "log_odds_ratio": -0.6628845930099487, "logits/chosen": -0.7005728483200073, "logits/rejected": -0.7535473108291626, "logps/chosen": -0.7030416131019592, "logps/rejected": -1.0906264781951904, "loss": 0.9976, "nll_loss": 0.973487913608551, "rewards/accuracies": 0.5, "rewards/chosen": -0.07030415534973145, "rewards/margins": 0.03875848650932312, "rewards/rejected": -0.10906264930963516, "step": 5932 }, { "epoch": 3.6193381119414365, "grad_norm": 1.1423735618591309, "learning_rate": 2.2162890385793016e-06, "log_odds_chosen": 1.3309786319732666, "log_odds_ratio": -0.3013814091682434, "logits/chosen": -0.8012716770172119, "logits/rejected": -0.6511685848236084, "logps/chosen": -0.7995761632919312, "logps/rejected": -1.766452670097351, "loss": 1.1391, "nll_loss": 0.909555971622467, "rewards/accuracies": 1.0, "rewards/chosen": -0.07995761185884476, "rewards/margins": 0.09668765217065811, "rewards/rejected": -0.17664526402950287, "step": 5933 }, { "epoch": 3.6199481470184534, "grad_norm": 5.70077657699585, "learning_rate": 2.215309246785058e-06, "log_odds_chosen": 2.199258327484131, "log_odds_ratio": -0.383583664894104, "logits/chosen": -0.9134143590927124, "logits/rejected": -1.0531182289123535, "logps/chosen": -0.6830394268035889, "logps/rejected": -2.250535011291504, "loss": 1.1575, "nll_loss": 1.1757408380508423, "rewards/accuracies": 0.625, "rewards/chosen": -0.06830394268035889, "rewards/margins": 0.15674954652786255, "rewards/rejected": -0.22505348920822144, "step": 5934 }, { "epoch": 3.6205581820954706, "grad_norm": 2.282392978668213, "learning_rate": 2.2143294549908143e-06, "log_odds_chosen": 3.1140992641448975, "log_odds_ratio": -0.37477022409439087, "logits/chosen": -0.7119008898735046, "logits/rejected": -0.9294192790985107, "logps/chosen": -0.7247024774551392, "logps/rejected": -3.248626470565796, "loss": 1.0527, "nll_loss": 0.8823168873786926, "rewards/accuracies": 0.75, "rewards/chosen": -0.07247024774551392, "rewards/margins": 0.25239241123199463, "rewards/rejected": -0.32486262917518616, "step": 5935 }, { "epoch": 3.6211682171724875, "grad_norm": 3.2842860221862793, "learning_rate": 2.2133496631965704e-06, "log_odds_chosen": 2.165994644165039, "log_odds_ratio": -0.433928906917572, "logits/chosen": -0.721544623374939, "logits/rejected": -0.8305257558822632, "logps/chosen": -0.8810123205184937, "logps/rejected": -2.7542128562927246, "loss": 1.2314, "nll_loss": 1.2714407444000244, "rewards/accuracies": 0.625, "rewards/chosen": -0.08810123056173325, "rewards/margins": 0.1873200535774231, "rewards/rejected": -0.27542126178741455, "step": 5936 }, { "epoch": 3.6217782522495043, "grad_norm": 2.8893463611602783, "learning_rate": 2.212369871402327e-06, "log_odds_chosen": 0.8102142214775085, "log_odds_ratio": -0.5044572353363037, "logits/chosen": -0.7612373232841492, "logits/rejected": -0.8090846538543701, "logps/chosen": -1.049484372138977, "logps/rejected": -1.6924690008163452, "loss": 1.1822, "nll_loss": 1.2134263515472412, "rewards/accuracies": 0.625, "rewards/chosen": -0.10494844615459442, "rewards/margins": 0.06429845839738846, "rewards/rejected": -0.16924689710140228, "step": 5937 }, { "epoch": 3.622388287326521, "grad_norm": 2.2017486095428467, "learning_rate": 2.2113900796080835e-06, "log_odds_chosen": 2.3885223865509033, "log_odds_ratio": -0.41580089926719666, "logits/chosen": -0.8381713628768921, "logits/rejected": -0.8598988056182861, "logps/chosen": -0.7592946290969849, "logps/rejected": -2.8280467987060547, "loss": 0.9662, "nll_loss": 1.0247361660003662, "rewards/accuracies": 1.0, "rewards/chosen": -0.07592946290969849, "rewards/margins": 0.20687523484230042, "rewards/rejected": -0.2828046977519989, "step": 5938 }, { "epoch": 3.6229983224035385, "grad_norm": 1.5704208612442017, "learning_rate": 2.210410287813839e-06, "log_odds_chosen": 2.6009035110473633, "log_odds_ratio": -0.24700312316417694, "logits/chosen": -0.9348900318145752, "logits/rejected": -0.9737058877944946, "logps/chosen": -0.744364321231842, "logps/rejected": -2.8370511531829834, "loss": 0.9676, "nll_loss": 0.8939055800437927, "rewards/accuracies": 0.875, "rewards/chosen": -0.07443643361330032, "rewards/margins": 0.209268718957901, "rewards/rejected": -0.2837051451206207, "step": 5939 }, { "epoch": 3.6236083574805553, "grad_norm": 1.4169639348983765, "learning_rate": 2.2094304960195957e-06, "log_odds_chosen": 0.7216355800628662, "log_odds_ratio": -0.639561653137207, "logits/chosen": -0.8043263554573059, "logits/rejected": -0.8442880511283875, "logps/chosen": -1.0800836086273193, "logps/rejected": -1.5986741781234741, "loss": 1.1963, "nll_loss": 1.2318586111068726, "rewards/accuracies": 0.375, "rewards/chosen": -0.10800835490226746, "rewards/margins": 0.051859062165021896, "rewards/rejected": -0.15986743569374084, "step": 5940 }, { "epoch": 3.624218392557572, "grad_norm": 1.6975274085998535, "learning_rate": 2.2084507042253523e-06, "log_odds_chosen": 1.9553449153900146, "log_odds_ratio": -0.3529720902442932, "logits/chosen": -0.7956088185310364, "logits/rejected": -0.7754155397415161, "logps/chosen": -0.558068573474884, "logps/rejected": -1.9463956356048584, "loss": 1.0352, "nll_loss": 0.9087926149368286, "rewards/accuracies": 1.0, "rewards/chosen": -0.055806852877140045, "rewards/margins": 0.1388327032327652, "rewards/rejected": -0.19463956356048584, "step": 5941 }, { "epoch": 3.624828427634589, "grad_norm": 1.2519744634628296, "learning_rate": 2.207470912431108e-06, "log_odds_chosen": 2.0076591968536377, "log_odds_ratio": -0.428186297416687, "logits/chosen": -0.8430953621864319, "logits/rejected": -0.9446743726730347, "logps/chosen": -0.6485173106193542, "logps/rejected": -2.0400562286376953, "loss": 0.9066, "nll_loss": 0.9528710246086121, "rewards/accuracies": 0.625, "rewards/chosen": -0.06485173106193542, "rewards/margins": 0.13915389776229858, "rewards/rejected": -0.20400561392307281, "step": 5942 }, { "epoch": 3.625438462711606, "grad_norm": 7.548275947570801, "learning_rate": 2.2064911206368645e-06, "log_odds_chosen": 1.347034215927124, "log_odds_ratio": -0.405908465385437, "logits/chosen": -0.9756879210472107, "logits/rejected": -0.9873391389846802, "logps/chosen": -0.9449482560157776, "logps/rejected": -1.9528874158859253, "loss": 1.0286, "nll_loss": 1.0648071765899658, "rewards/accuracies": 0.875, "rewards/chosen": -0.09449482709169388, "rewards/margins": 0.10079392045736313, "rewards/rejected": -0.195288747549057, "step": 5943 }, { "epoch": 3.6260484977886227, "grad_norm": 1.5630011558532715, "learning_rate": 2.205511328842621e-06, "log_odds_chosen": 2.3509578704833984, "log_odds_ratio": -0.3249135911464691, "logits/chosen": -0.9251766204833984, "logits/rejected": -0.9737814664840698, "logps/chosen": -0.5833300352096558, "logps/rejected": -2.0445940494537354, "loss": 0.9563, "nll_loss": 0.9395734071731567, "rewards/accuracies": 0.75, "rewards/chosen": -0.058333005756139755, "rewards/margins": 0.1461264044046402, "rewards/rejected": -0.20445939898490906, "step": 5944 }, { "epoch": 3.6266585328656396, "grad_norm": 2.4253437519073486, "learning_rate": 2.204531537048377e-06, "log_odds_chosen": 1.8716347217559814, "log_odds_ratio": -0.24929164350032806, "logits/chosen": -0.784874439239502, "logits/rejected": -0.8490425944328308, "logps/chosen": -0.7042571306228638, "logps/rejected": -1.992864966392517, "loss": 1.1623, "nll_loss": 0.9814913868904114, "rewards/accuracies": 1.0, "rewards/chosen": -0.07042571902275085, "rewards/margins": 0.12886080145835876, "rewards/rejected": -0.19928652048110962, "step": 5945 }, { "epoch": 3.627268567942657, "grad_norm": 2.5680949687957764, "learning_rate": 2.2035517452541333e-06, "log_odds_chosen": 0.9518733024597168, "log_odds_ratio": -0.41324329376220703, "logits/chosen": -0.9957955479621887, "logits/rejected": -1.1491671800613403, "logps/chosen": -0.8454172611236572, "logps/rejected": -1.4910902976989746, "loss": 1.1719, "nll_loss": 1.2525744438171387, "rewards/accuracies": 0.75, "rewards/chosen": -0.08454172313213348, "rewards/margins": 0.06456731259822845, "rewards/rejected": -0.14910903573036194, "step": 5946 }, { "epoch": 3.6278786030196737, "grad_norm": 1.3356424570083618, "learning_rate": 2.20257195345989e-06, "log_odds_chosen": 1.5628066062927246, "log_odds_ratio": -0.642210066318512, "logits/chosen": -0.8964987993240356, "logits/rejected": -1.0475218296051025, "logps/chosen": -0.9269879460334778, "logps/rejected": -2.4010069370269775, "loss": 1.0118, "nll_loss": 1.1655819416046143, "rewards/accuracies": 0.375, "rewards/chosen": -0.09269880503416061, "rewards/margins": 0.14740188419818878, "rewards/rejected": -0.2401006817817688, "step": 5947 }, { "epoch": 3.6284886380966905, "grad_norm": 1.229667067527771, "learning_rate": 2.201592161665646e-06, "log_odds_chosen": 1.755800724029541, "log_odds_ratio": -0.5081101059913635, "logits/chosen": -0.8608840703964233, "logits/rejected": -0.9300767183303833, "logps/chosen": -0.8920930624008179, "logps/rejected": -2.26466703414917, "loss": 0.986, "nll_loss": 1.0650343894958496, "rewards/accuracies": 0.75, "rewards/chosen": -0.08920930325984955, "rewards/margins": 0.137257382273674, "rewards/rejected": -0.22646668553352356, "step": 5948 }, { "epoch": 3.6290986731737074, "grad_norm": 3.438908815383911, "learning_rate": 2.200612369871402e-06, "log_odds_chosen": 1.7645565271377563, "log_odds_ratio": -0.3373894989490509, "logits/chosen": -0.7737300395965576, "logits/rejected": -0.769931435585022, "logps/chosen": -0.6439878344535828, "logps/rejected": -1.9359750747680664, "loss": 1.0028, "nll_loss": 0.9064639806747437, "rewards/accuracies": 0.875, "rewards/chosen": -0.06439878046512604, "rewards/margins": 0.12919872999191284, "rewards/rejected": -0.19359751045703888, "step": 5949 }, { "epoch": 3.6297087082507247, "grad_norm": 6.1875481605529785, "learning_rate": 2.1996325780771582e-06, "log_odds_chosen": 1.1123046875, "log_odds_ratio": -0.5893120765686035, "logits/chosen": -0.752392053604126, "logits/rejected": -0.8468573093414307, "logps/chosen": -0.7813125848770142, "logps/rejected": -1.7682468891143799, "loss": 1.0482, "nll_loss": 1.1432785987854004, "rewards/accuracies": 0.75, "rewards/chosen": -0.07813125848770142, "rewards/margins": 0.09869343042373657, "rewards/rejected": -0.176824688911438, "step": 5950 }, { "epoch": 3.6303187433277415, "grad_norm": 5.117948532104492, "learning_rate": 2.1986527862829148e-06, "log_odds_chosen": 2.2607533931732178, "log_odds_ratio": -0.41212865710258484, "logits/chosen": -0.9420119524002075, "logits/rejected": -1.09413743019104, "logps/chosen": -0.8744869232177734, "logps/rejected": -2.6274499893188477, "loss": 1.1829, "nll_loss": 1.1108782291412354, "rewards/accuracies": 0.625, "rewards/chosen": -0.08744869381189346, "rewards/margins": 0.17529630661010742, "rewards/rejected": -0.2627449929714203, "step": 5951 }, { "epoch": 3.6309287784047584, "grad_norm": 1.2165300846099854, "learning_rate": 2.1976729944886713e-06, "log_odds_chosen": 2.6937146186828613, "log_odds_ratio": -0.3636617064476013, "logits/chosen": -0.8175590634346008, "logits/rejected": -0.8925623893737793, "logps/chosen": -0.6889027953147888, "logps/rejected": -2.6903562545776367, "loss": 1.0088, "nll_loss": 0.8309875726699829, "rewards/accuracies": 0.75, "rewards/chosen": -0.0688902884721756, "rewards/margins": 0.20014534890651703, "rewards/rejected": -0.2690356373786926, "step": 5952 }, { "epoch": 3.631538813481775, "grad_norm": 1.3455631732940674, "learning_rate": 2.196693202694427e-06, "log_odds_chosen": 2.351628303527832, "log_odds_ratio": -0.35203680396080017, "logits/chosen": -0.5352567434310913, "logits/rejected": -0.8073763251304626, "logps/chosen": -0.5101887583732605, "logps/rejected": -2.1699376106262207, "loss": 0.7745, "nll_loss": 0.555757462978363, "rewards/accuracies": 0.625, "rewards/chosen": -0.05101887881755829, "rewards/margins": 0.16597488522529602, "rewards/rejected": -0.2169937640428543, "step": 5953 }, { "epoch": 3.632148848558792, "grad_norm": 8.919976234436035, "learning_rate": 2.1957134109001836e-06, "log_odds_chosen": 1.578541874885559, "log_odds_ratio": -0.39770591259002686, "logits/chosen": -1.0606391429901123, "logits/rejected": -0.9940726161003113, "logps/chosen": -0.7769142985343933, "logps/rejected": -1.9859873056411743, "loss": 0.9707, "nll_loss": 1.0945852994918823, "rewards/accuracies": 0.875, "rewards/chosen": -0.07769143581390381, "rewards/margins": 0.12090729176998138, "rewards/rejected": -0.1985987424850464, "step": 5954 }, { "epoch": 3.632758883635809, "grad_norm": 2.029770612716675, "learning_rate": 2.19473361910594e-06, "log_odds_chosen": 3.6548421382904053, "log_odds_ratio": -0.11693452298641205, "logits/chosen": -0.6831028461456299, "logits/rejected": -0.7526525259017944, "logps/chosen": -0.6710587739944458, "logps/rejected": -3.6255321502685547, "loss": 1.1865, "nll_loss": 0.98441082239151, "rewards/accuracies": 1.0, "rewards/chosen": -0.06710587441921234, "rewards/margins": 0.29544731974601746, "rewards/rejected": -0.3625532388687134, "step": 5955 }, { "epoch": 3.6333689187128257, "grad_norm": 6.05206823348999, "learning_rate": 2.193753827311696e-06, "log_odds_chosen": 2.4867303371429443, "log_odds_ratio": -0.4900006055831909, "logits/chosen": -0.739030659198761, "logits/rejected": -0.909127950668335, "logps/chosen": -1.0472910404205322, "logps/rejected": -3.2273848056793213, "loss": 1.116, "nll_loss": 1.2757443189620972, "rewards/accuracies": 0.625, "rewards/chosen": -0.10472910106182098, "rewards/margins": 0.21800941228866577, "rewards/rejected": -0.32273849844932556, "step": 5956 }, { "epoch": 3.633978953789843, "grad_norm": 8.246133804321289, "learning_rate": 2.1927740355174524e-06, "log_odds_chosen": 2.4380416870117188, "log_odds_ratio": -0.5010545253753662, "logits/chosen": -0.8719792366027832, "logits/rejected": -0.7297676801681519, "logps/chosen": -0.9956893920898438, "logps/rejected": -3.1699059009552, "loss": 0.9259, "nll_loss": 1.171912670135498, "rewards/accuracies": 0.625, "rewards/chosen": -0.0995689406991005, "rewards/margins": 0.21742168068885803, "rewards/rejected": -0.31699058413505554, "step": 5957 }, { "epoch": 3.63458898886686, "grad_norm": 1.52178955078125, "learning_rate": 2.191794243723209e-06, "log_odds_chosen": 2.603036403656006, "log_odds_ratio": -0.4832684397697449, "logits/chosen": -0.828831136226654, "logits/rejected": -0.895943284034729, "logps/chosen": -0.716715395450592, "logps/rejected": -2.996854782104492, "loss": 0.8748, "nll_loss": 0.893121600151062, "rewards/accuracies": 0.625, "rewards/chosen": -0.07167153060436249, "rewards/margins": 0.22801396250724792, "rewards/rejected": -0.2996855080127716, "step": 5958 }, { "epoch": 3.6351990239438767, "grad_norm": 1.8854196071624756, "learning_rate": 2.190814451928965e-06, "log_odds_chosen": 2.6171488761901855, "log_odds_ratio": -0.292614221572876, "logits/chosen": -0.8127951622009277, "logits/rejected": -0.9876161813735962, "logps/chosen": -0.8297067880630493, "logps/rejected": -2.8771800994873047, "loss": 0.9522, "nll_loss": 1.033902883529663, "rewards/accuracies": 0.875, "rewards/chosen": -0.08297067880630493, "rewards/margins": 0.20474731922149658, "rewards/rejected": -0.2877179980278015, "step": 5959 }, { "epoch": 3.6358090590208936, "grad_norm": 5.26861047744751, "learning_rate": 2.189834660134721e-06, "log_odds_chosen": 1.9073834419250488, "log_odds_ratio": -0.43278858065605164, "logits/chosen": -0.8322490453720093, "logits/rejected": -0.7372448444366455, "logps/chosen": -0.7175943851470947, "logps/rejected": -2.27620530128479, "loss": 0.9641, "nll_loss": 0.9767762422561646, "rewards/accuracies": 0.625, "rewards/chosen": -0.071759432554245, "rewards/margins": 0.15586109459400177, "rewards/rejected": -0.22762052714824677, "step": 5960 }, { "epoch": 3.636419094097911, "grad_norm": 8.435482025146484, "learning_rate": 2.1888548683404777e-06, "log_odds_chosen": 1.3730608224868774, "log_odds_ratio": -0.5300707221031189, "logits/chosen": -0.9350649118423462, "logits/rejected": -0.9572505354881287, "logps/chosen": -0.9463306665420532, "logps/rejected": -2.09698748588562, "loss": 0.9773, "nll_loss": 1.023007869720459, "rewards/accuracies": 0.625, "rewards/chosen": -0.0946330726146698, "rewards/margins": 0.11506567895412445, "rewards/rejected": -0.20969875156879425, "step": 5961 }, { "epoch": 3.6370291291749277, "grad_norm": 2.250175714492798, "learning_rate": 2.187875076546234e-06, "log_odds_chosen": 1.9652440547943115, "log_odds_ratio": -0.3798326551914215, "logits/chosen": -0.6954689025878906, "logits/rejected": -0.7529131770133972, "logps/chosen": -0.7931501865386963, "logps/rejected": -2.359779119491577, "loss": 0.9587, "nll_loss": 0.8450356125831604, "rewards/accuracies": 0.625, "rewards/chosen": -0.07931501418352127, "rewards/margins": 0.15666288137435913, "rewards/rejected": -0.235977903008461, "step": 5962 }, { "epoch": 3.6376391642519446, "grad_norm": 3.9071218967437744, "learning_rate": 2.18689528475199e-06, "log_odds_chosen": 1.7601685523986816, "log_odds_ratio": -0.45424607396125793, "logits/chosen": -0.6711241006851196, "logits/rejected": -0.848616361618042, "logps/chosen": -0.8674618005752563, "logps/rejected": -2.241321086883545, "loss": 1.0306, "nll_loss": 0.9147865772247314, "rewards/accuracies": 0.75, "rewards/chosen": -0.08674617111682892, "rewards/margins": 0.13738593459129333, "rewards/rejected": -0.22413212060928345, "step": 5963 }, { "epoch": 3.6382491993289614, "grad_norm": 1.515028953552246, "learning_rate": 2.1859154929577465e-06, "log_odds_chosen": 0.7879225015640259, "log_odds_ratio": -0.5221422910690308, "logits/chosen": -0.7711701393127441, "logits/rejected": -0.7955346703529358, "logps/chosen": -0.8744603395462036, "logps/rejected": -1.4502118825912476, "loss": 1.0833, "nll_loss": 0.9467804431915283, "rewards/accuracies": 0.625, "rewards/chosen": -0.08744603395462036, "rewards/margins": 0.05757516622543335, "rewards/rejected": -0.1450212001800537, "step": 5964 }, { "epoch": 3.6388592344059782, "grad_norm": 1.448169231414795, "learning_rate": 2.1849357011635026e-06, "log_odds_chosen": 2.1500847339630127, "log_odds_ratio": -0.552208662033081, "logits/chosen": -0.8347549438476562, "logits/rejected": -0.9163171648979187, "logps/chosen": -0.84079909324646, "logps/rejected": -2.635343313217163, "loss": 0.9997, "nll_loss": 0.9829280376434326, "rewards/accuracies": 0.75, "rewards/chosen": -0.08407990634441376, "rewards/margins": 0.17945441603660583, "rewards/rejected": -0.2635343372821808, "step": 5965 }, { "epoch": 3.639469269482995, "grad_norm": 1.3606135845184326, "learning_rate": 2.183955909369259e-06, "log_odds_chosen": 0.8736175894737244, "log_odds_ratio": -0.584973931312561, "logits/chosen": -0.9302432537078857, "logits/rejected": -0.9237412214279175, "logps/chosen": -0.8292779922485352, "logps/rejected": -1.4433053731918335, "loss": 1.0455, "nll_loss": 0.8655197024345398, "rewards/accuracies": 0.625, "rewards/chosen": -0.08292780071496964, "rewards/margins": 0.061402738094329834, "rewards/rejected": -0.14433053135871887, "step": 5966 }, { "epoch": 3.640079304560012, "grad_norm": 1.5178377628326416, "learning_rate": 2.1829761175750153e-06, "log_odds_chosen": 1.8080247640609741, "log_odds_ratio": -0.29237592220306396, "logits/chosen": -0.7884286046028137, "logits/rejected": -0.9742578268051147, "logps/chosen": -0.7310785055160522, "logps/rejected": -1.9458580017089844, "loss": 1.0107, "nll_loss": 1.0321917533874512, "rewards/accuracies": 0.875, "rewards/chosen": -0.07310785353183746, "rewards/margins": 0.12147794663906097, "rewards/rejected": -0.19458580017089844, "step": 5967 }, { "epoch": 3.6406893396370292, "grad_norm": 1.3854682445526123, "learning_rate": 2.1819963257807714e-06, "log_odds_chosen": 1.0565035343170166, "log_odds_ratio": -0.6921664476394653, "logits/chosen": -0.9349265098571777, "logits/rejected": -0.9501282572746277, "logps/chosen": -1.0181093215942383, "logps/rejected": -1.852480411529541, "loss": 0.8903, "nll_loss": 1.1745679378509521, "rewards/accuracies": 0.625, "rewards/chosen": -0.10181093961000443, "rewards/margins": 0.08343709260225296, "rewards/rejected": -0.18524804711341858, "step": 5968 }, { "epoch": 3.641299374714046, "grad_norm": 1.9518930912017822, "learning_rate": 2.181016533986528e-06, "log_odds_chosen": 1.6261749267578125, "log_odds_ratio": -0.5772935748100281, "logits/chosen": -0.8731027841567993, "logits/rejected": -0.8569111824035645, "logps/chosen": -0.8416571617126465, "logps/rejected": -2.0671989917755127, "loss": 1.1611, "nll_loss": 1.245093584060669, "rewards/accuracies": 0.625, "rewards/chosen": -0.08416572213172913, "rewards/margins": 0.12255417555570602, "rewards/rejected": -0.20671987533569336, "step": 5969 }, { "epoch": 3.641909409791063, "grad_norm": 1.548073649406433, "learning_rate": 2.1800367421922836e-06, "log_odds_chosen": 2.4692869186401367, "log_odds_ratio": -0.5371041297912598, "logits/chosen": -0.8708124160766602, "logits/rejected": -0.9736380577087402, "logps/chosen": -0.8325342535972595, "logps/rejected": -2.6150009632110596, "loss": 1.0859, "nll_loss": 1.2473325729370117, "rewards/accuracies": 0.625, "rewards/chosen": -0.0832534208893776, "rewards/margins": 0.17824667692184448, "rewards/rejected": -0.26150012016296387, "step": 5970 }, { "epoch": 3.6425194448680798, "grad_norm": 1.5700215101242065, "learning_rate": 2.17905695039804e-06, "log_odds_chosen": 0.9746264815330505, "log_odds_ratio": -0.6630768179893494, "logits/chosen": -0.7123632431030273, "logits/rejected": -0.8091052770614624, "logps/chosen": -0.9610675573348999, "logps/rejected": -1.7386285066604614, "loss": 1.0971, "nll_loss": 1.042590618133545, "rewards/accuracies": 0.5, "rewards/chosen": -0.09610675275325775, "rewards/margins": 0.07775609940290451, "rewards/rejected": -0.17386284470558167, "step": 5971 }, { "epoch": 3.643129479945097, "grad_norm": 3.406118392944336, "learning_rate": 2.1780771586037967e-06, "log_odds_chosen": 1.90818190574646, "log_odds_ratio": -0.33163875341415405, "logits/chosen": -0.9177893996238708, "logits/rejected": -0.8658044338226318, "logps/chosen": -0.8176364898681641, "logps/rejected": -2.3660664558410645, "loss": 1.063, "nll_loss": 0.9611104130744934, "rewards/accuracies": 0.875, "rewards/chosen": -0.08176364749670029, "rewards/margins": 0.15484297275543213, "rewards/rejected": -0.236606627702713, "step": 5972 }, { "epoch": 3.643739515022114, "grad_norm": 1.257519006729126, "learning_rate": 2.177097366809553e-06, "log_odds_chosen": 2.873034954071045, "log_odds_ratio": -0.30185022950172424, "logits/chosen": -0.7509527206420898, "logits/rejected": -0.8846315741539001, "logps/chosen": -0.9364356994628906, "logps/rejected": -3.373244524002075, "loss": 1.0093, "nll_loss": 1.0273170471191406, "rewards/accuracies": 0.875, "rewards/chosen": -0.09364356845617294, "rewards/margins": 0.2436809092760086, "rewards/rejected": -0.33732450008392334, "step": 5973 }, { "epoch": 3.6443495500991308, "grad_norm": 1.3761820793151855, "learning_rate": 2.176117575015309e-06, "log_odds_chosen": 0.49593618512153625, "log_odds_ratio": -0.6285162568092346, "logits/chosen": -0.7744457721710205, "logits/rejected": -0.6848729252815247, "logps/chosen": -0.7719876766204834, "logps/rejected": -1.0945899486541748, "loss": 1.0098, "nll_loss": 1.1213113069534302, "rewards/accuracies": 0.5, "rewards/chosen": -0.07719877362251282, "rewards/margins": 0.0322602242231369, "rewards/rejected": -0.10945899784564972, "step": 5974 }, { "epoch": 3.6449595851761476, "grad_norm": 1.573460340499878, "learning_rate": 2.1751377832210655e-06, "log_odds_chosen": 1.8256850242614746, "log_odds_ratio": -0.34019118547439575, "logits/chosen": -0.8770446181297302, "logits/rejected": -0.9672285914421082, "logps/chosen": -0.8342775702476501, "logps/rejected": -2.1469268798828125, "loss": 0.9774, "nll_loss": 1.1040314435958862, "rewards/accuracies": 0.875, "rewards/chosen": -0.08342775702476501, "rewards/margins": 0.13126493990421295, "rewards/rejected": -0.21469269692897797, "step": 5975 }, { "epoch": 3.6455696202531644, "grad_norm": 1.3787273168563843, "learning_rate": 2.1741579914268216e-06, "log_odds_chosen": 2.543076992034912, "log_odds_ratio": -0.4111401438713074, "logits/chosen": -0.5573257803916931, "logits/rejected": -0.7969706654548645, "logps/chosen": -0.6202799081802368, "logps/rejected": -2.6333346366882324, "loss": 0.9748, "nll_loss": 0.9522501230239868, "rewards/accuracies": 0.75, "rewards/chosen": -0.06202799826860428, "rewards/margins": 0.20130544900894165, "rewards/rejected": -0.26333343982696533, "step": 5976 }, { "epoch": 3.6461796553301813, "grad_norm": 1.7176539897918701, "learning_rate": 2.1731781996325778e-06, "log_odds_chosen": 2.5737311840057373, "log_odds_ratio": -0.26970091462135315, "logits/chosen": -0.9930565357208252, "logits/rejected": -0.8817538619041443, "logps/chosen": -0.9122952818870544, "logps/rejected": -2.886784076690674, "loss": 1.232, "nll_loss": 1.2704415321350098, "rewards/accuracies": 0.875, "rewards/chosen": -0.09122952818870544, "rewards/margins": 0.19744889438152313, "rewards/rejected": -0.2886784076690674, "step": 5977 }, { "epoch": 3.646789690407198, "grad_norm": 2.054776668548584, "learning_rate": 2.1721984078383343e-06, "log_odds_chosen": 2.8211381435394287, "log_odds_ratio": -0.247842937707901, "logits/chosen": -0.8049013614654541, "logits/rejected": -0.9950331449508667, "logps/chosen": -0.7225936651229858, "logps/rejected": -2.9752912521362305, "loss": 1.135, "nll_loss": 0.9929013252258301, "rewards/accuracies": 1.0, "rewards/chosen": -0.07225935906171799, "rewards/margins": 0.22526977956295013, "rewards/rejected": -0.2975291311740875, "step": 5978 }, { "epoch": 3.6473997254842154, "grad_norm": 1.8778047561645508, "learning_rate": 2.1712186160440904e-06, "log_odds_chosen": 0.8488067388534546, "log_odds_ratio": -0.6980756521224976, "logits/chosen": -0.9584277868270874, "logits/rejected": -0.9841456413269043, "logps/chosen": -0.9801790118217468, "logps/rejected": -1.7263177633285522, "loss": 1.2392, "nll_loss": 1.1885344982147217, "rewards/accuracies": 0.5, "rewards/chosen": -0.09801790863275528, "rewards/margins": 0.07461387664079666, "rewards/rejected": -0.17263178527355194, "step": 5979 }, { "epoch": 3.6480097605612323, "grad_norm": 1.5590276718139648, "learning_rate": 2.170238824249847e-06, "log_odds_chosen": 1.6151520013809204, "log_odds_ratio": -0.42038172483444214, "logits/chosen": -0.7780311107635498, "logits/rejected": -1.032228946685791, "logps/chosen": -0.9280364513397217, "logps/rejected": -2.1689066886901855, "loss": 1.1824, "nll_loss": 1.2233033180236816, "rewards/accuracies": 0.75, "rewards/chosen": -0.09280364215373993, "rewards/margins": 0.12408704310655594, "rewards/rejected": -0.21689069271087646, "step": 5980 }, { "epoch": 3.648619795638249, "grad_norm": 3.544616222381592, "learning_rate": 2.169259032455603e-06, "log_odds_chosen": 0.844917356967926, "log_odds_ratio": -0.6035838723182678, "logits/chosen": -0.8650351166725159, "logits/rejected": -0.7871191501617432, "logps/chosen": -0.8264830708503723, "logps/rejected": -1.4331169128417969, "loss": 1.0128, "nll_loss": 1.0750447511672974, "rewards/accuracies": 0.625, "rewards/chosen": -0.08264830708503723, "rewards/margins": 0.06066339090466499, "rewards/rejected": -0.14331169426441193, "step": 5981 }, { "epoch": 3.649229830715266, "grad_norm": 1.3603620529174805, "learning_rate": 2.1682792406613592e-06, "log_odds_chosen": 1.9148705005645752, "log_odds_ratio": -0.42327797412872314, "logits/chosen": -0.808752179145813, "logits/rejected": -0.9935060143470764, "logps/chosen": -0.7527408599853516, "logps/rejected": -2.2310242652893066, "loss": 0.908, "nll_loss": 0.7866119146347046, "rewards/accuracies": 0.75, "rewards/chosen": -0.07527408748865128, "rewards/margins": 0.1478283405303955, "rewards/rejected": -0.2231024205684662, "step": 5982 }, { "epoch": 3.6498398657922833, "grad_norm": 2.3953752517700195, "learning_rate": 2.1672994488671158e-06, "log_odds_chosen": 1.6422669887542725, "log_odds_ratio": -0.4922325611114502, "logits/chosen": -0.7905505895614624, "logits/rejected": -0.8134545683860779, "logps/chosen": -0.7682375311851501, "logps/rejected": -2.062771797180176, "loss": 1.016, "nll_loss": 0.8417770266532898, "rewards/accuracies": 0.75, "rewards/chosen": -0.07682375609874725, "rewards/margins": 0.12945343554019928, "rewards/rejected": -0.20627719163894653, "step": 5983 }, { "epoch": 3.6504499008693, "grad_norm": 1.3106651306152344, "learning_rate": 2.1663196570728723e-06, "log_odds_chosen": 2.4062914848327637, "log_odds_ratio": -0.2601417303085327, "logits/chosen": -0.6269200444221497, "logits/rejected": -0.799655020236969, "logps/chosen": -0.6465926170349121, "logps/rejected": -2.3234024047851562, "loss": 0.9417, "nll_loss": 0.7519581913948059, "rewards/accuracies": 0.875, "rewards/chosen": -0.06465926021337509, "rewards/margins": 0.1676810085773468, "rewards/rejected": -0.2323402762413025, "step": 5984 }, { "epoch": 3.651059935946317, "grad_norm": 1.392065167427063, "learning_rate": 2.165339865278628e-06, "log_odds_chosen": 2.341968059539795, "log_odds_ratio": -0.451214462518692, "logits/chosen": -0.8323795795440674, "logits/rejected": -1.0821969509124756, "logps/chosen": -0.6046930551528931, "logps/rejected": -2.4530951976776123, "loss": 1.0148, "nll_loss": 0.8874368071556091, "rewards/accuracies": 0.75, "rewards/chosen": -0.060469307005405426, "rewards/margins": 0.18484021723270416, "rewards/rejected": -0.245309516787529, "step": 5985 }, { "epoch": 3.651669971023334, "grad_norm": 1.5397535562515259, "learning_rate": 2.1643600734843846e-06, "log_odds_chosen": 0.26073193550109863, "log_odds_ratio": -0.6500188112258911, "logits/chosen": -1.0593091249465942, "logits/rejected": -0.9488124251365662, "logps/chosen": -1.2056390047073364, "logps/rejected": -1.4765541553497314, "loss": 1.094, "nll_loss": 1.3535630702972412, "rewards/accuracies": 0.625, "rewards/chosen": -0.12056389451026917, "rewards/margins": 0.02709152363240719, "rewards/rejected": -0.1476554274559021, "step": 5986 }, { "epoch": 3.6522800061003506, "grad_norm": 1.2763333320617676, "learning_rate": 2.1633802816901407e-06, "log_odds_chosen": 3.6219983100891113, "log_odds_ratio": -0.2805982232093811, "logits/chosen": -0.505856454372406, "logits/rejected": -0.9351279735565186, "logps/chosen": -0.7218539714813232, "logps/rejected": -3.7583413124084473, "loss": 0.9721, "nll_loss": 0.9355660676956177, "rewards/accuracies": 0.875, "rewards/chosen": -0.07218539714813232, "rewards/margins": 0.3036487400531769, "rewards/rejected": -0.3758341073989868, "step": 5987 }, { "epoch": 3.6528900411773675, "grad_norm": 1.4241628646850586, "learning_rate": 2.162400489895897e-06, "log_odds_chosen": 1.553707242012024, "log_odds_ratio": -0.40066543221473694, "logits/chosen": -0.6986722350120544, "logits/rejected": -0.8112144470214844, "logps/chosen": -0.6236523985862732, "logps/rejected": -1.7311499118804932, "loss": 1.0036, "nll_loss": 0.8680970072746277, "rewards/accuracies": 0.625, "rewards/chosen": -0.062365248799324036, "rewards/margins": 0.1107497364282608, "rewards/rejected": -0.17311497032642365, "step": 5988 }, { "epoch": 3.6535000762543848, "grad_norm": 1.0901708602905273, "learning_rate": 2.1614206981016534e-06, "log_odds_chosen": 2.382561445236206, "log_odds_ratio": -0.45642223954200745, "logits/chosen": -0.6949635744094849, "logits/rejected": -0.8775399923324585, "logps/chosen": -0.7805822491645813, "logps/rejected": -2.717378854751587, "loss": 1.1181, "nll_loss": 0.9340599775314331, "rewards/accuracies": 0.75, "rewards/chosen": -0.07805823534727097, "rewards/margins": 0.19367966055870056, "rewards/rejected": -0.27173787355422974, "step": 5989 }, { "epoch": 3.6541101113314016, "grad_norm": 2.0326993465423584, "learning_rate": 2.1604409063074095e-06, "log_odds_chosen": 1.348850131034851, "log_odds_ratio": -0.40290936827659607, "logits/chosen": -0.8655079007148743, "logits/rejected": -0.9539774656295776, "logps/chosen": -0.7156617641448975, "logps/rejected": -1.7048497200012207, "loss": 1.0344, "nll_loss": 0.9240898489952087, "rewards/accuracies": 0.75, "rewards/chosen": -0.07156617939472198, "rewards/margins": 0.09891879558563232, "rewards/rejected": -0.1704849898815155, "step": 5990 }, { "epoch": 3.6547201464084185, "grad_norm": 1.253037452697754, "learning_rate": 2.159461114513166e-06, "log_odds_chosen": 0.7247886061668396, "log_odds_ratio": -0.52683424949646, "logits/chosen": -0.8740799427032471, "logits/rejected": -0.8543400168418884, "logps/chosen": -0.8641772866249084, "logps/rejected": -1.4009063243865967, "loss": 1.0788, "nll_loss": 0.9854106307029724, "rewards/accuracies": 0.625, "rewards/chosen": -0.08641773462295532, "rewards/margins": 0.053672906011343, "rewards/rejected": -0.14009062945842743, "step": 5991 }, { "epoch": 3.6553301814854353, "grad_norm": 1.7234364748001099, "learning_rate": 2.158481322718922e-06, "log_odds_chosen": 2.5839786529541016, "log_odds_ratio": -0.2860203981399536, "logits/chosen": -0.6843961477279663, "logits/rejected": -1.0155023336410522, "logps/chosen": -0.8398975729942322, "logps/rejected": -2.810021162033081, "loss": 0.9896, "nll_loss": 0.9843981266021729, "rewards/accuracies": 0.75, "rewards/chosen": -0.08398976176977158, "rewards/margins": 0.19701236486434937, "rewards/rejected": -0.28100213408470154, "step": 5992 }, { "epoch": 3.6559402165624526, "grad_norm": 7.781706809997559, "learning_rate": 2.1575015309246783e-06, "log_odds_chosen": 1.980043649673462, "log_odds_ratio": -0.5014432668685913, "logits/chosen": -0.8033628463745117, "logits/rejected": -0.9563703536987305, "logps/chosen": -0.9429664611816406, "logps/rejected": -2.6406679153442383, "loss": 1.1886, "nll_loss": 1.0937823057174683, "rewards/accuracies": 0.75, "rewards/chosen": -0.0942966490983963, "rewards/margins": 0.16977015137672424, "rewards/rejected": -0.26406681537628174, "step": 5993 }, { "epoch": 3.6565502516394695, "grad_norm": 4.134922981262207, "learning_rate": 2.156521739130435e-06, "log_odds_chosen": 2.5823235511779785, "log_odds_ratio": -0.5169974565505981, "logits/chosen": -0.7890008091926575, "logits/rejected": -1.0281000137329102, "logps/chosen": -0.8905848860740662, "logps/rejected": -3.120913505554199, "loss": 1.0917, "nll_loss": 1.0605989694595337, "rewards/accuracies": 0.625, "rewards/chosen": -0.08905848860740662, "rewards/margins": 0.22303283214569092, "rewards/rejected": -0.3120913505554199, "step": 5994 }, { "epoch": 3.6571602867164863, "grad_norm": 10.409021377563477, "learning_rate": 2.155541947336191e-06, "log_odds_chosen": 1.3441853523254395, "log_odds_ratio": -0.50969398021698, "logits/chosen": -0.911560595035553, "logits/rejected": -0.9077495336532593, "logps/chosen": -1.2074946165084839, "logps/rejected": -2.3303639888763428, "loss": 1.1895, "nll_loss": 1.2390292882919312, "rewards/accuracies": 0.625, "rewards/chosen": -0.12074947357177734, "rewards/margins": 0.11228694021701813, "rewards/rejected": -0.23303639888763428, "step": 5995 }, { "epoch": 3.657770321793503, "grad_norm": 1.4037727117538452, "learning_rate": 2.154562155541947e-06, "log_odds_chosen": 0.5481119155883789, "log_odds_ratio": -0.5697624087333679, "logits/chosen": -0.9514231085777283, "logits/rejected": -0.8998427987098694, "logps/chosen": -0.8484607338905334, "logps/rejected": -1.0906293392181396, "loss": 0.9931, "nll_loss": 1.1022212505340576, "rewards/accuracies": 0.5, "rewards/chosen": -0.08484607189893723, "rewards/margins": 0.02421686425805092, "rewards/rejected": -0.10906293988227844, "step": 5996 }, { "epoch": 3.65838035687052, "grad_norm": 1.2632153034210205, "learning_rate": 2.1535823637477036e-06, "log_odds_chosen": 1.4461288452148438, "log_odds_ratio": -0.3016338646411896, "logits/chosen": -0.8243581056594849, "logits/rejected": -0.7367815971374512, "logps/chosen": -0.8282630443572998, "logps/rejected": -1.9326105117797852, "loss": 0.9957, "nll_loss": 0.968905508518219, "rewards/accuracies": 1.0, "rewards/chosen": -0.08282630890607834, "rewards/margins": 0.11043474823236465, "rewards/rejected": -0.193261057138443, "step": 5997 }, { "epoch": 3.658990391947537, "grad_norm": 1.5614415407180786, "learning_rate": 2.15260257195346e-06, "log_odds_chosen": 2.886610984802246, "log_odds_ratio": -0.29332512617111206, "logits/chosen": -0.6431079506874084, "logits/rejected": -0.8538857102394104, "logps/chosen": -0.5503594875335693, "logps/rejected": -2.6835806369781494, "loss": 1.1786, "nll_loss": 0.7265366315841675, "rewards/accuracies": 0.75, "rewards/chosen": -0.05503595247864723, "rewards/margins": 0.21332211792469025, "rewards/rejected": -0.2683580815792084, "step": 5998 }, { "epoch": 3.6596004270245537, "grad_norm": 2.389031171798706, "learning_rate": 2.151622780159216e-06, "log_odds_chosen": 1.3950440883636475, "log_odds_ratio": -0.3312617838382721, "logits/chosen": -0.3763028383255005, "logits/rejected": -0.3215484321117401, "logps/chosen": -0.6647371053695679, "logps/rejected": -1.5222275257110596, "loss": 1.0575, "nll_loss": 0.8688569664955139, "rewards/accuracies": 0.875, "rewards/chosen": -0.06647371500730515, "rewards/margins": 0.08574902266263962, "rewards/rejected": -0.15222275257110596, "step": 5999 }, { "epoch": 3.660210462101571, "grad_norm": 1.3657346963882446, "learning_rate": 2.1506429883649724e-06, "log_odds_chosen": 3.0559215545654297, "log_odds_ratio": -0.2293820083141327, "logits/chosen": -0.6886243224143982, "logits/rejected": -0.9925625324249268, "logps/chosen": -0.6871463060379028, "logps/rejected": -3.091294765472412, "loss": 1.1903, "nll_loss": 0.9416735172271729, "rewards/accuracies": 1.0, "rewards/chosen": -0.06871463358402252, "rewards/margins": 0.2404148429632187, "rewards/rejected": -0.3091294765472412, "step": 6000 }, { "epoch": 3.660820497178588, "grad_norm": 10.883676528930664, "learning_rate": 2.149663196570729e-06, "log_odds_chosen": 2.413496732711792, "log_odds_ratio": -0.4535144865512848, "logits/chosen": -0.5555787682533264, "logits/rejected": -0.7347145676612854, "logps/chosen": -0.5435795783996582, "logps/rejected": -2.545816421508789, "loss": 0.791, "nll_loss": 0.7440221905708313, "rewards/accuracies": 0.625, "rewards/chosen": -0.05435795336961746, "rewards/margins": 0.2002236694097519, "rewards/rejected": -0.25458163022994995, "step": 6001 }, { "epoch": 3.6614305322556047, "grad_norm": 1.7368123531341553, "learning_rate": 2.1486834047764846e-06, "log_odds_chosen": 1.336452841758728, "log_odds_ratio": -0.5357720851898193, "logits/chosen": -0.9476702213287354, "logits/rejected": -1.103507161140442, "logps/chosen": -0.9507187604904175, "logps/rejected": -2.112285852432251, "loss": 1.0234, "nll_loss": 1.2175542116165161, "rewards/accuracies": 0.75, "rewards/chosen": -0.09507188200950623, "rewards/margins": 0.11615671217441559, "rewards/rejected": -0.211228609085083, "step": 6002 }, { "epoch": 3.6620405673326215, "grad_norm": 6.8705291748046875, "learning_rate": 2.147703612982241e-06, "log_odds_chosen": 0.8979217410087585, "log_odds_ratio": -0.7834893465042114, "logits/chosen": -0.7474900484085083, "logits/rejected": -0.7409360408782959, "logps/chosen": -0.8209109306335449, "logps/rejected": -1.6744630336761475, "loss": 1.0149, "nll_loss": 0.9387091994285583, "rewards/accuracies": 0.5, "rewards/chosen": -0.08209110051393509, "rewards/margins": 0.08535519242286682, "rewards/rejected": -0.1674462854862213, "step": 6003 }, { "epoch": 3.662650602409639, "grad_norm": 1.1280303001403809, "learning_rate": 2.1467238211879977e-06, "log_odds_chosen": 1.831131935119629, "log_odds_ratio": -0.34948331117630005, "logits/chosen": -0.9115803241729736, "logits/rejected": -0.9531337022781372, "logps/chosen": -0.6361292600631714, "logps/rejected": -1.8851925134658813, "loss": 0.9416, "nll_loss": 0.9510404467582703, "rewards/accuracies": 0.875, "rewards/chosen": -0.0636129230260849, "rewards/margins": 0.12490632385015488, "rewards/rejected": -0.18851925432682037, "step": 6004 }, { "epoch": 3.6632606374866556, "grad_norm": 1.3323323726654053, "learning_rate": 2.145744029393754e-06, "log_odds_chosen": 2.0303797721862793, "log_odds_ratio": -0.3821190297603607, "logits/chosen": -0.5799437165260315, "logits/rejected": -0.8225628137588501, "logps/chosen": -0.7596039772033691, "logps/rejected": -2.335657835006714, "loss": 0.9984, "nll_loss": 0.8227744698524475, "rewards/accuracies": 0.75, "rewards/chosen": -0.07596039772033691, "rewards/margins": 0.15760537981987, "rewards/rejected": -0.2335657775402069, "step": 6005 }, { "epoch": 3.6638706725636725, "grad_norm": 1.4666904211044312, "learning_rate": 2.14476423759951e-06, "log_odds_chosen": 0.9429321885108948, "log_odds_ratio": -0.5036234855651855, "logits/chosen": -0.9572427272796631, "logits/rejected": -0.8346580862998962, "logps/chosen": -0.9413196444511414, "logps/rejected": -1.6941723823547363, "loss": 0.9793, "nll_loss": 1.1522812843322754, "rewards/accuracies": 0.625, "rewards/chosen": -0.0941319689154625, "rewards/margins": 0.07528527081012726, "rewards/rejected": -0.16941724717617035, "step": 6006 }, { "epoch": 3.6644807076406893, "grad_norm": 3.604672908782959, "learning_rate": 2.143784445805266e-06, "log_odds_chosen": 2.3810536861419678, "log_odds_ratio": -0.3095971345901489, "logits/chosen": -0.6269384622573853, "logits/rejected": -0.8546875715255737, "logps/chosen": -0.6037535071372986, "logps/rejected": -2.2830567359924316, "loss": 1.1201, "nll_loss": 1.0975961685180664, "rewards/accuracies": 0.875, "rewards/chosen": -0.06037534773349762, "rewards/margins": 0.16793033480644226, "rewards/rejected": -0.2283056676387787, "step": 6007 }, { "epoch": 3.665090742717706, "grad_norm": 1.503440022468567, "learning_rate": 2.1428046540110226e-06, "log_odds_chosen": 0.9792076945304871, "log_odds_ratio": -0.5236867666244507, "logits/chosen": -0.8218895196914673, "logits/rejected": -0.8063501119613647, "logps/chosen": -0.6473016142845154, "logps/rejected": -1.345473051071167, "loss": 1.0953, "nll_loss": 1.052150011062622, "rewards/accuracies": 0.625, "rewards/chosen": -0.06473015993833542, "rewards/margins": 0.06981714069843292, "rewards/rejected": -0.13454729318618774, "step": 6008 }, { "epoch": 3.665700777794723, "grad_norm": 1.4314197301864624, "learning_rate": 2.1418248622167788e-06, "log_odds_chosen": 2.6034178733825684, "log_odds_ratio": -0.4212510287761688, "logits/chosen": -0.8678504824638367, "logits/rejected": -0.9766127467155457, "logps/chosen": -0.7226781249046326, "logps/rejected": -2.9062085151672363, "loss": 1.0308, "nll_loss": 0.9864460229873657, "rewards/accuracies": 0.75, "rewards/chosen": -0.0722678154706955, "rewards/margins": 0.2183530181646347, "rewards/rejected": -0.2906208336353302, "step": 6009 }, { "epoch": 3.66631081287174, "grad_norm": 1.3069409132003784, "learning_rate": 2.140845070422535e-06, "log_odds_chosen": 2.835144519805908, "log_odds_ratio": -0.3335408866405487, "logits/chosen": -0.8597056269645691, "logits/rejected": -0.963864803314209, "logps/chosen": -0.6735348105430603, "logps/rejected": -3.071808338165283, "loss": 0.895, "nll_loss": 0.8065398931503296, "rewards/accuracies": 0.875, "rewards/chosen": -0.06735347956418991, "rewards/margins": 0.23982734978199005, "rewards/rejected": -0.30718082189559937, "step": 6010 }, { "epoch": 3.666920847948757, "grad_norm": 1.1424522399902344, "learning_rate": 2.1398652786282914e-06, "log_odds_chosen": 1.665666103363037, "log_odds_ratio": -0.5378689169883728, "logits/chosen": -1.067138671875, "logits/rejected": -1.0641916990280151, "logps/chosen": -1.0740057229995728, "logps/rejected": -2.5346479415893555, "loss": 1.0847, "nll_loss": 1.2774577140808105, "rewards/accuracies": 0.75, "rewards/chosen": -0.1074005663394928, "rewards/margins": 0.14606422185897827, "rewards/rejected": -0.25346478819847107, "step": 6011 }, { "epoch": 3.667530883025774, "grad_norm": 1.737338900566101, "learning_rate": 2.138885486834048e-06, "log_odds_chosen": 2.30178165435791, "log_odds_ratio": -0.4359581768512726, "logits/chosen": -0.8533670902252197, "logits/rejected": -1.0647990703582764, "logps/chosen": -0.8970203995704651, "logps/rejected": -2.8466227054595947, "loss": 1.0966, "nll_loss": 1.0441985130310059, "rewards/accuracies": 0.75, "rewards/chosen": -0.08970203995704651, "rewards/margins": 0.19496023654937744, "rewards/rejected": -0.28466227650642395, "step": 6012 }, { "epoch": 3.668140918102791, "grad_norm": 1.065287709236145, "learning_rate": 2.1379056950398037e-06, "log_odds_chosen": 1.7570641040802002, "log_odds_ratio": -0.35995179414749146, "logits/chosen": -0.9268059730529785, "logits/rejected": -0.9106974005699158, "logps/chosen": -0.6997945308685303, "logps/rejected": -2.0951972007751465, "loss": 0.8227, "nll_loss": 0.7621148228645325, "rewards/accuracies": 0.875, "rewards/chosen": -0.06997945159673691, "rewards/margins": 0.13954026997089386, "rewards/rejected": -0.20951971411705017, "step": 6013 }, { "epoch": 3.6687509531798077, "grad_norm": 1.3719826936721802, "learning_rate": 2.1369259032455602e-06, "log_odds_chosen": 0.8784693479537964, "log_odds_ratio": -0.5670828819274902, "logits/chosen": -0.6312182545661926, "logits/rejected": -0.7879050374031067, "logps/chosen": -0.7720169425010681, "logps/rejected": -1.420334815979004, "loss": 1.0035, "nll_loss": 0.8714650869369507, "rewards/accuracies": 0.5, "rewards/chosen": -0.07720169425010681, "rewards/margins": 0.06483178585767746, "rewards/rejected": -0.14203348755836487, "step": 6014 }, { "epoch": 3.669360988256825, "grad_norm": 2.1882975101470947, "learning_rate": 2.1359461114513168e-06, "log_odds_chosen": 2.7335448265075684, "log_odds_ratio": -0.27867230772972107, "logits/chosen": -1.006561040878296, "logits/rejected": -1.0558185577392578, "logps/chosen": -0.7446317672729492, "logps/rejected": -3.0407495498657227, "loss": 1.1648, "nll_loss": 0.999768853187561, "rewards/accuracies": 0.75, "rewards/chosen": -0.07446318119764328, "rewards/margins": 0.22961179912090302, "rewards/rejected": -0.3040750026702881, "step": 6015 }, { "epoch": 3.669971023333842, "grad_norm": 2.7309415340423584, "learning_rate": 2.1349663196570725e-06, "log_odds_chosen": 2.5356035232543945, "log_odds_ratio": -0.2635718882083893, "logits/chosen": -0.8442981243133545, "logits/rejected": -0.9673689603805542, "logps/chosen": -0.5956954956054688, "logps/rejected": -2.486945867538452, "loss": 0.9029, "nll_loss": 0.9949751496315002, "rewards/accuracies": 1.0, "rewards/chosen": -0.05956954509019852, "rewards/margins": 0.18912504613399506, "rewards/rejected": -0.24869458377361298, "step": 6016 }, { "epoch": 3.6705810584108587, "grad_norm": 1.5218111276626587, "learning_rate": 2.133986527862829e-06, "log_odds_chosen": 1.1498618125915527, "log_odds_ratio": -0.5216402411460876, "logits/chosen": -0.6982922554016113, "logits/rejected": -0.7265956401824951, "logps/chosen": -0.8022669553756714, "logps/rejected": -1.3897078037261963, "loss": 0.9585, "nll_loss": 0.8430600166320801, "rewards/accuracies": 0.625, "rewards/chosen": -0.08022669702768326, "rewards/margins": 0.05874408781528473, "rewards/rejected": -0.1389707773923874, "step": 6017 }, { "epoch": 3.6711910934878755, "grad_norm": 1.305548071861267, "learning_rate": 2.1330067360685856e-06, "log_odds_chosen": 2.060898542404175, "log_odds_ratio": -0.351127028465271, "logits/chosen": -0.6669109463691711, "logits/rejected": -0.8105875253677368, "logps/chosen": -0.6758126020431519, "logps/rejected": -1.9194183349609375, "loss": 0.8227, "nll_loss": 0.8656444549560547, "rewards/accuracies": 0.875, "rewards/chosen": -0.06758126616477966, "rewards/margins": 0.1243605762720108, "rewards/rejected": -0.19194182753562927, "step": 6018 }, { "epoch": 3.6718011285648924, "grad_norm": 2.7040700912475586, "learning_rate": 2.1320269442743417e-06, "log_odds_chosen": 1.3377238512039185, "log_odds_ratio": -0.45174306631088257, "logits/chosen": -0.8458137512207031, "logits/rejected": -0.9729960560798645, "logps/chosen": -0.8374224305152893, "logps/rejected": -1.7694435119628906, "loss": 1.1633, "nll_loss": 1.14507257938385, "rewards/accuracies": 0.75, "rewards/chosen": -0.08374225348234177, "rewards/margins": 0.09320209920406342, "rewards/rejected": -0.17694434523582458, "step": 6019 }, { "epoch": 3.6724111636419092, "grad_norm": 7.294165134429932, "learning_rate": 2.131047152480098e-06, "log_odds_chosen": 2.1826419830322266, "log_odds_ratio": -0.300132155418396, "logits/chosen": -1.1082298755645752, "logits/rejected": -1.07365882396698, "logps/chosen": -0.9125516414642334, "logps/rejected": -2.6839582920074463, "loss": 1.1647, "nll_loss": 1.2416305541992188, "rewards/accuracies": 0.875, "rewards/chosen": -0.09125517308712006, "rewards/margins": 0.17714068293571472, "rewards/rejected": -0.2683958411216736, "step": 6020 }, { "epoch": 3.673021198718926, "grad_norm": 5.119103908538818, "learning_rate": 2.1300673606858544e-06, "log_odds_chosen": 1.8741258382797241, "log_odds_ratio": -0.42009156942367554, "logits/chosen": -0.5803859233856201, "logits/rejected": -0.9205948114395142, "logps/chosen": -0.802990198135376, "logps/rejected": -2.340904712677002, "loss": 1.1676, "nll_loss": 0.9155291318893433, "rewards/accuracies": 0.75, "rewards/chosen": -0.080299012362957, "rewards/margins": 0.15379145741462708, "rewards/rejected": -0.23409047722816467, "step": 6021 }, { "epoch": 3.6736312337959434, "grad_norm": 1.6225734949111938, "learning_rate": 2.1290875688916105e-06, "log_odds_chosen": 1.7124223709106445, "log_odds_ratio": -0.44990667700767517, "logits/chosen": -0.755194902420044, "logits/rejected": -0.9364391565322876, "logps/chosen": -0.7778376936912537, "logps/rejected": -2.1106886863708496, "loss": 1.0675, "nll_loss": 1.020799160003662, "rewards/accuracies": 0.625, "rewards/chosen": -0.07778377085924149, "rewards/margins": 0.13328510522842407, "rewards/rejected": -0.21106886863708496, "step": 6022 }, { "epoch": 3.67424126887296, "grad_norm": 1.2710084915161133, "learning_rate": 2.1281077770973666e-06, "log_odds_chosen": 3.1179490089416504, "log_odds_ratio": -0.3919428288936615, "logits/chosen": -0.7661925554275513, "logits/rejected": -0.9685277938842773, "logps/chosen": -0.8428018689155579, "logps/rejected": -3.715010404586792, "loss": 1.0146, "nll_loss": 1.0811161994934082, "rewards/accuracies": 0.75, "rewards/chosen": -0.08428018540143967, "rewards/margins": 0.28722086548805237, "rewards/rejected": -0.37150102853775024, "step": 6023 }, { "epoch": 3.674851303949977, "grad_norm": 0.9977359771728516, "learning_rate": 2.127127985303123e-06, "log_odds_chosen": 1.849824070930481, "log_odds_ratio": -0.4016718864440918, "logits/chosen": -0.7458347678184509, "logits/rejected": -0.9335637092590332, "logps/chosen": -0.7501974105834961, "logps/rejected": -2.1763031482696533, "loss": 1.0867, "nll_loss": 0.9862813949584961, "rewards/accuracies": 0.875, "rewards/chosen": -0.07501974701881409, "rewards/margins": 0.142610564827919, "rewards/rejected": -0.2176303118467331, "step": 6024 }, { "epoch": 3.675461339026994, "grad_norm": 4.003711223602295, "learning_rate": 2.1261481935088793e-06, "log_odds_chosen": 3.062337875366211, "log_odds_ratio": -0.4493691921234131, "logits/chosen": -0.8475038409233093, "logits/rejected": -1.0206153392791748, "logps/chosen": -0.6009622812271118, "logps/rejected": -3.0293776988983154, "loss": 0.9309, "nll_loss": 0.8292955160140991, "rewards/accuracies": 0.75, "rewards/chosen": -0.06009622663259506, "rewards/margins": 0.24284152686595917, "rewards/rejected": -0.302937775850296, "step": 6025 }, { "epoch": 3.676071374104011, "grad_norm": 13.210744857788086, "learning_rate": 2.125168401714636e-06, "log_odds_chosen": 3.4991843700408936, "log_odds_ratio": -0.2554587125778198, "logits/chosen": -0.613676905632019, "logits/rejected": -0.7883687019348145, "logps/chosen": -0.6260641813278198, "logps/rejected": -3.3293850421905518, "loss": 1.0142, "nll_loss": 0.843133807182312, "rewards/accuracies": 0.875, "rewards/chosen": -0.06260642409324646, "rewards/margins": 0.27033206820487976, "rewards/rejected": -0.3329384922981262, "step": 6026 }, { "epoch": 3.676681409181028, "grad_norm": 1.425186276435852, "learning_rate": 2.1241886099203915e-06, "log_odds_chosen": 2.369354724884033, "log_odds_ratio": -0.39623481035232544, "logits/chosen": -0.81130051612854, "logits/rejected": -1.0871531963348389, "logps/chosen": -0.7704145312309265, "logps/rejected": -2.5488226413726807, "loss": 0.8857, "nll_loss": 0.9151061773300171, "rewards/accuracies": 0.625, "rewards/chosen": -0.07704144716262817, "rewards/margins": 0.17784082889556885, "rewards/rejected": -0.254882276058197, "step": 6027 }, { "epoch": 3.677291444258045, "grad_norm": 2.6585042476654053, "learning_rate": 2.123208818126148e-06, "log_odds_chosen": 4.109650611877441, "log_odds_ratio": -0.2672874927520752, "logits/chosen": -0.7864634394645691, "logits/rejected": -1.191269040107727, "logps/chosen": -0.6449347138404846, "logps/rejected": -4.154219150543213, "loss": 1.1227, "nll_loss": 1.0539493560791016, "rewards/accuracies": 0.875, "rewards/chosen": -0.06449346989393234, "rewards/margins": 0.3509284555912018, "rewards/rejected": -0.4154219627380371, "step": 6028 }, { "epoch": 3.6779014793350617, "grad_norm": 1.756650447845459, "learning_rate": 2.1222290263319046e-06, "log_odds_chosen": 3.532175302505493, "log_odds_ratio": -0.25467783212661743, "logits/chosen": -0.8228516578674316, "logits/rejected": -0.872287929058075, "logps/chosen": -0.7259666919708252, "logps/rejected": -3.809067726135254, "loss": 1.0567, "nll_loss": 0.9012244939804077, "rewards/accuracies": 0.875, "rewards/chosen": -0.07259666919708252, "rewards/margins": 0.3083100914955139, "rewards/rejected": -0.38090676069259644, "step": 6029 }, { "epoch": 3.6785115144120786, "grad_norm": 2.5222957134246826, "learning_rate": 2.1212492345376603e-06, "log_odds_chosen": 2.021111488342285, "log_odds_ratio": -0.3267596960067749, "logits/chosen": -0.8864867687225342, "logits/rejected": -0.9676016569137573, "logps/chosen": -0.8678693771362305, "logps/rejected": -2.5681872367858887, "loss": 1.0456, "nll_loss": 1.0064998865127563, "rewards/accuracies": 0.875, "rewards/chosen": -0.08678694069385529, "rewards/margins": 0.17003178596496582, "rewards/rejected": -0.2568187415599823, "step": 6030 }, { "epoch": 3.6791215494890954, "grad_norm": 3.204376220703125, "learning_rate": 2.120269442743417e-06, "log_odds_chosen": 3.742393970489502, "log_odds_ratio": -0.40823549032211304, "logits/chosen": -0.7232648730278015, "logits/rejected": -0.7390127182006836, "logps/chosen": -0.7372620701789856, "logps/rejected": -4.0613603591918945, "loss": 1.1375, "nll_loss": 0.8305901288986206, "rewards/accuracies": 0.875, "rewards/chosen": -0.07372619956731796, "rewards/margins": 0.3324097990989685, "rewards/rejected": -0.40613603591918945, "step": 6031 }, { "epoch": 3.6797315845661127, "grad_norm": 1.6364809274673462, "learning_rate": 2.1192896509491734e-06, "log_odds_chosen": 2.4079344272613525, "log_odds_ratio": -0.3626273274421692, "logits/chosen": -0.9397090077400208, "logits/rejected": -0.9872258305549622, "logps/chosen": -0.7611148357391357, "logps/rejected": -2.6522507667541504, "loss": 1.0412, "nll_loss": 0.905902624130249, "rewards/accuracies": 0.75, "rewards/chosen": -0.07611148804426193, "rewards/margins": 0.1891135722398758, "rewards/rejected": -0.2652250826358795, "step": 6032 }, { "epoch": 3.6803416196431296, "grad_norm": 1.2725598812103271, "learning_rate": 2.1183098591549295e-06, "log_odds_chosen": 2.5456535816192627, "log_odds_ratio": -0.2537316083908081, "logits/chosen": -0.805274248123169, "logits/rejected": -1.0034278631210327, "logps/chosen": -0.7660316824913025, "logps/rejected": -2.7140214443206787, "loss": 1.0318, "nll_loss": 0.8987764716148376, "rewards/accuracies": 0.875, "rewards/chosen": -0.07660317420959473, "rewards/margins": 0.19479899108409882, "rewards/rejected": -0.27140215039253235, "step": 6033 }, { "epoch": 3.6809516547201464, "grad_norm": 1.9316133260726929, "learning_rate": 2.1173300673606856e-06, "log_odds_chosen": 1.3470807075500488, "log_odds_ratio": -0.5358835458755493, "logits/chosen": -0.9203652739524841, "logits/rejected": -0.9330968856811523, "logps/chosen": -0.9137217998504639, "logps/rejected": -2.1291069984436035, "loss": 1.0859, "nll_loss": 1.0799086093902588, "rewards/accuracies": 0.75, "rewards/chosen": -0.09137217700481415, "rewards/margins": 0.12153850495815277, "rewards/rejected": -0.21291068196296692, "step": 6034 }, { "epoch": 3.6815616897971633, "grad_norm": 2.281914472579956, "learning_rate": 2.116350275566442e-06, "log_odds_chosen": 2.1701571941375732, "log_odds_ratio": -0.3820553421974182, "logits/chosen": -0.9879802465438843, "logits/rejected": -1.0460987091064453, "logps/chosen": -0.740213930606842, "logps/rejected": -2.4225594997406006, "loss": 0.9472, "nll_loss": 1.0819369554519653, "rewards/accuracies": 0.75, "rewards/chosen": -0.07402139902114868, "rewards/margins": 0.16823452711105347, "rewards/rejected": -0.24225592613220215, "step": 6035 }, { "epoch": 3.6821717248741805, "grad_norm": 1.4870367050170898, "learning_rate": 2.1153704837721983e-06, "log_odds_chosen": 1.7383601665496826, "log_odds_ratio": -0.43786466121673584, "logits/chosen": -1.0091489553451538, "logits/rejected": -0.9662221670150757, "logps/chosen": -0.8303039073944092, "logps/rejected": -2.308932304382324, "loss": 1.1226, "nll_loss": 1.0869430303573608, "rewards/accuracies": 0.75, "rewards/chosen": -0.08303039520978928, "rewards/margins": 0.14786283671855927, "rewards/rejected": -0.23089322447776794, "step": 6036 }, { "epoch": 3.6827817599511974, "grad_norm": 2.096322774887085, "learning_rate": 2.1143906919779544e-06, "log_odds_chosen": 1.133359670639038, "log_odds_ratio": -0.5539947748184204, "logits/chosen": -0.5231307744979858, "logits/rejected": -0.8342616558074951, "logps/chosen": -0.9748727083206177, "logps/rejected": -1.8349518775939941, "loss": 1.1132, "nll_loss": 0.9846067428588867, "rewards/accuracies": 0.75, "rewards/chosen": -0.09748727083206177, "rewards/margins": 0.08600792288780212, "rewards/rejected": -0.1834951788187027, "step": 6037 }, { "epoch": 3.6833917950282142, "grad_norm": 1.5847220420837402, "learning_rate": 2.113410900183711e-06, "log_odds_chosen": 0.7738357782363892, "log_odds_ratio": -0.5838080644607544, "logits/chosen": -1.0411276817321777, "logits/rejected": -0.95525062084198, "logps/chosen": -1.0301951169967651, "logps/rejected": -1.7233052253723145, "loss": 1.0092, "nll_loss": 1.1630439758300781, "rewards/accuracies": 0.5, "rewards/chosen": -0.10301952064037323, "rewards/margins": 0.06931100785732269, "rewards/rejected": -0.17233052849769592, "step": 6038 }, { "epoch": 3.684001830105231, "grad_norm": 1.4338160753250122, "learning_rate": 2.112431108389467e-06, "log_odds_chosen": 3.352889060974121, "log_odds_ratio": -0.29707518219947815, "logits/chosen": -1.0018314123153687, "logits/rejected": -1.1247544288635254, "logps/chosen": -0.6701381802558899, "logps/rejected": -3.4106526374816895, "loss": 1.0773, "nll_loss": 1.0623884201049805, "rewards/accuracies": 0.875, "rewards/chosen": -0.06701382249593735, "rewards/margins": 0.2740514874458313, "rewards/rejected": -0.34106528759002686, "step": 6039 }, { "epoch": 3.684611865182248, "grad_norm": 1.8782434463500977, "learning_rate": 2.1114513165952236e-06, "log_odds_chosen": 1.1701023578643799, "log_odds_ratio": -0.44354379177093506, "logits/chosen": -0.8544400930404663, "logits/rejected": -0.870124101638794, "logps/chosen": -0.8025793433189392, "logps/rejected": -1.6490966081619263, "loss": 0.9571, "nll_loss": 1.178611159324646, "rewards/accuracies": 0.875, "rewards/chosen": -0.08025793731212616, "rewards/margins": 0.08465172350406647, "rewards/rejected": -0.16490967571735382, "step": 6040 }, { "epoch": 3.6852219002592648, "grad_norm": 2.114461660385132, "learning_rate": 2.1104715248009798e-06, "log_odds_chosen": 0.5153101682662964, "log_odds_ratio": -0.6215306520462036, "logits/chosen": -0.9405036568641663, "logits/rejected": -1.0320171117782593, "logps/chosen": -0.9021053314208984, "logps/rejected": -1.25384521484375, "loss": 1.0556, "nll_loss": 0.9036437273025513, "rewards/accuracies": 0.5, "rewards/chosen": -0.09021052718162537, "rewards/margins": 0.03517399728298187, "rewards/rejected": -0.12538453936576843, "step": 6041 }, { "epoch": 3.6858319353362816, "grad_norm": 1.422105073928833, "learning_rate": 2.109491733006736e-06, "log_odds_chosen": 1.0423592329025269, "log_odds_ratio": -0.4766235947608948, "logits/chosen": -1.022923231124878, "logits/rejected": -1.034813642501831, "logps/chosen": -1.0664114952087402, "logps/rejected": -1.9133563041687012, "loss": 1.1709, "nll_loss": 1.1525665521621704, "rewards/accuracies": 0.75, "rewards/chosen": -0.10664115846157074, "rewards/margins": 0.08469447493553162, "rewards/rejected": -0.19133564829826355, "step": 6042 }, { "epoch": 3.686441970413299, "grad_norm": 1.2258504629135132, "learning_rate": 2.1085119412124924e-06, "log_odds_chosen": 2.189858913421631, "log_odds_ratio": -0.2791677713394165, "logits/chosen": -0.8116005063056946, "logits/rejected": -0.9940690994262695, "logps/chosen": -0.6174836754798889, "logps/rejected": -2.0775578022003174, "loss": 1.0306, "nll_loss": 1.1095854043960571, "rewards/accuracies": 0.875, "rewards/chosen": -0.06174837052822113, "rewards/margins": 0.14600740373134613, "rewards/rejected": -0.20775577425956726, "step": 6043 }, { "epoch": 3.6870520054903158, "grad_norm": 3.7437262535095215, "learning_rate": 2.107532149418248e-06, "log_odds_chosen": 1.3450132608413696, "log_odds_ratio": -0.46945226192474365, "logits/chosen": -1.052536129951477, "logits/rejected": -0.9543692469596863, "logps/chosen": -0.8809241056442261, "logps/rejected": -2.012622117996216, "loss": 1.0885, "nll_loss": 0.9512753486633301, "rewards/accuracies": 0.625, "rewards/chosen": -0.08809240162372589, "rewards/margins": 0.113169826567173, "rewards/rejected": -0.20126225054264069, "step": 6044 }, { "epoch": 3.6876620405673326, "grad_norm": 1.8167951107025146, "learning_rate": 2.1065523576240047e-06, "log_odds_chosen": 2.1850149631500244, "log_odds_ratio": -0.5284217596054077, "logits/chosen": -0.9174466133117676, "logits/rejected": -1.1095737218856812, "logps/chosen": -0.6967319846153259, "logps/rejected": -2.091012954711914, "loss": 1.0567, "nll_loss": 1.0475678443908691, "rewards/accuracies": 0.5, "rewards/chosen": -0.06967320293188095, "rewards/margins": 0.13942810893058777, "rewards/rejected": -0.20910128951072693, "step": 6045 }, { "epoch": 3.6882720756443494, "grad_norm": 8.610304832458496, "learning_rate": 2.1055725658297612e-06, "log_odds_chosen": 0.8344898223876953, "log_odds_ratio": -0.5497090816497803, "logits/chosen": -0.7310926914215088, "logits/rejected": -0.7055926322937012, "logps/chosen": -0.7927663326263428, "logps/rejected": -1.5086238384246826, "loss": 1.0584, "nll_loss": 1.019108772277832, "rewards/accuracies": 0.625, "rewards/chosen": -0.07927663624286652, "rewards/margins": 0.07158573716878891, "rewards/rejected": -0.15086236596107483, "step": 6046 }, { "epoch": 3.6888821107213667, "grad_norm": 2.083850145339966, "learning_rate": 2.1045927740355173e-06, "log_odds_chosen": 0.5446614623069763, "log_odds_ratio": -0.605523407459259, "logits/chosen": -1.2568202018737793, "logits/rejected": -1.043386459350586, "logps/chosen": -1.1139839887619019, "logps/rejected": -1.5694434642791748, "loss": 1.3423, "nll_loss": 1.2908388376235962, "rewards/accuracies": 0.625, "rewards/chosen": -0.11139839887619019, "rewards/margins": 0.045545946806669235, "rewards/rejected": -0.15694434940814972, "step": 6047 }, { "epoch": 3.6894921457983836, "grad_norm": 8.420305252075195, "learning_rate": 2.1036129822412735e-06, "log_odds_chosen": 0.8757901191711426, "log_odds_ratio": -0.4012112319469452, "logits/chosen": -0.9529293775558472, "logits/rejected": -0.9848411083221436, "logps/chosen": -0.759903073310852, "logps/rejected": -1.3092135190963745, "loss": 0.9953, "nll_loss": 0.9553742408752441, "rewards/accuracies": 0.875, "rewards/chosen": -0.07599030435085297, "rewards/margins": 0.05493105202913284, "rewards/rejected": -0.1309213638305664, "step": 6048 }, { "epoch": 3.6901021808754004, "grad_norm": 1.5040510892868042, "learning_rate": 2.10263319044703e-06, "log_odds_chosen": 1.0692436695098877, "log_odds_ratio": -0.4780504107475281, "logits/chosen": -0.8590680360794067, "logits/rejected": -0.6946763396263123, "logps/chosen": -0.9585561752319336, "logps/rejected": -1.6641877889633179, "loss": 1.0483, "nll_loss": 0.8594460487365723, "rewards/accuracies": 0.75, "rewards/chosen": -0.09585563838481903, "rewards/margins": 0.07056315243244171, "rewards/rejected": -0.16641877591609955, "step": 6049 }, { "epoch": 3.6907122159524173, "grad_norm": 1.5424063205718994, "learning_rate": 2.101653398652786e-06, "log_odds_chosen": 2.609414577484131, "log_odds_ratio": -0.48901647329330444, "logits/chosen": -0.8264449834823608, "logits/rejected": -1.0251927375793457, "logps/chosen": -1.0306508541107178, "logps/rejected": -3.141083240509033, "loss": 1.1932, "nll_loss": 1.0665096044540405, "rewards/accuracies": 0.625, "rewards/chosen": -0.10306508839130402, "rewards/margins": 0.21104323863983154, "rewards/rejected": -0.31410834193229675, "step": 6050 }, { "epoch": 3.691322251029434, "grad_norm": 3.763960361480713, "learning_rate": 2.1006736068585423e-06, "log_odds_chosen": 1.4471410512924194, "log_odds_ratio": -0.5013560056686401, "logits/chosen": -0.7683182954788208, "logits/rejected": -0.9386424422264099, "logps/chosen": -0.7509230971336365, "logps/rejected": -1.9050967693328857, "loss": 0.9438, "nll_loss": 0.9099757671356201, "rewards/accuracies": 0.5, "rewards/chosen": -0.07509230822324753, "rewards/margins": 0.11541736125946045, "rewards/rejected": -0.19050967693328857, "step": 6051 }, { "epoch": 3.691932286106451, "grad_norm": 2.6001503467559814, "learning_rate": 2.099693815064299e-06, "log_odds_chosen": 2.5227417945861816, "log_odds_ratio": -0.4503049850463867, "logits/chosen": -0.7729965448379517, "logits/rejected": -0.91704261302948, "logps/chosen": -0.7410352230072021, "logps/rejected": -2.8967702388763428, "loss": 0.988, "nll_loss": 0.8298050165176392, "rewards/accuracies": 0.625, "rewards/chosen": -0.07410351932048798, "rewards/margins": 0.2155735045671463, "rewards/rejected": -0.2896770238876343, "step": 6052 }, { "epoch": 3.692542321183468, "grad_norm": 1.3311508893966675, "learning_rate": 2.098714023270055e-06, "log_odds_chosen": 1.5927774906158447, "log_odds_ratio": -0.5655462741851807, "logits/chosen": -0.8654976487159729, "logits/rejected": -0.8317406177520752, "logps/chosen": -0.7453510165214539, "logps/rejected": -1.9212749004364014, "loss": 0.8643, "nll_loss": 0.8527751564979553, "rewards/accuracies": 0.625, "rewards/chosen": -0.07453510165214539, "rewards/margins": 0.11759239435195923, "rewards/rejected": -0.19212749600410461, "step": 6053 }, { "epoch": 3.693152356260485, "grad_norm": 1.2492073774337769, "learning_rate": 2.0977342314758115e-06, "log_odds_chosen": 1.730764627456665, "log_odds_ratio": -0.3049718737602234, "logits/chosen": -0.8648175597190857, "logits/rejected": -0.9666533470153809, "logps/chosen": -0.7313445210456848, "logps/rejected": -1.920013666152954, "loss": 1.0482, "nll_loss": 0.8843058347702026, "rewards/accuracies": 0.875, "rewards/chosen": -0.07313445210456848, "rewards/margins": 0.1188669204711914, "rewards/rejected": -0.19200138747692108, "step": 6054 }, { "epoch": 3.693762391337502, "grad_norm": 1.9100037813186646, "learning_rate": 2.0967544396815676e-06, "log_odds_chosen": 1.591537356376648, "log_odds_ratio": -0.563117265701294, "logits/chosen": -0.8994579911231995, "logits/rejected": -0.8703457117080688, "logps/chosen": -0.9511151909828186, "logps/rejected": -2.3016059398651123, "loss": 1.0412, "nll_loss": 1.0626072883605957, "rewards/accuracies": 0.625, "rewards/chosen": -0.09511151909828186, "rewards/margins": 0.13504907488822937, "rewards/rejected": -0.23016062378883362, "step": 6055 }, { "epoch": 3.694372426414519, "grad_norm": 1.3406503200531006, "learning_rate": 2.0957746478873237e-06, "log_odds_chosen": 4.3373236656188965, "log_odds_ratio": -0.16601499915122986, "logits/chosen": -0.6552422642707825, "logits/rejected": -0.8239352107048035, "logps/chosen": -0.7920379638671875, "logps/rejected": -4.325899600982666, "loss": 1.0618, "nll_loss": 1.1193126440048218, "rewards/accuracies": 1.0, "rewards/chosen": -0.07920379936695099, "rewards/margins": 0.35338616371154785, "rewards/rejected": -0.43258994817733765, "step": 6056 }, { "epoch": 3.6949824614915356, "grad_norm": 5.6980695724487305, "learning_rate": 2.0947948560930803e-06, "log_odds_chosen": 2.2272567749023438, "log_odds_ratio": -0.3199152946472168, "logits/chosen": -0.9526127576828003, "logits/rejected": -0.960397481918335, "logps/chosen": -0.784530520439148, "logps/rejected": -2.6270530223846436, "loss": 0.9815, "nll_loss": 0.8940384387969971, "rewards/accuracies": 0.875, "rewards/chosen": -0.07845305651426315, "rewards/margins": 0.18425226211547852, "rewards/rejected": -0.26270532608032227, "step": 6057 }, { "epoch": 3.695592496568553, "grad_norm": 2.0231001377105713, "learning_rate": 2.0938150642988364e-06, "log_odds_chosen": 2.0265514850616455, "log_odds_ratio": -0.3034009039402008, "logits/chosen": -0.6032984256744385, "logits/rejected": -0.8857506513595581, "logps/chosen": -0.5508298873901367, "logps/rejected": -1.953067660331726, "loss": 1.0136, "nll_loss": 0.81775963306427, "rewards/accuracies": 0.875, "rewards/chosen": -0.05508299171924591, "rewards/margins": 0.14022378623485565, "rewards/rejected": -0.19530677795410156, "step": 6058 }, { "epoch": 3.6962025316455698, "grad_norm": 1.47975754737854, "learning_rate": 2.0928352725045925e-06, "log_odds_chosen": 3.7734947204589844, "log_odds_ratio": -0.24104434251785278, "logits/chosen": -0.8710049390792847, "logits/rejected": -1.0284360647201538, "logps/chosen": -0.6927423477172852, "logps/rejected": -3.874683141708374, "loss": 0.8052, "nll_loss": 0.8275653123855591, "rewards/accuracies": 0.875, "rewards/chosen": -0.06927423924207687, "rewards/margins": 0.31819403171539307, "rewards/rejected": -0.3874683082103729, "step": 6059 }, { "epoch": 3.6968125667225866, "grad_norm": 1.213230013847351, "learning_rate": 2.091855480710349e-06, "log_odds_chosen": 1.7664512395858765, "log_odds_ratio": -0.3517918288707733, "logits/chosen": -0.7340459823608398, "logits/rejected": -0.7932517528533936, "logps/chosen": -0.5826340913772583, "logps/rejected": -1.7540884017944336, "loss": 1.0094, "nll_loss": 0.8682106733322144, "rewards/accuracies": 0.75, "rewards/chosen": -0.05826340988278389, "rewards/margins": 0.11714542657136917, "rewards/rejected": -0.17540884017944336, "step": 6060 }, { "epoch": 3.6974226017996035, "grad_norm": 2.2651944160461426, "learning_rate": 2.0908756889161056e-06, "log_odds_chosen": 0.12577901780605316, "log_odds_ratio": -0.6851126551628113, "logits/chosen": -1.0454514026641846, "logits/rejected": -1.0130969285964966, "logps/chosen": -1.11337149143219, "logps/rejected": -1.2140681743621826, "loss": 1.1554, "nll_loss": 1.269412875175476, "rewards/accuracies": 0.5, "rewards/chosen": -0.11133714020252228, "rewards/margins": 0.010069670155644417, "rewards/rejected": -0.12140682339668274, "step": 6061 }, { "epoch": 3.6980326368766203, "grad_norm": 9.470728874206543, "learning_rate": 2.0898958971218613e-06, "log_odds_chosen": 3.1810929775238037, "log_odds_ratio": -0.15835076570510864, "logits/chosen": -0.7359552383422852, "logits/rejected": -1.022385835647583, "logps/chosen": -0.6267288327217102, "logps/rejected": -3.0448038578033447, "loss": 0.9955, "nll_loss": 0.8524066209793091, "rewards/accuracies": 0.875, "rewards/chosen": -0.06267288327217102, "rewards/margins": 0.2418074905872345, "rewards/rejected": -0.3044804036617279, "step": 6062 }, { "epoch": 3.698642671953637, "grad_norm": 1.674081563949585, "learning_rate": 2.088916105327618e-06, "log_odds_chosen": 1.716873049736023, "log_odds_ratio": -0.3444265127182007, "logits/chosen": -0.6751439571380615, "logits/rejected": -0.7564125657081604, "logps/chosen": -0.6401039361953735, "logps/rejected": -1.7412747144699097, "loss": 1.1063, "nll_loss": 0.9150767922401428, "rewards/accuracies": 0.75, "rewards/chosen": -0.06401039659976959, "rewards/margins": 0.11011707037687302, "rewards/rejected": -0.174127459526062, "step": 6063 }, { "epoch": 3.699252707030654, "grad_norm": 1.7053861618041992, "learning_rate": 2.087936313533374e-06, "log_odds_chosen": 1.4075692892074585, "log_odds_ratio": -0.44479334354400635, "logits/chosen": -0.7735410928726196, "logits/rejected": -0.7597605586051941, "logps/chosen": -0.806030809879303, "logps/rejected": -1.929300308227539, "loss": 0.8908, "nll_loss": 0.7929033637046814, "rewards/accuracies": 0.625, "rewards/chosen": -0.08060307800769806, "rewards/margins": 0.1123269647359848, "rewards/rejected": -0.19293002784252167, "step": 6064 }, { "epoch": 3.6998627421076713, "grad_norm": 3.681654930114746, "learning_rate": 2.08695652173913e-06, "log_odds_chosen": 1.9952038526535034, "log_odds_ratio": -0.37717878818511963, "logits/chosen": -0.8853076696395874, "logits/rejected": -0.9294043779373169, "logps/chosen": -0.7666553854942322, "logps/rejected": -2.295597553253174, "loss": 1.1118, "nll_loss": 1.0403720140457153, "rewards/accuracies": 0.875, "rewards/chosen": -0.07666554301977158, "rewards/margins": 0.15289422869682312, "rewards/rejected": -0.2295597642660141, "step": 6065 }, { "epoch": 3.700472777184688, "grad_norm": 2.4922308921813965, "learning_rate": 2.0859767299448866e-06, "log_odds_chosen": 1.1011426448822021, "log_odds_ratio": -0.5086719989776611, "logits/chosen": -0.9782218933105469, "logits/rejected": -1.014241099357605, "logps/chosen": -0.9765323400497437, "logps/rejected": -1.8705652952194214, "loss": 1.0839, "nll_loss": 1.3822253942489624, "rewards/accuracies": 0.75, "rewards/chosen": -0.09765323996543884, "rewards/margins": 0.08940330147743225, "rewards/rejected": -0.1870565265417099, "step": 6066 }, { "epoch": 3.701082812261705, "grad_norm": 1.9122931957244873, "learning_rate": 2.0849969381506428e-06, "log_odds_chosen": 1.291566014289856, "log_odds_ratio": -0.5895957946777344, "logits/chosen": -0.980954647064209, "logits/rejected": -1.1294746398925781, "logps/chosen": -0.9009125232696533, "logps/rejected": -1.7936780452728271, "loss": 0.9507, "nll_loss": 0.9663841724395752, "rewards/accuracies": 0.5, "rewards/chosen": -0.09009125083684921, "rewards/margins": 0.08927655965089798, "rewards/rejected": -0.1793678104877472, "step": 6067 }, { "epoch": 3.701692847338722, "grad_norm": 1.6090933084487915, "learning_rate": 2.0840171463563993e-06, "log_odds_chosen": 2.2514657974243164, "log_odds_ratio": -0.329902708530426, "logits/chosen": -0.8968764543533325, "logits/rejected": -0.9571791887283325, "logps/chosen": -0.7752887010574341, "logps/rejected": -2.549938917160034, "loss": 0.945, "nll_loss": 0.8839143514633179, "rewards/accuracies": 0.75, "rewards/chosen": -0.07752886414527893, "rewards/margins": 0.1774650365114212, "rewards/rejected": -0.25499388575553894, "step": 6068 }, { "epoch": 3.702302882415739, "grad_norm": 1.2364646196365356, "learning_rate": 2.0830373545621554e-06, "log_odds_chosen": 2.0727717876434326, "log_odds_ratio": -0.5515406131744385, "logits/chosen": -0.9497840404510498, "logits/rejected": -1.128222942352295, "logps/chosen": -0.8875038623809814, "logps/rejected": -2.7733960151672363, "loss": 0.9911, "nll_loss": 1.013629674911499, "rewards/accuracies": 0.625, "rewards/chosen": -0.08875039219856262, "rewards/margins": 0.18858924508094788, "rewards/rejected": -0.2773396372795105, "step": 6069 }, { "epoch": 3.702912917492756, "grad_norm": 1.50601327419281, "learning_rate": 2.0820575627679115e-06, "log_odds_chosen": 1.3863515853881836, "log_odds_ratio": -0.34680458903312683, "logits/chosen": -0.7383365035057068, "logits/rejected": -0.7810958027839661, "logps/chosen": -0.6129976511001587, "logps/rejected": -1.3440570831298828, "loss": 1.1057, "nll_loss": 1.003807544708252, "rewards/accuracies": 0.875, "rewards/chosen": -0.06129976361989975, "rewards/margins": 0.07310595363378525, "rewards/rejected": -0.134405717253685, "step": 6070 }, { "epoch": 3.703522952569773, "grad_norm": 1.2218263149261475, "learning_rate": 2.081077770973668e-06, "log_odds_chosen": 0.6645671129226685, "log_odds_ratio": -0.6939213275909424, "logits/chosen": -0.8591997623443604, "logits/rejected": -0.9661639928817749, "logps/chosen": -0.7764467597007751, "logps/rejected": -1.333579182624817, "loss": 1.1638, "nll_loss": 0.952210545539856, "rewards/accuracies": 0.375, "rewards/chosen": -0.07764467597007751, "rewards/margins": 0.05571324750781059, "rewards/rejected": -0.1333579272031784, "step": 6071 }, { "epoch": 3.7041329876467897, "grad_norm": 1.230265736579895, "learning_rate": 2.0800979791794246e-06, "log_odds_chosen": 4.131708145141602, "log_odds_ratio": -0.1451418399810791, "logits/chosen": -0.6016790866851807, "logits/rejected": -0.873579204082489, "logps/chosen": -0.6141449809074402, "logps/rejected": -3.904811382293701, "loss": 1.0774, "nll_loss": 0.8496406078338623, "rewards/accuracies": 1.0, "rewards/chosen": -0.06141449511051178, "rewards/margins": 0.329066663980484, "rewards/rejected": -0.390481173992157, "step": 6072 }, { "epoch": 3.7047430227238065, "grad_norm": 1.2239813804626465, "learning_rate": 2.0791181873851803e-06, "log_odds_chosen": 1.4395017623901367, "log_odds_ratio": -0.45969757437705994, "logits/chosen": -0.8043715953826904, "logits/rejected": -0.9369887709617615, "logps/chosen": -0.7814648151397705, "logps/rejected": -1.6978999376296997, "loss": 0.9299, "nll_loss": 0.9640783071517944, "rewards/accuracies": 0.75, "rewards/chosen": -0.07814648002386093, "rewards/margins": 0.09164352715015411, "rewards/rejected": -0.16978999972343445, "step": 6073 }, { "epoch": 3.7053530578008234, "grad_norm": 6.63216495513916, "learning_rate": 2.078138395590937e-06, "log_odds_chosen": 5.0204877853393555, "log_odds_ratio": -0.04315949231386185, "logits/chosen": -0.6301591396331787, "logits/rejected": -1.0561800003051758, "logps/chosen": -0.7201555967330933, "logps/rejected": -4.916747093200684, "loss": 0.9437, "nll_loss": 1.0371880531311035, "rewards/accuracies": 1.0, "rewards/chosen": -0.07201556861400604, "rewards/margins": 0.41965916752815247, "rewards/rejected": -0.4916747808456421, "step": 6074 }, { "epoch": 3.70596309287784, "grad_norm": 2.520474672317505, "learning_rate": 2.0771586037966934e-06, "log_odds_chosen": 2.366910934448242, "log_odds_ratio": -0.37000012397766113, "logits/chosen": -1.0323593616485596, "logits/rejected": -0.9648827314376831, "logps/chosen": -0.8638261556625366, "logps/rejected": -2.809652090072632, "loss": 1.0864, "nll_loss": 1.2161097526550293, "rewards/accuracies": 0.75, "rewards/chosen": -0.08638261258602142, "rewards/margins": 0.19458262622356415, "rewards/rejected": -0.28096523880958557, "step": 6075 }, { "epoch": 3.7065731279548575, "grad_norm": 31.547189712524414, "learning_rate": 2.076178812002449e-06, "log_odds_chosen": 1.3911596536636353, "log_odds_ratio": -0.418548047542572, "logits/chosen": -0.7297269701957703, "logits/rejected": -0.7363399863243103, "logps/chosen": -0.8076325058937073, "logps/rejected": -1.776538610458374, "loss": 1.1119, "nll_loss": 0.9022939801216125, "rewards/accuracies": 0.875, "rewards/chosen": -0.08076325058937073, "rewards/margins": 0.09689059853553772, "rewards/rejected": -0.17765384912490845, "step": 6076 }, { "epoch": 3.7071831630318743, "grad_norm": 2.5761518478393555, "learning_rate": 2.0751990202082057e-06, "log_odds_chosen": 1.5721632242202759, "log_odds_ratio": -0.3749147653579712, "logits/chosen": -0.9780277013778687, "logits/rejected": -0.9751601219177246, "logps/chosen": -1.0062880516052246, "logps/rejected": -2.23329496383667, "loss": 1.1423, "nll_loss": 1.0669177770614624, "rewards/accuracies": 0.75, "rewards/chosen": -0.1006288081407547, "rewards/margins": 0.12270068377256393, "rewards/rejected": -0.22332948446273804, "step": 6077 }, { "epoch": 3.707793198108891, "grad_norm": 3.926959276199341, "learning_rate": 2.0742192284139622e-06, "log_odds_chosen": 3.019268274307251, "log_odds_ratio": -0.327572226524353, "logits/chosen": -0.8465105891227722, "logits/rejected": -0.9820575714111328, "logps/chosen": -0.8547041416168213, "logps/rejected": -3.437394142150879, "loss": 1.0729, "nll_loss": 0.9793605208396912, "rewards/accuracies": 0.875, "rewards/chosen": -0.08547040820121765, "rewards/margins": 0.25826898217201233, "rewards/rejected": -0.34373939037323, "step": 6078 }, { "epoch": 3.708403233185908, "grad_norm": 2.6707632541656494, "learning_rate": 2.073239436619718e-06, "log_odds_chosen": 1.7426668405532837, "log_odds_ratio": -0.27516764402389526, "logits/chosen": -0.8738958835601807, "logits/rejected": -0.8893418312072754, "logps/chosen": -0.9219809770584106, "logps/rejected": -2.190593957901001, "loss": 1.1556, "nll_loss": 1.0210708379745483, "rewards/accuracies": 1.0, "rewards/chosen": -0.09219810366630554, "rewards/margins": 0.12686128914356232, "rewards/rejected": -0.21905940771102905, "step": 6079 }, { "epoch": 3.7090132682629253, "grad_norm": 1.1882967948913574, "learning_rate": 2.0722596448254745e-06, "log_odds_chosen": 0.7086223363876343, "log_odds_ratio": -0.5278550982475281, "logits/chosen": -0.7504441738128662, "logits/rejected": -0.74001145362854, "logps/chosen": -1.012231707572937, "logps/rejected": -1.5233068466186523, "loss": 1.1399, "nll_loss": 1.0348875522613525, "rewards/accuracies": 0.5, "rewards/chosen": -0.1012231633067131, "rewards/margins": 0.05110752210021019, "rewards/rejected": -0.1523306965827942, "step": 6080 }, { "epoch": 3.709623303339942, "grad_norm": 6.823029518127441, "learning_rate": 2.071279853031231e-06, "log_odds_chosen": 0.9925001263618469, "log_odds_ratio": -0.55412358045578, "logits/chosen": -0.8483037948608398, "logits/rejected": -0.8177488446235657, "logps/chosen": -0.8198448419570923, "logps/rejected": -1.5952160358428955, "loss": 1.0311, "nll_loss": 0.9965173006057739, "rewards/accuracies": 0.625, "rewards/chosen": -0.08198448270559311, "rewards/margins": 0.07753712683916092, "rewards/rejected": -0.15952160954475403, "step": 6081 }, { "epoch": 3.710233338416959, "grad_norm": 1.2924199104309082, "learning_rate": 2.070300061236987e-06, "log_odds_chosen": 0.9188454747200012, "log_odds_ratio": -0.5032927989959717, "logits/chosen": -0.9327601790428162, "logits/rejected": -0.9528490304946899, "logps/chosen": -0.8044830560684204, "logps/rejected": -1.4612689018249512, "loss": 1.0102, "nll_loss": 1.0516867637634277, "rewards/accuracies": 0.75, "rewards/chosen": -0.08044829964637756, "rewards/margins": 0.06567858904600143, "rewards/rejected": -0.1461268961429596, "step": 6082 }, { "epoch": 3.710843373493976, "grad_norm": 1.5558037757873535, "learning_rate": 2.0693202694427432e-06, "log_odds_chosen": 0.9486756324768066, "log_odds_ratio": -0.4844381511211395, "logits/chosen": -0.9256123900413513, "logits/rejected": -0.9347847700119019, "logps/chosen": -1.0552029609680176, "logps/rejected": -1.8494107723236084, "loss": 1.2554, "nll_loss": 1.3795894384384155, "rewards/accuracies": 0.75, "rewards/chosen": -0.10552028566598892, "rewards/margins": 0.0794207900762558, "rewards/rejected": -0.18494108319282532, "step": 6083 }, { "epoch": 3.7114534085709927, "grad_norm": 21.07760238647461, "learning_rate": 2.0683404776484994e-06, "log_odds_chosen": 1.608863353729248, "log_odds_ratio": -0.44598516821861267, "logits/chosen": -0.9582958221435547, "logits/rejected": -0.9769725799560547, "logps/chosen": -0.9327515363693237, "logps/rejected": -2.2568299770355225, "loss": 1.0078, "nll_loss": 1.0176246166229248, "rewards/accuracies": 0.75, "rewards/chosen": -0.09327514469623566, "rewards/margins": 0.13240785896778107, "rewards/rejected": -0.22568300366401672, "step": 6084 }, { "epoch": 3.7120634436480096, "grad_norm": 1.969573736190796, "learning_rate": 2.067360685854256e-06, "log_odds_chosen": 2.703380823135376, "log_odds_ratio": -0.42829951643943787, "logits/chosen": -0.6616662740707397, "logits/rejected": -0.824739933013916, "logps/chosen": -0.6521540880203247, "logps/rejected": -2.8582522869110107, "loss": 1.0548, "nll_loss": 0.7692433595657349, "rewards/accuracies": 0.75, "rewards/chosen": -0.06521540880203247, "rewards/margins": 0.2206098437309265, "rewards/rejected": -0.2858252227306366, "step": 6085 }, { "epoch": 3.712673478725027, "grad_norm": 1.6333054304122925, "learning_rate": 2.0663808940600125e-06, "log_odds_chosen": 1.347461462020874, "log_odds_ratio": -0.563414990901947, "logits/chosen": -0.8462628722190857, "logits/rejected": -0.8679755926132202, "logps/chosen": -0.749188244342804, "logps/rejected": -1.8043736219406128, "loss": 1.0036, "nll_loss": 0.7871280908584595, "rewards/accuracies": 0.5, "rewards/chosen": -0.07491882145404816, "rewards/margins": 0.10551854223012924, "rewards/rejected": -0.1804373562335968, "step": 6086 }, { "epoch": 3.7132835138020437, "grad_norm": 8.07568359375, "learning_rate": 2.065401102265768e-06, "log_odds_chosen": 1.927793264389038, "log_odds_ratio": -0.505146861076355, "logits/chosen": -0.9626345634460449, "logits/rejected": -1.0166726112365723, "logps/chosen": -0.7331369519233704, "logps/rejected": -2.2194371223449707, "loss": 1.2229, "nll_loss": 1.1339592933654785, "rewards/accuracies": 0.625, "rewards/chosen": -0.07331369072198868, "rewards/margins": 0.1486300230026245, "rewards/rejected": -0.2219437211751938, "step": 6087 }, { "epoch": 3.7138935488790605, "grad_norm": 15.874176979064941, "learning_rate": 2.0644213104715247e-06, "log_odds_chosen": 0.24220532178878784, "log_odds_ratio": -0.6067836284637451, "logits/chosen": -0.9832053184509277, "logits/rejected": -0.9961717128753662, "logps/chosen": -0.9155089855194092, "logps/rejected": -1.0608205795288086, "loss": 1.0824, "nll_loss": 1.1330914497375488, "rewards/accuracies": 0.625, "rewards/chosen": -0.09155090153217316, "rewards/margins": 0.01453116163611412, "rewards/rejected": -0.10608206689357758, "step": 6088 }, { "epoch": 3.7145035839560774, "grad_norm": 1.9670387506484985, "learning_rate": 2.0634415186772813e-06, "log_odds_chosen": 0.24556350708007812, "log_odds_ratio": -0.7189874053001404, "logits/chosen": -0.9243794679641724, "logits/rejected": -0.8842321038246155, "logps/chosen": -1.0120176076889038, "logps/rejected": -1.2251211404800415, "loss": 1.1463, "nll_loss": 1.1239309310913086, "rewards/accuracies": 0.625, "rewards/chosen": -0.10120175778865814, "rewards/margins": 0.021310344338417053, "rewards/rejected": -0.1225121021270752, "step": 6089 }, { "epoch": 3.7151136190330947, "grad_norm": 17.030925750732422, "learning_rate": 2.062461726883037e-06, "log_odds_chosen": 2.9885571002960205, "log_odds_ratio": -0.3310484290122986, "logits/chosen": -0.7879694104194641, "logits/rejected": -0.9973974823951721, "logps/chosen": -0.8625034689903259, "logps/rejected": -3.4468648433685303, "loss": 0.9676, "nll_loss": 0.9819090366363525, "rewards/accuracies": 0.75, "rewards/chosen": -0.08625034987926483, "rewards/margins": 0.2584361433982849, "rewards/rejected": -0.34468647837638855, "step": 6090 }, { "epoch": 3.7157236541101115, "grad_norm": 2.4867966175079346, "learning_rate": 2.0614819350887935e-06, "log_odds_chosen": 2.166928291320801, "log_odds_ratio": -0.241238534450531, "logits/chosen": -0.4571784734725952, "logits/rejected": -0.6271133422851562, "logps/chosen": -0.5874780416488647, "logps/rejected": -2.167349100112915, "loss": 0.942, "nll_loss": 0.6537909507751465, "rewards/accuracies": 1.0, "rewards/chosen": -0.05874780938029289, "rewards/margins": 0.15798711776733398, "rewards/rejected": -0.21673491597175598, "step": 6091 }, { "epoch": 3.7163336891871284, "grad_norm": 1.6688425540924072, "learning_rate": 2.06050214329455e-06, "log_odds_chosen": 2.6161680221557617, "log_odds_ratio": -0.3638179898262024, "logits/chosen": -1.0566051006317139, "logits/rejected": -1.036982774734497, "logps/chosen": -1.0613175630569458, "logps/rejected": -3.4165821075439453, "loss": 1.1882, "nll_loss": 1.4132575988769531, "rewards/accuracies": 0.75, "rewards/chosen": -0.10613176226615906, "rewards/margins": 0.235526442527771, "rewards/rejected": -0.34165820479393005, "step": 6092 }, { "epoch": 3.716943724264145, "grad_norm": 1.4388012886047363, "learning_rate": 2.059522351500306e-06, "log_odds_chosen": 3.6502113342285156, "log_odds_ratio": -0.4185214042663574, "logits/chosen": -0.7614800930023193, "logits/rejected": -0.9603984355926514, "logps/chosen": -0.7193787097930908, "logps/rejected": -3.8828935623168945, "loss": 0.9535, "nll_loss": 0.8719517588615417, "rewards/accuracies": 0.5, "rewards/chosen": -0.07193787395954132, "rewards/margins": 0.3163515031337738, "rewards/rejected": -0.3882893919944763, "step": 6093 }, { "epoch": 3.717553759341162, "grad_norm": 1.1434653997421265, "learning_rate": 2.0585425597060623e-06, "log_odds_chosen": 2.2475502490997314, "log_odds_ratio": -0.2816462218761444, "logits/chosen": -0.7677243947982788, "logits/rejected": -0.775010883808136, "logps/chosen": -0.6916604042053223, "logps/rejected": -2.3776674270629883, "loss": 0.9904, "nll_loss": 0.772307813167572, "rewards/accuracies": 0.875, "rewards/chosen": -0.06916604191064835, "rewards/margins": 0.16860070824623108, "rewards/rejected": -0.23776674270629883, "step": 6094 }, { "epoch": 3.718163794418179, "grad_norm": 1.7234385013580322, "learning_rate": 2.057562767911819e-06, "log_odds_chosen": 1.3844833374023438, "log_odds_ratio": -0.4966609477996826, "logits/chosen": -0.8598951101303101, "logits/rejected": -0.7961602210998535, "logps/chosen": -0.7504821419715881, "logps/rejected": -1.7344731092453003, "loss": 1.1022, "nll_loss": 0.8895622491836548, "rewards/accuracies": 0.625, "rewards/chosen": -0.07504820823669434, "rewards/margins": 0.0983991026878357, "rewards/rejected": -0.17344731092453003, "step": 6095 }, { "epoch": 3.7187738294951957, "grad_norm": 1.333632230758667, "learning_rate": 2.056582976117575e-06, "log_odds_chosen": 0.5406631231307983, "log_odds_ratio": -0.6105719804763794, "logits/chosen": -0.7546479105949402, "logits/rejected": -0.8733389973640442, "logps/chosen": -0.6285560131072998, "logps/rejected": -0.8259059190750122, "loss": 0.9751, "nll_loss": 0.9261473417282104, "rewards/accuracies": 0.625, "rewards/chosen": -0.06285560131072998, "rewards/margins": 0.01973499357700348, "rewards/rejected": -0.08259059488773346, "step": 6096 }, { "epoch": 3.719383864572213, "grad_norm": 1.8065929412841797, "learning_rate": 2.055603184323331e-06, "log_odds_chosen": 2.002180337905884, "log_odds_ratio": -0.5462705492973328, "logits/chosen": -0.8916984796524048, "logits/rejected": -0.859004557132721, "logps/chosen": -0.9621213674545288, "logps/rejected": -2.6536340713500977, "loss": 1.0403, "nll_loss": 1.0493024587631226, "rewards/accuracies": 0.625, "rewards/chosen": -0.09621214121580124, "rewards/margins": 0.16915126144886017, "rewards/rejected": -0.2653634250164032, "step": 6097 }, { "epoch": 3.71999389964923, "grad_norm": 7.747988224029541, "learning_rate": 2.0546233925290876e-06, "log_odds_chosen": 3.2618484497070312, "log_odds_ratio": -0.381753534078598, "logits/chosen": -0.7310382723808289, "logits/rejected": -0.9650830030441284, "logps/chosen": -0.6386452913284302, "logps/rejected": -3.4191246032714844, "loss": 1.001, "nll_loss": 0.8070452213287354, "rewards/accuracies": 1.0, "rewards/chosen": -0.06386452913284302, "rewards/margins": 0.27804791927337646, "rewards/rejected": -0.34191247820854187, "step": 6098 }, { "epoch": 3.7206039347262467, "grad_norm": 1.3671139478683472, "learning_rate": 2.0536436007348437e-06, "log_odds_chosen": 1.0473788976669312, "log_odds_ratio": -0.5096538066864014, "logits/chosen": -0.8323009014129639, "logits/rejected": -0.9126134514808655, "logps/chosen": -0.8348309993743896, "logps/rejected": -1.6597272157669067, "loss": 0.9361, "nll_loss": 0.9129421710968018, "rewards/accuracies": 0.625, "rewards/chosen": -0.08348309993743896, "rewards/margins": 0.08248961716890335, "rewards/rejected": -0.16597270965576172, "step": 6099 }, { "epoch": 3.7212139698032636, "grad_norm": 1.4272788763046265, "learning_rate": 2.0526638089406003e-06, "log_odds_chosen": 2.98146653175354, "log_odds_ratio": -0.23536573350429535, "logits/chosen": -0.7258622646331787, "logits/rejected": -0.9150071144104004, "logps/chosen": -0.72111976146698, "logps/rejected": -2.990838050842285, "loss": 1.0748, "nll_loss": 0.8321669101715088, "rewards/accuracies": 0.875, "rewards/chosen": -0.07211197167634964, "rewards/margins": 0.226971834897995, "rewards/rejected": -0.29908379912376404, "step": 6100 }, { "epoch": 3.721824004880281, "grad_norm": 2.3567252159118652, "learning_rate": 2.0516840171463564e-06, "log_odds_chosen": 1.549721360206604, "log_odds_ratio": -0.4917181730270386, "logits/chosen": -0.8938058614730835, "logits/rejected": -0.8836339712142944, "logps/chosen": -0.8873127698898315, "logps/rejected": -2.2358527183532715, "loss": 1.1173, "nll_loss": 1.1047914028167725, "rewards/accuracies": 0.75, "rewards/chosen": -0.08873127400875092, "rewards/margins": 0.1348540037870407, "rewards/rejected": -0.223585307598114, "step": 6101 }, { "epoch": 3.7224340399572977, "grad_norm": 1.9119892120361328, "learning_rate": 2.0507042253521125e-06, "log_odds_chosen": 0.8449370861053467, "log_odds_ratio": -0.36917975544929504, "logits/chosen": -0.7867188453674316, "logits/rejected": -0.7800593972206116, "logps/chosen": -0.9371616840362549, "logps/rejected": -1.4913283586502075, "loss": 1.1713, "nll_loss": 1.3929089307785034, "rewards/accuracies": 1.0, "rewards/chosen": -0.09371616691350937, "rewards/margins": 0.05541667342185974, "rewards/rejected": -0.1491328477859497, "step": 6102 }, { "epoch": 3.7230440750343146, "grad_norm": 1.3762292861938477, "learning_rate": 2.049724433557869e-06, "log_odds_chosen": 2.835348129272461, "log_odds_ratio": -0.28448307514190674, "logits/chosen": -0.9144687056541443, "logits/rejected": -0.9442992210388184, "logps/chosen": -0.6713775992393494, "logps/rejected": -3.029414415359497, "loss": 0.8469, "nll_loss": 0.9925318956375122, "rewards/accuracies": 1.0, "rewards/chosen": -0.06713776290416718, "rewards/margins": 0.23580366373062134, "rewards/rejected": -0.3029414415359497, "step": 6103 }, { "epoch": 3.7236541101113314, "grad_norm": 1.9478346109390259, "learning_rate": 2.0487446417636248e-06, "log_odds_chosen": 1.7958405017852783, "log_odds_ratio": -0.3618709146976471, "logits/chosen": -0.7731781005859375, "logits/rejected": -0.7475333213806152, "logps/chosen": -0.7703100442886353, "logps/rejected": -2.2546956539154053, "loss": 1.0642, "nll_loss": 0.9244539737701416, "rewards/accuracies": 0.875, "rewards/chosen": -0.07703100889921188, "rewards/margins": 0.14843854308128357, "rewards/rejected": -0.22546955943107605, "step": 6104 }, { "epoch": 3.7242641451883483, "grad_norm": 1.4049293994903564, "learning_rate": 2.0477648499693813e-06, "log_odds_chosen": 2.634263515472412, "log_odds_ratio": -0.3175833225250244, "logits/chosen": -0.6264278292655945, "logits/rejected": -0.8194026947021484, "logps/chosen": -0.8191213607788086, "logps/rejected": -2.9855151176452637, "loss": 0.9008, "nll_loss": 1.092513918876648, "rewards/accuracies": 0.875, "rewards/chosen": -0.08191213756799698, "rewards/margins": 0.21663936972618103, "rewards/rejected": -0.2985514998435974, "step": 6105 }, { "epoch": 3.724874180265365, "grad_norm": 2.5408456325531006, "learning_rate": 2.046785058175138e-06, "log_odds_chosen": 2.7171316146850586, "log_odds_ratio": -0.4685601592063904, "logits/chosen": -0.7324432730674744, "logits/rejected": -0.8712273836135864, "logps/chosen": -0.8159844279289246, "logps/rejected": -3.2056617736816406, "loss": 1.1806, "nll_loss": 1.0476714372634888, "rewards/accuracies": 0.625, "rewards/chosen": -0.0815984383225441, "rewards/margins": 0.23896774649620056, "rewards/rejected": -0.32056617736816406, "step": 6106 }, { "epoch": 3.725484215342382, "grad_norm": 2.2570641040802, "learning_rate": 2.045805266380894e-06, "log_odds_chosen": 0.354999303817749, "log_odds_ratio": -0.5685157179832458, "logits/chosen": -1.0075699090957642, "logits/rejected": -1.0288029909133911, "logps/chosen": -0.9607756733894348, "logps/rejected": -1.205609679222107, "loss": 1.0156, "nll_loss": 0.9766342043876648, "rewards/accuracies": 0.75, "rewards/chosen": -0.0960775762796402, "rewards/margins": 0.024483397603034973, "rewards/rejected": -0.12056095898151398, "step": 6107 }, { "epoch": 3.7260942504193992, "grad_norm": 4.719748497009277, "learning_rate": 2.04482547458665e-06, "log_odds_chosen": 1.6269538402557373, "log_odds_ratio": -0.4182412326335907, "logits/chosen": -0.8565770983695984, "logits/rejected": -0.8870701193809509, "logps/chosen": -0.7281055450439453, "logps/rejected": -1.8588203191757202, "loss": 0.9136, "nll_loss": 0.7781068682670593, "rewards/accuracies": 0.875, "rewards/chosen": -0.07281056046485901, "rewards/margins": 0.11307147145271301, "rewards/rejected": -0.18588203191757202, "step": 6108 }, { "epoch": 3.726704285496416, "grad_norm": 8.416421890258789, "learning_rate": 2.0438456827924067e-06, "log_odds_chosen": 1.9262313842773438, "log_odds_ratio": -0.4408216178417206, "logits/chosen": -0.9243643879890442, "logits/rejected": -0.8544429540634155, "logps/chosen": -0.7426052093505859, "logps/rejected": -2.1598081588745117, "loss": 1.0431, "nll_loss": 1.0258225202560425, "rewards/accuracies": 0.625, "rewards/chosen": -0.07426052540540695, "rewards/margins": 0.14172029495239258, "rewards/rejected": -0.21598081290721893, "step": 6109 }, { "epoch": 3.727314320573433, "grad_norm": 7.953030109405518, "learning_rate": 2.0428658909981628e-06, "log_odds_chosen": 1.3464109897613525, "log_odds_ratio": -0.4560346007347107, "logits/chosen": -0.9728308916091919, "logits/rejected": -0.9525375366210938, "logps/chosen": -0.8620846271514893, "logps/rejected": -1.9170100688934326, "loss": 1.0091, "nll_loss": 1.045820951461792, "rewards/accuracies": 0.75, "rewards/chosen": -0.08620846271514893, "rewards/margins": 0.10549253225326538, "rewards/rejected": -0.1917010098695755, "step": 6110 }, { "epoch": 3.7279243556504498, "grad_norm": 1.213780164718628, "learning_rate": 2.041886099203919e-06, "log_odds_chosen": 1.9467453956604004, "log_odds_ratio": -0.40129274129867554, "logits/chosen": -0.8694459199905396, "logits/rejected": -0.9705417156219482, "logps/chosen": -0.8516396284103394, "logps/rejected": -2.3736789226531982, "loss": 0.9892, "nll_loss": 0.9692158699035645, "rewards/accuracies": 0.625, "rewards/chosen": -0.08516396582126617, "rewards/margins": 0.15220394730567932, "rewards/rejected": -0.2373678982257843, "step": 6111 }, { "epoch": 3.728534390727467, "grad_norm": 1.4478873014450073, "learning_rate": 2.0409063074096755e-06, "log_odds_chosen": 2.004513740539551, "log_odds_ratio": -0.540155291557312, "logits/chosen": -1.055928111076355, "logits/rejected": -0.9919654130935669, "logps/chosen": -0.9824466705322266, "logps/rejected": -2.7206430435180664, "loss": 1.0817, "nll_loss": 1.3351025581359863, "rewards/accuracies": 0.5, "rewards/chosen": -0.09824467450380325, "rewards/margins": 0.1738196611404419, "rewards/rejected": -0.27206432819366455, "step": 6112 }, { "epoch": 3.729144425804484, "grad_norm": 1.2423596382141113, "learning_rate": 2.0399265156154316e-06, "log_odds_chosen": 2.662137508392334, "log_odds_ratio": -0.568832278251648, "logits/chosen": -0.91688072681427, "logits/rejected": -1.1280858516693115, "logps/chosen": -0.6638803482055664, "logps/rejected": -3.1003336906433105, "loss": 0.9779, "nll_loss": 1.0042394399642944, "rewards/accuracies": 0.625, "rewards/chosen": -0.06638804078102112, "rewards/margins": 0.24364537000656128, "rewards/rejected": -0.31003338098526, "step": 6113 }, { "epoch": 3.7297544608815008, "grad_norm": 2.043283700942993, "learning_rate": 2.038946723821188e-06, "log_odds_chosen": 3.0368592739105225, "log_odds_ratio": -0.3748609125614166, "logits/chosen": -0.7708807587623596, "logits/rejected": -0.8306073546409607, "logps/chosen": -0.7010048031806946, "logps/rejected": -2.8122920989990234, "loss": 1.1031, "nll_loss": 1.0798579454421997, "rewards/accuracies": 0.75, "rewards/chosen": -0.07010048627853394, "rewards/margins": 0.21112872660160065, "rewards/rejected": -0.2812291979789734, "step": 6114 }, { "epoch": 3.7303644959585176, "grad_norm": 4.696131229400635, "learning_rate": 2.0379669320269442e-06, "log_odds_chosen": 1.7664308547973633, "log_odds_ratio": -0.5553370714187622, "logits/chosen": -0.8335205912590027, "logits/rejected": -0.9953271150588989, "logps/chosen": -0.9596093893051147, "logps/rejected": -2.470564126968384, "loss": 1.1076, "nll_loss": 0.9626405835151672, "rewards/accuracies": 0.5, "rewards/chosen": -0.09596094489097595, "rewards/margins": 0.1510954648256302, "rewards/rejected": -0.24705640971660614, "step": 6115 }, { "epoch": 3.7309745310355344, "grad_norm": 1.5776997804641724, "learning_rate": 2.0369871402327004e-06, "log_odds_chosen": 1.306623101234436, "log_odds_ratio": -0.4203777015209198, "logits/chosen": -0.8217764496803284, "logits/rejected": -0.8942997455596924, "logps/chosen": -1.067467212677002, "logps/rejected": -2.1354432106018066, "loss": 1.1134, "nll_loss": 1.1469824314117432, "rewards/accuracies": 0.75, "rewards/chosen": -0.10674672573804855, "rewards/margins": 0.10679760575294495, "rewards/rejected": -0.2135443240404129, "step": 6116 }, { "epoch": 3.7315845661125513, "grad_norm": 7.651029109954834, "learning_rate": 2.036007348438457e-06, "log_odds_chosen": 2.2750227451324463, "log_odds_ratio": -0.3846242427825928, "logits/chosen": -0.9280185103416443, "logits/rejected": -0.9093539118766785, "logps/chosen": -0.9318423867225647, "logps/rejected": -2.8430981636047363, "loss": 1.1674, "nll_loss": 1.060628890991211, "rewards/accuracies": 0.875, "rewards/chosen": -0.09318423271179199, "rewards/margins": 0.1911255568265915, "rewards/rejected": -0.28430983424186707, "step": 6117 }, { "epoch": 3.732194601189568, "grad_norm": 2.4424729347229004, "learning_rate": 2.035027556644213e-06, "log_odds_chosen": 3.467252731323242, "log_odds_ratio": -0.19112259149551392, "logits/chosen": -0.8229679465293884, "logits/rejected": -1.0324499607086182, "logps/chosen": -0.7168744802474976, "logps/rejected": -3.4918265342712402, "loss": 1.0367, "nll_loss": 0.9674757719039917, "rewards/accuracies": 1.0, "rewards/chosen": -0.07168744504451752, "rewards/margins": 0.27749520540237427, "rewards/rejected": -0.349182665348053, "step": 6118 }, { "epoch": 3.7328046362665854, "grad_norm": 1.59854257106781, "learning_rate": 2.034047764849969e-06, "log_odds_chosen": 1.0132664442062378, "log_odds_ratio": -0.47612449526786804, "logits/chosen": -0.7891712188720703, "logits/rejected": -0.7617766857147217, "logps/chosen": -0.700228750705719, "logps/rejected": -1.4235188961029053, "loss": 1.0434, "nll_loss": 1.035224437713623, "rewards/accuracies": 0.625, "rewards/chosen": -0.07002287358045578, "rewards/margins": 0.07232902199029922, "rewards/rejected": -0.142351895570755, "step": 6119 }, { "epoch": 3.7334146713436023, "grad_norm": 4.360892295837402, "learning_rate": 2.0330679730557257e-06, "log_odds_chosen": 2.4782516956329346, "log_odds_ratio": -0.29831379652023315, "logits/chosen": -0.7782455682754517, "logits/rejected": -0.9118817448616028, "logps/chosen": -0.6381300687789917, "logps/rejected": -2.5142574310302734, "loss": 1.1328, "nll_loss": 0.90540611743927, "rewards/accuracies": 0.75, "rewards/chosen": -0.06381300091743469, "rewards/margins": 0.18761272728443146, "rewards/rejected": -0.25142571330070496, "step": 6120 }, { "epoch": 3.734024706420619, "grad_norm": 1.3679333925247192, "learning_rate": 2.032088181261482e-06, "log_odds_chosen": 2.0758960247039795, "log_odds_ratio": -0.40428394079208374, "logits/chosen": -0.8333423137664795, "logits/rejected": -0.825262725353241, "logps/chosen": -0.9571176767349243, "logps/rejected": -2.608962297439575, "loss": 0.9171, "nll_loss": 1.0777373313903809, "rewards/accuracies": 0.75, "rewards/chosen": -0.09571176767349243, "rewards/margins": 0.16518446803092957, "rewards/rejected": -0.260896235704422, "step": 6121 }, { "epoch": 3.734634741497636, "grad_norm": 1.2125725746154785, "learning_rate": 2.031108389467238e-06, "log_odds_chosen": -0.18220046162605286, "log_odds_ratio": -0.9749120473861694, "logits/chosen": -0.9341346025466919, "logits/rejected": -0.8050740957260132, "logps/chosen": -1.3476414680480957, "logps/rejected": -1.1940778493881226, "loss": 1.1392, "nll_loss": 1.348761796951294, "rewards/accuracies": 0.25, "rewards/chosen": -0.134764164686203, "rewards/margins": -0.015356363728642464, "rewards/rejected": -0.1194077879190445, "step": 6122 }, { "epoch": 3.7352447765746533, "grad_norm": 2.4764740467071533, "learning_rate": 2.0301285976729945e-06, "log_odds_chosen": 1.0349419116973877, "log_odds_ratio": -0.6800395250320435, "logits/chosen": -0.8540425896644592, "logits/rejected": -1.0082666873931885, "logps/chosen": -0.9045315980911255, "logps/rejected": -1.663173794746399, "loss": 1.2126, "nll_loss": 1.3021540641784668, "rewards/accuracies": 0.375, "rewards/chosen": -0.09045316278934479, "rewards/margins": 0.07586422562599182, "rewards/rejected": -0.1663173884153366, "step": 6123 }, { "epoch": 3.73585481165167, "grad_norm": 1.6322875022888184, "learning_rate": 2.0291488058787506e-06, "log_odds_chosen": 0.7555010914802551, "log_odds_ratio": -0.5760040283203125, "logits/chosen": -0.9794703722000122, "logits/rejected": -0.9953737258911133, "logps/chosen": -1.0007394552230835, "logps/rejected": -1.6145986318588257, "loss": 0.996, "nll_loss": 1.1193581819534302, "rewards/accuracies": 0.5, "rewards/chosen": -0.10007394850254059, "rewards/margins": 0.061385929584503174, "rewards/rejected": -0.16145986318588257, "step": 6124 }, { "epoch": 3.736464846728687, "grad_norm": 2.2266550064086914, "learning_rate": 2.0281690140845067e-06, "log_odds_chosen": 2.9381906986236572, "log_odds_ratio": -0.43809452652931213, "logits/chosen": -0.8774434924125671, "logits/rejected": -1.053432822227478, "logps/chosen": -0.7493640184402466, "logps/rejected": -3.28537917137146, "loss": 1.0224, "nll_loss": 0.8663589358329773, "rewards/accuracies": 0.75, "rewards/chosen": -0.0749363973736763, "rewards/margins": 0.2536015212535858, "rewards/rejected": -0.3285379111766815, "step": 6125 }, { "epoch": 3.737074881805704, "grad_norm": 6.010210037231445, "learning_rate": 2.0271892222902633e-06, "log_odds_chosen": 1.354880452156067, "log_odds_ratio": -0.5647087097167969, "logits/chosen": -0.9006033539772034, "logits/rejected": -0.9696369171142578, "logps/chosen": -0.8255159854888916, "logps/rejected": -1.8675256967544556, "loss": 1.0762, "nll_loss": 1.1604373455047607, "rewards/accuracies": 0.5, "rewards/chosen": -0.08255159854888916, "rewards/margins": 0.10420098155736923, "rewards/rejected": -0.186752587556839, "step": 6126 }, { "epoch": 3.7376849168827206, "grad_norm": 2.417142629623413, "learning_rate": 2.0262094304960194e-06, "log_odds_chosen": 1.733842134475708, "log_odds_ratio": -0.34097355604171753, "logits/chosen": -0.8689854145050049, "logits/rejected": -0.9880900382995605, "logps/chosen": -0.5707410573959351, "logps/rejected": -1.6643962860107422, "loss": 1.1448, "nll_loss": 1.554039716720581, "rewards/accuracies": 0.875, "rewards/chosen": -0.05707410350441933, "rewards/margins": 0.10936553031206131, "rewards/rejected": -0.16643963754177094, "step": 6127 }, { "epoch": 3.7382949519597375, "grad_norm": 1.9086108207702637, "learning_rate": 2.025229638701776e-06, "log_odds_chosen": 2.2803761959075928, "log_odds_ratio": -0.5165197849273682, "logits/chosen": -0.7623217105865479, "logits/rejected": -0.7528372406959534, "logps/chosen": -0.7421474456787109, "logps/rejected": -2.6299805641174316, "loss": 0.9864, "nll_loss": 0.8821622133255005, "rewards/accuracies": 0.5, "rewards/chosen": -0.07421474903821945, "rewards/margins": 0.18878331780433655, "rewards/rejected": -0.2629980444908142, "step": 6128 }, { "epoch": 3.738904987036755, "grad_norm": 1.4000146389007568, "learning_rate": 2.024249846907532e-06, "log_odds_chosen": 3.4525938034057617, "log_odds_ratio": -0.1637214720249176, "logits/chosen": -0.5507630109786987, "logits/rejected": -0.8509293794631958, "logps/chosen": -0.5564824938774109, "logps/rejected": -3.244363784790039, "loss": 1.0397, "nll_loss": 0.824768602848053, "rewards/accuracies": 1.0, "rewards/chosen": -0.05564824864268303, "rewards/margins": 0.2687881290912628, "rewards/rejected": -0.32443639636039734, "step": 6129 }, { "epoch": 3.7395150221137716, "grad_norm": 2.1553823947906494, "learning_rate": 2.023270055113288e-06, "log_odds_chosen": 0.3000539541244507, "log_odds_ratio": -0.6917674541473389, "logits/chosen": -0.8337098360061646, "logits/rejected": -0.9077131152153015, "logps/chosen": -1.05653715133667, "logps/rejected": -1.172006368637085, "loss": 1.124, "nll_loss": 1.1660239696502686, "rewards/accuracies": 0.5, "rewards/chosen": -0.10565371811389923, "rewards/margins": 0.011546917259693146, "rewards/rejected": -0.11720062792301178, "step": 6130 }, { "epoch": 3.7401250571907885, "grad_norm": 5.051983833312988, "learning_rate": 2.0222902633190447e-06, "log_odds_chosen": 1.0592718124389648, "log_odds_ratio": -0.524071216583252, "logits/chosen": -0.9943515062332153, "logits/rejected": -1.0015677213668823, "logps/chosen": -1.0254383087158203, "logps/rejected": -1.8925189971923828, "loss": 1.2056, "nll_loss": 1.1215221881866455, "rewards/accuracies": 0.75, "rewards/chosen": -0.10254383832216263, "rewards/margins": 0.08670806139707565, "rewards/rejected": -0.1892518848180771, "step": 6131 }, { "epoch": 3.7407350922678053, "grad_norm": 1.1399037837982178, "learning_rate": 2.021310471524801e-06, "log_odds_chosen": 1.983054280281067, "log_odds_ratio": -0.6051900386810303, "logits/chosen": -0.7350971102714539, "logits/rejected": -0.7977750897407532, "logps/chosen": -0.6512727737426758, "logps/rejected": -2.2945809364318848, "loss": 1.0251, "nll_loss": 0.8831892013549805, "rewards/accuracies": 0.625, "rewards/chosen": -0.06512728333473206, "rewards/margins": 0.1643308401107788, "rewards/rejected": -0.22945809364318848, "step": 6132 }, { "epoch": 3.741345127344822, "grad_norm": 1.351500153541565, "learning_rate": 2.020330679730557e-06, "log_odds_chosen": 2.611654281616211, "log_odds_ratio": -0.2524726986885071, "logits/chosen": -0.9899181127548218, "logits/rejected": -1.0839641094207764, "logps/chosen": -0.689470648765564, "logps/rejected": -2.638718605041504, "loss": 0.971, "nll_loss": 1.0176929235458374, "rewards/accuracies": 0.875, "rewards/chosen": -0.06894706189632416, "rewards/margins": 0.19492477178573608, "rewards/rejected": -0.26387184858322144, "step": 6133 }, { "epoch": 3.7419551624218395, "grad_norm": 2.517943859100342, "learning_rate": 2.0193508879363135e-06, "log_odds_chosen": 1.99561607837677, "log_odds_ratio": -0.437313050031662, "logits/chosen": -0.7647882699966431, "logits/rejected": -0.9107614159584045, "logps/chosen": -0.5846978425979614, "logps/rejected": -2.0596940517425537, "loss": 0.8278, "nll_loss": 0.9215604662895203, "rewards/accuracies": 0.75, "rewards/chosen": -0.05846978724002838, "rewards/margins": 0.14749963581562042, "rewards/rejected": -0.2059694230556488, "step": 6134 }, { "epoch": 3.7425651974988563, "grad_norm": 3.0521886348724365, "learning_rate": 2.01837109614207e-06, "log_odds_chosen": 2.2431914806365967, "log_odds_ratio": -0.35902905464172363, "logits/chosen": -0.7989972829818726, "logits/rejected": -0.806748628616333, "logps/chosen": -0.7426036596298218, "logps/rejected": -2.545006275177002, "loss": 1.262, "nll_loss": 0.9654756784439087, "rewards/accuracies": 0.875, "rewards/chosen": -0.07426036894321442, "rewards/margins": 0.18024027347564697, "rewards/rejected": -0.2545006275177002, "step": 6135 }, { "epoch": 3.743175232575873, "grad_norm": 7.956542491912842, "learning_rate": 2.0173913043478258e-06, "log_odds_chosen": 2.876274585723877, "log_odds_ratio": -0.3083850145339966, "logits/chosen": -0.9497568607330322, "logits/rejected": -0.9569591283798218, "logps/chosen": -0.8935095071792603, "logps/rejected": -3.3352417945861816, "loss": 1.0265, "nll_loss": 1.0046348571777344, "rewards/accuracies": 0.75, "rewards/chosen": -0.08935095369815826, "rewards/margins": 0.24417325854301453, "rewards/rejected": -0.3335241973400116, "step": 6136 }, { "epoch": 3.74378526765289, "grad_norm": 7.7530035972595215, "learning_rate": 2.0164115125535823e-06, "log_odds_chosen": 2.467222213745117, "log_odds_ratio": -0.4052545428276062, "logits/chosen": -0.8288129568099976, "logits/rejected": -0.9469431638717651, "logps/chosen": -0.8474138975143433, "logps/rejected": -2.691689968109131, "loss": 1.0461, "nll_loss": 1.0477781295776367, "rewards/accuracies": 0.5, "rewards/chosen": -0.08474138379096985, "rewards/margins": 0.18442760407924652, "rewards/rejected": -0.26916900277137756, "step": 6137 }, { "epoch": 3.744395302729907, "grad_norm": 10.569145202636719, "learning_rate": 2.015431720759339e-06, "log_odds_chosen": 1.9329543113708496, "log_odds_ratio": -0.38121339678764343, "logits/chosen": -0.5047662854194641, "logits/rejected": -0.7637048363685608, "logps/chosen": -0.6852251291275024, "logps/rejected": -2.006894826889038, "loss": 0.9375, "nll_loss": 0.7314808368682861, "rewards/accuracies": 0.75, "rewards/chosen": -0.06852251291275024, "rewards/margins": 0.13216698169708252, "rewards/rejected": -0.20068946480751038, "step": 6138 }, { "epoch": 3.7450053378069237, "grad_norm": 17.282686233520508, "learning_rate": 2.0144519289650946e-06, "log_odds_chosen": 2.3470025062561035, "log_odds_ratio": -0.19777941703796387, "logits/chosen": -1.001761794090271, "logits/rejected": -0.8828006982803345, "logps/chosen": -0.7422058582305908, "logps/rejected": -2.444760799407959, "loss": 0.9471, "nll_loss": 0.8922415971755981, "rewards/accuracies": 1.0, "rewards/chosen": -0.07422059029340744, "rewards/margins": 0.17025548219680786, "rewards/rejected": -0.2444760799407959, "step": 6139 }, { "epoch": 3.745615372883941, "grad_norm": 1.324955940246582, "learning_rate": 2.013472137170851e-06, "log_odds_chosen": 2.688032388687134, "log_odds_ratio": -0.28251636028289795, "logits/chosen": -0.5057048797607422, "logits/rejected": -0.7737010717391968, "logps/chosen": -0.458200603723526, "logps/rejected": -2.305826425552368, "loss": 0.9129, "nll_loss": 0.7578216791152954, "rewards/accuracies": 0.875, "rewards/chosen": -0.04582006484270096, "rewards/margins": 0.18476256728172302, "rewards/rejected": -0.23058262467384338, "step": 6140 }, { "epoch": 3.746225407960958, "grad_norm": 1.4887396097183228, "learning_rate": 2.0124923453766072e-06, "log_odds_chosen": 2.6694517135620117, "log_odds_ratio": -0.3713318407535553, "logits/chosen": -0.7531686425209045, "logits/rejected": -0.6152077317237854, "logps/chosen": -0.6959207057952881, "logps/rejected": -2.8928356170654297, "loss": 0.9119, "nll_loss": 0.7880979776382446, "rewards/accuracies": 0.875, "rewards/chosen": -0.06959207355976105, "rewards/margins": 0.21969148516654968, "rewards/rejected": -0.28928354382514954, "step": 6141 }, { "epoch": 3.7468354430379747, "grad_norm": 5.5232648849487305, "learning_rate": 2.0115125535823638e-06, "log_odds_chosen": 1.972023367881775, "log_odds_ratio": -0.38126522302627563, "logits/chosen": -0.9500391483306885, "logits/rejected": -0.860567033290863, "logps/chosen": -0.7273374199867249, "logps/rejected": -2.361898183822632, "loss": 1.1136, "nll_loss": 1.0702321529388428, "rewards/accuracies": 1.0, "rewards/chosen": -0.07273373752832413, "rewards/margins": 0.16345606744289398, "rewards/rejected": -0.2361897975206375, "step": 6142 }, { "epoch": 3.7474454781149915, "grad_norm": 1.4345548152923584, "learning_rate": 2.01053276178812e-06, "log_odds_chosen": 1.1065012216567993, "log_odds_ratio": -0.8036344051361084, "logits/chosen": -0.7169378995895386, "logits/rejected": -0.6557879447937012, "logps/chosen": -1.1442086696624756, "logps/rejected": -1.8158183097839355, "loss": 1.0938, "nll_loss": 1.032146692276001, "rewards/accuracies": 0.625, "rewards/chosen": -0.11442086845636368, "rewards/margins": 0.06716097146272659, "rewards/rejected": -0.18158183991909027, "step": 6143 }, { "epoch": 3.748055513192009, "grad_norm": 3.959334135055542, "learning_rate": 2.009552969993876e-06, "log_odds_chosen": 1.3867862224578857, "log_odds_ratio": -0.47970837354660034, "logits/chosen": -0.8888755440711975, "logits/rejected": -0.8681756258010864, "logps/chosen": -0.659328818321228, "logps/rejected": -1.7893095016479492, "loss": 0.9796, "nll_loss": 1.0193586349487305, "rewards/accuracies": 0.625, "rewards/chosen": -0.06593288481235504, "rewards/margins": 0.11299806833267212, "rewards/rejected": -0.17893095314502716, "step": 6144 }, { "epoch": 3.7486655482690256, "grad_norm": 1.53802490234375, "learning_rate": 2.0085731781996326e-06, "log_odds_chosen": 1.5920031070709229, "log_odds_ratio": -0.2767289876937866, "logits/chosen": -1.1149072647094727, "logits/rejected": -1.0752203464508057, "logps/chosen": -0.6991987824440002, "logps/rejected": -1.7240300178527832, "loss": 0.9522, "nll_loss": 0.9145852327346802, "rewards/accuracies": 1.0, "rewards/chosen": -0.0699198842048645, "rewards/margins": 0.1024831160902977, "rewards/rejected": -0.1724029928445816, "step": 6145 }, { "epoch": 3.7492755833460425, "grad_norm": 1.3517272472381592, "learning_rate": 2.0075933864053887e-06, "log_odds_chosen": 1.3697724342346191, "log_odds_ratio": -0.45653101801872253, "logits/chosen": -0.6703707575798035, "logits/rejected": -0.8237619400024414, "logps/chosen": -0.8163447380065918, "logps/rejected": -1.9580937623977661, "loss": 0.9645, "nll_loss": 1.0553648471832275, "rewards/accuracies": 0.75, "rewards/chosen": -0.08163447678089142, "rewards/margins": 0.11417491734027863, "rewards/rejected": -0.19580939412117004, "step": 6146 }, { "epoch": 3.7498856184230593, "grad_norm": 1.6066787242889404, "learning_rate": 2.006613594611145e-06, "log_odds_chosen": 2.8094189167022705, "log_odds_ratio": -0.31780731678009033, "logits/chosen": -0.8585458993911743, "logits/rejected": -0.9199150204658508, "logps/chosen": -0.6904555559158325, "logps/rejected": -2.882112979888916, "loss": 0.9942, "nll_loss": 0.9754111766815186, "rewards/accuracies": 0.875, "rewards/chosen": -0.06904555857181549, "rewards/margins": 0.21916574239730835, "rewards/rejected": -0.28821128606796265, "step": 6147 }, { "epoch": 3.750495653500076, "grad_norm": 4.034144878387451, "learning_rate": 2.0056338028169014e-06, "log_odds_chosen": 3.698237419128418, "log_odds_ratio": -0.1979881227016449, "logits/chosen": -0.7354448437690735, "logits/rejected": -0.7938475012779236, "logps/chosen": -0.7633053064346313, "logps/rejected": -3.9093480110168457, "loss": 1.0203, "nll_loss": 0.9689586758613586, "rewards/accuracies": 0.875, "rewards/chosen": -0.0763305276632309, "rewards/margins": 0.3146042823791504, "rewards/rejected": -0.3909348249435425, "step": 6148 }, { "epoch": 3.751105688577093, "grad_norm": 1.5815057754516602, "learning_rate": 2.004654011022658e-06, "log_odds_chosen": 0.35106950998306274, "log_odds_ratio": -0.6838231682777405, "logits/chosen": -0.8896717429161072, "logits/rejected": -0.9630938768386841, "logps/chosen": -1.0146598815917969, "logps/rejected": -1.2784488201141357, "loss": 1.1812, "nll_loss": 1.3127104043960571, "rewards/accuracies": 0.375, "rewards/chosen": -0.10146598517894745, "rewards/margins": 0.026378896087408066, "rewards/rejected": -0.12784487009048462, "step": 6149 }, { "epoch": 3.75171572365411, "grad_norm": 1.3806475400924683, "learning_rate": 2.0036742192284136e-06, "log_odds_chosen": 0.6971237659454346, "log_odds_ratio": -0.49335336685180664, "logits/chosen": -0.9866687655448914, "logits/rejected": -1.058305263519287, "logps/chosen": -0.7852649688720703, "logps/rejected": -1.1438605785369873, "loss": 1.3481, "nll_loss": 1.2647892236709595, "rewards/accuracies": 0.75, "rewards/chosen": -0.07852649688720703, "rewards/margins": 0.03585955873131752, "rewards/rejected": -0.11438605934381485, "step": 6150 }, { "epoch": 3.752325758731127, "grad_norm": 1.258094072341919, "learning_rate": 2.00269442743417e-06, "log_odds_chosen": 3.0959672927856445, "log_odds_ratio": -0.4090389013290405, "logits/chosen": -0.6974721550941467, "logits/rejected": -0.9090545177459717, "logps/chosen": -0.7143703699111938, "logps/rejected": -3.3737001419067383, "loss": 0.9212, "nll_loss": 0.9076866507530212, "rewards/accuracies": 0.625, "rewards/chosen": -0.0714370384812355, "rewards/margins": 0.26593297719955444, "rewards/rejected": -0.33737003803253174, "step": 6151 }, { "epoch": 3.752935793808144, "grad_norm": 1.6096237897872925, "learning_rate": 2.0017146356399267e-06, "log_odds_chosen": 0.9221336245536804, "log_odds_ratio": -0.6375101804733276, "logits/chosen": -0.8365263938903809, "logits/rejected": -0.8467544317245483, "logps/chosen": -0.8480409383773804, "logps/rejected": -1.4956610202789307, "loss": 1.0558, "nll_loss": 1.1403172016143799, "rewards/accuracies": 0.5, "rewards/chosen": -0.08480409532785416, "rewards/margins": 0.06476198881864548, "rewards/rejected": -0.14956608414649963, "step": 6152 }, { "epoch": 3.753545828885161, "grad_norm": 1.402206540107727, "learning_rate": 2.0007348438456824e-06, "log_odds_chosen": 1.5415455102920532, "log_odds_ratio": -0.4248368740081787, "logits/chosen": -0.7775607705116272, "logits/rejected": -0.972244381904602, "logps/chosen": -0.8153328895568848, "logps/rejected": -1.7946091890335083, "loss": 1.0765, "nll_loss": 1.1064445972442627, "rewards/accuracies": 0.75, "rewards/chosen": -0.0815332904458046, "rewards/margins": 0.09792763739824295, "rewards/rejected": -0.17946091294288635, "step": 6153 }, { "epoch": 3.7541558639621777, "grad_norm": 1.6023837327957153, "learning_rate": 1.999755052051439e-06, "log_odds_chosen": 0.8675950169563293, "log_odds_ratio": -0.5491912364959717, "logits/chosen": -0.7349345088005066, "logits/rejected": -0.8551827669143677, "logps/chosen": -0.8461999893188477, "logps/rejected": -1.1455150842666626, "loss": 1.2757, "nll_loss": 1.2382506132125854, "rewards/accuracies": 0.75, "rewards/chosen": -0.08461999893188477, "rewards/margins": 0.02993151731789112, "rewards/rejected": -0.11455151438713074, "step": 6154 }, { "epoch": 3.754765899039195, "grad_norm": 1.9056586027145386, "learning_rate": 1.998775260257195e-06, "log_odds_chosen": 0.4437549412250519, "log_odds_ratio": -0.5100167393684387, "logits/chosen": -0.7952415943145752, "logits/rejected": -0.9541926383972168, "logps/chosen": -1.003515362739563, "logps/rejected": -1.3153423070907593, "loss": 1.0043, "nll_loss": 0.9415909051895142, "rewards/accuracies": 0.875, "rewards/chosen": -0.10035154223442078, "rewards/margins": 0.03118269518017769, "rewards/rejected": -0.13153423368930817, "step": 6155 }, { "epoch": 3.755375934116212, "grad_norm": 3.0169670581817627, "learning_rate": 1.9977954684629516e-06, "log_odds_chosen": 3.260012149810791, "log_odds_ratio": -0.2606837749481201, "logits/chosen": -0.9960263967514038, "logits/rejected": -1.1029504537582397, "logps/chosen": -0.7735321521759033, "logps/rejected": -3.5474491119384766, "loss": 0.9494, "nll_loss": 0.9345346689224243, "rewards/accuracies": 0.75, "rewards/chosen": -0.07735320925712585, "rewards/margins": 0.2773916721343994, "rewards/rejected": -0.35474491119384766, "step": 6156 }, { "epoch": 3.7559859691932287, "grad_norm": 4.235934734344482, "learning_rate": 1.9968156766687077e-06, "log_odds_chosen": 1.0489501953125, "log_odds_ratio": -0.3522132337093353, "logits/chosen": -0.7413333654403687, "logits/rejected": -0.7104998826980591, "logps/chosen": -0.7513856887817383, "logps/rejected": -1.4406547546386719, "loss": 1.0486, "nll_loss": 0.8800547122955322, "rewards/accuracies": 1.0, "rewards/chosen": -0.07513856887817383, "rewards/margins": 0.06892689317464828, "rewards/rejected": -0.14406545460224152, "step": 6157 }, { "epoch": 3.7565960042702455, "grad_norm": 1.8767621517181396, "learning_rate": 1.9958358848744643e-06, "log_odds_chosen": 2.4545414447784424, "log_odds_ratio": -0.3036932647228241, "logits/chosen": -0.7991849780082703, "logits/rejected": -0.7066186666488647, "logps/chosen": -0.661845862865448, "logps/rejected": -2.572267770767212, "loss": 1.0451, "nll_loss": 0.8484591841697693, "rewards/accuracies": 0.75, "rewards/chosen": -0.06618459522724152, "rewards/margins": 0.19104218482971191, "rewards/rejected": -0.2572267949581146, "step": 6158 }, { "epoch": 3.7572060393472624, "grad_norm": 4.381149768829346, "learning_rate": 1.9948560930802204e-06, "log_odds_chosen": 1.2027548551559448, "log_odds_ratio": -0.4962405562400818, "logits/chosen": -0.9096660017967224, "logits/rejected": -0.9279459118843079, "logps/chosen": -0.7129300832748413, "logps/rejected": -1.64048433303833, "loss": 1.1, "nll_loss": 1.0217851400375366, "rewards/accuracies": 0.625, "rewards/chosen": -0.07129301875829697, "rewards/margins": 0.09275543689727783, "rewards/rejected": -0.1640484482049942, "step": 6159 }, { "epoch": 3.7578160744242792, "grad_norm": 5.377931594848633, "learning_rate": 1.9938763012859765e-06, "log_odds_chosen": 0.23170001804828644, "log_odds_ratio": -0.7621763944625854, "logits/chosen": -0.7805891036987305, "logits/rejected": -0.6642318367958069, "logps/chosen": -1.04668128490448, "logps/rejected": -1.220729947090149, "loss": 1.1202, "nll_loss": 1.1668744087219238, "rewards/accuracies": 0.375, "rewards/chosen": -0.10466812551021576, "rewards/margins": 0.017404865473508835, "rewards/rejected": -0.12207299470901489, "step": 6160 }, { "epoch": 3.758426109501296, "grad_norm": 1.2407352924346924, "learning_rate": 1.992896509491733e-06, "log_odds_chosen": 3.014780044555664, "log_odds_ratio": -0.3031778335571289, "logits/chosen": -0.9651498794555664, "logits/rejected": -1.11343252658844, "logps/chosen": -0.7981200218200684, "logps/rejected": -3.3138701915740967, "loss": 1.0697, "nll_loss": 1.26406991481781, "rewards/accuracies": 1.0, "rewards/chosen": -0.07981199771165848, "rewards/margins": 0.2515750229358673, "rewards/rejected": -0.3313870131969452, "step": 6161 }, { "epoch": 3.7590361445783134, "grad_norm": 2.68119740486145, "learning_rate": 1.991916717697489e-06, "log_odds_chosen": 1.0697336196899414, "log_odds_ratio": -0.42919301986694336, "logits/chosen": -0.9099717140197754, "logits/rejected": -0.86085444688797, "logps/chosen": -0.8065608143806458, "logps/rejected": -1.6076207160949707, "loss": 1.1017, "nll_loss": 1.0655747652053833, "rewards/accuracies": 0.75, "rewards/chosen": -0.08065608143806458, "rewards/margins": 0.08010599762201309, "rewards/rejected": -0.16076207160949707, "step": 6162 }, { "epoch": 3.75964617965533, "grad_norm": 1.335667610168457, "learning_rate": 1.9909369259032457e-06, "log_odds_chosen": 1.6782184839248657, "log_odds_ratio": -0.5896554589271545, "logits/chosen": -0.553410530090332, "logits/rejected": -0.7390842437744141, "logps/chosen": -0.8836773633956909, "logps/rejected": -2.193904399871826, "loss": 0.9828, "nll_loss": 0.8477564454078674, "rewards/accuracies": 0.5, "rewards/chosen": -0.08836774528026581, "rewards/margins": 0.13102269172668457, "rewards/rejected": -0.21939043700695038, "step": 6163 }, { "epoch": 3.760256214732347, "grad_norm": 1.363320231437683, "learning_rate": 1.989957134109002e-06, "log_odds_chosen": 1.9474399089813232, "log_odds_ratio": -0.23874235153198242, "logits/chosen": -0.7160364985466003, "logits/rejected": -0.78656005859375, "logps/chosen": -0.5329441428184509, "logps/rejected": -1.8185107707977295, "loss": 1.0794, "nll_loss": 0.7236636281013489, "rewards/accuracies": 0.875, "rewards/chosen": -0.05329441279172897, "rewards/margins": 0.12855666875839233, "rewards/rejected": -0.1818510740995407, "step": 6164 }, { "epoch": 3.760866249809364, "grad_norm": 1.647545576095581, "learning_rate": 1.988977342314758e-06, "log_odds_chosen": 0.5726410150527954, "log_odds_ratio": -0.4756571054458618, "logits/chosen": -0.8528732657432556, "logits/rejected": -0.7863877415657043, "logps/chosen": -0.95866459608078, "logps/rejected": -1.382103681564331, "loss": 1.0893, "nll_loss": 1.0167509317398071, "rewards/accuracies": 0.875, "rewards/chosen": -0.09586645662784576, "rewards/margins": 0.04234392195940018, "rewards/rejected": -0.13821038603782654, "step": 6165 }, { "epoch": 3.761476284886381, "grad_norm": 2.2414016723632812, "learning_rate": 1.987997550520514e-06, "log_odds_chosen": 2.378274440765381, "log_odds_ratio": -0.21135948598384857, "logits/chosen": -0.6808892488479614, "logits/rejected": -0.7663140296936035, "logps/chosen": -0.823277473449707, "logps/rejected": -2.573524236679077, "loss": 1.003, "nll_loss": 1.0517711639404297, "rewards/accuracies": 1.0, "rewards/chosen": -0.08232775330543518, "rewards/margins": 0.17502467334270477, "rewards/rejected": -0.25735241174697876, "step": 6166 }, { "epoch": 3.762086319963398, "grad_norm": 2.058649778366089, "learning_rate": 1.9870177587262706e-06, "log_odds_chosen": 2.878044843673706, "log_odds_ratio": -0.38239333033561707, "logits/chosen": -0.8279240131378174, "logits/rejected": -1.0553098917007446, "logps/chosen": -0.7982335686683655, "logps/rejected": -3.268789529800415, "loss": 1.0092, "nll_loss": 0.9230616092681885, "rewards/accuracies": 0.75, "rewards/chosen": -0.07982335984706879, "rewards/margins": 0.24705561995506287, "rewards/rejected": -0.32687896490097046, "step": 6167 }, { "epoch": 3.762696355040415, "grad_norm": 1.5275075435638428, "learning_rate": 1.9860379669320268e-06, "log_odds_chosen": 1.354848027229309, "log_odds_ratio": -0.33874279260635376, "logits/chosen": -0.6301331520080566, "logits/rejected": -0.5303634405136108, "logps/chosen": -0.6249929666519165, "logps/rejected": -1.476104736328125, "loss": 1.0424, "nll_loss": 0.9243819713592529, "rewards/accuracies": 0.875, "rewards/chosen": -0.06249929964542389, "rewards/margins": 0.08511117845773697, "rewards/rejected": -0.14761048555374146, "step": 6168 }, { "epoch": 3.7633063901174317, "grad_norm": 2.0673348903656006, "learning_rate": 1.985058175137783e-06, "log_odds_chosen": 2.9552230834960938, "log_odds_ratio": -0.23405499756336212, "logits/chosen": -0.9687145948410034, "logits/rejected": -1.2051188945770264, "logps/chosen": -0.9682239294052124, "logps/rejected": -3.4572665691375732, "loss": 1.0924, "nll_loss": 1.0323097705841064, "rewards/accuracies": 1.0, "rewards/chosen": -0.09682239592075348, "rewards/margins": 0.2489042580127716, "rewards/rejected": -0.3457266688346863, "step": 6169 }, { "epoch": 3.7639164251944486, "grad_norm": 1.9247037172317505, "learning_rate": 1.9840783833435394e-06, "log_odds_chosen": 1.899395227432251, "log_odds_ratio": -0.38498181104660034, "logits/chosen": -0.9687997698783875, "logits/rejected": -0.9720237255096436, "logps/chosen": -0.7196650505065918, "logps/rejected": -2.303030490875244, "loss": 0.9886, "nll_loss": 0.837264358997345, "rewards/accuracies": 0.625, "rewards/chosen": -0.0719665065407753, "rewards/margins": 0.15833653509616852, "rewards/rejected": -0.23030304908752441, "step": 6170 }, { "epoch": 3.7645264602714654, "grad_norm": 1.0727510452270508, "learning_rate": 1.9830985915492956e-06, "log_odds_chosen": 2.2359111309051514, "log_odds_ratio": -0.34144043922424316, "logits/chosen": -0.7552884817123413, "logits/rejected": -0.9216080904006958, "logps/chosen": -0.7895888686180115, "logps/rejected": -2.5583927631378174, "loss": 0.9794, "nll_loss": 1.1022554636001587, "rewards/accuracies": 0.875, "rewards/chosen": -0.07895888388156891, "rewards/margins": 0.1768803894519806, "rewards/rejected": -0.2558392882347107, "step": 6171 }, { "epoch": 3.7651364953484823, "grad_norm": 2.1673364639282227, "learning_rate": 1.982118799755052e-06, "log_odds_chosen": 1.5063071250915527, "log_odds_ratio": -0.5235652327537537, "logits/chosen": -0.7713017463684082, "logits/rejected": -0.8170726895332336, "logps/chosen": -0.8165988922119141, "logps/rejected": -2.0051379203796387, "loss": 0.9605, "nll_loss": 0.9450674653053284, "rewards/accuracies": 0.625, "rewards/chosen": -0.08165989816188812, "rewards/margins": 0.11885389685630798, "rewards/rejected": -0.2005137801170349, "step": 6172 }, { "epoch": 3.7657465304254996, "grad_norm": 1.9871028661727905, "learning_rate": 1.9811390079608082e-06, "log_odds_chosen": 1.2050399780273438, "log_odds_ratio": -0.373822420835495, "logits/chosen": -0.9158028960227966, "logits/rejected": -0.9680812358856201, "logps/chosen": -0.8430483341217041, "logps/rejected": -1.6098816394805908, "loss": 1.0947, "nll_loss": 0.8232293128967285, "rewards/accuracies": 0.875, "rewards/chosen": -0.08430483192205429, "rewards/margins": 0.07668332755565643, "rewards/rejected": -0.16098816692829132, "step": 6173 }, { "epoch": 3.7663565655025164, "grad_norm": 2.417512893676758, "learning_rate": 1.9801592161665643e-06, "log_odds_chosen": 2.640049934387207, "log_odds_ratio": -0.3335973024368286, "logits/chosen": -0.7064825892448425, "logits/rejected": -0.8118178248405457, "logps/chosen": -0.7467678189277649, "logps/rejected": -2.8632662296295166, "loss": 1.0124, "nll_loss": 1.009587049484253, "rewards/accuracies": 0.875, "rewards/chosen": -0.07467678934335709, "rewards/margins": 0.21164986491203308, "rewards/rejected": -0.28632664680480957, "step": 6174 }, { "epoch": 3.7669666005795333, "grad_norm": 8.884034156799316, "learning_rate": 1.979179424372321e-06, "log_odds_chosen": 2.112968921661377, "log_odds_ratio": -0.4625343680381775, "logits/chosen": -0.9904235601425171, "logits/rejected": -1.0231943130493164, "logps/chosen": -0.9840855598449707, "logps/rejected": -2.776235580444336, "loss": 1.0931, "nll_loss": 1.2675119638442993, "rewards/accuracies": 0.625, "rewards/chosen": -0.09840855002403259, "rewards/margins": 0.17921499907970428, "rewards/rejected": -0.27762356400489807, "step": 6175 }, { "epoch": 3.76757663565655, "grad_norm": 2.0646042823791504, "learning_rate": 1.978199632578077e-06, "log_odds_chosen": 3.4826643466949463, "log_odds_ratio": -0.24716812372207642, "logits/chosen": -1.001035213470459, "logits/rejected": -0.9409695863723755, "logps/chosen": -0.9376155138015747, "logps/rejected": -3.9945414066314697, "loss": 0.9395, "nll_loss": 1.1166527271270752, "rewards/accuracies": 0.75, "rewards/chosen": -0.09376154839992523, "rewards/margins": 0.305692583322525, "rewards/rejected": -0.39945414662361145, "step": 6176 }, { "epoch": 3.7681866707335674, "grad_norm": 6.106185436248779, "learning_rate": 1.9772198407838336e-06, "log_odds_chosen": 1.7613826990127563, "log_odds_ratio": -0.4086722135543823, "logits/chosen": -1.0807530879974365, "logits/rejected": -1.0348339080810547, "logps/chosen": -0.8721832633018494, "logps/rejected": -2.2631869316101074, "loss": 1.0887, "nll_loss": 1.1242964267730713, "rewards/accuracies": 0.75, "rewards/chosen": -0.08721832931041718, "rewards/margins": 0.13910037279129028, "rewards/rejected": -0.22631870210170746, "step": 6177 }, { "epoch": 3.7687967058105842, "grad_norm": 1.9924412965774536, "learning_rate": 1.9762400489895897e-06, "log_odds_chosen": 0.9541258215904236, "log_odds_ratio": -0.568341851234436, "logits/chosen": -0.8063241243362427, "logits/rejected": -0.8068900108337402, "logps/chosen": -0.8803151249885559, "logps/rejected": -1.696323275566101, "loss": 1.0654, "nll_loss": 1.1661109924316406, "rewards/accuracies": 0.5, "rewards/chosen": -0.08803151547908783, "rewards/margins": 0.08160080015659332, "rewards/rejected": -0.16963231563568115, "step": 6178 }, { "epoch": 3.769406740887601, "grad_norm": 1.5602387189865112, "learning_rate": 1.975260257195346e-06, "log_odds_chosen": 0.8624063730239868, "log_odds_ratio": -0.6747956275939941, "logits/chosen": -1.106512427330017, "logits/rejected": -1.0589749813079834, "logps/chosen": -0.9908306002616882, "logps/rejected": -1.653160572052002, "loss": 0.9787, "nll_loss": 1.1208975315093994, "rewards/accuracies": 0.625, "rewards/chosen": -0.0990830585360527, "rewards/margins": 0.06623300909996033, "rewards/rejected": -0.16531606018543243, "step": 6179 }, { "epoch": 3.770016775964618, "grad_norm": 7.052381992340088, "learning_rate": 1.9742804654011024e-06, "log_odds_chosen": 0.7385051846504211, "log_odds_ratio": -0.6037466526031494, "logits/chosen": -0.8755732774734497, "logits/rejected": -0.8870632648468018, "logps/chosen": -0.9221198558807373, "logps/rejected": -1.4590855836868286, "loss": 1.1535, "nll_loss": 1.0919572114944458, "rewards/accuracies": 0.5, "rewards/chosen": -0.0922120064496994, "rewards/margins": 0.05369655787944794, "rewards/rejected": -0.14590854942798615, "step": 6180 }, { "epoch": 3.7706268110416348, "grad_norm": 11.4744291305542, "learning_rate": 1.9733006736068585e-06, "log_odds_chosen": 1.2796534299850464, "log_odds_ratio": -0.44660040736198425, "logits/chosen": -0.7966121435165405, "logits/rejected": -1.0724127292633057, "logps/chosen": -0.9521574974060059, "logps/rejected": -1.9209952354431152, "loss": 1.1081, "nll_loss": 1.0436328649520874, "rewards/accuracies": 0.875, "rewards/chosen": -0.09521574527025223, "rewards/margins": 0.09688377380371094, "rewards/rejected": -0.19209952652454376, "step": 6181 }, { "epoch": 3.7712368461186516, "grad_norm": 2.1479251384735107, "learning_rate": 1.9723208818126146e-06, "log_odds_chosen": 2.3655567169189453, "log_odds_ratio": -0.16963118314743042, "logits/chosen": -0.7992831468582153, "logits/rejected": -0.9017179608345032, "logps/chosen": -0.7220103740692139, "logps/rejected": -2.508591413497925, "loss": 0.856, "nll_loss": 0.8718482851982117, "rewards/accuracies": 1.0, "rewards/chosen": -0.07220104336738586, "rewards/margins": 0.1786581426858902, "rewards/rejected": -0.25085917115211487, "step": 6182 }, { "epoch": 3.771846881195669, "grad_norm": 1.1744565963745117, "learning_rate": 1.9713410900183707e-06, "log_odds_chosen": 1.1112165451049805, "log_odds_ratio": -0.5484391450881958, "logits/chosen": -0.7834765315055847, "logits/rejected": -0.9093518853187561, "logps/chosen": -0.8478243947029114, "logps/rejected": -1.5900189876556396, "loss": 0.9975, "nll_loss": 0.9157733917236328, "rewards/accuracies": 0.625, "rewards/chosen": -0.0847824364900589, "rewards/margins": 0.07421945780515671, "rewards/rejected": -0.159001886844635, "step": 6183 }, { "epoch": 3.7724569162726858, "grad_norm": 3.2105600833892822, "learning_rate": 1.9703612982241273e-06, "log_odds_chosen": 1.0977195501327515, "log_odds_ratio": -0.5009360313415527, "logits/chosen": -0.9946566820144653, "logits/rejected": -1.0215195417404175, "logps/chosen": -0.8025224208831787, "logps/rejected": -1.6595191955566406, "loss": 1.1838, "nll_loss": 1.1886484622955322, "rewards/accuracies": 0.75, "rewards/chosen": -0.08025224506855011, "rewards/margins": 0.08569967746734619, "rewards/rejected": -0.1659519374370575, "step": 6184 }, { "epoch": 3.7730669513497026, "grad_norm": 9.121512413024902, "learning_rate": 1.9693815064298834e-06, "log_odds_chosen": 1.2791328430175781, "log_odds_ratio": -0.5320626497268677, "logits/chosen": -0.9906500577926636, "logits/rejected": -0.9915280938148499, "logps/chosen": -0.9925820827484131, "logps/rejected": -2.021909713745117, "loss": 1.2142, "nll_loss": 1.5157825946807861, "rewards/accuracies": 0.625, "rewards/chosen": -0.09925820678472519, "rewards/margins": 0.10293275117874146, "rewards/rejected": -0.20219096541404724, "step": 6185 }, { "epoch": 3.7736769864267194, "grad_norm": 1.6085286140441895, "learning_rate": 1.96840171463564e-06, "log_odds_chosen": 1.621140718460083, "log_odds_ratio": -0.44288477301597595, "logits/chosen": -0.9290803670883179, "logits/rejected": -1.0787899494171143, "logps/chosen": -0.9647765159606934, "logps/rejected": -2.387861728668213, "loss": 0.9437, "nll_loss": 1.002996802330017, "rewards/accuracies": 0.875, "rewards/chosen": -0.09647765755653381, "rewards/margins": 0.14230850338935852, "rewards/rejected": -0.23878616094589233, "step": 6186 }, { "epoch": 3.7742870215037367, "grad_norm": 1.8364354372024536, "learning_rate": 1.967421922841396e-06, "log_odds_chosen": 2.8715009689331055, "log_odds_ratio": -0.3170084059238434, "logits/chosen": -0.6010787487030029, "logits/rejected": -0.9335293173789978, "logps/chosen": -0.7107405662536621, "logps/rejected": -3.0472772121429443, "loss": 1.0316, "nll_loss": 0.8779555559158325, "rewards/accuracies": 0.875, "rewards/chosen": -0.07107405364513397, "rewards/margins": 0.23365361988544464, "rewards/rejected": -0.3047276735305786, "step": 6187 }, { "epoch": 3.7748970565807536, "grad_norm": 2.4036076068878174, "learning_rate": 1.9664421310471526e-06, "log_odds_chosen": 0.876928448677063, "log_odds_ratio": -0.6094435453414917, "logits/chosen": -1.0871292352676392, "logits/rejected": -1.2022546529769897, "logps/chosen": -1.0519967079162598, "logps/rejected": -1.5605322122573853, "loss": 1.2385, "nll_loss": 1.54362952709198, "rewards/accuracies": 0.375, "rewards/chosen": -0.1051996648311615, "rewards/margins": 0.050853561609983444, "rewards/rejected": -0.15605321526527405, "step": 6188 }, { "epoch": 3.7755070916577704, "grad_norm": 1.6722267866134644, "learning_rate": 1.9654623392529087e-06, "log_odds_chosen": 1.5012154579162598, "log_odds_ratio": -0.3971704840660095, "logits/chosen": -0.9112288951873779, "logits/rejected": -0.9855484366416931, "logps/chosen": -0.751329779624939, "logps/rejected": -1.897996187210083, "loss": 1.0346, "nll_loss": 0.9538748264312744, "rewards/accuracies": 0.75, "rewards/chosen": -0.07513298094272614, "rewards/margins": 0.114666648209095, "rewards/rejected": -0.18979963660240173, "step": 6189 }, { "epoch": 3.7761171267347873, "grad_norm": 6.446746826171875, "learning_rate": 1.964482547458665e-06, "log_odds_chosen": 0.7221989631652832, "log_odds_ratio": -0.5092337727546692, "logits/chosen": -0.7720966935157776, "logits/rejected": -0.8579636216163635, "logps/chosen": -0.6816248893737793, "logps/rejected": -1.2108980417251587, "loss": 1.0142, "nll_loss": 0.8635974526405334, "rewards/accuracies": 0.75, "rewards/chosen": -0.06816248595714569, "rewards/margins": 0.052927322685718536, "rewards/rejected": -0.12108980864286423, "step": 6190 }, { "epoch": 3.776727161811804, "grad_norm": 1.735258936882019, "learning_rate": 1.9635027556644214e-06, "log_odds_chosen": 0.40039628744125366, "log_odds_ratio": -0.5952622294425964, "logits/chosen": -0.8783845901489258, "logits/rejected": -0.9699612855911255, "logps/chosen": -0.731726884841919, "logps/rejected": -0.9752959609031677, "loss": 0.9883, "nll_loss": 0.9391224384307861, "rewards/accuracies": 0.625, "rewards/chosen": -0.0731726884841919, "rewards/margins": 0.02435690350830555, "rewards/rejected": -0.0975295901298523, "step": 6191 }, { "epoch": 3.777337196888821, "grad_norm": 1.57829749584198, "learning_rate": 1.9625229638701775e-06, "log_odds_chosen": 2.532796859741211, "log_odds_ratio": -0.29564598202705383, "logits/chosen": -0.6401673555374146, "logits/rejected": -0.6856735348701477, "logps/chosen": -0.589418351650238, "logps/rejected": -2.532378673553467, "loss": 0.9625, "nll_loss": 0.7901370525360107, "rewards/accuracies": 0.875, "rewards/chosen": -0.05894184112548828, "rewards/margins": 0.19429603219032288, "rewards/rejected": -0.25323787331581116, "step": 6192 }, { "epoch": 3.777947231965838, "grad_norm": 1.4041486978530884, "learning_rate": 1.9615431720759336e-06, "log_odds_chosen": 2.2335903644561768, "log_odds_ratio": -0.3301123082637787, "logits/chosen": -0.7558786273002625, "logits/rejected": -1.017646312713623, "logps/chosen": -0.6835898160934448, "logps/rejected": -2.441467761993408, "loss": 0.8846, "nll_loss": 0.8692091107368469, "rewards/accuracies": 0.875, "rewards/chosen": -0.06835898011922836, "rewards/margins": 0.1757877767086029, "rewards/rejected": -0.24414676427841187, "step": 6193 }, { "epoch": 3.778557267042855, "grad_norm": 2.027674674987793, "learning_rate": 1.96056338028169e-06, "log_odds_chosen": 1.438376545906067, "log_odds_ratio": -0.45492300391197205, "logits/chosen": -0.6498324275016785, "logits/rejected": -0.8182709217071533, "logps/chosen": -0.7830411791801453, "logps/rejected": -1.796046257019043, "loss": 0.9459, "nll_loss": 0.9806911945343018, "rewards/accuracies": 0.625, "rewards/chosen": -0.07830411195755005, "rewards/margins": 0.10130050033330917, "rewards/rejected": -0.17960461974143982, "step": 6194 }, { "epoch": 3.779167302119872, "grad_norm": 6.138519287109375, "learning_rate": 1.9595835884874463e-06, "log_odds_chosen": 1.986332654953003, "log_odds_ratio": -0.45926398038864136, "logits/chosen": -0.7720800638198853, "logits/rejected": -0.9598085284233093, "logps/chosen": -0.8095438480377197, "logps/rejected": -2.404212474822998, "loss": 1.0152, "nll_loss": 0.9689358472824097, "rewards/accuracies": 0.875, "rewards/chosen": -0.08095438778400421, "rewards/margins": 0.15946686267852783, "rewards/rejected": -0.24042125046253204, "step": 6195 }, { "epoch": 3.779777337196889, "grad_norm": 1.3795270919799805, "learning_rate": 1.9586037966932024e-06, "log_odds_chosen": 2.1205787658691406, "log_odds_ratio": -0.4078483283519745, "logits/chosen": -0.6683592796325684, "logits/rejected": -0.7561683058738708, "logps/chosen": -0.5811338424682617, "logps/rejected": -2.220444917678833, "loss": 1.0701, "nll_loss": 0.8305791020393372, "rewards/accuracies": 0.75, "rewards/chosen": -0.05811338871717453, "rewards/margins": 0.16393110156059265, "rewards/rejected": -0.22204449772834778, "step": 6196 }, { "epoch": 3.7803873722739056, "grad_norm": 4.542880058288574, "learning_rate": 1.957624004898959e-06, "log_odds_chosen": 0.3304542303085327, "log_odds_ratio": -0.5970750451087952, "logits/chosen": -0.7258100509643555, "logits/rejected": -0.709499180316925, "logps/chosen": -0.7257002592086792, "logps/rejected": -0.9405399560928345, "loss": 1.2204, "nll_loss": 1.019713282585144, "rewards/accuracies": 0.625, "rewards/chosen": -0.07257002592086792, "rewards/margins": 0.02148396149277687, "rewards/rejected": -0.09405399113893509, "step": 6197 }, { "epoch": 3.780997407350923, "grad_norm": 2.431689739227295, "learning_rate": 1.956644213104715e-06, "log_odds_chosen": 2.353983163833618, "log_odds_ratio": -0.32715505361557007, "logits/chosen": -0.6370046138763428, "logits/rejected": -0.7662031054496765, "logps/chosen": -0.7282251119613647, "logps/rejected": -2.685460329055786, "loss": 1.0677, "nll_loss": 0.9920825958251953, "rewards/accuracies": 1.0, "rewards/chosen": -0.07282251864671707, "rewards/margins": 0.1957235336303711, "rewards/rejected": -0.26854604482650757, "step": 6198 }, { "epoch": 3.78160744242794, "grad_norm": 2.0712621212005615, "learning_rate": 1.9556644213104712e-06, "log_odds_chosen": 1.77170729637146, "log_odds_ratio": -0.3782247006893158, "logits/chosen": -0.7717461585998535, "logits/rejected": -0.7955641746520996, "logps/chosen": -0.6652157306671143, "logps/rejected": -1.7924786806106567, "loss": 0.9835, "nll_loss": 0.9482169151306152, "rewards/accuracies": 0.75, "rewards/chosen": -0.06652157753705978, "rewards/margins": 0.11272629350423813, "rewards/rejected": -0.17924785614013672, "step": 6199 }, { "epoch": 3.7822174775049566, "grad_norm": 5.236003875732422, "learning_rate": 1.9546846295162278e-06, "log_odds_chosen": 1.1188023090362549, "log_odds_ratio": -0.4436032772064209, "logits/chosen": -0.8152750730514526, "logits/rejected": -0.8904650807380676, "logps/chosen": -1.0733829736709595, "logps/rejected": -1.971152901649475, "loss": 1.0931, "nll_loss": 1.0787429809570312, "rewards/accuracies": 0.875, "rewards/chosen": -0.1073383018374443, "rewards/margins": 0.08977700024843216, "rewards/rejected": -0.19711528718471527, "step": 6200 }, { "epoch": 3.7828275125819735, "grad_norm": 5.110223293304443, "learning_rate": 1.953704837721984e-06, "log_odds_chosen": 2.5481367111206055, "log_odds_ratio": -0.2798999547958374, "logits/chosen": -0.8219967484474182, "logits/rejected": -1.0298715829849243, "logps/chosen": -0.6824461221694946, "logps/rejected": -2.6338882446289062, "loss": 1.0122, "nll_loss": 0.84067702293396, "rewards/accuracies": 0.875, "rewards/chosen": -0.06824460625648499, "rewards/margins": 0.19514420628547668, "rewards/rejected": -0.26338881254196167, "step": 6201 }, { "epoch": 3.7834375476589903, "grad_norm": 2.0920543670654297, "learning_rate": 1.9527250459277404e-06, "log_odds_chosen": 1.607406735420227, "log_odds_ratio": -0.6335783004760742, "logits/chosen": -0.8923880457878113, "logits/rejected": -0.9852533936500549, "logps/chosen": -0.8430394530296326, "logps/rejected": -2.3128371238708496, "loss": 1.0979, "nll_loss": 1.049594521522522, "rewards/accuracies": 0.5, "rewards/chosen": -0.08430393785238266, "rewards/margins": 0.14697977900505066, "rewards/rejected": -0.23128370940685272, "step": 6202 }, { "epoch": 3.784047582736007, "grad_norm": 1.5666626691818237, "learning_rate": 1.9517452541334966e-06, "log_odds_chosen": 1.6963050365447998, "log_odds_ratio": -0.39934787154197693, "logits/chosen": -0.8869197368621826, "logits/rejected": -0.8478939533233643, "logps/chosen": -0.8372761607170105, "logps/rejected": -2.112572193145752, "loss": 1.0532, "nll_loss": 1.0105106830596924, "rewards/accuracies": 0.875, "rewards/chosen": -0.08372762054204941, "rewards/margins": 0.12752960622310638, "rewards/rejected": -0.2112572193145752, "step": 6203 }, { "epoch": 3.784657617813024, "grad_norm": 4.471240043640137, "learning_rate": 1.9507654623392527e-06, "log_odds_chosen": 1.2548882961273193, "log_odds_ratio": -0.521859884262085, "logits/chosen": -0.7108604311943054, "logits/rejected": -0.8196196556091309, "logps/chosen": -0.7430729866027832, "logps/rejected": -1.589707612991333, "loss": 1.091, "nll_loss": 0.9281377792358398, "rewards/accuracies": 0.75, "rewards/chosen": -0.07430729269981384, "rewards/margins": 0.08466346561908722, "rewards/rejected": -0.15897075831890106, "step": 6204 }, { "epoch": 3.7852676528900413, "grad_norm": 6.386728763580322, "learning_rate": 1.9497856705450092e-06, "log_odds_chosen": 0.8414164781570435, "log_odds_ratio": -0.5436081886291504, "logits/chosen": -0.8125876188278198, "logits/rejected": -0.9276332259178162, "logps/chosen": -0.7109073996543884, "logps/rejected": -1.2166036367416382, "loss": 1.0015, "nll_loss": 0.8988597989082336, "rewards/accuracies": 0.75, "rewards/chosen": -0.07109075039625168, "rewards/margins": 0.050569623708724976, "rewards/rejected": -0.12166036665439606, "step": 6205 }, { "epoch": 3.785877687967058, "grad_norm": 2.3022191524505615, "learning_rate": 1.9488058787507653e-06, "log_odds_chosen": 3.0113437175750732, "log_odds_ratio": -0.3516182601451874, "logits/chosen": -0.8449024558067322, "logits/rejected": -0.9408727884292603, "logps/chosen": -0.7198358178138733, "logps/rejected": -3.183481454849243, "loss": 1.0056, "nll_loss": 0.8380662202835083, "rewards/accuracies": 0.75, "rewards/chosen": -0.07198358327150345, "rewards/margins": 0.24636457860469818, "rewards/rejected": -0.3183481693267822, "step": 6206 }, { "epoch": 3.786487723044075, "grad_norm": 2.8518028259277344, "learning_rate": 1.947826086956522e-06, "log_odds_chosen": 1.1904658079147339, "log_odds_ratio": -0.4564998745918274, "logits/chosen": -0.7211925387382507, "logits/rejected": -0.9152909517288208, "logps/chosen": -0.7181345224380493, "logps/rejected": -1.5651891231536865, "loss": 1.1062, "nll_loss": 0.8807281255722046, "rewards/accuracies": 0.875, "rewards/chosen": -0.07181345671415329, "rewards/margins": 0.08470547199249268, "rewards/rejected": -0.15651893615722656, "step": 6207 }, { "epoch": 3.787097758121092, "grad_norm": 1.7724764347076416, "learning_rate": 1.946846295162278e-06, "log_odds_chosen": 2.1880595684051514, "log_odds_ratio": -0.33069831132888794, "logits/chosen": -0.691058874130249, "logits/rejected": -0.9320921301841736, "logps/chosen": -0.5677658319473267, "logps/rejected": -2.2092630863189697, "loss": 1.0865, "nll_loss": 0.8482259511947632, "rewards/accuracies": 0.875, "rewards/chosen": -0.05677657946944237, "rewards/margins": 0.1641497164964676, "rewards/rejected": -0.22092629969120026, "step": 6208 }, { "epoch": 3.787707793198109, "grad_norm": 1.5557132959365845, "learning_rate": 1.945866503368034e-06, "log_odds_chosen": 0.44709667563438416, "log_odds_ratio": -0.52984219789505, "logits/chosen": -0.8775269985198975, "logits/rejected": -0.9928461313247681, "logps/chosen": -0.8216627240180969, "logps/rejected": -1.1172444820404053, "loss": 1.0365, "nll_loss": 0.9236689805984497, "rewards/accuracies": 0.75, "rewards/chosen": -0.08216627687215805, "rewards/margins": 0.029558174312114716, "rewards/rejected": -0.11172445863485336, "step": 6209 }, { "epoch": 3.788317828275126, "grad_norm": 1.4132407903671265, "learning_rate": 1.9448867115737907e-06, "log_odds_chosen": 1.1541385650634766, "log_odds_ratio": -0.41951417922973633, "logits/chosen": -0.806765615940094, "logits/rejected": -0.8941073417663574, "logps/chosen": -0.8696998357772827, "logps/rejected": -1.693127155303955, "loss": 1.1738, "nll_loss": 1.2905285358428955, "rewards/accuracies": 0.625, "rewards/chosen": -0.08696997910737991, "rewards/margins": 0.0823427364230156, "rewards/rejected": -0.1693127155303955, "step": 6210 }, { "epoch": 3.788927863352143, "grad_norm": 3.3010120391845703, "learning_rate": 1.943906919779547e-06, "log_odds_chosen": 0.2994523048400879, "log_odds_ratio": -0.6163462996482849, "logits/chosen": -0.9697873592376709, "logits/rejected": -0.8772296905517578, "logps/chosen": -1.0769503116607666, "logps/rejected": -1.3008928298950195, "loss": 1.0964, "nll_loss": 1.2931838035583496, "rewards/accuracies": 0.625, "rewards/chosen": -0.10769504308700562, "rewards/margins": 0.02239423617720604, "rewards/rejected": -0.13008928298950195, "step": 6211 }, { "epoch": 3.7895378984291597, "grad_norm": 4.623466491699219, "learning_rate": 1.942927127985303e-06, "log_odds_chosen": 0.7162532806396484, "log_odds_ratio": -0.6195303797721863, "logits/chosen": -1.010985255241394, "logits/rejected": -1.0196646451950073, "logps/chosen": -0.9253637790679932, "logps/rejected": -1.4285199642181396, "loss": 1.2141, "nll_loss": 1.1236056089401245, "rewards/accuracies": 0.5, "rewards/chosen": -0.09253638237714767, "rewards/margins": 0.05031562224030495, "rewards/rejected": -0.14285200834274292, "step": 6212 }, { "epoch": 3.7901479335061765, "grad_norm": 1.6308579444885254, "learning_rate": 1.941947336191059e-06, "log_odds_chosen": 1.7797924280166626, "log_odds_ratio": -0.5043848752975464, "logits/chosen": -0.8773714303970337, "logits/rejected": -0.8411091566085815, "logps/chosen": -0.842006266117096, "logps/rejected": -2.1938278675079346, "loss": 1.1738, "nll_loss": 1.0237302780151367, "rewards/accuracies": 0.625, "rewards/chosen": -0.08420062810182571, "rewards/margins": 0.13518217206001282, "rewards/rejected": -0.21938279271125793, "step": 6213 }, { "epoch": 3.7907579685831934, "grad_norm": 1.3080989122390747, "learning_rate": 1.9409675443968156e-06, "log_odds_chosen": 2.2070469856262207, "log_odds_ratio": -0.4501005709171295, "logits/chosen": -0.9403907656669617, "logits/rejected": -1.0252344608306885, "logps/chosen": -0.8571916222572327, "logps/rejected": -2.722689151763916, "loss": 1.0386, "nll_loss": 1.0263187885284424, "rewards/accuracies": 0.75, "rewards/chosen": -0.08571915328502655, "rewards/margins": 0.18654973804950714, "rewards/rejected": -0.2722688913345337, "step": 6214 }, { "epoch": 3.79136800366021, "grad_norm": 5.832460403442383, "learning_rate": 1.9399877526025717e-06, "log_odds_chosen": 5.504910469055176, "log_odds_ratio": -0.12069091200828552, "logits/chosen": -0.890422523021698, "logits/rejected": -1.0765931606292725, "logps/chosen": -0.7214223146438599, "logps/rejected": -5.590157508850098, "loss": 1.075, "nll_loss": 0.9424126148223877, "rewards/accuracies": 1.0, "rewards/chosen": -0.07214222848415375, "rewards/margins": 0.4868735074996948, "rewards/rejected": -0.5590157508850098, "step": 6215 }, { "epoch": 3.7919780387372275, "grad_norm": 1.1677932739257812, "learning_rate": 1.9390079608083283e-06, "log_odds_chosen": 1.5976226329803467, "log_odds_ratio": -0.3681555390357971, "logits/chosen": -0.7071828842163086, "logits/rejected": -0.7728898525238037, "logps/chosen": -0.6838958263397217, "logps/rejected": -1.8602375984191895, "loss": 0.9488, "nll_loss": 0.896397590637207, "rewards/accuracies": 0.875, "rewards/chosen": -0.06838957965373993, "rewards/margins": 0.11763418465852737, "rewards/rejected": -0.1860237717628479, "step": 6216 }, { "epoch": 3.7925880738142443, "grad_norm": 1.6416845321655273, "learning_rate": 1.9380281690140844e-06, "log_odds_chosen": 1.412481665611267, "log_odds_ratio": -0.36897000670433044, "logits/chosen": -0.8665955662727356, "logits/rejected": -0.8904062509536743, "logps/chosen": -0.8500677347183228, "logps/rejected": -1.9166061878204346, "loss": 1.2458, "nll_loss": 1.3390915393829346, "rewards/accuracies": 0.75, "rewards/chosen": -0.08500677347183228, "rewards/margins": 0.1066538542509079, "rewards/rejected": -0.19166062772274017, "step": 6217 }, { "epoch": 3.793198108891261, "grad_norm": 5.8385910987854, "learning_rate": 1.9370483772198405e-06, "log_odds_chosen": 1.8059885501861572, "log_odds_ratio": -0.4232260286808014, "logits/chosen": -0.8319332003593445, "logits/rejected": -0.7749460935592651, "logps/chosen": -0.859634518623352, "logps/rejected": -2.2082457542419434, "loss": 1.0045, "nll_loss": 0.8479839563369751, "rewards/accuracies": 0.625, "rewards/chosen": -0.08596345782279968, "rewards/margins": 0.13486114144325256, "rewards/rejected": -0.22082458436489105, "step": 6218 }, { "epoch": 3.793808143968278, "grad_norm": 12.644366264343262, "learning_rate": 1.936068585425597e-06, "log_odds_chosen": 2.8905835151672363, "log_odds_ratio": -0.45953691005706787, "logits/chosen": -0.7578524351119995, "logits/rejected": -1.0243616104125977, "logps/chosen": -0.5870221853256226, "logps/rejected": -3.0529963970184326, "loss": 1.0553, "nll_loss": 0.9617351293563843, "rewards/accuracies": 0.75, "rewards/chosen": -0.05870221555233002, "rewards/margins": 0.24659740924835205, "rewards/rejected": -0.30529963970184326, "step": 6219 }, { "epoch": 3.7944181790452953, "grad_norm": 1.1157746315002441, "learning_rate": 1.935088793631353e-06, "log_odds_chosen": 1.6551086902618408, "log_odds_ratio": -0.5269832611083984, "logits/chosen": -0.8781370520591736, "logits/rejected": -0.9354880452156067, "logps/chosen": -0.8254978060722351, "logps/rejected": -2.1602823734283447, "loss": 0.9681, "nll_loss": 0.9388667345046997, "rewards/accuracies": 0.625, "rewards/chosen": -0.08254978060722351, "rewards/margins": 0.13347846269607544, "rewards/rejected": -0.21602827310562134, "step": 6220 }, { "epoch": 3.795028214122312, "grad_norm": 1.1806432008743286, "learning_rate": 1.9341090018371097e-06, "log_odds_chosen": 3.9199657440185547, "log_odds_ratio": -0.314200222492218, "logits/chosen": -0.7125972509384155, "logits/rejected": -0.8275004625320435, "logps/chosen": -0.5961122512817383, "logps/rejected": -3.9127399921417236, "loss": 1.1155, "nll_loss": 0.8156992197036743, "rewards/accuracies": 0.875, "rewards/chosen": -0.059611234813928604, "rewards/margins": 0.33166277408599854, "rewards/rejected": -0.39127403497695923, "step": 6221 }, { "epoch": 3.795638249199329, "grad_norm": 8.510004997253418, "learning_rate": 1.933129210042866e-06, "log_odds_chosen": 1.6246604919433594, "log_odds_ratio": -0.4047495722770691, "logits/chosen": -0.8559295535087585, "logits/rejected": -1.014093279838562, "logps/chosen": -1.0222254991531372, "logps/rejected": -2.2517566680908203, "loss": 1.16, "nll_loss": 1.2244054079055786, "rewards/accuracies": 0.75, "rewards/chosen": -0.10222254693508148, "rewards/margins": 0.12295311689376831, "rewards/rejected": -0.2251756638288498, "step": 6222 }, { "epoch": 3.796248284276346, "grad_norm": 1.3820736408233643, "learning_rate": 1.932149418248622e-06, "log_odds_chosen": 0.9460052251815796, "log_odds_ratio": -0.6071187257766724, "logits/chosen": -0.9290553331375122, "logits/rejected": -0.8139145970344543, "logps/chosen": -0.9376640915870667, "logps/rejected": -1.6577715873718262, "loss": 1.1047, "nll_loss": 1.0104924440383911, "rewards/accuracies": 0.625, "rewards/chosen": -0.09376641362905502, "rewards/margins": 0.07201074063777924, "rewards/rejected": -0.16577714681625366, "step": 6223 }, { "epoch": 3.7968583193533627, "grad_norm": 1.2058990001678467, "learning_rate": 1.9311696264543785e-06, "log_odds_chosen": 2.1363065242767334, "log_odds_ratio": -0.3911131024360657, "logits/chosen": -0.7185491323471069, "logits/rejected": -0.9220839738845825, "logps/chosen": -0.6203330159187317, "logps/rejected": -2.125276565551758, "loss": 1.0321, "nll_loss": 0.8522197008132935, "rewards/accuracies": 0.75, "rewards/chosen": -0.06203330308198929, "rewards/margins": 0.15049438178539276, "rewards/rejected": -0.21252769231796265, "step": 6224 }, { "epoch": 3.7974683544303796, "grad_norm": 1.465868353843689, "learning_rate": 1.9301898346601346e-06, "log_odds_chosen": 1.345306158065796, "log_odds_ratio": -0.4580079913139343, "logits/chosen": -1.0030386447906494, "logits/rejected": -1.0554689168930054, "logps/chosen": -0.8422337770462036, "logps/rejected": -1.8387610912322998, "loss": 1.0067, "nll_loss": 1.2363430261611938, "rewards/accuracies": 0.625, "rewards/chosen": -0.08422337472438812, "rewards/margins": 0.0996527373790741, "rewards/rejected": -0.18387611210346222, "step": 6225 }, { "epoch": 3.7980783895073964, "grad_norm": 1.5146759748458862, "learning_rate": 1.9292100428658908e-06, "log_odds_chosen": 4.916999340057373, "log_odds_ratio": -0.15544159710407257, "logits/chosen": -0.78680419921875, "logits/rejected": -1.0071525573730469, "logps/chosen": -0.5831572413444519, "logps/rejected": -4.677945613861084, "loss": 0.8969, "nll_loss": 0.8125512599945068, "rewards/accuracies": 0.875, "rewards/chosen": -0.05831573158502579, "rewards/margins": 0.4094788134098053, "rewards/rejected": -0.4677945375442505, "step": 6226 }, { "epoch": 3.7986884245844137, "grad_norm": 7.803908824920654, "learning_rate": 1.9282302510716473e-06, "log_odds_chosen": 3.3235814571380615, "log_odds_ratio": -0.2825213372707367, "logits/chosen": -0.699043869972229, "logits/rejected": -0.8814564943313599, "logps/chosen": -0.7723677158355713, "logps/rejected": -3.367659091949463, "loss": 1.0229, "nll_loss": 0.9811601042747498, "rewards/accuracies": 0.75, "rewards/chosen": -0.07723677158355713, "rewards/margins": 0.25952911376953125, "rewards/rejected": -0.3367658853530884, "step": 6227 }, { "epoch": 3.7992984596614305, "grad_norm": 1.4218456745147705, "learning_rate": 1.9272504592774034e-06, "log_odds_chosen": 2.2833940982818604, "log_odds_ratio": -0.43917787075042725, "logits/chosen": -1.0669230222702026, "logits/rejected": -1.136498212814331, "logps/chosen": -1.086594581604004, "logps/rejected": -3.067182779312134, "loss": 1.1346, "nll_loss": 1.1460152864456177, "rewards/accuracies": 0.625, "rewards/chosen": -0.10865944623947144, "rewards/margins": 0.1980588436126709, "rewards/rejected": -0.30671828985214233, "step": 6228 }, { "epoch": 3.7999084947384474, "grad_norm": 3.9774208068847656, "learning_rate": 1.9262706674831595e-06, "log_odds_chosen": 1.1475512981414795, "log_odds_ratio": -0.4332329332828522, "logits/chosen": -0.9567790627479553, "logits/rejected": -0.9856011867523193, "logps/chosen": -0.9590103626251221, "logps/rejected": -1.8928148746490479, "loss": 1.2436, "nll_loss": 1.2529513835906982, "rewards/accuracies": 0.75, "rewards/chosen": -0.09590103477239609, "rewards/margins": 0.09338046610355377, "rewards/rejected": -0.18928150832653046, "step": 6229 }, { "epoch": 3.8005185298154642, "grad_norm": 4.6513519287109375, "learning_rate": 1.925290875688916e-06, "log_odds_chosen": 1.2316824197769165, "log_odds_ratio": -0.5726127624511719, "logits/chosen": -1.0593018531799316, "logits/rejected": -1.0465631484985352, "logps/chosen": -0.9094698429107666, "logps/rejected": -1.910304069519043, "loss": 0.9541, "nll_loss": 1.1232631206512451, "rewards/accuracies": 0.5, "rewards/chosen": -0.0909469798207283, "rewards/margins": 0.10008343309164047, "rewards/rejected": -0.19103041291236877, "step": 6230 }, { "epoch": 3.8011285648924815, "grad_norm": 1.1151392459869385, "learning_rate": 1.9243110838946722e-06, "log_odds_chosen": 2.965930223464966, "log_odds_ratio": -0.35351574420928955, "logits/chosen": -0.8061683177947998, "logits/rejected": -0.8335057497024536, "logps/chosen": -0.9345731735229492, "logps/rejected": -3.665268659591675, "loss": 1.1534, "nll_loss": 1.048740029335022, "rewards/accuracies": 0.875, "rewards/chosen": -0.09345733374357224, "rewards/margins": 0.27306950092315674, "rewards/rejected": -0.36652687191963196, "step": 6231 }, { "epoch": 3.8017385999694984, "grad_norm": 1.5389808416366577, "learning_rate": 1.9233312921004288e-06, "log_odds_chosen": 2.3415980339050293, "log_odds_ratio": -0.34612715244293213, "logits/chosen": -0.88338702917099, "logits/rejected": -0.9368035197257996, "logps/chosen": -0.6928818821907043, "logps/rejected": -2.3836684226989746, "loss": 1.0618, "nll_loss": 0.9917402863502502, "rewards/accuracies": 0.625, "rewards/chosen": -0.06928819417953491, "rewards/margins": 0.16907864809036255, "rewards/rejected": -0.23836684226989746, "step": 6232 }, { "epoch": 3.802348635046515, "grad_norm": 1.390946388244629, "learning_rate": 1.922351500306185e-06, "log_odds_chosen": 0.18453870713710785, "log_odds_ratio": -0.7346442937850952, "logits/chosen": -0.8237825632095337, "logits/rejected": -0.8239194750785828, "logps/chosen": -0.8558846712112427, "logps/rejected": -0.9413835406303406, "loss": 1.2188, "nll_loss": 1.4621282815933228, "rewards/accuracies": 0.375, "rewards/chosen": -0.0855884701013565, "rewards/margins": 0.008549883961677551, "rewards/rejected": -0.09413835406303406, "step": 6233 }, { "epoch": 3.802958670123532, "grad_norm": 1.2644575834274292, "learning_rate": 1.921371708511941e-06, "log_odds_chosen": 1.6561288833618164, "log_odds_ratio": -0.4924813508987427, "logits/chosen": -0.7799317240715027, "logits/rejected": -0.8732079863548279, "logps/chosen": -0.6195149421691895, "logps/rejected": -1.854884386062622, "loss": 0.9269, "nll_loss": 0.7528730034828186, "rewards/accuracies": 0.75, "rewards/chosen": -0.06195149943232536, "rewards/margins": 0.12353695929050446, "rewards/rejected": -0.18548844754695892, "step": 6234 }, { "epoch": 3.803568705200549, "grad_norm": 1.6953697204589844, "learning_rate": 1.9203919167176975e-06, "log_odds_chosen": 1.8195679187774658, "log_odds_ratio": -0.36757874488830566, "logits/chosen": -0.6744387149810791, "logits/rejected": -0.7619777917861938, "logps/chosen": -0.5426343679428101, "logps/rejected": -1.872628092765808, "loss": 1.0098, "nll_loss": 0.7493461966514587, "rewards/accuracies": 0.875, "rewards/chosen": -0.05426344275474548, "rewards/margins": 0.13299937546253204, "rewards/rejected": -0.18726281821727753, "step": 6235 }, { "epoch": 3.8041787402775658, "grad_norm": 2.0723721981048584, "learning_rate": 1.9194121249234537e-06, "log_odds_chosen": 1.1829077005386353, "log_odds_ratio": -0.7874863743782043, "logits/chosen": -0.8165445327758789, "logits/rejected": -0.9119299054145813, "logps/chosen": -0.7245234847068787, "logps/rejected": -1.812652587890625, "loss": 1.0139, "nll_loss": 0.8961882591247559, "rewards/accuracies": 0.25, "rewards/chosen": -0.0724523514509201, "rewards/margins": 0.10881292074918747, "rewards/rejected": -0.18126526474952698, "step": 6236 }, { "epoch": 3.804788775354583, "grad_norm": 4.027349472045898, "learning_rate": 1.9184323331292102e-06, "log_odds_chosen": 2.3981730937957764, "log_odds_ratio": -0.24514001607894897, "logits/chosen": -0.6821144223213196, "logits/rejected": -0.8928054571151733, "logps/chosen": -0.6666253805160522, "logps/rejected": -2.305986166000366, "loss": 0.8749, "nll_loss": 0.8205502033233643, "rewards/accuracies": 1.0, "rewards/chosen": -0.06666254252195358, "rewards/margins": 0.1639360785484314, "rewards/rejected": -0.2305985987186432, "step": 6237 }, { "epoch": 3.8053988104316, "grad_norm": 2.2363810539245605, "learning_rate": 1.9174525413349663e-06, "log_odds_chosen": 0.7814810276031494, "log_odds_ratio": -0.5342132449150085, "logits/chosen": -0.6828063130378723, "logits/rejected": -0.803568422794342, "logps/chosen": -0.7166227698326111, "logps/rejected": -1.2217376232147217, "loss": 0.8748, "nll_loss": 0.8604205250740051, "rewards/accuracies": 0.75, "rewards/chosen": -0.07166227698326111, "rewards/margins": 0.05051147937774658, "rewards/rejected": -0.12217375636100769, "step": 6238 }, { "epoch": 3.8060088455086167, "grad_norm": 8.919245719909668, "learning_rate": 1.9164727495407225e-06, "log_odds_chosen": 1.959717869758606, "log_odds_ratio": -0.24474118649959564, "logits/chosen": -0.7756010293960571, "logits/rejected": -0.9485795497894287, "logps/chosen": -0.6998234987258911, "logps/rejected": -2.1058990955352783, "loss": 1.0947, "nll_loss": 1.0936334133148193, "rewards/accuracies": 1.0, "rewards/chosen": -0.06998234987258911, "rewards/margins": 0.1406075656414032, "rewards/rejected": -0.2105899155139923, "step": 6239 }, { "epoch": 3.8066188805856336, "grad_norm": 1.4567413330078125, "learning_rate": 1.9154929577464786e-06, "log_odds_chosen": 2.7773044109344482, "log_odds_ratio": -0.47663652896881104, "logits/chosen": -0.7609021663665771, "logits/rejected": -0.9536919593811035, "logps/chosen": -0.9647600650787354, "logps/rejected": -3.4850854873657227, "loss": 1.0857, "nll_loss": 1.2040349245071411, "rewards/accuracies": 0.625, "rewards/chosen": -0.0964760035276413, "rewards/margins": 0.2520325779914856, "rewards/rejected": -0.3485085964202881, "step": 6240 }, { "epoch": 3.807228915662651, "grad_norm": 1.3072842359542847, "learning_rate": 1.914513165952235e-06, "log_odds_chosen": 0.6377714276313782, "log_odds_ratio": -0.5629484057426453, "logits/chosen": -1.0933529138565063, "logits/rejected": -1.0340094566345215, "logps/chosen": -0.8943512439727783, "logps/rejected": -1.367823600769043, "loss": 1.1062, "nll_loss": 1.260054588317871, "rewards/accuracies": 0.5, "rewards/chosen": -0.08943512290716171, "rewards/margins": 0.04734724760055542, "rewards/rejected": -0.13678236305713654, "step": 6241 }, { "epoch": 3.8078389507396677, "grad_norm": 5.394762992858887, "learning_rate": 1.9135333741579913e-06, "log_odds_chosen": 3.442709445953369, "log_odds_ratio": -0.2824490964412689, "logits/chosen": -0.7312739491462708, "logits/rejected": -0.7630891799926758, "logps/chosen": -0.5253720283508301, "logps/rejected": -3.1572656631469727, "loss": 1.0294, "nll_loss": 0.7829863429069519, "rewards/accuracies": 0.875, "rewards/chosen": -0.052537210285663605, "rewards/margins": 0.2631893754005432, "rewards/rejected": -0.3157265782356262, "step": 6242 }, { "epoch": 3.8084489858166846, "grad_norm": 2.130469560623169, "learning_rate": 1.9125535823637474e-06, "log_odds_chosen": 3.2399368286132812, "log_odds_ratio": -0.1741655021905899, "logits/chosen": -0.7806906700134277, "logits/rejected": -1.0573910474777222, "logps/chosen": -0.6178275942802429, "logps/rejected": -3.104599952697754, "loss": 0.9816, "nll_loss": 0.918009877204895, "rewards/accuracies": 1.0, "rewards/chosen": -0.06178275868296623, "rewards/margins": 0.24867722392082214, "rewards/rejected": -0.3104599714279175, "step": 6243 }, { "epoch": 3.8090590208937014, "grad_norm": 1.8289276361465454, "learning_rate": 1.911573790569504e-06, "log_odds_chosen": 0.700404167175293, "log_odds_ratio": -0.48877865076065063, "logits/chosen": -0.7366632223129272, "logits/rejected": -0.9677242040634155, "logps/chosen": -0.9165140986442566, "logps/rejected": -1.4180948734283447, "loss": 0.9945, "nll_loss": 0.9819204807281494, "rewards/accuracies": 0.875, "rewards/chosen": -0.09165140241384506, "rewards/margins": 0.05015807971358299, "rewards/rejected": -0.14180949330329895, "step": 6244 }, { "epoch": 3.8096690559707183, "grad_norm": 1.4700647592544556, "learning_rate": 1.91059399877526e-06, "log_odds_chosen": 2.8155264854431152, "log_odds_ratio": -0.3619057834148407, "logits/chosen": -0.8548607230186462, "logits/rejected": -1.061134696006775, "logps/chosen": -0.7702950835227966, "logps/rejected": -3.038653612136841, "loss": 0.9754, "nll_loss": 0.9283844232559204, "rewards/accuracies": 0.75, "rewards/chosen": -0.0770295113325119, "rewards/margins": 0.22683584690093994, "rewards/rejected": -0.30386537313461304, "step": 6245 }, { "epoch": 3.810279091047735, "grad_norm": 1.6597440242767334, "learning_rate": 1.9096142069810166e-06, "log_odds_chosen": 1.3158581256866455, "log_odds_ratio": -0.5261735916137695, "logits/chosen": -1.0274490118026733, "logits/rejected": -0.9844704270362854, "logps/chosen": -0.9383498430252075, "logps/rejected": -1.9683475494384766, "loss": 1.0538, "nll_loss": 1.1141295433044434, "rewards/accuracies": 0.625, "rewards/chosen": -0.09383498877286911, "rewards/margins": 0.10299979150295258, "rewards/rejected": -0.1968347728252411, "step": 6246 }, { "epoch": 3.810889126124752, "grad_norm": 2.8623640537261963, "learning_rate": 1.9086344151867727e-06, "log_odds_chosen": 1.475888967514038, "log_odds_ratio": -0.39176082611083984, "logits/chosen": -0.8827724456787109, "logits/rejected": -0.9404523372650146, "logps/chosen": -0.7097307443618774, "logps/rejected": -1.735891580581665, "loss": 1.0342, "nll_loss": 0.7589075565338135, "rewards/accuracies": 0.75, "rewards/chosen": -0.07097307592630386, "rewards/margins": 0.1026160717010498, "rewards/rejected": -0.17358915507793427, "step": 6247 }, { "epoch": 3.8114991612017692, "grad_norm": 1.1659716367721558, "learning_rate": 1.907654623392529e-06, "log_odds_chosen": 0.45075878500938416, "log_odds_ratio": -0.8346835970878601, "logits/chosen": -0.9204180240631104, "logits/rejected": -0.8050767183303833, "logps/chosen": -1.1191564798355103, "logps/rejected": -1.3494834899902344, "loss": 1.0708, "nll_loss": 1.121881127357483, "rewards/accuracies": 0.625, "rewards/chosen": -0.11191564798355103, "rewards/margins": 0.023032695055007935, "rewards/rejected": -0.13494834303855896, "step": 6248 }, { "epoch": 3.812109196278786, "grad_norm": 19.409711837768555, "learning_rate": 1.9066748315982852e-06, "log_odds_chosen": 2.243959665298462, "log_odds_ratio": -0.4014861583709717, "logits/chosen": -0.9528639912605286, "logits/rejected": -1.2308714389801025, "logps/chosen": -0.879206657409668, "logps/rejected": -2.668789863586426, "loss": 1.1265, "nll_loss": 1.0325490236282349, "rewards/accuracies": 0.875, "rewards/chosen": -0.0879206657409668, "rewards/margins": 0.17895834147930145, "rewards/rejected": -0.26687899231910706, "step": 6249 }, { "epoch": 3.812719231355803, "grad_norm": 2.101879835128784, "learning_rate": 1.9056950398040415e-06, "log_odds_chosen": 1.0858473777770996, "log_odds_ratio": -0.44822457432746887, "logits/chosen": -0.9012666344642639, "logits/rejected": -0.98856121301651, "logps/chosen": -0.762335479259491, "logps/rejected": -1.5270779132843018, "loss": 1.1579, "nll_loss": 1.0246888399124146, "rewards/accuracies": 0.625, "rewards/chosen": -0.07623355090618134, "rewards/margins": 0.07647424936294556, "rewards/rejected": -0.1527077853679657, "step": 6250 }, { "epoch": 3.8133292664328198, "grad_norm": 1.4776432514190674, "learning_rate": 1.9047152480097978e-06, "log_odds_chosen": 1.9467294216156006, "log_odds_ratio": -0.3328084349632263, "logits/chosen": -0.7884666919708252, "logits/rejected": -0.7644411325454712, "logps/chosen": -0.8062645792961121, "logps/rejected": -2.3612823486328125, "loss": 1.1489, "nll_loss": 0.8454284071922302, "rewards/accuracies": 0.75, "rewards/chosen": -0.0806264579296112, "rewards/margins": 0.15550176799297333, "rewards/rejected": -0.23612821102142334, "step": 6251 }, { "epoch": 3.813939301509837, "grad_norm": 1.9132511615753174, "learning_rate": 1.9037354562155542e-06, "log_odds_chosen": 2.7018866539001465, "log_odds_ratio": -0.32186269760131836, "logits/chosen": -0.7228277325630188, "logits/rejected": -0.8727587461471558, "logps/chosen": -0.6030486822128296, "logps/rejected": -2.6962602138519287, "loss": 1.1655, "nll_loss": 0.9338088035583496, "rewards/accuracies": 0.875, "rewards/chosen": -0.06030486896634102, "rewards/margins": 0.20932117104530334, "rewards/rejected": -0.26962602138519287, "step": 6252 }, { "epoch": 3.814549336586854, "grad_norm": 1.1054184436798096, "learning_rate": 1.9027556644213103e-06, "log_odds_chosen": 1.6363446712493896, "log_odds_ratio": -0.38458535075187683, "logits/chosen": -0.8631455898284912, "logits/rejected": -0.8183491826057434, "logps/chosen": -0.7381801605224609, "logps/rejected": -2.0555052757263184, "loss": 0.8435, "nll_loss": 0.9189313054084778, "rewards/accuracies": 0.875, "rewards/chosen": -0.07381802052259445, "rewards/margins": 0.1317325234413147, "rewards/rejected": -0.20555055141448975, "step": 6253 }, { "epoch": 3.8151593716638708, "grad_norm": 3.872842788696289, "learning_rate": 1.9017758726270668e-06, "log_odds_chosen": 3.1838858127593994, "log_odds_ratio": -0.4906400442123413, "logits/chosen": -0.9782699942588806, "logits/rejected": -1.083099603652954, "logps/chosen": -0.8028353452682495, "logps/rejected": -3.5630087852478027, "loss": 1.1821, "nll_loss": 1.0153580904006958, "rewards/accuracies": 0.75, "rewards/chosen": -0.08028353750705719, "rewards/margins": 0.27601736783981323, "rewards/rejected": -0.35630089044570923, "step": 6254 }, { "epoch": 3.8157694067408876, "grad_norm": 2.959847927093506, "learning_rate": 1.900796080832823e-06, "log_odds_chosen": 0.32804030179977417, "log_odds_ratio": -0.6119400858879089, "logits/chosen": -0.8232884407043457, "logits/rejected": -0.841522216796875, "logps/chosen": -0.9399171471595764, "logps/rejected": -1.22470223903656, "loss": 1.0178, "nll_loss": 1.1689008474349976, "rewards/accuracies": 0.625, "rewards/chosen": -0.093991719186306, "rewards/margins": 0.028478512540459633, "rewards/rejected": -0.12247022986412048, "step": 6255 }, { "epoch": 3.8163794418179044, "grad_norm": 0.9303586483001709, "learning_rate": 1.899816289038579e-06, "log_odds_chosen": 2.0094494819641113, "log_odds_ratio": -0.3574194610118866, "logits/chosen": -0.8017323017120361, "logits/rejected": -0.8582291603088379, "logps/chosen": -0.9408023357391357, "logps/rejected": -2.5344367027282715, "loss": 1.0556, "nll_loss": 1.1444631814956665, "rewards/accuracies": 0.875, "rewards/chosen": -0.09408022463321686, "rewards/margins": 0.15936346352100372, "rewards/rejected": -0.2534436881542206, "step": 6256 }, { "epoch": 3.8169894768949213, "grad_norm": 1.7118420600891113, "learning_rate": 1.8988364972443356e-06, "log_odds_chosen": 1.6018840074539185, "log_odds_ratio": -0.3764302730560303, "logits/chosen": -0.9444177150726318, "logits/rejected": -0.8557373285293579, "logps/chosen": -0.7395772933959961, "logps/rejected": -2.0141546726226807, "loss": 0.9935, "nll_loss": 0.9570060968399048, "rewards/accuracies": 0.75, "rewards/chosen": -0.07395772635936737, "rewards/margins": 0.12745773792266846, "rewards/rejected": -0.20141546428203583, "step": 6257 }, { "epoch": 3.817599511971938, "grad_norm": 1.7620490789413452, "learning_rate": 1.8978567054500917e-06, "log_odds_chosen": 2.5194687843322754, "log_odds_ratio": -0.3496588468551636, "logits/chosen": -0.8108620047569275, "logits/rejected": -0.9410938620567322, "logps/chosen": -0.7019853591918945, "logps/rejected": -2.7547502517700195, "loss": 0.9942, "nll_loss": 0.8074855804443359, "rewards/accuracies": 0.625, "rewards/chosen": -0.07019853591918945, "rewards/margins": 0.2052764892578125, "rewards/rejected": -0.27547502517700195, "step": 6258 }, { "epoch": 3.8182095470489554, "grad_norm": 1.7847626209259033, "learning_rate": 1.896876913655848e-06, "log_odds_chosen": 2.643723487854004, "log_odds_ratio": -0.21315234899520874, "logits/chosen": -0.6381836533546448, "logits/rejected": -0.8621363639831543, "logps/chosen": -0.4243263900279999, "logps/rejected": -2.210578441619873, "loss": 0.9704, "nll_loss": 0.8358239531517029, "rewards/accuracies": 1.0, "rewards/chosen": -0.04243263602256775, "rewards/margins": 0.1786251962184906, "rewards/rejected": -0.22105784714221954, "step": 6259 }, { "epoch": 3.8188195821259723, "grad_norm": 2.806739330291748, "learning_rate": 1.8958971218616042e-06, "log_odds_chosen": 2.6713294982910156, "log_odds_ratio": -0.3956040143966675, "logits/chosen": -0.8038595914840698, "logits/rejected": -0.9055103659629822, "logps/chosen": -0.6521447896957397, "logps/rejected": -2.874927282333374, "loss": 0.898, "nll_loss": 0.7249064445495605, "rewards/accuracies": 0.75, "rewards/chosen": -0.06521448493003845, "rewards/margins": 0.22227826714515686, "rewards/rejected": -0.2874927520751953, "step": 6260 }, { "epoch": 3.819429617202989, "grad_norm": 1.9671947956085205, "learning_rate": 1.8949173300673608e-06, "log_odds_chosen": 0.7030614018440247, "log_odds_ratio": -0.4473138749599457, "logits/chosen": -1.0294134616851807, "logits/rejected": -0.9953071475028992, "logps/chosen": -0.8246377110481262, "logps/rejected": -1.30712890625, "loss": 1.1018, "nll_loss": 0.8914127349853516, "rewards/accuracies": 0.875, "rewards/chosen": -0.08246377110481262, "rewards/margins": 0.04824911803007126, "rewards/rejected": -0.13071289658546448, "step": 6261 }, { "epoch": 3.820039652280006, "grad_norm": 1.7144054174423218, "learning_rate": 1.8939375382731169e-06, "log_odds_chosen": 0.4101521670818329, "log_odds_ratio": -0.5462985634803772, "logits/chosen": -0.8174657821655273, "logits/rejected": -0.8819941878318787, "logps/chosen": -0.8889724016189575, "logps/rejected": -1.1465058326721191, "loss": 1.0887, "nll_loss": 1.069441795349121, "rewards/accuracies": 0.875, "rewards/chosen": -0.08889724314212799, "rewards/margins": 0.025753332301974297, "rewards/rejected": -0.11465058475732803, "step": 6262 }, { "epoch": 3.8206496873570233, "grad_norm": 1.6633923053741455, "learning_rate": 1.892957746478873e-06, "log_odds_chosen": 3.2412569522857666, "log_odds_ratio": -0.33326810598373413, "logits/chosen": -0.9751337766647339, "logits/rejected": -1.04008948802948, "logps/chosen": -0.7128079533576965, "logps/rejected": -3.3404176235198975, "loss": 1.112, "nll_loss": 1.1348713636398315, "rewards/accuracies": 0.875, "rewards/chosen": -0.07128080725669861, "rewards/margins": 0.2627609670162201, "rewards/rejected": -0.3340417742729187, "step": 6263 }, { "epoch": 3.82125972243404, "grad_norm": 1.3523250818252563, "learning_rate": 1.8919779546846295e-06, "log_odds_chosen": 1.0530741214752197, "log_odds_ratio": -0.6926940083503723, "logits/chosen": -0.8139511942863464, "logits/rejected": -0.8622340559959412, "logps/chosen": -0.8764523267745972, "logps/rejected": -1.7609140872955322, "loss": 0.9234, "nll_loss": 0.9494990706443787, "rewards/accuracies": 0.625, "rewards/chosen": -0.08764523267745972, "rewards/margins": 0.08844619244337082, "rewards/rejected": -0.17609141767024994, "step": 6264 }, { "epoch": 3.821869757511057, "grad_norm": 2.7348737716674805, "learning_rate": 1.8909981628903857e-06, "log_odds_chosen": 0.13374681770801544, "log_odds_ratio": -0.836319088935852, "logits/chosen": -0.8944469690322876, "logits/rejected": -0.902228832244873, "logps/chosen": -1.2141084671020508, "logps/rejected": -1.3903849124908447, "loss": 1.1161, "nll_loss": 1.2991217374801636, "rewards/accuracies": 0.375, "rewards/chosen": -0.12141086161136627, "rewards/margins": 0.017627635970711708, "rewards/rejected": -0.13903848826885223, "step": 6265 }, { "epoch": 3.822479792588074, "grad_norm": 2.0448832511901855, "learning_rate": 1.890018371096142e-06, "log_odds_chosen": 0.5046136379241943, "log_odds_ratio": -0.6706455945968628, "logits/chosen": -0.8629087805747986, "logits/rejected": -0.7613719701766968, "logps/chosen": -0.7699358463287354, "logps/rejected": -1.0896447896957397, "loss": 1.0474, "nll_loss": 0.9541729092597961, "rewards/accuracies": 0.875, "rewards/chosen": -0.07699358463287354, "rewards/margins": 0.03197088465094566, "rewards/rejected": -0.1089644730091095, "step": 6266 }, { "epoch": 3.8230898276650906, "grad_norm": 1.4264211654663086, "learning_rate": 1.8890385793018983e-06, "log_odds_chosen": 0.864191472530365, "log_odds_ratio": -0.4776667654514313, "logits/chosen": -0.9396599531173706, "logits/rejected": -0.963600754737854, "logps/chosen": -0.8447529673576355, "logps/rejected": -1.4718762636184692, "loss": 1.0728, "nll_loss": 1.0113886594772339, "rewards/accuracies": 0.875, "rewards/chosen": -0.08447530120611191, "rewards/margins": 0.06271233409643173, "rewards/rejected": -0.14718763530254364, "step": 6267 }, { "epoch": 3.8236998627421075, "grad_norm": 1.4103624820709229, "learning_rate": 1.8880587875076547e-06, "log_odds_chosen": 3.5078628063201904, "log_odds_ratio": -0.2670302987098694, "logits/chosen": -0.826800525188446, "logits/rejected": -0.8594154119491577, "logps/chosen": -0.7789067029953003, "logps/rejected": -3.762709140777588, "loss": 1.2753, "nll_loss": 1.0607516765594482, "rewards/accuracies": 1.0, "rewards/chosen": -0.07789067178964615, "rewards/margins": 0.2983802258968353, "rewards/rejected": -0.37627092003822327, "step": 6268 }, { "epoch": 3.8243098978191243, "grad_norm": 7.387147426605225, "learning_rate": 1.8870789957134108e-06, "log_odds_chosen": 3.1343111991882324, "log_odds_ratio": -0.18048444390296936, "logits/chosen": -0.8324113488197327, "logits/rejected": -0.9581388235092163, "logps/chosen": -0.6914491653442383, "logps/rejected": -3.209064483642578, "loss": 1.1203, "nll_loss": 0.8754221200942993, "rewards/accuracies": 1.0, "rewards/chosen": -0.06914491951465607, "rewards/margins": 0.2517615258693695, "rewards/rejected": -0.3209064304828644, "step": 6269 }, { "epoch": 3.8249199328961416, "grad_norm": 21.803756713867188, "learning_rate": 1.886099203919167e-06, "log_odds_chosen": 2.4678709506988525, "log_odds_ratio": -0.388736367225647, "logits/chosen": -0.6891639232635498, "logits/rejected": -0.8853762149810791, "logps/chosen": -0.5941764116287231, "logps/rejected": -2.511970043182373, "loss": 0.997, "nll_loss": 0.7684134840965271, "rewards/accuracies": 0.75, "rewards/chosen": -0.059417642652988434, "rewards/margins": 0.19177936017513275, "rewards/rejected": -0.2511970102787018, "step": 6270 }, { "epoch": 3.8255299679731585, "grad_norm": 1.5197943449020386, "learning_rate": 1.8851194121249235e-06, "log_odds_chosen": 2.4532487392425537, "log_odds_ratio": -0.32730618119239807, "logits/chosen": -0.7859103679656982, "logits/rejected": -0.9112548828125, "logps/chosen": -0.5959464311599731, "logps/rejected": -2.6185877323150635, "loss": 0.8942, "nll_loss": 0.6734870076179504, "rewards/accuracies": 0.75, "rewards/chosen": -0.05959464982151985, "rewards/margins": 0.20226414501667023, "rewards/rejected": -0.2618587613105774, "step": 6271 }, { "epoch": 3.8261400030501753, "grad_norm": 1.4121614694595337, "learning_rate": 1.8841396203306796e-06, "log_odds_chosen": 2.7752273082733154, "log_odds_ratio": -0.23906093835830688, "logits/chosen": -0.5858527421951294, "logits/rejected": -0.8266129493713379, "logps/chosen": -0.5146252512931824, "logps/rejected": -2.6180732250213623, "loss": 0.9671, "nll_loss": 0.6916905641555786, "rewards/accuracies": 0.875, "rewards/chosen": -0.05146252363920212, "rewards/margins": 0.2103448212146759, "rewards/rejected": -0.261807382106781, "step": 6272 }, { "epoch": 3.826750038127192, "grad_norm": 4.22066068649292, "learning_rate": 1.883159828536436e-06, "log_odds_chosen": 3.5530502796173096, "log_odds_ratio": -0.20148733258247375, "logits/chosen": -0.9681626558303833, "logits/rejected": -1.187648057937622, "logps/chosen": -0.784687340259552, "logps/rejected": -3.755950689315796, "loss": 1.0205, "nll_loss": 1.1040046215057373, "rewards/accuracies": 1.0, "rewards/chosen": -0.07846873253583908, "rewards/margins": 0.29712632298469543, "rewards/rejected": -0.3755950331687927, "step": 6273 }, { "epoch": 3.8273600732042095, "grad_norm": 9.385658264160156, "learning_rate": 1.8821800367421922e-06, "log_odds_chosen": 3.3486640453338623, "log_odds_ratio": -0.2178555428981781, "logits/chosen": -0.853919267654419, "logits/rejected": -0.9547762870788574, "logps/chosen": -0.479978084564209, "logps/rejected": -2.845587730407715, "loss": 1.0935, "nll_loss": 0.8166896104812622, "rewards/accuracies": 0.875, "rewards/chosen": -0.04799780994653702, "rewards/margins": 0.23656097054481506, "rewards/rejected": -0.2845587730407715, "step": 6274 }, { "epoch": 3.8279701082812263, "grad_norm": 1.8180655241012573, "learning_rate": 1.8812002449479486e-06, "log_odds_chosen": 2.967975616455078, "log_odds_ratio": -0.3213160037994385, "logits/chosen": -0.7439613342285156, "logits/rejected": -0.9568139314651489, "logps/chosen": -0.7241411805152893, "logps/rejected": -2.987344264984131, "loss": 1.0396, "nll_loss": 1.02284574508667, "rewards/accuracies": 0.75, "rewards/chosen": -0.07241411507129669, "rewards/margins": 0.2263203114271164, "rewards/rejected": -0.2987344264984131, "step": 6275 }, { "epoch": 3.828580143358243, "grad_norm": 2.9638023376464844, "learning_rate": 1.8802204531537047e-06, "log_odds_chosen": 2.1927170753479004, "log_odds_ratio": -0.27719902992248535, "logits/chosen": -0.9561983346939087, "logits/rejected": -0.9303462505340576, "logps/chosen": -0.8043752908706665, "logps/rejected": -2.5415050983428955, "loss": 1.2214, "nll_loss": 0.899608314037323, "rewards/accuracies": 0.875, "rewards/chosen": -0.08043753355741501, "rewards/margins": 0.17371296882629395, "rewards/rejected": -0.25415050983428955, "step": 6276 }, { "epoch": 3.82919017843526, "grad_norm": 1.798335075378418, "learning_rate": 1.879240661359461e-06, "log_odds_chosen": 2.081188201904297, "log_odds_ratio": -0.3553854823112488, "logits/chosen": -0.7832441926002502, "logits/rejected": -0.8945723176002502, "logps/chosen": -0.7643556594848633, "logps/rejected": -2.3872225284576416, "loss": 1.0388, "nll_loss": 0.9213637113571167, "rewards/accuracies": 0.875, "rewards/chosen": -0.07643557339906693, "rewards/margins": 0.1622866690158844, "rewards/rejected": -0.23872223496437073, "step": 6277 }, { "epoch": 3.829800213512277, "grad_norm": 2.249070644378662, "learning_rate": 1.8782608695652174e-06, "log_odds_chosen": 2.099045753479004, "log_odds_ratio": -0.33875176310539246, "logits/chosen": -0.8312480449676514, "logits/rejected": -0.9954283237457275, "logps/chosen": -0.6089139580726624, "logps/rejected": -2.158324718475342, "loss": 1.0729, "nll_loss": 0.9283605813980103, "rewards/accuracies": 0.875, "rewards/chosen": -0.060891397297382355, "rewards/margins": 0.15494108200073242, "rewards/rejected": -0.21583248674869537, "step": 6278 }, { "epoch": 3.8304102485892937, "grad_norm": 3.726618766784668, "learning_rate": 1.8772810777709735e-06, "log_odds_chosen": 0.20174574851989746, "log_odds_ratio": -0.6177480220794678, "logits/chosen": -1.0595346689224243, "logits/rejected": -1.0216548442840576, "logps/chosen": -0.7325234413146973, "logps/rejected": -0.8302829265594482, "loss": 1.0951, "nll_loss": 0.8539220690727234, "rewards/accuracies": 0.625, "rewards/chosen": -0.07325234264135361, "rewards/margins": 0.009775952436029911, "rewards/rejected": -0.08302830159664154, "step": 6279 }, { "epoch": 3.831020283666311, "grad_norm": 2.1600255966186523, "learning_rate": 1.8763012859767298e-06, "log_odds_chosen": 1.8138035535812378, "log_odds_ratio": -0.4244232177734375, "logits/chosen": -0.9832348823547363, "logits/rejected": -0.9437832832336426, "logps/chosen": -0.921269416809082, "logps/rejected": -2.4564266204833984, "loss": 1.0485, "nll_loss": 1.0455793142318726, "rewards/accuracies": 0.75, "rewards/chosen": -0.09212694317102432, "rewards/margins": 0.15351571142673492, "rewards/rejected": -0.24564266204833984, "step": 6280 }, { "epoch": 3.831630318743328, "grad_norm": 3.424612522125244, "learning_rate": 1.8753214941824862e-06, "log_odds_chosen": 1.1799501180648804, "log_odds_ratio": -0.43758368492126465, "logits/chosen": -0.7575914859771729, "logits/rejected": -0.8653962016105652, "logps/chosen": -0.8671373128890991, "logps/rejected": -1.7061877250671387, "loss": 1.2279, "nll_loss": 0.9554355144500732, "rewards/accuracies": 0.75, "rewards/chosen": -0.08671373873949051, "rewards/margins": 0.08390503376722336, "rewards/rejected": -0.17061878740787506, "step": 6281 }, { "epoch": 3.8322403538203447, "grad_norm": 1.3866764307022095, "learning_rate": 1.8743417023882425e-06, "log_odds_chosen": 1.5410926342010498, "log_odds_ratio": -0.4214514195919037, "logits/chosen": -0.7793415784835815, "logits/rejected": -0.8506029844284058, "logps/chosen": -0.732731819152832, "logps/rejected": -1.9052624702453613, "loss": 1.0423, "nll_loss": 0.8556566834449768, "rewards/accuracies": 0.75, "rewards/chosen": -0.0732731819152832, "rewards/margins": 0.11725308001041412, "rewards/rejected": -0.19052624702453613, "step": 6282 }, { "epoch": 3.8328503888973615, "grad_norm": 2.9653966426849365, "learning_rate": 1.8733619105939986e-06, "log_odds_chosen": 0.22161200642585754, "log_odds_ratio": -0.9652402400970459, "logits/chosen": -0.9990147352218628, "logits/rejected": -1.0240917205810547, "logps/chosen": -1.1334407329559326, "logps/rejected": -1.4256891012191772, "loss": 1.2448, "nll_loss": 1.2251313924789429, "rewards/accuracies": 0.375, "rewards/chosen": -0.11334408074617386, "rewards/margins": 0.029224833473563194, "rewards/rejected": -0.1425689160823822, "step": 6283 }, { "epoch": 3.8334604239743784, "grad_norm": 1.1274874210357666, "learning_rate": 1.872382118799755e-06, "log_odds_chosen": 1.0552603006362915, "log_odds_ratio": -0.6206994652748108, "logits/chosen": -0.8972936272621155, "logits/rejected": -1.028175950050354, "logps/chosen": -1.0590183734893799, "logps/rejected": -1.8607006072998047, "loss": 1.2005, "nll_loss": 1.277450680732727, "rewards/accuracies": 0.5, "rewards/chosen": -0.10590185225009918, "rewards/margins": 0.080168217420578, "rewards/rejected": -0.186070054769516, "step": 6284 }, { "epoch": 3.8340704590513957, "grad_norm": 3.361922264099121, "learning_rate": 1.8714023270055113e-06, "log_odds_chosen": 2.471022129058838, "log_odds_ratio": -0.3713724911212921, "logits/chosen": -0.5703505277633667, "logits/rejected": -0.8487218618392944, "logps/chosen": -0.5453978776931763, "logps/rejected": -2.4135894775390625, "loss": 0.8574, "nll_loss": 0.8188071250915527, "rewards/accuracies": 0.75, "rewards/chosen": -0.05453978478908539, "rewards/margins": 0.1868191808462143, "rewards/rejected": -0.24135896563529968, "step": 6285 }, { "epoch": 3.8346804941284125, "grad_norm": 7.209690093994141, "learning_rate": 1.8704225352112674e-06, "log_odds_chosen": 1.2975096702575684, "log_odds_ratio": -0.5444812774658203, "logits/chosen": -0.9094038009643555, "logits/rejected": -1.0913019180297852, "logps/chosen": -1.0484681129455566, "logps/rejected": -2.071587324142456, "loss": 1.1566, "nll_loss": 1.2424695491790771, "rewards/accuracies": 0.625, "rewards/chosen": -0.10484682023525238, "rewards/margins": 0.10231191664934158, "rewards/rejected": -0.20715874433517456, "step": 6286 }, { "epoch": 3.8352905292054293, "grad_norm": 6.750010967254639, "learning_rate": 1.869442743417024e-06, "log_odds_chosen": 1.5318429470062256, "log_odds_ratio": -0.31451791524887085, "logits/chosen": -0.9420878887176514, "logits/rejected": -0.9547127485275269, "logps/chosen": -0.9427585005760193, "logps/rejected": -2.1153249740600586, "loss": 1.0778, "nll_loss": 1.1457949876785278, "rewards/accuracies": 0.875, "rewards/chosen": -0.09427584707736969, "rewards/margins": 0.11725664138793945, "rewards/rejected": -0.21153250336647034, "step": 6287 }, { "epoch": 3.835900564282446, "grad_norm": 1.2623002529144287, "learning_rate": 1.86846295162278e-06, "log_odds_chosen": 1.7405412197113037, "log_odds_ratio": -0.38207370042800903, "logits/chosen": -0.9143595099449158, "logits/rejected": -0.9524110555648804, "logps/chosen": -0.864413321018219, "logps/rejected": -2.195979118347168, "loss": 1.2445, "nll_loss": 1.1838682889938354, "rewards/accuracies": 0.875, "rewards/chosen": -0.08644133061170578, "rewards/margins": 0.13315658271312714, "rewards/rejected": -0.2195979356765747, "step": 6288 }, { "epoch": 3.836510599359463, "grad_norm": 2.058753490447998, "learning_rate": 1.8674831598285364e-06, "log_odds_chosen": 2.9949631690979004, "log_odds_ratio": -0.3889424800872803, "logits/chosen": -0.8255095481872559, "logits/rejected": -1.1378271579742432, "logps/chosen": -0.6365355253219604, "logps/rejected": -3.0747251510620117, "loss": 1.0698, "nll_loss": 0.802491307258606, "rewards/accuracies": 0.875, "rewards/chosen": -0.06365355849266052, "rewards/margins": 0.2438189536333084, "rewards/rejected": -0.30747249722480774, "step": 6289 }, { "epoch": 3.83712063443648, "grad_norm": 1.1503283977508545, "learning_rate": 1.8665033680342925e-06, "log_odds_chosen": 2.0819151401519775, "log_odds_ratio": -0.37387409806251526, "logits/chosen": -0.793367862701416, "logits/rejected": -0.9893451929092407, "logps/chosen": -0.7224134802818298, "logps/rejected": -2.235755681991577, "loss": 1.0673, "nll_loss": 1.036729097366333, "rewards/accuracies": 0.75, "rewards/chosen": -0.07224135100841522, "rewards/margins": 0.1513342261314392, "rewards/rejected": -0.22357559204101562, "step": 6290 }, { "epoch": 3.837730669513497, "grad_norm": 1.1364474296569824, "learning_rate": 1.8655235762400489e-06, "log_odds_chosen": 2.110527753829956, "log_odds_ratio": -0.41041749715805054, "logits/chosen": -0.9348787665367126, "logits/rejected": -0.8730800747871399, "logps/chosen": -0.7019978761672974, "logps/rejected": -2.2811551094055176, "loss": 1.0441, "nll_loss": 1.026201844215393, "rewards/accuracies": 0.75, "rewards/chosen": -0.07019978761672974, "rewards/margins": 0.15791571140289307, "rewards/rejected": -0.2281154990196228, "step": 6291 }, { "epoch": 3.838340704590514, "grad_norm": 1.5598984956741333, "learning_rate": 1.8645437844458052e-06, "log_odds_chosen": 1.7841180562973022, "log_odds_ratio": -0.24107377231121063, "logits/chosen": -0.9094827175140381, "logits/rejected": -1.0283448696136475, "logps/chosen": -0.8999072313308716, "logps/rejected": -2.2609355449676514, "loss": 1.0817, "nll_loss": 1.2905616760253906, "rewards/accuracies": 1.0, "rewards/chosen": -0.08999072760343552, "rewards/margins": 0.1361028254032135, "rewards/rejected": -0.22609356045722961, "step": 6292 }, { "epoch": 3.838950739667531, "grad_norm": 2.0272648334503174, "learning_rate": 1.8635639926515613e-06, "log_odds_chosen": 1.4667843580245972, "log_odds_ratio": -0.4224873185157776, "logits/chosen": -0.9137471318244934, "logits/rejected": -0.8832270503044128, "logps/chosen": -0.5885846614837646, "logps/rejected": -1.62537682056427, "loss": 1.0356, "nll_loss": 0.8375195264816284, "rewards/accuracies": 0.875, "rewards/chosen": -0.05885846167802811, "rewards/margins": 0.10367920994758606, "rewards/rejected": -0.16253767907619476, "step": 6293 }, { "epoch": 3.8395607747445477, "grad_norm": 2.3128020763397217, "learning_rate": 1.8625842008573179e-06, "log_odds_chosen": 1.966847538948059, "log_odds_ratio": -0.5583456754684448, "logits/chosen": -0.763365626335144, "logits/rejected": -0.9432570934295654, "logps/chosen": -0.7350108623504639, "logps/rejected": -2.3257827758789062, "loss": 1.0415, "nll_loss": 0.8842994570732117, "rewards/accuracies": 0.75, "rewards/chosen": -0.0735010951757431, "rewards/margins": 0.1590772122144699, "rewards/rejected": -0.23257827758789062, "step": 6294 }, { "epoch": 3.840170809821565, "grad_norm": 1.7710193395614624, "learning_rate": 1.861604409063074e-06, "log_odds_chosen": 2.5622496604919434, "log_odds_ratio": -0.37023335695266724, "logits/chosen": -0.7406579256057739, "logits/rejected": -0.9999282360076904, "logps/chosen": -0.8103702664375305, "logps/rejected": -2.944725275039673, "loss": 1.0613, "nll_loss": 0.8183190822601318, "rewards/accuracies": 0.75, "rewards/chosen": -0.08103702962398529, "rewards/margins": 0.21343550086021423, "rewards/rejected": -0.2944725453853607, "step": 6295 }, { "epoch": 3.840780844898582, "grad_norm": 1.588217854499817, "learning_rate": 1.8606246172688303e-06, "log_odds_chosen": 1.8580726385116577, "log_odds_ratio": -0.4345713257789612, "logits/chosen": -1.0057353973388672, "logits/rejected": -1.002464771270752, "logps/chosen": -1.1288607120513916, "logps/rejected": -2.824145793914795, "loss": 1.0932, "nll_loss": 1.2942335605621338, "rewards/accuracies": 0.625, "rewards/chosen": -0.11288607120513916, "rewards/margins": 0.16952849924564362, "rewards/rejected": -0.2824145555496216, "step": 6296 }, { "epoch": 3.8413908799755987, "grad_norm": 1.2772592306137085, "learning_rate": 1.8596448254745867e-06, "log_odds_chosen": 1.639892339706421, "log_odds_ratio": -0.3590044677257538, "logits/chosen": -0.49370408058166504, "logits/rejected": -0.7349639534950256, "logps/chosen": -0.5697857737541199, "logps/rejected": -1.6687911748886108, "loss": 1.0724, "nll_loss": 0.7681534886360168, "rewards/accuracies": 0.875, "rewards/chosen": -0.05697857588529587, "rewards/margins": 0.10990053415298462, "rewards/rejected": -0.16687911748886108, "step": 6297 }, { "epoch": 3.8420009150526155, "grad_norm": 3.2060563564300537, "learning_rate": 1.8586650336803428e-06, "log_odds_chosen": 1.5066887140274048, "log_odds_ratio": -0.6282908916473389, "logits/chosen": -1.1912131309509277, "logits/rejected": -1.0868922472000122, "logps/chosen": -0.9856194853782654, "logps/rejected": -2.2167234420776367, "loss": 1.0218, "nll_loss": 1.104065179824829, "rewards/accuracies": 0.625, "rewards/chosen": -0.09856195002794266, "rewards/margins": 0.12311038374900818, "rewards/rejected": -0.22167234122753143, "step": 6298 }, { "epoch": 3.8426109501296324, "grad_norm": 1.3077130317687988, "learning_rate": 1.8576852418860991e-06, "log_odds_chosen": 1.776343822479248, "log_odds_ratio": -0.43815943598747253, "logits/chosen": -0.8618137240409851, "logits/rejected": -1.1164778470993042, "logps/chosen": -0.8010973334312439, "logps/rejected": -2.104269027709961, "loss": 1.0246, "nll_loss": 1.1770849227905273, "rewards/accuracies": 0.625, "rewards/chosen": -0.08010973781347275, "rewards/margins": 0.13031719624996185, "rewards/rejected": -0.210426926612854, "step": 6299 }, { "epoch": 3.8432209852066492, "grad_norm": 1.7824745178222656, "learning_rate": 1.8567054500918552e-06, "log_odds_chosen": 0.9775828123092651, "log_odds_ratio": -0.3746940493583679, "logits/chosen": -0.9928821921348572, "logits/rejected": -1.0040411949157715, "logps/chosen": -0.7582795023918152, "logps/rejected": -1.3613343238830566, "loss": 1.0891, "nll_loss": 0.8803441524505615, "rewards/accuracies": 1.0, "rewards/chosen": -0.0758279487490654, "rewards/margins": 0.06030549481511116, "rewards/rejected": -0.13613344728946686, "step": 6300 }, { "epoch": 3.843831020283666, "grad_norm": 2.0826830863952637, "learning_rate": 1.8557256582976118e-06, "log_odds_chosen": 1.3098853826522827, "log_odds_ratio": -0.5477002263069153, "logits/chosen": -0.8104342222213745, "logits/rejected": -0.8671166300773621, "logps/chosen": -1.1903111934661865, "logps/rejected": -2.333573341369629, "loss": 1.0638, "nll_loss": 1.341651201248169, "rewards/accuracies": 0.875, "rewards/chosen": -0.11903110891580582, "rewards/margins": 0.11432620882987976, "rewards/rejected": -0.23335732519626617, "step": 6301 }, { "epoch": 3.8444410553606834, "grad_norm": 1.5377455949783325, "learning_rate": 1.854745866503368e-06, "log_odds_chosen": 2.4061059951782227, "log_odds_ratio": -0.36376050114631653, "logits/chosen": -0.6897570490837097, "logits/rejected": -0.9111334085464478, "logps/chosen": -0.7448124289512634, "logps/rejected": -2.536752223968506, "loss": 0.8564, "nll_loss": 0.7946144342422485, "rewards/accuracies": 0.75, "rewards/chosen": -0.07448124885559082, "rewards/margins": 0.17919398844242096, "rewards/rejected": -0.253675252199173, "step": 6302 }, { "epoch": 3.8450510904377, "grad_norm": 10.14319133758545, "learning_rate": 1.8537660747091242e-06, "log_odds_chosen": 3.2231879234313965, "log_odds_ratio": -0.22459924221038818, "logits/chosen": -0.6314232349395752, "logits/rejected": -1.033164620399475, "logps/chosen": -0.5673844814300537, "logps/rejected": -3.0406851768493652, "loss": 0.9567, "nll_loss": 0.865507960319519, "rewards/accuracies": 0.875, "rewards/chosen": -0.05673845484852791, "rewards/margins": 0.24733008444309235, "rewards/rejected": -0.30406853556632996, "step": 6303 }, { "epoch": 3.845661125514717, "grad_norm": 6.314538478851318, "learning_rate": 1.8527862829148806e-06, "log_odds_chosen": 0.18415716290473938, "log_odds_ratio": -0.6555055379867554, "logits/chosen": -0.9144442081451416, "logits/rejected": -0.8961204290390015, "logps/chosen": -0.8022323846817017, "logps/rejected": -0.8699429035186768, "loss": 1.176, "nll_loss": 1.0696194171905518, "rewards/accuracies": 0.625, "rewards/chosen": -0.08022324740886688, "rewards/margins": 0.006771049927920103, "rewards/rejected": -0.08699429780244827, "step": 6304 }, { "epoch": 3.846271160591734, "grad_norm": 1.166712999343872, "learning_rate": 1.851806491120637e-06, "log_odds_chosen": 0.5188652873039246, "log_odds_ratio": -0.6250314712524414, "logits/chosen": -1.2194044589996338, "logits/rejected": -1.0464942455291748, "logps/chosen": -1.0661942958831787, "logps/rejected": -1.49624764919281, "loss": 1.1709, "nll_loss": 1.2476074695587158, "rewards/accuracies": 0.625, "rewards/chosen": -0.10661943256855011, "rewards/margins": 0.04300532117486, "rewards/rejected": -0.1496247500181198, "step": 6305 }, { "epoch": 3.846881195668751, "grad_norm": 1.5720927715301514, "learning_rate": 1.850826699326393e-06, "log_odds_chosen": 0.8880875110626221, "log_odds_ratio": -0.5545440912246704, "logits/chosen": -0.9753519892692566, "logits/rejected": -1.0272516012191772, "logps/chosen": -0.9120409488677979, "logps/rejected": -1.6698181629180908, "loss": 1.0136, "nll_loss": 1.0613161325454712, "rewards/accuracies": 0.5, "rewards/chosen": -0.09120409190654755, "rewards/margins": 0.07577772438526154, "rewards/rejected": -0.16698181629180908, "step": 6306 }, { "epoch": 3.847491230745768, "grad_norm": 5.634073734283447, "learning_rate": 1.8498469075321492e-06, "log_odds_chosen": 3.913297176361084, "log_odds_ratio": -0.1756388545036316, "logits/chosen": -0.8508646488189697, "logits/rejected": -1.0066810846328735, "logps/chosen": -0.7729049921035767, "logps/rejected": -4.030669212341309, "loss": 0.855, "nll_loss": 0.9441899061203003, "rewards/accuracies": 0.875, "rewards/chosen": -0.07729050517082214, "rewards/margins": 0.3257763981819153, "rewards/rejected": -0.4030669331550598, "step": 6307 }, { "epoch": 3.848101265822785, "grad_norm": 1.5340731143951416, "learning_rate": 1.8488671157379057e-06, "log_odds_chosen": 2.9221363067626953, "log_odds_ratio": -0.41357773542404175, "logits/chosen": -0.9785621166229248, "logits/rejected": -1.052058219909668, "logps/chosen": -0.9975991249084473, "logps/rejected": -3.5749011039733887, "loss": 1.0986, "nll_loss": 1.4260331392288208, "rewards/accuracies": 0.75, "rewards/chosen": -0.09975991398096085, "rewards/margins": 0.2577301859855652, "rewards/rejected": -0.3574901223182678, "step": 6308 }, { "epoch": 3.8487113008998017, "grad_norm": 1.737358570098877, "learning_rate": 1.8478873239436618e-06, "log_odds_chosen": 1.411897897720337, "log_odds_ratio": -0.4608723819255829, "logits/chosen": -0.9655241966247559, "logits/rejected": -1.054642915725708, "logps/chosen": -0.8521586060523987, "logps/rejected": -1.9981544017791748, "loss": 1.0546, "nll_loss": 0.9536562561988831, "rewards/accuracies": 0.625, "rewards/chosen": -0.08521585911512375, "rewards/margins": 0.11459958553314209, "rewards/rejected": -0.19981545209884644, "step": 6309 }, { "epoch": 3.8493213359768186, "grad_norm": 6.9296135902404785, "learning_rate": 1.8469075321494182e-06, "log_odds_chosen": 1.000730276107788, "log_odds_ratio": -0.5453196167945862, "logits/chosen": -0.8247164487838745, "logits/rejected": -0.9029099345207214, "logps/chosen": -0.7031235694885254, "logps/rejected": -1.3744006156921387, "loss": 0.9514, "nll_loss": 0.9450091123580933, "rewards/accuracies": 0.625, "rewards/chosen": -0.07031235843896866, "rewards/margins": 0.06712771952152252, "rewards/rejected": -0.13744007050991058, "step": 6310 }, { "epoch": 3.8499313710538354, "grad_norm": 2.3648886680603027, "learning_rate": 1.8459277403551745e-06, "log_odds_chosen": 1.645887017250061, "log_odds_ratio": -0.5838791131973267, "logits/chosen": -1.0942175388336182, "logits/rejected": -1.043749213218689, "logps/chosen": -0.8996224999427795, "logps/rejected": -2.425863742828369, "loss": 1.0111, "nll_loss": 1.098670482635498, "rewards/accuracies": 0.5, "rewards/chosen": -0.08996224403381348, "rewards/margins": 0.15262411534786224, "rewards/rejected": -0.24258637428283691, "step": 6311 }, { "epoch": 3.8505414061308523, "grad_norm": 2.655291795730591, "learning_rate": 1.8449479485609308e-06, "log_odds_chosen": 0.3291283845901489, "log_odds_ratio": -0.7827655673027039, "logits/chosen": -0.8946489691734314, "logits/rejected": -0.9526060223579407, "logps/chosen": -1.001538872718811, "logps/rejected": -1.3675404787063599, "loss": 1.1153, "nll_loss": 1.1875038146972656, "rewards/accuracies": 0.375, "rewards/chosen": -0.10015389323234558, "rewards/margins": 0.03660016506910324, "rewards/rejected": -0.13675406575202942, "step": 6312 }, { "epoch": 3.8511514412078696, "grad_norm": 4.668542385101318, "learning_rate": 1.843968156766687e-06, "log_odds_chosen": 4.509189605712891, "log_odds_ratio": -0.18966826796531677, "logits/chosen": -0.8001461029052734, "logits/rejected": -1.0632110834121704, "logps/chosen": -0.7484503984451294, "logps/rejected": -4.6778740882873535, "loss": 1.1611, "nll_loss": 0.8360265493392944, "rewards/accuracies": 0.875, "rewards/chosen": -0.07484503835439682, "rewards/margins": 0.3929423987865448, "rewards/rejected": -0.4677874445915222, "step": 6313 }, { "epoch": 3.8517614762848864, "grad_norm": 1.2505152225494385, "learning_rate": 1.8429883649724433e-06, "log_odds_chosen": 1.57069730758667, "log_odds_ratio": -0.48232001066207886, "logits/chosen": -0.9313797950744629, "logits/rejected": -0.9874665141105652, "logps/chosen": -0.766089677810669, "logps/rejected": -1.9381390810012817, "loss": 0.9451, "nll_loss": 1.0091910362243652, "rewards/accuracies": 0.75, "rewards/chosen": -0.07660897076129913, "rewards/margins": 0.11720495671033859, "rewards/rejected": -0.19381392002105713, "step": 6314 }, { "epoch": 3.8523715113619033, "grad_norm": 1.5287015438079834, "learning_rate": 1.8420085731781996e-06, "log_odds_chosen": 1.0584189891815186, "log_odds_ratio": -0.5717645883560181, "logits/chosen": -0.6568701267242432, "logits/rejected": -0.7893567681312561, "logps/chosen": -0.9209846258163452, "logps/rejected": -1.6809619665145874, "loss": 1.0324, "nll_loss": 0.8699769377708435, "rewards/accuracies": 0.875, "rewards/chosen": -0.09209847450256348, "rewards/margins": 0.0759977251291275, "rewards/rejected": -0.16809619963169098, "step": 6315 }, { "epoch": 3.85298154643892, "grad_norm": 1.2691582441329956, "learning_rate": 1.8410287813839557e-06, "log_odds_chosen": 3.992441415786743, "log_odds_ratio": -0.3018088936805725, "logits/chosen": -0.5598243474960327, "logits/rejected": -0.8482635617256165, "logps/chosen": -0.4881397485733032, "logps/rejected": -3.769003391265869, "loss": 0.9076, "nll_loss": 0.8062300682067871, "rewards/accuracies": 0.875, "rewards/chosen": -0.04881397262215614, "rewards/margins": 0.32808637619018555, "rewards/rejected": -0.3769003450870514, "step": 6316 }, { "epoch": 3.8535915815159374, "grad_norm": 2.3980069160461426, "learning_rate": 1.840048989589712e-06, "log_odds_chosen": 1.0449776649475098, "log_odds_ratio": -0.4959651827812195, "logits/chosen": -0.8162767887115479, "logits/rejected": -0.909843921661377, "logps/chosen": -0.7034333944320679, "logps/rejected": -1.5200731754302979, "loss": 0.9798, "nll_loss": 0.8815168142318726, "rewards/accuracies": 0.5, "rewards/chosen": -0.07034334540367126, "rewards/margins": 0.08166397362947464, "rewards/rejected": -0.1520073115825653, "step": 6317 }, { "epoch": 3.8542016165929542, "grad_norm": 1.1619828939437866, "learning_rate": 1.8390691977954684e-06, "log_odds_chosen": 0.7234175801277161, "log_odds_ratio": -0.675581693649292, "logits/chosen": -0.8471766114234924, "logits/rejected": -0.8909435272216797, "logps/chosen": -0.9483356475830078, "logps/rejected": -1.5299034118652344, "loss": 0.9815, "nll_loss": 1.0725957155227661, "rewards/accuracies": 0.5, "rewards/chosen": -0.09483356773853302, "rewards/margins": 0.058156777173280716, "rewards/rejected": -0.15299034118652344, "step": 6318 }, { "epoch": 3.854811651669971, "grad_norm": 0.9770637154579163, "learning_rate": 1.8380894060012247e-06, "log_odds_chosen": 0.2253502756357193, "log_odds_ratio": -0.7347515821456909, "logits/chosen": -0.9007874727249146, "logits/rejected": -0.9192216992378235, "logps/chosen": -0.9733858704566956, "logps/rejected": -1.1462185382843018, "loss": 0.9297, "nll_loss": 1.0937336683273315, "rewards/accuracies": 0.375, "rewards/chosen": -0.09733859449625015, "rewards/margins": 0.017283255234360695, "rewards/rejected": -0.1146218478679657, "step": 6319 }, { "epoch": 3.855421686746988, "grad_norm": 1.2337396144866943, "learning_rate": 1.8371096142069809e-06, "log_odds_chosen": 3.102790355682373, "log_odds_ratio": -0.2252635955810547, "logits/chosen": -0.8144347667694092, "logits/rejected": -0.9920284748077393, "logps/chosen": -0.5922973155975342, "logps/rejected": -3.026259660720825, "loss": 0.8994, "nll_loss": 0.6550204753875732, "rewards/accuracies": 0.75, "rewards/chosen": -0.05922972783446312, "rewards/margins": 0.24339625239372253, "rewards/rejected": -0.30262595415115356, "step": 6320 }, { "epoch": 3.8560317218240048, "grad_norm": 1.2529460191726685, "learning_rate": 1.8361298224127372e-06, "log_odds_chosen": 3.1853699684143066, "log_odds_ratio": -0.42329686880111694, "logits/chosen": -0.8875017166137695, "logits/rejected": -1.118062138557434, "logps/chosen": -0.8728610873222351, "logps/rejected": -3.4442334175109863, "loss": 1.1108, "nll_loss": 1.129367709159851, "rewards/accuracies": 0.75, "rewards/chosen": -0.08728610724210739, "rewards/margins": 0.2571372389793396, "rewards/rejected": -0.3444233536720276, "step": 6321 }, { "epoch": 3.8566417569010216, "grad_norm": 1.551546573638916, "learning_rate": 1.8351500306184935e-06, "log_odds_chosen": 1.5919818878173828, "log_odds_ratio": -0.3656015694141388, "logits/chosen": -0.7650682926177979, "logits/rejected": -0.8996269702911377, "logps/chosen": -0.6127902865409851, "logps/rejected": -1.5339641571044922, "loss": 1.0763, "nll_loss": 0.8128841519355774, "rewards/accuracies": 0.75, "rewards/chosen": -0.06127902865409851, "rewards/margins": 0.09211739152669907, "rewards/rejected": -0.15339641273021698, "step": 6322 }, { "epoch": 3.8572517919780385, "grad_norm": 1.7646751403808594, "learning_rate": 1.8341702388242496e-06, "log_odds_chosen": 0.4056141972541809, "log_odds_ratio": -0.6839486956596375, "logits/chosen": -1.0636688470840454, "logits/rejected": -1.0279223918914795, "logps/chosen": -0.8942718505859375, "logps/rejected": -1.2268848419189453, "loss": 1.075, "nll_loss": 1.0661041736602783, "rewards/accuracies": 0.5, "rewards/chosen": -0.08942718803882599, "rewards/margins": 0.033261291682720184, "rewards/rejected": -0.12268847227096558, "step": 6323 }, { "epoch": 3.8578618270550558, "grad_norm": 1.4802420139312744, "learning_rate": 1.8331904470300062e-06, "log_odds_chosen": 0.8879759311676025, "log_odds_ratio": -0.4782540202140808, "logits/chosen": -0.8346133828163147, "logits/rejected": -0.8395549058914185, "logps/chosen": -0.7771065831184387, "logps/rejected": -1.3989136219024658, "loss": 1.0393, "nll_loss": 1.0830186605453491, "rewards/accuracies": 0.875, "rewards/chosen": -0.07771066576242447, "rewards/margins": 0.06218069791793823, "rewards/rejected": -0.1398913562297821, "step": 6324 }, { "epoch": 3.8584718621320726, "grad_norm": 1.2506163120269775, "learning_rate": 1.8322106552357623e-06, "log_odds_chosen": 2.182339668273926, "log_odds_ratio": -0.6064755320549011, "logits/chosen": -0.9711099863052368, "logits/rejected": -1.1218020915985107, "logps/chosen": -0.9634630680084229, "logps/rejected": -2.9066057205200195, "loss": 1.2207, "nll_loss": 1.3950942754745483, "rewards/accuracies": 0.625, "rewards/chosen": -0.09634631127119064, "rewards/margins": 0.19431425631046295, "rewards/rejected": -0.290660560131073, "step": 6325 }, { "epoch": 3.8590818972090895, "grad_norm": 4.556509971618652, "learning_rate": 1.8312308634415187e-06, "log_odds_chosen": 0.38991764187812805, "log_odds_ratio": -0.6738337278366089, "logits/chosen": -0.8797958493232727, "logits/rejected": -0.8932132720947266, "logps/chosen": -0.9088979959487915, "logps/rejected": -1.1416255235671997, "loss": 1.134, "nll_loss": 1.0785980224609375, "rewards/accuracies": 0.625, "rewards/chosen": -0.09088979661464691, "rewards/margins": 0.02327275462448597, "rewards/rejected": -0.11416256427764893, "step": 6326 }, { "epoch": 3.8596919322861063, "grad_norm": 1.7052415609359741, "learning_rate": 1.8302510716472748e-06, "log_odds_chosen": 2.1875157356262207, "log_odds_ratio": -0.313148558139801, "logits/chosen": -0.8598579168319702, "logits/rejected": -1.0281131267547607, "logps/chosen": -0.8165065050125122, "logps/rejected": -2.425137519836426, "loss": 1.0987, "nll_loss": 0.9500298500061035, "rewards/accuracies": 0.875, "rewards/chosen": -0.08165064454078674, "rewards/margins": 0.16086310148239136, "rewards/rejected": -0.2425137460231781, "step": 6327 }, { "epoch": 3.8603019673631236, "grad_norm": 1.1720948219299316, "learning_rate": 1.8292712798530311e-06, "log_odds_chosen": 2.5432519912719727, "log_odds_ratio": -0.30208662152290344, "logits/chosen": -0.72368323802948, "logits/rejected": -0.9259763956069946, "logps/chosen": -0.7790554761886597, "logps/rejected": -2.892394542694092, "loss": 1.0215, "nll_loss": 0.946797788143158, "rewards/accuracies": 1.0, "rewards/chosen": -0.0779055505990982, "rewards/margins": 0.21133390069007874, "rewards/rejected": -0.28923946619033813, "step": 6328 }, { "epoch": 3.8609120024401404, "grad_norm": 6.456887245178223, "learning_rate": 1.8282914880587874e-06, "log_odds_chosen": 0.8564469218254089, "log_odds_ratio": -0.5194998979568481, "logits/chosen": -0.8731935024261475, "logits/rejected": -0.7442601919174194, "logps/chosen": -0.7555291652679443, "logps/rejected": -1.3208286762237549, "loss": 0.9898, "nll_loss": 0.9836711883544922, "rewards/accuracies": 0.75, "rewards/chosen": -0.07555291801691055, "rewards/margins": 0.0565299428999424, "rewards/rejected": -0.13208286464214325, "step": 6329 }, { "epoch": 3.8615220375171573, "grad_norm": 7.003080368041992, "learning_rate": 1.8273116962645436e-06, "log_odds_chosen": 1.9499167203903198, "log_odds_ratio": -0.44513335824012756, "logits/chosen": -0.7255687713623047, "logits/rejected": -0.7417882680892944, "logps/chosen": -0.8533907532691956, "logps/rejected": -2.5338876247406006, "loss": 1.1296, "nll_loss": 1.0593186616897583, "rewards/accuracies": 0.5, "rewards/chosen": -0.08533908426761627, "rewards/margins": 0.16804969310760498, "rewards/rejected": -0.25338876247406006, "step": 6330 }, { "epoch": 3.862132072594174, "grad_norm": 1.526883602142334, "learning_rate": 1.8263319044703001e-06, "log_odds_chosen": 2.6700732707977295, "log_odds_ratio": -0.328405499458313, "logits/chosen": -0.9408072233200073, "logits/rejected": -0.9745112657546997, "logps/chosen": -0.737138032913208, "logps/rejected": -2.8269200325012207, "loss": 1.1526, "nll_loss": 1.1281592845916748, "rewards/accuracies": 0.75, "rewards/chosen": -0.07371380180120468, "rewards/margins": 0.20897822082042694, "rewards/rejected": -0.282692015171051, "step": 6331 }, { "epoch": 3.862742107671191, "grad_norm": 1.472912311553955, "learning_rate": 1.8253521126760562e-06, "log_odds_chosen": 2.392451286315918, "log_odds_ratio": -0.4002106785774231, "logits/chosen": -0.8725870251655579, "logits/rejected": -1.00946843624115, "logps/chosen": -0.6765685081481934, "logps/rejected": -2.473606586456299, "loss": 1.0492, "nll_loss": 0.9368376135826111, "rewards/accuracies": 0.875, "rewards/chosen": -0.06765685230493546, "rewards/margins": 0.17970381677150726, "rewards/rejected": -0.24736067652702332, "step": 6332 }, { "epoch": 3.863352142748208, "grad_norm": 2.016202211380005, "learning_rate": 1.8243723208818126e-06, "log_odds_chosen": 0.9311802387237549, "log_odds_ratio": -0.4452453851699829, "logits/chosen": -0.9620637893676758, "logits/rejected": -0.9907141923904419, "logps/chosen": -0.839857816696167, "logps/rejected": -1.5102667808532715, "loss": 1.0893, "nll_loss": 1.1990654468536377, "rewards/accuracies": 0.875, "rewards/chosen": -0.08398579061031342, "rewards/margins": 0.06704089045524597, "rewards/rejected": -0.1510266810655594, "step": 6333 }, { "epoch": 3.863962177825225, "grad_norm": 1.4361698627471924, "learning_rate": 1.823392529087569e-06, "log_odds_chosen": 2.813924551010132, "log_odds_ratio": -0.4201822578907013, "logits/chosen": -0.9250467419624329, "logits/rejected": -0.9364675283432007, "logps/chosen": -0.8559650778770447, "logps/rejected": -3.1412742137908936, "loss": 1.1045, "nll_loss": 1.0348148345947266, "rewards/accuracies": 0.625, "rewards/chosen": -0.08559651672840118, "rewards/margins": 0.2285308986902237, "rewards/rejected": -0.3141274154186249, "step": 6334 }, { "epoch": 3.864572212902242, "grad_norm": 1.7744371891021729, "learning_rate": 1.822412737293325e-06, "log_odds_chosen": 2.0885844230651855, "log_odds_ratio": -0.3684108853340149, "logits/chosen": -0.785537838935852, "logits/rejected": -1.023961067199707, "logps/chosen": -0.6801424026489258, "logps/rejected": -2.234347343444824, "loss": 1.0107, "nll_loss": 0.78477942943573, "rewards/accuracies": 0.875, "rewards/chosen": -0.0680142417550087, "rewards/margins": 0.15542051196098328, "rewards/rejected": -0.22343474626541138, "step": 6335 }, { "epoch": 3.865182247979259, "grad_norm": 4.398589611053467, "learning_rate": 1.8214329454990814e-06, "log_odds_chosen": 1.0085830688476562, "log_odds_ratio": -0.5745192766189575, "logits/chosen": -0.9307905435562134, "logits/rejected": -0.9651735424995422, "logps/chosen": -0.8673211336135864, "logps/rejected": -1.5316333770751953, "loss": 1.1124, "nll_loss": 1.066111445426941, "rewards/accuracies": 0.875, "rewards/chosen": -0.08673210442066193, "rewards/margins": 0.06643123179674149, "rewards/rejected": -0.153163343667984, "step": 6336 }, { "epoch": 3.8657922830562756, "grad_norm": 1.3979079723358154, "learning_rate": 1.8204531537048375e-06, "log_odds_chosen": 1.276380181312561, "log_odds_ratio": -0.4168999195098877, "logits/chosen": -0.9225625991821289, "logits/rejected": -0.9010698795318604, "logps/chosen": -0.8797622919082642, "logps/rejected": -1.9202866554260254, "loss": 1.0139, "nll_loss": 1.0064992904663086, "rewards/accuracies": 0.875, "rewards/chosen": -0.08797622472047806, "rewards/margins": 0.10405244678258896, "rewards/rejected": -0.19202867150306702, "step": 6337 }, { "epoch": 3.866402318133293, "grad_norm": 3.4332685470581055, "learning_rate": 1.819473361910594e-06, "log_odds_chosen": 1.0004096031188965, "log_odds_ratio": -0.6238995790481567, "logits/chosen": -1.1149040460586548, "logits/rejected": -1.158483862876892, "logps/chosen": -1.0259498357772827, "logps/rejected": -1.8690006732940674, "loss": 1.2204, "nll_loss": 1.4320391416549683, "rewards/accuracies": 0.5, "rewards/chosen": -0.10259499400854111, "rewards/margins": 0.08430509269237518, "rewards/rejected": -0.1869000792503357, "step": 6338 }, { "epoch": 3.86701235321031, "grad_norm": 1.4788750410079956, "learning_rate": 1.8184935701163501e-06, "log_odds_chosen": 0.33553725481033325, "log_odds_ratio": -0.5659074783325195, "logits/chosen": -0.592131495475769, "logits/rejected": -0.8149968385696411, "logps/chosen": -1.1438699960708618, "logps/rejected": -1.3645286560058594, "loss": 1.0062, "nll_loss": 0.9072000980377197, "rewards/accuracies": 0.75, "rewards/chosen": -0.11438700556755066, "rewards/margins": 0.022065861150622368, "rewards/rejected": -0.13645288348197937, "step": 6339 }, { "epoch": 3.8676223882873266, "grad_norm": 1.315424919128418, "learning_rate": 1.8175137783221065e-06, "log_odds_chosen": 3.0424306392669678, "log_odds_ratio": -0.14421847462654114, "logits/chosen": -1.067668080329895, "logits/rejected": -1.1250463724136353, "logps/chosen": -0.6610534191131592, "logps/rejected": -2.7784886360168457, "loss": 0.9793, "nll_loss": 1.1338281631469727, "rewards/accuracies": 1.0, "rewards/chosen": -0.06610534340143204, "rewards/margins": 0.2117435187101364, "rewards/rejected": -0.27784883975982666, "step": 6340 }, { "epoch": 3.8682324233643435, "grad_norm": 2.1364049911499023, "learning_rate": 1.8165339865278628e-06, "log_odds_chosen": 0.6569457054138184, "log_odds_ratio": -0.6224817037582397, "logits/chosen": -1.0511558055877686, "logits/rejected": -1.0308997631072998, "logps/chosen": -0.9926437735557556, "logps/rejected": -1.5334360599517822, "loss": 1.1309, "nll_loss": 1.1517643928527832, "rewards/accuracies": 0.625, "rewards/chosen": -0.09926438331604004, "rewards/margins": 0.05407922714948654, "rewards/rejected": -0.15334360301494598, "step": 6341 }, { "epoch": 3.8688424584413603, "grad_norm": 1.6765080690383911, "learning_rate": 1.815554194733619e-06, "log_odds_chosen": 4.311233043670654, "log_odds_ratio": -0.27481287717819214, "logits/chosen": -0.9692884683609009, "logits/rejected": -1.0693789720535278, "logps/chosen": -0.6925941109657288, "logps/rejected": -4.3183159828186035, "loss": 0.9673, "nll_loss": 0.8081789016723633, "rewards/accuracies": 0.75, "rewards/chosen": -0.06925942003726959, "rewards/margins": 0.36257219314575195, "rewards/rejected": -0.43183162808418274, "step": 6342 }, { "epoch": 3.869452493518377, "grad_norm": 1.253583550453186, "learning_rate": 1.8145744029393753e-06, "log_odds_chosen": 1.0285124778747559, "log_odds_ratio": -0.4534572958946228, "logits/chosen": -1.135797142982483, "logits/rejected": -1.1671347618103027, "logps/chosen": -0.9232693314552307, "logps/rejected": -1.5149422883987427, "loss": 1.1576, "nll_loss": 1.373106598854065, "rewards/accuracies": 0.875, "rewards/chosen": -0.09232693910598755, "rewards/margins": 0.059167295694351196, "rewards/rejected": -0.15149423480033875, "step": 6343 }, { "epoch": 3.870062528595394, "grad_norm": 1.4431871175765991, "learning_rate": 1.8135946111451316e-06, "log_odds_chosen": 1.4580316543579102, "log_odds_ratio": -0.3385845422744751, "logits/chosen": -0.8509554862976074, "logits/rejected": -0.8759423494338989, "logps/chosen": -0.7846362590789795, "logps/rejected": -1.7929425239562988, "loss": 1.0274, "nll_loss": 1.0568455457687378, "rewards/accuracies": 1.0, "rewards/chosen": -0.07846363633871078, "rewards/margins": 0.10083059966564178, "rewards/rejected": -0.17929422855377197, "step": 6344 }, { "epoch": 3.8706725636724113, "grad_norm": 2.0706160068511963, "learning_rate": 1.812614819350888e-06, "log_odds_chosen": 1.921554684638977, "log_odds_ratio": -0.5192643404006958, "logits/chosen": -0.9410476684570312, "logits/rejected": -0.9919383525848389, "logps/chosen": -0.7556179165840149, "logps/rejected": -2.1902661323547363, "loss": 0.96, "nll_loss": 0.9147130250930786, "rewards/accuracies": 0.75, "rewards/chosen": -0.07556179165840149, "rewards/margins": 0.1434648334980011, "rewards/rejected": -0.2190266251564026, "step": 6345 }, { "epoch": 3.871282598749428, "grad_norm": 1.4330329895019531, "learning_rate": 1.811635027556644e-06, "log_odds_chosen": 1.5154485702514648, "log_odds_ratio": -0.7539454698562622, "logits/chosen": -0.9379540681838989, "logits/rejected": -0.9700087904930115, "logps/chosen": -0.9509927034378052, "logps/rejected": -2.3691608905792236, "loss": 1.0113, "nll_loss": 1.1156749725341797, "rewards/accuracies": 0.25, "rewards/chosen": -0.09509927779436111, "rewards/margins": 0.14181679487228394, "rewards/rejected": -0.23691608011722565, "step": 6346 }, { "epoch": 3.871892633826445, "grad_norm": 1.674101710319519, "learning_rate": 1.8106552357624004e-06, "log_odds_chosen": 2.1045618057250977, "log_odds_ratio": -0.30157721042633057, "logits/chosen": -0.8968585729598999, "logits/rejected": -1.1390044689178467, "logps/chosen": -0.6975257396697998, "logps/rejected": -2.2685530185699463, "loss": 0.8791, "nll_loss": 0.9772951006889343, "rewards/accuracies": 1.0, "rewards/chosen": -0.06975257396697998, "rewards/margins": 0.15710273385047913, "rewards/rejected": -0.2268553078174591, "step": 6347 }, { "epoch": 3.872502668903462, "grad_norm": 1.979581356048584, "learning_rate": 1.8096754439681567e-06, "log_odds_chosen": 0.9651686549186707, "log_odds_ratio": -0.6235750317573547, "logits/chosen": -0.8763971924781799, "logits/rejected": -0.988784909248352, "logps/chosen": -0.948755145072937, "logps/rejected": -1.7487910985946655, "loss": 1.1168, "nll_loss": 1.1849385499954224, "rewards/accuracies": 0.625, "rewards/chosen": -0.0948755219578743, "rewards/margins": 0.08000358939170837, "rewards/rejected": -0.17487910389900208, "step": 6348 }, { "epoch": 3.873112703980479, "grad_norm": 2.138054132461548, "learning_rate": 1.8086956521739129e-06, "log_odds_chosen": 2.7259912490844727, "log_odds_ratio": -0.28112882375717163, "logits/chosen": -0.9412157535552979, "logits/rejected": -1.0391931533813477, "logps/chosen": -0.8107340931892395, "logps/rejected": -3.049044609069824, "loss": 1.1394, "nll_loss": 0.9029285907745361, "rewards/accuracies": 0.875, "rewards/chosen": -0.08107341080904007, "rewards/margins": 0.22383108735084534, "rewards/rejected": -0.3049044609069824, "step": 6349 }, { "epoch": 3.873722739057496, "grad_norm": 1.3479217290878296, "learning_rate": 1.8077158603796692e-06, "log_odds_chosen": 1.237829566001892, "log_odds_ratio": -0.31866732239723206, "logits/chosen": -0.9004729986190796, "logits/rejected": -1.011165976524353, "logps/chosen": -0.820724368095398, "logps/rejected": -1.651319146156311, "loss": 1.1441, "nll_loss": 1.0037415027618408, "rewards/accuracies": 1.0, "rewards/chosen": -0.0820724368095398, "rewards/margins": 0.08305948227643967, "rewards/rejected": -0.16513191163539886, "step": 6350 }, { "epoch": 3.874332774134513, "grad_norm": 1.726147174835205, "learning_rate": 1.8067360685854255e-06, "log_odds_chosen": 1.8328038454055786, "log_odds_ratio": -0.4313080906867981, "logits/chosen": -0.893085241317749, "logits/rejected": -1.0716766119003296, "logps/chosen": -0.7938898801803589, "logps/rejected": -2.0926222801208496, "loss": 1.2124, "nll_loss": 1.0734702348709106, "rewards/accuracies": 0.875, "rewards/chosen": -0.07938899099826813, "rewards/margins": 0.12987324595451355, "rewards/rejected": -0.20926223695278168, "step": 6351 }, { "epoch": 3.8749428092115297, "grad_norm": 1.3944604396820068, "learning_rate": 1.8057562767911819e-06, "log_odds_chosen": 1.8017971515655518, "log_odds_ratio": -0.42623278498649597, "logits/chosen": -0.6261634826660156, "logits/rejected": -0.7966495156288147, "logps/chosen": -0.6548501253128052, "logps/rejected": -1.9017711877822876, "loss": 0.8657, "nll_loss": 0.6639307141304016, "rewards/accuracies": 0.625, "rewards/chosen": -0.06548501551151276, "rewards/margins": 0.12469211220741272, "rewards/rejected": -0.19017714262008667, "step": 6352 }, { "epoch": 3.8755528442885465, "grad_norm": 1.3250294923782349, "learning_rate": 1.804776484996938e-06, "log_odds_chosen": 3.6240944862365723, "log_odds_ratio": -0.31330862641334534, "logits/chosen": -1.0306442975997925, "logits/rejected": -1.2010138034820557, "logps/chosen": -0.9011054039001465, "logps/rejected": -4.049177646636963, "loss": 1.1883, "nll_loss": 1.2439583539962769, "rewards/accuracies": 0.875, "rewards/chosen": -0.09011054039001465, "rewards/margins": 0.3148072361946106, "rewards/rejected": -0.40491777658462524, "step": 6353 }, { "epoch": 3.8761628793655634, "grad_norm": 1.6466290950775146, "learning_rate": 1.8037966932026945e-06, "log_odds_chosen": 0.5628935694694519, "log_odds_ratio": -0.4776843786239624, "logits/chosen": -0.8864419460296631, "logits/rejected": -0.9934027791023254, "logps/chosen": -0.8642523288726807, "logps/rejected": -1.2093101739883423, "loss": 1.2078, "nll_loss": 1.0149465799331665, "rewards/accuracies": 0.875, "rewards/chosen": -0.08642524480819702, "rewards/margins": 0.03450577333569527, "rewards/rejected": -0.12093101441860199, "step": 6354 }, { "epoch": 3.87677291444258, "grad_norm": 1.3189945220947266, "learning_rate": 1.8028169014084506e-06, "log_odds_chosen": 1.8177293539047241, "log_odds_ratio": -0.346022367477417, "logits/chosen": -0.9201841354370117, "logits/rejected": -1.0564053058624268, "logps/chosen": -0.7691986560821533, "logps/rejected": -2.1782846450805664, "loss": 0.917, "nll_loss": 0.8575156927108765, "rewards/accuracies": 1.0, "rewards/chosen": -0.07691986858844757, "rewards/margins": 0.1409086138010025, "rewards/rejected": -0.21782848238945007, "step": 6355 }, { "epoch": 3.8773829495195975, "grad_norm": 1.4853428602218628, "learning_rate": 1.801837109614207e-06, "log_odds_chosen": 3.953490972518921, "log_odds_ratio": -0.22674596309661865, "logits/chosen": -1.024939775466919, "logits/rejected": -1.2566773891448975, "logps/chosen": -0.6765103936195374, "logps/rejected": -4.088720321655273, "loss": 1.1002, "nll_loss": 1.237245798110962, "rewards/accuracies": 1.0, "rewards/chosen": -0.06765103340148926, "rewards/margins": 0.34122100472450256, "rewards/rejected": -0.4088720381259918, "step": 6356 }, { "epoch": 3.8779929845966143, "grad_norm": 5.870913982391357, "learning_rate": 1.800857317819963e-06, "log_odds_chosen": 2.339016914367676, "log_odds_ratio": -0.4435468912124634, "logits/chosen": -0.8625795841217041, "logits/rejected": -1.1457940340042114, "logps/chosen": -0.7767641544342041, "logps/rejected": -2.579972267150879, "loss": 1.0184, "nll_loss": 1.135305643081665, "rewards/accuracies": 0.625, "rewards/chosen": -0.07767640799283981, "rewards/margins": 0.18032079935073853, "rewards/rejected": -0.25799721479415894, "step": 6357 }, { "epoch": 3.878603019673631, "grad_norm": 1.8616042137145996, "learning_rate": 1.7998775260257194e-06, "log_odds_chosen": 0.899413526058197, "log_odds_ratio": -0.5050809383392334, "logits/chosen": -0.8946366310119629, "logits/rejected": -0.9236505031585693, "logps/chosen": -0.7835617065429688, "logps/rejected": -1.3940848112106323, "loss": 0.9576, "nll_loss": 0.9567127227783203, "rewards/accuracies": 0.75, "rewards/chosen": -0.07835616916418076, "rewards/margins": 0.06105230748653412, "rewards/rejected": -0.13940846920013428, "step": 6358 }, { "epoch": 3.879213054750648, "grad_norm": 2.390634775161743, "learning_rate": 1.7988977342314758e-06, "log_odds_chosen": 3.184752941131592, "log_odds_ratio": -0.19216561317443848, "logits/chosen": -0.9379646182060242, "logits/rejected": -1.132899284362793, "logps/chosen": -0.6727322340011597, "logps/rejected": -3.2116830348968506, "loss": 1.1781, "nll_loss": 0.9372382164001465, "rewards/accuracies": 1.0, "rewards/chosen": -0.06727322190999985, "rewards/margins": 0.2538950741291046, "rewards/rejected": -0.32116833329200745, "step": 6359 }, { "epoch": 3.8798230898276653, "grad_norm": 1.991563081741333, "learning_rate": 1.7979179424372319e-06, "log_odds_chosen": 2.7494211196899414, "log_odds_ratio": -0.39535245299339294, "logits/chosen": -1.1294217109680176, "logits/rejected": -1.216009259223938, "logps/chosen": -0.8984432816505432, "logps/rejected": -3.02185320854187, "loss": 1.1009, "nll_loss": 1.1461317539215088, "rewards/accuracies": 0.75, "rewards/chosen": -0.08984432369470596, "rewards/margins": 0.21234099566936493, "rewards/rejected": -0.3021853268146515, "step": 6360 }, { "epoch": 3.880433124904682, "grad_norm": 2.324904203414917, "learning_rate": 1.7969381506429884e-06, "log_odds_chosen": 0.9837485551834106, "log_odds_ratio": -0.5959333777427673, "logits/chosen": -1.1969691514968872, "logits/rejected": -1.1425871849060059, "logps/chosen": -0.9436219930648804, "logps/rejected": -1.8499799966812134, "loss": 1.1668, "nll_loss": 1.1789276599884033, "rewards/accuracies": 0.375, "rewards/chosen": -0.09436219930648804, "rewards/margins": 0.09063581377267838, "rewards/rejected": -0.18499800562858582, "step": 6361 }, { "epoch": 3.881043159981699, "grad_norm": 5.4034295082092285, "learning_rate": 1.7959583588487446e-06, "log_odds_chosen": 1.7426578998565674, "log_odds_ratio": -0.3548259139060974, "logits/chosen": -0.9530066251754761, "logits/rejected": -1.0014299154281616, "logps/chosen": -0.9134712219238281, "logps/rejected": -2.3290858268737793, "loss": 1.1289, "nll_loss": 1.1025316715240479, "rewards/accuracies": 0.75, "rewards/chosen": -0.0913471207022667, "rewards/margins": 0.14156144857406616, "rewards/rejected": -0.23290856182575226, "step": 6362 }, { "epoch": 3.881653195058716, "grad_norm": 8.366369247436523, "learning_rate": 1.7949785670545009e-06, "log_odds_chosen": 2.1277575492858887, "log_odds_ratio": -0.4371841847896576, "logits/chosen": -1.0376842021942139, "logits/rejected": -1.273890733718872, "logps/chosen": -0.7085335850715637, "logps/rejected": -2.4055299758911133, "loss": 1.1018, "nll_loss": 0.914114773273468, "rewards/accuracies": 0.75, "rewards/chosen": -0.07085336744785309, "rewards/margins": 0.16969965398311615, "rewards/rejected": -0.24055302143096924, "step": 6363 }, { "epoch": 3.8822632301357327, "grad_norm": 1.5163928270339966, "learning_rate": 1.7939987752602572e-06, "log_odds_chosen": 0.8311526775360107, "log_odds_ratio": -0.4383241534233093, "logits/chosen": -0.9183498620986938, "logits/rejected": -0.940211832523346, "logps/chosen": -0.8741617202758789, "logps/rejected": -1.4500327110290527, "loss": 1.023, "nll_loss": 0.8489181995391846, "rewards/accuracies": 1.0, "rewards/chosen": -0.08741617202758789, "rewards/margins": 0.05758709833025932, "rewards/rejected": -0.1450032740831375, "step": 6364 }, { "epoch": 3.8828732652127496, "grad_norm": 7.85457706451416, "learning_rate": 1.7930189834660133e-06, "log_odds_chosen": 1.4163789749145508, "log_odds_ratio": -0.5233396887779236, "logits/chosen": -0.9604837894439697, "logits/rejected": -0.8513934016227722, "logps/chosen": -0.832427442073822, "logps/rejected": -1.9255034923553467, "loss": 1.081, "nll_loss": 0.8887145519256592, "rewards/accuracies": 0.625, "rewards/chosen": -0.0832427442073822, "rewards/margins": 0.10930760204792023, "rewards/rejected": -0.19255034625530243, "step": 6365 }, { "epoch": 3.8834833002897664, "grad_norm": 10.772170066833496, "learning_rate": 1.7920391916717697e-06, "log_odds_chosen": 0.6354130506515503, "log_odds_ratio": -0.7850409746170044, "logits/chosen": -0.8336882591247559, "logits/rejected": -1.0201528072357178, "logps/chosen": -1.361232042312622, "logps/rejected": -1.850919246673584, "loss": 1.0427, "nll_loss": 1.2481440305709839, "rewards/accuracies": 0.75, "rewards/chosen": -0.13612321019172668, "rewards/margins": 0.048968736082315445, "rewards/rejected": -0.18509194254875183, "step": 6366 }, { "epoch": 3.8840933353667837, "grad_norm": 1.89829683303833, "learning_rate": 1.7910593998775258e-06, "log_odds_chosen": 0.7198923230171204, "log_odds_ratio": -1.2364250421524048, "logits/chosen": -0.9155498147010803, "logits/rejected": -0.9728715419769287, "logps/chosen": -2.140864372253418, "logps/rejected": -2.6967108249664307, "loss": 1.0441, "nll_loss": 1.0422486066818237, "rewards/accuracies": 0.75, "rewards/chosen": -0.21408644318580627, "rewards/margins": 0.05558465048670769, "rewards/rejected": -0.26967108249664307, "step": 6367 }, { "epoch": 3.8847033704438005, "grad_norm": 3.1445388793945312, "learning_rate": 1.7900796080832824e-06, "log_odds_chosen": 3.057600498199463, "log_odds_ratio": -0.10633544623851776, "logits/chosen": -0.7846028208732605, "logits/rejected": -1.0559204816818237, "logps/chosen": -0.47068679332733154, "logps/rejected": -2.5304229259490967, "loss": 0.7869, "nll_loss": 0.5968412160873413, "rewards/accuracies": 1.0, "rewards/chosen": -0.04706868156790733, "rewards/margins": 0.20597362518310547, "rewards/rejected": -0.2530422806739807, "step": 6368 }, { "epoch": 3.8853134055208174, "grad_norm": 1.9709358215332031, "learning_rate": 1.7890998162890385e-06, "log_odds_chosen": 2.6293821334838867, "log_odds_ratio": -0.4551715552806854, "logits/chosen": -0.9364543557167053, "logits/rejected": -1.1163785457611084, "logps/chosen": -0.8250561356544495, "logps/rejected": -3.09130859375, "loss": 0.9858, "nll_loss": 1.0897060632705688, "rewards/accuracies": 0.625, "rewards/chosen": -0.08250562101602554, "rewards/margins": 0.2266252636909485, "rewards/rejected": -0.30913087725639343, "step": 6369 }, { "epoch": 3.8859234405978342, "grad_norm": 2.6245269775390625, "learning_rate": 1.7881200244947948e-06, "log_odds_chosen": 2.136363983154297, "log_odds_ratio": -0.3296632170677185, "logits/chosen": -1.0468950271606445, "logits/rejected": -1.053390622138977, "logps/chosen": -1.095019817352295, "logps/rejected": -2.9683408737182617, "loss": 1.1083, "nll_loss": 1.1956207752227783, "rewards/accuracies": 0.75, "rewards/chosen": -0.10950198024511337, "rewards/margins": 0.1873321235179901, "rewards/rejected": -0.2968341112136841, "step": 6370 }, { "epoch": 3.8865334756748515, "grad_norm": 1.3013808727264404, "learning_rate": 1.7871402327005511e-06, "log_odds_chosen": 0.6169997453689575, "log_odds_ratio": -0.5122424364089966, "logits/chosen": -0.809316098690033, "logits/rejected": -0.9723600745201111, "logps/chosen": -0.8535147905349731, "logps/rejected": -1.2665425539016724, "loss": 1.0668, "nll_loss": 1.0386037826538086, "rewards/accuracies": 0.75, "rewards/chosen": -0.08535147458314896, "rewards/margins": 0.04130277782678604, "rewards/rejected": -0.1266542673110962, "step": 6371 }, { "epoch": 3.8871435107518684, "grad_norm": 1.4701566696166992, "learning_rate": 1.7861604409063073e-06, "log_odds_chosen": 2.584803581237793, "log_odds_ratio": -0.4165618419647217, "logits/chosen": -1.039309024810791, "logits/rejected": -1.1354039907455444, "logps/chosen": -0.7836136817932129, "logps/rejected": -2.883150815963745, "loss": 1.1053, "nll_loss": 1.0777735710144043, "rewards/accuracies": 0.75, "rewards/chosen": -0.07836136966943741, "rewards/margins": 0.20995372533798218, "rewards/rejected": -0.288315087556839, "step": 6372 }, { "epoch": 3.887753545828885, "grad_norm": 2.0445587635040283, "learning_rate": 1.7851806491120636e-06, "log_odds_chosen": 4.122287273406982, "log_odds_ratio": -0.08334866166114807, "logits/chosen": -0.6270179748535156, "logits/rejected": -0.8497719168663025, "logps/chosen": -0.46044719219207764, "logps/rejected": -3.5527946949005127, "loss": 0.9544, "nll_loss": 0.6239098310470581, "rewards/accuracies": 1.0, "rewards/chosen": -0.04604472219944, "rewards/margins": 0.30923476815223694, "rewards/rejected": -0.35527950525283813, "step": 6373 }, { "epoch": 3.888363580905902, "grad_norm": 1.7519971132278442, "learning_rate": 1.7842008573178197e-06, "log_odds_chosen": 2.4577956199645996, "log_odds_ratio": -0.3639274537563324, "logits/chosen": -0.8127433061599731, "logits/rejected": -0.9802616834640503, "logps/chosen": -0.7774901390075684, "logps/rejected": -2.780672073364258, "loss": 1.0252, "nll_loss": 0.8153642416000366, "rewards/accuracies": 0.875, "rewards/chosen": -0.07774901390075684, "rewards/margins": 0.20031818747520447, "rewards/rejected": -0.2780671715736389, "step": 6374 }, { "epoch": 3.888973615982919, "grad_norm": 1.4864206314086914, "learning_rate": 1.7832210655235763e-06, "log_odds_chosen": 3.6230955123901367, "log_odds_ratio": -0.1823902726173401, "logits/chosen": -0.5831515192985535, "logits/rejected": -0.8871155977249146, "logps/chosen": -0.5558709502220154, "logps/rejected": -2.9639978408813477, "loss": 0.9887, "nll_loss": 0.9202374219894409, "rewards/accuracies": 0.875, "rewards/chosen": -0.055587101727724075, "rewards/margins": 0.24081270396709442, "rewards/rejected": -0.2963998019695282, "step": 6375 }, { "epoch": 3.8895836510599358, "grad_norm": 1.8769354820251465, "learning_rate": 1.7822412737293324e-06, "log_odds_chosen": 1.3078755140304565, "log_odds_ratio": -0.43175211548805237, "logits/chosen": -0.8868544101715088, "logits/rejected": -0.8948922157287598, "logps/chosen": -0.7876343727111816, "logps/rejected": -1.6285982131958008, "loss": 0.9607, "nll_loss": 1.067455530166626, "rewards/accuracies": 0.75, "rewards/chosen": -0.0787634402513504, "rewards/margins": 0.08409638702869415, "rewards/rejected": -0.16285982728004456, "step": 6376 }, { "epoch": 3.8901936861369526, "grad_norm": 4.937449932098389, "learning_rate": 1.7812614819350887e-06, "log_odds_chosen": 1.5409839153289795, "log_odds_ratio": -0.29718124866485596, "logits/chosen": -0.8329375982284546, "logits/rejected": -0.8868123888969421, "logps/chosen": -0.8352867364883423, "logps/rejected": -2.013277530670166, "loss": 0.9979, "nll_loss": 0.9959021806716919, "rewards/accuracies": 0.875, "rewards/chosen": -0.08352866768836975, "rewards/margins": 0.1177990734577179, "rewards/rejected": -0.20132774114608765, "step": 6377 }, { "epoch": 3.89080372121397, "grad_norm": 2.012138843536377, "learning_rate": 1.780281690140845e-06, "log_odds_chosen": 2.6006784439086914, "log_odds_ratio": -0.33062997460365295, "logits/chosen": -0.8775696754455566, "logits/rejected": -0.9890745878219604, "logps/chosen": -0.6237918138504028, "logps/rejected": -2.3307905197143555, "loss": 1.1423, "nll_loss": 1.2298866510391235, "rewards/accuracies": 0.75, "rewards/chosen": -0.06237918511033058, "rewards/margins": 0.1706998646259308, "rewards/rejected": -0.23307904601097107, "step": 6378 }, { "epoch": 3.8914137562909867, "grad_norm": 12.530439376831055, "learning_rate": 1.7793018983466012e-06, "log_odds_chosen": 0.7563338279724121, "log_odds_ratio": -0.6779503226280212, "logits/chosen": -0.9640970230102539, "logits/rejected": -0.9105963110923767, "logps/chosen": -1.0216600894927979, "logps/rejected": -1.613685131072998, "loss": 1.1824, "nll_loss": 1.066859483718872, "rewards/accuracies": 0.375, "rewards/chosen": -0.10216601192951202, "rewards/margins": 0.05920249968767166, "rewards/rejected": -0.16136851906776428, "step": 6379 }, { "epoch": 3.8920237913680036, "grad_norm": 1.3201042413711548, "learning_rate": 1.7783221065523575e-06, "log_odds_chosen": 2.305384635925293, "log_odds_ratio": -0.27683883905410767, "logits/chosen": -0.8798860311508179, "logits/rejected": -0.9451954364776611, "logps/chosen": -0.6752399206161499, "logps/rejected": -2.472285509109497, "loss": 0.8996, "nll_loss": 0.7886492013931274, "rewards/accuracies": 0.875, "rewards/chosen": -0.06752399355173111, "rewards/margins": 0.17970457673072815, "rewards/rejected": -0.24722856283187866, "step": 6380 }, { "epoch": 3.8926338264450204, "grad_norm": 3.2428877353668213, "learning_rate": 1.7773423147581138e-06, "log_odds_chosen": 2.2081847190856934, "log_odds_ratio": -0.3899189829826355, "logits/chosen": -0.748022198677063, "logits/rejected": -0.9810366630554199, "logps/chosen": -0.8088713884353638, "logps/rejected": -2.487675428390503, "loss": 1.0264, "nll_loss": 0.848269522190094, "rewards/accuracies": 0.75, "rewards/chosen": -0.08088713884353638, "rewards/margins": 0.16788041591644287, "rewards/rejected": -0.24876753985881805, "step": 6381 }, { "epoch": 3.8932438615220377, "grad_norm": 3.667452573776245, "learning_rate": 1.7763625229638702e-06, "log_odds_chosen": 1.5556296110153198, "log_odds_ratio": -0.6535189747810364, "logits/chosen": -0.7264793515205383, "logits/rejected": -0.9038837552070618, "logps/chosen": -0.8686904311180115, "logps/rejected": -2.2269163131713867, "loss": 0.9709, "nll_loss": 0.9532849788665771, "rewards/accuracies": 0.625, "rewards/chosen": -0.08686904609203339, "rewards/margins": 0.135822594165802, "rewards/rejected": -0.22269165515899658, "step": 6382 }, { "epoch": 3.8938538965990546, "grad_norm": 2.06118106842041, "learning_rate": 1.7753827311696263e-06, "log_odds_chosen": 3.657463550567627, "log_odds_ratio": -0.25905847549438477, "logits/chosen": -0.726830005645752, "logits/rejected": -1.0585284233093262, "logps/chosen": -0.5773975849151611, "logps/rejected": -3.4533348083496094, "loss": 0.983, "nll_loss": 1.0249848365783691, "rewards/accuracies": 0.875, "rewards/chosen": -0.057739757001399994, "rewards/margins": 0.2875937521457672, "rewards/rejected": -0.3453335165977478, "step": 6383 }, { "epoch": 3.8944639316760714, "grad_norm": 1.3186908960342407, "learning_rate": 1.7744029393753826e-06, "log_odds_chosen": 3.061950206756592, "log_odds_ratio": -0.38937488198280334, "logits/chosen": -0.9240027666091919, "logits/rejected": -1.0884368419647217, "logps/chosen": -0.8378300666809082, "logps/rejected": -3.541773557662964, "loss": 0.9867, "nll_loss": 0.9796461462974548, "rewards/accuracies": 0.875, "rewards/chosen": -0.08378300070762634, "rewards/margins": 0.27039435505867004, "rewards/rejected": -0.3541773557662964, "step": 6384 }, { "epoch": 3.8950739667530883, "grad_norm": 1.342352032661438, "learning_rate": 1.773423147581139e-06, "log_odds_chosen": 1.5369725227355957, "log_odds_ratio": -0.28488001227378845, "logits/chosen": -0.8577761650085449, "logits/rejected": -0.9022824764251709, "logps/chosen": -0.7730234861373901, "logps/rejected": -1.828640341758728, "loss": 0.9187, "nll_loss": 0.9981684684753418, "rewards/accuracies": 1.0, "rewards/chosen": -0.07730235159397125, "rewards/margins": 0.10556168854236603, "rewards/rejected": -0.1828640252351761, "step": 6385 }, { "epoch": 3.895684001830105, "grad_norm": 3.697690725326538, "learning_rate": 1.772443355786895e-06, "log_odds_chosen": 1.3997650146484375, "log_odds_ratio": -0.37809932231903076, "logits/chosen": -0.7633139491081238, "logits/rejected": -0.9106794595718384, "logps/chosen": -0.8217456340789795, "logps/rejected": -1.6604106426239014, "loss": 1.018, "nll_loss": 1.0426924228668213, "rewards/accuracies": 0.75, "rewards/chosen": -0.08217456936836243, "rewards/margins": 0.08386650681495667, "rewards/rejected": -0.1660410761833191, "step": 6386 }, { "epoch": 3.896294036907122, "grad_norm": 1.6778427362442017, "learning_rate": 1.7714635639926514e-06, "log_odds_chosen": 1.734879493713379, "log_odds_ratio": -0.544303297996521, "logits/chosen": -0.8674394488334656, "logits/rejected": -0.8977314829826355, "logps/chosen": -0.7357368469238281, "logps/rejected": -1.9943084716796875, "loss": 1.1317, "nll_loss": 0.9984614253044128, "rewards/accuracies": 0.5, "rewards/chosen": -0.07357369363307953, "rewards/margins": 0.1258571594953537, "rewards/rejected": -0.19943085312843323, "step": 6387 }, { "epoch": 3.8969040719841392, "grad_norm": 1.67449152469635, "learning_rate": 1.7704837721984078e-06, "log_odds_chosen": 0.9781320691108704, "log_odds_ratio": -0.7027364373207092, "logits/chosen": -0.720744788646698, "logits/rejected": -0.8165098428726196, "logps/chosen": -0.8985742926597595, "logps/rejected": -1.6046783924102783, "loss": 1.0673, "nll_loss": 1.0511105060577393, "rewards/accuracies": 0.375, "rewards/chosen": -0.08985744416713715, "rewards/margins": 0.070610411465168, "rewards/rejected": -0.16046783328056335, "step": 6388 }, { "epoch": 3.897514107061156, "grad_norm": 1.354366660118103, "learning_rate": 1.769503980404164e-06, "log_odds_chosen": 2.516688346862793, "log_odds_ratio": -0.27149051427841187, "logits/chosen": -0.6916765570640564, "logits/rejected": -0.906062662601471, "logps/chosen": -0.5523725152015686, "logps/rejected": -2.2475924491882324, "loss": 1.0465, "nll_loss": 0.8916361927986145, "rewards/accuracies": 0.875, "rewards/chosen": -0.05523725226521492, "rewards/margins": 0.1695220023393631, "rewards/rejected": -0.22475925087928772, "step": 6389 }, { "epoch": 3.898124142138173, "grad_norm": 1.2644447088241577, "learning_rate": 1.7685241886099202e-06, "log_odds_chosen": 1.561985731124878, "log_odds_ratio": -0.5056743025779724, "logits/chosen": -0.7886200547218323, "logits/rejected": -0.8685328960418701, "logps/chosen": -0.8589891195297241, "logps/rejected": -2.1476593017578125, "loss": 0.9703, "nll_loss": 0.9998997449874878, "rewards/accuracies": 0.5, "rewards/chosen": -0.08589892089366913, "rewards/margins": 0.1288670152425766, "rewards/rejected": -0.21476593613624573, "step": 6390 }, { "epoch": 3.8987341772151898, "grad_norm": 7.0898027420043945, "learning_rate": 1.7675443968156768e-06, "log_odds_chosen": 4.648181915283203, "log_odds_ratio": -0.0986332818865776, "logits/chosen": -0.8044359683990479, "logits/rejected": -1.2328060865402222, "logps/chosen": -0.7132753729820251, "logps/rejected": -4.665567398071289, "loss": 1.0583, "nll_loss": 1.2245222330093384, "rewards/accuracies": 1.0, "rewards/chosen": -0.07132753729820251, "rewards/margins": 0.3952292799949646, "rewards/rejected": -0.4665568172931671, "step": 6391 }, { "epoch": 3.899344212292207, "grad_norm": 1.553802728652954, "learning_rate": 1.7665646050214329e-06, "log_odds_chosen": 1.3692445755004883, "log_odds_ratio": -0.47577106952667236, "logits/chosen": -0.8402564525604248, "logits/rejected": -0.9805209040641785, "logps/chosen": -0.732491135597229, "logps/rejected": -1.7550017833709717, "loss": 1.0646, "nll_loss": 0.8957127332687378, "rewards/accuracies": 0.625, "rewards/chosen": -0.07324911653995514, "rewards/margins": 0.10225105285644531, "rewards/rejected": -0.17550016939640045, "step": 6392 }, { "epoch": 3.899954247369224, "grad_norm": 2.339263916015625, "learning_rate": 1.765584813227189e-06, "log_odds_chosen": 1.484078288078308, "log_odds_ratio": -0.4125809967517853, "logits/chosen": -0.7482997179031372, "logits/rejected": -0.7413763403892517, "logps/chosen": -0.9825847148895264, "logps/rejected": -1.9555094242095947, "loss": 0.9724, "nll_loss": 0.8996315002441406, "rewards/accuracies": 0.75, "rewards/chosen": -0.09825847297906876, "rewards/margins": 0.0972924754023552, "rewards/rejected": -0.19555094838142395, "step": 6393 }, { "epoch": 3.9005642824462408, "grad_norm": 1.1294904947280884, "learning_rate": 1.7646050214329453e-06, "log_odds_chosen": 0.9521656036376953, "log_odds_ratio": -0.5805201530456543, "logits/chosen": -0.8351353406906128, "logits/rejected": -0.9775427579879761, "logps/chosen": -0.7092540860176086, "logps/rejected": -1.3654537200927734, "loss": 1.0269, "nll_loss": 0.9895372986793518, "rewards/accuracies": 0.5, "rewards/chosen": -0.07092540711164474, "rewards/margins": 0.06561996787786484, "rewards/rejected": -0.13654538989067078, "step": 6394 }, { "epoch": 3.9011743175232576, "grad_norm": 2.287626266479492, "learning_rate": 1.7636252296387017e-06, "log_odds_chosen": 2.5649256706237793, "log_odds_ratio": -0.22732114791870117, "logits/chosen": -1.0002670288085938, "logits/rejected": -0.9358969926834106, "logps/chosen": -0.8206393718719482, "logps/rejected": -2.844712734222412, "loss": 0.9994, "nll_loss": 1.032522201538086, "rewards/accuracies": 1.0, "rewards/chosen": -0.0820639356970787, "rewards/margins": 0.2024073451757431, "rewards/rejected": -0.2844712734222412, "step": 6395 }, { "epoch": 3.9017843526002745, "grad_norm": 1.5643608570098877, "learning_rate": 1.762645437844458e-06, "log_odds_chosen": 2.9416093826293945, "log_odds_ratio": -0.22348785400390625, "logits/chosen": -0.6295538544654846, "logits/rejected": -0.8731718063354492, "logps/chosen": -0.6229745149612427, "logps/rejected": -2.9356844425201416, "loss": 1.0303, "nll_loss": 1.0210909843444824, "rewards/accuracies": 0.875, "rewards/chosen": -0.06229745224118233, "rewards/margins": 0.23127099871635437, "rewards/rejected": -0.2935684323310852, "step": 6396 }, { "epoch": 3.9023943876772913, "grad_norm": 2.168496608734131, "learning_rate": 1.7616656460502141e-06, "log_odds_chosen": 2.871946096420288, "log_odds_ratio": -0.6503486037254333, "logits/chosen": -0.8921869993209839, "logits/rejected": -1.0057930946350098, "logps/chosen": -0.9569573402404785, "logps/rejected": -3.5899388790130615, "loss": 1.1101, "nll_loss": 1.0716071128845215, "rewards/accuracies": 0.625, "rewards/chosen": -0.09569574147462845, "rewards/margins": 0.2632981538772583, "rewards/rejected": -0.35899388790130615, "step": 6397 }, { "epoch": 3.903004422754308, "grad_norm": 2.81036114692688, "learning_rate": 1.7606858542559707e-06, "log_odds_chosen": 2.253352165222168, "log_odds_ratio": -0.3717951476573944, "logits/chosen": -0.6970576047897339, "logits/rejected": -1.0368376970291138, "logps/chosen": -0.7886031866073608, "logps/rejected": -2.45029878616333, "loss": 1.1084, "nll_loss": 1.087898850440979, "rewards/accuracies": 0.875, "rewards/chosen": -0.0788603127002716, "rewards/margins": 0.16616955399513245, "rewards/rejected": -0.24502986669540405, "step": 6398 }, { "epoch": 3.9036144578313254, "grad_norm": 2.2865164279937744, "learning_rate": 1.7597060624617268e-06, "log_odds_chosen": 1.3036860227584839, "log_odds_ratio": -0.5015999674797058, "logits/chosen": -0.824471652507782, "logits/rejected": -0.8821091651916504, "logps/chosen": -0.8881775140762329, "logps/rejected": -1.942642092704773, "loss": 1.0722, "nll_loss": 1.046481966972351, "rewards/accuracies": 0.625, "rewards/chosen": -0.08881776034832001, "rewards/margins": 0.105446457862854, "rewards/rejected": -0.1942642331123352, "step": 6399 }, { "epoch": 3.9042244929083423, "grad_norm": 1.7446832656860352, "learning_rate": 1.758726270667483e-06, "log_odds_chosen": 2.2618861198425293, "log_odds_ratio": -0.32212895154953003, "logits/chosen": -0.7056883573532104, "logits/rejected": -0.7404018640518188, "logps/chosen": -0.6929346919059753, "logps/rejected": -2.4630348682403564, "loss": 0.899, "nll_loss": 0.814314603805542, "rewards/accuracies": 0.875, "rewards/chosen": -0.06929346919059753, "rewards/margins": 0.17700999975204468, "rewards/rejected": -0.2463034838438034, "step": 6400 }, { "epoch": 3.904834527985359, "grad_norm": 1.776785969734192, "learning_rate": 1.7577464788732395e-06, "log_odds_chosen": 2.332944631576538, "log_odds_ratio": -0.2027590274810791, "logits/chosen": -0.6369339823722839, "logits/rejected": -0.7768293023109436, "logps/chosen": -0.6619028449058533, "logps/rejected": -2.312546730041504, "loss": 0.9725, "nll_loss": 0.8095893263816833, "rewards/accuracies": 1.0, "rewards/chosen": -0.06619028747081757, "rewards/margins": 0.16506439447402954, "rewards/rejected": -0.2312546670436859, "step": 6401 }, { "epoch": 3.905444563062376, "grad_norm": 1.4539076089859009, "learning_rate": 1.7567666870789956e-06, "log_odds_chosen": 2.9785633087158203, "log_odds_ratio": -0.3315955698490143, "logits/chosen": -0.975583553314209, "logits/rejected": -1.1476191282272339, "logps/chosen": -0.7968583703041077, "logps/rejected": -3.1257500648498535, "loss": 1.0313, "nll_loss": 1.1155837774276733, "rewards/accuracies": 0.625, "rewards/chosen": -0.07968583703041077, "rewards/margins": 0.23288919031620026, "rewards/rejected": -0.31257501244544983, "step": 6402 }, { "epoch": 3.9060545981393933, "grad_norm": 5.484230995178223, "learning_rate": 1.755786895284752e-06, "log_odds_chosen": 2.39998459815979, "log_odds_ratio": -0.45684999227523804, "logits/chosen": -0.9300321340560913, "logits/rejected": -1.0239953994750977, "logps/chosen": -1.0716817378997803, "logps/rejected": -3.3237593173980713, "loss": 1.0903, "nll_loss": 1.0422124862670898, "rewards/accuracies": 0.625, "rewards/chosen": -0.10716818273067474, "rewards/margins": 0.22520776093006134, "rewards/rejected": -0.3323759436607361, "step": 6403 }, { "epoch": 3.90666463321641, "grad_norm": 1.3590261936187744, "learning_rate": 1.754807103490508e-06, "log_odds_chosen": 2.3193304538726807, "log_odds_ratio": -0.47917652130126953, "logits/chosen": -0.8837984800338745, "logits/rejected": -1.0544929504394531, "logps/chosen": -0.7068431377410889, "logps/rejected": -2.7624192237854004, "loss": 1.0467, "nll_loss": 0.7804876565933228, "rewards/accuracies": 0.75, "rewards/chosen": -0.07068431377410889, "rewards/margins": 0.20555758476257324, "rewards/rejected": -0.2762419283390045, "step": 6404 }, { "epoch": 3.907274668293427, "grad_norm": 1.573745608329773, "learning_rate": 1.7538273116962646e-06, "log_odds_chosen": 1.0030111074447632, "log_odds_ratio": -0.4139997959136963, "logits/chosen": -0.5576868653297424, "logits/rejected": -0.8622667789459229, "logps/chosen": -0.6203320622444153, "logps/rejected": -1.241346836090088, "loss": 1.0237, "nll_loss": 0.7738070487976074, "rewards/accuracies": 0.75, "rewards/chosen": -0.06203320622444153, "rewards/margins": 0.062101468443870544, "rewards/rejected": -0.12413467466831207, "step": 6405 }, { "epoch": 3.907884703370444, "grad_norm": 5.421780586242676, "learning_rate": 1.7528475199020207e-06, "log_odds_chosen": 2.7635602951049805, "log_odds_ratio": -0.36889150738716125, "logits/chosen": -0.8277879953384399, "logits/rejected": -0.8297827839851379, "logps/chosen": -0.7793207168579102, "logps/rejected": -3.1286492347717285, "loss": 0.9931, "nll_loss": 0.9979322552680969, "rewards/accuracies": 0.75, "rewards/chosen": -0.07793206721544266, "rewards/margins": 0.23493283987045288, "rewards/rejected": -0.31286492943763733, "step": 6406 }, { "epoch": 3.9084947384474606, "grad_norm": 1.5187913179397583, "learning_rate": 1.751867728107777e-06, "log_odds_chosen": 1.078414797782898, "log_odds_ratio": -0.5506833791732788, "logits/chosen": -0.8696665167808533, "logits/rejected": -0.9493823051452637, "logps/chosen": -0.7919617295265198, "logps/rejected": -1.5486040115356445, "loss": 1.0244, "nll_loss": 1.0021296739578247, "rewards/accuracies": 0.625, "rewards/chosen": -0.07919617742300034, "rewards/margins": 0.075664222240448, "rewards/rejected": -0.15486040711402893, "step": 6407 }, { "epoch": 3.9091047735244775, "grad_norm": 1.7693095207214355, "learning_rate": 1.7508879363135334e-06, "log_odds_chosen": 2.0193727016448975, "log_odds_ratio": -0.28388118743896484, "logits/chosen": -0.8603981733322144, "logits/rejected": -0.9198558926582336, "logps/chosen": -0.5652766823768616, "logps/rejected": -1.9712796211242676, "loss": 0.9406, "nll_loss": 0.929655909538269, "rewards/accuracies": 0.875, "rewards/chosen": -0.05652766674757004, "rewards/margins": 0.1406002938747406, "rewards/rejected": -0.19712796807289124, "step": 6408 }, { "epoch": 3.9097148086014943, "grad_norm": 1.6463918685913086, "learning_rate": 1.7499081445192895e-06, "log_odds_chosen": 0.966492772102356, "log_odds_ratio": -0.6383450031280518, "logits/chosen": -0.9616365432739258, "logits/rejected": -0.9602927565574646, "logps/chosen": -0.8849955797195435, "logps/rejected": -1.6655548810958862, "loss": 0.9626, "nll_loss": 0.9762716293334961, "rewards/accuracies": 0.5, "rewards/chosen": -0.08849955350160599, "rewards/margins": 0.07805593311786652, "rewards/rejected": -0.1665554940700531, "step": 6409 }, { "epoch": 3.9103248436785116, "grad_norm": 2.509967088699341, "learning_rate": 1.7489283527250458e-06, "log_odds_chosen": 3.0205917358398438, "log_odds_ratio": -0.16820093989372253, "logits/chosen": -0.8306092619895935, "logits/rejected": -0.9551430940628052, "logps/chosen": -0.572675883769989, "logps/rejected": -2.6611766815185547, "loss": 0.9685, "nll_loss": 1.0375317335128784, "rewards/accuracies": 0.875, "rewards/chosen": -0.05726758763194084, "rewards/margins": 0.20885008573532104, "rewards/rejected": -0.2661176919937134, "step": 6410 }, { "epoch": 3.9109348787555285, "grad_norm": 2.0794119834899902, "learning_rate": 1.7479485609308022e-06, "log_odds_chosen": 1.4315943717956543, "log_odds_ratio": -0.35705310106277466, "logits/chosen": -0.9486311674118042, "logits/rejected": -1.069492220878601, "logps/chosen": -1.0007835626602173, "logps/rejected": -2.1687912940979004, "loss": 1.0818, "nll_loss": 1.227723240852356, "rewards/accuracies": 0.75, "rewards/chosen": -0.10007835179567337, "rewards/margins": 0.11680075526237488, "rewards/rejected": -0.21687909960746765, "step": 6411 }, { "epoch": 3.9115449138325453, "grad_norm": 1.3679912090301514, "learning_rate": 1.7469687691365585e-06, "log_odds_chosen": 4.224343299865723, "log_odds_ratio": -0.38889968395233154, "logits/chosen": -0.6445238590240479, "logits/rejected": -1.0066497325897217, "logps/chosen": -0.6533088684082031, "logps/rejected": -4.179656505584717, "loss": 0.8805, "nll_loss": 0.7689500451087952, "rewards/accuracies": 0.625, "rewards/chosen": -0.06533089280128479, "rewards/margins": 0.3526347875595093, "rewards/rejected": -0.41796571016311646, "step": 6412 }, { "epoch": 3.912154948909562, "grad_norm": 1.126220703125, "learning_rate": 1.7459889773423146e-06, "log_odds_chosen": 1.9877058267593384, "log_odds_ratio": -0.1839408576488495, "logits/chosen": -0.532408595085144, "logits/rejected": -0.7891654372215271, "logps/chosen": -0.5229953527450562, "logps/rejected": -1.8383793830871582, "loss": 1.0007, "nll_loss": 0.7053614854812622, "rewards/accuracies": 1.0, "rewards/chosen": -0.052299536764621735, "rewards/margins": 0.13153840601444244, "rewards/rejected": -0.1838379204273224, "step": 6413 }, { "epoch": 3.9127649839865795, "grad_norm": 1.5333787202835083, "learning_rate": 1.745009185548071e-06, "log_odds_chosen": 3.1805436611175537, "log_odds_ratio": -0.3390718698501587, "logits/chosen": -0.6005815267562866, "logits/rejected": -0.8365483283996582, "logps/chosen": -0.5250692963600159, "logps/rejected": -3.0432677268981934, "loss": 1.0275, "nll_loss": 0.6793376803398132, "rewards/accuracies": 0.75, "rewards/chosen": -0.052506931126117706, "rewards/margins": 0.2518198490142822, "rewards/rejected": -0.30432677268981934, "step": 6414 }, { "epoch": 3.9133750190635963, "grad_norm": 1.5814616680145264, "learning_rate": 1.7440293937538273e-06, "log_odds_chosen": 1.8631699085235596, "log_odds_ratio": -0.324374794960022, "logits/chosen": -0.9302029013633728, "logits/rejected": -1.0896083116531372, "logps/chosen": -0.7652490139007568, "logps/rejected": -2.071244716644287, "loss": 1.1518, "nll_loss": 1.1311951875686646, "rewards/accuracies": 1.0, "rewards/chosen": -0.07652490586042404, "rewards/margins": 0.13059955835342407, "rewards/rejected": -0.2071244716644287, "step": 6415 }, { "epoch": 3.913985054140613, "grad_norm": 8.35014820098877, "learning_rate": 1.7430496019595834e-06, "log_odds_chosen": 2.4495553970336914, "log_odds_ratio": -0.38946521282196045, "logits/chosen": -0.8250632286071777, "logits/rejected": -0.8697848320007324, "logps/chosen": -0.7632181644439697, "logps/rejected": -2.8450136184692383, "loss": 0.9374, "nll_loss": 0.9901225566864014, "rewards/accuracies": 0.875, "rewards/chosen": -0.07632181793451309, "rewards/margins": 0.2081795483827591, "rewards/rejected": -0.2845013737678528, "step": 6416 }, { "epoch": 3.91459508921763, "grad_norm": 2.196959972381592, "learning_rate": 1.7420698101653398e-06, "log_odds_chosen": 2.450350046157837, "log_odds_ratio": -0.2840479612350464, "logits/chosen": -0.9754305481910706, "logits/rejected": -1.0304404497146606, "logps/chosen": -0.9019952416419983, "logps/rejected": -2.7619972229003906, "loss": 1.0481, "nll_loss": 1.2306931018829346, "rewards/accuracies": 0.875, "rewards/chosen": -0.09019951522350311, "rewards/margins": 0.18600018322467804, "rewards/rejected": -0.27619972825050354, "step": 6417 }, { "epoch": 3.915205124294647, "grad_norm": 3.458591938018799, "learning_rate": 1.741090018371096e-06, "log_odds_chosen": 0.4471043348312378, "log_odds_ratio": -0.6014142632484436, "logits/chosen": -1.0633267164230347, "logits/rejected": -1.0660799741744995, "logps/chosen": -1.009943962097168, "logps/rejected": -1.3939764499664307, "loss": 1.0907, "nll_loss": 1.117060899734497, "rewards/accuracies": 0.625, "rewards/chosen": -0.10099439322948456, "rewards/margins": 0.03840325027704239, "rewards/rejected": -0.13939765095710754, "step": 6418 }, { "epoch": 3.9158151593716637, "grad_norm": 1.7290939092636108, "learning_rate": 1.7401102265768524e-06, "log_odds_chosen": 1.9510409832000732, "log_odds_ratio": -0.3506752848625183, "logits/chosen": -0.6847977638244629, "logits/rejected": -0.8008906841278076, "logps/chosen": -0.5858199000358582, "logps/rejected": -1.808310627937317, "loss": 1.0024, "nll_loss": 0.8104516267776489, "rewards/accuracies": 0.875, "rewards/chosen": -0.05858199670910835, "rewards/margins": 0.1222490593791008, "rewards/rejected": -0.18083107471466064, "step": 6419 }, { "epoch": 3.9164251944486805, "grad_norm": 7.284419536590576, "learning_rate": 1.7391304347826085e-06, "log_odds_chosen": 2.040872812271118, "log_odds_ratio": -0.34824684262275696, "logits/chosen": -0.9515565633773804, "logits/rejected": -0.9218952059745789, "logps/chosen": -1.0393807888031006, "logps/rejected": -2.7566781044006348, "loss": 1.1072, "nll_loss": 1.2953853607177734, "rewards/accuracies": 0.75, "rewards/chosen": -0.10393807291984558, "rewards/margins": 0.17172975838184357, "rewards/rejected": -0.27566781640052795, "step": 6420 }, { "epoch": 3.917035229525698, "grad_norm": 3.1771788597106934, "learning_rate": 1.738150642988365e-06, "log_odds_chosen": 2.341054677963257, "log_odds_ratio": -0.5473201870918274, "logits/chosen": -0.8627620935440063, "logits/rejected": -0.9650202393531799, "logps/chosen": -0.880967378616333, "logps/rejected": -2.5974647998809814, "loss": 0.9777, "nll_loss": 1.023951768875122, "rewards/accuracies": 0.75, "rewards/chosen": -0.0880967378616333, "rewards/margins": 0.1716497540473938, "rewards/rejected": -0.2597464919090271, "step": 6421 }, { "epoch": 3.9176452646027147, "grad_norm": 1.1876717805862427, "learning_rate": 1.7371708511941212e-06, "log_odds_chosen": 1.811558485031128, "log_odds_ratio": -0.3862878680229187, "logits/chosen": -0.7448402643203735, "logits/rejected": -0.8416149616241455, "logps/chosen": -0.5257051587104797, "logps/rejected": -1.9062094688415527, "loss": 0.9247, "nll_loss": 0.7898650169372559, "rewards/accuracies": 0.875, "rewards/chosen": -0.05257052183151245, "rewards/margins": 0.13805043697357178, "rewards/rejected": -0.19062095880508423, "step": 6422 }, { "epoch": 3.9182552996797315, "grad_norm": 1.6422686576843262, "learning_rate": 1.7361910593998773e-06, "log_odds_chosen": 2.966777801513672, "log_odds_ratio": -0.30903658270835876, "logits/chosen": -0.9658331871032715, "logits/rejected": -1.0380184650421143, "logps/chosen": -0.6101278066635132, "logps/rejected": -2.9888508319854736, "loss": 1.1193, "nll_loss": 0.927811861038208, "rewards/accuracies": 0.875, "rewards/chosen": -0.061012789607048035, "rewards/margins": 0.23787228763103485, "rewards/rejected": -0.2988850772380829, "step": 6423 }, { "epoch": 3.9188653347567484, "grad_norm": 1.7285022735595703, "learning_rate": 1.7352112676056337e-06, "log_odds_chosen": 1.6341632604599, "log_odds_ratio": -0.4273020029067993, "logits/chosen": -0.8832844495773315, "logits/rejected": -1.1200456619262695, "logps/chosen": -0.9446607828140259, "logps/rejected": -2.1918258666992188, "loss": 1.0381, "nll_loss": 1.0861674547195435, "rewards/accuracies": 0.75, "rewards/chosen": -0.09446608275175095, "rewards/margins": 0.12471651285886765, "rewards/rejected": -0.2191825956106186, "step": 6424 }, { "epoch": 3.9194753698337657, "grad_norm": 1.992234706878662, "learning_rate": 1.73423147581139e-06, "log_odds_chosen": 1.5433818101882935, "log_odds_ratio": -0.3860536813735962, "logits/chosen": -0.886568009853363, "logits/rejected": -1.068713665008545, "logps/chosen": -0.795419454574585, "logps/rejected": -1.9696109294891357, "loss": 1.1928, "nll_loss": 1.0660178661346436, "rewards/accuracies": 0.75, "rewards/chosen": -0.07954195141792297, "rewards/margins": 0.11741915345191956, "rewards/rejected": -0.19696110486984253, "step": 6425 }, { "epoch": 3.9200854049107825, "grad_norm": 2.227757692337036, "learning_rate": 1.7332516840171463e-06, "log_odds_chosen": 1.0370852947235107, "log_odds_ratio": -0.5969218015670776, "logits/chosen": -0.91243976354599, "logits/rejected": -0.9481526017189026, "logps/chosen": -0.7788505554199219, "logps/rejected": -1.5831674337387085, "loss": 0.9961, "nll_loss": 0.9840686321258545, "rewards/accuracies": 0.625, "rewards/chosen": -0.07788506150245667, "rewards/margins": 0.08043168485164642, "rewards/rejected": -0.1583167463541031, "step": 6426 }, { "epoch": 3.9206954399877993, "grad_norm": 1.408743977546692, "learning_rate": 1.7322718922229025e-06, "log_odds_chosen": 1.5544791221618652, "log_odds_ratio": -0.3898830711841583, "logits/chosen": -0.7660696506500244, "logits/rejected": -1.0317738056182861, "logps/chosen": -0.6441831588745117, "logps/rejected": -1.7730145454406738, "loss": 1.097, "nll_loss": 0.7926154136657715, "rewards/accuracies": 0.75, "rewards/chosen": -0.06441831588745117, "rewards/margins": 0.11288313567638397, "rewards/rejected": -0.17730145156383514, "step": 6427 }, { "epoch": 3.921305475064816, "grad_norm": 1.3644882440567017, "learning_rate": 1.731292100428659e-06, "log_odds_chosen": 2.385023593902588, "log_odds_ratio": -0.3855854868888855, "logits/chosen": -0.8453032374382019, "logits/rejected": -0.9265717267990112, "logps/chosen": -0.6994072198867798, "logps/rejected": -2.6181728839874268, "loss": 0.9625, "nll_loss": 0.8449461460113525, "rewards/accuracies": 0.875, "rewards/chosen": -0.0699407309293747, "rewards/margins": 0.19187654554843903, "rewards/rejected": -0.2618172764778137, "step": 6428 }, { "epoch": 3.921915510141833, "grad_norm": 3.982445001602173, "learning_rate": 1.7303123086344151e-06, "log_odds_chosen": 0.7469009757041931, "log_odds_ratio": -0.5728304386138916, "logits/chosen": -0.893047571182251, "logits/rejected": -0.9003766179084778, "logps/chosen": -0.8564993143081665, "logps/rejected": -1.3799717426300049, "loss": 1.1292, "nll_loss": 1.0269805192947388, "rewards/accuracies": 0.375, "rewards/chosen": -0.08564993739128113, "rewards/margins": 0.05234724283218384, "rewards/rejected": -0.13799718022346497, "step": 6429 }, { "epoch": 3.92252554521885, "grad_norm": 5.947022438049316, "learning_rate": 1.7293325168401712e-06, "log_odds_chosen": 1.4558292627334595, "log_odds_ratio": -0.3846485912799835, "logits/chosen": -0.9790021777153015, "logits/rejected": -1.0488945245742798, "logps/chosen": -0.8256874680519104, "logps/rejected": -1.968387484550476, "loss": 1.1011, "nll_loss": 1.240139365196228, "rewards/accuracies": 0.875, "rewards/chosen": -0.08256874978542328, "rewards/margins": 0.11427000164985657, "rewards/rejected": -0.19683875143527985, "step": 6430 }, { "epoch": 3.923135580295867, "grad_norm": 1.2171926498413086, "learning_rate": 1.7283527250459278e-06, "log_odds_chosen": 1.5340251922607422, "log_odds_ratio": -0.46311670541763306, "logits/chosen": -0.9488130807876587, "logits/rejected": -1.0498583316802979, "logps/chosen": -0.8751599788665771, "logps/rejected": -2.100372791290283, "loss": 1.0534, "nll_loss": 1.0297720432281494, "rewards/accuracies": 0.875, "rewards/chosen": -0.08751599490642548, "rewards/margins": 0.12252131849527359, "rewards/rejected": -0.21003732085227966, "step": 6431 }, { "epoch": 3.923745615372884, "grad_norm": 1.588998556137085, "learning_rate": 1.727372933251684e-06, "log_odds_chosen": 1.4367179870605469, "log_odds_ratio": -0.42028212547302246, "logits/chosen": -0.9037448167800903, "logits/rejected": -0.9605076313018799, "logps/chosen": -0.713280200958252, "logps/rejected": -1.6692055463790894, "loss": 0.9626, "nll_loss": 0.8541884422302246, "rewards/accuracies": 0.75, "rewards/chosen": -0.07132801413536072, "rewards/margins": 0.09559255093336105, "rewards/rejected": -0.16692057251930237, "step": 6432 }, { "epoch": 3.924355650449901, "grad_norm": 1.9114686250686646, "learning_rate": 1.7263931414574403e-06, "log_odds_chosen": 2.0426855087280273, "log_odds_ratio": -0.2725449204444885, "logits/chosen": -0.9021855592727661, "logits/rejected": -0.9518879055976868, "logps/chosen": -0.7405257821083069, "logps/rejected": -2.183558940887451, "loss": 1.0007, "nll_loss": 0.8290062546730042, "rewards/accuracies": 0.875, "rewards/chosen": -0.07405257970094681, "rewards/margins": 0.1443033069372177, "rewards/rejected": -0.21835589408874512, "step": 6433 }, { "epoch": 3.9249656855269177, "grad_norm": 1.6250962018966675, "learning_rate": 1.7254133496631964e-06, "log_odds_chosen": 2.839815139770508, "log_odds_ratio": -0.37522876262664795, "logits/chosen": -0.957200825214386, "logits/rejected": -0.9915804862976074, "logps/chosen": -0.759223461151123, "logps/rejected": -2.9956183433532715, "loss": 1.1322, "nll_loss": 1.0403393507003784, "rewards/accuracies": 0.875, "rewards/chosen": -0.07592234760522842, "rewards/margins": 0.22363951802253723, "rewards/rejected": -0.29956185817718506, "step": 6434 }, { "epoch": 3.925575720603935, "grad_norm": 2.632789373397827, "learning_rate": 1.724433557868953e-06, "log_odds_chosen": 0.664839506149292, "log_odds_ratio": -0.605826735496521, "logits/chosen": -0.7561063170433044, "logits/rejected": -0.7176695466041565, "logps/chosen": -0.7631136178970337, "logps/rejected": -1.3253214359283447, "loss": 1.1608, "nll_loss": 0.9162770509719849, "rewards/accuracies": 0.625, "rewards/chosen": -0.07631136476993561, "rewards/margins": 0.05622076988220215, "rewards/rejected": -0.13253213465213776, "step": 6435 }, { "epoch": 3.926185755680952, "grad_norm": 2.730929136276245, "learning_rate": 1.723453766074709e-06, "log_odds_chosen": 2.0906424522399902, "log_odds_ratio": -0.553894579410553, "logits/chosen": -0.7652645111083984, "logits/rejected": -0.9504318833351135, "logps/chosen": -1.2178459167480469, "logps/rejected": -3.0861406326293945, "loss": 1.1289, "nll_loss": 1.2666484117507935, "rewards/accuracies": 0.625, "rewards/chosen": -0.12178459018468857, "rewards/margins": 0.18682947754859924, "rewards/rejected": -0.308614045381546, "step": 6436 }, { "epoch": 3.9267957907579687, "grad_norm": 1.4382798671722412, "learning_rate": 1.7224739742804652e-06, "log_odds_chosen": 1.5423803329467773, "log_odds_ratio": -0.4838869869709015, "logits/chosen": -0.9848991632461548, "logits/rejected": -1.0557410717010498, "logps/chosen": -0.7634512782096863, "logps/rejected": -1.8896511793136597, "loss": 1.0703, "nll_loss": 0.8072811365127563, "rewards/accuracies": 0.625, "rewards/chosen": -0.07634513080120087, "rewards/margins": 0.11261999607086182, "rewards/rejected": -0.18896512687206268, "step": 6437 }, { "epoch": 3.9274058258349855, "grad_norm": 2.2384114265441895, "learning_rate": 1.7214941824862217e-06, "log_odds_chosen": 1.3945263624191284, "log_odds_ratio": -0.38132262229919434, "logits/chosen": -0.7067680954933167, "logits/rejected": -0.6659530401229858, "logps/chosen": -0.6594537496566772, "logps/rejected": -1.599182367324829, "loss": 1.0111, "nll_loss": 0.9075748324394226, "rewards/accuracies": 0.875, "rewards/chosen": -0.06594537198543549, "rewards/margins": 0.09397287666797638, "rewards/rejected": -0.15991824865341187, "step": 6438 }, { "epoch": 3.9280158609120024, "grad_norm": 1.6176940202713013, "learning_rate": 1.7205143906919778e-06, "log_odds_chosen": 2.0020174980163574, "log_odds_ratio": -0.410632848739624, "logits/chosen": -1.0405371189117432, "logits/rejected": -1.0552505254745483, "logps/chosen": -0.6975486278533936, "logps/rejected": -2.2735531330108643, "loss": 1.0104, "nll_loss": 0.9877440333366394, "rewards/accuracies": 0.625, "rewards/chosen": -0.06975486874580383, "rewards/margins": 0.15760046243667603, "rewards/rejected": -0.22735533118247986, "step": 6439 }, { "epoch": 3.9286258959890192, "grad_norm": 1.5462945699691772, "learning_rate": 1.7195345988977342e-06, "log_odds_chosen": 1.283149242401123, "log_odds_ratio": -0.368628591299057, "logits/chosen": -0.8006808757781982, "logits/rejected": -0.9177248477935791, "logps/chosen": -0.7431315779685974, "logps/rejected": -1.5758709907531738, "loss": 1.1204, "nll_loss": 0.8410695195198059, "rewards/accuracies": 1.0, "rewards/chosen": -0.07431316375732422, "rewards/margins": 0.08327393233776093, "rewards/rejected": -0.15758711099624634, "step": 6440 }, { "epoch": 3.929235931066036, "grad_norm": 2.4070777893066406, "learning_rate": 1.7185548071034903e-06, "log_odds_chosen": 2.446221351623535, "log_odds_ratio": -0.4725227952003479, "logits/chosen": -0.726280927658081, "logits/rejected": -0.8016948699951172, "logps/chosen": -0.8869831562042236, "logps/rejected": -2.9758548736572266, "loss": 0.8875, "nll_loss": 0.8659633994102478, "rewards/accuracies": 0.75, "rewards/chosen": -0.08869832009077072, "rewards/margins": 0.20888718962669373, "rewards/rejected": -0.29758548736572266, "step": 6441 }, { "epoch": 3.9298459661430534, "grad_norm": 2.2828733921051025, "learning_rate": 1.7175750153092468e-06, "log_odds_chosen": 1.1021265983581543, "log_odds_ratio": -0.45532912015914917, "logits/chosen": -0.8654272556304932, "logits/rejected": -0.8859419226646423, "logps/chosen": -0.7753496170043945, "logps/rejected": -1.6627291440963745, "loss": 0.9506, "nll_loss": 0.9633873701095581, "rewards/accuracies": 0.625, "rewards/chosen": -0.07753497362136841, "rewards/margins": 0.08873794972896576, "rewards/rejected": -0.16627292335033417, "step": 6442 }, { "epoch": 3.93045600122007, "grad_norm": 1.4293265342712402, "learning_rate": 1.716595223515003e-06, "log_odds_chosen": 2.9546964168548584, "log_odds_ratio": -0.2939119040966034, "logits/chosen": -0.8641697764396667, "logits/rejected": -0.9699609279632568, "logps/chosen": -0.8976958990097046, "logps/rejected": -3.513406753540039, "loss": 1.0443, "nll_loss": 0.9707245230674744, "rewards/accuracies": 1.0, "rewards/chosen": -0.08976959437131882, "rewards/margins": 0.26157110929489136, "rewards/rejected": -0.3513406813144684, "step": 6443 }, { "epoch": 3.931066036297087, "grad_norm": 1.9497109651565552, "learning_rate": 1.715615431720759e-06, "log_odds_chosen": 0.8560903072357178, "log_odds_ratio": -0.4732148051261902, "logits/chosen": -0.9195994138717651, "logits/rejected": -1.057816505432129, "logps/chosen": -0.8782444596290588, "logps/rejected": -1.5355602502822876, "loss": 1.0189, "nll_loss": 0.9751145243644714, "rewards/accuracies": 0.625, "rewards/chosen": -0.08782444894313812, "rewards/margins": 0.06573158502578735, "rewards/rejected": -0.15355603396892548, "step": 6444 }, { "epoch": 3.931676071374104, "grad_norm": 4.479532241821289, "learning_rate": 1.7146356399265156e-06, "log_odds_chosen": 3.0920116901397705, "log_odds_ratio": -0.2760464549064636, "logits/chosen": -1.0101414918899536, "logits/rejected": -1.2175542116165161, "logps/chosen": -0.7455944418907166, "logps/rejected": -3.3535683155059814, "loss": 1.0032, "nll_loss": 0.9939826726913452, "rewards/accuracies": 0.875, "rewards/chosen": -0.07455944269895554, "rewards/margins": 0.2607974112033844, "rewards/rejected": -0.33535683155059814, "step": 6445 }, { "epoch": 3.932286106451121, "grad_norm": 5.945572853088379, "learning_rate": 1.7136558481322717e-06, "log_odds_chosen": 3.212555408477783, "log_odds_ratio": -0.3330642879009247, "logits/chosen": -0.6494208574295044, "logits/rejected": -1.019648790359497, "logps/chosen": -0.622963547706604, "logps/rejected": -3.3634510040283203, "loss": 1.087, "nll_loss": 0.7020785808563232, "rewards/accuracies": 0.75, "rewards/chosen": -0.06229636073112488, "rewards/margins": 0.27404874563217163, "rewards/rejected": -0.3363451063632965, "step": 6446 }, { "epoch": 3.932896141528138, "grad_norm": 1.7755192518234253, "learning_rate": 1.712676056338028e-06, "log_odds_chosen": 1.3361982107162476, "log_odds_ratio": -0.6292057633399963, "logits/chosen": -0.8651807308197021, "logits/rejected": -0.9516744613647461, "logps/chosen": -0.8802189826965332, "logps/rejected": -1.9914672374725342, "loss": 1.1363, "nll_loss": 1.2131115198135376, "rewards/accuracies": 0.375, "rewards/chosen": -0.0880218967795372, "rewards/margins": 0.11112483590841293, "rewards/rejected": -0.19914671778678894, "step": 6447 }, { "epoch": 3.933506176605155, "grad_norm": 5.25433874130249, "learning_rate": 1.7116962645437844e-06, "log_odds_chosen": 2.7575125694274902, "log_odds_ratio": -0.23122426867485046, "logits/chosen": -0.9371581077575684, "logits/rejected": -1.0750921964645386, "logps/chosen": -0.7596876621246338, "logps/rejected": -2.9048476219177246, "loss": 0.9734, "nll_loss": 0.9824066162109375, "rewards/accuracies": 0.875, "rewards/chosen": -0.07596876472234726, "rewards/margins": 0.21451599895954132, "rewards/rejected": -0.290484756231308, "step": 6448 }, { "epoch": 3.9341162116821717, "grad_norm": 1.7235318422317505, "learning_rate": 1.7107164727495407e-06, "log_odds_chosen": 3.1779751777648926, "log_odds_ratio": -0.21585366129875183, "logits/chosen": -0.856779932975769, "logits/rejected": -1.0985352993011475, "logps/chosen": -0.6599085927009583, "logps/rejected": -3.125429153442383, "loss": 1.1656, "nll_loss": 1.0564234256744385, "rewards/accuracies": 0.875, "rewards/chosen": -0.0659908652305603, "rewards/margins": 0.24655203521251678, "rewards/rejected": -0.3125429153442383, "step": 6449 }, { "epoch": 3.9347262467591886, "grad_norm": 2.206202745437622, "learning_rate": 1.7097366809552969e-06, "log_odds_chosen": 0.46935099363327026, "log_odds_ratio": -0.6369066834449768, "logits/chosen": -0.890448808670044, "logits/rejected": -0.9725238680839539, "logps/chosen": -1.0429450273513794, "logps/rejected": -1.4009838104248047, "loss": 1.02, "nll_loss": 1.0784244537353516, "rewards/accuracies": 0.625, "rewards/chosen": -0.10429450869560242, "rewards/margins": 0.035803865641355515, "rewards/rejected": -0.14009837806224823, "step": 6450 }, { "epoch": 3.9353362818362054, "grad_norm": 3.8519256114959717, "learning_rate": 1.708756889161053e-06, "log_odds_chosen": 2.9154467582702637, "log_odds_ratio": -0.5399802923202515, "logits/chosen": -0.7244282960891724, "logits/rejected": -0.9896461963653564, "logps/chosen": -0.9025123715400696, "logps/rejected": -3.4194371700286865, "loss": 1.1472, "nll_loss": 1.0162256956100464, "rewards/accuracies": 0.75, "rewards/chosen": -0.09025123715400696, "rewards/margins": 0.2516924738883972, "rewards/rejected": -0.34194374084472656, "step": 6451 }, { "epoch": 3.9359463169132223, "grad_norm": 1.7610719203948975, "learning_rate": 1.7077770973668095e-06, "log_odds_chosen": 4.053084373474121, "log_odds_ratio": -0.13971950113773346, "logits/chosen": -0.7399773597717285, "logits/rejected": -1.1228594779968262, "logps/chosen": -0.590316891670227, "logps/rejected": -3.7981112003326416, "loss": 0.8327, "nll_loss": 0.6559662222862244, "rewards/accuracies": 1.0, "rewards/chosen": -0.059031691402196884, "rewards/margins": 0.32077938318252563, "rewards/rejected": -0.3798111081123352, "step": 6452 }, { "epoch": 3.9365563519902396, "grad_norm": 1.964280366897583, "learning_rate": 1.7067973055725657e-06, "log_odds_chosen": 0.1280292272567749, "log_odds_ratio": -0.6756113767623901, "logits/chosen": -0.8736088871955872, "logits/rejected": -0.7802484631538391, "logps/chosen": -1.2913943529129028, "logps/rejected": -1.3881564140319824, "loss": 1.2105, "nll_loss": 1.3799959421157837, "rewards/accuracies": 0.375, "rewards/chosen": -0.12913943827152252, "rewards/margins": 0.009676208719611168, "rewards/rejected": -0.13881564140319824, "step": 6453 }, { "epoch": 3.9371663870672564, "grad_norm": 2.215261697769165, "learning_rate": 1.705817513778322e-06, "log_odds_chosen": 3.3082940578460693, "log_odds_ratio": -0.2973419427871704, "logits/chosen": -0.8825076818466187, "logits/rejected": -1.0984418392181396, "logps/chosen": -0.9810268878936768, "logps/rejected": -3.6735434532165527, "loss": 1.1507, "nll_loss": 1.0930081605911255, "rewards/accuracies": 0.75, "rewards/chosen": -0.09810268133878708, "rewards/margins": 0.26925167441368103, "rewards/rejected": -0.3673543334007263, "step": 6454 }, { "epoch": 3.9377764221442733, "grad_norm": 8.012020111083984, "learning_rate": 1.7048377219840783e-06, "log_odds_chosen": 1.2357707023620605, "log_odds_ratio": -0.4075344502925873, "logits/chosen": -0.7929729223251343, "logits/rejected": -1.0297248363494873, "logps/chosen": -0.8323349356651306, "logps/rejected": -1.6905382871627808, "loss": 1.2283, "nll_loss": 1.1974048614501953, "rewards/accuracies": 0.875, "rewards/chosen": -0.08323349058628082, "rewards/margins": 0.08582033216953278, "rewards/rejected": -0.1690538227558136, "step": 6455 }, { "epoch": 3.93838645722129, "grad_norm": 1.237793207168579, "learning_rate": 1.7038579301898347e-06, "log_odds_chosen": 4.88691520690918, "log_odds_ratio": -0.10328340530395508, "logits/chosen": -0.7219291925430298, "logits/rejected": -1.05619478225708, "logps/chosen": -0.43372613191604614, "logps/rejected": -4.236982822418213, "loss": 0.7741, "nll_loss": 0.6451989412307739, "rewards/accuracies": 1.0, "rewards/chosen": -0.043372612446546555, "rewards/margins": 0.38032567501068115, "rewards/rejected": -0.4236982762813568, "step": 6456 }, { "epoch": 3.9389964922983074, "grad_norm": 1.4081734418869019, "learning_rate": 1.7028781383955908e-06, "log_odds_chosen": 2.3021538257598877, "log_odds_ratio": -0.4382287859916687, "logits/chosen": -0.9124501943588257, "logits/rejected": -1.0429826974868774, "logps/chosen": -0.8190814852714539, "logps/rejected": -2.610159397125244, "loss": 0.9839, "nll_loss": 0.949073851108551, "rewards/accuracies": 0.75, "rewards/chosen": -0.08190815150737762, "rewards/margins": 0.17910780012607574, "rewards/rejected": -0.26101595163345337, "step": 6457 }, { "epoch": 3.9396065273753242, "grad_norm": 11.238510131835938, "learning_rate": 1.7018983466013473e-06, "log_odds_chosen": 1.0891008377075195, "log_odds_ratio": -0.4859556257724762, "logits/chosen": -0.9744599461555481, "logits/rejected": -0.9442237615585327, "logps/chosen": -0.7564789056777954, "logps/rejected": -1.4887486696243286, "loss": 1.075, "nll_loss": 1.0267095565795898, "rewards/accuracies": 0.625, "rewards/chosen": -0.07564789056777954, "rewards/margins": 0.07322697341442108, "rewards/rejected": -0.14887486398220062, "step": 6458 }, { "epoch": 3.940216562452341, "grad_norm": 1.690301775932312, "learning_rate": 1.7009185548071035e-06, "log_odds_chosen": 1.9209094047546387, "log_odds_ratio": -0.41606950759887695, "logits/chosen": -0.7906993627548218, "logits/rejected": -0.9940136075019836, "logps/chosen": -0.7112371325492859, "logps/rejected": -2.2288119792938232, "loss": 0.9118, "nll_loss": 0.8563651442527771, "rewards/accuracies": 0.625, "rewards/chosen": -0.07112370431423187, "rewards/margins": 0.15175750851631165, "rewards/rejected": -0.2228812277317047, "step": 6459 }, { "epoch": 3.940826597529358, "grad_norm": 3.4093520641326904, "learning_rate": 1.6999387630128596e-06, "log_odds_chosen": 1.8321541547775269, "log_odds_ratio": -0.3378235399723053, "logits/chosen": -0.799993634223938, "logits/rejected": -0.8953213691711426, "logps/chosen": -0.7932969927787781, "logps/rejected": -2.201536178588867, "loss": 1.2141, "nll_loss": 1.0157065391540527, "rewards/accuracies": 0.75, "rewards/chosen": -0.07932969927787781, "rewards/margins": 0.14082391560077667, "rewards/rejected": -0.2201535999774933, "step": 6460 }, { "epoch": 3.941436632606375, "grad_norm": 2.0590710639953613, "learning_rate": 1.698958971218616e-06, "log_odds_chosen": 1.5094857215881348, "log_odds_ratio": -0.39151084423065186, "logits/chosen": -0.8673933148384094, "logits/rejected": -0.9995229244232178, "logps/chosen": -0.8000035881996155, "logps/rejected": -1.9487675428390503, "loss": 1.0468, "nll_loss": 1.0143073797225952, "rewards/accuracies": 0.625, "rewards/chosen": -0.08000035583972931, "rewards/margins": 0.1148764044046402, "rewards/rejected": -0.1948767900466919, "step": 6461 }, { "epoch": 3.9420466676833916, "grad_norm": 1.1397385597229004, "learning_rate": 1.6979791794243722e-06, "log_odds_chosen": 2.395904779434204, "log_odds_ratio": -0.44046127796173096, "logits/chosen": -0.9244253635406494, "logits/rejected": -0.9971112012863159, "logps/chosen": -0.8785245418548584, "logps/rejected": -2.735229730606079, "loss": 1.0543, "nll_loss": 1.2595957517623901, "rewards/accuracies": 0.75, "rewards/chosen": -0.08785245567560196, "rewards/margins": 0.18567055463790894, "rewards/rejected": -0.2735230028629303, "step": 6462 }, { "epoch": 3.9426567027604085, "grad_norm": 1.5590622425079346, "learning_rate": 1.6969993876301286e-06, "log_odds_chosen": 3.337557077407837, "log_odds_ratio": -0.3441450595855713, "logits/chosen": -0.9399043321609497, "logits/rejected": -0.9622247219085693, "logps/chosen": -0.9099181890487671, "logps/rejected": -3.8972392082214355, "loss": 0.9367, "nll_loss": 1.182221531867981, "rewards/accuracies": 0.875, "rewards/chosen": -0.09099182486534119, "rewards/margins": 0.29873210191726685, "rewards/rejected": -0.38972392678260803, "step": 6463 }, { "epoch": 3.9432667378374258, "grad_norm": 1.5115723609924316, "learning_rate": 1.6960195958358847e-06, "log_odds_chosen": 4.340679168701172, "log_odds_ratio": -0.18244287371635437, "logits/chosen": -0.8242810964584351, "logits/rejected": -1.079956293106079, "logps/chosen": -0.692504346370697, "logps/rejected": -4.375022888183594, "loss": 1.0658, "nll_loss": 0.8287147283554077, "rewards/accuracies": 1.0, "rewards/chosen": -0.0692504346370697, "rewards/margins": 0.36825186014175415, "rewards/rejected": -0.43750232458114624, "step": 6464 }, { "epoch": 3.9438767729144426, "grad_norm": 1.6834163665771484, "learning_rate": 1.6950398040416412e-06, "log_odds_chosen": 1.153145432472229, "log_odds_ratio": -0.4484381675720215, "logits/chosen": -0.8373164534568787, "logits/rejected": -0.8022786378860474, "logps/chosen": -0.9000164270401001, "logps/rejected": -1.7684621810913086, "loss": 0.9487, "nll_loss": 1.0824000835418701, "rewards/accuracies": 0.625, "rewards/chosen": -0.09000164270401001, "rewards/margins": 0.08684457838535309, "rewards/rejected": -0.1768462359905243, "step": 6465 }, { "epoch": 3.9444868079914595, "grad_norm": 10.271461486816406, "learning_rate": 1.6940600122473974e-06, "log_odds_chosen": 1.6054539680480957, "log_odds_ratio": -0.4909498691558838, "logits/chosen": -0.8991566300392151, "logits/rejected": -1.00437593460083, "logps/chosen": -1.005957841873169, "logps/rejected": -2.1121702194213867, "loss": 1.0305, "nll_loss": 1.0493617057800293, "rewards/accuracies": 0.625, "rewards/chosen": -0.10059577971696854, "rewards/margins": 0.11062122881412506, "rewards/rejected": -0.2112170159816742, "step": 6466 }, { "epoch": 3.9450968430684763, "grad_norm": 1.8612507581710815, "learning_rate": 1.6930802204531535e-06, "log_odds_chosen": 3.176678419113159, "log_odds_ratio": -0.5252574682235718, "logits/chosen": -0.8900958299636841, "logits/rejected": -0.9785982966423035, "logps/chosen": -0.7621164321899414, "logps/rejected": -3.4751808643341064, "loss": 1.0036, "nll_loss": 0.9195825457572937, "rewards/accuracies": 0.75, "rewards/chosen": -0.07621164619922638, "rewards/margins": 0.27130645513534546, "rewards/rejected": -0.34751811623573303, "step": 6467 }, { "epoch": 3.9457068781454936, "grad_norm": 1.7307301759719849, "learning_rate": 1.69210042865891e-06, "log_odds_chosen": 0.8624329566955566, "log_odds_ratio": -0.5072250366210938, "logits/chosen": -0.9300210475921631, "logits/rejected": -0.8956590890884399, "logps/chosen": -0.8538174033164978, "logps/rejected": -1.490110158920288, "loss": 1.0488, "nll_loss": 1.2166730165481567, "rewards/accuracies": 0.75, "rewards/chosen": -0.08538173139095306, "rewards/margins": 0.06362928450107574, "rewards/rejected": -0.1490110158920288, "step": 6468 }, { "epoch": 3.9463169132225104, "grad_norm": 2.0088565349578857, "learning_rate": 1.6911206368646662e-06, "log_odds_chosen": 1.5964159965515137, "log_odds_ratio": -0.3530396521091461, "logits/chosen": -0.7539534568786621, "logits/rejected": -0.9894393682479858, "logps/chosen": -0.791030764579773, "logps/rejected": -1.8778148889541626, "loss": 1.0264, "nll_loss": 1.0407475233078003, "rewards/accuracies": 1.0, "rewards/chosen": -0.07910307496786118, "rewards/margins": 0.1086784228682518, "rewards/rejected": -0.18778149783611298, "step": 6469 }, { "epoch": 3.9469269482995273, "grad_norm": 10.674254417419434, "learning_rate": 1.6901408450704225e-06, "log_odds_chosen": 1.162880539894104, "log_odds_ratio": -0.5568519234657288, "logits/chosen": -0.7236451506614685, "logits/rejected": -0.7902132868766785, "logps/chosen": -0.7991765737533569, "logps/rejected": -1.7627968788146973, "loss": 0.965, "nll_loss": 0.8703557848930359, "rewards/accuracies": 0.625, "rewards/chosen": -0.07991765439510345, "rewards/margins": 0.09636203944683075, "rewards/rejected": -0.1762796938419342, "step": 6470 }, { "epoch": 3.947536983376544, "grad_norm": 5.844146728515625, "learning_rate": 1.6891610532761786e-06, "log_odds_chosen": 0.3683202862739563, "log_odds_ratio": -0.6506885290145874, "logits/chosen": -0.9227473735809326, "logits/rejected": -0.9555204510688782, "logps/chosen": -0.712820291519165, "logps/rejected": -0.9551482796669006, "loss": 0.9657, "nll_loss": 0.8020028471946716, "rewards/accuracies": 0.625, "rewards/chosen": -0.0712820291519165, "rewards/margins": 0.02423279732465744, "rewards/rejected": -0.09551481902599335, "step": 6471 }, { "epoch": 3.948147018453561, "grad_norm": 3.8505218029022217, "learning_rate": 1.6881812614819352e-06, "log_odds_chosen": 1.6552337408065796, "log_odds_ratio": -0.37052398920059204, "logits/chosen": -1.0116995573043823, "logits/rejected": -1.0318753719329834, "logps/chosen": -0.8403240442276001, "logps/rejected": -2.094796657562256, "loss": 1.1138, "nll_loss": 1.0903583765029907, "rewards/accuracies": 0.75, "rewards/chosen": -0.08403240144252777, "rewards/margins": 0.12544727325439453, "rewards/rejected": -0.2094796746969223, "step": 6472 }, { "epoch": 3.948757053530578, "grad_norm": 1.9512152671813965, "learning_rate": 1.6872014696876913e-06, "log_odds_chosen": 2.6536858081817627, "log_odds_ratio": -0.5408809185028076, "logits/chosen": -0.8648926615715027, "logits/rejected": -1.0020970106124878, "logps/chosen": -0.7576167583465576, "logps/rejected": -2.981137752532959, "loss": 1.1617, "nll_loss": 0.8973556160926819, "rewards/accuracies": 0.625, "rewards/chosen": -0.07576167583465576, "rewards/margins": 0.22235213220119476, "rewards/rejected": -0.2981138229370117, "step": 6473 }, { "epoch": 3.9493670886075947, "grad_norm": 3.0723984241485596, "learning_rate": 1.6862216778934474e-06, "log_odds_chosen": 2.338021755218506, "log_odds_ratio": -0.5696491003036499, "logits/chosen": -1.0820813179016113, "logits/rejected": -1.1392407417297363, "logps/chosen": -1.1429227590560913, "logps/rejected": -3.2793502807617188, "loss": 1.2861, "nll_loss": 1.329986572265625, "rewards/accuracies": 0.75, "rewards/chosen": -0.11429227888584137, "rewards/margins": 0.21364276111125946, "rewards/rejected": -0.32793503999710083, "step": 6474 }, { "epoch": 3.949977123684612, "grad_norm": 1.3399684429168701, "learning_rate": 1.685241886099204e-06, "log_odds_chosen": 1.8670774698257446, "log_odds_ratio": -0.3986485004425049, "logits/chosen": -0.8623378276824951, "logits/rejected": -0.9586631059646606, "logps/chosen": -0.7137777209281921, "logps/rejected": -2.115558624267578, "loss": 1.0791, "nll_loss": 1.0090559720993042, "rewards/accuracies": 0.75, "rewards/chosen": -0.07137776911258698, "rewards/margins": 0.14017808437347412, "rewards/rejected": -0.21155588328838348, "step": 6475 }, { "epoch": 3.950587158761629, "grad_norm": 2.109321117401123, "learning_rate": 1.68426209430496e-06, "log_odds_chosen": 1.0080499649047852, "log_odds_ratio": -0.70032799243927, "logits/chosen": -1.0733063220977783, "logits/rejected": -1.0651259422302246, "logps/chosen": -0.9782600402832031, "logps/rejected": -1.869722843170166, "loss": 1.121, "nll_loss": 1.1718101501464844, "rewards/accuracies": 0.75, "rewards/chosen": -0.09782600402832031, "rewards/margins": 0.08914628624916077, "rewards/rejected": -0.18697229027748108, "step": 6476 }, { "epoch": 3.9511971938386456, "grad_norm": 1.6953420639038086, "learning_rate": 1.6832823025107164e-06, "log_odds_chosen": 2.105630397796631, "log_odds_ratio": -0.2156398743391037, "logits/chosen": -0.8799099326133728, "logits/rejected": -1.0014746189117432, "logps/chosen": -0.677408754825592, "logps/rejected": -1.9702879190444946, "loss": 0.9688, "nll_loss": 0.9995784759521484, "rewards/accuracies": 0.875, "rewards/chosen": -0.06774087995290756, "rewards/margins": 0.12928791344165802, "rewards/rejected": -0.19702878594398499, "step": 6477 }, { "epoch": 3.9518072289156625, "grad_norm": 2.2274329662323, "learning_rate": 1.6823025107164727e-06, "log_odds_chosen": 1.9667572975158691, "log_odds_ratio": -0.4868820011615753, "logits/chosen": -0.8740701079368591, "logits/rejected": -0.941239058971405, "logps/chosen": -0.8410763740539551, "logps/rejected": -2.5276269912719727, "loss": 1.0884, "nll_loss": 1.131908893585205, "rewards/accuracies": 0.625, "rewards/chosen": -0.08410763740539551, "rewards/margins": 0.16865506768226624, "rewards/rejected": -0.25276270508766174, "step": 6478 }, { "epoch": 3.95241726399268, "grad_norm": 1.500015139579773, "learning_rate": 1.681322718922229e-06, "log_odds_chosen": 3.352640151977539, "log_odds_ratio": -0.1980191171169281, "logits/chosen": -0.7514263987541199, "logits/rejected": -0.9190003871917725, "logps/chosen": -0.5038051605224609, "logps/rejected": -3.009906053543091, "loss": 1.0303, "nll_loss": 0.7343713045120239, "rewards/accuracies": 0.875, "rewards/chosen": -0.05038051679730415, "rewards/margins": 0.2506101131439209, "rewards/rejected": -0.30099064111709595, "step": 6479 }, { "epoch": 3.9530272990696966, "grad_norm": 1.4455547332763672, "learning_rate": 1.6803429271279852e-06, "log_odds_chosen": 2.864438772201538, "log_odds_ratio": -0.38137489557266235, "logits/chosen": -0.8558527827262878, "logits/rejected": -1.1689064502716064, "logps/chosen": -0.669593334197998, "logps/rejected": -3.0385727882385254, "loss": 0.9839, "nll_loss": 0.8368843197822571, "rewards/accuracies": 0.75, "rewards/chosen": -0.06695933640003204, "rewards/margins": 0.23689793050289154, "rewards/rejected": -0.3038572669029236, "step": 6480 }, { "epoch": 3.9536373341467135, "grad_norm": 5.878086090087891, "learning_rate": 1.6793631353337413e-06, "log_odds_chosen": 4.244376182556152, "log_odds_ratio": -0.33271366357803345, "logits/chosen": -0.6643002033233643, "logits/rejected": -0.8695549964904785, "logps/chosen": -0.7533134818077087, "logps/rejected": -4.499953269958496, "loss": 0.8303, "nll_loss": 0.9352508187294006, "rewards/accuracies": 0.75, "rewards/chosen": -0.07533134520053864, "rewards/margins": 0.3746640086174011, "rewards/rejected": -0.44999533891677856, "step": 6481 }, { "epoch": 3.9542473692237303, "grad_norm": 5.170929431915283, "learning_rate": 1.6783833435394979e-06, "log_odds_chosen": 2.2227301597595215, "log_odds_ratio": -0.35064932703971863, "logits/chosen": -0.6565975546836853, "logits/rejected": -0.8258429765701294, "logps/chosen": -0.7919397354125977, "logps/rejected": -1.9997508525848389, "loss": 1.2182, "nll_loss": 1.135199785232544, "rewards/accuracies": 0.75, "rewards/chosen": -0.07919397205114365, "rewards/margins": 0.12078113108873367, "rewards/rejected": -0.19997510313987732, "step": 6482 }, { "epoch": 3.954857404300747, "grad_norm": 1.2801179885864258, "learning_rate": 1.677403551745254e-06, "log_odds_chosen": 1.0960595607757568, "log_odds_ratio": -0.46357297897338867, "logits/chosen": -0.9195277094841003, "logits/rejected": -1.0365970134735107, "logps/chosen": -1.0242037773132324, "logps/rejected": -1.841966152191162, "loss": 1.1478, "nll_loss": 1.2360048294067383, "rewards/accuracies": 0.875, "rewards/chosen": -0.1024203673005104, "rewards/margins": 0.08177626132965088, "rewards/rejected": -0.1841966211795807, "step": 6483 }, { "epoch": 3.955467439377764, "grad_norm": 1.323569655418396, "learning_rate": 1.6764237599510103e-06, "log_odds_chosen": 1.9384443759918213, "log_odds_ratio": -0.3131048083305359, "logits/chosen": -0.7865839004516602, "logits/rejected": -0.8316260576248169, "logps/chosen": -0.6311572194099426, "logps/rejected": -2.0383760929107666, "loss": 0.9675, "nll_loss": 0.9032003879547119, "rewards/accuracies": 0.875, "rewards/chosen": -0.06311571598052979, "rewards/margins": 0.1407219022512436, "rewards/rejected": -0.20383761823177338, "step": 6484 }, { "epoch": 3.9560774744547813, "grad_norm": 1.3352724313735962, "learning_rate": 1.6754439681567667e-06, "log_odds_chosen": 0.7854263782501221, "log_odds_ratio": -0.48183897137641907, "logits/chosen": -0.8483535647392273, "logits/rejected": -0.942216694355011, "logps/chosen": -0.8005278706550598, "logps/rejected": -1.3020997047424316, "loss": 1.0263, "nll_loss": 1.0601651668548584, "rewards/accuracies": 0.875, "rewards/chosen": -0.08005277812480927, "rewards/margins": 0.05015718191862106, "rewards/rejected": -0.13020996749401093, "step": 6485 }, { "epoch": 3.956687509531798, "grad_norm": 1.2007182836532593, "learning_rate": 1.674464176362523e-06, "log_odds_chosen": 2.0749175548553467, "log_odds_ratio": -0.28694969415664673, "logits/chosen": -0.9772161245346069, "logits/rejected": -0.9799054265022278, "logps/chosen": -0.988906979560852, "logps/rejected": -2.6897237300872803, "loss": 1.0045, "nll_loss": 1.1651908159255981, "rewards/accuracies": 0.875, "rewards/chosen": -0.09889069944620132, "rewards/margins": 0.17008167505264282, "rewards/rejected": -0.26897236704826355, "step": 6486 }, { "epoch": 3.957297544608815, "grad_norm": 1.4552044868469238, "learning_rate": 1.6734843845682791e-06, "log_odds_chosen": 2.247443199157715, "log_odds_ratio": -0.3297104239463806, "logits/chosen": -0.7070380449295044, "logits/rejected": -0.9410412311553955, "logps/chosen": -0.6723016500473022, "logps/rejected": -2.265263557434082, "loss": 0.9927, "nll_loss": 0.828349769115448, "rewards/accuracies": 0.75, "rewards/chosen": -0.06723016500473022, "rewards/margins": 0.1592962145805359, "rewards/rejected": -0.22652636468410492, "step": 6487 }, { "epoch": 3.957907579685832, "grad_norm": 2.4294846057891846, "learning_rate": 1.6725045927740354e-06, "log_odds_chosen": 1.718913197517395, "log_odds_ratio": -0.39131057262420654, "logits/chosen": -0.8015412092208862, "logits/rejected": -0.9618097543716431, "logps/chosen": -0.8855329751968384, "logps/rejected": -2.3118889331817627, "loss": 1.0221, "nll_loss": 1.1486554145812988, "rewards/accuracies": 0.875, "rewards/chosen": -0.0885532945394516, "rewards/margins": 0.14263558387756348, "rewards/rejected": -0.23118886351585388, "step": 6488 }, { "epoch": 3.958517614762849, "grad_norm": 1.5256983041763306, "learning_rate": 1.6715248009797918e-06, "log_odds_chosen": 1.6056594848632812, "log_odds_ratio": -0.3697957694530487, "logits/chosen": -1.131158471107483, "logits/rejected": -1.095491647720337, "logps/chosen": -0.7178999185562134, "logps/rejected": -1.8887677192687988, "loss": 0.9698, "nll_loss": 1.1675052642822266, "rewards/accuracies": 0.75, "rewards/chosen": -0.07178999483585358, "rewards/margins": 0.11708678305149078, "rewards/rejected": -0.18887677788734436, "step": 6489 }, { "epoch": 3.959127649839866, "grad_norm": 9.502069473266602, "learning_rate": 1.670545009185548e-06, "log_odds_chosen": 1.6157383918762207, "log_odds_ratio": -0.39027854800224304, "logits/chosen": -0.8170993328094482, "logits/rejected": -0.977828860282898, "logps/chosen": -0.7310560941696167, "logps/rejected": -1.8468985557556152, "loss": 0.9072, "nll_loss": 0.8783760070800781, "rewards/accuracies": 0.75, "rewards/chosen": -0.07310561090707779, "rewards/margins": 0.11158424615859985, "rewards/rejected": -0.18468984961509705, "step": 6490 }, { "epoch": 3.959737684916883, "grad_norm": 6.268126964569092, "learning_rate": 1.6695652173913042e-06, "log_odds_chosen": 2.8188629150390625, "log_odds_ratio": -0.22896315157413483, "logits/chosen": -0.6579180359840393, "logits/rejected": -0.8090970516204834, "logps/chosen": -0.6348453164100647, "logps/rejected": -2.7906646728515625, "loss": 0.8336, "nll_loss": 0.8195686936378479, "rewards/accuracies": 0.75, "rewards/chosen": -0.06348453462123871, "rewards/margins": 0.21558193862438202, "rewards/rejected": -0.2790664732456207, "step": 6491 }, { "epoch": 3.9603477199938997, "grad_norm": 1.4843255281448364, "learning_rate": 1.6685854255970606e-06, "log_odds_chosen": 0.23022623360157013, "log_odds_ratio": -0.6241950988769531, "logits/chosen": -1.0530606508255005, "logits/rejected": -0.9673587083816528, "logps/chosen": -0.9007482528686523, "logps/rejected": -1.0444302558898926, "loss": 1.1203, "nll_loss": 1.12840735912323, "rewards/accuracies": 0.5, "rewards/chosen": -0.090074822306633, "rewards/margins": 0.01436819601804018, "rewards/rejected": -0.1044430285692215, "step": 6492 }, { "epoch": 3.9609577550709165, "grad_norm": 1.8736622333526611, "learning_rate": 1.667605633802817e-06, "log_odds_chosen": 1.905060052871704, "log_odds_ratio": -0.3069080710411072, "logits/chosen": -0.8228036165237427, "logits/rejected": -0.9249194860458374, "logps/chosen": -0.7365437746047974, "logps/rejected": -2.121159553527832, "loss": 0.9389, "nll_loss": 0.9286826848983765, "rewards/accuracies": 0.875, "rewards/chosen": -0.07365438342094421, "rewards/margins": 0.13846156001091003, "rewards/rejected": -0.21211594343185425, "step": 6493 }, { "epoch": 3.9615677901479334, "grad_norm": 1.2481497526168823, "learning_rate": 1.666625842008573e-06, "log_odds_chosen": 1.2595585584640503, "log_odds_ratio": -0.5102828741073608, "logits/chosen": -0.7922433614730835, "logits/rejected": -0.7190663814544678, "logps/chosen": -0.7096644639968872, "logps/rejected": -1.801282525062561, "loss": 0.942, "nll_loss": 0.8780688047409058, "rewards/accuracies": 0.75, "rewards/chosen": -0.0709664523601532, "rewards/margins": 0.10916180163621902, "rewards/rejected": -0.18012824654579163, "step": 6494 }, { "epoch": 3.96217782522495, "grad_norm": 2.422118663787842, "learning_rate": 1.6656460502143296e-06, "log_odds_chosen": 1.513321876525879, "log_odds_ratio": -0.44278860092163086, "logits/chosen": -0.7057225704193115, "logits/rejected": -0.8295806646347046, "logps/chosen": -0.737267017364502, "logps/rejected": -1.966230034828186, "loss": 0.9503, "nll_loss": 0.9877773523330688, "rewards/accuracies": 0.625, "rewards/chosen": -0.07372669875621796, "rewards/margins": 0.12289629876613617, "rewards/rejected": -0.19662299752235413, "step": 6495 }, { "epoch": 3.9627878603019675, "grad_norm": 1.2968530654907227, "learning_rate": 1.6646662584200857e-06, "log_odds_chosen": 1.2706003189086914, "log_odds_ratio": -0.45032745599746704, "logits/chosen": -0.9004283547401428, "logits/rejected": -0.99836266040802, "logps/chosen": -0.8668960332870483, "logps/rejected": -1.8649414777755737, "loss": 1.0848, "nll_loss": 1.08915376663208, "rewards/accuracies": 0.75, "rewards/chosen": -0.08668960630893707, "rewards/margins": 0.09980454295873642, "rewards/rejected": -0.1864941567182541, "step": 6496 }, { "epoch": 3.9633978953789843, "grad_norm": 2.3782198429107666, "learning_rate": 1.6636864666258418e-06, "log_odds_chosen": 2.9742932319641113, "log_odds_ratio": -0.14253246784210205, "logits/chosen": -0.8564680814743042, "logits/rejected": -0.9489539861679077, "logps/chosen": -0.6404924392700195, "logps/rejected": -2.9590961933135986, "loss": 0.8614, "nll_loss": 0.824103832244873, "rewards/accuracies": 1.0, "rewards/chosen": -0.06404924392700195, "rewards/margins": 0.23186036944389343, "rewards/rejected": -0.2959096133708954, "step": 6497 }, { "epoch": 3.964007930456001, "grad_norm": 6.296519756317139, "learning_rate": 1.6627066748315982e-06, "log_odds_chosen": 1.1267359256744385, "log_odds_ratio": -0.6049305200576782, "logits/chosen": -0.905882716178894, "logits/rejected": -0.9529623985290527, "logps/chosen": -0.8612274527549744, "logps/rejected": -1.5965452194213867, "loss": 1.0024, "nll_loss": 1.0067224502563477, "rewards/accuracies": 0.625, "rewards/chosen": -0.08612275123596191, "rewards/margins": 0.07353176921606064, "rewards/rejected": -0.15965452790260315, "step": 6498 }, { "epoch": 3.964617965533018, "grad_norm": 2.6722638607025146, "learning_rate": 1.6617268830373545e-06, "log_odds_chosen": 1.4249827861785889, "log_odds_ratio": -0.44695404171943665, "logits/chosen": -0.833053708076477, "logits/rejected": -0.771332859992981, "logps/chosen": -0.7435228824615479, "logps/rejected": -1.903027057647705, "loss": 0.9994, "nll_loss": 0.902673602104187, "rewards/accuracies": 0.625, "rewards/chosen": -0.07435229420661926, "rewards/margins": 0.11595042794942856, "rewards/rejected": -0.19030271470546722, "step": 6499 }, { "epoch": 3.9652280006100353, "grad_norm": 1.684466004371643, "learning_rate": 1.6607470912431108e-06, "log_odds_chosen": 4.050307273864746, "log_odds_ratio": -0.2283102124929428, "logits/chosen": -0.8300092220306396, "logits/rejected": -1.0478386878967285, "logps/chosen": -0.7041335105895996, "logps/rejected": -4.093148708343506, "loss": 1.1649, "nll_loss": 1.1164398193359375, "rewards/accuracies": 0.875, "rewards/chosen": -0.07041335105895996, "rewards/margins": 0.3389015197753906, "rewards/rejected": -0.4093148708343506, "step": 6500 }, { "epoch": 3.965838035687052, "grad_norm": 5.445207118988037, "learning_rate": 1.659767299448867e-06, "log_odds_chosen": 1.4454553127288818, "log_odds_ratio": -0.4411439597606659, "logits/chosen": -1.008246660232544, "logits/rejected": -0.9376817345619202, "logps/chosen": -0.7220361828804016, "logps/rejected": -1.756120204925537, "loss": 1.0154, "nll_loss": 0.9169491529464722, "rewards/accuracies": 0.75, "rewards/chosen": -0.0722036212682724, "rewards/margins": 0.10340840369462967, "rewards/rejected": -0.17561203241348267, "step": 6501 }, { "epoch": 3.966448070764069, "grad_norm": 2.9419655799865723, "learning_rate": 1.6587875076546235e-06, "log_odds_chosen": 2.5962820053100586, "log_odds_ratio": -0.32062405347824097, "logits/chosen": -0.797816276550293, "logits/rejected": -1.077888011932373, "logps/chosen": -0.7491269111633301, "logps/rejected": -2.619338035583496, "loss": 0.9834, "nll_loss": 1.0193976163864136, "rewards/accuracies": 0.75, "rewards/chosen": -0.07491268962621689, "rewards/margins": 0.1870211511850357, "rewards/rejected": -0.261933833360672, "step": 6502 }, { "epoch": 3.967058105841086, "grad_norm": 3.0055103302001953, "learning_rate": 1.6578077158603796e-06, "log_odds_chosen": 1.4879926443099976, "log_odds_ratio": -0.44231289625167847, "logits/chosen": -0.8390421271324158, "logits/rejected": -0.8075830340385437, "logps/chosen": -0.7852871417999268, "logps/rejected": -1.9348728656768799, "loss": 1.0492, "nll_loss": 0.990352988243103, "rewards/accuracies": 0.75, "rewards/chosen": -0.07852871716022491, "rewards/margins": 0.11495858430862427, "rewards/rejected": -0.19348731637001038, "step": 6503 }, { "epoch": 3.9676681409181027, "grad_norm": 1.7790278196334839, "learning_rate": 1.6568279240661357e-06, "log_odds_chosen": 2.101724624633789, "log_odds_ratio": -0.49547725915908813, "logits/chosen": -0.7219105362892151, "logits/rejected": -0.9113402366638184, "logps/chosen": -0.8147269487380981, "logps/rejected": -2.5609564781188965, "loss": 0.9223, "nll_loss": 0.9507851600646973, "rewards/accuracies": 0.625, "rewards/chosen": -0.08147269487380981, "rewards/margins": 0.17462295293807983, "rewards/rejected": -0.25609564781188965, "step": 6504 }, { "epoch": 3.9682781759951196, "grad_norm": 1.962196707725525, "learning_rate": 1.6558481322718923e-06, "log_odds_chosen": 2.4343159198760986, "log_odds_ratio": -0.29663461446762085, "logits/chosen": -0.9403316974639893, "logits/rejected": -1.0514118671417236, "logps/chosen": -0.7861001491546631, "logps/rejected": -2.7453105449676514, "loss": 0.969, "nll_loss": 0.9957175254821777, "rewards/accuracies": 0.875, "rewards/chosen": -0.07861001789569855, "rewards/margins": 0.19592106342315674, "rewards/rejected": -0.2745310962200165, "step": 6505 }, { "epoch": 3.9688882110721364, "grad_norm": 1.6913264989852905, "learning_rate": 1.6548683404776484e-06, "log_odds_chosen": 2.8404786586761475, "log_odds_ratio": -0.18593791127204895, "logits/chosen": -0.8827958106994629, "logits/rejected": -0.944861650466919, "logps/chosen": -0.678615391254425, "logps/rejected": -2.8331246376037598, "loss": 1.0564, "nll_loss": 0.9461424350738525, "rewards/accuracies": 1.0, "rewards/chosen": -0.06786154210567474, "rewards/margins": 0.2154509276151657, "rewards/rejected": -0.28331246972084045, "step": 6506 }, { "epoch": 3.9694982461491537, "grad_norm": 1.1119502782821655, "learning_rate": 1.6538885486834047e-06, "log_odds_chosen": 2.8123726844787598, "log_odds_ratio": -0.11728187650442123, "logits/chosen": -0.9459503889083862, "logits/rejected": -1.0513650178909302, "logps/chosen": -0.6207726001739502, "logps/rejected": -2.597140312194824, "loss": 0.9978, "nll_loss": 0.9404459595680237, "rewards/accuracies": 1.0, "rewards/chosen": -0.06207726150751114, "rewards/margins": 0.19763676822185516, "rewards/rejected": -0.2597140073776245, "step": 6507 }, { "epoch": 3.9701082812261705, "grad_norm": 1.6115896701812744, "learning_rate": 1.6529087568891609e-06, "log_odds_chosen": 2.5984394550323486, "log_odds_ratio": -0.344789057970047, "logits/chosen": -0.9518656730651855, "logits/rejected": -0.9859943389892578, "logps/chosen": -0.7771555781364441, "logps/rejected": -2.8267428874969482, "loss": 1.0563, "nll_loss": 0.9501750469207764, "rewards/accuracies": 0.875, "rewards/chosen": -0.07771556079387665, "rewards/margins": 0.2049587219953537, "rewards/rejected": -0.28267428278923035, "step": 6508 }, { "epoch": 3.9707183163031874, "grad_norm": 2.333097219467163, "learning_rate": 1.6519289650949174e-06, "log_odds_chosen": 1.9505500793457031, "log_odds_ratio": -0.49949583411216736, "logits/chosen": -0.8313133716583252, "logits/rejected": -1.0547246932983398, "logps/chosen": -0.9060272574424744, "logps/rejected": -2.6080260276794434, "loss": 1.046, "nll_loss": 1.0697723627090454, "rewards/accuracies": 0.625, "rewards/chosen": -0.09060272574424744, "rewards/margins": 0.17019988596439362, "rewards/rejected": -0.26080259680747986, "step": 6509 }, { "epoch": 3.9713283513802042, "grad_norm": 9.780318260192871, "learning_rate": 1.6509491733006735e-06, "log_odds_chosen": 1.8644027709960938, "log_odds_ratio": -0.29557812213897705, "logits/chosen": -0.8717765808105469, "logits/rejected": -0.9248191118240356, "logps/chosen": -0.7595126032829285, "logps/rejected": -1.961102843284607, "loss": 1.2007, "nll_loss": 0.9989902377128601, "rewards/accuracies": 0.875, "rewards/chosen": -0.07595126330852509, "rewards/margins": 0.12015902251005173, "rewards/rejected": -0.19611026346683502, "step": 6510 }, { "epoch": 3.9719383864572215, "grad_norm": 1.4492950439453125, "learning_rate": 1.6499693815064296e-06, "log_odds_chosen": 2.3685975074768066, "log_odds_ratio": -0.14011457562446594, "logits/chosen": -0.9288349151611328, "logits/rejected": -1.0766276121139526, "logps/chosen": -0.7330060005187988, "logps/rejected": -2.524386167526245, "loss": 0.9252, "nll_loss": 1.0079985857009888, "rewards/accuracies": 1.0, "rewards/chosen": -0.07330060005187988, "rewards/margins": 0.17913803458213806, "rewards/rejected": -0.25243863463401794, "step": 6511 }, { "epoch": 3.9725484215342384, "grad_norm": 1.199983835220337, "learning_rate": 1.6489895897121862e-06, "log_odds_chosen": 1.4911307096481323, "log_odds_ratio": -0.6545583009719849, "logits/chosen": -0.7314023971557617, "logits/rejected": -0.9506216645240784, "logps/chosen": -0.9742729663848877, "logps/rejected": -2.1907660961151123, "loss": 1.1347, "nll_loss": 1.1826876401901245, "rewards/accuracies": 0.625, "rewards/chosen": -0.09742730855941772, "rewards/margins": 0.12164930254220963, "rewards/rejected": -0.21907661855220795, "step": 6512 }, { "epoch": 3.973158456611255, "grad_norm": 6.931212902069092, "learning_rate": 1.6480097979179423e-06, "log_odds_chosen": 1.8598835468292236, "log_odds_ratio": -0.46712031960487366, "logits/chosen": -0.7497972249984741, "logits/rejected": -0.7821906208992004, "logps/chosen": -0.7286853790283203, "logps/rejected": -2.179508924484253, "loss": 1.015, "nll_loss": 0.9522705078125, "rewards/accuracies": 0.75, "rewards/chosen": -0.07286854088306427, "rewards/margins": 0.14508235454559326, "rewards/rejected": -0.21795089542865753, "step": 6513 }, { "epoch": 3.973768491688272, "grad_norm": 1.6516642570495605, "learning_rate": 1.6470300061236986e-06, "log_odds_chosen": 1.555870532989502, "log_odds_ratio": -0.2892850637435913, "logits/chosen": -0.723483681678772, "logits/rejected": -1.0542806386947632, "logps/chosen": -1.0710718631744385, "logps/rejected": -2.2511496543884277, "loss": 0.9061, "nll_loss": 1.0820972919464111, "rewards/accuracies": 1.0, "rewards/chosen": -0.10710719227790833, "rewards/margins": 0.11800779402256012, "rewards/rejected": -0.22511497139930725, "step": 6514 }, { "epoch": 3.974378526765289, "grad_norm": 1.4984118938446045, "learning_rate": 1.646050214329455e-06, "log_odds_chosen": 1.930673599243164, "log_odds_ratio": -0.5061147809028625, "logits/chosen": -0.7507771253585815, "logits/rejected": -0.8666222095489502, "logps/chosen": -0.7300902605056763, "logps/rejected": -2.2979938983917236, "loss": 0.9773, "nll_loss": 0.9598838090896606, "rewards/accuracies": 0.625, "rewards/chosen": -0.07300902158021927, "rewards/margins": 0.1567903757095337, "rewards/rejected": -0.22979938983917236, "step": 6515 }, { "epoch": 3.9749885618423058, "grad_norm": 4.110848426818848, "learning_rate": 1.6450704225352113e-06, "log_odds_chosen": 1.2048633098602295, "log_odds_ratio": -0.4750147759914398, "logits/chosen": -0.7516116499900818, "logits/rejected": -0.8810285925865173, "logps/chosen": -1.0782781839370728, "logps/rejected": -2.0741734504699707, "loss": 1.2979, "nll_loss": 1.2862842082977295, "rewards/accuracies": 0.625, "rewards/chosen": -0.1078278198838234, "rewards/margins": 0.099589504301548, "rewards/rejected": -0.2074173241853714, "step": 6516 }, { "epoch": 3.9755985969193226, "grad_norm": 1.3532742261886597, "learning_rate": 1.6440906307409674e-06, "log_odds_chosen": 2.2397825717926025, "log_odds_ratio": -0.3404260575771332, "logits/chosen": -0.8041249513626099, "logits/rejected": -0.9097020030021667, "logps/chosen": -0.7420340180397034, "logps/rejected": -2.5467116832733154, "loss": 0.9852, "nll_loss": 0.8850629329681396, "rewards/accuracies": 0.75, "rewards/chosen": -0.07420340180397034, "rewards/margins": 0.18046778440475464, "rewards/rejected": -0.254671186208725, "step": 6517 }, { "epoch": 3.97620863199634, "grad_norm": 2.541494607925415, "learning_rate": 1.6431108389467236e-06, "log_odds_chosen": 1.8115183115005493, "log_odds_ratio": -0.4460849463939667, "logits/chosen": -1.0769964456558228, "logits/rejected": -0.9912651181221008, "logps/chosen": -1.1819649934768677, "logps/rejected": -2.5280189514160156, "loss": 0.9801, "nll_loss": 1.134977102279663, "rewards/accuracies": 0.875, "rewards/chosen": -0.11819649487733841, "rewards/margins": 0.13460540771484375, "rewards/rejected": -0.25280189514160156, "step": 6518 }, { "epoch": 3.9768186670733567, "grad_norm": 2.5303876399993896, "learning_rate": 1.64213104715248e-06, "log_odds_chosen": 2.2089767456054688, "log_odds_ratio": -0.629435122013092, "logits/chosen": -0.9513061046600342, "logits/rejected": -1.0211756229400635, "logps/chosen": -0.8917615413665771, "logps/rejected": -2.7885518074035645, "loss": 1.1637, "nll_loss": 1.1732796430587769, "rewards/accuracies": 0.5, "rewards/chosen": -0.08917615562677383, "rewards/margins": 0.18967902660369873, "rewards/rejected": -0.27885520458221436, "step": 6519 }, { "epoch": 3.9774287021503736, "grad_norm": 1.5189160108566284, "learning_rate": 1.6411512553582362e-06, "log_odds_chosen": 0.6412428021430969, "log_odds_ratio": -0.521996796131134, "logits/chosen": -0.7471049427986145, "logits/rejected": -0.8269814252853394, "logps/chosen": -0.6900275945663452, "logps/rejected": -1.0221624374389648, "loss": 0.9776, "nll_loss": 0.9420837759971619, "rewards/accuracies": 0.625, "rewards/chosen": -0.06900276243686676, "rewards/margins": 0.033213481307029724, "rewards/rejected": -0.10221623629331589, "step": 6520 }, { "epoch": 3.9780387372273904, "grad_norm": 1.2470475435256958, "learning_rate": 1.6401714635639926e-06, "log_odds_chosen": 4.340864658355713, "log_odds_ratio": -0.28885000944137573, "logits/chosen": -0.5905858874320984, "logits/rejected": -1.0115617513656616, "logps/chosen": -0.6467512845993042, "logps/rejected": -4.415384292602539, "loss": 1.0553, "nll_loss": 0.8494408130645752, "rewards/accuracies": 0.875, "rewards/chosen": -0.06467512249946594, "rewards/margins": 0.3768633008003235, "rewards/rejected": -0.4415384531021118, "step": 6521 }, { "epoch": 3.9786487723044077, "grad_norm": 2.2879648208618164, "learning_rate": 1.639191671769749e-06, "log_odds_chosen": 1.5769145488739014, "log_odds_ratio": -0.6178553700447083, "logits/chosen": -1.0306782722473145, "logits/rejected": -0.965469241142273, "logps/chosen": -0.7902765274047852, "logps/rejected": -2.1609089374542236, "loss": 1.0362, "nll_loss": 0.9915227890014648, "rewards/accuracies": 0.5, "rewards/chosen": -0.07902765274047852, "rewards/margins": 0.13706323504447937, "rewards/rejected": -0.21609088778495789, "step": 6522 }, { "epoch": 3.9792588073814246, "grad_norm": 3.633220911026001, "learning_rate": 1.6382118799755052e-06, "log_odds_chosen": 1.7425177097320557, "log_odds_ratio": -0.45792409777641296, "logits/chosen": -0.8975023627281189, "logits/rejected": -0.9655759334564209, "logps/chosen": -0.9254821538925171, "logps/rejected": -2.4242072105407715, "loss": 1.1546, "nll_loss": 0.9944138526916504, "rewards/accuracies": 0.75, "rewards/chosen": -0.09254822134971619, "rewards/margins": 0.14987249672412872, "rewards/rejected": -0.2424207180738449, "step": 6523 }, { "epoch": 3.9798688424584414, "grad_norm": 12.941193580627441, "learning_rate": 1.6372320881812614e-06, "log_odds_chosen": 1.9568719863891602, "log_odds_ratio": -0.3805597722530365, "logits/chosen": -0.8092025518417358, "logits/rejected": -1.0261768102645874, "logps/chosen": -0.7779644131660461, "logps/rejected": -2.2946887016296387, "loss": 0.886, "nll_loss": 0.8831806778907776, "rewards/accuracies": 0.75, "rewards/chosen": -0.07779644429683685, "rewards/margins": 0.15167242288589478, "rewards/rejected": -0.22946888208389282, "step": 6524 }, { "epoch": 3.9804788775354583, "grad_norm": 1.6665148735046387, "learning_rate": 1.6362522963870177e-06, "log_odds_chosen": 0.8770380020141602, "log_odds_ratio": -0.6642629504203796, "logits/chosen": -0.9445384740829468, "logits/rejected": -1.012765645980835, "logps/chosen": -0.9118480682373047, "logps/rejected": -1.52970552444458, "loss": 1.1228, "nll_loss": 1.308250904083252, "rewards/accuracies": 0.375, "rewards/chosen": -0.09118480235338211, "rewards/margins": 0.061785753816366196, "rewards/rejected": -0.152970552444458, "step": 6525 }, { "epoch": 3.981088912612475, "grad_norm": 1.4177920818328857, "learning_rate": 1.635272504592774e-06, "log_odds_chosen": 3.6415295600891113, "log_odds_ratio": -0.13817428052425385, "logits/chosen": -0.883643388748169, "logits/rejected": -1.144312858581543, "logps/chosen": -0.5962740182876587, "logps/rejected": -3.410827398300171, "loss": 1.0449, "nll_loss": 0.8104168176651001, "rewards/accuracies": 1.0, "rewards/chosen": -0.05962740257382393, "rewards/margins": 0.2814553380012512, "rewards/rejected": -0.34108275175094604, "step": 6526 }, { "epoch": 3.981698947689492, "grad_norm": 2.079690933227539, "learning_rate": 1.6342927127985301e-06, "log_odds_chosen": 1.319746732711792, "log_odds_ratio": -0.5170763731002808, "logits/chosen": -0.7717633247375488, "logits/rejected": -0.8476148843765259, "logps/chosen": -0.8019803762435913, "logps/rejected": -1.7903974056243896, "loss": 0.9296, "nll_loss": 0.9177209138870239, "rewards/accuracies": 0.75, "rewards/chosen": -0.08019803464412689, "rewards/margins": 0.09884171187877655, "rewards/rejected": -0.17903976142406464, "step": 6527 }, { "epoch": 3.982308982766509, "grad_norm": 4.208745002746582, "learning_rate": 1.6333129210042865e-06, "log_odds_chosen": 3.5402400493621826, "log_odds_ratio": -0.18318580090999603, "logits/chosen": -0.8469129204750061, "logits/rejected": -1.0937988758087158, "logps/chosen": -0.8189014792442322, "logps/rejected": -3.811701774597168, "loss": 1.163, "nll_loss": 1.0586919784545898, "rewards/accuracies": 1.0, "rewards/chosen": -0.08189015090465546, "rewards/margins": 0.2992800176143646, "rewards/rejected": -0.3811701834201813, "step": 6528 }, { "epoch": 3.982919017843526, "grad_norm": 1.7967050075531006, "learning_rate": 1.6323331292100428e-06, "log_odds_chosen": 2.1284573078155518, "log_odds_ratio": -0.2866501212120056, "logits/chosen": -0.9183861613273621, "logits/rejected": -0.9044653177261353, "logps/chosen": -0.8005856275558472, "logps/rejected": -2.4607577323913574, "loss": 0.9809, "nll_loss": 0.9405631422996521, "rewards/accuracies": 1.0, "rewards/chosen": -0.08005855977535248, "rewards/margins": 0.16601720452308655, "rewards/rejected": -0.24607577919960022, "step": 6529 }, { "epoch": 3.983529052920543, "grad_norm": 1.8655325174331665, "learning_rate": 1.6313533374157991e-06, "log_odds_chosen": 0.8605142831802368, "log_odds_ratio": -0.46580472588539124, "logits/chosen": -0.976079523563385, "logits/rejected": -1.0893735885620117, "logps/chosen": -0.9188676476478577, "logps/rejected": -1.588719129562378, "loss": 0.998, "nll_loss": 1.0137406587600708, "rewards/accuracies": 0.75, "rewards/chosen": -0.09188676625490189, "rewards/margins": 0.06698516011238098, "rewards/rejected": -0.15887191891670227, "step": 6530 }, { "epoch": 3.98413908799756, "grad_norm": 5.645956993103027, "learning_rate": 1.6303735456215553e-06, "log_odds_chosen": 1.5442312955856323, "log_odds_ratio": -0.5401670336723328, "logits/chosen": -0.8833411335945129, "logits/rejected": -0.9763559699058533, "logps/chosen": -0.8985292911529541, "logps/rejected": -2.0329012870788574, "loss": 1.218, "nll_loss": 1.3380357027053833, "rewards/accuracies": 0.5, "rewards/chosen": -0.08985292911529541, "rewards/margins": 0.11343719810247421, "rewards/rejected": -0.20329011976718903, "step": 6531 }, { "epoch": 3.9847491230745766, "grad_norm": 1.303635597229004, "learning_rate": 1.6293937538273116e-06, "log_odds_chosen": 0.7066446542739868, "log_odds_ratio": -0.5982319712638855, "logits/chosen": -0.8676396608352661, "logits/rejected": -0.9099260568618774, "logps/chosen": -0.8764941692352295, "logps/rejected": -1.3728777170181274, "loss": 1.2178, "nll_loss": 0.9935601949691772, "rewards/accuracies": 0.625, "rewards/chosen": -0.08764941990375519, "rewards/margins": 0.04963836446404457, "rewards/rejected": -0.13728778064250946, "step": 6532 }, { "epoch": 3.985359158151594, "grad_norm": 1.5907610654830933, "learning_rate": 1.628413962033068e-06, "log_odds_chosen": 1.3375182151794434, "log_odds_ratio": -0.3837392330169678, "logits/chosen": -0.8561364412307739, "logits/rejected": -0.8045066595077515, "logps/chosen": -0.7022334337234497, "logps/rejected": -1.6286165714263916, "loss": 0.9968, "nll_loss": 1.1072499752044678, "rewards/accuracies": 0.75, "rewards/chosen": -0.07022334635257721, "rewards/margins": 0.09263831377029419, "rewards/rejected": -0.1628616750240326, "step": 6533 }, { "epoch": 3.9859691932286108, "grad_norm": 1.6502008438110352, "learning_rate": 1.627434170238824e-06, "log_odds_chosen": 2.5525460243225098, "log_odds_ratio": -0.2899160087108612, "logits/chosen": -0.7884823679924011, "logits/rejected": -0.8862342834472656, "logps/chosen": -0.5978986024856567, "logps/rejected": -2.351245880126953, "loss": 1.0955, "nll_loss": 1.0225712060928345, "rewards/accuracies": 0.875, "rewards/chosen": -0.059789855033159256, "rewards/margins": 0.17533473670482635, "rewards/rejected": -0.2351245880126953, "step": 6534 }, { "epoch": 3.9865792283056276, "grad_norm": 2.062819480895996, "learning_rate": 1.6264543784445806e-06, "log_odds_chosen": 1.6243350505828857, "log_odds_ratio": -0.5151851177215576, "logits/chosen": -0.9673402905464172, "logits/rejected": -1.0807514190673828, "logps/chosen": -0.7423290014266968, "logps/rejected": -2.031285047531128, "loss": 1.1083, "nll_loss": 0.7121477127075195, "rewards/accuracies": 0.625, "rewards/chosen": -0.07423290610313416, "rewards/margins": 0.1288955956697464, "rewards/rejected": -0.20312850177288055, "step": 6535 }, { "epoch": 3.9871892633826445, "grad_norm": 1.6294218301773071, "learning_rate": 1.6254745866503367e-06, "log_odds_chosen": 3.632396697998047, "log_odds_ratio": -0.11135233938694, "logits/chosen": -0.8153995871543884, "logits/rejected": -1.0554516315460205, "logps/chosen": -0.6627966165542603, "logps/rejected": -3.44496488571167, "loss": 0.9711, "nll_loss": 0.8721802234649658, "rewards/accuracies": 1.0, "rewards/chosen": -0.06627966463565826, "rewards/margins": 0.2782168388366699, "rewards/rejected": -0.344496488571167, "step": 6536 }, { "epoch": 3.9877992984596613, "grad_norm": 1.6256399154663086, "learning_rate": 1.624494794856093e-06, "log_odds_chosen": 0.7103543281555176, "log_odds_ratio": -0.5382463932037354, "logits/chosen": -0.6993162631988525, "logits/rejected": -0.7490724325180054, "logps/chosen": -0.7020251154899597, "logps/rejected": -1.2341485023498535, "loss": 0.9198, "nll_loss": 0.9601976871490479, "rewards/accuracies": 0.75, "rewards/chosen": -0.07020251452922821, "rewards/margins": 0.05321233719587326, "rewards/rejected": -0.12341485917568207, "step": 6537 }, { "epoch": 3.988409333536678, "grad_norm": 2.427737236022949, "learning_rate": 1.6235150030618492e-06, "log_odds_chosen": 0.6046104431152344, "log_odds_ratio": -0.5379113554954529, "logits/chosen": -1.0552923679351807, "logits/rejected": -1.0221666097640991, "logps/chosen": -0.8745762705802917, "logps/rejected": -1.2962183952331543, "loss": 1.1279, "nll_loss": 1.09254789352417, "rewards/accuracies": 0.625, "rewards/chosen": -0.08745762705802917, "rewards/margins": 0.042164213955402374, "rewards/rejected": -0.12962183356285095, "step": 6538 }, { "epoch": 3.9890193686136954, "grad_norm": 1.786389946937561, "learning_rate": 1.6225352112676055e-06, "log_odds_chosen": 2.504918336868286, "log_odds_ratio": -0.49768540263175964, "logits/chosen": -0.774798572063446, "logits/rejected": -0.9628793001174927, "logps/chosen": -0.7158907651901245, "logps/rejected": -2.5535550117492676, "loss": 1.1934, "nll_loss": 1.0564332008361816, "rewards/accuracies": 0.625, "rewards/chosen": -0.07158907502889633, "rewards/margins": 0.1837664246559143, "rewards/rejected": -0.25535550713539124, "step": 6539 }, { "epoch": 3.9896294036907123, "grad_norm": 2.7304699420928955, "learning_rate": 1.6215554194733618e-06, "log_odds_chosen": 1.289582371711731, "log_odds_ratio": -0.49181726574897766, "logits/chosen": -0.7965998649597168, "logits/rejected": -0.8162100315093994, "logps/chosen": -0.7472306489944458, "logps/rejected": -1.7539429664611816, "loss": 1.2468, "nll_loss": 1.0788244009017944, "rewards/accuracies": 0.75, "rewards/chosen": -0.07472305744886398, "rewards/margins": 0.10067124664783478, "rewards/rejected": -0.17539429664611816, "step": 6540 }, { "epoch": 3.990239438767729, "grad_norm": 1.9124846458435059, "learning_rate": 1.620575627679118e-06, "log_odds_chosen": 2.0858962535858154, "log_odds_ratio": -0.34224140644073486, "logits/chosen": -0.7197504639625549, "logits/rejected": -0.9324096441268921, "logps/chosen": -0.7173388004302979, "logps/rejected": -2.2950968742370605, "loss": 1.0114, "nll_loss": 0.919899582862854, "rewards/accuracies": 0.75, "rewards/chosen": -0.07173387706279755, "rewards/margins": 0.15777578949928284, "rewards/rejected": -0.22950969636440277, "step": 6541 }, { "epoch": 3.990849473844746, "grad_norm": 1.702657699584961, "learning_rate": 1.6195958358848745e-06, "log_odds_chosen": 2.678133010864258, "log_odds_ratio": -0.22059394419193268, "logits/chosen": -0.70293128490448, "logits/rejected": -1.0133987665176392, "logps/chosen": -0.5149108171463013, "logps/rejected": -2.1934926509857178, "loss": 0.9849, "nll_loss": 0.9334803223609924, "rewards/accuracies": 0.75, "rewards/chosen": -0.051491085439920425, "rewards/margins": 0.16785819828510284, "rewards/rejected": -0.21934929490089417, "step": 6542 }, { "epoch": 3.9914595089217633, "grad_norm": 2.9555370807647705, "learning_rate": 1.6186160440906306e-06, "log_odds_chosen": 0.7586965560913086, "log_odds_ratio": -0.47201234102249146, "logits/chosen": -0.9113541841506958, "logits/rejected": -0.8736618161201477, "logps/chosen": -0.9606664776802063, "logps/rejected": -1.5727319717407227, "loss": 1.1303, "nll_loss": 1.0643489360809326, "rewards/accuracies": 0.625, "rewards/chosen": -0.09606665372848511, "rewards/margins": 0.061206549406051636, "rewards/rejected": -0.15727320313453674, "step": 6543 }, { "epoch": 3.99206954399878, "grad_norm": 5.745318412780762, "learning_rate": 1.617636252296387e-06, "log_odds_chosen": 1.5467214584350586, "log_odds_ratio": -0.39361444115638733, "logits/chosen": -0.9218357801437378, "logits/rejected": -0.9659447073936462, "logps/chosen": -0.7653282880783081, "logps/rejected": -1.9430309534072876, "loss": 1.1388, "nll_loss": 1.128375768661499, "rewards/accuracies": 0.75, "rewards/chosen": -0.07653282582759857, "rewards/margins": 0.11777028441429138, "rewards/rejected": -0.19430309534072876, "step": 6544 }, { "epoch": 3.992679579075797, "grad_norm": 2.1755218505859375, "learning_rate": 1.6166564605021433e-06, "log_odds_chosen": 3.544070243835449, "log_odds_ratio": -0.3878380060195923, "logits/chosen": -0.7591018080711365, "logits/rejected": -1.0039819478988647, "logps/chosen": -0.7393350601196289, "logps/rejected": -3.7141315937042236, "loss": 0.9364, "nll_loss": 0.8323723077774048, "rewards/accuracies": 0.875, "rewards/chosen": -0.07393350452184677, "rewards/margins": 0.29747965931892395, "rewards/rejected": -0.37141314148902893, "step": 6545 }, { "epoch": 3.993289614152814, "grad_norm": 1.6196141242980957, "learning_rate": 1.6156766687078996e-06, "log_odds_chosen": 1.938688039779663, "log_odds_ratio": -0.42808565497398376, "logits/chosen": -0.8236023187637329, "logits/rejected": -0.9646337032318115, "logps/chosen": -0.8706371188163757, "logps/rejected": -2.501516819000244, "loss": 1.1018, "nll_loss": 1.144105076789856, "rewards/accuracies": 0.625, "rewards/chosen": -0.08706371486186981, "rewards/margins": 0.16308796405792236, "rewards/rejected": -0.25015169382095337, "step": 6546 }, { "epoch": 3.9938996492298307, "grad_norm": 5.0280632972717285, "learning_rate": 1.6146968769136558e-06, "log_odds_chosen": 2.6570067405700684, "log_odds_ratio": -0.32257941365242004, "logits/chosen": -0.8137732744216919, "logits/rejected": -1.0046359300613403, "logps/chosen": -0.7880387306213379, "logps/rejected": -2.935947895050049, "loss": 1.1877, "nll_loss": 1.0115138292312622, "rewards/accuracies": 0.875, "rewards/chosen": -0.07880386710166931, "rewards/margins": 0.214790940284729, "rewards/rejected": -0.2935948073863983, "step": 6547 }, { "epoch": 3.9945096843068475, "grad_norm": 1.1729985475540161, "learning_rate": 1.6137170851194119e-06, "log_odds_chosen": 1.6656489372253418, "log_odds_ratio": -0.329751193523407, "logits/chosen": -0.6934550404548645, "logits/rejected": -0.8700152635574341, "logps/chosen": -0.6262847185134888, "logps/rejected": -1.7260944843292236, "loss": 1.1102, "nll_loss": 1.0208699703216553, "rewards/accuracies": 0.875, "rewards/chosen": -0.06262847036123276, "rewards/margins": 0.10998097062110901, "rewards/rejected": -0.17260944843292236, "step": 6548 }, { "epoch": 3.9951197193838643, "grad_norm": 0.9580252766609192, "learning_rate": 1.6127372933251684e-06, "log_odds_chosen": 1.7304039001464844, "log_odds_ratio": -0.487117737531662, "logits/chosen": -0.920392632484436, "logits/rejected": -0.9958460330963135, "logps/chosen": -0.9426820278167725, "logps/rejected": -2.3245010375976562, "loss": 1.2444, "nll_loss": 1.294285774230957, "rewards/accuracies": 0.625, "rewards/chosen": -0.09426819533109665, "rewards/margins": 0.1381819248199463, "rewards/rejected": -0.23245012760162354, "step": 6549 }, { "epoch": 3.9957297544608816, "grad_norm": 1.5972710847854614, "learning_rate": 1.6117575015309246e-06, "log_odds_chosen": 1.6865414381027222, "log_odds_ratio": -0.4839814007282257, "logits/chosen": -0.6183760166168213, "logits/rejected": -0.7767460346221924, "logps/chosen": -0.6158031225204468, "logps/rejected": -1.9405874013900757, "loss": 1.0822, "nll_loss": 0.8756213188171387, "rewards/accuracies": 0.75, "rewards/chosen": -0.06158031150698662, "rewards/margins": 0.13247844576835632, "rewards/rejected": -0.19405873119831085, "step": 6550 }, { "epoch": 3.9963397895378985, "grad_norm": 1.5187517404556274, "learning_rate": 1.6107777097366809e-06, "log_odds_chosen": 1.6136980056762695, "log_odds_ratio": -0.3748243451118469, "logits/chosen": -0.7287333011627197, "logits/rejected": -0.7903589010238647, "logps/chosen": -0.6918258666992188, "logps/rejected": -1.8059875965118408, "loss": 0.8077, "nll_loss": 0.9139410853385925, "rewards/accuracies": 0.75, "rewards/chosen": -0.06918258219957352, "rewards/margins": 0.11141617596149445, "rewards/rejected": -0.18059876561164856, "step": 6551 }, { "epoch": 3.9969498246149153, "grad_norm": 1.1804344654083252, "learning_rate": 1.6097979179424372e-06, "log_odds_chosen": 2.047168254852295, "log_odds_ratio": -0.3025077283382416, "logits/chosen": -0.3743312954902649, "logits/rejected": -0.6181168556213379, "logps/chosen": -0.6400213837623596, "logps/rejected": -2.172917366027832, "loss": 0.9708, "nll_loss": 0.8979946374893188, "rewards/accuracies": 0.875, "rewards/chosen": -0.0640021413564682, "rewards/margins": 0.1532895863056183, "rewards/rejected": -0.21729174256324768, "step": 6552 }, { "epoch": 3.997559859691932, "grad_norm": 3.6194851398468018, "learning_rate": 1.6088181261481936e-06, "log_odds_chosen": 1.3495936393737793, "log_odds_ratio": -0.40306755900382996, "logits/chosen": -0.8763998746871948, "logits/rejected": -0.9919376373291016, "logps/chosen": -0.852304220199585, "logps/rejected": -1.956754446029663, "loss": 0.9744, "nll_loss": 0.9560681581497192, "rewards/accuracies": 0.75, "rewards/chosen": -0.08523042500019073, "rewards/margins": 0.11044501513242722, "rewards/rejected": -0.19567544758319855, "step": 6553 }, { "epoch": 3.9981698947689495, "grad_norm": 1.2192068099975586, "learning_rate": 1.6078383343539497e-06, "log_odds_chosen": 1.610361099243164, "log_odds_ratio": -0.47247374057769775, "logits/chosen": -0.8981380462646484, "logits/rejected": -1.0325223207473755, "logps/chosen": -0.9565356373786926, "logps/rejected": -2.25525164604187, "loss": 1.2173, "nll_loss": 1.2363520860671997, "rewards/accuracies": 0.875, "rewards/chosen": -0.09565356373786926, "rewards/margins": 0.12987159192562103, "rewards/rejected": -0.2255251705646515, "step": 6554 }, { "epoch": 3.9987799298459663, "grad_norm": 1.576083779335022, "learning_rate": 1.606858542559706e-06, "log_odds_chosen": 0.6451735496520996, "log_odds_ratio": -0.5306631326675415, "logits/chosen": -0.7574297785758972, "logits/rejected": -0.6614904403686523, "logps/chosen": -0.7063080668449402, "logps/rejected": -1.0017938613891602, "loss": 0.9748, "nll_loss": 0.909359335899353, "rewards/accuracies": 0.625, "rewards/chosen": -0.07063080370426178, "rewards/margins": 0.02954857610166073, "rewards/rejected": -0.10017938166856766, "step": 6555 }, { "epoch": 3.999389964922983, "grad_norm": 1.7639796733856201, "learning_rate": 1.6058787507654623e-06, "log_odds_chosen": 2.2493367195129395, "log_odds_ratio": -0.39592650532722473, "logits/chosen": -0.731778621673584, "logits/rejected": -0.8871036767959595, "logps/chosen": -0.683260977268219, "logps/rejected": -2.435703754425049, "loss": 1.0521, "nll_loss": 0.8247021436691284, "rewards/accuracies": 0.75, "rewards/chosen": -0.06832610070705414, "rewards/margins": 0.1752442568540573, "rewards/rejected": -0.24357038736343384, "step": 6556 }, { "epoch": 3.999389964922983, "eval_log_odds_chosen": 1.9928852319717407, "eval_log_odds_ratio": -0.40604811906814575, "eval_logits/chosen": -0.8917006254196167, "eval_logits/rejected": -0.9997873306274414, "eval_logps/chosen": -0.8243983387947083, "eval_logps/rejected": -2.4049148559570312, "eval_loss": 1.0604735612869263, "eval_nll_loss": 1.0630130767822266, "eval_rewards/accuracies": 0.7727272510528564, "eval_rewards/chosen": -0.08243982493877411, "eval_rewards/margins": 0.15805163979530334, "eval_rewards/rejected": -0.24049149453639984, "eval_runtime": 394.0007, "eval_samples_per_second": 0.997, "eval_steps_per_second": 0.168, "step": 6556 }, { "epoch": 4.0, "grad_norm": 2.0907623767852783, "learning_rate": 1.6048989589712185e-06, "log_odds_chosen": 2.953411340713501, "log_odds_ratio": -0.28790488839149475, "logits/chosen": -0.8371830582618713, "logits/rejected": -0.9786993265151978, "logps/chosen": -0.8339414000511169, "logps/rejected": -3.0775089263916016, "loss": 1.0424, "nll_loss": 0.885592520236969, "rewards/accuracies": 0.75, "rewards/chosen": -0.08339414745569229, "rewards/margins": 0.2243567556142807, "rewards/rejected": -0.3077508807182312, "step": 6557 }, { "epoch": 4.000610035077017, "grad_norm": 8.375060081481934, "learning_rate": 1.6039191671769748e-06, "log_odds_chosen": 2.358396530151367, "log_odds_ratio": -0.37708139419555664, "logits/chosen": -0.8132748007774353, "logits/rejected": -0.9405051469802856, "logps/chosen": -0.9751270413398743, "logps/rejected": -2.9101290702819824, "loss": 1.0341, "nll_loss": 1.1898064613342285, "rewards/accuracies": 0.75, "rewards/chosen": -0.09751270711421967, "rewards/margins": 0.19350020587444305, "rewards/rejected": -0.2910129129886627, "step": 6558 }, { "epoch": 4.001220070154034, "grad_norm": 1.1132665872573853, "learning_rate": 1.6029393753827311e-06, "log_odds_chosen": 1.682008147239685, "log_odds_ratio": -0.3720542788505554, "logits/chosen": -0.8995365500450134, "logits/rejected": -1.0445637702941895, "logps/chosen": -0.8453032970428467, "logps/rejected": -2.1033406257629395, "loss": 0.8911, "nll_loss": 0.8785454034805298, "rewards/accuracies": 0.75, "rewards/chosen": -0.08453033119440079, "rewards/margins": 0.12580373883247375, "rewards/rejected": -0.21033406257629395, "step": 6559 }, { "epoch": 4.0018301052310505, "grad_norm": 5.125351905822754, "learning_rate": 1.6019595835884875e-06, "log_odds_chosen": 0.6484696865081787, "log_odds_ratio": -0.6113778948783875, "logits/chosen": -0.8841331005096436, "logits/rejected": -0.9588656425476074, "logps/chosen": -0.894686222076416, "logps/rejected": -1.3669891357421875, "loss": 1.0287, "nll_loss": 1.22269868850708, "rewards/accuracies": 0.5, "rewards/chosen": -0.08946862816810608, "rewards/margins": 0.047230277210474014, "rewards/rejected": -0.136698916554451, "step": 6560 }, { "epoch": 4.002440140308067, "grad_norm": 2.2996389865875244, "learning_rate": 1.6009797917942436e-06, "log_odds_chosen": 0.9662818908691406, "log_odds_ratio": -0.7826062440872192, "logits/chosen": -0.9569730758666992, "logits/rejected": -0.8470847606658936, "logps/chosen": -0.9169429540634155, "logps/rejected": -1.792909026145935, "loss": 1.2554, "nll_loss": 1.0966219902038574, "rewards/accuracies": 0.375, "rewards/chosen": -0.09169429540634155, "rewards/margins": 0.08759661018848419, "rewards/rejected": -0.17929089069366455, "step": 6561 }, { "epoch": 4.003050175385084, "grad_norm": 1.781072735786438, "learning_rate": 1.6e-06, "log_odds_chosen": 3.7372777462005615, "log_odds_ratio": -0.295927494764328, "logits/chosen": -0.9210813045501709, "logits/rejected": -1.186258316040039, "logps/chosen": -0.7282302975654602, "logps/rejected": -4.007524490356445, "loss": 1.026, "nll_loss": 1.0539520978927612, "rewards/accuracies": 0.75, "rewards/chosen": -0.07282304018735886, "rewards/margins": 0.32792940735816956, "rewards/rejected": -0.400752454996109, "step": 6562 }, { "epoch": 4.003660210462102, "grad_norm": 1.4926859140396118, "learning_rate": 1.5990202082057563e-06, "log_odds_chosen": 2.4907257556915283, "log_odds_ratio": -0.45613688230514526, "logits/chosen": -0.8772986531257629, "logits/rejected": -0.9946883916854858, "logps/chosen": -0.8277400732040405, "logps/rejected": -2.958601474761963, "loss": 1.1234, "nll_loss": 1.0144567489624023, "rewards/accuracies": 0.75, "rewards/chosen": -0.08277400583028793, "rewards/margins": 0.21308612823486328, "rewards/rejected": -0.2958601415157318, "step": 6563 }, { "epoch": 4.004270245539119, "grad_norm": 4.272326946258545, "learning_rate": 1.5980404164115124e-06, "log_odds_chosen": 0.6934784054756165, "log_odds_ratio": -0.6485456228256226, "logits/chosen": -0.9999806880950928, "logits/rejected": -1.048803448677063, "logps/chosen": -0.8776536583900452, "logps/rejected": -1.3012940883636475, "loss": 1.1883, "nll_loss": 1.3077753782272339, "rewards/accuracies": 0.5, "rewards/chosen": -0.08776536583900452, "rewards/margins": 0.042364053428173065, "rewards/rejected": -0.13012942671775818, "step": 6564 }, { "epoch": 4.004880280616136, "grad_norm": 1.3903999328613281, "learning_rate": 1.5970606246172687e-06, "log_odds_chosen": 1.4365215301513672, "log_odds_ratio": -0.4219757616519928, "logits/chosen": -0.8002710342407227, "logits/rejected": -0.7505437135696411, "logps/chosen": -0.8245417475700378, "logps/rejected": -1.9397249221801758, "loss": 1.1689, "nll_loss": 1.1073782444000244, "rewards/accuracies": 0.875, "rewards/chosen": -0.08245417475700378, "rewards/margins": 0.11151832342147827, "rewards/rejected": -0.19397249817848206, "step": 6565 }, { "epoch": 4.0054903156931525, "grad_norm": 11.226700782775879, "learning_rate": 1.596080832823025e-06, "log_odds_chosen": 1.919335126876831, "log_odds_ratio": -0.38044291734695435, "logits/chosen": -0.7491578459739685, "logits/rejected": -0.9412313103675842, "logps/chosen": -0.6678712964057922, "logps/rejected": -2.119478225708008, "loss": 1.1323, "nll_loss": 1.0062838792800903, "rewards/accuracies": 0.875, "rewards/chosen": -0.06678712368011475, "rewards/margins": 0.1451607048511505, "rewards/rejected": -0.21194779872894287, "step": 6566 }, { "epoch": 4.006100350770169, "grad_norm": 1.287621021270752, "learning_rate": 1.5951010410287814e-06, "log_odds_chosen": 3.4123148918151855, "log_odds_ratio": -0.27912241220474243, "logits/chosen": -0.8003460168838501, "logits/rejected": -1.0719947814941406, "logps/chosen": -0.493098646402359, "logps/rejected": -3.214064598083496, "loss": 0.8916, "nll_loss": 0.9849344491958618, "rewards/accuracies": 0.875, "rewards/chosen": -0.0493098646402359, "rewards/margins": 0.2720966041088104, "rewards/rejected": -0.32140645384788513, "step": 6567 }, { "epoch": 4.006710385847186, "grad_norm": 2.1020095348358154, "learning_rate": 1.5941212492345375e-06, "log_odds_chosen": 0.6651365160942078, "log_odds_ratio": -0.5230067372322083, "logits/chosen": -0.7752726078033447, "logits/rejected": -0.8729217052459717, "logps/chosen": -0.7831908464431763, "logps/rejected": -1.2497742176055908, "loss": 0.9838, "nll_loss": 0.9211231470108032, "rewards/accuracies": 0.75, "rewards/chosen": -0.07831908762454987, "rewards/margins": 0.046658337116241455, "rewards/rejected": -0.12497741729021072, "step": 6568 }, { "epoch": 4.007320420924203, "grad_norm": 2.375526189804077, "learning_rate": 1.5931414574402938e-06, "log_odds_chosen": 2.2435154914855957, "log_odds_ratio": -0.2802794575691223, "logits/chosen": -0.9120743274688721, "logits/rejected": -0.9520002603530884, "logps/chosen": -0.8779202103614807, "logps/rejected": -2.5716075897216797, "loss": 0.9748, "nll_loss": 1.0082108974456787, "rewards/accuracies": 0.875, "rewards/chosen": -0.08779203146696091, "rewards/margins": 0.16936872899532318, "rewards/rejected": -0.2571607530117035, "step": 6569 }, { "epoch": 4.00793045600122, "grad_norm": 9.531914710998535, "learning_rate": 1.5921616656460502e-06, "log_odds_chosen": 1.045652985572815, "log_odds_ratio": -0.51607346534729, "logits/chosen": -1.0099883079528809, "logits/rejected": -1.0550025701522827, "logps/chosen": -0.9857841730117798, "logps/rejected": -1.913147211074829, "loss": 1.2054, "nll_loss": 1.0643982887268066, "rewards/accuracies": 0.625, "rewards/chosen": -0.09857841581106186, "rewards/margins": 0.09273631125688553, "rewards/rejected": -0.1913147270679474, "step": 6570 }, { "epoch": 4.008540491078237, "grad_norm": 2.1791439056396484, "learning_rate": 1.5911818738518063e-06, "log_odds_chosen": 1.4485169649124146, "log_odds_ratio": -0.32785898447036743, "logits/chosen": -1.116868257522583, "logits/rejected": -1.017590045928955, "logps/chosen": -0.704923152923584, "logps/rejected": -1.6848938465118408, "loss": 0.9769, "nll_loss": 0.8943374752998352, "rewards/accuracies": 1.0, "rewards/chosen": -0.07049231231212616, "rewards/margins": 0.09799706935882568, "rewards/rejected": -0.16848938167095184, "step": 6571 }, { "epoch": 4.009150526155254, "grad_norm": 1.7287683486938477, "learning_rate": 1.5902020820575628e-06, "log_odds_chosen": 2.316155433654785, "log_odds_ratio": -0.2845328450202942, "logits/chosen": -0.9632754921913147, "logits/rejected": -0.8907667398452759, "logps/chosen": -0.8732982873916626, "logps/rejected": -2.857389450073242, "loss": 1.0811, "nll_loss": 0.9354598522186279, "rewards/accuracies": 1.0, "rewards/chosen": -0.08732981979846954, "rewards/margins": 0.19840911030769348, "rewards/rejected": -0.2857389450073242, "step": 6572 }, { "epoch": 4.00976056123227, "grad_norm": 4.154331684112549, "learning_rate": 1.589222290263319e-06, "log_odds_chosen": 1.3828563690185547, "log_odds_ratio": -0.5423812866210938, "logits/chosen": -0.8292068839073181, "logits/rejected": -0.9637742042541504, "logps/chosen": -0.8681696653366089, "logps/rejected": -1.7884571552276611, "loss": 0.9243, "nll_loss": 0.7636179327964783, "rewards/accuracies": 0.75, "rewards/chosen": -0.08681697398424149, "rewards/margins": 0.09202874451875687, "rewards/rejected": -0.17884571850299835, "step": 6573 }, { "epoch": 4.010370596309288, "grad_norm": 14.548824310302734, "learning_rate": 1.5882424984690753e-06, "log_odds_chosen": 2.3467249870300293, "log_odds_ratio": -0.46201103925704956, "logits/chosen": -0.7177025079727173, "logits/rejected": -0.9029672145843506, "logps/chosen": -0.7875134944915771, "logps/rejected": -2.689614772796631, "loss": 1.2003, "nll_loss": 1.0068565607070923, "rewards/accuracies": 0.625, "rewards/chosen": -0.0787513479590416, "rewards/margins": 0.19021013379096985, "rewards/rejected": -0.26896148920059204, "step": 6574 }, { "epoch": 4.010980631386305, "grad_norm": 5.576543807983398, "learning_rate": 1.5872627066748314e-06, "log_odds_chosen": 0.728729248046875, "log_odds_ratio": -0.5916263461112976, "logits/chosen": -0.917014479637146, "logits/rejected": -0.9848381876945496, "logps/chosen": -0.7506357431411743, "logps/rejected": -1.2701877355575562, "loss": 1.0842, "nll_loss": 1.003357172012329, "rewards/accuracies": 0.5, "rewards/chosen": -0.07506357878446579, "rewards/margins": 0.0519552007317543, "rewards/rejected": -0.1270187795162201, "step": 6575 }, { "epoch": 4.011590666463322, "grad_norm": 5.308556079864502, "learning_rate": 1.5862829148805878e-06, "log_odds_chosen": 1.5529459714889526, "log_odds_ratio": -0.50261390209198, "logits/chosen": -0.7683023810386658, "logits/rejected": -0.933853030204773, "logps/chosen": -1.0435943603515625, "logps/rejected": -2.3497567176818848, "loss": 1.2572, "nll_loss": 1.190826654434204, "rewards/accuracies": 0.625, "rewards/chosen": -0.1043594479560852, "rewards/margins": 0.1306162327528, "rewards/rejected": -0.234975665807724, "step": 6576 }, { "epoch": 4.012200701540339, "grad_norm": 1.136444330215454, "learning_rate": 1.585303123086344e-06, "log_odds_chosen": 0.9284317493438721, "log_odds_ratio": -0.6882988810539246, "logits/chosen": -0.8830771446228027, "logits/rejected": -0.9186689853668213, "logps/chosen": -0.877072274684906, "logps/rejected": -1.5323946475982666, "loss": 1.0562, "nll_loss": 1.0556228160858154, "rewards/accuracies": 0.5, "rewards/chosen": -0.08770722150802612, "rewards/margins": 0.06553224474191666, "rewards/rejected": -0.15323947370052338, "step": 6577 }, { "epoch": 4.0128107366173555, "grad_norm": 7.430509090423584, "learning_rate": 1.5843233312921002e-06, "log_odds_chosen": 2.195977210998535, "log_odds_ratio": -0.3090668022632599, "logits/chosen": -0.9879815578460693, "logits/rejected": -0.9743732810020447, "logps/chosen": -0.639747679233551, "logps/rejected": -2.253084659576416, "loss": 1.0077, "nll_loss": 0.9490963816642761, "rewards/accuracies": 0.875, "rewards/chosen": -0.0639747679233551, "rewards/margins": 0.16133369505405426, "rewards/rejected": -0.22530846297740936, "step": 6578 }, { "epoch": 4.013420771694372, "grad_norm": 6.479709148406982, "learning_rate": 1.5833435394978568e-06, "log_odds_chosen": 1.3993721008300781, "log_odds_ratio": -0.4200039207935333, "logits/chosen": -0.9737443923950195, "logits/rejected": -1.0488793849945068, "logps/chosen": -0.78365558385849, "logps/rejected": -1.8093852996826172, "loss": 0.9835, "nll_loss": 0.8400669693946838, "rewards/accuracies": 0.625, "rewards/chosen": -0.07836556434631348, "rewards/margins": 0.1025729700922966, "rewards/rejected": -0.1809385120868683, "step": 6579 }, { "epoch": 4.014030806771389, "grad_norm": 1.6555330753326416, "learning_rate": 1.5823637477036129e-06, "log_odds_chosen": 2.2548093795776367, "log_odds_ratio": -0.5818850994110107, "logits/chosen": -0.8470103740692139, "logits/rejected": -0.9973070621490479, "logps/chosen": -0.9519174098968506, "logps/rejected": -3.0233616828918457, "loss": 1.0697, "nll_loss": 1.0290651321411133, "rewards/accuracies": 0.5, "rewards/chosen": -0.09519173949956894, "rewards/margins": 0.20714443922042847, "rewards/rejected": -0.302336186170578, "step": 6580 }, { "epoch": 4.014640841848406, "grad_norm": 1.6580427885055542, "learning_rate": 1.5813839559093692e-06, "log_odds_chosen": 0.8597671985626221, "log_odds_ratio": -0.5402889251708984, "logits/chosen": -0.9356154203414917, "logits/rejected": -0.9673333168029785, "logps/chosen": -0.8613923788070679, "logps/rejected": -1.4711687564849854, "loss": 1.0247, "nll_loss": 1.0473568439483643, "rewards/accuracies": 0.75, "rewards/chosen": -0.0861392393708229, "rewards/margins": 0.060977645218372345, "rewards/rejected": -0.14711688458919525, "step": 6581 }, { "epoch": 4.015250876925423, "grad_norm": 1.9748585224151611, "learning_rate": 1.5804041641151255e-06, "log_odds_chosen": 1.5975146293640137, "log_odds_ratio": -0.3127378821372986, "logits/chosen": -0.9264059066772461, "logits/rejected": -0.8734532594680786, "logps/chosen": -0.7896211743354797, "logps/rejected": -1.9520362615585327, "loss": 1.1106, "nll_loss": 0.9782198667526245, "rewards/accuracies": 0.875, "rewards/chosen": -0.07896212488412857, "rewards/margins": 0.11624149233102798, "rewards/rejected": -0.19520361721515656, "step": 6582 }, { "epoch": 4.01586091200244, "grad_norm": 1.5746350288391113, "learning_rate": 1.5794243723208817e-06, "log_odds_chosen": 1.4483426809310913, "log_odds_ratio": -0.38699695467948914, "logits/chosen": -0.590509295463562, "logits/rejected": -0.8143454194068909, "logps/chosen": -0.6970498561859131, "logps/rejected": -1.7211583852767944, "loss": 0.9367, "nll_loss": 0.7383268475532532, "rewards/accuracies": 0.75, "rewards/chosen": -0.06970498710870743, "rewards/margins": 0.10241086781024933, "rewards/rejected": -0.17211584746837616, "step": 6583 }, { "epoch": 4.0164709470794575, "grad_norm": 2.1264138221740723, "learning_rate": 1.578444580526638e-06, "log_odds_chosen": 2.9458131790161133, "log_odds_ratio": -0.3400367200374603, "logits/chosen": -0.8793479204177856, "logits/rejected": -1.0301283597946167, "logps/chosen": -0.7661517858505249, "logps/rejected": -3.2448320388793945, "loss": 1.1165, "nll_loss": 1.0495167970657349, "rewards/accuracies": 0.75, "rewards/chosen": -0.07661518454551697, "rewards/margins": 0.24786800146102905, "rewards/rejected": -0.324483186006546, "step": 6584 }, { "epoch": 4.017080982156474, "grad_norm": 1.0485014915466309, "learning_rate": 1.5774647887323941e-06, "log_odds_chosen": 0.41989144682884216, "log_odds_ratio": -0.7428184747695923, "logits/chosen": -0.8490955829620361, "logits/rejected": -0.9734706282615662, "logps/chosen": -0.9598457217216492, "logps/rejected": -1.2630701065063477, "loss": 1.0335, "nll_loss": 1.0250033140182495, "rewards/accuracies": 0.625, "rewards/chosen": -0.0959845781326294, "rewards/margins": 0.030322430655360222, "rewards/rejected": -0.12630701065063477, "step": 6585 }, { "epoch": 4.017691017233491, "grad_norm": 1.6295723915100098, "learning_rate": 1.5764849969381507e-06, "log_odds_chosen": 3.292361259460449, "log_odds_ratio": -0.2572862207889557, "logits/chosen": -1.1064815521240234, "logits/rejected": -1.1313210725784302, "logps/chosen": -0.879054844379425, "logps/rejected": -3.6839799880981445, "loss": 1.1743, "nll_loss": 1.21443772315979, "rewards/accuracies": 0.875, "rewards/chosen": -0.08790548890829086, "rewards/margins": 0.28049251437187195, "rewards/rejected": -0.3683980107307434, "step": 6586 }, { "epoch": 4.018301052310508, "grad_norm": 1.3971080780029297, "learning_rate": 1.5755052051439068e-06, "log_odds_chosen": 0.2718980014324188, "log_odds_ratio": -0.6071526408195496, "logits/chosen": -1.0612233877182007, "logits/rejected": -1.067768931388855, "logps/chosen": -1.0619490146636963, "logps/rejected": -1.2319920063018799, "loss": 1.2042, "nll_loss": 1.2498220205307007, "rewards/accuracies": 0.625, "rewards/chosen": -0.10619490593671799, "rewards/margins": 0.017004288733005524, "rewards/rejected": -0.12319918721914291, "step": 6587 }, { "epoch": 4.018911087387525, "grad_norm": 1.274553894996643, "learning_rate": 1.5745254133496631e-06, "log_odds_chosen": 0.6922887563705444, "log_odds_ratio": -0.688683271408081, "logits/chosen": -1.0231010913848877, "logits/rejected": -1.034360647201538, "logps/chosen": -0.9888918995857239, "logps/rejected": -1.5972893238067627, "loss": 1.2565, "nll_loss": 1.1441938877105713, "rewards/accuracies": 0.5, "rewards/chosen": -0.09888918697834015, "rewards/margins": 0.060839734971523285, "rewards/rejected": -0.15972891449928284, "step": 6588 }, { "epoch": 4.019521122464542, "grad_norm": 1.5814448595046997, "learning_rate": 1.5735456215554195e-06, "log_odds_chosen": 0.29193437099456787, "log_odds_ratio": -0.6118738055229187, "logits/chosen": -1.0209567546844482, "logits/rejected": -0.9856278300285339, "logps/chosen": -0.7025655508041382, "logps/rejected": -0.8897756338119507, "loss": 1.0918, "nll_loss": 1.1749165058135986, "rewards/accuracies": 0.625, "rewards/chosen": -0.0702565610408783, "rewards/margins": 0.018721016123890877, "rewards/rejected": -0.08897757530212402, "step": 6589 }, { "epoch": 4.020131157541559, "grad_norm": 1.4714336395263672, "learning_rate": 1.5725658297611756e-06, "log_odds_chosen": 0.732754647731781, "log_odds_ratio": -0.5020875334739685, "logits/chosen": -0.7992731928825378, "logits/rejected": -0.8229285478591919, "logps/chosen": -0.6637112498283386, "logps/rejected": -1.082506775856018, "loss": 1.0309, "nll_loss": 0.8153272867202759, "rewards/accuracies": 0.625, "rewards/chosen": -0.0663711279630661, "rewards/margins": 0.0418795570731163, "rewards/rejected": -0.1082506850361824, "step": 6590 }, { "epoch": 4.020741192618575, "grad_norm": 1.214949369430542, "learning_rate": 1.571586037966932e-06, "log_odds_chosen": 0.4870702028274536, "log_odds_ratio": -0.7117264866828918, "logits/chosen": -0.9769250154495239, "logits/rejected": -1.1208800077438354, "logps/chosen": -1.2214531898498535, "logps/rejected": -1.646430492401123, "loss": 1.2381, "nll_loss": 1.4829192161560059, "rewards/accuracies": 0.375, "rewards/chosen": -0.12214533239603043, "rewards/margins": 0.042497724294662476, "rewards/rejected": -0.1646430492401123, "step": 6591 }, { "epoch": 4.021351227695592, "grad_norm": 1.4354119300842285, "learning_rate": 1.5706062461726883e-06, "log_odds_chosen": 1.5113294124603271, "log_odds_ratio": -0.5456035137176514, "logits/chosen": -1.1023287773132324, "logits/rejected": -1.0583289861679077, "logps/chosen": -1.1394999027252197, "logps/rejected": -2.503901958465576, "loss": 1.1962, "nll_loss": 1.3391202688217163, "rewards/accuracies": 0.625, "rewards/chosen": -0.1139499843120575, "rewards/margins": 0.1364402025938034, "rewards/rejected": -0.2503902018070221, "step": 6592 }, { "epoch": 4.021961262772609, "grad_norm": 1.2898598909378052, "learning_rate": 1.5696264543784446e-06, "log_odds_chosen": 1.564618706703186, "log_odds_ratio": -0.6460772752761841, "logits/chosen": -0.768916666507721, "logits/rejected": -0.8621902465820312, "logps/chosen": -0.8458014726638794, "logps/rejected": -2.1256117820739746, "loss": 1.0384, "nll_loss": 0.9673036932945251, "rewards/accuracies": 0.375, "rewards/chosen": -0.08458014577627182, "rewards/margins": 0.1279810220003128, "rewards/rejected": -0.21256116032600403, "step": 6593 }, { "epoch": 4.022571297849626, "grad_norm": 2.439742088317871, "learning_rate": 1.5686466625842007e-06, "log_odds_chosen": 2.2004313468933105, "log_odds_ratio": -0.4344434440135956, "logits/chosen": -0.852135181427002, "logits/rejected": -0.973086953163147, "logps/chosen": -0.8048949241638184, "logps/rejected": -2.588428497314453, "loss": 1.0372, "nll_loss": 1.0202038288116455, "rewards/accuracies": 0.75, "rewards/chosen": -0.08048949390649796, "rewards/margins": 0.17835333943367004, "rewards/rejected": -0.2588428556919098, "step": 6594 }, { "epoch": 4.023181332926644, "grad_norm": 1.1907559633255005, "learning_rate": 1.567666870789957e-06, "log_odds_chosen": 2.179992198944092, "log_odds_ratio": -0.3350939452648163, "logits/chosen": -0.5194346308708191, "logits/rejected": -0.9019607305526733, "logps/chosen": -0.5378000140190125, "logps/rejected": -2.0719332695007324, "loss": 0.7714, "nll_loss": 0.7519904971122742, "rewards/accuracies": 0.75, "rewards/chosen": -0.053780004382133484, "rewards/margins": 0.153413325548172, "rewards/rejected": -0.20719334483146667, "step": 6595 }, { "epoch": 4.0237913680036606, "grad_norm": 1.0420544147491455, "learning_rate": 1.5666870789957134e-06, "log_odds_chosen": 1.0001041889190674, "log_odds_ratio": -0.4978935122489929, "logits/chosen": -1.0553529262542725, "logits/rejected": -1.0999892950057983, "logps/chosen": -0.8630234003067017, "logps/rejected": -1.515689730644226, "loss": 0.996, "nll_loss": 0.8706523776054382, "rewards/accuracies": 0.75, "rewards/chosen": -0.08630234003067017, "rewards/margins": 0.06526662409305573, "rewards/rejected": -0.15156897902488708, "step": 6596 }, { "epoch": 4.024401403080677, "grad_norm": 1.8679016828536987, "learning_rate": 1.5657072872014695e-06, "log_odds_chosen": 0.1415850818157196, "log_odds_ratio": -0.728595495223999, "logits/chosen": -0.9711980819702148, "logits/rejected": -1.001173496246338, "logps/chosen": -1.0076810121536255, "logps/rejected": -1.0851492881774902, "loss": 1.1051, "nll_loss": 1.1472383737564087, "rewards/accuracies": 0.5, "rewards/chosen": -0.10076810419559479, "rewards/margins": 0.007746828719973564, "rewards/rejected": -0.1085149347782135, "step": 6597 }, { "epoch": 4.025011438157694, "grad_norm": 1.8036503791809082, "learning_rate": 1.5647274954072258e-06, "log_odds_chosen": 3.242932081222534, "log_odds_ratio": -0.17903627455234528, "logits/chosen": -0.7812041640281677, "logits/rejected": -1.042513370513916, "logps/chosen": -0.5682421326637268, "logps/rejected": -2.9385857582092285, "loss": 1.0756, "nll_loss": 0.8082423806190491, "rewards/accuracies": 1.0, "rewards/chosen": -0.0568242147564888, "rewards/margins": 0.2370343655347824, "rewards/rejected": -0.2938585579395294, "step": 6598 }, { "epoch": 4.025621473234711, "grad_norm": 1.338722586631775, "learning_rate": 1.5637477036129822e-06, "log_odds_chosen": 3.6723787784576416, "log_odds_ratio": -0.20588169991970062, "logits/chosen": -0.817295253276825, "logits/rejected": -1.0156655311584473, "logps/chosen": -0.6315599679946899, "logps/rejected": -3.5048582553863525, "loss": 0.8332, "nll_loss": 0.9071654677391052, "rewards/accuracies": 1.0, "rewards/chosen": -0.06315600126981735, "rewards/margins": 0.2873298227787018, "rewards/rejected": -0.35048583149909973, "step": 6599 }, { "epoch": 4.026231508311728, "grad_norm": 8.015647888183594, "learning_rate": 1.5627679118187385e-06, "log_odds_chosen": -0.35180896520614624, "log_odds_ratio": -1.1393163204193115, "logits/chosen": -0.8604182600975037, "logits/rejected": -0.9098829030990601, "logps/chosen": -1.3637254238128662, "logps/rejected": -1.0883277654647827, "loss": 1.004, "nll_loss": 1.2120370864868164, "rewards/accuracies": 0.375, "rewards/chosen": -0.13637253642082214, "rewards/margins": -0.027539759874343872, "rewards/rejected": -0.10883278399705887, "step": 6600 }, { "epoch": 4.026841543388745, "grad_norm": 6.577837944030762, "learning_rate": 1.5617881200244946e-06, "log_odds_chosen": 1.7114133834838867, "log_odds_ratio": -0.244846910238266, "logits/chosen": -0.909313976764679, "logits/rejected": -0.8707248568534851, "logps/chosen": -0.7726707458496094, "logps/rejected": -1.9937665462493896, "loss": 1.0304, "nll_loss": 1.163877010345459, "rewards/accuracies": 1.0, "rewards/chosen": -0.07726708054542542, "rewards/margins": 0.12210959941148758, "rewards/rejected": -0.1993766725063324, "step": 6601 }, { "epoch": 4.027451578465762, "grad_norm": 3.3705341815948486, "learning_rate": 1.5608083282302512e-06, "log_odds_chosen": 1.6142860651016235, "log_odds_ratio": -0.33169978857040405, "logits/chosen": -1.0341650247573853, "logits/rejected": -1.100875973701477, "logps/chosen": -0.7378487586975098, "logps/rejected": -1.9507837295532227, "loss": 1.1618, "nll_loss": 1.008032202720642, "rewards/accuracies": 0.875, "rewards/chosen": -0.07378487288951874, "rewards/margins": 0.12129350751638412, "rewards/rejected": -0.19507837295532227, "step": 6602 }, { "epoch": 4.0280616135427785, "grad_norm": 1.8547165393829346, "learning_rate": 1.5598285364360073e-06, "log_odds_chosen": 1.2261828184127808, "log_odds_ratio": -0.4452695846557617, "logits/chosen": -0.9349291324615479, "logits/rejected": -1.0360404253005981, "logps/chosen": -0.8073174357414246, "logps/rejected": -1.7020610570907593, "loss": 1.0554, "nll_loss": 1.129982352256775, "rewards/accuracies": 0.75, "rewards/chosen": -0.08073174953460693, "rewards/margins": 0.08947435766458511, "rewards/rejected": -0.17020609974861145, "step": 6603 }, { "epoch": 4.028671648619795, "grad_norm": 1.4385710954666138, "learning_rate": 1.5588487446417636e-06, "log_odds_chosen": 3.2354931831359863, "log_odds_ratio": -0.3612917959690094, "logits/chosen": -0.8308807015419006, "logits/rejected": -0.9574699997901917, "logps/chosen": -0.6472235918045044, "logps/rejected": -3.2791194915771484, "loss": 0.9957, "nll_loss": 0.8618532419204712, "rewards/accuracies": 0.75, "rewards/chosen": -0.06472235918045044, "rewards/margins": 0.2631896138191223, "rewards/rejected": -0.32791194319725037, "step": 6604 }, { "epoch": 4.029281683696812, "grad_norm": 2.0504775047302246, "learning_rate": 1.5578689528475197e-06, "log_odds_chosen": 2.178760051727295, "log_odds_ratio": -0.3165220618247986, "logits/chosen": -0.8690402507781982, "logits/rejected": -0.9577478170394897, "logps/chosen": -0.7783223390579224, "logps/rejected": -2.487387180328369, "loss": 0.9829, "nll_loss": 0.8948155641555786, "rewards/accuracies": 0.875, "rewards/chosen": -0.07783223688602448, "rewards/margins": 0.17090648412704468, "rewards/rejected": -0.24873872101306915, "step": 6605 }, { "epoch": 4.02989171877383, "grad_norm": 1.3003530502319336, "learning_rate": 1.556889161053276e-06, "log_odds_chosen": 3.6642684936523438, "log_odds_ratio": -0.13702714443206787, "logits/chosen": -0.8424249887466431, "logits/rejected": -1.1342706680297852, "logps/chosen": -0.6735482811927795, "logps/rejected": -3.6352458000183105, "loss": 0.8948, "nll_loss": 0.9376647472381592, "rewards/accuracies": 1.0, "rewards/chosen": -0.06735483556985855, "rewards/margins": 0.2961697578430176, "rewards/rejected": -0.36352458596229553, "step": 6606 }, { "epoch": 4.030501753850847, "grad_norm": 3.227205514907837, "learning_rate": 1.5559093692590324e-06, "log_odds_chosen": 2.856670618057251, "log_odds_ratio": -0.2019321322441101, "logits/chosen": -0.9713517427444458, "logits/rejected": -1.092249870300293, "logps/chosen": -0.5005636215209961, "logps/rejected": -2.608100652694702, "loss": 1.0991, "nll_loss": 1.1747004985809326, "rewards/accuracies": 1.0, "rewards/chosen": -0.05005636066198349, "rewards/margins": 0.21075370907783508, "rewards/rejected": -0.2608100473880768, "step": 6607 }, { "epoch": 4.031111788927864, "grad_norm": 5.014383792877197, "learning_rate": 1.5549295774647885e-06, "log_odds_chosen": 1.3643261194229126, "log_odds_ratio": -0.48251795768737793, "logits/chosen": -0.999143123626709, "logits/rejected": -0.8685816526412964, "logps/chosen": -0.8163226842880249, "logps/rejected": -1.708069920539856, "loss": 1.015, "nll_loss": 1.0211460590362549, "rewards/accuracies": 0.5, "rewards/chosen": -0.08163227140903473, "rewards/margins": 0.08917471766471863, "rewards/rejected": -0.17080698907375336, "step": 6608 }, { "epoch": 4.03172182400488, "grad_norm": 9.069757461547852, "learning_rate": 1.553949785670545e-06, "log_odds_chosen": 1.901531457901001, "log_odds_ratio": -0.2533425986766815, "logits/chosen": -0.9684321284294128, "logits/rejected": -1.1120399236679077, "logps/chosen": -0.7178345918655396, "logps/rejected": -2.0926289558410645, "loss": 0.9448, "nll_loss": 0.8265780210494995, "rewards/accuracies": 1.0, "rewards/chosen": -0.07178346067667007, "rewards/margins": 0.13747945427894592, "rewards/rejected": -0.2092629075050354, "step": 6609 }, { "epoch": 4.032331859081897, "grad_norm": 7.001032829284668, "learning_rate": 1.5529699938763012e-06, "log_odds_chosen": 2.261631727218628, "log_odds_ratio": -0.30699124932289124, "logits/chosen": -0.9000585079193115, "logits/rejected": -1.0324779748916626, "logps/chosen": -0.9150793552398682, "logps/rejected": -2.8214242458343506, "loss": 1.0262, "nll_loss": 1.0761497020721436, "rewards/accuracies": 0.875, "rewards/chosen": -0.0915079414844513, "rewards/margins": 0.19063448905944824, "rewards/rejected": -0.28214243054389954, "step": 6610 }, { "epoch": 4.032941894158914, "grad_norm": 3.9813356399536133, "learning_rate": 1.5519902020820575e-06, "log_odds_chosen": 1.8794493675231934, "log_odds_ratio": -0.435468852519989, "logits/chosen": -0.7988013029098511, "logits/rejected": -0.7029168605804443, "logps/chosen": -0.6906683444976807, "logps/rejected": -2.0953822135925293, "loss": 0.9587, "nll_loss": 0.88526850938797, "rewards/accuracies": 0.75, "rewards/chosen": -0.06906682997941971, "rewards/margins": 0.14047139883041382, "rewards/rejected": -0.20953822135925293, "step": 6611 }, { "epoch": 4.033551929235931, "grad_norm": 1.737838864326477, "learning_rate": 1.5510104102878139e-06, "log_odds_chosen": 1.5990442037582397, "log_odds_ratio": -0.5287338495254517, "logits/chosen": -0.8749207258224487, "logits/rejected": -0.8635392785072327, "logps/chosen": -0.9264326095581055, "logps/rejected": -2.099353075027466, "loss": 1.252, "nll_loss": 1.1812782287597656, "rewards/accuracies": 0.75, "rewards/chosen": -0.09264326095581055, "rewards/margins": 0.11729203164577484, "rewards/rejected": -0.20993530750274658, "step": 6612 }, { "epoch": 4.034161964312948, "grad_norm": 2.18111515045166, "learning_rate": 1.55003061849357e-06, "log_odds_chosen": 2.892594337463379, "log_odds_ratio": -0.3716273009777069, "logits/chosen": -0.7950903177261353, "logits/rejected": -0.9712989926338196, "logps/chosen": -0.7664053440093994, "logps/rejected": -2.8970625400543213, "loss": 1.0382, "nll_loss": 1.246720314025879, "rewards/accuracies": 0.625, "rewards/chosen": -0.0766405314207077, "rewards/margins": 0.2130657285451889, "rewards/rejected": -0.2897062599658966, "step": 6613 }, { "epoch": 4.034771999389965, "grad_norm": 8.399306297302246, "learning_rate": 1.5490508266993263e-06, "log_odds_chosen": 1.8374842405319214, "log_odds_ratio": -0.36226311326026917, "logits/chosen": -0.8287301063537598, "logits/rejected": -1.062506914138794, "logps/chosen": -0.7394709587097168, "logps/rejected": -1.9892370700836182, "loss": 1.0772, "nll_loss": 0.8287187218666077, "rewards/accuracies": 0.875, "rewards/chosen": -0.07394709438085556, "rewards/margins": 0.12497663497924805, "rewards/rejected": -0.198923721909523, "step": 6614 }, { "epoch": 4.0353820344669815, "grad_norm": 1.5679923295974731, "learning_rate": 1.5480710349050825e-06, "log_odds_chosen": 1.3146625757217407, "log_odds_ratio": -0.5152701139450073, "logits/chosen": -0.763128399848938, "logits/rejected": -0.8162908554077148, "logps/chosen": -0.6194894313812256, "logps/rejected": -1.6083489656448364, "loss": 0.9545, "nll_loss": 1.0151697397232056, "rewards/accuracies": 0.625, "rewards/chosen": -0.06194894015789032, "rewards/margins": 0.09888595342636108, "rewards/rejected": -0.1608348935842514, "step": 6615 }, { "epoch": 4.035992069543998, "grad_norm": 1.304121732711792, "learning_rate": 1.547091243110839e-06, "log_odds_chosen": 3.126904249191284, "log_odds_ratio": -0.32456129789352417, "logits/chosen": -1.0155853033065796, "logits/rejected": -1.1194097995758057, "logps/chosen": -0.7763371467590332, "logps/rejected": -3.3746674060821533, "loss": 0.9743, "nll_loss": 1.0237617492675781, "rewards/accuracies": 0.875, "rewards/chosen": -0.07763370871543884, "rewards/margins": 0.2598330080509186, "rewards/rejected": -0.3374667465686798, "step": 6616 }, { "epoch": 4.036602104621016, "grad_norm": 1.3829110860824585, "learning_rate": 1.5461114513165951e-06, "log_odds_chosen": 2.3666155338287354, "log_odds_ratio": -0.32307174801826477, "logits/chosen": -0.8957592248916626, "logits/rejected": -1.036661982536316, "logps/chosen": -0.6815657615661621, "logps/rejected": -2.5161962509155273, "loss": 1.1071, "nll_loss": 0.870663583278656, "rewards/accuracies": 0.875, "rewards/chosen": -0.06815657764673233, "rewards/margins": 0.18346303701400757, "rewards/rejected": -0.2516196072101593, "step": 6617 }, { "epoch": 4.037212139698033, "grad_norm": 1.6560684442520142, "learning_rate": 1.5451316595223515e-06, "log_odds_chosen": 1.0710303783416748, "log_odds_ratio": -0.4551224410533905, "logits/chosen": -1.0390256643295288, "logits/rejected": -1.0671709775924683, "logps/chosen": -0.943900465965271, "logps/rejected": -1.7821919918060303, "loss": 0.9503, "nll_loss": 1.0333147048950195, "rewards/accuracies": 0.75, "rewards/chosen": -0.09439004957675934, "rewards/margins": 0.08382915705442429, "rewards/rejected": -0.17821919918060303, "step": 6618 }, { "epoch": 4.03782217477505, "grad_norm": 3.6546878814697266, "learning_rate": 1.5441518677281078e-06, "log_odds_chosen": 2.1519198417663574, "log_odds_ratio": -0.5095921754837036, "logits/chosen": -0.9690662622451782, "logits/rejected": -1.0845530033111572, "logps/chosen": -0.8025269508361816, "logps/rejected": -2.2538161277770996, "loss": 0.996, "nll_loss": 1.0105963945388794, "rewards/accuracies": 0.625, "rewards/chosen": -0.08025269210338593, "rewards/margins": 0.14512893557548523, "rewards/rejected": -0.22538162767887115, "step": 6619 }, { "epoch": 4.038432209852067, "grad_norm": 2.673706531524658, "learning_rate": 1.543172075933864e-06, "log_odds_chosen": 0.6583520770072937, "log_odds_ratio": -0.5005385279655457, "logits/chosen": -0.9813767671585083, "logits/rejected": -0.9498317837715149, "logps/chosen": -0.8302823305130005, "logps/rejected": -1.3340028524398804, "loss": 1.1594, "nll_loss": 1.033279538154602, "rewards/accuracies": 0.75, "rewards/chosen": -0.08302823454141617, "rewards/margins": 0.050372056663036346, "rewards/rejected": -0.13340029120445251, "step": 6620 }, { "epoch": 4.0390422449290835, "grad_norm": 1.212684154510498, "learning_rate": 1.5421922841396202e-06, "log_odds_chosen": 1.3493900299072266, "log_odds_ratio": -0.49091583490371704, "logits/chosen": -0.8745649456977844, "logits/rejected": -0.8961458802223206, "logps/chosen": -0.6480283737182617, "logps/rejected": -1.5753830671310425, "loss": 1.1587, "nll_loss": 0.9081887006759644, "rewards/accuracies": 0.875, "rewards/chosen": -0.06480284035205841, "rewards/margins": 0.09273547679185867, "rewards/rejected": -0.1575382947921753, "step": 6621 }, { "epoch": 4.0396522800061, "grad_norm": 1.447623610496521, "learning_rate": 1.5412124923453766e-06, "log_odds_chosen": 2.2308592796325684, "log_odds_ratio": -0.3318634331226349, "logits/chosen": -0.9365565776824951, "logits/rejected": -0.8680421710014343, "logps/chosen": -0.8030694723129272, "logps/rejected": -2.7020983695983887, "loss": 1.0421, "nll_loss": 0.9926316738128662, "rewards/accuracies": 0.75, "rewards/chosen": -0.08030693978071213, "rewards/margins": 0.1899029165506363, "rewards/rejected": -0.2702098488807678, "step": 6622 }, { "epoch": 4.040262315083117, "grad_norm": 1.1898428201675415, "learning_rate": 1.540232700551133e-06, "log_odds_chosen": 1.9769392013549805, "log_odds_ratio": -0.31805145740509033, "logits/chosen": -0.9097194075584412, "logits/rejected": -0.9228155612945557, "logps/chosen": -0.8272686004638672, "logps/rejected": -2.264641046524048, "loss": 1.1593, "nll_loss": 1.0361508131027222, "rewards/accuracies": 0.75, "rewards/chosen": -0.0827268660068512, "rewards/margins": 0.14373724162578583, "rewards/rejected": -0.22646410763263702, "step": 6623 }, { "epoch": 4.040872350160134, "grad_norm": 1.3338102102279663, "learning_rate": 1.539252908756889e-06, "log_odds_chosen": 2.1680591106414795, "log_odds_ratio": -0.45821890234947205, "logits/chosen": -0.7522121667861938, "logits/rejected": -0.909186065196991, "logps/chosen": -0.746131420135498, "logps/rejected": -2.4271116256713867, "loss": 0.985, "nll_loss": 0.9802289009094238, "rewards/accuracies": 0.75, "rewards/chosen": -0.07461313903331757, "rewards/margins": 0.16809803247451782, "rewards/rejected": -0.2427111566066742, "step": 6624 }, { "epoch": 4.041482385237151, "grad_norm": 8.676034927368164, "learning_rate": 1.5382731169626454e-06, "log_odds_chosen": 1.4531525373458862, "log_odds_ratio": -0.35789304971694946, "logits/chosen": -0.9280699491500854, "logits/rejected": -0.8830001950263977, "logps/chosen": -0.7516529560089111, "logps/rejected": -1.817199468612671, "loss": 1.0577, "nll_loss": 1.252695083618164, "rewards/accuracies": 0.875, "rewards/chosen": -0.075165294110775, "rewards/margins": 0.10655464977025986, "rewards/rejected": -0.18171992897987366, "step": 6625 }, { "epoch": 4.042092420314168, "grad_norm": 1.6888350248336792, "learning_rate": 1.5372933251684017e-06, "log_odds_chosen": 1.5674145221710205, "log_odds_ratio": -0.47726431488990784, "logits/chosen": -0.8853185772895813, "logits/rejected": -1.039273738861084, "logps/chosen": -0.7435742020606995, "logps/rejected": -2.0000507831573486, "loss": 1.0157, "nll_loss": 1.1654738187789917, "rewards/accuracies": 0.75, "rewards/chosen": -0.07435742020606995, "rewards/margins": 0.1256476789712906, "rewards/rejected": -0.20000509917736053, "step": 6626 }, { "epoch": 4.042702455391185, "grad_norm": 1.6242550611495972, "learning_rate": 1.5363135333741578e-06, "log_odds_chosen": 1.7479074001312256, "log_odds_ratio": -0.40606117248535156, "logits/chosen": -0.60838782787323, "logits/rejected": -0.5669727325439453, "logps/chosen": -0.5939165353775024, "logps/rejected": -1.6861226558685303, "loss": 0.9265, "nll_loss": 0.6609886884689331, "rewards/accuracies": 0.875, "rewards/chosen": -0.05939164757728577, "rewards/margins": 0.10922062397003174, "rewards/rejected": -0.1686122715473175, "step": 6627 }, { "epoch": 4.043312490468202, "grad_norm": 1.8322432041168213, "learning_rate": 1.5353337415799142e-06, "log_odds_chosen": 0.4035540223121643, "log_odds_ratio": -0.6047232747077942, "logits/chosen": -1.0803850889205933, "logits/rejected": -0.965510904788971, "logps/chosen": -1.290473222732544, "logps/rejected": -1.5387449264526367, "loss": 1.1296, "nll_loss": 1.4696990251541138, "rewards/accuracies": 0.625, "rewards/chosen": -0.12904733419418335, "rewards/margins": 0.024827156215906143, "rewards/rejected": -0.1538744866847992, "step": 6628 }, { "epoch": 4.043922525545219, "grad_norm": 1.8673936128616333, "learning_rate": 1.5343539497856705e-06, "log_odds_chosen": 2.192394256591797, "log_odds_ratio": -0.4197799563407898, "logits/chosen": -0.8393469452857971, "logits/rejected": -0.9224504828453064, "logps/chosen": -0.7741339206695557, "logps/rejected": -2.5759084224700928, "loss": 1.1131, "nll_loss": 1.0605348348617554, "rewards/accuracies": 0.625, "rewards/chosen": -0.07741338759660721, "rewards/margins": 0.1801774799823761, "rewards/rejected": -0.2575908601284027, "step": 6629 }, { "epoch": 4.044532560622236, "grad_norm": 1.1076236963272095, "learning_rate": 1.5333741579914268e-06, "log_odds_chosen": 2.9704346656799316, "log_odds_ratio": -0.2662862539291382, "logits/chosen": -0.9866963624954224, "logits/rejected": -0.9853922128677368, "logps/chosen": -0.8335882425308228, "logps/rejected": -3.3600120544433594, "loss": 1.0423, "nll_loss": 1.024531364440918, "rewards/accuracies": 0.875, "rewards/chosen": -0.08335882425308228, "rewards/margins": 0.2526423931121826, "rewards/rejected": -0.3360012173652649, "step": 6630 }, { "epoch": 4.045142595699253, "grad_norm": 1.66954505443573, "learning_rate": 1.532394366197183e-06, "log_odds_chosen": 2.1122751235961914, "log_odds_ratio": -0.5539424419403076, "logits/chosen": -0.7082027196884155, "logits/rejected": -0.9059586524963379, "logps/chosen": -0.4999350607395172, "logps/rejected": -2.1650290489196777, "loss": 0.9555, "nll_loss": 0.7603442668914795, "rewards/accuracies": 0.75, "rewards/chosen": -0.04999350383877754, "rewards/margins": 0.16650941967964172, "rewards/rejected": -0.21650293469429016, "step": 6631 }, { "epoch": 4.04575263077627, "grad_norm": 1.9679027795791626, "learning_rate": 1.5314145744029393e-06, "log_odds_chosen": 1.6725496053695679, "log_odds_ratio": -0.2684955596923828, "logits/chosen": -0.7044768929481506, "logits/rejected": -0.7232146263122559, "logps/chosen": -0.66167813539505, "logps/rejected": -1.7847485542297363, "loss": 0.9747, "nll_loss": 0.7441468238830566, "rewards/accuracies": 1.0, "rewards/chosen": -0.06616781651973724, "rewards/margins": 0.11230704933404922, "rewards/rejected": -0.17847487330436707, "step": 6632 }, { "epoch": 4.0463626658532865, "grad_norm": 4.049924373626709, "learning_rate": 1.5304347826086956e-06, "log_odds_chosen": 2.3915367126464844, "log_odds_ratio": -0.30966833233833313, "logits/chosen": -0.8257523775100708, "logits/rejected": -0.9639242887496948, "logps/chosen": -0.735550045967102, "logps/rejected": -2.540585994720459, "loss": 0.9676, "nll_loss": 1.04203200340271, "rewards/accuracies": 0.875, "rewards/chosen": -0.07355500757694244, "rewards/margins": 0.18050360679626465, "rewards/rejected": -0.2540585994720459, "step": 6633 }, { "epoch": 4.046972700930303, "grad_norm": 1.6429485082626343, "learning_rate": 1.5294549908144517e-06, "log_odds_chosen": 0.3208134174346924, "log_odds_ratio": -0.7166624665260315, "logits/chosen": -1.037933349609375, "logits/rejected": -1.0761054754257202, "logps/chosen": -1.0153433084487915, "logps/rejected": -1.2711663246154785, "loss": 1.1531, "nll_loss": 1.1771295070648193, "rewards/accuracies": 0.5, "rewards/chosen": -0.10153433680534363, "rewards/margins": 0.025582294911146164, "rewards/rejected": -0.1271166205406189, "step": 6634 }, { "epoch": 4.04758273600732, "grad_norm": 2.7448272705078125, "learning_rate": 1.528475199020208e-06, "log_odds_chosen": 3.016354560852051, "log_odds_ratio": -0.27636706829071045, "logits/chosen": -1.0495818853378296, "logits/rejected": -1.130542516708374, "logps/chosen": -0.7874062061309814, "logps/rejected": -3.2964601516723633, "loss": 0.9659, "nll_loss": 0.9933438301086426, "rewards/accuracies": 0.875, "rewards/chosen": -0.07874062657356262, "rewards/margins": 0.2509053945541382, "rewards/rejected": -0.3296459913253784, "step": 6635 }, { "epoch": 4.048192771084337, "grad_norm": 1.6163748502731323, "learning_rate": 1.5274954072259644e-06, "log_odds_chosen": 2.8471932411193848, "log_odds_ratio": -0.20778419077396393, "logits/chosen": -0.792499840259552, "logits/rejected": -0.8391823768615723, "logps/chosen": -0.6411705017089844, "logps/rejected": -2.741326093673706, "loss": 0.8725, "nll_loss": 0.750888466835022, "rewards/accuracies": 0.875, "rewards/chosen": -0.06411705911159515, "rewards/margins": 0.21001556515693665, "rewards/rejected": -0.2741326093673706, "step": 6636 }, { "epoch": 4.048802806161354, "grad_norm": 3.2894487380981445, "learning_rate": 1.5265156154317207e-06, "log_odds_chosen": 0.25887221097946167, "log_odds_ratio": -0.7468221783638, "logits/chosen": -1.1043293476104736, "logits/rejected": -0.9887759685516357, "logps/chosen": -0.9786068201065063, "logps/rejected": -1.1657902002334595, "loss": 1.1754, "nll_loss": 1.2565207481384277, "rewards/accuracies": 0.375, "rewards/chosen": -0.0978606790304184, "rewards/margins": 0.01871834136545658, "rewards/rejected": -0.11657902598381042, "step": 6637 }, { "epoch": 4.049412841238372, "grad_norm": 3.6902406215667725, "learning_rate": 1.5255358236374769e-06, "log_odds_chosen": 3.2962911128997803, "log_odds_ratio": -0.20932018756866455, "logits/chosen": -0.9985030293464661, "logits/rejected": -1.0913071632385254, "logps/chosen": -0.9620786905288696, "logps/rejected": -3.838056802749634, "loss": 1.1989, "nll_loss": 1.1785110235214233, "rewards/accuracies": 0.875, "rewards/chosen": -0.0962078720331192, "rewards/margins": 0.2875978350639343, "rewards/rejected": -0.38380569219589233, "step": 6638 }, { "epoch": 4.0500228763153885, "grad_norm": 8.67151927947998, "learning_rate": 1.5245560318432334e-06, "log_odds_chosen": 2.7805681228637695, "log_odds_ratio": -0.3128000795841217, "logits/chosen": -0.8060437440872192, "logits/rejected": -0.9687272906303406, "logps/chosen": -0.7035792469978333, "logps/rejected": -2.9120397567749023, "loss": 1.0002, "nll_loss": 0.9122463464736938, "rewards/accuracies": 0.75, "rewards/chosen": -0.07035792618989944, "rewards/margins": 0.220846027135849, "rewards/rejected": -0.29120397567749023, "step": 6639 }, { "epoch": 4.050632911392405, "grad_norm": 9.024361610412598, "learning_rate": 1.5235762400489895e-06, "log_odds_chosen": 2.410386323928833, "log_odds_ratio": -0.2456689178943634, "logits/chosen": -0.7891898155212402, "logits/rejected": -0.8473657369613647, "logps/chosen": -0.7871538400650024, "logps/rejected": -2.7126379013061523, "loss": 1.1088, "nll_loss": 0.9801568984985352, "rewards/accuracies": 0.875, "rewards/chosen": -0.07871538400650024, "rewards/margins": 0.19254837930202484, "rewards/rejected": -0.2712637782096863, "step": 6640 }, { "epoch": 4.051242946469422, "grad_norm": 15.667455673217773, "learning_rate": 1.5225964482547457e-06, "log_odds_chosen": 2.557262659072876, "log_odds_ratio": -0.40624240040779114, "logits/chosen": -0.9183309078216553, "logits/rejected": -1.0509016513824463, "logps/chosen": -0.6931911706924438, "logps/rejected": -2.8708748817443848, "loss": 1.1057, "nll_loss": 0.9679434299468994, "rewards/accuracies": 0.875, "rewards/chosen": -0.06931912153959274, "rewards/margins": 0.2177683562040329, "rewards/rejected": -0.28708747029304504, "step": 6641 }, { "epoch": 4.051852981546439, "grad_norm": 2.0032122135162354, "learning_rate": 1.521616656460502e-06, "log_odds_chosen": 1.9122464656829834, "log_odds_ratio": -0.32768455147743225, "logits/chosen": -0.8232372999191284, "logits/rejected": -0.9876178503036499, "logps/chosen": -0.6575099229812622, "logps/rejected": -2.1433639526367188, "loss": 0.9899, "nll_loss": 0.8595079779624939, "rewards/accuracies": 0.875, "rewards/chosen": -0.06575100123882294, "rewards/margins": 0.14858539402484894, "rewards/rejected": -0.21433641016483307, "step": 6642 }, { "epoch": 4.052463016623456, "grad_norm": 1.304250717163086, "learning_rate": 1.5206368646662583e-06, "log_odds_chosen": 1.625315546989441, "log_odds_ratio": -0.7795892357826233, "logits/chosen": -1.041409969329834, "logits/rejected": -1.0039079189300537, "logps/chosen": -1.051416277885437, "logps/rejected": -2.5759074687957764, "loss": 1.2837, "nll_loss": 1.388240098953247, "rewards/accuracies": 0.625, "rewards/chosen": -0.10514163970947266, "rewards/margins": 0.1524491310119629, "rewards/rejected": -0.25759077072143555, "step": 6643 }, { "epoch": 4.053073051700473, "grad_norm": 2.7530996799468994, "learning_rate": 1.5196570728720147e-06, "log_odds_chosen": 1.6332284212112427, "log_odds_ratio": -0.35754016041755676, "logits/chosen": -0.9873560667037964, "logits/rejected": -0.9485652446746826, "logps/chosen": -0.9258855581283569, "logps/rejected": -2.164614200592041, "loss": 1.0035, "nll_loss": 0.9758813381195068, "rewards/accuracies": 0.75, "rewards/chosen": -0.09258855879306793, "rewards/margins": 0.12387287616729736, "rewards/rejected": -0.2164614349603653, "step": 6644 }, { "epoch": 4.05368308677749, "grad_norm": 1.7615333795547485, "learning_rate": 1.5186772810777708e-06, "log_odds_chosen": 1.6576673984527588, "log_odds_ratio": -0.3279821276664734, "logits/chosen": -1.0839519500732422, "logits/rejected": -1.1309521198272705, "logps/chosen": -0.692658543586731, "logps/rejected": -1.9278557300567627, "loss": 1.0783, "nll_loss": 0.8998846411705017, "rewards/accuracies": 1.0, "rewards/chosen": -0.06926585733890533, "rewards/margins": 0.12351971119642258, "rewards/rejected": -0.19278556108474731, "step": 6645 }, { "epoch": 4.054293121854506, "grad_norm": 14.731963157653809, "learning_rate": 1.5176974892835273e-06, "log_odds_chosen": 1.4794087409973145, "log_odds_ratio": -0.2976917624473572, "logits/chosen": -0.8848180770874023, "logits/rejected": -1.0600526332855225, "logps/chosen": -0.6357868909835815, "logps/rejected": -1.6158415079116821, "loss": 1.0345, "nll_loss": 0.8294975161552429, "rewards/accuracies": 1.0, "rewards/chosen": -0.06357869505882263, "rewards/margins": 0.09800545871257782, "rewards/rejected": -0.16158415377140045, "step": 6646 }, { "epoch": 4.054903156931523, "grad_norm": 2.2346913814544678, "learning_rate": 1.5167176974892834e-06, "log_odds_chosen": 2.521754741668701, "log_odds_ratio": -0.22392264008522034, "logits/chosen": -1.0802736282348633, "logits/rejected": -1.1834497451782227, "logps/chosen": -0.8889548778533936, "logps/rejected": -2.9872560501098633, "loss": 1.0972, "nll_loss": 1.1918416023254395, "rewards/accuracies": 1.0, "rewards/chosen": -0.08889548480510712, "rewards/margins": 0.2098301202058792, "rewards/rejected": -0.2987256348133087, "step": 6647 }, { "epoch": 4.05551319200854, "grad_norm": 3.699551820755005, "learning_rate": 1.5157379056950396e-06, "log_odds_chosen": 3.1047377586364746, "log_odds_ratio": -0.3997374475002289, "logits/chosen": -0.9750971794128418, "logits/rejected": -1.1933298110961914, "logps/chosen": -1.0800526142120361, "logps/rejected": -3.9345149993896484, "loss": 1.1359, "nll_loss": 1.0946171283721924, "rewards/accuracies": 0.75, "rewards/chosen": -0.10800526291131973, "rewards/margins": 0.28544625639915466, "rewards/rejected": -0.3934515118598938, "step": 6648 }, { "epoch": 4.056123227085558, "grad_norm": 1.8093485832214355, "learning_rate": 1.5147581139007961e-06, "log_odds_chosen": 2.0237984657287598, "log_odds_ratio": -0.45121678709983826, "logits/chosen": -0.9849860668182373, "logits/rejected": -1.0109647512435913, "logps/chosen": -0.8808764219284058, "logps/rejected": -2.644956588745117, "loss": 1.0319, "nll_loss": 1.1535533666610718, "rewards/accuracies": 0.625, "rewards/chosen": -0.08808764070272446, "rewards/margins": 0.17640800774097443, "rewards/rejected": -0.2644956707954407, "step": 6649 }, { "epoch": 4.056733262162575, "grad_norm": 4.448615550994873, "learning_rate": 1.5137783221065522e-06, "log_odds_chosen": 3.5597949028015137, "log_odds_ratio": -0.28253623843193054, "logits/chosen": -0.8079190254211426, "logits/rejected": -1.0276498794555664, "logps/chosen": -0.6299199461936951, "logps/rejected": -3.589597702026367, "loss": 1.1246, "nll_loss": 0.7970403432846069, "rewards/accuracies": 0.875, "rewards/chosen": -0.06299199908971786, "rewards/margins": 0.29596778750419617, "rewards/rejected": -0.35895979404449463, "step": 6650 }, { "epoch": 4.0573432972395915, "grad_norm": 4.384335041046143, "learning_rate": 1.5127985303123086e-06, "log_odds_chosen": 2.0929760932922363, "log_odds_ratio": -0.38127660751342773, "logits/chosen": -0.807795524597168, "logits/rejected": -0.8915475606918335, "logps/chosen": -0.6790822744369507, "logps/rejected": -2.2648866176605225, "loss": 1.1062, "nll_loss": 0.9835089445114136, "rewards/accuracies": 0.875, "rewards/chosen": -0.06790822744369507, "rewards/margins": 0.15858042240142822, "rewards/rejected": -0.22648866474628448, "step": 6651 }, { "epoch": 4.057953332316608, "grad_norm": 7.591826438903809, "learning_rate": 1.5118187385180647e-06, "log_odds_chosen": 0.9463032484054565, "log_odds_ratio": -0.5733116269111633, "logits/chosen": -0.9856131076812744, "logits/rejected": -1.023946762084961, "logps/chosen": -1.0278995037078857, "logps/rejected": -1.7982308864593506, "loss": 1.0892, "nll_loss": 1.224048376083374, "rewards/accuracies": 0.625, "rewards/chosen": -0.10278993844985962, "rewards/margins": 0.07703316956758499, "rewards/rejected": -0.1798231154680252, "step": 6652 }, { "epoch": 4.058563367393625, "grad_norm": 1.7594889402389526, "learning_rate": 1.5108389467238212e-06, "log_odds_chosen": 1.2839369773864746, "log_odds_ratio": -0.44644421339035034, "logits/chosen": -0.7859371900558472, "logits/rejected": -1.017319679260254, "logps/chosen": -0.8104733824729919, "logps/rejected": -1.7087947130203247, "loss": 0.9314, "nll_loss": 0.929007887840271, "rewards/accuracies": 0.75, "rewards/chosen": -0.08104734122753143, "rewards/margins": 0.0898321270942688, "rewards/rejected": -0.17087946832180023, "step": 6653 }, { "epoch": 4.059173402470642, "grad_norm": 3.796635866165161, "learning_rate": 1.5098591549295774e-06, "log_odds_chosen": 0.6166427135467529, "log_odds_ratio": -0.6299277544021606, "logits/chosen": -0.959945559501648, "logits/rejected": -1.07990300655365, "logps/chosen": -0.9773977994918823, "logps/rejected": -1.3924773931503296, "loss": 1.1393, "nll_loss": 1.0107941627502441, "rewards/accuracies": 0.625, "rewards/chosen": -0.09773978590965271, "rewards/margins": 0.04150797054171562, "rewards/rejected": -0.13924774527549744, "step": 6654 }, { "epoch": 4.059783437547659, "grad_norm": 3.5861167907714844, "learning_rate": 1.5088793631353337e-06, "log_odds_chosen": 2.0015339851379395, "log_odds_ratio": -0.29637476801872253, "logits/chosen": -0.8001171350479126, "logits/rejected": -0.9067021608352661, "logps/chosen": -0.6850544214248657, "logps/rejected": -2.2112178802490234, "loss": 1.0415, "nll_loss": 1.0224189758300781, "rewards/accuracies": 1.0, "rewards/chosen": -0.0685054361820221, "rewards/margins": 0.15261633694171906, "rewards/rejected": -0.22112178802490234, "step": 6655 }, { "epoch": 4.060393472624676, "grad_norm": 2.6102309226989746, "learning_rate": 1.50789957134109e-06, "log_odds_chosen": 0.690435528755188, "log_odds_ratio": -0.529217004776001, "logits/chosen": -0.9782450795173645, "logits/rejected": -0.8871569037437439, "logps/chosen": -0.8934703469276428, "logps/rejected": -1.339514970779419, "loss": 0.9131, "nll_loss": 0.9737027883529663, "rewards/accuracies": 0.625, "rewards/chosen": -0.08934703469276428, "rewards/margins": 0.04460446536540985, "rewards/rejected": -0.13395150005817413, "step": 6656 }, { "epoch": 4.061003507701693, "grad_norm": 4.734148025512695, "learning_rate": 1.5069197795468462e-06, "log_odds_chosen": 2.0056495666503906, "log_odds_ratio": -0.33691591024398804, "logits/chosen": -0.9993213415145874, "logits/rejected": -1.0678068399429321, "logps/chosen": -0.7187861800193787, "logps/rejected": -2.2291624546051025, "loss": 1.0617, "nll_loss": 1.0914591550827026, "rewards/accuracies": 0.875, "rewards/chosen": -0.07187862694263458, "rewards/margins": 0.15103761851787567, "rewards/rejected": -0.22291624546051025, "step": 6657 }, { "epoch": 4.0616135427787095, "grad_norm": 1.887802243232727, "learning_rate": 1.5059399877526025e-06, "log_odds_chosen": 3.1266674995422363, "log_odds_ratio": -0.4277629852294922, "logits/chosen": -0.7132681608200073, "logits/rejected": -0.9290883541107178, "logps/chosen": -0.7600820064544678, "logps/rejected": -3.539005756378174, "loss": 1.0301, "nll_loss": 0.9146698713302612, "rewards/accuracies": 0.75, "rewards/chosen": -0.07600820064544678, "rewards/margins": 0.27789241075515747, "rewards/rejected": -0.35390058159828186, "step": 6658 }, { "epoch": 4.062223577855726, "grad_norm": 2.1566431522369385, "learning_rate": 1.5049601959583588e-06, "log_odds_chosen": 1.324578881263733, "log_odds_ratio": -0.45007917284965515, "logits/chosen": -0.8261130452156067, "logits/rejected": -0.8650062084197998, "logps/chosen": -0.7835855484008789, "logps/rejected": -1.7619893550872803, "loss": 1.0961, "nll_loss": 1.2301368713378906, "rewards/accuracies": 0.625, "rewards/chosen": -0.07835855334997177, "rewards/margins": 0.09784038364887238, "rewards/rejected": -0.17619894444942474, "step": 6659 }, { "epoch": 4.062833612932744, "grad_norm": 1.3298628330230713, "learning_rate": 1.5039804041641152e-06, "log_odds_chosen": 2.419867753982544, "log_odds_ratio": -0.4064139723777771, "logits/chosen": -0.7591301202774048, "logits/rejected": -0.9122470021247864, "logps/chosen": -0.5786490440368652, "logps/rejected": -2.5667006969451904, "loss": 1.1227, "nll_loss": 0.905487596988678, "rewards/accuracies": 0.875, "rewards/chosen": -0.05786490812897682, "rewards/margins": 0.19880518317222595, "rewards/rejected": -0.2566700577735901, "step": 6660 }, { "epoch": 4.063443648009761, "grad_norm": 1.3829480409622192, "learning_rate": 1.5030006123698713e-06, "log_odds_chosen": 1.6892564296722412, "log_odds_ratio": -0.5623233914375305, "logits/chosen": -0.9749169945716858, "logits/rejected": -1.1246585845947266, "logps/chosen": -0.6990562081336975, "logps/rejected": -2.1092135906219482, "loss": 0.9181, "nll_loss": 0.9260548949241638, "rewards/accuracies": 0.625, "rewards/chosen": -0.06990562379360199, "rewards/margins": 0.14101573824882507, "rewards/rejected": -0.21092137694358826, "step": 6661 }, { "epoch": 4.064053683086778, "grad_norm": 1.4796984195709229, "learning_rate": 1.5020208205756276e-06, "log_odds_chosen": 1.9290499687194824, "log_odds_ratio": -0.3312227129936218, "logits/chosen": -1.0289804935455322, "logits/rejected": -1.1390961408615112, "logps/chosen": -0.7012683153152466, "logps/rejected": -2.1829142570495605, "loss": 1.0189, "nll_loss": 0.9378728270530701, "rewards/accuracies": 0.875, "rewards/chosen": -0.07012683153152466, "rewards/margins": 0.14816460013389587, "rewards/rejected": -0.21829143166542053, "step": 6662 }, { "epoch": 4.064663718163795, "grad_norm": 1.4189355373382568, "learning_rate": 1.501041028781384e-06, "log_odds_chosen": 1.0337814092636108, "log_odds_ratio": -0.49283140897750854, "logits/chosen": -0.9164464473724365, "logits/rejected": -0.9791499972343445, "logps/chosen": -0.7528642416000366, "logps/rejected": -1.4407882690429688, "loss": 1.0715, "nll_loss": 0.9164122939109802, "rewards/accuracies": 0.875, "rewards/chosen": -0.07528642565011978, "rewards/margins": 0.06879240274429321, "rewards/rejected": -0.1440788358449936, "step": 6663 }, { "epoch": 4.065273753240811, "grad_norm": 0.9465242028236389, "learning_rate": 1.50006123698714e-06, "log_odds_chosen": 1.3291394710540771, "log_odds_ratio": -0.36456918716430664, "logits/chosen": -0.7169655561447144, "logits/rejected": -0.8433383703231812, "logps/chosen": -0.9345383644104004, "logps/rejected": -1.8184161186218262, "loss": 1.0271, "nll_loss": 1.032605528831482, "rewards/accuracies": 0.625, "rewards/chosen": -0.09345383942127228, "rewards/margins": 0.08838777244091034, "rewards/rejected": -0.18184161186218262, "step": 6664 }, { "epoch": 4.065883788317828, "grad_norm": 5.790257453918457, "learning_rate": 1.4990814451928964e-06, "log_odds_chosen": 3.239590883255005, "log_odds_ratio": -0.25202640891075134, "logits/chosen": -0.9349281787872314, "logits/rejected": -1.0248303413391113, "logps/chosen": -0.6622143983840942, "logps/rejected": -3.1992082595825195, "loss": 1.1148, "nll_loss": 0.8277466297149658, "rewards/accuracies": 0.875, "rewards/chosen": -0.06622143089771271, "rewards/margins": 0.2536994218826294, "rewards/rejected": -0.3199208378791809, "step": 6665 }, { "epoch": 4.066493823394845, "grad_norm": 1.6996675729751587, "learning_rate": 1.4981016533986527e-06, "log_odds_chosen": 0.9116722345352173, "log_odds_ratio": -0.49275606870651245, "logits/chosen": -0.8995782136917114, "logits/rejected": -0.8849818110466003, "logps/chosen": -0.9515767097473145, "logps/rejected": -1.6355016231536865, "loss": 0.9825, "nll_loss": 1.1283022165298462, "rewards/accuracies": 0.625, "rewards/chosen": -0.0951576679944992, "rewards/margins": 0.06839248538017273, "rewards/rejected": -0.16355015337467194, "step": 6666 }, { "epoch": 4.067103858471862, "grad_norm": 1.3025553226470947, "learning_rate": 1.497121861604409e-06, "log_odds_chosen": 1.1357717514038086, "log_odds_ratio": -0.40379083156585693, "logits/chosen": -0.8173455595970154, "logits/rejected": -0.8594195246696472, "logps/chosen": -0.6934345960617065, "logps/rejected": -1.3939119577407837, "loss": 1.0287, "nll_loss": 0.9267213344573975, "rewards/accuracies": 0.875, "rewards/chosen": -0.0693434625864029, "rewards/margins": 0.07004772871732712, "rewards/rejected": -0.13939118385314941, "step": 6667 }, { "epoch": 4.067713893548879, "grad_norm": 1.5336971282958984, "learning_rate": 1.4961420698101652e-06, "log_odds_chosen": 1.3082646131515503, "log_odds_ratio": -0.5684040188789368, "logits/chosen": -0.8169726133346558, "logits/rejected": -0.8649405241012573, "logps/chosen": -0.8423176407814026, "logps/rejected": -1.913830280303955, "loss": 1.1135, "nll_loss": 1.0305557250976562, "rewards/accuracies": 0.625, "rewards/chosen": -0.08423176407814026, "rewards/margins": 0.10715126246213913, "rewards/rejected": -0.19138303399085999, "step": 6668 }, { "epoch": 4.068323928625896, "grad_norm": 3.5028884410858154, "learning_rate": 1.4951622780159217e-06, "log_odds_chosen": -0.3103508949279785, "log_odds_ratio": -0.8799726963043213, "logits/chosen": -0.8366516828536987, "logits/rejected": -0.788849949836731, "logps/chosen": -0.8714085817337036, "logps/rejected": -0.7026017308235168, "loss": 0.963, "nll_loss": 1.0136133432388306, "rewards/accuracies": 0.25, "rewards/chosen": -0.08714086562395096, "rewards/margins": -0.016880687326192856, "rewards/rejected": -0.0702601745724678, "step": 6669 }, { "epoch": 4.0689339637029125, "grad_norm": 1.2109923362731934, "learning_rate": 1.4941824862216779e-06, "log_odds_chosen": 0.915502667427063, "log_odds_ratio": -0.4529072940349579, "logits/chosen": -1.0186959505081177, "logits/rejected": -0.9700948596000671, "logps/chosen": -0.8822207450866699, "logps/rejected": -1.4882146120071411, "loss": 1.0776, "nll_loss": 1.0583581924438477, "rewards/accuracies": 0.625, "rewards/chosen": -0.08822207897901535, "rewards/margins": 0.06059938669204712, "rewards/rejected": -0.14882145822048187, "step": 6670 }, { "epoch": 4.06954399877993, "grad_norm": 2.145648956298828, "learning_rate": 1.493202694427434e-06, "log_odds_chosen": 1.7997431755065918, "log_odds_ratio": -0.464499294757843, "logits/chosen": -0.7040433287620544, "logits/rejected": -0.8114420175552368, "logps/chosen": -0.7695791721343994, "logps/rejected": -2.09991192817688, "loss": 1.0503, "nll_loss": 1.2769488096237183, "rewards/accuracies": 0.625, "rewards/chosen": -0.07695791870355606, "rewards/margins": 0.13303329050540924, "rewards/rejected": -0.2099912166595459, "step": 6671 }, { "epoch": 4.070154033856947, "grad_norm": 1.829894781112671, "learning_rate": 1.4922229026331903e-06, "log_odds_chosen": 3.564887285232544, "log_odds_ratio": -0.22853927314281464, "logits/chosen": -0.9181959629058838, "logits/rejected": -1.0985136032104492, "logps/chosen": -0.8157486319541931, "logps/rejected": -3.7568862438201904, "loss": 1.1641, "nll_loss": 1.0542941093444824, "rewards/accuracies": 0.875, "rewards/chosen": -0.08157486468553543, "rewards/margins": 0.29411375522613525, "rewards/rejected": -0.3756886124610901, "step": 6672 }, { "epoch": 4.070764068933964, "grad_norm": 2.054736852645874, "learning_rate": 1.4912431108389467e-06, "log_odds_chosen": 0.44492775201797485, "log_odds_ratio": -0.5544138550758362, "logits/chosen": -1.1342182159423828, "logits/rejected": -0.9329698085784912, "logps/chosen": -1.0137486457824707, "logps/rejected": -1.3382899761199951, "loss": 1.0273, "nll_loss": 1.2280960083007812, "rewards/accuracies": 0.75, "rewards/chosen": -0.10137486457824707, "rewards/margins": 0.03245412930846214, "rewards/rejected": -0.1338289976119995, "step": 6673 }, { "epoch": 4.071374104010981, "grad_norm": 1.5565261840820312, "learning_rate": 1.490263319044703e-06, "log_odds_chosen": 2.8145158290863037, "log_odds_ratio": -0.2343122661113739, "logits/chosen": -0.8288872838020325, "logits/rejected": -1.018447995185852, "logps/chosen": -0.7251698970794678, "logps/rejected": -2.977332830429077, "loss": 1.0415, "nll_loss": 0.976224958896637, "rewards/accuracies": 0.75, "rewards/chosen": -0.07251699268817902, "rewards/margins": 0.22521628439426422, "rewards/rejected": -0.29773327708244324, "step": 6674 }, { "epoch": 4.071984139087998, "grad_norm": 8.057777404785156, "learning_rate": 1.4892835272504591e-06, "log_odds_chosen": 1.5684467554092407, "log_odds_ratio": -0.36711153388023376, "logits/chosen": -0.8922179937362671, "logits/rejected": -0.9456576108932495, "logps/chosen": -0.7998263239860535, "logps/rejected": -1.9665194749832153, "loss": 1.0716, "nll_loss": 0.9459220170974731, "rewards/accuracies": 0.875, "rewards/chosen": -0.07998263835906982, "rewards/margins": 0.11666931957006454, "rewards/rejected": -0.19665195047855377, "step": 6675 }, { "epoch": 4.0725941741650145, "grad_norm": 1.5894109010696411, "learning_rate": 1.4883037354562157e-06, "log_odds_chosen": 2.107849597930908, "log_odds_ratio": -0.42092445492744446, "logits/chosen": -0.9942777752876282, "logits/rejected": -1.0096349716186523, "logps/chosen": -0.7199552059173584, "logps/rejected": -2.3188440799713135, "loss": 1.0093, "nll_loss": 1.1153950691223145, "rewards/accuracies": 0.75, "rewards/chosen": -0.07199551910161972, "rewards/margins": 0.1598888784646988, "rewards/rejected": -0.2318844050168991, "step": 6676 }, { "epoch": 4.073204209242031, "grad_norm": 5.644143104553223, "learning_rate": 1.4873239436619718e-06, "log_odds_chosen": 3.4890947341918945, "log_odds_ratio": -0.2168060541152954, "logits/chosen": -0.8698807954788208, "logits/rejected": -1.1274713277816772, "logps/chosen": -0.7724620699882507, "logps/rejected": -3.6811037063598633, "loss": 1.0451, "nll_loss": 0.8740622997283936, "rewards/accuracies": 0.875, "rewards/chosen": -0.07724620401859283, "rewards/margins": 0.29086416959762573, "rewards/rejected": -0.3681103587150574, "step": 6677 }, { "epoch": 4.073814244319048, "grad_norm": 3.2354977130889893, "learning_rate": 1.486344151867728e-06, "log_odds_chosen": 1.9376145601272583, "log_odds_ratio": -0.29130199551582336, "logits/chosen": -1.1766971349716187, "logits/rejected": -1.240635633468628, "logps/chosen": -1.0304566621780396, "logps/rejected": -2.6996593475341797, "loss": 1.0468, "nll_loss": 1.226646065711975, "rewards/accuracies": 0.875, "rewards/chosen": -0.10304566472768784, "rewards/margins": 0.1669202744960785, "rewards/rejected": -0.2699659466743469, "step": 6678 }, { "epoch": 4.074424279396065, "grad_norm": 1.3829576969146729, "learning_rate": 1.4853643600734844e-06, "log_odds_chosen": 2.30849289894104, "log_odds_ratio": -0.35314303636550903, "logits/chosen": -0.9216778874397278, "logits/rejected": -0.9805876612663269, "logps/chosen": -0.7683740854263306, "logps/rejected": -2.5967864990234375, "loss": 1.1701, "nll_loss": 0.9662278294563293, "rewards/accuracies": 0.875, "rewards/chosen": -0.07683741301298141, "rewards/margins": 0.1828412562608719, "rewards/rejected": -0.2596786618232727, "step": 6679 }, { "epoch": 4.075034314473082, "grad_norm": 1.4732979536056519, "learning_rate": 1.4843845682792406e-06, "log_odds_chosen": 1.0575042963027954, "log_odds_ratio": -0.528360903263092, "logits/chosen": -0.9626781940460205, "logits/rejected": -0.9388617277145386, "logps/chosen": -0.8349737524986267, "logps/rejected": -1.547181248664856, "loss": 1.0156, "nll_loss": 0.9124864935874939, "rewards/accuracies": 0.75, "rewards/chosen": -0.08349736779928207, "rewards/margins": 0.0712207555770874, "rewards/rejected": -0.15471813082695007, "step": 6680 }, { "epoch": 4.0756443495501, "grad_norm": 3.089345932006836, "learning_rate": 1.483404776484997e-06, "log_odds_chosen": 2.8185882568359375, "log_odds_ratio": -0.2712128758430481, "logits/chosen": -0.7418406009674072, "logits/rejected": -1.026678204536438, "logps/chosen": -0.595285177230835, "logps/rejected": -2.8294053077697754, "loss": 0.8132, "nll_loss": 0.7668241262435913, "rewards/accuracies": 1.0, "rewards/chosen": -0.059528522193431854, "rewards/margins": 0.22341200709342957, "rewards/rejected": -0.2829405665397644, "step": 6681 }, { "epoch": 4.076254384627116, "grad_norm": 1.7227661609649658, "learning_rate": 1.482424984690753e-06, "log_odds_chosen": 1.8113429546356201, "log_odds_ratio": -0.4904460310935974, "logits/chosen": -1.0985114574432373, "logits/rejected": -1.1208419799804688, "logps/chosen": -0.9764630794525146, "logps/rejected": -2.337477445602417, "loss": 1.1489, "nll_loss": 1.3194050788879395, "rewards/accuracies": 0.75, "rewards/chosen": -0.0976463109254837, "rewards/margins": 0.13610145449638367, "rewards/rejected": -0.23374775052070618, "step": 6682 }, { "epoch": 4.076864419704133, "grad_norm": 1.8026072978973389, "learning_rate": 1.4814451928965096e-06, "log_odds_chosen": 1.9229378700256348, "log_odds_ratio": -0.45493829250335693, "logits/chosen": -0.8933833241462708, "logits/rejected": -1.0431712865829468, "logps/chosen": -0.9077778458595276, "logps/rejected": -2.401041030883789, "loss": 1.0221, "nll_loss": 0.9698941111564636, "rewards/accuracies": 0.75, "rewards/chosen": -0.09077778458595276, "rewards/margins": 0.14932633936405182, "rewards/rejected": -0.24010412395000458, "step": 6683 }, { "epoch": 4.07747445478115, "grad_norm": 1.15316903591156, "learning_rate": 1.4804654011022657e-06, "log_odds_chosen": 0.5386577248573303, "log_odds_ratio": -0.5893959999084473, "logits/chosen": -1.0814796686172485, "logits/rejected": -0.9456675052642822, "logps/chosen": -1.035462498664856, "logps/rejected": -1.476070761680603, "loss": 1.204, "nll_loss": 1.1529242992401123, "rewards/accuracies": 0.625, "rewards/chosen": -0.10354625433683395, "rewards/margins": 0.044060833752155304, "rewards/rejected": -0.14760708808898926, "step": 6684 }, { "epoch": 4.078084489858167, "grad_norm": 8.627233505249023, "learning_rate": 1.4794856093080218e-06, "log_odds_chosen": 0.6928168535232544, "log_odds_ratio": -0.542094349861145, "logits/chosen": -0.847266435623169, "logits/rejected": -0.8686832785606384, "logps/chosen": -0.739342212677002, "logps/rejected": -1.1670944690704346, "loss": 0.9404, "nll_loss": 0.9681788682937622, "rewards/accuracies": 0.75, "rewards/chosen": -0.07393422722816467, "rewards/margins": 0.042775221168994904, "rewards/rejected": -0.11670945584774017, "step": 6685 }, { "epoch": 4.078694524935184, "grad_norm": 1.4364184141159058, "learning_rate": 1.4785058175137784e-06, "log_odds_chosen": 2.843329906463623, "log_odds_ratio": -0.32656043767929077, "logits/chosen": -0.9030884504318237, "logits/rejected": -1.172395944595337, "logps/chosen": -0.6689713001251221, "logps/rejected": -2.84855055809021, "loss": 0.9411, "nll_loss": 0.8275375366210938, "rewards/accuracies": 0.625, "rewards/chosen": -0.06689713150262833, "rewards/margins": 0.21795792877674103, "rewards/rejected": -0.28485506772994995, "step": 6686 }, { "epoch": 4.079304560012201, "grad_norm": 1.1529163122177124, "learning_rate": 1.4775260257195345e-06, "log_odds_chosen": 1.018550157546997, "log_odds_ratio": -0.5145503878593445, "logits/chosen": -0.9122209548950195, "logits/rejected": -0.9550433158874512, "logps/chosen": -0.7785776257514954, "logps/rejected": -1.5179717540740967, "loss": 1.1063, "nll_loss": 1.2580325603485107, "rewards/accuracies": 0.625, "rewards/chosen": -0.07785776257514954, "rewards/margins": 0.07393941283226013, "rewards/rejected": -0.15179717540740967, "step": 6687 }, { "epoch": 4.0799145950892175, "grad_norm": 2.3914904594421387, "learning_rate": 1.4765462339252908e-06, "log_odds_chosen": 2.1233835220336914, "log_odds_ratio": -0.43529391288757324, "logits/chosen": -1.0670843124389648, "logits/rejected": -1.070276141166687, "logps/chosen": -0.9855620861053467, "logps/rejected": -2.7234301567077637, "loss": 1.0889, "nll_loss": 1.0461868047714233, "rewards/accuracies": 0.75, "rewards/chosen": -0.09855620563030243, "rewards/margins": 0.17378681898117065, "rewards/rejected": -0.2723430097103119, "step": 6688 }, { "epoch": 4.080524630166234, "grad_norm": 1.2949273586273193, "learning_rate": 1.4755664421310471e-06, "log_odds_chosen": 1.5999906063079834, "log_odds_ratio": -0.29790252447128296, "logits/chosen": -1.04193913936615, "logits/rejected": -1.13125741481781, "logps/chosen": -0.9765869379043579, "logps/rejected": -2.2418434619903564, "loss": 1.0565, "nll_loss": 1.0552380084991455, "rewards/accuracies": 1.0, "rewards/chosen": -0.09765869379043579, "rewards/margins": 0.1265256702899933, "rewards/rejected": -0.22418436408042908, "step": 6689 }, { "epoch": 4.081134665243251, "grad_norm": 1.2425652742385864, "learning_rate": 1.4745866503368035e-06, "log_odds_chosen": 0.5599682331085205, "log_odds_ratio": -0.6448938846588135, "logits/chosen": -1.0690279006958008, "logits/rejected": -0.9405725598335266, "logps/chosen": -0.8897371292114258, "logps/rejected": -1.2145463228225708, "loss": 1.0782, "nll_loss": 1.1083035469055176, "rewards/accuracies": 0.625, "rewards/chosen": -0.08897370845079422, "rewards/margins": 0.03248092532157898, "rewards/rejected": -0.1214546263217926, "step": 6690 }, { "epoch": 4.081744700320268, "grad_norm": 3.461073875427246, "learning_rate": 1.4736068585425596e-06, "log_odds_chosen": 3.9288904666900635, "log_odds_ratio": -0.19219791889190674, "logits/chosen": -0.9392151236534119, "logits/rejected": -0.9789811372756958, "logps/chosen": -0.7125809192657471, "logps/rejected": -4.055595397949219, "loss": 1.0686, "nll_loss": 1.1242347955703735, "rewards/accuracies": 0.875, "rewards/chosen": -0.07125809788703918, "rewards/margins": 0.3343014717102051, "rewards/rejected": -0.4055595397949219, "step": 6691 }, { "epoch": 4.082354735397286, "grad_norm": 1.4997155666351318, "learning_rate": 1.4726270667483157e-06, "log_odds_chosen": 0.5650610327720642, "log_odds_ratio": -0.5530155897140503, "logits/chosen": -0.9235518574714661, "logits/rejected": -0.9401353597640991, "logps/chosen": -0.852163553237915, "logps/rejected": -1.298246145248413, "loss": 1.0095, "nll_loss": 0.9814682006835938, "rewards/accuracies": 0.625, "rewards/chosen": -0.08521636575460434, "rewards/margins": 0.04460824280977249, "rewards/rejected": -0.12982460856437683, "step": 6692 }, { "epoch": 4.082964770474303, "grad_norm": 5.948431968688965, "learning_rate": 1.4716472749540723e-06, "log_odds_chosen": 1.8179211616516113, "log_odds_ratio": -0.36470848321914673, "logits/chosen": -0.819150447845459, "logits/rejected": -0.7942057847976685, "logps/chosen": -0.6003760695457458, "logps/rejected": -1.8256855010986328, "loss": 0.9441, "nll_loss": 0.7359651327133179, "rewards/accuracies": 0.875, "rewards/chosen": -0.060037609189748764, "rewards/margins": 0.12253093719482422, "rewards/rejected": -0.18256855010986328, "step": 6693 }, { "epoch": 4.0835748055513195, "grad_norm": 2.625575304031372, "learning_rate": 1.4706674831598284e-06, "log_odds_chosen": 2.014469623565674, "log_odds_ratio": -0.4380374550819397, "logits/chosen": -0.7946096658706665, "logits/rejected": -0.9358660578727722, "logps/chosen": -0.7706540822982788, "logps/rejected": -2.42875337600708, "loss": 0.9853, "nll_loss": 0.9397981762886047, "rewards/accuracies": 0.75, "rewards/chosen": -0.07706540822982788, "rewards/margins": 0.16580994427204132, "rewards/rejected": -0.242875337600708, "step": 6694 }, { "epoch": 4.084184840628336, "grad_norm": 2.163536548614502, "learning_rate": 1.4696876913655847e-06, "log_odds_chosen": 2.2304325103759766, "log_odds_ratio": -0.4203386902809143, "logits/chosen": -0.9247683882713318, "logits/rejected": -0.9796536564826965, "logps/chosen": -0.7144423127174377, "logps/rejected": -2.4837491512298584, "loss": 1.0149, "nll_loss": 0.8639488220214844, "rewards/accuracies": 0.625, "rewards/chosen": -0.07144423574209213, "rewards/margins": 0.17693068087100983, "rewards/rejected": -0.24837490916252136, "step": 6695 }, { "epoch": 4.084794875705353, "grad_norm": 1.72132408618927, "learning_rate": 1.468707899571341e-06, "log_odds_chosen": 1.5558834075927734, "log_odds_ratio": -0.3647245168685913, "logits/chosen": -0.7897657155990601, "logits/rejected": -0.8909381628036499, "logps/chosen": -0.9073117971420288, "logps/rejected": -2.0727505683898926, "loss": 1.0389, "nll_loss": 0.9595073461532593, "rewards/accuracies": 0.875, "rewards/chosen": -0.0907311886548996, "rewards/margins": 0.11654388904571533, "rewards/rejected": -0.20727506279945374, "step": 6696 }, { "epoch": 4.08540491078237, "grad_norm": 3.243896722793579, "learning_rate": 1.4677281077770974e-06, "log_odds_chosen": 1.0671236515045166, "log_odds_ratio": -0.555381715297699, "logits/chosen": -1.0872448682785034, "logits/rejected": -1.031525731086731, "logps/chosen": -0.8296495079994202, "logps/rejected": -1.5928986072540283, "loss": 1.0196, "nll_loss": 0.9516843557357788, "rewards/accuracies": 0.75, "rewards/chosen": -0.0829649567604065, "rewards/margins": 0.07632491737604141, "rewards/rejected": -0.1592898666858673, "step": 6697 }, { "epoch": 4.086014945859387, "grad_norm": 5.314842700958252, "learning_rate": 1.4667483159828535e-06, "log_odds_chosen": 1.8834033012390137, "log_odds_ratio": -0.4061543047428131, "logits/chosen": -0.8005293607711792, "logits/rejected": -0.8705451488494873, "logps/chosen": -0.724288284778595, "logps/rejected": -2.1629221439361572, "loss": 0.9125, "nll_loss": 0.7220693826675415, "rewards/accuracies": 0.75, "rewards/chosen": -0.07242883741855621, "rewards/margins": 0.14386339485645294, "rewards/rejected": -0.21629223227500916, "step": 6698 }, { "epoch": 4.086624980936404, "grad_norm": 1.8580455780029297, "learning_rate": 1.4657685241886096e-06, "log_odds_chosen": 1.5476375818252563, "log_odds_ratio": -0.371958464384079, "logits/chosen": -0.7414600849151611, "logits/rejected": -0.8923945426940918, "logps/chosen": -0.7632818818092346, "logps/rejected": -1.8221995830535889, "loss": 1.0043, "nll_loss": 1.1580400466918945, "rewards/accuracies": 0.75, "rewards/chosen": -0.07632818818092346, "rewards/margins": 0.10589177906513214, "rewards/rejected": -0.1822199672460556, "step": 6699 }, { "epoch": 4.0872350160134205, "grad_norm": 5.167096138000488, "learning_rate": 1.4647887323943662e-06, "log_odds_chosen": 3.0081186294555664, "log_odds_ratio": -0.4668232798576355, "logits/chosen": -1.0496329069137573, "logits/rejected": -1.1172993183135986, "logps/chosen": -0.7962625026702881, "logps/rejected": -3.4720301628112793, "loss": 1.07, "nll_loss": 0.9205527901649475, "rewards/accuracies": 0.75, "rewards/chosen": -0.07962624728679657, "rewards/margins": 0.26757675409317017, "rewards/rejected": -0.34720301628112793, "step": 6700 }, { "epoch": 4.087845051090437, "grad_norm": 15.926424026489258, "learning_rate": 1.4638089406001223e-06, "log_odds_chosen": 1.890953540802002, "log_odds_ratio": -0.25246110558509827, "logits/chosen": -0.7696897983551025, "logits/rejected": -0.9149198532104492, "logps/chosen": -0.5661932826042175, "logps/rejected": -1.8879215717315674, "loss": 0.8265, "nll_loss": 0.6901111602783203, "rewards/accuracies": 1.0, "rewards/chosen": -0.05661933124065399, "rewards/margins": 0.1321728378534317, "rewards/rejected": -0.1887921690940857, "step": 6701 }, { "epoch": 4.088455086167454, "grad_norm": 1.567242980003357, "learning_rate": 1.4628291488058786e-06, "log_odds_chosen": 2.1264147758483887, "log_odds_ratio": -0.3828656077384949, "logits/chosen": -0.8820471167564392, "logits/rejected": -0.9740740060806274, "logps/chosen": -0.7810816764831543, "logps/rejected": -2.6142053604125977, "loss": 1.084, "nll_loss": 0.9364288449287415, "rewards/accuracies": 0.875, "rewards/chosen": -0.07810817658901215, "rewards/margins": 0.18331235647201538, "rewards/rejected": -0.2614205479621887, "step": 6702 }, { "epoch": 4.089065121244472, "grad_norm": 1.5713093280792236, "learning_rate": 1.461849357011635e-06, "log_odds_chosen": 1.7201623916625977, "log_odds_ratio": -0.3344944715499878, "logits/chosen": -0.9331125020980835, "logits/rejected": -1.0034615993499756, "logps/chosen": -0.7545068264007568, "logps/rejected": -1.9415379762649536, "loss": 0.8694, "nll_loss": 0.8888007402420044, "rewards/accuracies": 0.875, "rewards/chosen": -0.07545068860054016, "rewards/margins": 0.11870311945676804, "rewards/rejected": -0.1941538006067276, "step": 6703 }, { "epoch": 4.089675156321489, "grad_norm": 1.3134431838989258, "learning_rate": 1.4608695652173913e-06, "log_odds_chosen": 1.285042643547058, "log_odds_ratio": -0.46696770191192627, "logits/chosen": -0.7956508994102478, "logits/rejected": -0.8726158142089844, "logps/chosen": -0.8696012496948242, "logps/rejected": -1.7279226779937744, "loss": 1.021, "nll_loss": 1.0568331480026245, "rewards/accuracies": 0.75, "rewards/chosen": -0.08696013689041138, "rewards/margins": 0.0858321413397789, "rewards/rejected": -0.17279228568077087, "step": 6704 }, { "epoch": 4.090285191398506, "grad_norm": 1.8260681629180908, "learning_rate": 1.4598897734231474e-06, "log_odds_chosen": 1.6306467056274414, "log_odds_ratio": -0.3022988438606262, "logits/chosen": -0.9060828685760498, "logits/rejected": -0.9213462471961975, "logps/chosen": -0.7960888147354126, "logps/rejected": -2.0880343914031982, "loss": 0.9786, "nll_loss": 1.1258389949798584, "rewards/accuracies": 1.0, "rewards/chosen": -0.07960887998342514, "rewards/margins": 0.12919458746910095, "rewards/rejected": -0.2088034451007843, "step": 6705 }, { "epoch": 4.0908952264755225, "grad_norm": 5.303091049194336, "learning_rate": 1.458909981628904e-06, "log_odds_chosen": 1.253108024597168, "log_odds_ratio": -0.47233086824417114, "logits/chosen": -0.8041388392448425, "logits/rejected": -0.6665711998939514, "logps/chosen": -0.8066545128822327, "logps/rejected": -1.677032232284546, "loss": 1.1893, "nll_loss": 1.1655011177062988, "rewards/accuracies": 0.625, "rewards/chosen": -0.0806654542684555, "rewards/margins": 0.08703777194023132, "rewards/rejected": -0.16770322620868683, "step": 6706 }, { "epoch": 4.091505261552539, "grad_norm": 1.0482447147369385, "learning_rate": 1.45793018983466e-06, "log_odds_chosen": 3.425934314727783, "log_odds_ratio": -0.25913265347480774, "logits/chosen": -0.6346475481987, "logits/rejected": -0.8903704881668091, "logps/chosen": -0.5070533752441406, "logps/rejected": -3.1634883880615234, "loss": 1.0651, "nll_loss": 0.718259334564209, "rewards/accuracies": 1.0, "rewards/chosen": -0.05070533603429794, "rewards/margins": 0.26564347743988037, "rewards/rejected": -0.3163488507270813, "step": 6707 }, { "epoch": 4.092115296629556, "grad_norm": 3.155562400817871, "learning_rate": 1.4569503980404162e-06, "log_odds_chosen": 2.0774435997009277, "log_odds_ratio": -0.2868288457393646, "logits/chosen": -0.9896569848060608, "logits/rejected": -1.0091804265975952, "logps/chosen": -0.8296476602554321, "logps/rejected": -2.433572769165039, "loss": 1.0818, "nll_loss": 0.9539937973022461, "rewards/accuracies": 1.0, "rewards/chosen": -0.08296477049589157, "rewards/margins": 0.16039252281188965, "rewards/rejected": -0.24335725605487823, "step": 6708 }, { "epoch": 4.092725331706573, "grad_norm": 1.2179365158081055, "learning_rate": 1.4559706062461726e-06, "log_odds_chosen": 1.8746891021728516, "log_odds_ratio": -0.2670229971408844, "logits/chosen": -0.928761899471283, "logits/rejected": -0.9646323323249817, "logps/chosen": -0.6769367456436157, "logps/rejected": -2.008183479309082, "loss": 0.9584, "nll_loss": 0.8765773773193359, "rewards/accuracies": 0.875, "rewards/chosen": -0.06769367307424545, "rewards/margins": 0.13312466442584991, "rewards/rejected": -0.20081834495067596, "step": 6709 }, { "epoch": 4.09333536678359, "grad_norm": 1.5987999439239502, "learning_rate": 1.4549908144519289e-06, "log_odds_chosen": 5.767455577850342, "log_odds_ratio": -0.15160749852657318, "logits/chosen": -0.7016148567199707, "logits/rejected": -0.9986329078674316, "logps/chosen": -0.5665405988693237, "logps/rejected": -5.367104530334473, "loss": 0.9789, "nll_loss": 0.7808809876441956, "rewards/accuracies": 0.875, "rewards/chosen": -0.05665406212210655, "rewards/margins": 0.48005640506744385, "rewards/rejected": -0.5367104411125183, "step": 6710 }, { "epoch": 4.093945401860607, "grad_norm": 1.6432313919067383, "learning_rate": 1.4540110226576852e-06, "log_odds_chosen": 2.3002238273620605, "log_odds_ratio": -0.34841254353523254, "logits/chosen": -0.7471810579299927, "logits/rejected": -0.833768367767334, "logps/chosen": -0.6486350297927856, "logps/rejected": -2.210826873779297, "loss": 0.9953, "nll_loss": 0.9006954431533813, "rewards/accuracies": 0.875, "rewards/chosen": -0.06486350297927856, "rewards/margins": 0.15621918439865112, "rewards/rejected": -0.2210826873779297, "step": 6711 }, { "epoch": 4.094555436937624, "grad_norm": 1.8344099521636963, "learning_rate": 1.4530312308634413e-06, "log_odds_chosen": 3.5137271881103516, "log_odds_ratio": -0.39713960886001587, "logits/chosen": -0.704131007194519, "logits/rejected": -1.0102579593658447, "logps/chosen": -0.8185425996780396, "logps/rejected": -3.9197559356689453, "loss": 1.1807, "nll_loss": 0.9975813627243042, "rewards/accuracies": 0.625, "rewards/chosen": -0.08185426145792007, "rewards/margins": 0.3101212978363037, "rewards/rejected": -0.3919755816459656, "step": 6712 }, { "epoch": 4.09516547201464, "grad_norm": 5.01796293258667, "learning_rate": 1.452051439069198e-06, "log_odds_chosen": 1.110780954360962, "log_odds_ratio": -0.7584017515182495, "logits/chosen": -0.9973812103271484, "logits/rejected": -1.0254160165786743, "logps/chosen": -0.9306117296218872, "logps/rejected": -1.4716075658798218, "loss": 1.0627, "nll_loss": 1.1918768882751465, "rewards/accuracies": 0.375, "rewards/chosen": -0.0930611789226532, "rewards/margins": 0.05409958213567734, "rewards/rejected": -0.14716076850891113, "step": 6713 }, { "epoch": 4.095775507091658, "grad_norm": 1.293075680732727, "learning_rate": 1.451071647274954e-06, "log_odds_chosen": 3.2460100650787354, "log_odds_ratio": -0.3557303249835968, "logits/chosen": -0.757785975933075, "logits/rejected": -1.0125778913497925, "logps/chosen": -0.7852681875228882, "logps/rejected": -3.6150155067443848, "loss": 1.0999, "nll_loss": 0.9796243906021118, "rewards/accuracies": 0.875, "rewards/chosen": -0.0785268247127533, "rewards/margins": 0.2829747200012207, "rewards/rejected": -0.3615015745162964, "step": 6714 }, { "epoch": 4.096385542168675, "grad_norm": 1.766945481300354, "learning_rate": 1.4500918554807101e-06, "log_odds_chosen": 4.210569381713867, "log_odds_ratio": -0.21172894537448883, "logits/chosen": -0.8618447780609131, "logits/rejected": -1.1072609424591064, "logps/chosen": -0.7548093795776367, "logps/rejected": -4.281573295593262, "loss": 1.0255, "nll_loss": 0.9506130218505859, "rewards/accuracies": 1.0, "rewards/chosen": -0.07548093795776367, "rewards/margins": 0.3526764214038849, "rewards/rejected": -0.42815735936164856, "step": 6715 }, { "epoch": 4.096995577245692, "grad_norm": 1.5029243230819702, "learning_rate": 1.4491120636864667e-06, "log_odds_chosen": 1.2015705108642578, "log_odds_ratio": -0.41987425088882446, "logits/chosen": -1.1056607961654663, "logits/rejected": -1.0058917999267578, "logps/chosen": -0.8010904788970947, "logps/rejected": -1.6135896444320679, "loss": 1.1507, "nll_loss": 1.2075408697128296, "rewards/accuracies": 0.875, "rewards/chosen": -0.08010904490947723, "rewards/margins": 0.08124992996454239, "rewards/rejected": -0.16135898232460022, "step": 6716 }, { "epoch": 4.097605612322709, "grad_norm": 1.7282339334487915, "learning_rate": 1.4481322718922228e-06, "log_odds_chosen": 4.073338031768799, "log_odds_ratio": -0.2333846241235733, "logits/chosen": -1.0365278720855713, "logits/rejected": -1.0858471393585205, "logps/chosen": -0.9391074776649475, "logps/rejected": -4.496266841888428, "loss": 1.146, "nll_loss": 1.1259665489196777, "rewards/accuracies": 0.875, "rewards/chosen": -0.09391075372695923, "rewards/margins": 0.35571593046188354, "rewards/rejected": -0.4496266841888428, "step": 6717 }, { "epoch": 4.0982156473997255, "grad_norm": 1.3406178951263428, "learning_rate": 1.4471524800979791e-06, "log_odds_chosen": 0.5925017595291138, "log_odds_ratio": -0.6111143827438354, "logits/chosen": -1.0038843154907227, "logits/rejected": -0.8871450424194336, "logps/chosen": -0.9470251202583313, "logps/rejected": -1.3910632133483887, "loss": 1.0413, "nll_loss": 1.1221272945404053, "rewards/accuracies": 0.5, "rewards/chosen": -0.09470251202583313, "rewards/margins": 0.04440383240580559, "rewards/rejected": -0.13910633325576782, "step": 6718 }, { "epoch": 4.098825682476742, "grad_norm": 1.11147940158844, "learning_rate": 1.4461726883037353e-06, "log_odds_chosen": 3.069979190826416, "log_odds_ratio": -0.18297918140888214, "logits/chosen": -0.697911262512207, "logits/rejected": -0.8719775080680847, "logps/chosen": -0.5093603134155273, "logps/rejected": -2.7640161514282227, "loss": 0.8856, "nll_loss": 0.7041922807693481, "rewards/accuracies": 0.875, "rewards/chosen": -0.050936028361320496, "rewards/margins": 0.2254655957221985, "rewards/rejected": -0.2764016389846802, "step": 6719 }, { "epoch": 4.099435717553759, "grad_norm": 1.484309196472168, "learning_rate": 1.4451928965094918e-06, "log_odds_chosen": 3.340467929840088, "log_odds_ratio": -0.26065972447395325, "logits/chosen": -0.9783942103385925, "logits/rejected": -1.0607030391693115, "logps/chosen": -0.7224929928779602, "logps/rejected": -3.159801721572876, "loss": 1.112, "nll_loss": 1.0316733121871948, "rewards/accuracies": 0.875, "rewards/chosen": -0.07224929332733154, "rewards/margins": 0.24373085796833038, "rewards/rejected": -0.31598013639450073, "step": 6720 }, { "epoch": 4.100045752630776, "grad_norm": 7.729186534881592, "learning_rate": 1.444213104715248e-06, "log_odds_chosen": 1.7531543970108032, "log_odds_ratio": -0.51560378074646, "logits/chosen": -0.932708740234375, "logits/rejected": -1.083423137664795, "logps/chosen": -0.8879232406616211, "logps/rejected": -2.357388734817505, "loss": 1.1528, "nll_loss": 1.0568443536758423, "rewards/accuracies": 0.625, "rewards/chosen": -0.08879232406616211, "rewards/margins": 0.14694656431674957, "rewards/rejected": -0.23573890328407288, "step": 6721 }, { "epoch": 4.100655787707793, "grad_norm": 4.621541976928711, "learning_rate": 1.443233312921004e-06, "log_odds_chosen": 2.275473117828369, "log_odds_ratio": -0.42307060956954956, "logits/chosen": -0.9889867901802063, "logits/rejected": -1.0140904188156128, "logps/chosen": -0.983944296836853, "logps/rejected": -3.056220531463623, "loss": 1.2555, "nll_loss": 1.1278574466705322, "rewards/accuracies": 0.75, "rewards/chosen": -0.09839443117380142, "rewards/margins": 0.20722763240337372, "rewards/rejected": -0.30562204122543335, "step": 6722 }, { "epoch": 4.10126582278481, "grad_norm": 1.8814619779586792, "learning_rate": 1.4422535211267606e-06, "log_odds_chosen": 1.8078864812850952, "log_odds_ratio": -0.4507828950881958, "logits/chosen": -0.8887174129486084, "logits/rejected": -0.9848336577415466, "logps/chosen": -0.8802917003631592, "logps/rejected": -2.171635866165161, "loss": 0.9515, "nll_loss": 1.1088569164276123, "rewards/accuracies": 0.625, "rewards/chosen": -0.0880291685461998, "rewards/margins": 0.129134401679039, "rewards/rejected": -0.2171635627746582, "step": 6723 }, { "epoch": 4.101875857861827, "grad_norm": 1.411099910736084, "learning_rate": 1.4412737293325167e-06, "log_odds_chosen": 2.158355951309204, "log_odds_ratio": -0.3240104913711548, "logits/chosen": -0.8046871423721313, "logits/rejected": -0.8823001384735107, "logps/chosen": -0.7265249490737915, "logps/rejected": -2.2131378650665283, "loss": 1.1073, "nll_loss": 0.9908510446548462, "rewards/accuracies": 0.875, "rewards/chosen": -0.07265249639749527, "rewards/margins": 0.1486612856388092, "rewards/rejected": -0.22131380438804626, "step": 6724 }, { "epoch": 4.102485892938844, "grad_norm": 4.176541328430176, "learning_rate": 1.440293937538273e-06, "log_odds_chosen": 2.094787120819092, "log_odds_ratio": -0.3443799614906311, "logits/chosen": -0.824413537979126, "logits/rejected": -0.8533852696418762, "logps/chosen": -0.7695784568786621, "logps/rejected": -2.435875415802002, "loss": 0.9428, "nll_loss": 0.9016839265823364, "rewards/accuracies": 0.75, "rewards/chosen": -0.07695785164833069, "rewards/margins": 0.16662968695163727, "rewards/rejected": -0.24358753859996796, "step": 6725 }, { "epoch": 4.103095928015861, "grad_norm": 12.899310111999512, "learning_rate": 1.4393141457440294e-06, "log_odds_chosen": 3.360677480697632, "log_odds_ratio": -0.19437482953071594, "logits/chosen": -0.8760584592819214, "logits/rejected": -1.0567436218261719, "logps/chosen": -0.43921801447868347, "logps/rejected": -2.846177816390991, "loss": 1.2469, "nll_loss": 1.280763864517212, "rewards/accuracies": 1.0, "rewards/chosen": -0.04392180219292641, "rewards/margins": 0.240695983171463, "rewards/rejected": -0.2846177816390991, "step": 6726 }, { "epoch": 4.103705963092878, "grad_norm": 1.1441059112548828, "learning_rate": 1.4383343539497857e-06, "log_odds_chosen": 1.2201323509216309, "log_odds_ratio": -0.3829178810119629, "logits/chosen": -0.8279512524604797, "logits/rejected": -0.6452924013137817, "logps/chosen": -0.8349498510360718, "logps/rejected": -1.6974531412124634, "loss": 0.9667, "nll_loss": 0.9962767958641052, "rewards/accuracies": 0.75, "rewards/chosen": -0.08349497616291046, "rewards/margins": 0.08625032752752304, "rewards/rejected": -0.1697452962398529, "step": 6727 }, { "epoch": 4.104315998169895, "grad_norm": 8.378671646118164, "learning_rate": 1.4373545621555418e-06, "log_odds_chosen": 1.3052300214767456, "log_odds_ratio": -0.48542624711990356, "logits/chosen": -0.9090614914894104, "logits/rejected": -1.0837514400482178, "logps/chosen": -0.8587381839752197, "logps/rejected": -1.8805062770843506, "loss": 1.0436, "nll_loss": 1.0291056632995605, "rewards/accuracies": 0.75, "rewards/chosen": -0.0858738124370575, "rewards/margins": 0.10217683017253876, "rewards/rejected": -0.18805064260959625, "step": 6728 }, { "epoch": 4.104926033246912, "grad_norm": 2.4795165061950684, "learning_rate": 1.436374770361298e-06, "log_odds_chosen": 0.6559987664222717, "log_odds_ratio": -0.5299224853515625, "logits/chosen": -0.9148668050765991, "logits/rejected": -0.8662922382354736, "logps/chosen": -0.8830434083938599, "logps/rejected": -1.332722544670105, "loss": 1.1099, "nll_loss": 0.9993695616722107, "rewards/accuracies": 0.625, "rewards/chosen": -0.08830434083938599, "rewards/margins": 0.04496791958808899, "rewards/rejected": -0.13327226042747498, "step": 6729 }, { "epoch": 4.105536068323929, "grad_norm": 2.0802087783813477, "learning_rate": 1.4353949785670545e-06, "log_odds_chosen": 0.20752127468585968, "log_odds_ratio": -0.814773678779602, "logits/chosen": -1.0197888612747192, "logits/rejected": -0.958113968372345, "logps/chosen": -1.1203718185424805, "logps/rejected": -1.3061144351959229, "loss": 1.1548, "nll_loss": 1.2046496868133545, "rewards/accuracies": 0.5, "rewards/chosen": -0.11203718185424805, "rewards/margins": 0.01857426017522812, "rewards/rejected": -0.13061144948005676, "step": 6730 }, { "epoch": 4.106146103400945, "grad_norm": 1.1758966445922852, "learning_rate": 1.4344151867728106e-06, "log_odds_chosen": 2.3080086708068848, "log_odds_ratio": -0.38158315420150757, "logits/chosen": -0.6189908385276794, "logits/rejected": -0.8680872321128845, "logps/chosen": -0.6337342858314514, "logps/rejected": -2.4056105613708496, "loss": 0.8227, "nll_loss": 0.8685217499732971, "rewards/accuracies": 0.875, "rewards/chosen": -0.06337342411279678, "rewards/margins": 0.17718765139579773, "rewards/rejected": -0.24056106805801392, "step": 6731 }, { "epoch": 4.106756138477962, "grad_norm": 1.5771287679672241, "learning_rate": 1.433435394978567e-06, "log_odds_chosen": 3.3181793689727783, "log_odds_ratio": -0.20689232647418976, "logits/chosen": -0.9055710434913635, "logits/rejected": -1.1270005702972412, "logps/chosen": -0.6922158002853394, "logps/rejected": -3.2700676918029785, "loss": 1.0517, "nll_loss": 0.8705160021781921, "rewards/accuracies": 0.875, "rewards/chosen": -0.06922158598899841, "rewards/margins": 0.2577851712703705, "rewards/rejected": -0.3270067572593689, "step": 6732 }, { "epoch": 4.107366173554979, "grad_norm": 2.960465669631958, "learning_rate": 1.4324556031843233e-06, "log_odds_chosen": 2.350289821624756, "log_odds_ratio": -0.37842464447021484, "logits/chosen": -0.8718172311782837, "logits/rejected": -0.9908444881439209, "logps/chosen": -0.6705089807510376, "logps/rejected": -2.493932008743286, "loss": 0.9988, "nll_loss": 0.8286280035972595, "rewards/accuracies": 0.75, "rewards/chosen": -0.06705089658498764, "rewards/margins": 0.1823422908782959, "rewards/rejected": -0.24939319491386414, "step": 6733 }, { "epoch": 4.107976208631996, "grad_norm": 1.8998606204986572, "learning_rate": 1.4314758113900796e-06, "log_odds_chosen": 2.2917959690093994, "log_odds_ratio": -0.3944651782512665, "logits/chosen": -0.8681360483169556, "logits/rejected": -0.8966199159622192, "logps/chosen": -0.8743597269058228, "logps/rejected": -2.5907223224639893, "loss": 1.0864, "nll_loss": 1.05377197265625, "rewards/accuracies": 0.75, "rewards/chosen": -0.08743597567081451, "rewards/margins": 0.17163623869419098, "rewards/rejected": -0.2590722441673279, "step": 6734 }, { "epoch": 4.108586243709013, "grad_norm": 5.662785053253174, "learning_rate": 1.4304960195958358e-06, "log_odds_chosen": 1.408545732498169, "log_odds_ratio": -0.47992461919784546, "logits/chosen": -0.8572748899459839, "logits/rejected": -0.9260682463645935, "logps/chosen": -0.8563797473907471, "logps/rejected": -2.0652854442596436, "loss": 1.0711, "nll_loss": 0.9644069671630859, "rewards/accuracies": 0.625, "rewards/chosen": -0.08563798666000366, "rewards/margins": 0.12089057266712189, "rewards/rejected": -0.20652854442596436, "step": 6735 }, { "epoch": 4.1091962787860306, "grad_norm": 1.0758155584335327, "learning_rate": 1.429516227801592e-06, "log_odds_chosen": 1.4351533651351929, "log_odds_ratio": -0.4793351888656616, "logits/chosen": -1.1084257364273071, "logits/rejected": -1.1213940382003784, "logps/chosen": -0.7470797896385193, "logps/rejected": -1.73109769821167, "loss": 0.9912, "nll_loss": 0.9578039050102234, "rewards/accuracies": 0.75, "rewards/chosen": -0.07470797747373581, "rewards/margins": 0.09840178489685059, "rewards/rejected": -0.173109769821167, "step": 6736 }, { "epoch": 4.109806313863047, "grad_norm": 1.6473069190979004, "learning_rate": 1.4285364360073484e-06, "log_odds_chosen": 2.3250925540924072, "log_odds_ratio": -0.31437885761260986, "logits/chosen": -0.915579617023468, "logits/rejected": -0.9517615437507629, "logps/chosen": -0.7353227138519287, "logps/rejected": -2.643890380859375, "loss": 1.0927, "nll_loss": 0.9586029648780823, "rewards/accuracies": 0.875, "rewards/chosen": -0.07353226840496063, "rewards/margins": 0.19085675477981567, "rewards/rejected": -0.2643890380859375, "step": 6737 }, { "epoch": 4.110416348940064, "grad_norm": 1.0690337419509888, "learning_rate": 1.4275566442131046e-06, "log_odds_chosen": 1.6555126905441284, "log_odds_ratio": -0.4754655361175537, "logits/chosen": -0.9999812841415405, "logits/rejected": -0.9952035546302795, "logps/chosen": -0.8510373830795288, "logps/rejected": -2.1613969802856445, "loss": 1.1092, "nll_loss": 1.0273916721343994, "rewards/accuracies": 0.75, "rewards/chosen": -0.08510373532772064, "rewards/margins": 0.1310359537601471, "rewards/rejected": -0.21613970398902893, "step": 6738 }, { "epoch": 4.111026384017081, "grad_norm": 1.2422778606414795, "learning_rate": 1.4265768524188609e-06, "log_odds_chosen": 2.021733283996582, "log_odds_ratio": -0.29325100779533386, "logits/chosen": -0.7990493774414062, "logits/rejected": -0.8095248937606812, "logps/chosen": -0.6457444429397583, "logps/rejected": -2.0807688236236572, "loss": 1.0848, "nll_loss": 0.9232399463653564, "rewards/accuracies": 1.0, "rewards/chosen": -0.06457444280385971, "rewards/margins": 0.14350242912769318, "rewards/rejected": -0.20807689428329468, "step": 6739 }, { "epoch": 4.111636419094098, "grad_norm": 1.976004719734192, "learning_rate": 1.4255970606246172e-06, "log_odds_chosen": 1.873230218887329, "log_odds_ratio": -0.42820098996162415, "logits/chosen": -1.0314688682556152, "logits/rejected": -1.1095995903015137, "logps/chosen": -0.9760230183601379, "logps/rejected": -2.5400593280792236, "loss": 1.1977, "nll_loss": 1.2804981470108032, "rewards/accuracies": 0.625, "rewards/chosen": -0.09760230779647827, "rewards/margins": 0.15640363097190857, "rewards/rejected": -0.25400590896606445, "step": 6740 }, { "epoch": 4.112246454171115, "grad_norm": 1.8285175561904907, "learning_rate": 1.4246172688303736e-06, "log_odds_chosen": 3.1196391582489014, "log_odds_ratio": -0.3686603009700775, "logits/chosen": -0.8835712671279907, "logits/rejected": -1.1120573282241821, "logps/chosen": -0.6842886209487915, "logps/rejected": -3.267343759536743, "loss": 1.1896, "nll_loss": 1.1666233539581299, "rewards/accuracies": 0.75, "rewards/chosen": -0.06842885911464691, "rewards/margins": 0.25830554962158203, "rewards/rejected": -0.32673439383506775, "step": 6741 }, { "epoch": 4.112856489248132, "grad_norm": 1.304679036140442, "learning_rate": 1.4236374770361297e-06, "log_odds_chosen": 2.4694557189941406, "log_odds_ratio": -0.31111738085746765, "logits/chosen": -0.5605278015136719, "logits/rejected": -0.919366717338562, "logps/chosen": -0.6978928446769714, "logps/rejected": -2.691068649291992, "loss": 1.0387, "nll_loss": 0.8580167293548584, "rewards/accuracies": 0.875, "rewards/chosen": -0.06978928297758102, "rewards/margins": 0.1993175894021988, "rewards/rejected": -0.2691068649291992, "step": 6742 }, { "epoch": 4.1134665243251485, "grad_norm": 1.1641812324523926, "learning_rate": 1.4226576852418862e-06, "log_odds_chosen": 2.2053139209747314, "log_odds_ratio": -0.7019621133804321, "logits/chosen": -1.0115715265274048, "logits/rejected": -1.1851651668548584, "logps/chosen": -0.7403430938720703, "logps/rejected": -2.7259862422943115, "loss": 1.0289, "nll_loss": 1.229276418685913, "rewards/accuracies": 0.75, "rewards/chosen": -0.07403431832790375, "rewards/margins": 0.1985643208026886, "rewards/rejected": -0.27259862422943115, "step": 6743 }, { "epoch": 4.114076559402165, "grad_norm": 4.721680641174316, "learning_rate": 1.4216778934476423e-06, "log_odds_chosen": 0.49957275390625, "log_odds_ratio": -0.6250313520431519, "logits/chosen": -1.0403133630752563, "logits/rejected": -0.9805365204811096, "logps/chosen": -1.1057872772216797, "logps/rejected": -1.4661517143249512, "loss": 1.3179, "nll_loss": 1.2744219303131104, "rewards/accuracies": 0.5, "rewards/chosen": -0.11057871580123901, "rewards/margins": 0.036036454141139984, "rewards/rejected": -0.1466151773929596, "step": 6744 }, { "epoch": 4.114686594479182, "grad_norm": 6.50340461730957, "learning_rate": 1.4206981016533985e-06, "log_odds_chosen": 2.4755825996398926, "log_odds_ratio": -0.3457093834877014, "logits/chosen": -0.8850573897361755, "logits/rejected": -1.0262370109558105, "logps/chosen": -0.932112455368042, "logps/rejected": -3.0402140617370605, "loss": 1.1829, "nll_loss": 1.3306066989898682, "rewards/accuracies": 0.875, "rewards/chosen": -0.09321124851703644, "rewards/margins": 0.21081013977527618, "rewards/rejected": -0.304021418094635, "step": 6745 }, { "epoch": 4.1152966295562, "grad_norm": 1.6274700164794922, "learning_rate": 1.419718309859155e-06, "log_odds_chosen": 1.9463365077972412, "log_odds_ratio": -0.3682098090648651, "logits/chosen": -0.9913061857223511, "logits/rejected": -0.9758344888687134, "logps/chosen": -0.8989310264587402, "logps/rejected": -2.5996718406677246, "loss": 1.0809, "nll_loss": 0.9525401592254639, "rewards/accuracies": 0.75, "rewards/chosen": -0.08989310264587402, "rewards/margins": 0.17007407546043396, "rewards/rejected": -0.259967178106308, "step": 6746 }, { "epoch": 4.115906664633217, "grad_norm": 1.2455209493637085, "learning_rate": 1.4187385180649111e-06, "log_odds_chosen": 0.5595157146453857, "log_odds_ratio": -0.5464911460876465, "logits/chosen": -0.8349759578704834, "logits/rejected": -0.9899604320526123, "logps/chosen": -0.9723630547523499, "logps/rejected": -1.4150514602661133, "loss": 1.0663, "nll_loss": 1.0618534088134766, "rewards/accuracies": 0.625, "rewards/chosen": -0.09723630547523499, "rewards/margins": 0.04426884278655052, "rewards/rejected": -0.1415051519870758, "step": 6747 }, { "epoch": 4.116516699710234, "grad_norm": 1.748350739479065, "learning_rate": 1.4177587262706675e-06, "log_odds_chosen": 3.669098377227783, "log_odds_ratio": -0.21818026900291443, "logits/chosen": -0.8553414344787598, "logits/rejected": -1.126248836517334, "logps/chosen": -0.9445193409919739, "logps/rejected": -4.211409568786621, "loss": 1.1431, "nll_loss": 1.1416754722595215, "rewards/accuracies": 0.875, "rewards/chosen": -0.09445193409919739, "rewards/margins": 0.3266890048980713, "rewards/rejected": -0.42114096879959106, "step": 6748 }, { "epoch": 4.11712673478725, "grad_norm": 6.108729362487793, "learning_rate": 1.4167789344764236e-06, "log_odds_chosen": 3.292264938354492, "log_odds_ratio": -0.49259936809539795, "logits/chosen": -0.8387568593025208, "logits/rejected": -0.9012635946273804, "logps/chosen": -0.8469839692115784, "logps/rejected": -3.90651798248291, "loss": 0.9853, "nll_loss": 1.1759706735610962, "rewards/accuracies": 0.75, "rewards/chosen": -0.0846984013915062, "rewards/margins": 0.30595338344573975, "rewards/rejected": -0.39065179228782654, "step": 6749 }, { "epoch": 4.117736769864267, "grad_norm": 1.6015429496765137, "learning_rate": 1.4157991426821801e-06, "log_odds_chosen": 2.319495439529419, "log_odds_ratio": -0.41971099376678467, "logits/chosen": -0.9257117509841919, "logits/rejected": -1.0476081371307373, "logps/chosen": -0.9223951101303101, "logps/rejected": -2.7813243865966797, "loss": 1.1872, "nll_loss": 1.1923843622207642, "rewards/accuracies": 0.75, "rewards/chosen": -0.09223951399326324, "rewards/margins": 0.18589292466640472, "rewards/rejected": -0.27813243865966797, "step": 6750 }, { "epoch": 4.118346804941284, "grad_norm": 10.523116111755371, "learning_rate": 1.4148193508879363e-06, "log_odds_chosen": -0.15383252501487732, "log_odds_ratio": -0.7966172695159912, "logits/chosen": -1.011250615119934, "logits/rejected": -1.019469976425171, "logps/chosen": -1.2600805759429932, "logps/rejected": -1.152392864227295, "loss": 1.2079, "nll_loss": 1.388794183731079, "rewards/accuracies": 0.375, "rewards/chosen": -0.1260080486536026, "rewards/margins": -0.010768767446279526, "rewards/rejected": -0.11523928493261337, "step": 6751 }, { "epoch": 4.118956840018301, "grad_norm": 2.162673234939575, "learning_rate": 1.4138395590936924e-06, "log_odds_chosen": 1.5546287298202515, "log_odds_ratio": -0.5486451387405396, "logits/chosen": -0.9081064462661743, "logits/rejected": -1.107810378074646, "logps/chosen": -0.7628421783447266, "logps/rejected": -2.028501510620117, "loss": 0.9815, "nll_loss": 0.9602267146110535, "rewards/accuracies": 0.625, "rewards/chosen": -0.07628421485424042, "rewards/margins": 0.12656594812870026, "rewards/rejected": -0.20285016298294067, "step": 6752 }, { "epoch": 4.119566875095318, "grad_norm": 1.9168962240219116, "learning_rate": 1.412859767299449e-06, "log_odds_chosen": 2.811789035797119, "log_odds_ratio": -0.4299284815788269, "logits/chosen": -0.7465149164199829, "logits/rejected": -0.9291296601295471, "logps/chosen": -0.6087113618850708, "logps/rejected": -3.048307418823242, "loss": 1.1355, "nll_loss": 0.868742048740387, "rewards/accuracies": 0.625, "rewards/chosen": -0.06087113916873932, "rewards/margins": 0.24395960569381714, "rewards/rejected": -0.30483075976371765, "step": 6753 }, { "epoch": 4.120176910172335, "grad_norm": 2.0321195125579834, "learning_rate": 1.411879975505205e-06, "log_odds_chosen": 1.1204434633255005, "log_odds_ratio": -0.5863101482391357, "logits/chosen": -0.9504272937774658, "logits/rejected": -0.9182088971138, "logps/chosen": -0.7410802841186523, "logps/rejected": -1.5126457214355469, "loss": 0.8651, "nll_loss": 0.8910669684410095, "rewards/accuracies": 0.5, "rewards/chosen": -0.07410801947116852, "rewards/margins": 0.07715655118227005, "rewards/rejected": -0.15126457810401917, "step": 6754 }, { "epoch": 4.1207869452493515, "grad_norm": 1.5084258317947388, "learning_rate": 1.4109001837109614e-06, "log_odds_chosen": 4.989009857177734, "log_odds_ratio": -0.06664052605628967, "logits/chosen": -0.6535458564758301, "logits/rejected": -1.003068208694458, "logps/chosen": -0.5507657527923584, "logps/rejected": -4.489573001861572, "loss": 0.895, "nll_loss": 0.6784594058990479, "rewards/accuracies": 1.0, "rewards/chosen": -0.05507658049464226, "rewards/margins": 0.3938807249069214, "rewards/rejected": -0.44895732402801514, "step": 6755 }, { "epoch": 4.121396980326368, "grad_norm": 1.408288836479187, "learning_rate": 1.4099203919167177e-06, "log_odds_chosen": 0.19310586154460907, "log_odds_ratio": -0.6648943424224854, "logits/chosen": -0.6214303970336914, "logits/rejected": -0.6879310011863708, "logps/chosen": -0.7828168869018555, "logps/rejected": -0.8398135900497437, "loss": 1.0114, "nll_loss": 0.8252502083778381, "rewards/accuracies": 0.625, "rewards/chosen": -0.07828168570995331, "rewards/margins": 0.005699682515114546, "rewards/rejected": -0.08398137241601944, "step": 6756 }, { "epoch": 4.122007015403386, "grad_norm": 1.1673351526260376, "learning_rate": 1.408940600122474e-06, "log_odds_chosen": 1.8121951818466187, "log_odds_ratio": -0.3027242422103882, "logits/chosen": -0.744566798210144, "logits/rejected": -0.8335162401199341, "logps/chosen": -0.7763731479644775, "logps/rejected": -2.1172752380371094, "loss": 1.0929, "nll_loss": 1.0042437314987183, "rewards/accuracies": 0.875, "rewards/chosen": -0.07763731479644775, "rewards/margins": 0.13409020006656647, "rewards/rejected": -0.21172752976417542, "step": 6757 }, { "epoch": 4.122617050480403, "grad_norm": 2.006218194961548, "learning_rate": 1.4079608083282302e-06, "log_odds_chosen": 1.2364709377288818, "log_odds_ratio": -0.4179569184780121, "logits/chosen": -1.0900105237960815, "logits/rejected": -0.961148738861084, "logps/chosen": -1.0793615579605103, "logps/rejected": -1.8476046323776245, "loss": 1.0687, "nll_loss": 1.2681695222854614, "rewards/accuracies": 0.75, "rewards/chosen": -0.10793615877628326, "rewards/margins": 0.07682430744171143, "rewards/rejected": -0.1847604662179947, "step": 6758 }, { "epoch": 4.12322708555742, "grad_norm": 1.7499693632125854, "learning_rate": 1.4069810165339863e-06, "log_odds_chosen": 1.7510210275650024, "log_odds_ratio": -0.3428769111633301, "logits/chosen": -0.8512054085731506, "logits/rejected": -0.9586646556854248, "logps/chosen": -0.9846110343933105, "logps/rejected": -2.385712146759033, "loss": 1.1784, "nll_loss": 1.2679551839828491, "rewards/accuracies": 0.875, "rewards/chosen": -0.0984610989689827, "rewards/margins": 0.14011013507843018, "rewards/rejected": -0.23857124149799347, "step": 6759 }, { "epoch": 4.123837120634437, "grad_norm": 1.9334890842437744, "learning_rate": 1.4060012247397428e-06, "log_odds_chosen": 2.5749640464782715, "log_odds_ratio": -0.4317398965358734, "logits/chosen": -0.9622131586074829, "logits/rejected": -1.0602848529815674, "logps/chosen": -1.051093578338623, "logps/rejected": -3.3707854747772217, "loss": 1.1101, "nll_loss": 1.2599488496780396, "rewards/accuracies": 0.75, "rewards/chosen": -0.10510937124490738, "rewards/margins": 0.2319691926240921, "rewards/rejected": -0.3370785713195801, "step": 6760 }, { "epoch": 4.1244471557114535, "grad_norm": 1.5797330141067505, "learning_rate": 1.405021432945499e-06, "log_odds_chosen": 2.9198849201202393, "log_odds_ratio": -0.3668573796749115, "logits/chosen": -0.7459239363670349, "logits/rejected": -0.8101612329483032, "logps/chosen": -0.9425230026245117, "logps/rejected": -3.5542538166046143, "loss": 1.0691, "nll_loss": 1.1369774341583252, "rewards/accuracies": 0.875, "rewards/chosen": -0.09425230324268341, "rewards/margins": 0.2611730694770813, "rewards/rejected": -0.3554253876209259, "step": 6761 }, { "epoch": 4.12505719078847, "grad_norm": 5.5903639793396, "learning_rate": 1.4040416411512553e-06, "log_odds_chosen": 1.2551944255828857, "log_odds_ratio": -0.4915248155593872, "logits/chosen": -0.9126415252685547, "logits/rejected": -0.8364195823669434, "logps/chosen": -1.0058335065841675, "logps/rejected": -1.9640448093414307, "loss": 1.0887, "nll_loss": 1.142944097518921, "rewards/accuracies": 0.625, "rewards/chosen": -0.10058335959911346, "rewards/margins": 0.09582113474607468, "rewards/rejected": -0.19640448689460754, "step": 6762 }, { "epoch": 4.125667225865487, "grad_norm": 1.849491834640503, "learning_rate": 1.4030618493570116e-06, "log_odds_chosen": 1.4383373260498047, "log_odds_ratio": -0.6188713312149048, "logits/chosen": -1.0333983898162842, "logits/rejected": -1.0612030029296875, "logps/chosen": -1.1452009677886963, "logps/rejected": -2.453122854232788, "loss": 1.2048, "nll_loss": 1.4286537170410156, "rewards/accuracies": 0.375, "rewards/chosen": -0.1145201027393341, "rewards/margins": 0.13079218566417694, "rewards/rejected": -0.24531228840351105, "step": 6763 }, { "epoch": 4.126277260942504, "grad_norm": 1.8886826038360596, "learning_rate": 1.402082057562768e-06, "log_odds_chosen": 2.5680010318756104, "log_odds_ratio": -0.33446618914604187, "logits/chosen": -0.832780122756958, "logits/rejected": -1.0623433589935303, "logps/chosen": -0.8284858465194702, "logps/rejected": -2.8697376251220703, "loss": 1.0024, "nll_loss": 0.9938653707504272, "rewards/accuracies": 0.875, "rewards/chosen": -0.08284858614206314, "rewards/margins": 0.20412518084049225, "rewards/rejected": -0.286973774433136, "step": 6764 }, { "epoch": 4.126887296019521, "grad_norm": 4.05986213684082, "learning_rate": 1.401102265768524e-06, "log_odds_chosen": 1.4960826635360718, "log_odds_ratio": -0.4442116320133209, "logits/chosen": -0.9374198913574219, "logits/rejected": -0.9755444526672363, "logps/chosen": -0.7872951030731201, "logps/rejected": -2.054044008255005, "loss": 1.0403, "nll_loss": 1.0496151447296143, "rewards/accuracies": 0.875, "rewards/chosen": -0.07872951030731201, "rewards/margins": 0.12667489051818848, "rewards/rejected": -0.2054044008255005, "step": 6765 }, { "epoch": 4.127497331096538, "grad_norm": 1.1215283870697021, "learning_rate": 1.4001224739742802e-06, "log_odds_chosen": 1.7479169368743896, "log_odds_ratio": -0.4826110601425171, "logits/chosen": -0.8755972385406494, "logits/rejected": -1.0252101421356201, "logps/chosen": -0.6728370785713196, "logps/rejected": -1.9960522651672363, "loss": 1.118, "nll_loss": 0.9412003755569458, "rewards/accuracies": 0.75, "rewards/chosen": -0.06728371232748032, "rewards/margins": 0.1323215216398239, "rewards/rejected": -0.19960522651672363, "step": 6766 }, { "epoch": 4.128107366173555, "grad_norm": 3.416069746017456, "learning_rate": 1.3991426821800368e-06, "log_odds_chosen": 0.7467530965805054, "log_odds_ratio": -0.5573667287826538, "logits/chosen": -1.010590672492981, "logits/rejected": -1.039167881011963, "logps/chosen": -0.8223410844802856, "logps/rejected": -1.3793691396713257, "loss": 1.0196, "nll_loss": 1.0599002838134766, "rewards/accuracies": 0.625, "rewards/chosen": -0.08223410695791245, "rewards/margins": 0.055702801793813705, "rewards/rejected": -0.13793690502643585, "step": 6767 }, { "epoch": 4.128717401250572, "grad_norm": 1.62971830368042, "learning_rate": 1.3981628903857929e-06, "log_odds_chosen": 1.3704205751419067, "log_odds_ratio": -0.27702564001083374, "logits/chosen": -0.6607769727706909, "logits/rejected": -0.6206715703010559, "logps/chosen": -0.8394140601158142, "logps/rejected": -1.8358197212219238, "loss": 1.0111, "nll_loss": 0.9138994812965393, "rewards/accuracies": 1.0, "rewards/chosen": -0.08394141495227814, "rewards/margins": 0.09964057058095932, "rewards/rejected": -0.18358197808265686, "step": 6768 }, { "epoch": 4.129327436327589, "grad_norm": 4.7047295570373535, "learning_rate": 1.3971830985915492e-06, "log_odds_chosen": 2.17795729637146, "log_odds_ratio": -0.4784563183784485, "logits/chosen": -0.9448373913764954, "logits/rejected": -1.0110394954681396, "logps/chosen": -0.8740679621696472, "logps/rejected": -2.6456804275512695, "loss": 1.1599, "nll_loss": 1.1797165870666504, "rewards/accuracies": 0.625, "rewards/chosen": -0.08740679919719696, "rewards/margins": 0.17716123163700104, "rewards/rejected": -0.264568030834198, "step": 6769 }, { "epoch": 4.129937471404606, "grad_norm": 3.806837558746338, "learning_rate": 1.3962033067973055e-06, "log_odds_chosen": 2.704730987548828, "log_odds_ratio": -0.28010132908821106, "logits/chosen": -0.6929865479469299, "logits/rejected": -0.8065610527992249, "logps/chosen": -0.6523216962814331, "logps/rejected": -2.69535493850708, "loss": 1.104, "nll_loss": 0.9362452030181885, "rewards/accuracies": 0.875, "rewards/chosen": -0.06523217260837555, "rewards/margins": 0.2043033242225647, "rewards/rejected": -0.26953548192977905, "step": 6770 }, { "epoch": 4.130547506481623, "grad_norm": 5.987960338592529, "learning_rate": 1.3952235150030619e-06, "log_odds_chosen": 1.1657779216766357, "log_odds_ratio": -0.4033304452896118, "logits/chosen": -0.7379563450813293, "logits/rejected": -0.7786442041397095, "logps/chosen": -0.8586788177490234, "logps/rejected": -1.6617056131362915, "loss": 0.9426, "nll_loss": 1.0116472244262695, "rewards/accuracies": 0.75, "rewards/chosen": -0.08586788177490234, "rewards/margins": 0.08030267059803009, "rewards/rejected": -0.16617056727409363, "step": 6771 }, { "epoch": 4.13115754155864, "grad_norm": 1.3089200258255005, "learning_rate": 1.394243723208818e-06, "log_odds_chosen": 2.5724124908447266, "log_odds_ratio": -0.15710300207138062, "logits/chosen": -1.0158417224884033, "logits/rejected": -1.0463775396347046, "logps/chosen": -0.7786939144134521, "logps/rejected": -2.7609171867370605, "loss": 1.1774, "nll_loss": 0.995374858379364, "rewards/accuracies": 1.0, "rewards/chosen": -0.07786940038204193, "rewards/margins": 0.1982223093509674, "rewards/rejected": -0.27609172463417053, "step": 6772 }, { "epoch": 4.1317675766356565, "grad_norm": 4.549338340759277, "learning_rate": 1.3932639314145743e-06, "log_odds_chosen": 3.8411881923675537, "log_odds_ratio": -0.21058836579322815, "logits/chosen": -0.8716263771057129, "logits/rejected": -1.0418840646743774, "logps/chosen": -0.6707737445831299, "logps/rejected": -3.866799831390381, "loss": 1.1321, "nll_loss": 0.9081308841705322, "rewards/accuracies": 0.875, "rewards/chosen": -0.06707737594842911, "rewards/margins": 0.3196026086807251, "rewards/rejected": -0.386680006980896, "step": 6773 }, { "epoch": 4.132377611712673, "grad_norm": 1.3943531513214111, "learning_rate": 1.3922841396203307e-06, "log_odds_chosen": 3.9998302459716797, "log_odds_ratio": -0.253073126077652, "logits/chosen": -0.7969317436218262, "logits/rejected": -0.9429613947868347, "logps/chosen": -0.7216467261314392, "logps/rejected": -4.165287494659424, "loss": 0.9443, "nll_loss": 0.8743033409118652, "rewards/accuracies": 0.875, "rewards/chosen": -0.07216467708349228, "rewards/margins": 0.34436410665512085, "rewards/rejected": -0.4165287911891937, "step": 6774 }, { "epoch": 4.13298764678969, "grad_norm": 9.27368450164795, "learning_rate": 1.3913043478260868e-06, "log_odds_chosen": 1.0024452209472656, "log_odds_ratio": -0.48761722445487976, "logits/chosen": -1.0980494022369385, "logits/rejected": -1.147056221961975, "logps/chosen": -0.8892275094985962, "logps/rejected": -1.5251080989837646, "loss": 0.968, "nll_loss": 1.1628503799438477, "rewards/accuracies": 0.75, "rewards/chosen": -0.08892275393009186, "rewards/margins": 0.06358806043863297, "rewards/rejected": -0.15251082181930542, "step": 6775 }, { "epoch": 4.133597681866707, "grad_norm": 1.7544622421264648, "learning_rate": 1.3903245560318431e-06, "log_odds_chosen": 1.5963263511657715, "log_odds_ratio": -0.4403209686279297, "logits/chosen": -0.9414986371994019, "logits/rejected": -1.0026706457138062, "logps/chosen": -0.8662133812904358, "logps/rejected": -2.15962553024292, "loss": 1.048, "nll_loss": 1.101450800895691, "rewards/accuracies": 0.625, "rewards/chosen": -0.08662134408950806, "rewards/margins": 0.12934119999408722, "rewards/rejected": -0.21596254408359528, "step": 6776 }, { "epoch": 4.134207716943724, "grad_norm": 3.366288423538208, "learning_rate": 1.3893447642375995e-06, "log_odds_chosen": 1.758005142211914, "log_odds_ratio": -0.3175469934940338, "logits/chosen": -0.6560128331184387, "logits/rejected": -0.6787843108177185, "logps/chosen": -0.5610561370849609, "logps/rejected": -1.6841919422149658, "loss": 0.8705, "nll_loss": 0.6842367053031921, "rewards/accuracies": 0.75, "rewards/chosen": -0.056105609983205795, "rewards/margins": 0.11231358349323273, "rewards/rejected": -0.16841918230056763, "step": 6777 }, { "epoch": 4.134817752020741, "grad_norm": 2.6841535568237305, "learning_rate": 1.3883649724433558e-06, "log_odds_chosen": 1.7119909524917603, "log_odds_ratio": -0.49308741092681885, "logits/chosen": -0.8434137105941772, "logits/rejected": -0.9083280563354492, "logps/chosen": -0.7593811750411987, "logps/rejected": -2.0335235595703125, "loss": 1.0583, "nll_loss": 1.0869674682617188, "rewards/accuracies": 0.625, "rewards/chosen": -0.07593811303377151, "rewards/margins": 0.12741424143314362, "rewards/rejected": -0.20335234701633453, "step": 6778 }, { "epoch": 4.1354277870977585, "grad_norm": 1.3904048204421997, "learning_rate": 1.387385180649112e-06, "log_odds_chosen": 1.1213780641555786, "log_odds_ratio": -0.5537136793136597, "logits/chosen": -0.48979297280311584, "logits/rejected": -0.5658837556838989, "logps/chosen": -0.7417211532592773, "logps/rejected": -1.6968342065811157, "loss": 1.1106, "nll_loss": 0.9038222432136536, "rewards/accuracies": 0.5, "rewards/chosen": -0.07417211681604385, "rewards/margins": 0.09551131725311279, "rewards/rejected": -0.16968342661857605, "step": 6779 }, { "epoch": 4.136037822174775, "grad_norm": 1.523905634880066, "learning_rate": 1.3864053888548683e-06, "log_odds_chosen": 3.4976463317871094, "log_odds_ratio": -0.219088613986969, "logits/chosen": -0.8413941860198975, "logits/rejected": -1.0363943576812744, "logps/chosen": -0.6834480166435242, "logps/rejected": -3.2107443809509277, "loss": 1.1159, "nll_loss": 1.1172102689743042, "rewards/accuracies": 1.0, "rewards/chosen": -0.06834480166435242, "rewards/margins": 0.2527296245098114, "rewards/rejected": -0.3210744261741638, "step": 6780 }, { "epoch": 4.136647857251792, "grad_norm": 1.4100393056869507, "learning_rate": 1.3854255970606246e-06, "log_odds_chosen": 3.0372114181518555, "log_odds_ratio": -0.23938700556755066, "logits/chosen": -0.7023050785064697, "logits/rejected": -0.9109585285186768, "logps/chosen": -0.6438581943511963, "logps/rejected": -3.1361083984375, "loss": 0.9553, "nll_loss": 0.8621829748153687, "rewards/accuracies": 0.875, "rewards/chosen": -0.06438582390546799, "rewards/margins": 0.24922502040863037, "rewards/rejected": -0.31361085176467896, "step": 6781 }, { "epoch": 4.137257892328809, "grad_norm": 8.649250984191895, "learning_rate": 1.3844458052663807e-06, "log_odds_chosen": 3.004694938659668, "log_odds_ratio": -0.3976726830005646, "logits/chosen": -0.9993550777435303, "logits/rejected": -1.1432690620422363, "logps/chosen": -0.9051392674446106, "logps/rejected": -3.482401132583618, "loss": 1.1261, "nll_loss": 1.1644287109375, "rewards/accuracies": 0.75, "rewards/chosen": -0.0905139297246933, "rewards/margins": 0.25772619247436523, "rewards/rejected": -0.34824010729789734, "step": 6782 }, { "epoch": 4.137867927405826, "grad_norm": 1.6949900388717651, "learning_rate": 1.3834660134721373e-06, "log_odds_chosen": 2.8164613246917725, "log_odds_ratio": -0.30621254444122314, "logits/chosen": -0.843284547328949, "logits/rejected": -0.9113630056381226, "logps/chosen": -0.7554532289505005, "logps/rejected": -3.0415494441986084, "loss": 0.8888, "nll_loss": 0.8603323101997375, "rewards/accuracies": 0.875, "rewards/chosen": -0.07554532587528229, "rewards/margins": 0.2286096066236496, "rewards/rejected": -0.3041549623012543, "step": 6783 }, { "epoch": 4.138477962482843, "grad_norm": 1.9719352722167969, "learning_rate": 1.3824862216778934e-06, "log_odds_chosen": 3.179058313369751, "log_odds_ratio": -0.24575823545455933, "logits/chosen": -0.907100260257721, "logits/rejected": -1.0604807138442993, "logps/chosen": -0.6073301434516907, "logps/rejected": -3.0846822261810303, "loss": 0.9271, "nll_loss": 0.7586237192153931, "rewards/accuracies": 0.75, "rewards/chosen": -0.06073301285505295, "rewards/margins": 0.24773521721363068, "rewards/rejected": -0.308468222618103, "step": 6784 }, { "epoch": 4.13908799755986, "grad_norm": 16.179826736450195, "learning_rate": 1.3815064298836497e-06, "log_odds_chosen": 0.8270235657691956, "log_odds_ratio": -0.46740230917930603, "logits/chosen": -0.9834170341491699, "logits/rejected": -1.104996919631958, "logps/chosen": -0.9225778579711914, "logps/rejected": -1.4551138877868652, "loss": 1.1593, "nll_loss": 0.9567362070083618, "rewards/accuracies": 0.75, "rewards/chosen": -0.0922577902674675, "rewards/margins": 0.053253598511219025, "rewards/rejected": -0.14551138877868652, "step": 6785 }, { "epoch": 4.139698032636876, "grad_norm": 1.746055006980896, "learning_rate": 1.3805266380894058e-06, "log_odds_chosen": 0.9637980461120605, "log_odds_ratio": -0.5174700021743774, "logits/chosen": -0.7425858974456787, "logits/rejected": -0.959842324256897, "logps/chosen": -0.8848201036453247, "logps/rejected": -1.6074438095092773, "loss": 1.1431, "nll_loss": 0.9935963749885559, "rewards/accuracies": 0.5, "rewards/chosen": -0.08848202228546143, "rewards/margins": 0.07226236164569855, "rewards/rejected": -0.16074438393115997, "step": 6786 }, { "epoch": 4.140308067713893, "grad_norm": 3.1868557929992676, "learning_rate": 1.3795468462951622e-06, "log_odds_chosen": 1.5233197212219238, "log_odds_ratio": -0.37406808137893677, "logits/chosen": -0.8120036721229553, "logits/rejected": -0.7792467474937439, "logps/chosen": -0.7866238951683044, "logps/rejected": -2.0136117935180664, "loss": 1.0467, "nll_loss": 1.1430704593658447, "rewards/accuracies": 1.0, "rewards/chosen": -0.07866239547729492, "rewards/margins": 0.12269878387451172, "rewards/rejected": -0.20136117935180664, "step": 6787 }, { "epoch": 4.14091810279091, "grad_norm": 4.349508285522461, "learning_rate": 1.3785670545009185e-06, "log_odds_chosen": 1.6193996667861938, "log_odds_ratio": -0.38703739643096924, "logits/chosen": -0.7803200483322144, "logits/rejected": -0.9094496965408325, "logps/chosen": -0.7500903606414795, "logps/rejected": -2.0097742080688477, "loss": 1.0769, "nll_loss": 0.9114328026771545, "rewards/accuracies": 0.625, "rewards/chosen": -0.07500903308391571, "rewards/margins": 0.1259683519601822, "rewards/rejected": -0.2009773999452591, "step": 6788 }, { "epoch": 4.141528137867928, "grad_norm": 1.7912654876708984, "learning_rate": 1.3775872627066746e-06, "log_odds_chosen": 3.1280593872070312, "log_odds_ratio": -0.2623554468154907, "logits/chosen": -0.733113169670105, "logits/rejected": -0.9701628684997559, "logps/chosen": -0.5899276733398438, "logps/rejected": -2.851630687713623, "loss": 0.9446, "nll_loss": 1.0197486877441406, "rewards/accuracies": 0.875, "rewards/chosen": -0.05899277329444885, "rewards/margins": 0.22617027163505554, "rewards/rejected": -0.2851630449295044, "step": 6789 }, { "epoch": 4.142138172944945, "grad_norm": 1.8074506521224976, "learning_rate": 1.3766074709124312e-06, "log_odds_chosen": 1.246173620223999, "log_odds_ratio": -0.44199612736701965, "logits/chosen": -0.8180701732635498, "logits/rejected": -0.8152414560317993, "logps/chosen": -0.8097425699234009, "logps/rejected": -1.7510473728179932, "loss": 1.066, "nll_loss": 1.019932508468628, "rewards/accuracies": 0.75, "rewards/chosen": -0.08097425848245621, "rewards/margins": 0.0941304862499237, "rewards/rejected": -0.1751047521829605, "step": 6790 }, { "epoch": 4.1427482080219615, "grad_norm": 1.6569335460662842, "learning_rate": 1.3756276791181873e-06, "log_odds_chosen": 1.2907441854476929, "log_odds_ratio": -0.5759171843528748, "logits/chosen": -1.1040343046188354, "logits/rejected": -1.002495288848877, "logps/chosen": -1.0067670345306396, "logps/rejected": -1.8931077718734741, "loss": 1.2103, "nll_loss": 1.3055839538574219, "rewards/accuracies": 0.625, "rewards/chosen": -0.10067670047283173, "rewards/margins": 0.08863408863544464, "rewards/rejected": -0.18931077420711517, "step": 6791 }, { "epoch": 4.143358243098978, "grad_norm": 1.3245034217834473, "learning_rate": 1.3746478873239436e-06, "log_odds_chosen": 2.622525215148926, "log_odds_ratio": -0.3121163547039032, "logits/chosen": -0.8292819857597351, "logits/rejected": -0.8931555151939392, "logps/chosen": -0.7022148966789246, "logps/rejected": -2.8319756984710693, "loss": 1.0431, "nll_loss": 0.989323616027832, "rewards/accuracies": 0.75, "rewards/chosen": -0.07022149115800858, "rewards/margins": 0.2129760980606079, "rewards/rejected": -0.2831975817680359, "step": 6792 }, { "epoch": 4.143968278175995, "grad_norm": 1.4824572801589966, "learning_rate": 1.3736680955297e-06, "log_odds_chosen": 1.472064733505249, "log_odds_ratio": -0.43027034401893616, "logits/chosen": -0.8821839094161987, "logits/rejected": -0.95112544298172, "logps/chosen": -0.7051985263824463, "logps/rejected": -1.8086304664611816, "loss": 1.0758, "nll_loss": 0.9609301090240479, "rewards/accuracies": 0.625, "rewards/chosen": -0.07051984965801239, "rewards/margins": 0.11034318804740906, "rewards/rejected": -0.18086305260658264, "step": 6793 }, { "epoch": 4.144578313253012, "grad_norm": 1.438971757888794, "learning_rate": 1.3726883037354563e-06, "log_odds_chosen": 1.022101640701294, "log_odds_ratio": -0.5170270204544067, "logits/chosen": -0.8535645008087158, "logits/rejected": -0.8274478912353516, "logps/chosen": -0.9095731377601624, "logps/rejected": -1.6120814085006714, "loss": 1.0909, "nll_loss": 1.2987004518508911, "rewards/accuracies": 0.5, "rewards/chosen": -0.09095731377601624, "rewards/margins": 0.07025083899497986, "rewards/rejected": -0.1612081378698349, "step": 6794 }, { "epoch": 4.145188348330029, "grad_norm": 1.6769498586654663, "learning_rate": 1.3717085119412124e-06, "log_odds_chosen": 1.0357327461242676, "log_odds_ratio": -0.416395902633667, "logits/chosen": -0.9127063751220703, "logits/rejected": -0.8971203565597534, "logps/chosen": -0.6324042081832886, "logps/rejected": -1.0929319858551025, "loss": 1.1483, "nll_loss": 1.0774056911468506, "rewards/accuracies": 0.875, "rewards/chosen": -0.0632404237985611, "rewards/margins": 0.046052780002355576, "rewards/rejected": -0.10929320007562637, "step": 6795 }, { "epoch": 4.145798383407046, "grad_norm": 1.5520591735839844, "learning_rate": 1.3707287201469685e-06, "log_odds_chosen": 1.3284064531326294, "log_odds_ratio": -0.34446579217910767, "logits/chosen": -0.8552958369255066, "logits/rejected": -0.8178150057792664, "logps/chosen": -0.9673567414283752, "logps/rejected": -1.9730199575424194, "loss": 0.9891, "nll_loss": 1.0488992929458618, "rewards/accuracies": 0.875, "rewards/chosen": -0.09673567861318588, "rewards/margins": 0.1005663052201271, "rewards/rejected": -0.19730199873447418, "step": 6796 }, { "epoch": 4.146408418484063, "grad_norm": 8.638785362243652, "learning_rate": 1.369748928352725e-06, "log_odds_chosen": 4.959002494812012, "log_odds_ratio": -0.16545502841472626, "logits/chosen": -0.6489277482032776, "logits/rejected": -0.7630904316902161, "logps/chosen": -0.5705244541168213, "logps/rejected": -4.720562934875488, "loss": 0.9617, "nll_loss": 0.7879676818847656, "rewards/accuracies": 0.875, "rewards/chosen": -0.05705244839191437, "rewards/margins": 0.41500383615493774, "rewards/rejected": -0.4720562696456909, "step": 6797 }, { "epoch": 4.1470184535610795, "grad_norm": 1.3432830572128296, "learning_rate": 1.3687691365584812e-06, "log_odds_chosen": 1.597604751586914, "log_odds_ratio": -0.2762205898761749, "logits/chosen": -0.7426607608795166, "logits/rejected": -0.7817317247390747, "logps/chosen": -0.49220603704452515, "logps/rejected": -1.4149833917617798, "loss": 0.9814, "nll_loss": 0.7420264482498169, "rewards/accuracies": 1.0, "rewards/chosen": -0.049220602959394455, "rewards/margins": 0.09227775037288666, "rewards/rejected": -0.1414983570575714, "step": 6798 }, { "epoch": 4.147628488638096, "grad_norm": 2.789365530014038, "learning_rate": 1.3677893447642375e-06, "log_odds_chosen": 1.5586806535720825, "log_odds_ratio": -0.6036550402641296, "logits/chosen": -1.0279262065887451, "logits/rejected": -1.1008349657058716, "logps/chosen": -0.8166444301605225, "logps/rejected": -2.1575043201446533, "loss": 1.0869, "nll_loss": 1.0804917812347412, "rewards/accuracies": 0.625, "rewards/chosen": -0.08166444301605225, "rewards/margins": 0.1340859830379486, "rewards/rejected": -0.21575042605400085, "step": 6799 }, { "epoch": 4.148238523715114, "grad_norm": 1.761078953742981, "learning_rate": 1.3668095529699939e-06, "log_odds_chosen": 1.4139679670333862, "log_odds_ratio": -0.6604857444763184, "logits/chosen": -1.0554149150848389, "logits/rejected": -1.0539741516113281, "logps/chosen": -0.8798952102661133, "logps/rejected": -1.9699078798294067, "loss": 1.3089, "nll_loss": 1.3443630933761597, "rewards/accuracies": 0.5, "rewards/chosen": -0.08798952400684357, "rewards/margins": 0.1090012639760971, "rewards/rejected": -0.19699078798294067, "step": 6800 }, { "epoch": 4.148848558792131, "grad_norm": 1.736824870109558, "learning_rate": 1.3658297611757502e-06, "log_odds_chosen": 2.8631515502929688, "log_odds_ratio": -0.28922039270401, "logits/chosen": -0.7990832328796387, "logits/rejected": -0.9888631701469421, "logps/chosen": -0.86087566614151, "logps/rejected": -3.2252724170684814, "loss": 1.0927, "nll_loss": 1.0700805187225342, "rewards/accuracies": 0.875, "rewards/chosen": -0.08608756959438324, "rewards/margins": 0.23643970489501953, "rewards/rejected": -0.3225272595882416, "step": 6801 }, { "epoch": 4.149458593869148, "grad_norm": 2.106412410736084, "learning_rate": 1.3648499693815063e-06, "log_odds_chosen": 3.3963656425476074, "log_odds_ratio": -0.22213636338710785, "logits/chosen": -0.6267892718315125, "logits/rejected": -0.8507403135299683, "logps/chosen": -0.5595489740371704, "logps/rejected": -3.177340030670166, "loss": 0.9844, "nll_loss": 0.6606647372245789, "rewards/accuracies": 0.875, "rewards/chosen": -0.05595490336418152, "rewards/margins": 0.2617790699005127, "rewards/rejected": -0.3177340030670166, "step": 6802 }, { "epoch": 4.150068628946165, "grad_norm": 1.7258902788162231, "learning_rate": 1.3638701775872627e-06, "log_odds_chosen": 5.730356216430664, "log_odds_ratio": -0.06902530789375305, "logits/chosen": -0.8044850826263428, "logits/rejected": -1.2194390296936035, "logps/chosen": -0.4897181987762451, "logps/rejected": -5.340736389160156, "loss": 0.9592, "nll_loss": 0.9031838178634644, "rewards/accuracies": 1.0, "rewards/chosen": -0.04897182434797287, "rewards/margins": 0.4851017892360687, "rewards/rejected": -0.5340735912322998, "step": 6803 }, { "epoch": 4.150678664023181, "grad_norm": 1.2424583435058594, "learning_rate": 1.362890385793019e-06, "log_odds_chosen": 2.496739387512207, "log_odds_ratio": -0.28837570548057556, "logits/chosen": -0.8364871144294739, "logits/rejected": -1.0990253686904907, "logps/chosen": -0.6880218386650085, "logps/rejected": -2.6837446689605713, "loss": 0.9156, "nll_loss": 0.8672537803649902, "rewards/accuracies": 0.75, "rewards/chosen": -0.06880219280719757, "rewards/margins": 0.19957229495048523, "rewards/rejected": -0.2683744728565216, "step": 6804 }, { "epoch": 4.151288699100198, "grad_norm": 2.833629608154297, "learning_rate": 1.3619105939987751e-06, "log_odds_chosen": 1.432507872581482, "log_odds_ratio": -0.49476101994514465, "logits/chosen": -1.0242867469787598, "logits/rejected": -1.1979355812072754, "logps/chosen": -0.9057939648628235, "logps/rejected": -2.0160067081451416, "loss": 1.0138, "nll_loss": 1.0917011499404907, "rewards/accuracies": 0.625, "rewards/chosen": -0.09057939052581787, "rewards/margins": 0.11102128773927689, "rewards/rejected": -0.20160067081451416, "step": 6805 }, { "epoch": 4.151898734177215, "grad_norm": 1.3384426832199097, "learning_rate": 1.3609308022045315e-06, "log_odds_chosen": 2.771122932434082, "log_odds_ratio": -0.532314121723175, "logits/chosen": -0.723637580871582, "logits/rejected": -0.9444001913070679, "logps/chosen": -0.7619507908821106, "logps/rejected": -3.1046018600463867, "loss": 1.0305, "nll_loss": 0.9860904216766357, "rewards/accuracies": 0.5, "rewards/chosen": -0.07619507610797882, "rewards/margins": 0.23426511883735657, "rewards/rejected": -0.3104602098464966, "step": 6806 }, { "epoch": 4.152508769254232, "grad_norm": 14.679539680480957, "learning_rate": 1.3599510104102878e-06, "log_odds_chosen": 2.647824764251709, "log_odds_ratio": -0.2461472451686859, "logits/chosen": -0.7122732996940613, "logits/rejected": -0.9445034861564636, "logps/chosen": -0.5856281518936157, "logps/rejected": -2.5465097427368164, "loss": 0.8697, "nll_loss": 0.7529621720314026, "rewards/accuracies": 1.0, "rewards/chosen": -0.05856281891465187, "rewards/margins": 0.19608817994594574, "rewards/rejected": -0.2546509802341461, "step": 6807 }, { "epoch": 4.153118804331249, "grad_norm": 6.2904157638549805, "learning_rate": 1.3589712186160441e-06, "log_odds_chosen": 2.1424875259399414, "log_odds_ratio": -0.2710961103439331, "logits/chosen": -1.0320464372634888, "logits/rejected": -1.0241204500198364, "logps/chosen": -0.6709659099578857, "logps/rejected": -2.170576333999634, "loss": 1.0507, "nll_loss": 1.0165647268295288, "rewards/accuracies": 0.75, "rewards/chosen": -0.06709659099578857, "rewards/margins": 0.14996103942394257, "rewards/rejected": -0.21705763041973114, "step": 6808 }, { "epoch": 4.153728839408266, "grad_norm": 6.097300052642822, "learning_rate": 1.3579914268218002e-06, "log_odds_chosen": 2.753518581390381, "log_odds_ratio": -0.3774588406085968, "logits/chosen": -1.0257591009140015, "logits/rejected": -1.0870009660720825, "logps/chosen": -0.6047015190124512, "logps/rejected": -2.9789905548095703, "loss": 1.154, "nll_loss": 1.1850796937942505, "rewards/accuracies": 0.875, "rewards/chosen": -0.060470156371593475, "rewards/margins": 0.23742888867855072, "rewards/rejected": -0.297899067401886, "step": 6809 }, { "epoch": 4.1543388744852825, "grad_norm": 1.5163713693618774, "learning_rate": 1.3570116350275566e-06, "log_odds_chosen": 3.137908458709717, "log_odds_ratio": -0.27375221252441406, "logits/chosen": -1.0028678178787231, "logits/rejected": -1.1148717403411865, "logps/chosen": -0.6744186282157898, "logps/rejected": -3.2604658603668213, "loss": 1.2121, "nll_loss": 1.3469589948654175, "rewards/accuracies": 0.875, "rewards/chosen": -0.06744186580181122, "rewards/margins": 0.2586047053337097, "rewards/rejected": -0.32604655623435974, "step": 6810 }, { "epoch": 4.1549489095623, "grad_norm": 1.032783031463623, "learning_rate": 1.356031843233313e-06, "log_odds_chosen": 0.34170854091644287, "log_odds_ratio": -0.6176222562789917, "logits/chosen": -0.7581788301467896, "logits/rejected": -0.6735561490058899, "logps/chosen": -0.7263070344924927, "logps/rejected": -0.9173493981361389, "loss": 1.1462, "nll_loss": 0.920644998550415, "rewards/accuracies": 0.5, "rewards/chosen": -0.07263070344924927, "rewards/margins": 0.019104234874248505, "rewards/rejected": -0.09173493832349777, "step": 6811 }, { "epoch": 4.155558944639317, "grad_norm": 1.575941801071167, "learning_rate": 1.355052051439069e-06, "log_odds_chosen": 2.3611302375793457, "log_odds_ratio": -0.35126423835754395, "logits/chosen": -0.8720904588699341, "logits/rejected": -0.9851648807525635, "logps/chosen": -0.6952157020568848, "logps/rejected": -2.577366828918457, "loss": 1.0658, "nll_loss": 0.9515446424484253, "rewards/accuracies": 0.75, "rewards/chosen": -0.06952156871557236, "rewards/margins": 0.18821515142917633, "rewards/rejected": -0.2577367126941681, "step": 6812 }, { "epoch": 4.156168979716334, "grad_norm": 1.7255135774612427, "learning_rate": 1.3540722596448256e-06, "log_odds_chosen": 1.3025765419006348, "log_odds_ratio": -0.35769402980804443, "logits/chosen": -0.7908996343612671, "logits/rejected": -0.8226404190063477, "logps/chosen": -0.6110546588897705, "logps/rejected": -1.415130615234375, "loss": 1.0543, "nll_loss": 0.8376430869102478, "rewards/accuracies": 0.75, "rewards/chosen": -0.06110547110438347, "rewards/margins": 0.08040760457515717, "rewards/rejected": -0.14151306450366974, "step": 6813 }, { "epoch": 4.156779014793351, "grad_norm": 1.8561266660690308, "learning_rate": 1.3530924678505817e-06, "log_odds_chosen": 1.3231420516967773, "log_odds_ratio": -0.46490204334259033, "logits/chosen": -0.7923859357833862, "logits/rejected": -0.8638490438461304, "logps/chosen": -0.6676695346832275, "logps/rejected": -1.5555434226989746, "loss": 0.9881, "nll_loss": 0.8603524565696716, "rewards/accuracies": 0.625, "rewards/chosen": -0.06676696240901947, "rewards/margins": 0.08878739178180695, "rewards/rejected": -0.15555435419082642, "step": 6814 }, { "epoch": 4.157389049870368, "grad_norm": 1.303061842918396, "learning_rate": 1.352112676056338e-06, "log_odds_chosen": 1.3162598609924316, "log_odds_ratio": -0.4925220012664795, "logits/chosen": -1.105726957321167, "logits/rejected": -1.1555883884429932, "logps/chosen": -1.3034331798553467, "logps/rejected": -2.4508721828460693, "loss": 1.2834, "nll_loss": 1.543945074081421, "rewards/accuracies": 0.625, "rewards/chosen": -0.13034333288669586, "rewards/margins": 0.11474389582872391, "rewards/rejected": -0.24508720636367798, "step": 6815 }, { "epoch": 4.1579990849473845, "grad_norm": 1.669215202331543, "learning_rate": 1.3511328842620942e-06, "log_odds_chosen": 1.1630947589874268, "log_odds_ratio": -0.6304945945739746, "logits/chosen": -1.0314043760299683, "logits/rejected": -1.0934799909591675, "logps/chosen": -0.9888138771057129, "logps/rejected": -1.7180516719818115, "loss": 1.0937, "nll_loss": 1.3518171310424805, "rewards/accuracies": 0.5, "rewards/chosen": -0.09888137876987457, "rewards/margins": 0.07292377948760986, "rewards/rejected": -0.17180517315864563, "step": 6816 }, { "epoch": 4.158609120024401, "grad_norm": 1.0329355001449585, "learning_rate": 1.3501530924678505e-06, "log_odds_chosen": 2.8019208908081055, "log_odds_ratio": -0.23577198386192322, "logits/chosen": -0.6146202087402344, "logits/rejected": -0.7666051387786865, "logps/chosen": -0.7271202802658081, "logps/rejected": -3.032613754272461, "loss": 0.9027, "nll_loss": 0.8705521821975708, "rewards/accuracies": 1.0, "rewards/chosen": -0.07271203398704529, "rewards/margins": 0.23054936528205872, "rewards/rejected": -0.303261399269104, "step": 6817 }, { "epoch": 4.159219155101418, "grad_norm": 2.347914934158325, "learning_rate": 1.3491733006736068e-06, "log_odds_chosen": 1.826585054397583, "log_odds_ratio": -0.35536277294158936, "logits/chosen": -0.5945528745651245, "logits/rejected": -0.7981112599372864, "logps/chosen": -0.6660129427909851, "logps/rejected": -1.9872936010360718, "loss": 0.9326, "nll_loss": 0.8492377996444702, "rewards/accuracies": 0.875, "rewards/chosen": -0.06660129874944687, "rewards/margins": 0.1321280598640442, "rewards/rejected": -0.19872936606407166, "step": 6818 }, { "epoch": 4.159829190178435, "grad_norm": 4.634580135345459, "learning_rate": 1.348193508879363e-06, "log_odds_chosen": 1.522236943244934, "log_odds_ratio": -0.42102205753326416, "logits/chosen": -0.6695994138717651, "logits/rejected": -0.7480239868164062, "logps/chosen": -0.5876427888870239, "logps/rejected": -1.6563467979431152, "loss": 0.9744, "nll_loss": 0.9914497137069702, "rewards/accuracies": 0.75, "rewards/chosen": -0.058764275163412094, "rewards/margins": 0.10687040537595749, "rewards/rejected": -0.16563467681407928, "step": 6819 }, { "epoch": 4.160439225255452, "grad_norm": 2.0195791721343994, "learning_rate": 1.3472137170851195e-06, "log_odds_chosen": 3.040182113647461, "log_odds_ratio": -0.1621466726064682, "logits/chosen": -0.7417784333229065, "logits/rejected": -0.8620131015777588, "logps/chosen": -0.6176980137825012, "logps/rejected": -2.9052629470825195, "loss": 0.9822, "nll_loss": 0.8396586179733276, "rewards/accuracies": 0.875, "rewards/chosen": -0.06176980212330818, "rewards/margins": 0.22875650227069855, "rewards/rejected": -0.29052630066871643, "step": 6820 }, { "epoch": 4.161049260332469, "grad_norm": 5.701013565063477, "learning_rate": 1.3462339252908756e-06, "log_odds_chosen": 1.3951058387756348, "log_odds_ratio": -0.4629504084587097, "logits/chosen": -1.154510736465454, "logits/rejected": -1.0635707378387451, "logps/chosen": -0.8473612070083618, "logps/rejected": -1.856957197189331, "loss": 1.0068, "nll_loss": 1.0601518154144287, "rewards/accuracies": 0.625, "rewards/chosen": -0.08473612368106842, "rewards/margins": 0.10095959156751633, "rewards/rejected": -0.18569570779800415, "step": 6821 }, { "epoch": 4.161659295409486, "grad_norm": 6.525023460388184, "learning_rate": 1.345254133496632e-06, "log_odds_chosen": 2.3156332969665527, "log_odds_ratio": -0.2166295200586319, "logits/chosen": -0.9967350959777832, "logits/rejected": -1.0091923475265503, "logps/chosen": -0.6308777332305908, "logps/rejected": -2.3354368209838867, "loss": 0.9932, "nll_loss": 1.0700920820236206, "rewards/accuracies": 1.0, "rewards/chosen": -0.06308777630329132, "rewards/margins": 0.1704559028148651, "rewards/rejected": -0.23354367911815643, "step": 6822 }, { "epoch": 4.162269330486503, "grad_norm": 5.521933078765869, "learning_rate": 1.3442743417023883e-06, "log_odds_chosen": 1.3195555210113525, "log_odds_ratio": -0.38557326793670654, "logits/chosen": -0.9101608395576477, "logits/rejected": -0.8712696433067322, "logps/chosen": -0.9143764972686768, "logps/rejected": -1.8582801818847656, "loss": 1.1162, "nll_loss": 1.062753438949585, "rewards/accuracies": 0.875, "rewards/chosen": -0.09143765270709991, "rewards/margins": 0.0943903923034668, "rewards/rejected": -0.1858280450105667, "step": 6823 }, { "epoch": 4.16287936556352, "grad_norm": 1.785911202430725, "learning_rate": 1.3432945499081444e-06, "log_odds_chosen": 1.4856271743774414, "log_odds_ratio": -0.38896816968917847, "logits/chosen": -0.8812894821166992, "logits/rejected": -1.0295497179031372, "logps/chosen": -0.7697737216949463, "logps/rejected": -1.7592747211456299, "loss": 1.0601, "nll_loss": 1.0875177383422852, "rewards/accuracies": 0.875, "rewards/chosen": -0.07697736471891403, "rewards/margins": 0.0989500880241394, "rewards/rejected": -0.17592746019363403, "step": 6824 }, { "epoch": 4.163489400640537, "grad_norm": 3.2796261310577393, "learning_rate": 1.3423147581139007e-06, "log_odds_chosen": 0.514557957649231, "log_odds_ratio": -0.6378808617591858, "logits/chosen": -0.8118659257888794, "logits/rejected": -0.9213854074478149, "logps/chosen": -0.8773567080497742, "logps/rejected": -1.2213635444641113, "loss": 1.1001, "nll_loss": 1.0048508644104004, "rewards/accuracies": 0.375, "rewards/chosen": -0.08773567527532578, "rewards/margins": 0.034400682896375656, "rewards/rejected": -0.12213636189699173, "step": 6825 }, { "epoch": 4.164099435717554, "grad_norm": 5.895175933837891, "learning_rate": 1.3413349663196569e-06, "log_odds_chosen": 0.9907264113426208, "log_odds_ratio": -0.5842973589897156, "logits/chosen": -0.8030149340629578, "logits/rejected": -0.8980586528778076, "logps/chosen": -0.8985953330993652, "logps/rejected": -1.683821678161621, "loss": 1.1418, "nll_loss": 1.1177021265029907, "rewards/accuracies": 0.625, "rewards/chosen": -0.08985953778028488, "rewards/margins": 0.07852263748645782, "rewards/rejected": -0.1683821678161621, "step": 6826 }, { "epoch": 4.164709470794571, "grad_norm": 6.581897258758545, "learning_rate": 1.3403551745254134e-06, "log_odds_chosen": 1.4088232517242432, "log_odds_ratio": -0.4418761134147644, "logits/chosen": -0.9712497591972351, "logits/rejected": -1.0649657249450684, "logps/chosen": -0.7351851463317871, "logps/rejected": -1.7710434198379517, "loss": 1.0446, "nll_loss": 0.958734393119812, "rewards/accuracies": 0.75, "rewards/chosen": -0.07351851463317871, "rewards/margins": 0.10358583927154541, "rewards/rejected": -0.17710435390472412, "step": 6827 }, { "epoch": 4.1653195058715875, "grad_norm": 1.7109742164611816, "learning_rate": 1.3393753827311695e-06, "log_odds_chosen": 2.293004274368286, "log_odds_ratio": -0.48185306787490845, "logits/chosen": -0.965196430683136, "logits/rejected": -1.1584590673446655, "logps/chosen": -0.8081201314926147, "logps/rejected": -2.691380262374878, "loss": 1.0721, "nll_loss": 0.9258935451507568, "rewards/accuracies": 0.75, "rewards/chosen": -0.08081202208995819, "rewards/margins": 0.18832603096961975, "rewards/rejected": -0.26913803815841675, "step": 6828 }, { "epoch": 4.165929540948604, "grad_norm": 1.697998046875, "learning_rate": 1.3383955909369259e-06, "log_odds_chosen": 1.1675937175750732, "log_odds_ratio": -0.6937485337257385, "logits/chosen": -0.8246532082557678, "logits/rejected": -0.8143690228462219, "logps/chosen": -0.818912148475647, "logps/rejected": -1.9021587371826172, "loss": 1.0555, "nll_loss": 0.8517177700996399, "rewards/accuracies": 0.5, "rewards/chosen": -0.08189122378826141, "rewards/margins": 0.10832466930150986, "rewards/rejected": -0.19021588563919067, "step": 6829 }, { "epoch": 4.166539576025621, "grad_norm": 2.066134214401245, "learning_rate": 1.3374157991426822e-06, "log_odds_chosen": 3.062385082244873, "log_odds_ratio": -0.23549547791481018, "logits/chosen": -0.6932344436645508, "logits/rejected": -0.9396790862083435, "logps/chosen": -0.7306467294692993, "logps/rejected": -3.206652879714966, "loss": 0.8583, "nll_loss": 0.8263005018234253, "rewards/accuracies": 0.875, "rewards/chosen": -0.07306467741727829, "rewards/margins": 0.24760062992572784, "rewards/rejected": -0.32066529989242554, "step": 6830 }, { "epoch": 4.167149611102638, "grad_norm": 1.2827914953231812, "learning_rate": 1.3364360073484383e-06, "log_odds_chosen": 1.6265923976898193, "log_odds_ratio": -0.4637059271335602, "logits/chosen": -0.7336277961730957, "logits/rejected": -0.903862714767456, "logps/chosen": -0.751492977142334, "logps/rejected": -1.9706926345825195, "loss": 1.0356, "nll_loss": 1.138077974319458, "rewards/accuracies": 0.75, "rewards/chosen": -0.0751492977142334, "rewards/margins": 0.12191997468471527, "rewards/rejected": -0.19706928730010986, "step": 6831 }, { "epoch": 4.167759646179656, "grad_norm": 8.83969497680664, "learning_rate": 1.3354562155541947e-06, "log_odds_chosen": 1.0947015285491943, "log_odds_ratio": -0.5105434656143188, "logits/chosen": -0.608542799949646, "logits/rejected": -0.916827917098999, "logps/chosen": -0.7133219242095947, "logps/rejected": -1.459438681602478, "loss": 0.9979, "nll_loss": 0.7882372736930847, "rewards/accuracies": 0.625, "rewards/chosen": -0.07133219391107559, "rewards/margins": 0.07461166381835938, "rewards/rejected": -0.14594386518001556, "step": 6832 }, { "epoch": 4.168369681256673, "grad_norm": 8.209707260131836, "learning_rate": 1.3344764237599508e-06, "log_odds_chosen": 1.2522459030151367, "log_odds_ratio": -0.3656628131866455, "logits/chosen": -0.9150881171226501, "logits/rejected": -0.9368071556091309, "logps/chosen": -0.6788597702980042, "logps/rejected": -1.4650439023971558, "loss": 0.9983, "nll_loss": 1.274151086807251, "rewards/accuracies": 0.875, "rewards/chosen": -0.06788598746061325, "rewards/margins": 0.07861840724945068, "rewards/rejected": -0.14650438725948334, "step": 6833 }, { "epoch": 4.1689797163336895, "grad_norm": 1.780656099319458, "learning_rate": 1.3334966319657073e-06, "log_odds_chosen": 2.2003440856933594, "log_odds_ratio": -0.38483691215515137, "logits/chosen": -0.9001932144165039, "logits/rejected": -1.105237603187561, "logps/chosen": -0.8758898973464966, "logps/rejected": -2.6466240882873535, "loss": 1.1684, "nll_loss": 0.9235580563545227, "rewards/accuracies": 0.75, "rewards/chosen": -0.08758899569511414, "rewards/margins": 0.1770734041929245, "rewards/rejected": -0.26466241478919983, "step": 6834 }, { "epoch": 4.169589751410706, "grad_norm": 9.938704490661621, "learning_rate": 1.3325168401714634e-06, "log_odds_chosen": 2.287832021713257, "log_odds_ratio": -0.4861217141151428, "logits/chosen": -0.7014154195785522, "logits/rejected": -0.8773556351661682, "logps/chosen": -0.7015593647956848, "logps/rejected": -2.527503490447998, "loss": 0.9182, "nll_loss": 0.7490376830101013, "rewards/accuracies": 0.625, "rewards/chosen": -0.07015593349933624, "rewards/margins": 0.182594433426857, "rewards/rejected": -0.25275036692619324, "step": 6835 }, { "epoch": 4.170199786487723, "grad_norm": 1.3521816730499268, "learning_rate": 1.3315370483772198e-06, "log_odds_chosen": 1.5855143070220947, "log_odds_ratio": -0.3648805022239685, "logits/chosen": -0.9028487205505371, "logits/rejected": -0.9657549858093262, "logps/chosen": -0.7210671305656433, "logps/rejected": -1.8356268405914307, "loss": 1.0186, "nll_loss": 0.7709245085716248, "rewards/accuracies": 0.875, "rewards/chosen": -0.07210671156644821, "rewards/margins": 0.11145597696304321, "rewards/rejected": -0.18356269598007202, "step": 6836 }, { "epoch": 4.17080982156474, "grad_norm": 1.594290018081665, "learning_rate": 1.3305572565829761e-06, "log_odds_chosen": 3.299769163131714, "log_odds_ratio": -0.23271994292736053, "logits/chosen": -0.6675239205360413, "logits/rejected": -0.8879872560501099, "logps/chosen": -0.4787335991859436, "logps/rejected": -2.7151706218719482, "loss": 0.7164, "nll_loss": 0.6476596593856812, "rewards/accuracies": 0.875, "rewards/chosen": -0.047873355448246, "rewards/margins": 0.2236437201499939, "rewards/rejected": -0.2715170681476593, "step": 6837 }, { "epoch": 4.171419856641757, "grad_norm": 1.5338712930679321, "learning_rate": 1.3295774647887322e-06, "log_odds_chosen": 1.465395212173462, "log_odds_ratio": -0.4545624554157257, "logits/chosen": -0.7138887047767639, "logits/rejected": -0.9049558043479919, "logps/chosen": -0.7816958427429199, "logps/rejected": -1.8550646305084229, "loss": 0.9235, "nll_loss": 0.9783403873443604, "rewards/accuracies": 0.625, "rewards/chosen": -0.07816959172487259, "rewards/margins": 0.10733687877655029, "rewards/rejected": -0.18550646305084229, "step": 6838 }, { "epoch": 4.172029891718774, "grad_norm": 3.7217538356781006, "learning_rate": 1.3285976729944886e-06, "log_odds_chosen": 1.475942850112915, "log_odds_ratio": -0.4971798360347748, "logits/chosen": -0.7618950009346008, "logits/rejected": -0.8775675296783447, "logps/chosen": -0.7317622303962708, "logps/rejected": -1.8969762325286865, "loss": 0.9157, "nll_loss": 0.7663534879684448, "rewards/accuracies": 0.75, "rewards/chosen": -0.07317622750997543, "rewards/margins": 0.11652138829231262, "rewards/rejected": -0.18969762325286865, "step": 6839 }, { "epoch": 4.1726399267957905, "grad_norm": 1.3920278549194336, "learning_rate": 1.327617881200245e-06, "log_odds_chosen": 1.1917650699615479, "log_odds_ratio": -0.4951205253601074, "logits/chosen": -0.7586579322814941, "logits/rejected": -0.7974343299865723, "logps/chosen": -0.6559137105941772, "logps/rejected": -1.5305354595184326, "loss": 0.9565, "nll_loss": 0.9946177005767822, "rewards/accuracies": 0.75, "rewards/chosen": -0.06559137254953384, "rewards/margins": 0.0874621793627739, "rewards/rejected": -0.15305355191230774, "step": 6840 }, { "epoch": 4.173249961872807, "grad_norm": 1.6135518550872803, "learning_rate": 1.3266380894060012e-06, "log_odds_chosen": 0.7259068489074707, "log_odds_ratio": -0.523205578327179, "logits/chosen": -0.9522035121917725, "logits/rejected": -0.9888752698898315, "logps/chosen": -0.9661667943000793, "logps/rejected": -1.487316608428955, "loss": 1.1486, "nll_loss": 1.148861289024353, "rewards/accuracies": 0.75, "rewards/chosen": -0.09661667793989182, "rewards/margins": 0.05211498588323593, "rewards/rejected": -0.14873166382312775, "step": 6841 }, { "epoch": 4.173859996949824, "grad_norm": 1.5221627950668335, "learning_rate": 1.3256582976117574e-06, "log_odds_chosen": 2.569580554962158, "log_odds_ratio": -0.3153764009475708, "logits/chosen": -0.8233168125152588, "logits/rejected": -0.8618699312210083, "logps/chosen": -0.6494134664535522, "logps/rejected": -2.5747313499450684, "loss": 0.9991, "nll_loss": 0.8194220066070557, "rewards/accuracies": 0.875, "rewards/chosen": -0.06494134664535522, "rewards/margins": 0.19253180921077728, "rewards/rejected": -0.2574731409549713, "step": 6842 }, { "epoch": 4.174470032026842, "grad_norm": 1.6301642656326294, "learning_rate": 1.3246785058175137e-06, "log_odds_chosen": 1.6637978553771973, "log_odds_ratio": -0.4693320095539093, "logits/chosen": -0.9258846044540405, "logits/rejected": -0.9187843203544617, "logps/chosen": -0.7405925989151001, "logps/rejected": -2.067058563232422, "loss": 1.0764, "nll_loss": 1.115818738937378, "rewards/accuracies": 0.75, "rewards/chosen": -0.07405926287174225, "rewards/margins": 0.1326465904712677, "rewards/rejected": -0.20670585334300995, "step": 6843 }, { "epoch": 4.175080067103859, "grad_norm": 1.2722692489624023, "learning_rate": 1.32369871402327e-06, "log_odds_chosen": 2.4332730770111084, "log_odds_ratio": -0.41446900367736816, "logits/chosen": -0.8449859023094177, "logits/rejected": -0.7963412404060364, "logps/chosen": -0.8032609224319458, "logps/rejected": -2.8893685340881348, "loss": 1.0338, "nll_loss": 1.0249755382537842, "rewards/accuracies": 0.75, "rewards/chosen": -0.08032609522342682, "rewards/margins": 0.20861075818538666, "rewards/rejected": -0.2889368534088135, "step": 6844 }, { "epoch": 4.175690102180876, "grad_norm": 1.3739278316497803, "learning_rate": 1.3227189222290264e-06, "log_odds_chosen": 3.058832883834839, "log_odds_ratio": -0.22059331834316254, "logits/chosen": -0.8617234826087952, "logits/rejected": -1.0131416320800781, "logps/chosen": -0.7014479637145996, "logps/rejected": -3.112870216369629, "loss": 1.0466, "nll_loss": 0.9136477708816528, "rewards/accuracies": 0.875, "rewards/chosen": -0.07014480233192444, "rewards/margins": 0.24114221334457397, "rewards/rejected": -0.3112870156764984, "step": 6845 }, { "epoch": 4.1763001372578925, "grad_norm": 4.867904186248779, "learning_rate": 1.3217391304347825e-06, "log_odds_chosen": 1.274884819984436, "log_odds_ratio": -0.6631647944450378, "logits/chosen": -0.722578227519989, "logits/rejected": -0.7876493334770203, "logps/chosen": -0.7843999862670898, "logps/rejected": -1.8687211275100708, "loss": 1.0835, "nll_loss": 0.921531081199646, "rewards/accuracies": 0.5, "rewards/chosen": -0.07843999564647675, "rewards/margins": 0.1084321141242981, "rewards/rejected": -0.18687210977077484, "step": 6846 }, { "epoch": 4.176910172334909, "grad_norm": 1.9100857973098755, "learning_rate": 1.3207593386405388e-06, "log_odds_chosen": 0.7859355211257935, "log_odds_ratio": -0.4687785804271698, "logits/chosen": -0.9148045778274536, "logits/rejected": -1.0128663778305054, "logps/chosen": -0.9049334526062012, "logps/rejected": -1.4034415483474731, "loss": 1.1704, "nll_loss": 1.0586576461791992, "rewards/accuracies": 0.625, "rewards/chosen": -0.0904933512210846, "rewards/margins": 0.04985079914331436, "rewards/rejected": -0.14034415781497955, "step": 6847 }, { "epoch": 4.177520207411926, "grad_norm": 1.275024175643921, "learning_rate": 1.3197795468462952e-06, "log_odds_chosen": 1.7463574409484863, "log_odds_ratio": -0.38925689458847046, "logits/chosen": -1.0335967540740967, "logits/rejected": -1.0373834371566772, "logps/chosen": -0.6662575006484985, "logps/rejected": -1.702134370803833, "loss": 1.056, "nll_loss": 1.0768557786941528, "rewards/accuracies": 0.875, "rewards/chosen": -0.06662574410438538, "rewards/margins": 0.10358768701553345, "rewards/rejected": -0.17021343111991882, "step": 6848 }, { "epoch": 4.178130242488943, "grad_norm": 1.3563324213027954, "learning_rate": 1.3187997550520513e-06, "log_odds_chosen": 2.5349273681640625, "log_odds_ratio": -0.4742280840873718, "logits/chosen": -1.017798662185669, "logits/rejected": -1.0478830337524414, "logps/chosen": -0.879391074180603, "logps/rejected": -2.9803388118743896, "loss": 1.0752, "nll_loss": 1.085960030555725, "rewards/accuracies": 0.625, "rewards/chosen": -0.08793911337852478, "rewards/margins": 0.21009480953216553, "rewards/rejected": -0.2980338931083679, "step": 6849 }, { "epoch": 4.17874027756596, "grad_norm": 1.5254034996032715, "learning_rate": 1.3178199632578078e-06, "log_odds_chosen": 1.5873483419418335, "log_odds_ratio": -0.38373807072639465, "logits/chosen": -0.6416102647781372, "logits/rejected": -0.7785714864730835, "logps/chosen": -0.5196449160575867, "logps/rejected": -1.5006284713745117, "loss": 0.781, "nll_loss": 0.7320648431777954, "rewards/accuracies": 0.75, "rewards/chosen": -0.051964495331048965, "rewards/margins": 0.09809836000204086, "rewards/rejected": -0.15006285905838013, "step": 6850 }, { "epoch": 4.179350312642977, "grad_norm": 1.27642023563385, "learning_rate": 1.316840171463564e-06, "log_odds_chosen": 1.0178643465042114, "log_odds_ratio": -0.4769667983055115, "logits/chosen": -1.0364460945129395, "logits/rejected": -0.9966281652450562, "logps/chosen": -0.6253074407577515, "logps/rejected": -1.2036597728729248, "loss": 1.067, "nll_loss": 0.9766780734062195, "rewards/accuracies": 0.75, "rewards/chosen": -0.0625307485461235, "rewards/margins": 0.05783523991703987, "rewards/rejected": -0.12036598473787308, "step": 6851 }, { "epoch": 4.179960347719994, "grad_norm": 2.1205270290374756, "learning_rate": 1.3158603796693203e-06, "log_odds_chosen": 0.8110772371292114, "log_odds_ratio": -0.5461919903755188, "logits/chosen": -0.9521262645721436, "logits/rejected": -0.9711123704910278, "logps/chosen": -0.8712125420570374, "logps/rejected": -1.4609034061431885, "loss": 1.0142, "nll_loss": 0.952951192855835, "rewards/accuracies": 0.625, "rewards/chosen": -0.08712124824523926, "rewards/margins": 0.05896909534931183, "rewards/rejected": -0.1460903435945511, "step": 6852 }, { "epoch": 4.18057038279701, "grad_norm": 1.0248903036117554, "learning_rate": 1.3148805878750764e-06, "log_odds_chosen": 1.8602075576782227, "log_odds_ratio": -0.3767135739326477, "logits/chosen": -0.8268843293190002, "logits/rejected": -0.9714856743812561, "logps/chosen": -0.6191107034683228, "logps/rejected": -1.9850404262542725, "loss": 0.9532, "nll_loss": 1.0105023384094238, "rewards/accuracies": 0.75, "rewards/chosen": -0.061911072582006454, "rewards/margins": 0.13659296929836273, "rewards/rejected": -0.1985040307044983, "step": 6853 }, { "epoch": 4.181180417874028, "grad_norm": 1.9661059379577637, "learning_rate": 1.3139007960808327e-06, "log_odds_chosen": 3.048855781555176, "log_odds_ratio": -0.4154285490512848, "logits/chosen": -0.8653233647346497, "logits/rejected": -0.8415480852127075, "logps/chosen": -0.6702873706817627, "logps/rejected": -3.2357311248779297, "loss": 1.0709, "nll_loss": 0.8586239218711853, "rewards/accuracies": 0.75, "rewards/chosen": -0.06702873855829239, "rewards/margins": 0.2565443813800812, "rewards/rejected": -0.32357311248779297, "step": 6854 }, { "epoch": 4.181790452951045, "grad_norm": 2.022181987762451, "learning_rate": 1.312921004286589e-06, "log_odds_chosen": 3.8275294303894043, "log_odds_ratio": -0.1525586098432541, "logits/chosen": -0.9866313934326172, "logits/rejected": -1.0320814847946167, "logps/chosen": -0.7357689142227173, "logps/rejected": -3.901020050048828, "loss": 0.96, "nll_loss": 0.8774454593658447, "rewards/accuracies": 1.0, "rewards/chosen": -0.0735768973827362, "rewards/margins": 0.31652510166168213, "rewards/rejected": -0.3901020288467407, "step": 6855 }, { "epoch": 4.182400488028062, "grad_norm": 5.810150146484375, "learning_rate": 1.3119412124923452e-06, "log_odds_chosen": 2.2726993560791016, "log_odds_ratio": -0.411954402923584, "logits/chosen": -0.9158229827880859, "logits/rejected": -1.025855302810669, "logps/chosen": -1.2044463157653809, "logps/rejected": -3.2078661918640137, "loss": 0.9545, "nll_loss": 1.1073384284973145, "rewards/accuracies": 0.625, "rewards/chosen": -0.1204446330666542, "rewards/margins": 0.20034195482730865, "rewards/rejected": -0.32078656554222107, "step": 6856 }, { "epoch": 4.183010523105079, "grad_norm": 8.07571029663086, "learning_rate": 1.3109614206981017e-06, "log_odds_chosen": 3.333836078643799, "log_odds_ratio": -0.3488854169845581, "logits/chosen": -0.8607973456382751, "logits/rejected": -0.9130894541740417, "logps/chosen": -0.6447086930274963, "logps/rejected": -3.3285982608795166, "loss": 1.0416, "nll_loss": 0.8493738174438477, "rewards/accuracies": 0.75, "rewards/chosen": -0.06447087228298187, "rewards/margins": 0.268388956785202, "rewards/rejected": -0.3328598141670227, "step": 6857 }, { "epoch": 4.1836205581820956, "grad_norm": 1.3190091848373413, "learning_rate": 1.3099816289038579e-06, "log_odds_chosen": 3.5504653453826904, "log_odds_ratio": -0.17752604186534882, "logits/chosen": -0.5889924764633179, "logits/rejected": -0.8045593500137329, "logps/chosen": -0.3259163498878479, "logps/rejected": -2.89756441116333, "loss": 1.0209, "nll_loss": 0.7119045853614807, "rewards/accuracies": 1.0, "rewards/chosen": -0.03259163349866867, "rewards/margins": 0.2571648359298706, "rewards/rejected": -0.2897564768791199, "step": 6858 }, { "epoch": 4.184230593259112, "grad_norm": 1.5094988346099854, "learning_rate": 1.3090018371096142e-06, "log_odds_chosen": 1.0986101627349854, "log_odds_ratio": -0.4915282130241394, "logits/chosen": -1.0016309022903442, "logits/rejected": -1.0182490348815918, "logps/chosen": -1.1094422340393066, "logps/rejected": -1.9032089710235596, "loss": 1.0872, "nll_loss": 1.3439505100250244, "rewards/accuracies": 0.625, "rewards/chosen": -0.1109442189335823, "rewards/margins": 0.07937668263912201, "rewards/rejected": -0.19032089412212372, "step": 6859 }, { "epoch": 4.184840628336129, "grad_norm": 1.1727427244186401, "learning_rate": 1.3080220453153705e-06, "log_odds_chosen": 1.4981329441070557, "log_odds_ratio": -0.41113612055778503, "logits/chosen": -0.9529997110366821, "logits/rejected": -1.0390560626983643, "logps/chosen": -0.8668508529663086, "logps/rejected": -2.1399240493774414, "loss": 1.0205, "nll_loss": 0.9861356616020203, "rewards/accuracies": 0.625, "rewards/chosen": -0.08668508380651474, "rewards/margins": 0.12730732560157776, "rewards/rejected": -0.2139924168586731, "step": 6860 }, { "epoch": 4.185450663413146, "grad_norm": 2.3681607246398926, "learning_rate": 1.3070422535211266e-06, "log_odds_chosen": 3.07552433013916, "log_odds_ratio": -0.15896473824977875, "logits/chosen": -0.7775142788887024, "logits/rejected": -0.9005102515220642, "logps/chosen": -0.5472629070281982, "logps/rejected": -2.6055002212524414, "loss": 0.9091, "nll_loss": 0.6740915179252625, "rewards/accuracies": 0.875, "rewards/chosen": -0.054726291447877884, "rewards/margins": 0.20582374930381775, "rewards/rejected": -0.26055002212524414, "step": 6861 }, { "epoch": 4.186060698490163, "grad_norm": 1.3175013065338135, "learning_rate": 1.306062461726883e-06, "log_odds_chosen": 0.6445884704589844, "log_odds_ratio": -0.4780309498310089, "logits/chosen": -1.0711085796356201, "logits/rejected": -1.03725266456604, "logps/chosen": -0.9822989702224731, "logps/rejected": -1.4218604564666748, "loss": 1.186, "nll_loss": 1.1876425743103027, "rewards/accuracies": 0.875, "rewards/chosen": -0.09822990000247955, "rewards/margins": 0.04395613074302673, "rewards/rejected": -0.1421860307455063, "step": 6862 }, { "epoch": 4.18667073356718, "grad_norm": 1.7326332330703735, "learning_rate": 1.305082669932639e-06, "log_odds_chosen": 4.219788074493408, "log_odds_ratio": -0.1600908488035202, "logits/chosen": -0.9844844341278076, "logits/rejected": -1.1628797054290771, "logps/chosen": -0.7592133283615112, "logps/rejected": -4.283356666564941, "loss": 0.9855, "nll_loss": 0.9884966611862183, "rewards/accuracies": 1.0, "rewards/chosen": -0.07592133432626724, "rewards/margins": 0.3524142801761627, "rewards/rejected": -0.42833563685417175, "step": 6863 }, { "epoch": 4.187280768644197, "grad_norm": 1.9177765846252441, "learning_rate": 1.3041028781383956e-06, "log_odds_chosen": 5.293006896972656, "log_odds_ratio": -0.26406311988830566, "logits/chosen": -0.8740404844284058, "logits/rejected": -1.192335605621338, "logps/chosen": -0.7506002187728882, "logps/rejected": -5.354221820831299, "loss": 0.9682, "nll_loss": 0.9511719942092896, "rewards/accuracies": 0.875, "rewards/chosen": -0.07506002485752106, "rewards/margins": 0.46036219596862793, "rewards/rejected": -0.5354222059249878, "step": 6864 }, { "epoch": 4.187890803721214, "grad_norm": 6.873646259307861, "learning_rate": 1.3031230863441518e-06, "log_odds_chosen": 2.205019950866699, "log_odds_ratio": -0.2669498920440674, "logits/chosen": -0.9344831705093384, "logits/rejected": -1.0270912647247314, "logps/chosen": -0.8455345630645752, "logps/rejected": -2.3882293701171875, "loss": 1.1302, "nll_loss": 1.1622153520584106, "rewards/accuracies": 1.0, "rewards/chosen": -0.08455345779657364, "rewards/margins": 0.1542695015668869, "rewards/rejected": -0.23882295191287994, "step": 6865 }, { "epoch": 4.188500838798231, "grad_norm": 8.49764347076416, "learning_rate": 1.3021432945499081e-06, "log_odds_chosen": 1.1938642263412476, "log_odds_ratio": -0.4241192936897278, "logits/chosen": -0.9588638544082642, "logits/rejected": -0.9345104694366455, "logps/chosen": -0.9659922122955322, "logps/rejected": -1.8918520212173462, "loss": 1.0206, "nll_loss": 1.0942208766937256, "rewards/accuracies": 0.625, "rewards/chosen": -0.09659922122955322, "rewards/margins": 0.092585988342762, "rewards/rejected": -0.18918520212173462, "step": 6866 }, { "epoch": 4.189110873875248, "grad_norm": 1.316178798675537, "learning_rate": 1.3011635027556644e-06, "log_odds_chosen": 3.494335174560547, "log_odds_ratio": -0.3365188241004944, "logits/chosen": -0.7810351848602295, "logits/rejected": -0.799474835395813, "logps/chosen": -0.524958610534668, "logps/rejected": -3.3191208839416504, "loss": 1.124, "nll_loss": 0.7375560998916626, "rewards/accuracies": 0.75, "rewards/chosen": -0.05249585956335068, "rewards/margins": 0.27941620349884033, "rewards/rejected": -0.3319120705127716, "step": 6867 }, { "epoch": 4.189720908952265, "grad_norm": 2.6189627647399902, "learning_rate": 1.3001837109614206e-06, "log_odds_chosen": 1.0610700845718384, "log_odds_ratio": -0.48860496282577515, "logits/chosen": -1.0647516250610352, "logits/rejected": -0.9173787832260132, "logps/chosen": -0.8808789849281311, "logps/rejected": -1.594099521636963, "loss": 0.8658, "nll_loss": 0.9965763092041016, "rewards/accuracies": 0.75, "rewards/chosen": -0.08808790147304535, "rewards/margins": 0.07132206112146378, "rewards/rejected": -0.15940995514392853, "step": 6868 }, { "epoch": 4.190330944029282, "grad_norm": 1.3800153732299805, "learning_rate": 1.299203919167177e-06, "log_odds_chosen": 1.9528119564056396, "log_odds_ratio": -0.3545617461204529, "logits/chosen": -0.9196615219116211, "logits/rejected": -1.031053900718689, "logps/chosen": -0.7345516681671143, "logps/rejected": -2.251030206680298, "loss": 0.9874, "nll_loss": 0.8912529349327087, "rewards/accuracies": 0.75, "rewards/chosen": -0.07345516979694366, "rewards/margins": 0.15164786577224731, "rewards/rejected": -0.22510302066802979, "step": 6869 }, { "epoch": 4.190940979106299, "grad_norm": 5.713367938995361, "learning_rate": 1.2982241273729332e-06, "log_odds_chosen": 2.475228786468506, "log_odds_ratio": -0.34445101022720337, "logits/chosen": -0.8137999773025513, "logits/rejected": -1.0290228128433228, "logps/chosen": -0.6020618081092834, "logps/rejected": -2.5604732036590576, "loss": 0.8687, "nll_loss": 0.6889852285385132, "rewards/accuracies": 0.75, "rewards/chosen": -0.06020617485046387, "rewards/margins": 0.19584116339683533, "rewards/rejected": -0.2560473382472992, "step": 6870 }, { "epoch": 4.191551014183315, "grad_norm": 3.3150460720062256, "learning_rate": 1.2972443355786896e-06, "log_odds_chosen": 1.2127959728240967, "log_odds_ratio": -0.4391745924949646, "logits/chosen": -0.9147467613220215, "logits/rejected": -0.8934972286224365, "logps/chosen": -0.9974719285964966, "logps/rejected": -1.9769001007080078, "loss": 1.0051, "nll_loss": 1.2908415794372559, "rewards/accuracies": 0.75, "rewards/chosen": -0.0997471958398819, "rewards/margins": 0.09794282168149948, "rewards/rejected": -0.19769001007080078, "step": 6871 }, { "epoch": 4.192161049260332, "grad_norm": 3.9443111419677734, "learning_rate": 1.2962645437844457e-06, "log_odds_chosen": 1.219890832901001, "log_odds_ratio": -0.4281333088874817, "logits/chosen": -0.7123012542724609, "logits/rejected": -1.0005621910095215, "logps/chosen": -0.7222843170166016, "logps/rejected": -1.5162216424942017, "loss": 0.9859, "nll_loss": 0.8512016534805298, "rewards/accuracies": 0.75, "rewards/chosen": -0.07222843170166016, "rewards/margins": 0.07939372211694717, "rewards/rejected": -0.15162214636802673, "step": 6872 }, { "epoch": 4.192771084337349, "grad_norm": 3.0639214515686035, "learning_rate": 1.295284751990202e-06, "log_odds_chosen": 1.8481104373931885, "log_odds_ratio": -0.38448530435562134, "logits/chosen": -0.8138097524642944, "logits/rejected": -1.0199222564697266, "logps/chosen": -0.8136007785797119, "logps/rejected": -2.1551284790039062, "loss": 1.2706, "nll_loss": 0.8864952325820923, "rewards/accuracies": 0.75, "rewards/chosen": -0.08136007189750671, "rewards/margins": 0.13415277004241943, "rewards/rejected": -0.21551285684108734, "step": 6873 }, { "epoch": 4.193381119414366, "grad_norm": 1.358721375465393, "learning_rate": 1.2943049601959584e-06, "log_odds_chosen": 1.2570059299468994, "log_odds_ratio": -0.4066745638847351, "logits/chosen": -0.8339295387268066, "logits/rejected": -0.9078741073608398, "logps/chosen": -0.7862640619277954, "logps/rejected": -1.7274619340896606, "loss": 1.0321, "nll_loss": 0.9012566804885864, "rewards/accuracies": 0.875, "rewards/chosen": -0.07862640917301178, "rewards/margins": 0.09411978721618652, "rewards/rejected": -0.1727461814880371, "step": 6874 }, { "epoch": 4.193991154491384, "grad_norm": 1.0911320447921753, "learning_rate": 1.2933251684017145e-06, "log_odds_chosen": 1.5925347805023193, "log_odds_ratio": -0.3952018916606903, "logits/chosen": -0.7160701155662537, "logits/rejected": -0.964961051940918, "logps/chosen": -0.859247088432312, "logps/rejected": -1.9772684574127197, "loss": 0.9705, "nll_loss": 1.100921630859375, "rewards/accuracies": 0.75, "rewards/chosen": -0.08592470735311508, "rewards/margins": 0.11180214583873749, "rewards/rejected": -0.19772686064243317, "step": 6875 }, { "epoch": 4.194601189568401, "grad_norm": 2.353527545928955, "learning_rate": 1.2923453766074708e-06, "log_odds_chosen": 1.70560622215271, "log_odds_ratio": -0.5053985118865967, "logits/chosen": -1.0445622205734253, "logits/rejected": -1.0599149465560913, "logps/chosen": -0.8828818798065186, "logps/rejected": -2.3422205448150635, "loss": 1.0605, "nll_loss": 0.9316132664680481, "rewards/accuracies": 0.625, "rewards/chosen": -0.08828819543123245, "rewards/margins": 0.1459338515996933, "rewards/rejected": -0.23422205448150635, "step": 6876 }, { "epoch": 4.195211224645417, "grad_norm": 4.120497226715088, "learning_rate": 1.2913655848132271e-06, "log_odds_chosen": 1.888480305671692, "log_odds_ratio": -0.4189716875553131, "logits/chosen": -0.993370532989502, "logits/rejected": -1.114289402961731, "logps/chosen": -0.9156838655471802, "logps/rejected": -2.549959182739258, "loss": 1.0307, "nll_loss": 1.0673432350158691, "rewards/accuracies": 0.75, "rewards/chosen": -0.09156838804483414, "rewards/margins": 0.16342754662036896, "rewards/rejected": -0.2549959123134613, "step": 6877 }, { "epoch": 4.195821259722434, "grad_norm": 6.573786735534668, "learning_rate": 1.2903857930189835e-06, "log_odds_chosen": 2.698239326477051, "log_odds_ratio": -0.3476117253303528, "logits/chosen": -0.8322309851646423, "logits/rejected": -0.8886162042617798, "logps/chosen": -0.728175699710846, "logps/rejected": -2.8469386100769043, "loss": 1.1077, "nll_loss": 0.9136914014816284, "rewards/accuracies": 0.75, "rewards/chosen": -0.07281757146120071, "rewards/margins": 0.2118763029575348, "rewards/rejected": -0.2846938967704773, "step": 6878 }, { "epoch": 4.196431294799451, "grad_norm": 1.5466418266296387, "learning_rate": 1.2894060012247396e-06, "log_odds_chosen": 2.0859203338623047, "log_odds_ratio": -0.5974425077438354, "logits/chosen": -0.8423725366592407, "logits/rejected": -0.8292498588562012, "logps/chosen": -0.9143155813217163, "logps/rejected": -2.766726016998291, "loss": 1.067, "nll_loss": 1.0079655647277832, "rewards/accuracies": 0.625, "rewards/chosen": -0.09143155813217163, "rewards/margins": 0.18524101376533508, "rewards/rejected": -0.2766725718975067, "step": 6879 }, { "epoch": 4.197041329876468, "grad_norm": 1.6603469848632812, "learning_rate": 1.2884262094304961e-06, "log_odds_chosen": 0.9395328760147095, "log_odds_ratio": -0.5403978824615479, "logits/chosen": -0.8378050923347473, "logits/rejected": -0.9356216788291931, "logps/chosen": -0.764204740524292, "logps/rejected": -1.4172255992889404, "loss": 1.1043, "nll_loss": 1.1233394145965576, "rewards/accuracies": 0.75, "rewards/chosen": -0.07642047107219696, "rewards/margins": 0.06530209630727768, "rewards/rejected": -0.14172257483005524, "step": 6880 }, { "epoch": 4.197651364953485, "grad_norm": 2.492417812347412, "learning_rate": 1.2874464176362523e-06, "log_odds_chosen": 1.495342493057251, "log_odds_ratio": -0.32290273904800415, "logits/chosen": -1.0025216341018677, "logits/rejected": -1.0476408004760742, "logps/chosen": -0.8529417514801025, "logps/rejected": -1.905858039855957, "loss": 1.1602, "nll_loss": 1.1756935119628906, "rewards/accuracies": 0.75, "rewards/chosen": -0.08529417961835861, "rewards/margins": 0.10529161989688873, "rewards/rejected": -0.19058580696582794, "step": 6881 }, { "epoch": 4.198261400030502, "grad_norm": 1.2852381467819214, "learning_rate": 1.2864666258420084e-06, "log_odds_chosen": 2.154634475708008, "log_odds_ratio": -0.563839316368103, "logits/chosen": -0.8014044761657715, "logits/rejected": -0.9630565047264099, "logps/chosen": -0.8310590982437134, "logps/rejected": -2.5467019081115723, "loss": 1.1157, "nll_loss": 1.0693473815917969, "rewards/accuracies": 0.75, "rewards/chosen": -0.0831058993935585, "rewards/margins": 0.1715642809867859, "rewards/rejected": -0.2546702027320862, "step": 6882 }, { "epoch": 4.1988714351075185, "grad_norm": 14.325522422790527, "learning_rate": 1.2854868340477647e-06, "log_odds_chosen": 0.703369140625, "log_odds_ratio": -0.5298671126365662, "logits/chosen": -0.9793049097061157, "logits/rejected": -0.9522724747657776, "logps/chosen": -1.0869938135147095, "logps/rejected": -1.4938886165618896, "loss": 1.2169, "nll_loss": 1.2591168880462646, "rewards/accuracies": 0.625, "rewards/chosen": -0.10869938135147095, "rewards/margins": 0.040689483284950256, "rewards/rejected": -0.1493888646364212, "step": 6883 }, { "epoch": 4.199481470184535, "grad_norm": 1.272311806678772, "learning_rate": 1.284507042253521e-06, "log_odds_chosen": 1.2924391031265259, "log_odds_ratio": -0.5148978233337402, "logits/chosen": -0.9729505777359009, "logits/rejected": -1.0796935558319092, "logps/chosen": -1.130897045135498, "logps/rejected": -2.1046640872955322, "loss": 1.0692, "nll_loss": 1.1994003057479858, "rewards/accuracies": 0.75, "rewards/chosen": -0.11308970302343369, "rewards/margins": 0.09737669676542282, "rewards/rejected": -0.2104664146900177, "step": 6884 }, { "epoch": 4.200091505261552, "grad_norm": 6.528550624847412, "learning_rate": 1.2835272504592774e-06, "log_odds_chosen": 2.1995081901550293, "log_odds_ratio": -0.30385833978652954, "logits/chosen": -0.7717805504798889, "logits/rejected": -0.8211325407028198, "logps/chosen": -0.6628685593605042, "logps/rejected": -2.2930097579956055, "loss": 0.975, "nll_loss": 0.8567755222320557, "rewards/accuracies": 0.875, "rewards/chosen": -0.0662868544459343, "rewards/margins": 0.16301412880420685, "rewards/rejected": -0.22930097579956055, "step": 6885 }, { "epoch": 4.200701540338569, "grad_norm": 2.015458345413208, "learning_rate": 1.2825474586650335e-06, "log_odds_chosen": 0.22194120287895203, "log_odds_ratio": -0.6866336464881897, "logits/chosen": -0.9017906785011292, "logits/rejected": -0.8160480260848999, "logps/chosen": -0.9939045310020447, "logps/rejected": -1.1194571256637573, "loss": 1.0568, "nll_loss": 1.1181775331497192, "rewards/accuracies": 0.5, "rewards/chosen": -0.09939045459032059, "rewards/margins": 0.012555268593132496, "rewards/rejected": -0.1119457259774208, "step": 6886 }, { "epoch": 4.201311575415587, "grad_norm": 10.66283130645752, "learning_rate": 1.28156766687079e-06, "log_odds_chosen": 4.228841781616211, "log_odds_ratio": -0.10076898336410522, "logits/chosen": -0.5407257676124573, "logits/rejected": -0.7657267451286316, "logps/chosen": -0.3771718740463257, "logps/rejected": -3.4030628204345703, "loss": 0.9256, "nll_loss": 0.6760700345039368, "rewards/accuracies": 1.0, "rewards/chosen": -0.03771718963980675, "rewards/margins": 0.3025891184806824, "rewards/rejected": -0.34030628204345703, "step": 6887 }, { "epoch": 4.201921610492604, "grad_norm": 1.7400569915771484, "learning_rate": 1.2805878750765462e-06, "log_odds_chosen": 2.1628127098083496, "log_odds_ratio": -0.3369377553462982, "logits/chosen": -0.9418824911117554, "logits/rejected": -0.9965943098068237, "logps/chosen": -0.8410570621490479, "logps/rejected": -2.668334484100342, "loss": 1.1196, "nll_loss": 1.1150104999542236, "rewards/accuracies": 1.0, "rewards/chosen": -0.0841057077050209, "rewards/margins": 0.18272773921489716, "rewards/rejected": -0.26683345437049866, "step": 6888 }, { "epoch": 4.2025316455696204, "grad_norm": 2.290189266204834, "learning_rate": 1.2796080832823023e-06, "log_odds_chosen": 1.115281105041504, "log_odds_ratio": -0.45040008425712585, "logits/chosen": -0.7590955495834351, "logits/rejected": -0.9445620179176331, "logps/chosen": -0.833428144454956, "logps/rejected": -1.6241977214813232, "loss": 1.1233, "nll_loss": 0.7889542579650879, "rewards/accuracies": 0.75, "rewards/chosen": -0.08334282040596008, "rewards/margins": 0.07907697558403015, "rewards/rejected": -0.16241979598999023, "step": 6889 }, { "epoch": 4.203141680646637, "grad_norm": 1.63517165184021, "learning_rate": 1.2786282914880586e-06, "log_odds_chosen": 4.846165657043457, "log_odds_ratio": -0.25915995240211487, "logits/chosen": -0.9868602752685547, "logits/rejected": -1.1293797492980957, "logps/chosen": -0.8549463748931885, "logps/rejected": -5.095747947692871, "loss": 0.9777, "nll_loss": 1.129047155380249, "rewards/accuracies": 0.75, "rewards/chosen": -0.08549463748931885, "rewards/margins": 0.42408016324043274, "rewards/rejected": -0.509574830532074, "step": 6890 }, { "epoch": 4.203751715723654, "grad_norm": 1.5557726621627808, "learning_rate": 1.277648499693815e-06, "log_odds_chosen": 2.2776589393615723, "log_odds_ratio": -0.29043859243392944, "logits/chosen": -0.8627132177352905, "logits/rejected": -0.9288170337677002, "logps/chosen": -0.8068660497665405, "logps/rejected": -2.6137568950653076, "loss": 0.9357, "nll_loss": 0.9493352174758911, "rewards/accuracies": 0.875, "rewards/chosen": -0.08068660646677017, "rewards/margins": 0.18068909645080566, "rewards/rejected": -0.26137566566467285, "step": 6891 }, { "epoch": 4.204361750800671, "grad_norm": 1.712323784828186, "learning_rate": 1.2766687078995713e-06, "log_odds_chosen": 1.5516924858093262, "log_odds_ratio": -0.48713529109954834, "logits/chosen": -0.9016032218933105, "logits/rejected": -0.9383240938186646, "logps/chosen": -0.9461009502410889, "logps/rejected": -2.2899975776672363, "loss": 1.0426, "nll_loss": 1.085286021232605, "rewards/accuracies": 0.625, "rewards/chosen": -0.09461009502410889, "rewards/margins": 0.1343896985054016, "rewards/rejected": -0.2289997935295105, "step": 6892 }, { "epoch": 4.204971785877688, "grad_norm": 2.578451156616211, "learning_rate": 1.2756889161053274e-06, "log_odds_chosen": 1.8961557149887085, "log_odds_ratio": -0.2855687737464905, "logits/chosen": -0.7308430671691895, "logits/rejected": -0.6195762753486633, "logps/chosen": -0.635915219783783, "logps/rejected": -2.0113093852996826, "loss": 0.9236, "nll_loss": 0.777084231376648, "rewards/accuracies": 0.875, "rewards/chosen": -0.06359152495861053, "rewards/margins": 0.13753943145275116, "rewards/rejected": -0.2011309564113617, "step": 6893 }, { "epoch": 4.205581820954705, "grad_norm": 2.026026725769043, "learning_rate": 1.274709124311084e-06, "log_odds_chosen": 1.4322319030761719, "log_odds_ratio": -0.5163512229919434, "logits/chosen": -1.021828293800354, "logits/rejected": -1.0655815601348877, "logps/chosen": -1.0018270015716553, "logps/rejected": -2.242178440093994, "loss": 1.1377, "nll_loss": 1.0602607727050781, "rewards/accuracies": 0.75, "rewards/chosen": -0.10018269717693329, "rewards/margins": 0.12403513491153717, "rewards/rejected": -0.22421784698963165, "step": 6894 }, { "epoch": 4.2061918560317215, "grad_norm": 2.534759044647217, "learning_rate": 1.27372933251684e-06, "log_odds_chosen": 2.2378735542297363, "log_odds_ratio": -0.49482935667037964, "logits/chosen": -1.0460715293884277, "logits/rejected": -1.03081476688385, "logps/chosen": -0.98314368724823, "logps/rejected": -2.702101469039917, "loss": 1.0528, "nll_loss": 1.1690418720245361, "rewards/accuracies": 0.625, "rewards/chosen": -0.09831437468528748, "rewards/margins": 0.1718958020210266, "rewards/rejected": -0.2702101469039917, "step": 6895 }, { "epoch": 4.206801891108738, "grad_norm": 1.1566163301467896, "learning_rate": 1.2727495407225962e-06, "log_odds_chosen": 2.0804147720336914, "log_odds_ratio": -0.33749014139175415, "logits/chosen": -1.0510369539260864, "logits/rejected": -1.011975646018982, "logps/chosen": -0.803009033203125, "logps/rejected": -2.2514724731445312, "loss": 1.0914, "nll_loss": 0.9914780259132385, "rewards/accuracies": 0.75, "rewards/chosen": -0.08030090481042862, "rewards/margins": 0.1448463499546051, "rewards/rejected": -0.22514724731445312, "step": 6896 }, { "epoch": 4.207411926185756, "grad_norm": 12.633172988891602, "learning_rate": 1.2717697489283528e-06, "log_odds_chosen": 1.896336555480957, "log_odds_ratio": -0.3062729835510254, "logits/chosen": -0.8737891316413879, "logits/rejected": -0.8827411532402039, "logps/chosen": -0.7454169988632202, "logps/rejected": -2.177934408187866, "loss": 1.0302, "nll_loss": 0.9318517446517944, "rewards/accuracies": 0.875, "rewards/chosen": -0.07454170286655426, "rewards/margins": 0.14325174689292908, "rewards/rejected": -0.21779344975948334, "step": 6897 }, { "epoch": 4.208021961262773, "grad_norm": 1.2317860126495361, "learning_rate": 1.2707899571341089e-06, "log_odds_chosen": 2.4640214443206787, "log_odds_ratio": -0.5727013945579529, "logits/chosen": -0.8069130182266235, "logits/rejected": -0.8610258102416992, "logps/chosen": -0.8081102967262268, "logps/rejected": -2.970146656036377, "loss": 1.0407, "nll_loss": 1.078625202178955, "rewards/accuracies": 0.625, "rewards/chosen": -0.0808110311627388, "rewards/margins": 0.21620365977287292, "rewards/rejected": -0.29701468348503113, "step": 6898 }, { "epoch": 4.20863199633979, "grad_norm": 3.4919204711914062, "learning_rate": 1.2698101653398652e-06, "log_odds_chosen": 0.7566666007041931, "log_odds_ratio": -0.46544259786605835, "logits/chosen": -0.8809760808944702, "logits/rejected": -0.8506155014038086, "logps/chosen": -0.8993487358093262, "logps/rejected": -1.4680860042572021, "loss": 1.1073, "nll_loss": 1.0982295274734497, "rewards/accuracies": 0.875, "rewards/chosen": -0.08993487060070038, "rewards/margins": 0.056873735040426254, "rewards/rejected": -0.14680860936641693, "step": 6899 }, { "epoch": 4.209242031416807, "grad_norm": 5.839410305023193, "learning_rate": 1.2688303735456213e-06, "log_odds_chosen": 2.495898723602295, "log_odds_ratio": -0.37685427069664, "logits/chosen": -0.8352295160293579, "logits/rejected": -0.9253977537155151, "logps/chosen": -0.5646311044692993, "logps/rejected": -2.470254898071289, "loss": 0.8773, "nll_loss": 0.8843128085136414, "rewards/accuracies": 0.875, "rewards/chosen": -0.05646310746669769, "rewards/margins": 0.1905623823404312, "rewards/rejected": -0.2470254898071289, "step": 6900 }, { "epoch": 4.2098520664938235, "grad_norm": 3.6865289211273193, "learning_rate": 1.2678505817513779e-06, "log_odds_chosen": 2.1209115982055664, "log_odds_ratio": -0.2744126617908478, "logits/chosen": -0.9185009002685547, "logits/rejected": -0.7900897264480591, "logps/chosen": -0.609164834022522, "logps/rejected": -2.0845584869384766, "loss": 1.066, "nll_loss": 0.897597074508667, "rewards/accuracies": 0.875, "rewards/chosen": -0.0609164834022522, "rewards/margins": 0.14753937721252441, "rewards/rejected": -0.2084558606147766, "step": 6901 }, { "epoch": 4.21046210157084, "grad_norm": 2.0929930210113525, "learning_rate": 1.266870789957134e-06, "log_odds_chosen": 1.5556021928787231, "log_odds_ratio": -0.46735280752182007, "logits/chosen": -0.6910837888717651, "logits/rejected": -0.7750455141067505, "logps/chosen": -0.8513510227203369, "logps/rejected": -2.040266513824463, "loss": 1.0031, "nll_loss": 0.9450556039810181, "rewards/accuracies": 0.75, "rewards/chosen": -0.08513510227203369, "rewards/margins": 0.11889156699180603, "rewards/rejected": -0.20402665436267853, "step": 6902 }, { "epoch": 4.211072136647857, "grad_norm": 1.6668609380722046, "learning_rate": 1.2658909981628903e-06, "log_odds_chosen": 1.2475099563598633, "log_odds_ratio": -0.641270101070404, "logits/chosen": -0.8953333497047424, "logits/rejected": -0.9880808591842651, "logps/chosen": -0.8551583290100098, "logps/rejected": -1.8589262962341309, "loss": 1.2032, "nll_loss": 1.1337714195251465, "rewards/accuracies": 0.5, "rewards/chosen": -0.0855158343911171, "rewards/margins": 0.10037679970264435, "rewards/rejected": -0.18589262664318085, "step": 6903 }, { "epoch": 4.211682171724874, "grad_norm": 1.2985968589782715, "learning_rate": 1.2649112063686467e-06, "log_odds_chosen": 1.6563634872436523, "log_odds_ratio": -0.41963833570480347, "logits/chosen": -0.9210357069969177, "logits/rejected": -1.0625582933425903, "logps/chosen": -1.006185531616211, "logps/rejected": -2.244039297103882, "loss": 0.8933, "nll_loss": 1.1488345861434937, "rewards/accuracies": 0.625, "rewards/chosen": -0.10061855614185333, "rewards/margins": 0.12378536909818649, "rewards/rejected": -0.22440393269062042, "step": 6904 }, { "epoch": 4.212292206801891, "grad_norm": 9.587020874023438, "learning_rate": 1.2639314145744028e-06, "log_odds_chosen": 1.1216322183609009, "log_odds_ratio": -0.5183720588684082, "logits/chosen": -0.85379958152771, "logits/rejected": -0.8884360790252686, "logps/chosen": -0.8667995929718018, "logps/rejected": -1.8138964176177979, "loss": 0.8929, "nll_loss": 0.8892257213592529, "rewards/accuracies": 0.625, "rewards/chosen": -0.08667995035648346, "rewards/margins": 0.09470969438552856, "rewards/rejected": -0.18138965964317322, "step": 6905 }, { "epoch": 4.212902241878908, "grad_norm": 1.3177235126495361, "learning_rate": 1.2629516227801591e-06, "log_odds_chosen": 2.4747676849365234, "log_odds_ratio": -0.48830103874206543, "logits/chosen": -0.7568449974060059, "logits/rejected": -0.9526888728141785, "logps/chosen": -0.7512686252593994, "logps/rejected": -2.7374534606933594, "loss": 1.0886, "nll_loss": 0.9105960130691528, "rewards/accuracies": 0.625, "rewards/chosen": -0.07512685656547546, "rewards/margins": 0.19861851632595062, "rewards/rejected": -0.2737453579902649, "step": 6906 }, { "epoch": 4.213512276955925, "grad_norm": 1.931103229522705, "learning_rate": 1.2619718309859155e-06, "log_odds_chosen": 1.1451221704483032, "log_odds_ratio": -0.4083471894264221, "logits/chosen": -0.9122366905212402, "logits/rejected": -0.9463741183280945, "logps/chosen": -0.8650782704353333, "logps/rejected": -1.6897540092468262, "loss": 0.9269, "nll_loss": 0.9386885762214661, "rewards/accuracies": 0.875, "rewards/chosen": -0.08650782704353333, "rewards/margins": 0.08246757090091705, "rewards/rejected": -0.16897539794445038, "step": 6907 }, { "epoch": 4.214122312032942, "grad_norm": 6.408544063568115, "learning_rate": 1.2609920391916718e-06, "log_odds_chosen": 1.5076963901519775, "log_odds_ratio": -0.3893020451068878, "logits/chosen": -0.6417970061302185, "logits/rejected": -0.7207551002502441, "logps/chosen": -0.5978857278823853, "logps/rejected": -1.6061663627624512, "loss": 1.0567, "nll_loss": 0.8869478702545166, "rewards/accuracies": 0.75, "rewards/chosen": -0.059788577258586884, "rewards/margins": 0.10082806646823883, "rewards/rejected": -0.16061663627624512, "step": 6908 }, { "epoch": 4.214732347109959, "grad_norm": 1.1362475156784058, "learning_rate": 1.260012247397428e-06, "log_odds_chosen": 1.2923213243484497, "log_odds_ratio": -0.48438745737075806, "logits/chosen": -0.6079331040382385, "logits/rejected": -0.667646586894989, "logps/chosen": -0.747052013874054, "logps/rejected": -1.3786797523498535, "loss": 0.9917, "nll_loss": 0.9363043308258057, "rewards/accuracies": 0.75, "rewards/chosen": -0.07470519840717316, "rewards/margins": 0.06316277384757996, "rewards/rejected": -0.1378679871559143, "step": 6909 }, { "epoch": 4.215342382186976, "grad_norm": 4.055028438568115, "learning_rate": 1.2590324556031843e-06, "log_odds_chosen": 1.0883514881134033, "log_odds_ratio": -0.4470182955265045, "logits/chosen": -0.92777419090271, "logits/rejected": -0.9184876680374146, "logps/chosen": -0.8801626563072205, "logps/rejected": -1.6840593814849854, "loss": 1.0416, "nll_loss": 0.9403356909751892, "rewards/accuracies": 0.75, "rewards/chosen": -0.08801627159118652, "rewards/margins": 0.08038967102766037, "rewards/rejected": -0.1684059500694275, "step": 6910 }, { "epoch": 4.215952417263993, "grad_norm": 11.141800880432129, "learning_rate": 1.2580526638089406e-06, "log_odds_chosen": 1.3202351331710815, "log_odds_ratio": -0.37122344970703125, "logits/chosen": -0.9453397393226624, "logits/rejected": -0.9276375770568848, "logps/chosen": -0.8304498791694641, "logps/rejected": -1.8148064613342285, "loss": 1.0812, "nll_loss": 1.042776346206665, "rewards/accuracies": 0.875, "rewards/chosen": -0.08304499089717865, "rewards/margins": 0.0984356477856636, "rewards/rejected": -0.18148064613342285, "step": 6911 }, { "epoch": 4.21656245234101, "grad_norm": 5.873286247253418, "learning_rate": 1.2570728720146967e-06, "log_odds_chosen": 2.260455369949341, "log_odds_ratio": -0.4725850224494934, "logits/chosen": -0.8499364852905273, "logits/rejected": -1.0576618909835815, "logps/chosen": -0.7365685701370239, "logps/rejected": -2.582491636276245, "loss": 1.1188, "nll_loss": 0.962317705154419, "rewards/accuracies": 0.625, "rewards/chosen": -0.07365685701370239, "rewards/margins": 0.18459230661392212, "rewards/rejected": -0.2582491636276245, "step": 6912 }, { "epoch": 4.2171724874180265, "grad_norm": 1.3470338582992554, "learning_rate": 1.256093080220453e-06, "log_odds_chosen": 1.4666223526000977, "log_odds_ratio": -0.5877373814582825, "logits/chosen": -0.8371448516845703, "logits/rejected": -0.9579023718833923, "logps/chosen": -0.7646569013595581, "logps/rejected": -1.8494534492492676, "loss": 1.0529, "nll_loss": 0.9420218467712402, "rewards/accuracies": 0.625, "rewards/chosen": -0.07646569609642029, "rewards/margins": 0.10847965627908707, "rewards/rejected": -0.18494534492492676, "step": 6913 }, { "epoch": 4.217782522495043, "grad_norm": 1.3655717372894287, "learning_rate": 1.2551132884262094e-06, "log_odds_chosen": 2.2611465454101562, "log_odds_ratio": -0.4852384924888611, "logits/chosen": -1.0631059408187866, "logits/rejected": -1.1232397556304932, "logps/chosen": -0.7654143571853638, "logps/rejected": -2.599032402038574, "loss": 1.0094, "nll_loss": 1.0729219913482666, "rewards/accuracies": 0.5, "rewards/chosen": -0.07654143124818802, "rewards/margins": 0.18336182832717896, "rewards/rejected": -0.2599032521247864, "step": 6914 }, { "epoch": 4.21839255757206, "grad_norm": 1.4290199279785156, "learning_rate": 1.2541334966319657e-06, "log_odds_chosen": 4.9029459953308105, "log_odds_ratio": -0.4127463400363922, "logits/chosen": -0.5929837226867676, "logits/rejected": -0.720107913017273, "logps/chosen": -0.6199294924736023, "logps/rejected": -4.958460807800293, "loss": 0.922, "nll_loss": 0.77680504322052, "rewards/accuracies": 0.875, "rewards/chosen": -0.06199295073747635, "rewards/margins": 0.4338531792163849, "rewards/rejected": -0.495846152305603, "step": 6915 }, { "epoch": 4.219002592649077, "grad_norm": 1.6168485879898071, "learning_rate": 1.2531537048377218e-06, "log_odds_chosen": 2.5377440452575684, "log_odds_ratio": -0.4176827669143677, "logits/chosen": -0.8830992579460144, "logits/rejected": -1.098369836807251, "logps/chosen": -0.938761830329895, "logps/rejected": -3.087641716003418, "loss": 1.0036, "nll_loss": 1.0151216983795166, "rewards/accuracies": 0.75, "rewards/chosen": -0.0938761904835701, "rewards/margins": 0.21488799154758453, "rewards/rejected": -0.30876415967941284, "step": 6916 }, { "epoch": 4.219612627726094, "grad_norm": 1.066063404083252, "learning_rate": 1.2521739130434784e-06, "log_odds_chosen": 3.282970905303955, "log_odds_ratio": -0.3249443471431732, "logits/chosen": -0.6668214797973633, "logits/rejected": -0.7762539386749268, "logps/chosen": -0.5769740343093872, "logps/rejected": -3.294776678085327, "loss": 0.8539, "nll_loss": 0.7363104820251465, "rewards/accuracies": 0.875, "rewards/chosen": -0.05769740417599678, "rewards/margins": 0.2717802822589874, "rewards/rejected": -0.3294776678085327, "step": 6917 }, { "epoch": 4.220222662803111, "grad_norm": 3.058692693710327, "learning_rate": 1.2511941212492345e-06, "log_odds_chosen": 1.9146161079406738, "log_odds_ratio": -0.3978911340236664, "logits/chosen": -0.9802269339561462, "logits/rejected": -0.9212324619293213, "logps/chosen": -0.8238006830215454, "logps/rejected": -2.3562259674072266, "loss": 1.1292, "nll_loss": 1.1120678186416626, "rewards/accuracies": 0.75, "rewards/chosen": -0.08238007128238678, "rewards/margins": 0.1532425433397293, "rewards/rejected": -0.2356226146221161, "step": 6918 }, { "epoch": 4.2208326978801285, "grad_norm": 1.1442581415176392, "learning_rate": 1.2502143294549906e-06, "log_odds_chosen": 2.1202077865600586, "log_odds_ratio": -0.5104620456695557, "logits/chosen": -0.7841434478759766, "logits/rejected": -0.9150713086128235, "logps/chosen": -0.660495400428772, "logps/rejected": -2.341407537460327, "loss": 1.1338, "nll_loss": 0.8770321011543274, "rewards/accuracies": 0.75, "rewards/chosen": -0.06604953855276108, "rewards/margins": 0.16809122264385223, "rewards/rejected": -0.23414075374603271, "step": 6919 }, { "epoch": 4.221442732957145, "grad_norm": 12.89665412902832, "learning_rate": 1.249234537660747e-06, "log_odds_chosen": 0.6710866689682007, "log_odds_ratio": -0.5638943910598755, "logits/chosen": -0.7310438752174377, "logits/rejected": -0.6232043504714966, "logps/chosen": -0.6281529068946838, "logps/rejected": -1.0990475416183472, "loss": 0.9184, "nll_loss": 0.8123689293861389, "rewards/accuracies": 0.75, "rewards/chosen": -0.06281528621912003, "rewards/margins": 0.04708946496248245, "rewards/rejected": -0.10990475863218307, "step": 6920 }, { "epoch": 4.222052768034162, "grad_norm": 1.6098679304122925, "learning_rate": 1.2482547458665033e-06, "log_odds_chosen": 2.894375801086426, "log_odds_ratio": -0.26610130071640015, "logits/chosen": -0.8355870246887207, "logits/rejected": -1.0308973789215088, "logps/chosen": -0.7678718566894531, "logps/rejected": -3.043196678161621, "loss": 1.1003, "nll_loss": 1.0576552152633667, "rewards/accuracies": 0.75, "rewards/chosen": -0.07678718119859695, "rewards/margins": 0.2275325059890747, "rewards/rejected": -0.30431967973709106, "step": 6921 }, { "epoch": 4.222662803111179, "grad_norm": 3.925969362258911, "learning_rate": 1.2472749540722596e-06, "log_odds_chosen": 2.8640313148498535, "log_odds_ratio": -0.3256760239601135, "logits/chosen": -0.721832275390625, "logits/rejected": -0.8383458256721497, "logps/chosen": -0.6767925024032593, "logps/rejected": -2.9074716567993164, "loss": 1.2354, "nll_loss": 0.8280203342437744, "rewards/accuracies": 0.75, "rewards/chosen": -0.06767924875020981, "rewards/margins": 0.22306790947914124, "rewards/rejected": -0.29074716567993164, "step": 6922 }, { "epoch": 4.223272838188196, "grad_norm": 4.19284200668335, "learning_rate": 1.2462951622780158e-06, "log_odds_chosen": 1.4973351955413818, "log_odds_ratio": -0.40545836091041565, "logits/chosen": -0.6168828010559082, "logits/rejected": -0.7665877938270569, "logps/chosen": -0.8655943870544434, "logps/rejected": -1.9333399534225464, "loss": 1.1375, "nll_loss": 0.9868018627166748, "rewards/accuracies": 0.875, "rewards/chosen": -0.08655943721532822, "rewards/margins": 0.10677456110715866, "rewards/rejected": -0.19333401322364807, "step": 6923 }, { "epoch": 4.223882873265213, "grad_norm": 1.9239310026168823, "learning_rate": 1.2453153704837723e-06, "log_odds_chosen": 1.5620723962783813, "log_odds_ratio": -0.5887024402618408, "logits/chosen": -0.9109362959861755, "logits/rejected": -1.0751323699951172, "logps/chosen": -0.8382563591003418, "logps/rejected": -2.0364768505096436, "loss": 1.0326, "nll_loss": 1.0918300151824951, "rewards/accuracies": 0.625, "rewards/chosen": -0.08382563292980194, "rewards/margins": 0.11982205510139465, "rewards/rejected": -0.2036476880311966, "step": 6924 }, { "epoch": 4.22449290834223, "grad_norm": 1.8213540315628052, "learning_rate": 1.2443355786895284e-06, "log_odds_chosen": 4.350817680358887, "log_odds_ratio": -0.26770463585853577, "logits/chosen": -0.6885719299316406, "logits/rejected": -0.8619098663330078, "logps/chosen": -0.682702898979187, "logps/rejected": -4.432793617248535, "loss": 0.96, "nll_loss": 0.7280855774879456, "rewards/accuracies": 0.875, "rewards/chosen": -0.06827028840780258, "rewards/margins": 0.37500911951065063, "rewards/rejected": -0.4432793855667114, "step": 6925 }, { "epoch": 4.225102943419246, "grad_norm": 12.34052848815918, "learning_rate": 1.2433557868952845e-06, "log_odds_chosen": 1.8923757076263428, "log_odds_ratio": -0.29354849457740784, "logits/chosen": -0.8509747982025146, "logits/rejected": -0.9183460474014282, "logps/chosen": -0.7035262584686279, "logps/rejected": -2.093766450881958, "loss": 0.9619, "nll_loss": 0.8867226839065552, "rewards/accuracies": 0.875, "rewards/chosen": -0.07035262882709503, "rewards/margins": 0.139024019241333, "rewards/rejected": -0.20937664806842804, "step": 6926 }, { "epoch": 4.225712978496263, "grad_norm": 1.4868385791778564, "learning_rate": 1.242375995101041e-06, "log_odds_chosen": 1.849972128868103, "log_odds_ratio": -0.332742303609848, "logits/chosen": -0.7158249020576477, "logits/rejected": -0.8357239365577698, "logps/chosen": -0.7948068976402283, "logps/rejected": -2.1720213890075684, "loss": 1.0949, "nll_loss": 0.950779914855957, "rewards/accuracies": 0.875, "rewards/chosen": -0.07948069274425507, "rewards/margins": 0.137721449136734, "rewards/rejected": -0.21720215678215027, "step": 6927 }, { "epoch": 4.22632301357328, "grad_norm": 1.813951015472412, "learning_rate": 1.2413962033067972e-06, "log_odds_chosen": 3.0543673038482666, "log_odds_ratio": -0.3182714581489563, "logits/chosen": -0.797152042388916, "logits/rejected": -1.0512237548828125, "logps/chosen": -0.6903187036514282, "logps/rejected": -3.278782606124878, "loss": 0.9874, "nll_loss": 0.851374626159668, "rewards/accuracies": 0.75, "rewards/chosen": -0.06903187185525894, "rewards/margins": 0.2588464319705963, "rewards/rejected": -0.32787829637527466, "step": 6928 }, { "epoch": 4.226933048650297, "grad_norm": 1.7842248678207397, "learning_rate": 1.2404164115125535e-06, "log_odds_chosen": 0.21911144256591797, "log_odds_ratio": -0.681222140789032, "logits/chosen": -0.9053926467895508, "logits/rejected": -0.9807342290878296, "logps/chosen": -1.094383716583252, "logps/rejected": -1.3332030773162842, "loss": 1.1289, "nll_loss": 1.1317161321640015, "rewards/accuracies": 0.5, "rewards/chosen": -0.10943837463855743, "rewards/margins": 0.0238819420337677, "rewards/rejected": -0.13332030177116394, "step": 6929 }, { "epoch": 4.227543083727315, "grad_norm": 1.9689832925796509, "learning_rate": 1.2394366197183097e-06, "log_odds_chosen": 0.47525864839553833, "log_odds_ratio": -0.5297527313232422, "logits/chosen": -0.9256174564361572, "logits/rejected": -0.9022469520568848, "logps/chosen": -0.9575940370559692, "logps/rejected": -1.2578752040863037, "loss": 1.1567, "nll_loss": 1.0507279634475708, "rewards/accuracies": 0.75, "rewards/chosen": -0.09575940668582916, "rewards/margins": 0.030028119683265686, "rewards/rejected": -0.12578752636909485, "step": 6930 }, { "epoch": 4.2281531188043315, "grad_norm": 9.166234970092773, "learning_rate": 1.2384568279240662e-06, "log_odds_chosen": 1.34732186794281, "log_odds_ratio": -0.5054044723510742, "logits/chosen": -1.0046405792236328, "logits/rejected": -0.9798740148544312, "logps/chosen": -0.7856835126876831, "logps/rejected": -1.6487056016921997, "loss": 1.1377, "nll_loss": 1.1064865589141846, "rewards/accuracies": 0.625, "rewards/chosen": -0.07856835424900055, "rewards/margins": 0.08630220592021942, "rewards/rejected": -0.16487056016921997, "step": 6931 }, { "epoch": 4.228763153881348, "grad_norm": 1.2493537664413452, "learning_rate": 1.2374770361298223e-06, "log_odds_chosen": 1.7119996547698975, "log_odds_ratio": -0.5963752269744873, "logits/chosen": -0.8823389410972595, "logits/rejected": -0.8834272623062134, "logps/chosen": -0.7745813727378845, "logps/rejected": -1.9851521253585815, "loss": 0.9645, "nll_loss": 1.0499190092086792, "rewards/accuracies": 0.375, "rewards/chosen": -0.07745814323425293, "rewards/margins": 0.12105708569288254, "rewards/rejected": -0.19851522147655487, "step": 6932 }, { "epoch": 4.229373188958365, "grad_norm": 1.7866815328598022, "learning_rate": 1.2364972443355785e-06, "log_odds_chosen": 1.6037293672561646, "log_odds_ratio": -0.44928041100502014, "logits/chosen": -1.0450135469436646, "logits/rejected": -0.875777006149292, "logps/chosen": -0.7626932859420776, "logps/rejected": -1.8937512636184692, "loss": 1.0384, "nll_loss": 1.183401346206665, "rewards/accuracies": 0.75, "rewards/chosen": -0.07626932859420776, "rewards/margins": 0.11310581862926483, "rewards/rejected": -0.1893751323223114, "step": 6933 }, { "epoch": 4.229983224035382, "grad_norm": 2.6434638500213623, "learning_rate": 1.235517452541335e-06, "log_odds_chosen": 1.6186347007751465, "log_odds_ratio": -0.3197977840900421, "logits/chosen": -0.8341590762138367, "logits/rejected": -0.7937341332435608, "logps/chosen": -0.5747128129005432, "logps/rejected": -1.575368881225586, "loss": 1.0406, "nll_loss": 0.9573253989219666, "rewards/accuracies": 0.875, "rewards/chosen": -0.05747127905488014, "rewards/margins": 0.10006560385227203, "rewards/rejected": -0.15753689408302307, "step": 6934 }, { "epoch": 4.230593259112399, "grad_norm": 2.167839527130127, "learning_rate": 1.2345376607470911e-06, "log_odds_chosen": 3.076721429824829, "log_odds_ratio": -0.24127072095870972, "logits/chosen": -0.9532060623168945, "logits/rejected": -1.0453962087631226, "logps/chosen": -0.8475780487060547, "logps/rejected": -3.4391567707061768, "loss": 0.9202, "nll_loss": 0.9588574767112732, "rewards/accuracies": 0.875, "rewards/chosen": -0.08475780487060547, "rewards/margins": 0.2591578960418701, "rewards/rejected": -0.3439157009124756, "step": 6935 }, { "epoch": 4.231203294189416, "grad_norm": 2.2919301986694336, "learning_rate": 1.2335578689528475e-06, "log_odds_chosen": 1.0214130878448486, "log_odds_ratio": -0.4696808457374573, "logits/chosen": -0.9216853380203247, "logits/rejected": -0.855690062046051, "logps/chosen": -0.8730878233909607, "logps/rejected": -1.710245132446289, "loss": 0.9569, "nll_loss": 0.9804233312606812, "rewards/accuracies": 0.75, "rewards/chosen": -0.08730878680944443, "rewards/margins": 0.08371573686599731, "rewards/rejected": -0.17102450132369995, "step": 6936 }, { "epoch": 4.231813329266433, "grad_norm": 1.2280410528182983, "learning_rate": 1.2325780771586038e-06, "log_odds_chosen": 0.7877889275550842, "log_odds_ratio": -0.499553382396698, "logits/chosen": -0.7765333652496338, "logits/rejected": -0.7649755477905273, "logps/chosen": -0.873020350933075, "logps/rejected": -1.3880668878555298, "loss": 1.0038, "nll_loss": 0.9937610030174255, "rewards/accuracies": 0.625, "rewards/chosen": -0.08730204403400421, "rewards/margins": 0.051504649221897125, "rewards/rejected": -0.13880668580532074, "step": 6937 }, { "epoch": 4.2324233643434495, "grad_norm": 1.4941153526306152, "learning_rate": 1.2315982853643601e-06, "log_odds_chosen": 1.372807264328003, "log_odds_ratio": -0.5344919562339783, "logits/chosen": -0.8990718126296997, "logits/rejected": -0.9962972402572632, "logps/chosen": -0.7549108266830444, "logps/rejected": -1.7347010374069214, "loss": 0.9696, "nll_loss": 0.9732519388198853, "rewards/accuracies": 0.625, "rewards/chosen": -0.07549109309911728, "rewards/margins": 0.09797901660203934, "rewards/rejected": -0.17347010970115662, "step": 6938 }, { "epoch": 4.233033399420466, "grad_norm": 10.633999824523926, "learning_rate": 1.2306184935701163e-06, "log_odds_chosen": 1.6105499267578125, "log_odds_ratio": -0.41356760263442993, "logits/chosen": -0.9558402895927429, "logits/rejected": -0.9964725971221924, "logps/chosen": -0.8450126647949219, "logps/rejected": -2.074829339981079, "loss": 1.1046, "nll_loss": 1.0457265377044678, "rewards/accuracies": 0.875, "rewards/chosen": -0.08450127393007278, "rewards/margins": 0.12298166006803513, "rewards/rejected": -0.2074829339981079, "step": 6939 }, { "epoch": 4.233643434497484, "grad_norm": 1.453173279762268, "learning_rate": 1.2296387017758724e-06, "log_odds_chosen": 2.4535341262817383, "log_odds_ratio": -0.4004415273666382, "logits/chosen": -1.0079814195632935, "logits/rejected": -0.9806409478187561, "logps/chosen": -0.6633880734443665, "logps/rejected": -2.714944839477539, "loss": 0.8607, "nll_loss": 0.7937197089195251, "rewards/accuracies": 0.875, "rewards/chosen": -0.06633880734443665, "rewards/margins": 0.20515570044517517, "rewards/rejected": -0.2714945077896118, "step": 6940 }, { "epoch": 4.234253469574501, "grad_norm": 1.1128482818603516, "learning_rate": 1.228658909981629e-06, "log_odds_chosen": 0.9883135557174683, "log_odds_ratio": -0.4221872091293335, "logits/chosen": -1.121080994606018, "logits/rejected": -0.9679819941520691, "logps/chosen": -0.7737222909927368, "logps/rejected": -1.4451152086257935, "loss": 1.0026, "nll_loss": 0.9792439937591553, "rewards/accuracies": 0.75, "rewards/chosen": -0.0773722231388092, "rewards/margins": 0.06713929772377014, "rewards/rejected": -0.14451152086257935, "step": 6941 }, { "epoch": 4.234863504651518, "grad_norm": 1.6501368284225464, "learning_rate": 1.227679118187385e-06, "log_odds_chosen": 1.7867885828018188, "log_odds_ratio": -0.35937437415122986, "logits/chosen": -0.7840416431427002, "logits/rejected": -0.8112545609474182, "logps/chosen": -0.5678427219390869, "logps/rejected": -1.7768619060516357, "loss": 0.8855, "nll_loss": 0.6045144200325012, "rewards/accuracies": 0.875, "rewards/chosen": -0.05678426846861839, "rewards/margins": 0.1209019273519516, "rewards/rejected": -0.1776861995458603, "step": 6942 }, { "epoch": 4.235473539728535, "grad_norm": 2.6073250770568848, "learning_rate": 1.2266993263931414e-06, "log_odds_chosen": 1.5327616930007935, "log_odds_ratio": -0.3918113112449646, "logits/chosen": -0.806124210357666, "logits/rejected": -0.9384678602218628, "logps/chosen": -0.7669115662574768, "logps/rejected": -1.934832215309143, "loss": 1.0685, "nll_loss": 0.849594235420227, "rewards/accuracies": 0.875, "rewards/chosen": -0.0766911506652832, "rewards/margins": 0.11679207533597946, "rewards/rejected": -0.19348324835300446, "step": 6943 }, { "epoch": 4.236083574805551, "grad_norm": 1.2091221809387207, "learning_rate": 1.2257195345988977e-06, "log_odds_chosen": 2.764218807220459, "log_odds_ratio": -0.3891929090023041, "logits/chosen": -0.7052983641624451, "logits/rejected": -0.8768481016159058, "logps/chosen": -0.7949634194374084, "logps/rejected": -3.176367998123169, "loss": 1.0145, "nll_loss": 0.9211960434913635, "rewards/accuracies": 0.625, "rewards/chosen": -0.0794963464140892, "rewards/margins": 0.23814049363136292, "rewards/rejected": -0.3176368176937103, "step": 6944 }, { "epoch": 4.236693609882568, "grad_norm": 1.6294167041778564, "learning_rate": 1.224739742804654e-06, "log_odds_chosen": 1.5598715543746948, "log_odds_ratio": -0.5780054330825806, "logits/chosen": -0.8626641035079956, "logits/rejected": -1.036344289779663, "logps/chosen": -0.8531632423400879, "logps/rejected": -2.1784489154815674, "loss": 1.1149, "nll_loss": 1.1016945838928223, "rewards/accuracies": 0.5, "rewards/chosen": -0.08531633019447327, "rewards/margins": 0.13252854347229004, "rewards/rejected": -0.2178448885679245, "step": 6945 }, { "epoch": 4.237303644959585, "grad_norm": 1.3522554636001587, "learning_rate": 1.2237599510104102e-06, "log_odds_chosen": 2.2295761108398438, "log_odds_ratio": -0.38618606328964233, "logits/chosen": -0.7322742342948914, "logits/rejected": -0.7615076303482056, "logps/chosen": -0.5338469743728638, "logps/rejected": -2.0426483154296875, "loss": 0.9771, "nll_loss": 0.9160484671592712, "rewards/accuracies": 0.75, "rewards/chosen": -0.0533846914768219, "rewards/margins": 0.1508801430463791, "rewards/rejected": -0.204264834523201, "step": 6946 }, { "epoch": 4.237913680036602, "grad_norm": 1.283852458000183, "learning_rate": 1.2227801592161667e-06, "log_odds_chosen": 2.6496949195861816, "log_odds_ratio": -0.3627910315990448, "logits/chosen": -0.9491952657699585, "logits/rejected": -1.080302357673645, "logps/chosen": -0.6284329295158386, "logps/rejected": -2.8056323528289795, "loss": 0.9934, "nll_loss": 0.8718578815460205, "rewards/accuracies": 0.625, "rewards/chosen": -0.06284329295158386, "rewards/margins": 0.21771995723247528, "rewards/rejected": -0.28056323528289795, "step": 6947 }, { "epoch": 4.238523715113619, "grad_norm": 1.45695161819458, "learning_rate": 1.2218003674219228e-06, "log_odds_chosen": 0.35008159279823303, "log_odds_ratio": -0.8232802152633667, "logits/chosen": -1.0087120532989502, "logits/rejected": -1.028226375579834, "logps/chosen": -1.189939260482788, "logps/rejected": -1.4489704370498657, "loss": 1.0954, "nll_loss": 1.278125524520874, "rewards/accuracies": 0.5, "rewards/chosen": -0.11899392306804657, "rewards/margins": 0.025903120636940002, "rewards/rejected": -0.14489704370498657, "step": 6948 }, { "epoch": 4.239133750190636, "grad_norm": 2.441830635070801, "learning_rate": 1.220820575627679e-06, "log_odds_chosen": 0.877027153968811, "log_odds_ratio": -0.4363955855369568, "logits/chosen": -0.7144210338592529, "logits/rejected": -0.8940390944480896, "logps/chosen": -0.8998865485191345, "logps/rejected": -1.5347213745117188, "loss": 0.9809, "nll_loss": 1.1431338787078857, "rewards/accuracies": 0.875, "rewards/chosen": -0.08998866379261017, "rewards/margins": 0.06348348408937454, "rewards/rejected": -0.1534721404314041, "step": 6949 }, { "epoch": 4.2397437852676525, "grad_norm": 1.483076810836792, "learning_rate": 1.2198407838334353e-06, "log_odds_chosen": 2.4934940338134766, "log_odds_ratio": -0.2023524045944214, "logits/chosen": -0.76439368724823, "logits/rejected": -0.9689805507659912, "logps/chosen": -0.7228564620018005, "logps/rejected": -2.638801336288452, "loss": 1.1058, "nll_loss": 0.9769376516342163, "rewards/accuracies": 1.0, "rewards/chosen": -0.07228564471006393, "rewards/margins": 0.19159449636936188, "rewards/rejected": -0.2638801634311676, "step": 6950 }, { "epoch": 4.24035382034467, "grad_norm": 1.314850926399231, "learning_rate": 1.2188609920391916e-06, "log_odds_chosen": 1.6436349153518677, "log_odds_ratio": -0.3977062702178955, "logits/chosen": -0.8327049016952515, "logits/rejected": -1.0425941944122314, "logps/chosen": -0.9562416076660156, "logps/rejected": -2.230522871017456, "loss": 1.1282, "nll_loss": 1.0219234228134155, "rewards/accuracies": 0.875, "rewards/chosen": -0.0956241637468338, "rewards/margins": 0.1274281144142151, "rewards/rejected": -0.22305229306221008, "step": 6951 }, { "epoch": 4.240963855421687, "grad_norm": 2.1473777294158936, "learning_rate": 1.217881200244948e-06, "log_odds_chosen": 1.2938722372055054, "log_odds_ratio": -0.5190263390541077, "logits/chosen": -0.959376335144043, "logits/rejected": -0.9958789348602295, "logps/chosen": -0.7782957553863525, "logps/rejected": -1.889758586883545, "loss": 1.0949, "nll_loss": 1.2025110721588135, "rewards/accuracies": 0.75, "rewards/chosen": -0.07782956957817078, "rewards/margins": 0.11114628612995148, "rewards/rejected": -0.18897587060928345, "step": 6952 }, { "epoch": 4.241573890498704, "grad_norm": 7.914224624633789, "learning_rate": 1.216901408450704e-06, "log_odds_chosen": 0.7812187075614929, "log_odds_ratio": -0.5830249786376953, "logits/chosen": -1.1553502082824707, "logits/rejected": -1.0768405199050903, "logps/chosen": -1.0525422096252441, "logps/rejected": -1.5684083700180054, "loss": 1.122, "nll_loss": 1.2666994333267212, "rewards/accuracies": 0.5, "rewards/chosen": -0.10525421798229218, "rewards/margins": 0.05158662796020508, "rewards/rejected": -0.15684084594249725, "step": 6953 }, { "epoch": 4.242183925575721, "grad_norm": 2.5400469303131104, "learning_rate": 1.2159216166564606e-06, "log_odds_chosen": 0.5130496025085449, "log_odds_ratio": -0.5181150436401367, "logits/chosen": -0.9512194395065308, "logits/rejected": -1.0079368352890015, "logps/chosen": -0.7919643521308899, "logps/rejected": -1.0835132598876953, "loss": 1.1866, "nll_loss": 1.0174802541732788, "rewards/accuracies": 0.75, "rewards/chosen": -0.07919643819332123, "rewards/margins": 0.029154885560274124, "rewards/rejected": -0.10835132747888565, "step": 6954 }, { "epoch": 4.242793960652738, "grad_norm": 14.068400382995605, "learning_rate": 1.2149418248622168e-06, "log_odds_chosen": 1.6853184700012207, "log_odds_ratio": -0.5324406623840332, "logits/chosen": -0.9717839956283569, "logits/rejected": -1.0096064805984497, "logps/chosen": -0.8479799032211304, "logps/rejected": -2.1537604331970215, "loss": 1.2748, "nll_loss": 1.032707691192627, "rewards/accuracies": 0.625, "rewards/chosen": -0.08479798585176468, "rewards/margins": 0.13057808578014374, "rewards/rejected": -0.21537606418132782, "step": 6955 }, { "epoch": 4.2434039957297545, "grad_norm": 2.157606363296509, "learning_rate": 1.2139620330679729e-06, "log_odds_chosen": 1.7327091693878174, "log_odds_ratio": -0.596879243850708, "logits/chosen": -0.7918339967727661, "logits/rejected": -0.8443483710289001, "logps/chosen": -0.7388508915901184, "logps/rejected": -2.005314826965332, "loss": 1.0817, "nll_loss": 0.9847311973571777, "rewards/accuracies": 0.5, "rewards/chosen": -0.07388509064912796, "rewards/margins": 0.12664641439914703, "rewards/rejected": -0.2005314826965332, "step": 6956 }, { "epoch": 4.244014030806771, "grad_norm": 5.222532749176025, "learning_rate": 1.2129822412737292e-06, "log_odds_chosen": 2.423830032348633, "log_odds_ratio": -0.25879955291748047, "logits/chosen": -0.8903677463531494, "logits/rejected": -1.052620530128479, "logps/chosen": -0.5288093090057373, "logps/rejected": -2.1813864707946777, "loss": 1.1489, "nll_loss": 1.066705584526062, "rewards/accuracies": 0.875, "rewards/chosen": -0.05288092419505119, "rewards/margins": 0.16525773704051971, "rewards/rejected": -0.2181386649608612, "step": 6957 }, { "epoch": 4.244624065883788, "grad_norm": 2.4882116317749023, "learning_rate": 1.2120024494794855e-06, "log_odds_chosen": 1.7057380676269531, "log_odds_ratio": -0.24065181612968445, "logits/chosen": -1.0204613208770752, "logits/rejected": -1.0346906185150146, "logps/chosen": -0.983847975730896, "logps/rejected": -2.31691575050354, "loss": 1.1392, "nll_loss": 1.0767285823822021, "rewards/accuracies": 1.0, "rewards/chosen": -0.0983848050236702, "rewards/margins": 0.13330677151679993, "rewards/rejected": -0.23169155418872833, "step": 6958 }, { "epoch": 4.245234100960805, "grad_norm": 1.3311638832092285, "learning_rate": 1.2110226576852419e-06, "log_odds_chosen": 1.5801159143447876, "log_odds_ratio": -0.4679039716720581, "logits/chosen": -1.0354604721069336, "logits/rejected": -1.0903992652893066, "logps/chosen": -0.8295694589614868, "logps/rejected": -2.0391340255737305, "loss": 1.1732, "nll_loss": 1.3670244216918945, "rewards/accuracies": 0.75, "rewards/chosen": -0.0829569548368454, "rewards/margins": 0.12095647305250168, "rewards/rejected": -0.20391342043876648, "step": 6959 }, { "epoch": 4.245844136037822, "grad_norm": 1.814832329750061, "learning_rate": 1.210042865890998e-06, "log_odds_chosen": 2.9205784797668457, "log_odds_ratio": -0.2634729743003845, "logits/chosen": -0.789409875869751, "logits/rejected": -1.036889910697937, "logps/chosen": -0.48441600799560547, "logps/rejected": -2.690230369567871, "loss": 1.17, "nll_loss": 0.8798109889030457, "rewards/accuracies": 0.75, "rewards/chosen": -0.04844159632921219, "rewards/margins": 0.22058142721652985, "rewards/rejected": -0.26902303099632263, "step": 6960 }, { "epoch": 4.246454171114839, "grad_norm": 1.2930731773376465, "learning_rate": 1.2090630740967545e-06, "log_odds_chosen": 1.656087875366211, "log_odds_ratio": -0.41649359464645386, "logits/chosen": -0.9533573389053345, "logits/rejected": -0.9968248605728149, "logps/chosen": -0.7007317543029785, "logps/rejected": -1.8960442543029785, "loss": 1.1485, "nll_loss": 0.9694496393203735, "rewards/accuracies": 0.875, "rewards/chosen": -0.07007317245006561, "rewards/margins": 0.11953124403953552, "rewards/rejected": -0.18960443139076233, "step": 6961 }, { "epoch": 4.247064206191856, "grad_norm": 1.1169449090957642, "learning_rate": 1.2080832823025107e-06, "log_odds_chosen": 2.113898277282715, "log_odds_ratio": -0.27056166529655457, "logits/chosen": -0.7460326552391052, "logits/rejected": -0.8162500262260437, "logps/chosen": -0.7015782594680786, "logps/rejected": -2.154669761657715, "loss": 0.9372, "nll_loss": 0.9642160534858704, "rewards/accuracies": 0.75, "rewards/chosen": -0.07015782594680786, "rewards/margins": 0.14530916512012482, "rewards/rejected": -0.21546699106693268, "step": 6962 }, { "epoch": 4.247674241268873, "grad_norm": 4.819353103637695, "learning_rate": 1.2071034905082668e-06, "log_odds_chosen": 2.2358603477478027, "log_odds_ratio": -0.22529160976409912, "logits/chosen": -0.832987904548645, "logits/rejected": -1.0761200189590454, "logps/chosen": -0.6822007298469543, "logps/rejected": -2.3097681999206543, "loss": 1.0482, "nll_loss": 1.092474102973938, "rewards/accuracies": 1.0, "rewards/chosen": -0.06822007894515991, "rewards/margins": 0.16275674104690552, "rewards/rejected": -0.23097681999206543, "step": 6963 }, { "epoch": 4.24828427634589, "grad_norm": 9.552886009216309, "learning_rate": 1.2061236987140233e-06, "log_odds_chosen": 0.6797568202018738, "log_odds_ratio": -0.6397037506103516, "logits/chosen": -1.116262674331665, "logits/rejected": -1.1395667791366577, "logps/chosen": -0.8447691202163696, "logps/rejected": -1.4980762004852295, "loss": 1.0398, "nll_loss": 1.0746850967407227, "rewards/accuracies": 0.5, "rewards/chosen": -0.08447691053152084, "rewards/margins": 0.06533071398735046, "rewards/rejected": -0.1498076319694519, "step": 6964 }, { "epoch": 4.248894311422907, "grad_norm": 1.100306510925293, "learning_rate": 1.2051439069197795e-06, "log_odds_chosen": 1.0757300853729248, "log_odds_ratio": -0.49926960468292236, "logits/chosen": -0.6706141233444214, "logits/rejected": -0.8878942728042603, "logps/chosen": -0.7288422584533691, "logps/rejected": -1.5069941282272339, "loss": 1.1216, "nll_loss": 0.9680933952331543, "rewards/accuracies": 0.625, "rewards/chosen": -0.0728842243552208, "rewards/margins": 0.07781518250703812, "rewards/rejected": -0.1506994068622589, "step": 6965 }, { "epoch": 4.249504346499924, "grad_norm": 9.027831077575684, "learning_rate": 1.2041641151255358e-06, "log_odds_chosen": 2.3245372772216797, "log_odds_ratio": -0.3092518448829651, "logits/chosen": -0.7771859169006348, "logits/rejected": -0.798337459564209, "logps/chosen": -0.5689378380775452, "logps/rejected": -2.1238794326782227, "loss": 1.0478, "nll_loss": 1.108099102973938, "rewards/accuracies": 0.875, "rewards/chosen": -0.05689378082752228, "rewards/margins": 0.15549415349960327, "rewards/rejected": -0.21238793432712555, "step": 6966 }, { "epoch": 4.250114381576941, "grad_norm": 1.166480302810669, "learning_rate": 1.203184323331292e-06, "log_odds_chosen": 3.181969165802002, "log_odds_ratio": -0.350057452917099, "logits/chosen": -1.0078866481781006, "logits/rejected": -1.0793884992599487, "logps/chosen": -0.6877527236938477, "logps/rejected": -3.4137446880340576, "loss": 1.0873, "nll_loss": 1.1398171186447144, "rewards/accuracies": 0.75, "rewards/chosen": -0.06877526640892029, "rewards/margins": 0.2725992202758789, "rewards/rejected": -0.3413744866847992, "step": 6967 }, { "epoch": 4.2507244166539575, "grad_norm": 1.381170630455017, "learning_rate": 1.2022045315370485e-06, "log_odds_chosen": 1.5802850723266602, "log_odds_ratio": -0.4172877073287964, "logits/chosen": -0.8807443380355835, "logits/rejected": -1.009738564491272, "logps/chosen": -1.0176658630371094, "logps/rejected": -2.257549524307251, "loss": 1.0908, "nll_loss": 1.0577677488327026, "rewards/accuracies": 0.75, "rewards/chosen": -0.10176658630371094, "rewards/margins": 0.12398835271596909, "rewards/rejected": -0.22575494647026062, "step": 6968 }, { "epoch": 4.251334451730974, "grad_norm": 2.014143466949463, "learning_rate": 1.2012247397428046e-06, "log_odds_chosen": 1.6725496053695679, "log_odds_ratio": -0.4467105567455292, "logits/chosen": -1.0712224245071411, "logits/rejected": -1.1142446994781494, "logps/chosen": -1.0647485256195068, "logps/rejected": -2.509094715118408, "loss": 1.082, "nll_loss": 1.2778937816619873, "rewards/accuracies": 0.875, "rewards/chosen": -0.1064748615026474, "rewards/margins": 0.1444346159696579, "rewards/rejected": -0.2509094774723053, "step": 6969 }, { "epoch": 4.251944486807991, "grad_norm": 10.179359436035156, "learning_rate": 1.2002449479485607e-06, "log_odds_chosen": 1.132832407951355, "log_odds_ratio": -0.46841827034950256, "logits/chosen": -0.7324501872062683, "logits/rejected": -0.721429169178009, "logps/chosen": -0.8482619524002075, "logps/rejected": -1.698330283164978, "loss": 1.0538, "nll_loss": 0.9317281246185303, "rewards/accuracies": 0.75, "rewards/chosen": -0.08482620120048523, "rewards/margins": 0.08500682562589645, "rewards/rejected": -0.16983303427696228, "step": 6970 }, { "epoch": 4.252554521885008, "grad_norm": 1.4594829082489014, "learning_rate": 1.1992651561543172e-06, "log_odds_chosen": 1.7793288230895996, "log_odds_ratio": -0.5015318989753723, "logits/chosen": -0.8782452344894409, "logits/rejected": -0.9726424813270569, "logps/chosen": -0.785522997379303, "logps/rejected": -2.186375379562378, "loss": 1.0524, "nll_loss": 1.0122276544570923, "rewards/accuracies": 0.5, "rewards/chosen": -0.07855229824781418, "rewards/margins": 0.14008525013923645, "rewards/rejected": -0.21863755583763123, "step": 6971 }, { "epoch": 4.253164556962025, "grad_norm": 1.230427622795105, "learning_rate": 1.1982853643600734e-06, "log_odds_chosen": 1.4205965995788574, "log_odds_ratio": -0.6224819421768188, "logits/chosen": -0.95697021484375, "logits/rejected": -0.9540597796440125, "logps/chosen": -0.8928227424621582, "logps/rejected": -2.079106092453003, "loss": 1.0498, "nll_loss": 1.0713863372802734, "rewards/accuracies": 0.5, "rewards/chosen": -0.08928228169679642, "rewards/margins": 0.1186283528804779, "rewards/rejected": -0.20791062712669373, "step": 6972 }, { "epoch": 4.253774592039043, "grad_norm": 1.6409748792648315, "learning_rate": 1.1973055725658297e-06, "log_odds_chosen": 0.9404028058052063, "log_odds_ratio": -0.5787245035171509, "logits/chosen": -0.9171363115310669, "logits/rejected": -1.0462772846221924, "logps/chosen": -0.7548909783363342, "logps/rejected": -1.4069769382476807, "loss": 0.9301, "nll_loss": 0.8846173882484436, "rewards/accuracies": 0.75, "rewards/chosen": -0.0754891037940979, "rewards/margins": 0.06520859897136688, "rewards/rejected": -0.14069770276546478, "step": 6973 }, { "epoch": 4.2543846271160595, "grad_norm": 10.546852111816406, "learning_rate": 1.196325780771586e-06, "log_odds_chosen": 2.648073196411133, "log_odds_ratio": -0.42461955547332764, "logits/chosen": -0.7382986545562744, "logits/rejected": -0.965865969657898, "logps/chosen": -0.7556402683258057, "logps/rejected": -2.9077649116516113, "loss": 0.9922, "nll_loss": 0.9738165140151978, "rewards/accuracies": 0.625, "rewards/chosen": -0.07556402683258057, "rewards/margins": 0.21521246433258057, "rewards/rejected": -0.29077646136283875, "step": 6974 }, { "epoch": 4.254994662193076, "grad_norm": 8.467941284179688, "learning_rate": 1.1953459889773424e-06, "log_odds_chosen": 1.481865644454956, "log_odds_ratio": -0.42734628915786743, "logits/chosen": -0.8978782296180725, "logits/rejected": -0.953506350517273, "logps/chosen": -0.7185028791427612, "logps/rejected": -1.8549432754516602, "loss": 1.027, "nll_loss": 0.9841547012329102, "rewards/accuracies": 0.75, "rewards/chosen": -0.07185028493404388, "rewards/margins": 0.11364404112100601, "rewards/rejected": -0.1854943186044693, "step": 6975 }, { "epoch": 4.255604697270093, "grad_norm": 2.0968410968780518, "learning_rate": 1.1943661971830985e-06, "log_odds_chosen": 2.2265448570251465, "log_odds_ratio": -0.3762928545475006, "logits/chosen": -0.7912773489952087, "logits/rejected": -0.9170719981193542, "logps/chosen": -0.8109752535820007, "logps/rejected": -2.7081961631774902, "loss": 0.9282, "nll_loss": 0.9378210306167603, "rewards/accuracies": 0.75, "rewards/chosen": -0.08109752833843231, "rewards/margins": 0.18972209095954895, "rewards/rejected": -0.27081963419914246, "step": 6976 }, { "epoch": 4.25621473234711, "grad_norm": 1.4448778629302979, "learning_rate": 1.1933864053888546e-06, "log_odds_chosen": 2.1499102115631104, "log_odds_ratio": -0.4760879874229431, "logits/chosen": -0.7915343046188354, "logits/rejected": -0.8799697160720825, "logps/chosen": -0.8822236061096191, "logps/rejected": -2.61775541305542, "loss": 1.1167, "nll_loss": 1.004833698272705, "rewards/accuracies": 0.625, "rewards/chosen": -0.08822236955165863, "rewards/margins": 0.1735531985759735, "rewards/rejected": -0.26177555322647095, "step": 6977 }, { "epoch": 4.256824767424127, "grad_norm": 2.1646409034729004, "learning_rate": 1.1924066135946112e-06, "log_odds_chosen": 2.1581053733825684, "log_odds_ratio": -0.35796859860420227, "logits/chosen": -0.8724064826965332, "logits/rejected": -1.0041552782058716, "logps/chosen": -0.7423598766326904, "logps/rejected": -2.4315688610076904, "loss": 1.0913, "nll_loss": 0.9985266327857971, "rewards/accuracies": 0.875, "rewards/chosen": -0.07423599064350128, "rewards/margins": 0.16892090439796448, "rewards/rejected": -0.24315689504146576, "step": 6978 }, { "epoch": 4.257434802501144, "grad_norm": 5.952618598937988, "learning_rate": 1.1914268218003673e-06, "log_odds_chosen": 1.9646583795547485, "log_odds_ratio": -0.33957183361053467, "logits/chosen": -0.971524715423584, "logits/rejected": -1.0632637739181519, "logps/chosen": -0.9387133121490479, "logps/rejected": -2.4223239421844482, "loss": 1.1013, "nll_loss": 1.0633913278579712, "rewards/accuracies": 0.75, "rewards/chosen": -0.0938713327050209, "rewards/margins": 0.14836105704307556, "rewards/rejected": -0.24223238229751587, "step": 6979 }, { "epoch": 4.2580448375781605, "grad_norm": 1.5221720933914185, "learning_rate": 1.1904470300061236e-06, "log_odds_chosen": 1.0176007747650146, "log_odds_ratio": -0.3755215108394623, "logits/chosen": -0.7037177681922913, "logits/rejected": -0.8655112981796265, "logps/chosen": -0.6291317939758301, "logps/rejected": -1.1965006589889526, "loss": 1.1359, "nll_loss": 0.8362584114074707, "rewards/accuracies": 0.875, "rewards/chosen": -0.06291317939758301, "rewards/margins": 0.05673689395189285, "rewards/rejected": -0.11965006589889526, "step": 6980 }, { "epoch": 4.258654872655177, "grad_norm": 2.7116570472717285, "learning_rate": 1.18946723821188e-06, "log_odds_chosen": 1.6651382446289062, "log_odds_ratio": -0.39564889669418335, "logits/chosen": -0.9495470523834229, "logits/rejected": -1.0339934825897217, "logps/chosen": -1.2449185848236084, "logps/rejected": -2.663236141204834, "loss": 1.0649, "nll_loss": 1.1758241653442383, "rewards/accuracies": 0.875, "rewards/chosen": -0.1244918629527092, "rewards/margins": 0.14183172583580017, "rewards/rejected": -0.26632359623908997, "step": 6981 }, { "epoch": 4.259264907732194, "grad_norm": 1.4783283472061157, "learning_rate": 1.1884874464176363e-06, "log_odds_chosen": 3.167530059814453, "log_odds_ratio": -0.3106052875518799, "logits/chosen": -0.7612062692642212, "logits/rejected": -0.9007513523101807, "logps/chosen": -0.7835383415222168, "logps/rejected": -3.557734489440918, "loss": 1.0764, "nll_loss": 0.7781945466995239, "rewards/accuracies": 1.0, "rewards/chosen": -0.07835384458303452, "rewards/margins": 0.2774196267127991, "rewards/rejected": -0.3557734489440918, "step": 6982 }, { "epoch": 4.259874942809212, "grad_norm": 1.5375289916992188, "learning_rate": 1.1875076546233924e-06, "log_odds_chosen": 1.483236312866211, "log_odds_ratio": -0.46566328406333923, "logits/chosen": -0.915469765663147, "logits/rejected": -0.9743392467498779, "logps/chosen": -0.6601848602294922, "logps/rejected": -1.5885326862335205, "loss": 0.8597, "nll_loss": 0.9869714975357056, "rewards/accuracies": 0.875, "rewards/chosen": -0.0660184770822525, "rewards/margins": 0.09283480793237686, "rewards/rejected": -0.15885329246520996, "step": 6983 }, { "epoch": 4.260484977886229, "grad_norm": 1.4067492485046387, "learning_rate": 1.186527862829149e-06, "log_odds_chosen": 0.3231824040412903, "log_odds_ratio": -0.6306415796279907, "logits/chosen": -1.0329526662826538, "logits/rejected": -0.9138497114181519, "logps/chosen": -0.640499472618103, "logps/rejected": -0.7426159977912903, "loss": 1.0977, "nll_loss": 1.1002455949783325, "rewards/accuracies": 0.625, "rewards/chosen": -0.06404994428157806, "rewards/margins": 0.010211655870079994, "rewards/rejected": -0.0742616057395935, "step": 6984 }, { "epoch": 4.261095012963246, "grad_norm": 2.193598508834839, "learning_rate": 1.185548071034905e-06, "log_odds_chosen": 2.1526315212249756, "log_odds_ratio": -0.4233531057834625, "logits/chosen": -0.7565948963165283, "logits/rejected": -0.9005510807037354, "logps/chosen": -0.5598028898239136, "logps/rejected": -2.257037401199341, "loss": 0.792, "nll_loss": 0.7189930081367493, "rewards/accuracies": 0.875, "rewards/chosen": -0.05598028376698494, "rewards/margins": 0.16972345113754272, "rewards/rejected": -0.22570374608039856, "step": 6985 }, { "epoch": 4.2617050480402625, "grad_norm": 1.5109940767288208, "learning_rate": 1.1845682792406612e-06, "log_odds_chosen": 2.922529697418213, "log_odds_ratio": -0.3192332983016968, "logits/chosen": -0.6928077936172485, "logits/rejected": -0.9489545822143555, "logps/chosen": -0.7744371891021729, "logps/rejected": -3.169328212738037, "loss": 1.0559, "nll_loss": 0.8594103455543518, "rewards/accuracies": 0.75, "rewards/chosen": -0.07744371891021729, "rewards/margins": 0.2394891083240509, "rewards/rejected": -0.3169328272342682, "step": 6986 }, { "epoch": 4.262315083117279, "grad_norm": 7.838533878326416, "learning_rate": 1.1835884874464175e-06, "log_odds_chosen": 2.4986801147460938, "log_odds_ratio": -0.2703012228012085, "logits/chosen": -0.8372070789337158, "logits/rejected": -1.0012816190719604, "logps/chosen": -0.7532555460929871, "logps/rejected": -2.635230541229248, "loss": 0.9412, "nll_loss": 0.9205805063247681, "rewards/accuracies": 0.875, "rewards/chosen": -0.07532555609941483, "rewards/margins": 0.18819749355316162, "rewards/rejected": -0.26352304220199585, "step": 6987 }, { "epoch": 4.262925118194296, "grad_norm": 1.4560757875442505, "learning_rate": 1.1826086956521739e-06, "log_odds_chosen": 1.9389610290527344, "log_odds_ratio": -0.3470389246940613, "logits/chosen": -0.835150420665741, "logits/rejected": -1.0135669708251953, "logps/chosen": -0.7280184030532837, "logps/rejected": -2.2316434383392334, "loss": 1.0045, "nll_loss": 1.0426911115646362, "rewards/accuracies": 1.0, "rewards/chosen": -0.07280184328556061, "rewards/margins": 0.1503625214099884, "rewards/rejected": -0.22316434979438782, "step": 6988 }, { "epoch": 4.263535153271313, "grad_norm": 2.194243907928467, "learning_rate": 1.1816289038579302e-06, "log_odds_chosen": 4.569494724273682, "log_odds_ratio": -0.13374893367290497, "logits/chosen": -0.6266088485717773, "logits/rejected": -1.0607250928878784, "logps/chosen": -0.5341815948486328, "logps/rejected": -4.346469879150391, "loss": 1.2099, "nll_loss": 0.8858722448348999, "rewards/accuracies": 0.875, "rewards/chosen": -0.05341816321015358, "rewards/margins": 0.38122883439064026, "rewards/rejected": -0.4346470236778259, "step": 6989 }, { "epoch": 4.26414518834833, "grad_norm": 3.282042980194092, "learning_rate": 1.1806491120636863e-06, "log_odds_chosen": 0.6736413240432739, "log_odds_ratio": -0.6385692358016968, "logits/chosen": -1.066335678100586, "logits/rejected": -1.1162755489349365, "logps/chosen": -1.1354663372039795, "logps/rejected": -1.6653411388397217, "loss": 1.1944, "nll_loss": 1.304717779159546, "rewards/accuracies": 0.5, "rewards/chosen": -0.11354663223028183, "rewards/margins": 0.05298749357461929, "rewards/rejected": -0.16653412580490112, "step": 6990 }, { "epoch": 4.264755223425347, "grad_norm": 4.116471767425537, "learning_rate": 1.1796693202694429e-06, "log_odds_chosen": 0.7941249012947083, "log_odds_ratio": -0.5937856435775757, "logits/chosen": -1.21189546585083, "logits/rejected": -1.2070226669311523, "logps/chosen": -1.5870327949523926, "logps/rejected": -2.3503634929656982, "loss": 1.2517, "nll_loss": 1.6428284645080566, "rewards/accuracies": 0.625, "rewards/chosen": -0.1587032973766327, "rewards/margins": 0.07633306086063385, "rewards/rejected": -0.23503635823726654, "step": 6991 }, { "epoch": 4.265365258502364, "grad_norm": 1.4620606899261475, "learning_rate": 1.178689528475199e-06, "log_odds_chosen": 2.4444689750671387, "log_odds_ratio": -0.3739664852619171, "logits/chosen": -0.8688173890113831, "logits/rejected": -1.0601576566696167, "logps/chosen": -0.8764129877090454, "logps/rejected": -2.833315849304199, "loss": 0.9872, "nll_loss": 0.9353165626525879, "rewards/accuracies": 0.875, "rewards/chosen": -0.08764130622148514, "rewards/margins": 0.1956903040409088, "rewards/rejected": -0.28333160281181335, "step": 6992 }, { "epoch": 4.26597529357938, "grad_norm": 2.0650203227996826, "learning_rate": 1.1777097366809551e-06, "log_odds_chosen": 3.5308573246002197, "log_odds_ratio": -0.7117215991020203, "logits/chosen": -1.099521279335022, "logits/rejected": -0.9525107741355896, "logps/chosen": -1.079163908958435, "logps/rejected": -4.246833324432373, "loss": 1.1435, "nll_loss": 1.451725959777832, "rewards/accuracies": 0.75, "rewards/chosen": -0.10791639238595963, "rewards/margins": 0.31676697731018066, "rewards/rejected": -0.4246833622455597, "step": 6993 }, { "epoch": 4.266585328656397, "grad_norm": 1.2590726613998413, "learning_rate": 1.1767299448867117e-06, "log_odds_chosen": 1.272078037261963, "log_odds_ratio": -0.473250150680542, "logits/chosen": -1.0811176300048828, "logits/rejected": -0.9621527791023254, "logps/chosen": -0.9234300851821899, "logps/rejected": -1.9736758470535278, "loss": 0.9739, "nll_loss": 1.034437894821167, "rewards/accuracies": 0.75, "rewards/chosen": -0.09234301745891571, "rewards/margins": 0.10502458363771439, "rewards/rejected": -0.1973675787448883, "step": 6994 }, { "epoch": 4.267195363733415, "grad_norm": 6.364412784576416, "learning_rate": 1.1757501530924678e-06, "log_odds_chosen": 0.21080610156059265, "log_odds_ratio": -0.8593385815620422, "logits/chosen": -0.807175874710083, "logits/rejected": -0.8929656744003296, "logps/chosen": -0.9931130409240723, "logps/rejected": -1.2311447858810425, "loss": 1.1656, "nll_loss": 1.1643716096878052, "rewards/accuracies": 0.5, "rewards/chosen": -0.09931130707263947, "rewards/margins": 0.023803170770406723, "rewards/rejected": -0.12311448156833649, "step": 6995 }, { "epoch": 4.267805398810432, "grad_norm": 1.5849156379699707, "learning_rate": 1.1747703612982241e-06, "log_odds_chosen": 0.34458696842193604, "log_odds_ratio": -0.6461927890777588, "logits/chosen": -0.9182361364364624, "logits/rejected": -0.905015230178833, "logps/chosen": -0.844441831111908, "logps/rejected": -1.0675067901611328, "loss": 0.9142, "nll_loss": 1.0002466440200806, "rewards/accuracies": 0.5, "rewards/chosen": -0.08444419503211975, "rewards/margins": 0.022306498140096664, "rewards/rejected": -0.10675068199634552, "step": 6996 }, { "epoch": 4.268415433887449, "grad_norm": 6.643284320831299, "learning_rate": 1.1737905695039802e-06, "log_odds_chosen": 2.6882565021514893, "log_odds_ratio": -0.2514185607433319, "logits/chosen": -0.8742096424102783, "logits/rejected": -1.0257116556167603, "logps/chosen": -0.6180747151374817, "logps/rejected": -2.6920719146728516, "loss": 0.8695, "nll_loss": 0.7882609963417053, "rewards/accuracies": 0.875, "rewards/chosen": -0.06180747598409653, "rewards/margins": 0.20739969611167908, "rewards/rejected": -0.2692071795463562, "step": 6997 }, { "epoch": 4.2690254689644656, "grad_norm": 3.3583173751831055, "learning_rate": 1.1728107777097368e-06, "log_odds_chosen": 1.3525831699371338, "log_odds_ratio": -0.5629158616065979, "logits/chosen": -0.9177074432373047, "logits/rejected": -0.9468801021575928, "logps/chosen": -0.8573786020278931, "logps/rejected": -1.8587188720703125, "loss": 1.0191, "nll_loss": 0.9544610977172852, "rewards/accuracies": 0.625, "rewards/chosen": -0.08573786914348602, "rewards/margins": 0.10013402253389359, "rewards/rejected": -0.185871884226799, "step": 6998 }, { "epoch": 4.269635504041482, "grad_norm": 3.21026611328125, "learning_rate": 1.171830985915493e-06, "log_odds_chosen": 1.3512160778045654, "log_odds_ratio": -0.34328022599220276, "logits/chosen": -1.007537841796875, "logits/rejected": -0.8222737908363342, "logps/chosen": -0.9035845994949341, "logps/rejected": -1.8652137517929077, "loss": 1.0022, "nll_loss": 1.140474557876587, "rewards/accuracies": 0.875, "rewards/chosen": -0.09035846590995789, "rewards/margins": 0.09616291522979736, "rewards/rejected": -0.18652138113975525, "step": 6999 }, { "epoch": 4.270245539118499, "grad_norm": 4.759943962097168, "learning_rate": 1.170851194121249e-06, "log_odds_chosen": 2.550170421600342, "log_odds_ratio": -0.3401074707508087, "logits/chosen": -0.6734392046928406, "logits/rejected": -0.8102864027023315, "logps/chosen": -0.8105441331863403, "logps/rejected": -2.847407579421997, "loss": 0.9587, "nll_loss": 0.8903003931045532, "rewards/accuracies": 0.75, "rewards/chosen": -0.08105441927909851, "rewards/margins": 0.20368634164333344, "rewards/rejected": -0.28474074602127075, "step": 7000 }, { "epoch": 4.270855574195516, "grad_norm": 1.4064488410949707, "learning_rate": 1.1698714023270056e-06, "log_odds_chosen": 2.1610357761383057, "log_odds_ratio": -0.3917187452316284, "logits/chosen": -0.8862137794494629, "logits/rejected": -0.9798753261566162, "logps/chosen": -0.7563909888267517, "logps/rejected": -2.47952938079834, "loss": 0.8941, "nll_loss": 0.896094024181366, "rewards/accuracies": 0.75, "rewards/chosen": -0.07563909888267517, "rewards/margins": 0.1723138391971588, "rewards/rejected": -0.2479529082775116, "step": 7001 }, { "epoch": 4.271465609272533, "grad_norm": 2.3058817386627197, "learning_rate": 1.1688916105327617e-06, "log_odds_chosen": 1.032825231552124, "log_odds_ratio": -0.42015519738197327, "logits/chosen": -0.9328879117965698, "logits/rejected": -1.019052505493164, "logps/chosen": -0.9022526144981384, "logps/rejected": -1.6445425748825073, "loss": 1.1134, "nll_loss": 1.0719884634017944, "rewards/accuracies": 0.875, "rewards/chosen": -0.09022526443004608, "rewards/margins": 0.07422900199890137, "rewards/rejected": -0.16445426642894745, "step": 7002 }, { "epoch": 4.27207564434955, "grad_norm": 1.587881326675415, "learning_rate": 1.167911818738518e-06, "log_odds_chosen": 2.5570991039276123, "log_odds_ratio": -0.37824925780296326, "logits/chosen": -0.5768895149230957, "logits/rejected": -0.9101689457893372, "logps/chosen": -0.7257378101348877, "logps/rejected": -2.7644035816192627, "loss": 0.933, "nll_loss": 0.9357370138168335, "rewards/accuracies": 0.75, "rewards/chosen": -0.07257378101348877, "rewards/margins": 0.20386658608913422, "rewards/rejected": -0.2764403820037842, "step": 7003 }, { "epoch": 4.272685679426567, "grad_norm": 1.6052993535995483, "learning_rate": 1.1669320269442744e-06, "log_odds_chosen": 1.468284010887146, "log_odds_ratio": -0.5775381326675415, "logits/chosen": -0.8968123197555542, "logits/rejected": -0.9403495788574219, "logps/chosen": -0.7782572507858276, "logps/rejected": -1.9936084747314453, "loss": 0.9595, "nll_loss": 1.0221163034439087, "rewards/accuracies": 0.625, "rewards/chosen": -0.07782572507858276, "rewards/margins": 0.12153510749340057, "rewards/rejected": -0.19936084747314453, "step": 7004 }, { "epoch": 4.273295714503584, "grad_norm": 1.4763718843460083, "learning_rate": 1.1659522351500307e-06, "log_odds_chosen": 2.109626531600952, "log_odds_ratio": -0.34620025753974915, "logits/chosen": -0.7962195873260498, "logits/rejected": -1.0036144256591797, "logps/chosen": -0.634401798248291, "logps/rejected": -2.24764347076416, "loss": 0.9632, "nll_loss": 0.7810612320899963, "rewards/accuracies": 0.875, "rewards/chosen": -0.06344018131494522, "rewards/margins": 0.16132420301437378, "rewards/rejected": -0.2247643768787384, "step": 7005 }, { "epoch": 4.273905749580601, "grad_norm": 1.7761802673339844, "learning_rate": 1.1649724433557868e-06, "log_odds_chosen": 1.3779922723770142, "log_odds_ratio": -0.3714159429073334, "logits/chosen": -0.9290696978569031, "logits/rejected": -0.9856833219528198, "logps/chosen": -0.9528169631958008, "logps/rejected": -2.0403990745544434, "loss": 1.0231, "nll_loss": 1.1858906745910645, "rewards/accuracies": 0.75, "rewards/chosen": -0.0952816978096962, "rewards/margins": 0.10875821113586426, "rewards/rejected": -0.20403990149497986, "step": 7006 }, { "epoch": 4.274515784657618, "grad_norm": 16.99709701538086, "learning_rate": 1.163992651561543e-06, "log_odds_chosen": 2.264155626296997, "log_odds_ratio": -0.537558913230896, "logits/chosen": -1.0798389911651611, "logits/rejected": -1.1065421104431152, "logps/chosen": -1.1460689306259155, "logps/rejected": -3.195812702178955, "loss": 1.2727, "nll_loss": 1.4128532409667969, "rewards/accuracies": 0.625, "rewards/chosen": -0.11460688710212708, "rewards/margins": 0.20497441291809082, "rewards/rejected": -0.3195812702178955, "step": 7007 }, { "epoch": 4.275125819734635, "grad_norm": 1.779229760169983, "learning_rate": 1.1630128597672995e-06, "log_odds_chosen": 1.4092482328414917, "log_odds_ratio": -0.4656308889389038, "logits/chosen": -0.9204554557800293, "logits/rejected": -0.9058793783187866, "logps/chosen": -0.749068558216095, "logps/rejected": -1.6154100894927979, "loss": 1.1571, "nll_loss": 1.0741053819656372, "rewards/accuracies": 0.75, "rewards/chosen": -0.0749068558216095, "rewards/margins": 0.08663414418697357, "rewards/rejected": -0.16154100000858307, "step": 7008 }, { "epoch": 4.275735854811652, "grad_norm": 1.3047815561294556, "learning_rate": 1.1620330679730556e-06, "log_odds_chosen": 5.239741325378418, "log_odds_ratio": -0.19564716517925262, "logits/chosen": -0.6718378663063049, "logits/rejected": -0.8455764055252075, "logps/chosen": -0.6132724285125732, "logps/rejected": -5.153463363647461, "loss": 0.9148, "nll_loss": 0.7849658131599426, "rewards/accuracies": 1.0, "rewards/chosen": -0.061327241361141205, "rewards/margins": 0.45401906967163086, "rewards/rejected": -0.5153462886810303, "step": 7009 }, { "epoch": 4.276345889888669, "grad_norm": 1.5852636098861694, "learning_rate": 1.161053276178812e-06, "log_odds_chosen": 1.5294643640518188, "log_odds_ratio": -0.5368767380714417, "logits/chosen": -0.9234843850135803, "logits/rejected": -0.959282398223877, "logps/chosen": -0.966325581073761, "logps/rejected": -2.26371431350708, "loss": 1.0503, "nll_loss": 1.1152336597442627, "rewards/accuracies": 0.5, "rewards/chosen": -0.09663254767656326, "rewards/margins": 0.12973885238170624, "rewards/rejected": -0.2263714224100113, "step": 7010 }, { "epoch": 4.276955924965685, "grad_norm": 1.591633677482605, "learning_rate": 1.1600734843845683e-06, "log_odds_chosen": 2.87471866607666, "log_odds_ratio": -0.31062597036361694, "logits/chosen": -0.8176226615905762, "logits/rejected": -1.0956120491027832, "logps/chosen": -0.63372802734375, "logps/rejected": -2.851625919342041, "loss": 1.0226, "nll_loss": 1.2034043073654175, "rewards/accuracies": 0.875, "rewards/chosen": -0.06337280571460724, "rewards/margins": 0.22178980708122253, "rewards/rejected": -0.28516262769699097, "step": 7011 }, { "epoch": 4.277565960042702, "grad_norm": 2.324216842651367, "learning_rate": 1.1590936925903246e-06, "log_odds_chosen": 1.419080376625061, "log_odds_ratio": -0.7223271727561951, "logits/chosen": -0.7243716716766357, "logits/rejected": -0.7596622109413147, "logps/chosen": -1.013545036315918, "logps/rejected": -2.4791791439056396, "loss": 1.1493, "nll_loss": 1.14780592918396, "rewards/accuracies": 0.375, "rewards/chosen": -0.10135450959205627, "rewards/margins": 0.14656341075897217, "rewards/rejected": -0.24791792035102844, "step": 7012 }, { "epoch": 4.278175995119719, "grad_norm": 2.391500473022461, "learning_rate": 1.1581139007960807e-06, "log_odds_chosen": 3.0721516609191895, "log_odds_ratio": -0.4014510214328766, "logits/chosen": -0.810308575630188, "logits/rejected": -1.0888599157333374, "logps/chosen": -0.7371177673339844, "logps/rejected": -3.1454789638519287, "loss": 0.9952, "nll_loss": 0.9918930530548096, "rewards/accuracies": 0.75, "rewards/chosen": -0.07371177524328232, "rewards/margins": 0.24083611369132996, "rewards/rejected": -0.31454789638519287, "step": 7013 }, { "epoch": 4.278786030196736, "grad_norm": 1.4669203758239746, "learning_rate": 1.157134109001837e-06, "log_odds_chosen": 0.4209027886390686, "log_odds_ratio": -0.5550723075866699, "logits/chosen": -0.8765748739242554, "logits/rejected": -0.8974897861480713, "logps/chosen": -0.922283411026001, "logps/rejected": -1.2343361377716064, "loss": 1.1701, "nll_loss": 1.1829311847686768, "rewards/accuracies": 0.875, "rewards/chosen": -0.09222833067178726, "rewards/margins": 0.031205270439386368, "rewards/rejected": -0.12343361228704453, "step": 7014 }, { "epoch": 4.279396065273753, "grad_norm": 1.3773373365402222, "learning_rate": 1.1561543172075934e-06, "log_odds_chosen": 0.661360502243042, "log_odds_ratio": -0.611493706703186, "logits/chosen": -0.792073667049408, "logits/rejected": -0.8102289438247681, "logps/chosen": -0.7420033812522888, "logps/rejected": -1.180842399597168, "loss": 0.9444, "nll_loss": 0.9634131789207458, "rewards/accuracies": 0.5, "rewards/chosen": -0.074200339615345, "rewards/margins": 0.043883904814720154, "rewards/rejected": -0.11808425188064575, "step": 7015 }, { "epoch": 4.280006100350771, "grad_norm": 3.2726964950561523, "learning_rate": 1.1551745254133495e-06, "log_odds_chosen": 2.862614631652832, "log_odds_ratio": -0.21968325972557068, "logits/chosen": -0.6995629072189331, "logits/rejected": -1.0183533430099487, "logps/chosen": -0.6181352734565735, "logps/rejected": -2.7724647521972656, "loss": 0.9692, "nll_loss": 0.8885283470153809, "rewards/accuracies": 0.75, "rewards/chosen": -0.06181352585554123, "rewards/margins": 0.21543297171592712, "rewards/rejected": -0.27724647521972656, "step": 7016 }, { "epoch": 4.280616135427787, "grad_norm": 1.548553705215454, "learning_rate": 1.1541947336191059e-06, "log_odds_chosen": 2.2669267654418945, "log_odds_ratio": -0.21355105936527252, "logits/chosen": -1.0501303672790527, "logits/rejected": -1.0426043272018433, "logps/chosen": -0.6832126379013062, "logps/rejected": -2.4068450927734375, "loss": 1.0493, "nll_loss": 1.039454698562622, "rewards/accuracies": 1.0, "rewards/chosen": -0.06832126528024673, "rewards/margins": 0.17236323654651642, "rewards/rejected": -0.24068450927734375, "step": 7017 }, { "epoch": 4.281226170504804, "grad_norm": 1.566163182258606, "learning_rate": 1.1532149418248622e-06, "log_odds_chosen": 1.339053988456726, "log_odds_ratio": -0.3614753484725952, "logits/chosen": -1.0947264432907104, "logits/rejected": -1.0745704174041748, "logps/chosen": -0.8328440189361572, "logps/rejected": -1.9436149597167969, "loss": 1.015, "nll_loss": 1.3195154666900635, "rewards/accuracies": 1.0, "rewards/chosen": -0.0832844078540802, "rewards/margins": 0.11107710003852844, "rewards/rejected": -0.19436149299144745, "step": 7018 }, { "epoch": 4.281836205581821, "grad_norm": 1.4510399103164673, "learning_rate": 1.1522351500306185e-06, "log_odds_chosen": 1.9254225492477417, "log_odds_ratio": -0.35015493631362915, "logits/chosen": -1.0917456150054932, "logits/rejected": -1.1124755144119263, "logps/chosen": -0.8975576758384705, "logps/rejected": -2.438330888748169, "loss": 1.1267, "nll_loss": 1.2115017175674438, "rewards/accuracies": 0.75, "rewards/chosen": -0.08975577354431152, "rewards/margins": 0.15407732129096985, "rewards/rejected": -0.24383310973644257, "step": 7019 }, { "epoch": 4.282446240658838, "grad_norm": 7.254389762878418, "learning_rate": 1.1512553582363747e-06, "log_odds_chosen": 2.686178207397461, "log_odds_ratio": -0.6506822109222412, "logits/chosen": -0.9619089961051941, "logits/rejected": -1.0814145803451538, "logps/chosen": -0.9990682005882263, "logps/rejected": -3.341472864151001, "loss": 1.0357, "nll_loss": 0.8918775320053101, "rewards/accuracies": 0.625, "rewards/chosen": -0.0999068170785904, "rewards/margins": 0.23424047231674194, "rewards/rejected": -0.33414727449417114, "step": 7020 }, { "epoch": 4.283056275735855, "grad_norm": 1.6249099969863892, "learning_rate": 1.150275566442131e-06, "log_odds_chosen": 1.3213895559310913, "log_odds_ratio": -0.41343432664871216, "logits/chosen": -0.5781225562095642, "logits/rejected": -0.7973278760910034, "logps/chosen": -0.6431676149368286, "logps/rejected": -1.5230618715286255, "loss": 0.8815, "nll_loss": 0.7921308279037476, "rewards/accuracies": 0.75, "rewards/chosen": -0.0643167644739151, "rewards/margins": 0.087989442050457, "rewards/rejected": -0.1523061990737915, "step": 7021 }, { "epoch": 4.283666310812872, "grad_norm": 11.723063468933105, "learning_rate": 1.1492957746478873e-06, "log_odds_chosen": 1.7188202142715454, "log_odds_ratio": -0.5025909543037415, "logits/chosen": -0.8937596082687378, "logits/rejected": -0.9992637634277344, "logps/chosen": -0.9858595728874207, "logps/rejected": -2.4193296432495117, "loss": 1.0787, "nll_loss": 1.1816227436065674, "rewards/accuracies": 0.625, "rewards/chosen": -0.09858596324920654, "rewards/margins": 0.14334698021411896, "rewards/rejected": -0.2419329583644867, "step": 7022 }, { "epoch": 4.2842763458898885, "grad_norm": 1.596877098083496, "learning_rate": 1.1483159828536434e-06, "log_odds_chosen": 3.8416333198547363, "log_odds_ratio": -0.24599651992321014, "logits/chosen": -0.6322698593139648, "logits/rejected": -0.7278493046760559, "logps/chosen": -0.5962396860122681, "logps/rejected": -3.757091760635376, "loss": 0.9061, "nll_loss": 0.744970440864563, "rewards/accuracies": 0.75, "rewards/chosen": -0.05962396785616875, "rewards/margins": 0.31608521938323975, "rewards/rejected": -0.3757091760635376, "step": 7023 }, { "epoch": 4.284886380966905, "grad_norm": 2.6722404956817627, "learning_rate": 1.1473361910593998e-06, "log_odds_chosen": 1.1508232355117798, "log_odds_ratio": -0.5940835475921631, "logits/chosen": -0.8302267789840698, "logits/rejected": -0.878717303276062, "logps/chosen": -0.8711163997650146, "logps/rejected": -1.763297438621521, "loss": 1.1673, "nll_loss": 1.1312052011489868, "rewards/accuracies": 0.625, "rewards/chosen": -0.08711163699626923, "rewards/margins": 0.08921810984611511, "rewards/rejected": -0.17632974684238434, "step": 7024 }, { "epoch": 4.285496416043922, "grad_norm": 1.2786129713058472, "learning_rate": 1.1463563992651561e-06, "log_odds_chosen": 1.4263733625411987, "log_odds_ratio": -0.3090354800224304, "logits/chosen": -0.5160120725631714, "logits/rejected": -0.84513258934021, "logps/chosen": -0.93620365858078, "logps/rejected": -1.922959327697754, "loss": 1.0874, "nll_loss": 0.9588562250137329, "rewards/accuracies": 1.0, "rewards/chosen": -0.09362036734819412, "rewards/margins": 0.09867556393146515, "rewards/rejected": -0.19229593873023987, "step": 7025 }, { "epoch": 4.28610645112094, "grad_norm": 2.0000598430633545, "learning_rate": 1.1453766074709124e-06, "log_odds_chosen": 2.9223365783691406, "log_odds_ratio": -0.19855372607707977, "logits/chosen": -0.7335602045059204, "logits/rejected": -0.9276474118232727, "logps/chosen": -0.46083876490592957, "logps/rejected": -2.536914825439453, "loss": 0.838, "nll_loss": 0.6691354513168335, "rewards/accuracies": 0.875, "rewards/chosen": -0.046083878725767136, "rewards/margins": 0.20760761201381683, "rewards/rejected": -0.25369149446487427, "step": 7026 }, { "epoch": 4.286716486197957, "grad_norm": 2.7127017974853516, "learning_rate": 1.1443968156766686e-06, "log_odds_chosen": 1.9635894298553467, "log_odds_ratio": -0.3790489733219147, "logits/chosen": -0.9573485851287842, "logits/rejected": -0.974544882774353, "logps/chosen": -0.9176836013793945, "logps/rejected": -2.505519151687622, "loss": 0.9255, "nll_loss": 1.0712367296218872, "rewards/accuracies": 0.875, "rewards/chosen": -0.09176836907863617, "rewards/margins": 0.15878355503082275, "rewards/rejected": -0.2505519390106201, "step": 7027 }, { "epoch": 4.287326521274974, "grad_norm": 1.2153156995773315, "learning_rate": 1.143417023882425e-06, "log_odds_chosen": 2.0588512420654297, "log_odds_ratio": -0.36734333634376526, "logits/chosen": -0.8821852207183838, "logits/rejected": -0.9113656282424927, "logps/chosen": -0.9434062242507935, "logps/rejected": -2.709531545639038, "loss": 0.9962, "nll_loss": 1.1961601972579956, "rewards/accuracies": 0.75, "rewards/chosen": -0.09434062242507935, "rewards/margins": 0.17661254107952118, "rewards/rejected": -0.2709531784057617, "step": 7028 }, { "epoch": 4.2879365563519904, "grad_norm": 1.6444634199142456, "learning_rate": 1.1424372320881812e-06, "log_odds_chosen": 1.6975297927856445, "log_odds_ratio": -0.42567178606987, "logits/chosen": -0.9532518982887268, "logits/rejected": -0.9921219348907471, "logps/chosen": -0.7856828570365906, "logps/rejected": -2.1730923652648926, "loss": 1.0783, "nll_loss": 0.9826279878616333, "rewards/accuracies": 0.75, "rewards/chosen": -0.07856828719377518, "rewards/margins": 0.13874095678329468, "rewards/rejected": -0.21730925142765045, "step": 7029 }, { "epoch": 4.288546591429007, "grad_norm": 2.137723445892334, "learning_rate": 1.1414574402939374e-06, "log_odds_chosen": 4.085862636566162, "log_odds_ratio": -0.16336581110954285, "logits/chosen": -0.6628603935241699, "logits/rejected": -0.7681810259819031, "logps/chosen": -0.7443666458129883, "logps/rejected": -4.1933770179748535, "loss": 1.1211, "nll_loss": 1.0666296482086182, "rewards/accuracies": 1.0, "rewards/chosen": -0.07443666458129883, "rewards/margins": 0.34490105509757996, "rewards/rejected": -0.4193377196788788, "step": 7030 }, { "epoch": 4.289156626506024, "grad_norm": 2.0672402381896973, "learning_rate": 1.140477648499694e-06, "log_odds_chosen": 2.1205315589904785, "log_odds_ratio": -0.46178367733955383, "logits/chosen": -1.0304986238479614, "logits/rejected": -1.0472556352615356, "logps/chosen": -0.8155766129493713, "logps/rejected": -2.2019009590148926, "loss": 1.3329, "nll_loss": 1.341346263885498, "rewards/accuracies": 0.75, "rewards/chosen": -0.08155766129493713, "rewards/margins": 0.1386324167251587, "rewards/rejected": -0.22019006311893463, "step": 7031 }, { "epoch": 4.289766661583041, "grad_norm": 6.977458477020264, "learning_rate": 1.13949785670545e-06, "log_odds_chosen": 2.9890177249908447, "log_odds_ratio": -0.31255051493644714, "logits/chosen": -0.7968465089797974, "logits/rejected": -0.9901738166809082, "logps/chosen": -0.7551937103271484, "logps/rejected": -3.305492639541626, "loss": 1.1531, "nll_loss": 0.8065961003303528, "rewards/accuracies": 0.75, "rewards/chosen": -0.0755193680524826, "rewards/margins": 0.25502991676330566, "rewards/rejected": -0.3305492699146271, "step": 7032 }, { "epoch": 4.290376696660058, "grad_norm": 1.2592926025390625, "learning_rate": 1.1385180649112064e-06, "log_odds_chosen": 1.8159654140472412, "log_odds_ratio": -0.4248482286930084, "logits/chosen": -0.8449977040290833, "logits/rejected": -0.9380372762680054, "logps/chosen": -0.8203914761543274, "logps/rejected": -2.229856491088867, "loss": 1.1115, "nll_loss": 0.8305945992469788, "rewards/accuracies": 0.75, "rewards/chosen": -0.08203914761543274, "rewards/margins": 0.14094652235507965, "rewards/rejected": -0.22298568487167358, "step": 7033 }, { "epoch": 4.290986731737075, "grad_norm": 9.265910148620605, "learning_rate": 1.1375382731169625e-06, "log_odds_chosen": 1.636734962463379, "log_odds_ratio": -0.3885002136230469, "logits/chosen": -0.6094793081283569, "logits/rejected": -0.6640046834945679, "logps/chosen": -0.6370909214019775, "logps/rejected": -1.8297293186187744, "loss": 1.0814, "nll_loss": 0.9168391823768616, "rewards/accuracies": 0.75, "rewards/chosen": -0.0637090876698494, "rewards/margins": 0.11926385015249252, "rewards/rejected": -0.18297293782234192, "step": 7034 }, { "epoch": 4.2915967668140915, "grad_norm": 1.8013032674789429, "learning_rate": 1.1365584813227188e-06, "log_odds_chosen": 1.845777988433838, "log_odds_ratio": -0.3843175172805786, "logits/chosen": -0.8559151887893677, "logits/rejected": -1.043788194656372, "logps/chosen": -0.8403298854827881, "logps/rejected": -2.369858503341675, "loss": 1.0635, "nll_loss": 0.9435669183731079, "rewards/accuracies": 0.75, "rewards/chosen": -0.08403298258781433, "rewards/margins": 0.15295284986495972, "rewards/rejected": -0.23698584735393524, "step": 7035 }, { "epoch": 4.292206801891108, "grad_norm": 1.3580855131149292, "learning_rate": 1.1355786895284751e-06, "log_odds_chosen": 2.0980279445648193, "log_odds_ratio": -0.24030175805091858, "logits/chosen": -0.6935331225395203, "logits/rejected": -0.7995611429214478, "logps/chosen": -0.6864316463470459, "logps/rejected": -2.273458957672119, "loss": 1.0523, "nll_loss": 1.0894726514816284, "rewards/accuracies": 1.0, "rewards/chosen": -0.06864316761493683, "rewards/margins": 0.15870273113250732, "rewards/rejected": -0.22734591364860535, "step": 7036 }, { "epoch": 4.292816836968125, "grad_norm": 2.400634288787842, "learning_rate": 1.1345988977342313e-06, "log_odds_chosen": 1.7668485641479492, "log_odds_ratio": -0.35563912987709045, "logits/chosen": -0.8074436187744141, "logits/rejected": -0.9084566235542297, "logps/chosen": -0.7574349641799927, "logps/rejected": -2.067253828048706, "loss": 1.0183, "nll_loss": 0.9394311308860779, "rewards/accuracies": 0.875, "rewards/chosen": -0.07574349641799927, "rewards/margins": 0.13098189234733582, "rewards/rejected": -0.20672538876533508, "step": 7037 }, { "epoch": 4.293426872045143, "grad_norm": 1.7123823165893555, "learning_rate": 1.1336191059399878e-06, "log_odds_chosen": 3.1473703384399414, "log_odds_ratio": -0.3169971704483032, "logits/chosen": -0.8714078664779663, "logits/rejected": -1.0157973766326904, "logps/chosen": -0.5579530596733093, "logps/rejected": -3.128066062927246, "loss": 0.9219, "nll_loss": 0.7438769340515137, "rewards/accuracies": 0.75, "rewards/chosen": -0.05579530820250511, "rewards/margins": 0.2570112943649292, "rewards/rejected": -0.3128066062927246, "step": 7038 }, { "epoch": 4.29403690712216, "grad_norm": 2.010469675064087, "learning_rate": 1.132639314145744e-06, "log_odds_chosen": 3.2846431732177734, "log_odds_ratio": -0.27319347858428955, "logits/chosen": -1.0020685195922852, "logits/rejected": -1.1341259479522705, "logps/chosen": -0.7995781302452087, "logps/rejected": -3.5954833030700684, "loss": 1.0143, "nll_loss": 1.0047645568847656, "rewards/accuracies": 0.875, "rewards/chosen": -0.07995781302452087, "rewards/margins": 0.27959051728248596, "rewards/rejected": -0.35954833030700684, "step": 7039 }, { "epoch": 4.294646942199177, "grad_norm": 9.370065689086914, "learning_rate": 1.1316595223515003e-06, "log_odds_chosen": 0.8901669979095459, "log_odds_ratio": -0.39403530955314636, "logits/chosen": -0.7789342403411865, "logits/rejected": -0.8586354851722717, "logps/chosen": -0.868026852607727, "logps/rejected": -1.47313392162323, "loss": 1.107, "nll_loss": 1.0539579391479492, "rewards/accuracies": 0.875, "rewards/chosen": -0.08680268377065659, "rewards/margins": 0.060510702431201935, "rewards/rejected": -0.14731340110301971, "step": 7040 }, { "epoch": 4.2952569772761935, "grad_norm": 1.4722516536712646, "learning_rate": 1.1306797305572566e-06, "log_odds_chosen": 2.348257064819336, "log_odds_ratio": -0.3651333153247833, "logits/chosen": -0.8611767888069153, "logits/rejected": -1.0965791940689087, "logps/chosen": -0.7840654850006104, "logps/rejected": -2.627480983734131, "loss": 1.043, "nll_loss": 1.1331425905227661, "rewards/accuracies": 0.875, "rewards/chosen": -0.07840654999017715, "rewards/margins": 0.18434154987335205, "rewards/rejected": -0.2627480924129486, "step": 7041 }, { "epoch": 4.29586701235321, "grad_norm": 3.456447124481201, "learning_rate": 1.129699938763013e-06, "log_odds_chosen": 1.7813975811004639, "log_odds_ratio": -0.7042627334594727, "logits/chosen": -0.6518082022666931, "logits/rejected": -0.8010392189025879, "logps/chosen": -1.178839921951294, "logps/rejected": -2.781686782836914, "loss": 1.1301, "nll_loss": 1.1771557331085205, "rewards/accuracies": 0.625, "rewards/chosen": -0.11788398027420044, "rewards/margins": 0.16028469800949097, "rewards/rejected": -0.2781686782836914, "step": 7042 }, { "epoch": 4.296477047430227, "grad_norm": 1.9471668004989624, "learning_rate": 1.128720146968769e-06, "log_odds_chosen": 2.5987284183502197, "log_odds_ratio": -0.3041142225265503, "logits/chosen": -0.8182957768440247, "logits/rejected": -1.0369545221328735, "logps/chosen": -0.8292327523231506, "logps/rejected": -2.9312169551849365, "loss": 0.9858, "nll_loss": 1.0337886810302734, "rewards/accuracies": 0.875, "rewards/chosen": -0.0829232782125473, "rewards/margins": 0.21019841730594635, "rewards/rejected": -0.29312169551849365, "step": 7043 }, { "epoch": 4.297087082507244, "grad_norm": 2.446044445037842, "learning_rate": 1.1277403551745252e-06, "log_odds_chosen": 1.4989075660705566, "log_odds_ratio": -0.43384066224098206, "logits/chosen": -0.7289879322052002, "logits/rejected": -0.8001089692115784, "logps/chosen": -0.8809213638305664, "logps/rejected": -2.11145281791687, "loss": 0.9953, "nll_loss": 1.00148344039917, "rewards/accuracies": 0.75, "rewards/chosen": -0.0880921483039856, "rewards/margins": 0.12305314838886261, "rewards/rejected": -0.211145281791687, "step": 7044 }, { "epoch": 4.297697117584261, "grad_norm": 1.3739080429077148, "learning_rate": 1.1267605633802817e-06, "log_odds_chosen": 1.9121246337890625, "log_odds_ratio": -0.3381580412387848, "logits/chosen": -0.9664639234542847, "logits/rejected": -0.9887080192565918, "logps/chosen": -0.9065310955047607, "logps/rejected": -2.469749927520752, "loss": 1.1377, "nll_loss": 1.149787425994873, "rewards/accuracies": 0.875, "rewards/chosen": -0.09065310657024384, "rewards/margins": 0.15632189810276031, "rewards/rejected": -0.24697500467300415, "step": 7045 }, { "epoch": 4.298307152661278, "grad_norm": 1.3847622871398926, "learning_rate": 1.1257807715860379e-06, "log_odds_chosen": 3.3385391235351562, "log_odds_ratio": -0.22730901837348938, "logits/chosen": -0.7183871269226074, "logits/rejected": -0.8389301300048828, "logps/chosen": -0.620586633682251, "logps/rejected": -3.347864866256714, "loss": 0.8877, "nll_loss": 0.9349931478500366, "rewards/accuracies": 0.875, "rewards/chosen": -0.06205866485834122, "rewards/margins": 0.2727278470993042, "rewards/rejected": -0.3347865045070648, "step": 7046 }, { "epoch": 4.298917187738295, "grad_norm": 2.7822182178497314, "learning_rate": 1.1248009797917942e-06, "log_odds_chosen": 1.2478501796722412, "log_odds_ratio": -0.5585562586784363, "logits/chosen": -1.0672688484191895, "logits/rejected": -1.1341536045074463, "logps/chosen": -0.8955259919166565, "logps/rejected": -1.9994699954986572, "loss": 1.1035, "nll_loss": 0.9450159072875977, "rewards/accuracies": 0.625, "rewards/chosen": -0.08955260366201401, "rewards/margins": 0.11039440333843231, "rewards/rejected": -0.19994701445102692, "step": 7047 }, { "epoch": 4.299527222815312, "grad_norm": 1.6208628416061401, "learning_rate": 1.1238211879975505e-06, "log_odds_chosen": 5.56266975402832, "log_odds_ratio": -0.15701204538345337, "logits/chosen": -0.5711121559143066, "logits/rejected": -1.1133532524108887, "logps/chosen": -0.7400210499763489, "logps/rejected": -5.555909633636475, "loss": 0.9442, "nll_loss": 1.0377404689788818, "rewards/accuracies": 0.875, "rewards/chosen": -0.07400210946798325, "rewards/margins": 0.48158887028694153, "rewards/rejected": -0.5555909872055054, "step": 7048 }, { "epoch": 4.300137257892329, "grad_norm": 2.5764076709747314, "learning_rate": 1.1228413962033069e-06, "log_odds_chosen": 1.4915879964828491, "log_odds_ratio": -0.44157350063323975, "logits/chosen": -0.77643883228302, "logits/rejected": -0.8942567110061646, "logps/chosen": -0.7035859823226929, "logps/rejected": -1.7672866582870483, "loss": 0.8744, "nll_loss": 0.9869158864021301, "rewards/accuracies": 0.75, "rewards/chosen": -0.07035860419273376, "rewards/margins": 0.10637006163597107, "rewards/rejected": -0.17672866582870483, "step": 7049 }, { "epoch": 4.300747292969346, "grad_norm": 5.429152488708496, "learning_rate": 1.121861604409063e-06, "log_odds_chosen": 2.3721261024475098, "log_odds_ratio": -0.2589241862297058, "logits/chosen": -0.9278630018234253, "logits/rejected": -1.0140466690063477, "logps/chosen": -0.6382431387901306, "logps/rejected": -2.398733615875244, "loss": 0.9342, "nll_loss": 1.1198413372039795, "rewards/accuracies": 1.0, "rewards/chosen": -0.06382431089878082, "rewards/margins": 0.17604905366897583, "rewards/rejected": -0.23987337946891785, "step": 7050 }, { "epoch": 4.301357328046363, "grad_norm": 11.2964506149292, "learning_rate": 1.1208818126148193e-06, "log_odds_chosen": 2.2164740562438965, "log_odds_ratio": -0.2778244614601135, "logits/chosen": -0.869238018989563, "logits/rejected": -0.9565779566764832, "logps/chosen": -0.7224584817886353, "logps/rejected": -2.327746868133545, "loss": 1.0195, "nll_loss": 1.023692011833191, "rewards/accuracies": 0.875, "rewards/chosen": -0.07224585860967636, "rewards/margins": 0.16052883863449097, "rewards/rejected": -0.23277470469474792, "step": 7051 }, { "epoch": 4.30196736312338, "grad_norm": 7.460373401641846, "learning_rate": 1.1199020208205756e-06, "log_odds_chosen": 2.164353609085083, "log_odds_ratio": -0.29535266757011414, "logits/chosen": -0.7282894253730774, "logits/rejected": -0.8245692253112793, "logps/chosen": -0.7604561448097229, "logps/rejected": -2.3481996059417725, "loss": 1.0318, "nll_loss": 0.8641960620880127, "rewards/accuracies": 0.875, "rewards/chosen": -0.07604561746120453, "rewards/margins": 0.15877434611320496, "rewards/rejected": -0.23481996357440948, "step": 7052 }, { "epoch": 4.3025773982003965, "grad_norm": 3.1721274852752686, "learning_rate": 1.1189222290263318e-06, "log_odds_chosen": 2.8988122940063477, "log_odds_ratio": -0.3058960437774658, "logits/chosen": -0.7390660643577576, "logits/rejected": -0.9472802877426147, "logps/chosen": -0.8487865924835205, "logps/rejected": -3.234070301055908, "loss": 1.1468, "nll_loss": 0.9931495785713196, "rewards/accuracies": 0.875, "rewards/chosen": -0.08487866073846817, "rewards/margins": 0.23852840065956116, "rewards/rejected": -0.32340705394744873, "step": 7053 }, { "epoch": 4.303187433277413, "grad_norm": 7.450954914093018, "learning_rate": 1.117942437232088e-06, "log_odds_chosen": 3.1935367584228516, "log_odds_ratio": -0.2818838357925415, "logits/chosen": -0.8847895860671997, "logits/rejected": -1.051733136177063, "logps/chosen": -0.7782728672027588, "logps/rejected": -3.4073214530944824, "loss": 1.1419, "nll_loss": 0.9440045356750488, "rewards/accuracies": 0.875, "rewards/chosen": -0.07782728224992752, "rewards/margins": 0.2629048526287079, "rewards/rejected": -0.3407321572303772, "step": 7054 }, { "epoch": 4.30379746835443, "grad_norm": 1.9212785959243774, "learning_rate": 1.1169626454378444e-06, "log_odds_chosen": -0.11144998669624329, "log_odds_ratio": -0.7997275590896606, "logits/chosen": -0.9985733032226562, "logits/rejected": -1.0521125793457031, "logps/chosen": -0.896281898021698, "logps/rejected": -0.8169412612915039, "loss": 1.034, "nll_loss": 1.0685420036315918, "rewards/accuracies": 0.5, "rewards/chosen": -0.0896281972527504, "rewards/margins": -0.007934064604341984, "rewards/rejected": -0.08169412612915039, "step": 7055 }, { "epoch": 4.304407503431447, "grad_norm": 1.4979490041732788, "learning_rate": 1.1159828536436008e-06, "log_odds_chosen": 0.6844446659088135, "log_odds_ratio": -0.5232658386230469, "logits/chosen": -0.8882694244384766, "logits/rejected": -0.8398410081863403, "logps/chosen": -0.8643243312835693, "logps/rejected": -1.3583163022994995, "loss": 1.0892, "nll_loss": 0.9699414372444153, "rewards/accuracies": 0.625, "rewards/chosen": -0.08643242716789246, "rewards/margins": 0.049399204552173615, "rewards/rejected": -0.13583162426948547, "step": 7056 }, { "epoch": 4.305017538508464, "grad_norm": 1.3252304792404175, "learning_rate": 1.1150030618493569e-06, "log_odds_chosen": 1.3434003591537476, "log_odds_ratio": -0.611836314201355, "logits/chosen": -0.9404627084732056, "logits/rejected": -0.9367209672927856, "logps/chosen": -0.9465776681900024, "logps/rejected": -2.1069560050964355, "loss": 1.0573, "nll_loss": 1.1966842412948608, "rewards/accuracies": 0.625, "rewards/chosen": -0.094657763838768, "rewards/margins": 0.11603785306215286, "rewards/rejected": -0.21069562435150146, "step": 7057 }, { "epoch": 4.305627573585481, "grad_norm": 1.0037230253219604, "learning_rate": 1.1140232700551132e-06, "log_odds_chosen": 1.7897745370864868, "log_odds_ratio": -0.28328150510787964, "logits/chosen": -0.7654129862785339, "logits/rejected": -1.0159149169921875, "logps/chosen": -0.6757214665412903, "logps/rejected": -1.8699162006378174, "loss": 1.0213, "nll_loss": 1.1684327125549316, "rewards/accuracies": 0.875, "rewards/chosen": -0.06757215410470963, "rewards/margins": 0.11941947042942047, "rewards/rejected": -0.1869916319847107, "step": 7058 }, { "epoch": 4.3062376086624985, "grad_norm": 1.2974287271499634, "learning_rate": 1.1130434782608696e-06, "log_odds_chosen": 0.7583770751953125, "log_odds_ratio": -0.6877202987670898, "logits/chosen": -0.7427316308021545, "logits/rejected": -1.0437674522399902, "logps/chosen": -1.1821131706237793, "logps/rejected": -1.7235605716705322, "loss": 1.2071, "nll_loss": 1.138122797012329, "rewards/accuracies": 0.5, "rewards/chosen": -0.11821132153272629, "rewards/margins": 0.05414474755525589, "rewards/rejected": -0.17235606908798218, "step": 7059 }, { "epoch": 4.306847643739515, "grad_norm": 1.8875312805175781, "learning_rate": 1.1120636864666257e-06, "log_odds_chosen": 1.207688808441162, "log_odds_ratio": -0.4991041421890259, "logits/chosen": -0.7422209978103638, "logits/rejected": -0.8988116979598999, "logps/chosen": -0.6408581733703613, "logps/rejected": -1.5614103078842163, "loss": 0.9537, "nll_loss": 1.030256748199463, "rewards/accuracies": 0.625, "rewards/chosen": -0.06408582627773285, "rewards/margins": 0.09205520898103714, "rewards/rejected": -0.15614104270935059, "step": 7060 }, { "epoch": 4.307457678816532, "grad_norm": 3.1124799251556396, "learning_rate": 1.1110838946723822e-06, "log_odds_chosen": 1.5907180309295654, "log_odds_ratio": -0.3122962415218353, "logits/chosen": -0.8243274688720703, "logits/rejected": -0.9757716655731201, "logps/chosen": -0.8090252876281738, "logps/rejected": -2.0177502632141113, "loss": 1.0784, "nll_loss": 0.9962253570556641, "rewards/accuracies": 0.75, "rewards/chosen": -0.08090253174304962, "rewards/margins": 0.12087249755859375, "rewards/rejected": -0.20177504420280457, "step": 7061 }, { "epoch": 4.308067713893549, "grad_norm": 1.5596152544021606, "learning_rate": 1.1101041028781384e-06, "log_odds_chosen": 1.7980256080627441, "log_odds_ratio": -0.3660268783569336, "logits/chosen": -0.9200910925865173, "logits/rejected": -1.0319550037384033, "logps/chosen": -0.9378490447998047, "logps/rejected": -2.3172473907470703, "loss": 1.1573, "nll_loss": 1.1322078704833984, "rewards/accuracies": 0.875, "rewards/chosen": -0.09378490597009659, "rewards/margins": 0.13793984055519104, "rewards/rejected": -0.23172475397586823, "step": 7062 }, { "epoch": 4.308677748970566, "grad_norm": 1.2632472515106201, "learning_rate": 1.1091243110838947e-06, "log_odds_chosen": 2.674638271331787, "log_odds_ratio": -0.33570319414138794, "logits/chosen": -0.6722986698150635, "logits/rejected": -0.8115727305412292, "logps/chosen": -0.589238166809082, "logps/rejected": -2.5145103931427, "loss": 0.9943, "nll_loss": 0.7838793992996216, "rewards/accuracies": 0.875, "rewards/chosen": -0.05892381817102432, "rewards/margins": 0.19252723455429077, "rewards/rejected": -0.2514510452747345, "step": 7063 }, { "epoch": 4.309287784047583, "grad_norm": 13.327178955078125, "learning_rate": 1.1081445192896508e-06, "log_odds_chosen": 2.9548044204711914, "log_odds_ratio": -0.3324037790298462, "logits/chosen": -0.9954803586006165, "logits/rejected": -1.1060322523117065, "logps/chosen": -0.7942558526992798, "logps/rejected": -3.205702304840088, "loss": 0.8542, "nll_loss": 0.9005156755447388, "rewards/accuracies": 0.875, "rewards/chosen": -0.07942558825016022, "rewards/margins": 0.24114464223384857, "rewards/rejected": -0.3205702304840088, "step": 7064 }, { "epoch": 4.3098978191246, "grad_norm": 1.865623116493225, "learning_rate": 1.1071647274954071e-06, "log_odds_chosen": 1.8143572807312012, "log_odds_ratio": -0.3413262367248535, "logits/chosen": -0.7736231684684753, "logits/rejected": -0.944170355796814, "logps/chosen": -0.7294056415557861, "logps/rejected": -2.094022750854492, "loss": 0.9948, "nll_loss": 0.9111602306365967, "rewards/accuracies": 0.625, "rewards/chosen": -0.07294055819511414, "rewards/margins": 0.13646171987056732, "rewards/rejected": -0.20940229296684265, "step": 7065 }, { "epoch": 4.310507854201616, "grad_norm": 2.299962282180786, "learning_rate": 1.1061849357011635e-06, "log_odds_chosen": 1.110184669494629, "log_odds_ratio": -0.4193255305290222, "logits/chosen": -0.8682221174240112, "logits/rejected": -0.9291257858276367, "logps/chosen": -0.9710850715637207, "logps/rejected": -1.8467638492584229, "loss": 1.1593, "nll_loss": 1.106829047203064, "rewards/accuracies": 0.75, "rewards/chosen": -0.09710851311683655, "rewards/margins": 0.08756789565086365, "rewards/rejected": -0.184676393866539, "step": 7066 }, { "epoch": 4.311117889278633, "grad_norm": 1.1973310708999634, "learning_rate": 1.1052051439069196e-06, "log_odds_chosen": 1.3190455436706543, "log_odds_ratio": -0.5261141061782837, "logits/chosen": -0.7840566635131836, "logits/rejected": -0.8133041262626648, "logps/chosen": -0.9019884467124939, "logps/rejected": -1.9254984855651855, "loss": 1.1081, "nll_loss": 0.9862583875656128, "rewards/accuracies": 0.625, "rewards/chosen": -0.09019884467124939, "rewards/margins": 0.10235100984573364, "rewards/rejected": -0.19254986941814423, "step": 7067 }, { "epoch": 4.31172792435565, "grad_norm": 1.9332863092422485, "learning_rate": 1.1042253521126761e-06, "log_odds_chosen": 2.208559036254883, "log_odds_ratio": -0.27204492688179016, "logits/chosen": -0.8321035504341125, "logits/rejected": -0.9732534885406494, "logps/chosen": -0.8319702744483948, "logps/rejected": -2.555605888366699, "loss": 0.9784, "nll_loss": 1.1812450885772705, "rewards/accuracies": 1.0, "rewards/chosen": -0.08319702744483948, "rewards/margins": 0.17236357927322388, "rewards/rejected": -0.25556060671806335, "step": 7068 }, { "epoch": 4.312337959432668, "grad_norm": 1.684922218322754, "learning_rate": 1.1032455603184323e-06, "log_odds_chosen": 3.5141172409057617, "log_odds_ratio": -0.2938670516014099, "logits/chosen": -1.0082930326461792, "logits/rejected": -1.0594482421875, "logps/chosen": -0.9018106460571289, "logps/rejected": -3.9995834827423096, "loss": 1.1856, "nll_loss": 1.2680978775024414, "rewards/accuracies": 0.875, "rewards/chosen": -0.09018106013536453, "rewards/margins": 0.30977731943130493, "rewards/rejected": -0.39995837211608887, "step": 7069 }, { "epoch": 4.312947994509685, "grad_norm": 1.928726077079773, "learning_rate": 1.1022657685241886e-06, "log_odds_chosen": 2.8193845748901367, "log_odds_ratio": -0.24680742621421814, "logits/chosen": -0.746421217918396, "logits/rejected": -0.8258185982704163, "logps/chosen": -0.725312352180481, "logps/rejected": -2.8818447589874268, "loss": 1.012, "nll_loss": 0.8458415269851685, "rewards/accuracies": 0.75, "rewards/chosen": -0.07253123819828033, "rewards/margins": 0.2156532257795334, "rewards/rejected": -0.2881844639778137, "step": 7070 }, { "epoch": 4.3135580295867015, "grad_norm": 3.1395177841186523, "learning_rate": 1.101285976729945e-06, "log_odds_chosen": 0.8404016494750977, "log_odds_ratio": -0.5711723566055298, "logits/chosen": -1.049233078956604, "logits/rejected": -0.914486289024353, "logps/chosen": -0.8077943325042725, "logps/rejected": -1.5642584562301636, "loss": 1.105, "nll_loss": 0.9264186024665833, "rewards/accuracies": 0.5, "rewards/chosen": -0.08077943325042725, "rewards/margins": 0.07564640045166016, "rewards/rejected": -0.1564258337020874, "step": 7071 }, { "epoch": 4.314168064663718, "grad_norm": 2.1574366092681885, "learning_rate": 1.100306184935701e-06, "log_odds_chosen": 1.2593342065811157, "log_odds_ratio": -0.4041709899902344, "logits/chosen": -0.9827162623405457, "logits/rejected": -0.9861229062080383, "logps/chosen": -0.8354429006576538, "logps/rejected": -1.8110783100128174, "loss": 1.0005, "nll_loss": 0.9290606379508972, "rewards/accuracies": 0.75, "rewards/chosen": -0.0835442915558815, "rewards/margins": 0.09756353497505188, "rewards/rejected": -0.18110781908035278, "step": 7072 }, { "epoch": 4.314778099740735, "grad_norm": 2.118288040161133, "learning_rate": 1.0993263931414574e-06, "log_odds_chosen": 1.936431884765625, "log_odds_ratio": -0.374870240688324, "logits/chosen": -0.8808670043945312, "logits/rejected": -0.9680408239364624, "logps/chosen": -0.8507967591285706, "logps/rejected": -2.427422285079956, "loss": 1.0569, "nll_loss": 0.9472218155860901, "rewards/accuracies": 0.875, "rewards/chosen": -0.08507966995239258, "rewards/margins": 0.15766258537769318, "rewards/rejected": -0.24274225533008575, "step": 7073 }, { "epoch": 4.315388134817752, "grad_norm": 1.9492686986923218, "learning_rate": 1.0983466013472135e-06, "log_odds_chosen": 2.724174976348877, "log_odds_ratio": -0.2675415277481079, "logits/chosen": -0.8362293839454651, "logits/rejected": -0.9839649200439453, "logps/chosen": -0.76474928855896, "logps/rejected": -2.829246997833252, "loss": 1.0174, "nll_loss": 1.0104583501815796, "rewards/accuracies": 0.875, "rewards/chosen": -0.07647493481636047, "rewards/margins": 0.20644979178905487, "rewards/rejected": -0.28292471170425415, "step": 7074 }, { "epoch": 4.315998169894769, "grad_norm": 1.8915164470672607, "learning_rate": 1.09736680955297e-06, "log_odds_chosen": 0.6273089051246643, "log_odds_ratio": -0.5880549550056458, "logits/chosen": -0.7398314476013184, "logits/rejected": -0.8558925986289978, "logps/chosen": -0.9189877510070801, "logps/rejected": -1.3121352195739746, "loss": 1.0642, "nll_loss": 1.1492092609405518, "rewards/accuracies": 0.625, "rewards/chosen": -0.09189878404140472, "rewards/margins": 0.039314739406108856, "rewards/rejected": -0.13121351599693298, "step": 7075 }, { "epoch": 4.316608204971786, "grad_norm": 1.9686551094055176, "learning_rate": 1.0963870177587262e-06, "log_odds_chosen": 2.512000560760498, "log_odds_ratio": -0.4494452476501465, "logits/chosen": -0.8178166151046753, "logits/rejected": -0.8778219819068909, "logps/chosen": -0.7941487431526184, "logps/rejected": -2.8457045555114746, "loss": 1.1439, "nll_loss": 1.0636423826217651, "rewards/accuracies": 0.75, "rewards/chosen": -0.07941487431526184, "rewards/margins": 0.20515558123588562, "rewards/rejected": -0.28457045555114746, "step": 7076 }, { "epoch": 4.317218240048803, "grad_norm": 1.30770742893219, "learning_rate": 1.0954072259644825e-06, "log_odds_chosen": 1.9177978038787842, "log_odds_ratio": -0.26984137296676636, "logits/chosen": -0.8331699371337891, "logits/rejected": -0.9966152906417847, "logps/chosen": -0.83939528465271, "logps/rejected": -2.2579171657562256, "loss": 1.0063, "nll_loss": 0.9193138480186462, "rewards/accuracies": 0.875, "rewards/chosen": -0.08393952995538712, "rewards/margins": 0.14185218513011932, "rewards/rejected": -0.22579172253608704, "step": 7077 }, { "epoch": 4.3178282751258195, "grad_norm": 4.766452312469482, "learning_rate": 1.0944274341702388e-06, "log_odds_chosen": 2.355109691619873, "log_odds_ratio": -0.5260289907455444, "logits/chosen": -0.8259133100509644, "logits/rejected": -1.0701584815979004, "logps/chosen": -0.7007558345794678, "logps/rejected": -2.605431318283081, "loss": 0.8695, "nll_loss": 0.8283895254135132, "rewards/accuracies": 0.5, "rewards/chosen": -0.07007558643817902, "rewards/margins": 0.19046753644943237, "rewards/rejected": -0.2605431079864502, "step": 7078 }, { "epoch": 4.318438310202836, "grad_norm": 1.958983063697815, "learning_rate": 1.093447642375995e-06, "log_odds_chosen": 1.843043565750122, "log_odds_ratio": -0.5248191952705383, "logits/chosen": -0.6985089182853699, "logits/rejected": -0.8698011040687561, "logps/chosen": -0.6311073899269104, "logps/rejected": -2.0640358924865723, "loss": 0.908, "nll_loss": 0.8035976886749268, "rewards/accuracies": 0.625, "rewards/chosen": -0.06311073899269104, "rewards/margins": 0.1432928740978241, "rewards/rejected": -0.20640359818935394, "step": 7079 }, { "epoch": 4.319048345279853, "grad_norm": 1.052817940711975, "learning_rate": 1.0924678505817513e-06, "log_odds_chosen": 1.7529268264770508, "log_odds_ratio": -0.3439117670059204, "logits/chosen": -1.0379900932312012, "logits/rejected": -1.0652587413787842, "logps/chosen": -0.586323082447052, "logps/rejected": -1.6721062660217285, "loss": 1.0505, "nll_loss": 1.011016607284546, "rewards/accuracies": 0.875, "rewards/chosen": -0.05863230675458908, "rewards/margins": 0.10857832431793213, "rewards/rejected": -0.1672106385231018, "step": 7080 }, { "epoch": 4.319658380356871, "grad_norm": 1.3224791288375854, "learning_rate": 1.0914880587875076e-06, "log_odds_chosen": 1.9645843505859375, "log_odds_ratio": -0.7207074761390686, "logits/chosen": -0.9112803936004639, "logits/rejected": -0.9484505653381348, "logps/chosen": -0.912185549736023, "logps/rejected": -2.763660192489624, "loss": 1.0742, "nll_loss": 1.054724097251892, "rewards/accuracies": 0.375, "rewards/chosen": -0.09121856093406677, "rewards/margins": 0.1851474642753601, "rewards/rejected": -0.2763660252094269, "step": 7081 }, { "epoch": 4.320268415433888, "grad_norm": 2.6855082511901855, "learning_rate": 1.090508266993264e-06, "log_odds_chosen": 1.341181993484497, "log_odds_ratio": -0.5710949897766113, "logits/chosen": -0.9612796306610107, "logits/rejected": -0.9461921453475952, "logps/chosen": -0.871109127998352, "logps/rejected": -1.8152316808700562, "loss": 0.9533, "nll_loss": 0.9502044916152954, "rewards/accuracies": 0.625, "rewards/chosen": -0.08711090683937073, "rewards/margins": 0.09441225230693817, "rewards/rejected": -0.1815231740474701, "step": 7082 }, { "epoch": 4.320878450510905, "grad_norm": 1.2626988887786865, "learning_rate": 1.08952847519902e-06, "log_odds_chosen": 0.9476890563964844, "log_odds_ratio": -0.6634032726287842, "logits/chosen": -0.950793445110321, "logits/rejected": -0.9773638248443604, "logps/chosen": -0.9047297835350037, "logps/rejected": -1.6989485025405884, "loss": 1.0891, "nll_loss": 1.0056562423706055, "rewards/accuracies": 0.375, "rewards/chosen": -0.0904729813337326, "rewards/margins": 0.07942187786102295, "rewards/rejected": -0.16989484429359436, "step": 7083 }, { "epoch": 4.321488485587921, "grad_norm": 2.9857847690582275, "learning_rate": 1.0885486834047764e-06, "log_odds_chosen": 2.8117308616638184, "log_odds_ratio": -0.5592330694198608, "logits/chosen": -0.6673128604888916, "logits/rejected": -1.0479345321655273, "logps/chosen": -0.7857152223587036, "logps/rejected": -3.2379326820373535, "loss": 1.2569, "nll_loss": 0.8653939962387085, "rewards/accuracies": 0.5, "rewards/chosen": -0.07857152819633484, "rewards/margins": 0.24522174894809723, "rewards/rejected": -0.32379329204559326, "step": 7084 }, { "epoch": 4.322098520664938, "grad_norm": 3.1737008094787598, "learning_rate": 1.0875688916105328e-06, "log_odds_chosen": 1.2144687175750732, "log_odds_ratio": -0.35197144746780396, "logits/chosen": -1.0264892578125, "logits/rejected": -1.0047932863235474, "logps/chosen": -0.6557307839393616, "logps/rejected": -1.4211468696594238, "loss": 1.0435, "nll_loss": 0.9517541527748108, "rewards/accuracies": 0.875, "rewards/chosen": -0.0655730813741684, "rewards/margins": 0.07654163241386414, "rewards/rejected": -0.14211471378803253, "step": 7085 }, { "epoch": 4.322708555741955, "grad_norm": 3.9816691875457764, "learning_rate": 1.0865890998162889e-06, "log_odds_chosen": 2.42075252532959, "log_odds_ratio": -0.35324814915657043, "logits/chosen": -0.9077802896499634, "logits/rejected": -1.059923529624939, "logps/chosen": -0.6680598855018616, "logps/rejected": -2.526487350463867, "loss": 0.9235, "nll_loss": 0.7841709852218628, "rewards/accuracies": 0.75, "rewards/chosen": -0.06680599600076675, "rewards/margins": 0.18584273755550385, "rewards/rejected": -0.2526487410068512, "step": 7086 }, { "epoch": 4.323318590818972, "grad_norm": 1.3521755933761597, "learning_rate": 1.0856093080220452e-06, "log_odds_chosen": 1.3677144050598145, "log_odds_ratio": -0.5116525292396545, "logits/chosen": -1.0502506494522095, "logits/rejected": -1.0770155191421509, "logps/chosen": -1.0026929378509521, "logps/rejected": -2.116530418395996, "loss": 1.064, "nll_loss": 1.126108169555664, "rewards/accuracies": 0.625, "rewards/chosen": -0.10026928782463074, "rewards/margins": 0.11138377338647842, "rewards/rejected": -0.21165305376052856, "step": 7087 }, { "epoch": 4.323928625895989, "grad_norm": 1.8722330331802368, "learning_rate": 1.0846295162278016e-06, "log_odds_chosen": 2.260118007659912, "log_odds_ratio": -0.2764319181442261, "logits/chosen": -0.884247362613678, "logits/rejected": -1.0265783071517944, "logps/chosen": -0.7222517728805542, "logps/rejected": -2.339871883392334, "loss": 0.9608, "nll_loss": 0.830981969833374, "rewards/accuracies": 1.0, "rewards/chosen": -0.0722251832485199, "rewards/margins": 0.16176199913024902, "rewards/rejected": -0.23398718237876892, "step": 7088 }, { "epoch": 4.324538660973006, "grad_norm": 1.7452201843261719, "learning_rate": 1.0836497244335579e-06, "log_odds_chosen": 3.9514358043670654, "log_odds_ratio": -0.09354695677757263, "logits/chosen": -0.8702877759933472, "logits/rejected": -1.1834696531295776, "logps/chosen": -0.5153630375862122, "logps/rejected": -3.2720718383789062, "loss": 1.0097, "nll_loss": 0.9693806767463684, "rewards/accuracies": 1.0, "rewards/chosen": -0.051536306738853455, "rewards/margins": 0.2756708860397339, "rewards/rejected": -0.32720720767974854, "step": 7089 }, { "epoch": 4.3251486960500225, "grad_norm": 3.633359432220459, "learning_rate": 1.082669932639314e-06, "log_odds_chosen": 2.7117745876312256, "log_odds_ratio": -0.2434968501329422, "logits/chosen": -0.9227046966552734, "logits/rejected": -1.0380568504333496, "logps/chosen": -0.7114239931106567, "logps/rejected": -2.8794074058532715, "loss": 0.9251, "nll_loss": 1.079527497291565, "rewards/accuracies": 0.875, "rewards/chosen": -0.07114240527153015, "rewards/margins": 0.2167983502149582, "rewards/rejected": -0.28794074058532715, "step": 7090 }, { "epoch": 4.32575873112704, "grad_norm": 1.4169788360595703, "learning_rate": 1.0816901408450703e-06, "log_odds_chosen": 4.287364959716797, "log_odds_ratio": -0.5740523934364319, "logits/chosen": -0.8535033464431763, "logits/rejected": -0.8747375011444092, "logps/chosen": -0.9929667115211487, "logps/rejected": -5.121609687805176, "loss": 1.1076, "nll_loss": 1.1617897748947144, "rewards/accuracies": 0.5, "rewards/chosen": -0.09929666668176651, "rewards/margins": 0.4128642678260803, "rewards/rejected": -0.5121609568595886, "step": 7091 }, { "epoch": 4.326368766204057, "grad_norm": 2.1879191398620605, "learning_rate": 1.0807103490508267e-06, "log_odds_chosen": 0.5239797830581665, "log_odds_ratio": -0.6339500546455383, "logits/chosen": -0.9901916980743408, "logits/rejected": -1.0401127338409424, "logps/chosen": -1.028225302696228, "logps/rejected": -1.288639783859253, "loss": 1.0629, "nll_loss": 1.145958423614502, "rewards/accuracies": 0.625, "rewards/chosen": -0.10282252728939056, "rewards/margins": 0.02604145184159279, "rewards/rejected": -0.12886399030685425, "step": 7092 }, { "epoch": 4.326978801281074, "grad_norm": 1.6372828483581543, "learning_rate": 1.079730557256583e-06, "log_odds_chosen": 2.4361205101013184, "log_odds_ratio": -0.5234084129333496, "logits/chosen": -0.9969710111618042, "logits/rejected": -1.1629817485809326, "logps/chosen": -0.8680021166801453, "logps/rejected": -2.7608890533447266, "loss": 1.3157, "nll_loss": 1.4289982318878174, "rewards/accuracies": 0.75, "rewards/chosen": -0.08680021017789841, "rewards/margins": 0.18928870558738708, "rewards/rejected": -0.2760889232158661, "step": 7093 }, { "epoch": 4.327588836358091, "grad_norm": 2.0020835399627686, "learning_rate": 1.0787507654623391e-06, "log_odds_chosen": 1.2260586023330688, "log_odds_ratio": -0.3638572692871094, "logits/chosen": -0.7746789455413818, "logits/rejected": -0.7558778524398804, "logps/chosen": -0.6817086935043335, "logps/rejected": -1.4396305084228516, "loss": 0.9327, "nll_loss": 0.9766099452972412, "rewards/accuracies": 0.875, "rewards/chosen": -0.06817086786031723, "rewards/margins": 0.07579218596220016, "rewards/rejected": -0.1439630687236786, "step": 7094 }, { "epoch": 4.328198871435108, "grad_norm": 13.835865020751953, "learning_rate": 1.0777709736680955e-06, "log_odds_chosen": 1.4762619733810425, "log_odds_ratio": -0.3624999523162842, "logits/chosen": -0.8566530346870422, "logits/rejected": -1.0000461339950562, "logps/chosen": -0.5617485046386719, "logps/rejected": -1.452926754951477, "loss": 0.9454, "nll_loss": 0.7896517515182495, "rewards/accuracies": 0.875, "rewards/chosen": -0.05617485195398331, "rewards/margins": 0.08911783248186111, "rewards/rejected": -0.14529268443584442, "step": 7095 }, { "epoch": 4.3288089065121245, "grad_norm": 1.8632816076278687, "learning_rate": 1.0767911818738518e-06, "log_odds_chosen": 2.4298555850982666, "log_odds_ratio": -0.35390329360961914, "logits/chosen": -0.9467880725860596, "logits/rejected": -0.8492890000343323, "logps/chosen": -0.5856651067733765, "logps/rejected": -2.405210494995117, "loss": 1.0359, "nll_loss": 1.1193346977233887, "rewards/accuracies": 0.625, "rewards/chosen": -0.05856650695204735, "rewards/margins": 0.18195456266403198, "rewards/rejected": -0.24052107334136963, "step": 7096 }, { "epoch": 4.329418941589141, "grad_norm": 1.426253080368042, "learning_rate": 1.075811390079608e-06, "log_odds_chosen": 3.3257505893707275, "log_odds_ratio": -0.18643948435783386, "logits/chosen": -0.8667922019958496, "logits/rejected": -0.9223625659942627, "logps/chosen": -0.5338991284370422, "logps/rejected": -3.050804853439331, "loss": 0.9934, "nll_loss": 0.7517892122268677, "rewards/accuracies": 1.0, "rewards/chosen": -0.05338991433382034, "rewards/margins": 0.2516905665397644, "rewards/rejected": -0.30508047342300415, "step": 7097 }, { "epoch": 4.330028976666158, "grad_norm": 12.657108306884766, "learning_rate": 1.0748315982853645e-06, "log_odds_chosen": 2.066795825958252, "log_odds_ratio": -0.3703446090221405, "logits/chosen": -1.0968832969665527, "logits/rejected": -1.0935310125350952, "logps/chosen": -0.7858518362045288, "logps/rejected": -2.4839816093444824, "loss": 1.0548, "nll_loss": 1.0232421159744263, "rewards/accuracies": 1.0, "rewards/chosen": -0.0785851925611496, "rewards/margins": 0.16981299221515656, "rewards/rejected": -0.24839818477630615, "step": 7098 }, { "epoch": 4.330639011743175, "grad_norm": 1.5138076543807983, "learning_rate": 1.0738518064911206e-06, "log_odds_chosen": 1.546034574508667, "log_odds_ratio": -0.38004428148269653, "logits/chosen": -1.0187841653823853, "logits/rejected": -1.095494031906128, "logps/chosen": -0.8517802357673645, "logps/rejected": -2.0551552772521973, "loss": 1.086, "nll_loss": 0.9692529439926147, "rewards/accuracies": 1.0, "rewards/chosen": -0.08517801761627197, "rewards/margins": 0.12033750116825104, "rewards/rejected": -0.205515518784523, "step": 7099 }, { "epoch": 4.331249046820192, "grad_norm": 1.582249641418457, "learning_rate": 1.072872014696877e-06, "log_odds_chosen": 1.4092882871627808, "log_odds_ratio": -0.5267657041549683, "logits/chosen": -0.9379031658172607, "logits/rejected": -0.9497168064117432, "logps/chosen": -0.9028922319412231, "logps/rejected": -2.1846909523010254, "loss": 1.0606, "nll_loss": 1.1403385400772095, "rewards/accuracies": 0.5, "rewards/chosen": -0.09028922766447067, "rewards/margins": 0.1281798630952835, "rewards/rejected": -0.21846908330917358, "step": 7100 }, { "epoch": 4.331859081897209, "grad_norm": 2.618772506713867, "learning_rate": 1.071892222902633e-06, "log_odds_chosen": 3.9507853984832764, "log_odds_ratio": -0.3727247416973114, "logits/chosen": -0.6227138042449951, "logits/rejected": -0.8677704334259033, "logps/chosen": -0.5948725938796997, "logps/rejected": -3.9249041080474854, "loss": 0.9681, "nll_loss": 0.850882887840271, "rewards/accuracies": 0.75, "rewards/chosen": -0.05948726087808609, "rewards/margins": 0.3330031633377075, "rewards/rejected": -0.392490416765213, "step": 7101 }, { "epoch": 4.332469116974226, "grad_norm": 2.325448989868164, "learning_rate": 1.0709124311083894e-06, "log_odds_chosen": 3.0291330814361572, "log_odds_ratio": -0.1705993413925171, "logits/chosen": -0.9243017435073853, "logits/rejected": -0.9739722609519958, "logps/chosen": -0.7695646286010742, "logps/rejected": -3.1958718299865723, "loss": 0.8601, "nll_loss": 0.7985427379608154, "rewards/accuracies": 1.0, "rewards/chosen": -0.07695646584033966, "rewards/margins": 0.2426307201385498, "rewards/rejected": -0.31958720088005066, "step": 7102 }, { "epoch": 4.333079152051243, "grad_norm": 10.767004013061523, "learning_rate": 1.0699326393141457e-06, "log_odds_chosen": 1.6667697429656982, "log_odds_ratio": -0.36503899097442627, "logits/chosen": -0.9350929260253906, "logits/rejected": -1.0031113624572754, "logps/chosen": -0.7429259419441223, "logps/rejected": -1.9681566953659058, "loss": 0.9049, "nll_loss": 0.8887690305709839, "rewards/accuracies": 0.875, "rewards/chosen": -0.07429260015487671, "rewards/margins": 0.12252307683229446, "rewards/rejected": -0.19681568443775177, "step": 7103 }, { "epoch": 4.33368918712826, "grad_norm": 1.6375279426574707, "learning_rate": 1.0689528475199018e-06, "log_odds_chosen": 2.310472011566162, "log_odds_ratio": -0.37403085827827454, "logits/chosen": -0.943579912185669, "logits/rejected": -0.97393798828125, "logps/chosen": -0.8044817447662354, "logps/rejected": -2.694647789001465, "loss": 1.0882, "nll_loss": 1.0258064270019531, "rewards/accuracies": 0.75, "rewards/chosen": -0.08044817298650742, "rewards/margins": 0.18901656568050385, "rewards/rejected": -0.26946473121643066, "step": 7104 }, { "epoch": 4.334299222205277, "grad_norm": 15.317399978637695, "learning_rate": 1.0679730557256584e-06, "log_odds_chosen": 4.378532409667969, "log_odds_ratio": -0.2182341367006302, "logits/chosen": -0.7496879696846008, "logits/rejected": -0.9076443314552307, "logps/chosen": -0.7889832258224487, "logps/rejected": -4.433567523956299, "loss": 0.973, "nll_loss": 0.976381242275238, "rewards/accuracies": 0.875, "rewards/chosen": -0.07889831811189651, "rewards/margins": 0.36445844173431396, "rewards/rejected": -0.4433567523956299, "step": 7105 }, { "epoch": 4.334909257282294, "grad_norm": 4.8134260177612305, "learning_rate": 1.0669932639314145e-06, "log_odds_chosen": 0.7151004076004028, "log_odds_ratio": -0.47258561849594116, "logits/chosen": -0.7226810455322266, "logits/rejected": -0.7865118980407715, "logps/chosen": -0.7035502791404724, "logps/rejected": -1.114263892173767, "loss": 0.9019, "nll_loss": 0.8076844811439514, "rewards/accuracies": 0.75, "rewards/chosen": -0.07035502791404724, "rewards/margins": 0.04107136279344559, "rewards/rejected": -0.11142639815807343, "step": 7106 }, { "epoch": 4.335519292359311, "grad_norm": 2.300238847732544, "learning_rate": 1.0660134721371708e-06, "log_odds_chosen": 2.4020228385925293, "log_odds_ratio": -0.5978155136108398, "logits/chosen": -1.0688440799713135, "logits/rejected": -1.1039454936981201, "logps/chosen": -0.8754401803016663, "logps/rejected": -2.938462018966675, "loss": 1.08, "nll_loss": 1.2391237020492554, "rewards/accuracies": 0.75, "rewards/chosen": -0.0875440239906311, "rewards/margins": 0.206302210688591, "rewards/rejected": -0.2938462197780609, "step": 7107 }, { "epoch": 4.3361293274363275, "grad_norm": 1.6946078538894653, "learning_rate": 1.0650336803429272e-06, "log_odds_chosen": 2.5287911891937256, "log_odds_ratio": -0.35080432891845703, "logits/chosen": -0.957947313785553, "logits/rejected": -0.9945245981216431, "logps/chosen": -0.8237031698226929, "logps/rejected": -2.854145050048828, "loss": 1.0099, "nll_loss": 1.055480718612671, "rewards/accuracies": 0.75, "rewards/chosen": -0.0823703184723854, "rewards/margins": 0.20304422080516815, "rewards/rejected": -0.28541451692581177, "step": 7108 }, { "epoch": 4.336739362513344, "grad_norm": 1.8982049226760864, "learning_rate": 1.0640538885486833e-06, "log_odds_chosen": 1.7994136810302734, "log_odds_ratio": -0.3195628821849823, "logits/chosen": -0.7732986211776733, "logits/rejected": -0.9247732162475586, "logps/chosen": -0.6087389588356018, "logps/rejected": -1.8137506246566772, "loss": 1.0348, "nll_loss": 1.0674200057983398, "rewards/accuracies": 0.875, "rewards/chosen": -0.06087389588356018, "rewards/margins": 0.12050117552280426, "rewards/rejected": -0.18137505650520325, "step": 7109 }, { "epoch": 4.337349397590361, "grad_norm": 1.7271013259887695, "learning_rate": 1.0630740967544396e-06, "log_odds_chosen": 1.6349363327026367, "log_odds_ratio": -0.45702826976776123, "logits/chosen": -0.9502905607223511, "logits/rejected": -1.0674723386764526, "logps/chosen": -0.8338838815689087, "logps/rejected": -2.021662950515747, "loss": 0.8994, "nll_loss": 0.9170240163803101, "rewards/accuracies": 0.625, "rewards/chosen": -0.08338838815689087, "rewards/margins": 0.11877792328596115, "rewards/rejected": -0.20216628909111023, "step": 7110 }, { "epoch": 4.337959432667378, "grad_norm": 5.237148761749268, "learning_rate": 1.0620943049601958e-06, "log_odds_chosen": 3.3879008293151855, "log_odds_ratio": -0.17617329955101013, "logits/chosen": -0.928329348564148, "logits/rejected": -1.1954407691955566, "logps/chosen": -0.6099806427955627, "logps/rejected": -3.314741373062134, "loss": 1.1354, "nll_loss": 1.0510748624801636, "rewards/accuracies": 1.0, "rewards/chosen": -0.060998063534498215, "rewards/margins": 0.2704760432243347, "rewards/rejected": -0.3314741253852844, "step": 7111 }, { "epoch": 4.338569467744395, "grad_norm": 2.2079355716705322, "learning_rate": 1.0611145131659523e-06, "log_odds_chosen": 1.4822272062301636, "log_odds_ratio": -0.4638344943523407, "logits/chosen": -0.8827685117721558, "logits/rejected": -0.8802353143692017, "logps/chosen": -0.9696499109268188, "logps/rejected": -2.0215842723846436, "loss": 1.063, "nll_loss": 1.3163729906082153, "rewards/accuracies": 0.625, "rewards/chosen": -0.09696498513221741, "rewards/margins": 0.10519344359636307, "rewards/rejected": -0.20215842127799988, "step": 7112 }, { "epoch": 4.339179502821413, "grad_norm": 7.844799041748047, "learning_rate": 1.0601347213717084e-06, "log_odds_chosen": 2.217453718185425, "log_odds_ratio": -0.28316009044647217, "logits/chosen": -1.0540053844451904, "logits/rejected": -1.0154640674591064, "logps/chosen": -0.906013011932373, "logps/rejected": -2.806021213531494, "loss": 1.2452, "nll_loss": 1.2589125633239746, "rewards/accuracies": 1.0, "rewards/chosen": -0.09060129523277283, "rewards/margins": 0.19000083208084106, "rewards/rejected": -0.2806021273136139, "step": 7113 }, { "epoch": 4.3397895378984295, "grad_norm": 6.955849647521973, "learning_rate": 1.0591549295774648e-06, "log_odds_chosen": 0.7637975215911865, "log_odds_ratio": -0.6308510303497314, "logits/chosen": -1.0952033996582031, "logits/rejected": -1.1376526355743408, "logps/chosen": -1.0640299320220947, "logps/rejected": -1.714734673500061, "loss": 1.0663, "nll_loss": 1.3631656169891357, "rewards/accuracies": 0.625, "rewards/chosen": -0.10640300065279007, "rewards/margins": 0.06507046520709991, "rewards/rejected": -0.17147347331047058, "step": 7114 }, { "epoch": 4.340399572975446, "grad_norm": 10.145490646362305, "learning_rate": 1.058175137783221e-06, "log_odds_chosen": 1.8941755294799805, "log_odds_ratio": -0.2635915279388428, "logits/chosen": -0.894041121006012, "logits/rejected": -1.0502313375473022, "logps/chosen": -0.8073220252990723, "logps/rejected": -2.269021511077881, "loss": 1.1013, "nll_loss": 1.0387710332870483, "rewards/accuracies": 1.0, "rewards/chosen": -0.08073220402002335, "rewards/margins": 0.14616996049880981, "rewards/rejected": -0.22690215706825256, "step": 7115 }, { "epoch": 4.341009608052463, "grad_norm": 1.3687751293182373, "learning_rate": 1.0571953459889772e-06, "log_odds_chosen": 1.1919748783111572, "log_odds_ratio": -0.5783823728561401, "logits/chosen": -1.1099767684936523, "logits/rejected": -1.042366623878479, "logps/chosen": -0.8764020204544067, "logps/rejected": -1.8975334167480469, "loss": 1.0592, "nll_loss": 1.0514695644378662, "rewards/accuracies": 0.75, "rewards/chosen": -0.0876402035355568, "rewards/margins": 0.10211314260959625, "rewards/rejected": -0.18975335359573364, "step": 7116 }, { "epoch": 4.34161964312948, "grad_norm": 1.7269737720489502, "learning_rate": 1.0562155541947335e-06, "log_odds_chosen": 1.1419012546539307, "log_odds_ratio": -0.3630978465080261, "logits/chosen": -0.7815362215042114, "logits/rejected": -0.9745296239852905, "logps/chosen": -0.9266523718833923, "logps/rejected": -1.727860450744629, "loss": 1.0855, "nll_loss": 1.104595422744751, "rewards/accuracies": 0.875, "rewards/chosen": -0.09266524016857147, "rewards/margins": 0.08012081682682037, "rewards/rejected": -0.17278604209423065, "step": 7117 }, { "epoch": 4.342229678206497, "grad_norm": 2.021204948425293, "learning_rate": 1.0552357624004899e-06, "log_odds_chosen": 1.736487627029419, "log_odds_ratio": -0.45954272150993347, "logits/chosen": -0.8034532070159912, "logits/rejected": -0.9231797456741333, "logps/chosen": -0.7537184953689575, "logps/rejected": -2.032430648803711, "loss": 0.9291, "nll_loss": 0.8909519910812378, "rewards/accuracies": 0.875, "rewards/chosen": -0.07537185400724411, "rewards/margins": 0.12787121534347534, "rewards/rejected": -0.20324306190013885, "step": 7118 }, { "epoch": 4.342839713283514, "grad_norm": 1.9688382148742676, "learning_rate": 1.0542559706062462e-06, "log_odds_chosen": 2.2475967407226562, "log_odds_ratio": -0.4444240629673004, "logits/chosen": -0.8461317420005798, "logits/rejected": -1.048213005065918, "logps/chosen": -0.7050426006317139, "logps/rejected": -2.4857044219970703, "loss": 0.9974, "nll_loss": 0.8543352484703064, "rewards/accuracies": 0.75, "rewards/chosen": -0.07050427049398422, "rewards/margins": 0.1780661642551422, "rewards/rejected": -0.24857044219970703, "step": 7119 }, { "epoch": 4.3434497483605305, "grad_norm": 1.5647101402282715, "learning_rate": 1.0532761788120023e-06, "log_odds_chosen": 2.3011343479156494, "log_odds_ratio": -0.32319188117980957, "logits/chosen": -1.038973093032837, "logits/rejected": -0.9687220454216003, "logps/chosen": -0.5519495010375977, "logps/rejected": -1.6561373472213745, "loss": 1.0162, "nll_loss": 1.1972155570983887, "rewards/accuracies": 0.875, "rewards/chosen": -0.05519494414329529, "rewards/margins": 0.11041878163814545, "rewards/rejected": -0.16561372578144073, "step": 7120 }, { "epoch": 4.344059783437547, "grad_norm": 1.3938542604446411, "learning_rate": 1.0522963870177587e-06, "log_odds_chosen": 1.0830916166305542, "log_odds_ratio": -0.5751743912696838, "logits/chosen": -0.8162999153137207, "logits/rejected": -0.912647008895874, "logps/chosen": -0.9738398194313049, "logps/rejected": -1.9169180393218994, "loss": 0.9766, "nll_loss": 1.0163685083389282, "rewards/accuracies": 0.625, "rewards/chosen": -0.09738399088382721, "rewards/margins": 0.09430782496929169, "rewards/rejected": -0.1916918158531189, "step": 7121 }, { "epoch": 4.344669818514564, "grad_norm": 9.308152198791504, "learning_rate": 1.051316595223515e-06, "log_odds_chosen": 2.2565014362335205, "log_odds_ratio": -0.3411155045032501, "logits/chosen": -0.90325927734375, "logits/rejected": -1.0647249221801758, "logps/chosen": -0.7905341386795044, "logps/rejected": -2.609179973602295, "loss": 1.0585, "nll_loss": 1.0907518863677979, "rewards/accuracies": 0.875, "rewards/chosen": -0.07905340939760208, "rewards/margins": 0.18186460435390472, "rewards/rejected": -0.2609180212020874, "step": 7122 }, { "epoch": 4.345279853591581, "grad_norm": 1.7719674110412598, "learning_rate": 1.0503368034292711e-06, "log_odds_chosen": 3.309368371963501, "log_odds_ratio": -0.26205867528915405, "logits/chosen": -0.7416501045227051, "logits/rejected": -1.0030397176742554, "logps/chosen": -0.6539657711982727, "logps/rejected": -3.2958531379699707, "loss": 1.1147, "nll_loss": 0.8466187715530396, "rewards/accuracies": 0.875, "rewards/chosen": -0.06539657711982727, "rewards/margins": 0.2641887664794922, "rewards/rejected": -0.32958531379699707, "step": 7123 }, { "epoch": 4.345889888668599, "grad_norm": 2.555724620819092, "learning_rate": 1.0493570116350275e-06, "log_odds_chosen": 2.0275042057037354, "log_odds_ratio": -0.2552984356880188, "logits/chosen": -0.676161527633667, "logits/rejected": -0.6873546838760376, "logps/chosen": -0.6039535403251648, "logps/rejected": -1.9384477138519287, "loss": 0.9604, "nll_loss": 0.7396674752235413, "rewards/accuracies": 1.0, "rewards/chosen": -0.06039535999298096, "rewards/margins": 0.13344943523406982, "rewards/rejected": -0.19384479522705078, "step": 7124 }, { "epoch": 4.346499923745616, "grad_norm": 2.1622490882873535, "learning_rate": 1.0483772198407838e-06, "log_odds_chosen": 3.4697930812835693, "log_odds_ratio": -0.3803764283657074, "logits/chosen": -0.8558979034423828, "logits/rejected": -1.036412239074707, "logps/chosen": -0.6154608726501465, "logps/rejected": -3.383659839630127, "loss": 1.1297, "nll_loss": 0.8787834048271179, "rewards/accuracies": 0.625, "rewards/chosen": -0.06154608726501465, "rewards/margins": 0.2768199145793915, "rewards/rejected": -0.33836600184440613, "step": 7125 }, { "epoch": 4.3471099588226325, "grad_norm": 1.2229756116867065, "learning_rate": 1.0473974280465401e-06, "log_odds_chosen": 2.786647319793701, "log_odds_ratio": -0.40294957160949707, "logits/chosen": -0.9234011173248291, "logits/rejected": -0.8558436632156372, "logps/chosen": -0.7536425590515137, "logps/rejected": -3.0607733726501465, "loss": 0.9333, "nll_loss": 0.8984962105751038, "rewards/accuracies": 0.75, "rewards/chosen": -0.07536425441503525, "rewards/margins": 0.23071306943893433, "rewards/rejected": -0.30607736110687256, "step": 7126 }, { "epoch": 4.347719993899649, "grad_norm": 1.2496975660324097, "learning_rate": 1.0464176362522963e-06, "log_odds_chosen": 2.2138025760650635, "log_odds_ratio": -0.4167516231536865, "logits/chosen": -0.800384521484375, "logits/rejected": -0.8064233064651489, "logps/chosen": -0.5818006992340088, "logps/rejected": -2.049800395965576, "loss": 0.9972, "nll_loss": 1.0039361715316772, "rewards/accuracies": 0.625, "rewards/chosen": -0.0581800751388073, "rewards/margins": 0.1467999666929245, "rewards/rejected": -0.2049800604581833, "step": 7127 }, { "epoch": 4.348330028976666, "grad_norm": 3.069129705429077, "learning_rate": 1.0454378444580528e-06, "log_odds_chosen": 1.0735857486724854, "log_odds_ratio": -0.8250254392623901, "logits/chosen": -1.0550050735473633, "logits/rejected": -0.9687410593032837, "logps/chosen": -0.8565632104873657, "logps/rejected": -1.9753234386444092, "loss": 1.2506, "nll_loss": 1.4199451208114624, "rewards/accuracies": 0.375, "rewards/chosen": -0.08565632253885269, "rewards/margins": 0.11187603324651718, "rewards/rejected": -0.19753235578536987, "step": 7128 }, { "epoch": 4.348940064053683, "grad_norm": 1.6250022649765015, "learning_rate": 1.044458052663809e-06, "log_odds_chosen": 1.4215830564498901, "log_odds_ratio": -0.41321849822998047, "logits/chosen": -0.8244138956069946, "logits/rejected": -0.7999993562698364, "logps/chosen": -0.8056660294532776, "logps/rejected": -1.919029951095581, "loss": 0.9796, "nll_loss": 0.9344284534454346, "rewards/accuracies": 0.75, "rewards/chosen": -0.08056660741567612, "rewards/margins": 0.11133640259504318, "rewards/rejected": -0.1919030100107193, "step": 7129 }, { "epoch": 4.3495500991307, "grad_norm": 1.934134602546692, "learning_rate": 1.043478260869565e-06, "log_odds_chosen": 2.7173573970794678, "log_odds_ratio": -0.28589296340942383, "logits/chosen": -0.9702122211456299, "logits/rejected": -1.176778793334961, "logps/chosen": -0.9534887075424194, "logps/rejected": -3.295327663421631, "loss": 1.0269, "nll_loss": 0.9445825815200806, "rewards/accuracies": 0.875, "rewards/chosen": -0.09534887224435806, "rewards/margins": 0.2341838926076889, "rewards/rejected": -0.32953277230262756, "step": 7130 }, { "epoch": 4.350160134207717, "grad_norm": 1.3984055519104004, "learning_rate": 1.0424984690753214e-06, "log_odds_chosen": 2.118044376373291, "log_odds_ratio": -0.275258868932724, "logits/chosen": -0.8324227333068848, "logits/rejected": -0.8723855018615723, "logps/chosen": -0.5835749506950378, "logps/rejected": -2.044863700866699, "loss": 0.9305, "nll_loss": 0.7210803627967834, "rewards/accuracies": 0.875, "rewards/chosen": -0.058357495814561844, "rewards/margins": 0.14612889289855957, "rewards/rejected": -0.20448638498783112, "step": 7131 }, { "epoch": 4.350770169284734, "grad_norm": 2.808443784713745, "learning_rate": 1.0415186772810777e-06, "log_odds_chosen": 2.063082695007324, "log_odds_ratio": -0.4823406934738159, "logits/chosen": -0.9773022532463074, "logits/rejected": -0.9449536800384521, "logps/chosen": -0.9215246438980103, "logps/rejected": -2.4486989974975586, "loss": 1.1514, "nll_loss": 1.236012578010559, "rewards/accuracies": 0.625, "rewards/chosen": -0.09215246140956879, "rewards/margins": 0.1527174413204193, "rewards/rejected": -0.2448698878288269, "step": 7132 }, { "epoch": 4.35138020436175, "grad_norm": 1.9194245338439941, "learning_rate": 1.040538885486834e-06, "log_odds_chosen": 2.079350233078003, "log_odds_ratio": -0.3035409450531006, "logits/chosen": -0.9118046760559082, "logits/rejected": -0.9295459985733032, "logps/chosen": -0.8561263084411621, "logps/rejected": -2.4816155433654785, "loss": 1.0965, "nll_loss": 0.9927518963813782, "rewards/accuracies": 0.875, "rewards/chosen": -0.08561263978481293, "rewards/margins": 0.16254892945289612, "rewards/rejected": -0.24816155433654785, "step": 7133 }, { "epoch": 4.351990239438768, "grad_norm": 1.294722318649292, "learning_rate": 1.0395590936925902e-06, "log_odds_chosen": 0.7465413212776184, "log_odds_ratio": -0.6273394823074341, "logits/chosen": -1.085694432258606, "logits/rejected": -1.0310052633285522, "logps/chosen": -0.8712208271026611, "logps/rejected": -1.4547144174575806, "loss": 1.1845, "nll_loss": 1.1585500240325928, "rewards/accuracies": 0.625, "rewards/chosen": -0.08712206780910492, "rewards/margins": 0.0583493635058403, "rewards/rejected": -0.14547143876552582, "step": 7134 }, { "epoch": 4.352600274515785, "grad_norm": 2.8432042598724365, "learning_rate": 1.0385793018983467e-06, "log_odds_chosen": 1.5275650024414062, "log_odds_ratio": -0.5152359008789062, "logits/chosen": -0.9639683961868286, "logits/rejected": -1.1290596723556519, "logps/chosen": -0.9795775413513184, "logps/rejected": -2.262209892272949, "loss": 0.9976, "nll_loss": 1.1284078359603882, "rewards/accuracies": 0.625, "rewards/chosen": -0.09795774519443512, "rewards/margins": 0.12826324999332428, "rewards/rejected": -0.2262209951877594, "step": 7135 }, { "epoch": 4.353210309592802, "grad_norm": 1.5414847135543823, "learning_rate": 1.0375995101041028e-06, "log_odds_chosen": 3.3279926776885986, "log_odds_ratio": -0.16314803063869476, "logits/chosen": -0.7252303957939148, "logits/rejected": -0.8935564160346985, "logps/chosen": -0.5515555143356323, "logps/rejected": -3.017367124557495, "loss": 0.9957, "nll_loss": 0.8079871535301208, "rewards/accuracies": 0.875, "rewards/chosen": -0.055155545473098755, "rewards/margins": 0.24658116698265076, "rewards/rejected": -0.3017367124557495, "step": 7136 }, { "epoch": 4.353820344669819, "grad_norm": 1.4077376127243042, "learning_rate": 1.036619718309859e-06, "log_odds_chosen": 2.258636951446533, "log_odds_ratio": -0.4358706474304199, "logits/chosen": -0.9750925302505493, "logits/rejected": -1.0040478706359863, "logps/chosen": -0.843575656414032, "logps/rejected": -2.848360300064087, "loss": 0.9274, "nll_loss": 1.0140380859375, "rewards/accuracies": 0.875, "rewards/chosen": -0.08435757458209991, "rewards/margins": 0.2004784792661667, "rewards/rejected": -0.2848360240459442, "step": 7137 }, { "epoch": 4.3544303797468356, "grad_norm": 1.4599610567092896, "learning_rate": 1.0356399265156155e-06, "log_odds_chosen": 0.23628532886505127, "log_odds_ratio": -1.1108115911483765, "logits/chosen": -0.8988901972770691, "logits/rejected": -0.8064926862716675, "logps/chosen": -1.1902023553848267, "logps/rejected": -1.518839955329895, "loss": 1.0684, "nll_loss": 1.093044638633728, "rewards/accuracies": 0.5, "rewards/chosen": -0.1190202385187149, "rewards/margins": 0.03286375850439072, "rewards/rejected": -0.15188400447368622, "step": 7138 }, { "epoch": 4.355040414823852, "grad_norm": 1.2511719465255737, "learning_rate": 1.0346601347213716e-06, "log_odds_chosen": 2.138456344604492, "log_odds_ratio": -0.6126973628997803, "logits/chosen": -0.7951889634132385, "logits/rejected": -0.9498286247253418, "logps/chosen": -0.9560507535934448, "logps/rejected": -2.7587673664093018, "loss": 1.0942, "nll_loss": 1.205352544784546, "rewards/accuracies": 0.5, "rewards/chosen": -0.09560507535934448, "rewards/margins": 0.1802716702222824, "rewards/rejected": -0.2758767306804657, "step": 7139 }, { "epoch": 4.355650449900869, "grad_norm": 2.1240224838256836, "learning_rate": 1.033680342927128e-06, "log_odds_chosen": 1.1721899509429932, "log_odds_ratio": -0.4469010531902313, "logits/chosen": -1.0973151922225952, "logits/rejected": -1.0170106887817383, "logps/chosen": -0.9581265449523926, "logps/rejected": -1.8961331844329834, "loss": 1.2341, "nll_loss": 1.2038581371307373, "rewards/accuracies": 0.75, "rewards/chosen": -0.09581265598535538, "rewards/margins": 0.09380065649747849, "rewards/rejected": -0.18961331248283386, "step": 7140 }, { "epoch": 4.356260484977886, "grad_norm": 2.180952787399292, "learning_rate": 1.032700551132884e-06, "log_odds_chosen": 3.91884183883667, "log_odds_ratio": -0.3872169852256775, "logits/chosen": -0.770606517791748, "logits/rejected": -1.0712329149246216, "logps/chosen": -0.5502225160598755, "logps/rejected": -3.87203049659729, "loss": 0.862, "nll_loss": 0.7654940485954285, "rewards/accuracies": 0.75, "rewards/chosen": -0.05502225458621979, "rewards/margins": 0.33218082785606384, "rewards/rejected": -0.38720306754112244, "step": 7141 }, { "epoch": 4.356870520054903, "grad_norm": 1.496403455734253, "learning_rate": 1.0317207593386406e-06, "log_odds_chosen": 2.6924052238464355, "log_odds_ratio": -0.3436088263988495, "logits/chosen": -0.7648279666900635, "logits/rejected": -0.84779953956604, "logps/chosen": -0.6404911875724792, "logps/rejected": -2.847318649291992, "loss": 0.8836, "nll_loss": 0.9202122688293457, "rewards/accuracies": 0.875, "rewards/chosen": -0.0640491247177124, "rewards/margins": 0.22068274021148682, "rewards/rejected": -0.2847318649291992, "step": 7142 }, { "epoch": 4.35748055513192, "grad_norm": 6.327649116516113, "learning_rate": 1.0307409675443967e-06, "log_odds_chosen": 1.5603091716766357, "log_odds_ratio": -0.43201038241386414, "logits/chosen": -0.773581862449646, "logits/rejected": -0.9484023451805115, "logps/chosen": -0.8346994519233704, "logps/rejected": -1.9702270030975342, "loss": 0.998, "nll_loss": 0.9580751061439514, "rewards/accuracies": 0.875, "rewards/chosen": -0.0834699496626854, "rewards/margins": 0.1135527566075325, "rewards/rejected": -0.1970227062702179, "step": 7143 }, { "epoch": 4.358090590208937, "grad_norm": 2.932478904724121, "learning_rate": 1.029761175750153e-06, "log_odds_chosen": 1.8398181200027466, "log_odds_ratio": -0.5266687870025635, "logits/chosen": -0.9067763686180115, "logits/rejected": -0.9637577533721924, "logps/chosen": -0.6896144151687622, "logps/rejected": -1.7906806468963623, "loss": 1.0992, "nll_loss": 1.1959972381591797, "rewards/accuracies": 0.75, "rewards/chosen": -0.06896144151687622, "rewards/margins": 0.11010661721229553, "rewards/rejected": -0.17906805872917175, "step": 7144 }, { "epoch": 4.3587006252859535, "grad_norm": 2.16113018989563, "learning_rate": 1.0287813839559094e-06, "log_odds_chosen": 1.7195006608963013, "log_odds_ratio": -0.39386269450187683, "logits/chosen": -0.8462016582489014, "logits/rejected": -0.9251046180725098, "logps/chosen": -0.6122280359268188, "logps/rejected": -1.5970323085784912, "loss": 1.1427, "nll_loss": 1.2054972648620605, "rewards/accuracies": 0.875, "rewards/chosen": -0.06122279912233353, "rewards/margins": 0.09848044067621231, "rewards/rejected": -0.15970323979854584, "step": 7145 }, { "epoch": 4.359310660362971, "grad_norm": 1.9450966119766235, "learning_rate": 1.0278015921616655e-06, "log_odds_chosen": 3.274749517440796, "log_odds_ratio": -0.28686922788619995, "logits/chosen": -1.0535473823547363, "logits/rejected": -1.1794764995574951, "logps/chosen": -0.9177356958389282, "logps/rejected": -3.825197219848633, "loss": 1.089, "nll_loss": 1.2926913499832153, "rewards/accuracies": 1.0, "rewards/chosen": -0.09177356958389282, "rewards/margins": 0.29074615240097046, "rewards/rejected": -0.38251978158950806, "step": 7146 }, { "epoch": 4.359920695439988, "grad_norm": 1.3737425804138184, "learning_rate": 1.0268218003674219e-06, "log_odds_chosen": 2.163168430328369, "log_odds_ratio": -0.4454626142978668, "logits/chosen": -0.848886251449585, "logits/rejected": -0.9307367205619812, "logps/chosen": -0.6834644675254822, "logps/rejected": -2.3437371253967285, "loss": 0.8301, "nll_loss": 0.7960643768310547, "rewards/accuracies": 0.625, "rewards/chosen": -0.06834644824266434, "rewards/margins": 0.1660272628068924, "rewards/rejected": -0.23437371850013733, "step": 7147 }, { "epoch": 4.360530730517005, "grad_norm": 7.154007911682129, "learning_rate": 1.0258420085731782e-06, "log_odds_chosen": 1.7475647926330566, "log_odds_ratio": -0.527950644493103, "logits/chosen": -0.9426119327545166, "logits/rejected": -0.9442994594573975, "logps/chosen": -0.8502024412155151, "logps/rejected": -2.285207986831665, "loss": 1.1077, "nll_loss": 0.9714434146881104, "rewards/accuracies": 0.75, "rewards/chosen": -0.08502024412155151, "rewards/margins": 0.14350055158138275, "rewards/rejected": -0.22852081060409546, "step": 7148 }, { "epoch": 4.361140765594022, "grad_norm": 1.3656187057495117, "learning_rate": 1.0248622167789345e-06, "log_odds_chosen": 0.6008226275444031, "log_odds_ratio": -0.634596049785614, "logits/chosen": -0.852768063545227, "logits/rejected": -0.7638083100318909, "logps/chosen": -0.8110268115997314, "logps/rejected": -1.2717052698135376, "loss": 0.9058, "nll_loss": 0.8414053916931152, "rewards/accuracies": 0.5, "rewards/chosen": -0.08110267668962479, "rewards/margins": 0.04606784135103226, "rewards/rejected": -0.12717051804065704, "step": 7149 }, { "epoch": 4.361750800671039, "grad_norm": 6.042542457580566, "learning_rate": 1.0238824249846907e-06, "log_odds_chosen": 2.1938929557800293, "log_odds_ratio": -0.33830487728118896, "logits/chosen": -0.9429160952568054, "logits/rejected": -1.0319963693618774, "logps/chosen": -0.6474372744560242, "logps/rejected": -2.070204973220825, "loss": 1.0658, "nll_loss": 0.8810238838195801, "rewards/accuracies": 0.75, "rewards/chosen": -0.06474372744560242, "rewards/margins": 0.14227677881717682, "rewards/rejected": -0.20702049136161804, "step": 7150 }, { "epoch": 4.362360835748055, "grad_norm": 1.9749648571014404, "learning_rate": 1.022902633190447e-06, "log_odds_chosen": 2.2457222938537598, "log_odds_ratio": -0.2651447653770447, "logits/chosen": -0.8711060881614685, "logits/rejected": -1.1993293762207031, "logps/chosen": -0.7590304613113403, "logps/rejected": -2.468660354614258, "loss": 1.2201, "nll_loss": 1.069612741470337, "rewards/accuracies": 0.875, "rewards/chosen": -0.0759030431509018, "rewards/margins": 0.17096297442913055, "rewards/rejected": -0.24686601758003235, "step": 7151 }, { "epoch": 4.362970870825072, "grad_norm": 1.757982611656189, "learning_rate": 1.0219228413962033e-06, "log_odds_chosen": 1.9676457643508911, "log_odds_ratio": -0.6020568609237671, "logits/chosen": -1.0438002347946167, "logits/rejected": -0.9956897497177124, "logps/chosen": -0.8596069812774658, "logps/rejected": -2.533273458480835, "loss": 1.1519, "nll_loss": 1.192628264427185, "rewards/accuracies": 0.5, "rewards/chosen": -0.08596069365739822, "rewards/margins": 0.16736666858196259, "rewards/rejected": -0.2533273696899414, "step": 7152 }, { "epoch": 4.363580905902089, "grad_norm": 1.1438368558883667, "learning_rate": 1.0209430496019595e-06, "log_odds_chosen": 1.153769850730896, "log_odds_ratio": -0.547124981880188, "logits/chosen": -1.0881882905960083, "logits/rejected": -1.0396931171417236, "logps/chosen": -0.7537434101104736, "logps/rejected": -1.5197736024856567, "loss": 1.1008, "nll_loss": 1.206750512123108, "rewards/accuracies": 0.625, "rewards/chosen": -0.07537434250116348, "rewards/margins": 0.07660301774740219, "rewards/rejected": -0.15197736024856567, "step": 7153 }, { "epoch": 4.364190940979106, "grad_norm": 3.1836259365081787, "learning_rate": 1.0199632578077158e-06, "log_odds_chosen": 1.7790507078170776, "log_odds_ratio": -0.47996076941490173, "logits/chosen": -0.9712827205657959, "logits/rejected": -1.004900574684143, "logps/chosen": -1.012415885925293, "logps/rejected": -2.237952709197998, "loss": 1.0105, "nll_loss": 1.1544535160064697, "rewards/accuracies": 0.75, "rewards/chosen": -0.1012415960431099, "rewards/margins": 0.12255369126796722, "rewards/rejected": -0.22379527986049652, "step": 7154 }, { "epoch": 4.364800976056123, "grad_norm": 1.3627887964248657, "learning_rate": 1.0189834660134721e-06, "log_odds_chosen": 1.778444528579712, "log_odds_ratio": -0.5154390335083008, "logits/chosen": -0.9963440299034119, "logits/rejected": -1.0139005184173584, "logps/chosen": -0.8781261444091797, "logps/rejected": -2.4185147285461426, "loss": 1.0766, "nll_loss": 1.076059341430664, "rewards/accuracies": 0.75, "rewards/chosen": -0.08781261742115021, "rewards/margins": 0.15403884649276733, "rewards/rejected": -0.24185147881507874, "step": 7155 }, { "epoch": 4.365411011133141, "grad_norm": 1.0648915767669678, "learning_rate": 1.0180036742192285e-06, "log_odds_chosen": 2.280447244644165, "log_odds_ratio": -0.33016401529312134, "logits/chosen": -0.7565774917602539, "logits/rejected": -0.9099786281585693, "logps/chosen": -0.7248826026916504, "logps/rejected": -2.438575506210327, "loss": 0.9766, "nll_loss": 0.8105067014694214, "rewards/accuracies": 0.875, "rewards/chosen": -0.07248826324939728, "rewards/margins": 0.1713692843914032, "rewards/rejected": -0.24385756254196167, "step": 7156 }, { "epoch": 4.366021046210157, "grad_norm": 2.218658685684204, "learning_rate": 1.0170238824249846e-06, "log_odds_chosen": 0.8367660045623779, "log_odds_ratio": -0.5875200033187866, "logits/chosen": -0.8980448842048645, "logits/rejected": -0.9593013525009155, "logps/chosen": -0.8768155574798584, "logps/rejected": -1.5780752897262573, "loss": 1.2805, "nll_loss": 0.9906622171401978, "rewards/accuracies": 0.5, "rewards/chosen": -0.08768154680728912, "rewards/margins": 0.07012597471475601, "rewards/rejected": -0.15780754387378693, "step": 7157 }, { "epoch": 4.366631081287174, "grad_norm": 5.993738174438477, "learning_rate": 1.016044090630741e-06, "log_odds_chosen": 0.9531359076499939, "log_odds_ratio": -0.6475765705108643, "logits/chosen": -1.1273198127746582, "logits/rejected": -1.0399441719055176, "logps/chosen": -0.81932532787323, "logps/rejected": -1.6071827411651611, "loss": 1.0408, "nll_loss": 0.9316736459732056, "rewards/accuracies": 0.5, "rewards/chosen": -0.08193252980709076, "rewards/margins": 0.078785739839077, "rewards/rejected": -0.16071829199790955, "step": 7158 }, { "epoch": 4.367241116364191, "grad_norm": 2.4369940757751465, "learning_rate": 1.0150642988364972e-06, "log_odds_chosen": 2.1320886611938477, "log_odds_ratio": -0.75328129529953, "logits/chosen": -0.9290527701377869, "logits/rejected": -0.943770170211792, "logps/chosen": -0.9117425084114075, "logps/rejected": -3.020094394683838, "loss": 1.1954, "nll_loss": 1.1994400024414062, "rewards/accuracies": 0.375, "rewards/chosen": -0.09117425978183746, "rewards/margins": 0.21083520352840424, "rewards/rejected": -0.3020094633102417, "step": 7159 }, { "epoch": 4.367851151441208, "grad_norm": 1.7138938903808594, "learning_rate": 1.0140845070422534e-06, "log_odds_chosen": 0.44810760021209717, "log_odds_ratio": -0.5806313753128052, "logits/chosen": -0.7465430498123169, "logits/rejected": -0.956616997718811, "logps/chosen": -0.7864879369735718, "logps/rejected": -1.0487273931503296, "loss": 1.1159, "nll_loss": 0.8924710154533386, "rewards/accuracies": 0.625, "rewards/chosen": -0.07864879071712494, "rewards/margins": 0.02622394636273384, "rewards/rejected": -0.10487273335456848, "step": 7160 }, { "epoch": 4.368461186518225, "grad_norm": 1.771302580833435, "learning_rate": 1.0131047152480097e-06, "log_odds_chosen": 2.4853734970092773, "log_odds_ratio": -0.34916532039642334, "logits/chosen": -0.8268389701843262, "logits/rejected": -1.0086638927459717, "logps/chosen": -0.6974892616271973, "logps/rejected": -2.5220282077789307, "loss": 0.9742, "nll_loss": 0.9001480340957642, "rewards/accuracies": 0.75, "rewards/chosen": -0.06974893063306808, "rewards/margins": 0.18245390057563782, "rewards/rejected": -0.2522028386592865, "step": 7161 }, { "epoch": 4.369071221595242, "grad_norm": 1.8950281143188477, "learning_rate": 1.012124923453766e-06, "log_odds_chosen": 2.083737850189209, "log_odds_ratio": -0.3615502715110779, "logits/chosen": -0.711031436920166, "logits/rejected": -0.869529664516449, "logps/chosen": -0.7644282579421997, "logps/rejected": -2.289310932159424, "loss": 1.0138, "nll_loss": 0.7232677936553955, "rewards/accuracies": 0.875, "rewards/chosen": -0.07644283771514893, "rewards/margins": 0.15248826146125793, "rewards/rejected": -0.22893109917640686, "step": 7162 }, { "epoch": 4.3696812566722585, "grad_norm": 1.5577224493026733, "learning_rate": 1.0111451316595224e-06, "log_odds_chosen": 1.2323760986328125, "log_odds_ratio": -0.523544192314148, "logits/chosen": -0.6513220071792603, "logits/rejected": -0.9083446264266968, "logps/chosen": -0.6058827042579651, "logps/rejected": -1.4006106853485107, "loss": 1.0971, "nll_loss": 0.9006727933883667, "rewards/accuracies": 0.75, "rewards/chosen": -0.06058826670050621, "rewards/margins": 0.07947279512882233, "rewards/rejected": -0.14006106555461884, "step": 7163 }, { "epoch": 4.370291291749275, "grad_norm": 1.2310528755187988, "learning_rate": 1.0101653398652785e-06, "log_odds_chosen": 0.4382163882255554, "log_odds_ratio": -0.662968099117279, "logits/chosen": -0.9750256538391113, "logits/rejected": -0.8973080515861511, "logps/chosen": -0.8996307253837585, "logps/rejected": -1.2301855087280273, "loss": 1.0637, "nll_loss": 1.117616891860962, "rewards/accuracies": 0.5, "rewards/chosen": -0.08996307849884033, "rewards/margins": 0.03305548429489136, "rewards/rejected": -0.1230185478925705, "step": 7164 }, { "epoch": 4.370901326826292, "grad_norm": 1.397270679473877, "learning_rate": 1.009185548071035e-06, "log_odds_chosen": 1.6862688064575195, "log_odds_ratio": -0.4923110008239746, "logits/chosen": -1.1743561029434204, "logits/rejected": -1.1524409055709839, "logps/chosen": -1.0738729238510132, "logps/rejected": -2.50026535987854, "loss": 0.9641, "nll_loss": 1.2113054990768433, "rewards/accuracies": 0.625, "rewards/chosen": -0.10738728940486908, "rewards/margins": 0.14263926446437836, "rewards/rejected": -0.25002655386924744, "step": 7165 }, { "epoch": 4.371511361903309, "grad_norm": 1.7367452383041382, "learning_rate": 1.0082057562767912e-06, "log_odds_chosen": 2.6994781494140625, "log_odds_ratio": -0.2949969470500946, "logits/chosen": -0.8903499841690063, "logits/rejected": -0.979530394077301, "logps/chosen": -0.8011823892593384, "logps/rejected": -3.0197505950927734, "loss": 0.9468, "nll_loss": 1.1127394437789917, "rewards/accuracies": 0.875, "rewards/chosen": -0.08011823892593384, "rewards/margins": 0.22185681760311127, "rewards/rejected": -0.3019750714302063, "step": 7166 }, { "epoch": 4.372121396980327, "grad_norm": 2.0687639713287354, "learning_rate": 1.0072259644825473e-06, "log_odds_chosen": 3.1283044815063477, "log_odds_ratio": -0.21910646557807922, "logits/chosen": -0.5970665216445923, "logits/rejected": -0.8984558582305908, "logps/chosen": -0.6912094950675964, "logps/rejected": -3.114258050918579, "loss": 0.8674, "nll_loss": 0.9759413599967957, "rewards/accuracies": 0.875, "rewards/chosen": -0.06912095099687576, "rewards/margins": 0.24230486154556274, "rewards/rejected": -0.3114258050918579, "step": 7167 }, { "epoch": 4.372731432057344, "grad_norm": 1.134193778038025, "learning_rate": 1.0062461726883036e-06, "log_odds_chosen": 2.7470295429229736, "log_odds_ratio": -0.14780813455581665, "logits/chosen": -0.9421870708465576, "logits/rejected": -0.9712880253791809, "logps/chosen": -0.8546487092971802, "logps/rejected": -2.985287666320801, "loss": 1.0549, "nll_loss": 1.0108461380004883, "rewards/accuracies": 1.0, "rewards/chosen": -0.08546487987041473, "rewards/margins": 0.21306388080120087, "rewards/rejected": -0.2985287308692932, "step": 7168 }, { "epoch": 4.3733414671343604, "grad_norm": 12.891249656677246, "learning_rate": 1.00526638089406e-06, "log_odds_chosen": 0.7636239528656006, "log_odds_ratio": -0.5167757868766785, "logits/chosen": -1.1808043718338013, "logits/rejected": -1.1441413164138794, "logps/chosen": -1.0076239109039307, "logps/rejected": -1.582453966140747, "loss": 1.0889, "nll_loss": 1.1138362884521484, "rewards/accuracies": 0.625, "rewards/chosen": -0.10076238960027695, "rewards/margins": 0.0574830137193203, "rewards/rejected": -0.15824539959430695, "step": 7169 }, { "epoch": 4.373951502211377, "grad_norm": 8.631166458129883, "learning_rate": 1.0042865890998163e-06, "log_odds_chosen": 1.9746696949005127, "log_odds_ratio": -0.4333423376083374, "logits/chosen": -0.8549681901931763, "logits/rejected": -1.0236653089523315, "logps/chosen": -0.7652920484542847, "logps/rejected": -2.1916747093200684, "loss": 1.0213, "nll_loss": 0.750158965587616, "rewards/accuracies": 0.75, "rewards/chosen": -0.07652920484542847, "rewards/margins": 0.14263825118541718, "rewards/rejected": -0.21916747093200684, "step": 7170 }, { "epoch": 4.374561537288394, "grad_norm": 1.0118286609649658, "learning_rate": 1.0033067973055724e-06, "log_odds_chosen": 0.34806108474731445, "log_odds_ratio": -0.6831293702125549, "logits/chosen": -0.7558727860450745, "logits/rejected": -0.5737239122390747, "logps/chosen": -0.7452774047851562, "logps/rejected": -0.9695273041725159, "loss": 0.9359, "nll_loss": 0.9017913937568665, "rewards/accuracies": 0.375, "rewards/chosen": -0.07452774047851562, "rewards/margins": 0.022424988448619843, "rewards/rejected": -0.09695273637771606, "step": 7171 }, { "epoch": 4.375171572365411, "grad_norm": 5.2408447265625, "learning_rate": 1.002327005511329e-06, "log_odds_chosen": 0.9172374606132507, "log_odds_ratio": -0.460022896528244, "logits/chosen": -0.9045373201370239, "logits/rejected": -0.992347240447998, "logps/chosen": -0.6124181747436523, "logps/rejected": -1.1752972602844238, "loss": 1.0462, "nll_loss": 1.0604524612426758, "rewards/accuracies": 0.75, "rewards/chosen": -0.06124182045459747, "rewards/margins": 0.056287895888090134, "rewards/rejected": -0.1175297200679779, "step": 7172 }, { "epoch": 4.375781607442428, "grad_norm": 1.807248592376709, "learning_rate": 1.001347213717085e-06, "log_odds_chosen": 1.555504560470581, "log_odds_ratio": -0.36427226662635803, "logits/chosen": -0.7188286185264587, "logits/rejected": -0.8623359203338623, "logps/chosen": -0.6381158232688904, "logps/rejected": -1.8153620958328247, "loss": 1.0833, "nll_loss": 1.010331153869629, "rewards/accuracies": 0.875, "rewards/chosen": -0.06381158530712128, "rewards/margins": 0.11772462725639343, "rewards/rejected": -0.1815362125635147, "step": 7173 }, { "epoch": 4.376391642519445, "grad_norm": 2.054215908050537, "learning_rate": 1.0003674219228412e-06, "log_odds_chosen": 0.9267829656600952, "log_odds_ratio": -0.4832134246826172, "logits/chosen": -0.9298031330108643, "logits/rejected": -1.089963674545288, "logps/chosen": -1.1167449951171875, "logps/rejected": -1.8658664226531982, "loss": 1.3607, "nll_loss": 1.240147352218628, "rewards/accuracies": 0.75, "rewards/chosen": -0.11167450249195099, "rewards/margins": 0.07491214573383331, "rewards/rejected": -0.1865866482257843, "step": 7174 }, { "epoch": 4.3770016775964615, "grad_norm": 5.935530662536621, "learning_rate": 9.993876301285975e-07, "log_odds_chosen": 2.8408732414245605, "log_odds_ratio": -0.2609197795391083, "logits/chosen": -0.7499399185180664, "logits/rejected": -0.9705508351325989, "logps/chosen": -0.6144794225692749, "logps/rejected": -2.855344295501709, "loss": 0.998, "nll_loss": 0.9299541711807251, "rewards/accuracies": 0.75, "rewards/chosen": -0.06144794076681137, "rewards/margins": 0.2240864783525467, "rewards/rejected": -0.28553441166877747, "step": 7175 }, { "epoch": 4.377611712673478, "grad_norm": 1.543956995010376, "learning_rate": 9.984078383343539e-07, "log_odds_chosen": 2.1678385734558105, "log_odds_ratio": -0.32544079422950745, "logits/chosen": -1.0190017223358154, "logits/rejected": -1.0182117223739624, "logps/chosen": -0.8205358982086182, "logps/rejected": -2.4190268516540527, "loss": 1.1355, "nll_loss": 1.093091607093811, "rewards/accuracies": 0.75, "rewards/chosen": -0.08205360174179077, "rewards/margins": 0.15984909236431122, "rewards/rejected": -0.241902694106102, "step": 7176 }, { "epoch": 4.378221747750496, "grad_norm": 1.6477888822555542, "learning_rate": 9.974280465401102e-07, "log_odds_chosen": 2.499656915664673, "log_odds_ratio": -0.28232041001319885, "logits/chosen": -0.8386787176132202, "logits/rejected": -0.9320113658905029, "logps/chosen": -0.4872870445251465, "logps/rejected": -2.157193183898926, "loss": 0.8989, "nll_loss": 0.7259848117828369, "rewards/accuracies": 0.875, "rewards/chosen": -0.04872870445251465, "rewards/margins": 0.16699063777923584, "rewards/rejected": -0.2157193422317505, "step": 7177 }, { "epoch": 4.378831782827513, "grad_norm": 2.1459012031555176, "learning_rate": 9.964482547458665e-07, "log_odds_chosen": 3.069908618927002, "log_odds_ratio": -0.2567112445831299, "logits/chosen": -0.9133448600769043, "logits/rejected": -1.0886353254318237, "logps/chosen": -0.786329448223114, "logps/rejected": -3.260411262512207, "loss": 1.0208, "nll_loss": 0.9557234644889832, "rewards/accuracies": 0.875, "rewards/chosen": -0.07863293588161469, "rewards/margins": 0.2474082112312317, "rewards/rejected": -0.32604116201400757, "step": 7178 }, { "epoch": 4.37944181790453, "grad_norm": 10.815093040466309, "learning_rate": 9.954684629516229e-07, "log_odds_chosen": 2.000282049179077, "log_odds_ratio": -0.3345377743244171, "logits/chosen": -0.890331506729126, "logits/rejected": -1.0155360698699951, "logps/chosen": -0.8519132137298584, "logps/rejected": -2.514796257019043, "loss": 1.0608, "nll_loss": 0.9768299460411072, "rewards/accuracies": 0.875, "rewards/chosen": -0.08519132435321808, "rewards/margins": 0.1662883162498474, "rewards/rejected": -0.2514796257019043, "step": 7179 }, { "epoch": 4.380051852981547, "grad_norm": 1.6092509031295776, "learning_rate": 9.94488671157379e-07, "log_odds_chosen": 4.172967910766602, "log_odds_ratio": -0.25718939304351807, "logits/chosen": -0.7730630040168762, "logits/rejected": -1.048962116241455, "logps/chosen": -0.7491297721862793, "logps/rejected": -4.179590702056885, "loss": 0.9491, "nll_loss": 0.8255977034568787, "rewards/accuracies": 0.875, "rewards/chosen": -0.07491297274827957, "rewards/margins": 0.343046098947525, "rewards/rejected": -0.4179590344429016, "step": 7180 }, { "epoch": 4.3806618880585635, "grad_norm": 1.414528727531433, "learning_rate": 9.935088793631353e-07, "log_odds_chosen": 1.194006323814392, "log_odds_ratio": -0.4431252181529999, "logits/chosen": -0.7367601990699768, "logits/rejected": -0.8178561925888062, "logps/chosen": -0.6846351027488708, "logps/rejected": -1.6261379718780518, "loss": 0.9913, "nll_loss": 0.8044476509094238, "rewards/accuracies": 0.625, "rewards/chosen": -0.0684635117650032, "rewards/margins": 0.09415028989315033, "rewards/rejected": -0.16261380910873413, "step": 7181 }, { "epoch": 4.38127192313558, "grad_norm": 20.21300506591797, "learning_rate": 9.925290875688914e-07, "log_odds_chosen": 3.6648030281066895, "log_odds_ratio": -0.2618650496006012, "logits/chosen": -0.858843982219696, "logits/rejected": -1.030737042427063, "logps/chosen": -0.663867712020874, "logps/rejected": -3.7317888736724854, "loss": 1.0118, "nll_loss": 0.8624666929244995, "rewards/accuracies": 0.875, "rewards/chosen": -0.06638677418231964, "rewards/margins": 0.30679214000701904, "rewards/rejected": -0.3731788992881775, "step": 7182 }, { "epoch": 4.381881958212597, "grad_norm": 2.1223394870758057, "learning_rate": 9.915492957746478e-07, "log_odds_chosen": 0.6244522333145142, "log_odds_ratio": -0.5308617353439331, "logits/chosen": -0.9591403007507324, "logits/rejected": -1.0783767700195312, "logps/chosen": -0.788983941078186, "logps/rejected": -1.2170677185058594, "loss": 1.0292, "nll_loss": 1.1986736059188843, "rewards/accuracies": 0.625, "rewards/chosen": -0.07889840006828308, "rewards/margins": 0.04280838370323181, "rewards/rejected": -0.12170678377151489, "step": 7183 }, { "epoch": 4.382491993289614, "grad_norm": 1.507428765296936, "learning_rate": 9.905695039804041e-07, "log_odds_chosen": 2.2037909030914307, "log_odds_ratio": -0.5052284598350525, "logits/chosen": -0.8161263465881348, "logits/rejected": -0.8411509990692139, "logps/chosen": -0.7004523873329163, "logps/rejected": -2.6141889095306396, "loss": 0.9231, "nll_loss": 0.8393005132675171, "rewards/accuracies": 0.75, "rewards/chosen": -0.07004524022340775, "rewards/margins": 0.19137367606163025, "rewards/rejected": -0.2614189386367798, "step": 7184 }, { "epoch": 4.383102028366631, "grad_norm": 4.457408905029297, "learning_rate": 9.895897121861604e-07, "log_odds_chosen": 1.1848564147949219, "log_odds_ratio": -0.5290032625198364, "logits/chosen": -0.9455095529556274, "logits/rejected": -1.0129199028015137, "logps/chosen": -0.8846734762191772, "logps/rejected": -1.8262962102890015, "loss": 1.0301, "nll_loss": 0.9607881903648376, "rewards/accuracies": 0.625, "rewards/chosen": -0.08846735209226608, "rewards/margins": 0.09416226297616959, "rewards/rejected": -0.18262961506843567, "step": 7185 }, { "epoch": 4.383712063443648, "grad_norm": 1.8238656520843506, "learning_rate": 9.886099203919168e-07, "log_odds_chosen": 0.5573899745941162, "log_odds_ratio": -0.6940670013427734, "logits/chosen": -1.1819095611572266, "logits/rejected": -0.9709641337394714, "logps/chosen": -1.0593878030776978, "logps/rejected": -1.5422190427780151, "loss": 1.038, "nll_loss": 1.152129888534546, "rewards/accuracies": 0.25, "rewards/chosen": -0.10593877732753754, "rewards/margins": 0.04828312247991562, "rewards/rejected": -0.15422189235687256, "step": 7186 }, { "epoch": 4.384322098520665, "grad_norm": 2.754714250564575, "learning_rate": 9.87630128597673e-07, "log_odds_chosen": 6.107072353363037, "log_odds_ratio": -0.19492417573928833, "logits/chosen": -0.8189772367477417, "logits/rejected": -1.1872459650039673, "logps/chosen": -0.6248476505279541, "logps/rejected": -5.918895244598389, "loss": 0.9573, "nll_loss": 0.7767808437347412, "rewards/accuracies": 0.875, "rewards/chosen": -0.06248475983738899, "rewards/margins": 0.5294047594070435, "rewards/rejected": -0.5918895602226257, "step": 7187 }, { "epoch": 4.384932133597681, "grad_norm": 2.731049060821533, "learning_rate": 9.866503368034292e-07, "log_odds_chosen": 3.8075475692749023, "log_odds_ratio": -0.36900776624679565, "logits/chosen": -0.9205224514007568, "logits/rejected": -1.0417239665985107, "logps/chosen": -0.7786407470703125, "logps/rejected": -4.232394218444824, "loss": 1.0524, "nll_loss": 1.0232971906661987, "rewards/accuracies": 0.875, "rewards/chosen": -0.07786408066749573, "rewards/margins": 0.34537532925605774, "rewards/rejected": -0.42323940992355347, "step": 7188 }, { "epoch": 4.385542168674699, "grad_norm": 3.1013197898864746, "learning_rate": 9.856705450091854e-07, "log_odds_chosen": 3.744330644607544, "log_odds_ratio": -0.18710841238498688, "logits/chosen": -0.9968997836112976, "logits/rejected": -1.2433407306671143, "logps/chosen": -0.7486653327941895, "logps/rejected": -3.796884775161743, "loss": 1.0197, "nll_loss": 0.9159648418426514, "rewards/accuracies": 1.0, "rewards/chosen": -0.07486653327941895, "rewards/margins": 0.3048219680786133, "rewards/rejected": -0.3796885013580322, "step": 7189 }, { "epoch": 4.386152203751716, "grad_norm": 1.5502287149429321, "learning_rate": 9.846907532149417e-07, "log_odds_chosen": 2.2918646335601807, "log_odds_ratio": -0.4736844301223755, "logits/chosen": -0.8951606750488281, "logits/rejected": -1.0365010499954224, "logps/chosen": -0.750336229801178, "logps/rejected": -2.6989023685455322, "loss": 0.9361, "nll_loss": 1.007423996925354, "rewards/accuracies": 0.625, "rewards/chosen": -0.07503362745046616, "rewards/margins": 0.19485662877559662, "rewards/rejected": -0.26989027857780457, "step": 7190 }, { "epoch": 4.386762238828733, "grad_norm": 3.1627357006073, "learning_rate": 9.83710961420698e-07, "log_odds_chosen": 2.9890875816345215, "log_odds_ratio": -0.5231991410255432, "logits/chosen": -1.0158276557922363, "logits/rejected": -1.020106554031372, "logps/chosen": -0.7335366606712341, "logps/rejected": -2.918105125427246, "loss": 1.1856, "nll_loss": 1.1074830293655396, "rewards/accuracies": 0.625, "rewards/chosen": -0.07335367053747177, "rewards/margins": 0.21845684945583344, "rewards/rejected": -0.2918105125427246, "step": 7191 }, { "epoch": 4.38737227390575, "grad_norm": 16.61761474609375, "learning_rate": 9.827311696264544e-07, "log_odds_chosen": 2.407965898513794, "log_odds_ratio": -0.30774685740470886, "logits/chosen": -0.9364670515060425, "logits/rejected": -0.9746833443641663, "logps/chosen": -0.6858066320419312, "logps/rejected": -2.5353519916534424, "loss": 1.0788, "nll_loss": 0.8802905082702637, "rewards/accuracies": 0.875, "rewards/chosen": -0.06858067214488983, "rewards/margins": 0.18495453894138336, "rewards/rejected": -0.2535352110862732, "step": 7192 }, { "epoch": 4.3879823089827665, "grad_norm": 8.480600357055664, "learning_rate": 9.817513778322107e-07, "log_odds_chosen": 0.9843290448188782, "log_odds_ratio": -0.699561357498169, "logits/chosen": -0.8939186930656433, "logits/rejected": -0.927003800868988, "logps/chosen": -0.995696485042572, "logps/rejected": -1.9509443044662476, "loss": 1.1714, "nll_loss": 1.2801287174224854, "rewards/accuracies": 0.625, "rewards/chosen": -0.0995696485042572, "rewards/margins": 0.09552478045225143, "rewards/rejected": -0.19509443640708923, "step": 7193 }, { "epoch": 4.388592344059783, "grad_norm": 4.118434906005859, "learning_rate": 9.807715860379668e-07, "log_odds_chosen": 2.00437331199646, "log_odds_ratio": -0.4969187080860138, "logits/chosen": -0.9313591718673706, "logits/rejected": -0.9376605749130249, "logps/chosen": -0.8242378234863281, "logps/rejected": -2.44867205619812, "loss": 0.967, "nll_loss": 0.9783672094345093, "rewards/accuracies": 0.625, "rewards/chosen": -0.08242377638816833, "rewards/margins": 0.1624433994293213, "rewards/rejected": -0.244867205619812, "step": 7194 }, { "epoch": 4.3892023791368, "grad_norm": 2.3801190853118896, "learning_rate": 9.797917942437232e-07, "log_odds_chosen": 1.228123664855957, "log_odds_ratio": -0.4707932770252228, "logits/chosen": -0.9345579147338867, "logits/rejected": -0.9440110921859741, "logps/chosen": -0.854828953742981, "logps/rejected": -1.7475699186325073, "loss": 0.968, "nll_loss": 0.9977692365646362, "rewards/accuracies": 0.875, "rewards/chosen": -0.0854828953742981, "rewards/margins": 0.08927410840988159, "rewards/rejected": -0.1747569888830185, "step": 7195 }, { "epoch": 4.389812414213817, "grad_norm": 3.1856274604797363, "learning_rate": 9.788120024494795e-07, "log_odds_chosen": 3.8990163803100586, "log_odds_ratio": -0.1327812373638153, "logits/chosen": -0.5610600709915161, "logits/rejected": -0.8641700744628906, "logps/chosen": -0.5278788805007935, "logps/rejected": -3.581761598587036, "loss": 0.8505, "nll_loss": 0.6394047737121582, "rewards/accuracies": 1.0, "rewards/chosen": -0.052787892520427704, "rewards/margins": 0.30538830161094666, "rewards/rejected": -0.3581761419773102, "step": 7196 }, { "epoch": 4.390422449290834, "grad_norm": 2.3896644115448, "learning_rate": 9.778322106552356e-07, "log_odds_chosen": 2.8587238788604736, "log_odds_ratio": -0.530219554901123, "logits/chosen": -0.8704884648323059, "logits/rejected": -0.950777530670166, "logps/chosen": -0.7511628866195679, "logps/rejected": -3.103022813796997, "loss": 1.0241, "nll_loss": 0.9324849247932434, "rewards/accuracies": 0.625, "rewards/chosen": -0.07511628419160843, "rewards/margins": 0.23518598079681396, "rewards/rejected": -0.3103022873401642, "step": 7197 }, { "epoch": 4.391032484367851, "grad_norm": 12.58047866821289, "learning_rate": 9.76852418860992e-07, "log_odds_chosen": 2.0292558670043945, "log_odds_ratio": -0.29616183042526245, "logits/chosen": -0.7470213174819946, "logits/rejected": -0.7871596813201904, "logps/chosen": -0.5298148393630981, "logps/rejected": -1.798316240310669, "loss": 0.9874, "nll_loss": 0.7325725555419922, "rewards/accuracies": 0.75, "rewards/chosen": -0.052981484681367874, "rewards/margins": 0.12685014307498932, "rewards/rejected": -0.1798316240310669, "step": 7198 }, { "epoch": 4.3916425194448685, "grad_norm": 1.6964062452316284, "learning_rate": 9.758726270667483e-07, "log_odds_chosen": 3.394791841506958, "log_odds_ratio": -0.43190354108810425, "logits/chosen": -0.884651243686676, "logits/rejected": -0.8221983909606934, "logps/chosen": -0.7838127613067627, "logps/rejected": -3.6989197731018066, "loss": 1.1794, "nll_loss": 1.0728787183761597, "rewards/accuracies": 0.625, "rewards/chosen": -0.07838128507137299, "rewards/margins": 0.2915107011795044, "rewards/rejected": -0.3698919713497162, "step": 7199 }, { "epoch": 4.392252554521885, "grad_norm": 2.533982992172241, "learning_rate": 9.748928352725046e-07, "log_odds_chosen": 1.4773467779159546, "log_odds_ratio": -0.3546278178691864, "logits/chosen": -1.0383495092391968, "logits/rejected": -1.1231416463851929, "logps/chosen": -0.9631116986274719, "logps/rejected": -1.9553755521774292, "loss": 1.2209, "nll_loss": 1.216172456741333, "rewards/accuracies": 1.0, "rewards/chosen": -0.09631117433309555, "rewards/margins": 0.09922638535499573, "rewards/rejected": -0.19553756713867188, "step": 7200 }, { "epoch": 4.392862589598902, "grad_norm": 1.5383754968643188, "learning_rate": 9.73913043478261e-07, "log_odds_chosen": 0.32633650302886963, "log_odds_ratio": -0.6085698008537292, "logits/chosen": -1.044858455657959, "logits/rejected": -0.995400071144104, "logps/chosen": -1.0050636529922485, "logps/rejected": -1.2013397216796875, "loss": 1.097, "nll_loss": 0.9834309220314026, "rewards/accuracies": 0.375, "rewards/chosen": -0.10050636529922485, "rewards/margins": 0.019627608358860016, "rewards/rejected": -0.12013397365808487, "step": 7201 }, { "epoch": 4.393472624675919, "grad_norm": 1.6770126819610596, "learning_rate": 9.72933251684017e-07, "log_odds_chosen": 1.7178564071655273, "log_odds_ratio": -0.41803136467933655, "logits/chosen": -0.9624631404876709, "logits/rejected": -0.9887913465499878, "logps/chosen": -0.876681923866272, "logps/rejected": -2.2607457637786865, "loss": 1.0391, "nll_loss": 1.080075740814209, "rewards/accuracies": 0.75, "rewards/chosen": -0.08766818791627884, "rewards/margins": 0.1384063959121704, "rewards/rejected": -0.22607457637786865, "step": 7202 }, { "epoch": 4.394082659752936, "grad_norm": 1.4791791439056396, "learning_rate": 9.719534598897734e-07, "log_odds_chosen": 1.4288091659545898, "log_odds_ratio": -0.3902532458305359, "logits/chosen": -0.7091178297996521, "logits/rejected": -0.8209463953971863, "logps/chosen": -0.5496729016304016, "logps/rejected": -1.4386341571807861, "loss": 0.8244, "nll_loss": 0.7984541058540344, "rewards/accuracies": 0.75, "rewards/chosen": -0.05496729165315628, "rewards/margins": 0.08889612555503845, "rewards/rejected": -0.14386342465877533, "step": 7203 }, { "epoch": 4.394692694829953, "grad_norm": 1.3655067682266235, "learning_rate": 9.709736680955295e-07, "log_odds_chosen": 0.8715366721153259, "log_odds_ratio": -0.5234268307685852, "logits/chosen": -0.8270630240440369, "logits/rejected": -0.9781375527381897, "logps/chosen": -0.8458921313285828, "logps/rejected": -1.446056842803955, "loss": 1.0341, "nll_loss": 0.991798996925354, "rewards/accuracies": 0.75, "rewards/chosen": -0.08458921313285828, "rewards/margins": 0.0600164532661438, "rewards/rejected": -0.14460566639900208, "step": 7204 }, { "epoch": 4.39530272990697, "grad_norm": 2.621018171310425, "learning_rate": 9.699938763012859e-07, "log_odds_chosen": 1.1419552564620972, "log_odds_ratio": -0.5733320713043213, "logits/chosen": -0.8449996709823608, "logits/rejected": -0.797301709651947, "logps/chosen": -0.7925258874893188, "logps/rejected": -1.5685349702835083, "loss": 1.1781, "nll_loss": 0.9867833852767944, "rewards/accuracies": 0.625, "rewards/chosen": -0.07925258576869965, "rewards/margins": 0.07760091125965118, "rewards/rejected": -0.15685349702835083, "step": 7205 }, { "epoch": 4.395912764983986, "grad_norm": 1.3315294981002808, "learning_rate": 9.690140845070422e-07, "log_odds_chosen": 0.8511888384819031, "log_odds_ratio": -0.6825343370437622, "logits/chosen": -0.9377026557922363, "logits/rejected": -0.9218430519104004, "logps/chosen": -1.0373592376708984, "logps/rejected": -1.6978764533996582, "loss": 1.1308, "nll_loss": 1.2722238302230835, "rewards/accuracies": 0.625, "rewards/chosen": -0.10373592376708984, "rewards/margins": 0.06605172157287598, "rewards/rejected": -0.16978764533996582, "step": 7206 }, { "epoch": 4.396522800061003, "grad_norm": 1.458356261253357, "learning_rate": 9.680342927127985e-07, "log_odds_chosen": 1.756510615348816, "log_odds_ratio": -0.32320529222488403, "logits/chosen": -0.9728966355323792, "logits/rejected": -0.8052002191543579, "logps/chosen": -0.8166341781616211, "logps/rejected": -2.1571788787841797, "loss": 0.9249, "nll_loss": 0.9573167562484741, "rewards/accuracies": 0.75, "rewards/chosen": -0.08166341483592987, "rewards/margins": 0.13405448198318481, "rewards/rejected": -0.21571789681911469, "step": 7207 }, { "epoch": 4.39713283513802, "grad_norm": 1.5262593030929565, "learning_rate": 9.670545009185549e-07, "log_odds_chosen": 1.4314420223236084, "log_odds_ratio": -0.3883962035179138, "logits/chosen": -0.7274455428123474, "logits/rejected": -0.8809710741043091, "logps/chosen": -0.7387849688529968, "logps/rejected": -1.7241222858428955, "loss": 0.8847, "nll_loss": 0.7575491070747375, "rewards/accuracies": 0.875, "rewards/chosen": -0.07387850433588028, "rewards/margins": 0.0985337346792221, "rewards/rejected": -0.17241224646568298, "step": 7208 }, { "epoch": 4.397742870215037, "grad_norm": 1.7966228723526, "learning_rate": 9.66074709124311e-07, "log_odds_chosen": 0.8386061787605286, "log_odds_ratio": -0.5446579456329346, "logits/chosen": -0.8329957723617554, "logits/rejected": -0.8995652198791504, "logps/chosen": -0.8905868530273438, "logps/rejected": -1.5264697074890137, "loss": 1.066, "nll_loss": 1.0309767723083496, "rewards/accuracies": 0.625, "rewards/chosen": -0.08905868232250214, "rewards/margins": 0.06358827650547028, "rewards/rejected": -0.1526469588279724, "step": 7209 }, { "epoch": 4.398352905292055, "grad_norm": 2.0798072814941406, "learning_rate": 9.650949173300673e-07, "log_odds_chosen": 1.9139480590820312, "log_odds_ratio": -0.32970041036605835, "logits/chosen": -1.08026123046875, "logits/rejected": -1.0961575508117676, "logps/chosen": -0.7880473732948303, "logps/rejected": -2.2203845977783203, "loss": 1.0412, "nll_loss": 1.0434956550598145, "rewards/accuracies": 0.875, "rewards/chosen": -0.07880473881959915, "rewards/margins": 0.14323373138904572, "rewards/rejected": -0.22203847765922546, "step": 7210 }, { "epoch": 4.3989629403690715, "grad_norm": 1.118666410446167, "learning_rate": 9.641151255358237e-07, "log_odds_chosen": 2.8744242191314697, "log_odds_ratio": -0.3452727496623993, "logits/chosen": -0.9935981631278992, "logits/rejected": -0.9688770174980164, "logps/chosen": -0.7726026177406311, "logps/rejected": -3.1269123554229736, "loss": 0.9231, "nll_loss": 0.998748242855072, "rewards/accuracies": 0.75, "rewards/chosen": -0.07726026326417923, "rewards/margins": 0.23543095588684082, "rewards/rejected": -0.31269124150276184, "step": 7211 }, { "epoch": 4.399572975446088, "grad_norm": 1.585119366645813, "learning_rate": 9.631353337415798e-07, "log_odds_chosen": 0.1376991868019104, "log_odds_ratio": -0.6699649095535278, "logits/chosen": -0.8539683818817139, "logits/rejected": -0.8634762763977051, "logps/chosen": -0.9532197713851929, "logps/rejected": -1.0771404504776, "loss": 1.019, "nll_loss": 1.0021387338638306, "rewards/accuracies": 0.375, "rewards/chosen": -0.09532198309898376, "rewards/margins": 0.012392068281769753, "rewards/rejected": -0.10771404206752777, "step": 7212 }, { "epoch": 4.400183010523105, "grad_norm": 2.1254029273986816, "learning_rate": 9.621555419473361e-07, "log_odds_chosen": 1.7323228120803833, "log_odds_ratio": -0.43001240491867065, "logits/chosen": -0.8065846562385559, "logits/rejected": -0.9118211269378662, "logps/chosen": -0.6414958238601685, "logps/rejected": -1.924664855003357, "loss": 1.1982, "nll_loss": 0.9846962690353394, "rewards/accuracies": 0.75, "rewards/chosen": -0.06414958834648132, "rewards/margins": 0.12831690907478333, "rewards/rejected": -0.19246648252010345, "step": 7213 }, { "epoch": 4.400793045600122, "grad_norm": 1.599825143814087, "learning_rate": 9.611757501530924e-07, "log_odds_chosen": 2.3559718132019043, "log_odds_ratio": -0.39888402819633484, "logits/chosen": -0.6832284927368164, "logits/rejected": -0.9657466411590576, "logps/chosen": -0.9708753228187561, "logps/rejected": -3.0774524211883545, "loss": 1.062, "nll_loss": 1.1017894744873047, "rewards/accuracies": 0.75, "rewards/chosen": -0.09708753228187561, "rewards/margins": 0.21065771579742432, "rewards/rejected": -0.3077452480792999, "step": 7214 }, { "epoch": 4.401403080677139, "grad_norm": 1.4860124588012695, "learning_rate": 9.601959583588488e-07, "log_odds_chosen": 3.4035093784332275, "log_odds_ratio": -0.35233545303344727, "logits/chosen": -0.8959448337554932, "logits/rejected": -0.9478527307510376, "logps/chosen": -0.7403830289840698, "logps/rejected": -3.6398696899414062, "loss": 0.949, "nll_loss": 0.9263747930526733, "rewards/accuracies": 0.875, "rewards/chosen": -0.0740383043885231, "rewards/margins": 0.2899487018585205, "rewards/rejected": -0.363986998796463, "step": 7215 }, { "epoch": 4.402013115754156, "grad_norm": 2.4646875858306885, "learning_rate": 9.592161665646051e-07, "log_odds_chosen": 2.8100738525390625, "log_odds_ratio": -0.3918931186199188, "logits/chosen": -1.0391409397125244, "logits/rejected": -1.1845817565917969, "logps/chosen": -0.8528496026992798, "logps/rejected": -3.270183563232422, "loss": 1.0803, "nll_loss": 0.9989066123962402, "rewards/accuracies": 0.75, "rewards/chosen": -0.08528496325016022, "rewards/margins": 0.2417334020137787, "rewards/rejected": -0.3270183503627777, "step": 7216 }, { "epoch": 4.402623150831173, "grad_norm": 2.103580951690674, "learning_rate": 9.582363747703612e-07, "log_odds_chosen": 1.3584082126617432, "log_odds_ratio": -0.42340004444122314, "logits/chosen": -0.9344921112060547, "logits/rejected": -1.0708470344543457, "logps/chosen": -0.7717004418373108, "logps/rejected": -1.4538602828979492, "loss": 1.114, "nll_loss": 1.248311996459961, "rewards/accuracies": 0.875, "rewards/chosen": -0.07717004418373108, "rewards/margins": 0.0682159960269928, "rewards/rejected": -0.14538602530956268, "step": 7217 }, { "epoch": 4.4032331859081895, "grad_norm": 1.7534769773483276, "learning_rate": 9.572565829761176e-07, "log_odds_chosen": 2.16186785697937, "log_odds_ratio": -0.36304694414138794, "logits/chosen": -0.8255579471588135, "logits/rejected": -0.7838668823242188, "logps/chosen": -0.7705411314964294, "logps/rejected": -2.4361050128936768, "loss": 1.0358, "nll_loss": 0.9430890083312988, "rewards/accuracies": 0.75, "rewards/chosen": -0.07705411314964294, "rewards/margins": 0.16655638813972473, "rewards/rejected": -0.24361050128936768, "step": 7218 }, { "epoch": 4.403843220985206, "grad_norm": 1.8573718070983887, "learning_rate": 9.562767911818737e-07, "log_odds_chosen": 1.7088922262191772, "log_odds_ratio": -0.4412913918495178, "logits/chosen": -0.8360517621040344, "logits/rejected": -0.9483774900436401, "logps/chosen": -0.6198693513870239, "logps/rejected": -1.9447957277297974, "loss": 1.103, "nll_loss": 1.1261470317840576, "rewards/accuracies": 0.625, "rewards/chosen": -0.06198693811893463, "rewards/margins": 0.13249264657497406, "rewards/rejected": -0.1944795697927475, "step": 7219 }, { "epoch": 4.404453256062224, "grad_norm": 1.3551433086395264, "learning_rate": 9.5529699938763e-07, "log_odds_chosen": 1.4801173210144043, "log_odds_ratio": -0.37380796670913696, "logits/chosen": -1.1006360054016113, "logits/rejected": -1.0524232387542725, "logps/chosen": -0.8504979610443115, "logps/rejected": -1.848737359046936, "loss": 1.0314, "nll_loss": 1.075678825378418, "rewards/accuracies": 0.875, "rewards/chosen": -0.08504979312419891, "rewards/margins": 0.09982393682003021, "rewards/rejected": -0.18487372994422913, "step": 7220 }, { "epoch": 4.405063291139241, "grad_norm": 1.0337456464767456, "learning_rate": 9.543172075933864e-07, "log_odds_chosen": 3.939566135406494, "log_odds_ratio": -0.22532518208026886, "logits/chosen": -0.9159802198410034, "logits/rejected": -1.0912070274353027, "logps/chosen": -0.7122622728347778, "logps/rejected": -4.0701212882995605, "loss": 0.7812, "nll_loss": 0.8108152151107788, "rewards/accuracies": 0.875, "rewards/chosen": -0.07122622430324554, "rewards/margins": 0.3357859253883362, "rewards/rejected": -0.40701210498809814, "step": 7221 }, { "epoch": 4.405673326216258, "grad_norm": 1.5850460529327393, "learning_rate": 9.533374157991426e-07, "log_odds_chosen": 1.760457992553711, "log_odds_ratio": -0.49989765882492065, "logits/chosen": -0.9574607610702515, "logits/rejected": -0.9508702754974365, "logps/chosen": -0.8668559789657593, "logps/rejected": -2.431640625, "loss": 1.121, "nll_loss": 1.1731770038604736, "rewards/accuracies": 0.5, "rewards/chosen": -0.08668559789657593, "rewards/margins": 0.15647846460342407, "rewards/rejected": -0.2431640475988388, "step": 7222 }, { "epoch": 4.406283361293275, "grad_norm": 1.628940463066101, "learning_rate": 9.523576240048989e-07, "log_odds_chosen": 2.319270610809326, "log_odds_ratio": -0.39725643396377563, "logits/chosen": -0.8771519660949707, "logits/rejected": -0.796259880065918, "logps/chosen": -0.7380716800689697, "logps/rejected": -2.644502878189087, "loss": 0.9583, "nll_loss": 0.8869591951370239, "rewards/accuracies": 0.75, "rewards/chosen": -0.07380717247724533, "rewards/margins": 0.19064311683177948, "rewards/rejected": -0.2644502818584442, "step": 7223 }, { "epoch": 4.406893396370291, "grad_norm": 2.785785675048828, "learning_rate": 9.513778322106551e-07, "log_odds_chosen": 1.36190927028656, "log_odds_ratio": -0.5875855088233948, "logits/chosen": -0.8458123207092285, "logits/rejected": -1.1694968938827515, "logps/chosen": -1.2954035997390747, "logps/rejected": -2.3728532791137695, "loss": 1.2703, "nll_loss": 1.5435066223144531, "rewards/accuracies": 0.625, "rewards/chosen": -0.1295403689146042, "rewards/margins": 0.1077449843287468, "rewards/rejected": -0.23728534579277039, "step": 7224 }, { "epoch": 4.407503431447308, "grad_norm": 2.0929863452911377, "learning_rate": 9.503980404164115e-07, "log_odds_chosen": 2.5714616775512695, "log_odds_ratio": -0.5147608518600464, "logits/chosen": -0.8984917998313904, "logits/rejected": -1.0228532552719116, "logps/chosen": -0.7164449691772461, "logps/rejected": -2.949676036834717, "loss": 1.0506, "nll_loss": 1.064594030380249, "rewards/accuracies": 0.625, "rewards/chosen": -0.07164449989795685, "rewards/margins": 0.22332313656806946, "rewards/rejected": -0.2949676513671875, "step": 7225 }, { "epoch": 4.408113466524325, "grad_norm": 6.20482873916626, "learning_rate": 9.494182486221678e-07, "log_odds_chosen": 3.067365884780884, "log_odds_ratio": -0.28755271434783936, "logits/chosen": -0.9644354581832886, "logits/rejected": -0.8031395673751831, "logps/chosen": -0.6582456827163696, "logps/rejected": -3.152285575866699, "loss": 1.0001, "nll_loss": 0.9701525568962097, "rewards/accuracies": 0.875, "rewards/chosen": -0.06582457572221756, "rewards/margins": 0.24940399825572968, "rewards/rejected": -0.31522858142852783, "step": 7226 }, { "epoch": 4.408723501601342, "grad_norm": 1.839400291442871, "learning_rate": 9.48438456827924e-07, "log_odds_chosen": 2.4748308658599854, "log_odds_ratio": -0.3558028042316437, "logits/chosen": -0.9649742841720581, "logits/rejected": -1.0750665664672852, "logps/chosen": -0.7814189791679382, "logps/rejected": -2.772172451019287, "loss": 0.9299, "nll_loss": 0.9272427558898926, "rewards/accuracies": 0.875, "rewards/chosen": -0.07814189791679382, "rewards/margins": 0.19907531142234802, "rewards/rejected": -0.27721720933914185, "step": 7227 }, { "epoch": 4.409333536678359, "grad_norm": 2.0533738136291504, "learning_rate": 9.474586650336804e-07, "log_odds_chosen": 1.8097467422485352, "log_odds_ratio": -0.37834107875823975, "logits/chosen": -0.8366867899894714, "logits/rejected": -0.969050407409668, "logps/chosen": -0.777768075466156, "logps/rejected": -2.275942087173462, "loss": 0.9449, "nll_loss": 0.9142417311668396, "rewards/accuracies": 0.75, "rewards/chosen": -0.07777680456638336, "rewards/margins": 0.14981739223003387, "rewards/rejected": -0.22759419679641724, "step": 7228 }, { "epoch": 4.409943571755376, "grad_norm": 10.04179573059082, "learning_rate": 9.464788732394365e-07, "log_odds_chosen": 3.517171859741211, "log_odds_ratio": -0.20540842413902283, "logits/chosen": -0.8486608266830444, "logits/rejected": -1.1385809183120728, "logps/chosen": -0.6843833923339844, "logps/rejected": -3.4020020961761475, "loss": 0.9841, "nll_loss": 0.687865138053894, "rewards/accuracies": 0.875, "rewards/chosen": -0.0684383437037468, "rewards/margins": 0.2717618942260742, "rewards/rejected": -0.3402002155780792, "step": 7229 }, { "epoch": 4.4105536068323925, "grad_norm": 2.2817161083221436, "learning_rate": 9.454990814451928e-07, "log_odds_chosen": 0.5717213749885559, "log_odds_ratio": -0.7770936489105225, "logits/chosen": -1.13975191116333, "logits/rejected": -1.0317368507385254, "logps/chosen": -1.1308534145355225, "logps/rejected": -1.6624209880828857, "loss": 1.0618, "nll_loss": 1.2865269184112549, "rewards/accuracies": 0.375, "rewards/chosen": -0.11308535188436508, "rewards/margins": 0.05315675213932991, "rewards/rejected": -0.1662421077489853, "step": 7230 }, { "epoch": 4.411163641909409, "grad_norm": 1.7805732488632202, "learning_rate": 9.445192896509492e-07, "log_odds_chosen": 1.8715660572052002, "log_odds_ratio": -0.4455499053001404, "logits/chosen": -0.8765961527824402, "logits/rejected": -1.0294203758239746, "logps/chosen": -0.7647826671600342, "logps/rejected": -2.267688274383545, "loss": 0.9569, "nll_loss": 0.8728227615356445, "rewards/accuracies": 0.75, "rewards/chosen": -0.0764782726764679, "rewards/margins": 0.1502905786037445, "rewards/rejected": -0.22676882147789001, "step": 7231 }, { "epoch": 4.411773676986427, "grad_norm": 2.041463851928711, "learning_rate": 9.435394978567054e-07, "log_odds_chosen": 1.8437135219573975, "log_odds_ratio": -0.3128626346588135, "logits/chosen": -0.8996529579162598, "logits/rejected": -1.1223669052124023, "logps/chosen": -0.7833147644996643, "logps/rejected": -2.176736354827881, "loss": 1.1054, "nll_loss": 1.2699737548828125, "rewards/accuracies": 1.0, "rewards/chosen": -0.07833147794008255, "rewards/margins": 0.13934214413166046, "rewards/rejected": -0.21767361462116241, "step": 7232 }, { "epoch": 4.412383712063444, "grad_norm": 1.556634783744812, "learning_rate": 9.425597060624617e-07, "log_odds_chosen": 3.836578607559204, "log_odds_ratio": -0.23951880633831024, "logits/chosen": -0.7981987595558167, "logits/rejected": -1.085619568824768, "logps/chosen": -0.5843020081520081, "logps/rejected": -3.651581287384033, "loss": 0.9083, "nll_loss": 0.7330697774887085, "rewards/accuracies": 0.875, "rewards/chosen": -0.058430202305316925, "rewards/margins": 0.30672791600227356, "rewards/rejected": -0.3651581406593323, "step": 7233 }, { "epoch": 4.412993747140461, "grad_norm": 3.4823453426361084, "learning_rate": 9.41579914268218e-07, "log_odds_chosen": 2.5515713691711426, "log_odds_ratio": -0.3171542286872864, "logits/chosen": -0.8444631099700928, "logits/rejected": -0.8060818314552307, "logps/chosen": -0.7848894596099854, "logps/rejected": -2.898134469985962, "loss": 1.036, "nll_loss": 0.9608996510505676, "rewards/accuracies": 0.75, "rewards/chosen": -0.07848893851041794, "rewards/margins": 0.21132449805736542, "rewards/rejected": -0.28981342911720276, "step": 7234 }, { "epoch": 4.413603782217478, "grad_norm": 3.7182533740997314, "learning_rate": 9.406001224739743e-07, "log_odds_chosen": 3.6432981491088867, "log_odds_ratio": -0.32107508182525635, "logits/chosen": -0.9226880073547363, "logits/rejected": -1.0289967060089111, "logps/chosen": -0.5726844072341919, "logps/rejected": -3.527757167816162, "loss": 0.9525, "nll_loss": 0.8222915530204773, "rewards/accuracies": 1.0, "rewards/chosen": -0.05726844444870949, "rewards/margins": 0.2955072820186615, "rewards/rejected": -0.3527757227420807, "step": 7235 }, { "epoch": 4.4142138172944945, "grad_norm": 2.9039113521575928, "learning_rate": 9.396203306797305e-07, "log_odds_chosen": 0.6895067691802979, "log_odds_ratio": -0.5209859013557434, "logits/chosen": -0.9744073152542114, "logits/rejected": -0.9782097339630127, "logps/chosen": -0.7099756002426147, "logps/rejected": -1.1368451118469238, "loss": 1.0691, "nll_loss": 0.9480559825897217, "rewards/accuracies": 0.625, "rewards/chosen": -0.07099756598472595, "rewards/margins": 0.04268694296479225, "rewards/rejected": -0.1136845052242279, "step": 7236 }, { "epoch": 4.414823852371511, "grad_norm": 2.2002110481262207, "learning_rate": 9.386405388854867e-07, "log_odds_chosen": 1.6043899059295654, "log_odds_ratio": -0.4376887083053589, "logits/chosen": -0.9631593227386475, "logits/rejected": -1.1188035011291504, "logps/chosen": -0.8723152875900269, "logps/rejected": -2.205673933029175, "loss": 1.1285, "nll_loss": 1.362016201019287, "rewards/accuracies": 0.75, "rewards/chosen": -0.08723153173923492, "rewards/margins": 0.1333358734846115, "rewards/rejected": -0.22056740522384644, "step": 7237 }, { "epoch": 4.415433887448528, "grad_norm": 3.7343268394470215, "learning_rate": 9.376607470912431e-07, "log_odds_chosen": 1.4724712371826172, "log_odds_ratio": -0.46946340799331665, "logits/chosen": -0.8150290250778198, "logits/rejected": -0.8512420654296875, "logps/chosen": -0.7259175777435303, "logps/rejected": -1.7287182807922363, "loss": 0.8928, "nll_loss": 0.8660533428192139, "rewards/accuracies": 0.75, "rewards/chosen": -0.07259175181388855, "rewards/margins": 0.10028006881475449, "rewards/rejected": -0.17287182807922363, "step": 7238 }, { "epoch": 4.416043922525545, "grad_norm": 1.200178861618042, "learning_rate": 9.366809552969993e-07, "log_odds_chosen": 3.4194188117980957, "log_odds_ratio": -0.1411057859659195, "logits/chosen": -0.7461443543434143, "logits/rejected": -0.8780316114425659, "logps/chosen": -0.7368327975273132, "logps/rejected": -3.0077121257781982, "loss": 1.0127, "nll_loss": 0.9608375430107117, "rewards/accuracies": 1.0, "rewards/chosen": -0.07368328422307968, "rewards/margins": 0.22708794474601746, "rewards/rejected": -0.30077123641967773, "step": 7239 }, { "epoch": 4.416653957602562, "grad_norm": 1.7022958993911743, "learning_rate": 9.357011635027556e-07, "log_odds_chosen": 2.792207956314087, "log_odds_ratio": -0.32641172409057617, "logits/chosen": -0.8916738033294678, "logits/rejected": -1.2307020425796509, "logps/chosen": -0.6348172426223755, "logps/rejected": -2.738231658935547, "loss": 1.0244, "nll_loss": 1.1581858396530151, "rewards/accuracies": 0.75, "rewards/chosen": -0.06348172575235367, "rewards/margins": 0.2103414535522461, "rewards/rejected": -0.27382320165634155, "step": 7240 }, { "epoch": 4.417263992679579, "grad_norm": 1.4396684169769287, "learning_rate": 9.34721371708512e-07, "log_odds_chosen": 1.337355613708496, "log_odds_ratio": -0.4006650447845459, "logits/chosen": -0.9048635959625244, "logits/rejected": -0.9817463159561157, "logps/chosen": -1.0236364603042603, "logps/rejected": -2.027723789215088, "loss": 0.9469, "nll_loss": 1.1198859214782715, "rewards/accuracies": 0.75, "rewards/chosen": -0.10236364603042603, "rewards/margins": 0.10040874034166336, "rewards/rejected": -0.20277239382266998, "step": 7241 }, { "epoch": 4.417874027756596, "grad_norm": 4.501682281494141, "learning_rate": 9.337415799142682e-07, "log_odds_chosen": 2.1203465461730957, "log_odds_ratio": -0.5494170188903809, "logits/chosen": -0.7791821956634521, "logits/rejected": -0.9182852506637573, "logps/chosen": -0.7524906396865845, "logps/rejected": -2.3015427589416504, "loss": 1.0268, "nll_loss": 1.0817456245422363, "rewards/accuracies": 0.375, "rewards/chosen": -0.07524906098842621, "rewards/margins": 0.15490520000457764, "rewards/rejected": -0.23015427589416504, "step": 7242 }, { "epoch": 4.418484062833613, "grad_norm": 1.8676785230636597, "learning_rate": 9.327617881200244e-07, "log_odds_chosen": 2.7971434593200684, "log_odds_ratio": -0.3111400604248047, "logits/chosen": -0.7950494289398193, "logits/rejected": -0.9785560965538025, "logps/chosen": -0.5669313669204712, "logps/rejected": -2.6766510009765625, "loss": 0.8649, "nll_loss": 0.6896317005157471, "rewards/accuracies": 0.75, "rewards/chosen": -0.05669313669204712, "rewards/margins": 0.21097198128700256, "rewards/rejected": -0.2676650881767273, "step": 7243 }, { "epoch": 4.41909409791063, "grad_norm": 2.620575428009033, "learning_rate": 9.317819963257807e-07, "log_odds_chosen": 1.0570569038391113, "log_odds_ratio": -0.635758638381958, "logits/chosen": -0.7122415900230408, "logits/rejected": -1.0491818189620972, "logps/chosen": -0.989466667175293, "logps/rejected": -1.7242236137390137, "loss": 1.0483, "nll_loss": 1.191589593887329, "rewards/accuracies": 0.5, "rewards/chosen": -0.09894667565822601, "rewards/margins": 0.07347569614648819, "rewards/rejected": -0.1724223792552948, "step": 7244 }, { "epoch": 4.419704132987647, "grad_norm": 5.97547721862793, "learning_rate": 9.30802204531537e-07, "log_odds_chosen": 2.264375686645508, "log_odds_ratio": -0.4216687083244324, "logits/chosen": -0.9120118618011475, "logits/rejected": -1.012691617012024, "logps/chosen": -0.6876229047775269, "logps/rejected": -2.603653907775879, "loss": 1.0684, "nll_loss": 0.9988807439804077, "rewards/accuracies": 0.625, "rewards/chosen": -0.06876229494810104, "rewards/margins": 0.19160312414169312, "rewards/rejected": -0.26036542654037476, "step": 7245 }, { "epoch": 4.420314168064664, "grad_norm": 2.2549691200256348, "learning_rate": 9.298224127372933e-07, "log_odds_chosen": 0.6183216571807861, "log_odds_ratio": -0.5723404288291931, "logits/chosen": -0.8904932737350464, "logits/rejected": -0.8630658984184265, "logps/chosen": -0.978827714920044, "logps/rejected": -1.452697515487671, "loss": 1.1967, "nll_loss": 1.1652779579162598, "rewards/accuracies": 0.5, "rewards/chosen": -0.09788277000188828, "rewards/margins": 0.04738697409629822, "rewards/rejected": -0.1452697515487671, "step": 7246 }, { "epoch": 4.420924203141681, "grad_norm": 1.568679928779602, "learning_rate": 9.288426209430496e-07, "log_odds_chosen": 3.6996665000915527, "log_odds_ratio": -0.5930657386779785, "logits/chosen": -0.8141756653785706, "logits/rejected": -0.936322808265686, "logps/chosen": -0.7637622952461243, "logps/rejected": -4.123242378234863, "loss": 0.9641, "nll_loss": 0.9680783152580261, "rewards/accuracies": 0.5, "rewards/chosen": -0.07637622952461243, "rewards/margins": 0.33594799041748047, "rewards/rejected": -0.4123242497444153, "step": 7247 }, { "epoch": 4.4215342382186975, "grad_norm": 1.670764684677124, "learning_rate": 9.278628291488059e-07, "log_odds_chosen": 3.8330836296081543, "log_odds_ratio": -0.39483749866485596, "logits/chosen": -0.7651784420013428, "logits/rejected": -1.2068253755569458, "logps/chosen": -0.7309942841529846, "logps/rejected": -4.030956745147705, "loss": 1.1866, "nll_loss": 1.2307699918746948, "rewards/accuracies": 0.75, "rewards/chosen": -0.07309943437576294, "rewards/margins": 0.3299962282180786, "rewards/rejected": -0.40309566259384155, "step": 7248 }, { "epoch": 4.422144273295714, "grad_norm": 1.1612915992736816, "learning_rate": 9.268830373545621e-07, "log_odds_chosen": 2.379457473754883, "log_odds_ratio": -0.5882541537284851, "logits/chosen": -0.9419721961021423, "logits/rejected": -1.0263484716415405, "logps/chosen": -1.0165576934814453, "logps/rejected": -2.7646422386169434, "loss": 0.9879, "nll_loss": 1.0224592685699463, "rewards/accuracies": 0.75, "rewards/chosen": -0.10165577381849289, "rewards/margins": 0.17480844259262085, "rewards/rejected": -0.27646422386169434, "step": 7249 }, { "epoch": 4.422754308372731, "grad_norm": 3.654858112335205, "learning_rate": 9.259032455603185e-07, "log_odds_chosen": 2.813558578491211, "log_odds_ratio": -0.5315835475921631, "logits/chosen": -0.9435344934463501, "logits/rejected": -1.0632884502410889, "logps/chosen": -0.7657602429389954, "logps/rejected": -3.2316699028015137, "loss": 1.1471, "nll_loss": 0.9903841018676758, "rewards/accuracies": 0.5, "rewards/chosen": -0.07657603174448013, "rewards/margins": 0.24659094214439392, "rewards/rejected": -0.32316696643829346, "step": 7250 }, { "epoch": 4.423364343449748, "grad_norm": 2.208319664001465, "learning_rate": 9.249234537660746e-07, "log_odds_chosen": 1.8752418756484985, "log_odds_ratio": -0.35826972126960754, "logits/chosen": -0.814166784286499, "logits/rejected": -0.9762122631072998, "logps/chosen": -0.9087671637535095, "logps/rejected": -2.4259283542633057, "loss": 1.0428, "nll_loss": 1.0109385251998901, "rewards/accuracies": 0.875, "rewards/chosen": -0.09087672084569931, "rewards/margins": 0.15171611309051514, "rewards/rejected": -0.24259284138679504, "step": 7251 }, { "epoch": 4.423974378526765, "grad_norm": 3.1821391582489014, "learning_rate": 9.239436619718309e-07, "log_odds_chosen": 1.2559351921081543, "log_odds_ratio": -0.4453054964542389, "logits/chosen": -1.0005124807357788, "logits/rejected": -1.1128416061401367, "logps/chosen": -0.904948890209198, "logps/rejected": -1.9738922119140625, "loss": 1.0844, "nll_loss": 0.9532762765884399, "rewards/accuracies": 0.625, "rewards/chosen": -0.09049488604068756, "rewards/margins": 0.10689433664083481, "rewards/rejected": -0.19738921523094177, "step": 7252 }, { "epoch": 4.424584413603783, "grad_norm": 3.4670119285583496, "learning_rate": 9.229638701775872e-07, "log_odds_chosen": 3.4374985694885254, "log_odds_ratio": -0.26280656456947327, "logits/chosen": -0.7881784439086914, "logits/rejected": -0.9597844481468201, "logps/chosen": -0.5422545075416565, "logps/rejected": -3.286571741104126, "loss": 0.8616, "nll_loss": 0.8173211216926575, "rewards/accuracies": 0.875, "rewards/chosen": -0.05422544851899147, "rewards/margins": 0.2744317352771759, "rewards/rejected": -0.3286571800708771, "step": 7253 }, { "epoch": 4.4251944486807995, "grad_norm": 1.7320106029510498, "learning_rate": 9.219840783833435e-07, "log_odds_chosen": 2.2348499298095703, "log_odds_ratio": -0.2656431496143341, "logits/chosen": -0.8427753448486328, "logits/rejected": -0.8997005820274353, "logps/chosen": -0.5887018442153931, "logps/rejected": -2.0930233001708984, "loss": 1.1108, "nll_loss": 1.0590360164642334, "rewards/accuracies": 1.0, "rewards/chosen": -0.05887018144130707, "rewards/margins": 0.15043213963508606, "rewards/rejected": -0.20930232107639313, "step": 7254 }, { "epoch": 4.425804483757816, "grad_norm": 1.1116149425506592, "learning_rate": 9.210042865890998e-07, "log_odds_chosen": 2.2773847579956055, "log_odds_ratio": -0.5345463752746582, "logits/chosen": -1.0645105838775635, "logits/rejected": -1.1518456935882568, "logps/chosen": -0.8358912467956543, "logps/rejected": -2.8047680854797363, "loss": 0.9422, "nll_loss": 0.9211609363555908, "rewards/accuracies": 0.75, "rewards/chosen": -0.08358912914991379, "rewards/margins": 0.19688767194747925, "rewards/rejected": -0.28047680854797363, "step": 7255 }, { "epoch": 4.426414518834833, "grad_norm": 16.653636932373047, "learning_rate": 9.20024494794856e-07, "log_odds_chosen": 3.756072998046875, "log_odds_ratio": -0.2858034074306488, "logits/chosen": -0.9632238149642944, "logits/rejected": -1.2222414016723633, "logps/chosen": -0.8492609858512878, "logps/rejected": -3.894284963607788, "loss": 0.9979, "nll_loss": 0.947138786315918, "rewards/accuracies": 0.75, "rewards/chosen": -0.08492609858512878, "rewards/margins": 0.30450239777565, "rewards/rejected": -0.3894284963607788, "step": 7256 }, { "epoch": 4.42702455391185, "grad_norm": 3.8917653560638428, "learning_rate": 9.190447030006124e-07, "log_odds_chosen": 2.1514580249786377, "log_odds_ratio": -0.2879942059516907, "logits/chosen": -0.8824758529663086, "logits/rejected": -1.0287805795669556, "logps/chosen": -0.6178232431411743, "logps/rejected": -2.026782989501953, "loss": 0.8987, "nll_loss": 0.9476190805435181, "rewards/accuracies": 0.75, "rewards/chosen": -0.06178232282400131, "rewards/margins": 0.14089597761631012, "rewards/rejected": -0.20267829298973083, "step": 7257 }, { "epoch": 4.427634588988867, "grad_norm": 3.9924299716949463, "learning_rate": 9.180649112063686e-07, "log_odds_chosen": 2.0487778186798096, "log_odds_ratio": -0.47793421149253845, "logits/chosen": -0.9620089530944824, "logits/rejected": -1.1100667715072632, "logps/chosen": -0.8165326714515686, "logps/rejected": -2.5416665077209473, "loss": 1.0977, "nll_loss": 1.1203107833862305, "rewards/accuracies": 0.75, "rewards/chosen": -0.08165328204631805, "rewards/margins": 0.17251339554786682, "rewards/rejected": -0.2541666626930237, "step": 7258 }, { "epoch": 4.428244624065884, "grad_norm": 2.182724714279175, "learning_rate": 9.170851194121248e-07, "log_odds_chosen": 3.3035924434661865, "log_odds_ratio": -0.17745208740234375, "logits/chosen": -0.6649945974349976, "logits/rejected": -0.899829626083374, "logps/chosen": -0.5546712875366211, "logps/rejected": -3.15822696685791, "loss": 0.9951, "nll_loss": 0.7211083769798279, "rewards/accuracies": 1.0, "rewards/chosen": -0.05546712875366211, "rewards/margins": 0.2603555917739868, "rewards/rejected": -0.3158227205276489, "step": 7259 }, { "epoch": 4.4288546591429006, "grad_norm": 7.32633638381958, "learning_rate": 9.161053276178812e-07, "log_odds_chosen": 1.68362295627594, "log_odds_ratio": -0.4944004416465759, "logits/chosen": -0.932353138923645, "logits/rejected": -1.0565156936645508, "logps/chosen": -0.6497341990470886, "logps/rejected": -1.74611496925354, "loss": 1.103, "nll_loss": 1.0164746046066284, "rewards/accuracies": 0.75, "rewards/chosen": -0.06497342139482498, "rewards/margins": 0.10963808000087738, "rewards/rejected": -0.17461149394512177, "step": 7260 }, { "epoch": 4.429464694219917, "grad_norm": 13.405933380126953, "learning_rate": 9.151255358236374e-07, "log_odds_chosen": 1.198459267616272, "log_odds_ratio": -0.6202799081802368, "logits/chosen": -0.9026989340782166, "logits/rejected": -1.0838310718536377, "logps/chosen": -0.6787487268447876, "logps/rejected": -1.6053459644317627, "loss": 1.009, "nll_loss": 0.8440443277359009, "rewards/accuracies": 0.5, "rewards/chosen": -0.06787486374378204, "rewards/margins": 0.09265971928834915, "rewards/rejected": -0.1605345904827118, "step": 7261 }, { "epoch": 4.430074729296934, "grad_norm": 15.885169982910156, "learning_rate": 9.141457440293937e-07, "log_odds_chosen": 1.3344619274139404, "log_odds_ratio": -0.5034931898117065, "logits/chosen": -0.9987627863883972, "logits/rejected": -1.0589821338653564, "logps/chosen": -0.7069977521896362, "logps/rejected": -1.676271677017212, "loss": 1.033, "nll_loss": 1.0486042499542236, "rewards/accuracies": 0.75, "rewards/chosen": -0.0706997811794281, "rewards/margins": 0.09692739695310593, "rewards/rejected": -0.16762718558311462, "step": 7262 }, { "epoch": 4.430684764373951, "grad_norm": 1.9326320886611938, "learning_rate": 9.131659522351501e-07, "log_odds_chosen": -0.010250046849250793, "log_odds_ratio": -0.757865846157074, "logits/chosen": -0.9138193130493164, "logits/rejected": -0.8652896881103516, "logps/chosen": -0.9859483242034912, "logps/rejected": -0.9464786648750305, "loss": 1.1059, "nll_loss": 0.9884626865386963, "rewards/accuracies": 0.5, "rewards/chosen": -0.09859484434127808, "rewards/margins": -0.003946971148252487, "rewards/rejected": -0.0946478620171547, "step": 7263 }, { "epoch": 4.431294799450969, "grad_norm": 4.267952919006348, "learning_rate": 9.121861604409063e-07, "log_odds_chosen": 1.9363465309143066, "log_odds_ratio": -0.3047342002391815, "logits/chosen": -0.9077710509300232, "logits/rejected": -0.9003435373306274, "logps/chosen": -0.7437217831611633, "logps/rejected": -2.1431617736816406, "loss": 1.0757, "nll_loss": 0.865494966506958, "rewards/accuracies": 0.875, "rewards/chosen": -0.07437218725681305, "rewards/margins": 0.13994400203227997, "rewards/rejected": -0.21431618928909302, "step": 7264 }, { "epoch": 4.431904834527986, "grad_norm": 1.8464998006820679, "learning_rate": 9.112063686466625e-07, "log_odds_chosen": 2.1149401664733887, "log_odds_ratio": -0.543285071849823, "logits/chosen": -0.8242671489715576, "logits/rejected": -0.9864521026611328, "logps/chosen": -0.6907715797424316, "logps/rejected": -2.3572704792022705, "loss": 1.0942, "nll_loss": 0.884308934211731, "rewards/accuracies": 0.625, "rewards/chosen": -0.06907715648412704, "rewards/margins": 0.16664990782737732, "rewards/rejected": -0.23572707176208496, "step": 7265 }, { "epoch": 4.4325148696050025, "grad_norm": 1.5514516830444336, "learning_rate": 9.102265768524187e-07, "log_odds_chosen": 1.1268110275268555, "log_odds_ratio": -0.4978172481060028, "logits/chosen": -0.8141305446624756, "logits/rejected": -0.8579468727111816, "logps/chosen": -0.7785205841064453, "logps/rejected": -1.6246163845062256, "loss": 0.8478, "nll_loss": 0.9519921541213989, "rewards/accuracies": 0.75, "rewards/chosen": -0.07785205543041229, "rewards/margins": 0.08460958302021027, "rewards/rejected": -0.16246163845062256, "step": 7266 }, { "epoch": 4.433124904682019, "grad_norm": 6.418430328369141, "learning_rate": 9.092467850581751e-07, "log_odds_chosen": 0.23232579231262207, "log_odds_ratio": -0.6010450720787048, "logits/chosen": -1.0462255477905273, "logits/rejected": -0.9931445121765137, "logps/chosen": -0.7577185034751892, "logps/rejected": -0.892240047454834, "loss": 1.0365, "nll_loss": 0.9408040046691895, "rewards/accuracies": 0.625, "rewards/chosen": -0.07577185332775116, "rewards/margins": 0.013452151790261269, "rewards/rejected": -0.08922401070594788, "step": 7267 }, { "epoch": 4.433734939759036, "grad_norm": 2.168456792831421, "learning_rate": 9.082669932639314e-07, "log_odds_chosen": 0.8463703393936157, "log_odds_ratio": -0.670565128326416, "logits/chosen": -0.8834667205810547, "logits/rejected": -0.9774963855743408, "logps/chosen": -0.9848113656044006, "logps/rejected": -1.6672327518463135, "loss": 1.0136, "nll_loss": 1.1226385831832886, "rewards/accuracies": 0.625, "rewards/chosen": -0.09848114103078842, "rewards/margins": 0.0682421401143074, "rewards/rejected": -0.16672328114509583, "step": 7268 }, { "epoch": 4.434344974836053, "grad_norm": 5.929874897003174, "learning_rate": 9.072872014696876e-07, "log_odds_chosen": 1.9686132669448853, "log_odds_ratio": -0.3263099789619446, "logits/chosen": -0.8187271356582642, "logits/rejected": -1.0039812326431274, "logps/chosen": -0.6743649244308472, "logps/rejected": -2.0972020626068115, "loss": 1.082, "nll_loss": 0.7821964621543884, "rewards/accuracies": 0.875, "rewards/chosen": -0.06743649393320084, "rewards/margins": 0.14228372275829315, "rewards/rejected": -0.2097202092409134, "step": 7269 }, { "epoch": 4.43495500991307, "grad_norm": 2.603600263595581, "learning_rate": 9.06307409675444e-07, "log_odds_chosen": 1.1392834186553955, "log_odds_ratio": -0.46106991171836853, "logits/chosen": -0.8714106678962708, "logits/rejected": -0.9304656386375427, "logps/chosen": -0.8413602113723755, "logps/rejected": -1.780648112297058, "loss": 1.1045, "nll_loss": 1.069965124130249, "rewards/accuracies": 0.75, "rewards/chosen": -0.08413602411746979, "rewards/margins": 0.09392878413200378, "rewards/rejected": -0.17806482315063477, "step": 7270 }, { "epoch": 4.435565044990087, "grad_norm": 1.149272084236145, "learning_rate": 9.053276178812002e-07, "log_odds_chosen": 2.400789260864258, "log_odds_ratio": -0.49652889370918274, "logits/chosen": -0.9574321508407593, "logits/rejected": -1.0242891311645508, "logps/chosen": -0.8258527517318726, "logps/rejected": -2.6781938076019287, "loss": 1.0803, "nll_loss": 1.1498606204986572, "rewards/accuracies": 0.75, "rewards/chosen": -0.08258527517318726, "rewards/margins": 0.18523411452770233, "rewards/rejected": -0.2678194046020508, "step": 7271 }, { "epoch": 4.436175080067104, "grad_norm": 2.110588550567627, "learning_rate": 9.043478260869564e-07, "log_odds_chosen": 2.209951877593994, "log_odds_ratio": -0.2949537932872772, "logits/chosen": -0.7265080213546753, "logits/rejected": -0.8627622127532959, "logps/chosen": -0.7081391215324402, "logps/rejected": -2.4115424156188965, "loss": 1.0538, "nll_loss": 0.868309736251831, "rewards/accuracies": 0.875, "rewards/chosen": -0.07081391662359238, "rewards/margins": 0.17034032940864563, "rewards/rejected": -0.2411542534828186, "step": 7272 }, { "epoch": 4.43678511514412, "grad_norm": 1.8018757104873657, "learning_rate": 9.033680342927128e-07, "log_odds_chosen": 1.400640606880188, "log_odds_ratio": -0.47954246401786804, "logits/chosen": -0.9447644948959351, "logits/rejected": -1.07890784740448, "logps/chosen": -0.8338682651519775, "logps/rejected": -1.7892205715179443, "loss": 0.9892, "nll_loss": 1.0801444053649902, "rewards/accuracies": 0.625, "rewards/chosen": -0.08338683098554611, "rewards/margins": 0.09553523361682892, "rewards/rejected": -0.17892205715179443, "step": 7273 }, { "epoch": 4.437395150221137, "grad_norm": 1.550121545791626, "learning_rate": 9.02388242498469e-07, "log_odds_chosen": 1.9279429912567139, "log_odds_ratio": -0.4224225878715515, "logits/chosen": -0.8561266660690308, "logits/rejected": -1.0658540725708008, "logps/chosen": -0.9524032473564148, "logps/rejected": -2.380359172821045, "loss": 0.9828, "nll_loss": 1.108884334564209, "rewards/accuracies": 0.625, "rewards/chosen": -0.09524032473564148, "rewards/margins": 0.1427956223487854, "rewards/rejected": -0.23803594708442688, "step": 7274 }, { "epoch": 4.438005185298155, "grad_norm": 3.698458194732666, "learning_rate": 9.014084507042253e-07, "log_odds_chosen": 3.618090867996216, "log_odds_ratio": -0.2725875675678253, "logits/chosen": -0.8294427394866943, "logits/rejected": -0.9558141231536865, "logps/chosen": -0.6005396842956543, "logps/rejected": -3.5361664295196533, "loss": 0.9521, "nll_loss": 0.9369597434997559, "rewards/accuracies": 0.875, "rewards/chosen": -0.06005396693944931, "rewards/margins": 0.293562650680542, "rewards/rejected": -0.3536166548728943, "step": 7275 }, { "epoch": 4.438615220375172, "grad_norm": 1.3727649450302124, "learning_rate": 9.004286589099815e-07, "log_odds_chosen": 2.9342846870422363, "log_odds_ratio": -0.40799999237060547, "logits/chosen": -0.7766164541244507, "logits/rejected": -1.0298367738723755, "logps/chosen": -0.6251355409622192, "logps/rejected": -3.0026049613952637, "loss": 1.0659, "nll_loss": 0.767409086227417, "rewards/accuracies": 0.75, "rewards/chosen": -0.0625135526061058, "rewards/margins": 0.237746924161911, "rewards/rejected": -0.3002604842185974, "step": 7276 }, { "epoch": 4.439225255452189, "grad_norm": 1.8936741352081299, "learning_rate": 8.994488671157379e-07, "log_odds_chosen": 1.062696099281311, "log_odds_ratio": -0.5049511194229126, "logits/chosen": -0.6298750638961792, "logits/rejected": -1.0185497999191284, "logps/chosen": -0.9843797087669373, "logps/rejected": -1.8257014751434326, "loss": 0.8641, "nll_loss": 0.9488934278488159, "rewards/accuracies": 0.875, "rewards/chosen": -0.09843797981739044, "rewards/margins": 0.08413216471672058, "rewards/rejected": -0.18257012963294983, "step": 7277 }, { "epoch": 4.439835290529206, "grad_norm": 1.3542275428771973, "learning_rate": 8.984690753214942e-07, "log_odds_chosen": 2.0987966060638428, "log_odds_ratio": -0.3612141013145447, "logits/chosen": -0.7195742130279541, "logits/rejected": -0.8433394432067871, "logps/chosen": -0.7203540802001953, "logps/rejected": -2.3608341217041016, "loss": 1.0422, "nll_loss": 0.8528268933296204, "rewards/accuracies": 0.875, "rewards/chosen": -0.07203540951013565, "rewards/margins": 0.1640479862689972, "rewards/rejected": -0.23608338832855225, "step": 7278 }, { "epoch": 4.440445325606222, "grad_norm": 1.7207525968551636, "learning_rate": 8.974892835272504e-07, "log_odds_chosen": 1.5763061046600342, "log_odds_ratio": -0.582970142364502, "logits/chosen": -0.8955377340316772, "logits/rejected": -0.9732152223587036, "logps/chosen": -0.8761978149414062, "logps/rejected": -2.0708487033843994, "loss": 1.1448, "nll_loss": 1.0787650346755981, "rewards/accuracies": 0.5, "rewards/chosen": -0.08761978894472122, "rewards/margins": 0.11946508288383484, "rewards/rejected": -0.20708486437797546, "step": 7279 }, { "epoch": 4.441055360683239, "grad_norm": 1.269450306892395, "learning_rate": 8.965094917330067e-07, "log_odds_chosen": 3.6030800342559814, "log_odds_ratio": -0.33489662408828735, "logits/chosen": -0.4883009195327759, "logits/rejected": -0.7563619613647461, "logps/chosen": -0.5827013254165649, "logps/rejected": -3.2971651554107666, "loss": 0.9699, "nll_loss": 0.9426921010017395, "rewards/accuracies": 0.875, "rewards/chosen": -0.058270134031772614, "rewards/margins": 0.2714463472366333, "rewards/rejected": -0.3297164738178253, "step": 7280 }, { "epoch": 4.441665395760256, "grad_norm": 1.677768349647522, "learning_rate": 8.955296999387629e-07, "log_odds_chosen": 2.491004467010498, "log_odds_ratio": -0.2315709888935089, "logits/chosen": -0.924475908279419, "logits/rejected": -1.1203782558441162, "logps/chosen": -0.7953754663467407, "logps/rejected": -2.684934377670288, "loss": 0.9816, "nll_loss": 0.8733153343200684, "rewards/accuracies": 1.0, "rewards/chosen": -0.0795375406742096, "rewards/margins": 0.1889559030532837, "rewards/rejected": -0.2684934139251709, "step": 7281 }, { "epoch": 4.442275430837273, "grad_norm": 3.571989059448242, "learning_rate": 8.945499081445192e-07, "log_odds_chosen": 3.315769672393799, "log_odds_ratio": -0.30372482538223267, "logits/chosen": -1.2101905345916748, "logits/rejected": -1.2073804140090942, "logps/chosen": -1.1597027778625488, "logps/rejected": -4.189753532409668, "loss": 1.1655, "nll_loss": 1.4647364616394043, "rewards/accuracies": 0.75, "rewards/chosen": -0.11597027629613876, "rewards/margins": 0.3030050992965698, "rewards/rejected": -0.4189754128456116, "step": 7282 }, { "epoch": 4.44288546591429, "grad_norm": 1.49806547164917, "learning_rate": 8.935701163502756e-07, "log_odds_chosen": 1.082973837852478, "log_odds_ratio": -0.6840195059776306, "logits/chosen": -0.9024219512939453, "logits/rejected": -0.9302530288696289, "logps/chosen": -0.989389955997467, "logps/rejected": -1.5655611753463745, "loss": 0.9748, "nll_loss": 1.2347981929779053, "rewards/accuracies": 0.625, "rewards/chosen": -0.09893899410963058, "rewards/margins": 0.05761711299419403, "rewards/rejected": -0.1565561145544052, "step": 7283 }, { "epoch": 4.443495500991307, "grad_norm": 1.8001090288162231, "learning_rate": 8.925903245560318e-07, "log_odds_chosen": 3.9830265045166016, "log_odds_ratio": -0.166633740067482, "logits/chosen": -0.7067897915840149, "logits/rejected": -0.9652302265167236, "logps/chosen": -0.5158658623695374, "logps/rejected": -3.6016604900360107, "loss": 0.8892, "nll_loss": 0.7342538833618164, "rewards/accuracies": 0.875, "rewards/chosen": -0.051586586982011795, "rewards/margins": 0.3085794746875763, "rewards/rejected": -0.3601660430431366, "step": 7284 }, { "epoch": 4.444105536068324, "grad_norm": 1.5992107391357422, "learning_rate": 8.916105327617881e-07, "log_odds_chosen": 2.2254695892333984, "log_odds_ratio": -0.38730189204216003, "logits/chosen": -0.8855055570602417, "logits/rejected": -1.0181584358215332, "logps/chosen": -0.8166664838790894, "logps/rejected": -2.5505285263061523, "loss": 0.9363, "nll_loss": 0.9806543588638306, "rewards/accuracies": 0.5, "rewards/chosen": -0.08166664838790894, "rewards/margins": 0.17338621616363525, "rewards/rejected": -0.2550528645515442, "step": 7285 }, { "epoch": 4.444715571145341, "grad_norm": 6.8617777824401855, "learning_rate": 8.906307409675444e-07, "log_odds_chosen": 0.9223058819770813, "log_odds_ratio": -0.509395956993103, "logits/chosen": -0.8423383235931396, "logits/rejected": -0.8696982860565186, "logps/chosen": -0.9310587048530579, "logps/rejected": -1.7829046249389648, "loss": 1.0164, "nll_loss": 0.9980779886245728, "rewards/accuracies": 0.75, "rewards/chosen": -0.09310587495565414, "rewards/margins": 0.08518458902835846, "rewards/rejected": -0.178290456533432, "step": 7286 }, { "epoch": 4.445325606222358, "grad_norm": 2.2340545654296875, "learning_rate": 8.896509491733006e-07, "log_odds_chosen": 2.4820122718811035, "log_odds_ratio": -0.38529229164123535, "logits/chosen": -0.8456333875656128, "logits/rejected": -0.9286981821060181, "logps/chosen": -0.950830340385437, "logps/rejected": -3.0472588539123535, "loss": 1.0431, "nll_loss": 0.9500336647033691, "rewards/accuracies": 0.875, "rewards/chosen": -0.09508303552865982, "rewards/margins": 0.20964287221431732, "rewards/rejected": -0.30472591519355774, "step": 7287 }, { "epoch": 4.445935641299375, "grad_norm": 1.667841911315918, "learning_rate": 8.886711573790569e-07, "log_odds_chosen": 2.702336072921753, "log_odds_ratio": -0.33776748180389404, "logits/chosen": -0.9581810235977173, "logits/rejected": -1.1010421514511108, "logps/chosen": -0.5982884168624878, "logps/rejected": -2.821913242340088, "loss": 0.9373, "nll_loss": 0.8308841586112976, "rewards/accuracies": 1.0, "rewards/chosen": -0.05982884764671326, "rewards/margins": 0.2223624736070633, "rewards/rejected": -0.28219130635261536, "step": 7288 }, { "epoch": 4.446545676376392, "grad_norm": 7.645203590393066, "learning_rate": 8.876913655848132e-07, "log_odds_chosen": 0.9689574837684631, "log_odds_ratio": -0.5563808679580688, "logits/chosen": -0.9682220220565796, "logits/rejected": -1.0942318439483643, "logps/chosen": -0.7390367388725281, "logps/rejected": -1.4078867435455322, "loss": 1.0835, "nll_loss": 1.0039318799972534, "rewards/accuracies": 0.5, "rewards/chosen": -0.07390367984771729, "rewards/margins": 0.06688500940799713, "rewards/rejected": -0.14078867435455322, "step": 7289 }, { "epoch": 4.447155711453409, "grad_norm": 2.1288564205169678, "learning_rate": 8.867115737905695e-07, "log_odds_chosen": 2.9264602661132812, "log_odds_ratio": -0.5206578969955444, "logits/chosen": -0.7253246307373047, "logits/rejected": -0.8809038400650024, "logps/chosen": -0.9758126139640808, "logps/rejected": -3.6282901763916016, "loss": 1.1302, "nll_loss": 0.9475369453430176, "rewards/accuracies": 0.5, "rewards/chosen": -0.09758126735687256, "rewards/margins": 0.26524776220321655, "rewards/rejected": -0.3628290295600891, "step": 7290 }, { "epoch": 4.4477657465304254, "grad_norm": 1.478220820426941, "learning_rate": 8.857317819963257e-07, "log_odds_chosen": 1.468921422958374, "log_odds_ratio": -0.4132736921310425, "logits/chosen": -1.0617763996124268, "logits/rejected": -1.1058404445648193, "logps/chosen": -1.0027596950531006, "logps/rejected": -2.1133596897125244, "loss": 0.999, "nll_loss": 1.098324179649353, "rewards/accuracies": 1.0, "rewards/chosen": -0.10027597099542618, "rewards/margins": 0.1110599935054779, "rewards/rejected": -0.2113359570503235, "step": 7291 }, { "epoch": 4.448375781607442, "grad_norm": 9.013503074645996, "learning_rate": 8.84751990202082e-07, "log_odds_chosen": 3.8939859867095947, "log_odds_ratio": -0.15574535727500916, "logits/chosen": -1.019365906715393, "logits/rejected": -1.2649401426315308, "logps/chosen": -0.7576727867126465, "logps/rejected": -4.0264387130737305, "loss": 0.9517, "nll_loss": 1.0636991262435913, "rewards/accuracies": 1.0, "rewards/chosen": -0.07576727867126465, "rewards/margins": 0.32687658071517944, "rewards/rejected": -0.4026438891887665, "step": 7292 }, { "epoch": 4.448985816684459, "grad_norm": 1.4332692623138428, "learning_rate": 8.837721984078384e-07, "log_odds_chosen": 3.0766568183898926, "log_odds_ratio": -0.31249740719795227, "logits/chosen": -0.8208605051040649, "logits/rejected": -1.125834345817566, "logps/chosen": -0.7839664816856384, "logps/rejected": -3.1670804023742676, "loss": 0.9412, "nll_loss": 0.8970096111297607, "rewards/accuracies": 0.75, "rewards/chosen": -0.07839664816856384, "rewards/margins": 0.23831138014793396, "rewards/rejected": -0.3167080283164978, "step": 7293 }, { "epoch": 4.449595851761476, "grad_norm": 1.7018218040466309, "learning_rate": 8.827924066135945e-07, "log_odds_chosen": 1.7049874067306519, "log_odds_ratio": -0.30936819314956665, "logits/chosen": -0.9107092022895813, "logits/rejected": -1.15852689743042, "logps/chosen": -0.9658893346786499, "logps/rejected": -2.3475544452667236, "loss": 0.8809, "nll_loss": 1.0073645114898682, "rewards/accuracies": 1.0, "rewards/chosen": -0.09658893942832947, "rewards/margins": 0.13816651701927185, "rewards/rejected": -0.23475545644760132, "step": 7294 }, { "epoch": 4.450205886838493, "grad_norm": 7.3845133781433105, "learning_rate": 8.818126148193508e-07, "log_odds_chosen": 1.2026315927505493, "log_odds_ratio": -0.3979710340499878, "logits/chosen": -0.749595582485199, "logits/rejected": -0.8709176778793335, "logps/chosen": -0.7744361758232117, "logps/rejected": -1.523155927658081, "loss": 1.0245, "nll_loss": 0.8380001783370972, "rewards/accuracies": 0.875, "rewards/chosen": -0.07744362205266953, "rewards/margins": 0.0748719796538353, "rewards/rejected": -0.15231558680534363, "step": 7295 }, { "epoch": 4.45081592191551, "grad_norm": 1.8105374574661255, "learning_rate": 8.808328230251071e-07, "log_odds_chosen": 1.004474401473999, "log_odds_ratio": -0.52705979347229, "logits/chosen": -0.9761223196983337, "logits/rejected": -0.9156639575958252, "logps/chosen": -0.9442787766456604, "logps/rejected": -1.825490951538086, "loss": 1.096, "nll_loss": 1.1200745105743408, "rewards/accuracies": 0.75, "rewards/chosen": -0.09442787617444992, "rewards/margins": 0.08812122046947479, "rewards/rejected": -0.18254908919334412, "step": 7296 }, { "epoch": 4.451425956992527, "grad_norm": 7.075171947479248, "learning_rate": 8.798530312308634e-07, "log_odds_chosen": 1.486555814743042, "log_odds_ratio": -0.33352163434028625, "logits/chosen": -0.8634555339813232, "logits/rejected": -1.0438898801803589, "logps/chosen": -0.8178741335868835, "logps/rejected": -1.9068472385406494, "loss": 1.0251, "nll_loss": 0.8621684908866882, "rewards/accuracies": 1.0, "rewards/chosen": -0.08178741484880447, "rewards/margins": 0.10889729857444763, "rewards/rejected": -0.1906847059726715, "step": 7297 }, { "epoch": 4.452035992069544, "grad_norm": 1.5617445707321167, "learning_rate": 8.788732394366197e-07, "log_odds_chosen": 2.5164053440093994, "log_odds_ratio": -0.21609950065612793, "logits/chosen": -0.5981695055961609, "logits/rejected": -0.8287888765335083, "logps/chosen": -0.6853485107421875, "logps/rejected": -2.5827951431274414, "loss": 0.8593, "nll_loss": 0.7971588969230652, "rewards/accuracies": 1.0, "rewards/chosen": -0.06853485107421875, "rewards/margins": 0.18974466621875763, "rewards/rejected": -0.2582795321941376, "step": 7298 }, { "epoch": 4.452646027146561, "grad_norm": 1.242254614830017, "learning_rate": 8.77893447642376e-07, "log_odds_chosen": 4.185074806213379, "log_odds_ratio": -0.11094249039888382, "logits/chosen": -0.8189677000045776, "logits/rejected": -1.172269582748413, "logps/chosen": -0.7647578120231628, "logps/rejected": -4.289278030395508, "loss": 0.9931, "nll_loss": 1.028953194618225, "rewards/accuracies": 1.0, "rewards/chosen": -0.07647578418254852, "rewards/margins": 0.35245198011398315, "rewards/rejected": -0.42892780900001526, "step": 7299 }, { "epoch": 4.453256062223578, "grad_norm": 11.1307954788208, "learning_rate": 8.769136558481323e-07, "log_odds_chosen": 1.785506248474121, "log_odds_ratio": -0.3700556755065918, "logits/chosen": -0.7101123929023743, "logits/rejected": -1.0032967329025269, "logps/chosen": -0.7314300537109375, "logps/rejected": -2.140831232070923, "loss": 1.3169, "nll_loss": 0.9381879568099976, "rewards/accuracies": 0.875, "rewards/chosen": -0.07314301282167435, "rewards/margins": 0.1409401148557663, "rewards/rejected": -0.21408313512802124, "step": 7300 }, { "epoch": 4.453866097300595, "grad_norm": 1.547997236251831, "learning_rate": 8.759338640538885e-07, "log_odds_chosen": 2.5541868209838867, "log_odds_ratio": -0.31801342964172363, "logits/chosen": -0.7665283679962158, "logits/rejected": -0.8076322078704834, "logps/chosen": -0.7791446447372437, "logps/rejected": -2.756147861480713, "loss": 1.0187, "nll_loss": 0.9193558692932129, "rewards/accuracies": 0.875, "rewards/chosen": -0.07791446149349213, "rewards/margins": 0.19770032167434692, "rewards/rejected": -0.27561476826667786, "step": 7301 }, { "epoch": 4.454476132377612, "grad_norm": 3.827394485473633, "learning_rate": 8.749540722596448e-07, "log_odds_chosen": 2.0359690189361572, "log_odds_ratio": -0.4197983145713806, "logits/chosen": -0.798477053642273, "logits/rejected": -0.7666381001472473, "logps/chosen": -0.7673882246017456, "logps/rejected": -2.0959253311157227, "loss": 1.0221, "nll_loss": 1.0086228847503662, "rewards/accuracies": 0.75, "rewards/chosen": -0.07673881947994232, "rewards/margins": 0.132853701710701, "rewards/rejected": -0.2095925211906433, "step": 7302 }, { "epoch": 4.4550861674546285, "grad_norm": 1.905515193939209, "learning_rate": 8.739742804654011e-07, "log_odds_chosen": 1.6641435623168945, "log_odds_ratio": -0.33804190158843994, "logits/chosen": -0.9165489673614502, "logits/rejected": -1.0601110458374023, "logps/chosen": -0.9570128321647644, "logps/rejected": -2.2897422313690186, "loss": 1.127, "nll_loss": 1.0846853256225586, "rewards/accuracies": 0.75, "rewards/chosen": -0.09570128470659256, "rewards/margins": 0.13327297568321228, "rewards/rejected": -0.22897425293922424, "step": 7303 }, { "epoch": 4.455696202531645, "grad_norm": 12.139161109924316, "learning_rate": 8.729944886711573e-07, "log_odds_chosen": 3.808014154434204, "log_odds_ratio": -0.42881619930267334, "logits/chosen": -0.960870623588562, "logits/rejected": -1.086719274520874, "logps/chosen": -0.6460157036781311, "logps/rejected": -3.9437432289123535, "loss": 1.0139, "nll_loss": 0.9142732620239258, "rewards/accuracies": 0.75, "rewards/chosen": -0.06460157036781311, "rewards/margins": 0.3297727704048157, "rewards/rejected": -0.3943743407726288, "step": 7304 }, { "epoch": 4.456306237608662, "grad_norm": 1.401958703994751, "learning_rate": 8.720146968769136e-07, "log_odds_chosen": 1.6192375421524048, "log_odds_ratio": -0.3322172462940216, "logits/chosen": -0.9635584354400635, "logits/rejected": -1.0106219053268433, "logps/chosen": -0.8157080411911011, "logps/rejected": -1.9364612102508545, "loss": 1.1675, "nll_loss": 0.9622116088867188, "rewards/accuracies": 0.75, "rewards/chosen": -0.08157080411911011, "rewards/margins": 0.1120753139257431, "rewards/rejected": -0.1936461180448532, "step": 7305 }, { "epoch": 4.456916272685679, "grad_norm": 8.254334449768066, "learning_rate": 8.710349050826699e-07, "log_odds_chosen": 2.529726028442383, "log_odds_ratio": -0.5004507899284363, "logits/chosen": -0.8031436800956726, "logits/rejected": -0.8286949992179871, "logps/chosen": -0.6678977608680725, "logps/rejected": -2.684781789779663, "loss": 1.0792, "nll_loss": 0.9481655359268188, "rewards/accuracies": 0.75, "rewards/chosen": -0.06678977608680725, "rewards/margins": 0.20168840885162354, "rewards/rejected": -0.2684781849384308, "step": 7306 }, { "epoch": 4.457526307762697, "grad_norm": 1.248151421546936, "learning_rate": 8.700551132884262e-07, "log_odds_chosen": 1.5998046398162842, "log_odds_ratio": -0.42696475982666016, "logits/chosen": -0.9071311354637146, "logits/rejected": -1.014646053314209, "logps/chosen": -0.8126513957977295, "logps/rejected": -2.1666324138641357, "loss": 1.0028, "nll_loss": 0.9069443941116333, "rewards/accuracies": 0.875, "rewards/chosen": -0.08126513659954071, "rewards/margins": 0.13539810478687286, "rewards/rejected": -0.21666322648525238, "step": 7307 }, { "epoch": 4.458136342839714, "grad_norm": 1.4949284791946411, "learning_rate": 8.690753214941825e-07, "log_odds_chosen": 1.6691789627075195, "log_odds_ratio": -0.6221764087677002, "logits/chosen": -0.8095999956130981, "logits/rejected": -0.967313826084137, "logps/chosen": -0.7645047903060913, "logps/rejected": -2.046860694885254, "loss": 1.0939, "nll_loss": 0.9984428882598877, "rewards/accuracies": 0.5, "rewards/chosen": -0.07645047456026077, "rewards/margins": 0.1282355934381485, "rewards/rejected": -0.20468607544898987, "step": 7308 }, { "epoch": 4.4587463779167305, "grad_norm": 12.120192527770996, "learning_rate": 8.680955296999387e-07, "log_odds_chosen": 3.688325881958008, "log_odds_ratio": -0.26042962074279785, "logits/chosen": -0.8151029348373413, "logits/rejected": -1.0938055515289307, "logps/chosen": -0.6113883256912231, "logps/rejected": -3.5999937057495117, "loss": 0.9706, "nll_loss": 0.7812473177909851, "rewards/accuracies": 0.875, "rewards/chosen": -0.061138831079006195, "rewards/margins": 0.2988605201244354, "rewards/rejected": -0.3599993586540222, "step": 7309 }, { "epoch": 4.459356412993747, "grad_norm": 1.4860128164291382, "learning_rate": 8.67115737905695e-07, "log_odds_chosen": 2.4694409370422363, "log_odds_ratio": -0.22764502465724945, "logits/chosen": -0.9752058982849121, "logits/rejected": -1.1549493074417114, "logps/chosen": -0.52353835105896, "logps/rejected": -2.3264083862304688, "loss": 1.301, "nll_loss": 1.210893988609314, "rewards/accuracies": 1.0, "rewards/chosen": -0.052353840321302414, "rewards/margins": 0.18028700351715088, "rewards/rejected": -0.2326408326625824, "step": 7310 }, { "epoch": 4.459966448070764, "grad_norm": 2.218473434448242, "learning_rate": 8.661359461114512e-07, "log_odds_chosen": 2.003387689590454, "log_odds_ratio": -0.3120240867137909, "logits/chosen": -0.7393602728843689, "logits/rejected": -1.0031477212905884, "logps/chosen": -0.6164226531982422, "logps/rejected": -2.0491724014282227, "loss": 0.9644, "nll_loss": 0.7048011422157288, "rewards/accuracies": 0.875, "rewards/chosen": -0.06164226680994034, "rewards/margins": 0.14327499270439148, "rewards/rejected": -0.20491725206375122, "step": 7311 }, { "epoch": 4.460576483147781, "grad_norm": 1.9104336500167847, "learning_rate": 8.651561543172076e-07, "log_odds_chosen": 1.7698185443878174, "log_odds_ratio": -0.25798889994621277, "logits/chosen": -0.924649715423584, "logits/rejected": -0.9748673439025879, "logps/chosen": -0.895296037197113, "logps/rejected": -2.296949625015259, "loss": 1.0635, "nll_loss": 1.0613819360733032, "rewards/accuracies": 0.875, "rewards/chosen": -0.0895296037197113, "rewards/margins": 0.14016537368297577, "rewards/rejected": -0.22969496250152588, "step": 7312 }, { "epoch": 4.461186518224798, "grad_norm": 1.9659008979797363, "learning_rate": 8.641763625229639e-07, "log_odds_chosen": 2.187286853790283, "log_odds_ratio": -0.4279293715953827, "logits/chosen": -0.6243493556976318, "logits/rejected": -0.8355686068534851, "logps/chosen": -0.5334886908531189, "logps/rejected": -2.224196195602417, "loss": 0.8925, "nll_loss": 0.7023341059684753, "rewards/accuracies": 0.75, "rewards/chosen": -0.05334886908531189, "rewards/margins": 0.1690707504749298, "rewards/rejected": -0.2224196195602417, "step": 7313 }, { "epoch": 4.461796553301815, "grad_norm": 1.929017424583435, "learning_rate": 8.631965707287201e-07, "log_odds_chosen": 2.939221143722534, "log_odds_ratio": -0.23863747715950012, "logits/chosen": -0.9212055802345276, "logits/rejected": -0.9993726015090942, "logps/chosen": -0.6910113096237183, "logps/rejected": -3.0827534198760986, "loss": 0.9978, "nll_loss": 0.9077985882759094, "rewards/accuracies": 1.0, "rewards/chosen": -0.06910113990306854, "rewards/margins": 0.23917421698570251, "rewards/rejected": -0.30827534198760986, "step": 7314 }, { "epoch": 4.4624065883788315, "grad_norm": 18.587636947631836, "learning_rate": 8.622167789344765e-07, "log_odds_chosen": 2.9327003955841064, "log_odds_ratio": -0.2306104302406311, "logits/chosen": -0.7579438090324402, "logits/rejected": -1.083437204360962, "logps/chosen": -0.7247176170349121, "logps/rejected": -3.0089759826660156, "loss": 0.9922, "nll_loss": 0.8226916193962097, "rewards/accuracies": 0.875, "rewards/chosen": -0.07247176021337509, "rewards/margins": 0.22842584550380707, "rewards/rejected": -0.30089759826660156, "step": 7315 }, { "epoch": 4.463016623455848, "grad_norm": 5.798833847045898, "learning_rate": 8.612369871402326e-07, "log_odds_chosen": 2.0501291751861572, "log_odds_ratio": -0.4540334641933441, "logits/chosen": -0.9632310271263123, "logits/rejected": -1.1103910207748413, "logps/chosen": -0.8397084474563599, "logps/rejected": -2.5142104625701904, "loss": 1.1398, "nll_loss": 1.0303915739059448, "rewards/accuracies": 0.625, "rewards/chosen": -0.08397084474563599, "rewards/margins": 0.1674501746892929, "rewards/rejected": -0.2514210343360901, "step": 7316 }, { "epoch": 4.463626658532865, "grad_norm": 1.8909733295440674, "learning_rate": 8.602571953459889e-07, "log_odds_chosen": 1.5443966388702393, "log_odds_ratio": -0.5071924924850464, "logits/chosen": -0.9136887192726135, "logits/rejected": -1.0026438236236572, "logps/chosen": -0.8283699750900269, "logps/rejected": -2.072038173675537, "loss": 1.0077, "nll_loss": 0.9587511420249939, "rewards/accuracies": 0.75, "rewards/chosen": -0.08283699303865433, "rewards/margins": 0.12436683475971222, "rewards/rejected": -0.20720382034778595, "step": 7317 }, { "epoch": 4.464236693609883, "grad_norm": 2.1863622665405273, "learning_rate": 8.592774035517451e-07, "log_odds_chosen": 2.0265281200408936, "log_odds_ratio": -0.3587585687637329, "logits/chosen": -0.9611597061157227, "logits/rejected": -1.0325573682785034, "logps/chosen": -0.7478303909301758, "logps/rejected": -2.361109733581543, "loss": 0.8545, "nll_loss": 0.8992838859558105, "rewards/accuracies": 0.75, "rewards/chosen": -0.07478303462266922, "rewards/margins": 0.16132794320583344, "rewards/rejected": -0.23611098527908325, "step": 7318 }, { "epoch": 4.4648467286869, "grad_norm": 2.2526862621307373, "learning_rate": 8.582976117575015e-07, "log_odds_chosen": 2.0671491622924805, "log_odds_ratio": -0.32622960209846497, "logits/chosen": -0.9552960395812988, "logits/rejected": -0.9897705912590027, "logps/chosen": -0.9387015700340271, "logps/rejected": -2.8001766204833984, "loss": 1.1433, "nll_loss": 1.0543118715286255, "rewards/accuracies": 0.75, "rewards/chosen": -0.093870148062706, "rewards/margins": 0.1861475259065628, "rewards/rejected": -0.2800176739692688, "step": 7319 }, { "epoch": 4.465456763763917, "grad_norm": 1.8047195672988892, "learning_rate": 8.573178199632578e-07, "log_odds_chosen": 0.08309908956289291, "log_odds_ratio": -0.6866533756256104, "logits/chosen": -1.1354116201400757, "logits/rejected": -1.0041245222091675, "logps/chosen": -0.9569282531738281, "logps/rejected": -1.0256483554840088, "loss": 0.9889, "nll_loss": 1.037611484527588, "rewards/accuracies": 0.625, "rewards/chosen": -0.09569282829761505, "rewards/margins": 0.0068720076233148575, "rewards/rejected": -0.10256483405828476, "step": 7320 }, { "epoch": 4.4660667988409335, "grad_norm": 1.8327208757400513, "learning_rate": 8.56338028169014e-07, "log_odds_chosen": 1.3386225700378418, "log_odds_ratio": -0.6114075183868408, "logits/chosen": -0.893485963344574, "logits/rejected": -0.9490231871604919, "logps/chosen": -0.7266790866851807, "logps/rejected": -1.759896159172058, "loss": 1.0492, "nll_loss": 0.9745832681655884, "rewards/accuracies": 0.625, "rewards/chosen": -0.07266790419816971, "rewards/margins": 0.10332170128822327, "rewards/rejected": -0.17598959803581238, "step": 7321 }, { "epoch": 4.46667683391795, "grad_norm": 2.905503273010254, "learning_rate": 8.553582363747704e-07, "log_odds_chosen": 0.7043405771255493, "log_odds_ratio": -0.45923107862472534, "logits/chosen": -0.6544018983840942, "logits/rejected": -0.9185299277305603, "logps/chosen": -0.8262152671813965, "logps/rejected": -1.3442941904067993, "loss": 1.06, "nll_loss": 1.0479906797409058, "rewards/accuracies": 0.75, "rewards/chosen": -0.08262152969837189, "rewards/margins": 0.05180788040161133, "rewards/rejected": -0.13442941009998322, "step": 7322 }, { "epoch": 4.467286868994967, "grad_norm": 1.4609142541885376, "learning_rate": 8.543784445805265e-07, "log_odds_chosen": 4.87380313873291, "log_odds_ratio": -0.3901066184043884, "logits/chosen": -0.8798646926879883, "logits/rejected": -1.0346041917800903, "logps/chosen": -0.8029286861419678, "logps/rejected": -5.017953395843506, "loss": 1.0736, "nll_loss": 1.0975841283798218, "rewards/accuracies": 0.75, "rewards/chosen": -0.08029286563396454, "rewards/margins": 0.4215024709701538, "rewards/rejected": -0.5017953515052795, "step": 7323 }, { "epoch": 4.467896904071984, "grad_norm": 4.667251110076904, "learning_rate": 8.533986527862828e-07, "log_odds_chosen": 1.7376044988632202, "log_odds_ratio": -0.36222508549690247, "logits/chosen": -0.4050273597240448, "logits/rejected": -0.49192267656326294, "logps/chosen": -0.6862167119979858, "logps/rejected": -1.8331494331359863, "loss": 1.042, "nll_loss": 0.796660304069519, "rewards/accuracies": 0.75, "rewards/chosen": -0.0686216726899147, "rewards/margins": 0.114693284034729, "rewards/rejected": -0.1833149492740631, "step": 7324 }, { "epoch": 4.468506939149001, "grad_norm": 1.3720669746398926, "learning_rate": 8.524188609920392e-07, "log_odds_chosen": 2.35917592048645, "log_odds_ratio": -0.3349139392375946, "logits/chosen": -0.844971776008606, "logits/rejected": -0.9090408086776733, "logps/chosen": -0.7733492851257324, "logps/rejected": -2.4171297550201416, "loss": 0.9728, "nll_loss": 0.9258332252502441, "rewards/accuracies": 0.75, "rewards/chosen": -0.077334925532341, "rewards/margins": 0.16437803208827972, "rewards/rejected": -0.24171297252178192, "step": 7325 }, { "epoch": 4.469116974226018, "grad_norm": 1.1277767419815063, "learning_rate": 8.514390691977954e-07, "log_odds_chosen": 1.728342056274414, "log_odds_ratio": -0.3561590015888214, "logits/chosen": -0.7973936796188354, "logits/rejected": -0.8226009607315063, "logps/chosen": -0.6939506530761719, "logps/rejected": -1.9181740283966064, "loss": 0.9036, "nll_loss": 0.9843106269836426, "rewards/accuracies": 0.875, "rewards/chosen": -0.06939506530761719, "rewards/margins": 0.12242233753204346, "rewards/rejected": -0.19181740283966064, "step": 7326 }, { "epoch": 4.469727009303035, "grad_norm": 2.131032943725586, "learning_rate": 8.504592774035517e-07, "log_odds_chosen": 0.7677636742591858, "log_odds_ratio": -0.710273265838623, "logits/chosen": -0.6814022660255432, "logits/rejected": -0.8381555080413818, "logps/chosen": -1.0867140293121338, "logps/rejected": -1.5750401020050049, "loss": 0.9938, "nll_loss": 0.8716646432876587, "rewards/accuracies": 0.625, "rewards/chosen": -0.1086714044213295, "rewards/margins": 0.04883261024951935, "rewards/rejected": -0.15750400722026825, "step": 7327 }, { "epoch": 4.470337044380052, "grad_norm": 2.143153667449951, "learning_rate": 8.49479485609308e-07, "log_odds_chosen": 0.747893214225769, "log_odds_ratio": -0.6799324154853821, "logits/chosen": -0.8200926780700684, "logits/rejected": -0.8718365430831909, "logps/chosen": -0.8214800357818604, "logps/rejected": -1.4321227073669434, "loss": 1.0678, "nll_loss": 0.9922990202903748, "rewards/accuracies": 0.625, "rewards/chosen": -0.0821480005979538, "rewards/margins": 0.06106426939368248, "rewards/rejected": -0.14321227371692657, "step": 7328 }, { "epoch": 4.470947079457069, "grad_norm": 1.2237982749938965, "learning_rate": 8.484996938150643e-07, "log_odds_chosen": 3.102548360824585, "log_odds_ratio": -0.2688078284263611, "logits/chosen": -0.8013768792152405, "logits/rejected": -1.0403847694396973, "logps/chosen": -0.6652092933654785, "logps/rejected": -3.127593755722046, "loss": 0.9577, "nll_loss": 0.8686007261276245, "rewards/accuracies": 0.875, "rewards/chosen": -0.06652092933654785, "rewards/margins": 0.24623844027519226, "rewards/rejected": -0.3127593994140625, "step": 7329 }, { "epoch": 4.471557114534086, "grad_norm": 10.220643997192383, "learning_rate": 8.475199020208206e-07, "log_odds_chosen": 5.291333198547363, "log_odds_ratio": -0.15627656877040863, "logits/chosen": -0.9929132461547852, "logits/rejected": -1.2272729873657227, "logps/chosen": -0.6706429123878479, "logps/rejected": -5.141712188720703, "loss": 1.1555, "nll_loss": 1.0237677097320557, "rewards/accuracies": 1.0, "rewards/chosen": -0.06706429272890091, "rewards/margins": 0.44710689783096313, "rewards/rejected": -0.5141711831092834, "step": 7330 }, { "epoch": 4.472167149611103, "grad_norm": 15.428010940551758, "learning_rate": 8.465401102265767e-07, "log_odds_chosen": 2.2565479278564453, "log_odds_ratio": -0.3556448221206665, "logits/chosen": -1.0172946453094482, "logits/rejected": -0.9429687857627869, "logps/chosen": -0.8383452892303467, "logps/rejected": -2.581388235092163, "loss": 0.8399, "nll_loss": 1.0774651765823364, "rewards/accuracies": 0.75, "rewards/chosen": -0.08383452892303467, "rewards/margins": 0.1743043065071106, "rewards/rejected": -0.25813883543014526, "step": 7331 }, { "epoch": 4.47277718468812, "grad_norm": 3.933420419692993, "learning_rate": 8.455603184323331e-07, "log_odds_chosen": 3.758232355117798, "log_odds_ratio": -0.3129076361656189, "logits/chosen": -0.6620584726333618, "logits/rejected": -0.9414805769920349, "logps/chosen": -0.7166666984558105, "logps/rejected": -3.851557970046997, "loss": 1.0269, "nll_loss": 0.8544315099716187, "rewards/accuracies": 0.75, "rewards/chosen": -0.0716666653752327, "rewards/margins": 0.31348916888237, "rewards/rejected": -0.3851558268070221, "step": 7332 }, { "epoch": 4.4733872197651365, "grad_norm": 2.1787009239196777, "learning_rate": 8.445805266380893e-07, "log_odds_chosen": 0.9570392966270447, "log_odds_ratio": -0.5603621602058411, "logits/chosen": -0.9340665340423584, "logits/rejected": -1.0213556289672852, "logps/chosen": -0.7914748191833496, "logps/rejected": -1.5574564933776855, "loss": 1.0479, "nll_loss": 0.8440250754356384, "rewards/accuracies": 0.625, "rewards/chosen": -0.07914748042821884, "rewards/margins": 0.07659818232059479, "rewards/rejected": -0.15574565529823303, "step": 7333 }, { "epoch": 4.473997254842153, "grad_norm": 2.3892621994018555, "learning_rate": 8.436007348438456e-07, "log_odds_chosen": 1.8221797943115234, "log_odds_ratio": -0.4233716130256653, "logits/chosen": -0.9772245287895203, "logits/rejected": -1.1565781831741333, "logps/chosen": -0.6279999017715454, "logps/rejected": -1.9932032823562622, "loss": 1.1581, "nll_loss": 0.991340160369873, "rewards/accuracies": 0.75, "rewards/chosen": -0.06279999017715454, "rewards/margins": 0.13652034103870392, "rewards/rejected": -0.19932031631469727, "step": 7334 }, { "epoch": 4.47460728991917, "grad_norm": 2.4151923656463623, "learning_rate": 8.42620943049602e-07, "log_odds_chosen": 2.7174124717712402, "log_odds_ratio": -0.4320506453514099, "logits/chosen": -0.9665039777755737, "logits/rejected": -0.9270749092102051, "logps/chosen": -0.6574429869651794, "logps/rejected": -2.9664506912231445, "loss": 0.9358, "nll_loss": 0.7898802161216736, "rewards/accuracies": 0.75, "rewards/chosen": -0.0657442957162857, "rewards/margins": 0.23090079426765442, "rewards/rejected": -0.2966451048851013, "step": 7335 }, { "epoch": 4.475217324996187, "grad_norm": 2.2860097885131836, "learning_rate": 8.416411512553582e-07, "log_odds_chosen": 2.8462822437286377, "log_odds_ratio": -0.31862354278564453, "logits/chosen": -0.7857813835144043, "logits/rejected": -0.720020055770874, "logps/chosen": -0.6714729070663452, "logps/rejected": -2.9614903926849365, "loss": 0.981, "nll_loss": 0.8809858560562134, "rewards/accuracies": 0.875, "rewards/chosen": -0.06714728474617004, "rewards/margins": 0.22900177538394928, "rewards/rejected": -0.29614901542663574, "step": 7336 }, { "epoch": 4.475827360073204, "grad_norm": 2.924980401992798, "learning_rate": 8.406613594611145e-07, "log_odds_chosen": 1.592496633529663, "log_odds_ratio": -0.34560471773147583, "logits/chosen": -0.9572579860687256, "logits/rejected": -0.935234785079956, "logps/chosen": -0.7918305993080139, "logps/rejected": -1.9258897304534912, "loss": 0.9779, "nll_loss": 0.9497373700141907, "rewards/accuracies": 0.875, "rewards/chosen": -0.07918305695056915, "rewards/margins": 0.11340592056512833, "rewards/rejected": -0.19258897006511688, "step": 7337 }, { "epoch": 4.476437395150221, "grad_norm": 1.4425774812698364, "learning_rate": 8.396815676668707e-07, "log_odds_chosen": 1.9782499074935913, "log_odds_ratio": -0.5828199982643127, "logits/chosen": -0.8678590655326843, "logits/rejected": -0.9428130388259888, "logps/chosen": -0.8301132321357727, "logps/rejected": -2.2403059005737305, "loss": 1.1428, "nll_loss": 1.3241150379180908, "rewards/accuracies": 0.75, "rewards/chosen": -0.08301132172346115, "rewards/margins": 0.14101925492286682, "rewards/rejected": -0.22403058409690857, "step": 7338 }, { "epoch": 4.477047430227238, "grad_norm": 1.5724756717681885, "learning_rate": 8.38701775872627e-07, "log_odds_chosen": 0.9410496950149536, "log_odds_ratio": -0.5056589245796204, "logits/chosen": -0.7849550247192383, "logits/rejected": -0.8918843269348145, "logps/chosen": -0.8550278544425964, "logps/rejected": -1.4661307334899902, "loss": 1.0244, "nll_loss": 0.9348435401916504, "rewards/accuracies": 0.75, "rewards/chosen": -0.08550278842449188, "rewards/margins": 0.06111029163002968, "rewards/rejected": -0.14661307632923126, "step": 7339 }, { "epoch": 4.477657465304255, "grad_norm": 1.8891334533691406, "learning_rate": 8.377219840783833e-07, "log_odds_chosen": 1.5112800598144531, "log_odds_ratio": -0.5120108723640442, "logits/chosen": -1.0020774602890015, "logits/rejected": -1.0718960762023926, "logps/chosen": -0.955682635307312, "logps/rejected": -2.361703872680664, "loss": 1.115, "nll_loss": 1.398589849472046, "rewards/accuracies": 0.5, "rewards/chosen": -0.09556826949119568, "rewards/margins": 0.14060211181640625, "rewards/rejected": -0.23617038130760193, "step": 7340 }, { "epoch": 4.478267500381272, "grad_norm": 1.5902392864227295, "learning_rate": 8.367421922841396e-07, "log_odds_chosen": 2.667168617248535, "log_odds_ratio": -0.653615415096283, "logits/chosen": -1.029479742050171, "logits/rejected": -0.9990371465682983, "logps/chosen": -0.7783706188201904, "logps/rejected": -3.3271899223327637, "loss": 0.9739, "nll_loss": 0.9763628244400024, "rewards/accuracies": 0.5, "rewards/chosen": -0.07783706486225128, "rewards/margins": 0.25488194823265076, "rewards/rejected": -0.33271899819374084, "step": 7341 }, { "epoch": 4.478877535458289, "grad_norm": 2.140643835067749, "learning_rate": 8.357624004898959e-07, "log_odds_chosen": 0.47666922211647034, "log_odds_ratio": -0.5840233564376831, "logits/chosen": -0.774867594242096, "logits/rejected": -0.9202399253845215, "logps/chosen": -0.7908141613006592, "logps/rejected": -1.1274137496948242, "loss": 0.9618, "nll_loss": 0.8945866227149963, "rewards/accuracies": 0.75, "rewards/chosen": -0.07908141613006592, "rewards/margins": 0.03365997225046158, "rewards/rejected": -0.1127413809299469, "step": 7342 }, { "epoch": 4.479487570535306, "grad_norm": 1.3791605234146118, "learning_rate": 8.347826086956521e-07, "log_odds_chosen": 3.7113747596740723, "log_odds_ratio": -0.39812684059143066, "logits/chosen": -0.8903038501739502, "logits/rejected": -1.1028032302856445, "logps/chosen": -0.7503711581230164, "logps/rejected": -3.8730318546295166, "loss": 1.0653, "nll_loss": 0.984717845916748, "rewards/accuracies": 0.75, "rewards/chosen": -0.07503712922334671, "rewards/margins": 0.3122660517692566, "rewards/rejected": -0.3873032033443451, "step": 7343 }, { "epoch": 4.480097605612323, "grad_norm": 1.4643055200576782, "learning_rate": 8.338028169014085e-07, "log_odds_chosen": 0.20995575189590454, "log_odds_ratio": -0.6473522782325745, "logits/chosen": -0.6537251472473145, "logits/rejected": -0.7976682782173157, "logps/chosen": -0.8002055287361145, "logps/rejected": -0.959709644317627, "loss": 0.9694, "nll_loss": 1.0164813995361328, "rewards/accuracies": 0.5, "rewards/chosen": -0.08002055436372757, "rewards/margins": 0.015950413420796394, "rewards/rejected": -0.09597097337245941, "step": 7344 }, { "epoch": 4.48070764068934, "grad_norm": 2.0329954624176025, "learning_rate": 8.328230251071648e-07, "log_odds_chosen": 2.165891647338867, "log_odds_ratio": -0.5292129516601562, "logits/chosen": -1.0140106678009033, "logits/rejected": -1.1515268087387085, "logps/chosen": -1.0232033729553223, "logps/rejected": -3.0628671646118164, "loss": 1.1834, "nll_loss": 1.2476154565811157, "rewards/accuracies": 0.375, "rewards/chosen": -0.1023203432559967, "rewards/margins": 0.20396637916564941, "rewards/rejected": -0.3062867522239685, "step": 7345 }, { "epoch": 4.481317675766356, "grad_norm": 4.381892681121826, "learning_rate": 8.318432333129209e-07, "log_odds_chosen": 1.9899520874023438, "log_odds_ratio": -0.49199041724205017, "logits/chosen": -1.0016236305236816, "logits/rejected": -1.0792198181152344, "logps/chosen": -1.0897070169448853, "logps/rejected": -2.925528049468994, "loss": 0.9772, "nll_loss": 1.1463801860809326, "rewards/accuracies": 0.625, "rewards/chosen": -0.10897070169448853, "rewards/margins": 0.1835821121931076, "rewards/rejected": -0.29255279898643494, "step": 7346 }, { "epoch": 4.481927710843373, "grad_norm": 1.628504991531372, "learning_rate": 8.308634415186772e-07, "log_odds_chosen": 3.119462251663208, "log_odds_ratio": -0.2224797159433365, "logits/chosen": -0.8802862167358398, "logits/rejected": -1.0381369590759277, "logps/chosen": -0.7289295196533203, "logps/rejected": -3.1907379627227783, "loss": 1.0935, "nll_loss": 0.961788535118103, "rewards/accuracies": 0.875, "rewards/chosen": -0.07289295643568039, "rewards/margins": 0.24618083238601685, "rewards/rejected": -0.31907379627227783, "step": 7347 }, { "epoch": 4.48253774592039, "grad_norm": 3.4982998371124268, "learning_rate": 8.298836497244335e-07, "log_odds_chosen": 1.939791202545166, "log_odds_ratio": -0.5678019523620605, "logits/chosen": -0.9788472652435303, "logits/rejected": -0.9932087659835815, "logps/chosen": -0.9027696251869202, "logps/rejected": -2.37443470954895, "loss": 1.1368, "nll_loss": 1.1480724811553955, "rewards/accuracies": 0.5, "rewards/chosen": -0.09027695655822754, "rewards/margins": 0.14716652035713196, "rewards/rejected": -0.23744350671768188, "step": 7348 }, { "epoch": 4.483147780997407, "grad_norm": 5.132514953613281, "learning_rate": 8.289038579301898e-07, "log_odds_chosen": 1.3712716102600098, "log_odds_ratio": -0.48521581292152405, "logits/chosen": -0.7571182250976562, "logits/rejected": -0.8065653443336487, "logps/chosen": -0.7964106798171997, "logps/rejected": -1.8242051601409912, "loss": 0.9821, "nll_loss": 0.9090561270713806, "rewards/accuracies": 0.75, "rewards/chosen": -0.07964107394218445, "rewards/margins": 0.10277942568063736, "rewards/rejected": -0.1824204921722412, "step": 7349 }, { "epoch": 4.483757816074425, "grad_norm": 4.5678019523620605, "learning_rate": 8.279240661359461e-07, "log_odds_chosen": 2.991419792175293, "log_odds_ratio": -0.2773193120956421, "logits/chosen": -0.9417076110839844, "logits/rejected": -1.0429646968841553, "logps/chosen": -0.6406999230384827, "logps/rejected": -3.109959125518799, "loss": 0.8433, "nll_loss": 0.8212791681289673, "rewards/accuracies": 0.875, "rewards/chosen": -0.06406999379396439, "rewards/margins": 0.24692592024803162, "rewards/rejected": -0.3109959065914154, "step": 7350 }, { "epoch": 4.4843678511514415, "grad_norm": 5.199321269989014, "learning_rate": 8.269442743417024e-07, "log_odds_chosen": 2.445732831954956, "log_odds_ratio": -0.5758165717124939, "logits/chosen": -0.9351984262466431, "logits/rejected": -1.0611572265625, "logps/chosen": -1.0692871809005737, "logps/rejected": -3.162496328353882, "loss": 1.0212, "nll_loss": 1.2180354595184326, "rewards/accuracies": 0.75, "rewards/chosen": -0.10692871361970901, "rewards/margins": 0.20932093262672424, "rewards/rejected": -0.31624963879585266, "step": 7351 }, { "epoch": 4.484977886228458, "grad_norm": 1.4651991128921509, "learning_rate": 8.259644825474587e-07, "log_odds_chosen": 1.896855354309082, "log_odds_ratio": -0.44041597843170166, "logits/chosen": -1.0111751556396484, "logits/rejected": -1.0700995922088623, "logps/chosen": -0.8837985992431641, "logps/rejected": -2.3257312774658203, "loss": 1.0411, "nll_loss": 1.2122890949249268, "rewards/accuracies": 0.875, "rewards/chosen": -0.0883798599243164, "rewards/margins": 0.14419324696063995, "rewards/rejected": -0.23257310688495636, "step": 7352 }, { "epoch": 4.485587921305475, "grad_norm": 1.1078590154647827, "learning_rate": 8.249846907532148e-07, "log_odds_chosen": 3.23593807220459, "log_odds_ratio": -0.3486342132091522, "logits/chosen": -1.1191613674163818, "logits/rejected": -1.3017369508743286, "logps/chosen": -0.7683626413345337, "logps/rejected": -3.597559928894043, "loss": 1.1723, "nll_loss": 1.4384963512420654, "rewards/accuracies": 0.875, "rewards/chosen": -0.07683626562356949, "rewards/margins": 0.2829197347164154, "rewards/rejected": -0.3597559928894043, "step": 7353 }, { "epoch": 4.486197956382492, "grad_norm": 1.3077902793884277, "learning_rate": 8.240048989589712e-07, "log_odds_chosen": 2.4038846492767334, "log_odds_ratio": -0.3277166187763214, "logits/chosen": -0.9367495775222778, "logits/rejected": -1.1810699701309204, "logps/chosen": -0.8258388638496399, "logps/rejected": -2.689763307571411, "loss": 1.1162, "nll_loss": 1.0764374732971191, "rewards/accuracies": 0.875, "rewards/chosen": -0.08258388936519623, "rewards/margins": 0.18639245629310608, "rewards/rejected": -0.2689763307571411, "step": 7354 }, { "epoch": 4.486807991459509, "grad_norm": 1.3979662656784058, "learning_rate": 8.230251071647275e-07, "log_odds_chosen": 0.9864583015441895, "log_odds_ratio": -0.41716668009757996, "logits/chosen": -1.0050368309020996, "logits/rejected": -1.040366291999817, "logps/chosen": -0.8520076274871826, "logps/rejected": -1.5790544748306274, "loss": 1.1042, "nll_loss": 1.2182060480117798, "rewards/accuracies": 0.75, "rewards/chosen": -0.08520075678825378, "rewards/margins": 0.07270468771457672, "rewards/rejected": -0.1579054594039917, "step": 7355 }, { "epoch": 4.487418026536526, "grad_norm": 2.4505834579467773, "learning_rate": 8.220453153704837e-07, "log_odds_chosen": 4.080050468444824, "log_odds_ratio": -0.1430751383304596, "logits/chosen": -0.6094552874565125, "logits/rejected": -0.9528887271881104, "logps/chosen": -0.842659592628479, "logps/rejected": -4.229633808135986, "loss": 0.9394, "nll_loss": 0.8067371845245361, "rewards/accuracies": 0.875, "rewards/chosen": -0.08426594734191895, "rewards/margins": 0.3386974334716797, "rewards/rejected": -0.42296338081359863, "step": 7356 }, { "epoch": 4.488028061613543, "grad_norm": 3.724644899368286, "learning_rate": 8.2106552357624e-07, "log_odds_chosen": 1.624928593635559, "log_odds_ratio": -0.3931092619895935, "logits/chosen": -0.7465848922729492, "logits/rejected": -0.9721271991729736, "logps/chosen": -0.7402251958847046, "logps/rejected": -1.8297667503356934, "loss": 0.8572, "nll_loss": 0.7792998552322388, "rewards/accuracies": 0.875, "rewards/chosen": -0.07402251660823822, "rewards/margins": 0.10895416140556335, "rewards/rejected": -0.18297667801380157, "step": 7357 }, { "epoch": 4.4886380966905595, "grad_norm": 1.0601489543914795, "learning_rate": 8.200857317819963e-07, "log_odds_chosen": 1.8651148080825806, "log_odds_ratio": -0.3619619607925415, "logits/chosen": -1.020580768585205, "logits/rejected": -1.148630976676941, "logps/chosen": -0.9406178593635559, "logps/rejected": -2.5403120517730713, "loss": 1.0924, "nll_loss": 1.2388540506362915, "rewards/accuracies": 0.875, "rewards/chosen": -0.09406179189682007, "rewards/margins": 0.15996943414211273, "rewards/rejected": -0.254031240940094, "step": 7358 }, { "epoch": 4.489248131767576, "grad_norm": 1.671683430671692, "learning_rate": 8.191059399877526e-07, "log_odds_chosen": 2.1756041049957275, "log_odds_ratio": -0.43999895453453064, "logits/chosen": -0.8807144165039062, "logits/rejected": -0.9107118248939514, "logps/chosen": -1.1076552867889404, "logps/rejected": -2.9477148056030273, "loss": 1.1302, "nll_loss": 1.0519579648971558, "rewards/accuracies": 0.625, "rewards/chosen": -0.11076553165912628, "rewards/margins": 0.1840059459209442, "rewards/rejected": -0.2947714924812317, "step": 7359 }, { "epoch": 4.489858166844593, "grad_norm": 2.4120848178863525, "learning_rate": 8.181261481935088e-07, "log_odds_chosen": 1.783801794052124, "log_odds_ratio": -0.4756685495376587, "logits/chosen": -0.7401595711708069, "logits/rejected": -1.0174176692962646, "logps/chosen": -0.7003558874130249, "logps/rejected": -2.016535997390747, "loss": 0.9786, "nll_loss": 0.8687793612480164, "rewards/accuracies": 0.5, "rewards/chosen": -0.07003559172153473, "rewards/margins": 0.13161800801753998, "rewards/rejected": -0.2016535997390747, "step": 7360 }, { "epoch": 4.490468201921611, "grad_norm": 3.8289599418640137, "learning_rate": 8.171463563992651e-07, "log_odds_chosen": 1.6510016918182373, "log_odds_ratio": -0.3631780743598938, "logits/chosen": -1.0821776390075684, "logits/rejected": -1.196254014968872, "logps/chosen": -1.27168869972229, "logps/rejected": -2.601095199584961, "loss": 1.2206, "nll_loss": 1.4737653732299805, "rewards/accuracies": 0.75, "rewards/chosen": -0.12716886401176453, "rewards/margins": 0.13294067978858948, "rewards/rejected": -0.260109543800354, "step": 7361 }, { "epoch": 4.491078236998628, "grad_norm": 1.076985239982605, "learning_rate": 8.161665646050214e-07, "log_odds_chosen": 1.254558801651001, "log_odds_ratio": -0.5099833607673645, "logits/chosen": -1.023016095161438, "logits/rejected": -1.0087671279907227, "logps/chosen": -0.7621167302131653, "logps/rejected": -1.8332546949386597, "loss": 0.9572, "nll_loss": 1.0104708671569824, "rewards/accuracies": 0.75, "rewards/chosen": -0.07621167600154877, "rewards/margins": 0.1071137934923172, "rewards/rejected": -0.18332546949386597, "step": 7362 }, { "epoch": 4.491688272075645, "grad_norm": 6.136900901794434, "learning_rate": 8.151867728107776e-07, "log_odds_chosen": 0.9824250340461731, "log_odds_ratio": -0.37791907787323, "logits/chosen": -1.0393424034118652, "logits/rejected": -0.9980953335762024, "logps/chosen": -0.8265675902366638, "logps/rejected": -1.363457202911377, "loss": 1.0698, "nll_loss": 1.091482162475586, "rewards/accuracies": 0.875, "rewards/chosen": -0.08265675604343414, "rewards/margins": 0.053688958287239075, "rewards/rejected": -0.1363457292318344, "step": 7363 }, { "epoch": 4.492298307152661, "grad_norm": 1.6702601909637451, "learning_rate": 8.14206981016534e-07, "log_odds_chosen": 1.2327977418899536, "log_odds_ratio": -0.5459111332893372, "logits/chosen": -0.9320844411849976, "logits/rejected": -1.1287541389465332, "logps/chosen": -1.1167993545532227, "logps/rejected": -2.1329734325408936, "loss": 1.1538, "nll_loss": 1.268754005432129, "rewards/accuracies": 0.75, "rewards/chosen": -0.11167994141578674, "rewards/margins": 0.10161739587783813, "rewards/rejected": -0.21329733729362488, "step": 7364 }, { "epoch": 4.492908342229678, "grad_norm": 1.71885085105896, "learning_rate": 8.132271892222903e-07, "log_odds_chosen": 3.200305461883545, "log_odds_ratio": -0.21952897310256958, "logits/chosen": -0.8352646827697754, "logits/rejected": -1.1228803396224976, "logps/chosen": -0.64288729429245, "logps/rejected": -3.2097885608673096, "loss": 0.8817, "nll_loss": 0.7698461413383484, "rewards/accuracies": 1.0, "rewards/chosen": -0.06428872048854828, "rewards/margins": 0.2566901445388794, "rewards/rejected": -0.3209788501262665, "step": 7365 }, { "epoch": 4.493518377306695, "grad_norm": 2.0576164722442627, "learning_rate": 8.122473974280465e-07, "log_odds_chosen": 2.5454564094543457, "log_odds_ratio": -0.33987513184547424, "logits/chosen": -0.8813261985778809, "logits/rejected": -1.0664441585540771, "logps/chosen": -0.7764381170272827, "logps/rejected": -2.6310017108917236, "loss": 1.1215, "nll_loss": 0.9574191570281982, "rewards/accuracies": 0.75, "rewards/chosen": -0.07764381170272827, "rewards/margins": 0.18545636534690857, "rewards/rejected": -0.26310014724731445, "step": 7366 }, { "epoch": 4.494128412383712, "grad_norm": 2.098799705505371, "learning_rate": 8.112676056338028e-07, "log_odds_chosen": 1.083631992340088, "log_odds_ratio": -0.5929596424102783, "logits/chosen": -1.0189622640609741, "logits/rejected": -0.9899231791496277, "logps/chosen": -0.861204206943512, "logps/rejected": -1.7661854028701782, "loss": 0.9846, "nll_loss": 0.9644966125488281, "rewards/accuracies": 0.5, "rewards/chosen": -0.08612041920423508, "rewards/margins": 0.09049811214208603, "rewards/rejected": -0.1766185462474823, "step": 7367 }, { "epoch": 4.494738447460729, "grad_norm": 1.2747656106948853, "learning_rate": 8.10287813839559e-07, "log_odds_chosen": 2.475942850112915, "log_odds_ratio": -0.3588617444038391, "logits/chosen": -0.7497603893280029, "logits/rejected": -0.9901974201202393, "logps/chosen": -0.7549852132797241, "logps/rejected": -2.711350202560425, "loss": 1.2019, "nll_loss": 1.1725825071334839, "rewards/accuracies": 0.75, "rewards/chosen": -0.07549852132797241, "rewards/margins": 0.19563652575016022, "rewards/rejected": -0.27113503217697144, "step": 7368 }, { "epoch": 4.495348482537746, "grad_norm": 1.6251479387283325, "learning_rate": 8.093080220453153e-07, "log_odds_chosen": 3.723780632019043, "log_odds_ratio": -0.36992859840393066, "logits/chosen": -0.8363853096961975, "logits/rejected": -1.1383981704711914, "logps/chosen": -0.7903311848640442, "logps/rejected": -4.082878589630127, "loss": 1.0451, "nll_loss": 1.0980656147003174, "rewards/accuracies": 0.75, "rewards/chosen": -0.07903312146663666, "rewards/margins": 0.32925474643707275, "rewards/rejected": -0.4082878828048706, "step": 7369 }, { "epoch": 4.4959585176147625, "grad_norm": 2.2708640098571777, "learning_rate": 8.083282302510717e-07, "log_odds_chosen": 4.459745407104492, "log_odds_ratio": -0.27901023626327515, "logits/chosen": -1.1171369552612305, "logits/rejected": -1.2851858139038086, "logps/chosen": -0.9797042608261108, "logps/rejected": -4.965798854827881, "loss": 1.2923, "nll_loss": 1.2359226942062378, "rewards/accuracies": 0.75, "rewards/chosen": -0.09797042608261108, "rewards/margins": 0.398609459400177, "rewards/rejected": -0.4965799152851105, "step": 7370 }, { "epoch": 4.49656855269178, "grad_norm": 20.325023651123047, "learning_rate": 8.073484384568279e-07, "log_odds_chosen": 3.8880996704101562, "log_odds_ratio": -0.22700628638267517, "logits/chosen": -0.8844614028930664, "logits/rejected": -1.0198619365692139, "logps/chosen": -0.6545585989952087, "logps/rejected": -3.76094126701355, "loss": 1.0084, "nll_loss": 0.8688439130783081, "rewards/accuracies": 0.875, "rewards/chosen": -0.065455861389637, "rewards/margins": 0.3106382489204407, "rewards/rejected": -0.37609413266181946, "step": 7371 }, { "epoch": 4.497178587768797, "grad_norm": 1.812696099281311, "learning_rate": 8.063686466625842e-07, "log_odds_chosen": 1.5838561058044434, "log_odds_ratio": -0.5165071487426758, "logits/chosen": -0.9463582038879395, "logits/rejected": -1.0343725681304932, "logps/chosen": -0.7679963111877441, "logps/rejected": -1.8321144580841064, "loss": 1.0106, "nll_loss": 0.9302959442138672, "rewards/accuracies": 0.75, "rewards/chosen": -0.07679963856935501, "rewards/margins": 0.10641179978847504, "rewards/rejected": -0.18321144580841064, "step": 7372 }, { "epoch": 4.497788622845814, "grad_norm": 1.3001846075057983, "learning_rate": 8.053888548683404e-07, "log_odds_chosen": 1.9945660829544067, "log_odds_ratio": -0.43546608090400696, "logits/chosen": -0.9093871116638184, "logits/rejected": -1.028515338897705, "logps/chosen": -0.7622472047805786, "logps/rejected": -2.3758208751678467, "loss": 0.8269, "nll_loss": 0.9347456693649292, "rewards/accuracies": 0.75, "rewards/chosen": -0.07622472941875458, "rewards/margins": 0.16135737299919128, "rewards/rejected": -0.23758211731910706, "step": 7373 }, { "epoch": 4.498398657922831, "grad_norm": 1.6182998418807983, "learning_rate": 8.044090630740968e-07, "log_odds_chosen": 2.480583667755127, "log_odds_ratio": -0.21190880239009857, "logits/chosen": -0.5426859259605408, "logits/rejected": -0.8171085119247437, "logps/chosen": -0.4455585777759552, "logps/rejected": -1.9558979272842407, "loss": 0.8563, "nll_loss": 0.5361454486846924, "rewards/accuracies": 0.875, "rewards/chosen": -0.04455585777759552, "rewards/margins": 0.1510339379310608, "rewards/rejected": -0.19558978080749512, "step": 7374 }, { "epoch": 4.499008692999848, "grad_norm": 3.8420352935791016, "learning_rate": 8.03429271279853e-07, "log_odds_chosen": 2.154348373413086, "log_odds_ratio": -0.27294886112213135, "logits/chosen": -0.6972976326942444, "logits/rejected": -0.9009473323822021, "logps/chosen": -0.7337926626205444, "logps/rejected": -2.438181161880493, "loss": 0.9855, "nll_loss": 0.9610468745231628, "rewards/accuracies": 0.875, "rewards/chosen": -0.0733792632818222, "rewards/margins": 0.17043885588645935, "rewards/rejected": -0.24381811916828156, "step": 7375 }, { "epoch": 4.4996187280768645, "grad_norm": 1.4273539781570435, "learning_rate": 8.024494794856092e-07, "log_odds_chosen": 3.479069709777832, "log_odds_ratio": -0.48178738355636597, "logits/chosen": -0.7124328017234802, "logits/rejected": -0.7445674538612366, "logps/chosen": -0.7252076268196106, "logps/rejected": -3.8229379653930664, "loss": 0.9532, "nll_loss": 0.8363421559333801, "rewards/accuracies": 0.875, "rewards/chosen": -0.07252076268196106, "rewards/margins": 0.3097730278968811, "rewards/rejected": -0.38229379057884216, "step": 7376 }, { "epoch": 4.500228763153881, "grad_norm": 1.6090174913406372, "learning_rate": 8.014696876913656e-07, "log_odds_chosen": 2.2036681175231934, "log_odds_ratio": -0.27505922317504883, "logits/chosen": -0.7842744588851929, "logits/rejected": -0.9701752662658691, "logps/chosen": -0.7125924229621887, "logps/rejected": -2.3304712772369385, "loss": 1.0349, "nll_loss": 0.900757908821106, "rewards/accuracies": 0.875, "rewards/chosen": -0.07125924527645111, "rewards/margins": 0.16178786754608154, "rewards/rejected": -0.23304711282253265, "step": 7377 }, { "epoch": 4.500838798230898, "grad_norm": 10.146050453186035, "learning_rate": 8.004898958971218e-07, "log_odds_chosen": 3.4206631183624268, "log_odds_ratio": -0.2862021028995514, "logits/chosen": -0.9822778701782227, "logits/rejected": -1.0249916315078735, "logps/chosen": -0.7682589292526245, "logps/rejected": -3.437897205352783, "loss": 1.1427, "nll_loss": 1.3496556282043457, "rewards/accuracies": 0.75, "rewards/chosen": -0.07682588696479797, "rewards/margins": 0.2669638395309448, "rewards/rejected": -0.3437896966934204, "step": 7378 }, { "epoch": 4.501448833307915, "grad_norm": 9.40760326385498, "learning_rate": 7.995101041028781e-07, "log_odds_chosen": 1.9266705513000488, "log_odds_ratio": -0.18796291947364807, "logits/chosen": -0.8566582202911377, "logits/rejected": -1.0172876119613647, "logps/chosen": -0.7741307020187378, "logps/rejected": -2.138787031173706, "loss": 0.8879, "nll_loss": 0.9886775612831116, "rewards/accuracies": 1.0, "rewards/chosen": -0.07741308212280273, "rewards/margins": 0.1364656239748001, "rewards/rejected": -0.21387872099876404, "step": 7379 }, { "epoch": 4.502058868384932, "grad_norm": 15.761381149291992, "learning_rate": 7.985303123086344e-07, "log_odds_chosen": 1.226504921913147, "log_odds_ratio": -0.5625178217887878, "logits/chosen": -0.9448159337043762, "logits/rejected": -0.9744937419891357, "logps/chosen": -0.8876262903213501, "logps/rejected": -1.9123479127883911, "loss": 1.045, "nll_loss": 0.9831982851028442, "rewards/accuracies": 0.625, "rewards/chosen": -0.08876262605190277, "rewards/margins": 0.10247217118740082, "rewards/rejected": -0.1912347972393036, "step": 7380 }, { "epoch": 4.502668903461949, "grad_norm": 10.200254440307617, "learning_rate": 7.975505205143907e-07, "log_odds_chosen": 4.753730773925781, "log_odds_ratio": -0.04399573430418968, "logits/chosen": -0.9304875135421753, "logits/rejected": -1.0843236446380615, "logps/chosen": -0.796042799949646, "logps/rejected": -4.791473865509033, "loss": 1.0746, "nll_loss": 0.897118330001831, "rewards/accuracies": 1.0, "rewards/chosen": -0.07960428297519684, "rewards/margins": 0.39954307675361633, "rewards/rejected": -0.47914737462997437, "step": 7381 }, { "epoch": 4.5032789385389655, "grad_norm": 8.471023559570312, "learning_rate": 7.965707287201469e-07, "log_odds_chosen": 1.94340980052948, "log_odds_ratio": -0.39724794030189514, "logits/chosen": -0.9373723864555359, "logits/rejected": -0.96451336145401, "logps/chosen": -0.7203623652458191, "logps/rejected": -2.237558364868164, "loss": 1.1088, "nll_loss": 1.1599597930908203, "rewards/accuracies": 0.75, "rewards/chosen": -0.07203623652458191, "rewards/margins": 0.15171962976455688, "rewards/rejected": -0.2237558662891388, "step": 7382 }, { "epoch": 4.503888973615983, "grad_norm": 17.163362503051758, "learning_rate": 7.955909369259031e-07, "log_odds_chosen": 3.3757190704345703, "log_odds_ratio": -0.406770259141922, "logits/chosen": -0.8808373808860779, "logits/rejected": -1.09992516040802, "logps/chosen": -0.7796961665153503, "logps/rejected": -3.514540672302246, "loss": 1.0031, "nll_loss": 0.893755316734314, "rewards/accuracies": 0.875, "rewards/chosen": -0.07796961814165115, "rewards/margins": 0.27348440885543823, "rewards/rejected": -0.3514540195465088, "step": 7383 }, { "epoch": 4.504499008693, "grad_norm": 2.567375659942627, "learning_rate": 7.946111451316595e-07, "log_odds_chosen": 1.7464709281921387, "log_odds_ratio": -0.3027900755405426, "logits/chosen": -0.919157087802887, "logits/rejected": -0.9272060394287109, "logps/chosen": -0.6334505081176758, "logps/rejected": -1.8520654439926147, "loss": 1.0215, "nll_loss": 0.957432210445404, "rewards/accuracies": 0.875, "rewards/chosen": -0.0633450448513031, "rewards/margins": 0.12186149507761002, "rewards/rejected": -0.18520653247833252, "step": 7384 }, { "epoch": 4.505109043770017, "grad_norm": 4.613184452056885, "learning_rate": 7.936313533374157e-07, "log_odds_chosen": 3.3943705558776855, "log_odds_ratio": -0.136043518781662, "logits/chosen": -0.5754832625389099, "logits/rejected": -0.9567487239837646, "logps/chosen": -0.5104881525039673, "logps/rejected": -3.102912425994873, "loss": 1.0108, "nll_loss": 0.8046351671218872, "rewards/accuracies": 1.0, "rewards/chosen": -0.05104881897568703, "rewards/margins": 0.2592424154281616, "rewards/rejected": -0.31029123067855835, "step": 7385 }, { "epoch": 4.505719078847034, "grad_norm": 1.6452192068099976, "learning_rate": 7.92651561543172e-07, "log_odds_chosen": 2.331955909729004, "log_odds_ratio": -0.3019859194755554, "logits/chosen": -0.9890130758285522, "logits/rejected": -1.1265895366668701, "logps/chosen": -0.7791608572006226, "logps/rejected": -2.6673429012298584, "loss": 1.0398, "nll_loss": 1.0847991704940796, "rewards/accuracies": 0.875, "rewards/chosen": -0.07791608572006226, "rewards/margins": 0.18881821632385254, "rewards/rejected": -0.2667343020439148, "step": 7386 }, { "epoch": 4.506329113924051, "grad_norm": 1.758191466331482, "learning_rate": 7.916717697489284e-07, "log_odds_chosen": 3.8471462726593018, "log_odds_ratio": -0.1505350023508072, "logits/chosen": -0.7667597532272339, "logits/rejected": -0.9775234460830688, "logps/chosen": -0.6506379246711731, "logps/rejected": -3.802446126937866, "loss": 1.0528, "nll_loss": 0.8704042434692383, "rewards/accuracies": 1.0, "rewards/chosen": -0.06506379693746567, "rewards/margins": 0.31518080830574036, "rewards/rejected": -0.38024458289146423, "step": 7387 }, { "epoch": 4.5069391490010675, "grad_norm": 1.562941074371338, "learning_rate": 7.906919779546846e-07, "log_odds_chosen": 1.2486209869384766, "log_odds_ratio": -0.43688321113586426, "logits/chosen": -1.0646288394927979, "logits/rejected": -1.1504820585250854, "logps/chosen": -0.9633076190948486, "logps/rejected": -1.8127559423446655, "loss": 1.2219, "nll_loss": 1.2721318006515503, "rewards/accuracies": 0.625, "rewards/chosen": -0.09633076190948486, "rewards/margins": 0.08494483679533005, "rewards/rejected": -0.1812756061553955, "step": 7388 }, { "epoch": 4.507549184078084, "grad_norm": 2.8286144733428955, "learning_rate": 7.897121861604408e-07, "log_odds_chosen": 0.7348175644874573, "log_odds_ratio": -0.5167421102523804, "logits/chosen": -1.0660021305084229, "logits/rejected": -0.9968708753585815, "logps/chosen": -0.8238992691040039, "logps/rejected": -1.382514238357544, "loss": 1.0809, "nll_loss": 0.9614373445510864, "rewards/accuracies": 0.75, "rewards/chosen": -0.08238992094993591, "rewards/margins": 0.05586150661110878, "rewards/rejected": -0.1382514238357544, "step": 7389 }, { "epoch": 4.508159219155101, "grad_norm": 1.1486748456954956, "learning_rate": 7.887323943661971e-07, "log_odds_chosen": 2.1801490783691406, "log_odds_ratio": -0.261588454246521, "logits/chosen": -0.8932384252548218, "logits/rejected": -0.9479142427444458, "logps/chosen": -0.6735962629318237, "logps/rejected": -2.3192620277404785, "loss": 1.0996, "nll_loss": 1.1667823791503906, "rewards/accuracies": 0.875, "rewards/chosen": -0.06735963374376297, "rewards/margins": 0.16456657648086548, "rewards/rejected": -0.23192620277404785, "step": 7390 }, { "epoch": 4.508769254232118, "grad_norm": 11.72440242767334, "learning_rate": 7.877526025719534e-07, "log_odds_chosen": 1.4071720838546753, "log_odds_ratio": -0.4260481595993042, "logits/chosen": -0.8983219861984253, "logits/rejected": -0.8601440787315369, "logps/chosen": -1.0231761932373047, "logps/rejected": -2.0986995697021484, "loss": 1.1086, "nll_loss": 0.999137282371521, "rewards/accuracies": 0.75, "rewards/chosen": -0.10231760889291763, "rewards/margins": 0.10755234956741333, "rewards/rejected": -0.20986996591091156, "step": 7391 }, { "epoch": 4.509379289309135, "grad_norm": 11.009038925170898, "learning_rate": 7.867728107777097e-07, "log_odds_chosen": 5.124856948852539, "log_odds_ratio": -0.34763237833976746, "logits/chosen": -0.9182462692260742, "logits/rejected": -1.1034891605377197, "logps/chosen": -0.7825952172279358, "logps/rejected": -5.321020126342773, "loss": 1.191, "nll_loss": 1.123409628868103, "rewards/accuracies": 0.75, "rewards/chosen": -0.07825952023267746, "rewards/margins": 0.4538424611091614, "rewards/rejected": -0.5321019887924194, "step": 7392 }, { "epoch": 4.509989324386153, "grad_norm": 1.943215250968933, "learning_rate": 7.85793018983466e-07, "log_odds_chosen": 0.692981481552124, "log_odds_ratio": -0.5878000855445862, "logits/chosen": -1.115156888961792, "logits/rejected": -0.9976829886436462, "logps/chosen": -0.8475034236907959, "logps/rejected": -1.2920701503753662, "loss": 1.161, "nll_loss": 1.1461706161499023, "rewards/accuracies": 0.625, "rewards/chosen": -0.08475034683942795, "rewards/margins": 0.04445668309926987, "rewards/rejected": -0.12920701503753662, "step": 7393 }, { "epoch": 4.5105993594631695, "grad_norm": 3.79453706741333, "learning_rate": 7.848132271892223e-07, "log_odds_chosen": 1.7804282903671265, "log_odds_ratio": -0.4296497106552124, "logits/chosen": -0.9468863010406494, "logits/rejected": -1.0766091346740723, "logps/chosen": -0.7199711799621582, "logps/rejected": -2.140162467956543, "loss": 1.1127, "nll_loss": 1.0907869338989258, "rewards/accuracies": 0.875, "rewards/chosen": -0.07199712097644806, "rewards/margins": 0.1420191377401352, "rewards/rejected": -0.21401625871658325, "step": 7394 }, { "epoch": 4.511209394540186, "grad_norm": 1.1639941930770874, "learning_rate": 7.838334353949785e-07, "log_odds_chosen": 1.7698767185211182, "log_odds_ratio": -0.4801546633243561, "logits/chosen": -0.660607635974884, "logits/rejected": -0.758525013923645, "logps/chosen": -0.8592677116394043, "logps/rejected": -2.291013240814209, "loss": 0.9691, "nll_loss": 0.9197076559066772, "rewards/accuracies": 0.75, "rewards/chosen": -0.08592677116394043, "rewards/margins": 0.14317455887794495, "rewards/rejected": -0.22910133004188538, "step": 7395 }, { "epoch": 4.511819429617203, "grad_norm": 7.629388809204102, "learning_rate": 7.828536436007348e-07, "log_odds_chosen": 2.5364456176757812, "log_odds_ratio": -0.4931543171405792, "logits/chosen": -0.9520072937011719, "logits/rejected": -1.1843029260635376, "logps/chosen": -0.8506815433502197, "logps/rejected": -3.0179696083068848, "loss": 0.8747, "nll_loss": 0.8709985017776489, "rewards/accuracies": 0.75, "rewards/chosen": -0.08506815135478973, "rewards/margins": 0.2167287915945053, "rewards/rejected": -0.30179697275161743, "step": 7396 }, { "epoch": 4.51242946469422, "grad_norm": 4.1819539070129395, "learning_rate": 7.818738518064911e-07, "log_odds_chosen": 2.3640897274017334, "log_odds_ratio": -0.3133440613746643, "logits/chosen": -0.9045936465263367, "logits/rejected": -1.003587007522583, "logps/chosen": -0.7080422639846802, "logps/rejected": -2.3860301971435547, "loss": 1.1079, "nll_loss": 0.8883101940155029, "rewards/accuracies": 0.75, "rewards/chosen": -0.07080423086881638, "rewards/margins": 0.16779881715774536, "rewards/rejected": -0.23860302567481995, "step": 7397 }, { "epoch": 4.513039499771237, "grad_norm": 6.580105304718018, "learning_rate": 7.808940600122473e-07, "log_odds_chosen": 2.397134304046631, "log_odds_ratio": -0.2640746831893921, "logits/chosen": -0.8056305646896362, "logits/rejected": -0.9989949464797974, "logps/chosen": -0.6541019082069397, "logps/rejected": -2.34244441986084, "loss": 0.9556, "nll_loss": 0.8860622048377991, "rewards/accuracies": 0.875, "rewards/chosen": -0.06541018933057785, "rewards/margins": 0.16883426904678345, "rewards/rejected": -0.2342444658279419, "step": 7398 }, { "epoch": 4.513649534848254, "grad_norm": 8.430778503417969, "learning_rate": 7.799142682180036e-07, "log_odds_chosen": 0.9944941997528076, "log_odds_ratio": -0.6914293169975281, "logits/chosen": -0.9696733951568604, "logits/rejected": -1.1239910125732422, "logps/chosen": -0.9195986390113831, "logps/rejected": -1.854884147644043, "loss": 1.1985, "nll_loss": 1.0791138410568237, "rewards/accuracies": 0.375, "rewards/chosen": -0.0919598639011383, "rewards/margins": 0.09352855384349823, "rewards/rejected": -0.18548843264579773, "step": 7399 }, { "epoch": 4.5142595699252706, "grad_norm": 1.8473743200302124, "learning_rate": 7.789344764237599e-07, "log_odds_chosen": 2.772196054458618, "log_odds_ratio": -0.31798475980758667, "logits/chosen": -0.8545535802841187, "logits/rejected": -0.9944726824760437, "logps/chosen": -0.7716860771179199, "logps/rejected": -3.1163289546966553, "loss": 1.1844, "nll_loss": 1.1308908462524414, "rewards/accuracies": 0.75, "rewards/chosen": -0.07716861367225647, "rewards/margins": 0.23446428775787354, "rewards/rejected": -0.3116329312324524, "step": 7400 }, { "epoch": 4.514869605002287, "grad_norm": 1.6748197078704834, "learning_rate": 7.779546846295162e-07, "log_odds_chosen": 2.23567271232605, "log_odds_ratio": -0.25686562061309814, "logits/chosen": -0.6599583625793457, "logits/rejected": -0.8256880044937134, "logps/chosen": -0.6958959102630615, "logps/rejected": -2.4508814811706543, "loss": 1.0661, "nll_loss": 1.055019497871399, "rewards/accuracies": 1.0, "rewards/chosen": -0.06958959251642227, "rewards/margins": 0.17549854516983032, "rewards/rejected": -0.245088130235672, "step": 7401 }, { "epoch": 4.515479640079304, "grad_norm": 2.0458292961120605, "learning_rate": 7.769748928352725e-07, "log_odds_chosen": 1.4901397228240967, "log_odds_ratio": -0.49720269441604614, "logits/chosen": -0.8865702152252197, "logits/rejected": -0.9429314732551575, "logps/chosen": -0.9505157470703125, "logps/rejected": -2.049675464630127, "loss": 0.9778, "nll_loss": 1.0644451379776, "rewards/accuracies": 0.75, "rewards/chosen": -0.09505157172679901, "rewards/margins": 0.10991596430540085, "rewards/rejected": -0.20496755838394165, "step": 7402 }, { "epoch": 4.516089675156321, "grad_norm": 1.999269723892212, "learning_rate": 7.759951010410288e-07, "log_odds_chosen": 1.9932942390441895, "log_odds_ratio": -0.41351011395454407, "logits/chosen": -0.9268134832382202, "logits/rejected": -1.0547763109207153, "logps/chosen": -0.9034388661384583, "logps/rejected": -2.6037793159484863, "loss": 1.0268, "nll_loss": 1.2397528886795044, "rewards/accuracies": 0.75, "rewards/chosen": -0.0903438925743103, "rewards/margins": 0.17003406584262848, "rewards/rejected": -0.2603779435157776, "step": 7403 }, { "epoch": 4.516699710233338, "grad_norm": 2.7498178482055664, "learning_rate": 7.75015309246785e-07, "log_odds_chosen": 4.225893020629883, "log_odds_ratio": -0.34809789061546326, "logits/chosen": -0.9521986246109009, "logits/rejected": -1.0825371742248535, "logps/chosen": -0.6645474433898926, "logps/rejected": -4.425707817077637, "loss": 1.0442, "nll_loss": 0.9922916293144226, "rewards/accuracies": 0.875, "rewards/chosen": -0.06645473837852478, "rewards/margins": 0.3761160373687744, "rewards/rejected": -0.4425708055496216, "step": 7404 }, { "epoch": 4.517309745310356, "grad_norm": 1.3100676536560059, "learning_rate": 7.740355174525412e-07, "log_odds_chosen": 2.282917022705078, "log_odds_ratio": -0.3227147161960602, "logits/chosen": -0.9576468467712402, "logits/rejected": -0.8725309371948242, "logps/chosen": -0.7000494599342346, "logps/rejected": -2.493224620819092, "loss": 0.9216, "nll_loss": 0.780713677406311, "rewards/accuracies": 0.625, "rewards/chosen": -0.07000495493412018, "rewards/margins": 0.17931751906871796, "rewards/rejected": -0.24932247400283813, "step": 7405 }, { "epoch": 4.5179197803873725, "grad_norm": 1.8229929208755493, "learning_rate": 7.730557256582976e-07, "log_odds_chosen": 2.6833746433258057, "log_odds_ratio": -0.36810606718063354, "logits/chosen": -0.8220778107643127, "logits/rejected": -0.9024627208709717, "logps/chosen": -0.7959286570549011, "logps/rejected": -2.9948198795318604, "loss": 1.0487, "nll_loss": 0.971060574054718, "rewards/accuracies": 0.875, "rewards/chosen": -0.07959286868572235, "rewards/margins": 0.21988913416862488, "rewards/rejected": -0.29948198795318604, "step": 7406 }, { "epoch": 4.518529815464389, "grad_norm": 1.4061603546142578, "learning_rate": 7.720759338640539e-07, "log_odds_chosen": 2.3664968013763428, "log_odds_ratio": -0.3649934232234955, "logits/chosen": -0.8852539658546448, "logits/rejected": -0.9227648973464966, "logps/chosen": -0.745879054069519, "logps/rejected": -2.5654478073120117, "loss": 1.0185, "nll_loss": 0.9767687916755676, "rewards/accuracies": 0.75, "rewards/chosen": -0.07458790391683578, "rewards/margins": 0.18195685744285583, "rewards/rejected": -0.2565447688102722, "step": 7407 }, { "epoch": 4.519139850541406, "grad_norm": 2.3737010955810547, "learning_rate": 7.710961420698101e-07, "log_odds_chosen": 4.179265975952148, "log_odds_ratio": -0.21456611156463623, "logits/chosen": -0.6920661330223083, "logits/rejected": -1.1878856420516968, "logps/chosen": -0.7023698091506958, "logps/rejected": -4.20404577255249, "loss": 0.9686, "nll_loss": 0.8473221063613892, "rewards/accuracies": 0.875, "rewards/chosen": -0.07023698091506958, "rewards/margins": 0.3501676321029663, "rewards/rejected": -0.4204046130180359, "step": 7408 }, { "epoch": 4.519749885618423, "grad_norm": 1.9508087635040283, "learning_rate": 7.701163502755665e-07, "log_odds_chosen": 4.528963088989258, "log_odds_ratio": -0.14677110314369202, "logits/chosen": -0.7576912641525269, "logits/rejected": -1.0777511596679688, "logps/chosen": -0.5044384002685547, "logps/rejected": -4.199796676635742, "loss": 1.0255, "nll_loss": 0.9158692955970764, "rewards/accuracies": 1.0, "rewards/chosen": -0.05044384300708771, "rewards/margins": 0.36953580379486084, "rewards/rejected": -0.41997966170310974, "step": 7409 }, { "epoch": 4.52035992069544, "grad_norm": 10.716961860656738, "learning_rate": 7.691365584813227e-07, "log_odds_chosen": 0.44189903140068054, "log_odds_ratio": -0.6002752780914307, "logits/chosen": -1.0691981315612793, "logits/rejected": -1.120591402053833, "logps/chosen": -0.85784512758255, "logps/rejected": -1.1027612686157227, "loss": 0.9341, "nll_loss": 0.9900075197219849, "rewards/accuracies": 0.375, "rewards/chosen": -0.08578451722860336, "rewards/margins": 0.02449161931872368, "rewards/rejected": -0.11027613282203674, "step": 7410 }, { "epoch": 4.520969955772457, "grad_norm": 1.4827293157577515, "learning_rate": 7.681567666870789e-07, "log_odds_chosen": 2.196805477142334, "log_odds_ratio": -0.36912113428115845, "logits/chosen": -0.7886470556259155, "logits/rejected": -0.8180162906646729, "logps/chosen": -0.7284805178642273, "logps/rejected": -2.436394214630127, "loss": 1.001, "nll_loss": 0.9455097913742065, "rewards/accuracies": 0.875, "rewards/chosen": -0.07284805178642273, "rewards/margins": 0.1707913875579834, "rewards/rejected": -0.24363943934440613, "step": 7411 }, { "epoch": 4.521579990849474, "grad_norm": 1.7844372987747192, "learning_rate": 7.671769748928352e-07, "log_odds_chosen": 3.286612033843994, "log_odds_ratio": -0.4060216546058655, "logits/chosen": -0.7831260561943054, "logits/rejected": -1.0752779245376587, "logps/chosen": -0.5770732760429382, "logps/rejected": -3.3003129959106445, "loss": 0.9303, "nll_loss": 0.7094496488571167, "rewards/accuracies": 0.75, "rewards/chosen": -0.05770732834935188, "rewards/margins": 0.27232396602630615, "rewards/rejected": -0.33003127574920654, "step": 7412 }, { "epoch": 4.52219002592649, "grad_norm": 2.143105983734131, "learning_rate": 7.661971830985915e-07, "log_odds_chosen": 0.6595938801765442, "log_odds_ratio": -0.588004469871521, "logits/chosen": -0.8868721127510071, "logits/rejected": -0.9408786296844482, "logps/chosen": -0.8205618262290955, "logps/rejected": -1.2265934944152832, "loss": 1.0808, "nll_loss": 0.9579930305480957, "rewards/accuracies": 0.625, "rewards/chosen": -0.08205617964267731, "rewards/margins": 0.04060317203402519, "rewards/rejected": -0.1226593554019928, "step": 7413 }, { "epoch": 4.522800061003508, "grad_norm": 2.4729936122894287, "learning_rate": 7.652173913043478e-07, "log_odds_chosen": 1.4355849027633667, "log_odds_ratio": -0.4242240786552429, "logits/chosen": -1.0418494939804077, "logits/rejected": -1.0139033794403076, "logps/chosen": -0.8595709800720215, "logps/rejected": -1.9855594635009766, "loss": 1.1722, "nll_loss": 1.3501063585281372, "rewards/accuracies": 0.875, "rewards/chosen": -0.0859571024775505, "rewards/margins": 0.11259885132312775, "rewards/rejected": -0.19855594635009766, "step": 7414 }, { "epoch": 4.523410096080525, "grad_norm": 1.4132964611053467, "learning_rate": 7.64237599510104e-07, "log_odds_chosen": 0.5800950527191162, "log_odds_ratio": -0.6075525283813477, "logits/chosen": -0.9526621699333191, "logits/rejected": -0.9934973120689392, "logps/chosen": -1.010830044746399, "logps/rejected": -1.425421118736267, "loss": 0.931, "nll_loss": 1.0407936573028564, "rewards/accuracies": 0.75, "rewards/chosen": -0.10108301043510437, "rewards/margins": 0.041459113359451294, "rewards/rejected": -0.14254212379455566, "step": 7415 }, { "epoch": 4.524020131157542, "grad_norm": 2.077944755554199, "learning_rate": 7.632578077158604e-07, "log_odds_chosen": 1.5451864004135132, "log_odds_ratio": -0.3278579115867615, "logits/chosen": -0.9023633003234863, "logits/rejected": -0.9721688032150269, "logps/chosen": -0.7187411785125732, "logps/rejected": -1.8160464763641357, "loss": 1.0057, "nll_loss": 0.9060285687446594, "rewards/accuracies": 0.875, "rewards/chosen": -0.07187411189079285, "rewards/margins": 0.1097305417060852, "rewards/rejected": -0.18160466849803925, "step": 7416 }, { "epoch": 4.524630166234559, "grad_norm": 1.3193279504776, "learning_rate": 7.622780159216167e-07, "log_odds_chosen": 4.3436102867126465, "log_odds_ratio": -0.3859099745750427, "logits/chosen": -0.8070544600486755, "logits/rejected": -0.9178362488746643, "logps/chosen": -0.7934544682502747, "logps/rejected": -4.788951873779297, "loss": 1.0865, "nll_loss": 1.1347651481628418, "rewards/accuracies": 0.75, "rewards/chosen": -0.07934544235467911, "rewards/margins": 0.3995497524738312, "rewards/rejected": -0.4788952171802521, "step": 7417 }, { "epoch": 4.525240201311576, "grad_norm": 2.443267345428467, "learning_rate": 7.612982241273728e-07, "log_odds_chosen": 1.949671983718872, "log_odds_ratio": -0.5428216457366943, "logits/chosen": -1.0051865577697754, "logits/rejected": -1.0348131656646729, "logps/chosen": -0.7093073129653931, "logps/rejected": -2.3517022132873535, "loss": 1.0082, "nll_loss": 0.7774859666824341, "rewards/accuracies": 0.75, "rewards/chosen": -0.07093073427677155, "rewards/margins": 0.16423945128917694, "rewards/rejected": -0.23517020046710968, "step": 7418 }, { "epoch": 4.525850236388592, "grad_norm": 1.951363444328308, "learning_rate": 7.603184323331292e-07, "log_odds_chosen": 3.29795241355896, "log_odds_ratio": -0.3182178735733032, "logits/chosen": -0.9130128622055054, "logits/rejected": -0.9808745384216309, "logps/chosen": -0.7220102548599243, "logps/rejected": -3.5249056816101074, "loss": 1.03, "nll_loss": 1.1189730167388916, "rewards/accuracies": 0.875, "rewards/chosen": -0.07220102846622467, "rewards/margins": 0.28028956055641174, "rewards/rejected": -0.3524906039237976, "step": 7419 }, { "epoch": 4.526460271465609, "grad_norm": 1.6152541637420654, "learning_rate": 7.593386405388854e-07, "log_odds_chosen": 1.2176120281219482, "log_odds_ratio": -0.531936526298523, "logits/chosen": -0.7496351599693298, "logits/rejected": -0.8352364301681519, "logps/chosen": -0.6511213779449463, "logps/rejected": -1.4691150188446045, "loss": 0.9177, "nll_loss": 0.8382342457771301, "rewards/accuracies": 0.5, "rewards/chosen": -0.06511213630437851, "rewards/margins": 0.08179935067892075, "rewards/rejected": -0.14691150188446045, "step": 7420 }, { "epoch": 4.527070306542626, "grad_norm": 1.6825270652770996, "learning_rate": 7.583588487446417e-07, "log_odds_chosen": 0.8151441812515259, "log_odds_ratio": -0.5591696500778198, "logits/chosen": -0.9884100556373596, "logits/rejected": -0.9558700323104858, "logps/chosen": -0.8825637102127075, "logps/rejected": -1.5843459367752075, "loss": 1.0374, "nll_loss": 1.1413521766662598, "rewards/accuracies": 0.5, "rewards/chosen": -0.08825637400150299, "rewards/margins": 0.07017822563648224, "rewards/rejected": -0.15843459963798523, "step": 7421 }, { "epoch": 4.527680341619643, "grad_norm": 1.4862303733825684, "learning_rate": 7.573790569503981e-07, "log_odds_chosen": 1.7326022386550903, "log_odds_ratio": -0.3916548490524292, "logits/chosen": -0.7854956984519958, "logits/rejected": -0.8724538087844849, "logps/chosen": -0.6998515129089355, "logps/rejected": -2.0802364349365234, "loss": 1.0179, "nll_loss": 0.8680906891822815, "rewards/accuracies": 0.75, "rewards/chosen": -0.06998514384031296, "rewards/margins": 0.1380384862422943, "rewards/rejected": -0.20802363753318787, "step": 7422 }, { "epoch": 4.52829037669666, "grad_norm": 1.564306616783142, "learning_rate": 7.563992651561543e-07, "log_odds_chosen": 0.9715710878372192, "log_odds_ratio": -0.4909597933292389, "logits/chosen": -0.7874385118484497, "logits/rejected": -0.7605310082435608, "logps/chosen": -0.7853527069091797, "logps/rejected": -1.4545483589172363, "loss": 1.0479, "nll_loss": 0.8723354935646057, "rewards/accuracies": 0.75, "rewards/chosen": -0.0785352811217308, "rewards/margins": 0.06691956520080566, "rewards/rejected": -0.14545483887195587, "step": 7423 }, { "epoch": 4.528900411773677, "grad_norm": 1.2850677967071533, "learning_rate": 7.554194733619106e-07, "log_odds_chosen": 1.4898908138275146, "log_odds_ratio": -0.7742390632629395, "logits/chosen": -0.9464750289916992, "logits/rejected": -0.9918196201324463, "logps/chosen": -1.0867335796356201, "logps/rejected": -2.1443161964416504, "loss": 1.1111, "nll_loss": 0.9481870532035828, "rewards/accuracies": 0.75, "rewards/chosen": -0.10867336392402649, "rewards/margins": 0.10575826466083527, "rewards/rejected": -0.21443161368370056, "step": 7424 }, { "epoch": 4.5295104468506935, "grad_norm": 1.7596335411071777, "learning_rate": 7.544396815676668e-07, "log_odds_chosen": 1.1008590459823608, "log_odds_ratio": -0.5634890198707581, "logits/chosen": -1.0174343585968018, "logits/rejected": -1.12874174118042, "logps/chosen": -1.0156126022338867, "logps/rejected": -1.9879419803619385, "loss": 1.1144, "nll_loss": 1.0009878873825073, "rewards/accuracies": 0.875, "rewards/chosen": -0.10156125575304031, "rewards/margins": 0.09723293781280518, "rewards/rejected": -0.1987941861152649, "step": 7425 }, { "epoch": 4.530120481927711, "grad_norm": 1.6285258531570435, "learning_rate": 7.534598897734231e-07, "log_odds_chosen": 0.5724959373474121, "log_odds_ratio": -0.509735643863678, "logits/chosen": -0.9356374144554138, "logits/rejected": -0.9149361252784729, "logps/chosen": -0.9885873794555664, "logps/rejected": -1.419419527053833, "loss": 1.0181, "nll_loss": 1.1367653608322144, "rewards/accuracies": 0.75, "rewards/chosen": -0.09885872900485992, "rewards/margins": 0.043083224445581436, "rewards/rejected": -0.14194196462631226, "step": 7426 }, { "epoch": 4.530730517004728, "grad_norm": 2.364410877227783, "learning_rate": 7.524800979791794e-07, "log_odds_chosen": 1.1814563274383545, "log_odds_ratio": -0.40286117792129517, "logits/chosen": -1.0105228424072266, "logits/rejected": -0.9539918899536133, "logps/chosen": -0.7457178235054016, "logps/rejected": -1.5973622798919678, "loss": 1.0283, "nll_loss": 1.0031960010528564, "rewards/accuracies": 0.875, "rewards/chosen": -0.07457178086042404, "rewards/margins": 0.08516445010900497, "rewards/rejected": -0.15973623096942902, "step": 7427 }, { "epoch": 4.531340552081745, "grad_norm": 2.5677599906921387, "learning_rate": 7.515003061849356e-07, "log_odds_chosen": 1.5004076957702637, "log_odds_ratio": -0.42221859097480774, "logits/chosen": -0.9076154828071594, "logits/rejected": -0.9479495286941528, "logps/chosen": -0.9819268584251404, "logps/rejected": -2.218660831451416, "loss": 0.9844, "nll_loss": 1.1530499458312988, "rewards/accuracies": 0.75, "rewards/chosen": -0.09819268435239792, "rewards/margins": 0.12367339432239532, "rewards/rejected": -0.22186608612537384, "step": 7428 }, { "epoch": 4.531950587158762, "grad_norm": 1.4904710054397583, "learning_rate": 7.50520514390692e-07, "log_odds_chosen": 1.0751469135284424, "log_odds_ratio": -0.6077075004577637, "logits/chosen": -1.0180857181549072, "logits/rejected": -1.0789659023284912, "logps/chosen": -0.9878267049789429, "logps/rejected": -1.9471776485443115, "loss": 1.118, "nll_loss": 1.1146835088729858, "rewards/accuracies": 0.5, "rewards/chosen": -0.09878267347812653, "rewards/margins": 0.09593510627746582, "rewards/rejected": -0.19471776485443115, "step": 7429 }, { "epoch": 4.532560622235779, "grad_norm": 2.026113271713257, "learning_rate": 7.495407225964482e-07, "log_odds_chosen": 3.605057716369629, "log_odds_ratio": -0.15041711926460266, "logits/chosen": -0.9041590690612793, "logits/rejected": -1.0646222829818726, "logps/chosen": -0.4968377351760864, "logps/rejected": -3.1213443279266357, "loss": 1.0919, "nll_loss": 0.7701912522315979, "rewards/accuracies": 1.0, "rewards/chosen": -0.04968377202749252, "rewards/margins": 0.262450635433197, "rewards/rejected": -0.31213444471359253, "step": 7430 }, { "epoch": 4.5331706573127954, "grad_norm": 1.6041597127914429, "learning_rate": 7.485609308022045e-07, "log_odds_chosen": 2.5403032302856445, "log_odds_ratio": -0.2913089990615845, "logits/chosen": -0.9871108531951904, "logits/rejected": -1.0950560569763184, "logps/chosen": -0.6206467151641846, "logps/rejected": -2.462665319442749, "loss": 0.9801, "nll_loss": 0.9902932047843933, "rewards/accuracies": 0.875, "rewards/chosen": -0.062064677476882935, "rewards/margins": 0.18420186638832092, "rewards/rejected": -0.24626654386520386, "step": 7431 }, { "epoch": 4.533780692389812, "grad_norm": 1.7619357109069824, "learning_rate": 7.475811390079609e-07, "log_odds_chosen": 0.7535830140113831, "log_odds_ratio": -0.532573401927948, "logits/chosen": -0.9400090575218201, "logits/rejected": -0.9986703395843506, "logps/chosen": -0.8472135066986084, "logps/rejected": -1.3181768655776978, "loss": 1.1944, "nll_loss": 1.1445218324661255, "rewards/accuracies": 0.75, "rewards/chosen": -0.08472135663032532, "rewards/margins": 0.047096338123083115, "rewards/rejected": -0.13181769847869873, "step": 7432 }, { "epoch": 4.534390727466829, "grad_norm": 1.4440276622772217, "learning_rate": 7.46601347213717e-07, "log_odds_chosen": 2.5111613273620605, "log_odds_ratio": -0.2619911730289459, "logits/chosen": -0.7947361469268799, "logits/rejected": -0.912236213684082, "logps/chosen": -0.6862826347351074, "logps/rejected": -2.6933860778808594, "loss": 0.8639, "nll_loss": 0.8767186403274536, "rewards/accuracies": 1.0, "rewards/chosen": -0.06862826645374298, "rewards/margins": 0.20071034133434296, "rewards/rejected": -0.26933860778808594, "step": 7433 }, { "epoch": 4.535000762543846, "grad_norm": 1.3755037784576416, "learning_rate": 7.456215554194733e-07, "log_odds_chosen": 0.7828192710876465, "log_odds_ratio": -0.5205593109130859, "logits/chosen": -1.1605675220489502, "logits/rejected": -1.0747652053833008, "logps/chosen": -0.8894686698913574, "logps/rejected": -1.5188727378845215, "loss": 1.1766, "nll_loss": 1.2217705249786377, "rewards/accuracies": 0.75, "rewards/chosen": -0.0889468714594841, "rewards/margins": 0.06294040381908417, "rewards/rejected": -0.15188726782798767, "step": 7434 }, { "epoch": 4.535610797620863, "grad_norm": 1.7299604415893555, "learning_rate": 7.446417636252296e-07, "log_odds_chosen": 1.2066650390625, "log_odds_ratio": -0.6115192770957947, "logits/chosen": -0.7890319228172302, "logits/rejected": -0.9006032943725586, "logps/chosen": -0.8166154026985168, "logps/rejected": -1.7239630222320557, "loss": 0.9503, "nll_loss": 0.9991039037704468, "rewards/accuracies": 0.75, "rewards/chosen": -0.08166153728961945, "rewards/margins": 0.09073477238416672, "rewards/rejected": -0.17239630222320557, "step": 7435 }, { "epoch": 4.536220832697881, "grad_norm": 1.2864545583724976, "learning_rate": 7.436619718309859e-07, "log_odds_chosen": 2.75262713432312, "log_odds_ratio": -0.1972641944885254, "logits/chosen": -0.7675228118896484, "logits/rejected": -0.9433143138885498, "logps/chosen": -0.535214900970459, "logps/rejected": -2.553969383239746, "loss": 0.7791, "nll_loss": 0.7082846760749817, "rewards/accuracies": 1.0, "rewards/chosen": -0.05352148786187172, "rewards/margins": 0.2018754482269287, "rewards/rejected": -0.2553969621658325, "step": 7436 }, { "epoch": 4.536830867774897, "grad_norm": 1.203291416168213, "learning_rate": 7.426821800367422e-07, "log_odds_chosen": 1.915697693824768, "log_odds_ratio": -0.536873996257782, "logits/chosen": -0.82127445936203, "logits/rejected": -0.9345504641532898, "logps/chosen": -0.8386655449867249, "logps/rejected": -2.409822940826416, "loss": 0.9715, "nll_loss": 0.8816729784011841, "rewards/accuracies": 0.5, "rewards/chosen": -0.08386655896902084, "rewards/margins": 0.15711572766304016, "rewards/rejected": -0.2409822642803192, "step": 7437 }, { "epoch": 4.537440902851914, "grad_norm": 1.8490161895751953, "learning_rate": 7.417023882424984e-07, "log_odds_chosen": 3.620342493057251, "log_odds_ratio": -0.19664721190929413, "logits/chosen": -0.9095029830932617, "logits/rejected": -1.1213784217834473, "logps/chosen": -0.8285889029502869, "logps/rejected": -3.885155439376831, "loss": 1.0868, "nll_loss": 1.0022883415222168, "rewards/accuracies": 1.0, "rewards/chosen": -0.08285889029502869, "rewards/margins": 0.30565667152404785, "rewards/rejected": -0.38851556181907654, "step": 7438 }, { "epoch": 4.538050937928931, "grad_norm": 1.213416337966919, "learning_rate": 7.407225964482548e-07, "log_odds_chosen": 1.7500603199005127, "log_odds_ratio": -0.32593032717704773, "logits/chosen": -0.8634772896766663, "logits/rejected": -0.9340729713439941, "logps/chosen": -0.5941707491874695, "logps/rejected": -1.796913743019104, "loss": 0.7668, "nll_loss": 0.7002514600753784, "rewards/accuracies": 0.875, "rewards/chosen": -0.05941707640886307, "rewards/margins": 0.12027430534362793, "rewards/rejected": -0.1796913743019104, "step": 7439 }, { "epoch": 4.538660973005948, "grad_norm": 2.3513996601104736, "learning_rate": 7.397428046540109e-07, "log_odds_chosen": 0.7180919051170349, "log_odds_ratio": -0.6076281070709229, "logits/chosen": -1.009692907333374, "logits/rejected": -0.9994817972183228, "logps/chosen": -0.9285950660705566, "logps/rejected": -1.4983553886413574, "loss": 0.9916, "nll_loss": 0.9785952568054199, "rewards/accuracies": 0.5, "rewards/chosen": -0.09285950660705566, "rewards/margins": 0.056976042687892914, "rewards/rejected": -0.14983554184436798, "step": 7440 }, { "epoch": 4.539271008082965, "grad_norm": 1.602040410041809, "learning_rate": 7.387630128597672e-07, "log_odds_chosen": 3.165283203125, "log_odds_ratio": -0.4116799533367157, "logits/chosen": -0.7836904525756836, "logits/rejected": -1.0011570453643799, "logps/chosen": -0.7829306721687317, "logps/rejected": -3.284762382507324, "loss": 1.013, "nll_loss": 0.9006927013397217, "rewards/accuracies": 0.75, "rewards/chosen": -0.07829307019710541, "rewards/margins": 0.2501831650733948, "rewards/rejected": -0.32847627997398376, "step": 7441 }, { "epoch": 4.539881043159982, "grad_norm": 2.4043257236480713, "learning_rate": 7.377832210655236e-07, "log_odds_chosen": 1.8186614513397217, "log_odds_ratio": -0.4315617084503174, "logits/chosen": -1.1485017538070679, "logits/rejected": -1.0921872854232788, "logps/chosen": -0.7585113048553467, "logps/rejected": -2.193126678466797, "loss": 1.1246, "nll_loss": 1.1442326307296753, "rewards/accuracies": 0.75, "rewards/chosen": -0.07585112750530243, "rewards/margins": 0.14346154034137726, "rewards/rejected": -0.2193126678466797, "step": 7442 }, { "epoch": 4.5404910782369985, "grad_norm": 1.3906619548797607, "learning_rate": 7.368034292712798e-07, "log_odds_chosen": 2.177471399307251, "log_odds_ratio": -0.5447030663490295, "logits/chosen": -0.8645404577255249, "logits/rejected": -1.0700559616088867, "logps/chosen": -0.8072683811187744, "logps/rejected": -2.6035356521606445, "loss": 0.9563, "nll_loss": 0.9956433773040771, "rewards/accuracies": 0.625, "rewards/chosen": -0.08072684705257416, "rewards/margins": 0.1796267330646515, "rewards/rejected": -0.26035356521606445, "step": 7443 }, { "epoch": 4.541101113314015, "grad_norm": 2.2547972202301025, "learning_rate": 7.358236374770361e-07, "log_odds_chosen": 5.921505451202393, "log_odds_ratio": -0.0843086764216423, "logits/chosen": -1.0014586448669434, "logits/rejected": -1.21579110622406, "logps/chosen": -0.7156501412391663, "logps/rejected": -5.892303466796875, "loss": 1.1312, "nll_loss": 1.0590100288391113, "rewards/accuracies": 1.0, "rewards/chosen": -0.07156501710414886, "rewards/margins": 0.5176653265953064, "rewards/rejected": -0.5892304182052612, "step": 7444 }, { "epoch": 4.541711148391032, "grad_norm": 8.7584228515625, "learning_rate": 7.348438456827924e-07, "log_odds_chosen": 2.184088945388794, "log_odds_ratio": -0.2273329794406891, "logits/chosen": -0.920678436756134, "logits/rejected": -0.8516406416893005, "logps/chosen": -0.7614205479621887, "logps/rejected": -2.3570761680603027, "loss": 1.0133, "nll_loss": 1.0681084394454956, "rewards/accuracies": 1.0, "rewards/chosen": -0.07614205032587051, "rewards/margins": 0.15956558287143707, "rewards/rejected": -0.23570765554904938, "step": 7445 }, { "epoch": 4.542321183468049, "grad_norm": 9.439900398254395, "learning_rate": 7.338640538885487e-07, "log_odds_chosen": 0.36417675018310547, "log_odds_ratio": -0.6742540597915649, "logits/chosen": -0.726315438747406, "logits/rejected": -0.8056660890579224, "logps/chosen": -0.7695753574371338, "logps/rejected": -1.051804780960083, "loss": 1.0515, "nll_loss": 0.9168033599853516, "rewards/accuracies": 0.5, "rewards/chosen": -0.07695753872394562, "rewards/margins": 0.028222955763339996, "rewards/rejected": -0.10518048703670502, "step": 7446 }, { "epoch": 4.542931218545066, "grad_norm": 9.117708206176758, "learning_rate": 7.328842620943048e-07, "log_odds_chosen": 2.2574329376220703, "log_odds_ratio": -0.443096786737442, "logits/chosen": -0.8418450355529785, "logits/rejected": -0.9067696332931519, "logps/chosen": -0.8298934102058411, "logps/rejected": -2.7016286849975586, "loss": 1.1278, "nll_loss": 0.9876000881195068, "rewards/accuracies": 0.5, "rewards/chosen": -0.08298934251070023, "rewards/margins": 0.18717354536056519, "rewards/rejected": -0.2701629102230072, "step": 7447 }, { "epoch": 4.543541253622084, "grad_norm": 1.5907304286956787, "learning_rate": 7.319044703000612e-07, "log_odds_chosen": 1.6859866380691528, "log_odds_ratio": -0.5534905195236206, "logits/chosen": -1.0913724899291992, "logits/rejected": -1.0937371253967285, "logps/chosen": -0.9299533367156982, "logps/rejected": -2.2922089099884033, "loss": 1.1636, "nll_loss": 1.1058493852615356, "rewards/accuracies": 0.75, "rewards/chosen": -0.09299533069133759, "rewards/margins": 0.13622555136680603, "rewards/rejected": -0.22922088205814362, "step": 7448 }, { "epoch": 4.5441512886991005, "grad_norm": 1.7693612575531006, "learning_rate": 7.309246785058175e-07, "log_odds_chosen": 1.427487850189209, "log_odds_ratio": -0.37473082542419434, "logits/chosen": -1.034518837928772, "logits/rejected": -1.0956875085830688, "logps/chosen": -0.9416265487670898, "logps/rejected": -2.00553560256958, "loss": 0.9996, "nll_loss": 1.4032591581344604, "rewards/accuracies": 0.75, "rewards/chosen": -0.09416265785694122, "rewards/margins": 0.10639091581106186, "rewards/rejected": -0.20055356621742249, "step": 7449 }, { "epoch": 4.544761323776117, "grad_norm": 2.7376327514648438, "learning_rate": 7.299448867115737e-07, "log_odds_chosen": 3.657550096511841, "log_odds_ratio": -0.20698025822639465, "logits/chosen": -0.9493139982223511, "logits/rejected": -1.1787153482437134, "logps/chosen": -0.5072136521339417, "logps/rejected": -3.370344638824463, "loss": 0.9535, "nll_loss": 0.8252020478248596, "rewards/accuracies": 1.0, "rewards/chosen": -0.050721365958452225, "rewards/margins": 0.28631308674812317, "rewards/rejected": -0.3370344638824463, "step": 7450 }, { "epoch": 4.545371358853134, "grad_norm": 1.2793128490447998, "learning_rate": 7.2896509491733e-07, "log_odds_chosen": 0.8727118968963623, "log_odds_ratio": -0.4436221420764923, "logits/chosen": -0.9701288938522339, "logits/rejected": -0.99338698387146, "logps/chosen": -0.8223787546157837, "logps/rejected": -1.4490010738372803, "loss": 1.0246, "nll_loss": 0.9317609071731567, "rewards/accuracies": 0.625, "rewards/chosen": -0.08223788440227509, "rewards/margins": 0.06266222894191742, "rewards/rejected": -0.1449000984430313, "step": 7451 }, { "epoch": 4.545981393930151, "grad_norm": 6.15728759765625, "learning_rate": 7.279853031230863e-07, "log_odds_chosen": 0.8045462369918823, "log_odds_ratio": -0.4867096543312073, "logits/chosen": -0.8560223579406738, "logits/rejected": -0.9318953156471252, "logps/chosen": -0.8293617367744446, "logps/rejected": -1.3804025650024414, "loss": 1.0865, "nll_loss": 1.044785737991333, "rewards/accuracies": 0.625, "rewards/chosen": -0.08293617516756058, "rewards/margins": 0.055104080587625504, "rewards/rejected": -0.13804025948047638, "step": 7452 }, { "epoch": 4.546591429007168, "grad_norm": 10.095927238464355, "learning_rate": 7.270055113288426e-07, "log_odds_chosen": 2.6324479579925537, "log_odds_ratio": -0.5422088503837585, "logits/chosen": -0.669712483882904, "logits/rejected": -0.8097912073135376, "logps/chosen": -0.7522779703140259, "logps/rejected": -2.9895870685577393, "loss": 1.0509, "nll_loss": 0.8763682246208191, "rewards/accuracies": 0.625, "rewards/chosen": -0.07522779703140259, "rewards/margins": 0.2237308919429779, "rewards/rejected": -0.2989587187767029, "step": 7453 }, { "epoch": 4.547201464084185, "grad_norm": 1.4043564796447754, "learning_rate": 7.26025719534599e-07, "log_odds_chosen": 2.7119903564453125, "log_odds_ratio": -0.2948009967803955, "logits/chosen": -1.0467054843902588, "logits/rejected": -1.0331785678863525, "logps/chosen": -0.6430458426475525, "logps/rejected": -2.690641403198242, "loss": 0.9504, "nll_loss": 0.8827744722366333, "rewards/accuracies": 0.875, "rewards/chosen": -0.06430459022521973, "rewards/margins": 0.20475956797599792, "rewards/rejected": -0.26906412839889526, "step": 7454 }, { "epoch": 4.5478114991612015, "grad_norm": 13.225310325622559, "learning_rate": 7.250459277403551e-07, "log_odds_chosen": 2.647395610809326, "log_odds_ratio": -0.35198450088500977, "logits/chosen": -1.0362197160720825, "logits/rejected": -1.1149948835372925, "logps/chosen": -0.8554937839508057, "logps/rejected": -3.0840184688568115, "loss": 1.2315, "nll_loss": 1.0599066019058228, "rewards/accuracies": 0.875, "rewards/chosen": -0.08554938435554504, "rewards/margins": 0.22285248339176178, "rewards/rejected": -0.308401882648468, "step": 7455 }, { "epoch": 4.548421534238218, "grad_norm": 1.7934142351150513, "learning_rate": 7.240661359461114e-07, "log_odds_chosen": 1.5296543836593628, "log_odds_ratio": -0.3831244111061096, "logits/chosen": -0.9795442819595337, "logits/rejected": -1.0577504634857178, "logps/chosen": -0.6937490701675415, "logps/rejected": -1.6374976634979248, "loss": 1.0845, "nll_loss": 1.0969195365905762, "rewards/accuracies": 0.875, "rewards/chosen": -0.06937491148710251, "rewards/margins": 0.094374880194664, "rewards/rejected": -0.1637497842311859, "step": 7456 }, { "epoch": 4.549031569315236, "grad_norm": 1.2370775938034058, "learning_rate": 7.230863441518676e-07, "log_odds_chosen": 3.294795036315918, "log_odds_ratio": -0.3322471082210541, "logits/chosen": -0.8385651111602783, "logits/rejected": -1.0380696058273315, "logps/chosen": -0.7362441420555115, "logps/rejected": -3.38533353805542, "loss": 0.888, "nll_loss": 0.8630187511444092, "rewards/accuracies": 0.75, "rewards/chosen": -0.07362441718578339, "rewards/margins": 0.26490893959999084, "rewards/rejected": -0.33853334188461304, "step": 7457 }, { "epoch": 4.549641604392253, "grad_norm": 2.466142177581787, "learning_rate": 7.22106552357624e-07, "log_odds_chosen": 0.9570800065994263, "log_odds_ratio": -0.5707300901412964, "logits/chosen": -0.8690029978752136, "logits/rejected": -1.0923131704330444, "logps/chosen": -1.0053138732910156, "logps/rejected": -1.8086553812026978, "loss": 1.0751, "nll_loss": 1.0198359489440918, "rewards/accuracies": 0.5, "rewards/chosen": -0.10053139179944992, "rewards/margins": 0.0803341493010521, "rewards/rejected": -0.1808655560016632, "step": 7458 }, { "epoch": 4.55025163946927, "grad_norm": 1.768662691116333, "learning_rate": 7.211267605633803e-07, "log_odds_chosen": 3.474550247192383, "log_odds_ratio": -0.42122992873191833, "logits/chosen": -1.0363649129867554, "logits/rejected": -1.122962474822998, "logps/chosen": -1.0267473459243774, "logps/rejected": -4.174592971801758, "loss": 1.0989, "nll_loss": 1.1288493871688843, "rewards/accuracies": 0.625, "rewards/chosen": -0.10267473757266998, "rewards/margins": 0.31478455662727356, "rewards/rejected": -0.41745930910110474, "step": 7459 }, { "epoch": 4.550861674546287, "grad_norm": 1.2397559881210327, "learning_rate": 7.201469687691365e-07, "log_odds_chosen": 0.4856190085411072, "log_odds_ratio": -0.7630686163902283, "logits/chosen": -0.9016287922859192, "logits/rejected": -0.9755842685699463, "logps/chosen": -0.8444511890411377, "logps/rejected": -1.2945466041564941, "loss": 0.9989, "nll_loss": 0.9974023699760437, "rewards/accuracies": 0.625, "rewards/chosen": -0.08444511145353317, "rewards/margins": 0.0450095534324646, "rewards/rejected": -0.12945467233657837, "step": 7460 }, { "epoch": 4.5514717096233035, "grad_norm": 1.3112436532974243, "learning_rate": 7.191671769748929e-07, "log_odds_chosen": 0.8227748870849609, "log_odds_ratio": -0.4496418237686157, "logits/chosen": -0.9824313521385193, "logits/rejected": -0.8899576663970947, "logps/chosen": -0.9638177156448364, "logps/rejected": -1.5676751136779785, "loss": 1.1038, "nll_loss": 1.3367626667022705, "rewards/accuracies": 0.875, "rewards/chosen": -0.0963817685842514, "rewards/margins": 0.06038574501872063, "rewards/rejected": -0.15676751732826233, "step": 7461 }, { "epoch": 4.55208174470032, "grad_norm": 1.8527836799621582, "learning_rate": 7.18187385180649e-07, "log_odds_chosen": 1.8141992092132568, "log_odds_ratio": -0.3875010013580322, "logits/chosen": -1.0017616748809814, "logits/rejected": -1.1232240200042725, "logps/chosen": -0.9356620907783508, "logps/rejected": -2.418212890625, "loss": 1.0508, "nll_loss": 1.1180716753005981, "rewards/accuracies": 0.625, "rewards/chosen": -0.09356621652841568, "rewards/margins": 0.14825506508350372, "rewards/rejected": -0.2418212890625, "step": 7462 }, { "epoch": 4.552691779777337, "grad_norm": 7.275035858154297, "learning_rate": 7.172075933864053e-07, "log_odds_chosen": 2.8432164192199707, "log_odds_ratio": -0.37875694036483765, "logits/chosen": -0.9428735971450806, "logits/rejected": -1.0081826448440552, "logps/chosen": -0.5868358016014099, "logps/rejected": -2.9380359649658203, "loss": 0.8596, "nll_loss": 0.8283219337463379, "rewards/accuracies": 0.75, "rewards/chosen": -0.05868358165025711, "rewards/margins": 0.23512002825737, "rewards/rejected": -0.2938036024570465, "step": 7463 }, { "epoch": 4.553301814854354, "grad_norm": 3.901994228363037, "learning_rate": 7.162278015921617e-07, "log_odds_chosen": 1.755958914756775, "log_odds_ratio": -0.48378753662109375, "logits/chosen": -0.8081464171409607, "logits/rejected": -0.894223690032959, "logps/chosen": -0.7570093870162964, "logps/rejected": -2.0909907817840576, "loss": 0.9598, "nll_loss": 1.0007240772247314, "rewards/accuracies": 0.75, "rewards/chosen": -0.07570093870162964, "rewards/margins": 0.1333981305360794, "rewards/rejected": -0.20909908413887024, "step": 7464 }, { "epoch": 4.553911849931371, "grad_norm": 3.662592887878418, "learning_rate": 7.152480097979179e-07, "log_odds_chosen": 1.6099985837936401, "log_odds_ratio": -0.303600549697876, "logits/chosen": -1.0439385175704956, "logits/rejected": -0.9703242778778076, "logps/chosen": -0.7537437081336975, "logps/rejected": -1.981676459312439, "loss": 0.8792, "nll_loss": 0.9478927850723267, "rewards/accuracies": 0.875, "rewards/chosen": -0.07537437230348587, "rewards/margins": 0.1227932870388031, "rewards/rejected": -0.19816766679286957, "step": 7465 }, { "epoch": 4.554521885008388, "grad_norm": 2.3466920852661133, "learning_rate": 7.142682180036742e-07, "log_odds_chosen": 3.0062336921691895, "log_odds_ratio": -0.3989436626434326, "logits/chosen": -0.9737694263458252, "logits/rejected": -1.0123515129089355, "logps/chosen": -1.016336441040039, "logps/rejected": -3.814237117767334, "loss": 1.1158, "nll_loss": 1.1558977365493774, "rewards/accuracies": 0.75, "rewards/chosen": -0.10163365304470062, "rewards/margins": 0.27979007363319397, "rewards/rejected": -0.3814237117767334, "step": 7466 }, { "epoch": 4.555131920085405, "grad_norm": 2.169419288635254, "learning_rate": 7.132884262094304e-07, "log_odds_chosen": 2.1093759536743164, "log_odds_ratio": -0.46463313698768616, "logits/chosen": -0.6486231684684753, "logits/rejected": -0.68895423412323, "logps/chosen": -0.9289421439170837, "logps/rejected": -2.741539478302002, "loss": 0.9756, "nll_loss": 0.9897241592407227, "rewards/accuracies": 0.625, "rewards/chosen": -0.09289421886205673, "rewards/margins": 0.18125976622104645, "rewards/rejected": -0.2741539776325226, "step": 7467 }, { "epoch": 4.555741955162421, "grad_norm": 1.7922130823135376, "learning_rate": 7.123086344151868e-07, "log_odds_chosen": 2.0455379486083984, "log_odds_ratio": -0.30687475204467773, "logits/chosen": -0.8065147399902344, "logits/rejected": -0.9434722661972046, "logps/chosen": -0.665296196937561, "logps/rejected": -2.268568754196167, "loss": 1.0666, "nll_loss": 0.7943007946014404, "rewards/accuracies": 0.875, "rewards/chosen": -0.06652961671352386, "rewards/margins": 0.1603272706270218, "rewards/rejected": -0.22685687243938446, "step": 7468 }, { "epoch": 4.556351990239439, "grad_norm": 1.730197548866272, "learning_rate": 7.113288426209431e-07, "log_odds_chosen": 1.8249249458312988, "log_odds_ratio": -0.42556723952293396, "logits/chosen": -0.7525229454040527, "logits/rejected": -0.8910850882530212, "logps/chosen": -0.6537157297134399, "logps/rejected": -2.0352957248687744, "loss": 0.9453, "nll_loss": 0.6875393390655518, "rewards/accuracies": 0.625, "rewards/chosen": -0.065371572971344, "rewards/margins": 0.13815800845623016, "rewards/rejected": -0.20352958142757416, "step": 7469 }, { "epoch": 4.556962025316456, "grad_norm": 7.779201507568359, "learning_rate": 7.103490508266992e-07, "log_odds_chosen": 1.146104335784912, "log_odds_ratio": -0.47677290439605713, "logits/chosen": -0.9727264642715454, "logits/rejected": -1.1096086502075195, "logps/chosen": -0.8064372539520264, "logps/rejected": -1.5693548917770386, "loss": 1.0729, "nll_loss": 0.8991971015930176, "rewards/accuracies": 0.75, "rewards/chosen": -0.08064372092485428, "rewards/margins": 0.0762917697429657, "rewards/rejected": -0.15693548321723938, "step": 7470 }, { "epoch": 4.557572060393473, "grad_norm": 1.3044335842132568, "learning_rate": 7.093692590324556e-07, "log_odds_chosen": 1.8702311515808105, "log_odds_ratio": -0.5192309617996216, "logits/chosen": -0.8714397549629211, "logits/rejected": -0.9943362474441528, "logps/chosen": -0.7285804748535156, "logps/rejected": -2.2077126502990723, "loss": 1.044, "nll_loss": 0.8010402321815491, "rewards/accuracies": 0.75, "rewards/chosen": -0.0728580504655838, "rewards/margins": 0.14791320264339447, "rewards/rejected": -0.22077125310897827, "step": 7471 }, { "epoch": 4.55818209547049, "grad_norm": 1.1963934898376465, "learning_rate": 7.083894672382118e-07, "log_odds_chosen": 2.475790023803711, "log_odds_ratio": -0.2319849133491516, "logits/chosen": -0.8829771876335144, "logits/rejected": -1.0154346227645874, "logps/chosen": -0.7683929204940796, "logps/rejected": -2.788717746734619, "loss": 0.9675, "nll_loss": 0.9633270502090454, "rewards/accuracies": 1.0, "rewards/chosen": -0.07683929055929184, "rewards/margins": 0.20203249156475067, "rewards/rejected": -0.2788717746734619, "step": 7472 }, { "epoch": 4.5587921305475065, "grad_norm": 9.617517471313477, "learning_rate": 7.074096754439681e-07, "log_odds_chosen": 1.6187852621078491, "log_odds_ratio": -0.37913262844085693, "logits/chosen": -0.8157883882522583, "logits/rejected": -0.8905196785926819, "logps/chosen": -0.7415837049484253, "logps/rejected": -2.0095934867858887, "loss": 0.9838, "nll_loss": 0.881198525428772, "rewards/accuracies": 0.75, "rewards/chosen": -0.07415837049484253, "rewards/margins": 0.12680098414421082, "rewards/rejected": -0.20095935463905334, "step": 7473 }, { "epoch": 4.559402165624523, "grad_norm": 2.4229040145874023, "learning_rate": 7.064298836497245e-07, "log_odds_chosen": 2.4982376098632812, "log_odds_ratio": -0.275184690952301, "logits/chosen": -0.9621609449386597, "logits/rejected": -1.105926752090454, "logps/chosen": -0.6296409368515015, "logps/rejected": -2.55193829536438, "loss": 1.0156, "nll_loss": 1.196481704711914, "rewards/accuracies": 0.875, "rewards/chosen": -0.06296409666538239, "rewards/margins": 0.1922297328710556, "rewards/rejected": -0.255193829536438, "step": 7474 }, { "epoch": 4.56001220070154, "grad_norm": 6.241633892059326, "learning_rate": 7.054500918554807e-07, "log_odds_chosen": 1.776141881942749, "log_odds_ratio": -0.37740832567214966, "logits/chosen": -0.9365782737731934, "logits/rejected": -1.0320556163787842, "logps/chosen": -0.7021512985229492, "logps/rejected": -1.9709866046905518, "loss": 1.106, "nll_loss": 0.9193358421325684, "rewards/accuracies": 0.625, "rewards/chosen": -0.0702151209115982, "rewards/margins": 0.12688353657722473, "rewards/rejected": -0.19709865748882294, "step": 7475 }, { "epoch": 4.560622235778557, "grad_norm": 2.5406765937805176, "learning_rate": 7.04470300061237e-07, "log_odds_chosen": 3.719608783721924, "log_odds_ratio": -0.5148095488548279, "logits/chosen": -0.868302583694458, "logits/rejected": -1.0065336227416992, "logps/chosen": -0.8653712868690491, "logps/rejected": -4.218863487243652, "loss": 1.2498, "nll_loss": 1.1579993963241577, "rewards/accuracies": 0.625, "rewards/chosen": -0.08653713017702103, "rewards/margins": 0.3353492319583893, "rewards/rejected": -0.4218863844871521, "step": 7476 }, { "epoch": 4.561232270855574, "grad_norm": 8.099190711975098, "learning_rate": 7.034905082669931e-07, "log_odds_chosen": 1.8916689157485962, "log_odds_ratio": -0.3274613618850708, "logits/chosen": -0.6205731630325317, "logits/rejected": -0.8439955711364746, "logps/chosen": -0.6125249266624451, "logps/rejected": -1.8410747051239014, "loss": 0.8052, "nll_loss": 0.7656755447387695, "rewards/accuracies": 0.75, "rewards/chosen": -0.061252497136592865, "rewards/margins": 0.12285497784614563, "rewards/rejected": -0.1841074824333191, "step": 7477 }, { "epoch": 4.561842305932591, "grad_norm": 1.5064159631729126, "learning_rate": 7.025107164727495e-07, "log_odds_chosen": 1.7686896324157715, "log_odds_ratio": -0.3536607027053833, "logits/chosen": -0.7514229416847229, "logits/rejected": -0.8381384611129761, "logps/chosen": -0.6274904608726501, "logps/rejected": -1.9233635663986206, "loss": 1.0155, "nll_loss": 0.7682807445526123, "rewards/accuracies": 0.875, "rewards/chosen": -0.06274904310703278, "rewards/margins": 0.129587322473526, "rewards/rejected": -0.19233636558055878, "step": 7478 }, { "epoch": 4.5624523410096085, "grad_norm": 4.560080528259277, "learning_rate": 7.015309246785058e-07, "log_odds_chosen": 1.6151723861694336, "log_odds_ratio": -0.43556541204452515, "logits/chosen": -0.8802235126495361, "logits/rejected": -0.9987928867340088, "logps/chosen": -0.7801064252853394, "logps/rejected": -1.9460341930389404, "loss": 0.8962, "nll_loss": 0.8323379755020142, "rewards/accuracies": 0.875, "rewards/chosen": -0.07801064848899841, "rewards/margins": 0.11659277230501175, "rewards/rejected": -0.19460341334342957, "step": 7479 }, { "epoch": 4.563062376086625, "grad_norm": 1.9020031690597534, "learning_rate": 7.00551132884262e-07, "log_odds_chosen": 0.5904470682144165, "log_odds_ratio": -0.5918610095977783, "logits/chosen": -0.664168119430542, "logits/rejected": -0.8073081970214844, "logps/chosen": -0.691784143447876, "logps/rejected": -1.1133536100387573, "loss": 1.022, "nll_loss": 0.8534831404685974, "rewards/accuracies": 0.625, "rewards/chosen": -0.06917841732501984, "rewards/margins": 0.042156949639320374, "rewards/rejected": -0.11133536696434021, "step": 7480 }, { "epoch": 4.563672411163642, "grad_norm": 4.947437763214111, "learning_rate": 6.995713410900184e-07, "log_odds_chosen": 1.2937569618225098, "log_odds_ratio": -0.6416598558425903, "logits/chosen": -1.0750268697738647, "logits/rejected": -1.0518780946731567, "logps/chosen": -1.1958093643188477, "logps/rejected": -2.3963425159454346, "loss": 1.1681, "nll_loss": 1.3239465951919556, "rewards/accuracies": 0.375, "rewards/chosen": -0.11958092451095581, "rewards/margins": 0.12005332857370377, "rewards/rejected": -0.23963426053524017, "step": 7481 }, { "epoch": 4.564282446240659, "grad_norm": 1.6621390581130981, "learning_rate": 6.985915492957746e-07, "log_odds_chosen": 0.9940706491470337, "log_odds_ratio": -0.47048884630203247, "logits/chosen": -0.8032299876213074, "logits/rejected": -0.8135193586349487, "logps/chosen": -0.6897153854370117, "logps/rejected": -1.4099814891815186, "loss": 1.2104, "nll_loss": 0.9344525933265686, "rewards/accuracies": 0.75, "rewards/chosen": -0.06897153705358505, "rewards/margins": 0.07202662527561188, "rewards/rejected": -0.14099815487861633, "step": 7482 }, { "epoch": 4.564892481317676, "grad_norm": 1.5077089071273804, "learning_rate": 6.976117575015309e-07, "log_odds_chosen": 3.765901803970337, "log_odds_ratio": -0.22189432382583618, "logits/chosen": -0.8969213366508484, "logits/rejected": -1.1641827821731567, "logps/chosen": -0.6351402997970581, "logps/rejected": -3.7491135597229004, "loss": 1.0317, "nll_loss": 1.0161277055740356, "rewards/accuracies": 0.875, "rewards/chosen": -0.06351403146982193, "rewards/margins": 0.3113973140716553, "rewards/rejected": -0.3749113380908966, "step": 7483 }, { "epoch": 4.565502516394693, "grad_norm": 1.8588484525680542, "learning_rate": 6.966319657072872e-07, "log_odds_chosen": 0.911615252494812, "log_odds_ratio": -0.5430728197097778, "logits/chosen": -0.9847668409347534, "logits/rejected": -0.9851011037826538, "logps/chosen": -0.8204096555709839, "logps/rejected": -1.5399627685546875, "loss": 0.9645, "nll_loss": 0.944378674030304, "rewards/accuracies": 0.625, "rewards/chosen": -0.08204097300767899, "rewards/margins": 0.07195530831813812, "rewards/rejected": -0.1539962887763977, "step": 7484 }, { "epoch": 4.56611255147171, "grad_norm": 12.565804481506348, "learning_rate": 6.956521739130434e-07, "log_odds_chosen": 2.2150275707244873, "log_odds_ratio": -0.30506816506385803, "logits/chosen": -1.052101492881775, "logits/rejected": -1.0722836256027222, "logps/chosen": -1.0043519735336304, "logps/rejected": -2.7654738426208496, "loss": 1.2268, "nll_loss": 1.4380072355270386, "rewards/accuracies": 0.75, "rewards/chosen": -0.10043519735336304, "rewards/margins": 0.17611220479011536, "rewards/rejected": -0.276547372341156, "step": 7485 }, { "epoch": 4.566722586548726, "grad_norm": 12.725624084472656, "learning_rate": 6.946723821187997e-07, "log_odds_chosen": 2.1212332248687744, "log_odds_ratio": -0.35854285955429077, "logits/chosen": -1.0242352485656738, "logits/rejected": -0.8610174655914307, "logps/chosen": -1.0486423969268799, "logps/rejected": -2.8306572437286377, "loss": 1.0423, "nll_loss": 1.1682475805282593, "rewards/accuracies": 0.75, "rewards/chosen": -0.10486423969268799, "rewards/margins": 0.17820149660110474, "rewards/rejected": -0.28306570649147034, "step": 7486 }, { "epoch": 4.567332621625743, "grad_norm": 2.1782848834991455, "learning_rate": 6.93692590324556e-07, "log_odds_chosen": 5.107483863830566, "log_odds_ratio": -0.1896333545446396, "logits/chosen": -0.6123831868171692, "logits/rejected": -1.1710128784179688, "logps/chosen": -0.5688409805297852, "logps/rejected": -4.990305423736572, "loss": 1.1649, "nll_loss": 0.7354781627655029, "rewards/accuracies": 0.875, "rewards/chosen": -0.05688409507274628, "rewards/margins": 0.4421464800834656, "rewards/rejected": -0.49903053045272827, "step": 7487 }, { "epoch": 4.56794265670276, "grad_norm": 1.303759217262268, "learning_rate": 6.927127985303123e-07, "log_odds_chosen": 2.719759464263916, "log_odds_ratio": -0.4213927090167999, "logits/chosen": -0.8162508606910706, "logits/rejected": -0.8955399990081787, "logps/chosen": -0.8177663087844849, "logps/rejected": -3.182739734649658, "loss": 0.9955, "nll_loss": 1.05033540725708, "rewards/accuracies": 0.75, "rewards/chosen": -0.08177663385868073, "rewards/margins": 0.23649731278419495, "rewards/rejected": -0.31827399134635925, "step": 7488 }, { "epoch": 4.568552691779777, "grad_norm": 5.435584545135498, "learning_rate": 6.917330067360686e-07, "log_odds_chosen": 2.185431480407715, "log_odds_ratio": -0.5645440220832825, "logits/chosen": -0.8804001808166504, "logits/rejected": -1.0744285583496094, "logps/chosen": -0.7992554903030396, "logps/rejected": -2.7070164680480957, "loss": 1.0509, "nll_loss": 1.0159379243850708, "rewards/accuracies": 0.625, "rewards/chosen": -0.0799255520105362, "rewards/margins": 0.19077609479427338, "rewards/rejected": -0.27070164680480957, "step": 7489 }, { "epoch": 4.569162726856794, "grad_norm": 7.412477493286133, "learning_rate": 6.907532149418249e-07, "log_odds_chosen": 3.458472728729248, "log_odds_ratio": -0.45579811930656433, "logits/chosen": -0.9957748651504517, "logits/rejected": -1.0633678436279297, "logps/chosen": -0.8247690200805664, "logps/rejected": -3.932297945022583, "loss": 1.0179, "nll_loss": 1.1057605743408203, "rewards/accuracies": 0.625, "rewards/chosen": -0.0824768990278244, "rewards/margins": 0.31075286865234375, "rewards/rejected": -0.39322978258132935, "step": 7490 }, { "epoch": 4.5697727619338115, "grad_norm": 1.7176389694213867, "learning_rate": 6.897734231475811e-07, "log_odds_chosen": 1.7888349294662476, "log_odds_ratio": -0.479049950838089, "logits/chosen": -0.8211265802383423, "logits/rejected": -0.7804293632507324, "logps/chosen": -0.8490652441978455, "logps/rejected": -2.1943106651306152, "loss": 1.0497, "nll_loss": 1.1974601745605469, "rewards/accuracies": 0.625, "rewards/chosen": -0.08490652590990067, "rewards/margins": 0.13452453911304474, "rewards/rejected": -0.219431072473526, "step": 7491 }, { "epoch": 4.570382797010828, "grad_norm": 9.599357604980469, "learning_rate": 6.887936313533373e-07, "log_odds_chosen": 1.5492910146713257, "log_odds_ratio": -0.4151945412158966, "logits/chosen": -0.7011821269989014, "logits/rejected": -0.9119402170181274, "logps/chosen": -0.8095662593841553, "logps/rejected": -2.0175747871398926, "loss": 1.0334, "nll_loss": 0.8571053743362427, "rewards/accuracies": 0.75, "rewards/chosen": -0.08095662295818329, "rewards/margins": 0.12080087512731552, "rewards/rejected": -0.2017574906349182, "step": 7492 }, { "epoch": 4.570992832087845, "grad_norm": 2.126004695892334, "learning_rate": 6.878138395590936e-07, "log_odds_chosen": 2.4387447834014893, "log_odds_ratio": -0.47939446568489075, "logits/chosen": -0.8887017965316772, "logits/rejected": -0.9723993539810181, "logps/chosen": -0.9308344721794128, "logps/rejected": -3.1192426681518555, "loss": 1.1512, "nll_loss": 1.0982756614685059, "rewards/accuracies": 0.75, "rewards/chosen": -0.0930834487080574, "rewards/margins": 0.2188408374786377, "rewards/rejected": -0.3119242787361145, "step": 7493 }, { "epoch": 4.571602867164862, "grad_norm": 2.204718589782715, "learning_rate": 6.8683404776485e-07, "log_odds_chosen": 3.402773857116699, "log_odds_ratio": -0.19321003556251526, "logits/chosen": -0.9621231555938721, "logits/rejected": -1.135936975479126, "logps/chosen": -0.6766361594200134, "logps/rejected": -3.377713203430176, "loss": 1.1604, "nll_loss": 0.9753851890563965, "rewards/accuracies": 0.875, "rewards/chosen": -0.06766361743211746, "rewards/margins": 0.2701077163219452, "rewards/rejected": -0.33777132630348206, "step": 7494 }, { "epoch": 4.572212902241879, "grad_norm": 1.7268818616867065, "learning_rate": 6.858542559706062e-07, "log_odds_chosen": 2.062938690185547, "log_odds_ratio": -0.45315074920654297, "logits/chosen": -0.9644662141799927, "logits/rejected": -1.0972849130630493, "logps/chosen": -0.6294398903846741, "logps/rejected": -2.061995506286621, "loss": 1.1308, "nll_loss": 1.1247375011444092, "rewards/accuracies": 0.75, "rewards/chosen": -0.06294399499893188, "rewards/margins": 0.1432555615901947, "rewards/rejected": -0.2061995565891266, "step": 7495 }, { "epoch": 4.572822937318896, "grad_norm": 10.56337833404541, "learning_rate": 6.848744641763625e-07, "log_odds_chosen": 1.8339600563049316, "log_odds_ratio": -0.39694151282310486, "logits/chosen": -0.9901387095451355, "logits/rejected": -1.0051593780517578, "logps/chosen": -0.8985006213188171, "logps/rejected": -2.301175594329834, "loss": 1.1534, "nll_loss": 1.4756942987442017, "rewards/accuracies": 0.875, "rewards/chosen": -0.08985006809234619, "rewards/margins": 0.1402675062417984, "rewards/rejected": -0.2301175594329834, "step": 7496 }, { "epoch": 4.573432972395913, "grad_norm": 1.7563825845718384, "learning_rate": 6.838946723821188e-07, "log_odds_chosen": 1.3055590391159058, "log_odds_ratio": -0.36291879415512085, "logits/chosen": -0.8533714413642883, "logits/rejected": -0.9036917686462402, "logps/chosen": -0.794012188911438, "logps/rejected": -1.830355167388916, "loss": 1.049, "nll_loss": 0.9771072864532471, "rewards/accuracies": 1.0, "rewards/chosen": -0.07940120995044708, "rewards/margins": 0.103634312748909, "rewards/rejected": -0.18303552269935608, "step": 7497 }, { "epoch": 4.5740430074729295, "grad_norm": 1.6688743829727173, "learning_rate": 6.829148805878751e-07, "log_odds_chosen": 2.380253553390503, "log_odds_ratio": -0.38231319189071655, "logits/chosen": -0.7851178050041199, "logits/rejected": -0.9820944666862488, "logps/chosen": -0.7189836502075195, "logps/rejected": -2.64858341217041, "loss": 1.0612, "nll_loss": 1.052633285522461, "rewards/accuracies": 0.625, "rewards/chosen": -0.07189837098121643, "rewards/margins": 0.1929599642753601, "rewards/rejected": -0.2648583650588989, "step": 7498 }, { "epoch": 4.574653042549946, "grad_norm": 1.4699922800064087, "learning_rate": 6.819350887936313e-07, "log_odds_chosen": 2.8361258506774902, "log_odds_ratio": -0.26362553238868713, "logits/chosen": -0.8014838099479675, "logits/rejected": -0.9770858287811279, "logps/chosen": -0.6775004863739014, "logps/rejected": -2.9414920806884766, "loss": 0.9818, "nll_loss": 0.9056407809257507, "rewards/accuracies": 1.0, "rewards/chosen": -0.06775004416704178, "rewards/margins": 0.22639916837215424, "rewards/rejected": -0.2941492199897766, "step": 7499 }, { "epoch": 4.575263077626964, "grad_norm": 4.458709716796875, "learning_rate": 6.809552969993876e-07, "log_odds_chosen": 1.4674910306930542, "log_odds_ratio": -0.5499557852745056, "logits/chosen": -0.9916938543319702, "logits/rejected": -1.0731110572814941, "logps/chosen": -0.9473665356636047, "logps/rejected": -2.0874433517456055, "loss": 1.074, "nll_loss": 1.3743404150009155, "rewards/accuracies": 0.625, "rewards/chosen": -0.09473665803670883, "rewards/margins": 0.11400766670703888, "rewards/rejected": -0.2087443321943283, "step": 7500 }, { "epoch": 4.575873112703981, "grad_norm": 1.245063066482544, "learning_rate": 6.799755052051439e-07, "log_odds_chosen": 2.0117714405059814, "log_odds_ratio": -0.3515397012233734, "logits/chosen": -1.0666195154190063, "logits/rejected": -0.964342474937439, "logps/chosen": -0.8220828771591187, "logps/rejected": -2.363823175430298, "loss": 1.0562, "nll_loss": 1.1158578395843506, "rewards/accuracies": 0.75, "rewards/chosen": -0.0822082906961441, "rewards/margins": 0.1541740447282791, "rewards/rejected": -0.23638233542442322, "step": 7501 }, { "epoch": 4.576483147780998, "grad_norm": 11.902523040771484, "learning_rate": 6.789957134109001e-07, "log_odds_chosen": 2.6608638763427734, "log_odds_ratio": -0.32994353771209717, "logits/chosen": -1.0304920673370361, "logits/rejected": -1.2142138481140137, "logps/chosen": -0.782097578048706, "logps/rejected": -3.0219507217407227, "loss": 1.0718, "nll_loss": 1.0692379474639893, "rewards/accuracies": 0.875, "rewards/chosen": -0.0782097578048706, "rewards/margins": 0.22398532927036285, "rewards/rejected": -0.30219507217407227, "step": 7502 }, { "epoch": 4.577093182858015, "grad_norm": 6.090925216674805, "learning_rate": 6.780159216166565e-07, "log_odds_chosen": 0.9744300842285156, "log_odds_ratio": -0.44336241483688354, "logits/chosen": -0.8124964833259583, "logits/rejected": -0.7822571992874146, "logps/chosen": -0.7307640314102173, "logps/rejected": -1.4410854578018188, "loss": 1.1252, "nll_loss": 0.8340098261833191, "rewards/accuracies": 0.875, "rewards/chosen": -0.07307639718055725, "rewards/margins": 0.07103215903043747, "rewards/rejected": -0.14410856366157532, "step": 7503 }, { "epoch": 4.577703217935031, "grad_norm": 1.6006170511245728, "learning_rate": 6.770361298224128e-07, "log_odds_chosen": 1.8548707962036133, "log_odds_ratio": -0.5585095882415771, "logits/chosen": -0.9326986074447632, "logits/rejected": -0.961909830570221, "logps/chosen": -0.8039631843566895, "logps/rejected": -2.1053218841552734, "loss": 1.112, "nll_loss": 1.1190249919891357, "rewards/accuracies": 0.75, "rewards/chosen": -0.08039631694555283, "rewards/margins": 0.13013587892055511, "rewards/rejected": -0.21053220331668854, "step": 7504 }, { "epoch": 4.578313253012048, "grad_norm": 1.2330173254013062, "learning_rate": 6.76056338028169e-07, "log_odds_chosen": 1.182744026184082, "log_odds_ratio": -0.6260802745819092, "logits/chosen": -0.6890794634819031, "logits/rejected": -0.7263805270195007, "logps/chosen": -0.9049879312515259, "logps/rejected": -1.9245171546936035, "loss": 1.2234, "nll_loss": 0.9680446982383728, "rewards/accuracies": 0.375, "rewards/chosen": -0.09049879014492035, "rewards/margins": 0.1019529178738594, "rewards/rejected": -0.19245170056819916, "step": 7505 }, { "epoch": 4.578923288089065, "grad_norm": 2.9569623470306396, "learning_rate": 6.750765462339252e-07, "log_odds_chosen": 2.0929436683654785, "log_odds_ratio": -0.3156310021877289, "logits/chosen": -0.9399116039276123, "logits/rejected": -1.0492973327636719, "logps/chosen": -0.7204161882400513, "logps/rejected": -2.3821699619293213, "loss": 1.1052, "nll_loss": 1.008838415145874, "rewards/accuracies": 0.75, "rewards/chosen": -0.07204161584377289, "rewards/margins": 0.16617539525032043, "rewards/rejected": -0.23821701109409332, "step": 7506 }, { "epoch": 4.579533323166082, "grad_norm": 3.469007968902588, "learning_rate": 6.740967544396815e-07, "log_odds_chosen": 4.635190486907959, "log_odds_ratio": -0.32826608419418335, "logits/chosen": -0.8727338314056396, "logits/rejected": -1.0027005672454834, "logps/chosen": -0.8002927303314209, "logps/rejected": -4.8247270584106445, "loss": 0.9748, "nll_loss": 0.9386138916015625, "rewards/accuracies": 0.75, "rewards/chosen": -0.08002927899360657, "rewards/margins": 0.40244340896606445, "rewards/rejected": -0.48247265815734863, "step": 7507 }, { "epoch": 4.580143358243099, "grad_norm": 1.7359135150909424, "learning_rate": 6.731169626454378e-07, "log_odds_chosen": 3.322951316833496, "log_odds_ratio": -0.18651032447814941, "logits/chosen": -0.7619942426681519, "logits/rejected": -1.0562798976898193, "logps/chosen": -0.8046374917030334, "logps/rejected": -3.53000807762146, "loss": 0.9193, "nll_loss": 1.0172061920166016, "rewards/accuracies": 1.0, "rewards/chosen": -0.08046375215053558, "rewards/margins": 0.2725370526313782, "rewards/rejected": -0.35300078988075256, "step": 7508 }, { "epoch": 4.580753393320116, "grad_norm": 1.0737744569778442, "learning_rate": 6.721371708511941e-07, "log_odds_chosen": 3.482780933380127, "log_odds_ratio": -0.31120187044143677, "logits/chosen": -0.5404385328292847, "logits/rejected": -0.9744217395782471, "logps/chosen": -0.5900416374206543, "logps/rejected": -3.406248092651367, "loss": 0.8264, "nll_loss": 0.7640246748924255, "rewards/accuracies": 0.875, "rewards/chosen": -0.05900416523218155, "rewards/margins": 0.28162065148353577, "rewards/rejected": -0.3406248092651367, "step": 7509 }, { "epoch": 4.5813634283971325, "grad_norm": 3.8550491333007812, "learning_rate": 6.711573790569504e-07, "log_odds_chosen": 2.8133463859558105, "log_odds_ratio": -0.18452627956867218, "logits/chosen": -0.8997194766998291, "logits/rejected": -1.0267457962036133, "logps/chosen": -0.6395950317382812, "logps/rejected": -2.7187905311584473, "loss": 1.0169, "nll_loss": 0.8127745985984802, "rewards/accuracies": 1.0, "rewards/chosen": -0.0639595091342926, "rewards/margins": 0.20791953802108765, "rewards/rejected": -0.27187904715538025, "step": 7510 }, { "epoch": 4.581973463474149, "grad_norm": 3.239354133605957, "learning_rate": 6.701775872627067e-07, "log_odds_chosen": 1.8058832883834839, "log_odds_ratio": -0.4717245101928711, "logits/chosen": -0.8140295743942261, "logits/rejected": -0.792472243309021, "logps/chosen": -0.6350786685943604, "logps/rejected": -1.7544827461242676, "loss": 1.134, "nll_loss": 0.788230836391449, "rewards/accuracies": 0.75, "rewards/chosen": -0.06350786983966827, "rewards/margins": 0.1119404062628746, "rewards/rejected": -0.17544826865196228, "step": 7511 }, { "epoch": 4.582583498551166, "grad_norm": 1.4649327993392944, "learning_rate": 6.691977954684629e-07, "log_odds_chosen": 3.516860246658325, "log_odds_ratio": -0.39944106340408325, "logits/chosen": -0.7668386697769165, "logits/rejected": -1.0129539966583252, "logps/chosen": -0.7563369274139404, "logps/rejected": -3.8886256217956543, "loss": 0.991, "nll_loss": 0.8686317205429077, "rewards/accuracies": 0.625, "rewards/chosen": -0.0756336972117424, "rewards/margins": 0.31322887539863586, "rewards/rejected": -0.3888625502586365, "step": 7512 }, { "epoch": 4.583193533628184, "grad_norm": 5.796029567718506, "learning_rate": 6.682180036742192e-07, "log_odds_chosen": 2.0878419876098633, "log_odds_ratio": -0.5206257104873657, "logits/chosen": -0.8558592796325684, "logits/rejected": -1.0932033061981201, "logps/chosen": -0.7816349267959595, "logps/rejected": -2.505182981491089, "loss": 1.0755, "nll_loss": 1.1630773544311523, "rewards/accuracies": 0.625, "rewards/chosen": -0.07816348969936371, "rewards/margins": 0.1723548322916031, "rewards/rejected": -0.2505183219909668, "step": 7513 }, { "epoch": 4.583803568705201, "grad_norm": 1.3910996913909912, "learning_rate": 6.672382118799754e-07, "log_odds_chosen": 2.2904274463653564, "log_odds_ratio": -0.3071298897266388, "logits/chosen": -0.7791154980659485, "logits/rejected": -0.7954397797584534, "logps/chosen": -0.6774574518203735, "logps/rejected": -2.4114065170288086, "loss": 0.8183, "nll_loss": 0.8646431565284729, "rewards/accuracies": 0.75, "rewards/chosen": -0.06774574518203735, "rewards/margins": 0.17339490354061127, "rewards/rejected": -0.24114063382148743, "step": 7514 }, { "epoch": 4.584413603782218, "grad_norm": 2.4424726963043213, "learning_rate": 6.662584200857317e-07, "log_odds_chosen": 2.78157901763916, "log_odds_ratio": -0.27661311626434326, "logits/chosen": -0.8666912913322449, "logits/rejected": -1.0697572231292725, "logps/chosen": -0.807503879070282, "logps/rejected": -3.0579757690429688, "loss": 1.0671, "nll_loss": 0.9423092603683472, "rewards/accuracies": 1.0, "rewards/chosen": -0.08075039088726044, "rewards/margins": 0.22504720091819763, "rewards/rejected": -0.3057975769042969, "step": 7515 }, { "epoch": 4.5850236388592345, "grad_norm": 3.1646952629089355, "learning_rate": 6.652786282914881e-07, "log_odds_chosen": 2.202918291091919, "log_odds_ratio": -0.5038449764251709, "logits/chosen": -0.8510276079177856, "logits/rejected": -0.986843466758728, "logps/chosen": -0.796283483505249, "logps/rejected": -2.6903293132781982, "loss": 1.1909, "nll_loss": 1.017104148864746, "rewards/accuracies": 0.625, "rewards/chosen": -0.0796283483505249, "rewards/margins": 0.18940460681915283, "rewards/rejected": -0.26903295516967773, "step": 7516 }, { "epoch": 4.585633673936251, "grad_norm": 1.2614436149597168, "learning_rate": 6.642988364972443e-07, "log_odds_chosen": 2.787768840789795, "log_odds_ratio": -0.315716415643692, "logits/chosen": -0.9046400785446167, "logits/rejected": -0.9862841367721558, "logps/chosen": -0.7795021533966064, "logps/rejected": -3.1310651302337646, "loss": 1.0567, "nll_loss": 1.0082106590270996, "rewards/accuracies": 0.875, "rewards/chosen": -0.07795020937919617, "rewards/margins": 0.23515631258487701, "rewards/rejected": -0.313106507062912, "step": 7517 }, { "epoch": 4.586243709013268, "grad_norm": 6.266607761383057, "learning_rate": 6.633190447030006e-07, "log_odds_chosen": 3.8206610679626465, "log_odds_ratio": -0.13832814991474152, "logits/chosen": -0.4137810468673706, "logits/rejected": -0.7999254465103149, "logps/chosen": -0.40461465716362, "logps/rejected": -3.327641487121582, "loss": 0.9721, "nll_loss": 0.7313022613525391, "rewards/accuracies": 1.0, "rewards/chosen": -0.0404614694416523, "rewards/margins": 0.292302668094635, "rewards/rejected": -0.3327641487121582, "step": 7518 }, { "epoch": 4.586853744090285, "grad_norm": 5.523811340332031, "learning_rate": 6.623392529087568e-07, "log_odds_chosen": 1.7656697034835815, "log_odds_ratio": -0.35965782403945923, "logits/chosen": -0.9049805402755737, "logits/rejected": -1.017307162284851, "logps/chosen": -0.9038864374160767, "logps/rejected": -2.289130210876465, "loss": 1.0745, "nll_loss": 1.1455821990966797, "rewards/accuracies": 0.875, "rewards/chosen": -0.09038864821195602, "rewards/margins": 0.1385243833065033, "rewards/rejected": -0.22891302406787872, "step": 7519 }, { "epoch": 4.587463779167302, "grad_norm": 1.7405357360839844, "learning_rate": 6.613594611145132e-07, "log_odds_chosen": 2.18605899810791, "log_odds_ratio": -0.4015902280807495, "logits/chosen": -0.7365818023681641, "logits/rejected": -0.9412755966186523, "logps/chosen": -0.649753212928772, "logps/rejected": -2.138432741165161, "loss": 0.8062, "nll_loss": 0.7948809862136841, "rewards/accuracies": 0.625, "rewards/chosen": -0.0649753212928772, "rewards/margins": 0.14886796474456787, "rewards/rejected": -0.21384328603744507, "step": 7520 }, { "epoch": 4.588073814244319, "grad_norm": 2.0396251678466797, "learning_rate": 6.603796693202694e-07, "log_odds_chosen": 2.3148863315582275, "log_odds_ratio": -0.31986838579177856, "logits/chosen": -0.9294452667236328, "logits/rejected": -1.0736005306243896, "logps/chosen": -0.8345274925231934, "logps/rejected": -2.647087574005127, "loss": 1.0581, "nll_loss": 1.021283507347107, "rewards/accuracies": 0.875, "rewards/chosen": -0.0834527462720871, "rewards/margins": 0.1812559962272644, "rewards/rejected": -0.2647087574005127, "step": 7521 }, { "epoch": 4.588683849321336, "grad_norm": 4.0198540687561035, "learning_rate": 6.593998775260256e-07, "log_odds_chosen": 1.5196558237075806, "log_odds_ratio": -0.5769565105438232, "logits/chosen": -0.9613367319107056, "logits/rejected": -0.9867342710494995, "logps/chosen": -0.9595764875411987, "logps/rejected": -2.153841495513916, "loss": 1.0861, "nll_loss": 1.0292510986328125, "rewards/accuracies": 0.625, "rewards/chosen": -0.09595764428377151, "rewards/margins": 0.11942651122808456, "rewards/rejected": -0.21538415551185608, "step": 7522 }, { "epoch": 4.589293884398353, "grad_norm": 17.735475540161133, "learning_rate": 6.58420085731782e-07, "log_odds_chosen": 2.592782497406006, "log_odds_ratio": -0.3715421259403229, "logits/chosen": -0.9606167078018188, "logits/rejected": -1.0267274379730225, "logps/chosen": -0.7336738705635071, "logps/rejected": -2.904961109161377, "loss": 0.9364, "nll_loss": 0.9249650835990906, "rewards/accuracies": 0.75, "rewards/chosen": -0.07336738705635071, "rewards/margins": 0.21712873876094818, "rewards/rejected": -0.2904961407184601, "step": 7523 }, { "epoch": 4.58990391947537, "grad_norm": 15.144683837890625, "learning_rate": 6.574402939375382e-07, "log_odds_chosen": 1.9302793741226196, "log_odds_ratio": -0.3505992293357849, "logits/chosen": -0.8158243894577026, "logits/rejected": -1.0875976085662842, "logps/chosen": -0.8452086448669434, "logps/rejected": -2.3144052028656006, "loss": 1.1096, "nll_loss": 0.9701768159866333, "rewards/accuracies": 0.75, "rewards/chosen": -0.0845208615064621, "rewards/margins": 0.14691966772079468, "rewards/rejected": -0.23144054412841797, "step": 7524 }, { "epoch": 4.590513954552387, "grad_norm": 1.0637253522872925, "learning_rate": 6.564605021432945e-07, "log_odds_chosen": 1.2959704399108887, "log_odds_ratio": -0.3949161767959595, "logits/chosen": -0.7921656966209412, "logits/rejected": -0.9589260816574097, "logps/chosen": -0.8764902353286743, "logps/rejected": -1.784587025642395, "loss": 1.0094, "nll_loss": 0.9567071795463562, "rewards/accuracies": 0.875, "rewards/chosen": -0.08764902502298355, "rewards/margins": 0.09080968052148819, "rewards/rejected": -0.17845872044563293, "step": 7525 }, { "epoch": 4.591123989629404, "grad_norm": 2.3938937187194824, "learning_rate": 6.554807103490509e-07, "log_odds_chosen": 2.83341646194458, "log_odds_ratio": -0.35959547758102417, "logits/chosen": -1.0166058540344238, "logits/rejected": -1.2134376764297485, "logps/chosen": -0.7970513105392456, "logps/rejected": -3.1668894290924072, "loss": 1.1379, "nll_loss": 1.0366891622543335, "rewards/accuracies": 0.75, "rewards/chosen": -0.0797051340341568, "rewards/margins": 0.23698385059833527, "rewards/rejected": -0.3166889548301697, "step": 7526 }, { "epoch": 4.591734024706421, "grad_norm": 1.6920266151428223, "learning_rate": 6.545009185548071e-07, "log_odds_chosen": 3.7230448722839355, "log_odds_ratio": -0.12882541120052338, "logits/chosen": -0.7113011479377747, "logits/rejected": -1.0334187746047974, "logps/chosen": -0.42836493253707886, "logps/rejected": -2.867175340652466, "loss": 0.9567, "nll_loss": 0.8413940072059631, "rewards/accuracies": 0.875, "rewards/chosen": -0.04283649101853371, "rewards/margins": 0.24388104677200317, "rewards/rejected": -0.2867175340652466, "step": 7527 }, { "epoch": 4.5923440597834375, "grad_norm": 1.9978874921798706, "learning_rate": 6.535211267605633e-07, "log_odds_chosen": 2.3963890075683594, "log_odds_ratio": -0.4274684190750122, "logits/chosen": -1.106438159942627, "logits/rejected": -1.008880376815796, "logps/chosen": -0.9213776588439941, "logps/rejected": -3.1312203407287598, "loss": 1.0828, "nll_loss": 1.1087095737457275, "rewards/accuracies": 0.75, "rewards/chosen": -0.09213776886463165, "rewards/margins": 0.22098428010940552, "rewards/rejected": -0.313122034072876, "step": 7528 }, { "epoch": 4.592954094860454, "grad_norm": 3.1619081497192383, "learning_rate": 6.525413349663196e-07, "log_odds_chosen": 0.8808290958404541, "log_odds_ratio": -0.9299386739730835, "logits/chosen": -1.0302165746688843, "logits/rejected": -1.1111845970153809, "logps/chosen": -1.159085988998413, "logps/rejected": -1.9668219089508057, "loss": 1.0163, "nll_loss": 1.1137852668762207, "rewards/accuracies": 0.375, "rewards/chosen": -0.11590860038995743, "rewards/margins": 0.08077360689640045, "rewards/rejected": -0.19668219983577728, "step": 7529 }, { "epoch": 4.593564129937471, "grad_norm": 1.4834072589874268, "learning_rate": 6.515615431720759e-07, "log_odds_chosen": 3.83697509765625, "log_odds_ratio": -0.31717607378959656, "logits/chosen": -0.9431793689727783, "logits/rejected": -1.175763726234436, "logps/chosen": -0.8390995860099792, "logps/rejected": -4.137409687042236, "loss": 1.0894, "nll_loss": 0.9777739644050598, "rewards/accuracies": 0.875, "rewards/chosen": -0.08390997350215912, "rewards/margins": 0.3298310339450836, "rewards/rejected": -0.41374099254608154, "step": 7530 }, { "epoch": 4.594174165014488, "grad_norm": 1.4337730407714844, "learning_rate": 6.505817513778322e-07, "log_odds_chosen": 3.5596823692321777, "log_odds_ratio": -0.17475078999996185, "logits/chosen": -0.689992368221283, "logits/rejected": -1.0038167238235474, "logps/chosen": -0.4750817120075226, "logps/rejected": -3.258371353149414, "loss": 0.9089, "nll_loss": 0.5800154805183411, "rewards/accuracies": 1.0, "rewards/chosen": -0.04750817269086838, "rewards/margins": 0.2783289849758148, "rewards/rejected": -0.3258371651172638, "step": 7531 }, { "epoch": 4.594784200091505, "grad_norm": 4.5150227546691895, "learning_rate": 6.496019595835884e-07, "log_odds_chosen": 1.942042589187622, "log_odds_ratio": -0.4387853145599365, "logits/chosen": -0.8584487438201904, "logits/rejected": -1.01540207862854, "logps/chosen": -0.95649653673172, "logps/rejected": -2.4894747734069824, "loss": 1.0016, "nll_loss": 1.105631709098816, "rewards/accuracies": 0.625, "rewards/chosen": -0.09564965218305588, "rewards/margins": 0.15329782664775848, "rewards/rejected": -0.24894747138023376, "step": 7532 }, { "epoch": 4.595394235168522, "grad_norm": 1.0854026079177856, "learning_rate": 6.486221677893448e-07, "log_odds_chosen": 0.8153266310691833, "log_odds_ratio": -0.4287782907485962, "logits/chosen": -1.0152013301849365, "logits/rejected": -1.0692558288574219, "logps/chosen": -0.928020715713501, "logps/rejected": -1.5420403480529785, "loss": 0.9589, "nll_loss": 1.1066789627075195, "rewards/accuracies": 0.875, "rewards/chosen": -0.09280207008123398, "rewards/margins": 0.061401959508657455, "rewards/rejected": -0.15420404076576233, "step": 7533 }, { "epoch": 4.5960042702455395, "grad_norm": 3.291522741317749, "learning_rate": 6.47642375995101e-07, "log_odds_chosen": 0.8900805115699768, "log_odds_ratio": -0.5455514192581177, "logits/chosen": -1.0333877801895142, "logits/rejected": -1.0341477394104004, "logps/chosen": -0.8063218593597412, "logps/rejected": -1.442028522491455, "loss": 1.0225, "nll_loss": 0.922106146812439, "rewards/accuracies": 0.5, "rewards/chosen": -0.08063219487667084, "rewards/margins": 0.06357066333293915, "rewards/rejected": -0.14420285820960999, "step": 7534 }, { "epoch": 4.596614305322556, "grad_norm": 1.7872962951660156, "learning_rate": 6.466625842008572e-07, "log_odds_chosen": 1.4393271207809448, "log_odds_ratio": -0.44111329317092896, "logits/chosen": -0.884695291519165, "logits/rejected": -0.9771957397460938, "logps/chosen": -0.8513086438179016, "logps/rejected": -1.9990224838256836, "loss": 1.0623, "nll_loss": 1.186131477355957, "rewards/accuracies": 0.75, "rewards/chosen": -0.08513087034225464, "rewards/margins": 0.11477138847112656, "rewards/rejected": -0.1999022662639618, "step": 7535 }, { "epoch": 4.597224340399573, "grad_norm": 7.550395965576172, "learning_rate": 6.456827924066136e-07, "log_odds_chosen": 2.608248710632324, "log_odds_ratio": -0.24338915944099426, "logits/chosen": -0.8499225378036499, "logits/rejected": -0.9507273435592651, "logps/chosen": -0.6231073141098022, "logps/rejected": -2.6382951736450195, "loss": 1.1492, "nll_loss": 0.8349077105522156, "rewards/accuracies": 0.875, "rewards/chosen": -0.062310732901096344, "rewards/margins": 0.20151877403259277, "rewards/rejected": -0.2638294994831085, "step": 7536 }, { "epoch": 4.59783437547659, "grad_norm": 2.098874568939209, "learning_rate": 6.447030006123698e-07, "log_odds_chosen": -0.12403057515621185, "log_odds_ratio": -0.8052385449409485, "logits/chosen": -1.0978162288665771, "logits/rejected": -1.0945369005203247, "logps/chosen": -0.9898637533187866, "logps/rejected": -0.9012036323547363, "loss": 1.1034, "nll_loss": 1.1583079099655151, "rewards/accuracies": 0.5, "rewards/chosen": -0.09898637980222702, "rewards/margins": -0.008866003714501858, "rewards/rejected": -0.09012037515640259, "step": 7537 }, { "epoch": 4.598444410553607, "grad_norm": 1.7074040174484253, "learning_rate": 6.437232088181261e-07, "log_odds_chosen": 2.0774495601654053, "log_odds_ratio": -0.36783045530319214, "logits/chosen": -0.707720935344696, "logits/rejected": -0.9307659864425659, "logps/chosen": -0.5890851616859436, "logps/rejected": -2.1281163692474365, "loss": 0.9056, "nll_loss": 0.7168898582458496, "rewards/accuracies": 0.75, "rewards/chosen": -0.05890851840376854, "rewards/margins": 0.15390312671661377, "rewards/rejected": -0.2128116488456726, "step": 7538 }, { "epoch": 4.599054445630624, "grad_norm": 7.582636833190918, "learning_rate": 6.427434170238824e-07, "log_odds_chosen": 0.701276421546936, "log_odds_ratio": -0.7685338258743286, "logits/chosen": -0.959120512008667, "logits/rejected": -1.0774884223937988, "logps/chosen": -0.7186468839645386, "logps/rejected": -1.4739570617675781, "loss": 0.9724, "nll_loss": 0.7964380383491516, "rewards/accuracies": 0.625, "rewards/chosen": -0.07186469435691833, "rewards/margins": 0.07553102821111679, "rewards/rejected": -0.14739571511745453, "step": 7539 }, { "epoch": 4.599664480707641, "grad_norm": 4.283054828643799, "learning_rate": 6.417636252296387e-07, "log_odds_chosen": 1.4531804323196411, "log_odds_ratio": -0.43271106481552124, "logits/chosen": -0.9314223527908325, "logits/rejected": -1.0120559930801392, "logps/chosen": -0.8989039063453674, "logps/rejected": -1.8772615194320679, "loss": 1.1466, "nll_loss": 1.0175623893737793, "rewards/accuracies": 0.875, "rewards/chosen": -0.08989039808511734, "rewards/margins": 0.09783576428890228, "rewards/rejected": -0.18772615492343903, "step": 7540 }, { "epoch": 4.600274515784657, "grad_norm": 2.1654675006866455, "learning_rate": 6.40783833435395e-07, "log_odds_chosen": 2.3838353157043457, "log_odds_ratio": -0.4815451502799988, "logits/chosen": -0.8035522699356079, "logits/rejected": -0.976013720035553, "logps/chosen": -0.7797215580940247, "logps/rejected": -2.905853748321533, "loss": 1.0268, "nll_loss": 0.9813554286956787, "rewards/accuracies": 0.75, "rewards/chosen": -0.0779721587896347, "rewards/margins": 0.21261325478553772, "rewards/rejected": -0.29058539867401123, "step": 7541 }, { "epoch": 4.600884550861674, "grad_norm": 20.40239906311035, "learning_rate": 6.398040416411512e-07, "log_odds_chosen": 1.675485610961914, "log_odds_ratio": -0.3433452546596527, "logits/chosen": -0.8054458498954773, "logits/rejected": -0.920669436454773, "logps/chosen": -0.791076123714447, "logps/rejected": -2.0258893966674805, "loss": 0.9879, "nll_loss": 0.8766469359397888, "rewards/accuracies": 0.875, "rewards/chosen": -0.0791076123714447, "rewards/margins": 0.12348134070634842, "rewards/rejected": -0.20258894562721252, "step": 7542 }, { "epoch": 4.601494585938691, "grad_norm": 1.6046578884124756, "learning_rate": 6.388242498469075e-07, "log_odds_chosen": 1.370421290397644, "log_odds_ratio": -0.667582631111145, "logits/chosen": -0.9639632105827332, "logits/rejected": -1.0607341527938843, "logps/chosen": -0.8213356137275696, "logps/rejected": -1.9637846946716309, "loss": 0.9585, "nll_loss": 1.0838814973831177, "rewards/accuracies": 0.5, "rewards/chosen": -0.08213356882333755, "rewards/margins": 0.11424491554498672, "rewards/rejected": -0.19637848436832428, "step": 7543 }, { "epoch": 4.602104621015709, "grad_norm": 1.609598159790039, "learning_rate": 6.378444580526637e-07, "log_odds_chosen": 1.597395658493042, "log_odds_ratio": -0.48722824454307556, "logits/chosen": -1.0540488958358765, "logits/rejected": -1.049661636352539, "logps/chosen": -0.9205645322799683, "logps/rejected": -2.296943187713623, "loss": 1.0784, "nll_loss": 1.0920978784561157, "rewards/accuracies": 0.75, "rewards/chosen": -0.09205646812915802, "rewards/margins": 0.13763785362243652, "rewards/rejected": -0.22969432175159454, "step": 7544 }, { "epoch": 4.602714656092726, "grad_norm": 1.711087703704834, "learning_rate": 6.3686466625842e-07, "log_odds_chosen": 3.1143033504486084, "log_odds_ratio": -0.19723518192768097, "logits/chosen": -0.708357036113739, "logits/rejected": -0.946155846118927, "logps/chosen": -0.488432377576828, "logps/rejected": -2.7225825786590576, "loss": 0.9311, "nll_loss": 0.7406039237976074, "rewards/accuracies": 0.875, "rewards/chosen": -0.04884323850274086, "rewards/margins": 0.22341501712799072, "rewards/rejected": -0.2722582519054413, "step": 7545 }, { "epoch": 4.6033246911697425, "grad_norm": 1.212982416152954, "learning_rate": 6.358848744641764e-07, "log_odds_chosen": 1.8530240058898926, "log_odds_ratio": -0.3336551785469055, "logits/chosen": -0.868804395198822, "logits/rejected": -1.0843231678009033, "logps/chosen": -0.9179341793060303, "logps/rejected": -2.241637945175171, "loss": 1.0152, "nll_loss": 0.9010685086250305, "rewards/accuracies": 0.875, "rewards/chosen": -0.09179341793060303, "rewards/margins": 0.13237038254737854, "rewards/rejected": -0.22416380047798157, "step": 7546 }, { "epoch": 4.603934726246759, "grad_norm": 1.301637053489685, "learning_rate": 6.349050826699326e-07, "log_odds_chosen": 0.4274265170097351, "log_odds_ratio": -0.5853027105331421, "logits/chosen": -1.0662660598754883, "logits/rejected": -0.9571048617362976, "logps/chosen": -0.9274090528488159, "logps/rejected": -1.2831170558929443, "loss": 0.9536, "nll_loss": 1.009294033050537, "rewards/accuracies": 0.5, "rewards/chosen": -0.09274090081453323, "rewards/margins": 0.03557080775499344, "rewards/rejected": -0.12831172347068787, "step": 7547 }, { "epoch": 4.604544761323776, "grad_norm": 1.364254355430603, "learning_rate": 6.339252908756889e-07, "log_odds_chosen": 1.1664390563964844, "log_odds_ratio": -0.5119354128837585, "logits/chosen": -0.9321749806404114, "logits/rejected": -0.9930623769760132, "logps/chosen": -0.8100314140319824, "logps/rejected": -1.7219548225402832, "loss": 0.998, "nll_loss": 0.9245409965515137, "rewards/accuracies": 0.75, "rewards/chosen": -0.08100314438343048, "rewards/margins": 0.0911923348903656, "rewards/rejected": -0.1721954643726349, "step": 7548 }, { "epoch": 4.605154796400793, "grad_norm": 8.615184783935547, "learning_rate": 6.329454990814452e-07, "log_odds_chosen": 3.900164842605591, "log_odds_ratio": -0.12241446226835251, "logits/chosen": -0.8730255961418152, "logits/rejected": -1.087019443511963, "logps/chosen": -0.7163357734680176, "logps/rejected": -4.023006439208984, "loss": 0.9171, "nll_loss": 0.8899180293083191, "rewards/accuracies": 1.0, "rewards/chosen": -0.07163357734680176, "rewards/margins": 0.33066704869270325, "rewards/rejected": -0.4023006558418274, "step": 7549 }, { "epoch": 4.60576483147781, "grad_norm": 1.6997101306915283, "learning_rate": 6.319657072872014e-07, "log_odds_chosen": 1.0466814041137695, "log_odds_ratio": -0.5053853988647461, "logits/chosen": -1.1224446296691895, "logits/rejected": -1.0992029905319214, "logps/chosen": -1.0484209060668945, "logps/rejected": -1.9218621253967285, "loss": 1.0498, "nll_loss": 1.085415005683899, "rewards/accuracies": 0.625, "rewards/chosen": -0.10484208166599274, "rewards/margins": 0.08734413981437683, "rewards/rejected": -0.19218623638153076, "step": 7550 }, { "epoch": 4.606374866554827, "grad_norm": 2.0956509113311768, "learning_rate": 6.309859154929577e-07, "log_odds_chosen": 1.8123456239700317, "log_odds_ratio": -0.4376029968261719, "logits/chosen": -0.9155641198158264, "logits/rejected": -0.8108758926391602, "logps/chosen": -0.8782573938369751, "logps/rejected": -2.387343645095825, "loss": 0.9118, "nll_loss": 0.9454174041748047, "rewards/accuracies": 0.75, "rewards/chosen": -0.08782573789358139, "rewards/margins": 0.15090863406658173, "rewards/rejected": -0.23873436450958252, "step": 7551 }, { "epoch": 4.606984901631844, "grad_norm": 6.085113048553467, "learning_rate": 6.30006123698714e-07, "log_odds_chosen": 0.6645542979240417, "log_odds_ratio": -0.7288762331008911, "logits/chosen": -0.8391655683517456, "logits/rejected": -0.8633079528808594, "logps/chosen": -0.9539128541946411, "logps/rejected": -1.3813645839691162, "loss": 1.2053, "nll_loss": 1.0648102760314941, "rewards/accuracies": 0.5, "rewards/chosen": -0.09539128094911575, "rewards/margins": 0.04274517297744751, "rewards/rejected": -0.13813646137714386, "step": 7552 }, { "epoch": 4.6075949367088604, "grad_norm": 5.545888900756836, "learning_rate": 6.290263319044703e-07, "log_odds_chosen": 3.6979732513427734, "log_odds_ratio": -0.42590010166168213, "logits/chosen": -0.8609008193016052, "logits/rejected": -0.9979143738746643, "logps/chosen": -0.6554921865463257, "logps/rejected": -3.969005584716797, "loss": 1.0284, "nll_loss": 1.0011651515960693, "rewards/accuracies": 0.75, "rewards/chosen": -0.06554921716451645, "rewards/margins": 0.3313513398170471, "rewards/rejected": -0.39690059423446655, "step": 7553 }, { "epoch": 4.608204971785877, "grad_norm": 2.7548983097076416, "learning_rate": 6.280465401102265e-07, "log_odds_chosen": 2.1928234100341797, "log_odds_ratio": -0.38191330432891846, "logits/chosen": -0.849140465259552, "logits/rejected": -0.9648022651672363, "logps/chosen": -0.5759924650192261, "logps/rejected": -2.193265199661255, "loss": 0.8591, "nll_loss": 0.6987254023551941, "rewards/accuracies": 0.875, "rewards/chosen": -0.05759924650192261, "rewards/margins": 0.16172727942466736, "rewards/rejected": -0.21932652592658997, "step": 7554 }, { "epoch": 4.608815006862894, "grad_norm": 1.410603404045105, "learning_rate": 6.270667483159829e-07, "log_odds_chosen": 1.8667519092559814, "log_odds_ratio": -0.3912750482559204, "logits/chosen": -1.2363711595535278, "logits/rejected": -1.1937609910964966, "logps/chosen": -1.0169355869293213, "logps/rejected": -2.466872453689575, "loss": 0.9809, "nll_loss": 1.208634614944458, "rewards/accuracies": 0.875, "rewards/chosen": -0.10169357806444168, "rewards/margins": 0.14499369263648987, "rewards/rejected": -0.24668726325035095, "step": 7555 }, { "epoch": 4.609425041939912, "grad_norm": 1.3715896606445312, "learning_rate": 6.260869565217392e-07, "log_odds_chosen": 1.663102388381958, "log_odds_ratio": -0.3436148464679718, "logits/chosen": -1.1281139850616455, "logits/rejected": -1.1531283855438232, "logps/chosen": -0.8991187810897827, "logps/rejected": -2.29034423828125, "loss": 1.0309, "nll_loss": 1.0509254932403564, "rewards/accuracies": 0.875, "rewards/chosen": -0.08991187810897827, "rewards/margins": 0.13912254571914673, "rewards/rejected": -0.229034423828125, "step": 7556 }, { "epoch": 4.610035077016929, "grad_norm": 1.4569745063781738, "learning_rate": 6.251071647274953e-07, "log_odds_chosen": 2.9053471088409424, "log_odds_ratio": -0.34671640396118164, "logits/chosen": -1.014754056930542, "logits/rejected": -1.1939195394515991, "logps/chosen": -0.8025912046432495, "logps/rejected": -2.834789752960205, "loss": 1.1003, "nll_loss": 1.3262349367141724, "rewards/accuracies": 0.75, "rewards/chosen": -0.08025912195444107, "rewards/margins": 0.20321986079216003, "rewards/rejected": -0.2834789752960205, "step": 7557 }, { "epoch": 4.610645112093946, "grad_norm": 1.3903559446334839, "learning_rate": 6.241273729332517e-07, "log_odds_chosen": 1.7649528980255127, "log_odds_ratio": -0.4181402325630188, "logits/chosen": -0.8382638692855835, "logits/rejected": -1.0340971946716309, "logps/chosen": -0.7441666126251221, "logps/rejected": -2.04949688911438, "loss": 0.956, "nll_loss": 1.0322242975234985, "rewards/accuracies": 0.625, "rewards/chosen": -0.07441665977239609, "rewards/margins": 0.13053302466869354, "rewards/rejected": -0.20494967699050903, "step": 7558 }, { "epoch": 4.611255147170962, "grad_norm": 1.7513803243637085, "learning_rate": 6.231475811390079e-07, "log_odds_chosen": 0.09516406059265137, "log_odds_ratio": -0.7001693248748779, "logits/chosen": -0.8910291194915771, "logits/rejected": -0.9207701086997986, "logps/chosen": -0.9725464582443237, "logps/rejected": -1.0307540893554688, "loss": 0.9792, "nll_loss": 1.1226357221603394, "rewards/accuracies": 0.625, "rewards/chosen": -0.09725464880466461, "rewards/margins": 0.005820751655846834, "rewards/rejected": -0.10307539999485016, "step": 7559 }, { "epoch": 4.611865182247979, "grad_norm": 1.8431581258773804, "learning_rate": 6.221677893447642e-07, "log_odds_chosen": 2.256575584411621, "log_odds_ratio": -0.3808324337005615, "logits/chosen": -0.7546246647834778, "logits/rejected": -0.8776469826698303, "logps/chosen": -0.5382292866706848, "logps/rejected": -1.9398164749145508, "loss": 0.974, "nll_loss": 0.7172890901565552, "rewards/accuracies": 0.75, "rewards/chosen": -0.05382293090224266, "rewards/margins": 0.1401587277650833, "rewards/rejected": -0.19398167729377747, "step": 7560 }, { "epoch": 4.612475217324996, "grad_norm": 1.5697975158691406, "learning_rate": 6.211879975505205e-07, "log_odds_chosen": 3.572295665740967, "log_odds_ratio": -0.4002588391304016, "logits/chosen": -0.7876074314117432, "logits/rejected": -0.9656484127044678, "logps/chosen": -0.6609617471694946, "logps/rejected": -3.3753302097320557, "loss": 0.9758, "nll_loss": 0.7445427179336548, "rewards/accuracies": 0.75, "rewards/chosen": -0.06609617173671722, "rewards/margins": 0.2714368402957916, "rewards/rejected": -0.33753302693367004, "step": 7561 }, { "epoch": 4.613085252402013, "grad_norm": 1.4029449224472046, "learning_rate": 6.202082057562768e-07, "log_odds_chosen": 3.8423914909362793, "log_odds_ratio": -0.2379111647605896, "logits/chosen": -0.6854719519615173, "logits/rejected": -0.8893323540687561, "logps/chosen": -0.8494481444358826, "logps/rejected": -4.165642261505127, "loss": 1.0561, "nll_loss": 0.9622364044189453, "rewards/accuracies": 0.875, "rewards/chosen": -0.08494481444358826, "rewards/margins": 0.33161941170692444, "rewards/rejected": -0.4165642261505127, "step": 7562 }, { "epoch": 4.61369528747903, "grad_norm": 1.5003117322921753, "learning_rate": 6.192284139620331e-07, "log_odds_chosen": 1.2173504829406738, "log_odds_ratio": -0.7323847413063049, "logits/chosen": -0.9093703627586365, "logits/rejected": -0.9499136805534363, "logps/chosen": -0.9903051257133484, "logps/rejected": -2.0043582916259766, "loss": 1.0077, "nll_loss": 1.300592064857483, "rewards/accuracies": 0.375, "rewards/chosen": -0.0990305095911026, "rewards/margins": 0.10140533000230789, "rewards/rejected": -0.2004358470439911, "step": 7563 }, { "epoch": 4.614305322556047, "grad_norm": 2.634885311126709, "learning_rate": 6.182486221677892e-07, "log_odds_chosen": 3.172788619995117, "log_odds_ratio": -0.38560789823532104, "logits/chosen": -0.8211562633514404, "logits/rejected": -0.9705589413642883, "logps/chosen": -0.7760961055755615, "logps/rejected": -3.311582088470459, "loss": 1.0108, "nll_loss": 1.095888376235962, "rewards/accuracies": 0.75, "rewards/chosen": -0.0776096060872078, "rewards/margins": 0.25354859232902527, "rewards/rejected": -0.33115822076797485, "step": 7564 }, { "epoch": 4.614915357633064, "grad_norm": 5.208682537078857, "learning_rate": 6.172688303735456e-07, "log_odds_chosen": 0.9598698616027832, "log_odds_ratio": -0.516745924949646, "logits/chosen": -1.0925748348236084, "logits/rejected": -0.995212197303772, "logps/chosen": -0.7922388911247253, "logps/rejected": -1.5563554763793945, "loss": 0.9971, "nll_loss": 0.9510892629623413, "rewards/accuracies": 0.75, "rewards/chosen": -0.07922389358282089, "rewards/margins": 0.0764116570353508, "rewards/rejected": -0.1556355357170105, "step": 7565 }, { "epoch": 4.615525392710081, "grad_norm": 15.289573669433594, "learning_rate": 6.162890385793019e-07, "log_odds_chosen": 1.9024803638458252, "log_odds_ratio": -0.5052977204322815, "logits/chosen": -0.770111083984375, "logits/rejected": -0.7581068873405457, "logps/chosen": -0.5609883069992065, "logps/rejected": -1.8467215299606323, "loss": 1.0554, "nll_loss": 1.09176766872406, "rewards/accuracies": 0.5, "rewards/chosen": -0.056098829954862595, "rewards/margins": 0.12857332825660706, "rewards/rejected": -0.18467214703559875, "step": 7566 }, { "epoch": 4.616135427787098, "grad_norm": 1.9899005889892578, "learning_rate": 6.153092467850581e-07, "log_odds_chosen": 1.165776014328003, "log_odds_ratio": -0.557674765586853, "logits/chosen": -1.0037009716033936, "logits/rejected": -1.0375165939331055, "logps/chosen": -0.8810489177703857, "logps/rejected": -1.496631145477295, "loss": 1.1734, "nll_loss": 1.1845245361328125, "rewards/accuracies": 0.625, "rewards/chosen": -0.08810489624738693, "rewards/margins": 0.061558209359645844, "rewards/rejected": -0.14966312050819397, "step": 7567 }, { "epoch": 4.616745462864115, "grad_norm": 2.888002634048462, "learning_rate": 6.143294549908145e-07, "log_odds_chosen": 3.062154769897461, "log_odds_ratio": -0.1933957040309906, "logits/chosen": -0.8535803556442261, "logits/rejected": -1.061975121498108, "logps/chosen": -0.5520567297935486, "logps/rejected": -2.906506299972534, "loss": 1.0758, "nll_loss": 0.836854100227356, "rewards/accuracies": 1.0, "rewards/chosen": -0.05520567670464516, "rewards/margins": 0.23544497787952423, "rewards/rejected": -0.2906506657600403, "step": 7568 }, { "epoch": 4.617355497941132, "grad_norm": 1.2626651525497437, "learning_rate": 6.133496631965707e-07, "log_odds_chosen": 2.7339282035827637, "log_odds_ratio": -0.274387389421463, "logits/chosen": -1.0030012130737305, "logits/rejected": -1.1165927648544312, "logps/chosen": -0.8975028991699219, "logps/rejected": -3.1812212467193604, "loss": 1.0729, "nll_loss": 1.0048398971557617, "rewards/accuracies": 0.875, "rewards/chosen": -0.08975028246641159, "rewards/margins": 0.22837185859680176, "rewards/rejected": -0.31812214851379395, "step": 7569 }, { "epoch": 4.617965533018149, "grad_norm": 1.5970592498779297, "learning_rate": 6.12369871402327e-07, "log_odds_chosen": 2.6232247352600098, "log_odds_ratio": -0.29247862100601196, "logits/chosen": -0.7696990966796875, "logits/rejected": -0.9645082950592041, "logps/chosen": -0.5935635566711426, "logps/rejected": -2.708752155303955, "loss": 0.8537, "nll_loss": 0.7083311676979065, "rewards/accuracies": 0.875, "rewards/chosen": -0.05935635417699814, "rewards/margins": 0.21151885390281677, "rewards/rejected": -0.2708752155303955, "step": 7570 }, { "epoch": 4.6185755680951655, "grad_norm": 1.9563316106796265, "learning_rate": 6.113900796080834e-07, "log_odds_chosen": 2.1903960704803467, "log_odds_ratio": -0.3538431227207184, "logits/chosen": -1.1306291818618774, "logits/rejected": -1.2384365797042847, "logps/chosen": -0.8054713606834412, "logps/rejected": -2.5428662300109863, "loss": 1.0649, "nll_loss": 1.0682179927825928, "rewards/accuracies": 0.875, "rewards/chosen": -0.08054713159799576, "rewards/margins": 0.1737395077943802, "rewards/rejected": -0.25428664684295654, "step": 7571 }, { "epoch": 4.619185603172182, "grad_norm": 1.351365566253662, "learning_rate": 6.104102878138395e-07, "log_odds_chosen": 2.331831932067871, "log_odds_ratio": -0.39027130603790283, "logits/chosen": -0.8854318261146545, "logits/rejected": -1.014862298965454, "logps/chosen": -0.6758124232292175, "logps/rejected": -2.5547075271606445, "loss": 1.1158, "nll_loss": 1.0137929916381836, "rewards/accuracies": 0.75, "rewards/chosen": -0.06758124381303787, "rewards/margins": 0.18788953125476837, "rewards/rejected": -0.25547075271606445, "step": 7572 }, { "epoch": 4.619795638249199, "grad_norm": 1.5721391439437866, "learning_rate": 6.094304960195958e-07, "log_odds_chosen": 2.551476001739502, "log_odds_ratio": -0.39132434129714966, "logits/chosen": -0.8643523454666138, "logits/rejected": -0.9627323746681213, "logps/chosen": -0.8157283067703247, "logps/rejected": -2.921043872833252, "loss": 0.9544, "nll_loss": 1.1317030191421509, "rewards/accuracies": 0.75, "rewards/chosen": -0.08157283812761307, "rewards/margins": 0.2105315625667572, "rewards/rejected": -0.29210442304611206, "step": 7573 }, { "epoch": 4.620405673326216, "grad_norm": 7.97231388092041, "learning_rate": 6.08450704225352e-07, "log_odds_chosen": 1.4571795463562012, "log_odds_ratio": -0.3501295745372772, "logits/chosen": -0.9851256608963013, "logits/rejected": -0.9007211923599243, "logps/chosen": -0.6181973218917847, "logps/rejected": -1.5374433994293213, "loss": 0.9736, "nll_loss": 1.0287561416625977, "rewards/accuracies": 0.75, "rewards/chosen": -0.06181972846388817, "rewards/margins": 0.09192461520433426, "rewards/rejected": -0.15374433994293213, "step": 7574 }, { "epoch": 4.621015708403233, "grad_norm": 2.2491281032562256, "learning_rate": 6.074709124311084e-07, "log_odds_chosen": 1.7055249214172363, "log_odds_ratio": -0.4517458975315094, "logits/chosen": -0.9543921947479248, "logits/rejected": -1.0384730100631714, "logps/chosen": -0.6591354608535767, "logps/rejected": -1.9985498189926147, "loss": 1.0114, "nll_loss": 0.8220895528793335, "rewards/accuracies": 0.875, "rewards/chosen": -0.06591354310512543, "rewards/margins": 0.1339414417743683, "rewards/rejected": -0.1998549997806549, "step": 7575 }, { "epoch": 4.62162574348025, "grad_norm": 5.127908706665039, "learning_rate": 6.064911206368646e-07, "log_odds_chosen": 0.8455850481987, "log_odds_ratio": -0.6753364205360413, "logits/chosen": -1.1096080541610718, "logits/rejected": -1.0195114612579346, "logps/chosen": -0.9414178729057312, "logps/rejected": -1.725780963897705, "loss": 1.0728, "nll_loss": 1.1890647411346436, "rewards/accuracies": 0.375, "rewards/chosen": -0.09414178133010864, "rewards/margins": 0.07843631505966187, "rewards/rejected": -0.1725780963897705, "step": 7576 }, { "epoch": 4.622235778557267, "grad_norm": 9.471346855163574, "learning_rate": 6.055113288426209e-07, "log_odds_chosen": 1.132889747619629, "log_odds_ratio": -0.5791247487068176, "logits/chosen": -0.7795990705490112, "logits/rejected": -0.9350278973579407, "logps/chosen": -0.7029076814651489, "logps/rejected": -1.672649621963501, "loss": 1.0603, "nll_loss": 0.8624175786972046, "rewards/accuracies": 0.625, "rewards/chosen": -0.07029077410697937, "rewards/margins": 0.09697418659925461, "rewards/rejected": -0.16726496815681458, "step": 7577 }, { "epoch": 4.622845813634284, "grad_norm": 1.3685624599456787, "learning_rate": 6.045315370483773e-07, "log_odds_chosen": 0.3324497938156128, "log_odds_ratio": -0.6873411536216736, "logits/chosen": -1.1272168159484863, "logits/rejected": -1.0707371234893799, "logps/chosen": -1.0120631456375122, "logps/rejected": -1.2234008312225342, "loss": 1.0586, "nll_loss": 1.1069865226745605, "rewards/accuracies": 0.5, "rewards/chosen": -0.10120631754398346, "rewards/margins": 0.021133767440915108, "rewards/rejected": -0.12234008312225342, "step": 7578 }, { "epoch": 4.623455848711301, "grad_norm": 1.114553689956665, "learning_rate": 6.035517452541334e-07, "log_odds_chosen": 1.6757410764694214, "log_odds_ratio": -0.39498674869537354, "logits/chosen": -0.8453261256217957, "logits/rejected": -0.9567304849624634, "logps/chosen": -0.8364008069038391, "logps/rejected": -2.1748464107513428, "loss": 0.9573, "nll_loss": 0.9614537358283997, "rewards/accuracies": 0.875, "rewards/chosen": -0.08364008367061615, "rewards/margins": 0.1338445544242859, "rewards/rejected": -0.21748463809490204, "step": 7579 }, { "epoch": 4.624065883788318, "grad_norm": 1.6188371181488037, "learning_rate": 6.025719534598897e-07, "log_odds_chosen": 1.5555061101913452, "log_odds_ratio": -0.2643386125564575, "logits/chosen": -0.8540869355201721, "logits/rejected": -0.8736153841018677, "logps/chosen": -0.6503334641456604, "logps/rejected": -1.698577880859375, "loss": 0.9263, "nll_loss": 0.7832130789756775, "rewards/accuracies": 1.0, "rewards/chosen": -0.06503334641456604, "rewards/margins": 0.10482443869113922, "rewards/rejected": -0.16985780000686646, "step": 7580 }, { "epoch": 4.624675918865335, "grad_norm": 1.8603875637054443, "learning_rate": 6.01592161665646e-07, "log_odds_chosen": 2.0656611919403076, "log_odds_ratio": -0.6085109710693359, "logits/chosen": -0.9269183874130249, "logits/rejected": -1.001734733581543, "logps/chosen": -0.9739915132522583, "logps/rejected": -2.587153434753418, "loss": 1.1536, "nll_loss": 1.1482572555541992, "rewards/accuracies": 0.375, "rewards/chosen": -0.09739916026592255, "rewards/margins": 0.16131620109081268, "rewards/rejected": -0.25871536135673523, "step": 7581 }, { "epoch": 4.625285953942352, "grad_norm": 1.2541813850402832, "learning_rate": 6.006123698714023e-07, "log_odds_chosen": 1.8033045530319214, "log_odds_ratio": -0.40495458245277405, "logits/chosen": -0.5315242409706116, "logits/rejected": -0.8059142231941223, "logps/chosen": -0.6950768232345581, "logps/rejected": -1.993486762046814, "loss": 0.9245, "nll_loss": 0.7224544286727905, "rewards/accuracies": 0.75, "rewards/chosen": -0.0695076733827591, "rewards/margins": 0.12984101474285126, "rewards/rejected": -0.19934868812561035, "step": 7582 }, { "epoch": 4.6258959890193685, "grad_norm": 1.5335465669631958, "learning_rate": 5.996325780771586e-07, "log_odds_chosen": 1.9548662900924683, "log_odds_ratio": -0.41100364923477173, "logits/chosen": -0.9278284311294556, "logits/rejected": -1.0312198400497437, "logps/chosen": -0.6688821911811829, "logps/rejected": -2.1475682258605957, "loss": 0.9803, "nll_loss": 0.8289327621459961, "rewards/accuracies": 0.875, "rewards/chosen": -0.0668882206082344, "rewards/margins": 0.14786860346794128, "rewards/rejected": -0.2147568166255951, "step": 7583 }, { "epoch": 4.626506024096385, "grad_norm": 7.07863187789917, "learning_rate": 5.986527862829149e-07, "log_odds_chosen": 3.190164089202881, "log_odds_ratio": -0.2654673457145691, "logits/chosen": -0.8981404900550842, "logits/rejected": -0.9556658864021301, "logps/chosen": -0.7545448541641235, "logps/rejected": -3.2231597900390625, "loss": 1.2255, "nll_loss": 0.9133986830711365, "rewards/accuracies": 0.875, "rewards/chosen": -0.07545448839664459, "rewards/margins": 0.24686148762702942, "rewards/rejected": -0.3223159909248352, "step": 7584 }, { "epoch": 4.627116059173402, "grad_norm": 1.603345274925232, "learning_rate": 5.976729944886712e-07, "log_odds_chosen": 3.2928314208984375, "log_odds_ratio": -0.2969289720058441, "logits/chosen": -0.6857161521911621, "logits/rejected": -0.8193390369415283, "logps/chosen": -0.6924915313720703, "logps/rejected": -3.565493583679199, "loss": 0.9291, "nll_loss": 0.7687361836433411, "rewards/accuracies": 0.875, "rewards/chosen": -0.06924915313720703, "rewards/margins": 0.2873001992702484, "rewards/rejected": -0.35654932260513306, "step": 7585 }, { "epoch": 4.627726094250419, "grad_norm": 0.9718085527420044, "learning_rate": 5.966932026944273e-07, "log_odds_chosen": 2.9308218955993652, "log_odds_ratio": -0.25119754672050476, "logits/chosen": -0.8945316672325134, "logits/rejected": -1.152385950088501, "logps/chosen": -0.8742960691452026, "logps/rejected": -3.271878957748413, "loss": 0.9015, "nll_loss": 0.9418810606002808, "rewards/accuracies": 1.0, "rewards/chosen": -0.08742961287498474, "rewards/margins": 0.23975829780101776, "rewards/rejected": -0.3271878957748413, "step": 7586 }, { "epoch": 4.628336129327437, "grad_norm": 1.6071300506591797, "learning_rate": 5.957134109001836e-07, "log_odds_chosen": 1.5639550685882568, "log_odds_ratio": -0.38772696256637573, "logits/chosen": -0.2516424357891083, "logits/rejected": -0.42993974685668945, "logps/chosen": -0.775334894657135, "logps/rejected": -1.9155035018920898, "loss": 0.9344, "nll_loss": 0.7771175503730774, "rewards/accuracies": 0.75, "rewards/chosen": -0.07753349095582962, "rewards/margins": 0.11401684582233429, "rewards/rejected": -0.1915503442287445, "step": 7587 }, { "epoch": 4.628946164404454, "grad_norm": 2.1523144245147705, "learning_rate": 5.9473361910594e-07, "log_odds_chosen": 2.5942254066467285, "log_odds_ratio": -0.2963179647922516, "logits/chosen": -0.9717855453491211, "logits/rejected": -1.0917472839355469, "logps/chosen": -0.6725400686264038, "logps/rejected": -2.6214470863342285, "loss": 1.1336, "nll_loss": 1.052595853805542, "rewards/accuracies": 0.875, "rewards/chosen": -0.06725400686264038, "rewards/margins": 0.19489072263240814, "rewards/rejected": -0.26214471459388733, "step": 7588 }, { "epoch": 4.6295561994814705, "grad_norm": 2.850616693496704, "learning_rate": 5.937538273116962e-07, "log_odds_chosen": 2.557077646255493, "log_odds_ratio": -0.30525532364845276, "logits/chosen": -0.7720107436180115, "logits/rejected": -0.8875634670257568, "logps/chosen": -0.6464212536811829, "logps/rejected": -2.5077152252197266, "loss": 1.1146, "nll_loss": 0.7645910978317261, "rewards/accuracies": 0.75, "rewards/chosen": -0.06464212387800217, "rewards/margins": 0.1861294060945511, "rewards/rejected": -0.25077152252197266, "step": 7589 }, { "epoch": 4.630166234558487, "grad_norm": 2.1104953289031982, "learning_rate": 5.927740355174525e-07, "log_odds_chosen": 2.892585277557373, "log_odds_ratio": -0.264684796333313, "logits/chosen": -0.8369624614715576, "logits/rejected": -0.8935189247131348, "logps/chosen": -0.5972685813903809, "logps/rejected": -2.8722872734069824, "loss": 1.025, "nll_loss": 0.7686272859573364, "rewards/accuracies": 0.875, "rewards/chosen": -0.05972685664892197, "rewards/margins": 0.22750186920166016, "rewards/rejected": -0.2872287333011627, "step": 7590 }, { "epoch": 4.630776269635504, "grad_norm": 9.791805267333984, "learning_rate": 5.917942437232088e-07, "log_odds_chosen": 2.551675319671631, "log_odds_ratio": -0.33220961689949036, "logits/chosen": -1.0213454961776733, "logits/rejected": -0.9985190033912659, "logps/chosen": -0.8492332696914673, "logps/rejected": -3.0828664302825928, "loss": 1.1074, "nll_loss": 1.1552772521972656, "rewards/accuracies": 0.875, "rewards/chosen": -0.08492332696914673, "rewards/margins": 0.22336331009864807, "rewards/rejected": -0.3082866370677948, "step": 7591 }, { "epoch": 4.631386304712521, "grad_norm": 1.8627548217773438, "learning_rate": 5.908144519289651e-07, "log_odds_chosen": 2.6883795261383057, "log_odds_ratio": -0.2873460650444031, "logits/chosen": -0.9080314040184021, "logits/rejected": -1.0591442584991455, "logps/chosen": -0.5570184588432312, "logps/rejected": -2.49484920501709, "loss": 0.9534, "nll_loss": 1.0002930164337158, "rewards/accuracies": 0.875, "rewards/chosen": -0.055701844394207, "rewards/margins": 0.19378304481506348, "rewards/rejected": -0.24948489665985107, "step": 7592 }, { "epoch": 4.631996339789538, "grad_norm": 1.6014995574951172, "learning_rate": 5.898346601347214e-07, "log_odds_chosen": 1.680191993713379, "log_odds_ratio": -0.5586946606636047, "logits/chosen": -0.9855843186378479, "logits/rejected": -1.0712885856628418, "logps/chosen": -0.7835156321525574, "logps/rejected": -2.165327310562134, "loss": 0.9834, "nll_loss": 0.8903980255126953, "rewards/accuracies": 0.5, "rewards/chosen": -0.07835157215595245, "rewards/margins": 0.1381811797618866, "rewards/rejected": -0.21653275191783905, "step": 7593 }, { "epoch": 4.632606374866555, "grad_norm": 1.4695873260498047, "learning_rate": 5.888548683404776e-07, "log_odds_chosen": 2.725038766860962, "log_odds_ratio": -0.32234668731689453, "logits/chosen": -0.8695899248123169, "logits/rejected": -1.0792189836502075, "logps/chosen": -0.6488745212554932, "logps/rejected": -2.7644314765930176, "loss": 0.8042, "nll_loss": 0.8493408560752869, "rewards/accuracies": 0.875, "rewards/chosen": -0.06488744914531708, "rewards/margins": 0.21155567467212677, "rewards/rejected": -0.27644312381744385, "step": 7594 }, { "epoch": 4.6332164099435715, "grad_norm": 1.663335919380188, "learning_rate": 5.878750765462339e-07, "log_odds_chosen": 1.632669448852539, "log_odds_ratio": -0.4516037702560425, "logits/chosen": -0.9233495593070984, "logits/rejected": -1.014246940612793, "logps/chosen": -0.7829368114471436, "logps/rejected": -2.021414279937744, "loss": 0.9527, "nll_loss": 1.0119524002075195, "rewards/accuracies": 0.625, "rewards/chosen": -0.07829368859529495, "rewards/margins": 0.12384775280952454, "rewards/rejected": -0.20214144885540009, "step": 7595 }, { "epoch": 4.633826445020588, "grad_norm": 1.6144665479660034, "learning_rate": 5.868952847519901e-07, "log_odds_chosen": 2.2047579288482666, "log_odds_ratio": -0.2868618071079254, "logits/chosen": -0.9066954851150513, "logits/rejected": -0.8932420611381531, "logps/chosen": -0.6658127307891846, "logps/rejected": -2.2213120460510254, "loss": 0.8944, "nll_loss": 0.9535508751869202, "rewards/accuracies": 0.75, "rewards/chosen": -0.06658127903938293, "rewards/margins": 0.15554994344711304, "rewards/rejected": -0.22213120758533478, "step": 7596 }, { "epoch": 4.634436480097605, "grad_norm": 10.690573692321777, "learning_rate": 5.859154929577465e-07, "log_odds_chosen": 2.0679354667663574, "log_odds_ratio": -0.4438599944114685, "logits/chosen": -0.8274731636047363, "logits/rejected": -0.9740574359893799, "logps/chosen": -0.8795243501663208, "logps/rejected": -2.6477789878845215, "loss": 1.0137, "nll_loss": 1.0459163188934326, "rewards/accuracies": 0.75, "rewards/chosen": -0.08795243501663208, "rewards/margins": 0.17682546377182007, "rewards/rejected": -0.26477789878845215, "step": 7597 }, { "epoch": 4.635046515174622, "grad_norm": 1.4961419105529785, "learning_rate": 5.849357011635028e-07, "log_odds_chosen": 3.4399611949920654, "log_odds_ratio": -0.2928043603897095, "logits/chosen": -0.6778882145881653, "logits/rejected": -0.9918310642242432, "logps/chosen": -0.9554557800292969, "logps/rejected": -3.902860641479492, "loss": 1.0021, "nll_loss": 1.0244581699371338, "rewards/accuracies": 0.875, "rewards/chosen": -0.09554557502269745, "rewards/margins": 0.2947404682636261, "rewards/rejected": -0.39028605818748474, "step": 7598 }, { "epoch": 4.63565655025164, "grad_norm": 1.6989878416061401, "learning_rate": 5.83955909369259e-07, "log_odds_chosen": 1.535701036453247, "log_odds_ratio": -0.6003272533416748, "logits/chosen": -0.9584534168243408, "logits/rejected": -0.9772979021072388, "logps/chosen": -0.9312899112701416, "logps/rejected": -2.0171661376953125, "loss": 1.2061, "nll_loss": 1.3561797142028809, "rewards/accuracies": 0.5, "rewards/chosen": -0.0931289941072464, "rewards/margins": 0.10858761519193649, "rewards/rejected": -0.2017166018486023, "step": 7599 }, { "epoch": 4.636266585328657, "grad_norm": 11.14141845703125, "learning_rate": 5.829761175750154e-07, "log_odds_chosen": 0.9179328680038452, "log_odds_ratio": -0.514329731464386, "logits/chosen": -0.797525942325592, "logits/rejected": -0.9358268976211548, "logps/chosen": -0.979529857635498, "logps/rejected": -1.7471275329589844, "loss": 1.099, "nll_loss": 1.1174510717391968, "rewards/accuracies": 0.5, "rewards/chosen": -0.09795298427343369, "rewards/margins": 0.07675978541374207, "rewards/rejected": -0.17471277713775635, "step": 7600 }, { "epoch": 4.6368766204056735, "grad_norm": 2.520958423614502, "learning_rate": 5.819963257807715e-07, "log_odds_chosen": 1.1469058990478516, "log_odds_ratio": -0.5694483518600464, "logits/chosen": -0.8575357794761658, "logits/rejected": -0.8991329073905945, "logps/chosen": -0.9370863437652588, "logps/rejected": -1.8759551048278809, "loss": 1.1234, "nll_loss": 1.091217279434204, "rewards/accuracies": 0.5, "rewards/chosen": -0.09370863437652588, "rewards/margins": 0.09388686716556549, "rewards/rejected": -0.18759551644325256, "step": 7601 }, { "epoch": 4.63748665548269, "grad_norm": 1.1655292510986328, "learning_rate": 5.810165339865278e-07, "log_odds_chosen": 1.53480064868927, "log_odds_ratio": -0.33341076970100403, "logits/chosen": -0.7087528705596924, "logits/rejected": -0.8594191074371338, "logps/chosen": -0.589250922203064, "logps/rejected": -1.4885510206222534, "loss": 0.8357, "nll_loss": 0.6064924001693726, "rewards/accuracies": 0.875, "rewards/chosen": -0.058925092220306396, "rewards/margins": 0.08993001282215118, "rewards/rejected": -0.14885510504245758, "step": 7602 }, { "epoch": 4.638096690559707, "grad_norm": 2.575205087661743, "learning_rate": 5.800367421922841e-07, "log_odds_chosen": 3.3268632888793945, "log_odds_ratio": -0.1788063496351242, "logits/chosen": -0.8037606477737427, "logits/rejected": -0.9498357772827148, "logps/chosen": -0.5183054208755493, "logps/rejected": -2.9940648078918457, "loss": 1.0696, "nll_loss": 0.7220418453216553, "rewards/accuracies": 0.875, "rewards/chosen": -0.05183054506778717, "rewards/margins": 0.24757595360279083, "rewards/rejected": -0.299406498670578, "step": 7603 }, { "epoch": 4.638706725636724, "grad_norm": 1.7853795289993286, "learning_rate": 5.790569503980404e-07, "log_odds_chosen": 0.5239257216453552, "log_odds_ratio": -0.5815755128860474, "logits/chosen": -0.9373873472213745, "logits/rejected": -0.9240281581878662, "logps/chosen": -0.8950561285018921, "logps/rejected": -1.2445430755615234, "loss": 1.0847, "nll_loss": 1.2847410440444946, "rewards/accuracies": 0.75, "rewards/chosen": -0.08950561285018921, "rewards/margins": 0.03494870290160179, "rewards/rejected": -0.1244543120265007, "step": 7604 }, { "epoch": 4.639316760713741, "grad_norm": 5.150551795959473, "learning_rate": 5.780771586037967e-07, "log_odds_chosen": 2.0544047355651855, "log_odds_ratio": -0.21377059817314148, "logits/chosen": -0.7363489866256714, "logits/rejected": -0.8746813535690308, "logps/chosen": -0.7604019641876221, "logps/rejected": -2.2992513179779053, "loss": 0.9086, "nll_loss": 0.9088109731674194, "rewards/accuracies": 1.0, "rewards/chosen": -0.07604019343852997, "rewards/margins": 0.15388494729995728, "rewards/rejected": -0.22992514073848724, "step": 7605 }, { "epoch": 4.639926795790758, "grad_norm": 3.230546712875366, "learning_rate": 5.770973668095529e-07, "log_odds_chosen": 2.460695266723633, "log_odds_ratio": -0.4738502502441406, "logits/chosen": -0.7730183005332947, "logits/rejected": -0.8806555271148682, "logps/chosen": -0.94496750831604, "logps/rejected": -3.2292890548706055, "loss": 1.09, "nll_loss": 1.1863794326782227, "rewards/accuracies": 0.625, "rewards/chosen": -0.09449675679206848, "rewards/margins": 0.22843213379383087, "rewards/rejected": -0.32292890548706055, "step": 7606 }, { "epoch": 4.640536830867775, "grad_norm": 1.0499988794326782, "learning_rate": 5.761175750153093e-07, "log_odds_chosen": 5.005819797515869, "log_odds_ratio": -0.04822688549757004, "logits/chosen": -0.8188526034355164, "logits/rejected": -1.088483214378357, "logps/chosen": -0.521786093711853, "logps/rejected": -4.419012546539307, "loss": 0.8564, "nll_loss": 0.846607506275177, "rewards/accuracies": 1.0, "rewards/chosen": -0.05217861384153366, "rewards/margins": 0.38972264528274536, "rewards/rejected": -0.4419012665748596, "step": 7607 }, { "epoch": 4.641146865944792, "grad_norm": 1.9730584621429443, "learning_rate": 5.751377832210655e-07, "log_odds_chosen": 1.011640191078186, "log_odds_ratio": -0.3808615803718567, "logits/chosen": -0.79948890209198, "logits/rejected": -0.9515191912651062, "logps/chosen": -0.6769082546234131, "logps/rejected": -1.2702147960662842, "loss": 1.1263, "nll_loss": 1.1361453533172607, "rewards/accuracies": 0.875, "rewards/chosen": -0.06769081950187683, "rewards/margins": 0.059330664575099945, "rewards/rejected": -0.12702149152755737, "step": 7608 }, { "epoch": 4.641756901021809, "grad_norm": 1.3957300186157227, "learning_rate": 5.741579914268217e-07, "log_odds_chosen": 2.2265548706054688, "log_odds_ratio": -0.47834300994873047, "logits/chosen": -0.9286510944366455, "logits/rejected": -1.02480149269104, "logps/chosen": -0.8345373868942261, "logps/rejected": -2.737360954284668, "loss": 0.8965, "nll_loss": 0.9094923138618469, "rewards/accuracies": 0.625, "rewards/chosen": -0.08345372974872589, "rewards/margins": 0.19028234481811523, "rewards/rejected": -0.2737360894680023, "step": 7609 }, { "epoch": 4.642366936098826, "grad_norm": 2.8684778213500977, "learning_rate": 5.731781996325781e-07, "log_odds_chosen": 2.3867030143737793, "log_odds_ratio": -0.25201505422592163, "logits/chosen": -0.8305800557136536, "logits/rejected": -0.8138415813446045, "logps/chosen": -0.8561518788337708, "logps/rejected": -2.7196390628814697, "loss": 0.9593, "nll_loss": 1.0278993844985962, "rewards/accuracies": 1.0, "rewards/chosen": -0.08561518788337708, "rewards/margins": 0.18634873628616333, "rewards/rejected": -0.271963894367218, "step": 7610 }, { "epoch": 4.642976971175843, "grad_norm": 10.655969619750977, "learning_rate": 5.721984078383343e-07, "log_odds_chosen": 1.67837393283844, "log_odds_ratio": -0.2958897054195404, "logits/chosen": -0.6260879635810852, "logits/rejected": -0.9728338122367859, "logps/chosen": -0.6305508017539978, "logps/rejected": -1.8103195428848267, "loss": 0.9876, "nll_loss": 0.7094271183013916, "rewards/accuracies": 0.875, "rewards/chosen": -0.06305507570505142, "rewards/margins": 0.11797686666250229, "rewards/rejected": -0.1810319423675537, "step": 7611 }, { "epoch": 4.64358700625286, "grad_norm": 2.167159080505371, "learning_rate": 5.712186160440906e-07, "log_odds_chosen": 0.9629494547843933, "log_odds_ratio": -0.4378950595855713, "logits/chosen": -0.985679566860199, "logits/rejected": -0.9847262501716614, "logps/chosen": -0.7747923731803894, "logps/rejected": -1.4439806938171387, "loss": 1.0833, "nll_loss": 1.028367280960083, "rewards/accuracies": 0.75, "rewards/chosen": -0.07747923582792282, "rewards/margins": 0.06691884994506836, "rewards/rejected": -0.14439807832241058, "step": 7612 }, { "epoch": 4.6441970413298765, "grad_norm": 1.2842267751693726, "learning_rate": 5.70238824249847e-07, "log_odds_chosen": 1.267562747001648, "log_odds_ratio": -0.6092998385429382, "logits/chosen": -1.0663750171661377, "logits/rejected": -1.1881390810012817, "logps/chosen": -1.111970067024231, "logps/rejected": -2.2147257328033447, "loss": 1.1483, "nll_loss": 1.382704257965088, "rewards/accuracies": 0.625, "rewards/chosen": -0.11119700968265533, "rewards/margins": 0.11027555912733078, "rewards/rejected": -0.22147256135940552, "step": 7613 }, { "epoch": 4.644807076406893, "grad_norm": 1.4711105823516846, "learning_rate": 5.692590324556032e-07, "log_odds_chosen": 1.651031494140625, "log_odds_ratio": -0.4025641679763794, "logits/chosen": -0.9495360255241394, "logits/rejected": -1.0312713384628296, "logps/chosen": -0.7958006858825684, "logps/rejected": -1.904512643814087, "loss": 1.0125, "nll_loss": 0.901814877986908, "rewards/accuracies": 0.75, "rewards/chosen": -0.07958006858825684, "rewards/margins": 0.11087121069431305, "rewards/rejected": -0.1904512643814087, "step": 7614 }, { "epoch": 4.64541711148391, "grad_norm": 1.1633517742156982, "learning_rate": 5.682792406613594e-07, "log_odds_chosen": 2.1338424682617188, "log_odds_ratio": -0.5135085582733154, "logits/chosen": -0.6664065718650818, "logits/rejected": -0.8658786416053772, "logps/chosen": -0.9175400137901306, "logps/rejected": -2.789315700531006, "loss": 1.005, "nll_loss": 1.0388034582138062, "rewards/accuracies": 0.625, "rewards/chosen": -0.0917540043592453, "rewards/margins": 0.18717753887176514, "rewards/rejected": -0.27893155813217163, "step": 7615 }, { "epoch": 4.646027146560927, "grad_norm": 1.7922930717468262, "learning_rate": 5.672994488671156e-07, "log_odds_chosen": 2.7382781505584717, "log_odds_ratio": -0.3705001175403595, "logits/chosen": -0.8121943473815918, "logits/rejected": -1.08005690574646, "logps/chosen": -0.5919802188873291, "logps/rejected": -2.5841894149780273, "loss": 0.9895, "nll_loss": 0.9469916224479675, "rewards/accuracies": 0.875, "rewards/chosen": -0.05919802188873291, "rewards/margins": 0.1992209255695343, "rewards/rejected": -0.2584189474582672, "step": 7616 }, { "epoch": 4.646637181637944, "grad_norm": 3.872880697250366, "learning_rate": 5.66319657072872e-07, "log_odds_chosen": 0.3888648450374603, "log_odds_ratio": -0.6557841300964355, "logits/chosen": -1.1026887893676758, "logits/rejected": -0.9874461889266968, "logps/chosen": -0.9943815469741821, "logps/rejected": -1.2713067531585693, "loss": 1.071, "nll_loss": 1.2092361450195312, "rewards/accuracies": 0.75, "rewards/chosen": -0.0994381532073021, "rewards/margins": 0.027692526578903198, "rewards/rejected": -0.1271306872367859, "step": 7617 }, { "epoch": 4.647247216714961, "grad_norm": 1.3052752017974854, "learning_rate": 5.653398652786283e-07, "log_odds_chosen": 2.577542543411255, "log_odds_ratio": -0.3602966368198395, "logits/chosen": -0.8742491602897644, "logits/rejected": -1.0137598514556885, "logps/chosen": -0.7366267442703247, "logps/rejected": -2.8326990604400635, "loss": 0.88, "nll_loss": 0.8267932534217834, "rewards/accuracies": 0.75, "rewards/chosen": -0.07366267591714859, "rewards/margins": 0.20960724353790283, "rewards/rejected": -0.2832699120044708, "step": 7618 }, { "epoch": 4.647857251791978, "grad_norm": 1.512258529663086, "learning_rate": 5.643600734843845e-07, "log_odds_chosen": 1.4642893075942993, "log_odds_ratio": -0.39925992488861084, "logits/chosen": -0.7603425979614258, "logits/rejected": -0.9319111108779907, "logps/chosen": -0.7628983855247498, "logps/rejected": -1.9010632038116455, "loss": 0.8982, "nll_loss": 0.9185524582862854, "rewards/accuracies": 0.875, "rewards/chosen": -0.0762898400425911, "rewards/margins": 0.11381648480892181, "rewards/rejected": -0.1901063174009323, "step": 7619 }, { "epoch": 4.648467286868995, "grad_norm": 1.3575695753097534, "learning_rate": 5.633802816901409e-07, "log_odds_chosen": 1.4968689680099487, "log_odds_ratio": -0.4446735382080078, "logits/chosen": -1.117769479751587, "logits/rejected": -1.0937072038650513, "logps/chosen": -0.8998672962188721, "logps/rejected": -2.1297261714935303, "loss": 1.0817, "nll_loss": 1.0471906661987305, "rewards/accuracies": 0.5, "rewards/chosen": -0.08998672664165497, "rewards/margins": 0.12298591434955597, "rewards/rejected": -0.21297264099121094, "step": 7620 }, { "epoch": 4.649077321946012, "grad_norm": 1.8592593669891357, "learning_rate": 5.624004898958971e-07, "log_odds_chosen": 1.3063151836395264, "log_odds_ratio": -0.4977093040943146, "logits/chosen": -0.9101487398147583, "logits/rejected": -1.0418466329574585, "logps/chosen": -0.9140073657035828, "logps/rejected": -1.7933118343353271, "loss": 1.1159, "nll_loss": 1.1588091850280762, "rewards/accuracies": 0.75, "rewards/chosen": -0.09140074253082275, "rewards/margins": 0.08793044090270996, "rewards/rejected": -0.17933116853237152, "step": 7621 }, { "epoch": 4.649687357023029, "grad_norm": 1.2892853021621704, "learning_rate": 5.614206981016534e-07, "log_odds_chosen": 1.3170150518417358, "log_odds_ratio": -0.601536214351654, "logits/chosen": -0.8581351637840271, "logits/rejected": -0.9634245038032532, "logps/chosen": -0.8851730823516846, "logps/rejected": -2.0640604496002197, "loss": 1.2351, "nll_loss": 0.9909618496894836, "rewards/accuracies": 0.625, "rewards/chosen": -0.08851730823516846, "rewards/margins": 0.11788873374462128, "rewards/rejected": -0.20640604197978973, "step": 7622 }, { "epoch": 4.650297392100046, "grad_norm": 1.7982538938522339, "learning_rate": 5.604409063074097e-07, "log_odds_chosen": 2.1372768878936768, "log_odds_ratio": -0.26413893699645996, "logits/chosen": -0.7861846685409546, "logits/rejected": -0.9419904351234436, "logps/chosen": -0.6321605443954468, "logps/rejected": -2.2654993534088135, "loss": 1.112, "nll_loss": 0.9437210559844971, "rewards/accuracies": 0.875, "rewards/chosen": -0.06321605294942856, "rewards/margins": 0.16333389282226562, "rewards/rejected": -0.2265499383211136, "step": 7623 }, { "epoch": 4.650907427177063, "grad_norm": 1.862362027168274, "learning_rate": 5.594611145131659e-07, "log_odds_chosen": 1.117239236831665, "log_odds_ratio": -0.5467642545700073, "logits/chosen": -0.9225142002105713, "logits/rejected": -0.9274122714996338, "logps/chosen": -0.8909111618995667, "logps/rejected": -1.7605597972869873, "loss": 1.1318, "nll_loss": 1.0287951231002808, "rewards/accuracies": 0.5, "rewards/chosen": -0.08909111469984055, "rewards/margins": 0.08696487545967102, "rewards/rejected": -0.17605599761009216, "step": 7624 }, { "epoch": 4.65151746225408, "grad_norm": 1.3944175243377686, "learning_rate": 5.584813227189222e-07, "log_odds_chosen": 5.227968215942383, "log_odds_ratio": -0.3014792799949646, "logits/chosen": -0.832154393196106, "logits/rejected": -1.0748631954193115, "logps/chosen": -0.7715210914611816, "logps/rejected": -5.312079429626465, "loss": 0.9922, "nll_loss": 0.8310157060623169, "rewards/accuracies": 0.875, "rewards/chosen": -0.07715211063623428, "rewards/margins": 0.45405587553977966, "rewards/rejected": -0.5312079787254333, "step": 7625 }, { "epoch": 4.652127497331096, "grad_norm": 1.93578040599823, "learning_rate": 5.575015309246784e-07, "log_odds_chosen": 1.9993194341659546, "log_odds_ratio": -0.28867802023887634, "logits/chosen": -1.0995347499847412, "logits/rejected": -1.1241717338562012, "logps/chosen": -0.791063129901886, "logps/rejected": -2.0135412216186523, "loss": 1.1572, "nll_loss": 1.3737303018569946, "rewards/accuracies": 0.875, "rewards/chosen": -0.07910631597042084, "rewards/margins": 0.12224780023097992, "rewards/rejected": -0.20135411620140076, "step": 7626 }, { "epoch": 4.652737532408113, "grad_norm": 5.657703876495361, "learning_rate": 5.565217391304348e-07, "log_odds_chosen": 0.6681256890296936, "log_odds_ratio": -0.8522491455078125, "logits/chosen": -0.7396679520606995, "logits/rejected": -0.7922937870025635, "logps/chosen": -0.6623461246490479, "logps/rejected": -1.3594002723693848, "loss": 1.0067, "nll_loss": 0.8831135630607605, "rewards/accuracies": 0.5, "rewards/chosen": -0.06623461097478867, "rewards/margins": 0.06970542669296265, "rewards/rejected": -0.13594003021717072, "step": 7627 }, { "epoch": 4.65334756748513, "grad_norm": 1.3581269979476929, "learning_rate": 5.555419473361911e-07, "log_odds_chosen": 2.3875174522399902, "log_odds_ratio": -0.5149408578872681, "logits/chosen": -1.0350812673568726, "logits/rejected": -0.9575327634811401, "logps/chosen": -0.729907751083374, "logps/rejected": -2.7572882175445557, "loss": 0.9835, "nll_loss": 0.8184205889701843, "rewards/accuracies": 0.625, "rewards/chosen": -0.0729907751083374, "rewards/margins": 0.20273806154727936, "rewards/rejected": -0.27572882175445557, "step": 7628 }, { "epoch": 4.653957602562147, "grad_norm": 1.3159232139587402, "learning_rate": 5.545621555419473e-07, "log_odds_chosen": 3.24505352973938, "log_odds_ratio": -0.16502726078033447, "logits/chosen": -0.7572172284126282, "logits/rejected": -1.0702826976776123, "logps/chosen": -0.6541063189506531, "logps/rejected": -3.2217609882354736, "loss": 0.8995, "nll_loss": 0.7877979278564453, "rewards/accuracies": 1.0, "rewards/chosen": -0.06541063636541367, "rewards/margins": 0.2567654550075531, "rewards/rejected": -0.32217612862586975, "step": 7629 }, { "epoch": 4.654567637639165, "grad_norm": 9.397547721862793, "learning_rate": 5.535823637477036e-07, "log_odds_chosen": 0.7483370304107666, "log_odds_ratio": -0.6504965424537659, "logits/chosen": -1.1111063957214355, "logits/rejected": -1.045103669166565, "logps/chosen": -0.8897576332092285, "logps/rejected": -1.4504637718200684, "loss": 1.2874, "nll_loss": 1.3918520212173462, "rewards/accuracies": 0.5, "rewards/chosen": -0.08897575736045837, "rewards/margins": 0.056070610880851746, "rewards/rejected": -0.1450463831424713, "step": 7630 }, { "epoch": 4.6551776727161815, "grad_norm": 1.366877555847168, "learning_rate": 5.526025719534598e-07, "log_odds_chosen": 2.194087505340576, "log_odds_ratio": -0.333650678396225, "logits/chosen": -0.986598551273346, "logits/rejected": -1.1086747646331787, "logps/chosen": -0.6062254309654236, "logps/rejected": -2.1649670600891113, "loss": 0.9653, "nll_loss": 0.8299665451049805, "rewards/accuracies": 0.875, "rewards/chosen": -0.06062254309654236, "rewards/margins": 0.15587416291236877, "rewards/rejected": -0.21649670600891113, "step": 7631 }, { "epoch": 4.655787707793198, "grad_norm": 2.088531494140625, "learning_rate": 5.516227801592161e-07, "log_odds_chosen": 3.639925479888916, "log_odds_ratio": -0.3015046715736389, "logits/chosen": -0.7868534326553345, "logits/rejected": -0.8601125478744507, "logps/chosen": -0.5495808124542236, "logps/rejected": -3.383833885192871, "loss": 1.2269, "nll_loss": 1.2750691175460815, "rewards/accuracies": 0.75, "rewards/chosen": -0.05495808273553848, "rewards/margins": 0.28342533111572266, "rewards/rejected": -0.33838343620300293, "step": 7632 }, { "epoch": 4.656397742870215, "grad_norm": 1.55157470703125, "learning_rate": 5.506429883649725e-07, "log_odds_chosen": 1.453865647315979, "log_odds_ratio": -0.6614389419555664, "logits/chosen": -0.9387454390525818, "logits/rejected": -1.1549053192138672, "logps/chosen": -1.0432448387145996, "logps/rejected": -2.199639320373535, "loss": 1.1788, "nll_loss": 1.3309136629104614, "rewards/accuracies": 0.5, "rewards/chosen": -0.10432448238134384, "rewards/margins": 0.11563946306705475, "rewards/rejected": -0.219963937997818, "step": 7633 }, { "epoch": 4.657007777947232, "grad_norm": 1.4736405611038208, "learning_rate": 5.496631965707287e-07, "log_odds_chosen": 4.424577713012695, "log_odds_ratio": -0.229312002658844, "logits/chosen": -0.8811570405960083, "logits/rejected": -1.1223409175872803, "logps/chosen": -0.9920486211776733, "logps/rejected": -4.838647842407227, "loss": 0.955, "nll_loss": 1.2661652565002441, "rewards/accuracies": 0.875, "rewards/chosen": -0.09920486807823181, "rewards/margins": 0.38465991616249084, "rewards/rejected": -0.48386481404304504, "step": 7634 }, { "epoch": 4.657617813024249, "grad_norm": 1.936279535293579, "learning_rate": 5.48683404776485e-07, "log_odds_chosen": 2.4401485919952393, "log_odds_ratio": -0.2848057746887207, "logits/chosen": -0.943588376045227, "logits/rejected": -0.9637422561645508, "logps/chosen": -1.1712297201156616, "logps/rejected": -3.114637613296509, "loss": 1.1282, "nll_loss": 1.1306681632995605, "rewards/accuracies": 0.875, "rewards/chosen": -0.11712297797203064, "rewards/margins": 0.1943408101797104, "rewards/rejected": -0.3114638030529022, "step": 7635 }, { "epoch": 4.658227848101266, "grad_norm": 1.3576862812042236, "learning_rate": 5.477036129822413e-07, "log_odds_chosen": 1.4963085651397705, "log_odds_ratio": -0.40642163157463074, "logits/chosen": -0.6358518600463867, "logits/rejected": -0.7504532337188721, "logps/chosen": -0.7271270751953125, "logps/rejected": -1.7778658866882324, "loss": 1.0537, "nll_loss": 0.7782354354858398, "rewards/accuracies": 0.75, "rewards/chosen": -0.0727127194404602, "rewards/margins": 0.10507388412952423, "rewards/rejected": -0.17778658866882324, "step": 7636 }, { "epoch": 4.658837883178283, "grad_norm": 4.577428340911865, "learning_rate": 5.467238211879975e-07, "log_odds_chosen": 0.5991339683532715, "log_odds_ratio": -0.5864410400390625, "logits/chosen": -0.6566528081893921, "logits/rejected": -0.8571321368217468, "logps/chosen": -0.80973219871521, "logps/rejected": -1.1728336811065674, "loss": 0.9829, "nll_loss": 0.8109548091888428, "rewards/accuracies": 0.75, "rewards/chosen": -0.08097322285175323, "rewards/margins": 0.03631015121936798, "rewards/rejected": -0.11728337407112122, "step": 7637 }, { "epoch": 4.6594479182552995, "grad_norm": 1.2161107063293457, "learning_rate": 5.457440293937538e-07, "log_odds_chosen": 4.142405986785889, "log_odds_ratio": -0.094760000705719, "logits/chosen": -0.8709695935249329, "logits/rejected": -1.0104296207427979, "logps/chosen": -0.629753589630127, "logps/rejected": -3.9996490478515625, "loss": 0.949, "nll_loss": 0.8568087816238403, "rewards/accuracies": 1.0, "rewards/chosen": -0.06297536194324493, "rewards/margins": 0.33698955178260803, "rewards/rejected": -0.39996492862701416, "step": 7638 }, { "epoch": 4.660057953332316, "grad_norm": 1.1779825687408447, "learning_rate": 5.4476423759951e-07, "log_odds_chosen": 2.2285921573638916, "log_odds_ratio": -0.34035271406173706, "logits/chosen": -0.9359829425811768, "logits/rejected": -1.008626937866211, "logps/chosen": -0.6459556221961975, "logps/rejected": -2.3001458644866943, "loss": 1.1307, "nll_loss": 0.6980183720588684, "rewards/accuracies": 0.875, "rewards/chosen": -0.06459556519985199, "rewards/margins": 0.16541904211044312, "rewards/rejected": -0.2300146073102951, "step": 7639 }, { "epoch": 4.660667988409333, "grad_norm": 13.780252456665039, "learning_rate": 5.437844458052664e-07, "log_odds_chosen": 2.7333664894104004, "log_odds_ratio": -0.3897886276245117, "logits/chosen": -0.7970219850540161, "logits/rejected": -1.0066192150115967, "logps/chosen": -0.6435773968696594, "logps/rejected": -2.8134098052978516, "loss": 1.1909, "nll_loss": 1.003584623336792, "rewards/accuracies": 0.875, "rewards/chosen": -0.06435774266719818, "rewards/margins": 0.21698325872421265, "rewards/rejected": -0.28134098649024963, "step": 7640 }, { "epoch": 4.66127802348635, "grad_norm": 9.892233848571777, "learning_rate": 5.428046540110226e-07, "log_odds_chosen": 0.6084414720535278, "log_odds_ratio": -0.5818243026733398, "logits/chosen": -0.9299150705337524, "logits/rejected": -0.9171164035797119, "logps/chosen": -0.7726300954818726, "logps/rejected": -1.1917612552642822, "loss": 1.0751, "nll_loss": 1.0053842067718506, "rewards/accuracies": 0.625, "rewards/chosen": -0.0772630050778389, "rewards/margins": 0.04191311076283455, "rewards/rejected": -0.11917611956596375, "step": 7641 }, { "epoch": 4.661888058563368, "grad_norm": 8.829185485839844, "learning_rate": 5.418248622167789e-07, "log_odds_chosen": 3.802551031112671, "log_odds_ratio": -0.30383360385894775, "logits/chosen": -0.8943995833396912, "logits/rejected": -1.2087843418121338, "logps/chosen": -0.6622505187988281, "logps/rejected": -3.8939642906188965, "loss": 1.0369, "nll_loss": 1.13051176071167, "rewards/accuracies": 0.75, "rewards/chosen": -0.06622505933046341, "rewards/margins": 0.32317137718200684, "rewards/rejected": -0.38939645886421204, "step": 7642 }, { "epoch": 4.662498093640385, "grad_norm": 8.733960151672363, "learning_rate": 5.408450704225352e-07, "log_odds_chosen": 2.6892902851104736, "log_odds_ratio": -0.5236489176750183, "logits/chosen": -0.9409645199775696, "logits/rejected": -1.1231377124786377, "logps/chosen": -0.804522693157196, "logps/rejected": -3.2055840492248535, "loss": 1.1465, "nll_loss": 1.4599030017852783, "rewards/accuracies": 0.625, "rewards/chosen": -0.08045227080583572, "rewards/margins": 0.24010612070560455, "rewards/rejected": -0.3205583989620209, "step": 7643 }, { "epoch": 4.663108128717401, "grad_norm": 1.7899460792541504, "learning_rate": 5.398652786282915e-07, "log_odds_chosen": 1.8834044933319092, "log_odds_ratio": -0.38838624954223633, "logits/chosen": -0.8828027248382568, "logits/rejected": -0.9548377394676208, "logps/chosen": -0.7556788921356201, "logps/rejected": -2.268228054046631, "loss": 0.9471, "nll_loss": 0.9969812631607056, "rewards/accuracies": 0.625, "rewards/chosen": -0.07556788623332977, "rewards/margins": 0.15125495195388794, "rewards/rejected": -0.22682282328605652, "step": 7644 }, { "epoch": 4.663718163794418, "grad_norm": 3.4183380603790283, "learning_rate": 5.388854868340477e-07, "log_odds_chosen": 2.3155620098114014, "log_odds_ratio": -0.29855066537857056, "logits/chosen": -0.7461327314376831, "logits/rejected": -0.8752304315567017, "logps/chosen": -0.6463150978088379, "logps/rejected": -2.460696220397949, "loss": 1.0061, "nll_loss": 0.851352334022522, "rewards/accuracies": 0.875, "rewards/chosen": -0.06463149935007095, "rewards/margins": 0.1814381182193756, "rewards/rejected": -0.24606963992118835, "step": 7645 }, { "epoch": 4.664328198871435, "grad_norm": 19.346710205078125, "learning_rate": 5.37905695039804e-07, "log_odds_chosen": 0.49392688274383545, "log_odds_ratio": -0.6184990406036377, "logits/chosen": -0.8293924331665039, "logits/rejected": -0.9705957174301147, "logps/chosen": -1.0456323623657227, "logps/rejected": -1.4772263765335083, "loss": 1.3127, "nll_loss": 1.268198013305664, "rewards/accuracies": 0.75, "rewards/chosen": -0.10456323623657227, "rewards/margins": 0.04315939173102379, "rewards/rejected": -0.14772263169288635, "step": 7646 }, { "epoch": 4.664938233948452, "grad_norm": 2.2330987453460693, "learning_rate": 5.369259032455603e-07, "log_odds_chosen": 2.5633013248443604, "log_odds_ratio": -0.2677674889564514, "logits/chosen": -0.8833218216896057, "logits/rejected": -1.003603219985962, "logps/chosen": -0.712445080280304, "logps/rejected": -2.65427827835083, "loss": 1.0362, "nll_loss": 0.9449766874313354, "rewards/accuracies": 0.875, "rewards/chosen": -0.0712445080280304, "rewards/margins": 0.19418330490589142, "rewards/rejected": -0.265427827835083, "step": 7647 }, { "epoch": 4.665548269025469, "grad_norm": 2.4042513370513916, "learning_rate": 5.359461114513165e-07, "log_odds_chosen": 2.314375162124634, "log_odds_ratio": -0.41689878702163696, "logits/chosen": -0.5988547205924988, "logits/rejected": -0.9121499061584473, "logps/chosen": -0.58139967918396, "logps/rejected": -2.4165756702423096, "loss": 0.9543, "nll_loss": 0.746666669845581, "rewards/accuracies": 0.625, "rewards/chosen": -0.058139968663454056, "rewards/margins": 0.18351760506629944, "rewards/rejected": -0.2416575700044632, "step": 7648 }, { "epoch": 4.666158304102486, "grad_norm": 12.083210945129395, "learning_rate": 5.349663196570729e-07, "log_odds_chosen": 3.031416893005371, "log_odds_ratio": -0.4593842029571533, "logits/chosen": -0.7917083501815796, "logits/rejected": -1.1002788543701172, "logps/chosen": -0.8670951128005981, "logps/rejected": -3.5423460006713867, "loss": 1.2189, "nll_loss": 1.2552316188812256, "rewards/accuracies": 0.625, "rewards/chosen": -0.08670951426029205, "rewards/margins": 0.2675250768661499, "rewards/rejected": -0.35423460602760315, "step": 7649 }, { "epoch": 4.6667683391795025, "grad_norm": 1.0288933515548706, "learning_rate": 5.339865278628292e-07, "log_odds_chosen": 3.05679988861084, "log_odds_ratio": -0.19573430716991425, "logits/chosen": -0.5783871412277222, "logits/rejected": -0.8814600110054016, "logps/chosen": -0.5458084344863892, "logps/rejected": -2.8961243629455566, "loss": 0.8599, "nll_loss": 0.7730010747909546, "rewards/accuracies": 1.0, "rewards/chosen": -0.054580848664045334, "rewards/margins": 0.2350316047668457, "rewards/rejected": -0.28961244225502014, "step": 7650 }, { "epoch": 4.66737837425652, "grad_norm": 2.0832762718200684, "learning_rate": 5.330067360685854e-07, "log_odds_chosen": 0.6348029375076294, "log_odds_ratio": -0.47608524560928345, "logits/chosen": -0.927310585975647, "logits/rejected": -0.9846147298812866, "logps/chosen": -0.8179983496665955, "logps/rejected": -1.2750287055969238, "loss": 0.9529, "nll_loss": 0.910078763961792, "rewards/accuracies": 0.875, "rewards/chosen": -0.08179983496665955, "rewards/margins": 0.045703042298555374, "rewards/rejected": -0.12750288844108582, "step": 7651 }, { "epoch": 4.667988409333537, "grad_norm": 1.6564358472824097, "learning_rate": 5.320269442743416e-07, "log_odds_chosen": 2.2169876098632812, "log_odds_ratio": -0.4488625228404999, "logits/chosen": -0.6224478483200073, "logits/rejected": -0.8467791676521301, "logps/chosen": -0.6969367265701294, "logps/rejected": -2.421351909637451, "loss": 1.0846, "nll_loss": 0.7874569892883301, "rewards/accuracies": 0.75, "rewards/chosen": -0.0696936696767807, "rewards/margins": 0.1724415123462677, "rewards/rejected": -0.2421351820230484, "step": 7652 }, { "epoch": 4.668598444410554, "grad_norm": 1.1311243772506714, "learning_rate": 5.310471524800979e-07, "log_odds_chosen": 2.043530225753784, "log_odds_ratio": -0.43180298805236816, "logits/chosen": -0.7684552669525146, "logits/rejected": -0.7934201955795288, "logps/chosen": -0.5589746832847595, "logps/rejected": -2.0257177352905273, "loss": 0.9573, "nll_loss": 0.7636057734489441, "rewards/accuracies": 0.625, "rewards/chosen": -0.05589746683835983, "rewards/margins": 0.14667430520057678, "rewards/rejected": -0.20257176458835602, "step": 7653 }, { "epoch": 4.669208479487571, "grad_norm": 1.2939743995666504, "learning_rate": 5.300673606858542e-07, "log_odds_chosen": 1.1170426607131958, "log_odds_ratio": -0.3954293727874756, "logits/chosen": -0.8527647256851196, "logits/rejected": -0.8303821086883545, "logps/chosen": -0.775407075881958, "logps/rejected": -1.511376142501831, "loss": 0.9753, "nll_loss": 0.9445968866348267, "rewards/accuracies": 0.75, "rewards/chosen": -0.07754071056842804, "rewards/margins": 0.07359690964221954, "rewards/rejected": -0.15113762021064758, "step": 7654 }, { "epoch": 4.669818514564588, "grad_norm": 1.368836760520935, "learning_rate": 5.290875688916105e-07, "log_odds_chosen": 1.6440043449401855, "log_odds_ratio": -0.42640745639801025, "logits/chosen": -0.7916157841682434, "logits/rejected": -0.8785938024520874, "logps/chosen": -0.8739032745361328, "logps/rejected": -2.0950164794921875, "loss": 1.123, "nll_loss": 0.8819848299026489, "rewards/accuracies": 0.625, "rewards/chosen": -0.08739033341407776, "rewards/margins": 0.12211134284734726, "rewards/rejected": -0.20950165390968323, "step": 7655 }, { "epoch": 4.6704285496416045, "grad_norm": 1.5439234972000122, "learning_rate": 5.281077770973668e-07, "log_odds_chosen": 4.665482521057129, "log_odds_ratio": -0.2522048354148865, "logits/chosen": -0.9603976011276245, "logits/rejected": -1.1015483140945435, "logps/chosen": -0.8711068630218506, "logps/rejected": -4.976013660430908, "loss": 1.0377, "nll_loss": 0.9860649108886719, "rewards/accuracies": 0.875, "rewards/chosen": -0.08711069077253342, "rewards/margins": 0.4104906916618347, "rewards/rejected": -0.4976014196872711, "step": 7656 }, { "epoch": 4.671038584718621, "grad_norm": 4.943800926208496, "learning_rate": 5.271279853031231e-07, "log_odds_chosen": 0.12143713235855103, "log_odds_ratio": -0.9295743107795715, "logits/chosen": -0.9915441274642944, "logits/rejected": -1.0203986167907715, "logps/chosen": -1.0415620803833008, "logps/rejected": -1.1546878814697266, "loss": 1.0755, "nll_loss": 1.1379822492599487, "rewards/accuracies": 0.375, "rewards/chosen": -0.10415621101856232, "rewards/margins": 0.0113125741481781, "rewards/rejected": -0.11546878516674042, "step": 7657 }, { "epoch": 4.671648619795638, "grad_norm": 1.106810450553894, "learning_rate": 5.261481935088793e-07, "log_odds_chosen": 2.401580810546875, "log_odds_ratio": -0.3015451431274414, "logits/chosen": -0.8859723210334778, "logits/rejected": -1.0463778972625732, "logps/chosen": -0.6910480260848999, "logps/rejected": -2.4419586658477783, "loss": 0.9949, "nll_loss": 0.941891074180603, "rewards/accuracies": 0.875, "rewards/chosen": -0.06910479813814163, "rewards/margins": 0.17509107291698456, "rewards/rejected": -0.2441958487033844, "step": 7658 }, { "epoch": 4.672258654872655, "grad_norm": 3.247443199157715, "learning_rate": 5.251684017146356e-07, "log_odds_chosen": 3.213360071182251, "log_odds_ratio": -0.33003848791122437, "logits/chosen": -1.0523451566696167, "logits/rejected": -1.1831581592559814, "logps/chosen": -0.8519980907440186, "logps/rejected": -3.630620241165161, "loss": 1.232, "nll_loss": 1.0425934791564941, "rewards/accuracies": 0.75, "rewards/chosen": -0.08519981056451797, "rewards/margins": 0.27786222100257874, "rewards/rejected": -0.3630620241165161, "step": 7659 }, { "epoch": 4.672868689949672, "grad_norm": 1.0907949209213257, "learning_rate": 5.241886099203919e-07, "log_odds_chosen": 1.4296945333480835, "log_odds_ratio": -0.4345310926437378, "logits/chosen": -0.75857013463974, "logits/rejected": -1.0407264232635498, "logps/chosen": -0.7919658422470093, "logps/rejected": -1.820156455039978, "loss": 1.025, "nll_loss": 1.2080954313278198, "rewards/accuracies": 0.75, "rewards/chosen": -0.07919658720493317, "rewards/margins": 0.102819062769413, "rewards/rejected": -0.18201565742492676, "step": 7660 }, { "epoch": 4.673478725026689, "grad_norm": 2.121204376220703, "learning_rate": 5.232088181261481e-07, "log_odds_chosen": 1.8352010250091553, "log_odds_ratio": -0.3698892593383789, "logits/chosen": -0.8148676156997681, "logits/rejected": -0.8096624612808228, "logps/chosen": -0.5248795747756958, "logps/rejected": -1.7042076587677002, "loss": 0.8258, "nll_loss": 0.659487783908844, "rewards/accuracies": 0.875, "rewards/chosen": -0.05248796194791794, "rewards/margins": 0.11793281883001328, "rewards/rejected": -0.17042076587677002, "step": 7661 }, { "epoch": 4.6740887601037056, "grad_norm": 1.0247774124145508, "learning_rate": 5.222290263319045e-07, "log_odds_chosen": 1.4771822690963745, "log_odds_ratio": -0.4625227451324463, "logits/chosen": -0.6677759885787964, "logits/rejected": -0.8176968693733215, "logps/chosen": -0.5420433282852173, "logps/rejected": -1.4838216304779053, "loss": 0.9002, "nll_loss": 0.6173789501190186, "rewards/accuracies": 0.5, "rewards/chosen": -0.05420432984828949, "rewards/margins": 0.09417782723903656, "rewards/rejected": -0.14838215708732605, "step": 7662 }, { "epoch": 4.674698795180722, "grad_norm": 3.2220592498779297, "learning_rate": 5.212492345376607e-07, "log_odds_chosen": 1.3641996383666992, "log_odds_ratio": -0.5051866769790649, "logits/chosen": -0.8182331323623657, "logits/rejected": -1.049533486366272, "logps/chosen": -0.8203124403953552, "logps/rejected": -1.7812024354934692, "loss": 1.1286, "nll_loss": 1.1328964233398438, "rewards/accuracies": 0.625, "rewards/chosen": -0.08203125, "rewards/margins": 0.09608899056911469, "rewards/rejected": -0.17812025547027588, "step": 7663 }, { "epoch": 4.67530883025774, "grad_norm": 5.237588405609131, "learning_rate": 5.20269442743417e-07, "log_odds_chosen": 0.009712934494018555, "log_odds_ratio": -0.8323934674263, "logits/chosen": -1.0635114908218384, "logits/rejected": -0.9743022918701172, "logps/chosen": -0.9007284641265869, "logps/rejected": -0.9176088571548462, "loss": 0.9364, "nll_loss": 1.0618257522583008, "rewards/accuracies": 0.25, "rewards/chosen": -0.09007285535335541, "rewards/margins": 0.0016880305483937263, "rewards/rejected": -0.09176088124513626, "step": 7664 }, { "epoch": 4.675918865334757, "grad_norm": 2.2669405937194824, "learning_rate": 5.192896509491734e-07, "log_odds_chosen": 2.9106361865997314, "log_odds_ratio": -0.38407108187675476, "logits/chosen": -1.0414745807647705, "logits/rejected": -1.0567203760147095, "logps/chosen": -0.8735834360122681, "logps/rejected": -3.521758556365967, "loss": 1.0935, "nll_loss": 1.0883851051330566, "rewards/accuracies": 0.75, "rewards/chosen": -0.08735834062099457, "rewards/margins": 0.2648175060749054, "rewards/rejected": -0.35217583179473877, "step": 7665 }, { "epoch": 4.676528900411774, "grad_norm": 1.977002501487732, "learning_rate": 5.183098591549295e-07, "log_odds_chosen": 2.39520263671875, "log_odds_ratio": -0.40847843885421753, "logits/chosen": -1.1034832000732422, "logits/rejected": -1.1852443218231201, "logps/chosen": -0.895067572593689, "logps/rejected": -3.030802011489868, "loss": 0.9979, "nll_loss": 0.9876813292503357, "rewards/accuracies": 0.75, "rewards/chosen": -0.08950674533843994, "rewards/margins": 0.21357345581054688, "rewards/rejected": -0.3030802011489868, "step": 7666 }, { "epoch": 4.677138935488791, "grad_norm": 2.6059844493865967, "learning_rate": 5.173300673606858e-07, "log_odds_chosen": 2.0868022441864014, "log_odds_ratio": -0.3676924407482147, "logits/chosen": -0.6876461505889893, "logits/rejected": -0.9142992496490479, "logps/chosen": -0.7558373212814331, "logps/rejected": -2.4609375, "loss": 0.9502, "nll_loss": 0.8017706274986267, "rewards/accuracies": 0.75, "rewards/chosen": -0.07558373361825943, "rewards/margins": 0.17051000893115997, "rewards/rejected": -0.24609375, "step": 7667 }, { "epoch": 4.6777489705658075, "grad_norm": 7.35103702545166, "learning_rate": 5.16350275566442e-07, "log_odds_chosen": 2.7273547649383545, "log_odds_ratio": -0.33142754435539246, "logits/chosen": -0.8751088976860046, "logits/rejected": -0.9508438110351562, "logps/chosen": -0.8089417815208435, "logps/rejected": -2.964327573776245, "loss": 1.0026, "nll_loss": 1.0257469415664673, "rewards/accuracies": 0.875, "rewards/chosen": -0.08089418709278107, "rewards/margins": 0.21553859114646912, "rewards/rejected": -0.296432763338089, "step": 7668 }, { "epoch": 4.678359005642824, "grad_norm": 1.6549454927444458, "learning_rate": 5.153704837721984e-07, "log_odds_chosen": 1.0390647649765015, "log_odds_ratio": -0.43621936440467834, "logits/chosen": -0.8254542350769043, "logits/rejected": -0.8831973671913147, "logps/chosen": -0.6736584305763245, "logps/rejected": -1.2995004653930664, "loss": 1.0232, "nll_loss": 1.0233973264694214, "rewards/accuracies": 0.875, "rewards/chosen": -0.0673658475279808, "rewards/margins": 0.06258419901132584, "rewards/rejected": -0.12995004653930664, "step": 7669 }, { "epoch": 4.678969040719841, "grad_norm": 2.6589696407318115, "learning_rate": 5.143906919779547e-07, "log_odds_chosen": 1.0657912492752075, "log_odds_ratio": -0.5441180467605591, "logits/chosen": -0.8295350074768066, "logits/rejected": -0.6971845030784607, "logps/chosen": -0.7530257701873779, "logps/rejected": -1.5710127353668213, "loss": 1.1116, "nll_loss": 0.8274569511413574, "rewards/accuracies": 0.625, "rewards/chosen": -0.07530257105827332, "rewards/margins": 0.08179870992898941, "rewards/rejected": -0.15710127353668213, "step": 7670 }, { "epoch": 4.679579075796858, "grad_norm": 1.3197895288467407, "learning_rate": 5.134109001837109e-07, "log_odds_chosen": 1.715651512145996, "log_odds_ratio": -0.5667024850845337, "logits/chosen": -1.1351873874664307, "logits/rejected": -1.1264570951461792, "logps/chosen": -1.00948166847229, "logps/rejected": -2.5314340591430664, "loss": 1.1046, "nll_loss": 1.087324857711792, "rewards/accuracies": 0.625, "rewards/chosen": -0.10094816237688065, "rewards/margins": 0.15219524502754211, "rewards/rejected": -0.25314339995384216, "step": 7671 }, { "epoch": 4.680189110873875, "grad_norm": 1.764872670173645, "learning_rate": 5.124311083894673e-07, "log_odds_chosen": 2.4073805809020996, "log_odds_ratio": -0.42705830931663513, "logits/chosen": -0.9629231691360474, "logits/rejected": -1.022931456565857, "logps/chosen": -0.8394047021865845, "logps/rejected": -3.022533416748047, "loss": 0.9969, "nll_loss": 1.0595972537994385, "rewards/accuracies": 0.75, "rewards/chosen": -0.08394047617912292, "rewards/margins": 0.21831285953521729, "rewards/rejected": -0.3022533357143402, "step": 7672 }, { "epoch": 4.680799145950893, "grad_norm": 1.8773571252822876, "learning_rate": 5.114513165952235e-07, "log_odds_chosen": 2.7255077362060547, "log_odds_ratio": -0.2470463514328003, "logits/chosen": -0.7860810160636902, "logits/rejected": -0.9318828582763672, "logps/chosen": -0.5032185316085815, "logps/rejected": -2.556943416595459, "loss": 0.8677, "nll_loss": 0.7355328798294067, "rewards/accuracies": 0.875, "rewards/chosen": -0.050321850925683975, "rewards/margins": 0.20537249743938446, "rewards/rejected": -0.25569432973861694, "step": 7673 }, { "epoch": 4.6814091810279095, "grad_norm": 2.819483995437622, "learning_rate": 5.104715248009797e-07, "log_odds_chosen": 1.77693772315979, "log_odds_ratio": -0.46313464641571045, "logits/chosen": -0.8886815905570984, "logits/rejected": -0.9217715263366699, "logps/chosen": -0.826831579208374, "logps/rejected": -2.1734561920166016, "loss": 1.1123, "nll_loss": 1.037182092666626, "rewards/accuracies": 0.5, "rewards/chosen": -0.08268316090106964, "rewards/margins": 0.134662464261055, "rewards/rejected": -0.21734565496444702, "step": 7674 }, { "epoch": 4.682019216104926, "grad_norm": 1.5579193830490112, "learning_rate": 5.094917330067361e-07, "log_odds_chosen": 3.77272891998291, "log_odds_ratio": -0.3693237900733948, "logits/chosen": -0.9524440169334412, "logits/rejected": -1.047684907913208, "logps/chosen": -0.8257913589477539, "logps/rejected": -4.109691619873047, "loss": 1.2055, "nll_loss": 1.2867743968963623, "rewards/accuracies": 0.875, "rewards/chosen": -0.08257913589477539, "rewards/margins": 0.3283900320529938, "rewards/rejected": -0.4109691381454468, "step": 7675 }, { "epoch": 4.682629251181943, "grad_norm": 1.3935765027999878, "learning_rate": 5.085119412124923e-07, "log_odds_chosen": 0.8795996904373169, "log_odds_ratio": -0.5915341973304749, "logits/chosen": -1.0026750564575195, "logits/rejected": -1.0563557147979736, "logps/chosen": -0.7745804786682129, "logps/rejected": -1.4942121505737305, "loss": 0.9467, "nll_loss": 0.9005572199821472, "rewards/accuracies": 0.5, "rewards/chosen": -0.07745805382728577, "rewards/margins": 0.07196316868066788, "rewards/rejected": -0.14942121505737305, "step": 7676 }, { "epoch": 4.68323928625896, "grad_norm": 2.1176085472106934, "learning_rate": 5.075321494182486e-07, "log_odds_chosen": 2.517277956008911, "log_odds_ratio": -0.4445647597312927, "logits/chosen": -0.88506680727005, "logits/rejected": -0.9744790196418762, "logps/chosen": -0.7632530927658081, "logps/rejected": -2.8093953132629395, "loss": 1.0648, "nll_loss": 1.0560083389282227, "rewards/accuracies": 0.625, "rewards/chosen": -0.07632530480623245, "rewards/margins": 0.20461425185203552, "rewards/rejected": -0.280939519405365, "step": 7677 }, { "epoch": 4.683849321335977, "grad_norm": 3.191697835922241, "learning_rate": 5.065523576240049e-07, "log_odds_chosen": 0.6209187507629395, "log_odds_ratio": -0.5078461170196533, "logits/chosen": -0.9653137922286987, "logits/rejected": -0.9957998394966125, "logps/chosen": -0.8846944570541382, "logps/rejected": -1.2510616779327393, "loss": 1.1869, "nll_loss": 1.2433521747589111, "rewards/accuracies": 0.75, "rewards/chosen": -0.08846944570541382, "rewards/margins": 0.03663673251867294, "rewards/rejected": -0.12510618567466736, "step": 7678 }, { "epoch": 4.684459356412994, "grad_norm": 1.5795186758041382, "learning_rate": 5.055725658297612e-07, "log_odds_chosen": 5.1083984375, "log_odds_ratio": -0.32278767228126526, "logits/chosen": -0.8754348754882812, "logits/rejected": -1.1293656826019287, "logps/chosen": -0.6858396530151367, "logps/rejected": -5.182084560394287, "loss": 0.9646, "nll_loss": 1.004217505455017, "rewards/accuracies": 0.75, "rewards/chosen": -0.06858396530151367, "rewards/margins": 0.4496244788169861, "rewards/rejected": -0.5182084441184998, "step": 7679 }, { "epoch": 4.685069391490011, "grad_norm": 1.590834379196167, "learning_rate": 5.045927740355175e-07, "log_odds_chosen": 2.184631824493408, "log_odds_ratio": -0.3468520939350128, "logits/chosen": -0.8022538423538208, "logits/rejected": -0.7886620163917542, "logps/chosen": -0.7433153390884399, "logps/rejected": -2.454289197921753, "loss": 1.1137, "nll_loss": 1.2111380100250244, "rewards/accuracies": 0.75, "rewards/chosen": -0.07433153688907623, "rewards/margins": 0.17109739780426025, "rewards/rejected": -0.2454289197921753, "step": 7680 }, { "epoch": 4.685679426567027, "grad_norm": 2.421443223953247, "learning_rate": 5.036129822412736e-07, "log_odds_chosen": 2.118626832962036, "log_odds_ratio": -0.35011258721351624, "logits/chosen": -0.7771947979927063, "logits/rejected": -0.9280208349227905, "logps/chosen": -0.6124501824378967, "logps/rejected": -2.3057596683502197, "loss": 1.106, "nll_loss": 0.9013808965682983, "rewards/accuracies": 1.0, "rewards/chosen": -0.06124502047896385, "rewards/margins": 0.16933095455169678, "rewards/rejected": -0.23057600855827332, "step": 7681 }, { "epoch": 4.686289461644044, "grad_norm": 1.8585538864135742, "learning_rate": 5.0263319044703e-07, "log_odds_chosen": 1.2501426935195923, "log_odds_ratio": -0.4557744264602661, "logits/chosen": -1.0153415203094482, "logits/rejected": -1.0168333053588867, "logps/chosen": -0.7120504379272461, "logps/rejected": -1.6197240352630615, "loss": 0.9859, "nll_loss": 0.9466089606285095, "rewards/accuracies": 0.75, "rewards/chosen": -0.07120504230260849, "rewards/margins": 0.09076736122369766, "rewards/rejected": -0.16197238862514496, "step": 7682 }, { "epoch": 4.686899496721061, "grad_norm": 1.7941302061080933, "learning_rate": 5.016533986527862e-07, "log_odds_chosen": 2.517792224884033, "log_odds_ratio": -0.3660507798194885, "logits/chosen": -1.2309751510620117, "logits/rejected": -1.148805856704712, "logps/chosen": -0.8245238065719604, "logps/rejected": -2.7015700340270996, "loss": 1.3078, "nll_loss": 1.2177047729492188, "rewards/accuracies": 0.625, "rewards/chosen": -0.08245238661766052, "rewards/margins": 0.18770462274551392, "rewards/rejected": -0.27015700936317444, "step": 7683 }, { "epoch": 4.687509531798078, "grad_norm": 1.2105704545974731, "learning_rate": 5.006736068585425e-07, "log_odds_chosen": 1.4825797080993652, "log_odds_ratio": -0.44587838649749756, "logits/chosen": -0.9233765602111816, "logits/rejected": -0.9075250625610352, "logps/chosen": -0.6351255178451538, "logps/rejected": -1.693184733390808, "loss": 1.1061, "nll_loss": 0.8576219081878662, "rewards/accuracies": 0.75, "rewards/chosen": -0.06351254880428314, "rewards/margins": 0.10580594837665558, "rewards/rejected": -0.16931848227977753, "step": 7684 }, { "epoch": 4.688119566875096, "grad_norm": 1.7711832523345947, "learning_rate": 4.996938150642988e-07, "log_odds_chosen": 0.20361313223838806, "log_odds_ratio": -0.7137997150421143, "logits/chosen": -0.9862741231918335, "logits/rejected": -0.9241037368774414, "logps/chosen": -0.8750694990158081, "logps/rejected": -1.0159871578216553, "loss": 1.0757, "nll_loss": 1.1117074489593506, "rewards/accuracies": 0.375, "rewards/chosen": -0.08750694990158081, "rewards/margins": 0.014091767370700836, "rewards/rejected": -0.10159870982170105, "step": 7685 }, { "epoch": 4.6887296019521125, "grad_norm": 2.1200881004333496, "learning_rate": 4.987140232700551e-07, "log_odds_chosen": 1.9276278018951416, "log_odds_ratio": -0.40606454014778137, "logits/chosen": -1.0835868120193481, "logits/rejected": -1.0987693071365356, "logps/chosen": -0.8537633419036865, "logps/rejected": -2.337780475616455, "loss": 1.121, "nll_loss": 1.142538070678711, "rewards/accuracies": 0.625, "rewards/chosen": -0.08537633717060089, "rewards/margins": 0.14840172231197357, "rewards/rejected": -0.23377805948257446, "step": 7686 }, { "epoch": 4.689339637029129, "grad_norm": 1.1684857606887817, "learning_rate": 4.977342314758114e-07, "log_odds_chosen": 3.431137800216675, "log_odds_ratio": -0.2909489572048187, "logits/chosen": -0.9738335013389587, "logits/rejected": -1.072360873222351, "logps/chosen": -0.7516291737556458, "logps/rejected": -3.283291816711426, "loss": 1.0597, "nll_loss": 1.2475417852401733, "rewards/accuracies": 0.75, "rewards/chosen": -0.07516291737556458, "rewards/margins": 0.2531662583351135, "rewards/rejected": -0.3283292055130005, "step": 7687 }, { "epoch": 4.689949672106146, "grad_norm": 7.965496063232422, "learning_rate": 4.967544396815677e-07, "log_odds_chosen": 2.252819299697876, "log_odds_ratio": -0.40716540813446045, "logits/chosen": -0.781464695930481, "logits/rejected": -0.9083108305931091, "logps/chosen": -0.742569088935852, "logps/rejected": -2.5400938987731934, "loss": 0.9433, "nll_loss": 0.8509390950202942, "rewards/accuracies": 0.75, "rewards/chosen": -0.07425691187381744, "rewards/margins": 0.17975248396396637, "rewards/rejected": -0.2540093958377838, "step": 7688 }, { "epoch": 4.690559707183163, "grad_norm": 2.4948315620422363, "learning_rate": 4.957746478873239e-07, "log_odds_chosen": 2.770956039428711, "log_odds_ratio": -0.33531951904296875, "logits/chosen": -0.8086212873458862, "logits/rejected": -1.0568909645080566, "logps/chosen": -0.6147712469100952, "logps/rejected": -2.7402687072753906, "loss": 0.9151, "nll_loss": 0.926882803440094, "rewards/accuracies": 0.75, "rewards/chosen": -0.06147712841629982, "rewards/margins": 0.21254974603652954, "rewards/rejected": -0.27402687072753906, "step": 7689 }, { "epoch": 4.69116974226018, "grad_norm": 1.4939008951187134, "learning_rate": 4.947948560930802e-07, "log_odds_chosen": 2.4316983222961426, "log_odds_ratio": -0.42672091722488403, "logits/chosen": -1.0087168216705322, "logits/rejected": -1.1883057355880737, "logps/chosen": -0.7996468544006348, "logps/rejected": -2.7435688972473145, "loss": 0.9851, "nll_loss": 0.979248046875, "rewards/accuracies": 0.625, "rewards/chosen": -0.07996468991041183, "rewards/margins": 0.19439221918582916, "rewards/rejected": -0.2743569016456604, "step": 7690 }, { "epoch": 4.691779777337197, "grad_norm": 3.0451297760009766, "learning_rate": 4.938150642988365e-07, "log_odds_chosen": 2.899660348892212, "log_odds_ratio": -0.381111204624176, "logits/chosen": -0.7043821811676025, "logits/rejected": -0.9262644052505493, "logps/chosen": -0.6699884533882141, "logps/rejected": -3.0399372577667236, "loss": 0.933, "nll_loss": 0.8286125659942627, "rewards/accuracies": 0.75, "rewards/chosen": -0.06699884682893753, "rewards/margins": 0.2369948774576187, "rewards/rejected": -0.30399370193481445, "step": 7691 }, { "epoch": 4.692389812414214, "grad_norm": 1.9523913860321045, "learning_rate": 4.928352725045927e-07, "log_odds_chosen": 2.296091079711914, "log_odds_ratio": -0.3407154083251953, "logits/chosen": -0.7390438318252563, "logits/rejected": -0.8837317228317261, "logps/chosen": -0.824695885181427, "logps/rejected": -2.5688223838806152, "loss": 1.1756, "nll_loss": 0.8458194732666016, "rewards/accuracies": 0.75, "rewards/chosen": -0.08246959745883942, "rewards/margins": 0.17441260814666748, "rewards/rejected": -0.2568822205066681, "step": 7692 }, { "epoch": 4.6929998474912304, "grad_norm": 4.547776222229004, "learning_rate": 4.91855480710349e-07, "log_odds_chosen": 0.029032457619905472, "log_odds_ratio": -0.7908658385276794, "logits/chosen": -0.8749333620071411, "logits/rejected": -0.9561764001846313, "logps/chosen": -1.187514066696167, "logps/rejected": -1.3089544773101807, "loss": 1.2762, "nll_loss": 1.2513391971588135, "rewards/accuracies": 0.375, "rewards/chosen": -0.1187514141201973, "rewards/margins": 0.01214403472840786, "rewards/rejected": -0.1308954507112503, "step": 7693 }, { "epoch": 4.693609882568247, "grad_norm": 2.458770751953125, "learning_rate": 4.908756889161053e-07, "log_odds_chosen": 3.553910732269287, "log_odds_ratio": -0.27579545974731445, "logits/chosen": -0.7071568965911865, "logits/rejected": -0.8376272916793823, "logps/chosen": -0.6359546184539795, "logps/rejected": -3.5859289169311523, "loss": 1.0068, "nll_loss": 0.9473683834075928, "rewards/accuracies": 0.75, "rewards/chosen": -0.06359546631574631, "rewards/margins": 0.2949974834918976, "rewards/rejected": -0.3585929274559021, "step": 7694 }, { "epoch": 4.694219917645265, "grad_norm": 1.9755213260650635, "learning_rate": 4.898958971218616e-07, "log_odds_chosen": 1.460416555404663, "log_odds_ratio": -0.3451542854309082, "logits/chosen": -0.806995689868927, "logits/rejected": -0.817696213722229, "logps/chosen": -0.83345627784729, "logps/rejected": -1.9906206130981445, "loss": 0.8433, "nll_loss": 0.8530663251876831, "rewards/accuracies": 0.875, "rewards/chosen": -0.08334562182426453, "rewards/margins": 0.11571644246578217, "rewards/rejected": -0.1990620642900467, "step": 7695 }, { "epoch": 4.694829952722282, "grad_norm": 1.6482105255126953, "learning_rate": 4.889161053276178e-07, "log_odds_chosen": 2.520289421081543, "log_odds_ratio": -0.35908687114715576, "logits/chosen": -0.8213382363319397, "logits/rejected": -0.9792061448097229, "logps/chosen": -0.8221151828765869, "logps/rejected": -3.025909185409546, "loss": 1.0475, "nll_loss": 1.0162956714630127, "rewards/accuracies": 0.75, "rewards/chosen": -0.08221151679754257, "rewards/margins": 0.22037939727306366, "rewards/rejected": -0.30259090662002563, "step": 7696 }, { "epoch": 4.695439987799299, "grad_norm": 10.346465110778809, "learning_rate": 4.879363135333741e-07, "log_odds_chosen": 0.5651499032974243, "log_odds_ratio": -0.6480237245559692, "logits/chosen": -0.8324073553085327, "logits/rejected": -1.0112448930740356, "logps/chosen": -0.7609423995018005, "logps/rejected": -1.201409935951233, "loss": 1.0234, "nll_loss": 1.0067112445831299, "rewards/accuracies": 0.5, "rewards/chosen": -0.07609423249959946, "rewards/margins": 0.044046759605407715, "rewards/rejected": -0.12014099955558777, "step": 7697 }, { "epoch": 4.696050022876316, "grad_norm": 11.489533424377441, "learning_rate": 4.869565217391305e-07, "log_odds_chosen": 1.708363652229309, "log_odds_ratio": -0.2717345654964447, "logits/chosen": -0.9283944964408875, "logits/rejected": -0.9452468752861023, "logps/chosen": -0.8121699094772339, "logps/rejected": -2.081853151321411, "loss": 1.2086, "nll_loss": 0.9455888271331787, "rewards/accuracies": 0.875, "rewards/chosen": -0.08121699839830399, "rewards/margins": 0.12696832418441772, "rewards/rejected": -0.2081853151321411, "step": 7698 }, { "epoch": 4.696660057953332, "grad_norm": 2.8222265243530273, "learning_rate": 4.859767299448867e-07, "log_odds_chosen": 1.8978253602981567, "log_odds_ratio": -0.4487553834915161, "logits/chosen": -1.0647144317626953, "logits/rejected": -1.1373529434204102, "logps/chosen": -0.7000412940979004, "logps/rejected": -2.1537437438964844, "loss": 0.9762, "nll_loss": 0.9801924228668213, "rewards/accuracies": 0.75, "rewards/chosen": -0.07000413537025452, "rewards/margins": 0.1453702449798584, "rewards/rejected": -0.21537438035011292, "step": 7699 }, { "epoch": 4.697270093030349, "grad_norm": 1.7875242233276367, "learning_rate": 4.849969381506429e-07, "log_odds_chosen": 1.6587810516357422, "log_odds_ratio": -0.33650267124176025, "logits/chosen": -0.5962247848510742, "logits/rejected": -0.6159077882766724, "logps/chosen": -0.5262630581855774, "logps/rejected": -1.4577127695083618, "loss": 1.1046, "nll_loss": 0.8759322166442871, "rewards/accuracies": 0.875, "rewards/chosen": -0.05262630432844162, "rewards/margins": 0.0931449830532074, "rewards/rejected": -0.14577129483222961, "step": 7700 }, { "epoch": 4.697880128107366, "grad_norm": 2.0224740505218506, "learning_rate": 4.840171463563993e-07, "log_odds_chosen": 2.525387763977051, "log_odds_ratio": -0.4732034206390381, "logits/chosen": -0.9739961624145508, "logits/rejected": -1.058627963066101, "logps/chosen": -0.6812070608139038, "logps/rejected": -2.7831435203552246, "loss": 1.0128, "nll_loss": 0.9388596415519714, "rewards/accuracies": 0.75, "rewards/chosen": -0.06812070310115814, "rewards/margins": 0.21019363403320312, "rewards/rejected": -0.27831435203552246, "step": 7701 }, { "epoch": 4.698490163184383, "grad_norm": 6.863027095794678, "learning_rate": 4.830373545621555e-07, "log_odds_chosen": 1.5743403434753418, "log_odds_ratio": -0.6085807085037231, "logits/chosen": -0.9253528118133545, "logits/rejected": -1.0137256383895874, "logps/chosen": -0.7172520160675049, "logps/rejected": -1.8921873569488525, "loss": 1.1437, "nll_loss": 1.0998884439468384, "rewards/accuracies": 0.625, "rewards/chosen": -0.07172520458698273, "rewards/margins": 0.11749352514743805, "rewards/rejected": -0.18921872973442078, "step": 7702 }, { "epoch": 4.6991001982614, "grad_norm": 2.3804209232330322, "learning_rate": 4.820575627679118e-07, "log_odds_chosen": 2.1579771041870117, "log_odds_ratio": -0.32278430461883545, "logits/chosen": -1.0227196216583252, "logits/rejected": -0.8718353509902954, "logps/chosen": -0.8599401712417603, "logps/rejected": -2.492372512817383, "loss": 1.0785, "nll_loss": 1.0281232595443726, "rewards/accuracies": 1.0, "rewards/chosen": -0.08599402010440826, "rewards/margins": 0.16324321925640106, "rewards/rejected": -0.24923723936080933, "step": 7703 }, { "epoch": 4.699710233338417, "grad_norm": 2.234056234359741, "learning_rate": 4.810777709736681e-07, "log_odds_chosen": 1.8240509033203125, "log_odds_ratio": -0.26442351937294006, "logits/chosen": -0.9891562461853027, "logits/rejected": -0.9824209213256836, "logps/chosen": -0.8779305219650269, "logps/rejected": -2.250242233276367, "loss": 1.1958, "nll_loss": 1.1452057361602783, "rewards/accuracies": 0.875, "rewards/chosen": -0.08779305219650269, "rewards/margins": 0.13723117113113403, "rewards/rejected": -0.22502422332763672, "step": 7704 }, { "epoch": 4.7003202684154335, "grad_norm": 3.2566187381744385, "learning_rate": 4.800979791794244e-07, "log_odds_chosen": 0.8379563093185425, "log_odds_ratio": -0.5273876190185547, "logits/chosen": -0.9662218689918518, "logits/rejected": -0.9640742540359497, "logps/chosen": -0.8738174438476562, "logps/rejected": -1.5284100770950317, "loss": 1.1896, "nll_loss": 0.8925530910491943, "rewards/accuracies": 0.5, "rewards/chosen": -0.0873817428946495, "rewards/margins": 0.06545928120613098, "rewards/rejected": -0.1528410166501999, "step": 7705 }, { "epoch": 4.70093030349245, "grad_norm": 5.359095573425293, "learning_rate": 4.791181873851806e-07, "log_odds_chosen": 3.198735237121582, "log_odds_ratio": -0.18952155113220215, "logits/chosen": -0.5626667141914368, "logits/rejected": -0.804580569267273, "logps/chosen": -0.45926088094711304, "logps/rejected": -2.8239586353302, "loss": 0.8791, "nll_loss": 0.5571775436401367, "rewards/accuracies": 1.0, "rewards/chosen": -0.045926086604595184, "rewards/margins": 0.23646977543830872, "rewards/rejected": -0.2823958694934845, "step": 7706 }, { "epoch": 4.701540338569468, "grad_norm": 1.2750542163848877, "learning_rate": 4.781383955909368e-07, "log_odds_chosen": 2.589829206466675, "log_odds_ratio": -0.43373924493789673, "logits/chosen": -0.8127390146255493, "logits/rejected": -0.8979608416557312, "logps/chosen": -0.6064329743385315, "logps/rejected": -2.7449655532836914, "loss": 1.1888, "nll_loss": 0.993527889251709, "rewards/accuracies": 0.625, "rewards/chosen": -0.06064329668879509, "rewards/margins": 0.21385328471660614, "rewards/rejected": -0.27449658513069153, "step": 7707 }, { "epoch": 4.702150373646485, "grad_norm": 1.4464552402496338, "learning_rate": 4.771586037966932e-07, "log_odds_chosen": 2.0268967151641846, "log_odds_ratio": -0.30465927720069885, "logits/chosen": -1.1101560592651367, "logits/rejected": -1.0954996347427368, "logps/chosen": -0.8637943267822266, "logps/rejected": -2.540353775024414, "loss": 1.0655, "nll_loss": 1.031352162361145, "rewards/accuracies": 1.0, "rewards/chosen": -0.08637943863868713, "rewards/margins": 0.16765594482421875, "rewards/rejected": -0.2540353834629059, "step": 7708 }, { "epoch": 4.702760408723502, "grad_norm": 1.1544640064239502, "learning_rate": 4.7617881200244946e-07, "log_odds_chosen": 1.729041337966919, "log_odds_ratio": -0.3711831569671631, "logits/chosen": -0.8018715381622314, "logits/rejected": -0.8853507041931152, "logps/chosen": -0.56019526720047, "logps/rejected": -1.8469215631484985, "loss": 0.9325, "nll_loss": 0.7976642847061157, "rewards/accuracies": 0.875, "rewards/chosen": -0.056019529700279236, "rewards/margins": 0.12867262959480286, "rewards/rejected": -0.1846921592950821, "step": 7709 }, { "epoch": 4.703370443800519, "grad_norm": 1.7511991262435913, "learning_rate": 4.7519902020820574e-07, "log_odds_chosen": 1.58754301071167, "log_odds_ratio": -0.4010353684425354, "logits/chosen": -1.0133976936340332, "logits/rejected": -1.0748326778411865, "logps/chosen": -0.7009948492050171, "logps/rejected": -1.875737190246582, "loss": 1.0425, "nll_loss": 0.8197429180145264, "rewards/accuracies": 0.875, "rewards/chosen": -0.07009948790073395, "rewards/margins": 0.11747423559427261, "rewards/rejected": -0.18757373094558716, "step": 7710 }, { "epoch": 4.7039804788775355, "grad_norm": 6.46551513671875, "learning_rate": 4.74219228413962e-07, "log_odds_chosen": 3.8660025596618652, "log_odds_ratio": -0.1832389086484909, "logits/chosen": -0.7576755881309509, "logits/rejected": -0.9710677862167358, "logps/chosen": -0.57094806432724, "logps/rejected": -3.605839252471924, "loss": 1.1397, "nll_loss": 0.8492807745933533, "rewards/accuracies": 1.0, "rewards/chosen": -0.05709480121731758, "rewards/margins": 0.30348914861679077, "rewards/rejected": -0.36058396100997925, "step": 7711 }, { "epoch": 4.704590513954552, "grad_norm": 1.726678729057312, "learning_rate": 4.7323943661971825e-07, "log_odds_chosen": 0.5176972150802612, "log_odds_ratio": -0.6376760005950928, "logits/chosen": -1.0756767988204956, "logits/rejected": -0.9901203513145447, "logps/chosen": -1.0493881702423096, "logps/rejected": -1.5090070962905884, "loss": 0.9874, "nll_loss": 1.168637990951538, "rewards/accuracies": 0.5, "rewards/chosen": -0.10493883490562439, "rewards/margins": 0.045961882919073105, "rewards/rejected": -0.1509007066488266, "step": 7712 }, { "epoch": 4.705200549031569, "grad_norm": 3.420506477355957, "learning_rate": 4.722596448254746e-07, "log_odds_chosen": 2.206073522567749, "log_odds_ratio": -0.34100762009620667, "logits/chosen": -0.886836051940918, "logits/rejected": -1.1074256896972656, "logps/chosen": -0.7502824664115906, "logps/rejected": -2.395235300064087, "loss": 1.0892, "nll_loss": 0.8928529024124146, "rewards/accuracies": 0.75, "rewards/chosen": -0.07502824068069458, "rewards/margins": 0.1644952893257141, "rewards/rejected": -0.2395235300064087, "step": 7713 }, { "epoch": 4.705810584108586, "grad_norm": 9.737890243530273, "learning_rate": 4.7127985303123086e-07, "log_odds_chosen": 1.4597318172454834, "log_odds_ratio": -0.5220488905906677, "logits/chosen": -0.8327908515930176, "logits/rejected": -0.9274405241012573, "logps/chosen": -0.833325982093811, "logps/rejected": -2.1458163261413574, "loss": 1.0172, "nll_loss": 0.9513288140296936, "rewards/accuracies": 0.75, "rewards/chosen": -0.0833325982093811, "rewards/margins": 0.1312490552663803, "rewards/rejected": -0.2145816683769226, "step": 7714 }, { "epoch": 4.706420619185603, "grad_norm": 1.3209457397460938, "learning_rate": 4.7030006123698715e-07, "log_odds_chosen": 2.977567672729492, "log_odds_ratio": -0.3937007784843445, "logits/chosen": -0.9772650599479675, "logits/rejected": -1.0827248096466064, "logps/chosen": -0.873573362827301, "logps/rejected": -3.328131914138794, "loss": 1.0358, "nll_loss": 1.0597848892211914, "rewards/accuracies": 0.75, "rewards/chosen": -0.08735733479261398, "rewards/margins": 0.24545587599277496, "rewards/rejected": -0.33281320333480835, "step": 7715 }, { "epoch": 4.707030654262621, "grad_norm": 3.7464792728424072, "learning_rate": 4.6932026944274337e-07, "log_odds_chosen": 2.1699302196502686, "log_odds_ratio": -0.3855855166912079, "logits/chosen": -1.0418299436569214, "logits/rejected": -0.9502320289611816, "logps/chosen": -0.8961226344108582, "logps/rejected": -2.7701566219329834, "loss": 1.2041, "nll_loss": 1.0957752466201782, "rewards/accuracies": 0.75, "rewards/chosen": -0.08961227536201477, "rewards/margins": 0.18740341067314148, "rewards/rejected": -0.27701565623283386, "step": 7716 }, { "epoch": 4.707640689339637, "grad_norm": 1.317958116531372, "learning_rate": 4.6834047764849965e-07, "log_odds_chosen": 3.909207582473755, "log_odds_ratio": -0.23157066106796265, "logits/chosen": -0.8499369025230408, "logits/rejected": -1.0947296619415283, "logps/chosen": -0.8252476453781128, "logps/rejected": -4.074750900268555, "loss": 1.0055, "nll_loss": 1.0666636228561401, "rewards/accuracies": 0.875, "rewards/chosen": -0.08252476155757904, "rewards/margins": 0.32495030760765076, "rewards/rejected": -0.407475084066391, "step": 7717 }, { "epoch": 4.708250724416654, "grad_norm": 4.374953746795654, "learning_rate": 4.67360685854256e-07, "log_odds_chosen": 2.1378443241119385, "log_odds_ratio": -0.42883917689323425, "logits/chosen": -1.0599267482757568, "logits/rejected": -0.9928810596466064, "logps/chosen": -0.8704752326011658, "logps/rejected": -2.609334945678711, "loss": 0.9944, "nll_loss": 1.0546009540557861, "rewards/accuracies": 0.875, "rewards/chosen": -0.0870475247502327, "rewards/margins": 0.17388597130775452, "rewards/rejected": -0.2609334886074066, "step": 7718 }, { "epoch": 4.708860759493671, "grad_norm": 2.040489435195923, "learning_rate": 4.663808940600122e-07, "log_odds_chosen": 1.5353832244873047, "log_odds_ratio": -0.426685631275177, "logits/chosen": -0.8307636380195618, "logits/rejected": -0.9695417284965515, "logps/chosen": -0.6888644099235535, "logps/rejected": -1.6400110721588135, "loss": 0.9002, "nll_loss": 0.8500711917877197, "rewards/accuracies": 0.75, "rewards/chosen": -0.06888644397258759, "rewards/margins": 0.09511467814445496, "rewards/rejected": -0.16400112211704254, "step": 7719 }, { "epoch": 4.709470794570688, "grad_norm": 5.7652106285095215, "learning_rate": 4.654011022657685e-07, "log_odds_chosen": 2.2583768367767334, "log_odds_ratio": -0.43605172634124756, "logits/chosen": -0.6750191450119019, "logits/rejected": -0.7861552238464355, "logps/chosen": -0.5647995471954346, "logps/rejected": -2.3480210304260254, "loss": 0.8538, "nll_loss": 0.7176802158355713, "rewards/accuracies": 0.875, "rewards/chosen": -0.05647995322942734, "rewards/margins": 0.17832216620445251, "rewards/rejected": -0.23480212688446045, "step": 7720 }, { "epoch": 4.710080829647705, "grad_norm": 1.615195631980896, "learning_rate": 4.644213104715248e-07, "log_odds_chosen": 1.621924638748169, "log_odds_ratio": -0.5268455147743225, "logits/chosen": -0.7986338138580322, "logits/rejected": -0.8206570744514465, "logps/chosen": -0.7409404516220093, "logps/rejected": -1.9685605764389038, "loss": 0.9641, "nll_loss": 0.9085215330123901, "rewards/accuracies": 0.625, "rewards/chosen": -0.07409404218196869, "rewards/margins": 0.12276200950145721, "rewards/rejected": -0.1968560665845871, "step": 7721 }, { "epoch": 4.710690864724722, "grad_norm": 1.3281919956207275, "learning_rate": 4.6344151867728106e-07, "log_odds_chosen": 1.628110408782959, "log_odds_ratio": -0.3641568720340729, "logits/chosen": -0.8166611194610596, "logits/rejected": -1.0311217308044434, "logps/chosen": -0.8406795263290405, "logps/rejected": -2.068589687347412, "loss": 1.0727, "nll_loss": 0.9132843017578125, "rewards/accuracies": 0.75, "rewards/chosen": -0.0840679481625557, "rewards/margins": 0.12279103696346283, "rewards/rejected": -0.20685899257659912, "step": 7722 }, { "epoch": 4.7113008998017385, "grad_norm": 1.459145188331604, "learning_rate": 4.624617268830373e-07, "log_odds_chosen": 4.393299579620361, "log_odds_ratio": -0.18342605233192444, "logits/chosen": -0.9115827083587646, "logits/rejected": -1.1394643783569336, "logps/chosen": -0.6841558218002319, "logps/rejected": -4.34967041015625, "loss": 1.0655, "nll_loss": 0.9876676201820374, "rewards/accuracies": 1.0, "rewards/chosen": -0.0684155821800232, "rewards/margins": 0.3665515184402466, "rewards/rejected": -0.4349670708179474, "step": 7723 }, { "epoch": 4.711910934878755, "grad_norm": 7.157857894897461, "learning_rate": 4.614819350887936e-07, "log_odds_chosen": 1.9424183368682861, "log_odds_ratio": -0.559711217880249, "logits/chosen": -1.0074982643127441, "logits/rejected": -1.0216749906539917, "logps/chosen": -0.8671308755874634, "logps/rejected": -2.6895155906677246, "loss": 1.0674, "nll_loss": 1.0792350769042969, "rewards/accuracies": 0.625, "rewards/chosen": -0.08671309053897858, "rewards/margins": 0.18223847448825836, "rewards/rejected": -0.2689515948295593, "step": 7724 }, { "epoch": 4.712520969955772, "grad_norm": 5.848475456237793, "learning_rate": 4.605021432945499e-07, "log_odds_chosen": 2.178283929824829, "log_odds_ratio": -0.30583813786506653, "logits/chosen": -0.9849976301193237, "logits/rejected": -1.207439661026001, "logps/chosen": -0.7230166792869568, "logps/rejected": -2.4560341835021973, "loss": 0.9544, "nll_loss": 0.8869284391403198, "rewards/accuracies": 0.875, "rewards/chosen": -0.07230167090892792, "rewards/margins": 0.1733018010854721, "rewards/rejected": -0.24560344219207764, "step": 7725 }, { "epoch": 4.713131005032789, "grad_norm": 1.1899350881576538, "learning_rate": 4.595223515003062e-07, "log_odds_chosen": 1.4596198797225952, "log_odds_ratio": -0.39293354749679565, "logits/chosen": -0.9009719491004944, "logits/rejected": -1.0703328847885132, "logps/chosen": -0.6567558646202087, "logps/rejected": -1.6080721616744995, "loss": 1.0552, "nll_loss": 0.8053653836250305, "rewards/accuracies": 0.75, "rewards/chosen": -0.06567558646202087, "rewards/margins": 0.09513163566589355, "rewards/rejected": -0.16080720722675323, "step": 7726 }, { "epoch": 4.713741040109806, "grad_norm": 1.5921058654785156, "learning_rate": 4.585425597060624e-07, "log_odds_chosen": 2.215869903564453, "log_odds_ratio": -0.514453113079071, "logits/chosen": -0.9419853687286377, "logits/rejected": -1.0330774784088135, "logps/chosen": -0.9368837475776672, "logps/rejected": -2.8674516677856445, "loss": 1.1142, "nll_loss": 1.2253837585449219, "rewards/accuracies": 0.625, "rewards/chosen": -0.09368838369846344, "rewards/margins": 0.19305682182312012, "rewards/rejected": -0.28674519062042236, "step": 7727 }, { "epoch": 4.714351075186824, "grad_norm": 4.367166996002197, "learning_rate": 4.575627679118187e-07, "log_odds_chosen": 0.8710415959358215, "log_odds_ratio": -0.539466381072998, "logits/chosen": -1.018991470336914, "logits/rejected": -1.116550326347351, "logps/chosen": -0.9670947790145874, "logps/rejected": -1.733597993850708, "loss": 1.039, "nll_loss": 1.1105539798736572, "rewards/accuracies": 0.625, "rewards/chosen": -0.0967094749212265, "rewards/margins": 0.07665033638477325, "rewards/rejected": -0.17335981130599976, "step": 7728 }, { "epoch": 4.7149611102638405, "grad_norm": 1.9026436805725098, "learning_rate": 4.5658297611757503e-07, "log_odds_chosen": 0.7142090797424316, "log_odds_ratio": -0.5377668142318726, "logits/chosen": -0.8212953209877014, "logits/rejected": -0.8683832883834839, "logps/chosen": -0.8146119117736816, "logps/rejected": -1.1525764465332031, "loss": 1.0393, "nll_loss": 1.0048049688339233, "rewards/accuracies": 0.75, "rewards/chosen": -0.08146119117736816, "rewards/margins": 0.03379645198583603, "rewards/rejected": -0.1152576357126236, "step": 7729 }, { "epoch": 4.715571145340857, "grad_norm": 5.360797882080078, "learning_rate": 4.5560318432333126e-07, "log_odds_chosen": 2.772225856781006, "log_odds_ratio": -0.4171493351459503, "logits/chosen": -0.8315244913101196, "logits/rejected": -0.9775537848472595, "logps/chosen": -0.7305812239646912, "logps/rejected": -2.9390757083892822, "loss": 1.0325, "nll_loss": 0.9214550256729126, "rewards/accuracies": 0.75, "rewards/chosen": -0.0730581283569336, "rewards/margins": 0.2208494395017624, "rewards/rejected": -0.2939075827598572, "step": 7730 }, { "epoch": 4.716181180417874, "grad_norm": 1.5076940059661865, "learning_rate": 4.5462339252908754e-07, "log_odds_chosen": 2.282383918762207, "log_odds_ratio": -0.3620675802230835, "logits/chosen": -0.822021484375, "logits/rejected": -0.8692913055419922, "logps/chosen": -0.5015418529510498, "logps/rejected": -2.030561923980713, "loss": 1.0101, "nll_loss": 0.7701648473739624, "rewards/accuracies": 0.75, "rewards/chosen": -0.0501541942358017, "rewards/margins": 0.1529020220041275, "rewards/rejected": -0.2030562162399292, "step": 7731 }, { "epoch": 4.716791215494891, "grad_norm": 1.6305992603302002, "learning_rate": 4.536436007348438e-07, "log_odds_chosen": 3.147096633911133, "log_odds_ratio": -0.15849411487579346, "logits/chosen": -0.6494961380958557, "logits/rejected": -0.8411078453063965, "logps/chosen": -0.6628769636154175, "logps/rejected": -3.127845287322998, "loss": 1.0525, "nll_loss": 0.8733590245246887, "rewards/accuracies": 1.0, "rewards/chosen": -0.06628769636154175, "rewards/margins": 0.24649682641029358, "rewards/rejected": -0.3127845525741577, "step": 7732 }, { "epoch": 4.717401250571908, "grad_norm": 2.2169411182403564, "learning_rate": 4.526638089406001e-07, "log_odds_chosen": 0.5416684746742249, "log_odds_ratio": -0.5403283834457397, "logits/chosen": -1.018792986869812, "logits/rejected": -0.7974565625190735, "logps/chosen": -0.844585120677948, "logps/rejected": -1.1707767248153687, "loss": 1.1104, "nll_loss": 1.0237703323364258, "rewards/accuracies": 0.75, "rewards/chosen": -0.08445850759744644, "rewards/margins": 0.032619163393974304, "rewards/rejected": -0.11707767844200134, "step": 7733 }, { "epoch": 4.718011285648925, "grad_norm": 4.703979969024658, "learning_rate": 4.516840171463564e-07, "log_odds_chosen": -0.311517596244812, "log_odds_ratio": -0.896382212638855, "logits/chosen": -0.7992159128189087, "logits/rejected": -0.8019284605979919, "logps/chosen": -0.9952379465103149, "logps/rejected": -0.828083336353302, "loss": 1.3269, "nll_loss": 1.2489800453186035, "rewards/accuracies": 0.25, "rewards/chosen": -0.09952379763126373, "rewards/margins": -0.016715463250875473, "rewards/rejected": -0.08280833065509796, "step": 7734 }, { "epoch": 4.7186213207259415, "grad_norm": 5.475969314575195, "learning_rate": 4.5070422535211266e-07, "log_odds_chosen": 4.286128520965576, "log_odds_ratio": -0.31763389706611633, "logits/chosen": -0.8665956258773804, "logits/rejected": -1.0621485710144043, "logps/chosen": -0.7599443197250366, "logps/rejected": -4.588217735290527, "loss": 0.9198, "nll_loss": 0.8639102578163147, "rewards/accuracies": 0.75, "rewards/chosen": -0.07599443197250366, "rewards/margins": 0.3828273415565491, "rewards/rejected": -0.45882174372673035, "step": 7735 }, { "epoch": 4.719231355802958, "grad_norm": 1.9843844175338745, "learning_rate": 4.4972443355786894e-07, "log_odds_chosen": 0.9564779996871948, "log_odds_ratio": -0.4589402675628662, "logits/chosen": -0.9096167087554932, "logits/rejected": -1.0315089225769043, "logps/chosen": -0.8895664811134338, "logps/rejected": -1.4548394680023193, "loss": 1.0983, "nll_loss": 1.2881386280059814, "rewards/accuracies": 0.625, "rewards/chosen": -0.08895665407180786, "rewards/margins": 0.05652729421854019, "rewards/rejected": -0.14548395574092865, "step": 7736 }, { "epoch": 4.719841390879975, "grad_norm": 2.3200554847717285, "learning_rate": 4.487446417636252e-07, "log_odds_chosen": 1.258953332901001, "log_odds_ratio": -0.4929024577140808, "logits/chosen": -0.999742865562439, "logits/rejected": -1.0428024530410767, "logps/chosen": -1.024639368057251, "logps/rejected": -2.1233370304107666, "loss": 1.1333, "nll_loss": 1.121889352798462, "rewards/accuracies": 0.75, "rewards/chosen": -0.10246393829584122, "rewards/margins": 0.10986977815628052, "rewards/rejected": -0.21233370900154114, "step": 7737 }, { "epoch": 4.720451425956993, "grad_norm": 2.0189390182495117, "learning_rate": 4.4776484996938145e-07, "log_odds_chosen": 2.474541425704956, "log_odds_ratio": -0.32534635066986084, "logits/chosen": -0.6706123352050781, "logits/rejected": -1.0448763370513916, "logps/chosen": -0.7327896356582642, "logps/rejected": -2.573378086090088, "loss": 1.0492, "nll_loss": 0.8378559947013855, "rewards/accuracies": 0.875, "rewards/chosen": -0.07327896356582642, "rewards/margins": 0.18405884504318237, "rewards/rejected": -0.2573378086090088, "step": 7738 }, { "epoch": 4.72106146103401, "grad_norm": 1.513615608215332, "learning_rate": 4.467850581751378e-07, "log_odds_chosen": 1.9925379753112793, "log_odds_ratio": -0.35149961709976196, "logits/chosen": -0.7878010272979736, "logits/rejected": -0.9078350067138672, "logps/chosen": -0.5679627656936646, "logps/rejected": -2.0202245712280273, "loss": 0.9704, "nll_loss": 0.8858140707015991, "rewards/accuracies": 0.875, "rewards/chosen": -0.05679628252983093, "rewards/margins": 0.14522618055343628, "rewards/rejected": -0.2020224630832672, "step": 7739 }, { "epoch": 4.721671496111027, "grad_norm": 3.857638120651245, "learning_rate": 4.4580526638089407e-07, "log_odds_chosen": 3.181100368499756, "log_odds_ratio": -0.2467680126428604, "logits/chosen": -0.5841414332389832, "logits/rejected": -1.0198125839233398, "logps/chosen": -0.7934975624084473, "logps/rejected": -3.407761573791504, "loss": 0.9576, "nll_loss": 0.8465516567230225, "rewards/accuracies": 0.875, "rewards/chosen": -0.07934974879026413, "rewards/margins": 0.2614264190196991, "rewards/rejected": -0.3407761752605438, "step": 7740 }, { "epoch": 4.7222815311880435, "grad_norm": 2.1349680423736572, "learning_rate": 4.448254745866503e-07, "log_odds_chosen": 0.5899933576583862, "log_odds_ratio": -0.6906410455703735, "logits/chosen": -0.883962869644165, "logits/rejected": -0.8154264092445374, "logps/chosen": -0.781529426574707, "logps/rejected": -1.1985052824020386, "loss": 1.1273, "nll_loss": 1.0100287199020386, "rewards/accuracies": 0.5, "rewards/chosen": -0.07815293967723846, "rewards/margins": 0.04169758781790733, "rewards/rejected": -0.1198505386710167, "step": 7741 }, { "epoch": 4.72289156626506, "grad_norm": 1.3889906406402588, "learning_rate": 4.438456827924066e-07, "log_odds_chosen": 3.462545871734619, "log_odds_ratio": -0.28956955671310425, "logits/chosen": -0.9255173206329346, "logits/rejected": -1.0152047872543335, "logps/chosen": -0.7692754864692688, "logps/rejected": -3.637861728668213, "loss": 1.1853, "nll_loss": 0.8811233043670654, "rewards/accuracies": 0.75, "rewards/chosen": -0.0769275575876236, "rewards/margins": 0.28685861825942993, "rewards/rejected": -0.36378616094589233, "step": 7742 }, { "epoch": 4.723501601342077, "grad_norm": 3.3384621143341064, "learning_rate": 4.4286589099816286e-07, "log_odds_chosen": 1.9620726108551025, "log_odds_ratio": -0.42252618074417114, "logits/chosen": -0.9959206581115723, "logits/rejected": -1.0910656452178955, "logps/chosen": -0.7302042245864868, "logps/rejected": -2.3023502826690674, "loss": 1.2386, "nll_loss": 1.1014416217803955, "rewards/accuracies": 0.75, "rewards/chosen": -0.07302042841911316, "rewards/margins": 0.15721461176872253, "rewards/rejected": -0.2302350401878357, "step": 7743 }, { "epoch": 4.724111636419094, "grad_norm": 1.601244568824768, "learning_rate": 4.418860992039192e-07, "log_odds_chosen": 0.2707688510417938, "log_odds_ratio": -0.6742253303527832, "logits/chosen": -0.982309877872467, "logits/rejected": -1.0854196548461914, "logps/chosen": -0.9496077299118042, "logps/rejected": -1.1274306774139404, "loss": 1.0506, "nll_loss": 0.9364339113235474, "rewards/accuracies": 0.625, "rewards/chosen": -0.0949607789516449, "rewards/margins": 0.017782296985387802, "rewards/rejected": -0.112743079662323, "step": 7744 }, { "epoch": 4.724721671496111, "grad_norm": 1.921515941619873, "learning_rate": 4.409063074096754e-07, "log_odds_chosen": 1.1315457820892334, "log_odds_ratio": -0.3287174701690674, "logits/chosen": -0.8521151542663574, "logits/rejected": -0.8563896417617798, "logps/chosen": -0.7622075080871582, "logps/rejected": -1.535158395767212, "loss": 0.8732, "nll_loss": 0.9787095785140991, "rewards/accuracies": 1.0, "rewards/chosen": -0.07622075080871582, "rewards/margins": 0.07729507982730865, "rewards/rejected": -0.15351584553718567, "step": 7745 }, { "epoch": 4.725331706573128, "grad_norm": 1.4806275367736816, "learning_rate": 4.399265156154317e-07, "log_odds_chosen": 2.297996759414673, "log_odds_ratio": -0.3190547227859497, "logits/chosen": -0.6286514401435852, "logits/rejected": -0.8809267282485962, "logps/chosen": -0.6297260522842407, "logps/rejected": -2.3739237785339355, "loss": 0.8847, "nll_loss": 0.7838894128799438, "rewards/accuracies": 0.75, "rewards/chosen": -0.06297260522842407, "rewards/margins": 0.17441979050636292, "rewards/rejected": -0.2373923808336258, "step": 7746 }, { "epoch": 4.725941741650145, "grad_norm": 10.433266639709473, "learning_rate": 4.38946723821188e-07, "log_odds_chosen": 2.191563367843628, "log_odds_ratio": -0.3278713524341583, "logits/chosen": -1.0000699758529663, "logits/rejected": -1.0734734535217285, "logps/chosen": -1.1142048835754395, "logps/rejected": -3.0585856437683105, "loss": 1.1294, "nll_loss": 1.1459684371948242, "rewards/accuracies": 1.0, "rewards/chosen": -0.11142048239707947, "rewards/margins": 0.19443809986114502, "rewards/rejected": -0.3058586120605469, "step": 7747 }, { "epoch": 4.726551776727161, "grad_norm": 1.8862146139144897, "learning_rate": 4.3796693202694426e-07, "log_odds_chosen": 3.320729970932007, "log_odds_ratio": -0.45669496059417725, "logits/chosen": -0.6810294985771179, "logits/rejected": -1.0155258178710938, "logps/chosen": -0.541359543800354, "logps/rejected": -3.201366901397705, "loss": 0.9586, "nll_loss": 0.7896482944488525, "rewards/accuracies": 0.625, "rewards/chosen": -0.05413595214486122, "rewards/margins": 0.26600074768066406, "rewards/rejected": -0.3201367259025574, "step": 7748 }, { "epoch": 4.727161811804178, "grad_norm": 1.5843734741210938, "learning_rate": 4.3698714023270054e-07, "log_odds_chosen": 2.4684410095214844, "log_odds_ratio": -0.3776768445968628, "logits/chosen": -0.8584369421005249, "logits/rejected": -0.980302095413208, "logps/chosen": -0.8234899044036865, "logps/rejected": -2.9066083431243896, "loss": 0.9793, "nll_loss": 0.9506369233131409, "rewards/accuracies": 0.75, "rewards/chosen": -0.08234898746013641, "rewards/margins": 0.20831187069416046, "rewards/rejected": -0.2906608581542969, "step": 7749 }, { "epoch": 4.727771846881196, "grad_norm": 13.502782821655273, "learning_rate": 4.360073484384568e-07, "log_odds_chosen": 4.867947101593018, "log_odds_ratio": -0.22297154366970062, "logits/chosen": -0.8819406032562256, "logits/rejected": -1.029149055480957, "logps/chosen": -0.700947642326355, "logps/rejected": -4.919778347015381, "loss": 1.104, "nll_loss": 0.981338620185852, "rewards/accuracies": 0.875, "rewards/chosen": -0.0700947567820549, "rewards/margins": 0.42188307642936707, "rewards/rejected": -0.49197784066200256, "step": 7750 }, { "epoch": 4.728381881958213, "grad_norm": 1.4468755722045898, "learning_rate": 4.350275566442131e-07, "log_odds_chosen": 2.5470433235168457, "log_odds_ratio": -0.5245869755744934, "logits/chosen": -1.1366567611694336, "logits/rejected": -1.114727258682251, "logps/chosen": -0.7079191207885742, "logps/rejected": -2.797389268875122, "loss": 0.9145, "nll_loss": 0.9092052578926086, "rewards/accuracies": 0.5, "rewards/chosen": -0.07079191505908966, "rewards/margins": 0.20894701778888702, "rewards/rejected": -0.2797389328479767, "step": 7751 }, { "epoch": 4.72899191703523, "grad_norm": 2.351872444152832, "learning_rate": 4.3404776484996933e-07, "log_odds_chosen": 1.5328398942947388, "log_odds_ratio": -0.42761602997779846, "logits/chosen": -1.011720895767212, "logits/rejected": -0.9906103610992432, "logps/chosen": -1.0908137559890747, "logps/rejected": -2.4389805793762207, "loss": 1.1239, "nll_loss": 1.2113176584243774, "rewards/accuracies": 0.75, "rewards/chosen": -0.10908137261867523, "rewards/margins": 0.13481669127941132, "rewards/rejected": -0.24389806389808655, "step": 7752 }, { "epoch": 4.7296019521122465, "grad_norm": 3.166196823120117, "learning_rate": 4.330679730557256e-07, "log_odds_chosen": 0.9517065286636353, "log_odds_ratio": -0.5790401697158813, "logits/chosen": -0.8219179511070251, "logits/rejected": -0.8647576570510864, "logps/chosen": -0.8481050133705139, "logps/rejected": -1.710357904434204, "loss": 1.0729, "nll_loss": 1.0523523092269897, "rewards/accuracies": 0.625, "rewards/chosen": -0.08481051027774811, "rewards/margins": 0.08622528612613678, "rewards/rejected": -0.1710357815027237, "step": 7753 }, { "epoch": 4.730211987189263, "grad_norm": 1.7786606550216675, "learning_rate": 4.3208818126148195e-07, "log_odds_chosen": 1.9540863037109375, "log_odds_ratio": -0.3056265115737915, "logits/chosen": -0.593302845954895, "logits/rejected": -0.7270015478134155, "logps/chosen": -0.5530889630317688, "logps/rejected": -1.9210681915283203, "loss": 0.899, "nll_loss": 0.6701658368110657, "rewards/accuracies": 0.875, "rewards/chosen": -0.05530889704823494, "rewards/margins": 0.1367979198694229, "rewards/rejected": -0.19210681319236755, "step": 7754 }, { "epoch": 4.73082202226628, "grad_norm": 2.4159417152404785, "learning_rate": 4.3110838946723823e-07, "log_odds_chosen": 1.3422237634658813, "log_odds_ratio": -0.4363815188407898, "logits/chosen": -0.9645959138870239, "logits/rejected": -1.039228916168213, "logps/chosen": -0.8153622150421143, "logps/rejected": -1.7987616062164307, "loss": 1.0752, "nll_loss": 1.121384620666504, "rewards/accuracies": 0.75, "rewards/chosen": -0.08153622597455978, "rewards/margins": 0.09833993762731552, "rewards/rejected": -0.1798761785030365, "step": 7755 }, { "epoch": 4.731432057343297, "grad_norm": 1.8016769886016846, "learning_rate": 4.3012859767299446e-07, "log_odds_chosen": 1.598488211631775, "log_odds_ratio": -0.40030255913734436, "logits/chosen": -0.9241014122962952, "logits/rejected": -0.8796861171722412, "logps/chosen": -0.7812995314598083, "logps/rejected": -2.0261693000793457, "loss": 0.9595, "nll_loss": 1.0736631155014038, "rewards/accuracies": 0.75, "rewards/chosen": -0.07812995463609695, "rewards/margins": 0.12448697537183762, "rewards/rejected": -0.20261691510677338, "step": 7756 }, { "epoch": 4.732042092420314, "grad_norm": 6.53507137298584, "learning_rate": 4.2914880587875074e-07, "log_odds_chosen": 1.9845566749572754, "log_odds_ratio": -0.3251459002494812, "logits/chosen": -0.8453378677368164, "logits/rejected": -1.0049171447753906, "logps/chosen": -0.7385249733924866, "logps/rejected": -2.2051823139190674, "loss": 0.9779, "nll_loss": 0.975023090839386, "rewards/accuracies": 0.75, "rewards/chosen": -0.07385249435901642, "rewards/margins": 0.14666573703289032, "rewards/rejected": -0.22051823139190674, "step": 7757 }, { "epoch": 4.732652127497331, "grad_norm": 1.8207720518112183, "learning_rate": 4.28169014084507e-07, "log_odds_chosen": 0.9795011878013611, "log_odds_ratio": -0.6242729425430298, "logits/chosen": -1.0609524250030518, "logits/rejected": -1.0270363092422485, "logps/chosen": -1.1831021308898926, "logps/rejected": -2.014610767364502, "loss": 1.1652, "nll_loss": 1.378422737121582, "rewards/accuracies": 0.625, "rewards/chosen": -0.11831021308898926, "rewards/margins": 0.08315086364746094, "rewards/rejected": -0.2014610767364502, "step": 7758 }, { "epoch": 4.7332621625743485, "grad_norm": 1.7722612619400024, "learning_rate": 4.2718922229026325e-07, "log_odds_chosen": 2.5906434059143066, "log_odds_ratio": -0.29445379972457886, "logits/chosen": -0.8282771110534668, "logits/rejected": -1.0543718338012695, "logps/chosen": -0.5862252712249756, "logps/rejected": -2.472076177597046, "loss": 1.1335, "nll_loss": 1.009286880493164, "rewards/accuracies": 0.875, "rewards/chosen": -0.05862252786755562, "rewards/margins": 0.1885850876569748, "rewards/rejected": -0.2472076117992401, "step": 7759 }, { "epoch": 4.733872197651365, "grad_norm": 1.4043574333190918, "learning_rate": 4.262094304960196e-07, "log_odds_chosen": 2.81290340423584, "log_odds_ratio": -0.3278924822807312, "logits/chosen": -0.8230527639389038, "logits/rejected": -1.0886719226837158, "logps/chosen": -0.7355221509933472, "logps/rejected": -3.031752347946167, "loss": 1.0027, "nll_loss": 0.8935993909835815, "rewards/accuracies": 0.75, "rewards/chosen": -0.0735522136092186, "rewards/margins": 0.2296230047941208, "rewards/rejected": -0.3031752109527588, "step": 7760 }, { "epoch": 4.734482232728382, "grad_norm": 1.9823800325393677, "learning_rate": 4.2522963870177586e-07, "log_odds_chosen": 2.1407175064086914, "log_odds_ratio": -0.44578808546066284, "logits/chosen": -0.898298978805542, "logits/rejected": -0.9763578772544861, "logps/chosen": -0.7671732902526855, "logps/rejected": -2.453490734100342, "loss": 1.0521, "nll_loss": 0.941157341003418, "rewards/accuracies": 0.625, "rewards/chosen": -0.0767173320055008, "rewards/margins": 0.16863173246383667, "rewards/rejected": -0.24534906446933746, "step": 7761 }, { "epoch": 4.735092267805399, "grad_norm": 3.5551249980926514, "learning_rate": 4.2424984690753214e-07, "log_odds_chosen": 2.5411977767944336, "log_odds_ratio": -0.2203519344329834, "logits/chosen": -0.9236248135566711, "logits/rejected": -1.1946532726287842, "logps/chosen": -0.7188565135002136, "logps/rejected": -2.418126106262207, "loss": 1.1673, "nll_loss": 1.0295484066009521, "rewards/accuracies": 0.875, "rewards/chosen": -0.07188564538955688, "rewards/margins": 0.16992692649364471, "rewards/rejected": -0.241812601685524, "step": 7762 }, { "epoch": 4.735702302882416, "grad_norm": 1.368017554283142, "learning_rate": 4.2327005511328837e-07, "log_odds_chosen": 0.7725788354873657, "log_odds_ratio": -0.5267172455787659, "logits/chosen": -0.8949102163314819, "logits/rejected": -0.8538707494735718, "logps/chosen": -0.8364020586013794, "logps/rejected": -1.374830961227417, "loss": 0.9355, "nll_loss": 1.1353884935379028, "rewards/accuracies": 0.75, "rewards/chosen": -0.0836402103304863, "rewards/margins": 0.053842879831790924, "rewards/rejected": -0.13748309016227722, "step": 7763 }, { "epoch": 4.736312337959433, "grad_norm": 3.307149648666382, "learning_rate": 4.2229026331904465e-07, "log_odds_chosen": 3.0679283142089844, "log_odds_ratio": -0.23316305875778198, "logits/chosen": -0.8194988965988159, "logits/rejected": -0.9656144380569458, "logps/chosen": -0.5768615007400513, "logps/rejected": -2.9887967109680176, "loss": 1.1575, "nll_loss": 1.0801681280136108, "rewards/accuracies": 1.0, "rewards/chosen": -0.057686153799295425, "rewards/margins": 0.24119353294372559, "rewards/rejected": -0.2988796830177307, "step": 7764 }, { "epoch": 4.73692237303645, "grad_norm": 9.263970375061035, "learning_rate": 4.21310471524801e-07, "log_odds_chosen": 3.017153739929199, "log_odds_ratio": -0.2906748354434967, "logits/chosen": -0.7454005479812622, "logits/rejected": -0.9021006226539612, "logps/chosen": -0.5345146656036377, "logps/rejected": -2.803051233291626, "loss": 0.9422, "nll_loss": 0.7721655368804932, "rewards/accuracies": 0.875, "rewards/chosen": -0.05345146730542183, "rewards/margins": 0.2268536388874054, "rewards/rejected": -0.2803051173686981, "step": 7765 }, { "epoch": 4.737532408113466, "grad_norm": 1.5648506879806519, "learning_rate": 4.2033067973055727e-07, "log_odds_chosen": 2.096414804458618, "log_odds_ratio": -0.3714359998703003, "logits/chosen": -1.0434274673461914, "logits/rejected": -1.066792607307434, "logps/chosen": -0.8185690641403198, "logps/rejected": -2.576287031173706, "loss": 1.0405, "nll_loss": 1.0525968074798584, "rewards/accuracies": 0.875, "rewards/chosen": -0.08185690641403198, "rewards/margins": 0.1757717877626419, "rewards/rejected": -0.2576286792755127, "step": 7766 }, { "epoch": 4.738142443190483, "grad_norm": 1.5876052379608154, "learning_rate": 4.193508879363135e-07, "log_odds_chosen": 1.4192047119140625, "log_odds_ratio": -0.645163357257843, "logits/chosen": -0.9619623422622681, "logits/rejected": -0.9819422960281372, "logps/chosen": -0.9679934978485107, "logps/rejected": -2.14054012298584, "loss": 1.0694, "nll_loss": 1.2162531614303589, "rewards/accuracies": 0.625, "rewards/chosen": -0.0967993438243866, "rewards/margins": 0.11725466698408127, "rewards/rejected": -0.21405400335788727, "step": 7767 }, { "epoch": 4.7387524782675, "grad_norm": 1.7549493312835693, "learning_rate": 4.183710961420698e-07, "log_odds_chosen": 0.5294248461723328, "log_odds_ratio": -0.6730550527572632, "logits/chosen": -1.1079868078231812, "logits/rejected": -1.0981305837631226, "logps/chosen": -1.1576780080795288, "logps/rejected": -1.6228266954421997, "loss": 1.1744, "nll_loss": 1.2114925384521484, "rewards/accuracies": 0.375, "rewards/chosen": -0.11576779931783676, "rewards/margins": 0.04651486501097679, "rewards/rejected": -0.16228266060352325, "step": 7768 }, { "epoch": 4.739362513344517, "grad_norm": 2.781263828277588, "learning_rate": 4.1739130434782606e-07, "log_odds_chosen": 1.3774646520614624, "log_odds_ratio": -0.3525487780570984, "logits/chosen": -0.8451986312866211, "logits/rejected": -0.8139706254005432, "logps/chosen": -0.7275955080986023, "logps/rejected": -1.7513530254364014, "loss": 0.9407, "nll_loss": 0.927373468875885, "rewards/accuracies": 0.875, "rewards/chosen": -0.07275955379009247, "rewards/margins": 0.10237576812505722, "rewards/rejected": -0.1751353144645691, "step": 7769 }, { "epoch": 4.739972548421534, "grad_norm": 1.446877121925354, "learning_rate": 4.164115125535824e-07, "log_odds_chosen": 0.8785156607627869, "log_odds_ratio": -0.4671635031700134, "logits/chosen": -0.8879384994506836, "logits/rejected": -0.9735519886016846, "logps/chosen": -0.9298439621925354, "logps/rejected": -1.5848305225372314, "loss": 1.0882, "nll_loss": 1.3362479209899902, "rewards/accuracies": 0.75, "rewards/chosen": -0.0929843932390213, "rewards/margins": 0.06549865752458572, "rewards/rejected": -0.15848305821418762, "step": 7770 }, { "epoch": 4.7405825834985515, "grad_norm": 1.3417326211929321, "learning_rate": 4.154317207593386e-07, "log_odds_chosen": 1.1912816762924194, "log_odds_ratio": -0.5376302599906921, "logits/chosen": -0.8242138624191284, "logits/rejected": -0.9579589366912842, "logps/chosen": -0.9821760654449463, "logps/rejected": -2.0482752323150635, "loss": 0.9873, "nll_loss": 1.1332924365997314, "rewards/accuracies": 0.625, "rewards/chosen": -0.09821760654449463, "rewards/margins": 0.10660990327596664, "rewards/rejected": -0.20482751727104187, "step": 7771 }, { "epoch": 4.741192618575568, "grad_norm": 4.162865161895752, "learning_rate": 4.144519289650949e-07, "log_odds_chosen": 3.529273509979248, "log_odds_ratio": -0.15511682629585266, "logits/chosen": -0.8656473159790039, "logits/rejected": -1.0693695545196533, "logps/chosen": -0.6530632972717285, "logps/rejected": -3.523420810699463, "loss": 1.0054, "nll_loss": 0.8214855790138245, "rewards/accuracies": 1.0, "rewards/chosen": -0.06530633568763733, "rewards/margins": 0.2870357036590576, "rewards/rejected": -0.3523420989513397, "step": 7772 }, { "epoch": 4.741802653652585, "grad_norm": 1.5128669738769531, "learning_rate": 4.134721371708512e-07, "log_odds_chosen": 1.7285337448120117, "log_odds_ratio": -0.4097844362258911, "logits/chosen": -0.7863178849220276, "logits/rejected": -0.9215661883354187, "logps/chosen": -0.7448827624320984, "logps/rejected": -2.0892791748046875, "loss": 1.0734, "nll_loss": 0.9549908638000488, "rewards/accuracies": 0.625, "rewards/chosen": -0.07448828220367432, "rewards/margins": 0.1344396471977234, "rewards/rejected": -0.2089279294013977, "step": 7773 }, { "epoch": 4.742412688729602, "grad_norm": 12.311366081237793, "learning_rate": 4.124923453766074e-07, "log_odds_chosen": 0.8228710293769836, "log_odds_ratio": -0.6863238215446472, "logits/chosen": -0.866517961025238, "logits/rejected": -0.7305645942687988, "logps/chosen": -0.810516893863678, "logps/rejected": -1.3831993341445923, "loss": 1.035, "nll_loss": 0.9652053713798523, "rewards/accuracies": 0.625, "rewards/chosen": -0.08105169236660004, "rewards/margins": 0.05726824700832367, "rewards/rejected": -0.1383199393749237, "step": 7774 }, { "epoch": 4.743022723806619, "grad_norm": 6.074742794036865, "learning_rate": 4.1151255358236375e-07, "log_odds_chosen": 1.8123762607574463, "log_odds_ratio": -0.5038938522338867, "logits/chosen": -0.7179117202758789, "logits/rejected": -0.971996009349823, "logps/chosen": -0.660390317440033, "logps/rejected": -2.0486819744110107, "loss": 0.9508, "nll_loss": 0.9461241960525513, "rewards/accuracies": 0.625, "rewards/chosen": -0.06603902578353882, "rewards/margins": 0.13882917165756226, "rewards/rejected": -0.20486819744110107, "step": 7775 }, { "epoch": 4.743632758883636, "grad_norm": 1.2249270677566528, "learning_rate": 4.1053276178812e-07, "log_odds_chosen": 0.999679684638977, "log_odds_ratio": -0.5020163655281067, "logits/chosen": -1.105903148651123, "logits/rejected": -1.128319501876831, "logps/chosen": -1.1863198280334473, "logps/rejected": -2.004837989807129, "loss": 1.1894, "nll_loss": 1.2248729467391968, "rewards/accuracies": 0.625, "rewards/chosen": -0.1186319962143898, "rewards/margins": 0.08185179531574249, "rewards/rejected": -0.2004837989807129, "step": 7776 }, { "epoch": 4.744242793960653, "grad_norm": 6.613514423370361, "learning_rate": 4.095529699938763e-07, "log_odds_chosen": 5.40829610824585, "log_odds_ratio": -0.11803607642650604, "logits/chosen": -0.8744155168533325, "logits/rejected": -1.0069451332092285, "logps/chosen": -0.7594782710075378, "logps/rejected": -5.385469436645508, "loss": 1.0936, "nll_loss": 1.0098463296890259, "rewards/accuracies": 1.0, "rewards/chosen": -0.0759478285908699, "rewards/margins": 0.4625990390777588, "rewards/rejected": -0.5385468602180481, "step": 7777 }, { "epoch": 4.7448528290376695, "grad_norm": 1.0489448308944702, "learning_rate": 4.0857317819963254e-07, "log_odds_chosen": 5.587714195251465, "log_odds_ratio": -0.2906043529510498, "logits/chosen": -0.8381305932998657, "logits/rejected": -0.8949480056762695, "logps/chosen": -0.8401697278022766, "logps/rejected": -5.957129001617432, "loss": 0.9119, "nll_loss": 1.1408729553222656, "rewards/accuracies": 0.875, "rewards/chosen": -0.08401697874069214, "rewards/margins": 0.511695921421051, "rewards/rejected": -0.5957129001617432, "step": 7778 }, { "epoch": 4.745462864114686, "grad_norm": 2.2023768424987793, "learning_rate": 4.075933864053888e-07, "log_odds_chosen": 4.294511318206787, "log_odds_ratio": -0.37334907054901123, "logits/chosen": -0.8337321281433105, "logits/rejected": -1.0499821901321411, "logps/chosen": -0.6437406539916992, "logps/rejected": -4.325758457183838, "loss": 1.0078, "nll_loss": 0.9536373019218445, "rewards/accuracies": 0.75, "rewards/chosen": -0.06437406688928604, "rewards/margins": 0.3682017922401428, "rewards/rejected": -0.43257585167884827, "step": 7779 }, { "epoch": 4.746072899191703, "grad_norm": 3.0053632259368896, "learning_rate": 4.0661359461114515e-07, "log_odds_chosen": 2.5049920082092285, "log_odds_ratio": -0.1736835092306137, "logits/chosen": -1.0697821378707886, "logits/rejected": -1.0428906679153442, "logps/chosen": -0.6529303193092346, "logps/rejected": -2.438016891479492, "loss": 0.9235, "nll_loss": 0.9928871393203735, "rewards/accuracies": 1.0, "rewards/chosen": -0.06529302895069122, "rewards/margins": 0.1785086989402771, "rewards/rejected": -0.24380171298980713, "step": 7780 }, { "epoch": 4.746682934268721, "grad_norm": 2.9525911808013916, "learning_rate": 4.056338028169014e-07, "log_odds_chosen": 2.328725576400757, "log_odds_ratio": -0.5445821285247803, "logits/chosen": -0.936873733997345, "logits/rejected": -1.136407494544983, "logps/chosen": -1.090012788772583, "logps/rejected": -3.0928311347961426, "loss": 1.1117, "nll_loss": 1.2904694080352783, "rewards/accuracies": 0.625, "rewards/chosen": -0.1090012788772583, "rewards/margins": 0.20028182864189148, "rewards/rejected": -0.3092831075191498, "step": 7781 }, { "epoch": 4.747292969345738, "grad_norm": 2.366241693496704, "learning_rate": 4.0465401102265766e-07, "log_odds_chosen": 1.6815794706344604, "log_odds_ratio": -0.40324535965919495, "logits/chosen": -0.9725339412689209, "logits/rejected": -0.9583418369293213, "logps/chosen": -0.866644561290741, "logps/rejected": -2.2154417037963867, "loss": 0.9547, "nll_loss": 1.1031267642974854, "rewards/accuracies": 0.875, "rewards/chosen": -0.08666445314884186, "rewards/margins": 0.1348797231912613, "rewards/rejected": -0.22154417634010315, "step": 7782 }, { "epoch": 4.747903004422755, "grad_norm": 2.7604804039001465, "learning_rate": 4.0367421922841394e-07, "log_odds_chosen": 1.4171037673950195, "log_odds_ratio": -0.4398699700832367, "logits/chosen": -0.8227132558822632, "logits/rejected": -0.9982295036315918, "logps/chosen": -0.7032101154327393, "logps/rejected": -1.6828782558441162, "loss": 1.0049, "nll_loss": 0.8587230443954468, "rewards/accuracies": 0.75, "rewards/chosen": -0.07032100856304169, "rewards/margins": 0.09796682000160217, "rewards/rejected": -0.16828782856464386, "step": 7783 }, { "epoch": 4.748513039499771, "grad_norm": 1.9035545587539673, "learning_rate": 4.026944274341702e-07, "log_odds_chosen": 1.4356005191802979, "log_odds_ratio": -0.4974442720413208, "logits/chosen": -0.8823353052139282, "logits/rejected": -0.987777829170227, "logps/chosen": -0.8355525135993958, "logps/rejected": -1.835371494293213, "loss": 1.1518, "nll_loss": 1.0985138416290283, "rewards/accuracies": 0.625, "rewards/chosen": -0.08355525135993958, "rewards/margins": 0.09998190402984619, "rewards/rejected": -0.18353715538978577, "step": 7784 }, { "epoch": 4.749123074576788, "grad_norm": 1.327146053314209, "learning_rate": 4.017146356399265e-07, "log_odds_chosen": 1.226963758468628, "log_odds_ratio": -0.5292748212814331, "logits/chosen": -0.633112370967865, "logits/rejected": -0.6517459154129028, "logps/chosen": -0.9809136390686035, "logps/rejected": -1.887700080871582, "loss": 0.9237, "nll_loss": 0.9848210215568542, "rewards/accuracies": 0.625, "rewards/chosen": -0.09809136390686035, "rewards/margins": 0.09067864716053009, "rewards/rejected": -0.18876999616622925, "step": 7785 }, { "epoch": 4.749733109653805, "grad_norm": 9.05285930633545, "learning_rate": 4.007348438456828e-07, "log_odds_chosen": 2.3594722747802734, "log_odds_ratio": -0.29986444115638733, "logits/chosen": -0.828059196472168, "logits/rejected": -0.9973309636116028, "logps/chosen": -0.656845211982727, "logps/rejected": -2.3799593448638916, "loss": 1.0354, "nll_loss": 0.8769537806510925, "rewards/accuracies": 0.75, "rewards/chosen": -0.06568451970815659, "rewards/margins": 0.1723114252090454, "rewards/rejected": -0.2379959523677826, "step": 7786 }, { "epoch": 4.750343144730822, "grad_norm": 2.789161443710327, "learning_rate": 3.9975505205143907e-07, "log_odds_chosen": 2.0156209468841553, "log_odds_ratio": -0.5145074129104614, "logits/chosen": -0.8837841153144836, "logits/rejected": -1.0399936437606812, "logps/chosen": -0.8225477933883667, "logps/rejected": -2.440005302429199, "loss": 1.117, "nll_loss": 1.1782793998718262, "rewards/accuracies": 0.5, "rewards/chosen": -0.08225477486848831, "rewards/margins": 0.16174571216106415, "rewards/rejected": -0.24400050938129425, "step": 7787 }, { "epoch": 4.750953179807839, "grad_norm": 1.4926775693893433, "learning_rate": 3.9877526025719535e-07, "log_odds_chosen": 4.152642250061035, "log_odds_ratio": -0.1921740174293518, "logits/chosen": -0.7749835252761841, "logits/rejected": -1.0267586708068848, "logps/chosen": -0.5896667838096619, "logps/rejected": -3.9715797901153564, "loss": 1.0187, "nll_loss": 1.0314689874649048, "rewards/accuracies": 1.0, "rewards/chosen": -0.05896667763590813, "rewards/margins": 0.33819133043289185, "rewards/rejected": -0.39715802669525146, "step": 7788 }, { "epoch": 4.751563214884856, "grad_norm": 1.892012119293213, "learning_rate": 3.977954684629516e-07, "log_odds_chosen": 2.7324697971343994, "log_odds_ratio": -0.24356329441070557, "logits/chosen": -0.6922749876976013, "logits/rejected": -0.8180091381072998, "logps/chosen": -0.5672563910484314, "logps/rejected": -2.5557761192321777, "loss": 0.9768, "nll_loss": 0.6773757934570312, "rewards/accuracies": 0.75, "rewards/chosen": -0.0567256398499012, "rewards/margins": 0.19885198771953583, "rewards/rejected": -0.25557762384414673, "step": 7789 }, { "epoch": 4.7521732499618725, "grad_norm": 12.534019470214844, "learning_rate": 3.9681567666870786e-07, "log_odds_chosen": 2.415135383605957, "log_odds_ratio": -0.424773633480072, "logits/chosen": -0.9261147975921631, "logits/rejected": -0.9905401468276978, "logps/chosen": -0.8467145562171936, "logps/rejected": -2.9102859497070312, "loss": 1.1393, "nll_loss": 0.9719861149787903, "rewards/accuracies": 0.75, "rewards/chosen": -0.08467145264148712, "rewards/margins": 0.2063571810722351, "rewards/rejected": -0.29102861881256104, "step": 7790 }, { "epoch": 4.752783285038889, "grad_norm": 2.8951022624969482, "learning_rate": 3.958358848744642e-07, "log_odds_chosen": 2.6524415016174316, "log_odds_ratio": -0.22035780549049377, "logits/chosen": -0.7242817878723145, "logits/rejected": -1.0959230661392212, "logps/chosen": -0.5438108444213867, "logps/rejected": -2.5175132751464844, "loss": 0.9371, "nll_loss": 0.7365440726280212, "rewards/accuracies": 0.875, "rewards/chosen": -0.05438108369708061, "rewards/margins": 0.197370246052742, "rewards/rejected": -0.2517513334751129, "step": 7791 }, { "epoch": 4.753393320115906, "grad_norm": 3.9554789066314697, "learning_rate": 3.948560930802204e-07, "log_odds_chosen": 1.6145063638687134, "log_odds_ratio": -0.44591590762138367, "logits/chosen": -1.0950236320495605, "logits/rejected": -1.0795633792877197, "logps/chosen": -1.4853179454803467, "logps/rejected": -2.8663456439971924, "loss": 1.1594, "nll_loss": 1.2242703437805176, "rewards/accuracies": 0.75, "rewards/chosen": -0.14853179454803467, "rewards/margins": 0.13810279965400696, "rewards/rejected": -0.286634624004364, "step": 7792 }, { "epoch": 4.754003355192924, "grad_norm": 1.9963839054107666, "learning_rate": 3.938763012859767e-07, "log_odds_chosen": 2.2966060638427734, "log_odds_ratio": -0.5261762738227844, "logits/chosen": -1.0203275680541992, "logits/rejected": -1.1547634601593018, "logps/chosen": -0.9320524334907532, "logps/rejected": -2.7876429557800293, "loss": 1.0796, "nll_loss": 1.1813684701919556, "rewards/accuracies": 0.75, "rewards/chosen": -0.09320524334907532, "rewards/margins": 0.18555906414985657, "rewards/rejected": -0.2787643074989319, "step": 7793 }, { "epoch": 4.754613390269941, "grad_norm": 3.8165533542633057, "learning_rate": 3.92896509491733e-07, "log_odds_chosen": 1.912360429763794, "log_odds_ratio": -0.5054079294204712, "logits/chosen": -1.0134339332580566, "logits/rejected": -1.1192469596862793, "logps/chosen": -0.9940890073776245, "logps/rejected": -2.6849007606506348, "loss": 0.978, "nll_loss": 1.2537051439285278, "rewards/accuracies": 0.625, "rewards/chosen": -0.09940890967845917, "rewards/margins": 0.16908115148544312, "rewards/rejected": -0.2684900462627411, "step": 7794 }, { "epoch": 4.755223425346958, "grad_norm": 1.7291680574417114, "learning_rate": 3.9191671769748926e-07, "log_odds_chosen": 0.8880070447921753, "log_odds_ratio": -0.5449445247650146, "logits/chosen": -0.9905216693878174, "logits/rejected": -0.9614317417144775, "logps/chosen": -0.8519845008850098, "logps/rejected": -1.4267218112945557, "loss": 1.089, "nll_loss": 1.1164400577545166, "rewards/accuracies": 0.625, "rewards/chosen": -0.08519845455884933, "rewards/margins": 0.05747373402118683, "rewards/rejected": -0.14267218112945557, "step": 7795 }, { "epoch": 4.7558334604239745, "grad_norm": 2.6904473304748535, "learning_rate": 3.9093692590324554e-07, "log_odds_chosen": 3.4920921325683594, "log_odds_ratio": -0.28818023204803467, "logits/chosen": -0.8456926345825195, "logits/rejected": -0.9834877252578735, "logps/chosen": -0.7103729248046875, "logps/rejected": -3.640601634979248, "loss": 0.8956, "nll_loss": 0.9466345310211182, "rewards/accuracies": 0.875, "rewards/chosen": -0.07103729248046875, "rewards/margins": 0.29302287101745605, "rewards/rejected": -0.3640601634979248, "step": 7796 }, { "epoch": 4.756443495500991, "grad_norm": 1.5734789371490479, "learning_rate": 3.899571341090018e-07, "log_odds_chosen": 1.495495319366455, "log_odds_ratio": -0.3934614658355713, "logits/chosen": -0.700741171836853, "logits/rejected": -0.997389018535614, "logps/chosen": -0.8933380842208862, "logps/rejected": -1.80559241771698, "loss": 0.9557, "nll_loss": 1.2200223207473755, "rewards/accuracies": 0.875, "rewards/chosen": -0.08933380991220474, "rewards/margins": 0.09122543781995773, "rewards/rejected": -0.18055924773216248, "step": 7797 }, { "epoch": 4.757053530578008, "grad_norm": 5.164999485015869, "learning_rate": 3.889773423147581e-07, "log_odds_chosen": 0.053096309304237366, "log_odds_ratio": -0.7991838455200195, "logits/chosen": -1.0155959129333496, "logits/rejected": -0.9097451567649841, "logps/chosen": -0.9402473568916321, "logps/rejected": -0.9857689738273621, "loss": 1.1982, "nll_loss": 1.217376708984375, "rewards/accuracies": 0.5, "rewards/chosen": -0.09402473270893097, "rewards/margins": 0.004552160389721394, "rewards/rejected": -0.09857690334320068, "step": 7798 }, { "epoch": 4.757663565655025, "grad_norm": 2.4049019813537598, "learning_rate": 3.879975505205144e-07, "log_odds_chosen": 1.2234747409820557, "log_odds_ratio": -0.4385291337966919, "logits/chosen": -1.0362190008163452, "logits/rejected": -0.9316173791885376, "logps/chosen": -0.7520148754119873, "logps/rejected": -1.6835554838180542, "loss": 0.9074, "nll_loss": 1.0322265625, "rewards/accuracies": 0.625, "rewards/chosen": -0.07520148903131485, "rewards/margins": 0.09315405040979385, "rewards/rejected": -0.1683555394411087, "step": 7799 }, { "epoch": 4.758273600732042, "grad_norm": 1.4779890775680542, "learning_rate": 3.870177587262706e-07, "log_odds_chosen": 2.005976676940918, "log_odds_ratio": -0.5100539922714233, "logits/chosen": -0.8974713683128357, "logits/rejected": -1.0348265171051025, "logps/chosen": -0.8288676142692566, "logps/rejected": -2.450657844543457, "loss": 0.9837, "nll_loss": 1.0734115839004517, "rewards/accuracies": 0.5, "rewards/chosen": -0.08288676291704178, "rewards/margins": 0.16217900812625885, "rewards/rejected": -0.24506577849388123, "step": 7800 }, { "epoch": 4.758883635809059, "grad_norm": 2.1586525440216064, "learning_rate": 3.8603796693202695e-07, "log_odds_chosen": 4.768279075622559, "log_odds_ratio": -0.1239793598651886, "logits/chosen": -0.6200386881828308, "logits/rejected": -0.8750971555709839, "logps/chosen": -0.5427786111831665, "logps/rejected": -4.438729286193848, "loss": 0.8228, "nll_loss": 0.7432887554168701, "rewards/accuracies": 1.0, "rewards/chosen": -0.05427786707878113, "rewards/margins": 0.38959506154060364, "rewards/rejected": -0.44387292861938477, "step": 7801 }, { "epoch": 4.759493670886076, "grad_norm": 1.388396978378296, "learning_rate": 3.8505817513778323e-07, "log_odds_chosen": 4.507246017456055, "log_odds_ratio": -0.10437627881765366, "logits/chosen": -0.8843154311180115, "logits/rejected": -1.0725345611572266, "logps/chosen": -0.6894996166229248, "logps/rejected": -4.468595504760742, "loss": 0.8131, "nll_loss": 0.8951246738433838, "rewards/accuracies": 1.0, "rewards/chosen": -0.06894996017217636, "rewards/margins": 0.3779096305370331, "rewards/rejected": -0.44685956835746765, "step": 7802 }, { "epoch": 4.760103705963093, "grad_norm": 1.4917521476745605, "learning_rate": 3.8407838334353946e-07, "log_odds_chosen": 2.8270859718322754, "log_odds_ratio": -0.5201632380485535, "logits/chosen": -0.7569870352745056, "logits/rejected": -0.908161997795105, "logps/chosen": -0.7657889723777771, "logps/rejected": -3.0744552612304688, "loss": 0.9657, "nll_loss": 0.9936206340789795, "rewards/accuracies": 0.625, "rewards/chosen": -0.07657890021800995, "rewards/margins": 0.23086661100387573, "rewards/rejected": -0.3074454963207245, "step": 7803 }, { "epoch": 4.76071374104011, "grad_norm": 2.281921625137329, "learning_rate": 3.8309859154929574e-07, "log_odds_chosen": 2.7798547744750977, "log_odds_ratio": -0.2775736153125763, "logits/chosen": -0.7727304100990295, "logits/rejected": -0.821345329284668, "logps/chosen": -0.6474661827087402, "logps/rejected": -2.717864751815796, "loss": 0.9214, "nll_loss": 1.1018986701965332, "rewards/accuracies": 0.75, "rewards/chosen": -0.06474661827087402, "rewards/margins": 0.20703986287117004, "rewards/rejected": -0.27178648114204407, "step": 7804 }, { "epoch": 4.761323776117127, "grad_norm": 1.9754796028137207, "learning_rate": 3.82118799755052e-07, "log_odds_chosen": 4.728135108947754, "log_odds_ratio": -0.2883628308773041, "logits/chosen": -0.8500367403030396, "logits/rejected": -1.1907851696014404, "logps/chosen": -0.5855658054351807, "logps/rejected": -4.483453750610352, "loss": 0.9311, "nll_loss": 0.8262215852737427, "rewards/accuracies": 0.875, "rewards/chosen": -0.058556582778692245, "rewards/margins": 0.38978880643844604, "rewards/rejected": -0.4483453631401062, "step": 7805 }, { "epoch": 4.761933811194144, "grad_norm": 6.770234107971191, "learning_rate": 3.8113900796080835e-07, "log_odds_chosen": 0.6133551001548767, "log_odds_ratio": -0.5269804000854492, "logits/chosen": -0.8384627103805542, "logits/rejected": -0.9479936957359314, "logps/chosen": -0.596534788608551, "logps/rejected": -1.0431157350540161, "loss": 1.2437, "nll_loss": 1.182420015335083, "rewards/accuracies": 0.75, "rewards/chosen": -0.05965347960591316, "rewards/margins": 0.04465809091925621, "rewards/rejected": -0.10431157052516937, "step": 7806 }, { "epoch": 4.762543846271161, "grad_norm": 1.6638792753219604, "learning_rate": 3.801592161665646e-07, "log_odds_chosen": 0.4830179214477539, "log_odds_ratio": -0.6248601078987122, "logits/chosen": -0.9126797914505005, "logits/rejected": -0.9597358107566833, "logps/chosen": -0.964343786239624, "logps/rejected": -1.2855623960494995, "loss": 1.1155, "nll_loss": 1.031360387802124, "rewards/accuracies": 0.625, "rewards/chosen": -0.09643438458442688, "rewards/margins": 0.03212185576558113, "rewards/rejected": -0.1285562515258789, "step": 7807 }, { "epoch": 4.7631538813481775, "grad_norm": 1.3707959651947021, "learning_rate": 3.7917942437232086e-07, "log_odds_chosen": 4.940361976623535, "log_odds_ratio": -0.09685070812702179, "logits/chosen": -0.7299429178237915, "logits/rejected": -1.0444211959838867, "logps/chosen": -0.4680790603160858, "logps/rejected": -4.466750621795654, "loss": 0.9219, "nll_loss": 0.6783377528190613, "rewards/accuracies": 1.0, "rewards/chosen": -0.0468079075217247, "rewards/margins": 0.3998671770095825, "rewards/rejected": -0.44667500257492065, "step": 7808 }, { "epoch": 4.763763916425194, "grad_norm": 1.7830203771591187, "learning_rate": 3.7819963257807714e-07, "log_odds_chosen": 1.2336256504058838, "log_odds_ratio": -0.48694702982902527, "logits/chosen": -0.760457456111908, "logits/rejected": -1.0055183172225952, "logps/chosen": -0.9388936758041382, "logps/rejected": -2.003833055496216, "loss": 0.9575, "nll_loss": 1.0069787502288818, "rewards/accuracies": 0.75, "rewards/chosen": -0.09388936311006546, "rewards/margins": 0.10649395734071732, "rewards/rejected": -0.20038332045078278, "step": 7809 }, { "epoch": 4.764373951502211, "grad_norm": 10.81640625, "learning_rate": 3.772198407838334e-07, "log_odds_chosen": 0.48686790466308594, "log_odds_ratio": -0.6520047187805176, "logits/chosen": -0.9771902561187744, "logits/rejected": -1.1161385774612427, "logps/chosen": -1.0611460208892822, "logps/rejected": -1.4079246520996094, "loss": 1.1268, "nll_loss": 1.1609266996383667, "rewards/accuracies": 0.625, "rewards/chosen": -0.10611459612846375, "rewards/margins": 0.03467785567045212, "rewards/rejected": -0.14079244434833527, "step": 7810 }, { "epoch": 4.764983986579228, "grad_norm": 1.6291577816009521, "learning_rate": 3.762400489895897e-07, "log_odds_chosen": 6.315945625305176, "log_odds_ratio": -0.26709121465682983, "logits/chosen": -0.9881455898284912, "logits/rejected": -1.1830732822418213, "logps/chosen": -0.8916734457015991, "logps/rejected": -6.719149589538574, "loss": 1.0404, "nll_loss": 0.9346268177032471, "rewards/accuracies": 0.875, "rewards/chosen": -0.08916734158992767, "rewards/margins": 0.5827475786209106, "rewards/rejected": -0.6719149351119995, "step": 7811 }, { "epoch": 4.765594021656245, "grad_norm": 1.4123516082763672, "learning_rate": 3.75260257195346e-07, "log_odds_chosen": 1.4537100791931152, "log_odds_ratio": -0.6019244194030762, "logits/chosen": -0.8838223218917847, "logits/rejected": -0.9754738807678223, "logps/chosen": -0.8103457689285278, "logps/rejected": -2.0425033569335938, "loss": 1.0499, "nll_loss": 0.8945199847221375, "rewards/accuracies": 0.5, "rewards/chosen": -0.0810345858335495, "rewards/margins": 0.12321575731039047, "rewards/rejected": -0.20425033569335938, "step": 7812 }, { "epoch": 4.766204056733262, "grad_norm": 1.0916783809661865, "learning_rate": 3.7428046540110227e-07, "log_odds_chosen": 1.2593780755996704, "log_odds_ratio": -0.38965821266174316, "logits/chosen": -0.8009718656539917, "logits/rejected": -1.0088744163513184, "logps/chosen": -0.7621384263038635, "logps/rejected": -1.6620728969573975, "loss": 1.0821, "nll_loss": 0.9109978675842285, "rewards/accuracies": 0.75, "rewards/chosen": -0.07621385157108307, "rewards/margins": 0.0899934470653534, "rewards/rejected": -0.16620728373527527, "step": 7813 }, { "epoch": 4.766814091810279, "grad_norm": 3.779984951019287, "learning_rate": 3.733006736068585e-07, "log_odds_chosen": 1.5492717027664185, "log_odds_ratio": -0.641301691532135, "logits/chosen": -0.8547311425209045, "logits/rejected": -0.9279069304466248, "logps/chosen": -0.8428241014480591, "logps/rejected": -2.1202759742736816, "loss": 1.2192, "nll_loss": 1.4026710987091064, "rewards/accuracies": 0.5, "rewards/chosen": -0.08428241312503815, "rewards/margins": 0.12774518132209778, "rewards/rejected": -0.21202759444713593, "step": 7814 }, { "epoch": 4.767424126887296, "grad_norm": 4.9800214767456055, "learning_rate": 3.723208818126148e-07, "log_odds_chosen": 1.8314883708953857, "log_odds_ratio": -0.31977131962776184, "logits/chosen": -0.8903815150260925, "logits/rejected": -0.9987518787384033, "logps/chosen": -0.6383340358734131, "logps/rejected": -1.8127431869506836, "loss": 1.0288, "nll_loss": 0.886825442314148, "rewards/accuracies": 0.75, "rewards/chosen": -0.06383340060710907, "rewards/margins": 0.11744090914726257, "rewards/rejected": -0.18127432465553284, "step": 7815 }, { "epoch": 4.768034161964313, "grad_norm": 1.4535837173461914, "learning_rate": 3.713410900183711e-07, "log_odds_chosen": 2.522139549255371, "log_odds_ratio": -0.28283339738845825, "logits/chosen": -0.852857768535614, "logits/rejected": -1.0541605949401855, "logps/chosen": -0.5788328647613525, "logps/rejected": -2.378093719482422, "loss": 1.0488, "nll_loss": 0.8754063844680786, "rewards/accuracies": 0.875, "rewards/chosen": -0.057883284986019135, "rewards/margins": 0.1799260675907135, "rewards/rejected": -0.23780936002731323, "step": 7816 }, { "epoch": 4.76864419704133, "grad_norm": 2.133723020553589, "learning_rate": 3.703612982241274e-07, "log_odds_chosen": 1.4866836071014404, "log_odds_ratio": -0.4016629159450531, "logits/chosen": -0.7253691554069519, "logits/rejected": -0.9725707769393921, "logps/chosen": -0.5596949458122253, "logps/rejected": -1.6157176494598389, "loss": 0.9828, "nll_loss": 0.6743296384811401, "rewards/accuracies": 0.75, "rewards/chosen": -0.05596949905157089, "rewards/margins": 0.10560226440429688, "rewards/rejected": -0.16157175600528717, "step": 7817 }, { "epoch": 4.769254232118347, "grad_norm": 2.9427108764648438, "learning_rate": 3.693815064298836e-07, "log_odds_chosen": 2.323188304901123, "log_odds_ratio": -0.2752547264099121, "logits/chosen": -1.0291202068328857, "logits/rejected": -1.0340911149978638, "logps/chosen": -0.7058019638061523, "logps/rejected": -2.398707389831543, "loss": 0.8546, "nll_loss": 0.9535838961601257, "rewards/accuracies": 0.875, "rewards/chosen": -0.07058019936084747, "rewards/margins": 0.16929054260253906, "rewards/rejected": -0.23987072706222534, "step": 7818 }, { "epoch": 4.769864267195364, "grad_norm": 1.5359923839569092, "learning_rate": 3.684017146356399e-07, "log_odds_chosen": 3.6465413570404053, "log_odds_ratio": -0.2472255527973175, "logits/chosen": -0.7263072729110718, "logits/rejected": -1.0432407855987549, "logps/chosen": -0.6654123067855835, "logps/rejected": -3.606724500656128, "loss": 1.1071, "nll_loss": 0.9061594009399414, "rewards/accuracies": 0.875, "rewards/chosen": -0.06654123961925507, "rewards/margins": 0.29413121938705444, "rewards/rejected": -0.3606724441051483, "step": 7819 }, { "epoch": 4.770474302272381, "grad_norm": 1.6579381227493286, "learning_rate": 3.674219228413962e-07, "log_odds_chosen": 2.253506660461426, "log_odds_ratio": -0.3734284043312073, "logits/chosen": -1.1060690879821777, "logits/rejected": -1.1311366558074951, "logps/chosen": -0.8923399448394775, "logps/rejected": -2.819071054458618, "loss": 0.9738, "nll_loss": 0.9671521782875061, "rewards/accuracies": 0.875, "rewards/chosen": -0.08923399448394775, "rewards/margins": 0.19267310202121735, "rewards/rejected": -0.2819070816040039, "step": 7820 }, { "epoch": 4.771084337349397, "grad_norm": 1.2703986167907715, "learning_rate": 3.664421310471524e-07, "log_odds_chosen": 3.0818233489990234, "log_odds_ratio": -0.456603080034256, "logits/chosen": -0.8053474426269531, "logits/rejected": -0.8891493678092957, "logps/chosen": -1.0378762483596802, "logps/rejected": -3.8338992595672607, "loss": 1.1277, "nll_loss": 1.2922871112823486, "rewards/accuracies": 0.75, "rewards/chosen": -0.10378763824701309, "rewards/margins": 0.2796023190021515, "rewards/rejected": -0.383389949798584, "step": 7821 }, { "epoch": 4.771694372426414, "grad_norm": 12.750554084777832, "learning_rate": 3.6546233925290874e-07, "log_odds_chosen": 0.32303452491760254, "log_odds_ratio": -0.7937177419662476, "logits/chosen": -0.8280379772186279, "logits/rejected": -0.7959340810775757, "logps/chosen": -0.9030032157897949, "logps/rejected": -1.1683067083358765, "loss": 1.0833, "nll_loss": 1.1613905429840088, "rewards/accuracies": 0.375, "rewards/chosen": -0.09030032157897949, "rewards/margins": 0.026530349627137184, "rewards/rejected": -0.11683067679405212, "step": 7822 }, { "epoch": 4.772304407503431, "grad_norm": 1.3606702089309692, "learning_rate": 3.64482547458665e-07, "log_odds_chosen": 1.6508928537368774, "log_odds_ratio": -0.7011542320251465, "logits/chosen": -0.7720851898193359, "logits/rejected": -0.8829416632652283, "logps/chosen": -0.6331019997596741, "logps/rejected": -2.156827211380005, "loss": 0.8985, "nll_loss": 0.6899200081825256, "rewards/accuracies": 0.75, "rewards/chosen": -0.06331019848585129, "rewards/margins": 0.1523725390434265, "rewards/rejected": -0.2156827300786972, "step": 7823 }, { "epoch": 4.772914442580449, "grad_norm": 4.644917011260986, "learning_rate": 3.635027556644213e-07, "log_odds_chosen": 1.341680884361267, "log_odds_ratio": -0.528488039970398, "logits/chosen": -1.0807418823242188, "logits/rejected": -1.050122857093811, "logps/chosen": -0.9859548211097717, "logps/rejected": -2.152215003967285, "loss": 1.1779, "nll_loss": 1.1810088157653809, "rewards/accuracies": 0.625, "rewards/chosen": -0.09859548509120941, "rewards/margins": 0.11662603169679642, "rewards/rejected": -0.21522152423858643, "step": 7824 }, { "epoch": 4.773524477657466, "grad_norm": 1.35906982421875, "learning_rate": 3.6252296387017753e-07, "log_odds_chosen": 1.6387884616851807, "log_odds_ratio": -0.403385192155838, "logits/chosen": -0.9739221334457397, "logits/rejected": -1.0010682344436646, "logps/chosen": -0.9246970415115356, "logps/rejected": -2.1555190086364746, "loss": 1.1831, "nll_loss": 1.0223087072372437, "rewards/accuracies": 0.75, "rewards/chosen": -0.0924697071313858, "rewards/margins": 0.12308217585086823, "rewards/rejected": -0.21555188298225403, "step": 7825 }, { "epoch": 4.7741345127344825, "grad_norm": 9.276581764221191, "learning_rate": 3.615431720759338e-07, "log_odds_chosen": 2.9409995079040527, "log_odds_ratio": -0.2618555724620819, "logits/chosen": -0.5880339741706848, "logits/rejected": -0.7702979445457458, "logps/chosen": -0.5509801506996155, "logps/rejected": -2.620100975036621, "loss": 1.0127, "nll_loss": 0.7067772746086121, "rewards/accuracies": 0.75, "rewards/chosen": -0.05509801208972931, "rewards/margins": 0.2069120854139328, "rewards/rejected": -0.2620100975036621, "step": 7826 }, { "epoch": 4.774744547811499, "grad_norm": 1.0488940477371216, "learning_rate": 3.6056338028169015e-07, "log_odds_chosen": 2.610265016555786, "log_odds_ratio": -0.3310300409793854, "logits/chosen": -0.6089351773262024, "logits/rejected": -0.7376888394355774, "logps/chosen": -0.48455989360809326, "logps/rejected": -2.4854090213775635, "loss": 0.8636, "nll_loss": 0.6950738430023193, "rewards/accuracies": 0.875, "rewards/chosen": -0.048455990850925446, "rewards/margins": 0.20008492469787598, "rewards/rejected": -0.24854090809822083, "step": 7827 }, { "epoch": 4.775354582888516, "grad_norm": 2.4902729988098145, "learning_rate": 3.5958358848744643e-07, "log_odds_chosen": 2.530486583709717, "log_odds_ratio": -0.4173271954059601, "logits/chosen": -0.8066023588180542, "logits/rejected": -0.891352653503418, "logps/chosen": -0.6936434507369995, "logps/rejected": -2.717130184173584, "loss": 1.0118, "nll_loss": 0.9491060972213745, "rewards/accuracies": 0.625, "rewards/chosen": -0.06936435401439667, "rewards/margins": 0.20234867930412292, "rewards/rejected": -0.2717130184173584, "step": 7828 }, { "epoch": 4.775964617965533, "grad_norm": 4.984894275665283, "learning_rate": 3.5860379669320266e-07, "log_odds_chosen": 3.1442627906799316, "log_odds_ratio": -0.22946381568908691, "logits/chosen": -0.5294504165649414, "logits/rejected": -0.8640426397323608, "logps/chosen": -0.7084469795227051, "logps/rejected": -3.3325605392456055, "loss": 1.0837, "nll_loss": 0.9348164796829224, "rewards/accuracies": 1.0, "rewards/chosen": -0.07084469497203827, "rewards/margins": 0.26241135597229004, "rewards/rejected": -0.3332560658454895, "step": 7829 }, { "epoch": 4.77657465304255, "grad_norm": 1.5880277156829834, "learning_rate": 3.5762400489895894e-07, "log_odds_chosen": 2.345186233520508, "log_odds_ratio": -0.41098377108573914, "logits/chosen": -0.49734893441200256, "logits/rejected": -0.5207247138023376, "logps/chosen": -0.6466148495674133, "logps/rejected": -2.4144368171691895, "loss": 0.9273, "nll_loss": 0.6510426998138428, "rewards/accuracies": 0.625, "rewards/chosen": -0.06466148793697357, "rewards/margins": 0.17678220570087433, "rewards/rejected": -0.2414436936378479, "step": 7830 }, { "epoch": 4.777184688119567, "grad_norm": 1.3432567119598389, "learning_rate": 3.566442131047152e-07, "log_odds_chosen": 2.400466203689575, "log_odds_ratio": -0.3219650387763977, "logits/chosen": -0.8897928595542908, "logits/rejected": -0.9716073870658875, "logps/chosen": -0.4923020005226135, "logps/rejected": -2.0193915367126465, "loss": 0.9969, "nll_loss": 0.8046590685844421, "rewards/accuracies": 0.875, "rewards/chosen": -0.04923020303249359, "rewards/margins": 0.1527089625597, "rewards/rejected": -0.2019391655921936, "step": 7831 }, { "epoch": 4.777794723196584, "grad_norm": 1.4150501489639282, "learning_rate": 3.5566442131047156e-07, "log_odds_chosen": 1.2911584377288818, "log_odds_ratio": -0.45145559310913086, "logits/chosen": -1.0139440298080444, "logits/rejected": -1.183012843132019, "logps/chosen": -0.6428227424621582, "logps/rejected": -1.521885871887207, "loss": 0.8909, "nll_loss": 0.9192912578582764, "rewards/accuracies": 0.75, "rewards/chosen": -0.06428226828575134, "rewards/margins": 0.08790631592273712, "rewards/rejected": -0.15218858420848846, "step": 7832 }, { "epoch": 4.7784047582736005, "grad_norm": 2.3604774475097656, "learning_rate": 3.546846295162278e-07, "log_odds_chosen": 0.4055632948875427, "log_odds_ratio": -0.7394749522209167, "logits/chosen": -0.9298282861709595, "logits/rejected": -0.9729495644569397, "logps/chosen": -0.803712010383606, "logps/rejected": -1.223609209060669, "loss": 1.0713, "nll_loss": 1.0625519752502441, "rewards/accuracies": 0.625, "rewards/chosen": -0.08037120848894119, "rewards/margins": 0.04198971018195152, "rewards/rejected": -0.12236091494560242, "step": 7833 }, { "epoch": 4.779014793350617, "grad_norm": 8.804251670837402, "learning_rate": 3.5370483772198406e-07, "log_odds_chosen": 1.8316447734832764, "log_odds_ratio": -0.4963497519493103, "logits/chosen": -0.8950481414794922, "logits/rejected": -0.9336246848106384, "logps/chosen": -0.8076415061950684, "logps/rejected": -2.265287399291992, "loss": 1.0522, "nll_loss": 0.8944284915924072, "rewards/accuracies": 0.75, "rewards/chosen": -0.08076415956020355, "rewards/margins": 0.14576460421085358, "rewards/rejected": -0.22652876377105713, "step": 7834 }, { "epoch": 4.779624828427634, "grad_norm": 5.424020767211914, "learning_rate": 3.5272504592774035e-07, "log_odds_chosen": 3.179523468017578, "log_odds_ratio": -0.24661493301391602, "logits/chosen": -0.8298149108886719, "logits/rejected": -0.9074900150299072, "logps/chosen": -0.6560846567153931, "logps/rejected": -3.272414445877075, "loss": 1.2013, "nll_loss": 0.804544985294342, "rewards/accuracies": 1.0, "rewards/chosen": -0.06560847163200378, "rewards/margins": 0.2616329789161682, "rewards/rejected": -0.327241450548172, "step": 7835 }, { "epoch": 4.780234863504652, "grad_norm": 1.7230846881866455, "learning_rate": 3.517452541334966e-07, "log_odds_chosen": 3.9207558631896973, "log_odds_ratio": -0.15448689460754395, "logits/chosen": -0.8243311643600464, "logits/rejected": -1.0056560039520264, "logps/chosen": -0.6519074440002441, "logps/rejected": -3.9551076889038086, "loss": 0.9522, "nll_loss": 0.9029433131217957, "rewards/accuracies": 1.0, "rewards/chosen": -0.06519074738025665, "rewards/margins": 0.3303200304508209, "rewards/rejected": -0.3955107629299164, "step": 7836 }, { "epoch": 4.780844898581669, "grad_norm": 1.8062942028045654, "learning_rate": 3.507654623392529e-07, "log_odds_chosen": 5.381793975830078, "log_odds_ratio": -0.11021168529987335, "logits/chosen": -0.6547217965126038, "logits/rejected": -1.0770235061645508, "logps/chosen": -0.621971845626831, "logps/rejected": -5.197327613830566, "loss": 1.0439, "nll_loss": 1.117642879486084, "rewards/accuracies": 1.0, "rewards/chosen": -0.06219719350337982, "rewards/margins": 0.45753559470176697, "rewards/rejected": -0.5197327733039856, "step": 7837 }, { "epoch": 4.781454933658686, "grad_norm": 2.1074135303497314, "learning_rate": 3.497856705450092e-07, "log_odds_chosen": 2.876999855041504, "log_odds_ratio": -0.3225814402103424, "logits/chosen": -0.850197434425354, "logits/rejected": -0.8346865773200989, "logps/chosen": -0.6230229139328003, "logps/rejected": -2.8206381797790527, "loss": 1.1503, "nll_loss": 1.1085867881774902, "rewards/accuracies": 0.875, "rewards/chosen": -0.06230229139328003, "rewards/margins": 0.21976152062416077, "rewards/rejected": -0.2820638418197632, "step": 7838 }, { "epoch": 4.782064968735702, "grad_norm": 1.178045392036438, "learning_rate": 3.4880587875076547e-07, "log_odds_chosen": 3.536240816116333, "log_odds_ratio": -0.5522453784942627, "logits/chosen": -0.7395403981208801, "logits/rejected": -0.9976712465286255, "logps/chosen": -0.7343444228172302, "logps/rejected": -3.9333138465881348, "loss": 1.1396, "nll_loss": 1.1350663900375366, "rewards/accuracies": 0.5, "rewards/chosen": -0.07343444228172302, "rewards/margins": 0.319896936416626, "rewards/rejected": -0.393331378698349, "step": 7839 }, { "epoch": 4.782675003812719, "grad_norm": 14.361274719238281, "learning_rate": 3.478260869565217e-07, "log_odds_chosen": 1.3761420249938965, "log_odds_ratio": -0.6082111597061157, "logits/chosen": -0.7426297068595886, "logits/rejected": -0.8956640362739563, "logps/chosen": -0.9985831379890442, "logps/rejected": -2.114823579788208, "loss": 0.9916, "nll_loss": 1.0203558206558228, "rewards/accuracies": 0.625, "rewards/chosen": -0.09985831379890442, "rewards/margins": 0.11162404716014862, "rewards/rejected": -0.21148236095905304, "step": 7840 }, { "epoch": 4.783285038889736, "grad_norm": 1.9992784261703491, "learning_rate": 3.46846295162278e-07, "log_odds_chosen": 1.9508248567581177, "log_odds_ratio": -0.40991857647895813, "logits/chosen": -0.9137316942214966, "logits/rejected": -1.0089157819747925, "logps/chosen": -0.899622917175293, "logps/rejected": -2.4600131511688232, "loss": 1.106, "nll_loss": 1.2646050453186035, "rewards/accuracies": 0.75, "rewards/chosen": -0.08996228873729706, "rewards/margins": 0.1560390293598175, "rewards/rejected": -0.24600133299827576, "step": 7841 }, { "epoch": 4.783895073966753, "grad_norm": 2.0110576152801514, "learning_rate": 3.458665033680343e-07, "log_odds_chosen": 2.1049230098724365, "log_odds_ratio": -0.5363956689834595, "logits/chosen": -0.8413011431694031, "logits/rejected": -0.9089833498001099, "logps/chosen": -1.0502159595489502, "logps/rejected": -2.6224617958068848, "loss": 1.1781, "nll_loss": 1.2637256383895874, "rewards/accuracies": 0.625, "rewards/chosen": -0.10502159595489502, "rewards/margins": 0.1572245955467224, "rewards/rejected": -0.26224619150161743, "step": 7842 }, { "epoch": 4.78450510904377, "grad_norm": 3.121548652648926, "learning_rate": 3.4488671157379054e-07, "log_odds_chosen": 1.8684808015823364, "log_odds_ratio": -0.4760397672653198, "logits/chosen": -0.9174672961235046, "logits/rejected": -1.024045467376709, "logps/chosen": -0.9969402551651001, "logps/rejected": -2.6546220779418945, "loss": 1.0933, "nll_loss": 1.1171832084655762, "rewards/accuracies": 0.625, "rewards/chosen": -0.09969402849674225, "rewards/margins": 0.16576817631721497, "rewards/rejected": -0.2654622197151184, "step": 7843 }, { "epoch": 4.785115144120787, "grad_norm": 1.3476837873458862, "learning_rate": 3.439069197795468e-07, "log_odds_chosen": 2.8581740856170654, "log_odds_ratio": -0.3924199640750885, "logits/chosen": -0.9351692199707031, "logits/rejected": -1.0218387842178345, "logps/chosen": -0.83039391040802, "logps/rejected": -3.1931533813476562, "loss": 0.9419, "nll_loss": 0.9640114307403564, "rewards/accuracies": 0.75, "rewards/chosen": -0.08303939551115036, "rewards/margins": 0.23627594113349915, "rewards/rejected": -0.3193153440952301, "step": 7844 }, { "epoch": 4.7857251791978035, "grad_norm": 1.4842228889465332, "learning_rate": 3.429271279853031e-07, "log_odds_chosen": 1.4548826217651367, "log_odds_ratio": -0.40250080823898315, "logits/chosen": -1.1598047018051147, "logits/rejected": -1.1966091394424438, "logps/chosen": -0.9892110824584961, "logps/rejected": -2.0884897708892822, "loss": 0.9745, "nll_loss": 1.184577465057373, "rewards/accuracies": 1.0, "rewards/chosen": -0.09892110526561737, "rewards/margins": 0.10992789268493652, "rewards/rejected": -0.2088489979505539, "step": 7845 }, { "epoch": 4.786335214274821, "grad_norm": 1.3534692525863647, "learning_rate": 3.419473361910594e-07, "log_odds_chosen": 0.9262201189994812, "log_odds_ratio": -0.451779305934906, "logits/chosen": -1.0053379535675049, "logits/rejected": -0.9737203121185303, "logps/chosen": -0.8242892026901245, "logps/rejected": -1.5040338039398193, "loss": 1.1493, "nll_loss": 1.0413976907730103, "rewards/accuracies": 0.875, "rewards/chosen": -0.08242891728878021, "rewards/margins": 0.06797446310520172, "rewards/rejected": -0.15040339529514313, "step": 7846 }, { "epoch": 4.786945249351838, "grad_norm": 1.46255624294281, "learning_rate": 3.4096754439681567e-07, "log_odds_chosen": 1.494154691696167, "log_odds_ratio": -0.381670206785202, "logits/chosen": -1.0053526163101196, "logits/rejected": -1.0250582695007324, "logps/chosen": -0.7612165212631226, "logps/rejected": -1.8193018436431885, "loss": 0.9404, "nll_loss": 0.947591245174408, "rewards/accuracies": 0.875, "rewards/chosen": -0.07612165063619614, "rewards/margins": 0.10580854117870331, "rewards/rejected": -0.18193018436431885, "step": 7847 }, { "epoch": 4.787555284428855, "grad_norm": 19.257259368896484, "learning_rate": 3.3998775260257195e-07, "log_odds_chosen": 3.233652353286743, "log_odds_ratio": -0.29707279801368713, "logits/chosen": -0.9293516278266907, "logits/rejected": -1.0985779762268066, "logps/chosen": -0.9019989967346191, "logps/rejected": -3.7361338138580322, "loss": 1.0775, "nll_loss": 1.3592697381973267, "rewards/accuracies": 0.75, "rewards/chosen": -0.09019989520311356, "rewards/margins": 0.28341349959373474, "rewards/rejected": -0.3736133873462677, "step": 7848 }, { "epoch": 4.788165319505872, "grad_norm": 1.90959894657135, "learning_rate": 3.3900796080832823e-07, "log_odds_chosen": 2.849043607711792, "log_odds_ratio": -0.24124298989772797, "logits/chosen": -0.8017975687980652, "logits/rejected": -1.0535701513290405, "logps/chosen": -0.6426230072975159, "logps/rejected": -2.8164920806884766, "loss": 0.9703, "nll_loss": 1.01542067527771, "rewards/accuracies": 0.875, "rewards/chosen": -0.06426229327917099, "rewards/margins": 0.21738693118095398, "rewards/rejected": -0.28164923191070557, "step": 7849 }, { "epoch": 4.788775354582889, "grad_norm": 15.080506324768066, "learning_rate": 3.380281690140845e-07, "log_odds_chosen": 1.4434103965759277, "log_odds_ratio": -0.3136841356754303, "logits/chosen": -0.883382260799408, "logits/rejected": -0.8880798816680908, "logps/chosen": -0.8362977504730225, "logps/rejected": -1.7660486698150635, "loss": 1.01, "nll_loss": 1.0612584352493286, "rewards/accuracies": 1.0, "rewards/chosen": -0.0836297795176506, "rewards/margins": 0.09297509491443634, "rewards/rejected": -0.17660486698150635, "step": 7850 }, { "epoch": 4.7893853896599055, "grad_norm": 1.547536849975586, "learning_rate": 3.3704837721984074e-07, "log_odds_chosen": 1.6638352870941162, "log_odds_ratio": -0.4026240110397339, "logits/chosen": -0.8329746723175049, "logits/rejected": -0.7784388065338135, "logps/chosen": -0.627526044845581, "logps/rejected": -1.922585129737854, "loss": 0.9349, "nll_loss": 0.905087947845459, "rewards/accuracies": 0.875, "rewards/chosen": -0.0627526044845581, "rewards/margins": 0.129505917429924, "rewards/rejected": -0.19225852191448212, "step": 7851 }, { "epoch": 4.789995424736922, "grad_norm": 7.803408622741699, "learning_rate": 3.3606858542559707e-07, "log_odds_chosen": 1.7560349702835083, "log_odds_ratio": -0.39864835143089294, "logits/chosen": -1.0901447534561157, "logits/rejected": -1.0777957439422607, "logps/chosen": -0.959905743598938, "logps/rejected": -2.4574689865112305, "loss": 1.1643, "nll_loss": 1.2689425945281982, "rewards/accuracies": 0.75, "rewards/chosen": -0.09599057585000992, "rewards/margins": 0.1497563123703003, "rewards/rejected": -0.24574688076972961, "step": 7852 }, { "epoch": 4.790605459813939, "grad_norm": 3.9416680335998535, "learning_rate": 3.3508879363135335e-07, "log_odds_chosen": 2.004067897796631, "log_odds_ratio": -0.4146724343299866, "logits/chosen": -0.8126698136329651, "logits/rejected": -1.0704047679901123, "logps/chosen": -0.9168864488601685, "logps/rejected": -2.4954288005828857, "loss": 1.1859, "nll_loss": 1.0695983171463013, "rewards/accuracies": 0.625, "rewards/chosen": -0.09168865531682968, "rewards/margins": 0.15785422921180725, "rewards/rejected": -0.24954287707805634, "step": 7853 }, { "epoch": 4.791215494890956, "grad_norm": 9.63090705871582, "learning_rate": 3.341090018371096e-07, "log_odds_chosen": 0.9698148369789124, "log_odds_ratio": -0.4582584500312805, "logits/chosen": -1.031091570854187, "logits/rejected": -0.9647879600524902, "logps/chosen": -0.9079836010932922, "logps/rejected": -1.600408911705017, "loss": 1.1284, "nll_loss": 1.155286431312561, "rewards/accuracies": 0.75, "rewards/chosen": -0.09079835563898087, "rewards/margins": 0.06924253702163696, "rewards/rejected": -0.16004088521003723, "step": 7854 }, { "epoch": 4.791825529967973, "grad_norm": 5.412054061889648, "learning_rate": 3.3312921004286586e-07, "log_odds_chosen": 2.3100812435150146, "log_odds_ratio": -0.36790406703948975, "logits/chosen": -0.6275089979171753, "logits/rejected": -0.6923753023147583, "logps/chosen": -0.809655487537384, "logps/rejected": -2.800790786743164, "loss": 0.9625, "nll_loss": 0.851760745048523, "rewards/accuracies": 0.75, "rewards/chosen": -0.0809655487537384, "rewards/margins": 0.19911354780197144, "rewards/rejected": -0.28007909655570984, "step": 7855 }, { "epoch": 4.79243556504499, "grad_norm": 2.963052988052368, "learning_rate": 3.3214941824862214e-07, "log_odds_chosen": 1.8041822910308838, "log_odds_ratio": -0.40683120489120483, "logits/chosen": -0.8878533244132996, "logits/rejected": -0.8975958228111267, "logps/chosen": -0.624954104423523, "logps/rejected": -1.9875514507293701, "loss": 0.9384, "nll_loss": 0.8140197992324829, "rewards/accuracies": 0.75, "rewards/chosen": -0.062495410442352295, "rewards/margins": 0.13625973463058472, "rewards/rejected": -0.198755145072937, "step": 7856 }, { "epoch": 4.7930456001220065, "grad_norm": 1.7810659408569336, "learning_rate": 3.311696264543784e-07, "log_odds_chosen": 0.9583214521408081, "log_odds_ratio": -0.4850844740867615, "logits/chosen": -0.9504619836807251, "logits/rejected": -1.0365723371505737, "logps/chosen": -0.748813271522522, "logps/rejected": -1.3611481189727783, "loss": 1.031, "nll_loss": 1.1200016736984253, "rewards/accuracies": 0.875, "rewards/chosen": -0.07488133013248444, "rewards/margins": 0.06123349443078041, "rewards/rejected": -0.13611482083797455, "step": 7857 }, { "epoch": 4.793655635199024, "grad_norm": 3.480384588241577, "learning_rate": 3.301898346601347e-07, "log_odds_chosen": 3.036480665206909, "log_odds_ratio": -0.6230989098548889, "logits/chosen": -0.80629563331604, "logits/rejected": -0.9261694550514221, "logps/chosen": -0.6626912355422974, "logps/rejected": -3.319427967071533, "loss": 1.0634, "nll_loss": 0.9089398384094238, "rewards/accuracies": 0.5, "rewards/chosen": -0.06626912951469421, "rewards/margins": 0.2656736373901367, "rewards/rejected": -0.3319427967071533, "step": 7858 }, { "epoch": 4.794265670276041, "grad_norm": 3.0627353191375732, "learning_rate": 3.29210042865891e-07, "log_odds_chosen": 2.7068252563476562, "log_odds_ratio": -0.22864030301570892, "logits/chosen": -0.7069565057754517, "logits/rejected": -0.8916858434677124, "logps/chosen": -0.6062237620353699, "logps/rejected": -2.714751720428467, "loss": 1.0506, "nll_loss": 0.8799764513969421, "rewards/accuracies": 0.875, "rewards/chosen": -0.060622379183769226, "rewards/margins": 0.21085280179977417, "rewards/rejected": -0.2714751958847046, "step": 7859 }, { "epoch": 4.794875705353058, "grad_norm": 1.2719669342041016, "learning_rate": 3.2823025107164727e-07, "log_odds_chosen": 2.558976411819458, "log_odds_ratio": -0.24287010729312897, "logits/chosen": -0.9456921815872192, "logits/rejected": -1.1267508268356323, "logps/chosen": -0.7533129453659058, "logps/rejected": -2.603109359741211, "loss": 1.0417, "nll_loss": 0.9949731230735779, "rewards/accuracies": 1.0, "rewards/chosen": -0.07533128559589386, "rewards/margins": 0.1849796175956726, "rewards/rejected": -0.26031091809272766, "step": 7860 }, { "epoch": 4.795485740430075, "grad_norm": 1.7599761486053467, "learning_rate": 3.2725045927740355e-07, "log_odds_chosen": 1.8345115184783936, "log_odds_ratio": -0.5509284138679504, "logits/chosen": -0.8293633460998535, "logits/rejected": -0.8851048946380615, "logps/chosen": -0.8878459334373474, "logps/rejected": -2.1783225536346436, "loss": 0.9997, "nll_loss": 1.1087052822113037, "rewards/accuracies": 0.5, "rewards/chosen": -0.0887845978140831, "rewards/margins": 0.12904766201972961, "rewards/rejected": -0.21783223748207092, "step": 7861 }, { "epoch": 4.796095775507092, "grad_norm": 7.942108631134033, "learning_rate": 3.262706674831598e-07, "log_odds_chosen": 1.7943413257598877, "log_odds_ratio": -0.2949618697166443, "logits/chosen": -0.6846423149108887, "logits/rejected": -0.8026115894317627, "logps/chosen": -0.8214000463485718, "logps/rejected": -2.1443235874176025, "loss": 0.9927, "nll_loss": 0.7952475547790527, "rewards/accuracies": 1.0, "rewards/chosen": -0.0821400061249733, "rewards/margins": 0.13229236006736755, "rewards/rejected": -0.21443238854408264, "step": 7862 }, { "epoch": 4.7967058105841085, "grad_norm": 5.89584493637085, "learning_rate": 3.252908756889161e-07, "log_odds_chosen": 1.6501893997192383, "log_odds_ratio": -0.359548419713974, "logits/chosen": -0.9261161088943481, "logits/rejected": -1.040099859237671, "logps/chosen": -0.7000256776809692, "logps/rejected": -1.8323187828063965, "loss": 1.0039, "nll_loss": 0.8181713223457336, "rewards/accuracies": 0.75, "rewards/chosen": -0.07000256329774857, "rewards/margins": 0.11322931945323944, "rewards/rejected": -0.1832318753004074, "step": 7863 }, { "epoch": 4.797315845661125, "grad_norm": 1.634752631187439, "learning_rate": 3.243110838946724e-07, "log_odds_chosen": 1.9834041595458984, "log_odds_ratio": -0.5040145516395569, "logits/chosen": -1.0259921550750732, "logits/rejected": -1.1559675931930542, "logps/chosen": -0.968619704246521, "logps/rejected": -2.7662951946258545, "loss": 1.1026, "nll_loss": 1.1190507411956787, "rewards/accuracies": 0.625, "rewards/chosen": -0.09686196595430374, "rewards/margins": 0.17976754903793335, "rewards/rejected": -0.2766295075416565, "step": 7864 }, { "epoch": 4.797925880738142, "grad_norm": 2.2247564792633057, "learning_rate": 3.233312921004286e-07, "log_odds_chosen": 1.8071990013122559, "log_odds_ratio": -0.3168221116065979, "logits/chosen": -0.747826099395752, "logits/rejected": -0.8059276342391968, "logps/chosen": -0.620254397392273, "logps/rejected": -1.7913398742675781, "loss": 0.8828, "nll_loss": 0.7548825740814209, "rewards/accuracies": 0.875, "rewards/chosen": -0.062025442719459534, "rewards/margins": 0.1171085312962532, "rewards/rejected": -0.17913398146629333, "step": 7865 }, { "epoch": 4.798535915815159, "grad_norm": 2.2932991981506348, "learning_rate": 3.223515003061849e-07, "log_odds_chosen": 1.7397822141647339, "log_odds_ratio": -0.3695942759513855, "logits/chosen": -0.7664707899093628, "logits/rejected": -0.9458461403846741, "logps/chosen": -0.6508455276489258, "logps/rejected": -1.832144021987915, "loss": 1.0029, "nll_loss": 0.7850238084793091, "rewards/accuracies": 0.75, "rewards/chosen": -0.0650845542550087, "rewards/margins": 0.11812985688447952, "rewards/rejected": -0.18321441113948822, "step": 7866 }, { "epoch": 4.799145950892177, "grad_norm": 1.3767662048339844, "learning_rate": 3.213717085119412e-07, "log_odds_chosen": 2.1418726444244385, "log_odds_ratio": -0.44055283069610596, "logits/chosen": -1.0186904668807983, "logits/rejected": -1.009955883026123, "logps/chosen": -0.9633324146270752, "logps/rejected": -2.8255040645599365, "loss": 1.144, "nll_loss": 1.2057163715362549, "rewards/accuracies": 0.75, "rewards/chosen": -0.09633324295282364, "rewards/margins": 0.18621717393398285, "rewards/rejected": -0.2825504243373871, "step": 7867 }, { "epoch": 4.799755985969194, "grad_norm": 9.656219482421875, "learning_rate": 3.203919167176975e-07, "log_odds_chosen": 1.8796228170394897, "log_odds_ratio": -0.38766613602638245, "logits/chosen": -0.6807202100753784, "logits/rejected": -0.6858780980110168, "logps/chosen": -0.6268057227134705, "logps/rejected": -2.0500240325927734, "loss": 0.9699, "nll_loss": 0.8304311037063599, "rewards/accuracies": 0.75, "rewards/chosen": -0.06268057227134705, "rewards/margins": 0.1423218548297882, "rewards/rejected": -0.20500242710113525, "step": 7868 }, { "epoch": 4.8003660210462105, "grad_norm": 1.532823085784912, "learning_rate": 3.1941212492345374e-07, "log_odds_chosen": 1.3759889602661133, "log_odds_ratio": -0.34503039717674255, "logits/chosen": -0.6367950439453125, "logits/rejected": -0.6931172609329224, "logps/chosen": -0.6496557593345642, "logps/rejected": -1.3304684162139893, "loss": 1.0496, "nll_loss": 0.9523598551750183, "rewards/accuracies": 1.0, "rewards/chosen": -0.06496557593345642, "rewards/margins": 0.06808126717805862, "rewards/rejected": -0.13304685056209564, "step": 7869 }, { "epoch": 4.800976056123227, "grad_norm": 2.1570680141448975, "learning_rate": 3.1843233312921e-07, "log_odds_chosen": 1.383302927017212, "log_odds_ratio": -0.39139047265052795, "logits/chosen": -1.0555124282836914, "logits/rejected": -1.1157233715057373, "logps/chosen": -0.9653981924057007, "logps/rejected": -1.9998494386672974, "loss": 1.2508, "nll_loss": 1.080322265625, "rewards/accuracies": 0.875, "rewards/chosen": -0.09653982520103455, "rewards/margins": 0.1034451350569725, "rewards/rejected": -0.19998495280742645, "step": 7870 }, { "epoch": 4.801586091200244, "grad_norm": 6.49107551574707, "learning_rate": 3.174525413349663e-07, "log_odds_chosen": 3.5255579948425293, "log_odds_ratio": -0.2786805331707001, "logits/chosen": -1.1043373346328735, "logits/rejected": -1.1746832132339478, "logps/chosen": -0.7076209187507629, "logps/rejected": -3.6015114784240723, "loss": 1.1341, "nll_loss": 1.2281086444854736, "rewards/accuracies": 0.875, "rewards/chosen": -0.07076209038496017, "rewards/margins": 0.28938907384872437, "rewards/rejected": -0.36015117168426514, "step": 7871 }, { "epoch": 4.802196126277261, "grad_norm": 4.329881191253662, "learning_rate": 3.164727495407226e-07, "log_odds_chosen": 2.3224968910217285, "log_odds_ratio": -0.2965409457683563, "logits/chosen": -0.8835288286209106, "logits/rejected": -1.0795915126800537, "logps/chosen": -0.8786717057228088, "logps/rejected": -2.7087440490722656, "loss": 1.1324, "nll_loss": 0.9684673547744751, "rewards/accuracies": 0.875, "rewards/chosen": -0.08786717057228088, "rewards/margins": 0.18300724029541016, "rewards/rejected": -0.27087441086769104, "step": 7872 }, { "epoch": 4.802806161354278, "grad_norm": 1.0783172845840454, "learning_rate": 3.1549295774647887e-07, "log_odds_chosen": 1.6837623119354248, "log_odds_ratio": -0.4882212281227112, "logits/chosen": -1.0508683919906616, "logits/rejected": -1.0344104766845703, "logps/chosen": -0.9388848543167114, "logps/rejected": -2.3163959980010986, "loss": 1.0667, "nll_loss": 1.1308120489120483, "rewards/accuracies": 0.75, "rewards/chosen": -0.09388849139213562, "rewards/margins": 0.13775110244750977, "rewards/rejected": -0.23163959383964539, "step": 7873 }, { "epoch": 4.803416196431295, "grad_norm": 1.4916855096817017, "learning_rate": 3.1451316595223515e-07, "log_odds_chosen": 1.5537563562393188, "log_odds_ratio": -0.40262120962142944, "logits/chosen": -1.0299561023712158, "logits/rejected": -1.1069176197052002, "logps/chosen": -0.8640443682670593, "logps/rejected": -2.219393730163574, "loss": 1.0741, "nll_loss": 0.9596467018127441, "rewards/accuracies": 0.75, "rewards/chosen": -0.08640443533658981, "rewards/margins": 0.13553494215011597, "rewards/rejected": -0.22193938493728638, "step": 7874 }, { "epoch": 4.8040262315083115, "grad_norm": 5.709155082702637, "learning_rate": 3.1353337415799143e-07, "log_odds_chosen": 1.3322361707687378, "log_odds_ratio": -0.5334955453872681, "logits/chosen": -0.9621407389640808, "logits/rejected": -0.996030330657959, "logps/chosen": -0.8395859003067017, "logps/rejected": -1.7971090078353882, "loss": 1.1025, "nll_loss": 1.0633150339126587, "rewards/accuracies": 0.625, "rewards/chosen": -0.08395858854055405, "rewards/margins": 0.0957523062825203, "rewards/rejected": -0.17971090972423553, "step": 7875 }, { "epoch": 4.804636266585328, "grad_norm": 2.0382254123687744, "learning_rate": 3.1255358236374766e-07, "log_odds_chosen": 3.9751739501953125, "log_odds_ratio": -0.3129490613937378, "logits/chosen": -0.6662594079971313, "logits/rejected": -0.847722053527832, "logps/chosen": -0.6087813973426819, "logps/rejected": -3.792738437652588, "loss": 1.0291, "nll_loss": 0.8352633714675903, "rewards/accuracies": 0.75, "rewards/chosen": -0.06087813898921013, "rewards/margins": 0.3183957040309906, "rewards/rejected": -0.37927383184432983, "step": 7876 }, { "epoch": 4.805246301662345, "grad_norm": 8.55721378326416, "learning_rate": 3.1157379056950394e-07, "log_odds_chosen": 1.3597368001937866, "log_odds_ratio": -0.3667983412742615, "logits/chosen": -1.0163590908050537, "logits/rejected": -1.013052225112915, "logps/chosen": -0.6156730651855469, "logps/rejected": -1.3607354164123535, "loss": 1.141, "nll_loss": 1.1091701984405518, "rewards/accuracies": 0.75, "rewards/chosen": -0.061567310243844986, "rewards/margins": 0.0745062381029129, "rewards/rejected": -0.1360735446214676, "step": 7877 }, { "epoch": 4.805856336739362, "grad_norm": 2.0652928352355957, "learning_rate": 3.1059399877526027e-07, "log_odds_chosen": 3.8053455352783203, "log_odds_ratio": -0.23338583111763, "logits/chosen": -0.7780948877334595, "logits/rejected": -1.1191972494125366, "logps/chosen": -0.5348563194274902, "logps/rejected": -3.5379066467285156, "loss": 0.9273, "nll_loss": 0.8267016410827637, "rewards/accuracies": 0.875, "rewards/chosen": -0.05348563194274902, "rewards/margins": 0.30030500888824463, "rewards/rejected": -0.35379064083099365, "step": 7878 }, { "epoch": 4.80646637181638, "grad_norm": 1.8496224880218506, "learning_rate": 3.0961420698101655e-07, "log_odds_chosen": 0.8467026948928833, "log_odds_ratio": -0.4800722897052765, "logits/chosen": -0.8131642937660217, "logits/rejected": -0.86491858959198, "logps/chosen": -0.7225841879844666, "logps/rejected": -1.3074358701705933, "loss": 1.1389, "nll_loss": 1.1362963914871216, "rewards/accuracies": 0.75, "rewards/chosen": -0.07225842773914337, "rewards/margins": 0.05848515406250954, "rewards/rejected": -0.1307435780763626, "step": 7879 }, { "epoch": 4.807076406893397, "grad_norm": 2.177765369415283, "learning_rate": 3.086344151867728e-07, "log_odds_chosen": 2.6371419429779053, "log_odds_ratio": -0.3081701397895813, "logits/chosen": -0.9866029024124146, "logits/rejected": -1.05158269405365, "logps/chosen": -0.5930213332176208, "logps/rejected": -2.4415876865386963, "loss": 0.994, "nll_loss": 0.9844029545783997, "rewards/accuracies": 0.875, "rewards/chosen": -0.059302136301994324, "rewards/margins": 0.1848566234111786, "rewards/rejected": -0.2441587597131729, "step": 7880 }, { "epoch": 4.8076864419704135, "grad_norm": 1.5690851211547852, "learning_rate": 3.0765462339252906e-07, "log_odds_chosen": 2.1965930461883545, "log_odds_ratio": -0.43459466099739075, "logits/chosen": -0.8349239826202393, "logits/rejected": -0.8301926851272583, "logps/chosen": -0.7929153442382812, "logps/rejected": -2.52406644821167, "loss": 0.9815, "nll_loss": 0.8209920525550842, "rewards/accuracies": 0.75, "rewards/chosen": -0.07929153740406036, "rewards/margins": 0.1731151044368744, "rewards/rejected": -0.25240662693977356, "step": 7881 }, { "epoch": 4.80829647704743, "grad_norm": 1.7239227294921875, "learning_rate": 3.0667483159828534e-07, "log_odds_chosen": 1.7012003660202026, "log_odds_ratio": -0.35163554549217224, "logits/chosen": -0.7887946963310242, "logits/rejected": -0.7604840397834778, "logps/chosen": -0.5984451174736023, "logps/rejected": -1.5840675830841064, "loss": 0.9397, "nll_loss": 0.9538804292678833, "rewards/accuracies": 1.0, "rewards/chosen": -0.05984451249241829, "rewards/margins": 0.09856224060058594, "rewards/rejected": -0.15840676426887512, "step": 7882 }, { "epoch": 4.808906512124447, "grad_norm": 1.035483956336975, "learning_rate": 3.056950398040417e-07, "log_odds_chosen": 0.5913532972335815, "log_odds_ratio": -0.759820818901062, "logits/chosen": -0.9283720850944519, "logits/rejected": -0.9526196718215942, "logps/chosen": -0.9808148741722107, "logps/rejected": -1.4622236490249634, "loss": 1.093, "nll_loss": 1.2853434085845947, "rewards/accuracies": 0.5, "rewards/chosen": -0.09808149188756943, "rewards/margins": 0.04814087226986885, "rewards/rejected": -0.14622236788272858, "step": 7883 }, { "epoch": 4.809516547201464, "grad_norm": 1.4941986799240112, "learning_rate": 3.047152480097979e-07, "log_odds_chosen": 2.1158785820007324, "log_odds_ratio": -0.3699822425842285, "logits/chosen": -1.0092761516571045, "logits/rejected": -1.0099457502365112, "logps/chosen": -1.103524088859558, "logps/rejected": -2.8123128414154053, "loss": 1.2792, "nll_loss": 1.1785285472869873, "rewards/accuracies": 0.75, "rewards/chosen": -0.11035240441560745, "rewards/margins": 0.17087887227535248, "rewards/rejected": -0.2812313139438629, "step": 7884 }, { "epoch": 4.810126582278481, "grad_norm": 1.7484126091003418, "learning_rate": 3.037354562155542e-07, "log_odds_chosen": 0.9595217108726501, "log_odds_ratio": -0.5405587553977966, "logits/chosen": -0.8216373920440674, "logits/rejected": -0.9450690150260925, "logps/chosen": -0.8723585605621338, "logps/rejected": -1.5729618072509766, "loss": 1.0218, "nll_loss": 1.0553873777389526, "rewards/accuracies": 0.625, "rewards/chosen": -0.08723586052656174, "rewards/margins": 0.07006032019853592, "rewards/rejected": -0.15729618072509766, "step": 7885 }, { "epoch": 4.810736617355498, "grad_norm": 1.7437448501586914, "learning_rate": 3.0275566442131047e-07, "log_odds_chosen": 1.6956264972686768, "log_odds_ratio": -0.4267302453517914, "logits/chosen": -1.0226198434829712, "logits/rejected": -1.1581213474273682, "logps/chosen": -0.7512267231941223, "logps/rejected": -2.095902919769287, "loss": 1.0706, "nll_loss": 0.9900033473968506, "rewards/accuracies": 0.75, "rewards/chosen": -0.07512266933917999, "rewards/margins": 0.13446760177612305, "rewards/rejected": -0.20959028601646423, "step": 7886 }, { "epoch": 4.811346652432515, "grad_norm": 1.3042019605636597, "learning_rate": 3.017758726270667e-07, "log_odds_chosen": 1.8051776885986328, "log_odds_ratio": -0.37168437242507935, "logits/chosen": -0.8208632469177246, "logits/rejected": -1.1037046909332275, "logps/chosen": -0.7918375730514526, "logps/rejected": -2.0621914863586426, "loss": 0.8647, "nll_loss": 0.8442831635475159, "rewards/accuracies": 0.875, "rewards/chosen": -0.07918375730514526, "rewards/margins": 0.12703537940979004, "rewards/rejected": -0.2062191516160965, "step": 7887 }, { "epoch": 4.811956687509531, "grad_norm": 1.418228030204773, "learning_rate": 3.00796080832823e-07, "log_odds_chosen": 3.455514430999756, "log_odds_ratio": -0.07684513926506042, "logits/chosen": -0.6811411380767822, "logits/rejected": -1.088945984840393, "logps/chosen": -0.8093795776367188, "logps/rejected": -3.6393375396728516, "loss": 0.9106, "nll_loss": 0.9690839648246765, "rewards/accuracies": 1.0, "rewards/chosen": -0.080937959253788, "rewards/margins": 0.2829957902431488, "rewards/rejected": -0.3639337420463562, "step": 7888 }, { "epoch": 4.812566722586549, "grad_norm": 6.604912757873535, "learning_rate": 2.998162890385793e-07, "log_odds_chosen": 1.0795320272445679, "log_odds_ratio": -0.605974555015564, "logits/chosen": -0.8727742433547974, "logits/rejected": -0.9794958829879761, "logps/chosen": -1.0010422468185425, "logps/rejected": -1.9690907001495361, "loss": 1.1434, "nll_loss": 1.097733974456787, "rewards/accuracies": 0.625, "rewards/chosen": -0.10010422766208649, "rewards/margins": 0.09680484235286713, "rewards/rejected": -0.1969090700149536, "step": 7889 }, { "epoch": 4.813176757663566, "grad_norm": 1.110831379890442, "learning_rate": 2.988364972443356e-07, "log_odds_chosen": 1.1776801347732544, "log_odds_ratio": -0.47208237648010254, "logits/chosen": -1.145747184753418, "logits/rejected": -1.1156158447265625, "logps/chosen": -0.8460548520088196, "logps/rejected": -1.4856300354003906, "loss": 1.1976, "nll_loss": 1.1878663301467896, "rewards/accuracies": 0.625, "rewards/chosen": -0.08460549265146255, "rewards/margins": 0.06395752727985382, "rewards/rejected": -0.14856301248073578, "step": 7890 }, { "epoch": 4.813786792740583, "grad_norm": 32.065223693847656, "learning_rate": 2.978567054500918e-07, "log_odds_chosen": 1.9489357471466064, "log_odds_ratio": -0.5475072264671326, "logits/chosen": -0.9572275876998901, "logits/rejected": -0.9550182223320007, "logps/chosen": -0.8023346662521362, "logps/rejected": -2.580106258392334, "loss": 1.0773, "nll_loss": 1.1518104076385498, "rewards/accuracies": 0.625, "rewards/chosen": -0.08023347705602646, "rewards/margins": 0.17777715623378754, "rewards/rejected": -0.2580106258392334, "step": 7891 }, { "epoch": 4.8143968278176, "grad_norm": 15.175440788269043, "learning_rate": 2.968769136558481e-07, "log_odds_chosen": 1.217010736465454, "log_odds_ratio": -0.5148453712463379, "logits/chosen": -1.043811321258545, "logits/rejected": -1.0854706764221191, "logps/chosen": -0.8415734171867371, "logps/rejected": -1.8329198360443115, "loss": 0.989, "nll_loss": 1.0117355585098267, "rewards/accuracies": 0.75, "rewards/chosen": -0.08415733277797699, "rewards/margins": 0.0991346538066864, "rewards/rejected": -0.1832919716835022, "step": 7892 }, { "epoch": 4.8150068628946165, "grad_norm": 1.283233880996704, "learning_rate": 2.958971218616044e-07, "log_odds_chosen": 1.8841570615768433, "log_odds_ratio": -0.4727388620376587, "logits/chosen": -0.8123300671577454, "logits/rejected": -0.9852325320243835, "logps/chosen": -0.8959458470344543, "logps/rejected": -2.479884386062622, "loss": 1.0637, "nll_loss": 1.0861105918884277, "rewards/accuracies": 0.875, "rewards/chosen": -0.08959458768367767, "rewards/margins": 0.15839387476444244, "rewards/rejected": -0.24798844754695892, "step": 7893 }, { "epoch": 4.815616897971633, "grad_norm": 1.4580529928207397, "learning_rate": 2.949173300673607e-07, "log_odds_chosen": 2.2015137672424316, "log_odds_ratio": -0.19404467940330505, "logits/chosen": -0.7218195199966431, "logits/rejected": -0.8999873995780945, "logps/chosen": -0.6819429397583008, "logps/rejected": -2.2126126289367676, "loss": 0.9393, "nll_loss": 0.7961496710777283, "rewards/accuracies": 1.0, "rewards/chosen": -0.06819429993629456, "rewards/margins": 0.1530669629573822, "rewards/rejected": -0.22126126289367676, "step": 7894 }, { "epoch": 4.81622693304865, "grad_norm": 1.9249613285064697, "learning_rate": 2.9393753827311695e-07, "log_odds_chosen": 1.1496516466140747, "log_odds_ratio": -0.48178133368492126, "logits/chosen": -0.8516787886619568, "logits/rejected": -0.9025305509567261, "logps/chosen": -0.6866810321807861, "logps/rejected": -1.4143779277801514, "loss": 1.0413, "nll_loss": 1.0512850284576416, "rewards/accuracies": 0.75, "rewards/chosen": -0.06866810470819473, "rewards/margins": 0.07276969403028488, "rewards/rejected": -0.14143779873847961, "step": 7895 }, { "epoch": 4.816836968125667, "grad_norm": 1.6483653783798218, "learning_rate": 2.9295774647887323e-07, "log_odds_chosen": 2.2323381900787354, "log_odds_ratio": -0.2571232318878174, "logits/chosen": -0.5922989845275879, "logits/rejected": -0.951804518699646, "logps/chosen": -0.7755753993988037, "logps/rejected": -2.4384608268737793, "loss": 0.911, "nll_loss": 0.9099549055099487, "rewards/accuracies": 1.0, "rewards/chosen": -0.07755754142999649, "rewards/margins": 0.16628853976726532, "rewards/rejected": -0.2438460737466812, "step": 7896 }, { "epoch": 4.817447003202684, "grad_norm": 1.3064444065093994, "learning_rate": 2.919779546846295e-07, "log_odds_chosen": 0.6524831652641296, "log_odds_ratio": -0.5208531618118286, "logits/chosen": -0.9479035139083862, "logits/rejected": -0.920330286026001, "logps/chosen": -0.9415003061294556, "logps/rejected": -1.4561859369277954, "loss": 0.9809, "nll_loss": 1.0407989025115967, "rewards/accuracies": 0.75, "rewards/chosen": -0.09415003657341003, "rewards/margins": 0.05146855488419533, "rewards/rejected": -0.14561858773231506, "step": 7897 }, { "epoch": 4.818057038279701, "grad_norm": 1.342928171157837, "learning_rate": 2.9099816289038574e-07, "log_odds_chosen": 0.5117071270942688, "log_odds_ratio": -0.5108317732810974, "logits/chosen": -0.9297058582305908, "logits/rejected": -0.9205564260482788, "logps/chosen": -0.8261842727661133, "logps/rejected": -1.174255132675171, "loss": 1.0307, "nll_loss": 1.0829250812530518, "rewards/accuracies": 0.875, "rewards/chosen": -0.08261843025684357, "rewards/margins": 0.034807078540325165, "rewards/rejected": -0.11742550879716873, "step": 7898 }, { "epoch": 4.818667073356718, "grad_norm": 5.02573299407959, "learning_rate": 2.9001837109614207e-07, "log_odds_chosen": 2.1202023029327393, "log_odds_ratio": -0.3882141709327698, "logits/chosen": -0.6490520238876343, "logits/rejected": -0.9178465604782104, "logps/chosen": -0.5592355132102966, "logps/rejected": -2.119565963745117, "loss": 1.0973, "nll_loss": 0.7377796173095703, "rewards/accuracies": 0.75, "rewards/chosen": -0.055923547595739365, "rewards/margins": 0.15603305399417877, "rewards/rejected": -0.21195660531520844, "step": 7899 }, { "epoch": 4.8192771084337345, "grad_norm": 1.3994554281234741, "learning_rate": 2.8903857930189835e-07, "log_odds_chosen": 2.9396448135375977, "log_odds_ratio": -0.33461669087409973, "logits/chosen": -1.0034618377685547, "logits/rejected": -1.025877833366394, "logps/chosen": -0.6724040508270264, "logps/rejected": -3.100459575653076, "loss": 1.0833, "nll_loss": 0.942588210105896, "rewards/accuracies": 0.75, "rewards/chosen": -0.067240409553051, "rewards/margins": 0.2428055703639984, "rewards/rejected": -0.3100459575653076, "step": 7900 }, { "epoch": 4.819887143510752, "grad_norm": 2.4672415256500244, "learning_rate": 2.8805878750765463e-07, "log_odds_chosen": 1.511033296585083, "log_odds_ratio": -0.630131721496582, "logits/chosen": -0.9880504608154297, "logits/rejected": -1.0781748294830322, "logps/chosen": -0.8024555444717407, "logps/rejected": -2.0940093994140625, "loss": 1.3395, "nll_loss": 1.1785101890563965, "rewards/accuracies": 0.5, "rewards/chosen": -0.08024555444717407, "rewards/margins": 0.12915539741516113, "rewards/rejected": -0.2094009518623352, "step": 7901 }, { "epoch": 4.820497178587769, "grad_norm": 4.126418590545654, "learning_rate": 2.8707899571341086e-07, "log_odds_chosen": 2.1126549243927, "log_odds_ratio": -0.35718220472335815, "logits/chosen": -0.8385385274887085, "logits/rejected": -0.8143864274024963, "logps/chosen": -0.8848116993904114, "logps/rejected": -2.581247091293335, "loss": 0.9553, "nll_loss": 1.026920199394226, "rewards/accuracies": 0.75, "rewards/chosen": -0.08848118036985397, "rewards/margins": 0.16964353621006012, "rewards/rejected": -0.2581247091293335, "step": 7902 }, { "epoch": 4.821107213664786, "grad_norm": 11.203023910522461, "learning_rate": 2.8609920391916714e-07, "log_odds_chosen": 4.47853422164917, "log_odds_ratio": -0.16436895728111267, "logits/chosen": -0.944085955619812, "logits/rejected": -1.1056430339813232, "logps/chosen": -0.6298196315765381, "logps/rejected": -4.4285688400268555, "loss": 0.9607, "nll_loss": 0.9155405759811401, "rewards/accuracies": 1.0, "rewards/chosen": -0.06298196315765381, "rewards/margins": 0.37987494468688965, "rewards/rejected": -0.44285687804222107, "step": 7903 }, { "epoch": 4.821717248741803, "grad_norm": 1.5251914262771606, "learning_rate": 2.851194121249235e-07, "log_odds_chosen": 3.2326648235321045, "log_odds_ratio": -0.22331178188323975, "logits/chosen": -0.9614961743354797, "logits/rejected": -1.0453273057937622, "logps/chosen": -0.7019799947738647, "logps/rejected": -3.225904941558838, "loss": 1.1676, "nll_loss": 0.9726635813713074, "rewards/accuracies": 0.875, "rewards/chosen": -0.07019799947738647, "rewards/margins": 0.2523925006389618, "rewards/rejected": -0.3225904703140259, "step": 7904 }, { "epoch": 4.82232728381882, "grad_norm": 5.376049518585205, "learning_rate": 2.841396203306797e-07, "log_odds_chosen": 2.438201904296875, "log_odds_ratio": -0.2865629196166992, "logits/chosen": -0.9239728450775146, "logits/rejected": -1.0139143466949463, "logps/chosen": -0.8562353849411011, "logps/rejected": -2.8370614051818848, "loss": 0.9847, "nll_loss": 1.018318772315979, "rewards/accuracies": 0.75, "rewards/chosen": -0.08562353998422623, "rewards/margins": 0.19808261096477509, "rewards/rejected": -0.2837061583995819, "step": 7905 }, { "epoch": 4.822937318895836, "grad_norm": 1.4518365859985352, "learning_rate": 2.83159828536436e-07, "log_odds_chosen": 1.1708769798278809, "log_odds_ratio": -0.4687308371067047, "logits/chosen": -1.0424872636795044, "logits/rejected": -1.085559368133545, "logps/chosen": -0.9099528789520264, "logps/rejected": -1.8308460712432861, "loss": 1.059, "nll_loss": 1.153283953666687, "rewards/accuracies": 0.625, "rewards/chosen": -0.09099528193473816, "rewards/margins": 0.09208932518959045, "rewards/rejected": -0.1830846071243286, "step": 7906 }, { "epoch": 4.823547353972853, "grad_norm": 1.364591121673584, "learning_rate": 2.8218003674219227e-07, "log_odds_chosen": 1.140274167060852, "log_odds_ratio": -0.8092710971832275, "logits/chosen": -1.1981006860733032, "logits/rejected": -1.1766819953918457, "logps/chosen": -1.206004023551941, "logps/rejected": -2.336106538772583, "loss": 1.1844, "nll_loss": 1.2370039224624634, "rewards/accuracies": 0.25, "rewards/chosen": -0.12060040235519409, "rewards/margins": 0.11301025003194809, "rewards/rejected": -0.23361065983772278, "step": 7907 }, { "epoch": 4.82415738904987, "grad_norm": 1.454297661781311, "learning_rate": 2.8120024494794855e-07, "log_odds_chosen": 3.6504600048065186, "log_odds_ratio": -0.2582533657550812, "logits/chosen": -0.7588744163513184, "logits/rejected": -1.0208346843719482, "logps/chosen": -0.5382962822914124, "logps/rejected": -3.311098098754883, "loss": 1.039, "nll_loss": 0.6752393245697021, "rewards/accuracies": 0.875, "rewards/chosen": -0.05382963642477989, "rewards/margins": 0.27728015184402466, "rewards/rejected": -0.33110979199409485, "step": 7908 }, { "epoch": 4.824767424126887, "grad_norm": 1.3881971836090088, "learning_rate": 2.8022045315370483e-07, "log_odds_chosen": 2.4011566638946533, "log_odds_ratio": -0.4224725365638733, "logits/chosen": -0.9375446438789368, "logits/rejected": -1.04094660282135, "logps/chosen": -0.6614114046096802, "logps/rejected": -2.5827202796936035, "loss": 0.952, "nll_loss": 0.9113776683807373, "rewards/accuracies": 0.75, "rewards/chosen": -0.06614114344120026, "rewards/margins": 0.19213086366653442, "rewards/rejected": -0.2582720220088959, "step": 7909 }, { "epoch": 4.825377459203905, "grad_norm": 2.3412539958953857, "learning_rate": 2.792406613594611e-07, "log_odds_chosen": 1.6332812309265137, "log_odds_ratio": -0.3568113446235657, "logits/chosen": -0.9109631776809692, "logits/rejected": -1.022475004196167, "logps/chosen": -0.8422374129295349, "logps/rejected": -2.061671733856201, "loss": 1.0766, "nll_loss": 0.9696860313415527, "rewards/accuracies": 0.75, "rewards/chosen": -0.08422373980283737, "rewards/margins": 0.12194344401359558, "rewards/rejected": -0.20616717636585236, "step": 7910 }, { "epoch": 4.8259874942809216, "grad_norm": 2.732771873474121, "learning_rate": 2.782608695652174e-07, "log_odds_chosen": 0.4646233022212982, "log_odds_ratio": -0.5610065460205078, "logits/chosen": -1.0299630165100098, "logits/rejected": -0.9073070287704468, "logps/chosen": -0.9382151961326599, "logps/rejected": -1.2164939641952515, "loss": 1.0893, "nll_loss": 1.3181108236312866, "rewards/accuracies": 0.625, "rewards/chosen": -0.09382152557373047, "rewards/margins": 0.027827873826026917, "rewards/rejected": -0.12164939939975739, "step": 7911 }, { "epoch": 4.826597529357938, "grad_norm": 1.6008082628250122, "learning_rate": 2.7728107777097367e-07, "log_odds_chosen": 2.9606945514678955, "log_odds_ratio": -0.3501630425453186, "logits/chosen": -1.0013360977172852, "logits/rejected": -1.0814428329467773, "logps/chosen": -1.162483811378479, "logps/rejected": -3.5810418128967285, "loss": 0.9734, "nll_loss": 1.029018759727478, "rewards/accuracies": 0.875, "rewards/chosen": -0.11624839901924133, "rewards/margins": 0.24185577034950256, "rewards/rejected": -0.3581041693687439, "step": 7912 }, { "epoch": 4.827207564434955, "grad_norm": 1.3735815286636353, "learning_rate": 2.763012859767299e-07, "log_odds_chosen": 1.7771333456039429, "log_odds_ratio": -0.45100152492523193, "logits/chosen": -1.0039589405059814, "logits/rejected": -1.0378049612045288, "logps/chosen": -0.9566648006439209, "logps/rejected": -2.497263193130493, "loss": 1.0819, "nll_loss": 1.1704113483428955, "rewards/accuracies": 0.75, "rewards/chosen": -0.09566646814346313, "rewards/margins": 0.15405984222888947, "rewards/rejected": -0.2497263252735138, "step": 7913 }, { "epoch": 4.827817599511972, "grad_norm": 4.9188456535339355, "learning_rate": 2.7532149418248623e-07, "log_odds_chosen": 3.9290456771850586, "log_odds_ratio": -0.3915182650089264, "logits/chosen": -0.9601690769195557, "logits/rejected": -1.197941780090332, "logps/chosen": -0.6641106605529785, "logps/rejected": -4.072519779205322, "loss": 0.9944, "nll_loss": 1.0799295902252197, "rewards/accuracies": 0.875, "rewards/chosen": -0.06641107052564621, "rewards/margins": 0.3408409357070923, "rewards/rejected": -0.4072519540786743, "step": 7914 }, { "epoch": 4.828427634588989, "grad_norm": 1.9696316719055176, "learning_rate": 2.743417023882425e-07, "log_odds_chosen": 3.963242530822754, "log_odds_ratio": -0.2460813820362091, "logits/chosen": -0.5830544233322144, "logits/rejected": -0.899474024772644, "logps/chosen": -0.6374441981315613, "logps/rejected": -3.986879825592041, "loss": 0.9255, "nll_loss": 0.7429078817367554, "rewards/accuracies": 1.0, "rewards/chosen": -0.06374441832304001, "rewards/margins": 0.334943562746048, "rewards/rejected": -0.3986879587173462, "step": 7915 }, { "epoch": 4.829037669666006, "grad_norm": 6.890197277069092, "learning_rate": 2.7336191059399874e-07, "log_odds_chosen": 1.3329811096191406, "log_odds_ratio": -0.5008223652839661, "logits/chosen": -0.9881466627120972, "logits/rejected": -1.025770902633667, "logps/chosen": -0.6922966241836548, "logps/rejected": -1.6923816204071045, "loss": 0.9515, "nll_loss": 0.7659571170806885, "rewards/accuracies": 0.625, "rewards/chosen": -0.06922966241836548, "rewards/margins": 0.10000850260257721, "rewards/rejected": -0.16923817992210388, "step": 7916 }, { "epoch": 4.829647704743023, "grad_norm": 1.1292400360107422, "learning_rate": 2.72382118799755e-07, "log_odds_chosen": 1.6526246070861816, "log_odds_ratio": -0.3324056565761566, "logits/chosen": -0.9875614047050476, "logits/rejected": -0.9967589974403381, "logps/chosen": -0.6796807646751404, "logps/rejected": -1.5948845148086548, "loss": 1.072, "nll_loss": 1.0961308479309082, "rewards/accuracies": 0.75, "rewards/chosen": -0.06796807795763016, "rewards/margins": 0.09152037650346756, "rewards/rejected": -0.15948845446109772, "step": 7917 }, { "epoch": 4.8302577398200395, "grad_norm": 9.545624732971191, "learning_rate": 2.714023270055113e-07, "log_odds_chosen": 1.9612079858779907, "log_odds_ratio": -0.3988052010536194, "logits/chosen": -1.0775902271270752, "logits/rejected": -1.275136947631836, "logps/chosen": -1.2196681499481201, "logps/rejected": -2.8266184329986572, "loss": 1.1414, "nll_loss": 1.5472323894500732, "rewards/accuracies": 0.875, "rewards/chosen": -0.12196682393550873, "rewards/margins": 0.16069504618644714, "rewards/rejected": -0.28266188502311707, "step": 7918 }, { "epoch": 4.830867774897056, "grad_norm": 1.3968608379364014, "learning_rate": 2.704225352112676e-07, "log_odds_chosen": 2.757812976837158, "log_odds_ratio": -0.29035085439682007, "logits/chosen": -0.7168805599212646, "logits/rejected": -0.822950541973114, "logps/chosen": -0.5961514711380005, "logps/rejected": -2.7189748287200928, "loss": 1.0144, "nll_loss": 0.7821764349937439, "rewards/accuracies": 1.0, "rewards/chosen": -0.05961515009403229, "rewards/margins": 0.21228235960006714, "rewards/rejected": -0.27189749479293823, "step": 7919 }, { "epoch": 4.831477809974073, "grad_norm": 1.5472891330718994, "learning_rate": 2.6944274341702387e-07, "log_odds_chosen": 3.1517996788024902, "log_odds_ratio": -0.3861798048019409, "logits/chosen": -0.9059682488441467, "logits/rejected": -1.1118665933609009, "logps/chosen": -0.6369327306747437, "logps/rejected": -3.2248480319976807, "loss": 0.9173, "nll_loss": 0.8476697206497192, "rewards/accuracies": 0.75, "rewards/chosen": -0.06369327753782272, "rewards/margins": 0.2587915360927582, "rewards/rejected": -0.3224847912788391, "step": 7920 }, { "epoch": 4.83208784505109, "grad_norm": 2.6328766345977783, "learning_rate": 2.6846295162278015e-07, "log_odds_chosen": 3.747098684310913, "log_odds_ratio": -0.5289111137390137, "logits/chosen": -0.8353291749954224, "logits/rejected": -0.9745815992355347, "logps/chosen": -0.7367702722549438, "logps/rejected": -3.9786949157714844, "loss": 1.0375, "nll_loss": 0.9548826217651367, "rewards/accuracies": 0.625, "rewards/chosen": -0.07367703318595886, "rewards/margins": 0.32419246435165405, "rewards/rejected": -0.3978694975376129, "step": 7921 }, { "epoch": 4.832697880128108, "grad_norm": 3.597991943359375, "learning_rate": 2.6748315982853643e-07, "log_odds_chosen": 4.138137340545654, "log_odds_ratio": -0.2053731381893158, "logits/chosen": -0.8333091735839844, "logits/rejected": -0.983083963394165, "logps/chosen": -0.7433701157569885, "logps/rejected": -3.9978387355804443, "loss": 0.9796, "nll_loss": 0.9387400150299072, "rewards/accuracies": 0.875, "rewards/chosen": -0.07433701306581497, "rewards/margins": 0.32544684410095215, "rewards/rejected": -0.3997839093208313, "step": 7922 }, { "epoch": 4.833307915205125, "grad_norm": 1.356520414352417, "learning_rate": 2.665033680342927e-07, "log_odds_chosen": 3.1679954528808594, "log_odds_ratio": -0.3290937840938568, "logits/chosen": -0.9065893292427063, "logits/rejected": -1.1487274169921875, "logps/chosen": -0.9092481732368469, "logps/rejected": -3.5905158519744873, "loss": 1.0964, "nll_loss": 1.1319859027862549, "rewards/accuracies": 0.875, "rewards/chosen": -0.09092481434345245, "rewards/margins": 0.26812678575515747, "rewards/rejected": -0.35905158519744873, "step": 7923 }, { "epoch": 4.833917950282141, "grad_norm": 1.4065675735473633, "learning_rate": 2.6552357624004894e-07, "log_odds_chosen": 3.1703524589538574, "log_odds_ratio": -0.5324448943138123, "logits/chosen": -0.9272662401199341, "logits/rejected": -1.0906684398651123, "logps/chosen": -0.83021479845047, "logps/rejected": -3.7178752422332764, "loss": 0.9123, "nll_loss": 1.0168706178665161, "rewards/accuracies": 0.875, "rewards/chosen": -0.08302148431539536, "rewards/margins": 0.2887660562992096, "rewards/rejected": -0.37178751826286316, "step": 7924 }, { "epoch": 4.834527985359158, "grad_norm": 1.701658010482788, "learning_rate": 2.6454378444580527e-07, "log_odds_chosen": 2.8578426837921143, "log_odds_ratio": -0.27747002243995667, "logits/chosen": -0.8233581781387329, "logits/rejected": -0.941213071346283, "logps/chosen": -0.7569851875305176, "logps/rejected": -3.1259310245513916, "loss": 1.0382, "nll_loss": 0.9729810953140259, "rewards/accuracies": 0.875, "rewards/chosen": -0.07569853216409683, "rewards/margins": 0.23689457774162292, "rewards/rejected": -0.31259310245513916, "step": 7925 }, { "epoch": 4.835138020436175, "grad_norm": 2.1392135620117188, "learning_rate": 2.6356399265156155e-07, "log_odds_chosen": 2.7030081748962402, "log_odds_ratio": -0.22922652959823608, "logits/chosen": -0.7454521059989929, "logits/rejected": -0.7469403147697449, "logps/chosen": -0.6390814185142517, "logps/rejected": -2.7222628593444824, "loss": 0.9718, "nll_loss": 0.9075342416763306, "rewards/accuracies": 1.0, "rewards/chosen": -0.06390814483165741, "rewards/margins": 0.20831817388534546, "rewards/rejected": -0.27222633361816406, "step": 7926 }, { "epoch": 4.835748055513192, "grad_norm": 1.4648722410202026, "learning_rate": 2.625842008573178e-07, "log_odds_chosen": 1.0429251194000244, "log_odds_ratio": -0.5486711263656616, "logits/chosen": -1.1092365980148315, "logits/rejected": -1.0641913414001465, "logps/chosen": -0.9034668207168579, "logps/rejected": -1.6080818176269531, "loss": 1.0021, "nll_loss": 1.064884066581726, "rewards/accuracies": 0.75, "rewards/chosen": -0.09034667909145355, "rewards/margins": 0.07046148926019669, "rewards/rejected": -0.16080817580223083, "step": 7927 }, { "epoch": 4.836358090590209, "grad_norm": 1.3054578304290771, "learning_rate": 2.6160440906307406e-07, "log_odds_chosen": 2.5940792560577393, "log_odds_ratio": -0.2297641783952713, "logits/chosen": -0.899039089679718, "logits/rejected": -1.1334125995635986, "logps/chosen": -0.6314725875854492, "logps/rejected": -2.416412591934204, "loss": 1.0426, "nll_loss": 0.7635598182678223, "rewards/accuracies": 1.0, "rewards/chosen": -0.06314726173877716, "rewards/margins": 0.17849400639533997, "rewards/rejected": -0.24164125323295593, "step": 7928 }, { "epoch": 4.836968125667226, "grad_norm": 1.9831523895263672, "learning_rate": 2.6062461726883034e-07, "log_odds_chosen": 1.7655634880065918, "log_odds_ratio": -0.4148409366607666, "logits/chosen": -0.8798079490661621, "logits/rejected": -1.0975373983383179, "logps/chosen": -0.8723803758621216, "logps/rejected": -2.2178218364715576, "loss": 1.2166, "nll_loss": 0.9038762450218201, "rewards/accuracies": 0.75, "rewards/chosen": -0.08723803609609604, "rewards/margins": 0.13454416394233704, "rewards/rejected": -0.22178220748901367, "step": 7929 }, { "epoch": 4.8375781607442425, "grad_norm": 4.8448638916015625, "learning_rate": 2.596448254745867e-07, "log_odds_chosen": 1.8354474306106567, "log_odds_ratio": -0.5030958652496338, "logits/chosen": -1.0989854335784912, "logits/rejected": -1.1036016941070557, "logps/chosen": -0.7979910373687744, "logps/rejected": -2.3002769947052, "loss": 1.0883, "nll_loss": 1.0587797164916992, "rewards/accuracies": 0.625, "rewards/chosen": -0.0797991007566452, "rewards/margins": 0.1502285897731781, "rewards/rejected": -0.2300276905298233, "step": 7930 }, { "epoch": 4.838188195821259, "grad_norm": 2.099242687225342, "learning_rate": 2.586650336803429e-07, "log_odds_chosen": 1.736344814300537, "log_odds_ratio": -0.28856635093688965, "logits/chosen": -1.0319024324417114, "logits/rejected": -1.0319132804870605, "logps/chosen": -0.9034983515739441, "logps/rejected": -2.2946057319641113, "loss": 0.918, "nll_loss": 1.0483112335205078, "rewards/accuracies": 1.0, "rewards/chosen": -0.09034983813762665, "rewards/margins": 0.1391107439994812, "rewards/rejected": -0.22946058213710785, "step": 7931 }, { "epoch": 4.838798230898277, "grad_norm": 2.015202760696411, "learning_rate": 2.576852418860992e-07, "log_odds_chosen": 1.0762426853179932, "log_odds_ratio": -0.5303099155426025, "logits/chosen": -1.0360636711120605, "logits/rejected": -0.9633274078369141, "logps/chosen": -0.825708270072937, "logps/rejected": -1.6029243469238281, "loss": 1.0396, "nll_loss": 1.13862943649292, "rewards/accuracies": 0.75, "rewards/chosen": -0.08257082104682922, "rewards/margins": 0.07772161066532135, "rewards/rejected": -0.16029244661331177, "step": 7932 }, { "epoch": 4.839408265975294, "grad_norm": 2.5824601650238037, "learning_rate": 2.5670545009185547e-07, "log_odds_chosen": 0.6275132298469543, "log_odds_ratio": -0.666617214679718, "logits/chosen": -1.0379917621612549, "logits/rejected": -1.0809054374694824, "logps/chosen": -1.0264496803283691, "logps/rejected": -1.4896928071975708, "loss": 0.972, "nll_loss": 1.158722162246704, "rewards/accuracies": 0.625, "rewards/chosen": -0.10264496505260468, "rewards/margins": 0.04632432758808136, "rewards/rejected": -0.14896929264068604, "step": 7933 }, { "epoch": 4.840018301052311, "grad_norm": 5.732318878173828, "learning_rate": 2.5572565829761175e-07, "log_odds_chosen": 3.737865447998047, "log_odds_ratio": -0.09773089736700058, "logits/chosen": -0.6929084062576294, "logits/rejected": -0.948651909828186, "logps/chosen": -0.46143412590026855, "logps/rejected": -3.1675429344177246, "loss": 0.8971, "nll_loss": 0.6272745728492737, "rewards/accuracies": 1.0, "rewards/chosen": -0.046143412590026855, "rewards/margins": 0.27061086893081665, "rewards/rejected": -0.3167542815208435, "step": 7934 }, { "epoch": 4.840628336129328, "grad_norm": 3.8702635765075684, "learning_rate": 2.5474586650336803e-07, "log_odds_chosen": 1.5372806787490845, "log_odds_ratio": -0.47278928756713867, "logits/chosen": -1.0682957172393799, "logits/rejected": -1.0934395790100098, "logps/chosen": -0.755154013633728, "logps/rejected": -2.041768789291382, "loss": 1.1735, "nll_loss": 1.1583164930343628, "rewards/accuracies": 0.625, "rewards/chosen": -0.07551540434360504, "rewards/margins": 0.12866148352622986, "rewards/rejected": -0.2041768878698349, "step": 7935 }, { "epoch": 4.8412383712063445, "grad_norm": 2.554490566253662, "learning_rate": 2.537660747091243e-07, "log_odds_chosen": 3.0541200637817383, "log_odds_ratio": -0.3427344560623169, "logits/chosen": -0.7463970184326172, "logits/rejected": -1.0363430976867676, "logps/chosen": -0.7961753606796265, "logps/rejected": -3.343797206878662, "loss": 1.2046, "nll_loss": 1.082177996635437, "rewards/accuracies": 0.875, "rewards/chosen": -0.07961753755807877, "rewards/margins": 0.2547621428966522, "rewards/rejected": -0.3343797028064728, "step": 7936 }, { "epoch": 4.841848406283361, "grad_norm": 6.299246788024902, "learning_rate": 2.527862829148806e-07, "log_odds_chosen": 2.588670492172241, "log_odds_ratio": -0.36770132184028625, "logits/chosen": -0.7901079654693604, "logits/rejected": -0.8393915891647339, "logps/chosen": -0.719226598739624, "logps/rejected": -2.733649253845215, "loss": 1.0868, "nll_loss": 0.6877207159996033, "rewards/accuracies": 0.75, "rewards/chosen": -0.071922667324543, "rewards/margins": 0.20144227147102356, "rewards/rejected": -0.27336493134498596, "step": 7937 }, { "epoch": 4.842458441360378, "grad_norm": 15.284762382507324, "learning_rate": 2.518064911206368e-07, "log_odds_chosen": 1.1046794652938843, "log_odds_ratio": -0.4200323522090912, "logits/chosen": -0.8930146098136902, "logits/rejected": -0.7227048873901367, "logps/chosen": -0.7765052914619446, "logps/rejected": -1.6528067588806152, "loss": 1.0592, "nll_loss": 1.049437403678894, "rewards/accuracies": 0.75, "rewards/chosen": -0.0776505321264267, "rewards/margins": 0.08763015270233154, "rewards/rejected": -0.16528068482875824, "step": 7938 }, { "epoch": 4.843068476437395, "grad_norm": 13.672164916992188, "learning_rate": 2.508266993263931e-07, "log_odds_chosen": 0.6765320301055908, "log_odds_ratio": -0.5446348190307617, "logits/chosen": -1.0392831563949585, "logits/rejected": -0.9420610666275024, "logps/chosen": -0.7014226913452148, "logps/rejected": -1.197022795677185, "loss": 1.0075, "nll_loss": 0.9578108191490173, "rewards/accuracies": 0.625, "rewards/chosen": -0.07014226168394089, "rewards/margins": 0.049560025334358215, "rewards/rejected": -0.1197022870182991, "step": 7939 }, { "epoch": 4.843678511514412, "grad_norm": 2.3370249271392822, "learning_rate": 2.498469075321494e-07, "log_odds_chosen": 1.5994186401367188, "log_odds_ratio": -0.4428192377090454, "logits/chosen": -1.0972009897232056, "logits/rejected": -1.1236884593963623, "logps/chosen": -1.0207186937332153, "logps/rejected": -2.306309938430786, "loss": 1.2335, "nll_loss": 1.248501181602478, "rewards/accuracies": 0.75, "rewards/chosen": -0.1020718663930893, "rewards/margins": 0.12855912744998932, "rewards/rejected": -0.2306310087442398, "step": 7940 }, { "epoch": 4.844288546591429, "grad_norm": 1.5758161544799805, "learning_rate": 2.488671157379057e-07, "log_odds_chosen": 3.3021020889282227, "log_odds_ratio": -0.6466454267501831, "logits/chosen": -0.9368380904197693, "logits/rejected": -1.0770847797393799, "logps/chosen": -0.9752265214920044, "logps/rejected": -3.9565508365631104, "loss": 1.1249, "nll_loss": 1.4203904867172241, "rewards/accuracies": 0.625, "rewards/chosen": -0.09752265363931656, "rewards/margins": 0.29813241958618164, "rewards/rejected": -0.3956550657749176, "step": 7941 }, { "epoch": 4.844898581668446, "grad_norm": 1.8509472608566284, "learning_rate": 2.4788732394366194e-07, "log_odds_chosen": 1.8059197664260864, "log_odds_ratio": -0.44646328687667847, "logits/chosen": -0.7999266982078552, "logits/rejected": -1.0423123836517334, "logps/chosen": -0.8407782912254333, "logps/rejected": -2.0021018981933594, "loss": 1.2476, "nll_loss": 1.282393217086792, "rewards/accuracies": 0.75, "rewards/chosen": -0.08407783508300781, "rewards/margins": 0.11613236367702484, "rewards/rejected": -0.20021018385887146, "step": 7942 }, { "epoch": 4.845508616745462, "grad_norm": 7.509653568267822, "learning_rate": 2.469075321494182e-07, "log_odds_chosen": 2.1090641021728516, "log_odds_ratio": -0.43995043635368347, "logits/chosen": -0.8938966393470764, "logits/rejected": -0.7994387745857239, "logps/chosen": -0.8403807878494263, "logps/rejected": -2.5566892623901367, "loss": 0.9292, "nll_loss": 0.9072675108909607, "rewards/accuracies": 0.75, "rewards/chosen": -0.08403808623552322, "rewards/margins": 0.1716308295726776, "rewards/rejected": -0.2556689381599426, "step": 7943 }, { "epoch": 4.84611865182248, "grad_norm": 4.565431594848633, "learning_rate": 2.459277403551745e-07, "log_odds_chosen": 0.950157880783081, "log_odds_ratio": -0.5225560665130615, "logits/chosen": -0.808086633682251, "logits/rejected": -0.9539443254470825, "logps/chosen": -0.7669320106506348, "logps/rejected": -1.4118826389312744, "loss": 1.0806, "nll_loss": 0.8894991278648376, "rewards/accuracies": 0.75, "rewards/chosen": -0.07669319957494736, "rewards/margins": 0.06449506431818008, "rewards/rejected": -0.14118826389312744, "step": 7944 }, { "epoch": 4.846728686899497, "grad_norm": 2.3689701557159424, "learning_rate": 2.449479485609308e-07, "log_odds_chosen": 1.1462668180465698, "log_odds_ratio": -0.521736741065979, "logits/chosen": -0.906792402267456, "logits/rejected": -0.943480372428894, "logps/chosen": -0.8610625863075256, "logps/rejected": -1.758190631866455, "loss": 1.1917, "nll_loss": 1.2296251058578491, "rewards/accuracies": 0.75, "rewards/chosen": -0.08610625565052032, "rewards/margins": 0.08971281349658966, "rewards/rejected": -0.17581906914710999, "step": 7945 }, { "epoch": 4.847338721976514, "grad_norm": 4.1457037925720215, "learning_rate": 2.4396815676668707e-07, "log_odds_chosen": 1.3017040491104126, "log_odds_ratio": -0.6105294227600098, "logits/chosen": -0.9969456195831299, "logits/rejected": -1.114648461341858, "logps/chosen": -1.0661193132400513, "logps/rejected": -2.04512619972229, "loss": 1.0338, "nll_loss": 1.0079609155654907, "rewards/accuracies": 0.75, "rewards/chosen": -0.10661192238330841, "rewards/margins": 0.09790069609880447, "rewards/rejected": -0.20451262593269348, "step": 7946 }, { "epoch": 4.847948757053531, "grad_norm": 1.5250296592712402, "learning_rate": 2.4298836497244335e-07, "log_odds_chosen": 0.4861990213394165, "log_odds_ratio": -0.6187933683395386, "logits/chosen": -0.9518605470657349, "logits/rejected": -0.9889460206031799, "logps/chosen": -1.0876926183700562, "logps/rejected": -1.452453851699829, "loss": 1.1056, "nll_loss": 1.327942132949829, "rewards/accuracies": 0.5, "rewards/chosen": -0.10876926779747009, "rewards/margins": 0.036476124078035355, "rewards/rejected": -0.14524538815021515, "step": 7947 }, { "epoch": 4.8485587921305475, "grad_norm": 1.2927372455596924, "learning_rate": 2.4200857317819963e-07, "log_odds_chosen": 2.5584864616394043, "log_odds_ratio": -0.5465930700302124, "logits/chosen": -0.9362184405326843, "logits/rejected": -0.9563238620758057, "logps/chosen": -1.0647884607315063, "logps/rejected": -3.507760524749756, "loss": 1.0782, "nll_loss": 1.1237330436706543, "rewards/accuracies": 0.625, "rewards/chosen": -0.10647885501384735, "rewards/margins": 0.24429723620414734, "rewards/rejected": -0.3507760763168335, "step": 7948 }, { "epoch": 4.849168827207564, "grad_norm": 1.752120018005371, "learning_rate": 2.410287813839559e-07, "log_odds_chosen": 1.3106493949890137, "log_odds_ratio": -0.5076951384544373, "logits/chosen": -0.8010809421539307, "logits/rejected": -0.9046353101730347, "logps/chosen": -0.8514807224273682, "logps/rejected": -2.002534866333008, "loss": 1.077, "nll_loss": 1.0028057098388672, "rewards/accuracies": 0.625, "rewards/chosen": -0.08514808118343353, "rewards/margins": 0.11510539799928665, "rewards/rejected": -0.20025348663330078, "step": 7949 }, { "epoch": 4.849778862284581, "grad_norm": 2.4113993644714355, "learning_rate": 2.400489895897122e-07, "log_odds_chosen": 1.4231061935424805, "log_odds_ratio": -0.3866196870803833, "logits/chosen": -0.9044755697250366, "logits/rejected": -1.109592318534851, "logps/chosen": -0.8719046115875244, "logps/rejected": -1.8937273025512695, "loss": 1.0136, "nll_loss": 1.1448343992233276, "rewards/accuracies": 0.875, "rewards/chosen": -0.08719044923782349, "rewards/margins": 0.10218225419521332, "rewards/rejected": -0.189372718334198, "step": 7950 }, { "epoch": 4.850388897361598, "grad_norm": 2.3596408367156982, "learning_rate": 2.390691977954684e-07, "log_odds_chosen": 3.9701120853424072, "log_odds_ratio": -0.09338384866714478, "logits/chosen": -0.7334644794464111, "logits/rejected": -1.0513328313827515, "logps/chosen": -0.7423614263534546, "logps/rejected": -3.998136281967163, "loss": 0.9489, "nll_loss": 0.8223854899406433, "rewards/accuracies": 1.0, "rewards/chosen": -0.07423614710569382, "rewards/margins": 0.3255774676799774, "rewards/rejected": -0.39981362223625183, "step": 7951 }, { "epoch": 4.850998932438615, "grad_norm": 2.1579906940460205, "learning_rate": 2.3808940600122473e-07, "log_odds_chosen": 3.7759335041046143, "log_odds_ratio": -0.1768627166748047, "logits/chosen": -0.9551821351051331, "logits/rejected": -1.1620479822158813, "logps/chosen": -0.5522690415382385, "logps/rejected": -3.570889949798584, "loss": 1.038, "nll_loss": 1.0116783380508423, "rewards/accuracies": 1.0, "rewards/chosen": -0.05522690340876579, "rewards/margins": 0.30186206102371216, "rewards/rejected": -0.35708898305892944, "step": 7952 }, { "epoch": 4.851608967515633, "grad_norm": 1.857162594795227, "learning_rate": 2.37109614206981e-07, "log_odds_chosen": 0.4054526388645172, "log_odds_ratio": -0.7009815573692322, "logits/chosen": -1.0185503959655762, "logits/rejected": -1.095751404762268, "logps/chosen": -0.804817795753479, "logps/rejected": -1.0459785461425781, "loss": 1.1899, "nll_loss": 0.9002972841262817, "rewards/accuracies": 0.75, "rewards/chosen": -0.08048178255558014, "rewards/margins": 0.02411608211696148, "rewards/rejected": -0.10459786653518677, "step": 7953 }, { "epoch": 4.8522190025926495, "grad_norm": 1.3521091938018799, "learning_rate": 2.361298224127373e-07, "log_odds_chosen": 0.8457456231117249, "log_odds_ratio": -0.4715968370437622, "logits/chosen": -1.0342977046966553, "logits/rejected": -1.080046534538269, "logps/chosen": -0.8652385473251343, "logps/rejected": -1.4939749240875244, "loss": 1.0341, "nll_loss": 0.9228453636169434, "rewards/accuracies": 0.625, "rewards/chosen": -0.0865238606929779, "rewards/margins": 0.06287362426519394, "rewards/rejected": -0.14939749240875244, "step": 7954 }, { "epoch": 4.852829037669666, "grad_norm": 8.180912971496582, "learning_rate": 2.3515003061849357e-07, "log_odds_chosen": 1.4895328283309937, "log_odds_ratio": -0.4090844988822937, "logits/chosen": -0.7453044652938843, "logits/rejected": -0.9342586994171143, "logps/chosen": -0.7215617299079895, "logps/rejected": -1.798570156097412, "loss": 0.982, "nll_loss": 0.8604462146759033, "rewards/accuracies": 0.75, "rewards/chosen": -0.07215617597103119, "rewards/margins": 0.10770083963871002, "rewards/rejected": -0.1798570305109024, "step": 7955 }, { "epoch": 4.853439072746683, "grad_norm": 1.8949936628341675, "learning_rate": 2.3417023882424983e-07, "log_odds_chosen": 0.8707935810089111, "log_odds_ratio": -0.55329430103302, "logits/chosen": -0.9044886231422424, "logits/rejected": -0.8101643919944763, "logps/chosen": -0.8106997013092041, "logps/rejected": -1.4560763835906982, "loss": 1.1828, "nll_loss": 1.1080760955810547, "rewards/accuracies": 0.5, "rewards/chosen": -0.08106996864080429, "rewards/margins": 0.0645376592874527, "rewards/rejected": -0.1456076204776764, "step": 7956 }, { "epoch": 4.8540491078237, "grad_norm": 2.5658769607543945, "learning_rate": 2.331904470300061e-07, "log_odds_chosen": 1.0016591548919678, "log_odds_ratio": -0.5528084635734558, "logits/chosen": -1.1209614276885986, "logits/rejected": -1.0326650142669678, "logps/chosen": -0.7246482372283936, "logps/rejected": -1.6064778566360474, "loss": 1.0435, "nll_loss": 1.0139641761779785, "rewards/accuracies": 0.5, "rewards/chosen": -0.07246483117341995, "rewards/margins": 0.0881829559803009, "rewards/rejected": -0.16064777970314026, "step": 7957 }, { "epoch": 4.854659142900717, "grad_norm": 1.4943450689315796, "learning_rate": 2.322106552357624e-07, "log_odds_chosen": 2.0768613815307617, "log_odds_ratio": -0.3849763870239258, "logits/chosen": -1.0173308849334717, "logits/rejected": -1.087612271308899, "logps/chosen": -0.6736416220664978, "logps/rejected": -2.427643060684204, "loss": 0.962, "nll_loss": 0.7522441744804382, "rewards/accuracies": 0.75, "rewards/chosen": -0.0673641636967659, "rewards/margins": 0.17540013790130615, "rewards/rejected": -0.24276430904865265, "step": 7958 }, { "epoch": 4.855269177977734, "grad_norm": 1.3757332563400269, "learning_rate": 2.3123086344151864e-07, "log_odds_chosen": 2.0680575370788574, "log_odds_ratio": -0.49316972494125366, "logits/chosen": -0.9651056528091431, "logits/rejected": -1.0702468156814575, "logps/chosen": -0.991629958152771, "logps/rejected": -2.708665609359741, "loss": 1.0656, "nll_loss": 1.1426655054092407, "rewards/accuracies": 0.875, "rewards/chosen": -0.0991629958152771, "rewards/margins": 0.17170357704162598, "rewards/rejected": -0.2708665728569031, "step": 7959 }, { "epoch": 4.855879213054751, "grad_norm": 2.0700979232788086, "learning_rate": 2.3025107164727495e-07, "log_odds_chosen": 4.34163761138916, "log_odds_ratio": -0.42264294624328613, "logits/chosen": -0.8743475675582886, "logits/rejected": -1.0182056427001953, "logps/chosen": -0.660628080368042, "logps/rejected": -4.29946231842041, "loss": 0.874, "nll_loss": 0.7432185411453247, "rewards/accuracies": 0.625, "rewards/chosen": -0.0660628080368042, "rewards/margins": 0.36388346552848816, "rewards/rejected": -0.42994627356529236, "step": 7960 }, { "epoch": 4.856489248131767, "grad_norm": 12.530065536499023, "learning_rate": 2.292712798530312e-07, "log_odds_chosen": 1.4327313899993896, "log_odds_ratio": -0.4277651309967041, "logits/chosen": -1.009786605834961, "logits/rejected": -1.0307549238204956, "logps/chosen": -0.769559383392334, "logps/rejected": -1.8468866348266602, "loss": 0.9213, "nll_loss": 0.8209951519966125, "rewards/accuracies": 0.875, "rewards/chosen": -0.07695593684911728, "rewards/margins": 0.10773272067308426, "rewards/rejected": -0.18468865752220154, "step": 7961 }, { "epoch": 4.857099283208784, "grad_norm": 1.259126901626587, "learning_rate": 2.2829148805878751e-07, "log_odds_chosen": 1.5471947193145752, "log_odds_ratio": -0.34090495109558105, "logits/chosen": -0.8173417448997498, "logits/rejected": -0.9266167879104614, "logps/chosen": -0.6588218212127686, "logps/rejected": -1.5949459075927734, "loss": 1.0059, "nll_loss": 0.8351855278015137, "rewards/accuracies": 0.875, "rewards/chosen": -0.06588217616081238, "rewards/margins": 0.09361241012811661, "rewards/rejected": -0.15949459373950958, "step": 7962 }, { "epoch": 4.857709318285801, "grad_norm": 1.86370050907135, "learning_rate": 2.2731169626454377e-07, "log_odds_chosen": 0.8025985956192017, "log_odds_ratio": -0.5156046748161316, "logits/chosen": -0.8351334929466248, "logits/rejected": -0.909873902797699, "logps/chosen": -0.9011077880859375, "logps/rejected": -1.4461275339126587, "loss": 1.0415, "nll_loss": 0.9551929831504822, "rewards/accuracies": 0.5, "rewards/chosen": -0.09011077880859375, "rewards/margins": 0.054501973092556, "rewards/rejected": -0.14461275935173035, "step": 7963 }, { "epoch": 4.858319353362818, "grad_norm": 9.258767127990723, "learning_rate": 2.2633190447030005e-07, "log_odds_chosen": 5.603235721588135, "log_odds_ratio": -0.13714030385017395, "logits/chosen": -0.8490060567855835, "logits/rejected": -0.9379062652587891, "logps/chosen": -0.5955623388290405, "logps/rejected": -5.212908744812012, "loss": 0.9562, "nll_loss": 0.7920582294464111, "rewards/accuracies": 0.875, "rewards/chosen": -0.05955623462796211, "rewards/margins": 0.4617346525192261, "rewards/rejected": -0.5212908983230591, "step": 7964 }, { "epoch": 4.858929388439835, "grad_norm": 2.6433422565460205, "learning_rate": 2.2535211267605633e-07, "log_odds_chosen": 1.264707088470459, "log_odds_ratio": -0.4070037603378296, "logits/chosen": -0.5892511606216431, "logits/rejected": -0.849905252456665, "logps/chosen": -0.7350900769233704, "logps/rejected": -1.6547096967697144, "loss": 1.036, "nll_loss": 1.0127140283584595, "rewards/accuracies": 0.875, "rewards/chosen": -0.07350900769233704, "rewards/margins": 0.09196197986602783, "rewards/rejected": -0.16547097265720367, "step": 7965 }, { "epoch": 4.8595394235168525, "grad_norm": 1.5618661642074585, "learning_rate": 2.243723208818126e-07, "log_odds_chosen": 1.6107856035232544, "log_odds_ratio": -0.3125905990600586, "logits/chosen": -0.8493303060531616, "logits/rejected": -0.9750447869300842, "logps/chosen": -0.5800347328186035, "logps/rejected": -1.5582122802734375, "loss": 1.0658, "nll_loss": 0.8509649634361267, "rewards/accuracies": 1.0, "rewards/chosen": -0.05800347030162811, "rewards/margins": 0.09781775623559952, "rewards/rejected": -0.15582123398780823, "step": 7966 }, { "epoch": 4.860149458593869, "grad_norm": 1.498808741569519, "learning_rate": 2.233925290875689e-07, "log_odds_chosen": 1.8170833587646484, "log_odds_ratio": -0.3815614581108093, "logits/chosen": -0.8361892700195312, "logits/rejected": -1.1113755702972412, "logps/chosen": -0.7340297698974609, "logps/rejected": -2.1042473316192627, "loss": 0.8687, "nll_loss": 0.7892241477966309, "rewards/accuracies": 0.75, "rewards/chosen": -0.07340297847986221, "rewards/margins": 0.1370217651128769, "rewards/rejected": -0.2104247361421585, "step": 7967 }, { "epoch": 4.860759493670886, "grad_norm": 1.714070439338684, "learning_rate": 2.2241273729332515e-07, "log_odds_chosen": 0.2750523090362549, "log_odds_ratio": -0.5982421636581421, "logits/chosen": -1.0283327102661133, "logits/rejected": -0.9415611028671265, "logps/chosen": -0.7639687061309814, "logps/rejected": -0.9392600655555725, "loss": 0.9341, "nll_loss": 0.8090476989746094, "rewards/accuracies": 0.625, "rewards/chosen": -0.0763968750834465, "rewards/margins": 0.017529137432575226, "rewards/rejected": -0.09392601251602173, "step": 7968 }, { "epoch": 4.861369528747903, "grad_norm": 1.6422380208969116, "learning_rate": 2.2143294549908143e-07, "log_odds_chosen": 2.4064435958862305, "log_odds_ratio": -0.4129748046398163, "logits/chosen": -0.6065905690193176, "logits/rejected": -0.8651694655418396, "logps/chosen": -0.5222304463386536, "logps/rejected": -2.424480438232422, "loss": 1.0818, "nll_loss": 0.6739407777786255, "rewards/accuracies": 0.75, "rewards/chosen": -0.052223049104213715, "rewards/margins": 0.19022499024868011, "rewards/rejected": -0.24244804680347443, "step": 7969 }, { "epoch": 4.86197956382492, "grad_norm": 1.4017053842544556, "learning_rate": 2.204531537048377e-07, "log_odds_chosen": 1.3909108638763428, "log_odds_ratio": -0.45045405626296997, "logits/chosen": -0.7116826176643372, "logits/rejected": -0.8596770763397217, "logps/chosen": -0.6664465665817261, "logps/rejected": -1.592355728149414, "loss": 0.912, "nll_loss": 0.6667966842651367, "rewards/accuracies": 0.75, "rewards/chosen": -0.06664466112852097, "rewards/margins": 0.09259092807769775, "rewards/rejected": -0.15923558175563812, "step": 7970 }, { "epoch": 4.862589598901937, "grad_norm": 2.2304251194000244, "learning_rate": 2.19473361910594e-07, "log_odds_chosen": 2.020930290222168, "log_odds_ratio": -0.2202819585800171, "logits/chosen": -1.0544731616973877, "logits/rejected": -0.9733315706253052, "logps/chosen": -0.8324363827705383, "logps/rejected": -2.432924747467041, "loss": 0.9959, "nll_loss": 0.9259577989578247, "rewards/accuracies": 1.0, "rewards/chosen": -0.08324363827705383, "rewards/margins": 0.16004882752895355, "rewards/rejected": -0.2432924509048462, "step": 7971 }, { "epoch": 4.863199633978954, "grad_norm": 1.702065110206604, "learning_rate": 2.1849357011635027e-07, "log_odds_chosen": 1.9578279256820679, "log_odds_ratio": -0.3266972005367279, "logits/chosen": -0.9573372602462769, "logits/rejected": -1.0653594732284546, "logps/chosen": -0.8081454038619995, "logps/rejected": -2.303295135498047, "loss": 1.103, "nll_loss": 0.9808908104896545, "rewards/accuracies": 0.875, "rewards/chosen": -0.08081454038619995, "rewards/margins": 0.14951500296592712, "rewards/rejected": -0.23032952845096588, "step": 7972 }, { "epoch": 4.8638096690559705, "grad_norm": 1.4355849027633667, "learning_rate": 2.1751377832210655e-07, "log_odds_chosen": 3.303018569946289, "log_odds_ratio": -0.4378049671649933, "logits/chosen": -0.908714771270752, "logits/rejected": -1.159106731414795, "logps/chosen": -0.6711364388465881, "logps/rejected": -3.413503646850586, "loss": 0.8552, "nll_loss": 0.7695037126541138, "rewards/accuracies": 0.75, "rewards/chosen": -0.06711364537477493, "rewards/margins": 0.2742367386817932, "rewards/rejected": -0.34135037660598755, "step": 7973 }, { "epoch": 4.864419704132987, "grad_norm": 1.39186429977417, "learning_rate": 2.165339865278628e-07, "log_odds_chosen": 2.240335464477539, "log_odds_ratio": -0.4707343578338623, "logits/chosen": -0.9323990941047668, "logits/rejected": -0.8999065160751343, "logps/chosen": -0.8233785629272461, "logps/rejected": -2.6936235427856445, "loss": 1.0497, "nll_loss": 0.9524878859519958, "rewards/accuracies": 0.75, "rewards/chosen": -0.0823378637433052, "rewards/margins": 0.18702447414398193, "rewards/rejected": -0.26936233043670654, "step": 7974 }, { "epoch": 4.865029739210005, "grad_norm": 13.326284408569336, "learning_rate": 2.1555419473361911e-07, "log_odds_chosen": 1.1134610176086426, "log_odds_ratio": -0.35873913764953613, "logits/chosen": -0.7603656053543091, "logits/rejected": -0.9565211534500122, "logps/chosen": -0.8000977039337158, "logps/rejected": -1.5743813514709473, "loss": 1.063, "nll_loss": 0.9325224161148071, "rewards/accuracies": 0.875, "rewards/chosen": -0.08000977337360382, "rewards/margins": 0.07742836326360703, "rewards/rejected": -0.15743812918663025, "step": 7975 }, { "epoch": 4.865639774287022, "grad_norm": 2.585918426513672, "learning_rate": 2.1457440293937537e-07, "log_odds_chosen": 1.9314934015274048, "log_odds_ratio": -0.36670422554016113, "logits/chosen": -0.9764734506607056, "logits/rejected": -1.0901888608932495, "logps/chosen": -0.8219283819198608, "logps/rejected": -2.2721962928771973, "loss": 1.1759, "nll_loss": 1.024709939956665, "rewards/accuracies": 0.875, "rewards/chosen": -0.08219283074140549, "rewards/margins": 0.1450267881155014, "rewards/rejected": -0.2272196114063263, "step": 7976 }, { "epoch": 4.866249809364039, "grad_norm": 1.960486650466919, "learning_rate": 2.1359461114513162e-07, "log_odds_chosen": 4.749617576599121, "log_odds_ratio": -0.18202492594718933, "logits/chosen": -0.8282028436660767, "logits/rejected": -1.0603175163269043, "logps/chosen": -0.6964645981788635, "logps/rejected": -4.796409606933594, "loss": 0.9692, "nll_loss": 0.8172493577003479, "rewards/accuracies": 1.0, "rewards/chosen": -0.06964646279811859, "rewards/margins": 0.409994512796402, "rewards/rejected": -0.47964099049568176, "step": 7977 }, { "epoch": 4.866859844441056, "grad_norm": 8.045037269592285, "learning_rate": 2.1261481935088793e-07, "log_odds_chosen": 0.7907470464706421, "log_odds_ratio": -0.5264125466346741, "logits/chosen": -1.0569791793823242, "logits/rejected": -1.17966628074646, "logps/chosen": -0.9339182376861572, "logps/rejected": -1.577157735824585, "loss": 1.108, "nll_loss": 1.192602276802063, "rewards/accuracies": 0.625, "rewards/chosen": -0.09339182823896408, "rewards/margins": 0.06432393938302994, "rewards/rejected": -0.15771576762199402, "step": 7978 }, { "epoch": 4.867469879518072, "grad_norm": 12.316102027893066, "learning_rate": 2.1163502755664419e-07, "log_odds_chosen": 2.270789861679077, "log_odds_ratio": -0.3832204043865204, "logits/chosen": -0.8990846276283264, "logits/rejected": -0.9980796575546265, "logps/chosen": -0.7055115699768066, "logps/rejected": -2.524775505065918, "loss": 1.0796, "nll_loss": 0.8554573059082031, "rewards/accuracies": 0.625, "rewards/chosen": -0.07055115699768066, "rewards/margins": 0.1819263994693756, "rewards/rejected": -0.2524775564670563, "step": 7979 }, { "epoch": 4.868079914595089, "grad_norm": 10.738765716552734, "learning_rate": 2.106552357624005e-07, "log_odds_chosen": 2.250300407409668, "log_odds_ratio": -0.16655071079730988, "logits/chosen": -1.0769847631454468, "logits/rejected": -1.1236661672592163, "logps/chosen": -0.8811692595481873, "logps/rejected": -2.630357027053833, "loss": 1.1674, "nll_loss": 1.101212978363037, "rewards/accuracies": 1.0, "rewards/chosen": -0.08811692893505096, "rewards/margins": 0.1749187856912613, "rewards/rejected": -0.26303571462631226, "step": 7980 }, { "epoch": 4.868689949672106, "grad_norm": 2.2882139682769775, "learning_rate": 2.0967544396815675e-07, "log_odds_chosen": 1.7114664316177368, "log_odds_ratio": -0.4165725111961365, "logits/chosen": -0.886388897895813, "logits/rejected": -1.0379226207733154, "logps/chosen": -0.8657615780830383, "logps/rejected": -2.1120078563690186, "loss": 1.0273, "nll_loss": 0.9567254781723022, "rewards/accuracies": 0.75, "rewards/chosen": -0.08657615631818771, "rewards/margins": 0.12462462484836578, "rewards/rejected": -0.2112007737159729, "step": 7981 }, { "epoch": 4.869299984749123, "grad_norm": 2.1530416011810303, "learning_rate": 2.0869565217391303e-07, "log_odds_chosen": 2.6804771423339844, "log_odds_ratio": -0.23696908354759216, "logits/chosen": -0.9869606494903564, "logits/rejected": -1.1359803676605225, "logps/chosen": -0.8720740079879761, "logps/rejected": -3.1530230045318604, "loss": 0.9629, "nll_loss": 1.0609753131866455, "rewards/accuracies": 0.875, "rewards/chosen": -0.08720740675926208, "rewards/margins": 0.2280949056148529, "rewards/rejected": -0.315302312374115, "step": 7982 }, { "epoch": 4.86991001982614, "grad_norm": 11.40542221069336, "learning_rate": 2.077158603796693e-07, "log_odds_chosen": 1.938201665878296, "log_odds_ratio": -0.21068136394023895, "logits/chosen": -0.9832981824874878, "logits/rejected": -1.056384563446045, "logps/chosen": -0.7502551078796387, "logps/rejected": -2.1099190711975098, "loss": 1.0652, "nll_loss": 1.2022714614868164, "rewards/accuracies": 1.0, "rewards/chosen": -0.0750255137681961, "rewards/margins": 0.13596639037132263, "rewards/rejected": -0.21099190413951874, "step": 7983 }, { "epoch": 4.870520054903157, "grad_norm": 4.087358474731445, "learning_rate": 2.067360685854256e-07, "log_odds_chosen": 2.9204838275909424, "log_odds_ratio": -0.5572337508201599, "logits/chosen": -0.6929309368133545, "logits/rejected": -0.8488349318504333, "logps/chosen": -0.7173497676849365, "logps/rejected": -3.29835844039917, "loss": 1.0289, "nll_loss": 0.9030793309211731, "rewards/accuracies": 0.5, "rewards/chosen": -0.07173497974872589, "rewards/margins": 0.25810086727142334, "rewards/rejected": -0.32983583211898804, "step": 7984 }, { "epoch": 4.8711300899801735, "grad_norm": 1.5314035415649414, "learning_rate": 2.0575627679118187e-07, "log_odds_chosen": 1.728758692741394, "log_odds_ratio": -0.24183382093906403, "logits/chosen": -0.938051700592041, "logits/rejected": -1.0601985454559326, "logps/chosen": -0.7758398056030273, "logps/rejected": -2.0032544136047363, "loss": 0.9715, "nll_loss": 0.8933328986167908, "rewards/accuracies": 1.0, "rewards/chosen": -0.07758398354053497, "rewards/margins": 0.1227414458990097, "rewards/rejected": -0.20032541453838348, "step": 7985 }, { "epoch": 4.87174012505719, "grad_norm": 1.7498401403427124, "learning_rate": 2.0477648499693815e-07, "log_odds_chosen": 2.810096025466919, "log_odds_ratio": -0.30943650007247925, "logits/chosen": -0.7055883407592773, "logits/rejected": -1.0190414190292358, "logps/chosen": -0.632220983505249, "logps/rejected": -2.75833797454834, "loss": 1.0437, "nll_loss": 0.7399348020553589, "rewards/accuracies": 0.75, "rewards/chosen": -0.06322210282087326, "rewards/margins": 0.21261170506477356, "rewards/rejected": -0.2758338153362274, "step": 7986 }, { "epoch": 4.872350160134208, "grad_norm": 2.099951982498169, "learning_rate": 2.037966932026944e-07, "log_odds_chosen": 5.033385276794434, "log_odds_ratio": -0.1995791792869568, "logits/chosen": -0.7086806297302246, "logits/rejected": -1.0305677652359009, "logps/chosen": -0.5171435475349426, "logps/rejected": -4.764734268188477, "loss": 0.9837, "nll_loss": 0.7076157331466675, "rewards/accuracies": 0.875, "rewards/chosen": -0.05171436071395874, "rewards/margins": 0.42475906014442444, "rewards/rejected": -0.4764734208583832, "step": 7987 }, { "epoch": 4.872960195211225, "grad_norm": 2.2849931716918945, "learning_rate": 2.028169014084507e-07, "log_odds_chosen": 2.8270750045776367, "log_odds_ratio": -0.39544740319252014, "logits/chosen": -0.7370052337646484, "logits/rejected": -0.9744059443473816, "logps/chosen": -0.5164110660552979, "logps/rejected": -2.6971704959869385, "loss": 0.9259, "nll_loss": 0.8448446989059448, "rewards/accuracies": 0.625, "rewards/chosen": -0.051641110330820084, "rewards/margins": 0.2180759608745575, "rewards/rejected": -0.2697170674800873, "step": 7988 }, { "epoch": 4.873570230288242, "grad_norm": 1.238390326499939, "learning_rate": 2.0183710961420697e-07, "log_odds_chosen": 2.1360700130462646, "log_odds_ratio": -0.3262331783771515, "logits/chosen": -0.8599854707717896, "logits/rejected": -0.9374209642410278, "logps/chosen": -0.7024061679840088, "logps/rejected": -2.3787877559661865, "loss": 0.9214, "nll_loss": 0.7746835947036743, "rewards/accuracies": 1.0, "rewards/chosen": -0.07024061679840088, "rewards/margins": 0.1676381677389145, "rewards/rejected": -0.23787876963615417, "step": 7989 }, { "epoch": 4.874180265365259, "grad_norm": 24.661418914794922, "learning_rate": 2.0085731781996325e-07, "log_odds_chosen": 1.2029472589492798, "log_odds_ratio": -0.5524229407310486, "logits/chosen": -0.8321830034255981, "logits/rejected": -0.9026800990104675, "logps/chosen": -0.8103159070014954, "logps/rejected": -1.7646210193634033, "loss": 1.0078, "nll_loss": 0.9155066609382629, "rewards/accuracies": 0.5, "rewards/chosen": -0.08103159070014954, "rewards/margins": 0.09543052315711975, "rewards/rejected": -0.1764621138572693, "step": 7990 }, { "epoch": 4.8747903004422755, "grad_norm": 1.3878744840621948, "learning_rate": 1.9987752602571953e-07, "log_odds_chosen": 1.4267560243606567, "log_odds_ratio": -0.42990028858184814, "logits/chosen": -1.0405333042144775, "logits/rejected": -0.9402192234992981, "logps/chosen": -0.8516699075698853, "logps/rejected": -1.7817463874816895, "loss": 1.094, "nll_loss": 1.1270090341567993, "rewards/accuracies": 0.75, "rewards/chosen": -0.08516699075698853, "rewards/margins": 0.0930076465010643, "rewards/rejected": -0.17817464470863342, "step": 7991 }, { "epoch": 4.875400335519292, "grad_norm": 1.5290676355361938, "learning_rate": 1.988977342314758e-07, "log_odds_chosen": 0.9775692820549011, "log_odds_ratio": -0.44331780076026917, "logits/chosen": -0.9326874017715454, "logits/rejected": -1.0650749206542969, "logps/chosen": -0.8790991902351379, "logps/rejected": -1.5996429920196533, "loss": 1.1543, "nll_loss": 1.2017213106155396, "rewards/accuracies": 0.75, "rewards/chosen": -0.08790992200374603, "rewards/margins": 0.07205437868833542, "rewards/rejected": -0.15996429324150085, "step": 7992 }, { "epoch": 4.876010370596309, "grad_norm": 2.0777320861816406, "learning_rate": 1.979179424372321e-07, "log_odds_chosen": 0.48648738861083984, "log_odds_ratio": -0.6463319063186646, "logits/chosen": -1.0936826467514038, "logits/rejected": -1.003250002861023, "logps/chosen": -1.0953267812728882, "logps/rejected": -1.514930009841919, "loss": 1.214, "nll_loss": 1.328106164932251, "rewards/accuracies": 0.625, "rewards/chosen": -0.1095326766371727, "rewards/margins": 0.04196033626794815, "rewards/rejected": -0.15149301290512085, "step": 7993 }, { "epoch": 4.876620405673326, "grad_norm": 1.877449631690979, "learning_rate": 1.9693815064298835e-07, "log_odds_chosen": 2.0509302616119385, "log_odds_ratio": -0.4633368253707886, "logits/chosen": -0.7769455313682556, "logits/rejected": -0.9350641369819641, "logps/chosen": -0.7098738551139832, "logps/rejected": -2.3445518016815186, "loss": 0.9184, "nll_loss": 0.7933666706085205, "rewards/accuracies": 0.625, "rewards/chosen": -0.07098738849163055, "rewards/margins": 0.16346780955791473, "rewards/rejected": -0.2344551980495453, "step": 7994 }, { "epoch": 4.877230440750343, "grad_norm": 14.34495735168457, "learning_rate": 1.9595835884874463e-07, "log_odds_chosen": 3.1452090740203857, "log_odds_ratio": -0.26041948795318604, "logits/chosen": -0.9379395246505737, "logits/rejected": -1.0827674865722656, "logps/chosen": -0.9726171493530273, "logps/rejected": -3.6594886779785156, "loss": 1.1136, "nll_loss": 1.2150169610977173, "rewards/accuracies": 0.75, "rewards/chosen": -0.0972617119550705, "rewards/margins": 0.2686871588230133, "rewards/rejected": -0.3659488558769226, "step": 7995 }, { "epoch": 4.87784047582736, "grad_norm": 2.1344106197357178, "learning_rate": 1.949785670545009e-07, "log_odds_chosen": 1.4535924196243286, "log_odds_ratio": -0.4480879306793213, "logits/chosen": -0.9322496652603149, "logits/rejected": -1.0521812438964844, "logps/chosen": -0.9735633134841919, "logps/rejected": -2.1231517791748047, "loss": 1.072, "nll_loss": 1.2534469366073608, "rewards/accuracies": 0.75, "rewards/chosen": -0.09735632687807083, "rewards/margins": 0.11495883762836456, "rewards/rejected": -0.212315171957016, "step": 7996 }, { "epoch": 4.878450510904377, "grad_norm": 1.0146501064300537, "learning_rate": 1.939987752602572e-07, "log_odds_chosen": 2.6019740104675293, "log_odds_ratio": -0.33005404472351074, "logits/chosen": -0.8941395878791809, "logits/rejected": -1.024981141090393, "logps/chosen": -0.7945084571838379, "logps/rejected": -2.7885122299194336, "loss": 1.0899, "nll_loss": 1.0147391557693481, "rewards/accuracies": 0.625, "rewards/chosen": -0.07945083826780319, "rewards/margins": 0.19940036535263062, "rewards/rejected": -0.2788512110710144, "step": 7997 }, { "epoch": 4.879060545981394, "grad_norm": 1.6739248037338257, "learning_rate": 1.9301898346601347e-07, "log_odds_chosen": 0.856120228767395, "log_odds_ratio": -0.4601636528968811, "logits/chosen": -1.1197515726089478, "logits/rejected": -1.02903151512146, "logps/chosen": -0.9818000793457031, "logps/rejected": -1.6507182121276855, "loss": 0.9799, "nll_loss": 1.0822265148162842, "rewards/accuracies": 0.625, "rewards/chosen": -0.09818001091480255, "rewards/margins": 0.06689181923866272, "rewards/rejected": -0.16507183015346527, "step": 7998 }, { "epoch": 4.879670581058411, "grad_norm": 1.6300702095031738, "learning_rate": 1.9203919167176973e-07, "log_odds_chosen": 4.344772815704346, "log_odds_ratio": -0.3311396837234497, "logits/chosen": -0.6363086104393005, "logits/rejected": -0.9559120535850525, "logps/chosen": -0.7109405994415283, "logps/rejected": -4.400819301605225, "loss": 1.0902, "nll_loss": 0.7493728399276733, "rewards/accuracies": 0.75, "rewards/chosen": -0.07109406590461731, "rewards/margins": 0.3689878582954407, "rewards/rejected": -0.4400818943977356, "step": 7999 }, { "epoch": 4.880280616135428, "grad_norm": 1.5903009176254272, "learning_rate": 1.91059399877526e-07, "log_odds_chosen": 1.4792324304580688, "log_odds_ratio": -0.3152139186859131, "logits/chosen": -0.9999114871025085, "logits/rejected": -0.9471965432167053, "logps/chosen": -0.8982313871383667, "logps/rejected": -2.0741682052612305, "loss": 0.9666, "nll_loss": 0.9863258600234985, "rewards/accuracies": 0.875, "rewards/chosen": -0.08982314169406891, "rewards/margins": 0.1175936758518219, "rewards/rejected": -0.2074168175458908, "step": 8000 }, { "epoch": 4.880890651212445, "grad_norm": 1.1472218036651611, "learning_rate": 1.900796080832823e-07, "log_odds_chosen": 3.227975845336914, "log_odds_ratio": -0.41936278343200684, "logits/chosen": -1.0481071472167969, "logits/rejected": -1.2352261543273926, "logps/chosen": -0.8444501161575317, "logps/rejected": -3.750849962234497, "loss": 1.1303, "nll_loss": 1.1601449251174927, "rewards/accuracies": 0.75, "rewards/chosen": -0.08444501459598541, "rewards/margins": 0.2906399965286255, "rewards/rejected": -0.3750850260257721, "step": 8001 }, { "epoch": 4.881500686289462, "grad_norm": 1.1724739074707031, "learning_rate": 1.8909981628903857e-07, "log_odds_chosen": 2.394808053970337, "log_odds_ratio": -0.34365108609199524, "logits/chosen": -0.75334233045578, "logits/rejected": -0.7736368179321289, "logps/chosen": -0.748279333114624, "logps/rejected": -2.712602138519287, "loss": 1.0132, "nll_loss": 0.9844127893447876, "rewards/accuracies": 0.875, "rewards/chosen": -0.07482793182134628, "rewards/margins": 0.19643227756023407, "rewards/rejected": -0.27126020193099976, "step": 8002 }, { "epoch": 4.8821107213664785, "grad_norm": 1.546042799949646, "learning_rate": 1.8812002449479485e-07, "log_odds_chosen": 1.6176706552505493, "log_odds_ratio": -0.41878536343574524, "logits/chosen": -0.7989084720611572, "logits/rejected": -0.9406372308731079, "logps/chosen": -0.8020294904708862, "logps/rejected": -2.1107873916625977, "loss": 1.0697, "nll_loss": 1.2140862941741943, "rewards/accuracies": 0.875, "rewards/chosen": -0.08020294457674026, "rewards/margins": 0.130875825881958, "rewards/rejected": -0.21107876300811768, "step": 8003 }, { "epoch": 4.882720756443495, "grad_norm": 15.680940628051758, "learning_rate": 1.8714023270055113e-07, "log_odds_chosen": 2.828688383102417, "log_odds_ratio": -0.23920276761054993, "logits/chosen": -0.9203476905822754, "logits/rejected": -0.971189558506012, "logps/chosen": -0.6954877376556396, "logps/rejected": -2.9717001914978027, "loss": 1.1426, "nll_loss": 0.9466301202774048, "rewards/accuracies": 1.0, "rewards/chosen": -0.06954877823591232, "rewards/margins": 0.22762125730514526, "rewards/rejected": -0.2971700429916382, "step": 8004 }, { "epoch": 4.883330791520512, "grad_norm": 1.7109935283660889, "learning_rate": 1.861604409063074e-07, "log_odds_chosen": 0.9725289940834045, "log_odds_ratio": -0.5884354114532471, "logits/chosen": -0.9238474369049072, "logits/rejected": -0.9601137638092041, "logps/chosen": -0.9288875460624695, "logps/rejected": -1.7495827674865723, "loss": 1.16, "nll_loss": 1.135988473892212, "rewards/accuracies": 0.625, "rewards/chosen": -0.09288875758647919, "rewards/margins": 0.0820695161819458, "rewards/rejected": -0.174958273768425, "step": 8005 }, { "epoch": 4.883940826597529, "grad_norm": 4.737610816955566, "learning_rate": 1.851806491120637e-07, "log_odds_chosen": 1.5565285682678223, "log_odds_ratio": -0.40841764211654663, "logits/chosen": -1.1779823303222656, "logits/rejected": -1.1710197925567627, "logps/chosen": -0.7848961353302002, "logps/rejected": -1.966944932937622, "loss": 0.9894, "nll_loss": 1.1617727279663086, "rewards/accuracies": 0.75, "rewards/chosen": -0.07848961651325226, "rewards/margins": 0.11820486932992935, "rewards/rejected": -0.196694478392601, "step": 8006 }, { "epoch": 4.884550861674546, "grad_norm": 1.8906673192977905, "learning_rate": 1.8420085731781995e-07, "log_odds_chosen": 4.5392303466796875, "log_odds_ratio": -0.18406908214092255, "logits/chosen": -0.9821557402610779, "logits/rejected": -1.046645164489746, "logps/chosen": -0.6840853095054626, "logps/rejected": -4.626596927642822, "loss": 1.0423, "nll_loss": 1.0853551626205444, "rewards/accuracies": 1.0, "rewards/chosen": -0.0684085339307785, "rewards/margins": 0.39425110816955566, "rewards/rejected": -0.462659627199173, "step": 8007 }, { "epoch": 4.885160896751563, "grad_norm": 11.293806076049805, "learning_rate": 1.832210655235762e-07, "log_odds_chosen": 1.702147364616394, "log_odds_ratio": -0.3018175959587097, "logits/chosen": -0.8976759910583496, "logits/rejected": -0.9620734453201294, "logps/chosen": -0.788841724395752, "logps/rejected": -2.0537846088409424, "loss": 0.9696, "nll_loss": 0.8891334533691406, "rewards/accuracies": 0.875, "rewards/chosen": -0.07888416945934296, "rewards/margins": 0.12649428844451904, "rewards/rejected": -0.2053784728050232, "step": 8008 }, { "epoch": 4.8857709318285805, "grad_norm": 1.7336835861206055, "learning_rate": 1.822412737293325e-07, "log_odds_chosen": 2.378596782684326, "log_odds_ratio": -0.3166499137878418, "logits/chosen": -0.8046088218688965, "logits/rejected": -0.997292160987854, "logps/chosen": -0.7877263426780701, "logps/rejected": -2.697117328643799, "loss": 1.0062, "nll_loss": 0.9551399946212769, "rewards/accuracies": 0.875, "rewards/chosen": -0.078772634267807, "rewards/margins": 0.19093911349773407, "rewards/rejected": -0.2697117328643799, "step": 8009 }, { "epoch": 4.886380966905597, "grad_norm": 1.7443877458572388, "learning_rate": 1.8126148193508877e-07, "log_odds_chosen": 3.471571207046509, "log_odds_ratio": -0.18186503648757935, "logits/chosen": -0.7572084665298462, "logits/rejected": -0.7697540521621704, "logps/chosen": -0.6165496706962585, "logps/rejected": -3.403162956237793, "loss": 0.8057, "nll_loss": 0.6849348545074463, "rewards/accuracies": 1.0, "rewards/chosen": -0.06165497377514839, "rewards/margins": 0.27866131067276, "rewards/rejected": -0.3403162956237793, "step": 8010 }, { "epoch": 4.886991001982614, "grad_norm": 1.276618242263794, "learning_rate": 1.8028169014084507e-07, "log_odds_chosen": 0.9543594717979431, "log_odds_ratio": -0.4896433353424072, "logits/chosen": -0.8045294284820557, "logits/rejected": -0.879432737827301, "logps/chosen": -0.7995197772979736, "logps/rejected": -1.4906070232391357, "loss": 0.9854, "nll_loss": 0.9618406891822815, "rewards/accuracies": 0.75, "rewards/chosen": -0.07995198667049408, "rewards/margins": 0.06910871714353561, "rewards/rejected": -0.1490606963634491, "step": 8011 }, { "epoch": 4.887601037059631, "grad_norm": 8.573452949523926, "learning_rate": 1.7930189834660133e-07, "log_odds_chosen": 0.27220311760902405, "log_odds_ratio": -0.6993714570999146, "logits/chosen": -0.9178156852722168, "logits/rejected": -0.8856863975524902, "logps/chosen": -0.9150639772415161, "logps/rejected": -1.1400325298309326, "loss": 1.2011, "nll_loss": 1.1646510362625122, "rewards/accuracies": 0.5, "rewards/chosen": -0.09150639921426773, "rewards/margins": 0.022496841847896576, "rewards/rejected": -0.1140032485127449, "step": 8012 }, { "epoch": 4.888211072136648, "grad_norm": 1.2198843955993652, "learning_rate": 1.783221065523576e-07, "log_odds_chosen": 2.9074208736419678, "log_odds_ratio": -0.29230502247810364, "logits/chosen": -0.7431784868240356, "logits/rejected": -0.868298351764679, "logps/chosen": -0.5421367287635803, "logps/rejected": -2.7215676307678223, "loss": 0.8479, "nll_loss": 0.7312896251678467, "rewards/accuracies": 0.875, "rewards/chosen": -0.054213669151067734, "rewards/margins": 0.21794305741786957, "rewards/rejected": -0.2721567749977112, "step": 8013 }, { "epoch": 4.888821107213665, "grad_norm": 2.478628396987915, "learning_rate": 1.773423147581139e-07, "log_odds_chosen": 2.134821891784668, "log_odds_ratio": -0.2817756235599518, "logits/chosen": -0.9317712187767029, "logits/rejected": -1.0655049085617065, "logps/chosen": -0.5208040475845337, "logps/rejected": -1.9878208637237549, "loss": 1.0764, "nll_loss": 1.0505692958831787, "rewards/accuracies": 0.875, "rewards/chosen": -0.05208040401339531, "rewards/margins": 0.14670167863368988, "rewards/rejected": -0.1987820863723755, "step": 8014 }, { "epoch": 4.8894311422906815, "grad_norm": 19.583724975585938, "learning_rate": 1.7636252296387017e-07, "log_odds_chosen": 2.868553876876831, "log_odds_ratio": -0.2996595799922943, "logits/chosen": -0.8835857510566711, "logits/rejected": -0.8724852800369263, "logps/chosen": -0.641910195350647, "logps/rejected": -2.8818254470825195, "loss": 0.9546, "nll_loss": 1.0095324516296387, "rewards/accuracies": 0.875, "rewards/chosen": -0.06419101357460022, "rewards/margins": 0.2239915281534195, "rewards/rejected": -0.2881825566291809, "step": 8015 }, { "epoch": 4.890041177367698, "grad_norm": 2.2180795669555664, "learning_rate": 1.7538273116962645e-07, "log_odds_chosen": 1.1358740329742432, "log_odds_ratio": -0.5290154814720154, "logits/chosen": -0.9277448058128357, "logits/rejected": -1.105781078338623, "logps/chosen": -0.7843403220176697, "logps/rejected": -1.6900556087493896, "loss": 1.1315, "nll_loss": 0.9661323428153992, "rewards/accuracies": 0.625, "rewards/chosen": -0.0784340351819992, "rewards/margins": 0.09057153761386871, "rewards/rejected": -0.16900555789470673, "step": 8016 }, { "epoch": 4.890651212444715, "grad_norm": 1.5477043390274048, "learning_rate": 1.7440293937538273e-07, "log_odds_chosen": 3.0315403938293457, "log_odds_ratio": -0.3221583962440491, "logits/chosen": -0.8113712072372437, "logits/rejected": -0.8980997204780579, "logps/chosen": -0.6397896409034729, "logps/rejected": -3.037128448486328, "loss": 1.0401, "nll_loss": 0.8418133854866028, "rewards/accuracies": 0.875, "rewards/chosen": -0.06397897005081177, "rewards/margins": 0.23973388969898224, "rewards/rejected": -0.3037128448486328, "step": 8017 }, { "epoch": 4.891261247521733, "grad_norm": 7.556949138641357, "learning_rate": 1.73423147581139e-07, "log_odds_chosen": 0.8821319341659546, "log_odds_ratio": -0.500551164150238, "logits/chosen": -1.2725317478179932, "logits/rejected": -1.1523675918579102, "logps/chosen": -1.1659060716629028, "logps/rejected": -1.9084409475326538, "loss": 1.1603, "nll_loss": 1.2262020111083984, "rewards/accuracies": 0.625, "rewards/chosen": -0.11659060418605804, "rewards/margins": 0.07425349205732346, "rewards/rejected": -0.1908440887928009, "step": 8018 }, { "epoch": 4.89187128259875, "grad_norm": 1.499155044555664, "learning_rate": 1.7244335578689527e-07, "log_odds_chosen": 2.0835258960723877, "log_odds_ratio": -0.532872200012207, "logits/chosen": -0.9348424673080444, "logits/rejected": -1.061528205871582, "logps/chosen": -0.7875370979309082, "logps/rejected": -2.6007602214813232, "loss": 0.9318, "nll_loss": 0.9451612830162048, "rewards/accuracies": 0.75, "rewards/chosen": -0.07875371724367142, "rewards/margins": 0.18132232129573822, "rewards/rejected": -0.26007601618766785, "step": 8019 }, { "epoch": 4.892481317675767, "grad_norm": 1.6999046802520752, "learning_rate": 1.7146356399265155e-07, "log_odds_chosen": 1.8387272357940674, "log_odds_ratio": -0.341055303812027, "logits/chosen": -0.8260600566864014, "logits/rejected": -0.9107503890991211, "logps/chosen": -0.7115707397460938, "logps/rejected": -2.0466175079345703, "loss": 1.1615, "nll_loss": 1.253387212753296, "rewards/accuracies": 0.75, "rewards/chosen": -0.0711570680141449, "rewards/margins": 0.1335046887397766, "rewards/rejected": -0.2046617716550827, "step": 8020 }, { "epoch": 4.8930913527527835, "grad_norm": 2.2743935585021973, "learning_rate": 1.7048377219840783e-07, "log_odds_chosen": 2.6611738204956055, "log_odds_ratio": -0.24284029006958008, "logits/chosen": -0.6520481109619141, "logits/rejected": -0.7875388860702515, "logps/chosen": -0.5476219058036804, "logps/rejected": -2.4552111625671387, "loss": 1.1004, "nll_loss": 0.7468882203102112, "rewards/accuracies": 0.875, "rewards/chosen": -0.05476219207048416, "rewards/margins": 0.19075891375541687, "rewards/rejected": -0.24552111327648163, "step": 8021 }, { "epoch": 4.8937013878298, "grad_norm": 1.5417463779449463, "learning_rate": 1.6950398040416411e-07, "log_odds_chosen": 2.622262477874756, "log_odds_ratio": -0.28407594561576843, "logits/chosen": -0.8672608137130737, "logits/rejected": -1.00409996509552, "logps/chosen": -0.663520872592926, "logps/rejected": -2.873408794403076, "loss": 0.7751, "nll_loss": 0.8332672715187073, "rewards/accuracies": 0.875, "rewards/chosen": -0.06635208427906036, "rewards/margins": 0.22098878026008606, "rewards/rejected": -0.2873408794403076, "step": 8022 }, { "epoch": 4.894311422906817, "grad_norm": 3.3724238872528076, "learning_rate": 1.6852418860992037e-07, "log_odds_chosen": 3.6271779537200928, "log_odds_ratio": -0.1272260695695877, "logits/chosen": -0.7208096981048584, "logits/rejected": -0.8476032018661499, "logps/chosen": -0.7043965458869934, "logps/rejected": -3.5341758728027344, "loss": 0.9889, "nll_loss": 0.8341870903968811, "rewards/accuracies": 1.0, "rewards/chosen": -0.0704396590590477, "rewards/margins": 0.2829779386520386, "rewards/rejected": -0.3534175753593445, "step": 8023 }, { "epoch": 4.894921457983834, "grad_norm": 1.134701132774353, "learning_rate": 1.6754439681567668e-07, "log_odds_chosen": 2.7824630737304688, "log_odds_ratio": -0.295420378446579, "logits/chosen": -0.8622027635574341, "logits/rejected": -1.056699275970459, "logps/chosen": -0.6257349252700806, "logps/rejected": -2.6819007396698, "loss": 0.7503, "nll_loss": 0.7764730453491211, "rewards/accuracies": 0.875, "rewards/chosen": -0.06257349252700806, "rewards/margins": 0.20561659336090088, "rewards/rejected": -0.26819008588790894, "step": 8024 }, { "epoch": 4.895531493060851, "grad_norm": 1.2213910818099976, "learning_rate": 1.6656460502143293e-07, "log_odds_chosen": 5.226396560668945, "log_odds_ratio": -0.17758427560329437, "logits/chosen": -0.8217883110046387, "logits/rejected": -1.0522511005401611, "logps/chosen": -0.8682523369789124, "logps/rejected": -5.493504047393799, "loss": 0.9207, "nll_loss": 0.892353355884552, "rewards/accuracies": 0.875, "rewards/chosen": -0.08682523667812347, "rewards/margins": 0.4625251591205597, "rewards/rejected": -0.549350380897522, "step": 8025 }, { "epoch": 4.896141528137868, "grad_norm": 1.2864422798156738, "learning_rate": 1.655848132271892e-07, "log_odds_chosen": 2.2912657260894775, "log_odds_ratio": -0.5505640506744385, "logits/chosen": -0.9426776170730591, "logits/rejected": -0.9243582487106323, "logps/chosen": -0.8058496117591858, "logps/rejected": -2.6422178745269775, "loss": 1.1592, "nll_loss": 1.1135382652282715, "rewards/accuracies": 0.5, "rewards/chosen": -0.08058495819568634, "rewards/margins": 0.1836368292570114, "rewards/rejected": -0.26422178745269775, "step": 8026 }, { "epoch": 4.896751563214885, "grad_norm": 1.7586617469787598, "learning_rate": 1.646050214329455e-07, "log_odds_chosen": 1.785265326499939, "log_odds_ratio": -0.2500705420970917, "logits/chosen": -0.8725576400756836, "logits/rejected": -1.0216039419174194, "logps/chosen": -0.8406589031219482, "logps/rejected": -2.1099371910095215, "loss": 1.151, "nll_loss": 1.0267685651779175, "rewards/accuracies": 1.0, "rewards/chosen": -0.08406589180231094, "rewards/margins": 0.12692782282829285, "rewards/rejected": -0.2109937071800232, "step": 8027 }, { "epoch": 4.897361598291901, "grad_norm": 1.9741365909576416, "learning_rate": 1.6362522963870177e-07, "log_odds_chosen": 1.3796544075012207, "log_odds_ratio": -0.3635466992855072, "logits/chosen": -0.7919059991836548, "logits/rejected": -0.8739386796951294, "logps/chosen": -0.8469904661178589, "logps/rejected": -1.9610517024993896, "loss": 1.2475, "nll_loss": 1.1072816848754883, "rewards/accuracies": 0.75, "rewards/chosen": -0.08469904214143753, "rewards/margins": 0.1114061251282692, "rewards/rejected": -0.19610516726970673, "step": 8028 }, { "epoch": 4.897971633368918, "grad_norm": 1.1410882472991943, "learning_rate": 1.6264543784445806e-07, "log_odds_chosen": 2.793661594390869, "log_odds_ratio": -0.3339094817638397, "logits/chosen": -0.8426726460456848, "logits/rejected": -0.9033116102218628, "logps/chosen": -0.7588716745376587, "logps/rejected": -3.058959484100342, "loss": 1.0867, "nll_loss": 0.8898425698280334, "rewards/accuracies": 0.875, "rewards/chosen": -0.07588717341423035, "rewards/margins": 0.23000876605510712, "rewards/rejected": -0.30589592456817627, "step": 8029 }, { "epoch": 4.898581668445936, "grad_norm": 1.49451744556427, "learning_rate": 1.616656460502143e-07, "log_odds_chosen": 1.7400487661361694, "log_odds_ratio": -0.30857405066490173, "logits/chosen": -0.8657026886940002, "logits/rejected": -0.9423550367355347, "logps/chosen": -0.7887495756149292, "logps/rejected": -2.0231547355651855, "loss": 1.0601, "nll_loss": 0.960124135017395, "rewards/accuracies": 0.875, "rewards/chosen": -0.07887496799230576, "rewards/margins": 0.12344051897525787, "rewards/rejected": -0.20231547951698303, "step": 8030 }, { "epoch": 4.899191703522953, "grad_norm": 1.0794156789779663, "learning_rate": 1.606858542559706e-07, "log_odds_chosen": 2.3999111652374268, "log_odds_ratio": -0.3356631398200989, "logits/chosen": -0.8822687864303589, "logits/rejected": -1.01401948928833, "logps/chosen": -0.7044139504432678, "logps/rejected": -2.4748101234436035, "loss": 0.9575, "nll_loss": 0.8042275905609131, "rewards/accuracies": 0.875, "rewards/chosen": -0.07044139504432678, "rewards/margins": 0.17703963816165924, "rewards/rejected": -0.24748101830482483, "step": 8031 }, { "epoch": 4.89980173859997, "grad_norm": 1.3321995735168457, "learning_rate": 1.5970606246172687e-07, "log_odds_chosen": 3.894287586212158, "log_odds_ratio": -0.4194745421409607, "logits/chosen": -0.7254637479782104, "logits/rejected": -0.9848751425743103, "logps/chosen": -0.7014949917793274, "logps/rejected": -3.962245225906372, "loss": 0.9286, "nll_loss": 0.8566856384277344, "rewards/accuracies": 0.75, "rewards/chosen": -0.0701494961977005, "rewards/margins": 0.32607501745224, "rewards/rejected": -0.3962244987487793, "step": 8032 }, { "epoch": 4.9004117736769865, "grad_norm": 1.6693487167358398, "learning_rate": 1.5872627066748315e-07, "log_odds_chosen": 3.8037807941436768, "log_odds_ratio": -0.2594631016254425, "logits/chosen": -0.7813262343406677, "logits/rejected": -1.0835386514663696, "logps/chosen": -0.45176172256469727, "logps/rejected": -3.3998944759368896, "loss": 0.9102, "nll_loss": 0.6106528043746948, "rewards/accuracies": 0.875, "rewards/chosen": -0.04517617076635361, "rewards/margins": 0.2948133051395416, "rewards/rejected": -0.33998945355415344, "step": 8033 }, { "epoch": 4.901021808754003, "grad_norm": 2.184938669204712, "learning_rate": 1.5774647887323943e-07, "log_odds_chosen": 2.385326623916626, "log_odds_ratio": -0.24623513221740723, "logits/chosen": -0.8596729040145874, "logits/rejected": -0.9964326620101929, "logps/chosen": -0.6050690412521362, "logps/rejected": -2.2349977493286133, "loss": 1.1164, "nll_loss": 1.0680623054504395, "rewards/accuracies": 0.875, "rewards/chosen": -0.060506902635097504, "rewards/margins": 0.16299286484718323, "rewards/rejected": -0.22349977493286133, "step": 8034 }, { "epoch": 4.90163184383102, "grad_norm": 6.50853967666626, "learning_rate": 1.5676668707899572e-07, "log_odds_chosen": 1.2861286401748657, "log_odds_ratio": -0.5635864734649658, "logits/chosen": -0.7422875165939331, "logits/rejected": -0.6713458895683289, "logps/chosen": -1.0247992277145386, "logps/rejected": -2.191450595855713, "loss": 1.1703, "nll_loss": 1.0833585262298584, "rewards/accuracies": 0.75, "rewards/chosen": -0.10247993469238281, "rewards/margins": 0.11666514724493027, "rewards/rejected": -0.21914507448673248, "step": 8035 }, { "epoch": 4.902241878908037, "grad_norm": 1.331533670425415, "learning_rate": 1.5578689528475197e-07, "log_odds_chosen": 2.744236707687378, "log_odds_ratio": -0.23297296464443207, "logits/chosen": -0.9293718338012695, "logits/rejected": -0.9034439921379089, "logps/chosen": -0.6299303770065308, "logps/rejected": -2.743305206298828, "loss": 0.8294, "nll_loss": 0.7461444735527039, "rewards/accuracies": 0.75, "rewards/chosen": -0.06299303472042084, "rewards/margins": 0.21133747696876526, "rewards/rejected": -0.2743305265903473, "step": 8036 }, { "epoch": 4.902851913985054, "grad_norm": 0.94939786195755, "learning_rate": 1.5480710349050828e-07, "log_odds_chosen": 0.7488800287246704, "log_odds_ratio": -0.6258726119995117, "logits/chosen": -0.8944913148880005, "logits/rejected": -1.028982400894165, "logps/chosen": -0.8165650367736816, "logps/rejected": -1.2935030460357666, "loss": 0.981, "nll_loss": 0.9201055765151978, "rewards/accuracies": 0.5, "rewards/chosen": -0.08165650069713593, "rewards/margins": 0.04769380763173103, "rewards/rejected": -0.12935030460357666, "step": 8037 }, { "epoch": 4.903461949062071, "grad_norm": 4.88320779800415, "learning_rate": 1.5382731169626453e-07, "log_odds_chosen": 0.6934231519699097, "log_odds_ratio": -0.4687873125076294, "logits/chosen": -1.0787460803985596, "logits/rejected": -1.0916551351547241, "logps/chosen": -0.7935004234313965, "logps/rejected": -1.2934396266937256, "loss": 1.059, "nll_loss": 1.0098047256469727, "rewards/accuracies": 0.875, "rewards/chosen": -0.079350046813488, "rewards/margins": 0.04999392479658127, "rewards/rejected": -0.12934397161006927, "step": 8038 }, { "epoch": 4.904071984139088, "grad_norm": 2.83353590965271, "learning_rate": 1.5284751990202084e-07, "log_odds_chosen": 2.754180431365967, "log_odds_ratio": -0.32408106327056885, "logits/chosen": -0.8476375341415405, "logits/rejected": -0.9555243253707886, "logps/chosen": -0.7240455150604248, "logps/rejected": -2.979874849319458, "loss": 1.073, "nll_loss": 0.9346308708190918, "rewards/accuracies": 0.75, "rewards/chosen": -0.07240456342697144, "rewards/margins": 0.22558295726776123, "rewards/rejected": -0.29798752069473267, "step": 8039 }, { "epoch": 4.904682019216105, "grad_norm": 1.9187331199645996, "learning_rate": 1.518677281077771e-07, "log_odds_chosen": 3.247860908508301, "log_odds_ratio": -0.15102000534534454, "logits/chosen": -1.06882905960083, "logits/rejected": -1.2205573320388794, "logps/chosen": -1.0119754076004028, "logps/rejected": -3.7970573902130127, "loss": 1.0881, "nll_loss": 1.3839126825332642, "rewards/accuracies": 0.875, "rewards/chosen": -0.10119754076004028, "rewards/margins": 0.27850818634033203, "rewards/rejected": -0.3797057271003723, "step": 8040 }, { "epoch": 4.905292054293122, "grad_norm": 5.186366081237793, "learning_rate": 1.5088793631353335e-07, "log_odds_chosen": 1.758101224899292, "log_odds_ratio": -0.5154037475585938, "logits/chosen": -0.9910889863967896, "logits/rejected": -1.0180468559265137, "logps/chosen": -0.7775888442993164, "logps/rejected": -1.854841709136963, "loss": 0.993, "nll_loss": 1.0762434005737305, "rewards/accuracies": 0.5, "rewards/chosen": -0.07775888592004776, "rewards/margins": 0.10772529244422913, "rewards/rejected": -0.1854841709136963, "step": 8041 }, { "epoch": 4.905902089370139, "grad_norm": 1.9835973978042603, "learning_rate": 1.4990814451928966e-07, "log_odds_chosen": 1.372774362564087, "log_odds_ratio": -0.3665200173854828, "logits/chosen": -0.8367521166801453, "logits/rejected": -0.9994723796844482, "logps/chosen": -0.8740974068641663, "logps/rejected": -1.9381695985794067, "loss": 1.0696, "nll_loss": 1.0710821151733398, "rewards/accuracies": 1.0, "rewards/chosen": -0.08740974217653275, "rewards/margins": 0.10640721768140793, "rewards/rejected": -0.19381697475910187, "step": 8042 }, { "epoch": 4.906512124447156, "grad_norm": 1.458093523979187, "learning_rate": 1.489283527250459e-07, "log_odds_chosen": 2.9300050735473633, "log_odds_ratio": -0.34715333580970764, "logits/chosen": -0.855404257774353, "logits/rejected": -1.0065948963165283, "logps/chosen": -0.6159842014312744, "logps/rejected": -2.711472511291504, "loss": 0.9234, "nll_loss": 0.9975930452346802, "rewards/accuracies": 0.75, "rewards/chosen": -0.06159842386841774, "rewards/margins": 0.20954883098602295, "rewards/rejected": -0.2711472511291504, "step": 8043 }, { "epoch": 4.907122159524173, "grad_norm": 1.332589864730835, "learning_rate": 1.479485609308022e-07, "log_odds_chosen": 2.929659605026245, "log_odds_ratio": -0.20512732863426208, "logits/chosen": -1.0078306198120117, "logits/rejected": -1.2162582874298096, "logps/chosen": -0.5806688666343689, "logps/rejected": -2.751103639602661, "loss": 1.1178, "nll_loss": 1.1340993642807007, "rewards/accuracies": 1.0, "rewards/chosen": -0.05806688591837883, "rewards/margins": 0.2170434594154358, "rewards/rejected": -0.2751103639602661, "step": 8044 }, { "epoch": 4.90773219460119, "grad_norm": 1.8697867393493652, "learning_rate": 1.4696876913655847e-07, "log_odds_chosen": 2.594058036804199, "log_odds_ratio": -0.27703413367271423, "logits/chosen": -0.7796301245689392, "logits/rejected": -0.813825786113739, "logps/chosen": -0.5777037739753723, "logps/rejected": -2.530702829360962, "loss": 1.0501, "nll_loss": 0.9916942119598389, "rewards/accuracies": 0.875, "rewards/chosen": -0.05777037888765335, "rewards/margins": 0.19529989361763, "rewards/rejected": -0.25307026505470276, "step": 8045 }, { "epoch": 4.908342229678206, "grad_norm": 7.218766689300537, "learning_rate": 1.4598897734231475e-07, "log_odds_chosen": 0.3254505693912506, "log_odds_ratio": -0.8651450276374817, "logits/chosen": -0.830297589302063, "logits/rejected": -0.7982337474822998, "logps/chosen": -1.2603733539581299, "logps/rejected": -1.6145234107971191, "loss": 1.1895, "nll_loss": 1.2648019790649414, "rewards/accuracies": 0.5, "rewards/chosen": -0.1260373443365097, "rewards/margins": 0.035415008664131165, "rewards/rejected": -0.16145235300064087, "step": 8046 }, { "epoch": 4.908952264755223, "grad_norm": 2.1799755096435547, "learning_rate": 1.4500918554807104e-07, "log_odds_chosen": 0.48634904623031616, "log_odds_ratio": -0.5727524757385254, "logits/chosen": -0.876953125, "logits/rejected": -0.93625807762146, "logps/chosen": -0.8574950695037842, "logps/rejected": -1.1959937810897827, "loss": 1.0373, "nll_loss": 1.1537425518035889, "rewards/accuracies": 0.625, "rewards/chosen": -0.08574950695037842, "rewards/margins": 0.03384987264871597, "rewards/rejected": -0.11959938704967499, "step": 8047 }, { "epoch": 4.90956229983224, "grad_norm": 1.440752625465393, "learning_rate": 1.4402939375382732e-07, "log_odds_chosen": 1.8579847812652588, "log_odds_ratio": -0.28055909276008606, "logits/chosen": -0.9061461091041565, "logits/rejected": -0.9159742593765259, "logps/chosen": -0.7054007649421692, "logps/rejected": -2.011080503463745, "loss": 0.9916, "nll_loss": 0.8608787059783936, "rewards/accuracies": 0.875, "rewards/chosen": -0.07054007053375244, "rewards/margins": 0.1305679827928543, "rewards/rejected": -0.20110805332660675, "step": 8048 }, { "epoch": 4.910172334909257, "grad_norm": 2.968848466873169, "learning_rate": 1.4304960195958357e-07, "log_odds_chosen": 1.4306151866912842, "log_odds_ratio": -0.47796887159347534, "logits/chosen": -0.5989517569541931, "logits/rejected": -0.9286920428276062, "logps/chosen": -0.8135720491409302, "logps/rejected": -1.8875192403793335, "loss": 1.0053, "nll_loss": 0.9824705123901367, "rewards/accuracies": 0.75, "rewards/chosen": -0.0813572108745575, "rewards/margins": 0.10739471018314362, "rewards/rejected": -0.1887519210577011, "step": 8049 }, { "epoch": 4.910782369986274, "grad_norm": 4.443348407745361, "learning_rate": 1.4206981016533985e-07, "log_odds_chosen": 2.0463640689849854, "log_odds_ratio": -0.41273894906044006, "logits/chosen": -0.9350807070732117, "logits/rejected": -1.031313180923462, "logps/chosen": -0.9178441762924194, "logps/rejected": -2.6439642906188965, "loss": 0.9145, "nll_loss": 1.133643627166748, "rewards/accuracies": 0.75, "rewards/chosen": -0.09178441762924194, "rewards/margins": 0.1726120412349701, "rewards/rejected": -0.26439642906188965, "step": 8050 }, { "epoch": 4.911392405063291, "grad_norm": 4.812408447265625, "learning_rate": 1.4109001837109613e-07, "log_odds_chosen": 0.39455488324165344, "log_odds_ratio": -0.5761667490005493, "logits/chosen": -1.0946295261383057, "logits/rejected": -1.0325273275375366, "logps/chosen": -1.0022927522659302, "logps/rejected": -1.3228193521499634, "loss": 1.1468, "nll_loss": 1.087850570678711, "rewards/accuracies": 0.625, "rewards/chosen": -0.10022927820682526, "rewards/margins": 0.0320526584982872, "rewards/rejected": -0.13228192925453186, "step": 8051 }, { "epoch": 4.912002440140308, "grad_norm": 7.969082832336426, "learning_rate": 1.4011022657685241e-07, "log_odds_chosen": 2.5171103477478027, "log_odds_ratio": -0.3206479847431183, "logits/chosen": -0.8480558395385742, "logits/rejected": -0.9649363160133362, "logps/chosen": -0.7923095226287842, "logps/rejected": -2.9125967025756836, "loss": 1.0063, "nll_loss": 1.025747537612915, "rewards/accuracies": 0.875, "rewards/chosen": -0.07923095673322678, "rewards/margins": 0.21202871203422546, "rewards/rejected": -0.29125964641571045, "step": 8052 }, { "epoch": 4.912612475217325, "grad_norm": 1.34501051902771, "learning_rate": 1.391304347826087e-07, "log_odds_chosen": 2.4889028072357178, "log_odds_ratio": -0.19681301712989807, "logits/chosen": -0.7818703055381775, "logits/rejected": -0.8266960978507996, "logps/chosen": -0.8182854652404785, "logps/rejected": -2.8819122314453125, "loss": 1.0982, "nll_loss": 0.9852404594421387, "rewards/accuracies": 1.0, "rewards/chosen": -0.0818285420536995, "rewards/margins": 0.20636266469955444, "rewards/rejected": -0.28819119930267334, "step": 8053 }, { "epoch": 4.913222510294342, "grad_norm": 1.8039915561676025, "learning_rate": 1.3815064298836495e-07, "log_odds_chosen": 1.6198651790618896, "log_odds_ratio": -0.33421677350997925, "logits/chosen": -0.9160902500152588, "logits/rejected": -1.0638115406036377, "logps/chosen": -0.7893897891044617, "logps/rejected": -1.9286284446716309, "loss": 1.1735, "nll_loss": 0.8712623119354248, "rewards/accuracies": 0.875, "rewards/chosen": -0.07893898338079453, "rewards/margins": 0.11392386257648468, "rewards/rejected": -0.1928628534078598, "step": 8054 }, { "epoch": 4.913832545371359, "grad_norm": 1.7953990697860718, "learning_rate": 1.3717085119412126e-07, "log_odds_chosen": 0.6925070285797119, "log_odds_ratio": -0.6075848937034607, "logits/chosen": -1.1244263648986816, "logits/rejected": -1.1146197319030762, "logps/chosen": -0.8309925198554993, "logps/rejected": -1.370839238166809, "loss": 1.093, "nll_loss": 1.232790470123291, "rewards/accuracies": 0.5, "rewards/chosen": -0.08309925347566605, "rewards/margins": 0.05398467928171158, "rewards/rejected": -0.13708391785621643, "step": 8055 }, { "epoch": 4.914442580448376, "grad_norm": 7.348334789276123, "learning_rate": 1.361910593998775e-07, "log_odds_chosen": 1.7774291038513184, "log_odds_ratio": -0.27867984771728516, "logits/chosen": -0.9679927229881287, "logits/rejected": -0.9523769021034241, "logps/chosen": -0.8051459789276123, "logps/rejected": -2.082792282104492, "loss": 1.0914, "nll_loss": 0.8700013756752014, "rewards/accuracies": 0.875, "rewards/chosen": -0.08051459491252899, "rewards/margins": 0.12776465713977814, "rewards/rejected": -0.20827926695346832, "step": 8056 }, { "epoch": 4.915052615525393, "grad_norm": 2.087355613708496, "learning_rate": 1.352112676056338e-07, "log_odds_chosen": 1.5020225048065186, "log_odds_ratio": -0.46056878566741943, "logits/chosen": -1.1413593292236328, "logits/rejected": -1.2177106142044067, "logps/chosen": -0.9708086252212524, "logps/rejected": -2.14736008644104, "loss": 1.2767, "nll_loss": 1.434731364250183, "rewards/accuracies": 0.75, "rewards/chosen": -0.09708087146282196, "rewards/margins": 0.11765516549348831, "rewards/rejected": -0.21473604440689087, "step": 8057 }, { "epoch": 4.9156626506024095, "grad_norm": 1.2567967176437378, "learning_rate": 1.3423147581139007e-07, "log_odds_chosen": 1.660783052444458, "log_odds_ratio": -0.37300607562065125, "logits/chosen": -1.1249104738235474, "logits/rejected": -1.195507287979126, "logps/chosen": -0.8906373381614685, "logps/rejected": -2.165705680847168, "loss": 1.1137, "nll_loss": 1.0413035154342651, "rewards/accuracies": 0.75, "rewards/chosen": -0.08906374126672745, "rewards/margins": 0.127506822347641, "rewards/rejected": -0.21657055616378784, "step": 8058 }, { "epoch": 4.916272685679426, "grad_norm": 8.102142333984375, "learning_rate": 1.3325168401714636e-07, "log_odds_chosen": 1.7247167825698853, "log_odds_ratio": -0.3286742568016052, "logits/chosen": -0.8698219060897827, "logits/rejected": -1.0100266933441162, "logps/chosen": -0.9408023953437805, "logps/rejected": -2.4015793800354004, "loss": 1.0108, "nll_loss": 1.046173334121704, "rewards/accuracies": 0.875, "rewards/chosen": -0.09408023953437805, "rewards/margins": 0.1460776925086975, "rewards/rejected": -0.24015793204307556, "step": 8059 }, { "epoch": 4.916882720756443, "grad_norm": 1.6168993711471558, "learning_rate": 1.3227189222290264e-07, "log_odds_chosen": 4.116544723510742, "log_odds_ratio": -0.305930495262146, "logits/chosen": -0.90802401304245, "logits/rejected": -1.095912218093872, "logps/chosen": -0.6305565237998962, "logps/rejected": -4.096802234649658, "loss": 1.1306, "nll_loss": 0.8285863995552063, "rewards/accuracies": 0.875, "rewards/chosen": -0.06305565685033798, "rewards/margins": 0.34662458300590515, "rewards/rejected": -0.40968021750450134, "step": 8060 }, { "epoch": 4.917492755833461, "grad_norm": 6.126885890960693, "learning_rate": 1.312921004286589e-07, "log_odds_chosen": 2.9208760261535645, "log_odds_ratio": -0.2994197607040405, "logits/chosen": -0.8816589713096619, "logits/rejected": -1.1917028427124023, "logps/chosen": -0.727371096611023, "logps/rejected": -3.176018714904785, "loss": 0.9831, "nll_loss": 1.0456002950668335, "rewards/accuracies": 0.875, "rewards/chosen": -0.07273711264133453, "rewards/margins": 0.24486473202705383, "rewards/rejected": -0.31760185956954956, "step": 8061 }, { "epoch": 4.918102790910478, "grad_norm": 3.419072151184082, "learning_rate": 1.3031230863441517e-07, "log_odds_chosen": 1.7556836605072021, "log_odds_ratio": -0.4261629283428192, "logits/chosen": -0.757921576499939, "logits/rejected": -0.7686182260513306, "logps/chosen": -0.7072568535804749, "logps/rejected": -2.1476781368255615, "loss": 0.8897, "nll_loss": 0.9714828133583069, "rewards/accuracies": 0.625, "rewards/chosen": -0.0707256942987442, "rewards/margins": 0.14404210448265076, "rewards/rejected": -0.21476781368255615, "step": 8062 }, { "epoch": 4.918712825987495, "grad_norm": 2.279811143875122, "learning_rate": 1.2933251684017145e-07, "log_odds_chosen": 1.6083741188049316, "log_odds_ratio": -0.4148341417312622, "logits/chosen": -0.7972503900527954, "logits/rejected": -0.9401538372039795, "logps/chosen": -0.49788424372673035, "logps/rejected": -1.4011201858520508, "loss": 0.8504, "nll_loss": 0.7292340397834778, "rewards/accuracies": 0.75, "rewards/chosen": -0.049788422882556915, "rewards/margins": 0.09032359719276428, "rewards/rejected": -0.1401120126247406, "step": 8063 }, { "epoch": 4.919322861064511, "grad_norm": 8.380013465881348, "learning_rate": 1.2835272504592773e-07, "log_odds_chosen": 1.312157392501831, "log_odds_ratio": -0.3901550769805908, "logits/chosen": -1.062972068786621, "logits/rejected": -0.9476444721221924, "logps/chosen": -1.052628993988037, "logps/rejected": -2.085941791534424, "loss": 0.941, "nll_loss": 1.121147632598877, "rewards/accuracies": 0.875, "rewards/chosen": -0.105262890458107, "rewards/margins": 0.1033312976360321, "rewards/rejected": -0.2085941880941391, "step": 8064 }, { "epoch": 4.919932896141528, "grad_norm": 1.606107234954834, "learning_rate": 1.2737293325168402e-07, "log_odds_chosen": 1.279102087020874, "log_odds_ratio": -0.5582977533340454, "logits/chosen": -0.7814134955406189, "logits/rejected": -0.774791955947876, "logps/chosen": -0.8439671397209167, "logps/rejected": -1.8191709518432617, "loss": 1.0032, "nll_loss": 1.0126912593841553, "rewards/accuracies": 0.625, "rewards/chosen": -0.08439671993255615, "rewards/margins": 0.0975203812122345, "rewards/rejected": -0.18191708624362946, "step": 8065 }, { "epoch": 4.920542931218545, "grad_norm": 2.9469528198242188, "learning_rate": 1.263931414574403e-07, "log_odds_chosen": 2.177555561065674, "log_odds_ratio": -0.3981321454048157, "logits/chosen": -0.9157693386077881, "logits/rejected": -1.0616499185562134, "logps/chosen": -0.899818480014801, "logps/rejected": -2.5755395889282227, "loss": 1.0772, "nll_loss": 0.92057204246521, "rewards/accuracies": 0.75, "rewards/chosen": -0.08998185396194458, "rewards/margins": 0.16757211089134216, "rewards/rejected": -0.25755396485328674, "step": 8066 }, { "epoch": 4.921152966295562, "grad_norm": 1.8117574453353882, "learning_rate": 1.2541334966319655e-07, "log_odds_chosen": 2.1269290447235107, "log_odds_ratio": -0.25689825415611267, "logits/chosen": -0.8097493648529053, "logits/rejected": -1.0583198070526123, "logps/chosen": -0.8866117000579834, "logps/rejected": -2.5849769115448, "loss": 1.0813, "nll_loss": 1.0516738891601562, "rewards/accuracies": 0.875, "rewards/chosen": -0.08866116404533386, "rewards/margins": 0.16983650624752045, "rewards/rejected": -0.2584976851940155, "step": 8067 }, { "epoch": 4.921763001372579, "grad_norm": 11.513056755065918, "learning_rate": 1.2443355786895286e-07, "log_odds_chosen": 1.3799282312393188, "log_odds_ratio": -0.5672523975372314, "logits/chosen": -0.8168063163757324, "logits/rejected": -1.0073792934417725, "logps/chosen": -0.9641589522361755, "logps/rejected": -1.9228737354278564, "loss": 1.1462, "nll_loss": 0.9365000128746033, "rewards/accuracies": 0.625, "rewards/chosen": -0.09641589224338531, "rewards/margins": 0.09587149322032928, "rewards/rejected": -0.1922873854637146, "step": 8068 }, { "epoch": 4.922373036449596, "grad_norm": 1.354583501815796, "learning_rate": 1.234537660747091e-07, "log_odds_chosen": 1.7587376832962036, "log_odds_ratio": -0.3320380449295044, "logits/chosen": -0.8790115118026733, "logits/rejected": -0.9199223518371582, "logps/chosen": -0.6517305374145508, "logps/rejected": -1.9235310554504395, "loss": 1.0713, "nll_loss": 0.7535836100578308, "rewards/accuracies": 0.875, "rewards/chosen": -0.06517305970191956, "rewards/margins": 0.1271800547838211, "rewards/rejected": -0.19235311448574066, "step": 8069 }, { "epoch": 4.9229830715266125, "grad_norm": 6.889892101287842, "learning_rate": 1.224739742804654e-07, "log_odds_chosen": 2.117004632949829, "log_odds_ratio": -0.3051331043243408, "logits/chosen": -1.0072126388549805, "logits/rejected": -1.1285717487335205, "logps/chosen": -0.8503621220588684, "logps/rejected": -2.5121891498565674, "loss": 1.0984, "nll_loss": 1.1707826852798462, "rewards/accuracies": 0.875, "rewards/chosen": -0.08503621816635132, "rewards/margins": 0.16618268191814423, "rewards/rejected": -0.25121891498565674, "step": 8070 }, { "epoch": 4.923593106603629, "grad_norm": 1.2708300352096558, "learning_rate": 1.2149418248622168e-07, "log_odds_chosen": 2.831171989440918, "log_odds_ratio": -0.1434555947780609, "logits/chosen": -0.7422958016395569, "logits/rejected": -0.8110042810440063, "logps/chosen": -0.5716086626052856, "logps/rejected": -2.596585750579834, "loss": 0.9756, "nll_loss": 0.7857611775398254, "rewards/accuracies": 1.0, "rewards/chosen": -0.0571608692407608, "rewards/margins": 0.2024977058172226, "rewards/rejected": -0.2596585750579834, "step": 8071 }, { "epoch": 4.924203141680646, "grad_norm": 3.78826904296875, "learning_rate": 1.2051439069197796e-07, "log_odds_chosen": 0.7795039415359497, "log_odds_ratio": -0.6269533634185791, "logits/chosen": -0.929341197013855, "logits/rejected": -0.8838298916816711, "logps/chosen": -0.9391217827796936, "logps/rejected": -1.4521090984344482, "loss": 1.0242, "nll_loss": 1.0615818500518799, "rewards/accuracies": 0.625, "rewards/chosen": -0.09391218423843384, "rewards/margins": 0.051298726350069046, "rewards/rejected": -0.14521090686321259, "step": 8072 }, { "epoch": 4.924813176757664, "grad_norm": 12.331427574157715, "learning_rate": 1.195345988977342e-07, "log_odds_chosen": 3.819281578063965, "log_odds_ratio": -0.0846620723605156, "logits/chosen": -0.8227649927139282, "logits/rejected": -1.1175215244293213, "logps/chosen": -0.7332183122634888, "logps/rejected": -3.8343582153320312, "loss": 1.0182, "nll_loss": 0.957433819770813, "rewards/accuracies": 1.0, "rewards/chosen": -0.07332184165716171, "rewards/margins": 0.3101139962673187, "rewards/rejected": -0.38343584537506104, "step": 8073 }, { "epoch": 4.925423211834681, "grad_norm": 2.3669066429138184, "learning_rate": 1.185548071034905e-07, "log_odds_chosen": 3.1509361267089844, "log_odds_ratio": -0.18245096504688263, "logits/chosen": -0.7298880219459534, "logits/rejected": -1.0426485538482666, "logps/chosen": -0.5140491724014282, "logps/rejected": -2.8469462394714355, "loss": 1.006, "nll_loss": 0.6770891547203064, "rewards/accuracies": 0.875, "rewards/chosen": -0.0514049157500267, "rewards/margins": 0.2332897186279297, "rewards/rejected": -0.284694641828537, "step": 8074 }, { "epoch": 4.926033246911698, "grad_norm": 1.6148895025253296, "learning_rate": 1.1757501530924679e-07, "log_odds_chosen": 1.1328665018081665, "log_odds_ratio": -0.5132092237472534, "logits/chosen": -0.6953338384628296, "logits/rejected": -0.9270175099372864, "logps/chosen": -0.7617492079734802, "logps/rejected": -1.5301697254180908, "loss": 0.962, "nll_loss": 0.9492601752281189, "rewards/accuracies": 0.625, "rewards/chosen": -0.07617492228746414, "rewards/margins": 0.0768420547246933, "rewards/rejected": -0.15301696956157684, "step": 8075 }, { "epoch": 4.9266432819887145, "grad_norm": 1.4833312034606934, "learning_rate": 1.1659522351500305e-07, "log_odds_chosen": 2.303248405456543, "log_odds_ratio": -0.32492244243621826, "logits/chosen": -0.9474666714668274, "logits/rejected": -0.970318078994751, "logps/chosen": -0.7926133275032043, "logps/rejected": -2.673591375350952, "loss": 1.0809, "nll_loss": 0.9262155294418335, "rewards/accuracies": 0.875, "rewards/chosen": -0.07926133275032043, "rewards/margins": 0.18809780478477478, "rewards/rejected": -0.2673591375350952, "step": 8076 }, { "epoch": 4.927253317065731, "grad_norm": 1.9485831260681152, "learning_rate": 1.1561543172075932e-07, "log_odds_chosen": 2.962324380874634, "log_odds_ratio": -0.20501646399497986, "logits/chosen": -0.9164007902145386, "logits/rejected": -1.015272855758667, "logps/chosen": -0.6912388801574707, "logps/rejected": -2.978870391845703, "loss": 1.0496, "nll_loss": 0.8918241262435913, "rewards/accuracies": 0.875, "rewards/chosen": -0.06912388652563095, "rewards/margins": 0.2287631332874298, "rewards/rejected": -0.29788702726364136, "step": 8077 }, { "epoch": 4.927863352142748, "grad_norm": 12.074275016784668, "learning_rate": 1.146356399265156e-07, "log_odds_chosen": 3.245762825012207, "log_odds_ratio": -0.26613807678222656, "logits/chosen": -0.7366045713424683, "logits/rejected": -0.9036133289337158, "logps/chosen": -0.7016245126724243, "logps/rejected": -3.2465922832489014, "loss": 1.0209, "nll_loss": 1.051310420036316, "rewards/accuracies": 0.875, "rewards/chosen": -0.07016245275735855, "rewards/margins": 0.2544967830181122, "rewards/rejected": -0.32465922832489014, "step": 8078 }, { "epoch": 4.928473387219765, "grad_norm": 1.8059513568878174, "learning_rate": 1.1365584813227188e-07, "log_odds_chosen": 3.2702858448028564, "log_odds_ratio": -0.1783711314201355, "logits/chosen": -0.7860110402107239, "logits/rejected": -0.8815616369247437, "logps/chosen": -0.5593660473823547, "logps/rejected": -3.0303821563720703, "loss": 0.8428, "nll_loss": 0.7665698528289795, "rewards/accuracies": 0.875, "rewards/chosen": -0.055936604738235474, "rewards/margins": 0.24710163474082947, "rewards/rejected": -0.30303823947906494, "step": 8079 }, { "epoch": 4.929083422296782, "grad_norm": 1.5681554079055786, "learning_rate": 1.1267605633802817e-07, "log_odds_chosen": 2.6364622116088867, "log_odds_ratio": -0.37666451930999756, "logits/chosen": -0.8378797173500061, "logits/rejected": -0.9902462959289551, "logps/chosen": -0.7573463916778564, "logps/rejected": -3.0087084770202637, "loss": 1.0076, "nll_loss": 1.0956459045410156, "rewards/accuracies": 0.75, "rewards/chosen": -0.07573464512825012, "rewards/margins": 0.2251361906528473, "rewards/rejected": -0.3008708357810974, "step": 8080 }, { "epoch": 4.929693457373799, "grad_norm": 2.4714794158935547, "learning_rate": 1.1169626454378445e-07, "log_odds_chosen": 2.954174518585205, "log_odds_ratio": -0.16425693035125732, "logits/chosen": -0.7444026470184326, "logits/rejected": -0.8964889645576477, "logps/chosen": -0.3865267038345337, "logps/rejected": -2.3647146224975586, "loss": 1.0351, "nll_loss": 0.7419257164001465, "rewards/accuracies": 1.0, "rewards/chosen": -0.03865266591310501, "rewards/margins": 0.1978188008069992, "rewards/rejected": -0.23647147417068481, "step": 8081 }, { "epoch": 4.930303492450816, "grad_norm": 1.4196875095367432, "learning_rate": 1.1071647274954071e-07, "log_odds_chosen": 1.3070210218429565, "log_odds_ratio": -0.346301406621933, "logits/chosen": -0.72901850938797, "logits/rejected": -0.8650867938995361, "logps/chosen": -0.6564290523529053, "logps/rejected": -1.5112887620925903, "loss": 0.7842, "nll_loss": 0.8285704851150513, "rewards/accuracies": 0.875, "rewards/chosen": -0.06564290821552277, "rewards/margins": 0.08548596501350403, "rewards/rejected": -0.151128888130188, "step": 8082 }, { "epoch": 4.930913527527833, "grad_norm": 1.8932994604110718, "learning_rate": 1.09736680955297e-07, "log_odds_chosen": 2.7572338581085205, "log_odds_ratio": -0.45185166597366333, "logits/chosen": -0.7525114417076111, "logits/rejected": -0.8160191774368286, "logps/chosen": -0.6202290654182434, "logps/rejected": -2.8728837966918945, "loss": 0.8789, "nll_loss": 0.9059685468673706, "rewards/accuracies": 0.75, "rewards/chosen": -0.06202290579676628, "rewards/margins": 0.2252654731273651, "rewards/rejected": -0.2872883975505829, "step": 8083 }, { "epoch": 4.93152356260485, "grad_norm": 2.7050318717956543, "learning_rate": 1.0875688916105328e-07, "log_odds_chosen": 2.733880043029785, "log_odds_ratio": -0.2938230037689209, "logits/chosen": -0.8057007193565369, "logits/rejected": -0.9572409391403198, "logps/chosen": -0.6553681492805481, "logps/rejected": -2.7310516834259033, "loss": 1.075, "nll_loss": 0.8798380494117737, "rewards/accuracies": 0.75, "rewards/chosen": -0.06553681194782257, "rewards/margins": 0.20756837725639343, "rewards/rejected": -0.2731052041053772, "step": 8084 }, { "epoch": 4.932133597681867, "grad_norm": 1.62424898147583, "learning_rate": 1.0777709736680956e-07, "log_odds_chosen": 2.472200870513916, "log_odds_ratio": -0.34841763973236084, "logits/chosen": -0.7169543504714966, "logits/rejected": -0.810485303401947, "logps/chosen": -0.6488275527954102, "logps/rejected": -2.3206140995025635, "loss": 0.852, "nll_loss": 0.7884418964385986, "rewards/accuracies": 0.75, "rewards/chosen": -0.06488275527954102, "rewards/margins": 0.16717864573001862, "rewards/rejected": -0.23206140100955963, "step": 8085 }, { "epoch": 4.932743632758884, "grad_norm": 2.8135368824005127, "learning_rate": 1.0679730557256581e-07, "log_odds_chosen": 1.9462733268737793, "log_odds_ratio": -0.5067858695983887, "logits/chosen": -1.0008270740509033, "logits/rejected": -0.867734432220459, "logps/chosen": -0.6976249814033508, "logps/rejected": -2.2704617977142334, "loss": 1.0372, "nll_loss": 0.8054927587509155, "rewards/accuracies": 0.5, "rewards/chosen": -0.06976251304149628, "rewards/margins": 0.1572836935520172, "rewards/rejected": -0.2270461916923523, "step": 8086 }, { "epoch": 4.933353667835901, "grad_norm": 1.2360737323760986, "learning_rate": 1.0581751377832209e-07, "log_odds_chosen": 0.6172434687614441, "log_odds_ratio": -0.4942378103733063, "logits/chosen": -0.9818251132965088, "logits/rejected": -0.8819566369056702, "logps/chosen": -0.9342221021652222, "logps/rejected": -1.3940792083740234, "loss": 1.2395, "nll_loss": 1.4213316440582275, "rewards/accuracies": 0.75, "rewards/chosen": -0.09342221170663834, "rewards/margins": 0.04598570615053177, "rewards/rejected": -0.1394079178571701, "step": 8087 }, { "epoch": 4.9339637029129175, "grad_norm": 1.234853982925415, "learning_rate": 1.0483772198407837e-07, "log_odds_chosen": 2.3838183879852295, "log_odds_ratio": -0.49922260642051697, "logits/chosen": -0.9412310123443604, "logits/rejected": -1.050459623336792, "logps/chosen": -0.7478768229484558, "logps/rejected": -2.753941059112549, "loss": 0.8971, "nll_loss": 0.9674575328826904, "rewards/accuracies": 0.625, "rewards/chosen": -0.0747876837849617, "rewards/margins": 0.20060643553733826, "rewards/rejected": -0.27539414167404175, "step": 8088 }, { "epoch": 4.934573737989934, "grad_norm": 19.610607147216797, "learning_rate": 1.0385793018983466e-07, "log_odds_chosen": 1.541808843612671, "log_odds_ratio": -0.3706777095794678, "logits/chosen": -1.0098141431808472, "logits/rejected": -1.0948760509490967, "logps/chosen": -1.0413978099822998, "logps/rejected": -2.2414462566375732, "loss": 1.144, "nll_loss": 1.3303953409194946, "rewards/accuracies": 0.875, "rewards/chosen": -0.1041397899389267, "rewards/margins": 0.12000484764575958, "rewards/rejected": -0.22414463758468628, "step": 8089 }, { "epoch": 4.935183773066951, "grad_norm": 4.869716644287109, "learning_rate": 1.0287813839559094e-07, "log_odds_chosen": 1.426537275314331, "log_odds_ratio": -0.6372955441474915, "logits/chosen": -0.9627600908279419, "logits/rejected": -0.9889965057373047, "logps/chosen": -0.8886100649833679, "logps/rejected": -2.0523300170898438, "loss": 1.0938, "nll_loss": 1.1216946840286255, "rewards/accuracies": 0.5, "rewards/chosen": -0.08886101096868515, "rewards/margins": 0.1163720190525055, "rewards/rejected": -0.20523302257061005, "step": 8090 }, { "epoch": 4.935793808143968, "grad_norm": 3.0446760654449463, "learning_rate": 1.018983466013472e-07, "log_odds_chosen": 2.979090929031372, "log_odds_ratio": -0.15008635818958282, "logits/chosen": -0.9135371446609497, "logits/rejected": -0.9890427589416504, "logps/chosen": -0.6358428597450256, "logps/rejected": -2.9317760467529297, "loss": 1.0459, "nll_loss": 0.8688210248947144, "rewards/accuracies": 0.875, "rewards/chosen": -0.06358428299427032, "rewards/margins": 0.22959332168102264, "rewards/rejected": -0.29317760467529297, "step": 8091 }, { "epoch": 4.936403843220985, "grad_norm": 1.4841848611831665, "learning_rate": 1.0091855480710349e-07, "log_odds_chosen": 2.719189167022705, "log_odds_ratio": -0.1964622139930725, "logits/chosen": -0.9186500310897827, "logits/rejected": -0.9481037855148315, "logps/chosen": -0.6237576007843018, "logps/rejected": -2.593968152999878, "loss": 0.9399, "nll_loss": 0.8744320273399353, "rewards/accuracies": 1.0, "rewards/chosen": -0.062375757843256, "rewards/margins": 0.19702103734016418, "rewards/rejected": -0.2593967914581299, "step": 8092 }, { "epoch": 4.937013878298002, "grad_norm": 2.2775096893310547, "learning_rate": 9.993876301285977e-08, "log_odds_chosen": 0.2805701494216919, "log_odds_ratio": -0.950879693031311, "logits/chosen": -0.974581241607666, "logits/rejected": -1.0204817056655884, "logps/chosen": -1.1191589832305908, "logps/rejected": -1.6021473407745361, "loss": 1.2229, "nll_loss": 1.2621384859085083, "rewards/accuracies": 0.375, "rewards/chosen": -0.11191590130329132, "rewards/margins": 0.04829883575439453, "rewards/rejected": -0.16021475195884705, "step": 8093 }, { "epoch": 4.937623913375019, "grad_norm": 1.164614200592041, "learning_rate": 9.895897121861605e-08, "log_odds_chosen": 2.2950620651245117, "log_odds_ratio": -0.4581362307071686, "logits/chosen": -0.7997235059738159, "logits/rejected": -0.9352317452430725, "logps/chosen": -0.7815693020820618, "logps/rejected": -2.651212215423584, "loss": 1.1454, "nll_loss": 0.9681308269500732, "rewards/accuracies": 0.75, "rewards/chosen": -0.07815693318843842, "rewards/margins": 0.18696428835391998, "rewards/rejected": -0.2651212215423584, "step": 8094 }, { "epoch": 4.938233948452036, "grad_norm": 2.4478507041931152, "learning_rate": 9.797917942437232e-08, "log_odds_chosen": 1.9560843706130981, "log_odds_ratio": -0.39261314272880554, "logits/chosen": -1.044558048248291, "logits/rejected": -1.043363332748413, "logps/chosen": -0.8734118938446045, "logps/rejected": -2.4515438079833984, "loss": 1.0415, "nll_loss": 0.9543853402137756, "rewards/accuracies": 0.75, "rewards/chosen": -0.08734118938446045, "rewards/margins": 0.1578132063150406, "rewards/rejected": -0.24515439569950104, "step": 8095 }, { "epoch": 4.938843983529053, "grad_norm": 7.792159557342529, "learning_rate": 9.69993876301286e-08, "log_odds_chosen": 1.5483622550964355, "log_odds_ratio": -0.3894192576408386, "logits/chosen": -0.8875843286514282, "logits/rejected": -0.9936113953590393, "logps/chosen": -0.9482461214065552, "logps/rejected": -2.054675340652466, "loss": 0.9926, "nll_loss": 0.9185185432434082, "rewards/accuracies": 0.875, "rewards/chosen": -0.09482461214065552, "rewards/margins": 0.1106429249048233, "rewards/rejected": -0.20546753704547882, "step": 8096 }, { "epoch": 4.93945401860607, "grad_norm": 2.5534749031066895, "learning_rate": 9.601959583588486e-08, "log_odds_chosen": 3.908690929412842, "log_odds_ratio": -0.15287256240844727, "logits/chosen": -0.805385947227478, "logits/rejected": -0.8593781590461731, "logps/chosen": -0.5261445045471191, "logps/rejected": -3.677898406982422, "loss": 0.8568, "nll_loss": 0.606891393661499, "rewards/accuracies": 1.0, "rewards/chosen": -0.052614450454711914, "rewards/margins": 0.3151753842830658, "rewards/rejected": -0.3677898049354553, "step": 8097 }, { "epoch": 4.940064053683087, "grad_norm": 1.0829589366912842, "learning_rate": 9.503980404164115e-08, "log_odds_chosen": 1.899315595626831, "log_odds_ratio": -0.289758563041687, "logits/chosen": -1.031732439994812, "logits/rejected": -1.0784058570861816, "logps/chosen": -0.6157260537147522, "logps/rejected": -2.034147262573242, "loss": 0.9604, "nll_loss": 0.9215684533119202, "rewards/accuracies": 1.0, "rewards/chosen": -0.0615726076066494, "rewards/margins": 0.14184211194515228, "rewards/rejected": -0.20341473817825317, "step": 8098 }, { "epoch": 4.940674088760104, "grad_norm": 3.0604441165924072, "learning_rate": 9.406001224739743e-08, "log_odds_chosen": 2.3466644287109375, "log_odds_ratio": -0.4694845974445343, "logits/chosen": -0.9847956299781799, "logits/rejected": -1.1243537664413452, "logps/chosen": -0.7199714183807373, "logps/rejected": -2.7633652687072754, "loss": 1.0147, "nll_loss": 1.1027605533599854, "rewards/accuracies": 0.625, "rewards/chosen": -0.07199714332818985, "rewards/margins": 0.204339399933815, "rewards/rejected": -0.27633652091026306, "step": 8099 }, { "epoch": 4.941284123837121, "grad_norm": 1.7576570510864258, "learning_rate": 9.30802204531537e-08, "log_odds_chosen": 1.9075452089309692, "log_odds_ratio": -0.5323699712753296, "logits/chosen": -0.9054924249649048, "logits/rejected": -1.0482285022735596, "logps/chosen": -0.8584281206130981, "logps/rejected": -2.375898599624634, "loss": 1.1236, "nll_loss": 0.9900009036064148, "rewards/accuracies": 0.5, "rewards/chosen": -0.08584281802177429, "rewards/margins": 0.15174704790115356, "rewards/rejected": -0.23758986592292786, "step": 8100 }, { "epoch": 4.941894158914137, "grad_norm": 17.644962310791016, "learning_rate": 9.210042865890998e-08, "log_odds_chosen": 1.3131189346313477, "log_odds_ratio": -0.32914456725120544, "logits/chosen": -0.7479184865951538, "logits/rejected": -0.7894142866134644, "logps/chosen": -0.8300758600234985, "logps/rejected": -1.8505195379257202, "loss": 0.9851, "nll_loss": 0.9426140189170837, "rewards/accuracies": 1.0, "rewards/chosen": -0.08300758898258209, "rewards/margins": 0.10204437375068665, "rewards/rejected": -0.18505196273326874, "step": 8101 }, { "epoch": 4.942504193991154, "grad_norm": 1.864037036895752, "learning_rate": 9.112063686466626e-08, "log_odds_chosen": 1.6730997562408447, "log_odds_ratio": -0.515386700630188, "logits/chosen": -0.9732460379600525, "logits/rejected": -0.9721295833587646, "logps/chosen": -0.7440831661224365, "logps/rejected": -2.140719413757324, "loss": 1.0058, "nll_loss": 0.965465784072876, "rewards/accuracies": 0.5, "rewards/chosen": -0.07440831512212753, "rewards/margins": 0.13966362178325653, "rewards/rejected": -0.21407194435596466, "step": 8102 }, { "epoch": 4.943114229068171, "grad_norm": 1.7692731618881226, "learning_rate": 9.014084507042254e-08, "log_odds_chosen": 0.7762687802314758, "log_odds_ratio": -0.5260235071182251, "logits/chosen": -0.9521635174751282, "logits/rejected": -0.959466814994812, "logps/chosen": -0.6576911211013794, "logps/rejected": -1.2610536813735962, "loss": 1.1724, "nll_loss": 0.9983704090118408, "rewards/accuracies": 0.5, "rewards/chosen": -0.06576910614967346, "rewards/margins": 0.060336269438266754, "rewards/rejected": -0.1261053830385208, "step": 8103 }, { "epoch": 4.943724264145189, "grad_norm": 1.7170915603637695, "learning_rate": 8.91610532761788e-08, "log_odds_chosen": 3.6923601627349854, "log_odds_ratio": -0.24276277422904968, "logits/chosen": -0.7181012630462646, "logits/rejected": -0.9899048805236816, "logps/chosen": -0.4709233045578003, "logps/rejected": -3.222121238708496, "loss": 0.8473, "nll_loss": 0.6225235462188721, "rewards/accuracies": 0.875, "rewards/chosen": -0.04709232971072197, "rewards/margins": 0.275119811296463, "rewards/rejected": -0.3222121596336365, "step": 8104 }, { "epoch": 4.944334299222206, "grad_norm": 1.5133627653121948, "learning_rate": 8.818126148193509e-08, "log_odds_chosen": 1.3630746603012085, "log_odds_ratio": -0.5595124959945679, "logits/chosen": -0.9572005271911621, "logits/rejected": -0.9894516468048096, "logps/chosen": -0.7418142557144165, "logps/rejected": -1.7816438674926758, "loss": 1.0539, "nll_loss": 1.0203797817230225, "rewards/accuracies": 0.75, "rewards/chosen": -0.07418143004179001, "rewards/margins": 0.10398294776678085, "rewards/rejected": -0.17816437780857086, "step": 8105 }, { "epoch": 4.9449443342992225, "grad_norm": 1.6558730602264404, "learning_rate": 8.720146968769137e-08, "log_odds_chosen": 3.526581048965454, "log_odds_ratio": -0.2856079936027527, "logits/chosen": -0.9990313053131104, "logits/rejected": -1.097710371017456, "logps/chosen": -0.6590876579284668, "logps/rejected": -3.5937488079071045, "loss": 1.0268, "nll_loss": 1.0718345642089844, "rewards/accuracies": 0.875, "rewards/chosen": -0.0659087672829628, "rewards/margins": 0.29346609115600586, "rewards/rejected": -0.35937485098838806, "step": 8106 }, { "epoch": 4.945554369376239, "grad_norm": 1.2670737504959106, "learning_rate": 8.622167789344764e-08, "log_odds_chosen": 3.2356650829315186, "log_odds_ratio": -0.35089290142059326, "logits/chosen": -0.7700662612915039, "logits/rejected": -0.8642271757125854, "logps/chosen": -0.6434401273727417, "logps/rejected": -3.2216074466705322, "loss": 0.8767, "nll_loss": 0.719841718673706, "rewards/accuracies": 0.625, "rewards/chosen": -0.06434401869773865, "rewards/margins": 0.25781673192977905, "rewards/rejected": -0.3221607208251953, "step": 8107 }, { "epoch": 4.946164404453256, "grad_norm": 1.5078870058059692, "learning_rate": 8.524188609920392e-08, "log_odds_chosen": 3.824469804763794, "log_odds_ratio": -0.27503788471221924, "logits/chosen": -1.1202337741851807, "logits/rejected": -1.180031418800354, "logps/chosen": -0.7666311264038086, "logps/rejected": -3.862976551055908, "loss": 1.2015, "nll_loss": 1.1487452983856201, "rewards/accuracies": 1.0, "rewards/chosen": -0.07666311413049698, "rewards/margins": 0.3096345067024231, "rewards/rejected": -0.3862976133823395, "step": 8108 }, { "epoch": 4.946774439530273, "grad_norm": 1.4461108446121216, "learning_rate": 8.426209430496018e-08, "log_odds_chosen": 3.2873504161834717, "log_odds_ratio": -0.23352974653244019, "logits/chosen": -0.7161498069763184, "logits/rejected": -1.046065092086792, "logps/chosen": -0.6308585405349731, "logps/rejected": -3.3946938514709473, "loss": 0.9765, "nll_loss": 0.7188459634780884, "rewards/accuracies": 1.0, "rewards/chosen": -0.06308585405349731, "rewards/margins": 0.27638351917266846, "rewards/rejected": -0.33946937322616577, "step": 8109 }, { "epoch": 4.94738447460729, "grad_norm": 1.6631197929382324, "learning_rate": 8.328230251071647e-08, "log_odds_chosen": 2.4496829509735107, "log_odds_ratio": -0.3841996490955353, "logits/chosen": -1.0008336305618286, "logits/rejected": -1.2085120677947998, "logps/chosen": -0.8858809471130371, "logps/rejected": -3.082205295562744, "loss": 1.1651, "nll_loss": 1.5009242296218872, "rewards/accuracies": 0.875, "rewards/chosen": -0.08858809620141983, "rewards/margins": 0.21963244676589966, "rewards/rejected": -0.3082205653190613, "step": 8110 }, { "epoch": 4.947994509684307, "grad_norm": 1.3886414766311646, "learning_rate": 8.230251071647275e-08, "log_odds_chosen": 1.4167991876602173, "log_odds_ratio": -0.4189196228981018, "logits/chosen": -0.8624370098114014, "logits/rejected": -0.944452166557312, "logps/chosen": -0.7711682319641113, "logps/rejected": -1.7962582111358643, "loss": 1.0562, "nll_loss": 0.9208611249923706, "rewards/accuracies": 0.75, "rewards/chosen": -0.07711682468652725, "rewards/margins": 0.10250899195671082, "rewards/rejected": -0.17962580919265747, "step": 8111 }, { "epoch": 4.948604544761324, "grad_norm": 1.6519345045089722, "learning_rate": 8.132271892222903e-08, "log_odds_chosen": 2.380985736846924, "log_odds_ratio": -0.31119629740715027, "logits/chosen": -0.8667920231819153, "logits/rejected": -0.94652259349823, "logps/chosen": -0.8091065883636475, "logps/rejected": -2.6534042358398438, "loss": 1.0212, "nll_loss": 1.0476289987564087, "rewards/accuracies": 1.0, "rewards/chosen": -0.08091066777706146, "rewards/margins": 0.18442977964878082, "rewards/rejected": -0.2653404176235199, "step": 8112 }, { "epoch": 4.9492145798383405, "grad_norm": 5.105556964874268, "learning_rate": 8.03429271279853e-08, "log_odds_chosen": 2.4806504249572754, "log_odds_ratio": -0.34974801540374756, "logits/chosen": -0.7214344143867493, "logits/rejected": -0.9213232398033142, "logps/chosen": -0.6504994630813599, "logps/rejected": -2.512413263320923, "loss": 0.9781, "nll_loss": 0.845840334892273, "rewards/accuracies": 0.875, "rewards/chosen": -0.06504995375871658, "rewards/margins": 0.1861913800239563, "rewards/rejected": -0.2512413263320923, "step": 8113 }, { "epoch": 4.949824614915357, "grad_norm": 5.641109943389893, "learning_rate": 7.936313533374158e-08, "log_odds_chosen": 1.7298356294631958, "log_odds_ratio": -0.42537063360214233, "logits/chosen": -0.6628025770187378, "logits/rejected": -0.9115517139434814, "logps/chosen": -0.7038047313690186, "logps/rejected": -2.080857276916504, "loss": 0.9886, "nll_loss": 0.9436404705047607, "rewards/accuracies": 0.625, "rewards/chosen": -0.07038047164678574, "rewards/margins": 0.13770528137683868, "rewards/rejected": -0.20808574557304382, "step": 8114 }, { "epoch": 4.950434649992374, "grad_norm": 1.3302395343780518, "learning_rate": 7.838334353949786e-08, "log_odds_chosen": 3.2362005710601807, "log_odds_ratio": -0.3019172251224518, "logits/chosen": -0.906732976436615, "logits/rejected": -1.1718312501907349, "logps/chosen": -0.5996795892715454, "logps/rejected": -3.317650079727173, "loss": 1.1319, "nll_loss": 0.8347510099411011, "rewards/accuracies": 0.75, "rewards/chosen": -0.05996796116232872, "rewards/margins": 0.2717970609664917, "rewards/rejected": -0.33176499605178833, "step": 8115 }, { "epoch": 4.951044685069391, "grad_norm": 1.4956879615783691, "learning_rate": 7.740355174525414e-08, "log_odds_chosen": 1.1463643312454224, "log_odds_ratio": -0.4505118131637573, "logits/chosen": -0.9209109544754028, "logits/rejected": -1.0805132389068604, "logps/chosen": -0.9182901382446289, "logps/rejected": -1.8086793422698975, "loss": 1.0937, "nll_loss": 0.9729257225990295, "rewards/accuracies": 0.625, "rewards/chosen": -0.09182900935411453, "rewards/margins": 0.0890389233827591, "rewards/rejected": -0.18086794018745422, "step": 8116 }, { "epoch": 4.951654720146409, "grad_norm": 1.6101027727127075, "learning_rate": 7.642375995101042e-08, "log_odds_chosen": 3.2765886783599854, "log_odds_ratio": -0.18625976145267487, "logits/chosen": -0.7110152244567871, "logits/rejected": -0.9582641124725342, "logps/chosen": -0.6339570879936218, "logps/rejected": -3.233863353729248, "loss": 0.9081, "nll_loss": 0.8143856525421143, "rewards/accuracies": 1.0, "rewards/chosen": -0.06339570879936218, "rewards/margins": 0.2599906325340271, "rewards/rejected": -0.32338637113571167, "step": 8117 }, { "epoch": 4.952264755223426, "grad_norm": 3.139493703842163, "learning_rate": 7.544396815676667e-08, "log_odds_chosen": 0.8576575517654419, "log_odds_ratio": -0.9184097051620483, "logits/chosen": -1.0666769742965698, "logits/rejected": -1.1288907527923584, "logps/chosen": -1.2402927875518799, "logps/rejected": -1.884692907333374, "loss": 1.1746, "nll_loss": 1.4702184200286865, "rewards/accuracies": 0.375, "rewards/chosen": -0.12402927875518799, "rewards/margins": 0.06444001197814941, "rewards/rejected": -0.1884692907333374, "step": 8118 }, { "epoch": 4.952874790300442, "grad_norm": 1.4605801105499268, "learning_rate": 7.446417636252296e-08, "log_odds_chosen": 0.5687973499298096, "log_odds_ratio": -0.5636356472969055, "logits/chosen": -0.928773820400238, "logits/rejected": -0.8618385791778564, "logps/chosen": -0.8860317468643188, "logps/rejected": -1.2251468896865845, "loss": 1.0718, "nll_loss": 1.2104532718658447, "rewards/accuracies": 0.5, "rewards/chosen": -0.088603176176548, "rewards/margins": 0.03391151875257492, "rewards/rejected": -0.12251468747854233, "step": 8119 }, { "epoch": 4.953484825377459, "grad_norm": 2.2762231826782227, "learning_rate": 7.348438456827924e-08, "log_odds_chosen": 1.8626796007156372, "log_odds_ratio": -0.3784465193748474, "logits/chosen": -1.0709220170974731, "logits/rejected": -1.2065329551696777, "logps/chosen": -0.8427221179008484, "logps/rejected": -2.1285102367401123, "loss": 0.8963, "nll_loss": 1.0460129976272583, "rewards/accuracies": 0.875, "rewards/chosen": -0.08427222073078156, "rewards/margins": 0.1285787969827652, "rewards/rejected": -0.21285101771354675, "step": 8120 }, { "epoch": 4.954094860454476, "grad_norm": 1.9352264404296875, "learning_rate": 7.250459277403552e-08, "log_odds_chosen": 2.096280574798584, "log_odds_ratio": -0.49607619643211365, "logits/chosen": -0.9650133848190308, "logits/rejected": -1.0336105823516846, "logps/chosen": -0.6281604170799255, "logps/rejected": -2.121434211730957, "loss": 0.9571, "nll_loss": 0.9537579417228699, "rewards/accuracies": 0.625, "rewards/chosen": -0.06281604617834091, "rewards/margins": 0.1493273675441742, "rewards/rejected": -0.2121434062719345, "step": 8121 }, { "epoch": 4.954704895531493, "grad_norm": 1.7097595930099487, "learning_rate": 7.152480097979179e-08, "log_odds_chosen": 0.4650108516216278, "log_odds_ratio": -0.6396524310112, "logits/chosen": -1.2435628175735474, "logits/rejected": -1.07967209815979, "logps/chosen": -0.8892854452133179, "logps/rejected": -1.2266676425933838, "loss": 1.1279, "nll_loss": 1.1335177421569824, "rewards/accuracies": 0.625, "rewards/chosen": -0.08892855048179626, "rewards/margins": 0.03373822569847107, "rewards/rejected": -0.12266676872968674, "step": 8122 }, { "epoch": 4.95531493060851, "grad_norm": 2.2121520042419434, "learning_rate": 7.054500918554807e-08, "log_odds_chosen": 1.376931071281433, "log_odds_ratio": -0.5727642774581909, "logits/chosen": -0.9647617340087891, "logits/rejected": -1.0607273578643799, "logps/chosen": -0.873231053352356, "logps/rejected": -1.9744207859039307, "loss": 1.1967, "nll_loss": 0.9761047959327698, "rewards/accuracies": 0.625, "rewards/chosen": -0.08732311427593231, "rewards/margins": 0.11011896282434464, "rewards/rejected": -0.19744208455085754, "step": 8123 }, { "epoch": 4.955924965685527, "grad_norm": 1.2167141437530518, "learning_rate": 6.956521739130435e-08, "log_odds_chosen": 3.664999485015869, "log_odds_ratio": -0.18475449085235596, "logits/chosen": -0.8560627102851868, "logits/rejected": -0.9369478821754456, "logps/chosen": -0.6018450260162354, "logps/rejected": -3.465512752532959, "loss": 0.9058, "nll_loss": 0.7667636871337891, "rewards/accuracies": 0.875, "rewards/chosen": -0.060184504836797714, "rewards/margins": 0.2863668203353882, "rewards/rejected": -0.3465512990951538, "step": 8124 }, { "epoch": 4.9565350007625435, "grad_norm": 1.0910626649856567, "learning_rate": 6.858542559706063e-08, "log_odds_chosen": 2.595611333847046, "log_odds_ratio": -0.29525503516197205, "logits/chosen": -0.816644549369812, "logits/rejected": -0.8377807140350342, "logps/chosen": -0.6623014211654663, "logps/rejected": -2.6746296882629395, "loss": 0.9519, "nll_loss": 0.9471336603164673, "rewards/accuracies": 0.75, "rewards/chosen": -0.06623014807701111, "rewards/margins": 0.20123282074928284, "rewards/rejected": -0.26746296882629395, "step": 8125 }, { "epoch": 4.957145035839561, "grad_norm": 1.3857500553131104, "learning_rate": 6.76056338028169e-08, "log_odds_chosen": 5.5199995040893555, "log_odds_ratio": -0.17129835486412048, "logits/chosen": -0.7700735926628113, "logits/rejected": -1.1121094226837158, "logps/chosen": -0.5458790063858032, "logps/rejected": -5.164220809936523, "loss": 0.9006, "nll_loss": 0.71440190076828, "rewards/accuracies": 0.875, "rewards/chosen": -0.054587896913290024, "rewards/margins": 0.46183425188064575, "rewards/rejected": -0.5164221525192261, "step": 8126 }, { "epoch": 4.957755070916578, "grad_norm": 1.904737114906311, "learning_rate": 6.662584200857318e-08, "log_odds_chosen": 1.833911418914795, "log_odds_ratio": -0.42130693793296814, "logits/chosen": -0.8845731019973755, "logits/rejected": -1.1092846393585205, "logps/chosen": -0.78557288646698, "logps/rejected": -2.094571828842163, "loss": 1.1251, "nll_loss": 1.2272030115127563, "rewards/accuracies": 0.625, "rewards/chosen": -0.07855728268623352, "rewards/margins": 0.13089992105960846, "rewards/rejected": -0.20945720374584198, "step": 8127 }, { "epoch": 4.958365105993595, "grad_norm": 1.6692615747451782, "learning_rate": 6.564605021432945e-08, "log_odds_chosen": 0.415041446685791, "log_odds_ratio": -0.6118313074111938, "logits/chosen": -0.9225208163261414, "logits/rejected": -1.0270769596099854, "logps/chosen": -0.9532460570335388, "logps/rejected": -1.3154025077819824, "loss": 1.0935, "nll_loss": 1.077516794204712, "rewards/accuracies": 0.5, "rewards/chosen": -0.09532461315393448, "rewards/margins": 0.0362156443297863, "rewards/rejected": -0.13154026865959167, "step": 8128 }, { "epoch": 4.958975141070612, "grad_norm": 1.20231294631958, "learning_rate": 6.466625842008573e-08, "log_odds_chosen": 0.5510940551757812, "log_odds_ratio": -0.6878290176391602, "logits/chosen": -0.8159444332122803, "logits/rejected": -1.0236575603485107, "logps/chosen": -0.8238885998725891, "logps/rejected": -1.2499792575836182, "loss": 0.9794, "nll_loss": 0.8873218894004822, "rewards/accuracies": 0.375, "rewards/chosen": -0.08238886296749115, "rewards/margins": 0.04260906204581261, "rewards/rejected": -0.12499792128801346, "step": 8129 }, { "epoch": 4.959585176147629, "grad_norm": 7.25202751159668, "learning_rate": 6.368646662584201e-08, "log_odds_chosen": 4.620698928833008, "log_odds_ratio": -0.18033698201179504, "logits/chosen": -0.9875591993331909, "logits/rejected": -1.029426097869873, "logps/chosen": -0.8252185583114624, "logps/rejected": -4.807349681854248, "loss": 1.0719, "nll_loss": 1.0145208835601807, "rewards/accuracies": 0.875, "rewards/chosen": -0.08252185583114624, "rewards/margins": 0.39821314811706543, "rewards/rejected": -0.48073500394821167, "step": 8130 }, { "epoch": 4.9601952112246455, "grad_norm": 1.5290683507919312, "learning_rate": 6.270667483159828e-08, "log_odds_chosen": 2.9896011352539062, "log_odds_ratio": -0.3598881959915161, "logits/chosen": -0.9184220433235168, "logits/rejected": -0.9544498920440674, "logps/chosen": -0.6514831781387329, "logps/rejected": -3.155914306640625, "loss": 0.9319, "nll_loss": 0.8600243926048279, "rewards/accuracies": 0.75, "rewards/chosen": -0.06514832377433777, "rewards/margins": 0.25044310092926025, "rewards/rejected": -0.315591424703598, "step": 8131 }, { "epoch": 4.960805246301662, "grad_norm": 13.723468780517578, "learning_rate": 6.172688303735456e-08, "log_odds_chosen": 1.1777236461639404, "log_odds_ratio": -0.3629186451435089, "logits/chosen": -1.0496189594268799, "logits/rejected": -0.9830409288406372, "logps/chosen": -0.8205752372741699, "logps/rejected": -1.5251697301864624, "loss": 1.0532, "nll_loss": 1.0528315305709839, "rewards/accuracies": 0.875, "rewards/chosen": -0.08205752074718475, "rewards/margins": 0.07045946270227432, "rewards/rejected": -0.15251697599887848, "step": 8132 }, { "epoch": 4.961415281378679, "grad_norm": 2.2581377029418945, "learning_rate": 6.074709124311084e-08, "log_odds_chosen": 1.90364670753479, "log_odds_ratio": -0.4586517810821533, "logits/chosen": -0.985694408416748, "logits/rejected": -0.9591920971870422, "logps/chosen": -0.8240545988082886, "logps/rejected": -2.4174511432647705, "loss": 1.0635, "nll_loss": 1.1190080642700195, "rewards/accuracies": 0.75, "rewards/chosen": -0.0824054628610611, "rewards/margins": 0.15933962166309357, "rewards/rejected": -0.24174508452415466, "step": 8133 }, { "epoch": 4.962025316455696, "grad_norm": 6.102175712585449, "learning_rate": 5.97672994488671e-08, "log_odds_chosen": 1.4268856048583984, "log_odds_ratio": -0.4761148691177368, "logits/chosen": -1.0893027782440186, "logits/rejected": -1.1862437725067139, "logps/chosen": -0.9357112050056458, "logps/rejected": -1.9993714094161987, "loss": 1.0438, "nll_loss": 1.1073853969573975, "rewards/accuracies": 0.5, "rewards/chosen": -0.09357112646102905, "rewards/margins": 0.10636603087186813, "rewards/rejected": -0.1999371349811554, "step": 8134 }, { "epoch": 4.962635351532713, "grad_norm": 1.335278868675232, "learning_rate": 5.878750765462339e-08, "log_odds_chosen": 1.0183464288711548, "log_odds_ratio": -0.5310505628585815, "logits/chosen": -1.0443062782287598, "logits/rejected": -0.9893297553062439, "logps/chosen": -0.803905725479126, "logps/rejected": -1.3250179290771484, "loss": 0.9772, "nll_loss": 0.975094199180603, "rewards/accuracies": 0.5, "rewards/chosen": -0.0803905725479126, "rewards/margins": 0.05211120843887329, "rewards/rejected": -0.13250179588794708, "step": 8135 }, { "epoch": 4.96324538660973, "grad_norm": 15.95641803741455, "learning_rate": 5.780771586037966e-08, "log_odds_chosen": 1.4703166484832764, "log_odds_ratio": -0.3071689009666443, "logits/chosen": -0.7892720699310303, "logits/rejected": -0.8978325724601746, "logps/chosen": -0.7894997000694275, "logps/rejected": -1.8026628494262695, "loss": 1.0446, "nll_loss": 0.9258633852005005, "rewards/accuracies": 0.75, "rewards/chosen": -0.07894997298717499, "rewards/margins": 0.10131631791591644, "rewards/rejected": -0.18026627600193024, "step": 8136 }, { "epoch": 4.9638554216867465, "grad_norm": 1.9296789169311523, "learning_rate": 5.682792406613594e-08, "log_odds_chosen": 1.6543073654174805, "log_odds_ratio": -0.39302942156791687, "logits/chosen": -1.1093108654022217, "logits/rejected": -1.0368757247924805, "logps/chosen": -0.7809736132621765, "logps/rejected": -2.0131218433380127, "loss": 1.075, "nll_loss": 1.0650038719177246, "rewards/accuracies": 0.75, "rewards/chosen": -0.07809735834598541, "rewards/margins": 0.12321481853723526, "rewards/rejected": -0.20131218433380127, "step": 8137 }, { "epoch": 4.964465456763764, "grad_norm": 1.3912113904953003, "learning_rate": 5.584813227189222e-08, "log_odds_chosen": 2.4798998832702637, "log_odds_ratio": -0.38536763191223145, "logits/chosen": -0.8520576357841492, "logits/rejected": -1.082888126373291, "logps/chosen": -0.7110581398010254, "logps/rejected": -2.636538505554199, "loss": 1.1201, "nll_loss": 0.9904366731643677, "rewards/accuracies": 0.75, "rewards/chosen": -0.07110581547021866, "rewards/margins": 0.19254803657531738, "rewards/rejected": -0.26365387439727783, "step": 8138 }, { "epoch": 4.965075491840781, "grad_norm": 4.179666042327881, "learning_rate": 5.48683404776485e-08, "log_odds_chosen": 2.7074661254882812, "log_odds_ratio": -0.2389528602361679, "logits/chosen": -1.0530580282211304, "logits/rejected": -1.1083933115005493, "logps/chosen": -0.8326781988143921, "logps/rejected": -2.835820436477661, "loss": 1.0699, "nll_loss": 1.1167904138565063, "rewards/accuracies": 0.875, "rewards/chosen": -0.08326782286167145, "rewards/margins": 0.2003142237663269, "rewards/rejected": -0.28358203172683716, "step": 8139 }, { "epoch": 4.965685526917798, "grad_norm": 2.3807597160339355, "learning_rate": 5.388854868340478e-08, "log_odds_chosen": 2.3829147815704346, "log_odds_ratio": -0.19363000988960266, "logits/chosen": -0.8605759143829346, "logits/rejected": -1.0745351314544678, "logps/chosen": -0.6944072842597961, "logps/rejected": -2.5074610710144043, "loss": 0.9894, "nll_loss": 0.8276812434196472, "rewards/accuracies": 0.875, "rewards/chosen": -0.06944072991609573, "rewards/margins": 0.18130537867546082, "rewards/rejected": -0.25074613094329834, "step": 8140 }, { "epoch": 4.966295561994815, "grad_norm": 10.466720581054688, "learning_rate": 5.2908756889161047e-08, "log_odds_chosen": 1.35515558719635, "log_odds_ratio": -0.4991579055786133, "logits/chosen": -0.8695592880249023, "logits/rejected": -1.0669581890106201, "logps/chosen": -0.8877031803131104, "logps/rejected": -1.9261709451675415, "loss": 1.0218, "nll_loss": 1.036690592765808, "rewards/accuracies": 0.75, "rewards/chosen": -0.08877032995223999, "rewards/margins": 0.10384676605463028, "rewards/rejected": -0.19261710345745087, "step": 8141 }, { "epoch": 4.966905597071832, "grad_norm": 9.127903938293457, "learning_rate": 5.192896509491733e-08, "log_odds_chosen": 3.583782196044922, "log_odds_ratio": -0.2790583074092865, "logits/chosen": -0.7581020593643188, "logits/rejected": -0.902205228805542, "logps/chosen": -0.6724910736083984, "logps/rejected": -3.523926019668579, "loss": 0.9755, "nll_loss": 0.8049518465995789, "rewards/accuracies": 0.75, "rewards/chosen": -0.0672491118311882, "rewards/margins": 0.28514349460601807, "rewards/rejected": -0.35239261388778687, "step": 8142 }, { "epoch": 4.9675156321488485, "grad_norm": 5.982781410217285, "learning_rate": 5.09491733006736e-08, "log_odds_chosen": 1.6461058855056763, "log_odds_ratio": -0.39397957921028137, "logits/chosen": -0.9162533283233643, "logits/rejected": -0.903703510761261, "logps/chosen": -0.6211186051368713, "logps/rejected": -1.9435489177703857, "loss": 1.2192, "nll_loss": 0.8885009288787842, "rewards/accuracies": 0.875, "rewards/chosen": -0.06211186200380325, "rewards/margins": 0.13224303722381592, "rewards/rejected": -0.19435490667819977, "step": 8143 }, { "epoch": 4.968125667225865, "grad_norm": 20.488779067993164, "learning_rate": 4.996938150642988e-08, "log_odds_chosen": 1.1519055366516113, "log_odds_ratio": -0.45974695682525635, "logits/chosen": -0.9860293865203857, "logits/rejected": -1.0613303184509277, "logps/chosen": -0.7406649589538574, "logps/rejected": -1.2313036918640137, "loss": 1.144, "nll_loss": 1.1101869344711304, "rewards/accuracies": 0.875, "rewards/chosen": -0.07406650483608246, "rewards/margins": 0.04906386882066727, "rewards/rejected": -0.12313036620616913, "step": 8144 }, { "epoch": 4.968735702302882, "grad_norm": 6.094376564025879, "learning_rate": 4.898958971218616e-08, "log_odds_chosen": 3.105621814727783, "log_odds_ratio": -0.38524922728538513, "logits/chosen": -0.8353404998779297, "logits/rejected": -0.957788348197937, "logps/chosen": -0.6506509184837341, "logps/rejected": -3.0631320476531982, "loss": 1.1375, "nll_loss": 0.7737205028533936, "rewards/accuracies": 0.625, "rewards/chosen": -0.06506509333848953, "rewards/margins": 0.24124813079833984, "rewards/rejected": -0.3063132166862488, "step": 8145 }, { "epoch": 4.969345737379899, "grad_norm": 3.030247211456299, "learning_rate": 4.800979791794243e-08, "log_odds_chosen": 2.8817591667175293, "log_odds_ratio": -0.22949199378490448, "logits/chosen": -0.8439038395881653, "logits/rejected": -0.8437329530715942, "logps/chosen": -0.7082314491271973, "logps/rejected": -2.8739171028137207, "loss": 1.0744, "nll_loss": 0.8406127691268921, "rewards/accuracies": 0.875, "rewards/chosen": -0.07082314789295197, "rewards/margins": 0.21656858921051025, "rewards/rejected": -0.287391722202301, "step": 8146 }, { "epoch": 4.969955772456916, "grad_norm": 1.5650492906570435, "learning_rate": 4.703000612369871e-08, "log_odds_chosen": 2.8258910179138184, "log_odds_ratio": -0.28906384110450745, "logits/chosen": -0.8053749203681946, "logits/rejected": -0.9205565452575684, "logps/chosen": -0.7588988542556763, "logps/rejected": -3.0862817764282227, "loss": 0.9201, "nll_loss": 0.9981516003608704, "rewards/accuracies": 0.875, "rewards/chosen": -0.07588988542556763, "rewards/margins": 0.23273828625679016, "rewards/rejected": -0.3086281716823578, "step": 8147 }, { "epoch": 4.970565807533934, "grad_norm": 10.615655899047852, "learning_rate": 4.605021432945499e-08, "log_odds_chosen": 1.9400455951690674, "log_odds_ratio": -0.423305869102478, "logits/chosen": -1.0057486295700073, "logits/rejected": -1.150337815284729, "logps/chosen": -0.9068121314048767, "logps/rejected": -2.5693230628967285, "loss": 1.1129, "nll_loss": 1.1214501857757568, "rewards/accuracies": 0.75, "rewards/chosen": -0.09068120270967484, "rewards/margins": 0.16625110805034637, "rewards/rejected": -0.2569323182106018, "step": 8148 }, { "epoch": 4.9711758426109505, "grad_norm": 8.713492393493652, "learning_rate": 4.507042253521127e-08, "log_odds_chosen": 1.0171173810958862, "log_odds_ratio": -0.41558048129081726, "logits/chosen": -0.8495922684669495, "logits/rejected": -0.9981156587600708, "logps/chosen": -0.848638117313385, "logps/rejected": -1.6321256160736084, "loss": 1.1846, "nll_loss": 1.1633589267730713, "rewards/accuracies": 0.875, "rewards/chosen": -0.0848638117313385, "rewards/margins": 0.07834875583648682, "rewards/rejected": -0.16321256756782532, "step": 8149 }, { "epoch": 4.971785877687967, "grad_norm": 5.60392427444458, "learning_rate": 4.409063074096754e-08, "log_odds_chosen": 3.2635462284088135, "log_odds_ratio": -0.35210177302360535, "logits/chosen": -0.8635262846946716, "logits/rejected": -1.1475895643234253, "logps/chosen": -0.9266427755355835, "logps/rejected": -3.7970657348632812, "loss": 1.1081, "nll_loss": 1.132641077041626, "rewards/accuracies": 0.75, "rewards/chosen": -0.09266427159309387, "rewards/margins": 0.2870422899723053, "rewards/rejected": -0.37970659136772156, "step": 8150 }, { "epoch": 4.972395912764984, "grad_norm": 1.6126751899719238, "learning_rate": 4.311083894672382e-08, "log_odds_chosen": 2.5739545822143555, "log_odds_ratio": -0.22838817536830902, "logits/chosen": -0.9416898488998413, "logits/rejected": -1.1327626705169678, "logps/chosen": -0.7351795434951782, "logps/rejected": -2.7534384727478027, "loss": 1.0564, "nll_loss": 1.0937517881393433, "rewards/accuracies": 0.875, "rewards/chosen": -0.07351796329021454, "rewards/margins": 0.20182591676712036, "rewards/rejected": -0.2753438651561737, "step": 8151 }, { "epoch": 4.973005947842001, "grad_norm": 6.73609733581543, "learning_rate": 4.213104715248009e-08, "log_odds_chosen": 3.569244861602783, "log_odds_ratio": -0.1756819784641266, "logits/chosen": -0.7142739295959473, "logits/rejected": -0.9223963618278503, "logps/chosen": -0.423199325799942, "logps/rejected": -3.074808120727539, "loss": 1.0097, "nll_loss": 0.7837941646575928, "rewards/accuracies": 1.0, "rewards/chosen": -0.04231993108987808, "rewards/margins": 0.2651608884334564, "rewards/rejected": -0.3074808418750763, "step": 8152 }, { "epoch": 4.973615982919018, "grad_norm": 1.5109094381332397, "learning_rate": 4.115125535823637e-08, "log_odds_chosen": 3.0876705646514893, "log_odds_ratio": -0.15769773721694946, "logits/chosen": -0.8741306662559509, "logits/rejected": -1.0104807615280151, "logps/chosen": -0.6445193290710449, "logps/rejected": -3.05493426322937, "loss": 0.9708, "nll_loss": 0.9613816142082214, "rewards/accuracies": 1.0, "rewards/chosen": -0.06445193290710449, "rewards/margins": 0.24104149639606476, "rewards/rejected": -0.30549344420433044, "step": 8153 }, { "epoch": 4.974226017996035, "grad_norm": 2.128603458404541, "learning_rate": 4.017146356399265e-08, "log_odds_chosen": 2.5106611251831055, "log_odds_ratio": -0.3205671012401581, "logits/chosen": -0.8539904356002808, "logits/rejected": -0.8497294783592224, "logps/chosen": -0.8242735862731934, "logps/rejected": -2.7903339862823486, "loss": 1.0904, "nll_loss": 1.0095288753509521, "rewards/accuracies": 0.875, "rewards/chosen": -0.08242735266685486, "rewards/margins": 0.19660605490207672, "rewards/rejected": -0.2790333926677704, "step": 8154 }, { "epoch": 4.9748360530730515, "grad_norm": 1.7663553953170776, "learning_rate": 3.919167176974893e-08, "log_odds_chosen": 2.326509714126587, "log_odds_ratio": -0.19798403978347778, "logits/chosen": -0.813213586807251, "logits/rejected": -0.9434751868247986, "logps/chosen": -0.793667197227478, "logps/rejected": -2.4741148948669434, "loss": 0.9944, "nll_loss": 1.079972505569458, "rewards/accuracies": 1.0, "rewards/chosen": -0.07936672866344452, "rewards/margins": 0.168044775724411, "rewards/rejected": -0.24741148948669434, "step": 8155 }, { "epoch": 4.975446088150068, "grad_norm": 10.111617088317871, "learning_rate": 3.821187997550521e-08, "log_odds_chosen": 0.46679550409317017, "log_odds_ratio": -0.558525025844574, "logits/chosen": -0.732742965221405, "logits/rejected": -0.8067504167556763, "logps/chosen": -0.9866777062416077, "logps/rejected": -1.3476276397705078, "loss": 1.0827, "nll_loss": 1.0743083953857422, "rewards/accuracies": 0.75, "rewards/chosen": -0.09866777062416077, "rewards/margins": 0.036094993352890015, "rewards/rejected": -0.13476276397705078, "step": 8156 }, { "epoch": 4.976056123227085, "grad_norm": 9.619261741638184, "learning_rate": 3.723208818126148e-08, "log_odds_chosen": 0.9004358649253845, "log_odds_ratio": -0.597234845161438, "logits/chosen": -0.9782179594039917, "logits/rejected": -1.1183366775512695, "logps/chosen": -0.7396997809410095, "logps/rejected": -1.3868728876113892, "loss": 0.9599, "nll_loss": 0.9930770397186279, "rewards/accuracies": 0.375, "rewards/chosen": -0.07396997511386871, "rewards/margins": 0.06471731513738632, "rewards/rejected": -0.13868728280067444, "step": 8157 }, { "epoch": 4.976666158304102, "grad_norm": 1.8251553773880005, "learning_rate": 3.625229638701776e-08, "log_odds_chosen": 1.3164842128753662, "log_odds_ratio": -0.45457497239112854, "logits/chosen": -0.984226405620575, "logits/rejected": -0.9905321598052979, "logps/chosen": -0.9982689023017883, "logps/rejected": -2.120351791381836, "loss": 1.0977, "nll_loss": 1.1278486251831055, "rewards/accuracies": 0.625, "rewards/chosen": -0.0998268872499466, "rewards/margins": 0.1122082769870758, "rewards/rejected": -0.2120351642370224, "step": 8158 }, { "epoch": 4.977276193381119, "grad_norm": 22.879854202270508, "learning_rate": 3.527250459277403e-08, "log_odds_chosen": 0.6288926601409912, "log_odds_ratio": -0.9605534076690674, "logits/chosen": -1.1786880493164062, "logits/rejected": -1.066532015800476, "logps/chosen": -1.2685747146606445, "logps/rejected": -1.9908225536346436, "loss": 1.1659, "nll_loss": 1.2116003036499023, "rewards/accuracies": 0.375, "rewards/chosen": -0.1268574595451355, "rewards/margins": 0.07222479581832886, "rewards/rejected": -0.19908225536346436, "step": 8159 }, { "epoch": 4.977886228458137, "grad_norm": 1.5886422395706177, "learning_rate": 3.4292712798530314e-08, "log_odds_chosen": 0.8200011849403381, "log_odds_ratio": -0.6696784496307373, "logits/chosen": -1.0084688663482666, "logits/rejected": -1.0471402406692505, "logps/chosen": -0.9483224749565125, "logps/rejected": -1.7040983438491821, "loss": 1.0651, "nll_loss": 1.0868417024612427, "rewards/accuracies": 0.75, "rewards/chosen": -0.09483224898576736, "rewards/margins": 0.07557758688926697, "rewards/rejected": -0.17040984332561493, "step": 8160 }, { "epoch": 4.9784962635351535, "grad_norm": 14.537879943847656, "learning_rate": 3.331292100428659e-08, "log_odds_chosen": 1.3472946882247925, "log_odds_ratio": -0.5298720002174377, "logits/chosen": -0.7625579833984375, "logits/rejected": -0.8912514448165894, "logps/chosen": -0.7737092971801758, "logps/rejected": -1.8835927248001099, "loss": 0.9935, "nll_loss": 0.9847694635391235, "rewards/accuracies": 0.75, "rewards/chosen": -0.07737092673778534, "rewards/margins": 0.11098834127187729, "rewards/rejected": -0.18835926055908203, "step": 8161 }, { "epoch": 4.97910629861217, "grad_norm": 5.465762138366699, "learning_rate": 3.233312921004286e-08, "log_odds_chosen": 1.444347858428955, "log_odds_ratio": -0.3544788658618927, "logits/chosen": -0.8589119911193848, "logits/rejected": -0.8268578052520752, "logps/chosen": -0.6900895833969116, "logps/rejected": -1.5827600955963135, "loss": 1.1806, "nll_loss": 0.8701874613761902, "rewards/accuracies": 0.875, "rewards/chosen": -0.0690089613199234, "rewards/margins": 0.0892670601606369, "rewards/rejected": -0.1582760214805603, "step": 8162 }, { "epoch": 4.979716333689187, "grad_norm": 1.3328849077224731, "learning_rate": 3.135333741579914e-08, "log_odds_chosen": 1.7960338592529297, "log_odds_ratio": -0.2969379425048828, "logits/chosen": -0.8095483779907227, "logits/rejected": -0.9618090391159058, "logps/chosen": -0.8332557678222656, "logps/rejected": -2.1232969760894775, "loss": 1.0069, "nll_loss": 0.9837393164634705, "rewards/accuracies": 0.875, "rewards/chosen": -0.0833255723118782, "rewards/margins": 0.1290041208267212, "rewards/rejected": -0.21232970058918, "step": 8163 }, { "epoch": 4.980326368766204, "grad_norm": 1.6048592329025269, "learning_rate": 3.037354562155542e-08, "log_odds_chosen": 3.9864320755004883, "log_odds_ratio": -0.11416961252689362, "logits/chosen": -0.6432961821556091, "logits/rejected": -0.9880719780921936, "logps/chosen": -0.6482592821121216, "logps/rejected": -3.806813955307007, "loss": 0.9732, "nll_loss": 0.762971043586731, "rewards/accuracies": 0.875, "rewards/chosen": -0.06482592970132828, "rewards/margins": 0.315855473279953, "rewards/rejected": -0.3806813955307007, "step": 8164 }, { "epoch": 4.980936403843221, "grad_norm": 1.718265414237976, "learning_rate": 2.9393753827311697e-08, "log_odds_chosen": 3.477431297302246, "log_odds_ratio": -0.40507885813713074, "logits/chosen": -0.7852433323860168, "logits/rejected": -0.9778796434402466, "logps/chosen": -0.7473234534263611, "logps/rejected": -3.845496654510498, "loss": 1.0447, "nll_loss": 0.866721510887146, "rewards/accuracies": 0.625, "rewards/chosen": -0.07473234832286835, "rewards/margins": 0.3098173141479492, "rewards/rejected": -0.38454967737197876, "step": 8165 }, { "epoch": 4.981546438920238, "grad_norm": 1.925246000289917, "learning_rate": 2.841396203306797e-08, "log_odds_chosen": 2.6713266372680664, "log_odds_ratio": -0.5066349506378174, "logits/chosen": -0.9788426160812378, "logits/rejected": -0.9597568511962891, "logps/chosen": -0.7893484830856323, "logps/rejected": -3.112928867340088, "loss": 1.1233, "nll_loss": 1.0446124076843262, "rewards/accuracies": 0.625, "rewards/chosen": -0.07893484085798264, "rewards/margins": 0.23235806822776794, "rewards/rejected": -0.3112928867340088, "step": 8166 }, { "epoch": 4.982156473997255, "grad_norm": 3.5434231758117676, "learning_rate": 2.743417023882425e-08, "log_odds_chosen": 3.1541433334350586, "log_odds_ratio": -0.41420215368270874, "logits/chosen": -0.7755035161972046, "logits/rejected": -1.0852302312850952, "logps/chosen": -0.8859509229660034, "logps/rejected": -3.6173105239868164, "loss": 1.0427, "nll_loss": 0.9577000737190247, "rewards/accuracies": 0.875, "rewards/chosen": -0.08859509974718094, "rewards/margins": 0.2731359601020813, "rewards/rejected": -0.36173105239868164, "step": 8167 }, { "epoch": 4.982766509074271, "grad_norm": 8.03766918182373, "learning_rate": 2.6454378444580523e-08, "log_odds_chosen": 2.970113515853882, "log_odds_ratio": -0.29352840781211853, "logits/chosen": -0.7279152274131775, "logits/rejected": -0.9101217985153198, "logps/chosen": -0.601693868637085, "logps/rejected": -2.872068405151367, "loss": 1.1732, "nll_loss": 0.9210000038146973, "rewards/accuracies": 0.75, "rewards/chosen": -0.060169391334056854, "rewards/margins": 0.2270374596118927, "rewards/rejected": -0.28720685839653015, "step": 8168 }, { "epoch": 4.983376544151289, "grad_norm": 9.573046684265137, "learning_rate": 2.54745866503368e-08, "log_odds_chosen": 2.252406358718872, "log_odds_ratio": -0.2749898135662079, "logits/chosen": -0.9196355938911438, "logits/rejected": -0.9970552325248718, "logps/chosen": -0.7855702638626099, "logps/rejected": -2.457669734954834, "loss": 1.2931, "nll_loss": 0.9802968502044678, "rewards/accuracies": 0.875, "rewards/chosen": -0.07855702936649323, "rewards/margins": 0.1672099381685257, "rewards/rejected": -0.24576696753501892, "step": 8169 }, { "epoch": 4.983986579228306, "grad_norm": 1.645529866218567, "learning_rate": 2.449479485609308e-08, "log_odds_chosen": 1.452104091644287, "log_odds_ratio": -0.3928390443325043, "logits/chosen": -0.813113808631897, "logits/rejected": -0.6890594363212585, "logps/chosen": -0.7332446575164795, "logps/rejected": -1.8661823272705078, "loss": 1.1285, "nll_loss": 1.0591838359832764, "rewards/accuracies": 0.625, "rewards/chosen": -0.07332447171211243, "rewards/margins": 0.11329378187656403, "rewards/rejected": -0.18661823868751526, "step": 8170 }, { "epoch": 4.984596614305323, "grad_norm": 1.5790365934371948, "learning_rate": 2.3515003061849357e-08, "log_odds_chosen": 1.3718514442443848, "log_odds_ratio": -0.574368417263031, "logits/chosen": -0.8954671025276184, "logits/rejected": -0.8229253888130188, "logps/chosen": -0.9043457508087158, "logps/rejected": -2.03987979888916, "loss": 1.0117, "nll_loss": 1.1648521423339844, "rewards/accuracies": 0.5, "rewards/chosen": -0.09043458104133606, "rewards/margins": 0.11355339735746384, "rewards/rejected": -0.2039879858493805, "step": 8171 }, { "epoch": 4.98520664938234, "grad_norm": 23.395307540893555, "learning_rate": 2.2535211267605634e-08, "log_odds_chosen": 2.212035894393921, "log_odds_ratio": -0.2964356541633606, "logits/chosen": -1.061004400253296, "logits/rejected": -1.133599877357483, "logps/chosen": -0.9080653190612793, "logps/rejected": -2.644843816757202, "loss": 1.0954, "nll_loss": 1.0556161403656006, "rewards/accuracies": 1.0, "rewards/chosen": -0.09080654382705688, "rewards/margins": 0.17367783188819885, "rewards/rejected": -0.26448437571525574, "step": 8172 }, { "epoch": 4.9858166844593566, "grad_norm": 8.541850090026855, "learning_rate": 2.155541947336191e-08, "log_odds_chosen": 3.223184108734131, "log_odds_ratio": -0.26083752512931824, "logits/chosen": -0.9182717204093933, "logits/rejected": -1.0186138153076172, "logps/chosen": -0.6681816577911377, "logps/rejected": -3.302157402038574, "loss": 0.9822, "nll_loss": 0.8918629884719849, "rewards/accuracies": 1.0, "rewards/chosen": -0.06681817024946213, "rewards/margins": 0.26339757442474365, "rewards/rejected": -0.330215722322464, "step": 8173 }, { "epoch": 4.986426719536373, "grad_norm": 8.0486421585083, "learning_rate": 2.0575627679118187e-08, "log_odds_chosen": 4.317000865936279, "log_odds_ratio": -0.10746259987354279, "logits/chosen": -0.6554152965545654, "logits/rejected": -0.7884919047355652, "logps/chosen": -0.6339455842971802, "logps/rejected": -4.110825538635254, "loss": 0.962, "nll_loss": 0.8628135323524475, "rewards/accuracies": 1.0, "rewards/chosen": -0.06339455395936966, "rewards/margins": 0.3476880192756653, "rewards/rejected": -0.41108256578445435, "step": 8174 }, { "epoch": 4.98703675461339, "grad_norm": 8.516887664794922, "learning_rate": 1.9595835884874464e-08, "log_odds_chosen": 3.5798747539520264, "log_odds_ratio": -0.4044891893863678, "logits/chosen": -0.9450201988220215, "logits/rejected": -1.0939973592758179, "logps/chosen": -0.7609357833862305, "logps/rejected": -3.916151285171509, "loss": 1.229, "nll_loss": 1.2048218250274658, "rewards/accuracies": 0.875, "rewards/chosen": -0.07609358429908752, "rewards/margins": 0.31552156805992126, "rewards/rejected": -0.3916151225566864, "step": 8175 }, { "epoch": 4.987646789690407, "grad_norm": 1.8179810047149658, "learning_rate": 1.861604409063074e-08, "log_odds_chosen": 0.9916906356811523, "log_odds_ratio": -0.5562847256660461, "logits/chosen": -0.7899194955825806, "logits/rejected": -0.9224815368652344, "logps/chosen": -0.8750813007354736, "logps/rejected": -1.5251661539077759, "loss": 1.1529, "nll_loss": 1.0005710124969482, "rewards/accuracies": 0.625, "rewards/chosen": -0.08750812709331512, "rewards/margins": 0.0650084912776947, "rewards/rejected": -0.15251661837100983, "step": 8176 }, { "epoch": 4.988256824767424, "grad_norm": 1.2758055925369263, "learning_rate": 1.7636252296387017e-08, "log_odds_chosen": 4.619750022888184, "log_odds_ratio": -0.2604783773422241, "logits/chosen": -1.0663695335388184, "logits/rejected": -1.1840616464614868, "logps/chosen": -0.7888292074203491, "logps/rejected": -4.860299587249756, "loss": 1.0102, "nll_loss": 0.9855611324310303, "rewards/accuracies": 0.875, "rewards/chosen": -0.07888292521238327, "rewards/margins": 0.40714702010154724, "rewards/rejected": -0.4860299527645111, "step": 8177 }, { "epoch": 4.988866859844441, "grad_norm": 1.2012590169906616, "learning_rate": 1.6656460502143294e-08, "log_odds_chosen": 2.6001524925231934, "log_odds_ratio": -0.20223991572856903, "logits/chosen": -0.7907809615135193, "logits/rejected": -0.8646895289421082, "logps/chosen": -0.4920963644981384, "logps/rejected": -2.1564507484436035, "loss": 1.0827, "nll_loss": 0.9369379878044128, "rewards/accuracies": 1.0, "rewards/chosen": -0.049209631979465485, "rewards/margins": 0.16643543541431427, "rewards/rejected": -0.21564507484436035, "step": 8178 }, { "epoch": 4.989476894921458, "grad_norm": 2.2477540969848633, "learning_rate": 1.567666870789957e-08, "log_odds_chosen": 1.9576354026794434, "log_odds_ratio": -0.5695075988769531, "logits/chosen": -0.826370120048523, "logits/rejected": -1.0290369987487793, "logps/chosen": -0.7859143614768982, "logps/rejected": -2.302320957183838, "loss": 0.9242, "nll_loss": 0.9177109003067017, "rewards/accuracies": 0.5, "rewards/chosen": -0.07859143614768982, "rewards/margins": 0.1516406536102295, "rewards/rejected": -0.23023207485675812, "step": 8179 }, { "epoch": 4.9900869299984745, "grad_norm": 2.7916483879089355, "learning_rate": 1.4696876913655848e-08, "log_odds_chosen": 1.8108854293823242, "log_odds_ratio": -0.47546836733818054, "logits/chosen": -1.0559478998184204, "logits/rejected": -1.0608917474746704, "logps/chosen": -0.8897145390510559, "logps/rejected": -2.4590516090393066, "loss": 0.9764, "nll_loss": 0.9227770566940308, "rewards/accuracies": 0.75, "rewards/chosen": -0.08897146582603455, "rewards/margins": 0.1569337248802185, "rewards/rejected": -0.24590517580509186, "step": 8180 }, { "epoch": 4.990696965075492, "grad_norm": 1.3424197435379028, "learning_rate": 1.3717085119412124e-08, "log_odds_chosen": 0.5292465686798096, "log_odds_ratio": -0.6067752242088318, "logits/chosen": -0.9929709434509277, "logits/rejected": -0.9019374847412109, "logps/chosen": -0.8951654434204102, "logps/rejected": -1.338091492652893, "loss": 1.1424, "nll_loss": 1.065584421157837, "rewards/accuracies": 0.625, "rewards/chosen": -0.0895165503025055, "rewards/margins": 0.044292598962783813, "rewards/rejected": -0.1338091492652893, "step": 8181 }, { "epoch": 4.991307000152509, "grad_norm": 2.561891794204712, "learning_rate": 1.27372933251684e-08, "log_odds_chosen": 0.7398306131362915, "log_odds_ratio": -0.5284032821655273, "logits/chosen": -0.8787046670913696, "logits/rejected": -1.0298765897750854, "logps/chosen": -1.201163411140442, "logps/rejected": -1.7378844022750854, "loss": 1.2416, "nll_loss": 1.5105657577514648, "rewards/accuracies": 0.5, "rewards/chosen": -0.12011635303497314, "rewards/margins": 0.053672101348638535, "rewards/rejected": -0.1737884283065796, "step": 8182 }, { "epoch": 4.991917035229526, "grad_norm": 1.3081293106079102, "learning_rate": 1.1757501530924678e-08, "log_odds_chosen": 1.0214260816574097, "log_odds_ratio": -0.47411733865737915, "logits/chosen": -0.8069526553153992, "logits/rejected": -0.8542693853378296, "logps/chosen": -0.8450227975845337, "logps/rejected": -1.6179094314575195, "loss": 1.1454, "nll_loss": 1.0227659940719604, "rewards/accuracies": 0.75, "rewards/chosen": -0.08450227975845337, "rewards/margins": 0.0772886648774147, "rewards/rejected": -0.16179093718528748, "step": 8183 }, { "epoch": 4.992527070306543, "grad_norm": 1.6641321182250977, "learning_rate": 1.0777709736680954e-08, "log_odds_chosen": 0.6531777381896973, "log_odds_ratio": -0.48696044087409973, "logits/chosen": -0.9528781771659851, "logits/rejected": -0.9915981292724609, "logps/chosen": -0.9150235056877136, "logps/rejected": -1.3597564697265625, "loss": 1.0485, "nll_loss": 1.113197922706604, "rewards/accuracies": 0.75, "rewards/chosen": -0.0915023535490036, "rewards/margins": 0.04447329789400101, "rewards/rejected": -0.1359756588935852, "step": 8184 }, { "epoch": 4.99313710538356, "grad_norm": 1.2345852851867676, "learning_rate": 9.797917942437232e-09, "log_odds_chosen": 2.0760271549224854, "log_odds_ratio": -0.4190443754196167, "logits/chosen": -0.8697463274002075, "logits/rejected": -1.0037883520126343, "logps/chosen": -0.6754162311553955, "logps/rejected": -2.197535276412964, "loss": 1.014, "nll_loss": 0.9400570392608643, "rewards/accuracies": 0.75, "rewards/chosen": -0.06754162907600403, "rewards/margins": 0.15221190452575684, "rewards/rejected": -0.21975351870059967, "step": 8185 }, { "epoch": 4.993747140460576, "grad_norm": 3.5899658203125, "learning_rate": 8.818126148193508e-09, "log_odds_chosen": 1.9669336080551147, "log_odds_ratio": -0.3864140510559082, "logits/chosen": -0.7701518535614014, "logits/rejected": -0.8527796268463135, "logps/chosen": -1.0533925294876099, "logps/rejected": -2.584099054336548, "loss": 0.9774, "nll_loss": 0.9520359039306641, "rewards/accuracies": 0.75, "rewards/chosen": -0.10533924400806427, "rewards/margins": 0.15307065844535828, "rewards/rejected": -0.25840991735458374, "step": 8186 }, { "epoch": 4.994357175537593, "grad_norm": 1.2070505619049072, "learning_rate": 7.838334353949784e-09, "log_odds_chosen": 1.3053771257400513, "log_odds_ratio": -0.42547205090522766, "logits/chosen": -1.010683298110962, "logits/rejected": -1.050208330154419, "logps/chosen": -0.9129226207733154, "logps/rejected": -1.8847657442092896, "loss": 1.1917, "nll_loss": 1.1086490154266357, "rewards/accuracies": 0.875, "rewards/chosen": -0.09129226207733154, "rewards/margins": 0.09718432277441025, "rewards/rejected": -0.1884765774011612, "step": 8187 }, { "epoch": 4.99496721061461, "grad_norm": 17.452688217163086, "learning_rate": 6.858542559706062e-09, "log_odds_chosen": 2.394685745239258, "log_odds_ratio": -0.2877320945262909, "logits/chosen": -0.8879888653755188, "logits/rejected": -0.9945039749145508, "logps/chosen": -0.6562870144844055, "logps/rejected": -2.4800291061401367, "loss": 1.0571, "nll_loss": 1.107426404953003, "rewards/accuracies": 0.875, "rewards/chosen": -0.06562870740890503, "rewards/margins": 0.18237422406673431, "rewards/rejected": -0.24800293147563934, "step": 8188 }, { "epoch": 4.995577245691627, "grad_norm": 1.5674132108688354, "learning_rate": 5.878750765462339e-09, "log_odds_chosen": 3.024486541748047, "log_odds_ratio": -0.2874741554260254, "logits/chosen": -0.9954949617385864, "logits/rejected": -1.141248345375061, "logps/chosen": -0.7568997144699097, "logps/rejected": -3.245044708251953, "loss": 1.1784, "nll_loss": 1.1511670351028442, "rewards/accuracies": 0.875, "rewards/chosen": -0.07568997144699097, "rewards/margins": 0.24881449341773987, "rewards/rejected": -0.3245044946670532, "step": 8189 }, { "epoch": 4.996187280768644, "grad_norm": 1.725140929222107, "learning_rate": 4.898958971218616e-09, "log_odds_chosen": 2.5080249309539795, "log_odds_ratio": -0.3445444703102112, "logits/chosen": -0.6590118408203125, "logits/rejected": -0.7821733355522156, "logps/chosen": -0.7168803215026855, "logps/rejected": -2.67486572265625, "loss": 1.058, "nll_loss": 0.8816923499107361, "rewards/accuracies": 0.75, "rewards/chosen": -0.07168803364038467, "rewards/margins": 0.19579853117465973, "rewards/rejected": -0.267486572265625, "step": 8190 }, { "epoch": 4.996797315845662, "grad_norm": 3.081469774246216, "learning_rate": 3.919167176974892e-09, "log_odds_chosen": 0.9011779427528381, "log_odds_ratio": -0.4224676787853241, "logits/chosen": -0.7576037645339966, "logits/rejected": -0.7759255170822144, "logps/chosen": -0.5281986594200134, "logps/rejected": -1.0419111251831055, "loss": 1.2894, "nll_loss": 0.9553655385971069, "rewards/accuracies": 0.875, "rewards/chosen": -0.0528198666870594, "rewards/margins": 0.051371246576309204, "rewards/rejected": -0.10419110953807831, "step": 8191 }, { "epoch": 4.997407350922678, "grad_norm": 1.3335026502609253, "learning_rate": 2.9393753827311696e-09, "log_odds_chosen": 3.424090623855591, "log_odds_ratio": -0.24846872687339783, "logits/chosen": -0.8684605360031128, "logits/rejected": -1.0244710445404053, "logps/chosen": -0.7029348611831665, "logps/rejected": -3.6861696243286133, "loss": 1.0407, "nll_loss": 0.9046676754951477, "rewards/accuracies": 0.875, "rewards/chosen": -0.07029348611831665, "rewards/margins": 0.29832348227500916, "rewards/rejected": -0.3686169683933258, "step": 8192 }, { "epoch": 4.998017385999695, "grad_norm": 1.4445053339004517, "learning_rate": 1.959583588487446e-09, "log_odds_chosen": 3.736905813217163, "log_odds_ratio": -0.07726879417896271, "logits/chosen": -0.9648209810256958, "logits/rejected": -0.9937523603439331, "logps/chosen": -0.6787735223770142, "logps/rejected": -3.6172101497650146, "loss": 1.0684, "nll_loss": 0.9365827441215515, "rewards/accuracies": 1.0, "rewards/chosen": -0.06787735223770142, "rewards/margins": 0.29384368658065796, "rewards/rejected": -0.361721009016037, "step": 8193 }, { "epoch": 4.998627421076712, "grad_norm": 3.102910280227661, "learning_rate": 9.79791794243723e-10, "log_odds_chosen": 0.8139480352401733, "log_odds_ratio": -0.5331491231918335, "logits/chosen": -0.991665244102478, "logits/rejected": -1.0150630474090576, "logps/chosen": -0.9523470401763916, "logps/rejected": -1.5759788751602173, "loss": 1.1749, "nll_loss": 1.0053598880767822, "rewards/accuracies": 0.5, "rewards/chosen": -0.0952347069978714, "rewards/margins": 0.06236318498849869, "rewards/rejected": -0.1575978845357895, "step": 8194 }, { "epoch": 4.999237456153729, "grad_norm": 1.4848300218582153, "learning_rate": 0.0, "log_odds_chosen": 4.282942295074463, "log_odds_ratio": -0.13192622363567352, "logits/chosen": -0.7162397503852844, "logits/rejected": -1.0406455993652344, "logps/chosen": -0.502235472202301, "logps/rejected": -4.015653133392334, "loss": 1.0763, "nll_loss": 0.8943740129470825, "rewards/accuracies": 1.0, "rewards/chosen": -0.050223544239997864, "rewards/margins": 0.35134178400039673, "rewards/rejected": -0.4015653133392334, "step": 8195 }, { "epoch": 4.999237456153729, "eval_log_odds_chosen": 2.101734161376953, "eval_log_odds_ratio": -0.39467301964759827, "eval_logits/chosen": -0.9354549646377563, "eval_logits/rejected": -1.034712791442871, "eval_logps/chosen": -0.8229629993438721, "eval_logps/rejected": -2.495842695236206, "eval_loss": 1.0581330060958862, "eval_nll_loss": 1.0625019073486328, "eval_rewards/accuracies": 0.7878788113594055, "eval_rewards/chosen": -0.08229630440473557, "eval_rewards/margins": 0.16728799045085907, "eval_rewards/rejected": -0.24958431720733643, "eval_runtime": 394.5377, "eval_samples_per_second": 0.996, "eval_steps_per_second": 0.167, "step": 8195 }, { "epoch": 4.999237456153729, "step": 8195, "total_flos": 0.0, "train_loss": 1.1049469483633314, "train_runtime": 586416.3764, "train_samples_per_second": 0.335, "train_steps_per_second": 0.014 } ], "logging_steps": 1, "max_steps": 8195, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }