diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,4573 +1,13683 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 1.0, + "epoch": 3.0, "eval_steps": 500, - "global_step": 1259, + "global_step": 3777, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.003971405877680699, - "grad_norm": 1986.144287109375, + "grad_norm": 1984.91259765625, "learning_rate": 2.5000000000000004e-07, - "log_odds_chosen": -0.8231229782104492, - "log_odds_ratio": -1.391985297203064, - "logits/chosen": 102.16714477539062, - "logits/rejected": -12.402770042419434, - "logps/chosen": -16.669206619262695, - "logps/rejected": -15.846084594726562, - "loss": 14.8236, - "nll_loss": 15.787309646606445, + "log_odds_chosen": -0.822909951210022, + "log_odds_ratio": -1.3946492671966553, + "logits/chosen": 102.17945098876953, + "logits/rejected": -12.376755714416504, + "logps/chosen": -16.667949676513672, + "logps/rejected": -15.845039367675781, + "loss": 14.8233, + "nll_loss": 15.784014701843262, "rewards/accuracies": 0.20000000298023224, - "rewards/chosen": -0.8334604501724243, - "rewards/margins": -0.041156161576509476, - "rewards/rejected": -0.7923042178153992, + "rewards/chosen": -0.8333975076675415, + "rewards/margins": -0.041145503520965576, + "rewards/rejected": -0.7922519445419312, "step": 5 }, { "epoch": 0.007942811755361398, - "grad_norm": 1124.8104248046875, + "grad_norm": 1128.173583984375, "learning_rate": 5.000000000000001e-07, - "log_odds_chosen": -1.4938147068023682, - "log_odds_ratio": -2.2117180824279785, - "logits/chosen": 69.36089324951172, - "logits/rejected": 133.50851440429688, - "logps/chosen": -14.369367599487305, - "logps/rejected": -12.875558853149414, - "loss": 12.448, - "nll_loss": 12.069561958312988, + "log_odds_chosen": -1.4871519804000854, + "log_odds_ratio": -2.2126498222351074, + "logits/chosen": 69.30543518066406, + "logits/rejected": 133.51295471191406, + "logps/chosen": -14.377180099487305, + "logps/rejected": -12.890034675598145, + "loss": 12.4575, + "nll_loss": 12.073002815246582, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.7184683680534363, - "rewards/margins": -0.07469038665294647, - "rewards/rejected": -0.6437779664993286, + "rewards/chosen": -0.7188590168952942, + "rewards/margins": -0.0743572935461998, + "rewards/rejected": -0.6445017457008362, "step": 10 }, { "epoch": 0.011914217633042097, - "grad_norm": 571.815673828125, + "grad_norm": 578.59765625, "learning_rate": 7.5e-07, - "log_odds_chosen": 0.07529473304748535, - "log_odds_ratio": -0.9602964520454407, - "logits/chosen": 194.84005737304688, - "logits/rejected": 170.63455200195312, - "logps/chosen": -8.528478622436523, - "logps/rejected": -8.603917121887207, - "loss": 8.6647, - "nll_loss": 8.767313003540039, + "log_odds_chosen": 0.08573625236749649, + "log_odds_ratio": -0.9523025751113892, + "logits/chosen": 194.7311248779297, + "logits/rejected": 170.49374389648438, + "logps/chosen": -8.521177291870117, + "logps/rejected": -8.607057571411133, + "loss": 8.662, + "nll_loss": 8.76197624206543, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.42642393708229065, - "rewards/margins": 0.00377195468172431, - "rewards/rejected": -0.4301958680152893, + "rewards/chosen": -0.4260588586330414, + "rewards/margins": 0.004294055514037609, + "rewards/rejected": -0.43035292625427246, "step": 15 }, { "epoch": 0.015885623510722795, - "grad_norm": 294.7610778808594, + "grad_norm": 292.1365051269531, "learning_rate": 1.0000000000000002e-06, - "log_odds_chosen": 1.0600534677505493, - "log_odds_ratio": -0.5760771632194519, - "logits/chosen": 130.64846801757812, - "logits/rejected": 219.7195281982422, - "logps/chosen": -5.179438591003418, - "logps/rejected": -6.236131191253662, - "loss": 5.951, - "nll_loss": 5.380393028259277, + "log_odds_chosen": 1.116430640220642, + "log_odds_ratio": -0.5740691423416138, + "logits/chosen": 130.79287719726562, + "logits/rejected": 219.90774536132812, + "logps/chosen": -5.188860893249512, + "logps/rejected": -6.301913261413574, + "loss": 5.9474, + "nll_loss": 5.384028434753418, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.2589719295501709, - "rewards/margins": 0.05283462256193161, - "rewards/rejected": -0.3118065893650055, + "rewards/chosen": -0.2594430446624756, + "rewards/margins": 0.055652640759944916, + "rewards/rejected": -0.3150956928730011, "step": 20 }, { "epoch": 0.019857029388403495, - "grad_norm": 175.71804809570312, + "grad_norm": 176.4138641357422, "learning_rate": 1.25e-06, - "log_odds_chosen": -0.23735050857067108, - "log_odds_ratio": -0.8487253189086914, - "logits/chosen": 127.70460510253906, - "logits/rejected": 241.3983154296875, - "logps/chosen": -3.850130796432495, - "logps/rejected": -3.6309711933135986, - "loss": 4.3668, - "nll_loss": 4.186649322509766, + "log_odds_chosen": -0.2425965815782547, + "log_odds_ratio": -0.8498791456222534, + "logits/chosen": 127.62091064453125, + "logits/rejected": 241.3440399169922, + "logps/chosen": -3.852045774459839, + "logps/rejected": -3.6273605823516846, + "loss": 4.3717, + "nll_loss": 4.1882829666137695, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.19250653684139252, - "rewards/margins": -0.010957981459796429, - "rewards/rejected": -0.18154855072498322, + "rewards/chosen": -0.19260229170322418, + "rewards/margins": -0.01123427040874958, + "rewards/rejected": -0.18136802315711975, "step": 25 }, { "epoch": 0.023828435266084195, - "grad_norm": 124.54273986816406, + "grad_norm": 122.2364501953125, "learning_rate": 1.5e-06, - "log_odds_chosen": 0.1647549569606781, - "log_odds_ratio": -1.391825795173645, - "logits/chosen": 265.0519714355469, - "logits/rejected": 185.30813598632812, - "logps/chosen": -3.637852430343628, - "logps/rejected": -3.7852470874786377, - "loss": 3.5575, - "nll_loss": 3.7124857902526855, + "log_odds_chosen": 0.13310351967811584, + "log_odds_ratio": -1.3691762685775757, + "logits/chosen": 266.5719299316406, + "logits/rejected": 187.1367645263672, + "logps/chosen": -3.6415443420410156, + "logps/rejected": -3.757603883743286, + "loss": 3.5507, + "nll_loss": 3.7023651599884033, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.18189263343811035, - "rewards/margins": 0.007369739003479481, - "rewards/rejected": -0.18926236033439636, + "rewards/chosen": -0.18207721412181854, + "rewards/margins": 0.0058029768988490105, + "rewards/rejected": -0.18788018822669983, "step": 30 }, { "epoch": 0.02779984114376489, - "grad_norm": 197.60816955566406, + "grad_norm": 193.59271240234375, "learning_rate": 1.75e-06, - "log_odds_chosen": 0.08467637002468109, - "log_odds_ratio": -0.7092531323432922, - "logits/chosen": 209.2491455078125, - "logits/rejected": 306.3213806152344, - "logps/chosen": -2.6641006469726562, - "logps/rejected": -2.7374536991119385, - "loss": 3.3245, - "nll_loss": 2.6615092754364014, + "log_odds_chosen": 0.06719346344470978, + "log_odds_ratio": -0.7199904918670654, + "logits/chosen": 215.12673950195312, + "logits/rejected": 310.61846923828125, + "logps/chosen": -2.6558592319488525, + "logps/rejected": -2.712460994720459, + "loss": 3.333, + "nll_loss": 2.6609179973602295, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.13320502638816833, - "rewards/margins": 0.0036676630843430758, - "rewards/rejected": -0.13687269389629364, + "rewards/chosen": -0.13279296457767487, + "rewards/margins": 0.002830089535564184, + "rewards/rejected": -0.13562306761741638, "step": 35 }, { "epoch": 0.03177124702144559, - "grad_norm": 185.7123260498047, + "grad_norm": 147.1302032470703, "learning_rate": 2.0000000000000003e-06, - "log_odds_chosen": 0.11347303539514542, - "log_odds_ratio": -0.7381674647331238, - "logits/chosen": 297.0364990234375, - "logits/rejected": 257.74261474609375, - "logps/chosen": -2.3121635913848877, - "logps/rejected": -2.4389572143554688, - "loss": 2.4557, - "nll_loss": 3.096728801727295, + "log_odds_chosen": 0.12157557159662247, + "log_odds_ratio": -0.7308082580566406, + "logits/chosen": 302.55181884765625, + "logits/rejected": 263.9683532714844, + "logps/chosen": -2.3353283405303955, + "logps/rejected": -2.4736487865448, + "loss": 2.4602, + "nll_loss": 3.1235008239746094, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.11560817062854767, - "rewards/margins": 0.006339688785374165, - "rewards/rejected": -0.12194786220788956, + "rewards/chosen": -0.11676641553640366, + "rewards/margins": 0.006916025187820196, + "rewards/rejected": -0.12368245422840118, "step": 40 }, { "epoch": 0.035742652899126294, - "grad_norm": 110.63021087646484, + "grad_norm": 110.2761459350586, "learning_rate": 2.25e-06, - "log_odds_chosen": -0.3320659101009369, - "log_odds_ratio": -0.9123506546020508, - "logits/chosen": 301.091552734375, - "logits/rejected": 277.54205322265625, - "logps/chosen": -1.4611170291900635, - "logps/rejected": -1.2663248777389526, - "loss": 2.169, - "nll_loss": 1.8063217401504517, + "log_odds_chosen": -0.2882576584815979, + "log_odds_ratio": -0.8804551362991333, + "logits/chosen": 304.38250732421875, + "logits/rejected": 283.35162353515625, + "logps/chosen": -1.4691202640533447, + "logps/rejected": -1.2973356246948242, + "loss": 2.1893, + "nll_loss": 1.8311408758163452, "rewards/accuracies": 0.20000000298023224, - "rewards/chosen": -0.07305584847927094, - "rewards/margins": -0.009739604778587818, - "rewards/rejected": -0.0633162409067154, + "rewards/chosen": -0.07345602661371231, + "rewards/margins": -0.008589239791035652, + "rewards/rejected": -0.06486678123474121, "step": 45 }, { "epoch": 0.03971405877680699, - "grad_norm": 178.21774291992188, + "grad_norm": 157.4852752685547, "learning_rate": 2.5e-06, - "log_odds_chosen": 0.13006296753883362, - "log_odds_ratio": -0.6751025915145874, - "logits/chosen": 348.32208251953125, - "logits/rejected": 229.12075805664062, - "logps/chosen": -1.464900255203247, - "logps/rejected": -1.5294269323349, - "loss": 2.2912, - "nll_loss": 2.135530710220337, + "log_odds_chosen": 0.11816178262233734, + "log_odds_ratio": -0.6793395280838013, + "logits/chosen": 351.5061950683594, + "logits/rejected": 229.8159637451172, + "logps/chosen": -1.4661238193511963, + "logps/rejected": -1.5235395431518555, + "loss": 2.2969, + "nll_loss": 2.131767988204956, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.07324501872062683, - "rewards/margins": 0.003226341214030981, - "rewards/rejected": -0.07647135108709335, + "rewards/chosen": -0.07330618798732758, + "rewards/margins": 0.0028707936871796846, + "rewards/rejected": -0.07617697864770889, "step": 50 }, { "epoch": 0.043685464654487687, - "grad_norm": 245.26560974121094, + "grad_norm": 152.07127380371094, "learning_rate": 2.7500000000000004e-06, - "log_odds_chosen": 0.5194543600082397, - "log_odds_ratio": -0.47707730531692505, - "logits/chosen": 271.95819091796875, - "logits/rejected": 315.78558349609375, - "logps/chosen": -1.3941795825958252, - "logps/rejected": -1.8157556056976318, - "loss": 1.9179, - "nll_loss": 1.9026470184326172, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.06970898061990738, - "rewards/margins": 0.021078798919916153, - "rewards/rejected": -0.09078778326511383, + "log_odds_chosen": 0.4677702784538269, + "log_odds_ratio": -0.4963720738887787, + "logits/chosen": 274.1103515625, + "logits/rejected": 317.81048583984375, + "logps/chosen": -1.4359080791473389, + "logps/rejected": -1.8219692707061768, + "loss": 1.9312, + "nll_loss": 1.9257862567901611, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07179541140794754, + "rewards/margins": 0.019303051754832268, + "rewards/rejected": -0.09109846502542496, "step": 55 }, { "epoch": 0.04765687053216839, - "grad_norm": 50.196834564208984, + "grad_norm": 70.87137603759766, "learning_rate": 3e-06, - "log_odds_chosen": -0.3250153362751007, - "log_odds_ratio": -1.0242193937301636, - "logits/chosen": 358.44586181640625, - "logits/rejected": 318.2178955078125, - "logps/chosen": -1.6527748107910156, - "logps/rejected": -1.3916703462600708, - "loss": 2.1109, - "nll_loss": 1.7649085521697998, + "log_odds_chosen": -0.36796286702156067, + "log_odds_ratio": -1.036171317100525, + "logits/chosen": 356.27490234375, + "logits/rejected": 317.17694091796875, + "logps/chosen": -1.6219791173934937, + "logps/rejected": -1.34583580493927, + "loss": 2.1117, + "nll_loss": 1.755855917930603, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.08263873308897018, - "rewards/margins": -0.013055220246315002, - "rewards/rejected": -0.06958352029323578, + "rewards/chosen": -0.08109895884990692, + "rewards/margins": -0.01380716823041439, + "rewards/rejected": -0.06729178875684738, "step": 60 }, { "epoch": 0.051628276409849086, - "grad_norm": 108.7151107788086, + "grad_norm": 58.58616638183594, "learning_rate": 3.2500000000000002e-06, - "log_odds_chosen": 0.29782918095588684, - "log_odds_ratio": -0.6089301705360413, - "logits/chosen": 285.428466796875, - "logits/rejected": 288.96624755859375, - "logps/chosen": -1.7433933019638062, - "logps/rejected": -2.007410764694214, - "loss": 2.4942, - "nll_loss": 2.303445339202881, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.08716966956853867, - "rewards/margins": 0.013200879096984863, - "rewards/rejected": -0.10037054866552353, + "log_odds_chosen": 0.2776266038417816, + "log_odds_ratio": -0.6214284300804138, + "logits/chosen": 280.74383544921875, + "logits/rejected": 284.61981201171875, + "logps/chosen": -1.7956883907318115, + "logps/rejected": -2.0456883907318115, + "loss": 2.5287, + "nll_loss": 2.3265061378479004, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.0897844210267067, + "rewards/margins": 0.012500002980232239, + "rewards/rejected": -0.10228443145751953, "step": 65 }, { "epoch": 0.05559968228752978, - "grad_norm": 232.15228271484375, + "grad_norm": 208.72171020507812, "learning_rate": 3.5e-06, - "log_odds_chosen": -0.16374030709266663, - "log_odds_ratio": -0.8438912630081177, - "logits/chosen": 316.9410400390625, - "logits/rejected": 329.91485595703125, - "logps/chosen": -1.642735481262207, - "logps/rejected": -1.506341814994812, - "loss": 2.2097, - "nll_loss": 1.9609638452529907, + "log_odds_chosen": -0.17022135853767395, + "log_odds_ratio": -0.8455036282539368, + "logits/chosen": 307.35565185546875, + "logits/rejected": 320.88824462890625, + "logps/chosen": -1.625649094581604, + "logps/rejected": -1.4863574504852295, + "loss": 2.1645, + "nll_loss": 1.922053575515747, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.08213677257299423, - "rewards/margins": -0.006819679401814938, - "rewards/rejected": -0.07531709969043732, + "rewards/chosen": -0.08128245174884796, + "rewards/margins": -0.006964580621570349, + "rewards/rejected": -0.07431787252426147, "step": 70 }, { "epoch": 0.059571088165210485, - "grad_norm": 78.16633605957031, + "grad_norm": 69.00164031982422, "learning_rate": 3.7500000000000005e-06, - "log_odds_chosen": 0.08518339693546295, - "log_odds_ratio": -0.6790117025375366, - "logits/chosen": 318.6166076660156, - "logits/rejected": 272.7696533203125, - "logps/chosen": -1.5736838579177856, - "logps/rejected": -1.6470115184783936, - "loss": 2.0889, - "nll_loss": 2.043640375137329, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.07868418842554092, - "rewards/margins": 0.0036663957871496677, - "rewards/rejected": -0.08235058933496475, + "log_odds_chosen": 0.1811346560716629, + "log_odds_ratio": -0.6363757252693176, + "logits/chosen": 317.77398681640625, + "logits/rejected": 273.17938232421875, + "logps/chosen": -1.5566846132278442, + "logps/rejected": -1.7085206508636475, + "loss": 2.0867, + "nll_loss": 2.0406994819641113, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.07783423364162445, + "rewards/margins": 0.0075918035581707954, + "rewards/rejected": -0.08542603254318237, "step": 75 }, { "epoch": 0.06354249404289118, - "grad_norm": 107.8170166015625, + "grad_norm": 77.00801086425781, "learning_rate": 4.000000000000001e-06, - "log_odds_chosen": 0.7262557744979858, - "log_odds_ratio": -0.4617268443107605, - "logits/chosen": 258.40740966796875, - "logits/rejected": 339.81085205078125, - "logps/chosen": -1.2588051557540894, - "logps/rejected": -1.8158533573150635, - "loss": 2.0223, - "nll_loss": 1.9233391284942627, + "log_odds_chosen": 0.6646592617034912, + "log_odds_ratio": -0.48417338728904724, + "logits/chosen": 260.798583984375, + "logits/rejected": 340.7353820800781, + "logps/chosen": -1.2510426044464111, + "logps/rejected": -1.7401701211929321, + "loss": 2.0187, + "nll_loss": 1.9357578754425049, "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.06294026225805283, - "rewards/margins": 0.027852404862642288, - "rewards/rejected": -0.09079267084598541, + "rewards/chosen": -0.06255212426185608, + "rewards/margins": 0.02445637807250023, + "rewards/rejected": -0.0870085060596466, "step": 80 }, { "epoch": 0.06751389992057188, - "grad_norm": 94.64545440673828, + "grad_norm": 208.50125122070312, "learning_rate": 4.25e-06, - "log_odds_chosen": 0.24036984145641327, - "log_odds_ratio": -0.6297720670700073, - "logits/chosen": 322.71148681640625, - "logits/rejected": 335.28228759765625, - "logps/chosen": -1.28748619556427, - "logps/rejected": -1.4782545566558838, - "loss": 1.7358, - "nll_loss": 1.7002407312393188, + "log_odds_chosen": 0.2738969027996063, + "log_odds_ratio": -0.6175040006637573, + "logits/chosen": 320.6893005371094, + "logits/rejected": 330.1410827636719, + "logps/chosen": -1.2499741315841675, + "logps/rejected": -1.4630674123764038, + "loss": 1.7159, + "nll_loss": 1.6920101642608643, "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.06437431275844574, - "rewards/margins": 0.009538417682051659, - "rewards/rejected": -0.07391272485256195, + "rewards/chosen": -0.062498707324266434, + "rewards/margins": 0.010654664598405361, + "rewards/rejected": -0.07315336912870407, "step": 85 }, { "epoch": 0.07148530579825259, - "grad_norm": 133.2477264404297, + "grad_norm": 103.35198211669922, "learning_rate": 4.5e-06, - "log_odds_chosen": 0.11964414268732071, - "log_odds_ratio": -0.6827563643455505, - "logits/chosen": 325.7275695800781, - "logits/rejected": 273.9080505371094, - "logps/chosen": -1.321221947669983, - "logps/rejected": -1.4350178241729736, - "loss": 2.0792, - "nll_loss": 2.0664451122283936, + "log_odds_chosen": 0.12765750288963318, + "log_odds_ratio": -0.6823771595954895, + "logits/chosen": 321.28680419921875, + "logits/rejected": 270.31292724609375, + "logps/chosen": -1.3744524717330933, + "logps/rejected": -1.5009243488311768, + "loss": 2.1276, + "nll_loss": 2.185715436935425, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.0660611018538475, - "rewards/margins": 0.005689795129001141, - "rewards/rejected": -0.07175089418888092, + "rewards/chosen": -0.06872262060642242, + "rewards/margins": 0.006323590874671936, + "rewards/rejected": -0.07504621893167496, "step": 90 }, { "epoch": 0.07545671167593328, - "grad_norm": 75.57556915283203, + "grad_norm": 61.852169036865234, "learning_rate": 4.75e-06, - "log_odds_chosen": 0.1924123615026474, - "log_odds_ratio": -0.7588543891906738, - "logits/chosen": 268.33111572265625, - "logits/rejected": 284.4769287109375, - "logps/chosen": -1.2349271774291992, - "logps/rejected": -1.3220834732055664, - "loss": 1.8031, - "nll_loss": 2.01653790473938, + "log_odds_chosen": 0.06399938464164734, + "log_odds_ratio": -0.8128548860549927, + "logits/chosen": 269.13970947265625, + "logits/rejected": 286.59197998046875, + "logps/chosen": -1.3123642206192017, + "logps/rejected": -1.3269928693771362, + "loss": 1.8274, + "nll_loss": 2.0496463775634766, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.06174635887145996, - "rewards/margins": 0.0043578073382377625, - "rewards/rejected": -0.06610416620969772, + "rewards/chosen": -0.06561820954084396, + "rewards/margins": 0.0007314354297704995, + "rewards/rejected": -0.06634964793920517, "step": 95 }, { "epoch": 0.07942811755361398, - "grad_norm": 82.01164245605469, + "grad_norm": 54.209163665771484, "learning_rate": 5e-06, - "log_odds_chosen": 0.22679157555103302, - "log_odds_ratio": -0.6229124069213867, - "logits/chosen": 347.00042724609375, - "logits/rejected": 280.84796142578125, - "logps/chosen": -1.0023002624511719, - "logps/rejected": -1.172823429107666, - "loss": 1.9771, - "nll_loss": 1.778070092201233, + "log_odds_chosen": 0.27285704016685486, + "log_odds_ratio": -0.5980533957481384, + "logits/chosen": 343.4212951660156, + "logits/rejected": 275.14459228515625, + "logps/chosen": -0.9581457376480103, + "logps/rejected": -1.1551190614700317, + "loss": 1.9599, + "nll_loss": 1.7298065423965454, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.050115011632442474, - "rewards/margins": 0.008526156656444073, - "rewards/rejected": -0.05864117294549942, + "rewards/chosen": -0.04790728539228439, + "rewards/margins": 0.009848668240010738, + "rewards/rejected": -0.057755958288908005, "step": 100 }, { "epoch": 0.08339952343129468, - "grad_norm": 144.94651794433594, - "learning_rate": 4.99977039769305e-06, - "log_odds_chosen": -0.6747381091117859, - "log_odds_ratio": -1.3916146755218506, - "logits/chosen": 285.270751953125, - "logits/rejected": 392.5542907714844, - "logps/chosen": -2.4368515014648438, - "logps/rejected": -1.8863086700439453, - "loss": 2.1097, - "nll_loss": 2.505847454071045, + "grad_norm": 92.10086822509766, + "learning_rate": 4.8795003647426654e-06, + "log_odds_chosen": -0.664605975151062, + "log_odds_ratio": -1.3894308805465698, + "logits/chosen": 278.6569519042969, + "logits/rejected": 385.9412841796875, + "logps/chosen": -2.4318549633026123, + "logps/rejected": -1.900923728942871, + "loss": 2.1305, + "nll_loss": 2.418598175048828, "rewards/accuracies": 0.20000000298023224, - "rewards/chosen": -0.12184257805347443, - "rewards/margins": -0.02752714790403843, - "rewards/rejected": -0.09431543946266174, + "rewards/chosen": -0.12159274518489838, + "rewards/margins": -0.02654656209051609, + "rewards/rejected": -0.09504619240760803, "step": 105 }, { "epoch": 0.08737092930897537, - "grad_norm": 907.4935302734375, - "learning_rate": 4.9990816329459744e-06, - "log_odds_chosen": 0.6644043326377869, - "log_odds_ratio": -0.6343256235122681, - "logits/chosen": 355.09014892578125, - "logits/rejected": 328.604736328125, - "logps/chosen": -3.468595027923584, - "logps/rejected": -4.0634002685546875, - "loss": 2.2136, - "nll_loss": 2.77046275138855, + "grad_norm": 392.0915832519531, + "learning_rate": 4.767312946227961e-06, + "log_odds_chosen": 0.9900819659233093, + "log_odds_ratio": -0.4791165292263031, + "logits/chosen": 352.7217102050781, + "logits/rejected": 326.9925537109375, + "logps/chosen": -2.6203694343566895, + "logps/rejected": -3.5170578956604004, + "loss": 1.9908, + "nll_loss": 2.289768934249878, "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.17342975735664368, - "rewards/margins": 0.02974027954041958, - "rewards/rejected": -0.2031700611114502, + "rewards/chosen": -0.1310184746980667, + "rewards/margins": 0.04483442381024361, + "rewards/rejected": -0.17585287988185883, "step": 110 }, { "epoch": 0.09134233518665608, - "grad_norm": 55.67234802246094, - "learning_rate": 4.997933832272354e-06, - "log_odds_chosen": 0.5580138564109802, - "log_odds_ratio": -0.507127046585083, - "logits/chosen": 261.1326599121094, - "logits/rejected": 371.37396240234375, - "logps/chosen": -1.3323694467544556, - "logps/rejected": -1.7812392711639404, - "loss": 1.7648, - "nll_loss": 1.5408798456192017, + "grad_norm": 65.87200164794922, + "learning_rate": 4.662524041201569e-06, + "log_odds_chosen": 0.6037150621414185, + "log_odds_ratio": -0.4995183050632477, + "logits/chosen": 271.54107666015625, + "logits/rejected": 379.6575012207031, + "logps/chosen": -1.2679884433746338, + "logps/rejected": -1.7445560693740845, + "loss": 1.7958, + "nll_loss": 1.523012399673462, "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.06661847233772278, - "rewards/margins": 0.02244349755346775, - "rewards/rejected": -0.08906197547912598, + "rewards/chosen": -0.06339941918849945, + "rewards/margins": 0.023828381672501564, + "rewards/rejected": -0.08722780644893646, "step": 115 }, { "epoch": 0.09531374106433678, - "grad_norm": 155.13243103027344, - "learning_rate": 4.996327206502335e-06, - "log_odds_chosen": 0.4205778241157532, - "log_odds_ratio": -0.6450524926185608, - "logits/chosen": 249.2978973388672, - "logits/rejected": 313.87274169921875, - "logps/chosen": -1.5952913761138916, - "logps/rejected": -1.9669711589813232, - "loss": 2.1811, - "nll_loss": 2.7610068321228027, + "grad_norm": 101.74378967285156, + "learning_rate": 4.564354645876385e-06, + "log_odds_chosen": 0.4342077672481537, + "log_odds_ratio": -0.6743711233139038, + "logits/chosen": 263.05096435546875, + "logits/rejected": 325.2860412597656, + "logps/chosen": -1.5464773178100586, + "logps/rejected": -1.930596947669983, + "loss": 2.0518, + "nll_loss": 2.4592247009277344, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.07976456731557846, - "rewards/margins": 0.018583994358778, - "rewards/rejected": -0.09834857285022736, + "rewards/chosen": -0.07732386887073517, + "rewards/margins": 0.019205976277589798, + "rewards/rejected": -0.09652984142303467, "step": 120 }, { "epoch": 0.09928514694201747, - "grad_norm": 56.40447235107422, - "learning_rate": 4.994262050743902e-06, - "log_odds_chosen": -0.344782292842865, - "log_odds_ratio": -0.9613167643547058, - "logits/chosen": 297.97796630859375, - "logits/rejected": 316.97796630859375, - "logps/chosen": -1.9141199588775635, - "logps/rejected": -1.645132064819336, - "loss": 2.097, - "nll_loss": 2.657578945159912, + "grad_norm": 48.3521614074707, + "learning_rate": 4.47213595499958e-06, + "log_odds_chosen": -0.35852494835853577, + "log_odds_ratio": -0.9780646562576294, + "logits/chosen": 302.3356628417969, + "logits/rejected": 319.9010009765625, + "logps/chosen": -1.8093980550765991, + "logps/rejected": -1.538129210472107, + "loss": 1.965, + "nll_loss": 2.3710861206054688, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.09570600092411041, - "rewards/margins": -0.013449391350150108, - "rewards/rejected": -0.08225660771131516, + "rewards/chosen": -0.09046990424394608, + "rewards/margins": -0.01356343924999237, + "rewards/rejected": -0.0769064649939537, "step": 125 }, { "epoch": 0.10325655281969817, - "grad_norm": 63.89404296875, - "learning_rate": 4.991738744328679e-06, - "log_odds_chosen": -0.5421128869056702, - "log_odds_ratio": -1.0545024871826172, - "logits/chosen": 358.645751953125, - "logits/rejected": 298.12786865234375, - "logps/chosen": -1.4808099269866943, - "logps/rejected": -1.1260448694229126, - "loss": 2.0496, - "nll_loss": 2.101855993270874, + "grad_norm": 62.85472869873047, + "learning_rate": 4.385290096535147e-06, + "log_odds_chosen": -0.6488355398178101, + "log_odds_ratio": -1.1188563108444214, + "logits/chosen": 354.04156494140625, + "logits/rejected": 294.17169189453125, + "logps/chosen": -1.5017735958099365, + "logps/rejected": -1.0746318101882935, + "loss": 1.965, + "nll_loss": 2.148465871810913, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.0740404948592186, - "rewards/margins": -0.017738252878189087, - "rewards/rejected": -0.05630224198102951, + "rewards/chosen": -0.07508867979049683, + "rewards/margins": -0.021357093006372452, + "rewards/rejected": -0.05373159795999527, "step": 130 }, { "epoch": 0.10722795869737888, - "grad_norm": 94.53604125976562, - "learning_rate": 4.988757750742243e-06, - "log_odds_chosen": -0.7190758585929871, - "log_odds_ratio": -1.2133655548095703, - "logits/chosen": 339.0290832519531, - "logits/rejected": 269.4937438964844, - "logps/chosen": -1.472083568572998, - "logps/rejected": -1.066962718963623, - "loss": 1.8894, - "nll_loss": 2.1049113273620605, + "grad_norm": 80.7550277709961, + "learning_rate": 4.303314829119352e-06, + "log_odds_chosen": -0.6391368508338928, + "log_odds_ratio": -1.1500171422958374, + "logits/chosen": 333.481689453125, + "logits/rejected": 268.5390625, + "logps/chosen": -1.366288185119629, + "logps/rejected": -1.029240369796753, + "loss": 1.8453, + "nll_loss": 2.0549352169036865, "rewards/accuracies": 0.20000000298023224, - "rewards/chosen": -0.07360417395830154, - "rewards/margins": -0.020256036892533302, - "rewards/rejected": -0.05334814265370369, + "rewards/chosen": -0.06831441074609756, + "rewards/margins": -0.01685238815844059, + "rewards/rejected": -0.05146201699972153, "step": 135 }, { "epoch": 0.11119936457505956, - "grad_norm": 71.55030059814453, - "learning_rate": 4.985319617538998e-06, - "log_odds_chosen": 0.4117642343044281, - "log_odds_ratio": -0.6588890552520752, - "logits/chosen": 288.08966064453125, - "logits/rejected": 382.9283142089844, - "logps/chosen": -1.4075809717178345, - "logps/rejected": -1.670000433921814, - "loss": 2.1323, - "nll_loss": 1.7244634628295898, + "grad_norm": 52.68488693237305, + "learning_rate": 4.2257712736425835e-06, + "log_odds_chosen": 0.44900092482566833, + "log_odds_ratio": -0.6255335211753845, + "logits/chosen": 285.18841552734375, + "logits/rejected": 379.58984375, + "logps/chosen": -1.3383129835128784, + "logps/rejected": -1.6278247833251953, + "loss": 1.9521, + "nll_loss": 1.6224981546401978, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.07037904858589172, - "rewards/margins": 0.013120980933308601, - "rewards/rejected": -0.08350002765655518, + "rewards/chosen": -0.06691565364599228, + "rewards/margins": 0.014475582167506218, + "rewards/rejected": -0.08139123767614365, "step": 140 }, { "epoch": 0.11517077045274027, - "grad_norm": 34.86751174926758, - "learning_rate": 4.981424976241598e-06, - "log_odds_chosen": 0.4058244228363037, - "log_odds_ratio": -0.6222633123397827, - "logits/chosen": 308.98876953125, - "logits/rejected": 273.16015625, - "logps/chosen": -1.11463463306427, - "logps/rejected": -1.428716778755188, - "loss": 1.7015, - "nll_loss": 1.501156210899353, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.05573173239827156, - "rewards/margins": 0.01570410653948784, - "rewards/rejected": -0.0714358389377594, + "grad_norm": 32.54029083251953, + "learning_rate": 4.1522739926869985e-06, + "log_odds_chosen": 0.2789516746997833, + "log_odds_ratio": -0.6979798078536987, + "logits/chosen": 308.1046142578125, + "logits/rejected": 268.2256774902344, + "logps/chosen": -1.107441782951355, + "logps/rejected": -1.3338348865509033, + "loss": 1.6567, + "nll_loss": 1.4684410095214844, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.055372096598148346, + "rewards/margins": 0.011319654062390327, + "rewards/rejected": -0.06669174879789352, "step": 145 }, { "epoch": 0.11914217633042097, - "grad_norm": 52.555850982666016, - "learning_rate": 4.977074542224941e-06, - "log_odds_chosen": -0.05959262698888779, - "log_odds_ratio": -0.7522531151771545, - "logits/chosen": 290.1683349609375, - "logits/rejected": 313.0419616699219, - "logps/chosen": -1.5048227310180664, - "logps/rejected": -1.4883638620376587, - "loss": 1.814, - "nll_loss": 1.6905310153961182, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.07524113357067108, - "rewards/margins": -0.0008229411905631423, - "rewards/rejected": -0.07441819459199905, + "grad_norm": 52.103843688964844, + "learning_rate": 4.082482904638631e-06, + "log_odds_chosen": -0.07480888068675995, + "log_odds_ratio": -0.7555229663848877, + "logits/chosen": 287.1299743652344, + "logits/rejected": 311.5876770019531, + "logps/chosen": -1.454633355140686, + "logps/rejected": -1.4153658151626587, + "loss": 1.7879, + "nll_loss": 1.646153450012207, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.07273166626691818, + "rewards/margins": -0.0019633763004094362, + "rewards/rejected": -0.07076828926801682, "step": 150 }, { "epoch": 0.12311358220810167, - "grad_norm": 108.9931869506836, - "learning_rate": 4.972269114584779e-06, - "log_odds_chosen": -0.10580176115036011, - "log_odds_ratio": -0.794634997844696, - "logits/chosen": 386.6126403808594, - "logits/rejected": 284.34765625, - "logps/chosen": -1.1588705778121948, - "logps/rejected": -1.1435911655426025, - "loss": 1.9924, - "nll_loss": 2.0270378589630127, + "grad_norm": 76.20840454101562, + "learning_rate": 4.016096644512495e-06, + "log_odds_chosen": 0.01139686070382595, + "log_odds_ratio": -0.7648938894271851, + "logits/chosen": 377.6351013183594, + "logits/rejected": 283.956298828125, + "logps/chosen": -1.144536018371582, + "logps/rejected": -1.1955327987670898, + "loss": 1.8622, + "nll_loss": 1.9163175821304321, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.05794353038072586, - "rewards/margins": -0.0007639724644832313, - "rewards/rejected": -0.05717955902218819, + "rewards/chosen": -0.05722679942846298, + "rewards/margins": 0.0025498413015156984, + "rewards/rejected": -0.05977664515376091, "step": 155 }, { "epoch": 0.12708498808578236, - "grad_norm": 61.099525451660156, - "learning_rate": 4.9670095759909275e-06, - "log_odds_chosen": 0.3500244915485382, - "log_odds_ratio": -0.577224612236023, - "logits/chosen": 328.20562744140625, - "logits/rejected": 250.172607421875, - "logps/chosen": -1.0037747621536255, - "logps/rejected": -1.1577932834625244, - "loss": 1.8237, - "nll_loss": 1.7924268245697021, + "grad_norm": 64.51053619384766, + "learning_rate": 3.952847075210474e-06, + "log_odds_chosen": 0.34641021490097046, + "log_odds_ratio": -0.6011036038398743, + "logits/chosen": 324.39093017578125, + "logits/rejected": 246.60037231445312, + "logps/chosen": -0.9995762705802917, + "logps/rejected": -1.1426242589950562, + "loss": 1.7913, + "nll_loss": 1.83511483669281, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.050188738852739334, - "rewards/margins": 0.007700921501964331, - "rewards/rejected": -0.0578896589577198, + "rewards/chosen": -0.04997881501913071, + "rewards/margins": 0.007152394857257605, + "rewards/rejected": -0.05713121220469475, "step": 160 }, { "epoch": 0.13105639396346305, - "grad_norm": 50.632930755615234, - "learning_rate": 4.961296892525144e-06, - "log_odds_chosen": 0.3148774206638336, - "log_odds_ratio": -0.7541528940200806, - "logits/chosen": 326.50323486328125, - "logits/rejected": 326.8573913574219, - "logps/chosen": -1.0643494129180908, - "logps/rejected": -1.2868483066558838, - "loss": 2.0091, - "nll_loss": 1.6609458923339844, + "grad_norm": 75.09971618652344, + "learning_rate": 3.892494720807615e-06, + "log_odds_chosen": 0.2190161645412445, + "log_odds_ratio": -0.7696462273597717, + "logits/chosen": 316.7698059082031, + "logits/rejected": 318.6285095214844, + "logps/chosen": -1.039623498916626, + "logps/rejected": -1.2176158428192139, + "loss": 1.9486, + "nll_loss": 1.644431710243225, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.05321747064590454, - "rewards/margins": 0.011124944314360619, - "rewards/rejected": -0.06434241682291031, + "rewards/chosen": -0.05198118835687637, + "rewards/margins": 0.008899608626961708, + "rewards/rejected": -0.060880791395902634, "step": 165 }, { "epoch": 0.13502779984114377, - "grad_norm": 44.31376266479492, - "learning_rate": 4.95513211350367e-06, - "log_odds_chosen": 0.7684804797172546, - "log_odds_ratio": -0.424949586391449, - "logits/chosen": 315.80316162109375, - "logits/rejected": 317.47760009765625, - "logps/chosen": -1.0722829103469849, - "logps/rejected": -1.50656259059906, - "loss": 1.5922, - "nll_loss": 1.819898009300232, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.05361414700746536, - "rewards/margins": 0.0217139795422554, - "rewards/rejected": -0.07532812654972076, + "grad_norm": 42.6450309753418, + "learning_rate": 3.834824944236852e-06, + "log_odds_chosen": 0.8528574705123901, + "log_odds_ratio": -0.38301119208335876, + "logits/chosen": 305.4313049316406, + "logits/rejected": 304.91485595703125, + "logps/chosen": -1.0219231843948364, + "logps/rejected": -1.5175138711929321, + "loss": 1.5835, + "nll_loss": 1.799435019493103, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05109615996479988, + "rewards/margins": 0.024779539555311203, + "rewards/rejected": -0.07587569952011108, "step": 170 }, { "epoch": 0.13899920571882446, - "grad_norm": 37.67052459716797, - "learning_rate": 4.948516371284493e-06, - "log_odds_chosen": -0.38165563344955444, - "log_odds_ratio": -0.9263311624526978, - "logits/chosen": 399.5979919433594, - "logits/rejected": 327.8373107910156, - "logps/chosen": -1.0562649965286255, - "logps/rejected": -0.7994272112846375, - "loss": 1.7336, - "nll_loss": 1.5048203468322754, - "rewards/accuracies": 0.20000000298023224, - "rewards/chosen": -0.052813250571489334, - "rewards/margins": -0.012841887772083282, - "rewards/rejected": -0.03997135907411575, + "grad_norm": 60.28545379638672, + "learning_rate": 3.7796447300922724e-06, + "log_odds_chosen": -0.31903964281082153, + "log_odds_ratio": -0.9109575152397156, + "logits/chosen": 390.44110107421875, + "logits/rejected": 319.32049560546875, + "logps/chosen": -1.0434072017669678, + "logps/rejected": -0.8091991543769836, + "loss": 1.657, + "nll_loss": 1.5240576267242432, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.05217035859823227, + "rewards/margins": -0.0117103960365057, + "rewards/rejected": -0.04045996814966202, "step": 175 }, { "epoch": 0.14297061159650518, - "grad_norm": 39.37586212158203, - "learning_rate": 4.941450881059354e-06, - "log_odds_chosen": 0.14676916599273682, - "log_odds_ratio": -0.6898115873336792, - "logits/chosen": 288.27630615234375, - "logits/rejected": 291.10162353515625, - "logps/chosen": -1.1020549535751343, - "logps/rejected": -1.1935259103775024, - "loss": 1.5256, - "nll_loss": 1.1978670358657837, + "grad_norm": 65.73664855957031, + "learning_rate": 3.72677996249965e-06, + "log_odds_chosen": 0.1744231879711151, + "log_odds_ratio": -0.6777268648147583, + "logits/chosen": 293.218505859375, + "logits/rejected": 292.60076904296875, + "logps/chosen": -1.0469298362731934, + "logps/rejected": -1.1555253267288208, + "loss": 1.4845, + "nll_loss": 1.1594089269638062, "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.05510275438427925, - "rewards/margins": 0.00457354262471199, - "rewards/rejected": -0.05967629700899124, + "rewards/chosen": -0.052346497774124146, + "rewards/margins": 0.0054297661408782005, + "rewards/rejected": -0.05777626112103462, "step": 180 }, { "epoch": 0.14694201747418587, - "grad_norm": 57.68708419799805, - "learning_rate": 4.933936940630537e-06, - "log_odds_chosen": -0.36938825249671936, - "log_odds_ratio": -0.9125706553459167, - "logits/chosen": 301.7081298828125, - "logits/rejected": 338.01690673828125, - "logps/chosen": -1.5262658596038818, - "logps/rejected": -1.2626394033432007, - "loss": 1.7968, - "nll_loss": 1.8017867803573608, + "grad_norm": 50.266204833984375, + "learning_rate": 3.6760731104690393e-06, + "log_odds_chosen": -0.4040209650993347, + "log_odds_ratio": -0.9406334161758423, + "logits/chosen": 303.67889404296875, + "logits/rejected": 342.15863037109375, + "logps/chosen": -1.4420201778411865, + "logps/rejected": -1.1751186847686768, + "loss": 1.7518, + "nll_loss": 1.7355467081069946, "rewards/accuracies": 0.20000000298023224, - "rewards/chosen": -0.07631329447031021, - "rewards/margins": -0.013181325979530811, - "rewards/rejected": -0.06313197314739227, + "rewards/chosen": -0.07210101187229156, + "rewards/margins": -0.013345075771212578, + "rewards/rejected": -0.05875593423843384, "step": 185 }, { "epoch": 0.15091342335186655, - "grad_norm": 61.880126953125, - "learning_rate": 4.925975930172489e-06, - "log_odds_chosen": 0.2720580995082855, - "log_odds_ratio": -0.5754180550575256, - "logits/chosen": 240.9505157470703, - "logits/rejected": 291.4943542480469, - "logps/chosen": -1.128051519393921, - "logps/rejected": -1.3102426528930664, - "loss": 1.6575, - "nll_loss": 1.4304395914077759, + "grad_norm": 45.1833381652832, + "learning_rate": 3.6273812505500587e-06, + "log_odds_chosen": 0.25176072120666504, + "log_odds_ratio": -0.5851801037788391, + "logits/chosen": 251.08883666992188, + "logits/rejected": 298.2301330566406, + "logps/chosen": -1.0510302782058716, + "logps/rejected": -1.2064368724822998, + "loss": 1.6223, + "nll_loss": 1.3497669696807861, "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.056402575224637985, - "rewards/margins": 0.009109559468925, - "rewards/rejected": -0.06551213562488556, + "rewards/chosen": -0.0525515154004097, + "rewards/margins": 0.007770332042127848, + "rewards/rejected": -0.06032184511423111, "step": 190 }, { "epoch": 0.15488482922954727, - "grad_norm": 128.1157684326172, - "learning_rate": 4.917569311978301e-06, - "log_odds_chosen": -0.16586491465568542, - "log_odds_ratio": -0.8022063374519348, - "logits/chosen": 339.9231872558594, - "logits/rejected": 310.51458740234375, - "logps/chosen": -1.4521989822387695, - "logps/rejected": -1.3116356134414673, - "loss": 1.8756, - "nll_loss": 1.6476871967315674, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.07260995358228683, - "rewards/margins": -0.007028169929981232, - "rewards/rejected": -0.0655817836523056, + "grad_norm": 133.46376037597656, + "learning_rate": 3.5805743701971648e-06, + "log_odds_chosen": -0.13634520769119263, + "log_odds_ratio": -0.7839670181274414, + "logits/chosen": 341.7732849121094, + "logits/rejected": 311.25469970703125, + "logps/chosen": -1.3579070568084717, + "logps/rejected": -1.2352676391601562, + "loss": 1.7763, + "nll_loss": 1.5284233093261719, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.06789536029100418, + "rewards/margins": -0.0061319745145738125, + "rewards/rejected": -0.06176338344812393, "step": 195 }, { "epoch": 0.15885623510722796, - "grad_norm": 360.7728271484375, - "learning_rate": 4.9087186301911196e-06, - "log_odds_chosen": -0.15912006795406342, - "log_odds_ratio": -0.8308361172676086, - "logits/chosen": 348.33721923828125, - "logits/rejected": 337.98748779296875, - "logps/chosen": -1.4099957942962646, - "logps/rejected": -1.3773982524871826, - "loss": 1.7693, - "nll_loss": 2.075568914413452, + "grad_norm": 212.89317321777344, + "learning_rate": 3.5355339059327378e-06, + "log_odds_chosen": -0.16400772333145142, + "log_odds_ratio": -0.8339014053344727, + "logits/chosen": 351.3518981933594, + "logits/rejected": 341.30303955078125, + "logps/chosen": -1.33816659450531, + "logps/rejected": -1.29983651638031, + "loss": 1.6261, + "nll_loss": 1.852270483970642, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.07049979269504547, - "rewards/margins": -0.001629872596822679, - "rewards/rejected": -0.06886991858482361, + "rewards/chosen": -0.0669083371758461, + "rewards/margins": -0.0019165098201483488, + "rewards/rejected": -0.06499183177947998, "step": 200 }, { "epoch": 0.16282764098490865, - "grad_norm": 55.327293395996094, - "learning_rate": 4.89942551052051e-06, - "log_odds_chosen": 0.21036644279956818, - "log_odds_ratio": -0.6468175053596497, - "logits/chosen": 343.9696960449219, - "logits/rejected": 264.9131164550781, - "logps/chosen": -1.5017131567001343, - "logps/rejected": -1.672141432762146, - "loss": 1.8711, - "nll_loss": 1.952646255493164, + "grad_norm": 70.86261749267578, + "learning_rate": 3.4921514788478916e-06, + "log_odds_chosen": 0.25013333559036255, + "log_odds_ratio": -0.636869490146637, + "logits/chosen": 347.8689270019531, + "logits/rejected": 272.3687438964844, + "logps/chosen": -1.3360661268234253, + "logps/rejected": -1.536778450012207, + "loss": 1.7544, + "nll_loss": 1.8276907205581665, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.07508565485477448, - "rewards/margins": 0.008521410636603832, - "rewards/rejected": -0.08360707014799118, + "rewards/chosen": -0.06680331379175186, + "rewards/margins": 0.010035613551735878, + "rewards/rejected": -0.07683892548084259, "step": 205 }, { "epoch": 0.16679904686258937, - "grad_norm": 44.57875442504883, - "learning_rate": 4.889691659943842e-06, - "log_odds_chosen": 0.6125321388244629, - "log_odds_ratio": -0.5584419965744019, - "logits/chosen": 301.97369384765625, - "logits/rejected": 392.7519836425781, - "logps/chosen": -1.1298048496246338, - "logps/rejected": -1.5633180141448975, - "loss": 1.9275, - "nll_loss": 1.591841220855713, + "grad_norm": 34.17070770263672, + "learning_rate": 3.450327796711771e-06, + "log_odds_chosen": 0.3506450951099396, + "log_odds_ratio": -0.6737692952156067, + "logits/chosen": 300.19873046875, + "logits/rejected": 388.3397521972656, + "logps/chosen": -1.0396573543548584, + "logps/rejected": -1.299134612083435, + "loss": 1.7484, + "nll_loss": 1.4944199323654175, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.056490249931812286, - "rewards/margins": 0.021675655618309975, - "rewards/rejected": -0.07816590368747711, + "rewards/chosen": -0.05198286846280098, + "rewards/margins": 0.012973867356777191, + "rewards/rejected": -0.06495673954486847, "step": 210 }, { "epoch": 0.17077045274027006, - "grad_norm": 40.897430419921875, - "learning_rate": 4.879518866392757e-06, - "log_odds_chosen": 0.31865325570106506, - "log_odds_ratio": -0.5695816278457642, - "logits/chosen": 289.89654541015625, - "logits/rejected": 337.4901428222656, - "logps/chosen": -1.0857855081558228, - "logps/rejected": -1.3082635402679443, - "loss": 1.8816, - "nll_loss": 1.8894774913787842, + "grad_norm": 39.18354797363281, + "learning_rate": 3.409971697352368e-06, + "log_odds_chosen": 0.39639392495155334, + "log_odds_ratio": -0.5673670768737793, + "logits/chosen": 289.6415710449219, + "logits/rejected": 342.2761535644531, + "logps/chosen": -0.9784858822822571, + "logps/rejected": -1.2464014291763306, + "loss": 1.8046, + "nll_loss": 1.8137214183807373, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.05428927391767502, - "rewards/margins": 0.011123904958367348, - "rewards/rejected": -0.06541318446397781, + "rewards/chosen": -0.048924293369054794, + "rewards/margins": 0.013395780697464943, + "rewards/rejected": -0.06232007220387459, "step": 215 }, { "epoch": 0.17474185861795075, - "grad_norm": 57.284912109375, - "learning_rate": 4.868908998424749e-06, - "log_odds_chosen": 1.3685444593429565, - "log_odds_ratio": -0.32077115774154663, - "logits/chosen": 461.255859375, - "logits/rejected": 236.81936645507812, - "logps/chosen": -1.125225305557251, - "logps/rejected": -2.0914998054504395, - "loss": 1.8968, - "nll_loss": 2.5089685916900635, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.05626126378774643, - "rewards/margins": 0.04831372946500778, - "rewards/rejected": -0.10457499325275421, + "grad_norm": 66.22753143310547, + "learning_rate": 3.3709993123162106e-06, + "log_odds_chosen": 0.9709181785583496, + "log_odds_ratio": -0.44296368956565857, + "logits/chosen": 459.4443359375, + "logits/rejected": 243.2751007080078, + "logps/chosen": -1.1406949758529663, + "logps/rejected": -1.8125925064086914, + "loss": 1.8881, + "nll_loss": 2.4477667808532715, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.057034749537706375, + "rewards/margins": 0.03359488397836685, + "rewards/rejected": -0.09062962979078293, "step": 220 }, { "epoch": 0.17871326449563146, - "grad_norm": 48.706504821777344, - "learning_rate": 4.8578640048799435e-06, - "log_odds_chosen": 0.48141545057296753, - "log_odds_ratio": -0.5349053740501404, - "logits/chosen": 331.5611877441406, - "logits/rejected": 310.23876953125, - "logps/chosen": -1.1479623317718506, - "logps/rejected": -1.398716688156128, - "loss": 1.6831, - "nll_loss": 2.0783326625823975, + "grad_norm": 40.38410949707031, + "learning_rate": 3.3333333333333333e-06, + "log_odds_chosen": 0.5472304821014404, + "log_odds_ratio": -0.5069239735603333, + "logits/chosen": 337.7701721191406, + "logits/rejected": 314.196044921875, + "logps/chosen": -1.0212123394012451, + "logps/rejected": -1.2714849710464478, + "loss": 1.6271, + "nll_loss": 1.9993699789047241, "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.05739812180399895, - "rewards/margins": 0.012537715956568718, - "rewards/rejected": -0.06993584334850311, + "rewards/chosen": -0.05106062442064285, + "rewards/margins": 0.012513632886111736, + "rewards/rejected": -0.06357425451278687, "step": 225 }, { "epoch": 0.18268467037331215, - "grad_norm": 40.74349594116211, - "learning_rate": 4.846385914523143e-06, - "log_odds_chosen": -0.004401213023811579, - "log_odds_ratio": -0.7065997123718262, - "logits/chosen": 321.564453125, - "logits/rejected": 321.5721130371094, - "logps/chosen": -1.444544792175293, - "logps/rejected": -1.465693473815918, - "loss": 1.6282, - "nll_loss": 1.7196967601776123, + "grad_norm": 36.757057189941406, + "learning_rate": 3.296902366978936e-06, + "log_odds_chosen": 0.027332711964845657, + "log_odds_ratio": -0.6871450543403625, + "logits/chosen": 314.82354736328125, + "logits/rejected": 310.2427062988281, + "logps/chosen": -1.3505454063415527, + "logps/rejected": -1.3915187120437622, + "loss": 1.565, + "nll_loss": 1.6233704090118408, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.07222724705934525, - "rewards/margins": 0.0010574304033070803, - "rewards/rejected": -0.07328467816114426, + "rewards/chosen": -0.06752727180719376, + "rewards/margins": 0.0020486623980104923, + "rewards/rejected": -0.06957593560218811, "step": 230 }, { "epoch": 0.18665607625099284, - "grad_norm": 124.04603576660156, - "learning_rate": 4.834476835671166e-06, - "log_odds_chosen": -0.40913257002830505, - "log_odds_ratio": -1.0309641361236572, - "logits/chosen": 303.4057312011719, - "logits/rejected": 345.175048828125, - "logps/chosen": -1.1636359691619873, - "logps/rejected": -0.9079948663711548, - "loss": 1.8382, - "nll_loss": 1.716036081314087, + "grad_norm": 61.87923049926758, + "learning_rate": 3.2616403652672114e-06, + "log_odds_chosen": -0.4674092233181, + "log_odds_ratio": -1.0502631664276123, + "logits/chosen": 289.9769592285156, + "logits/rejected": 327.74395751953125, + "logps/chosen": -1.076569676399231, + "logps/rejected": -0.7748829126358032, + "loss": 1.7465, + "nll_loss": 1.6123956441879272, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.058181799948215485, - "rewards/margins": -0.012782062403857708, - "rewards/rejected": -0.0453997440636158, + "rewards/chosen": -0.05382848531007767, + "rewards/margins": -0.015084335580468178, + "rewards/rejected": -0.03874415159225464, "step": 235 }, { "epoch": 0.19062748212867356, - "grad_norm": 354.8912048339844, - "learning_rate": 4.822138955805595e-06, - "log_odds_chosen": -0.03981683775782585, - "log_odds_ratio": -0.7560557126998901, - "logits/chosen": 365.21771240234375, - "logits/rejected": 281.8612060546875, - "logps/chosen": -1.5758720636367798, - "logps/rejected": -1.6159578561782837, - "loss": 1.7801, - "nll_loss": 1.9230833053588867, + "grad_norm": 73.88203430175781, + "learning_rate": 3.2274861218395142e-06, + "log_odds_chosen": 0.07915325462818146, + "log_odds_ratio": -0.7744172215461731, + "logits/chosen": 361.3428039550781, + "logits/rejected": 279.46966552734375, + "logps/chosen": -1.4194855690002441, + "logps/rejected": -1.5974981784820557, + "loss": 1.6476, + "nll_loss": 1.8600105047225952, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.07879360020160675, - "rewards/margins": 0.0020043007098138332, - "rewards/rejected": -0.08079790323972702, + "rewards/chosen": -0.07097427546977997, + "rewards/margins": 0.008900630287826061, + "rewards/rejected": -0.0798749104142189, "step": 240 }, { "epoch": 0.19459888800635425, - "grad_norm": 112.47100067138672, - "learning_rate": 4.809374541170974e-06, - "log_odds_chosen": 0.4120238423347473, - "log_odds_ratio": -0.5282896161079407, - "logits/chosen": 280.23223876953125, - "logits/rejected": 381.760986328125, - "logps/chosen": -1.43479323387146, - "logps/rejected": -1.7411903142929077, - "loss": 1.705, - "nll_loss": 1.610185980796814, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.07173965871334076, - "rewards/margins": 0.015319856815040112, - "rewards/rejected": -0.08705951273441315, + "grad_norm": 97.79542541503906, + "learning_rate": 3.1943828249997e-06, + "log_odds_chosen": 0.41313672065734863, + "log_odds_ratio": -0.5386548638343811, + "logits/chosen": 274.5640563964844, + "logits/rejected": 378.27227783203125, + "logps/chosen": -1.3742090463638306, + "logps/rejected": -1.6753637790679932, + "loss": 1.6432, + "nll_loss": 1.562549114227295, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.06871045380830765, + "rewards/margins": 0.01505773700773716, + "rewards/rejected": -0.08376819640398026, "step": 245 }, { "epoch": 0.19857029388403494, - "grad_norm": 89.64398193359375, - "learning_rate": 4.796185936358543e-06, - "log_odds_chosen": -0.09865443408489227, - "log_odds_ratio": -0.8241540789604187, - "logits/chosen": 375.30975341796875, - "logits/rejected": 277.07080078125, - "logps/chosen": -1.1246061325073242, - "logps/rejected": -1.0761168003082275, - "loss": 1.5139, - "nll_loss": 1.5120210647583008, + "grad_norm": 52.48481369018555, + "learning_rate": 3.1622776601683796e-06, + "log_odds_chosen": -0.16644199192523956, + "log_odds_ratio": -0.8439895510673523, + "logits/chosen": 360.90277099609375, + "logits/rejected": 261.315185546875, + "logps/chosen": -1.0284861326217651, + "logps/rejected": -0.9436852335929871, + "loss": 1.3865, + "nll_loss": 1.3669432401657104, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.05623030662536621, - "rewards/margins": -0.002424471778795123, - "rewards/rejected": -0.05380583554506302, + "rewards/chosen": -0.051424313336610794, + "rewards/margins": -0.004240049980580807, + "rewards/rejected": -0.04718426242470741, "step": 250 }, { "epoch": 0.20254169976171565, - "grad_norm": 71.554443359375, - "learning_rate": 4.78257556387557e-06, - "log_odds_chosen": 0.17121019959449768, - "log_odds_ratio": -0.8539883494377136, - "logits/chosen": 271.27740478515625, - "logits/rejected": 308.79962158203125, - "logps/chosen": -1.4543489217758179, - "logps/rejected": -1.6729555130004883, - "loss": 1.7579, - "nll_loss": 1.7364709377288818, + "grad_norm": 51.58882522583008, + "learning_rate": 3.131121455425748e-06, + "log_odds_chosen": 0.5241218209266663, + "log_odds_ratio": -0.8181684613227844, + "logits/chosen": 271.86822509765625, + "logits/rejected": 310.2635803222656, + "logps/chosen": -1.439292550086975, + "logps/rejected": -2.013617515563965, + "loss": 1.673, + "nll_loss": 1.6913448572158813, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.07271744310855865, - "rewards/margins": 0.010930335149168968, - "rewards/rejected": -0.08364777266979218, + "rewards/chosen": -0.07196462154388428, + "rewards/margins": 0.028716260567307472, + "rewards/rejected": -0.100680872797966, "step": 255 }, { "epoch": 0.20651310563939634, - "grad_norm": 38.314876556396484, - "learning_rate": 4.7685459237003954e-06, - "log_odds_chosen": 0.2899346947669983, - "log_odds_ratio": -0.614177942276001, - "logits/chosen": 298.9414367675781, - "logits/rejected": 384.4459228515625, - "logps/chosen": -1.6951465606689453, - "logps/rejected": -1.8937695026397705, - "loss": 1.8883, - "nll_loss": 1.8541686534881592, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.08475733548402786, - "rewards/margins": 0.009931142441928387, - "rewards/rejected": -0.09468847513198853, + "grad_norm": 35.36348342895508, + "learning_rate": 3.1008683647302113e-06, + "log_odds_chosen": 0.6499348878860474, + "log_odds_ratio": -0.49271130561828613, + "logits/chosen": 296.9714050292969, + "logits/rejected": 382.0646057128906, + "logps/chosen": -1.4200375080108643, + "logps/rejected": -1.984840750694275, + "loss": 1.9446, + "nll_loss": 2.310304880142212, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.07100187987089157, + "rewards/margins": 0.02824016846716404, + "rewards/rejected": -0.09924204647541046, "step": 260 }, { "epoch": 0.21048451151707703, - "grad_norm": 72.83179473876953, - "learning_rate": 4.754099592823216e-06, - "log_odds_chosen": 0.8651946783065796, - "log_odds_ratio": -0.5348523855209351, - "logits/chosen": 358.8797912597656, - "logits/rejected": 280.5845031738281, - "logps/chosen": -1.027199387550354, - "logps/rejected": -1.6986967325210571, - "loss": 1.512, - "nll_loss": 1.2993109226226807, + "grad_norm": 60.83711242675781, + "learning_rate": 3.0714755841697565e-06, + "log_odds_chosen": 0.645357608795166, + "log_odds_ratio": -0.5926202535629272, + "logits/chosen": 350.14630126953125, + "logits/rejected": 271.0173034667969, + "logps/chosen": -1.0696051120758057, + "logps/rejected": -1.5853424072265625, + "loss": 1.4227, + "nll_loss": 1.2928531169891357, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.05135997012257576, - "rewards/margins": 0.033574867993593216, - "rewards/rejected": -0.08493484556674957, + "rewards/chosen": -0.05348025634884834, + "rewards/margins": 0.025786861777305603, + "rewards/rejected": -0.07926712185144424, "step": 265 }, { "epoch": 0.21445591739475775, - "grad_norm": 44.400272369384766, - "learning_rate": 4.739239224772751e-06, - "log_odds_chosen": -0.05065007135272026, - "log_odds_ratio": -0.8082035183906555, - "logits/chosen": 298.37335205078125, - "logits/rejected": 344.8133239746094, - "logps/chosen": -1.5913331508636475, - "logps/rejected": -1.4946712255477905, - "loss": 1.6712, - "nll_loss": 1.572104811668396, + "grad_norm": 36.92619323730469, + "learning_rate": 3.0429030972509227e-06, + "log_odds_chosen": -0.053741950541734695, + "log_odds_ratio": -0.8099346160888672, + "logits/chosen": 294.5556945800781, + "logits/rejected": 340.336669921875, + "logps/chosen": -1.5328369140625, + "logps/rejected": -1.4363396167755127, + "loss": 1.585, + "nll_loss": 1.5067962408065796, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.07956665754318237, - "rewards/margins": -0.004833097103983164, - "rewards/rejected": -0.07473356276750565, + "rewards/chosen": -0.07664184272289276, + "rewards/margins": -0.0048248679377138615, + "rewards/rejected": -0.07181697338819504, "step": 270 }, { "epoch": 0.21842732327243844, - "grad_norm": 59.13341522216797, - "learning_rate": 4.7239675491288285e-06, - "log_odds_chosen": 0.4572678506374359, - "log_odds_ratio": -0.5575239062309265, - "logits/chosen": 366.19207763671875, - "logits/rejected": 318.4181213378906, - "logps/chosen": -1.4838532209396362, - "logps/rejected": -1.8985198736190796, - "loss": 1.6193, - "nll_loss": 1.7495654821395874, + "grad_norm": 59.67924499511719, + "learning_rate": 3.0151134457776365e-06, + "log_odds_chosen": 0.39039263129234314, + "log_odds_ratio": -0.5856078863143921, + "logits/chosen": 359.80999755859375, + "logits/rejected": 312.9378967285156, + "logps/chosen": -1.3910114765167236, + "logps/rejected": -1.746132254600525, + "loss": 1.5416, + "nll_loss": 1.6771767139434814, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.07419265806674957, - "rewards/margins": 0.020733339712023735, - "rewards/rejected": -0.09492600709199905, + "rewards/chosen": -0.06955057382583618, + "rewards/margins": 0.01775604858994484, + "rewards/rejected": -0.08730661869049072, "step": 275 }, { "epoch": 0.22239872915011913, - "grad_norm": 153.78656005859375, - "learning_rate": 4.708287371021016e-06, - "log_odds_chosen": 0.4286623001098633, - "log_odds_ratio": -0.6032269597053528, - "logits/chosen": 264.1981506347656, - "logits/rejected": 437.3480529785156, - "logps/chosen": -1.4498531818389893, - "logps/rejected": -1.8169384002685547, - "loss": 1.6785, - "nll_loss": 1.932142972946167, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.07249265164136887, - "rewards/margins": 0.018354270607233047, - "rewards/rejected": -0.09084691852331161, + "grad_norm": 89.38143920898438, + "learning_rate": 2.988071523335984e-06, + "log_odds_chosen": 0.49174147844314575, + "log_odds_ratio": -0.6083627939224243, + "logits/chosen": 258.7726135253906, + "logits/rejected": 430.52197265625, + "logps/chosen": -1.3561815023422241, + "logps/rejected": -1.7862621545791626, + "loss": 1.6064, + "nll_loss": 1.7953475713729858, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.0678090825676918, + "rewards/margins": 0.021504027768969536, + "rewards/rejected": -0.08931310474872589, "step": 280 }, { "epoch": 0.22637013502779985, - "grad_norm": 55.48463821411133, - "learning_rate": 4.692201570613367e-06, - "log_odds_chosen": -0.38779932260513306, - "log_odds_ratio": -0.9519031643867493, - "logits/chosen": 315.96405029296875, - "logits/rejected": 441.1253967285156, - "logps/chosen": -1.785449743270874, - "logps/rejected": -1.507177472114563, - "loss": 1.9603, - "nll_loss": 1.9527504444122314, + "grad_norm": 43.1147575378418, + "learning_rate": 2.961744388795462e-06, + "log_odds_chosen": -0.42655545473098755, + "log_odds_ratio": -0.9692665934562683, + "logits/chosen": 313.4588317871094, + "logits/rejected": 437.51580810546875, + "logps/chosen": -1.7302316427230835, + "logps/rejected": -1.4267915487289429, + "loss": 1.8931, + "nll_loss": 1.8868739604949951, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.08927249163389206, - "rewards/margins": -0.013913619332015514, - "rewards/rejected": -0.07535887509584427, + "rewards/chosen": -0.08651158958673477, + "rewards/margins": -0.015172007493674755, + "rewards/rejected": -0.07133957743644714, "step": 285 }, { "epoch": 0.23034154090548054, - "grad_norm": 196.46524047851562, - "learning_rate": 4.675713102575389e-06, - "log_odds_chosen": 0.13416634500026703, - "log_odds_ratio": -0.6926567554473877, - "logits/chosen": 325.7991943359375, - "logits/rejected": 359.30426025390625, - "logps/chosen": -1.287474274635315, - "logps/rejected": -1.4152565002441406, - "loss": 1.8866, - "nll_loss": 1.7071374654769897, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.06437370926141739, - "rewards/margins": 0.006389112211763859, - "rewards/rejected": -0.07076282799243927, + "grad_norm": 111.950439453125, + "learning_rate": 2.9361010975735177e-06, + "log_odds_chosen": 0.18716028332710266, + "log_odds_ratio": -0.6489423513412476, + "logits/chosen": 320.8119201660156, + "logits/rejected": 353.88568115234375, + "logps/chosen": -1.203453779220581, + "logps/rejected": -1.3500152826309204, + "loss": 1.6784, + "nll_loss": 1.6349010467529297, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.06017268821597099, + "rewards/margins": 0.007328073028475046, + "rewards/rejected": -0.0675007551908493, "step": 290 }, { "epoch": 0.23431294678316125, - "grad_norm": 40.833988189697266, - "learning_rate": 4.658824995539318e-06, - "log_odds_chosen": -0.3848091959953308, - "log_odds_ratio": -0.9408342242240906, - "logits/chosen": 466.2362365722656, - "logits/rejected": 305.64935302734375, - "logps/chosen": -1.5434811115264893, - "logps/rejected": -1.2438277006149292, - "loss": 1.7838, - "nll_loss": 1.8352413177490234, + "grad_norm": 45.6556282043457, + "learning_rate": 2.9111125486979104e-06, + "log_odds_chosen": -0.518650233745575, + "log_odds_ratio": -1.0133109092712402, + "logits/chosen": 456.8958435058594, + "logits/rejected": 289.4540100097656, + "logps/chosen": -1.5072423219680786, + "logps/rejected": -1.13739013671875, + "loss": 1.7026, + "nll_loss": 1.7881231307983398, "rewards/accuracies": 0.20000000298023224, - "rewards/chosen": -0.07717405259609222, - "rewards/margins": -0.01498266588896513, - "rewards/rejected": -0.06219138950109482, + "rewards/chosen": -0.07536212354898453, + "rewards/margins": -0.01849261112511158, + "rewards/rejected": -0.0568695068359375, "step": 295 }, { "epoch": 0.23828435266084194, - "grad_norm": 85.50627899169922, - "learning_rate": 4.641540351543826e-06, - "log_odds_chosen": -0.06552493572235107, - "log_odds_ratio": -0.8229795694351196, - "logits/chosen": 306.17236328125, - "logits/rejected": 333.0214538574219, - "logps/chosen": -1.408279538154602, - "logps/rejected": -1.4727718830108643, - "loss": 1.7687, - "nll_loss": 1.7104421854019165, + "grad_norm": 56.93437576293945, + "learning_rate": 2.8867513459481293e-06, + "log_odds_chosen": -0.09250687062740326, + "log_odds_ratio": -0.8143345713615417, + "logits/chosen": 297.9905700683594, + "logits/rejected": 320.3016052246094, + "logps/chosen": -1.3163940906524658, + "logps/rejected": -1.3317310810089111, + "loss": 1.7171, + "nll_loss": 1.6320091485977173, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.0704139694571495, - "rewards/margins": 0.0032246210612356663, - "rewards/rejected": -0.07363860309123993, + "rewards/chosen": -0.06581971049308777, + "rewards/margins": 0.0007668494945392013, + "rewards/rejected": -0.06658656150102615, "step": 300 }, { "epoch": 0.24225575853852263, - "grad_norm": 52.15938186645508, - "learning_rate": 4.6238623454642215e-06, - "log_odds_chosen": -0.7005519270896912, - "log_odds_ratio": -1.180597186088562, - "logits/chosen": 393.58575439453125, - "logits/rejected": 291.7729187011719, - "logps/chosen": -1.5035789012908936, - "logps/rejected": -1.0074363946914673, - "loss": 1.9541, - "nll_loss": 1.8733670711517334, + "grad_norm": 53.10919952392578, + "learning_rate": 2.862991671569341e-06, + "log_odds_chosen": -0.7954866886138916, + "log_odds_ratio": -1.2641322612762451, + "logits/chosen": 384.24554443359375, + "logits/rejected": 274.28692626953125, + "logps/chosen": -1.4349799156188965, + "logps/rejected": -0.8974045515060425, + "loss": 1.8459, + "nll_loss": 1.7445728778839111, "rewards/accuracies": 0.20000000298023224, - "rewards/chosen": -0.07517894357442856, - "rewards/margins": -0.02480713650584221, - "rewards/rejected": -0.050371818244457245, + "rewards/chosen": -0.0717489942908287, + "rewards/margins": -0.02687876857817173, + "rewards/rejected": -0.04487023502588272, "step": 305 }, { "epoch": 0.24622716441620335, - "grad_norm": 71.7739028930664, - "learning_rate": 4.605794224429286e-06, - "log_odds_chosen": 0.5956661105155945, - "log_odds_ratio": -0.5032454133033752, - "logits/chosen": 287.00799560546875, - "logits/rejected": 278.5837707519531, - "logps/chosen": -1.3277604579925537, - "logps/rejected": -1.8034179210662842, - "loss": 2.0805, - "nll_loss": 2.333066463470459, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.06638802587985992, - "rewards/margins": 0.023782875388860703, - "rewards/rejected": -0.09017090499401093, + "grad_norm": 31.78973388671875, + "learning_rate": 2.839809171235324e-06, + "log_odds_chosen": 0.8655799031257629, + "log_odds_ratio": -0.42556723952293396, + "logits/chosen": 275.6189880371094, + "logits/rejected": 267.2253723144531, + "logps/chosen": -1.1174626350402832, + "logps/rejected": -1.6986801624298096, + "loss": 1.8585, + "nll_loss": 2.0873680114746094, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.05587313324213028, + "rewards/margins": 0.02906087599694729, + "rewards/rejected": -0.08493401110172272, "step": 310 }, { "epoch": 0.25019857029388404, - "grad_norm": 95.7601089477539, - "learning_rate": 4.587339307224837e-06, - "log_odds_chosen": 0.4633815884590149, - "log_odds_ratio": -0.5474061965942383, - "logits/chosen": 285.03057861328125, - "logits/rejected": 313.64971923828125, - "logps/chosen": -1.064753532409668, - "logps/rejected": -1.4205200672149658, - "loss": 1.674, - "nll_loss": 1.6759742498397827, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.0532376766204834, - "rewards/margins": 0.01778833009302616, - "rewards/rejected": -0.07102601230144501, + "grad_norm": 54.311798095703125, + "learning_rate": 2.817180849095055e-06, + "log_odds_chosen": 0.48402324318885803, + "log_odds_ratio": -0.5392564535140991, + "logits/chosen": 273.8492431640625, + "logits/rejected": 303.4148864746094, + "logps/chosen": -0.9568207859992981, + "logps/rejected": -1.3038668632507324, + "loss": 1.5511, + "nll_loss": 1.534110188484192, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.04784103482961655, + "rewards/margins": 0.017352305352687836, + "rewards/rejected": -0.06519334018230438, "step": 315 }, { "epoch": 0.2541699761715647, - "grad_norm": 35.280120849609375, - "learning_rate": 4.568500983684127e-06, - "log_odds_chosen": 1.0717216730117798, - "log_odds_ratio": -0.3502858281135559, - "logits/chosen": 316.9742431640625, - "logits/rejected": 361.8272705078125, - "logps/chosen": -1.0445092916488647, - "logps/rejected": -1.8099143505096436, - "loss": 1.5821, - "nll_loss": 1.5527000427246094, + "grad_norm": 46.84675979614258, + "learning_rate": 2.7950849718747376e-06, + "log_odds_chosen": 1.0449055433273315, + "log_odds_ratio": -0.3449096083641052, + "logits/chosen": 313.7346496582031, + "logits/rejected": 355.859130859375, + "logps/chosen": -0.9354375004768372, + "logps/rejected": -1.6399080753326416, + "loss": 1.445, + "nll_loss": 1.3939152956008911, "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.05222546309232712, - "rewards/margins": 0.0382702499628067, - "rewards/rejected": -0.09049571305513382, + "rewards/chosen": -0.04677187651395798, + "rewards/margins": 0.03522353246808052, + "rewards/rejected": -0.0819954052567482, "step": 320 }, { "epoch": 0.2581413820492454, - "grad_norm": 56.177520751953125, - "learning_rate": 4.54928271406519e-06, - "log_odds_chosen": 0.08797760307788849, - "log_odds_ratio": -0.6692509055137634, - "logits/chosen": 311.7930603027344, - "logits/rejected": 325.4112854003906, - "logps/chosen": -0.8228706121444702, - "logps/rejected": -0.8611005544662476, - "loss": 1.6019, - "nll_loss": 1.4109102487564087, - "rewards/accuracies": 0.20000000298023224, - "rewards/chosen": -0.04114353656768799, - "rewards/margins": 0.0019115001196041703, - "rewards/rejected": -0.043055035173892975, + "grad_norm": 57.15336990356445, + "learning_rate": 2.773500981126146e-06, + "log_odds_chosen": 0.07060068845748901, + "log_odds_ratio": -0.7055736184120178, + "logits/chosen": 301.77703857421875, + "logits/rejected": 314.42828369140625, + "logps/chosen": -0.818830132484436, + "logps/rejected": -0.8071014285087585, + "loss": 1.5234, + "nll_loss": 1.3474265336990356, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.0409415028989315, + "rewards/margins": -0.0005864340928383172, + "rewards/rejected": -0.04035507142543793, "step": 325 }, { "epoch": 0.2621127879269261, - "grad_norm": 49.081207275390625, - "learning_rate": 4.529688028415259e-06, - "log_odds_chosen": 0.5036702156066895, - "log_odds_ratio": -0.5119932293891907, - "logits/chosen": 349.2168273925781, - "logits/rejected": 417.1556091308594, - "logps/chosen": -1.1619529724121094, - "logps/rejected": -1.4640512466430664, - "loss": 1.7083, - "nll_loss": 1.5884101390838623, + "grad_norm": 42.167640686035156, + "learning_rate": 2.752409412815902e-06, + "log_odds_chosen": 0.6327840685844421, + "log_odds_ratio": -0.47118449211120605, + "logits/chosen": 338.5755615234375, + "logits/rejected": 409.26776123046875, + "logps/chosen": -1.0573819875717163, + "logps/rejected": -1.443060278892517, + "loss": 1.647, + "nll_loss": 1.4856857061386108, "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.05809764936566353, - "rewards/margins": 0.015104919672012329, - "rewards/rejected": -0.07320256531238556, + "rewards/chosen": -0.052869103848934174, + "rewards/margins": 0.01928391121327877, + "rewards/rejected": -0.0721530169248581, "step": 330 }, { "epoch": 0.26608419380460685, - "grad_norm": 59.9859619140625, - "learning_rate": 4.509720525922356e-06, - "log_odds_chosen": -0.2589249014854431, - "log_odds_ratio": -0.8613080978393555, - "logits/chosen": 319.08282470703125, - "logits/rejected": 374.44512939453125, - "logps/chosen": -1.1872944831848145, - "logps/rejected": -1.0131120681762695, - "loss": 1.6996, - "nll_loss": 1.4569952487945557, + "grad_norm": 40.28363800048828, + "learning_rate": 2.7317918235407652e-06, + "log_odds_chosen": -0.2637309432029724, + "log_odds_ratio": -0.864203929901123, + "logits/chosen": 308.519287109375, + "logits/rejected": 366.68194580078125, + "logps/chosen": -1.1223952770233154, + "logps/rejected": -0.9605833888053894, + "loss": 1.5782, + "nll_loss": 1.3887097835540771, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.05936472490429878, - "rewards/margins": -0.008709125220775604, - "rewards/rejected": -0.05065560340881348, + "rewards/chosen": -0.05611976981163025, + "rewards/margins": -0.008090597577393055, + "rewards/rejected": -0.04802917316555977, "step": 335 }, { "epoch": 0.27005559968228754, - "grad_norm": 34.96027755737305, - "learning_rate": 4.489383874254193e-06, - "log_odds_chosen": 0.19578817486763, - "log_odds_ratio": -0.6897228360176086, - "logits/chosen": 308.80645751953125, - "logits/rejected": 363.35272216796875, - "logps/chosen": -1.0954322814941406, - "logps/rejected": -1.26204514503479, - "loss": 1.8048, - "nll_loss": 1.4526374340057373, - "rewards/accuracies": 0.20000000298023224, - "rewards/chosen": -0.05477161332964897, - "rewards/margins": 0.008330638520419598, - "rewards/rejected": -0.06310225278139114, + "grad_norm": 34.92796325683594, + "learning_rate": 2.711630722733202e-06, + "log_odds_chosen": 0.3361690640449524, + "log_odds_ratio": -0.6272503137588501, + "logits/chosen": 303.1048583984375, + "logits/rejected": 358.2105712890625, + "logps/chosen": -0.9646172523498535, + "logps/rejected": -1.2090504169464111, + "loss": 1.6717, + "nll_loss": 1.3398942947387695, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.0482308603823185, + "rewards/margins": 0.012221668846905231, + "rewards/rejected": -0.060452528297901154, "step": 340 }, { "epoch": 0.27402700555996823, - "grad_norm": 88.31672668457031, - "learning_rate": 4.46868180888448e-06, - "log_odds_chosen": 0.5558279752731323, - "log_odds_ratio": -0.6389625072479248, - "logits/chosen": 343.12237548828125, - "logits/rejected": 301.82843017578125, - "logps/chosen": -2.0307748317718506, - "logps/rejected": -2.624824047088623, - "loss": 1.8654, - "nll_loss": 2.4084858894348145, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.1015387549996376, - "rewards/margins": 0.029702449217438698, - "rewards/rejected": -0.13124120235443115, + "grad_norm": 45.980873107910156, + "learning_rate": 2.691909510290828e-06, + "log_odds_chosen": 0.7104015350341797, + "log_odds_ratio": -0.49001604318618774, + "logits/chosen": 339.669677734375, + "logits/rejected": 297.57012939453125, + "logps/chosen": -1.9824626445770264, + "logps/rejected": -2.6118223667144775, + "loss": 1.7175, + "nll_loss": 2.170316219329834, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.09912314265966415, + "rewards/margins": 0.0314679816365242, + "rewards/rejected": -0.13059112429618835, "step": 345 }, { "epoch": 0.2779984114376489, - "grad_norm": 36.35618209838867, - "learning_rate": 4.447618132406788e-06, - "log_odds_chosen": 0.05257995054125786, - "log_odds_ratio": -0.7060452699661255, - "logits/chosen": 395.55926513671875, - "logits/rejected": 324.0714416503906, - "logps/chosen": -1.2713950872421265, - "logps/rejected": -1.3762328624725342, - "loss": 1.807, - "nll_loss": 1.739328145980835, + "grad_norm": 36.35749816894531, + "learning_rate": 2.6726124191242444e-06, + "log_odds_chosen": 0.14296868443489075, + "log_odds_ratio": -0.6650969386100769, + "logits/chosen": 390.28662109375, + "logits/rejected": 319.8929138183594, + "logps/chosen": -1.1751973628997803, + "logps/rejected": -1.343955636024475, + "loss": 1.6748, + "nll_loss": 1.6485261917114258, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.06356975436210632, - "rewards/margins": 0.005241888575255871, - "rewards/rejected": -0.06881164014339447, + "rewards/chosen": -0.058759868144989014, + "rewards/margins": 0.008437911979854107, + "rewards/rejected": -0.067197784781456, "step": 350 }, { "epoch": 0.2819698173153296, - "grad_norm": 56.2636604309082, - "learning_rate": 4.4261967138360905e-06, - "log_odds_chosen": -0.501254141330719, - "log_odds_ratio": -1.0499690771102905, - "logits/chosen": 375.1532897949219, - "logits/rejected": 253.8148193359375, - "logps/chosen": -1.6551625728607178, - "logps/rejected": -1.2964580059051514, - "loss": 1.5264, - "nll_loss": 1.8501193523406982, + "grad_norm": 46.82810592651367, + "learning_rate": 2.6537244621713765e-06, + "log_odds_chosen": -0.5206547975540161, + "log_odds_ratio": -1.062336802482605, + "logits/chosen": 371.12255859375, + "logits/rejected": 252.90744018554688, + "logps/chosen": -1.5387684106826782, + "logps/rejected": -1.1726481914520264, + "loss": 1.4011, + "nll_loss": 1.7245814800262451, "rewards/accuracies": 0.20000000298023224, - "rewards/chosen": -0.08275812864303589, - "rewards/margins": -0.017935223877429962, - "rewards/rejected": -0.06482290476560593, + "rewards/chosen": -0.07693842798471451, + "rewards/margins": -0.01830601692199707, + "rewards/rejected": -0.05863240361213684, "step": 355 }, { "epoch": 0.28594122319301035, - "grad_norm": 87.0551528930664, - "learning_rate": 4.404421487898083e-06, - "log_odds_chosen": -0.14473596215248108, - "log_odds_ratio": -0.7785184383392334, - "logits/chosen": 331.86968994140625, - "logits/rejected": 319.239013671875, - "logps/chosen": -1.191341519355774, - "logps/rejected": -1.0848348140716553, - "loss": 1.6036, - "nll_loss": 1.8287181854248047, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.059567082673311234, - "rewards/margins": -0.005325344391167164, - "rewards/rejected": -0.054241735488176346, + "grad_norm": 84.11432647705078, + "learning_rate": 2.6352313834736496e-06, + "log_odds_chosen": -0.12257371097803116, + "log_odds_ratio": -0.7665070295333862, + "logits/chosen": 321.9914245605469, + "logits/rejected": 313.55279541015625, + "logps/chosen": -1.13595449924469, + "logps/rejected": -1.0432698726654053, + "loss": 1.5142, + "nll_loss": 1.7525981664657593, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.05679772421717644, + "rewards/margins": -0.0046342299319803715, + "rewards/rejected": -0.052163492888212204, "step": 360 }, { "epoch": 0.28991262907069104, - "grad_norm": 42.729244232177734, - "learning_rate": 4.3822964543064536e-06, - "log_odds_chosen": -0.40415963530540466, - "log_odds_ratio": -0.9470674395561218, - "logits/chosen": 336.14984130859375, - "logits/rejected": 290.5699157714844, - "logps/chosen": -1.3225958347320557, - "logps/rejected": -0.9997372627258301, - "loss": 1.6299, - "nll_loss": 1.5339610576629639, - "rewards/accuracies": 0.20000000298023224, - "rewards/chosen": -0.06612979620695114, - "rewards/margins": -0.01614293083548546, - "rewards/rejected": -0.04998686537146568, + "grad_norm": 91.57356262207031, + "learning_rate": 2.6171196129510684e-06, + "log_odds_chosen": -0.3752515912055969, + "log_odds_ratio": -0.9325364232063293, + "logits/chosen": 328.098876953125, + "logits/rejected": 284.2572326660156, + "logps/chosen": -1.2117679119110107, + "logps/rejected": -0.9372884631156921, + "loss": 1.5146, + "nll_loss": 1.3987443447113037, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.060588397085666656, + "rewards/margins": -0.01372397132217884, + "rewards/rejected": -0.046864427626132965, "step": 365 }, { "epoch": 0.29388403494837173, - "grad_norm": 46.94746780395508, - "learning_rate": 4.359825677028206e-06, - "log_odds_chosen": -0.8854487538337708, - "log_odds_ratio": -1.2963764667510986, - "logits/chosen": 375.13153076171875, - "logits/rejected": 305.2864990234375, - "logps/chosen": -1.3724980354309082, - "logps/rejected": -0.7736121416091919, - "loss": 1.7241, - "nll_loss": 1.8799304962158203, + "grad_norm": 56.33279037475586, + "learning_rate": 2.599376224550182e-06, + "log_odds_chosen": -0.8803679347038269, + "log_odds_ratio": -1.2740561962127686, + "logits/chosen": 370.04913330078125, + "logits/rejected": 296.248291015625, + "logps/chosen": -1.2923654317855835, + "logps/rejected": -0.7359521985054016, + "loss": 1.6098, + "nll_loss": 1.7468293905258179, "rewards/accuracies": 0.20000000298023224, - "rewards/chosen": -0.06862489879131317, - "rewards/margins": -0.029944291338324547, - "rewards/rejected": -0.038680605590343475, + "rewards/chosen": -0.06461827456951141, + "rewards/margins": -0.027820657938718796, + "rewards/rejected": -0.03679760918021202, "step": 370 }, { "epoch": 0.2978554408260524, - "grad_norm": 55.18174743652344, - "learning_rate": 4.337013283537182e-06, - "log_odds_chosen": 0.2510035037994385, - "log_odds_ratio": -0.6310834884643555, - "logits/chosen": 368.63214111328125, - "logits/rejected": 276.0588684082031, - "logps/chosen": -1.1369847059249878, - "logps/rejected": -1.3218457698822021, - "loss": 1.7533, - "nll_loss": 1.9112876653671265, + "grad_norm": 99.3619155883789, + "learning_rate": 2.5819888974716113e-06, + "log_odds_chosen": 0.06336264312267303, + "log_odds_ratio": -0.7155358791351318, + "logits/chosen": 357.4932861328125, + "logits/rejected": 261.9980163574219, + "logps/chosen": -1.088346242904663, + "logps/rejected": -1.1663461923599243, + "loss": 1.6697, + "nll_loss": 1.7833229303359985, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.05684923380613327, - "rewards/margins": 0.009243053384125233, - "rewards/rejected": -0.06609228998422623, + "rewards/chosen": -0.054417312145233154, + "rewards/margins": 0.003899999661371112, + "rewards/rejected": -0.058317311108112335, "step": 375 }, { "epoch": 0.3018268467037331, - "grad_norm": 41.9847526550293, - "learning_rate": 4.3138634640559185e-06, - "log_odds_chosen": 0.3149817883968353, - "log_odds_ratio": -0.5802162289619446, - "logits/chosen": 320.53692626953125, - "logits/rejected": 413.7608337402344, - "logps/chosen": -1.536766767501831, - "logps/rejected": -1.7898311614990234, - "loss": 1.7084, - "nll_loss": 1.9358304738998413, + "grad_norm": 39.31928253173828, + "learning_rate": 2.564945880212886e-06, + "log_odds_chosen": 0.30186375975608826, + "log_odds_ratio": -0.5939323306083679, + "logits/chosen": 311.0823974609375, + "logits/rejected": 405.12408447265625, + "logps/chosen": -1.402393102645874, + "logps/rejected": -1.6425796747207642, + "loss": 1.6037, + "nll_loss": 1.767801284790039, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.07683834433555603, - "rewards/margins": 0.012653221376240253, - "rewards/rejected": -0.08949156850576401, + "rewards/chosen": -0.07011966407299042, + "rewards/margins": 0.012009315192699432, + "rewards/rejected": -0.08212897181510925, "step": 380 }, { "epoch": 0.3057982525814138, - "grad_norm": 36.58580780029297, - "learning_rate": 4.290380470785984e-06, - "log_odds_chosen": -0.44627267122268677, - "log_odds_ratio": -0.9523780941963196, - "logits/chosen": 384.55609130859375, - "logits/rejected": 288.72259521484375, - "logps/chosen": -1.3820741176605225, - "logps/rejected": -1.060802698135376, - "loss": 1.7563, - "nll_loss": 1.8211044073104858, - "rewards/accuracies": 0.20000000298023224, - "rewards/chosen": -0.06910370290279388, - "rewards/margins": -0.016063563525676727, - "rewards/rejected": -0.05304013937711716, + "grad_norm": 50.13980484008789, + "learning_rate": 2.5482359571881276e-06, + "log_odds_chosen": -0.5006630420684814, + "log_odds_ratio": -0.9872447848320007, + "logits/chosen": 373.640380859375, + "logits/rejected": 276.2611999511719, + "logps/chosen": -1.317260503768921, + "logps/rejected": -0.9704214334487915, + "loss": 1.6575, + "nll_loss": 1.706688642501831, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.06586302816867828, + "rewards/margins": -0.017341960221529007, + "rewards/rejected": -0.048521075397729874, "step": 385 }, { "epoch": 0.30976965845909454, - "grad_norm": 41.187171936035156, - "learning_rate": 4.266568617126919e-06, - "log_odds_chosen": -0.3663569390773773, - "log_odds_ratio": -1.1484845876693726, - "logits/chosen": 353.6982727050781, - "logits/rejected": 327.0970764160156, - "logps/chosen": -1.3744146823883057, - "logps/rejected": -1.1000757217407227, - "loss": 1.7097, - "nll_loss": 1.7982555627822876, + "grad_norm": 38.53764724731445, + "learning_rate": 2.5318484177091667e-06, + "log_odds_chosen": -0.21573182940483093, + "log_odds_ratio": -1.0288946628570557, + "logits/chosen": 347.60894775390625, + "logits/rejected": 319.7806396484375, + "logps/chosen": -1.2338215112686157, + "logps/rejected": -1.0342485904693604, + "loss": 1.6194, + "nll_loss": 1.6776854991912842, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.0687207281589508, - "rewards/margins": -0.013716941699385643, - "rewards/rejected": -0.05500379204750061, + "rewards/chosen": -0.061691075563430786, + "rewards/margins": -0.009978653863072395, + "rewards/rejected": -0.05171242356300354, "step": 390 }, { "epoch": 0.31374106433677523, - "grad_norm": 50.873226165771484, - "learning_rate": 4.2424322768839534e-06, - "log_odds_chosen": 0.5074528455734253, - "log_odds_ratio": -0.5399994254112244, - "logits/chosen": 336.0807800292969, - "logits/rejected": 341.9483947753906, - "logps/chosen": -1.1645238399505615, - "logps/rejected": -1.5138776302337646, - "loss": 1.6867, - "nll_loss": 1.4324095249176025, + "grad_norm": 38.11606979370117, + "learning_rate": 2.515773027133138e-06, + "log_odds_chosen": 0.3980295658111572, + "log_odds_ratio": -0.5573837161064148, + "logits/chosen": 329.6410827636719, + "logits/rejected": 334.8705139160156, + "logps/chosen": -1.0556812286376953, + "logps/rejected": -1.3309425115585327, + "loss": 1.5489, + "nll_loss": 1.286458134651184, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.05822619050741196, - "rewards/margins": 0.017467692494392395, - "rewards/rejected": -0.07569388300180435, + "rewards/chosen": -0.052784062922000885, + "rewards/margins": 0.0137630645185709, + "rewards/rejected": -0.06654712557792664, "step": 395 }, { "epoch": 0.3177124702144559, - "grad_norm": 31.38866424560547, - "learning_rate": 4.217975883464604e-06, - "log_odds_chosen": 0.4223383367061615, - "log_odds_ratio": -0.557848334312439, - "logits/chosen": 341.1311950683594, - "logits/rejected": 330.12530517578125, - "logps/chosen": -0.972659707069397, - "logps/rejected": -1.1718895435333252, - "loss": 1.5047, - "nll_loss": 1.4151824712753296, + "grad_norm": 34.63991928100586, + "learning_rate": 2.5e-06, + "log_odds_chosen": 0.29624634981155396, + "log_odds_ratio": -0.585162878036499, + "logits/chosen": 335.17999267578125, + "logits/rejected": 316.7696228027344, + "logps/chosen": -0.9373574256896973, + "logps/rejected": -1.0805803537368774, + "loss": 1.4058, + "nll_loss": 1.348035216331482, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.04863298311829567, - "rewards/margins": 0.009961498901247978, - "rewards/rejected": -0.0585944838821888, + "rewards/chosen": -0.04686787351965904, + "rewards/margins": 0.007161143235862255, + "rewards/rejected": -0.054029010236263275, "step": 400 }, { "epoch": 0.3216838760921366, - "grad_norm": 113.73686218261719, - "learning_rate": 4.1932039290643534e-06, - "log_odds_chosen": -0.6433423161506653, - "log_odds_ratio": -1.1012665033340454, - "logits/chosen": 340.7366638183594, - "logits/rejected": 359.9513854980469, - "logps/chosen": -1.2754091024398804, - "logps/rejected": -0.9146528244018555, - "loss": 1.5981, - "nll_loss": 1.4592812061309814, + "grad_norm": 84.49266052246094, + "learning_rate": 2.484519974999767e-06, + "log_odds_chosen": -0.5001649856567383, + "log_odds_ratio": -1.0039180517196655, + "logits/chosen": 334.81451416015625, + "logits/rejected": 353.23443603515625, + "logps/chosen": -1.1627585887908936, + "logps/rejected": -0.8853675723075867, + "loss": 1.3853, + "nll_loss": 1.3510539531707764, "rewards/accuracies": 0.20000000298023224, - "rewards/chosen": -0.06377045810222626, - "rewards/margins": -0.018037814646959305, - "rewards/rejected": -0.04573264718055725, + "rewards/chosen": -0.058137934654951096, + "rewards/margins": -0.013869555667042732, + "rewards/rejected": -0.044268377125263214, "step": 405 }, { "epoch": 0.3256552819698173, - "grad_norm": 27.97637367248535, - "learning_rate": 4.168120963841501e-06, - "log_odds_chosen": 0.515986979007721, - "log_odds_ratio": -0.4868692457675934, - "logits/chosen": 322.2940979003906, - "logits/rejected": 315.605224609375, - "logps/chosen": -1.0631706714630127, - "logps/rejected": -1.4311316013336182, - "loss": 1.5618, - "nll_loss": 1.2970093488693237, + "grad_norm": 29.99695587158203, + "learning_rate": 2.4693239916239746e-06, + "log_odds_chosen": 0.4416959881782532, + "log_odds_ratio": -0.5149996876716614, + "logits/chosen": 309.7474060058594, + "logits/rejected": 301.06805419921875, + "logps/chosen": -0.9938238263130188, + "logps/rejected": -1.2996299266815186, + "loss": 1.4475, + "nll_loss": 1.214712142944336, "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.053158532828092575, - "rewards/margins": 0.018398040905594826, - "rewards/rejected": -0.07155657559633255, + "rewards/chosen": -0.04969118908047676, + "rewards/margins": 0.015290307812392712, + "rewards/rejected": -0.06498149782419205, "step": 410 }, { "epoch": 0.329626687847498, - "grad_norm": 111.9078598022461, - "learning_rate": 4.142731595081394e-06, - "log_odds_chosen": -0.7611304521560669, - "log_odds_ratio": -1.230291485786438, - "logits/chosen": 472.66253662109375, - "logits/rejected": 277.21435546875, - "logps/chosen": -2.0160393714904785, - "logps/rejected": -1.417152762413025, - "loss": 1.6858, - "nll_loss": 2.240736484527588, + "grad_norm": 102.2310791015625, + "learning_rate": 2.4544034683690802e-06, + "log_odds_chosen": -0.5294458270072937, + "log_odds_ratio": -1.0700939893722534, + "logits/chosen": 465.16192626953125, + "logits/rejected": 265.0263366699219, + "logps/chosen": -1.591260313987732, + "logps/rejected": -1.2482383251190186, + "loss": 1.4863, + "nll_loss": 1.8571536540985107, "rewards/accuracies": 0.20000000298023224, - "rewards/chosen": -0.1008019670844078, - "rewards/margins": -0.029944339767098427, - "rewards/rejected": -0.07085762917995453, + "rewards/chosen": -0.07956302165985107, + "rewards/margins": -0.01715109869837761, + "rewards/rejected": -0.06241191551089287, "step": 415 }, { "epoch": 0.33359809372517873, - "grad_norm": 55.40426254272461, - "learning_rate": 4.117040486350141e-06, - "log_odds_chosen": -0.27019426226615906, - "log_odds_ratio": -0.9864810705184937, - "logits/chosen": 320.2743225097656, - "logits/rejected": 390.2867736816406, - "logps/chosen": -1.4704816341400146, - "logps/rejected": -1.3386101722717285, - "loss": 2.1069, - "nll_loss": 1.8192126750946045, + "grad_norm": 46.43037414550781, + "learning_rate": 2.4397501823713327e-06, + "log_odds_chosen": -0.16312381625175476, + "log_odds_ratio": -0.9062603712081909, + "logits/chosen": 308.7474060058594, + "logits/rejected": 377.7091369628906, + "logps/chosen": -1.2873585224151611, + "logps/rejected": -1.244364857673645, + "loss": 1.9129, + "nll_loss": 1.7224760055541992, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.07352408021688461, - "rewards/margins": -0.00659357151016593, - "rewards/rejected": -0.06693051010370255, + "rewards/chosen": -0.06436793506145477, + "rewards/margins": -0.0021496848203241825, + "rewards/rejected": -0.06221824139356613, "step": 420 }, { "epoch": 0.3375694996028594, - "grad_norm": 80.17561340332031, - "learning_rate": 4.0910523566380115e-06, - "log_odds_chosen": 0.49502259492874146, - "log_odds_ratio": -0.5022796392440796, - "logits/chosen": 338.86505126953125, - "logits/rejected": 331.8837890625, - "logps/chosen": -1.1411818265914917, - "logps/rejected": -1.491014003753662, - "loss": 1.5704, - "nll_loss": 1.140134572982788, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.057059090584516525, - "rewards/margins": 0.01749161258339882, - "rewards/rejected": -0.07455070316791534, + "grad_norm": 60.4110107421875, + "learning_rate": 2.4253562503633297e-06, + "log_odds_chosen": 0.6081835627555847, + "log_odds_ratio": -0.4843834340572357, + "logits/chosen": 336.476318359375, + "logits/rejected": 328.60626220703125, + "logps/chosen": -0.955605685710907, + "logps/rejected": -1.360215425491333, + "loss": 1.4305, + "nll_loss": 1.0399436950683594, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.04778028652071953, + "rewards/margins": 0.02023048885166645, + "rewards/rejected": -0.06801077723503113, "step": 425 }, { "epoch": 0.3415409054805401, - "grad_norm": 40.142581939697266, - "learning_rate": 4.06477197949263e-06, - "log_odds_chosen": 0.31735625863075256, - "log_odds_ratio": -0.6405702233314514, - "logits/chosen": 370.8393859863281, - "logits/rejected": 353.68585205078125, - "logps/chosen": -1.5020195245742798, - "logps/rejected": -1.7753969430923462, - "loss": 1.6606, - "nll_loss": 2.0026352405548096, + "grad_norm": 45.45296096801758, + "learning_rate": 2.411214110852061e-06, + "log_odds_chosen": 0.4710689187049866, + "log_odds_ratio": -0.5245848894119263, + "logits/chosen": 364.83978271484375, + "logits/rejected": 349.26629638671875, + "logps/chosen": -1.245513677597046, + "logps/rejected": -1.6239559650421143, + "loss": 1.5163, + "nll_loss": 1.7595207691192627, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.07510097324848175, - "rewards/margins": 0.01366887241601944, - "rewards/rejected": -0.08876985311508179, + "rewards/chosen": -0.06227568909525871, + "rewards/margins": 0.01892211101949215, + "rewards/rejected": -0.08119779825210571, "step": 430 }, { "epoch": 0.3455123113582208, - "grad_norm": 39.17948913574219, - "learning_rate": 4.038204182142174e-06, - "log_odds_chosen": -0.04409918934106827, - "log_odds_ratio": -1.0017715692520142, - "logits/chosen": 365.0375061035156, - "logits/rejected": 287.4947814941406, - "logps/chosen": -1.5504963397979736, - "logps/rejected": -1.3872708082199097, - "loss": 1.754, - "nll_loss": 1.8789138793945312, + "grad_norm": 38.822410583496094, + "learning_rate": 2.3973165074269213e-06, + "log_odds_chosen": 0.3714559078216553, + "log_odds_ratio": -0.6340258717536926, + "logits/chosen": 350.9668273925781, + "logits/rejected": 270.967529296875, + "logps/chosen": -1.0809491872787476, + "logps/rejected": -1.2758018970489502, + "loss": 1.5124, + "nll_loss": 1.431868553161621, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.0775248184800148, - "rewards/margins": -0.008161274716258049, - "rewards/rejected": -0.0693635419011116, + "rewards/chosen": -0.05404745787382126, + "rewards/margins": 0.009742636233568192, + "rewards/rejected": -0.06379009783267975, "step": 435 }, { "epoch": 0.3494837172359015, - "grad_norm": 65.36585998535156, - "learning_rate": 4.011353844608695e-06, - "log_odds_chosen": 0.25077468156814575, - "log_odds_ratio": -0.5847761034965515, - "logits/chosen": 398.1842346191406, - "logits/rejected": 302.77557373046875, - "logps/chosen": -1.1319334506988525, - "logps/rejected": -1.2919623851776123, - "loss": 1.5668, - "nll_loss": 1.5274730920791626, + "grad_norm": 48.35163879394531, + "learning_rate": 2.3836564731139807e-06, + "log_odds_chosen": 0.3045877516269684, + "log_odds_ratio": -0.5679005980491638, + "logits/chosen": 387.47454833984375, + "logits/rejected": 291.999755859375, + "logps/chosen": -1.0117883682250977, + "logps/rejected": -1.1841952800750732, + "loss": 1.4928, + "nll_loss": 1.4351252317428589, "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.056596674025058746, - "rewards/margins": 0.008001448586583138, - "rewards/rejected": -0.06459812819957733, + "rewards/chosen": -0.050589419901371, + "rewards/margins": 0.00862034410238266, + "rewards/rejected": -0.05920976400375366, "step": 440 }, { "epoch": 0.3534551231135822, - "grad_norm": 66.42986297607422, - "learning_rate": 3.9842258988117435e-06, - "log_odds_chosen": 0.39154669642448425, - "log_odds_ratio": -0.5791794657707214, - "logits/chosen": 292.7413635253906, - "logits/rejected": 333.33306884765625, - "logps/chosen": -1.3780268430709839, - "logps/rejected": -1.5879228115081787, - "loss": 1.6821, - "nll_loss": 1.878339409828186, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.06890134513378143, - "rewards/margins": 0.010494804009795189, - "rewards/rejected": -0.07939615100622177, + "grad_norm": 52.99742889404297, + "learning_rate": 2.3702273156998867e-06, + "log_odds_chosen": 0.6814774870872498, + "log_odds_ratio": -0.45101094245910645, + "logits/chosen": 279.8681945800781, + "logits/rejected": 321.81268310546875, + "logps/chosen": -1.1514198780059814, + "logps/rejected": -1.5826447010040283, + "loss": 1.5398, + "nll_loss": 1.701086401939392, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.05757099390029907, + "rewards/margins": 0.021561237052083015, + "rewards/rejected": -0.07913222908973694, "step": 445 }, { "epoch": 0.3574265289912629, - "grad_norm": 41.643070220947266, - "learning_rate": 3.9568253276624704e-06, - "log_odds_chosen": 0.3447812795639038, - "log_odds_ratio": -0.6143133044242859, - "logits/chosen": 308.4149169921875, - "logits/rejected": 373.98712158203125, - "logps/chosen": -1.146469235420227, - "logps/rejected": -1.3937455415725708, - "loss": 1.7364, - "nll_loss": 1.5407047271728516, + "grad_norm": 38.22096633911133, + "learning_rate": 2.357022603955159e-06, + "log_odds_chosen": 0.3011036515235901, + "log_odds_ratio": -0.6046400666236877, + "logits/chosen": 295.15643310546875, + "logits/rejected": 363.99285888671875, + "logps/chosen": -1.0281888246536255, + "logps/rejected": -1.2145917415618896, + "loss": 1.5697, + "nll_loss": 1.3977224826812744, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.05732346326112747, - "rewards/margins": 0.012363811954855919, - "rewards/rejected": -0.06968727707862854, + "rewards/chosen": -0.051409441977739334, + "rewards/margins": 0.009320144541561604, + "rewards/rejected": -0.060729581862688065, "step": 450 }, { "epoch": 0.3613979348689436, - "grad_norm": 51.8876953125, - "learning_rate": 3.929157164148352e-06, - "log_odds_chosen": 0.25782614946365356, - "log_odds_ratio": -0.6060336828231812, - "logits/chosen": 289.9158020019531, - "logits/rejected": 339.3231201171875, - "logps/chosen": -0.9036432504653931, - "logps/rejected": -1.0764741897583008, - "loss": 1.831, - "nll_loss": 1.5824403762817383, + "grad_norm": 59.05949783325195, + "learning_rate": 2.3440361546924774e-06, + "log_odds_chosen": 0.2038741558790207, + "log_odds_ratio": -0.6207619309425354, + "logits/chosen": 275.1322326660156, + "logits/rejected": 327.7266540527344, + "logps/chosen": -0.8248960375785828, + "logps/rejected": -0.9636802673339844, + "loss": 1.7361, + "nll_loss": 1.472722053527832, "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.045182161033153534, - "rewards/margins": 0.008641545660793781, - "rewards/rejected": -0.05382370948791504, + "rewards/chosen": -0.04124480113387108, + "rewards/margins": 0.006939212791621685, + "rewards/rejected": -0.04818401485681534, "step": 455 }, { "epoch": 0.3653693407466243, - "grad_norm": 54.782958984375, - "learning_rate": 3.901226490408728e-06, - "log_odds_chosen": 0.2922573685646057, - "log_odds_ratio": -0.5908970236778259, - "logits/chosen": 307.7389221191406, - "logits/rejected": 323.6591796875, - "logps/chosen": -1.3589437007904053, - "logps/rejected": -1.5952913761138916, - "loss": 1.5749, - "nll_loss": 1.6173921823501587, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.06794719398021698, - "rewards/margins": 0.011817372404038906, - "rewards/rejected": -0.07976455986499786, + "grad_norm": 75.4602279663086, + "learning_rate": 2.3312620206007847e-06, + "log_odds_chosen": 0.14844343066215515, + "log_odds_ratio": -0.6544772386550903, + "logits/chosen": 300.9977111816406, + "logits/rejected": 315.71978759765625, + "logps/chosen": -1.323999285697937, + "logps/rejected": -1.4490077495574951, + "loss": 1.4689, + "nll_loss": 1.5351868867874146, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.06619997322559357, + "rewards/margins": 0.006250420119613409, + "rewards/rejected": -0.07245039194822311, "step": 460 }, { "epoch": 0.369340746624305, - "grad_norm": 30.29743194580078, - "learning_rate": 3.873038436801298e-06, - "log_odds_chosen": 0.310823529958725, - "log_odds_ratio": -0.662110447883606, - "logits/chosen": 300.14251708984375, - "logits/rejected": 391.355712890625, - "logps/chosen": -1.3028764724731445, - "logps/rejected": -1.4683092832565308, - "loss": 1.714, - "nll_loss": 1.549576997756958, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.06514382362365723, - "rewards/margins": 0.008271644823253155, - "rewards/rejected": -0.07341547310352325, + "grad_norm": 30.48955535888672, + "learning_rate": 2.3186944788008413e-06, + "log_odds_chosen": 0.22145530581474304, + "log_odds_ratio": -0.6970380544662476, + "logits/chosen": 289.8216552734375, + "logits/rejected": 381.4501953125, + "logps/chosen": -1.2244035005569458, + "logps/rejected": -1.314239501953125, + "loss": 1.5863, + "nll_loss": 1.4692577123641968, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.06122017651796341, + "rewards/margins": 0.0044917999766767025, + "rewards/rejected": -0.06571197509765625, "step": 465 }, { "epoch": 0.3733121525019857, - "grad_norm": 59.415611267089844, - "learning_rate": 3.8445981809597715e-06, - "log_odds_chosen": -0.1661299765110016, - "log_odds_ratio": -0.8740431070327759, - "logits/chosen": 272.77264404296875, - "logits/rejected": 408.80975341796875, - "logps/chosen": -1.0485327243804932, - "logps/rejected": -1.0229226350784302, - "loss": 1.4859, - "nll_loss": 1.2975164651870728, + "grad_norm": 53.512874603271484, + "learning_rate": 2.3063280200722128e-06, + "log_odds_chosen": -0.14795434474945068, + "log_odds_ratio": -0.841509997844696, + "logits/chosen": 259.2519226074219, + "logits/rejected": 400.40814208984375, + "logps/chosen": -0.9314160346984863, + "logps/rejected": -0.9140155911445618, + "loss": 1.4001, + "nll_loss": 1.1743015050888062, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.05242663621902466, - "rewards/margins": -0.0012805074220523238, - "rewards/rejected": -0.05114613100886345, + "rewards/chosen": -0.046570807695388794, + "rewards/margins": -0.0008700292673893273, + "rewards/rejected": -0.04570077732205391, "step": 470 }, { "epoch": 0.37728355837966643, - "grad_norm": 38.312618255615234, - "learning_rate": 3.815910946842828e-06, - "log_odds_chosen": -0.014984751120209694, - "log_odds_ratio": -0.8228279948234558, - "logits/chosen": 292.1250305175781, - "logits/rejected": 363.7527770996094, - "logps/chosen": -1.3335845470428467, - "logps/rejected": -1.5042054653167725, - "loss": 1.5739, - "nll_loss": 1.5051485300064087, + "grad_norm": 41.76538848876953, + "learning_rate": 2.2941573387056174e-06, + "log_odds_chosen": 0.1297486275434494, + "log_odds_ratio": -0.7071259617805481, + "logits/chosen": 282.8462829589844, + "logits/rejected": 355.6139831542969, + "logps/chosen": -1.218668818473816, + "logps/rejected": -1.4113277196884155, + "loss": 1.4564, + "nll_loss": 1.3523343801498413, "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.0666792243719101, - "rewards/margins": 0.008531046099960804, - "rewards/rejected": -0.07521027326583862, + "rewards/chosen": -0.06093344837427139, + "rewards/margins": 0.009632943198084831, + "rewards/rejected": -0.07056639343500137, "step": 475 }, { "epoch": 0.3812549642573471, - "grad_norm": 56.91576385498047, - "learning_rate": 3.7869820037745773e-06, - "log_odds_chosen": -0.030622828751802444, - "log_odds_ratio": -0.7795476913452148, - "logits/chosen": 385.81536865234375, - "logits/rejected": 331.4559631347656, - "logps/chosen": -1.2507215738296509, - "logps/rejected": -1.279594898223877, - "loss": 1.6582, - "nll_loss": 1.5629332065582275, + "grad_norm": 39.9034538269043, + "learning_rate": 2.2821773229381924e-06, + "log_odds_chosen": 0.01033252477645874, + "log_odds_ratio": -0.7517456412315369, + "logits/chosen": 377.63323974609375, + "logits/rejected": 321.8352966308594, + "logps/chosen": -1.1192216873168945, + "logps/rejected": -1.179273009300232, + "loss": 1.5432, + "nll_loss": 1.4109880924224854, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.0625360757112503, - "rewards/margins": 0.0014436636120080948, - "rewards/rejected": -0.06397974491119385, + "rewards/chosen": -0.05596108362078667, + "rewards/margins": 0.0030025753658264875, + "rewards/rejected": -0.058963656425476074, "step": 480 }, { "epoch": 0.3852263701350278, - "grad_norm": 147.40060424804688, - "learning_rate": 3.7578166654766695e-06, - "log_odds_chosen": 0.012112426571547985, - "log_odds_ratio": -0.8280073404312134, - "logits/chosen": 324.9056091308594, - "logits/rejected": 301.1451721191406, - "logps/chosen": -1.0767914056777954, - "logps/rejected": -0.936957836151123, - "loss": 1.6, - "nll_loss": 1.675865888595581, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.05383957177400589, - "rewards/margins": -0.006991674192249775, - "rewards/rejected": -0.04684789478778839, + "grad_norm": 78.85708618164062, + "learning_rate": 2.270383045932499e-06, + "log_odds_chosen": -0.009592628106474876, + "log_odds_ratio": -0.8770645260810852, + "logits/chosen": 314.0664978027344, + "logits/rejected": 293.30218505859375, + "logps/chosen": -1.006894826889038, + "logps/rejected": -0.8256500363349915, + "loss": 1.479, + "nll_loss": 1.5845009088516235, + "rewards/accuracies": 0.20000000298023224, + "rewards/chosen": -0.05034474655985832, + "rewards/margins": -0.009062247350811958, + "rewards/rejected": -0.041282497346401215, "step": 485 }, { "epoch": 0.3891977760127085, - "grad_norm": 62.930694580078125, - "learning_rate": 3.7284202890922707e-06, - "log_odds_chosen": -0.3624054789543152, - "log_odds_ratio": -0.9134989976882935, - "logits/chosen": 387.71923828125, - "logits/rejected": 309.9226989746094, - "logps/chosen": -1.203176498413086, - "logps/rejected": -0.9530506134033203, - "loss": 1.6178, - "nll_loss": 1.5490739345550537, - "rewards/accuracies": 0.0, - "rewards/chosen": -0.060158826410770416, - "rewards/margins": -0.012506293132901192, - "rewards/rejected": -0.047652535140514374, + "grad_norm": 39.800479888916016, + "learning_rate": 2.2587697572631284e-06, + "log_odds_chosen": -0.4220407009124756, + "log_odds_ratio": -0.9446707963943481, + "logits/chosen": 381.3775329589844, + "logits/rejected": 301.7720947265625, + "logps/chosen": -1.1097631454467773, + "logps/rejected": -0.8344427943229675, + "loss": 1.5091, + "nll_loss": 1.4189200401306152, + "rewards/accuracies": 0.20000000298023224, + "rewards/chosen": -0.05548815801739693, + "rewards/margins": -0.013766017742455006, + "rewards/rejected": -0.041722141206264496, "step": 490 }, { "epoch": 0.3931691818903892, - "grad_norm": 44.98346710205078, - "learning_rate": 3.698798274202048e-06, - "log_odds_chosen": 1.106994867324829, - "log_odds_ratio": -0.4969615340232849, - "logits/chosen": 416.2843322753906, - "logits/rejected": 322.34722900390625, - "logps/chosen": -1.0825506448745728, - "logps/rejected": -1.9832751750946045, - "loss": 1.4863, - "nll_loss": 1.3532053232192993, + "grad_norm": 43.88238525390625, + "learning_rate": 2.2473328748774737e-06, + "log_odds_chosen": 1.1393579244613647, + "log_odds_ratio": -0.5236693620681763, + "logits/chosen": 410.23822021484375, + "logits/rejected": 316.77801513671875, + "logps/chosen": -0.9757580757141113, + "logps/rejected": -1.8936541080474854, + "loss": 1.3702, + "nll_loss": 1.2193056344985962, "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.0541275329887867, - "rewards/margins": 0.045036230236291885, - "rewards/rejected": -0.09916376322507858, + "rewards/chosen": -0.048787906765937805, + "rewards/margins": 0.0458948090672493, + "rewards/rejected": -0.0946827083826065, "step": 495 }, { "epoch": 0.3971405877680699, - "grad_norm": 71.20710754394531, - "learning_rate": 3.668956061832365e-06, - "log_odds_chosen": 0.18296189606189728, - "log_odds_ratio": -0.6548603773117065, - "logits/chosen": 375.1370544433594, - "logits/rejected": 340.265869140625, - "logps/chosen": -1.1238301992416382, - "logps/rejected": -1.242456078529358, - "loss": 1.7543, - "nll_loss": 2.0999672412872314, + "grad_norm": 68.22760772705078, + "learning_rate": 2.23606797749979e-06, + "log_odds_chosen": 0.1606062352657318, + "log_odds_ratio": -0.6387936472892761, + "logits/chosen": 367.50335693359375, + "logits/rejected": 333.0289001464844, + "logps/chosen": -1.0744435787200928, + "logps/rejected": -1.2043496370315552, + "loss": 1.6871, + "nll_loss": 2.0462329387664795, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.05619151517748833, - "rewards/margins": 0.005931290797889233, - "rewards/rejected": -0.062122803181409836, + "rewards/chosen": -0.05372218042612076, + "rewards/margins": 0.006495301611721516, + "rewards/rejected": -0.0602174811065197, "step": 500 }, { "epoch": 0.4011119936457506, - "grad_norm": 43.03315353393555, - "learning_rate": 3.6388991334558665e-06, - "log_odds_chosen": -0.3676120638847351, - "log_odds_ratio": -0.9084262847900391, - "logits/chosen": 356.4194030761719, - "logits/rejected": 317.63275146484375, - "logps/chosen": -1.1387929916381836, - "logps/rejected": -0.8903687596321106, - "loss": 1.4812, - "nll_loss": 1.3678486347198486, - "rewards/accuracies": 0.0, - "rewards/chosen": -0.05693964287638664, - "rewards/margins": -0.012421206571161747, - "rewards/rejected": -0.04451843723654747, + "grad_norm": 40.497886657714844, + "learning_rate": 2.224970797449924e-06, + "log_odds_chosen": -0.4286605417728424, + "log_odds_ratio": -0.9496608972549438, + "logits/chosen": 351.97662353515625, + "logits/rejected": 314.7813415527344, + "logps/chosen": -1.0459034442901611, + "logps/rejected": -0.7734488248825073, + "loss": 1.3743, + "nll_loss": 1.2439095973968506, + "rewards/accuracies": 0.20000000298023224, + "rewards/chosen": -0.052295178174972534, + "rewards/margins": -0.01362273283302784, + "rewards/rejected": -0.038672447204589844, "step": 505 }, { "epoch": 0.4050833995234313, - "grad_norm": 27.506656646728516, - "learning_rate": 3.6086330099846274e-06, - "log_odds_chosen": -0.5120627880096436, - "log_odds_ratio": -1.0131856203079224, - "logits/chosen": 349.39093017578125, - "logits/rejected": 328.4683532714844, - "logps/chosen": -1.0289686918258667, - "logps/rejected": -0.724139392375946, - "loss": 1.7849, - "nll_loss": 1.7454910278320312, + "grad_norm": 30.911205291748047, + "learning_rate": 2.2140372138502386e-06, + "log_odds_chosen": -0.44240862131118774, + "log_odds_ratio": -1.003506064414978, + "logits/chosen": 344.8081359863281, + "logits/rejected": 322.8605651855469, + "logps/chosen": -0.9024990797042847, + "logps/rejected": -0.632462739944458, + "loss": 1.67, + "nll_loss": 1.6219089031219482, "rewards/accuracies": 0.20000000298023224, - "rewards/chosen": -0.05144844204187393, - "rewards/margins": -0.015241468325257301, - "rewards/rejected": -0.03620696812868118, + "rewards/chosen": -0.04512495547533035, + "rewards/margins": -0.013501817360520363, + "rewards/rejected": -0.03162313997745514, "step": 510 }, { "epoch": 0.409054805401112, - "grad_norm": 61.25779724121094, - "learning_rate": 3.578163250756065e-06, - "log_odds_chosen": 0.4315710663795471, - "log_odds_ratio": -0.5859761834144592, - "logits/chosen": 317.2684631347656, - "logits/rejected": 341.7981262207031, - "logps/chosen": -1.1744743585586548, - "logps/rejected": -1.5218207836151123, - "loss": 1.6567, - "nll_loss": 1.4415868520736694, + "grad_norm": 68.99786376953125, + "learning_rate": 2.203263246196159e-06, + "log_odds_chosen": 0.5419043898582458, + "log_odds_ratio": -0.5282405614852905, + "logits/chosen": 304.9704895019531, + "logits/rejected": 331.59344482421875, + "logps/chosen": -1.0522342920303345, + "logps/rejected": -1.4638428688049316, + "loss": 1.5493, + "nll_loss": 1.3706550598144531, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.05872371047735214, - "rewards/margins": 0.017367318272590637, - "rewards/rejected": -0.07609103620052338, + "rewards/chosen": -0.05261171981692314, + "rewards/margins": 0.02058042585849762, + "rewards/rejected": -0.07319213449954987, "step": 515 }, { "epoch": 0.4130262112787927, - "grad_norm": 74.8428955078125, - "learning_rate": 3.5474954525117887e-06, - "log_odds_chosen": 0.05642819404602051, - "log_odds_ratio": -0.8421560525894165, - "logits/chosen": 321.3084716796875, - "logits/rejected": 286.5706787109375, - "logps/chosen": -1.519960641860962, - "logps/rejected": -1.6510608196258545, - "loss": 1.7797, - "nll_loss": 1.7759168148040771, + "grad_norm": 88.52311706542969, + "learning_rate": 2.1926450482675734e-06, + "log_odds_chosen": 0.0930122509598732, + "log_odds_ratio": -0.8259990811347961, + "logits/chosen": 312.1341247558594, + "logits/rejected": 277.54803466796875, + "logps/chosen": -1.3634002208709717, + "logps/rejected": -1.5216593742370605, + "loss": 1.6321, + "nll_loss": 1.6297897100448608, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.07599803060293198, - "rewards/margins": 0.006555011961609125, - "rewards/rejected": -0.08255304396152496, + "rewards/chosen": -0.06817001104354858, + "rewards/margins": 0.007912958040833473, + "rewards/rejected": -0.07608296722173691, "step": 520 }, { "epoch": 0.4169976171564734, - "grad_norm": 50.956214904785156, - "learning_rate": 3.5166352483695803e-06, - "log_odds_chosen": 0.43155431747436523, - "log_odds_ratio": -0.5425541400909424, - "logits/chosen": 367.09210205078125, - "logits/rejected": 334.6671447753906, - "logps/chosen": -1.14950430393219, - "logps/rejected": -1.4378228187561035, - "loss": 1.7118, - "nll_loss": 1.758131742477417, + "grad_norm": 45.25846862792969, + "learning_rate": 2.182178902359924e-06, + "log_odds_chosen": 0.5764316320419312, + "log_odds_ratio": -0.494175523519516, + "logits/chosen": 357.0990295410156, + "logits/rejected": 322.9207458496094, + "logps/chosen": -1.0251896381378174, + "logps/rejected": -1.414226770401001, + "loss": 1.5803, + "nll_loss": 1.5963222980499268, "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.05747520923614502, - "rewards/margins": 0.014415934681892395, - "rewards/rejected": -0.07189114391803741, + "rewards/chosen": -0.05125947669148445, + "rewards/margins": 0.019451860338449478, + "rewards/rejected": -0.07071133702993393, "step": 525 }, { "epoch": 0.42096902303415407, - "grad_norm": 91.34703826904297, - "learning_rate": 3.4855883067886888e-06, - "log_odds_chosen": 0.24526679515838623, - "log_odds_ratio": -0.6680157780647278, - "logits/chosen": 316.42547607421875, - "logits/rejected": 285.8825378417969, - "logps/chosen": -0.9753881692886353, - "logps/rejected": -1.2148548364639282, - "loss": 1.4501, - "nll_loss": 1.2995529174804688, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.04876940697431564, - "rewards/margins": 0.011973332613706589, - "rewards/rejected": -0.060742735862731934, + "grad_norm": 36.488765716552734, + "learning_rate": 2.1718612138153473e-06, + "log_odds_chosen": 0.36014705896377563, + "log_odds_ratio": -0.6301948428153992, + "logits/chosen": 304.56103515625, + "logits/rejected": 273.9854736328125, + "logps/chosen": -0.930140495300293, + "logps/rejected": -1.235038161277771, + "loss": 1.3532, + "nll_loss": 1.213836669921875, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.04650702700018883, + "rewards/margins": 0.01524488627910614, + "rewards/rejected": -0.06175190955400467, "step": 530 }, { "epoch": 0.4249404289118348, - "grad_norm": 110.02918243408203, - "learning_rate": 3.4543603305286432e-06, - "log_odds_chosen": 0.0072197020053863525, - "log_odds_ratio": -0.7949660420417786, - "logits/chosen": 320.6426696777344, - "logits/rejected": 313.28411865234375, - "logps/chosen": -1.0177392959594727, - "logps/rejected": -0.8970460891723633, - "loss": 1.6384, - "nll_loss": 1.6469953060150146, + "grad_norm": 84.07301330566406, + "learning_rate": 2.161688505835585e-06, + "log_odds_chosen": 0.10728853940963745, + "log_odds_ratio": -0.7639841437339783, + "logits/chosen": 312.0140686035156, + "logits/rejected": 303.831787109375, + "logps/chosen": -0.9278820157051086, + "logps/rejected": -0.8287888765335083, + "loss": 1.5005, + "nll_loss": 1.5337541103363037, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.05088697001338005, - "rewards/margins": -0.006034668534994125, - "rewards/rejected": -0.044852301478385925, + "rewards/chosen": -0.04639409855008125, + "rewards/margins": -0.0049546584486961365, + "rewards/rejected": -0.041439443826675415, "step": 535 }, { "epoch": 0.4289118347895155, - "grad_norm": 39.02376937866211, - "learning_rate": 3.422957055601758e-06, - "log_odds_chosen": -0.32208532094955444, - "log_odds_ratio": -0.9446969032287598, - "logits/chosen": 281.58331298828125, - "logits/rejected": 433.8438415527344, - "logps/chosen": -1.2129267454147339, - "logps/rejected": -0.957917332649231, - "loss": 1.5224, - "nll_loss": 1.474649429321289, + "grad_norm": 39.88277816772461, + "learning_rate": 2.151657414559676e-06, + "log_odds_chosen": -0.4115291237831116, + "log_odds_ratio": -0.9856241345405579, + "logits/chosen": 274.3957214355469, + "logits/rejected": 427.97650146484375, + "logps/chosen": -1.1595760583877563, + "logps/rejected": -0.8591532707214355, + "loss": 1.4338, + "nll_loss": 1.3843562602996826, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.060646336525678635, - "rewards/margins": -0.012750471010804176, - "rewards/rejected": -0.04789586737751961, + "rewards/chosen": -0.0579788014292717, + "rewards/margins": -0.015021136030554771, + "rewards/rejected": -0.04295766353607178, "step": 540 }, { "epoch": 0.4328832406671962, - "grad_norm": 72.89534759521484, - "learning_rate": 3.3913842502195256e-06, - "log_odds_chosen": 0.10887251049280167, - "log_odds_ratio": -0.6846013069152832, - "logits/chosen": 337.80560302734375, - "logits/rejected": 373.49957275390625, - "logps/chosen": -1.1243635416030884, - "logps/rejected": -1.1805238723754883, - "loss": 1.573, - "nll_loss": 1.2677637338638306, + "grad_norm": 71.12447357177734, + "learning_rate": 2.1417646843905967e-06, + "log_odds_chosen": 0.19147726893424988, + "log_odds_ratio": -0.6580365896224976, + "logits/chosen": 331.36236572265625, + "logits/rejected": 365.7850036621094, + "logps/chosen": -1.0934817790985107, + "logps/rejected": -1.20845627784729, + "loss": 1.497, + "nll_loss": 1.2108908891677856, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.05621817708015442, - "rewards/margins": 0.0028080150950700045, - "rewards/rejected": -0.059026192873716354, + "rewards/chosen": -0.05467408895492554, + "rewards/margins": 0.005748722702264786, + "rewards/rejected": -0.06042281538248062, "step": 545 }, { "epoch": 0.4368546465448769, - "grad_norm": 72.7591323852539, - "learning_rate": 3.3596477137331106e-06, - "log_odds_chosen": 0.3823426365852356, - "log_odds_ratio": -0.5422973036766052, - "logits/chosen": 345.8101806640625, - "logits/rejected": 315.0772399902344, - "logps/chosen": -1.0160396099090576, - "logps/rejected": -1.2651389837265015, - "loss": 1.6836, - "nll_loss": 1.2323284149169922, + "grad_norm": 66.69554901123047, + "learning_rate": 2.132007163556104e-06, + "log_odds_chosen": 0.5324321389198303, + "log_odds_ratio": -0.5010102391242981, + "logits/chosen": 332.88983154296875, + "logits/rejected": 302.9046630859375, + "logps/chosen": -0.9199882745742798, + "logps/rejected": -1.2687950134277344, + "loss": 1.5434, + "nll_loss": 1.1497266292572021, "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.05080198124051094, - "rewards/margins": 0.01245497353374958, - "rewards/rejected": -0.06325694918632507, + "rewards/chosen": -0.04599941521883011, + "rewards/margins": 0.01744033768773079, + "rewards/rejected": -0.0634397491812706, "step": 550 }, { "epoch": 0.44082605242255757, - "grad_norm": 90.90532684326172, - "learning_rate": 3.327753275568105e-06, - "log_odds_chosen": 0.33174973726272583, - "log_odds_ratio": -0.5770066380500793, - "logits/chosen": 357.4688415527344, - "logits/rejected": 283.86163330078125, - "logps/chosen": -1.4533777236938477, - "logps/rejected": -1.7138382196426392, - "loss": 1.9812, - "nll_loss": 1.9684550762176514, + "grad_norm": 84.53522491455078, + "learning_rate": 2.122381799890045e-06, + "log_odds_chosen": 0.588485836982727, + "log_odds_ratio": -0.5170946717262268, + "logits/chosen": 350.81085205078125, + "logits/rejected": 277.13232421875, + "logps/chosen": -1.1174993515014648, + "logps/rejected": -1.5437860488891602, + "loss": 1.7752, + "nll_loss": 1.7890150547027588, "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.0726688951253891, - "rewards/margins": 0.013023021630942822, - "rewards/rejected": -0.0856919139623642, + "rewards/chosen": -0.05587497353553772, + "rewards/margins": 0.021314334124326706, + "rewards/rejected": -0.07718930393457413, "step": 555 }, { "epoch": 0.44479745830023826, - "grad_norm": 50.66121292114258, - "learning_rate": 3.2957067941537745e-06, - "log_odds_chosen": -0.82317054271698, - "log_odds_ratio": -1.2737700939178467, - "logits/chosen": 260.4927062988281, - "logits/rejected": 432.49237060546875, - "logps/chosen": -1.632800817489624, - "logps/rejected": -1.045261025428772, - "loss": 1.8113, - "nll_loss": 1.9882593154907227, + "grad_norm": 51.40147018432617, + "learning_rate": 2.1128856368212917e-06, + "log_odds_chosen": -1.033674955368042, + "log_odds_ratio": -1.4361447095870972, + "logits/chosen": 247.4395751953125, + "logits/rejected": 423.43707275390625, + "logps/chosen": -1.5958759784698486, + "logps/rejected": -0.9105483293533325, + "loss": 1.7178, + "nll_loss": 1.9259655475616455, "rewards/accuracies": 0.20000000298023224, - "rewards/chosen": -0.08164004981517792, - "rewards/margins": -0.02937699481844902, - "rewards/rejected": -0.0522630512714386, + "rewards/chosen": -0.07979379594326019, + "rewards/margins": -0.034266382455825806, + "rewards/rejected": -0.045527417212724686, "step": 560 }, { "epoch": 0.448768864177919, - "grad_norm": 74.89939880371094, - "learning_rate": 3.263514155846969e-06, - "log_odds_chosen": 0.328817218542099, - "log_odds_ratio": -0.5914020538330078, - "logits/chosen": 342.4215393066406, - "logits/rejected": 324.5187683105469, - "logps/chosen": -0.9976029396057129, - "logps/rejected": -1.1394035816192627, - "loss": 1.5494, - "nll_loss": 1.7881031036376953, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.04988015443086624, - "rewards/margins": 0.007090026047080755, - "rewards/rejected": -0.056970179080963135, + "grad_norm": 72.64619445800781, + "learning_rate": 2.1035158095583564e-06, + "log_odds_chosen": 0.18388502299785614, + "log_odds_ratio": -0.6336166262626648, + "logits/chosen": 335.2535705566406, + "logits/rejected": 319.001953125, + "logps/chosen": -0.9879266619682312, + "logps/rejected": -1.0735465288162231, + "loss": 1.4765, + "nll_loss": 1.7013626098632812, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.0493963286280632, + "rewards/margins": 0.00428099324926734, + "rewards/rejected": -0.05367732793092728, "step": 565 }, { "epoch": 0.4527402700555997, - "grad_norm": 79.3864974975586, - "learning_rate": 3.2311812738509062e-06, - "log_odds_chosen": 0.5359792709350586, - "log_odds_ratio": -0.582006573677063, - "logits/chosen": 312.7106018066406, - "logits/rejected": 337.06732177734375, - "logps/chosen": -1.0472519397735596, - "logps/rejected": -1.3570013046264648, - "loss": 1.7019, - "nll_loss": 1.3205798864364624, + "grad_norm": 74.86976623535156, + "learning_rate": 2.0942695414584777e-06, + "log_odds_chosen": 0.4404204487800598, + "log_odds_ratio": -0.6092650890350342, + "logits/chosen": 307.1649169921875, + "logits/rejected": 326.94189453125, + "logps/chosen": -0.9814633131027222, + "logps/rejected": -1.2262656688690186, + "loss": 1.5794, + "nll_loss": 1.2251192331314087, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.0523625984787941, - "rewards/margins": 0.015487474389374256, - "rewards/rejected": -0.06785006821155548, + "rewards/chosen": -0.049073170870542526, + "rewards/margins": 0.012240114621818066, + "rewards/rejected": -0.06131328269839287, "step": 570 }, { "epoch": 0.4567116759332804, - "grad_norm": 45.589111328125, - "learning_rate": 3.198714087129024e-06, - "log_odds_chosen": 0.5243362188339233, - "log_odds_ratio": -0.5980243682861328, - "logits/chosen": 344.4964904785156, - "logits/rejected": 442.112548828125, - "logps/chosen": -1.115337610244751, - "logps/rejected": -1.4375159740447998, - "loss": 1.4739, - "nll_loss": 1.505415678024292, + "grad_norm": 53.87495422363281, + "learning_rate": 2.085144140570748e-06, + "log_odds_chosen": 0.6467480063438416, + "log_odds_ratio": -0.46934765577316284, + "logits/chosen": 328.2501525878906, + "logits/rejected": 424.5448303222656, + "logps/chosen": -0.916412353515625, + "logps/rejected": -1.2970696687698364, + "loss": 1.3723, + "nll_loss": 1.3209168910980225, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.055766891688108444, - "rewards/margins": 0.016108911484479904, - "rewards/rejected": -0.07187579572200775, + "rewards/chosen": -0.04582061618566513, + "rewards/margins": 0.019032862037420273, + "rewards/rejected": -0.0648534744977951, "step": 575 }, { "epoch": 0.46068308181096107, - "grad_norm": 82.72159576416016, - "learning_rate": 3.1661185593140986e-06, - "log_odds_chosen": -0.06218218803405762, - "log_odds_ratio": -0.7745502591133118, - "logits/chosen": 349.07562255859375, - "logits/rejected": 288.5042724609375, - "logps/chosen": -1.5691912174224854, - "logps/rejected": -1.5143083333969116, - "loss": 1.7202, - "nll_loss": 1.9988605976104736, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.07845956087112427, - "rewards/margins": -0.0027441338170319796, - "rewards/rejected": -0.07571543008089066, + "grad_norm": 74.30099487304688, + "learning_rate": 2.0761369963434992e-06, + "log_odds_chosen": -0.007485628128051758, + "log_odds_ratio": -0.7629297971725464, + "logits/chosen": 338.58172607421875, + "logits/rejected": 277.81951904296875, + "logps/chosen": -1.2728912830352783, + "logps/rejected": -1.2861583232879639, + "loss": 1.5917, + "nll_loss": 1.7535676956176758, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.06364456564188004, + "rewards/margins": 0.000663341605104506, + "rewards/rejected": -0.06430791318416595, "step": 580 }, { "epoch": 0.46465448768864176, - "grad_norm": 36.90455627441406, - "learning_rate": 3.133400677612836e-06, - "log_odds_chosen": -0.509355366230011, - "log_odds_ratio": -1.0258004665374756, - "logits/chosen": 336.6009216308594, - "logits/rejected": 255.05517578125, - "logps/chosen": -1.52445387840271, - "logps/rejected": -1.1118301153182983, - "loss": 1.7538, - "nll_loss": 2.2724270820617676, + "grad_norm": 40.75542449951172, + "learning_rate": 2.067245576486808e-06, + "log_odds_chosen": -0.4641965925693512, + "log_odds_ratio": -0.9669200778007507, + "logits/chosen": 329.0755310058594, + "logits/rejected": 246.0184326171875, + "logps/chosen": -1.3119654655456543, + "logps/rejected": -0.9784905314445496, + "loss": 1.663, + "nll_loss": 2.1507182121276855, "rewards/accuracies": 0.20000000298023224, - "rewards/chosen": -0.07622268795967102, - "rewards/margins": -0.020631182938814163, - "rewards/rejected": -0.05559150502085686, + "rewards/chosen": -0.0655982717871666, + "rewards/margins": -0.016673749312758446, + "rewards/rejected": -0.0489245280623436, "step": 585 }, { "epoch": 0.4686258935663225, - "grad_norm": 50.35420608520508, - "learning_rate": 3.100566451706132e-06, - "log_odds_chosen": -0.455788791179657, - "log_odds_ratio": -0.9897588491439819, - "logits/chosen": 316.55255126953125, - "logits/rejected": 320.16485595703125, - "logps/chosen": -1.5558842420578003, - "logps/rejected": -1.1951056718826294, - "loss": 1.8191, - "nll_loss": 1.6975910663604736, + "grad_norm": 42.86177444458008, + "learning_rate": 2.058467423981546e-06, + "log_odds_chosen": -0.478046715259552, + "log_odds_ratio": -0.9956863522529602, + "logits/chosen": 311.27435302734375, + "logits/rejected": 312.9971008300781, + "logps/chosen": -1.4736350774765015, + "logps/rejected": -1.113892912864685, + "loss": 1.6741, + "nll_loss": 1.6202127933502197, "rewards/accuracies": 0.20000000298023224, - "rewards/chosen": -0.07779420912265778, - "rewards/margins": -0.018038932234048843, - "rewards/rejected": -0.05975528433918953, + "rewards/chosen": -0.07368175685405731, + "rewards/margins": -0.01798710599541664, + "rewards/rejected": -0.05569465085864067, "step": 590 }, { "epoch": 0.4725972994440032, - "grad_norm": 51.966094970703125, - "learning_rate": 3.067621912645195e-06, - "log_odds_chosen": 0.5225323438644409, - "log_odds_ratio": -0.5115421414375305, - "logits/chosen": 318.2131042480469, - "logits/rejected": 299.44970703125, - "logps/chosen": -1.1214492321014404, - "logps/rejected": -1.5328994989395142, - "loss": 1.609, - "nll_loss": 1.4310369491577148, + "grad_norm": 42.01351547241211, + "learning_rate": 2.0498001542269694e-06, + "log_odds_chosen": 0.5545082688331604, + "log_odds_ratio": -0.4980427324771881, + "logits/chosen": 314.21270751953125, + "logits/rejected": 294.3743591308594, + "logps/chosen": -1.0309841632843018, + "logps/rejected": -1.4416837692260742, + "loss": 1.5197, + "nll_loss": 1.3417736291885376, "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.05607246607542038, - "rewards/margins": 0.020572511479258537, - "rewards/rejected": -0.07664497196674347, + "rewards/chosen": -0.05154920741915703, + "rewards/margins": 0.020534982904791832, + "rewards/rejected": -0.07208418846130371, "step": 595 }, { "epoch": 0.4765687053216839, - "grad_norm": 43.6855354309082, - "learning_rate": 3.0345731117437636e-06, - "log_odds_chosen": -0.1032852903008461, - "log_odds_ratio": -0.8357051610946655, - "logits/chosen": 371.5491638183594, - "logits/rejected": 289.3468322753906, - "logps/chosen": -1.2020314931869507, - "logps/rejected": -1.056931734085083, - "loss": 1.8538, - "nll_loss": 2.0933589935302734, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.060101576149463654, - "rewards/margins": -0.007254990749061108, - "rewards/rejected": -0.05284658074378967, + "grad_norm": 43.628326416015625, + "learning_rate": 2.0412414523193154e-06, + "log_odds_chosen": -0.10555162280797958, + "log_odds_ratio": -0.8111637234687805, + "logits/chosen": 362.4828186035156, + "logits/rejected": 278.252685546875, + "logps/chosen": -1.1187463998794556, + "logps/rejected": -0.9830799102783203, + "loss": 1.723, + "nll_loss": 1.9726502895355225, + "rewards/accuracies": 0.20000000298023224, + "rewards/chosen": -0.05593731999397278, + "rewards/margins": -0.006783320102840662, + "rewards/rejected": -0.04915400221943855, "step": 600 }, { "epoch": 0.4805401111993646, - "grad_norm": 65.06558990478516, - "learning_rate": 3.001426119466581e-06, - "log_odds_chosen": 0.4957882761955261, - "log_odds_ratio": -0.4822470247745514, - "logits/chosen": 320.2109680175781, - "logits/rejected": 344.2149353027344, - "logps/chosen": -1.1690635681152344, - "logps/rejected": -1.5483322143554688, - "loss": 1.541, - "nll_loss": 1.4401990175247192, + "grad_norm": 63.654212951660156, + "learning_rate": 2.0327890704543546e-06, + "log_odds_chosen": 0.5946463942527771, + "log_odds_ratio": -0.450082391500473, + "logits/chosen": 309.9012756347656, + "logits/rejected": 330.7868957519531, + "logps/chosen": -1.0596506595611572, + "logps/rejected": -1.5116208791732788, + "loss": 1.4556, + "nll_loss": 1.3402467966079712, "rewards/accuracies": 1.0, - "rewards/chosen": -0.05845317989587784, - "rewards/margins": 0.018963433802127838, - "rewards/rejected": -0.07741661369800568, + "rewards/chosen": -0.052982527762651443, + "rewards/margins": 0.022598514333367348, + "rewards/rejected": -0.07558103650808334, "step": 605 }, { "epoch": 0.48451151707704526, - "grad_norm": 42.35622024536133, - "learning_rate": 2.9681870243143616e-06, - "log_odds_chosen": 0.8287761807441711, - "log_odds_ratio": -0.434671550989151, - "logits/chosen": 405.56640625, - "logits/rejected": 303.19744873046875, - "logps/chosen": -1.0719507932662964, - "logps/rejected": -1.627996802330017, - "loss": 1.7037, - "nll_loss": 1.929690957069397, + "grad_norm": 43.90888214111328, + "learning_rate": 2.0244408254472904e-06, + "log_odds_chosen": 0.7587811946868896, + "log_odds_ratio": -0.43494945764541626, + "logits/chosen": 397.4610290527344, + "logits/rejected": 294.28460693359375, + "logps/chosen": -0.9713308215141296, + "logps/rejected": -1.5056118965148926, + "loss": 1.6174, + "nll_loss": 1.8379215002059937, "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.053597547113895416, - "rewards/margins": 0.027802307158708572, - "rewards/rejected": -0.08139985054731369, + "rewards/chosen": -0.0485665425658226, + "rewards/margins": 0.026714056730270386, + "rewards/rejected": -0.07528059184551239, "step": 610 }, { "epoch": 0.48848292295472595, - "grad_norm": 119.18313598632812, - "learning_rate": 2.9348619317054494e-06, - "log_odds_chosen": 0.6101234555244446, - "log_odds_ratio": -0.4588192403316498, - "logits/chosen": 344.44158935546875, - "logits/rejected": 381.56640625, - "logps/chosen": -0.940362274646759, - "logps/rejected": -1.3556063175201416, - "loss": 1.7092, - "nll_loss": 1.7841463088989258, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.04701811820268631, - "rewards/margins": 0.02076220139861107, - "rewards/rejected": -0.06778031587600708, + "grad_norm": 65.09957122802734, + "learning_rate": 2.0161945963637796e-06, + "log_odds_chosen": 0.7116761803627014, + "log_odds_ratio": -0.41528528928756714, + "logits/chosen": 337.7419738769531, + "logits/rejected": 375.7625427246094, + "logps/chosen": -0.8582298159599304, + "logps/rejected": -1.3130686283111572, + "loss": 1.5795, + "nll_loss": 1.6238418817520142, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04291149228811264, + "rewards/margins": 0.022741934284567833, + "rewards/rejected": -0.06565342843532562, "step": 615 }, { "epoch": 0.4924543288324067, - "grad_norm": 41.579444885253906, - "learning_rate": 2.9014569628543577e-06, - "log_odds_chosen": 0.6708475947380066, - "log_odds_ratio": -0.4850080907344818, - "logits/chosen": 271.7330017089844, - "logits/rejected": 390.313232421875, - "logps/chosen": -1.4829562902450562, - "logps/rejected": -1.998425841331482, - "loss": 1.5568, - "nll_loss": 1.7184091806411743, + "grad_norm": 57.64512252807617, + "learning_rate": 2.0080483222562476e-06, + "log_odds_chosen": 0.7363397479057312, + "log_odds_ratio": -0.4678316116333008, + "logits/chosen": 260.24725341796875, + "logits/rejected": 379.7354431152344, + "logps/chosen": -1.373089075088501, + "logps/rejected": -1.958742380142212, + "loss": 1.4653, + "nll_loss": 1.5931271314620972, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.07414782792329788, - "rewards/margins": 0.02577347680926323, - "rewards/rejected": -0.09992130100727081, + "rewards/chosen": -0.06865445524454117, + "rewards/margins": 0.029282670468091965, + "rewards/rejected": -0.09793712943792343, "step": 620 }, { "epoch": 0.4964257347100874, - "grad_norm": 42.15742111206055, - "learning_rate": 2.867978253647416e-06, - "log_odds_chosen": 0.20527370274066925, - "log_odds_ratio": -0.798372209072113, - "logits/chosen": 467.48321533203125, - "logits/rejected": 273.2369079589844, - "logps/chosen": -1.0584286451339722, - "logps/rejected": -1.0820119380950928, - "loss": 1.6456, - "nll_loss": 1.5411746501922607, + "grad_norm": 43.39156723022461, + "learning_rate": 2.0000000000000003e-06, + "log_odds_chosen": 0.21482162177562714, + "log_odds_ratio": -0.7532765865325928, + "logits/chosen": 456.2254333496094, + "logits/rejected": 259.1532897949219, + "logps/chosen": -0.9853864908218384, + "logps/rejected": -1.0251073837280273, + "loss": 1.5771, + "nll_loss": 1.4871981143951416, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.05292143300175667, - "rewards/margins": 0.0011791624128818512, - "rewards/rejected": -0.05410059541463852, + "rewards/chosen": -0.04926932230591774, + "rewards/margins": 0.0019860477186739445, + "rewards/rejected": -0.05125536769628525, "step": 625 }, { "epoch": 0.5003971405877681, - "grad_norm": 27.90037727355957, - "learning_rate": 2.8344319535157174e-06, - "log_odds_chosen": 0.42426902055740356, - "log_odds_ratio": -0.5486973524093628, - "logits/chosen": 346.8106994628906, - "logits/rejected": 325.4391174316406, - "logps/chosen": -1.075537085533142, - "logps/rejected": -1.3731486797332764, - "loss": 1.4489, - "nll_loss": 1.245110034942627, + "grad_norm": 27.689453125, + "learning_rate": 1.9920476822239895e-06, + "log_odds_chosen": 0.5083667039871216, + "log_odds_ratio": -0.5167660713195801, + "logits/chosen": 340.188232421875, + "logits/rejected": 316.4561462402344, + "logps/chosen": -1.0205366611480713, + "logps/rejected": -1.3779761791229248, + "loss": 1.3601, + "nll_loss": 1.174508810043335, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.053776852786540985, - "rewards/margins": 0.014880577102303505, - "rewards/rejected": -0.06865743547677994, + "rewards/chosen": -0.051026832312345505, + "rewards/margins": 0.017871975898742676, + "rewards/rejected": -0.06889880448579788, "step": 630 }, { "epoch": 0.5043685464654488, - "grad_norm": 73.80306243896484, - "learning_rate": 2.800824224305584e-06, - "log_odds_chosen": 0.22638097405433655, - "log_odds_ratio": -0.6381832361221313, - "logits/chosen": 359.15911865234375, - "logits/rejected": 273.5746154785156, - "logps/chosen": -1.2116836309432983, - "logps/rejected": -1.3346078395843506, - "loss": 1.6291, - "nll_loss": 1.5670521259307861, + "grad_norm": 64.66783905029297, + "learning_rate": 1.9841894753313627e-06, + "log_odds_chosen": 0.32054659724235535, + "log_odds_ratio": -0.5909343361854553, + "logits/chosen": 350.8283386230469, + "logits/rejected": 264.38421630859375, + "logps/chosen": -1.0457651615142822, + "logps/rejected": -1.2276270389556885, + "loss": 1.5451, + "nll_loss": 1.4288588762283325, "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.060584187507629395, - "rewards/margins": 0.006146208383142948, - "rewards/rejected": -0.06673039495944977, + "rewards/chosen": -0.052288252860307693, + "rewards/margins": 0.009093107655644417, + "rewards/rejected": -0.06138136237859726, "step": 635 }, { "epoch": 0.5083399523431295, - "grad_norm": 49.89323043823242, - "learning_rate": 2.76716123914674e-06, - "log_odds_chosen": 0.07012102752923965, - "log_odds_ratio": -0.7102433443069458, - "logits/chosen": 303.50506591796875, - "logits/rejected": 324.7231750488281, - "logps/chosen": -1.0607610940933228, - "logps/rejected": -1.0976377725601196, - "loss": 1.6129, - "nll_loss": 1.4195703268051147, + "grad_norm": 55.53464889526367, + "learning_rate": 1.976423537605237e-06, + "log_odds_chosen": 0.05188782140612602, + "log_odds_ratio": -0.7150664925575256, + "logits/chosen": 299.6695861816406, + "logits/rejected": 320.55572509765625, + "logps/chosen": -0.9817999005317688, + "logps/rejected": -1.0104490518569946, + "loss": 1.5411, + "nll_loss": 1.3620450496673584, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.05303805321455002, - "rewards/margins": 0.0018438354600220919, - "rewards/rejected": -0.05488189309835434, + "rewards/chosen": -0.04908999055624008, + "rewards/margins": 0.0014324591029435396, + "rewards/rejected": -0.05052245408296585, "step": 640 }, { "epoch": 0.5123113582208102, - "grad_norm": 46.98134994506836, - "learning_rate": 2.7334491813184276e-06, - "log_odds_chosen": 0.28301459550857544, - "log_odds_ratio": -0.5637701749801636, - "logits/chosen": 274.5257568359375, - "logits/rejected": 413.9043884277344, - "logps/chosen": -0.8029153943061829, - "logps/rejected": -0.9688779711723328, - "loss": 1.6859, - "nll_loss": 1.5517845153808594, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.04014577344059944, - "rewards/margins": 0.008298131637275219, - "rewards/rejected": -0.04844390228390694, + "grad_norm": 33.685333251953125, + "learning_rate": 1.9687480773953947e-06, + "log_odds_chosen": 0.3233865797519684, + "log_odds_ratio": -0.548931360244751, + "logits/chosen": 274.61444091796875, + "logits/rejected": 411.7635803222656, + "logps/chosen": -0.7365684509277344, + "logps/rejected": -0.9102706909179688, + "loss": 1.6014, + "nll_loss": 1.4943163394927979, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.0368284247815609, + "rewards/margins": 0.008685111068189144, + "rewards/rejected": -0.04551353678107262, "step": 645 }, { "epoch": 0.5162827640984908, - "grad_norm": 26.37677764892578, - "learning_rate": 2.6996942431136466e-06, - "log_odds_chosen": 0.342237263917923, - "log_odds_ratio": -0.613301694393158, - "logits/chosen": 273.4066162109375, - "logits/rejected": 396.9400939941406, - "logps/chosen": -0.9960271716117859, - "logps/rejected": -1.2160321474075317, - "loss": 1.3995, - "nll_loss": 1.4034605026245117, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.049801357090473175, - "rewards/margins": 0.011000247672200203, - "rewards/rejected": -0.06080160662531853, + "grad_norm": 44.73686599731445, + "learning_rate": 1.961161351381841e-06, + "log_odds_chosen": 0.4027363359928131, + "log_odds_ratio": -0.6090582013130188, + "logits/chosen": 262.2003173828125, + "logits/rejected": 391.3258972167969, + "logps/chosen": -0.9413054585456848, + "logps/rejected": -1.1956028938293457, + "loss": 1.3514, + "nll_loss": 1.3425610065460205, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.04706526920199394, + "rewards/margins": 0.012714875862002373, + "rewards/rejected": -0.059780143201351166, "step": 650 }, { "epoch": 0.5202541699761716, - "grad_norm": 46.33094024658203, - "learning_rate": 2.6659026247017418e-06, - "log_odds_chosen": 0.3311120867729187, - "log_odds_ratio": -0.7188401222229004, - "logits/chosen": 384.7994079589844, - "logits/rejected": 300.3538818359375, - "logps/chosen": -1.2231776714324951, - "logps/rejected": -1.4374616146087646, - "loss": 1.5018, - "nll_loss": 1.7496258020401, + "grad_norm": 64.39408874511719, + "learning_rate": 1.953661662911409e-06, + "log_odds_chosen": 0.3183760643005371, + "log_odds_ratio": -0.700135350227356, + "logits/chosen": 379.3218078613281, + "logits/rejected": 293.6615905761719, + "logps/chosen": -1.1582627296447754, + "logps/rejected": -1.3677259683609009, + "loss": 1.4144, + "nll_loss": 1.6593866348266602, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.06115889549255371, - "rewards/margins": 0.01071419008076191, - "rewards/rejected": -0.07187308371067047, + "rewards/chosen": -0.05791313573718071, + "rewards/margins": 0.010473157279193401, + "rewards/rejected": -0.06838629394769669, "step": 655 }, { "epoch": 0.5242255758538522, - "grad_norm": 70.27367401123047, - "learning_rate": 2.6320805329895495e-06, - "log_odds_chosen": 0.4950195848941803, - "log_odds_ratio": -0.6416879296302795, - "logits/chosen": 338.72930908203125, - "logits/rejected": 325.9527282714844, - "logps/chosen": -1.1382685899734497, - "logps/rejected": -1.4173352718353271, - "loss": 1.6332, - "nll_loss": 1.4613453149795532, + "grad_norm": 66.78807830810547, + "learning_rate": 1.9462473604038077e-06, + "log_odds_chosen": 0.5076483488082886, + "log_odds_ratio": -0.6378599405288696, + "logits/chosen": 338.880859375, + "logits/rejected": 322.9387512207031, + "logps/chosen": -1.0433294773101807, + "logps/rejected": -1.3180233240127563, + "loss": 1.5652, + "nll_loss": 1.3921977281570435, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.056913428008556366, - "rewards/margins": 0.01395334117114544, - "rewards/rejected": -0.07086677849292755, + "rewards/chosen": -0.052166469395160675, + "rewards/margins": 0.013734695501625538, + "rewards/rejected": -0.06590116769075394, "step": 660 }, { "epoch": 0.528196981731533, - "grad_norm": 32.2733039855957, - "learning_rate": 2.5982341804812946e-06, - "log_odds_chosen": 0.3531644642353058, - "log_odds_ratio": -0.5361741781234741, - "logits/chosen": 295.8695068359375, - "logits/rejected": 391.4307556152344, - "logps/chosen": -1.4157756567001343, - "logps/rejected": -1.6907964944839478, - "loss": 1.6299, - "nll_loss": 1.7296804189682007, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.07078878581523895, - "rewards/margins": 0.01375104021281004, - "rewards/rejected": -0.08453982323408127, + "grad_norm": 36.77963638305664, + "learning_rate": 1.938916835823703e-06, + "log_odds_chosen": 0.31942346692085266, + "log_odds_ratio": -0.550063967704773, + "logits/chosen": 291.7728576660156, + "logits/rejected": 386.6041564941406, + "logps/chosen": -1.384905219078064, + "logps/rejected": -1.6302177906036377, + "loss": 1.5694, + "nll_loss": 1.6492725610733032, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06924526393413544, + "rewards/margins": 0.012265628203749657, + "rewards/rejected": -0.08151089400053024, "step": 665 }, { "epoch": 0.5321683876092137, - "grad_norm": 42.08566665649414, - "learning_rate": 2.5643697841374722e-06, - "log_odds_chosen": 0.050149548798799515, - "log_odds_ratio": -0.7359659075737, - "logits/chosen": 320.44049072265625, - "logits/rejected": 269.22320556640625, - "logps/chosen": -1.551636815071106, - "logps/rejected": -1.6407535076141357, - "loss": 1.8105, - "nll_loss": 1.6291691064834595, + "grad_norm": 39.7949333190918, + "learning_rate": 1.9316685232156397e-06, + "log_odds_chosen": 0.03977243974804878, + "log_odds_ratio": -0.7474034428596497, + "logits/chosen": 319.749267578125, + "logits/rejected": 268.3997497558594, + "logps/chosen": -1.4536529779434204, + "logps/rejected": -1.5424761772155762, + "loss": 1.7192, + "nll_loss": 1.524833083152771, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.07758183777332306, - "rewards/margins": 0.004455844406038523, - "rewards/rejected": -0.08203768730163574, + "rewards/chosen": -0.07268264889717102, + "rewards/margins": 0.004441158380359411, + "rewards/rejected": -0.07712380588054657, "step": 670 }, { "epoch": 0.5361397934868943, - "grad_norm": 52.635318756103516, - "learning_rate": 2.5304935642329e-06, - "log_odds_chosen": 0.18347088992595673, - "log_odds_ratio": -0.621238112449646, - "logits/chosen": 344.8230895996094, - "logits/rejected": 346.5325927734375, - "logps/chosen": -1.3301855325698853, - "logps/rejected": -1.4845483303070068, - "loss": 1.7328, - "nll_loss": 1.6084213256835938, + "grad_norm": 47.65932846069336, + "learning_rate": 1.924500897298753e-06, + "log_odds_chosen": 0.24133019149303436, + "log_odds_ratio": -0.5948769450187683, + "logits/chosen": 342.0313415527344, + "logits/rejected": 344.55010986328125, + "logps/chosen": -1.2440179586410522, + "logps/rejected": -1.4411834478378296, + "loss": 1.6389, + "nll_loss": 1.5813580751419067, "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.06650927662849426, - "rewards/margins": 0.007718136068433523, - "rewards/rejected": -0.07422741502523422, + "rewards/chosen": -0.06220090389251709, + "rewards/margins": 0.009858268313109875, + "rewards/rejected": -0.07205917686223984, "step": 675 }, { "epoch": 0.5401111993645751, - "grad_norm": 43.74726486206055, - "learning_rate": 2.4966117432141726e-06, - "log_odds_chosen": 0.24497541785240173, - "log_odds_ratio": -0.6605706810951233, - "logits/chosen": 298.27667236328125, - "logits/rejected": 422.50897216796875, - "logps/chosen": -0.9108420610427856, - "logps/rejected": -1.1715190410614014, - "loss": 1.6037, - "nll_loss": 1.2029683589935303, + "grad_norm": 58.68126678466797, + "learning_rate": 1.917412472118426e-06, + "log_odds_chosen": 0.30001306533813477, + "log_odds_ratio": -0.6535278558731079, + "logits/chosen": 293.0940856933594, + "logits/rejected": 417.0559997558594, + "logps/chosen": -0.8398979902267456, + "logps/rejected": -1.1216331720352173, + "loss": 1.4459, + "nll_loss": 1.1451408863067627, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.04554210603237152, - "rewards/margins": 0.013033849187195301, - "rewards/rejected": -0.05857595056295395, + "rewards/chosen": -0.04199490323662758, + "rewards/margins": 0.014086750335991383, + "rewards/rejected": -0.05608165264129639, "step": 680 }, { "epoch": 0.5440826052422557, - "grad_norm": 64.52481079101562, - "learning_rate": 2.4627305445567048e-06, - "log_odds_chosen": 0.09129991382360458, - "log_odds_ratio": -0.6612669229507446, - "logits/chosen": 390.5311279296875, - "logits/rejected": 266.16888427734375, - "logps/chosen": -1.044427752494812, - "logps/rejected": -1.137475848197937, - "loss": 1.4905, - "nll_loss": 1.4350093603134155, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.0522213876247406, - "rewards/margins": 0.0046524060890078545, - "rewards/rejected": -0.05687378719449043, + "grad_norm": 63.648494720458984, + "learning_rate": 1.9104017997521752e-06, + "log_odds_chosen": 0.20501708984375, + "log_odds_ratio": -0.6081960201263428, + "logits/chosen": 390.64794921875, + "logits/rejected": 267.46661376953125, + "logps/chosen": -0.8991649746894836, + "logps/rejected": -1.0455691814422607, + "loss": 1.4251, + "nll_loss": 1.3118441104888916, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.044958241283893585, + "rewards/margins": 0.0073202140629291534, + "rewards/rejected": -0.05227845907211304, "step": 685 }, { "epoch": 0.5480540111199365, - "grad_norm": 50.33750534057617, - "learning_rate": 2.428856191621596e-06, - "log_odds_chosen": -0.4379616677761078, - "log_odds_ratio": -0.9384675025939941, - "logits/chosen": 295.41632080078125, - "logits/rejected": 335.3627624511719, - "logps/chosen": -1.5291237831115723, - "logps/rejected": -1.2289823293685913, - "loss": 1.5525, - "nll_loss": 1.6753852367401123, + "grad_norm": 51.855377197265625, + "learning_rate": 1.9034674690672024e-06, + "log_odds_chosen": -0.4093741476535797, + "log_odds_ratio": -0.9202106595039368, + "logits/chosen": 294.8217468261719, + "logits/rejected": 336.0784912109375, + "logps/chosen": -1.4474905729293823, + "logps/rejected": -1.1725715398788452, + "loss": 1.4859, + "nll_loss": 1.618402123451233, "rewards/accuracies": 0.0, - "rewards/chosen": -0.07645618915557861, - "rewards/margins": -0.015007075853645802, - "rewards/rejected": -0.061449117958545685, + "rewards/chosen": -0.07237453758716583, + "rewards/margins": -0.01374595146626234, + "rewards/rejected": -0.05862858146429062, "step": 690 }, { "epoch": 0.5520254169976172, - "grad_norm": 62.857566833496094, - "learning_rate": 2.3949949065125107e-06, - "log_odds_chosen": -0.1392946094274521, - "log_odds_ratio": -0.8439092636108398, - "logits/chosen": 298.41705322265625, - "logits/rejected": 304.42584228515625, - "logps/chosen": -1.1663509607315063, - "logps/rejected": -1.0636799335479736, - "loss": 1.3371, - "nll_loss": 1.299024224281311, + "grad_norm": 73.54180908203125, + "learning_rate": 1.8966081045272043e-06, + "log_odds_chosen": -0.12494969367980957, + "log_odds_ratio": -0.8439692258834839, + "logits/chosen": 293.6024475097656, + "logits/rejected": 299.53753662109375, + "logps/chosen": -1.091850996017456, + "logps/rejected": -1.0055066347122192, + "loss": 1.2878, + "nll_loss": 1.2297693490982056, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.05831754207611084, - "rewards/margins": -0.005133545026183128, - "rewards/rejected": -0.05318400263786316, + "rewards/chosen": -0.0545925572514534, + "rewards/margins": -0.004317224025726318, + "rewards/rejected": -0.050275325775146484, "step": 695 }, { "epoch": 0.5559968228752978, - "grad_norm": 36.2435417175293, - "learning_rate": 2.3611529089327893e-06, - "log_odds_chosen": 0.14748263359069824, - "log_odds_ratio": -0.6694773435592651, - "logits/chosen": 278.55145263671875, - "logits/rejected": 415.44378662109375, - "logps/chosen": -1.3171262741088867, - "logps/rejected": -1.4007153511047363, - "loss": 1.4775, - "nll_loss": 1.3410961627960205, + "grad_norm": 39.061195373535156, + "learning_rate": 1.8898223650461362e-06, + "log_odds_chosen": 0.16092145442962646, + "log_odds_ratio": -0.6716988682746887, + "logits/chosen": 274.28326416015625, + "logits/rejected": 411.262451171875, + "logps/chosen": -1.2318370342254639, + "logps/rejected": -1.321478009223938, + "loss": 1.4147, + "nll_loss": 1.2553250789642334, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.06585632264614105, - "rewards/margins": 0.004179453942924738, - "rewards/rejected": -0.07003577053546906, + "rewards/chosen": -0.061591845005750656, + "rewards/margins": 0.004482048097997904, + "rewards/rejected": -0.06607390195131302, "step": 700 }, { "epoch": 0.5599682287529786, - "grad_norm": 47.188453674316406, - "learning_rate": 2.327336415043006e-06, - "log_odds_chosen": 0.15575894713401794, - "log_odds_ratio": -0.6959660649299622, - "logits/chosen": 335.40582275390625, - "logits/rejected": 465.32843017578125, - "logps/chosen": -1.123983383178711, - "logps/rejected": -1.2077070474624634, - "loss": 1.5045, - "nll_loss": 1.2318366765975952, + "grad_norm": 59.83469009399414, + "learning_rate": 1.8831089428867739e-06, + "log_odds_chosen": 0.13619789481163025, + "log_odds_ratio": -0.6964500546455383, + "logits/chosen": 330.43310546875, + "logits/rejected": 458.30767822265625, + "logps/chosen": -1.0438668727874756, + "logps/rejected": -1.1042603254318237, + "loss": 1.3856, + "nll_loss": 1.1628286838531494, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.056199170649051666, - "rewards/margins": 0.004186179488897324, - "rewards/rejected": -0.06038535386323929, + "rewards/chosen": -0.05219334363937378, + "rewards/margins": 0.003019676310941577, + "rewards/rejected": -0.055213022977113724, "step": 705 }, { "epoch": 0.5639396346306592, - "grad_norm": 33.867916107177734, - "learning_rate": 2.2935516363191695e-06, - "log_odds_chosen": -0.3543465733528137, - "log_odds_ratio": -0.9505079388618469, - "logits/chosen": 280.46343994140625, - "logits/rejected": 292.0199279785156, - "logps/chosen": -1.207983374595642, - "logps/rejected": -0.9264053106307983, - "loss": 1.5108, - "nll_loss": 1.4482814073562622, + "grad_norm": 33.04471206665039, + "learning_rate": 1.876466562602004e-06, + "log_odds_chosen": -0.26594752073287964, + "log_odds_ratio": -0.9066311120986938, + "logits/chosen": 269.53826904296875, + "logits/rejected": 281.7684631347656, + "logps/chosen": -1.1670176982879639, + "logps/rejected": -0.9401714205741882, + "loss": 1.4375, + "nll_loss": 1.4048278331756592, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.060399167239665985, - "rewards/margins": -0.014078897424042225, - "rewards/rejected": -0.046320270746946335, + "rewards/chosen": -0.05835089087486267, + "rewards/margins": -0.011342315934598446, + "rewards/rejected": -0.04700857400894165, "step": 710 }, { "epoch": 0.56791104050834, - "grad_norm": 71.63556671142578, - "learning_rate": 2.259804778411786e-06, - "log_odds_chosen": 0.571071982383728, - "log_odds_ratio": -0.4987887740135193, - "logits/chosen": 361.73345947265625, - "logits/rejected": 394.4234313964844, - "logps/chosen": -0.8434446454048157, - "logps/rejected": -1.139594554901123, - "loss": 1.5099, - "nll_loss": 1.2623536586761475, + "grad_norm": 78.54840087890625, + "learning_rate": 1.8698939800169145e-06, + "log_odds_chosen": 0.5749568939208984, + "log_odds_ratio": -0.48962122201919556, + "logits/chosen": 352.38568115234375, + "logits/rejected": 383.9367980957031, + "logps/chosen": -0.8274946212768555, + "logps/rejected": -1.1295303106307983, + "loss": 1.3946, + "nll_loss": 1.2294576168060303, "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.042172230780124664, - "rewards/margins": 0.014807499945163727, - "rewards/rejected": -0.05697972699999809, + "rewards/chosen": -0.04137473553419113, + "rewards/margins": 0.015101781114935875, + "rewards/rejected": -0.05647651478648186, "step": 715 }, { "epoch": 0.5718824463860207, - "grad_norm": 31.155038833618164, - "learning_rate": 2.2261020400059986e-06, - "log_odds_chosen": 0.15770220756530762, - "log_odds_ratio": -0.6557624340057373, - "logits/chosen": 311.5298156738281, - "logits/rejected": 288.30810546875, - "logps/chosen": -1.1949565410614014, - "logps/rejected": -1.3707664012908936, - "loss": 1.4682, - "nll_loss": 1.4724897146224976, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.05974782258272171, - "rewards/margins": 0.008790492080152035, - "rewards/rejected": -0.06853832304477692, + "grad_norm": 32.714805603027344, + "learning_rate": 1.863389981249825e-06, + "log_odds_chosen": 0.10385574400424957, + "log_odds_ratio": -0.6922389268875122, + "logits/chosen": 301.9817810058594, + "logits/rejected": 279.5206604003906, + "logps/chosen": -1.1496083736419678, + "logps/rejected": -1.2928965091705322, + "loss": 1.3971, + "nll_loss": 1.4229730367660522, + "rewards/accuracies": 0.20000000298023224, + "rewards/chosen": -0.05748041719198227, + "rewards/margins": 0.00716440100222826, + "rewards/rejected": -0.06464481353759766, "step": 720 }, { "epoch": 0.5758538522637013, - "grad_norm": 32.814144134521484, - "learning_rate": 2.1924496116829996e-06, - "log_odds_chosen": 0.4697895050048828, - "log_odds_ratio": -0.6313791871070862, - "logits/chosen": 327.9530334472656, - "logits/rejected": 266.0186462402344, - "logps/chosen": -1.1744751930236816, - "logps/rejected": -1.497837781906128, - "loss": 1.4959, - "nll_loss": 1.3718366622924805, + "grad_norm": 41.090572357177734, + "learning_rate": 1.8569533817705187e-06, + "log_odds_chosen": 0.5001566410064697, + "log_odds_ratio": -0.6019404530525208, + "logits/chosen": 322.370361328125, + "logits/rejected": 263.5240173339844, + "logps/chosen": -1.0987929105758667, + "logps/rejected": -1.4239269495010376, + "loss": 1.4352, + "nll_loss": 1.3209176063537598, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.05872376635670662, - "rewards/margins": 0.016168128699064255, - "rewards/rejected": -0.07489189505577087, + "rewards/chosen": -0.05493964999914169, + "rewards/margins": 0.016256701201200485, + "rewards/rejected": -0.07119635492563248, "step": 725 }, { "epoch": 0.5798252581413821, - "grad_norm": 48.00249099731445, - "learning_rate": 2.158853674782928e-06, - "log_odds_chosen": -0.15365850925445557, - "log_odds_ratio": -1.1835048198699951, - "logits/chosen": 280.81500244140625, - "logits/rejected": 461.069580078125, - "logps/chosen": -1.4112962484359741, - "logps/rejected": -1.6498725414276123, - "loss": 1.4424, - "nll_loss": 1.5177103281021118, + "grad_norm": 43.46686935424805, + "learning_rate": 1.8505830254940132e-06, + "log_odds_chosen": -0.10105061531066895, + "log_odds_ratio": -1.1564183235168457, + "logits/chosen": 275.8767395019531, + "logits/rejected": 457.92132568359375, + "logps/chosen": -1.3419158458709717, + "logps/rejected": -1.6229051351547241, + "loss": 1.3517, + "nll_loss": 1.4537432193756104, "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.07056482136249542, - "rewards/margins": 0.011928820051252842, - "rewards/rejected": -0.08249364048242569, + "rewards/chosen": -0.0670957863330841, + "rewards/margins": 0.0140494704246521, + "rewards/rejected": -0.0811452642083168, "step": 730 }, { "epoch": 0.5837966640190627, - "grad_norm": 38.61474609375, - "learning_rate": 2.1253204002694777e-06, - "log_odds_chosen": 0.62263023853302, - "log_odds_ratio": -0.541793704032898, - "logits/chosen": 304.44573974609375, - "logits/rejected": 331.6595458984375, - "logps/chosen": -1.1065720319747925, - "logps/rejected": -1.4321861267089844, - "loss": 1.5616, - "nll_loss": 1.639901876449585, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.055328596383333206, - "rewards/margins": 0.01628071442246437, - "rewards/rejected": -0.07160931080579758, + "grad_norm": 47.49483871459961, + "learning_rate": 1.8442777839082938e-06, + "log_odds_chosen": 0.5534607768058777, + "log_odds_ratio": -0.5553954243659973, + "logits/chosen": 300.9742126464844, + "logits/rejected": 328.1264953613281, + "logps/chosen": -1.0749738216400146, + "logps/rejected": -1.3655563592910767, + "loss": 1.4955, + "nll_loss": 1.5789165496826172, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.05374868959188461, + "rewards/margins": 0.01452912949025631, + "rewards/rejected": -0.06827782094478607, "step": 735 }, { "epoch": 0.5877680698967435, - "grad_norm": 39.873226165771484, - "learning_rate": 2.091855947596401e-06, - "log_odds_chosen": -0.07302029430866241, - "log_odds_ratio": -0.8576405644416809, - "logits/chosen": 356.81842041015625, - "logits/rejected": 376.3813781738281, - "logps/chosen": -1.1168615818023682, - "logps/rejected": -1.0692174434661865, - "loss": 1.4753, - "nll_loss": 1.271024465560913, + "grad_norm": 47.386417388916016, + "learning_rate": 1.8380365552345197e-06, + "log_odds_chosen": -0.17396871745586395, + "log_odds_ratio": -0.9099219441413879, + "logits/chosen": 353.1138610839844, + "logits/rejected": 372.53668212890625, + "logps/chosen": -1.097666621208191, + "logps/rejected": -0.9902673959732056, + "loss": 1.4205, + "nll_loss": 1.2607418298721313, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.05584307760000229, - "rewards/margins": -0.0023822046350687742, - "rewards/rejected": -0.053460873663425446, + "rewards/chosen": -0.05488333851099014, + "rewards/margins": -0.005369964987039566, + "rewards/rejected": -0.04951336979866028, "step": 740 }, { "epoch": 0.5917394757744241, - "grad_norm": 54.21368408203125, - "learning_rate": 2.058466463576124e-06, - "log_odds_chosen": -0.6579837203025818, - "log_odds_ratio": -1.344585657119751, - "logits/chosen": 289.74786376953125, - "logits/rejected": 320.3457946777344, - "logps/chosen": -1.8980462551116943, - "logps/rejected": -1.3352105617523193, - "loss": 1.8089, - "nll_loss": 1.8561862707138062, + "grad_norm": 61.877071380615234, + "learning_rate": 1.8318582636182793e-06, + "log_odds_chosen": -0.670925498008728, + "log_odds_ratio": -1.3397860527038574, + "logits/chosen": 281.83233642578125, + "logits/rejected": 313.679443359375, + "logps/chosen": -1.7743791341781616, + "logps/rejected": -1.2447985410690308, + "loss": 1.7376, + "nll_loss": 1.7562745809555054, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.09490232169628143, - "rewards/margins": -0.02814178541302681, - "rewards/rejected": -0.06676053255796432, + "rewards/chosen": -0.0887189581990242, + "rewards/margins": -0.026479026302695274, + "rewards/rejected": -0.062239933758974075, "step": 745 }, { "epoch": 0.5957108816521048, - "grad_norm": 33.83228302001953, - "learning_rate": 2.0251580812506938e-06, - "log_odds_chosen": -0.6078636050224304, - "log_odds_ratio": -1.1107518672943115, - "logits/chosen": 339.91595458984375, - "logits/rejected": 320.0901794433594, - "logps/chosen": -1.4174226522445679, - "logps/rejected": -1.0588816404342651, - "loss": 1.4017, - "nll_loss": 1.365027904510498, + "grad_norm": 34.587196350097656, + "learning_rate": 1.8257418583505536e-06, + "log_odds_chosen": -0.6947991251945496, + "log_odds_ratio": -1.1835838556289673, + "logits/chosen": 328.31439208984375, + "logits/rejected": 305.95892333984375, + "logps/chosen": -1.4247385263442993, + "logps/rejected": -1.009553074836731, + "loss": 1.3579, + "nll_loss": 1.3562877178192139, "rewards/accuracies": 0.20000000298023224, - "rewards/chosen": -0.07087112963199615, - "rewards/margins": -0.017927050590515137, - "rewards/rejected": -0.052944086492061615, + "rewards/chosen": -0.07123693078756332, + "rewards/margins": -0.020759278908371925, + "rewards/rejected": -0.05047765374183655, "step": 750 }, { "epoch": 0.5996822875297856, - "grad_norm": 37.01515579223633, - "learning_rate": 1.9919369187652483e-06, - "log_odds_chosen": -0.07303062826395035, - "log_odds_ratio": -0.7524539232254028, - "logits/chosen": 389.5413513183594, - "logits/rejected": 321.3714294433594, - "logps/chosen": -1.183814287185669, - "logps/rejected": -1.1148512363433838, - "loss": 1.619, - "nll_loss": 1.339825987815857, + "grad_norm": 48.92048645019531, + "learning_rate": 1.8196863131170976e-06, + "log_odds_chosen": -0.017104322090744972, + "log_odds_ratio": -0.7284756898880005, + "logits/chosen": 381.6023254394531, + "logits/rejected": 315.03009033203125, + "logps/chosen": -1.0912220478057861, + "logps/rejected": -1.0588710308074951, + "loss": 1.5279, + "nll_loss": 1.2532793283462524, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.05919071286916733, - "rewards/margins": -0.003448158036917448, - "rewards/rejected": -0.05574256181716919, + "rewards/chosen": -0.05456110090017319, + "rewards/margins": -0.0016175527125597, + "rewards/rejected": -0.052943550050258636, "step": 755 }, { "epoch": 0.6036536934074662, - "grad_norm": 42.6160888671875, - "learning_rate": 1.9588090782442257e-06, - "log_odds_chosen": -0.02819465473294258, - "log_odds_ratio": -0.7264882922172546, - "logits/chosen": 313.9991760253906, - "logits/rejected": 334.552978515625, - "logps/chosen": -1.269689917564392, - "logps/rejected": -1.2629872560501099, - "loss": 1.5967, - "nll_loss": 1.4182217121124268, + "grad_norm": 60.20802688598633, + "learning_rate": 1.8136906252750293e-06, + "log_odds_chosen": -0.11121414601802826, + "log_odds_ratio": -0.7645635604858398, + "logits/chosen": 305.519287109375, + "logits/rejected": 327.36456298828125, + "logps/chosen": -1.2993478775024414, + "logps/rejected": -1.2332274913787842, + "loss": 1.5179, + "nll_loss": 1.4147006273269653, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.06348450481891632, - "rewards/margins": -0.0003351382911205292, - "rewards/rejected": -0.0631493628025055, + "rewards/chosen": -0.06496739387512207, + "rewards/margins": -0.003306013997644186, + "rewards/rejected": -0.061661381274461746, "step": 760 }, { "epoch": 0.607625099285147, - "grad_norm": 45.021705627441406, - "learning_rate": 1.9257806446705116e-06, - "log_odds_chosen": -0.48236551880836487, - "log_odds_ratio": -1.207824468612671, - "logits/chosen": 385.98260498046875, - "logits/rejected": 320.9479675292969, - "logps/chosen": -1.317742109298706, - "logps/rejected": -0.8753318786621094, - "loss": 1.6117, - "nll_loss": 1.4493136405944824, + "grad_norm": 56.269866943359375, + "learning_rate": 1.807753815155468e-06, + "log_odds_chosen": -0.48918837308883667, + "log_odds_ratio": -1.195225477218628, + "logits/chosen": 376.7154846191406, + "logits/rejected": 306.25811767578125, + "logps/chosen": -1.2335858345031738, + "logps/rejected": -0.8009617924690247, + "loss": 1.5646, + "nll_loss": 1.3844496011734009, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.06588710099458694, - "rewards/margins": -0.022120505571365356, - "rewards/rejected": -0.04376659542322159, + "rewards/chosen": -0.06167929247021675, + "rewards/margins": -0.02163120172917843, + "rewards/rejected": -0.04004809260368347, "step": 765 }, { "epoch": 0.6115965051628276, - "grad_norm": 45.63967514038086, - "learning_rate": 1.8928576847677404e-06, - "log_odds_chosen": 0.15900571644306183, - "log_odds_ratio": -0.71577388048172, - "logits/chosen": 349.0858459472656, - "logits/rejected": 374.12872314453125, - "logps/chosen": -0.9837914705276489, - "logps/rejected": -0.9930256009101868, - "loss": 1.8407, - "nll_loss": 1.8363087177276611, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.049189578741788864, - "rewards/margins": 0.0004617050290107727, - "rewards/rejected": -0.04965128004550934, + "grad_norm": 44.323631286621094, + "learning_rate": 1.801874925391118e-06, + "log_odds_chosen": 0.21221765875816345, + "log_odds_ratio": -0.6802242398262024, + "logits/chosen": 334.3392639160156, + "logits/rejected": 365.3385009765625, + "logps/chosen": -0.9386296272277832, + "logps/rejected": -1.0058612823486328, + "loss": 1.7071, + "nll_loss": 1.7459404468536377, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.04693147912621498, + "rewards/margins": 0.0033615841530263424, + "rewards/rejected": -0.05029306560754776, "step": 770 }, { "epoch": 0.6155679110405083, - "grad_norm": 49.38869094848633, - "learning_rate": 1.8600462458859492e-06, - "log_odds_chosen": 0.5004615187644958, - "log_odds_ratio": -0.5722527503967285, - "logits/chosen": 338.10382080078125, - "logits/rejected": 324.82861328125, - "logps/chosen": -0.9794312715530396, - "logps/rejected": -1.2764912843704224, - "loss": 1.6962, - "nll_loss": 1.4810049533843994, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.04897156357765198, - "rewards/margins": 0.014853003434836864, - "rewards/rejected": -0.06382457166910172, + "grad_norm": 59.921302795410156, + "learning_rate": 1.7960530202677493e-06, + "log_odds_chosen": 0.5171085000038147, + "log_odds_ratio": -0.5760589241981506, + "logits/chosen": 329.0060119628906, + "logits/rejected": 318.23797607421875, + "logps/chosen": -0.930219292640686, + "logps/rejected": -1.2369699478149414, + "loss": 1.5969, + "nll_loss": 1.449138879776001, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.0465109646320343, + "rewards/margins": 0.015337531454861164, + "rewards/rejected": -0.06184849888086319, "step": 775 }, { "epoch": 0.6195393169181891, - "grad_norm": 55.28865432739258, - "learning_rate": 1.8273523548907867e-06, - "log_odds_chosen": 0.8983039855957031, - "log_odds_ratio": -0.7501333951950073, - "logits/chosen": 323.56842041015625, - "logits/rejected": 315.9183654785156, - "logps/chosen": -1.0870964527130127, - "logps/rejected": -1.8820436000823975, - "loss": 1.7622, - "nll_loss": 1.5614144802093506, + "grad_norm": 59.61544418334961, + "learning_rate": 1.7902871850985824e-06, + "log_odds_chosen": 0.9015772938728333, + "log_odds_ratio": -0.7340617179870605, + "logits/chosen": 311.7084045410156, + "logits/rejected": 305.33599853515625, + "logps/chosen": -0.9682513475418091, + "logps/rejected": -1.7009541988372803, + "loss": 1.695, + "nll_loss": 1.4785038232803345, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.054354824125766754, - "rewards/margins": 0.03974735736846924, - "rewards/rejected": -0.09410218894481659, + "rewards/chosen": -0.048412568867206573, + "rewards/margins": 0.036635152995586395, + "rewards/rejected": -0.08504771441221237, "step": 780 }, { "epoch": 0.6235107227958697, - "grad_norm": 38.31996154785156, - "learning_rate": 1.7947820170564897e-06, - "log_odds_chosen": 1.0626842975616455, - "log_odds_ratio": -0.44282132387161255, - "logits/chosen": 351.11236572265625, - "logits/rejected": 300.69366455078125, - "logps/chosen": -1.2781195640563965, - "logps/rejected": -2.0489819049835205, - "loss": 1.4668, - "nll_loss": 1.7934401035308838, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.0639059767127037, - "rewards/margins": 0.03854311257600784, - "rewards/rejected": -0.10244909673929214, + "grad_norm": 44.869529724121094, + "learning_rate": 1.7845765256206243e-06, + "log_odds_chosen": 1.0826823711395264, + "log_odds_ratio": -0.4545539319515228, + "logits/chosen": 345.8677062988281, + "logits/rejected": 296.6705322265625, + "logps/chosen": -1.112697958946228, + "logps/rejected": -1.967371940612793, + "loss": 1.4201, + "nll_loss": 1.7027595043182373, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.05563489720225334, + "rewards/margins": 0.04273369535803795, + "rewards/rejected": -0.09836859256029129, "step": 785 }, { "epoch": 0.6274821286735505, - "grad_norm": 49.6168212890625, - "learning_rate": 1.7623412149628216e-06, - "log_odds_chosen": -0.2784636616706848, - "log_odds_ratio": -0.9712227582931519, - "logits/chosen": 294.8133239746094, - "logits/rejected": 370.743896484375, - "logps/chosen": -1.2229773998260498, - "logps/rejected": -1.0270025730133057, - "loss": 1.6546, - "nll_loss": 1.4282915592193604, + "grad_norm": 54.221893310546875, + "learning_rate": 1.7789201674120502e-06, + "log_odds_chosen": -0.34022170305252075, + "log_odds_ratio": -1.0099502801895142, + "logits/chosen": 290.6964111328125, + "logits/rejected": 367.1211853027344, + "logps/chosen": -1.201079249382019, + "logps/rejected": -0.9725133776664734, + "loss": 1.5892, + "nll_loss": 1.398667335510254, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.06114886328577995, - "rewards/margins": -0.009798737242817879, - "rewards/rejected": -0.051350127905607224, + "rewards/chosen": -0.06005396693944931, + "rewards/margins": -0.011428297497332096, + "rewards/rejected": -0.04862567037343979, "step": 790 }, { "epoch": 0.6314535345512311, - "grad_norm": 28.367610931396484, - "learning_rate": 1.7300359073961834e-06, - "log_odds_chosen": 0.42548590898513794, - "log_odds_ratio": -0.5335083603858948, - "logits/chosen": 337.03436279296875, - "logits/rejected": 383.9117736816406, - "logps/chosen": -1.062105655670166, - "logps/rejected": -1.415236473083496, - "loss": 1.3773, - "nll_loss": 1.3322608470916748, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.05310528352856636, - "rewards/margins": 0.017656544223427773, - "rewards/rejected": -0.07076182961463928, + "grad_norm": 33.76439666748047, + "learning_rate": 1.7733172553297718e-06, + "log_odds_chosen": 0.3912748694419861, + "log_odds_ratio": -0.555752158164978, + "logits/chosen": 332.48321533203125, + "logits/rejected": 379.13909912109375, + "logps/chosen": -1.0452964305877686, + "logps/rejected": -1.3820005655288696, + "loss": 1.3308, + "nll_loss": 1.2774577140808105, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.052264828234910965, + "rewards/margins": 0.016835201531648636, + "rewards/rejected": -0.0691000297665596, "step": 795 }, { "epoch": 0.6354249404289118, - "grad_norm": 98.34535217285156, - "learning_rate": 1.6978720282550897e-06, - "log_odds_chosen": 0.07821528613567352, - "log_odds_ratio": -0.665870726108551, - "logits/chosen": 305.55938720703125, - "logits/rejected": 395.2098388671875, - "logps/chosen": -0.9890663027763367, - "logps/rejected": -1.0237197875976562, - "loss": 1.6046, - "nll_loss": 1.4329578876495361, + "grad_norm": 112.01268768310547, + "learning_rate": 1.7677669529663689e-06, + "log_odds_chosen": 0.15832357108592987, + "log_odds_ratio": -0.6339753866195679, + "logits/chosen": 303.36553955078125, + "logits/rejected": 390.05645751953125, + "logps/chosen": -0.9267553091049194, + "logps/rejected": -1.0056819915771484, + "loss": 1.5535, + "nll_loss": 1.344155192375183, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.049453310668468475, - "rewards/margins": 0.0017326741944998503, - "rewards/rejected": -0.051185984164476395, + "rewards/chosen": -0.04633776843547821, + "rewards/margins": 0.003946331329643726, + "rewards/rejected": -0.05028409883379936, "step": 800 }, { "epoch": 0.6393963463065926, - "grad_norm": 34.472469329833984, - "learning_rate": 1.6658554854602222e-06, - "log_odds_chosen": 0.6897698640823364, - "log_odds_ratio": -0.41707152128219604, - "logits/chosen": 309.0857849121094, - "logits/rejected": 351.38153076171875, - "logps/chosen": -1.0435580015182495, - "logps/rejected": -1.5444471836090088, - "loss": 1.5348, - "nll_loss": 1.5355165004730225, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.052177898585796356, - "rewards/margins": 0.025044452399015427, - "rewards/rejected": -0.07722235471010208, + "grad_norm": 42.41415023803711, + "learning_rate": 1.7622684421256037e-06, + "log_odds_chosen": 0.885511040687561, + "log_odds_ratio": -0.3806975185871124, + "logits/chosen": 300.3759460449219, + "logits/rejected": 346.12249755859375, + "logps/chosen": -0.8861316442489624, + "logps/rejected": -1.4850671291351318, + "loss": 1.4821, + "nll_loss": 1.4846971035003662, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04430658370256424, + "rewards/margins": 0.029946770519018173, + "rewards/rejected": -0.07425335794687271, "step": 805 }, { "epoch": 0.6433677521842732, - "grad_norm": 48.47563171386719, - "learning_rate": 1.6339921598692476e-06, - "log_odds_chosen": 0.11802919209003448, - "log_odds_ratio": -0.7510842084884644, - "logits/chosen": 293.94256591796875, - "logits/rejected": 311.3507995605469, - "logps/chosen": -1.730385184288025, - "logps/rejected": -1.8177807331085205, - "loss": 1.7041, - "nll_loss": 1.842095136642456, + "grad_norm": 46.314781188964844, + "learning_rate": 1.7568209223157664e-06, + "log_odds_chosen": 0.2553045153617859, + "log_odds_ratio": -0.7045444846153259, + "logits/chosen": 285.037841796875, + "logits/rejected": 303.57769775390625, + "logps/chosen": -1.5863940715789795, + "logps/rejected": -1.8081388473510742, + "loss": 1.6063, + "nll_loss": 1.7009849548339844, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.086519256234169, - "rewards/margins": 0.0043697720393538475, - "rewards/rejected": -0.09088902175426483, + "rewards/chosen": -0.07931970804929733, + "rewards/margins": 0.011087236925959587, + "rewards/rejected": -0.09040693938732147, "step": 810 }, { "epoch": 0.647339158061954, - "grad_norm": 31.41144371032715, - "learning_rate": 1.6022879041966188e-06, - "log_odds_chosen": 0.3533809185028076, - "log_odds_ratio": -0.6280057430267334, - "logits/chosen": 335.3319396972656, - "logits/rejected": 264.1844787597656, - "logps/chosen": -1.0379221439361572, - "logps/rejected": -1.2540075778961182, - "loss": 1.3573, - "nll_loss": 1.4223954677581787, + "grad_norm": 38.062042236328125, + "learning_rate": 1.751423610260147e-06, + "log_odds_chosen": 0.33337265253067017, + "log_odds_ratio": -0.6462761759757996, + "logits/chosen": 331.96063232421875, + "logits/rejected": 264.3335876464844, + "logps/chosen": -1.0078685283660889, + "logps/rejected": -1.1855480670928955, + "loss": 1.329, + "nll_loss": 1.3942029476165771, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.0518961064517498, - "rewards/margins": 0.01080426573753357, - "rewards/rejected": -0.06270037591457367, + "rewards/chosen": -0.050393424928188324, + "rewards/margins": 0.00888398103415966, + "rewards/rejected": -0.059277404099702835, "step": 815 }, { "epoch": 0.6513105639396346, - "grad_norm": 44.874664306640625, - "learning_rate": 1.5707485419385293e-06, - "log_odds_chosen": -0.14202973246574402, - "log_odds_ratio": -0.8004404306411743, - "logits/chosen": 360.9006042480469, - "logits/rejected": 264.46337890625, - "logps/chosen": -1.2262499332427979, - "logps/rejected": -1.134313941001892, - "loss": 1.496, - "nll_loss": 1.3937432765960693, + "grad_norm": 48.024879455566406, + "learning_rate": 1.7460757394239458e-06, + "log_odds_chosen": -0.12280458211898804, + "log_odds_ratio": -0.7886728048324585, + "logits/chosen": 360.01708984375, + "logits/rejected": 266.03411865234375, + "logps/chosen": -1.1657707691192627, + "logps/rejected": -1.0915082693099976, + "loss": 1.4432, + "nll_loss": 1.3378136157989502, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.06131250411272049, - "rewards/margins": -0.00459679588675499, - "rewards/rejected": -0.0567157045006752, + "rewards/chosen": -0.058288536965847015, + "rewards/margins": -0.003713126527145505, + "rewards/rejected": -0.05457541346549988, "step": 820 }, { "epoch": 0.6552819698173153, - "grad_norm": 36.3102912902832, - "learning_rate": 1.539379866303245e-06, - "log_odds_chosen": 0.1242959052324295, - "log_odds_ratio": -0.6936241984367371, - "logits/chosen": 281.64874267578125, - "logits/rejected": 311.4866943359375, - "logps/chosen": -1.1951546669006348, - "logps/rejected": -1.2739152908325195, - "loss": 1.5673, - "nll_loss": 1.4124051332473755, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.05975773185491562, - "rewards/margins": 0.003938031382858753, - "rewards/rejected": -0.0636957660317421, + "grad_norm": 41.48712158203125, + "learning_rate": 1.7407765595569787e-06, + "log_odds_chosen": -0.009509158320724964, + "log_odds_ratio": -0.7556756734848022, + "logits/chosen": 279.5709533691406, + "logits/rejected": 309.6410217285156, + "logps/chosen": -1.2121888399124146, + "logps/rejected": -1.1930046081542969, + "loss": 1.5544, + "nll_loss": 1.386554479598999, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.06060944125056267, + "rewards/margins": -0.0009592041606083512, + "rewards/rejected": -0.0596502348780632, "step": 825 }, { "epoch": 0.659253375694996, - "grad_norm": 58.44258499145508, - "learning_rate": 1.508187639147001e-06, - "log_odds_chosen": 0.12791205942630768, - "log_odds_ratio": -0.6473852396011353, - "logits/chosen": 344.5693664550781, - "logits/rejected": 367.2503662109375, - "logps/chosen": -1.0457253456115723, - "logps/rejected": -1.142913579940796, - "loss": 1.5898, - "nll_loss": 1.3428993225097656, + "grad_norm": 105.25407409667969, + "learning_rate": 1.7355253362515584e-06, + "log_odds_chosen": 0.09693387895822525, + "log_odds_ratio": -0.6678298115730286, + "logits/chosen": 341.30438232421875, + "logits/rejected": 364.1517639160156, + "logps/chosen": -1.0246386528015137, + "logps/rejected": -1.1010067462921143, + "loss": 1.5619, + "nll_loss": 1.313808798789978, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.05228627473115921, - "rewards/margins": 0.004859411157667637, - "rewards/rejected": -0.05714568495750427, + "rewards/chosen": -0.051231931895017624, + "rewards/margins": 0.0038184034638106823, + "rewards/rejected": -0.055050335824489594, "step": 830 }, { "epoch": 0.6632247815726767, - "grad_norm": 33.92708206176758, - "learning_rate": 1.4771775899156487e-06, - "log_odds_chosen": 0.5014594793319702, - "log_odds_ratio": -0.5115988254547119, - "logits/chosen": 318.1674499511719, - "logits/rejected": 471.2735290527344, - "logps/chosen": -1.0235928297042847, - "logps/rejected": -1.36759614944458, - "loss": 1.4523, - "nll_loss": 1.3124425411224365, + "grad_norm": 30.412721633911133, + "learning_rate": 1.7303213505149572e-06, + "log_odds_chosen": 0.4479742646217346, + "log_odds_ratio": -0.5424279570579529, + "logits/chosen": 312.5921325683594, + "logits/rejected": 465.254150390625, + "logps/chosen": -0.9704440832138062, + "logps/rejected": -1.2700642347335815, + "loss": 1.4003, + "nll_loss": 1.2447351217269897, "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.05117964744567871, - "rewards/margins": 0.017200157046318054, - "rewards/rejected": -0.06837980449199677, + "rewards/chosen": -0.04852220416069031, + "rewards/margins": 0.01498100720345974, + "rewards/rejected": -0.0635032132267952, "step": 835 }, { "epoch": 0.6671961874503575, - "grad_norm": 27.84844207763672, - "learning_rate": 1.4463554145922603e-06, - "log_odds_chosen": -0.31632497906684875, - "log_odds_ratio": -0.9493728876113892, - "logits/chosen": 424.86517333984375, - "logits/rejected": 282.5466613769531, - "logps/chosen": -1.1070573329925537, - "logps/rejected": -0.9589263796806335, - "loss": 1.4065, - "nll_loss": 1.1693588495254517, + "grad_norm": 33.53908157348633, + "learning_rate": 1.7251638983558855e-06, + "log_odds_chosen": -0.34184280037879944, + "log_odds_ratio": -0.9526281356811523, + "logits/chosen": 418.00823974609375, + "logits/rejected": 276.333984375, + "logps/chosen": -1.0781444311141968, + "logps/rejected": -0.9165736436843872, + "loss": 1.3678, + "nll_loss": 1.1540253162384033, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.055352866649627686, - "rewards/margins": -0.007406541611999273, - "rewards/rejected": -0.047946326434612274, + "rewards/chosen": -0.05390722304582596, + "rewards/margins": -0.008078541606664658, + "rewards/rejected": -0.0458286814391613, "step": 840 }, { "epoch": 0.6711675933280381, - "grad_norm": 51.31157302856445, - "learning_rate": 1.4157267746508834e-06, - "log_odds_chosen": 0.07127873599529266, - "log_odds_ratio": -0.7540755867958069, - "logits/chosen": 357.64971923828125, - "logits/rejected": 300.25811767578125, - "logps/chosen": -1.264819860458374, - "logps/rejected": -1.2909610271453857, - "loss": 1.2753, - "nll_loss": 1.4475698471069336, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.06324099004268646, - "rewards/margins": 0.0013070597779005766, - "rewards/rejected": -0.0645480528473854, + "grad_norm": 38.89650344848633, + "learning_rate": 1.7200522903844539e-06, + "log_odds_chosen": -0.019759630784392357, + "log_odds_ratio": -0.7890421748161316, + "logits/chosen": 352.12933349609375, + "logits/rejected": 295.3460693359375, + "logps/chosen": -1.2261745929718018, + "logps/rejected": -1.1950092315673828, + "loss": 1.2377, + "nll_loss": 1.3689581155776978, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.06130873039364815, + "rewards/margins": -0.001558272517286241, + "rewards/rejected": -0.05975046008825302, "step": 845 }, { "epoch": 0.6751389992057188, - "grad_norm": 47.41500473022461, - "learning_rate": 1.385297296016631e-06, - "log_odds_chosen": 0.2415420562028885, - "log_odds_ratio": -0.5941085815429688, - "logits/chosen": 419.50408935546875, - "logits/rejected": 281.9603576660156, - "logps/chosen": -0.820796012878418, - "logps/rejected": -0.9756487011909485, - "loss": 1.5865, - "nll_loss": 1.0082799196243286, + "grad_norm": 44.82482147216797, + "learning_rate": 1.7149858514250883e-06, + "log_odds_chosen": 0.27604418992996216, + "log_odds_ratio": -0.5769887566566467, + "logits/chosen": 410.49627685546875, + "logits/rejected": 273.0401916503906, + "logps/chosen": -0.771110475063324, + "logps/rejected": -0.9429551362991333, + "loss": 1.5443, + "nll_loss": 0.988152801990509, "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.041039805859327316, - "rewards/margins": 0.007742627058178186, - "rewards/rejected": -0.048782430589199066, + "rewards/chosen": -0.03855552524328232, + "rewards/margins": 0.008592232130467892, + "rewards/rejected": -0.047147758305072784, "step": 850 }, { "epoch": 0.6791104050833995, - "grad_norm": 42.014591217041016, - "learning_rate": 1.3550725680322973e-06, - "log_odds_chosen": 1.7263425588607788, - "log_odds_ratio": -0.2393535077571869, - "logits/chosen": 317.6227111816406, - "logits/rejected": 302.24859619140625, - "logps/chosen": -0.7767224311828613, - "logps/rejected": -2.0884933471679688, - "loss": 1.5027, - "nll_loss": 1.3212028741836548, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.038836125284433365, - "rewards/margins": 0.06558854132890701, - "rewards/rejected": -0.10442467033863068, + "grad_norm": 41.895572662353516, + "learning_rate": 1.7099639201419239e-06, + "log_odds_chosen": 1.8448680639266968, + "log_odds_ratio": -0.22662608325481415, + "logits/chosen": 311.1007385253906, + "logits/rejected": 294.10302734375, + "logps/chosen": -0.6820273995399475, + "logps/rejected": -2.0481584072113037, + "loss": 1.4831, + "nll_loss": 1.288765788078308, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.034101370722055435, + "rewards/margins": 0.06830655783414841, + "rewards/rejected": -0.10240793228149414, "step": 855 }, { "epoch": 0.6830818109610802, - "grad_norm": 32.96746063232422, - "learning_rate": 1.3250581424317012e-06, - "log_odds_chosen": -0.05438203737139702, - "log_odds_ratio": -0.8108514547348022, - "logits/chosen": 328.84979248046875, - "logits/rejected": 378.38507080078125, - "logps/chosen": -1.1635057926177979, - "logps/rejected": -1.1345611810684204, - "loss": 1.456, - "nll_loss": 1.441853642463684, + "grad_norm": 32.27305603027344, + "learning_rate": 1.704985848676184e-06, + "log_odds_chosen": 0.05296659469604492, + "log_odds_ratio": -0.7537012100219727, + "logits/chosen": 322.4505310058594, + "logits/rejected": 371.6986389160156, + "logps/chosen": -1.063537836074829, + "logps/rejected": -1.099055290222168, + "loss": 1.4083, + "nll_loss": 1.3870474100112915, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.05817528814077377, - "rewards/margins": -0.0014472283655777574, - "rewards/rejected": -0.0567280575633049, + "rewards/chosen": -0.05317689850926399, + "rewards/margins": 0.0017758652102202177, + "rewards/rejected": -0.05495276302099228, "step": 860 }, { "epoch": 0.687053216838761, - "grad_norm": 25.131879806518555, - "learning_rate": 1.295259532319927e-06, - "log_odds_chosen": 1.491008996963501, - "log_odds_ratio": -0.33983761072158813, - "logits/chosen": 305.7498474121094, - "logits/rejected": 395.27630615234375, - "logps/chosen": -0.9085075259208679, - "logps/rejected": -1.8494819402694702, - "loss": 1.5229, - "nll_loss": 1.9765069484710693, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.045425377786159515, - "rewards/margins": 0.04704872891306877, - "rewards/rejected": -0.09247410297393799, + "grad_norm": 27.749332427978516, + "learning_rate": 1.700051002295115e-06, + "log_odds_chosen": 1.5363937616348267, + "log_odds_ratio": -0.324050635099411, + "logits/chosen": 302.7940979003906, + "logits/rejected": 389.976318359375, + "logps/chosen": -0.8558230400085449, + "logps/rejected": -1.8002240657806396, + "loss": 1.4722, + "nll_loss": 1.92082941532135, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.042791154235601425, + "rewards/margins": 0.04722005873918533, + "rewards/rejected": -0.09001120924949646, "step": 865 }, { "epoch": 0.6910246227164416, - "grad_norm": 37.967525482177734, - "learning_rate": 1.265682211160678e-06, - "log_odds_chosen": -0.6112550497055054, - "log_odds_ratio": -1.088639497756958, - "logits/chosen": 310.4002685546875, - "logits/rejected": 406.2005615234375, - "logps/chosen": -1.192030906677246, - "logps/rejected": -0.812456488609314, - "loss": 1.6589, - "nll_loss": 1.2748486995697021, - "rewards/accuracies": 0.20000000298023224, - "rewards/chosen": -0.059601545333862305, - "rewards/margins": -0.01897871494293213, - "rewards/rejected": -0.040622830390930176, + "grad_norm": 43.04403305053711, + "learning_rate": 1.6951587590520263e-06, + "log_odds_chosen": -0.35114437341690063, + "log_odds_ratio": -0.9282342791557312, + "logits/chosen": 306.46990966796875, + "logits/rejected": 401.5986328125, + "logps/chosen": -1.02718186378479, + "logps/rejected": -0.7973839640617371, + "loss": 1.6272, + "nll_loss": 1.2324786186218262, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.05135909467935562, + "rewards/margins": -0.01148989424109459, + "rewards/rejected": -0.03986920043826103, "step": 870 }, { "epoch": 0.6949960285941224, - "grad_norm": 75.36614227294922, - "learning_rate": 1.2363316117708912e-06, - "log_odds_chosen": 0.7237299680709839, - "log_odds_ratio": -0.6293952465057373, - "logits/chosen": 310.7578430175781, - "logits/rejected": 359.1610412597656, - "logps/chosen": -1.019719123840332, - "logps/rejected": -1.6588159799575806, - "loss": 1.5328, - "nll_loss": 1.2692029476165771, + "grad_norm": 259.5257568359375, + "learning_rate": 1.6903085094570331e-06, + "log_odds_chosen": 0.6448081731796265, + "log_odds_ratio": -0.649856448173523, + "logits/chosen": 305.8426818847656, + "logits/rejected": 355.3719787597656, + "logps/chosen": -1.023664116859436, + "logps/rejected": -1.60223388671875, + "loss": 1.4816, + "nll_loss": 1.2721219062805176, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.05098595470190048, - "rewards/margins": 0.031954843550920486, - "rewards/rejected": -0.08294080197811127, + "rewards/chosen": -0.051183201372623444, + "rewards/margins": 0.028928488492965698, + "rewards/rejected": -0.08011169731616974, "step": 875 }, { "epoch": 0.698967434471803, - "grad_norm": 36.7714958190918, - "learning_rate": 1.2072131253228292e-06, - "log_odds_chosen": -0.17302027344703674, - "log_odds_ratio": -0.8415447473526001, - "logits/chosen": 327.3581237792969, - "logits/rejected": 300.59027099609375, - "logps/chosen": -1.4463417530059814, - "logps/rejected": -1.3052947521209717, - "loss": 1.5306, - "nll_loss": 1.9659268856048584, + "grad_norm": 36.911766052246094, + "learning_rate": 1.6854996561581053e-06, + "log_odds_chosen": -0.09146185219287872, + "log_odds_ratio": -0.8007118105888367, + "logits/chosen": 322.17376708984375, + "logits/rejected": 294.0484619140625, + "logps/chosen": -1.3798397779464722, + "logps/rejected": -1.3085267543792725, + "loss": 1.4735, + "nll_loss": 1.8494446277618408, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.07231709361076355, - "rewards/margins": -0.007052358239889145, - "rewards/rejected": -0.0652647316455841, + "rewards/chosen": -0.0689919963479042, + "rewards/margins": -0.0035656639374792576, + "rewards/rejected": -0.06542633473873138, "step": 880 }, { "epoch": 0.7029388403494837, - "grad_norm": 39.60774612426758, - "learning_rate": 1.1783321003538262e-06, - "log_odds_chosen": 0.14880752563476562, - "log_odds_ratio": -0.6801349520683289, - "logits/chosen": 326.66217041015625, - "logits/rejected": 288.07977294921875, - "logps/chosen": -1.3033047914505005, - "logps/rejected": -1.410351037979126, - "loss": 1.5044, - "nll_loss": 1.411853551864624, + "grad_norm": 40.6159782409668, + "learning_rate": 1.680731613632036e-06, + "log_odds_chosen": 0.105155348777771, + "log_odds_ratio": -0.7062179446220398, + "logits/chosen": 321.5434875488281, + "logits/rejected": 283.28021240234375, + "logps/chosen": -1.280366063117981, + "logps/rejected": -1.3586736917495728, + "loss": 1.4686, + "nll_loss": 1.4260222911834717, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.06516523659229279, - "rewards/margins": 0.005352319683879614, - "rewards/rejected": -0.07051756232976913, + "rewards/chosen": -0.06401830166578293, + "rewards/margins": 0.0039153858087956905, + "rewards/rejected": -0.06793369352817535, "step": 885 }, { "epoch": 0.7069102462271644, - "grad_norm": 49.855010986328125, - "learning_rate": 1.1496938417838466e-06, - "log_odds_chosen": -0.6302076578140259, - "log_odds_ratio": -1.108147382736206, - "logits/chosen": 303.8426208496094, - "logits/rejected": 299.96282958984375, - "logps/chosen": -1.1776927709579468, - "logps/rejected": -0.8100347518920898, - "loss": 1.6152, - "nll_loss": 1.3265931606292725, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.0588846430182457, - "rewards/margins": -0.018382901325821877, - "rewards/rejected": -0.04050173982977867, + "grad_norm": 43.12873077392578, + "learning_rate": 1.6760038078849776e-06, + "log_odds_chosen": -0.557292640209198, + "log_odds_ratio": -1.0417410135269165, + "logits/chosen": 296.5141296386719, + "logits/rejected": 291.49896240234375, + "logps/chosen": -1.1542619466781616, + "logps/rejected": -0.8248146176338196, + "loss": 1.6357, + "nll_loss": 1.2914320230484009, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.0577131025493145, + "rewards/margins": -0.01647236943244934, + "rewards/rejected": -0.04124072939157486, "step": 890 }, { "epoch": 0.7108816521048451, - "grad_norm": 65.90828704833984, - "learning_rate": 1.1213036099410799e-06, - "log_odds_chosen": 0.3805133104324341, - "log_odds_ratio": -0.5482473373413086, - "logits/chosen": 334.39703369140625, - "logits/rejected": 280.8984680175781, - "logps/chosen": -1.1065565347671509, - "logps/rejected": -1.3260886669158936, - "loss": 1.5504, - "nll_loss": 1.5361745357513428, + "grad_norm": 84.90913391113281, + "learning_rate": 1.6713156761621891e-06, + "log_odds_chosen": 0.16997307538986206, + "log_odds_ratio": -0.6351084113121033, + "logits/chosen": 327.1002502441406, + "logits/rejected": 273.7991638183594, + "logps/chosen": -1.1396805047988892, + "logps/rejected": -1.2256426811218262, + "loss": 1.5056, + "nll_loss": 1.5154684782028198, "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.055327825248241425, - "rewards/margins": 0.010976609773933887, - "rewards/rejected": -0.06630443036556244, + "rewards/chosen": -0.056984029710292816, + "rewards/margins": 0.004298110492527485, + "rewards/rejected": -0.06128213554620743, "step": 895 }, { "epoch": 0.7148530579825259, - "grad_norm": 44.0485725402832, - "learning_rate": 1.0931666195957053e-06, - "log_odds_chosen": 0.4383140504360199, - "log_odds_ratio": -0.5253010392189026, - "logits/chosen": 337.86090087890625, - "logits/rejected": 394.5022888183594, - "logps/chosen": -0.7901648283004761, - "logps/rejected": -1.034003496170044, - "loss": 1.4815, - "nll_loss": 1.1732326745986938, + "grad_norm": 52.2535285949707, + "learning_rate": 1.6666666666666667e-06, + "log_odds_chosen": 0.559515655040741, + "log_odds_ratio": -0.49422377347946167, + "logits/chosen": 334.64312744140625, + "logits/rejected": 391.33038330078125, + "logps/chosen": -0.7214312553405762, + "logps/rejected": -0.986790657043457, + "loss": 1.4393, + "nll_loss": 1.1182574033737183, "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.03950824216008186, - "rewards/margins": 0.012191934511065483, - "rewards/rejected": -0.0517001748085022, + "rewards/chosen": -0.03607156500220299, + "rewards/margins": 0.01326796691864729, + "rewards/rejected": -0.04933953285217285, "step": 900 }, { "epoch": 0.7188244638602065, - "grad_norm": 40.06319046020508, - "learning_rate": 1.0652880390020398e-06, - "log_odds_chosen": 2.0690839290618896, - "log_odds_ratio": -0.2347683608531952, - "logits/chosen": 423.53961181640625, - "logits/rejected": 276.1210021972656, - "logps/chosen": -0.6150510311126709, - "logps/rejected": -1.5576661825180054, - "loss": 1.4353, - "nll_loss": 1.4714704751968384, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.030752548947930336, - "rewards/margins": 0.0471307598054409, - "rewards/rejected": -0.07788331806659698, + "grad_norm": 42.749168395996094, + "learning_rate": 1.6620562382863342e-06, + "log_odds_chosen": 1.9891941547393799, + "log_odds_ratio": -0.21987108886241913, + "logits/chosen": 414.6642150878906, + "logits/rejected": 266.611083984375, + "logps/chosen": -0.566085696220398, + "logps/rejected": -1.508644938468933, + "loss": 1.4136, + "nll_loss": 1.4871587753295898, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.028304290026426315, + "rewards/margins": 0.047127965837717056, + "rewards/rejected": -0.07543225586414337, "step": 905 }, { "epoch": 0.7227958697378872, - "grad_norm": 29.090539932250977, - "learning_rate": 1.0376729889492178e-06, - "log_odds_chosen": -0.0568159744143486, - "log_odds_ratio": -0.7396942377090454, - "logits/chosen": 287.9375915527344, - "logits/rejected": 300.72698974609375, - "logps/chosen": -1.0032362937927246, - "logps/rejected": -0.9725478887557983, - "loss": 1.5766, - "nll_loss": 1.5186008214950562, + "grad_norm": 28.15529441833496, + "learning_rate": 1.6574838603294898e-06, + "log_odds_chosen": 0.01718742772936821, + "log_odds_ratio": -0.7042349576950073, + "logits/chosen": 282.4758605957031, + "logits/rejected": 295.02996826171875, + "logps/chosen": -0.9299288988113403, + "logps/rejected": -0.9464460611343384, + "loss": 1.5259, + "nll_loss": 1.4807093143463135, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.05016181990504265, - "rewards/margins": -0.0015344202984124422, - "rewards/rejected": -0.04862739518284798, + "rewards/chosen": -0.0464964434504509, + "rewards/margins": 0.000825858092866838, + "rewards/rejected": -0.04732229933142662, "step": 910 }, { "epoch": 0.7267672756155679, - "grad_norm": 43.51217269897461, - "learning_rate": 1.0103265418205984e-06, - "log_odds_chosen": 0.6717264652252197, - "log_odds_ratio": -0.5441080927848816, - "logits/chosen": 299.4734802246094, - "logits/rejected": 346.14617919921875, - "logps/chosen": -0.7290914058685303, - "logps/rejected": -1.1217130422592163, - "loss": 1.5629, - "nll_loss": 1.4440972805023193, + "grad_norm": 37.33399200439453, + "learning_rate": 1.6529490122682157e-06, + "log_odds_chosen": 0.5594145059585571, + "log_odds_ratio": -0.5877448916435242, + "logits/chosen": 292.7896423339844, + "logits/rejected": 335.90289306640625, + "logps/chosen": -0.7141492366790771, + "logps/rejected": -1.103790044784546, + "loss": 1.5321, + "nll_loss": 1.445112943649292, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.03645457327365875, - "rewards/margins": 0.01963108405470848, - "rewards/rejected": -0.05608565732836723, + "rewards/chosen": -0.03570746257901192, + "rewards/margins": 0.019482046365737915, + "rewards/rejected": -0.05518950894474983, "step": 915 }, { "epoch": 0.7307386814932486, - "grad_norm": 43.0380859375, - "learning_rate": 9.832537206620594e-07, - "log_odds_chosen": 0.04898405075073242, - "log_odds_ratio": -0.8131389617919922, - "logits/chosen": 331.40570068359375, - "logits/rejected": 315.16229248046875, - "logps/chosen": -0.9992687106132507, - "logps/rejected": -1.0054936408996582, - "loss": 1.5654, - "nll_loss": 1.4301942586898804, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.049963437020778656, - "rewards/margins": 0.00031124576344154775, - "rewards/rejected": -0.05027468129992485, + "grad_norm": 38.81259536743164, + "learning_rate": 1.648451183489468e-06, + "log_odds_chosen": 0.08375336974859238, + "log_odds_ratio": -0.7926191091537476, + "logits/chosen": 324.5826110839844, + "logits/rejected": 307.3143005371094, + "logps/chosen": -0.9936789274215698, + "logps/rejected": -0.990047812461853, + "loss": 1.5009, + "nll_loss": 1.3815875053405762, + "rewards/accuracies": 0.20000000298023224, + "rewards/chosen": -0.04968394711613655, + "rewards/margins": -0.00018155350699089468, + "rewards/rejected": -0.04950239509344101, "step": 920 }, { "epoch": 0.7347100873709294, - "grad_norm": 37.88023376464844, - "learning_rate": 9.564594982593559e-07, - "log_odds_chosen": 0.1898471564054489, - "log_odds_ratio": -0.6842805743217468, - "logits/chosen": 346.6091613769531, - "logits/rejected": 315.18408203125, - "logps/chosen": -0.9871004819869995, - "logps/rejected": -1.0915769338607788, - "loss": 1.439, - "nll_loss": 1.487571358680725, + "grad_norm": 41.69913864135742, + "learning_rate": 1.643989873053573e-06, + "log_odds_chosen": 0.1969633847475052, + "log_odds_ratio": -0.6738036274909973, + "logits/chosen": 342.1506652832031, + "logits/rejected": 309.15814208984375, + "logps/chosen": -1.0204120874404907, + "logps/rejected": -1.1466134786605835, + "loss": 1.4047, + "nll_loss": 1.4902677536010742, "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.049355026334524155, - "rewards/margins": 0.005223819054663181, - "rewards/rejected": -0.05457884818315506, + "rewards/chosen": -0.051020603626966476, + "rewards/margins": 0.006310069467872381, + "rewards/rejected": -0.057330675423145294, "step": 925 }, { "epoch": 0.73868149324861, - "grad_norm": 54.20380783081055, - "learning_rate": 9.299487962247089e-07, - "log_odds_chosen": 0.022501707077026367, - "log_odds_ratio": -0.8337327837944031, - "logits/chosen": 302.9130554199219, - "logits/rejected": 279.71685791015625, - "logps/chosen": -1.2005774974822998, - "logps/rejected": -1.1395219564437866, - "loss": 1.6788, - "nll_loss": 1.584804654121399, + "grad_norm": 43.032135009765625, + "learning_rate": 1.6395645894598825e-06, + "log_odds_chosen": 0.13928785920143127, + "log_odds_ratio": -0.8347541689872742, + "logits/chosen": 297.34014892578125, + "logits/rejected": 275.1300354003906, + "logps/chosen": -1.1409282684326172, + "logps/rejected": -1.0776065587997437, + "loss": 1.8936, + "nll_loss": 1.5503441095352173, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.06002888083457947, - "rewards/margins": -0.0030527785420417786, - "rewards/rejected": -0.05697610229253769, + "rewards/chosen": -0.05704641342163086, + "rewards/margins": -0.003166080918163061, + "rewards/rejected": -0.05388033390045166, "step": 930 }, { "epoch": 0.7426528991262907, - "grad_norm": 47.39323425292969, - "learning_rate": 9.037264840927945e-07, - "log_odds_chosen": -0.10585136711597443, - "log_odds_ratio": -0.8327314257621765, - "logits/chosen": 284.0058898925781, - "logits/rejected": 345.33587646484375, - "logps/chosen": -1.1005109548568726, - "logps/rejected": -1.0587420463562012, - "loss": 1.3425, - "nll_loss": 1.6860910654067993, + "grad_norm": 57.749149322509766, + "learning_rate": 1.6351748504193218e-06, + "log_odds_chosen": -0.38780477643013, + "log_odds_ratio": -1.0117642879486084, + "logits/chosen": 280.0088195800781, + "logits/rejected": 341.8829345703125, + "logps/chosen": -1.1196801662445068, + "logps/rejected": -0.9303563237190247, + "loss": 1.3338, + "nll_loss": 1.6607334613800049, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.05502554774284363, - "rewards/margins": -0.002088439418002963, - "rewards/rejected": -0.052937109023332596, + "rewards/chosen": -0.0559840090572834, + "rewards/margins": -0.009466195479035378, + "rewards/rejected": -0.04651781544089317, "step": 935 }, { "epoch": 0.7466243050039714, - "grad_norm": 38.21221923828125, - "learning_rate": 8.777973784263016e-07, - "log_odds_chosen": -0.43362635374069214, - "log_odds_ratio": -0.9653336405754089, - "logits/chosen": 285.54193115234375, - "logits/rejected": 329.3501892089844, - "logps/chosen": -1.3442434072494507, - "logps/rejected": -1.126430869102478, - "loss": 1.4812, - "nll_loss": 1.4815846681594849, + "grad_norm": 54.09027862548828, + "learning_rate": 1.6308201826336057e-06, + "log_odds_chosen": -0.379691481590271, + "log_odds_ratio": -0.9309779405593872, + "logits/chosen": 283.4677429199219, + "logits/rejected": 327.2042541503906, + "logps/chosen": -1.322261095046997, + "logps/rejected": -1.131831169128418, + "loss": 1.4357, + "nll_loss": 1.4476417303085327, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.06721217930316925, - "rewards/margins": -0.01089063473045826, - "rewards/rejected": -0.05632154271006584, + "rewards/chosen": -0.06611306220293045, + "rewards/margins": -0.009521503001451492, + "rewards/rejected": -0.05659156292676926, "step": 940 }, { "epoch": 0.7505957108816521, - "grad_norm": 33.12229919433594, - "learning_rate": 8.521662419312124e-07, - "log_odds_chosen": 0.15825173258781433, - "log_odds_ratio": -0.7438204884529114, - "logits/chosen": 337.7537841796875, - "logits/rejected": 271.5417175292969, - "logps/chosen": -1.0434350967407227, - "logps/rejected": -1.243912696838379, - "loss": 1.5446, - "nll_loss": 1.3849962949752808, + "grad_norm": 31.14377784729004, + "learning_rate": 1.6265001215808888e-06, + "log_odds_chosen": 0.22349996864795685, + "log_odds_ratio": -0.666618824005127, + "logits/chosen": 335.06573486328125, + "logits/rejected": 268.8232116699219, + "logps/chosen": -1.0155109167099, + "logps/rejected": -1.231431484222412, + "loss": 1.5133, + "nll_loss": 1.3729729652404785, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.05217175558209419, - "rewards/margins": 0.010023881681263447, - "rewards/rejected": -0.062195636332035065, + "rewards/chosen": -0.05077555030584335, + "rewards/margins": 0.010796028189361095, + "rewards/rejected": -0.06157157942652702, "step": 945 }, { "epoch": 0.7545671167593329, - "grad_norm": 48.55085372924805, - "learning_rate": 8.268377825819821e-07, - "log_odds_chosen": -0.014403104782104492, - "log_odds_ratio": -0.7055137157440186, - "logits/chosen": 371.1424865722656, - "logits/rejected": 286.8011779785156, - "logps/chosen": -1.4028490781784058, - "logps/rejected": -1.39651620388031, - "loss": 1.5356, - "nll_loss": 1.620958924293518, + "grad_norm": 50.59060287475586, + "learning_rate": 1.6222142113076255e-06, + "log_odds_chosen": -0.010113936848938465, + "log_odds_ratio": -0.6997529864311218, + "logits/chosen": 368.07037353515625, + "logits/rejected": 283.2250671386719, + "logps/chosen": -1.3818244934082031, + "logps/rejected": -1.378185510635376, + "loss": 1.5137, + "nll_loss": 1.5937120914459229, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.07014245539903641, - "rewards/margins": -0.00031664298148825765, - "rewards/rejected": -0.06982581317424774, + "rewards/chosen": -0.06909122318029404, + "rewards/margins": -0.0001819491444621235, + "rewards/rejected": -0.06890927255153656, "step": 950 }, { "epoch": 0.7585385226370135, - "grad_norm": 36.20191192626953, - "learning_rate": 8.018166527567672e-07, - "log_odds_chosen": 1.0636457204818726, - "log_odds_ratio": -0.43774813413619995, - "logits/chosen": 259.2046813964844, - "logits/rejected": 375.35198974609375, - "logps/chosen": -0.868695080280304, - "logps/rejected": -1.445894479751587, - "loss": 1.4218, - "nll_loss": 1.3990987539291382, + "grad_norm": 38.753761291503906, + "learning_rate": 1.617962004226434e-06, + "log_odds_chosen": 1.379948377609253, + "log_odds_ratio": -0.3466225862503052, + "logits/chosen": 256.8636474609375, + "logits/rejected": 371.79840087890625, + "logps/chosen": -0.777528703212738, + "logps/rejected": -1.486754059791565, + "loss": 1.3994, + "nll_loss": 1.315920114517212, "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.0434347540140152, - "rewards/margins": 0.02885996736586094, - "rewards/rejected": -0.07229472696781158, + "rewards/chosen": -0.03887643292546272, + "rewards/margins": 0.035461269319057465, + "rewards/rejected": -0.07433770596981049, "step": 955 }, { "epoch": 0.7625099285146942, - "grad_norm": 29.8554744720459, - "learning_rate": 7.771074483828747e-07, - "log_odds_chosen": -0.15444841980934143, - "log_odds_ratio": -0.8071505427360535, - "logits/chosen": 290.59674072265625, - "logits/rejected": 347.6033630371094, - "logps/chosen": -1.40049147605896, - "logps/rejected": -1.3093515634536743, - "loss": 1.3432, - "nll_loss": 1.450141191482544, + "grad_norm": 33.10997009277344, + "learning_rate": 1.6137430609197571e-06, + "log_odds_chosen": -0.14383646845817566, + "log_odds_ratio": -0.8176695704460144, + "logits/chosen": 290.1402587890625, + "logits/rejected": 345.85064697265625, + "logps/chosen": -1.3824456930160522, + "logps/rejected": -1.3024415969848633, + "loss": 1.352, + "nll_loss": 1.4453611373901367, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.07002457231283188, - "rewards/margins": -0.0045569948852062225, - "rewards/rejected": -0.06546757370233536, + "rewards/chosen": -0.06912229210138321, + "rewards/margins": -0.004000205546617508, + "rewards/rejected": -0.0651220828294754, "step": 960 }, { "epoch": 0.7664813343923749, - "grad_norm": 39.03826904296875, - "learning_rate": 7.52714708092565e-07, - "log_odds_chosen": 0.04803264141082764, - "log_odds_ratio": -0.7985066175460815, - "logits/chosen": 321.512939453125, - "logits/rejected": 325.80853271484375, - "logps/chosen": -1.1335227489471436, - "logps/rejected": -1.0179483890533447, - "loss": 1.5082, - "nll_loss": 1.2471481561660767, + "grad_norm": 48.93876266479492, + "learning_rate": 1.6095569499491263e-06, + "log_odds_chosen": -0.012878346256911755, + "log_odds_ratio": -0.8197082281112671, + "logits/chosen": 314.2780456542969, + "logits/rejected": 319.3282470703125, + "logps/chosen": -1.1133010387420654, + "logps/rejected": -0.9631961584091187, + "loss": 1.4791, + "nll_loss": 1.242210030555725, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.05667613074183464, - "rewards/margins": -0.005778718274086714, - "rewards/rejected": -0.050897419452667236, + "rewards/chosen": -0.055665045976638794, + "rewards/margins": -0.00750524178147316, + "rewards/rejected": -0.04815980792045593, "step": 965 }, { "epoch": 0.7704527402700556, - "grad_norm": 43.7759895324707, - "learning_rate": 7.286429123893931e-07, - "log_odds_chosen": 0.35435453057289124, - "log_odds_ratio": -0.6048498153686523, - "logits/chosen": 308.8065490722656, - "logits/rejected": 375.87982177734375, - "logps/chosen": -1.6246349811553955, - "logps/rejected": -1.9498169422149658, - "loss": 1.6459, - "nll_loss": 1.7503254413604736, + "grad_norm": 50.061912536621094, + "learning_rate": 1.605403247669839e-06, + "log_odds_chosen": 0.3026159405708313, + "log_odds_ratio": -0.6236444711685181, + "logits/chosen": 304.08453369140625, + "logits/rejected": 370.66998291015625, + "logps/chosen": -1.6113126277923584, + "logps/rejected": -1.8838993310928345, + "loss": 1.628, + "nll_loss": 1.7272355556488037, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.0812317505478859, - "rewards/margins": 0.016259105876088142, - "rewards/rejected": -0.09749085456132889, + "rewards/chosen": -0.08056564629077911, + "rewards/margins": 0.013629332184791565, + "rewards/rejected": -0.09419497102499008, "step": 970 }, { "epoch": 0.7744241461477362, - "grad_norm": 54.47321319580078, - "learning_rate": 7.048964828252188e-07, - "log_odds_chosen": 0.9730485081672668, - "log_odds_ratio": -0.47064799070358276, - "logits/chosen": 364.9896545410156, - "logits/rejected": 361.5113830566406, - "logps/chosen": -0.9137029647827148, - "logps/rejected": -1.395819067955017, - "loss": 1.3761, - "nll_loss": 1.2809137105941772, + "grad_norm": 57.93197250366211, + "learning_rate": 1.6012815380508715e-06, + "log_odds_chosen": 1.213180661201477, + "log_odds_ratio": -0.42742282152175903, + "logits/chosen": 357.7371826171875, + "logits/rejected": 351.83795166015625, + "logps/chosen": -0.8961232304573059, + "logps/rejected": -1.4123413562774658, + "loss": 1.3272, + "nll_loss": 1.2730929851531982, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.04568514600396156, - "rewards/margins": 0.02410580962896347, - "rewards/rejected": -0.06979095190763474, + "rewards/chosen": -0.044806160032749176, + "rewards/margins": 0.025810906663537025, + "rewards/rejected": -0.07061706483364105, "step": 975 }, { "epoch": 0.778395552025417, - "grad_norm": 46.42416000366211, - "learning_rate": 6.814797811880525e-07, - "log_odds_chosen": 0.18214160203933716, - "log_odds_ratio": -0.714030921459198, - "logits/chosen": 386.58782958984375, - "logits/rejected": 275.394775390625, - "logps/chosen": -1.3727108240127563, - "logps/rejected": -1.474959373474121, - "loss": 1.4134, - "nll_loss": 1.827099084854126, + "grad_norm": 48.875247955322266, + "learning_rate": 1.59719141249985e-06, + "log_odds_chosen": 0.12866242229938507, + "log_odds_ratio": -0.7350910902023315, + "logits/chosen": 377.27935791015625, + "logits/rejected": 266.5240783691406, + "logps/chosen": -1.3747342824935913, + "logps/rejected": -1.4584678411483765, + "loss": 1.3958, + "nll_loss": 1.8285316228866577, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.06863553822040558, - "rewards/margins": 0.005112423561513424, - "rewards/rejected": -0.07374797016382217, + "rewards/chosen": -0.0687367171049118, + "rewards/margins": 0.004186672158539295, + "rewards/rejected": -0.07292339205741882, "step": 980 }, { "epoch": 0.7823669579030977, - "grad_norm": 53.367584228515625, - "learning_rate": 6.583971087008654e-07, - "log_odds_chosen": 0.1975199282169342, - "log_odds_ratio": -0.6967960596084595, - "logits/chosen": 292.4072265625, - "logits/rejected": 433.8272399902344, - "logps/chosen": -0.9060415029525757, - "logps/rejected": -0.9691941142082214, - "loss": 1.3796, - "nll_loss": 1.407875657081604, + "grad_norm": 58.968650817871094, + "learning_rate": 1.5931324696929157e-06, + "log_odds_chosen": 0.25052839517593384, + "log_odds_ratio": -0.6790642738342285, + "logits/chosen": 283.9019470214844, + "logits/rejected": 426.68701171875, + "logps/chosen": -0.8618942499160767, + "logps/rejected": -0.9527530670166016, + "loss": 1.3527, + "nll_loss": 1.38225519657135, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.04530208185315132, - "rewards/margins": 0.0031576238106936216, - "rewards/rejected": -0.04845970869064331, + "rewards/chosen": -0.04309471324086189, + "rewards/margins": 0.004542945884168148, + "rewards/rejected": -0.047637660056352615, "step": 985 }, { "epoch": 0.7863383637807784, - "grad_norm": 50.85255813598633, - "learning_rate": 6.356527052315403e-07, - "log_odds_chosen": -0.2651984691619873, - "log_odds_ratio": -0.8623536825180054, - "logits/chosen": 372.86358642578125, - "logits/rejected": 285.08172607421875, - "logps/chosen": -1.2994807958602905, - "logps/rejected": -1.1402140855789185, - "loss": 1.5812, - "nll_loss": 1.4798548221588135, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.06497403979301453, - "rewards/margins": -0.007963338866829872, - "rewards/rejected": -0.0570107102394104, + "grad_norm": 55.875274658203125, + "learning_rate": 1.5891043154093205e-06, + "log_odds_chosen": -0.32832685112953186, + "log_odds_ratio": -0.8919731974601746, + "logits/chosen": 365.53662109375, + "logits/rejected": 278.527099609375, + "logps/chosen": -1.341890573501587, + "logps/rejected": -1.1177005767822266, + "loss": 1.5727, + "nll_loss": 1.4955816268920898, + "rewards/accuracies": 0.20000000298023224, + "rewards/chosen": -0.06709453463554382, + "rewards/margins": -0.011209504678845406, + "rewards/rejected": -0.05588502809405327, "step": 990 }, { "epoch": 0.7903097696584591, - "grad_norm": 79.17274475097656, - "learning_rate": 6.132507485140843e-07, - "log_odds_chosen": 1.5265482664108276, - "log_odds_ratio": -0.2821381688117981, - "logits/chosen": 388.9686279296875, - "logits/rejected": 307.1571350097656, - "logps/chosen": -0.7675724625587463, - "logps/rejected": -1.6737315654754639, - "loss": 1.4555, - "nll_loss": 1.2786924839019775, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.03837861865758896, - "rewards/margins": 0.045307956635951996, - "rewards/rejected": -0.08368657529354095, + "grad_norm": 82.46007537841797, + "learning_rate": 1.5851065623706038e-06, + "log_odds_chosen": 1.42555832862854, + "log_odds_ratio": -0.3155195116996765, + "logits/chosen": 380.34832763671875, + "logits/rejected": 302.12542724609375, + "logps/chosen": -0.7897301912307739, + "logps/rejected": -1.5760501623153687, + "loss": 1.4242, + "nll_loss": 1.2763941287994385, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.03948650881648064, + "rewards/margins": 0.039315998554229736, + "rewards/rejected": -0.07880251109600067, "step": 995 }, { "epoch": 0.7942811755361397, - "grad_norm": 41.524253845214844, - "learning_rate": 5.911953533812506e-07, - "log_odds_chosen": -0.3479143977165222, - "log_odds_ratio": -0.9029040336608887, - "logits/chosen": 338.65325927734375, - "logits/rejected": 374.2637939453125, - "logps/chosen": -0.9410643577575684, - "logps/rejected": -0.7491464614868164, - "loss": 1.655, - "nll_loss": 1.3791284561157227, + "grad_norm": 72.97930145263672, + "learning_rate": 1.5811388300841898e-06, + "log_odds_chosen": -0.25392764806747437, + "log_odds_ratio": -0.873538613319397, + "logits/chosen": 334.4622497558594, + "logits/rejected": 370.5467834472656, + "logps/chosen": -0.9442771077156067, + "logps/rejected": -0.8281978368759155, + "loss": 1.6114, + "nll_loss": 1.3259779214859009, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.047053221613168716, - "rewards/margins": -0.009595893323421478, - "rewards/rejected": -0.03745732456445694, + "rewards/chosen": -0.04721385985612869, + "rewards/margins": -0.005803964100778103, + "rewards/rejected": -0.04140989109873772, "step": 1000 }, { "epoch": 0.7982525814138205, - "grad_norm": 47.42415237426758, - "learning_rate": 5.694905710087217e-07, - "log_odds_chosen": 0.20326891541481018, - "log_odds_ratio": -0.6220360398292542, - "logits/chosen": 397.6610107421875, - "logits/rejected": 313.22088623046875, - "logps/chosen": -0.8882730603218079, - "logps/rejected": -1.0516657829284668, - "loss": 1.6773, - "nll_loss": 1.243209719657898, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.04441365599632263, - "rewards/margins": 0.008169631473720074, - "rewards/rejected": -0.05258328840136528, + "grad_norm": 53.86620330810547, + "learning_rate": 1.5772007446912793e-06, + "log_odds_chosen": 0.22081449627876282, + "log_odds_ratio": -0.6176373362541199, + "logits/chosen": 391.4696350097656, + "logits/rejected": 308.0226135253906, + "logps/chosen": -0.8516048192977905, + "logps/rejected": -1.0242515802383423, + "loss": 1.7721, + "nll_loss": 1.211578607559204, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.04258023947477341, + "rewards/margins": 0.008632339537143707, + "rewards/rejected": -0.05121258646249771, "step": 1005 }, { "epoch": 0.8022239872915012, - "grad_norm": 45.351253509521484, - "learning_rate": 5.481403881709815e-07, - "log_odds_chosen": 0.5789100527763367, - "log_odds_ratio": -0.886620044708252, - "logits/chosen": 345.7013854980469, - "logits/rejected": 279.64801025390625, - "logps/chosen": -0.9864595532417297, - "logps/rejected": -1.3232471942901611, - "loss": 1.6636, - "nll_loss": 1.7329130172729492, + "grad_norm": 50.31648254394531, + "learning_rate": 1.5732919388188816e-06, + "log_odds_chosen": 0.5329298973083496, + "log_odds_ratio": -0.8860853314399719, + "logits/chosen": 341.54229736328125, + "logits/rejected": 272.69091796875, + "logps/chosen": -0.967967689037323, + "logps/rejected": -1.2983975410461426, + "loss": 1.6175, + "nll_loss": 1.6894325017929077, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.04932297766208649, - "rewards/margins": 0.01683938130736351, - "rewards/rejected": -0.0661623626947403, + "rewards/chosen": -0.04839838296175003, + "rewards/margins": 0.016521494835615158, + "rewards/rejected": -0.06491987407207489, "step": 1010 }, { "epoch": 0.8061953931691819, - "grad_norm": 45.76054382324219, - "learning_rate": 5.271487265090163e-07, - "log_odds_chosen": 0.1340581774711609, - "log_odds_ratio": -0.6442204117774963, - "logits/chosen": 253.32131958007812, - "logits/rejected": 293.978759765625, - "logps/chosen": -0.9866166114807129, - "logps/rejected": -1.0805059671401978, - "loss": 1.3506, - "nll_loss": 1.1058270931243896, + "grad_norm": 49.962276458740234, + "learning_rate": 1.5694120514358613e-06, + "log_odds_chosen": 0.13751927018165588, + "log_odds_ratio": -0.6340736150741577, + "logits/chosen": 251.845947265625, + "logits/rejected": 292.0717468261719, + "logps/chosen": -0.9524608850479126, + "logps/rejected": -1.0290417671203613, + "loss": 1.3444, + "nll_loss": 1.1181609630584717, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.04933083802461624, - "rewards/margins": 0.004694463685154915, - "rewards/rejected": -0.05402529984712601, + "rewards/chosen": -0.04762304201722145, + "rewards/margins": 0.003829048480838537, + "rewards/rejected": -0.051452092826366425, "step": 1015 }, { "epoch": 0.8101667990468626, - "grad_norm": 30.294597625732422, - "learning_rate": 5.06519441809982e-07, - "log_odds_chosen": 0.6967889666557312, - "log_odds_ratio": -0.5373490452766418, - "logits/chosen": 318.8917541503906, - "logits/rejected": 270.747802734375, - "logps/chosen": -1.074588418006897, - "logps/rejected": -1.4418222904205322, - "loss": 1.4542, - "nll_loss": 1.5221776962280273, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.05372941493988037, - "rewards/margins": 0.01836169883608818, - "rewards/rejected": -0.07209111005067825, + "grad_norm": 28.708763122558594, + "learning_rate": 1.565560727712874e-06, + "log_odds_chosen": 0.4968925416469574, + "log_odds_ratio": -0.6516743898391724, + "logits/chosen": 311.68353271484375, + "logits/rejected": 264.3401184082031, + "logps/chosen": -1.1515090465545654, + "logps/rejected": -1.4103407859802246, + "loss": 1.4494, + "nll_loss": 1.5162445306777954, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.05757545307278633, + "rewards/margins": 0.012941589578986168, + "rewards/rejected": -0.07051704823970795, "step": 1020 }, { "epoch": 0.8141382049245433, - "grad_norm": 53.206661224365234, - "learning_rate": 4.862563232989643e-07, - "log_odds_chosen": 0.20271643996238708, - "log_odds_ratio": -0.6173663139343262, - "logits/chosen": 485.3902893066406, - "logits/rejected": 313.65185546875, - "logps/chosen": -1.0637105703353882, - "logps/rejected": -1.2300751209259033, - "loss": 1.4923, - "nll_loss": 1.1044560670852661, + "grad_norm": 52.76094055175781, + "learning_rate": 1.561737618886061e-06, + "log_odds_chosen": 0.09350456297397614, + "log_odds_ratio": -0.6697710752487183, + "logits/chosen": 481.36810302734375, + "logits/rejected": 311.92694091796875, + "logps/chosen": -1.1021904945373535, + "logps/rejected": -1.1937323808670044, + "loss": 1.5063, + "nll_loss": 1.150399923324585, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.05318553373217583, - "rewards/margins": 0.008318223990499973, - "rewards/rejected": -0.061503756791353226, + "rewards/chosen": -0.05510953068733215, + "rewards/margins": 0.004577091429382563, + "rewards/rejected": -0.05968661978840828, "step": 1025 }, { "epoch": 0.818109610802224, - "grad_norm": 78.89067077636719, - "learning_rate": 4.663630929429674e-07, - "log_odds_chosen": 0.3912231922149658, - "log_odds_ratio": -0.5344873666763306, - "logits/chosen": 278.98651123046875, - "logits/rejected": 279.85418701171875, - "logps/chosen": -0.7328051924705505, - "logps/rejected": -0.9687238931655884, - "loss": 1.497, - "nll_loss": 1.046706199645996, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.036640264093875885, - "rewards/margins": 0.011795936152338982, - "rewards/rejected": -0.048436202108860016, + "grad_norm": 72.83948516845703, + "learning_rate": 1.5579423821243897e-06, + "log_odds_chosen": 0.3802599310874939, + "log_odds_ratio": -0.5428605675697327, + "logits/chosen": 277.2724609375, + "logits/rejected": 278.0252685546875, + "logps/chosen": -0.7227882742881775, + "logps/rejected": -0.9464675784111023, + "loss": 1.5166, + "nll_loss": 1.0390559434890747, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.03613941743969917, + "rewards/margins": 0.011183961294591427, + "rewards/rejected": -0.047323379665613174, "step": 1030 }, { "epoch": 0.8220810166799047, - "grad_norm": 51.44734573364258, - "learning_rate": 4.46843404767259e-07, - "log_odds_chosen": 0.3833610415458679, - "log_odds_ratio": -0.8149446249008179, - "logits/chosen": 314.2817077636719, - "logits/rejected": 307.8243103027344, - "logps/chosen": -1.357617735862732, - "logps/rejected": -1.7892076969146729, - "loss": 1.459, - "nll_loss": 1.4600521326065063, + "grad_norm": 47.56222152709961, + "learning_rate": 1.554174680400523e-06, + "log_odds_chosen": 0.40021246671676636, + "log_odds_ratio": -0.7876571416854858, + "logits/chosen": 311.06353759765625, + "logits/rejected": 303.9035949707031, + "logps/chosen": -1.366424560546875, + "logps/rejected": -1.8226970434188843, + "loss": 1.4538, + "nll_loss": 1.4211769104003906, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.06788089126348495, - "rewards/margins": 0.021579492837190628, - "rewards/rejected": -0.08946038782596588, + "rewards/chosen": -0.06832123547792435, + "rewards/margins": 0.022813621908426285, + "rewards/rejected": -0.09113486111164093, "step": 1035 }, { "epoch": 0.8260524225575854, - "grad_norm": 39.02664566040039, - "learning_rate": 4.2770084418418736e-07, - "log_odds_chosen": 0.27395057678222656, - "log_odds_ratio": -0.7102149128913879, - "logits/chosen": 414.571533203125, - "logits/rejected": 287.80877685546875, - "logps/chosen": -0.8411673307418823, - "logps/rejected": -0.9671209454536438, - "loss": 1.5205, - "nll_loss": 1.5072977542877197, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.042058371007442474, - "rewards/margins": 0.006297673098742962, - "rewards/rejected": -0.04835604503750801, + "grad_norm": 65.6354751586914, + "learning_rate": 1.5504341823651056e-06, + "log_odds_chosen": 0.1901915967464447, + "log_odds_ratio": -0.7493601441383362, + "logits/chosen": 409.8924865722656, + "logits/rejected": 282.7567443847656, + "logps/chosen": -0.8832821846008301, + "logps/rejected": -0.9766547083854675, + "loss": 1.519, + "nll_loss": 1.5573896169662476, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.044164109975099564, + "rewards/margins": 0.004668629728257656, + "rewards/rejected": -0.048832736909389496, "step": 1040 }, { "epoch": 0.8300238284352661, - "grad_norm": 44.10149383544922, - "learning_rate": 4.089389273346084e-07, - "log_odds_chosen": 1.4467592239379883, - "log_odds_ratio": -0.36090224981307983, - "logits/chosen": 336.5260009765625, - "logits/rejected": 268.7255859375, - "logps/chosen": -0.579176664352417, - "logps/rejected": -1.433186650276184, - "loss": 1.4451, - "nll_loss": 2.0459094047546387, + "grad_norm": 45.96513366699219, + "learning_rate": 1.546720562224365e-06, + "log_odds_chosen": 1.7359260320663452, + "log_odds_ratio": -0.31901225447654724, + "logits/chosen": 329.9159240722656, + "logits/rejected": 264.3152160644531, + "logps/chosen": -0.5153332352638245, + "logps/rejected": -1.4465187788009644, + "loss": 1.4326, + "nll_loss": 1.9956477880477905, "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.028958836570382118, - "rewards/margins": 0.042700495570898056, - "rewards/rejected": -0.07165933400392532, + "rewards/chosen": -0.025766659528017044, + "rewards/margins": 0.046559277921915054, + "rewards/rejected": -0.0723259299993515, "step": 1045 }, { "epoch": 0.8339952343129468, - "grad_norm": 29.071208953857422, - "learning_rate": 3.9056110044203594e-07, - "log_odds_chosen": 0.3594241142272949, - "log_odds_ratio": -0.6073828935623169, - "logits/chosen": 340.73431396484375, - "logits/rejected": 293.41363525390625, - "logps/chosen": -1.039194107055664, - "logps/rejected": -1.395516037940979, - "loss": 1.3767, - "nll_loss": 1.1890077590942383, + "grad_norm": 46.372764587402344, + "learning_rate": 1.5430334996209192e-06, + "log_odds_chosen": 0.3377481997013092, + "log_odds_ratio": -0.6260396242141724, + "logits/chosen": 338.7218322753906, + "logits/rejected": 290.7141418457031, + "logps/chosen": -1.0393493175506592, + "logps/rejected": -1.3871691226959229, + "loss": 1.3843, + "nll_loss": 1.16513192653656, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.05195971205830574, - "rewards/margins": 0.017816094681620598, - "rewards/rejected": -0.06977580487728119, + "rewards/chosen": -0.05196746438741684, + "rewards/margins": 0.017390986904501915, + "rewards/rejected": -0.0693584531545639, "step": 1050 }, { "epoch": 0.8379666401906275, - "grad_norm": 50.16731643676758, - "learning_rate": 3.72570739179631e-07, - "log_odds_chosen": 0.49487370252609253, - "log_odds_ratio": -0.49297910928726196, - "logits/chosen": 280.0902404785156, - "logits/rejected": 468.6788024902344, - "logps/chosen": -1.0811553001403809, - "logps/rejected": -1.4258912801742554, - "loss": 1.453, - "nll_loss": 1.3579655885696411, + "grad_norm": 45.74534225463867, + "learning_rate": 1.539372679517698e-06, + "log_odds_chosen": 0.5245014429092407, + "log_odds_ratio": -0.4880025386810303, + "logits/chosen": 277.099365234375, + "logits/rejected": 466.10931396484375, + "logps/chosen": -1.0697903633117676, + "logps/rejected": -1.429344654083252, + "loss": 1.4411, + "nll_loss": 1.334235429763794, "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.0540577657520771, - "rewards/margins": 0.017236804589629173, - "rewards/rejected": -0.07129456847906113, + "rewards/chosen": -0.05348951742053032, + "rewards/margins": 0.01797771267592907, + "rewards/rejected": -0.07146723568439484, "step": 1055 }, { "epoch": 0.8419380460683081, - "grad_norm": 33.406272888183594, - "learning_rate": 3.5497114805015223e-07, - "log_odds_chosen": 0.14397627115249634, - "log_odds_ratio": -0.6715080142021179, - "logits/chosen": 373.4464416503906, - "logits/rejected": 325.99163818359375, - "logps/chosen": -0.9457298517227173, - "logps/rejected": -1.0310219526290894, - "loss": 1.4311, - "nll_loss": 1.5625425577163696, + "grad_norm": 54.054969787597656, + "learning_rate": 1.5357377920848783e-06, + "log_odds_chosen": 0.16159498691558838, + "log_odds_ratio": -0.6649383306503296, + "logits/chosen": 370.05181884765625, + "logits/rejected": 320.9632263183594, + "logps/chosen": -0.9088066220283508, + "logps/rejected": -1.0025485754013062, + "loss": 1.4118, + "nll_loss": 1.5204761028289795, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.047286491841077805, - "rewards/margins": 0.004264607094228268, - "rewards/rejected": -0.051551103591918945, + "rewards/chosen": -0.04544033482670784, + "rewards/margins": 0.004687097389250994, + "rewards/rejected": -0.05012742802500725, "step": 1060 }, { "epoch": 0.8459094519459889, - "grad_norm": 50.40607833862305, - "learning_rate": 3.377655597789789e-07, - "log_odds_chosen": -0.3970710337162018, - "log_odds_ratio": -1.0087158679962158, - "logits/chosen": 305.99798583984375, - "logits/rejected": 319.93890380859375, - "logps/chosen": -1.3028199672698975, - "logps/rejected": -1.107723593711853, - "loss": 1.5254, - "nll_loss": 1.708296537399292, + "grad_norm": 44.504940032958984, + "learning_rate": 1.532128532589739e-06, + "log_odds_chosen": -0.3477151691913605, + "log_odds_ratio": -0.9693183898925781, + "logits/chosen": 299.94952392578125, + "logits/rejected": 313.5872497558594, + "logps/chosen": -1.2391369342803955, + "logps/rejected": -1.0682213306427002, + "loss": 1.502, + "nll_loss": 1.6569955348968506, "rewards/accuracies": 0.20000000298023224, - "rewards/chosen": -0.06514099985361099, - "rewards/margins": -0.009754816070199013, - "rewards/rejected": -0.05538617819547653, + "rewards/chosen": -0.061956845223903656, + "rewards/margins": -0.008545780554413795, + "rewards/rejected": -0.05341106653213501, "step": 1065 }, { "epoch": 0.8498808578236696, - "grad_norm": 32.866085052490234, - "learning_rate": 3.209571347203197e-07, - "log_odds_chosen": -0.3324670195579529, - "log_odds_ratio": -0.9437648057937622, - "logits/chosen": 288.600830078125, - "logits/rejected": 395.1521911621094, - "logps/chosen": -0.9841065406799316, - "logps/rejected": -0.9132230877876282, - "loss": 1.2591, - "nll_loss": 1.0712697505950928, + "grad_norm": 33.01232147216797, + "learning_rate": 1.5285446012893579e-06, + "log_odds_chosen": -0.2956480085849762, + "log_odds_ratio": -0.9251123666763306, + "logits/chosen": 280.8124084472656, + "logits/rejected": 386.6439208984375, + "logps/chosen": -0.9560649991035461, + "logps/rejected": -0.9032427072525024, + "loss": 1.2335, + "nll_loss": 1.0527818202972412, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.04920532926917076, - "rewards/margins": -0.0035441755317151546, - "rewards/rejected": -0.04566115140914917, + "rewards/chosen": -0.047803252935409546, + "rewards/margins": -0.0026411116123199463, + "rewards/rejected": -0.0451621375977993, "step": 1070 }, { "epoch": 0.8538522637013503, - "grad_norm": 63.01152038574219, - "learning_rate": 3.0454896027671073e-07, - "log_odds_chosen": -0.15130704641342163, - "log_odds_ratio": -0.8452315330505371, - "logits/chosen": 408.84552001953125, - "logits/rejected": 319.49493408203125, - "logps/chosen": -1.28704035282135, - "logps/rejected": -1.2764626741409302, - "loss": 1.6339, - "nll_loss": 1.682464361190796, + "grad_norm": 56.84487533569336, + "learning_rate": 1.5249857033260468e-06, + "log_odds_chosen": -0.0642160177230835, + "log_odds_ratio": -0.815158486366272, + "logits/chosen": 405.3268737792969, + "logits/rejected": 318.09503173828125, + "logps/chosen": -1.2454036474227905, + "logps/rejected": -1.285205602645874, + "loss": 1.6233, + "nll_loss": 1.6647475957870483, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.06435202062129974, - "rewards/margins": -0.000528886157553643, - "rewards/rejected": -0.06382313370704651, + "rewards/chosen": -0.062270186841487885, + "rewards/margins": 0.001990094780921936, + "rewards/rejected": -0.06426028162240982, "step": 1075 }, { "epoch": 0.857823669579031, - "grad_norm": 32.351932525634766, - "learning_rate": 2.885440503319145e-07, - "log_odds_chosen": -0.4667375087738037, - "log_odds_ratio": -1.0818705558776855, - "logits/chosen": 459.02410888671875, - "logits/rejected": 257.15716552734375, - "logps/chosen": -1.1935930252075195, - "logps/rejected": -0.7796539068222046, - "loss": 1.548, - "nll_loss": 1.7794479131698608, + "grad_norm": 32.555484771728516, + "learning_rate": 1.5214515486254614e-06, + "log_odds_chosen": -0.31260019540786743, + "log_odds_ratio": -0.9709165692329407, + "logits/chosen": 455.90008544921875, + "logits/rejected": 257.8133850097656, + "logps/chosen": -1.0744378566741943, + "logps/rejected": -0.7803062200546265, + "loss": 1.5035, + "nll_loss": 1.744284987449646, "rewards/accuracies": 0.20000000298023224, - "rewards/chosen": -0.05967964977025986, - "rewards/margins": -0.020696954801678658, - "rewards/rejected": -0.03898269310593605, + "rewards/chosen": -0.053721893578767776, + "rewards/margins": -0.014706583693623543, + "rewards/rejected": -0.03901531547307968, "step": 1080 }, { "epoch": 0.8617950754567116, - "grad_norm": 36.15253829956055, - "learning_rate": 2.7294534469732794e-07, - "log_odds_chosen": 1.6522690057754517, - "log_odds_ratio": -0.4097142219543457, - "logits/chosen": 344.11962890625, - "logits/rejected": 319.623046875, - "logps/chosen": -0.894513726234436, - "logps/rejected": -2.088923692703247, - "loss": 1.4534, - "nll_loss": 1.7392299175262451, + "grad_norm": 46.7504768371582, + "learning_rate": 1.517941851797291e-06, + "log_odds_chosen": 1.6384780406951904, + "log_odds_ratio": -0.3990572392940521, + "logits/chosen": 343.765380859375, + "logits/rejected": 318.4671325683594, + "logps/chosen": -0.8857353925704956, + "logps/rejected": -2.0790462493896484, + "loss": 1.4511, + "nll_loss": 1.7493927478790283, "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.0447256863117218, - "rewards/margins": 0.05972049757838249, - "rewards/rejected": -0.10444619506597519, + "rewards/chosen": -0.04428676888346672, + "rewards/margins": 0.05966554209589958, + "rewards/rejected": -0.1039523109793663, "step": 1085 }, { "epoch": 0.8657664813343924, - "grad_norm": 47.798858642578125, - "learning_rate": 2.5775570857199144e-07, - "log_odds_chosen": -0.20097847282886505, - "log_odds_ratio": -0.8294545412063599, - "logits/chosen": 281.59942626953125, - "logits/rejected": 324.57403564453125, - "logps/chosen": -1.1493308544158936, - "logps/rejected": -0.9629890322685242, - "loss": 1.3747, - "nll_loss": 1.3210439682006836, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.0574665442109108, - "rewards/margins": -0.009317094460129738, - "rewards/rejected": -0.04814944788813591, + "grad_norm": 45.060401916503906, + "learning_rate": 1.5144563320384566e-06, + "log_odds_chosen": -0.16205939650535583, + "log_odds_ratio": -0.8034777641296387, + "logits/chosen": 282.28436279296875, + "logits/rejected": 323.5531005859375, + "logps/chosen": -1.0885937213897705, + "logps/rejected": -0.9365935325622559, + "loss": 1.3514, + "nll_loss": 1.2723312377929688, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.054429687559604645, + "rewards/margins": -0.007600012235343456, + "rewards/rejected": -0.04682967811822891, "step": 1090 }, { "epoch": 0.8697378872120731, - "grad_norm": 29.671043395996094, - "learning_rate": 2.4297793201630113e-07, - "log_odds_chosen": 0.020139653235673904, - "log_odds_ratio": -0.7046443819999695, - "logits/chosen": 300.97998046875, - "logits/rejected": 333.9520568847656, - "logps/chosen": -0.972399115562439, - "logps/rejected": -0.9247520565986633, - "loss": 1.4489, - "nll_loss": 1.3582823276519775, + "grad_norm": 36.2373161315918, + "learning_rate": 1.5109947130387486e-06, + "log_odds_chosen": 0.03178917244076729, + "log_odds_ratio": -0.704880952835083, + "logits/chosen": 300.1133117675781, + "logits/rejected": 332.36920166015625, + "logps/chosen": -0.9320970773696899, + "logps/rejected": -0.8847508430480957, + "loss": 1.4299, + "nll_loss": 1.3066003322601318, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.04861995577812195, - "rewards/margins": -0.0023823559749871492, - "rewards/rejected": -0.04623759910464287, + "rewards/chosen": -0.046604860574007034, + "rewards/margins": -0.002367311390116811, + "rewards/rejected": -0.04423754662275314, "step": 1095 }, { "epoch": 0.8737092930897538, - "grad_norm": 30.31666374206543, - "learning_rate": 2.286147294395283e-07, - "log_odds_chosen": 0.29012542963027954, - "log_odds_ratio": -0.7615915536880493, - "logits/chosen": 315.9916687011719, - "logits/rejected": 422.93017578125, - "logps/chosen": -1.0184037685394287, - "logps/rejected": -1.0094739198684692, - "loss": 1.5729, - "nll_loss": 1.4798800945281982, + "grad_norm": 36.22955322265625, + "learning_rate": 1.5075567228888182e-06, + "log_odds_chosen": 0.2864794135093689, + "log_odds_ratio": -0.7445758581161499, + "logits/chosen": 316.0003662109375, + "logits/rejected": 421.9435119628906, + "logps/chosen": -0.9958402514457703, + "logps/rejected": -0.9862167239189148, + "loss": 1.5677, + "nll_loss": 1.4682040214538574, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.05092019587755203, - "rewards/margins": -0.00044649915071204305, - "rewards/rejected": -0.05047369748353958, + "rewards/chosen": -0.049792006611824036, + "rewards/margins": -0.00048117563710547984, + "rewards/rejected": -0.0493108332157135, "step": 1100 }, { "epoch": 0.8776806989674345, - "grad_norm": 32.19295120239258, - "learning_rate": 2.1466873910123058e-07, - "log_odds_chosen": -0.07446761429309845, - "log_odds_ratio": -0.8099315762519836, - "logits/chosen": 334.15740966796875, - "logits/rejected": 331.76031494140625, - "logps/chosen": -1.026379942893982, - "logps/rejected": -0.9773386120796204, - "loss": 1.4675, - "nll_loss": 1.4952067136764526, - "rewards/accuracies": 0.20000000298023224, - "rewards/chosen": -0.05131899565458298, - "rewards/margins": -0.002452067332342267, - "rewards/rejected": -0.048866935074329376, + "grad_norm": 40.75776290893555, + "learning_rate": 1.5041420939904672e-06, + "log_odds_chosen": -0.04482314735651016, + "log_odds_ratio": -0.7973297834396362, + "logits/chosen": 330.23980712890625, + "logits/rejected": 326.890625, + "logps/chosen": -0.9843143224716187, + "logps/rejected": -0.9512438774108887, + "loss": 1.4566, + "nll_loss": 1.460742712020874, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.04921571537852287, + "rewards/margins": -0.0016535200411453843, + "rewards/rejected": -0.04756220057606697, "step": 1105 }, { "epoch": 0.8816521048451151, - "grad_norm": 34.077449798583984, - "learning_rate": 2.0114252262665086e-07, - "log_odds_chosen": 0.30518868565559387, - "log_odds_ratio": -0.6247283220291138, - "logits/chosen": 313.799560546875, - "logits/rejected": 278.57293701171875, - "logps/chosen": -1.2211542129516602, - "logps/rejected": -1.4464585781097412, - "loss": 1.4043, - "nll_loss": 1.2219561338424683, + "grad_norm": 35.376686096191406, + "learning_rate": 1.5007505629691608e-06, + "log_odds_chosen": 0.24461106956005096, + "log_odds_ratio": -0.6386993527412415, + "logits/chosen": 314.48748779296875, + "logits/rejected": 279.96356201171875, + "logps/chosen": -1.2024238109588623, + "logps/rejected": -1.3905580043792725, + "loss": 1.3812, + "nll_loss": 1.2091633081436157, "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.06105770915746689, - "rewards/margins": 0.011265222914516926, - "rewards/rejected": -0.07232292741537094, + "rewards/chosen": -0.06012119725346565, + "rewards/margins": 0.00940670631825924, + "rewards/rejected": -0.06952790170907974, "step": 1110 }, { "epoch": 0.8856235107227959, - "grad_norm": 33.00147247314453, - "learning_rate": 1.880385645361951e-07, - "log_odds_chosen": 2.0590970516204834, - "log_odds_ratio": -0.4269631803035736, - "logits/chosen": 304.67572021484375, - "logits/rejected": 437.95751953125, - "logps/chosen": -0.8812467455863953, - "logps/rejected": -2.7326838970184326, - "loss": 1.5588, - "nll_loss": 1.2233951091766357, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.04406233876943588, - "rewards/margins": 0.09257186949253082, - "rewards/rejected": -0.1366342008113861, + "grad_norm": 42.16975784301758, + "learning_rate": 1.4973818705886997e-06, + "log_odds_chosen": 2.133307456970215, + "log_odds_ratio": -0.39024442434310913, + "logits/chosen": 305.95953369140625, + "logits/rejected": 435.2921447753906, + "logps/chosen": -0.863163948059082, + "logps/rejected": -2.759533643722534, + "loss": 1.5651, + "nll_loss": 1.215340256690979, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04315819591283798, + "rewards/margins": 0.09481848776340485, + "rewards/rejected": -0.13797667622566223, "step": 1115 }, { "epoch": 0.8895949166004765, - "grad_norm": 29.0517578125, - "learning_rate": 1.7535927178906898e-07, - "log_odds_chosen": 0.6221317648887634, - "log_odds_ratio": -0.5823326110839844, - "logits/chosen": 288.1285705566406, - "logits/rejected": 357.75701904296875, - "logps/chosen": -1.0755956172943115, - "logps/rejected": -1.5605504512786865, - "loss": 1.4892, - "nll_loss": 1.3548178672790527, + "grad_norm": 29.565824508666992, + "learning_rate": 1.494035761667992e-06, + "log_odds_chosen": 0.5857739448547363, + "log_odds_ratio": -0.5919302105903625, + "logits/chosen": 289.72613525390625, + "logits/rejected": 357.0590515136719, + "logps/chosen": -1.0517274141311646, + "logps/rejected": -1.5134713649749756, + "loss": 1.4878, + "nll_loss": 1.3550740480422974, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.053779780864715576, - "rewards/margins": 0.024247746914625168, - "rewards/rejected": -0.07802753150463104, + "rewards/chosen": -0.05258636921644211, + "rewards/margins": 0.02308720164000988, + "rewards/rejected": -0.07567357271909714, "step": 1120 }, { "epoch": 0.8935663224781573, - "grad_norm": 37.735111236572266, - "learning_rate": 1.6310697334116583e-07, - "log_odds_chosen": -0.2765834629535675, - "log_odds_ratio": -0.8877069354057312, - "logits/chosen": 357.4658203125, - "logits/rejected": 302.4728088378906, - "logps/chosen": -1.121751070022583, - "logps/rejected": -0.9259787797927856, - "loss": 1.4662, - "nll_loss": 1.56728196144104, + "grad_norm": 42.470863342285156, + "learning_rate": 1.49071198499986e-06, + "log_odds_chosen": -0.2158789336681366, + "log_odds_ratio": -0.8656711578369141, + "logits/chosen": 358.0691833496094, + "logits/rejected": 302.2369689941406, + "logps/chosen": -1.077471375465393, + "logps/rejected": -0.9175441861152649, + "loss": 1.448, + "nll_loss": 1.5222632884979248, "rewards/accuracies": 0.20000000298023224, - "rewards/chosen": -0.05608755350112915, - "rewards/margins": -0.009788615629076958, - "rewards/rejected": -0.04629894345998764, + "rewards/chosen": -0.05387356877326965, + "rewards/margins": -0.007996362634003162, + "rewards/rejected": -0.045877207070589066, "step": 1125 }, { "epoch": 0.897537728355838, - "grad_norm": 39.21086120605469, - "learning_rate": 1.512839197172758e-07, - "log_odds_chosen": 0.1607791781425476, - "log_odds_ratio": -0.7086135745048523, - "logits/chosen": 431.327880859375, - "logits/rejected": 309.34210205078125, - "logps/chosen": -1.1171777248382568, - "logps/rejected": -1.3307039737701416, - "loss": 1.3869, - "nll_loss": 1.7646287679672241, + "grad_norm": 40.679229736328125, + "learning_rate": 1.487410293271824e-06, + "log_odds_chosen": 0.6120940446853638, + "log_odds_ratio": -0.5448315739631653, + "logits/chosen": 431.89404296875, + "logits/rejected": 311.6705017089844, + "logps/chosen": -1.0258240699768066, + "logps/rejected": -1.5273563861846924, + "loss": 1.392, + "nll_loss": 1.7506325244903564, "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.05585888773202896, - "rewards/margins": 0.010676311329007149, - "rewards/rejected": -0.06653519719839096, + "rewards/chosen": -0.05129120498895645, + "rewards/margins": 0.025076616555452347, + "rewards/rejected": -0.0763678178191185, "step": 1130 }, { "epoch": 0.9015091342335186, - "grad_norm": 49.552589416503906, - "learning_rate": 1.398922825977092e-07, - "log_odds_chosen": 1.1862103939056396, - "log_odds_ratio": -0.36970359086990356, - "logits/chosen": 287.95318603515625, - "logits/rejected": 344.77764892578125, - "logps/chosen": -1.1711792945861816, - "logps/rejected": -2.0627903938293457, - "loss": 1.4555, - "nll_loss": 1.4977771043777466, + "grad_norm": 48.9831657409668, + "learning_rate": 1.484130442988812e-06, + "log_odds_chosen": 1.108486533164978, + "log_odds_ratio": -0.3976016938686371, + "logits/chosen": 286.84564208984375, + "logits/rejected": 343.97174072265625, + "logps/chosen": -1.2722723484039307, + "logps/rejected": -2.1502983570098877, + "loss": 1.4505, + "nll_loss": 1.5086886882781982, "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.05855896323919296, - "rewards/margins": 0.044580571353435516, - "rewards/rejected": -0.10313953459262848, + "rewards/chosen": -0.06361361593008041, + "rewards/margins": 0.04390129819512367, + "rewards/rejected": -0.10751490294933319, "step": 1135 }, { "epoch": 0.9054805401111994, - "grad_norm": 34.04672622680664, - "learning_rate": 1.2893415441939588e-07, - "log_odds_chosen": 0.042095281183719635, - "log_odds_ratio": -0.7100853323936462, - "logits/chosen": 319.3915710449219, - "logits/rejected": 372.4937438964844, - "logps/chosen": -0.8466545343399048, - "logps/rejected": -0.8484467267990112, - "loss": 1.3861, - "nll_loss": 1.0329768657684326, + "grad_norm": 45.998348236083984, + "learning_rate": 1.480872194397731e-06, + "log_odds_chosen": 0.01077426690608263, + "log_odds_ratio": -0.7240376472473145, + "logits/chosen": 316.49053955078125, + "logits/rejected": 369.6336669921875, + "logps/chosen": -0.8706620931625366, + "logps/rejected": -0.8589606285095215, + "loss": 1.3818, + "nll_loss": 1.0445737838745117, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.0423327274620533, - "rewards/margins": 8.96111159818247e-05, - "rewards/rejected": -0.0424223393201828, + "rewards/chosen": -0.04353310167789459, + "rewards/margins": -0.0005850695306435227, + "rewards/rejected": -0.042948029935359955, "step": 1140 }, { "epoch": 0.90945194598888, - "grad_norm": 43.70923614501953, - "learning_rate": 1.1841154799154376e-07, - "log_odds_chosen": 0.09307994693517685, - "log_odds_ratio": -0.8356366157531738, - "logits/chosen": 383.5526428222656, - "logits/rejected": 280.0104064941406, - "logps/chosen": -1.0310574769973755, - "logps/rejected": -1.0444084405899048, - "loss": 1.4938, - "nll_loss": 1.3081772327423096, + "grad_norm": 43.84899139404297, + "learning_rate": 1.4776353114138545e-06, + "log_odds_chosen": 0.3026946485042572, + "log_odds_ratio": -0.7330090403556824, + "logits/chosen": 382.29071044921875, + "logits/rejected": 280.1352844238281, + "logps/chosen": -0.9080026745796204, + "logps/rejected": -1.0521103143692017, + "loss": 1.4538, + "nll_loss": 1.2269086837768555, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.051552869379520416, - "rewards/margins": 0.0006675481563434005, - "rewards/rejected": -0.0522204227745533, + "rewards/chosen": -0.04540013149380684, + "rewards/margins": 0.0072053843177855015, + "rewards/rejected": -0.0526055172085762, "step": 1145 }, { "epoch": 0.9134233518665608, - "grad_norm": 42.19453048706055, - "learning_rate": 1.083263961259215e-07, - "log_odds_chosen": 0.5610286593437195, - "log_odds_ratio": -0.5318921804428101, - "logits/chosen": 313.7818298339844, - "logits/rejected": 281.55303955078125, - "logps/chosen": -0.8616586923599243, - "logps/rejected": -1.13971745967865, - "loss": 1.3678, - "nll_loss": 1.5508089065551758, + "grad_norm": 44.53425979614258, + "learning_rate": 1.4744195615489715e-06, + "log_odds_chosen": 0.41574639081954956, + "log_odds_ratio": -0.5235159993171692, + "logits/chosen": 313.0999450683594, + "logits/rejected": 280.4124450683594, + "logps/chosen": -0.8852353096008301, + "logps/rejected": -1.1474339962005615, + "loss": 1.3557, + "nll_loss": 1.5458626747131348, "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.043082933872938156, - "rewards/margins": 0.01390293799340725, - "rewards/rejected": -0.056985873728990555, + "rewards/chosen": -0.044261764734983444, + "rewards/margins": 0.013109927996993065, + "rewards/rejected": -0.05737169459462166, "step": 1150 }, { "epoch": 0.9173947577442415, - "grad_norm": 62.5867805480957, - "learning_rate": 9.86805512818359e-08, - "log_odds_chosen": 0.5819055438041687, - "log_odds_ratio": -0.5563138723373413, - "logits/chosen": 308.6764221191406, - "logits/rejected": 280.77093505859375, - "logps/chosen": -0.7943710684776306, - "logps/rejected": -1.1860836744308472, - "loss": 1.5596, - "nll_loss": 1.2872906923294067, + "grad_norm": 73.07974243164062, + "learning_rate": 1.4712247158412494e-06, + "log_odds_chosen": 0.6588196158409119, + "log_odds_ratio": -0.517508864402771, + "logits/chosen": 305.9576416015625, + "logits/rejected": 279.7393493652344, + "logps/chosen": -0.661457896232605, + "logps/rejected": -1.1083705425262451, + "loss": 1.5078, + "nll_loss": 1.212100625038147, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.03971855714917183, - "rewards/margins": 0.019585633650422096, - "rewards/rejected": -0.059304188936948776, + "rewards/chosen": -0.03307289630174637, + "rewards/margins": 0.022345632314682007, + "rewards/rejected": -0.055418528616428375, "step": 1155 }, { "epoch": 0.9213661636219221, - "grad_norm": 39.823516845703125, - "learning_rate": 8.947578522587097e-08, - "log_odds_chosen": 0.5100874304771423, - "log_odds_ratio": -0.7220025062561035, - "logits/chosen": 421.84722900390625, - "logits/rejected": 306.4421081542969, - "logps/chosen": -0.7023354768753052, - "logps/rejected": -0.9484280347824097, - "loss": 1.2855, - "nll_loss": 0.8443538546562195, + "grad_norm": 89.8277587890625, + "learning_rate": 1.4680505487867589e-06, + "log_odds_chosen": 0.16856543719768524, + "log_odds_ratio": -0.7652976512908936, + "logits/chosen": 419.119140625, + "logits/rejected": 306.33819580078125, + "logps/chosen": -0.7615987062454224, + "logps/rejected": -0.9220150709152222, + "loss": 1.2839, + "nll_loss": 0.8718665242195129, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.0351167730987072, - "rewards/margins": 0.012304631993174553, - "rewards/rejected": -0.0474214032292366, + "rewards/chosen": -0.03807993233203888, + "rewards/margins": 0.00802082009613514, + "rewards/rejected": -0.04610075429081917, "step": 1160 }, { "epoch": 0.9253375694996029, - "grad_norm": 34.763492584228516, - "learning_rate": 8.071378870644381e-08, - "log_odds_chosen": 0.068526491522789, - "log_odds_ratio": -0.7172547578811646, - "logits/chosen": 306.42303466796875, - "logits/rejected": 306.2086486816406, - "logps/chosen": -1.1717783212661743, - "logps/rejected": -1.2519972324371338, - "loss": 1.4611, - "nll_loss": 1.6088817119598389, + "grad_norm": 38.93104934692383, + "learning_rate": 1.4648968382726192e-06, + "log_odds_chosen": -0.008628154173493385, + "log_odds_ratio": -0.7770323753356934, + "logits/chosen": 301.6461486816406, + "logits/rejected": 302.1844177246094, + "logps/chosen": -1.2084157466888428, + "logps/rejected": -1.24485445022583, + "loss": 1.4509, + "nll_loss": 1.5561408996582031, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.05858892202377319, - "rewards/margins": 0.004010946489870548, - "rewards/rejected": -0.06259986758232117, + "rewards/chosen": -0.060420792549848557, + "rewards/margins": 0.0018219311023131013, + "rewards/rejected": -0.06224272772669792, "step": 1165 }, { "epoch": 0.9293089753772835, - "grad_norm": 57.37542724609375, - "learning_rate": 7.239617114324499e-08, - "log_odds_chosen": -0.01497584581375122, - "log_odds_ratio": -0.7752578854560852, - "logits/chosen": 278.559326171875, - "logits/rejected": 374.15869140625, - "logps/chosen": -1.2931535243988037, - "logps/rejected": -1.2771342992782593, - "loss": 1.5557, - "nll_loss": 1.4230643510818481, + "grad_norm": 51.1599235534668, + "learning_rate": 1.4617633655117156e-06, + "log_odds_chosen": -0.02944868803024292, + "log_odds_ratio": -0.7933769226074219, + "logits/chosen": 274.8695983886719, + "logits/rejected": 369.8721618652344, + "logps/chosen": -1.3042938709259033, + "logps/rejected": -1.2830064296722412, + "loss": 1.5347, + "nll_loss": 1.4256232976913452, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.06465767323970795, - "rewards/margins": -0.0008009634912014008, - "rewards/rejected": -0.06385671347379684, + "rewards/chosen": -0.06521469354629517, + "rewards/margins": -0.001064369105733931, + "rewards/rejected": -0.06415032595396042, "step": 1170 }, { "epoch": 0.9332803812549643, - "grad_norm": 54.0869026184082, - "learning_rate": 6.452446033161946e-08, - "log_odds_chosen": -0.19031484425067902, - "log_odds_ratio": -0.8042638897895813, - "logits/chosen": 299.294921875, - "logits/rejected": 398.8330078125, - "logps/chosen": -1.0967943668365479, - "logps/rejected": -1.014966607093811, - "loss": 1.5468, - "nll_loss": 1.4041001796722412, - "rewards/accuracies": 0.20000000298023224, - "rewards/chosen": -0.05483972281217575, - "rewards/margins": -0.0040913899429142475, - "rewards/rejected": -0.05074832960963249, + "grad_norm": 68.91346740722656, + "learning_rate": 1.4586499149789457e-06, + "log_odds_chosen": -0.08938068896532059, + "log_odds_ratio": -0.7493371963500977, + "logits/chosen": 295.7596130371094, + "logits/rejected": 395.13543701171875, + "logps/chosen": -1.07353937625885, + "logps/rejected": -1.0517762899398804, + "loss": 1.5337, + "nll_loss": 1.382968783378601, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.053676970303058624, + "rewards/margins": -0.0010881524067372084, + "rewards/rejected": -0.0525888130068779, "step": 1175 }, { "epoch": 0.937251787132645, - "grad_norm": 30.728069305419922, - "learning_rate": 5.7100102161937176e-08, - "log_odds_chosen": 0.08359535038471222, - "log_odds_ratio": -0.6820527911186218, - "logits/chosen": 354.07806396484375, - "logits/rejected": 264.95947265625, - "logps/chosen": -0.9352580904960632, - "logps/rejected": -0.9271550178527832, - "loss": 1.3888, - "nll_loss": 1.2889841794967651, + "grad_norm": 31.116653442382812, + "learning_rate": 1.4555562743489552e-06, + "log_odds_chosen": 0.08080291748046875, + "log_odds_ratio": -0.6991288065910339, + "logits/chosen": 348.20330810546875, + "logits/rejected": 262.96844482421875, + "logps/chosen": -0.9602434039115906, + "logps/rejected": -0.9359101057052612, + "loss": 1.4031, + "nll_loss": 1.3135709762573242, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.04676290601491928, - "rewards/margins": -0.0004051584692206234, - "rewards/rejected": -0.04635775089263916, + "rewards/chosen": -0.04801217466592789, + "rewards/margins": -0.001216667122207582, + "rewards/rejected": -0.04679550975561142, "step": 1180 }, { "epoch": 0.9412231930103256, - "grad_norm": 30.694597244262695, - "learning_rate": 5.012446035400881e-08, - "log_odds_chosen": -0.18491533398628235, - "log_odds_ratio": -0.8581596612930298, - "logits/chosen": 431.51483154296875, - "logits/rejected": 256.1930236816406, - "logps/chosen": -1.3761582374572754, - "logps/rejected": -1.2526451349258423, - "loss": 1.5465, - "nll_loss": 1.7214624881744385, + "grad_norm": 33.331146240234375, + "learning_rate": 1.4524822344353171e-06, + "log_odds_chosen": -0.2812700867652893, + "log_odds_ratio": -0.9116196632385254, + "logits/chosen": 427.7166442871094, + "logits/rejected": 252.6331329345703, + "logps/chosen": -1.4378384351730347, + "logps/rejected": -1.2257274389266968, + "loss": 1.5102, + "nll_loss": 1.7158218622207642, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.06880791485309601, - "rewards/margins": -0.006175657268613577, - "rewards/rejected": -0.062632255256176, + "rewards/chosen": -0.07189192622900009, + "rewards/margins": -0.010605551302433014, + "rewards/rejected": -0.06128637120127678, "step": 1185 }, { "epoch": 0.9451945988880064, - "grad_norm": 52.927608489990234, - "learning_rate": 4.359881620659534e-08, - "log_odds_chosen": 0.6782919764518738, - "log_odds_ratio": -0.6247768998146057, - "logits/chosen": 436.1754455566406, - "logits/rejected": 285.18365478515625, - "logps/chosen": -0.985084056854248, - "logps/rejected": -1.5693267583847046, - "loss": 1.4816, - "nll_loss": 1.398045301437378, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.04925420135259628, - "rewards/margins": 0.029212135821580887, - "rewards/rejected": -0.07846634089946747, + "grad_norm": 49.096778869628906, + "learning_rate": 1.4494275891311214e-06, + "log_odds_chosen": 0.510400652885437, + "log_odds_ratio": -0.6953208446502686, + "logits/chosen": 432.3097229003906, + "logits/rejected": 280.7305603027344, + "logps/chosen": -0.9621549844741821, + "logps/rejected": -1.430006980895996, + "loss": 1.4778, + "nll_loss": 1.3935554027557373, + "rewards/accuracies": 0.20000000298023224, + "rewards/chosen": -0.048107750713825226, + "rewards/margins": 0.023392602801322937, + "rewards/rejected": -0.07150034606456757, "step": 1190 }, { "epoch": 0.949166004765687, - "grad_norm": 48.66614532470703, - "learning_rate": 3.7524368362057415e-08, - "log_odds_chosen": -0.1631685197353363, - "log_odds_ratio": -0.8233796954154968, - "logits/chosen": 418.45928955078125, - "logits/rejected": 318.4512939453125, - "logps/chosen": -0.8457492589950562, - "logps/rejected": -0.7526192665100098, - "loss": 1.4522, - "nll_loss": 1.3443362712860107, + "grad_norm": 54.93104934692383, + "learning_rate": 1.4463921353509293e-06, + "log_odds_chosen": -0.25363707542419434, + "log_odds_ratio": -0.8674055933952332, + "logits/chosen": 415.92730712890625, + "logits/rejected": 317.29217529296875, + "logps/chosen": -0.9652946591377258, + "logps/rejected": -0.8204643130302429, + "loss": 1.4354, + "nll_loss": 1.3288614749908447, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.04228746145963669, - "rewards/margins": -0.004656502045691013, - "rewards/rejected": -0.03763096407055855, + "rewards/chosen": -0.04826473444700241, + "rewards/margins": -0.0072415173053741455, + "rewards/rejected": -0.041023217141628265, "step": 1195 }, { "epoch": 0.9531374106433678, - "grad_norm": 29.535236358642578, - "learning_rate": 3.1902232586185635e-08, - "log_odds_chosen": 0.14746162295341492, - "log_odds_ratio": -0.7185263633728027, - "logits/chosen": 346.12823486328125, - "logits/rejected": 365.977783203125, - "logps/chosen": -0.9943090677261353, - "logps/rejected": -0.9647199511528015, - "loss": 1.5476, - "nll_loss": 1.3550399541854858, + "grad_norm": 35.22028732299805, + "learning_rate": 1.4433756729740647e-06, + "log_odds_chosen": 0.19383028149604797, + "log_odds_ratio": -0.7156062126159668, + "logits/chosen": 338.9249572753906, + "logits/rejected": 362.58660888671875, + "logps/chosen": -0.9961981773376465, + "logps/rejected": -0.9824131727218628, + "loss": 1.541, + "nll_loss": 1.3671401739120483, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.04971545934677124, - "rewards/margins": -0.001479454687796533, - "rewards/rejected": -0.048236001282930374, + "rewards/chosen": -0.04980991408228874, + "rewards/margins": -0.0006892535602673888, + "rewards/rejected": -0.04912065714597702, "step": 1200 }, { "epoch": 0.9571088165210484, - "grad_norm": 46.18953323364258, - "learning_rate": 2.673344156325558e-08, - "log_odds_chosen": 0.25677961111068726, - "log_odds_ratio": -0.6748029589653015, - "logits/chosen": 321.4141845703125, - "logits/rejected": 390.1539611816406, - "logps/chosen": -1.306235432624817, - "logps/rejected": -1.5235410928726196, - "loss": 1.4511, - "nll_loss": 1.348838448524475, + "grad_norm": 43.74067687988281, + "learning_rate": 1.4403780047891936e-06, + "log_odds_chosen": 0.19641388952732086, + "log_odds_ratio": -0.7219318151473999, + "logits/chosen": 317.5765380859375, + "logits/rejected": 387.16510009765625, + "logps/chosen": -1.3108140230178833, + "logps/rejected": -1.4882935285568237, + "loss": 1.4724, + "nll_loss": 1.3518784046173096, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.06531177461147308, - "rewards/margins": 0.010865284129977226, - "rewards/rejected": -0.07617706060409546, + "rewards/chosen": -0.06554069370031357, + "rewards/margins": 0.00887397862970829, + "rewards/rejected": -0.0744146779179573, "step": 1205 }, { "epoch": 0.9610802223987291, - "grad_norm": 48.14284133911133, - "learning_rate": 2.2018944706341528e-08, - "log_odds_chosen": 0.25043779611587524, - "log_odds_ratio": -0.6027860045433044, - "logits/chosen": 383.79913330078125, - "logits/rejected": 336.7133483886719, - "logps/chosen": -1.0423628091812134, - "logps/rejected": -1.1916873455047607, - "loss": 1.4619, - "nll_loss": 1.6256072521209717, + "grad_norm": 53.19133377075195, + "learning_rate": 1.4373989364401727e-06, + "log_odds_chosen": 0.1009591817855835, + "log_odds_ratio": -0.6597286462783813, + "logits/chosen": 381.9156188964844, + "logits/rejected": 333.6917419433594, + "logps/chosen": -1.0816733837127686, + "logps/rejected": -1.1360995769500732, + "loss": 1.4638, + "nll_loss": 1.632345199584961, "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.05211814120411873, - "rewards/margins": 0.007466231472790241, - "rewards/rejected": -0.059584371745586395, + "rewards/chosen": -0.05408366769552231, + "rewards/margins": 0.0027213036082684994, + "rewards/rejected": -0.056804973632097244, "step": 1210 }, { "epoch": 0.9650516282764099, - "grad_norm": 32.60062026977539, - "learning_rate": 1.775960798292731e-08, - "log_odds_chosen": -0.04052457585930824, - "log_odds_ratio": -0.8082249760627747, - "logits/chosen": 331.26397705078125, - "logits/rejected": 329.55096435546875, - "logps/chosen": -1.347312092781067, - "logps/rejected": -1.3537461757659912, - "loss": 1.5079, - "nll_loss": 1.5286957025527954, - "rewards/accuracies": 0.20000000298023224, - "rewards/chosen": -0.0673656016588211, - "rewards/margins": 0.00032170413760468364, - "rewards/rejected": -0.06768731772899628, + "grad_norm": 38.759151458740234, + "learning_rate": 1.4344382763731173e-06, + "log_odds_chosen": 0.26151102781295776, + "log_odds_ratio": -0.6654404401779175, + "logits/chosen": 328.0746154785156, + "logits/rejected": 326.65576171875, + "logps/chosen": -1.2324529886245728, + "logps/rejected": -1.4607326984405518, + "loss": 1.4824, + "nll_loss": 1.464372992515564, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.06162264943122864, + "rewards/margins": 0.011413980275392532, + "rewards/rejected": -0.07303663343191147, "step": 1215 }, { "epoch": 0.9690230341540905, - "grad_norm": 43.27484893798828, - "learning_rate": 1.3956213755842718e-08, - "log_odds_chosen": -0.26221469044685364, - "log_odds_ratio": -0.9925212860107422, - "logits/chosen": 304.05120849609375, - "logits/rejected": 393.05517578125, - "logps/chosen": -1.015187382698059, - "logps/rejected": -0.9859923124313354, - "loss": 1.3977, - "nll_loss": 1.1624058485031128, + "grad_norm": 61.07746505737305, + "learning_rate": 1.4314958357846706e-06, + "log_odds_chosen": -0.1794472187757492, + "log_odds_ratio": -0.9448292851448059, + "logits/chosen": 302.45697021484375, + "logits/rejected": 391.7511291503906, + "logps/chosen": -0.9841415286064148, + "logps/rejected": -1.0038130283355713, + "loss": 1.3866, + "nll_loss": 1.1302523612976074, "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.050759367644786835, - "rewards/margins": -0.0014597497647628188, - "rewards/rejected": -0.04929962009191513, + "rewards/chosen": -0.04920707270503044, + "rewards/margins": 0.0009835765231400728, + "rewards/rejected": -0.050190649926662445, "step": 1220 }, { "epoch": 0.9729944400317713, - "grad_norm": 43.98046875, - "learning_rate": 1.0609460639559033e-08, - "log_odds_chosen": 0.6014237999916077, - "log_odds_ratio": -0.5421421527862549, - "logits/chosen": 339.303955078125, - "logits/rejected": 391.6441345214844, - "logps/chosen": -1.0844001770019531, - "logps/rejected": -1.4398688077926636, - "loss": 1.3258, - "nll_loss": 1.4467300176620483, + "grad_norm": 50.63767623901367, + "learning_rate": 1.4285714285714286e-06, + "log_odds_chosen": 0.9508854150772095, + "log_odds_ratio": -0.4959738254547119, + "logits/chosen": 335.69036865234375, + "logits/rejected": 390.5137939453125, + "logps/chosen": -0.9460613131523132, + "logps/rejected": -1.4656463861465454, + "loss": 1.2784, + "nll_loss": 1.3087149858474731, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.054220009595155716, - "rewards/margins": 0.017773432657122612, - "rewards/rejected": -0.07199344038963318, + "rewards/chosen": -0.04730306938290596, + "rewards/margins": 0.02597925066947937, + "rewards/rejected": -0.07328232377767563, "step": 1225 }, { "epoch": 0.9769658459094519, - "grad_norm": 34.32419967651367, - "learning_rate": 7.719963371865259e-09, - "log_odds_chosen": 0.7910041809082031, - "log_odds_ratio": -0.4584100842475891, - "logits/chosen": 316.9669494628906, - "logits/rejected": 363.3733825683594, - "logps/chosen": -0.8123126029968262, - "logps/rejected": -1.2943851947784424, - "loss": 1.3404, - "nll_loss": 1.313084363937378, + "grad_norm": 45.654056549072266, + "learning_rate": 1.4256648712805027e-06, + "log_odds_chosen": 0.6766214966773987, + "log_odds_ratio": -0.48730725049972534, + "logits/chosen": 313.19122314453125, + "logits/rejected": 360.2751770019531, + "logps/chosen": -0.8694770932197571, + "logps/rejected": -1.3065763711929321, + "loss": 1.3305, + "nll_loss": 1.3452584743499756, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.04061562940478325, - "rewards/margins": 0.02410362847149372, - "rewards/rejected": -0.06471925973892212, + "rewards/chosen": -0.04347385838627815, + "rewards/margins": 0.021854963153600693, + "rewards/rejected": -0.06532882153987885, "step": 1230 }, { "epoch": 0.9809372517871326, - "grad_norm": 42.33149337768555, - "learning_rate": 5.288252700952068e-09, - "log_odds_chosen": 0.8038623929023743, - "log_odds_ratio": -0.43806830048561096, - "logits/chosen": 310.6473083496094, - "logits/rejected": 314.3547668457031, - "logps/chosen": -0.8453804850578308, - "logps/rejected": -1.4276442527770996, - "loss": 1.4819, - "nll_loss": 1.6422332525253296, + "grad_norm": 47.46096420288086, + "learning_rate": 1.4227759830611807e-06, + "log_odds_chosen": 0.700452446937561, + "log_odds_ratio": -0.49210184812545776, + "logits/chosen": 304.34393310546875, + "logits/rejected": 310.6593322753906, + "logps/chosen": -0.8900222778320312, + "logps/rejected": -1.4213621616363525, + "loss": 1.4565, + "nll_loss": 1.5947277545928955, "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.0422690249979496, - "rewards/margins": 0.02911318838596344, - "rewards/rejected": -0.07138221710920334, + "rewards/chosen": -0.04450111836194992, + "rewards/margins": 0.026566997170448303, + "rewards/rejected": -0.07106811553239822, "step": 1235 }, { "epoch": 0.9849086576648134, - "grad_norm": 45.44175720214844, - "learning_rate": 3.3147752879236773e-09, - "log_odds_chosen": 0.02175927720963955, - "log_odds_ratio": -0.8097355961799622, - "logits/chosen": 277.007080078125, - "logits/rejected": 415.473388671875, - "logps/chosen": -1.0552597045898438, - "logps/rejected": -1.2169201374053955, - "loss": 1.5311, - "nll_loss": 1.5463203191757202, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.05276298522949219, - "rewards/margins": 0.008083020336925983, - "rewards/rejected": -0.060846008360385895, + "grad_norm": 46.09358596801758, + "learning_rate": 1.419904585617662e-06, + "log_odds_chosen": 0.14936234056949615, + "log_odds_ratio": -0.8327441215515137, + "logits/chosen": 274.401611328125, + "logits/rejected": 412.91912841796875, + "logps/chosen": -0.9217895269393921, + "logps/rejected": -1.1136987209320068, + "loss": 1.5226, + "nll_loss": 1.5541056394577026, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.046089477837085724, + "rewards/margins": 0.00959546584635973, + "rewards/rejected": -0.05568494275212288, "step": 1240 }, { "epoch": 0.988880063542494, - "grad_norm": 62.612850189208984, - "learning_rate": 1.7998936247534681e-09, - "log_odds_chosen": 0.2898419499397278, - "log_odds_ratio": -0.5893855094909668, - "logits/chosen": 399.6653747558594, - "logits/rejected": 274.2057800292969, - "logps/chosen": -1.0792999267578125, - "logps/rejected": -1.2781049013137817, - "loss": 1.4564, - "nll_loss": 1.5806140899658203, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.053964994847774506, - "rewards/margins": 0.009940249845385551, - "rewards/rejected": -0.0639052465558052, + "grad_norm": 71.01630401611328, + "learning_rate": 1.4170505031628396e-06, + "log_odds_chosen": 0.36680126190185547, + "log_odds_ratio": -0.5545364022254944, + "logits/chosen": 397.23516845703125, + "logits/rejected": 272.55535888671875, + "logps/chosen": -0.9994925260543823, + "logps/rejected": -1.2435686588287354, + "loss": 1.4343, + "nll_loss": 1.5012677907943726, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.049974631518125534, + "rewards/margins": 0.012203807011246681, + "rewards/rejected": -0.062178440392017365, "step": 1245 }, { "epoch": 0.9928514694201748, - "grad_norm": 51.08999252319336, - "learning_rate": 7.438859677008636e-10, - "log_odds_chosen": 0.09502691775560379, - "log_odds_ratio": -0.7220104336738586, - "logits/chosen": 325.39263916015625, - "logits/rejected": 361.7767639160156, - "logps/chosen": -1.1207284927368164, - "logps/rejected": -1.173607587814331, - "loss": 1.3621, - "nll_loss": 1.241156816482544, + "grad_norm": 77.71598815917969, + "learning_rate": 1.4142135623730952e-06, + "log_odds_chosen": 0.08250565826892853, + "log_odds_ratio": -0.732119083404541, + "logits/chosen": 322.84356689453125, + "logits/rejected": 360.3878479003906, + "logps/chosen": -1.1035608053207397, + "logps/rejected": -1.1463263034820557, + "loss": 1.3562, + "nll_loss": 1.2327629327774048, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.05603642389178276, - "rewards/margins": 0.0026439554058015347, - "rewards/rejected": -0.05868038535118103, + "rewards/chosen": -0.05517803877592087, + "rewards/margins": 0.002138280076906085, + "rewards/rejected": -0.05731632187962532, "step": 1250 }, { "epoch": 0.9968228752978554, - "grad_norm": 44.36956787109375, - "learning_rate": 1.4694628620137708e-10, - "log_odds_chosen": -0.33857935667037964, - "log_odds_ratio": -0.9029590487480164, - "logits/chosen": 293.49420166015625, - "logits/rejected": 431.3218688964844, - "logps/chosen": -0.9575881958007812, - "logps/rejected": -0.7752519845962524, - "loss": 1.3423, - "nll_loss": 1.1781036853790283, - "rewards/accuracies": 0.20000000298023224, - "rewards/chosen": -0.0478794127702713, - "rewards/margins": -0.009116815403103828, - "rewards/rejected": -0.038762595504522324, + "grad_norm": 57.77963638305664, + "learning_rate": 1.4113935923440917e-06, + "log_odds_chosen": -0.49407824873924255, + "log_odds_ratio": -0.9747999906539917, + "logits/chosen": 290.9209289550781, + "logits/rejected": 428.91351318359375, + "logps/chosen": -0.9552696347236633, + "logps/rejected": -0.7045097947120667, + "loss": 1.3309, + "nll_loss": 1.1892468929290771, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.047763485461473465, + "rewards/margins": -0.012537995353341103, + "rewards/rejected": -0.03522548824548721, "step": 1255 }, { "epoch": 1.0, - "eval_log_odds_chosen": 0.23976314067840576, - "eval_log_odds_ratio": -0.6928443908691406, - "eval_logits/chosen": 340.5321350097656, - "eval_logits/rejected": 312.9670104980469, - "eval_logps/chosen": -1.0267834663391113, - "eval_logps/rejected": -1.1786600351333618, - "eval_loss": 1.455647587776184, - "eval_nll_loss": 1.4095592498779297, - "eval_rewards/accuracies": 0.5107913613319397, - "eval_rewards/chosen": -0.051339175552129745, - "eval_rewards/margins": 0.0075938161462545395, - "eval_rewards/rejected": -0.058932989835739136, - "eval_runtime": 91.5981, - "eval_samples_per_second": 6.037, + "eval_log_odds_chosen": 0.21191194653511047, + "eval_log_odds_ratio": -0.7034205794334412, + "eval_logits/chosen": 338.2715148925781, + "eval_logits/rejected": 310.9833068847656, + "eval_logps/chosen": -1.0253794193267822, + "eval_logps/rejected": -1.1665838956832886, + "eval_loss": 1.4432373046875, + "eval_nll_loss": 1.3963584899902344, + "eval_rewards/accuracies": 0.5467625856399536, + "eval_rewards/chosen": -0.05126897618174553, + "eval_rewards/margins": 0.0070602260529994965, + "eval_rewards/rejected": -0.05832919850945473, + "eval_runtime": 91.6529, + "eval_samples_per_second": 6.034, "eval_steps_per_second": 1.517, "step": 1259 }, { - "epoch": 1.0, - "step": 1259, + "epoch": 1.0007942811755361, + "grad_norm": 29.619354248046875, + "learning_rate": 1.4085904245475275e-06, + "log_odds_chosen": 0.2918682098388672, + "log_odds_ratio": -0.6367956399917603, + "logits/chosen": 296.0645446777344, + "logits/rejected": 305.7909240722656, + "logps/chosen": -0.9510093927383423, + "logps/rejected": -1.154069185256958, + "loss": 1.35, + "nll_loss": 1.288127064704895, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.047550469636917114, + "rewards/margins": 0.01015299092978239, + "rewards/rejected": -0.05770345777273178, + "step": 1260 + }, + { + "epoch": 1.004765687053217, + "grad_norm": 52.63589096069336, + "learning_rate": 1.4058038927888332e-06, + "log_odds_chosen": 0.4959385395050049, + "log_odds_ratio": -0.5091910362243652, + "logits/chosen": 398.83843994140625, + "logits/rejected": 301.77056884765625, + "logps/chosen": -0.6213952898979187, + "logps/rejected": -0.8220237493515015, + "loss": 0.8874, + "nll_loss": 0.9351065754890442, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.031069766730070114, + "rewards/margins": 0.010031421668827534, + "rewards/rejected": -0.041101183742284775, + "step": 1265 + }, + { + "epoch": 1.0087370929308976, + "grad_norm": 39.637977600097656, + "learning_rate": 1.4030338331657844e-06, + "log_odds_chosen": 0.8783077001571655, + "log_odds_ratio": -0.42215338349342346, + "logits/chosen": 334.1138610839844, + "logits/rejected": 312.70989990234375, + "logps/chosen": -0.8557085990905762, + "logps/rejected": -1.4451814889907837, + "loss": 0.8788, + "nll_loss": 0.9550254940986633, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.04278543218970299, + "rewards/margins": 0.02947363629937172, + "rewards/rejected": -0.07225906848907471, + "step": 1270 + }, + { + "epoch": 1.0127084988085782, + "grad_norm": 45.631141662597656, + "learning_rate": 1.4002800840280098e-06, + "log_odds_chosen": 1.9242538213729858, + "log_odds_ratio": -0.280457466840744, + "logits/chosen": 319.7841796875, + "logits/rejected": 331.0938720703125, + "logps/chosen": -0.5420134663581848, + "logps/rejected": -1.7290103435516357, + "loss": 0.8474, + "nll_loss": 0.8860380053520203, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.02710067108273506, + "rewards/margins": 0.05934985354542732, + "rewards/rejected": -0.08645053207874298, + "step": 1275 + }, + { + "epoch": 1.016679904686259, + "grad_norm": 41.359683990478516, + "learning_rate": 1.3975424859373688e-06, + "log_odds_chosen": 1.5988181829452515, + "log_odds_ratio": -0.31896865367889404, + "logits/chosen": 303.55609130859375, + "logits/rejected": 302.9502868652344, + "logps/chosen": -0.434316486120224, + "logps/rejected": -0.9644277691841125, + "loss": 0.817, + "nll_loss": 0.7315559387207031, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02171582356095314, + "rewards/margins": 0.026505568996071815, + "rewards/rejected": -0.048221390694379807, + "step": 1280 + }, + { + "epoch": 1.0206513105639397, + "grad_norm": 35.43171310424805, + "learning_rate": 1.3948208816291767e-06, + "log_odds_chosen": 2.031580924987793, + "log_odds_ratio": -0.16610851883888245, + "logits/chosen": 268.5763244628906, + "logits/rejected": 361.4361267089844, + "logps/chosen": -0.48307856917381287, + "logps/rejected": -1.6509729623794556, + "loss": 0.918, + "nll_loss": 1.000573754310608, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.024153929203748703, + "rewards/margins": 0.058394718915224075, + "rewards/rejected": -0.08254864811897278, + "step": 1285 + }, + { + "epoch": 1.0246227164416204, + "grad_norm": 49.6970329284668, + "learning_rate": 1.3921151159742616e-06, + "log_odds_chosen": 1.449532151222229, + "log_odds_ratio": -0.3011077642440796, + "logits/chosen": 330.57379150390625, + "logits/rejected": 305.499755859375, + "logps/chosen": -0.5765695571899414, + "logps/rejected": -1.4029136896133423, + "loss": 0.7548, + "nll_loss": 0.7175348997116089, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.02882847748696804, + "rewards/margins": 0.041317202150821686, + "rewards/rejected": -0.07014568150043488, + "step": 1290 + }, + { + "epoch": 1.0285941223193011, + "grad_norm": 32.22804260253906, + "learning_rate": 1.3894250359418213e-06, + "log_odds_chosen": 1.8826097249984741, + "log_odds_ratio": -0.1858014166355133, + "logits/chosen": 321.48516845703125, + "logits/rejected": 292.4135437011719, + "logps/chosen": -0.42812156677246094, + "logps/rejected": -1.511461615562439, + "loss": 0.7512, + "nll_loss": 0.7446305155754089, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.021406078711152077, + "rewards/margins": 0.0541670098900795, + "rewards/rejected": -0.07557308673858643, + "step": 1295 + }, + { + "epoch": 1.0325655281969817, + "grad_norm": 36.08200454711914, + "learning_rate": 1.386750490563073e-06, + "log_odds_chosen": 1.3494882583618164, + "log_odds_ratio": -0.29181593656539917, + "logits/chosen": 383.32464599609375, + "logits/rejected": 247.87405395507812, + "logps/chosen": -0.38351649045944214, + "logps/rejected": -0.8719679713249207, + "loss": 0.8151, + "nll_loss": 0.8919464945793152, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.019175823777914047, + "rewards/margins": 0.024422574788331985, + "rewards/rejected": -0.04359840229153633, + "step": 1300 + }, + { + "epoch": 1.0365369340746624, + "grad_norm": 53.69875717163086, + "learning_rate": 1.3840913308956663e-06, + "log_odds_chosen": 4.168577194213867, + "log_odds_ratio": -0.15235312283039093, + "logits/chosen": 233.92977905273438, + "logits/rejected": 265.3076171875, + "logps/chosen": -0.34961751103401184, + "logps/rejected": -2.9050133228302, + "loss": 0.764, + "nll_loss": 0.7820371985435486, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01748087629675865, + "rewards/margins": 0.12776978313922882, + "rewards/rejected": -0.14525064826011658, + "step": 1305 + }, + { + "epoch": 1.0405083399523432, + "grad_norm": 22.12669563293457, + "learning_rate": 1.3814474099888442e-06, + "log_odds_chosen": 2.3678359985351562, + "log_odds_ratio": -0.18794922530651093, + "logits/chosen": 366.886474609375, + "logits/rejected": 262.33270263671875, + "logps/chosen": -0.4483531415462494, + "logps/rejected": -1.4391227960586548, + "loss": 0.8066, + "nll_loss": 0.7956131100654602, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02241765707731247, + "rewards/margins": 0.04953848570585251, + "rewards/rejected": -0.07195614278316498, + "step": 1310 + }, + { + "epoch": 1.044479745830024, + "grad_norm": 33.546234130859375, + "learning_rate": 1.3788185828493344e-06, + "log_odds_chosen": 2.3227920532226562, + "log_odds_ratio": -0.12674236297607422, + "logits/chosen": 265.67144775390625, + "logits/rejected": 258.4465637207031, + "logps/chosen": -0.553837239742279, + "logps/rejected": -1.858119249343872, + "loss": 0.8738, + "nll_loss": 0.9288773536682129, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.027691861614584923, + "rewards/margins": 0.06521410495042801, + "rewards/rejected": -0.09290595352649689, + "step": 1315 + }, + { + "epoch": 1.0484511517077044, + "grad_norm": 29.37790298461914, + "learning_rate": 1.376204706407951e-06, + "log_odds_chosen": 1.2011265754699707, + "log_odds_ratio": -0.3517860472202301, + "logits/chosen": 263.21441650390625, + "logits/rejected": 300.9775085449219, + "logps/chosen": -0.7342194318771362, + "logps/rejected": -1.4895966053009033, + "loss": 0.8622, + "nll_loss": 0.894271731376648, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03671097382903099, + "rewards/margins": 0.03776886314153671, + "rewards/rejected": -0.0744798332452774, + "step": 1320 + }, + { + "epoch": 1.0524225575853852, + "grad_norm": 43.21280288696289, + "learning_rate": 1.3736056394868905e-06, + "log_odds_chosen": 1.9679548740386963, + "log_odds_ratio": -0.14373886585235596, + "logits/chosen": 258.61346435546875, + "logits/rejected": 458.6058044433594, + "logps/chosen": -0.5523756742477417, + "logps/rejected": -1.7980626821517944, + "loss": 0.6717, + "nll_loss": 0.6116268634796143, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.027618780732154846, + "rewards/margins": 0.062284357845783234, + "rewards/rejected": -0.08990313112735748, + "step": 1325 + }, + { + "epoch": 1.056393963463066, + "grad_norm": 22.586336135864258, + "learning_rate": 1.3710212427677044e-06, + "log_odds_chosen": 2.0330963134765625, + "log_odds_ratio": -0.23092810809612274, + "logits/chosen": 466.9287109375, + "logits/rejected": 251.6785430908203, + "logps/chosen": -0.4764311909675598, + "logps/rejected": -1.5419471263885498, + "loss": 0.9319, + "nll_loss": 0.8202090263366699, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02382155880331993, + "rewards/margins": 0.05327579379081726, + "rewards/rejected": -0.07709735631942749, + "step": 1330 + }, + { + "epoch": 1.0603653693407467, + "grad_norm": 24.074445724487305, + "learning_rate": 1.3684513787599335e-06, + "log_odds_chosen": 1.8354690074920654, + "log_odds_ratio": -0.24452456831932068, + "logits/chosen": 311.4925537109375, + "logits/rejected": 325.05133056640625, + "logps/chosen": -0.4112465977668762, + "logps/rejected": -1.3696399927139282, + "loss": 0.7552, + "nll_loss": 0.7564548254013062, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02056233026087284, + "rewards/margins": 0.04791966825723648, + "rewards/rejected": -0.06848199665546417, + "step": 1335 + }, + { + "epoch": 1.0643367752184274, + "grad_norm": 36.35709762573242, + "learning_rate": 1.3658959117703826e-06, + "log_odds_chosen": 1.767221212387085, + "log_odds_ratio": -0.24831262230873108, + "logits/chosen": 301.0987243652344, + "logits/rejected": 408.99468994140625, + "logps/chosen": -0.37535279989242554, + "logps/rejected": -1.3626410961151123, + "loss": 0.823, + "nll_loss": 0.6886339783668518, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.018767639994621277, + "rewards/margins": 0.04936441406607628, + "rewards/rejected": -0.06813205778598785, + "step": 1340 + }, + { + "epoch": 1.068308181096108, + "grad_norm": 68.75084686279297, + "learning_rate": 1.3633547078730297e-06, + "log_odds_chosen": 2.1947848796844482, + "log_odds_ratio": -0.12946011126041412, + "logits/chosen": 387.3055114746094, + "logits/rejected": 310.6745300292969, + "logps/chosen": -0.3506600260734558, + "logps/rejected": -1.50771164894104, + "loss": 0.7823, + "nll_loss": 0.8046265840530396, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01753300055861473, + "rewards/margins": 0.05785257741808891, + "rewards/rejected": -0.07538558542728424, + "step": 1345 + }, + { + "epoch": 1.0722795869737887, + "grad_norm": 38.153602600097656, + "learning_rate": 1.3608276348795436e-06, + "log_odds_chosen": 2.0112361907958984, + "log_odds_ratio": -0.2819617688655853, + "logits/chosen": 268.81610107421875, + "logits/rejected": 327.9646911621094, + "logps/chosen": -0.4526129364967346, + "logps/rejected": -1.295741319656372, + "loss": 0.7329, + "nll_loss": 0.6641907691955566, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.02263064496219158, + "rewards/margins": 0.04215642064809799, + "rewards/rejected": -0.06478706747293472, + "step": 1350 + }, + { + "epoch": 1.0762509928514694, + "grad_norm": 32.85861587524414, + "learning_rate": 1.3583145623104033e-06, + "log_odds_chosen": 1.1966564655303955, + "log_odds_ratio": -0.3293878138065338, + "logits/chosen": 249.0363311767578, + "logits/rejected": 405.04937744140625, + "logps/chosen": -0.6460349559783936, + "logps/rejected": -1.4099568128585815, + "loss": 0.8066, + "nll_loss": 0.7530792951583862, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03230174630880356, + "rewards/margins": 0.03819609433412552, + "rewards/rejected": -0.07049783319234848, + "step": 1355 + }, + { + "epoch": 1.0802223987291502, + "grad_norm": 35.103965759277344, + "learning_rate": 1.355815361366601e-06, + "log_odds_chosen": 1.2699463367462158, + "log_odds_ratio": -0.31867465376853943, + "logits/chosen": 364.13507080078125, + "logits/rejected": 267.37847900390625, + "logps/chosen": -0.6191684007644653, + "logps/rejected": -1.3752472400665283, + "loss": 0.6163, + "nll_loss": 0.6952677965164185, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.030958417803049088, + "rewards/margins": 0.03780394047498703, + "rewards/rejected": -0.06876234710216522, + "step": 1360 + }, + { + "epoch": 1.084193804606831, + "grad_norm": 30.19273567199707, + "learning_rate": 1.353329904901917e-06, + "log_odds_chosen": 1.201957106590271, + "log_odds_ratio": -0.3093491494655609, + "logits/chosen": 413.76995849609375, + "logits/rejected": 276.65838623046875, + "logps/chosen": -0.4616571068763733, + "logps/rejected": -1.0381189584732056, + "loss": 0.9525, + "nll_loss": 0.9139319658279419, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.023082856088876724, + "rewards/margins": 0.028823088854551315, + "rewards/rejected": -0.05190594866871834, + "step": 1365 + }, + { + "epoch": 1.0881652104845114, + "grad_norm": 24.173643112182617, + "learning_rate": 1.350858067395748e-06, + "log_odds_chosen": 2.342852830886841, + "log_odds_ratio": -0.15998277068138123, + "logits/chosen": 344.3056335449219, + "logits/rejected": 283.6306457519531, + "logps/chosen": -0.49249267578125, + "logps/rejected": -2.0865066051483154, + "loss": 0.8197, + "nll_loss": 1.0569822788238525, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02462463639676571, + "rewards/margins": 0.07970069348812103, + "rewards/rejected": -0.10432533174753189, + "step": 1370 + }, + { + "epoch": 1.0921366163621922, + "grad_norm": 32.355491638183594, + "learning_rate": 1.3483997249264844e-06, + "log_odds_chosen": 1.5590509176254272, + "log_odds_ratio": -0.2487892359495163, + "logits/chosen": 316.20159912109375, + "logits/rejected": 294.17279052734375, + "logps/chosen": -0.5574954748153687, + "logps/rejected": -1.4713170528411865, + "loss": 0.8676, + "nll_loss": 0.797051191329956, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.027874771505594254, + "rewards/margins": 0.04569108411669731, + "rewards/rejected": -0.07356585562229156, + "step": 1375 + }, + { + "epoch": 1.096108022239873, + "grad_norm": 29.341623306274414, + "learning_rate": 1.345954755145414e-06, + "log_odds_chosen": 2.3310647010803223, + "log_odds_ratio": -0.11525207757949829, + "logits/chosen": 308.07275390625, + "logits/rejected": 301.0416259765625, + "logps/chosen": -0.6554034352302551, + "logps/rejected": -2.0731258392333984, + "loss": 0.9803, + "nll_loss": 1.159543514251709, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.032770175486803055, + "rewards/margins": 0.07088612020015717, + "rewards/rejected": -0.10365629196166992, + "step": 1380 + }, + { + "epoch": 1.1000794281175537, + "grad_norm": 41.638519287109375, + "learning_rate": 1.3435230372511476e-06, + "log_odds_chosen": 1.702256441116333, + "log_odds_ratio": -0.24031396210193634, + "logits/chosen": 422.57574462890625, + "logits/rejected": 285.2515563964844, + "logps/chosen": -0.39290952682495117, + "logps/rejected": -1.281812071800232, + "loss": 0.6728, + "nll_loss": 0.5043781995773315, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01964547671377659, + "rewards/margins": 0.04444512724876404, + "rewards/rejected": -0.06409060209989548, + "step": 1385 + }, + { + "epoch": 1.1040508339952344, + "grad_norm": 27.213613510131836, + "learning_rate": 1.3411044519645502e-06, + "log_odds_chosen": 2.5730555057525635, + "log_odds_ratio": -0.0975516214966774, + "logits/chosen": 320.9614562988281, + "logits/rejected": 307.90142822265625, + "logps/chosen": -0.4752315878868103, + "logps/rejected": -2.1970314979553223, + "loss": 0.8552, + "nll_loss": 0.7889829874038696, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.023761581629514694, + "rewards/margins": 0.08608999848365784, + "rewards/rejected": -0.10985157638788223, + "step": 1390 + }, + { + "epoch": 1.108022239872915, + "grad_norm": 36.346641540527344, + "learning_rate": 1.3386988815041649e-06, + "log_odds_chosen": 2.5185952186584473, + "log_odds_ratio": -0.08284667134284973, + "logits/chosen": 272.705078125, + "logits/rejected": 315.05780029296875, + "logps/chosen": -0.4241599440574646, + "logps/rejected": -2.0232601165771484, + "loss": 0.8018, + "nll_loss": 0.7871303558349609, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02120799943804741, + "rewards/margins": 0.07995501160621643, + "rewards/rejected": -0.10116300731897354, + "step": 1395 + }, + { + "epoch": 1.1119936457505957, + "grad_norm": 48.55631637573242, + "learning_rate": 1.3363062095621222e-06, + "log_odds_chosen": 1.3166413307189941, + "log_odds_ratio": -0.2837154269218445, + "logits/chosen": 345.50860595703125, + "logits/rejected": 293.98748779296875, + "logps/chosen": -0.6463770866394043, + "logps/rejected": -1.450761318206787, + "loss": 0.9813, + "nll_loss": 0.8488261103630066, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.032318856567144394, + "rewards/margins": 0.04021921008825302, + "rewards/rejected": -0.07253806293010712, + "step": 1400 + }, + { + "epoch": 1.1159650516282764, + "grad_norm": 24.538806915283203, + "learning_rate": 1.3339263212805207e-06, + "log_odds_chosen": 1.0076740980148315, + "log_odds_ratio": -0.4034115672111511, + "logits/chosen": 276.04364013671875, + "logits/rejected": 311.43414306640625, + "logps/chosen": -0.606917142868042, + "logps/rejected": -1.1501991748809814, + "loss": 0.8539, + "nll_loss": 0.8009790182113647, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.0303458571434021, + "rewards/margins": 0.02716410532593727, + "rewards/rejected": -0.05750995874404907, + "step": 1405 + }, + { + "epoch": 1.1199364575059572, + "grad_norm": 38.5094108581543, + "learning_rate": 1.3315591032282687e-06, + "log_odds_chosen": 1.1049978733062744, + "log_odds_ratio": -0.3262965977191925, + "logits/chosen": 312.492919921875, + "logits/rejected": 300.9084167480469, + "logps/chosen": -0.4392150044441223, + "logps/rejected": -0.9970752596855164, + "loss": 0.8508, + "nll_loss": 0.6344660520553589, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.021960750222206116, + "rewards/margins": 0.027893012389540672, + "rewards/rejected": -0.04985376447439194, + "step": 1410 + }, + { + "epoch": 1.123907863383638, + "grad_norm": 28.918603897094727, + "learning_rate": 1.3292044433783766e-06, + "log_odds_chosen": 1.1254879236221313, + "log_odds_ratio": -0.3050915598869324, + "logits/chosen": 357.88006591796875, + "logits/rejected": 243.5061492919922, + "logps/chosen": -0.5345466732978821, + "logps/rejected": -1.1529901027679443, + "loss": 0.8655, + "nll_loss": 0.7837560772895813, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.026727333664894104, + "rewards/margins": 0.030922168865799904, + "rewards/rejected": -0.05764950439333916, + "step": 1415 + }, + { + "epoch": 1.1278792692613184, + "grad_norm": 27.14815330505371, + "learning_rate": 1.3268622310856882e-06, + "log_odds_chosen": 1.6597429513931274, + "log_odds_ratio": -0.24181696772575378, + "logits/chosen": 328.18450927734375, + "logits/rejected": 278.3966369628906, + "logps/chosen": -0.5262452363967896, + "logps/rejected": -1.241698980331421, + "loss": 0.8028, + "nll_loss": 0.7854543924331665, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.026312265545129776, + "rewards/margins": 0.03577268496155739, + "rewards/rejected": -0.06208494305610657, + "step": 1420 + }, + { + "epoch": 1.1318506751389992, + "grad_norm": 38.537261962890625, + "learning_rate": 1.324532357065044e-06, + "log_odds_chosen": 2.3596420288085938, + "log_odds_ratio": -0.10275080054998398, + "logits/chosen": 410.06903076171875, + "logits/rejected": 263.68353271484375, + "logps/chosen": -0.2327149659395218, + "logps/rejected": -1.2866109609603882, + "loss": 0.6069, + "nll_loss": 0.612027645111084, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01163574866950512, + "rewards/margins": 0.05269480496644974, + "rewards/rejected": -0.0643305554986, + "step": 1425 + }, + { + "epoch": 1.13582208101668, + "grad_norm": 32.535560607910156, + "learning_rate": 1.3222147133698626e-06, + "log_odds_chosen": 0.9134140014648438, + "log_odds_ratio": -0.42213669419288635, + "logits/chosen": 327.38427734375, + "logits/rejected": 323.18572998046875, + "logps/chosen": -0.7350178360939026, + "logps/rejected": -1.153857707977295, + "loss": 0.8071, + "nll_loss": 0.8739555478096008, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03675089031457901, + "rewards/margins": 0.020941998809576035, + "rewards/rejected": -0.057692885398864746, + "step": 1430 + }, + { + "epoch": 1.1397934868943607, + "grad_norm": 25.7900390625, + "learning_rate": 1.3199091933711366e-06, + "log_odds_chosen": 1.2775976657867432, + "log_odds_ratio": -0.38191455602645874, + "logits/chosen": 259.1048889160156, + "logits/rejected": 494.07733154296875, + "logps/chosen": -0.8194534182548523, + "logps/rejected": -1.699428915977478, + "loss": 1.0446, + "nll_loss": 0.9419466853141785, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.040972668677568436, + "rewards/margins": 0.043998777866363525, + "rewards/rejected": -0.08497145026922226, + "step": 1435 + }, + { + "epoch": 1.1437648927720412, + "grad_norm": 30.255916595458984, + "learning_rate": 1.3176156917368248e-06, + "log_odds_chosen": 1.8949663639068604, + "log_odds_ratio": -0.20502403378486633, + "logits/chosen": 262.95159912109375, + "logits/rejected": 348.63519287109375, + "logps/chosen": -0.4378899037837982, + "logps/rejected": -1.4804574251174927, + "loss": 0.8135, + "nll_loss": 0.5400761365890503, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02189449779689312, + "rewards/margins": 0.05212836712598801, + "rewards/rejected": -0.07402287423610687, + "step": 1440 + }, + { + "epoch": 1.147736298649722, + "grad_norm": 59.08807373046875, + "learning_rate": 1.315334104411641e-06, + "log_odds_chosen": 1.4889800548553467, + "log_odds_ratio": -0.3159145712852478, + "logits/chosen": 288.60455322265625, + "logits/rejected": 278.4151306152344, + "logps/chosen": -0.516040563583374, + "logps/rejected": -1.5193082094192505, + "loss": 0.7279, + "nll_loss": 0.6749471426010132, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02580202743411064, + "rewards/margins": 0.0501633882522583, + "rewards/rejected": -0.07596541941165924, + "step": 1445 + }, + { + "epoch": 1.1517077045274027, + "grad_norm": 33.056968688964844, + "learning_rate": 1.3130643285972255e-06, + "log_odds_chosen": 1.6463083028793335, + "log_odds_ratio": -0.2137158215045929, + "logits/chosen": 330.17108154296875, + "logits/rejected": 285.6957092285156, + "logps/chosen": -0.305833101272583, + "logps/rejected": -0.9774319529533386, + "loss": 0.9029, + "nll_loss": 1.1484712362289429, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01529165543615818, + "rewards/margins": 0.03357994556427002, + "rewards/rejected": -0.04887159913778305, + "step": 1450 + }, + { + "epoch": 1.1556791104050834, + "grad_norm": 26.98676872253418, + "learning_rate": 1.310806262732691e-06, + "log_odds_chosen": 1.2410787343978882, + "log_odds_ratio": -0.45242589712142944, + "logits/chosen": 357.4566955566406, + "logits/rejected": 286.59906005859375, + "logps/chosen": -0.4584870934486389, + "logps/rejected": -1.169476866722107, + "loss": 0.7066, + "nll_loss": 0.6666980981826782, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.022924354299902916, + "rewards/margins": 0.03554948791861534, + "rewards/rejected": -0.058473847806453705, + "step": 1455 + }, + { + "epoch": 1.1596505162827642, + "grad_norm": 25.189367294311523, + "learning_rate": 1.3085598064755342e-06, + "log_odds_chosen": 2.0372042655944824, + "log_odds_ratio": -0.2485310137271881, + "logits/chosen": 343.59710693359375, + "logits/rejected": 282.86785888671875, + "logps/chosen": -0.40503281354904175, + "logps/rejected": -1.3477602005004883, + "loss": 0.7799, + "nll_loss": 0.6682797074317932, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.020251641049981117, + "rewards/margins": 0.04713636636734009, + "rewards/rejected": -0.06738801300525665, + "step": 1460 + }, + { + "epoch": 1.163621922160445, + "grad_norm": 49.8707160949707, + "learning_rate": 1.3063248606829104e-06, + "log_odds_chosen": 3.3834662437438965, + "log_odds_ratio": -0.06317798793315887, + "logits/chosen": 316.927978515625, + "logits/rejected": 278.1805419921875, + "logps/chosen": -0.21554477512836456, + "logps/rejected": -1.945351004600525, + "loss": 0.9273, + "nll_loss": 0.9023275375366211, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010777238756418228, + "rewards/margins": 0.08649031817913055, + "rewards/rejected": -0.09726755321025848, + "step": 1465 + }, + { + "epoch": 1.1675933280381254, + "grad_norm": 38.0960807800293, + "learning_rate": 1.3041013273932528e-06, + "log_odds_chosen": 1.1775424480438232, + "log_odds_ratio": -0.2878747582435608, + "logits/chosen": 349.3218688964844, + "logits/rejected": 271.8228759765625, + "logps/chosen": -0.45183151960372925, + "logps/rejected": -0.9361956715583801, + "loss": 0.777, + "nll_loss": 0.9663525819778442, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.022591574117541313, + "rewards/margins": 0.024218203499913216, + "rewards/rejected": -0.04680977761745453, + "step": 1470 + }, + { + "epoch": 1.1715647339158062, + "grad_norm": 36.659915924072266, + "learning_rate": 1.301889109808239e-06, + "log_odds_chosen": 2.377092123031616, + "log_odds_ratio": -0.11894341558218002, + "logits/chosen": 331.7478942871094, + "logits/rejected": 236.0290985107422, + "logps/chosen": -0.2474537193775177, + "logps/rejected": -1.3175318241119385, + "loss": 0.7939, + "nll_loss": 1.1325935125350952, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012372685596346855, + "rewards/margins": 0.05350390076637268, + "rewards/rejected": -0.06587658822536469, + "step": 1475 + }, + { + "epoch": 1.175536139793487, + "grad_norm": 34.146446228027344, + "learning_rate": 1.299688112275091e-06, + "log_odds_chosen": 2.390129566192627, + "log_odds_ratio": -0.13596001267433167, + "logits/chosen": 322.39208984375, + "logits/rejected": 314.0200500488281, + "logps/chosen": -0.3084662854671478, + "logps/rejected": -1.475559949874878, + "loss": 0.8092, + "nll_loss": 0.9278243780136108, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.015423314645886421, + "rewards/margins": 0.0583546943962574, + "rewards/rejected": -0.07377800345420837, + "step": 1480 + }, + { + "epoch": 1.1795075456711677, + "grad_norm": 27.04242706298828, + "learning_rate": 1.2974982402692051e-06, + "log_odds_chosen": 1.809501051902771, + "log_odds_ratio": -0.16602441668510437, + "logits/chosen": 296.98419189453125, + "logits/rejected": 283.3446350097656, + "logps/chosen": -0.40387874841690063, + "logps/rejected": -1.3692766427993774, + "loss": 0.7862, + "nll_loss": 0.5465682148933411, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02019393816590309, + "rewards/margins": 0.04826989397406578, + "rewards/rejected": -0.06846383959054947, + "step": 1485 + }, + { + "epoch": 1.1834789515488482, + "grad_norm": 28.75420379638672, + "learning_rate": 1.2953194003770995e-06, + "log_odds_chosen": 0.6536625623703003, + "log_odds_ratio": -0.6314162015914917, + "logits/chosen": 300.7778625488281, + "logits/rejected": 250.1493682861328, + "logps/chosen": -0.7219634056091309, + "logps/rejected": -1.1960614919662476, + "loss": 0.8307, + "nll_loss": 0.9051557779312134, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.036098167300224304, + "rewards/margins": 0.023704906925559044, + "rewards/rejected": -0.059803079813718796, + "step": 1490 + }, + { + "epoch": 1.187450357426529, + "grad_norm": 34.966590881347656, + "learning_rate": 1.2931515002796793e-06, + "log_odds_chosen": 2.052030086517334, + "log_odds_ratio": -0.19160158932209015, + "logits/chosen": 320.68011474609375, + "logits/rejected": 255.47189331054688, + "logps/chosen": -0.40166154503822327, + "logps/rejected": -1.3832619190216064, + "loss": 0.7472, + "nll_loss": 0.7854171991348267, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.020083077251911163, + "rewards/margins": 0.0490800216794014, + "rewards/rejected": -0.06916309893131256, + "step": 1495 + }, + { + "epoch": 1.1914217633042097, + "grad_norm": 23.868934631347656, + "learning_rate": 1.2909944487358056e-06, + "log_odds_chosen": 1.3916871547698975, + "log_odds_ratio": -0.513548731803894, + "logits/chosen": 264.5072326660156, + "logits/rejected": 357.7003173828125, + "logps/chosen": -0.6542052030563354, + "logps/rejected": -1.6498836278915405, + "loss": 0.8925, + "nll_loss": 0.763934314250946, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.03271026164293289, + "rewards/margins": 0.04978392273187637, + "rewards/rejected": -0.08249418437480927, + "step": 1500 + }, + { + "epoch": 1.1953931691818904, + "grad_norm": 28.471527099609375, + "learning_rate": 1.2888481555661678e-06, + "log_odds_chosen": 1.3641421794891357, + "log_odds_ratio": -0.3730069696903229, + "logits/chosen": 452.96124267578125, + "logits/rejected": 295.983154296875, + "logps/chosen": -0.5330772995948792, + "logps/rejected": -1.2659406661987305, + "loss": 0.7427, + "nll_loss": 0.723406195640564, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.026653865352272987, + "rewards/margins": 0.036643169820308685, + "rewards/rejected": -0.06329703330993652, + "step": 1505 + }, + { + "epoch": 1.1993645750595712, + "grad_norm": 35.46685791015625, + "learning_rate": 1.286712531637447e-06, + "log_odds_chosen": 1.6957210302352905, + "log_odds_ratio": -0.2755866050720215, + "logits/chosen": 238.7939453125, + "logits/rejected": 365.7698059082031, + "logps/chosen": -0.5120083689689636, + "logps/rejected": -1.307417631149292, + "loss": 0.7483, + "nll_loss": 0.7828346490859985, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.02560041844844818, + "rewards/margins": 0.0397704653441906, + "rewards/rejected": -0.06537088751792908, + "step": 1510 + }, + { + "epoch": 1.2033359809372517, + "grad_norm": 59.663177490234375, + "learning_rate": 1.2845874888467698e-06, + "log_odds_chosen": 3.4014511108398438, + "log_odds_ratio": -0.07314275205135345, + "logits/chosen": 462.5450134277344, + "logits/rejected": 248.7952117919922, + "logps/chosen": -0.19772595167160034, + "logps/rejected": -2.0527455806732178, + "loss": 0.8414, + "nll_loss": 0.6311348676681519, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009886298328638077, + "rewards/margins": 0.09275098145008087, + "rewards/rejected": -0.10263729095458984, + "step": 1515 + }, + { + "epoch": 1.2073073868149324, + "grad_norm": 52.45457077026367, + "learning_rate": 1.282472940106443e-06, + "log_odds_chosen": 1.8471969366073608, + "log_odds_ratio": -0.34033486247062683, + "logits/chosen": 359.6947937011719, + "logits/rejected": 281.2830810546875, + "logps/chosen": -0.503508448600769, + "logps/rejected": -1.4968483448028564, + "loss": 0.9057, + "nll_loss": 0.9135835766792297, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.025175422430038452, + "rewards/margins": 0.04966699704527855, + "rewards/rejected": -0.0748424157500267, + "step": 1520 + }, + { + "epoch": 1.2112787926926132, + "grad_norm": 47.01854705810547, + "learning_rate": 1.28036879932896e-06, + "log_odds_chosen": 1.5671476125717163, + "log_odds_ratio": -0.2781728208065033, + "logits/chosen": 259.2211608886719, + "logits/rejected": 273.3630065917969, + "logps/chosen": -0.612583339214325, + "logps/rejected": -1.3503682613372803, + "loss": 0.8654, + "nll_loss": 1.0714460611343384, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.030629169195890427, + "rewards/margins": 0.03688924387097359, + "rewards/rejected": -0.06751841306686401, + "step": 1525 + }, + { + "epoch": 1.215250198570294, + "grad_norm": 27.87078094482422, + "learning_rate": 1.278274981412284e-06, + "log_odds_chosen": 1.4453420639038086, + "log_odds_ratio": -0.2786465585231781, + "logits/chosen": 302.4069519042969, + "logits/rejected": 312.5154113769531, + "logps/chosen": -0.3448793292045593, + "logps/rejected": -1.0912384986877441, + "loss": 0.7583, + "nll_loss": 0.6790813207626343, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.017243966460227966, + "rewards/margins": 0.03731795400381088, + "rewards/rejected": -0.05456192418932915, + "step": 1530 + }, + { + "epoch": 1.2192216044479747, + "grad_norm": 69.64622497558594, + "learning_rate": 1.2761914022253899e-06, + "log_odds_chosen": 1.012123703956604, + "log_odds_ratio": -0.41925907135009766, + "logits/chosen": 343.95050048828125, + "logits/rejected": 260.3720703125, + "logps/chosen": -0.5246344208717346, + "logps/rejected": -1.1103763580322266, + "loss": 0.723, + "nll_loss": 0.7898105382919312, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.02623172104358673, + "rewards/margins": 0.029287094250321388, + "rewards/rejected": -0.05551881715655327, + "step": 1535 + }, + { + "epoch": 1.2231930103256552, + "grad_norm": 23.33427619934082, + "learning_rate": 1.2741179785940638e-06, + "log_odds_chosen": 1.9270483255386353, + "log_odds_ratio": -0.16388998925685883, + "logits/chosen": 343.7916259765625, + "logits/rejected": 238.6454315185547, + "logps/chosen": -0.3710968494415283, + "logps/rejected": -1.407012701034546, + "loss": 0.6983, + "nll_loss": 0.45011359453201294, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.018554842099547386, + "rewards/margins": 0.051795799285173416, + "rewards/rejected": -0.07035063207149506, + "step": 1540 + }, + { + "epoch": 1.227164416203336, + "grad_norm": 26.319948196411133, + "learning_rate": 1.2720546282869612e-06, + "log_odds_chosen": 2.203964948654175, + "log_odds_ratio": -0.11018653959035873, + "logits/chosen": 414.2149353027344, + "logits/rejected": 317.75274658203125, + "logps/chosen": -0.2872838079929352, + "logps/rejected": -1.3737704753875732, + "loss": 0.7333, + "nll_loss": 0.5472729802131653, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.014364190399646759, + "rewards/margins": 0.05432434007525444, + "rewards/rejected": -0.0686885267496109, + "step": 1545 + }, + { + "epoch": 1.2311358220810167, + "grad_norm": 37.496543884277344, + "learning_rate": 1.270001270001905e-06, + "log_odds_chosen": 1.640875220298767, + "log_odds_ratio": -0.21115879714488983, + "logits/chosen": 268.22589111328125, + "logits/rejected": 362.2005310058594, + "logps/chosen": -0.3301395773887634, + "logps/rejected": -1.0716874599456787, + "loss": 0.826, + "nll_loss": 0.8113988637924194, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0165069792419672, + "rewards/margins": 0.037077389657497406, + "rewards/rejected": -0.053584374487400055, + "step": 1550 + }, + { + "epoch": 1.2351072279586974, + "grad_norm": 35.05588912963867, + "learning_rate": 1.2679578233524345e-06, + "log_odds_chosen": 1.1360805034637451, + "log_odds_ratio": -0.3080361485481262, + "logits/chosen": 295.03118896484375, + "logits/rejected": 284.44036865234375, + "logps/chosen": -0.5859525203704834, + "logps/rejected": -1.241891860961914, + "loss": 0.872, + "nll_loss": 0.9364341497421265, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02929762564599514, + "rewards/margins": 0.03279696777462959, + "rewards/rejected": -0.06209459900856018, + "step": 1555 + }, + { + "epoch": 1.2390786338363782, + "grad_norm": 31.318214416503906, + "learning_rate": 1.2659242088545834e-06, + "log_odds_chosen": 0.8039556741714478, + "log_odds_ratio": -0.46843037009239197, + "logits/chosen": 237.40576171875, + "logits/rejected": 364.1980895996094, + "logps/chosen": -0.7670692801475525, + "logps/rejected": -1.2011711597442627, + "loss": 0.8833, + "nll_loss": 1.014811635017395, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.038353465497493744, + "rewards/margins": 0.021705087274312973, + "rewards/rejected": -0.060058556497097015, + "step": 1560 + }, + { + "epoch": 1.2430500397140587, + "grad_norm": 29.747224807739258, + "learning_rate": 1.2639003479138966e-06, + "log_odds_chosen": 1.1694443225860596, + "log_odds_ratio": -0.32304924726486206, + "logits/chosen": 270.96624755859375, + "logits/rejected": 265.6252746582031, + "logps/chosen": -0.5855724811553955, + "logps/rejected": -1.1885181665420532, + "loss": 0.8378, + "nll_loss": 0.8840498924255371, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.029278624802827835, + "rewards/margins": 0.030147280544042587, + "rewards/rejected": -0.05942590907216072, + "step": 1565 + }, + { + "epoch": 1.2470214455917394, + "grad_norm": 34.243370056152344, + "learning_rate": 1.261886162812672e-06, + "log_odds_chosen": 0.6181503534317017, + "log_odds_ratio": -0.5344885587692261, + "logits/chosen": 244.2056884765625, + "logits/rejected": 316.17681884765625, + "logps/chosen": -1.0504838228225708, + "logps/rejected": -1.4136704206466675, + "loss": 0.826, + "nll_loss": 0.9619119763374329, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.05252418667078018, + "rewards/margins": 0.018159329891204834, + "rewards/rejected": -0.07068352401256561, + "step": 1570 + }, + { + "epoch": 1.2509928514694202, + "grad_norm": 35.988731384277344, + "learning_rate": 1.259881576697424e-06, + "log_odds_chosen": 2.3048176765441895, + "log_odds_ratio": -0.19754526019096375, + "logits/chosen": 287.938232421875, + "logits/rejected": 292.93804931640625, + "logps/chosen": -0.5019484758377075, + "logps/rejected": -2.027036428451538, + "loss": 0.7158, + "nll_loss": 0.6203959584236145, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.025097424164414406, + "rewards/margins": 0.07625441253185272, + "rewards/rejected": -0.10135183483362198, + "step": 1575 + }, + { + "epoch": 1.254964257347101, + "grad_norm": 23.905803680419922, + "learning_rate": 1.257886513566569e-06, + "log_odds_chosen": 1.3767532110214233, + "log_odds_ratio": -0.24111056327819824, + "logits/chosen": 289.76873779296875, + "logits/rejected": 291.0559997558594, + "logps/chosen": -0.44200173020362854, + "logps/rejected": -1.0347238779067993, + "loss": 0.6936, + "nll_loss": 0.5990425944328308, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.022100087255239487, + "rewards/margins": 0.029636109247803688, + "rewards/rejected": -0.05173618718981743, + "step": 1580 + }, + { + "epoch": 1.2589356632247815, + "grad_norm": 40.032470703125, + "learning_rate": 1.255900898258321e-06, + "log_odds_chosen": 2.5157618522644043, + "log_odds_ratio": -0.21077406406402588, + "logits/chosen": 385.93560791015625, + "logits/rejected": 246.05679321289062, + "logps/chosen": -0.43296509981155396, + "logps/rejected": -1.9274288415908813, + "loss": 0.7974, + "nll_loss": 0.832492470741272, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.021648254245519638, + "rewards/margins": 0.07472319900989532, + "rewards/rejected": -0.09637145698070526, + "step": 1585 + }, + { + "epoch": 1.2629070691024622, + "grad_norm": 27.383275985717773, + "learning_rate": 1.253924656438798e-06, + "log_odds_chosen": 0.7217229008674622, + "log_odds_ratio": -0.443845272064209, + "logits/chosen": 387.6333923339844, + "logits/rejected": 236.80252075195312, + "logps/chosen": -0.6088986396789551, + "logps/rejected": -0.9363006353378296, + "loss": 0.7629, + "nll_loss": 0.7160018682479858, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.030444931238889694, + "rewards/margins": 0.016370099037885666, + "rewards/rejected": -0.04681503027677536, + "step": 1590 + }, + { + "epoch": 1.266878474980143, + "grad_norm": 27.825769424438477, + "learning_rate": 1.2519577145903362e-06, + "log_odds_chosen": 2.075462818145752, + "log_odds_ratio": -0.13059628009796143, + "logits/chosen": 313.5865173339844, + "logits/rejected": 298.07293701171875, + "logps/chosen": -0.3759838938713074, + "logps/rejected": -1.504678726196289, + "loss": 0.6985, + "nll_loss": 0.556339681148529, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0187991950660944, + "rewards/margins": 0.056434739381074905, + "rewards/rejected": -0.07523393630981445, + "step": 1595 + }, + { + "epoch": 1.2708498808578237, + "grad_norm": 36.40824508666992, + "learning_rate": 1.25e-06, + "log_odds_chosen": 1.3817975521087646, + "log_odds_ratio": -0.32284778356552124, + "logits/chosen": 378.4946594238281, + "logits/rejected": 315.1073913574219, + "logps/chosen": -0.4628058969974518, + "logps/rejected": -1.0874122381210327, + "loss": 0.7269, + "nll_loss": 0.6962202191352844, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02314029261469841, + "rewards/margins": 0.031230319291353226, + "rewards/rejected": -0.054370611906051636, + "step": 1600 + }, + { + "epoch": 1.2748212867355044, + "grad_norm": 51.04415512084961, + "learning_rate": 1.2480514407482947e-06, + "log_odds_chosen": 2.1562647819519043, + "log_odds_ratio": -0.11893711239099503, + "logits/chosen": 224.6322479248047, + "logits/rejected": 447.8099670410156, + "logps/chosen": -0.3827270567417145, + "logps/rejected": -1.6206636428833008, + "loss": 0.7221, + "nll_loss": 0.48888301849365234, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.019136350601911545, + "rewards/margins": 0.06189683824777603, + "rewards/rejected": -0.08103318512439728, + "step": 1605 + }, + { + "epoch": 1.2787926926131852, + "grad_norm": 56.877532958984375, + "learning_rate": 1.246111965698067e-06, + "log_odds_chosen": 1.901529312133789, + "log_odds_ratio": -0.22505538165569305, + "logits/chosen": 287.8641052246094, + "logits/rejected": 321.10369873046875, + "logps/chosen": -0.4749757647514343, + "logps/rejected": -1.3261306285858154, + "loss": 0.9818, + "nll_loss": 1.1012128591537476, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.023748790845274925, + "rewards/margins": 0.042557746171951294, + "rewards/rejected": -0.06630653887987137, + "step": 1610 + }, + { + "epoch": 1.2827640984908657, + "grad_norm": 41.132686614990234, + "learning_rate": 1.244181504483599e-06, + "log_odds_chosen": 1.9316246509552002, + "log_odds_ratio": -0.1521489918231964, + "logits/chosen": 272.6374816894531, + "logits/rejected": 258.6047668457031, + "logps/chosen": -0.5842836499214172, + "logps/rejected": -1.6665958166122437, + "loss": 0.7013, + "nll_loss": 0.7633215188980103, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02921418286859989, + "rewards/margins": 0.05411560460925102, + "rewards/rejected": -0.08332978934049606, + "step": 1615 + }, + { + "epoch": 1.2867355043685464, + "grad_norm": 35.068145751953125, + "learning_rate": 1.2422599874998834e-06, + "log_odds_chosen": 2.0793488025665283, + "log_odds_ratio": -0.27748990058898926, + "logits/chosen": 249.419189453125, + "logits/rejected": 439.7445373535156, + "logps/chosen": -0.4987415373325348, + "logps/rejected": -1.9686177968978882, + "loss": 0.7522, + "nll_loss": 0.5755370259284973, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02493707649409771, + "rewards/margins": 0.07349381595849991, + "rewards/rejected": -0.09843089431524277, + "step": 1620 + }, + { + "epoch": 1.2907069102462272, + "grad_norm": 58.41203308105469, + "learning_rate": 1.2403473458920848e-06, + "log_odds_chosen": 2.0760154724121094, + "log_odds_ratio": -0.13080164790153503, + "logits/chosen": 346.79364013671875, + "logits/rejected": 283.41265869140625, + "logps/chosen": -0.5347332954406738, + "logps/rejected": -1.8155488967895508, + "loss": 0.8326, + "nll_loss": 0.8670746088027954, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02673666551709175, + "rewards/margins": 0.06404078006744385, + "rewards/rejected": -0.0907774418592453, + "step": 1625 + }, + { + "epoch": 1.294678316123908, + "grad_norm": 26.609634399414062, + "learning_rate": 1.238443511545175e-06, + "log_odds_chosen": 0.9718191027641296, + "log_odds_ratio": -0.4202337861061096, + "logits/chosen": 334.9565124511719, + "logits/rejected": 276.83258056640625, + "logps/chosen": -0.6160775423049927, + "logps/rejected": -1.0907124280929565, + "loss": 0.842, + "nll_loss": 0.9310005903244019, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.030803877860307693, + "rewards/margins": 0.023731743916869164, + "rewards/rejected": -0.05453561991453171, + "step": 1630 + }, + { + "epoch": 1.2986497220015885, + "grad_norm": 27.735910415649414, + "learning_rate": 1.236548417073745e-06, + "log_odds_chosen": 1.4771227836608887, + "log_odds_ratio": -0.31675320863723755, + "logits/chosen": 366.62310791015625, + "logits/rejected": 267.44036865234375, + "logps/chosen": -0.6183649301528931, + "logps/rejected": -1.462820291519165, + "loss": 0.8923, + "nll_loss": 0.8503854870796204, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.030918246135115623, + "rewards/margins": 0.042222760617733, + "rewards/rejected": -0.07314101606607437, + "step": 1635 + }, + { + "epoch": 1.3026211278792692, + "grad_norm": 42.74282455444336, + "learning_rate": 1.2346619958119873e-06, + "log_odds_chosen": 2.29691743850708, + "log_odds_ratio": -0.1986047625541687, + "logits/chosen": 323.72161865234375, + "logits/rejected": 377.61419677734375, + "logps/chosen": -0.38441476225852966, + "logps/rejected": -1.7476539611816406, + "loss": 0.9019, + "nll_loss": 0.8927809000015259, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.019220737740397453, + "rewards/margins": 0.0681619718670845, + "rewards/rejected": -0.0873827114701271, + "step": 1640 + }, + { + "epoch": 1.30659253375695, + "grad_norm": 26.081533432006836, + "learning_rate": 1.2327841818038448e-06, + "log_odds_chosen": 1.103318691253662, + "log_odds_ratio": -0.394954115152359, + "logits/chosen": 357.2593994140625, + "logits/rejected": 316.22930908203125, + "logps/chosen": -0.7667959928512573, + "logps/rejected": -1.315467119216919, + "loss": 0.7768, + "nll_loss": 1.0027587413787842, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.038339801132678986, + "rewards/margins": 0.02743355929851532, + "rewards/rejected": -0.0657733678817749, + "step": 1645 + }, + { + "epoch": 1.3105639396346307, + "grad_norm": 29.404010772705078, + "learning_rate": 1.2309149097933274e-06, + "log_odds_chosen": 2.014024257659912, + "log_odds_ratio": -0.2092103213071823, + "logits/chosen": 333.6972961425781, + "logits/rejected": 280.7713928222656, + "logps/chosen": -0.4117712378501892, + "logps/rejected": -1.3921597003936768, + "loss": 0.712, + "nll_loss": 0.6148799061775208, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02058856189250946, + "rewards/margins": 0.04901942238211632, + "rewards/rejected": -0.06960798799991608, + "step": 1650 + }, + { + "epoch": 1.3145353455123114, + "grad_norm": 30.204944610595703, + "learning_rate": 1.2290541152149845e-06, + "log_odds_chosen": 1.8049323558807373, + "log_odds_ratio": -0.21223409473896027, + "logits/chosen": 283.7622985839844, + "logits/rejected": 278.0716552734375, + "logps/chosen": -0.4594515860080719, + "logps/rejected": -1.5247669219970703, + "loss": 0.947, + "nll_loss": 0.7514600157737732, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.022972578182816505, + "rewards/margins": 0.0532657615840435, + "rewards/rejected": -0.07623834908008575, + "step": 1655 + }, + { + "epoch": 1.3185067513899922, + "grad_norm": 114.78897094726562, + "learning_rate": 1.2272017341845401e-06, + "log_odds_chosen": 2.2300524711608887, + "log_odds_ratio": -0.18504497408866882, + "logits/chosen": 371.3212890625, + "logits/rejected": 351.45355224609375, + "logps/chosen": -0.3518930673599243, + "logps/rejected": -1.3690972328186035, + "loss": 0.7413, + "nll_loss": 0.7462723851203918, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.017594654113054276, + "rewards/margins": 0.0508602038025856, + "rewards/rejected": -0.06845486164093018, + "step": 1660 + }, + { + "epoch": 1.3224781572676727, + "grad_norm": 30.150894165039062, + "learning_rate": 1.2253577034896796e-06, + "log_odds_chosen": 1.8445310592651367, + "log_odds_ratio": -0.18787363171577454, + "logits/chosen": 372.4892578125, + "logits/rejected": 307.2557373046875, + "logps/chosen": -0.4438856542110443, + "logps/rejected": -1.3671103715896606, + "loss": 0.7718, + "nll_loss": 0.7103620171546936, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.022194284945726395, + "rewards/margins": 0.0461612343788147, + "rewards/rejected": -0.06835552304983139, + "step": 1665 + }, + { + "epoch": 1.3264495631453534, + "grad_norm": 56.493675231933594, + "learning_rate": 1.223521960580991e-06, + "log_odds_chosen": 0.36352911591529846, + "log_odds_ratio": -0.5619034767150879, + "logits/chosen": 250.56436157226562, + "logits/rejected": 326.8438415527344, + "logps/chosen": -0.9630180597305298, + "logps/rejected": -1.1797587871551514, + "loss": 0.8786, + "nll_loss": 0.9603252410888672, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.04815090075135231, + "rewards/margins": 0.010837038978934288, + "rewards/rejected": -0.05898793786764145, + "step": 1670 + }, + { + "epoch": 1.3304209690230342, + "grad_norm": 46.662139892578125, + "learning_rate": 1.2216944435630524e-06, + "log_odds_chosen": 1.6360156536102295, + "log_odds_ratio": -0.20836324989795685, + "logits/chosen": 360.5296936035156, + "logits/rejected": 287.63836669921875, + "logps/chosen": -0.5066941976547241, + "logps/rejected": -1.322191596031189, + "loss": 0.7144, + "nll_loss": 0.834607720375061, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.025334710255265236, + "rewards/margins": 0.0407748706638813, + "rewards/rejected": -0.06610958278179169, + "step": 1675 + }, + { + "epoch": 1.3343923749007147, + "grad_norm": 38.70802307128906, + "learning_rate": 1.2198750911856664e-06, + "log_odds_chosen": 1.0125881433486938, + "log_odds_ratio": -0.40528297424316406, + "logits/chosen": 255.6591339111328, + "logits/rejected": 316.5745849609375, + "logps/chosen": -0.6354233026504517, + "logps/rejected": -1.1144318580627441, + "loss": 0.8726, + "nll_loss": 0.612061083316803, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.03177116811275482, + "rewards/margins": 0.023950431495904922, + "rewards/rejected": -0.055721599608659744, + "step": 1680 + }, + { + "epoch": 1.3383637807783955, + "grad_norm": 33.74903106689453, + "learning_rate": 1.2180638428352399e-06, + "log_odds_chosen": 1.447291374206543, + "log_odds_ratio": -0.2997308075428009, + "logits/chosen": 335.4109191894531, + "logits/rejected": 307.80523681640625, + "logps/chosen": -0.37111929059028625, + "logps/rejected": -0.817639172077179, + "loss": 0.7906, + "nll_loss": 0.7807439565658569, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.018555965274572372, + "rewards/margins": 0.022325992584228516, + "rewards/rejected": -0.04088195413351059, + "step": 1685 + }, + { + "epoch": 1.3423351866560762, + "grad_norm": 38.63343811035156, + "learning_rate": 1.2162606385262997e-06, + "log_odds_chosen": 2.291609525680542, + "log_odds_ratio": -0.19204507768154144, + "logits/chosen": 363.9112854003906, + "logits/rejected": 289.1708068847656, + "logps/chosen": -0.3431180715560913, + "logps/rejected": -1.5781381130218506, + "loss": 0.6895, + "nll_loss": 0.5135194659233093, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.017155904322862625, + "rewards/margins": 0.06175100803375244, + "rewards/rejected": -0.07890690863132477, + "step": 1690 + }, + { + "epoch": 1.346306592533757, + "grad_norm": 36.493934631347656, + "learning_rate": 1.2144654188931508e-06, + "log_odds_chosen": 1.6693967580795288, + "log_odds_ratio": -0.22054991126060486, + "logits/chosen": 329.10711669921875, + "logits/rejected": 326.18890380859375, + "logps/chosen": -0.4193757176399231, + "logps/rejected": -1.1420009136199951, + "loss": 0.8402, + "nll_loss": 0.5613064765930176, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.020968783646821976, + "rewards/margins": 0.03613125532865524, + "rewards/rejected": -0.057100046426057816, + "step": 1695 + }, + { + "epoch": 1.3502779984114377, + "grad_norm": 41.491329193115234, + "learning_rate": 1.2126781251816649e-06, + "log_odds_chosen": 2.1258764266967773, + "log_odds_ratio": -0.19279615581035614, + "logits/chosen": 275.3325500488281, + "logits/rejected": 364.13507080078125, + "logps/chosen": -0.4255383610725403, + "logps/rejected": -1.7605087757110596, + "loss": 0.7322, + "nll_loss": 0.6230169534683228, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.021276917308568954, + "rewards/margins": 0.06674851477146149, + "rewards/rejected": -0.08802543580532074, + "step": 1700 + }, + { + "epoch": 1.3542494042891184, + "grad_norm": 63.20461654663086, + "learning_rate": 1.210898699241207e-06, + "log_odds_chosen": 0.5076674818992615, + "log_odds_ratio": -0.5698251128196716, + "logits/chosen": 375.07818603515625, + "logits/rejected": 300.9750061035156, + "logps/chosen": -0.7717366218566895, + "logps/rejected": -1.0271308422088623, + "loss": 0.8147, + "nll_loss": 0.9089614748954773, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.03858683258295059, + "rewards/margins": 0.012769708409905434, + "rewards/rejected": -0.051356542855501175, + "step": 1705 + }, + { + "epoch": 1.358220810166799, + "grad_norm": 36.315547943115234, + "learning_rate": 1.2091270835166862e-06, + "log_odds_chosen": 2.0527215003967285, + "log_odds_ratio": -0.14111559092998505, + "logits/chosen": 256.4891357421875, + "logits/rejected": 378.0007019042969, + "logps/chosen": -0.32579556107521057, + "logps/rejected": -1.1657521724700928, + "loss": 0.7747, + "nll_loss": 0.6338789463043213, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01628977805376053, + "rewards/margins": 0.04199782758951187, + "rewards/rejected": -0.0582876093685627, + "step": 1710 + }, + { + "epoch": 1.3621922160444797, + "grad_norm": 23.591333389282227, + "learning_rate": 1.207363221040738e-06, + "log_odds_chosen": 1.1408276557922363, + "log_odds_ratio": -0.2928302586078644, + "logits/chosen": 303.75872802734375, + "logits/rejected": 315.8148193359375, + "logps/chosen": -0.4450489580631256, + "logps/rejected": -1.0119152069091797, + "loss": 0.7225, + "nll_loss": 0.642244279384613, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02225244790315628, + "rewards/margins": 0.028343314304947853, + "rewards/rejected": -0.050595760345458984, + "step": 1715 + }, + { + "epoch": 1.3661636219221605, + "grad_norm": 36.07604217529297, + "learning_rate": 1.2056070554260305e-06, + "log_odds_chosen": 1.7148020267486572, + "log_odds_ratio": -0.27437013387680054, + "logits/chosen": 297.388671875, + "logits/rejected": 462.2918395996094, + "logps/chosen": -0.4170989394187927, + "logps/rejected": -1.1417804956436157, + "loss": 0.6754, + "nll_loss": 0.716262936592102, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.020854949951171875, + "rewards/margins": 0.03623408079147339, + "rewards/rejected": -0.057089030742645264, + "step": 1720 + }, + { + "epoch": 1.3701350277998412, + "grad_norm": 27.890535354614258, + "learning_rate": 1.2038585308576922e-06, + "log_odds_chosen": 2.661741018295288, + "log_odds_ratio": -0.2920236587524414, + "logits/chosen": 314.42279052734375, + "logits/rejected": 376.5033264160156, + "logps/chosen": -0.43217021226882935, + "logps/rejected": -1.5007961988449097, + "loss": 0.8006, + "nll_loss": 0.7613029479980469, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.021608510985970497, + "rewards/margins": 0.053431302309036255, + "rewards/rejected": -0.0750398188829422, + "step": 1725 + }, + { + "epoch": 1.3741064336775217, + "grad_norm": 27.169368743896484, + "learning_rate": 1.2021175920858626e-06, + "log_odds_chosen": 0.9390004873275757, + "log_odds_ratio": -0.35079219937324524, + "logits/chosen": 427.57708740234375, + "logits/rejected": 271.9527587890625, + "logps/chosen": -0.4680325388908386, + "logps/rejected": -0.8964841961860657, + "loss": 0.8189, + "nll_loss": 0.581561803817749, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02340162731707096, + "rewards/margins": 0.021422583609819412, + "rewards/rejected": -0.044824205338954926, + "step": 1730 + }, + { + "epoch": 1.3780778395552025, + "grad_norm": 39.7526969909668, + "learning_rate": 1.200384184418359e-06, + "log_odds_chosen": 2.284052848815918, + "log_odds_ratio": -0.19260060787200928, + "logits/chosen": 235.9784393310547, + "logits/rejected": 304.092041015625, + "logps/chosen": -0.45018672943115234, + "logps/rejected": -2.012763738632202, + "loss": 0.8584, + "nll_loss": 0.9266872406005859, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.022509338334202766, + "rewards/margins": 0.07812884449958801, + "rewards/rejected": -0.10063817352056503, + "step": 1735 + }, + { + "epoch": 1.3820492454328832, + "grad_norm": 32.230445861816406, + "learning_rate": 1.1986582537134606e-06, + "log_odds_chosen": 3.3276565074920654, + "log_odds_ratio": -0.05102468281984329, + "logits/chosen": 455.22802734375, + "logits/rejected": 278.1533203125, + "logps/chosen": -0.15806782245635986, + "logps/rejected": -1.480958342552185, + "loss": 0.6712, + "nll_loss": 0.5678219199180603, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007903391495347023, + "rewards/margins": 0.06614452600479126, + "rewards/rejected": -0.07404791563749313, + "step": 1740 + }, + { + "epoch": 1.386020651310564, + "grad_norm": 41.62521743774414, + "learning_rate": 1.1969397463728037e-06, + "log_odds_chosen": 1.6859906911849976, + "log_odds_ratio": -0.20657625794410706, + "logits/chosen": 239.4410400390625, + "logits/rejected": 359.784423828125, + "logps/chosen": -0.5606490969657898, + "logps/rejected": -1.5242068767547607, + "loss": 0.8003, + "nll_loss": 1.018355369567871, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02803245559334755, + "rewards/margins": 0.04817789047956467, + "rewards/rejected": -0.07621034234762192, + "step": 1745 + }, + { + "epoch": 1.3899920571882447, + "grad_norm": 30.053016662597656, + "learning_rate": 1.1952286093343937e-06, + "log_odds_chosen": 1.3429896831512451, + "log_odds_ratio": -0.2858211100101471, + "logits/chosen": 309.0750732421875, + "logits/rejected": 321.745849609375, + "logps/chosen": -0.4044710099697113, + "logps/rejected": -1.078237771987915, + "loss": 0.7899, + "nll_loss": 0.8156072497367859, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.020223554223775864, + "rewards/margins": 0.033688344061374664, + "rewards/rejected": -0.05391188710927963, + "step": 1750 + }, + { + "epoch": 1.3939634630659254, + "grad_norm": 24.49996566772461, + "learning_rate": 1.1935247900657217e-06, + "log_odds_chosen": 1.5360620021820068, + "log_odds_ratio": -0.2454959601163864, + "logits/chosen": 301.2558288574219, + "logits/rejected": 246.26052856445312, + "logps/chosen": -0.3781280815601349, + "logps/rejected": -1.1283109188079834, + "loss": 0.7204, + "nll_loss": 0.7882005572319031, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.018906403332948685, + "rewards/margins": 0.03750915080308914, + "rewards/rejected": -0.05641555041074753, + "step": 1755 + }, + { + "epoch": 1.397934868943606, + "grad_norm": 45.932064056396484, + "learning_rate": 1.1918282365569903e-06, + "log_odds_chosen": 2.2376155853271484, + "log_odds_ratio": -0.11105986684560776, + "logits/chosen": 288.0708923339844, + "logits/rejected": 317.3603515625, + "logps/chosen": -0.40139836072921753, + "logps/rejected": -1.6960868835449219, + "loss": 0.8195, + "nll_loss": 0.7521315813064575, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.020069921389222145, + "rewards/margins": 0.06473441421985626, + "rewards/rejected": -0.08480434119701385, + "step": 1760 + }, + { + "epoch": 1.4019062748212867, + "grad_norm": 35.327667236328125, + "learning_rate": 1.1901388973144479e-06, + "log_odds_chosen": 2.4044885635375977, + "log_odds_ratio": -0.11451097577810287, + "logits/chosen": 357.29425048828125, + "logits/rejected": 296.12188720703125, + "logps/chosen": -0.3405108153820038, + "logps/rejected": -1.5622296333312988, + "loss": 0.7732, + "nll_loss": 0.6014407873153687, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01702554151415825, + "rewards/margins": 0.06108593940734863, + "rewards/rejected": -0.07811148464679718, + "step": 1765 + }, + { + "epoch": 1.4058776806989675, + "grad_norm": 28.74220848083496, + "learning_rate": 1.1884567213538209e-06, + "log_odds_chosen": 0.05671717971563339, + "log_odds_ratio": -0.7521728277206421, + "logits/chosen": 275.2093505859375, + "logits/rejected": 291.18218994140625, + "logps/chosen": -0.7687788605690002, + "logps/rejected": -0.8631227612495422, + "loss": 0.9296, + "nll_loss": 1.1221811771392822, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.03843894228339195, + "rewards/margins": 0.004717194940894842, + "rewards/rejected": -0.04315613582730293, + "step": 1770 + }, + { + "epoch": 1.4098490865766482, + "grad_norm": 25.72754669189453, + "learning_rate": 1.1867816581938534e-06, + "log_odds_chosen": 2.094773530960083, + "log_odds_ratio": -0.1832566112279892, + "logits/chosen": 295.9609680175781, + "logits/rejected": 386.21722412109375, + "logps/chosen": -0.5518186688423157, + "logps/rejected": -2.0147547721862793, + "loss": 0.6952, + "nll_loss": 0.7042763233184814, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.027590930461883545, + "rewards/margins": 0.07314680516719818, + "rewards/rejected": -0.10073773562908173, + "step": 1775 + }, + { + "epoch": 1.4138204924543287, + "grad_norm": 44.56334686279297, + "learning_rate": 1.1851136578499433e-06, + "log_odds_chosen": 2.180377960205078, + "log_odds_ratio": -0.1614275872707367, + "logits/chosen": 287.19976806640625, + "logits/rejected": 339.51861572265625, + "logps/chosen": -0.36367788910865784, + "logps/rejected": -1.620428442955017, + "loss": 0.7019, + "nll_loss": 0.5081968307495117, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.018183894455432892, + "rewards/margins": 0.06283754110336304, + "rewards/rejected": -0.08102142810821533, + "step": 1780 + }, + { + "epoch": 1.4177918983320095, + "grad_norm": 34.925498962402344, + "learning_rate": 1.1834526708278771e-06, + "log_odds_chosen": 1.0344644784927368, + "log_odds_ratio": -0.390333890914917, + "logits/chosen": 370.9827575683594, + "logits/rejected": 322.5367736816406, + "logps/chosen": -0.5955963134765625, + "logps/rejected": -1.1939870119094849, + "loss": 0.869, + "nll_loss": 0.8622109293937683, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.029779816046357155, + "rewards/margins": 0.02991952933371067, + "rewards/rejected": -0.059699345380067825, + "step": 1785 + }, + { + "epoch": 1.4217633042096902, + "grad_norm": 51.94044876098633, + "learning_rate": 1.181798648117664e-06, + "log_odds_chosen": 0.9601621627807617, + "log_odds_ratio": -0.3441396653652191, + "logits/chosen": 400.4881896972656, + "logits/rejected": 305.57904052734375, + "logps/chosen": -0.8277777433395386, + "logps/rejected": -1.3787654638290405, + "loss": 0.7763, + "nll_loss": 0.9883519411087036, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.041388895362615585, + "rewards/margins": 0.0275493822991848, + "rewards/rejected": -0.06893827766180038, + "step": 1790 + }, + { + "epoch": 1.425734710087371, + "grad_norm": 29.721099853515625, + "learning_rate": 1.1801515411874575e-06, + "log_odds_chosen": 1.9157155752182007, + "log_odds_ratio": -0.1623825579881668, + "logits/chosen": 279.03704833984375, + "logits/rejected": 351.99029541015625, + "logps/chosen": -0.49805086851119995, + "logps/rejected": -1.6073286533355713, + "loss": 0.8375, + "nll_loss": 0.6787853837013245, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.024902544915676117, + "rewards/margins": 0.05546388775110245, + "rewards/rejected": -0.08036642521619797, + "step": 1795 + }, + { + "epoch": 1.4297061159650517, + "grad_norm": 23.46649169921875, + "learning_rate": 1.1785113019775794e-06, + "log_odds_chosen": 2.264665126800537, + "log_odds_ratio": -0.17821480333805084, + "logits/chosen": 340.29827880859375, + "logits/rejected": 320.89910888671875, + "logps/chosen": -0.4604433476924896, + "logps/rejected": -1.8439369201660156, + "loss": 0.8162, + "nll_loss": 0.6925734281539917, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02302216924726963, + "rewards/margins": 0.06917468458414078, + "rewards/rejected": -0.09219685196876526, + "step": 1800 + }, + { + "epoch": 1.4336775218427324, + "grad_norm": 44.617557525634766, + "learning_rate": 1.1768778828946262e-06, + "log_odds_chosen": 1.7701141834259033, + "log_odds_ratio": -0.17851954698562622, + "logits/chosen": 305.48785400390625, + "logits/rejected": 249.27084350585938, + "logps/chosen": -0.43837469816207886, + "logps/rejected": -1.4193745851516724, + "loss": 0.804, + "nll_loss": 1.0134713649749756, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.021918734535574913, + "rewards/margins": 0.049049995839595795, + "rewards/rejected": -0.07096873223781586, + "step": 1805 + }, + { + "epoch": 1.437648927720413, + "grad_norm": 22.284208297729492, + "learning_rate": 1.1752512368056712e-06, + "log_odds_chosen": 1.949928641319275, + "log_odds_ratio": -0.15419664978981018, + "logits/chosen": 312.5030212402344, + "logits/rejected": 369.13250732421875, + "logps/chosen": -0.5765711069107056, + "logps/rejected": -1.8438835144042969, + "loss": 0.8873, + "nll_loss": 1.0679264068603516, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.028828555718064308, + "rewards/margins": 0.0633656233549118, + "rewards/rejected": -0.09219418466091156, + "step": 1810 + }, + { + "epoch": 1.4416203335980937, + "grad_norm": 22.29758644104004, + "learning_rate": 1.1736313170325507e-06, + "log_odds_chosen": 1.8476063013076782, + "log_odds_ratio": -0.2637333869934082, + "logits/chosen": 279.7242431640625, + "logits/rejected": 426.8993225097656, + "logps/chosen": -0.5844321250915527, + "logps/rejected": -1.6296072006225586, + "loss": 0.7365, + "nll_loss": 0.7915916442871094, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.029221605509519577, + "rewards/margins": 0.052258752286434174, + "rewards/rejected": -0.08148036152124405, + "step": 1815 + }, + { + "epoch": 1.4455917394757745, + "grad_norm": 29.178213119506836, + "learning_rate": 1.1720180773462387e-06, + "log_odds_chosen": 1.9528881311416626, + "log_odds_ratio": -0.20737957954406738, + "logits/chosen": 260.05792236328125, + "logits/rejected": 275.297119140625, + "logps/chosen": -0.4214601516723633, + "logps/rejected": -1.6192858219146729, + "loss": 0.6882, + "nll_loss": 0.5797325372695923, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.021073007956147194, + "rewards/margins": 0.059891290962696075, + "rewards/rejected": -0.08096429705619812, + "step": 1820 + }, + { + "epoch": 1.449563145353455, + "grad_norm": 23.370649337768555, + "learning_rate": 1.1704114719613058e-06, + "log_odds_chosen": 1.8969475030899048, + "log_odds_ratio": -0.1610887199640274, + "logits/chosen": 316.1849670410156, + "logits/rejected": 326.1905212402344, + "logps/chosen": -0.42412129044532776, + "logps/rejected": -1.474806785583496, + "loss": 0.8086, + "nll_loss": 0.6038433313369751, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.021206064149737358, + "rewards/margins": 0.05253427475690842, + "rewards/rejected": -0.07374034821987152, + "step": 1825 + }, + { + "epoch": 1.4535345512311357, + "grad_norm": 25.256650924682617, + "learning_rate": 1.168811455530461e-06, + "log_odds_chosen": 2.2011282444000244, + "log_odds_ratio": -0.1458742320537567, + "logits/chosen": 270.08099365234375, + "logits/rejected": 414.66912841796875, + "logps/chosen": -0.1999184787273407, + "logps/rejected": -1.135138750076294, + "loss": 0.7276, + "nll_loss": 0.4324968755245209, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009995924308896065, + "rewards/margins": 0.04676101356744766, + "rewards/rejected": -0.05675693601369858, + "step": 1830 + }, + { + "epoch": 1.4575059571088165, + "grad_norm": 40.1107063293457, + "learning_rate": 1.1672179831391772e-06, + "log_odds_chosen": 1.8065335750579834, + "log_odds_ratio": -0.23505112528800964, + "logits/chosen": 389.3009338378906, + "logits/rejected": 252.0340118408203, + "logps/chosen": -0.5170108079910278, + "logps/rejected": -1.5569610595703125, + "loss": 0.7184, + "nll_loss": 0.6483758687973022, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02585054375231266, + "rewards/margins": 0.05199751257896423, + "rewards/rejected": -0.07784806191921234, + "step": 1835 + }, + { + "epoch": 1.4614773629864972, + "grad_norm": 26.60308074951172, + "learning_rate": 1.1656310103003923e-06, + "log_odds_chosen": 1.8615598678588867, + "log_odds_ratio": -0.19836989045143127, + "logits/chosen": 320.78143310546875, + "logits/rejected": 354.0133972167969, + "logps/chosen": -0.3542177081108093, + "logps/rejected": -1.2545313835144043, + "loss": 0.8251, + "nll_loss": 0.7097489237785339, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.017710886895656586, + "rewards/margins": 0.04501568153500557, + "rewards/rejected": -0.06272657215595245, + "step": 1840 + }, + { + "epoch": 1.465448768864178, + "grad_norm": 27.621578216552734, + "learning_rate": 1.164050492949297e-06, + "log_odds_chosen": 2.032970905303955, + "log_odds_ratio": -0.21160092949867249, + "logits/chosen": 288.5796813964844, + "logits/rejected": 268.0729064941406, + "logps/chosen": -0.45712965726852417, + "logps/rejected": -1.5615556240081787, + "loss": 0.8043, + "nll_loss": 0.8877753019332886, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.02285648323595524, + "rewards/margins": 0.05522129684686661, + "rewards/rejected": -0.0780777782201767, + "step": 1845 + }, + { + "epoch": 1.4694201747418587, + "grad_norm": 30.004453659057617, + "learning_rate": 1.162476387438193e-06, + "log_odds_chosen": 1.4030696153640747, + "log_odds_ratio": -0.24303074181079865, + "logits/chosen": 263.7775573730469, + "logits/rejected": 297.43450927734375, + "logps/chosen": -0.6241577863693237, + "logps/rejected": -1.4998613595962524, + "loss": 0.8994, + "nll_loss": 0.9912241101264954, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.031207893043756485, + "rewards/margins": 0.04378517344594002, + "rewards/rejected": -0.0749930664896965, + "step": 1850 + }, + { + "epoch": 1.4733915806195395, + "grad_norm": 32.73456573486328, + "learning_rate": 1.1609086505314302e-06, + "log_odds_chosen": 1.436710000038147, + "log_odds_ratio": -0.24272426962852478, + "logits/chosen": 319.9948425292969, + "logits/rejected": 276.5702209472656, + "logps/chosen": -0.44764357805252075, + "logps/rejected": -1.2125766277313232, + "loss": 0.7988, + "nll_loss": 0.7930669188499451, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.022382179275155067, + "rewards/margins": 0.038246653974056244, + "rewards/rejected": -0.06062883138656616, + "step": 1855 + }, + { + "epoch": 1.47736298649722, + "grad_norm": 40.75159454345703, + "learning_rate": 1.1593472394004206e-06, + "log_odds_chosen": 1.0405197143554688, + "log_odds_ratio": -0.3699452579021454, + "logits/chosen": 292.70220947265625, + "logits/rejected": 364.3802795410156, + "logps/chosen": -0.41787219047546387, + "logps/rejected": -0.9609645009040833, + "loss": 0.7833, + "nll_loss": 0.5806514024734497, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.020893611013889313, + "rewards/margins": 0.02715461328625679, + "rewards/rejected": -0.0480482280254364, + "step": 1860 + }, + { + "epoch": 1.4813343923749007, + "grad_norm": 27.765846252441406, + "learning_rate": 1.1577921116187233e-06, + "log_odds_chosen": 2.0443687438964844, + "log_odds_ratio": -0.1834229975938797, + "logits/chosen": 239.04916381835938, + "logits/rejected": 375.8265686035156, + "logps/chosen": -0.6495457887649536, + "logps/rejected": -2.141505002975464, + "loss": 0.7322, + "nll_loss": 0.7409297227859497, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03247729316353798, + "rewards/margins": 0.07459796965122223, + "rewards/rejected": -0.10707525908946991, + "step": 1865 + }, + { + "epoch": 1.4853057982525815, + "grad_norm": 33.22872543334961, + "learning_rate": 1.1562432251572007e-06, + "log_odds_chosen": 2.502471923828125, + "log_odds_ratio": -0.10238447040319443, + "logits/chosen": 380.58367919921875, + "logits/rejected": 238.6292266845703, + "logps/chosen": -0.3574802279472351, + "logps/rejected": -1.6461595296859741, + "loss": 0.9208, + "nll_loss": 0.9279494285583496, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.017874013632535934, + "rewards/margins": 0.06443396210670471, + "rewards/rejected": -0.08230797946453094, + "step": 1870 + }, + { + "epoch": 1.489277204130262, + "grad_norm": 33.23795700073242, + "learning_rate": 1.1547005383792514e-06, + "log_odds_chosen": 2.7648186683654785, + "log_odds_ratio": -0.06605343520641327, + "logits/chosen": 391.21295166015625, + "logits/rejected": 338.6253356933594, + "logps/chosen": -0.3001159727573395, + "logps/rejected": -1.8439795970916748, + "loss": 0.9542, + "nll_loss": 0.6177743673324585, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.015005799010396004, + "rewards/margins": 0.07719318568706512, + "rewards/rejected": -0.09219899028539658, + "step": 1875 + }, + { + "epoch": 1.4932486100079427, + "grad_norm": 31.490821838378906, + "learning_rate": 1.1531640100361064e-06, + "log_odds_chosen": 1.2999223470687866, + "log_odds_ratio": -0.2614326775074005, + "logits/chosen": 287.3116760253906, + "logits/rejected": 266.83551025390625, + "logps/chosen": -0.5728956460952759, + "logps/rejected": -1.3332241773605347, + "loss": 0.7484, + "nll_loss": 0.6585596799850464, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.028644781559705734, + "rewards/margins": 0.038016427308321, + "rewards/rejected": -0.06666121631860733, + "step": 1880 + }, + { + "epoch": 1.4972200158856235, + "grad_norm": 24.411352157592773, + "learning_rate": 1.1516335992621969e-06, + "log_odds_chosen": 2.1026387214660645, + "log_odds_ratio": -0.134886234998703, + "logits/chosen": 305.50970458984375, + "logits/rejected": 322.8634338378906, + "logps/chosen": -0.36966148018836975, + "logps/rejected": -1.4685351848602295, + "loss": 0.9336, + "nll_loss": 0.6422818303108215, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.018483074381947517, + "rewards/margins": 0.054943691939115524, + "rewards/rejected": -0.0734267607331276, + "step": 1885 + }, + { + "epoch": 1.5011914217633042, + "grad_norm": 151.1687469482422, + "learning_rate": 1.1501092655705905e-06, + "log_odds_chosen": 2.1099042892456055, + "log_odds_ratio": -0.15668827295303345, + "logits/chosen": 295.8759765625, + "logits/rejected": 361.9652404785156, + "logps/chosen": -0.5755786299705505, + "logps/rejected": -1.6685059070587158, + "loss": 0.9132, + "nll_loss": 1.1686800718307495, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.028778931125998497, + "rewards/margins": 0.054646365344524384, + "rewards/rejected": -0.08342529833316803, + "step": 1890 + }, + { + "epoch": 1.505162827640985, + "grad_norm": 38.116607666015625, + "learning_rate": 1.1485909688484915e-06, + "log_odds_chosen": 2.76908802986145, + "log_odds_ratio": -0.0817871242761612, + "logits/chosen": 366.576171875, + "logits/rejected": 233.8785400390625, + "logps/chosen": -0.19693537056446075, + "logps/rejected": -1.285447359085083, + "loss": 0.8993, + "nll_loss": 1.0742276906967163, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009846767410635948, + "rewards/margins": 0.05442560464143753, + "rewards/rejected": -0.06427237391471863, + "step": 1895 + }, + { + "epoch": 1.5091342335186657, + "grad_norm": 39.85503387451172, + "learning_rate": 1.1470786693528087e-06, + "log_odds_chosen": 1.611579179763794, + "log_odds_ratio": -0.2680577337741852, + "logits/chosen": 293.2515563964844, + "logits/rejected": 279.0267028808594, + "logps/chosen": -0.6857994198799133, + "logps/rejected": -1.6845874786376953, + "loss": 0.8295, + "nll_loss": 1.0036165714263916, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03428996726870537, + "rewards/margins": 0.04993940517306328, + "rewards/rejected": -0.08422937989234924, + "step": 1900 + }, + { + "epoch": 1.5131056393963465, + "grad_norm": 25.456764221191406, + "learning_rate": 1.1455723277057847e-06, + "log_odds_chosen": 1.930851936340332, + "log_odds_ratio": -0.19720369577407837, + "logits/chosen": 363.69366455078125, + "logits/rejected": 306.1184997558594, + "logps/chosen": -0.35775288939476013, + "logps/rejected": -1.2952030897140503, + "loss": 0.8436, + "nll_loss": 0.7519537210464478, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.017887646332383156, + "rewards/margins": 0.04687251150608063, + "rewards/rejected": -0.06476015597581863, + "step": 1905 + }, + { + "epoch": 1.517077045274027, + "grad_norm": 31.93710708618164, + "learning_rate": 1.144071904890689e-06, + "log_odds_chosen": 1.6818300485610962, + "log_odds_ratio": -0.3100757300853729, + "logits/chosen": 316.9599914550781, + "logits/rejected": 285.8528137207031, + "logps/chosen": -0.7037925124168396, + "logps/rejected": -1.733506441116333, + "loss": 0.7745, + "nll_loss": 1.0718820095062256, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.03518962487578392, + "rewards/margins": 0.05148569867014885, + "rewards/rejected": -0.08667532354593277, + "step": 1910 + }, + { + "epoch": 1.5210484511517077, + "grad_norm": 48.80948257446289, + "learning_rate": 1.1425773622475754e-06, + "log_odds_chosen": 2.292222261428833, + "log_odds_ratio": -0.1542581468820572, + "logits/chosen": 348.7215270996094, + "logits/rejected": 291.024169921875, + "logps/chosen": -0.47094663977622986, + "logps/rejected": -1.8219391107559204, + "loss": 0.9162, + "nll_loss": 0.82440584897995, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.023547332733869553, + "rewards/margins": 0.06754962354898453, + "rewards/rejected": -0.09109695255756378, + "step": 1915 + }, + { + "epoch": 1.5250198570293882, + "grad_norm": 25.52734375, + "learning_rate": 1.1410886614690962e-06, + "log_odds_chosen": 2.0423831939697266, + "log_odds_ratio": -0.1279725432395935, + "logits/chosen": 249.74081420898438, + "logits/rejected": 294.3277282714844, + "logps/chosen": -0.5086468458175659, + "logps/rejected": -1.7694532871246338, + "loss": 0.8849, + "nll_loss": 1.0162447690963745, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.025432344526052475, + "rewards/margins": 0.06304031610488892, + "rewards/rejected": -0.08847267180681229, + "step": 1920 + }, + { + "epoch": 1.528991262907069, + "grad_norm": 30.2684326171875, + "learning_rate": 1.1396057645963796e-06, + "log_odds_chosen": 0.9635022878646851, + "log_odds_ratio": -0.499523788690567, + "logits/chosen": 255.3334197998047, + "logits/rejected": 351.5635681152344, + "logps/chosen": -0.5562784671783447, + "logps/rejected": -1.0457470417022705, + "loss": 0.8113, + "nll_loss": 0.9001520276069641, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.027813920751214027, + "rewards/margins": 0.024473432451486588, + "rewards/rejected": -0.052287351340055466, + "step": 1925 + }, + { + "epoch": 1.5329626687847497, + "grad_norm": 22.8693790435791, + "learning_rate": 1.1381286340149635e-06, + "log_odds_chosen": 2.0736007690429688, + "log_odds_ratio": -0.16570061445236206, + "logits/chosen": 315.60400390625, + "logits/rejected": 312.27056884765625, + "logps/chosen": -0.2544867992401123, + "logps/rejected": -0.957433819770813, + "loss": 0.7446, + "nll_loss": 0.6752304434776306, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012724341824650764, + "rewards/margins": 0.035147350281476974, + "rewards/rejected": -0.04787169024348259, + "step": 1930 + }, + { + "epoch": 1.5369340746624305, + "grad_norm": 35.36579895019531, + "learning_rate": 1.1366572324507892e-06, + "log_odds_chosen": 0.5588671565055847, + "log_odds_ratio": -0.5842665433883667, + "logits/chosen": 239.9763946533203, + "logits/rejected": 326.97601318359375, + "logps/chosen": -0.6879284977912903, + "logps/rejected": -0.8991800546646118, + "loss": 0.8204, + "nll_loss": 0.7677897810935974, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.034396421164274216, + "rewards/margins": 0.010562578216195107, + "rewards/rejected": -0.04495900124311447, + "step": 1935 + }, + { + "epoch": 1.5409054805401112, + "grad_norm": 73.0653076171875, + "learning_rate": 1.1351915229662496e-06, + "log_odds_chosen": 1.0707106590270996, + "log_odds_ratio": -0.40579065680503845, + "logits/chosen": 310.290283203125, + "logits/rejected": 329.74615478515625, + "logps/chosen": -0.8593183755874634, + "logps/rejected": -1.5329375267028809, + "loss": 0.7189, + "nll_loss": 0.8812786340713501, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.04296591505408287, + "rewards/margins": 0.033680956810712814, + "rewards/rejected": -0.07664687186479568, + "step": 1940 + }, + { + "epoch": 1.544876886417792, + "grad_norm": 29.127994537353516, + "learning_rate": 1.1337314689562956e-06, + "log_odds_chosen": 1.457087755203247, + "log_odds_ratio": -0.2620270848274231, + "logits/chosen": 386.1640625, + "logits/rejected": 308.2832336425781, + "logps/chosen": -0.4160517156124115, + "logps/rejected": -1.0144550800323486, + "loss": 0.8671, + "nll_loss": 0.9128124117851257, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.020802585408091545, + "rewards/margins": 0.029920164495706558, + "rewards/rejected": -0.05072275549173355, + "step": 1945 + }, + { + "epoch": 1.5488482922954727, + "grad_norm": 24.65477752685547, + "learning_rate": 1.1322770341445958e-06, + "log_odds_chosen": 1.257016897201538, + "log_odds_ratio": -0.2820950150489807, + "logits/chosen": 284.0875549316406, + "logits/rejected": 310.27606201171875, + "logps/chosen": -0.5743626952171326, + "logps/rejected": -1.278131365776062, + "loss": 0.6535, + "nll_loss": 0.6732789874076843, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0287181343883276, + "rewards/margins": 0.03518843278288841, + "rewards/rejected": -0.06390657275915146, + "step": 1950 + }, + { + "epoch": 1.5528196981731535, + "grad_norm": 21.72737693786621, + "learning_rate": 1.1308281825797517e-06, + "log_odds_chosen": 0.7033153772354126, + "log_odds_ratio": -0.4314216077327728, + "logits/chosen": 290.4872131347656, + "logits/rejected": 324.4375, + "logps/chosen": -0.7639486193656921, + "logps/rejected": -1.205558180809021, + "loss": 0.8264, + "nll_loss": 0.9597232937812805, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.03819743171334267, + "rewards/margins": 0.02208048105239868, + "rewards/rejected": -0.06027790904045105, + "step": 1955 + }, + { + "epoch": 1.556791104050834, + "grad_norm": 31.13661003112793, + "learning_rate": 1.1293848786315642e-06, + "log_odds_chosen": 0.610641360282898, + "log_odds_ratio": -0.6338435411453247, + "logits/chosen": 262.70538330078125, + "logits/rejected": 292.87548828125, + "logps/chosen": -0.5347281098365784, + "logps/rejected": -0.9647022485733032, + "loss": 0.7839, + "nll_loss": 0.6543559432029724, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.02673640474677086, + "rewards/margins": 0.021498704329133034, + "rewards/rejected": -0.04823511093854904, + "step": 1960 + }, + { + "epoch": 1.5607625099285147, + "grad_norm": 32.95920944213867, + "learning_rate": 1.1279470869873539e-06, + "log_odds_chosen": 1.6339528560638428, + "log_odds_ratio": -0.23149630427360535, + "logits/chosen": 292.2558288574219, + "logits/rejected": 289.98492431640625, + "logps/chosen": -0.5067226886749268, + "logps/rejected": -1.425010085105896, + "loss": 0.7711, + "nll_loss": 0.8301030397415161, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.025336135178804398, + "rewards/margins": 0.04591437056660652, + "rewards/rejected": -0.07125050574541092, + "step": 1965 + }, + { + "epoch": 1.5647339158061953, + "grad_norm": 35.133758544921875, + "learning_rate": 1.1265147726483323e-06, + "log_odds_chosen": 1.5698158740997314, + "log_odds_ratio": -0.1986834853887558, + "logits/chosen": 312.9940490722656, + "logits/rejected": 339.71966552734375, + "logps/chosen": -0.6160762310028076, + "logps/rejected": -1.6119115352630615, + "loss": 0.7255, + "nll_loss": 0.8140316009521484, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03080381080508232, + "rewards/margins": 0.04979177564382553, + "rewards/rejected": -0.08059557527303696, + "step": 1970 + }, + { + "epoch": 1.568705321683876, + "grad_norm": 35.48809051513672, + "learning_rate": 1.125087900926024e-06, + "log_odds_chosen": 1.6797376871109009, + "log_odds_ratio": -0.18438556790351868, + "logits/chosen": 400.1343994140625, + "logits/rejected": 254.5809783935547, + "logps/chosen": -0.6721310615539551, + "logps/rejected": -1.707035779953003, + "loss": 0.9347, + "nll_loss": 1.0324268341064453, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.033606551587581635, + "rewards/margins": 0.05174523591995239, + "rewards/rejected": -0.08535178750753403, + "step": 1975 + }, + { + "epoch": 1.5726767275615567, + "grad_norm": 37.31858825683594, + "learning_rate": 1.1236664374387369e-06, + "log_odds_chosen": 1.521589994430542, + "log_odds_ratio": -0.2915099561214447, + "logits/chosen": 466.3154296875, + "logits/rejected": 254.2445831298828, + "logps/chosen": -0.32636314630508423, + "logps/rejected": -0.9937745928764343, + "loss": 0.7887, + "nll_loss": 0.5553312301635742, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01631815917789936, + "rewards/margins": 0.033370573073625565, + "rewards/rejected": -0.049688734114170074, + "step": 1980 + }, + { + "epoch": 1.5766481334392375, + "grad_norm": 37.352561950683594, + "learning_rate": 1.1222503481080839e-06, + "log_odds_chosen": 2.001415491104126, + "log_odds_ratio": -0.21713057160377502, + "logits/chosen": 290.2130432128906, + "logits/rejected": 313.20318603515625, + "logps/chosen": -0.37033191323280334, + "logps/rejected": -1.4434969425201416, + "loss": 0.765, + "nll_loss": 0.5978688597679138, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.018516594544053078, + "rewards/margins": 0.05365825444459915, + "rewards/rejected": -0.07217484712600708, + "step": 1985 + }, + { + "epoch": 1.5806195393169182, + "grad_norm": 28.262657165527344, + "learning_rate": 1.120839599155551e-06, + "log_odds_chosen": 0.7699643969535828, + "log_odds_ratio": -0.4922252297401428, + "logits/chosen": 305.6629943847656, + "logits/rejected": 278.87664794921875, + "logps/chosen": -0.561299204826355, + "logps/rejected": -0.9469798803329468, + "loss": 0.6831, + "nll_loss": 0.8940151333808899, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.02806496061384678, + "rewards/margins": 0.01928403414785862, + "rewards/rejected": -0.0473489984869957, + "step": 1990 + }, + { + "epoch": 1.584590945194599, + "grad_norm": 41.953609466552734, + "learning_rate": 1.1194341570991125e-06, + "log_odds_chosen": 1.8478342294692993, + "log_odds_ratio": -0.2467450201511383, + "logits/chosen": 364.38714599609375, + "logits/rejected": 317.972900390625, + "logps/chosen": -0.42733925580978394, + "logps/rejected": -1.156592845916748, + "loss": 0.687, + "nll_loss": 0.6334320902824402, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.021366963163018227, + "rewards/margins": 0.036462679505348206, + "rewards/rejected": -0.05782964080572128, + "step": 1995 + }, + { + "epoch": 1.5885623510722797, + "grad_norm": 54.09469223022461, + "learning_rate": 1.118033988749895e-06, + "log_odds_chosen": 1.108894944190979, + "log_odds_ratio": -0.42433857917785645, + "logits/chosen": 246.85830688476562, + "logits/rejected": 331.88848876953125, + "logps/chosen": -0.37637174129486084, + "logps/rejected": -0.8830350041389465, + "loss": 0.812, + "nll_loss": 0.5468398928642273, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.018818587064743042, + "rewards/margins": 0.025333160534501076, + "rewards/rejected": -0.04415174573659897, + "step": 2000 + }, + { + "epoch": 1.5925337569499602, + "grad_norm": 35.29500961303711, + "learning_rate": 1.1166390612088834e-06, + "log_odds_chosen": 2.0460915565490723, + "log_odds_ratio": -0.20217151939868927, + "logits/chosen": 332.9038391113281, + "logits/rejected": 241.957275390625, + "logps/chosen": -0.7111643552780151, + "logps/rejected": -2.1858718395233154, + "loss": 0.8399, + "nll_loss": 0.986811637878418, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03555821627378464, + "rewards/margins": 0.07373537123203278, + "rewards/rejected": -0.10929358005523682, + "step": 2005 + }, + { + "epoch": 1.596505162827641, + "grad_norm": 44.16957473754883, + "learning_rate": 1.1152493418636764e-06, + "log_odds_chosen": 1.7737945318222046, + "log_odds_ratio": -0.18286636471748352, + "logits/chosen": 299.9866638183594, + "logits/rejected": 319.33050537109375, + "logps/chosen": -0.4143436849117279, + "logps/rejected": -1.294891119003296, + "loss": 0.8105, + "nll_loss": 0.5705588459968567, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.020717184990644455, + "rewards/margins": 0.04402737691998482, + "rewards/rejected": -0.06474455446004868, + "step": 2010 + }, + { + "epoch": 1.6004765687053217, + "grad_norm": 28.743602752685547, + "learning_rate": 1.1138647983852827e-06, + "log_odds_chosen": 1.285465955734253, + "log_odds_ratio": -0.301657110452652, + "logits/chosen": 299.4744567871094, + "logits/rejected": 253.02835083007812, + "logps/chosen": -0.5782285928726196, + "logps/rejected": -1.2731786966323853, + "loss": 0.692, + "nll_loss": 0.6972297430038452, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02891143225133419, + "rewards/margins": 0.03474750369787216, + "rewards/rejected": -0.0636589378118515, + "step": 2015 + }, + { + "epoch": 1.6044479745830023, + "grad_norm": 32.377628326416016, + "learning_rate": 1.112485398724962e-06, + "log_odds_chosen": 1.4153087139129639, + "log_odds_ratio": -0.2784711718559265, + "logits/chosen": 408.69146728515625, + "logits/rejected": 323.42388916015625, + "logps/chosen": -0.5624197721481323, + "logps/rejected": -1.2454121112823486, + "loss": 0.727, + "nll_loss": 0.8249729871749878, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.028120988979935646, + "rewards/margins": 0.03414962440729141, + "rewards/rejected": -0.06227061152458191, + "step": 2020 + }, + { + "epoch": 1.608419380460683, + "grad_norm": 29.039993286132812, + "learning_rate": 1.111111111111111e-06, + "log_odds_chosen": 3.085414409637451, + "log_odds_ratio": -0.11739520728588104, + "logits/chosen": 299.89031982421875, + "logits/rejected": 295.5804443359375, + "logps/chosen": -0.22455701231956482, + "logps/rejected": -1.8523008823394775, + "loss": 0.6705, + "nll_loss": 0.49535632133483887, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01122785173356533, + "rewards/margins": 0.08138719201087952, + "rewards/rejected": -0.0926150381565094, + "step": 2025 + }, + { + "epoch": 1.6123907863383637, + "grad_norm": 30.045818328857422, + "learning_rate": 1.1097419040461884e-06, + "log_odds_chosen": 1.698897361755371, + "log_odds_ratio": -0.21327969431877136, + "logits/chosen": 270.95648193359375, + "logits/rejected": 383.06256103515625, + "logps/chosen": -0.4442528784275055, + "logps/rejected": -1.416225790977478, + "loss": 0.7029, + "nll_loss": 0.6176842451095581, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.022212643176317215, + "rewards/margins": 0.048598650842905045, + "rewards/rejected": -0.07081129401922226, + "step": 2030 + }, + { + "epoch": 1.6163621922160445, + "grad_norm": 54.25676727294922, + "learning_rate": 1.1083777463036816e-06, + "log_odds_chosen": 1.8387584686279297, + "log_odds_ratio": -0.4786914885044098, + "logits/chosen": 347.6900939941406, + "logits/rejected": 285.0926818847656, + "logps/chosen": -0.6492979526519775, + "logps/rejected": -1.5062249898910522, + "loss": 0.881, + "nll_loss": 0.99772709608078, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.032464899122714996, + "rewards/margins": 0.04284634813666344, + "rewards/rejected": -0.07531125098466873, + "step": 2035 + }, + { + "epoch": 1.6203335980937252, + "grad_norm": 30.767316818237305, + "learning_rate": 1.1070186069251193e-06, + "log_odds_chosen": 0.8958157300949097, + "log_odds_ratio": -0.3673623204231262, + "logits/chosen": 400.4134826660156, + "logits/rejected": 247.96182250976562, + "logps/chosen": -0.633257269859314, + "logps/rejected": -1.1521893739700317, + "loss": 0.7333, + "nll_loss": 0.7941089868545532, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03166285902261734, + "rewards/margins": 0.025946607813239098, + "rewards/rejected": -0.05760946869850159, + "step": 2040 + }, + { + "epoch": 1.624305003971406, + "grad_norm": 26.96861457824707, + "learning_rate": 1.1056644552171163e-06, + "log_odds_chosen": 2.199897050857544, + "log_odds_ratio": -0.17961575090885162, + "logits/chosen": 235.00869750976562, + "logits/rejected": 412.4244079589844, + "logps/chosen": -0.5231403112411499, + "logps/rejected": -1.801314353942871, + "loss": 0.8437, + "nll_loss": 0.8201786279678345, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.026157017797231674, + "rewards/margins": 0.06390871107578278, + "rewards/rejected": -0.09006571769714355, + "step": 2045 + }, + { + "epoch": 1.6282764098490867, + "grad_norm": 50.35895538330078, + "learning_rate": 1.1043152607484655e-06, + "log_odds_chosen": 1.1554151773452759, + "log_odds_ratio": -0.28396543860435486, + "logits/chosen": 277.6943054199219, + "logits/rejected": 342.0705871582031, + "logps/chosen": -0.4897230565547943, + "logps/rejected": -1.1020798683166504, + "loss": 0.8528, + "nll_loss": 0.6063051223754883, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.024486154317855835, + "rewards/margins": 0.030617838725447655, + "rewards/rejected": -0.05510399490594864, + "step": 2050 + }, + { + "epoch": 1.6322478157267672, + "grad_norm": 54.209327697753906, + "learning_rate": 1.1029709933472638e-06, + "log_odds_chosen": 1.4155397415161133, + "log_odds_ratio": -0.2870427668094635, + "logits/chosen": 248.59048461914062, + "logits/rejected": 335.0494079589844, + "logps/chosen": -0.49244603514671326, + "logps/rejected": -1.2132689952850342, + "loss": 0.9627, + "nll_loss": 0.8640304803848267, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.024622302502393723, + "rewards/margins": 0.036041148006916046, + "rewards/rejected": -0.06066344305872917, + "step": 2055 + }, + { + "epoch": 1.636219221604448, + "grad_norm": 24.583423614501953, + "learning_rate": 1.1016316230980794e-06, + "log_odds_chosen": 2.2713701725006104, + "log_odds_ratio": -0.13930584490299225, + "logits/chosen": 343.72979736328125, + "logits/rejected": 319.630859375, + "logps/chosen": -0.5581644773483276, + "logps/rejected": -1.981873869895935, + "loss": 0.8047, + "nll_loss": 0.8394874334335327, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02790822647511959, + "rewards/margins": 0.07118546962738037, + "rewards/rejected": -0.09909369796514511, + "step": 2060 + }, + { + "epoch": 1.6401906274821285, + "grad_norm": 31.54615020751953, + "learning_rate": 1.100297120339154e-06, + "log_odds_chosen": 2.6403470039367676, + "log_odds_ratio": -0.10118647664785385, + "logits/chosen": 383.3280029296875, + "logits/rejected": 294.6977233886719, + "logps/chosen": -0.36460989713668823, + "logps/rejected": -1.8322269916534424, + "loss": 0.6359, + "nll_loss": 0.6053825616836548, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.018230494111776352, + "rewards/margins": 0.07338085025548935, + "rewards/rejected": -0.0916113406419754, + "step": 2065 + }, + { + "epoch": 1.6441620333598093, + "grad_norm": 30.021217346191406, + "learning_rate": 1.098967455659645e-06, + "log_odds_chosen": 1.0652350187301636, + "log_odds_ratio": -0.3493257164955139, + "logits/chosen": 336.79705810546875, + "logits/rejected": 291.21636962890625, + "logps/chosen": -0.6652384996414185, + "logps/rejected": -1.2979891300201416, + "loss": 0.6926, + "nll_loss": 0.9665688276290894, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03326192498207092, + "rewards/margins": 0.0316375307738781, + "rewards/rejected": -0.06489945948123932, + "step": 2070 + }, + { + "epoch": 1.64813343923749, + "grad_norm": 30.264102935791016, + "learning_rate": 1.0976425998969036e-06, + "log_odds_chosen": 1.5590989589691162, + "log_odds_ratio": -0.24328143894672394, + "logits/chosen": 286.5331115722656, + "logits/rejected": 367.84771728515625, + "logps/chosen": -0.7489336133003235, + "logps/rejected": -1.8240808248519897, + "loss": 0.8402, + "nll_loss": 0.8905247449874878, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.037446681410074234, + "rewards/margins": 0.053757358342409134, + "rewards/rejected": -0.09120403230190277, + "step": 2075 + }, + { + "epoch": 1.6521048451151708, + "grad_norm": 29.85713768005371, + "learning_rate": 1.0963225241337867e-06, + "log_odds_chosen": 1.7376596927642822, + "log_odds_ratio": -0.20286710560321808, + "logits/chosen": 252.2242889404297, + "logits/rejected": 387.5245361328125, + "logps/chosen": -0.38523685932159424, + "logps/rejected": -1.3179008960723877, + "loss": 0.7234, + "nll_loss": 0.5829646587371826, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.019261842593550682, + "rewards/margins": 0.04663320630788803, + "rewards/rejected": -0.06589505076408386, + "step": 2080 + }, + { + "epoch": 1.6560762509928515, + "grad_norm": 38.490631103515625, + "learning_rate": 1.0950071996960073e-06, + "log_odds_chosen": 1.6538069248199463, + "log_odds_ratio": -0.22928845882415771, + "logits/chosen": 324.57879638671875, + "logits/rejected": 291.5035705566406, + "logps/chosen": -0.6583439111709595, + "logps/rejected": -1.5701768398284912, + "loss": 0.9184, + "nll_loss": 0.846837043762207, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03291719779372215, + "rewards/margins": 0.045591648668050766, + "rewards/rejected": -0.07850884646177292, + "step": 2085 + }, + { + "epoch": 1.6600476568705322, + "grad_norm": 47.06442642211914, + "learning_rate": 1.093696598149518e-06, + "log_odds_chosen": 1.538604497909546, + "log_odds_ratio": -0.26061558723449707, + "logits/chosen": 319.3267822265625, + "logits/rejected": 376.86767578125, + "logps/chosen": -0.3853234052658081, + "logps/rejected": -1.1279296875, + "loss": 0.7315, + "nll_loss": 0.5786786079406738, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.019266171380877495, + "rewards/margins": 0.037130314856767654, + "rewards/rejected": -0.056396484375, + "step": 2090 + }, + { + "epoch": 1.664019062748213, + "grad_norm": 32.46980285644531, + "learning_rate": 1.0923906912979294e-06, + "log_odds_chosen": 2.563507556915283, + "log_odds_ratio": -0.08386361598968506, + "logits/chosen": 373.00958251953125, + "logits/rejected": 278.15411376953125, + "logps/chosen": -0.4534314274787903, + "logps/rejected": -1.8156788349151611, + "loss": 0.7171, + "nll_loss": 0.7025438547134399, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.022671569138765335, + "rewards/margins": 0.06811238825321198, + "rewards/rejected": -0.09078395366668701, + "step": 2095 + }, + { + "epoch": 1.6679904686258937, + "grad_norm": 33.24217987060547, + "learning_rate": 1.091089451179962e-06, + "log_odds_chosen": 0.8273458480834961, + "log_odds_ratio": -0.4423191547393799, + "logits/chosen": 379.5636291503906, + "logits/rejected": 243.4392852783203, + "logps/chosen": -0.6664489507675171, + "logps/rejected": -1.1298898458480835, + "loss": 0.9125, + "nll_loss": 1.0257153511047363, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.033322446048259735, + "rewards/margins": 0.0231720469892025, + "rewards/rejected": -0.056494493037462234, + "step": 2100 + }, + { + "epoch": 1.6719618745035743, + "grad_norm": 36.56023406982422, + "learning_rate": 1.0897928500669322e-06, + "log_odds_chosen": 0.48522695899009705, + "log_odds_ratio": -0.5532909035682678, + "logits/chosen": 337.21429443359375, + "logits/rejected": 289.87176513671875, + "logps/chosen": -0.8676580190658569, + "logps/rejected": -1.076594591140747, + "loss": 0.9392, + "nll_loss": 0.9415254592895508, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.043382905423641205, + "rewards/margins": 0.010446821339428425, + "rewards/rejected": -0.05382972210645676, + "step": 2105 + }, + { + "epoch": 1.675933280381255, + "grad_norm": 32.79849624633789, + "learning_rate": 1.0885008604602703e-06, + "log_odds_chosen": 1.568729043006897, + "log_odds_ratio": -0.25548508763313293, + "logits/chosen": 308.7678527832031, + "logits/rejected": 345.7282409667969, + "logps/chosen": -0.44344788789749146, + "logps/rejected": -1.2481848001480103, + "loss": 0.6848, + "nll_loss": 0.6747992634773254, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.022172395139932632, + "rewards/margins": 0.04023684561252594, + "rewards/rejected": -0.06240924075245857, + "step": 2110 + }, + { + "epoch": 1.6799046862589355, + "grad_norm": 30.764209747314453, + "learning_rate": 1.0872134550890703e-06, + "log_odds_chosen": 1.1366727352142334, + "log_odds_ratio": -0.4399314820766449, + "logits/chosen": 249.7522735595703, + "logits/rejected": 347.0238342285156, + "logps/chosen": -0.6293274164199829, + "logps/rejected": -1.1428825855255127, + "loss": 0.794, + "nll_loss": 0.8456639051437378, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.031466372311115265, + "rewards/margins": 0.025677751749753952, + "rewards/rejected": -0.05714412406086922, + "step": 2115 + }, + { + "epoch": 1.6838760921366163, + "grad_norm": 34.06650161743164, + "learning_rate": 1.0859306069076736e-06, + "log_odds_chosen": 0.9071727991104126, + "log_odds_ratio": -0.5517928004264832, + "logits/chosen": 246.1561737060547, + "logits/rejected": 483.20465087890625, + "logps/chosen": -0.48550352454185486, + "logps/rejected": -1.149099588394165, + "loss": 0.8235, + "nll_loss": 0.6903790235519409, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.024275178089737892, + "rewards/margins": 0.03317980095744133, + "rewards/rejected": -0.05745498090982437, + "step": 2120 + }, + { + "epoch": 1.687847498014297, + "grad_norm": 29.03752899169922, + "learning_rate": 1.084652289093281e-06, + "log_odds_chosen": 1.5771881341934204, + "log_odds_ratio": -0.3552386164665222, + "logits/chosen": 329.28778076171875, + "logits/rejected": 293.9125671386719, + "logps/chosen": -0.4627589285373688, + "logps/rejected": -0.8882190585136414, + "loss": 0.7296, + "nll_loss": 0.7793964147567749, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.023137949407100677, + "rewards/margins": 0.02127300761640072, + "rewards/rejected": -0.04441095516085625, + "step": 2125 + }, + { + "epoch": 1.6918189038919778, + "grad_norm": 27.400096893310547, + "learning_rate": 1.083378475043599e-06, + "log_odds_chosen": 2.1568071842193604, + "log_odds_ratio": -0.18791693449020386, + "logits/chosen": 376.5939636230469, + "logits/rejected": 325.4264221191406, + "logps/chosen": -0.34812647104263306, + "logps/rejected": -1.3398598432540894, + "loss": 0.7049, + "nll_loss": 0.5113323926925659, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.017406323924660683, + "rewards/margins": 0.049586668610572815, + "rewards/rejected": -0.06699299067258835, + "step": 2130 + }, + { + "epoch": 1.6957903097696585, + "grad_norm": 35.258235931396484, + "learning_rate": 1.0821091383745125e-06, + "log_odds_chosen": 1.3307862281799316, + "log_odds_ratio": -0.27201247215270996, + "logits/chosen": 326.7689514160156, + "logits/rejected": 252.6797332763672, + "logps/chosen": -0.5550287365913391, + "logps/rejected": -1.2143471240997314, + "loss": 0.7516, + "nll_loss": 0.9621167182922363, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.027751434594392776, + "rewards/margins": 0.032965924590826035, + "rewards/rejected": -0.06071736291050911, + "step": 2135 + }, + { + "epoch": 1.6997617156473392, + "grad_norm": 35.40523147583008, + "learning_rate": 1.0808442529177925e-06, + "log_odds_chosen": 1.7957347631454468, + "log_odds_ratio": -0.27554595470428467, + "logits/chosen": 326.6446838378906, + "logits/rejected": 290.5052185058594, + "logps/chosen": -0.6328108906745911, + "logps/rejected": -1.6637407541275024, + "loss": 0.9048, + "nll_loss": 1.0120429992675781, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.03164054453372955, + "rewards/margins": 0.05154649168252945, + "rewards/rejected": -0.083187036216259, + "step": 2140 + }, + { + "epoch": 1.70373312152502, + "grad_norm": 33.236244201660156, + "learning_rate": 1.0795837927188263e-06, + "log_odds_chosen": 2.5924830436706543, + "log_odds_ratio": -0.09222547709941864, + "logits/chosen": 234.89968872070312, + "logits/rejected": 404.0227966308594, + "logps/chosen": -0.4588467478752136, + "logps/rejected": -2.025322198867798, + "loss": 0.7654, + "nll_loss": 0.5697577595710754, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02294233813881874, + "rewards/margins": 0.07832376658916473, + "rewards/rejected": -0.10126610100269318, + "step": 2145 + }, + { + "epoch": 1.7077045274027005, + "grad_norm": 25.529382705688477, + "learning_rate": 1.0783277320343842e-06, + "log_odds_chosen": 1.2242435216903687, + "log_odds_ratio": -0.3057636618614197, + "logits/chosen": 357.5350646972656, + "logits/rejected": 285.787841796875, + "logps/chosen": -0.6072322726249695, + "logps/rejected": -1.252858281135559, + "loss": 0.6573, + "nll_loss": 0.9143392443656921, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.030361616984009743, + "rewards/margins": 0.0322813019156456, + "rewards/rejected": -0.06264291703701019, + "step": 2150 + }, + { + "epoch": 1.7116759332803813, + "grad_norm": 49.64712142944336, + "learning_rate": 1.0770760453304094e-06, + "log_odds_chosen": 2.2228212356567383, + "log_odds_ratio": -0.12164826691150665, + "logits/chosen": 322.6756591796875, + "logits/rejected": 239.7041015625, + "logps/chosen": -0.2784230709075928, + "logps/rejected": -1.3234379291534424, + "loss": 0.8904, + "nll_loss": 0.7466278672218323, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.013921153731644154, + "rewards/margins": 0.052250735461711884, + "rewards/rejected": -0.06617189198732376, + "step": 2155 + }, + { + "epoch": 1.715647339158062, + "grad_norm": 25.497785568237305, + "learning_rate": 1.075828707279838e-06, + "log_odds_chosen": 1.9126389026641846, + "log_odds_ratio": -0.1578390896320343, + "logits/chosen": 375.0826110839844, + "logits/rejected": 281.03997802734375, + "logps/chosen": -0.4245838522911072, + "logps/rejected": -1.5614581108093262, + "loss": 0.6819, + "nll_loss": 0.7630025744438171, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02122919075191021, + "rewards/margins": 0.05684372037649155, + "rewards/rejected": -0.0780729129910469, + "step": 2160 + }, + { + "epoch": 1.7196187450357425, + "grad_norm": 30.034915924072266, + "learning_rate": 1.0745856927604474e-06, + "log_odds_chosen": 1.5273187160491943, + "log_odds_ratio": -0.26404619216918945, + "logits/chosen": 334.1214599609375, + "logits/rejected": 238.54653930664062, + "logps/chosen": -0.6224659085273743, + "logps/rejected": -1.5819625854492188, + "loss": 0.8031, + "nll_loss": 0.8806807398796082, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.031123295426368713, + "rewards/margins": 0.047974843531847, + "rewards/rejected": -0.07909813523292542, + "step": 2165 + }, + { + "epoch": 1.7235901509134233, + "grad_norm": 34.6898307800293, + "learning_rate": 1.0733469768527298e-06, + "log_odds_chosen": 1.7383909225463867, + "log_odds_ratio": -0.20549456775188446, + "logits/chosen": 341.08563232421875, + "logits/rejected": 324.42132568359375, + "logps/chosen": -0.5074697732925415, + "logps/rejected": -1.5371900796890259, + "loss": 0.7717, + "nll_loss": 0.7111256718635559, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.025373492389917374, + "rewards/margins": 0.05148600786924362, + "rewards/rejected": -0.0768595039844513, + "step": 2170 + }, + { + "epoch": 1.727561556791104, + "grad_norm": 32.80601119995117, + "learning_rate": 1.0721125348377948e-06, + "log_odds_chosen": 1.6041771173477173, + "log_odds_ratio": -0.3034898042678833, + "logits/chosen": 309.17962646484375, + "logits/rejected": 346.447509765625, + "logps/chosen": -0.5312290191650391, + "logps/rejected": -1.4099701642990112, + "loss": 0.6985, + "nll_loss": 0.8906863331794739, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.026561450213193893, + "rewards/margins": 0.04393706098198891, + "rewards/rejected": -0.0704985111951828, + "step": 2175 + }, + { + "epoch": 1.7315329626687848, + "grad_norm": 37.60955810546875, + "learning_rate": 1.0708823421952984e-06, + "log_odds_chosen": 1.7141081094741821, + "log_odds_ratio": -0.24008333683013916, + "logits/chosen": 338.4755554199219, + "logits/rejected": 307.659423828125, + "logps/chosen": -0.4687555432319641, + "logps/rejected": -1.3905603885650635, + "loss": 0.7047, + "nll_loss": 0.6992734670639038, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.023437779396772385, + "rewards/margins": 0.04609023407101631, + "rewards/rejected": -0.0695280134677887, + "step": 2180 + }, + { + "epoch": 1.7355043685464655, + "grad_norm": 24.991849899291992, + "learning_rate": 1.0696563746013951e-06, + "log_odds_chosen": 1.723670244216919, + "log_odds_ratio": -0.2573872208595276, + "logits/chosen": 305.9327087402344, + "logits/rejected": 333.73638916015625, + "logps/chosen": -0.5920778512954712, + "logps/rejected": -1.579659342765808, + "loss": 0.9738, + "nll_loss": 0.8010879755020142, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02960388921201229, + "rewards/margins": 0.04937908053398132, + "rewards/rejected": -0.07898297160863876, + "step": 2185 + }, + { + "epoch": 1.7394757744241462, + "grad_norm": 27.51617431640625, + "learning_rate": 1.0684346079267208e-06, + "log_odds_chosen": 2.167844772338867, + "log_odds_ratio": -0.18182644248008728, + "logits/chosen": 300.15960693359375, + "logits/rejected": 263.7042541503906, + "logps/chosen": -0.34622564911842346, + "logps/rejected": -1.5231422185897827, + "loss": 0.704, + "nll_loss": 0.5762925744056702, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.017311284318566322, + "rewards/margins": 0.05884582921862602, + "rewards/rejected": -0.0761571079492569, + "step": 2190 + }, + { + "epoch": 1.743447180301827, + "grad_norm": 36.284423828125, + "learning_rate": 1.0672170182343944e-06, + "log_odds_chosen": 1.5064342021942139, + "log_odds_ratio": -0.30479374527931213, + "logits/chosen": 265.1541442871094, + "logits/rejected": 310.3070068359375, + "logps/chosen": -0.7095457315444946, + "logps/rejected": -1.6736814975738525, + "loss": 0.9196, + "nll_loss": 0.837944507598877, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03547728806734085, + "rewards/margins": 0.04820678383111954, + "rewards/rejected": -0.08368407189846039, + "step": 2195 + }, + { + "epoch": 1.7474185861795075, + "grad_norm": 57.919429779052734, + "learning_rate": 1.066003581778052e-06, + "log_odds_chosen": 2.6329212188720703, + "log_odds_ratio": -0.11882112920284271, + "logits/chosen": 256.0660400390625, + "logits/rejected": 321.71478271484375, + "logps/chosen": -0.4541356563568115, + "logps/rejected": -2.08860182762146, + "loss": 0.7238, + "nll_loss": 0.7454873323440552, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.022706782445311546, + "rewards/margins": 0.08172331750392914, + "rewards/rejected": -0.10443009436130524, + "step": 2200 + }, + { + "epoch": 1.7513899920571883, + "grad_norm": 50.52003860473633, + "learning_rate": 1.0647942749998999e-06, + "log_odds_chosen": 1.3120359182357788, + "log_odds_ratio": -0.31396904587745667, + "logits/chosen": 328.6595153808594, + "logits/rejected": 402.1265563964844, + "logps/chosen": -0.6453569531440735, + "logps/rejected": -1.3103744983673096, + "loss": 1.0108, + "nll_loss": 0.8728786706924438, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.032267846167087555, + "rewards/margins": 0.03325087949633598, + "rewards/rejected": -0.06551872193813324, + "step": 2205 + }, + { + "epoch": 1.7553613979348688, + "grad_norm": 41.56594467163086, + "learning_rate": 1.0635890745287928e-06, + "log_odds_chosen": 2.6617274284362793, + "log_odds_ratio": -0.19012318551540375, + "logits/chosen": 258.3802795410156, + "logits/rejected": 250.87179565429688, + "logps/chosen": -0.4259757101535797, + "logps/rejected": -1.9833433628082275, + "loss": 0.846, + "nll_loss": 0.7863501310348511, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.021298784762620926, + "rewards/margins": 0.07786837965250015, + "rewards/rejected": -0.09916716814041138, + "step": 2210 + }, + { + "epoch": 1.7593328038125495, + "grad_norm": 34.22325897216797, + "learning_rate": 1.0623879571783382e-06, + "log_odds_chosen": 0.8847603797912598, + "log_odds_ratio": -0.4665696620941162, + "logits/chosen": 305.217041015625, + "logits/rejected": 351.25518798828125, + "logps/chosen": -0.605812668800354, + "logps/rejected": -1.0832501649856567, + "loss": 0.8867, + "nll_loss": 0.7453805804252625, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.0302906334400177, + "rewards/margins": 0.023871876299381256, + "rewards/rejected": -0.054162509739398956, + "step": 2215 + }, + { + "epoch": 1.7633042096902303, + "grad_norm": 25.505517959594727, + "learning_rate": 1.0611908999450224e-06, + "log_odds_chosen": 1.605830430984497, + "log_odds_ratio": -0.20149704813957214, + "logits/chosen": 354.46044921875, + "logits/rejected": 311.38763427734375, + "logps/chosen": -0.37914207577705383, + "logps/rejected": -1.1268947124481201, + "loss": 0.7463, + "nll_loss": 0.6839945912361145, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01895710453391075, + "rewards/margins": 0.037387631833553314, + "rewards/rejected": -0.056344740092754364, + "step": 2220 + }, + { + "epoch": 1.767275615567911, + "grad_norm": 27.840789794921875, + "learning_rate": 1.0599978800063601e-06, + "log_odds_chosen": 1.546400785446167, + "log_odds_ratio": -0.2523784935474396, + "logits/chosen": 320.6352844238281, + "logits/rejected": 245.90072631835938, + "logps/chosen": -0.5363035798072815, + "logps/rejected": -1.4044965505599976, + "loss": 0.7344, + "nll_loss": 0.672437846660614, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.026815179735422134, + "rewards/margins": 0.04340965673327446, + "rewards/rejected": -0.0702248364686966, + "step": 2225 + }, + { + "epoch": 1.7712470214455918, + "grad_norm": 32.31303405761719, + "learning_rate": 1.058808874719067e-06, + "log_odds_chosen": 1.2581157684326172, + "log_odds_ratio": -0.3380572497844696, + "logits/chosen": 282.74761962890625, + "logits/rejected": 327.1788024902344, + "logps/chosen": -0.7751240134239197, + "logps/rejected": -1.5154592990875244, + "loss": 0.7602, + "nll_loss": 0.8768072128295898, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.03875620290637016, + "rewards/margins": 0.03701676428318024, + "rewards/rejected": -0.0757729634642601, + "step": 2230 + }, + { + "epoch": 1.7752184273232725, + "grad_norm": 27.653583526611328, + "learning_rate": 1.057623861617254e-06, + "log_odds_chosen": 2.2240524291992188, + "log_odds_ratio": -0.14906403422355652, + "logits/chosen": 259.7555236816406, + "logits/rejected": 386.7548522949219, + "logps/chosen": -0.37181687355041504, + "logps/rejected": -1.6917979717254639, + "loss": 0.7392, + "nll_loss": 0.6186314821243286, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.018590843304991722, + "rewards/margins": 0.06599906086921692, + "rewards/rejected": -0.08458990603685379, + "step": 2235 + }, + { + "epoch": 1.7791898332009533, + "grad_norm": 34.99640655517578, + "learning_rate": 1.0564428184106459e-06, + "log_odds_chosen": 1.4233901500701904, + "log_odds_ratio": -0.27014046907424927, + "logits/chosen": 337.10662841796875, + "logits/rejected": 255.8411102294922, + "logps/chosen": -0.4958310127258301, + "logps/rejected": -1.2386577129364014, + "loss": 0.8677, + "nll_loss": 0.9047778844833374, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.024791549891233444, + "rewards/margins": 0.037141330540180206, + "rewards/rejected": -0.06193288043141365, + "step": 2240 + }, + { + "epoch": 1.783161239078634, + "grad_norm": 32.94675064086914, + "learning_rate": 1.0552657229828183e-06, + "log_odds_chosen": 0.731157124042511, + "log_odds_ratio": -0.4831514358520508, + "logits/chosen": 321.543701171875, + "logits/rejected": 240.626220703125, + "logps/chosen": -0.6978667974472046, + "logps/rejected": -1.0622944831848145, + "loss": 0.8724, + "nll_loss": 0.9993354082107544, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.03489334136247635, + "rewards/margins": 0.018221387639641762, + "rewards/rejected": -0.05311473086476326, + "step": 2245 + }, + { + "epoch": 1.7871326449563145, + "grad_norm": 30.07236671447754, + "learning_rate": 1.0540925533894598e-06, + "log_odds_chosen": 0.8772686719894409, + "log_odds_ratio": -0.4837498664855957, + "logits/chosen": 438.844482421875, + "logits/rejected": 251.0099639892578, + "logps/chosen": -0.6994816064834595, + "logps/rejected": -1.0784616470336914, + "loss": 0.7779, + "nll_loss": 0.8594354391098022, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.034974079579114914, + "rewards/margins": 0.018949000164866447, + "rewards/rejected": -0.05392308160662651, + "step": 2250 + }, + { + "epoch": 1.7911040508339953, + "grad_norm": 29.69354820251465, + "learning_rate": 1.0529232878566533e-06, + "log_odds_chosen": 1.2839289903640747, + "log_odds_ratio": -0.2796970009803772, + "logits/chosen": 289.58209228515625, + "logits/rejected": 359.0069885253906, + "logps/chosen": -0.46519821882247925, + "logps/rejected": -1.1590473651885986, + "loss": 0.6845, + "nll_loss": 0.6256710886955261, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.023259911686182022, + "rewards/margins": 0.03469245508313179, + "rewards/rejected": -0.05795236676931381, + "step": 2255 + }, + { + "epoch": 1.7950754567116758, + "grad_norm": 27.493417739868164, + "learning_rate": 1.0517579047791782e-06, + "log_odds_chosen": 0.9947487115859985, + "log_odds_ratio": -0.4154279828071594, + "logits/chosen": 321.8368225097656, + "logits/rejected": 355.9534606933594, + "logps/chosen": -0.47752445936203003, + "logps/rejected": -1.0383057594299316, + "loss": 0.6544, + "nll_loss": 0.5580215454101562, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.02387622371315956, + "rewards/margins": 0.02803906239569187, + "rewards/rejected": -0.05191528797149658, + "step": 2260 + }, + { + "epoch": 1.7990468625893565, + "grad_norm": 36.50867462158203, + "learning_rate": 1.050596382718834e-06, + "log_odds_chosen": 1.929277777671814, + "log_odds_ratio": -0.24522367119789124, + "logits/chosen": 248.70803833007812, + "logits/rejected": 370.62701416015625, + "logps/chosen": -0.6788391470909119, + "logps/rejected": -1.9356067180633545, + "loss": 0.9336, + "nll_loss": 0.8503932952880859, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.03394196182489395, + "rewards/margins": 0.06283838301897049, + "rewards/rejected": -0.09678034484386444, + "step": 2265 + }, + { + "epoch": 1.8030182684670373, + "grad_norm": 30.090612411499023, + "learning_rate": 1.049438700402784e-06, + "log_odds_chosen": 2.4246153831481934, + "log_odds_ratio": -0.1122988611459732, + "logits/chosen": 317.8182678222656, + "logits/rejected": 389.30523681640625, + "logps/chosen": -0.40875324606895447, + "logps/rejected": -1.923872709274292, + "loss": 0.6992, + "nll_loss": 0.4913650155067444, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.020437661558389664, + "rewards/margins": 0.07575596868991852, + "rewards/rejected": -0.09619362652301788, + "step": 2270 + }, + { + "epoch": 1.806989674344718, + "grad_norm": 25.525978088378906, + "learning_rate": 1.0482848367219184e-06, + "log_odds_chosen": 1.7310831546783447, + "log_odds_ratio": -0.34687691926956177, + "logits/chosen": 293.1515197753906, + "logits/rejected": 304.5860900878906, + "logps/chosen": -0.6816526055335999, + "logps/rejected": -1.7223570346832275, + "loss": 0.9117, + "nll_loss": 0.8708241581916809, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.03408262878656387, + "rewards/margins": 0.05203522369265556, + "rewards/rejected": -0.08611784875392914, + "step": 2275 + }, + { + "epoch": 1.8109610802223988, + "grad_norm": 29.124156951904297, + "learning_rate": 1.0471347707292389e-06, + "log_odds_chosen": 2.642489194869995, + "log_odds_ratio": -0.14402975142002106, + "logits/chosen": 286.64959716796875, + "logits/rejected": 330.8363037109375, + "logps/chosen": -0.36216282844543457, + "logps/rejected": -1.9222888946533203, + "loss": 0.7766, + "nll_loss": 0.5919591188430786, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01810814067721367, + "rewards/margins": 0.078006312251091, + "rewards/rejected": -0.09611444920301437, + "step": 2280 + }, + { + "epoch": 1.8149324861000795, + "grad_norm": 24.49781036376953, + "learning_rate": 1.04598848163826e-06, + "log_odds_chosen": 1.9413111209869385, + "log_odds_ratio": -0.19651436805725098, + "logits/chosen": 294.1470031738281, + "logits/rejected": 285.11810302734375, + "logps/chosen": -0.6075969338417053, + "logps/rejected": -1.9450137615203857, + "loss": 0.8606, + "nll_loss": 0.7974241971969604, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.030379846692085266, + "rewards/margins": 0.06687085330486298, + "rewards/rejected": -0.09725069999694824, + "step": 2285 + }, + { + "epoch": 1.8189038919777603, + "grad_norm": 24.984628677368164, + "learning_rate": 1.0448459488214322e-06, + "log_odds_chosen": 1.4830385446548462, + "log_odds_ratio": -0.24547508358955383, + "logits/chosen": 284.15216064453125, + "logits/rejected": 289.8674621582031, + "logps/chosen": -0.576518177986145, + "logps/rejected": -1.3695199489593506, + "loss": 0.712, + "nll_loss": 0.845025360584259, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02882590889930725, + "rewards/margins": 0.0396500900387764, + "rewards/rejected": -0.06847599893808365, + "step": 2290 + }, + { + "epoch": 1.8228752978554408, + "grad_norm": 35.125213623046875, + "learning_rate": 1.0437071518085826e-06, + "log_odds_chosen": 1.9451690912246704, + "log_odds_ratio": -0.2187591791152954, + "logits/chosen": 344.46112060546875, + "logits/rejected": 290.50848388671875, + "logps/chosen": -0.4077928066253662, + "logps/rejected": -1.4108073711395264, + "loss": 0.76, + "nll_loss": 0.8611392974853516, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02038964070379734, + "rewards/margins": 0.05015072971582413, + "rewards/rejected": -0.07054036855697632, + "step": 2295 + }, + { + "epoch": 1.8268467037331215, + "grad_norm": 29.224489212036133, + "learning_rate": 1.042572070285374e-06, + "log_odds_chosen": 1.6341838836669922, + "log_odds_ratio": -0.47226276993751526, + "logits/chosen": 240.94723510742188, + "logits/rejected": 439.5355529785156, + "logps/chosen": -0.2641201317310333, + "logps/rejected": -1.169684886932373, + "loss": 0.7786, + "nll_loss": 0.39919307827949524, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.013206006959080696, + "rewards/margins": 0.04527823254466057, + "rewards/rejected": -0.05848424509167671, + "step": 2300 + }, + { + "epoch": 1.8308181096108023, + "grad_norm": 64.61443328857422, + "learning_rate": 1.0414406840917835e-06, + "log_odds_chosen": 1.7078742980957031, + "log_odds_ratio": -0.29076051712036133, + "logits/chosen": 301.88037109375, + "logits/rejected": 251.6233673095703, + "logps/chosen": -0.525429368019104, + "logps/rejected": -1.6450624465942383, + "loss": 0.8899, + "nll_loss": 0.8937755823135376, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02627146616578102, + "rewards/margins": 0.05598165839910507, + "rewards/rejected": -0.0822531208395958, + "step": 2305 + }, + { + "epoch": 1.8347895154884828, + "grad_norm": 36.214271545410156, + "learning_rate": 1.0403129732205989e-06, + "log_odds_chosen": 1.3265584707260132, + "log_odds_ratio": -0.40558844804763794, + "logits/chosen": 346.4527893066406, + "logits/rejected": 320.7982177734375, + "logps/chosen": -0.6041200757026672, + "logps/rejected": -1.3090689182281494, + "loss": 0.7463, + "nll_loss": 0.9375091791152954, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.030206000432372093, + "rewards/margins": 0.03524744138121605, + "rewards/rejected": -0.06545344740152359, + "step": 2310 + }, + { + "epoch": 1.8387609213661635, + "grad_norm": 23.28519058227539, + "learning_rate": 1.0391889178159317e-06, + "log_odds_chosen": 1.4057496786117554, + "log_odds_ratio": -0.24964909255504608, + "logits/chosen": 309.9521484375, + "logits/rejected": 243.9438934326172, + "logps/chosen": -0.8198047876358032, + "logps/rejected": -1.7336012125015259, + "loss": 0.9028, + "nll_loss": 1.1149805784225464, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04099023714661598, + "rewards/margins": 0.04568982869386673, + "rewards/rejected": -0.08668006211519241, + "step": 2315 + }, + { + "epoch": 1.8427323272438443, + "grad_norm": 33.16374969482422, + "learning_rate": 1.0380684981717496e-06, + "log_odds_chosen": 0.6586580276489258, + "log_odds_ratio": -0.4859169125556946, + "logits/chosen": 334.7452087402344, + "logits/rejected": 262.04071044921875, + "logps/chosen": -0.43002748489379883, + "logps/rejected": -0.7598254084587097, + "loss": 0.7969, + "nll_loss": 0.7446638941764832, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.02150137536227703, + "rewards/margins": 0.016489896923303604, + "rewards/rejected": -0.037991270422935486, + "step": 2320 + }, + { + "epoch": 1.846703733121525, + "grad_norm": 29.125152587890625, + "learning_rate": 1.0369516947304254e-06, + "log_odds_chosen": 0.0750177875161171, + "log_odds_ratio": -0.9107866287231445, + "logits/chosen": 350.4623718261719, + "logits/rejected": 319.9029541015625, + "logps/chosen": -0.5689317584037781, + "logps/rejected": -0.5792874097824097, + "loss": 0.7853, + "nll_loss": 0.6771488785743713, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.028446590527892113, + "rewards/margins": 0.0005177788552828133, + "rewards/rejected": -0.028964370489120483, + "step": 2325 + }, + { + "epoch": 1.8506751389992058, + "grad_norm": 34.48380661010742, + "learning_rate": 1.0358384880813022e-06, + "log_odds_chosen": 1.9680553674697876, + "log_odds_ratio": -0.1879040002822876, + "logits/chosen": 311.9992980957031, + "logits/rejected": 378.4543762207031, + "logps/chosen": -0.49781322479248047, + "logps/rejected": -1.6778230667114258, + "loss": 0.7279, + "nll_loss": 0.7636333703994751, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.024890663102269173, + "rewards/margins": 0.059000492095947266, + "rewards/rejected": -0.08389115333557129, + "step": 2330 + }, + { + "epoch": 1.8546465448768865, + "grad_norm": 51.420326232910156, + "learning_rate": 1.0347288589592778e-06, + "log_odds_chosen": 2.020479679107666, + "log_odds_ratio": -0.23942995071411133, + "logits/chosen": 301.0638732910156, + "logits/rejected": 328.4521179199219, + "logps/chosen": -0.6289145946502686, + "logps/rejected": -1.9039732217788696, + "loss": 0.7954, + "nll_loss": 0.821241021156311, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03144572675228119, + "rewards/margins": 0.0637529268860817, + "rewards/rejected": -0.09519866108894348, + "step": 2335 + }, + { + "epoch": 1.8586179507545673, + "grad_norm": 39.684078216552734, + "learning_rate": 1.033622788243404e-06, + "log_odds_chosen": 1.3421988487243652, + "log_odds_ratio": -0.37751635909080505, + "logits/chosen": 305.427978515625, + "logits/rejected": 291.96466064453125, + "logps/chosen": -0.5937973856925964, + "logps/rejected": -1.6161730289459229, + "loss": 0.8183, + "nll_loss": 0.7988775968551636, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.02968987263739109, + "rewards/margins": 0.051118772476911545, + "rewards/rejected": -0.08080865442752838, + "step": 2340 + }, + { + "epoch": 1.8625893566322478, + "grad_norm": 30.320964813232422, + "learning_rate": 1.0325202569555013e-06, + "log_odds_chosen": 1.2330553531646729, + "log_odds_ratio": -0.3010442554950714, + "logits/chosen": 298.0875549316406, + "logits/rejected": 370.0763854980469, + "logps/chosen": -0.558303713798523, + "logps/rejected": -1.2143747806549072, + "loss": 0.7559, + "nll_loss": 0.7310600280761719, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.027915185317397118, + "rewards/margins": 0.032803554087877274, + "rewards/rejected": -0.06071874499320984, + "step": 2345 + }, + { + "epoch": 1.8665607625099285, + "grad_norm": 25.897533416748047, + "learning_rate": 1.0314212462587935e-06, + "log_odds_chosen": 2.4509646892547607, + "log_odds_ratio": -0.09875769913196564, + "logits/chosen": 253.66830444335938, + "logits/rejected": 334.7943115234375, + "logps/chosen": -0.34774085879325867, + "logps/rejected": -1.5809571743011475, + "loss": 0.8168, + "nll_loss": 0.6432914137840271, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.017387043684720993, + "rewards/margins": 0.06166081503033638, + "rewards/rejected": -0.07904786616563797, + "step": 2350 + }, + { + "epoch": 1.870532168387609, + "grad_norm": 26.26967430114746, + "learning_rate": 1.0303257374565546e-06, + "log_odds_chosen": 1.6334152221679688, + "log_odds_ratio": -0.266364187002182, + "logits/chosen": 385.4601135253906, + "logits/rejected": 300.8085632324219, + "logps/chosen": -0.6583752036094666, + "logps/rejected": -1.4509037733078003, + "loss": 0.7401, + "nll_loss": 0.7208520174026489, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03291876241564751, + "rewards/margins": 0.039626430720090866, + "rewards/rejected": -0.07254519313573837, + "step": 2355 + }, + { + "epoch": 1.8745035742652898, + "grad_norm": 26.586294174194336, + "learning_rate": 1.029233711990773e-06, + "log_odds_chosen": 2.712470531463623, + "log_odds_ratio": -0.20829498767852783, + "logits/chosen": 307.21173095703125, + "logits/rejected": 252.4169464111328, + "logps/chosen": -0.383186399936676, + "logps/rejected": -1.2868399620056152, + "loss": 0.6887, + "nll_loss": 0.6436999440193176, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01915931887924671, + "rewards/margins": 0.04518268257379532, + "rewards/rejected": -0.06434200704097748, + "step": 2360 + }, + { + "epoch": 1.8784749801429705, + "grad_norm": 23.74375343322754, + "learning_rate": 1.0281451514408315e-06, + "log_odds_chosen": 1.4261778593063354, + "log_odds_ratio": -0.2637864947319031, + "logits/chosen": 264.46990966796875, + "logits/rejected": 421.1216735839844, + "logps/chosen": -0.6919044852256775, + "logps/rejected": -1.547218918800354, + "loss": 0.7403, + "nll_loss": 0.9019848108291626, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03459522873163223, + "rewards/margins": 0.04276571795344353, + "rewards/rejected": -0.07736094295978546, + "step": 2365 + }, + { + "epoch": 1.8824463860206513, + "grad_norm": 30.572526931762695, + "learning_rate": 1.0270600375222014e-06, + "log_odds_chosen": 2.870634078979492, + "log_odds_ratio": -0.06479945033788681, + "logits/chosen": 312.4655456542969, + "logits/rejected": 231.99411010742188, + "logps/chosen": -0.21137888729572296, + "logps/rejected": -1.5268778800964355, + "loss": 0.5935, + "nll_loss": 0.5586276054382324, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010568944737315178, + "rewards/margins": 0.06577495485544205, + "rewards/rejected": -0.07634389400482178, + "step": 2370 + }, + { + "epoch": 1.886417791898332, + "grad_norm": 31.64859962463379, + "learning_rate": 1.0259783520851542e-06, + "log_odds_chosen": 1.6580451726913452, + "log_odds_ratio": -0.22571304440498352, + "logits/chosen": 271.18524169921875, + "logits/rejected": 313.13592529296875, + "logps/chosen": -0.61414635181427, + "logps/rejected": -1.621797800064087, + "loss": 0.8558, + "nll_loss": 1.0191186666488647, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03070731833577156, + "rewards/margins": 0.050382573157548904, + "rewards/rejected": -0.08108989149332047, + "step": 2375 + }, + { + "epoch": 1.8903891977760128, + "grad_norm": 29.006942749023438, + "learning_rate": 1.0249000771134847e-06, + "log_odds_chosen": 2.5084846019744873, + "log_odds_ratio": -0.1580258011817932, + "logits/chosen": 237.0716552734375, + "logits/rejected": 358.3143310546875, + "logps/chosen": -0.5167864561080933, + "logps/rejected": -2.2698826789855957, + "loss": 0.7576, + "nll_loss": 0.6090616583824158, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.025839323177933693, + "rewards/margins": 0.08765481412410736, + "rewards/rejected": -0.1134941428899765, + "step": 2380 + }, + { + "epoch": 1.8943606036536935, + "grad_norm": 25.368074417114258, + "learning_rate": 1.023825194723252e-06, + "log_odds_chosen": 1.3378890752792358, + "log_odds_ratio": -0.2533974349498749, + "logits/chosen": 340.38909912109375, + "logits/rejected": 305.83673095703125, + "logps/chosen": -0.5703948736190796, + "logps/rejected": -1.343727469444275, + "loss": 0.8309, + "nll_loss": 0.6171257495880127, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02851974405348301, + "rewards/margins": 0.038666628301143646, + "rewards/rejected": -0.0671863779425621, + "step": 2385 + }, + { + "epoch": 1.8983320095313743, + "grad_norm": 42.32550811767578, + "learning_rate": 1.022753687161533e-06, + "log_odds_chosen": 1.654007911682129, + "log_odds_ratio": -0.21362581849098206, + "logits/chosen": 269.56829833984375, + "logits/rejected": 278.0132141113281, + "logps/chosen": -0.4740678369998932, + "logps/rejected": -1.3538405895233154, + "loss": 0.6666, + "nll_loss": 0.7328553199768066, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02370339073240757, + "rewards/margins": 0.043988633900880814, + "rewards/rejected": -0.06769202649593353, + "step": 2390 + }, + { + "epoch": 1.9023034154090548, + "grad_norm": 35.25934982299805, + "learning_rate": 1.0216855368051905e-06, + "log_odds_chosen": 1.5869925022125244, + "log_odds_ratio": -0.27753788232803345, + "logits/chosen": 266.008056640625, + "logits/rejected": 325.9742126464844, + "logps/chosen": -0.31553053855895996, + "logps/rejected": -1.1027727127075195, + "loss": 0.7717, + "nll_loss": 0.6756271719932556, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01577652618288994, + "rewards/margins": 0.0393621101975441, + "rewards/rejected": -0.055138640105724335, + "step": 2395 + }, + { + "epoch": 1.9062748212867355, + "grad_norm": 42.96984100341797, + "learning_rate": 1.0206207261596577e-06, + "log_odds_chosen": 1.7989251613616943, + "log_odds_ratio": -0.18788772821426392, + "logits/chosen": 284.7127685546875, + "logits/rejected": 315.79156494140625, + "logps/chosen": -0.4359824061393738, + "logps/rejected": -1.3588236570358276, + "loss": 0.7178, + "nll_loss": 0.5897383689880371, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02179912105202675, + "rewards/margins": 0.046142060309648514, + "rewards/rejected": -0.06794118881225586, + "step": 2400 + }, + { + "epoch": 1.910246227164416, + "grad_norm": 32.65408706665039, + "learning_rate": 1.019559237857732e-06, + "log_odds_chosen": 1.6669549942016602, + "log_odds_ratio": -0.26262766122817993, + "logits/chosen": 267.59027099609375, + "logits/rejected": 248.32080078125, + "logps/chosen": -0.5081090331077576, + "logps/rejected": -1.4452846050262451, + "loss": 0.6928, + "nll_loss": 0.6476167440414429, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02540545165538788, + "rewards/margins": 0.0468587800860405, + "rewards/rejected": -0.07226422429084778, + "step": 2405 + }, + { + "epoch": 1.9142176330420968, + "grad_norm": 29.009151458740234, + "learning_rate": 1.0185010546583882e-06, + "log_odds_chosen": 1.5558197498321533, + "log_odds_ratio": -0.21908335387706757, + "logits/chosen": 277.3587341308594, + "logits/rejected": 372.0375061035156, + "logps/chosen": -0.4578898549079895, + "logps/rejected": -1.304565668106079, + "loss": 0.7609, + "nll_loss": 0.7717305421829224, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.022894492372870445, + "rewards/margins": 0.042333800345659256, + "rewards/rejected": -0.06522828340530396, + "step": 2410 + }, + { + "epoch": 1.9181890389197775, + "grad_norm": 29.15569305419922, + "learning_rate": 1.0174461594455997e-06, + "log_odds_chosen": 1.5694271326065063, + "log_odds_ratio": -0.27755317091941833, + "logits/chosen": 310.48773193359375, + "logits/rejected": 290.09130859375, + "logps/chosen": -0.5000747442245483, + "logps/rejected": -1.2587058544158936, + "loss": 0.6486, + "nll_loss": 0.6504746675491333, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.025003740563988686, + "rewards/margins": 0.03793155401945114, + "rewards/rejected": -0.06293529272079468, + "step": 2415 + }, + { + "epoch": 1.9221604447974583, + "grad_norm": 39.57065963745117, + "learning_rate": 1.0163945352271773e-06, + "log_odds_chosen": 1.5484364032745361, + "log_odds_ratio": -0.3084541857242584, + "logits/chosen": 394.11370849609375, + "logits/rejected": 378.93695068359375, + "logps/chosen": -0.4161972403526306, + "logps/rejected": -1.2610418796539307, + "loss": 0.7183, + "nll_loss": 0.610600471496582, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.02080986276268959, + "rewards/margins": 0.04224223643541336, + "rewards/rejected": -0.06305209547281265, + "step": 2420 + }, + { + "epoch": 1.926131850675139, + "grad_norm": 37.694602966308594, + "learning_rate": 1.0153461651336193e-06, + "log_odds_chosen": 2.225497007369995, + "log_odds_ratio": -0.26799148321151733, + "logits/chosen": 340.69232177734375, + "logits/rejected": 271.46759033203125, + "logps/chosen": -0.45521894097328186, + "logps/rejected": -1.3658949136734009, + "loss": 0.7839, + "nll_loss": 0.8904461860656738, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.022760944440960884, + "rewards/margins": 0.04553379863500595, + "rewards/rejected": -0.06829474866390228, + "step": 2425 + }, + { + "epoch": 1.9301032565528198, + "grad_norm": 23.69498062133789, + "learning_rate": 1.0143010324169743e-06, + "log_odds_chosen": 1.5935895442962646, + "log_odds_ratio": -0.22372519969940186, + "logits/chosen": 304.7364501953125, + "logits/rejected": 331.30670166015625, + "logps/chosen": -0.41409072279930115, + "logps/rejected": -1.1915451288223267, + "loss": 0.7498, + "nll_loss": 0.6026512980461121, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.020704539492726326, + "rewards/margins": 0.038872718811035156, + "rewards/rejected": -0.05957725644111633, + "step": 2430 + }, + { + "epoch": 1.9340746624305005, + "grad_norm": 29.575180053710938, + "learning_rate": 1.013259120449719e-06, + "log_odds_chosen": 1.621791124343872, + "log_odds_ratio": -0.295773983001709, + "logits/chosen": 348.9219970703125, + "logits/rejected": 284.30517578125, + "logps/chosen": -0.5032614469528198, + "logps/rejected": -1.1986490488052368, + "loss": 0.7465, + "nll_loss": 0.626564621925354, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0251630749553442, + "rewards/margins": 0.03476938232779503, + "rewards/rejected": -0.05993245169520378, + "step": 2435 + }, + { + "epoch": 1.938046068308181, + "grad_norm": 90.26566314697266, + "learning_rate": 1.0122204127236452e-06, + "log_odds_chosen": 2.639167070388794, + "log_odds_ratio": -0.12427319586277008, + "logits/chosen": 504.92901611328125, + "logits/rejected": 271.24432373046875, + "logps/chosen": -0.2446070909500122, + "logps/rejected": -1.3790600299835205, + "loss": 0.7872, + "nll_loss": 0.5850281715393066, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012230354361236095, + "rewards/margins": 0.05672264099121094, + "rewards/rejected": -0.06895299255847931, + "step": 2440 + }, + { + "epoch": 1.9420174741858618, + "grad_norm": 31.37210464477539, + "learning_rate": 1.0111848928487622e-06, + "log_odds_chosen": 2.033698081970215, + "log_odds_ratio": -0.29641473293304443, + "logits/chosen": 243.27108764648438, + "logits/rejected": 469.9246520996094, + "logps/chosen": -0.7070415019989014, + "logps/rejected": -2.0553581714630127, + "loss": 0.8964, + "nll_loss": 0.928133487701416, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.03535207733511925, + "rewards/margins": 0.06741583347320557, + "rewards/rejected": -0.10276790708303452, + "step": 2445 + }, + { + "epoch": 1.9459888800635425, + "grad_norm": 43.04076385498047, + "learning_rate": 1.0101525445522107e-06, + "log_odds_chosen": 1.378095030784607, + "log_odds_ratio": -0.2568764090538025, + "logits/chosen": 273.47930908203125, + "logits/rejected": 273.142578125, + "logps/chosen": -0.40632033348083496, + "logps/rejected": -1.0940074920654297, + "loss": 0.7759, + "nll_loss": 0.6475064754486084, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.020316017791628838, + "rewards/margins": 0.034384358674287796, + "rewards/rejected": -0.054700374603271484, + "step": 2450 + }, + { + "epoch": 1.949960285941223, + "grad_norm": 29.865938186645508, + "learning_rate": 1.0091233516771892e-06, + "log_odds_chosen": 2.293731689453125, + "log_odds_ratio": -0.18753847479820251, + "logits/chosen": 384.34149169921875, + "logits/rejected": 279.8568420410156, + "logps/chosen": -0.38630157709121704, + "logps/rejected": -1.4292711019515991, + "loss": 0.8241, + "nll_loss": 0.8875762224197388, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.019315078854560852, + "rewards/margins": 0.0521484799683094, + "rewards/rejected": -0.07146355509757996, + "step": 2455 + }, + { + "epoch": 1.9539316918189038, + "grad_norm": 25.074451446533203, + "learning_rate": 1.0080972981818898e-06, + "log_odds_chosen": 1.5017271041870117, + "log_odds_ratio": -0.24289080500602722, + "logits/chosen": 290.76361083984375, + "logits/rejected": 316.858154296875, + "logps/chosen": -0.5781405568122864, + "logps/rejected": -1.4656955003738403, + "loss": 0.749, + "nll_loss": 1.0332854986190796, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02890702709555626, + "rewards/margins": 0.044377751648426056, + "rewards/rejected": -0.07328477501869202, + "step": 2460 + }, + { + "epoch": 1.9579030976965845, + "grad_norm": 37.553321838378906, + "learning_rate": 1.0070743681384512e-06, + "log_odds_chosen": 1.714714765548706, + "log_odds_ratio": -0.20342151820659637, + "logits/chosen": 304.4083557128906, + "logits/rejected": 403.9393310546875, + "logps/chosen": -0.49747514724731445, + "logps/rejected": -1.5647690296173096, + "loss": 0.7228, + "nll_loss": 0.6288737058639526, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.024873757734894753, + "rewards/margins": 0.05336468666791916, + "rewards/rejected": -0.07823844999074936, + "step": 2465 + }, + { + "epoch": 1.9618745035742653, + "grad_norm": 30.394023895263672, + "learning_rate": 1.0060545457319173e-06, + "log_odds_chosen": 1.9750301837921143, + "log_odds_ratio": -0.15881523489952087, + "logits/chosen": 252.62771606445312, + "logits/rejected": 321.7646484375, + "logps/chosen": -0.32253313064575195, + "logps/rejected": -1.2601935863494873, + "loss": 0.71, + "nll_loss": 0.6166780591011047, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01612665504217148, + "rewards/margins": 0.04688302427530289, + "rewards/rejected": -0.06300967931747437, + "step": 2470 + }, + { + "epoch": 1.965845909451946, + "grad_norm": 39.465248107910156, + "learning_rate": 1.0050378152592122e-06, + "log_odds_chosen": 1.7062022686004639, + "log_odds_ratio": -0.2684418559074402, + "logits/chosen": 297.23333740234375, + "logits/rejected": 356.68939208984375, + "logps/chosen": -0.5603381395339966, + "logps/rejected": -1.553205966949463, + "loss": 0.7624, + "nll_loss": 0.9264168739318848, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02801690623164177, + "rewards/margins": 0.049643389880657196, + "rewards/rejected": -0.07766029983758926, + "step": 2475 + }, + { + "epoch": 1.9698173153296268, + "grad_norm": 31.2788143157959, + "learning_rate": 1.0040241611281238e-06, + "log_odds_chosen": 1.464468240737915, + "log_odds_ratio": -0.3190952241420746, + "logits/chosen": 290.74603271484375, + "logits/rejected": 258.4278869628906, + "logps/chosen": -0.601416826248169, + "logps/rejected": -1.4750945568084717, + "loss": 0.772, + "nll_loss": 0.6782388091087341, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.030070845037698746, + "rewards/margins": 0.043683890253305435, + "rewards/rejected": -0.07375473529100418, + "step": 2480 + }, + { + "epoch": 1.9737887212073075, + "grad_norm": 33.23145294189453, + "learning_rate": 1.0030135678562994e-06, + "log_odds_chosen": 1.415673851966858, + "log_odds_ratio": -0.26751992106437683, + "logits/chosen": 326.2284240722656, + "logits/rejected": 294.62982177734375, + "logps/chosen": -0.4133778214454651, + "logps/rejected": -1.1859859228134155, + "loss": 0.9344, + "nll_loss": 0.588900625705719, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.020668892189860344, + "rewards/margins": 0.0386304035782814, + "rewards/rejected": -0.059299297630786896, + "step": 2485 + }, + { + "epoch": 1.977760127084988, + "grad_norm": 31.663124084472656, + "learning_rate": 1.002006020070253e-06, + "log_odds_chosen": 1.7711327075958252, + "log_odds_ratio": -0.23336951434612274, + "logits/chosen": 392.6171875, + "logits/rejected": 279.3924865722656, + "logps/chosen": -0.5186364054679871, + "logps/rejected": -1.4825407266616821, + "loss": 0.7194, + "nll_loss": 0.8356044888496399, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.025931820273399353, + "rewards/margins": 0.04819521680474281, + "rewards/rejected": -0.07412703335285187, + "step": 2490 + }, + { + "epoch": 1.9817315329626688, + "grad_norm": 30.687904357910156, + "learning_rate": 1.0010015025043829e-06, + "log_odds_chosen": 2.0353431701660156, + "log_odds_ratio": -0.14225442707538605, + "logits/chosen": 312.7574157714844, + "logits/rejected": 353.1292419433594, + "logps/chosen": -0.4555455148220062, + "logps/rejected": -1.703881859779358, + "loss": 0.7101, + "nll_loss": 0.6733388900756836, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02277727797627449, + "rewards/margins": 0.062416814267635345, + "rewards/rejected": -0.08519409596920013, + "step": 2495 + }, + { + "epoch": 1.9857029388403493, + "grad_norm": 20.97602653503418, + "learning_rate": 1.0000000000000002e-06, + "log_odds_chosen": 1.2658917903900146, + "log_odds_ratio": -0.2939799427986145, + "logits/chosen": 352.18548583984375, + "logits/rejected": 275.24761962890625, + "logps/chosen": -0.5702251195907593, + "logps/rejected": -1.279112458229065, + "loss": 0.7243, + "nll_loss": 0.8630763292312622, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.028511255979537964, + "rewards/margins": 0.035444360226392746, + "rewards/rejected": -0.06395561993122101, + "step": 2500 + }, + { + "epoch": 1.98967434471803, + "grad_norm": 23.28777503967285, + "learning_rate": 9.990014975043674e-07, + "log_odds_chosen": 1.8104581832885742, + "log_odds_ratio": -0.19139598309993744, + "logits/chosen": 271.8651428222656, + "logits/rejected": 290.1709289550781, + "logps/chosen": -0.652216911315918, + "logps/rejected": -1.774309754371643, + "loss": 0.8809, + "nll_loss": 0.8805280923843384, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03261084109544754, + "rewards/margins": 0.056104641407728195, + "rewards/rejected": -0.08871549367904663, + "step": 2505 + }, + { + "epoch": 1.9936457505957108, + "grad_norm": 32.018531799316406, + "learning_rate": 9.98005980069749e-07, + "log_odds_chosen": 0.9189640879631042, + "log_odds_ratio": -0.353455513715744, + "logits/chosen": 254.6789093017578, + "logits/rejected": 327.7507629394531, + "logps/chosen": -0.996709942817688, + "logps/rejected": -1.6359264850616455, + "loss": 0.8168, + "nll_loss": 0.9266790151596069, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04983549565076828, + "rewards/margins": 0.03196082264184952, + "rewards/rejected": -0.0817963257431984, + "step": 2510 + }, + { + "epoch": 1.9976171564733916, + "grad_norm": 38.965843200683594, + "learning_rate": 9.97013432852472e-07, + "log_odds_chosen": 3.410547971725464, + "log_odds_ratio": -0.0805453434586525, + "logits/chosen": 308.8994445800781, + "logits/rejected": 340.4012145996094, + "logps/chosen": -0.28854474425315857, + "logps/rejected": -1.8908188343048096, + "loss": 0.647, + "nll_loss": 0.5763905048370361, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.014427239075303078, + "rewards/margins": 0.08011370152235031, + "rewards/rejected": -0.09454093873500824, + "step": 2515 + }, + { + "epoch": 2.0, + "eval_log_odds_chosen": 0.3416334092617035, + "eval_log_odds_ratio": -0.6777786016464233, + "eval_logits/chosen": 324.3109130859375, + "eval_logits/rejected": 296.0398254394531, + "eval_logps/chosen": -1.0582857131958008, + "eval_logps/rejected": -1.2741806507110596, + "eval_loss": 1.481645107269287, + "eval_nll_loss": 1.4303914308547974, + "eval_rewards/accuracies": 0.5899280309677124, + "eval_rewards/chosen": -0.05291429162025452, + "eval_rewards/margins": 0.010794746689498425, + "eval_rewards/rejected": -0.06370903551578522, + "eval_runtime": 91.3978, + "eval_samples_per_second": 6.05, + "eval_steps_per_second": 1.521, + "step": 2518 + }, + { + "epoch": 2.0015885623510723, + "grad_norm": 23.278343200683594, + "learning_rate": 9.960238411119948e-07, + "log_odds_chosen": 2.39607572555542, + "log_odds_ratio": -0.14948555827140808, + "logits/chosen": 280.23614501953125, + "logits/rejected": 288.0187683105469, + "logps/chosen": -0.36037492752075195, + "logps/rejected": -1.700818419456482, + "loss": 0.6759, + "nll_loss": 0.7635445594787598, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.018018746748566628, + "rewards/margins": 0.0670221820473671, + "rewards/rejected": -0.08504092693328857, + "step": 2520 + }, + { + "epoch": 2.005559968228753, + "grad_norm": 37.55564880371094, + "learning_rate": 9.950371902099892e-07, + "log_odds_chosen": 2.9879164695739746, + "log_odds_ratio": -0.06899777054786682, + "logits/chosen": 274.68768310546875, + "logits/rejected": 277.73822021484375, + "logps/chosen": -0.3065778315067291, + "logps/rejected": -2.0628604888916016, + "loss": 0.4182, + "nll_loss": 0.36765578389167786, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.015328889712691307, + "rewards/margins": 0.08781413733959198, + "rewards/rejected": -0.10314302146434784, + "step": 2525 + }, + { + "epoch": 2.009531374106434, + "grad_norm": 31.862247467041016, + "learning_rate": 9.9405346560943e-07, + "log_odds_chosen": 3.7556872367858887, + "log_odds_ratio": -0.05185776203870773, + "logits/chosen": 290.3088684082031, + "logits/rejected": 268.1896057128906, + "logps/chosen": -0.27296245098114014, + "logps/rejected": -2.3146934509277344, + "loss": 0.4324, + "nll_loss": 0.4719117283821106, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.013648120686411858, + "rewards/margins": 0.10208655893802643, + "rewards/rejected": -0.11573468148708344, + "step": 2530 + }, + { + "epoch": 2.0135027799841145, + "grad_norm": 37.43449020385742, + "learning_rate": 9.930726528736969e-07, + "log_odds_chosen": 3.0588698387145996, + "log_odds_ratio": -0.06747926771640778, + "logits/chosen": 290.01312255859375, + "logits/rejected": 256.39080810546875, + "logps/chosen": -0.24895134568214417, + "logps/rejected": -1.8526256084442139, + "loss": 0.4001, + "nll_loss": 0.31891027092933655, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012447567656636238, + "rewards/margins": 0.0801837220788002, + "rewards/rejected": -0.0926312804222107, + "step": 2535 + }, + { + "epoch": 2.0174741858617953, + "grad_norm": 23.769168853759766, + "learning_rate": 9.920947376656814e-07, + "log_odds_chosen": 3.4705471992492676, + "log_odds_ratio": -0.08175542205572128, + "logits/chosen": 295.07159423828125, + "logits/rejected": 319.11773681640625, + "logps/chosen": -0.13094039261341095, + "logps/rejected": -1.3336389064788818, + "loss": 0.3125, + "nll_loss": 0.2496316134929657, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006547019816935062, + "rewards/margins": 0.060134924948215485, + "rewards/rejected": -0.06668195128440857, + "step": 2540 + }, + { + "epoch": 2.0214455917394756, + "grad_norm": 23.81411361694336, + "learning_rate": 9.911197057469108e-07, + "log_odds_chosen": 3.4386093616485596, + "log_odds_ratio": -0.09821267426013947, + "logits/chosen": 251.1048126220703, + "logits/rejected": 341.10272216796875, + "logps/chosen": -0.2074267864227295, + "logps/rejected": -2.2674612998962402, + "loss": 0.3714, + "nll_loss": 0.34040871262550354, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01037133764475584, + "rewards/margins": 0.10300172865390778, + "rewards/rejected": -0.11337306350469589, + "step": 2545 + }, + { + "epoch": 2.0254169976171563, + "grad_norm": 29.296571731567383, + "learning_rate": 9.901475429766744e-07, + "log_odds_chosen": 2.699160575866699, + "log_odds_ratio": -0.10057506710290909, + "logits/chosen": 294.4793701171875, + "logits/rejected": 312.450439453125, + "logps/chosen": -0.2852121889591217, + "logps/rejected": -1.5744909048080444, + "loss": 0.4522, + "nll_loss": 0.4200217127799988, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0142606096342206, + "rewards/margins": 0.06446393579244614, + "rewards/rejected": -0.07872454822063446, + "step": 2550 + }, + { + "epoch": 2.029388403494837, + "grad_norm": 18.865428924560547, + "learning_rate": 9.891782353111634e-07, + "log_odds_chosen": 3.4034037590026855, + "log_odds_ratio": -0.0371125191450119, + "logits/chosen": 248.248779296875, + "logits/rejected": 342.5372619628906, + "logps/chosen": -0.13001587986946106, + "logps/rejected": -1.6181061267852783, + "loss": 0.2983, + "nll_loss": 0.2760511040687561, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006500795483589172, + "rewards/margins": 0.0744045078754425, + "rewards/rejected": -0.08090530335903168, + "step": 2555 + }, + { + "epoch": 2.033359809372518, + "grad_norm": 20.16379165649414, + "learning_rate": 9.882117688026186e-07, + "log_odds_chosen": 4.662590503692627, + "log_odds_ratio": -0.04151756316423416, + "logits/chosen": 209.595458984375, + "logits/rejected": 306.2817077636719, + "logps/chosen": -0.3000139594078064, + "logps/rejected": -3.268468141555786, + "loss": 0.2853, + "nll_loss": 0.38440361618995667, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.015000698156654835, + "rewards/margins": 0.14842268824577332, + "rewards/rejected": -0.16342338919639587, + "step": 2560 + }, + { + "epoch": 2.0373312152501986, + "grad_norm": 21.97180938720703, + "learning_rate": 9.872481295984873e-07, + "log_odds_chosen": 4.333212375640869, + "log_odds_ratio": -0.032386261969804764, + "logits/chosen": 399.1155090332031, + "logits/rejected": 308.8793640136719, + "logps/chosen": -0.20656821131706238, + "logps/rejected": -2.2423524856567383, + "loss": 0.3985, + "nll_loss": 0.34744900465011597, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010328411124646664, + "rewards/margins": 0.1017892137169838, + "rewards/rejected": -0.11211763322353363, + "step": 2565 + }, + { + "epoch": 2.0413026211278793, + "grad_norm": 25.24811553955078, + "learning_rate": 9.862873039405896e-07, + "log_odds_chosen": 3.375474452972412, + "log_odds_ratio": -0.038382574915885925, + "logits/chosen": 264.7865905761719, + "logits/rejected": 341.5740051269531, + "logps/chosen": -0.17896270751953125, + "logps/rejected": -1.8247349262237549, + "loss": 0.3276, + "nll_loss": 0.2309613674879074, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008948136121034622, + "rewards/margins": 0.08228860795497894, + "rewards/rejected": -0.09123674780130386, + "step": 2570 + }, + { + "epoch": 2.04527402700556, + "grad_norm": 26.88926124572754, + "learning_rate": 9.853292781642933e-07, + "log_odds_chosen": 3.7541892528533936, + "log_odds_ratio": -0.03717505931854248, + "logits/chosen": 236.57650756835938, + "logits/rejected": 321.8630676269531, + "logps/chosen": -0.14312101900577545, + "logps/rejected": -2.043936252593994, + "loss": 0.3686, + "nll_loss": 0.21125833690166473, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007156051695346832, + "rewards/margins": 0.09504075348377228, + "rewards/rejected": -0.10219679772853851, + "step": 2575 + }, + { + "epoch": 2.049245432883241, + "grad_norm": 31.291349411010742, + "learning_rate": 9.843740386976973e-07, + "log_odds_chosen": 2.4147632122039795, + "log_odds_ratio": -0.11424986273050308, + "logits/chosen": 258.66961669921875, + "logits/rejected": 286.5997009277344, + "logps/chosen": -0.28673481941223145, + "logps/rejected": -1.4659677743911743, + "loss": 0.3331, + "nll_loss": 0.41449370980262756, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.014336742460727692, + "rewards/margins": 0.0589616484940052, + "rewards/rejected": -0.0732983872294426, + "step": 2580 + }, + { + "epoch": 2.0532168387609215, + "grad_norm": 18.964326858520508, + "learning_rate": 9.834215720608247e-07, + "log_odds_chosen": 3.3965156078338623, + "log_odds_ratio": -0.08347752690315247, + "logits/chosen": 286.4773864746094, + "logits/rejected": 292.11553955078125, + "logps/chosen": -0.3802470564842224, + "logps/rejected": -1.8391368389129639, + "loss": 0.4585, + "nll_loss": 0.4634222388267517, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01901235245168209, + "rewards/margins": 0.07294448465108871, + "rewards/rejected": -0.09195684641599655, + "step": 2585 + }, + { + "epoch": 2.0571882446386023, + "grad_norm": 26.18207550048828, + "learning_rate": 9.824718648648244e-07, + "log_odds_chosen": 2.8892555236816406, + "log_odds_ratio": -0.05735042691230774, + "logits/chosen": 346.8835754394531, + "logits/rejected": 247.84585571289062, + "logps/chosen": -0.19978216290473938, + "logps/rejected": -1.532153844833374, + "loss": 0.3647, + "nll_loss": 0.2897275388240814, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009989107958972454, + "rewards/margins": 0.06661858409643173, + "rewards/rejected": -0.07660768926143646, + "step": 2590 + }, + { + "epoch": 2.0611596505162826, + "grad_norm": 35.08377456665039, + "learning_rate": 9.81524903811178e-07, + "log_odds_chosen": 4.44228982925415, + "log_odds_ratio": -0.019657274708151817, + "logits/chosen": 343.4027099609375, + "logits/rejected": 304.6173095703125, + "logps/chosen": -0.06594429910182953, + "logps/rejected": -1.7075248956680298, + "loss": 0.4044, + "nll_loss": 0.29884591698646545, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003297214861959219, + "rewards/margins": 0.08207902312278748, + "rewards/rejected": -0.08537624031305313, + "step": 2595 + }, + { + "epoch": 2.0651310563939633, + "grad_norm": 23.890464782714844, + "learning_rate": 9.805806756909204e-07, + "log_odds_chosen": 3.4282004833221436, + "log_odds_ratio": -0.04306300729513168, + "logits/chosen": 321.9460144042969, + "logits/rejected": 254.0849609375, + "logps/chosen": -0.10400563478469849, + "logps/rejected": -1.4112660884857178, + "loss": 0.3748, + "nll_loss": 0.3669022023677826, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005200281739234924, + "rewards/margins": 0.06536302715539932, + "rewards/rejected": -0.07056330889463425, + "step": 2600 + }, + { + "epoch": 2.069102462271644, + "grad_norm": 32.56575012207031, + "learning_rate": 9.796391673838654e-07, + "log_odds_chosen": 3.7493515014648438, + "log_odds_ratio": -0.03417588397860527, + "logits/chosen": 400.9460754394531, + "logits/rejected": 280.41595458984375, + "logps/chosen": -0.1472155898809433, + "logps/rejected": -1.8289676904678345, + "loss": 0.4138, + "nll_loss": 0.4005819261074066, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00736077968031168, + "rewards/margins": 0.08408761024475098, + "rewards/rejected": -0.09144838154315948, + "step": 2605 + }, + { + "epoch": 2.073073868149325, + "grad_norm": 20.866010665893555, + "learning_rate": 9.787003658578392e-07, + "log_odds_chosen": 4.661986351013184, + "log_odds_ratio": -0.02654130384325981, + "logits/chosen": 242.8224334716797, + "logits/rejected": 205.6250457763672, + "logps/chosen": -0.12456536293029785, + "logps/rejected": -2.5623185634613037, + "loss": 0.4167, + "nll_loss": 0.4310877323150635, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006228268146514893, + "rewards/margins": 0.12188766151666641, + "rewards/rejected": -0.1281159222126007, + "step": 2610 + }, + { + "epoch": 2.0770452740270056, + "grad_norm": 35.16082000732422, + "learning_rate": 9.777642581679234e-07, + "log_odds_chosen": 3.326671600341797, + "log_odds_ratio": -0.05631138011813164, + "logits/chosen": 319.496337890625, + "logits/rejected": 213.53369140625, + "logps/chosen": -0.1473226100206375, + "logps/rejected": -1.5727765560150146, + "loss": 0.4094, + "nll_loss": 0.3072356581687927, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007366130594164133, + "rewards/margins": 0.07127268612384796, + "rewards/rejected": -0.07863881438970566, + "step": 2615 + }, + { + "epoch": 2.0810166799046863, + "grad_norm": 31.852258682250977, + "learning_rate": 9.768308314557044e-07, + "log_odds_chosen": 2.9119925498962402, + "log_odds_ratio": -0.08470732718706131, + "logits/chosen": 298.0347595214844, + "logits/rejected": 212.7636260986328, + "logps/chosen": -0.21133773028850555, + "logps/rejected": -1.562931776046753, + "loss": 0.3144, + "nll_loss": 0.3638852834701538, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010566887445747852, + "rewards/margins": 0.0675797089934349, + "rewards/rejected": -0.07814659178256989, + "step": 2620 + }, + { + "epoch": 2.084988085782367, + "grad_norm": 26.966537475585938, + "learning_rate": 9.759000729485334e-07, + "log_odds_chosen": 2.8670029640197754, + "log_odds_ratio": -0.08872531354427338, + "logits/chosen": 207.214599609375, + "logits/rejected": 298.1064758300781, + "logps/chosen": -0.3392575979232788, + "logps/rejected": -1.9147123098373413, + "loss": 0.3919, + "nll_loss": 0.4130278527736664, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01696288026869297, + "rewards/margins": 0.07877273857593536, + "rewards/rejected": -0.09573562443256378, + "step": 2625 + }, + { + "epoch": 2.088959491660048, + "grad_norm": 28.27079963684082, + "learning_rate": 9.749719699587899e-07, + "log_odds_chosen": 2.507535457611084, + "log_odds_ratio": -0.10697324573993683, + "logits/chosen": 241.3874053955078, + "logits/rejected": 232.6899871826172, + "logps/chosen": -0.33466896414756775, + "logps/rejected": -1.7164186239242554, + "loss": 0.4482, + "nll_loss": 0.4309083819389343, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.016733448952436447, + "rewards/margins": 0.06908748298883438, + "rewards/rejected": -0.08582093566656113, + "step": 2630 + }, + { + "epoch": 2.0929308975377285, + "grad_norm": 20.495689392089844, + "learning_rate": 9.740465098831574e-07, + "log_odds_chosen": 3.1356701850891113, + "log_odds_ratio": -0.08666279166936874, + "logits/chosen": 286.4986877441406, + "logits/rejected": 300.3104553222656, + "logps/chosen": -0.31942036747932434, + "logps/rejected": -2.1801095008850098, + "loss": 0.4267, + "nll_loss": 0.4930347502231598, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.015971018001437187, + "rewards/margins": 0.09303445369005203, + "rewards/rejected": -0.10900546610355377, + "step": 2635 + }, + { + "epoch": 2.096902303415409, + "grad_norm": 28.138750076293945, + "learning_rate": 9.731236802019038e-07, + "log_odds_chosen": 5.115718841552734, + "log_odds_ratio": -0.017810791730880737, + "logits/chosen": 201.8949432373047, + "logits/rejected": 369.33294677734375, + "logps/chosen": -0.19664621353149414, + "logps/rejected": -2.509212017059326, + "loss": 0.296, + "nll_loss": 0.30467721819877625, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009832310490310192, + "rewards/margins": 0.11562828719615936, + "rewards/rejected": -0.12546059489250183, + "step": 2640 + }, + { + "epoch": 2.1008737092930896, + "grad_norm": 15.730376243591309, + "learning_rate": 9.722034684781694e-07, + "log_odds_chosen": 5.129834175109863, + "log_odds_ratio": -0.038504939526319504, + "logits/chosen": 264.3175354003906, + "logits/rejected": 380.3741760253906, + "logps/chosen": -0.20069575309753418, + "logps/rejected": -2.866584062576294, + "loss": 0.3021, + "nll_loss": 0.3250558078289032, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010034788399934769, + "rewards/margins": 0.13329441845417023, + "rewards/rejected": -0.1433292031288147, + "step": 2645 + }, + { + "epoch": 2.1048451151707703, + "grad_norm": 24.789533615112305, + "learning_rate": 9.712858623572642e-07, + "log_odds_chosen": 5.037966728210449, + "log_odds_ratio": -0.017735213041305542, + "logits/chosen": 287.1584167480469, + "logits/rejected": 210.5322723388672, + "logps/chosen": -0.06346876919269562, + "logps/rejected": -1.834437608718872, + "loss": 0.2681, + "nll_loss": 0.2563795745372772, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0031734383665025234, + "rewards/margins": 0.0885484367609024, + "rewards/rejected": -0.09172187745571136, + "step": 2650 + }, + { + "epoch": 2.108816521048451, + "grad_norm": 33.17031478881836, + "learning_rate": 9.7037084956597e-07, + "log_odds_chosen": 3.0833840370178223, + "log_odds_ratio": -0.07446109503507614, + "logits/chosen": 244.01657104492188, + "logits/rejected": 245.7843780517578, + "logps/chosen": -0.27821382880210876, + "logps/rejected": -1.9189503192901611, + "loss": 0.441, + "nll_loss": 0.4175170063972473, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.013910690322518349, + "rewards/margins": 0.0820368155837059, + "rewards/rejected": -0.0959475189447403, + "step": 2655 + }, + { + "epoch": 2.112787926926132, + "grad_norm": 28.597440719604492, + "learning_rate": 9.694584179118515e-07, + "log_odds_chosen": 3.850795030593872, + "log_odds_ratio": -0.06737051904201508, + "logits/chosen": 281.43511962890625, + "logits/rejected": 314.6874084472656, + "logps/chosen": -0.2290450781583786, + "logps/rejected": -1.983991026878357, + "loss": 0.47, + "nll_loss": 0.38972610235214233, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01145225390791893, + "rewards/margins": 0.08774729818105698, + "rewards/rejected": -0.0991995632648468, + "step": 2660 + }, + { + "epoch": 2.1167593328038126, + "grad_norm": 18.566396713256836, + "learning_rate": 9.685485552825746e-07, + "log_odds_chosen": 3.5323333740234375, + "log_odds_ratio": -0.05120311304926872, + "logits/chosen": 305.2156677246094, + "logits/rejected": 283.5207824707031, + "logps/chosen": -0.16874612867832184, + "logps/rejected": -1.4323012828826904, + "loss": 0.4172, + "nll_loss": 0.3532731235027313, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008437307551503181, + "rewards/margins": 0.06317774951457977, + "rewards/rejected": -0.0716150552034378, + "step": 2665 + }, + { + "epoch": 2.1207307386814933, + "grad_norm": 30.003000259399414, + "learning_rate": 9.676412496452296e-07, + "log_odds_chosen": 3.4242184162139893, + "log_odds_ratio": -0.0653470903635025, + "logits/chosen": 244.9000244140625, + "logits/rejected": 314.68817138671875, + "logps/chosen": -0.21268995106220245, + "logps/rejected": -1.9504411220550537, + "loss": 0.3682, + "nll_loss": 0.31997257471084595, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010634497739374638, + "rewards/margins": 0.08688755333423615, + "rewards/rejected": -0.0975220575928688, + "step": 2670 + }, + { + "epoch": 2.124702144559174, + "grad_norm": 25.37851905822754, + "learning_rate": 9.667364890456637e-07, + "log_odds_chosen": 3.1758782863616943, + "log_odds_ratio": -0.04303749278187752, + "logits/chosen": 300.64410400390625, + "logits/rejected": 272.6418762207031, + "logps/chosen": -0.23436030745506287, + "logps/rejected": -1.9541059732437134, + "loss": 0.3247, + "nll_loss": 0.400359570980072, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011718015186488628, + "rewards/margins": 0.08598728477954865, + "rewards/rejected": -0.09770529717206955, + "step": 2675 + }, + { + "epoch": 2.128673550436855, + "grad_norm": 17.71100616455078, + "learning_rate": 9.658342616078198e-07, + "log_odds_chosen": 3.650857448577881, + "log_odds_ratio": -0.03125698119401932, + "logits/chosen": 299.0740661621094, + "logits/rejected": 227.38809204101562, + "logps/chosen": -0.14033564925193787, + "logps/rejected": -1.8093980550765991, + "loss": 0.3149, + "nll_loss": 0.3184364140033722, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0070167831145226955, + "rewards/margins": 0.08345311135053635, + "rewards/rejected": -0.09046989679336548, + "step": 2680 + }, + { + "epoch": 2.1326449563145355, + "grad_norm": 21.77927589416504, + "learning_rate": 9.649345555330812e-07, + "log_odds_chosen": 3.3544082641601562, + "log_odds_ratio": -0.136978879570961, + "logits/chosen": 322.86651611328125, + "logits/rejected": 292.94366455078125, + "logps/chosen": -0.3305078148841858, + "logps/rejected": -1.6913312673568726, + "loss": 0.341, + "nll_loss": 0.39123356342315674, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01652539148926735, + "rewards/margins": 0.06804117560386658, + "rewards/rejected": -0.08456657081842422, + "step": 2685 + }, + { + "epoch": 2.136616362192216, + "grad_norm": 23.627885818481445, + "learning_rate": 9.640373590996239e-07, + "log_odds_chosen": 3.0574584007263184, + "log_odds_ratio": -0.053799599409103394, + "logits/chosen": 256.87298583984375, + "logits/rejected": 341.5148010253906, + "logps/chosen": -0.2786779999732971, + "logps/rejected": -1.8729734420776367, + "loss": 0.3642, + "nll_loss": 0.29080718755722046, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.013933899812400341, + "rewards/margins": 0.07971477508544922, + "rewards/rejected": -0.09364867955446243, + "step": 2690 + }, + { + "epoch": 2.1405877680698966, + "grad_norm": 20.16524887084961, + "learning_rate": 9.631426606617747e-07, + "log_odds_chosen": 3.7517459392547607, + "log_odds_ratio": -0.05596904084086418, + "logits/chosen": 265.704833984375, + "logits/rejected": 255.4348602294922, + "logps/chosen": -0.16427966952323914, + "logps/rejected": -1.8877394199371338, + "loss": 0.3792, + "nll_loss": 0.5157122611999512, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008213983848690987, + "rewards/margins": 0.08617298305034637, + "rewards/rejected": -0.09438695758581161, + "step": 2695 + }, + { + "epoch": 2.1445591739475773, + "grad_norm": 29.250465393066406, + "learning_rate": 9.622504486493764e-07, + "log_odds_chosen": 4.140095233917236, + "log_odds_ratio": -0.09328913688659668, + "logits/chosen": 272.82281494140625, + "logits/rejected": 249.73330688476562, + "logps/chosen": -0.16515249013900757, + "logps/rejected": -2.2072982788085938, + "loss": 0.3285, + "nll_loss": 0.367251455783844, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008257624693214893, + "rewards/margins": 0.10210728645324707, + "rewards/rejected": -0.1103648990392685, + "step": 2700 + }, + { + "epoch": 2.148530579825258, + "grad_norm": 24.482175827026367, + "learning_rate": 9.613607115671605e-07, + "log_odds_chosen": 2.847738742828369, + "log_odds_ratio": -0.07871608436107635, + "logits/chosen": 327.7214660644531, + "logits/rejected": 281.2625732421875, + "logps/chosen": -0.24785375595092773, + "logps/rejected": -1.403300166130066, + "loss": 0.337, + "nll_loss": 0.5055629014968872, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012392686679959297, + "rewards/margins": 0.057772327214479446, + "rewards/rejected": -0.0701650083065033, + "step": 2705 + }, + { + "epoch": 2.152501985702939, + "grad_norm": 22.381025314331055, + "learning_rate": 9.604734379941232e-07, + "log_odds_chosen": 3.220674991607666, + "log_odds_ratio": -0.04303758218884468, + "logits/chosen": 351.3204040527344, + "logits/rejected": 287.9315185546875, + "logps/chosen": -0.1675499528646469, + "logps/rejected": -1.6308742761611938, + "loss": 0.3403, + "nll_loss": 0.2904892861843109, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008377498015761375, + "rewards/margins": 0.07316621392965317, + "rewards/rejected": -0.08154371380805969, + "step": 2710 + }, + { + "epoch": 2.1564733915806196, + "grad_norm": 23.287328720092773, + "learning_rate": 9.595886165829119e-07, + "log_odds_chosen": 3.32081937789917, + "log_odds_ratio": -0.06217331811785698, + "logits/chosen": 237.415283203125, + "logits/rejected": 217.04019165039062, + "logps/chosen": -0.8307470083236694, + "logps/rejected": -2.7454867362976074, + "loss": 0.3688, + "nll_loss": 0.6546460390090942, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04153735563158989, + "rewards/margins": 0.09573698043823242, + "rewards/rejected": -0.1372743397951126, + "step": 2715 + }, + { + "epoch": 2.1604447974583003, + "grad_norm": 22.75811767578125, + "learning_rate": 9.58706236059213e-07, + "log_odds_chosen": 4.277588367462158, + "log_odds_ratio": -0.02797316014766693, + "logits/chosen": 256.08526611328125, + "logits/rejected": 295.3904113769531, + "logps/chosen": -0.23130519688129425, + "logps/rejected": -2.8242764472961426, + "loss": 0.3771, + "nll_loss": 0.3463202118873596, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011565258726477623, + "rewards/margins": 0.12964856624603271, + "rewards/rejected": -0.14121383428573608, + "step": 2720 + }, + { + "epoch": 2.164416203335981, + "grad_norm": 19.948747634887695, + "learning_rate": 9.578262852211515e-07, + "log_odds_chosen": 3.2157044410705566, + "log_odds_ratio": -0.06531454622745514, + "logits/chosen": 269.98675537109375, + "logits/rejected": 270.8153381347656, + "logps/chosen": -0.13189613819122314, + "logps/rejected": -1.1261787414550781, + "loss": 0.3333, + "nll_loss": 0.24800708889961243, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006594807840883732, + "rewards/margins": 0.04971412941813469, + "rewards/rejected": -0.05630893632769585, + "step": 2725 + }, + { + "epoch": 2.168387609213662, + "grad_norm": 28.03223991394043, + "learning_rate": 9.56948752938691e-07, + "log_odds_chosen": 3.26519775390625, + "log_odds_ratio": -0.049314845353364944, + "logits/chosen": 303.38262939453125, + "logits/rejected": 347.95281982421875, + "logps/chosen": -0.2853826880455017, + "logps/rejected": -2.1887669563293457, + "loss": 0.3569, + "nll_loss": 0.35581302642822266, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.014269135892391205, + "rewards/margins": 0.09516922384500504, + "rewards/rejected": -0.10943835973739624, + "step": 2730 + }, + { + "epoch": 2.1723590150913425, + "grad_norm": 54.01997756958008, + "learning_rate": 9.560736281530443e-07, + "log_odds_chosen": 3.5612130165100098, + "log_odds_ratio": -0.11573145538568497, + "logits/chosen": 331.6031494140625, + "logits/rejected": 283.62493896484375, + "logps/chosen": -0.2640966773033142, + "logps/rejected": -1.7330589294433594, + "loss": 0.4621, + "nll_loss": 0.3747265934944153, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01320483349263668, + "rewards/margins": 0.07344810664653778, + "rewards/rejected": -0.08665294945240021, + "step": 2735 + }, + { + "epoch": 2.176330420969023, + "grad_norm": 40.430973052978516, + "learning_rate": 9.552008998760876e-07, + "log_odds_chosen": 4.288855075836182, + "log_odds_ratio": -0.10139509290456772, + "logits/chosen": 347.55853271484375, + "logits/rejected": 246.91439819335938, + "logps/chosen": -0.22820158302783966, + "logps/rejected": -1.6849181652069092, + "loss": 0.4092, + "nll_loss": 0.33607205748558044, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011410078033804893, + "rewards/margins": 0.07283583283424377, + "rewards/rejected": -0.08424590528011322, + "step": 2740 + }, + { + "epoch": 2.1803018268467036, + "grad_norm": 28.763097763061523, + "learning_rate": 9.543305571897804e-07, + "log_odds_chosen": 3.6398234367370605, + "log_odds_ratio": -0.052009087055921555, + "logits/chosen": 217.4949493408203, + "logits/rejected": 313.9044189453125, + "logps/chosen": -0.18499043583869934, + "logps/rejected": -2.0533955097198486, + "loss": 0.432, + "nll_loss": 0.33994507789611816, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009249521419405937, + "rewards/margins": 0.09342024475336075, + "rewards/rejected": -0.10266977548599243, + "step": 2745 + }, + { + "epoch": 2.1842732327243843, + "grad_norm": 18.268781661987305, + "learning_rate": 9.534625892455924e-07, + "log_odds_chosen": 3.4035229682922363, + "log_odds_ratio": -0.08708290755748749, + "logits/chosen": 272.8382263183594, + "logits/rejected": 301.1288757324219, + "logps/chosen": -0.27422863245010376, + "logps/rejected": -1.7393039464950562, + "loss": 0.3134, + "nll_loss": 0.3447985053062439, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.013711432926356792, + "rewards/margins": 0.07325376570224762, + "rewards/rejected": -0.08696520328521729, + "step": 2750 + }, + { + "epoch": 2.188244638602065, + "grad_norm": 35.49375915527344, + "learning_rate": 9.525969852639353e-07, + "log_odds_chosen": 3.902122974395752, + "log_odds_ratio": -0.03137136623263359, + "logits/chosen": 257.04730224609375, + "logits/rejected": 268.3082580566406, + "logps/chosen": -0.31830140948295593, + "logps/rejected": -2.567204475402832, + "loss": 0.3315, + "nll_loss": 0.2598883807659149, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.015915069729089737, + "rewards/margins": 0.1124451532959938, + "rewards/rejected": -0.12836022675037384, + "step": 2755 + }, + { + "epoch": 2.192216044479746, + "grad_norm": 21.208223342895508, + "learning_rate": 9.517337345336012e-07, + "log_odds_chosen": 3.7665162086486816, + "log_odds_ratio": -0.027825195342302322, + "logits/chosen": 232.5461883544922, + "logits/rejected": 363.5819396972656, + "logps/chosen": -0.20895162224769592, + "logps/rejected": -2.4105193614959717, + "loss": 0.2947, + "nll_loss": 0.277402400970459, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010447581298649311, + "rewards/margins": 0.11007839441299438, + "rewards/rejected": -0.12052597850561142, + "step": 2760 + }, + { + "epoch": 2.1961874503574266, + "grad_norm": 25.69028663635254, + "learning_rate": 9.508728264112049e-07, + "log_odds_chosen": 3.399308443069458, + "log_odds_ratio": -0.04535920172929764, + "logits/chosen": 281.50384521484375, + "logits/rejected": 310.96966552734375, + "logps/chosen": -0.16359075903892517, + "logps/rejected": -1.8376325368881226, + "loss": 0.3831, + "nll_loss": 0.38407355546951294, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008179538883268833, + "rewards/margins": 0.08370209485292435, + "rewards/rejected": -0.0918816328048706, + "step": 2765 + }, + { + "epoch": 2.2001588562351073, + "grad_norm": 30.133134841918945, + "learning_rate": 9.50014250320633e-07, + "log_odds_chosen": 4.295925140380859, + "log_odds_ratio": -0.01640843227505684, + "logits/chosen": 223.8397979736328, + "logits/rejected": 306.9639587402344, + "logps/chosen": -0.13873472809791565, + "logps/rejected": -2.4861035346984863, + "loss": 0.4461, + "nll_loss": 0.37577375769615173, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006936737336218357, + "rewards/margins": 0.11736844480037689, + "rewards/rejected": -0.12430517375469208, + "step": 2770 + }, + { + "epoch": 2.204130262112788, + "grad_norm": 22.508953094482422, + "learning_rate": 9.49157995752499e-07, + "log_odds_chosen": 3.5316948890686035, + "log_odds_ratio": -0.0377383753657341, + "logits/chosen": 241.3332061767578, + "logits/rejected": 260.64349365234375, + "logps/chosen": -0.23985370993614197, + "logps/rejected": -2.1687886714935303, + "loss": 0.4267, + "nll_loss": 0.6603747606277466, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011992687359452248, + "rewards/margins": 0.09644675254821777, + "rewards/rejected": -0.10843944549560547, + "step": 2775 + }, + { + "epoch": 2.208101667990469, + "grad_norm": 26.58831787109375, + "learning_rate": 9.483040522636021e-07, + "log_odds_chosen": 3.693864345550537, + "log_odds_ratio": -0.06571256369352341, + "logits/chosen": 263.2666015625, + "logits/rejected": 331.05438232421875, + "logps/chosen": -0.26286572217941284, + "logps/rejected": -1.72158944606781, + "loss": 0.423, + "nll_loss": 0.45609745383262634, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.013143287971615791, + "rewards/margins": 0.07293619215488434, + "rewards/rejected": -0.08607947826385498, + "step": 2780 + }, + { + "epoch": 2.212073073868149, + "grad_norm": 21.288402557373047, + "learning_rate": 9.474524094763924e-07, + "log_odds_chosen": 2.505340576171875, + "log_odds_ratio": -0.0928802341222763, + "logits/chosen": 236.01602172851562, + "logits/rejected": 320.7162170410156, + "logps/chosen": -0.3501175045967102, + "logps/rejected": -1.6990416049957275, + "loss": 0.3701, + "nll_loss": 0.4051796495914459, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01750587485730648, + "rewards/margins": 0.06744619458913803, + "rewards/rejected": -0.08495207130908966, + "step": 2785 + }, + { + "epoch": 2.21604447974583, + "grad_norm": 52.22238540649414, + "learning_rate": 9.466030570784414e-07, + "log_odds_chosen": 3.867032289505005, + "log_odds_ratio": -0.06416045874357224, + "logits/chosen": 312.247802734375, + "logits/rejected": 239.3249969482422, + "logps/chosen": -0.25704219937324524, + "logps/rejected": -2.131070852279663, + "loss": 0.3458, + "nll_loss": 0.48503756523132324, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012852109968662262, + "rewards/margins": 0.09370143711566925, + "rewards/rejected": -0.10655353963375092, + "step": 2790 + }, + { + "epoch": 2.2200158856235106, + "grad_norm": 19.706954956054688, + "learning_rate": 9.45755984821918e-07, + "log_odds_chosen": 3.0532429218292236, + "log_odds_ratio": -0.07293901592493057, + "logits/chosen": 255.22787475585938, + "logits/rejected": 326.5018615722656, + "logps/chosen": -0.3715572953224182, + "logps/rejected": -2.3187978267669678, + "loss": 0.3952, + "nll_loss": 0.3520717918872833, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01857786439359188, + "rewards/margins": 0.09736202657222748, + "rewards/rejected": -0.11593989282846451, + "step": 2795 + }, + { + "epoch": 2.2239872915011913, + "grad_norm": 30.489246368408203, + "learning_rate": 9.449111825230681e-07, + "log_odds_chosen": 2.9744670391082764, + "log_odds_ratio": -0.06568741798400879, + "logits/chosen": 253.3533935546875, + "logits/rejected": 280.07855224609375, + "logps/chosen": -0.14280185103416443, + "logps/rejected": -1.3558251857757568, + "loss": 0.3892, + "nll_loss": 0.528894305229187, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0071400925517082214, + "rewards/margins": 0.06065117195248604, + "rewards/rejected": -0.06779126077890396, + "step": 2800 + }, + { + "epoch": 2.227958697378872, + "grad_norm": 23.173385620117188, + "learning_rate": 9.440686400617012e-07, + "log_odds_chosen": 3.465458393096924, + "log_odds_ratio": -0.07254897803068161, + "logits/chosen": 221.92202758789062, + "logits/rejected": 349.90740966796875, + "logps/chosen": -0.15940417349338531, + "logps/rejected": -1.8571916818618774, + "loss": 0.4338, + "nll_loss": 0.4682881236076355, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007970208302140236, + "rewards/margins": 0.08488937467336655, + "rewards/rejected": -0.09285958111286163, + "step": 2805 + }, + { + "epoch": 2.231930103256553, + "grad_norm": 20.893659591674805, + "learning_rate": 9.432283473806812e-07, + "log_odds_chosen": 3.7795605659484863, + "log_odds_ratio": -0.025308597832918167, + "logits/chosen": 205.07113647460938, + "logits/rejected": 391.563232421875, + "logps/chosen": -0.28899306058883667, + "logps/rejected": -2.687159538269043, + "loss": 0.3758, + "nll_loss": 0.44255542755126953, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0144496513530612, + "rewards/margins": 0.11990831792354584, + "rewards/rejected": -0.1343579739332199, + "step": 2810 + }, + { + "epoch": 2.2359015091342336, + "grad_norm": 68.56454467773438, + "learning_rate": 9.423902944854219e-07, + "log_odds_chosen": 3.024015426635742, + "log_odds_ratio": -0.06248214840888977, + "logits/chosen": 309.1986389160156, + "logits/rejected": 306.1770935058594, + "logps/chosen": -0.16128352284431458, + "logps/rejected": -1.5402535200119019, + "loss": 0.3595, + "nll_loss": 0.3539174199104309, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008064175955951214, + "rewards/margins": 0.06894850730895996, + "rewards/rejected": -0.07701267302036285, + "step": 2815 + }, + { + "epoch": 2.2398729150119143, + "grad_norm": 46.14353942871094, + "learning_rate": 9.415544714433869e-07, + "log_odds_chosen": 3.5264244079589844, + "log_odds_ratio": -0.056026797741651535, + "logits/chosen": 401.4872131347656, + "logits/rejected": 307.99200439453125, + "logps/chosen": -0.16968777775764465, + "logps/rejected": -1.8524665832519531, + "loss": 0.357, + "nll_loss": 0.4214702248573303, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008484388701617718, + "rewards/margins": 0.08413894474506378, + "rewards/rejected": -0.09262333810329437, + "step": 2820 + }, + { + "epoch": 2.243844320889595, + "grad_norm": 22.524415969848633, + "learning_rate": 9.407208683835973e-07, + "log_odds_chosen": 3.638291120529175, + "log_odds_ratio": -0.035198599100112915, + "logits/chosen": 315.48431396484375, + "logits/rejected": 204.6620330810547, + "logps/chosen": -0.2649177312850952, + "logps/rejected": -2.029332399368286, + "loss": 0.4461, + "nll_loss": 0.29909247159957886, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01324588805437088, + "rewards/margins": 0.0882207378745079, + "rewards/rejected": -0.10146661847829819, + "step": 2825 + }, + { + "epoch": 2.247815726767276, + "grad_norm": 18.5728702545166, + "learning_rate": 9.398894754961406e-07, + "log_odds_chosen": 4.158797264099121, + "log_odds_ratio": -0.06240306422114372, + "logits/chosen": 255.5278778076172, + "logits/rejected": 277.1158142089844, + "logps/chosen": -0.11548423767089844, + "logps/rejected": -2.0450329780578613, + "loss": 0.3872, + "nll_loss": 0.26154083013534546, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005774212069809437, + "rewards/margins": 0.0964774340391159, + "rewards/rejected": -0.10225164890289307, + "step": 2830 + }, + { + "epoch": 2.2517871326449566, + "grad_norm": 27.462656021118164, + "learning_rate": 9.390602830316851e-07, + "log_odds_chosen": 2.5769989490509033, + "log_odds_ratio": -0.10126359760761261, + "logits/chosen": 256.6750793457031, + "logits/rejected": 298.19500732421875, + "logps/chosen": -0.2556411921977997, + "logps/rejected": -1.4168459177017212, + "loss": 0.3646, + "nll_loss": 0.4656898081302643, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012782061472535133, + "rewards/margins": 0.058060236275196075, + "rewards/rejected": -0.07084228843450546, + "step": 2835 + }, + { + "epoch": 2.255758538522637, + "grad_norm": 24.728313446044922, + "learning_rate": 9.38233281301002e-07, + "log_odds_chosen": 4.362663269042969, + "log_odds_ratio": -0.0349862240254879, + "logits/chosen": 265.12548828125, + "logits/rejected": 215.07431030273438, + "logps/chosen": -0.20180007815361023, + "logps/rejected": -2.3115715980529785, + "loss": 0.3206, + "nll_loss": 0.2718152701854706, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010090004652738571, + "rewards/margins": 0.10548856109380722, + "rewards/rejected": -0.11557857692241669, + "step": 2840 + }, + { + "epoch": 2.2597299444003176, + "grad_norm": 30.671133041381836, + "learning_rate": 9.374084606744878e-07, + "log_odds_chosen": 3.672755002975464, + "log_odds_ratio": -0.038879863917827606, + "logits/chosen": 335.2356872558594, + "logits/rejected": 203.84803771972656, + "logps/chosen": -0.17998583614826202, + "logps/rejected": -1.6547071933746338, + "loss": 0.4663, + "nll_loss": 0.6368466019630432, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008999291807413101, + "rewards/margins": 0.07373605668544769, + "rewards/rejected": -0.08273535966873169, + "step": 2845 + }, + { + "epoch": 2.2637013502779983, + "grad_norm": 28.535974502563477, + "learning_rate": 9.365858115816941e-07, + "log_odds_chosen": 1.8314237594604492, + "log_odds_ratio": -0.19887328147888184, + "logits/chosen": 243.3570556640625, + "logits/rejected": 255.3211669921875, + "logps/chosen": -0.4105517268180847, + "logps/rejected": -1.2685024738311768, + "loss": 0.5051, + "nll_loss": 0.5791507363319397, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.020527586340904236, + "rewards/margins": 0.0428975448012352, + "rewards/rejected": -0.06342513859272003, + "step": 2850 + }, + { + "epoch": 2.267672756155679, + "grad_norm": 35.174720764160156, + "learning_rate": 9.357653245108616e-07, + "log_odds_chosen": 2.9875216484069824, + "log_odds_ratio": -0.0903446301817894, + "logits/chosen": 230.1251220703125, + "logits/rejected": 300.3447265625, + "logps/chosen": -0.24468517303466797, + "logps/rejected": -1.8320715427398682, + "loss": 0.4694, + "nll_loss": 0.3818613588809967, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012234258465468884, + "rewards/margins": 0.07936931401491165, + "rewards/rejected": -0.09160356968641281, + "step": 2855 + }, + { + "epoch": 2.27164416203336, + "grad_norm": 19.731929779052734, + "learning_rate": 9.349469900084572e-07, + "log_odds_chosen": 3.242330551147461, + "log_odds_ratio": -0.0612449049949646, + "logits/chosen": 251.2331085205078, + "logits/rejected": 286.5898132324219, + "logps/chosen": -0.2064785659313202, + "logps/rejected": -1.7228063344955444, + "loss": 0.348, + "nll_loss": 0.2930075228214264, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01032392866909504, + "rewards/margins": 0.07581637799739838, + "rewards/rejected": -0.08614031970500946, + "step": 2860 + }, + { + "epoch": 2.2756155679110406, + "grad_norm": 60.175567626953125, + "learning_rate": 9.341307986787181e-07, + "log_odds_chosen": 3.061659336090088, + "log_odds_ratio": -0.07501424849033356, + "logits/chosen": 239.71029663085938, + "logits/rejected": 295.3159484863281, + "logps/chosen": -0.23593612015247345, + "logps/rejected": -1.6371171474456787, + "loss": 0.3748, + "nll_loss": 0.4964061677455902, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011796806938946247, + "rewards/margins": 0.07005905359983444, + "rewards/rejected": -0.08185587078332901, + "step": 2865 + }, + { + "epoch": 2.2795869737887213, + "grad_norm": 25.93592643737793, + "learning_rate": 9.333167411831968e-07, + "log_odds_chosen": 3.542975902557373, + "log_odds_ratio": -0.06436657905578613, + "logits/chosen": 238.16976928710938, + "logits/rejected": 287.948486328125, + "logps/chosen": -0.2085086554288864, + "logps/rejected": -1.9400198459625244, + "loss": 0.3322, + "nll_loss": 0.2988547682762146, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010425432585179806, + "rewards/margins": 0.08657556772232056, + "rewards/rejected": -0.09700099378824234, + "step": 2870 + }, + { + "epoch": 2.283558379666402, + "grad_norm": 20.469728469848633, + "learning_rate": 9.325048082403139e-07, + "log_odds_chosen": 3.4106850624084473, + "log_odds_ratio": -0.11686725914478302, + "logits/chosen": 304.0782165527344, + "logits/rejected": 226.82803344726562, + "logps/chosen": -0.13091017305850983, + "logps/rejected": -1.417621374130249, + "loss": 0.3601, + "nll_loss": 0.33367031812667847, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006545508746057749, + "rewards/margins": 0.06433556228876114, + "rewards/rejected": -0.07088107615709305, + "step": 2875 + }, + { + "epoch": 2.2875297855440824, + "grad_norm": 339.516357421875, + "learning_rate": 9.316949906249125e-07, + "log_odds_chosen": 2.911581039428711, + "log_odds_ratio": -0.05822296813130379, + "logits/chosen": 272.34173583984375, + "logits/rejected": 280.1712646484375, + "logps/chosen": -0.19476798176765442, + "logps/rejected": -1.585203766822815, + "loss": 0.3893, + "nll_loss": 0.2995101511478424, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009738398715853691, + "rewards/margins": 0.06952179223299026, + "rewards/rejected": -0.07926018536090851, + "step": 2880 + }, + { + "epoch": 2.291501191421763, + "grad_norm": 22.142641067504883, + "learning_rate": 9.308872791678188e-07, + "log_odds_chosen": 3.053778886795044, + "log_odds_ratio": -0.12560723721981049, + "logits/chosen": 242.20468139648438, + "logits/rejected": 357.9628601074219, + "logps/chosen": -0.22581541538238525, + "logps/rejected": -1.8815644979476929, + "loss": 0.5007, + "nll_loss": 0.4521845877170563, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011290770955383778, + "rewards/margins": 0.08278746157884598, + "rewards/rejected": -0.09407822787761688, + "step": 2885 + }, + { + "epoch": 2.295472597299444, + "grad_norm": 24.857589721679688, + "learning_rate": 9.300816647554058e-07, + "log_odds_chosen": 2.7269444465637207, + "log_odds_ratio": -0.10888878256082535, + "logits/chosen": 352.1503601074219, + "logits/rejected": 283.04217529296875, + "logps/chosen": -0.27864494919776917, + "logps/rejected": -1.7774139642715454, + "loss": 0.4257, + "nll_loss": 0.30876272916793823, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.013932247646152973, + "rewards/margins": 0.07493844628334045, + "rewards/rejected": -0.08887068927288055, + "step": 2890 + }, + { + "epoch": 2.2994440031771246, + "grad_norm": 33.68102264404297, + "learning_rate": 9.292781383291611e-07, + "log_odds_chosen": 4.8815598487854, + "log_odds_ratio": -0.011089108884334564, + "logits/chosen": 285.96978759765625, + "logits/rejected": 205.0143585205078, + "logps/chosen": -0.08274070173501968, + "logps/rejected": -2.379570960998535, + "loss": 0.3248, + "nll_loss": 0.22218620777130127, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004137034527957439, + "rewards/margins": 0.1148415058851242, + "rewards/rejected": -0.11897854506969452, + "step": 2895 + }, + { + "epoch": 2.3034154090548054, + "grad_norm": 27.541589736938477, + "learning_rate": 9.284766908852594e-07, + "log_odds_chosen": 3.0898163318634033, + "log_odds_ratio": -0.06252577155828476, + "logits/chosen": 233.49624633789062, + "logits/rejected": 270.4180908203125, + "logps/chosen": -0.22724232077598572, + "logps/rejected": -1.7946640253067017, + "loss": 0.3331, + "nll_loss": 0.29226577281951904, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011362116783857346, + "rewards/margins": 0.0783710852265358, + "rewards/rejected": -0.08973319828510284, + "step": 2900 + }, + { + "epoch": 2.307386814932486, + "grad_norm": 22.57000160217285, + "learning_rate": 9.276773134741389e-07, + "log_odds_chosen": 4.6132612228393555, + "log_odds_ratio": -0.034605059772729874, + "logits/chosen": 214.30801391601562, + "logits/rejected": 292.3231506347656, + "logps/chosen": -0.1400974690914154, + "logps/rejected": -2.607959032058716, + "loss": 0.3647, + "nll_loss": 0.3344481289386749, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007004873361438513, + "rewards/margins": 0.12339308112859726, + "rewards/rejected": -0.1303979456424713, + "step": 2905 + }, + { + "epoch": 2.311358220810167, + "grad_norm": 21.834867477416992, + "learning_rate": 9.26879997200081e-07, + "log_odds_chosen": 2.0610976219177246, + "log_odds_ratio": -0.1374814808368683, + "logits/chosen": 238.64797973632812, + "logits/rejected": 196.60305786132812, + "logps/chosen": -0.36072736978530884, + "logps/rejected": -1.38467276096344, + "loss": 0.3675, + "nll_loss": 0.45493263006210327, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0180363692343235, + "rewards/margins": 0.051197268068790436, + "rewards/rejected": -0.06923364102840424, + "step": 2910 + }, + { + "epoch": 2.3153296266878476, + "grad_norm": 16.910837173461914, + "learning_rate": 9.260847332207952e-07, + "log_odds_chosen": 3.3941338062286377, + "log_odds_ratio": -0.07332415133714676, + "logits/chosen": 348.9075012207031, + "logits/rejected": 315.0403747558594, + "logps/chosen": -0.12917575240135193, + "logps/rejected": -1.595969557762146, + "loss": 0.3124, + "nll_loss": 0.26820236444473267, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006458788178861141, + "rewards/margins": 0.07333968579769135, + "rewards/rejected": -0.07979847490787506, + "step": 2915 + }, + { + "epoch": 2.3193010325655283, + "grad_norm": 28.343481063842773, + "learning_rate": 9.252915127470066e-07, + "log_odds_chosen": 2.11761474609375, + "log_odds_ratio": -0.12370810657739639, + "logits/chosen": 321.7706298828125, + "logits/rejected": 285.8647155761719, + "logps/chosen": -0.24254365265369415, + "logps/rejected": -1.139145851135254, + "loss": 0.3549, + "nll_loss": 0.40812140703201294, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012127181515097618, + "rewards/margins": 0.044830113649368286, + "rewards/rejected": -0.056957293301820755, + "step": 2920 + }, + { + "epoch": 2.323272438443209, + "grad_norm": 29.542682647705078, + "learning_rate": 9.245003270420485e-07, + "log_odds_chosen": 3.9975972175598145, + "log_odds_ratio": -0.058384932577610016, + "logits/chosen": 238.3087615966797, + "logits/rejected": 257.13079833984375, + "logps/chosen": -0.15752217173576355, + "logps/rejected": -1.8759005069732666, + "loss": 0.3918, + "nll_loss": 0.3589743673801422, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007876109331846237, + "rewards/margins": 0.08591891080141068, + "rewards/rejected": -0.09379501640796661, + "step": 2925 + }, + { + "epoch": 2.32724384432089, + "grad_norm": 21.245723724365234, + "learning_rate": 9.23711167421458e-07, + "log_odds_chosen": 2.95237398147583, + "log_odds_ratio": -0.10379862785339355, + "logits/chosen": 197.9760284423828, + "logits/rejected": 312.78790283203125, + "logps/chosen": -0.3067656457424164, + "logps/rejected": -1.6372512578964233, + "loss": 0.3624, + "nll_loss": 0.3775458037853241, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.015338283963501453, + "rewards/margins": 0.06652427464723587, + "rewards/rejected": -0.08186255395412445, + "step": 2930 + }, + { + "epoch": 2.33121525019857, + "grad_norm": 19.79974937438965, + "learning_rate": 9.229240252525751e-07, + "log_odds_chosen": 2.867802381515503, + "log_odds_ratio": -0.07918517291545868, + "logits/chosen": 221.4095458984375, + "logits/rejected": 272.38873291015625, + "logps/chosen": -0.3694096505641937, + "logps/rejected": -1.5930800437927246, + "loss": 0.3878, + "nll_loss": 0.5072149038314819, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.018470484763383865, + "rewards/margins": 0.061183519661426544, + "rewards/rejected": -0.07965400815010071, + "step": 2935 + }, + { + "epoch": 2.335186656076251, + "grad_norm": 19.503841400146484, + "learning_rate": 9.221388919541469e-07, + "log_odds_chosen": 4.028315544128418, + "log_odds_ratio": -0.03385575860738754, + "logits/chosen": 184.2939453125, + "logits/rejected": 463.99481201171875, + "logps/chosen": -0.1569368541240692, + "logps/rejected": -2.1916651725769043, + "loss": 0.3327, + "nll_loss": 0.19962573051452637, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007846842519938946, + "rewards/margins": 0.1017364114522934, + "rewards/rejected": -0.10958325862884521, + "step": 2940 + }, + { + "epoch": 2.3391580619539316, + "grad_norm": 22.579517364501953, + "learning_rate": 9.213557589959346e-07, + "log_odds_chosen": 4.402327537536621, + "log_odds_ratio": -0.06422088295221329, + "logits/chosen": 311.88970947265625, + "logits/rejected": 222.9725799560547, + "logps/chosen": -0.12918733060359955, + "logps/rejected": -1.5751354694366455, + "loss": 0.3988, + "nll_loss": 0.27917248010635376, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006459367461502552, + "rewards/margins": 0.07229740917682648, + "rewards/rejected": -0.07875677198171616, + "step": 2945 + }, + { + "epoch": 2.3431294678316124, + "grad_norm": 23.760480880737305, + "learning_rate": 9.205746178983235e-07, + "log_odds_chosen": 4.011848449707031, + "log_odds_ratio": -0.04609540104866028, + "logits/chosen": 280.3443298339844, + "logits/rejected": 283.18096923828125, + "logps/chosen": -0.16002188622951508, + "logps/rejected": -2.123976469039917, + "loss": 0.3309, + "nll_loss": 0.30090197920799255, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008001094684004784, + "rewards/margins": 0.09819772839546204, + "rewards/rejected": -0.10619882494211197, + "step": 2950 + }, + { + "epoch": 2.347100873709293, + "grad_norm": 25.123260498046875, + "learning_rate": 9.19795460231938e-07, + "log_odds_chosen": 3.2714781761169434, + "log_odds_ratio": -0.08971662819385529, + "logits/chosen": 353.42388916015625, + "logits/rejected": 275.33245849609375, + "logps/chosen": -0.2007518708705902, + "logps/rejected": -1.8207184076309204, + "loss": 0.3645, + "nll_loss": 0.3350695073604584, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010037594474852085, + "rewards/margins": 0.08099832385778427, + "rewards/rejected": -0.09103592485189438, + "step": 2955 + }, + { + "epoch": 2.351072279586974, + "grad_norm": 33.57479476928711, + "learning_rate": 9.190182776172598e-07, + "log_odds_chosen": 3.7782235145568848, + "log_odds_ratio": -0.051849596202373505, + "logits/chosen": 261.42962646484375, + "logits/rejected": 268.8240966796875, + "logps/chosen": -0.14148275554180145, + "logps/rejected": -1.8189716339111328, + "loss": 0.3353, + "nll_loss": 0.26106011867523193, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007074137218296528, + "rewards/margins": 0.08387444913387299, + "rewards/rejected": -0.09094859659671783, + "step": 2960 + }, + { + "epoch": 2.3550436854646546, + "grad_norm": 25.17067527770996, + "learning_rate": 9.182430617242484e-07, + "log_odds_chosen": 3.2659249305725098, + "log_odds_ratio": -0.045460015535354614, + "logits/chosen": 291.0422668457031, + "logits/rejected": 208.2694091796875, + "logps/chosen": -0.07350897789001465, + "logps/rejected": -1.1179029941558838, + "loss": 0.3735, + "nll_loss": 0.40316909551620483, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003675449173897505, + "rewards/margins": 0.0522197000682354, + "rewards/rejected": -0.05589514970779419, + "step": 2965 + }, + { + "epoch": 2.3590150913423353, + "grad_norm": 25.110368728637695, + "learning_rate": 9.174698042719672e-07, + "log_odds_chosen": 4.775304794311523, + "log_odds_ratio": -0.03073939122259617, + "logits/chosen": 232.52755737304688, + "logits/rejected": 406.4723205566406, + "logps/chosen": -0.21328440308570862, + "logps/rejected": -2.7473394870758057, + "loss": 0.3524, + "nll_loss": 0.31327253580093384, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010664219036698341, + "rewards/margins": 0.12670275568962097, + "rewards/rejected": -0.13736696541309357, + "step": 2970 + }, + { + "epoch": 2.3629864972200156, + "grad_norm": 32.33005142211914, + "learning_rate": 9.166984970282114e-07, + "log_odds_chosen": 2.634587049484253, + "log_odds_ratio": -0.39152711629867554, + "logits/chosen": 295.1396789550781, + "logits/rejected": 308.6955261230469, + "logps/chosen": -0.3132212162017822, + "logps/rejected": -1.5187908411026, + "loss": 0.3392, + "nll_loss": 0.4394780695438385, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.015661058947443962, + "rewards/margins": 0.06027848646044731, + "rewards/rejected": -0.07593954354524612, + "step": 2975 + }, + { + "epoch": 2.3669579030976964, + "grad_norm": 26.105850219726562, + "learning_rate": 9.159291318091397e-07, + "log_odds_chosen": 4.424689292907715, + "log_odds_ratio": -0.028805622830986977, + "logits/chosen": 185.22755432128906, + "logits/rejected": 448.46466064453125, + "logps/chosen": -0.1817312389612198, + "logps/rejected": -2.5383591651916504, + "loss": 0.3573, + "nll_loss": 0.36008864641189575, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00908656232059002, + "rewards/margins": 0.11783139407634735, + "rewards/rejected": -0.12691795825958252, + "step": 2980 + }, + { + "epoch": 2.370929308975377, + "grad_norm": 25.48073959350586, + "learning_rate": 9.151617004789102e-07, + "log_odds_chosen": 2.9670186042785645, + "log_odds_ratio": -0.06621041893959045, + "logits/chosen": 273.04693603515625, + "logits/rejected": 238.86740112304688, + "logps/chosen": -0.19776080548763275, + "logps/rejected": -1.5920594930648804, + "loss": 0.3417, + "nll_loss": 0.25471195578575134, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009888040833175182, + "rewards/margins": 0.06971494108438492, + "rewards/rejected": -0.07960297912359238, + "step": 2985 + }, + { + "epoch": 2.374900714853058, + "grad_norm": 23.61982536315918, + "learning_rate": 9.143961949493189e-07, + "log_odds_chosen": 2.8940136432647705, + "log_odds_ratio": -0.10211262851953506, + "logits/chosen": 323.6980895996094, + "logits/rejected": 273.4058837890625, + "logps/chosen": -0.3123711347579956, + "logps/rejected": -1.6104358434677124, + "loss": 0.3747, + "nll_loss": 0.4702371060848236, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01561855711042881, + "rewards/margins": 0.06490323692560196, + "rewards/rejected": -0.08052179217338562, + "step": 2990 + }, + { + "epoch": 2.3788721207307386, + "grad_norm": 28.628559112548828, + "learning_rate": 9.136326071794409e-07, + "log_odds_chosen": 3.454132080078125, + "log_odds_ratio": -0.07264744490385056, + "logits/chosen": 257.689208984375, + "logits/rejected": 262.5711364746094, + "logps/chosen": -0.1612526923418045, + "logps/rejected": -1.4927973747253418, + "loss": 0.4392, + "nll_loss": 0.36521124839782715, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008062634617090225, + "rewards/margins": 0.06657722592353821, + "rewards/rejected": -0.07463986426591873, + "step": 2995 + }, + { + "epoch": 2.3828435266084194, + "grad_norm": 27.72311019897461, + "learning_rate": 9.128709291752768e-07, + "log_odds_chosen": 3.825345277786255, + "log_odds_ratio": -0.06467778980731964, + "logits/chosen": 215.4276885986328, + "logits/rejected": 267.2101135253906, + "logps/chosen": -0.3039247989654541, + "logps/rejected": -2.7370784282684326, + "loss": 0.3702, + "nll_loss": 0.3736027181148529, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.015196239575743675, + "rewards/margins": 0.12165769189596176, + "rewards/rejected": -0.13685393333435059, + "step": 3000 + }, + { + "epoch": 2.3868149324861, + "grad_norm": 33.66513442993164, + "learning_rate": 9.121111529894007e-07, + "log_odds_chosen": 3.429539442062378, + "log_odds_ratio": -0.07355336099863052, + "logits/chosen": 280.227294921875, + "logits/rejected": 266.79351806640625, + "logps/chosen": -0.3146992623806, + "logps/rejected": -2.431187152862549, + "loss": 0.42, + "nll_loss": 0.5019311308860779, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.015734964981675148, + "rewards/margins": 0.10582438856363297, + "rewards/rejected": -0.12155935913324356, + "step": 3005 + }, + { + "epoch": 2.390786338363781, + "grad_norm": 28.422985076904297, + "learning_rate": 9.113532707206116e-07, + "log_odds_chosen": 3.8442492485046387, + "log_odds_ratio": -0.035998668521642685, + "logits/chosen": 199.76071166992188, + "logits/rejected": 245.70974731445312, + "logps/chosen": -0.21047630906105042, + "logps/rejected": -2.027498722076416, + "loss": 0.3807, + "nll_loss": 0.5179567933082581, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010523815639317036, + "rewards/margins": 0.09085111320018768, + "rewards/rejected": -0.10137493908405304, + "step": 3010 + }, + { + "epoch": 2.3947577442414616, + "grad_norm": 30.8699951171875, + "learning_rate": 9.105972745135884e-07, + "log_odds_chosen": 3.4679412841796875, + "log_odds_ratio": -0.05988996475934982, + "logits/chosen": 350.3097229003906, + "logits/rejected": 279.5090637207031, + "logps/chosen": -0.11882610619068146, + "logps/rejected": -1.5109213590621948, + "loss": 0.3682, + "nll_loss": 0.34842449426651, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005941305309534073, + "rewards/margins": 0.0696047693490982, + "rewards/rejected": -0.07554607093334198, + "step": 3015 + }, + { + "epoch": 2.3987291501191423, + "grad_norm": 20.073862075805664, + "learning_rate": 9.098431565585488e-07, + "log_odds_chosen": 3.60724139213562, + "log_odds_ratio": -0.04012635350227356, + "logits/chosen": 189.92782592773438, + "logits/rejected": 394.52960205078125, + "logps/chosen": -0.1309356540441513, + "logps/rejected": -1.7340739965438843, + "loss": 0.2913, + "nll_loss": 0.2573624551296234, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006546782795339823, + "rewards/margins": 0.08015692979097366, + "rewards/rejected": -0.08670370280742645, + "step": 3020 + }, + { + "epoch": 2.402700555996823, + "grad_norm": 22.885211944580078, + "learning_rate": 9.090909090909091e-07, + "log_odds_chosen": 3.2791740894317627, + "log_odds_ratio": -0.04902596399188042, + "logits/chosen": 329.19525146484375, + "logits/rejected": 196.19236755371094, + "logps/chosen": -0.23585081100463867, + "logps/rejected": -2.0871119499206543, + "loss": 0.4016, + "nll_loss": 0.3254674971103668, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011792539618909359, + "rewards/margins": 0.09256306290626526, + "rewards/rejected": -0.1043555960059166, + "step": 3025 + }, + { + "epoch": 2.4066719618745034, + "grad_norm": 20.781896591186523, + "learning_rate": 9.083405243909494e-07, + "log_odds_chosen": 2.4194552898406982, + "log_odds_ratio": -0.0930199846625328, + "logits/chosen": 204.93038940429688, + "logits/rejected": 288.8913269042969, + "logps/chosen": -0.19672732055187225, + "logps/rejected": -1.1895649433135986, + "loss": 0.4447, + "nll_loss": 0.37261468172073364, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009836366400122643, + "rewards/margins": 0.04964187741279602, + "rewards/rejected": -0.059478241950273514, + "step": 3030 + }, + { + "epoch": 2.410643367752184, + "grad_norm": 25.38187599182129, + "learning_rate": 9.075919947834808e-07, + "log_odds_chosen": 4.394177436828613, + "log_odds_ratio": -0.013578305020928383, + "logits/chosen": 304.8377685546875, + "logits/rejected": 219.35733032226562, + "logps/chosen": -0.15937462449073792, + "logps/rejected": -2.3069756031036377, + "loss": 0.3331, + "nll_loss": 0.3496881425380707, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007968731224536896, + "rewards/margins": 0.10738004744052887, + "rewards/rejected": -0.11534877866506577, + "step": 3035 + }, + { + "epoch": 2.414614773629865, + "grad_norm": 20.055810928344727, + "learning_rate": 9.068453126375147e-07, + "log_odds_chosen": 3.3665032386779785, + "log_odds_ratio": -0.0692509263753891, + "logits/chosen": 279.8843688964844, + "logits/rejected": 198.7550048828125, + "logps/chosen": -0.10582447052001953, + "logps/rejected": -1.3955246210098267, + "loss": 0.3492, + "nll_loss": 0.30622151494026184, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005291222594678402, + "rewards/margins": 0.06448500603437424, + "rewards/rejected": -0.06977623701095581, + "step": 3040 + }, + { + "epoch": 2.4185861795075456, + "grad_norm": 16.259273529052734, + "learning_rate": 9.061004703659373e-07, + "log_odds_chosen": 4.736048698425293, + "log_odds_ratio": -0.036501117050647736, + "logits/chosen": 305.44647216796875, + "logits/rejected": 281.8908996582031, + "logps/chosen": -0.1840660125017166, + "logps/rejected": -2.381317377090454, + "loss": 0.4145, + "nll_loss": 0.3028008043766022, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00920330174267292, + "rewards/margins": 0.1098625659942627, + "rewards/rejected": -0.11906585842370987, + "step": 3045 + }, + { + "epoch": 2.4225575853852264, + "grad_norm": 22.572229385375977, + "learning_rate": 9.053574604251853e-07, + "log_odds_chosen": 3.1664023399353027, + "log_odds_ratio": -0.055031102150678635, + "logits/chosen": 248.3058624267578, + "logits/rejected": 298.56103515625, + "logps/chosen": -0.1691017895936966, + "logps/rejected": -1.5698134899139404, + "loss": 0.322, + "nll_loss": 0.31757646799087524, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008455089293420315, + "rewards/margins": 0.07003559172153473, + "rewards/rejected": -0.07849067449569702, + "step": 3050 + }, + { + "epoch": 2.426528991262907, + "grad_norm": 24.180944442749023, + "learning_rate": 9.04616275314925e-07, + "log_odds_chosen": 4.054632186889648, + "log_odds_ratio": -0.03361033648252487, + "logits/chosen": 233.0185089111328, + "logits/rejected": 368.1893005371094, + "logps/chosen": -0.1435851901769638, + "logps/rejected": -2.2944867610931396, + "loss": 0.2769, + "nll_loss": 0.24402709305286407, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00717926025390625, + "rewards/margins": 0.10754507780075073, + "rewards/rejected": -0.11472433805465698, + "step": 3055 + }, + { + "epoch": 2.430500397140588, + "grad_norm": 26.717470169067383, + "learning_rate": 9.03876907577734e-07, + "log_odds_chosen": 3.7755863666534424, + "log_odds_ratio": -0.044100239872932434, + "logits/chosen": 169.8478546142578, + "logits/rejected": 342.99444580078125, + "logps/chosen": -0.18389829993247986, + "logps/rejected": -2.2423253059387207, + "loss": 0.2942, + "nll_loss": 0.23073211312294006, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009194915182888508, + "rewards/margins": 0.10292134433984756, + "rewards/rejected": -0.1121162623167038, + "step": 3060 + }, + { + "epoch": 2.4344718030182686, + "grad_norm": 30.117958068847656, + "learning_rate": 9.03139349798787e-07, + "log_odds_chosen": 4.355618476867676, + "log_odds_ratio": -0.023019861429929733, + "logits/chosen": 248.4194793701172, + "logits/rejected": 325.993896484375, + "logps/chosen": -0.14225144684314728, + "logps/rejected": -2.0682573318481445, + "loss": 0.4001, + "nll_loss": 0.33710920810699463, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007112572900950909, + "rewards/margins": 0.09630030393600464, + "rewards/rejected": -0.10341286659240723, + "step": 3065 + }, + { + "epoch": 2.4384432088959493, + "grad_norm": 40.229515075683594, + "learning_rate": 9.024035946055421e-07, + "log_odds_chosen": 2.305717706680298, + "log_odds_ratio": -0.10136429965496063, + "logits/chosen": 216.3096923828125, + "logits/rejected": 268.7192687988281, + "logps/chosen": -0.32101818919181824, + "logps/rejected": -1.5222715139389038, + "loss": 0.3947, + "nll_loss": 0.415952205657959, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.016050908714532852, + "rewards/margins": 0.06006266549229622, + "rewards/rejected": -0.07611358165740967, + "step": 3070 + }, + { + "epoch": 2.4424146147736296, + "grad_norm": 20.493045806884766, + "learning_rate": 9.016696346674324e-07, + "log_odds_chosen": 3.1343421936035156, + "log_odds_ratio": -0.050869233906269073, + "logits/chosen": 301.9378356933594, + "logits/rejected": 206.95703125, + "logps/chosen": -0.25668659806251526, + "logps/rejected": -1.7620422840118408, + "loss": 0.3361, + "nll_loss": 0.3529127836227417, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012834331020712852, + "rewards/margins": 0.07526777684688568, + "rewards/rejected": -0.08810211718082428, + "step": 3075 + }, + { + "epoch": 2.4463860206513104, + "grad_norm": 15.868453025817871, + "learning_rate": 9.00937462695559e-07, + "log_odds_chosen": 2.079472541809082, + "log_odds_ratio": -0.15630726516246796, + "logits/chosen": 259.4786071777344, + "logits/rejected": 208.0851593017578, + "logps/chosen": -0.30298787355422974, + "logps/rejected": -1.1315406560897827, + "loss": 0.3567, + "nll_loss": 0.4347425401210785, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.015149394050240517, + "rewards/margins": 0.04142763838171959, + "rewards/rejected": -0.056577038019895554, + "step": 3080 + }, + { + "epoch": 2.450357426528991, + "grad_norm": 30.881765365600586, + "learning_rate": 9.002070714423869e-07, + "log_odds_chosen": 3.3806204795837402, + "log_odds_ratio": -0.054750990122556686, + "logits/chosen": 247.8966827392578, + "logits/rejected": 301.9482727050781, + "logps/chosen": -0.17770621180534363, + "logps/rejected": -1.7712430953979492, + "loss": 0.3547, + "nll_loss": 0.3148205280303955, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008885310962796211, + "rewards/margins": 0.07967683672904968, + "rewards/rejected": -0.08856214582920074, + "step": 3085 + }, + { + "epoch": 2.454328832406672, + "grad_norm": 20.376623153686523, + "learning_rate": 8.994784537014432e-07, + "log_odds_chosen": 3.2993292808532715, + "log_odds_ratio": -0.0405481681227684, + "logits/chosen": 323.1062927246094, + "logits/rejected": 218.21615600585938, + "logps/chosen": -0.19290466606616974, + "logps/rejected": -1.7616689205169678, + "loss": 0.3416, + "nll_loss": 0.302539587020874, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009645233862102032, + "rewards/margins": 0.07843820750713348, + "rewards/rejected": -0.08808344602584839, + "step": 3090 + }, + { + "epoch": 2.4583002382843526, + "grad_norm": 27.777835845947266, + "learning_rate": 8.987516023070194e-07, + "log_odds_chosen": 3.353145122528076, + "log_odds_ratio": -0.109768345952034, + "logits/chosen": 288.3103332519531, + "logits/rejected": 237.58154296875, + "logps/chosen": -0.34388288855552673, + "logps/rejected": -2.0795645713806152, + "loss": 0.4017, + "nll_loss": 0.45742273330688477, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.017194144427776337, + "rewards/margins": 0.08678407222032547, + "rewards/rejected": -0.1039782166481018, + "step": 3095 + }, + { + "epoch": 2.4622716441620334, + "grad_norm": 20.824138641357422, + "learning_rate": 8.980265101338747e-07, + "log_odds_chosen": 3.3091349601745605, + "log_odds_ratio": -0.05483214184641838, + "logits/chosen": 233.0238800048828, + "logits/rejected": 291.1861572265625, + "logps/chosen": -0.19537723064422607, + "logps/rejected": -1.9486373662948608, + "loss": 0.481, + "nll_loss": 0.41400328278541565, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009768862277269363, + "rewards/margins": 0.08766300976276398, + "rewards/rejected": -0.09743185341358185, + "step": 3100 + }, + { + "epoch": 2.466243050039714, + "grad_norm": 21.861316680908203, + "learning_rate": 8.973031700969425e-07, + "log_odds_chosen": 3.9362945556640625, + "log_odds_ratio": -0.042985569685697556, + "logits/chosen": 247.0224151611328, + "logits/rejected": 247.30386352539062, + "logps/chosen": -0.17336855828762054, + "logps/rejected": -1.86944580078125, + "loss": 0.3268, + "nll_loss": 0.24799279868602753, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008668428286910057, + "rewards/margins": 0.08480386435985565, + "rewards/rejected": -0.09347229450941086, + "step": 3105 + }, + { + "epoch": 2.470214455917395, + "grad_norm": 29.43597984313965, + "learning_rate": 8.965815751510408e-07, + "log_odds_chosen": 4.157784461975098, + "log_odds_ratio": -0.05995911359786987, + "logits/chosen": 209.51339721679688, + "logits/rejected": 263.91632080078125, + "logps/chosen": -0.18789565563201904, + "logps/rejected": -2.0847220420837402, + "loss": 0.3517, + "nll_loss": 0.39410367608070374, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009394782595336437, + "rewards/margins": 0.09484131634235382, + "rewards/rejected": -0.10423608869314194, + "step": 3110 + }, + { + "epoch": 2.4741858617950756, + "grad_norm": 23.069969177246094, + "learning_rate": 8.958617182905828e-07, + "log_odds_chosen": 4.918656349182129, + "log_odds_ratio": -0.030725345015525818, + "logits/chosen": 282.84674072265625, + "logits/rejected": 367.82489013671875, + "logps/chosen": -0.18277548253536224, + "logps/rejected": -3.082590103149414, + "loss": 0.3415, + "nll_loss": 0.25883248448371887, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009138774126768112, + "rewards/margins": 0.1449907273054123, + "rewards/rejected": -0.1541295051574707, + "step": 3115 + }, + { + "epoch": 2.4781572676727563, + "grad_norm": 24.059284210205078, + "learning_rate": 8.951435925492912e-07, + "log_odds_chosen": 3.07710337638855, + "log_odds_ratio": -0.0691499263048172, + "logits/chosen": 308.7353515625, + "logits/rejected": 252.30648803710938, + "logps/chosen": -0.18907026946544647, + "logps/rejected": -1.4084926843643188, + "loss": 0.4312, + "nll_loss": 0.5141544342041016, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009453514590859413, + "rewards/margins": 0.06097111850976944, + "rewards/rejected": -0.0704246312379837, + "step": 3120 + }, + { + "epoch": 2.482128673550437, + "grad_norm": 22.84012222290039, + "learning_rate": 8.94427190999916e-07, + "log_odds_chosen": 4.7749199867248535, + "log_odds_ratio": -0.04176812991499901, + "logits/chosen": 250.2894744873047, + "logits/rejected": 233.84292602539062, + "logps/chosen": -0.19017019867897034, + "logps/rejected": -1.9456065893173218, + "loss": 0.2761, + "nll_loss": 0.3522077202796936, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009508511051535606, + "rewards/margins": 0.08777181804180145, + "rewards/rejected": -0.09728033095598221, + "step": 3125 + }, + { + "epoch": 2.4861000794281174, + "grad_norm": 42.411163330078125, + "learning_rate": 8.93712506753953e-07, + "log_odds_chosen": 2.530211925506592, + "log_odds_ratio": -0.12716186046600342, + "logits/chosen": 230.0726776123047, + "logits/rejected": 294.5306701660156, + "logps/chosen": -0.32971224188804626, + "logps/rejected": -1.8698943853378296, + "loss": 0.4152, + "nll_loss": 0.4431188106536865, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.016485612839460373, + "rewards/margins": 0.07700910419225693, + "rewards/rejected": -0.0934947207570076, + "step": 3130 + }, + { + "epoch": 2.490071485305798, + "grad_norm": 20.534034729003906, + "learning_rate": 8.929995329613664e-07, + "log_odds_chosen": 3.810880661010742, + "log_odds_ratio": -0.03098614513874054, + "logits/chosen": 258.92645263671875, + "logits/rejected": 379.466796875, + "logps/chosen": -0.19361644983291626, + "logps/rejected": -2.1948695182800293, + "loss": 0.3683, + "nll_loss": 0.2922203540802002, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009680822491645813, + "rewards/margins": 0.10006266832351685, + "rewards/rejected": -0.10974349081516266, + "step": 3135 + }, + { + "epoch": 2.494042891183479, + "grad_norm": 26.821924209594727, + "learning_rate": 8.922882628103122e-07, + "log_odds_chosen": 3.4475345611572266, + "log_odds_ratio": -0.03615453466773033, + "logits/chosen": 177.71914672851562, + "logits/rejected": 325.3964538574219, + "logps/chosen": -0.14965102076530457, + "logps/rejected": -1.773688554763794, + "loss": 0.3296, + "nll_loss": 0.3020648658275604, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007482551038265228, + "rewards/margins": 0.08120187371969223, + "rewards/rejected": -0.08868442475795746, + "step": 3140 + }, + { + "epoch": 2.4980142970611596, + "grad_norm": 17.4930419921875, + "learning_rate": 8.91578689526865e-07, + "log_odds_chosen": 3.373814344406128, + "log_odds_ratio": -0.07843703031539917, + "logits/chosen": 228.93408203125, + "logits/rejected": 266.8359680175781, + "logps/chosen": -0.30311352014541626, + "logps/rejected": -2.266686201095581, + "loss": 0.3862, + "nll_loss": 0.35967734456062317, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.015155675821006298, + "rewards/margins": 0.09817864745855331, + "rewards/rejected": -0.11333432048559189, + "step": 3145 + }, + { + "epoch": 2.5019857029388404, + "grad_norm": 49.936431884765625, + "learning_rate": 8.90870806374748e-07, + "log_odds_chosen": 2.624094009399414, + "log_odds_ratio": -0.0867760106921196, + "logits/chosen": 289.07318115234375, + "logits/rejected": 254.26565551757812, + "logps/chosen": -0.26391011476516724, + "logps/rejected": -1.578924298286438, + "loss": 0.4147, + "nll_loss": 0.36335909366607666, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.013195505365729332, + "rewards/margins": 0.06575071066617966, + "rewards/rejected": -0.07894621789455414, + "step": 3150 + }, + { + "epoch": 2.505957108816521, + "grad_norm": 32.26227569580078, + "learning_rate": 8.90164606655063e-07, + "log_odds_chosen": 3.1176323890686035, + "log_odds_ratio": -0.05419561266899109, + "logits/chosen": 244.9784393310547, + "logits/rejected": 271.72808837890625, + "logps/chosen": -0.14082542061805725, + "logps/rejected": -1.3495501279830933, + "loss": 0.2844, + "nll_loss": 0.25858157873153687, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007041270844638348, + "rewards/margins": 0.06043623015284538, + "rewards/rejected": -0.0674775093793869, + "step": 3155 + }, + { + "epoch": 2.509928514694202, + "grad_norm": 28.52840232849121, + "learning_rate": 8.894600837060251e-07, + "log_odds_chosen": 2.9466381072998047, + "log_odds_ratio": -0.08857440948486328, + "logits/chosen": 293.58978271484375, + "logits/rejected": 310.09246826171875, + "logps/chosen": -0.3098464012145996, + "logps/rejected": -1.6593332290649414, + "loss": 0.481, + "nll_loss": 0.5123347043991089, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01549232192337513, + "rewards/margins": 0.06747434288263321, + "rewards/rejected": -0.08296666294336319, + "step": 3160 + }, + { + "epoch": 2.5138999205718826, + "grad_norm": 23.358430862426758, + "learning_rate": 8.887572309026986e-07, + "log_odds_chosen": 3.9055874347686768, + "log_odds_ratio": -0.04153949022293091, + "logits/chosen": 237.70809936523438, + "logits/rejected": 308.60382080078125, + "logps/chosen": -0.09806036949157715, + "logps/rejected": -1.6581532955169678, + "loss": 0.3043, + "nll_loss": 0.2352304756641388, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004903018940240145, + "rewards/margins": 0.07800465822219849, + "rewards/rejected": -0.08290766924619675, + "step": 3165 + }, + { + "epoch": 2.517871326449563, + "grad_norm": 33.0350456237793, + "learning_rate": 8.880560416567349e-07, + "log_odds_chosen": 3.675652265548706, + "log_odds_ratio": -0.1050681620836258, + "logits/chosen": 439.34344482421875, + "logits/rejected": 253.87222290039062, + "logps/chosen": -0.292144775390625, + "logps/rejected": -1.6460235118865967, + "loss": 0.3491, + "nll_loss": 0.4088570475578308, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.014607238583266735, + "rewards/margins": 0.06769393384456635, + "rewards/rejected": -0.08230117708444595, + "step": 3170 + }, + { + "epoch": 2.5218427323272437, + "grad_norm": 22.11117172241211, + "learning_rate": 8.873565094161139e-07, + "log_odds_chosen": 2.985295295715332, + "log_odds_ratio": -0.07777807861566544, + "logits/chosen": 322.97698974609375, + "logits/rejected": 256.5289306640625, + "logps/chosen": -0.25270113348960876, + "logps/rejected": -1.9781535863876343, + "loss": 0.3116, + "nll_loss": 0.4061063230037689, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012635056860744953, + "rewards/margins": 0.08627263456583023, + "rewards/rejected": -0.09890768676996231, + "step": 3175 + }, + { + "epoch": 2.5258141382049244, + "grad_norm": 23.091205596923828, + "learning_rate": 8.866586276648859e-07, + "log_odds_chosen": 4.1930012702941895, + "log_odds_ratio": -0.01747201755642891, + "logits/chosen": 304.1520080566406, + "logits/rejected": 369.5452575683594, + "logps/chosen": -0.11393336951732635, + "logps/rejected": -2.1033239364624023, + "loss": 0.3055, + "nll_loss": 0.20445315539836884, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005696668289601803, + "rewards/margins": 0.09946952760219574, + "rewards/rejected": -0.10516619682312012, + "step": 3180 + }, + { + "epoch": 2.529785544082605, + "grad_norm": 27.869813919067383, + "learning_rate": 8.859623899229175e-07, + "log_odds_chosen": 2.7538349628448486, + "log_odds_ratio": -0.08827327191829681, + "logits/chosen": 318.21588134765625, + "logits/rejected": 307.63275146484375, + "logps/chosen": -0.24295452237129211, + "logps/rejected": -1.4790904521942139, + "loss": 0.3332, + "nll_loss": 0.3576405644416809, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012147725559771061, + "rewards/margins": 0.06180679798126221, + "rewards/rejected": -0.0739545226097107, + "step": 3185 + }, + { + "epoch": 2.533756949960286, + "grad_norm": 21.698623657226562, + "learning_rate": 8.852677897456389e-07, + "log_odds_chosen": 4.32507848739624, + "log_odds_ratio": -0.04182177782058716, + "logits/chosen": 307.76556396484375, + "logits/rejected": 265.21307373046875, + "logps/chosen": -0.11581907421350479, + "logps/rejected": -1.9977920055389404, + "loss": 0.3533, + "nll_loss": 0.3494908809661865, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005790953524410725, + "rewards/margins": 0.09409864246845245, + "rewards/rejected": -0.0998895987868309, + "step": 3190 + }, + { + "epoch": 2.5377283558379666, + "grad_norm": 23.7899169921875, + "learning_rate": 8.845748207237923e-07, + "log_odds_chosen": 4.4067888259887695, + "log_odds_ratio": -0.05649635195732117, + "logits/chosen": 199.7589874267578, + "logits/rejected": 377.6457214355469, + "logps/chosen": -0.24582286179065704, + "logps/rejected": -3.0470097064971924, + "loss": 0.3842, + "nll_loss": 0.31226563453674316, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012291142717003822, + "rewards/margins": 0.14005932211875916, + "rewards/rejected": -0.15235047042369843, + "step": 3195 + }, + { + "epoch": 2.5416997617156474, + "grad_norm": 26.713525772094727, + "learning_rate": 8.838834764831844e-07, + "log_odds_chosen": 3.987933397293091, + "log_odds_ratio": -0.022607123479247093, + "logits/chosen": 289.32916259765625, + "logits/rejected": 310.29010009765625, + "logps/chosen": -0.11314906924962997, + "logps/rejected": -1.6021335124969482, + "loss": 0.2942, + "nll_loss": 0.301480770111084, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0056574540212750435, + "rewards/margins": 0.07444922626018524, + "rewards/rejected": -0.08010667562484741, + "step": 3200 + }, + { + "epoch": 2.545671167593328, + "grad_norm": 24.519126892089844, + "learning_rate": 8.831937506844408e-07, + "log_odds_chosen": 2.726931095123291, + "log_odds_ratio": -0.12800243496894836, + "logits/chosen": 300.64373779296875, + "logits/rejected": 290.03192138671875, + "logps/chosen": -0.3544635474681854, + "logps/rejected": -1.5170186758041382, + "loss": 0.4417, + "nll_loss": 0.41465067863464355, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01772317662835121, + "rewards/margins": 0.05812777206301689, + "rewards/rejected": -0.0758509486913681, + "step": 3205 + }, + { + "epoch": 2.549642573471009, + "grad_norm": 39.54831314086914, + "learning_rate": 8.825056370227597e-07, + "log_odds_chosen": 5.636147975921631, + "log_odds_ratio": -0.013704921118915081, + "logits/chosen": 337.90618896484375, + "logits/rejected": 267.09014892578125, + "logps/chosen": -0.1419641226530075, + "logps/rejected": -3.0420889854431152, + "loss": 0.3481, + "nll_loss": 0.33497047424316406, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00709820631891489, + "rewards/margins": 0.1450062245130539, + "rewards/rejected": -0.1521044373512268, + "step": 3210 + }, + { + "epoch": 2.5536139793486896, + "grad_norm": 33.25807189941406, + "learning_rate": 8.818191292276726e-07, + "log_odds_chosen": 2.867290735244751, + "log_odds_ratio": -0.11395450681447983, + "logits/chosen": 356.44366455078125, + "logits/rejected": 316.00958251953125, + "logps/chosen": -0.21636183559894562, + "logps/rejected": -1.3843950033187866, + "loss": 0.3734, + "nll_loss": 0.29570505023002625, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01081809215247631, + "rewards/margins": 0.05840166285634041, + "rewards/rejected": -0.06921975314617157, + "step": 3215 + }, + { + "epoch": 2.5575853852263704, + "grad_norm": 24.43802833557129, + "learning_rate": 8.811342210628018e-07, + "log_odds_chosen": 4.013164520263672, + "log_odds_ratio": -0.0282739344984293, + "logits/chosen": 370.65472412109375, + "logits/rejected": 214.8960418701172, + "logps/chosen": -0.09479434043169022, + "logps/rejected": -1.5942752361297607, + "loss": 0.412, + "nll_loss": 0.5181189179420471, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004739716649055481, + "rewards/margins": 0.07497404515743256, + "rewards/rejected": -0.07971376925706863, + "step": 3220 + }, + { + "epoch": 2.561556791104051, + "grad_norm": 23.81389808654785, + "learning_rate": 8.804509063256239e-07, + "log_odds_chosen": 3.736074447631836, + "log_odds_ratio": -0.05703800916671753, + "logits/chosen": 437.04461669921875, + "logits/rejected": 222.80154418945312, + "logps/chosen": -0.08796362578868866, + "logps/rejected": -1.3682935237884521, + "loss": 0.3924, + "nll_loss": 0.3038131594657898, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004398181103169918, + "rewards/margins": 0.06401649117469788, + "rewards/rejected": -0.06841467320919037, + "step": 3225 + }, + { + "epoch": 2.5655281969817314, + "grad_norm": 20.135488510131836, + "learning_rate": 8.797691788472336e-07, + "log_odds_chosen": 3.9858810901641846, + "log_odds_ratio": -0.05607563257217407, + "logits/chosen": 329.1815185546875, + "logits/rejected": 290.13653564453125, + "logps/chosen": -0.108522430062294, + "logps/rejected": -1.2941747903823853, + "loss": 0.3484, + "nll_loss": 0.22560131549835205, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0054261209443211555, + "rewards/margins": 0.05928261950612068, + "rewards/rejected": -0.06470874696969986, + "step": 3230 + }, + { + "epoch": 2.569499602859412, + "grad_norm": 34.43703079223633, + "learning_rate": 8.790890324921097e-07, + "log_odds_chosen": 2.3997080326080322, + "log_odds_ratio": -0.11185695976018906, + "logits/chosen": 310.90777587890625, + "logits/rejected": 232.01480102539062, + "logps/chosen": -0.3746749460697174, + "logps/rejected": -1.6247644424438477, + "loss": 0.4029, + "nll_loss": 0.5081365704536438, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01873374730348587, + "rewards/margins": 0.06250447034835815, + "rewards/rejected": -0.08123821765184402, + "step": 3235 + }, + { + "epoch": 2.573471008737093, + "grad_norm": 17.835147857666016, + "learning_rate": 8.784104611578832e-07, + "log_odds_chosen": 2.310650587081909, + "log_odds_ratio": -0.12545891106128693, + "logits/chosen": 246.5829315185547, + "logits/rejected": 253.65975952148438, + "logps/chosen": -0.35286539793014526, + "logps/rejected": -1.4031962156295776, + "loss": 0.3793, + "nll_loss": 0.43818941712379456, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.017643271014094353, + "rewards/margins": 0.05251653864979744, + "rewards/rejected": -0.07015980780124664, + "step": 3240 + }, + { + "epoch": 2.5774424146147736, + "grad_norm": 26.749759674072266, + "learning_rate": 8.777334587751073e-07, + "log_odds_chosen": 3.4511096477508545, + "log_odds_ratio": -0.07684098184108734, + "logits/chosen": 253.508056640625, + "logits/rejected": 276.6531677246094, + "logps/chosen": -0.2683263421058655, + "logps/rejected": -2.0055651664733887, + "loss": 0.3453, + "nll_loss": 0.4227082133293152, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.013416317291557789, + "rewards/margins": 0.08686195313930511, + "rewards/rejected": -0.10027827322483063, + "step": 3245 + }, + { + "epoch": 2.5814138204924544, + "grad_norm": 25.256689071655273, + "learning_rate": 8.770580193070293e-07, + "log_odds_chosen": 4.399047374725342, + "log_odds_ratio": -0.032868240028619766, + "logits/chosen": 393.154052734375, + "logits/rejected": 251.6790008544922, + "logps/chosen": -0.15343406796455383, + "logps/rejected": -1.8153202533721924, + "loss": 0.3641, + "nll_loss": 0.5575748085975647, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007671704050153494, + "rewards/margins": 0.08309431374073029, + "rewards/rejected": -0.09076601266860962, + "step": 3250 + }, + { + "epoch": 2.585385226370135, + "grad_norm": 31.991716384887695, + "learning_rate": 8.763841367493649e-07, + "log_odds_chosen": 3.1166841983795166, + "log_odds_ratio": -0.08671603351831436, + "logits/chosen": 337.2877502441406, + "logits/rejected": 236.4402313232422, + "logps/chosen": -0.24322061240673065, + "logps/rejected": -1.8942596912384033, + "loss": 0.4194, + "nll_loss": 0.5026718378067017, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012161030434072018, + "rewards/margins": 0.08255194127559662, + "rewards/rejected": -0.09471298009157181, + "step": 3255 + }, + { + "epoch": 2.589356632247816, + "grad_norm": 25.604633331298828, + "learning_rate": 8.757118051300735e-07, + "log_odds_chosen": 3.5466561317443848, + "log_odds_ratio": -0.03917517513036728, + "logits/chosen": 279.3944396972656, + "logits/rejected": 271.83636474609375, + "logps/chosen": -0.17348912358283997, + "logps/rejected": -1.8674802780151367, + "loss": 0.2927, + "nll_loss": 0.2331702709197998, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008674455806612968, + "rewards/margins": 0.08469955623149872, + "rewards/rejected": -0.09337402135133743, + "step": 3260 + }, + { + "epoch": 2.593328038125496, + "grad_norm": 27.33872413635254, + "learning_rate": 8.750410185091365e-07, + "log_odds_chosen": 4.348583698272705, + "log_odds_ratio": -0.0230005644261837, + "logits/chosen": 328.2115478515625, + "logits/rejected": 254.36544799804688, + "logps/chosen": -0.14674702286720276, + "logps/rejected": -2.459757089614868, + "loss": 0.306, + "nll_loss": 0.28896063566207886, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007337350398302078, + "rewards/margins": 0.11565049737691879, + "rewards/rejected": -0.12298785150051117, + "step": 3265 + }, + { + "epoch": 2.597299444003177, + "grad_norm": 25.737802505493164, + "learning_rate": 8.743717709783363e-07, + "log_odds_chosen": 3.1614301204681396, + "log_odds_ratio": -0.06867258995771408, + "logits/chosen": 348.8385314941406, + "logits/rejected": 204.63389587402344, + "logps/chosen": -0.23629799485206604, + "logps/rejected": -1.561525821685791, + "loss": 0.3409, + "nll_loss": 0.4211881756782532, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011814900673925877, + "rewards/margins": 0.0662613958120346, + "rewards/rejected": -0.07807628810405731, + "step": 3270 + }, + { + "epoch": 2.6012708498808577, + "grad_norm": 22.75188636779785, + "learning_rate": 8.737040566610381e-07, + "log_odds_chosen": 3.9433999061584473, + "log_odds_ratio": -0.04137764498591423, + "logits/chosen": 384.3919677734375, + "logits/rejected": 265.02557373046875, + "logps/chosen": -0.15901608765125275, + "logps/rejected": -1.3845044374465942, + "loss": 0.3862, + "nll_loss": 0.3304668962955475, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007950803264975548, + "rewards/margins": 0.06127442046999931, + "rewards/rejected": -0.06922522932291031, + "step": 3275 + }, + { + "epoch": 2.6052422557585384, + "grad_norm": 27.857601165771484, + "learning_rate": 8.730378697119729e-07, + "log_odds_chosen": 3.643134593963623, + "log_odds_ratio": -0.03681778535246849, + "logits/chosen": 321.9202575683594, + "logits/rejected": 334.7354736328125, + "logps/chosen": -0.10982207208871841, + "logps/rejected": -1.6907075643539429, + "loss": 0.3246, + "nll_loss": 0.22584645450115204, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005491103511303663, + "rewards/margins": 0.07904426753520966, + "rewards/rejected": -0.0845353752374649, + "step": 3280 + }, + { + "epoch": 2.609213661636219, + "grad_norm": 25.07754898071289, + "learning_rate": 8.723732043170228e-07, + "log_odds_chosen": 4.07627010345459, + "log_odds_ratio": -0.04696853086352348, + "logits/chosen": 333.41082763671875, + "logits/rejected": 297.3253479003906, + "logps/chosen": -0.17340394854545593, + "logps/rejected": -1.9108400344848633, + "loss": 0.3297, + "nll_loss": 0.3067760169506073, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008670197799801826, + "rewards/margins": 0.08687180280685425, + "rewards/rejected": -0.09554199874401093, + "step": 3285 + }, + { + "epoch": 2.6131850675139, + "grad_norm": 24.27914810180664, + "learning_rate": 8.717100546930084e-07, + "log_odds_chosen": 3.4627201557159424, + "log_odds_ratio": -0.04240020364522934, + "logits/chosen": 297.2704772949219, + "logits/rejected": 297.8516845703125, + "logps/chosen": -0.18625622987747192, + "logps/rejected": -1.753732442855835, + "loss": 0.3577, + "nll_loss": 0.35155242681503296, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009312811307609081, + "rewards/margins": 0.07837380468845367, + "rewards/rejected": -0.08768662065267563, + "step": 3290 + }, + { + "epoch": 2.6171564733915806, + "grad_norm": 29.44333839416504, + "learning_rate": 8.710484150874759e-07, + "log_odds_chosen": 2.7033531665802, + "log_odds_ratio": -0.09133219718933105, + "logits/chosen": 218.84378051757812, + "logits/rejected": 311.80950927734375, + "logps/chosen": -0.1878434419631958, + "logps/rejected": -1.3301265239715576, + "loss": 0.3785, + "nll_loss": 0.2603824734687805, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00939217209815979, + "rewards/margins": 0.057114146649837494, + "rewards/rejected": -0.06650632619857788, + "step": 3295 + }, + { + "epoch": 2.6211278792692614, + "grad_norm": 25.69460105895996, + "learning_rate": 8.703882797784894e-07, + "log_odds_chosen": 3.080230712890625, + "log_odds_ratio": -0.05720607191324234, + "logits/chosen": 200.24032592773438, + "logits/rejected": 362.2676086425781, + "logps/chosen": -0.2980789542198181, + "logps/rejected": -2.0313193798065186, + "loss": 0.3677, + "nll_loss": 0.3649226725101471, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.014903949573636055, + "rewards/margins": 0.08666200935840607, + "rewards/rejected": -0.10156597197055817, + "step": 3300 + }, + { + "epoch": 2.625099285146942, + "grad_norm": 22.543445587158203, + "learning_rate": 8.697296430744212e-07, + "log_odds_chosen": 3.6661219596862793, + "log_odds_ratio": -0.185410737991333, + "logits/chosen": 267.294677734375, + "logits/rejected": 246.5009002685547, + "logps/chosen": -0.14372417330741882, + "logps/rejected": -1.8430382013320923, + "loss": 0.3137, + "nll_loss": 0.26537400484085083, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.0071862079203128815, + "rewards/margins": 0.08496570587158203, + "rewards/rejected": -0.09215191006660461, + "step": 3305 + }, + { + "epoch": 2.629070691024623, + "grad_norm": 21.284128189086914, + "learning_rate": 8.690724993137478e-07, + "log_odds_chosen": 3.3694980144500732, + "log_odds_ratio": -0.04535522311925888, + "logits/chosen": 262.36431884765625, + "logits/rejected": 278.66455078125, + "logps/chosen": -0.22373194992542267, + "logps/rejected": -2.0730624198913574, + "loss": 0.3005, + "nll_loss": 0.32457104325294495, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011186597868800163, + "rewards/margins": 0.09246651828289032, + "rewards/rejected": -0.10365311056375504, + "step": 3310 + }, + { + "epoch": 2.6330420969023036, + "grad_norm": 26.384048461914062, + "learning_rate": 8.684168428648437e-07, + "log_odds_chosen": 3.301476001739502, + "log_odds_ratio": -0.06395457684993744, + "logits/chosen": 268.52532958984375, + "logits/rejected": 219.0271453857422, + "logps/chosen": -0.1741420328617096, + "logps/rejected": -1.5680488348007202, + "loss": 0.3139, + "nll_loss": 0.325143039226532, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00870710052549839, + "rewards/margins": 0.06969533860683441, + "rewards/rejected": -0.07840244472026825, + "step": 3315 + }, + { + "epoch": 2.6370135027799844, + "grad_norm": 29.413419723510742, + "learning_rate": 8.677626681257792e-07, + "log_odds_chosen": 2.828350305557251, + "log_odds_ratio": -0.10513371229171753, + "logits/chosen": 251.63174438476562, + "logits/rejected": 276.219970703125, + "logps/chosen": -0.262276828289032, + "logps/rejected": -1.6337566375732422, + "loss": 0.4856, + "nll_loss": 0.46243181824684143, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.013113843277096748, + "rewards/margins": 0.06857398897409439, + "rewards/rejected": -0.08168783038854599, + "step": 3320 + }, + { + "epoch": 2.6409849086576647, + "grad_norm": 29.527265548706055, + "learning_rate": 8.6710996952412e-07, + "log_odds_chosen": 2.508577823638916, + "log_odds_ratio": -0.2033156156539917, + "logits/chosen": 293.64349365234375, + "logits/rejected": 280.0192565917969, + "logps/chosen": -0.3613029718399048, + "logps/rejected": -1.2332953214645386, + "loss": 0.4505, + "nll_loss": 0.4004967212677002, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01806515082716942, + "rewards/margins": 0.04359962046146393, + "rewards/rejected": -0.06166477128863335, + "step": 3325 + }, + { + "epoch": 2.6449563145353454, + "grad_norm": 26.56429672241211, + "learning_rate": 8.664587415167274e-07, + "log_odds_chosen": 3.569314956665039, + "log_odds_ratio": -0.0447075180709362, + "logits/chosen": 221.4001922607422, + "logits/rejected": 289.3610534667969, + "logps/chosen": -0.20264658331871033, + "logps/rejected": -2.0745139122009277, + "loss": 0.4131, + "nll_loss": 0.4826287627220154, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010132329538464546, + "rewards/margins": 0.09359337389469147, + "rewards/rejected": -0.10372570902109146, + "step": 3330 + }, + { + "epoch": 2.648927720413026, + "grad_norm": 25.318214416503906, + "learning_rate": 8.658089785895599e-07, + "log_odds_chosen": 4.950355052947998, + "log_odds_ratio": -0.013309493660926819, + "logits/chosen": 208.6847686767578, + "logits/rejected": 290.908935546875, + "logps/chosen": -0.07694874703884125, + "logps/rejected": -2.389655351638794, + "loss": 0.3298, + "nll_loss": 0.2016306221485138, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00384743744507432, + "rewards/margins": 0.11563533544540405, + "rewards/rejected": -0.11948277801275253, + "step": 3335 + }, + { + "epoch": 2.652899126290707, + "grad_norm": 21.625627517700195, + "learning_rate": 8.651606752574786e-07, + "log_odds_chosen": 3.532393217086792, + "log_odds_ratio": -0.07799427211284637, + "logits/chosen": 251.471435546875, + "logits/rejected": 363.47119140625, + "logps/chosen": -0.3781413733959198, + "logps/rejected": -2.1852574348449707, + "loss": 0.3282, + "nll_loss": 0.43493717908859253, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01890706829726696, + "rewards/margins": 0.09035581350326538, + "rewards/rejected": -0.10926288366317749, + "step": 3340 + }, + { + "epoch": 2.6568705321683876, + "grad_norm": 22.178985595703125, + "learning_rate": 8.645138260640511e-07, + "log_odds_chosen": 3.403825283050537, + "log_odds_ratio": -0.04612133651971817, + "logits/chosen": 264.21868896484375, + "logits/rejected": 315.3473205566406, + "logps/chosen": -0.130589097738266, + "logps/rejected": -1.5952059030532837, + "loss": 0.3498, + "nll_loss": 0.20455436408519745, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0065294550731778145, + "rewards/margins": 0.07323084771633148, + "rewards/rejected": -0.07976029813289642, + "step": 3345 + }, + { + "epoch": 2.6608419380460684, + "grad_norm": 27.09998321533203, + "learning_rate": 8.638684255813602e-07, + "log_odds_chosen": 2.8917152881622314, + "log_odds_ratio": -0.07270168513059616, + "logits/chosen": 202.5989227294922, + "logits/rejected": 326.9261474609375, + "logps/chosen": -0.2160286009311676, + "logps/rejected": -1.6441643238067627, + "loss": 0.3844, + "nll_loss": 0.40900883078575134, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01080142892897129, + "rewards/margins": 0.0714067816734314, + "rewards/rejected": -0.08220821619033813, + "step": 3350 + }, + { + "epoch": 2.664813343923749, + "grad_norm": 28.016910552978516, + "learning_rate": 8.63224468409811e-07, + "log_odds_chosen": 3.194028377532959, + "log_odds_ratio": -0.069419726729393, + "logits/chosen": 226.66091918945312, + "logits/rejected": 326.2010192871094, + "logps/chosen": -0.4025501310825348, + "logps/rejected": -2.2129743099212646, + "loss": 0.4066, + "nll_loss": 0.4211258292198181, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02012750506401062, + "rewards/margins": 0.0905212014913559, + "rewards/rejected": -0.11064871400594711, + "step": 3355 + }, + { + "epoch": 2.6687847498014294, + "grad_norm": 39.505130767822266, + "learning_rate": 8.625819491779427e-07, + "log_odds_chosen": 3.5177810192108154, + "log_odds_ratio": -0.1360502988100052, + "logits/chosen": 232.5872039794922, + "logits/rejected": 382.99786376953125, + "logps/chosen": -0.1633174568414688, + "logps/rejected": -1.9696972370147705, + "loss": 0.3207, + "nll_loss": 0.2783007025718689, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008165872655808926, + "rewards/margins": 0.09031899273395538, + "rewards/rejected": -0.09848486632108688, + "step": 3360 + }, + { + "epoch": 2.67275615567911, + "grad_norm": 24.680538177490234, + "learning_rate": 8.619408625422394e-07, + "log_odds_chosen": 2.9842259883880615, + "log_odds_ratio": -0.06695107370615005, + "logits/chosen": 266.1699523925781, + "logits/rejected": 277.45489501953125, + "logps/chosen": -0.28020209074020386, + "logps/rejected": -1.8149499893188477, + "loss": 0.385, + "nll_loss": 0.4652983248233795, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.014010104350745678, + "rewards/margins": 0.07673738896846771, + "rewards/rejected": -0.09074750542640686, + "step": 3365 + }, + { + "epoch": 2.676727561556791, + "grad_norm": 21.564693450927734, + "learning_rate": 8.613012031869432e-07, + "log_odds_chosen": 2.69940185546875, + "log_odds_ratio": -0.09041761606931686, + "logits/chosen": 407.0078125, + "logits/rejected": 278.8470153808594, + "logps/chosen": -0.24051721394062042, + "logps/rejected": -1.4418036937713623, + "loss": 0.3393, + "nll_loss": 0.3935914933681488, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012025861069560051, + "rewards/margins": 0.06006432697176933, + "rewards/rejected": -0.07209019362926483, + "step": 3370 + }, + { + "epoch": 2.6806989674344717, + "grad_norm": 16.88412857055664, + "learning_rate": 8.606629658238705e-07, + "log_odds_chosen": 4.196094512939453, + "log_odds_ratio": -0.02909168228507042, + "logits/chosen": 231.52145385742188, + "logits/rejected": 231.4295196533203, + "logps/chosen": -0.10496889054775238, + "logps/rejected": -1.6880567073822021, + "loss": 0.4033, + "nll_loss": 0.3222340941429138, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005248443689197302, + "rewards/margins": 0.07915439456701279, + "rewards/rejected": -0.08440284430980682, + "step": 3375 + }, + { + "epoch": 2.6846703733121524, + "grad_norm": 26.121601104736328, + "learning_rate": 8.600261451922269e-07, + "log_odds_chosen": 3.75126576423645, + "log_odds_ratio": -0.03185001388192177, + "logits/chosen": 326.3580017089844, + "logits/rejected": 236.6237030029297, + "logps/chosen": -0.1497809886932373, + "logps/rejected": -1.8008592128753662, + "loss": 0.3518, + "nll_loss": 0.32695260643959045, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007489049341529608, + "rewards/margins": 0.0825539156794548, + "rewards/rejected": -0.09004296362400055, + "step": 3380 + }, + { + "epoch": 2.688641779189833, + "grad_norm": 22.405277252197266, + "learning_rate": 8.593907360584258e-07, + "log_odds_chosen": 3.142768621444702, + "log_odds_ratio": -0.06925017386674881, + "logits/chosen": 221.24462890625, + "logits/rejected": 374.22503662109375, + "logps/chosen": -0.23636960983276367, + "logps/rejected": -1.8491294384002686, + "loss": 0.3725, + "nll_loss": 0.35332077741622925, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011818479746580124, + "rewards/margins": 0.08063800632953644, + "rewards/rejected": -0.09245647490024567, + "step": 3385 + }, + { + "epoch": 2.692613185067514, + "grad_norm": 22.6203556060791, + "learning_rate": 8.587567332159079e-07, + "log_odds_chosen": 2.694579839706421, + "log_odds_ratio": -0.10035224258899689, + "logits/chosen": 256.6727600097656, + "logits/rejected": 332.54217529296875, + "logps/chosen": -0.22367092967033386, + "logps/rejected": -1.6074225902557373, + "loss": 0.4156, + "nll_loss": 0.32448482513427734, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011183546856045723, + "rewards/margins": 0.06918759644031525, + "rewards/rejected": -0.08037114143371582, + "step": 3390 + }, + { + "epoch": 2.6965845909451946, + "grad_norm": 21.095897674560547, + "learning_rate": 8.581241314849612e-07, + "log_odds_chosen": 3.3230767250061035, + "log_odds_ratio": -0.06266503781080246, + "logits/chosen": 259.4347229003906, + "logits/rejected": 244.94088745117188, + "logps/chosen": -0.2486274689435959, + "logps/rejected": -1.8099991083145142, + "loss": 0.3862, + "nll_loss": 0.3283666968345642, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012431373819708824, + "rewards/margins": 0.0780685767531395, + "rewards/rejected": -0.09049995988607407, + "step": 3395 + }, + { + "epoch": 2.7005559968228754, + "grad_norm": 25.299463272094727, + "learning_rate": 8.574929257125441e-07, + "log_odds_chosen": 3.30029296875, + "log_odds_ratio": -0.047333456575870514, + "logits/chosen": 203.181396484375, + "logits/rejected": 309.99542236328125, + "logps/chosen": -0.24806909263134003, + "logps/rejected": -2.0718834400177, + "loss": 0.4356, + "nll_loss": 0.47107014060020447, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012403455562889576, + "rewards/margins": 0.09119071066379547, + "rewards/rejected": -0.10359416902065277, + "step": 3400 + }, + { + "epoch": 2.704527402700556, + "grad_norm": 40.7325553894043, + "learning_rate": 8.568631107721093e-07, + "log_odds_chosen": 3.272376298904419, + "log_odds_ratio": -0.04923254996538162, + "logits/chosen": 251.3883819580078, + "logits/rejected": 296.41925048828125, + "logps/chosen": -0.192131906747818, + "logps/rejected": -1.8338134288787842, + "loss": 0.3396, + "nll_loss": 0.3637116551399231, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009606595151126385, + "rewards/margins": 0.08208408206701279, + "rewards/rejected": -0.09169068187475204, + "step": 3405 + }, + { + "epoch": 2.708498808578237, + "grad_norm": 19.95270347595215, + "learning_rate": 8.562346815634272e-07, + "log_odds_chosen": 4.730654716491699, + "log_odds_ratio": -0.016973715275526047, + "logits/chosen": 375.0517578125, + "logits/rejected": 268.80157470703125, + "logps/chosen": -0.09887482225894928, + "logps/rejected": -1.5165462493896484, + "loss": 0.3366, + "nll_loss": 0.20561964809894562, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004943741485476494, + "rewards/margins": 0.07088357955217361, + "rewards/rejected": -0.07582731544971466, + "step": 3410 + }, + { + "epoch": 2.7124702144559176, + "grad_norm": 32.70396041870117, + "learning_rate": 8.556076330124148e-07, + "log_odds_chosen": 2.935753583908081, + "log_odds_ratio": -0.1059001088142395, + "logits/chosen": 334.5813293457031, + "logits/rejected": 297.3235778808594, + "logps/chosen": -0.29814326763153076, + "logps/rejected": -1.8218357563018799, + "loss": 0.3963, + "nll_loss": 0.3989710807800293, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.014907163567841053, + "rewards/margins": 0.07618463039398193, + "rewards/rejected": -0.09109178930521011, + "step": 3415 + }, + { + "epoch": 2.716441620333598, + "grad_norm": 27.2939453125, + "learning_rate": 8.549819600709619e-07, + "log_odds_chosen": 5.619948863983154, + "log_odds_ratio": -0.03720756620168686, + "logits/chosen": 344.32708740234375, + "logits/rejected": 293.09710693359375, + "logps/chosen": -0.15366186201572418, + "logps/rejected": -2.807699680328369, + "loss": 0.3832, + "nll_loss": 0.34514352679252625, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007683093193918467, + "rewards/margins": 0.13270190358161926, + "rewards/rejected": -0.1403849869966507, + "step": 3420 + }, + { + "epoch": 2.7204130262112787, + "grad_norm": 24.816383361816406, + "learning_rate": 8.54357657716761e-07, + "log_odds_chosen": 4.2862443923950195, + "log_odds_ratio": -0.022790148854255676, + "logits/chosen": 264.8843078613281, + "logits/rejected": 247.75222778320312, + "logps/chosen": -0.11239345371723175, + "logps/rejected": -1.8959920406341553, + "loss": 0.3598, + "nll_loss": 0.25543782114982605, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00561967259272933, + "rewards/margins": 0.08917994052171707, + "rewards/rejected": -0.09479960799217224, + "step": 3425 + }, + { + "epoch": 2.7243844320889594, + "grad_norm": 28.45637321472168, + "learning_rate": 8.537347209531384e-07, + "log_odds_chosen": 3.2400569915771484, + "log_odds_ratio": -0.19109003245830536, + "logits/chosen": 314.67193603515625, + "logits/rejected": 245.2732391357422, + "logps/chosen": -0.24329812824726105, + "logps/rejected": -1.4679007530212402, + "loss": 0.3757, + "nll_loss": 0.39857763051986694, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012164906598627567, + "rewards/margins": 0.0612301342189312, + "rewards/rejected": -0.07339503616094589, + "step": 3430 + }, + { + "epoch": 2.72835583796664, + "grad_norm": 22.829252243041992, + "learning_rate": 8.531131448088853e-07, + "log_odds_chosen": 3.3454837799072266, + "log_odds_ratio": -0.04038381576538086, + "logits/chosen": 286.8428955078125, + "logits/rejected": 271.490966796875, + "logps/chosen": -0.15350893139839172, + "logps/rejected": -1.5690906047821045, + "loss": 0.3353, + "nll_loss": 0.3089643120765686, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007675447501242161, + "rewards/margins": 0.07077908515930176, + "rewards/rejected": -0.07845453172922134, + "step": 3435 + }, + { + "epoch": 2.732327243844321, + "grad_norm": 23.981935501098633, + "learning_rate": 8.52492924338092e-07, + "log_odds_chosen": 1.9324777126312256, + "log_odds_ratio": -0.1563093215227127, + "logits/chosen": 264.3291015625, + "logits/rejected": 246.1039581298828, + "logps/chosen": -0.3107035756111145, + "logps/rejected": -1.1190191507339478, + "loss": 0.3325, + "nll_loss": 0.4039735198020935, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01553518045693636, + "rewards/margins": 0.04041577875614166, + "rewards/rejected": -0.05595095828175545, + "step": 3440 + }, + { + "epoch": 2.7362986497220017, + "grad_norm": 27.215803146362305, + "learning_rate": 8.51874054619982e-07, + "log_odds_chosen": 3.2275519371032715, + "log_odds_ratio": -0.06337851285934448, + "logits/chosen": 307.99066162109375, + "logits/rejected": 368.2579040527344, + "logps/chosen": -0.22807280719280243, + "logps/rejected": -1.8899672031402588, + "loss": 0.3956, + "nll_loss": 0.2683226466178894, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011403640732169151, + "rewards/margins": 0.08309471607208252, + "rewards/rejected": -0.09449835866689682, + "step": 3445 + }, + { + "epoch": 2.7402700555996824, + "grad_norm": 25.120004653930664, + "learning_rate": 8.512565307587487e-07, + "log_odds_chosen": 3.7829699516296387, + "log_odds_ratio": -0.03385692834854126, + "logits/chosen": 252.9814453125, + "logits/rejected": 311.3857727050781, + "logps/chosen": -0.14769130945205688, + "logps/rejected": -1.8254680633544922, + "loss": 0.4128, + "nll_loss": 0.2506926953792572, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0073845661245286465, + "rewards/margins": 0.08388884365558624, + "rewards/rejected": -0.09127341210842133, + "step": 3450 + }, + { + "epoch": 2.744241461477363, + "grad_norm": 25.093229293823242, + "learning_rate": 8.50640347883392e-07, + "log_odds_chosen": 3.7419135570526123, + "log_odds_ratio": -0.029192060232162476, + "logits/chosen": 334.3880310058594, + "logits/rejected": 249.8907470703125, + "logps/chosen": -0.12614428997039795, + "logps/rejected": -1.8909130096435547, + "loss": 0.4699, + "nll_loss": 0.21717166900634766, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0063072144985198975, + "rewards/margins": 0.0882384404540062, + "rewards/rejected": -0.0945456475019455, + "step": 3455 + }, + { + "epoch": 2.7482128673550434, + "grad_norm": 35.0034065246582, + "learning_rate": 8.500255011475575e-07, + "log_odds_chosen": 3.9182846546173096, + "log_odds_ratio": -0.06826486438512802, + "logits/chosen": 224.9783935546875, + "logits/rejected": 205.3113250732422, + "logps/chosen": -0.19937647879123688, + "logps/rejected": -1.5910238027572632, + "loss": 0.4177, + "nll_loss": 0.36896106600761414, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009968823753297329, + "rewards/margins": 0.06958237290382385, + "rewards/rejected": -0.07955120503902435, + "step": 3460 + }, + { + "epoch": 2.752184273232724, + "grad_norm": 33.20505905151367, + "learning_rate": 8.49411985729376e-07, + "log_odds_chosen": 3.2729244232177734, + "log_odds_ratio": -0.05948426574468613, + "logits/chosen": 323.2681579589844, + "logits/rejected": 243.99734497070312, + "logps/chosen": -0.1548212319612503, + "logps/rejected": -1.6072862148284912, + "loss": 0.381, + "nll_loss": 0.3288036584854126, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007741062436252832, + "rewards/margins": 0.07262326031923294, + "rewards/rejected": -0.08036432415246964, + "step": 3465 + }, + { + "epoch": 2.756155679110405, + "grad_norm": 26.826465606689453, + "learning_rate": 8.48799796831305e-07, + "log_odds_chosen": 3.9374356269836426, + "log_odds_ratio": -0.025482967495918274, + "logits/chosen": 241.34744262695312, + "logits/rejected": 329.5173034667969, + "logps/chosen": -0.19136756658554077, + "logps/rejected": -2.1954541206359863, + "loss": 0.3625, + "nll_loss": 0.3516727685928345, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009568377397954464, + "rewards/margins": 0.10020432621240616, + "rewards/rejected": -0.1097727045416832, + "step": 3470 + }, + { + "epoch": 2.7601270849880857, + "grad_norm": 27.76506233215332, + "learning_rate": 8.48188929679971e-07, + "log_odds_chosen": 3.391524076461792, + "log_odds_ratio": -0.0386800579726696, + "logits/chosen": 231.2411651611328, + "logits/rejected": 282.93878173828125, + "logps/chosen": -0.1687900722026825, + "logps/rejected": -1.7434227466583252, + "loss": 0.2898, + "nll_loss": 0.2864622175693512, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008439503610134125, + "rewards/margins": 0.07873163372278214, + "rewards/rejected": -0.08717113733291626, + "step": 3475 + }, + { + "epoch": 2.7640984908657664, + "grad_norm": 36.33848190307617, + "learning_rate": 8.475793795260132e-07, + "log_odds_chosen": 4.21061897277832, + "log_odds_ratio": -0.03166338428854942, + "logits/chosen": 248.8314971923828, + "logits/rejected": 235.77224731445312, + "logps/chosen": -0.10397151857614517, + "logps/rejected": -1.8724483251571655, + "loss": 0.3554, + "nll_loss": 0.31431207060813904, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005198576487600803, + "rewards/margins": 0.08842384815216064, + "rewards/rejected": -0.09362240880727768, + "step": 3480 + }, + { + "epoch": 2.768069896743447, + "grad_norm": 17.6458683013916, + "learning_rate": 8.469711416439277e-07, + "log_odds_chosen": 3.060960292816162, + "log_odds_ratio": -0.07192051410675049, + "logits/chosen": 220.4897003173828, + "logits/rejected": 304.0692138671875, + "logps/chosen": -0.3180966079235077, + "logps/rejected": -2.093109607696533, + "loss": 0.3345, + "nll_loss": 0.4048178791999817, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.015904832631349564, + "rewards/margins": 0.08875066787004471, + "rewards/rejected": -0.10465548932552338, + "step": 3485 + }, + { + "epoch": 2.772041302621128, + "grad_norm": 23.570837020874023, + "learning_rate": 8.463642113319158e-07, + "log_odds_chosen": 2.799910068511963, + "log_odds_ratio": -0.1606951504945755, + "logits/chosen": 236.31088256835938, + "logits/rejected": 249.38705444335938, + "logps/chosen": -0.29771658778190613, + "logps/rejected": -1.322884202003479, + "loss": 0.2954, + "nll_loss": 0.43411684036254883, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01488583069294691, + "rewards/margins": 0.051258385181427, + "rewards/rejected": -0.06614421308040619, + "step": 3490 + }, + { + "epoch": 2.7760127084988087, + "grad_norm": 44.051631927490234, + "learning_rate": 8.457585839117283e-07, + "log_odds_chosen": 3.4752700328826904, + "log_odds_ratio": -0.044789545238018036, + "logits/chosen": 331.0361328125, + "logits/rejected": 264.93536376953125, + "logps/chosen": -0.1473982334136963, + "logps/rejected": -1.7014989852905273, + "loss": 0.3962, + "nll_loss": 0.4065122604370117, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007369911763817072, + "rewards/margins": 0.07770504802465439, + "rewards/rejected": -0.08507496118545532, + "step": 3495 + }, + { + "epoch": 2.7799841143764894, + "grad_norm": 29.056276321411133, + "learning_rate": 8.451542547285166e-07, + "log_odds_chosen": 3.405332088470459, + "log_odds_ratio": -0.03398241847753525, + "logits/chosen": 229.6246795654297, + "logits/rejected": 339.95166015625, + "logps/chosen": -0.1531856805086136, + "logps/rejected": -1.7593185901641846, + "loss": 0.3732, + "nll_loss": 0.3807409703731537, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007659283932298422, + "rewards/margins": 0.08030664920806885, + "rewards/rejected": -0.0879659354686737, + "step": 3500 + }, + { + "epoch": 2.78395552025417, + "grad_norm": 22.696245193481445, + "learning_rate": 8.44551219150681e-07, + "log_odds_chosen": 4.299327373504639, + "log_odds_ratio": -0.06721607595682144, + "logits/chosen": 272.5831298828125, + "logits/rejected": 238.8101806640625, + "logps/chosen": -0.23352356255054474, + "logps/rejected": -1.9828745126724243, + "loss": 0.3538, + "nll_loss": 0.5397769212722778, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011676179245114326, + "rewards/margins": 0.08746754378080368, + "rewards/rejected": -0.09914372861385345, + "step": 3505 + }, + { + "epoch": 2.787926926131851, + "grad_norm": 26.001413345336914, + "learning_rate": 8.439494725697223e-07, + "log_odds_chosen": 2.3186655044555664, + "log_odds_ratio": -0.09961884468793869, + "logits/chosen": 275.5745544433594, + "logits/rejected": 246.73629760742188, + "logps/chosen": -0.29723626375198364, + "logps/rejected": -1.390271544456482, + "loss": 0.3147, + "nll_loss": 0.42867952585220337, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.014861812815070152, + "rewards/margins": 0.05465176701545715, + "rewards/rejected": -0.06951358169317245, + "step": 3510 + }, + { + "epoch": 2.7918983320095316, + "grad_norm": 25.731021881103516, + "learning_rate": 8.433490104000933e-07, + "log_odds_chosen": 4.046853065490723, + "log_odds_ratio": -0.03547334298491478, + "logits/chosen": 367.0143127441406, + "logits/rejected": 219.3634796142578, + "logps/chosen": -0.12885086238384247, + "logps/rejected": -1.706560492515564, + "loss": 0.3309, + "nll_loss": 0.27449122071266174, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006442543119192123, + "rewards/margins": 0.07888549566268921, + "rewards/rejected": -0.08532802760601044, + "step": 3515 + }, + { + "epoch": 2.795869737887212, + "grad_norm": 28.774776458740234, + "learning_rate": 8.427498280790526e-07, + "log_odds_chosen": 4.661393642425537, + "log_odds_ratio": -0.07130368053913116, + "logits/chosen": 292.60260009765625, + "logits/rejected": 327.83294677734375, + "logps/chosen": -0.21304917335510254, + "logps/rejected": -2.304790496826172, + "loss": 0.4884, + "nll_loss": 0.4281933307647705, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010652460157871246, + "rewards/margins": 0.10458706319332123, + "rewards/rejected": -0.11523952335119247, + "step": 3520 + }, + { + "epoch": 2.7998411437648927, + "grad_norm": 20.014240264892578, + "learning_rate": 8.421519210665191e-07, + "log_odds_chosen": 3.561429262161255, + "log_odds_ratio": -0.03733684495091438, + "logits/chosen": 284.66497802734375, + "logits/rejected": 329.2096252441406, + "logps/chosen": -0.11633183807134628, + "logps/rejected": -1.6266489028930664, + "loss": 0.3561, + "nll_loss": 0.2186901569366455, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005816592834889889, + "rewards/margins": 0.07551585882902145, + "rewards/rejected": -0.08133245259523392, + "step": 3525 + }, + { + "epoch": 2.8038125496425734, + "grad_norm": 22.12482261657715, + "learning_rate": 8.415552848449264e-07, + "log_odds_chosen": 4.567162990570068, + "log_odds_ratio": -0.07148279994726181, + "logits/chosen": 311.3138732910156, + "logits/rejected": 362.58966064453125, + "logps/chosen": -0.219939187169075, + "logps/rejected": -2.1984989643096924, + "loss": 0.4238, + "nll_loss": 0.2968602776527405, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010996958240866661, + "rewards/margins": 0.09892800450325012, + "rewards/rejected": -0.10992495715618134, + "step": 3530 + }, + { + "epoch": 2.807783955520254, + "grad_norm": 16.48638916015625, + "learning_rate": 8.409599149190806e-07, + "log_odds_chosen": 3.0395667552948, + "log_odds_ratio": -0.07454101741313934, + "logits/chosen": 276.5237731933594, + "logits/rejected": 244.59121704101562, + "logps/chosen": -0.13108542561531067, + "logps/rejected": -1.354529619216919, + "loss": 0.2954, + "nll_loss": 0.3918497562408447, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006554270628839731, + "rewards/margins": 0.061172209680080414, + "rewards/rejected": -0.06772647798061371, + "step": 3535 + }, + { + "epoch": 2.811755361397935, + "grad_norm": 27.75493812561035, + "learning_rate": 8.40365806816018e-07, + "log_odds_chosen": 4.262506008148193, + "log_odds_ratio": -0.06077839806675911, + "logits/chosen": 212.52090454101562, + "logits/rejected": 330.6539611816406, + "logps/chosen": -0.19797523319721222, + "logps/rejected": -2.2590105533599854, + "loss": 0.3203, + "nll_loss": 0.3225499987602234, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009898761287331581, + "rewards/margins": 0.10305176675319672, + "rewards/rejected": -0.11295051872730255, + "step": 3540 + }, + { + "epoch": 2.8157267672756157, + "grad_norm": 25.588348388671875, + "learning_rate": 8.397729560848629e-07, + "log_odds_chosen": 3.5041167736053467, + "log_odds_ratio": -0.03463669493794441, + "logits/chosen": 288.7964782714844, + "logits/rejected": 280.5426025390625, + "logps/chosen": -0.15195515751838684, + "logps/rejected": -1.7217051982879639, + "loss": 0.525, + "nll_loss": 0.6102060079574585, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0075977579690515995, + "rewards/margins": 0.07848750799894333, + "rewards/rejected": -0.08608527481555939, + "step": 3545 + }, + { + "epoch": 2.8196981731532964, + "grad_norm": 25.35242462158203, + "learning_rate": 8.391813582966891e-07, + "log_odds_chosen": 3.6780853271484375, + "log_odds_ratio": -0.06320033222436905, + "logits/chosen": 300.8489990234375, + "logits/rejected": 237.04409790039062, + "logps/chosen": -0.2152899205684662, + "logps/rejected": -1.7724052667617798, + "loss": 0.43, + "nll_loss": 0.3502303659915924, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01076449640095234, + "rewards/margins": 0.07785578072071075, + "rewards/rejected": -0.08862027525901794, + "step": 3550 + }, + { + "epoch": 2.8236695790309767, + "grad_norm": 25.48731803894043, + "learning_rate": 8.385910090443796e-07, + "log_odds_chosen": 3.4835567474365234, + "log_odds_ratio": -0.041546985507011414, + "logits/chosen": 313.85137939453125, + "logits/rejected": 290.18048095703125, + "logps/chosen": -0.1454685628414154, + "logps/rejected": -1.5348907709121704, + "loss": 0.3626, + "nll_loss": 0.3012126684188843, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007273429073393345, + "rewards/margins": 0.06947110593318939, + "rewards/rejected": -0.07674454152584076, + "step": 3555 + }, + { + "epoch": 2.8276409849086575, + "grad_norm": 25.764835357666016, + "learning_rate": 8.380019039424888e-07, + "log_odds_chosen": 4.237963676452637, + "log_odds_ratio": -0.032441817224025726, + "logits/chosen": 271.6688232421875, + "logits/rejected": 337.88592529296875, + "logps/chosen": -0.08865581452846527, + "logps/rejected": -1.7506749629974365, + "loss": 0.4494, + "nll_loss": 0.5733296275138855, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0044327909126877785, + "rewards/margins": 0.08310095965862274, + "rewards/rejected": -0.08753375709056854, + "step": 3560 + }, + { + "epoch": 2.831612390786338, + "grad_norm": 22.246402740478516, + "learning_rate": 8.374140386271069e-07, + "log_odds_chosen": 3.5762290954589844, + "log_odds_ratio": -0.06507720053195953, + "logits/chosen": 221.15878295898438, + "logits/rejected": 425.18212890625, + "logps/chosen": -0.1669560670852661, + "logps/rejected": -1.895721197128296, + "loss": 0.2844, + "nll_loss": 0.255990207195282, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008347803726792336, + "rewards/margins": 0.08643826097249985, + "rewards/rejected": -0.09478606283664703, + "step": 3565 + }, + { + "epoch": 2.835583796664019, + "grad_norm": 19.88669776916504, + "learning_rate": 8.368274087557231e-07, + "log_odds_chosen": 3.337949275970459, + "log_odds_ratio": -0.0535353422164917, + "logits/chosen": 209.78427124023438, + "logits/rejected": 359.92724609375, + "logps/chosen": -0.26661959290504456, + "logps/rejected": -2.1073858737945557, + "loss": 0.3546, + "nll_loss": 0.366042822599411, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.013330979272723198, + "rewards/margins": 0.09203831106424332, + "rewards/rejected": -0.10536929219961166, + "step": 3570 + }, + { + "epoch": 2.8395552025416997, + "grad_norm": 20.765398025512695, + "learning_rate": 8.362420100070909e-07, + "log_odds_chosen": 2.78408145904541, + "log_odds_ratio": -0.11600840091705322, + "logits/chosen": 237.7899932861328, + "logits/rejected": 289.7607116699219, + "logps/chosen": -0.27459466457366943, + "logps/rejected": -1.748347520828247, + "loss": 0.3883, + "nll_loss": 0.4142521917819977, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.013729733414947987, + "rewards/margins": 0.07368763536214828, + "rewards/rejected": -0.087417371571064, + "step": 3575 + }, + { + "epoch": 2.8435266084193804, + "grad_norm": 28.12468910217285, + "learning_rate": 8.356578380810946e-07, + "log_odds_chosen": 3.1770873069763184, + "log_odds_ratio": -0.051418136805295944, + "logits/chosen": 261.5604553222656, + "logits/rejected": 263.3280944824219, + "logps/chosen": -0.1799846589565277, + "logps/rejected": -1.5317844152450562, + "loss": 0.4097, + "nll_loss": 0.36274874210357666, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008999234065413475, + "rewards/margins": 0.06758998334407806, + "rewards/rejected": -0.07658922672271729, + "step": 3580 + }, + { + "epoch": 2.847498014297061, + "grad_norm": 20.850906372070312, + "learning_rate": 8.350748886986167e-07, + "log_odds_chosen": 3.5928845405578613, + "log_odds_ratio": -0.07899219542741776, + "logits/chosen": 258.620849609375, + "logits/rejected": 246.1554412841797, + "logps/chosen": -0.28887057304382324, + "logps/rejected": -2.014991283416748, + "loss": 0.4146, + "nll_loss": 0.5440294146537781, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.014443526975810528, + "rewards/margins": 0.08630602061748505, + "rewards/rejected": -0.10074955224990845, + "step": 3585 + }, + { + "epoch": 2.851469420174742, + "grad_norm": 32.508541107177734, + "learning_rate": 8.344931576014064e-07, + "log_odds_chosen": 4.088571548461914, + "log_odds_ratio": -0.026189763098955154, + "logits/chosen": 309.7861022949219, + "logits/rejected": 281.23187255859375, + "logps/chosen": -0.11156181246042252, + "logps/rejected": -2.1266567707061768, + "loss": 0.3974, + "nll_loss": 0.3166486620903015, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005578090436756611, + "rewards/margins": 0.1007547378540039, + "rewards/rejected": -0.10633282363414764, + "step": 3590 + }, + { + "epoch": 2.8554408260524227, + "grad_norm": 21.285322189331055, + "learning_rate": 8.339126405519482e-07, + "log_odds_chosen": 2.9751880168914795, + "log_odds_ratio": -0.05453987047076225, + "logits/chosen": 395.43243408203125, + "logits/rejected": 205.71664428710938, + "logps/chosen": -0.18474337458610535, + "logps/rejected": -1.4767392873764038, + "loss": 0.3599, + "nll_loss": 0.25680750608444214, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009237168356776237, + "rewards/margins": 0.06459980458021164, + "rewards/rejected": -0.07383696734905243, + "step": 3595 + }, + { + "epoch": 2.8594122319301034, + "grad_norm": 20.413772583007812, + "learning_rate": 8.333333333333333e-07, + "log_odds_chosen": 3.2024922370910645, + "log_odds_ratio": -0.07393097877502441, + "logits/chosen": 260.7085266113281, + "logits/rejected": 306.81732177734375, + "logps/chosen": -0.20199549198150635, + "logps/rejected": -1.9069011211395264, + "loss": 0.3761, + "nll_loss": 0.39756855368614197, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010099775157868862, + "rewards/margins": 0.085245281457901, + "rewards/rejected": -0.09534506499767303, + "step": 3600 + }, + { + "epoch": 2.863383637807784, + "grad_norm": 24.55630874633789, + "learning_rate": 8.327552317491304e-07, + "log_odds_chosen": 3.7288384437561035, + "log_odds_ratio": -0.03377489000558853, + "logits/chosen": 322.8309631347656, + "logits/rejected": 250.8905792236328, + "logps/chosen": -0.11226388067007065, + "logps/rejected": -1.4427611827850342, + "loss": 0.4202, + "nll_loss": 0.43689388036727905, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0056131938472390175, + "rewards/margins": 0.06652487069368362, + "rewards/rejected": -0.07213805615901947, + "step": 3605 + }, + { + "epoch": 2.867355043685465, + "grad_norm": 48.43893051147461, + "learning_rate": 8.321783316232578e-07, + "log_odds_chosen": 2.6593003273010254, + "log_odds_ratio": -0.08480212092399597, + "logits/chosen": 288.72784423828125, + "logits/rejected": 249.3139190673828, + "logps/chosen": -0.23106858134269714, + "logps/rejected": -1.5768505334854126, + "loss": 0.4155, + "nll_loss": 0.35096269845962524, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011553429998457432, + "rewards/margins": 0.06728909909725189, + "rewards/rejected": -0.07884253561496735, + "step": 3610 + }, + { + "epoch": 2.871326449563145, + "grad_norm": 15.93793773651123, + "learning_rate": 8.31602628799857e-07, + "log_odds_chosen": 4.548216819763184, + "log_odds_ratio": -0.022985955700278282, + "logits/chosen": 259.6219482421875, + "logits/rejected": 291.5054931640625, + "logps/chosen": -0.0862610787153244, + "logps/rejected": -1.969438910484314, + "loss": 0.3525, + "nll_loss": 0.5677148103713989, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0043130540288984776, + "rewards/margins": 0.09415888041257858, + "rewards/rejected": -0.09847193956375122, + "step": 3615 + }, + { + "epoch": 2.875297855440826, + "grad_norm": 21.008142471313477, + "learning_rate": 8.310281191431671e-07, + "log_odds_chosen": 3.653172254562378, + "log_odds_ratio": -0.05353150516748428, + "logits/chosen": 392.10675048828125, + "logits/rejected": 335.0233459472656, + "logps/chosen": -0.22985415160655975, + "logps/rejected": -1.759158730506897, + "loss": 0.3337, + "nll_loss": 0.3584360182285309, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011492708697915077, + "rewards/margins": 0.07646523416042328, + "rewards/rejected": -0.08795793354511261, + "step": 3620 + }, + { + "epoch": 2.8792692613185067, + "grad_norm": 22.55990219116211, + "learning_rate": 8.304547985373998e-07, + "log_odds_chosen": 3.8257369995117188, + "log_odds_ratio": -0.03157994896173477, + "logits/chosen": 312.32489013671875, + "logits/rejected": 237.1319122314453, + "logps/chosen": -0.1275133341550827, + "logps/rejected": -1.4732251167297363, + "loss": 0.37, + "nll_loss": 0.28386086225509644, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00637566763907671, + "rewards/margins": 0.06728558987379074, + "rewards/rejected": -0.07366125285625458, + "step": 3625 + }, + { + "epoch": 2.8832406671961874, + "grad_norm": 25.964088439941406, + "learning_rate": 8.298826628866154e-07, + "log_odds_chosen": 3.358013868331909, + "log_odds_ratio": -0.06236337497830391, + "logits/chosen": 359.78125, + "logits/rejected": 213.65756225585938, + "logps/chosen": -0.13523688912391663, + "logps/rejected": -1.4496452808380127, + "loss": 0.3508, + "nll_loss": 0.3368256986141205, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006761844269931316, + "rewards/margins": 0.06572041660547256, + "rewards/rejected": -0.07248225808143616, + "step": 3630 + }, + { + "epoch": 2.887212073073868, + "grad_norm": 21.916109085083008, + "learning_rate": 8.293117081146003e-07, + "log_odds_chosen": 2.733748435974121, + "log_odds_ratio": -0.17232218384742737, + "logits/chosen": 307.90667724609375, + "logits/rejected": 298.56878662109375, + "logps/chosen": -0.40919560194015503, + "logps/rejected": -2.092665195465088, + "loss": 0.3532, + "nll_loss": 0.5489149689674377, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02045978233218193, + "rewards/margins": 0.08417348563671112, + "rewards/rejected": -0.10463327169418335, + "step": 3635 + }, + { + "epoch": 2.891183478951549, + "grad_norm": 27.160499572753906, + "learning_rate": 8.287419301647449e-07, + "log_odds_chosen": 3.6234652996063232, + "log_odds_ratio": -0.047272827476263046, + "logits/chosen": 290.9279479980469, + "logits/rejected": 207.3817901611328, + "logps/chosen": -0.1130966767668724, + "logps/rejected": -1.4548262357711792, + "loss": 0.3461, + "nll_loss": 0.24585804343223572, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005654833745211363, + "rewards/margins": 0.06708647310733795, + "rewards/rejected": -0.0727413147687912, + "step": 3640 + }, + { + "epoch": 2.8951548848292297, + "grad_norm": 21.572660446166992, + "learning_rate": 8.281733249999222e-07, + "log_odds_chosen": 3.359938383102417, + "log_odds_ratio": -0.0552956759929657, + "logits/chosen": 273.9320068359375, + "logits/rejected": 251.6232147216797, + "logps/chosen": -0.14354383945465088, + "logps/rejected": -1.6231215000152588, + "loss": 0.3165, + "nll_loss": 0.369306743144989, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007177191786468029, + "rewards/margins": 0.07397888600826263, + "rewards/rejected": -0.08115608245134354, + "step": 3645 + }, + { + "epoch": 2.89912629070691, + "grad_norm": 23.444482803344727, + "learning_rate": 8.27605888602368e-07, + "log_odds_chosen": 3.4784095287323, + "log_odds_ratio": -0.05267126113176346, + "logits/chosen": 394.29229736328125, + "logits/rejected": 270.6805725097656, + "logps/chosen": -0.1418350636959076, + "logps/rejected": -1.267978549003601, + "loss": 0.3543, + "nll_loss": 0.3297954201698303, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00709175318479538, + "rewards/margins": 0.056307174265384674, + "rewards/rejected": -0.06339892745018005, + "step": 3650 + }, + { + "epoch": 2.9030976965845907, + "grad_norm": 25.005584716796875, + "learning_rate": 8.270396169735619e-07, + "log_odds_chosen": 2.632509708404541, + "log_odds_ratio": -0.09709839522838593, + "logits/chosen": 368.68621826171875, + "logits/rejected": 293.9627380371094, + "logps/chosen": -0.21916499733924866, + "logps/rejected": -1.4162951707839966, + "loss": 0.3263, + "nll_loss": 0.35514330863952637, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010958249680697918, + "rewards/margins": 0.059856511652469635, + "rewards/rejected": -0.07081475853919983, + "step": 3655 + }, + { + "epoch": 2.9070691024622715, + "grad_norm": 20.11078643798828, + "learning_rate": 8.264745061341079e-07, + "log_odds_chosen": 2.6697070598602295, + "log_odds_ratio": -0.08199223130941391, + "logits/chosen": 218.2410430908203, + "logits/rejected": 342.093505859375, + "logps/chosen": -0.21852803230285645, + "logps/rejected": -1.4549704790115356, + "loss": 0.3297, + "nll_loss": 0.33630573749542236, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010926402173936367, + "rewards/margins": 0.06182212382555008, + "rewards/rejected": -0.07274852693080902, + "step": 3660 + }, + { + "epoch": 2.911040508339952, + "grad_norm": 26.86299705505371, + "learning_rate": 8.259105521236187e-07, + "log_odds_chosen": 3.63004732131958, + "log_odds_ratio": -0.05410841107368469, + "logits/chosen": 304.13555908203125, + "logits/rejected": 246.46408081054688, + "logps/chosen": -0.12242048978805542, + "logps/rejected": -1.770677924156189, + "loss": 0.3446, + "nll_loss": 0.3197631537914276, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006121024489402771, + "rewards/margins": 0.08241288363933563, + "rewards/rejected": -0.0885339081287384, + "step": 3665 + }, + { + "epoch": 2.915011914217633, + "grad_norm": 26.29234504699707, + "learning_rate": 8.253477510005973e-07, + "log_odds_chosen": 3.3279595375061035, + "log_odds_ratio": -0.08358468860387802, + "logits/chosen": 281.2162170410156, + "logits/rejected": 326.6824035644531, + "logps/chosen": -0.15816627442836761, + "logps/rejected": -1.7283798456192017, + "loss": 0.4041, + "nll_loss": 0.24479857087135315, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00790831446647644, + "rewards/margins": 0.07851067185401917, + "rewards/rejected": -0.0864189937710762, + "step": 3670 + }, + { + "epoch": 2.9189833200953137, + "grad_norm": 25.961944580078125, + "learning_rate": 8.247860988423226e-07, + "log_odds_chosen": 3.988990068435669, + "log_odds_ratio": -0.034325532615184784, + "logits/chosen": 266.3609924316406, + "logits/rejected": 294.2245788574219, + "logps/chosen": -0.21574988961219788, + "logps/rejected": -2.2639670372009277, + "loss": 0.2977, + "nll_loss": 0.30673372745513916, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010787495411932468, + "rewards/margins": 0.10241083800792694, + "rewards/rejected": -0.11319833993911743, + "step": 3675 + }, + { + "epoch": 2.9229547259729944, + "grad_norm": 29.21520233154297, + "learning_rate": 8.24225591744734e-07, + "log_odds_chosen": 3.7201995849609375, + "log_odds_ratio": -0.03867778182029724, + "logits/chosen": 186.59046936035156, + "logits/rejected": 431.41693115234375, + "logps/chosen": -0.11616279184818268, + "logps/rejected": -1.8416305780410767, + "loss": 0.3822, + "nll_loss": 0.302185982465744, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005808139685541391, + "rewards/margins": 0.08627338707447052, + "rewards/rejected": -0.09208153188228607, + "step": 3680 + }, + { + "epoch": 2.926926131850675, + "grad_norm": 18.776615142822266, + "learning_rate": 8.23666225822317e-07, + "log_odds_chosen": 3.707352876663208, + "log_odds_ratio": -0.0334990993142128, + "logits/chosen": 292.5160827636719, + "logits/rejected": 330.042236328125, + "logps/chosen": -0.12229110300540924, + "logps/rejected": -1.8790652751922607, + "loss": 0.3169, + "nll_loss": 0.3112315535545349, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006114555522799492, + "rewards/margins": 0.08783871680498123, + "rewards/rejected": -0.09395327419042587, + "step": 3685 + }, + { + "epoch": 2.930897537728356, + "grad_norm": 46.925052642822266, + "learning_rate": 8.231079972079914e-07, + "log_odds_chosen": 3.986647367477417, + "log_odds_ratio": -0.13695932924747467, + "logits/chosen": 260.3636779785156, + "logits/rejected": 258.30419921875, + "logps/chosen": -0.30076563358306885, + "logps/rejected": -1.8926414251327515, + "loss": 0.432, + "nll_loss": 0.4053588807582855, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.015038281679153442, + "rewards/margins": 0.07959380000829697, + "rewards/rejected": -0.09463207423686981, + "step": 3690 + }, + { + "epoch": 2.9348689436060367, + "grad_norm": 25.633981704711914, + "learning_rate": 8.225509020529979e-07, + "log_odds_chosen": 3.198657512664795, + "log_odds_ratio": -0.06324279308319092, + "logits/chosen": 225.9180908203125, + "logits/rejected": 322.01019287109375, + "logps/chosen": -0.17518872022628784, + "logps/rejected": -1.7073516845703125, + "loss": 0.3587, + "nll_loss": 0.3306831121444702, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008759436197578907, + "rewards/margins": 0.07660814374685287, + "rewards/rejected": -0.0853675827383995, + "step": 3695 + }, + { + "epoch": 2.9388403494837174, + "grad_norm": 26.146686553955078, + "learning_rate": 8.219949365267865e-07, + "log_odds_chosen": 2.8343605995178223, + "log_odds_ratio": -0.09005337208509445, + "logits/chosen": 366.45263671875, + "logits/rejected": 246.6904754638672, + "logps/chosen": -0.2314903736114502, + "logps/rejected": -1.2461912631988525, + "loss": 0.4265, + "nll_loss": 0.3210602402687073, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011574518866837025, + "rewards/margins": 0.05073503777384758, + "rewards/rejected": -0.06230955570936203, + "step": 3700 + }, + { + "epoch": 2.942811755361398, + "grad_norm": 24.32229232788086, + "learning_rate": 8.214400968169071e-07, + "log_odds_chosen": 4.051434516906738, + "log_odds_ratio": -0.035749178379774094, + "logits/chosen": 408.79180908203125, + "logits/rejected": 208.0432586669922, + "logps/chosen": -0.12245174497365952, + "logps/rejected": -1.649357557296753, + "loss": 0.3691, + "nll_loss": 0.27775415778160095, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0061225867830216885, + "rewards/margins": 0.076345294713974, + "rewards/rejected": -0.08246787637472153, + "step": 3705 + }, + { + "epoch": 2.946783161239079, + "grad_norm": 20.886632919311523, + "learning_rate": 8.208863791288982e-07, + "log_odds_chosen": 1.9202591180801392, + "log_odds_ratio": -0.16510021686553955, + "logits/chosen": 357.2604064941406, + "logits/rejected": 294.9902038574219, + "logps/chosen": -0.2927519381046295, + "logps/rejected": -1.1146470308303833, + "loss": 0.3704, + "nll_loss": 0.3985922336578369, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.014637596905231476, + "rewards/margins": 0.04109475016593933, + "rewards/rejected": -0.05573234707117081, + "step": 3710 + }, + { + "epoch": 2.950754567116759, + "grad_norm": 23.93471336364746, + "learning_rate": 8.203337796861792e-07, + "log_odds_chosen": 3.167175769805908, + "log_odds_ratio": -0.11499425023794174, + "logits/chosen": 268.34649658203125, + "logits/rejected": 203.5771484375, + "logps/chosen": -0.3126305639743805, + "logps/rejected": -1.8504225015640259, + "loss": 0.3641, + "nll_loss": 0.4621480405330658, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.015631528571248055, + "rewards/margins": 0.07688958942890167, + "rewards/rejected": -0.09252111613750458, + "step": 3715 + }, + { + "epoch": 2.95472597299444, + "grad_norm": 16.99188804626465, + "learning_rate": 8.197822947299412e-07, + "log_odds_chosen": 5.456472873687744, + "log_odds_ratio": -0.005662807263433933, + "logits/chosen": 311.07086181640625, + "logits/rejected": 217.932373046875, + "logps/chosen": -0.11500344425439835, + "logps/rejected": -2.622190237045288, + "loss": 0.3117, + "nll_loss": 0.22724008560180664, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005750172771513462, + "rewards/margins": 0.12535934150218964, + "rewards/rejected": -0.13110950589179993, + "step": 3720 + }, + { + "epoch": 2.9586973788721207, + "grad_norm": 28.691146850585938, + "learning_rate": 8.192319205190406e-07, + "log_odds_chosen": 4.087296485900879, + "log_odds_ratio": -0.0780719742178917, + "logits/chosen": 328.07684326171875, + "logits/rejected": 221.5873565673828, + "logps/chosen": -0.10672744363546371, + "logps/rejected": -1.4921457767486572, + "loss": 0.418, + "nll_loss": 0.3577578663825989, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005336372647434473, + "rewards/margins": 0.06927091628313065, + "rewards/rejected": -0.07460729032754898, + "step": 3725 + }, + { + "epoch": 2.9626687847498014, + "grad_norm": 16.78936004638672, + "learning_rate": 8.186826533298912e-07, + "log_odds_chosen": 3.149646759033203, + "log_odds_ratio": -0.09916869550943375, + "logits/chosen": 464.217041015625, + "logits/rejected": 231.6147918701172, + "logps/chosen": -0.2227720320224762, + "logps/rejected": -1.3256566524505615, + "loss": 0.3351, + "nll_loss": 0.4213685989379883, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011138602159917355, + "rewards/margins": 0.055144231766462326, + "rewards/rejected": -0.06628282368183136, + "step": 3730 + }, + { + "epoch": 2.966640190627482, + "grad_norm": 21.047441482543945, + "learning_rate": 8.181344894563601e-07, + "log_odds_chosen": 3.3799023628234863, + "log_odds_ratio": -0.04747181385755539, + "logits/chosen": 225.9113006591797, + "logits/rejected": 278.0364990234375, + "logps/chosen": -0.14476028084754944, + "logps/rejected": -1.603838562965393, + "loss": 0.3064, + "nll_loss": 0.4089042544364929, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007238014135509729, + "rewards/margins": 0.07295391708612442, + "rewards/rejected": -0.08019192516803741, + "step": 3735 + }, + { + "epoch": 2.970611596505163, + "grad_norm": 26.87488555908203, + "learning_rate": 8.175874252096609e-07, + "log_odds_chosen": 4.79461669921875, + "log_odds_ratio": -0.02344256453216076, + "logits/chosen": 224.568115234375, + "logits/rejected": 237.93795776367188, + "logps/chosen": -0.08755536377429962, + "logps/rejected": -1.7086296081542969, + "loss": 0.3787, + "nll_loss": 0.4090026915073395, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004377768374979496, + "rewards/margins": 0.0810537114739418, + "rewards/rejected": -0.08543147891759872, + "step": 3740 + }, + { + "epoch": 2.9745830023828437, + "grad_norm": 30.589311599731445, + "learning_rate": 8.170414569182505e-07, + "log_odds_chosen": 3.1199939250946045, + "log_odds_ratio": -0.04739534854888916, + "logits/chosen": 223.1153106689453, + "logits/rejected": 348.6710205078125, + "logps/chosen": -0.23181037604808807, + "logps/rejected": -1.9143383502960205, + "loss": 0.4121, + "nll_loss": 0.3163473606109619, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011590519919991493, + "rewards/margins": 0.08412639796733856, + "rewards/rejected": -0.0957169160246849, + "step": 3745 + }, + { + "epoch": 2.978554408260524, + "grad_norm": 29.43885612487793, + "learning_rate": 8.164965809277262e-07, + "log_odds_chosen": 2.644817352294922, + "log_odds_ratio": -0.09716804325580597, + "logits/chosen": 293.18695068359375, + "logits/rejected": 317.6696472167969, + "logps/chosen": -0.25146573781967163, + "logps/rejected": -1.4811335802078247, + "loss": 0.3221, + "nll_loss": 0.3455609679222107, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012573286890983582, + "rewards/margins": 0.06148339435458183, + "rewards/rejected": -0.07405668497085571, + "step": 3750 + }, + { + "epoch": 2.9825258141382047, + "grad_norm": 16.82297706604004, + "learning_rate": 8.159527936007208e-07, + "log_odds_chosen": 3.3892300128936768, + "log_odds_ratio": -0.04550132527947426, + "logits/chosen": 257.3266296386719, + "logits/rejected": 390.421875, + "logps/chosen": -0.21004052460193634, + "logps/rejected": -1.9455446004867554, + "loss": 0.4137, + "nll_loss": 0.3465914726257324, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010502026416361332, + "rewards/margins": 0.08677520602941513, + "rewards/rejected": -0.09727723896503448, + "step": 3755 + }, + { + "epoch": 2.9864972200158855, + "grad_norm": 22.206357955932617, + "learning_rate": 8.154100913168028e-07, + "log_odds_chosen": 3.4398300647735596, + "log_odds_ratio": -0.03233512490987778, + "logits/chosen": 242.57949829101562, + "logits/rejected": 351.6434020996094, + "logps/chosen": -0.1868184357881546, + "logps/rejected": -1.9083659648895264, + "loss": 0.4083, + "nll_loss": 0.27293431758880615, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0093409214168787, + "rewards/margins": 0.08607737720012665, + "rewards/rejected": -0.0954183042049408, + "step": 3760 + }, + { + "epoch": 2.990468625893566, + "grad_norm": 30.97701644897461, + "learning_rate": 8.148684704723743e-07, + "log_odds_chosen": 2.4790635108947754, + "log_odds_ratio": -0.24006590247154236, + "logits/chosen": 268.708740234375, + "logits/rejected": 207.7413330078125, + "logps/chosen": -0.33791905641555786, + "logps/rejected": -1.7299648523330688, + "loss": 0.4932, + "nll_loss": 0.571212887763977, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.016895953565835953, + "rewards/margins": 0.06960227340459824, + "rewards/rejected": -0.08649824559688568, + "step": 3765 + }, + { + "epoch": 2.994440031771247, + "grad_norm": 18.902019500732422, + "learning_rate": 8.143279274805705e-07, + "log_odds_chosen": 4.727214813232422, + "log_odds_ratio": -0.02812029980123043, + "logits/chosen": 257.0202941894531, + "logits/rejected": 227.53219604492188, + "logps/chosen": -0.1045892983675003, + "logps/rejected": -1.8573554754257202, + "loss": 0.3164, + "nll_loss": 0.2800999581813812, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0052294647321105, + "rewards/margins": 0.08763831108808517, + "rewards/rejected": -0.09286777675151825, + "step": 3770 + }, + { + "epoch": 2.9984114376489277, + "grad_norm": 20.72915267944336, + "learning_rate": 8.137884587711594e-07, + "log_odds_chosen": 3.518254041671753, + "log_odds_ratio": -0.03578583896160126, + "logits/chosen": 275.1228942871094, + "logits/rejected": 329.595458984375, + "logps/chosen": -0.20344781875610352, + "logps/rejected": -2.006162166595459, + "loss": 0.348, + "nll_loss": 0.2927265763282776, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010172391310334206, + "rewards/margins": 0.09013573080301285, + "rewards/rejected": -0.1003081351518631, + "step": 3775 + }, + { + "epoch": 3.0, + "eval_log_odds_chosen": 0.3720725178718567, + "eval_log_odds_ratio": -0.699243426322937, + "eval_logits/chosen": 295.62017822265625, + "eval_logits/rejected": 266.0260314941406, + "eval_logps/chosen": -1.3004335165023804, + "eval_logps/rejected": -1.5281699895858765, + "eval_loss": 1.7558966875076294, + "eval_nll_loss": 1.6941194534301758, + "eval_rewards/accuracies": 0.597122311592102, + "eval_rewards/chosen": -0.06502167135477066, + "eval_rewards/margins": 0.011386833153665066, + "eval_rewards/rejected": -0.0764085054397583, + "eval_runtime": 91.0513, + "eval_samples_per_second": 6.073, + "eval_steps_per_second": 1.527, + "step": 3777 + }, + { + "epoch": 3.0, + "step": 3777, "total_flos": 0.0, - "train_loss": 1.8019611810861456, - "train_runtime": 4470.8327, - "train_samples_per_second": 1.126, - "train_steps_per_second": 0.282 + "train_loss": 0.968865410152301, + "train_runtime": 16784.8411, + "train_samples_per_second": 0.9, + "train_steps_per_second": 0.225 } ], "logging_steps": 5, - "max_steps": 1259, + "max_steps": 3777, "num_input_tokens_seen": 0, - "num_train_epochs": 1, - "save_steps": 500, + "num_train_epochs": 3, + "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, - "should_save": false, - "should_training_stop": false + "should_save": true, + "should_training_stop": true }, "attributes": {} }