{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 711, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.10548523206751055, "grad_norm": 30.46313288029562, "learning_rate": 3.472222222222222e-07, "log_odds_chosen": 0.16534051299095154, "log_odds_ratio": -0.6545013189315796, "logits/chosen": -0.41492316126823425, "logits/rejected": -0.435069739818573, "logps/chosen": -0.9887133836746216, "logps/rejected": -1.1123149394989014, "loss": 3.875, "nll_loss": 3.5094263553619385, "rewards/accuracies": 0.4300000071525574, "rewards/chosen": -0.09887134283781052, "rewards/margins": 0.012360147200524807, "rewards/rejected": -0.1112314984202385, "step": 25 }, { "epoch": 0.2109704641350211, "grad_norm": 74.88568859723209, "learning_rate": 6.944444444444444e-07, "log_odds_chosen": 0.10536957532167435, "log_odds_ratio": -0.6801052689552307, "logits/chosen": -0.9151662588119507, "logits/rejected": -0.9367119073867798, "logps/chosen": -1.117153286933899, "logps/rejected": -1.173445463180542, "loss": 0.8198, "nll_loss": 0.7042462229728699, "rewards/accuracies": 0.5299999713897705, "rewards/chosen": -0.11171531677246094, "rewards/margins": 0.005629217717796564, "rewards/rejected": -0.11734454333782196, "step": 50 }, { "epoch": 0.31645569620253167, "grad_norm": 4.304125765256844, "learning_rate": 9.953051643192487e-07, "log_odds_chosen": 0.1444670855998993, "log_odds_ratio": -0.6669880747795105, "logits/chosen": -0.8349351286888123, "logits/rejected": -0.8575166463851929, "logps/chosen": -0.9019416570663452, "logps/rejected": -0.9740175604820251, "loss": 0.7274, "nll_loss": 0.6248427033424377, "rewards/accuracies": 0.4399999976158142, "rewards/chosen": -0.09019416570663452, "rewards/margins": 0.007207601796835661, "rewards/rejected": -0.09740178287029266, "step": 75 }, { "epoch": 0.4219409282700422, "grad_norm": 3.3433596753401935, "learning_rate": 9.561815336463224e-07, "log_odds_chosen": 0.18847505748271942, "log_odds_ratio": -0.6345129609107971, "logits/chosen": -0.8627865314483643, "logits/rejected": -0.8871102333068848, "logps/chosen": -0.774340033531189, "logps/rejected": -0.8760174512863159, "loss": 0.6753, "nll_loss": 0.6059034466743469, "rewards/accuracies": 0.5, "rewards/chosen": -0.0774340033531189, "rewards/margins": 0.010167734697461128, "rewards/rejected": -0.08760173618793488, "step": 100 }, { "epoch": 0.5274261603375527, "grad_norm": 6.317691511765167, "learning_rate": 9.170579029733959e-07, "log_odds_chosen": 0.18306466937065125, "log_odds_ratio": -0.6469370126724243, "logits/chosen": -0.8607066869735718, "logits/rejected": -0.8643490672111511, "logps/chosen": -0.5890992283821106, "logps/rejected": -0.6761559247970581, "loss": 0.6409, "nll_loss": 0.6029787659645081, "rewards/accuracies": 0.4699999988079071, "rewards/chosen": -0.05890992656350136, "rewards/margins": 0.00870567373931408, "rewards/rejected": -0.06761559844017029, "step": 125 }, { "epoch": 0.6329113924050633, "grad_norm": 5.056963941311914, "learning_rate": 8.779342723004695e-07, "log_odds_chosen": 0.22126027941703796, "log_odds_ratio": -0.6363757848739624, "logits/chosen": -0.9035418629646301, "logits/rejected": -0.9076377749443054, "logps/chosen": -0.5781998634338379, "logps/rejected": -0.6600324511528015, "loss": 0.6416, "nll_loss": 0.5581729412078857, "rewards/accuracies": 0.5, "rewards/chosen": -0.05781999230384827, "rewards/margins": 0.008183254860341549, "rewards/rejected": -0.06600324809551239, "step": 150 }, { "epoch": 0.7383966244725738, "grad_norm": 3.205932624220109, "learning_rate": 8.38810641627543e-07, "log_odds_chosen": 0.3526044189929962, "log_odds_ratio": -0.5982345342636108, "logits/chosen": -0.8294059634208679, "logits/rejected": -0.8529558777809143, "logps/chosen": -0.5301632881164551, "logps/rejected": -0.661786675453186, "loss": 0.5897, "nll_loss": 0.5430833697319031, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.05301632732152939, "rewards/margins": 0.013162333518266678, "rewards/rejected": -0.06617865711450577, "step": 175 }, { "epoch": 0.8438818565400844, "grad_norm": 2.682686637504157, "learning_rate": 7.996870109546165e-07, "log_odds_chosen": 0.2721116840839386, "log_odds_ratio": -0.6195984482765198, "logits/chosen": -0.8821828365325928, "logits/rejected": -0.9078083634376526, "logps/chosen": -0.5987895131111145, "logps/rejected": -0.757358193397522, "loss": 0.6351, "nll_loss": 0.5751867890357971, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.05987895652651787, "rewards/margins": 0.01585685834288597, "rewards/rejected": -0.07573581486940384, "step": 200 }, { "epoch": 0.9493670886075949, "grad_norm": 1.943018301402564, "learning_rate": 7.605633802816901e-07, "log_odds_chosen": 0.19566155970096588, "log_odds_ratio": -0.6696605682373047, "logits/chosen": -0.8021306395530701, "logits/rejected": -0.8081836104393005, "logps/chosen": -0.509245753288269, "logps/rejected": -0.6033316254615784, "loss": 0.6222, "nll_loss": 0.5073065161705017, "rewards/accuracies": 0.5099999904632568, "rewards/chosen": -0.05092458426952362, "rewards/margins": 0.00940858107060194, "rewards/rejected": -0.06033316254615784, "step": 225 }, { "epoch": 1.0548523206751055, "grad_norm": 1.7595986013614073, "learning_rate": 7.214397496087636e-07, "log_odds_chosen": 0.3115460276603699, "log_odds_ratio": -0.6099376678466797, "logits/chosen": -0.8336898684501648, "logits/rejected": -0.8424633145332336, "logps/chosen": -0.5518218874931335, "logps/rejected": -0.6624475717544556, "loss": 0.599, "nll_loss": 0.506114661693573, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.055182188749313354, "rewards/margins": 0.011062567122280598, "rewards/rejected": -0.06624475121498108, "step": 250 }, { "epoch": 1.160337552742616, "grad_norm": 2.131980857522191, "learning_rate": 6.823161189358372e-07, "log_odds_chosen": 0.3637891113758087, "log_odds_ratio": -0.598861575126648, "logits/chosen": -0.8110294342041016, "logits/rejected": -0.8618900775909424, "logps/chosen": -0.5288816690444946, "logps/rejected": -0.7046244144439697, "loss": 0.6147, "nll_loss": 0.5313621163368225, "rewards/accuracies": 0.6100000143051147, "rewards/chosen": -0.0528881661593914, "rewards/margins": 0.017574286088347435, "rewards/rejected": -0.07046245038509369, "step": 275 }, { "epoch": 1.2658227848101267, "grad_norm": 1.4186554490026797, "learning_rate": 6.431924882629108e-07, "log_odds_chosen": 0.3448210060596466, "log_odds_ratio": -0.590344250202179, "logits/chosen": -0.8463892340660095, "logits/rejected": -0.8793061971664429, "logps/chosen": -0.5530957579612732, "logps/rejected": -0.7298399209976196, "loss": 0.5784, "nll_loss": 0.5125244855880737, "rewards/accuracies": 0.5600000023841858, "rewards/chosen": -0.05530957877635956, "rewards/margins": 0.017674420028924942, "rewards/rejected": -0.0729840025305748, "step": 300 }, { "epoch": 1.371308016877637, "grad_norm": 1.5618238810197242, "learning_rate": 6.040688575899842e-07, "log_odds_chosen": 0.3834618330001831, "log_odds_ratio": -0.5801408290863037, "logits/chosen": -0.7922927141189575, "logits/rejected": -0.807830274105072, "logps/chosen": -0.5033449530601501, "logps/rejected": -0.6853150725364685, "loss": 0.6035, "nll_loss": 0.5220791697502136, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05033449828624725, "rewards/margins": 0.018197014927864075, "rewards/rejected": -0.06853151321411133, "step": 325 }, { "epoch": 1.4767932489451476, "grad_norm": 1.6694346253469523, "learning_rate": 5.649452269170579e-07, "log_odds_chosen": 0.23677273094654083, "log_odds_ratio": -0.6395828127861023, "logits/chosen": -0.7438558340072632, "logits/rejected": -0.76462721824646, "logps/chosen": -0.5047862529754639, "logps/rejected": -0.5797430276870728, "loss": 0.5862, "nll_loss": 0.5142392516136169, "rewards/accuracies": 0.5099999904632568, "rewards/chosen": -0.05047862231731415, "rewards/margins": 0.007495685946196318, "rewards/rejected": -0.05797431617975235, "step": 350 }, { "epoch": 1.5822784810126582, "grad_norm": 1.6654016970385481, "learning_rate": 5.258215962441315e-07, "log_odds_chosen": 0.3365418314933777, "log_odds_ratio": -0.5955182313919067, "logits/chosen": -0.761318564414978, "logits/rejected": -0.7731548547744751, "logps/chosen": -0.49533072113990784, "logps/rejected": -0.6420021057128906, "loss": 0.5995, "nll_loss": 0.5266813039779663, "rewards/accuracies": 0.5400000214576721, "rewards/chosen": -0.04953307658433914, "rewards/margins": 0.014667129144072533, "rewards/rejected": -0.06420020759105682, "step": 375 }, { "epoch": 1.6877637130801688, "grad_norm": 1.4385786677568302, "learning_rate": 4.86697965571205e-07, "log_odds_chosen": 0.27425241470336914, "log_odds_ratio": -0.6355130076408386, "logits/chosen": -0.7875126600265503, "logits/rejected": -0.7872836589813232, "logps/chosen": -0.5322539806365967, "logps/rejected": -0.6498347520828247, "loss": 0.5813, "nll_loss": 0.5109381675720215, "rewards/accuracies": 0.5099999904632568, "rewards/chosen": -0.05322539806365967, "rewards/margins": 0.011758077889680862, "rewards/rejected": -0.06498347222805023, "step": 400 }, { "epoch": 1.7932489451476794, "grad_norm": 1.3133499382164198, "learning_rate": 4.475743348982786e-07, "log_odds_chosen": 0.4066045880317688, "log_odds_ratio": -0.5856396555900574, "logits/chosen": -0.8323702812194824, "logits/rejected": -0.8558816313743591, "logps/chosen": -0.5287883281707764, "logps/rejected": -0.7097746133804321, "loss": 0.5983, "nll_loss": 0.5335114002227783, "rewards/accuracies": 0.5600000023841858, "rewards/chosen": -0.052878838032484055, "rewards/margins": 0.018098626285791397, "rewards/rejected": -0.07097746431827545, "step": 425 }, { "epoch": 1.8987341772151898, "grad_norm": 1.473901535326238, "learning_rate": 4.084507042253521e-07, "log_odds_chosen": 0.41008836030960083, "log_odds_ratio": -0.5757314562797546, "logits/chosen": -0.8004408478736877, "logits/rejected": -0.8020641207695007, "logps/chosen": -0.550152599811554, "logps/rejected": -0.7004598379135132, "loss": 0.586, "nll_loss": 0.5043174028396606, "rewards/accuracies": 0.5899999737739563, "rewards/chosen": -0.055015262216329575, "rewards/margins": 0.015030724927783012, "rewards/rejected": -0.07004599273204803, "step": 450 }, { "epoch": 2.0042194092827006, "grad_norm": 1.2607531968315342, "learning_rate": 3.6932707355242563e-07, "log_odds_chosen": 0.2668948769569397, "log_odds_ratio": -0.6350302696228027, "logits/chosen": -0.7956358194351196, "logits/rejected": -0.7938474416732788, "logps/chosen": -0.5379875898361206, "logps/rejected": -0.6204279065132141, "loss": 0.5926, "nll_loss": 0.529137372970581, "rewards/accuracies": 0.5299999713897705, "rewards/chosen": -0.05379876494407654, "rewards/margins": 0.008244026452302933, "rewards/rejected": -0.06204278767108917, "step": 475 }, { "epoch": 2.109704641350211, "grad_norm": 1.3082002649896622, "learning_rate": 3.302034428794992e-07, "log_odds_chosen": 0.35187309980392456, "log_odds_ratio": -0.5857384204864502, "logits/chosen": -0.7866548299789429, "logits/rejected": -0.8024720549583435, "logps/chosen": -0.4700208902359009, "logps/rejected": -0.6245195865631104, "loss": 0.579, "nll_loss": 0.506442129611969, "rewards/accuracies": 0.6299999952316284, "rewards/chosen": -0.047002095729112625, "rewards/margins": 0.015449865721166134, "rewards/rejected": -0.06245195493102074, "step": 500 }, { "epoch": 2.2151898734177213, "grad_norm": 1.3044699084998892, "learning_rate": 2.9107981220657274e-07, "log_odds_chosen": 0.2667557895183563, "log_odds_ratio": -0.6398404836654663, "logits/chosen": -0.7439711689949036, "logits/rejected": -0.7557228207588196, "logps/chosen": -0.5503268837928772, "logps/rejected": -0.6489098072052002, "loss": 0.5758, "nll_loss": 0.5791399478912354, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.05503269284963608, "rewards/margins": 0.009858297184109688, "rewards/rejected": -0.06489098817110062, "step": 525 }, { "epoch": 2.320675105485232, "grad_norm": 1.2737811980928775, "learning_rate": 2.5195618153364627e-07, "log_odds_chosen": 0.27772146463394165, "log_odds_ratio": -0.6127716898918152, "logits/chosen": -0.7682886719703674, "logits/rejected": -0.7594354152679443, "logps/chosen": -0.5556504726409912, "logps/rejected": -0.6759030222892761, "loss": 0.5782, "nll_loss": 0.5577492713928223, "rewards/accuracies": 0.5099999904632568, "rewards/chosen": -0.05556504800915718, "rewards/margins": 0.012025254778563976, "rewards/rejected": -0.06759029626846313, "step": 550 }, { "epoch": 2.4261603375527425, "grad_norm": 1.2627851122190357, "learning_rate": 2.1283255086071985e-07, "log_odds_chosen": 0.49636557698249817, "log_odds_ratio": -0.5460684895515442, "logits/chosen": -0.7931969165802002, "logits/rejected": -0.8067951798439026, "logps/chosen": -0.5168901085853577, "logps/rejected": -0.7222075462341309, "loss": 0.56, "nll_loss": 0.496804803609848, "rewards/accuracies": 0.5699999928474426, "rewards/chosen": -0.05168901011347771, "rewards/margins": 0.02053174003958702, "rewards/rejected": -0.07222075760364532, "step": 575 }, { "epoch": 2.5316455696202533, "grad_norm": 1.5232359717633108, "learning_rate": 1.7370892018779344e-07, "log_odds_chosen": 0.41104409098625183, "log_odds_ratio": -0.5702406167984009, "logits/chosen": -0.7789013385772705, "logits/rejected": -0.793701171875, "logps/chosen": -0.4571213126182556, "logps/rejected": -0.6431244015693665, "loss": 0.5704, "nll_loss": 0.4690850079059601, "rewards/accuracies": 0.5799999833106995, "rewards/chosen": -0.04571213200688362, "rewards/margins": 0.018600303679704666, "rewards/rejected": -0.06431242823600769, "step": 600 }, { "epoch": 2.6371308016877637, "grad_norm": 1.4542420899735284, "learning_rate": 1.3458528951486697e-07, "log_odds_chosen": 0.20925407111644745, "log_odds_ratio": -0.659648597240448, "logits/chosen": -0.6941782236099243, "logits/rejected": -0.710488498210907, "logps/chosen": -0.5375428795814514, "logps/rejected": -0.6434080600738525, "loss": 0.5868, "nll_loss": 0.5830110311508179, "rewards/accuracies": 0.38999998569488525, "rewards/chosen": -0.0537542924284935, "rewards/margins": 0.01058651227504015, "rewards/rejected": -0.06434080749750137, "step": 625 }, { "epoch": 2.742616033755274, "grad_norm": 1.231391960908516, "learning_rate": 9.546165884194053e-08, "log_odds_chosen": 0.3515250086784363, "log_odds_ratio": -0.5891639590263367, "logits/chosen": -0.749308168888092, "logits/rejected": -0.7614193558692932, "logps/chosen": -0.5068987011909485, "logps/rejected": -0.6622204780578613, "loss": 0.5909, "nll_loss": 0.5026033520698547, "rewards/accuracies": 0.5099999904632568, "rewards/chosen": -0.05068986862897873, "rewards/margins": 0.015532179735600948, "rewards/rejected": -0.06622204929590225, "step": 650 }, { "epoch": 2.848101265822785, "grad_norm": 1.3722555215938899, "learning_rate": 5.633802816901408e-08, "log_odds_chosen": 0.18586792051792145, "log_odds_ratio": -0.6605378985404968, "logits/chosen": -0.8417736291885376, "logits/rejected": -0.8345857858657837, "logps/chosen": -0.5810900926589966, "logps/rejected": -0.6728335022926331, "loss": 0.5815, "nll_loss": 0.5025544166564941, "rewards/accuracies": 0.5099999904632568, "rewards/chosen": -0.05810901150107384, "rewards/margins": 0.009174343198537827, "rewards/rejected": -0.06728334724903107, "step": 675 }, { "epoch": 2.9535864978902953, "grad_norm": 1.9284456011672904, "learning_rate": 1.7214397496087635e-08, "log_odds_chosen": 0.1927904486656189, "log_odds_ratio": -0.6649494171142578, "logits/chosen": -0.678924024105072, "logits/rejected": -0.6770960092544556, "logps/chosen": -0.5262628793716431, "logps/rejected": -0.580658495426178, "loss": 0.6001, "nll_loss": 0.5695458650588989, "rewards/accuracies": 0.4099999964237213, "rewards/chosen": -0.05262628570199013, "rewards/margins": 0.005439565982669592, "rewards/rejected": -0.05806584656238556, "step": 700 } ], "logging_steps": 25, "max_steps": 711, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }