{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 252, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05952380952380952, "grad_norm": 1882.16845703125, "learning_rate": 2.5000000000000004e-07, "log_odds_chosen": -0.12500545382499695, "log_odds_ratio": -0.9542725682258606, "logits/chosen": 164.27560424804688, "logits/rejected": 208.2156219482422, "logps/chosen": -14.962623596191406, "logps/rejected": -14.837625503540039, "loss": 15.2102, "nll_loss": 14.645106315612793, "rewards/accuracies": 0.3499999940395355, "rewards/chosen": -7.481311798095703, "rewards/margins": -0.06250032037496567, "rewards/rejected": -7.4188127517700195, "step": 5 }, { "epoch": 0.11904761904761904, "grad_norm": 1088.200927734375, "learning_rate": 5.000000000000001e-07, "log_odds_chosen": -0.05857907608151436, "log_odds_ratio": -1.1228755712509155, "logits/chosen": 244.7198486328125, "logits/rejected": 227.472412109375, "logps/chosen": -13.305212020874023, "logps/rejected": -13.2466402053833, "loss": 13.2102, "nll_loss": 12.955018997192383, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -6.652606010437012, "rewards/margins": -0.02928643301129341, "rewards/rejected": -6.62332010269165, "step": 10 }, { "epoch": 0.17857142857142858, "grad_norm": 750.9570922851562, "learning_rate": 7.5e-07, "log_odds_chosen": -0.21123185753822327, "log_odds_ratio": -0.9812790751457214, "logits/chosen": 247.1725311279297, "logits/rejected": 319.17498779296875, "logps/chosen": -8.516304969787598, "logps/rejected": -8.305025100708008, "loss": 8.5319, "nll_loss": 8.219998359680176, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": -4.258152484893799, "rewards/margins": -0.10563965886831284, "rewards/rejected": -4.152512550354004, "step": 15 }, { "epoch": 0.23809523809523808, "grad_norm": 150.73776245117188, "learning_rate": 1.0000000000000002e-06, "log_odds_chosen": 0.13484135270118713, "log_odds_ratio": -0.7639249563217163, "logits/chosen": 232.5557098388672, "logits/rejected": 278.36358642578125, "logps/chosen": -5.115365505218506, "logps/rejected": -5.248563289642334, "loss": 5.7184, "nll_loss": 5.363820552825928, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.557682752609253, "rewards/margins": 0.06659835577011108, "rewards/rejected": -2.624281644821167, "step": 20 }, { "epoch": 0.2976190476190476, "grad_norm": 132.29954528808594, "learning_rate": 1.25e-06, "log_odds_chosen": -0.15154710412025452, "log_odds_ratio": -0.918846607208252, "logits/chosen": 329.36016845703125, "logits/rejected": 322.80316162109375, "logps/chosen": -3.490060329437256, "logps/rejected": -3.336357593536377, "loss": 3.7837, "nll_loss": 3.5224061012268066, "rewards/accuracies": 0.5, "rewards/chosen": -1.745030164718628, "rewards/margins": -0.07685144990682602, "rewards/rejected": -1.6681787967681885, "step": 25 }, { "epoch": 0.35714285714285715, "grad_norm": 66.39665985107422, "learning_rate": 1.5e-06, "log_odds_chosen": 0.1455865204334259, "log_odds_ratio": -0.7501406669616699, "logits/chosen": 351.40875244140625, "logits/rejected": 335.61932373046875, "logps/chosen": -2.3319332599639893, "logps/rejected": -2.4487643241882324, "loss": 2.9334, "nll_loss": 2.8211100101470947, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.1659666299819946, "rewards/margins": 0.05841563269495964, "rewards/rejected": -1.2243821620941162, "step": 30 }, { "epoch": 0.4166666666666667, "grad_norm": 50.658451080322266, "learning_rate": 1.75e-06, "log_odds_chosen": 0.17109766602516174, "log_odds_ratio": -0.6774098873138428, "logits/chosen": 386.70220947265625, "logits/rejected": 384.7711486816406, "logps/chosen": -1.935703992843628, "logps/rejected": -2.0668416023254395, "loss": 2.3732, "nll_loss": 2.1755499839782715, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.967851996421814, "rewards/margins": 0.06556873768568039, "rewards/rejected": -1.0334208011627197, "step": 35 }, { "epoch": 0.47619047619047616, "grad_norm": 50.109127044677734, "learning_rate": 2.0000000000000003e-06, "log_odds_chosen": 0.5132101774215698, "log_odds_ratio": -0.58674156665802, "logits/chosen": 395.3487243652344, "logits/rejected": 396.77911376953125, "logps/chosen": -1.6742804050445557, "logps/rejected": -2.092700481414795, "loss": 2.2549, "nll_loss": 1.8625695705413818, "rewards/accuracies": 0.75, "rewards/chosen": -0.8371402025222778, "rewards/margins": 0.20921015739440918, "rewards/rejected": -1.0463502407073975, "step": 40 }, { "epoch": 0.5357142857142857, "grad_norm": 36.12651062011719, "learning_rate": 2.25e-06, "log_odds_chosen": 0.40647760033607483, "log_odds_ratio": -0.7087821364402771, "logits/chosen": 416.9222717285156, "logits/rejected": 409.3716125488281, "logps/chosen": -1.7572282552719116, "logps/rejected": -2.102989435195923, "loss": 2.1445, "nll_loss": 1.97689950466156, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.8786141276359558, "rewards/margins": 0.1728806048631668, "rewards/rejected": -1.0514947175979614, "step": 45 }, { "epoch": 0.5952380952380952, "grad_norm": 129.66111755371094, "learning_rate": 2.5e-06, "log_odds_chosen": 0.4818713068962097, "log_odds_ratio": -0.607313871383667, "logits/chosen": 385.31231689453125, "logits/rejected": 411.05029296875, "logps/chosen": -1.5724234580993652, "logps/rejected": -1.9852949380874634, "loss": 2.1136, "nll_loss": 1.7941957712173462, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.7862117290496826, "rewards/margins": 0.20643571019172668, "rewards/rejected": -0.9926474690437317, "step": 50 }, { "epoch": 0.6547619047619048, "grad_norm": 65.99632263183594, "learning_rate": 2.7500000000000004e-06, "log_odds_chosen": 0.3759257197380066, "log_odds_ratio": -0.658983588218689, "logits/chosen": 393.38006591796875, "logits/rejected": 373.91265869140625, "logps/chosen": -1.5574285984039307, "logps/rejected": -1.8452409505844116, "loss": 2.0521, "nll_loss": 1.9158170223236084, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.7787142992019653, "rewards/margins": 0.14390619099140167, "rewards/rejected": -0.9226204752922058, "step": 55 }, { "epoch": 0.7142857142857143, "grad_norm": 42.2856559753418, "learning_rate": 3e-06, "log_odds_chosen": 0.6560322642326355, "log_odds_ratio": -0.5206496119499207, "logits/chosen": 394.558349609375, "logits/rejected": 419.30908203125, "logps/chosen": -1.252516746520996, "logps/rejected": -1.736289620399475, "loss": 1.9986, "nll_loss": 1.6350934505462646, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.626258373260498, "rewards/margins": 0.2418864220380783, "rewards/rejected": -0.8681448101997375, "step": 60 }, { "epoch": 0.7738095238095238, "grad_norm": 51.25124740600586, "learning_rate": 3.2500000000000002e-06, "log_odds_chosen": 0.20805387198925018, "log_odds_ratio": -0.700042724609375, "logits/chosen": 386.62237548828125, "logits/rejected": 375.99847412109375, "logps/chosen": -1.3062386512756348, "logps/rejected": -1.4444353580474854, "loss": 1.9611, "nll_loss": 1.55000901222229, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.6531193256378174, "rewards/margins": 0.06909838318824768, "rewards/rejected": -0.7222176790237427, "step": 65 }, { "epoch": 0.8333333333333334, "grad_norm": 56.384552001953125, "learning_rate": 3.5e-06, "log_odds_chosen": 0.2420281618833542, "log_odds_ratio": -0.6607708930969238, "logits/chosen": 376.65167236328125, "logits/rejected": 374.722900390625, "logps/chosen": -1.2764971256256104, "logps/rejected": -1.4956505298614502, "loss": 1.9944, "nll_loss": 1.656867265701294, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.6382485628128052, "rewards/margins": 0.10957670211791992, "rewards/rejected": -0.7478252649307251, "step": 70 }, { "epoch": 0.8928571428571429, "grad_norm": 35.837562561035156, "learning_rate": 3.7500000000000005e-06, "log_odds_chosen": 0.43760427832603455, "log_odds_ratio": -0.5639179944992065, "logits/chosen": 369.7068786621094, "logits/rejected": 380.7843017578125, "logps/chosen": -1.254392385482788, "logps/rejected": -1.5828098058700562, "loss": 1.904, "nll_loss": 1.5474170446395874, "rewards/accuracies": 0.75, "rewards/chosen": -0.627196192741394, "rewards/margins": 0.16420873999595642, "rewards/rejected": -0.7914049029350281, "step": 75 }, { "epoch": 0.9523809523809523, "grad_norm": 61.186588287353516, "learning_rate": 4.000000000000001e-06, "log_odds_chosen": 0.20079848170280457, "log_odds_ratio": -0.7042320370674133, "logits/chosen": 372.26202392578125, "logits/rejected": 395.8300476074219, "logps/chosen": -1.294389009475708, "logps/rejected": -1.4353959560394287, "loss": 1.8227, "nll_loss": 1.4922560453414917, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.647194504737854, "rewards/margins": 0.07050346583127975, "rewards/rejected": -0.7176979780197144, "step": 80 }, { "epoch": 1.0, "eval_log_odds_chosen": 0.16461706161499023, "eval_log_odds_ratio": -0.6902630925178528, "eval_logits/chosen": 315.19403076171875, "eval_logits/rejected": 257.8447265625, "eval_logps/chosen": -1.2099318504333496, "eval_logps/rejected": -1.348587989807129, "eval_loss": 1.9615823030471802, "eval_nll_loss": 1.6718581914901733, "eval_rewards/accuracies": 0.5, "eval_rewards/chosen": -0.6049659252166748, "eval_rewards/margins": 0.06932813674211502, "eval_rewards/rejected": -0.6742939949035645, "eval_runtime": 201.4785, "eval_samples_per_second": 2.745, "eval_steps_per_second": 0.347, "step": 84 }, { "epoch": 1.0119047619047619, "grad_norm": 49.942501068115234, "learning_rate": 4.25e-06, "log_odds_chosen": 0.3827090859413147, "log_odds_ratio": -0.5646133422851562, "logits/chosen": 385.1676330566406, "logits/rejected": 408.2163391113281, "logps/chosen": -1.2135608196258545, "logps/rejected": -1.460314154624939, "loss": 1.8474, "nll_loss": 1.4997951984405518, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.6067804098129272, "rewards/margins": 0.1233767420053482, "rewards/rejected": -0.7301570773124695, "step": 85 }, { "epoch": 1.0714285714285714, "grad_norm": 121.16907501220703, "learning_rate": 4.5e-06, "log_odds_chosen": 0.7085806131362915, "log_odds_ratio": -0.4863010346889496, "logits/chosen": 366.0420837402344, "logits/rejected": 378.7876281738281, "logps/chosen": -1.073919653892517, "logps/rejected": -1.589540719985962, "loss": 1.6684, "nll_loss": 1.5809502601623535, "rewards/accuracies": 0.75, "rewards/chosen": -0.5369598269462585, "rewards/margins": 0.2578105330467224, "rewards/rejected": -0.794770359992981, "step": 90 }, { "epoch": 1.130952380952381, "grad_norm": 148.60858154296875, "learning_rate": 4.75e-06, "log_odds_chosen": 0.875158965587616, "log_odds_ratio": -0.42892536520957947, "logits/chosen": 411.90411376953125, "logits/rejected": 384.31939697265625, "logps/chosen": -1.1889146566390991, "logps/rejected": -1.8593413829803467, "loss": 1.7409, "nll_loss": 1.6732642650604248, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5944573283195496, "rewards/margins": 0.3352133333683014, "rewards/rejected": -0.9296706914901733, "step": 95 }, { "epoch": 1.1904761904761905, "grad_norm": 21.357654571533203, "learning_rate": 5e-06, "log_odds_chosen": 0.9980722665786743, "log_odds_ratio": -0.40565505623817444, "logits/chosen": 396.3813171386719, "logits/rejected": 391.2807312011719, "logps/chosen": -1.028236985206604, "logps/rejected": -1.7792075872421265, "loss": 1.6731, "nll_loss": 1.3774528503417969, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.514118492603302, "rewards/margins": 0.37548530101776123, "rewards/rejected": -0.8896037936210632, "step": 100 }, { "epoch": 1.25, "grad_norm": 92.72647857666016, "learning_rate": 4.8795003647426654e-06, "log_odds_chosen": 1.2707650661468506, "log_odds_ratio": -0.3489342927932739, "logits/chosen": 329.43988037109375, "logits/rejected": 359.312744140625, "logps/chosen": -0.9563199877738953, "logps/rejected": -1.8967196941375732, "loss": 1.5343, "nll_loss": 1.4084365367889404, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.47815999388694763, "rewards/margins": 0.4701998233795166, "rewards/rejected": -0.9483598470687866, "step": 105 }, { "epoch": 1.3095238095238095, "grad_norm": 23.097333908081055, "learning_rate": 4.767312946227961e-06, "log_odds_chosen": 1.0622062683105469, "log_odds_ratio": -0.40513938665390015, "logits/chosen": 364.5774841308594, "logits/rejected": 378.61834716796875, "logps/chosen": -0.97075355052948, "logps/rejected": -1.6918100118637085, "loss": 1.5379, "nll_loss": 1.3676984310150146, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.48537677526474, "rewards/margins": 0.36052826046943665, "rewards/rejected": -0.8459050059318542, "step": 110 }, { "epoch": 1.369047619047619, "grad_norm": 29.07915496826172, "learning_rate": 4.662524041201569e-06, "log_odds_chosen": 0.8752411603927612, "log_odds_ratio": -0.4561616778373718, "logits/chosen": 365.3411560058594, "logits/rejected": 359.665771484375, "logps/chosen": -1.142798662185669, "logps/rejected": -1.8039169311523438, "loss": 1.4911, "nll_loss": 1.328161597251892, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5713993310928345, "rewards/margins": 0.3305591344833374, "rewards/rejected": -0.9019584655761719, "step": 115 }, { "epoch": 1.4285714285714286, "grad_norm": 25.719493865966797, "learning_rate": 4.564354645876385e-06, "log_odds_chosen": 1.2218338251113892, "log_odds_ratio": -0.38108527660369873, "logits/chosen": 393.53240966796875, "logits/rejected": 404.69671630859375, "logps/chosen": -0.9683561325073242, "logps/rejected": -1.8645473718643188, "loss": 1.5413, "nll_loss": 1.2556110620498657, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.4841780662536621, "rewards/margins": 0.4480956196784973, "rewards/rejected": -0.9322736859321594, "step": 120 }, { "epoch": 1.4880952380952381, "grad_norm": 33.33296585083008, "learning_rate": 4.47213595499958e-06, "log_odds_chosen": 0.8903130292892456, "log_odds_ratio": -0.4254421591758728, "logits/chosen": 376.06280517578125, "logits/rejected": 374.7559814453125, "logps/chosen": -1.0784931182861328, "logps/rejected": -1.7109521627426147, "loss": 1.5507, "nll_loss": 1.3247863054275513, "rewards/accuracies": 0.875, "rewards/chosen": -0.5392465591430664, "rewards/margins": 0.3162294924259186, "rewards/rejected": -0.8554760813713074, "step": 125 }, { "epoch": 1.5476190476190477, "grad_norm": 30.346759796142578, "learning_rate": 4.385290096535147e-06, "log_odds_chosen": 1.1133849620819092, "log_odds_ratio": -0.4015190601348877, "logits/chosen": 380.5897521972656, "logits/rejected": 411.142333984375, "logps/chosen": -1.0903799533843994, "logps/rejected": -1.9482700824737549, "loss": 1.5205, "nll_loss": 1.3193198442459106, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.5451899766921997, "rewards/margins": 0.42894500494003296, "rewards/rejected": -0.9741350412368774, "step": 130 }, { "epoch": 1.6071428571428572, "grad_norm": 23.75062370300293, "learning_rate": 4.303314829119352e-06, "log_odds_chosen": 0.8445339202880859, "log_odds_ratio": -0.4824402332305908, "logits/chosen": 383.6838073730469, "logits/rejected": 385.19677734375, "logps/chosen": -1.1689434051513672, "logps/rejected": -1.7730737924575806, "loss": 1.6065, "nll_loss": 1.434819221496582, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5844717025756836, "rewards/margins": 0.3020651936531067, "rewards/rejected": -0.8865368962287903, "step": 135 }, { "epoch": 1.6666666666666665, "grad_norm": 27.057226181030273, "learning_rate": 4.2257712736425835e-06, "log_odds_chosen": 0.6634833216667175, "log_odds_ratio": -0.5241434574127197, "logits/chosen": 355.0412902832031, "logits/rejected": 345.8267517089844, "logps/chosen": -0.9373375773429871, "logps/rejected": -1.3723398447036743, "loss": 1.5457, "nll_loss": 1.1873310804367065, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.46866878867149353, "rewards/margins": 0.21750116348266602, "rewards/rejected": -0.6861699223518372, "step": 140 }, { "epoch": 1.7261904761904763, "grad_norm": 24.420185089111328, "learning_rate": 4.1522739926869985e-06, "log_odds_chosen": 1.1559978723526, "log_odds_ratio": -0.4130094647407532, "logits/chosen": 353.3649597167969, "logits/rejected": 403.1065368652344, "logps/chosen": -0.9650250673294067, "logps/rejected": -1.8172032833099365, "loss": 1.4955, "nll_loss": 1.2150856256484985, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.48251253366470337, "rewards/margins": 0.4260891377925873, "rewards/rejected": -0.9086016416549683, "step": 145 }, { "epoch": 1.7857142857142856, "grad_norm": 19.80653953552246, "learning_rate": 4.082482904638631e-06, "log_odds_chosen": 0.7758156657218933, "log_odds_ratio": -0.4749727249145508, "logits/chosen": 381.03961181640625, "logits/rejected": 387.2470703125, "logps/chosen": -1.0167145729064941, "logps/rejected": -1.5349647998809814, "loss": 1.5271, "nll_loss": 1.3638825416564941, "rewards/accuracies": 0.75, "rewards/chosen": -0.5083572864532471, "rewards/margins": 0.25912514328956604, "rewards/rejected": -0.7674823999404907, "step": 150 }, { "epoch": 1.8452380952380953, "grad_norm": 16.095684051513672, "learning_rate": 4.016096644512495e-06, "log_odds_chosen": 0.729617714881897, "log_odds_ratio": -0.5468782782554626, "logits/chosen": 389.2290344238281, "logits/rejected": 382.7873840332031, "logps/chosen": -1.1134544610977173, "logps/rejected": -1.6494137048721313, "loss": 1.5063, "nll_loss": 1.3405725955963135, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5567272305488586, "rewards/margins": 0.26797956228256226, "rewards/rejected": -0.8247068524360657, "step": 155 }, { "epoch": 1.9047619047619047, "grad_norm": 27.426509857177734, "learning_rate": 3.952847075210474e-06, "log_odds_chosen": 0.8723602294921875, "log_odds_ratio": -0.5240803956985474, "logits/chosen": 381.7905578613281, "logits/rejected": 403.63665771484375, "logps/chosen": -1.0924057960510254, "logps/rejected": -1.7700122594833374, "loss": 1.5276, "nll_loss": 1.3452767133712769, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.5462028980255127, "rewards/margins": 0.3388032019138336, "rewards/rejected": -0.8850061297416687, "step": 160 }, { "epoch": 1.9642857142857144, "grad_norm": 26.49502944946289, "learning_rate": 3.892494720807615e-06, "log_odds_chosen": 1.3228471279144287, "log_odds_ratio": -0.42377227544784546, "logits/chosen": 396.58697509765625, "logits/rejected": 413.573974609375, "logps/chosen": -1.0189117193222046, "logps/rejected": -2.0456955432891846, "loss": 1.4803, "nll_loss": 1.2141263484954834, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.5094558596611023, "rewards/margins": 0.5133919715881348, "rewards/rejected": -1.0228477716445923, "step": 165 }, { "epoch": 2.0, "eval_log_odds_chosen": 0.25614500045776367, "eval_log_odds_ratio": -0.6718475818634033, "eval_logits/chosen": 328.0206604003906, "eval_logits/rejected": 274.3525695800781, "eval_logps/chosen": -1.0924270153045654, "eval_logps/rejected": -1.3016821146011353, "eval_loss": 1.7681158781051636, "eval_nll_loss": 1.4853813648223877, "eval_rewards/accuracies": 0.5285714268684387, "eval_rewards/chosen": -0.5462135076522827, "eval_rewards/margins": 0.10462753474712372, "eval_rewards/rejected": -0.6508410573005676, "eval_runtime": 201.7398, "eval_samples_per_second": 2.741, "eval_steps_per_second": 0.347, "step": 168 }, { "epoch": 2.0238095238095237, "grad_norm": 20.41162872314453, "learning_rate": 3.834824944236852e-06, "log_odds_chosen": 1.3698937892913818, "log_odds_ratio": -0.44814401865005493, "logits/chosen": 386.51275634765625, "logits/rejected": 390.8349304199219, "logps/chosen": -1.1337850093841553, "logps/rejected": -2.192959785461426, "loss": 1.3382, "nll_loss": 1.3109896183013916, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.5668925046920776, "rewards/margins": 0.52958744764328, "rewards/rejected": -1.096479892730713, "step": 170 }, { "epoch": 2.0833333333333335, "grad_norm": 29.53614044189453, "learning_rate": 3.7796447300922724e-06, "log_odds_chosen": 3.1386218070983887, "log_odds_ratio": -0.13156357407569885, "logits/chosen": 374.08306884765625, "logits/rejected": 370.32257080078125, "logps/chosen": -0.6971138119697571, "logps/rejected": -2.928040027618408, "loss": 0.963, "nll_loss": 1.0014644861221313, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.34855690598487854, "rewards/margins": 1.115463137626648, "rewards/rejected": -1.464020013809204, "step": 175 }, { "epoch": 2.142857142857143, "grad_norm": 20.2189884185791, "learning_rate": 3.72677996249965e-06, "log_odds_chosen": 2.8878941535949707, "log_odds_ratio": -0.10618897527456284, "logits/chosen": 361.66241455078125, "logits/rejected": 393.54351806640625, "logps/chosen": -0.572822093963623, "logps/rejected": -2.587498664855957, "loss": 0.9084, "nll_loss": 0.8843202590942383, "rewards/accuracies": 1.0, "rewards/chosen": -0.2864110469818115, "rewards/margins": 1.007338285446167, "rewards/rejected": -1.2937493324279785, "step": 180 }, { "epoch": 2.2023809523809526, "grad_norm": 15.425383567810059, "learning_rate": 3.6760731104690393e-06, "log_odds_chosen": 2.7572951316833496, "log_odds_ratio": -0.13107402622699738, "logits/chosen": 328.209228515625, "logits/rejected": 318.043701171875, "logps/chosen": -0.5289679765701294, "logps/rejected": -2.471531629562378, "loss": 0.9075, "nll_loss": 0.8783146142959595, "rewards/accuracies": 1.0, "rewards/chosen": -0.2644839882850647, "rewards/margins": 0.9712821245193481, "rewards/rejected": -1.235765814781189, "step": 185 }, { "epoch": 2.261904761904762, "grad_norm": 17.949966430664062, "learning_rate": 3.6273812505500587e-06, "log_odds_chosen": 2.646030902862549, "log_odds_ratio": -0.1722700595855713, "logits/chosen": 373.1665954589844, "logits/rejected": 363.8042297363281, "logps/chosen": -0.6163553595542908, "logps/rejected": -2.3227782249450684, "loss": 0.8853, "nll_loss": 0.8921284675598145, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.3081776797771454, "rewards/margins": 0.8532115817070007, "rewards/rejected": -1.1613891124725342, "step": 190 }, { "epoch": 2.3214285714285716, "grad_norm": 15.254051208496094, "learning_rate": 3.5805743701971648e-06, "log_odds_chosen": 3.064070463180542, "log_odds_ratio": -0.11556991189718246, "logits/chosen": 355.3418884277344, "logits/rejected": 384.1878967285156, "logps/chosen": -0.5151618123054504, "logps/rejected": -2.5838632583618164, "loss": 0.9013, "nll_loss": 0.8009279370307922, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.2575809061527252, "rewards/margins": 1.0343506336212158, "rewards/rejected": -1.2919316291809082, "step": 195 }, { "epoch": 2.380952380952381, "grad_norm": 18.17316246032715, "learning_rate": 3.5355339059327378e-06, "log_odds_chosen": 3.8410885334014893, "log_odds_ratio": -0.07899609953165054, "logits/chosen": 351.03741455078125, "logits/rejected": 372.4954528808594, "logps/chosen": -0.4664763808250427, "logps/rejected": -3.2068772315979004, "loss": 0.8742, "nll_loss": 0.7695188522338867, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.23323819041252136, "rewards/margins": 1.3702003955841064, "rewards/rejected": -1.6034386157989502, "step": 200 }, { "epoch": 2.4404761904761907, "grad_norm": 13.679876327514648, "learning_rate": 3.4921514788478916e-06, "log_odds_chosen": 2.8450164794921875, "log_odds_ratio": -0.16636498272418976, "logits/chosen": 386.46038818359375, "logits/rejected": 371.1928405761719, "logps/chosen": -0.5075671076774597, "logps/rejected": -2.49824595451355, "loss": 0.8909, "nll_loss": 0.7808379530906677, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.25378355383872986, "rewards/margins": 0.9953393936157227, "rewards/rejected": -1.249122977256775, "step": 205 }, { "epoch": 2.5, "grad_norm": 15.127974510192871, "learning_rate": 3.450327796711771e-06, "log_odds_chosen": 3.1379058361053467, "log_odds_ratio": -0.0948343575000763, "logits/chosen": 385.96038818359375, "logits/rejected": 346.40997314453125, "logps/chosen": -0.46884965896606445, "logps/rejected": -2.5977988243103027, "loss": 0.8884, "nll_loss": 0.9064348340034485, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.23442482948303223, "rewards/margins": 1.0644747018814087, "rewards/rejected": -1.2988994121551514, "step": 210 }, { "epoch": 2.5595238095238093, "grad_norm": 14.862520217895508, "learning_rate": 3.409971697352368e-06, "log_odds_chosen": 3.3548312187194824, "log_odds_ratio": -0.07949019968509674, "logits/chosen": 366.4288024902344, "logits/rejected": 379.4535827636719, "logps/chosen": -0.48801979422569275, "logps/rejected": -2.8395187854766846, "loss": 0.8614, "nll_loss": 0.8571261167526245, "rewards/accuracies": 1.0, "rewards/chosen": -0.24400989711284637, "rewards/margins": 1.1757495403289795, "rewards/rejected": -1.4197593927383423, "step": 215 }, { "epoch": 2.619047619047619, "grad_norm": 14.53584098815918, "learning_rate": 3.3709993123162106e-06, "log_odds_chosen": 2.9063503742218018, "log_odds_ratio": -0.11560215055942535, "logits/chosen": 353.8319396972656, "logits/rejected": 356.432373046875, "logps/chosen": -0.4644307494163513, "logps/rejected": -2.378437042236328, "loss": 0.9108, "nll_loss": 0.7656416893005371, "rewards/accuracies": 1.0, "rewards/chosen": -0.23221537470817566, "rewards/margins": 0.9570032358169556, "rewards/rejected": -1.189218521118164, "step": 220 }, { "epoch": 2.678571428571429, "grad_norm": 17.206817626953125, "learning_rate": 3.3333333333333333e-06, "log_odds_chosen": 3.093144178390503, "log_odds_ratio": -0.10332699865102768, "logits/chosen": 360.0953063964844, "logits/rejected": 369.03460693359375, "logps/chosen": -0.6034930944442749, "logps/rejected": -2.8071255683898926, "loss": 0.9202, "nll_loss": 0.8482205271720886, "rewards/accuracies": 1.0, "rewards/chosen": -0.30174654722213745, "rewards/margins": 1.1018160581588745, "rewards/rejected": -1.4035627841949463, "step": 225 }, { "epoch": 2.738095238095238, "grad_norm": 32.90928268432617, "learning_rate": 3.296902366978936e-06, "log_odds_chosen": 2.7679336071014404, "log_odds_ratio": -0.1059746965765953, "logits/chosen": 395.354736328125, "logits/rejected": 391.44451904296875, "logps/chosen": -0.48557600378990173, "logps/rejected": -2.241939067840576, "loss": 0.9492, "nll_loss": 0.863217830657959, "rewards/accuracies": 1.0, "rewards/chosen": -0.24278800189495087, "rewards/margins": 0.8781815767288208, "rewards/rejected": -1.120969533920288, "step": 230 }, { "epoch": 2.7976190476190474, "grad_norm": 19.33934783935547, "learning_rate": 3.2616403652672114e-06, "log_odds_chosen": 2.7701828479766846, "log_odds_ratio": -0.13227275013923645, "logits/chosen": 384.26873779296875, "logits/rejected": 360.5152893066406, "logps/chosen": -0.5336109399795532, "logps/rejected": -2.403452157974243, "loss": 0.9262, "nll_loss": 0.8939388394355774, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.2668054699897766, "rewards/margins": 0.9349204897880554, "rewards/rejected": -1.2017260789871216, "step": 235 }, { "epoch": 2.857142857142857, "grad_norm": 20.41938591003418, "learning_rate": 3.2274861218395142e-06, "log_odds_chosen": 3.104609251022339, "log_odds_ratio": -0.09935127198696136, "logits/chosen": 356.3617248535156, "logits/rejected": 407.67620849609375, "logps/chosen": -0.5356149673461914, "logps/rejected": -2.744302272796631, "loss": 0.8823, "nll_loss": 0.7891393899917603, "rewards/accuracies": 1.0, "rewards/chosen": -0.2678074836730957, "rewards/margins": 1.1043436527252197, "rewards/rejected": -1.3721511363983154, "step": 240 }, { "epoch": 2.9166666666666665, "grad_norm": 28.204269409179688, "learning_rate": 3.1943828249997e-06, "log_odds_chosen": 2.208242893218994, "log_odds_ratio": -0.1713695377111435, "logits/chosen": 374.45611572265625, "logits/rejected": 340.09619140625, "logps/chosen": -0.7715775966644287, "logps/rejected": -2.332731008529663, "loss": 0.9365, "nll_loss": 0.9215513467788696, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.38578879833221436, "rewards/margins": 0.7805767059326172, "rewards/rejected": -1.1663655042648315, "step": 245 }, { "epoch": 2.9761904761904763, "grad_norm": 30.126291275024414, "learning_rate": 3.1622776601683796e-06, "log_odds_chosen": 2.9035000801086426, "log_odds_ratio": -0.11840321123600006, "logits/chosen": 376.98828125, "logits/rejected": 380.9736328125, "logps/chosen": -0.48586076498031616, "logps/rejected": -2.4321203231811523, "loss": 0.9109, "nll_loss": 0.7184505462646484, "rewards/accuracies": 1.0, "rewards/chosen": -0.24293038249015808, "rewards/margins": 0.9731297492980957, "rewards/rejected": -1.2160601615905762, "step": 250 }, { "epoch": 3.0, "eval_log_odds_chosen": 0.4396049678325653, "eval_log_odds_ratio": -0.6420542597770691, "eval_logits/chosen": 301.1214599609375, "eval_logits/rejected": 240.39068603515625, "eval_logps/chosen": -1.19857656955719, "eval_logps/rejected": -1.5204962491989136, "eval_loss": 1.8577181100845337, "eval_nll_loss": 1.5532194375991821, "eval_rewards/accuracies": 0.6142857074737549, "eval_rewards/chosen": -0.599288284778595, "eval_rewards/margins": 0.16095994412899017, "eval_rewards/rejected": -0.7602481245994568, "eval_runtime": 201.7642, "eval_samples_per_second": 2.741, "eval_steps_per_second": 0.347, "step": 252 }, { "epoch": 3.0, "step": 252, "total_flos": 0.0, "train_loss": 2.2429193542117165, "train_runtime": 13126.4564, "train_samples_per_second": 1.226, "train_steps_per_second": 0.019 } ], "logging_steps": 5, "max_steps": 252, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }