gemma-7b-borpo-low-quality-v4 / trainer_state.json
c-alfano's picture
Model save
44810c5 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 252,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05952380952380952,
"grad_norm": 1882.16845703125,
"learning_rate": 2.5000000000000004e-07,
"log_odds_chosen": -0.12500545382499695,
"log_odds_ratio": -0.9542725682258606,
"logits/chosen": 164.27560424804688,
"logits/rejected": 208.2156219482422,
"logps/chosen": -14.962623596191406,
"logps/rejected": -14.837625503540039,
"loss": 15.2102,
"nll_loss": 14.645106315612793,
"rewards/accuracies": 0.3499999940395355,
"rewards/chosen": -7.481311798095703,
"rewards/margins": -0.06250032037496567,
"rewards/rejected": -7.4188127517700195,
"step": 5
},
{
"epoch": 0.11904761904761904,
"grad_norm": 1088.200927734375,
"learning_rate": 5.000000000000001e-07,
"log_odds_chosen": -0.05857907608151436,
"log_odds_ratio": -1.1228755712509155,
"logits/chosen": 244.7198486328125,
"logits/rejected": 227.472412109375,
"logps/chosen": -13.305212020874023,
"logps/rejected": -13.2466402053833,
"loss": 13.2102,
"nll_loss": 12.955018997192383,
"rewards/accuracies": 0.44999998807907104,
"rewards/chosen": -6.652606010437012,
"rewards/margins": -0.02928643301129341,
"rewards/rejected": -6.62332010269165,
"step": 10
},
{
"epoch": 0.17857142857142858,
"grad_norm": 750.9570922851562,
"learning_rate": 7.5e-07,
"log_odds_chosen": -0.21123185753822327,
"log_odds_ratio": -0.9812790751457214,
"logits/chosen": 247.1725311279297,
"logits/rejected": 319.17498779296875,
"logps/chosen": -8.516304969787598,
"logps/rejected": -8.305025100708008,
"loss": 8.5319,
"nll_loss": 8.219998359680176,
"rewards/accuracies": 0.42500001192092896,
"rewards/chosen": -4.258152484893799,
"rewards/margins": -0.10563965886831284,
"rewards/rejected": -4.152512550354004,
"step": 15
},
{
"epoch": 0.23809523809523808,
"grad_norm": 150.73776245117188,
"learning_rate": 1.0000000000000002e-06,
"log_odds_chosen": 0.13484135270118713,
"log_odds_ratio": -0.7639249563217163,
"logits/chosen": 232.5557098388672,
"logits/rejected": 278.36358642578125,
"logps/chosen": -5.115365505218506,
"logps/rejected": -5.248563289642334,
"loss": 5.7184,
"nll_loss": 5.363820552825928,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -2.557682752609253,
"rewards/margins": 0.06659835577011108,
"rewards/rejected": -2.624281644821167,
"step": 20
},
{
"epoch": 0.2976190476190476,
"grad_norm": 132.29954528808594,
"learning_rate": 1.25e-06,
"log_odds_chosen": -0.15154710412025452,
"log_odds_ratio": -0.918846607208252,
"logits/chosen": 329.36016845703125,
"logits/rejected": 322.80316162109375,
"logps/chosen": -3.490060329437256,
"logps/rejected": -3.336357593536377,
"loss": 3.7837,
"nll_loss": 3.5224061012268066,
"rewards/accuracies": 0.5,
"rewards/chosen": -1.745030164718628,
"rewards/margins": -0.07685144990682602,
"rewards/rejected": -1.6681787967681885,
"step": 25
},
{
"epoch": 0.35714285714285715,
"grad_norm": 66.39665985107422,
"learning_rate": 1.5e-06,
"log_odds_chosen": 0.1455865204334259,
"log_odds_ratio": -0.7501406669616699,
"logits/chosen": 351.40875244140625,
"logits/rejected": 335.61932373046875,
"logps/chosen": -2.3319332599639893,
"logps/rejected": -2.4487643241882324,
"loss": 2.9334,
"nll_loss": 2.8211100101470947,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -1.1659666299819946,
"rewards/margins": 0.05841563269495964,
"rewards/rejected": -1.2243821620941162,
"step": 30
},
{
"epoch": 0.4166666666666667,
"grad_norm": 50.658451080322266,
"learning_rate": 1.75e-06,
"log_odds_chosen": 0.17109766602516174,
"log_odds_ratio": -0.6774098873138428,
"logits/chosen": 386.70220947265625,
"logits/rejected": 384.7711486816406,
"logps/chosen": -1.935703992843628,
"logps/rejected": -2.0668416023254395,
"loss": 2.3732,
"nll_loss": 2.1755499839782715,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.967851996421814,
"rewards/margins": 0.06556873768568039,
"rewards/rejected": -1.0334208011627197,
"step": 35
},
{
"epoch": 0.47619047619047616,
"grad_norm": 50.109127044677734,
"learning_rate": 2.0000000000000003e-06,
"log_odds_chosen": 0.5132101774215698,
"log_odds_ratio": -0.58674156665802,
"logits/chosen": 395.3487243652344,
"logits/rejected": 396.77911376953125,
"logps/chosen": -1.6742804050445557,
"logps/rejected": -2.092700481414795,
"loss": 2.2549,
"nll_loss": 1.8625695705413818,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.8371402025222778,
"rewards/margins": 0.20921015739440918,
"rewards/rejected": -1.0463502407073975,
"step": 40
},
{
"epoch": 0.5357142857142857,
"grad_norm": 36.12651062011719,
"learning_rate": 2.25e-06,
"log_odds_chosen": 0.40647760033607483,
"log_odds_ratio": -0.7087821364402771,
"logits/chosen": 416.9222717285156,
"logits/rejected": 409.3716125488281,
"logps/chosen": -1.7572282552719116,
"logps/rejected": -2.102989435195923,
"loss": 2.1445,
"nll_loss": 1.97689950466156,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.8786141276359558,
"rewards/margins": 0.1728806048631668,
"rewards/rejected": -1.0514947175979614,
"step": 45
},
{
"epoch": 0.5952380952380952,
"grad_norm": 129.66111755371094,
"learning_rate": 2.5e-06,
"log_odds_chosen": 0.4818713068962097,
"log_odds_ratio": -0.607313871383667,
"logits/chosen": 385.31231689453125,
"logits/rejected": 411.05029296875,
"logps/chosen": -1.5724234580993652,
"logps/rejected": -1.9852949380874634,
"loss": 2.1136,
"nll_loss": 1.7941957712173462,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.7862117290496826,
"rewards/margins": 0.20643571019172668,
"rewards/rejected": -0.9926474690437317,
"step": 50
},
{
"epoch": 0.6547619047619048,
"grad_norm": 65.99632263183594,
"learning_rate": 2.7500000000000004e-06,
"log_odds_chosen": 0.3759257197380066,
"log_odds_ratio": -0.658983588218689,
"logits/chosen": 393.38006591796875,
"logits/rejected": 373.91265869140625,
"logps/chosen": -1.5574285984039307,
"logps/rejected": -1.8452409505844116,
"loss": 2.0521,
"nll_loss": 1.9158170223236084,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.7787142992019653,
"rewards/margins": 0.14390619099140167,
"rewards/rejected": -0.9226204752922058,
"step": 55
},
{
"epoch": 0.7142857142857143,
"grad_norm": 42.2856559753418,
"learning_rate": 3e-06,
"log_odds_chosen": 0.6560322642326355,
"log_odds_ratio": -0.5206496119499207,
"logits/chosen": 394.558349609375,
"logits/rejected": 419.30908203125,
"logps/chosen": -1.252516746520996,
"logps/rejected": -1.736289620399475,
"loss": 1.9986,
"nll_loss": 1.6350934505462646,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.626258373260498,
"rewards/margins": 0.2418864220380783,
"rewards/rejected": -0.8681448101997375,
"step": 60
},
{
"epoch": 0.7738095238095238,
"grad_norm": 51.25124740600586,
"learning_rate": 3.2500000000000002e-06,
"log_odds_chosen": 0.20805387198925018,
"log_odds_ratio": -0.700042724609375,
"logits/chosen": 386.62237548828125,
"logits/rejected": 375.99847412109375,
"logps/chosen": -1.3062386512756348,
"logps/rejected": -1.4444353580474854,
"loss": 1.9611,
"nll_loss": 1.55000901222229,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.6531193256378174,
"rewards/margins": 0.06909838318824768,
"rewards/rejected": -0.7222176790237427,
"step": 65
},
{
"epoch": 0.8333333333333334,
"grad_norm": 56.384552001953125,
"learning_rate": 3.5e-06,
"log_odds_chosen": 0.2420281618833542,
"log_odds_ratio": -0.6607708930969238,
"logits/chosen": 376.65167236328125,
"logits/rejected": 374.722900390625,
"logps/chosen": -1.2764971256256104,
"logps/rejected": -1.4956505298614502,
"loss": 1.9944,
"nll_loss": 1.656867265701294,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.6382485628128052,
"rewards/margins": 0.10957670211791992,
"rewards/rejected": -0.7478252649307251,
"step": 70
},
{
"epoch": 0.8928571428571429,
"grad_norm": 35.837562561035156,
"learning_rate": 3.7500000000000005e-06,
"log_odds_chosen": 0.43760427832603455,
"log_odds_ratio": -0.5639179944992065,
"logits/chosen": 369.7068786621094,
"logits/rejected": 380.7843017578125,
"logps/chosen": -1.254392385482788,
"logps/rejected": -1.5828098058700562,
"loss": 1.904,
"nll_loss": 1.5474170446395874,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.627196192741394,
"rewards/margins": 0.16420873999595642,
"rewards/rejected": -0.7914049029350281,
"step": 75
},
{
"epoch": 0.9523809523809523,
"grad_norm": 61.186588287353516,
"learning_rate": 4.000000000000001e-06,
"log_odds_chosen": 0.20079848170280457,
"log_odds_ratio": -0.7042320370674133,
"logits/chosen": 372.26202392578125,
"logits/rejected": 395.8300476074219,
"logps/chosen": -1.294389009475708,
"logps/rejected": -1.4353959560394287,
"loss": 1.8227,
"nll_loss": 1.4922560453414917,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.647194504737854,
"rewards/margins": 0.07050346583127975,
"rewards/rejected": -0.7176979780197144,
"step": 80
},
{
"epoch": 1.0,
"eval_log_odds_chosen": 0.16461706161499023,
"eval_log_odds_ratio": -0.6902630925178528,
"eval_logits/chosen": 315.19403076171875,
"eval_logits/rejected": 257.8447265625,
"eval_logps/chosen": -1.2099318504333496,
"eval_logps/rejected": -1.348587989807129,
"eval_loss": 1.9615823030471802,
"eval_nll_loss": 1.6718581914901733,
"eval_rewards/accuracies": 0.5,
"eval_rewards/chosen": -0.6049659252166748,
"eval_rewards/margins": 0.06932813674211502,
"eval_rewards/rejected": -0.6742939949035645,
"eval_runtime": 201.4785,
"eval_samples_per_second": 2.745,
"eval_steps_per_second": 0.347,
"step": 84
},
{
"epoch": 1.0119047619047619,
"grad_norm": 49.942501068115234,
"learning_rate": 4.25e-06,
"log_odds_chosen": 0.3827090859413147,
"log_odds_ratio": -0.5646133422851562,
"logits/chosen": 385.1676330566406,
"logits/rejected": 408.2163391113281,
"logps/chosen": -1.2135608196258545,
"logps/rejected": -1.460314154624939,
"loss": 1.8474,
"nll_loss": 1.4997951984405518,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.6067804098129272,
"rewards/margins": 0.1233767420053482,
"rewards/rejected": -0.7301570773124695,
"step": 85
},
{
"epoch": 1.0714285714285714,
"grad_norm": 121.16907501220703,
"learning_rate": 4.5e-06,
"log_odds_chosen": 0.7085806131362915,
"log_odds_ratio": -0.4863010346889496,
"logits/chosen": 366.0420837402344,
"logits/rejected": 378.7876281738281,
"logps/chosen": -1.073919653892517,
"logps/rejected": -1.589540719985962,
"loss": 1.6684,
"nll_loss": 1.5809502601623535,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.5369598269462585,
"rewards/margins": 0.2578105330467224,
"rewards/rejected": -0.794770359992981,
"step": 90
},
{
"epoch": 1.130952380952381,
"grad_norm": 148.60858154296875,
"learning_rate": 4.75e-06,
"log_odds_chosen": 0.875158965587616,
"log_odds_ratio": -0.42892536520957947,
"logits/chosen": 411.90411376953125,
"logits/rejected": 384.31939697265625,
"logps/chosen": -1.1889146566390991,
"logps/rejected": -1.8593413829803467,
"loss": 1.7409,
"nll_loss": 1.6732642650604248,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.5944573283195496,
"rewards/margins": 0.3352133333683014,
"rewards/rejected": -0.9296706914901733,
"step": 95
},
{
"epoch": 1.1904761904761905,
"grad_norm": 21.357654571533203,
"learning_rate": 5e-06,
"log_odds_chosen": 0.9980722665786743,
"log_odds_ratio": -0.40565505623817444,
"logits/chosen": 396.3813171386719,
"logits/rejected": 391.2807312011719,
"logps/chosen": -1.028236985206604,
"logps/rejected": -1.7792075872421265,
"loss": 1.6731,
"nll_loss": 1.3774528503417969,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.514118492603302,
"rewards/margins": 0.37548530101776123,
"rewards/rejected": -0.8896037936210632,
"step": 100
},
{
"epoch": 1.25,
"grad_norm": 92.72647857666016,
"learning_rate": 4.8795003647426654e-06,
"log_odds_chosen": 1.2707650661468506,
"log_odds_ratio": -0.3489342927932739,
"logits/chosen": 329.43988037109375,
"logits/rejected": 359.312744140625,
"logps/chosen": -0.9563199877738953,
"logps/rejected": -1.8967196941375732,
"loss": 1.5343,
"nll_loss": 1.4084365367889404,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.47815999388694763,
"rewards/margins": 0.4701998233795166,
"rewards/rejected": -0.9483598470687866,
"step": 105
},
{
"epoch": 1.3095238095238095,
"grad_norm": 23.097333908081055,
"learning_rate": 4.767312946227961e-06,
"log_odds_chosen": 1.0622062683105469,
"log_odds_ratio": -0.40513938665390015,
"logits/chosen": 364.5774841308594,
"logits/rejected": 378.61834716796875,
"logps/chosen": -0.97075355052948,
"logps/rejected": -1.6918100118637085,
"loss": 1.5379,
"nll_loss": 1.3676984310150146,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.48537677526474,
"rewards/margins": 0.36052826046943665,
"rewards/rejected": -0.8459050059318542,
"step": 110
},
{
"epoch": 1.369047619047619,
"grad_norm": 29.07915496826172,
"learning_rate": 4.662524041201569e-06,
"log_odds_chosen": 0.8752411603927612,
"log_odds_ratio": -0.4561616778373718,
"logits/chosen": 365.3411560058594,
"logits/rejected": 359.665771484375,
"logps/chosen": -1.142798662185669,
"logps/rejected": -1.8039169311523438,
"loss": 1.4911,
"nll_loss": 1.328161597251892,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.5713993310928345,
"rewards/margins": 0.3305591344833374,
"rewards/rejected": -0.9019584655761719,
"step": 115
},
{
"epoch": 1.4285714285714286,
"grad_norm": 25.719493865966797,
"learning_rate": 4.564354645876385e-06,
"log_odds_chosen": 1.2218338251113892,
"log_odds_ratio": -0.38108527660369873,
"logits/chosen": 393.53240966796875,
"logits/rejected": 404.69671630859375,
"logps/chosen": -0.9683561325073242,
"logps/rejected": -1.8645473718643188,
"loss": 1.5413,
"nll_loss": 1.2556110620498657,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.4841780662536621,
"rewards/margins": 0.4480956196784973,
"rewards/rejected": -0.9322736859321594,
"step": 120
},
{
"epoch": 1.4880952380952381,
"grad_norm": 33.33296585083008,
"learning_rate": 4.47213595499958e-06,
"log_odds_chosen": 0.8903130292892456,
"log_odds_ratio": -0.4254421591758728,
"logits/chosen": 376.06280517578125,
"logits/rejected": 374.7559814453125,
"logps/chosen": -1.0784931182861328,
"logps/rejected": -1.7109521627426147,
"loss": 1.5507,
"nll_loss": 1.3247863054275513,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.5392465591430664,
"rewards/margins": 0.3162294924259186,
"rewards/rejected": -0.8554760813713074,
"step": 125
},
{
"epoch": 1.5476190476190477,
"grad_norm": 30.346759796142578,
"learning_rate": 4.385290096535147e-06,
"log_odds_chosen": 1.1133849620819092,
"log_odds_ratio": -0.4015190601348877,
"logits/chosen": 380.5897521972656,
"logits/rejected": 411.142333984375,
"logps/chosen": -1.0903799533843994,
"logps/rejected": -1.9482700824737549,
"loss": 1.5205,
"nll_loss": 1.3193198442459106,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.5451899766921997,
"rewards/margins": 0.42894500494003296,
"rewards/rejected": -0.9741350412368774,
"step": 130
},
{
"epoch": 1.6071428571428572,
"grad_norm": 23.75062370300293,
"learning_rate": 4.303314829119352e-06,
"log_odds_chosen": 0.8445339202880859,
"log_odds_ratio": -0.4824402332305908,
"logits/chosen": 383.6838073730469,
"logits/rejected": 385.19677734375,
"logps/chosen": -1.1689434051513672,
"logps/rejected": -1.7730737924575806,
"loss": 1.6065,
"nll_loss": 1.434819221496582,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.5844717025756836,
"rewards/margins": 0.3020651936531067,
"rewards/rejected": -0.8865368962287903,
"step": 135
},
{
"epoch": 1.6666666666666665,
"grad_norm": 27.057226181030273,
"learning_rate": 4.2257712736425835e-06,
"log_odds_chosen": 0.6634833216667175,
"log_odds_ratio": -0.5241434574127197,
"logits/chosen": 355.0412902832031,
"logits/rejected": 345.8267517089844,
"logps/chosen": -0.9373375773429871,
"logps/rejected": -1.3723398447036743,
"loss": 1.5457,
"nll_loss": 1.1873310804367065,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.46866878867149353,
"rewards/margins": 0.21750116348266602,
"rewards/rejected": -0.6861699223518372,
"step": 140
},
{
"epoch": 1.7261904761904763,
"grad_norm": 24.420185089111328,
"learning_rate": 4.1522739926869985e-06,
"log_odds_chosen": 1.1559978723526,
"log_odds_ratio": -0.4130094647407532,
"logits/chosen": 353.3649597167969,
"logits/rejected": 403.1065368652344,
"logps/chosen": -0.9650250673294067,
"logps/rejected": -1.8172032833099365,
"loss": 1.4955,
"nll_loss": 1.2150856256484985,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.48251253366470337,
"rewards/margins": 0.4260891377925873,
"rewards/rejected": -0.9086016416549683,
"step": 145
},
{
"epoch": 1.7857142857142856,
"grad_norm": 19.80653953552246,
"learning_rate": 4.082482904638631e-06,
"log_odds_chosen": 0.7758156657218933,
"log_odds_ratio": -0.4749727249145508,
"logits/chosen": 381.03961181640625,
"logits/rejected": 387.2470703125,
"logps/chosen": -1.0167145729064941,
"logps/rejected": -1.5349647998809814,
"loss": 1.5271,
"nll_loss": 1.3638825416564941,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.5083572864532471,
"rewards/margins": 0.25912514328956604,
"rewards/rejected": -0.7674823999404907,
"step": 150
},
{
"epoch": 1.8452380952380953,
"grad_norm": 16.095684051513672,
"learning_rate": 4.016096644512495e-06,
"log_odds_chosen": 0.729617714881897,
"log_odds_ratio": -0.5468782782554626,
"logits/chosen": 389.2290344238281,
"logits/rejected": 382.7873840332031,
"logps/chosen": -1.1134544610977173,
"logps/rejected": -1.6494137048721313,
"loss": 1.5063,
"nll_loss": 1.3405725955963135,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.5567272305488586,
"rewards/margins": 0.26797956228256226,
"rewards/rejected": -0.8247068524360657,
"step": 155
},
{
"epoch": 1.9047619047619047,
"grad_norm": 27.426509857177734,
"learning_rate": 3.952847075210474e-06,
"log_odds_chosen": 0.8723602294921875,
"log_odds_ratio": -0.5240803956985474,
"logits/chosen": 381.7905578613281,
"logits/rejected": 403.63665771484375,
"logps/chosen": -1.0924057960510254,
"logps/rejected": -1.7700122594833374,
"loss": 1.5276,
"nll_loss": 1.3452767133712769,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.5462028980255127,
"rewards/margins": 0.3388032019138336,
"rewards/rejected": -0.8850061297416687,
"step": 160
},
{
"epoch": 1.9642857142857144,
"grad_norm": 26.49502944946289,
"learning_rate": 3.892494720807615e-06,
"log_odds_chosen": 1.3228471279144287,
"log_odds_ratio": -0.42377227544784546,
"logits/chosen": 396.58697509765625,
"logits/rejected": 413.573974609375,
"logps/chosen": -1.0189117193222046,
"logps/rejected": -2.0456955432891846,
"loss": 1.4803,
"nll_loss": 1.2141263484954834,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.5094558596611023,
"rewards/margins": 0.5133919715881348,
"rewards/rejected": -1.0228477716445923,
"step": 165
},
{
"epoch": 2.0,
"eval_log_odds_chosen": 0.25614500045776367,
"eval_log_odds_ratio": -0.6718475818634033,
"eval_logits/chosen": 328.0206604003906,
"eval_logits/rejected": 274.3525695800781,
"eval_logps/chosen": -1.0924270153045654,
"eval_logps/rejected": -1.3016821146011353,
"eval_loss": 1.7681158781051636,
"eval_nll_loss": 1.4853813648223877,
"eval_rewards/accuracies": 0.5285714268684387,
"eval_rewards/chosen": -0.5462135076522827,
"eval_rewards/margins": 0.10462753474712372,
"eval_rewards/rejected": -0.6508410573005676,
"eval_runtime": 201.7398,
"eval_samples_per_second": 2.741,
"eval_steps_per_second": 0.347,
"step": 168
},
{
"epoch": 2.0238095238095237,
"grad_norm": 20.41162872314453,
"learning_rate": 3.834824944236852e-06,
"log_odds_chosen": 1.3698937892913818,
"log_odds_ratio": -0.44814401865005493,
"logits/chosen": 386.51275634765625,
"logits/rejected": 390.8349304199219,
"logps/chosen": -1.1337850093841553,
"logps/rejected": -2.192959785461426,
"loss": 1.3382,
"nll_loss": 1.3109896183013916,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.5668925046920776,
"rewards/margins": 0.52958744764328,
"rewards/rejected": -1.096479892730713,
"step": 170
},
{
"epoch": 2.0833333333333335,
"grad_norm": 29.53614044189453,
"learning_rate": 3.7796447300922724e-06,
"log_odds_chosen": 3.1386218070983887,
"log_odds_ratio": -0.13156357407569885,
"logits/chosen": 374.08306884765625,
"logits/rejected": 370.32257080078125,
"logps/chosen": -0.6971138119697571,
"logps/rejected": -2.928040027618408,
"loss": 0.963,
"nll_loss": 1.0014644861221313,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.34855690598487854,
"rewards/margins": 1.115463137626648,
"rewards/rejected": -1.464020013809204,
"step": 175
},
{
"epoch": 2.142857142857143,
"grad_norm": 20.2189884185791,
"learning_rate": 3.72677996249965e-06,
"log_odds_chosen": 2.8878941535949707,
"log_odds_ratio": -0.10618897527456284,
"logits/chosen": 361.66241455078125,
"logits/rejected": 393.54351806640625,
"logps/chosen": -0.572822093963623,
"logps/rejected": -2.587498664855957,
"loss": 0.9084,
"nll_loss": 0.8843202590942383,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.2864110469818115,
"rewards/margins": 1.007338285446167,
"rewards/rejected": -1.2937493324279785,
"step": 180
},
{
"epoch": 2.2023809523809526,
"grad_norm": 15.425383567810059,
"learning_rate": 3.6760731104690393e-06,
"log_odds_chosen": 2.7572951316833496,
"log_odds_ratio": -0.13107402622699738,
"logits/chosen": 328.209228515625,
"logits/rejected": 318.043701171875,
"logps/chosen": -0.5289679765701294,
"logps/rejected": -2.471531629562378,
"loss": 0.9075,
"nll_loss": 0.8783146142959595,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.2644839882850647,
"rewards/margins": 0.9712821245193481,
"rewards/rejected": -1.235765814781189,
"step": 185
},
{
"epoch": 2.261904761904762,
"grad_norm": 17.949966430664062,
"learning_rate": 3.6273812505500587e-06,
"log_odds_chosen": 2.646030902862549,
"log_odds_ratio": -0.1722700595855713,
"logits/chosen": 373.1665954589844,
"logits/rejected": 363.8042297363281,
"logps/chosen": -0.6163553595542908,
"logps/rejected": -2.3227782249450684,
"loss": 0.8853,
"nll_loss": 0.8921284675598145,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.3081776797771454,
"rewards/margins": 0.8532115817070007,
"rewards/rejected": -1.1613891124725342,
"step": 190
},
{
"epoch": 2.3214285714285716,
"grad_norm": 15.254051208496094,
"learning_rate": 3.5805743701971648e-06,
"log_odds_chosen": 3.064070463180542,
"log_odds_ratio": -0.11556991189718246,
"logits/chosen": 355.3418884277344,
"logits/rejected": 384.1878967285156,
"logps/chosen": -0.5151618123054504,
"logps/rejected": -2.5838632583618164,
"loss": 0.9013,
"nll_loss": 0.8009279370307922,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.2575809061527252,
"rewards/margins": 1.0343506336212158,
"rewards/rejected": -1.2919316291809082,
"step": 195
},
{
"epoch": 2.380952380952381,
"grad_norm": 18.17316246032715,
"learning_rate": 3.5355339059327378e-06,
"log_odds_chosen": 3.8410885334014893,
"log_odds_ratio": -0.07899609953165054,
"logits/chosen": 351.03741455078125,
"logits/rejected": 372.4954528808594,
"logps/chosen": -0.4664763808250427,
"logps/rejected": -3.2068772315979004,
"loss": 0.8742,
"nll_loss": 0.7695188522338867,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.23323819041252136,
"rewards/margins": 1.3702003955841064,
"rewards/rejected": -1.6034386157989502,
"step": 200
},
{
"epoch": 2.4404761904761907,
"grad_norm": 13.679876327514648,
"learning_rate": 3.4921514788478916e-06,
"log_odds_chosen": 2.8450164794921875,
"log_odds_ratio": -0.16636498272418976,
"logits/chosen": 386.46038818359375,
"logits/rejected": 371.1928405761719,
"logps/chosen": -0.5075671076774597,
"logps/rejected": -2.49824595451355,
"loss": 0.8909,
"nll_loss": 0.7808379530906677,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.25378355383872986,
"rewards/margins": 0.9953393936157227,
"rewards/rejected": -1.249122977256775,
"step": 205
},
{
"epoch": 2.5,
"grad_norm": 15.127974510192871,
"learning_rate": 3.450327796711771e-06,
"log_odds_chosen": 3.1379058361053467,
"log_odds_ratio": -0.0948343575000763,
"logits/chosen": 385.96038818359375,
"logits/rejected": 346.40997314453125,
"logps/chosen": -0.46884965896606445,
"logps/rejected": -2.5977988243103027,
"loss": 0.8884,
"nll_loss": 0.9064348340034485,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.23442482948303223,
"rewards/margins": 1.0644747018814087,
"rewards/rejected": -1.2988994121551514,
"step": 210
},
{
"epoch": 2.5595238095238093,
"grad_norm": 14.862520217895508,
"learning_rate": 3.409971697352368e-06,
"log_odds_chosen": 3.3548312187194824,
"log_odds_ratio": -0.07949019968509674,
"logits/chosen": 366.4288024902344,
"logits/rejected": 379.4535827636719,
"logps/chosen": -0.48801979422569275,
"logps/rejected": -2.8395187854766846,
"loss": 0.8614,
"nll_loss": 0.8571261167526245,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.24400989711284637,
"rewards/margins": 1.1757495403289795,
"rewards/rejected": -1.4197593927383423,
"step": 215
},
{
"epoch": 2.619047619047619,
"grad_norm": 14.53584098815918,
"learning_rate": 3.3709993123162106e-06,
"log_odds_chosen": 2.9063503742218018,
"log_odds_ratio": -0.11560215055942535,
"logits/chosen": 353.8319396972656,
"logits/rejected": 356.432373046875,
"logps/chosen": -0.4644307494163513,
"logps/rejected": -2.378437042236328,
"loss": 0.9108,
"nll_loss": 0.7656416893005371,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.23221537470817566,
"rewards/margins": 0.9570032358169556,
"rewards/rejected": -1.189218521118164,
"step": 220
},
{
"epoch": 2.678571428571429,
"grad_norm": 17.206817626953125,
"learning_rate": 3.3333333333333333e-06,
"log_odds_chosen": 3.093144178390503,
"log_odds_ratio": -0.10332699865102768,
"logits/chosen": 360.0953063964844,
"logits/rejected": 369.03460693359375,
"logps/chosen": -0.6034930944442749,
"logps/rejected": -2.8071255683898926,
"loss": 0.9202,
"nll_loss": 0.8482205271720886,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.30174654722213745,
"rewards/margins": 1.1018160581588745,
"rewards/rejected": -1.4035627841949463,
"step": 225
},
{
"epoch": 2.738095238095238,
"grad_norm": 32.90928268432617,
"learning_rate": 3.296902366978936e-06,
"log_odds_chosen": 2.7679336071014404,
"log_odds_ratio": -0.1059746965765953,
"logits/chosen": 395.354736328125,
"logits/rejected": 391.44451904296875,
"logps/chosen": -0.48557600378990173,
"logps/rejected": -2.241939067840576,
"loss": 0.9492,
"nll_loss": 0.863217830657959,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.24278800189495087,
"rewards/margins": 0.8781815767288208,
"rewards/rejected": -1.120969533920288,
"step": 230
},
{
"epoch": 2.7976190476190474,
"grad_norm": 19.33934783935547,
"learning_rate": 3.2616403652672114e-06,
"log_odds_chosen": 2.7701828479766846,
"log_odds_ratio": -0.13227275013923645,
"logits/chosen": 384.26873779296875,
"logits/rejected": 360.5152893066406,
"logps/chosen": -0.5336109399795532,
"logps/rejected": -2.403452157974243,
"loss": 0.9262,
"nll_loss": 0.8939388394355774,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.2668054699897766,
"rewards/margins": 0.9349204897880554,
"rewards/rejected": -1.2017260789871216,
"step": 235
},
{
"epoch": 2.857142857142857,
"grad_norm": 20.41938591003418,
"learning_rate": 3.2274861218395142e-06,
"log_odds_chosen": 3.104609251022339,
"log_odds_ratio": -0.09935127198696136,
"logits/chosen": 356.3617248535156,
"logits/rejected": 407.67620849609375,
"logps/chosen": -0.5356149673461914,
"logps/rejected": -2.744302272796631,
"loss": 0.8823,
"nll_loss": 0.7891393899917603,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.2678074836730957,
"rewards/margins": 1.1043436527252197,
"rewards/rejected": -1.3721511363983154,
"step": 240
},
{
"epoch": 2.9166666666666665,
"grad_norm": 28.204269409179688,
"learning_rate": 3.1943828249997e-06,
"log_odds_chosen": 2.208242893218994,
"log_odds_ratio": -0.1713695377111435,
"logits/chosen": 374.45611572265625,
"logits/rejected": 340.09619140625,
"logps/chosen": -0.7715775966644287,
"logps/rejected": -2.332731008529663,
"loss": 0.9365,
"nll_loss": 0.9215513467788696,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.38578879833221436,
"rewards/margins": 0.7805767059326172,
"rewards/rejected": -1.1663655042648315,
"step": 245
},
{
"epoch": 2.9761904761904763,
"grad_norm": 30.126291275024414,
"learning_rate": 3.1622776601683796e-06,
"log_odds_chosen": 2.9035000801086426,
"log_odds_ratio": -0.11840321123600006,
"logits/chosen": 376.98828125,
"logits/rejected": 380.9736328125,
"logps/chosen": -0.48586076498031616,
"logps/rejected": -2.4321203231811523,
"loss": 0.9109,
"nll_loss": 0.7184505462646484,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.24293038249015808,
"rewards/margins": 0.9731297492980957,
"rewards/rejected": -1.2160601615905762,
"step": 250
},
{
"epoch": 3.0,
"eval_log_odds_chosen": 0.4396049678325653,
"eval_log_odds_ratio": -0.6420542597770691,
"eval_logits/chosen": 301.1214599609375,
"eval_logits/rejected": 240.39068603515625,
"eval_logps/chosen": -1.19857656955719,
"eval_logps/rejected": -1.5204962491989136,
"eval_loss": 1.8577181100845337,
"eval_nll_loss": 1.5532194375991821,
"eval_rewards/accuracies": 0.6142857074737549,
"eval_rewards/chosen": -0.599288284778595,
"eval_rewards/margins": 0.16095994412899017,
"eval_rewards/rejected": -0.7602481245994568,
"eval_runtime": 201.7642,
"eval_samples_per_second": 2.741,
"eval_steps_per_second": 0.347,
"step": 252
},
{
"epoch": 3.0,
"step": 252,
"total_flos": 0.0,
"train_loss": 2.2429193542117165,
"train_runtime": 13126.4564,
"train_samples_per_second": 1.226,
"train_steps_per_second": 0.019
}
],
"logging_steps": 5,
"max_steps": 252,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}