hZzy's picture
Model save
c968b9f verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.992914501653283,
"eval_steps": 50,
"global_step": 1056,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"dpo_loss": 0.6931471824645996,
"epoch": 0.002834199338686821,
"grad_norm": 17675.58033930074,
"learning_rate": 4.716981132075472e-09,
"logits": -1.2867579460144043,
"logps": -84.34933471679688,
"loss": 169.5214,
"objective": 153.4677734375,
"ranking_idealized": 0.6458333134651184,
"ranking_idealized_expo": 0.5833333134651184,
"ranking_simple": 0.5833333134651184,
"regularize": 0.3618059456348419,
"step": 1,
"wo_beta": 14.83154582977295
},
{
"dpo_loss": 0.6930959224700928,
"epoch": 0.014170996693434105,
"grad_norm": 16812.231100839916,
"learning_rate": 2.3584905660377358e-08,
"logits": -1.4290882349014282,
"logps": -83.8636474609375,
"loss": 181.7078,
"objective": 168.5659942626953,
"ranking_idealized": 0.5625,
"ranking_idealized_expo": 0.4895833432674408,
"ranking_simple": 0.4895833432674408,
"regularize": 0.40367603302001953,
"step": 5,
"wo_beta": 16.679981231689453
},
{
"dpo_loss": 0.6930798292160034,
"epoch": 0.02834199338686821,
"grad_norm": 18597.19901899509,
"learning_rate": 4.7169811320754715e-08,
"logits": -1.4008352756500244,
"logps": -84.84938049316406,
"loss": 177.1073,
"objective": 170.35797119140625,
"ranking_idealized": 0.6708333492279053,
"ranking_idealized_expo": 0.5833333134651184,
"ranking_simple": 0.5791666507720947,
"regularize": 0.4039422273635864,
"step": 10,
"wo_beta": 15.222180366516113
},
{
"dpo_loss": 0.6921038031578064,
"epoch": 0.042512990080302314,
"grad_norm": 17855.275799007577,
"learning_rate": 7.075471698113207e-08,
"logits": -1.538023829460144,
"logps": -84.5517578125,
"loss": 178.9814,
"objective": 187.4513702392578,
"ranking_idealized": 0.6499999761581421,
"ranking_idealized_expo": 0.5708333253860474,
"ranking_simple": 0.5666666626930237,
"regularize": 0.4422657787799835,
"step": 15,
"wo_beta": 15.718367576599121
},
{
"dpo_loss": 0.6917796730995178,
"epoch": 0.05668398677373642,
"grad_norm": 17564.60110673315,
"learning_rate": 9.433962264150943e-08,
"logits": -1.3617039918899536,
"logps": -83.66792297363281,
"loss": 185.8199,
"objective": 204.0640411376953,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.48750001192092896,
"regularize": 0.44205835461616516,
"step": 20,
"wo_beta": 16.52640151977539
},
{
"dpo_loss": 0.6927011013031006,
"epoch": 0.07085498346717052,
"grad_norm": 16991.775752313566,
"learning_rate": 1.1792452830188679e-07,
"logits": -1.3692513704299927,
"logps": -83.765869140625,
"loss": 182.1115,
"objective": 173.06422424316406,
"ranking_idealized": 0.6458333134651184,
"ranking_idealized_expo": 0.5583333373069763,
"ranking_simple": 0.5666666626930237,
"regularize": 0.40760377049446106,
"step": 25,
"wo_beta": 15.608158111572266
},
{
"dpo_loss": 0.6904457211494446,
"epoch": 0.08502598016060463,
"grad_norm": 14856.204222337537,
"learning_rate": 1.4150943396226414e-07,
"logits": -1.4308700561523438,
"logps": -83.563232421875,
"loss": 181.7541,
"objective": 176.98880004882812,
"ranking_idealized": 0.5541666746139526,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.48750001192092896,
"regularize": 0.43005380034446716,
"step": 30,
"wo_beta": 17.01230812072754
},
{
"dpo_loss": 0.6906622648239136,
"epoch": 0.09919697685403873,
"grad_norm": 16081.157641472842,
"learning_rate": 1.650943396226415e-07,
"logits": -1.4087789058685303,
"logps": -82.7640151977539,
"loss": 184.1344,
"objective": 172.8912811279297,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5416666865348816,
"regularize": 0.37934428453445435,
"step": 35,
"wo_beta": 16.152484893798828
},
{
"dpo_loss": 0.6896480917930603,
"epoch": 0.11336797354747284,
"grad_norm": 17056.964984105944,
"learning_rate": 1.8867924528301886e-07,
"logits": -1.4006307125091553,
"logps": -83.35142517089844,
"loss": 188.1977,
"objective": 182.53704833984375,
"ranking_idealized": 0.5708333253860474,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5166666507720947,
"regularize": 0.40673011541366577,
"step": 40,
"wo_beta": 14.254140853881836
},
{
"dpo_loss": 0.6860460638999939,
"epoch": 0.12753897024090693,
"grad_norm": 15541.887109298903,
"learning_rate": 2.1226415094339622e-07,
"logits": -1.4170690774917603,
"logps": -83.82962799072266,
"loss": 172.0023,
"objective": 184.33473205566406,
"ranking_idealized": 0.5958333611488342,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.512499988079071,
"regularize": 0.41498348116874695,
"step": 45,
"wo_beta": 14.2799711227417
},
{
"dpo_loss": 0.6840464472770691,
"epoch": 0.14170996693434104,
"grad_norm": 16674.096437377164,
"learning_rate": 2.3584905660377358e-07,
"logits": -1.4327392578125,
"logps": -84.8567123413086,
"loss": 182.5182,
"objective": 187.45941162109375,
"ranking_idealized": 0.5916666388511658,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5041666626930237,
"regularize": 0.43751442432403564,
"step": 50,
"wo_beta": 15.616755485534668
},
{
"epoch": 0.14170996693434104,
"eval_dpo_loss": 0.689544677734375,
"eval_logits": -1.4199916124343872,
"eval_logps": -90.85165405273438,
"eval_loss": 182.50025939941406,
"eval_objective": 180.4892578125,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.5248447060585022,
"eval_regularize": 0.4092595875263214,
"eval_runtime": 369.2017,
"eval_samples_per_second": 15.682,
"eval_steps_per_second": 1.308,
"eval_wo_beta": 16.310007095336914,
"step": 50
},
{
"dpo_loss": 0.6828119158744812,
"epoch": 0.15588096362777515,
"grad_norm": 17241.419986006367,
"learning_rate": 2.5943396226415094e-07,
"logits": -1.3938590288162231,
"logps": -84.56362915039062,
"loss": 171.7727,
"objective": 174.47201538085938,
"ranking_idealized": 0.5583333373069763,
"ranking_idealized_expo": 0.49166667461395264,
"ranking_simple": 0.4833333194255829,
"regularize": 0.3751158118247986,
"step": 55,
"wo_beta": 15.145721435546875
},
{
"dpo_loss": 0.6828226447105408,
"epoch": 0.17005196032120926,
"grad_norm": 16193.112384702756,
"learning_rate": 2.830188679245283e-07,
"logits": -1.325377345085144,
"logps": -85.11466217041016,
"loss": 175.0018,
"objective": 174.82723999023438,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5208333134651184,
"regularize": 0.41120079159736633,
"step": 60,
"wo_beta": 14.497312545776367
},
{
"dpo_loss": 0.673675537109375,
"epoch": 0.18422295701464336,
"grad_norm": 16474.473591772632,
"learning_rate": 3.066037735849056e-07,
"logits": -1.4237332344055176,
"logps": -82.10260772705078,
"loss": 174.1747,
"objective": 163.51272583007812,
"ranking_idealized": 0.637499988079071,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5333333611488342,
"regularize": 0.3746616542339325,
"step": 65,
"wo_beta": 15.076056480407715
},
{
"dpo_loss": 0.6760156750679016,
"epoch": 0.19839395370807747,
"grad_norm": 15655.300421670066,
"learning_rate": 3.30188679245283e-07,
"logits": -1.4630695581436157,
"logps": -84.45524597167969,
"loss": 175.2515,
"objective": 174.8110809326172,
"ranking_idealized": 0.5916666388511658,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5333333611488342,
"regularize": 0.40165895223617554,
"step": 70,
"wo_beta": 15.362497329711914
},
{
"dpo_loss": 0.6764008402824402,
"epoch": 0.21256495040151158,
"grad_norm": 18669.073461411434,
"learning_rate": 3.5377358490566033e-07,
"logits": -1.3853403329849243,
"logps": -84.13139343261719,
"loss": 176.1206,
"objective": 166.9723663330078,
"ranking_idealized": 0.6083333492279053,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5208333134651184,
"regularize": 0.38751351833343506,
"step": 75,
"wo_beta": 15.094878196716309
},
{
"dpo_loss": 0.6740989089012146,
"epoch": 0.22673594709494568,
"grad_norm": 19007.32032313182,
"learning_rate": 3.773584905660377e-07,
"logits": -1.481835126876831,
"logps": -83.50402069091797,
"loss": 170.5001,
"objective": 178.72813415527344,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5458333492279053,
"regularize": 0.42763322591781616,
"step": 80,
"wo_beta": 16.335308074951172
},
{
"dpo_loss": 0.6696261167526245,
"epoch": 0.2409069437883798,
"grad_norm": 15826.33154406391,
"learning_rate": 4.009433962264151e-07,
"logits": -1.4629038572311401,
"logps": -83.72789001464844,
"loss": 173.4106,
"objective": 184.8107452392578,
"ranking_idealized": 0.625,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.550000011920929,
"regularize": 0.41807958483695984,
"step": 85,
"wo_beta": 15.131178855895996
},
{
"dpo_loss": 0.6652686595916748,
"epoch": 0.25507794048181387,
"grad_norm": 16251.074571263394,
"learning_rate": 4.2452830188679244e-07,
"logits": -1.562590479850769,
"logps": -84.50687408447266,
"loss": 170.0469,
"objective": 178.0680694580078,
"ranking_idealized": 0.612500011920929,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5541666746139526,
"regularize": 0.40833932161331177,
"step": 90,
"wo_beta": 15.540740966796875
},
{
"dpo_loss": 0.6606998443603516,
"epoch": 0.269248937175248,
"grad_norm": 17104.36417405731,
"learning_rate": 4.481132075471698e-07,
"logits": -1.4817092418670654,
"logps": -84.26000213623047,
"loss": 172.5874,
"objective": 179.69664001464844,
"ranking_idealized": 0.5583333373069763,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.512499988079071,
"regularize": 0.412168025970459,
"step": 95,
"wo_beta": 16.397871017456055
},
{
"dpo_loss": 0.6613048315048218,
"epoch": 0.2834199338686821,
"grad_norm": 16520.717626446203,
"learning_rate": 4.7169811320754717e-07,
"logits": -1.3626132011413574,
"logps": -83.24072265625,
"loss": 159.305,
"objective": 164.95997619628906,
"ranking_idealized": 0.5958333611488342,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.5,
"regularize": 0.37101998925209045,
"step": 100,
"wo_beta": 15.463290214538574
},
{
"epoch": 0.2834199338686821,
"eval_dpo_loss": 0.6859607100486755,
"eval_logits": -1.4621983766555786,
"eval_logps": -91.35309600830078,
"eval_loss": 182.15220642089844,
"eval_objective": 180.52191162109375,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.5310559272766113,
"eval_regularize": 0.41025590896606445,
"eval_runtime": 368.4707,
"eval_samples_per_second": 15.714,
"eval_steps_per_second": 1.311,
"eval_wo_beta": 16.38188362121582,
"step": 100
},
{
"dpo_loss": 0.6732772588729858,
"epoch": 0.2975909305621162,
"grad_norm": 20087.528218167263,
"learning_rate": 4.952830188679246e-07,
"logits": -1.5680618286132812,
"logps": -86.15119934082031,
"loss": 163.5086,
"objective": 165.89260864257812,
"ranking_idealized": 0.625,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5375000238418579,
"regularize": 0.38661062717437744,
"step": 105,
"wo_beta": 14.799639701843262
},
{
"dpo_loss": 0.6610164046287537,
"epoch": 0.3117619272555503,
"grad_norm": 16022.782640878671,
"learning_rate": 4.999781286194085e-07,
"logits": -1.470965027809143,
"logps": -85.03189849853516,
"loss": 162.0049,
"objective": 163.46115112304688,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5166666507720947,
"regularize": 0.38891494274139404,
"step": 110,
"wo_beta": 15.151239395141602
},
{
"dpo_loss": 0.6605216860771179,
"epoch": 0.32593292394898443,
"grad_norm": 17332.290004559494,
"learning_rate": 4.998892826944417e-07,
"logits": -1.4446924924850464,
"logps": -83.6183090209961,
"loss": 151.5536,
"objective": 154.1279296875,
"ranking_idealized": 0.6208333373069763,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5416666865348816,
"regularize": 0.37217938899993896,
"step": 115,
"wo_beta": 16.30828094482422
},
{
"dpo_loss": 0.6518290042877197,
"epoch": 0.3401039206424185,
"grad_norm": 15444.050796566442,
"learning_rate": 4.997321195347154e-07,
"logits": -1.4417078495025635,
"logps": -83.37753295898438,
"loss": 159.0179,
"objective": 166.70437622070312,
"ranking_idealized": 0.5916666388511658,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5416666865348816,
"regularize": 0.36605674028396606,
"step": 120,
"wo_beta": 15.73963451385498
},
{
"dpo_loss": 0.655017614364624,
"epoch": 0.35427491733585265,
"grad_norm": 16163.2780124362,
"learning_rate": 4.995066821070679e-07,
"logits": -1.479369044303894,
"logps": -86.81266021728516,
"loss": 152.8657,
"objective": 143.82176208496094,
"ranking_idealized": 0.5583333373069763,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.5083333253860474,
"regularize": 0.3394821584224701,
"step": 125,
"wo_beta": 16.155017852783203
},
{
"dpo_loss": 0.6490565538406372,
"epoch": 0.3684459140292867,
"grad_norm": 21066.462923637613,
"learning_rate": 4.99213032043841e-07,
"logits": -1.4559980630874634,
"logps": -84.79336547851562,
"loss": 159.2873,
"objective": 164.24014282226562,
"ranking_idealized": 0.6458333134651184,
"ranking_idealized_expo": 0.5916666388511658,
"ranking_simple": 0.5791666507720947,
"regularize": 0.36314311623573303,
"step": 130,
"wo_beta": 16.631132125854492
},
{
"dpo_loss": 0.6596164703369141,
"epoch": 0.3826169107227208,
"grad_norm": 25406.331880366477,
"learning_rate": 4.988512496260301e-07,
"logits": -1.4966251850128174,
"logps": -85.71400451660156,
"loss": 162.2932,
"objective": 171.63385009765625,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5,
"regularize": 0.3589702546596527,
"step": 135,
"wo_beta": 14.497623443603516
},
{
"dpo_loss": 0.6495281457901001,
"epoch": 0.39678790741615494,
"grad_norm": 18698.7190702899,
"learning_rate": 4.984214337613357e-07,
"logits": -1.4679287672042847,
"logps": -85.32872772216797,
"loss": 145.3416,
"objective": 162.12405395507812,
"ranking_idealized": 0.612500011920929,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5583333373069763,
"regularize": 0.37301716208457947,
"step": 140,
"wo_beta": 14.43948745727539
},
{
"dpo_loss": 0.6436702013015747,
"epoch": 0.410958904109589,
"grad_norm": 17254.43790008681,
"learning_rate": 4.979237019571234e-07,
"logits": -1.4821332693099976,
"logps": -85.37076568603516,
"loss": 150.1815,
"objective": 147.49313354492188,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.5583333373069763,
"ranking_simple": 0.5958333611488342,
"regularize": 0.35633403062820435,
"step": 145,
"wo_beta": 14.047316551208496
},
{
"dpo_loss": 0.6469600796699524,
"epoch": 0.42512990080302315,
"grad_norm": 15151.951735697437,
"learning_rate": 4.973581902882989e-07,
"logits": -1.507290005683899,
"logps": -84.8727035522461,
"loss": 150.2379,
"objective": 155.10818481445312,
"ranking_idealized": 0.5958333611488342,
"ranking_idealized_expo": 0.5541666746139526,
"ranking_simple": 0.5625,
"regularize": 0.35707762837409973,
"step": 150,
"wo_beta": 17.33567237854004
},
{
"epoch": 0.42512990080302315,
"eval_dpo_loss": 0.6805834770202637,
"eval_logits": -1.4576332569122314,
"eval_logps": -90.24694061279297,
"eval_loss": 180.0574951171875,
"eval_objective": 177.15777587890625,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.5331262946128845,
"eval_regularize": 0.40097880363464355,
"eval_runtime": 369.0413,
"eval_samples_per_second": 15.689,
"eval_steps_per_second": 1.309,
"eval_wo_beta": 16.610729217529297,
"step": 150
},
{
"dpo_loss": 0.645554780960083,
"epoch": 0.43930089749645723,
"grad_norm": 16201.615637764453,
"learning_rate": 4.967250533601059e-07,
"logits": -1.5539363622665405,
"logps": -83.4765396118164,
"loss": 146.8879,
"objective": 153.79454040527344,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.49166667461395264,
"regularize": 0.3449983596801758,
"step": 155,
"wo_beta": 15.733673095703125
},
{
"dpo_loss": 0.6385053396224976,
"epoch": 0.45347189418989137,
"grad_norm": 15493.137210302475,
"learning_rate": 4.960244642658585e-07,
"logits": -1.4331082105636597,
"logps": -84.22053527832031,
"loss": 152.5499,
"objective": 151.0172882080078,
"ranking_idealized": 0.5916666388511658,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5625,
"regularize": 0.34484514594078064,
"step": 160,
"wo_beta": 14.795166015625
},
{
"dpo_loss": 0.6392009854316711,
"epoch": 0.46764289088332545,
"grad_norm": 17120.620589579255,
"learning_rate": 4.952566145396196e-07,
"logits": -1.5298134088516235,
"logps": -85.76438903808594,
"loss": 144.1923,
"objective": 138.38796997070312,
"ranking_idealized": 0.6083333492279053,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.550000011920929,
"regularize": 0.32356587052345276,
"step": 165,
"wo_beta": 15.664267539978027
},
{
"dpo_loss": 0.6427010893821716,
"epoch": 0.4818138875767596,
"grad_norm": 16261.789384415662,
"learning_rate": 4.944217141038378e-07,
"logits": -1.5661406517028809,
"logps": -85.18688201904297,
"loss": 146.9829,
"objective": 135.14251708984375,
"ranking_idealized": 0.5583333373069763,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5166666507720947,
"regularize": 0.32022854685783386,
"step": 170,
"wo_beta": 16.11832046508789
},
{
"dpo_loss": 0.6393853425979614,
"epoch": 0.49598488427019366,
"grad_norm": 17356.348275057364,
"learning_rate": 4.935199912119557e-07,
"logits": -1.4016886949539185,
"logps": -86.42796325683594,
"loss": 138.7378,
"objective": 130.81809997558594,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.550000011920929,
"regularize": 0.3065473735332489,
"step": 175,
"wo_beta": 18.01983070373535
},
{
"dpo_loss": 0.6324561238288879,
"epoch": 0.5101558809636277,
"grad_norm": 16878.815105316982,
"learning_rate": 4.925516923860082e-07,
"logits": -1.387779951095581,
"logps": -85.71736907958984,
"loss": 143.5962,
"objective": 155.15878295898438,
"ranking_idealized": 0.5708333253860474,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.5083333253860474,
"regularize": 0.34209319949150085,
"step": 180,
"wo_beta": 15.11577033996582
},
{
"dpo_loss": 0.6253044605255127,
"epoch": 0.5243268776570619,
"grad_norm": 18146.789309631047,
"learning_rate": 4.91517082349226e-07,
"logits": -1.4047019481658936,
"logps": -84.99198913574219,
"loss": 137.7836,
"objective": 136.2899627685547,
"ranking_idealized": 0.5791666507720947,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.5291666388511658,
"regularize": 0.32172784209251404,
"step": 185,
"wo_beta": 14.294845581054688
},
{
"dpo_loss": 0.618080735206604,
"epoch": 0.538497874350496,
"grad_norm": 16414.68768978514,
"learning_rate": 4.904164439536626e-07,
"logits": -1.438673496246338,
"logps": -84.07417297363281,
"loss": 134.8449,
"objective": 134.37405395507812,
"ranking_idealized": 0.6625000238418579,
"ranking_idealized_expo": 0.5625,
"ranking_simple": 0.6083333492279053,
"regularize": 0.30226340889930725,
"step": 190,
"wo_beta": 17.166841506958008
},
{
"dpo_loss": 0.6151688694953918,
"epoch": 0.5526688710439301,
"grad_norm": 17479.85306665603,
"learning_rate": 4.892500781028655e-07,
"logits": -1.4530678987503052,
"logps": -84.28058624267578,
"loss": 143.4298,
"objective": 148.5701141357422,
"ranking_idealized": 0.625,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5666666626930237,
"regularize": 0.32925572991371155,
"step": 195,
"wo_beta": 15.276497840881348
},
{
"dpo_loss": 0.6147686243057251,
"epoch": 0.5668398677373642,
"grad_norm": 17426.518013700184,
"learning_rate": 4.880183036696122e-07,
"logits": -1.461132287979126,
"logps": -84.65553283691406,
"loss": 135.925,
"objective": 143.74124145507812,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5291666388511658,
"regularize": 0.3410433530807495,
"step": 200,
"wo_beta": 16.30559539794922
},
{
"epoch": 0.5668398677373642,
"eval_dpo_loss": 0.6795096397399902,
"eval_logits": -1.445342779159546,
"eval_logps": -91.12489318847656,
"eval_loss": 179.97398376464844,
"eval_objective": 177.04129028320312,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.5305383205413818,
"eval_regularize": 0.40064504742622375,
"eval_runtime": 371.8339,
"eval_samples_per_second": 15.571,
"eval_steps_per_second": 1.299,
"eval_wo_beta": 16.26874351501465,
"step": 200
},
{
"dpo_loss": 0.6258116960525513,
"epoch": 0.5810108644307983,
"grad_norm": 16576.414881735825,
"learning_rate": 4.867214574087337e-07,
"logits": -1.3335182666778564,
"logps": -85.45887756347656,
"loss": 131.7019,
"objective": 124.74952697753906,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5208333134651184,
"regularize": 0.30992764234542847,
"step": 205,
"wo_beta": 16.508281707763672
},
{
"dpo_loss": 0.6121302247047424,
"epoch": 0.5951818611242324,
"grad_norm": 19803.851938298896,
"learning_rate": 4.853598938650486e-07,
"logits": -1.3813374042510986,
"logps": -85.58969116210938,
"loss": 142.8564,
"objective": 147.2575225830078,
"ranking_idealized": 0.6041666865348816,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5458333492279053,
"regularize": 0.34093615412712097,
"step": 210,
"wo_beta": 15.581511497497559
},
{
"dpo_loss": 0.6104889512062073,
"epoch": 0.6093528578176665,
"grad_norm": 16635.20929245541,
"learning_rate": 4.839339852764349e-07,
"logits": -1.4747200012207031,
"logps": -84.081298828125,
"loss": 132.2213,
"objective": 143.8260040283203,
"ranking_idealized": 0.5791666507720947,
"ranking_idealized_expo": 0.4749999940395355,
"ranking_simple": 0.5249999761581421,
"regularize": 0.3345707654953003,
"step": 215,
"wo_beta": 16.507150650024414
},
{
"dpo_loss": 0.6056095957756042,
"epoch": 0.6235238545111006,
"grad_norm": 17892.305863583017,
"learning_rate": 4.824441214720628e-07,
"logits": -1.4685624837875366,
"logps": -84.3477783203125,
"loss": 136.6061,
"objective": 134.5189971923828,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.512499988079071,
"regularize": 0.3130161464214325,
"step": 220,
"wo_beta": 16.86256217956543
},
{
"dpo_loss": 0.6179187297821045,
"epoch": 0.6376948512045347,
"grad_norm": 15080.021415971216,
"learning_rate": 4.808907097658205e-07,
"logits": -1.5259219408035278,
"logps": -85.83920288085938,
"loss": 132.0525,
"objective": 135.1264190673828,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.512499988079071,
"regularize": 0.310780793428421,
"step": 225,
"wo_beta": 15.415994644165039
},
{
"dpo_loss": 0.613567590713501,
"epoch": 0.6518658478979689,
"grad_norm": 15927.895183583649,
"learning_rate": 4.792741748449574e-07,
"logits": -1.4311482906341553,
"logps": -85.6706314086914,
"loss": 121.739,
"objective": 121.26396179199219,
"ranking_idealized": 0.637499988079071,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.5916666388511658,
"regularize": 0.29853278398513794,
"step": 230,
"wo_beta": 16.95390510559082
},
{
"dpo_loss": 0.6123810410499573,
"epoch": 0.6660368445914029,
"grad_norm": 18377.873397463725,
"learning_rate": 4.775949586539803e-07,
"logits": -1.3708454370498657,
"logps": -86.44990539550781,
"loss": 121.6467,
"objective": 109.16976165771484,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5583333373069763,
"regularize": 0.2606847584247589,
"step": 235,
"wo_beta": 16.56985092163086
},
{
"dpo_loss": 0.6006901264190674,
"epoch": 0.680207841284837,
"grad_norm": 20176.56902743112,
"learning_rate": 4.758535202738287e-07,
"logits": -1.5398815870285034,
"logps": -86.04439544677734,
"loss": 135.6713,
"objective": 137.5254364013672,
"ranking_idealized": 0.6041666865348816,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5416666865348816,
"regularize": 0.29497501254081726,
"step": 240,
"wo_beta": 16.538175582885742
},
{
"dpo_loss": 0.6231993436813354,
"epoch": 0.6943788379782712,
"grad_norm": 16269.619431330953,
"learning_rate": 4.7405033579636755e-07,
"logits": -1.5562618970870972,
"logps": -86.00993347167969,
"loss": 122.2342,
"objective": 110.85368347167969,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.574999988079071,
"regularize": 0.2635067105293274,
"step": 245,
"wo_beta": 16.003578186035156
},
{
"dpo_loss": 0.608769953250885,
"epoch": 0.7085498346717053,
"grad_norm": 16883.673846830992,
"learning_rate": 4.721858981942284e-07,
"logits": -1.3197777271270752,
"logps": -84.4737319946289,
"loss": 130.7065,
"objective": 131.6678924560547,
"ranking_idealized": 0.612500011920929,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5708333253860474,
"regularize": 0.29679057002067566,
"step": 250,
"wo_beta": 15.271538734436035
},
{
"epoch": 0.7085498346717053,
"eval_dpo_loss": 0.680046558380127,
"eval_logits": -1.5061429738998413,
"eval_logps": -91.61782836914062,
"eval_loss": 181.50924682617188,
"eval_objective": 178.27838134765625,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.5305383205413818,
"eval_regularize": 0.404918372631073,
"eval_runtime": 368.2121,
"eval_samples_per_second": 15.725,
"eval_steps_per_second": 1.312,
"eval_wo_beta": 16.640703201293945,
"step": 250
},
{
"dpo_loss": 0.6077960729598999,
"epoch": 0.7227208313651393,
"grad_norm": 16400.742832285967,
"learning_rate": 4.702607171860353e-07,
"logits": -1.582943320274353,
"logps": -83.8155746459961,
"loss": 128.7728,
"objective": 116.49504852294922,
"ranking_idealized": 0.5708333253860474,
"ranking_idealized_expo": 0.42916667461395264,
"ranking_simple": 0.4833333194255829,
"regularize": 0.28917694091796875,
"step": 255,
"wo_beta": 15.234207153320312
},
{
"dpo_loss": 0.6078327298164368,
"epoch": 0.7368918280585735,
"grad_norm": 19074.785460432806,
"learning_rate": 4.6827531909705327e-07,
"logits": -1.5513815879821777,
"logps": -85.73532104492188,
"loss": 120.8491,
"objective": 135.26841735839844,
"ranking_idealized": 0.5416666865348816,
"ranking_idealized_expo": 0.4625000059604645,
"ranking_simple": 0.512499988079071,
"regularize": 0.3036416172981262,
"step": 260,
"wo_beta": 15.789148330688477
},
{
"dpo_loss": 0.6069940328598022,
"epoch": 0.7510628247520076,
"grad_norm": 14839.46399025929,
"learning_rate": 4.662302467152955e-07,
"logits": -1.4743403196334839,
"logps": -85.0496597290039,
"loss": 115.1093,
"objective": 120.47551727294922,
"ranking_idealized": 0.6625000238418579,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.6208333373069763,
"regularize": 0.29132476449012756,
"step": 265,
"wo_beta": 16.472087860107422
},
{
"dpo_loss": 0.6180242300033569,
"epoch": 0.7652338214454416,
"grad_norm": 17448.69545980003,
"learning_rate": 4.6412605914313143e-07,
"logits": -1.5716725587844849,
"logps": -85.8649673461914,
"loss": 116.6119,
"objective": 116.45634460449219,
"ranking_idealized": 0.5249999761581421,
"ranking_idealized_expo": 0.4583333432674408,
"ranking_simple": 0.4791666567325592,
"regularize": 0.25854194164276123,
"step": 270,
"wo_beta": 14.43771743774414
},
{
"dpo_loss": 0.6129618287086487,
"epoch": 0.7794048181388757,
"grad_norm": 16560.36826913483,
"learning_rate": 4.619633316444329e-07,
"logits": -1.4034606218338013,
"logps": -84.92838287353516,
"loss": 111.1039,
"objective": 110.11347961425781,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5208333134651184,
"regularize": 0.2595362663269043,
"step": 275,
"wo_beta": 15.505229949951172
},
{
"dpo_loss": 0.6037231087684631,
"epoch": 0.7935758148323099,
"grad_norm": 18592.99967397487,
"learning_rate": 4.597426554873036e-07,
"logits": -1.5048367977142334,
"logps": -85.48095703125,
"loss": 124.5191,
"objective": 130.4561767578125,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5583333373069763,
"regularize": 0.27942749857902527,
"step": 280,
"wo_beta": 15.601365089416504
},
{
"dpo_loss": 0.6074939370155334,
"epoch": 0.807746811525744,
"grad_norm": 16296.64397951243,
"learning_rate": 4.574646377824315e-07,
"logits": -1.4947975873947144,
"logps": -87.23796844482422,
"loss": 116.6867,
"objective": 114.05181121826172,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.5083333253860474,
"regularize": 0.2666790187358856,
"step": 285,
"wo_beta": 17.003517150878906
},
{
"dpo_loss": 0.5908948183059692,
"epoch": 0.821917808219178,
"grad_norm": 16844.100126093494,
"learning_rate": 4.551299013171111e-07,
"logits": -1.500679850578308,
"logps": -86.47892761230469,
"loss": 113.1195,
"objective": 110.57820892333984,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5333333611488342,
"regularize": 0.25748953223228455,
"step": 290,
"wo_beta": 16.26107406616211
},
{
"dpo_loss": 0.602837860584259,
"epoch": 0.8360888049126122,
"grad_norm": 15918.83667114978,
"learning_rate": 4.5273908438498e-07,
"logits": -1.488081693649292,
"logps": -86.3355484008789,
"loss": 112.7178,
"objective": 110.10317993164062,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5041666626930237,
"regularize": 0.2576940059661865,
"step": 295,
"wo_beta": 16.481365203857422
},
{
"dpo_loss": 0.6130139231681824,
"epoch": 0.8502598016060463,
"grad_norm": 16024.42965961574,
"learning_rate": 4.502928406115152e-07,
"logits": -1.4704513549804688,
"logps": -84.77509307861328,
"loss": 109.74,
"objective": 110.3909683227539,
"ranking_idealized": 0.625,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.5791666507720947,
"regularize": 0.25806304812431335,
"step": 300,
"wo_beta": 14.735386848449707
},
{
"epoch": 0.8502598016060463,
"eval_dpo_loss": 0.6815229654312134,
"eval_logits": -1.476037859916687,
"eval_logps": -92.42357635498047,
"eval_loss": 180.4923553466797,
"eval_objective": 178.13650512695312,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.5305383205413818,
"eval_regularize": 0.40466198325157166,
"eval_runtime": 369.1363,
"eval_samples_per_second": 15.685,
"eval_steps_per_second": 1.308,
"eval_wo_beta": 16.49808120727539,
"step": 300
},
{
"dpo_loss": 0.6006699204444885,
"epoch": 0.8644307982994804,
"grad_norm": 16503.480891595664,
"learning_rate": 4.4779183877533877e-07,
"logits": -1.4632763862609863,
"logps": -85.11184692382812,
"loss": 110.0493,
"objective": 114.68462371826172,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5833333134651184,
"regularize": 0.25510621070861816,
"step": 305,
"wo_beta": 15.895527839660645
},
{
"dpo_loss": 0.6042004227638245,
"epoch": 0.8786017949929145,
"grad_norm": 15793.814439937723,
"learning_rate": 4.4523676262538045e-07,
"logits": -1.47891104221344,
"logps": -84.36697387695312,
"loss": 112.7694,
"objective": 122.6783676147461,
"ranking_idealized": 0.5958333611488342,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.5291666388511658,
"regularize": 0.2812657058238983,
"step": 310,
"wo_beta": 15.347722053527832
},
{
"dpo_loss": 0.591303825378418,
"epoch": 0.8927727916863486,
"grad_norm": 17495.093435377945,
"learning_rate": 4.426283106939473e-07,
"logits": -1.4973819255828857,
"logps": -84.14984893798828,
"loss": 105.8584,
"objective": 102.11656188964844,
"ranking_idealized": 0.5541666746139526,
"ranking_idealized_expo": 0.47083333134651184,
"ranking_simple": 0.512499988079071,
"regularize": 0.23703667521476746,
"step": 315,
"wo_beta": 15.412622451782227
},
{
"dpo_loss": 0.5901548862457275,
"epoch": 0.9069437883797827,
"grad_norm": 16776.49496984533,
"learning_rate": 4.3996719610575215e-07,
"logits": -1.4549332857131958,
"logps": -86.48863983154297,
"loss": 101.9659,
"objective": 103.75129699707031,
"ranking_idealized": 0.6208333373069763,
"ranking_idealized_expo": 0.5583333373069763,
"ranking_simple": 0.6000000238418579,
"regularize": 0.2400914579629898,
"step": 320,
"wo_beta": 15.67490005493164
},
{
"dpo_loss": 0.5979679226875305,
"epoch": 0.9211147850732169,
"grad_norm": 16212.553406478031,
"learning_rate": 4.372541463829523e-07,
"logits": -1.543658971786499,
"logps": -87.07477569580078,
"loss": 110.2578,
"objective": 94.58499908447266,
"ranking_idealized": 0.6083333492279053,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5583333373069763,
"regularize": 0.23497775197029114,
"step": 325,
"wo_beta": 15.544549942016602
},
{
"dpo_loss": 0.5859458446502686,
"epoch": 0.9352857817666509,
"grad_norm": 17177.10951815794,
"learning_rate": 4.344899032462524e-07,
"logits": -1.3802608251571655,
"logps": -86.10081481933594,
"loss": 113.3699,
"objective": 119.8874740600586,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.574999988079071,
"regularize": 0.2620205283164978,
"step": 330,
"wo_beta": 16.999446868896484
},
{
"dpo_loss": 0.5934227108955383,
"epoch": 0.949456778460085,
"grad_norm": 17661.288172623317,
"learning_rate": 4.316752224121252e-07,
"logits": -1.4096896648406982,
"logps": -85.97354125976562,
"loss": 112.801,
"objective": 102.87708282470703,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5708333253860474,
"regularize": 0.226911723613739,
"step": 335,
"wo_beta": 15.912822723388672
},
{
"dpo_loss": 0.5984110236167908,
"epoch": 0.9636277751535192,
"grad_norm": 16330.575056246405,
"learning_rate": 4.2881087338620634e-07,
"logits": -1.4624823331832886,
"logps": -85.42594909667969,
"loss": 112.5159,
"objective": 105.5082015991211,
"ranking_idealized": 0.6041666865348816,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.5916666388511658,
"regularize": 0.24777108430862427,
"step": 340,
"wo_beta": 16.43703269958496
},
{
"dpo_loss": 0.5881261825561523,
"epoch": 0.9777987718469532,
"grad_norm": 16460.598983396474,
"learning_rate": 4.258976392529192e-07,
"logits": -1.5221667289733887,
"logps": -84.57250213623047,
"loss": 105.3708,
"objective": 96.2402114868164,
"ranking_idealized": 0.6458333134651184,
"ranking_idealized_expo": 0.5833333134651184,
"ranking_simple": 0.625,
"regularize": 0.22673317790031433,
"step": 345,
"wo_beta": 15.497628211975098
},
{
"dpo_loss": 0.582562267780304,
"epoch": 0.9919697685403873,
"grad_norm": 15979.907994389112,
"learning_rate": 4.2293631646138735e-07,
"logits": -1.4198105335235596,
"logps": -87.27174377441406,
"loss": 104.2663,
"objective": 106.56012725830078,
"ranking_idealized": 0.6583333611488342,
"ranking_idealized_expo": 0.574999988079071,
"ranking_simple": 0.6208333373069763,
"regularize": 0.2592408061027527,
"step": 350,
"wo_beta": 15.726160049438477
},
{
"epoch": 0.9919697685403873,
"eval_dpo_loss": 0.6808353066444397,
"eval_logits": -1.5066107511520386,
"eval_logps": -92.80049133300781,
"eval_loss": 182.25906372070312,
"eval_objective": 178.8644256591797,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.5289855003356934,
"eval_regularize": 0.4057510197162628,
"eval_runtime": 368.5888,
"eval_samples_per_second": 15.709,
"eval_steps_per_second": 1.31,
"eval_wo_beta": 16.569448471069336,
"step": 350
},
{
"dpo_loss": 0.5861319899559021,
"epoch": 1.0061407652338215,
"grad_norm": 16794.52051588047,
"learning_rate": 4.1992771460769325e-07,
"logits": -1.6238858699798584,
"logps": -84.81636810302734,
"loss": 101.4044,
"objective": 99.22207641601562,
"ranking_idealized": 0.6083333492279053,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.574999988079071,
"regularize": 0.2406620979309082,
"step": 355,
"wo_beta": 16.778457641601562
},
{
"dpo_loss": 0.5623802542686462,
"epoch": 1.0203117619272555,
"grad_norm": 17512.309592114732,
"learning_rate": 4.168726562135431e-07,
"logits": -1.4817250967025757,
"logps": -85.78034973144531,
"loss": 89.2678,
"objective": 87.2276840209961,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5708333253860474,
"regularize": 0.20162057876586914,
"step": 360,
"wo_beta": 16.540082931518555
},
{
"dpo_loss": 0.5711230635643005,
"epoch": 1.0344827586206897,
"grad_norm": 17722.39127835427,
"learning_rate": 4.1377197650139734e-07,
"logits": -1.5016947984695435,
"logps": -86.203369140625,
"loss": 94.5137,
"objective": 95.95963287353516,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5625,
"regularize": 0.21429090201854706,
"step": 365,
"wo_beta": 15.065950393676758
},
{
"dpo_loss": 0.5723836421966553,
"epoch": 1.0486537553141237,
"grad_norm": 16746.74468565218,
"learning_rate": 4.106265231661291e-07,
"logits": -1.4276858568191528,
"logps": -84.18301391601562,
"loss": 87.9015,
"objective": 87.47245788574219,
"ranking_idealized": 0.5958333611488342,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5666666626930237,
"regularize": 0.20792974531650543,
"step": 370,
"wo_beta": 15.806612968444824
},
{
"dpo_loss": 0.575459897518158,
"epoch": 1.0628247520075578,
"grad_norm": 16711.49471758267,
"learning_rate": 4.0743715614327314e-07,
"logits": -1.4709128141403198,
"logps": -84.51998901367188,
"loss": 81.3317,
"objective": 80.16383361816406,
"ranking_idealized": 0.6083333492279053,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5666666626930237,
"regularize": 0.19415231049060822,
"step": 375,
"wo_beta": 16.286664962768555
},
{
"dpo_loss": 0.5680096745491028,
"epoch": 1.076995748700992,
"grad_norm": 15938.178621987885,
"learning_rate": 4.042047473739277e-07,
"logits": -1.4488080739974976,
"logps": -86.38304138183594,
"loss": 88.4379,
"objective": 90.77100372314453,
"ranking_idealized": 0.6625000238418579,
"ranking_idealized_expo": 0.574999988079071,
"ranking_simple": 0.6333333253860474,
"regularize": 0.21614128351211548,
"step": 380,
"wo_beta": 15.816045761108398
},
{
"dpo_loss": 0.5828992128372192,
"epoch": 1.091166745394426,
"grad_norm": 15573.908569824505,
"learning_rate": 4.009301805663752e-07,
"logits": -1.4298585653305054,
"logps": -85.34860229492188,
"loss": 90.2727,
"objective": 100.38928985595703,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5583333373069763,
"ranking_simple": 0.5791666507720947,
"regularize": 0.21903865039348602,
"step": 385,
"wo_beta": 15.969101905822754
},
{
"dpo_loss": 0.5651105046272278,
"epoch": 1.10533774208786,
"grad_norm": 15903.36624651428,
"learning_rate": 3.9761435095448424e-07,
"logits": -1.386973261833191,
"logps": -86.8327865600586,
"loss": 85.9698,
"objective": 81.47636413574219,
"ranking_idealized": 0.637499988079071,
"ranking_idealized_expo": 0.5625,
"ranking_simple": 0.612500011920929,
"regularize": 0.1953142136335373,
"step": 390,
"wo_beta": 16.701154708862305
},
{
"dpo_loss": 0.5567125082015991,
"epoch": 1.1195087387812943,
"grad_norm": 17073.21610205935,
"learning_rate": 3.942581650529625e-07,
"logits": -1.4661533832550049,
"logps": -86.22716522216797,
"loss": 78.9786,
"objective": 86.99171447753906,
"ranking_idealized": 0.5791666507720947,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5458333492279053,
"regularize": 0.19182702898979187,
"step": 395,
"wo_beta": 15.073732376098633
},
{
"dpo_loss": 0.5746569037437439,
"epoch": 1.1336797354747283,
"grad_norm": 17100.977239850836,
"learning_rate": 3.908625404095242e-07,
"logits": -1.542074203491211,
"logps": -84.60574340820312,
"loss": 91.3585,
"objective": 91.53578186035156,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5833333134651184,
"regularize": 0.2116149365901947,
"step": 400,
"wo_beta": 15.816818237304688
},
{
"epoch": 1.1336797354747283,
"eval_dpo_loss": 0.6799505352973938,
"eval_logits": -1.478875756263733,
"eval_logps": -92.38536071777344,
"eval_loss": 180.029541015625,
"eval_objective": 177.71481323242188,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.5279502868652344,
"eval_regularize": 0.402423620223999,
"eval_runtime": 374.7281,
"eval_samples_per_second": 15.451,
"eval_steps_per_second": 1.289,
"eval_wo_beta": 16.585174560546875,
"step": 400
},
{
"dpo_loss": 0.5667446255683899,
"epoch": 1.1478507321681626,
"grad_norm": 17235.250314074532,
"learning_rate": 3.874284053540415e-07,
"logits": -1.506400227546692,
"logps": -86.09246063232422,
"loss": 87.7881,
"objective": 99.79354095458984,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.4625000059604645,
"ranking_simple": 0.5375000238418579,
"regularize": 0.21635954082012177,
"step": 405,
"wo_beta": 17.256864547729492
},
{
"dpo_loss": 0.5707606673240662,
"epoch": 1.1620217288615966,
"grad_norm": 16725.889726383095,
"learning_rate": 3.839566987447491e-07,
"logits": -1.5043673515319824,
"logps": -85.6620864868164,
"loss": 88.1003,
"objective": 87.76438903808594,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5666666626930237,
"regularize": 0.19931498169898987,
"step": 410,
"wo_beta": 16.657556533813477
},
{
"dpo_loss": 0.5724092721939087,
"epoch": 1.1761927255550306,
"grad_norm": 14812.855448630145,
"learning_rate": 3.804483697115693e-07,
"logits": -1.420817255973816,
"logps": -85.20552062988281,
"loss": 80.2264,
"objective": 89.5212173461914,
"ranking_idealized": 0.612500011920929,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5874999761581421,
"regularize": 0.21374522149562836,
"step": 415,
"wo_beta": 16.42493438720703
},
{
"dpo_loss": 0.5684979557991028,
"epoch": 1.1903637222484649,
"grad_norm": 16758.534351302478,
"learning_rate": 3.769043773966292e-07,
"logits": -1.3999756574630737,
"logps": -86.63607788085938,
"loss": 86.6386,
"objective": 74.90924835205078,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5708333253860474,
"regularize": 0.17667636275291443,
"step": 420,
"wo_beta": 16.390174865722656
},
{
"dpo_loss": 0.5639453530311584,
"epoch": 1.204534718941899,
"grad_norm": 16337.276317485204,
"learning_rate": 3.733256906920412e-07,
"logits": -1.4687834978103638,
"logps": -85.14289093017578,
"loss": 88.4628,
"objective": 91.70726013183594,
"ranking_idealized": 0.5958333611488342,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.574999988079071,
"regularize": 0.21049639582633972,
"step": 425,
"wo_beta": 15.31130599975586
},
{
"dpo_loss": 0.5734551548957825,
"epoch": 1.2187057156353331,
"grad_norm": 15248.100441462713,
"learning_rate": 3.6971328797501735e-07,
"logits": -1.448046088218689,
"logps": -86.40514373779297,
"loss": 77.2816,
"objective": 79.60990142822266,
"ranking_idealized": 0.5625,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5458333492279053,
"regularize": 0.17995795607566833,
"step": 430,
"wo_beta": 16.629831314086914
},
{
"dpo_loss": 0.5668199062347412,
"epoch": 1.2328767123287672,
"grad_norm": 16306.828228688702,
"learning_rate": 3.660681568403909e-07,
"logits": -1.4072421789169312,
"logps": -85.66299438476562,
"loss": 87.0271,
"objective": 85.58671569824219,
"ranking_idealized": 0.6458333134651184,
"ranking_idealized_expo": 0.5958333611488342,
"ranking_simple": 0.6208333373069763,
"regularize": 0.19756914675235748,
"step": 435,
"wo_beta": 17.663021087646484
},
{
"dpo_loss": 0.5658931732177734,
"epoch": 1.2470477090222012,
"grad_norm": 20345.22767033362,
"learning_rate": 3.623912938306176e-07,
"logits": -1.4035090208053589,
"logps": -85.33922576904297,
"loss": 79.1586,
"objective": 74.25531768798828,
"ranking_idealized": 0.612500011920929,
"ranking_idealized_expo": 0.5541666746139526,
"ranking_simple": 0.5791666507720947,
"regularize": 0.17811766266822815,
"step": 440,
"wo_beta": 15.194890975952148
},
{
"dpo_loss": 0.5541914105415344,
"epoch": 1.2612187057156352,
"grad_norm": 17279.48326071237,
"learning_rate": 3.5868370416333116e-07,
"logits": -1.4238730669021606,
"logps": -87.22127532958984,
"loss": 85.5554,
"objective": 88.88020324707031,
"ranking_idealized": 0.637499988079071,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.6166666746139526,
"regularize": 0.21330490708351135,
"step": 445,
"wo_beta": 17.520837783813477
},
{
"dpo_loss": 0.5635745525360107,
"epoch": 1.2753897024090695,
"grad_norm": 15953.8185394536,
"learning_rate": 3.549464014565264e-07,
"logits": -1.5866882801055908,
"logps": -86.32205963134766,
"loss": 77.8925,
"objective": 74.42088317871094,
"ranking_idealized": 0.5541666746139526,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5666666626930237,
"regularize": 0.1866002231836319,
"step": 450,
"wo_beta": 14.815221786499023
},
{
"epoch": 1.2753897024090695,
"eval_dpo_loss": 0.679172158241272,
"eval_logits": -1.4745829105377197,
"eval_logps": -92.70618438720703,
"eval_loss": 179.24407958984375,
"eval_objective": 175.84751892089844,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.5274327397346497,
"eval_regularize": 0.39890119433403015,
"eval_runtime": 369.0794,
"eval_samples_per_second": 15.688,
"eval_steps_per_second": 1.309,
"eval_wo_beta": 16.526927947998047,
"step": 450
},
{
"dpo_loss": 0.5642288327217102,
"epoch": 1.2895606991025035,
"grad_norm": 15540.821203538826,
"learning_rate": 3.511804074514468e-07,
"logits": -1.3849934339523315,
"logps": -86.52748107910156,
"loss": 78.8058,
"objective": 79.82011413574219,
"ranking_idealized": 0.5458333492279053,
"ranking_idealized_expo": 0.49166667461395264,
"ranking_simple": 0.512499988079071,
"regularize": 0.18714289367198944,
"step": 455,
"wo_beta": 15.67411994934082
},
{
"dpo_loss": 0.5722388625144958,
"epoch": 1.3037316957959377,
"grad_norm": 17894.656202069662,
"learning_rate": 3.4738675173325007e-07,
"logits": -1.5175042152404785,
"logps": -86.22012329101562,
"loss": 80.887,
"objective": 79.5283432006836,
"ranking_idealized": 0.5583333373069763,
"ranking_idealized_expo": 0.4749999940395355,
"ranking_simple": 0.5291666388511658,
"regularize": 0.18461202085018158,
"step": 460,
"wo_beta": 16.33133316040039
},
{
"dpo_loss": 0.5584803819656372,
"epoch": 1.3179026924893718,
"grad_norm": 15804.388771620323,
"learning_rate": 3.4356647144953003e-07,
"logits": -1.4979623556137085,
"logps": -84.8365478515625,
"loss": 78.4995,
"objective": 80.0457763671875,
"ranking_idealized": 0.5541666746139526,
"ranking_idealized_expo": 0.4749999940395355,
"ranking_simple": 0.5291666388511658,
"regularize": 0.19293205440044403,
"step": 465,
"wo_beta": 14.855480194091797
},
{
"dpo_loss": 0.5635024309158325,
"epoch": 1.3320736891828058,
"grad_norm": 15763.736551798547,
"learning_rate": 3.3972061102677124e-07,
"logits": -1.5794017314910889,
"logps": -83.71866607666016,
"loss": 80.6678,
"objective": 83.6802749633789,
"ranking_idealized": 0.612500011920929,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5916666388511658,
"regularize": 0.18187165260314941,
"step": 470,
"wo_beta": 14.902677536010742
},
{
"dpo_loss": 0.5684855580329895,
"epoch": 1.34624468587624,
"grad_norm": 16747.760130279752,
"learning_rate": 3.3585022188481246e-07,
"logits": -1.45767343044281,
"logps": -85.98019409179688,
"loss": 70.8929,
"objective": 75.84042358398438,
"ranking_idealized": 0.6416666507720947,
"ranking_idealized_expo": 0.574999988079071,
"ranking_simple": 0.6208333373069763,
"regularize": 0.19827060401439667,
"step": 475,
"wo_beta": 16.622520446777344
},
{
"dpo_loss": 0.5594444274902344,
"epoch": 1.360415682569674,
"grad_norm": 15233.336787247383,
"learning_rate": 3.3195636214939935e-07,
"logits": -1.5256932973861694,
"logps": -86.3495101928711,
"loss": 72.525,
"objective": 74.22408294677734,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.5416666865348816,
"regularize": 0.1702665388584137,
"step": 480,
"wo_beta": 17.96377182006836
},
{
"dpo_loss": 0.5718420743942261,
"epoch": 1.3745866792631083,
"grad_norm": 16631.30941131418,
"learning_rate": 3.2804009636290396e-07,
"logits": -1.5204293727874756,
"logps": -84.49634552001953,
"loss": 76.2635,
"objective": 74.72454071044922,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5375000238418579,
"regularize": 0.18596205115318298,
"step": 485,
"wo_beta": 16.379438400268555
},
{
"dpo_loss": 0.5699793696403503,
"epoch": 1.3887576759565423,
"grad_norm": 18558.655175644297,
"learning_rate": 3.241024951932884e-07,
"logits": -1.4783555269241333,
"logps": -86.9972915649414,
"loss": 75.5371,
"objective": 76.1517105102539,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5666666626930237,
"regularize": 0.18451951444149017,
"step": 490,
"wo_beta": 15.841702461242676
},
{
"dpo_loss": 0.5529130101203918,
"epoch": 1.4029286726499763,
"grad_norm": 20634.943800946956,
"learning_rate": 3.201446351413958e-07,
"logits": -1.4992899894714355,
"logps": -87.463623046875,
"loss": 79.5849,
"objective": 71.89769744873047,
"ranking_idealized": 0.5583333373069763,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.5416666865348816,
"regularize": 0.17148242890834808,
"step": 495,
"wo_beta": 15.643444061279297
},
{
"dpo_loss": 0.5567610859870911,
"epoch": 1.4170996693434104,
"grad_norm": 17182.106354079515,
"learning_rate": 3.161675982466454e-07,
"logits": -1.4364333152770996,
"logps": -86.2386703491211,
"loss": 73.5844,
"objective": 70.71806335449219,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.5625,
"regularize": 0.16988083720207214,
"step": 500,
"wo_beta": 15.247078895568848
},
{
"epoch": 1.4170996693434104,
"eval_dpo_loss": 0.6786399483680725,
"eval_logits": -1.4849432706832886,
"eval_logps": -93.26954650878906,
"eval_loss": 180.36428833007812,
"eval_objective": 176.233154296875,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.5305383205413818,
"eval_regularize": 0.3994362950325012,
"eval_runtime": 373.0338,
"eval_samples_per_second": 15.521,
"eval_steps_per_second": 1.295,
"eval_wo_beta": 16.500259399414062,
"step": 500
},
{
"dpo_loss": 0.5469445586204529,
"epoch": 1.4312706660368446,
"grad_norm": 16377.98808062248,
"learning_rate": 3.121724717912138e-07,
"logits": -1.507896900177002,
"logps": -86.27371978759766,
"loss": 81.0237,
"objective": 78.97586822509766,
"ranking_idealized": 0.625,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5916666388511658,
"regularize": 0.17829085886478424,
"step": 505,
"wo_beta": 16.57958984375
},
{
"dpo_loss": 0.5666388273239136,
"epoch": 1.4454416627302786,
"grad_norm": 16987.7779441374,
"learning_rate": 3.081603480027826e-07,
"logits": -1.5229469537734985,
"logps": -86.77935028076172,
"loss": 72.4223,
"objective": 68.7424087524414,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5791666507720947,
"regularize": 0.16074414551258087,
"step": 510,
"wo_beta": 15.368119239807129
},
{
"dpo_loss": 0.5596444606781006,
"epoch": 1.4596126594237129,
"grad_norm": 18192.200248438854,
"learning_rate": 3.0413232375593494e-07,
"logits": -1.581657886505127,
"logps": -87.4643783569336,
"loss": 75.4207,
"objective": 79.91267395019531,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.4625000059604645,
"ranking_simple": 0.5249999761581421,
"regularize": 0.19809895753860474,
"step": 515,
"wo_beta": 16.213024139404297
},
{
"dpo_loss": 0.5609327554702759,
"epoch": 1.473783656117147,
"grad_norm": 17307.001345557863,
"learning_rate": 3.000895002722803e-07,
"logits": -1.472069501876831,
"logps": -84.53739929199219,
"loss": 73.445,
"objective": 75.21688079833984,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.49166667461395264,
"ranking_simple": 0.5333333611488342,
"regularize": 0.17492397129535675,
"step": 520,
"wo_beta": 15.897907257080078
},
{
"dpo_loss": 0.5437408685684204,
"epoch": 1.487954652810581,
"grad_norm": 16071.775164367858,
"learning_rate": 2.960329828193918e-07,
"logits": -1.3647209405899048,
"logps": -83.83393859863281,
"loss": 80.3729,
"objective": 83.74950408935547,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5708333253860474,
"regularize": 0.1921227127313614,
"step": 525,
"wo_beta": 15.854673385620117
},
{
"dpo_loss": 0.5658655762672424,
"epoch": 1.5021256495040152,
"grad_norm": 16145.860622049926,
"learning_rate": 2.919638804086369e-07,
"logits": -1.5306588411331177,
"logps": -86.52985382080078,
"loss": 72.6051,
"objective": 74.99629211425781,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5583333373069763,
"regularize": 0.16594384610652924,
"step": 530,
"wo_beta": 15.360599517822266
},
{
"dpo_loss": 0.5535832643508911,
"epoch": 1.5162966461974492,
"grad_norm": 16070.421212133448,
"learning_rate": 2.878833054919851e-07,
"logits": -1.5020090341567993,
"logps": -85.10139465332031,
"loss": 76.8492,
"objective": 71.94368743896484,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5708333253860474,
"regularize": 0.17868037521839142,
"step": 535,
"wo_beta": 15.670897483825684
},
{
"dpo_loss": 0.5716097354888916,
"epoch": 1.5304676428908834,
"grad_norm": 17086.3623131816,
"learning_rate": 2.8379237365787425e-07,
"logits": -1.4154467582702637,
"logps": -85.44342803955078,
"loss": 72.6152,
"objective": 71.88426208496094,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5833333134651184,
"regularize": 0.16814741492271423,
"step": 540,
"wo_beta": 15.268050193786621
},
{
"dpo_loss": 0.5665360689163208,
"epoch": 1.5446386395843175,
"grad_norm": 15014.903555178817,
"learning_rate": 2.7969220332622e-07,
"logits": -1.5325461626052856,
"logps": -86.13166046142578,
"loss": 71.8279,
"objective": 66.47465515136719,
"ranking_idealized": 0.6583333611488342,
"ranking_idealized_expo": 0.5874999761581421,
"ranking_simple": 0.6333333253860474,
"regularize": 0.15991279482841492,
"step": 545,
"wo_beta": 15.164175033569336
},
{
"dpo_loss": 0.5597227811813354,
"epoch": 1.5588096362777515,
"grad_norm": 15652.670268373007,
"learning_rate": 2.7558391544265126e-07,
"logits": -1.5225752592086792,
"logps": -86.28731536865234,
"loss": 74.752,
"objective": 75.39783477783203,
"ranking_idealized": 0.5625,
"ranking_idealized_expo": 0.46666666865348816,
"ranking_simple": 0.5249999761581421,
"regularize": 0.1811082661151886,
"step": 550,
"wo_beta": 15.459908485412598
},
{
"epoch": 1.5588096362777515,
"eval_dpo_loss": 0.6795263886451721,
"eval_logits": -1.4832454919815063,
"eval_logps": -92.88924407958984,
"eval_loss": 181.36459350585938,
"eval_objective": 177.22666931152344,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.5274327397346497,
"eval_regularize": 0.40197721123695374,
"eval_runtime": 369.092,
"eval_samples_per_second": 15.687,
"eval_steps_per_second": 1.309,
"eval_wo_beta": 16.55462646484375,
"step": 550
},
{
"dpo_loss": 0.5626943707466125,
"epoch": 1.5729806329711855,
"grad_norm": 18195.881077955793,
"learning_rate": 2.7146863317205425e-07,
"logits": -1.3379462957382202,
"logps": -86.09709930419922,
"loss": 77.0693,
"objective": 73.5696792602539,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5791666507720947,
"regularize": 0.1626834124326706,
"step": 555,
"wo_beta": 14.696831703186035
},
{
"dpo_loss": 0.552921712398529,
"epoch": 1.5871516296646198,
"grad_norm": 17118.412791156054,
"learning_rate": 2.67347481591511e-07,
"logits": -1.4245628118515015,
"logps": -84.61250305175781,
"loss": 71.0964,
"objective": 72.3261489868164,
"ranking_idealized": 0.6083333492279053,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5833333134651184,
"regularize": 0.1769203245639801,
"step": 560,
"wo_beta": 16.116395950317383
},
{
"dpo_loss": 0.5479990243911743,
"epoch": 1.601322626358054,
"grad_norm": 17861.734640053382,
"learning_rate": 2.6322158738271414e-07,
"logits": -1.3948200941085815,
"logps": -85.63233184814453,
"loss": 69.8501,
"objective": 66.04268646240234,
"ranking_idealized": 0.5458333492279053,
"ranking_idealized_expo": 0.44999998807907104,
"ranking_simple": 0.5333333611488342,
"regularize": 0.1456415057182312,
"step": 565,
"wo_beta": 15.080788612365723
},
{
"dpo_loss": 0.5674367547035217,
"epoch": 1.615493623051488,
"grad_norm": 17710.317360917932,
"learning_rate": 2.590920785239436e-07,
"logits": -1.5569151639938354,
"logps": -86.57015228271484,
"loss": 67.9067,
"objective": 68.46839904785156,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5708333253860474,
"regularize": 0.16210322082042694,
"step": 570,
"wo_beta": 15.696702003479004
},
{
"dpo_loss": 0.5638484358787537,
"epoch": 1.629664619744922,
"grad_norm": 18073.751646088458,
"learning_rate": 2.549600839816884e-07,
"logits": -1.5135074853897095,
"logps": -84.736328125,
"loss": 72.0534,
"objective": 84.00183868408203,
"ranking_idealized": 0.637499988079071,
"ranking_idealized_expo": 0.5583333373069763,
"ranking_simple": 0.6041666865348816,
"regularize": 0.1912422776222229,
"step": 575,
"wo_beta": 16.294113159179688
},
{
"dpo_loss": 0.5543667078018188,
"epoch": 1.643835616438356,
"grad_norm": 16118.144699864974,
"learning_rate": 2.508267334019988e-07,
"logits": -1.5415210723876953,
"logps": -84.3241195678711,
"loss": 67.4175,
"objective": 66.42247009277344,
"ranking_idealized": 0.6083333492279053,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5833333134651184,
"regularize": 0.1573016196489334,
"step": 580,
"wo_beta": 14.898622512817383
},
{
"dpo_loss": 0.5663701295852661,
"epoch": 1.6580066131317903,
"grad_norm": 17666.121211791866,
"learning_rate": 2.4669315680165195e-07,
"logits": -1.3956176042556763,
"logps": -84.12041473388672,
"loss": 66.3727,
"objective": 61.66792678833008,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.4625000059604645,
"ranking_simple": 0.5083333253860474,
"regularize": 0.15171830356121063,
"step": 585,
"wo_beta": 15.958919525146484
},
{
"dpo_loss": 0.558569610118866,
"epoch": 1.6721776098252243,
"grad_norm": 16850.29970069498,
"learning_rate": 2.425604842592169e-07,
"logits": -1.4625413417816162,
"logps": -84.04219055175781,
"loss": 74.8738,
"objective": 81.01467895507812,
"ranking_idealized": 0.6208333373069763,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5874999761581421,
"regularize": 0.18167801201343536,
"step": 590,
"wo_beta": 13.437705039978027
},
{
"dpo_loss": 0.5554817914962769,
"epoch": 1.6863486065186586,
"grad_norm": 17084.24865418485,
"learning_rate": 2.384298456061022e-07,
"logits": -1.4085568189620972,
"logps": -84.96867370605469,
"loss": 69.8306,
"objective": 80.21109008789062,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5833333134651184,
"regularize": 0.18683354556560516,
"step": 595,
"wo_beta": 17.37420654296875
},
{
"dpo_loss": 0.5489537119865417,
"epoch": 1.7005196032120926,
"grad_norm": 15218.152775914608,
"learning_rate": 2.3430237011767164e-07,
"logits": -1.4603925943374634,
"logps": -85.71869659423828,
"loss": 66.606,
"objective": 76.13477325439453,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.6166666746139526,
"regularize": 0.17256483435630798,
"step": 600,
"wo_beta": 16.145771026611328
},
{
"epoch": 1.7005196032120926,
"eval_dpo_loss": 0.6788755655288696,
"eval_logits": -1.4675469398498535,
"eval_logps": -91.61579895019531,
"eval_loss": 179.49525451660156,
"eval_objective": 176.2792510986328,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.5310559272766113,
"eval_regularize": 0.3999297320842743,
"eval_runtime": 368.9049,
"eval_samples_per_second": 15.695,
"eval_steps_per_second": 1.309,
"eval_wo_beta": 16.6182804107666,
"step": 600
},
{
"dpo_loss": 0.564985454082489,
"epoch": 1.7146905999055266,
"grad_norm": 16714.450551200658,
"learning_rate": 2.30179186204511e-07,
"logits": -1.4053993225097656,
"logps": -86.0546646118164,
"loss": 69.0228,
"objective": 73.44601440429688,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5666666626930237,
"regularize": 0.17200763523578644,
"step": 605,
"wo_beta": 18.306556701660156
},
{
"dpo_loss": 0.5740829706192017,
"epoch": 1.7288615965989607,
"grad_norm": 18689.625067588473,
"learning_rate": 2.2606142110393245e-07,
"logits": -1.4901000261306763,
"logps": -84.50035858154297,
"loss": 69.9038,
"objective": 70.67005920410156,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.5708333253860474,
"ranking_simple": 0.625,
"regularize": 0.1648392379283905,
"step": 610,
"wo_beta": 15.994599342346191
},
{
"dpo_loss": 0.5698901414871216,
"epoch": 1.743032593292395,
"grad_norm": 17284.824092388248,
"learning_rate": 2.2195020057179894e-07,
"logits": -1.4990768432617188,
"logps": -84.69489288330078,
"loss": 69.1858,
"objective": 62.69770431518555,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5791666507720947,
"regularize": 0.1527046412229538,
"step": 615,
"wo_beta": 15.638397216796875
},
{
"dpo_loss": 0.553841233253479,
"epoch": 1.7572035899858292,
"grad_norm": 16362.729415329737,
"learning_rate": 2.1784664857475352e-07,
"logits": -1.54779851436615,
"logps": -84.9485092163086,
"loss": 69.826,
"objective": 70.27579498291016,
"ranking_idealized": 0.6208333373069763,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5833333134651184,
"regularize": 0.1771712750196457,
"step": 620,
"wo_beta": 16.527475357055664
},
{
"dpo_loss": 0.5640650987625122,
"epoch": 1.7713745866792632,
"grad_norm": 16519.00259049995,
"learning_rate": 2.1375188698293854e-07,
"logits": -1.456007480621338,
"logps": -83.91544342041016,
"loss": 67.7564,
"objective": 73.47472381591797,
"ranking_idealized": 0.6083333492279053,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5791666507720947,
"regularize": 0.16373471915721893,
"step": 625,
"wo_beta": 15.541656494140625
},
{
"dpo_loss": 0.5638567805290222,
"epoch": 1.7855455833726972,
"grad_norm": 16003.013277681921,
"learning_rate": 2.0966703526328726e-07,
"logits": -1.4914802312850952,
"logps": -85.642822265625,
"loss": 68.6768,
"objective": 65.9126205444336,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.49166667461395264,
"ranking_simple": 0.5541666746139526,
"regularize": 0.15695922076702118,
"step": 630,
"wo_beta": 16.262632369995117
},
{
"dpo_loss": 0.5648781657218933,
"epoch": 1.7997165800661312,
"grad_norm": 17168.862791414114,
"learning_rate": 2.0559321017347282e-07,
"logits": -1.5868287086486816,
"logps": -84.44722747802734,
"loss": 63.1038,
"objective": 59.76757049560547,
"ranking_idealized": 0.637499988079071,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5916666388511658,
"regularize": 0.15224316716194153,
"step": 635,
"wo_beta": 17.01032829284668
},
{
"dpo_loss": 0.5457783341407776,
"epoch": 1.8138875767595655,
"grad_norm": 17993.04524523219,
"learning_rate": 2.0153152545659796e-07,
"logits": -1.5063692331314087,
"logps": -85.88375091552734,
"loss": 66.3807,
"objective": 64.99014282226562,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.550000011920929,
"regularize": 0.15310163795948029,
"step": 640,
"wo_beta": 17.52164649963379
},
{
"dpo_loss": 0.5509151220321655,
"epoch": 1.8280585734529995,
"grad_norm": 19625.243857455473,
"learning_rate": 1.9748309153670856e-07,
"logits": -1.5516611337661743,
"logps": -86.286376953125,
"loss": 65.9614,
"objective": 64.1622543334961,
"ranking_idealized": 0.6416666507720947,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.6333333253860474,
"regularize": 0.1483081430196762,
"step": 645,
"wo_beta": 16.739274978637695
},
{
"dpo_loss": 0.5534842014312744,
"epoch": 1.8422295701464337,
"grad_norm": 17668.158898193855,
"learning_rate": 1.9344901521521498e-07,
"logits": -1.6079561710357666,
"logps": -86.36868286132812,
"loss": 65.4503,
"objective": 62.571048736572266,
"ranking_idealized": 0.612500011920929,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5833333134651184,
"regularize": 0.14387734234333038,
"step": 650,
"wo_beta": 15.527718544006348
},
{
"epoch": 1.8422295701464337,
"eval_dpo_loss": 0.6790141463279724,
"eval_logits": -1.504582166671753,
"eval_logps": -91.89739990234375,
"eval_loss": 180.12484741210938,
"eval_objective": 176.55528259277344,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.5284678936004639,
"eval_regularize": 0.4002520442008972,
"eval_runtime": 372.2243,
"eval_samples_per_second": 15.555,
"eval_steps_per_second": 1.298,
"eval_wo_beta": 16.537281036376953,
"step": 650
},
{
"dpo_loss": 0.5599731206893921,
"epoch": 1.8564005668398678,
"grad_norm": 18359.19761584605,
"learning_rate": 1.8943039936830344e-07,
"logits": -1.4786803722381592,
"logps": -83.48601531982422,
"loss": 69.3454,
"objective": 74.32158660888672,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.5583333373069763,
"regularize": 0.1768590807914734,
"step": 655,
"wo_beta": 15.510722160339355
},
{
"dpo_loss": 0.5625216960906982,
"epoch": 1.8705715635333018,
"grad_norm": 16101.25212298875,
"learning_rate": 1.854283426454209e-07,
"logits": -1.545279622077942,
"logps": -86.23809051513672,
"loss": 65.0768,
"objective": 58.69490432739258,
"ranking_idealized": 0.6041666865348816,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.574999988079071,
"regularize": 0.14343543350696564,
"step": 660,
"wo_beta": 17.122419357299805
},
{
"dpo_loss": 0.5581023097038269,
"epoch": 1.8847425602267358,
"grad_norm": 16788.328924690355,
"learning_rate": 1.8144393916891508e-07,
"logits": -1.526328206062317,
"logps": -85.24169921875,
"loss": 63.7541,
"objective": 67.25133514404297,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.6208333373069763,
"regularize": 0.14928290247917175,
"step": 665,
"wo_beta": 15.62762451171875
},
{
"dpo_loss": 0.553265392780304,
"epoch": 1.89891355692017,
"grad_norm": 18214.847289885216,
"learning_rate": 1.7747827823491252e-07,
"logits": -1.4548065662384033,
"logps": -83.87257385253906,
"loss": 57.5593,
"objective": 56.684471130371094,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.5333333611488342,
"regularize": 0.14734616875648499,
"step": 670,
"wo_beta": 16.462730407714844
},
{
"dpo_loss": 0.5662250518798828,
"epoch": 1.9130845536136043,
"grad_norm": 16267.920642791076,
"learning_rate": 1.7353244401551565e-07,
"logits": -1.478503704071045,
"logps": -84.67176818847656,
"loss": 60.0148,
"objective": 62.5386962890625,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.574999988079071,
"regularize": 0.14488257467746735,
"step": 675,
"wo_beta": 15.750804901123047
},
{
"dpo_loss": 0.5515182018280029,
"epoch": 1.9272555503070383,
"grad_norm": 17294.380974269086,
"learning_rate": 1.6960751526240118e-07,
"logits": -1.5540010929107666,
"logps": -86.8657455444336,
"loss": 60.2369,
"objective": 55.505897521972656,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.6041666865348816,
"regularize": 0.13418342173099518,
"step": 680,
"wo_beta": 15.37994384765625
},
{
"dpo_loss": 0.5534944534301758,
"epoch": 1.9414265470004723,
"grad_norm": 17684.725981514333,
"learning_rate": 1.6570456501189994e-07,
"logits": -1.4706988334655762,
"logps": -84.12257385253906,
"loss": 60.967,
"objective": 55.13488006591797,
"ranking_idealized": 0.5291666388511658,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.5291666388511658,
"regularize": 0.13964900374412537,
"step": 685,
"wo_beta": 15.740229606628418
},
{
"dpo_loss": 0.5616536736488342,
"epoch": 1.9555975436939064,
"grad_norm": 17181.452666650763,
"learning_rate": 1.618246602916397e-07,
"logits": -1.547702670097351,
"logps": -84.80847930908203,
"loss": 56.3999,
"objective": 58.78403854370117,
"ranking_idealized": 0.6041666865348816,
"ranking_idealized_expo": 0.5583333373069763,
"ranking_simple": 0.5916666388511658,
"regularize": 0.13416926562786102,
"step": 690,
"wo_beta": 16.004222869873047
},
{
"dpo_loss": 0.5623855590820312,
"epoch": 1.9697685403873406,
"grad_norm": 15698.603949750828,
"learning_rate": 1.579688618288305e-07,
"logits": -1.4090545177459717,
"logps": -85.53604888916016,
"loss": 57.3939,
"objective": 54.246089935302734,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.550000011920929,
"regularize": 0.12334556132555008,
"step": 695,
"wo_beta": 16.506813049316406
},
{
"dpo_loss": 0.5531891584396362,
"epoch": 1.9839395370807746,
"grad_norm": 17232.305142451998,
"learning_rate": 1.541382237602721e-07,
"logits": -1.3992184400558472,
"logps": -86.07640075683594,
"loss": 62.3615,
"objective": 63.35702896118164,
"ranking_idealized": 0.6041666865348816,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5708333253860474,
"regularize": 0.14704561233520508,
"step": 700,
"wo_beta": 15.364715576171875
},
{
"epoch": 1.9839395370807746,
"eval_dpo_loss": 0.678434431552887,
"eval_logits": -1.4984484910964966,
"eval_logps": -91.58750915527344,
"eval_loss": 179.3856658935547,
"eval_objective": 176.00213623046875,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.5300207138061523,
"eval_regularize": 0.3992062211036682,
"eval_runtime": 368.882,
"eval_samples_per_second": 15.696,
"eval_steps_per_second": 1.309,
"eval_wo_beta": 16.5863037109375,
"step": 700
},
{
"dpo_loss": 0.5491320490837097,
"epoch": 1.9981105337742089,
"grad_norm": 18325.752866553015,
"learning_rate": 1.5033379334416375e-07,
"logits": -1.3390460014343262,
"logps": -84.89611053466797,
"loss": 62.0311,
"objective": 66.05867767333984,
"ranking_idealized": 0.5916666388511658,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5583333373069763,
"regularize": 0.15552859008312225,
"step": 705,
"wo_beta": 17.203296661376953
},
{
"dpo_loss": 0.5555277466773987,
"epoch": 2.012281530467643,
"grad_norm": 16395.524810634437,
"learning_rate": 1.465566106737942e-07,
"logits": -1.4830571413040161,
"logps": -84.74015045166016,
"loss": 56.8191,
"objective": 61.21573257446289,
"ranking_idealized": 0.6083333492279053,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.6041666865348816,
"regularize": 0.1427960991859436,
"step": 710,
"wo_beta": 15.91169261932373
},
{
"dpo_loss": 0.5421245098114014,
"epoch": 2.026452527161077,
"grad_norm": 17457.779859055492,
"learning_rate": 1.428077083931907e-07,
"logits": -1.5156207084655762,
"logps": -84.15880584716797,
"loss": 51.1684,
"objective": 52.4562873840332,
"ranking_idealized": 0.6791666746139526,
"ranking_idealized_expo": 0.5708333253860474,
"ranking_simple": 0.6333333253860474,
"regularize": 0.12656886875629425,
"step": 715,
"wo_beta": 15.599425315856934
},
{
"dpo_loss": 0.5437305569648743,
"epoch": 2.040623523854511,
"grad_norm": 15758.315666253699,
"learning_rate": 1.3908811141480406e-07,
"logits": -1.4699770212173462,
"logps": -84.7515869140625,
"loss": 45.1211,
"objective": 38.25383758544922,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5958333611488342,
"regularize": 0.10041950643062592,
"step": 720,
"wo_beta": 16.25193214416504
},
{
"dpo_loss": 0.545520544052124,
"epoch": 2.0547945205479454,
"grad_norm": 16201.394561682804,
"learning_rate": 1.353988366393083e-07,
"logits": -1.5007617473602295,
"logps": -86.95757293701172,
"loss": 53.076,
"objective": 52.1579704284668,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5791666507720947,
"regularize": 0.12382372468709946,
"step": 725,
"wo_beta": 14.748201370239258
},
{
"dpo_loss": 0.5444363951683044,
"epoch": 2.0689655172413794,
"grad_norm": 18420.501731961805,
"learning_rate": 1.3174089267758982e-07,
"logits": -1.57591712474823,
"logps": -84.48290252685547,
"loss": 52.1829,
"objective": 51.44971466064453,
"ranking_idealized": 0.5791666507720947,
"ranking_idealized_expo": 0.49166667461395264,
"ranking_simple": 0.5791666507720947,
"regularize": 0.12323012948036194,
"step": 730,
"wo_beta": 15.643149375915527
},
{
"dpo_loss": 0.5511536002159119,
"epoch": 2.0831365139348135,
"grad_norm": 15885.917813358059,
"learning_rate": 1.2811527957500343e-07,
"logits": -1.499257206916809,
"logps": -84.50511169433594,
"loss": 44.0743,
"objective": 49.55210876464844,
"ranking_idealized": 0.5625,
"ranking_idealized_expo": 0.47083333134651184,
"ranking_simple": 0.5333333611488342,
"regularize": 0.13047467172145844,
"step": 735,
"wo_beta": 17.085899353027344
},
{
"dpo_loss": 0.5526978969573975,
"epoch": 2.0973075106282475,
"grad_norm": 16073.187372319342,
"learning_rate": 1.245229885379699e-07,
"logits": -1.5387953519821167,
"logps": -84.2315444946289,
"loss": 48.6885,
"objective": 53.333866119384766,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5958333611488342,
"regularize": 0.12818403542041779,
"step": 740,
"wo_beta": 15.79825496673584
},
{
"dpo_loss": 0.5457006692886353,
"epoch": 2.1114785073216815,
"grad_norm": 18439.02982973444,
"learning_rate": 1.209650016629899e-07,
"logits": -1.4960881471633911,
"logps": -84.55073547363281,
"loss": 49.7231,
"objective": 46.95569610595703,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.5583333373069763,
"ranking_simple": 0.6291666626930237,
"regularize": 0.11624018102884293,
"step": 745,
"wo_beta": 15.793415069580078
},
{
"dpo_loss": 0.5496144890785217,
"epoch": 2.1256495040151155,
"grad_norm": 16557.264742662323,
"learning_rate": 1.1744229166814886e-07,
"logits": -1.511896014213562,
"logps": -83.8011245727539,
"loss": 48.9708,
"objective": 45.016258239746094,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.550000011920929,
"regularize": 0.11198277026414871,
"step": 750,
"wo_beta": 15.621644020080566
},
{
"epoch": 2.1256495040151155,
"eval_dpo_loss": 0.6794183254241943,
"eval_logits": -1.491926908493042,
"eval_logps": -92.19331359863281,
"eval_loss": 179.8103485107422,
"eval_objective": 176.70277404785156,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.5274327397346497,
"eval_regularize": 0.4011208415031433,
"eval_runtime": 369.337,
"eval_samples_per_second": 15.677,
"eval_steps_per_second": 1.308,
"eval_wo_beta": 16.588436126708984,
"step": 750
},
{
"dpo_loss": 0.5448576807975769,
"epoch": 2.13982050070855,
"grad_norm": 17193.17890323298,
"learning_rate": 1.1395582162718523e-07,
"logits": -1.4843658208847046,
"logps": -86.97160339355469,
"loss": 52.8805,
"objective": 47.13671112060547,
"ranking_idealized": 0.5916666388511658,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5833333134651184,
"regularize": 0.11303378641605377,
"step": 755,
"wo_beta": 16.85382080078125
},
{
"dpo_loss": 0.5693633556365967,
"epoch": 2.153991497401984,
"grad_norm": 16501.093616184942,
"learning_rate": 1.10506544706196e-07,
"logits": -1.4409741163253784,
"logps": -83.32089233398438,
"loss": 48.6534,
"objective": 42.576026916503906,
"ranking_idealized": 0.5958333611488342,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5791666507720947,
"regularize": 0.10865426063537598,
"step": 760,
"wo_beta": 14.651175498962402
},
{
"dpo_loss": 0.550613522529602,
"epoch": 2.168162494095418,
"grad_norm": 17100.049348037002,
"learning_rate": 1.0709540390305061e-07,
"logits": -1.4873898029327393,
"logps": -84.55794525146484,
"loss": 48.0396,
"objective": 42.508872985839844,
"ranking_idealized": 0.6208333373069763,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5958333611488342,
"regularize": 0.10587514191865921,
"step": 765,
"wo_beta": 16.372541427612305
},
{
"dpo_loss": 0.5513295531272888,
"epoch": 2.182333490788852,
"grad_norm": 16090.026254449373,
"learning_rate": 1.0372333178958462e-07,
"logits": -1.5015202760696411,
"logps": -84.9146499633789,
"loss": 48.0411,
"objective": 50.020198822021484,
"ranking_idealized": 0.5916666388511658,
"ranking_idealized_expo": 0.49166667461395264,
"ranking_simple": 0.5708333253860474,
"regularize": 0.12123490124940872,
"step": 770,
"wo_beta": 14.4235200881958
},
{
"dpo_loss": 0.5526517629623413,
"epoch": 2.196504487482286,
"grad_norm": 16092.050129989755,
"learning_rate": 1.0039125025664391e-07,
"logits": -1.4631909132003784,
"logps": -86.0343017578125,
"loss": 50.4495,
"objective": 48.98077392578125,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.5791666507720947,
"ranking_simple": 0.6166666746139526,
"regularize": 0.12355396151542664,
"step": 775,
"wo_beta": 17.455713272094727
},
{
"dpo_loss": 0.5543543100357056,
"epoch": 2.21067548417572,
"grad_norm": 17672.341083343217,
"learning_rate": 9.710007026204894e-08,
"logits": -1.4037829637527466,
"logps": -86.01419067382812,
"loss": 48.6758,
"objective": 49.57035446166992,
"ranking_idealized": 0.512499988079071,
"ranking_idealized_expo": 0.44583332538604736,
"ranking_simple": 0.48750001192092896,
"regularize": 0.1090199202299118,
"step": 780,
"wo_beta": 15.31540298461914
},
{
"dpo_loss": 0.5374571681022644,
"epoch": 2.2248464808691546,
"grad_norm": 17104.605257535815,
"learning_rate": 9.385069158154805e-08,
"logits": -1.4150718450546265,
"logps": -85.85627746582031,
"loss": 47.7928,
"objective": 41.365787506103516,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.5541666746139526,
"regularize": 0.10406169295310974,
"step": 785,
"wo_beta": 15.260098457336426
},
{
"dpo_loss": 0.5317620635032654,
"epoch": 2.2390174775625886,
"grad_norm": 16507.88081841276,
"learning_rate": 9.064400256282755e-08,
"logits": -1.399611234664917,
"logps": -86.44268035888672,
"loss": 52.3325,
"objective": 45.95266342163086,
"ranking_idealized": 0.637499988079071,
"ranking_idealized_expo": 0.5625,
"ranking_simple": 0.637499988079071,
"regularize": 0.1156383752822876,
"step": 790,
"wo_beta": 15.514897346496582
},
{
"dpo_loss": 0.5416150689125061,
"epoch": 2.2531884742560226,
"grad_norm": 14862.627085415834,
"learning_rate": 8.748087988264668e-08,
"logits": -1.4897602796554565,
"logps": -87.291259765625,
"loss": 50.1301,
"objective": 47.57838439941406,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5874999761581421,
"regularize": 0.12325119227170944,
"step": 795,
"wo_beta": 14.180596351623535
},
{
"dpo_loss": 0.5349844694137573,
"epoch": 2.2673594709494567,
"grad_norm": 16155.361379646765,
"learning_rate": 8.436218830716258e-08,
"logits": -1.5085468292236328,
"logps": -86.47720336914062,
"loss": 51.9463,
"objective": 60.657657623291016,
"ranking_idealized": 0.5916666388511658,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5916666388511658,
"regularize": 0.13889312744140625,
"step": 800,
"wo_beta": 14.652113914489746
},
{
"epoch": 2.2673594709494567,
"eval_dpo_loss": 0.6781792044639587,
"eval_logits": -1.4993284940719604,
"eval_logps": -92.00647735595703,
"eval_loss": 179.21780395507812,
"eval_objective": 175.7035675048828,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.5289855003356934,
"eval_regularize": 0.39864060282707214,
"eval_runtime": 370.6492,
"eval_samples_per_second": 15.621,
"eval_steps_per_second": 1.303,
"eval_wo_beta": 16.56892204284668,
"step": 800
},
{
"dpo_loss": 0.5452725291252136,
"epoch": 2.2815304676428907,
"grad_norm": 16570.72383235664,
"learning_rate": 8.1288780455512e-08,
"logits": -1.5190993547439575,
"logps": -84.90750122070312,
"loss": 45.9105,
"objective": 44.02467346191406,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5666666626930237,
"regularize": 0.10332147032022476,
"step": 805,
"wo_beta": 16.12928581237793
},
{
"dpo_loss": 0.5472940802574158,
"epoch": 2.295701464336325,
"grad_norm": 17306.744795453895,
"learning_rate": 7.826149656671385e-08,
"logits": -1.6159324645996094,
"logps": -84.2494888305664,
"loss": 48.3513,
"objective": 51.42966079711914,
"ranking_idealized": 0.6458333134651184,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.637499988079071,
"regularize": 0.1283574104309082,
"step": 810,
"wo_beta": 14.84490966796875
},
{
"dpo_loss": 0.5408957004547119,
"epoch": 2.309872461029759,
"grad_norm": 17714.10311614495,
"learning_rate": 7.528116426995604e-08,
"logits": -1.5414897203445435,
"logps": -86.03276824951172,
"loss": 44.4155,
"objective": 42.15602493286133,
"ranking_idealized": 0.5791666507720947,
"ranking_idealized_expo": 0.47083333134651184,
"ranking_simple": 0.5625,
"regularize": 0.09716067463159561,
"step": 815,
"wo_beta": 15.411224365234375
},
{
"dpo_loss": 0.5434551239013672,
"epoch": 2.324043457723193,
"grad_norm": 16657.377741221597,
"learning_rate": 7.234859835833021e-08,
"logits": -1.4976943731307983,
"logps": -85.22306060791016,
"loss": 44.4464,
"objective": 42.74842834472656,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.574999988079071,
"regularize": 0.1011769101023674,
"step": 820,
"wo_beta": 15.093592643737793
},
{
"dpo_loss": 0.5428169965744019,
"epoch": 2.3382144544166272,
"grad_norm": 16447.115430947913,
"learning_rate": 6.94646005660749e-08,
"logits": -1.5226491689682007,
"logps": -84.4798812866211,
"loss": 45.0833,
"objective": 45.554691314697266,
"ranking_idealized": 0.625,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.612500011920929,
"regularize": 0.11467791348695755,
"step": 825,
"wo_beta": 16.547521591186523
},
{
"dpo_loss": 0.5347627997398376,
"epoch": 2.3523854511100613,
"grad_norm": 16335.000287824352,
"learning_rate": 6.662995934939006e-08,
"logits": -1.5204423666000366,
"logps": -86.52505493164062,
"loss": 49.1483,
"objective": 49.5906867980957,
"ranking_idealized": 0.6416666507720947,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.6166666746139526,
"regularize": 0.11676025390625,
"step": 830,
"wo_beta": 16.500276565551758
},
{
"dpo_loss": 0.5479462742805481,
"epoch": 2.3665564478034957,
"grad_norm": 16959.398846741216,
"learning_rate": 6.384544967088063e-08,
"logits": -1.462269902229309,
"logps": -86.40924835205078,
"loss": 48.4639,
"objective": 45.90879821777344,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5958333611488342,
"regularize": 0.11642123758792877,
"step": 835,
"wo_beta": 15.002260208129883
},
{
"dpo_loss": 0.5614480376243591,
"epoch": 2.3807274444969297,
"grad_norm": 15851.156516804678,
"learning_rate": 6.111183278768955e-08,
"logits": -1.4380650520324707,
"logps": -86.09837341308594,
"loss": 40.9965,
"objective": 37.455379486083984,
"ranking_idealized": 0.5249999761581421,
"ranking_idealized_expo": 0.4749999940395355,
"ranking_simple": 0.5,
"regularize": 0.0973404198884964,
"step": 840,
"wo_beta": 15.861971855163574
},
{
"dpo_loss": 0.5543637871742249,
"epoch": 2.3948984411903638,
"grad_norm": 19702.171994535533,
"learning_rate": 5.842985604337769e-08,
"logits": -1.4723432064056396,
"logps": -87.03990173339844,
"loss": 47.1256,
"objective": 46.43952941894531,
"ranking_idealized": 0.5958333611488342,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5833333134651184,
"regularize": 0.11250331997871399,
"step": 845,
"wo_beta": 16.375986099243164
},
{
"dpo_loss": 0.5473025441169739,
"epoch": 2.409069437883798,
"grad_norm": 16397.021305258124,
"learning_rate": 5.5800252663607636e-08,
"logits": -1.509826898574829,
"logps": -85.1669692993164,
"loss": 44.3463,
"objective": 44.0050048828125,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.49166667461395264,
"ranking_simple": 0.5583333373069763,
"regularize": 0.10994389653205872,
"step": 850,
"wo_beta": 15.741286277770996
},
{
"epoch": 2.409069437883798,
"eval_dpo_loss": 0.6782696843147278,
"eval_logits": -1.4917659759521484,
"eval_logps": -92.23719787597656,
"eval_loss": 179.17352294921875,
"eval_objective": 175.77769470214844,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.5284678936004639,
"eval_regularize": 0.39879217743873596,
"eval_runtime": 368.8355,
"eval_samples_per_second": 15.698,
"eval_steps_per_second": 1.31,
"eval_wo_beta": 16.56818962097168,
"step": 850
},
{
"dpo_loss": 0.5434221625328064,
"epoch": 2.423240434577232,
"grad_norm": 14761.032913238774,
"learning_rate": 5.3223741555686873e-08,
"logits": -1.537110447883606,
"logps": -84.45201110839844,
"loss": 43.1862,
"objective": 41.27051544189453,
"ranking_idealized": 0.5916666388511658,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5833333134651184,
"regularize": 0.10142151266336441,
"step": 855,
"wo_beta": 15.314756393432617
},
{
"dpo_loss": 0.5529573559761047,
"epoch": 2.4374114312706663,
"grad_norm": 16145.709635641595,
"learning_rate": 5.070102711202606e-08,
"logits": -1.4745042324066162,
"logps": -85.98120880126953,
"loss": 42.9052,
"objective": 38.47713088989258,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.574999988079071,
"regularize": 0.09888458997011185,
"step": 860,
"wo_beta": 16.20383644104004
},
{
"dpo_loss": 0.551279604434967,
"epoch": 2.4515824279641003,
"grad_norm": 18278.1439042312,
"learning_rate": 4.8232799017564967e-08,
"logits": -1.4951705932617188,
"logps": -85.17224884033203,
"loss": 44.7949,
"objective": 45.912452697753906,
"ranking_idealized": 0.5791666507720947,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.574999988079071,
"regularize": 0.11339430510997772,
"step": 865,
"wo_beta": 16.40469741821289
},
{
"dpo_loss": 0.5488017201423645,
"epoch": 2.4657534246575343,
"grad_norm": 15873.838165857398,
"learning_rate": 4.5819732061219475e-08,
"logits": -1.5395283699035645,
"logps": -85.87442779541016,
"loss": 45.7505,
"objective": 46.77009963989258,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5833333134651184,
"regularize": 0.11627896875143051,
"step": 870,
"wo_beta": 15.758785247802734
},
{
"dpo_loss": 0.5411531329154968,
"epoch": 2.4799244213509684,
"grad_norm": 16103.251582257983,
"learning_rate": 4.346248595140112e-08,
"logits": -1.4675084352493286,
"logps": -85.36338806152344,
"loss": 46.2974,
"objective": 47.864070892333984,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5708333253860474,
"ranking_simple": 0.6000000238418579,
"regularize": 0.11755504459142685,
"step": 875,
"wo_beta": 17.365110397338867
},
{
"dpo_loss": 0.5485495924949646,
"epoch": 2.4940954180444024,
"grad_norm": 15694.189473425999,
"learning_rate": 4.116170513565942e-08,
"logits": -1.3954468965530396,
"logps": -85.28124237060547,
"loss": 41.7812,
"objective": 37.13809585571289,
"ranking_idealized": 0.5625,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5541666746139526,
"regularize": 0.0931786298751831,
"step": 880,
"wo_beta": 17.850427627563477
},
{
"dpo_loss": 0.5530834794044495,
"epoch": 2.5082664147378364,
"grad_norm": 17596.947495882203,
"learning_rate": 3.8918018624496286e-08,
"logits": -1.562106728553772,
"logps": -84.21708679199219,
"loss": 45.0999,
"objective": 50.288021087646484,
"ranking_idealized": 0.5916666388511658,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5874999761581421,
"regularize": 0.11681138724088669,
"step": 885,
"wo_beta": 17.1990909576416
},
{
"dpo_loss": 0.5438919067382812,
"epoch": 2.5224374114312704,
"grad_norm": 17590.89973277564,
"learning_rate": 3.673203981940068e-08,
"logits": -1.4610990285873413,
"logps": -83.57906341552734,
"loss": 45.9865,
"objective": 43.9241828918457,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5708333253860474,
"regularize": 0.11596141010522842,
"step": 890,
"wo_beta": 14.2067232131958
},
{
"dpo_loss": 0.5475446581840515,
"epoch": 2.536608408124705,
"grad_norm": 16852.292477633127,
"learning_rate": 3.46043663451511e-08,
"logits": -1.456311583518982,
"logps": -85.9644775390625,
"loss": 40.1267,
"objective": 40.91215133666992,
"ranking_idealized": 0.5458333492279053,
"ranking_idealized_expo": 0.49166667461395264,
"ranking_simple": 0.5375000238418579,
"regularize": 0.0973983108997345,
"step": 895,
"wo_beta": 16.769834518432617
},
{
"dpo_loss": 0.5658264756202698,
"epoch": 2.550779404818139,
"grad_norm": 16973.16020360833,
"learning_rate": 3.2535579886430715e-08,
"logits": -1.4089369773864746,
"logps": -84.34557342529297,
"loss": 44.3015,
"objective": 48.17121124267578,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5874999761581421,
"regularize": 0.10806681215763092,
"step": 900,
"wo_beta": 15.534666061401367
},
{
"epoch": 2.550779404818139,
"eval_dpo_loss": 0.678382933139801,
"eval_logits": -1.4982877969741821,
"eval_logps": -92.18975830078125,
"eval_loss": 179.15904235839844,
"eval_objective": 175.8240203857422,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.5279502868652344,
"eval_regularize": 0.3989744186401367,
"eval_runtime": 368.6241,
"eval_samples_per_second": 15.707,
"eval_steps_per_second": 1.31,
"eval_wo_beta": 16.590484619140625,
"step": 900
},
{
"dpo_loss": 0.5509156584739685,
"epoch": 2.564950401511573,
"grad_norm": 14234.779776824053,
"learning_rate": 3.052624602880063e-08,
"logits": -1.5034754276275635,
"logps": -84.19306945800781,
"loss": 40.4357,
"objective": 38.76914978027344,
"ranking_idealized": 0.5708333253860474,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5458333492279053,
"regularize": 0.09935871511697769,
"step": 905,
"wo_beta": 15.227039337158203
},
{
"dpo_loss": 0.5412671566009521,
"epoch": 2.579121398205007,
"grad_norm": 16823.804354079846,
"learning_rate": 2.8576914104074423e-08,
"logits": -1.4797313213348389,
"logps": -87.3152084350586,
"loss": 41.7421,
"objective": 45.547401428222656,
"ranking_idealized": 0.612500011920929,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5874999761581421,
"regularize": 0.10913080722093582,
"step": 910,
"wo_beta": 16.365530014038086
},
{
"dpo_loss": 0.5502139329910278,
"epoch": 2.593292394898441,
"grad_norm": 15853.539663724245,
"learning_rate": 2.668811704013646e-08,
"logits": -1.5921828746795654,
"logps": -86.05388641357422,
"loss": 42.1486,
"objective": 39.99686050415039,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.6041666865348816,
"regularize": 0.103940449655056,
"step": 915,
"wo_beta": 15.442140579223633
},
{
"dpo_loss": 0.5391423106193542,
"epoch": 2.6074633915918755,
"grad_norm": 16570.329855367927,
"learning_rate": 2.486037121524448e-08,
"logits": -1.4353820085525513,
"logps": -85.45365905761719,
"loss": 46.2712,
"objective": 45.818546295166016,
"ranking_idealized": 0.6041666865348816,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5916666388511658,
"regularize": 0.10537134110927582,
"step": 920,
"wo_beta": 15.817992210388184
},
{
"dpo_loss": 0.5430201888084412,
"epoch": 2.6216343882853095,
"grad_norm": 16860.801218719196,
"learning_rate": 2.3094176316856978e-08,
"logits": -1.4627550840377808,
"logps": -85.5040512084961,
"loss": 41.8789,
"objective": 39.82048797607422,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.4749999940395355,
"ranking_simple": 0.5458333492279053,
"regularize": 0.10451699048280716,
"step": 925,
"wo_beta": 16.46214485168457
},
{
"dpo_loss": 0.5547136068344116,
"epoch": 2.6358053849787435,
"grad_norm": 18548.498992469253,
"learning_rate": 2.1390015205023898e-08,
"logits": -1.4610332250595093,
"logps": -85.3515625,
"loss": 44.5914,
"objective": 45.85152053833008,
"ranking_idealized": 0.5416666865348816,
"ranking_idealized_expo": 0.44583332538604736,
"ranking_simple": 0.5166666507720947,
"regularize": 0.11503276228904724,
"step": 930,
"wo_beta": 16.94695281982422
},
{
"dpo_loss": 0.5648698210716248,
"epoch": 2.6499763816721775,
"grad_norm": 16823.626472777796,
"learning_rate": 1.974835378037723e-08,
"logits": -1.4719030857086182,
"logps": -84.22066497802734,
"loss": 42.8827,
"objective": 47.928733825683594,
"ranking_idealized": 0.6499999761581421,
"ranking_idealized_expo": 0.574999988079071,
"ranking_simple": 0.6208333373069763,
"regularize": 0.11680851131677628,
"step": 935,
"wo_beta": 16.337308883666992
},
{
"dpo_loss": 0.5417830944061279,
"epoch": 2.6641473783656116,
"grad_norm": 19648.95461433399,
"learning_rate": 1.816964085675865e-08,
"logits": -1.50851309299469,
"logps": -87.09917449951172,
"loss": 44.1259,
"objective": 46.947654724121094,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5874999761581421,
"regularize": 0.112226702272892,
"step": 940,
"wo_beta": 17.42079734802246
},
{
"dpo_loss": 0.5409041047096252,
"epoch": 2.678318375059046,
"grad_norm": 17233.04175831224,
"learning_rate": 1.6654308038518056e-08,
"logits": -1.544434905052185,
"logps": -85.57273864746094,
"loss": 41.238,
"objective": 48.42988967895508,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.5583333373069763,
"regularize": 0.11696865409612656,
"step": 945,
"wo_beta": 15.977194786071777
},
{
"dpo_loss": 0.554226815700531,
"epoch": 2.69248937175248,
"grad_norm": 17994.23017373139,
"learning_rate": 1.520276960251751e-08,
"logits": -1.4273337125778198,
"logps": -85.7279281616211,
"loss": 43.4164,
"objective": 41.65528106689453,
"ranking_idealized": 0.637499988079071,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.6041666865348816,
"regularize": 0.1018596738576889,
"step": 950,
"wo_beta": 15.610276222229004
},
{
"epoch": 2.69248937175248,
"eval_dpo_loss": 0.6785179376602173,
"eval_logits": -1.4967025518417358,
"eval_logps": -92.20464324951172,
"eval_loss": 179.2801055908203,
"eval_objective": 176.04083251953125,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.5274327397346497,
"eval_regularize": 0.39928528666496277,
"eval_runtime": 370.2035,
"eval_samples_per_second": 15.64,
"eval_steps_per_second": 1.305,
"eval_wo_beta": 16.589099884033203,
"step": 950
},
{
"dpo_loss": 0.5416913628578186,
"epoch": 2.706660368445914,
"grad_norm": 16913.4446377394,
"learning_rate": 1.3815422384871878e-08,
"logits": -1.5223019123077393,
"logps": -85.46512603759766,
"loss": 41.9154,
"objective": 39.07767105102539,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.4749999940395355,
"ranking_simple": 0.5583333373069763,
"regularize": 0.10648568719625473,
"step": 955,
"wo_beta": 14.056703567504883
},
{
"dpo_loss": 0.5538465976715088,
"epoch": 2.720831365139348,
"grad_norm": 16463.281607975867,
"learning_rate": 1.2492645672457836e-08,
"logits": -1.4985733032226562,
"logps": -85.17173767089844,
"loss": 42.6366,
"objective": 43.174072265625,
"ranking_idealized": 0.5791666507720947,
"ranking_idealized_expo": 0.49166667461395264,
"ranking_simple": 0.5708333253860474,
"regularize": 0.10360194742679596,
"step": 960,
"wo_beta": 16.368473052978516
},
{
"dpo_loss": 0.537990391254425,
"epoch": 2.735002361832782,
"grad_norm": 17961.612959662347,
"learning_rate": 1.1234801099220786e-08,
"logits": -1.509239912033081,
"logps": -85.44644165039062,
"loss": 39.0678,
"objective": 39.20804977416992,
"ranking_idealized": 0.6416666507720947,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.6291666626930237,
"regularize": 0.10068784654140472,
"step": 965,
"wo_beta": 15.433809280395508
},
{
"dpo_loss": 0.5542954802513123,
"epoch": 2.7491733585262166,
"grad_norm": 15088.900869572226,
"learning_rate": 1.004223254730749e-08,
"logits": -1.519822597503662,
"logps": -86.41224670410156,
"loss": 44.4091,
"objective": 43.453468322753906,
"ranking_idealized": 0.6208333373069763,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5833333134651184,
"regularize": 0.11093102395534515,
"step": 970,
"wo_beta": 15.656749725341797
},
{
"dpo_loss": 0.5459226965904236,
"epoch": 2.7633443552196506,
"grad_norm": 17863.309850864207,
"learning_rate": 8.915266053052373e-09,
"logits": -1.4553431272506714,
"logps": -84.71710968017578,
"loss": 44.5817,
"objective": 54.85538101196289,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.6083333492279053,
"regularize": 0.13891781866550446,
"step": 975,
"wo_beta": 16.4742374420166
},
{
"dpo_loss": 0.5517151355743408,
"epoch": 2.7775153519130846,
"grad_norm": 17532.917025188563,
"learning_rate": 7.85420971784223e-09,
"logits": -1.5741106271743774,
"logps": -84.43816375732422,
"loss": 49.1041,
"objective": 50.19886016845703,
"ranking_idealized": 0.5916666388511658,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.550000011920929,
"regularize": 0.12199045717716217,
"step": 980,
"wo_beta": 14.642070770263672
},
{
"dpo_loss": 0.5505563020706177,
"epoch": 2.7916863486065187,
"grad_norm": 15695.994208260572,
"learning_rate": 6.859353623884567e-09,
"logits": -1.4095691442489624,
"logps": -82.93616485595703,
"loss": 43.0667,
"objective": 50.173011779785156,
"ranking_idealized": 0.5791666507720947,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.5666666626930237,
"regularize": 0.1220245286822319,
"step": 985,
"wo_beta": 14.67066764831543
},
{
"dpo_loss": 0.5449987649917603,
"epoch": 2.8058573452999527,
"grad_norm": 15965.60549784291,
"learning_rate": 5.930969754901843e-09,
"logits": -1.484297275543213,
"logps": -86.44190979003906,
"loss": 44.9483,
"objective": 41.374794006347656,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.4749999940395355,
"ranking_simple": 0.5625,
"regularize": 0.10469052940607071,
"step": 990,
"wo_beta": 16.099658966064453
},
{
"dpo_loss": 0.5509870648384094,
"epoch": 2.820028341993387,
"grad_norm": 15636.732404031713,
"learning_rate": 5.069311921774039e-09,
"logits": -1.531805396080017,
"logps": -84.8103256225586,
"loss": 43.9212,
"objective": 41.92687225341797,
"ranking_idealized": 0.6208333373069763,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.6083333492279053,
"regularize": 0.11357491463422775,
"step": 995,
"wo_beta": 16.878124237060547
},
{
"dpo_loss": 0.544861376285553,
"epoch": 2.8341993386868207,
"grad_norm": 15284.06322793647,
"learning_rate": 4.274615693149075e-09,
"logits": -1.4928451776504517,
"logps": -83.65907287597656,
"loss": 43.6009,
"objective": 46.88129425048828,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.6000000238418579,
"regularize": 0.11148179322481155,
"step": 1000,
"wo_beta": 15.039312362670898
},
{
"epoch": 2.8341993386868207,
"eval_dpo_loss": 0.6785008311271667,
"eval_logits": -1.4978208541870117,
"eval_logps": -92.27050018310547,
"eval_loss": 179.2790985107422,
"eval_objective": 175.9962921142578,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.5279502868652344,
"eval_regularize": 0.39921271800994873,
"eval_runtime": 373.1717,
"eval_samples_per_second": 15.516,
"eval_steps_per_second": 1.294,
"eval_wo_beta": 16.587968826293945,
"step": 1000
},
{
"dpo_loss": 0.5515304803848267,
"epoch": 2.848370335380255,
"grad_norm": 16385.652650772114,
"learning_rate": 3.547098331040915e-09,
"logits": -1.482871413230896,
"logps": -84.16107940673828,
"loss": 41.5394,
"objective": 43.54771041870117,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5833333134651184,
"regularize": 0.11037115752696991,
"step": 1005,
"wo_beta": 16.216983795166016
},
{
"dpo_loss": 0.5438867211341858,
"epoch": 2.862541332073689,
"grad_norm": 16774.77560364275,
"learning_rate": 2.886958731432132e-09,
"logits": -1.542305827140808,
"logps": -84.64833068847656,
"loss": 43.5186,
"objective": 44.43704605102539,
"ranking_idealized": 0.612500011920929,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5958333611488342,
"regularize": 0.10451284050941467,
"step": 1010,
"wo_beta": 15.962403297424316
},
{
"dpo_loss": 0.5501060485839844,
"epoch": 2.8767123287671232,
"grad_norm": 17522.630643855955,
"learning_rate": 2.294377369897793e-09,
"logits": -1.4613019227981567,
"logps": -83.6242446899414,
"loss": 42.9096,
"objective": 43.5313720703125,
"ranking_idealized": 0.6541666388511658,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.6208333373069763,
"regularize": 0.1068674772977829,
"step": 1015,
"wo_beta": 15.568625450134277
},
{
"dpo_loss": 0.549178957939148,
"epoch": 2.8908833254605573,
"grad_norm": 16355.818495658159,
"learning_rate": 1.769516252265235e-09,
"logits": -1.4256434440612793,
"logps": -85.48424530029297,
"loss": 41.2726,
"objective": 41.4518928527832,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5708333253860474,
"regularize": 0.0968986377120018,
"step": 1020,
"wo_beta": 18.603649139404297
},
{
"dpo_loss": 0.5602646470069885,
"epoch": 2.9050543221539913,
"grad_norm": 15838.312706923856,
"learning_rate": 1.3125188703233814e-09,
"logits": -1.5212275981903076,
"logps": -85.22294616699219,
"loss": 43.2553,
"objective": 43.40156555175781,
"ranking_idealized": 0.6208333373069763,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.6000000238418579,
"regularize": 0.1083984375,
"step": 1025,
"wo_beta": 15.021879196166992
},
{
"dpo_loss": 0.5516722798347473,
"epoch": 2.9192253188474258,
"grad_norm": 16291.39704850031,
"learning_rate": 9.235101625932884e-10,
"logits": -1.5682528018951416,
"logps": -85.36617279052734,
"loss": 43.7786,
"objective": 39.36371612548828,
"ranking_idealized": 0.5416666865348816,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.5375000238418579,
"regularize": 0.0955829992890358,
"step": 1030,
"wo_beta": 16.441747665405273
},
{
"dpo_loss": 0.5399314761161804,
"epoch": 2.9333963155408598,
"grad_norm": 16748.657990232314,
"learning_rate": 6.025964801714411e-10,
"logits": -1.5260014533996582,
"logps": -86.03439331054688,
"loss": 41.3554,
"objective": 38.64834976196289,
"ranking_idealized": 0.625,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5916666388511658,
"regularize": 0.0944010317325592,
"step": 1035,
"wo_beta": 14.993240356445312
},
{
"dpo_loss": 0.5549695491790771,
"epoch": 2.947567312234294,
"grad_norm": 15029.578861688902,
"learning_rate": 3.498655576543441e-10,
"logits": -1.5111292600631714,
"logps": -85.76802062988281,
"loss": 38.8328,
"objective": 38.22753143310547,
"ranking_idealized": 0.5708333253860474,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.550000011920929,
"regularize": 0.095832958817482,
"step": 1040,
"wo_beta": 16.434364318847656
},
{
"dpo_loss": 0.5555641651153564,
"epoch": 2.961738308927728,
"grad_norm": 17992.01977336534,
"learning_rate": 1.6538648915270793e-10,
"logits": -1.481310248374939,
"logps": -87.40520477294922,
"loss": 38.5959,
"objective": 39.465171813964844,
"ranking_idealized": 0.6208333373069763,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.6166666746139526,
"regularize": 0.10020165145397186,
"step": 1045,
"wo_beta": 18.04179573059082
},
{
"dpo_loss": 0.5507573485374451,
"epoch": 2.975909305621162,
"grad_norm": 17353.770623615765,
"learning_rate": 4.920970940180957e-11,
"logits": -1.522560715675354,
"logps": -83.40792083740234,
"loss": 47.7054,
"objective": 48.42831802368164,
"ranking_idealized": 0.6541666388511658,
"ranking_idealized_expo": 0.5541666746139526,
"ranking_simple": 0.6166666746139526,
"regularize": 0.121465764939785,
"step": 1050,
"wo_beta": 15.164950370788574
},
{
"epoch": 2.975909305621162,
"eval_dpo_loss": 0.6784854531288147,
"eval_logits": -1.4974991083145142,
"eval_logps": -92.26132202148438,
"eval_loss": 179.26217651367188,
"eval_objective": 175.9752197265625,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.5279502868652344,
"eval_regularize": 0.39915931224823,
"eval_runtime": 370.7869,
"eval_samples_per_second": 15.615,
"eval_steps_per_second": 1.303,
"eval_wo_beta": 16.585628509521484,
"step": 1050
},
{
"dpo_loss": 0.5567707419395447,
"epoch": 2.9900803023145963,
"grad_norm": 17458.927279367348,
"learning_rate": 1.3669799732163311e-12,
"logits": -1.4982311725616455,
"logps": -84.29004669189453,
"loss": 47.3347,
"objective": 49.85600662231445,
"ranking_idealized": 0.6083333492279053,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.6000000238418579,
"regularize": 0.12724058330059052,
"step": 1055,
"wo_beta": 15.180956840515137
},
{
"epoch": 2.992914501653283,
"step": 1056,
"total_flos": 0.0,
"train_loss": 87.93023242011215,
"train_runtime": 38512.4809,
"train_samples_per_second": 3.957,
"train_steps_per_second": 0.027
}
],
"logging_steps": 5,
"max_steps": 1056,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}